diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 00000000..a900528e
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1 @@
+*.pnm -diff -text
diff --git a/.gitignore b/.gitignore
index b1e99046..f38884a2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,7 @@ android/armv6/share
 android/armv7/share
 lib*/*.a
 *.o
+*.o.*
 *.d
 *.def
 *.dll
@@ -19,6 +20,8 @@ lib*/*.a
 *-example
 *-test
 *_g
+\#*
+.\#*
 *_converted.c
 *_preprocessed.c
 build.log
@@ -29,15 +32,19 @@ build.log
 /ffprobe
 /ffserver
 /config.*
-/version.h
+/coverage.info
+/avversion.h
 conflog.txt
 /doc/*.1
 /doc/*.3
 /doc/*.html
 /doc/*.pod
-/doc/*.texi
+/doc/config.texi
 /doc/avoptions_codec.texi
 /doc/avoptions_format.texi
+/doc/doxy/html/
+/doc/examples/avio_dir_cmd
+/doc/examples/avio_reading
 /doc/examples/decoding_encoding
 /doc/examples/demuxing
 /doc/examples/filtering_audio
@@ -48,14 +55,16 @@ conflog.txt
 /doc/examples/resampling_audio
 /doc/examples/scaling_video
 /doc/fate.txt
-/doc/doxy/html/
 /doc/print_options
 /libavcodec/*_tablegen
 /libavcodec/*_tables.c
 /libavcodec/*_tables.h
 /libavutil/avconfig.h
+/libavutil/ffversion.h
+/src
 /tests/audiogen
 /tests/base64
+/tests/checkasm/checkasm
 /tests/data/
 /tests/rotozoom
 /tests/tiny_psnr
@@ -75,4 +84,3 @@ conflog.txt
 /tools/qt-faststart
 /tools/trasher
 /tools/seek_print
-libavutil/ffversion.h
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 00000000..e541ee16
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,26 @@
+language: c
+sudo: false
+os:
+  - linux
+  - osx
+addons:
+  apt:
+    packages:
+      - yasm
+      - diffutils
+compiler:
+  - clang
+  - gcc
+cache:
+  directories:
+    - ffmpeg-samples
+before_install:
+  - if [ "$TRAVIS_OS_NAME" == "osx" ]; then brew update --all; fi
+install:
+  - if [ "$TRAVIS_OS_NAME" == "osx" ]; then brew install yasm; fi
+script:
+  - mkdir -p ffmpeg-samples
+  - ./configure --samples=ffmpeg-samples --cc=$CC
+  - make -j 8
+  - make fate-rsync
+  - make check -j 8
diff --git a/Changelog b/Changelog
index 68ce3f7a..248f8b50 100644
--- a/Changelog
+++ b/Changelog
@@ -1,94 +1,179 @@
 Entries are sorted chronologically from oldest to youngest within each release,
 releases are sorted from youngest to oldest.
 
-version <next>:
-
-version 2.7.2:
-- imc: use correct position for flcoeffs2 calculation
-- hevc: check slice address length
-- snow: remove an obsolete av_assert2
-- webp: fix infinite loop in webp_decode_frame
-- wavpack: limit extra_bits to 32 and use get_bits_long
-- ffmpeg: only count got_output/errors in decode_error_stat
-- ffmpeg: exit_on_error if decoding a packet failed
-- pthread_frame: forward error codes when flushing
-- huffyuvdec: validate image size
-- wavpack: use get_bits_long to read up to 32 bits
-- nutdec: check maxpos in read_sm_data before returning success
-- s302m: fix arithmetic exception
-- vc1dec: use get_bits_long and limit the read bits to 32
-- mpegaudiodec: copy AVFloatDSPContext from first context to all contexts
-- avcodec/vp8: Check buffer size in vp8_decode_frame_header()
-- avcodec/vp8: Fix null pointer dereference in ff_vp8_decode_free()
-- avcodec/diracdec: Check for hpel_base allocation failure
-- avcodec/rv34: Clear pointers in ff_rv34_decode_init_thread_copy()
-- avfilter/af_aresample: Check ff_all_* for allocation failures
-- avcodec/pthread_frame: clear priv_data, avoid stale pointer in error case
-- swscale/utils: Clear pix buffers
-- avutil/fifo: Fix the case where func() returns less bytes than requested in av_fifo_generic_write()
-- ffmpeg: Fix cleanup after failed allocation of output_files
-- avformat/mov: Fix deallocation when MOVStreamContext failed to allocate
-- ffmpeg: Fix crash with ost->last_frame allocation failure
-- ffmpeg: Fix cleanup with ost = NULL
-- avcodec/pthread_frame: check avctx on deallocation
-- avcodec/sanm: Reset sizes in destroy_buffers()
-- avcodec/alac: Clear pointers in allocate_buffers()
-- bytestream2: set the reader to the end when reading more than available
-- avcodec/utils: use a minimum 32pixel width in  avcodec_align_dimensions2() for H.264
-- avcodec/mpegvideo: Clear pointers in ff_mpv_common_init()
-- oggparsedirac: check return value of init_get_bits
-- wmalosslessdec: reset frame->nb_samples on packet loss
-- wmalosslessdec: avoid reading 0 bits with get_bits
-- Put a space between string literals and macros.
-- avcodec/rawenc: Use ff_alloc_packet() instead of ff_alloc_packet2()
-- avcodec/aacsbr: check that the element type matches before applying SBR
-- avcodec/h264_slice: Use w/h from the AVFrame instead of mb_w/h
-- vp9/update_prob: prevent out of bounds table read
-- avfilter/vf_transpose: Fix rounding error
-- avcodec/h264_refs: discard mismatching references
-- avcodec/mjpegdec: Fix small picture upscale
-- avcodec/pngdec: Check values before updating context in decode_fctl_chunk()
-- avcodec/pngdec: Copy IHDR & plte state from last thread
-- avcodec/pngdec: Require a IHDR chunk before fctl
-- avcodec/pngdec: Only allow one IHDR chunk
-- wmavoice: limit wmavoice_decode_packet return value to packet size
-- swscale/swscale_unscaled: Fix rounding difference with RGBA output between little and big endian
-- ffmpeg: Do not use the data/size of a bitstream filter after failure
-- swscale/x86/rgb2rgb_template: fix signedness of v in shuffle_bytes_2103_{mmx,mmxext}
-- vda: unlock the pixel buffer base address.
-- swscale/rgb2rgb_template: Fix signedness of v in shuffle_bytes_2103_c()
-- swscale/rgb2rgb_template: Implement shuffle_bytes_0321_c and fix shuffle_bytes_2103_c on BE
-- swscale/rgb2rgb_template: Disable shuffle_bytes_2103_c on big endian
-- swr: Remember previously set int_sample_format from user
-- swresample: soxr implementation for swr_get_out_samples()
-- avformat/swfdec: Do not error out on pixel format changes
-- ffmpeg_opt: Fix forcing fourccs
-- configure: Check for x265_api_get
-- swscale/x86/rgb2rgb_template: don't call emms on sse2/avx functions
-- swscale/x86/rgb2rgb_template: add missing xmm clobbers
-- library.mak: Workaround SDL redefining main and breaking fate tests on mingw
-- vaapi_h264: fix RefPicList[] field flags.
-
-version 2.7.1:
+
+version 3.0.2:
+- avcodec/ttaenc: Reallocate packet if it's too small
+- configure: build fix for P5600 with mips code restructuring
+- mips: add support for R6
+- pgssubdec: fix subpicture output colorspace and range
+- avcodec/ac3dec: Reset SPX when switching from EAC3 to AC3
+- avfilter/vf_drawtext: Check return code of load_glyph()
+- avformat/mux: Check that deinit is set before calling it
+- avcodec/takdec: add code that got somehow lost in process of REing
+- avcodec/apedec: fix decoding of stereo files with one channel full of silence
+- avcodec/avpacket: Fix off by 5 error
+- avcodec/h264: Fix for H.264 configuration parsing
+- avcodec/bmp_parser: Ensure remaining_size is not too small in startcode packet crossing corner case
+- avcodec/pngdec: Fix alpha detection with skip_frame
+- Changelog: Make formatting consistent
+- avfilter/src_movie: fix how we check for overflows with seek_point
+- avcodec/j2kenc: Add attribution to OpenJPEG project
+
+
+version 3.0.1:
+- avcodec/libutvideodec: copy frame so it has reference counters when refcounted_frames is set
+- avformat/rtpdec_jpeg: fix low contrast image on low quality setting
+- avformat/mpegtsenc: Fix used service
+- avformat/mpegtsenc: Keep track of the program for each service
+- avformat/file: Add crypto to default whitelist
+- avcodec/mjpegenc_common: Store approximate aspect if exact cannot be stored
+- lavc/hevc: Allow arbitrary garbage in bytestream as long as at least one NAL unit is found.
+- avcodec/resample: Remove disabled and faulty code
+- indeo2: Fix banding artefacts
+- indeo2data: K&R formatting cosmetics
+- avformat/hlsenc: Fix passing options, regression since bc9a5965c815cf7fd998d8ce14a18b8e861dd9ce
+- avutil/random_seed: Add the runtime in cycles of the main loop to the entropy pool
+- avutil/channel_layout: AV_CH_LAYOUT_6POINT1_BACK not reachable in parsing
+- avformat/concatdec: set safe mode to enabled instead of auto
+- avformat/utils: fix dts from pts code in compute_pkt_fields() during ascending delay
+- avformat/rtpenc: Fix integer overflow in NTP_TO_RTP_FORMAT
+- avcodec/dca: clear X96 channels if nothing was decoded
+- fate/aac: Increase fuzz of fate-aac-pns-encode from 72 to 74 for Loongson
+- avformat/cache: Fix memleak of tree entries
+- lavf/mov: downgrade sidx errors to non-fatal warnings; fixes trac #5216
+- lavf/mov: fix sidx with edit lists
+- avcodec/mjpegdec: Fix decoding slightly odd progressive jpeg
+- swscale/utils: Fix chrSrcHSubSample for GBRAP16
+- swscale/input: Fix GBRAP16 input
+- avutil/pixdesc: Make get_color_type() aware of CIE XYZ formats
+- avcodec/h264: Execute error concealment before marking the frame as done.
+- swscale/x86/output: Fix yuv2planeX_16* with unaligned destination
+- swscale/x86/output: Move code into yuv2planeX_mainloop
+- MAINTAINERS: add myself as an OS/2 maintainer
+- libwebpenc_animencoder: print library messages in verbose log levels
+- libwebpenc_animencoder: zero initialize the WebPAnimEncoderOptions struct
+- configure: check for SEC_I_CONTEXT_EXPIRED before enabling SChannel
+- lavf/http: Add httpproxy to the default protocol whitelist.
+- doc/utils: fix typo for min() description
+- ffserver&ffm: Fixed issues preventing ffserver write_index and files_size from being set correctly which was breaking ffserver streaming.
 - postproc: fix unaligned access
-- avformat: clarify what package needs to be compiled with SSL support
-- avcodec/libx264: Avoid reconfig on equivalent aspect ratios
-- avcodec/flacenc: Fix Invalid Rice order
-- tls_gnutls: fix hang on disconnection
-- avcodec/hevc_ps: Only discard overread VPS if a previous is available
-- ffmpeg: Free last_frame instead of just unref
-- avcodec/ffv1enc: fix bps for >8bit yuv when not explicitly set
-- avio: fix potential crashes when combining ffio_ensure_seekback + crc
-- examples/demuxing_decoding: use properties from frame instead of video_dec_ctx
-- h264: er: Copy from the previous reference only if compatible
-- doc: fix spelling errors
-- configure: only disable VSX for !ppc64el
-- ffmpeg_opt: Check for localtime() failure
-- avformat/singlejpeg: fix standalone compilation
-- configure: Disable VSX on unspecified / generic CPUs
-- avformat: Fix bug in parse_rps for HEVC.
-- takdec: ensure chan2 is a valid channel index
-- avcodec/h264_slice: Use AVFrame dimensions for grayscale handling
+- vc2enc: fix use of uninitialized variables in the rate control system, correctly zero out coefficient array padding
+- aacenc: optimize encoding speed
+- avcodec/diracdec: check bitstream size related fields for overflows
+- avcodec/h264_slice: Check PPS more extensively when it's not copied
+
+
+version 3.0:
+- Common Encryption (CENC) MP4 encoding and decoding support
+- DXV decoding
+- extrastereo filter
+- ocr filter
+- alimiter filter
+- stereowiden filter
+- stereotools filter
+- rubberband filter
+- tremolo filter
+- agate filter
+- chromakey filter
+- maskedmerge filter
+- Screenpresso SPV1 decoding
+- chromaprint fingerprinting muxer
+- ffplay dynamic volume control
+- displace filter
+- selectivecolor filter
+- extensive native AAC encoder improvements and removal of experimental flag
+- ADPCM PSX decoder
+- 3dostr, dcstr, fsb, genh, vag, xvag, ads, msf, svag & vpk demuxer
+- zscale filter
+- wve demuxer
+- zero-copy Intel QSV transcoding in ffmpeg
+- shuffleframes filter
+- SDX2 DPCM decoder
+- vibrato filter
+- innoHeim/Rsupport Screen Capture Codec decoder
+- ADPCM AICA decoder
+- Interplay ACM demuxer and audio decoder
+- XMA1 & XMA2 decoder
+- realtime filter
+- anoisesrc audio filter source
+- IVR demuxer
+- compensationdelay filter
+- acompressor filter
+- support encoding 16-bit RLE SGI images
+- apulsator filter
+- sidechaingate audio filter
+- mipsdspr1 option has been renamed to mipsdsp
+- aemphasis filter
+- mips32r5 option has been removed
+- mips64r6 option has been removed
+- DXVA2-accelerated VP9 decoding
+- SOFAlizer: virtual binaural acoustics filter
+- VAAPI VP9 hwaccel
+- audio high-order multiband parametric equalizer
+- automatic bitstream filtering
+- showspectrumpic filter
+- libstagefright support removed
+- spectrumsynth filter
+- ahistogram filter
+- only seek with the right mouse button in ffplay
+- toggle full screen when double-clicking with the left mouse button in ffplay
+- afftfilt filter
+- convolution filter
+- libquvi support removed
+- support for dvaudio in wav and avi
+- libaacplus and libvo-aacenc support removed
+- Cineform HD decoder
+- new DCA decoder with full support for DTS-HD extensions
+- significant performance improvements in Windows Television (WTV) demuxer
+- nnedi deinterlacer
+- streamselect video and astreamselect audio filter
+- swaprect filter
+- metadata video and ametadata audio filter
+- SMPTE VC-2 HQ profile support for the Dirac decoder
+- SMPTE VC-2 native encoder supporting the HQ profile
+
+
+version 2.8:
+- colorkey video filter
+- BFSTM/BCSTM demuxer
+- little-endian ADPCM_THP decoder
+- Hap decoder and encoder
+- DirectDraw Surface image/texture decoder
+- ssim filter
+- optional new ASF demuxer
+- showvolume filter
+- Many improvements to the JPEG 2000 decoder
+- Go2Meeting decoding support
+- adrawgraph audio and drawgraph video filter
+- removegrain video filter
+- Intel QSV-accelerated MPEG-2 video and HEVC encoding
+- Intel QSV-accelerated MPEG-2 video and HEVC decoding
+- Intel QSV-accelerated VC-1 video decoding
+- libkvazaar HEVC encoder
+- erosion, dilation, deflate and inflate video filters
+- Dynamic Audio Normalizer as dynaudnorm filter
+- Reverse video and areverse audio filter
+- Random filter
+- deband filter
+- AAC fixed-point decoding
+- sidechaincompress audio filter
+- bitstream filter for converting HEVC from MP4 to Annex B
+- acrossfade audio filter
+- allyuv and allrgb video sources
+- atadenoise video filter
+- OS X VideoToolbox support
+- aphasemeter filter
+- showfreqs filter
+- vectorscope filter
+- waveform filter
+- hstack and vstack filter
+- Support DNx100 (1440x1080@8)
+- VAAPI hevc hwaccel
+- VDPAU hevc hwaccel
+- framerate filter
+- Switched default encoders for webm to VP9 and Opus
+- Removed experimental flag from the JPEG 2000 encoder
 
 
 version 2.7:
diff --git a/LICENSE.md b/LICENSE.md
index 545d3668..0c53d0f5 100644
--- a/LICENSE.md
+++ b/LICENSE.md
@@ -16,6 +16,7 @@ Specifically, the GPL parts of FFmpeg are:
 - optional x86 optimizations in the files
   - `libavcodec/x86/flac_dsp_gpl.asm`
   - `libavcodec/x86/idct_mmx.c`
+  - `libavfilter/x86/vf_removegrain.asm`
 - libutvideo encoding/decoding wrappers in
   `libavcodec/libutvideo*.cpp`
 - the X11 grabber in `libavdevice/x11grab.c`
@@ -84,6 +85,7 @@ compatible libraries
 The following libraries are under GPL:
 - frei0r
 - libcdio
+- librubberband
 - libutvideo
 - libvidstab
 - libx264
@@ -102,7 +104,7 @@ license version needs to be upgraded by passing `--enable-version3` to configure
 incompatible libraries
 ----------------------
 
-The Fraunhofer AAC library, FAAC and aacplus are under licenses which
+The Fraunhofer AAC library and FAAC are under licenses which
 are incompatible with the GPLv2 and v3. We do not know for certain if their
 licenses are compatible with the LGPL.
 If you wish to enable these libraries, pass `--enable-nonfree` to configure.
diff --git a/MAINTAINERS b/MAINTAINERS
index 795e9047..0705a699 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -14,7 +14,6 @@ patches and related discussions.
 Project Leader
 ==============
 
-Michael Niedermayer
   final design decisions
 
 
@@ -43,7 +42,7 @@ QuickTime faststart:
 Miscellaneous Areas
 ===================
 
-documentation                           Stefano Sabatini, Mike Melanson, Timothy Gu
+documentation                           Stefano Sabatini, Mike Melanson, Timothy Gu, Lou Logan
 build system (configure, makefiles)     Diego Biurrun, Mans Rullgard
 project server                          Árpád Gereöffy, Michael Niedermayer, Reimar Doeffinger, Alexander Strasser, Lou Logan
 presets                                 Robert Swain
@@ -72,6 +71,7 @@ Internal Interfaces:
   libavutil/common.h                    Michael Niedermayer
 
 Other:
+  aes_ctr.c, aes_ctr.h                  Eran Kornblau
   bprint                                Nicolas George
   bswap.h
   des                                   Reimar Doeffinger
@@ -138,6 +138,7 @@ Codecs:
   4xm.c                                 Michael Niedermayer
   8bps.c                                Roberto Togni
   8svx.c                                Jaikrishnan Menon
+  aacenc*, aaccoder.c                   Rostislav Pehlivanov
   aasc.c                                Kostya Shishkov
   ac3*                                  Justin Ruggles
   alacenc.c                             Jaikrishnan Menon
@@ -164,6 +165,7 @@ Codecs:
   crystalhd.c                           Philip Langdale
   cscd.c                                Reimar Doeffinger
   dca.c                                 Kostya Shishkov, Benjamin Larsson
+  dirac*                                Rostislav Pehlivanov
   dnxhd*                                Baptiste Coudurier
   dpcm.c                                Mike Melanson
   dss_sp.c                              Oleksij Rempel, Michael Niedermayer
@@ -171,6 +173,7 @@ Codecs:
   dvbsubdec.c                           Anshul Maheshwari
   dxa.c                                 Kostya Shishkov
   eacmv*, eaidct*, eat*                 Peter Ross
+  evrc*                                 Paul B Mahol
   exif.c, exif.h                        Thilo Borgmann
   ffv1*                                 Michael Niedermayer
   ffwavesynth.c                         Nicolas George
@@ -184,6 +187,7 @@ Codecs:
   h261*                                 Michael Niedermayer
   h263*                                 Michael Niedermayer
   h264*                                 Loren Merritt, Michael Niedermayer
+  hap*                                  Tom Butterworth
   huffyuv*                              Michael Niedermayer, Christophe Gisquet
   idcinvideo.c                          Mike Melanson
   imc*                                  Benjamin Larsson
@@ -200,12 +204,13 @@ Codecs:
   libcelt_dec.c                         Nicolas George
   libdirac*                             David Conrad
   libgsm.c                              Michel Bardiaux
+  libkvazaar.c                          Arttu Ylä-Outinen
   libopenjpeg.c                         Jaikrishnan Menon
   libopenjpegenc.c                      Michael Bradshaw
   libschroedinger*                      David Conrad
   libspeexdec.c                         Justin Ruggles
   libtheoraenc.c                        David Conrad
-  libutvideo*                           Derek Buitenhuis
+  libutvideo*                           Carl Eugen Hoyos
   libvorbis.c                           David Conrad
   libvpx*                               James Zern
   libx264.c                             Mans Rullgard, Jason Garrett-Glaser
@@ -237,6 +242,7 @@ Codecs:
   qdm2.c, qdm2data.h                    Roberto Togni, Benjamin Larsson
   qdrw.c                                Kostya Shishkov
   qpeg.c                                Kostya Shishkov
+  qsv*                                  Ivan Uskov
   qtrle.c                               Mike Melanson
   ra144.c, ra144.h, ra288.c, ra288.h    Roberto Togni
   resample2.c                           Michael Niedermayer
@@ -270,6 +276,7 @@ Codecs:
   vb.c                                  Kostya Shishkov
   vble.c                                Derek Buitenhuis
   vc1*                                  Kostya Shishkov, Christophe Gisquet
+  vc2*                                  Rostislav Pehlivanov
   vcr1.c                                Michael Niedermayer
   vda_h264_dec.c                        Xidorn Quan
   vima.c                                Paul B Mahol
@@ -298,11 +305,11 @@ Codecs:
 
 Hardware acceleration:
   crystalhd.c                           Philip Langdale
-  dxva2*                                Laurent Aimar
-  libstagefright.cpp                    Mohamed Naufal
+  dxva2*                                Hendrik Leppkes, Laurent Aimar
   vaapi*                                Gwenole Beauchesne
   vda*                                  Sebastien Zwickert
-  vdpau*                                Carl Eugen Hoyos
+  vdpau*                                Philip Langdale, Carl Eugen Hoyos
+  videotoolbox*                         Sebastien Zwickert
 
 
 libavdevice
@@ -334,6 +341,7 @@ Generic parts:
   graphdump.c                           Nicolas George
 
 Filters:
+  f_drawgraph.c                         Paul B Mahol
   af_adelay.c                           Paul B Mahol
   af_aecho.c                            Paul B Mahol
   af_afade.c                            Paul B Mahol
@@ -341,19 +349,26 @@ Filters:
   af_aphaser.c                          Paul B Mahol
   af_aresample.c                        Michael Niedermayer
   af_astats.c                           Paul B Mahol
-  af_astreamsync.c                      Nicolas George
   af_atempo.c                           Pavel Koshevoy
   af_biquads.c                          Paul B Mahol
+  af_chorus.c                           Paul B Mahol
   af_compand.c                          Paul B Mahol
   af_ladspa.c                           Paul B Mahol
   af_pan.c                              Nicolas George
+  af_sidechaincompress.c                Paul B Mahol
   af_silenceremove.c                    Paul B Mahol
+  avf_aphasemeter.c                     Paul B Mahol
   avf_avectorscope.c                    Paul B Mahol
   avf_showcqt.c                         Muhammad Faiz
   vf_blend.c                            Paul B Mahol
+  vf_chromakey.c                        Timo Rothenpieler
+  vf_colorchannelmixer.c                Paul B Mahol
   vf_colorbalance.c                     Paul B Mahol
+  vf_colorkey.c                         Timo Rothenpieler
+  vf_colorlevels.c                      Paul B Mahol
+  vf_deband.c                           Paul B Mahol
   vf_dejudder.c                         Nicholas Robbins
-  vf_delogo.c                           Jean Delvare (CC <khali@linux-fr.org>)
+  vf_delogo.c                           Jean Delvare (CC <jdelvare@suse.com>)
   vf_drawbox.c/drawgrid                 Andrey Utkin
   vf_extractplanes.c                    Paul B Mahol
   vf_histogram.c                        Paul B Mahol
@@ -362,12 +377,16 @@ Filters:
   vf_il.c                               Paul B Mahol
   vf_lenscorrection.c                   Daniel Oberhoff
   vf_mergeplanes.c                      Paul B Mahol
+  vf_neighbor.c                         Paul B Mahol
   vf_psnr.c                             Paul B Mahol
+  vf_random.c                           Paul B Mahol
   vf_scale.c                            Michael Niedermayer
   vf_separatefields.c                   Paul B Mahol
+  vf_ssim.c                             Paul B Mahol
   vf_stereo3d.c                         Paul B Mahol
   vf_telecine.c                         Paul B Mahol
   vf_yadif.c                            Michael Niedermayer
+  vf_zoompan.c                          Paul B Mahol
 
 Sources:
   vsrc_mandelbrot.c                     Michael Niedermayer
@@ -384,6 +403,7 @@ Generic parts:
 
 Muxers/Demuxers:
   4xm.c                                 Mike Melanson
+  aadec.c                               Vesselin Bontchev (vesselin.bontchev at yandex dot com)
   adtsenc.c                             Robert Swain
   afc.c                                 Paul B Mahol
   aiffdec.c                             Baptiste Coudurier, Matthieu Bouron
@@ -415,6 +435,7 @@ Muxers/Demuxers:
   gxf.c                                 Reimar Doeffinger
   gxfenc.c                              Baptiste Coudurier
   hls.c                                 Anssi Hannula
+  hls encryption (hlsenc.c)             Christian Suloway
   idcin.c                               Mike Melanson
   idroqdec.c                            Mike Melanson
   iff.c                                 Jaikrishnan Menon
@@ -439,6 +460,7 @@ Muxers/Demuxers:
   mm.c                                  Peter Ross
   mov.c                                 Michael Niedermayer, Baptiste Coudurier
   movenc.c                              Baptiste Coudurier, Matthieu Bouron
+  movenccenc.c                          Eran Kornblau
   mpc.c                                 Kostya Shishkov
   mpeg.c                                Michael Niedermayer
   mpegenc.c                             Michael Niedermayer
@@ -455,6 +477,7 @@ Muxers/Demuxers:
   oggdec.c, oggdec.h                    David Conrad
   oggenc.c                              Baptiste Coudurier
   oggparse*.c                           David Conrad
+  oggparsedaala*                        Rostislav Pehlivanov
   oma.c                                 Maxim Poliakovski
   paf.c                                 Paul B Mahol
   psxstr.c                              Mike Melanson
@@ -500,6 +523,7 @@ Muxers/Demuxers:
   wvenc.c                               Paul B Mahol
 
 Protocols:
+  async.c                               Zhang Rui
   bluray.c                              Petri Hintukainen
   ftp.c                                 Lukasz Marek
   http.c                                Ronald S. Bultje
@@ -535,21 +559,22 @@ Amiga / PowerPC                         Colin Ward
 Linux / PowerPC                         Luca Barbato
 Windows MinGW                           Alex Beregszaszi, Ramiro Polla
 Windows Cygwin                          Victor Paesa
-Windows MSVC                            Matthew Oliver
+Windows MSVC                            Matthew Oliver, Hendrik Leppkes
 Windows ICL                             Matthew Oliver
 ADI/Blackfin DSP                        Marc Hoffman
 Sparc                                   Roman Shaposhnik
 x86                                     Michael Niedermayer
+OS/2                                    KO Myung-Hun
 
 
 Releases
 ========
 
+2.8                                     Michael Niedermayer
 2.7                                     Michael Niedermayer
 2.6                                     Michael Niedermayer
 2.5                                     Michael Niedermayer
 2.4                                     Michael Niedermayer
-2.2                                     Michael Niedermayer
 
 If you want to maintain an older release, please contact us
 
@@ -569,6 +594,7 @@ Clément Bœsch                 52D0 3A82 D445 F194 DB8B 2B16 87EE 2CB8 F4B8 FCF
 Daniel Verkamp                78A6 07ED 782C 653E C628 B8B9 F0EB 8DD8 2F0E 21C7
 Diego Biurrun                 8227 1E31 B6D9 4994 7427 E220 9CAE D6CC 4757 FCC5
 FFmpeg release signing key    FCF9 86EA 15E6 E293 A564 4F10 B432 2F04 D676 58D8
+Ganesh Ajjanagadde            C96A 848E 97C3 CEA2 AB72 5CE4 45F9 6A2D 3C36 FB1B
 Gwenole Beauchesne            2E63 B3A6 3E44 37E2 017D 2704 53C7 6266 B153 99C4
 Jaikrishnan Menon             61A1 F09F 01C9 2D45 78E1 C862 25DC 8831 AF70 D368
 Jean Delvare                  7CA6 9F44 60F1 BDC4 1FD2 C858 A552 6B9B B3CD 4E6A
@@ -580,6 +606,7 @@ Michael Niedermayer           9FF2 128B 147E F673 0BAD F133 611E C787 040B 0FAB
 Nicolas George                24CE 01CE 9ACC 5CEB 74D8 8D9D B063 D997 36E5 4C93
 Panagiotis Issaris            6571 13A3 33D9 3726 F728 AA98 F643 B12E ECF3 E029
 Peter Ross                    A907 E02F A6E5 0CD2 34CD 20D2 6760 79C5 AC40 DD6B
+Philip Langdale               5DC5 8D66 5FBA 3A43 18EC 045E F8D6 B194 6A75 682E
 Reimar Doeffinger             C61D 16E5 9E2C D10C 8958 38A4 0899 A2B9 06D4 D9C7
 Reinhard Tartler              9300 5DC2 7E87 6C37 ED7B CA9A 9808 3544 9453 48A4
 Reynaldo H. Verdejo Pinochet  6E27 CD34 170C C78E 4D4F 5F40 C18E 077F 3114 452A
diff --git a/Makefile b/Makefile
index fd59628a..87a98696 100644
--- a/Makefile
+++ b/Makefile
@@ -4,6 +4,7 @@ include config.mak
 vpath %.c    $(SRC_PATH)
 vpath %.cpp  $(SRC_PATH)
 vpath %.h    $(SRC_PATH)
+vpath %.inc  $(SRC_PATH)
 vpath %.m    $(SRC_PATH)
 vpath %.S    $(SRC_PATH)
 vpath %.asm  $(SRC_PATH)
@@ -31,7 +32,11 @@ $(foreach prog,$(AVBASENAMES),$(eval OBJS-$(prog)-$(CONFIG_OPENCL) += cmdutils_o
 OBJS-ffmpeg                   += ffmpeg_opt.o ffmpeg_filter.o
 OBJS-ffmpeg-$(HAVE_VDPAU_X11) += ffmpeg_vdpau.o
 OBJS-ffmpeg-$(HAVE_DXVA2_LIB) += ffmpeg_dxva2.o
-OBJS-ffmpeg-$(CONFIG_VDA)     += ffmpeg_vda.o
+ifndef CONFIG_VIDEOTOOLBOX
+OBJS-ffmpeg-$(CONFIG_VDA)     += ffmpeg_videotoolbox.o
+endif
+OBJS-ffmpeg-$(CONFIG_VIDEOTOOLBOX) += ffmpeg_videotoolbox.o
+OBJS-ffmpeg-$(CONFIG_LIBMFX)  += ffmpeg_qsv.o
 OBJS-ffserver                 += ffserver_config.o
 
 TESTTOOLS   = audiogen videogen rotozoom tiny_psnr tiny_ssim base64
@@ -60,6 +65,7 @@ include $(SRC_PATH)/common.mak
 
 FF_EXTRALIBS := $(FFEXTRALIBS)
 FF_DEP_LIBS  := $(DEP_LIBS)
+FF_STATIC_DEP_LIBS := $(STATIC_DEP_LIBS)
 
 all: $(AVPROGS)
 
@@ -80,8 +86,8 @@ SUBDIR_VARS := CLEANFILES EXAMPLES FFLIBS HOSTPROGS TESTPROGS TOOLS      \
                HEADERS ARCH_HEADERS BUILT_HEADERS SKIPHEADERS            \
                ARMV5TE-OBJS ARMV6-OBJS ARMV8-OBJS VFP-OBJS NEON-OBJS     \
                ALTIVEC-OBJS MMX-OBJS YASM-OBJS                           \
-               MIPSFPU-OBJS MIPSDSPR2-OBJS MIPSDSPR1-OBJS MSA-OBJS       \
-               LOONGSON3-OBJS OBJS SLIBOBJS HOSTOBJS TESTOBJS
+               MIPSFPU-OBJS MIPSDSPR2-OBJS MIPSDSP-OBJS MSA-OBJS         \
+               MMI-OBJS OBJS SLIBOBJS HOSTOBJS TESTOBJS
 
 define RESET
 $(1) :=
@@ -171,11 +177,15 @@ clean::
 	$(RM) $(CLEANSUFFIXES)
 	$(RM) $(CLEANSUFFIXES:%=tools/%)
 	$(RM) -r coverage-html
-	$(RM) -rf coverage.info lcov
+	$(RM) -rf coverage.info coverage.info.in lcov
 
 distclean::
 	$(RM) $(DISTCLEANSUFFIXES)
-	$(RM) config.* .config libavutil/avconfig.h .version version.h libavutil/ffversion.h libavcodec/codec_names.h
+	$(RM) config.* .config libavutil/avconfig.h .version avversion.h version.h libavutil/ffversion.h libavcodec/codec_names.h
+ifeq ($(SRC_LINK),src)
+	$(RM) src
+endif
+	$(RM) -rf doc/examples/pc-uninstalled
 
 config:
 	$(SRC_PATH)/configure $(value FFMPEG_CONFIGURATION)
diff --git a/README.md b/README.md
index 58e1eff0..24191919 100644
--- a/README.md
+++ b/README.md
@@ -16,12 +16,12 @@ such as audio, video, subtitles and related metadata.
 
 ## Tools
 
-* [ffmpeg](http://ffmpeg.org/ffmpeg.html) is a command line toolbox to
+* [ffmpeg](https://ffmpeg.org/ffmpeg.html) is a command line toolbox to
   manipulate, convert and stream multimedia content.
-* [ffplay](http://ffmpeg.org/ffplay.html) is a minimalistic multimedia player.
-* [ffprobe](http://ffmpeg.org/ffprobe.html) is a simple analysis tool to inspect
+* [ffplay](https://ffmpeg.org/ffplay.html) is a minimalistic multimedia player.
+* [ffprobe](https://ffmpeg.org/ffprobe.html) is a simple analysis tool to inspect
   multimedia content.
-* [ffserver](http://ffmpeg.org/ffserver.html) is a multimedia streaming server
+* [ffserver](https://ffmpeg.org/ffserver.html) is a multimedia streaming server
   for live broadcasts.
 * Additional small tools such as `aviocat`, `ismindex` and `qt-faststart`.
 
@@ -29,8 +29,8 @@ such as audio, video, subtitles and related metadata.
 
 The offline documentation is available in the **doc/** directory.
 
-The online documentation is available in the main [website](http://ffmpeg.org)
-and in the [wiki](http://trac.ffmpeg.org).
+The online documentation is available in the main [website](https://ffmpeg.org)
+and in the [wiki](https://trac.ffmpeg.org).
 
 ### Examples
 
@@ -40,3 +40,10 @@ Coding examples are available in the **doc/examples** directory.
 
 FFmpeg codebase is mainly LGPL-licensed with optional components licensed under
 GPL. Please refer to the LICENSE file for detailed information.
+
+## Contributing
+
+Patches should be submitted to the ffmpeg-devel mailing list using
+`git format-patch` or `git send-email`. GitHub pull requests should be
+avoided because they are not part of our review process. Few developers
+follow pull requests so they will likely be ignored.
diff --git a/RELEASE b/RELEASE
index 37c2961c..b5021469 100644
--- a/RELEASE
+++ b/RELEASE
@@ -1 +1 @@
-2.7.2
+3.0.2
diff --git a/RELEASE_NOTES b/RELEASE_NOTES
index 1451d33f..861dc04a 100644
--- a/RELEASE_NOTES
+++ b/RELEASE_NOTES
@@ -1,10 +1,10 @@
 
-              ┌─────────────────────────────────────┐
-              │ RELEASE NOTES for FFmpeg 2.7 "Nash" │
-              └─────────────────────────────────────┘
+              ┌─────────────────────────────────────────┐
+              │ RELEASE NOTES for FFmpeg 3.0 "Einstein" │
+              └─────────────────────────────────────────┘
 
-   The FFmpeg Project proudly presents FFmpeg 2.7 "Nash", about 3
-   months after the release of FFmpeg 2.6.
+   The FFmpeg Project proudly presents FFmpeg 3.0 "Einstein", about 5
+   months after the release of FFmpeg 2.8.
 
    A complete Changelog is available at the root of the project, and the
    complete Git history on http://source.ffmpeg.org.
diff --git a/android/x86/include/libavcodec/d3d11va.h b/android/x86/include/libavcodec/d3d11va.h
new file mode 100644
index 00000000..d51e2ff8
--- /dev/null
+++ b/android/x86/include/libavcodec/d3d11va.h
@@ -0,0 +1,98 @@
+/*
+ * Direct3D11 HW acceleration
+ *
+ * copyright (c) 2009 Laurent Aimar
+ * copyright (c) 2015 Steve Lhomme
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_D3D11VA_H
+#define AVCODEC_D3D11VA_H
+
+/**
+ * @file
+ * @ingroup lavc_codec_hwaccel_d3d11va
+ * Public libavcodec D3D11VA header.
+ */
+
+#if !defined(_WIN32_WINNT) || _WIN32_WINNT < 0x0600
+#undef _WIN32_WINNT
+#define _WIN32_WINNT 0x0600
+#endif
+
+#include <stdint.h>
+#include <d3d11.h>
+
+/**
+ * @defgroup lavc_codec_hwaccel_d3d11va Direct3D11
+ * @ingroup lavc_codec_hwaccel
+ *
+ * @{
+ */
+
+#define FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG 1 ///< Work around for Direct3D11 and old UVD/UVD+ ATI video cards
+#define FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO    2 ///< Work around for Direct3D11 and old Intel GPUs with ClearVideo interface
+
+/**
+ * This structure is used to provide the necessary configurations and data
+ * to the Direct3D11 FFmpeg HWAccel implementation.
+ *
+ * The application must make it available as AVCodecContext.hwaccel_context.
+ */
+struct AVD3D11VAContext {
+    /**
+     * D3D11 decoder object
+     */
+    ID3D11VideoDecoder *decoder;
+
+    /**
+      * D3D11 VideoContext
+      */
+    ID3D11VideoContext *video_context;
+
+    /**
+     * D3D11 configuration used to create the decoder
+     */
+    D3D11_VIDEO_DECODER_CONFIG *cfg;
+
+    /**
+     * The number of surfaces in the surface array
+     */
+    unsigned surface_count;
+
+    /**
+     * The array of Direct3D surfaces used to create the decoder
+     */
+    ID3D11VideoDecoderOutputView **surface;
+
+    /**
+     * A bit field configuring the workarounds needed for using the decoder
+     */
+    uint64_t workaround;
+
+    /**
+     * Private to the FFmpeg AVHWAccel implementation
+     */
+    unsigned report_id;
+};
+
+/**
+ * @}
+ */
+
+#endif /* AVCODEC_D3D11VA_H */
diff --git a/libavcodec/x86/dirac_dwt.h b/android/x86/include/libavcodec/qsv.h
similarity index 64%
rename from libavcodec/x86/dirac_dwt.h
rename to android/x86/include/libavcodec/qsv.h
index 126b2902..e7487c88 100644
--- a/libavcodec/x86/dirac_dwt.h
+++ b/android/x86/include/libavcodec/qsv.h
@@ -1,4 +1,6 @@
 /*
+ * Intel MediaSDK QSV public API
+ *
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
@@ -16,15 +18,24 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef AVCODEC_X86_DIRAC_DWT_H
-#define AVCODEC_X86_DIRAC_DWT_H
+#ifndef AVCODEC_QSV_H
+#define AVCODEC_QSV_H
+
+#include <mfx/mfxvideo.h>
 
-#include "libavcodec/dirac_dwt.h"
+typedef struct AVQSVContext {
+    mfxSession session;
+    int iopattern;
 
-void ff_horizontal_compose_dd97i_end_c(IDWTELEM *b, IDWTELEM *tmp, int w2, int x);
-void ff_horizontal_compose_haar1i_end_c(IDWTELEM *b, IDWTELEM *tmp, int w2, int x);
-void ff_horizontal_compose_haar0i_end_c(IDWTELEM *b, IDWTELEM *tmp, int w2, int x);
+    mfxExtBuffer **ext_buffers;
+    int         nb_ext_buffers;
+} AVQSVContext;
 
-void ff_spatial_idwt_init_mmx(DWTContext *d, enum dwt_type type);
+/**
+ * Allocate a new context.
+ *
+ * It must be freed by the caller with av_free().
+ */
+AVQSVContext *av_qsv_alloc_context(void);
 
-#endif
+#endif /* AVCODEC_QSV_H */
diff --git a/android/x86/include/libavutil/twofish.h b/android/x86/include/libavutil/twofish.h
new file mode 100644
index 00000000..813cfecd
--- /dev/null
+++ b/android/x86/include/libavutil/twofish.h
@@ -0,0 +1,70 @@
+/*
+ * An implementation of the TwoFish algorithm
+ * Copyright (c) 2015 Supraja Meedinti
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_TWOFISH_H
+#define AVUTIL_TWOFISH_H
+
+#include <stdint.h>
+
+
+/**
+  * @file
+  * @brief Public header for libavutil TWOFISH algorithm
+  * @defgroup lavu_twofish TWOFISH
+  * @ingroup lavu_crypto
+  * @{
+  */
+
+extern const int av_twofish_size;
+
+struct AVTWOFISH;
+
+/**
+  * Allocate an AVTWOFISH context
+  * To free the struct: av_free(ptr)
+  */
+struct AVTWOFISH *av_twofish_alloc(void);
+
+/**
+  * Initialize an AVTWOFISH context.
+  *
+  * @param ctx an AVTWOFISH context
+  * @param key a key of size ranging from 1 to 32 bytes used for encryption/decryption
+  * @param key_bits number of key bits: 128, 192 or 256. If less than required, the key is padded with zeroes to the nearest valid value; the return value is 0 if key_bits is 128/192/256, -1 if less than 0, 1 otherwise
+ */
+int av_twofish_init(struct AVTWOFISH *ctx, const uint8_t *key, int key_bits);
+
+/**
+  * Encrypt or decrypt a buffer using a previously initialized context
+  *
+  * @param ctx an AVTWOFISH context
+  * @param dst destination array, can be equal to src
+  * @param src source array, can be equal to dst
+  * @param count number of 16 byte blocks
+  * @param iv initialization vector for CBC mode, NULL for ECB mode
+  * @param decrypt 0 for encryption, 1 for decryption
+ */
+void av_twofish_crypt(struct AVTWOFISH *ctx, uint8_t *dst, const uint8_t *src, int count, uint8_t* iv, int decrypt);
+
+/**
+ * @}
+ */
+#endif /* AVUTIL_TWOFISH_H */
diff --git a/arch.mak b/arch.mak
index 4508c2a2..08f78b4e 100644
--- a/arch.mak
+++ b/arch.mak
@@ -5,10 +5,10 @@ OBJS-$(HAVE_VFP)     += $(VFP-OBJS)     $(VFP-OBJS-yes)
 OBJS-$(HAVE_NEON)    += $(NEON-OBJS)    $(NEON-OBJS-yes)
 
 OBJS-$(HAVE_MIPSFPU)   += $(MIPSFPU-OBJS)    $(MIPSFPU-OBJS-yes)
-OBJS-$(HAVE_MIPSDSPR1) += $(MIPSDSPR1-OBJS)  $(MIPSDSPR1-OBJS-yes)
+OBJS-$(HAVE_MIPSDSP)   += $(MIPSDSP-OBJS)    $(MIPSDSP-OBJS-yes)
 OBJS-$(HAVE_MIPSDSPR2) += $(MIPSDSPR2-OBJS)  $(MIPSDSPR2-OBJS-yes)
 OBJS-$(HAVE_MSA)       += $(MSA-OBJS)        $(MSA-OBJS-yes)
-OBJS-$(HAVE_LOONGSON3) += $(LOONGSON3-OBJS)  $(LOONGSON3-OBJS-yes)
+OBJS-$(HAVE_MMI)   += $(MMI-OBJS)   $(MMI-OBJS-yes)
 
 OBJS-$(HAVE_ALTIVEC) += $(ALTIVEC-OBJS) $(ALTIVEC-OBJS-yes)
 OBJS-$(HAVE_VSX)     += $(VSX-OBJS) $(VSX-OBJS-yes)
diff --git a/cmdutils.c b/cmdutils.c
index 6e7a0bb0..03a48362 100644
--- a/cmdutils.c
+++ b/cmdutils.c
@@ -52,6 +52,7 @@
 #include "libavutil/opt.h"
 #include "libavutil/cpu.h"
 #include "libavutil/ffversion.h"
+#include "libavutil/version.h"
 #include "cmdutils.h"
 #if CONFIG_NETWORK
 #include "libavformat/network.h"
@@ -63,7 +64,7 @@
 
 static int init_report(const char *env);
 
-struct SwsContext *sws_opts;
+AVDictionary *sws_dict;
 AVDictionary *swr_opts;
 AVDictionary *format_opts, *codec_opts, *resample_opts;
 
@@ -73,20 +74,13 @@ int hide_banner = 0;
 
 void init_opts(void)
 {
-
-    if(CONFIG_SWSCALE)
-        sws_opts = sws_getContext(16, 16, 0, 16, 16, 0, SWS_BICUBIC,
-                              NULL, NULL, NULL);
+    av_dict_set(&sws_dict, "flags", "bicubic", 0);
 }
 
 void uninit_opts(void)
 {
-#if CONFIG_SWSCALE
-    sws_freeContext(sws_opts);
-    sws_opts = NULL;
-#endif
-
     av_dict_free(&swr_opts);
+    av_dict_free(&sws_dict);
     av_dict_free(&format_opts);
     av_dict_free(&codec_opts);
     av_dict_free(&resample_opts);
@@ -529,7 +523,7 @@ static const AVOption *opt_find(void *obj, const char *name, const char *unit,
     return o;
 }
 
-#define FLAGS (o->type == AV_OPT_TYPE_FLAGS) ? AV_DICT_APPEND : 0
+#define FLAGS (o->type == AV_OPT_TYPE_FLAGS && (arg[0]=='-' || arg[0]=='+')) ? AV_DICT_APPEND : 0
 int opt_default(void *optctx, const char *opt, const char *arg)
 {
     const AVOption *o;
@@ -540,7 +534,12 @@ int opt_default(void *optctx, const char *opt, const char *arg)
 #if CONFIG_AVRESAMPLE
     const AVClass *rc = avresample_get_class();
 #endif
-    const AVClass *sc, *swr_class;
+#if CONFIG_SWSCALE
+    const AVClass *sc = sws_get_class();
+#endif
+#if CONFIG_SWRESAMPLE
+    const AVClass *swr_class = swr_get_class();
+#endif
 
     if (!strcmp(opt, "debug") || !strcmp(opt, "fdebug"))
         av_log_set_level(AV_LOG_DEBUG);
@@ -564,15 +563,24 @@ int opt_default(void *optctx, const char *opt, const char *arg)
         consumed = 1;
     }
 #if CONFIG_SWSCALE
-    sc = sws_get_class();
-    if (!consumed && opt_find(&sc, opt, NULL, 0,
-                         AV_OPT_SEARCH_CHILDREN | AV_OPT_SEARCH_FAKE_OBJ)) {
-        // XXX we only support sws_flags, not arbitrary sws options
-        int ret = av_opt_set(sws_opts, opt, arg, 0);
+    if (!consumed && (o = opt_find(&sc, opt, NULL, 0,
+                         AV_OPT_SEARCH_CHILDREN | AV_OPT_SEARCH_FAKE_OBJ))) {
+        struct SwsContext *sws = sws_alloc_context();
+        int ret = av_opt_set(sws, opt, arg, 0);
+        sws_freeContext(sws);
+        if (!strcmp(opt, "srcw") || !strcmp(opt, "srch") ||
+            !strcmp(opt, "dstw") || !strcmp(opt, "dsth") ||
+            !strcmp(opt, "src_format") || !strcmp(opt, "dst_format")) {
+            av_log(NULL, AV_LOG_ERROR, "Directly using swscale dimensions/format options is not supported, please use the -s or -pix_fmt options\n");
+            return AVERROR(EINVAL);
+        }
         if (ret < 0) {
             av_log(NULL, AV_LOG_ERROR, "Error setting option %s.\n", opt);
             return ret;
         }
+
+        av_dict_set(&sws_dict, opt, arg, FLAGS);
+
         consumed = 1;
     }
 #else
@@ -582,7 +590,6 @@ int opt_default(void *optctx, const char *opt, const char *arg)
     }
 #endif
 #if CONFIG_SWRESAMPLE
-    swr_class = swr_get_class();
     if (!consumed && (o=opt_find(&swr_class, opt, NULL, 0,
                                     AV_OPT_SEARCH_CHILDREN | AV_OPT_SEARCH_FAKE_OBJ))) {
         struct SwrContext *swr = swr_alloc();
@@ -646,9 +653,7 @@ static void finish_group(OptionParseContext *octx, int group_idx,
     *g             = octx->cur_group;
     g->arg         = arg;
     g->group_def   = l->group_def;
-#if CONFIG_SWSCALE
-    g->sws_opts    = sws_opts;
-#endif
+    g->sws_dict    = sws_dict;
     g->swr_opts    = swr_opts;
     g->codec_opts  = codec_opts;
     g->format_opts = format_opts;
@@ -657,9 +662,7 @@ static void finish_group(OptionParseContext *octx, int group_idx,
     codec_opts  = NULL;
     format_opts = NULL;
     resample_opts = NULL;
-#if CONFIG_SWSCALE
-    sws_opts    = NULL;
-#endif
+    sws_dict    = NULL;
     swr_opts    = NULL;
     init_opts();
 
@@ -715,9 +718,8 @@ void uninit_parse_context(OptionParseContext *octx)
             av_dict_free(&l->groups[j].codec_opts);
             av_dict_free(&l->groups[j].format_opts);
             av_dict_free(&l->groups[j].resample_opts);
-#if CONFIG_SWSCALE
-            sws_freeContext(l->groups[j].sws_opts);
-#endif
+
+            av_dict_free(&l->groups[j].sws_dict);
             av_dict_free(&l->groups[j].swr_opts);
         }
         av_freep(&l->groups);
@@ -1057,7 +1059,8 @@ static int warned_cfg = 0;
                    LIB##LIBNAME##_VERSION_MAJOR,                        \
                    LIB##LIBNAME##_VERSION_MINOR,                        \
                    LIB##LIBNAME##_VERSION_MICRO,                        \
-                   version >> 16, version >> 8 & 0xff, version & 0xff); \
+                   AV_VERSION_MAJOR(version), AV_VERSION_MINOR(version),\
+                   AV_VERSION_MICRO(version));                          \
         }                                                               \
         if (flags & SHOW_CONFIG) {                                      \
             const char *cfg = libname##_configuration();                \
@@ -1076,15 +1079,15 @@ static int warned_cfg = 0;
 
 static void print_all_libs_info(int flags, int level)
 {
-    PRINT_LIB_INFO(avutil,   AVUTIL,   flags, level);
-    PRINT_LIB_INFO(avcodec,  AVCODEC,  flags, level);
-    PRINT_LIB_INFO(avformat, AVFORMAT, flags, level);
-    PRINT_LIB_INFO(avdevice, AVDEVICE, flags, level);
-    PRINT_LIB_INFO(avfilter, AVFILTER, flags, level);
+    PRINT_LIB_INFO(avutil,     AVUTIL,     flags, level);
+    PRINT_LIB_INFO(avcodec,    AVCODEC,    flags, level);
+    PRINT_LIB_INFO(avformat,   AVFORMAT,   flags, level);
+    PRINT_LIB_INFO(avdevice,   AVDEVICE,   flags, level);
+    PRINT_LIB_INFO(avfilter,   AVFILTER,   flags, level);
     PRINT_LIB_INFO(avresample, AVRESAMPLE, flags, level);
-    PRINT_LIB_INFO(swscale,  SWSCALE,  flags, level);
-    PRINT_LIB_INFO(swresample,SWRESAMPLE,  flags, level);
-    PRINT_LIB_INFO(postproc, POSTPROC, flags, level);
+    PRINT_LIB_INFO(swscale,    SWSCALE,    flags, level);
+    PRINT_LIB_INFO(swresample, SWRESAMPLE, flags, level);
+    PRINT_LIB_INFO(postproc,   POSTPROC,   flags, level);
 }
 
 static void print_program_info(int flags, int level)
@@ -1321,16 +1324,47 @@ static void print_codec(const AVCodec *c)
     printf("%s %s [%s]:\n", encoder ? "Encoder" : "Decoder", c->name,
            c->long_name ? c->long_name : "");
 
+    printf("    General capabilities: ");
+    if (c->capabilities & AV_CODEC_CAP_DRAW_HORIZ_BAND)
+        printf("horizband ");
+    if (c->capabilities & AV_CODEC_CAP_DR1)
+        printf("dr1 ");
+    if (c->capabilities & AV_CODEC_CAP_TRUNCATED)
+        printf("trunc ");
+    if (c->capabilities & AV_CODEC_CAP_DELAY)
+        printf("delay ");
+    if (c->capabilities & AV_CODEC_CAP_SMALL_LAST_FRAME)
+        printf("small ");
+    if (c->capabilities & AV_CODEC_CAP_SUBFRAMES)
+        printf("subframes ");
+    if (c->capabilities & AV_CODEC_CAP_EXPERIMENTAL)
+        printf("exp ");
+    if (c->capabilities & AV_CODEC_CAP_CHANNEL_CONF)
+        printf("chconf ");
+    if (c->capabilities & AV_CODEC_CAP_PARAM_CHANGE)
+        printf("paramchange ");
+    if (c->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE)
+        printf("variable ");
+    if (c->capabilities & (AV_CODEC_CAP_FRAME_THREADS |
+                           AV_CODEC_CAP_SLICE_THREADS |
+                           AV_CODEC_CAP_AUTO_THREADS))
+        printf("threads ");
+    if (!c->capabilities)
+        printf("none");
+    printf("\n");
+
     if (c->type == AVMEDIA_TYPE_VIDEO ||
         c->type == AVMEDIA_TYPE_AUDIO) {
         printf("    Threading capabilities: ");
-        switch (c->capabilities & (CODEC_CAP_FRAME_THREADS |
-                                   CODEC_CAP_SLICE_THREADS)) {
-        case CODEC_CAP_FRAME_THREADS |
-             CODEC_CAP_SLICE_THREADS: printf("frame and slice"); break;
-        case CODEC_CAP_FRAME_THREADS: printf("frame");           break;
-        case CODEC_CAP_SLICE_THREADS: printf("slice");           break;
-        default:                      printf("no");              break;
+        switch (c->capabilities & (AV_CODEC_CAP_FRAME_THREADS |
+                                   AV_CODEC_CAP_SLICE_THREADS |
+                                   AV_CODEC_CAP_AUTO_THREADS)) {
+        case AV_CODEC_CAP_FRAME_THREADS |
+             AV_CODEC_CAP_SLICE_THREADS: printf("frame and slice"); break;
+        case AV_CODEC_CAP_FRAME_THREADS: printf("frame");           break;
+        case AV_CODEC_CAP_SLICE_THREADS: printf("slice");           break;
+        case AV_CODEC_CAP_AUTO_THREADS : printf("auto");            break;
+        default:                         printf("none");            break;
         }
         printf("\n");
     }
@@ -1389,7 +1423,7 @@ static int compare_codec_desc(const void *a, const void *b)
     const AVCodecDescriptor * const *da = a;
     const AVCodecDescriptor * const *db = b;
 
-    return (*da)->type != (*db)->type ? (*da)->type - (*db)->type :
+    return (*da)->type != (*db)->type ? FFDIFFSIGN((*da)->type, (*db)->type) :
            strcmp((*da)->name, (*db)->name);
 }
 
@@ -1503,11 +1537,11 @@ static void print_codecs(int encoder)
 
         while ((codec = next_codec_for_id(desc->id, codec, encoder))) {
             printf(" %c", get_media_type_char(desc->type));
-            printf((codec->capabilities & CODEC_CAP_FRAME_THREADS) ? "F" : ".");
-            printf((codec->capabilities & CODEC_CAP_SLICE_THREADS) ? "S" : ".");
-            printf((codec->capabilities & CODEC_CAP_EXPERIMENTAL)  ? "X" : ".");
-            printf((codec->capabilities & CODEC_CAP_DRAW_HORIZ_BAND)?"B" : ".");
-            printf((codec->capabilities & CODEC_CAP_DR1)           ? "D" : ".");
+            printf((codec->capabilities & AV_CODEC_CAP_FRAME_THREADS) ? "F" : ".");
+            printf((codec->capabilities & AV_CODEC_CAP_SLICE_THREADS) ? "S" : ".");
+            printf((codec->capabilities & AV_CODEC_CAP_EXPERIMENTAL)  ? "X" : ".");
+            printf((codec->capabilities & AV_CODEC_CAP_DRAW_HORIZ_BAND)?"B" : ".");
+            printf((codec->capabilities & AV_CODEC_CAP_DR1)           ? "D" : ".");
 
             printf(" %-20s %s", codec->name, codec->long_name ? codec->long_name : "");
             if (strcmp(codec->name, desc->name))
@@ -1581,17 +1615,17 @@ int show_filters(void *optctx, const char *opt, const char *arg)
                 *(descr_cur++) = '>';
             }
             pad = i ? filter->outputs : filter->inputs;
-            for (j = 0; pad && pad[j].name; j++) {
+            for (j = 0; pad && avfilter_pad_get_name(pad, j); j++) {
                 if (descr_cur >= descr + sizeof(descr) - 4)
                     break;
-                *(descr_cur++) = get_media_type_char(pad[j].type);
+                *(descr_cur++) = get_media_type_char(avfilter_pad_get_type(pad, j));
             }
             if (!j)
                 *(descr_cur++) = ((!i && (filter->flags & AVFILTER_FLAG_DYNAMIC_INPUTS)) ||
                                   ( i && (filter->flags & AVFILTER_FLAG_DYNAMIC_OUTPUTS))) ? 'N' : '|';
         }
         *descr_cur = 0;
-        printf(" %c%c%c %-16s %-10s %s\n",
+        printf(" %c%c%c %-17s %-10s %s\n",
                filter->flags & AVFILTER_FLAG_SUPPORT_TIMELINE ? 'T' : '.',
                filter->flags & AVFILTER_FLAG_SLICE_THREADS    ? 'S' : '.',
                filter->process_command                        ? 'C' : '.',
@@ -1875,64 +1909,6 @@ int read_yesno(void)
     return yesno;
 }
 
-int cmdutils_read_file(const char *filename, char **bufptr, size_t *size)
-{
-    int64_t ret;
-    FILE *f = av_fopen_utf8(filename, "rb");
-
-    if (!f) {
-        ret = AVERROR(errno);
-        av_log(NULL, AV_LOG_ERROR, "Cannot read file '%s': %s\n", filename,
-               strerror(errno));
-        return ret;
-    }
-
-    ret = fseek(f, 0, SEEK_END);
-    if (ret == -1) {
-        ret = AVERROR(errno);
-        goto out;
-    }
-
-    ret = ftell(f);
-    if (ret < 0) {
-        ret = AVERROR(errno);
-        goto out;
-    }
-    *size = ret;
-
-    ret = fseek(f, 0, SEEK_SET);
-    if (ret == -1) {
-        ret = AVERROR(errno);
-        goto out;
-    }
-
-    *bufptr = av_malloc(*size + 1);
-    if (!*bufptr) {
-        av_log(NULL, AV_LOG_ERROR, "Could not allocate file buffer\n");
-        ret = AVERROR(ENOMEM);
-        goto out;
-    }
-    ret = fread(*bufptr, 1, *size, f);
-    if (ret < *size) {
-        av_free(*bufptr);
-        if (ferror(f)) {
-            ret = AVERROR(errno);
-            av_log(NULL, AV_LOG_ERROR, "Error while reading file '%s': %s\n",
-                   filename, strerror(errno));
-        } else
-            ret = AVERROR_EOF;
-    } else {
-        ret = 0;
-        (*bufptr)[(*size)++] = '\0';
-    }
-
-out:
-    if (ret < 0)
-        av_log(NULL, AV_LOG_ERROR, "IO error: %s\n", av_err2str(ret));
-    fclose(f);
-    return ret;
-}
-
 FILE *get_preset_file(char *filename, size_t filename_size,
                       const char *preset_name, int is_path,
                       const char *codec_name)
@@ -2106,7 +2082,10 @@ double get_rotation(AVStream *st)
     theta -= 360*floor(theta/360 + 0.9/360);
 
     if (fabs(theta - 90*round(theta/90)) > 2)
-        av_log_ask_for_sample(NULL, "Odd rotation angle\n");
+        av_log(NULL, AV_LOG_WARNING, "Odd rotation angle.\n"
+               "If you want to help, upload a sample "
+               "of this file to ftp://upload.ffmpeg.org/incoming/ "
+               "and contact the ffmpeg-devel mailing list. (ffmpeg-devel@ffmpeg.org)");
 
     return theta;
 }
diff --git a/cmdutils.h b/cmdutils.h
index a21ce35f..83ea4ad3 100644
--- a/cmdutils.h
+++ b/cmdutils.h
@@ -19,8 +19,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef FFMPEG_CMDUTILS_H
-#define FFMPEG_CMDUTILS_H
+#ifndef CMDUTILS_H
+#define CMDUTILS_H
 
 #include <stdint.h>
 
@@ -46,7 +46,7 @@ extern const int program_birth_year;
 
 extern AVCodecContext *avcodec_opts[AVMEDIA_TYPE_NB];
 extern AVFormatContext *avformat_opts;
-extern struct SwsContext *sws_opts;
+extern AVDictionary *sws_dict;
 extern AVDictionary *swr_opts;
 extern AVDictionary *format_opts, *codec_opts, *resample_opts;
 extern int hide_banner;
@@ -277,7 +277,7 @@ typedef struct OptionGroup {
     AVDictionary *codec_opts;
     AVDictionary *format_opts;
     AVDictionary *resample_opts;
-    struct SwsContext *sws_opts;
+    AVDictionary *sws_dict;
     AVDictionary *swr_opts;
 } OptionGroup;
 
@@ -529,18 +529,6 @@ int show_colors(void *optctx, const char *opt, const char *arg);
  */
 int read_yesno(void);
 
-/**
- * Read the file with name filename, and put its content in a newly
- * allocated 0-terminated buffer.
- *
- * @param filename file to read from
- * @param bufptr location where pointer to buffer is returned
- * @param size   location where size of buffer is returned
- * @return >= 0 in case of success, a negative value corresponding to an
- * AVERROR error code in case of failure.
- */
-int cmdutils_read_file(const char *filename, char **bufptr, size_t *size);
-
 /**
  * Get a file corresponding to a preset file.
  *
diff --git a/cmdutils_opencl.c b/cmdutils_opencl.c
index 61478e27..dd21344a 100644
--- a/cmdutils_opencl.c
+++ b/cmdutils_opencl.c
@@ -206,7 +206,9 @@ static int64_t run_opencl_bench(AVOpenCLExternalEnv *ext_opencl_env)
 
 static int compare_ocl_device_desc(const void *a, const void *b)
 {
-    return ((OpenCLDeviceBenchmark*)a)->runtime - ((OpenCLDeviceBenchmark*)b)->runtime;
+    const OpenCLDeviceBenchmark* va = (const OpenCLDeviceBenchmark*)a;
+    const OpenCLDeviceBenchmark* vb = (const OpenCLDeviceBenchmark*)b;
+    return FFDIFFSIGN(va->runtime , vb->runtime);
 }
 
 int opt_opencl_bench(void *optctx, const char *opt, const char *arg)
diff --git a/common.mak b/common.mak
index eac8bd99..03b51c59 100644
--- a/common.mak
+++ b/common.mak
@@ -18,7 +18,7 @@ ifndef SUBDIR
 ifndef V
 Q      = @
 ECHO   = printf "$(1)\t%s\n" $(2)
-BRIEF  = CC CXX HOSTCC HOSTLD AS YASM AR LD STRIP CP WINDRES
+BRIEF  = CC CXX OBJCC HOSTCC HOSTLD AS YASM AR LD STRIP CP WINDRES
 SILENT = DEPCC DEPHOSTCC DEPAS DEPYASM RANLIB RM
 
 MSG    = $@
@@ -32,10 +32,12 @@ endif
 ALLFFLIBS = avcodec avdevice avfilter avformat avresample avutil postproc swscale swresample
 
 # NASM requires -I path terminated with /
-IFLAGS     := -I. -I$(SRC_PATH)/
+IFLAGS     := -I. -I$(SRC_LINK)/
 CPPFLAGS   := $(IFLAGS) $(CPPFLAGS)
 CFLAGS     += $(ECFLAGS)
 CCFLAGS     = $(CPPFLAGS) $(CFLAGS)
+OBJCFLAGS  += $(EOBJCFLAGS)
+OBJCCFLAGS  = $(CPPFLAGS) $(CFLAGS) $(OBJCFLAGS)
 ASFLAGS    := $(CPPFLAGS) $(ASFLAGS)
 CXXFLAGS   += $(CPPFLAGS) $(CFLAGS)
 YASMFLAGS  += $(IFLAGS:%=%/) -Pconfig.asm
@@ -45,12 +47,13 @@ LDFLAGS    := $(ALLFFLIBS:%=$(LD_PATH)lib%) $(LDFLAGS)
 
 define COMPILE
        $(call $(1)DEP,$(1))
-       $($(1)) $($(1)FLAGS) $($(1)_DEPFLAGS) $($(1)_C) $($(1)_O) $<
+       $($(1)) $($(1)FLAGS) $($(1)_DEPFLAGS) $($(1)_C) $($(1)_O) $(patsubst $(SRC_PATH)/%,$(SRC_LINK)/%,$<)
 endef
 
 COMPILE_C = $(call COMPILE,CC)
 COMPILE_CXX = $(call COMPILE,CXX)
 COMPILE_S = $(call COMPILE,AS)
+COMPILE_M = $(call COMPILE,OBJCC)
 COMPILE_HOSTC = $(call COMPILE,HOSTCC)
 
 %.o: %.c
@@ -60,10 +63,10 @@ COMPILE_HOSTC = $(call COMPILE,HOSTCC)
 	$(COMPILE_CXX)
 
 %.o: %.m
-	$(COMPILE_C)
+	$(COMPILE_M)
 
 %.s: %.c
-	$(CC) $(CPPFLAGS) $(CFLAGS) -S -o $@ $<
+	$(CC) $(CCFLAGS) -S -o $@ $<
 
 %.o: %.S
 	$(COMPILE_S)
@@ -81,7 +84,9 @@ COMPILE_HOSTC = $(call COMPILE,HOSTCC)
 	$(Q)echo '#include "$*.h"' >$@
 
 %.ver: %.v
-	$(Q)sed 's/$$MAJOR/$($(basename $(@F))_VERSION_MAJOR)/' $^ > $@
+	$(Q)sed 's/$$MAJOR/$($(basename $(@F))_VERSION_MAJOR)/' $^ | sed -e 's/:/:\
+/' -e 's/; /;\
+/g' > $@
 
 %.c %.h: TAG = GEN
 
@@ -118,8 +123,9 @@ TOOLOBJS  := $(TOOLS:%=tools/%.o)
 TOOLS     := $(TOOLS:%=tools/%$(EXESUF))
 HEADERS   += $(HEADERS-yes)
 
-PATH_LIBNAME = $(foreach NAME,$(1),lib$(NAME)/$($(CONFIG_SHARED:yes=S)LIBNAME))
-DEP_LIBS := $(foreach lib,$(FFLIBS),$(call PATH_LIBNAME,$(lib)))
+PATH_LIBNAME = $(foreach NAME,$(1),lib$(NAME)/$($(2)LIBNAME))
+DEP_LIBS := $(foreach lib,$(FFLIBS),$(call PATH_LIBNAME,$(lib),$(CONFIG_SHARED:yes=S)))
+STATIC_DEP_LIBS := $(foreach lib,$(FFLIBS),$(call PATH_LIBNAME,$(lib)))
 
 SRC_DIR    := $(SRC_PATH)/lib$(NAME)
 ALLHEADERS := $(subst $(SRC_DIR)/,$(SUBDIR),$(wildcard $(SRC_DIR)/*.h $(SRC_DIR)/$(ARCH)/*.h))
@@ -146,7 +152,7 @@ $(TOOLOBJS): | tools
 
 OBJDIRS := $(OBJDIRS) $(dir $(OBJS) $(HOBJS) $(HOSTOBJS) $(SLIBOBJS) $(TESTOBJS))
 
-CLEANSUFFIXES     = *.d *.o *~ *.h.c *.map *.ver *.ho *.gcno *.gcda *$(DEFAULT_YASMD).asm
+CLEANSUFFIXES     = *.d *.o *~ *.h.c *.map *.ver *.ver-sol2 *.ho *.gcno *.gcda *$(DEFAULT_YASMD).asm
 DISTCLEANSUFFIXES = *.pc
 LIBSUFFIXES       = *.a *.lib *.so *.so.* *.dylib *.dll *.def *.dll.a
 
diff --git a/compat/aix/math.h b/compat/aix/math.h
index 65a89c45..dee13c8d 100644
--- a/compat/aix/math.h
+++ b/compat/aix/math.h
@@ -19,8 +19,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef FFMPEG_COMPAT_AIX_MATH_H
-#define FFMPEG_COMPAT_AIX_MATH_H
+#ifndef COMPAT_AIX_MATH_H
+#define COMPAT_AIX_MATH_H
 
 #define class class_in_math_h_causes_problems
 
@@ -28,4 +28,4 @@
 
 #undef class
 
-#endif /* FFMPEG_COMPAT_AIX_MATH_H */
+#endif /* COMPAT_AIX_MATH_H */
diff --git a/compat/msvcrt/snprintf.h b/compat/msvcrt/snprintf.h
index f02113c5..cd47953e 100644
--- a/compat/msvcrt/snprintf.h
+++ b/compat/msvcrt/snprintf.h
@@ -19,8 +19,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef COMPAT_SNPRINTF_H
-#define COMPAT_SNPRINTF_H
+#ifndef COMPAT_MSVCRT_SNPRINTF_H
+#define COMPAT_MSVCRT_SNPRINTF_H
 
 #include <stdarg.h>
 #include <stdio.h>
@@ -35,4 +35,4 @@ int avpriv_vsnprintf(char *s, size_t n, const char *fmt, va_list ap);
 #define _snprintf avpriv_snprintf
 #define vsnprintf avpriv_vsnprintf
 
-#endif /* COMPAT_SNPRINTF_H */
+#endif /* COMPAT_MSVCRT_SNPRINTF_H */
diff --git a/compat/os2threads.h b/compat/os2threads.h
index 5b6ca557..40a119ff 100644
--- a/compat/os2threads.h
+++ b/compat/os2threads.h
@@ -23,8 +23,8 @@
  * os2threads to pthreads wrapper
  */
 
-#ifndef AVCODEC_OS2PTHREADS_H
-#define AVCODEC_OS2PTHREADS_H
+#ifndef COMPAT_OS2THREADS_H
+#define COMPAT_OS2THREADS_H
 
 #define INCL_DOS
 #include <os2.h>
@@ -32,59 +32,71 @@
 #undef __STRICT_ANSI__          /* for _beginthread() */
 #include <stdlib.h>
 
-#include "libavutil/mem.h"
+#include <sys/builtin.h>
+#include <sys/fmutex.h>
+
+#include "libavutil/attributes.h"
+
+typedef struct {
+    TID tid;
+    void *(*start_routine)(void *);
+    void *arg;
+    void *result;
+} pthread_t;
 
-typedef TID  pthread_t;
 typedef void pthread_attr_t;
 
 typedef HMTX pthread_mutex_t;
 typedef void pthread_mutexattr_t;
 
 typedef struct {
-    HEV  event_sem;
-    int  wait_count;
+    HEV event_sem;
+    HEV ack_sem;
+    volatile unsigned  wait_count;
 } pthread_cond_t;
 
 typedef void pthread_condattr_t;
 
-struct thread_arg {
-    void *(*start_routine)(void *);
-    void *arg;
-};
+typedef struct {
+    volatile int done;
+    _fmutex mtx;
+} pthread_once_t;
+
+#define PTHREAD_ONCE_INIT {0, _FMUTEX_INITIALIZER}
 
 static void thread_entry(void *arg)
 {
-    struct thread_arg *thread_arg = arg;
+    pthread_t *thread = arg;
 
-    thread_arg->start_routine(thread_arg->arg);
-
-    av_free(thread_arg);
+    thread->result = thread->start_routine(thread->arg);
 }
 
-static av_always_inline int pthread_create(pthread_t *thread, const pthread_attr_t *attr, void *(*start_routine)(void*), void *arg)
+static av_always_inline int pthread_create(pthread_t *thread,
+                                           const pthread_attr_t *attr,
+                                           void *(*start_routine)(void*),
+                                           void *arg)
 {
-    struct thread_arg *thread_arg;
-
-    thread_arg = av_mallocz(sizeof(struct thread_arg));
-    if (!thread_arg)
-        return ENOMEM;
+    thread->start_routine = start_routine;
+    thread->arg = arg;
+    thread->result = NULL;
 
-    thread_arg->start_routine = start_routine;
-    thread_arg->arg = arg;
-
-    *thread = _beginthread(thread_entry, NULL, 256 * 1024, thread_arg);
+    thread->tid = _beginthread(thread_entry, NULL, 1024 * 1024, thread);
 
     return 0;
 }
 
 static av_always_inline int pthread_join(pthread_t thread, void **value_ptr)
 {
-    DosWaitThread((PTID)&thread, DCWW_WAIT);
+    DosWaitThread(&thread.tid, DCWW_WAIT);
+
+    if (value_ptr)
+        *value_ptr = thread.result;
 
     return 0;
 }
 
-static av_always_inline int pthread_mutex_init(pthread_mutex_t *mutex, const pthread_mutexattr_t *attr)
+static av_always_inline int pthread_mutex_init(pthread_mutex_t *mutex,
+                                               const pthread_mutexattr_t *attr)
 {
     DosCreateMutexSem(NULL, (PHMTX)mutex, 0, FALSE);
 
@@ -112,9 +124,11 @@ static av_always_inline int pthread_mutex_unlock(pthread_mutex_t *mutex)
     return 0;
 }
 
-static av_always_inline int pthread_cond_init(pthread_cond_t *cond, const pthread_condattr_t *attr)
+static av_always_inline int pthread_cond_init(pthread_cond_t *cond,
+                                              const pthread_condattr_t *attr)
 {
     DosCreateEventSem(NULL, &cond->event_sem, DCE_POSTONE, FALSE);
+    DosCreateEventSem(NULL, &cond->ack_sem, DCE_POSTONE, FALSE);
 
     cond->wait_count = 0;
 
@@ -124,16 +138,16 @@ static av_always_inline int pthread_cond_init(pthread_cond_t *cond, const pthrea
 static av_always_inline int pthread_cond_destroy(pthread_cond_t *cond)
 {
     DosCloseEventSem(cond->event_sem);
+    DosCloseEventSem(cond->ack_sem); /* close the acknowledgement semaphore created in pthread_cond_init */
 
     return 0;
 }
 
 static av_always_inline int pthread_cond_signal(pthread_cond_t *cond)
 {
-    if (cond->wait_count > 0) {
+    if (!__atomic_cmpxchg32(&cond->wait_count, 0, 0)) {
         DosPostEventSem(cond->event_sem);
-
-        cond->wait_count--;
+        DosWaitEventSem(cond->ack_sem, SEM_INDEFINITE_WAIT);
     }
 
     return 0;
@@ -141,26 +155,47 @@ static av_always_inline int pthread_cond_signal(pthread_cond_t *cond)
 
 static av_always_inline int pthread_cond_broadcast(pthread_cond_t *cond)
 {
-    while (cond->wait_count > 0) {
-        DosPostEventSem(cond->event_sem);
-
-        cond->wait_count--;
-    }
+    while (!__atomic_cmpxchg32(&cond->wait_count, 0, 0)) /* presumably true while wait_count != 0 (swap 0->0 fails) - confirm against libc docs */
+        pthread_cond_signal(cond); /* posts event_sem, then waits on ack_sem for one waiter */
 
     return 0;
 }
 
-static av_always_inline int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex)
+static av_always_inline int pthread_cond_wait(pthread_cond_t *cond,
+                                              pthread_mutex_t *mutex) /* unlocked while waiting, re-locked before returning */
 {
-    cond->wait_count++;
+    __atomic_increment(&cond->wait_count); /* register as a waiter before the mutex is released */
 
     pthread_mutex_unlock(mutex);
 
     DosWaitEventSem(cond->event_sem, SEM_INDEFINITE_WAIT);
 
+    __atomic_decrement(&cond->wait_count); /* woken up: no longer counted as a waiter */
+
+    DosPostEventSem(cond->ack_sem); /* acknowledge the wake-up; pthread_cond_signal waits on ack_sem */
+
     pthread_mutex_lock(mutex);
 
     return 0;
 }
 
-#endif /* AVCODEC_OS2PTHREADS_H */
+static av_always_inline int pthread_once(pthread_once_t *once_control, /* holds the 'done' flag and the guarding mutex */
+                                         void (*init_routine)(void))
+{
+    if (!once_control->done) /* unlocked check: skip the mutex once initialization has been recorded */
+    {
+        _fmutex_request(&once_control->mtx, 0);
+
+        if (!once_control->done) /* re-check under the mutex: another thread may have finished meanwhile */
+        {
+            init_routine();
+
+            once_control->done = 1; /* set only after init_routine has returned */
+        }
+
+        _fmutex_release(&once_control->mtx);
+    }
+
+    return 0; /* always reports success */
+}
+#endif /* COMPAT_OS2THREADS_H */
diff --git a/compat/solaris/make_sunver.pl b/compat/solaris/make_sunver.pl
new file mode 100755
index 00000000..0e9ed1d3
--- /dev/null
+++ b/compat/solaris/make_sunver.pl
@@ -0,0 +1,352 @@
+#!/usr/bin/env perl
+
+# make_sunver.pl
+#
+#   Copyright (C) 2010, 2011, 2012, 2013
+#   Free Software Foundation, Inc.
+#
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; see the file COPYING.GPLv3.  If not see
+# <http://www.gnu.org/licenses/>.
+
+# This script takes at least two arguments, a GNU style version script and
+# a list of object and archive files, and generates a corresponding Sun
+# style version script as follows:
+#
+# Each glob pattern, C++ mangled pattern or literal in the input script is
+# matched against all global symbols in the input objects, emitting those
+# that matched (or nothing if no match was found).
+# A comment with the original pattern and its type is left in the output
+# file to make it easy to understand the matches.
+#
+# It uses elfdump when present (native), GNU readelf otherwise.
+# It depends on the GNU version of c++filt, since it must understand the
+# GNU mangling style.
+
+use FileHandle;
+use IPC::Open2;
+
+# Enforce C locale.
+$ENV{'LC_ALL'} = "C";
+$ENV{'LANG'} = "C";
+
+# Input version script, GNU style.
+my $symvers = shift;
+
+##########
+# Get all the symbols from the library, match them, and add them to a hash.
+
+my %sym_hash = ();
+
+# List of objects and archives to process.
+my @OBJECTS = ();
+
+# List of shared objects to omit from processing.
+my @SHAREDOBJS = ();
+
+# Filter out those input archives that have corresponding shared objects to
+# avoid adding all symbols matched in the archive to the output map.
+foreach $file (@ARGV) {
+    if (($so = $file) =~ s/\.a$/.so/ && -e $so) {
+    printf STDERR "omitted $file -> $so\n";
+    push (@SHAREDOBJS, $so);
+    } else {
+    push (@OBJECTS, $file);
+    }
+}
+
+# We need to detect and ignore hidden symbols.  Solaris nm can only detect
+# this in the harder to parse default output format, and GNU nm not at all,
+# so use elfdump -s in the native case and GNU readelf -s otherwise.
+# GNU objdump -t cannot be used since it produces a variable number of
+# columns.
+
+# The path to elfdump.
+my $elfdump = "/usr/ccs/bin/elfdump";
+
+if (-f $elfdump) {
+    open ELFDUMP,$elfdump.' -s '.(join ' ',@OBJECTS).'|' or die $!;
+    my $skip_arsym = 0;
+
+    while (<ELFDUMP>) {
+    chomp;
+
+    # Ignore empty lines.
+    if (/^$/) {
+        # End of archive symbol table, stop skipping.
+        $skip_arsym = 0 if $skip_arsym;
+        next;
+    }
+
+    # Keep skipping until end of archive symbol table.
+    next if ($skip_arsym);
+
+    # Ignore object name header for individual objects and archives.
+    next if (/:$/);
+
+    # Ignore table header lines.
+    next if (/^Symbol Table Section:/);
+    next if (/index.*value.*size/);
+
+    # Start of archive symbol table: start skipping.
+    if (/^Symbol Table: \(archive/) {
+        $skip_arsym = 1;
+        next;
+    }
+
+    # Split table.
+    (undef, undef, undef, undef, $bind, $oth, undef, $shndx, $name) = split;
+
+    # Error out for unknown input.
+    die "unknown input line:\n$_" unless defined($bind);
+
+    # Ignore local symbols.
+    next if ($bind eq "LOCL");
+    # Ignore hidden symbols.
+    next if ($oth eq "H");
+    # Ignore undefined symbols.
+    next if ($shndx eq "UNDEF");
+    # Error out for unhandled cases.
+    if ($bind !~ /^(GLOB|WEAK)/ or $oth ne "D") {
+        die "unhandled symbol:\n$_";
+    }
+
+    # Remember symbol.
+    $sym_hash{$name}++;
+    }
+    close ELFDUMP or die "$elfdump error";
+} else {
+    open READELF, 'readelf -s -W '.(join ' ',@OBJECTS).'|' or die $!;
+    # Process each symbol.
+    while (<READELF>) {
+    chomp;
+
+    # Ignore empty lines.
+    next if (/^$/);
+
+    # Ignore object name header.
+    next if (/^File: .*$/);
+
+    # Ignore table header lines.
+    next if (/^Symbol table.*contains.*:/);
+    next if (/Num:.*Value.*Size/);
+
+    # Split table.
+    (undef, undef, undef, undef, $bind, $vis, $ndx, $name) = split;
+
+    # Error out for unknown input.
+    die "unknown input line:\n$_" unless defined($bind);
+
+    # Ignore local symbols.
+    next if ($bind eq "LOCAL");
+    # Ignore hidden symbols.
+    next if ($vis eq "HIDDEN");
+    # Ignore undefined symbols.
+    next if ($ndx eq "UND");
+    # Error out for unhandled cases.
+    if ($bind !~ /^(GLOBAL|WEAK)/ or $vis ne "DEFAULT") {
+        die "unhandled symbol:\n$_";
+    }
+
+    # Remember symbol.
+    $sym_hash{$name}++;
+    }
+    close READELF or die "readelf error";
+}
+
+##########
+# The various types of glob patterns.
+#
+# A glob pattern that is to be applied to the demangled name: 'cxx'.
+# A glob pattern that applies directly to the name in the .o files: 'glob'.
+# This pattern is ignored; used for local variables (usually just '*'): 'ign'.
+
+# The type of the current pattern.
+my $glob = 'glob';
+
+# We're currently inside `extern "C++"', which Sun ld doesn't understand.
+my $in_extern = 0;
+
+# The c++filt command to use.  This *must* be GNU c++filt; the Sun Studio
+# c++filt doesn't handle the GNU mangling style.
+my $cxxfilt = $ENV{'CXXFILT'} || "c++filt";
+
+# The current version name.
+my $current_version = "";
+
+# Was there any attempt to match a symbol to this version?
+my $matches_attempted;
+
+# The number of symbols which matched this version.
+my $matched_symbols;
+
+open F,$symvers or die $!;
+
+# Print information about generating this file
+print "# This file was generated by make_sunver.pl.  DO NOT EDIT!\n";
+print "# It was generated by:\n";
+printf "# %s %s %s\n", $0, $symvers, (join ' ',@ARGV);
+printf "# Omitted archives with corresponding shared libraries: %s\n",
+    (join ' ', @SHAREDOBJS) if $#SHAREDOBJS >= 0;
+print "#\n\n";
+
+print "\$mapfile_version 2\n";
+
+while (<F>) {
+    # Lines of the form '};'
+    if (/^([ \t]*)(\}[ \t]*;[ \t]*)$/) {
+    $glob = 'glob';
+    if ($in_extern) {
+        $in_extern--;
+        print "$1##$2\n";
+    } else {
+        print;
+    }
+    next;
+    }
+
+    # Lines of the form '} SOME_VERSION_NAME_1.0;'
+    if (/^[ \t]*\}[ \tA-Z0-9_.a-z]+;[ \t]*$/) {
+    $glob = 'glob';
+    # We tried to match symbols against this version, but none matched.
+    # Emit dummy hidden symbol to avoid marking this version WEAK.
+    if ($matches_attempted && $matched_symbols == 0) {
+        print "  hidden:\n";
+        print "    .force_WEAK_off_$current_version = DATA S0x0 V0x0;\n";
+    }
+    print; next;
+    }
+
+    # Comment and blank lines
+    if (/^[ \t]*\#/) { print; next; }
+    if (/^[ \t]*$/) { print; next; }
+
+    # Lines of the form '{'
+    if (/^([ \t]*){$/) {
+    if ($in_extern) {
+        print "$1##{\n";
+    } else {
+        print;
+    }
+    next;
+    }
+
+    # Lines of the form 'SOME_VERSION_NAME_1.1 {'
+    if (/^([A-Z0-9_.]+)[ \t]+{$/) {
+    # Record version name.
+    $current_version = $1;
+    # Reset match attempts, #matched symbols for this version.
+    $matches_attempted = 0;
+    $matched_symbols = 0;
+    print "SYMBOL_VERSION $1 {\n";
+    next;
+    }
+
+    # Ignore 'global:'
+    if (/^[ \t]*global:$/) { print; next; }
+
+    # After 'local:', globs should be ignored, they won't be exported.
+    if (/^[ \t]*local:$/) {
+    $glob = 'ign';
+    print;
+    next;
+    }
+
+    # After 'extern "C++"', globs are C++ patterns
+    if (/^([ \t]*)(extern \"C\+\+\"[ \t]*)$/) {
+    $in_extern++;
+    $glob = 'cxx';
+    # Need to comment, Sun ld cannot handle this.
+    print "$1##$2\n"; next;
+    }
+
+    # Chomp newline now we're done with passing through the input file.
+    chomp;
+
+    # Catch globs.  Note that '{}' is not allowed in globs by this script,
+    # so only '*' and '[]' are available.
+    if (/^([ \t]*)([^ \t;{}#]+);?[ \t]*$/) {
+    my $ws = $1;
+    my $ptn = $2;
+    # Turn the glob into a regex by replacing '*' with '.*', '?' with '.'.
+    # Keep $ptn so we can still print the original form.
+    ($pattern = $ptn) =~ s/\*/\.\*/g;
+    $pattern =~ s/\?/\./g;
+
+    if ($glob eq 'ign') {
+        # We're in a local: * section; just continue.
+        print "$_\n";
+        next;
+    }
+
+    # Print the glob commented for human readers.
+    print "$ws##$ptn ($glob)\n";
+    # We tried to match a symbol to this version.
+    $matches_attempted++;
+
+    if ($glob eq 'glob') {
+        my %ptn_syms = ();
+
+        # Match ptn against symbols in %sym_hash.
+        foreach my $sym (keys %sym_hash) {
+        # Maybe it matches one of the patterns based on the symbol in
+        # the .o file.
+        $ptn_syms{$sym}++ if ($sym =~ /^$pattern$/);
+        }
+
+        foreach my $sym (sort keys(%ptn_syms)) {
+        $matched_symbols++;
+        print "$ws$sym;\n";
+        }
+    } elsif ($glob eq 'cxx') {
+        my %dem_syms = ();
+
+        # Verify that we're actually using GNU c++filt.  Other versions
+        # most likely cannot handle GNU style symbol mangling.
+        my $cxxout = `$cxxfilt --version 2>&1`;
+        $cxxout =~ m/GNU/ or die "$0 requires GNU c++filt to function";
+
+        # Talk to c++filt through a pair of file descriptors.
+        # Need to start a fresh instance per pattern, otherwise the
+        # process grows to 500+ MB.
+        my $pid = open2(*FILTIN, *FILTOUT, $cxxfilt) or die $!;
+
+        # Match ptn against symbols in %sym_hash.
+        foreach my $sym (keys %sym_hash) {
+        # No?  Well, maybe its demangled form matches one of those
+        # patterns.
+        printf FILTOUT "%s\n",$sym;
+        my $dem = <FILTIN>;
+        chomp $dem;
+        $dem_syms{$sym}++ if ($dem =~ /^$pattern$/);
+        }
+
+        close FILTOUT or die "c++filt error";
+        close FILTIN or die "c++filt error";
+        # Need to wait for the c++filt process to avoid lots of zombies.
+        waitpid $pid, 0;
+
+        foreach my $sym (sort keys(%dem_syms)) {
+        $matched_symbols++;
+        print "$ws$sym;\n";
+        }
+    } else {
+        # No?  Well, then ignore it.
+    }
+    next;
+    }
+    # Important sanity check.  This script can't handle lots of formats
+    # that GNU ld can, so be sure to error out if one is seen!
+    die "strange line `$_'";
+}
+close F;
diff --git a/compat/tms470/math.h b/compat/tms470/math.h
index 6234cc59..0a42743a 100644
--- a/compat/tms470/math.h
+++ b/compat/tms470/math.h
@@ -16,8 +16,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef FFMPEG_COMPAT_TMS470_MATH_H
-#define FFMPEG_COMPAT_TMS470_MATH_H
+#ifndef COMPAT_TMS470_MATH_H
+#define COMPAT_TMS470_MATH_H
 
 #include_next <math.h>
 
@@ -27,4 +27,4 @@
 #define INFINITY (*(const float*)((const unsigned []){ 0x7f800000 }))
 #define NAN      (*(const float*)((const unsigned []){ 0x7fc00000 }))
 
-#endif /* FFMPEG_COMPAT_TMS470_MATH_H */
+#endif /* COMPAT_TMS470_MATH_H */
diff --git a/compat/va_copy.h b/compat/va_copy.h
index 3cb5ebee..a40bbe66 100644
--- a/compat/va_copy.h
+++ b/compat/va_copy.h
@@ -19,6 +19,9 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#ifndef COMPAT_VA_COPY_H
+#define COMPAT_VA_COPY_H
+
 #include <stdarg.h>
 
 #if !defined(va_copy) && defined(_MSC_VER)
@@ -27,3 +30,5 @@
 #if !defined(va_copy) && defined(__GNUC__) && __GNUC__ < 3
 #define va_copy(dst, src) __va_copy(dst, src)
 #endif
+
+#endif /* COMPAT_VA_COPY_H */
diff --git a/compat/w32pthreads.h b/compat/w32pthreads.h
index 87e816ff..4ac2a995 100644
--- a/compat/w32pthreads.h
+++ b/compat/w32pthreads.h
@@ -26,8 +26,8 @@
  * w32threads to pthreads wrapper
  */
 
-#ifndef FFMPEG_COMPAT_W32PTHREADS_H
-#define FFMPEG_COMPAT_W32PTHREADS_H
+#ifndef COMPAT_W32PTHREADS_H
+#define COMPAT_W32PTHREADS_H
 
 /* Build up a pthread-like API using underlying Windows API. Have only static
  * methods so as to not conflict with a potentially linked in pthread-win32
@@ -39,6 +39,11 @@
 #include <windows.h>
 #include <process.h>
 
+#if _WIN32_WINNT < 0x0600 && defined(__MINGW32__)
+#undef MemoryBarrier
+#define MemoryBarrier __sync_synchronize
+#endif
+
 #include "libavutil/attributes.h"
 #include "libavutil/common.h"
 #include "libavutil/internal.h"
@@ -82,19 +87,29 @@ static av_unused int pthread_create(pthread_t *thread, const void *unused_attr,
 {
     thread->func   = start_routine;
     thread->arg    = arg;
+#if HAVE_WINRT
+    thread->handle = (void*)CreateThread(NULL, 0, win32thread_worker, thread,
+                                           0, NULL);
+#else
     thread->handle = (void*)_beginthreadex(NULL, 0, win32thread_worker, thread,
                                            0, NULL);
+#endif
     return !thread->handle;
 }
 
-static av_unused void pthread_join(pthread_t thread, void **value_ptr)
+static av_unused int pthread_join(pthread_t thread, void **value_ptr) /* returns 0, EINVAL or EDEADLK */
 {
     DWORD ret = WaitForSingleObject(thread.handle, INFINITE);
-    if (ret != WAIT_OBJECT_0)
-        return;
+    if (ret != WAIT_OBJECT_0) { /* wait did not complete normally; note the handle is not closed on these paths */
+        if (ret == WAIT_ABANDONED)
+            return EINVAL;
+        else
+            return EDEADLK; /* every other wait result is reported as EDEADLK */
+    }
     if (value_ptr)
         *value_ptr = thread.ret;
     CloseHandle(thread.handle);
+    return 0; /* joined successfully */
 }
 
 static inline int pthread_mutex_init(pthread_mutex_t *m, void* attr)
@@ -119,6 +134,19 @@ static inline int pthread_mutex_unlock(pthread_mutex_t *m)
 }
 
 #if _WIN32_WINNT >= 0x0600
+typedef INIT_ONCE pthread_once_t;
+#define PTHREAD_ONCE_INIT INIT_ONCE_STATIC_INIT
+
+static av_unused int pthread_once(pthread_once_t *once_control, void (*init_routine)(void)) /* pthread_once_t is INIT_ONCE in this branch */
+{
+    BOOL pending = FALSE;
+    InitOnceBeginInitialize(once_control, 0, &pending, NULL); /* return value is ignored; 'pending' reports whether init still has to run */
+    if (pending)
+        init_routine();
+    InitOnceComplete(once_control, 0, NULL); /* NOTE(review): called even when pending was FALSE - confirm this is harmless */
+    return 0; /* always reports success */
+}
+
 static inline int pthread_cond_init(pthread_cond_t *cond, const void *unused_attr)
 {
     InitializeConditionVariable(cond);
@@ -126,14 +154,15 @@ static inline int pthread_cond_init(pthread_cond_t *cond, const void *unused_att
 }
 
 /* native condition variables do not destroy */
-static inline void pthread_cond_destroy(pthread_cond_t *cond)
+static inline int pthread_cond_destroy(pthread_cond_t *cond) /* cond is not touched */
 {
-    return;
+    return 0; /* nothing to release; report success */
 }
 
-static inline void pthread_cond_broadcast(pthread_cond_t *cond)
+static inline int pthread_cond_broadcast(pthread_cond_t *cond) /* thin wrapper over WakeAllConditionVariable */
 {
     WakeAllConditionVariable(cond);
+    return 0; /* always reports success */
 }
 
 static inline int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex)
@@ -142,14 +171,77 @@ static inline int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex
     return 0;
 }
 
-static inline void pthread_cond_signal(pthread_cond_t *cond)
+static inline int pthread_cond_signal(pthread_cond_t *cond) /* thin wrapper over WakeConditionVariable */
 {
     WakeConditionVariable(cond);
+    return 0; /* always reports success */
 }
 
 #else // _WIN32_WINNT < 0x0600
+
+/* atomic init state of dynamically loaded functions */
+static LONG w32thread_init_state = 0;
+static av_unused void w32thread_init(void);
+
+/* When targeting pre-Windows 6.0 platforms, define our own INIT_ONCE
+ * stand-in, compatible with the one used by the native API. */
+
+typedef union pthread_once_t  {
+    void * Ptr;    ///< Used when the Windows 6.0+ native functions are available
+    LONG state;    ///< Used by the pre-Windows 6.0 fallback (w32thread_once_fallback)
+} pthread_once_t;
+
+#define PTHREAD_ONCE_INIT {0}
+
+/* function pointers to init once API on windows 6.0+ kernels */
+static BOOL (WINAPI *initonce_begin)(pthread_once_t *lpInitOnce, DWORD dwFlags, BOOL *fPending, void **lpContext);
+static BOOL (WINAPI *initonce_complete)(pthread_once_t *lpInitOnce, DWORD dwFlags, void *lpContext);
+
+/* pre-Windows 6.0 compat using a spin-lock; *state is 0 (not started), 1 (init running) or 2 (done) */
+static inline void w32thread_once_fallback(LONG volatile *state, void (*init_routine)(void))
+{
+    switch (InterlockedCompareExchange(state, 1, 0)) { /* sets 0 -> 1 and yields the previous value */
+    /* Initial run: this caller moved the state from 0 to 1 */
+    case 0:
+        init_routine();
+        InterlockedExchange(state, 2); /* publish completion */
+        break;
+    /* Another thread is running init: wait until it stores 2 */
+    case 1:
+        while (1) {
+            MemoryBarrier();
+            if (*state == 2)
+                break;
+            Sleep(0); /* yield between polls */
+        }
+        break;
+    /* Initialization complete: nothing to do */
+    case 2:
+        break;
+    }
+}
+
+static av_unused int pthread_once(pthread_once_t *once_control, void (*init_routine)(void)) /* always returns 0 */
+{
+    w32thread_once_fallback(&w32thread_init_state, w32thread_init); /* run w32thread_init once so the function pointers below are set up */
+
+    /* Use native functions on Windows 6.0+ (both pointers were resolved) */
+    if (initonce_begin && initonce_complete) {
+        BOOL pending = FALSE;
+        initonce_begin(once_control, 0, &pending, NULL); /* return value is ignored */
+        if (pending)
+            init_routine();
+        initonce_complete(once_control, 0, NULL);
+        return 0;
+    }
+
+    w32thread_once_fallback(&once_control->state, init_routine); /* no native API: use the spin-lock path on the 'state' member */
+    return 0;
+}
+
 /* for pre-Windows 6.0 platforms we need to define and use our own condition
  * variable and api */
+
 typedef struct  win32_cond_t {
     pthread_mutex_t mtx_broadcast;
     pthread_mutex_t mtx_waiter_count;
@@ -169,6 +261,9 @@ static BOOL (WINAPI *cond_wait)(pthread_cond_t *cond, pthread_mutex_t *mutex,
 static av_unused int pthread_cond_init(pthread_cond_t *cond, const void *unused_attr)
 {
     win32_cond_t *win32_cond = NULL;
+
+    w32thread_once_fallback(&w32thread_init_state, w32thread_init);
+
     if (cond_init) {
         cond_init(cond);
         return 0;
@@ -191,12 +286,12 @@ static av_unused int pthread_cond_init(pthread_cond_t *cond, const void *unused_
     return 0;
 }
 
-static av_unused void pthread_cond_destroy(pthread_cond_t *cond)
+static av_unused int pthread_cond_destroy(pthread_cond_t *cond)
 {
     win32_cond_t *win32_cond = cond->Ptr;
     /* native condition variables do not destroy */
     if (cond_init)
-        return;
+        return 0;
 
     /* non native condition variables */
     CloseHandle(win32_cond->semaphore);
@@ -205,16 +300,17 @@ static av_unused void pthread_cond_destroy(pthread_cond_t *cond)
     pthread_mutex_destroy(&win32_cond->mtx_broadcast);
     av_freep(&win32_cond);
     cond->Ptr = NULL;
+    return 0;
 }
 
-static av_unused void pthread_cond_broadcast(pthread_cond_t *cond)
+static av_unused int pthread_cond_broadcast(pthread_cond_t *cond)
 {
     win32_cond_t *win32_cond = cond->Ptr;
     int have_waiter;
 
     if (cond_broadcast) {
         cond_broadcast(cond);
-        return;
+        return 0;
     }
 
     /* non native condition variables */
@@ -236,6 +332,7 @@ static av_unused void pthread_cond_broadcast(pthread_cond_t *cond)
     } else
         pthread_mutex_unlock(&win32_cond->mtx_waiter_count);
     pthread_mutex_unlock(&win32_cond->mtx_broadcast);
+    return 0;
 }
 
 static av_unused int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex)
@@ -270,13 +367,13 @@ static av_unused int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mu
     return pthread_mutex_lock(mutex);
 }
 
-static av_unused void pthread_cond_signal(pthread_cond_t *cond)
+static av_unused int pthread_cond_signal(pthread_cond_t *cond)
 {
     win32_cond_t *win32_cond = cond->Ptr;
     int have_waiter;
     if (cond_signal) {
         cond_signal(cond);
-        return;
+        return 0;
     }
 
     pthread_mutex_lock(&win32_cond->mtx_broadcast);
@@ -293,6 +390,7 @@ static av_unused void pthread_cond_signal(pthread_cond_t *cond)
     }
 
     pthread_mutex_unlock(&win32_cond->mtx_broadcast);
+    return 0;
 }
 #endif
 
@@ -309,8 +407,12 @@ static av_unused void w32thread_init(void)
         (void*)GetProcAddress(kernel_dll, "WakeConditionVariable");
     cond_wait      =
         (void*)GetProcAddress(kernel_dll, "SleepConditionVariableCS");
+    initonce_begin =
+        (void*)GetProcAddress(kernel_dll, "InitOnceBeginInitialize");
+    initonce_complete =
+        (void*)GetProcAddress(kernel_dll, "InitOnceComplete");
 #endif
 
 }
 
-#endif /* FFMPEG_COMPAT_W32PTHREADS_H */
+#endif /* COMPAT_W32PTHREADS_H */
diff --git a/compat/windows/mslink b/compat/windows/mslink
new file mode 100755
index 00000000..07b2b3e3
--- /dev/null
+++ b/compat/windows/mslink
@@ -0,0 +1,9 @@
+#!/bin/sh
+# Run the "link" found next to cl; otherwise fall back to the "link" on PATH.
+LINK_EXE_PATH=$(dirname "$(command -v cl)")/link
+if [ -x "$LINK_EXE_PATH" ]; then
+    "$LINK_EXE_PATH" "$@"
+else
+    link "$@"
+fi
+exit $?
diff --git a/configure b/configure
index 7f1b5c00..206b1690 100755
--- a/configure
+++ b/configure
@@ -128,7 +128,6 @@ Component options:
   --disable-avdevice       disable libavdevice build
   --disable-avcodec        disable libavcodec build
   --disable-avformat       disable libavformat build
-  --disable-avutil         disable libavutil build
   --disable-swresample     disable libswresample build
   --disable-swscale        disable libswscale build
   --disable-postproc       disable libpostproc build
@@ -155,6 +154,7 @@ Hardware accelerators:
   --disable-vaapi          disable VAAPI code [autodetect]
   --disable-vda            disable VDA code [autodetect]
   --disable-vdpau          disable VDPAU code [autodetect]
+  --enable-videotoolbox    enable VideoToolbox code [autodetect]
 
 Individual component options:
   --disable-everything     disable all components listed below
@@ -196,13 +196,17 @@ Individual component options:
 External library support:
   --enable-avisynth        enable reading of AviSynth script files [no]
   --disable-bzlib          disable bzlib [autodetect]
+  --enable-chromaprint     enable audio fingerprinting with chromaprint [no]
   --enable-fontconfig      enable fontconfig, useful for drawtext filter [no]
   --enable-frei0r          enable frei0r video filtering [no]
+  --enable-gcrypt          enable gcrypt, needed for rtmp(t)e support
+                           if openssl, librtmp or gmp is not used [no]
+  --enable-gmp             enable gmp, needed for rtmp(t)e support
+                           if openssl or librtmp is not used [no]
   --enable-gnutls          enable gnutls, needed for https support
                            if openssl is not used [no]
   --disable-iconv          disable iconv [autodetect]
   --enable-ladspa          enable LADSPA audio filtering [no]
-  --enable-libaacplus      enable AAC+ encoding via libaacplus [no]
   --enable-libass          enable libass subtitles rendering,
                            needed for subtitles and ass filter [no]
   --enable-libbluray       enable BluRay reading using libbluray [no]
@@ -222,6 +226,7 @@ External library support:
   --enable-libgsm          enable GSM de/encoding via libgsm [no]
   --enable-libiec61883     enable iec61883 via libiec61883 [no]
   --enable-libilbc         enable iLBC de/encoding via libilbc [no]
+  --enable-libkvazaar      enable HEVC encoding via libkvazaar [no]
   --enable-libmfx          enable HW acceleration through libmfx
   --enable-libmodplug      enable ModPlug via libmodplug [no]
   --enable-libmp3lame      enable MP3 encoding via libmp3lame [no]
@@ -234,21 +239,21 @@ External library support:
   --enable-libopenjpeg     enable JPEG 2000 de/encoding via OpenJPEG [no]
   --enable-libopus         enable Opus de/encoding via libopus [no]
   --enable-libpulse        enable Pulseaudio input via libpulse [no]
-  --enable-libquvi         enable quvi input via libquvi [no]
+  --enable-librubberband   enable rubberband needed for rubberband filter [no]
   --enable-librtmp         enable RTMP[E] support via librtmp [no]
   --enable-libschroedinger enable Dirac de/encoding via libschroedinger [no]
   --enable-libshine        enable fixed-point MP3 encoding via libshine [no]
   --enable-libsmbclient    enable Samba protocol via libsmbclient [no]
+  --enable-libsnappy       enable Snappy compression, needed for hap encoding [no]
   --enable-libsoxr         enable Include libsoxr resampling [no]
   --enable-libspeex        enable Speex de/encoding via libspeex [no]
   --enable-libssh          enable SFTP protocol via libssh [no]
-  --enable-libstagefright-h264  enable H.264 decoding via libstagefright [no]
+  --enable-libtesseract    enable Tesseract, needed for ocr filter [no]
   --enable-libtheora       enable Theora encoding via libtheora [no]
   --enable-libtwolame      enable MP2 encoding via libtwolame [no]
   --enable-libutvideo      enable Ut Video encoding and decoding via libutvideo [no]
   --enable-libv4l2         enable libv4l2/v4l-utils [no]
   --enable-libvidstab      enable video stabilization using vid.stab [no]
-  --enable-libvo-aacenc    enable AAC encoding via libvo-aacenc [no]
   --enable-libvo-amrwbenc  enable AMR-WB encoding via libvo-amrwbenc [no]
   --enable-libvorbis       enable Vorbis en/decoding via libvorbis,
                            native implementation exists [no]
@@ -264,17 +269,21 @@ External library support:
   --enable-libxcb-shape    enable X11 grabbing shape rendering [autodetect]
   --enable-libxvid         enable Xvid encoding via xvidcore,
                            native MPEG-4/Xvid encoder exists [no]
+  --enable-libzimg         enable z.lib, needed for zscale filter [no]
   --enable-libzmq          enable message passing via libzmq [no]
   --enable-libzvbi         enable teletext support via libzvbi [no]
   --disable-lzma           disable lzma [autodetect]
-  --enable-decklink        enable Blackmagick DeckLink I/O support [no]
+  --enable-decklink        enable Blackmagic DeckLink I/O support [no]
   --enable-mmal            enable decoding via MMAL [no]
+  --enable-netcdf          enable NetCDF, needed for sofalizer filter [no]
   --enable-nvenc           enable NVIDIA NVENC support [no]
   --enable-openal          enable OpenAL 1.1 capture support [no]
   --enable-opencl          enable OpenCL code
   --enable-opengl          enable OpenGL rendering [no]
   --enable-openssl         enable openssl, needed for https support
                            if gnutls is not used [no]
+  --disable-schannel       disable SChannel SSP, needed for TLS support on
+                           Windows if openssl and gnutls are not used [autodetect]
   --disable-sdl            disable sdl [autodetect]
   --disable-securetransport disable Secure Transport, needed for TLS support
                            on OSX if openssl and gnutls are not used [autodetect]
@@ -304,6 +313,7 @@ Toolchain options:
   --yasmexe=EXE            use yasm-compatible assembler EXE [$yasmexe_default]
   --cc=CC                  use C compiler CC [$cc_default]
   --cxx=CXX                use C compiler CXX [$cxx_default]
+  --objcc=OCC              use ObjC compiler OCC [$cc_default]
   --dep-cc=DEPCC           use dependency generator DEPCC [$cc_default]
   --ld=LD                  use linker LD [$ld_default]
   --pkg-config=PKGCONFIG   use pkg-config tool PKGCONFIG [$pkg_config_default]
@@ -319,8 +329,10 @@ Toolchain options:
   --host-os=OS             compiler host OS [$target_os]
   --extra-cflags=ECFLAGS   add ECFLAGS to CFLAGS [$CFLAGS]
   --extra-cxxflags=ECFLAGS add ECFLAGS to CXXFLAGS [$CXXFLAGS]
+  --extra-objcflags=FLAGS  add FLAGS to OBJCFLAGS [$CFLAGS]
   --extra-ldflags=ELDFLAGS add ELDFLAGS to LDFLAGS [$LDFLAGS]
   --extra-ldexeflags=ELDFLAGS add ELDFLAGS to LDEXEFLAGS [$LDEXEFLAGS]
+  --extra-ldlibflags=ELDFLAGS add ELDFLAGS to LDLIBFLAGS [$LDLIBFLAGS]
   --extra-libs=ELIBS       add ELIBS [$ELIBS]
   --extra-version=STRING   version string suffix []
   --optflags=OPTFLAGS      override optimization-related compiler flags
@@ -332,6 +344,7 @@ Toolchain options:
 
 Advanced options (experts only):
   --malloc-prefix=PREFIX   prefix malloc and related names with PREFIX
+  --custom-allocator=NAME  use a supported custom allocator
   --disable-symver         disable symbol versioning
   --enable-hardcoded-tables use hardcoded tables instead of runtime generation
   --disable-safe-bitstream-reader
@@ -360,6 +373,7 @@ Optimization options (experts only):
   --disable-fma3           disable FMA3 optimizations
   --disable-fma4           disable FMA4 optimizations
   --disable-avx2           disable AVX2 optimizations
+  --disable-aesni          disable AESNI optimizations
   --disable-armv5te        disable armv5te optimizations
   --disable-armv6          disable armv6 optimizations
   --disable-armv6t2        disable armv6t2 optimizations
@@ -367,13 +381,11 @@ Optimization options (experts only):
   --disable-neon           disable NEON optimizations
   --disable-inline-asm     disable use of inline assembly
   --disable-yasm           disable use of nasm/yasm assembly
-  --disable-mips32r5       disable MIPS32R5 optimizations
-  --disable-mips64r6       disable MIPS64R6 optimizations
-  --disable-mipsdspr1      disable MIPS DSP ASE R1 optimizations
+  --disable-mipsdsp        disable MIPS DSP ASE R1 optimizations
   --disable-mipsdspr2      disable MIPS DSP ASE R2 optimizations
   --disable-msa            disable MSA optimizations
   --disable-mipsfpu        disable floating point MIPS optimizations
-  --disable-loongson3      disable Loongson-3 SIMD optimizations
+  --disable-mmi            disable Loongson SIMD optimizations
   --disable-fast-unaligned consider unaligned accesses slow
 
 Developer options (useful when working on FFmpeg itself):
@@ -412,6 +424,18 @@ EOF
 }
 
 quotes='""'
+if test -t 1 && which tput >/dev/null 2>&1; then # only when stdout is a terminal and tput is available
+    ncolors=$(tput colors)
+    if test -n "$ncolors" && test $ncolors -ge 8; then # colours are set only when tput reports at least 8
+        bold_color=$(tput bold)
+        warn_color=$(tput setaf 3)
+        error_color=$(tput setaf 1)
+        reset_color=$(tput sgr0)
+    fi
+    # terminal width; the fallback below is 72 rather than 80 since that's the default of pr
+    ncols=$(tput cols)
+fi
+: ${ncols:=72} # applies when ncols is still unset or empty
 
 log(){
     echo "$@" >> $logfile
@@ -423,18 +447,14 @@ log_file(){
     log END $1
 }
 
-echolog(){
-    log "$@"
-    echo "$@"
-}
-
 warn(){
     log "WARNING: $*"
     WARNINGS="${WARNINGS}WARNING: $*\n"
 }
 
 die(){
-    echolog "$@"
+    log "$@"
+    echo "$error_color$bold_color$@$reset_color"
     cat <<EOF
 
 If you think configure made a mistake, make sure you are using the latest
@@ -475,7 +495,7 @@ sh_quote(){
 }
 
 cleanws(){
-    echo "$@" | sed 's/^ *//;s/  */ /g;s/ *$//;s/\\r//g'
+    echo "$@" | sed 's/^ *//;s/[[:space:]][[:space:]]*/ /g;s/ *$//'  # strip leading/trailing spaces, squeeze whitespace runs to one space
 }
 
 filter(){
@@ -612,12 +632,12 @@ enable_deep_weak(){
 }
 
 enabled(){
-    test "${1#!}" = "$1" && op== || op=!=
+    test "${1#!}" = "$1" && op='=' || op=!=  # a leading "!" on the name inverts the comparison
     eval test "x\$${1#!}" $op "xyes"
 }
 
 disabled(){
-    test "${1#!}" = "$1" && op== || op=!=
+    test "${1#!}" = "$1" && op='=' || op=!=  # a leading "!" on the name inverts the comparison
     eval test "x\$${1#!}" $op "xno"
 }
 
@@ -783,6 +803,10 @@ add_asflags(){
     append ASFLAGS $($asflags_filter "$@")
 }
 
+add_objcflags(){  # append the arguments, passed through $objcflags_filter, to OBJCFLAGS
+    append OBJCFLAGS $($objcflags_filter "$@")
+}
+
 add_ldflags(){
     append LDFLAGS $($ldflags_filter "$@")
 }
@@ -791,6 +815,10 @@ add_ldexeflags(){
     append LDEXEFLAGS $($ldflags_filter "$@")
 }
 
+add_ldlibflags(){  # append the arguments, passed through $ldflags_filter, to LDLIBFLAGS
+    append LDLIBFLAGS $($ldflags_filter "$@")
+}
+
 add_stripflags(){
     append ASMSTRIPFLAGS "$@"
 }
@@ -849,11 +877,11 @@ check_cxx(){
     check_cmd $cxx $CPPFLAGS $CFLAGS $CXXFLAGS "$@" $CXX_C -o $TMPO $TMPCPP
 }
 
-check_oc(){
-    log check_oc "$@"
+check_objcc(){  # compile the program read from stdin (saved to $TMPM) with $objcc; extra args go to the compiler
+    log check_objcc "$@"
     cat > $TMPM
     log_file $TMPM
-    check_cmd $cc -Werror=missing-prototypes $CPPFLAGS $CFLAGS "$@" $CC_C $(cc_o $TMPO) $TMPM
+    check_cmd $objcc -Werror=missing-prototypes $CPPFLAGS $CFLAGS $OBJCFLAGS "$@" $OBJCC_C $(cc_o $TMPO) $TMPM
 }
 
 check_cpp(){
@@ -885,6 +913,25 @@ void foo(void){ __asm__ volatile($code); }
 EOF
 }
 
+check_inline_asm_flags(){  # usage: check_inline_asm_flags <name> <asm code> [flags...]
+    log check_inline_asm_flags "$@"
+    name="$1"
+    code="$2"
+    flags=''
+    shift 2
+    while [ "$1" != "" ]; do  # gather the remaining arguments into $flags; stops at the first empty one
+      append flags $1
+      shift
+    done;  # NOTE(review): "$@" passed to check_cmd below is already consumed here unless an empty argument stopped the loop
+    disable $name  # stays disabled unless the test compile below succeeds
+    cat > $TMPC <<EOF
+void foo(void){ __asm__ volatile($code); }
+EOF
+    log_file $TMPC
+    check_cmd $cc $CPPFLAGS $CFLAGS $flags "$@" $CC_C $(cc_o $TMPO) $TMPC &&
+    enable $name && add_cflags $flags && add_asflags $flags && add_ldflags $flags  # on success the flags are also handed to add_cflags/add_asflags/add_ldflags
+}
+
 check_insn(){
     log check_insn "$@"
     check_inline_asm ${1}_inline "\"$2\""
@@ -964,6 +1011,19 @@ int x;
 EOF
 }
 
+test_objcflags(){  # succeeds iff check_objcc compiles a trivial unit with the given (filtered) flags
+    log test_objcflags "$@"
+    set -- $($objcflags_filter "$@")
+    check_objcc "$@" <<EOF
+int x;
+EOF
+}
+
+check_objcflags(){  # add the flags via add_objcflags only when test_objcflags accepts them
+    log check_objcflags "$@"
+    test_objcflags "$@" && add_objcflags "$@"
+}
+
 test_ldflags(){
     log test_ldflags "$@"
     check_ld "cc" "$@" <<EOF
@@ -1001,8 +1061,8 @@ int x;
 EOF
 }
 
-check_header_oc(){
-    log check_header_oc "$@"
+check_header_objcc(){
+    log check_header_objcc "$@"
     rm -f -- "$TMPO"
     header=$1
     shift
@@ -1010,7 +1070,7 @@ check_header_oc(){
     {
        echo "#include <$header>"
        echo "int main(void) { return 0; }"
-    } | check_oc && check_stat "$TMPO" && enable_safe $headers
+    } | check_objcc && check_stat "$TMPO" && enable_safe $headers
 }
 
 check_func(){
@@ -1024,6 +1084,21 @@ int main(void){ $func(); }
 EOF
 }
 
+check_complexfunc(){  # usage: check_complexfunc <func> <narg> [extra check_ld args...]
+    log check_complexfunc "$@"
+    func=$1
+    narg=$2
+    shift 2
+    test $narg = 2 && args="f, g" || args="f * I"  # call $func with (f, g) when narg is 2, otherwise with f * I
+    disable $func  # stays disabled unless the link test below succeeds
+    check_ld "cc" "$@" <<EOF && enable $func
+#include <complex.h>
+#include <math.h>
+float foo(complex float f, complex float g) { return $func($args); }
+int main(void){ return (int) foo; }
+EOF
+}
+
 check_mathfunc(){
     log check_mathfunc "$@"
     func=$1
@@ -1304,12 +1379,6 @@ check_host_cpp_condition(){
 EOF
 }
 
-apply(){
-    file=$1
-    shift
-    "$@" < "$file" > "$file.tmp" && mv "$file.tmp" "$file" || rm "$file.tmp"
-}
-
 cp_if_changed(){
     cmp -s "$1" "$2" && echo "$2 is unchanged" && return
     mkdir -p "$(dirname $2)"
@@ -1319,23 +1388,42 @@ cp_if_changed(){
 # CONFIG_LIST contains configurable options, while HAVE_LIST is for
 # system-dependent things.
 
-COMPONENT_LIST="
+AVCODEC_COMPONENTS="
     bsfs
     decoders
-    demuxers
     encoders
-    filters
     hwaccels
+    parsers
+"
+
+AVDEVICE_COMPONENTS="
     indevs
-    muxers
     outdevs
-    parsers
+"
+AVFILTER_COMPONENTS="
+    filters
+"
+AVFORMAT_COMPONENTS="
+    demuxers
+    muxers
     protocols
 "
 
+AVRESAMPLE_COMPONENTS=""
+AVUTIL_COMPONENTS=""
+
+COMPONENT_LIST="
+    $AVCODEC_COMPONENTS
+    $AVDEVICE_COMPONENTS
+    $AVFILTER_COMPONENTS
+    $AVFORMAT_COMPONENTS
+    $AVRESAMPLE_COMPONENTS
+    $AVUTIL_COMPONENTS
+"
+
 EXAMPLE_LIST="
     avio_reading_example
-    avio_list_dir_example
+    avio_dir_cmd_example
     decoding_encoding_example
     demuxing_decoding_example
     extract_mvs_example
@@ -1355,13 +1443,15 @@ EXAMPLE_LIST="
 EXTERNAL_LIBRARY_LIST="
     avisynth
     bzlib
+    chromaprint
     crystalhd
     decklink
     frei0r
+    gcrypt
+    gmp
     gnutls
     iconv
     ladspa
-    libaacplus
     libass
     libbluray
     libbs2b
@@ -1380,6 +1470,7 @@ EXTERNAL_LIBRARY_LIST="
     libgsm
     libiec61883
     libilbc
+    libkvazaar
     libmfx
     libmodplug
     libmp3lame
@@ -1391,21 +1482,21 @@ EXTERNAL_LIBRARY_LIST="
     libopenjpeg
     libopus
     libpulse
-    libquvi
     librtmp
+    librubberband
     libschroedinger
     libshine
     libsmbclient
+    libsnappy
     libsoxr
     libspeex
     libssh
-    libstagefright_h264
+    libtesseract
     libtheora
     libtwolame
     libutvideo
     libv4l2
     libvidstab
-    libvo_aacenc
     libvo_amrwbenc
     libvorbis
     libvpx
@@ -1419,15 +1510,18 @@ EXTERNAL_LIBRARY_LIST="
     libxcb_shape
     libxcb_xfixes
     libxvid
+    libzimg
     libzmq
     libzvbi
     lzma
     mmal
+    netcdf
     nvenc
     openal
     opencl
     opengl
     openssl
+    schannel
     sdl
     securetransport
     x11grab
@@ -1461,6 +1555,7 @@ HWACCEL_LIST="
     vaapi
     vda
     vdpau
+    videotoolbox
     xvmc
 "
 
@@ -1582,17 +1677,22 @@ ARCH_EXT_LIST_MIPS="
     mipsfpu
     mips32r2
     mips32r5
+    mips64r2
+    mips32r6
     mips64r6
-    mipsdspr1
+    mipsdsp
     mipsdspr2
     msa
 "
 
 ARCH_EXT_LIST_LOONGSON="
+    loongson2
     loongson3
+    mmi
 "
 
 ARCH_EXT_LIST_X86_SIMD="
+    aesni
     amd3dnow
     amd3dnowext
     avx
@@ -1679,6 +1779,7 @@ HEADERS_LIST="
     dev_video_bktr_ioctl_bt848_h
     dev_video_meteor_ioctl_meteor_h
     direct_h
+    dirent_h
     dlfcn_h
     d3d11_h
     dxva_h
@@ -1689,6 +1790,9 @@ HEADERS_LIST="
     machine_ioctl_bt848_h
     machine_ioctl_meteor_h
     malloc_h
+    opencv2_core_core_c_h
+    openjpeg_2_1_openjpeg_h
+    openjpeg_2_0_openjpeg_h
     openjpeg_1_5_openjpeg_h
     OpenGL_gl3_h
     poll_h
@@ -1714,15 +1818,24 @@ INTRINSICS_LIST="
     intrinsics_neon
 "
 
+COMPLEX_FUNCS="
+    cabs
+    cexp
+"
+
 MATH_FUNCS="
     atanf
     atan2f
     cbrt
     cbrtf
+    copysign
     cosf
+    erf
     exp2
     exp2f
     expf
+    hypot
+    isfinite
     isinf
     isnan
     ldexpf
@@ -1745,6 +1858,7 @@ MATH_FUNCS="
 SYSTEM_FUNCS="
     access
     aligned_malloc
+    arc4random
     clock_gettime
     closesocket
     CommandLineToArgvW
@@ -1761,7 +1875,6 @@ SYSTEM_FUNCS="
     GetProcessMemoryInfo
     GetProcessTimes
     getrusage
-    getservbyport
     GetSystemTimeAsFileTime
     gettimeofday
     glob
@@ -1772,6 +1885,7 @@ SYSTEM_FUNCS="
     jack_port_get_latency_range
     kbhit
     localtime_r
+    lstat
     lzo1x_999_compress
     mach_absolute_time
     MapViewOfFile
@@ -1785,6 +1899,7 @@ SYSTEM_FUNCS="
     pthread_cancel
     sched_getaffinity
     SetConsoleTextAttribute
+    SetConsoleCtrlHandler
     setmode
     setrlimit
     Sleep
@@ -1792,6 +1907,7 @@ SYSTEM_FUNCS="
     sysconf
     sysctl
     usleep
+    UTGetOSTypeFromString
     VirtualAlloc
     wglGetProcAddress
 "
@@ -1808,9 +1924,9 @@ TOOLCHAIN_FEATURES="
     gnu_as
     gnu_windres
     ibm_asm
+    inline_asm_direct_symbol_refs
     inline_asm_labels
     inline_asm_nonlocal_labels
-    inline_asm_direct_symbol_refs
     pragma_deprecated
     rsync_contimeout
     symver_asm_label
@@ -1844,6 +1960,7 @@ HAVE_LIST="
     $ARCH_FEATURES
     $ATOMICS_LIST
     $BUILTIN_LIST
+    $COMPLEX_FUNCS
     $HAVE_LIST_CMDLINE
     $HAVE_LIST_PUB
     $HEADERS_LIST
@@ -1855,8 +1972,8 @@ HAVE_LIST="
     $TYPES_LIST
     atomics_native
     dos_paths
-    dxva2api_cobj
     dxva2_lib
+    dxva2api_cobj
     libc_msvcrt
     libdc1394_1
     libdc1394_2
@@ -1870,6 +1987,7 @@ HAVE_LIST="
     threads
     vaapi_x11
     vdpau_x11
+    winrt
     xlib
 "
 
@@ -1882,15 +2000,16 @@ CONFIG_EXTRA="
     blockdsp
     bswapdsp
     cabac
+    dirac_parse
     dvprofile
     exif
     faandct
     faanidct
     fdctdsp
+    flacdsp
     fmtconvert
     frame_thread_encoder
-    gcrypt
-    gmp
+    g722dsp
     golomb
     gplv3
     h263dsp
@@ -1906,17 +2025,21 @@ CONFIG_EXTRA="
     iirfilter
     imdct15
     intrax8
+    ividsp
     jpegtables
     lgplv3
+    libx262
     llauddsp
     llviddsp
     lpc
+    lzf
     me_cmp
     mpeg_er
     mpegaudio
     mpegaudiodsp
     mpegvideo
     mpegvideoenc
+    mss34dsp
     pixblockdsp
     qpeldsp
     qsv
@@ -1927,12 +2050,19 @@ CONFIG_EXTRA="
     riffenc
     rtpdec
     rtpenc_chain
+    rv34dsp
     sinewin
+    snappy
     startcode
+    texturedsp
+    texturedspenc
     tpeldsp
     videodsp
     vp3dsp
+    vp56dsp
+    vp8dsp
     wma_freqs
+    wmv2dsp
 "
 
 CMDLINE_SELECT="
@@ -1971,8 +2101,10 @@ CMDLINE_SET="
     assert_level
     build_suffix
     cc
+    objcc
     cpu
     cross_prefix
+    custom_allocator
     cxx
     dep_cc
     doxygen
@@ -2014,6 +2146,7 @@ CMDLINE_SET="
 CMDLINE_APPEND="
     extra_cflags
     extra_cxxflags
+    extra_objcflags
     host_cppflags
 "
 
@@ -2034,13 +2167,15 @@ setend_deps="arm"
 map 'eval ${v}_inline_deps=inline_asm' $ARCH_EXT_LIST_ARM
 
 mipsfpu_deps="mips"
-mipsdspr1_deps="mips"
+mipsdsp_deps="mips"
 mipsdspr2_deps="mips"
 mips32r2_deps="mips"
 mips32r5_deps="mips"
+mips32r6_deps="mips"
+mips64r2_deps="mips"
 mips64r6_deps="mips"
-msa_deps="mips"
-loongson3_deps="mips"
+msa_deps="mipsfpu"
+mmi_deps="mips"
 
 altivec_deps="ppc"
 dcbzl_deps="ppc"
@@ -2064,6 +2199,7 @@ sse3_deps="sse2"
 ssse3_deps="sse3"
 sse4_deps="ssse3"
 sse42_deps="sse4"
+aesni_deps="sse42"
 avx_deps="sse42"
 xop_deps="avx"
 fma3_deps="avx"
@@ -2101,30 +2237,34 @@ threads_if_any="$THREADS_LIST"
 
 # subsystems
 dct_select="rdft"
+dirac_parse_select="golomb"
 error_resilience_select="me_cmp"
 faandct_deps="faan fdctdsp"
 faanidct_deps="faan idctdsp"
 frame_thread_encoder_deps="encoders threads"
 intrax8_select="error_resilience"
 mdct_select="fft"
-rdft_select="fft"
 me_cmp_select="fdctdsp idctdsp pixblockdsp"
 mpeg_er_select="error_resilience"
 mpegaudio_select="mpegaudiodsp"
 mpegaudiodsp_select="dct"
-mpegvideo_select="blockdsp h264chroma hpeldsp idctdsp me_cmp videodsp"
+mpegvideo_select="blockdsp h264chroma hpeldsp idctdsp me_cmp mpeg_er videodsp"
 mpegvideoenc_select="me_cmp mpegvideo pixblockdsp qpeldsp"
 qsvdec_select="qsv"
 qsvenc_select="qsv"
+rdft_select="fft"
 
 # decoders / encoders
 aac_decoder_select="imdct15 mdct sinewin"
-aac_encoder_select="audio_frame_queue iirfilter mdct sinewin"
+aac_fixed_decoder_select="mdct sinewin"
+aac_encoder_select="audio_frame_queue iirfilter lpc mdct sinewin"
 aac_latm_decoder_select="aac_decoder aac_latm_parser"
 ac3_decoder_select="ac3_parser ac3dsp bswapdsp fmtconvert mdct"
 ac3_fixed_decoder_select="ac3_parser ac3dsp bswapdsp mdct"
 ac3_encoder_select="ac3dsp audiodsp mdct me_cmp"
 ac3_fixed_encoder_select="ac3dsp audiodsp mdct me_cmp"
+adpcm_g722_decoder_select="g722dsp"
+adpcm_g722_encoder_select="g722dsp"
 aic_decoder_select="golomb idctdsp"
 alac_encoder_select="lpc"
 als_decoder_select="bswapdsp"
@@ -2152,13 +2292,15 @@ comfortnoise_encoder_select="lpc"
 cook_decoder_select="audiodsp mdct sinewin"
 cscd_decoder_select="lzo"
 cscd_decoder_suggest="zlib"
-dca_decoder_select="fmtconvert mdct"
-dirac_decoder_select="dwt golomb videodsp mpegvideoenc"
+dca_decoder_select="mdct"
+dds_decoder_select="texturedsp"
+dirac_decoder_select="dirac_parse dwt golomb videodsp mpegvideoenc"
 dnxhd_decoder_select="blockdsp idctdsp"
 dnxhd_encoder_select="aandcttables blockdsp fdctdsp idctdsp mpegvideoenc pixblockdsp"
 dvvideo_decoder_select="dvprofile idctdsp"
 dvvideo_encoder_select="dvprofile fdctdsp me_cmp pixblockdsp"
 dxa_decoder_select="zlib"
+dxv_decoder_select="lzf texturedsp"
 eac3_decoder_select="ac3_decoder"
 eac3_encoder_select="ac3_encoder"
 eamad_decoder_select="aandcttables blockdsp bswapdsp idctdsp mpegvideo"
@@ -2170,21 +2312,21 @@ ffv1_encoder_select="rangecoder"
 ffvhuff_decoder_select="huffyuv_decoder"
 ffvhuff_encoder_select="huffyuv_encoder"
 fic_decoder_select="golomb"
-flac_decoder_select="golomb"
-flac_encoder_select="bswapdsp golomb lpc"
+flac_decoder_select="flacdsp golomb"
+flac_encoder_select="bswapdsp flacdsp golomb lpc"
+flashsv2_decoder_select="zlib"
+flashsv2_encoder_select="zlib"
 flashsv_decoder_select="zlib"
 flashsv_encoder_select="zlib"
-flashsv2_encoder_select="zlib"
-flashsv2_decoder_select="zlib"
 flv_decoder_select="h263_decoder"
 flv_encoder_select="h263_encoder"
 fourxm_decoder_select="blockdsp bswapdsp"
 fraps_decoder_select="bswapdsp huffman"
 g2m_decoder_select="blockdsp idctdsp jpegtables zlib"
 g729_decoder_select="audiodsp"
-h261_decoder_select="mpeg_er mpegvideo"
+h261_decoder_select="mpegvideo"
 h261_encoder_select="aandcttables mpegvideoenc"
-h263_decoder_select="error_resilience h263_parser h263dsp mpeg_er mpegvideo qpeldsp"
+h263_decoder_select="h263_parser h263dsp mpegvideo qpeldsp"
 h263_encoder_select="aandcttables h263dsp mpegvideoenc"
 h263i_decoder_select="h263_decoder"
 h263p_decoder_select="h263_decoder"
@@ -2195,12 +2337,21 @@ h264_qsv_decoder_deps="libmfx"
 h264_qsv_decoder_select="h264_mp4toannexb_bsf h264_parser qsvdec h264_qsv_hwaccel"
 h264_qsv_encoder_deps="libmfx"
 h264_qsv_encoder_select="qsvenc"
+hap_decoder_select="snappy texturedsp"
+hap_encoder_deps="libsnappy"
+hap_encoder_select="texturedspenc"
 hevc_decoder_select="bswapdsp cabac golomb videodsp"
+hevc_qsv_decoder_deps="libmfx"
+hevc_qsv_decoder_select="hevc_mp4toannexb_bsf hevc_parser qsvdec hevc_qsv_hwaccel"
+hevc_qsv_encoder_deps="libmfx"
+hevc_qsv_encoder_select="qsvenc"
 huffyuv_decoder_select="bswapdsp huffyuvdsp llviddsp"
 huffyuv_encoder_select="bswapdsp huffman huffyuvencdsp llviddsp"
 iac_decoder_select="imc_decoder"
 imc_decoder_select="bswapdsp fft mdct sinewin"
 indeo3_decoder_select="hpeldsp"
+indeo4_decoder_select="ividsp"
+indeo5_decoder_select="ividsp"
 interplay_video_decoder_select="hpeldsp"
 jpegls_decoder_select="golomb mjpeg_decoder"
 jpegls_encoder_select="golomb"
@@ -2230,19 +2381,25 @@ mpc7_decoder_select="bswapdsp mpegaudiodsp"
 mpc8_decoder_select="mpegaudiodsp"
 mpeg_xvmc_decoder_deps="X11_extensions_XvMClib_h"
 mpeg_xvmc_decoder_select="mpeg2video_decoder"
-mpegvideo_decoder_select="error_resilience mpeg_er mpegvideo"
-mpeg1video_decoder_select="error_resilience mpeg_er mpegvideo"
+mpegvideo_decoder_select="mpegvideo"
+mpeg1video_decoder_select="mpegvideo"
 mpeg1video_encoder_select="aandcttables mpegvideoenc h263dsp"
-mpeg2video_decoder_select="error_resilience mpeg_er mpegvideo"
+mpeg2video_decoder_select="mpegvideo"
 mpeg2video_encoder_select="aandcttables mpegvideoenc h263dsp"
+mpeg2_qsv_decoder_deps="libmfx"
+mpeg2_qsv_decoder_select="qsvdec mpeg2_qsv_hwaccel"
+mpeg2_qsv_encoder_deps="libmfx"
+mpeg2_qsv_encoder_select="qsvenc"
 mpeg4_decoder_select="h263_decoder mpeg4video_parser"
 mpeg4_encoder_select="h263_encoder"
+msa1_decoder_select="mss34dsp"
 msmpeg4v1_decoder_select="h263_decoder"
 msmpeg4v2_decoder_select="h263_decoder"
 msmpeg4v2_encoder_select="h263_encoder"
 msmpeg4v3_decoder_select="h263_decoder"
 msmpeg4v3_encoder_select="h263_encoder"
-mss2_decoder_select="error_resilience mpeg_er qpeldsp vc1_decoder"
+mss2_decoder_select="vc1_decoder"
+mts2_decoder_select="mss34dsp"
 mxpeg_decoder_select="mjpeg_decoder"
 nellymoser_decoder_select="mdct sinewin"
 nellymoser_encoder_select="audio_frame_queue mdct sinewin"
@@ -2256,17 +2413,19 @@ prores_decoder_select="blockdsp idctdsp"
 prores_encoder_select="fdctdsp"
 qcelp_decoder_select="lsp"
 qdm2_decoder_select="mdct rdft mpegaudiodsp"
-ra_144_encoder_select="audio_frame_queue lpc audiodsp"
 ra_144_decoder_select="audiodsp"
+ra_144_encoder_select="audio_frame_queue lpc audiodsp"
 ralf_decoder_select="golomb"
 rawvideo_decoder_select="bswapdsp"
+rscc_decoder_select="zlib"
 rtjpeg_decoder_select="me_cmp"
-rv10_decoder_select="error_resilience h263_decoder h263dsp mpeg_er"
+rv10_decoder_select="h263_decoder"
 rv10_encoder_select="h263_encoder"
-rv20_decoder_select="error_resilience h263_decoder h263dsp mpeg_er"
+rv20_decoder_select="h263_decoder"
 rv20_encoder_select="h263_encoder"
-rv30_decoder_select="error_resilience golomb h264chroma h264pred h264qpel mpeg_er mpegvideo videodsp"
-rv40_decoder_select="error_resilience golomb h264chroma h264pred h264qpel mpeg_er mpegvideo videodsp"
+rv30_decoder_select="golomb h264pred h264qpel mpegvideo rv34dsp"
+rv40_decoder_select="golomb h264pred h264qpel mpegvideo rv34dsp"
+screenpresso_decoder_select="zlib"
 shorten_decoder_select="golomb"
 sipr_decoder_select="lsp"
 snow_decoder_select="dwt h264qpel hpeldsp me_cmp rangecoder videodsp"
@@ -2290,22 +2449,25 @@ truemotion2_decoder_select="bswapdsp"
 truespeech_decoder_select="bswapdsp"
 tscc_decoder_select="zlib"
 twinvq_decoder_select="mdct lsp sinewin"
+txd_decoder_select="texturedsp"
 utvideo_decoder_select="bswapdsp"
 utvideo_encoder_select="bswapdsp huffman huffyuvencdsp"
 vble_decoder_select="huffyuvdsp"
-vc1_decoder_select="blockdsp error_resilience h263_decoder h264chroma h264qpel intrax8 mpeg_er qpeldsp startcode"
+vc1_decoder_select="blockdsp h263_decoder h264qpel intrax8 qpeldsp startcode"
+vc1_qsv_decoder_deps="libmfx"
+vc1_qsv_decoder_select="qsvdec vc1_qsv_hwaccel"
 vc1image_decoder_select="vc1_decoder"
 vorbis_decoder_select="mdct"
 vorbis_encoder_select="mdct"
 vp3_decoder_select="hpeldsp vp3dsp videodsp"
-vp5_decoder_select="h264chroma hpeldsp videodsp vp3dsp"
-vp6_decoder_select="h264chroma hpeldsp huffman videodsp vp3dsp"
+vp5_decoder_select="h264chroma hpeldsp videodsp vp3dsp vp56dsp"
+vp6_decoder_select="h264chroma hpeldsp huffman videodsp vp3dsp vp56dsp"
 vp6a_decoder_select="vp6_decoder"
 vp6f_decoder_select="vp6_decoder"
-vp7_decoder_select="h264pred videodsp"
-vp8_decoder_select="h264pred videodsp"
+vp7_decoder_select="h264pred videodsp vp8dsp"
+vp8_decoder_select="h264pred videodsp vp8dsp"
 vp9_decoder_select="videodsp vp9_parser"
-webp_decoder_select="vp8_decoder"
+webp_decoder_select="vp8_decoder exif"
 wmalossless_decoder_select="llauddsp"
 wmapro_decoder_select="mdct sinewin wma_freqs"
 wmav1_decoder_select="mdct sinewin wma_freqs"
@@ -2315,10 +2477,12 @@ wmav2_encoder_select="mdct sinewin wma_freqs"
 wmavoice_decoder_select="lsp rdft dct mdct sinewin"
 wmv1_decoder_select="h263_decoder"
 wmv1_encoder_select="h263_encoder"
-wmv2_decoder_select="blockdsp h263_decoder idctdsp intrax8 videodsp"
-wmv2_encoder_select="h263_encoder"
+wmv2_decoder_select="blockdsp h263_decoder idctdsp intrax8 videodsp wmv2dsp"
+wmv2_encoder_select="h263_encoder wmv2dsp"
 wmv3_decoder_select="vc1_decoder"
 wmv3image_decoder_select="wmv3_decoder"
+xma1_decoder_select="wmapro_decoder"
+xma2_decoder_select="wmapro_decoder"
 zerocodec_decoder_select="zlib"
 zlib_decoder_select="zlib"
 zlib_encoder_select="zlib"
@@ -2327,27 +2491,28 @@ zmbv_encoder_select="zlib"
 
 # hardware accelerators
 crystalhd_deps="libcrystalhd_libcrystalhd_if_h"
-d3d11va_deps="d3d11_h dxva_h ID3D11VideoDecoder"
+d3d11va_deps="d3d11_h dxva_h ID3D11VideoDecoder ID3D11VideoContext"
 dxva2_deps="dxva2api_h DXVA2_ConfigPictureDecode"
 vaapi_deps="va_va_h"
 vda_deps="VideoDecodeAcceleration_VDADecoder_h pthreads"
 vda_extralibs="-framework CoreFoundation -framework VideoDecodeAcceleration -framework QuartzCore"
 vdpau_deps="vdpau_vdpau_h vdpau_vdpau_x11_h"
+videotoolbox_deps="VideoToolbox_VideoToolbox_h pthreads"
+videotoolbox_extralibs="-framework CoreFoundation -framework VideoToolbox -framework CoreMedia -framework QuartzCore -framework CoreVideo"
 xvmc_deps="X11_extensions_XvMClib_h"
 
 h263_vaapi_hwaccel_deps="vaapi"
 h263_vaapi_hwaccel_select="h263_decoder"
-h263_vdpau_hwaccel_deps="vdpau"
-h263_vdpau_hwaccel_select="h263_decoder"
+h263_videotoolbox_hwaccel_deps="videotoolbox"
+h263_videotoolbox_hwaccel_select="h263_decoder"
 h264_crystalhd_decoder_select="crystalhd h264_mp4toannexb_bsf h264_parser"
 h264_d3d11va_hwaccel_deps="d3d11va"
 h264_d3d11va_hwaccel_select="h264_decoder"
 h264_dxva2_hwaccel_deps="dxva2"
 h264_dxva2_hwaccel_select="h264_decoder"
 h264_mmal_decoder_deps="mmal"
+h264_mmal_decoder_select="mmal"
 h264_mmal_hwaccel_deps="mmal"
-h264_mmal_decoder_select="h264_decoder"
-h264_mmal_encoder_deps="mmal"
 h264_qsv_hwaccel_deps="libmfx"
 h264_vaapi_hwaccel_deps="vaapi"
 h264_vaapi_hwaccel_select="h264_decoder"
@@ -2361,10 +2526,17 @@ h264_vdpau_decoder_deps="vdpau"
 h264_vdpau_decoder_select="h264_decoder"
 h264_vdpau_hwaccel_deps="vdpau"
 h264_vdpau_hwaccel_select="h264_decoder"
+h264_videotoolbox_hwaccel_deps="videotoolbox"
+h264_videotoolbox_hwaccel_select="h264_decoder"
 hevc_d3d11va_hwaccel_deps="d3d11va DXVA_PicParams_HEVC"
 hevc_d3d11va_hwaccel_select="hevc_decoder"
 hevc_dxva2_hwaccel_deps="dxva2 DXVA_PicParams_HEVC"
 hevc_dxva2_hwaccel_select="hevc_decoder"
+hevc_qsv_hwaccel_deps="libmfx"
+hevc_vaapi_hwaccel_deps="vaapi VAPictureParameterBufferHEVC"
+hevc_vaapi_hwaccel_select="hevc_decoder"
+hevc_vdpau_hwaccel_deps="vdpau VdpPictureInfoHEVC"
+hevc_vdpau_hwaccel_select="hevc_decoder"
 mpeg_vdpau_decoder_deps="vdpau"
 mpeg_vdpau_decoder_select="mpeg2video_decoder"
 mpeg_xvmc_hwaccel_deps="xvmc"
@@ -2373,6 +2545,8 @@ mpeg1_vdpau_decoder_deps="vdpau"
 mpeg1_vdpau_decoder_select="mpeg1video_decoder"
 mpeg1_vdpau_hwaccel_deps="vdpau"
 mpeg1_vdpau_hwaccel_select="mpeg1video_decoder"
+mpeg1_videotoolbox_hwaccel_deps="videotoolbox"
+mpeg1_videotoolbox_hwaccel_select="mpeg1video_decoder"
 mpeg1_xvmc_hwaccel_deps="xvmc"
 mpeg1_xvmc_hwaccel_select="mpeg1video_decoder"
 mpeg2_crystalhd_decoder_select="crystalhd"
@@ -2380,31 +2554,54 @@ mpeg2_d3d11va_hwaccel_deps="d3d11va"
 mpeg2_d3d11va_hwaccel_select="mpeg2video_decoder"
 mpeg2_dxva2_hwaccel_deps="dxva2"
 mpeg2_dxva2_hwaccel_select="mpeg2video_decoder"
+mpeg2_mmal_decoder_deps="mmal"
+mpeg2_mmal_decoder_select="mmal"
+mpeg2_mmal_hwaccel_deps="mmal"
+mpeg2_qsv_hwaccel_deps="libmfx"
+mpeg2_qsv_hwaccel_select="qsvdec_mpeg2"
 mpeg2_vaapi_hwaccel_deps="vaapi"
 mpeg2_vaapi_hwaccel_select="mpeg2video_decoder"
 mpeg2_vdpau_hwaccel_deps="vdpau"
 mpeg2_vdpau_hwaccel_select="mpeg2video_decoder"
+mpeg2_videotoolbox_hwaccel_deps="videotoolbox"
+mpeg2_videotoolbox_hwaccel_select="mpeg2video_decoder"
 mpeg2_xvmc_hwaccel_deps="xvmc"
 mpeg2_xvmc_hwaccel_select="mpeg2video_decoder"
 mpeg4_crystalhd_decoder_select="crystalhd"
+mpeg4_mmal_decoder_deps="mmal"
+mpeg4_mmal_decoder_select="mmal"
+mpeg4_mmal_hwaccel_deps="mmal"
 mpeg4_vaapi_hwaccel_deps="vaapi"
 mpeg4_vaapi_hwaccel_select="mpeg4_decoder"
 mpeg4_vdpau_decoder_deps="vdpau"
 mpeg4_vdpau_decoder_select="mpeg4_decoder"
 mpeg4_vdpau_hwaccel_deps="vdpau"
 mpeg4_vdpau_hwaccel_select="mpeg4_decoder"
+mpeg4_videotoolbox_hwaccel_deps="videotoolbox"
+mpeg4_videotoolbox_hwaccel_select="mpeg4_decoder"
 msmpeg4_crystalhd_decoder_select="crystalhd"
 vc1_crystalhd_decoder_select="crystalhd"
 vc1_d3d11va_hwaccel_deps="d3d11va"
 vc1_d3d11va_hwaccel_select="vc1_decoder"
 vc1_dxva2_hwaccel_deps="dxva2"
 vc1_dxva2_hwaccel_select="vc1_decoder"
+vc1_mmal_decoder_deps="mmal"
+vc1_mmal_decoder_select="mmal"
+vc1_mmal_hwaccel_deps="mmal"
+vc1_qsv_hwaccel_deps="libmfx"
+vc1_qsv_hwaccel_select="qsvdec_vc1"
 vc1_vaapi_hwaccel_deps="vaapi"
 vc1_vaapi_hwaccel_select="vc1_decoder"
 vc1_vdpau_decoder_deps="vdpau"
 vc1_vdpau_decoder_select="vc1_decoder"
 vc1_vdpau_hwaccel_deps="vdpau"
 vc1_vdpau_hwaccel_select="vc1_decoder"
+vp9_d3d11va_hwaccel_deps="d3d11va DXVA_PicParams_VP9"
+vp9_d3d11va_hwaccel_select="vp9_decoder"
+vp9_dxva2_hwaccel_deps="dxva2 DXVA_PicParams_VP9"
+vp9_dxva2_hwaccel_select="vp9_decoder"
+vp9_vaapi_hwaccel_deps="vaapi VADecPictureParameterBufferVP9"
+vp9_vaapi_hwaccel_select="vp9_decoder"
 wmv3_crystalhd_decoder_select="crystalhd"
 wmv3_d3d11va_hwaccel_select="vc1_d3d11va_hwaccel"
 wmv3_dxva2_hwaccel_select="vc1_dxva2_hwaccel"
@@ -2414,16 +2611,16 @@ wmv3_vdpau_hwaccel_select="vc1_vdpau_hwaccel"
 
 # parsers
 h264_parser_select="h264_decoder"
-hevc_parser_select="hevc_decoder"
+hevc_parser_select="golomb"
 mpegvideo_parser_select="mpegvideo"
-mpeg4video_parser_select="error_resilience h263dsp mpeg_er mpegvideo qpeldsp"
+mpeg4video_parser_select="h263dsp mpegvideo qpeldsp"
 vc1_parser_select="mpegvideo startcode vc1_decoder"
 
 # bitstream_filters
 mjpeg2jpeg_bsf_select="jpegtables"
 
 # external libraries
-libaacplus_encoder_deps="libaacplus"
+chromaprint_muxer_deps="chromaprint"
 libcelt_decoder_deps="libcelt"
 libdcadec_decoder_deps="libdcadec"
 libfaac_encoder_deps="libfaac"
@@ -2438,6 +2635,7 @@ libgsm_ms_decoder_deps="libgsm"
 libgsm_ms_encoder_deps="libgsm"
 libilbc_decoder_deps="libilbc"
 libilbc_encoder_deps="libilbc"
+libkvazaar_encoder_deps="libkvazaar"
 libmodplug_demuxer_deps="libmodplug"
 libmp3lame_encoder_deps="libmp3lame"
 libmp3lame_encoder_select="audio_frame_queue"
@@ -2451,7 +2649,6 @@ libopenjpeg_encoder_deps="libopenjpeg"
 libopus_decoder_deps="libopus"
 libopus_encoder_deps="libopus"
 libopus_encoder_select="audio_frame_queue"
-libquvi_demuxer_deps="libquvi"
 libschroedinger_decoder_deps="libschroedinger"
 libschroedinger_encoder_deps="libschroedinger"
 libshine_encoder_deps="libshine"
@@ -2459,11 +2656,10 @@ libshine_encoder_select="audio_frame_queue"
 libspeex_decoder_deps="libspeex"
 libspeex_encoder_deps="libspeex"
 libspeex_encoder_select="audio_frame_queue"
-libstagefright_h264_decoder_deps="libstagefright_h264"
 libtheora_encoder_deps="libtheora"
 libtwolame_encoder_deps="libtwolame"
-libvo_aacenc_encoder_deps="libvo_aacenc"
-libvo_aacenc_encoder_select="audio_frame_queue"
+libutvideo_decoder_deps="libutvideo"
+libutvideo_encoder_deps="libutvideo"
 libvo_amrwbenc_encoder_deps="libvo_amrwbenc"
 libvorbis_decoder_deps="libvorbis"
 libvorbis_encoder_deps="libvorbis"
@@ -2475,14 +2671,13 @@ libvpx_vp9_encoder_deps="libvpx"
 libwavpack_encoder_deps="libwavpack"
 libwebp_encoder_deps="libwebp"
 libwebp_anim_encoder_deps="libwebp"
+libx262_encoder_deps="libx262"
 libx264_encoder_deps="libx264"
 libx264rgb_encoder_deps="libx264"
 libx264rgb_encoder_select="libx264_encoder"
 libx265_encoder_deps="libx265"
 libxavs_encoder_deps="libxavs"
 libxvid_encoder_deps="libxvid"
-libutvideo_decoder_deps="libutvideo"
-libutvideo_encoder_deps="libutvideo"
 libzvbi_teletext_decoder_deps="libzvbi"
 nvenc_encoder_deps="nvenc"
 nvenc_h264_encoder_deps="nvenc"
@@ -2491,6 +2686,7 @@ nvenc_hevc_encoder_deps="nvenc"
 # demuxers / muxers
 ac3_demuxer_select="ac3_parser"
 asf_demuxer_select="riffdec"
+asf_o_demuxer_select="riffdec"
 asf_muxer_select="riffenc"
 asf_stream_muxer_select="asf_muxer"
 avi_demuxer_select="riffdec exif"
@@ -2533,7 +2729,7 @@ mxf_opatom_muxer_select="mxf_muxer"
 nut_muxer_select="riffenc"
 nuv_demuxer_select="riffdec"
 oga_muxer_select="ogg_muxer"
-ogg_demuxer_select="golomb"
+ogg_demuxer_select="dirac_parse"
 opus_muxer_select="ogg_muxer"
 psp_muxer_select="mov_muxer"
 rtp_demuxer_select="sdp_demuxer"
@@ -2546,6 +2742,7 @@ sdp_demuxer_select="rtpdec"
 smoothstreaming_muxer_select="ismv_muxer"
 spdif_muxer_select="aac_parser"
 spx_muxer_select="ogg_muxer"
+swf_demuxer_suggest="zlib"
 tak_demuxer_select="tak_parser"
 tg2_muxer_select="mov_muxer"
 tgp_muxer_select="mov_muxer"
@@ -2567,10 +2764,10 @@ avfoundation_indev_extralibs="-framework CoreVideo -framework Foundation -framew
 avfoundation_indev_select="avfoundation"
 bktr_indev_deps_any="dev_bktr_ioctl_bt848_h machine_ioctl_bt848_h dev_video_bktr_ioctl_bt848_h dev_ic_bt8xx_h"
 caca_outdev_deps="libcaca"
-decklink_outdev_deps="decklink pthreads"
-decklink_outdev_extralibs="-lstdc++"
 decklink_indev_deps="decklink pthreads"
 decklink_indev_extralibs="-lstdc++"
+decklink_outdev_deps="decklink pthreads"
+decklink_outdev_extralibs="-lstdc++"
 dshow_indev_deps="IBaseFilter"
 dshow_indev_extralibs="-lpsapi -lole32 -lstrmiids -luuid -loleaut32 -lshlwapi"
 dv1394_indev_deps="dv1394"
@@ -2602,12 +2799,13 @@ v4l2_indev_deps_any="linux_videodev2_h sys_videoio_h"
 v4l2_outdev_deps_any="linux_videodev2_h sys_videoio_h"
 vfwcap_indev_deps="capCreateCaptureWindow vfwcap_defines"
 vfwcap_indev_extralibs="-lavicap32"
-xv_outdev_deps="X11_extensions_Xvlib_h XvGetPortAttribute"
-xv_outdev_extralibs="-lXv -lX11 -lXext"
 x11grab_indev_deps="x11grab"
 x11grab_xcb_indev_deps="libxcb"
+xv_outdev_deps="X11_extensions_Xvlib_h XvGetPortAttribute"
+xv_outdev_extralibs="-lXv -lX11 -lXext"
 
 # protocols
+async_protocol_deps="threads"
 bluray_protocol_deps="libbluray"
 ffrtmpcrypt_protocol_deps="!librtmp_protocol"
 ffrtmpcrypt_protocol_deps_any="gcrypt gmp openssl"
@@ -2642,19 +2840,23 @@ sctp_protocol_deps="struct_sctp_event_subscribe"
 sctp_protocol_select="network"
 srtp_protocol_select="rtp_protocol"
 tcp_protocol_select="network"
-tls_gnutls_protocol_deps="gnutls !tls_securetransport_protocol"
+tls_gnutls_protocol_deps="gnutls !tls_schannel_protocol !tls_securetransport_protocol"
 tls_gnutls_protocol_select="tcp_protocol"
-tls_openssl_protocol_deps="openssl !tls_securetransport_protocol !tls_gnutls_protocol"
+tls_openssl_protocol_deps="openssl !tls_schannel_protocol !tls_securetransport_protocol !tls_gnutls_protocol"
 tls_openssl_protocol_select="tcp_protocol"
+tls_schannel_protocol_deps="schannel"
+tls_schannel_protocol_select="tcp_protocol"
 tls_securetransport_protocol_deps="securetransport"
 tls_securetransport_protocol_select="tcp_protocol"
-tls_protocol_deps_any="tls_securetransport_protocol tls_gnutls_protocol tls_openssl_protocol"
+tls_protocol_deps_any="tls_schannel_protocol tls_securetransport_protocol tls_gnutls_protocol tls_openssl_protocol"
 udp_protocol_select="network"
 udplite_protocol_select="network"
 unix_protocol_deps="sys_un_h"
 unix_protocol_select="network"
 
 # filters
+afftfilt_filter_deps="avcodec"
+afftfilt_filter_select="fft"
 amovie_filter_deps="avcodec avformat"
 aresample_filter_deps="swresample"
 ass_filter_deps="libass"
@@ -2675,8 +2877,8 @@ ebur128_filter_deps="gpl"
 eq_filter_deps="gpl"
 fftfilt_filter_deps="avcodec"
 fftfilt_filter_select="rdft"
-flite_filter_deps="libflite"
 find_rect_filter_deps="avcodec avformat gpl"
+flite_filter_deps="libflite"
 frei0r_filter_deps="frei0r dlopen"
 frei0r_src_filter_deps="frei0r dlopen"
 fspp_filter_deps="gpl"
@@ -2692,44 +2894,57 @@ mpdecimate_filter_deps="gpl"
 mpdecimate_filter_select="pixelutils"
 mptestsrc_filter_deps="gpl"
 negate_filter_deps="lut_filter"
-perspective_filter_deps="gpl"
-pp7_filter_deps="gpl"
+nnedi_filter_deps="gpl"
+ocr_filter_deps="libtesseract"
 ocv_filter_deps="libopencv"
 owdenoise_filter_deps="gpl"
 pan_filter_deps="swresample"
+perspective_filter_deps="gpl"
 phase_filter_deps="gpl"
+pp7_filter_deps="gpl"
 pp_filter_deps="gpl postproc"
 pullup_filter_deps="gpl"
 removelogo_filter_deps="avcodec avformat swscale"
 repeatfields_filter_deps="gpl"
 resample_filter_deps="avresample"
+rubberband_filter_deps="librubberband"
 sab_filter_deps="gpl swscale"
+scale2ref_filter_deps="swscale"
 scale_filter_deps="swscale"
 select_filter_select="pixelutils"
-smartblur_filter_deps="gpl swscale"
-showcqt_filter_deps="avcodec"
+showcqt_filter_deps="avcodec avformat swscale"
 showcqt_filter_select="fft"
+showfreqs_filter_deps="avcodec"
+showfreqs_filter_select="fft"
 showspectrum_filter_deps="avcodec"
-showspectrum_filter_select="rdft"
+showspectrum_filter_select="fft"
+showspectrumpic_filter_deps="avcodec"
+showspectrumpic_filter_select="fft"
+smartblur_filter_deps="gpl swscale"
+sofalizer_filter_deps="netcdf avcodec"
+sofalizer_filter_select="fft"
+spectrumsynth_filter_deps="avcodec"
+spectrumsynth_filter_select="fft"
 spp_filter_deps="gpl avcodec"
 spp_filter_select="fft idctdsp fdctdsp me_cmp pixblockdsp"
 stereo3d_filter_deps="gpl"
 subtitles_filter_deps="avformat avcodec libass"
 super2xsai_filter_deps="gpl"
-tinterlace_filter_deps="gpl"
-vidstabdetect_filter_deps="libvidstab"
-vidstabtransform_filter_deps="libvidstab"
 pixfmts_super2xsai_test_deps="super2xsai_filter"
+tinterlace_filter_deps="gpl"
 tinterlace_merge_test_deps="tinterlace_filter"
 tinterlace_pad_test_deps="tinterlace_filter"
 uspp_filter_deps="gpl avcodec"
+vidstabdetect_filter_deps="libvidstab"
+vidstabtransform_filter_deps="libvidstab"
 zmq_filter_deps="libzmq"
 zoompan_filter_deps="swscale"
+zscale_filter_deps="libzimg"
 
 # examples
-avio_reading="avformat avcodec avutil"
-avio_list_dir="avformat avutil"
 avcodec_example_deps="avcodec avutil"
+avio_dir_cmd="avformat avutil"
+avio_reading="avformat avcodec avutil"
 decoding_encoding_example_deps="avcodec avformat avutil"
 demuxing_decoding_example_deps="avcodec avformat avutil"
 extract_mvs_example_deps="avcodec avformat avutil"
@@ -2800,11 +3015,7 @@ ln_s="ln -s -f"
 nm_default="nm -g"
 objformat="elf"
 pkg_config_default=pkg-config
-if ranlib 2>&1 | grep -q "\-D "; then
-    ranlib_default="ranlib -D"
-else
-    ranlib_default="ranlib"
-fi
+ranlib_default="ranlib"
 strip_default="strip"
 yasmexe_default="yasm"
 windres_default="windres"
@@ -2845,7 +3056,7 @@ sws_max_filter_size_default=256
 set_default sws_max_filter_size
 
 # Enable hwaccels by default.
-enable d3d11va dxva2 vaapi vda vdpau xvmc
+enable d3d11va dxva2 vaapi vda vdpau videotoolbox xvmc
 enable xlib
 
 # build settings
@@ -2874,6 +3085,9 @@ CC_E='-E -o $@'
 CC_O='-o $@'
 CXX_C='-c'
 CXX_O='-o $@'
+OBJCC_C='-c'
+OBJCC_E='-E -o $@'
+OBJCC_O='-o $@'
 LD_O='-o $@'
 LD_LIB='-l%'
 LD_PATH='-L'
@@ -2890,7 +3104,7 @@ target_path='$(CURDIR)'
 
 # since the object filename is not given with the -MM flag, the compiler
 # is only able to print the basename, and we must add the path ourselves
-DEPCMD='$(DEP$(1)) $(DEP$(1)FLAGS) $($(1)DEP_FLAGS) $< | sed -e "/^\#.*/d" -e "s,^[[:space:]]*$(*F)\\.o,$(@D)/$(*F).o," > $(@:.o=.d)'
+DEPCMD='$(DEP$(1)) $(DEP$(1)FLAGS) $($(1)DEP_FLAGS) $< 2>/dev/null | sed -e "/^\#.*/d" -e "s,^[[:space:]]*$(@F),$(@D)/$(@F)," > $(@:.o=.d)'
 DEPFLAGS='-MM'
 
 # find source path
@@ -2898,8 +3112,9 @@ if test -f configure; then
     source_path=.
 else
     source_path=$(cd $(dirname "$0"); pwd)
-    echo "$source_path" | grep -q '[[:blank:]]' &&
-        die "Out of tree builds are impossible with whitespace in source path."
+    case "$source_path" in
+        *[[:blank:]]*) die "Out of tree builds are impossible with whitespace in source path." ;;
+    esac
     test -e "$source_path/config.h" &&
         die "Out of tree builds are impossible with config.h in source dir."
 fi
@@ -2958,14 +3173,15 @@ die_unknown(){
     exit 1
 }
 
-print_3_columns() {
-    cat | tr ' ' '\n' | sort | pr -r -3 -t
+print_in_columns() {
+    cols=$(expr $ncols / 24)
+    cat | tr ' ' '\n' | sort | pr -r "-$cols" -w $ncols -t
 }
 
 show_list() {
     suffix=_$1
     shift
-    echo $* | sed s/$suffix//g | print_3_columns
+    echo $* | sed s/$suffix//g | print_in_columns
     exit 0
 }
 
@@ -2999,6 +3215,9 @@ for opt do
         --extra-ldexeflags=*)
             add_ldexeflags $optval
         ;;
+        --extra-ldlibflags=*)
+            add_ldlibflags $optval
+        ;;
         --extra-libs=*)
             add_extralibs $optval
         ;;
@@ -3017,6 +3236,7 @@ for opt do
         --disable-all)
             map 'eval unset \${$(toupper ${v%s})_LIST}' $COMPONENT_LIST
             disable $LIBRARY_LIST $PROGRAM_LIST doc
+            enable avutil
         ;;
         --enable-random|--disable-random)
             action=${opt%%-random}
@@ -3077,6 +3297,18 @@ done
 
 disabled logging && logfile=/dev/null
 
+# Disable all the library-specific components if the library itself
+# is disabled, see AVCODEC_LIST and following _LIST variables.
+
+disable_components(){
+    disabled ${1} && disable $(
+        eval components="\$$(toupper ${1})_COMPONENTS"
+        map 'eval echo \${$(toupper ${v%s})_LIST}' $components
+    )
+}
+
+map 'disable_components $v' $LIBRARY_LIST
+
 echo "# $0 $FFMPEG_CONFIGURATION" > $logfile
 set >> $logfile
 
@@ -3134,9 +3366,14 @@ case "$toolchain" in
         else
             cc_default="c99wrap cl"
         fi
-        ld_default="link"
+        ld_default="$source_path/compat/windows/mslink"
         nm_default="dumpbin -symbols"
         ar_default="lib"
+        case "$arch" in
+        arm*)
+            as_default="armasm"
+            ;;
+        esac
         target_os_default="win32"
         # Use a relative path for TMPDIR. This makes sure all the
         # ffconf temp files are written with a relative path, avoiding
@@ -3178,7 +3415,11 @@ cc_default="${cross_prefix}${cc_default}"
 cxx_default="${cross_prefix}${cxx_default}"
 nm_default="${cross_prefix}${nm_default}"
 pkg_config_default="${cross_prefix}${pkg_config_default}"
-ranlib_default="${cross_prefix}${ranlib_default}"
+if ${cross_prefix}${ranlib_default} 2>&1 | grep -q "\-D "; then
+    ranlib_default="${cross_prefix}${ranlib_default} -D"
+else
+    ranlib_default="${cross_prefix}${ranlib_default}"
+fi
 strip_default="${cross_prefix}${strip_default}"
 windres_default="${cross_prefix}${windres_default}"
 
@@ -3205,7 +3446,7 @@ fi
 
 exesuf() {
     case $1 in
-        mingw32*|win32|win64|cygwin*|*-dos|freedos|opendos|os/2*|symbian) echo .exe ;;
+        mingw32*|mingw64*|win32|win64|cygwin*|*-dos|freedos|opendos|os/2*|symbian) echo .exe ;;
     esac
 }
 
@@ -3328,7 +3569,9 @@ msvc_common_flags(){
             -lz)                  echo zlib.lib ;;
             -lavifil32)           echo vfw32.lib ;;
             -lavicap32)           echo vfw32.lib user32.lib ;;
+            -lx264)               echo libx264.lib ;;
             -l*)                  echo ${flag#-l}.lib ;;
+            -LARGEADDRESSAWARE)   echo $flag ;;
             -L*)                  echo -libpath:${flag#-L} ;;
             *)                    echo $flag ;;
         esac
@@ -3457,6 +3700,7 @@ tms470_flags(){
 probe_cc(){
     pfx=$1
     _cc=$2
+    first=$3
 
     unset _type _ident _cc_c _cc_e _cc_o _flags _cflags
     unset _ld_o _ldflags _ld_lib _ld_path
@@ -3467,8 +3711,8 @@ probe_cc(){
         true # no-op to avoid reading stdin in following checks
     elif $_cc -v 2>&1 | grep -q '^gcc.*LLVM'; then
         _type=llvm_gcc
-        gcc_extra_ver=$(expr "$($_cc --version | head -n1)" : '.*\((.*)\)')
-        _ident="llvm-gcc $($_cc -dumpversion) $gcc_extra_ver"
+        gcc_extra_ver=$(expr "$($_cc --version 2>/dev/null | head -n1)" : '.*\((.*)\)')
+        _ident="llvm-gcc $($_cc -dumpversion 2>/dev/null) $gcc_extra_ver"
         _depflags='-MMD -MF $(@:.o=.d) -MT $@'
         _cflags_speed='-O3'
         _cflags_size='-Os'
@@ -3479,8 +3723,16 @@ probe_cc(){
         gcc_pkg_ver=$(expr "$gcc_version" : '[^ ]* \(([^)]*)\)')
         gcc_ext_ver=$(expr "$gcc_version" : ".*$gcc_pkg_ver $gcc_basever \\(.*\\)")
         _ident=$(cleanws "gcc $gcc_basever $gcc_pkg_ver $gcc_ext_ver")
-        if ! $_cc -dumpversion | grep -q '^2\.'; then
-            _depflags='-MMD -MF $(@:.o=.d) -MT $@'
+        case $gcc_basever in
+            2) ;;
+            2.*) ;;
+            *) _depflags='-MMD -MF $(@:.o=.d) -MT $@' ;;
+        esac
+        if [ "$first" = true ]; then
+            case $gcc_basever in
+                4.2*)
+                warn "gcc 4.2 is outdated and may miscompile FFmpeg. Please use a newer compiler." ;;
+            esac
         fi
         _cflags_speed='-O3'
         _cflags_size='-Os'
@@ -3532,7 +3784,7 @@ probe_cc(){
         _flags_filter=tms470_flags
     elif $_cc -v 2>&1 | grep -q clang; then
         _type=clang
-        _ident=$($_cc --version | head -n1)
+        _ident=$($_cc --version 2>/dev/null | head -n1)
         _depflags='-MMD -MF $(@:.o=.d) -MT $@'
         _cflags_speed='-O3'
         _cflags_size='-Os'
@@ -3595,16 +3847,16 @@ probe_cc(){
         _flags='-nologo -Qdiag-error:4044,10157'
         # -Qvec- -Qsimd- to prevent miscompilation, -GS, fp:precise for consistency
         # with MSVC which enables it by default.
-        _cflags='-D_USE_MATH_DEFINES -FIstdlib.h -Dstrtoll=_strtoi64 -Qms0 -Qvec- -Qsimd- -GS -fp:precise'
+        _cflags='-D_USE_MATH_DEFINES -Qms0 -Qvec- -Qsimd- -GS -fp:precise'
         disable stripping
-    elif $_cc 2>&1 | grep -q Microsoft; then
+    elif $_cc -nologo- 2>&1 | grep -q Microsoft; then
         _type=msvc
         _ident=$($_cc 2>&1 | head -n1)
         _DEPCMD='$(DEP$(1)) $(DEP$(1)FLAGS) $($(1)DEP_FLAGS) $< 2>&1 | awk '\''/including/ { sub(/^.*file: */, ""); gsub(/\\/, "/"); if (!match($$0, / /)) print "$@:", $$0 }'\'' > $(@:.o=.d)'
         _DEPFLAGS='$(CPPFLAGS) $(CFLAGS) -showIncludes -Zs'
         _cflags_speed="-O2"
         _cflags_size="-O1"
-        if $_cc 2>&1 | grep -q Linker; then
+        if $_cc -nologo- 2>&1 | grep -q Linker; then
             _ld_o='-out:$@'
         else
             _ld_o='-Fe$@'
@@ -3615,7 +3867,7 @@ probe_cc(){
         _ld_lib='lib%.a'
         _ld_path='-libpath:'
         _flags='-nologo'
-        _cflags='-D_USE_MATH_DEFINES -D_CRT_SECURE_NO_WARNINGS -Dinline=__inline -FIstdlib.h -Dstrtoll=_strtoi64'
+        _cflags='-D_USE_MATH_DEFINES -D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_WARNINGS'
         disable stripping
     elif $_cc --version 2>/dev/null | grep -q ^cparser; then
         _type=cparser
@@ -3644,7 +3896,7 @@ set_ccvars(){
     fi
 }
 
-probe_cc cc "$cc"
+probe_cc cc "$cc" "true"
 cflags_filter=$_flags_filter
 cflags_speed=$_cflags_speed
 cflags_size=$_cflags_size
@@ -3662,16 +3914,22 @@ test -n "$cc_type" && enable $cc_type ||
     warn "Unknown C compiler $cc, unable to select optimal CFLAGS"
 
 : ${as_default:=$cc}
+: ${objcc_default:=$cc}
 : ${dep_cc_default:=$cc}
 : ${ld_default:=$cc}
 : ${host_ld_default:=$host_cc}
-set_default ar as dep_cc ld host_ld windres
+set_default ar as objcc dep_cc ld host_ld windres
 
 probe_cc as "$as"
 asflags_filter=$_flags_filter
 add_asflags $_flags $_cflags
 set_ccvars AS
 
+probe_cc objcc "$objcc"
+objcflags_filter=$_flags_filter
+add_objcflags $_flags $_cflags
+set_ccvars OBJC
+
 probe_cc ld "$ld"
 ldflags_filter=$_flags_filter
 add_ldflags $_flags $_ldflags
@@ -3711,6 +3969,7 @@ fi
 
 add_cflags $extra_cflags
 add_cxxflags $extra_cxxflags
+add_objcflags $extra_objcflags
 add_asflags $extra_cflags
 
 if test -n "$sysroot"; then
@@ -3745,6 +4004,17 @@ if test "$cpu" = host; then
             }
             cpu=$(check_native -march || check_native -mcpu)
         ;;
+        clang)
+            check_native(){
+                $cc $1=native -v -c -o $TMPO $TMPC >$TMPE 2>&1 || return
+                sed -n "/cc1.*-target-cpu /{
+                            s/.*-target-cpu \\([^ ]*\\).*/\\1/
+                            p
+                            q
+                        }" $TMPE
+            }
+            cpu=$(check_native -march)
+        ;;
     esac
 
     test "${cpu:-host}" = host &&
@@ -3760,6 +4030,16 @@ case "$arch" in
         arch="arm"
     ;;
     mips*|IP*)
+        case "$arch" in
+        *el)
+            add_cppflags -EL
+            add_ldflags -EL
+        ;;
+        *eb)
+            add_cppflags -EB
+            add_ldflags -EB
+        ;;
+        esac
         arch="mips"
     ;;
     parisc*|hppa*)
@@ -3894,85 +4174,90 @@ elif enabled mips; then
 
     cpuflags="-march=$cpu"
 
-    case $cpu in
-        24kc)
-            disable mips32r5
-            disable mips64r6
-            disable mipsfpu
-            disable mipsdspr1
-            disable mipsdspr2
-            disable msa
-        ;;
-        24kf*)
-            disable mips32r5
-            disable mips64r6
-            disable mipsdspr1
-            disable mipsdspr2
-            disable msa
-        ;;
-        24kec|34kc|1004kc)
-            disable mips32r5
-            disable mips64r6
-            disable mipsfpu
-            disable mipsdspr2
-            disable msa
-        ;;
-        24kef*|34kf*|1004kf*)
-            disable mips32r5
-            disable mips64r6
-            disable mipsdspr2
-            disable msa
-        ;;
-        74kc)
-            disable mips32r5
-            disable mips64r6
-            disable mipsfpu
-            disable msa
-        ;;
-        74kf)
-            disable mips32r5
-            disable mips64r6
-            disable msa
-        ;;
-        p5600)
-            disable mips64r6
-            disable mipsdspr1
-            disable mipsdspr2
+    if [ "$cpu" != "generic" ]; then
+        disable mips32r2
+        disable mips32r5
+        disable mips64r2
+        disable mips32r6
+        disable mips64r6
+        disable loongson2
+        disable loongson3
+
+        case $cpu in
+            24kc|24kf*|24kec|34kc|1004kc|24kef*|34kf*|1004kf*|74kc|74kf)
+                enable mips32r2
+                disable msa
+            ;;
+            p5600|i6400)
+                disable mipsdsp
+                disable mipsdspr2
+            ;;
+            loongson*)
+                enable loongson2
+                enable loongson3
+                enable local_aligned_8 local_aligned_16 local_aligned_32
+                enable simd_align_16
+                enable fast_64bit
+                enable fast_clz
+                enable fast_cmov
+                enable fast_unaligned
+                disable aligned_stack
+                case $cpu in
+                    loongson3*)
+                        cpuflags="-march=loongson3a -mhard-float -fno-expensive-optimizations"
+                    ;;
+                    loongson2e)
+                        cpuflags="-march=loongson2e -mhard-float -fno-expensive-optimizations"
+                    ;;
+                    loongson2f)
+                        cpuflags="-march=loongson2f -mhard-float -fno-expensive-optimizations"
+                    ;;
+                esac
+            ;;
+            *)
+                # Unknown CPU. Disable everything.
+                warn "unknown CPU. Disabling all MIPS optimizations."
+                disable mipsfpu
+                disable mipsdsp
+                disable mipsdspr2
+                disable msa
+                disable mmi
+            ;;
+        esac
 
-            check_cflags "-mtune=p5600"
-        ;;
-        i6400)
-            disable mips32r5
-            disable mipsdspr1
-            disable mipsdspr2
-            disable mipsfpu
-
-            check_cflags "-mtune=i6400 -mabi=64"
-            check_ldflags "-mabi=64"
-        ;;
-        loongson3*)
-            disable mipsfpu
-            disable mips32r2
-            disable mips32r5
-            disable mips64r6
-            disable mipsdspr1
-            disable mipsdspr2
-            disable msa
-            enable local_aligned_8 local_aligned_16
-            enable simd_align_16
-            enable fast_64bit
-            enable fast_clz
-            enable fast_cmov
-            enable fast_unaligned
-            disable aligned_stack
-            cpuflags="-march=loongson3a -mhard-float"
-        ;;
-        generic)
-            disable mips32r5
-            disable mips64r6
-            disable msa
-        ;;
-    esac
+        case $cpu in
+            24kc)
+                disable mipsfpu
+                disable mipsdsp
+                disable mipsdspr2
+            ;;
+            24kf*)
+                disable mipsdsp
+                disable mipsdspr2
+            ;;
+            24kec|34kc|1004kc)
+                disable mipsfpu
+                disable mipsdspr2
+            ;;
+            24kef*|34kf*|1004kf*)
+                disable mipsdspr2
+            ;;
+            74kc)
+                disable mipsfpu
+            ;;
+            p5600)
+                enable mips32r5
+                check_cflags "-mtune=p5600" && check_cflags "-msched-weight -mload-store-pairs -funroll-loops"
+            ;;
+            i6400)
+                enable mips64r6
+                check_cflags "-mtune=i6400 -mabi=64" && check_cflags "-msched-weight -mload-store-pairs -funroll-loops" && check_ldflags "-mabi=64"
+            ;;
+        esac
+    else
+        # We do not disable anything. It is up to the user to disable the unwanted features.
+        warn 'generic cpu selected'
+    fi
 
 elif enabled ppc; then
 
@@ -4079,6 +4364,7 @@ fi
 if [ "$cpu" != generic ]; then
     add_cflags  $cpuflags
     add_asflags $cpuflags
+    test "$cc_type" = "$ld_type" && add_ldflags $cpuflags
 fi
 
 # compiler sanity check
@@ -4172,8 +4458,7 @@ case $target_os in
         enable section_data_rel_ro
         SLIB_INSTALL_NAME='$(SLIBNAME)'
         SLIB_INSTALL_LINKS=
-        # soname not set on purpose
-        SHFLAGS=-shared
+        SHFLAGS='-shared -Wl,-soname,$(SLIBNAME)'
         ;;
     haiku)
         prefix_default="/boot/common"
@@ -4194,6 +4479,7 @@ case $target_os in
             echo "hwcap_1 = OVERRIDE;" > mapfile &&
             add_ldflags -Wl,-M,mapfile
         nm_default='nm -P -g'
+        SLIB_CREATE_DEF_CMD='$(Q)perl $(SRC_PATH)/compat/solaris/make_sunver.pl $$(filter %.ver,$$^) $(OBJS) | grep -v @ > $(SUBDIR)lib$(NAME).ver-sol2'
         ;;
     netbsd)
         disable symver
@@ -4232,12 +4518,17 @@ case $target_os in
         enabled_any pic shared x86_64 ||
             { check_cflags -mdynamic-no-pic && add_asflags -mdynamic-no-pic; }
         ;;
-    mingw32*)
+    msys*)
+        die "Native MSYS builds are discouraged, please use the MINGW environment.";
+        ;;
+    mingw32*|mingw64*)
         if test $target_os = "mingw32ce"; then
             disable network
         else
             target_os=mingw32
         fi
+        decklink_outdev_extralibs="$decklink_outdev_extralibs -lole32 -loleaut32"
+        decklink_indev_extralibs="$decklink_indev_extralibs -lole32 -loleaut32"
         LIBTARGET=i386
         if enabled x86_64; then
             LIBTARGET="i386:x86-64"
@@ -4245,8 +4536,6 @@ case $target_os in
             LIBTARGET=arm-wince
         fi
         enabled shared && ! enabled small && check_cmd $windres --version && enable gnu_windres
-        check_ldflags -Wl,--nxcompat
-        check_ldflags -Wl,--dynamicbase
         enabled x86_32 && check_ldflags -Wl,--large-address-aware
         shlibdir_default="$bindir_default"
         SLIBPREF=""
@@ -4266,10 +4555,25 @@ case $target_os in
         SLIB_INSTALL_LINKS=
         SLIB_INSTALL_EXTRA_SHLIB='$(SLIBNAME:$(SLIBSUF)=.lib)'
         SLIB_INSTALL_EXTRA_LIB='lib$(SLIBNAME:$(SLIBSUF)=.dll.a) $(SLIBNAME_WITH_MAJOR:$(SLIBSUF)=.def)'
-        SHFLAGS='-shared -Wl,--output-def,$$(@:$(SLIBSUF)=.orig.def) -Wl,--out-implib,$(SUBDIR)lib$(SLIBNAME:$(SLIBSUF)=.dll.a) -Wl,--enable-runtime-pseudo-reloc -Wl,--enable-auto-image-base'
+        SHFLAGS='-shared -Wl,--output-def,$$(@:$(SLIBSUF)=.orig.def) -Wl,--out-implib,$(SUBDIR)lib$(SLIBNAME:$(SLIBSUF)=.dll.a) -Wl,--enable-runtime-pseudo-reloc -Wl,--disable-auto-image-base'
         objformat="win32"
         ranlib=:
         enable dos_paths
+        check_ldflags -Wl,--nxcompat,--dynamicbase
+        # Let's work around some stupidity in binutils.
+        # ld will strip relocations from executables even though we need them
+        # for dynamicbase (ASLR).  Using -pie does retain the reloc section;
+        # however, ld then forgets what the entry point should be (oops), so we
+        # have to manually (re)set it.
+        if enabled x86_32; then
+            add_ldexeflags -Wl,--pic-executable,-e,_mainCRTStartup
+        elif enabled x86_64; then
+            add_ldexeflags -Wl,--pic-executable,-e,mainCRTStartup
+            check_ldflags -Wl,--high-entropy-va # binutils 2.25
+            # Set image base >4GB for extra entropy with HEASLR
+            add_ldexeflags -Wl,--image-base,0x140000000
+            append SHFLAGS -Wl,--image-base,0x180000000
+        fi
         ;;
     win32|win64)
         disable symver
@@ -4393,6 +4697,26 @@ case $target_os in
         ;;
 esac
 
+# test if creating links works
+link_dest=$(mktemp -u $TMPDIR/dest_XXXXXXXX)
+link_name=$(mktemp -u $TMPDIR/name_XXXXXXXX)
+mkdir "$link_dest"
+$ln_s "$link_dest" "$link_name"
+touch "$link_dest/test_file"
+if [ "$source_path" != "." ] && ([ ! -d src ] || [ -L src ]) && [ -e "$link_name/test_file" ]; then
+    # create link to source path
+    [ -e src ] && rm src
+    $ln_s "$source_path" src
+    source_link=src
+else
+    # creating directory links doesn't work
+    # fall back to using the full source path
+    source_link="$source_path"
+fi
+# cleanup
+rm -r "$link_dest"
+rm -r "$link_name"
+
 # determine libc flavour
 
 probe_libc(){
@@ -4446,8 +4770,19 @@ probe_libc(){
         # in such new versions and producing binaries requiring windows 7.0.
         # Therefore explicitly set the default to XP unless the user has
         # set something else on the command line.
+        # Don't do this if WINAPI_FAMILY is set and is set to a non-desktop
+        # family. For these cases, configure is free to use any functions
+        # found in the SDK headers by default. (Alternatively, we could force
+        # _WIN32_WINNT to 0x0602 in that case.)
         check_${pfx}cpp_condition stdlib.h "defined(_WIN32_WINNT)" ||
-            add_${pfx}cppflags -D_WIN32_WINNT=0x0502
+            { check_${pfx}cpp <<EOF && add_${pfx}cppflags -D_WIN32_WINNT=0x0502; }
+#ifdef WINAPI_FAMILY
+#include <winapifamily.h>
+#if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
+#error not desktop
+#endif
+#endif
+EOF
     elif check_${pfx}cpp_condition stddef.h "defined __KLIBC__"; then
         eval ${pfx}libc_type=klibc
     elif check_${pfx}cpp_condition sys/cdefs.h "defined __BIONIC__"; then
@@ -4531,6 +4866,7 @@ die_license_disabled_gpl() {
 
 die_license_disabled gpl frei0r
 die_license_disabled gpl libcdio
+die_license_disabled gpl librubberband
 die_license_disabled gpl libsmbclient
 die_license_disabled gpl libutvideo
 die_license_disabled gpl libvidstab
@@ -4538,19 +4874,17 @@ die_license_disabled gpl libx264
 die_license_disabled gpl libx265
 die_license_disabled gpl libxavs
 die_license_disabled gpl libxvid
-die_license_disabled gpl libzvbi
 die_license_disabled gpl x11grab
 
-die_license_disabled nonfree libaacplus
 die_license_disabled nonfree libfaac
 die_license_disabled nonfree nvenc
 enabled gpl && die_license_disabled_gpl nonfree libfdk_aac
 enabled gpl && die_license_disabled_gpl nonfree openssl
 
+die_license_disabled version3 gmp
 die_license_disabled version3 libopencore_amrnb
 die_license_disabled version3 libopencore_amrwb
 die_license_disabled version3 libsmbclient
-die_license_disabled version3 libvo_aacenc
 die_license_disabled version3 libvo_amrwbenc
 
 enabled version3 && { enabled gpl && enable gplv3 || enable lgplv3; }
@@ -4676,6 +5010,8 @@ elif enabled alpha; then
 
 elif enabled arm; then
 
+    enabled msvc && check_cpp_condition stddef.h "defined _M_ARMT" && enable thumb
+
     check_cpp_condition stddef.h "defined __thumb__" && check_cc <<EOF && enable_weak thumb
 float func(float a, float b){ return a+b; }
 EOF
@@ -4684,7 +5020,9 @@ EOF
 
     if     check_cpp_condition stddef.h "defined __ARM_PCS_VFP"; then
         enable vfp_args
-    elif ! check_cpp_condition stddef.h "defined __ARM_PCS || defined __SOFTFP__"; then
+    elif check_cpp_condition stddef.h "defined _M_ARM_FP && _M_ARM_FP >= 30"; then
+        enable vfp_args
+    elif ! check_cpp_condition stddef.h "defined __ARM_PCS || defined __SOFTFP__" && [ $target_os != darwin ]; then
         case "${cross_prefix:-$cc}" in
             *hardfloat*)         enable vfp_args;   fpabi=vfp ;;
             *) check_ld "cc" <<EOF && enable vfp_args && fpabi=vfp || fpabi=soft ;;
@@ -4723,40 +5061,27 @@ EOF
 
 elif enabled mips; then
 
+    enabled loongson2 && check_inline_asm loongson2 '"dmult.g $8, $9, $10"'
+    enabled loongson3 && check_inline_asm loongson3 '"gsldxc1 $f0, 0($2, $3)"'
+    enabled mmi && check_inline_asm mmi '"punpcklhw $f0, $f0, $f0"'
+
     # Enable minimum ISA based on selected options
-    if enabled mips64 && (enabled mipsdspr1 || enabled mipsdspr2); then
-        add_cflags "-mips64r2"
-        add_asflags "-mips64r2"
-    elif enabled mips64 && enabled mipsfpu && disabled loongson3; then
-        add_cflags "-mips64"
-        add_asflags "-mips64"
-    elif enabled mipsdspr1 || enabled mipsdspr2; then
-        add_cflags "-mips32r2 -mfp32"
-        add_asflags "-mips32r2 -mfp32"
-    elif enabled mips32r5 || enabled mips64r6; then
-        check_cflags "-mfp64"
-        check_ldflags "-mfp64"
+    if enabled mips64; then
+        enabled mips64r6 && check_inline_asm_flags mips64r6 '"dlsa $0, $0, $0, 1"' '-mips64r6'
+        enabled mips64r2 && check_inline_asm_flags mips64r2 '"dext $0, $0, 0, 1"' '-mips64r2'
+        disabled mips64r6 && disabled mips64r2 && check_inline_asm_flags mips64r1 '"daddi $0, $0, 0"' '-mips64'
+    else
+        enabled mips32r6 && check_inline_asm_flags mips32r6 '"aui $0, $0, 0"' '-mips32r6'
+        enabled mips32r5 && check_inline_asm_flags mips32r5 '"eretnc"' '-mips32r5'
+        enabled mips32r2 && check_inline_asm_flags mips32r2 '"ext $0, $0, 0, 1"' '-mips32r2'
+        disabled mips32r6 && disabled mips32r5 && disabled mips32r2 && check_inline_asm_flags mips32r1 '"addi $0, $0, 0"' '-mips32'
     fi
 
-    enabled mips32r5  && check_cflags "-mips32r5 -msched-weight -mload-store-pairs -funroll-loops" &&
-     check_ldflags "-mips32r5" &&
-     check_inline_asm mips32r5  '"ulw $t0, ($t1)"'
-    enabled mips64r6  && check_cflags "-mips64r6 -msched-weight -mload-store-pairs -funroll-loops" &&
-     check_ldflags "-mips64r6" &&
-     check_inline_asm mips64r6  '"aui $t0, $t1, 1"'
-    enabled mipsdspr1 && add_cflags "-mdsp" && add_asflags "-mdsp" &&
-     check_inline_asm mipsdspr1 '"addu.qb $t0, $t1, $t2"'
-    enabled mipsdspr2 && add_cflags "-mdspr2" && add_asflags "-mdspr2" &&
-     check_inline_asm mipsdspr2 '"absq_s.qb $t0, $t1"'
-    enabled mipsfpu   && add_cflags "-mhard-float" && add_asflags "-mhard-float" &&
-     check_inline_asm mipsfpu   '"madd.d $f0, $f2, $f4, $f6"'
-    enabled msa       && check_cflags "-mmsa" && check_ldflags "-mmsa" &&
-     check_inline_asm msa       '"addvi.b $w0, $w1, 1"'
-    enabled loongson3 && check_inline_asm loongson3 '"gsldxc1 $f0, 0($2, $3)"'
-
-    enabled mips32r5 && add_asflags "-mips32r5 -mfp64"
-    enabled mips64r6 && add_asflags "-mips64r6 -mfp64"
-    enabled msa && add_asflags "-mmsa"
+    enabled mipsfpu && check_inline_asm_flags mipsfpu '"cvt.d.l $f0, $f2"' '-mhard-float'
+    enabled mipsfpu && (enabled mips32r5 || enabled mips32r6 || enabled mips64r6) && check_inline_asm_flags mipsfpu '"cvt.d.l $f0, $f1"' '-mfp64'
+    enabled mipsfpu && enabled msa && check_inline_asm_flags msa '"addvi.b $w0, $w1, 1"' '-mmsa' && check_header msa.h || disable msa
+    enabled mipsdsp && check_inline_asm_flags mipsdsp '"addu.qb $t0, $t1, $t2"' '-mdsp'
+    enabled mipsdspr2 && check_inline_asm_flags mipsdspr2 '"absq_s.qb $t0, $t1"' '-mdspr2'
 
 elif enabled parisc; then
 
@@ -4884,6 +5209,8 @@ elif check_func dlopen -ldl; then
     ldl=-ldl
 fi
 
+decklink_outdev_extralibs="$decklink_outdev_extralibs $ldl"
+decklink_indev_extralibs="$decklink_indev_extralibs $ldl"
 frei0r_filter_extralibs='$ldl'
 frei0r_src_filter_extralibs='$ldl'
 ladspa_filter_extralibs='$ldl'
@@ -4891,7 +5218,6 @@ nvenc_encoder_extralibs='$ldl'
 
 if ! disabled network; then
     check_func getaddrinfo $network_extralibs
-    check_func getservbyport $network_extralibs
     check_func inet_aton $network_extralibs
 
     check_type netdb.h "struct addrinfo"
@@ -4938,11 +5264,23 @@ check_builtin sync_val_compare_and_swap "" "int *ptr; int oldval, newval; __sync
 check_builtin gmtime_r time.h "time_t *time; struct tm *tm; gmtime_r(time, tm)"
 check_builtin localtime_r time.h "time_t *time; struct tm *tm; localtime_r(time, tm)"
 
+case "$custom_allocator" in
+    jemalloc)
+        # jemalloc by default does not use a prefix
+        require libjemalloc jemalloc/jemalloc.h malloc -ljemalloc
+    ;;
+    tcmalloc)
+        require_pkg_config libtcmalloc gperftools/tcmalloc.h tc_malloc
+        malloc_prefix=tc_
+    ;;
+esac
+
 check_func_headers malloc.h _aligned_malloc     && enable aligned_malloc
 check_func  ${malloc_prefix}memalign            && enable memalign
 check_func  ${malloc_prefix}posix_memalign      && enable posix_memalign
 
 check_func  access
+check_func  arc4random
 check_func_headers time.h clock_gettime || { check_func_headers time.h clock_gettime -lrt && add_extralibs -lrt && LIBRT="-lrt"; }
 check_func  fcntl
 check_func  fork
@@ -4969,6 +5307,7 @@ check_func_headers conio.h kbhit
 check_func_headers io.h setmode
 check_func_headers lzo/lzo1x.h lzo1x_999_compress
 check_func_headers stdlib.h getenv
+check_func_headers sys/stat.h lstat
 
 check_func_headers windows.h CoTaskMemFree -lole32
 check_func_headers windows.h GetProcessAffinityMask
@@ -4977,6 +5316,7 @@ check_func_headers windows.h GetSystemTimeAsFileTime
 check_func_headers windows.h MapViewOfFile
 check_func_headers windows.h PeekNamedPipe
 check_func_headers windows.h SetConsoleTextAttribute
+check_func_headers windows.h SetConsoleCtrlHandler
 check_func_headers windows.h Sleep
 check_func_headers windows.h VirtualAlloc
 check_struct windows.h "CONDITION_VARIABLE" Ptr
@@ -4985,6 +5325,7 @@ enabled xlib &&
     check_func_headers "X11/Xlib.h X11/extensions/Xvlib.h" XvGetPortAttribute -lXv -lX11 -lXext
 
 check_header direct.h
+check_header dirent.h
 check_header dlfcn.h
 check_header d3d11.h
 check_header dxva.h
@@ -5007,6 +5348,7 @@ check_header valgrind/valgrind.h
 check_header vdpau/vdpau.h
 check_header vdpau/vdpau_x11.h
 check_header VideoDecodeAcceleration/VDADecoder.h
+check_header VideoToolbox/VideoToolbox.h
 check_header windows.h
 check_header X11/extensions/XvMClib.h
 check_header asm/types.h
@@ -5014,16 +5356,30 @@ check_header asm/types.h
 check_lib2 "windows.h shellapi.h" CommandLineToArgvW -lshell32
 check_lib2 "windows.h wincrypt.h" CryptGenRandom -ladvapi32
 check_lib2 "windows.h psapi.h" GetProcessMemoryInfo -lpsapi
+check_lib "CoreServices/CoreServices.h" UTGetOSTypeFromString "-framework CoreServices"
 
 check_struct "sys/time.h sys/resource.h" "struct rusage" ru_maxrss
 
-check_type "windows.h dxva.h" "DXVA_PicParams_HEVC"
+check_type "windows.h dxva.h" "DXVA_PicParams_HEVC" -DWINAPI_FAMILY=WINAPI_FAMILY_DESKTOP_APP -D_CRT_BUILD_DESKTOP_APP=0
+check_type "windows.h dxva.h" "DXVA_PicParams_VP9" -DWINAPI_FAMILY=WINAPI_FAMILY_DESKTOP_APP -D_CRT_BUILD_DESKTOP_APP=0
 check_type "windows.h d3d11.h" "ID3D11VideoDecoder"
-check_type "d3d9.h dxva2api.h" DXVA2_ConfigPictureDecode -D_WIN32_WINNT=0x0600
+check_type "windows.h d3d11.h" "ID3D11VideoContext"
+check_type "d3d9.h dxva2api.h" DXVA2_ConfigPictureDecode -D_WIN32_WINNT=0x0602
+
+check_type "va/va.h" "VAPictureParameterBufferHEVC"
+check_type "va/va.h" "VADecPictureParameterBufferVP9"
+
+check_type "vdpau/vdpau.h" "VdpPictureInfoHEVC"
+
+check_cpp_condition windows.h "!WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)" && enable winrt || disable winrt
 
 if ! disabled w32threads && ! enabled pthreads; then
     check_func_headers "windows.h process.h" _beginthreadex &&
         enable w32threads || disable w32threads
+    if ! enabled w32threads && enabled winrt; then
+        check_func_headers "windows.h" CreateThread &&
+            enable w32threads || disable w32threads
+    fi
 fi
 
 # check for some common methods of building with pthread support
@@ -5064,6 +5420,8 @@ check_lib math.h sin -lm && LIBM="-lm"
 disabled crystalhd || check_lib libcrystalhd/libcrystalhd_if.h DtsCrystalHDVersion -lcrystalhd || disable crystalhd
 
 atan2f_args=2
+copysign_args=2
+hypot_args=2
 ldexpf_args=2
 powf_args=2
 
@@ -5071,19 +5429,24 @@ for func in $MATH_FUNCS; do
     eval check_mathfunc $func \${${func}_args:-1}
 done
 
+for func in $COMPLEX_FUNCS; do
+    eval check_complexfunc $func \${${func}_args:-1}
+done
+
 # these are off by default, so fail if requested and not available
-enabled avfoundation_indev && { check_header_oc AVFoundation/AVFoundation.h || disable avfoundation_indev; }
+enabled avfoundation_indev && { check_header_objcc AVFoundation/AVFoundation.h || disable avfoundation_indev; }
 enabled avfoundation_indev && { check_lib2 CoreGraphics/CoreGraphics.h CGGetActiveDisplayList -framework CoreGraphics ||
                                 check_lib2 ApplicationServices/ApplicationServices.h CGGetActiveDisplayList -framework ApplicationServices; }
 enabled avisynth          && { { check_lib2 "windows.h" LoadLibrary; } ||
                                { check_lib2 "dlfcn.h" dlopen -ldl; } ||
                                die "ERROR: LoadLibrary/dlopen not found for avisynth"; }
+enabled chromaprint       && require chromaprint chromaprint.h chromaprint_get_version -lchromaprint
 enabled decklink          && { check_header DeckLinkAPI.h || die "ERROR: DeckLinkAPI.h header not found"; }
 enabled frei0r            && { check_header frei0r.h || die "ERROR: frei0r.h header not found"; }
+enabled gmp               && require2 gmp gmp.h mpz_export -lgmp
 enabled gnutls            && require_pkg_config gnutls gnutls/gnutls.h gnutls_global_init
 enabled ladspa            && { check_header ladspa.h || die "ERROR: ladspa.h header not found"; }
 enabled libiec61883       && require libiec61883 libiec61883/iec61883.h iec61883_cmp_connect -lraw1394 -lavc1394 -lrom1394 -liec61883
-enabled libaacplus        && require "libaacplus >= 2.0.0" aacplus.h aacplusEncOpen -laacplus
 enabled libass            && require_pkg_config libass ass/ass.h ass_library_init
 enabled libbluray         && require_pkg_config libbluray libbluray/bluray.h bd_open
 enabled libbs2b           && require_pkg_config libbs2b bs2b.h bs2b_open
@@ -5091,7 +5454,7 @@ enabled libcelt           && require libcelt celt/celt.h celt_decode -lcelt0 &&
                              { check_lib celt/celt.h celt_decoder_create_custom -lcelt0 ||
                                die "ERROR: libcelt must be installed and version must be >= 0.11.0."; }
 enabled libcaca           && require_pkg_config caca caca.h caca_create_canvas
-enabled libdcadec         && require_pkg_config dcadec libdcadec/dca_context.h dcadec_context_create
+enabled libdcadec         && require_pkg_config "dcadec >= 0.1.0" libdcadec/dca_context.h dcadec_context_create
 enabled libfaac           && require2 libfaac "stdint.h faac.h" faacEncGetVersion -lfaac
 enabled libfdk_aac        && { use_pkg_config fdk-aac "fdk-aac/aacenc_lib.h" aacEncOpen ||
                                { require libfdk_aac fdk-aac/aacenc_lib.h aacEncOpen -lfdk-aac &&
@@ -5107,32 +5470,35 @@ enabled libgsm            && { for gsm_hdr in "gsm.h" "gsm/gsm.h"; do
                                    check_lib "${gsm_hdr}" gsm_create -lgsm && break;
                                done || die "ERROR: libgsm not found"; }
 enabled libilbc           && require libilbc ilbc.h WebRtcIlbcfix_InitDecode -lilbc
+enabled libkvazaar        && require_pkg_config "kvazaar >= 0.8.1" kvazaar.h kvz_api_get
 enabled libmfx            && require_pkg_config libmfx "mfx/mfxvideo.h" MFXInit
 enabled libmodplug        && require_pkg_config libmodplug libmodplug/modplug.h ModPlug_Load
 enabled libmp3lame        && require "libmp3lame >= 3.98.3" lame/lame.h lame_set_VBR_quality -lmp3lame
 enabled libnut            && require libnut libnut.h nut_demuxer_init -lnut
 enabled libopencore_amrnb && require libopencore_amrnb opencore-amrnb/interf_dec.h Decoder_Interface_init -lopencore-amrnb
 enabled libopencore_amrwb && require libopencore_amrwb opencore-amrwb/dec_if.h D_IF_init -lopencore-amrwb
-enabled libopencv         && require_pkg_config opencv opencv/cxcore.h cvCreateImageHeader
+enabled libopencv         && { check_header opencv2/core/core_c.h &&
+                               require_pkg_config opencv opencv2/core/core_c.h cvCreateImageHeader ||
+                               require_pkg_config opencv opencv/cxcore.h cvCreateImageHeader; }
 enabled libopenh264       && require_pkg_config openh264 wels/codec_api.h WelsGetCodecVersion
-enabled libopenjpeg       && { check_lib openjpeg.h opj_version -lopenmj2 -DOPJ_STATIC ||
+enabled libopenjpeg       && { check_lib openjpeg-2.1/openjpeg.h opj_version -lopenjp2 -DOPJ_STATIC ||
+                               check_lib openjpeg-2.0/openjpeg.h opj_version -lopenjp2 -DOPJ_STATIC ||
                                check_lib openjpeg-1.5/openjpeg.h opj_version -lopenjpeg -DOPJ_STATIC ||
                                check_lib openjpeg.h opj_version -lopenjpeg -DOPJ_STATIC ||
                                die "ERROR: libopenjpeg not found"; }
 enabled libopus           && require_pkg_config opus opus_multistream.h opus_multistream_decoder_create
 enabled libpulse          && require_pkg_config libpulse pulse/pulseaudio.h pa_context_new
-enabled libquvi           && require_pkg_config libquvi quvi/quvi.h quvi_init
 enabled librtmp           && require_pkg_config librtmp librtmp/rtmp.h RTMP_Socket
+enabled librubberband     && require_pkg_config "rubberband >= 1.8.1" rubberband/rubberband-c.h rubberband_new
 enabled libschroedinger   && require_pkg_config schroedinger-1.0 schroedinger/schro.h schro_init
 enabled libshine          && require_pkg_config shine shine/layer3.h shine_encode_buffer
 enabled libsmbclient      && { use_pkg_config smbclient libsmbclient.h smbc_init ||
                                require smbclient libsmbclient.h smbc_init -lsmbclient; }
-enabled libsoxr           && require libsoxr soxr.h soxr_create -lsoxr
+enabled libsnappy         && require snappy snappy-c.h snappy_compress -lsnappy
+enabled libsoxr           && require libsoxr soxr.h soxr_create -lsoxr && LIBSOXR="-lsoxr"
 enabled libssh            && require_pkg_config libssh libssh/sftp.h sftp_init
 enabled libspeex          && require_pkg_config speex speex/speex.h speex_decoder_init -lspeex
-enabled libstagefright_h264 && require_cpp libstagefright_h264 "binder/ProcessState.h media/stagefright/MetaData.h
-    media/stagefright/MediaBufferGroup.h media/stagefright/MediaDebug.h media/stagefright/MediaDefs.h
-    media/stagefright/OMXClient.h media/stagefright/OMXCodec.h" android::OMXClient -lstagefright -lmedia -lutils -lbinder -lgnustl_static
+enabled libtesseract      && require_pkg_config tesseract tesseract/capi.h TessBaseAPICreate
 enabled libtheora         && require libtheora theora/theoraenc.h th_info_init -ltheoraenc -ltheoradec -logg
 enabled libtwolame        && require libtwolame twolame.h twolame_init -ltwolame &&
                              { check_lib twolame.h twolame_encode_buffer_float32_interleaved -ltwolame ||
@@ -5140,16 +5506,35 @@ enabled libtwolame        && require libtwolame twolame.h twolame_init -ltwolame
 enabled libutvideo        && require_cpp utvideo "stdint.h stdlib.h utvideo/utvideo.h utvideo/Codec.h" 'CCodec*' -lutvideo -lstdc++
 enabled libv4l2           && require_pkg_config libv4l2 libv4l2.h v4l2_ioctl
 enabled libvidstab        && require_pkg_config "vidstab >= 0.98" vid.stab/libvidstab.h vsMotionDetectInit
-enabled libvo_aacenc      && require libvo_aacenc vo-aacenc/voAAC.h voGetAACEncAPI -lvo-aacenc
 enabled libvo_amrwbenc    && require libvo_amrwbenc vo-amrwbenc/enc_if.h E_IF_init -lvo-amrwbenc
 enabled libvorbis         && require libvorbis vorbis/vorbisenc.h vorbis_info_init -lvorbisenc -lvorbis -logg
+
 enabled libvpx            && {
-    enabled libvpx_vp8_decoder && { check_lib2 "vpx/vpx_decoder.h vpx/vp8dx.h" vpx_codec_dec_init_ver -lvpx ||
-                                    die "ERROR: libvpx decoder version must be >=0.9.1"; }
-    enabled libvpx_vp8_encoder && { check_lib2 "vpx/vpx_encoder.h vpx/vp8cx.h" "vpx_codec_enc_init_ver VP8E_SET_MAX_INTRA_BITRATE_PCT" -lvpx ||
-                                    die "ERROR: libvpx encoder version must be >=0.9.7"; }
-    enabled libvpx_vp9_decoder && { check_lib2 "vpx/vpx_decoder.h vpx/vp8dx.h" "vpx_codec_vp9_dx" -lvpx || disable libvpx_vp9_decoder; }
-    enabled libvpx_vp9_encoder && { check_lib2 "vpx/vpx_encoder.h vpx/vp8cx.h" "vpx_codec_vp9_cx VP9E_SET_AQ_MODE" -lvpx || disable libvpx_vp9_encoder; } }
+    enabled libvpx_vp8_decoder && {
+        use_pkg_config "vpx >= 0.9.1" "vpx/vpx_decoder.h vpx/vp8dx.h" vpx_codec_vp8_dx ||
+            check_lib2 "vpx/vpx_decoder.h vpx/vp8dx.h" vpx_codec_dec_init_ver -lvpx ||
+                die "ERROR: libvpx decoder version must be >=0.9.1";
+    }
+    enabled libvpx_vp8_encoder && {
+        use_pkg_config "vpx >= 0.9.7" "vpx/vpx_encoder.h vpx/vp8cx.h" vpx_codec_vp8_cx ||
+            check_lib2 "vpx/vpx_encoder.h vpx/vp8cx.h" "vpx_codec_enc_init_ver VP8E_SET_MAX_INTRA_BITRATE_PCT" -lvpx ||
+                die "ERROR: libvpx encoder version must be >=0.9.7";
+    }
+    enabled libvpx_vp9_decoder && {
+        use_pkg_config "vpx >= 1.3.0" "vpx/vpx_decoder.h vpx/vp8dx.h" vpx_codec_vp9_dx ||
+            check_lib2 "vpx/vpx_decoder.h vpx/vp8dx.h" "vpx_codec_vp9_dx" -lvpx ||
+                disable libvpx_vp9_decoder;
+    }
+    enabled libvpx_vp9_encoder && {
+        use_pkg_config "vpx >= 1.3.0" "vpx/vpx_encoder.h vpx/vp8cx.h" vpx_codec_vp9_cx ||
+            check_lib2 "vpx/vpx_encoder.h vpx/vp8cx.h" "vpx_codec_vp9_cx VP9E_SET_AQ_MODE" -lvpx ||
+                disable libvpx_vp9_encoder;
+    }
+    if disabled_all libvpx_vp8_decoder libvpx_vp9_decoder libvpx_vp8_encoder libvpx_vp9_encoder; then
+        die "libvpx enabled but no supported decoders or encoders found"
+    fi
+}
+
 enabled libwavpack        && require libwavpack wavpack/wavpack.h WavpackOpenFileOutput  -lwavpack
 enabled libwebp           && {
     enabled libwebp_encoder      && require_pkg_config "libwebp >= 0.2.0" webp/encode.h WebPGetEncoderVersion
@@ -5158,14 +5543,19 @@ enabled libx264           && { use_pkg_config x264 "stdint.h x264.h" x264_encode
                                { require libx264 x264.h x264_encoder_encode -lx264 &&
                                  warn "using libx264 without pkg-config"; } } &&
                              { check_cpp_condition x264.h "X264_BUILD >= 118" ||
-                               die "ERROR: libx264 must be installed and version must be >= 0.118."; }
+                               die "ERROR: libx264 must be installed and version must be >= 0.118."; } &&
+                             { check_cpp_condition x264.h "X264_MPEG2" &&
+                               enable libx262; }
 enabled libx265           && require_pkg_config x265 x265.h x265_api_get &&
-                             { check_cpp_condition x265.h "X265_BUILD >= 57" ||
-                               die "ERROR: libx265 version must be >= 57."; }
+                             { check_cpp_condition x265.h "X265_BUILD >= 68" ||
+                               die "ERROR: libx265 version must be >= 68."; }
 enabled libxavs           && require libxavs xavs.h xavs_encoder_encode -lxavs
 enabled libxvid           && require libxvid xvid.h xvid_global -lxvidcore
+enabled libzimg           && require_pkg_config zimg zimg.h zimg_get_api_version
 enabled libzmq            && require_pkg_config libzmq zmq.h zmq_ctx_new
-enabled libzvbi           && require libzvbi libzvbi.h vbi_decoder_new -lzvbi
+enabled libzvbi           && require libzvbi libzvbi.h vbi_decoder_new -lzvbi &&
+                             { check_cpp_condition libzvbi.h "VBI_VERSION_MAJOR > 0 || VBI_VERSION_MINOR > 2 || VBI_VERSION_MINOR == 2 && VBI_VERSION_MICRO >= 28" ||
+                               enabled gpl || die "ERROR: libzvbi requires version 0.2.28 or --enable-gpl."; }
 enabled mmal              && { check_lib interface/mmal/mmal.h mmal_port_connect -lmmal_core -lmmal_util -lmmal_vc_client -lbcm_host ||
                                 { ! enabled cross_compile && {
                                     add_cflags -isystem/opt/vc/include/ -isystem/opt/vc/include/interface/vmcs_host/linux -isystem/opt/vc/include/interface/vcos/pthreads -fgnu89-inline ;
@@ -5173,6 +5563,11 @@ enabled mmal              && { check_lib interface/mmal/mmal.h mmal_port_connect
                                     check_lib interface/mmal/mmal.h mmal_port_connect ; }
                                 check_lib interface/mmal/mmal.h mmal_port_connect ; } ||
                                die "ERROR: mmal not found"; }
+enabled mmal &&
+    { check_code cc interface/mmal/mmal.h "MMAL_PARAMETER_VIDEO_MAX_NUM_CALLBACKS" ||
+      die "ERROR: mmal firmware headers too old"; }
+
+enabled netcdf            && require_pkg_config netcdf netcdf.h nc_inq_libvers
 enabled nvenc             && { check_header nvEncodeAPI.h || die "ERROR: nvEncodeAPI.h not found."; } &&
                              { check_cpp_condition nvEncodeAPI.h "NVENCAPI_MAJOR_VERSION >= 5" ||
                                die "ERROR: NVENC API version 4 or older is not supported"; } &&
@@ -5194,16 +5589,12 @@ enabled opengl            && { check_lib GL/glx.h glXGetProcAddress "-lGL" ||
                                check_lib2 ES2/gl.h glGetError "-isysroot=${sysroot} -Wl,-framework,OpenGLES" ||
                                die "ERROR: opengl not found."
                              }
-enabled openssl           && { check_lib openssl/ssl.h SSL_library_init -lssl -lcrypto ||
+enabled openssl           && { use_pkg_config openssl openssl/ssl.h SSL_library_init ||
+                               check_lib openssl/ssl.h SSL_library_init -lssl -lcrypto ||
                                check_lib openssl/ssl.h SSL_library_init -lssl32 -leay32 ||
                                check_lib openssl/ssl.h SSL_library_init -lssl -lcrypto -lws2_32 -lgdi32 ||
                                die "ERROR: openssl not found"; }
-enabled qtkit_indev      && { check_header_oc QTKit/QTKit.h || disable qtkit_indev; }
-
-if enabled gnutls; then
-    { check_lib2 gmp.h mpz_export -lgmp && enable gmp; } ||
-    { check_lib gcrypt.h gcry_mpi_new -lgcrypt && enable gcrypt; }
-fi
+enabled qtkit_indev      && { check_header_objcc QTKit/QTKit.h || disable qtkit_indev; }
 
 # libdc1394 check
 if enabled libdc1394; then
@@ -5213,6 +5604,20 @@ if enabled libdc1394; then
         enable libdc1394_1; } ||
     die "ERROR: No version of libdc1394 found "
 fi
+
+if enabled gcrypt; then
+    GCRYPT_CONFIG="${cross_prefix}libgcrypt-config"
+    if "${GCRYPT_CONFIG}" --version > /dev/null 2>&1; then
+        gcrypt_cflags=$("${GCRYPT_CONFIG}" --cflags)
+        gcrypt_libs=$("${GCRYPT_CONFIG}" --libs)
+        check_func_headers gcrypt.h gcry_mpi_new $gcrypt_cflags $gcrypt_libs ||
+            die "ERROR: gcrypt not found"
+        add_cflags $gcrypt_cflags && add_extralibs $gcrypt_libs
+    else
+        require2 gcrypt gcrypt.h gcry_mpi_new -lgcrypt
+    fi
+fi
+
 if ! disabled sdl; then
     SDL_CONFIG="${cross_prefix}sdl-config"
     if check_pkg_config sdl SDL_events.h SDL_PollEvent; then
@@ -5233,6 +5638,9 @@ if ! disabled sdl; then
             disable sdl
         fi
     fi
+    if test $target_os = "mingw32"; then
+        sdl_libs="$sdl_libs -mconsole"
+    fi
 fi
 enabled sdl && add_cflags $sdl_cflags && add_extralibs $sdl_libs
 
@@ -5240,6 +5648,9 @@ disabled securetransport || { check_func SecIdentityCreate "-Wl,-framework,CoreF
     check_lib2 "Security/SecureTransport.h Security/Security.h" "SSLCreateContext SecItemImport" "-Wl,-framework,CoreFoundation -Wl,-framework,Security" &&
     enable securetransport; }
 
+disabled schannel || { check_func_headers "windows.h Security.h" InitializeSecurityContext -DSECURITY_WIN32 -lSecur32 &&
+                       check_cpp_condition winerror.h "defined(SEC_I_CONTEXT_EXPIRED)" && enable schannel && add_extralibs -lSecur32; }
+
 makeinfo --version > /dev/null 2>&1 && enable makeinfo  || disable makeinfo
 enabled makeinfo \
     && [ 0$(makeinfo --version | grep "texinfo" | sed 's/.*texinfo[^0-9]*\([0-9]*\)\..*/\1/') -ge 5 ] \
@@ -5381,6 +5792,7 @@ check_cflags -Wno-pointer-to-int-cast
 check_cflags -Wstrict-prototypes
 check_cflags -Wempty-body
 enabled extra_warnings && check_cflags -Winline
+enabled extra_warnings && check_cflags -Wcast-qual
 
 check_disable_warning(){
     warning_flag=-W${1#-Wno-}
@@ -5391,11 +5803,13 @@ check_disable_warning -Wno-parentheses
 check_disable_warning -Wno-switch
 check_disable_warning -Wno-format-zero-length
 check_disable_warning -Wno-pointer-sign
+check_disable_warning -Wno-unused-const-variable
 
 # add some linker flags
 check_ldflags -Wl,--warn-common
 check_ldflags -Wl,-rpath-link=libpostproc:libswresample:libswscale:libavfilter:libavdevice:libavformat:libavcodec:libavutil:libavresample
 enabled rpath && add_ldexeflags -Wl,-rpath,$libdir
+enabled rpath && add_ldlibflags -Wl,-rpath,$libdir
 test_ldflags -Wl,-Bsymbolic && append SHFLAGS -Wl,-Bsymbolic
 
 # add some strip flags
@@ -5420,7 +5834,6 @@ enabled xmm_clobber_test &&
                   -Wl,--wrap,avcodec_decode_video2      \
                   -Wl,--wrap,avcodec_decode_subtitle2   \
                   -Wl,--wrap,avcodec_encode_audio2      \
-                  -Wl,--wrap,avcodec_encode_video       \
                   -Wl,--wrap,avcodec_encode_video2      \
                   -Wl,--wrap,avcodec_encode_subtitle    \
                   -Wl,--wrap,swr_convert                \
@@ -5428,10 +5841,14 @@ enabled xmm_clobber_test &&
                   -Wl,--wrap,sws_scale ||
     disable xmm_clobber_test
 
-echo "X{};" > $TMPV
+echo "X { local: *; };" > $TMPV
 if test_ldflags -Wl,--version-script,$TMPV; then
     append SHFLAGS '-Wl,--version-script,\$(SUBDIR)lib\$(NAME).ver'
-    check_cc <<EOF && enable symver_asm_label
+elif test_ldflags -Wl,-M,$TMPV; then
+    append SHFLAGS '-Wl,-M,\$(SUBDIR)lib\$(NAME).ver-sol2'
+fi
+
+check_cc <<EOF && enable symver_asm_label
 void ff_foo(void) __asm__ ("av_foo@VERSION");
 void ff_foo(void) { ${inline_asm+__asm__($quotes);} }
 EOF
@@ -5439,7 +5856,6 @@ EOF
 __asm__(".symver ff_foo,av_foo@VERSION");
 void ff_foo(void) {}
 EOF
-fi
 
 if [ -z "$optflags" ]; then
     if enabled small; then
@@ -5509,7 +5925,11 @@ elif enabled ccc; then
     add_cflags -msg_disable nonstandcast
     add_cflags -msg_disable unsupieee
 elif enabled gcc; then
-    check_optflags -fno-tree-vectorize
+    case $gcc_basever in
+        4.9*) enabled x86 || check_optflags -fno-tree-vectorize ;;
+        4.*)                 check_optflags -fno-tree-vectorize ;;
+        *)    enabled x86 || check_optflags -fno-tree-vectorize ;;
+    esac
     check_cflags -Werror=format-security
     check_cflags -Werror=implicit-function-declaration
     check_cflags -Werror=missing-prototypes
@@ -5562,8 +5982,30 @@ elif enabled_any msvc icl; then
     fi
     # msvcrt10 x64 incorrectly enables log2, only msvcrt12 (MSVC 2013) onwards actually has log2.
     check_cpp_condition crtversion.h "_VC_CRT_MAJOR_VERSION >= 12" || disable log2
+    # The CRT headers contain __declspec(restrict) in a few places, but if redefining
+    # restrict, this might break. MSVC 2010 and 2012 fail with __declspec(__restrict)
+    # (as it ends up if the restrict redefine is done before including stdlib.h), while
+    # MSVC 2013 and newer can handle it fine.
+    # If this declspec fails, force including stdlib.h before the restrict redefinition
+    # happens in config.h.
+    if [ $_restrict != restrict ]; then
+        check_cc <<EOF || add_cflags -FIstdlib.h
+__declspec($_restrict) void* foo(int);
+EOF
+    fi
+    check_func strtoll || add_cflags -Dstrtoll=_strtoi64
 fi
 
+for pfx in "" host_; do
+    varname=${pfx%_}cc_type
+    eval "type=\$$varname"
+    if [ "$type" = "msvc" ]; then
+        check_${pfx}cc <<EOF || add_${pfx}cflags -Dinline=__inline
+static inline int foo(int a) { return a; }
+EOF
+    fi
+done
+
 case $as_type in
     clang)
         add_asflags -Qunused-arguments
@@ -5649,6 +6091,7 @@ done
 enabled zlib && add_cppflags -DZLIB_CONST
 
 # conditional library dependencies, in linking order
+enabled afftfilt_filter     && prepend avfilter_deps "avcodec"
 enabled amovie_filter       && prepend avfilter_deps "avformat avcodec"
 enabled aresample_filter    && prepend avfilter_deps "swresample"
 enabled asyncts_filter      && prepend avfilter_deps "avresample"
@@ -5666,8 +6109,13 @@ enabled removelogo_filter   && prepend avfilter_deps "avformat avcodec swscale"
 enabled resample_filter && prepend avfilter_deps "avresample"
 enabled sab_filter          && prepend avfilter_deps "swscale"
 enabled scale_filter    && prepend avfilter_deps "swscale"
+enabled scale2ref_filter    && prepend avfilter_deps "swscale"
+enabled sofalizer_filter    && prepend avfilter_deps "avcodec"
+enabled showcqt_filter      && prepend avfilter_deps "avformat avcodec swscale"
+enabled showfreqs_filter    && prepend avfilter_deps "avcodec"
 enabled showspectrum_filter && prepend avfilter_deps "avcodec"
 enabled smartblur_filter    && prepend avfilter_deps "swscale"
+enabled spectrumsynth_filter && prepend avfilter_deps "avcodec"
 enabled subtitles_filter    && prepend avfilter_deps "avformat avcodec"
 enabled uspp_filter         && prepend avfilter_deps "avcodec"
 
@@ -5715,6 +6163,7 @@ if enabled x86; then
     echo "3DNow! extended enabled   ${amd3dnowext-no}"
     echo "SSE enabled               ${sse-no}"
     echo "SSSE3 enabled             ${ssse3-no}"
+    echo "AESNI enabled             ${aesni-no}"
     echo "AVX enabled               ${avx-no}"
     echo "XOP enabled               ${xop-no}"
     echo "FMA3 enabled              ${fma3-no}"
@@ -5738,12 +6187,10 @@ if enabled arm; then
 fi
 if enabled mips; then
     echo "MIPS FPU enabled          ${mipsfpu-no}"
-    echo "MIPS32R5 enabled          ${mips32r5-no}"
-    echo "MIPS64R6 enabled          ${mips64r6-no}"
-    echo "MIPS DSP R1 enabled       ${mipsdspr1-no}"
+    echo "MIPS DSP R1 enabled       ${mipsdsp-no}"
     echo "MIPS DSP R2 enabled       ${mipsdspr2-no}"
     echo "MIPS MSA enabled          ${msa-no}"
-    echo "LOONGSON3 enabled         ${loongson3-no}"
+    echo "LOONGSON MMI enabled      ${mmi-no}"
 fi
 if enabled ppc; then
     echo "AltiVec enabled           ${altivec-no}"
@@ -5774,14 +6221,18 @@ test -n "$random_seed" &&
     echo "random seed               ${random_seed}"
 echo
 
+echo "Enabled programs:"
+print_enabled '' $PROGRAM_LIST | print_in_columns
+echo
+
 echo "External libraries:"
-print_enabled '' $EXTERNAL_LIBRARY_LIST | print_3_columns
+print_enabled '' $EXTERNAL_LIBRARY_LIST | print_in_columns
 echo
 
 for type in decoder encoder hwaccel parser demuxer muxer protocol filter bsf indev outdev; do
     echo "Enabled ${type}s:"
     eval list=\$$(toupper $type)_LIST
-    print_enabled '_*' $list | print_3_columns
+    print_enabled '_*' $list | print_in_columns
     echo
 done
 
@@ -5821,6 +6272,7 @@ DOCDIR=\$(DESTDIR)$docdir
 MANDIR=\$(DESTDIR)$mandir
 PKGCONFIGDIR=\$(DESTDIR)$pkgconfigdir
 SRC_PATH=$source_path
+SRC_LINK=$source_link
 ifndef MAIN_MAKEFILE
 SRC_PATH:=\$(SRC_PATH:.%=..%)
 endif
@@ -5830,6 +6282,7 @@ INTRINSICS=$intrinsics
 CC=$cc
 CXX=$cxx
 AS=$as
+OBJCC=$objcc
 LD=$ld
 DEPCC=$dep_cc
 DEPCCFLAGS=$DEPCCFLAGS \$(CPPFLAGS)
@@ -5847,9 +6300,13 @@ LN_S=$ln_s
 CPPFLAGS=$CPPFLAGS
 CFLAGS=$CFLAGS
 CXXFLAGS=$CXXFLAGS
+OBJCFLAGS=$OBJCFLAGS
 ASFLAGS=$ASFLAGS
 AS_C=$AS_C
 AS_O=$AS_O
+OBJCC_C=$OBJCC_C
+OBJCC_E=$OBJCC_E
+OBJCC_O=$OBJCC_O
 CC_C=$CC_C
 CC_E=$CC_E
 CC_O=$CC_O
@@ -5864,6 +6321,7 @@ DEPWINDRES=$dep_cc
 DOXYGEN=$doxygen
 LDFLAGS=$LDFLAGS
 LDEXEFLAGS=$LDEXEFLAGS
+LDLIBFLAGS=$LDLIBFLAGS
 SHFLAGS=$(echo $($ldflags_filter $SHFLAGS))
 ASMSTRIPFLAGS=$ASMSTRIPFLAGS
 YASMFLAGS=$YASMFLAGS
@@ -5952,7 +6410,7 @@ cat > $TMPH <<EOF
 #define FFMPEG_CONFIG_H
 #define FFMPEG_CONFIGURATION "$(c_escape $FFMPEG_CONFIGURATION)"
 #define FFMPEG_LICENSE "$(c_escape $license)"
-#define CONFIG_THIS_YEAR 2015
+#define CONFIG_THIS_YEAR 2016
 #define FFMPEG_DATADIR "$(eval c_escape $datadir)"
 #define AVCONV_DATADIR "$(eval c_escape $datadir)"
 #define CC_IDENT "$(c_escape ${cc_ident:-Unknown compiler})"
@@ -5981,7 +6439,8 @@ enabled getenv || echo "#define getenv(x) NULL" >> $TMPH
 
 mkdir -p doc
 mkdir -p tests
-echo "@c auto-generated by configure" > doc/config.texi
+mkdir -p tests/api
+echo "@c auto-generated by configure - do not modify! " > doc/config.texi
 
 print_config ARCH_   "$config_files" $ARCH_LIST
 print_config HAVE_   "$config_files" $HAVE_LIST
@@ -6011,7 +6470,7 @@ echo "#endif /* AVUTIL_AVCONFIG_H */" >> $TMPH
 cp_if_changed $TMPH libavutil/avconfig.h
 
 if test -n "$WARNINGS"; then
-    printf "\n$WARNINGS"
+    printf "\n%s%s$WARNINGS%s" "$warn_color" "$bold_color" "$reset_color"
     enabled fatal_warnings && exit 1
 fi
 
@@ -6075,4 +6534,4 @@ pkgconfig_generate libavfilter   "FFmpeg audio/video filtering library" "$LIBAVF
 pkgconfig_generate libpostproc   "FFmpeg postprocessing library"        "$LIBPOSTPROC_VERSION"   ""
 pkgconfig_generate libavresample "Libav audio resampling library"       "$LIBAVRESAMPLE_VERSION" "$LIBM"
 pkgconfig_generate libswscale    "FFmpeg image rescaling library"       "$LIBSWSCALE_VERSION"    "$LIBM"
-pkgconfig_generate libswresample "FFmpeg audio resampling library"      "$LIBSWRESAMPLE_VERSION" "$LIBM"
+pkgconfig_generate libswresample "FFmpeg audio resampling library"      "$LIBSWRESAMPLE_VERSION" "$LIBM $LIBSOXR"
diff --git a/doc/APIchanges b/doc/APIchanges
index 6e64a05f..85d4db70 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -2,19 +2,156 @@ Never assume the API of libav* to be stable unless at least 1 month has passed
 since the last major version increase or the API was added.
 
 The last version increases were:
-libavcodec:    2014-08-09
-libavdevice:   2014-08-09
-libavfilter:   2014-08-09
-libavformat:   2014-08-09
-libavresample: 2014-08-09
-libpostproc:   2014-08-09
-libswresample: 2014-08-09
-libswscale:    2014-08-09
-libavutil:     2014-08-09
+libavcodec:    2015-08-28
+libavdevice:   2015-08-28
+libavfilter:   2015-08-28
+libavformat:   2015-08-28
+libavresample: 2015-08-28
+libpostproc:   2015-08-28
+libswresample: 2015-08-28
+libswscale:    2015-08-28
+libavutil:     2015-08-28
 
 
 API changes, most recent first:
 
+-------- 8< --------- FFmpeg 3.0 was cut here -------- 8< ---------
+
+2016-02-10 - bc9a596 / 9f61abc - lavf 57.25.100 / 57.3.0 - avformat.h
+  Add AVFormatContext.opaque, io_open and io_close, allowing custom IO
+
+2016-02-01 - 1dba837 - lavf 57.24.100 - avformat.h, avio.h
+  Add protocol_whitelist to AVFormatContext, AVIOContext
+
+2016-01-31 - 66e9d2f - lavu 55.17.100 - frame.h
+  Add AV_FRAME_DATA_GOP_TIMECODE for exporting MPEG1/2 GOP timecodes.
+
+2016-01-01 - 5e8b053 / 2c68113 - lavc 57.21.100 / 57.12.0 - avcodec.h
+  Add AVCodecDescriptor.profiles and avcodec_profile_name().
+
+2015-12-28 - 1f9139b - lavf 57.21.100 - avformat.h
+  Add automatic bitstream filtering; add av_apply_bitstream_filters()
+
+2015-12-22 - 39a09e9 - lavfi 6.21.101 - avfilter.h
+  Deprecate avfilter_link_set_closed().
+  Applications are not supposed to mess with links,
+  they should close the sinks.
+
+2015-12-17 - lavc 57.18.100 / 57.11.0 - avcodec.h dirac.h
+  xxxxxxx - Add av_packet_add_side_data().
+  xxxxxxx - Add AVCodecContext.coded_side_data.
+  xxxxxxx - Add AVCPBProperties API.
+  xxxxxxx - Add a new public header dirac.h containing
+            av_dirac_parse_sequence_header()
+
+2015-12-11 - 676a93f - lavf 57.20.100 - avformat.h
+  Add av_program_add_stream_index()
+
+2015-11-29 - 93fb4a4 - lavc 57.16.101 - avcodec.h
+  Deprecate rtp_callback without replacement, i.e. it won't be possible to
+  get image slices before the full frame is encoded any more. The libavformat
+  rtpenc muxer can still be used for RFC-2190 packetization.
+
+2015-11-22 - fe20e34 - lavc 57.16.100 - avcodec.h
+  Add AV_PKT_DATA_FALLBACK_TRACK for making fallback associations between
+  streams.
+
+2015-11-22 - ad317c9 - lavf 57.19.100 - avformat.h
+  Add av_stream_new_side_data().
+
+2015-11-22 - e12f403 - lavu 55.8.100 - xtea.h
+  Add av_xtea_le_init() and av_xtea_le_crypt()
+
+2015-11-18 - lavu 55.7.100 - mem.h
+  Add av_fast_mallocz()
+
+2015-10-29 - lavc 57.12.100 / 57.8.0 - avcodec.h
+  xxxxxx - Deprecate av_free_packet(). Use av_packet_unref() as replacement,
+           it resets the packet in a more consistent way.
+  xxxxxx - Deprecate av_dup_packet(), it is a no-op for most cases.
+           Use av_packet_ref() to make a non-refcounted AVPacket refcounted.
+  xxxxxx - Add av_packet_alloc(), av_packet_clone(), av_packet_free().
+           They match the AVFrame functions with the same name.
+
+2015-10-27 - 1e477a9 - lavu 55.5.100 - cpu.h
+  Add AV_CPU_FLAG_AESNI.
+
+2015-10-22 - ee573b4 / a17a766 - lavc 57.9.100 / 57.5.0 - avcodec.h
+  Add data and linesize array to AVSubtitleRect, to be used instead of
+  the ones from the embedded AVPicture.
+
+2015-10-22 - 866a417 / dc923bc - lavc 57.8.100 / 57.0.0 - qsv.h
+  Add an API for allocating opaque surfaces.
+
+2015-10-15 - 2c2d162 - lavf 57.4.100
+  Remove the latm demuxer that was a duplicate of the loas demuxer.
+
+2015-10-14 - b994788 / 11c5f43 - lavu 55.4.100 / 55.2.0 - dict.h
+  Change return type of av_dict_copy() from void to int, so that a proper
+  error code can be reported.
+
+2015-09-29 - b01891a / 948f3c1 - lavc 57.3.100 / 57.2.0 - avcodec.h
+  Change type of AVPacket.duration from int to int64_t.
+
+2015-09-17 - 7c46f24 / e3d4784 - lavc 57.3.100 / 57.2.0 - d3d11va.h
+  Add av_d3d11va_alloc_context(). This function must from now on be used for
+  allocating AVD3D11VAContext.
+
+2015-09-15 - lavf 57.2.100 - avformat.h
+  probesize and max_analyze_duration switched to 64bit, both
+  are only accessible through AVOptions
+
+2015-09-15 - lavf 57.1.100 - avformat.h
+  bit_rate was changed to 64bit, make sure you update any
+  printf() or other type sensitive code
+
+2015-09-15 - lavc 57.2.100 - avcodec.h
+  bit_rate/rc_max_rate/rc_min_rate were changed to 64bit, make sure you update
+  any printf() or other type sensitive code
+
+2015-09-07 - lavu 55.0.100 / 55.0.0
+  c734b34 / b8b5d82 - Change type of AVPixFmtDescriptor.flags from uint8_t to uint64_t.
+  f53569a / 6b3ef7f - Change type of AVComponentDescriptor fields from uint16_t to int
+            and drop bit packing.
+  151aa2e / 2268db2 - Add step, offset, and depth to AVComponentDescriptor to replace
+            the deprecated step_minus1, offset_plus1, and depth_minus1.
+
+-------- 8< --------- FFmpeg 2.8 was cut here -------- 8< ---------
+
+2015-08-27 - 1dd854e1 - lavc 56.58.100 - vaapi.h
+  Deprecate old VA-API context (vaapi_context) fields that were only
+  set and used by libavcodec. They are all managed internally now.
+
+2015-08-19 - 9f8e57ef - lavu 54.31.100 - pixfmt.h
+  Add a unique pixel format for VA-API (AV_PIX_FMT_VAAPI) that
+  indicates the nature of the underlying storage: a VA surface. This
+  yields the same value as AV_PIX_FMT_VAAPI_VLD.
+  Deprecate old VA-API related pixel formats: AV_PIX_FMT_VAAPI_MOCO,
+  AV_PIX_FMT_VAAPI_IDCT, AV_PIX_FMT_VAAPI_VLD.
+
+2015-08-02 - lavu 54.30.100 / 54.17.0
+  9ed59f1 / 7a7df34c -  Add av_blowfish_alloc().
+  a130ec9 / ae365453 -  Add av_rc4_alloc().
+  9ca1997 / 5d8bea3b -  Add av_xtea_alloc().
+  3cf08e9 / d9e8b47e -  Add av_des_alloc().
+
+2015-07-27 - lavc 56.56.100 / 56.35.0 - avcodec.h
+  94d68a4 / 7c6eb0a1 - Rename CODEC_FLAG* defines to AV_CODEC_FLAG*.
+  444e987 / def97856 - Rename CODEC_CAP_* defines to AV_CODEC_CAP_*.
+  29d147c / 059a9348 - Rename FF_INPUT_BUFFER_PADDING_SIZE and FF_MIN_BUFFER_SIZE
+              to AV_INPUT_BUFFER_PADDING_SIZE and AV_INPUT_BUFFER_MIN_SIZE.
+
+2015-07-22 - c40ecff - lavc 56.51.100 - avcodec.h
+  Add AV_PKT_DATA_QUALITY_STATS to export the quality value, PSNR, and pict_type
+  of an AVPacket.
+
+2015-07-16 - 8dad213 - lavc 56.49.100
+  Add av_codec_get_codec_properties(), FF_CODEC_PROPERTY_LOSSLESS
+  and FF_CODEC_PROPERTY_CLOSED_CAPTIONS
+
+2015-07-03 - d563e13 / 83212943 - lavu 54.28.100 / 56.15.0
+  Add av_version_info().
+
 -------- 8< --------- FFmpeg 2.7 was cut here -------- 8< ---------
 
 2015-06-04 - cc17b43 - lswr  1.2.100
@@ -692,6 +829,9 @@ API changes, most recent first:
    av_ripemd_update()
    av_ripemd_final()
 
+2013-06-10 - 82ef670 - lavu 52.35.101 - hmac.h
+  Add AV_HMAC_SHA224, AV_HMAC_SHA256, AV_HMAC_SHA384, AV_HMAC_SHA512
+
 2013-06-04 - 30b491f / fc962d4 - lavu 52.35.100 / 52.13.0 - mem.h
   Add av_realloc_array and av_reallocp_array
 
@@ -981,15 +1121,14 @@ lavd 54.4.100 / 54.0.0, lavfi 3.5.0
   Add avresample_set_channel_mapping() for input channel reordering,
   duplication, and silencing.
 
-2012-12-29 - 2ce43b3 / d8fd06c - lavu 52.13.100 / 52.3.0 - avstring.h
-  Add av_basename() and av_dirname().
+2012-12-29 - lavu 52.13.100 / 52.3.0 - avstring.h
+  2ce43b3 / d8fd06c - Add av_basename() and av_dirname().
+  e13d5e9 / c1a02e8 - Add av_pix_fmt_get_chroma_sub_sample and deprecate
+                      avcodec_get_chroma_sub_sample.
 
 2012-11-11 - 03b0787 / 5980f5d - lavu 52.6.100 / 52.2.0 - audioconvert.h
   Rename audioconvert.h to channel_layout.h. audioconvert.h is now deprecated.
 
-2012-11-05 - 7d26be6 / dfde8a3 - lavu 52.5.100 / 52.1.0 - intmath.h
-  Add av_ctz() for trailing zero bit count
-
 2012-10-21 - e3a91c5 / a893655 - lavu 51.77.100 / 51.45.0 - error.h
   Add AVERROR_EXPERIMENTAL
 
diff --git a/doc/Doxyfile b/doc/Doxyfile
index 1b116d4d..845d8dc0 100644
--- a/doc/Doxyfile
+++ b/doc/Doxyfile
@@ -31,7 +31,7 @@ PROJECT_NAME           = FFmpeg
 # This could be handy for archiving the generated documentation or
 # if some version control system is used.
 
-PROJECT_NUMBER         = 2.7.2
+PROJECT_NUMBER         = 3.0.2
 
 # With the PROJECT_LOGO tag one can specify a logo or icon that is included
 # in the documentation. The maximum height of the logo should not exceed 55
@@ -1360,6 +1360,7 @@ PREDEFINED             = "__attribute__(x)=" \
                          "offsetof(x,y)=0x42" \
                          av_alloc_size \
                          AV_GCC_VERSION_AT_LEAST(x,y)=1 \
+                         AV_GCC_VERSION_AT_MOST(x,y)=0 \
                          __GNUC__=1 \
 
 # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
diff --git a/doc/Makefile b/doc/Makefile
index 45735311..4a77aac1 100644
--- a/doc/Makefile
+++ b/doc/Makefile
@@ -36,7 +36,7 @@ DOCS-$(CONFIG_MANPAGES)  += $(MANPAGES)
 DOCS-$(CONFIG_TXTPAGES)  += $(TXTPAGES)
 DOCS = $(DOCS-yes)
 
-DOC_EXAMPLES-$(CONFIG_AVIO_LIST_DIR_EXAMPLE)     += avio_list_dir
+DOC_EXAMPLES-$(CONFIG_AVIO_DIR_CMD_EXAMPLE)      += avio_dir_cmd
 DOC_EXAMPLES-$(CONFIG_AVIO_READING_EXAMPLE)      += avio_reading
 DOC_EXAMPLES-$(CONFIG_AVCODEC_EXAMPLE)           += avcodec
 DOC_EXAMPLES-$(CONFIG_DECODING_ENCODING_EXAMPLE) += decoding_encoding
@@ -124,11 +124,12 @@ $(DOCS) doc/doxy/html: | doc/
 $(DOC_EXAMPLES:%$(EXESUF)=%.o): | doc/examples
 OBJDIRS += doc/examples
 
-DOXY_INPUT      = $(addprefix $(SRC_PATH)/, $(INSTHEADERS) $(DOC_EXAMPLES:%$(EXESUF)=%.c) $(LIB_EXAMPLES:%$(EXESUF)=%.c))
+DOXY_INPUT      = $(INSTHEADERS) $(DOC_EXAMPLES:%$(EXESUF)=%.c) $(LIB_EXAMPLES:%$(EXESUF)=%.c)
+DOXY_INPUT_DEPS = $(addprefix $(SRC_PATH)/, $(DOXY_INPUT))
 
 doc/doxy/html: TAG = DOXY
-doc/doxy/html: $(SRC_PATH)/doc/Doxyfile $(SRC_PATH)/doc/doxy-wrapper.sh $(DOXY_INPUT)
-	$(M)$(SRC_PATH)/doc/doxy-wrapper.sh $(SRC_PATH) $< $(DOXYGEN) $(DOXY_INPUT)
+doc/doxy/html: $(SRC_PATH)/doc/Doxyfile $(SRC_PATH)/doc/doxy-wrapper.sh $(DOXY_INPUT_DEPS)
+	$(M)OUT_DIR=$$PWD/doc/doxy; cd $(SRC_PATH); ./doc/doxy-wrapper.sh $$OUT_DIR $< $(DOXYGEN) $(DOXY_INPUT);
 
 install-doc: install-html install-man
 
diff --git a/doc/build_system.txt b/doc/build_system.txt
index 1efe6b53..a9bd4eb6 100644
--- a/doc/build_system.txt
+++ b/doc/build_system.txt
@@ -9,7 +9,7 @@ V
 
 DBG
     Preprocess x86 external assembler files to a .dbg.asm file in the object
-    directory, which then gets compiled. Helps developping those assembler
+    directory, which then gets compiled. Helps in developing those assembler
     files.
 
 DESTDIR
@@ -25,10 +25,10 @@ all
     Default target, builds all the libraries and the executables.
 
 fate
-    Run the fate test suite, note you must have installed it
+    Run the fate test suite, note that you must have installed it.
 
 fate-list
-    Will list all fate/regression test targets
+    List all fate/regression test targets.
 
 install
     Install headers, libraries and programs.
@@ -43,22 +43,22 @@ libavcodec/api-example
     Build the libavcodec basic example.
 
 libswscale/swscale-test
-    Build the swscale self-test (useful also as example).
+    Build the swscale self-test (useful also as an example).
 
 config
-    Reconfigure the project with current configuration.
+    Reconfigure the project with the current configuration.
 
 
 Useful standard make commands:
 make -t <target>
-    Touch all files that otherwise would be build, this is useful to reduce
-    unneeded rebuilding when changing headers, but note you must force rebuilds
+    Touch all files that otherwise would be built, this is useful to reduce
+    unneeded rebuilding when changing headers, but note that you must force rebuilds
     of files that actually need it by hand then.
 
 make -j<num>
-    rebuild with multiple jobs at the same time. Faster on multi processor systems
+    Rebuild with multiple jobs at the same time. Faster on multi processor systems.
 
 make -k
-    continue build in case of errors, this is useful for the regression tests
-    sometimes but note it will still not run all reg tests.
+    Continue build in case of errors, this is useful for the regression tests
+    sometimes but note that it will still not run all reg tests.
 
diff --git a/doc/codecs.texi b/doc/codecs.texi
index 3c035a5e..b481b4a0 100644
--- a/doc/codecs.texi
+++ b/doc/codecs.texi
@@ -129,7 +129,7 @@ should be @code{1 / frame_rate} and timestamp increments should be
 identically 1.
 
 @item g @var{integer} (@emph{encoding,video})
-Set the group of picture size. Default value is 12.
+Set the group of picture (GOP) size. Default value is 12.
 
 @item ar @var{integer} (@emph{decoding/encoding,audio})
 Set audio sampling rate (in Hz).
@@ -475,6 +475,9 @@ per-block quantization parameter (QP)
 motion vector
 @item dct_coeff
 
+@item green_metadata
+display complexity metadata for the upcoming frame, GoP or for a given duration.
+
 @item skip
 
 @item startcode
@@ -814,13 +817,17 @@ for codecs that support it. See also @file{doc/examples/export_mvs.c}.
 Deprecated, use mpegvideo private options instead.
 
 @item threads @var{integer} (@emph{decoding/encoding,video})
+Set the number of threads to be used, in case the selected codec
+implementation supports multi-threading.
 
 Possible values:
 @table @samp
-@item auto
-detect a good number of threads
+@item auto, 0
+automatically select the number of threads to set
 @end table
 
+Default value is @samp{auto}.
+
 @item me_threshold @var{integer} (@emph{encoding,video})
 Set motion estimation threshold.
 
@@ -1042,7 +1049,11 @@ Possible values:
 @item color_primaries @var{integer} (@emph{decoding/encoding,video})
 @item color_trc @var{integer} (@emph{decoding/encoding,video})
 @item colorspace @var{integer} (@emph{decoding/encoding,video})
+
 @item color_range @var{integer} (@emph{decoding/encoding,video})
+If used as input parameter, it serves as a hint to the decoder, which
+color_range the input has.
+
 @item chroma_sample_location @var{integer} (@emph{decoding/encoding,video})
 
 @item log_level_offset @var{integer}
diff --git a/doc/decoders.texi b/doc/decoders.texi
index 2fb533ae..35771140 100644
--- a/doc/decoders.texi
+++ b/doc/decoders.texi
@@ -25,6 +25,13 @@ enabled decoders.
 A description of some of the currently available video decoders
 follows.
 
+@section hevc
+
+HEVC / H.265 decoder.
+
+Note: the @option{skip_loop_filter} option has effect only at level
+@code{all}.
+
 @section rawvideo
 
 Raw video decoder.
@@ -188,6 +195,25 @@ without this library.
 @chapter Subtitles Decoders
 @c man begin SUBTILES DECODERS
 
+@section dvbsub
+
+@subsection Options
+
+@table @option
+@item compute_clut
+@table @option
+@item -1
+Compute CLUT if no matching CLUT is in the stream.
+@item 0
+Never compute CLUT.
+@item 1
+Always compute CLUT and override the one provided in the stream.
+@end table
+@item dvb_substream
+Selects the dvb substream, or all substreams if -1 which is default.
+
+@end table
+
 @section dvdsub
 
 This codec decodes the bitmap subtitles used in DVDs; the same subtitles can
@@ -256,7 +282,13 @@ Sets the display duration of the decoded teletext pages or subtitles in
 miliseconds. Default value is 30000 which is 30 seconds.
 @item txt_transparent
 Force transparent background of the generated teletext bitmaps. Default value
-is 0 which means an opaque (black) background.
+is 0 which means an opaque background.
+@item txt_opacity
+Sets the opacity (0-255) of the teletext background. If
+@option{txt_transparent} is not set, it only affects characters between a start
+box and an end box, typically subtitles. Default value is 0 if
+@option{txt_transparent} is set, 255 otherwise.
+
 @end table
 
 @c man end SUBTILES DECODERS
diff --git a/doc/demuxers.texi b/doc/demuxers.texi
index 35a15614..3947bf64 100644
--- a/doc/demuxers.texi
+++ b/doc/demuxers.texi
@@ -18,6 +18,12 @@ enabled demuxers.
 
 The description of some of the currently available demuxers follows.
 
+@section aa
+
+Audible Format 2, 3, and 4 demuxer.
+
+This demuxer is used to demux Audible Format 2, 3, and 4 (.aa) files.
+
 @section applehttp
 
 Apple HTTP Live Streaming demuxer.
@@ -98,7 +104,7 @@ All subsequent file-related directives apply to that file.
 
 @item @code{ffconcat version 1.0}
 Identify the script type and version. It also sets the @option{safe} option
-to 1 if it was to its default -1.
+to 1 if it was -1.
 
 To make FFmpeg recognize the format automatically, this directive must
 appears exactly as is (no extra space or byte-order-mark) on the very first
@@ -112,6 +118,47 @@ file is not available or accurate.
 If the duration is set for all files, then it is possible to seek in the
 whole concatenated video.
 
+@item @code{inpoint @var{timestamp}}
+In point of the file. When the demuxer opens the file it instantly seeks to the
+specified timestamp. Seeking is done so that all streams can be presented
+successfully at In point.
+
+This directive works best with intra frame codecs, because for non-intra frame
+ones you will usually get extra packets before the actual In point and the
+decoded content will most likely contain frames before In point too.
+
+For each file, packets before the file In point will have timestamps less than
+the calculated start timestamp of the file (negative in case of the first
+file), and the duration of the files (if not specified by the @code{duration}
+directive) will be reduced based on their specified In point.
+
+Because of potential packets before the specified In point, packet timestamps
+may overlap between two concatenated files.
+
+@item @code{outpoint @var{timestamp}}
+Out point of the file. When the demuxer reaches the specified decoding
+timestamp in any of the streams, it handles it as an end of file condition and
+skips the current and all the remaining packets from all streams.
+
+Out point is exclusive, which means that the demuxer will not output packets
+with a decoding timestamp greater than or equal to Out point.
+
+This directive works best with intra frame codecs and formats where all streams
+are tightly interleaved. For non-intra frame codecs you will usually get
+additional packets with presentation timestamp after Out point therefore the
+decoded content will most likely contain frames after Out point too. If your
+streams are not tightly interleaved you may not get all the packets from all
+streams before Out point and you may only be able to decode the earliest
+stream until Out point.
+
+The duration of the files (if not specified by the @code{duration}
+directive) will be reduced based on their specified Out point.
+
+@item @code{file_packet_metadata @var{key=value}}
+Metadata of the packets of the file. The specified metadata will be set for
+each file packet. You can specify this directive multiple times to add multiple
+metadata entries.
+
 @item @code{stream}
 Introduce a stream in the virtual file.
 All subsequent stream-related directives apply to the last introduced
@@ -145,7 +192,9 @@ component.
 
 If set to 0, any file name is accepted.
 
-The default is -1, it is equivalent to 1 if the format was automatically
+The default is 1.
+
+-1 is equivalent to 1 if the format was automatically
 probed and 0 otherwise.
 
 @item auto_convert
@@ -157,8 +206,43 @@ Currently, the only conversion is adding the h264_mp4toannexb bitstream
 filter to H.264 streams in MP4 format. This is necessary in particular if
 there are resolution changes.
 
+@item segment_time_metadata
+If set to 1, every packet will contain the @var{lavf.concat.start_time} and the
+@var{lavf.concat.duration} packet metadata values which are the start_time and
+the duration of the respective file segments in the concatenated output
+expressed in microseconds. The duration metadata is only set if it is known
+based on the concat file.
+The default is 0.
+
 @end table
 
+@subsection Examples
+
+@itemize
+@item
+Use absolute filenames and include some comments:
+@example
+# my first filename
+file /mnt/share/file-1.wav
+# my second filename including whitespace
+file '/mnt/share/file 2.wav'
+# my third filename including whitespace plus single quote
+file '/mnt/share/file 3'\''.wav'
+@end example
+
+@item
+Allow for input format auto-probing, use safe filenames and set the duration of
+the first file:
+@example
+ffconcat version 1.0
+
+file file-1.wav
+duration 20.0
+
+file subdir/file-2.wav
+@end example
+@end itemize
+
 @section flv
 
 Adobe Flash Video Format demuxer.
@@ -183,18 +267,6 @@ track. Track indexes start at 0. The demuxer exports the number of tracks as
 
 For very large files, the @option{max_size} option may have to be adjusted.
 
-@section libquvi
-
-Play media from Internet services using the quvi project.
-
-The demuxer accepts a @option{format} option to request a specific quality. It
-is by default set to @var{best}.
-
-See @url{http://quvi.sourceforge.net/} for more information.
-
-FFmpeg needs to be built with @code{--enable-libquvi} for this demuxer to be
-enabled.
-
 @section gif
 
 Animated GIF demuxer.
@@ -369,17 +441,62 @@ ffmpeg -framerate 10 -pattern_type glob -i "*.png" out.mkv
 @end example
 @end itemize
 
+@section mov/mp4/3gp/QuickTime
+
+Quicktime / MP4 demuxer.
+
+This demuxer accepts the following options:
+@table @option
+@item enable_drefs
+Enable loading of external tracks, disabled by default.
+Enabling this can theoretically leak information in some use cases.
+
+@item use_absolute_path
+Allows loading of external tracks via absolute paths, disabled by default.
+Enabling this poses a security risk. It should only be enabled if the source
+is known to be non-malicious.
+
+@end table
+
 @section mpegts
 
 MPEG-2 transport stream demuxer.
 
+This demuxer accepts the following options:
 @table @option
+@item resync_size
+Set size limit for looking up a new synchronization. Default value is
+65536.
 
 @item fix_teletext_pts
-Overrides teletext packet PTS and DTS values with the timestamps calculated
+Override teletext packet PTS and DTS values with the timestamps calculated
 from the PCR of the first program which the teletext stream is part of and is
 not discarded. Default value is 1, set this option to 0 if you want your
 teletext packet PTS and DTS values untouched.
+
+@item ts_packetsize
+Output option carrying the raw packet size in bytes.
+Show the detected raw packet size, cannot be set by the user.
+
+@item scan_all_pmts
+Scan and combine all PMTs. The value is an integer with value from -1
+to 1 (-1 means automatic setting, 1 means enabled, 0 means
+disabled). Default value is -1.
+@end table
+
+@section mpjpeg
+
+MJPEG encapsulated in multi-part MIME demuxer.
+
+This demuxer allows reading of MJPEG, where each frame is represented as a part of
+multipart/x-mixed-replace stream.
+@table @option
+
+@item strict_mime_boundary
+Default implementation applies a relaxed standard to multi-part MIME boundary detection,
+to prevent regression with numerous existing endpoints not generating a proper MIME
+MJPEG stream. Turning this option on by setting it to 1 will result in a stricter check
+of the boundary value.
 @end table
 
 @section rawvideo
diff --git a/doc/developer.texi b/doc/developer.texi
index d9ccf7b9..6db93cef 100644
--- a/doc/developer.texi
+++ b/doc/developer.texi
@@ -28,14 +28,14 @@ this document.
 
 For more detailed legal information about the use of FFmpeg in
 external programs read the @file{LICENSE} file in the source tree and
-consult @url{http://ffmpeg.org/legal.html}.
+consult @url{https://ffmpeg.org/legal.html}.
 
 @section Contributing
 
-There are 3 ways by which code gets into ffmpeg.
+There are 3 ways by which code gets into FFmpeg.
 @itemize @bullet
-@item Submitting Patches to the main developer mailing list
-      see @ref{Submitting patches} for details.
+@item Submitting patches to the main developer mailing list.
+      See @ref{Submitting patches} for details.
 @item Directly committing changes to the main tree.
 @item Committing changes to a git clone, for example on github.com or
       gitorious.org. And asking us to merge these changes.
@@ -65,6 +65,9 @@ rejected by the git repository.
 @item
 You should try to limit your code lines to 80 characters; however, do so if
 and only if this improves readability.
+
+@item
+K&R coding style is used.
 @end itemize
 The presentation is one inspired by 'indent -i4 -kr -nut'.
 
@@ -124,10 +127,10 @@ the @samp{inline} keyword;
 @samp{//} comments;
 
 @item
-designated struct initializers (@samp{struct s x = @{ .i = 17 @};})
+designated struct initializers (@samp{struct s x = @{ .i = 17 @};});
 
 @item
-compound literals (@samp{x = (struct s) @{ 17, 23 @};})
+compound literals (@samp{x = (struct s) @{ 17, 23 @};}).
 @end itemize
 
 These features are supported by all compilers we care about, so we will not
@@ -156,7 +159,7 @@ GCC statement expressions (@samp{(x = (@{ int y = 4; y; @})}).
 All names should be composed with underscores (_), not CamelCase. For example,
 @samp{avfilter_get_video_buffer} is an acceptable function name and
 @samp{AVFilterGetVideo} is not. The exception from this are type names, like
-for example structs and enums; they should always be in the CamelCase
+for example structs and enums; they should always be in CamelCase.
 
 There are the following conventions for naming variables and functions:
 
@@ -394,8 +397,8 @@ or obfuscates the code.
 Make sure that no parts of the codebase that you maintain are missing from the
 @file{MAINTAINERS} file. If something that you want to maintain is missing add it with
 your name after it.
-If at some point you no longer want to maintain some code, then please help
-finding a new maintainer and also don't forget updating the @file{MAINTAINERS} file.
+If at some point you no longer want to maintain some code, then please help in
+finding a new maintainer and also don't forget to update the @file{MAINTAINERS} file.
 @end enumerate
 
 We think our rules are not too hard. If you have comments, contact us.
@@ -407,7 +410,7 @@ First, read the @ref{Coding Rules} above if you did not yet, in particular
 the rules regarding patch submission.
 
 When you submit your patch, please use @code{git format-patch} or
-@code{git send-email}. We cannot read other diffs :-)
+@code{git send-email}. We cannot read other diffs :-).
 
 Also please do not submit a patch which contains several unrelated changes.
 Split it into separate, self-contained pieces. This does not mean splitting
@@ -430,7 +433,7 @@ Also please if you send several patches, send each patch as a separate mail,
 do not attach several unrelated patches to the same mail.
 
 Patches should be posted to the
-@uref{http://lists.ffmpeg.org/mailman/listinfo/ffmpeg-devel, ffmpeg-devel}
+@uref{https://lists.ffmpeg.org/mailman/listinfo/ffmpeg-devel, ffmpeg-devel}
 mailing list. Use @code{git send-email} when possible since it will properly
 send patches without requiring extra care. If you cannot, then send patches
 as base64-encoded attachments, so your patch is not trashed during
@@ -543,6 +546,10 @@ tools/trasher, the noise bitstream filter, and
 should not crash, end in a (near) infinite loop, or allocate ridiculous
 amounts of memory when fed damaged data.
 
+@item
+Did you test your decoder or demuxer against sample files?
+Samples may be obtained at @url{https://samples.ffmpeg.org}.
+
 @item
 Does the patch not mix functional and cosmetic changes?
 
@@ -563,7 +570,7 @@ If the patch fixes a bug, did you provide a verbose analysis of the bug?
 If the patch fixes a bug, did you provide enough information, including
 a sample, so the bug can be reproduced and the fix can be verified?
 Note please do not attach samples >100k to mails but rather provide a
-URL, you can upload to ftp://upload.ffmpeg.org
+URL, you can upload to ftp://upload.ffmpeg.org.
 
 @item
 Did you provide a verbose summary about what the patch does change?
@@ -592,10 +599,10 @@ Lines with similar content should be aligned vertically when doing so
 improves readability.
 
 @item
-Consider to add a regression test for your code.
+Consider adding a regression test for your code.
 
 @item
-If you added YASM code please check that things still work with --disable-yasm
+If you added YASM code please check that things still work with --disable-yasm.
 
 @item
 Make sure you check the return values of function and return appropriate
@@ -633,6 +640,10 @@ not related to the comments received during review. Such patches will
 be rejected. Instead, submit significant changes or new features as
 separate patches.
 
+Everyone is welcome to review patches. Also if you are waiting for your patch
+to be reviewed, please consider helping to review other patches, that is a great
+way to get everyone's patches reviewed sooner.
+
 @anchor{Regression tests}
 @section Regression tests
 
@@ -656,7 +667,6 @@ Once you have a working fate test and fate sample, provide in the commit
 message or introductory message for the patch series that you post to
 the ffmpeg-devel mailing list, a direct link to download the sample media.
 
-
 @subsection Visualizing Test Coverage
 
 The FFmpeg build system allows visualizing the test coverage in an easy
@@ -704,7 +714,7 @@ FFmpeg maintains a set of @strong{release branches}, which are the
 recommended deliverable for system integrators and distributors (such as
 Linux distributions, etc.). At regular times, a @strong{release
 manager} prepares, tests and publishes tarballs on the
-@url{http://ffmpeg.org} website.
+@url{https://ffmpeg.org} website.
 
 There are two kinds of releases:
 
@@ -783,7 +793,7 @@ Prepare the release tarballs in @code{bz2} and @code{gz} formats, and
 supplementing files that contain @code{gpg} signatures
 
 @item
-Publish the tarballs at @url{http://ffmpeg.org/releases}. Create and
+Publish the tarballs at @url{https://ffmpeg.org/releases}. Create and
 push an annotated tag in the form @code{nX}, with @code{X}
 containing the version number.
 
@@ -795,7 +805,7 @@ with a news entry for the website.
 Publish the news entry.
 
 @item
-Send announcement to the mailing list.
+Send an announcement to the mailing list.
 @end enumerate
 
 @bye
diff --git a/doc/doxy-wrapper.sh b/doc/doxy-wrapper.sh
index 9720e540..fe0102b5 100755
--- a/doc/doxy-wrapper.sh
+++ b/doc/doxy-wrapper.sh
@@ -1,21 +1,21 @@
 #!/bin/sh
 
-SRC_PATH="${1}"
+OUT_DIR="${1}"
 DOXYFILE="${2}"
 DOXYGEN="${3}"
 
 shift 3
 
-if [ -e "$SRC_PATH/VERSION" ]; then
-    VERSION=`cat "$SRC_PATH/VERSION"`
+if [ -e "VERSION" ]; then
+    VERSION=`cat "VERSION"`
 else
-    VERSION=`cd "$SRC_PATH"; git describe`
+    VERSION=`git describe`
 fi
 
 $DOXYGEN - <<EOF
 @INCLUDE        = ${DOXYFILE}
 INPUT           = $@
-EXAMPLE_PATH    = ${SRC_PATH}/doc/examples
 HTML_TIMESTAMP  = NO
 PROJECT_NUMBER  = $VERSION
+OUTPUT_DIRECTORY = $OUT_DIR
 EOF
diff --git a/doc/encoders.texi b/doc/encoders.texi
index 753e6833..f38cad3e 100644
--- a/doc/encoders.texi
+++ b/doc/encoders.texi
@@ -30,81 +30,119 @@ follows.
 
 Advanced Audio Coding (AAC) encoder.
 
-This encoder is an experimental FFmpeg-native AAC encoder. Currently only the
-low complexity (AAC-LC) profile is supported. To use this encoder, you must set
-@option{strict} option to @samp{experimental} or lower.
-
-As this encoder is experimental, unexpected behavior may exist from time to
-time. For a more stable AAC encoder, see @ref{libvo-aacenc}. However, be warned
-that it has a worse quality reported by some users.
-
-@c todo @ref{libaacplus}
-See also @ref{libfdk-aac-enc,,libfdk_aac} and @ref{libfaac}.
+This encoder is the default AAC encoder, natively implemented into FFmpeg. Its
+quality is on par with or better than libfdk_aac at the default bitrate of 128kbps.
+This encoder also implements more options, profiles and samplerates than
+other encoders (with only the AAC-HE profile pending to be implemented) so this
+encoder has become the default and is the recommended choice.
 
 @subsection Options
 
 @table @option
 @item b
 Set bit rate in bits/s. Setting this automatically activates constant bit rate
-(CBR) mode.
+(CBR) mode. If this option is unspecified it is set to 128kbps.
 
 @item q
 Set quality for variable bit rate (VBR) mode. This option is valid only using
 the @command{ffmpeg} command-line tool. For library interface users, use
 @option{global_quality}.
 
-@item stereo_mode
-Set stereo encoding mode. Possible values:
-
-@table @samp
-@item auto
-Automatically selected by the encoder.
-
-@item ms_off
-Disable middle/side encoding. This is the default.
-
-@item ms_force
-Force middle/side encoding.
-@end table
+@item cutoff
+Set cutoff frequency. If unspecified will allow the encoder to dynamically
+adjust the cutoff to improve clarity on low bitrates.
 
 @item aac_coder
 Set AAC encoder coding method. Possible values:
 
 @table @samp
-@item faac
-FAAC-inspired method.
-
-This method is a simplified reimplementation of the method used in FAAC, which
-sets thresholds proportional to the band energies, and then decreases all the
-thresholds with quantizer steps to find the appropriate quantization with
-distortion below threshold band by band.
-
-The quality of this method is comparable to the two loop searching method
-described below, but somewhat a little better and slower.
-
-@item anmr
-Average noise to mask ratio (ANMR) trellis-based solution.
-
-This has a theoretic best quality out of all the coding methods, but at the
-cost of the slowest speed.
-
 @item twoloop
 Two loop searching (TLS) method.
 
 This method first sets quantizers depending on band thresholds and then tries
 to find an optimal combination by adding or subtracting a specific value from
 all quantizers and adjusting some individual quantizer a little.
+Will tune itself based on whether aac_is/aac_ms/aac_pns are enabled.
+This is the default choice for a coder.
 
-This method produces similar quality with the FAAC method and is the default.
+@item anmr
+Average noise to mask ratio (ANMR) trellis-based solution.
+
+This is an experimental coder which currently produces a lower quality, is more
+unstable and is slower than the default twoloop coder but has potential.
+Currently has no support for the @option{aac_is} or @option{aac_pns} options.
+Not currently recommended.
 
 @item fast
 Constant quantizer method.
 
 This method sets a constant quantizer for all bands. This is the fastest of all
-the methods, yet produces the worst quality.
+the methods and has no rate control or support for @option{aac_is} or
+@option{aac_pns}.
+Not recommended.
 
 @end table
 
+@item aac_ms
+Sets mid/side coding mode. The default value of auto will automatically use
+M/S with bands which will benefit from such coding. Can be forced for all bands
+using the value "enable", which is mainly useful for debugging or disabled using
+"disable".
+
+@item aac_is
+Sets intensity stereo coding tool usage. By default, it's enabled and will
+automatically toggle IS for similar pairs of stereo bands if it's beneficial.
+Can be disabled for debugging by setting the value to "disable".
+
+@item aac_pns
+Uses perceptual noise substitution to replace low entropy high frequency bands
+with imperceivable white noise during the decoding process. By default, it's
+enabled, but can be disabled for debugging purposes by using "disable".
+
+@item aac_tns
+Enables the use of a multitap FIR filter which spans through the high frequency
+bands to hide quantization noise during the encoding process and is reverted
+by the decoder. As well as decreasing unpleasant artifacts in the high range
+this also reduces the entropy in the high bands and allows for more bits to
+be used by the mid-low bands. By default it's enabled but can be disabled for
+debugging by setting the option to "disable".
+
+@item aac_ltp
+Enables the use of the long term prediction extension which increases coding
+efficiency in very low bandwidth situations such as encoding of voice or
+solo piano music by extending constant harmonic peaks in bands throughout
+frames. This option is implied by profile:a aac_low and is incompatible with
+aac_pred. Use in conjunction with @option{-ar} to decrease the samplerate.
+
+@item aac_pred
+Enables the use of a more traditional style of prediction where the spectral
+coefficients transmitted are replaced by the difference of the current
+coefficients minus the previous "predicted" coefficients. In theory and sometimes
+in practice this can improve quality for low to mid bitrate audio.
+This option implies the aac_main profile and is incompatible with aac_ltp.
+
+@item profile
+Sets the encoding profile, possible values:
+
+@table @samp
+@item aac_low
+The default, AAC "Low-complexity" profile. Is the most compatible and produces
+decent quality.
+
+@item mpeg2_aac_low
+Equivalent to -profile:a aac_low -aac_pns 0. PNS was introduced with the MPEG4
+specifications.
+
+@item aac_ltp
+Long term prediction profile, is enabled by and will enable the aac_ltp option.
+Introduced in MPEG4.
+
+@item aac_main
+Main-type prediction profile, is enabled by and will enable the aac_pred option.
+Introduced in MPEG2.
+
+If this option is unspecified it is set to @samp{aac_low}.
+@end table
 @end table
 
 @section ac3 and ac3_fixed
@@ -578,15 +616,13 @@ and slightly improves compression.
 
 libfaac AAC (Advanced Audio Coding) encoder wrapper.
 
-Requires the presence of the libfaac headers and library during
-configuration. You need to explicitly configure the build with
-@code{--enable-libfaac --enable-nonfree}.
-
-This encoder is considered to be of higher quality with respect to the
-@ref{aacenc,,the native experimental FFmpeg AAC encoder}.
+This encoder is of much lower quality and is more unstable than any other AAC
+encoders, so it's highly recommended to instead use other encoders, like
+@ref{aacenc,,the native FFmpeg AAC encoder}.
 
-For more information see the libfaac project at
-@url{http://www.audiocoding.com/faac.html/}.
+This encoder also requires the presence of the libfaac headers and library
+during configuration. You need to explicitly configure the build with
+@code{--enable-libfaac --enable-nonfree}.
 
 @subsection Options
 
@@ -694,9 +730,10 @@ configuration. You need to explicitly configure the build with
 so if you allow the use of GPL, you should configure with
 @code{--enable-gpl --enable-nonfree --enable-libfdk-aac}.
 
-This encoder is considered to be of higher quality with respect to
-both @ref{aacenc,,the native experimental FFmpeg AAC encoder} and
-@ref{libfaac}.
+This encoder is considered to produce output on par with or worse than
+@ref{aacenc,,the native FFmpeg AAC encoder} at 128kbps, but can often produce
+better sounding audio at identical or lower bitrates and has support for the
+AAC-HE profiles.
 
 VBR encoding, enabled through the @option{vbr} or @option{flags
 +qscale} options, is experimental and only works with some
@@ -1038,31 +1075,6 @@ Set MPEG audio original flag when set to 1. The default value is 0
 
 @end table
 
-@anchor{libvo-aacenc}
-@section libvo-aacenc
-
-VisualOn AAC encoder.
-
-Requires the presence of the libvo-aacenc headers and library during
-configuration. You need to explicitly configure the build with
-@code{--enable-libvo-aacenc --enable-version3}.
-
-This encoder is considered to be worse than the
-@ref{aacenc,,native experimental FFmpeg AAC encoder}, according to
-multiple sources.
-
-@subsection Options
-
-The VisualOn AAC encoder only support encoding AAC-LC and up to 2
-channels. It is also CBR-only.
-
-@table @option
-
-@item b
-Set bit rate in bits/s.
-
-@end table
-
 @section libvo-amrwbenc
 
 VisualOn Adaptive Multi-Rate Wideband encoder.
@@ -1125,7 +1137,7 @@ kilobits/s.
 
 @item vbr (@emph{vbr}, @emph{hard-cbr}, and @emph{cvbr})
 Set VBR mode. The FFmpeg @option{vbr} option has the following
-valid arguments, with the their @command{opusenc} equivalent options
+valid arguments, with the @command{opusenc} equivalent options
 in parentheses:
 
 @table @samp
@@ -1342,6 +1354,96 @@ disabled
 A description of some of the currently available video encoders
 follows.
 
+@section libopenh264
+
+Cisco libopenh264 H.264/MPEG-4 AVC encoder wrapper.
+
+This encoder requires the presence of the libopenh264 headers and
+library during configuration. You need to explicitly configure the
+build with @code{--enable-libopenh264}. The library is detected using
+@command{pkg-config}.
+
+For more information about the library see
+@url{http://www.openh264.org}.
+
+@subsection Options
+
+The following FFmpeg global options affect the configurations of the
+libopenh264 encoder.
+
+@table @option
+@item b
+Set the bitrate (as a number of bits per second).
+
+@item g
+Set the GOP size.
+
+@item maxrate
+Set the max bitrate (as a number of bits per second).
+
+@item flags +global_header
+Set global header in the bitstream.
+
+@item slices
+Set the number of slices, used in parallelized encoding. Default value
+is 0. This is only used when @option{slice_mode} is set to
+@samp{fixed}.
+
+@item slice_mode
+Set slice mode. Can assume one of the following possible values:
+
+@table @samp
+@item fixed
+a fixed number of slices
+@item rowmb
+one slice per row of macroblocks
+@item auto
+automatic number of slices according to number of threads
+@item dyn
+dynamic slicing
+@end table
+
+Default value is @samp{auto}.
+
+@item loopfilter
+Enable loop filter, if set to 1 (automatically enabled). To disable
+set a value of 0.
+
+@item profile
+Set profile restrictions. If set to the value of @samp{main} enable
+CABAC (set the @code{SEncParamExt.iEntropyCodingModeFlag} flag to 1).
+
+@item max_nal_size
+Set maximum NAL size in bytes.
+
+@item allow_skip_frames
+Allow skipping frames to hit the target bitrate if set to 1.
+@end table
+
+@section jpeg2000
+
+The native JPEG 2000 encoder is lossy by default; the @code{-q:v}
+option can be used to set the encoding quality. Lossless encoding
+can be selected with @code{-pred 1}.
+
+@subsection Options
+
+@table @option
+@item format
+Can be set to either @code{j2k} or @code{jp2} (the default) that
+makes it possible to store non-rgb pix_fmts.
+
+@end table
+
+@section snow
+
+@subsection Options
+
+@table @option
+@item iterative_dia_size
+Set the diamond size for the iterative motion estimation.
+@end table
+
 @section libtheora
 
 libtheora Theora encoder wrapper.
@@ -1416,113 +1518,159 @@ You need to explicitly configure the build with @code{--enable-libvpx}.
 
 @subsection Options
 
-Mapping from FFmpeg to libvpx options with conversion notes in parentheses.
+The following options are supported by the libvpx wrapper. The
+@command{vpxenc}-equivalent options or values are listed in parentheses
+for easy migration.
 
-@table @option
+To reduce the duplication of documentation, only the private options
+and some others requiring special attention are documented here. For
+the documentation of the undocumented generic options, see
+@ref{codec-options,,the Codec Options chapter}.
 
-@item threads
-g_threads
+To get more documentation of the libvpx options, invoke the command
+@command{ffmpeg -h encoder=libvpx}, @command{ffmpeg -h encoder=libvpx-vp9} or
+@command{vpxenc --help}. Further information is available in the libvpx API
+documentation.
 
-@item profile
-g_profile
+@table @option
 
-@item vb
-rc_target_bitrate
+@item b (@emph{target-bitrate})
+Set bitrate in bits/s. Note that FFmpeg's @option{b} option is
+expressed in bits/s, while @command{vpxenc}'s @option{target-bitrate} is in
+kilobits/s.
 
-@item g
-kf_max_dist
+@item g (@emph{kf-max-dist})
 
-@item keyint_min
-kf_min_dist
+@item keyint_min (@emph{kf-min-dist})
 
-@item qmin
-rc_min_quantizer
+@item qmin (@emph{min-q})
 
-@item qmax
-rc_max_quantizer
+@item qmax (@emph{max-q})
 
-@item bufsize, vb
-rc_buf_sz
-@code{(bufsize * 1000 / vb)}
+@item bufsize (@emph{buf-sz}, @emph{buf-optimal-sz})
+Set ratecontrol buffer size (in bits). Note @command{vpxenc}'s options are
+specified in milliseconds, the libvpx wrapper converts this value as follows:
+@code{buf-sz = bufsize * 1000 / bitrate},
+@code{buf-optimal-sz = bufsize * 1000 / bitrate * 5 / 6}.
 
-rc_buf_optimal_sz
-@code{(bufsize * 1000 / vb * 5 / 6)}
+@item rc_init_occupancy (@emph{buf-initial-sz})
+Set number of bits which should be loaded into the rc buffer before decoding
+starts. Note @command{vpxenc}'s option is specified in milliseconds, the libvpx
+wrapper converts this value as follows:
+@code{rc_init_occupancy * 1000 / bitrate}.
 
-@item rc_init_occupancy, vb
-rc_buf_initial_sz
-@code{(rc_init_occupancy * 1000 / vb)}
+@item undershoot-pct
+Set datarate undershoot (min) percentage of the target bitrate.
 
-@item rc_buffer_aggressivity
-rc_undershoot_pct
+@item overshoot-pct
+Set datarate overshoot (max) percentage of the target bitrate.
 
-@item skip_threshold
-rc_dropframe_thresh
+@item skip_threshold (@emph{drop-frame})
 
-@item qcomp
-rc_2pass_vbr_bias_pct
+@item qcomp (@emph{bias-pct})
 
-@item maxrate, vb
-rc_2pass_vbr_maxsection_pct
-@code{(maxrate * 100 / vb)}
+@item maxrate (@emph{maxsection-pct})
+Set GOP max bitrate in bits/s. Note @command{vpxenc}'s option is specified as a
+percentage of the target bitrate, the libvpx wrapper converts this value as
+follows: @code{(maxrate * 100 / bitrate)}.
 
-@item minrate, vb
-rc_2pass_vbr_minsection_pct
-@code{(minrate * 100 / vb)}
+@item minrate (@emph{minsection-pct})
+Set GOP min bitrate in bits/s. Note @command{vpxenc}'s option is specified as a
+percentage of the target bitrate, the libvpx wrapper converts this value as
+follows: @code{(minrate * 100 / bitrate)}.
 
-@item minrate, maxrate, vb
-@code{VPX_CBR}
-@code{(minrate == maxrate == vb)}
+@item minrate, maxrate, b @emph{end-usage=cbr}
+@code{(minrate == maxrate == bitrate)}.
 
-@item crf
-@code{VPX_CQ}, @code{VP8E_SET_CQ_LEVEL}
+@item crf (@emph{end-usage=cq}, @emph{cq-level})
 
-@item quality
-@table @option
-@item @var{best}
-@code{VPX_DL_BEST_QUALITY}
-@item @var{good}
-@code{VPX_DL_GOOD_QUALITY}
-@item @var{realtime}
-@code{VPX_DL_REALTIME}
+@item tune (@emph{tune})
+@table @samp
+@item psnr (@emph{psnr})
+@item ssim (@emph{ssim})
 @end table
 
-@item speed
-@code{VP8E_SET_CPUUSED}
+@item quality, deadline (@emph{deadline})
+@table @samp
+@item best
+Use best quality deadline. Poorly named and quite slow, this option should be
+avoided as it may give worse quality output than good.
+@item good
+Use good quality deadline. This is a good trade-off between speed and quality
+when used with the @option{cpu-used} option.
+@item realtime
+Use realtime quality deadline.
+@end table
 
-@item nr
-@code{VP8E_SET_NOISE_SENSITIVITY}
+@item speed, cpu-used (@emph{cpu-used})
+Set quality/speed ratio modifier. Higher values speed up the encode at the cost
+of quality.
 
-@item mb_threshold
-@code{VP8E_SET_STATIC_THRESHOLD}
+@item nr (@emph{noise-sensitivity})
 
-@item slices
-@code{VP8E_SET_TOKEN_PARTITIONS}
+@item static-thresh
+Set a change threshold on blocks below which they will be skipped by the
+encoder.
+
+@item slices (@emph{token-parts})
+Note that FFmpeg's @option{slices} option gives the total number of partitions,
+while @command{vpxenc}'s @option{token-parts} is given as
+@code{log2(partitions)}.
 
 @item max-intra-rate
-@code{VP8E_SET_MAX_INTRA_BITRATE_PCT}
+Set maximum I-frame bitrate as a percentage of the target bitrate. A value of 0
+means unlimited.
 
 @item force_key_frames
 @code{VPX_EFLAG_FORCE_KF}
 
 @item Alternate reference frame related
 @table @option
-@item vp8flags altref
-@code{VP8E_SET_ENABLEAUTOALTREF}
-@item @var{arnr_max_frames}
-@code{VP8E_SET_ARNR_MAXFRAMES}
-@item @var{arnr_type}
-@code{VP8E_SET_ARNR_TYPE}
-@item @var{arnr_strength}
-@code{VP8E_SET_ARNR_STRENGTH}
-@item @var{rc_lookahead}
-g_lag_in_frames
+@item auto-alt-ref
+Enable use of alternate reference frames (2-pass only).
+@item arnr-max-frames
+Set altref noise reduction max frame count.
+@item arnr-type
+Set altref noise reduction filter type: backward, forward, centered.
+@item arnr-strength
+Set altref noise reduction filter strength.
+@item rc-lookahead, lag-in-frames (@emph{lag-in-frames})
+Set number of frames to look ahead for frametype and ratecontrol.
 @end table
 
-@item vp8flags error_resilient
-g_error_resilient
+@item error-resilient
+Enable error resiliency features.
 
-@item aq_mode
-@code{VP9E_SET_AQ_MODE}
+@item VP9-specific options
+@table @option
+@item lossless
+Enable lossless mode.
+@item tile-columns
+Set number of tile columns to use. Note this is given as
+@code{log2(tile_columns)}. For example, 8 tile columns would be requested by
+setting the @option{tile-columns} option to 3.
+@item tile-rows
+Set number of tile rows to use. Note this is given as @code{log2(tile_rows)}.
+For example, 4 tile rows would be requested by setting the @option{tile-rows}
+option to 2.
+@item frame-parallel
+Enable frame parallel decodability features.
+@item aq-mode
+Set adaptive quantization mode (0: off (default), 1: variance, 2: complexity, 3:
+cyclic refresh).
+@item colorspace @emph{color-space}
+Set input color space. The VP9 bitstream supports signaling the following
+colorspaces:
+@table @option
+@item @samp{rgb} @emph{sRGB}
+@item @samp{bt709} @emph{bt709}
+@item @samp{unspecified} @emph{unknown}
+@item @samp{bt470bg} @emph{bt601}
+@item @samp{smpte170m} @emph{smpte170}
+@item @samp{smpte240m} @emph{smpte240}
+@item @samp{bt2020_ncl} @emph{bt2020}
+@end table
+@end table
 
 @end table
 
@@ -1947,6 +2095,10 @@ For example to specify libx264 encoding options with @command{ffmpeg}:
 ffmpeg -i foo.mpg -vcodec libx264 -x264opts keyint=123:min-keyint=20 -an out.mkv
 @end example
 
+@item a53cc @var{boolean}
+Import closed captions (which must be in ATSC compatible format) into output.
+Only the mpeg2 and h264 decoders provide these. Default is 0 (off).
+
 @item x264-params (N.A.)
 Override the x264 configuration using a :-separated list of key=value
 parameters.
@@ -2260,6 +2412,180 @@ Setting a higher @option{bits_per_mb} limit will improve the speed.
 For the fastest encoding speed set the @option{qscale} parameter (4 is the
 recommended value) and do not set a size constraint.
 
+@section libkvazaar
+
+Kvazaar H.265/HEVC encoder.
+
+Requires the presence of the libkvazaar headers and library during
+configuration. You need to explicitly configure the build with
+@option{--enable-libkvazaar}.
+
+@subsection Options
+
+@table @option
+
+@item b
+Set target video bitrate in bit/s and enable rate control.
+
+@item kvazaar-params
+Set kvazaar parameters as a list of @var{name}=@var{value} pairs separated
+by commas (,). See kvazaar documentation for a list of options.
+
+@end table
+
+@section QSV encoders
+
+The family of Intel QuickSync Video encoders (MPEG-2, H.264 and HEVC).
+
+The ratecontrol method is selected as follows:
+
+@itemize @bullet
+@item
+When @option{global_quality} is specified, a quality-based mode is used.
+Specifically this means either
+@itemize @minus
+@item
+@var{CQP} - constant quantizer scale, when the @option{qscale} codec flag is
+also set (the @option{-qscale} ffmpeg option).
+
+@item
+@var{LA_ICQ} - intelligent constant quality with lookahead, when the
+@option{look_ahead} option is also set.
+
+@item
+@var{ICQ} - intelligent constant quality otherwise.
+@end itemize
+
+@item
+Otherwise, a bitrate-based mode is used. For all of those, you should specify at
+least the desired average bitrate with the @option{b} option.
+@itemize @minus
+@item
+@var{LA} - VBR with lookahead, when the @option{look_ahead} option is specified.
+
+@item
+@var{VCM} - video conferencing mode, when the @option{vcm} option is set.
+
+@item
+@var{CBR} - constant bitrate, when @option{maxrate} is specified and equal to
+the average bitrate.
+
+@item
+@var{VBR} - variable bitrate, when @option{maxrate} is specified, but is higher
+than the average bitrate.
+
+@item
+@var{AVBR} - average VBR mode, when @option{maxrate} is not specified. This mode
+is further configured by the @option{avbr_accuracy} and
+@option{avbr_convergence} options.
+@end itemize
+@end itemize
+
+Note that depending on your system, a different mode than the one you specified
+may be selected by the encoder. Set the verbosity level to @var{verbose} or
+higher to see the actual settings used by the QSV runtime.
+
+Additional libavcodec global options are mapped to MSDK options as follows:
+
+@itemize
+@item
+@option{g/gop_size} -> @option{GopPicSize}
+
+@item
+@option{bf/max_b_frames}+1 -> @option{GopRefDist}
+
+@item
+@option{rc_init_occupancy/rc_initial_buffer_occupancy} ->
+@option{InitialDelayInKB}
+
+@item
+@option{slices} -> @option{NumSlice}
+
+@item
+@option{refs} -> @option{NumRefFrame}
+
+@item
+@option{b_strategy/b_frame_strategy} -> @option{BRefType}
+
+@item
+@option{cgop/CLOSED_GOP} codec flag -> @option{GopOptFlag}
+
+@item
+For the @var{CQP} mode, the @option{i_qfactor/i_qoffset} and
+@option{b_qfactor/b_qoffset} set the difference between @var{QPP} and @var{QPI},
+and @var{QPP} and @var{QPB} respectively.
+
+@item
+Setting the @option{coder} option to the value @var{vlc} will make the H.264
+encoder use CAVLC instead of CABAC.
+
+@end itemize
+
+@section vc2
+
+SMPTE VC-2 (previously BBC Dirac Pro). This codec was primarily aimed at
+professional broadcasting but since it supports yuv420, yuv422 and yuv444 at
+8 (limited range or full range), 10 or 12 bits, this makes it suitable for
+other tasks which require low overhead and low compression (like screen
+recording).
+
+@subsection Options
+
+@table @option
+
+@item b
+Sets target video bitrate. Usually that's around 1:6 of the uncompressed
+video bitrate (e.g. for 1920x1080 50fps yuv422p10 that's around 400Mbps). Higher
+values (close to the uncompressed bitrate) turn on lossless compression mode.
+
+@item field_order
+Enables field coding when set (e.g. to tt - top field first) for interlaced
+inputs. Should increase compression with interlaced content as it splits the
+fields and encodes each separately.
+
+@item wavelet_depth
+Sets the total amount of wavelet transforms to apply, between 1 and 5 (default).
+Lower values reduce compression and quality. Less capable decoders may not be
+able to handle values of @option{wavelet_depth} over 3.
+
+@item wavelet_type
+Sets the transform type. Currently only @var{5_3} (LeGall) and @var{9_7}
+(Deslauriers-Dubuc)
+are implemented, with 9_7 being the one with better compression and thus
+is the default.
+
+@item slice_width
+@item slice_height
+Sets the slice size for each slice. Larger values result in better compression.
+For compatibility with other more limited decoders use @option{slice_width} of
+32 and @option{slice_height} of 8.
+
+@item tolerance
+Sets the undershoot tolerance of the rate control system in percent. This is
+to prevent an expensive search from being run.
+
+@item qm
+Sets the quantization matrix preset to use by default or when @option{wavelet_depth}
+is set to 5:
+@itemize @minus
+@item
+@var{default}
+Uses the default quantization matrix from the specifications, extended with
+values for the fifth level. This provides a good balance between keeping detail
+and omitting artifacts.
+
+@item
+@var{flat}
+Use a completely zeroed out quantization matrix. This increases PSNR but might
+reduce perceived quality. Use in bogus benchmarks.
+
+@item
+@var{color}
+Reduces detail but attempts to preserve color at extremely low bitrates.
+@end itemize
+
+@end table
+
 @c man end VIDEO ENCODERS
 
 @chapter Subtitles Encoders
diff --git a/doc/errno.txt b/doc/errno.txt
index 31cab26f..933a4de5 100644
--- a/doc/errno.txt
+++ b/doc/errno.txt
@@ -76,7 +76,7 @@ EMFILE           POSIX     ++++++  Too many open files
 EMLINK           POSIX     ++++++  Too many links
 EMSGSIZE         POSIX     +++..+  Message too long
 EMULTIHOP        POSIX     ++4...  Multihop attempted
-ENAMETOOLONG     POSIX  -  ++++++  Filen ame too long
+ENAMETOOLONG     POSIX  -  ++++++  File name too long
 ENAVAIL                    +.....  No XENIX semaphores available
 ENEEDAUTH                  .++...  Need authenticator
 ENETDOWN         POSIX     +++..+  Network is down
diff --git a/doc/examples/Makefile b/doc/examples/Makefile
index 9699f11d..af381599 100644
--- a/doc/examples/Makefile
+++ b/doc/examples/Makefile
@@ -11,13 +11,14 @@ CFLAGS += -Wall -g
 CFLAGS := $(shell pkg-config --cflags $(FFMPEG_LIBS)) $(CFLAGS)
 LDLIBS := $(shell pkg-config --libs $(FFMPEG_LIBS)) $(LDLIBS)
 
-EXAMPLES=       avio_list_dir                      \
+EXAMPLES=       avio_dir_cmd                       \
                 avio_reading                       \
                 decoding_encoding                  \
                 demuxing_decoding                  \
                 extract_mvs                        \
                 filtering_video                    \
                 filtering_audio                    \
+                http_multiclient                   \
                 metadata                           \
                 muxing                             \
                 remuxing                           \
diff --git a/doc/examples/avio_list_dir.c b/doc/examples/avio_dir_cmd.c
similarity index 66%
rename from doc/examples/avio_list_dir.c
rename to doc/examples/avio_dir_cmd.c
index 4060ba62..50c435cf 100644
--- a/doc/examples/avio_list_dir.c
+++ b/doc/examples/avio_dir_cmd.c
@@ -54,28 +54,13 @@ static const char *type_string(int type)
     return "<UNKNOWN>";
 }
 
-int main(int argc, char *argv[])
+static int list_op(const char *input_dir)
 {
-    const char *input_dir = NULL;
     AVIODirEntry *entry = NULL;
     AVIODirContext *ctx = NULL;
     int cnt, ret;
     char filemode[4], uid_and_gid[20];
 
-    av_log_set_level(AV_LOG_DEBUG);
-
-    if (argc != 2) {
-        fprintf(stderr, "usage: %s input_dir\n"
-                "API example program to show how to list files in directory "
-                "accessed through AVIOContext.\n", argv[0]);
-        return 1;
-    }
-    input_dir = argv[1];
-
-    /* register codecs and formats and other lavf/lavc components*/
-    av_register_all();
-    avformat_network_init();
-
     if ((ret = avio_open_dir(&ctx, input_dir, NULL)) < 0) {
         av_log(NULL, AV_LOG_ERROR, "Cannot open directory: %s.\n", av_err2str(ret));
         goto fail;
@@ -114,6 +99,81 @@ int main(int argc, char *argv[])
 
   fail:
     avio_close_dir(&ctx);
+    return ret;
+}
+
+static int del_op(const char *url)
+{
+    int ret = avpriv_io_delete(url);
+    if (ret < 0)
+        av_log(NULL, AV_LOG_ERROR, "Cannot delete '%s': %s.\n", url, av_err2str(ret));
+    return ret;
+}
+
+static int move_op(const char *src, const char *dst)
+{
+    int ret = avpriv_io_move(src, dst);
+    if (ret < 0)
+        av_log(NULL, AV_LOG_ERROR, "Cannot move '%s' into '%s': %s.\n", src, dst, av_err2str(ret));
+    return ret;
+}
+
+
+static void usage(const char *program_name)
+{
+    fprintf(stderr, "usage: %s OPERATION entry1 [entry2]\n"
+            "API example program to show how to manipulate resources "
+            "accessed through AVIOContext.\n"
+            "OPERATIONS:\n"
+            "list      list content of the directory\n"
+            "move      rename content in directory\n"
+            "del       delete content in directory\n",
+            program_name);
+}
+
+int main(int argc, char *argv[])
+{
+    const char *op = NULL;
+    int ret;
+
+    av_log_set_level(AV_LOG_DEBUG);
+
+    if (argc < 2) {
+        usage(argv[0]);
+        return 1;
+    }
+
+    /* register codecs and formats and other lavf/lavc components*/
+    av_register_all();
+    avformat_network_init();
+
+    op = argv[1];
+    if (strcmp(op, "list") == 0) {
+        if (argc < 3) {
+            av_log(NULL, AV_LOG_INFO, "Missing argument for list operation.\n");
+            ret = AVERROR(EINVAL);
+        } else {
+            ret = list_op(argv[2]);
+        }
+    } else if (strcmp(op, "del") == 0) {
+        if (argc < 3) {
+            av_log(NULL, AV_LOG_INFO, "Missing argument for del operation.\n");
+            ret = AVERROR(EINVAL);
+        } else {
+            ret = del_op(argv[2]);
+        }
+    } else if (strcmp(op, "move") == 0) {
+        if (argc < 4) {
+            av_log(NULL, AV_LOG_INFO, "Missing argument for move operation.\n");
+            ret = AVERROR(EINVAL);
+        } else {
+            ret = move_op(argv[2], argv[3]);
+        }
+    } else {
+        av_log(NULL, AV_LOG_INFO, "Invalid operation %s\n", op);
+        ret = AVERROR(EINVAL);
+    }
+
     avformat_network_deinit();
 
     return ret < 0 ? 1 : 0;
diff --git a/doc/examples/decoding_encoding.c b/doc/examples/decoding_encoding.c
index 80da6643..06a98a63 100644
--- a/doc/examples/decoding_encoding.c
+++ b/doc/examples/decoding_encoding.c
@@ -211,7 +211,7 @@ static void audio_encode_example(const char *filename)
         }
         if (got_output) {
             fwrite(pkt.data, 1, pkt.size, f);
-            av_free_packet(&pkt);
+            av_packet_unref(&pkt);
         }
     }
 
@@ -225,7 +225,7 @@ static void audio_encode_example(const char *filename)
 
         if (got_output) {
             fwrite(pkt.data, 1, pkt.size, f);
-            av_free_packet(&pkt);
+            av_packet_unref(&pkt);
         }
     }
     fclose(f);
@@ -245,7 +245,7 @@ static void audio_decode_example(const char *outfilename, const char *filename)
     AVCodecContext *c= NULL;
     int len;
     FILE *f, *outfile;
-    uint8_t inbuf[AUDIO_INBUF_SIZE + FF_INPUT_BUFFER_PADDING_SIZE];
+    uint8_t inbuf[AUDIO_INBUF_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
     AVPacket avpkt;
     AVFrame *decoded_frame = NULL;
 
@@ -454,7 +454,7 @@ static void video_encode_example(const char *filename, int codec_id)
         if (got_output) {
             printf("Write frame %3d (size=%5d)\n", i, pkt.size);
             fwrite(pkt.data, 1, pkt.size, f);
-            av_free_packet(&pkt);
+            av_packet_unref(&pkt);
         }
     }
 
@@ -471,7 +471,7 @@ static void video_encode_example(const char *filename, int codec_id)
         if (got_output) {
             printf("Write frame %3d (size=%5d)\n", i, pkt.size);
             fwrite(pkt.data, 1, pkt.size, f);
-            av_free_packet(&pkt);
+            av_packet_unref(&pkt);
         }
     }
 
@@ -521,7 +521,7 @@ static int decode_write_frame(const char *outfilename, AVCodecContext *avctx,
         /* the picture is allocated by the decoder, no need to free it */
         snprintf(buf, sizeof(buf), outfilename, *frame_count);
         pgm_save(frame->data[0], frame->linesize[0],
-                 avctx->width, avctx->height, buf);
+                 frame->width, frame->height, buf);
         (*frame_count)++;
     }
     if (pkt->data) {
@@ -538,13 +538,13 @@ static void video_decode_example(const char *outfilename, const char *filename)
     int frame_count;
     FILE *f;
     AVFrame *frame;
-    uint8_t inbuf[INBUF_SIZE + FF_INPUT_BUFFER_PADDING_SIZE];
+    uint8_t inbuf[INBUF_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
     AVPacket avpkt;
 
     av_init_packet(&avpkt);
 
     /* set end of buffer to 0 (this ensures that no overreading happens for damaged mpeg streams) */
-    memset(inbuf + INBUF_SIZE, 0, FF_INPUT_BUFFER_PADDING_SIZE);
+    memset(inbuf + INBUF_SIZE, 0, AV_INPUT_BUFFER_PADDING_SIZE);
 
     printf("Decode video file %s to %s\n", filename, outfilename);
 
@@ -561,8 +561,8 @@ static void video_decode_example(const char *outfilename, const char *filename)
         exit(1);
     }
 
-    if(codec->capabilities&CODEC_CAP_TRUNCATED)
-        c->flags|= CODEC_FLAG_TRUNCATED; /* we do not send complete frames */
+    if (codec->capabilities & AV_CODEC_CAP_TRUNCATED)
+        c->flags |= AV_CODEC_FLAG_TRUNCATED; // we do not send complete frames
 
     /* For some codecs, such as msmpeg4 and mpeg4, width and height
        MUST be initialized there because this information is not
diff --git a/doc/examples/demuxing_decoding.c b/doc/examples/demuxing_decoding.c
index 98b3a830..59e0ccc9 100644
--- a/doc/examples/demuxing_decoding.c
+++ b/doc/examples/demuxing_decoding.c
@@ -55,17 +55,11 @@ static AVPacket pkt;
 static int video_frame_count = 0;
 static int audio_frame_count = 0;
 
-/* The different ways of decoding and managing data memory. You are not
- * supposed to support all the modes in your application but pick the one most
- * appropriate to your needs. Look for the use of api_mode in this example to
- * see what are the differences of API usage between them */
-enum {
-    API_MODE_OLD                  = 0, /* old method, deprecated */
-    API_MODE_NEW_API_REF_COUNT    = 1, /* new method, using the frame reference counting */
-    API_MODE_NEW_API_NO_REF_COUNT = 2, /* new method, without reference counting */
-};
-
-static int api_mode = API_MODE_OLD;
+/* Enable or disable frame reference counting. You are not supposed to support
+ * both paths in your application but pick the one most appropriate to your
+ * needs. Look for the use of refcount in this example to see what are the
+ * differences of API usage between them. */
+static int refcount = 0;
 
 static int decode_packet(int *got_frame, int cached)
 {
@@ -145,9 +139,9 @@ static int decode_packet(int *got_frame, int cached)
         }
     }
 
-    /* If we use the new API with reference counting, we own the data and need
+    /* If we use frame reference counting, we own the data and need
      * to de-reference it when we don't use it anymore */
-    if (*got_frame && api_mode == API_MODE_NEW_API_REF_COUNT)
+    if (*got_frame && refcount)
         av_frame_unref(frame);
 
     return decoded;
@@ -181,8 +175,7 @@ static int open_codec_context(int *stream_idx,
         }
 
         /* Init the decoders, with or without reference counting */
-        if (api_mode == API_MODE_NEW_API_REF_COUNT)
-            av_dict_set(&opts, "refcounted_frames", "1", 0);
+        av_dict_set(&opts, "refcounted_frames", refcount ? "1" : "0", 0);
         if ((ret = avcodec_open2(dec_ctx, dec, &opts)) < 0) {
             fprintf(stderr, "Failed to open %s codec\n",
                     av_get_media_type_string(type));
@@ -228,28 +221,19 @@ int main (int argc, char **argv)
     int ret = 0, got_frame;
 
     if (argc != 4 && argc != 5) {
-        fprintf(stderr, "usage: %s [-refcount=<old|new_norefcount|new_refcount>] "
-                "input_file video_output_file audio_output_file\n"
+        fprintf(stderr, "usage: %s [-refcount] input_file video_output_file audio_output_file\n"
                 "API example program to show how to read frames from an input file.\n"
                 "This program reads frames from a file, decodes them, and writes decoded\n"
                 "video frames to a rawvideo file named video_output_file, and decoded\n"
                 "audio frames to a rawaudio file named audio_output_file.\n\n"
                 "If the -refcount option is specified, the program use the\n"
                 "reference counting frame system which allows keeping a copy of\n"
-                "the data for longer than one decode call. If unset, it's using\n"
-                "the classic old method.\n"
+                "the data for longer than one decode call.\n"
                 "\n", argv[0]);
         exit(1);
     }
-    if (argc == 5) {
-        const char *mode = argv[1] + strlen("-refcount=");
-        if      (!strcmp(mode, "old"))            api_mode = API_MODE_OLD;
-        else if (!strcmp(mode, "new_norefcount")) api_mode = API_MODE_NEW_API_NO_REF_COUNT;
-        else if (!strcmp(mode, "new_refcount"))   api_mode = API_MODE_NEW_API_REF_COUNT;
-        else {
-            fprintf(stderr, "unknow mode '%s'\n", mode);
-            exit(1);
-        }
+    if (argc == 5 && !strcmp(argv[1], "-refcount")) {
+        refcount = 1;
         argv++;
     }
     src_filename = argv[1];
@@ -315,12 +299,7 @@ int main (int argc, char **argv)
         goto end;
     }
 
-    /* When using the new API, you need to use the libavutil/frame.h API, while
-     * the classic frame management is available in libavcodec */
-    if (api_mode == API_MODE_OLD)
-        frame = avcodec_alloc_frame();
-    else
-        frame = av_frame_alloc();
+    frame = av_frame_alloc();
     if (!frame) {
         fprintf(stderr, "Could not allocate frame\n");
         ret = AVERROR(ENOMEM);
@@ -347,7 +326,7 @@ int main (int argc, char **argv)
             pkt.data += ret;
             pkt.size -= ret;
         } while (pkt.size > 0);
-        av_free_packet(&orig_pkt);
+        av_packet_unref(&orig_pkt);
     }
 
     /* flush cached frames */
@@ -397,10 +376,7 @@ int main (int argc, char **argv)
         fclose(video_dst_file);
     if (audio_dst_file)
         fclose(audio_dst_file);
-    if (api_mode == API_MODE_OLD)
-        avcodec_free_frame(&frame);
-    else
-        av_frame_free(&frame);
+    av_frame_free(&frame);
     av_free(video_dst_data[0]);
 
     return ret < 0;
diff --git a/doc/examples/extract_mvs.c b/doc/examples/extract_mvs.c
index d6fd6133..975189c7 100644
--- a/doc/examples/extract_mvs.c
+++ b/doc/examples/extract_mvs.c
@@ -167,7 +167,7 @@ int main(int argc, char **argv)
             pkt.data += ret;
             pkt.size -= ret;
         } while (pkt.size > 0);
-        av_free_packet(&orig_pkt);
+        av_packet_unref(&orig_pkt);
     }
 
     /* flush cached frames */
diff --git a/doc/examples/filtering_audio.c b/doc/examples/filtering_audio.c
index f5cb8eb8..89c80cfd 100644
--- a/doc/examples/filtering_audio.c
+++ b/doc/examples/filtering_audio.c
@@ -33,7 +33,6 @@
 #include <libavcodec/avcodec.h>
 #include <libavformat/avformat.h>
 #include <libavfilter/avfiltergraph.h>
-#include <libavfilter/avcodec.h>
 #include <libavfilter/buffersink.h>
 #include <libavfilter/buffersrc.h>
 #include <libavutil/opt.h>
@@ -274,10 +273,10 @@ int main(int argc, char **argv)
             }
 
             if (packet.size <= 0)
-                av_free_packet(&packet0);
+                av_packet_unref(&packet0);
         } else {
             /* discard non-wanted packets */
-            av_free_packet(&packet0);
+            av_packet_unref(&packet0);
         }
     }
 end:
diff --git a/doc/examples/filtering_video.c b/doc/examples/filtering_video.c
index c02040ae..3dabf13b 100644
--- a/doc/examples/filtering_video.c
+++ b/doc/examples/filtering_video.c
@@ -33,12 +33,14 @@
 #include <libavcodec/avcodec.h>
 #include <libavformat/avformat.h>
 #include <libavfilter/avfiltergraph.h>
-#include <libavfilter/avcodec.h>
 #include <libavfilter/buffersink.h>
 #include <libavfilter/buffersrc.h>
 #include <libavutil/opt.h>
 
-const char *filter_descr = "scale=78:24";
+const char *filter_descr = "scale=78:24,transpose=cclock";
+/* other way:
+   scale=78:24 [scl]; [scl] transpose=cclock // assumes "[in]" and "[out]" to be input output pads respectively
+ */
 
 static AVFormatContext *fmt_ctx;
 static AVCodecContext *dec_ctx;
@@ -260,7 +262,7 @@ int main(int argc, char **argv)
                 av_frame_unref(frame);
             }
         }
-        av_free_packet(&packet);
+        av_packet_unref(&packet);
     }
 end:
     avfilter_graph_free(&filter_graph);
diff --git a/doc/examples/http_multiclient.c b/doc/examples/http_multiclient.c
new file mode 100644
index 00000000..b9a306d8
--- /dev/null
+++ b/doc/examples/http_multiclient.c
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2015 Stephan Holljes
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/**
+ * @file
+ * libavformat multi-client network API usage example.
+ *
+ * @example http_multiclient.c
+ * This example will serve a file without decoding or demuxing it over http.
+ * Multiple clients can connect and will receive the same file.
+ */
+
+#include <libavformat/avformat.h>
+#include <libavutil/opt.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+/**
+ * Serve one accepted HTTP client, then close its connection.
+ *
+ * Completes the HTTP handshake, replies 200 only when the requested resource
+ * is "/" followed by in_uri, and in that case copies the file to the client
+ * unmodified. Both the client context and the input are closed on return.
+ *
+ * @param client connection returned by avio_accept(); closed by this function
+ * @param in_uri URI of the file to serve; also the only resource name accepted
+ */
+static void process_client(AVIOContext *client, const char *in_uri)
+{
+    AVIOContext *input = NULL;
+    uint8_t buf[1024];
+    int ret, n, reply_code;
+    uint8_t *resource = NULL;
+
+    /* Step the handshake until the "resource" option holds a non-empty
+     * string or the handshake stops asking for more steps. */
+    while ((ret = avio_handshake(client)) > 0) {
+        av_opt_get(client, "resource", AV_OPT_SEARCH_CHILDREN, &resource);
+        // check for strlen(resource) is necessary, because av_opt_get()
+        // may return empty string.
+        if (resource && strlen((const char *)resource))
+            break;
+        /* av_opt_get() returns a newly allocated string that the caller
+         * must free; release the empty one before asking again. */
+        av_freep(&resource);
+    }
+    if (ret < 0)
+        goto end;
+    av_log(client, AV_LOG_TRACE, "resource=%p\n", resource);
+    if (resource && resource[0] == '/' &&
+        !strcmp((const char *)resource + 1, in_uri)) {
+        reply_code = 200;
+    } else {
+        reply_code = AVERROR_HTTP_NOT_FOUND;
+    }
+    if ((ret = av_opt_set_int(client, "reply_code", reply_code, AV_OPT_SEARCH_CHILDREN)) < 0) {
+        av_log(client, AV_LOG_ERROR, "Failed to set reply_code: %s.\n", av_err2str(ret));
+        goto end;
+    }
+    av_log(client, AV_LOG_TRACE, "Set reply code to %d\n", reply_code);
+
+    /* Run the remaining handshake steps now that the reply code is set. */
+    while ((ret = avio_handshake(client)) > 0);
+
+    if (ret < 0)
+        goto end;
+
+    fprintf(stderr, "Handshake performed.\n");
+    if (reply_code != 200)
+        goto end;
+    fprintf(stderr, "Opening input file.\n");
+    if ((ret = avio_open2(&input, in_uri, AVIO_FLAG_READ, NULL, NULL)) < 0) {
+        av_log(input, AV_LOG_ERROR, "Failed to open input: %s: %s.\n", in_uri,
+               av_err2str(ret));
+        goto end;
+    }
+    for(;;) {
+        n = avio_read(input, buf, sizeof(buf));
+        if (n < 0) {
+            if (n == AVERROR_EOF)
+                break;
+            av_log(input, AV_LOG_ERROR, "Error reading from input: %s.\n",
+                   av_err2str(n));
+            break;
+        }
+        avio_write(client, buf, n);
+        avio_flush(client);
+    }
+end:
+    fprintf(stderr, "Flushing client\n");
+    avio_flush(client);
+    fprintf(stderr, "Closing client\n");
+    avio_close(client);
+    fprintf(stderr, "Closing input\n");
+    avio_close(input);
+    av_freep(&resource);
+}
+
+/**
+ * Listen on the given http URL and serve the input file to every client.
+ *
+ * Each accepted connection is handed to a forked child running
+ * process_client(); the parent keeps accepting.
+ *
+ * @return 0 when accepting ends with AVERROR_EOF, non-zero on a usage error
+ *         or any other failure
+ */
+int main(int argc, char **argv)
+{
+    AVDictionary *options = NULL;
+    AVIOContext *client = NULL, *server = NULL;
+    const char *in_uri, *out_uri;
+    int ret, pid;
+
+    av_log_set_level(AV_LOG_TRACE);
+    if (argc < 3) {
+        printf("usage: %s input http://hostname[:port]\n"
+               "API example program to serve http to multiple clients.\n"
+               "\n", argv[0]);
+        return 1;
+    }
+
+    in_uri = argv[1];
+    out_uri = argv[2];
+
+    av_register_all();
+    avformat_network_init();
+
+    if ((ret = av_dict_set(&options, "listen", "2", 0)) < 0) {
+        fprintf(stderr, "Failed to set listen mode for server: %s\n", av_err2str(ret));
+        return ret;
+    }
+    ret = avio_open2(&server, out_uri, AVIO_FLAG_WRITE, NULL, &options);
+    /* On return avio_open2() replaces the dictionary with the entries it did
+     * not consume; free whatever is left, whether or not the open succeeded. */
+    av_dict_free(&options);
+    if (ret < 0) {
+        fprintf(stderr, "Failed to open server: %s\n", av_err2str(ret));
+        return ret;
+    }
+    fprintf(stderr, "Entering main loop.\n");
+    for(;;) {
+        if ((ret = avio_accept(server, &client)) < 0)
+            goto end;
+        fprintf(stderr, "Accepted client, forking process.\n");
+        // XXX: Since we don't reap our children and don't ignore signals
+        //      this produces zombie processes.
+        pid = fork();
+        if (pid < 0) {
+            /* Capture errno before perror(), which may modify it. */
+            ret = AVERROR(errno);
+            perror("Fork failed");
+            avio_close(client);
+            goto end;
+        }
+        if (pid == 0) {
+            fprintf(stderr, "In child.\n");
+            process_client(client, in_uri);
+            avio_close(server);
+            exit(0);
+        }
+        /* Parent: the child serves this client; drop our handle to it. */
+        avio_close(client);
+    }
+end:
+    avio_close(server);
+    if (ret < 0 && ret != AVERROR_EOF) {
+        fprintf(stderr, "Some errors occurred: %s\n", av_err2str(ret));
+        return 1;
+    }
+    return 0;
+}
diff --git a/doc/examples/muxing.c b/doc/examples/muxing.c
index 8b0ea60b..d4dac5cd 100644
--- a/doc/examples/muxing.c
+++ b/doc/examples/muxing.c
@@ -172,7 +172,7 @@ static void add_stream(OutputStream *ost, AVFormatContext *oc,
 
     /* Some formats want stream headers to be separate. */
     if (oc->oformat->flags & AVFMT_GLOBALHEADER)
-        c->flags |= CODEC_FLAG_GLOBAL_HEADER;
+        c->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
 }
 
 /**************************************************************/
@@ -230,7 +230,7 @@ static void open_audio(AVFormatContext *oc, AVCodec *codec, OutputStream *ost, A
     /* increment frequency by 110 Hz per second */
     ost->tincr2 = 2 * M_PI * 110.0 / c->sample_rate / c->sample_rate;
 
-    if (c->codec->capabilities & CODEC_CAP_VARIABLE_FRAME_SIZE)
+    if (c->codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE)
         nb_samples = 10000;
     else
         nb_samples = c->frame_size;
@@ -493,44 +493,25 @@ static int write_video_frame(AVFormatContext *oc, OutputStream *ost)
     AVCodecContext *c;
     AVFrame *frame;
     int got_packet = 0;
+    AVPacket pkt = { 0 };
 
     c = ost->st->codec;
 
     frame = get_video_frame(ost);
 
-    if (oc->oformat->flags & AVFMT_RAWPICTURE) {
-        /* a hack to avoid data copy with some raw video muxers */
-        AVPacket pkt;
-        av_init_packet(&pkt);
-
-        if (!frame)
-            return 1;
-
-        pkt.flags        |= AV_PKT_FLAG_KEY;
-        pkt.stream_index  = ost->st->index;
-        pkt.data          = (uint8_t *)frame;
-        pkt.size          = sizeof(AVPicture);
+    av_init_packet(&pkt);
 
-        pkt.pts = pkt.dts = frame->pts;
-        av_packet_rescale_ts(&pkt, c->time_base, ost->st->time_base);
+    /* encode the image */
+    ret = avcodec_encode_video2(c, &pkt, frame, &got_packet);
+    if (ret < 0) {
+        fprintf(stderr, "Error encoding video frame: %s\n", av_err2str(ret));
+        exit(1);
+    }
 
-        ret = av_interleaved_write_frame(oc, &pkt);
+    if (got_packet) {
+        ret = write_frame(oc, &c->time_base, ost->st, &pkt);
     } else {
-        AVPacket pkt = { 0 };
-        av_init_packet(&pkt);
-
-        /* encode the image */
-        ret = avcodec_encode_video2(c, &pkt, frame, &got_packet);
-        if (ret < 0) {
-            fprintf(stderr, "Error encoding video frame: %s\n", av_err2str(ret));
-            exit(1);
-        }
-
-        if (got_packet) {
-            ret = write_frame(oc, &c->time_base, ost->st, &pkt);
-        } else {
-            ret = 0;
-        }
+        ret = 0;
     }
 
     if (ret < 0) {
diff --git a/doc/examples/qsvdec.c b/doc/examples/qsvdec.c
index 6dbb2103..fd934beb 100644
--- a/doc/examples/qsvdec.c
+++ b/doc/examples/qsvdec.c
@@ -116,15 +116,6 @@ static mfxStatus frame_alloc(mfxHDL pthis, mfxFrameAllocRequest *req,
 
 static mfxStatus frame_free(mfxHDL pthis, mfxFrameAllocResponse *resp)
 {
-    DecodeContext *decode = pthis;
-
-    if (decode->surfaces)
-        vaDestroySurfaces(decode->va_dpy, decode->surfaces, decode->nb_surfaces);
-    av_freep(&decode->surfaces);
-    av_freep(&decode->surface_ids);
-    av_freep(&decode->surface_used);
-    decode->nb_surfaces = 0;
-
     return MFX_ERR_NONE;
 }
 
@@ -144,6 +135,16 @@ static mfxStatus frame_get_hdl(mfxHDL pthis, mfxMemId mid, mfxHDL *hdl)
     return MFX_ERR_NONE;
 }
 
+static void free_surfaces(DecodeContext *decode)
+{
+    if (decode->surfaces)
+        vaDestroySurfaces(decode->va_dpy, decode->surfaces, decode->nb_surfaces);
+    av_freep(&decode->surfaces);
+    av_freep(&decode->surface_ids);
+    av_freep(&decode->surface_used);
+    decode->nb_surfaces = 0;
+}
+
 static void free_buffer(void *opaque, uint8_t *data)
 {
     int *used = opaque;
@@ -405,7 +406,7 @@ int main(int argc, char **argv)
     decoder_ctx->codec_id = AV_CODEC_ID_H264;
     if (video_st->codec->extradata_size) {
         decoder_ctx->extradata = av_mallocz(video_st->codec->extradata_size +
-                                            FF_INPUT_BUFFER_PADDING_SIZE);
+                                            AV_INPUT_BUFFER_PADDING_SIZE);
         if (!decoder_ctx->extradata) {
             ret = AVERROR(ENOMEM);
             goto finish;
@@ -467,6 +468,12 @@ int main(int argc, char **argv)
 
     av_frame_free(&frame);
 
+    if (decoder_ctx)
+        av_freep(&decoder_ctx->hwaccel_context);
+    avcodec_free_context(&decoder_ctx);
+
+    free_surfaces(&decode);
+
     if (decode.mfx_session)
         MFXClose(decode.mfx_session);
     if (decode.va_dpy)
@@ -474,10 +481,6 @@ int main(int argc, char **argv)
     if (dpy)
         XCloseDisplay(dpy);
 
-    if (decoder_ctx)
-        av_freep(&decoder_ctx->hwaccel_context);
-    avcodec_free_context(&decoder_ctx);
-
     avio_close(output_ctx);
 
     return ret;
diff --git a/doc/examples/remuxing.c b/doc/examples/remuxing.c
index e9758a8d..65437d9a 100644
--- a/doc/examples/remuxing.c
+++ b/doc/examples/remuxing.c
@@ -101,7 +101,7 @@ int main(int argc, char **argv)
         }
         out_stream->codec->codec_tag = 0;
         if (ofmt_ctx->oformat->flags & AVFMT_GLOBALHEADER)
-            out_stream->codec->flags |= CODEC_FLAG_GLOBAL_HEADER;
+            out_stream->codec->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
     }
     av_dump_format(ofmt_ctx, 0, out_filename, 1);
 
@@ -143,7 +143,7 @@ int main(int argc, char **argv)
             fprintf(stderr, "Error muxing packet\n");
             break;
         }
-        av_free_packet(&pkt);
+        av_packet_unref(&pkt);
     }
 
     av_write_trailer(ofmt_ctx);
diff --git a/doc/examples/transcode_aac.c b/doc/examples/transcode_aac.c
index 339d65c7..486e54c2 100644
--- a/doc/examples/transcode_aac.c
+++ b/doc/examples/transcode_aac.c
@@ -192,7 +192,7 @@ static int open_output_file(const char *filename,
      * Mark the encoder so that it behaves accordingly.
      */
     if ((*output_format_context)->oformat->flags & AVFMT_GLOBALHEADER)
-        (*output_codec_context)->flags |= CODEC_FLAG_GLOBAL_HEADER;
+        (*output_codec_context)->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
 
     /** Open the encoder for the audio stream to use it later. */
     if ((error = avcodec_open2(*output_codec_context, output_codec, NULL)) < 0) {
@@ -332,7 +332,7 @@ static int decode_audio_frame(AVFrame *frame,
                                        data_present, &input_packet)) < 0) {
         fprintf(stderr, "Could not decode frame (error '%s')\n",
                 get_error_text(error));
-        av_free_packet(&input_packet);
+        av_packet_unref(&input_packet);
         return error;
     }
 
@@ -342,7 +342,7 @@ static int decode_audio_frame(AVFrame *frame,
      */
     if (*finished && *data_present)
         *finished = 0;
-    av_free_packet(&input_packet);
+    av_packet_unref(&input_packet);
     return 0;
 }
 
@@ -571,7 +571,7 @@ static int encode_audio_frame(AVFrame *frame,
                                        frame, data_present)) < 0) {
         fprintf(stderr, "Could not encode frame (error '%s')\n",
                 get_error_text(error));
-        av_free_packet(&output_packet);
+        av_packet_unref(&output_packet);
         return error;
     }
 
@@ -580,11 +580,11 @@ static int encode_audio_frame(AVFrame *frame,
         if ((error = av_write_frame(output_format_context, &output_packet)) < 0) {
             fprintf(stderr, "Could not write frame (error '%s')\n",
                     get_error_text(error));
-            av_free_packet(&output_packet);
+            av_packet_unref(&output_packet);
             return error;
         }
 
-        av_free_packet(&output_packet);
+        av_packet_unref(&output_packet);
     }
 
     return 0;
diff --git a/doc/examples/transcoding.c b/doc/examples/transcoding.c
index 980e1f10..d5d410b1 100644
--- a/doc/examples/transcoding.c
+++ b/doc/examples/transcoding.c
@@ -31,7 +31,6 @@
 #include <libavcodec/avcodec.h>
 #include <libavformat/avformat.h>
 #include <libavfilter/avfiltergraph.h>
-#include <libavfilter/avcodec.h>
 #include <libavfilter/buffersink.h>
 #include <libavfilter/buffersrc.h>
 #include <libavutil/opt.h>
@@ -161,7 +160,7 @@ static int open_output_file(const char *filename)
         }
 
         if (ofmt_ctx->oformat->flags & AVFMT_GLOBALHEADER)
-            enc_ctx->flags |= CODEC_FLAG_GLOBAL_HEADER;
+            enc_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
 
     }
     av_dump_format(ofmt_ctx, 0, filename, 1);
@@ -449,7 +448,7 @@ static int flush_encoder(unsigned int stream_index)
     int got_frame;
 
     if (!(ofmt_ctx->streams[stream_index]->codec->codec->capabilities &
-                CODEC_CAP_DELAY))
+                AV_CODEC_CAP_DELAY))
         return 0;
 
     while (1) {
@@ -537,7 +536,7 @@ int main(int argc, char **argv)
             if (ret < 0)
                 goto end;
         }
-        av_free_packet(&packet);
+        av_packet_unref(&packet);
     }
 
     /* flush filters and encoders */
@@ -561,7 +560,7 @@ int main(int argc, char **argv)
 
     av_write_trailer(ofmt_ctx);
 end:
-    av_free_packet(&packet);
+    av_packet_unref(&packet);
     av_frame_free(&frame);
     for (i = 0; i < ifmt_ctx->nb_streams; i++) {
         avcodec_close(ifmt_ctx->streams[i]->codec);
diff --git a/doc/faq.texi b/doc/faq.texi
index 5fe716b8..ef111c70 100644
--- a/doc/faq.texi
+++ b/doc/faq.texi
@@ -147,7 +147,7 @@ exec /usr/bin/pkg-config "$@@"
 
 Try a @code{make distclean} in the ffmpeg source directory before the build.
 If this does not help see
-(@url{http://ffmpeg.org/bugreports.html}).
+(@url{https://ffmpeg.org/bugreports.html}).
 
 @section How do I encode single pictures into movies?
 
@@ -311,18 +311,18 @@ invoking ffmpeg with several @option{-i} options.
 For audio, to put all channels together in a single stream (example: two
 mono streams into one stereo stream): this is sometimes called to
 @emph{merge} them, and can be done using the
-@url{http://ffmpeg.org/ffmpeg-filters.html#amerge, @code{amerge}} filter.
+@url{https://ffmpeg.org/ffmpeg-filters.html#amerge, @code{amerge}} filter.
 
 @item
 For audio, to play one on top of the other: this is called to @emph{mix}
 them, and can be done by first merging them into a single stream and then
-using the @url{http://ffmpeg.org/ffmpeg-filters.html#pan, @code{pan}} filter to mix
+using the @url{https://ffmpeg.org/ffmpeg-filters.html#pan, @code{pan}} filter to mix
 the channels at will.
 
 @item
 For video, to display both together, side by side or one on top of a part of
 the other; it can be done using the
-@url{http://ffmpeg.org/ffmpeg-filters.html#overlay, @code{overlay}} video filter.
+@url{https://ffmpeg.org/ffmpeg-filters.html#overlay, @code{overlay}} video filter.
 
 @end itemize
 
@@ -333,19 +333,19 @@ There are several solutions, depending on the exact circumstances.
 
 @subsection Concatenating using the concat @emph{filter}
 
-FFmpeg has a @url{http://ffmpeg.org/ffmpeg-filters.html#concat,
+FFmpeg has a @url{https://ffmpeg.org/ffmpeg-filters.html#concat,
 @code{concat}} filter designed specifically for that, with examples in the
 documentation. This operation is recommended if you need to re-encode.
 
 @subsection Concatenating using the concat @emph{demuxer}
 
-FFmpeg has a @url{http://www.ffmpeg.org/ffmpeg-formats.html#concat,
+FFmpeg has a @url{https://www.ffmpeg.org/ffmpeg-formats.html#concat,
 @code{concat}} demuxer which you can use when you want to avoid a re-encode and
 your format doesn't support file level concatenation.
 
 @subsection Concatenating using the concat @emph{protocol} (file level)
 
-FFmpeg has a @url{http://ffmpeg.org/ffmpeg-protocols.html#concat,
+FFmpeg has a @url{https://ffmpeg.org/ffmpeg-protocols.html#concat,
 @code{concat}} protocol designed specifically for that, with examples in the
 documentation.
 
@@ -485,7 +485,7 @@ scaling adjusts the SAR to keep the DAR constant.
 
 If you want to stretch, or “unstretch”, the image, you need to override the
 information with the
-@url{http://ffmpeg.org/ffmpeg-filters.html#setdar_002c-setsar, @code{setdar or setsar filters}}.
+@url{https://ffmpeg.org/ffmpeg-filters.html#setdar_002c-setsar, @code{setdar or setsar filters}}.
 
 Do not forget to examine carefully the original video to check whether the
 stretching comes from the image or from the aspect ratio information.
@@ -589,7 +589,7 @@ see @file{libavformat/aviobuf.c} in FFmpeg and @file{libmpdemux/demux_lavf.c} in
 
 @section Where is the documentation about ffv1, msmpeg4, asv1, 4xm?
 
-see @url{http://www.ffmpeg.org/~michael/}
+see @url{https://www.ffmpeg.org/~michael/}
 
 @section How do I feed H.263-RTP (and other codecs in RTP) to libavcodec?
 
diff --git a/doc/ffmpeg.texi b/doc/ffmpeg.texi
index 1078ea11..e02807cb 100644
--- a/doc/ffmpeg.texi
+++ b/doc/ffmpeg.texi
@@ -253,6 +253,10 @@ Overwrite output files without asking.
 Do not overwrite output files, and exit immediately if a specified
 output file already exists.
 
+@item -stream_loop @var{number} (@emph{input})
+Set number of times input stream shall be looped. Loop 0 means no loop,
+loop -1 means infinite loop.
+
 @item -c[:@var{stream_specifier}] @var{codec} (@emph{input/output,per-stream})
 @itemx -codec[:@var{stream_specifier}] @var{codec} (@emph{input/output,per-stream})
 Select an encoder (when used before an output file) or a decoder (when used
@@ -280,23 +284,27 @@ data read from the input file.
 When used as an output option (before an output filename), stop writing the
 output after its duration reaches @var{duration}.
 
-@var{duration} may be a number in seconds, or in @code{hh:mm:ss[.xxx]} form.
+@var{duration} must be a time duration specification,
+see @ref{time duration syntax,,the Time duration section in the ffmpeg-utils(1) manual,ffmpeg-utils}.
 
 -to and -t are mutually exclusive and -t has priority.
 
 @item -to @var{position} (@emph{output})
 Stop writing the output at @var{position}.
-@var{position} may be a number in seconds, or in @code{hh:mm:ss[.xxx]} form.
+@var{position} must be a time duration specification,
+see @ref{time duration syntax,,the Time duration section in the ffmpeg-utils(1) manual,ffmpeg-utils}.
 
 -to and -t are mutually exclusive and -t has priority.
 
 @item -fs @var{limit_size} (@emph{output})
-Set the file size limit, expressed in bytes.
+Set the file size limit, expressed in bytes. No further chunk of bytes is written
+after the limit is exceeded. The size of the output file is slightly more than the
+requested file size.
 
 @item -ss @var{position} (@emph{input/output})
 When used as an input option (before @code{-i}), seeks in this input file to
-@var{position}. Note the in most formats it is not possible to seek exactly, so
-@command{ffmpeg} will seek to the closest seek point before @var{position}.
+@var{position}. Note that in most formats it is not possible to seek exactly,
+so @command{ffmpeg} will seek to the closest seek point before @var{position}.
 When transcoding and @option{-accurate_seek} is enabled (the default), this
 extra segment between the seek point and @var{position} will be decoded and
 discarded. When doing stream copy or when @option{-noaccurate_seek} is used, it
@@ -305,7 +313,13 @@ will be preserved.
 When used as an output option (before an output filename), decodes but discards
 input until the timestamps reach @var{position}.
 
-@var{position} may be either in seconds or in @code{hh:mm:ss[.xxx]} form.
+@var{position} must be a time duration specification,
+see @ref{time duration syntax,,the Time duration section in the ffmpeg-utils(1) manual,ffmpeg-utils}.
+
+@item -sseof @var{position} (@emph{input/output})
+
+Like the @code{-ss} option but relative to the "end of file". That is, negative
+values are earlier in the file, 0 is at EOF.
 
 @item -itsoffset @var{offset} (@emph{input})
 Set the input time offset.
@@ -320,15 +334,15 @@ the time duration specified in @var{offset}.
 @item -timestamp @var{date} (@emph{output})
 Set the recording timestamp in the container.
 
-@var{date} must be a time duration specification,
+@var{date} must be a date specification,
 see @ref{date syntax,,the Date section in the ffmpeg-utils(1) manual,ffmpeg-utils}.
 
 @item -metadata[:metadata_specifier] @var{key}=@var{value} (@emph{output,per-metadata})
 Set a metadata key/value pair.
 
 An optional @var{metadata_specifier} may be given to set metadata
-on streams or chapters. See @code{-map_metadata} documentation for
-details.
+on streams, chapters or programs. See @code{-map_metadata}
+documentation for details.
 
 This option overrides metadata set with @code{-map_metadata}. It is
 also possible to delete metadata by using an empty value.
@@ -343,6 +357,11 @@ To set the language of the first audio stream:
 ffmpeg -i INPUT -metadata:s:a:0 language=eng OUTPUT
 @end example
 
+@item -program [title=@var{title}:][program_num=@var{program_num}:]st=@var{stream}[:st=@var{stream}...] (@emph{output})
+
+Creates a program with the specified @var{title}, @var{program_num} and adds the specified
+@var{stream}(s) to it.
+
 @item -target @var{type} (@emph{output})
 Specify target file type (@code{vcd}, @code{svcd}, @code{dvd}, @code{dv},
 @code{dv50}). @var{type} may be prefixed with @code{pal-}, @code{ntsc-} or
@@ -663,6 +682,16 @@ Use VDPAU (Video Decode and Presentation API for Unix) hardware acceleration.
 
 @item dxva2
 Use DXVA2 (DirectX Video Acceleration) hardware acceleration.
+
+@item qsv
+Use the Intel QuickSync Video acceleration for video transcoding.
+
+Unlike most other values, this option does not enable accelerated decoding (that
+is used automatically whenever a qsv decoder is selected), but accelerated
+transcoding, without copying the frames into the system memory.
+
+For it to work, both the decoder and the encoder must support QSV acceleration
+and no filters must be used.
 @end table
 
 This option has no effect if the selected hwaccel is not available or not
@@ -689,9 +718,27 @@ is not specified, the value of the @var{DISPLAY} environment variable is used
 @item dxva2
 For DXVA2, this option should contain the number of the display adapter to use.
 If this option is not specified, the default adapter is used.
+
+@item qsv
+For QSV, this option corresponds to the values of MFX_IMPL_* . Allowed values
+are:
+@table @option
+@item auto
+@item sw
+@item hw
+@item auto_any
+@item hw_any
+@item hw2
+@item hw3
+@item hw4
 @end table
 @end table
 
+@item -hwaccels
+List all hardware acceleration methods supported in this build of ffmpeg.
+
+@end table
+
 @section Audio Options
 
 @table @option
@@ -1193,9 +1240,9 @@ The option is intended for cases where features are needed that cannot be
 specified to @command{ffserver} but can be to @command{ffmpeg}.
 
 @item -sdp_file @var{file} (@emph{global})
-Print sdp information to @var{file}.
+Print sdp information for an output stream to @var{file}.
 This allows dumping sdp information when at least one output isn't an
-rtp stream.
+rtp stream. (Requires at least one of the output formats to be rtp).
 
 @item -discard (@emph{input})
 Allows discarding specific streams or frames of streams at the demuxer.
@@ -1221,6 +1268,14 @@ Discard all frames excepts keyframes.
 Discard all frames.
 @end table
 
+@item -abort_on @var{flags} (@emph{global})
+Stop and abort on various conditions. The following flags are available:
+
+@table @option
+@item empty_output
+No packets were passed to the muxer, the output is empty.
+@end table
+
 @item -xerror (@emph{global})
 Stop and exit on error
 
@@ -1299,47 +1354,6 @@ If no such file is found, then ffmpeg will search for a file named
 
 @c man end OPTIONS
 
-@chapter Tips
-@c man begin TIPS
-
-@itemize
-@item
-For streaming at very low bitrates, use a low frame rate
-and a small GOP size. This is especially true for RealVideo where
-the Linux player does not seem to be very fast, so it can miss
-frames. An example is:
-
-@example
-ffmpeg -g 3 -r 3 -t 10 -b:v 50k -s qcif -f rv10 /tmp/b.rm
-@end example
-
-@item
-The parameter 'q' which is displayed while encoding is the current
-quantizer. The value 1 indicates that a very good quality could
-be achieved. The value 31 indicates the worst quality. If q=31 appears
-too often, it means that the encoder cannot compress enough to meet
-your bitrate. You must either increase the bitrate, decrease the
-frame rate or decrease the frame size.
-
-@item
-If your computer is not fast enough, you can speed up the
-compression at the expense of the compression ratio. You can use
-'-me zero' to speed up motion estimation, and '-g 0' to disable
-motion estimation completely (you have only I-frames, which means it
-is about as good as JPEG compression).
-
-@item
-To have very low audio bitrates, reduce the sampling frequency
-(down to 22050 Hz for MPEG audio, 22050 or 11025 for AC-3).
-
-@item
-To have a constant quality (but a variable bitrate), use the option
-'-qscale n' when 'n' is between 1 (excellent quality) and 31 (worst
-quality).
-
-@end itemize
-@c man end TIPS
-
 @chapter Examples
 @c man begin EXAMPLES
 
diff --git a/doc/ffplay.texi b/doc/ffplay.texi
index 1ee3c304..4bc3ced3 100644
--- a/doc/ffplay.texi
+++ b/doc/ffplay.texi
@@ -47,9 +47,17 @@ Disable video.
 @item -sn
 Disable subtitles.
 @item -ss @var{pos}
-Seek to a given position in seconds.
+Seek to @var{pos}. Note that in most formats it is not possible to seek
+exactly, so @command{ffplay} will seek to the nearest seek point to
+@var{pos}.
+
+@var{pos} must be a time duration specification,
+see @ref{time duration syntax,,the Time duration section in the ffmpeg-utils(1) manual,ffmpeg-utils}.
 @item -t @var{duration}
-play <duration> seconds of audio/video
+Play @var{duration} seconds of audio/video.
+
+@var{duration} must be a time duration specification,
+see @ref{time duration syntax,,the Time duration section in the ffmpeg-utils(1) manual,ffmpeg-utils}.
 @item -bytes
 Seek by bytes.
 @item -nodisp
@@ -189,6 +197,15 @@ Toggle full screen.
 @item p, SPC
 Pause.
 
+@item m
+Toggle mute.
+
+@item 9, 0
+Decrease and increase volume respectively.
+
+@item /, *
+Decrease and increase volume respectively.
+
 @item a
 Cycle audio channel in the current program.
 
@@ -221,9 +238,12 @@ Seek to the previous/next chapter.
 or if there are no chapters
 Seek backward/forward 10 minutes.
 
-@item mouse click
+@item right mouse click
 Seek to percentage in file corresponding to fraction of width.
 
+@item left mouse double-click
+Toggle full screen.
+
 @end table
 
 @c man end
diff --git a/doc/fftools-common-opts.texi b/doc/fftools-common-opts.texi
new file mode 100644
index 00000000..509c8bca
--- /dev/null
+++ b/doc/fftools-common-opts.texi
@@ -0,0 +1,389 @@
+All the numerical options, if not specified otherwise, accept a string
+representing a number as input, which may be followed by one of the SI
+unit prefixes, for example: 'K', 'M', or 'G'.
+
+If 'i' is appended to the SI unit prefix, the complete prefix will be
+interpreted as a unit prefix for binary multiples, which are based on
+powers of 1024 instead of powers of 1000. Appending 'B' to the SI unit
+prefix multiplies the value by 8. This allows using, for example:
+'KB', 'MiB', 'G' and 'B' as number suffixes.
+
+Options which do not take arguments are boolean options, and set the
+corresponding value to true. They can be set to false by prefixing
+the option name with "no". For example using "-nofoo"
+will set the boolean option with name "foo" to false.
+
+@anchor{Stream specifiers}
+@section Stream specifiers
+Some options are applied per-stream, e.g. bitrate or codec. Stream specifiers
+are used to precisely specify which stream(s) a given option belongs to.
+
+A stream specifier is a string generally appended to the option name and
+separated from it by a colon. E.g. @code{-codec:a:1 ac3} contains the
+@code{a:1} stream specifier, which matches the second audio stream. Therefore, it
+would select the ac3 codec for the second audio stream.
+
+A stream specifier can match several streams, so that the option is applied to all
+of them. E.g. the stream specifier in @code{-b:a 128k} matches all audio
+streams.
+
+An empty stream specifier matches all streams. For example, @code{-codec copy}
+or @code{-codec: copy} would copy all the streams without reencoding.
+
+Possible forms of stream specifiers are:
+@table @option
+@item @var{stream_index}
+Matches the stream with this index. E.g. @code{-threads:1 4} would set the
+thread count for the second stream to 4.
+@item @var{stream_type}[:@var{stream_index}]
+@var{stream_type} is one of the following: 'v' or 'V' for video, 'a' for audio, 's'
+for subtitle, 'd' for data, and 't' for attachments. 'v' matches all video
+streams, 'V' only matches video streams which are not attached pictures, video
+thumbnails or cover arts.  If @var{stream_index} is given, then it matches
+stream number @var{stream_index} of this type. Otherwise, it matches all
+streams of this type.
+@item p:@var{program_id}[:@var{stream_index}]
+If @var{stream_index} is given, then it matches the stream with number @var{stream_index}
+in the program with the id @var{program_id}. Otherwise, it matches all streams in the
+program.
+@item #@var{stream_id} or i:@var{stream_id}
+Match the stream by stream id (e.g. PID in MPEG-TS container).
+@item m:@var{key}[:@var{value}]
+Matches streams with the metadata tag @var{key} having the specified value. If
+@var{value} is not given, matches streams that contain the given tag with any
+value.
+@item u
+Matches streams with usable configuration, the codec must be defined and the
+essential information such as video dimension or audio sample rate must be present.
+
+Note that in @command{ffmpeg}, matching by metadata will only work properly for
+input files.
+@end table
+
+@section Generic options
+
+These options are shared amongst the ff* tools.
+
+@table @option
+
+@item -L
+Show license.
+
+@item -h, -?, -help, --help [@var{arg}]
+Show help. An optional parameter may be specified to print help about a specific
+item. If no argument is specified, only basic (non advanced) tool
+options are shown.
+
+Possible values of @var{arg} are:
+@table @option
+@item long
+Print advanced tool options in addition to the basic tool options.
+
+@item full
+Print complete list of options, including shared and private options
+for encoders, decoders, demuxers, muxers, filters, etc.
+
+@item decoder=@var{decoder_name}
+Print detailed information about the decoder named @var{decoder_name}. Use the
+@option{-decoders} option to get a list of all decoders.
+
+@item encoder=@var{encoder_name}
+Print detailed information about the encoder named @var{encoder_name}. Use the
+@option{-encoders} option to get a list of all encoders.
+
+@item demuxer=@var{demuxer_name}
+Print detailed information about the demuxer named @var{demuxer_name}. Use the
+@option{-formats} option to get a list of all demuxers and muxers.
+
+@item muxer=@var{muxer_name}
+Print detailed information about the muxer named @var{muxer_name}. Use the
+@option{-formats} option to get a list of all muxers and demuxers.
+
+@item filter=@var{filter_name}
+Print detailed information about the filter named @var{filter_name}. Use the
+@option{-filters} option to get a list of all filters.
+@end table
+
+@item -version
+Show version.
+
+@item -formats
+Show available formats (including devices).
+
+@item -devices
+Show available devices.
+
+@item -codecs
+Show all codecs known to libavcodec.
+
+Note that the term 'codec' is used throughout this documentation as a shortcut
+for what is more correctly called a media bitstream format.
+
+@item -decoders
+Show available decoders.
+
+@item -encoders
+Show all available encoders.
+
+@item -bsfs
+Show available bitstream filters.
+
+@item -protocols
+Show available protocols.
+
+@item -filters
+Show available libavfilter filters.
+
+@item -pix_fmts
+Show available pixel formats.
+
+@item -sample_fmts
+Show available sample formats.
+
+@item -layouts
+Show channel names and standard channel layouts.
+
+@item -colors
+Show recognized color names.
+
+@item -sources @var{device}[,@var{opt1}=@var{val1}[,@var{opt2}=@var{val2}]...]
+Show autodetected sources of the input device.
+Some devices may provide system-dependent source names that cannot be autodetected.
+The returned list cannot be assumed to be always complete.
+@example
+ffmpeg -sources pulse,server=192.168.0.4
+@end example
+
+@item -sinks @var{device}[,@var{opt1}=@var{val1}[,@var{opt2}=@var{val2}]...]
+Show autodetected sinks of the output device.
+Some devices may provide system-dependent sink names that cannot be autodetected.
+The returned list cannot be assumed to be always complete.
+@example
+ffmpeg -sinks pulse,server=192.168.0.4
+@end example
+
+@item -loglevel [repeat+]@var{loglevel} | -v [repeat+]@var{loglevel}
+Set the logging level used by the library.
+Adding "repeat+" indicates that repeated log output should not be compressed
+to the first line and the "Last message repeated n times" line will be
+omitted. "repeat" can also be used alone.
+If "repeat" is used alone, and with no prior loglevel set, the default
+loglevel will be used. If multiple loglevel parameters are given, using
+'repeat' will not change the loglevel.
+@var{loglevel} is a string or a number containing one of the following values:
+@table @samp
+@item quiet, -8
+Show nothing at all; be silent.
+@item panic, 0
+Only show fatal errors which could lead the process to crash, such as
+an assertion failure. This is not currently used for anything.
+@item fatal, 8
+Only show fatal errors. These are errors after which the process absolutely
+cannot continue.
+@item error, 16
+Show all errors, including ones which can be recovered from.
+@item warning, 24
+Show all warnings and errors. Any message related to possibly
+incorrect or unexpected events will be shown.
+@item info, 32
+Show informative messages during processing. This is in addition to
+warnings and errors. This is the default value.
+@item verbose, 40
+Same as @code{info}, except more verbose.
+@item debug, 48
+Show everything, including debugging information.
+@item trace, 56
+@end table
+
+By default the program logs to stderr, if coloring is supported by the
+terminal, colors are used to mark errors and warnings. Log coloring
+can be disabled setting the environment variable
+@env{AV_LOG_FORCE_NOCOLOR} or @env{NO_COLOR}, or can be forced setting
+the environment variable @env{AV_LOG_FORCE_COLOR}.
+The use of the environment variable @env{NO_COLOR} is deprecated and
+will be dropped in a following FFmpeg version.
+
+@item -report
+Dump full command line and console output to a file named
+@code{@var{program}-@var{YYYYMMDD}-@var{HHMMSS}.log} in the current
+directory.
+This file can be useful for bug reports.
+It also implies @code{-loglevel verbose}.
+
+Setting the environment variable @env{FFREPORT} to any value has the
+same effect. If the value is a ':'-separated key=value sequence, these
+options will affect the report; option values must be escaped if they
+contain special characters or the options delimiter ':' (see the
+``Quoting and escaping'' section in the ffmpeg-utils manual).
+
+The following options are recognized:
+@table @option
+@item file
+set the file name to use for the report; @code{%p} is expanded to the name
+of the program, @code{%t} is expanded to a timestamp, @code{%%} is expanded
+to a plain @code{%}
+@item level
+set the log verbosity level using a numerical value (see @code{-loglevel}).
+@end table
+
+For example, to output a report to a file named @file{ffreport.log}
+using a log level of @code{32} (alias for log level @code{info}):
+
+@example
+FFREPORT=file=ffreport.log:level=32 ffmpeg -i input output
+@end example
+
+Errors in parsing the environment variable are not fatal, and will not
+appear in the report.
+
+@item -hide_banner
+Suppress printing banner.
+
+All FFmpeg tools will normally show a copyright notice, build options
+and library versions. This option can be used to suppress printing
+this information.
+
+@item -cpuflags flags (@emph{global})
+Allows setting and clearing cpu flags. This option is intended
+for testing. Do not use it unless you know what you're doing.
+@example
+ffmpeg -cpuflags -sse+mmx ...
+ffmpeg -cpuflags mmx ...
+ffmpeg -cpuflags 0 ...
+@end example
+Possible flags for this option are:
+@table @samp
+@item x86
+@table @samp
+@item mmx
+@item mmxext
+@item sse
+@item sse2
+@item sse2slow
+@item sse3
+@item sse3slow
+@item ssse3
+@item atom
+@item sse4.1
+@item sse4.2
+@item avx
+@item avx2
+@item xop
+@item fma3
+@item fma4
+@item 3dnow
+@item 3dnowext
+@item bmi1
+@item bmi2
+@item cmov
+@end table
+@item ARM
+@table @samp
+@item armv5te
+@item armv6
+@item armv6t2
+@item vfp
+@item vfpv3
+@item neon
+@item setend
+@end table
+@item AArch64
+@table @samp
+@item armv8
+@item vfp
+@item neon
+@end table
+@item PowerPC
+@table @samp
+@item altivec
+@end table
+@item Specific Processors
+@table @samp
+@item pentium2
+@item pentium3
+@item pentium4
+@item k6
+@item k62
+@item athlon
+@item athlonxp
+@item k8
+@end table
+@end table
+
+@item -opencl_bench
+This option is used to benchmark all available OpenCL devices and print the
+results. This option is only available when FFmpeg has been compiled with
+@code{--enable-opencl}.
+
+When FFmpeg is configured with @code{--enable-opencl}, the options for the
+global OpenCL context are set via @option{-opencl_options}. See the
+"OpenCL Options" section in the ffmpeg-utils manual for the complete list of
+supported options. Amongst others, these options include the ability to select
+a specific platform and device to run the OpenCL code on. By default, FFmpeg
+will run on the first device of the first platform. While the options for the
+global OpenCL context provide flexibility to the user in selecting the OpenCL
+device of their choice, most users would probably want to select the fastest
+OpenCL device for their system.
+
+This option assists the selection of the most efficient configuration by
+identifying the appropriate device for the user's system. The built-in
+benchmark is run on all the OpenCL devices and the performance is measured for
+each device. The devices in the results list are sorted based on their
+performance with the fastest device listed first. The user can subsequently
+invoke @command{ffmpeg} using the device deemed most appropriate via
+@option{-opencl_options} to obtain the best performance for the OpenCL
+accelerated code.
+
+Typical usage to use the fastest OpenCL device involves the following steps.
+
+Run the command:
+@example
+ffmpeg -opencl_bench
+@end example
+Note down the platform ID (@var{pidx}) and device ID (@var{didx}) of the first
+i.e. fastest device in the list.
+Select the platform and device using the command:
+@example
+ffmpeg -opencl_options platform_idx=@var{pidx}:device_idx=@var{didx} ...
+@end example
+
+@item -opencl_options options (@emph{global})
+Set OpenCL environment options. This option is only available when
+FFmpeg has been compiled with @code{--enable-opencl}.
+
+@var{options} must be a list of @var{key}=@var{value} option pairs
+separated by ':'. See the ``OpenCL Options'' section in the
+ffmpeg-utils manual for the list of supported options.
+@end table
+
+@section AVOptions
+
+These options are provided directly by the libavformat, libavdevice and
+libavcodec libraries. To see the list of available AVOptions, use the
+@option{-help} option. They are separated into two categories:
+@table @option
+@item generic
+These options can be set for any container, codec or device. Generic options
+are listed under AVFormatContext options for containers/devices and under
+AVCodecContext options for codecs.
+@item private
+These options are specific to the given container, device or codec. Private
+options are listed under their corresponding containers/devices/codecs.
+@end table
+
+For example to write an ID3v2.3 header instead of a default ID3v2.4 to
+an MP3 file, use the @option{id3v2_version} private option of the MP3
+muxer:
+@example
+ffmpeg -i input.flac -id3v2_version 3 out.mp3
+@end example
+
+All codec AVOptions are per-stream, and thus a stream specifier
+should be attached to them.
+
+Note: the @option{-nooption} syntax cannot be used for boolean
+AVOptions, use @option{-option 0}/@option{-option 1}.
+
+Note: the old undocumented way of specifying per-stream AVOptions by
+prepending v/a/s to the options name is now obsolete and will be
+removed soon.
diff --git a/doc/filter_design.txt b/doc/filter_design.txt
index fca24a94..e8a7c53e 100644
--- a/doc/filter_design.txt
+++ b/doc/filter_design.txt
@@ -98,7 +98,7 @@ Buffer references ownership and permissions
     The AVFilterLink structure has a few AVFilterBufferRef fields. The
     cur_buf and out_buf were used with the deprecated
     start_frame/draw_slice/end_frame API and should no longer be used.
-    src_buf, cur_buf_copy and partial_buf are used by libavfilter internally
+    src_buf and partial_buf are used by libavfilter internally
     and must not be accessed by filters.
 
   Reference permissions
@@ -232,7 +232,8 @@ Frame scheduling
     one of its inputs, repeatedly until at least one frame has been pushed.
 
     Return values:
-    if request_frame could produce a frame, it should return 0;
+    if request_frame could produce a frame, or at least make progress
+    towards producing a frame, it should return 0;
     if it could not for temporary reasons, it should return AVERROR(EAGAIN);
     if it could not because there are no more frames, it should return
     AVERROR_EOF.
@@ -244,20 +245,18 @@ Frame scheduling
             push_one_frame();
             return 0;
         }
-        while (!frame_pushed) {
-            input = input_where_a_frame_is_most_needed();
-            ret = ff_request_frame(input);
-            if (ret == AVERROR_EOF) {
-                process_eof_on_input();
-            } else if (ret < 0) {
-                return ret;
-            }
+        input = input_where_a_frame_is_most_needed();
+        ret = ff_request_frame(input);
+        if (ret == AVERROR_EOF) {
+            process_eof_on_input();
+        } else if (ret < 0) {
+            return ret;
         }
         return 0;
 
     Note that, except for filters that can have queued frames, request_frame
     does not push frames: it requests them to its input, and as a reaction,
-    the filter_frame method will be called and do the work.
+    the filter_frame method possibly will be called and do the work.
 
 Legacy API
 ==========
diff --git a/doc/filters.texi b/doc/filters.texi
index 4b5c3c11..68f54f19 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -318,6 +318,129 @@ build.
 
 Below is a description of the currently available audio filters.
 
+@section acompressor
+
+A compressor is mainly used to reduce the dynamic range of a signal.
+Especially modern music is mostly compressed at a high ratio to
+improve the overall loudness. It's done to get the highest attention
+of a listener, "fatten" the sound and bring more "power" to the track.
+If a signal is compressed too much it may sound dull or "dead"
+afterwards or it may start to "pump" (which could be a powerful effect
+but can also destroy a track completely).
+The right compression is the key to reach a professional sound and is
+the high art of mixing and mastering. Because of its complex settings
+it may take a long time to get the right feeling for this kind of effect.
+
+Compression is done by detecting the volume above a chosen level
+@code{threshold} and dividing it by the factor set with @code{ratio}.
+So if you set the threshold to -12dB and your signal reaches -6dB a ratio
+of 2:1 will result in a signal at -9dB. Because an exact manipulation of
+the signal would cause distortion of the waveform the reduction can be
+levelled over the time. This is done by setting "Attack" and "Release".
+@code{attack} determines how long the signal has to rise above the threshold
+before any reduction will occur and @code{release} sets the time the signal
+has to fall below the threshold to reduce the reduction again. Shorter signals
+than the chosen attack time will be left untouched.
+The overall reduction of the signal can be made up afterwards with the
+@code{makeup} setting. So compressing the peaks of a signal about 6dB and
+raising the makeup to this level results in a signal twice as loud as the
+source. To gain a softer entry in the compression the @code{knee} flattens the
+hard edge at the threshold in the range of the chosen decibels.
+
+The filter accepts the following options:
+
+@table @option
+@item level_in
+Set input gain. Default is 1. Range is between 0.015625 and 64.
+
+@item threshold
+If the input signal rises above this level it will affect the gain
+reduction.
+By default it is 0.125. Range is between 0.000976563 and 1.
+
+@item ratio
+Set a ratio by which the signal is reduced. 1:2 means that if the level
+rose 4dB above the threshold, it will be only 2dB above after the reduction.
+Default is 2. Range is between 1 and 20.
+
+@item attack
+Amount of milliseconds the signal has to rise above the threshold before gain
+reduction starts. Default is 20. Range is between 0.01 and 2000.
+
+@item release
+Amount of milliseconds the signal has to fall below the threshold before
+reduction is decreased again. Default is 250. Range is between 0.01 and 9000.
+
+@item makeup
+Set the amount by how much signal will be amplified after processing.
+Default is 2. Range is between 1 and 64.
+
+@item knee
+Curve the sharp knee around the threshold to enter gain reduction more softly.
+Default is 2.82843. Range is between 1 and 8.
+
+@item link
+Choose if the @code{average} level between all channels of input stream
+or the louder (@code{maximum}) channel of input stream affects the
+reduction. Default is @code{average}.
+
+@item detection
+Should the exact signal be taken in case of @code{peak} or an RMS one in case
+of @code{rms}. Default is @code{rms} which is mostly smoother.
+
+@item mix
+How much to use compressed signal in output. Default is 1.
+Range is between 0 and 1.
+@end table
+
+@section acrossfade
+
+Apply cross fade from one input audio stream to another input audio stream.
+The cross fade is applied for specified duration near the end of first stream.
+
+The filter accepts the following options:
+
+@table @option
+@item nb_samples, ns
+Specify the number of samples for which the cross fade effect has to last.
+At the end of the cross fade effect the first input audio will be completely
+silent. Default is 44100.
+
+@item duration, d
+Specify the duration of the cross fade effect. See
+@ref{time duration syntax,,the Time duration section in the ffmpeg-utils(1) manual,ffmpeg-utils}
+for the accepted syntax.
+By default the duration is determined by @var{nb_samples}.
+If set this option is used instead of @var{nb_samples}.
+
+@item overlap, o
+Should first stream end overlap with second stream start. Default is enabled.
+
+@item curve1
+Set curve for cross fade transition for first stream.
+
+@item curve2
+Set curve for cross fade transition for second stream.
+
+For description of available curve types see @ref{afade} filter description.
+@end table
+
+@subsection Examples
+
+@itemize
+@item
+Cross fade from one input to another:
+@example
+ffmpeg -i first.flac -i second.flac -filter_complex acrossfade=d=10:c1=exp:c2=exp output.flac
+@end example
+
+@item
+Cross fade from one input to another but without overlapping:
+@example
+ffmpeg -i first.flac -i second.flac -filter_complex acrossfade=d=10:o=0:c1=exp:c2=exp output.flac
+@end example
+@end itemize
+
 @section adelay
 
 Delay one or more audio channels.
@@ -405,6 +528,52 @@ aecho=0.8:0.9:1000|1800:0.3|0.25
 @end example
 @end itemize
 
+@section aemphasis
+Audio emphasis filter creates or restores material directly taken from LPs or
+emphasized CDs with different filter curves. E.g. to store music on vinyl the
+signal has to be altered by a filter first to even out the disadvantages of
+this recording medium.
+Once the material is played back the inverse filter has to be applied to
+restore the distortion of the frequency response.
+
+The filter accepts the following options:
+
+@table @option
+@item level_in
+Set input gain.
+
+@item level_out
+Set output gain.
+
+@item mode
+Set filter mode. For restoring material use @code{reproduction} mode, otherwise
+use @code{production} mode. Default is @code{reproduction} mode.
+
+@item type
+Set filter type. Selects medium. Can be one of the following:
+
+@table @option
+@item col
+select Columbia.
+@item emi
+select EMI.
+@item bsi
+select BSI (78RPM).
+@item riaa
+select RIAA.
+@item cd
+select Compact Disc (CD).
+@item 50fm
+select 50µs (FM).
+@item 75fm
+select 75µs (FM).
+@item 50kf
+select 50µs (FM-KF).
+@item 75kf
+select 75µs (FM-KF).
+@end table
+@end table
+
 @section aeval
 
 Modify an audio signal according to the specified expressions.
@@ -469,6 +638,7 @@ aeval=val(0)|-val(1)
 @end example
 @end itemize
 
+@anchor{afade}
 @section afade
 
 Apply fade-in/out effect to input audio.
@@ -522,7 +692,7 @@ select half of sine wave
 select exponential sine wave
 @item log
 select logarithmic
-@item par
+@item ipar
 select inverted parabola
 @item qua
 select quadratic
@@ -532,6 +702,18 @@ select cubic
 select square root
 @item cbr
 select cubic root
+@item par
+select parabola
+@item exp
+select exponential
+@item iqsin
+select inverted quarter of sine wave
+@item ihsin
+select inverted half of sine wave
+@item dese
+select double-exponential seat
+@item desi
+select double-exponential sigmoid
 @end table
 @end table
 
@@ -551,6 +733,83 @@ afade=t=out:st=875:d=25
 @end example
 @end itemize
 
+@section afftfilt
+Apply arbitrary expressions to samples in frequency domain.
+
+@table @option
+@item real
+Set frequency domain real expression for each separate channel separated
+by '|'. Default is "1".
+If the number of input channels is greater than the number of
+expressions, the last specified expression is used for the remaining
+output channels.
+
+@item imag
+Set frequency domain imaginary expression for each separate channel
+separated by '|'. If not set, @var{real} option is used.
+
+Each expression in @var{real} and @var{imag} can contain the following
+constants:
+
+@table @option
+@item sr
+sample rate
+
+@item b
+current frequency bin number
+
+@item nb
+number of available bins
+
+@item ch
+channel number of the current expression
+
+@item chs
+number of channels
+
+@item pts
+current frame pts
+@end table
+
+@item win_size
+Set window size.
+
+It accepts the following values:
+@table @samp
+@item w16
+@item w32
+@item w64
+@item w128
+@item w256
+@item w512
+@item w1024
+@item w2048
+@item w4096
+@item w8192
+@item w16384
+@item w32768
+@item w65536
+@end table
+Default is @code{w4096}.
+
+@item win_func
+Set window function. Default is @code{hann}.
+
+@item overlap
+Set window overlap. If set to 1, the recommended overlap for selected
+window function will be picked. Default is @code{0.75}.
+@end table
+
+@subsection Examples
+
+@itemize
+@item
+Leave almost only low frequencies in audio:
+@example
+afftfilt="1-clip((b/nb)*b,0,1)"
+@end example
+@end itemize
+
 @anchor{aformat}
 @section aformat
 
@@ -580,6 +839,111 @@ Force the output to either unsigned 8-bit or signed 16-bit stereo
 aformat=sample_fmts=u8|s16:channel_layouts=stereo
 @end example
 
+@section agate
+
+A gate is mainly used to reduce lower parts of a signal. This kind of signal
+processing reduces disturbing noise between useful signals.
+
+Gating is done by detecting the volume below a chosen level @var{threshold}
+and dividing it by the factor set with @var{ratio}. The bottom of the noise
+floor is set via @var{range}. Because an exact manipulation of the signal
+would cause distortion of the waveform the reduction can be levelled over
+time. This is done by setting @var{attack} and @var{release}.
+
+@var{attack} determines how long the signal has to fall below the threshold
+before any reduction will occur and @var{release} sets the time the signal
+has to rise above the threshold to reduce the reduction again.
+Shorter signals than the chosen attack time will be left untouched.
+
+@table @option
+@item level_in
+Set input level before filtering.
+Default is 1. Allowed range is from 0.015625 to 64.
+
+@item range
+Set the level of gain reduction when the signal is below the threshold.
+Default is 0.06125. Allowed range is from 0 to 1.
+
+@item threshold
+If a signal rises above this level the gain reduction is released.
+Default is 0.125. Allowed range is from 0 to 1.
+
+@item ratio
+Set a ratio by which the signal is reduced.
+Default is 2. Allowed range is from 1 to 9000.
+
+@item attack
+Amount of milliseconds the signal has to rise above the threshold before gain
+reduction stops.
+Default is 20 milliseconds. Allowed range is from 0.01 to 9000.
+
+@item release
+Amount of milliseconds the signal has to fall below the threshold before the
+reduction is increased again. Default is 250 milliseconds.
+Allowed range is from 0.01 to 9000.
+
+@item makeup
+Set amount of amplification of signal after processing.
+Default is 1. Allowed range is from 1 to 64.
+
+@item knee
+Curve the sharp knee around the threshold to enter gain reduction more softly.
+Default is 2.828427125. Allowed range is from 1 to 8.
+
+@item detection
+Choose if exact signal should be taken for detection or an RMS like one.
+Default is rms. Can be peak or rms.
+
+@item link
+Choose if the average level between all channels or the louder channel affects
+the reduction.
+Default is average. Can be average or maximum.
+@end table
+
+@section alimiter
+
+The limiter prevents input signal from rising over a desired threshold.
+This limiter uses lookahead technology to prevent your signal from distorting.
+It means that there is a small delay after signal is processed. Keep in mind
+that the delay it produces is the attack time you set.
+
+The filter accepts the following options:
+
+@table @option
+@item level_in
+Set input gain. Default is 1.
+
+@item level_out
+Set output gain. Default is 1.
+
+@item limit
+Don't let signals above this level pass the limiter. Default is 1.
+
+@item attack
+The limiter will reach its attenuation level in this amount of time in
+milliseconds. Default is 5 milliseconds.
+
+@item release
+Come back from limiting to attenuation 1.0 in this amount of milliseconds.
+Default is 50 milliseconds.
+
+@item asc
+When gain reduction is always needed ASC takes care of releasing to an
+average reduction level rather than reaching a reduction of 0 in the release
+time.
+
+@item asc_level
+Select how much the release time is affected by ASC, 0 means nearly no changes
+in release time while 1 produces higher release times.
+
+@item level
+Auto level output signal. Default is enabled.
+This normalizes audio back to 0dB if enabled.
+@end table
+
+Depending on picked setting it is recommended to upsample input 2x or 4x
+with @ref{aresample} before applying this filter.
+
 @section allpass
 
 Apply a two-pole all-pass filter with central frequency (in Hz)
@@ -610,6 +974,7 @@ slope
 Specify the band-width of a filter in width_type units.
 @end table
 
+@anchor{amerge}
 @section amerge
 
 Merge two or more audio streams into a single multi-channel stream.
@@ -704,6 +1069,100 @@ stream ends. The default value is 2 seconds.
 
 @end table
 
+@section anequalizer
+
+High-order parametric multiband equalizer for each channel.
+
+It accepts the following parameters:
+@table @option
+@item params
+
+This option string is in format:
+"c@var{chn} f=@var{cf} w=@var{w} g=@var{g} t=@var{f} | ..."
+Each equalizer band is separated by '|'.
+
+@table @option
+@item chn
+Set channel number to which equalization will be applied.
+If input doesn't have that channel the entry is ignored.
+
+@item cf
+Set central frequency for band.
+If input doesn't have that frequency the entry is ignored.
+
+@item w
+Set band width in hertz.
+
+@item g
+Set band gain in dB.
+
+@item f
+Set filter type for band, optional, can be:
+
+@table @samp
+@item 0
+Butterworth, this is default.
+
+@item 1
+Chebyshev type 1.
+
+@item 2
+Chebyshev type 2.
+@end table
+@end table
+
+@item curves
+With this option activated frequency response of anequalizer is displayed
+in video stream.
+
+@item size
+Set video stream size. Only useful if curves option is activated.
+
+@item mgain
+Set max gain that will be displayed. Only useful if curves option is activated.
+Setting this to reasonable value allows to display gain which is derived from
+neighbour bands which are too close to each other and thus produce higher gain
+when both are activated.
+
+@item fscale
+Set frequency scale used to draw frequency response in video output.
+Can be linear or logarithmic. Default is logarithmic.
+
+@item colors
+Set color for each channel curve which is going to be displayed in video stream.
+This is list of color names separated by space or by '|'.
+Unrecognised or missing colors will be replaced by white color.
+@end table
+
+@subsection Examples
+
+@itemize
+@item
+Lower gain by 10 dB at central frequency 200 Hz with width 100 Hz
+for first 2 channels using Chebyshev type 1 filter:
+@example
+anequalizer=c0 f=200 w=100 g=-10 t=1|c1 f=200 w=100 g=-10 t=1
+@end example
+@end itemize
+
+@subsection Commands
+
+This filter supports the following commands:
+@table @option
+@item change
+Alter existing filter parameters.
+Syntax for the commands is : "@var{fN}|f=@var{freq}|w=@var{width}|g=@var{gain}"
+
+@var{fN} is existing filter number, starting from 0, if no such filter is available
+error is returned.
+@var{freq} set new frequency parameter.
+@var{width} set new width parameter in hertz.
+@var{gain} set new gain parameter in dB.
+
+Full filter invocation with asendcmd may look like this:
+asendcmd=c='4.0 anequalizer change 0|f=200|w=50|g=1',anequalizer=...
+@end table
+
 @section anull
 
 Pass the audio source unchanged to the output.
@@ -797,6 +1256,63 @@ It accepts the following values:
 @end table
 @end table
 
+@section apulsator
+
+Audio pulsator is something between an autopanner and a tremolo.
+But it can produce funny stereo effects as well. Pulsator changes the volume
+of the left and right channel based on a LFO (low frequency oscillator) with
+different waveforms and shifted phases.
+This filter has the ability to define an offset between left and right
+channel. An offset of 0 means that both LFO shapes match each other.
+The left and right channel are altered equally - a conventional tremolo.
+An offset of 50% means that the shape of the right channel is exactly shifted
+in phase (or moved backwards about half of the frequency) - pulsator acts as
+an autopanner. At 1 both curves match again. Every setting in between moves the
+phase shift gapless between all stages and produces some "bypassing" sounds with
+sine and triangle waveforms. The more you set the offset near 1 (starting from
+the 0.5) the faster the signal passes from the left to the right speaker.
+
+The filter accepts the following options:
+
+@table @option
+@item level_in
+Set input gain. By default it is 1. Range is [0.015625 - 64].
+
+@item level_out
+Set output gain. By default it is 1. Range is [0.015625 - 64].
+
+@item mode
+Set waveform shape the LFO will use. Can be one of: sine, triangle, square,
+sawup or sawdown. Default is sine.
+
+@item amount
+Set modulation. Define how much of original signal is affected by the LFO.
+
+@item offset_l
+Set left channel offset. Default is 0. Allowed range is [0 - 1].
+
+@item offset_r
+Set right channel offset. Default is 0.5. Allowed range is [0 - 1].
+
+@item width
+Set pulse width. Default is 1. Allowed range is [0 - 2].
+
+@item timing
+Set possible timing mode. Can be one of: bpm, ms or hz. Default is hz.
+
+@item bpm
+Set bpm. Default is 120. Allowed range is [30 - 300]. Only used if timing
+is set to bpm.
+
+@item ms
+Set ms. Default is 500. Allowed range is [10 - 2000]. Only used if timing
+is set to ms.
+
+@item hz
+Set frequency in Hz. Default is 2. Allowed range is [0.01 - 100]. Only used
+if timing is set to hz.
+@end table
+
 @anchor{aresample}
 @section aresample
 
@@ -929,6 +1445,52 @@ It accepts the following option:
 @item length
 Short window length in seconds, used for peak and trough RMS measurement.
 Default is @code{0.05} (50 milliseconds). Allowed range is @code{[0.1 - 10]}.
+
+@item metadata
+
+Set metadata injection. All the metadata keys are prefixed with @code{lavfi.astats.X},
+where @code{X} is channel number starting from 1 or string @code{Overall}. Default is
+disabled.
+
+Available keys for each channel are:
+DC_offset
+Min_level
+Max_level
+Min_difference
+Max_difference
+Mean_difference
+Peak_level
+RMS_peak
+RMS_trough
+Crest_factor
+Flat_factor
+Peak_count
+Bit_depth
+
+and for Overall:
+DC_offset
+Min_level
+Max_level
+Min_difference
+Max_difference
+Mean_difference
+Peak_level
+RMS_level
+RMS_peak
+RMS_trough
+Flat_factor
+Peak_count
+Bit_depth
+Number_of_samples
+
+For example, a full key looks like this: @code{lavfi.astats.1.DC_offset} or
+this: @code{lavfi.astats.Overall.Peak_count}.
+
+For a description of what each key means, read below.
+
+@item reset
+Set number of frames after which stats are going to be recalculated.
+Default is disabled.
 @end table
 
 A description of each shown parameter follows:
@@ -943,6 +1505,16 @@ Minimal sample level.
 @item Max level
 Maximal sample level.
 
+@item Min difference
+Minimal difference between two consecutive samples.
+
+@item Max difference
+Maximal difference between two consecutive samples.
+
+@item Mean difference
+Mean difference between two consecutive samples.
+The average of each difference between two consecutive samples.
+
 @item Peak level dB
 @item RMS level dB
 Standard peak and RMS level measured in dBFS.
@@ -961,44 +1533,11 @@ Flatness (i.e. consecutive samples with the same value) of the signal at its pea
 @item Peak count
 Number of occasions (not the number of samples) that the signal attained either
 @var{Min level} or @var{Max level}.
-@end table
-
-@section astreamsync
-
-Forward two audio streams and control the order the buffers are forwarded.
 
-The filter accepts the following options:
-
-@table @option
-@item expr, e
-Set the expression deciding which stream should be
-forwarded next: if the result is negative, the first stream is forwarded; if
-the result is positive or zero, the second stream is forwarded. It can use
-the following variables:
-
-@table @var
-@item b1 b2
-number of buffers forwarded so far on each stream
-@item s1 s2
-number of samples forwarded so far on each stream
-@item t1 t2
-current timestamp of each stream
-@end table
-
-The default value is @code{t1-t2}, which means to always forward the stream
-that has a smaller timestamp.
+@item Bit depth
+Overall bit depth of audio. Number of bits used for each sample.
 @end table
 
-@subsection Examples
-
-Stress-test @code{amerge} by randomly sending buffers on the wrong
-input, while avoiding too much of a desynchronization:
-@example
-amovie=file.ogg [a] ; amovie=file.mp3 [b] ;
-[a] [b] astreamsync=(2*random(1))-1+tanh(5*(t1-t2)) [a2] [b2] ;
-[a2] [b2] amerge
-@end example
-
 @section asyncts
 
 Synchronize audio data with timestamps by squeezing/stretching it and/or
@@ -1291,7 +1830,7 @@ the input.
 
 To fix a 5.1 WAV improperly encoded in AAC's native channel order
 @example
-ffmpeg -i in.wav -filter 'channelmap=1|2|0|5|3|4:channel_layout=5.1' out.wav
+ffmpeg -i in.wav -filter 'channelmap=1|2|0|5|3|4:5.1' out.wav
 @end example
 
 @section channelsplit
@@ -1391,6 +1930,8 @@ situations, the attack time (response to the audio getting louder) should be
 shorter than the decay time, because the human ear is more sensitive to sudden
 loud audio than sudden soft audio. A typical value for attack is 0.3 seconds and
 a typical value for decay is 0.8 seconds.
+If specified number of attacks & decays is lower than number of channels, the last
+set attack/decay will be used for all remaining channels.
 
 @item points
 A list of points for the transfer function, specified in dB relative to the
@@ -1436,6 +1977,11 @@ noisy environment:
 compand=.3|.3:1|1:-90/-60|-60/-40|-40/-30|-20/-20:6:0:-90:0.2
 @end example
 
+Another example for audio with whisper and explosion parts:
+@example
+compand=0|0:1|1:-90/-900|-70/-70|-30/-9|0/-3:6:0:0:0
+@end example
+
 @item
 A noise gate for when the noise is at a lower level than the signal:
 @example
@@ -1448,29 +1994,301 @@ than the signal (making it, in some ways, similar to squelch):
 @example
 compand=.1|.1:.1|.1:-45.1/-45.1|-45/-900|0/-900:.01:45:-90:.1
 @end example
-@end itemize
 
-@section dcshift
-Apply a DC shift to the audio.
+@item
+2:1 compression starting at -6dB:
+@example
+compand=points=-80/-80|-6/-6|0/-3.8|20/3.5
+@end example
 
-This can be useful to remove a DC offset (caused perhaps by a hardware problem
-in the recording chain) from the audio. The effect of a DC offset is reduced
-headroom and hence volume. The @ref{astats} filter can be used to determine if
-a signal has a DC offset.
+@item
+2:1 compression starting at -9dB:
+@example
+compand=points=-80/-80|-9/-9|0/-5.3|20/2.9
+@end example
 
-@table @option
-@item shift
-Set the DC shift, allowed range is [-1, 1]. It indicates the amount to shift
-the audio.
+@item
+2:1 compression starting at -12dB:
+@example
+compand=points=-80/-80|-12/-12|0/-6.8|20/1.9
+@end example
 
-@item limitergain
-Optional. It should have a value much less than 1 (e.g. 0.05 or 0.02) and is
-used to prevent clipping.
-@end table
+@item
+2:1 compression starting at -18dB:
+@example
+compand=points=-80/-80|-18/-18|0/-9.8|20/0.7
+@end example
 
-@section earwax
+@item
+3:1 compression starting at -15dB:
+@example
+compand=points=-80/-80|-15/-15|0/-10.8|20/-5.2
+@end example
 
-Make audio easier to listen to on headphones.
+@item
+Compressor/Gate:
+@example
+compand=points=-80/-105|-62/-80|-15.4/-15.4|0/-12|20/-7.6
+@end example
+
+@item
+Expander:
+@example
+compand=attacks=0:points=-80/-169|-54/-80|-49.5/-64.6|-41.1/-41.1|-25.8/-15|-10.8/-4.5|0/0|20/8.3
+@end example
+
+@item
+Hard limiter at -6dB:
+@example
+compand=attacks=0:points=-80/-80|-6/-6|20/-6
+@end example
+
+@item
+Hard limiter at -12dB:
+@example
+compand=attacks=0:points=-80/-80|-12/-12|20/-12
+@end example
+
+@item
+Hard noise gate at -35 dB:
+@example
+compand=attacks=0:points=-80/-115|-35.1/-80|-35/-35|20/20
+@end example
+
+@item
+Soft limiter:
+@example
+compand=attacks=0:points=-80/-80|-12.4/-12.4|-6/-8|0/-6.8|20/-2.8
+@end example
+@end itemize
+
+@section compensationdelay
+
+Compensation Delay Line is a metric based delay to compensate differing
+positions of microphones or speakers.
+
+For example, you have recorded guitar with two microphones placed in
+different locations. Because the front of sound wave has fixed speed in
+normal conditions, the phasing of microphones can vary and depends on
+their location and interposition. The best sound mix can be achieved when
+these microphones are in phase (synchronized). Note that a distance of
+~30 cm between microphones makes one microphone capture the signal in
+antiphase to the other microphone. That makes the final mix sound moody.
+This filter helps to solve phasing problems by adding different delays
+to each microphone track and make them synchronized.
+
+The best result can be reached when you take one track as base and
+synchronize other tracks one by one with it.
+Remember that synchronization/delay tolerance depends on sample rate, too.
+Higher sample rates will give more tolerance.
+
+It accepts the following parameters:
+
+@table @option
+@item mm
+Set millimeters distance. This is compensation distance for fine tuning.
+Default is 0.
+
+@item cm
+Set cm distance. This is compensation distance for tightening distance setup.
+Default is 0.
+
+@item m
+Set meters distance. This is compensation distance for hard distance setup.
+Default is 0.
+
+@item dry
+Set dry amount. Amount of unprocessed (dry) signal.
+Default is 0.
+
+@item wet
+Set wet amount. Amount of processed (wet) signal.
+Default is 1.
+
+@item temp
+Set temperature degree in Celsius. This is the temperature of the environment.
+Default is 20.
+@end table
+
+@section dcshift
+Apply a DC shift to the audio.
+
+This can be useful to remove a DC offset (caused perhaps by a hardware problem
+in the recording chain) from the audio. The effect of a DC offset is reduced
+headroom and hence volume. The @ref{astats} filter can be used to determine if
+a signal has a DC offset.
+
+@table @option
+@item shift
+Set the DC shift, allowed range is [-1, 1]. It indicates the amount to shift
+the audio.
+
+@item limitergain
+Optional. It should have a value much less than 1 (e.g. 0.05 or 0.02) and is
+used to prevent clipping.
+@end table
+
+@section dynaudnorm
+Dynamic Audio Normalizer.
+
+This filter applies a certain amount of gain to the input audio in order
+to bring its peak magnitude to a target level (e.g. 0 dBFS). However, in
+contrast to more "simple" normalization algorithms, the Dynamic Audio
+Normalizer *dynamically* re-adjusts the gain factor to the input audio.
+This allows for applying extra gain to the "quiet" sections of the audio
+while avoiding distortions or clipping the "loud" sections. In other words:
+The Dynamic Audio Normalizer will "even out" the volume of quiet and loud
+sections, in the sense that the volume of each section is brought to the
+same target level. Note, however, that the Dynamic Audio Normalizer achieves
+this goal *without* applying "dynamic range compressing". It will retain 100%
+of the dynamic range *within* each section of the audio file.
+
+@table @option
+@item f
+Set the frame length in milliseconds. In range from 10 to 8000 milliseconds.
+Default is 500 milliseconds.
+The Dynamic Audio Normalizer processes the input audio in small chunks,
+referred to as frames. This is required, because a peak magnitude has no
+meaning for just a single sample value. Instead, we need to determine the
+peak magnitude for a contiguous sequence of sample values. While a "standard"
+normalizer would simply use the peak magnitude of the complete file, the
+Dynamic Audio Normalizer determines the peak magnitude individually for each
+frame. The length of a frame is specified in milliseconds. By default, the
+Dynamic Audio Normalizer uses a frame length of 500 milliseconds, which has
+been found to give good results with most files.
+Note that the exact frame length, in number of samples, will be determined
+automatically, based on the sampling rate of the individual input audio file.
+
+@item g
+Set the Gaussian filter window size. In range from 3 to 301, must be odd
+number. Default is 31.
+Probably the most important parameter of the Dynamic Audio Normalizer is the
+@code{window size} of the Gaussian smoothing filter. The filter's window size
+is specified in frames, centered around the current frame. For the sake of
+simplicity, this must be an odd number. Consequently, the default value of 31
+takes into account the current frame, as well as the 15 preceding frames and
+the 15 subsequent frames. Using a larger window results in a stronger
+smoothing effect and thus in less gain variation, i.e. slower gain
+adaptation. Conversely, using a smaller window results in a weaker smoothing
+effect and thus in more gain variation, i.e. faster gain adaptation.
+In other words, the more you increase this value, the more the Dynamic Audio
+Normalizer will behave like a "traditional" normalization filter. On the
+contrary, the more you decrease this value, the more the Dynamic Audio
+Normalizer will behave like a dynamic range compressor.
+
+@item p
+Set the target peak value. This specifies the highest permissible magnitude
+level for the normalized audio input. This filter will try to approach the
+target peak magnitude as closely as possible, but at the same time it also
+makes sure that the normalized signal will never exceed the peak magnitude.
+A frame's maximum local gain factor is imposed directly by the target peak
+magnitude. The default value is 0.95 and thus leaves a headroom of 5%.
+It is not recommended to go above this value.
+
+@item m
+Set the maximum gain factor. In range from 1.0 to 100.0. Default is 10.0.
+The Dynamic Audio Normalizer determines the maximum possible (local) gain
+factor for each input frame, i.e. the maximum gain factor that does not
+result in clipping or distortion. The maximum gain factor is determined by
+the frame's highest magnitude sample. However, the Dynamic Audio Normalizer
+additionally bounds the frame's maximum gain factor by a predetermined
+(global) maximum gain factor. This is done in order to avoid excessive gain
+factors in "silent" or almost silent frames. By default, the maximum gain
+factor is 10.0. For most inputs the default value should be sufficient and
+it usually is not recommended to increase this value. Though, for input
+with an extremely low overall volume level, it may be necessary to allow even
+higher gain factors. Note, however, that the Dynamic Audio Normalizer does
+not simply apply a "hard" threshold (i.e. cut off values above the threshold).
+Instead, a "sigmoid" threshold function will be applied. This way, the
+gain factors will smoothly approach the threshold value, but never exceed that
+value.
+
+@item r
+Set the target RMS. In range from 0.0 to 1.0. Default is 0.0 - disabled.
+By default, the Dynamic Audio Normalizer performs "peak" normalization.
+This means that the maximum local gain factor for each frame is defined
+(only) by the frame's highest magnitude sample. This way, the samples can
+be amplified as much as possible without exceeding the maximum signal
+level, i.e. without clipping. Optionally, however, the Dynamic Audio
+Normalizer can also take into account the frame's root mean square,
+abbreviated RMS. In electrical engineering, the RMS is commonly used to
+determine the power of a time-varying signal. It is therefore considered
+that the RMS is a better approximation of the "perceived loudness" than
+just looking at the signal's peak magnitude. Consequently, by adjusting all
+frames to a constant RMS value, a uniform "perceived loudness" can be
+established. If a target RMS value has been specified, a frame's local gain
+factor is defined as the factor that would result in exactly that RMS value.
+Note, however, that the maximum local gain factor is still restricted by the
+frame's highest magnitude sample, in order to prevent clipping.
+
+@item n
+Enable channels coupling. By default is enabled.
+By default, the Dynamic Audio Normalizer will amplify all channels by the same
+amount. This means the same gain factor will be applied to all channels, i.e.
+the maximum possible gain factor is determined by the "loudest" channel.
+However, in some recordings, it may happen that the volume of the different
+channels is uneven, e.g. one channel may be "quieter" than the other one(s).
+In this case, this option can be used to disable the channel coupling. This way,
+the gain factor will be determined independently for each channel, depending
+only on the individual channel's highest magnitude sample. This allows for
+harmonizing the volume of the different channels.
+
+@item c
+Enable DC bias correction. By default is disabled.
+An audio signal (in the time domain) is a sequence of sample values.
+In the Dynamic Audio Normalizer these sample values are represented in the
+-1.0 to 1.0 range, regardless of the original input format. Normally, the
+audio signal, or "waveform", should be centered around the zero point.
+That means if we calculate the mean value of all samples in a file, or in a
+single frame, then the result should be 0.0 or at least very close to that
+value. If, however, there is a significant deviation of the mean value from
+0.0, in either positive or negative direction, this is referred to as a
+DC bias or DC offset. Since a DC bias is clearly undesirable, the Dynamic
+Audio Normalizer provides optional DC bias correction.
+With DC bias correction enabled, the Dynamic Audio Normalizer will determine
+the mean value, or "DC correction" offset, of each input frame and subtract
+that value from all of the frame's sample values which ensures those samples
+are centered around 0.0 again. Also, in order to avoid "gaps" at the frame
+boundaries, the DC correction offset values will be interpolated smoothly
+between neighbouring frames.
+
+@item b
+Enable alternative boundary mode. By default is disabled.
+The Dynamic Audio Normalizer takes into account a certain neighbourhood
+around each frame. This includes the preceding frames as well as the
+subsequent frames. However, for the "boundary" frames, located at the very
+beginning and at the very end of the audio file, not all neighbouring
+frames are available. In particular, for the first few frames in the audio
+file, the preceding frames are not known. And, similarly, for the last few
+frames in the audio file, the subsequent frames are not known. Thus, the
+question arises which gain factors should be assumed for the missing frames
+in the "boundary" region. The Dynamic Audio Normalizer implements two modes
+to deal with this situation. The default boundary mode assumes a gain factor
+of exactly 1.0 for the missing frames, resulting in a smooth "fade in" and
+"fade out" at the beginning and at the end of the input, respectively.
+
+@item s
+Set the compress factor. In range from 0.0 to 30.0. Default is 0.0.
+By default, the Dynamic Audio Normalizer does not apply "traditional"
+compression. This means that signal peaks will not be pruned and thus the
+full dynamic range will be retained within each local neighbourhood. However,
+in some cases it may be desirable to combine the Dynamic Audio Normalizer's
+normalization algorithm with a more "traditional" compression.
+For this purpose, the Dynamic Audio Normalizer provides an optional compression
+(thresholding) function. If (and only if) the compression feature is enabled,
+all input frames will be processed by a soft knee thresholding function prior
+to the actual normalization process. Put simply, the thresholding function is
+going to prune all samples whose magnitude exceeds a certain threshold value.
+However, the Dynamic Audio Normalizer does not simply apply a fixed threshold
+value. Instead, the threshold value will be adjusted for each individual
+frame.
+In general, smaller parameters result in stronger compression, and vice versa.
+Values below 3.0 are not recommended, because audible distortion may appear.
+@end table
+
+@section earwax
+
+Make audio easier to listen to on headphones.
 
 This filter adds `cues' to 44.1kHz stereo (i.e. audio CD format) audio
 so that when listened to on headphones the stereo image is moved from
@@ -1531,6 +2349,23 @@ equalizer=f=1000:width_type=q:width=1:g=2,equalizer=f=100:width_type=q:width=2:g
 @end example
 @end itemize
 
+@section extrastereo
+
+Linearly increases the difference between left and right channels which
+adds some sort of "live" effect to playback.
+
+The filter accepts the following option:
+
+@table @option
+@item m
+Sets the difference coefficient (default: 2.5). 0.0 means mono sound
+(average of both channels), with 1.0 sound will be unchanged, with
+-1.0 left and right channels will be swapped.
+
+@item c
+Enable clipping. By default is enabled.
+@end table
+
 @section flanger
 Apply a flanging effect to the audio.
 
@@ -1667,6 +2502,9 @@ threshold or gain).
 Controls need to be defined using the following syntax:
 c0=@var{value0}|c1=@var{value1}|c2=@var{value2}|..., where
 @var{valuei} is the value set on the @var{i}-th control.
+Alternatively they can be also defined using the following syntax:
+@var{value0}|@var{value1}|@var{value2}|..., where
+@var{valuei} is the value set on the @var{i}-th control.
 If @option{controls} is set to @code{help}, all available controls and
 their valid ranges are printed.
 
@@ -1738,6 +2576,20 @@ Apply @code{C* Eq10X2 - Stereo 10-band equaliser} effect:
 @example
 ladspa=caps:Eq10X2:c=c0=-48|c9=-24|c3=12|c4=2
 @end example
+
+@item
+Increase volume by 20dB using fast lookahead limiter from Steve Harris
+@code{SWH Plugins} collection:
+@example
+ladspa=fast_lookahead_limiter_1913:fastLookaheadLimiter:20|0|2
+@end example
+
+@item
+Attenuate low frequencies using Multiband EQ from Steve Harris
+@code{SWH Plugins} collection:
+@example
+ladspa=mbeq_1197:mbeq:-24|-24|-24|0|0|0|0|0|0|0|0|0|0|0|0
+@end example
 @end itemize
 
 @subsection Commands
@@ -1784,6 +2636,7 @@ Applies only to double-pole filter.
 The default is 0.707q and gives a Butterworth response.
 @end table
 
+@anchor{pan}
 @section pan
 
 Mix channels with specific gain levels. The filter accepts the output
@@ -1885,6 +2738,221 @@ At end of filtering it displays @code{track_gain} and @code{track_peak}.
 Convert the audio sample format, sample rate and channel layout. It is
 not meant to be used directly.
 
+@section rubberband
+Apply time-stretching and pitch-shifting with librubberband.
+
+The filter accepts the following options:
+
+@table @option
+@item tempo
+Set tempo scale factor.
+
+@item pitch
+Set pitch scale factor.
+
+@item transients
+Set transients detector.
+Possible values are:
+@table @var
+@item crisp
+@item mixed
+@item smooth
+@end table
+
+@item detector
+Set detector.
+Possible values are:
+@table @var
+@item compound
+@item percussive
+@item soft
+@end table
+
+@item phase
+Set phase.
+Possible values are:
+@table @var
+@item laminar
+@item independent
+@end table
+
+@item window
+Set processing window size.
+Possible values are:
+@table @var
+@item standard
+@item short
+@item long
+@end table
+
+@item smoothing
+Set smoothing.
+Possible values are:
+@table @var
+@item off
+@item on
+@end table
+
+@item formant
+Enable formant preservation when pitch shifting.
+Possible values are:
+@table @var
+@item shifted
+@item preserved
+@end table
+
+@item pitchq
+Set pitch quality.
+Possible values are:
+@table @var
+@item quality
+@item speed
+@item consistency
+@end table
+
+@item channels
+Set channels.
+Possible values are:
+@table @var
+@item apart
+@item together
+@end table
+@end table
+
+@section sidechaincompress
+
+This filter acts like normal compressor but has the ability to compress
+detected signal using second input signal.
+It needs two input streams and returns one output stream.
+First input stream will be processed depending on second stream signal.
+The filtered signal then can be filtered with other filters in later stages of
+processing. See @ref{pan} and @ref{amerge} filter.
+
+The filter accepts the following options:
+
+@table @option
+@item level_in
+Set input gain. Default is 1. Range is between 0.015625 and 64.
+
+@item threshold
+If a signal of second stream rises above this level it will affect the gain
+reduction of first stream.
+By default is 0.125. Range is between 0.000976563 and 1.
+
+@item ratio
+Set a ratio by which the signal is reduced. 1:2 means that if the level
+rises 4dB above the threshold, it will be only 2dB above after the reduction.
+Default is 2. Range is between 1 and 20.
+
+@item attack
+Amount of milliseconds the signal has to rise above the threshold before gain
+reduction starts. Default is 20. Range is between 0.01 and 2000.
+
+@item release
+Amount of milliseconds the signal has to fall below the threshold before
+reduction is decreased again. Default is 250. Range is between 0.01 and 9000.
+
+@item makeup
+Set the amount by how much signal will be amplified after processing.
+Default is 2. Range is between 1 and 64.
+
+@item knee
+Curve the sharp knee around the threshold to enter gain reduction more softly.
+Default is 2.82843. Range is between 1 and 8.
+
+@item link
+Choose if the @code{average} level between all channels of side-chain stream
+or the louder (@code{maximum}) channel of side-chain stream affects the
+reduction. Default is @code{average}.
+
+@item detection
+Should the exact signal be taken in case of @code{peak} or an RMS one in case
+of @code{rms}. Default is @code{rms} which is mainly smoother.
+
+@item level_sc
+Set sidechain gain. Default is 1. Range is between 0.015625 and 64.
+
+@item mix
+How much to use compressed signal in output. Default is 1.
+Range is between 0 and 1.
+@end table
+
+@subsection Examples
+
+@itemize
+@item
+Full ffmpeg example taking 2 audio inputs, 1st input to be compressed
+depending on the signal of 2nd input and later compressed signal to be
+merged with 2nd input:
+@example
+ffmpeg -i main.flac -i sidechain.flac -filter_complex "[1:a]asplit=2[sc][mix];[0:a][sc]sidechaincompress[compr];[compr][mix]amerge"
+@end example
+@end itemize
+
+@section sidechaingate
+
+A sidechain gate acts like a normal (wideband) gate but has the ability to
+filter the detected signal before sending it to the gain reduction stage.
+Normally a gate uses the full range signal to detect a level above the
+threshold.
+For example: If you cut all lower frequencies from your sidechain signal
+the gate will decrease the volume of your track only if not enough highs
+appear. With this technique you are able to reduce the resonation of a
+natural drum or remove "rumbling" of muted strokes from a heavily distorted
+guitar.
+It needs two input streams and returns one output stream.
+First input stream will be processed depending on second stream signal.
+
+The filter accepts the following options:
+
+@table @option
+@item level_in
+Set input level before filtering.
+Default is 1. Allowed range is from 0.015625 to 64.
+
+@item range
+Set the level of gain reduction when the signal is below the threshold.
+Default is 0.06125. Allowed range is from 0 to 1.
+
+@item threshold
+If a signal rises above this level the gain reduction is released.
+Default is 0.125. Allowed range is from 0 to 1.
+
+@item ratio
+Set a ratio by which the signal is reduced.
+Default is 2. Allowed range is from 1 to 9000.
+
+@item attack
+Amount of milliseconds the signal has to rise above the threshold before gain
+reduction stops.
+Default is 20 milliseconds. Allowed range is from 0.01 to 9000.
+
+@item release
+Amount of milliseconds the signal has to fall below the threshold before the
+reduction is increased again. Default is 250 milliseconds.
+Allowed range is from 0.01 to 9000.
+
+@item makeup
+Set amount of amplification of signal after processing.
+Default is 1. Allowed range is from 1 to 64.
+
+@item knee
+Curve the sharp knee around the threshold to enter gain reduction more softly.
+Default is 2.828427125. Allowed range is from 1 to 8.
+
+@item detection
+Choose if exact signal should be taken for detection or an RMS like one.
+Default is rms. Can be peak or rms.
+
+@item link
+Choose if the average level between all channels or the louder channel affects
+the reduction.
+Default is average. Can be average or maximum.
+
+@item level_sc
+Set sidechain gain. Default is 1. Range is from 0.015625 to 64.
+@end table
+
 @section silencedetect
 
 Detect silence in an audio stream.
@@ -1978,6 +3046,14 @@ at the beginning of each period of silence.
 For example, if you want to remove long pauses between words but do not want
 to remove the pauses completely. Default value is @code{0}.
 
+@item detection
+Set how silence is detected. Can be @code{rms} or @code{peak}. Second is faster
+and works better with digital silence which is exactly 0.
+Default value is @code{rms}.
+
+@item window
+Set ratio used to calculate size of window for detecting silence.
+Default value is @code{0.02}. Allowed range is from @code{0} to @code{10}.
 @end table
 
 @subsection Examples
@@ -1990,8 +3066,177 @@ pressing the record button and the start of the performance:
 @example
 silenceremove=1:5:0.02
 @end example
+
+@item
+Trim all silence encountered from beginning to end where there is more than 1
+second of silence in audio:
+@example
+silenceremove=0:0:0:-1:1:-90dB
+@end example
 @end itemize
 
+@section sofalizer
+
+SOFAlizer uses head-related transfer functions (HRTFs) to create virtual
+loudspeakers around the user for binaural listening via headphones (audio
+formats up to 9 channels supported).
+The HRTFs are stored in SOFA files (see @url{http://www.sofacoustics.org/} for a database).
+SOFAlizer is developed at the Acoustics Research Institute (ARI) of the
+Austrian Academy of Sciences.
+
+To enable compilation of this filter you need to configure FFmpeg with
+@code{--enable-netcdf}.
+
+The filter accepts the following options:
+
+@table @option
+@item sofa
+Set the SOFA file used for rendering.
+
+@item gain
+Set gain applied to audio. Value is in dB. Default is 0.
+
+@item rotation
+Set rotation of virtual loudspeakers in deg. Default is 0.
+
+@item elevation
+Set elevation of virtual speakers in deg. Default is 0.
+
+@item radius
+Set distance in meters between loudspeakers and the listener with near-field
+HRTFs. Default is 1.
+
+@item type
+Set processing type. Can be @var{time} or @var{freq}. @var{time} is
+processing audio in time domain which is slow but gives high quality output.
+@var{freq} is processing audio in frequency domain which is fast but gives
+mediocre output. Default is @var{freq}.
+@end table
+
+@section stereotools
+
+This filter has some handy utilities to manage stereo signals, for converting
+M/S stereo recordings to L/R signal while having control over the parameters
+or spreading the stereo image of master track.
+
+The filter accepts the following options:
+
+@table @option
+@item level_in
+Set input level before filtering for both channels. Default is 1.
+Allowed range is from 0.015625 to 64.
+
+@item level_out
+Set output level after filtering for both channels. Default is 1.
+Allowed range is from 0.015625 to 64.
+
+@item balance_in
+Set input balance between both channels. Default is 0.
+Allowed range is from -1 to 1.
+
+@item balance_out
+Set output balance between both channels. Default is 0.
+Allowed range is from -1 to 1.
+
+@item softclip
+Enable softclipping. Results in analog distortion instead of harsh digital 0dB
+clipping. Disabled by default.
+
+@item mutel
+Mute the left channel. Disabled by default.
+
+@item muter
+Mute the right channel. Disabled by default.
+
+@item phasel
+Change the phase of the left channel. Disabled by default.
+
+@item phaser
+Change the phase of the right channel. Disabled by default.
+
+@item mode
+Set stereo mode. Available values are:
+
+@table @samp
+@item lr>lr
+Left/Right to Left/Right, this is default.
+
+@item lr>ms
+Left/Right to Mid/Side.
+
+@item ms>lr
+Mid/Side to Left/Right.
+
+@item lr>ll
+Left/Right to Left/Left.
+
+@item lr>rr
+Left/Right to Right/Right.
+
+@item lr>l+r
+Left/Right to Left + Right.
+
+@item lr>rl
+Left/Right to Right/Left.
+@end table
+
+@item slev
+Set level of side signal. Default is 1.
+Allowed range is from 0.015625 to 64.
+
+@item sbal
+Set balance of side signal. Default is 0.
+Allowed range is from -1 to 1.
+
+@item mlev
+Set level of the middle signal. Default is 1.
+Allowed range is from 0.015625 to 64.
+
+@item mpan
+Set middle signal pan. Default is 0. Allowed range is from -1 to 1.
+
+@item base
+Set stereo base between mono and inversed channels. Default is 0.
+Allowed range is from -1 to 1.
+
+@item delay
+Set delay in milliseconds how much to delay left from right channel and
+vice versa. Default is 0. Allowed range is from -20 to 20.
+
+@item sclevel
+Set S/C level. Default is 1. Allowed range is from 1 to 100.
+
+@item phase
+Set the stereo phase in degrees. Default is 0. Allowed range is from 0 to 360.
+@end table
+
+@section stereowiden
+
+This filter enhances the stereo effect by suppressing signal common to both
+channels and by delaying the signal of left into right and vice versa,
+thereby widening the stereo effect.
+
+The filter accepts the following options:
+
+@table @option
+@item delay
+Time in milliseconds of the delay of left signal into right and vice versa.
+Default is 20 milliseconds.
+
+@item feedback
+Amount of gain in delayed signal into right and vice versa. Gives a delay
+effect of left signal in right output and vice versa which gives widening
+effect. Default is 0.3.
+
+@item crossfeed
+Cross feed of left into right with inverted phase. This helps in suppressing
+the mono. If the value is 1 it will cancel all the signal common to both
+channels. Default is 0.3.
+
+@item drymix
+Set level of input signal of original channel. Default is 0.8.
+@end table
+
 @section treble
 
 Boost or cut treble (upper) frequencies of the audio using a two-pole
@@ -2028,7 +3273,42 @@ slope
 Determine how steep is the filter's shelf transition.
 @end table
 
-@section volume
+@section tremolo
+
+Sinusoidal amplitude modulation.
+
+The filter accepts the following options:
+
+@table @option
+@item f
+Modulation frequency in Hertz. Modulation frequencies in the subharmonic range
+(20 Hz or lower) will result in a tremolo effect.
+This filter may also be used as a ring modulator by specifying
+a modulation frequency higher than 20 Hz.
+Range is 0.1 - 20000.0. Default value is 5.0 Hz.
+
+@item d
+Depth of modulation as a percentage. Range is 0.0 - 1.0.
+Default value is 0.5.
+@end table
+
+@section vibrato
+
+Sinusoidal phase modulation.
+
+The filter accepts the following options:
+
+@table @option
+@item f
+Modulation frequency in Hertz.
+Range is 0.1 - 20000.0. Default value is 5.0 Hz.
+
+@item d
+Depth of modulation as a percentage. Range is 0.0 - 1.0.
+Default value is 0.5.
+@end table
+
+@section volume
 
 Adjust the input audio volume.
 
@@ -2486,6 +3766,46 @@ ffplay -f lavfi flite=text='No more be grieved for which that thou hast done.'
 For more information about libflite, check:
 @url{http://www.speech.cs.cmu.edu/flite/}
 
+@section anoisesrc
+
+Generate a noise audio signal.
+
+The filter accepts the following options:
+
+@table @option
+@item sample_rate, r
+Specify the sample rate. Default value is 48000 Hz.
+
+@item amplitude, a
+Specify the amplitude (0.0 - 1.0) of the generated audio stream. Default value
+is 1.0.
+
+@item duration, d
+Specify the duration of the generated audio stream. Not specifying this option
+results in noise with an infinite length.
+
+@item color, colour, c
+Specify the color of noise. Available noise colors are white, pink, and brown.
+Default color is white.
+
+@item seed, s
+Specify a value used to seed the PRNG.
+
+@item nb_samples, n
+Set the number of samples per each output frame, default is 1024.
+@end table
+
+@subsection Examples
+
+@itemize
+
+@item
+Generate 60 seconds of pink noise, with a 44.1 kHz sampling rate and an amplitude of 0.5:
+@example
+anoisesrc=d=60:c=pink:r=44100:a=0.5
+@end example
+@end itemize
+
 @section sine
 
 Generate an audio signal made of a sine wave with amplitude 1/8.
@@ -2510,7 +3830,26 @@ Specify the sample rate, default is 44100.
 Specify the duration of the generated audio stream.
 
 @item samples_per_frame
-Set the number of samples per output frame, default is 1024.
+Set the number of samples per output frame.
+
+The expression can contain the following constants:
+
+@table @option
+@item n
+The (sequential) number of the output audio frame, starting from 0.
+
+@item pts
+The PTS (Presentation TimeStamp) of the output audio frame,
+expressed in @var{TB} units.
+
+@item t
+The PTS of the output audio frame, expressed in seconds.
+
+@item TB
+The timebase of the output audio frames.
+@end table
+
+Default is @code{1024}.
 @end table
 
 @subsection Examples
@@ -2531,6 +3870,12 @@ sine=f=220:b=4:d=5
 sine=frequency=220:beep_factor=4:duration=5
 @end example
 
+@item
+Generate a 1 kHz sine wave following @code{1602,1601,1602,1601,1602} NTSC
+pattern:
+@example
+sine=1000:samples_per_frame='st(0,mod(n,5)); 1602-not(not(eq(ld(0),1)+eq(ld(0),3)))'
+@end example
 @end itemize
 
 @c man end AUDIO SOURCES
@@ -2620,6 +3965,44 @@ Slower shaper using OpenType for substitutions and positioning
 The default is @code{auto}.
 @end table
 
+@section atadenoise
+Apply an Adaptive Temporal Averaging Denoiser to the video input.
+
+The filter accepts the following options:
+
+@table @option
+@item 0a
+Set threshold A for 1st plane. Default is 0.02.
+Valid range is 0 to 0.3.
+
+@item 0b
+Set threshold B for 1st plane. Default is 0.04.
+Valid range is 0 to 5.
+
+@item 1a
+Set threshold A for 2nd plane. Default is 0.02.
+Valid range is 0 to 0.3.
+
+@item 1b
+Set threshold B for 2nd plane. Default is 0.04.
+Valid range is 0 to 5.
+
+@item 2a
+Set threshold A for 3rd plane. Default is 0.02.
+Valid range is 0 to 0.3.
+
+@item 2b
+Set threshold B for 3rd plane. Default is 0.04.
+Valid range is 0 to 5.
+
+Threshold A is designed to react to abrupt changes in the input signal and
+threshold B is designed to react to continuous changes in the input signal.
+
+@item s
+Set number of frames filter will use for averaging. Default is 33. Must be odd
+number in range [5, 129].
+@end table
+
 @section bbox
 
 Compute the bounding box for the non-black pixels in the input frame
@@ -2739,6 +4122,7 @@ of @var{all_mode}. Default value is @code{normal}.
 Available values for component modes are:
 @table @samp
 @item addition
+@item addition128
 @item and
 @item average
 @item burn
@@ -2754,6 +4138,7 @@ Available values for component modes are:
 @item lighten
 @item linearlight
 @item multiply
+@item multiply128
 @item negation
 @item normal
 @item or
@@ -2858,6 +4243,12 @@ Apply uncover up-left effect:
 blend=all_expr='if(gte(T*SH*40+Y,H)*gte((T*40*SW+X)*W/H,W),A,B)'
 @end example
 
+@item
+Split the video diagonally and show the top and bottom layers on each side:
+@example
+blend=all_expr=if(gt(X,Y*(W/H)),A,B)
+@end example
+
 @item
 Display differences between the current and the previous frame:
 @example
@@ -2953,6 +4344,51 @@ boxblur=luma_radius=min(h\,w)/10:luma_power=1:chroma_radius=min(cw\,ch)/10:chrom
 @end example
 @end itemize
 
+@section chromakey
+YUV colorspace color/chroma keying.
+
+The filter accepts the following options:
+
+@table @option
+@item color
+The color which will be replaced with transparency.
+
+@item similarity
+Similarity percentage with the key color.
+
+0.01 matches only the exact key color, while 1.0 matches everything.
+
+@item blend
+Blend percentage.
+
+0.0 makes pixels either fully transparent, or not transparent at all.
+
+Higher values result in semi-transparent pixels, with a higher transparency
+the more similar the pixel's color is to the key color.
+
+@item yuv
+Signals that the color passed is already in YUV instead of RGB.
+
+Literal colors like "green" or "red" don't make sense with this enabled anymore.
+This can be used to pass exact YUV values as hexadecimal numbers.
+@end table
+
+@subsection Examples
+
+@itemize
+@item
+Make every green pixel in the input image transparent:
+@example
+ffmpeg -i input.png -vf chromakey=green out.png
+@end example
+
+@item
+Overlay a greenscreen-video on top of a static black background.
+@example
+ffmpeg -f lavfi -i color=c=black:s=1280x720 -i video.mp4 -shortest -filter_complex "[1:v]chromakey=0x70de77:0.1:0.2[ckout];[0:v][ckout]overlay[out]" -map "[out]" output.mkv
+@end example
+@end itemize
+
 @section codecview
 
 Visualize information exported by some codecs.
@@ -2977,6 +4413,9 @@ forward predicted MVs of B-frames
 @item bb
 backward predicted MVs of B-frames
 @end table
+
+@item qp
+Display quantization parameters using the chroma planes
 @end table
 
 @subsection Examples
@@ -3029,6 +4468,45 @@ colorbalance=rs=.3
 @end example
 @end itemize
 
+@section colorkey
+RGB colorspace color keying.
+
+The filter accepts the following options:
+
+@table @option
+@item color
+The color which will be replaced with transparency.
+
+@item similarity
+Similarity percentage with the key color.
+
+0.01 matches only the exact key color, while 1.0 matches everything.
+
+@item blend
+Blend percentage.
+
+0.0 makes pixels either fully transparent, or not transparent at all.
+
+Higher values result in semi-transparent pixels, with a higher transparency
+the more similar the pixel's color is to the key color.
+@end table
+
+@subsection Examples
+
+@itemize
+@item
+Make every green pixel in the input image transparent:
+@example
+ffmpeg -i input.png -vf colorkey=green out.png
+@end example
+
+@item
+Overlay a greenscreen-video on top of a static background image.
+@example
+ffmpeg -i background.png -i video.mp4 -filter_complex "[1:v]colorkey=0x3BBD1E:0.3:0.2[ckout];[0:v][ckout]overlay[out]" -map "[out]" output.flv
+@end example
+@end itemize
+
 @section colorlevels
 
 Adjust video input frames using levels.
@@ -3191,6 +4669,68 @@ For example to convert from BT.601 to SMPTE-240M, use the command:
 colormatrix=bt601:smpte240m
 @end example
 
+@section convolution
+
+Apply convolution 3x3 or 5x5 filter.
+
+The filter accepts the following options:
+
+@table @option
+@item 0m
+@item 1m
+@item 2m
+@item 3m
+Set matrix for each plane.
+Matrix is sequence of 9 or 25 signed integers.
+
+@item 0rdiv
+@item 1rdiv
+@item 2rdiv
+@item 3rdiv
+Set multiplier for calculated value for each plane.
+
+@item 0bias
+@item 1bias
+@item 2bias
+@item 3bias
+Set bias for each plane. This value is added to the result of the multiplication.
+Useful for making the overall image brighter or darker. Default is 0.0.
+@end table
+
+@subsection Examples
+
+@itemize
+@item
+Apply sharpen:
+@example
+convolution="0 -1 0 -1 5 -1 0 -1 0:0 -1 0 -1 5 -1 0 -1 0:0 -1 0 -1 5 -1 0 -1 0:0 -1 0 -1 5 -1 0 -1 0"
+@end example
+
+@item
+Apply blur:
+@example
+convolution="1 1 1 1 1 1 1 1 1:1 1 1 1 1 1 1 1 1:1 1 1 1 1 1 1 1 1:1 1 1 1 1 1 1 1 1:1/9:1/9:1/9:1/9"
+@end example
+
+@item
+Apply edge enhance:
+@example
+convolution="0 0 0 -1 1 0 0 0 0:0 0 0 -1 1 0 0 0 0:0 0 0 -1 1 0 0 0 0:0 0 0 -1 1 0 0 0 0:5:1:1:1:0:128:128:128"
+@end example
+
+@item
+Apply edge detect:
+@example
+convolution="0 1 0 1 -4 1 0 1 0:0 1 0 1 -4 1 0 1 0:0 1 0 1 -4 1 0 1 0:0 1 0 1 -4 1 0 1 0:5:5:5:1:0:128:128:128"
+@end example
+
+@item
+Apply emboss:
+@example
+convolution="-2 -1 0 -1 1 1 0 1 2:-2 -1 0 -1 1 1 0 1 2:-2 -1 0 -1 1 1 0 1 2:-2 -1 0 -1 1 1 0 1 2"
+@end example
+@end itemize
+
 @section copy
 
 Copy the input source unchanged to the output. This is mainly useful for
@@ -3206,12 +4746,12 @@ It accepts the following parameters:
 @item w, out_w
 The width of the output video. It defaults to @code{iw}.
 This expression is evaluated only once during the filter
-configuration.
+configuration, or when the @samp{w} or @samp{out_w} command is sent.
 
 @item h, out_h
 The height of the output video. It defaults to @code{ih}.
 This expression is evaluated only once during the filter
-configuration.
+configuration, or when the @samp{h} or @samp{out_h} command is sent.
 
 @item x
 The horizontal position, in the input video, of the left edge of the output
@@ -3371,6 +4911,22 @@ crop=in_w/2:in_h/2:y:10+10*sin(n/10)
 @end example
 @end itemize
 
+@subsection Commands
+
+This filter supports the following commands:
+@table @option
+@item w, out_w
+@item h, out_h
+@item x
+@item y
+Set width/height of the output video and the horizontal/vertical position
+in the input video.
+The command accepts the same syntax as the corresponding option.
+
+If the specified expression is not valid, it is kept at its current
+value.
+@end table
+
 @section cropdetect
 
 Auto-detect the crop size.
@@ -3471,7 +5027,7 @@ Can be used in addition to the other key points component
 options. In this case, the unset component(s) will fallback on this
 @option{all} setting.
 @item psfile
-Specify a Photoshop curves file (@code{.asv}) to import the settings from.
+Specify a Photoshop curves file (@code{.acv}) to import the settings from.
 @end table
 
 To avoid some filtergraph syntax conflicts, each key points list need to be
@@ -3516,7 +5072,7 @@ curves=vintage
 @item
 Use a Photoshop preset and redefine the points of the green component:
 @example
-curves=psfile='MyCurvesPresets/purple.asv':green='0.45/0.53'
+curves=psfile='MyCurvesPresets/purple.acv':green='0.45/0.53'
 @end example
 @end itemize
 
@@ -3587,6 +5143,43 @@ Violent denoise using a block size of @code{16x16}:
 dctdnoiz=15:n=4
 @end example
 
+@section deband
+
+Remove banding artifacts from input video.
+It works by replacing banded pixels with average value of referenced pixels.
+
+The filter accepts the following options:
+
+@table @option
+@item 1thr
+@item 2thr
+@item 3thr
+@item 4thr
+Set banding detection threshold for each plane. Default is 0.02.
+Valid range is 0.00003 to 0.5.
+If difference between current pixel and reference pixel is less than threshold,
+it will be considered as banded.
+
+@item range, r
+Banding detection range in pixels. Default is 16. If positive, random number
+in range 0 to set value will be used. If negative, exact absolute value
+will be used.
+The range defines square of four pixels around current pixel.
+
+@item direction, d
+Set direction in radians from which four pixels will be compared. If positive,
+a random direction from 0 to the set direction will be picked. If negative, the exact
+absolute value will be picked. For example direction 0, -PI or -2*PI radians
+will pick only pixels on same row and -PI/2 will pick only pixels on same
+column.
+
+@item blur
+If enabled, current pixel is compared with average value of all four
+surrounding pixels. The default is enabled. If disabled current pixel is
+compared with all four surrounding pixels. The pixel is considered banded
+only if all four differences with surrounding pixels are less than threshold.
+@end table
+
 @anchor{decimate}
 @section decimate
 
@@ -3627,6 +5220,24 @@ Set whether or not chroma is considered in the metric calculations. Default is
 @code{1}.
 @end table
 
+@section deflate
+
+Apply deflate effect to the video.
+
+This filter replaces the pixel by the local(3x3) average by taking into account
+only values lower than the pixel.
+
+It accepts the following options:
+
+@table @option
+@item threshold0
+@item threshold1
+@item threshold2
+@item threshold3
+Limit the maximum change for each plane, default is 65535.
+If 0, plane will remain unchanged.
+@end table
+
 @section dejudder
 
 Remove judder produced by partially interlaced telecined content.
@@ -3680,7 +5291,9 @@ specified.
 
 @item band, t
 Specify the thickness of the fuzzy edge of the rectangle (added to
-@var{w} and @var{h}). The default value is 4.
+@var{w} and @var{h}). The default value is 1. This option is
+deprecated, setting higher values should no longer be necessary and
+is not recommended.
 
 @item show
 When set to 1, a green rectangle is drawn on the screen to simplify
@@ -3813,6 +5426,85 @@ A number representing position of the first frame with respect to the telecine
 pattern. This is to be used if the stream is cut. The default value is @code{0}.
 @end table
 
+@section dilation
+
+Apply dilation effect to the video.
+
+This filter replaces the pixel by the local(3x3) maximum.
+
+It accepts the following options:
+
+@table @option
+@item threshold0
+@item threshold1
+@item threshold2
+@item threshold3
+Limit the maximum change for each plane, default is 65535.
+If 0, plane will remain unchanged.
+
+@item coordinates
+Flag which specifies the pixel to refer to. Default is 255 i.e. all eight
+pixels are used.
+
+Flags map to local 3x3 coordinates like this:
+
+    1 2 3
+    4   5
+    6 7 8
+@end table
+
+@section displace
+
+Displace pixels as indicated by second and third input stream.
+
+It takes three input streams and outputs one stream, the first input is the
+source, and second and third input are displacement maps.
+
+The second input specifies how much to displace pixels along the
+x-axis, while the third input specifies how much to displace pixels
+along the y-axis.
+If one of displacement map streams terminates, last frame from that
+displacement map will be used.
+
+Note that once generated, displacement maps can be reused over and over again.
+
+A description of the accepted options follows.
+
+@table @option
+@item edge
+Set displace behavior for pixels that are out of range.
+
+Available values are:
+@table @samp
+@item blank
+Missing pixels are replaced by black pixels.
+
+@item smear
+Adjacent pixels will spread out to replace missing pixels.
+
+@item wrap
+Out of range pixels are wrapped so they point to pixels of other side.
+@end table
+Default is @samp{smear}.
+
+@end table
+
+@subsection Examples
+
+@itemize
+@item
+Add ripple effect to rgb input of video size hd720:
+@example
+ffmpeg -i INPUT -f lavfi -i nullsrc=s=hd720,lutrgb=128:128:128 -f lavfi -i nullsrc=s=hd720,geq='r=128+30*sin(2*PI*X/400+T):g=128+30*sin(2*PI*X/400+T):b=128+30*sin(2*PI*X/400+T)' -lavfi '[0][1][2]displace' OUTPUT
+@end example
+
+@item
+Add wave effect to rgb input of video size hd720:
+@example
+ffmpeg -i INPUT -f lavfi -i nullsrc=hd720,geq='r=128+80*(sin(sqrt((X-W/2)*(X-W/2)+(Y-H/2)*(Y-H/2))/220*2*PI+T)):g=128+80*(sin(sqrt((X-W/2)*(X-W/2)+(Y-H/2)*(Y-H/2))/220*2*PI+T)):b=128+80*(sin(sqrt((X-W/2)*(X-W/2)+(Y-H/2)*(Y-H/2))/220*2*PI+T))' -lavfi '[1]split[x][y],[0][x][y]displace' OUTPUT
+@end example
+@end itemize
+
 @section drawbox
 
 Draw a colored box on the input image.
@@ -3909,28 +5601,130 @@ drawbox=x=-t:y=0.5*(ih-iw/2.4)-t:w=iw+t*2:h=iw/2.4+t*2:t=2:c=red
 @end example
 @end itemize
 
-@section drawgrid
+@section drawgraph, adrawgraph
 
-Draw a grid on the input image.
+Draw a graph using input video or audio metadata.
 
 It accepts the following parameters:
 
 @table @option
-@item x
-@item y
-The expressions which specify the coordinates of some point of grid intersection (meant to configure offset). Both default to 0.
+@item m1
+Set 1st frame metadata key from which metadata values will be used to draw a graph.
 
-@item width, w
-@item height, h
-The expressions which specify the width and height of the grid cell, if 0 they are interpreted as the
-input width and height, respectively, minus @code{thickness}, so image gets
-framed. Default to 0.
+@item fg1
+Set 1st foreground color expression.
 
-@item color, c
-Specify the color of the grid. For the general syntax of this option,
-check the "Color" section in the ffmpeg-utils manual. If the special
-value @code{invert} is used, the grid color is the same as the
-video with inverted luma.
+@item m2
+Set 2nd frame metadata key from which metadata values will be used to draw a graph.
+
+@item fg2
+Set 2nd foreground color expression.
+
+@item m3
+Set 3rd frame metadata key from which metadata values will be used to draw a graph.
+
+@item fg3
+Set 3rd foreground color expression.
+
+@item m4
+Set 4th frame metadata key from which metadata values will be used to draw a graph.
+
+@item fg4
+Set 4th foreground color expression.
+
+@item min
+Set minimal value of metadata value.
+
+@item max
+Set maximal value of metadata value.
+
+@item bg
+Set graph background color. Default is white.
+
+@item mode
+Set graph mode.
+
+Available values for mode are:
+@table @samp
+@item bar
+@item dot
+@item line
+@end table
+
+Default is @code{line}.
+
+@item slide
+Set slide mode.
+
+Available values for slide are:
+@table @samp
+@item frame
+Draw new frame when right border is reached.
+
+@item replace
+Replace old columns with new ones.
+
+@item scroll
+Scroll from right to left.
+
+@item rscroll
+Scroll from left to right.
+@end table
+
+Default is @code{frame}.
+
+@item size
+Set size of graph video. For the syntax of this option, check the
+@ref{video size syntax,,"Video size" section in the ffmpeg-utils manual,ffmpeg-utils}.
+The default value is @code{900x256}.
+
+The foreground color expressions can use the following variables:
+@table @option
+@item MIN
+Minimal value of metadata value.
+
+@item MAX
+Maximal value of metadata value.
+
+@item VAL
+Current metadata key value.
+@end table
+
+The color is defined as 0xAABBGGRR.
+@end table
+
+Example using metadata from @ref{signalstats} filter:
+@example
+signalstats,drawgraph=lavfi.signalstats.YAVG:min=0:max=255
+@end example
+
+Example using metadata from @ref{ebur128} filter:
+@example
+ebur128=metadata=1,adrawgraph=lavfi.r128.M:min=-120:max=5
+@end example
+
+@section drawgrid
+
+Draw a grid on the input image.
+
+It accepts the following parameters:
+
+@table @option
+@item x
+@item y
+The expressions which specify the coordinates of some point of grid intersection (meant to configure offset). Both default to 0.
+
+@item width, w
+@item height, h
+The expressions which specify the width and height of the grid cell, if 0 they are interpreted as the
+input width and height, respectively, minus @code{thickness}, so image gets
+framed. Default to 0.
+
+@item color, c
+Specify the color of the grid. For the general syntax of this option,
+check the "Color" section in the ffmpeg-utils manual. If the special
+value @code{invert} is used, the grid color is the same as the
+video with inverted luma.
 
 @item thickness, t
 The expression which sets the thickness of the grid line. Default value is @code{1}.
@@ -4305,14 +6099,20 @@ A 1 character description of the current picture type.
 
 @item pts
 The timestamp of the current frame.
-It can take up to two arguments.
+It can take up to three arguments.
 
 The first argument is the format of the timestamp; it defaults to @code{flt}
 for seconds as a decimal number with microsecond accuracy; @code{hms} stands
 for a formatted @var{[-]HH:MM:SS.mmm} timestamp with millisecond accuracy.
+@code{gmtime} stands for the timestamp of the frame formatted as UTC time;
+@code{localtime} stands for the timestamp of the frame formatted as
+local time zone time.
 
 The second argument is an offset added to the timestamp.
 
+If the format is set to @code{localtime} or @code{gmtime},
+a third argument may be supplied: a strftime() format string.
+By default, @var{YYYY-MM-DD HH:MM:SS} format will be used.
 @end table
 
 @subsection Examples
@@ -4343,7 +6143,7 @@ within the parameter list.
 @item
 Show the text at the center of the video frame:
 @example
-drawtext="fontsize=30:fontfile=FreeSerif.ttf:text='hello world':x=(w-text_w)/2:y=(h-text_h-line_h)/2"
+drawtext="fontsize=30:fontfile=FreeSerif.ttf:text='hello world':x=(w-text_w)/2:y=(h-text_h)/2"
 @end example
 
 @item
@@ -4467,7 +6267,7 @@ The filter accepts the following options:
 @table @option
 @item contrast
 Set the contrast expression. The value must be a float value in range
-@code{-2.0} to @code{2.0}. The default value is "0".
+@code{-2.0} to @code{2.0}. The default value is "1".
 
 @item brightness
 Set the brightness expression. The value must be a float value in
@@ -4569,6 +6369,33 @@ value.
 
 @end table
 
+@section erosion
+
+Apply erosion effect to the video.
+
+This filter replaces the pixel by the local(3x3) minimum.
+
+It accepts the following options:
+
+@table @option
+@item threshold0
+@item threshold1
+@item threshold2
+@item threshold3
+Limit the maximum change for each plane, default is 65535.
+If 0, plane will remain unchanged.
+
+@item coordinates
+Flag which specifies the pixel to refer to. Default is 255 i.e. all eight
+pixels are used.
+
+Flags map to local 3x3 coordinates like this:
+
+    1 2 3
+    4   5
+    6 7 8
+@end table
+
 @section extractplanes
 
 Extract color channel components from input video stream into
@@ -4631,6 +6458,10 @@ computation time. Default value is 1.
 Set a random seed, must be an integer included between 0 and
 UINT32_MAX. If not specified, or if explicitly set to -1, the filter
 will try to use a good random seed on a best effort basis.
+
+@item pal8
+Set pal8 output pixel format. This option does not work with codebook
+length greater than 256.
 @end table
 
 @section fade
@@ -4783,6 +6614,12 @@ Sharpen:
 fftfilt=dc_Y=0:weight_Y='1+squish(1-(Y+X)/100)'
 @end example
 
+@item
+Blur:
+@example
+fftfilt=dc_Y=0:weight_Y='exp(-4 * ((Y+X)/(W+H)))'
+@end example
+
 @end itemize
 
 @section field
@@ -4828,8 +6665,9 @@ which @code{fieldmatch} is based on. While the semantic and usage are very
 close, some behaviour and options names can differ.
 
 The @ref{decimate} filter currently only works for constant frame rate input.
-Do not use @code{fieldmatch} and @ref{decimate} if your input has mixed
-telecined and progressive content with changing framerate.
+If your input has mixed telecined (30fps) and progressive content with a lower
+framerate like 24fps use the following filterchain to produce the necessary cfr
+stream: @code{dejudder,fps=30000/1001,fieldmatch,decimate}.
 
 The filter accepts the following options:
 
@@ -5157,7 +6995,7 @@ For example:
 ffmpeg -i in.vob -vf "fieldorder=bff" out.dv
 @end example
 
-@section fifo
+@section fifo, afifo
 
 Buffer input images and send them when they are requested.
 
@@ -5367,6 +7205,51 @@ ffmpeg -i LEFT -i RIGHT -filter_complex framepack=frameseq OUTPUT
 ffmpeg -i LEFT -i RIGHT -filter_complex [0:v]scale=w=iw/2[left],[1:v]scale=w=iw/2[right],[left][right]framepack=sbs OUTPUT
 @end example
 
+@section framerate
+
+Change the frame rate by interpolating new video output frames from the source
+frames.
+
+This filter is not designed to function correctly with interlaced media. If
+you wish to change the frame rate of interlaced media then you are required
+to deinterlace before this filter and re-interlace after this filter.
+
+A description of the accepted options follows.
+
+@table @option
+@item fps
+Specify the output frames per second. This option can also be specified
+as a value alone. The default is @code{50}.
+
+@item interp_start
+Specify the start of a range where the output frame will be created as a
+linear interpolation of two frames. The range is [@code{0}-@code{255}],
+the default is @code{15}.
+
+@item interp_end
+Specify the end of a range where the output frame will be created as a
+linear interpolation of two frames. The range is [@code{0}-@code{255}],
+the default is @code{240}.
+
+@item scene
+Specify the level at which a scene change is detected as a value between
+0 and 100 to indicate a new scene; a low value reflects a low
+probability for the current frame to introduce a new scene, while a higher
+value means the current frame is more likely to be one.
+The default is @code{7}.
+
+@item flags
+Specify flags influencing the filter process.
+
+Available value for @var{flags} is:
+
+@table @option
+@item scene_change_detect, scd
+Enable scene change detection using the value of the option @var{scene}.
+This flag is enabled by default.
+@end table
+@end table
+
 @section framestep
 
 Select one frame every N-th frame.
@@ -5601,12 +7484,9 @@ geq=lum=255*gauss((X/W-0.5)*3)*gauss((Y/H-0.5)*3)/gauss(0)/gauss(0),format=gray
 @end example
 
 @item
-Create a linear gradient to use as a mask for another filter, then
-compose with @ref{overlay}. In this example the video will gradually
-become more blurry from the top to the bottom of the y-axis as defined
-by the linear gradient:
+Diagonal split screen to compare filter effect:
 @example
-ffmpeg -i input.mp4 -filter_complex "geq=lum=255*(Y/H),format=gray[grad];[0:v]boxblur=4[blur];[blur][grad]alphamerge[alpha];[0:v][alpha]overlay" output.mp4
+ffmpeg -i input -filter_complex "[0:v]geq=lum=if(gt(X\,Y*(W/H))\,255),format=gray[alpha];[0:v][alpha]alphamerge,curves=preset=color_negative[filtered];[0:v][filtered]overlay" output
 @end example
 @end itemize
 
@@ -5782,106 +7662,43 @@ Compute and draw a color distribution histogram for the input video.
 The computed histogram is a representation of the color component
 distribution in an image.
 
-The filter accepts the following options:
-
-@table @option
-@item mode
-Set histogram mode.
-
-It accepts the following values:
-@table @samp
-@item levels
-Standard histogram that displays the color components distribution in an
-image. Displays color graph for each color component. Shows distribution of
+Standard histogram displays the color components distribution in an image.
+Displays color graph for each color component. Shows distribution of
 the Y, U, V, A or R, G, B components, depending on input format, in the
 current frame. Below each graph a color component scale meter is shown.
 
-@item color
-Displays chroma values (U/V color placement) in a two dimensional
-graph (which is called a vectorscope). The brighter a pixel in the
-vectorscope, the more pixels of the input frame correspond to that pixel
-(i.e., more pixels have this chroma value). The V component is displayed on
-the horizontal (X) axis, with the leftmost side being V = 0 and the rightmost
-side being V = 255. The U component is displayed on the vertical (Y) axis,
-with the top representing U = 0 and the bottom representing U = 255.
-
-The position of a white pixel in the graph corresponds to the chroma value of
-a pixel of the input clip. The graph can therefore be used to read the hue
-(color flavor) and the saturation (the dominance of the hue in the color). As
-the hue of a color changes, it moves around the square. At the center of the
-square the saturation is zero, which means that the corresponding pixel has no
-color. If the amount of a specific color is increased (while leaving the other
-colors unchanged) the saturation increases, and the indicator moves towards
-the edge of the square.
-
-@item color2
-Chroma values in vectorscope, similar as @code{color} but actual chroma values
-are displayed.
-
-@item waveform
-Per row/column color component graph. In row mode, the graph on the left side
-represents color component value 0 and the right side represents value = 255.
-In column mode, the top side represents color component value = 0 and bottom
-side represents value = 255.
-@end table
-Default value is @code{levels}.
+The filter accepts the following options:
 
+@table @option
 @item level_height
-Set height of level in @code{levels}. Default value is @code{200}.
+Set height of level. Default value is @code{200}.
 Allowed range is [50, 2048].
 
 @item scale_height
-Set height of color scale in @code{levels}. Default value is @code{12}.
+Set height of color scale. Default value is @code{12}.
 Allowed range is [0, 40].
 
-@item step
-Set step for @code{waveform} mode. Smaller values are useful to find out how
-many values of the same luminance are distributed across input rows/columns.
-Default value is @code{10}. Allowed range is [1, 255].
-
-@item waveform_mode
-Set mode for @code{waveform}. Can be either @code{row}, or @code{column}.
-Default is @code{row}.
-
-@item waveform_mirror
-Set mirroring mode for @code{waveform}. @code{0} means unmirrored, @code{1}
-means mirrored. In mirrored mode, higher values will be represented on the left
-side for @code{row} mode and at the top for @code{column} mode. Default is
-@code{0} (unmirrored).
-
 @item display_mode
-Set display mode for @code{waveform} and @code{levels}.
+Set display mode.
 It accepts the following values:
 @table @samp
 @item parade
-Display separate graph for the color components side by side in
-@code{row} waveform mode or one below the other in @code{column} waveform mode
-for @code{waveform} histogram mode. For @code{levels} histogram mode,
-per color component graphs are placed below each other.
-
-Using this display mode in @code{waveform} histogram mode makes it easy to
-spot color casts in the highlights and shadows of an image, by comparing the
-contours of the top and the bottom graphs of each waveform. Since whites,
-grays, and blacks are characterized by exactly equal amounts of red, green,
-and blue, neutral areas of the picture should display three waveforms of
-roughly equal width/height. If not, the correction is easy to perform by
-making level adjustments the three waveforms.
+Per color component graphs are placed below each other.
 
 @item overlay
 Presents information identical to that in the @code{parade}, except
 that the graphs representing color components are superimposed directly
 over one another.
-
-This display mode in @code{waveform} histogram mode makes it easier to spot
-relative differences or similarities in overlapping areas of the color
-components that are supposed to be identical, such as neutral whites, grays,
-or blacks.
 @end table
 Default is @code{parade}.
 
 @item levels_mode
-Set mode for @code{levels}. Can be either @code{linear}, or @code{logarithmic}.
+Set mode. Can be either @code{linear}, or @code{logarithmic}.
 Default is @code{linear}.
+
+@item components
+Set what color components to display.
+Default is @code{7}.
 @end table
 
 @subsection Examples
@@ -5937,6 +7754,25 @@ Set the scaling dimension: @code{2} for @code{hq2x}, @code{3} for
 Default is @code{3}.
 @end table
 
+@section hstack
+Stack input videos horizontally.
+
+All streams must be of same pixel format and of same height.
+
+Note that this filter is faster than using the @ref{overlay} and @ref{pad} filters
+to create the same output.
+
+The filter accepts the following options:
+
+@table @option
+@item inputs
+Set number of input streams. Default is 2.
+
+@item shortest
+If set to 1, force the output to terminate when the shortest input
+terminates. Default value is 0.
+@end table
+
 @section hue
 
 Modify the hue and/or the saturation of the input.
@@ -6166,6 +8002,24 @@ Default value is @code{none}.
 Swap luma/chroma/alpha fields. Exchange even & odd lines. Default value is @code{0}.
 @end table
 
+@section inflate
+
+Apply inflate effect to the video.
+
+This filter replaces the pixel by the local (3x3) average by taking into account
+only values higher than the pixel.
+
+It accepts the following options:
+
+@table @option
+@item threshold0
+@item threshold1
+@item threshold2
+@item threshold3
+Limit the maximum change for each plane, default is 65535.
+If 0, plane will remain unchanged.
+@end table
+
 @section interlace
 
 Simple interlacing filter from progressive contents. This interleaves upper (or
@@ -6472,6 +8326,69 @@ lutyuv=y='bitand(val, 128+64+32)'
 @end example
 @end itemize
 
+@section maskedmerge
+
+Merge the first input stream with the second input stream using per pixel
+weights in the third input stream.
+
+A value of 0 in the third stream pixel component means that pixel component
+from first stream is returned unchanged, while maximum value (eg. 255 for
+8-bit videos) means that pixel component from second stream is returned
+unchanged. Intermediate values define the amount of merging between both
+input stream's pixel components.
+
+This filter accepts the following options:
+@table @option
+@item planes
+Set which planes will be processed as bitmap, unprocessed planes will be
+copied from first stream.
+The default value is 0xf, meaning all planes will be processed.
+@end table
+
+@section mcdeint
+
+Apply motion-compensation deinterlacing.
+
+It needs one field per frame as input and must thus be used together
+with yadif=1/3 or equivalent.
+
+This filter accepts the following options:
+@table @option
+@item mode
+Set the deinterlacing mode.
+
+It accepts one of the following values:
+@table @samp
+@item fast
+@item medium
+@item slow
+use iterative motion estimation
+@item extra_slow
+like @samp{slow}, but use multiple reference frames.
+@end table
+Default value is @samp{fast}.
+
+@item parity
+Set the picture field parity assumed for the input video. It must be
+one of the following values:
+
+@table @samp
+@item 0, tff
+assume top field first
+@item 1, bff
+assume bottom field first
+@end table
+
+Default value is @samp{bff}.
+
+@item qp
+Set per-block quantization parameter (QP) used by the internal
+encoder.
+
+Higher values should result in a smoother motion vector field but less
+optimal individual vectors. Default value is 1.
+@end table
+
 @section mergeplanes
 
 Merge color channel components from several video streams.
@@ -6531,74 +8448,129 @@ format=rgb24,mergeplanes=0x000102:yuv444p
 @end example
 @end itemize
 
-@section mcdeint
-
-Apply motion-compensation deinterlacing.
+@section metadata, ametadata
 
-It needs one field per frame as input and must thus be used together
-with yadif=1/3 or equivalent.
+Manipulate frame metadata.
 
 This filter accepts the following options:
+
 @table @option
 @item mode
-Set the deinterlacing mode.
-
-It accepts one of the following values:
-@table @samp
-@item fast
-@item medium
-@item slow
-use iterative motion estimation
-@item extra_slow
-like @samp{slow}, but use multiple reference frames.
-@end table
-Default value is @samp{fast}.
+Set mode of operation of the filter.
 
-@item parity
-Set the picture field parity assumed for the input video. It must be
-one of the following values:
+Can be one of the following:
 
 @table @samp
-@item 0, tff
-assume top field first
-@item 1, bff
-assume bottom field first
-@end table
+@item select
+If both @code{value} and @code{key} are set, select frames
+which have such metadata. If only @code{key} is set, select
+every frame that has such key in metadata.
 
-Default value is @samp{bff}.
+@item add
+Add new metadata @code{key} and @code{value}. If key is already available
+do nothing.
 
-@item qp
-Set per-block quantization parameter (QP) used by the internal
-encoder.
+@item modify
+Modify value of already present key.
 
-Higher values should result in a smoother motion vector field but less
-optimal individual vectors. Default value is 1.
+@item delete
+If @code{value} is set, delete only keys that have such value.
+Otherwise, delete key.
+
+@item print
+Print key and its value if metadata was found. If @code{key} is not set print all
+metadata values available in frame.
 @end table
 
-@section mpdecimate
+@item key
+Set key used with all modes. Must be set for all modes except @code{print}.
 
-Drop frames that do not differ greatly from the previous frame in
-order to reduce frame rate.
+@item value
+Set metadata value which will be used. This option is mandatory for
+@code{modify} and @code{add} mode.
 
-The main use of this filter is for very-low-bitrate encoding
-(e.g. streaming over dialup modem), but it could in theory be used for
-fixing movies that were inverse-telecined incorrectly.
+@item function
+Which function to use when comparing metadata value and @code{value}.
 
-A description of the accepted options follows.
+Can be one of the following:
 
-@table @option
-@item max
-Set the maximum number of consecutive frames which can be dropped (if
-positive), or the minimum interval between dropped frames (if
-negative). If the value is 0, the frame is dropped unregarding the
-number of previous sequentially dropped frames.
+@table @samp
+@item same_str
+Values are interpreted as strings, returns true if metadata value is same as @code{value}.
 
-Default value is 0.
+@item starts_with
+Values are interpreted as strings, returns true if metadata value starts with
+the @code{value} option string.
 
-@item hi
-@item lo
-@item frac
-Set the dropping threshold values.
+@item less
+Values are interpreted as floats, returns true if metadata value is less than @code{value}.
+
+@item equal
+Values are interpreted as floats, returns true if metadata value is equal to @code{value}.
+
+@item greater
+Values are interpreted as floats, returns true if metadata value is greater than @code{value}.
+
+@item expr
+Values are interpreted as floats, returns true if expression from option @code{expr}
+evaluates to true.
+@end table
+
+@item expr
+Set expression which is used when @code{function} is set to @code{expr}.
+The expression is evaluated through the eval API and can contain the following
+constants:
+
+@table @option
+@item VALUE1
+Float representation of @code{value} from metadata key.
+
+@item VALUE2
+Float representation of @code{value} as supplied by user in @code{value} option.
+@end table
+
+@item file
+If specified in @code{print} mode, output is written to the named file. When
+filename equals "-" data is written to standard output.
+If @code{file} option is not set, output is written to the log with AV_LOG_INFO
+loglevel.
+@end table
+
+@subsection Examples
+
+@itemize
+@item
+Print all metadata values for frames with key @code{lavfi.signalstats.YDIF} with values
+between 0 and 1.
+@example
+signalstats,metadata=print:key=lavfi.signalstats.YDIF:value=0:function=expr:expr='between(VALUE1,0,1)'
+@end example
+@end itemize
+
+@section mpdecimate
+
+Drop frames that do not differ greatly from the previous frame in
+order to reduce frame rate.
+
+The main use of this filter is for very-low-bitrate encoding
+(e.g. streaming over dialup modem), but it could in theory be used for
+fixing movies that were inverse-telecined incorrectly.
+
+A description of the accepted options follows.
+
+@table @option
+@item max
+Set the maximum number of consecutive frames which can be dropped (if
+positive), or the minimum interval between dropped frames (if
+negative). If the value is 0, the frame is dropped unregarding the
+number of previous sequentially dropped frames.
+
+Default value is 0.
+
+@item hi
+@item lo
+@item frac
+Set the dropping threshold values.
 
 Values for @option{hi} and @option{lo} are for 8x8 pixel blocks and
 represent actual pixel value differences, so a threshold of 64
@@ -6621,6 +8593,115 @@ Negate input video.
 It accepts an integer in input; if non-zero it negates the
 alpha component (if available). The default value in input is 0.
 
+@section nnedi
+
+Deinterlace video using neural network edge directed interpolation.
+
+This filter accepts the following options:
+
+@table @option
+@item weights
+Mandatory option; without the binary weights file the filter cannot work.
+Currently file can be found here:
+https://github.com/dubhater/vapoursynth-nnedi3/blob/master/src/nnedi3_weights.bin
+
+@item deint
+Set which frames to deinterlace, by default it is @code{all}.
+Can be @code{all} or @code{interlaced}.
+
+@item field
+Set mode of operation.
+
+Can be one of the following:
+
+@table @samp
+@item af
+Use frame flags, both fields.
+@item a
+Use frame flags, single field.
+@item t
+Use top field only.
+@item b
+Use bottom field only.
+@item ft
+Use both fields, top first.
+@item fb
+Use both fields, bottom first.
+@end table
+
+@item planes
+Set which planes to process, by default the filter processes all planes.
+
+@item nsize
+Set size of local neighborhood around each pixel, used by the predictor neural
+network.
+
+Can be one of the following:
+
+@table @samp
+@item s8x6
+@item s16x6
+@item s32x6
+@item s48x6
+@item s8x4
+@item s16x4
+@item s32x4
+@end table
+
+@item nns
+Set the number of neurons in the predictor neural network.
+Can be one of the following:
+
+@table @samp
+@item n16
+@item n32
+@item n64
+@item n128
+@item n256
+@end table
+
+@item qual
+Controls the number of different neural network predictions that are blended
+together to compute the final output value. Can be @code{fast} (the default) or
+@code{slow}.
+
+@item etype
+Set which set of weights to use in the predictor.
+Can be one of the following:
+
+@table @samp
+@item a
+weights trained to minimize absolute error
+@item s
+weights trained to minimize squared error
+@end table
+
+@item pscrn
+Controls whether or not the prescreener neural network is used to decide
+which pixels should be processed by the predictor neural network and which
+can be handled by simple cubic interpolation.
+The prescreener is trained to know whether cubic interpolation will be
+sufficient for a pixel or whether it should be predicted by the predictor nn.
+The computational complexity of the prescreener nn is much less than that of
+the predictor nn. Since most pixels can be handled by cubic interpolation,
+using the prescreener generally results in much faster processing.
+The prescreener is pretty accurate, so the difference between using it and not
+using it is almost always unnoticeable.
+
+Can be one of the following:
+
+@table @samp
+@item none
+@item original
+@item new
+@end table
+
+Default is @code{new}.
+
+@item fapprox
+Set various debugging flags.
+@end table
+
 @section noformat
 
 Force libavfilter not to use any of the specified pixel formats for the
@@ -6705,6 +8786,30 @@ noise=alls=20:allf=t+u
 
 Pass the video source unchanged to the output.
 
+@section ocr
+Optical Character Recognition
+
+This filter uses Tesseract for optical character recognition.
+
+It accepts the following options:
+
+@table @option
+@item datapath
+Set datapath to tesseract data. Default is to use whatever was
+set at installation.
+
+@item language
+Set language, default is "eng".
+
+@item whitelist
+Set character whitelist.
+
+@item blacklist
+Set character blacklist.
+@end table
+
+The filter exports recognized text as the frame metadata @code{lavfi.ocr.text}.
+
 @section ocv
 
 Apply a video transform using libopencv.
@@ -7066,6 +9171,7 @@ Set chroma strength.
 Must be a double value in the range 0-1000, default is @code{1.0}.
 @end table
 
+@anchor{pad}
 @section pad
 
 Add paddings to the input image, and place the original input at the
@@ -7682,7 +9788,8 @@ The description of the accepted parameters follows.
 @table @option
 @item stats_file, f
 If specified the filter will use the named file to save the PSNR of
-each individual frame.
+each individual frame. When filename equals "-" the data is sent to
+standard output.
 @end table
 
 The file printed if @var{stats_file} is selected, contains a sequence of
@@ -7815,6 +9922,128 @@ qp=2+2*sin(PI*qp)
 @end example
 @end itemize
 
+@section random
+
+Flush video frames from internal cache of frames into a random order.
+No frame is discarded.
+Inspired by @ref{frei0r} nervous filter.
+
+@table @option
+@item frames
+Set size in number of frames of internal cache, in range from @code{2} to
+@code{512}. Default is @code{30}.
+
+@item seed
+Set seed for random number generator, must be an integer included between
+@code{0} and @code{UINT32_MAX}. If not specified, or if explicitly set to
+less than @code{0}, the filter will try to use a good random seed on a
+best effort basis.
+@end table
+
+@section removegrain
+
+The removegrain filter is a spatial denoiser for progressive video.
+
+@table @option
+@item m0
+Set mode for the first plane.
+
+@item m1
+Set mode for the second plane.
+
+@item m2
+Set mode for the third plane.
+
+@item m3
+Set mode for the fourth plane.
+@end table
+
+Range of mode is from 0 to 24. Description of each mode follows:
+
+@table @var
+@item 0
+Leave input plane unchanged. Default.
+
+@item 1
+Clips the pixel with the minimum and maximum of the 8 neighbour pixels.
+
+@item 2
+Clips the pixel with the second minimum and maximum of the 8 neighbour pixels.
+
+@item 3
+Clips the pixel with the third minimum and maximum of the 8 neighbour pixels.
+
+@item 4
+Clips the pixel with the fourth minimum and maximum of the 8 neighbour pixels.
+This is equivalent to a median filter.
+
+@item 5
+Line-sensitive clipping giving the minimal change.
+
+@item 6
+Line-sensitive clipping, intermediate.
+
+@item 7
+Line-sensitive clipping, intermediate.
+
+@item 8
+Line-sensitive clipping, intermediate.
+
+@item 9
+Line-sensitive clipping on a line where the neighbours pixels are the closest.
+
+@item 10
+Replaces the target pixel with the closest neighbour.
+
+@item 11
+[1 2 1] horizontal and vertical kernel blur.
+
+@item 12
+Same as mode 11.
+
+@item 13
+Bob mode, interpolates top field from the line where the neighbours
+pixels are the closest.
+
+@item 14
+Bob mode, interpolates bottom field from the line where the neighbours
+pixels are the closest.
+
+@item 15
+Bob mode, interpolates top field. Same as 13 but with a more complicated
+interpolation formula.
+
+@item 16
+Bob mode, interpolates bottom field. Same as 14 but with a more complicated
+interpolation formula.
+
+@item 17
+Clips the pixel with the minimum and maximum of respectively the maximum and
+minimum of each pair of opposite neighbour pixels.
+
+@item 18
+Line-sensitive clipping using opposite neighbours whose greatest distance from
+the current pixel is minimal.
+
+@item 19
+Replaces the pixel with the average of its 8 neighbours.
+
+@item 20
+Averages the 9 pixels ([1 1 1] horizontal and vertical blur).
+
+@item 21
+Clips pixels using the averages of opposite neighbours.
+
+@item 22
+Same as mode 21 but simpler and faster.
+
+@item 23
+Small edge and halo removal, but reputed useless.
+
+@item 24
+Similar to 23.
+@end table
+
 @section removelogo
 
 Suppress a TV station logo, using an image file to determine which
@@ -7850,6 +10079,23 @@ pixels will slow things down on a large logo.
 This filter uses the repeat_field flag from the Video ES headers and hard repeats
 fields based on its value.
 
+@section reverse, areverse
+
+Reverse a clip.
+
+Warning: This filter requires memory to buffer the entire clip, so trimming
+is suggested.
+
+@subsection Examples
+
+@itemize
+@item
+Take the first 5 seconds of a clip, and reverse it.
+@example
+trim=end=5,reverse
+@end example
+@end itemize
+
 @section rotate
 
 Rotate video by an arbitrary angle expressed in radians.
@@ -8059,6 +10305,21 @@ dimension is divisible by n and adjust the value if necessary.
 See below for the list of accepted constants for use in the dimension
 expression.
 
+@item eval
+Specify when to evaluate @var{width} and @var{height} expression. It accepts the following values:
+
+@table @samp
+@item init
+Only evaluate expressions once during the filter initialization or when a command is processed.
+
+@item frame
+Evaluate expressions for each incoming frame.
+
+@end table
+
+Default value is @samp{init}.
+
+
 @item interl
 Set the interlacing mode. It accepts the following values:
 
@@ -8082,6 +10343,15 @@ Set libswscale scaling flags. See
 complete list of values. If not explicitly specified the filter applies
 the default flags.
 
+
+@item param0, param1
+Set libswscale input parameters for scaling algorithms that need them. See
+@ref{sws_params,,the ffmpeg-scaler manual,ffmpeg-scaler} for the
+complete documentation. If not explicitly specified the filter applies
+empty parameters.
+
+
+
 @item size, s
 Set the video size. For the syntax of this option, check the
 @ref{video size syntax,,"Video size" section in the ffmpeg-utils manual,ffmpeg-utils}.
@@ -8304,6 +10574,103 @@ scale=w='min(500\, iw*3/2):h=-1'
 @end example
 @end itemize
 
+@subsection Commands
+
+This filter supports the following commands:
+@table @option
+@item width, w
+@item height, h
+Set the output video dimension expression.
+The command accepts the same syntax of the corresponding option.
+
+If the specified expression is not valid, it is kept at its current
+value.
+@end table
+
+@section scale2ref
+
+Scale (resize) the input video, based on a reference video.
+
+See the scale filter for available options, scale2ref supports the same but
+uses the reference video instead of the main input as basis.
+
+@subsection Examples
+
+@itemize
+@item
+Scale a subtitle stream to match the main video in size before overlaying
+@example
+'scale2ref[b][a];[a][b]overlay'
+@end example
+@end itemize
+
+@section selectivecolor
+
+Adjust cyan, magenta, yellow and black (CMYK) to certain ranges of colors (such
+as "reds", "yellows", "greens", "cyans", ...). The adjustment range is defined
+by the "purity" of the color (that is, how saturated it already is).
+
+This filter is similar to the Adobe Photoshop Selective Color tool.
+
+The filter accepts the following options:
+
+@table @option
+@item correction_method
+Select color correction method.
+
+Available values are:
+@table @samp
+@item absolute
+Specified adjustments are applied "as-is" (added/subtracted to original pixel
+component value).
+@item relative
+Specified adjustments are relative to the original component value.
+@end table
+Default is @code{absolute}.
+@item reds
+Adjustments for red pixels (pixels where the red component is the maximum)
+@item yellows
+Adjustments for yellow pixels (pixels where the blue component is the minimum)
+@item greens
+Adjustments for green pixels (pixels where the green component is the maximum)
+@item cyans
+Adjustments for cyan pixels (pixels where the red component is the minimum)
+@item blues
+Adjustments for blue pixels (pixels where the blue component is the maximum)
+@item magentas
+Adjustments for magenta pixels (pixels where the green component is the minimum)
+@item whites
+Adjustments for white pixels (pixels where all components are greater than 128)
+@item neutrals
+Adjustments for all pixels except pure black and pure white
+@item blacks
+Adjustments for black pixels (pixels where all components are less than 128)
+@item psfile
+Specify a Photoshop selective color file (@code{.asv}) to import the settings from.
+@end table
+
+All the adjustment settings (@option{reds}, @option{yellows}, ...) accept up to
+4 space separated floating point adjustment values in the [-1,1] range,
+respectively to adjust the amount of cyan, magenta, yellow and black for the
+pixels of its range.
+
+@subsection Examples
+
+@itemize
+@item
+Increase cyan by 50% and reduce yellow by 33% in all green areas, and
+increase magenta by 27% in blue areas:
+@example
+selectivecolor=greens=.5 0 -.33 0:blues=0 .27
+@end example
+
+@item
+Use a Photoshop selective color preset:
+@example
+selectivecolor=psfile=MySelectiveColorPresets/Misty.asv
+@end example
+@end itemize
+
 @section separatefields
 
 The @code{separatefields} takes a frame-based video input and splits
@@ -8519,6 +10886,26 @@ Set the size of the box used to represent one palette color entry. Default is
 @code{30} (for a @code{30x30} pixel box).
 @end table
 
+@section shuffleframes
+
+Reorder and/or duplicate video frames.
+
+It accepts the following parameters:
+
+@table @option
+@item mapping
+Set the destination indexes of input frames.
+This is a space- or '|'-separated list of indexes that maps input frames to output
+frames. Number of indexes also sets maximal value that each index may have.
+@end table
+
+The first frame has the index 0. The default is to keep the input unchanged.
+
+Swap second and third frame of every three frames of the input:
+@example
+ffmpeg -i INPUT -vf "shuffleframes=0 2 1" OUTPUT
+@end example
+
 @section shuffleplanes
 
 Reorder and/or duplicate video planes.
@@ -8548,6 +10935,7 @@ Swap the second and third planes of the input:
 ffmpeg -i INPUT -vf shuffleplanes=0:2:1:3 OUTPUT
 @end example
 
+@anchor{signalstats}
 @section signalstats
 Evaluate various visual metrics that assist in determining issues associated
 with the digitization of analog video media.
@@ -8778,18 +11166,77 @@ in [-30,0] will filter edges. Default value is 0.
 If a chroma option is not explicitly set, the corresponding luma value
 is set.
 
-@section stereo3d
+@section ssim
 
-Convert between different stereoscopic image formats.
+Obtain the SSIM (Structural SImilarity Metric) between two input videos.
 
-The filters accept the following options:
+This filter takes two input videos; the first input is
+considered the "main" source and is passed unchanged to the
+output. The second input is used as a "reference" video for computing
+the SSIM.
 
-@table @option
-@item in
-Set stereoscopic image format of input.
+Both video inputs must have the same resolution and pixel format for
+this filter to work correctly. Also it assumes that both inputs
+have the same number of frames, which are compared one by one.
 
-Available values for input image formats are:
-@table @samp
+The filter stores the calculated SSIM of each frame.
+
+The description of the accepted parameters follows.
+
+@table @option
+@item stats_file, f
+If specified the filter will use the named file to save the SSIM of
+each individual frame. When filename equals "-" the data is sent to
+standard output.
+@end table
+
+The file printed if @var{stats_file} is selected, contains a sequence of
+key/value pairs of the form @var{key}:@var{value} for each compared
+couple of frames.
+
+A description of each shown parameter follows:
+
+@table @option
+@item n
+sequential number of the input frame, starting from 1
+
+@item Y, U, V, R, G, B
+SSIM of the compared frames for the component specified by the suffix.
+
+@item All
+SSIM of the compared frames for the whole frame.
+
+@item dB
+Same as above but in dB representation.
+@end table
+
+For example:
+@example
+movie=ref_movie.mpg, setpts=PTS-STARTPTS [main];
+[main][ref] ssim="stats_file=stats.log" [out]
+@end example
+
+In this example the input file being processed is compared with the
+reference file @file{ref_movie.mpg}. The SSIM of each individual frame
+is stored in @file{stats.log}.
+
+Another example with both psnr and ssim at same time:
+@example
+ffmpeg -i main.mpg -i ref.mpg -lavfi  "ssim;[0:v][1:v]psnr" -f null -
+@end example
+
+@section stereo3d
+
+Convert between different stereoscopic image formats.
+
+The filters accept the following options:
+
+@table @option
+@item in
+Set stereoscopic image format of input.
+
+Available values for input image formats are:
+@table @samp
 @item sbsl
 side by side parallel (left eye left, right eye right)
 
@@ -8824,14 +11271,65 @@ alternating frames (left eye first, right eye second)
 @item ar
 alternating frames (right eye first, left eye second)
 
+@item irl
+interleaved rows (left eye has top row, right eye starts on next row)
+
+@item irr
+interleaved rows (right eye has top row, left eye starts on next row)
+
+@item icl
+interleaved columns, left eye first
+
+@item icr
+interleaved columns, right eye first
+
 Default value is @samp{sbsl}.
 @end table
 
 @item out
 Set stereoscopic image format of output.
 
-Available values for output image formats are all the input formats as well as:
 @table @samp
+@item sbsl
+side by side parallel (left eye left, right eye right)
+
+@item sbsr
+side by side crosseye (right eye left, left eye right)
+
+@item sbs2l
+side by side parallel with half width resolution
+(left eye left, right eye right)
+
+@item sbs2r
+side by side crosseye with half width resolution
+(right eye left, left eye right)
+
+@item abl
+above-below (left eye above, right eye below)
+
+@item abr
+above-below (right eye above, left eye below)
+
+@item ab2l
+above-below with half height resolution
+(left eye above, right eye below)
+
+@item ab2r
+above-below with half height resolution
+(right eye above, left eye below)
+
+@item al
+alternating frames (left eye first, right eye second)
+
+@item ar
+alternating frames (right eye first, left eye second)
+
+@item irl
+interleaved rows (left eye has top row, right eye starts on next row)
+
+@item irr
+interleaved rows (right eye has top row, left eye starts on next row)
+
 @item arbg
 anaglyph red/blue gray
 (red filter on left eye, blue filter on right eye)
@@ -8888,17 +11386,23 @@ anaglyph yellow/blue colored
 anaglyph yellow/blue color optimized with the least squares projection of dubois
 (yellow filter on left eye, blue filter on right eye)
 
-@item irl
-interleaved rows (left eye has top row, right eye starts on next row)
-
-@item irr
-interleaved rows (right eye has top row, left eye starts on next row)
-
 @item ml
 mono output (left eye only)
 
 @item mr
 mono output (right eye only)
+
+@item chl
+checkerboard, left eye first
+
+@item chr
+checkerboard, right eye first
+
+@item icl
+interleaved columns, left eye first
+
+@item icr
+interleaved columns, right eye first
 @end table
 
 Default value is @samp{arcd}.
@@ -8914,12 +11418,51 @@ stereo3d=sbsl:aybd
 @end example
 
 @item
-Convert input video from above bellow (left eye above, right eye below) to side by side crosseye.
+Convert input video from above below (left eye above, right eye below) to side by side crosseye.
 @example
 stereo3d=abl:sbsr
 @end example
 @end itemize
 
+@section streamselect, astreamselect
+Select video or audio streams.
+
+The filter accepts the following options:
+
+@table @option
+@item inputs
+Set number of inputs. Default is 2.
+
+@item map
+Set input indexes to remap to outputs.
+@end table
+
+@subsection Commands
+
+The @code{streamselect} and @code{astreamselect} filters support the following
+commands:
+
+@table @option
+@item map
+Set input indexes to remap to outputs.
+@end table
+
+@subsection Examples
+
+@itemize
+@item
+Select the 1st stream for the first 5 seconds and the 2nd stream for the rest of the time:
+@example
+sendcmd='5.0 streamselect map 1',streamselect=inputs=2:map=0
+@end example
+
+@item
+Same as above, but for audio:
+@example
+asendcmd='5.0 astreamselect map 1',astreamselect=inputs=2:map=0
+@end example
+@end itemize
+
 @anchor{spp}
 @section spp
 
@@ -8980,6 +11523,10 @@ was composed. For the syntax of this option, check the
 Due to a misdesign in ASS aspect ratio arithmetic, this is necessary to
 correctly scale the fonts if the aspect ratio has been changed.
 
+@item fontsdir
+Set a directory path containing fonts that can be used by the filter.
+These fonts will be used in addition to whatever the font provider uses.
+
 @item charenc
 Set subtitles input character encoding. @code{subtitles} filter only. Only
 useful if not UTF-8.
@@ -9029,6 +11576,60 @@ Interpolate) pixel art scaling algorithm.
 
 Useful for enlarging pixel art images without reducing sharpness.
 
+@section swaprect
+
+Swap two rectangular objects in video.
+
+This filter accepts the following options:
+
+@table @option
+@item w
+Set object width.
+
+@item h
+Set object height.
+
+@item x1
+Set 1st rect x coordinate.
+
+@item y1
+Set 1st rect y coordinate.
+
+@item x2
+Set 2nd rect x coordinate.
+
+@item y2
+Set 2nd rect y coordinate.
+
+All expressions are evaluated once for each frame.
+@end table
+
+All options are expressions containing the following constants:
+
+@table @option
+@item w
+@item h
+The input width and height.
+
+@item a
+same as @var{w} / @var{h}
+
+@item sar
+input sample aspect ratio
+
+@item dar
+input display aspect ratio, it is the same as (@var{w} / @var{h}) * @var{sar}
+
+@item n
+The number of the input frame, starting from 0.
+
+@item t
+The timestamp expressed in seconds. It's NAN if the input timestamp is unknown.
+
+@item pos
+the position in the file of the input frame, NAN if unknown
+@end table
+
 @section swapuv
 Swap U & V plane.
 
@@ -9333,6 +11934,29 @@ Output:
  11111   11111   22222   22222   33333   33333   44444
 @end example
 
+@item mergex2, 7
+Move odd frames into the upper field, even into the lower field,
+generating a double height frame at same frame rate.
+@example
+ ------> time
+Input:
+Frame 1         Frame 2         Frame 3         Frame 4
+
+11111           22222           33333           44444
+11111           22222           33333           44444
+11111           22222           33333           44444
+11111           22222           33333           44444
+
+Output:
+11111           33333           33333           55555
+22222           22222           44444           44444
+11111           33333           33333           55555
+22222           22222           44444           44444
+11111           33333           33333           55555
+22222           22222           44444           44444
+11111           33333           33333           55555
+22222           22222           44444           44444
+@end example
 
 @end table
 
@@ -9599,6 +12223,69 @@ Force a constant quantization parameter. If not set, the filter will use the QP
 from the video stream (if available).
 @end table
 
+@section vectorscope
+
+Display 2 color component values in the two dimensional graph (which is called
+a vectorscope).
+
+This filter accepts the following options:
+
+@table @option
+@item mode, m
+Set vectorscope mode.
+
+It accepts the following values:
+@table @samp
+@item gray
+Gray values are displayed on graph, higher brightness means more pixels have
+same component color value on location in graph. This is the default mode.
+
+@item color
+Gray values are displayed on graph. Surrounding pixels values which are not
+present in video frame are drawn in gradient of 2 color components which are
+set by option @code{x} and @code{y}.
+
+@item color2
+Actual color components values present in video frame are displayed on graph.
+
+@item color3
+Similar to color2, but higher frequency of same values @code{x} and @code{y}
+on graph increases value of another color component, which is luminance by
+default values of @code{x} and @code{y}.
+
+@item color4
+Actual colors present in video frame are displayed on graph. If two different
+colors map to same position on graph then color with higher value of component
+not present in graph is picked.
+@end table
+
+@item x
+Set which color component will be represented on X-axis. Default is @code{1}.
+
+@item y
+Set which color component will be represented on Y-axis. Default is @code{2}.
+
+@item intensity, i
+Set intensity, used by modes: gray, color and color3 for increasing brightness
+of color component which represents frequency of (X, Y) location in graph.
+
+@item envelope, e
+@table @samp
+@item none
+No envelope, this is default.
+
+@item instant
+Instant envelope, even darkest single pixel will be clearly highlighted.
+
+@item peak
+Hold maximum and minimum values presented in graph over time. This way you
+can still spot out of range values without constantly looking at vectorscope.
+
+@item peak+instant
+Peak and instant envelope combined together.
+@end table
+@end table
+
 @anchor{vidstabdetect}
 @section vidstabdetect
 
@@ -9952,6 +12639,25 @@ vignette='PI/4+random(1)*PI/50':eval=frame
 
 @end itemize
 
+@section vstack
+Stack input videos vertically.
+
+All streams must be of same pixel format and of same width.
+
+Note that this filter is faster than using the @ref{overlay} and @ref{pad} filters
+to create the same output.
+
+The filter accepts the following options:
+
+@table @option
+@item inputs
+Set number of input streams. Default is 2.
+
+@item shortest
+If set to 1, force the output to terminate when the shortest input
+terminates. Default value is 0.
+@end table
+
 @section w3fdif
 
 Deinterlace the input video ("w3fdif" stands for "Weston 3 Field
@@ -9991,27 +12697,124 @@ Only deinterlace frames marked as interlaced.
 Default value is @samp{all}.
 @end table
 
-@section xbr
-Apply the xBR high-quality magnification filter which is designed for pixel
-art. It follows a set of edge-detection rules, see
-@url{http://www.libretro.com/forums/viewtopic.php?f=6&t=134}.
+@section waveform
+Video waveform monitor.
 
-It accepts the following option:
+The waveform monitor plots color component intensity. By default luminance
+only. Each column of the waveform corresponds to a column of pixels in the
+source video.
+
+It accepts the following options:
 
 @table @option
-@item n
-Set the scaling dimension: @code{2} for @code{2xBR}, @code{3} for
-@code{3xBR} and @code{4} for @code{4xBR}.
-Default is @code{3}.
-@end table
+@item mode, m
+Can be either @code{row}, or @code{column}. Default is @code{column}.
+In row mode, the graph on the left side represents color component value = 0 and
+the right side represents value = 255. In column mode, the top side represents
+color component value = 0 and bottom side represents value = 255.
+
+@item intensity, i
+Set intensity. Smaller values are useful to find out how many values of the same
+luminance are distributed across input rows/columns.
+Default value is @code{0.04}. Allowed range is [0, 1].
+
+@item mirror, r
+Set mirroring mode. @code{0} means unmirrored, @code{1} means mirrored.
+In mirrored mode, higher values will be represented on the left
+side for @code{row} mode and at the top for @code{column} mode. Default is
+@code{1} (mirrored).
 
-@anchor{yadif}
-@section yadif
+@item display, d
+Set display mode.
+It accepts the following values:
+@table @samp
+@item overlay
+Presents information identical to that in the @code{parade}, except
+that the graphs representing color components are superimposed directly
+over one another.
 
-Deinterlace the input video ("yadif" means "yet another deinterlacing
-filter").
+This display mode makes it easier to spot relative differences or similarities
+in overlapping areas of the color components that are supposed to be identical,
+such as neutral whites, grays, or blacks.
 
-It accepts the following parameters:
+@item parade
+Display separate graph for the color components side by side in
+@code{row} mode or one below the other in @code{column} mode.
+
+Using this display mode makes it easy to spot color casts in the highlights
+and shadows of an image, by comparing the contours of the top and the bottom
+graphs of each waveform. Since whites, grays, and blacks are characterized
+by exactly equal amounts of red, green, and blue, neutral areas of the picture
+should display three waveforms of roughly equal width/height. If not, the
+correction is easy to perform by making level adjustments to the three waveforms.
+@end table
+Default is @code{parade}.
+
+@item components, c
+Set which color components to display. Default is 1, which means only luminance
+or red color component if input is in RGB colorspace. If it is set, for example,
+to 7, it will display all 3 (if available) color components.
+
+@item envelope, e
+@table @samp
+@item none
+No envelope, this is default.
+
+@item instant
+Instant envelope, minimum and maximum values presented in graph will be easily
+visible even with small @code{step} value.
+
+@item peak
+Hold minimum and maximum values presented in graph across time. This way you
+can still spot out of range values without constantly looking at waveforms.
+
+@item peak+instant
+Peak and instant envelope combined together.
+@end table
+
+@item filter, f
+@table @samp
+@item lowpass
+No filtering, this is default.
+
+@item flat
+Luma and chroma combined together.
+
+@item aflat
+Similar to above, but shows difference between blue and red chroma.
+
+@item chroma
+Displays only chroma.
+
+@item achroma
+Similar to above, but shows difference between blue and red chroma.
+
+@item color
+Displays actual color value on waveform.
+@end table
+@end table
+
+@section xbr
+Apply the xBR high-quality magnification filter which is designed for pixel
+art. It follows a set of edge-detection rules, see
+@url{http://www.libretro.com/forums/viewtopic.php?f=6&t=134}.
+
+It accepts the following option:
+
+@table @option
+@item n
+Set the scaling dimension: @code{2} for @code{2xBR}, @code{3} for
+@code{3xBR} and @code{4} for @code{4xBR}.
+Default is @code{3}.
+@end table
+
+@anchor{yadif}
+@section yadif
+
+Deinterlace the input video ("yadif" means "yet another deinterlacing
+filter").
+
+It accepts the following parameters:
 
 
 @table @option
@@ -10084,6 +12887,9 @@ single input image.
 
 @item s
 Set the output image size, default is 'hd720'.
+
+@item fps
+Set the output frame rate, default is '25'.
 @end table
 
 Each expression can contain the following constants:
@@ -10149,8 +12955,242 @@ Zoom-in up to 1.5 and pan at same time to some spot near center of picture:
 @example
 zoompan=z='min(zoom+0.0015,1.5)':d=700:x='if(gte(zoom,1.5),x,x+1/a)':y='if(gte(zoom,1.5),y,y+1)':s=640x360
 @end example
+
+@item
+Zoom-in up to 1.5 and pan always at center of picture:
+@example
+zoompan=z='min(zoom+0.0015,1.5)':d=700:x='iw/2-(iw/zoom/2)':y='ih/2-(ih/zoom/2)'
+@end example
 @end itemize
 
+@section zscale
+Scale (resize) the input video, using the z.lib library:
+@url{https://github.com/sekrit-twc/zimg}.
+
+The zscale filter forces the output display aspect ratio to be the same
+as the input, by changing the output sample aspect ratio.
+
+If the input image format is different from the format requested by
+the next filter, the zscale filter will convert the input to the
+requested format.
+
+@subsection Options
+The filter accepts the following options.
+
+@table @option
+@item width, w
+@item height, h
+Set the output video dimension expression. Default value is the input
+dimension.
+
+If the @var{width} or @var{w} is 0, the input width is used for the output.
+If the @var{height} or @var{h} is 0, the input height is used for the output.
+
+If one of the values is -1, the zscale filter will use a value that
+maintains the aspect ratio of the input image, calculated from the
+other specified dimension. If both of them are -1, the input size is
+used.
+
+If one of the values is -n with n > 1, the zscale filter will also use a value
+that maintains the aspect ratio of the input image, calculated from the other
+specified dimension. After that it will, however, make sure that the calculated
+dimension is divisible by n and adjust the value if necessary.
+
+See below for the list of accepted constants for use in the dimension
+expression.
+
+@item size, s
+Set the video size. For the syntax of this option, check the
+@ref{video size syntax,,"Video size" section in the ffmpeg-utils manual,ffmpeg-utils}.
+
+@item dither, d
+Set the dither type.
+
+Possible values are:
+@table @var
+@item none
+@item ordered
+@item random
+@item error_diffusion
+@end table
+
+Default is none.
+
+@item filter, f
+Set the resize filter type.
+
+Possible values are:
+@table @var
+@item point
+@item bilinear
+@item bicubic
+@item spline16
+@item spline36
+@item lanczos
+@end table
+
+Default is bilinear.
+
+@item range, r
+Set the color range.
+
+Possible values are:
+@table @var
+@item input
+@item limited
+@item full
+@end table
+
+Default is same as input.
+
+@item primaries, p
+Set the color primaries.
+
+Possible values are:
+@table @var
+@item input
+@item 709
+@item unspecified
+@item 170m
+@item 240m
+@item 2020
+@end table
+
+Default is same as input.
+
+@item transfer, t
+Set the transfer characteristics.
+
+Possible values are:
+@table @var
+@item input
+@item 709
+@item unspecified
+@item 601
+@item linear
+@item 2020_10
+@item 2020_12
+@end table
+
+Default is same as input.
+
+@item matrix, m
+Set the colorspace matrix.
+
+Possible values are:
+@table @var
+@item input
+@item 709
+@item unspecified
+@item 470bg
+@item 170m
+@item 2020_ncl
+@item 2020_cl
+@end table
+
+Default is same as input.
+
+@item rangein, rin
+Set the input color range.
+
+Possible values are:
+@table @var
+@item input
+@item limited
+@item full
+@end table
+
+Default is same as input.
+
+@item primariesin, pin
+Set the input color primaries.
+
+Possible values are:
+@table @var
+@item input
+@item 709
+@item unspecified
+@item 170m
+@item 240m
+@item 2020
+@end table
+
+Default is same as input.
+
+@item transferin, tin
+Set the input transfer characteristics.
+
+Possible values are:
+@table @var
+@item input
+@item 709
+@item unspecified
+@item 601
+@item linear
+@item 2020_10
+@item 2020_12
+@end table
+
+Default is same as input.
+
+@item matrixin, min
+Set the input colorspace matrix.
+
+Possible values are:
+@table @var
+@item input
+@item 709
+@item unspecified
+@item 470bg
+@item 170m
+@item 2020_ncl
+@item 2020_cl
+@end table
+@end table
+
+The values of the @option{w} and @option{h} options are expressions
+containing the following constants:
+
+@table @var
+@item in_w
+@item in_h
+The input width and height
+
+@item iw
+@item ih
+These are the same as @var{in_w} and @var{in_h}.
+
+@item out_w
+@item out_h
+The output (scaled) width and height
+
+@item ow
+@item oh
+These are the same as @var{out_w} and @var{out_h}
+
+@item a
+The same as @var{iw} / @var{ih}
+
+@item sar
+input sample aspect ratio
+
+@item dar
+The input display aspect ratio. Calculated from @code{(iw / ih) * sar}.
+
+@item hsub
+@item vsub
+horizontal and vertical input chroma subsample values. For example for the
+pixel format "yuv422p" @var{hsub} is 2 and @var{vsub} is 1.
+
+@item ohsub
+@item ovsub
+horizontal and vertical output chroma subsample values. For example for the
+pixel format "yuv422p" @var{hsub} is 2 and @var{vsub} is 1.
+@end table
+
+@table @option
+@end table
+
 @c man end VIDEO FILTERS
 
 @chapter Video Sources
@@ -10624,6 +13664,8 @@ ffplay -f lavfi life=s=300x200:mold=10:r=60:ratio=0.1:death_color=#C83232:life_c
 @end example
 @end itemize
 
+@anchor{allrgb}
+@anchor{allyuv}
 @anchor{color}
 @anchor{haldclutsrc}
 @anchor{nullsrc}
@@ -10631,7 +13673,11 @@ ffplay -f lavfi life=s=300x200:mold=10:r=60:ratio=0.1:death_color=#C83232:life_c
 @anchor{smptebars}
 @anchor{smptehdbars}
 @anchor{testsrc}
-@section color, haldclutsrc, nullsrc, rgbtestsrc, smptebars, smptehdbars, testsrc
+@section allrgb, allyuv, color, haldclutsrc, nullsrc, rgbtestsrc, smptebars, smptehdbars, testsrc
+
+The @code{allrgb} source returns frames of size 4096x4096 of all rgb colors.
+
+The @code{allyuv} source returns frames of size 4096x4096 of all yuv colors.
 
 The @code{color} source provides an uniformly colored input.
 
@@ -10770,62 +13816,193 @@ tools.
 
 Below is a description of the currently available multimedia filters.
 
-@section avectorscope
-
-Convert input audio to a video output, representing the audio vector
-scope.
+@section ahistogram
 
-The filter is used to measure the difference between channels of stereo
-audio stream. A monoaural signal, consisting of identical left and right
-signal, results in straight vertical line. Any stereo separation is visible
-as a deviation from this line, creating a Lissajous figure.
-If the straight (or deviation from it) but horizontal line appears this
-indicates that the left and right channels are out of phase.
+Convert input audio to a video output, displaying the volume histogram.
 
 The filter accepts the following options:
 
 @table @option
-@item mode, m
-Set the vectorscope mode.
+@item dmode
+Specify how histogram is calculated.
 
-Available values are:
+It accepts the following values:
 @table @samp
-@item lissajous
-Lissajous rotated by 45 degrees.
-
-@item lissajous_xy
-Same as above but not rotated.
+@item single
+Use single histogram for all channels.
+@item separate
+Use separate histogram for each channel.
 @end table
+Default is @code{single}.
 
-Default value is @samp{lissajous}.
+@item rate, r
+Set frame rate, expressed as number of frames per second. Default
+value is "25".
 
 @item size, s
-Set the video size for the output. For the syntax of this option, check the
+Specify the video size for the output. For the syntax of this option, check the
 @ref{video size syntax,,"Video size" section in the ffmpeg-utils manual,ffmpeg-utils}.
-Default value is @code{400x400}.
-
-@item rate, r
-Set the output frame rate. Default value is @code{25}.
-
-@item rc
-@item gc
-@item bc
-Specify the red, green and blue contrast. Default values are @code{40}, @code{160} and @code{80}.
-Allowed range is @code{[0, 255]}.
+Default value is @code{hd720}.
 
-@item rf
-@item gf
-@item bf
-Specify the red, green and blue fade. Default values are @code{15}, @code{10} and @code{5}.
-Allowed range is @code{[0, 255]}.
+@item scale
+Set display scale.
 
-@item zoom
-Set the zoom factor. Default value is @code{1}. Allowed range is @code{[1, 10]}.
+It accepts the following values:
+@table @samp
+@item log
+logarithmic
+@item sqrt
+square root
+@item cbrt
+cubic root
+@item lin
+linear
+@item rlog
+reverse logarithmic
 @end table
+Default is @code{log}.
 
-@subsection Examples
+@item ascale
+Set amplitude scale.
 
-@itemize
+It accepts the following values:
+@table @samp
+@item log
+logarithmic
+@item lin
+linear
+@end table
+Default is @code{log}.
+
+@item acount
+Set how many frames to accumulate in the histogram.
+Default is 1. Setting this to -1 accumulates all frames.
+
+@item rheight
+Set histogram ratio of window height.
+
+@item slide
+Set sonogram sliding.
+
+It accepts the following values:
+@table @samp
+@item replace
+replace old rows with new ones.
+@item scroll
+scroll from top to bottom.
+@end table
+Default is @code{replace}.
+@end table
+
+@section aphasemeter
+
+Convert input audio to a video output, displaying the audio phase.
+
+The filter accepts the following options:
+
+@table @option
+@item rate, r
+Set the output frame rate. Default value is @code{25}.
+
+@item size, s
+Set the video size for the output. For the syntax of this option, check the
+@ref{video size syntax,,"Video size" section in the ffmpeg-utils manual,ffmpeg-utils}.
+Default value is @code{800x400}.
+
+@item rc
+@item gc
+@item bc
+Specify the red, green, blue contrast. Default values are @code{2},
+@code{7} and @code{1}.
+Allowed range is @code{[0, 255]}.
+
+@item mpc
+Set color which will be used for drawing median phase. If color is
+@code{none} which is default, no median phase value will be drawn.
+@end table
+
+The filter also exports the frame metadata @code{lavfi.aphasemeter.phase} which
+represents mean phase of current audio frame. Value is in range @code{[-1, 1]}.
+The @code{-1} means left and right channels are completely out of phase and
+@code{1} means channels are in phase.
+
+@section avectorscope
+
+Convert input audio to a video output, representing the audio vector
+scope.
+
+The filter is used to measure the difference between channels of stereo
+audio stream. A monaural signal, consisting of identical left and right
+signal, results in straight vertical line. Any stereo separation is visible
+as a deviation from this line, creating a Lissajous figure.
+If the straight (or deviation from it) but horizontal line appears this
+indicates that the left and right channels are out of phase.
+
+The filter accepts the following options:
+
+@table @option
+@item mode, m
+Set the vectorscope mode.
+
+Available values are:
+@table @samp
+@item lissajous
+Lissajous rotated by 45 degrees.
+
+@item lissajous_xy
+Same as above but not rotated.
+
+@item polar
+Shape resembling half of circle.
+@end table
+
+Default value is @samp{lissajous}.
+
+@item size, s
+Set the video size for the output. For the syntax of this option, check the
+@ref{video size syntax,,"Video size" section in the ffmpeg-utils manual,ffmpeg-utils}.
+Default value is @code{400x400}.
+
+@item rate, r
+Set the output frame rate. Default value is @code{25}.
+
+@item rc
+@item gc
+@item bc
+@item ac
+Specify the red, green, blue and alpha contrast. Default values are @code{40},
+@code{160}, @code{80} and @code{255}.
+Allowed range is @code{[0, 255]}.
+
+@item rf
+@item gf
+@item bf
+@item af
+Specify the red, green, blue and alpha fade. Default values are @code{15},
+@code{10}, @code{5} and @code{5}.
+Allowed range is @code{[0, 255]}.
+
+@item zoom
+Set the zoom factor. Default value is @code{1}. Allowed range is @code{[1, 10]}.
+
+@item draw
+Set the vectorscope drawing mode.
+
+Available values are:
+@table @samp
+@item dot
+Draw dot for each sample.
+
+@item line
+Draw line between previous and current sample.
+@end table
+
+Default value is @samp{dot}.
+@end table
+
+@subsection Examples
+
+@itemize
 @item
 Complete example using @command{ffplay}:
 @example
@@ -10914,6 +14091,7 @@ do not have exactly the same duration in the first file.
 
 @end itemize
 
+@anchor{ebur128}
 @section ebur128
 
 EBU R128 scanner filter. This filter takes an audio stream as input and outputs
@@ -10994,6 +14172,15 @@ stream for better peak accuracy. It logs a message for true-peak.
 This mode requires a build with @code{libswresample}.
 @end table
 
+@item dualmono
+Treat mono input files as "dual mono". If a mono file is intended for playback
+on a stereo system, its EBU R128 measurement will be perceptually incorrect.
+If set to @code{true}, this option will compensate for this effect.
+Multi-channel input files are not affected by this option.
+
+@item panlaw
+Set a specific pan law to be used for the measurement of dual mono files.
+This parameter is optional, and has a default value of -3.01dB.
 @end table
 
 @subsection Examples
@@ -11099,6 +14286,22 @@ following one, the permission might not be received as expected in that
 following filter. Inserting a @ref{format} or @ref{aformat} filter before the
 perms/aperms filter can avoid this problem.
 
+@section realtime, arealtime
+
+Slow down filtering to match real time approximately.
+
+These filters will pause the filtering for a variable amount of time to
+match the output rate with the input timestamps.
+They are similar to the @option{re} option to @code{ffmpeg}.
+
+They accept the following options:
+
+@table @option
+@item limit
+Time limit for the pauses. Any pause longer than that will be considered
+a timestamp discontinuity and reset the timer. Default is 2 seconds.
+@end table
+
 @section select, aselect
 
 Select frames to pass in output.
@@ -11207,6 +14410,25 @@ value between 0 and 1 to indicate a new scene; a low value reflects a low
 probability for the current frame to introduce a new scene, while a higher
 value means the current frame is more likely to be one (see the example below)
 
+@item concatdec_select
+The concat demuxer can select only part of a concat input file by setting an
+inpoint and an outpoint, but the output packets may not be entirely contained
+in the selected interval. By using this variable, it is possible to skip frames
+generated by the concat demuxer which are not exactly contained in the selected
+interval.
+
+This works by comparing the frame pts against the @var{lavf.concat.start_time}
+and the @var{lavf.concat.duration} packet metadata values which are also
+present in the decoded frames.
+
+The @var{concatdec_select} variable is -1 if the frame pts is at least
+start_time and either the duration metadata is missing or the frame pts is less
+than start_time + duration, 0 otherwise, and NaN if the start_time metadata is
+missing.
+
+That basically means that an input frame is selected if its pts is within the
+interval set by the concat demuxer.
+
 @end table
 
 The default value of the select expression is "1".
@@ -11281,6 +14503,13 @@ Send even and odd frames to separate outputs, and compose them:
 @example
 select=n=2:e='mod(n, 2)+1' [odd][even]; [odd] pad=h=2*ih [tmp]; [tmp][even] overlay=y=h
 @end example
+
+@item
+Select useful frames from an ffconcat file which is using inpoints and
+outpoints but where the source files are not intra frame only.
+@example
+ffmpeg -copyts -vsync 0 -segment_time_metadata 1 -i input.ffconcat -vf select=concatdec_select -af aselect=concatdec_select output.avi
+@end example
 @end itemize
 
 @section sendcmd, asendcmd
@@ -11601,21 +14830,48 @@ settb=AVTB
 @end itemize
 
 @section showcqt
-Convert input audio to a video output representing
-frequency spectrum logarithmically (using constant Q transform with
-Brown-Puckette algorithm), with musical tone scale, from E0 to D#10 (10 octaves).
+Convert input audio to a video output representing frequency spectrum
+logarithmically using Brown-Puckette constant Q transform algorithm with
+direct frequency domain coefficient calculation (but the transform itself
+is not really constant Q, instead the Q factor is actually variable/clamped),
+with musical tone scale, from E0 to D#10.
 
 The filter accepts the following options:
 
 @table @option
-@item volume
-Specify transform volume (multiplier) expression. The expression can contain
-variables:
+@item size, s
+Specify the video size for the output. It must be even. For the syntax of this option,
+check the @ref{video size syntax,,"Video size" section in the ffmpeg-utils manual,ffmpeg-utils}.
+Default value is @code{1920x1080}.
+
+@item fps, rate, r
+Set the output frame rate. Default value is @code{25}.
+
+@item bar_h
+Set the bargraph height. It must be even. Default value is @code{-1} which
+computes the bargraph height automatically.
+
+@item axis_h
+Set the axis height. It must be even. Default value is @code{-1} which computes
+the axis height automatically.
+
+@item sono_h
+Set the sonogram height. It must be even. Default value is @code{-1} which
+computes the sonogram height automatically.
+
+@item fullhd
+Set the fullhd resolution. This option is deprecated, use @var{size}, @var{s}
+instead. Default value is @code{1}.
+
+@item sono_v, volume
+Specify the sonogram volume expression. It can contain variables:
 @table @option
+@item bar_v
+the @var{bar_v} evaluated expression
 @item frequency, freq, f
-the frequency where transform is evaluated
+the frequency where it is evaluated
 @item timeclamp, tc
-value of timeclamp option
+the value of @var{timeclamp} option
 @end table
 and functions:
 @table @option
@@ -11624,75 +14880,112 @@ A-weighting of equal loudness
 @item b_weighting(f)
 B-weighting of equal loudness
 @item c_weighting(f)
-C-weighting of equal loudness
+C-weighting of equal loudness.
 @end table
 Default value is @code{16}.
 
-@item tlength
-Specify transform length expression. The expression can contain variables:
+@item bar_v, volume2
+Specify the bargraph volume expression. It can contain variables:
 @table @option
+@item sono_v
+the @var{sono_v} evaluated expression
 @item frequency, freq, f
-the frequency where transform is evaluated
+the frequency where it is evaluated
 @item timeclamp, tc
-value of timeclamp option
+the value of @var{timeclamp} option
+@end table
+and functions:
+@table @option
+@item a_weighting(f)
+A-weighting of equal loudness
+@item b_weighting(f)
+B-weighting of equal loudness
+@item c_weighting(f)
+C-weighting of equal loudness.
 @end table
-Default value is @code{384/f*tc/(384/f+tc)}.
+Default value is @code{sono_v}.
 
-@item timeclamp
+@item sono_g, gamma
+Specify the sonogram gamma. Lower gamma gives the spectrum more contrast,
+higher gamma gives the spectrum more range. Default value is @code{3}.
+Acceptable range is @code{[1, 7]}.
+
+@item bar_g, gamma2
+Specify the bargraph gamma. Default value is @code{1}. Acceptable range is
+@code{[1, 7]}.
+
+@item timeclamp, tc
 Specify the transform timeclamp. At low frequency, there is trade-off between
 accuracy in time domain and frequency domain. If timeclamp is lower,
 event in time domain is represented more accurately (such as fast bass drum),
 otherwise event in frequency domain is represented more accurately
-(such as bass guitar). Acceptable value is [0.1, 1.0]. Default value is @code{0.17}.
+(such as bass guitar). Acceptable range is @code{[0.1, 1]}. Default value is @code{0.17}.
+
+@item basefreq
+Specify the transform base frequency. Default value is @code{20.01523126408007475},
+which is frequency 50 cents below E0. Acceptable range is @code{[10, 100000]}.
+
+@item endfreq
+Specify the transform end frequency. Default value is @code{20495.59681441799654},
+which is frequency 50 cents above D#10. Acceptable range is @code{[10, 100000]}.
 
 @item coeffclamp
-Specify the transform coeffclamp. If coeffclamp is lower, transform is
-more accurate, otherwise transform is faster. Acceptable value is [0.1, 10.0].
-Default value is @code{1.0}.
+This option is deprecated and ignored.
 
-@item gamma
-Specify gamma. Lower gamma makes the spectrum more contrast, higher gamma
-makes the spectrum having more range. Acceptable value is [1.0, 7.0].
-Default value is @code{3.0}.
+@item tlength
+Specify the transform length in time domain. Use this option to control accuracy
+trade-off between time domain and frequency domain at every frequency sample.
+It can contain variables:
+@table @option
+@item frequency, freq, f
+the frequency where it is evaluated
+@item timeclamp, tc
+the value of @var{timeclamp} option.
+@end table
+Default value is @code{384*tc/(384+tc*f)}.
+
+@item count
+Specify the transform count for every video frame. Default value is @code{6}.
+Acceptable range is @code{[1, 30]}.
 
-@item gamma2
-Specify gamma of bargraph. Acceptable value is [1.0, 7.0].
-Default value is @code{1.0}.
+@item fcount
+Specify the transform count for every single pixel. Default value is @code{0},
+which makes it computed automatically. Acceptable range is @code{[0, 10]}.
 
 @item fontfile
-Specify font file for use with freetype. If not specified, use embedded font.
+Specify font file for use with freetype to draw the axis. If not specified,
+use embedded font. Note that drawing with font file or embedded font is not
+implemented with custom @var{basefreq} and @var{endfreq}, use @var{axisfile}
+option instead.
 
 @item fontcolor
 Specify font color expression. This is arithmetic expression that should return
-integer value 0xRRGGBB. The expression can contain variables:
+integer value 0xRRGGBB. It can contain variables:
 @table @option
 @item frequency, freq, f
-the frequency where transform is evaluated
+the frequency where it is evaluated
 @item timeclamp, tc
-value of timeclamp option
+the value of @var{timeclamp} option
 @end table
 and functions:
 @table @option
 @item midi(f)
 midi number of frequency f, some midi numbers: E0(16), C1(24), C2(36), A4(69)
 @item r(x), g(x), b(x)
-red, green, and blue value of intensity x
+red, green, and blue value of intensity x.
 @end table
 Default value is @code{st(0, (midi(f)-59.5)/12);
 st(1, if(between(ld(0),0,1), 0.5-0.5*cos(2*PI*ld(0)), 0));
-r(1-ld(1)) + b(ld(1))}
-
-@item fullhd
-If set to 1 (the default), the video size is 1920x1080 (full HD),
-if set to 0, the video size is 960x540. Use this option to make CPU usage lower.
+r(1-ld(1)) + b(ld(1))}.
 
-@item fps
-Specify video fps. Default value is @code{25}.
+@item axisfile
+Specify image file to draw the axis. This option overrides the @var{fontfile} and
+@var{fontcolor} options.
 
-@item count
-Specify number of transform per frame, so there are fps*count transforms
-per second. Note that audio data rate must be divisible by fps*count.
-Default value is @code{6}.
+@item axis, text
+Enable/disable drawing text to the axis. If it is set to @code{0}, drawing to
+the axis is disabled, ignoring the @var{fontfile} and @var{axisfile} options.
+Default value is @code{1}.
 
 @end table
 
@@ -11712,9 +15005,15 @@ ffplay -f lavfi 'amovie=a.mp3, asplit [a][out1]; [a] showcqt=fps=30:count=5 [out
 @end example
 
 @item
-Playing at 960x540 and lower CPU usage:
+Playing at 1280x720:
+@example
+ffplay -f lavfi 'amovie=a.mp3, asplit [a][out1]; [a] showcqt=s=1280x720:count=4 [out0]'
+@end example
+
+@item
+Disable sonogram display:
 @example
-ffplay -f lavfi 'amovie=a.mp3, asplit [a][out1]; [a] showcqt=fullhd=0:count=3 [out0]'
+sono_h=0
 @end example
 
 @item
@@ -11725,38 +15024,173 @@ ffplay -f lavfi 'aevalsrc=0.1*sin(2*PI*55*t)+0.1*sin(4*PI*55*t)+0.1*sin(6*PI*55*
 @end example
 
 @item
-Same as above, but with more accuracy in frequency domain (and slower):
+Same as above, but with more accuracy in frequency domain:
 @example
 ffplay -f lavfi 'aevalsrc=0.1*sin(2*PI*55*t)+0.1*sin(4*PI*55*t)+0.1*sin(6*PI*55*t)+0.1*sin(8*PI*55*t),
                  asplit[a][out1]; [a] showcqt=timeclamp=0.5 [out0]'
 @end example
 
 @item
-B-weighting of equal loudness
+Custom volume:
 @example
-volume=16*b_weighting(f)
+bar_v=10:sono_v=bar_v*a_weighting(f)
 @end example
 
 @item
-Lower Q factor
+Custom gamma, now spectrum is linear to the amplitude.
 @example
-tlength=100/f*tc/(100/f+tc)
+bar_g=2:sono_g=2
 @end example
 
 @item
-Custom fontcolor, C-note is colored green, others are colored blue
+Custom tlength equation:
 @example
-fontcolor='if(mod(floor(midi(f)+0.5),12), 0x0000FF, g(1))'
+tc=0.33:tlength='st(0,0.17); 384*tc / (384 / ld(0) + tc*f /(1-ld(0))) + 384*tc / (tc*f / ld(0) + 384 /(1-ld(0)))'
 @end example
 
 @item
-Custom gamma, now spectrum is linear to the amplitude.
+Custom fontcolor and fontfile, C-note is colored green, others are colored blue:
 @example
-gamma=2:gamma2=2
+fontcolor='if(mod(floor(midi(f)+0.5),12), 0x0000FF, g(1))':fontfile=myfont.ttf
 @end example
 
+@item
+Custom frequency range with custom axis using image file:
+@example
+axisfile=myaxis.png:basefreq=40:endfreq=10000
+@end example
 @end itemize
 
+@section showfreqs
+
+Convert input audio to video output representing the audio power spectrum.
+Audio amplitude is on Y-axis while frequency is on X-axis.
+
+The filter accepts the following options:
+
+@table @option
+@item size, s
+Specify size of video. For the syntax of this option, check the
+@ref{video size syntax,,"Video size" section in the ffmpeg-utils manual,ffmpeg-utils}.
+Default is @code{1024x512}.
+
+@item mode
+Set display mode.
+This sets how each frequency bin will be represented.
+
+It accepts the following values:
+@table @samp
+@item line
+@item bar
+@item dot
+@end table
+Default is @code{bar}.
+
+@item ascale
+Set amplitude scale.
+
+It accepts the following values:
+@table @samp
+@item lin
+Linear scale.
+
+@item sqrt
+Square root scale.
+
+@item cbrt
+Cubic root scale.
+
+@item log
+Logarithmic scale.
+@end table
+Default is @code{log}.
+
+@item fscale
+Set frequency scale.
+
+It accepts the following values:
+@table @samp
+@item lin
+Linear scale.
+
+@item log
+Logarithmic scale.
+
+@item rlog
+Reverse logarithmic scale.
+@end table
+Default is @code{lin}.
+
+@item win_size
+Set window size.
+
+It accepts the following values:
+@table @samp
+@item w16
+@item w32
+@item w64
+@item w128
+@item w256
+@item w512
+@item w1024
+@item w2048
+@item w4096
+@item w8192
+@item w16384
+@item w32768
+@item w65536
+@end table
+Default is @code{w2048}.
+
+@item win_func
+Set windowing function.
+
+It accepts the following values:
+@table @samp
+@item rect
+@item bartlett
+@item hanning
+@item hamming
+@item blackman
+@item welch
+@item flattop
+@item bharris
+@item bnuttall
+@item bhann
+@item sine
+@item nuttall
+@item lanczos
+@item gauss
+@item tukey
+@end table
+Default is @code{hanning}.
+
+@item overlap
+Set window overlap. In range @code{[0, 1]}. Default is @code{1},
+which means optimal overlap for selected window function will be picked.
+
+@item averaging
+Set time averaging. Setting this to 0 will display current maximal peaks.
+Default is @code{1}, which means time averaging is disabled.
+
+@item colors
+Specify list of colors separated by space or by '|' which will be used to
+draw channel frequencies. Unrecognized or missing colors will be replaced
+by white color.
+
+@item cmode
+Set channel display mode.
+
+It accepts the following values:
+@table @samp
+@item combined
+@item separate
+@end table
+Default is @code{combined}.
+
+@end table
+
+@anchor{showspectrum}
 @section showspectrum
 
 Convert input audio to a video output, representing the audio frequency
@@ -11779,6 +15213,8 @@ It accepts the following values:
 the samples start again on the left when they reach the right
 @item scroll
 the samples scroll from right to left
+@item rscroll
+the samples scroll from left to right
 @item fullframe
 frames are only produced when the samples reach the right
 @end table
@@ -11806,7 +15242,21 @@ It accepts the following values:
 @item channel
 each channel is displayed in a separate color
 @item intensity
-each channel is is displayed using the same color scheme
+each channel is displayed using the same color scheme
+@item rainbow
+each channel is displayed using the rainbow color scheme
+@item moreland
+each channel is displayed using the moreland color scheme
+@item nebulae
+each channel is displayed using the nebulae color scheme
+@item fire
+each channel is displayed using the fire color scheme
+@item fiery
+each channel is displayed using the fiery color scheme
+@item fruit
+each channel is displayed using the fruit color scheme
+@item cool
+each channel is displayed using the cool color scheme
 @end table
 
 Default value is @samp{channel}.
@@ -11822,6 +15272,10 @@ linear
 square root, default
 @item cbrt
 cubic root
+@item 4thrt
+4th root
+@item 5thrt
+5th root
 @item log
 logarithmic
 @end table
@@ -11839,17 +15293,41 @@ Set window function.
 
 It accepts the following values:
 @table @samp
-@item none
-No samples pre-processing (do not expect this to be faster)
+@item rect
+@item bartlett
 @item hann
-Hann window
+@item hanning
 @item hamming
-Hamming window
 @item blackman
-Blackman window
+@item welch
+@item flattop
+@item bharris
+@item bnuttall
+@item bhann
+@item sine
+@item nuttall
+@item lanczos
+@item gauss
+@item tukey
 @end table
 
 Default value is @code{hann}.
+
+@item orientation
+Set orientation of time vs frequency axis. Can be @code{vertical} or
+@code{horizontal}. Default is @code{vertical}.
+
+@item overlap
+Set ratio of overlap window. Default value is @code{0}.
+When value is @code{1} overlap is set to recommended size for specific
+window function currently used.
+
+@item gain
+Set scale gain for calculating intensity color values.
+Default value is @code{1}.
+
+@item data
+Set which data to display. Can be @code{magnitude} (default) or @code{phase}.
 @end table
 
 The usage is very similar to the showwaves filter; see the examples in that
@@ -11872,6 +15350,172 @@ ffplay -f lavfi 'amovie=input.mp3, asplit [a][out1];
 @end example
 @end itemize
 
+@section showspectrumpic
+
+Convert input audio to a single video frame, representing the audio frequency
+spectrum.
+
+The filter accepts the following options:
+
+@table @option
+@item size, s
+Specify the video size for the output. For the syntax of this option, check the
+@ref{video size syntax,,"Video size" section in the ffmpeg-utils manual,ffmpeg-utils}.
+Default value is @code{4096x2048}.
+
+@item mode
+Specify display mode.
+
+It accepts the following values:
+@table @samp
+@item combined
+all channels are displayed in the same row
+@item separate
+all channels are displayed in separate rows
+@end table
+Default value is @samp{combined}.
+
+@item color
+Specify display color mode.
+
+It accepts the following values:
+@table @samp
+@item channel
+each channel is displayed in a separate color
+@item intensity
+each channel is displayed using the same color scheme
+@item rainbow
+each channel is displayed using the rainbow color scheme
+@item moreland
+each channel is displayed using the moreland color scheme
+@item nebulae
+each channel is displayed using the nebulae color scheme
+@item fire
+each channel is displayed using the fire color scheme
+@item fiery
+each channel is displayed using the fiery color scheme
+@item fruit
+each channel is displayed using the fruit color scheme
+@item cool
+each channel is displayed using the cool color scheme
+@end table
+Default value is @samp{intensity}.
+
+@item scale
+Specify scale used for calculating intensity color values.
+
+It accepts the following values:
+@table @samp
+@item lin
+linear
+@item sqrt
+square root
+@item cbrt
+cubic root
+@item 4thrt
+4th root
+@item 5thrt
+5th root
+@item log
+logarithmic
+@end table
+Default value is @samp{log}.
+
+@item saturation
+Set saturation modifier for displayed colors. Negative values provide
+alternative color scheme. @code{0} is no saturation at all.
+Saturation must be in [-10.0, 10.0] range.
+Default value is @code{1}.
+
+@item win_func
+Set window function.
+
+It accepts the following values:
+@table @samp
+@item rect
+@item bartlett
+@item hann
+@item hanning
+@item hamming
+@item blackman
+@item welch
+@item flattop
+@item bharris
+@item bnuttall
+@item bhann
+@item sine
+@item nuttall
+@item lanczos
+@item gauss
+@item tukey
+@end table
+Default value is @code{hann}.
+
+@item orientation
+Set orientation of time vs frequency axis. Can be @code{vertical} or
+@code{horizontal}. Default is @code{vertical}.
+
+@item gain
+Set scale gain for calculating intensity color values.
+Default value is @code{1}.
+
+@item legend
+Draw time and frequency axes and legends. Default is enabled.
+@end table
+
+@subsection Examples
+
+@itemize
+@item
+Extract an audio spectrogram of a whole audio track
+in a 1024x1024 picture using @command{ffmpeg}:
+@example
+ffmpeg -i audio.flac -lavfi showspectrumpic=s=1024x1024 spectrogram.png
+@end example
+@end itemize
+
+@section showvolume
+
+Convert input audio volume to a video output.
+
+The filter accepts the following options:
+
+@table @option
+@item rate, r
+Set video rate.
+
+@item b
+Set border width, allowed range is [0, 5]. Default is 1.
+
+@item w
+Set channel width, allowed range is [80, 1080]. Default is 400.
+
+@item h
+Set channel height, allowed range is [1, 100]. Default is 20.
+
+@item f
+Set fade, allowed range is [0.001, 1]. Default is 0.95.
+
+@item c
+Set volume color expression.
+
+The expression can use the following variables:
+
+@table @option
+@item VOLUME
+Current max volume of channel in dB.
+
+@item CHANNEL
+Current channel number, starting from 0.
+@end table
+
+@item t
+If set, displays channel names. Default is enabled.
+
+@item v
+If set, displays volume values. Default is enabled.
+@end table
+
 @section showwaves
 
 Convert input audio to a video output, representing the samples waves.
@@ -11917,6 +15561,13 @@ option @var{n}. Default value is "25".
 @item split_channels
 Set if channels should be drawn separately or overlap. Default value is 0.
 
+@item colors
+Set colors separated by '|' which are going to be used for drawing of each channel.
+
+@item scale
+Set amplitude scale. Can be linear @code{lin} or logarithmic @code{log}.
+Default is linear.
+
 @end table
 
 @subsection Examples
@@ -11951,6 +15602,13 @@ Default value is @code{600x240}.
 
 @item split_channels
 Set if channels should be drawn separately or overlap. Default value is 0.
+
+@item colors
+Set colors separated by '|' which are going to be used for drawing of each channel.
+
+@item scale
+Set amplitude scale. Can be linear @code{lin} or logarithmic @code{log}.
+Default is linear.
 @end table
 
 @subsection Examples
@@ -11962,6 +15620,76 @@ in a 1024x800 picture using @command{ffmpeg}:
 @example
 ffmpeg -i audio.flac -lavfi showwavespic=split_channels=1:s=1024x800 waveform.png
 @end example
+
+@item
+Colorize the waveform with colorchannelmixer. This example will make
+the waveform a green color approximately RGB(66,217,150). Additional
+channels will be shades of this color.
+@example
+ffmpeg -i audio.mp3 -filter_complex "showwavespic,colorchannelmixer=rr=66/255:gg=217/255:bb=150/255" waveform.png
+@end example
+@end itemize
+
+@section spectrumsynth
+
+Synthesize audio from 2 input video spectrums, first input stream represents
+magnitude across time and second represents phase across time.
+The filter will transform from frequency domain as displayed in videos back
+to time domain as presented in audio output.
+
+This filter is primarily created for reversing processed @ref{showspectrum}
+filter outputs, but can synthesize sound from other spectrograms too.
+But in such case results are going to be poor if the phase data is not
+available, because in such cases phase data needs to be recreated, usually
+it's just recreated from random noise.
+For best results use gray only output (@code{channel} color mode in
+@ref{showspectrum} filter) and @code{log} scale for magnitude video and
+@code{lin} scale for phase video. To produce phase, for 2nd video, use
+@code{data} option. Input videos should generally use @code{fullframe}
+slide mode as that saves resources needed for decoding video.
+
+The filter accepts the following options:
+
+@table @option
+@item sample_rate
+Specify sample rate of output audio, the sample rate of audio from which
+spectrum was generated may differ.
+
+@item channels
+Set number of channels represented in input video spectrums.
+
+@item scale
+Set scale which was used when generating magnitude input spectrum.
+Can be @code{lin} or @code{log}. Default is @code{log}.
+
+@item slide
+Set slide which was used when generating input spectrums.
+Can be @code{replace}, @code{scroll}, @code{fullframe} or @code{rscroll}.
+Default is @code{fullframe}.
+
+@item win_func
+Set window function used for resynthesis.
+
+@item overlap
+Set window overlap. In range @code{[0, 1]}. Default is @code{1},
+which means optimal overlap for selected window function will be picked.
+
+@item orientation
+Set orientation of input videos. Can be @code{vertical} or @code{horizontal}.
+Default is @code{vertical}.
+@end table
+
+@subsection Examples
+
+@itemize
+@item
+First create magnitude and phase videos from audio, assuming audio is stereo with 44100 sample rate,
+then resynthesize videos back to audio with spectrumsynth:
+@example
+ffmpeg -i input.flac -lavfi showspectrum=mode=separate:scale=log:overlap=0.875:color=channel:slide=fullframe:data=magnitude -an -c:v rawvideo magnitude.nut
+ffmpeg -i input.flac -lavfi showspectrum=mode=separate:scale=lin:overlap=0.875:color=channel:slide=fullframe:data=phase -an -c:v rawvideo phase.nut
+ffmpeg -i magnitude.nut -i phase.nut -lavfi spectrumsynth=channels=2:sample_rate=44100:win_func=hann:overlap=0.875:slide=fullframe output.flac
+@end example
 @end itemize
 
 @section split, asplit
diff --git a/doc/general.texi b/doc/general.texi
index ba79503d..59ea4f44 100644
--- a/doc/general.texi
+++ b/doc/general.texi
@@ -53,14 +53,6 @@ instructions for installing the libraries.
 Then pass @code{--enable-libopencore-amrnb} and/or
 @code{--enable-libopencore-amrwb} to configure to enable them.
 
-@subsection VisualOn AAC encoder library
-
-FFmpeg can make use of the VisualOn AACenc library for AAC encoding.
-
-Go to @url{http://sourceforge.net/projects/opencore-amr/} and follow the
-instructions for installing the library.
-Then pass @code{--enable-libvo-aacenc} to configure to enable it.
-
 @subsection VisualOn AMR-WB encoder library
 
 FFmpeg can make use of the VisualOn AMR-WBenc library for AMR-WB encoding.
@@ -145,6 +137,14 @@ x265 is under the GNU Public License Version 2 or later
 details), you must upgrade FFmpeg's license to GPL in order to use it.
 @end float
 
+@section kvazaar
+
+FFmpeg can make use of the kvazaar library for HEVC encoding.
+
+Go to @url{https://github.com/ultravideo/kvazaar} and follow the
+instructions for installing the library. Then pass
+@code{--enable-libkvazaar} to configure to enable it.
+
 @section libilbc
 
 iLBC is a narrowband speech codec that has been made freely available
@@ -165,12 +165,6 @@ Go to @url{http://sourceforge.net/projects/zapping/} and follow the instructions
 installing the library. Then pass @code{--enable-libzvbi} to configure to
 enable it.
 
-@float NOTE
-libzvbi is licensed under the GNU General Public License Version 2 or later
-(see @url{http://www.gnu.org/licenses/old-licenses/gpl-2.0.html} for details),
-you must upgrade FFmpeg's license to GPL in order to use it.
-@end float
-
 @section AviSynth
 
 FFmpeg can read AviSynth scripts as input. To enable support, pass
@@ -192,6 +186,17 @@ end user having AviSynth or AvxSynth installed - they'll only need to be
 installed to use AviSynth scripts (obviously).
 @end float
 
+@section Intel QuickSync Video
+
+FFmpeg can use Intel QuickSync Video (QSV) for accelerated encoding and decoding
+of multiple codecs. To use QSV, FFmpeg must be linked against the @code{libmfx}
+dispatcher, which loads the actual decoding libraries.
+
+The dispatcher is open source and can be downloaded from
+@url{https://github.com/lu-zero/mfx_dispatch.git}. FFmpeg needs to be configured
+with the @code{--enable-libmfx} option and @code{pkg-config} needs to be able to
+locate the dispatcher's @code{.pc} files.
+
 
 @chapter Supported File Formats, Codecs or Features
 
@@ -204,9 +209,14 @@ library:
 
 @multitable @columnfractions .4 .1 .1 .4
 @item Name @tab Encoding @tab Decoding @tab Comments
+@item 3dostr                    @tab   @tab X
 @item 4xm                       @tab   @tab X
     @tab 4X Technologies format, used in some games.
 @item 8088flex TMV              @tab   @tab X
+@item AAX                       @tab   @tab X
+    @tab Audible Enhanced Audio format, used in audiobooks.
+@item AA                        @tab   @tab X
+    @tab Audible Format 2, 3, and 4, used in audiobooks.
 @item ACT Voice                 @tab   @tab X
     @tab contains G.729 audio
 @item Adobe Filmstrip           @tab X @tab X
@@ -218,10 +228,14 @@ library:
     @tab Multimedia format used in game Heart Of Darkness.
 @item Apple HTTP Live Streaming @tab   @tab X
 @item Artworx Data Format       @tab   @tab X
+@item Interplay ACM             @tab   @tab X
+    @tab Audio only format used in some Interplay games.
 @item ADP                       @tab   @tab X
     @tab Audio format used on the Nintendo Gamecube.
 @item AFC                       @tab   @tab X
     @tab Audio format used on the Nintendo Gamecube.
+@item ADS/SS2                   @tab   @tab X
+    @tab Audio format used on the PS2.
 @item APNG                      @tab X @tab X
 @item ASF                       @tab X @tab X
 @item AST                       @tab X @tab X
@@ -243,6 +257,8 @@ library:
     @tab Used in Z and Z95 games.
 @item Brute Force & Ignorance   @tab   @tab X
     @tab Used in the game Flash Traffic: City of Angels.
+@item BFSTM                     @tab   @tab X
+    @tab Audio format used on the Nintendo WiiU (based on BRSTM).
 @item BRSTM                     @tab   @tab X
     @tab Audio format used on the Nintendo Wii.
 @item BWF                       @tab X @tab X
@@ -260,6 +276,7 @@ library:
 @item CD+G                      @tab   @tab X
     @tab Video format used by CD+G karaoke disks
 @item Phantom Cine              @tab   @tab X
+@item Cineform HD               @tab   @tab X
 @item Commodore CDXL            @tab   @tab X
     @tab Amiga CD video format
 @item Core Audio Format         @tab X @tab X
@@ -271,8 +288,10 @@ library:
     @tab Audio format used in some games by CRYO Interactive Entertainment.
 @item D-Cinema audio            @tab X @tab X
 @item Deluxe Paint Animation    @tab   @tab X
+@item DCSTR                     @tab   @tab X
 @item DFA                       @tab   @tab X
     @tab This format is used in Chronomaster game
+@item DirectDraw Surface        @tab   @tab X
 @item DSD Stream File (DSF)     @tab   @tab X
 @item DV video                  @tab X @tab X
 @item DXA                       @tab   @tab X
@@ -296,6 +315,8 @@ library:
 @item G.723.1                   @tab X @tab X
 @item G.729 BIT                 @tab X @tab X
 @item G.729 raw                 @tab   @tab X
+@item GENH                      @tab   @tab X
+    @tab Audio format for various games.
 @item GIF Animation             @tab X @tab X
 @item GXF                       @tab X @tab X
     @tab General eXchange Format SMPTE 360M, used by Thomson Grass Valley
@@ -318,6 +339,7 @@ library:
     @tab A format generated by IndigoVision 8000 video server.
 @item IVF (On2)                 @tab X @tab X
     @tab A format used by libvpx
+@item Internet Video Recording  @tab   @tab X
 @item IRCAM                     @tab X @tab X
 @item LATM                      @tab X @tab X
 @item LMLM4                     @tab   @tab X
@@ -354,6 +376,8 @@ library:
     @tab also known as DVB Transport Stream
 @item MPEG-4                    @tab X @tab X
     @tab MPEG-4 is a variant of QuickTime.
+@item MSF                       @tab   @tab X
+    @tab Audio format used on the PS3.
 @item Mirillis FIC video        @tab   @tab X
     @tab No cursor rendering.
 @item MIME multipart JPEG       @tab X @tab
@@ -437,6 +461,7 @@ library:
 @item Redirector                @tab   @tab X
 @item RedSpark                  @tab   @tab X
 @item Renderware TeXture Dictionary @tab   @tab X
+@item Resolume DXV              @tab   @tab X
 @item RL2                       @tab   @tab X
     @tab Audio and video format used in some games by Entertainment Software Partners.
 @item RPL/ARMovie               @tab   @tab X
@@ -469,6 +494,8 @@ library:
 @item SoX native format         @tab X @tab X
 @item SUN AU format             @tab X @tab X
 @item SUP raw PGS subtitles     @tab   @tab X
+@item SVAG                      @tab   @tab X
+    @tab Audio format used in Konami PS2 games.
 @item TDSC                      @tab   @tab X
 @item Text files                @tab   @tab X
 @item THP                       @tab   @tab X
@@ -476,8 +503,13 @@ library:
 @item Tiertex Limited SEQ       @tab   @tab X
     @tab Tiertex .seq files used in the DOS CD-ROM version of the game Flashback.
 @item True Audio                @tab   @tab X
+@item VAG                       @tab   @tab X
+    @tab Audio format used in many Sony PS2 games.
 @item VC-1 test bitstream       @tab X @tab X
+@item Vidvox Hap                @tab X @tab X
 @item Vivo                      @tab   @tab X
+@item VPK                       @tab   @tab X
+    @tab Audio format used in Sony PS games.
 @item WAV                       @tab X @tab X
 @item WavPack                   @tab X @tab X
 @item WebM                      @tab X @tab X
@@ -488,8 +520,11 @@ library:
     @tab Multimedia format used in Westwood Studios games.
 @item Westwood Studios VQA      @tab   @tab X
     @tab Multimedia format used in Westwood Studios games.
+@item WVE                       @tab   @tab X
 @item XMV                       @tab   @tab X
     @tab Microsoft video container used in Xbox games.
+@item XVAG                      @tab   @tab X
+    @tab Audio format used on the PS3.
 @item xWMA                      @tab   @tab X
     @tab Microsoft audio container used by XAudio 2.
 @item eXtended BINary text (XBIN) @tab @tab X
@@ -663,6 +698,8 @@ following image formats are supported:
     @tab Sorenson H.263 used in Flash
 @item Forward Uncompressed   @tab     @tab  X
 @item Fraps                  @tab     @tab  X
+@item Go2Meeting             @tab     @tab  X
+    @tab fourcc: G2M2, G2M3
 @item Go2Webinar             @tab     @tab  X
     @tab fourcc: G2M4
 @item H.261                  @tab  X  @tab  X
@@ -671,7 +708,7 @@ following image formats are supported:
 @item H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10  @tab  E  @tab  X
     @tab encoding supported through external library libx264 and OpenH264
 @item HEVC                   @tab  X  @tab  X
-    @tab encoding supported through the external library libx265
+    @tab encoding supported through external library libx265 and libkvazaar
 @item HNM version 4          @tab     @tab  X
 @item HuffYUV                @tab  X  @tab  X
 @item HuffYUV FFmpeg variant @tab  X  @tab  X
@@ -770,6 +807,7 @@ following image formats are supported:
     @tab Texture dictionaries used by the Renderware Engine.
 @item RL2 video              @tab     @tab  X
     @tab used in some games by Entertainment Software Partners
+@item Screenpresso           @tab     @tab  X
 @item Sierra VMD video       @tab     @tab  X
     @tab Used in Sierra VMD files.
 @item Silicon Graphics Motion Video Compressor 1 (MVC1)  @tab     @tab  X
@@ -836,12 +874,13 @@ following image formats are supported:
 @item Name @tab Encoding @tab Decoding @tab Comments
 @item 8SVX exponential       @tab     @tab  X
 @item 8SVX fibonacci         @tab     @tab  X
-@item AAC+                   @tab  E  @tab  X
-    @tab encoding supported through external library libaacplus
-@item AAC                    @tab  E  @tab  X
-    @tab encoding supported through external library libfaac and libvo-aacenc
+@item AAC                    @tab EX  @tab  X
+    @tab encoding supported through internal encoder and external libraries libfaac and libfdk-aac
+@item AAC+                   @tab  E  @tab  IX
+    @tab encoding supported through external library libfdk-aac
 @item AC-3                   @tab IX  @tab  IX
 @item ADPCM 4X Movie         @tab     @tab  X
+@item ADPCM Yamaha AICA      @tab     @tab  X
 @item ADPCM CDROM XA         @tab     @tab  X
 @item ADPCM Creative Technology @tab     @tab  X
     @tab 16 -> 4, 8 -> 4, 8 -> 3, 8 -> 2
@@ -876,7 +915,8 @@ following image formats are supported:
 @item ADPCM MS IMA           @tab  X  @tab  X
 @item ADPCM Nintendo Gamecube AFC  @tab     @tab  X
 @item ADPCM Nintendo Gamecube DTK  @tab     @tab  X
-@item ADPCM Nintendo Gamecube THP  @tab     @tab  X
+@item ADPCM Nintendo THP  @tab     @tab  X
+@item ADPCM Playstation      @tab     @tab  X
 @item ADPCM QT IMA           @tab  X  @tab  X
 @item ADPCM SEGA CRI ADX     @tab  X  @tab  X
     @tab Used in Sega Dreamcast games.
@@ -884,7 +924,7 @@ following image formats are supported:
 @item ADPCM Sound Blaster Pro 2-bit  @tab     @tab  X
 @item ADPCM Sound Blaster Pro 2.6-bit  @tab     @tab  X
 @item ADPCM Sound Blaster Pro 4-bit  @tab     @tab  X
-@item ADPCM VIMA
+@item ADPCM VIMA             @tab     @tab  X
     @tab Used in LucasArts SMUSH animations.
 @item ADPCM Westwood Studios IMA @tab     @tab  X
     @tab Used in Westwood Studios games like Command and Conquer.
@@ -915,6 +955,8 @@ following image formats are supported:
     @tab Used in Quake III, Jedi Knight 2 and other computer games.
 @item DPCM Interplay         @tab     @tab  X
     @tab Used in various Interplay computer games.
+@item DPCM Squareroot-Delta-Exact  @tab  @tab  X
+    @tab Used in various games.
 @item DPCM Sierra Online     @tab     @tab  X
     @tab Used in Sierra Online game audio files.
 @item DPCM Sol               @tab     @tab  X
@@ -929,7 +971,7 @@ following image formats are supported:
 @item Enhanced AC-3          @tab  X  @tab  X
 @item EVRC (Enhanced Variable Rate Codec) @tab     @tab  X
 @item FLAC (Free Lossless Audio Codec)  @tab  X  @tab  IX
-@item G.723.1                @tab X @tab X
+@item G.723.1                @tab X   @tab  X
 @item G.729                  @tab     @tab  X
 @item GSM                    @tab  E  @tab  X
     @tab encoding supported through external library libgsm
@@ -939,6 +981,7 @@ following image formats are supported:
 @item iLBC (Internet Low Bitrate Codec) @tab  E  @tab  E
     @tab encoding and decoding supported through external library libilbc
 @item IMC (Intel Music Coder)  @tab     @tab  X
+@item Interplay ACM            @tab     @tab  X
 @item MACE (Macintosh Audio Compression/Expansion) 3:1  @tab     @tab  X
 @item MACE (Macintosh Audio Compression/Expansion) 6:1  @tab     @tab  X
 @item MLP (Meridian Lossless Packing)  @tab     @tab  X
@@ -954,8 +997,8 @@ following image formats are supported:
 @item Musepack SV8           @tab     @tab  X
 @item Nellymoser Asao        @tab  X  @tab  X
 @item On2 AVC (Audio for Video Codec) @tab     @tab  X
-@item Opus                   @tab  E  @tab  E
-    @tab supported through external library libopus
+@item Opus                   @tab  E  @tab  X
+    @tab encoding supported through external library libopus
 @item PCM A-law              @tab  X  @tab  X
 @item PCM mu-law             @tab  X  @tab  X
 @item PCM signed 8-bit planar  @tab  X  @tab  X
@@ -1023,6 +1066,8 @@ following image formats are supported:
 @item Windows Media Audio Lossless @tab  @tab  X
 @item Windows Media Audio Pro @tab    @tab  X
 @item Windows Media Audio Voice @tab  @tab  X
+@item Xbox Media Audio 1     @tab     @tab  X
+@item Xbox Media Audio 2     @tab     @tab  X
 @end multitable
 
 @code{X} means that encoding (resp. decoding) is supported.
diff --git a/doc/git-howto.texi b/doc/git-howto.texi
index b7b5d434..e5e3c817 100644
--- a/doc/git-howto.texi
+++ b/doc/git-howto.texi
@@ -1,10 +1,10 @@
 \input texinfo @c -*- texinfo -*-
 @documentencoding UTF-8
 
-@settitle Using git to develop FFmpeg
+@settitle Using Git to develop FFmpeg
 
 @titlepage
-@center @titlefont{Using git to develop FFmpeg}
+@center @titlefont{Using Git to develop FFmpeg}
 @end titlepage
 
 @top
@@ -13,9 +13,9 @@
 
 @chapter Introduction
 
-This document aims in giving some quick references on a set of useful git
+This document aims at giving some quick references on a set of useful Git
 commands. You should always use the extensive and detailed documentation
-provided directly by git:
+provided directly by Git:
 
 @example
 git --help
@@ -32,22 +32,21 @@ man git-<command>
 shows information about the subcommand <command>.
 
 Additional information could be found on the
-@url{http://gitref.org, Git Reference} website
+@url{http://gitref.org, Git Reference} website.
 
 For more information about the Git project, visit the
-
-@url{http://git-scm.com/, Git website}
+@url{http://git-scm.com/, Git website}.
 
 Consult these resources whenever you have problems, they are quite exhaustive.
 
 What follows now is a basic introduction to Git and some FFmpeg-specific
-guidelines to ease the contribution to the project
+guidelines to ease the contribution to the project.
 
 @chapter Basics Usage
 
-@section Get GIT
+@section Get Git
 
-You can get git from @url{http://git-scm.com/}
+You can get Git from @url{http://git-scm.com/}.
 Most distribution and operating system provide a package for it.
 
 
@@ -66,6 +65,21 @@ git clone git@@source.ffmpeg.org:ffmpeg <target>
 This will put the FFmpeg sources into the directory @var{<target>} and let
 you push back your changes to the remote repository.
 
+@example
+git clone gil@@ffmpeg.org:ffmpeg-web <target>
+@end example
+
+This will put the source of the FFmpeg website into the directory
+@var{<target>} and let you push back your changes to the remote repository.
+(Note that @var{gil} stands for GItoLite and is not a typo of @var{git}.)
+
+If you don't have write-access to the ffmpeg-web repository, you can
+create patches after making a read-only ffmpeg-web clone:
+
+@example
+git clone git://ffmpeg.org/ffmpeg-web <target>
+@end example
+
 Make sure that you do not have Windows line endings in your checkouts,
 otherwise you may experience spurious compilation failures. One way to
 achieve this is to run
@@ -108,7 +122,7 @@ git add [-A] <filename/dirname>
 git rm [-r] <filename/dirname>
 @end example
 
-GIT needs to get notified of all changes you make to your working
+Git needs to get notified of all changes you make to your working
 directory that makes files appear or disappear.
 Line moves across files are automatically tracked.
 
@@ -128,8 +142,8 @@ will show all local modifications in your working directory as unified diff.
 git log <filename(s)>
 @end example
 
-You may also use the graphical tools like gitview or gitk or the web
-interface available at http://source.ffmpeg.org/
+You may also use the graphical tools like @command{gitview} or @command{gitk}
+or the web interface available at @url{http://source.ffmpeg.org/}.
 
 @section Checking source tree status
 
@@ -150,6 +164,7 @@ git diff --check
 to double check your changes before committing them to avoid trouble later
 on. All experienced developers do this on each and every commit, no matter
 how small.
+
 Every one of them has been saved from looking like a fool by this many times.
 It's very easy for stray debug output or cosmetic modifications to slip in,
 please avoid problems through this extra level of scrutiny.
@@ -172,14 +187,14 @@ to make sure you don't have untracked files or deletions.
 git add [-i|-p|-A] <filenames/dirnames>
 @end example
 
-Make sure you have told git your name and email address
+Make sure you have told Git your name and email address
 
 @example
 git config --global user.name "My Name"
 git config --global user.email my@@email.invalid
 @end example
 
-Use @var{--global} to set the global configuration for all your git checkouts.
+Use @option{--global} to set the global configuration for all your Git checkouts.
 
 Git will select the changes to the files for commit. Optionally you can use
 the interactive or the patch mode to select hunk by hunk what should be
@@ -210,7 +225,7 @@ include filenames in log messages, Git provides that information.
 
 Possibly make the commit message have a terse, descriptive first line, an
 empty line and then a full description. The first line will be used to name
-the patch by git format-patch.
+the patch by @command{git format-patch}.
 
 @section Preparing a patchset
 
@@ -326,10 +341,12 @@ faulty commit disappear from the history.
 @section Pushing changes to remote trees
 
 @example
-git push
+git push origin master --dry-run
 @end example
 
-Will push the changes to the default remote (@var{origin}).
+Will simulate a push of the local master branch to the default remote
+(@var{origin}) and list which branches and ranges of commits would have been
+pushed.
 Git will prevent you from pushing changes if the local and remote trees are
 out of sync. Refer to @ref{Updating the source tree to the latest revision}.
 
@@ -350,23 +367,24 @@ branches matching the local ones.
 
 @section Finding a specific svn revision
 
-Since version 1.7.1 git supports @var{:/foo} syntax for specifying commits
+Since version 1.7.1 Git supports @samp{:/foo} syntax for specifying commits
 based on a regular expression. see man gitrevisions
 
 @example
 git show :/'as revision 23456'
 @end example
 
-will show the svn changeset @var{r23456}. With older git versions searching in
+will show the svn changeset @samp{r23456}. With older Git versions searching in
 the @command{git log} output is the easiest option (especially if a pager with
 search capabilities is used).
+
 This commit can be checked out with
 
 @example
 git checkout -b svn_23456 :/'as revision 23456'
 @end example
 
-or for git < 1.7.1 with
+or for Git < 1.7.1 with
 
 @example
 git checkout -b svn_23456 $SHA1
@@ -375,7 +393,7 @@ git checkout -b svn_23456 $SHA1
 where @var{$SHA1} is the commit hash from the @command{git log} output.
 
 
-@chapter pre-push checklist
+@chapter Pre-push checklist
 
 Once you have a set of commits that you feel are ready for pushing,
 work through the following checklist to doublecheck everything is in
@@ -386,7 +404,7 @@ Apply your common sense, but if in doubt, err on the side of caution.
 First, make sure that the commits and branches you are going to push
 match what you want pushed and that nothing is missing, extraneous or
 wrong. You can see what will be pushed by running the git push command
-with --dry-run first. And then inspecting the commits listed with
+with @option{--dry-run} first and then inspecting the commits listed with
 @command{git log -p 1234567..987654}. The @command{git status} command
 may help in finding local changes that have been forgotten to be added.
 
@@ -395,7 +413,7 @@ Next let the code pass through a full run of our testsuite.
 @itemize
 @item @command{make distclean}
 @item @command{/path/to/ffmpeg/configure}
-@item @command{make check}
+@item @command{make fate}
 @item if fate fails due to missing samples run @command{make fate-rsync} and retry
 @end itemize
 
@@ -413,5 +431,5 @@ recommended.
 
 @chapter Server Issues
 
-Contact the project admins @email{root@@ffmpeg.org} if you have technical
-problems with the GIT server.
+Contact the project admins at @email{root@@ffmpeg.org} if you have technical
+problems with the Git server.
diff --git a/doc/indevs.texi b/doc/indevs.texi
index d5415bbc..3fb852b1 100644
--- a/doc/indevs.texi
+++ b/doc/indevs.texi
@@ -51,6 +51,18 @@ ffmpeg -f alsa -i hw:0 alsaout.wav
 For more information see:
 @url{http://www.alsa-project.org/alsa-doc/alsa-lib/pcm.html}
 
+@subsection Options
+
+@table @option
+
+@item sample_rate
+Set the sample rate in Hz. Default is 48000.
+
+@item channels
+Set the number of channels. Default is 2.
+
+@end table
+
 @section avfoundation
 
 AVFoundation input device.
@@ -109,11 +121,24 @@ Specify the audio device by its index. Overrides anything given in the input fil
 @item -pixel_format <FORMAT>
 Request the video device to use a specific pixel format.
 If the specified format is not supported, a list of available formats is given
-und the first one in this list is used instead. Available pixel formats are:
+and the first one in this list is used instead. Available pixel formats are:
 @code{monob, rgb555be, rgb555le, rgb565be, rgb565le, rgb24, bgr24, 0rgb, bgr0, 0bgr, rgb0,
  bgr48be, uyvy422, yuva444p, yuva444p16le, yuv444p, yuv422p16, yuv422p10, yuv444p10,
  yuv420p, nv12, yuyv422, gray}
 
+@item -framerate
+Set the grabbing frame rate. Default is @code{ntsc}, corresponding to a
+frame rate of @code{30000/1001}.
+
+@item -video_size
+Set the video frame size.
+
+@item -capture_cursor
+Capture the mouse pointer. Default is 0.
+
+@item -capture_mouse_clicks
+Capture the screen mouse clicks. Default is 0.
+
 @end table
 
 @subsection Examples
@@ -150,6 +175,36 @@ $ ffmpeg -f avfoundation -pixel_format bgr0 -i "default:none" out.avi
 
 BSD video input device.
 
+@subsection Options
+
+@table @option
+
+@item framerate
+Set the frame rate.
+
+@item video_size
+Set the video frame size. Default is @code{vga}.
+
+@item standard
+
+Available values are:
+@table @samp
+@item pal
+
+@item ntsc
+
+@item secam
+
+@item paln
+
+@item palm
+
+@item ntscj
+
+@end table
+
+@end table
+
 @section decklink
 
 The decklink input device provides capture capabilities for Blackmagic
@@ -163,7 +218,8 @@ On Windows, you need to run the IDL files through @command{widl}.
 DeckLink is very picky about the formats it supports. Pixel format is
 uyvy422 or v210, framerate and video size must be determined for your device with
 @command{-list_formats 1}. Audio sample rate is always 48 kHz and the number
-of channels can be 2, 8 or 16.
+of channels can be 2, 8 or 16. Note that all audio channels are bundled in one single
+audio track.
 
 @subsection Options
 
@@ -181,6 +237,20 @@ Defaults to @option{false}.
 If set to @samp{1}, video is captured in 10 bit v210 instead
 of uyvy422. Not all Blackmagic devices support this option.
 
+@item teletext_lines
+If set to nonzero, an additional teletext stream will be captured from the
+vertical ancillary data. This option is a bitmask of the VBI lines checked,
+specifically lines 6 to 22, and lines 318 to 335. Line 6 is the LSB in the mask.
+Selected lines which do not contain teletext information will be ignored. You
+can use the special @option{all} constant to select all possible lines, or
+@option{standard} to skip lines 6, 318 and 319, which are not compatible with all
+receivers. Capturing teletext only works for SD PAL sources in 8 bit mode.
+To use this option, ffmpeg needs to be compiled with @code{--enable-libzvbi}.
+
+@item channels
+Defines number of audio channels to capture. Must be @samp{2}, @samp{8} or @samp{16}.
+Defaults to @samp{2}.
+
 @end table
 
 @subsection Examples
@@ -212,15 +282,9 @@ ffmpeg -bm_v210 1 -f decklink -i 'UltraStudio Mini Recorder@@11' -acodec copy -v
 @end example
 
 @item
-Capture video clip at 720p50 with 32bit audio:
-@example
-ffmpeg -bm_audiodepth 32 -f decklink -i 'UltraStudio Mini Recorder@@14' -acodec copy -vcodec copy output.avi
-@end example
-
-@item
-Capture video clip at 576i50 with 8 audio channels:
+Capture video clip at 1080i50 with 16 audio channels:
 @example
-ffmpeg -bm_channels 8 -f decklink -i 'UltraStudio Mini Recorder@@3' -acodec copy -vcodec copy output.avi
+ffmpeg -channels 16 -f decklink -i 'UltraStudio Mini Recorder@@11' -acodec copy -vcodec copy output.avi
 @end example
 
 @end itemize
@@ -275,11 +339,11 @@ If set to @option{true}, print a list of selected device's options
 and exit.
 
 @item video_device_number
-Set video device number for devices with same name (starts at 0,
+Set video device number for devices with the same name (starts at 0,
 defaults to 0).
 
 @item audio_device_number
-Set audio device number for devices with same name (starts at 0,
+Set audio device number for devices with the same name (starts at 0,
 defaults to 0).
 
 @item pixel_format
@@ -429,6 +493,27 @@ $ ffmpeg -f dshow -show_video_device_dialog true -crossbar_video_input_pin_numbe
 
 Linux DV 1394 input device.
 
+@subsection Options
+
+@table @option
+
+@item framerate
+Set the frame rate. Default is 25.
+
+@item standard
+
+Available values are:
+@table @samp
+@item pal
+
+@item ntsc
+
+@end table
+
+Default value is @code{ntsc}.
+
+@end table
+
 @section fbdev
 
 Linux framebuffer input device.
@@ -441,18 +526,27 @@ console. It is accessed through a file device node, usually
 For more detailed information read the file
 Documentation/fb/framebuffer.txt included in the Linux source tree.
 
+See also @url{http://linux-fbdev.sourceforge.net/}, and fbset(1).
+
 To record from the framebuffer device @file{/dev/fb0} with
 @command{ffmpeg}:
 @example
-ffmpeg -f fbdev -r 10 -i /dev/fb0 out.avi
+ffmpeg -f fbdev -framerate 10 -i /dev/fb0 out.avi
 @end example
 
 You can take a single screenshot image with the command:
 @example
-ffmpeg -f fbdev -frames:v 1 -r 1 -i /dev/fb0 screenshot.jpeg
+ffmpeg -f fbdev -framerate 1 -i /dev/fb0 -frames:v 1 screenshot.jpeg
 @end example
 
-See also @url{http://linux-fbdev.sourceforge.net/}, and fbset(1).
+@subsection Options
+
+@table @option
+
+@item framerate
+Set the frame rate. Default is 25.
+
+@end table
 
 @section gdigrab
 
@@ -638,6 +732,15 @@ $ jack_connect metro:120_bpm ffmpeg:input_1
 For more information read:
 @url{http://jackaudio.org/}
 
+@subsection Options
+
+@table @option
+
+@item channels
+Set the number of channels. Default is 2.
+
+@end table
+
 @section lavfi
 
 Libavfilter input virtual device.
@@ -678,6 +781,9 @@ Set the filename of the filtergraph to be read and sent to the other
 filters. Syntax of the filtergraph is the same as the one specified by
 the option @var{graph}.
 
+@item dumpgraph
+Dump graph to stderr.
+
 @end table
 
 @subsection Examples
@@ -879,6 +985,19 @@ ffmpeg -f oss -i /dev/dsp /tmp/oss.wav
 For more information about OSS see:
 @url{http://manuals.opensound.com/usersguide/dsp.html}
 
+@subsection Options
+
+@table @option
+
+@item sample_rate
+Set the sample rate in Hz. Default is 48000.
+
+@item channels
+Set the number of channels. Default is 2.
+
+@end table
+
+
 @section pulse
 
 PulseAudio input device.
@@ -919,6 +1038,10 @@ Specify the number of bytes per frame, by default it is set to 1024.
 @item fragment_size
 Specify the minimal buffering fragment in PulseAudio, it will affect the
 audio latency. By default it is unset.
+
+@item wallclock
+Set the initial PTS using the current time. Default is 1.
+
 @end table
 
 @subsection Examples
@@ -954,6 +1077,22 @@ ffmpeg -f qtkit -i "default" out.mpg
 ffmpeg -f qtkit -list_devices true -i ""
 @end example
 
+@subsection Options
+
+@table @option
+
+@item frame_rate
+Set frame rate. Default is 30.
+
+@item list_devices
+If set to @code{true}, print a list of devices and exit. Default is
+@code{false}.
+
+@item video_device_index
+Select the video device by index for devices with the same name (starts at 0).
+
+@end table
+
 @section sndio
 
 sndio input device.
@@ -971,6 +1110,18 @@ command:
 ffmpeg -f sndio -i /dev/audio0 /tmp/oss.wav
 @end example
 
+@subsection Options
+
+@table @option
+
+@item sample_rate
+Set the sample rate in Hz. Default is 48000.
+
+@item channels
+Set the number of channels. Default is 2.
+
+@end table
+
 @section video4linux2, v4l2
 
 Video4Linux2 input video device.
@@ -1093,6 +1244,10 @@ Force conversion from monotonic to absolute timestamps.
 @end table
 
 Default value is @code{default}.
+
+@item use_libv4l2
+Use libv4l2 (v4l-utils) conversion functions. Default is 0.
+
 @end table
 
 @section vfwcap
@@ -1103,6 +1258,19 @@ The filename passed as input is the capture driver number, ranging from
 0 to 9. You may use "list" as filename to print a list of drivers. Any
 other filename will be interpreted as device number 0.
 
+@subsection Options
+
+@table @option
+
+@item video_size
+Set the video frame size.
+
+@item framerate
+Set the grabbing frame rate. Default value is @code{ntsc},
+corresponding to a frame rate of @code{30000/1001}.
+
+@end table
+
 @section x11grab
 
 X11 video input device.
@@ -1205,17 +1373,13 @@ Set the video frame size. Default value is @code{vga}.
 Use the MIT-SHM extension for shared memory. Default value is @code{1}.
 It may be necessary to disable it for remote displays (legacy x11grab
 only).
-@end table
-
-@subsection @var{grab_x} @var{grab_y} AVOption
-
-The syntax is:
-@example
--grab_x @var{x_offset} -grab_y @var{y_offset}
-@end example
-
-Set the grabbing region coordinates. They are expressed as offset from the top left
-corner of the X11 window. The default value is 0.
 
+@item grab_x
+@item grab_y
+Set the grabbing region coordinates. They are expressed as offset from
+the top left corner of the X11 window and correspond to the
+@var{x_offset} and @var{y_offset} parameters in the device name. The
+default value for both options is 0.
+@end table
 
 @c man end INPUT DEVICES
diff --git a/doc/issue_tracker.txt b/doc/issue_tracker.txt
index 095c04c5..e8e85304 100644
--- a/doc/issue_tracker.txt
+++ b/doc/issue_tracker.txt
@@ -1,8 +1,6 @@
 FFmpeg's bug/feature request tracker manual
 =================================================
 
-NOTE: This is a draft.
-
 Overview:
 ---------
 
@@ -22,9 +20,9 @@ a mail for every change to every issue.
 (the above does all work already after light testing)
 
 The subscription URL for the ffmpeg-trac list is:
-http(s)://lists.ffmpeg.org/mailman/listinfo/ffmpeg-trac
+https://lists.ffmpeg.org/mailman/listinfo/ffmpeg-trac
 The URL of the webinterface of the tracker is:
-http(s)://trac.ffmpeg.org
+https://trac.ffmpeg.org
 
 Type:
 -----
@@ -42,12 +40,16 @@ feature request / enhancement
     where the current implementation cannot be considered wrong.
 
 license violation
-    ticket to keep track of (L)GPL violations of ffmpeg by others
+    Ticket to keep track of (L)GPL violations of ffmpeg by others.
 
 sponsoring request
     Developer requests for hardware, software, specifications, money,
     refunds, etc.
 
+task
+    A task/reminder such as setting up a FATE client, adding filters to
+    Trac, etc.
+
 Priority:
 ---------
 critical
@@ -66,7 +68,8 @@ important
     don't exist in a past revision or another branch.
 
 normal
-
+    Default setting. Use this if the bug does not match the other
+    priorities or if you are unsure of what priority to choose.
 
 minor
     Bugs about things like spelling errors, "mp2" instead of
@@ -163,14 +166,23 @@ Component:
 avcodec
     issues in libavcodec/*
 
+avdevice
+    issues in libavdevice/*
+
+avfilter
+    issues in libavfilter/*
+
 avformat
     issues in libavformat/*
 
 avutil
     issues in libavutil/*
 
-regression test
-    issues in tests/*
+build system
+    issues in or related to configure/Makefile
+
+documentation
+    issues in or related to doc/*
 
 ffmpeg
     issues in or related to ffmpeg.c
@@ -184,11 +196,23 @@ ffprobe
 ffserver
     issues in or related to ffserver.c
 
-build system
-    issues in or related to configure/Makefile
+postproc
+    issues in libpostproc/*
+
+swresample
+    issues in libswresample/*
 
-regression
-    bugs which were not present in a past revision
+swscale
+    issues in libswscale/*
 
 trac
     issues related to our issue tracker
+
+undetermined
+    default component; choose this if unsure
+
+website
+    issues related to the website
+
+wiki
+    issues related to the wiki
diff --git a/doc/mips.txt b/doc/mips.txt
index 8c6779f6..a84e89ae 100644
--- a/doc/mips.txt
+++ b/doc/mips.txt
@@ -47,12 +47,16 @@ Files that have MIPS copyright notice in them:
 * libavutil/mips/
       float_dsp_mips.c
       libm_mips.h
+      softfloat_tables.h
 * libavcodec/
       fft_fixed_32.c
       fft_init_table.c
       fft_table.h
       mdct_fixed_32.c
 * libavcodec/mips/
+      aacdec_fixed.c
+      aacsbr_fixed.c
+      aacsbr_template.c
       aaccoder_mips.c
       aacpsy_mips.h
       ac3dsp_mips.c
diff --git a/doc/multithreading.txt b/doc/multithreading.txt
index 2b992fcb..83849dea 100644
--- a/doc/multithreading.txt
+++ b/doc/multithreading.txt
@@ -54,7 +54,7 @@ thread.
 If the codec allocates writable tables in its init(), add an init_thread_copy()
 which re-allocates them for other threads.
 
-Add CODEC_CAP_FRAME_THREADS to the codec capabilities. There will be very little
+Add AV_CODEC_CAP_FRAME_THREADS to the codec capabilities. There will be very little
 speed gain at this point but it should work.
 
 If there are inter-frame dependencies, so the codec calls
diff --git a/doc/muxers.texi b/doc/muxers.texi
index 95cdb8fa..2e6bb4ca 100644
--- a/doc/muxers.texi
+++ b/doc/muxers.texi
@@ -37,6 +37,61 @@ ID3v2.3 and ID3v2.4) are supported. The default is version 4.
 
 @end table
 
+@anchor{asf}
+@section asf
+
+Advanced Systems Format muxer.
+
+Note that Windows Media Audio (wma) and Windows Media Video (wmv) use this
+muxer too.
+
+@subsection Options
+
+It accepts the following options:
+
+@table @option
+@item packet_size
+Set the muxer packet size. By tuning this setting you may reduce data
+fragmentation or muxer overhead depending on your source. Default value is
+3200, minimum is 100, maximum is 64k.
+
+@end table
+
+@anchor{chromaprint}
+@section chromaprint
+
+Chromaprint fingerprinter.
+
+This muxer feeds audio data to the Chromaprint library, which generates
+a fingerprint for the provided audio data. It takes a single signed
+native-endian 16-bit raw audio stream.
+
+@subsection Options
+
+@table @option
+@item silence_threshold
+Threshold for detecting silence, ranges from 0 to 32767. -1 for default
+(required for use with the AcoustID service).
+
+@item algorithm
+Algorithm index to fingerprint with.
+
+@item fp_format
+Format to output the fingerprint as. Accepts the following options:
+@table @samp
+@item raw
+Binary raw fingerprint
+
+@item compressed
+Binary compressed fingerprint
+
+@item base64
+Base64 compressed fingerprint
+
+@end table
+
+@end table
+
 @anchor{crc}
 @section crc
 
@@ -263,6 +318,62 @@ ffmpeg in.nut -hls_segment_filename 'file%03d.ts' out.m3u8
 This example will produce the playlist, @file{out.m3u8}, and segment files:
 @file{file000.ts}, @file{file001.ts}, @file{file002.ts}, etc.
 
+@item hls_key_info_file @var{key_info_file}
+Use the information in @var{key_info_file} for segment encryption. The first
+line of @var{key_info_file} specifies the key URI written to the playlist. The
+key URI is used to access the encryption key during playback. The second line
+specifies the path to the key file used to obtain the key during the encryption
+process. The key file is read as a single packed array of 16 octets in binary
+format. The optional third line specifies the initialization vector (IV) as a
+hexadecimal string to be used instead of the segment sequence number (default)
+for encryption. Changes to @var{key_info_file} will result in segment
+encryption with the new key/IV and an entry in the playlist for the new key
+URI/IV.
+
+Key info file format:
+@example
+@var{key URI}
+@var{key file path}
+@var{IV} (optional)
+@end example
+
+Example key URIs:
+@example
+http://server/file.key
+/path/to/file.key
+file.key
+@end example
+
+Example key file paths:
+@example
+file.key
+/path/to/file.key
+@end example
+
+Example IV:
+@example
+0123456789ABCDEF0123456789ABCDEF
+@end example
+
+Key info file example:
+@example
+http://server/file.key
+/path/to/file.key
+0123456789ABCDEF0123456789ABCDEF
+@end example
+
+Example shell script:
+@example
+#!/bin/sh
+BASE_URL=$@{1:-'.'@}
+openssl rand 16 > file.key
+echo $BASE_URL/file.key > file.keyinfo
+echo file.key >> file.keyinfo
+echo $(openssl rand -hex 16) >> file.keyinfo
+ffmpeg -f lavfi -re -i testsrc -c:v h264 -hls_flags delete_segments \
+  -hls_key_info_file file.keyinfo out.m3u8
+@end example
+
 @item hls_flags single_file
 If this flag is set, the muxer will store all segments in a single MPEG-TS
 file, and will use byte ranges in the playlist. HLS playlists generated with
@@ -493,7 +604,7 @@ MD5 testing format.
 This muxer computes and prints the MD5 hash of all the input audio
 and video frames. By default audio frames are converted to signed
 16-bit raw audio and video frames to raw video before computing the
-hash.
+hash. Timestamps are ignored.
 
 The output of the muxer consists of a single line of the form:
 MD5=@var{MD5}, where @var{MD5} is a hexadecimal number representing
@@ -611,6 +722,13 @@ point on IIS with this muxer. Example:
 ffmpeg -re @var{<normal input/transcoding options>} -movflags isml+frag_keyframe -f ismv http://server/publishingpoint.isml/Streams(Encoder1)
 @end example
 
+@subsection Audible AAX
+
+Audible AAX files are encrypted M4B files, and they can be decrypted by specifying a 4 byte activation secret.
+@example
+ffmpeg -activation_bytes 1CEB00DA -i test.aax -vn -c:a copy output.mp4
+@end example
+
 @section mp3
 
 The MP3 muxer writes a raw MP3 stream with the following optional features:
@@ -703,6 +821,10 @@ Set a constant muxrate (default VBR).
 @item -pcr_period @var{numer}
 Override the default PCR retransmission time (default 20ms), ignored
 if variable muxrate is selected.
+@item pat_period @var{number}
+Maximal time in seconds between PAT/PMT tables.
+@item sdt_period @var{number}
+Maximal time in seconds between SDT tables.
 @item -pes_payload_size @var{number}
 Set minimum PES packet payload in bytes.
 @item -mpegts_flags @var{flags}
@@ -754,6 +876,10 @@ Option mpegts_flags may take a set of such flags:
 Reemit PAT/PMT before writing the next packet.
 @item latm
 Use LATM packetization for AAC.
+@item pat_pmt_at_frames
+Reemit PAT and PMT at each video frame.
+@item system_b
+Conform to System B (DVB) instead of System A (ATSC).
 @end table
 
 @subsection Example
@@ -770,6 +896,21 @@ ffmpeg -i file.mpg -c copy \
      -y out.ts
 @end example
 
+@section mxf, mxf_d10
+
+MXF muxer.
+
+@subsection Options
+
+The muxer options are:
+
+@table @option
+@item store_user_comments @var{bool}
+Set if user comments should be stored if available or never.
+IRT D-10 does not allow user comments. The default is thus to write them for
+mxf but not for mxf_d10.
+@end table
+
 @section null
 
 Null muxer.
@@ -908,13 +1049,6 @@ Allow caching (only affects M3U8 list files).
 Allow live-friendly file generation.
 @end table
 
-@item segment_list_type @var{type}
-Select the listing format.
-@table @option
-@item @var{flat} use a simple flat list of entries.
-@item @var{hls} use a m3u8-like structure.
-@end table
-
 @item segment_list_size @var{size}
 Update the list file so that it contains at most @var{size}
 segments. If 0 the list file will contain all the segments. Default
@@ -924,6 +1058,9 @@ value is 0.
 Prepend @var{prefix} to each entry. Useful to generate absolute paths.
 By default no prefix is applied.
 
+@item segment_list_type @var{type}
+Select the listing format.
+
 The following values are recognized:
 @table @samp
 @item flat
@@ -983,6 +1120,28 @@ to create files at 12:00 o'clock, 12:15, 12:30, etc.
 
 Default value is "0".
 
+@item segment_clocktime_offset @var{duration}
+Delay the segment splitting times with the specified duration when using
+@option{segment_atclocktime}.
+
+For example with @option{segment_time} set to "900" and
+@option{segment_clocktime_offset} set to "300" this makes it possible to
+create files at 12:05, 12:20, 12:35, etc.
+
+Default value is "0".
+
+@item segment_clocktime_wrap_duration @var{duration}
+Force the segmenter to only start a new segment if a packet reaches the muxer
+within the specified duration after the segmenting clock time. This way you
+can make the segmenter more resilient to backward local time jumps, such as
+leap seconds or transition to standard time from daylight savings time.
+
+Assuming that the delay between the packets of your source is less than 0.5
+second you can detect a leap second by specifying 0.5 as the duration.
+
+Default is the maximum possible duration which means starting a new segment
+regardless of the elapsed time since the last clock time.
+
 @item segment_time_delta @var{delta}
 Specify the accuracy time when selecting the start time for a
 segment, expressed as a duration specification. Default value is "0".
@@ -1172,7 +1331,8 @@ Several bitstream filters can be specified, separated by ",".
 @item select
 Select the streams that should be mapped to the slave output,
 specified by a stream specifier. If not specified, this defaults to
-all the input streams.
+all the input streams. You may use multiple stream specifiers
+separated by commas (@code{,}), e.g.: @code{a:0,v}.
 @end table
 
 @subsection Examples
diff --git a/doc/platform.texi b/doc/platform.texi
index 705a6800..f7ee4564 100644
--- a/doc/platform.texi
+++ b/doc/platform.texi
@@ -107,8 +107,13 @@ Notes:
 
 @itemize
 
-@item Building natively using MSYS2 can be sped up by disabling implicit rules
-in the Makefile by calling @code{make -r} instead of plain @code{make}. This
+@item Building for the MSYS environment is discouraged; MSYS2 provides a full
+MinGW-w64 environment through @file{mingw64_shell.bat} or
+@file{mingw32_shell.bat} that should be used instead of the environment
+provided by @file{msys2_shell.bat}.
+
+@item Building using MSYS2 can be sped up by disabling implicit rules in the
+Makefile by calling @code{make -r} instead of plain @code{make}. This
 speed up is close to non-existent for normal one-off builds and is only
 noticeable when running make for a second time (for example during
 @code{make install}).
@@ -122,6 +127,25 @@ libavformat) as DLLs.
 
 @end itemize
 
+@subsection Native Windows compilation using MSYS2
+
+The MSYS2 MinGW-w64 environment provides ready to use toolchains and dependencies
+through @command{pacman}.
+
+Make sure to use @file{mingw64_shell.bat} or @file{mingw32_shell.bat} to have
+the correct MinGW-w64 environment. The default install provides shortcuts to
+them under @command{MinGW-w64 Win64 Shell} and @command{MinGW-w64 Win32 Shell}.
+
+@example
+# normal msys2 packages
+pacman -S make pkgconf diffutils
+
+# mingw-w64 packages and toolchains
+pacman -S mingw-w64-x86_64-yasm mingw-w64-x86_64-gcc mingw-w64-x86_64-SDL
+@end example
+
+To target 32-bit, replace the @code{x86_64} with @code{i686} in the command above.
+
 @section Microsoft Visual C++ or Intel C++ Compiler for Windows
 
 FFmpeg can be built with MSVC 2012 or earlier using a C99-to-C89 conversion utility
@@ -175,12 +199,6 @@ Notes:
 
 @itemize
 
-@item It is possible that coreutils' @code{link.exe} conflicts with MSVC's linker.
-You can find out by running @code{which link} to see which @code{link.exe} you
-are using. If it is located at @code{/bin/link.exe}, then you have the wrong one
-in your @code{PATH}. Either move or remove that copy, or make sure MSVC's
-@code{link.exe} takes precedence in your @code{PATH} over coreutils'.
-
 @item If you wish to build with zlib support, you will have to grab a compatible
 zlib binary from somewhere, with an MSVC import lib, or if you wish to link
 statically, you can follow the instructions below to build a compatible
@@ -296,7 +314,7 @@ These library packages are only available from
 @uref{http://sourceware.org/cygwinports/, Cygwin Ports}:
 
 @example
-yasm, libSDL-devel, libfaac-devel, libaacplus-devel, libgsm-devel, libmp3lame-devel,
+yasm, libSDL-devel, libfaac-devel, libgsm-devel, libmp3lame-devel,
 libschroedinger1.0-devel, speex-devel, libtheora-devel, libxvidcore-devel
 @end example
 
diff --git a/doc/protocols.texi b/doc/protocols.texi
index 453dbcf6..375d0428 100644
--- a/doc/protocols.texi
+++ b/doc/protocols.texi
@@ -1,3 +1,22 @@
+@chapter Protocol Options
+@c man begin PROTOCOL OPTIONS
+
+The libavformat library provides some generic global options, which
+can be set on all the protocols. In addition each protocol may support
+so-called private options, which are specific for that component.
+
+The list of supported options follows:
+
+@table @option
+@item protocol_whitelist @var{list} (@emph{input})
+Set a ","-separated list of allowed protocols. "ALL" matches all protocols. Protocols
+prefixed by "-" are disabled.
+All protocols are allowed by default but protocols used by another
+protocol (nested protocols) are restricted to a per protocol subset.
+@end table
+
+@c man end PROTOCOL OPTIONS
+
 @chapter Protocols
 @c man begin PROTOCOLS
 
@@ -19,6 +38,18 @@ supported protocols.
 
 A description of the currently available protocols follows.
 
+@section async
+
+Asynchronous data filling wrapper for input stream.
+
+Fill data in a background thread, to decouple I/O operation from demux thread.
+
+@example
+async:@var{URL}
+async:http://host/resource
+async:cache:http://host/resource
+@end example
+
 @section bluray
 
 Read BluRay playlist.
@@ -228,6 +259,9 @@ If set to 1 use chunked Transfer-Encoding for posts, default is 1.
 @item content_type
 Set a specific content type for the POST messages.
 
+@item http_proxy
+Set HTTP proxy to tunnel through, e.g. http://example.com:1234
+
 @item headers
 Set custom HTTP headers, can override built in default headers. The
 value must be a string encoding the headers.
@@ -248,6 +282,16 @@ Set timeout in microseconds of socket I/O operations used by the underlying low
 operation. By default it is set to -1, which means that the timeout is
 not specified.
 
+@item reconnect_at_eof
+If set, then EOF is treated like an error and causes reconnection; this is useful
+for live / endless streams.
+
+@item reconnect_streamed
+If set then even streamed/non seekable streams will be reconnected on errors.
+
+@item reconnect_delay_max
+Set the maximum delay in seconds after which to give up reconnecting.
+
 @item mime_type
 Export the MIME type.
 
@@ -292,6 +336,8 @@ autodetection in the future.
 If set to 1 enables experimental HTTP server. This can be used to send data when
 used as an output option, or read data from a client with HTTP POST when used as
 an input option.
+If set to 2 enables experimental multi-client HTTP server. This is not yet implemented
+in ffmpeg.c or ffserver.c and thus must not be used as a command line option.
 @example
 # Server side (sending):
 ffmpeg -i somefile.ogg -c copy -listen 1 -f ogg http://@var{server}:@var{port}
@@ -1120,6 +1166,12 @@ than this time interval, raise error.
 
 @item listen_timeout=@var{milliseconds}
 Set listen timeout, expressed in milliseconds.
+
+@item recv_buffer_size=@var{bytes}
+Set receive buffer size, expressed in bytes.
+
+@item send_buffer_size=@var{bytes}
+Set send buffer size, expressed in bytes.
 @end table
 
 The following example shows how to setup a listening TCP connection
diff --git a/doc/resampler.texi b/doc/resampler.texi
index f9eef03f..cb7d536c 100644
--- a/doc/resampler.texi
+++ b/doc/resampler.texi
@@ -66,8 +66,8 @@ Set rematrix volume. Default value is 1.0.
 
 @item rematrix_maxval
 Set maximum output value for rematrixing.
-This can be used to prevent clipping vs. preventing volumn reduction
-A value of 1.0 prevents cliping.
+This can be used to prevent clipping vs. preventing volume reduction.
+A value of 1.0 prevents clipping.
 
 @item flags, swr_flags
 Set flags used by the converter. Default value is 0.
@@ -94,13 +94,13 @@ select triangular dither
 @item triangular_hp
 select triangular dither with high pass
 @item lipshitz
-select lipshitz noise shaping dither
+select Lipshitz noise shaping dither.
 @item shibata
-select shibata noise shaping dither
+select Shibata noise shaping dither.
 @item low_shibata
-select low shibata noise shaping dither
+select low Shibata noise shaping dither.
 @item high_shibata
-select high shibata noise shaping dither
+select high Shibata noise shaping dither.
 @item f_weighted
 select f-weighted noise shaping dither
 @item modified_e_weighted
@@ -132,7 +132,7 @@ For swr only, set resampling phase shift, default value is 10, and must be in
 the interval [0,30].
 
 @item linear_interp
-Use Linear Interpolation if set to 1, default value is 0.
+Use linear interpolation if set to 1, default value is 0.
 
 @item cutoff
 Set cutoff frequency (swr: 6dB point; soxr: 0dB point) ratio; must be a float
@@ -214,13 +214,13 @@ It accepts the following values:
 @item cubic
 select cubic
 @item blackman_nuttall
-select Blackman Nuttall Windowed Sinc
+select Blackman Nuttall windowed sinc
 @item kaiser
-select Kaiser Windowed Sinc
+select Kaiser windowed sinc
 @end table
 
 @item kaiser_beta
-For swr only, set Kaiser Window Beta value. Must be an integer in the
+For swr only, set Kaiser window beta value. Must be a double-precision floating-point value in the
 interval [2,16], default value is 9.
 
 @item output_sample_bits
diff --git a/doc/scaler.texi b/doc/scaler.texi
index 23d63938..3e115cdd 100644
--- a/doc/scaler.texi
+++ b/doc/scaler.texi
@@ -46,7 +46,7 @@ Select Gaussian rescaling algorithm.
 Select sinc rescaling algorithm.
 
 @item lanczos
-Select lanczos rescaling algorithm.
+Select Lanczos rescaling algorithm.
 
 @item spline
 Select natural bicubic spline rescaling algorithm.
@@ -91,6 +91,7 @@ Select source range.
 @item dst_range
 Select destination range.
 
+@anchor{sws_params}
 @item param0, param1
 Set scaling algorithm parameters. The specified values are specific of
 some scaling algorithms and ignored by others. The specified values
@@ -122,6 +123,22 @@ a_dither).
 
 @end table
 
+@item alphablend
+Set the alpha blending to use when the input has alpha but the output does not.
+Default value is @samp{none}.
+
+@table @samp
+@item uniform_color
+Blend onto a uniform background color
+
+@item checkerboard
+Blend onto a checkerboard
+
+@item none
+No blending
+
+@end table
+
 @end table
 
 @c man end SCALER OPTIONS
diff --git a/doc/texi2pod.pl b/doc/texi2pod.pl
index e1ff6b46..9a9b34fc 100644
--- a/doc/texi2pod.pl
+++ b/doc/texi2pod.pl
@@ -384,7 +384,7 @@ sub postprocess
     # @* is also impossible in .pod; we discard it and any newline that
     # follows it.  Similarly, our macro @gol must be discarded.
 
-    s/\@anchor{(?:[^\}]*)\}//g;
+    s/\@anchor\{(?:[^\}]*)\}//g;
     s/\(?\@xref\{(?:[^\}]*)\}(?:[^.<]|(?:<[^<>]*>))*\.\)?//g;
     s/\s+\(\@pxref\{(?:[^\}]*)\}\)//g;
     s/;\s+\@pxref\{(?:[^\}]*)\}//g;
diff --git a/doc/utils.texi b/doc/utils.texi
index 6517ac0a..7aea4606 100644
--- a/doc/utils.texi
+++ b/doc/utils.texi
@@ -238,6 +238,14 @@ The following abbreviations are recognized:
 480x320
 @item qhd
 960x540
+@item 2kdci
+2048x1080
+@item 4kdci
+4096x2160
+@item uhd2160
+3840x2160
+@item uhd4320
+7680x4320
 @end table
 
 @anchor{video rate syntax}
@@ -861,7 +869,7 @@ Return 1 if @var{x} is lesser than or equal to @var{y}, 0 otherwise.
 Return the maximum between @var{x} and @var{y}.
 
 @item min(x, y)
-Return the maximum between @var{x} and @var{y}.
+Return the minimum between @var{x} and @var{y}.
 
 @item mod(x, y)
 Compute the remainder of division of @var{x} by @var{y}.
diff --git a/doc/writing_filters.txt b/doc/writing_filters.txt
index eb16d424..66ebb532 100644
--- a/doc/writing_filters.txt
+++ b/doc/writing_filters.txt
@@ -3,8 +3,8 @@ libavfilter.
 
 Foreword: just like everything else in FFmpeg, libavfilter is monolithic, which
 means that it is highly recommended that you submit your filters to the FFmpeg
-development mailing-list and make sure it is applied. Otherwise, your filter is
-likely to have a very short lifetime due to more a less regular internal API
+development mailing-list and make sure that they are applied. Otherwise, your filters
+are likely to have a very short lifetime due to more or less regular internal API
 changes, and a limited distribution, review, and testing.
 
 Bootstrap
@@ -64,7 +64,7 @@ filter, so you can update the boilerplate with your credits.
 Doxy
 ----
 
-Next chunk is the Doxygen about the file. See http://ffmpeg.org/doxygen/trunk/.
+Next chunk is the Doxygen about the file. See https://ffmpeg.org/doxygen/trunk/.
 Detail here what the filter is, does, and add some references if you feel like
 it.
 
@@ -73,11 +73,11 @@ Context
 
 Skip the headers and scroll down to the definition of FoobarContext. This is
 your local state context. It is already filled with 0 when you get it so do not
-worry about uninitialized read into this context. This is where you put every
-"global" information you need, typically the variable storing the user options.
+worry about uninitialized reads into this context. This is where you put all
+"global" information that you need; typically the variables storing the user options.
 You'll notice the first field "const AVClass *class"; it's the only field you
-need to keep assuming you have a context. There are some magic you don't care
-about around this field, just let it be (in first position) for now.
+need to keep assuming you have a context. There is some magic you don't need to
+care about around this field, just let it be (in the first position) for now.
 
 Options
 -------
@@ -87,7 +87,7 @@ options. For example, -vf foobar=mode=colormix:high=0.4:low=0.1. Most options
 have the following pattern:
   name, description, offset, type, default value, minimum value, maximum value, flags
 
- - name is the option name, keep it simple, lowercase
+ - name is the option name, keep it simple and lowercase
  - description are short, in lowercase, without period, and describe what they
    do, for example "set the foo of the bar"
  - offset is the offset of the field in your local context, see the OFFSET()
@@ -99,7 +99,7 @@ have the following pattern:
  - min and max values define the range of available values, inclusive
  - flags are AVOption generic flags. See AV_OPT_FLAG_* definitions
 
-In doubt, just look at the other AVOption definitions all around the codebase,
+When in doubt, just look at the other AVOption definitions all around the codebase,
 there are tons of examples.
 
 Class
@@ -146,14 +146,14 @@ we won't cover this here since vf_foobar is just a simple 1:1 filter.
 uninit()
 ~~~~~~~~
 
-Similarly, there is the uninit() callback, doing what the name suggest. Free
+Similarly, there is the uninit() callback, doing what the name suggests. Free
 everything you allocated here.
 
 query_formats()
 ~~~~~~~~~~~~~~~
 
-This is following the init() and is used for the format negotiation, basically
-where you say what pixel format(s) (gray, rgb 32, yuv 4:2:0, ...) you accept
+This follows the init() and is used for the format negotiation. Basically
+you specify here what pixel format(s) (gray, rgb 32, yuv 4:2:0, ...) you accept
 for your inputs, and what you can output. All pixel formats are defined in
 libavutil/pixfmt.h. If you don't change the pixel format between the input and
 the output, you just have to define a pixel formats array and call
@@ -182,7 +182,7 @@ will update outlink->w and outlink->h.
 filter_frame()
 ~~~~~~~~~~~~~~
 
-This is the callback you are waiting from the beginning: it is where you
+This is the callback you are waiting for from the beginning: it is where you
 process the received frames. Along with the frame, you get the input link from
 where the frame comes from.
 
@@ -317,7 +317,7 @@ Adding timeline support
 feature to add. In the most simple case, you just have to add
 AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC to the AVFilter.flags. You can typically
 do this when your filter does not need to save the previous context frames, or
-basically if your filter just alter whatever goes in and doesn't need
+basically if your filter just alters whatever goes in and doesn't need
 previous/future information. See for instance commit 86cb986ce that adds
 timeline support to the fieldorder filter.
 
diff --git a/ffmpeg.c b/ffmpeg.c
index 384b92ca..a5ec3c38 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -32,14 +32,12 @@
 #include <limits.h>
 #include <stdint.h>
 
-#if HAVE_ISATTY
 #if HAVE_IO_H
 #include <io.h>
 #endif
 #if HAVE_UNISTD_H
 #include <unistd.h>
 #endif
-#endif
 
 #include "libavformat/avformat.h"
 #include "libavdevice/avdevice.h"
@@ -49,6 +47,7 @@
 #include "libavutil/parseutils.h"
 #include "libavutil/samplefmt.h"
 #include "libavutil/fifo.h"
+#include "libavutil/internal.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/dict.h"
 #include "libavutil/mathematics.h"
@@ -63,7 +62,6 @@
 #include "libavcodec/mathops.h"
 #include "libavformat/os_support.h"
 
-# include "libavfilter/avcodec.h"
 # include "libavfilter/avfilter.h"
 # include "libavfilter/buffersrc.h"
 # include "libavfilter/buffersink.h"
@@ -79,6 +77,10 @@
 #include <windows.h>
 #include <psapi.h>
 #endif
+#if HAVE_SETCONSOLECTRLHANDLER
+#include <windows.h>
+#endif
+
 
 #if HAVE_SYS_SELECT_H
 #include <sys/select.h>
@@ -132,8 +134,6 @@ AVIOContext *progress_avio = NULL;
 
 static uint8_t *subtitle_out;
 
-#define DEFAULT_PASS_LOGFILENAME_PREFIX "ffmpeg2pass"
-
 InputStream **input_streams = NULL;
 int        nb_input_streams = 0;
 InputFile   **input_files   = NULL;
@@ -169,8 +169,8 @@ static int sub2video_get_blank_frame(InputStream *ist)
     AVFrame *frame = ist->sub2video.frame;
 
     av_frame_unref(frame);
-    ist->sub2video.frame->width  = ist->sub2video.w;
-    ist->sub2video.frame->height = ist->sub2video.h;
+    ist->sub2video.frame->width  = ist->dec_ctx->width  ? ist->dec_ctx->width  : ist->sub2video.w;
+    ist->sub2video.frame->height = ist->dec_ctx->height ? ist->dec_ctx->height : ist->sub2video.h;
     ist->sub2video.frame->format = AV_PIX_FMT_RGB32;
     if ((ret = av_frame_get_buffer(frame, 32)) < 0)
         return ret;
@@ -190,7 +190,9 @@ static void sub2video_copy_rect(uint8_t *dst, int dst_linesize, int w, int h,
         return;
     }
     if (r->x < 0 || r->x + r->w > w || r->y < 0 || r->y + r->h > h) {
-        av_log(NULL, AV_LOG_WARNING, "sub2video: rectangle overflowing\n");
+        av_log(NULL, AV_LOG_WARNING, "sub2video: rectangle (%d %d %d %d) overflowing %d %d\n",
+            r->x, r->y, r->w, r->h, w, h
+        );
         return;
     }
 
@@ -222,7 +224,6 @@ static void sub2video_push_ref(InputStream *ist, int64_t pts)
 
 static void sub2video_update(InputStream *ist, AVSubtitle *sub)
 {
-    int w = ist->sub2video.w, h = ist->sub2video.h;
     AVFrame *frame = ist->sub2video.frame;
     int8_t *dst;
     int     dst_linesize;
@@ -250,7 +251,7 @@ static void sub2video_update(InputStream *ist, AVSubtitle *sub)
     dst          = frame->data    [0];
     dst_linesize = frame->linesize[0];
     for (i = 0; i < num_rects; i++)
-        sub2video_copy_rect(dst, dst_linesize, w, h, sub->rects[i]);
+        sub2video_copy_rect(dst, dst_linesize, frame->width, frame->height, sub->rects[i]);
     sub2video_push_ref(ist, pts);
     ist->sub2video.end_pts = end_pts;
 }
@@ -291,7 +292,7 @@ static void sub2video_flush(InputStream *ist)
     if (ist->sub2video.end_pts < INT64_MAX)
         sub2video_update(ist, NULL);
     for (i = 0; i < ist->nb_filters; i++)
-        av_buffersrc_add_ref(ist->filters[i]->filter, NULL, 0);
+        av_buffersrc_add_frame(ist->filters[i]->filter, NULL);
 }
 
 /* end of sub2video hack */
@@ -313,6 +314,7 @@ void term_exit(void)
 static volatile int received_sigterm = 0;
 static volatile int received_nb_signals = 0;
 static volatile int transcode_init_done = 0;
+static volatile int ffmpeg_exited = 0;
 static int main_return_code = 0;
 
 static void
@@ -321,20 +323,52 @@ sigterm_handler(int sig)
     received_sigterm = sig;
     received_nb_signals++;
     term_exit_sigsafe();
-    if(received_nb_signals > 3)
+    if(received_nb_signals > 3) {
+        write(2/*STDERR_FILENO*/, "Received > 3 system signals, hard exiting\n",
+                           strlen("Received > 3 system signals, hard exiting\n"));
+
         exit(123);
+    }
 }
 
+#if HAVE_SETCONSOLECTRLHANDLER
+static BOOL WINAPI CtrlHandler(DWORD fdwCtrlType)
+{
+    av_log(NULL, AV_LOG_DEBUG, "\nReceived windows signal %ld\n", fdwCtrlType);
+
+    switch (fdwCtrlType)
+    {
+    case CTRL_C_EVENT:
+    case CTRL_BREAK_EVENT:
+        sigterm_handler(SIGINT);
+        return TRUE;
+
+    case CTRL_CLOSE_EVENT:
+    case CTRL_LOGOFF_EVENT:
+    case CTRL_SHUTDOWN_EVENT:
+        sigterm_handler(SIGTERM);
+        /* Basically, with these 3 events, when we return from this method the
+           process is hard terminated, so stall as long as we need
+           to try and let the main thread(s) clean up and gracefully terminate
+           (we have at most 5 seconds, but should be done far before that). */
+        while (!ffmpeg_exited) {
+            Sleep(0);
+        }
+        return TRUE;
+
+    default:
+        av_log(NULL, AV_LOG_ERROR, "Received unknown windows signal %ld\n", fdwCtrlType);
+        return FALSE;
+    }
+}
+#endif
+
 void term_init(void)
 {
 #if HAVE_TERMIOS_H
     if(!run_as_daemon){
         struct termios tty;
-        int istty = 1;
-#if HAVE_ISATTY
-        istty = isatty(0) && isatty(2);
-#endif
-        if (istty && tcgetattr (0, &tty) == 0) {
+        if (tcgetattr (0, &tty) == 0) {
             oldtty = tty;
             restore_tty = 1;
 
@@ -358,6 +392,9 @@ void term_init(void)
 #ifdef SIGXCPU
     signal(SIGXCPU, sigterm_handler);
 #endif
+#if HAVE_SETCONSOLECTRLHANDLER
+    SetConsoleCtrlHandler((PHANDLER_ROUTINE) CtrlHandler, TRUE);
+#endif
 }
 
 /* read a key without blocking */
@@ -391,10 +428,6 @@ static int read_key(void)
         is_pipe = !GetConsoleMode(input_handle, &dw);
     }
 
-    if (stdin->_cnt > 0) {
-        read(0, &ch, 1);
-        return ch;
-    }
     if (is_pipe) {
         /* When running under a GUI, you will end here. */
         if (!PeekNamedPipe(input_handle, NULL, 0, NULL, &nchars, NULL)) {
@@ -429,7 +462,7 @@ static void ffmpeg_cleanup(int ret)
 
     if (do_benchmark) {
         int maxrss = getmaxrss() / 1024;
-        printf("bench: maxrss=%ikB\n", maxrss);
+        av_log(NULL, AV_LOG_INFO, "bench: maxrss=%ikB\n", maxrss);
     }
 
     for (i = 0; i < nb_filtergraphs; i++) {
@@ -494,6 +527,8 @@ static void ffmpeg_cleanup(int ret)
         av_freep(&ost->audio_channels_map);
         ost->audio_channels_mapped = 0;
 
+        av_dict_free(&ost->sws_dict);
+
         avcodec_free_context(&ost->enc_ctx);
 
         av_freep(&output_streams[i]);
@@ -521,8 +556,12 @@ static void ffmpeg_cleanup(int ret)
         av_freep(&input_streams[i]);
     }
 
-    if (vstats_file)
-        fclose(vstats_file);
+    if (vstats_file) {
+        if (fclose(vstats_file))
+            av_log(NULL, AV_LOG_ERROR,
+                   "Error closing vstats file, loss of information possible: %s\n",
+                   av_err2str(AVERROR(errno)));
+    }
     av_freep(&vstats_filename);
 
     av_freep(&input_streams);
@@ -535,12 +574,13 @@ static void ffmpeg_cleanup(int ret)
     avformat_network_deinit();
 
     if (received_sigterm) {
-        av_log(NULL, AV_LOG_INFO, "Received signal %d: terminating.\n",
+        av_log(NULL, AV_LOG_INFO, "Exiting normally, received signal %d.\n",
                (int) received_sigterm);
     } else if (ret && transcode_init_done) {
         av_log(NULL, AV_LOG_INFO, "Conversion failed!\n");
     }
     term_exit();
+    ffmpeg_exited = 1;
 }
 
 void remove_avoptions(AVDictionary **a, AVDictionary *b)
@@ -577,7 +617,7 @@ static void update_benchmark(const char *fmt, ...)
             va_start(va, fmt);
             vsnprintf(buf, sizeof(buf), fmt, va);
             va_end(va);
-            printf("bench: %8"PRIu64" %s \n", t - current_time, buf);
+            av_log(NULL, AV_LOG_INFO, "bench: %8"PRIu64" %s \n", t - current_time, buf);
         }
         current_time = t;
     }
@@ -599,7 +639,7 @@ static void write_frame(AVFormatContext *s, AVPacket *pkt, OutputStream *ost)
     int ret;
 
     if (!ost->st->codec->extradata_size && ost->enc_ctx->extradata_size) {
-        ost->st->codec->extradata = av_mallocz(ost->enc_ctx->extradata_size + FF_INPUT_BUFFER_PADDING_SIZE);
+        ost->st->codec->extradata = av_mallocz(ost->enc_ctx->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
         if (ost->st->codec->extradata) {
             memcpy(ost->st->codec->extradata, ost->enc_ctx->extradata, ost->enc_ctx->extradata_size);
             ost->st->codec->extradata_size = ost->enc_ctx->extradata_size;
@@ -619,56 +659,40 @@ static void write_frame(AVFormatContext *s, AVPacket *pkt, OutputStream *ost)
      */
     if (!(avctx->codec_type == AVMEDIA_TYPE_VIDEO && avctx->codec)) {
         if (ost->frame_number >= ost->max_frames) {
-            av_free_packet(pkt);
+            av_packet_unref(pkt);
             return;
         }
         ost->frame_number++;
     }
+    if (avctx->codec_type == AVMEDIA_TYPE_VIDEO) {
+        int i;
+        uint8_t *sd = av_packet_get_side_data(pkt, AV_PKT_DATA_QUALITY_STATS,
+                                              NULL);
+        ost->quality = sd ? AV_RL32(sd) : -1;
+        ost->pict_type = sd ? sd[4] : AV_PICTURE_TYPE_NONE;
+
+        for (i = 0; i<FF_ARRAY_ELEMS(ost->error); i++) {
+            if (sd && i < sd[5])
+                ost->error[i] = AV_RL64(sd + 8 + 8*i);
+            else
+                ost->error[i] = -1;
+        }
+
+        if (ost->frame_rate.num && ost->is_cfr) {
+            if (pkt->duration > 0)
+                av_log(NULL, AV_LOG_WARNING, "Overriding packet duration by frame rate, this should not happen\n");
+            pkt->duration = av_rescale_q(1, av_inv_q(ost->frame_rate),
+                                         ost->st->time_base);
+        }
+    }
 
     if (bsfc)
         av_packet_split_side_data(pkt);
 
-    while (bsfc) {
-        AVPacket new_pkt = *pkt;
-        AVDictionaryEntry *bsf_arg = av_dict_get(ost->bsf_args,
-                                                 bsfc->filter->name,
-                                                 NULL, 0);
-        int a = av_bitstream_filter_filter(bsfc, avctx,
-                                           bsf_arg ? bsf_arg->value : NULL,
-                                           &new_pkt.data, &new_pkt.size,
-                                           pkt->data, pkt->size,
-                                           pkt->flags & AV_PKT_FLAG_KEY);
-        if(a == 0 && new_pkt.data != pkt->data && new_pkt.destruct) {
-            uint8_t *t = av_malloc(new_pkt.size + FF_INPUT_BUFFER_PADDING_SIZE); //the new should be a subset of the old so cannot overflow
-            if(t) {
-                memcpy(t, new_pkt.data, new_pkt.size);
-                memset(t + new_pkt.size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
-                new_pkt.data = t;
-                new_pkt.buf = NULL;
-                a = 1;
-            } else
-                a = AVERROR(ENOMEM);
-        }
-        if (a > 0) {
-            pkt->side_data = NULL;
-            pkt->side_data_elems = 0;
-            av_free_packet(pkt);
-            new_pkt.buf = av_buffer_create(new_pkt.data, new_pkt.size,
-                                           av_buffer_default_free, NULL, 0);
-            if (!new_pkt.buf)
-                exit_program(1);
-        } else if (a < 0) {
-            new_pkt = *pkt;
-            av_log(NULL, AV_LOG_ERROR, "Failed to open bitstream filter %s for stream %d with codec %s",
-                   bsfc->filter->name, pkt->stream_index,
-                   avctx->codec ? avctx->codec->name : "copy");
-            print_error("", a);
-            if (exit_on_error)
-                exit_program(1);
-        }
-        *pkt = new_pkt;
-
-        bsfc = bsfc->next;
+    if ((ret = av_apply_bitstream_filters(avctx, pkt, bsfc)) < 0) {
+        print_error("", ret);
+        if (exit_on_error)
+            exit_program(1);
     }
 
     if (!(s->oformat->flags & AVFMT_NOTIMESTAMPS)) {
@@ -729,7 +753,7 @@ static void write_frame(AVFormatContext *s, AVPacket *pkt, OutputStream *ost)
         main_return_code = 1;
         close_all_output_streams(ost, MUXER_FINISHED | ENCODER_FINISHED, ENCODER_FINISHED);
     }
-    av_free_packet(pkt);
+    av_packet_unref(pkt);
 }
 
 static void close_output_stream(OutputStream *ost)
@@ -929,11 +953,11 @@ static void do_video_out(AVFormatContext *s,
                                           ost->last_nb0_frames[1],
                                           ost->last_nb0_frames[2]);
     } else {
-        delta0 = sync_ipts - ost->sync_opts;
+        delta0 = sync_ipts - ost->sync_opts; // delta0 is the "drift" between the input frame (next_picture) and where it would fall in the output.
         delta  = delta0 + duration;
 
         /* by default, we output a single frame */
-        nb0_frames = 0;
+        nb0_frames = 0; // tracks the number of times the PREVIOUS frame should be duplicated, mostly for variable framerate (VFR)
         nb_frames = 1;
 
         format_video_sync = video_sync_method;
@@ -952,25 +976,25 @@ static void do_video_out(AVFormatContext *s,
                 format_video_sync = VSYNC_VSCFR;
             }
         }
+        ost->is_cfr = (format_video_sync == VSYNC_CFR || format_video_sync == VSYNC_VSCFR);
 
         if (delta0 < 0 &&
             delta > 0 &&
             format_video_sync != VSYNC_PASSTHROUGH &&
             format_video_sync != VSYNC_DROP) {
-            double cor = FFMIN(-delta0, duration);
             if (delta0 < -0.6) {
                 av_log(NULL, AV_LOG_WARNING, "Past duration %f too large\n", -delta0);
             } else
-                av_log(NULL, AV_LOG_DEBUG, "Cliping frame in rate conversion by %f\n", -delta0);
-            sync_ipts += cor;
-            duration -= cor;
-            delta0 += cor;
+                av_log(NULL, AV_LOG_DEBUG, "Clipping frame in rate conversion by %f\n", -delta0);
+            sync_ipts = ost->sync_opts;
+            duration += delta0;
+            delta0 = 0;
         }
 
         switch (format_video_sync) {
         case VSYNC_VSCFR:
-            if (ost->frame_number == 0 && delta - duration >= 0.5) {
-                av_log(NULL, AV_LOG_DEBUG, "Not duplicating %d initial frames\n", (int)lrintf(delta - duration));
+            if (ost->frame_number == 0 && delta0 >= 0.5) {
+                av_log(NULL, AV_LOG_DEBUG, "Not duplicating %d initial frames\n", (int)lrintf(delta0));
                 delta = duration;
                 delta0 = 0;
                 ost->sync_opts = lrint(sync_ipts);
@@ -1010,22 +1034,22 @@ static void do_video_out(AVFormatContext *s,
             sizeof(ost->last_nb0_frames[0]) * (FF_ARRAY_ELEMS(ost->last_nb0_frames) - 1));
     ost->last_nb0_frames[0] = nb0_frames;
 
-    if (nb0_frames == 0 && ost->last_droped) {
+    if (nb0_frames == 0 && ost->last_dropped) {
         nb_frames_drop++;
         av_log(NULL, AV_LOG_VERBOSE,
                "*** dropping frame %d from stream %d at ts %"PRId64"\n",
                ost->frame_number, ost->st->index, ost->last_frame->pts);
     }
-    if (nb_frames > (nb0_frames && ost->last_droped) + (nb_frames > nb0_frames)) {
+    if (nb_frames > (nb0_frames && ost->last_dropped) + (nb_frames > nb0_frames)) {
         if (nb_frames > dts_error_threshold * 30) {
             av_log(NULL, AV_LOG_ERROR, "%d frame duplication too large, skipping\n", nb_frames - 1);
             nb_frames_drop++;
             return;
         }
-        nb_frames_dup += nb_frames - (nb0_frames && ost->last_droped) - (nb_frames > nb0_frames);
+        nb_frames_dup += nb_frames - (nb0_frames && ost->last_dropped) - (nb_frames > nb0_frames);
         av_log(NULL, AV_LOG_VERBOSE, "*** %d dup!\n", nb_frames - 1);
     }
-    ost->last_droped = nb_frames == nb0_frames && next_picture;
+    ost->last_dropped = nb_frames == nb0_frames && next_picture;
 
   /* duplicates frame if needed */
   for (i = 0; i < nb_frames; i++) {
@@ -1051,6 +1075,7 @@ static void do_video_out(AVFormatContext *s,
 #endif
         return;
 
+#if FF_API_LAVF_FMT_RAWPICTURE
     if (s->oformat->flags & AVFMT_RAWPICTURE &&
         enc->codec->id == AV_CODEC_ID_RAWVIDEO) {
         /* raw pictures are written as AVPicture structure to
@@ -1066,11 +1091,13 @@ static void do_video_out(AVFormatContext *s,
         pkt.flags |= AV_PKT_FLAG_KEY;
 
         write_frame(s, &pkt, ost);
-    } else {
+    } else
+#endif
+    {
         int got_packet, forced_keyframe = 0;
         double pts_time;
 
-        if (enc->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME) &&
+        if (enc->flags & (AV_CODEC_FLAG_INTERLACED_DCT | AV_CODEC_FLAG_INTERLACED_ME) &&
             ost->top_field_first >= 0)
             in_picture->top_field_first = !!ost->top_field_first;
 
@@ -1096,7 +1123,7 @@ static void do_video_out(AVFormatContext *s,
             ost->forced_keyframes_expr_const_values[FKF_T] = pts_time;
             res = av_expr_eval(ost->forced_keyframes_pexpr,
                                ost->forced_keyframes_expr_const_values, NULL);
-            av_dlog(NULL, "force_key_frame: n:%f n_forced:%f prev_forced_n:%f t:%f prev_forced_t:%f -> res:%f\n",
+            ff_dlog(NULL, "force_key_frame: n:%f n_forced:%f prev_forced_n:%f t:%f prev_forced_t:%f -> res:%f\n",
                     ost->forced_keyframes_expr_const_values[FKF_N],
                     ost->forced_keyframes_expr_const_values[FKF_N_FORCED],
                     ost->forced_keyframes_expr_const_values[FKF_PREV_FORCED_N],
@@ -1149,7 +1176,7 @@ static void do_video_out(AVFormatContext *s,
                        av_ts2str(pkt.dts), av_ts2timestr(pkt.dts, &enc->time_base));
             }
 
-            if (pkt.pts == AV_NOPTS_VALUE && !(enc->codec->capabilities & CODEC_CAP_DELAY))
+            if (pkt.pts == AV_NOPTS_VALUE && !(enc->codec->capabilities & AV_CODEC_CAP_DELAY))
                 pkt.pts = ost->sync_opts;
 
             av_packet_rescale_ts(&pkt, enc->time_base, ost->st->time_base);
@@ -1193,7 +1220,7 @@ static void do_video_out(AVFormatContext *s,
 
 static double psnr(double d)
 {
-    return -10.0 * log(d) / log(10.0);
+    return -10.0 * log10(d);
 }
 
 static void do_video_stats(OutputStream *ost, int frame_size)
@@ -1214,9 +1241,11 @@ static void do_video_stats(OutputStream *ost, int frame_size)
     enc = ost->enc_ctx;
     if (enc->codec_type == AVMEDIA_TYPE_VIDEO) {
         frame_number = ost->st->nb_frames;
-        fprintf(vstats_file, "frame= %5d q= %2.1f ", frame_number, enc->coded_frame ? enc->coded_frame->quality / (float)FF_QP2LAMBDA : 0);
-        if (enc->coded_frame && (enc->flags&CODEC_FLAG_PSNR))
-            fprintf(vstats_file, "PSNR= %6.2f ", psnr(enc->coded_frame->error[0] / (enc->width * enc->height * 255.0 * 255.0)));
+        fprintf(vstats_file, "frame= %5d q= %2.1f ", frame_number,
+                ost->quality / (float)FF_QP2LAMBDA);
+
+        if (ost->error[0]>=0 && (enc->flags & AV_CODEC_FLAG_PSNR))
+            fprintf(vstats_file, "PSNR= %6.2f ", psnr(ost->error[0] / (enc->width * enc->height * 255.0 * 255.0)));
 
         fprintf(vstats_file,"f_size= %6d ", frame_size);
         /* compute pts value */
@@ -1228,7 +1257,7 @@ static void do_video_stats(OutputStream *ost, int frame_size)
         avg_bitrate = (double)(ost->data_size * 8) / ti1 / 1000.0;
         fprintf(vstats_file, "s_size= %8.0fkB time= %0.3f br= %7.1fkbits/s avg_br= %7.1fkbits/s ",
                (double)ost->data_size / 1024, ti1, bitrate, avg_bitrate);
-        fprintf(vstats_file, "type= %c\n", enc->coded_frame ? av_get_picture_type_char(enc->coded_frame->pict_type) : 'I');
+        fprintf(vstats_file, "type= %c\n", av_get_picture_type_char(ost->pict_type));
     }
 }
 
@@ -1326,7 +1355,7 @@ static int reap_filters(int flush)
                 do_video_out(of->ctx, ost, filtered_frame, float_pts);
                 break;
             case AVMEDIA_TYPE_AUDIO:
-                if (!(enc->codec->capabilities & CODEC_CAP_PARAM_CHANGE) &&
+                if (!(enc->codec->capabilities & AV_CODEC_CAP_PARAM_CHANGE) &&
                     enc->channels != av_frame_get_channels(filtered_frame)) {
                     av_log(NULL, AV_LOG_ERROR,
                            "Audio filter graph output is not normalized and encoder does not support parameter changes\n");
@@ -1365,8 +1394,8 @@ static void print_final_stats(int64_t total_size)
         }
         extra_size += ost->enc_ctx->extradata_size;
         data_size  += ost->data_size;
-        if (   (ost->enc_ctx->flags & (CODEC_FLAG_PASS1 | CODEC_FLAG_PASS2))
-            != CODEC_FLAG_PASS1)
+        if (   (ost->enc_ctx->flags & (AV_CODEC_FLAG_PASS1 | CODEC_FLAG_PASS2))
+            != AV_CODEC_FLAG_PASS1)
             pass1_used = 0;
     }
 
@@ -1473,10 +1502,13 @@ static void print_report(int is_last_report, int64_t timer_start, int64_t cur_ti
     AVCodecContext *enc;
     int frame_number, vid, i;
     double bitrate;
-    int64_t pts = INT64_MIN;
+    double speed;
+    int64_t pts = INT64_MIN + 1;
     static int64_t last_time = -1;
     static int qp_histogram[52];
     int hours, mins, secs, us;
+    int ret;
+    float t;
 
     if (!print_stats && !is_last_report && !progress_avio)
         return;
@@ -1491,6 +1523,8 @@ static void print_report(int is_last_report, int64_t timer_start, int64_t cur_ti
         last_time = cur_time;
     }
 
+    t = (cur_time-timer_start) / 1000000.0;
+
 
     oc = output_files[0]->ctx;
 
@@ -1505,15 +1539,16 @@ static void print_report(int is_last_report, int64_t timer_start, int64_t cur_ti
         float q = -1;
         ost = output_streams[i];
         enc = ost->enc_ctx;
-        if (!ost->stream_copy && enc->coded_frame)
-            q = enc->coded_frame->quality / (float)FF_QP2LAMBDA;
+        if (!ost->stream_copy)
+            q = ost->quality / (float) FF_QP2LAMBDA;
+
         if (vid && enc->codec_type == AVMEDIA_TYPE_VIDEO) {
             snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "q=%2.1f ", q);
             av_bprintf(&buf_script, "stream_%d_%d_q=%.1f\n",
                        ost->file_index, ost->index, q);
         }
         if (!vid && enc->codec_type == AVMEDIA_TYPE_VIDEO) {
-            float fps, t = (cur_time-timer_start) / 1000000.0;
+            float fps;
 
             frame_number = ost->frame_number;
             fps = t > 1 ? frame_number / t : 0;
@@ -1531,9 +1566,10 @@ static void print_report(int is_last_report, int64_t timer_start, int64_t cur_ti
                 if (qp >= 0 && qp < FF_ARRAY_ELEMS(qp_histogram))
                     qp_histogram[qp]++;
                 for (j = 0; j < 32; j++)
-                    snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "%X", (int)lrintf(log2(qp_histogram[j] + 1)));
+                    snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "%X", av_log2(qp_histogram[j] + 1));
             }
-            if ((enc->flags&CODEC_FLAG_PSNR) && (enc->coded_frame || is_last_report)) {
+
+            if ((enc->flags & AV_CODEC_FLAG_PSNR) && (ost->pict_type != AV_PICTURE_TYPE_NONE || is_last_report)) {
                 int j;
                 double error, error_sum = 0;
                 double scale, scale_sum = 0;
@@ -1545,7 +1581,7 @@ static void print_report(int is_last_report, int64_t timer_start, int64_t cur_ti
                         error = enc->error[j];
                         scale = enc->width * enc->height * 255.0 * 255.0 * frame_number;
                     } else {
-                        error = enc->coded_frame->error[j];
+                        error = ost->error[j];
                         scale = enc->width * enc->height * 255.0 * 255.0;
                     }
                     if (j)
@@ -1569,7 +1605,7 @@ static void print_report(int is_last_report, int64_t timer_start, int64_t cur_ti
             pts = FFMAX(pts, av_rescale_q(av_stream_get_end_pts(ost->st),
                                           ost->st->time_base, AV_TIME_BASE_Q));
         if (is_last_report)
-            nb_frames_drop += ost->last_droped;
+            nb_frames_drop += ost->last_dropped;
     }
 
     secs = FFABS(pts) / AV_TIME_BASE;
@@ -1580,6 +1616,7 @@ static void print_report(int is_last_report, int64_t timer_start, int64_t cur_ti
     mins %= 60;
 
     bitrate = pts && total_size >= 0 ? total_size * 8 / (pts / 1000.0) : -1;
+    speed = t != 0.0 ? (double)pts / AV_TIME_BASE / t : -1;
 
     if (total_size < 0) snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf),
                                  "size=N/A time=");
@@ -1611,6 +1648,14 @@ static void print_report(int is_last_report, int64_t timer_start, int64_t cur_ti
     av_bprintf(&buf_script, "dup_frames=%d\n", nb_frames_dup);
     av_bprintf(&buf_script, "drop_frames=%d\n", nb_frames_drop);
 
+    if (speed < 0) {
+        snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf)," speed=N/A");
+        av_bprintf(&buf_script, "speed=N/A\n");
+    } else {
+        snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf)," speed=%4.3gx", speed);
+        av_bprintf(&buf_script, "speed=%4.3gx\n", speed);
+    }
+
     if (print_stats || is_last_report) {
         const char end = is_last_report ? '\n' : '\r';
         if (print_stats==1 && AV_LOG_INFO > av_log_get_level()) {
@@ -1629,7 +1674,9 @@ static void print_report(int is_last_report, int64_t timer_start, int64_t cur_ti
         avio_flush(progress_avio);
         av_bprint_finalize(&buf_script, NULL);
         if (is_last_report) {
-            avio_closep(&progress_avio);
+            if ((ret = avio_closep(&progress_avio)) < 0)
+                av_log(NULL, AV_LOG_ERROR,
+                       "Error closing progress log, loss of information possible: %s\n", av_err2str(ret));
         }
     }
 
@@ -1652,8 +1699,10 @@ static void flush_encoders(void)
 
         if (enc->codec_type == AVMEDIA_TYPE_AUDIO && enc->frame_size <= 1)
             continue;
+#if FF_API_LAVF_FMT_RAWPICTURE
         if (enc->codec_type == AVMEDIA_TYPE_VIDEO && (os->oformat->flags & AVFMT_RAWPICTURE) && enc->codec->id == AV_CODEC_ID_RAWVIDEO)
             continue;
+#endif
 
         for (;;) {
             int (*encode)(AVCodecContext*, AVPacket*, const AVFrame*, int*) = NULL;
@@ -1662,11 +1711,11 @@ static void flush_encoders(void)
             switch (enc->codec_type) {
             case AVMEDIA_TYPE_AUDIO:
                 encode = avcodec_encode_audio2;
-                desc   = "Audio";
+                desc   = "audio";
                 break;
             case AVMEDIA_TYPE_VIDEO:
                 encode = avcodec_encode_video2;
-                desc   = "Video";
+                desc   = "video";
                 break;
             default:
                 stop_encoding = 1;
@@ -1682,9 +1731,11 @@ static void flush_encoders(void)
 
                 update_benchmark(NULL);
                 ret = encode(enc, &pkt, NULL, &got_packet);
-                update_benchmark("flush %s %d.%d", desc, ost->file_index, ost->index);
+                update_benchmark("flush_%s %d.%d", desc, ost->file_index, ost->index);
                 if (ret < 0) {
-                    av_log(NULL, AV_LOG_FATAL, "%s encoding failed\n", desc);
+                    av_log(NULL, AV_LOG_FATAL, "%s encoding failed: %s\n",
+                           desc,
+                           av_err2str(ret));
                     exit_program(1);
                 }
                 if (ost->logfile && enc->stats_out) {
@@ -1695,7 +1746,7 @@ static void flush_encoders(void)
                     break;
                 }
                 if (ost->finished & MUXER_FINISHED) {
-                    av_free_packet(&pkt);
+                    av_packet_unref(&pkt);
                     continue;
                 }
                 av_packet_rescale_ts(&pkt, enc->time_base, ost->st->time_base);
@@ -1738,7 +1789,6 @@ static void do_streamcopy(InputStream *ist, OutputStream *ost, const AVPacket *p
     InputFile   *f = input_files [ist->file_index];
     int64_t start_time = (of->start_time == AV_NOPTS_VALUE) ? 0 : of->start_time;
     int64_t ost_tb_start_time = av_rescale_q(start_time, AV_TIME_BASE_Q, ost->st->time_base);
-    int64_t ist_tb_start_time = av_rescale_q(start_time, AV_TIME_BASE_Q, ist->st->time_base);
     AVPicture pict;
     AVPacket opkt;
 
@@ -1748,13 +1798,13 @@ static void do_streamcopy(InputStream *ist, OutputStream *ost, const AVPacket *p
         !ost->copy_initial_nonkeyframes)
         return;
 
-    if (pkt->pts == AV_NOPTS_VALUE) {
-        if (!ost->frame_number && ist->pts < start_time &&
-            !ost->copy_prior_start)
-            return;
-    } else {
-        if (!ost->frame_number && pkt->pts < ist_tb_start_time &&
-            !ost->copy_prior_start)
+    if (!ost->frame_number && !ost->copy_prior_start) {
+        int64_t comp_start = start_time;
+        if (copy_ts && f->start_time != AV_NOPTS_VALUE)
+            comp_start = FFMAX(start_time, f->start_time + f->ts_offset);
+        if (pkt->pts == AV_NOPTS_VALUE ?
+            ist->pts < comp_start :
+            pkt->pts < av_rescale_q(comp_start, AV_TIME_BASE_Q, ist->st->time_base))
             return;
     }
 
@@ -1766,7 +1816,7 @@ static void do_streamcopy(InputStream *ist, OutputStream *ost, const AVPacket *p
 
     if (f->recording_time != INT64_MAX) {
         start_time = f->ctx->start_time;
-        if (f->start_time != AV_NOPTS_VALUE)
+        if (f->start_time != AV_NOPTS_VALUE && copy_ts)
             start_time += f->start_time;
         if (ist->pts >= f->recording_time + start_time) {
             close_output_stream(ost);
@@ -1800,17 +1850,22 @@ static void do_streamcopy(InputStream *ist, OutputStream *ost, const AVPacket *p
 
     opkt.duration = av_rescale_q(pkt->duration, ist->st->time_base, ost->st->time_base);
     opkt.flags    = pkt->flags;
-
     // FIXME remove the following 2 lines they shall be replaced by the bitstream filters
-    if (  ost->enc_ctx->codec_id != AV_CODEC_ID_H264
-       && ost->enc_ctx->codec_id != AV_CODEC_ID_MPEG1VIDEO
-       && ost->enc_ctx->codec_id != AV_CODEC_ID_MPEG2VIDEO
-       && ost->enc_ctx->codec_id != AV_CODEC_ID_VC1
+    if (  ost->st->codec->codec_id != AV_CODEC_ID_H264
+       && ost->st->codec->codec_id != AV_CODEC_ID_MPEG1VIDEO
+       && ost->st->codec->codec_id != AV_CODEC_ID_MPEG2VIDEO
+       && ost->st->codec->codec_id != AV_CODEC_ID_VC1
        ) {
-        if (av_parser_change(ost->parser, ost->st->codec,
+        int ret = av_parser_change(ost->parser, ost->st->codec,
                              &opkt.data, &opkt.size,
                              pkt->data, pkt->size,
-                             pkt->flags & AV_PKT_FLAG_KEY)) {
+                             pkt->flags & AV_PKT_FLAG_KEY);
+        if (ret < 0) {
+            av_log(NULL, AV_LOG_FATAL, "av_parser_change failed: %s\n",
+                   av_err2str(ret));
+            exit_program(1);
+        }
+        if (ret) {
             opkt.buf = av_buffer_create(opkt.data, opkt.size, av_buffer_default_free, NULL, 0);
             if (!opkt.buf)
                 exit_program(1);
@@ -1821,13 +1876,22 @@ static void do_streamcopy(InputStream *ist, OutputStream *ost, const AVPacket *p
     }
     av_copy_packet_side_data(&opkt, pkt);
 
-    if (ost->st->codec->codec_type == AVMEDIA_TYPE_VIDEO && (of->ctx->oformat->flags & AVFMT_RAWPICTURE)) {
+#if FF_API_LAVF_FMT_RAWPICTURE
+    if (ost->st->codec->codec_type == AVMEDIA_TYPE_VIDEO &&
+        ost->st->codec->codec_id == AV_CODEC_ID_RAWVIDEO &&
+        (of->ctx->oformat->flags & AVFMT_RAWPICTURE)) {
         /* store AVPicture in AVPacket, as expected by the output format */
-        avpicture_fill(&pict, opkt.data, ost->st->codec->pix_fmt, ost->st->codec->width, ost->st->codec->height);
+        int ret = avpicture_fill(&pict, opkt.data, ost->st->codec->pix_fmt, ost->st->codec->width, ost->st->codec->height);
+        if (ret < 0) {
+            av_log(NULL, AV_LOG_FATAL, "avpicture_fill failed: %s\n",
+                   av_err2str(ret));
+            exit_program(1);
+        }
         opkt.data = (uint8_t *)&pict;
         opkt.size = sizeof(AVPicture);
         opkt.flags |= AV_PKT_FLAG_KEY;
     }
+#endif
 
     write_frame(of->ctx, &opkt, ost);
 }
@@ -1852,6 +1916,22 @@ int guess_input_channel_layout(InputStream *ist)
     return 1;
 }
 
+/* Record the decode outcome in decode_error_stat and apply exit_on_error. */
+static void check_decode_result(InputStream *ist, int *got_output, int ret)
+{
+    const int failed = ret < 0;
+    if (failed || *got_output)
+        decode_error_stat[failed]++;
+    if (failed && exit_on_error)
+        exit_program(1);
+    if (!exit_on_error || !*got_output || !ist)
+        return; /* corrupt-frame check needs exit_on_error, a frame and a stream */
+    if (!av_frame_get_decode_error_flags(ist->decoded_frame) && !(ist->decoded_frame->flags & AV_FRAME_FLAG_CORRUPT))
+        return;
+    av_log(NULL, AV_LOG_FATAL, "%s: corrupt decoded frame in stream %d\n", input_files[ist->file_index]->ctx->filename, ist->st->index);
+    exit_program(1);
+}
+
 static int decode_audio(InputStream *ist, AVPacket *pkt, int *got_output)
 {
     AVFrame *decoded_frame, *f;
@@ -1874,23 +1954,10 @@ static int decode_audio(InputStream *ist, AVPacket *pkt, int *got_output)
         ret = AVERROR_INVALIDDATA;
     }
 
-    if (*got_output || ret<0)
-        decode_error_stat[ret<0] ++;
-
-    if (ret < 0 && exit_on_error)
-        exit_program(1);
+    check_decode_result(ist, got_output, ret);
 
-    if (!*got_output || ret < 0) {
-        if (!pkt->size) {
-            for (i = 0; i < ist->nb_filters; i++)
-#if 1
-                av_buffersrc_add_ref(ist->filters[i]->filter, NULL, 0);
-#else
-                av_buffersrc_add_frame(ist->filters[i]->filter, NULL);
-#endif
-        }
+    if (!*got_output || ret < 0)
         return ret;
-    }
 
     ist->samples_decoded += decoded_frame->nb_samples;
     ist->frames_decoded++;
@@ -1967,6 +2034,7 @@ static int decode_audio(InputStream *ist, AVPacket *pkt, int *got_output)
         decoded_frame->pts = av_rescale_delta(decoded_frame_tb, decoded_frame->pts,
                                               (AVRational){1, avctx->sample_rate}, decoded_frame->nb_samples, &ist->filter_in_rescale_delta_last,
                                               (AVRational){1, avctx->sample_rate});
+    ist->nb_samples = decoded_frame->nb_samples;
     for (i = 0; i < ist->nb_filters; i++) {
         if (i < ist->nb_filters - 1) {
             f = ist->filter_frame;
@@ -2014,19 +2082,16 @@ static int decode_video(InputStream *ist, AVPacket *pkt, int *got_output)
         if (ist->dec_ctx->codec_id == AV_CODEC_ID_H264) {
             ist->st->codec->has_b_frames = ist->dec_ctx->has_b_frames;
         } else
-            av_log_ask_for_sample(
-                ist->dec_ctx,
-                "has_b_frames is larger in decoder than demuxer %d > %d ",
-                ist->dec_ctx->has_b_frames,
-                ist->st->codec->has_b_frames
-            );
+            av_log(ist->dec_ctx, AV_LOG_WARNING,
+                   "has_b_frames is larger in decoder than demuxer %d > %d.\n"
+                   "If you want to help, upload a sample "
+                   "of this file to ftp://upload.ffmpeg.org/incoming/ "
+                   "and contact the ffmpeg-devel mailing list. (ffmpeg-devel@ffmpeg.org)",
+                   ist->dec_ctx->has_b_frames,
+                   ist->st->codec->has_b_frames);
     }
 
-    if (*got_output || ret<0)
-        decode_error_stat[ret<0] ++;
-
-    if (ret < 0 && exit_on_error)
-        exit_program(1);
+    check_decode_result(ist, got_output, ret);
 
     if (*got_output && ret >= 0) {
         if (ist->dec_ctx->width  != decoded_frame->width ||
@@ -2042,17 +2107,8 @@ static int decode_video(InputStream *ist, AVPacket *pkt, int *got_output)
         }
     }
 
-    if (!*got_output || ret < 0) {
-        if (!pkt->size) {
-            for (i = 0; i < ist->nb_filters; i++)
-#if 1
-                av_buffersrc_add_ref(ist->filters[i]->filter, NULL, 0);
-#else
-                av_buffersrc_add_frame(ist->filters[i]->filter, NULL);
-#endif
-        }
+    if (!*got_output || ret < 0)
         return ret;
-    }
 
     if(ist->top_field_first>=0)
         decoded_frame->top_field_first = ist->top_field_first;
@@ -2143,11 +2199,7 @@ static int transcode_subtitles(InputStream *ist, AVPacket *pkt, int *got_output)
     int i, ret = avcodec_decode_subtitle2(ist->dec_ctx,
                                           &subtitle, got_output, pkt);
 
-    if (*got_output || ret<0)
-        decode_error_stat[ret<0] ++;
-
-    if (ret < 0 && exit_on_error)
-        exit_program(1);
+    check_decode_result(NULL, got_output, ret);
 
     if (ret < 0 || !*got_output) {
         if (!pkt->size)
@@ -2200,8 +2252,19 @@ static int transcode_subtitles(InputStream *ist, AVPacket *pkt, int *got_output)
     return ret;
 }
 
+/* Pass a NULL frame to every filter input of ist; stop at the first failure. */
+static int send_filter_eof(InputStream *ist)
+{
+    int idx = 0, err = 0;
+
+    while (idx < ist->nb_filters && err >= 0)
+        err = av_buffersrc_add_frame(ist->filters[idx++]->filter, NULL);
+
+    return err < 0 ? err : 0;
+}
+
 /* pkt = NULL means EOF (needed to flush decoder buffers) */
-static int process_input_packet(InputStream *ist, const AVPacket *pkt)
+static int process_input_packet(InputStream *ist, const AVPacket *pkt, int no_eof)
 {
     int ret = 0, i;
     int got_output = 0;
@@ -2247,7 +2310,7 @@ static int process_input_packet(InputStream *ist, const AVPacket *pkt)
         ist->dts = ist->next_dts;
 
         if (avpkt.size && avpkt.size != pkt->size &&
-            !(ist->dec->capabilities & CODEC_CAP_SUBFRAMES)) {
+            !(ist->dec->capabilities & AV_CODEC_CAP_SUBFRAMES)) {
             av_log(NULL, ist->showed_multi_packet_warning ? AV_LOG_VERBOSE : AV_LOG_WARNING,
                    "Multiple frames in a packet from stream %d\n", pkt->stream_index);
             ist->showed_multi_packet_warning = 1;
@@ -2284,8 +2347,13 @@ static int process_input_packet(InputStream *ist, const AVPacket *pkt)
             return -1;
         }
 
-        if (ret < 0)
-            return ret;
+        if (ret < 0) {
+            av_log(NULL, AV_LOG_ERROR, "Error while decoding stream #%d:%d: %s\n",
+                   ist->file_index, ist->st->index, av_err2str(ret));
+            if (exit_on_error)
+                exit_program(1);
+            break;
+        }
 
         avpkt.dts=
         avpkt.pts= AV_NOPTS_VALUE;
@@ -2304,6 +2372,16 @@ static int process_input_packet(InputStream *ist, const AVPacket *pkt)
             break;
     }
 
+    /* after flushing, send an EOF on all the filter inputs attached to the stream */
+    /* except when looping we need to flush but not to send an EOF */
+    if (!pkt && ist->decoding_needed && !got_output && !no_eof) {
+        int ret = send_filter_eof(ist);
+        if (ret < 0) {
+            av_log(NULL, AV_LOG_FATAL, "Error marking filters as finished\n");
+            exit_program(1);
+        }
+    }
+
     /* handle stream copy */
     if (!ist->decoding_needed) {
         ist->dts = ist->next_dts;
@@ -2360,6 +2438,9 @@ static void print_sdp(void)
         }
     }
 
+    if (!j)
+        goto fail;
+
     av_sdp_create(avc, j, sdp, sizeof(sdp));
 
     if (!sdp_filename) {
@@ -2375,6 +2456,7 @@ static void print_sdp(void)
         }
     }
 
+fail:
     av_freep(&avc);
 }
 
@@ -2491,8 +2573,98 @@ static InputStream *get_input_stream(OutputStream *ost)
 
 static int compare_int64(const void *a, const void *b)
 {
-    int64_t va = *(int64_t *)a, vb = *(int64_t *)b;
-    return va < vb ? -1 : va > vb ? +1 : 0;
+    return FFDIFFSIGN(*(const int64_t *)a, *(const int64_t *)b);
+}
+
+static int init_output_stream(OutputStream *ost, char *error, int error_len)
+{
+    int ret = 0;
+    /* Open the encoder for ost when encoding is needed, otherwise only apply its options; negative return = failure. */
+    if (ost->encoding_needed) {
+        AVCodec      *codec = ost->enc;
+        AVCodecContext *dec = NULL;
+        InputStream *ist;
+        /* Use the decoder context of whatever input stream get_input_stream() reports for ost, if any. */
+        if ((ist = get_input_stream(ost)))
+            dec = ist->dec_ctx;
+        if (dec && dec->subtitle_header) {
+            /* ASS code assumes this buffer is null terminated so add extra byte. */
+            ost->enc_ctx->subtitle_header = av_mallocz(dec->subtitle_header_size + 1);
+            if (!ost->enc_ctx->subtitle_header)
+                return AVERROR(ENOMEM);
+            memcpy(ost->enc_ctx->subtitle_header, dec->subtitle_header, dec->subtitle_header_size);
+            ost->enc_ctx->subtitle_header_size = dec->subtitle_header_size;
+        }
+        if (!av_dict_get(ost->encoder_opts, "threads", NULL, 0))
+            av_dict_set(&ost->encoder_opts, "threads", "auto", 0);
+        if (ost->enc->type == AVMEDIA_TYPE_AUDIO &&
+            !codec->defaults &&
+            !av_dict_get(ost->encoder_opts, "b", NULL, 0) &&
+            !av_dict_get(ost->encoder_opts, "ab", NULL, 0))
+            av_dict_set(&ost->encoder_opts, "b", "128000", 0); /* audio fallback when neither "b" nor "ab" is set */
+        /* Open the encoder; on failure a description is written to error (bounded by error_len) and ret is returned. */
+        if ((ret = avcodec_open2(ost->enc_ctx, codec, &ost->encoder_opts)) < 0) {
+            if (ret == AVERROR_EXPERIMENTAL)
+                abort_codec_experimental(codec, 1);
+            snprintf(error, error_len,
+                     "Error while opening encoder for output stream #%d:%d - "
+                     "maybe incorrect parameters such as bit_rate, rate, width or height",
+                    ost->file_index, ost->index);
+            return ret;
+        }
+        if (ost->enc->type == AVMEDIA_TYPE_AUDIO &&
+            !(ost->enc->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE))
+            av_buffersink_set_frame_size(ost->filter->filter,
+                                            ost->enc_ctx->frame_size);
+        assert_avoptions(ost->encoder_opts);
+        if (ost->enc_ctx->bit_rate && ost->enc_ctx->bit_rate < 1000)
+            av_log(NULL, AV_LOG_WARNING, "The bitrate parameter is set too low."
+                                         " It takes bits/s as argument, not kbits/s\n");
+        /* Copy the opened encoder context into the stream's codec context; a failure here exits the program. */
+        ret = avcodec_copy_context(ost->st->codec, ost->enc_ctx);
+        if (ret < 0) {
+            av_log(NULL, AV_LOG_FATAL,
+                   "Error initializing the output stream codec context.\n");
+            exit_program(1);
+        }
+        /* Duplicate the encoder's coded side data onto the stream. */
+        if (ost->enc_ctx->nb_coded_side_data) {
+            int i;
+            /* NULL is passed as the old pointer, so this allocates a fresh array and overwrites st->side_data. */
+            ost->st->side_data = av_realloc_array(NULL, ost->enc_ctx->nb_coded_side_data,
+                                                  sizeof(*ost->st->side_data));
+            if (!ost->st->side_data)
+                return AVERROR(ENOMEM);
+            /* Deep-copy each entry; nb_side_data is bumped only after an entry is fully copied. */
+            for (i = 0; i < ost->enc_ctx->nb_coded_side_data; i++) {
+                const AVPacketSideData *sd_src = &ost->enc_ctx->coded_side_data[i];
+                AVPacketSideData *sd_dst = &ost->st->side_data[i];
+
+                sd_dst->data = av_malloc(sd_src->size);
+                if (!sd_dst->data)
+                    return AVERROR(ENOMEM);
+                memcpy(sd_dst->data, sd_src->data, sd_src->size);
+                sd_dst->size = sd_src->size;
+                sd_dst->type = sd_src->type;
+                ost->st->nb_side_data++;
+            }
+        }
+
+        // copy timebase while removing common factors
+        ost->st->time_base = av_add_q(ost->enc_ctx->time_base, (AVRational){0, 1});
+        ost->st->codec->codec= ost->enc_ctx->codec;
+    } else { /* no encoder to open: only apply encoder_opts to enc_ctx */
+        ret = av_opt_set_dict(ost->enc_ctx, &ost->encoder_opts);
+        if (ret < 0) {
+           av_log(NULL, AV_LOG_FATAL,
+                  "Error setting up codec context options.\n");
+           return ret;
+        }
+        // copy timebase while removing common factors
+        ost->st->time_base = av_add_q(ost->st->codec->time_base, (AVRational){0, 1});
+    }
+
+    return ret;
 }
 
 static void parse_forced_key_frames(char *kf, OutputStream *ost,
@@ -2605,7 +2777,7 @@ static void set_encoder_id(OutputFile *of, OutputStream *ost)
     if (!encoder_string)
         exit_program(1);
 
-    if (!(format_flags & AVFMT_FLAG_BITEXACT) && !(codec_flags & CODEC_FLAG_BITEXACT))
+    if (!(format_flags & AVFMT_FLAG_BITEXACT) && !(codec_flags & AV_CODEC_FLAG_BITEXACT))
         av_strlcpy(encoder_string, LIBAVCODEC_IDENT " ", encoder_string_len);
     else
         av_strlcpy(encoder_string, "Lavc ", encoder_string_len);
@@ -2646,21 +2818,6 @@ static int transcode_init(void)
                 input_streams[j + ifile->ist_index]->start = av_gettime_relative();
     }
 
-    /* output stream init */
-    for (i = 0; i < nb_output_files; i++) {
-        oc = output_files[i]->ctx;
-        if (!oc->nb_streams && !(oc->oformat->flags & AVFMT_NOSTREAMS)) {
-            av_dump_format(oc, i, oc->filename, 1);
-            av_log(NULL, AV_LOG_ERROR, "Output file #%d does not contain any stream\n", i);
-            return AVERROR(EINVAL);
-        }
-    }
-
-    /* init complex filtergraphs */
-    for (i = 0; i < nb_filtergraphs; i++)
-        if ((ret = avfilter_graph_config(filtergraphs[i]->graph, NULL)) < 0)
-            return ret;
-
     /* for each output stream, we compute the right encoding parameters */
     for (i = 0; i < nb_output_streams; i++) {
         AVCodecContext *enc_ctx;
@@ -2697,7 +2854,7 @@ static int transcode_init(void)
 
             av_assert0(ist && !ost->filter);
 
-            extra_size = (uint64_t)dec_ctx->extradata_size + FF_INPUT_BUFFER_PADDING_SIZE;
+            extra_size = (uint64_t)dec_ctx->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE;
 
             if (extra_size > INT_MAX) {
                 return AVERROR(EINVAL);
@@ -2772,7 +2929,7 @@ static int transcode_init(void)
                 enc_ctx->time_base = dec_ctx->time_base;
             }
 
-            if (ist && !ost->frame_rate.num)
+            if (!ost->frame_rate.num)
                 ost->frame_rate = ist->framerate;
             if(ost->frame_rate.num)
                 enc_ctx->time_base = av_inv_q(ost->frame_rate);
@@ -2819,6 +2976,7 @@ static int transcode_init(void)
                 enc_ctx->audio_service_type = dec_ctx->audio_service_type;
                 enc_ctx->block_align        = dec_ctx->block_align;
                 enc_ctx->initial_padding    = dec_ctx->delay;
+                enc_ctx->profile            = dec_ctx->profile;
 #if FF_API_AUDIOENC_DELAY
                 enc_ctx->delay              = dec_ctx->delay;
 #endif
@@ -2869,12 +3027,13 @@ static int transcode_init(void)
                 goto dump_format;
             }
 
-            if (ist)
-                ist->decoding_needed |= DECODING_FOR_OST;
-            ost->encoding_needed = 1;
-
             set_encoder_id(output_files[ost->file_index], ost);
 
+#if CONFIG_LIBMFX
+            if (qsv_transcode_init(ost))
+                exit_program(1);
+#endif
+
             if (!ost->filter &&
                 (enc_ctx->codec_type == AVMEDIA_TYPE_VIDEO ||
                  enc_ctx->codec_type == AVMEDIA_TYPE_AUDIO)) {
@@ -3001,39 +3160,6 @@ static int transcode_init(void)
                 abort();
                 break;
             }
-            /* two pass mode */
-            if (enc_ctx->flags & (CODEC_FLAG_PASS1 | CODEC_FLAG_PASS2)) {
-                char logfilename[1024];
-                FILE *f;
-
-                snprintf(logfilename, sizeof(logfilename), "%s-%d.log",
-                         ost->logfile_prefix ? ost->logfile_prefix :
-                                               DEFAULT_PASS_LOGFILENAME_PREFIX,
-                         i);
-                if (!strcmp(ost->enc->name, "libx264")) {
-                    av_dict_set(&ost->encoder_opts, "stats", logfilename, AV_DICT_DONT_OVERWRITE);
-                } else {
-                    if (enc_ctx->flags & CODEC_FLAG_PASS2) {
-                        char  *logbuffer;
-                        size_t logbuffer_size;
-                        if (cmdutils_read_file(logfilename, &logbuffer, &logbuffer_size) < 0) {
-                            av_log(NULL, AV_LOG_FATAL, "Error reading log file '%s' for pass-2 encoding\n",
-                                   logfilename);
-                            exit_program(1);
-                        }
-                        enc_ctx->stats_in = logbuffer;
-                    }
-                    if (enc_ctx->flags & CODEC_FLAG_PASS1) {
-                        f = av_fopen_utf8(logfilename, "wb");
-                        if (!f) {
-                            av_log(NULL, AV_LOG_FATAL, "Cannot write log file '%s' for pass-1 encoding: %s\n",
-                                logfilename, strerror(errno));
-                            exit_program(1);
-                        }
-                        ost->logfile = f;
-                    }
-                }
-            }
         }
 
         if (ost->disposition) {
@@ -3070,63 +3196,9 @@ static int transcode_init(void)
 
     /* open each encoder */
     for (i = 0; i < nb_output_streams; i++) {
-        ost = output_streams[i];
-        if (ost->encoding_needed) {
-            AVCodec      *codec = ost->enc;
-            AVCodecContext *dec = NULL;
-
-            if ((ist = get_input_stream(ost)))
-                dec = ist->dec_ctx;
-            if (dec && dec->subtitle_header) {
-                /* ASS code assumes this buffer is null terminated so add extra byte. */
-                ost->enc_ctx->subtitle_header = av_mallocz(dec->subtitle_header_size + 1);
-                if (!ost->enc_ctx->subtitle_header) {
-                    ret = AVERROR(ENOMEM);
-                    goto dump_format;
-                }
-                memcpy(ost->enc_ctx->subtitle_header, dec->subtitle_header, dec->subtitle_header_size);
-                ost->enc_ctx->subtitle_header_size = dec->subtitle_header_size;
-            }
-            if (!av_dict_get(ost->encoder_opts, "threads", NULL, 0))
-                av_dict_set(&ost->encoder_opts, "threads", "auto", 0);
-            av_dict_set(&ost->encoder_opts, "side_data_only_packets", "1", 0);
-
-            if ((ret = avcodec_open2(ost->enc_ctx, codec, &ost->encoder_opts)) < 0) {
-                if (ret == AVERROR_EXPERIMENTAL)
-                    abort_codec_experimental(codec, 1);
-                snprintf(error, sizeof(error), "Error while opening encoder for output stream #%d:%d - maybe incorrect parameters such as bit_rate, rate, width or height",
-                        ost->file_index, ost->index);
-                goto dump_format;
-            }
-            if (ost->enc->type == AVMEDIA_TYPE_AUDIO &&
-                !(ost->enc->capabilities & CODEC_CAP_VARIABLE_FRAME_SIZE))
-                av_buffersink_set_frame_size(ost->filter->filter,
-                                             ost->enc_ctx->frame_size);
-            assert_avoptions(ost->encoder_opts);
-            if (ost->enc_ctx->bit_rate && ost->enc_ctx->bit_rate < 1000)
-                av_log(NULL, AV_LOG_WARNING, "The bitrate parameter is set too low."
-                                             " It takes bits/s as argument, not kbits/s\n");
-
-            ret = avcodec_copy_context(ost->st->codec, ost->enc_ctx);
-            if (ret < 0) {
-                av_log(NULL, AV_LOG_FATAL,
-                       "Error initializing the output stream codec context.\n");
-                exit_program(1);
-            }
-
-            // copy timebase while removing common factors
-            ost->st->time_base = av_add_q(ost->enc_ctx->time_base, (AVRational){0, 1});
-            ost->st->codec->codec= ost->enc_ctx->codec;
-        } else {
-            ret = av_opt_set_dict(ost->enc_ctx, &ost->encoder_opts);
-            if (ret < 0) {
-                av_log(NULL, AV_LOG_FATAL,
-                    "Error setting up codec context options.\n");
-                return ret;
-            }
-            // copy timebase while removing common factors
-            ost->st->time_base = av_add_q(ost->st->codec->time_base, (AVRational){0, 1});
-        }
+        ret = init_output_stream(output_streams[i], error, sizeof(error));
+        if (ret < 0)
+            goto dump_format;
     }
 
     /* init input streams */
@@ -3316,8 +3388,12 @@ static OutputStream *choose_output(void)
 
     for (i = 0; i < nb_output_streams; i++) {
         OutputStream *ost = output_streams[i];
-        int64_t opts = av_rescale_q(ost->st->cur_dts, ost->st->time_base,
+        int64_t opts = ost->st->cur_dts == AV_NOPTS_VALUE ? INT64_MIN :
+                       av_rescale_q(ost->st->cur_dts, ost->st->time_base,
                                     AV_TIME_BASE_Q);
+        if (ost->st->cur_dts == AV_NOPTS_VALUE)
+            av_log(NULL, AV_LOG_DEBUG, "cur_dts is invalid (this is harmless if it occurs once at the start per stream)\n");
+
         if (!ost->finished && opts < opts_min) {
             opts_min = opts;
             ost_min  = ost->unavailable ? NULL : ost;
@@ -3326,6 +3402,18 @@ static OutputStream *choose_output(void)
     return ost_min;
 }
 
+/* Set or clear ECHO on fd 0; compiled to a no-op without HAVE_TERMIOS_H. */
+static void set_tty_echo(int on)
+{
+#if HAVE_TERMIOS_H
+    struct termios tty;
+    if (tcgetattr(0, &tty) != 0)
+        return;
+    tty.c_lflag = on ? (tty.c_lflag | ECHO) : (tty.c_lflag & ~ECHO);
+    tcsetattr(0, TCSANOW, &tty);
+#endif
+}
+
 static int check_keyboard_interaction(int64_t cur_time)
 {
     int i, ret, key;
@@ -3358,10 +3446,13 @@ static int check_keyboard_interaction(int64_t cur_time)
         int k, n = 0;
         fprintf(stderr, "\nEnter command: <target>|all <time>|-1 <command>[ <argument>]\n");
         i = 0;
+        set_tty_echo(1);
         while ((k = read_key()) != '\n' && k != '\r' && i < sizeof(buf)-1)
             if (k > 0)
                 buf[i++] = k;
         buf[i] = 0;
+        set_tty_echo(0);
+        fprintf(stderr, "\n");
         if (k > 0 &&
             (n = sscanf(buf, "%63[^ ] %lf %255[^ ] %255[^\n]", target, &time, command, arg)) >= 3) {
             av_log(NULL, AV_LOG_DEBUG, "Processing command target:%s time:%f command:%s arg:%s",
@@ -3396,9 +3487,20 @@ static int check_keyboard_interaction(int64_t cur_time)
             if(!debug) debug = 1;
             while(debug & (FF_DEBUG_DCT_COEFF|FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) //unsupported, would just crash
                 debug += debug;
-        }else
-            if(scanf("%d", &debug)!=1)
+        }else{
+            char buf[32];
+            int k = 0;
+            i = 0;
+            set_tty_echo(1);
+            while ((k = read_key()) != '\n' && k != '\r' && i < sizeof(buf)-1)
+                if (k > 0)
+                    buf[i++] = k;
+            buf[i] = 0;
+            set_tty_echo(0);
+            fprintf(stderr, "\n");
+            if (k <= 0 || sscanf(buf, "%d", &debug)!=1)
                 fprintf(stderr,"error parsing debug value\n");
+        }
         for(i=0;i<nb_input_streams;i++) {
             input_streams[i]->st->codec->debug = debug;
         }
@@ -3444,7 +3546,6 @@ static void *input_thread(void *arg)
             av_thread_message_queue_set_err_recv(f->in_thread_queue, ret);
             break;
         }
-        av_dup_packet(&pkt);
         ret = av_thread_message_queue_send(f->in_thread_queue, &pkt, flags);
         if (flags && ret == AVERROR(EAGAIN)) {
             flags = 0;
@@ -3459,7 +3560,7 @@ static void *input_thread(void *arg)
                 av_log(f->ctx, AV_LOG_ERROR,
                        "Unable to send packet to main thread: %s\n",
                        av_err2str(ret));
-            av_free_packet(&pkt);
+            av_packet_unref(&pkt);
             av_thread_message_queue_set_err_recv(f->in_thread_queue, ret);
             break;
         }
@@ -3476,11 +3577,11 @@ static void free_input_threads(void)
         InputFile *f = input_files[i];
         AVPacket pkt;
 
-        if (!f->in_thread_queue)
+        if (!f || !f->in_thread_queue)
             continue;
         av_thread_message_queue_set_err_send(f->in_thread_queue, AVERROR_EOF);
         while (av_thread_message_queue_recv(f->in_thread_queue, &pkt, 0) >= 0)
-            av_free_packet(&pkt);
+            av_packet_unref(&pkt);
 
         pthread_join(f->thread, NULL);
         f->joined = 1;
@@ -3561,6 +3662,87 @@ static void reset_eagain(void)
         output_streams[i]->unavailable = 0;
 }
 
+// Set *duration (in time_base) to max(tmp (in tmp_time_base), *duration); return the time base of the value left in *duration.
+static AVRational duration_max(int64_t tmp, int64_t *duration, AVRational tmp_time_base,
+                                AVRational time_base)
+{
+    int ret;
+
+    if (!*duration) { // nothing stored yet: take tmp without comparing
+        *duration = tmp;
+        return tmp_time_base;
+    }
+
+    ret = av_compare_ts(*duration, time_base, tmp, tmp_time_base);
+    if (ret < 0) { // negative result: tmp replaces the stored duration
+        *duration = tmp;
+        return tmp_time_base;
+    }
+
+    return time_base; // stored duration kept, so its time base is returned unchanged
+}
+
+/*
+ * Rewind input file ifile (demuxer context is) to is->start_time for another
+ * loop pass and fold the span of the pass just read into ifile->duration.
+ * Returns the av_seek_frame() result; a negative value means the seek failed.
+ */
+static int seek_to_start(InputFile *ifile, AVFormatContext *is)
+{
+    InputStream *ist;
+    AVCodecContext *avctx;
+    int i, ret, has_audio = 0;
+    int64_t duration = 0;
+
+    ret = av_seek_frame(is, -1, is->start_time, 0);
+    if (ret < 0)
+        return ret;
+
+    for (i = 0; i < ifile->nb_streams; i++) {
+        ist   = input_streams[ifile->ist_index + i];
+        avctx = ist->dec_ctx;
+
+        if (ist->decoding_needed) { // flush decoders
+            process_input_packet(ist, NULL, 1);
+            avcodec_flush_buffers(avctx);
+        }
+
+        /* duration is the length of the last frame in a stream; when an audio stream
+         * is present the last video frame length is ignored (not defined exactly) */
+        if (avctx->codec_type == AVMEDIA_TYPE_AUDIO && ist->nb_samples)
+            has_audio = 1;
+    }
+
+    for (i = 0; i < ifile->nb_streams; i++) {
+        ist   = input_streams[ifile->ist_index + i];
+        avctx = ist->dec_ctx;
+
+        if (has_audio) {
+            if (avctx->codec_type == AVMEDIA_TYPE_AUDIO && ist->nb_samples) {
+                AVRational sample_rate = {1, avctx->sample_rate};
+                duration = av_rescale_q(ist->nb_samples, sample_rate, ist->st->time_base);
+            } else
+                continue;
+        } else {
+            if (ist->framerate.num) { /* a frame lasts 1/rate seconds: invert the rate */
+                duration = av_rescale_q(1, av_inv_q(ist->framerate), ist->st->time_base);
+            } else if (ist->st->avg_frame_rate.num) {
+                duration = av_rescale_q(1, av_inv_q(ist->st->avg_frame_rate), ist->st->time_base);
+            } else duration = 1;
+        }
+        if (!ifile->duration)
+            ifile->time_base = ist->st->time_base;
+        /* max_pts - min_pts is the stream duration without its last frame, added above */
+        duration += ist->max_pts - ist->min_pts;
+        ifile->time_base = duration_max(duration, &ifile->duration, ist->st->time_base, ifile->time_base);
+    }
+
+    if (ifile->loop > 0) // only positive counts are consumed; other values stay as they are
+        ifile->loop--;
+
+    return ret;
+}
+
 /*
  * Return
  * - 0 -- one packet was read and processed
@@ -3575,6 +3757,8 @@ static int process_input(int file_index)
     InputStream *ist;
     AVPacket pkt;
     int ret, i, j;
+    int64_t duration;
+    int64_t pkt_dts;
 
     is  = ifile->ctx;
     ret = get_input_packet(ifile, &pkt);
@@ -3583,6 +3767,11 @@ static int process_input(int file_index)
         ifile->eagain = 1;
         return ret;
     }
+    if (ret < 0 && ifile->loop) {
+        if ((ret = seek_to_start(ifile, is)) < 0)
+            return ret;
+        ret = get_input_packet(ifile, &pkt);
+    }
     if (ret < 0) {
         if (ret != AVERROR_EOF) {
             print_error(is->filename, ret);
@@ -3593,7 +3782,7 @@ static int process_input(int file_index)
         for (i = 0; i < ifile->nb_streams; i++) {
             ist = input_streams[ifile->ist_index + i];
             if (ist->decoding_needed) {
-                ret = process_input_packet(ist, NULL);
+                ret = process_input_packet(ist, NULL, 0);
                 if (ret>0)
                     return 0;
             }
@@ -3615,7 +3804,7 @@ static int process_input(int file_index)
     reset_eagain();
 
     if (do_pkt_dump) {
-        av_pkt_dump_log2(NULL, AV_LOG_DEBUG, &pkt, do_hex_dump,
+        av_pkt_dump_log2(NULL, AV_LOG_INFO, &pkt, do_hex_dump,
                          is->streams[pkt.stream_index]);
     }
     /* the following test is needed in case new streams appear
@@ -3633,6 +3822,11 @@ static int process_input(int file_index)
     if (ist->discard)
         goto discard_packet;
 
+    if (exit_on_error && (pkt.flags & AV_PKT_FLAG_CORRUPT)) {
+        av_log(NULL, AV_LOG_FATAL, "%s: corrupt input packet in stream %d\n", is->filename, pkt.stream_index);
+        exit_program(1);
+    }
+
     if (debug_ts) {
         av_log(NULL, AV_LOG_INFO, "demuxer -> ist_index:%d type:%s "
                "next_dts:%s next_dts_time:%s next_pts:%s next_pts_time:%s pkt_pts:%s pkt_pts_time:%s pkt_dts:%s pkt_dts_time:%s off:%s off_time:%s\n",
@@ -3711,11 +3905,11 @@ static int process_input(int file_index)
     if (pkt.dts != AV_NOPTS_VALUE)
         pkt.dts *= ist->ts_scale;
 
+    pkt_dts = av_rescale_q_rnd(pkt.dts, ist->st->time_base, AV_TIME_BASE_Q, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
     if ((ist->dec_ctx->codec_type == AVMEDIA_TYPE_VIDEO ||
          ist->dec_ctx->codec_type == AVMEDIA_TYPE_AUDIO) &&
-        pkt.dts != AV_NOPTS_VALUE && ist->next_dts == AV_NOPTS_VALUE && !copy_ts
+        pkt_dts != AV_NOPTS_VALUE && ist->next_dts == AV_NOPTS_VALUE && !copy_ts
         && (is->iformat->flags & AVFMT_TS_DISCONT) && ifile->last_ts != AV_NOPTS_VALUE) {
-        int64_t pkt_dts = av_rescale_q(pkt.dts, ist->st->time_base, AV_TIME_BASE_Q);
         int64_t delta   = pkt_dts - ifile->last_ts;
         if (delta < -1LL*dts_delta_threshold*AV_TIME_BASE ||
             delta >  1LL*dts_delta_threshold*AV_TIME_BASE){
@@ -3729,11 +3923,21 @@ static int process_input(int file_index)
         }
     }
 
+    duration = av_rescale_q(ifile->duration, ifile->time_base, ist->st->time_base);
+    if (pkt.pts != AV_NOPTS_VALUE) {
+        pkt.pts += duration;
+        ist->max_pts = FFMAX(pkt.pts, ist->max_pts);
+        ist->min_pts = FFMIN(pkt.pts, ist->min_pts);
+    }
+
+    if (pkt.dts != AV_NOPTS_VALUE)
+        pkt.dts += duration;
+
+    pkt_dts = av_rescale_q_rnd(pkt.dts, ist->st->time_base, AV_TIME_BASE_Q, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
     if ((ist->dec_ctx->codec_type == AVMEDIA_TYPE_VIDEO ||
          ist->dec_ctx->codec_type == AVMEDIA_TYPE_AUDIO) &&
-         pkt.dts != AV_NOPTS_VALUE && ist->next_dts != AV_NOPTS_VALUE &&
+         pkt_dts != AV_NOPTS_VALUE && ist->next_dts != AV_NOPTS_VALUE &&
         !copy_ts) {
-        int64_t pkt_dts = av_rescale_q(pkt.dts, ist->st->time_base, AV_TIME_BASE_Q);
         int64_t delta   = pkt_dts - ist->next_dts;
         if (is->iformat->flags & AVFMT_TS_DISCONT) {
             if (delta < -1LL*dts_delta_threshold*AV_TIME_BASE ||
@@ -3779,16 +3983,10 @@ static int process_input(int file_index)
 
     sub2video_heartbeat(ist, pkt.pts);
 
-    ret = process_input_packet(ist, &pkt);
-    if (ret < 0) {
-        av_log(NULL, AV_LOG_ERROR, "Error while decoding stream #%d:%d: %s\n",
-               ist->file_index, ist->st->index, av_err2str(ret));
-        if (exit_on_error)
-            exit_program(1);
-    }
+    process_input_packet(ist, &pkt, 0);
 
 discard_packet:
-    av_free_packet(&pkt);
+    av_packet_unref(&pkt);
 
     return 0;
 }
@@ -3896,6 +4094,7 @@ static int transcode(void)
     OutputStream *ost;
     InputStream *ist;
     int64_t timer_start;
+    int64_t total_packets_written = 0;
 
     ret = transcode_init();
     if (ret < 0)
@@ -3927,16 +4126,12 @@ static int transcode(void)
         }
 
         ret = transcode_step();
-        if (ret < 0) {
-            if (ret == AVERROR_EOF || ret == AVERROR(EAGAIN)) {
-                continue;
-            } else {
-                char errbuf[128];
-                av_strerror(ret, errbuf, sizeof(errbuf));
+        if (ret < 0 && ret != AVERROR_EOF) {
+            char errbuf[128];
+            av_strerror(ret, errbuf, sizeof(errbuf));
 
-                av_log(NULL, AV_LOG_ERROR, "Error while filtering: %s\n", errbuf);
-                break;
-            }
+            av_log(NULL, AV_LOG_ERROR, "Error while filtering: %s\n", errbuf);
+            break;
         }
 
         /* dump report by using the output first video and audio streams */
@@ -3950,7 +4145,7 @@ static int transcode(void)
     for (i = 0; i < nb_input_streams; i++) {
         ist = input_streams[i];
         if (!input_files[ist->file_index]->eof_reached && ist->decoding_needed) {
-            process_input_packet(ist, NULL);
+            process_input_packet(ist, NULL, 0);
         }
     }
     flush_encoders();
@@ -3960,7 +4155,11 @@ static int transcode(void)
     /* write the trailer if needed and close file */
     for (i = 0; i < nb_output_files; i++) {
         os = output_files[i]->ctx;
-        av_write_trailer(os);
+        if ((ret = av_write_trailer(os)) < 0) {
+            av_log(NULL, AV_LOG_ERROR, "Error writing trailer of %s: %s", os->filename, av_err2str(ret));
+            if (exit_on_error)
+                exit_program(1);
+        }
     }
 
     /* dump report by using the first video and audio streams */
@@ -3972,6 +4171,12 @@ static int transcode(void)
         if (ost->encoding_needed) {
             av_freep(&ost->enc_ctx->stats_in);
         }
+        total_packets_written += ost->packets_written;
+    }
+
+    if (!total_packets_written && (abort_on_flags & ABORT_ON_FLAG_EMPTY_OUTPUT)) {
+        av_log(NULL, AV_LOG_FATAL, "Empty output\n");
+        exit_program(1);
     }
 
     /* close each decoder */
@@ -3997,16 +4202,19 @@ static int transcode(void)
             ost = output_streams[i];
             if (ost) {
                 if (ost->logfile) {
-                    fclose(ost->logfile);
+                    if (fclose(ost->logfile))
+                        av_log(NULL, AV_LOG_ERROR,
+                               "Error closing logfile, loss of information possible: %s\n",
+                               av_err2str(AVERROR(errno)));
                     ost->logfile = NULL;
                 }
                 av_freep(&ost->forced_kf_pts);
                 av_freep(&ost->apad);
                 av_freep(&ost->disposition);
                 av_dict_free(&ost->encoder_opts);
+                av_dict_free(&ost->sws_dict);
                 av_dict_free(&ost->swr_opts);
                 av_dict_free(&ost->resample_opts);
-                av_dict_free(&ost->bsf_args);
             }
         }
     }
@@ -4112,7 +4320,7 @@ int main(int argc, char **argv)
         exit_program(1);
     ti = getutime() - ti;
     if (do_benchmark) {
-        printf("bench: utime=%0.3fs\n", ti / 1000000.0);
+        av_log(NULL, AV_LOG_INFO, "bench: utime=%0.3fs\n", ti / 1000000.0);
     }
     av_log(NULL, AV_LOG_DEBUG, "%"PRIu64" frames successfully decoded, %"PRIu64" decoding errors\n",
            decode_error_stat[0], decode_error_stat[1]);
diff --git a/ffmpeg.h b/ffmpeg.h
index 7fd129a2..20322b04 100644
--- a/ffmpeg.h
+++ b/ffmpeg.h
@@ -63,6 +63,8 @@ enum HWAccelID {
     HWACCEL_VDPAU,
     HWACCEL_DXVA2,
     HWACCEL_VDA,
+    HWACCEL_VIDEOTOOLBOX,
+    HWACCEL_QSV,
 };
 
 typedef struct HWAccel {
@@ -92,6 +94,7 @@ typedef struct OptionsContext {
 
     /* input/output options */
     int64_t start_time;
+    int64_t start_time_eof;
     int seek_timestamp;
     const char *format;
 
@@ -110,6 +113,7 @@ typedef struct OptionsContext {
 
     /* input options */
     int64_t input_ts_offset;
+    int loop;
     int rate_emu;
     int accurate_seek;
     int thread_queue_size;
@@ -212,6 +216,8 @@ typedef struct OptionsContext {
     int        nb_discard;
     SpecifierOpt *disposition;
     int        nb_disposition;
+    SpecifierOpt *program;
+    int        nb_program;
 } OptionsContext;
 
 typedef struct InputFilter {
@@ -229,6 +235,7 @@ typedef struct OutputFilter {
 
     /* temporary storage until stream maps are processed */
     AVFilterInOut       *out_tmp;
+    enum AVMediaType     type;
 } OutputFilter;
 
 typedef struct FilterGraph {
@@ -270,6 +277,10 @@ typedef struct InputStream {
 
     int64_t filter_in_rescale_delta_last;
 
+    int64_t min_pts; /* pts with the smallest value in the current stream */
+    int64_t max_pts; /* pts with the highest value in the current stream */
+    int64_t nb_samples; /* number of samples in the last decoded audio frame before looping */
+
     double ts_scale;
     int saw_first_ts;
     int showed_multi_packet_warning;
@@ -339,7 +350,12 @@ typedef struct InputFile {
     int eof_reached;      /* true if eof reached */
     int eagain;           /* true if last read attempt returned EAGAIN */
     int ist_index;        /* index of first stream in input_streams */
+    int loop;             /* set number of times input stream should be looped */
+    int64_t duration;     /* actual duration of the longest stream in a file
+                             at the moment when looping happens */
+    AVRational time_base; /* time base of the duration */
     int64_t input_ts_offset;
+
     int64_t ts_offset;
     int64_t last_ts;
     int64_t start_time;   /* user-specified start time in AV_TIME_BASE or AV_NOPTS_VALUE */
@@ -369,6 +385,8 @@ enum forced_keyframes_const {
     FKF_NB
 };
 
+#define ABORT_ON_FLAG_EMPTY_OUTPUT (1 <<  0)
+
 extern const char *const forced_keyframes_const_names[];
 
 typedef enum {
@@ -398,11 +416,14 @@ typedef struct OutputStream {
     int64_t max_frames;
     AVFrame *filtered_frame;
     AVFrame *last_frame;
-    int last_droped;
+    int last_dropped;
     int last_nb0_frames[3];
 
+    void  *hwaccel_ctx;
+
     /* video only */
     AVRational frame_rate;
+    int is_cfr;
     int force_fps;
     int top_field_first;
     int rotate_overridden;
@@ -429,11 +450,10 @@ typedef struct OutputStream {
     char *filters;         ///< filtergraph associated to the -filter option
     char *filters_script;  ///< filtergraph script associated to the -filter_script option
 
-    int64_t sws_flags;
     AVDictionary *encoder_opts;
+    AVDictionary *sws_dict;
     AVDictionary *swr_opts;
     AVDictionary *resample_opts;
-    AVDictionary *bsf_args;
     char *apad;
     OSTFinished finished;        /* no more packets should be written for this stream */
     int unavailable;                     /* true if the steram is unavailable (possibly temporarily) */
@@ -455,6 +475,15 @@ typedef struct OutputStream {
     // number of frames/samples sent to the encoder
     uint64_t frames_encoded;
     uint64_t samples_encoded;
+
+    /* packet quality factor */
+    int quality;
+
+    /* packet picture type */
+    int pict_type;
+
+    /* frame encode sum of squared error values */
+    int64_t error[4];
 } OutputStream;
 
 typedef struct OutputFile {
@@ -502,6 +531,7 @@ extern int start_at_zero;
 extern int copy_tb;
 extern int debug_ts;
 extern int exit_on_error;
+extern int abort_on_flags;
 extern int print_stats;
 extern int qp_hist;
 extern int stdin_interaction;
@@ -509,6 +539,7 @@ extern int frame_bits_per_raw_sample;
 extern AVIOContext *progress_avio;
 extern float max_error_rate;
 extern int vdpau_api_ver;
+extern char *videotoolbox_pixfmt;
 
 extern const AVIOInterruptCB int_cb;
 
@@ -536,11 +567,15 @@ int configure_filtergraph(FilterGraph *fg);
 int configure_output_filter(FilterGraph *fg, OutputFilter *ofilter, AVFilterInOut *out);
 int ist_in_filtergraph(FilterGraph *fg, InputStream *ist);
 FilterGraph *init_simple_filtergraph(InputStream *ist, OutputStream *ost);
+int init_complex_filtergraph(FilterGraph *fg);
 
 int ffmpeg_parse_options(int argc, char **argv);
 
 int vdpau_init(AVCodecContext *s);
 int dxva2_init(AVCodecContext *s);
 int vda_init(AVCodecContext *s);
+int videotoolbox_init(AVCodecContext *s);
+int qsv_init(AVCodecContext *s);
+int qsv_transcode_init(OutputStream *ost);
 
 #endif /* FFMPEG_H */
diff --git a/ffmpeg_dxva2.c b/ffmpeg_dxva2.c
index 6b20195f..905bf890 100644
--- a/ffmpeg_dxva2.c
+++ b/ffmpeg_dxva2.c
@@ -53,6 +53,7 @@ DEFINE_GUID(DXVADDI_Intel_ModeH264_E, 0x604F8E68, 0x4951,0x4C54,0x88,0xFE,0xAB,0
 DEFINE_GUID(DXVA2_ModeVC1_D,          0x1b81beA3, 0xa0c7,0x11d3,0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5);
 DEFINE_GUID(DXVA2_ModeVC1_D2010,      0x1b81beA4, 0xa0c7,0x11d3,0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5);
 DEFINE_GUID(DXVA2_ModeHEVC_VLD_Main,  0x5b11d51b, 0x2f4c,0x4452,0xbc,0xc3,0x09,0xf2,0xa1,0x16,0x0c,0xc0);
+DEFINE_GUID(DXVA2_ModeVP9_VLD_Profile0, 0x463707f8, 0xa1d0,0x4585,0x87,0x6d,0x83,0xaa,0x6d,0x60,0xb8,0x9e);
 DEFINE_GUID(DXVA2_NoEncrypt,          0x1b81beD0, 0xa0c7,0x11d3,0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5);
 DEFINE_GUID(GUID_NULL,                0x00000000, 0x0000,0x0000,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00);
 
@@ -84,6 +85,9 @@ static const dxva2_mode dxva2_modes[] = {
     /* HEVC/H.265 */
     { &DXVA2_ModeHEVC_VLD_Main,  AV_CODEC_ID_HEVC },
 
+    /* VP8/9 */
+    { &DXVA2_ModeVP9_VLD_Profile0, AV_CODEC_ID_VP9 },
+
     { NULL,                      0 },
 };
 
@@ -543,6 +547,8 @@ static int dxva2_create_decoder(AVCodecContext *s)
     /* add surfaces based on number of possible refs */
     if (s->codec_id == AV_CODEC_ID_H264 || s->codec_id == AV_CODEC_ID_HEVC)
         ctx->num_surfaces += 16;
+    else if (s->codec_id == AV_CODEC_ID_VP9)
+        ctx->num_surfaces += 8;
     else
         ctx->num_surfaces += 2;
 
diff --git a/ffmpeg_filter.c b/ffmpeg_filter.c
index 0be49bea..6896a788 100644
--- a/ffmpeg_filter.c
+++ b/ffmpeg_filter.c
@@ -38,6 +38,28 @@
 #include "libavutil/imgutils.h"
 #include "libavutil/samplefmt.h"
 
+/* Pixel formats to offer for codec_id; callers in this file use it only when strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL. */
+static const enum AVPixelFormat *get_compliance_unofficial_pix_fmts(enum AVCodecID codec_id, const enum AVPixelFormat default_formats[])
+{
+    static const enum AVPixelFormat mjpeg_formats[] =
+        { AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ444P,
+          AV_PIX_FMT_YUV420P,  AV_PIX_FMT_YUV422P,  AV_PIX_FMT_YUV444P,
+          AV_PIX_FMT_NONE }; // AV_PIX_FMT_NONE ends the list; callers iterate until they see it
+    static const enum AVPixelFormat ljpeg_formats[] =
+        { AV_PIX_FMT_BGR24   , AV_PIX_FMT_BGRA    , AV_PIX_FMT_BGR0,
+          AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ422P,
+          AV_PIX_FMT_YUV420P , AV_PIX_FMT_YUV444P , AV_PIX_FMT_YUV422P,
+          AV_PIX_FMT_NONE};
+
+    if (codec_id == AV_CODEC_ID_MJPEG) {
+        return mjpeg_formats;
+    } else if (codec_id == AV_CODEC_ID_LJPEG) {
+        return ljpeg_formats;
+    } else {
+        return default_formats; // any other codec: the caller's list is handed back untouched
+    }
+}
+
 enum AVPixelFormat choose_pixel_fmt(AVStream *st, AVCodecContext *enc_ctx, AVCodec *codec, enum AVPixelFormat target)
 {
     if (codec && codec->pix_fmts) {
@@ -45,18 +67,9 @@ enum AVPixelFormat choose_pixel_fmt(AVStream *st, AVCodecContext *enc_ctx, AVCod
         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(target);
         int has_alpha = desc ? desc->nb_components % 2 == 0 : 0;
         enum AVPixelFormat best= AV_PIX_FMT_NONE;
-        static const enum AVPixelFormat mjpeg_formats[] =
-            { AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_NONE };
-        static const enum AVPixelFormat ljpeg_formats[] =
-            { AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUV420P,
-              AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_BGRA, AV_PIX_FMT_NONE };
 
         if (enc_ctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL) {
-            if (enc_ctx->codec_id == AV_CODEC_ID_MJPEG) {
-                p = mjpeg_formats;
-            } else if (enc_ctx->codec_id == AV_CODEC_ID_LJPEG) {
-                p =ljpeg_formats;
-            }
+            p = get_compliance_unofficial_pix_fmts(enc_ctx->codec_id, p);
         }
         for (; *p != AV_PIX_FMT_NONE; p++) {
             best= avcodec_find_best_pix_fmt_of_2(best, *p, target, has_alpha, NULL);
@@ -85,7 +98,7 @@ void choose_sample_fmt(AVStream *st, AVCodec *codec)
                 break;
         }
         if (*p == -1) {
-            if((codec->capabilities & CODEC_CAP_LOSSLESS) && av_get_sample_fmt_name(st->codec->sample_fmt) > av_get_sample_fmt_name(codec->sample_fmts[0]))
+            if((codec->capabilities & AV_CODEC_CAP_LOSSLESS) && av_get_sample_fmt_name(st->codec->sample_fmt) > av_get_sample_fmt_name(codec->sample_fmts[0]))
                 av_log(NULL, AV_LOG_ERROR, "Conversion will not be lossless.\n");
             if(av_get_sample_fmt_name(st->codec->sample_fmt))
             av_log(NULL, AV_LOG_WARNING,
@@ -126,12 +139,7 @@ static char *choose_pix_fmts(OutputStream *ost)
 
         p = ost->enc->pix_fmts;
         if (ost->enc_ctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL) {
-            if (ost->enc_ctx->codec_id == AV_CODEC_ID_MJPEG) {
-                p = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_NONE };
-            } else if (ost->enc_ctx->codec_id == AV_CODEC_ID_LJPEG) {
-                p = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUV420P,
-                                                    AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_BGRA, AV_PIX_FMT_NONE };
-            }
+            p = get_compliance_unofficial_pix_fmts(ost->enc_ctx->codec_id, p);
         }
 
         for (; *p != AV_PIX_FMT_NONE; p++) {
@@ -289,6 +297,45 @@ static void init_input_filter(FilterGraph *fg, AVFilterInOut *in)
     ist->filters[ist->nb_filters - 1] = fg->inputs[fg->nb_inputs - 1];
 }
 
+int init_complex_filtergraph(FilterGraph *fg)
+{
+    AVFilterInOut *inputs, *outputs, *cur;
+    AVFilterGraph *graph;
+    int ret = 0;
+    /* Parse fg->graph_desc, run init_input_filter() on each input and append one fg->outputs[] entry per output.
+     * This graph is only used for determining the kinds of inputs and outputs we have,
+     * and is discarded on exit from this function. Returns the parse result, or AVERROR(ENOMEM). */
+    graph = avfilter_graph_alloc();
+    if (!graph)
+        return AVERROR(ENOMEM);
+
+    ret = avfilter_graph_parse2(graph, fg->graph_desc, &inputs, &outputs);
+    if (ret < 0)
+        goto fail;
+
+    for (cur = inputs; cur; cur = cur->next)
+        init_input_filter(fg, cur);
+
+    for (cur = outputs; cur;) { // no increment here: cur is advanced inside the body
+        GROW_ARRAY(fg->outputs, fg->nb_outputs);
+        fg->outputs[fg->nb_outputs - 1] = av_mallocz(sizeof(*fg->outputs[0]));
+        if (!fg->outputs[fg->nb_outputs - 1])
+            exit_program(1);
+
+        fg->outputs[fg->nb_outputs - 1]->graph   = fg;
+        fg->outputs[fg->nb_outputs - 1]->out_tmp = cur;
+        fg->outputs[fg->nb_outputs - 1]->type    = avfilter_pad_get_type(cur->filter_ctx->output_pads,
+                                                                         cur->pad_idx);
+        cur = cur->next; // advance first: the next line cuts the stored node off from the rest of the list
+        fg->outputs[fg->nb_outputs - 1]->out_tmp->next = NULL;
+    }
+
+fail: // also reached on success; on that path the output nodes stay referenced through out_tmp
+    avfilter_inout_free(&inputs);
+    avfilter_graph_free(&graph); // NOTE(review): out_tmp->filter_ctx presumably dangles after this; type was cached above - confirm
+    return ret;
+}
+
 static int insert_trim(int64_t start_time, int64_t duration,
                        AVFilterContext **last_filter, int *pad_idx,
                        const char *filter_name)
@@ -384,11 +431,17 @@ static int configure_output_video_filter(FilterGraph *fg, OutputFilter *ofilter,
     if (codec->width || codec->height) {
         char args[255];
         AVFilterContext *filter;
+        AVDictionaryEntry *e = NULL;
 
-        snprintf(args, sizeof(args), "%d:%d:0x%X",
+        snprintf(args, sizeof(args), "%d:%d",
                  codec->width,
-                 codec->height,
-                 (unsigned)ost->sws_flags);
+                 codec->height);
+
+        while ((e = av_dict_get(ost->sws_dict, "", e,
+                                AV_DICT_IGNORE_SUFFIX))) {
+            av_strlcatf(args, sizeof(args), ":%s=%s", e->key, e->value);
+        }
+
         snprintf(name, sizeof(name), "scaler for output stream %d:%d",
                  ost->file_index, ost->index);
         if ((ret = avfilter_graph_create_filter(&filter, avfilter_get_by_name("scale"),
@@ -499,7 +552,7 @@ static int configure_output_audio_filter(FilterGraph *fg, OutputFilter *ofilter,
                    av_get_default_channel_layout(ost->audio_channels_mapped));
         for (i = 0; i < ost->audio_channels_mapped; i++)
             if (ost->audio_channels_map[i] != -1)
-                av_bprintf(&pan_buf, ":c%d=c%d", i, ost->audio_channels_map[i]);
+                av_bprintf(&pan_buf, "|c%d=c%d", i, ost->audio_channels_map[i]);
 
         AUTO_INSERT_FILTER("-map_channel", "pan", pan_buf.str);
         av_bprint_finalize(&pan_buf, NULL);
@@ -602,6 +655,11 @@ int configure_output_filter(FilterGraph *fg, OutputFilter *ofilter, AVFilterInOu
     av_freep(&ofilter->name);
     DESCRIBE_FILTER_LINK(ofilter, out, 0);
 
+    if (!ofilter->ost) {
+        av_log(NULL, AV_LOG_FATAL, "Filter %s has a unconnected output\n", ofilter->name);
+        exit_program(1);
+    }
+
     switch (avfilter_pad_get_type(out->filter_ctx->output_pads, out->pad_idx)) {
     case AVMEDIA_TYPE_VIDEO: return configure_output_video_filter(fg, ofilter, out);
     case AVMEDIA_TYPE_AUDIO: return configure_output_audio_filter(fg, ofilter, out);
@@ -632,8 +690,8 @@ static int sub2video_prepare(InputStream *ist)
         }
         av_log(avf, AV_LOG_INFO, "sub2video: using %dx%d canvas\n", w, h);
     }
-    ist->sub2video.w = ist->dec_ctx->width  = ist->resample_width  = w;
-    ist->sub2video.h = ist->dec_ctx->height = ist->resample_height = h;
+    ist->sub2video.w = ist->resample_width  = w;
+    ist->sub2video.h = ist->resample_height = h;
 
     /* rectangles are AV_PIX_FMT_PAL8, but we have no guarantee that the
        palettes for all rectangles are identical or compatible */
@@ -688,7 +746,7 @@ static int configure_input_video_filter(FilterGraph *fg, InputFilter *ifilter,
              ist->resample_height,
              ist->hwaccel_retrieve_data ? ist->hwaccel_retrieved_pix_fmt : ist->resample_pix_fmt,
              tb.num, tb.den, sar.num, sar.den,
-             SWS_BILINEAR + ((ist->dec_ctx->flags&CODEC_FLAG_BITEXACT) ? SWS_BITEXACT:0));
+             SWS_BILINEAR + ((ist->dec_ctx->flags&AV_CODEC_FLAG_BITEXACT) ? SWS_BITEXACT:0));
     if (fr.num && fr.den)
         av_bprintf(&args, ":frame_rate=%d/%d", fr.num, fr.den);
     snprintf(name, sizeof(name), "graph %d input from stream %d:%d", fg->index,
@@ -904,7 +962,7 @@ static int configure_input_filter(FilterGraph *fg, InputFilter *ifilter,
 int configure_filtergraph(FilterGraph *fg)
 {
     AVFilterInOut *inputs, *outputs, *cur;
-    int ret, i, init = !fg->graph, simple = !fg->graph_desc;
+    int ret, i, simple = !fg->graph_desc;
     const char *graph_desc = simple ? fg->outputs[0]->ost->avfilter :
                                       fg->graph_desc;
 
@@ -917,7 +975,13 @@ int configure_filtergraph(FilterGraph *fg)
         char args[512];
         AVDictionaryEntry *e = NULL;
 
-        snprintf(args, sizeof(args), "flags=0x%X", (unsigned)ost->sws_flags);
+        args[0] = 0;
+        while ((e = av_dict_get(ost->sws_dict, "", e,
+                                AV_DICT_IGNORE_SUFFIX))) {
+            av_strlcatf(args, sizeof(args), "%s=%s:", e->key, e->value);
+        }
+        if (strlen(args))
+            args[strlen(args)-1] = 0;
         fg->graph->scale_sws_opts = av_strdup(args);
 
         args[0] = 0;
@@ -947,14 +1011,30 @@ int configure_filtergraph(FilterGraph *fg)
         return ret;
 
     if (simple && (!inputs || inputs->next || !outputs || outputs->next)) {
-        av_log(NULL, AV_LOG_ERROR, "Simple filtergraph '%s' does not have "
-               "exactly one input and output.\n", graph_desc);
+        const char *num_inputs;
+        const char *num_outputs;
+        if (!outputs) {
+            num_outputs = "0";
+        } else if (outputs->next) {
+            num_outputs = ">1";
+        } else {
+            num_outputs = "1";
+        }
+        if (!inputs) {
+            num_inputs = "0";
+        } else if (inputs->next) {
+            num_inputs = ">1";
+        } else {
+            num_inputs = "1";
+        }
+        av_log(NULL, AV_LOG_ERROR, "Simple filtergraph '%s' was expected "
+               "to have exactly 1 input and 1 output."
+               " However, it had %s input(s) and %s output(s)."
+               " Please adjust, or use a complex filtergraph (-filter_complex) instead.\n",
+               graph_desc, num_inputs, num_outputs);
         return AVERROR(EINVAL);
     }
 
-    for (cur = inputs; !simple && init && cur; cur = cur->next)
-        init_input_filter(fg, cur);
-
     for (cur = inputs, i = 0; cur; cur = cur->next, i++)
         if ((ret = configure_input_filter(fg, fg->inputs[i], cur)) < 0) {
             avfilter_inout_free(&inputs);
@@ -963,35 +1043,26 @@ int configure_filtergraph(FilterGraph *fg)
         }
     avfilter_inout_free(&inputs);
 
-    if (!init || simple) {
-        /* we already know the mappings between lavfi outputs and output streams,
-         * so we can finish the setup */
-        for (cur = outputs, i = 0; cur; cur = cur->next, i++)
-            configure_output_filter(fg, fg->outputs[i], cur);
-        avfilter_inout_free(&outputs);
+    for (cur = outputs, i = 0; cur; cur = cur->next, i++)
+        configure_output_filter(fg, fg->outputs[i], cur);
+    avfilter_inout_free(&outputs);
 
-        if ((ret = avfilter_graph_config(fg->graph, NULL)) < 0)
-            return ret;
-    } else {
-        /* wait until output mappings are processed */
-        for (cur = outputs; cur;) {
-            GROW_ARRAY(fg->outputs, fg->nb_outputs);
-            if (!(fg->outputs[fg->nb_outputs - 1] = av_mallocz(sizeof(*fg->outputs[0]))))
-                exit_program(1);
-            fg->outputs[fg->nb_outputs - 1]->graph   = fg;
-            fg->outputs[fg->nb_outputs - 1]->out_tmp = cur;
-            cur = cur->next;
-            fg->outputs[fg->nb_outputs - 1]->out_tmp->next = NULL;
-        }
-    }
+    if ((ret = avfilter_graph_config(fg->graph, NULL)) < 0)
+        return ret;
 
     fg->reconfiguration = 1;
 
     for (i = 0; i < fg->nb_outputs; i++) {
         OutputStream *ost = fg->outputs[i]->ost;
-        if (ost &&
-            ost->enc->type == AVMEDIA_TYPE_AUDIO &&
-            !(ost->enc->capabilities & CODEC_CAP_VARIABLE_FRAME_SIZE))
+        if (!ost->enc) {
+            /* identical to the same check in ffmpeg.c, needed because
+               complex filter graphs are initialized earlier */
+            av_log(NULL, AV_LOG_ERROR, "Encoder (codec %s) not found for output stream #%d:%d\n",
+                     avcodec_get_name(ost->st->codec->codec_id), ost->file_index, ost->index);
+            return AVERROR(EINVAL);
+        }
+        if (ost->enc->type == AVMEDIA_TYPE_AUDIO &&
+            !(ost->enc->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE))
             av_buffersink_set_frame_size(ost->filter->filter,
                                          ost->enc_ctx->frame_size);
     }
diff --git a/ffmpeg_opt.c b/ffmpeg_opt.c
index 3cdabb49..bc8355e2 100644
--- a/ffmpeg_opt.c
+++ b/ffmpeg_opt.c
@@ -40,6 +40,9 @@
 #include "libavutil/parseutils.h"
 #include "libavutil/pixdesc.h"
 #include "libavutil/pixfmt.h"
+#include "libavutil/time_internal.h"
+
+#define DEFAULT_PASS_LOGFILENAME_PREFIX "ffmpeg2pass"
 
 #define MATCH_PER_STREAM_OPT(name, type, outvar, fmtctx, st)\
 {\
@@ -71,7 +74,13 @@ const HWAccel hwaccels[] = {
     { "dxva2", dxva2_init, HWACCEL_DXVA2, AV_PIX_FMT_DXVA2_VLD },
 #endif
 #if CONFIG_VDA
-    { "vda",   vda_init,   HWACCEL_VDA,   AV_PIX_FMT_VDA },
+    { "vda",   videotoolbox_init,   HWACCEL_VDA,   AV_PIX_FMT_VDA },
+#endif
+#if CONFIG_VIDEOTOOLBOX
+    { "videotoolbox",   videotoolbox_init,   HWACCEL_VIDEOTOOLBOX,   AV_PIX_FMT_VIDEOTOOLBOX },
+#endif
+#if CONFIG_LIBMFX
+    { "qsv",   qsv_init,   HWACCEL_QSV,   AV_PIX_FMT_QSV },
 #endif
     { 0 },
 };
@@ -97,6 +106,7 @@ int start_at_zero     = 0;
 int copy_tb           = -1;
 int debug_ts          = 0;
 int exit_on_error     = 0;
+int abort_on_flags    = 0;
 int print_stats       = -1;
 int qp_hist           = 0;
 int stdin_interaction = 1;
@@ -153,12 +163,25 @@ static void init_options(OptionsContext *o)
     o->stop_time = INT64_MAX;
     o->mux_max_delay  = 0.7;
     o->start_time     = AV_NOPTS_VALUE;
+    o->start_time_eof = AV_NOPTS_VALUE;
     o->recording_time = INT64_MAX;
     o->limit_filesize = UINT64_MAX;
     o->chapters_input_file = INT_MAX;
     o->accurate_seek  = 1;
 }
 
+/* -hwaccels handler: print each hwaccels[] name, skipping the { 0 } terminator */
+static int show_hwaccels(void *optctx, const char *opt, const char *arg)
+{
+    const HWAccel *hw;
+
+    printf("Hardware acceleration methods:\n");
+    for (hw = hwaccels; hw < hwaccels + FF_ARRAY_ELEMS(hwaccels) - 1; hw++)
+        printf("%s\n", hw->name);
+    printf("\n");
+    return 0;
+}
+
 /* return a copy of the input with the stream specifiers removed from the keys */
 static AVDictionary *strip_specifiers(AVDictionary *dict)
 {
@@ -177,6 +200,24 @@ static AVDictionary *strip_specifiers(AVDictionary *dict)
     return ret;
 }
 
+/* -abort_on handler: evaluate arg with av_opt_eval_flags() into abort_on_flags */
+static int opt_abort_on(void *optctx, const char *opt, const char *arg)
+{
+    static const AVOption flag_opts[] = {
+        { "abort_on"        , NULL, 0, AV_OPT_TYPE_FLAGS, { .i64 = 0 }, INT64_MIN, INT64_MAX, .unit = "flags" },
+        { "empty_output"    , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = ABORT_ON_FLAG_EMPTY_OUTPUT     },    .unit = "flags" },
+        { NULL },
+    };
+    /* minimal class exposing flag_opts; a pointer to it is the object passed below */
+    static const AVClass flag_class = {
+        .class_name = "", .item_name = av_default_item_name,
+        .option     = flag_opts, .version = LIBAVUTIL_VERSION_INT,
+    };
+    const AVClass *obj = &flag_class;
+
+    return av_opt_eval_flags(&obj, flag_opts, arg, &abort_on_flags);
+}
+
 static int opt_sameq(void *optctx, const char *opt, const char *arg)
 {
     av_log(NULL, AV_LOG_ERROR, "Option '%s' was removed. "
@@ -230,6 +271,7 @@ static int opt_map(void *optctx, const char *opt, const char *arg)
     int sync_file_idx = -1, sync_stream_idx = 0;
     char *p, *sync;
     char *map;
+    char *allow_unused;
 
     if (*arg == '-') {
         negative = 1;
@@ -274,6 +316,8 @@ static int opt_map(void *optctx, const char *opt, const char *arg)
             exit_program(1);
         }
     } else {
+        if (allow_unused = strchr(map, '?'))
+            *allow_unused = 0;
         file_idx = strtol(map, &p, 0);
         if (file_idx >= nb_input_files || file_idx < 0) {
             av_log(NULL, AV_LOG_FATAL, "Invalid input file index: %d.\n", file_idx);
@@ -311,8 +355,13 @@ static int opt_map(void *optctx, const char *opt, const char *arg)
     }
 
     if (!m) {
-        av_log(NULL, AV_LOG_FATAL, "Stream map '%s' matches no streams.\n", arg);
-        exit_program(1);
+        if (allow_unused) {
+            av_log(NULL, AV_LOG_VERBOSE, "Stream map '%s' matches no streams; ignoring.\n", arg);
+        } else {
+            av_log(NULL, AV_LOG_FATAL, "Stream map '%s' matches no streams.\n"
+                                       "To ignore this, add a trailing '?' to the map.\n", arg);
+            exit_program(1);
+        }
     }
 
     av_freep(&map);
@@ -600,6 +649,9 @@ static void add_input_streams(OptionsContext *o, AVFormatContext *ic)
         ist->file_index = nb_input_files;
         ist->discard = 1;
         st->discard  = AVDISCARD_ALL;
+        ist->nb_samples = 0;
+        ist->min_pts = INT64_MAX;
+        ist->max_pts = INT64_MIN;
 
         ist->ts_scale = 1.0;
         MATCH_PER_STREAM_OPT(ts_scale, dbl, ist->ts_scale, ic, st);
@@ -647,9 +699,11 @@ static void add_input_streams(OptionsContext *o, AVFormatContext *ic)
         case AVMEDIA_TYPE_VIDEO:
             if(!ist->dec)
                 ist->dec = avcodec_find_decoder(dec->codec_id);
+#if FF_API_EMU_EDGE
             if (av_codec_get_lowres(dec)) {
                 dec->flags |= CODEC_FLAG_EMU_EDGE;
             }
+#endif
 
             ist->resample_height  = ist->dec_ctx->height;
             ist->resample_width   = ist->dec_ctx->width;
@@ -922,6 +976,12 @@ static int open_input_file(OptionsContext *o, const char *filename)
         }
     }
 
+    if (o->start_time_eof != AV_NOPTS_VALUE) {
+        if (ic->duration>0) {
+            o->start_time = o->start_time_eof + ic->duration;
+        } else
+            av_log(NULL, AV_LOG_WARNING, "Cannot use -sseof, duration of %s not known\n", filename);
+    }
     timestamp = (o->start_time == AV_NOPTS_VALUE) ? 0 : o->start_time;
     /* add the stream start time */
     if (!o->seek_timestamp && ic->start_time != AV_NOPTS_VALUE)
@@ -970,6 +1030,9 @@ static int open_input_file(OptionsContext *o, const char *filename)
     f->nb_streams = ic->nb_streams;
     f->rate_emu   = o->rate_emu;
     f->accurate_seek = o->accurate_seek;
+    f->loop = o->loop;
+    f->duration = 0;
+    f->time_base = (AVRational){ 1, 1 };
 #if HAVE_PTHREADS
     f->thread_queue_size = o->thread_queue_size > 0 ? o->thread_queue_size : 8;
 #endif
@@ -1192,7 +1255,11 @@ static OutputStream *new_output_stream(OptionsContext *o, AVFormatContext *oc, e
             bsfc_prev->next = bsfc;
         else
             ost->bitstream_filters = bsfc;
-        av_dict_set(&ost->bsf_args, bsfc->filter->name, arg, 0);
+        if (arg)
+            if (!(bsfc->args = av_strdup(arg))) {
+                av_log(NULL, AV_LOG_FATAL, "Bitstream filter memory allocation failed\n");
+                exit_program(1);
+            }
 
         bsfc_prev = bsfc;
         bsf       = next;
@@ -1209,7 +1276,7 @@ static OutputStream *new_output_stream(OptionsContext *o, AVFormatContext *oc, e
 
     MATCH_PER_STREAM_OPT(qscale, dbl, qscale, oc, st);
     if (qscale >= 0) {
-        ost->enc_ctx->flags |= CODEC_FLAG_QSCALE;
+        ost->enc_ctx->flags |= AV_CODEC_FLAG_QSCALE;
         ost->enc_ctx->global_quality = FF_QP2LAMBDA * qscale;
     }
 
@@ -1217,9 +1284,9 @@ static OutputStream *new_output_stream(OptionsContext *o, AVFormatContext *oc, e
     ost->disposition = av_strdup(ost->disposition);
 
     if (oc->oformat->flags & AVFMT_GLOBALHEADER)
-        ost->enc_ctx->flags |= CODEC_FLAG_GLOBAL_HEADER;
+        ost->enc_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
 
-    av_opt_get_int(o->g->sws_opts, "sws_flags", 0, &ost->sws_flags);
+    av_dict_copy(&ost->sws_dict, o->g->sws_dict, 0);
 
     av_dict_copy(&ost->swr_opts, o->g->swr_opts, 0);
     if (ost->enc && av_get_exact_bits_per_sample(ost->enc->id) == 24)
@@ -1439,17 +1506,17 @@ static OutputStream *new_video_stream(OptionsContext *o, AVFormatContext *oc, in
         video_enc->rc_override_count = i;
 
         if (do_psnr)
-            video_enc->flags|= CODEC_FLAG_PSNR;
+            video_enc->flags|= AV_CODEC_FLAG_PSNR;
 
         /* two pass mode */
         MATCH_PER_STREAM_OPT(pass, i, do_pass, oc, st);
         if (do_pass) {
             if (do_pass & 1) {
-                video_enc->flags |= CODEC_FLAG_PASS1;
+                video_enc->flags |= AV_CODEC_FLAG_PASS1;
                 av_dict_set(&ost->encoder_opts, "flags", "+pass1", AV_DICT_APPEND);
             }
             if (do_pass & 2) {
-                video_enc->flags |= CODEC_FLAG_PASS2;
+                video_enc->flags |= AV_CODEC_FLAG_PASS2;
                 av_dict_set(&ost->encoder_opts, "flags", "+pass2", AV_DICT_APPEND);
             }
         }
@@ -1459,6 +1526,40 @@ static OutputStream *new_video_stream(OptionsContext *o, AVFormatContext *oc, in
             !(ost->logfile_prefix = av_strdup(ost->logfile_prefix)))
             exit_program(1);
 
+        if (do_pass) {
+            char logfilename[1024];
+            FILE *f;
+
+            snprintf(logfilename, sizeof(logfilename), "%s-%d.log",
+                     ost->logfile_prefix ? ost->logfile_prefix :
+                                           DEFAULT_PASS_LOGFILENAME_PREFIX,
+                     i);
+            if (!strcmp(ost->enc->name, "libx264")) {
+                av_dict_set(&ost->encoder_opts, "stats", logfilename, AV_DICT_DONT_OVERWRITE);
+            } else {
+                if (video_enc->flags & AV_CODEC_FLAG_PASS2) {
+                    char  *logbuffer = read_file(logfilename);
+
+                    if (!logbuffer) {
+                        av_log(NULL, AV_LOG_FATAL, "Error reading log file '%s' for pass-2 encoding\n",
+                               logfilename);
+                        exit_program(1);
+                    }
+                    video_enc->stats_in = logbuffer;
+                }
+                if (video_enc->flags & AV_CODEC_FLAG_PASS1) {
+                    f = av_fopen_utf8(logfilename, "wb");
+                    if (!f) {
+                        av_log(NULL, AV_LOG_FATAL,
+                               "Cannot write log file '%s' for pass-1 encoding: %s\n",
+                               logfilename, strerror(errno));
+                        exit_program(1);
+                    }
+                    ost->logfile = f;
+                }
+            }
+        }
+
         MATCH_PER_STREAM_OPT(forced_key_frames, str, ost->forced_keyframes, oc, st);
         if (ost->forced_keyframes)
             ost->forced_keyframes = av_strdup(ost->forced_keyframes);
@@ -1737,8 +1838,7 @@ static void init_output_filter(OutputFilter *ofilter, OptionsContext *o,
 {
     OutputStream *ost;
 
-    switch (avfilter_pad_get_type(ofilter->out_tmp->filter_ctx->output_pads,
-                                  ofilter->out_tmp->pad_idx)) {
+    switch (ofilter->type) {
     case AVMEDIA_TYPE_VIDEO: ost = new_video_stream(o, oc, -1); break;
     case AVMEDIA_TYPE_AUDIO: ost = new_audio_stream(o, oc, -1); break;
     default:
@@ -1771,13 +1871,21 @@ static void init_output_filter(OutputFilter *ofilter, OptionsContext *o,
         exit_program(1);
     }
 
-    if (configure_output_filter(ofilter->graph, ofilter, ofilter->out_tmp) < 0) {
-        av_log(NULL, AV_LOG_FATAL, "Error configuring filter.\n");
-        exit_program(1);
-    }
     avfilter_inout_free(&ofilter->out_tmp);
 }
 
+/* Run init_complex_filtergraph() on every filtergraph; return the first error or 0. */
+static int init_complex_filters(void)
+{
+    int i, ret = 0;
+
+    /* a negative ret ends the loop early and is handed back unchanged */
+    for (i = 0; i < nb_filtergraphs && ret >= 0; i++)
+        ret = init_complex_filtergraph(filtergraphs[i]);
+
+    return ret < 0 ? ret : 0;
+}
+
 static int configure_complex_filters(void)
 {
     int i, ret = 0;
@@ -1800,10 +1908,6 @@ static int open_output_file(OptionsContext *o, const char *filename)
     AVDictionary *unused_opts = NULL;
     AVDictionaryEntry *e = NULL;
 
-    if (configure_complex_filters() < 0) {
-        av_log(NULL, AV_LOG_FATAL, "Error configuring filters.\n");
-        exit_program(1);
-    }
 
     if (o->stop_time != INT64_MAX && o->recording_time != INT64_MAX) {
         o->stop_time = INT64_MAX;
@@ -1858,8 +1962,7 @@ static int open_output_file(OptionsContext *o, const char *filename)
             if (!ofilter->out_tmp || ofilter->out_tmp->name)
                 continue;
 
-            switch (avfilter_pad_get_type(ofilter->out_tmp->filter_ctx->output_pads,
-                                          ofilter->out_tmp->pad_idx)) {
+            switch (ofilter->type) {
             case AVMEDIA_TYPE_VIDEO:    o->video_disable    = 1; break;
             case AVMEDIA_TYPE_AUDIO:    o->audio_disable    = 1; break;
             case AVMEDIA_TYPE_SUBTITLE: o->subtitle_disable = 1; break;
@@ -1918,7 +2021,7 @@ static int open_output_file(OptionsContext *o, const char *filename)
             for (i = 0; i < nb_input_streams; i++) {
                 int new_area;
                 ist = input_streams[i];
-                new_area = ist->st->codec->width * ist->st->codec->height;
+                new_area = ist->st->codec->width * ist->st->codec->height + 100000000*!!ist->st->codec_info_nb_frames;
                 if((qcr!=MKTAG('A', 'P', 'I', 'C')) && (ist->st->disposition & AV_DISPOSITION_ATTACHED_PIC))
                     new_area = 1;
                 if (ist->st->codec->codec_type == AVMEDIA_TYPE_VIDEO &&
@@ -1935,12 +2038,14 @@ static int open_output_file(OptionsContext *o, const char *filename)
 
         /* audio: most channels */
         if (!o->audio_disable && av_guess_codec(oc->oformat, NULL, filename, NULL, AVMEDIA_TYPE_AUDIO) != AV_CODEC_ID_NONE) {
-            int channels = 0, idx = -1;
+            int best_score = 0, idx = -1;
             for (i = 0; i < nb_input_streams; i++) {
+                int score;
                 ist = input_streams[i];
+                score = ist->st->codec->channels + 100000000*!!ist->st->codec_info_nb_frames;
                 if (ist->st->codec->codec_type == AVMEDIA_TYPE_AUDIO &&
-                    ist->st->codec->channels > channels) {
-                    channels = ist->st->codec->channels;
+                    score > best_score) {
+                    best_score = score;
                     idx = i;
                 }
             }
@@ -2027,6 +2132,7 @@ static int open_output_file(OptionsContext *o, const char *filename)
                 if(o->    data_disable && ist->st->codec->codec_type == AVMEDIA_TYPE_DATA)
                     continue;
 
+                ost = NULL;
                 switch (ist->st->codec->codec_type) {
                 case AVMEDIA_TYPE_VIDEO:      ost = new_video_stream     (o, oc, src_idx); break;
                 case AVMEDIA_TYPE_AUDIO:      ost = new_audio_stream     (o, oc, src_idx); break;
@@ -2050,6 +2156,9 @@ static int open_output_file(OptionsContext *o, const char *filename)
                         exit_program(1);
                     }
                 }
+                if (ost)
+                    ost->sync_ist = input_streams[  input_files[map->sync_file_index]->ist_index
+                                                  + map->sync_stream_index];
             }
         }
     }
@@ -2079,7 +2188,7 @@ static int open_output_file(OptionsContext *o, const char *filename)
         avio_read(pb, attachment, len);
 
         ost = new_attachment_stream(o, oc, -1);
-        ost->stream_copy               = 0;
+        ost->stream_copy               = 1;
         ost->attachment_filename       = o->attachments[i];
         ost->finished                  = 1;
         ost->st->codec->extradata      = attachment;
@@ -2101,6 +2210,12 @@ static int open_output_file(OptionsContext *o, const char *filename)
                 exit_program(1);
     }
 
+    if (!oc->nb_streams && !(oc->oformat->flags & AVFMT_NOSTREAMS)) {
+        av_dump_format(oc, nb_output_files - 1, oc->filename, 1);
+        av_log(NULL, AV_LOG_ERROR, "Output file #%d does not contain any stream\n", nb_output_files - 1);
+        exit_program(1);
+    }
+
     /* check if all codec options have been used */
     unused_opts = strip_specifiers(o->g->codec_opts);
     for (i = of->ost_index; i < nb_output_streams; i++) {
@@ -2143,6 +2258,17 @@ static int open_output_file(OptionsContext *o, const char *filename)
     }
     av_dict_free(&unused_opts);
 
+    /* set the encoding/decoding_needed flags */
+    for (i = of->ost_index; i < nb_output_streams; i++) {
+        OutputStream *ost = output_streams[i];
+
+        ost->encoding_needed = !ost->stream_copy;
+        if (ost->encoding_needed && ost->source_index >= 0) {
+            InputStream *ist = input_streams[ost->source_index];
+            ist->decoding_needed |= DECODING_FOR_OST;
+        }
+    }
+
     /* check filename in case of an image number is expected */
     if (oc->oformat->flags & AVFMT_NEEDNUMBER) {
         if (!av_filename_number_test(oc->filename)) {
@@ -2232,12 +2358,79 @@ static int open_output_file(OptionsContext *o, const char *filename)
             }
         }
 
+    /* process manually set programs */
+    for (i = 0; i < o->nb_program; i++) {
+        const char *p = o->program[i].u.str;
+        int progid = i+1;
+        AVProgram *program;
+
+        while(*p) {
+            const char *p2 = av_get_token(&p, ":");
+            const char *to_dealloc = p2;
+            char *key;
+            if (!p2)
+                break;
+
+            if(*p) p++;
+
+            key = av_get_token(&p2, "=");
+            if (!key || !*p2) {
+                av_freep(&to_dealloc);
+                av_freep(&key);
+                break;
+            }
+            p2++;
+
+            if (!strcmp(key, "program_num"))
+                progid = strtol(p2, NULL, 0);
+            av_freep(&to_dealloc);
+            av_freep(&key);
+        }
+
+        program = av_new_program(oc, progid);
+
+        p = o->program[i].u.str;
+        while(*p) {
+            const char *p2 = av_get_token(&p, ":");
+            const char *to_dealloc = p2;
+            char *key;
+            if (!p2)
+                break;
+            if(*p) p++;
+
+            key = av_get_token(&p2, "=");
+            if (!key) {
+                av_log(NULL, AV_LOG_FATAL,
+                       "No '=' character in program string %s.\n",
+                       p2);
+                exit_program(1);
+            }
+            if (!*p2)
+                exit_program(1);
+            p2++;
+
+            if (!strcmp(key, "title")) {
+                av_dict_set(&program->metadata, "title", p2, 0);
+            } else if (!strcmp(key, "program_num")) {
+            } else if (!strcmp(key, "st")) {
+                int st_num = strtol(p2, NULL, 0);
+                av_program_add_stream_index(oc, progid, st_num);
+            } else {
+                av_log(NULL, AV_LOG_FATAL, "Unknown program key %s.\n", key);
+                exit_program(1);
+            }
+            av_freep(&to_dealloc);
+            av_freep(&key);
+        }
+    }
+
     /* process manually set metadata */
     for (i = 0; i < o->nb_metadata; i++) {
         AVDictionary **m;
         char type, *val;
         const char *stream_spec;
         int index = 0, j, ret = 0;
+        char now_time[256];
 
         val = strchr(o->metadata[i].u.str, '=');
         if (!val) {
@@ -2247,6 +2440,17 @@ static int open_output_file(OptionsContext *o, const char *filename)
         }
         *val++ = 0;
 
+        if (!strcmp(o->metadata[i].u.str, "creation_time") &&
+            !strcmp(val, "now")) {
+            time_t now = time(0);
+            struct tm *ptm, tmbuf;
+            ptm = localtime_r(&now, &tmbuf);
+            if (ptm) {
+                if (strftime(now_time, sizeof(now_time), "%Y-%m-%d %H:%M:%S", ptm))
+                    val = now_time;
+            }
+        }
+
         parse_meta_type(o->metadata[i].specifier, &type, &index, &stream_spec);
         if (type == 's') {
             for (j = 0; j < oc->nb_streams; j++) {
@@ -2272,6 +2476,13 @@ static int open_output_file(OptionsContext *o, const char *filename)
                 }
                 m = &oc->chapters[index]->metadata;
                 break;
+            case 'p':
+                if (index < 0 || index >= oc->nb_programs) {
+                    av_log(NULL, AV_LOG_FATAL, "Invalid program index %d in metadata specifier.\n", index);
+                    exit_program(1);
+                }
+                m = &oc->programs[index]->metadata;
+                break;
             default:
                 av_log(NULL, AV_LOG_FATAL, "Invalid metadata specifier %s.\n", o->metadata[i].specifier);
                 exit_program(1);
@@ -2439,8 +2650,10 @@ static int opt_vstats(void *optctx, const char *opt, const char *arg)
     time_t today2 = time(NULL);
     struct tm *today = localtime(&today2);
 
-    if (!today)
-        return AVERROR(errno);
+    if (!today) { // maybe tomorrow
+        av_log(NULL, AV_LOG_FATAL, "Unable to get current time: %s\n", strerror(errno));
+        exit_program(1);
+    }
 
     snprintf(filename, sizeof(filename), "vstats_%02d%02d%02d.log", today->tm_hour, today->tm_min,
              today->tm_sec);
@@ -2715,6 +2928,7 @@ void show_help_default(const char *opt, const char *arg)
            "    -h      -- print basic options\n"
            "    -h long -- print more options\n"
            "    -h full -- print all options (including all format and codec specific options, very long)\n"
+           "    -h type=name -- print all options for the named decoder/encoder/demuxer/muxer/filter\n"
            "    See man %s for detailed description of the options.\n"
            "\n", program_name);
 
@@ -2842,6 +3056,13 @@ int ffmpeg_parse_options(int argc, char **argv)
         goto fail;
     }
 
+    /* create the complex filtergraphs */
+    ret = init_complex_filters();
+    if (ret < 0) {
+        av_log(NULL, AV_LOG_FATAL, "Error initializing complex filters.\n");
+        goto fail;
+    }
+
     /* open output files */
     ret = open_files(&octx.groups[GROUP_OUTFILE], "output", open_output_file);
     if (ret < 0) {
@@ -2849,6 +3070,13 @@ int ffmpeg_parse_options(int argc, char **argv)
         goto fail;
     }
 
+    /* configure the complex filtergraphs */
+    ret = configure_complex_filters();
+    if (ret < 0) {
+        av_log(NULL, AV_LOG_FATAL, "Error configuring complex filters.\n");
+        goto fail;
+    }
+
 fail:
     uninit_parse_context(&octx);
     if (ret < 0) {
@@ -2923,6 +3151,9 @@ const OptionDef options[] = {
     { "ss",             HAS_ARG | OPT_TIME | OPT_OFFSET |
                         OPT_INPUT | OPT_OUTPUT,                      { .off = OFFSET(start_time) },
         "set the start time offset", "time_off" },
+    { "sseof",          HAS_ARG | OPT_TIME | OPT_OFFSET |
+                        OPT_INPUT | OPT_OUTPUT,                      { .off = OFFSET(start_time_eof) },
+        "set the start time offset relative to EOF", "time_off" },
     { "seek_timestamp", HAS_ARG | OPT_INT | OPT_OFFSET |
                         OPT_INPUT,                                   { .off = OFFSET(seek_timestamp) },
         "enable/disable seeking by timestamp with -ss" },
@@ -2939,6 +3170,8 @@ const OptionDef options[] = {
         "set the recording timestamp ('now' to set the current time)", "time" },
     { "metadata",       HAS_ARG | OPT_STRING | OPT_SPEC | OPT_OUTPUT, { .off = OFFSET(metadata) },
         "add metadata", "string=string" },
+    { "program",        HAS_ARG | OPT_STRING | OPT_SPEC | OPT_OUTPUT, { .off = OFFSET(program) },
+        "add program with specified streams", "title=string:st=number..." },
     { "dframes",        HAS_ARG | OPT_PERFILE | OPT_EXPERT |
                         OPT_OUTPUT,                                  { .func_arg = opt_data_frames },
         "set the number of data frames to output", "number" },
@@ -2960,9 +3193,9 @@ const OptionDef options[] = {
                         OPT_INPUT,                                   { .off = OFFSET(rate_emu) },
         "read input at native frame rate", "" },
     { "target",         HAS_ARG | OPT_PERFILE | OPT_OUTPUT,          { .func_arg = opt_target },
-        "specify target file type (\"vcd\", \"svcd\", \"dvd\","
-        " \"dv\", \"dv50\", \"pal-vcd\", \"ntsc-svcd\", ...)", "type" },
-    { "vsync",          HAS_ARG | OPT_EXPERT,                        { opt_vsync },
+        "specify target file type (\"vcd\", \"svcd\", \"dvd\", \"dv\" or \"dv50\" "
+        "with optional prefixes \"pal-\", \"ntsc-\" or \"film-\")", "type" },
+    { "vsync",          HAS_ARG | OPT_EXPERT,                        { .func_arg = opt_vsync },
         "video sync method", "" },
     { "frame_drop_threshold", HAS_ARG | OPT_FLOAT | OPT_EXPERT,      { &frame_drop_threshold },
         "frame drop threshold", "" },
@@ -2988,6 +3221,8 @@ const OptionDef options[] = {
         "timestamp error delta threshold", "threshold" },
     { "xerror",         OPT_BOOL | OPT_EXPERT,                       { &exit_on_error },
         "exit on error", "error" },
+    { "abort_on",       HAS_ARG | OPT_EXPERT,                        { .func_arg = opt_abort_on },
+        "abort on the specified condition flags", "flags" },
     { "copyinkf",       OPT_BOOL | OPT_EXPERT | OPT_SPEC |
                         OPT_OUTPUT,                                  { .off = OFFSET(copy_initial_nonkeyframes) },
         "copy initial non-keyframes" },
@@ -3026,6 +3261,8 @@ const OptionDef options[] = {
     { "dump_attachment", HAS_ARG | OPT_STRING | OPT_SPEC |
                          OPT_EXPERT | OPT_INPUT,                     { .off = OFFSET(dump_attachment) },
         "extract an attachment into a file", "filename" },
+    { "stream_loop", OPT_INT | HAS_ARG | OPT_EXPERT | OPT_INPUT |
+                        OPT_OFFSET,                                  { .off = OFFSET(loop) }, "set number of times input stream shall be looped", "loop count" },
     { "debug_ts",       OPT_BOOL | OPT_EXPERT,                       { &debug_ts },
         "print timestamp debugging info" },
     { "max_error_rate",  HAS_ARG | OPT_FLOAT,                        { &max_error_rate },
@@ -3082,9 +3319,9 @@ const OptionDef options[] = {
         "this option is deprecated, use the yadif filter instead" },
     { "psnr",         OPT_VIDEO | OPT_BOOL | OPT_EXPERT,                         { &do_psnr },
         "calculate PSNR of compressed frames" },
-    { "vstats",       OPT_VIDEO | OPT_EXPERT ,                                   { &opt_vstats },
+    { "vstats",       OPT_VIDEO | OPT_EXPERT ,                                   { .func_arg = opt_vstats },
         "dump video coding statistics to file" },
-    { "vstats_file",  OPT_VIDEO | HAS_ARG | OPT_EXPERT ,                         { opt_vstats_file },
+    { "vstats_file",  OPT_VIDEO | HAS_ARG | OPT_EXPERT ,                         { .func_arg = opt_vstats_file },
         "dump video coding statistics to file", "file" },
     { "vf",           OPT_VIDEO | HAS_ARG  | OPT_PERFILE | OPT_OUTPUT,           { .func_arg = opt_video_filters },
         "set video filters", "filter_graph" },
@@ -3123,10 +3360,15 @@ const OptionDef options[] = {
         "use HW accelerated decoding", "hwaccel name" },
     { "hwaccel_device",   OPT_VIDEO | OPT_STRING | HAS_ARG | OPT_EXPERT |
                           OPT_SPEC | OPT_INPUT,                                  { .off = OFFSET(hwaccel_devices) },
-        "select a device for HW acceleration" "devicename" },
+        "select a device for HW acceleration", "devicename" },
 #if HAVE_VDPAU_X11
     { "vdpau_api_ver", HAS_ARG | OPT_INT | OPT_EXPERT, { &vdpau_api_ver }, "" },
 #endif
+#if CONFIG_VDA || CONFIG_VIDEOTOOLBOX
+    { "videotoolbox_pixfmt", HAS_ARG | OPT_STRING | OPT_EXPERT, { &videotoolbox_pixfmt}, "" },
+#endif
+    { "hwaccels",         OPT_EXIT,                                              { .func_arg = show_hwaccels },
+        "show available HW acceleration methods" },
     { "autorotate",       HAS_ARG | OPT_BOOL | OPT_SPEC |
                           OPT_EXPERT | OPT_INPUT,                                { .off = OFFSET(autorotate) },
         "automatically insert correct rotate filters" },
@@ -3189,7 +3431,7 @@ const OptionDef options[] = {
         "set the initial demux-decode delay", "seconds" },
     { "override_ffserver", OPT_BOOL | OPT_EXPERT | OPT_OUTPUT, { &override_ffserver },
         "override the options from ffserver", "" },
-    { "sdp_file", HAS_ARG | OPT_EXPERT | OPT_OUTPUT, { opt_sdp_file },
+    { "sdp_file", HAS_ARG | OPT_EXPERT | OPT_OUTPUT, { .func_arg = opt_sdp_file },
         "specify a file in which to print sdp information", "file" },
 
     { "bsf", HAS_ARG | OPT_STRING | OPT_SPEC | OPT_EXPERT | OPT_OUTPUT, { .off = OFFSET(bitstream_filters) },
diff --git a/ffmpeg_qsv.c b/ffmpeg_qsv.c
new file mode 100644
index 00000000..95a23516
--- /dev/null
+++ b/ffmpeg_qsv.c
@@ -0,0 +1,268 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <mfx/mfxvideo.h>
+#include <stdlib.h>
+
+#include "libavutil/dict.h"
+#include "libavutil/mem.h"
+#include "libavutil/opt.h"
+#include "libavcodec/qsv.h"
+
+#include "ffmpeg.h"
+/* State for one QSV decode->encode transcode; the same object is stored in both ist->hwaccel_ctx and ost->hwaccel_ctx. */
+typedef struct QSVContext {
+    OutputStream *ost; /* output stream this context was created for */
+
+    mfxSession session; /* session opened by MFXInit() in qsv_transcode_init() */
+
+    mfxExtOpaqueSurfaceAlloc opaque_alloc; /* ext buffer filled in by init_opaque_surf() */
+    AVBufferRef             *opaque_surfaces_buf; /* our reference to the encoder's opaque surface array */
+
+    uint8_t           *surface_used; /* one flag per surface: 1 while handed out, reset by buffer_release() */
+    mfxFrameSurface1 **surface_ptrs; /* surface_ptrs[i] points at element i of opaque_surfaces_buf->data */
+    int nb_surfaces; /* number of entries in surface_used and surface_ptrs */
+
+    mfxExtBuffer *ext_buffers[1]; /* list given to the decoder context; [0] is &opaque_alloc */
+} QSVContext;
+/* Free callback passed to av_buffer_create() in qsv_get_buffer(): opaque is the surface_used[] flag of the surface. */
+static void buffer_release(void *opaque, uint8_t *data)
+{
+    *(uint8_t*)opaque = 0; /* mark the surface free again; data itself is not released here */
+}
+/* Installed as ist->hwaccel_get_buffer by qsv_init(): give frame the first unused surface, or fail with ENOMEM. */
+static int qsv_get_buffer(AVCodecContext *s, AVFrame *frame, int flags)
+{
+    InputStream *ist = s->opaque;
+    QSVContext  *qsv = ist->hwaccel_ctx;
+    int i;
+
+    for (i = 0; i < qsv->nb_surfaces; i++) {
+        if (qsv->surface_used[i])
+            continue;
+        /* wrap the surface; releasing the buffer only runs buffer_release() on its surface_used[] flag */
+        frame->buf[0] = av_buffer_create((uint8_t*)qsv->surface_ptrs[i], sizeof(*qsv->surface_ptrs[i]),
+                                         buffer_release, &qsv->surface_used[i], 0);
+        if (!frame->buf[0])
+            return AVERROR(ENOMEM);
+        frame->data[3]       = (uint8_t*)qsv->surface_ptrs[i];
+        qsv->surface_used[i] = 1;
+        return 0;
+    }
+    /* every surface is currently marked as used */
+    return AVERROR(ENOMEM);
+}
+/* Build the decoder-side view of the encoder's opaque surfaces; returns 0 or AVERROR(ENOMEM). */
+static int init_opaque_surf(QSVContext *qsv)
+{
+    AVQSVContext *hwctx_enc = qsv->ost->enc_ctx->hwaccel_context;
+    mfxFrameSurface1 *surfaces;
+    int i;
+
+    qsv->nb_surfaces = hwctx_enc->nb_opaque_surfaces;
+    /* NOTE(review): on partial failure the successful allocations stay in qsv; confirm the caller frees them */
+    qsv->opaque_surfaces_buf = av_buffer_ref(hwctx_enc->opaque_surfaces);
+    qsv->surface_ptrs        = av_mallocz_array(qsv->nb_surfaces, sizeof(*qsv->surface_ptrs));
+    qsv->surface_used        = av_mallocz_array(qsv->nb_surfaces, sizeof(*qsv->surface_used));
+    if (!qsv->opaque_surfaces_buf || !qsv->surface_ptrs || !qsv->surface_used)
+        return AVERROR(ENOMEM);
+    /* the buffer data is treated as an array of nb_surfaces mfxFrameSurface1 structs */
+    surfaces = (mfxFrameSurface1*)qsv->opaque_surfaces_buf->data;
+    for (i = 0; i < qsv->nb_surfaces; i++)
+        qsv->surface_ptrs[i] = surfaces + i;
+    /* describe those surfaces in the "Out" half of the opaque allocation ext buffer */
+    qsv->opaque_alloc.Out.Surfaces   = qsv->surface_ptrs;
+    qsv->opaque_alloc.Out.NumSurface = qsv->nb_surfaces;
+    qsv->opaque_alloc.Out.Type       = hwctx_enc->opaque_alloc_type;
+
+    qsv->opaque_alloc.Header.BufferId = MFX_EXTBUFF_OPAQUE_SURFACE_ALLOCATION;
+    qsv->opaque_alloc.Header.BufferSz = sizeof(qsv->opaque_alloc);
+    qsv->ext_buffers[0]               = (mfxExtBuffer*)&qsv->opaque_alloc;
+
+    return 0;
+}
+/* Installed as ist->hwaccel_uninit by qsv_init(): free the encoder/decoder QSV contexts and the shared QSVContext. */
+static void qsv_uninit(AVCodecContext *s)
+{
+    InputStream *ist = s->opaque;
+    QSVContext  *qsv = ist->hwaccel_ctx;
+
+    av_freep(&qsv->ost->enc_ctx->hwaccel_context);
+    av_freep(&s->hwaccel_context);
+
+    av_buffer_unref(&qsv->opaque_surfaces_buf);
+    av_freep(&qsv->surface_used);
+    av_freep(&qsv->surface_ptrs);
+    qsv->ost->hwaccel_ctx = NULL; /* qsv_transcode_init() stored the same pointer in the output stream */
+    av_freep(&ist->hwaccel_ctx);  /* frees qsv and clears the stored pointer, not just a local copy */
+}
+/* Prepare decoder context s for QSV using the state from qsv_transcode_init(); returns 0 or a negative AVERROR. */
+int qsv_init(AVCodecContext *s)
+{
+    InputStream *ist = s->opaque;
+    QSVContext  *qsv = ist->hwaccel_ctx;
+    AVQSVContext *hwctx_dec;
+    int ret;
+
+    if (!qsv) { /* within this file, ist->hwaccel_ctx is only set by qsv_transcode_init() */
+        av_log(NULL, AV_LOG_ERROR, "QSV transcoding is not initialized. "
+               "-hwaccel qsv should only be used for one-to-one QSV transcoding "
+               "with no filters.\n");
+        return AVERROR_BUG;
+    }
+    /* NOTE(review): the error returns below free nothing and hwaccel_uninit is only set at the end - confirm no leak */
+    ret = init_opaque_surf(qsv);
+    if (ret < 0)
+        return ret;
+
+    hwctx_dec = av_qsv_alloc_context();
+    if (!hwctx_dec)
+        return AVERROR(ENOMEM);
+    /* the decoder reuses the shared session and gets the opaque surface description via ext_buffers */
+    hwctx_dec->session        = qsv->session;
+    hwctx_dec->iopattern      = MFX_IOPATTERN_OUT_OPAQUE_MEMORY;
+    hwctx_dec->ext_buffers    = qsv->ext_buffers;
+    hwctx_dec->nb_ext_buffers = FF_ARRAY_ELEMS(qsv->ext_buffers);
+
+    av_freep(&s->hwaccel_context); /* drop any context left from an earlier call */
+    s->hwaccel_context = hwctx_dec;
+
+    ist->hwaccel_get_buffer = qsv_get_buffer;
+    ist->hwaccel_uninit     = qsv_uninit;
+
+    return 0;
+}
+/* Translate ist->hwaccel_device into an mfxIMPL value; MFX_IMPL_AUTO_ANY is used when no device string is set. */
+static mfxIMPL choose_implementation(const InputStream *ist)
+{
+    static const struct {
+        const char *name;
+        mfxIMPL     impl;
+    } impl_map[] = {
+        { "auto",     MFX_IMPL_AUTO         },
+        { "sw",       MFX_IMPL_SOFTWARE     },
+        { "hw",       MFX_IMPL_HARDWARE     },
+        { "auto_any", MFX_IMPL_AUTO_ANY     },
+        { "hw_any",   MFX_IMPL_HARDWARE_ANY },
+        { "hw2",      MFX_IMPL_HARDWARE2    },
+        { "hw3",      MFX_IMPL_HARDWARE3    },
+        { "hw4",      MFX_IMPL_HARDWARE4    },
+    };
+
+    mfxIMPL impl = MFX_IMPL_AUTO_ANY;
+    int i;
+
+    if (ist->hwaccel_device) {
+        for (i = 0; i < FF_ARRAY_ELEMS(impl_map); i++)
+            if (!strcmp(ist->hwaccel_device, impl_map[i].name)) {
+                impl = impl_map[i].impl;
+                break;
+            }
+        if (i == FF_ARRAY_ELEMS(impl_map)) /* no symbolic name matched: read the string as a number */
+            impl = strtol(ist->hwaccel_device, NULL, 0); /* NOTE(review): unvalidated; a non-numeric string gives 0 */
+    }
+
+    return impl;
+}
+/* Set up shared QSV state for ost if it qualifies for QSV-to-QSV transcoding; returns 0 (also when not applicable) or AVERROR_UNKNOWN. */
+int qsv_transcode_init(OutputStream *ost)
+{
+    InputStream *ist;
+    const enum AVPixelFormat *pix_fmt;
+
+    AVDictionaryEntry *e;
+    const AVOption *opt;
+    int flags = 0;
+
+    int err, i;
+
+    QSVContext *qsv = NULL;
+    AVQSVContext *hwctx = NULL;
+    mfxIMPL impl;
+    mfxVersion ver = { { 3, 1 } }; /* presumably API version 1.3 (Minor, Major order) - TODO confirm against mfxVersion */
+
+    /* check if the encoder supports QSV */
+    if (!ost->enc->pix_fmts)
+        return 0;
+    for (pix_fmt = ost->enc->pix_fmts; *pix_fmt != AV_PIX_FMT_NONE; pix_fmt++)
+        if (*pix_fmt == AV_PIX_FMT_QSV)
+            break;
+    if (*pix_fmt == AV_PIX_FMT_NONE)
+        return 0;
+    /* only an output whose filter description is exactly "null" and that has a source stream qualifies */
+    if (strcmp(ost->avfilter, "null") || ost->source_index < 0)
+        return 0;
+
+    /* check if the decoder supports QSV and the output only goes to this stream */
+    ist = input_streams[ost->source_index];
+    if (ist->nb_filters || ist->hwaccel_id != HWACCEL_QSV ||
+        !ist->dec || !ist->dec->pix_fmts)
+        return 0;
+    for (pix_fmt = ist->dec->pix_fmts; *pix_fmt != AV_PIX_FMT_NONE; pix_fmt++)
+        if (*pix_fmt == AV_PIX_FMT_QSV)
+            break;
+    if (*pix_fmt == AV_PIX_FMT_NONE)
+        return 0;
+    /* bail out if any other output stream reads from the same input stream */
+    for (i = 0; i < nb_output_streams; i++)
+        if (output_streams[i] != ost &&
+            output_streams[i]->source_index == ost->source_index)
+            return 0;
+
+    av_log(NULL, AV_LOG_VERBOSE, "Setting up QSV transcoding\n");
+
+    qsv   = av_mallocz(sizeof(*qsv));
+    hwctx = av_qsv_alloc_context();
+    if (!qsv || !hwctx)
+        goto fail;
+
+    impl = choose_implementation(ist);
+
+    err = MFXInit(impl, &ver, &qsv->session);
+    if (err != MFX_ERR_NONE) {
+        av_log(NULL, AV_LOG_ERROR, "Error initializing an MFX session: %d\n", err);
+        goto fail;
+    }
+    /* NOTE(review): flags is evaluated here but never read again in this function */
+    e = av_dict_get(ost->encoder_opts, "flags", NULL, 0);
+    opt = av_opt_find(ost->enc_ctx, "flags", NULL, 0, 0);
+    if (e && opt)
+        av_opt_eval_flags(ost->enc_ctx, opt, e->value, &flags);
+
+    qsv->ost = ost;
+
+    hwctx->session                = qsv->session;
+    hwctx->iopattern              = MFX_IOPATTERN_IN_OPAQUE_MEMORY;
+    hwctx->opaque_alloc           = 1;
+    hwctx->nb_opaque_surfaces     = 16; /* read back by init_opaque_surf() through the encoder context */
+    /* the encoder and the decoder stream both point at the same QSVContext */
+    ost->hwaccel_ctx              = qsv;
+    ost->enc_ctx->hwaccel_context = hwctx;
+    ost->enc_ctx->pix_fmt         = AV_PIX_FMT_QSV;
+
+    ist->hwaccel_ctx              = qsv;
+    ist->dec_ctx->pix_fmt         = AV_PIX_FMT_QSV;
+    ist->resample_pix_fmt         = AV_PIX_FMT_QSV;
+
+    return 0;
+    /* NOTE(review): allocation failure and MFXInit failure are both reported as AVERROR_UNKNOWN */
+fail:
+    av_freep(&hwctx);
+    av_freep(&qsv);
+    return AVERROR_UNKNOWN;
+}
diff --git a/ffmpeg_vda.c b/ffmpeg_vda.c
deleted file mode 100644
index 6fe4ed45..00000000
--- a/ffmpeg_vda.c
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavcodec/avcodec.h"
-#include "libavcodec/vda.h"
-#include "libavutil/imgutils.h"
-
-#include "ffmpeg.h"
-
-typedef struct VDAContext {
-    AVFrame *tmp_frame;
-} VDAContext;
-
-static int vda_retrieve_data(AVCodecContext *s, AVFrame *frame)
-{
-    InputStream *ist = s->opaque;
-    VDAContext  *vda = ist->hwaccel_ctx;
-    CVPixelBufferRef pixbuf = (CVPixelBufferRef)frame->data[3];
-    OSType pixel_format = CVPixelBufferGetPixelFormatType(pixbuf);
-    CVReturn err;
-    uint8_t *data[4] = { 0 };
-    int linesize[4] = { 0 };
-    int planes, ret, i;
-
-    av_frame_unref(vda->tmp_frame);
-
-    switch (pixel_format) {
-    case kCVPixelFormatType_420YpCbCr8Planar: vda->tmp_frame->format = AV_PIX_FMT_YUV420P; break;
-    case kCVPixelFormatType_422YpCbCr8:       vda->tmp_frame->format = AV_PIX_FMT_UYVY422; break;
-    default:
-        av_log(NULL, AV_LOG_ERROR,
-               "Unsupported pixel format: %u\n", pixel_format);
-        return AVERROR(ENOSYS);
-    }
-
-    vda->tmp_frame->width  = frame->width;
-    vda->tmp_frame->height = frame->height;
-    ret = av_frame_get_buffer(vda->tmp_frame, 32);
-    if (ret < 0)
-        return ret;
-
-    err = CVPixelBufferLockBaseAddress(pixbuf, kCVPixelBufferLock_ReadOnly);
-    if (err != kCVReturnSuccess) {
-        av_log(NULL, AV_LOG_ERROR, "Error locking the pixel buffer.\n");
-        return AVERROR_UNKNOWN;
-    }
-
-    if (CVPixelBufferIsPlanar(pixbuf)) {
-
-        planes = CVPixelBufferGetPlaneCount(pixbuf);
-        for (i = 0; i < planes; i++) {
-            data[i]     = CVPixelBufferGetBaseAddressOfPlane(pixbuf, i);
-            linesize[i] = CVPixelBufferGetBytesPerRowOfPlane(pixbuf, i);
-        }
-    } else {
-        data[0] = CVPixelBufferGetBaseAddress(pixbuf);
-        linesize[0] = CVPixelBufferGetBytesPerRow(pixbuf);
-    }
-
-    av_image_copy(vda->tmp_frame->data, vda->tmp_frame->linesize,
-                  (const uint8_t **)data, linesize, vda->tmp_frame->format,
-                  frame->width, frame->height);
-
-    ret = av_frame_copy_props(vda->tmp_frame, frame);
-    CVPixelBufferUnlockBaseAddress(pixbuf, kCVPixelBufferLock_ReadOnly);
-
-    if (ret < 0)
-        return ret;
-
-    av_frame_unref(frame);
-    av_frame_move_ref(frame, vda->tmp_frame);
-
-    return 0;
-}
-
-static void vda_uninit(AVCodecContext *s)
-{
-    InputStream *ist = s->opaque;
-    VDAContext  *vda = ist->hwaccel_ctx;
-
-    ist->hwaccel_uninit        = NULL;
-    ist->hwaccel_retrieve_data = NULL;
-
-    av_frame_free(&vda->tmp_frame);
-
-    av_vda_default_free(s);
-    av_freep(&ist->hwaccel_ctx);
-}
-
-int vda_init(AVCodecContext *s)
-{
-    InputStream *ist = s->opaque;
-    int loglevel = (ist->hwaccel_id == HWACCEL_AUTO) ? AV_LOG_VERBOSE : AV_LOG_ERROR;
-    VDAContext *vda;
-    int ret;
-
-    vda = av_mallocz(sizeof(*vda));
-    if (!vda)
-        return AVERROR(ENOMEM);
-
-    ist->hwaccel_ctx           = vda;
-    ist->hwaccel_uninit        = vda_uninit;
-    ist->hwaccel_retrieve_data = vda_retrieve_data;
-
-    vda->tmp_frame = av_frame_alloc();
-    if (!vda->tmp_frame) {
-        ret = AVERROR(ENOMEM);
-        goto fail;
-    }
-
-    ret = av_vda_default_init(s);
-    if (ret < 0) {
-        av_log(NULL, loglevel, "Error creating VDA decoder.\n");
-        goto fail;
-    }
-
-    return 0;
-fail:
-    vda_uninit(s);
-    return ret;
-}
diff --git a/ffmpeg_vdpau.c b/ffmpeg_vdpau.c
index b05e5576..92a98eab 100644
--- a/ffmpeg_vdpau.c
+++ b/ffmpeg_vdpau.c
@@ -289,7 +289,8 @@ do {
 
         s->hwaccel_context = vdpau_ctx;
     } else
-    if (av_vdpau_bind_context(s, ctx->device, ctx->get_proc_address, 0))
+    if (av_vdpau_bind_context(s, ctx->device, ctx->get_proc_address,
+                              AV_HWACCEL_FLAG_IGNORE_LEVEL))
         goto fail;
 
     ctx->get_information_string(&vendor);
diff --git a/ffmpeg_videotoolbox.c b/ffmpeg_videotoolbox.c
new file mode 100644
index 00000000..744a2a00
--- /dev/null
+++ b/ffmpeg_videotoolbox.c
@@ -0,0 +1,202 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#if HAVE_UTGETOSTYPEFROMSTRING
+#include <CoreServices/CoreServices.h>
+#endif
+
+#include "libavcodec/avcodec.h"
+#if CONFIG_VDA
+#  include "libavcodec/vda.h"
+#endif
+#if CONFIG_VIDEOTOOLBOX
+#  include "libavcodec/videotoolbox.h"
+#endif
+#include "libavutil/imgutils.h"
+#include "ffmpeg.h"
+/* Per-input-stream state for the VideoToolbox/VDA hwaccel, stored in ist->hwaccel_ctx. */
+typedef struct VTContext {
+    AVFrame *tmp_frame; /* scratch frame that videotoolbox_retrieve_data() copies the decoded picture into */
+} VTContext;
+/* Target of the "videotoolbox_pixfmt" command line option; NULL makes videotoolbox_init() use the default init calls. */
+char *videotoolbox_pixfmt;
+/* Installed as ist->hwaccel_retrieve_data: copy the CVPixelBuffer in frame->data[3] into a software frame that replaces frame. */
+static int videotoolbox_retrieve_data(AVCodecContext *s, AVFrame *frame)
+{
+    InputStream *ist = s->opaque;
+    VTContext  *vt = ist->hwaccel_ctx;
+    CVPixelBufferRef pixbuf = (CVPixelBufferRef)frame->data[3];
+    OSType pixel_format = CVPixelBufferGetPixelFormatType(pixbuf);
+    CVReturn err;
+    uint8_t *data[4] = { 0 };
+    int linesize[4] = { 0 };
+    int planes, ret, i;
+    char codec_str[32];
+
+    av_frame_unref(vt->tmp_frame);
+    /* map the CoreVideo pixel format to the matching AVPixelFormat; anything else returns AVERROR(ENOSYS) */
+    switch (pixel_format) {
+    case kCVPixelFormatType_420YpCbCr8Planar: vt->tmp_frame->format = AV_PIX_FMT_YUV420P; break;
+    case kCVPixelFormatType_422YpCbCr8:       vt->tmp_frame->format = AV_PIX_FMT_UYVY422; break;
+    case kCVPixelFormatType_32BGRA:           vt->tmp_frame->format = AV_PIX_FMT_BGRA; break;
+#ifdef kCFCoreFoundationVersionNumber10_7
+    case kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange: vt->tmp_frame->format = AV_PIX_FMT_NV12; break;
+#endif
+    default: /* videotoolbox_pixfmt is NULL unless the option was given, so never pass it to %s unguarded */
+        av_get_codec_tag_string(codec_str, sizeof(codec_str), s->codec_tag);
+        av_log(NULL, AV_LOG_ERROR, "%s: Unsupported pixel format: %s\n", codec_str,
+               videotoolbox_pixfmt ? videotoolbox_pixfmt : "(default)");
+        return AVERROR(ENOSYS);
+    }
+
+    vt->tmp_frame->width  = frame->width;
+    vt->tmp_frame->height = frame->height;
+    ret = av_frame_get_buffer(vt->tmp_frame, 32);
+    if (ret < 0)
+        return ret;
+    /* the base addresses read below are fetched while the buffer is locked read-only */
+    err = CVPixelBufferLockBaseAddress(pixbuf, kCVPixelBufferLock_ReadOnly);
+    if (err != kCVReturnSuccess) {
+        av_log(NULL, AV_LOG_ERROR, "Error locking the pixel buffer.\n");
+        return AVERROR_UNKNOWN;
+    }
+
+    if (CVPixelBufferIsPlanar(pixbuf)) {
+        /* NOTE(review): data[] and linesize[] hold 4 entries; planes is not checked against that */
+        planes = CVPixelBufferGetPlaneCount(pixbuf);
+        for (i = 0; i < planes; i++) {
+            data[i]     = CVPixelBufferGetBaseAddressOfPlane(pixbuf, i);
+            linesize[i] = CVPixelBufferGetBytesPerRowOfPlane(pixbuf, i);
+        }
+    } else {
+        data[0] = CVPixelBufferGetBaseAddress(pixbuf);
+        linesize[0] = CVPixelBufferGetBytesPerRow(pixbuf);
+    }
+
+    av_image_copy(vt->tmp_frame->data, vt->tmp_frame->linesize,
+                  (const uint8_t **)data, linesize, vt->tmp_frame->format,
+                  frame->width, frame->height);
+
+    ret = av_frame_copy_props(vt->tmp_frame, frame);
+    CVPixelBufferUnlockBaseAddress(pixbuf, kCVPixelBufferLock_ReadOnly); /* unlock before acting on ret */
+    if (ret < 0)
+        return ret;
+    /* swap: drop the hardware frame and hand the software copy back in its place */
+    av_frame_unref(frame);
+    av_frame_move_ref(frame, vt->tmp_frame);
+
+    return 0;
+}
+/* Tear down the state created by videotoolbox_init(); also used as that function's failure path. */
+static void videotoolbox_uninit(AVCodecContext *s)
+{
+    InputStream *ist = s->opaque;
+    VTContext  *vt = ist->hwaccel_ctx;
+
+    ist->hwaccel_uninit        = NULL;
+    ist->hwaccel_retrieve_data = NULL;
+
+    av_frame_free(&vt->tmp_frame);
+    /* call the libavcodec free routine for whichever API hwaccel_id selects, if it is compiled in */
+    if (ist->hwaccel_id == HWACCEL_VIDEOTOOLBOX) {
+#if CONFIG_VIDEOTOOLBOX
+        av_videotoolbox_default_free(s);
+#endif
+    } else {
+#if CONFIG_VDA
+        av_vda_default_free(s);
+#endif
+    }
+    av_freep(&ist->hwaccel_ctx); /* frees vt and clears the stored pointer */
+}
+/* Set up VideoToolbox (or VDA) decoding for s; returns 0 or a negative AVERROR after undoing everything via videotoolbox_uninit(). */
+int videotoolbox_init(AVCodecContext *s)
+{
+    InputStream *ist = s->opaque;
+    int loglevel = (ist->hwaccel_id == HWACCEL_AUTO) ? AV_LOG_VERBOSE : AV_LOG_ERROR;
+    int ret = 0;
+    VTContext *vt;
+
+    vt = av_mallocz(sizeof(*vt));
+    if (!vt)
+        return AVERROR(ENOMEM);
+    /* store vt and the callbacks first: the fail path runs videotoolbox_uninit(), which reads ist->hwaccel_ctx */
+    ist->hwaccel_ctx           = vt;
+    ist->hwaccel_uninit        = videotoolbox_uninit;
+    ist->hwaccel_retrieve_data = videotoolbox_retrieve_data;
+    vt->tmp_frame = av_frame_alloc();
+    if (!vt->tmp_frame) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+    /* no videotoolbox_pixfmt: plain default init; otherwise request that CoreVideo pixel format */
+    if (ist->hwaccel_id == HWACCEL_VIDEOTOOLBOX) {
+#if CONFIG_VIDEOTOOLBOX
+        if (!videotoolbox_pixfmt) {
+            ret = av_videotoolbox_default_init(s);
+        } else {
+            AVVideotoolboxContext *vtctx = av_videotoolbox_alloc_context();
+            CFStringRef pixfmt_str = CFStringCreateWithCString(kCFAllocatorDefault,
+                                         videotoolbox_pixfmt, kCFStringEncodingUTF8);
+#if HAVE_UTGETOSTYPEFROMSTRING
+            if (vtctx && pixfmt_str) /* both creation calls can return NULL; keep the default format then */
+                vtctx->cv_pix_fmt_type = UTGetOSTypeFromString(pixfmt_str);
+#else
+            av_log(s, loglevel, "UTGetOSTypeFromString() is not available "
+                   "on this platform, %s pixel format can not be honored from "
+                   "the command line\n", videotoolbox_pixfmt);
+#endif
+            ret = vtctx ? av_videotoolbox_default_init2(s, vtctx) : AVERROR(ENOMEM);
+            if (pixfmt_str) /* CFRelease() must not be given NULL */
+                CFRelease(pixfmt_str);
+        }
+#endif
+    } else {
+#if CONFIG_VDA
+        if (!videotoolbox_pixfmt) {
+            ret = av_vda_default_init(s);
+        } else {
+            AVVDAContext *vdactx = av_vda_alloc_context();
+            CFStringRef pixfmt_str = CFStringCreateWithCString(kCFAllocatorDefault,
+                                         videotoolbox_pixfmt, kCFStringEncodingUTF8);
+#if HAVE_UTGETOSTYPEFROMSTRING
+            if (vdactx && pixfmt_str) /* same NULL guards as the VideoToolbox branch */
+                vdactx->cv_pix_fmt_type = UTGetOSTypeFromString(pixfmt_str);
+#else
+            av_log(s, loglevel, "UTGetOSTypeFromString() is not available "
+                   "on this platform, %s pixel format can not be honored from "
+                   "the command line\n", videotoolbox_pixfmt);
+#endif
+            ret = vdactx ? av_vda_default_init2(s, vdactx) : AVERROR(ENOMEM);
+            if (pixfmt_str)
+                CFRelease(pixfmt_str);
+        }
+#endif
+    }
+    if (ret < 0) {
+        av_log(NULL, loglevel,
+               "Error creating %s decoder.\n", ist->hwaccel_id == HWACCEL_VIDEOTOOLBOX ? "Videotoolbox" : "VDA");
+        goto fail;
+    }
+    return 0;
+fail:
+    videotoolbox_uninit(s);
+    return ret;
+}
diff --git a/ffplay.c b/ffplay.c
index 58034c60..2cfdf26e 100644
--- a/ffplay.c
+++ b/ffplay.c
@@ -31,7 +31,6 @@
 #include <stdint.h>
 
 #include "libavutil/avstring.h"
-#include "libavutil/colorspace.h"
 #include "libavutil/eval.h"
 #include "libavutil/mathematics.h"
 #include "libavutil/pixdesc.h"
@@ -49,7 +48,6 @@
 #include "libswresample/swresample.h"
 
 #if CONFIG_AVFILTER
-# include "libavfilter/avcodec.h"
 # include "libavfilter/avfilter.h"
 # include "libavfilter/buffersink.h"
 # include "libavfilter/buffersrc.h"
@@ -66,13 +64,18 @@ const char program_name[] = "ffplay";
 const int program_birth_year = 2003;
 
 #define MAX_QUEUE_SIZE (15 * 1024 * 1024)
-#define MIN_FRAMES 5
+#define MIN_FRAMES 25
+#define EXTERNAL_CLOCK_MIN_FRAMES 2
+#define EXTERNAL_CLOCK_MAX_FRAMES 10
 
 /* Minimum SDL audio buffer size, in samples. */
 #define SDL_AUDIO_MIN_BUFFER_SIZE 512
 /* Calculate actual buffer size keeping in mind not cause too frequent audio callbacks */
 #define SDL_AUDIO_MAX_CALLBACKS_PER_SEC 30
 
+/* Step size for volume control */
+#define SDL_VOLUME_STEP (SDL_MIX_MAXVOLUME / 50)
+
 /* no AV sync correction is done if below the minimum AV sync threshold */
 #define AV_SYNC_THRESHOLD_MIN 0.04
 /* AV sync correction is done if above the maximum AV sync threshold */
@@ -102,7 +105,7 @@ const int program_birth_year = 2003;
 
 #define CURSOR_HIDE_DELAY 1000000
 
-static int64_t sws_flags = SWS_BICUBIC;
+static unsigned sws_flags = SWS_BICUBIC;
 
 typedef struct MyAVPacketList {
     AVPacket pkt;
@@ -148,6 +151,7 @@ typedef struct Clock {
 typedef struct Frame {
     AVFrame *frame;
     AVSubtitle sub;
+    AVSubtitleRect **subrects;  /* rescaled subtitle rectangles in yuva */
     int serial;
     double pts;           /* presentation timestamp for the frame */
     double duration;      /* estimated duration of the frame */
@@ -223,6 +227,9 @@ typedef struct VideoState {
     Decoder viddec;
     Decoder subdec;
 
+    int viddec_width;
+    int viddec_height;
+
     int audio_stream;
 
     int av_sync_type;
@@ -243,6 +250,8 @@ typedef struct VideoState {
     unsigned int audio_buf1_size;
     int audio_buf_index; /* in bytes */
     int audio_write_buf_size;
+    int audio_volume;
+    int muted;
     struct AudioParams audio_src;
 #if CONFIG_AVFILTER
     struct AudioParams audio_filter_src;
@@ -278,10 +287,11 @@ typedef struct VideoState {
 #if !CONFIG_AVFILTER
     struct SwsContext *img_convert_ctx;
 #endif
+    struct SwsContext *sub_convert_ctx;
     SDL_Rect last_display_rect;
     int eof;
 
-    char filename[1024];
+    char *filename;
     int width, height, xleft, ytop;
     int step;
 
@@ -417,16 +427,12 @@ static int packet_queue_put(PacketQueue *q, AVPacket *pkt)
 {
     int ret;
 
-    /* duplicate the packet */
-    if (pkt != &flush_pkt && av_dup_packet(pkt) < 0)
-        return -1;
-
     SDL_LockMutex(q->mutex);
     ret = packet_queue_put_private(q, pkt);
     SDL_UnlockMutex(q->mutex);
 
     if (pkt != &flush_pkt && ret < 0)
-        av_free_packet(pkt);
+        av_packet_unref(pkt);
 
     return ret;
 }
@@ -442,12 +448,21 @@ static int packet_queue_put_nullpacket(PacketQueue *q, int stream_index)
 }
 
 /* packet queue handling */
-static void packet_queue_init(PacketQueue *q)
+static int packet_queue_init(PacketQueue *q)
 {
     memset(q, 0, sizeof(PacketQueue));
     q->mutex = SDL_CreateMutex();
+    if (!q->mutex) {
+        av_log(NULL, AV_LOG_FATAL, "SDL_CreateMutex(): %s\n", SDL_GetError());
+        return AVERROR(ENOMEM);
+    }
     q->cond = SDL_CreateCond();
+    if (!q->cond) {
+        av_log(NULL, AV_LOG_FATAL, "SDL_CreateCond(): %s\n", SDL_GetError());
+        return AVERROR(ENOMEM);
+    }
     q->abort_request = 1;
+    return 0;
 }
 
 static void packet_queue_flush(PacketQueue *q)
@@ -457,7 +472,7 @@ static void packet_queue_flush(PacketQueue *q)
     SDL_LockMutex(q->mutex);
     for (pkt = q->first_pkt; pkt; pkt = pkt1) {
         pkt1 = pkt->next;
-        av_free_packet(&pkt->pkt);
+        av_packet_unref(&pkt->pkt);
         av_freep(&pkt);
     }
     q->last_pkt = NULL;
@@ -562,7 +577,7 @@ static int decoder_decode_frame(Decoder *d, AVFrame *frame, AVSubtitle *sub) {
                     d->next_pts_tb = d->start_pts_tb;
                 }
             } while (pkt.data == flush_pkt.data || d->queue->serial != d->pkt_serial);
-            av_free_packet(&d->pkt);
+            av_packet_unref(&d->pkt);
             d->pkt_temp = d->pkt = pkt;
             d->packet_pending = 1;
         }
@@ -626,11 +641,17 @@ static int decoder_decode_frame(Decoder *d, AVFrame *frame, AVSubtitle *sub) {
 }
 
 static void decoder_destroy(Decoder *d) {
-    av_free_packet(&d->pkt);
+    av_packet_unref(&d->pkt);
 }
 
 static void frame_queue_unref_item(Frame *vp)
 {
+    int i;
+    for (i = 0; i < vp->sub.num_rects; i++) {
+        av_freep(&vp->subrects[i]->data[0]);
+        av_freep(&vp->subrects[i]);
+    }
+    av_freep(&vp->subrects);
     av_frame_unref(vp->frame);
     avsubtitle_free(&vp->sub);
 }
@@ -639,10 +660,14 @@ static int frame_queue_init(FrameQueue *f, PacketQueue *pktq, int max_size, int
 {
     int i;
     memset(f, 0, sizeof(FrameQueue));
-    if (!(f->mutex = SDL_CreateMutex()))
+    if (!(f->mutex = SDL_CreateMutex())) {
+        av_log(NULL, AV_LOG_FATAL, "SDL_CreateMutex(): %s\n", SDL_GetError());
         return AVERROR(ENOMEM);
-    if (!(f->cond = SDL_CreateCond()))
+    }
+    if (!(f->cond = SDL_CreateCond())) {
+        av_log(NULL, AV_LOG_FATAL, "SDL_CreateCond(): %s\n", SDL_GetError());
         return AVERROR(ENOMEM);
+    }
     f->pktq = pktq;
     f->max_size = FFMIN(max_size, FRAME_QUEUE_SIZE);
     f->keep_last = !!keep_last;
@@ -829,229 +854,50 @@ static void fill_border(int xleft, int ytop, int width, int height, int x, int y
 #define ALPHA_BLEND(a, oldp, newp, s)\
 ((((oldp << s) * (255 - (a))) + (newp * (a))) / (255 << s))
 
-#define RGBA_IN(r, g, b, a, s)\
-{\
-    unsigned int v = ((const uint32_t *)(s))[0];\
-    a = (v >> 24) & 0xff;\
-    r = (v >> 16) & 0xff;\
-    g = (v >> 8) & 0xff;\
-    b = v & 0xff;\
-}
-
-#define YUVA_IN(y, u, v, a, s, pal)\
-{\
-    unsigned int val = ((const uint32_t *)(pal))[*(const uint8_t*)(s)];\
-    a = (val >> 24) & 0xff;\
-    y = (val >> 16) & 0xff;\
-    u = (val >> 8) & 0xff;\
-    v = val & 0xff;\
-}
-
-#define YUVA_OUT(d, y, u, v, a)\
-{\
-    ((uint32_t *)(d))[0] = (a << 24) | (y << 16) | (u << 8) | v;\
-}
 
 
 #define BPP 1
 
-static void blend_subrect(AVPicture *dst, const AVSubtitleRect *rect, int imgw, int imgh)
+static void blend_subrect(uint8_t **data, int *linesize, const AVSubtitleRect *rect, int imgw, int imgh)
 {
-    int wrap, wrap3, width2, skip2;
-    int y, u, v, a, u1, v1, a1, w, h;
+    int x, y, Y, U, V, A;
     uint8_t *lum, *cb, *cr;
-    const uint8_t *p;
-    const uint32_t *pal;
     int dstx, dsty, dstw, dsth;
+    const AVSubtitleRect *src = rect;
 
     dstw = av_clip(rect->w, 0, imgw);
     dsth = av_clip(rect->h, 0, imgh);
     dstx = av_clip(rect->x, 0, imgw - dstw);
     dsty = av_clip(rect->y, 0, imgh - dsth);
-    lum = dst->data[0] + dsty * dst->linesize[0];
-    cb  = dst->data[1] + (dsty >> 1) * dst->linesize[1];
-    cr  = dst->data[2] + (dsty >> 1) * dst->linesize[2];
-
-    width2 = ((dstw + 1) >> 1) + (dstx & ~dstw & 1);
-    skip2 = dstx >> 1;
-    wrap = dst->linesize[0];
-    wrap3 = rect->pict.linesize[0];
-    p = rect->pict.data[0];
-    pal = (const uint32_t *)rect->pict.data[1];  /* Now in YCrCb! */
-
-    if (dsty & 1) {
-        lum += dstx;
-        cb += skip2;
-        cr += skip2;
-
-        if (dstx & 1) {
-            YUVA_IN(y, u, v, a, p, pal);
-            lum[0] = ALPHA_BLEND(a, lum[0], y, 0);
-            cb[0] = ALPHA_BLEND(a >> 2, cb[0], u, 0);
-            cr[0] = ALPHA_BLEND(a >> 2, cr[0], v, 0);
-            cb++;
-            cr++;
-            lum++;
-            p += BPP;
-        }
-        for (w = dstw - (dstx & 1); w >= 2; w -= 2) {
-            YUVA_IN(y, u, v, a, p, pal);
-            u1 = u;
-            v1 = v;
-            a1 = a;
-            lum[0] = ALPHA_BLEND(a, lum[0], y, 0);
-
-            YUVA_IN(y, u, v, a, p + BPP, pal);
-            u1 += u;
-            v1 += v;
-            a1 += a;
-            lum[1] = ALPHA_BLEND(a, lum[1], y, 0);
-            cb[0] = ALPHA_BLEND(a1 >> 2, cb[0], u1, 1);
-            cr[0] = ALPHA_BLEND(a1 >> 2, cr[0], v1, 1);
-            cb++;
-            cr++;
-            p += 2 * BPP;
-            lum += 2;
-        }
-        if (w) {
-            YUVA_IN(y, u, v, a, p, pal);
-            lum[0] = ALPHA_BLEND(a, lum[0], y, 0);
-            cb[0] = ALPHA_BLEND(a >> 2, cb[0], u, 0);
-            cr[0] = ALPHA_BLEND(a >> 2, cr[0], v, 0);
-            p++;
+    lum = data[0] + dstx + dsty * linesize[0];
+    cb  = data[1] + dstx/2 + (dsty >> 1) * linesize[1];
+    cr  = data[2] + dstx/2 + (dsty >> 1) * linesize[2];
+
+    for (y = 0; y<dsth; y++) {
+        for (x = 0; x<dstw; x++) {
+            Y = src->data[0][x + y*src->linesize[0]];
+            A = src->data[3][x + y*src->linesize[3]];
+            lum[0] = ALPHA_BLEND(A, lum[0], Y, 0);
             lum++;
         }
-        p += wrap3 - dstw * BPP;
-        lum += wrap - dstw - dstx;
-        cb += dst->linesize[1] - width2 - skip2;
-        cr += dst->linesize[2] - width2 - skip2;
-    }
-    for (h = dsth - (dsty & 1); h >= 2; h -= 2) {
-        lum += dstx;
-        cb += skip2;
-        cr += skip2;
-
-        if (dstx & 1) {
-            YUVA_IN(y, u, v, a, p, pal);
-            u1 = u;
-            v1 = v;
-            a1 = a;
-            lum[0] = ALPHA_BLEND(a, lum[0], y, 0);
-            p += wrap3;
-            lum += wrap;
-            YUVA_IN(y, u, v, a, p, pal);
-            u1 += u;
-            v1 += v;
-            a1 += a;
-            lum[0] = ALPHA_BLEND(a, lum[0], y, 0);
-            cb[0] = ALPHA_BLEND(a1 >> 2, cb[0], u1, 1);
-            cr[0] = ALPHA_BLEND(a1 >> 2, cr[0], v1, 1);
-            cb++;
-            cr++;
-            p += -wrap3 + BPP;
-            lum += -wrap + 1;
-        }
-        for (w = dstw - (dstx & 1); w >= 2; w -= 2) {
-            YUVA_IN(y, u, v, a, p, pal);
-            u1 = u;
-            v1 = v;
-            a1 = a;
-            lum[0] = ALPHA_BLEND(a, lum[0], y, 0);
-
-            YUVA_IN(y, u, v, a, p + BPP, pal);
-            u1 += u;
-            v1 += v;
-            a1 += a;
-            lum[1] = ALPHA_BLEND(a, lum[1], y, 0);
-            p += wrap3;
-            lum += wrap;
-
-            YUVA_IN(y, u, v, a, p, pal);
-            u1 += u;
-            v1 += v;
-            a1 += a;
-            lum[0] = ALPHA_BLEND(a, lum[0], y, 0);
-
-            YUVA_IN(y, u, v, a, p + BPP, pal);
-            u1 += u;
-            v1 += v;
-            a1 += a;
-            lum[1] = ALPHA_BLEND(a, lum[1], y, 0);
-
-            cb[0] = ALPHA_BLEND(a1 >> 2, cb[0], u1, 2);
-            cr[0] = ALPHA_BLEND(a1 >> 2, cr[0], v1, 2);
+        lum += linesize[0] - dstw;
+    }
 
+    for (y = 0; y<dsth/2; y++) {
+        for (x = 0; x<dstw/2; x++) {
+            U = src->data[1][x + y*src->linesize[1]];
+            V = src->data[2][x + y*src->linesize[2]];
+            A = src->data[3][2*x     +  2*y   *src->linesize[3]]
+              + src->data[3][2*x + 1 +  2*y   *src->linesize[3]]
+              + src->data[3][2*x + 1 + (2*y+1)*src->linesize[3]]
+              + src->data[3][2*x     + (2*y+1)*src->linesize[3]];
+            cb[0] = ALPHA_BLEND(A>>2, cb[0], U, 0);
+            cr[0] = ALPHA_BLEND(A>>2, cr[0], V, 0);
             cb++;
             cr++;
-            p += -wrap3 + 2 * BPP;
-            lum += -wrap + 2;
-        }
-        if (w) {
-            YUVA_IN(y, u, v, a, p, pal);
-            u1 = u;
-            v1 = v;
-            a1 = a;
-            lum[0] = ALPHA_BLEND(a, lum[0], y, 0);
-            p += wrap3;
-            lum += wrap;
-            YUVA_IN(y, u, v, a, p, pal);
-            u1 += u;
-            v1 += v;
-            a1 += a;
-            lum[0] = ALPHA_BLEND(a, lum[0], y, 0);
-            cb[0] = ALPHA_BLEND(a1 >> 2, cb[0], u1, 1);
-            cr[0] = ALPHA_BLEND(a1 >> 2, cr[0], v1, 1);
-            cb++;
-            cr++;
-            p += -wrap3 + BPP;
-            lum += -wrap + 1;
-        }
-        p += wrap3 + (wrap3 - dstw * BPP);
-        lum += wrap + (wrap - dstw - dstx);
-        cb += dst->linesize[1] - width2 - skip2;
-        cr += dst->linesize[2] - width2 - skip2;
-    }
-    /* handle odd height */
-    if (h) {
-        lum += dstx;
-        cb += skip2;
-        cr += skip2;
-
-        if (dstx & 1) {
-            YUVA_IN(y, u, v, a, p, pal);
-            lum[0] = ALPHA_BLEND(a, lum[0], y, 0);
-            cb[0] = ALPHA_BLEND(a >> 2, cb[0], u, 0);
-            cr[0] = ALPHA_BLEND(a >> 2, cr[0], v, 0);
-            cb++;
-            cr++;
-            lum++;
-            p += BPP;
-        }
-        for (w = dstw - (dstx & 1); w >= 2; w -= 2) {
-            YUVA_IN(y, u, v, a, p, pal);
-            u1 = u;
-            v1 = v;
-            a1 = a;
-            lum[0] = ALPHA_BLEND(a, lum[0], y, 0);
-
-            YUVA_IN(y, u, v, a, p + BPP, pal);
-            u1 += u;
-            v1 += v;
-            a1 += a;
-            lum[1] = ALPHA_BLEND(a, lum[1], y, 0);
-            cb[0] = ALPHA_BLEND(a1 >> 2, cb[0], u, 1);
-            cr[0] = ALPHA_BLEND(a1 >> 2, cr[0], v, 1);
-            cb++;
-            cr++;
-            p += 2 * BPP;
-            lum += 2;
-        }
-        if (w) {
-            YUVA_IN(y, u, v, a, p, pal);
-            lum[0] = ALPHA_BLEND(a, lum[0], y, 0);
-            cb[0] = ALPHA_BLEND(a >> 2, cb[0], u, 0);
-            cr[0] = ALPHA_BLEND(a >> 2, cr[0], v, 0);
         }
+        cb += linesize[1] - dstw/2;
+        cr += linesize[2] - dstw/2;
     }
 }
 
@@ -1081,10 +927,10 @@ static void calculate_display_rect(SDL_Rect *rect,
 
     /* XXX: we suppose the screen has a 1.0 pixel ratio */
     height = scr_height;
-    width = ((int)rint(height * aspect_ratio)) & ~1;
+    width = lrint(height * aspect_ratio) & ~1;
     if (width > scr_width) {
         width = scr_width;
-        height = ((int)rint(width / aspect_ratio)) & ~1;
+        height = lrint(width / aspect_ratio) & ~1;
     }
     x = (scr_width - width) / 2;
     y = (scr_height - height) / 2;
@@ -1098,7 +944,6 @@ static void video_image_display(VideoState *is)
 {
     Frame *vp;
     Frame *sp;
-    AVPicture pict;
     SDL_Rect rect;
     int i;
 
@@ -1109,18 +954,21 @@ static void video_image_display(VideoState *is)
                 sp = frame_queue_peek(&is->subpq);
 
                 if (vp->pts >= sp->pts + ((float) sp->sub.start_display_time / 1000)) {
+                    uint8_t *data[4];
+                    int linesize[4];
+
                     SDL_LockYUVOverlay (vp->bmp);
 
-                    pict.data[0] = vp->bmp->pixels[0];
-                    pict.data[1] = vp->bmp->pixels[2];
-                    pict.data[2] = vp->bmp->pixels[1];
+                    data[0] = vp->bmp->pixels[0];
+                    data[1] = vp->bmp->pixels[2];
+                    data[2] = vp->bmp->pixels[1];
 
-                    pict.linesize[0] = vp->bmp->pitches[0];
-                    pict.linesize[1] = vp->bmp->pitches[2];
-                    pict.linesize[2] = vp->bmp->pitches[1];
+                    linesize[0] = vp->bmp->pitches[0];
+                    linesize[1] = vp->bmp->pitches[2];
+                    linesize[2] = vp->bmp->pitches[1];
 
                     for (i = 0; i < sp->sub.num_rects; i++)
-                        blend_subrect(&pict, sp->sub.rects[i],
+                        blend_subrect(data, linesize, sp->subrects[i],
                                       vp->bmp->w, vp->bmp->h);
 
                     SDL_UnlockYUVOverlay (vp->bmp);
@@ -1269,9 +1117,9 @@ static void video_audio_display(VideoState *s)
              * directly access it but it is more than fast enough. */
             for (y = 0; y < s->height; y++) {
                 double w = 1 / sqrt(nb_freq);
-                int a = sqrt(w * sqrt(data[0][2 * y + 0] * data[0][2 * y + 0] + data[0][2 * y + 1] * data[0][2 * y + 1]));
-                int b = (nb_display_channels == 2 ) ? sqrt(w * sqrt(data[1][2 * y + 0] * data[1][2 * y + 0]
-                       + data[1][2 * y + 1] * data[1][2 * y + 1])) : a;
+                int a = sqrt(w * hypot(data[0][2 * y + 0], data[0][2 * y + 1]));
+                int b = (nb_display_channels == 2 ) ? sqrt(w * hypot(data[1][2 * y + 0], data[1][2 * y + 1]))
+                                                    : a;
                 a = FFMIN(a, 255);
                 b = FFMIN(b, 255);
                 fgcolor = SDL_MapRGB(screen->format, a, b, (a + b) / 2);
@@ -1289,11 +1137,80 @@ static void video_audio_display(VideoState *s)
     }
 }
 
+static void stream_component_close(VideoState *is, int stream_index)
+{
+    AVFormatContext *ic = is->ic;
+    AVCodecContext *avctx;
+
+    if (stream_index < 0 || stream_index >= ic->nb_streams)
+        return;
+    avctx = ic->streams[stream_index]->codec;
+
+    switch (avctx->codec_type) {
+    case AVMEDIA_TYPE_AUDIO:
+        decoder_abort(&is->auddec, &is->sampq);
+        SDL_CloseAudio();
+        decoder_destroy(&is->auddec);
+        swr_free(&is->swr_ctx);
+        av_freep(&is->audio_buf1);
+        is->audio_buf1_size = 0;
+        is->audio_buf = NULL;
+
+        if (is->rdft) {
+            av_rdft_end(is->rdft);
+            av_freep(&is->rdft_data);
+            is->rdft = NULL;
+            is->rdft_bits = 0;
+        }
+        break;
+    case AVMEDIA_TYPE_VIDEO:
+        decoder_abort(&is->viddec, &is->pictq);
+        decoder_destroy(&is->viddec);
+        break;
+    case AVMEDIA_TYPE_SUBTITLE:
+        decoder_abort(&is->subdec, &is->subpq);
+        decoder_destroy(&is->subdec);
+        break;
+    default:
+        break;
+    }
+
+    ic->streams[stream_index]->discard = AVDISCARD_ALL;
+    avcodec_close(avctx);
+    switch (avctx->codec_type) {
+    case AVMEDIA_TYPE_AUDIO:
+        is->audio_st = NULL;
+        is->audio_stream = -1;
+        break;
+    case AVMEDIA_TYPE_VIDEO:
+        is->video_st = NULL;
+        is->video_stream = -1;
+        break;
+    case AVMEDIA_TYPE_SUBTITLE:
+        is->subtitle_st = NULL;
+        is->subtitle_stream = -1;
+        break;
+    default:
+        break;
+    }
+}
+
 static void stream_close(VideoState *is)
 {
     /* XXX: use a special url_shutdown call to abort parse cleanly */
     is->abort_request = 1;
     SDL_WaitThread(is->read_tid, NULL);
+
+    /* close each stream */
+    if (is->audio_stream >= 0)
+        stream_component_close(is, is->audio_stream);
+    if (is->video_stream >= 0)
+        stream_component_close(is, is->video_stream);
+    if (is->subtitle_stream >= 0)
+        stream_component_close(is, is->subtitle_stream);
+
+    avformat_close_input(&is->ic);
+
     packet_queue_destroy(&is->videoq);
     packet_queue_destroy(&is->audioq);
     packet_queue_destroy(&is->subtitleq);
@@ -1306,6 +1223,8 @@ static void stream_close(VideoState *is)
 #if !CONFIG_AVFILTER
     sws_freeContext(is->img_convert_ctx);
 #endif
+    sws_freeContext(is->sub_convert_ctx);
+    av_free(is->filename);
     av_free(is);
 }
 
@@ -1475,11 +1394,11 @@ static double get_master_clock(VideoState *is)
 }
 
 static void check_external_clock_speed(VideoState *is) {
-   if (is->video_stream >= 0 && is->videoq.nb_packets <= MIN_FRAMES / 2 ||
-       is->audio_stream >= 0 && is->audioq.nb_packets <= MIN_FRAMES / 2) {
+   if (is->video_stream >= 0 && is->videoq.nb_packets <= EXTERNAL_CLOCK_MIN_FRAMES ||
+       is->audio_stream >= 0 && is->audioq.nb_packets <= EXTERNAL_CLOCK_MIN_FRAMES) {
        set_clock_speed(&is->extclk, FFMAX(EXTERNAL_CLOCK_SPEED_MIN, is->extclk.speed - EXTERNAL_CLOCK_SPEED_STEP));
-   } else if ((is->video_stream < 0 || is->videoq.nb_packets > MIN_FRAMES * 2) &&
-              (is->audio_stream < 0 || is->audioq.nb_packets > MIN_FRAMES * 2)) {
+   } else if ((is->video_stream < 0 || is->videoq.nb_packets > EXTERNAL_CLOCK_MAX_FRAMES) &&
+              (is->audio_stream < 0 || is->audioq.nb_packets > EXTERNAL_CLOCK_MAX_FRAMES)) {
        set_clock_speed(&is->extclk, FFMIN(EXTERNAL_CLOCK_SPEED_MAX, is->extclk.speed + EXTERNAL_CLOCK_SPEED_STEP));
    } else {
        double speed = is->extclk.speed;
@@ -1522,6 +1441,16 @@ static void toggle_pause(VideoState *is)
     is->step = 0;
 }
 
+static void toggle_mute(VideoState *is)
+{
+    is->muted = !is->muted;
+}
+
+static void update_volume(VideoState *is, int sign, int step)
+{
+    is->audio_volume = av_clip(is->audio_volume + sign * step, 0, SDL_MIX_MAXVOLUME);
+}
+
 static void step_to_next_frame(VideoState *is)
 {
     /* if the stream is paused unpause it, then step */
@@ -1833,25 +1762,37 @@ static int queue_picture(VideoState *is, AVFrame *src_frame, double pts, double
 
     /* if the frame is not skipped, then display it */
     if (vp->bmp) {
-        AVPicture pict = { { 0 } };
+        uint8_t *data[4];
+        int linesize[4];
 
         /* get a pointer on the bitmap */
         SDL_LockYUVOverlay (vp->bmp);
 
-        pict.data[0] = vp->bmp->pixels[0];
-        pict.data[1] = vp->bmp->pixels[2];
-        pict.data[2] = vp->bmp->pixels[1];
+        data[0] = vp->bmp->pixels[0];
+        data[1] = vp->bmp->pixels[2];
+        data[2] = vp->bmp->pixels[1];
 
-        pict.linesize[0] = vp->bmp->pitches[0];
-        pict.linesize[1] = vp->bmp->pitches[2];
-        pict.linesize[2] = vp->bmp->pitches[1];
+        linesize[0] = vp->bmp->pitches[0];
+        linesize[1] = vp->bmp->pitches[2];
+        linesize[2] = vp->bmp->pitches[1];
 
 #if CONFIG_AVFILTER
         // FIXME use direct rendering
-        av_picture_copy(&pict, (AVPicture *)src_frame,
+        av_image_copy(data, linesize, (const uint8_t **)src_frame->data, src_frame->linesize,
                         src_frame->format, vp->width, vp->height);
 #else
-        av_opt_get_int(sws_opts, "sws_flags", 0, &sws_flags);
+        {
+            AVDictionaryEntry *e = av_dict_get(sws_dict, "sws_flags", NULL, 0);
+            if (e) {
+                const AVClass *class = sws_get_class();
+                const AVOption    *o = av_opt_find(&class, "sws_flags", NULL, 0,
+                                                   AV_OPT_SEARCH_FAKE_OBJ);
+                int ret = av_opt_eval_flags(&class, o, e->value, &sws_flags);
+                if (ret < 0)
+                    exit(1);
+            }
+        }
+
         is->img_convert_ctx = sws_getCachedContext(is->img_convert_ctx,
             vp->width, vp->height, src_frame->format, vp->width, vp->height,
             AV_PIX_FMT_YUV420P, sws_flags, NULL, NULL, NULL);
@@ -1860,7 +1801,7 @@ static int queue_picture(VideoState *is, AVFrame *src_frame, double pts, double
             exit(1);
         }
         sws_scale(is->img_convert_ctx, src_frame->data, src_frame->linesize,
-                  0, vp->height, pict.data, pict.linesize);
+                  0, vp->height, data, linesize);
 #endif
         /* workaround SDL PITCH_WORKAROUND */
         duplicate_right_border_pixels(vp->bmp);
@@ -1893,6 +1834,9 @@ static int get_video_frame(VideoState *is, AVFrame *frame)
 
         frame->sample_aspect_ratio = av_guess_sample_aspect_ratio(is->ic, is->video_st, frame);
 
+        is->viddec_width  = frame->width;
+        is->viddec_height = frame->height;
+
         if (framedrop>0 || (framedrop && get_master_sync_type(is) != AV_SYNC_VIDEO_MASTER)) {
             if (frame->pts != AV_NOPTS_VALUE) {
                 double diff = dpts - get_master_clock(is);
@@ -1958,15 +1902,23 @@ static int configure_filtergraph(AVFilterGraph *graph, const char *filtergraph,
 static int configure_video_filters(AVFilterGraph *graph, VideoState *is, const char *vfilters, AVFrame *frame)
 {
     static const enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE };
-    char sws_flags_str[128];
+    char sws_flags_str[512] = "";
     char buffersrc_args[256];
     int ret;
     AVFilterContext *filt_src = NULL, *filt_out = NULL, *last_filter = NULL;
     AVCodecContext *codec = is->video_st->codec;
     AVRational fr = av_guess_frame_rate(is->ic, is->video_st, NULL);
+    AVDictionaryEntry *e = NULL;
+
+    while ((e = av_dict_get(sws_dict, "", e, AV_DICT_IGNORE_SUFFIX))) {
+        if (!strcmp(e->key, "sws_flags")) {
+            av_strlcatf(sws_flags_str, sizeof(sws_flags_str), "%s=%s:", "flags", e->value);
+        } else
+            av_strlcatf(sws_flags_str, sizeof(sws_flags_str), "%s=%s:", e->key, e->value);
+    }
+    if (strlen(sws_flags_str))
+        sws_flags_str[strlen(sws_flags_str)-1] = '\0';
 
-    av_opt_get_int(sws_opts, "sws_flags", 0, &sws_flags);
-    snprintf(sws_flags_str, sizeof(sws_flags_str), "flags=%"PRId64, sws_flags);
     graph->scale_sws_opts = av_strdup(sws_flags_str);
 
     snprintf(buffersrc_args, sizeof(buffersrc_args),
@@ -2207,10 +2159,15 @@ static int audio_thread(void *arg)
     return ret;
 }
 
-static void decoder_start(Decoder *d, int (*fn)(void *), void *arg)
+static int decoder_start(Decoder *d, int (*fn)(void *), void *arg)
 {
     packet_queue_start(d->queue);
     d->decoder_tid = SDL_CreateThread(fn, arg);
+    if (!d->decoder_tid) {
+        av_log(NULL, AV_LOG_ERROR, "SDL_CreateThread(): %s\n", SDL_GetError());
+        return AVERROR(ENOMEM);
+    }
+    return 0;
 }
 
 static int video_thread(void *arg)
@@ -2328,8 +2285,7 @@ static int subtitle_thread(void *arg)
     Frame *sp;
     int got_subtitle;
     double pts;
-    int i, j;
-    int r, g, b, y, u, v, a;
+    int i;
 
     for (;;) {
         if (!(sp = frame_queue_peek_writable(&is->subpq)))
@@ -2345,17 +2301,41 @@ static int subtitle_thread(void *arg)
                 pts = sp->sub.pts / (double)AV_TIME_BASE;
             sp->pts = pts;
             sp->serial = is->subdec.pkt_serial;
+            if (!(sp->subrects = av_mallocz_array(sp->sub.num_rects, sizeof(AVSubtitleRect*)))) {
+                av_log(NULL, AV_LOG_FATAL, "Cannot allocate subrects\n");
+                exit(1);
+            }
 
             for (i = 0; i < sp->sub.num_rects; i++)
             {
-                for (j = 0; j < sp->sub.rects[i]->nb_colors; j++)
-                {
-                    RGBA_IN(r, g, b, a, (uint32_t*)sp->sub.rects[i]->pict.data[1] + j);
-                    y = RGB_TO_Y_CCIR(r, g, b);
-                    u = RGB_TO_U_CCIR(r, g, b, 0);
-                    v = RGB_TO_V_CCIR(r, g, b, 0);
-                    YUVA_OUT((uint32_t*)sp->sub.rects[i]->pict.data[1] + j, y, u, v, a);
+                int in_w = sp->sub.rects[i]->w;
+                int in_h = sp->sub.rects[i]->h;
+                int subw = is->subdec.avctx->width  ? is->subdec.avctx->width  : is->viddec_width;
+                int subh = is->subdec.avctx->height ? is->subdec.avctx->height : is->viddec_height;
+                int out_w = is->viddec_width  ? in_w * is->viddec_width  / subw : in_w;
+                int out_h = is->viddec_height ? in_h * is->viddec_height / subh : in_h;
+
+                if (!(sp->subrects[i] = av_mallocz(sizeof(AVSubtitleRect))) ||
+                    av_image_alloc(sp->subrects[i]->data, sp->subrects[i]->linesize, out_w, out_h, AV_PIX_FMT_YUVA420P, 16) < 0) {
+                    av_log(NULL, AV_LOG_FATAL, "Cannot allocate subtitle data\n");
+                    exit(1);
+                }
+
+                is->sub_convert_ctx = sws_getCachedContext(is->sub_convert_ctx,
+                    in_w, in_h, AV_PIX_FMT_PAL8, out_w, out_h,
+                    AV_PIX_FMT_YUVA420P, sws_flags, NULL, NULL, NULL);
+                if (!is->sub_convert_ctx) {
+                    av_log(NULL, AV_LOG_FATAL, "Cannot initialize the sub conversion context\n");
+                    exit(1);
                 }
+                sws_scale(is->sub_convert_ctx,
+                          (void*)sp->sub.rects[i]->data, sp->sub.rects[i]->linesize,
+                          0, in_h, sp->subrects[i]->data, sp->subrects[i]->linesize);
+
+                sp->subrects[i]->w = out_w;
+                sp->subrects[i]->h = out_h;
+                sp->subrects[i]->x = sp->sub.rects[i]->x * out_w / in_w;
+                sp->subrects[i]->y = sp->sub.rects[i]->y * out_h / in_h;
             }
 
             /* now we can update the picture count */
@@ -2448,6 +2428,13 @@ static int audio_decode_frame(VideoState *is)
         return -1;
 
     do {
+#if defined(_WIN32)
+        while (frame_queue_nb_remaining(&is->sampq) == 0) {
+            if ((av_gettime_relative() - audio_callback_time) > 1000000LL * is->audio_hw_buf_size / is->audio_tgt.bytes_per_sec / 2)
+                return -1;
+            av_usleep (1000);
+        }
+#endif
         if (!(af = frame_queue_peek_readable(&is->sampq)))
             return -1;
         frame_queue_next(&is->sampq);
@@ -2566,7 +2553,13 @@ static void sdl_audio_callback(void *opaque, Uint8 *stream, int len)
         len1 = is->audio_buf_size - is->audio_buf_index;
         if (len1 > len)
             len1 = len;
-        memcpy(stream, (uint8_t *)is->audio_buf + is->audio_buf_index, len1);
+        if (!is->muted && is->audio_volume == SDL_MIX_MAXVOLUME)
+            memcpy(stream, (uint8_t *)is->audio_buf + is->audio_buf_index, len1);
+        else {
+            memset(stream, is->silence_buf[0], len1);
+            if (!is->muted)
+                SDL_MixAudio(stream, (uint8_t *)is->audio_buf + is->audio_buf_index, len1, is->audio_volume);
+        }
         len -= len1;
         stream += len1;
         is->audio_buf_index += len1;
@@ -2695,10 +2688,15 @@ static int stream_component_open(VideoState *is, int stream_index)
     }
     av_codec_set_lowres(avctx, stream_lowres);
 
+#if FF_API_EMU_EDGE
     if(stream_lowres) avctx->flags |= CODEC_FLAG_EMU_EDGE;
-    if (fast)   avctx->flags2 |= CODEC_FLAG2_FAST;
-    if(codec->capabilities & CODEC_CAP_DR1)
+#endif
+    if (fast)
+        avctx->flags2 |= AV_CODEC_FLAG2_FAST;
+#if FF_API_EMU_EDGE
+    if(codec->capabilities & AV_CODEC_CAP_DR1)
         avctx->flags |= CODEC_FLAG_EMU_EDGE;
+#endif
 
     opts = filter_codec_opts(codec_opts, avctx->codec_id, ic, ic->streams[stream_index], codec);
     if (!av_dict_get(opts, "threads", NULL, 0))
@@ -2764,15 +2762,20 @@ static int stream_component_open(VideoState *is, int stream_index)
             is->auddec.start_pts = is->audio_st->start_time;
             is->auddec.start_pts_tb = is->audio_st->time_base;
         }
-        decoder_start(&is->auddec, audio_thread, is);
+        if ((ret = decoder_start(&is->auddec, audio_thread, is)) < 0)
+            goto fail;
         SDL_PauseAudio(0);
         break;
     case AVMEDIA_TYPE_VIDEO:
         is->video_stream = stream_index;
         is->video_st = ic->streams[stream_index];
 
+        is->viddec_width  = avctx->width;
+        is->viddec_height = avctx->height;
+
         decoder_init(&is->viddec, avctx, &is->videoq, is->continue_read_thread);
-        decoder_start(&is->viddec, video_thread, is);
+        if ((ret = decoder_start(&is->viddec, video_thread, is)) < 0)
+            goto fail;
         is->queue_attachments_req = 1;
         break;
     case AVMEDIA_TYPE_SUBTITLE:
@@ -2780,7 +2783,8 @@ static int stream_component_open(VideoState *is, int stream_index)
         is->subtitle_st = ic->streams[stream_index];
 
         decoder_init(&is->subdec, avctx, &is->subtitleq, is->continue_read_thread);
-        decoder_start(&is->subdec, subtitle_thread, is);
+        if ((ret = decoder_start(&is->subdec, subtitle_thread, is)) < 0)
+            goto fail;
         break;
     default:
         break;
@@ -2792,64 +2796,6 @@ static int stream_component_open(VideoState *is, int stream_index)
     return ret;
 }
 
-static void stream_component_close(VideoState *is, int stream_index)
-{
-    AVFormatContext *ic = is->ic;
-    AVCodecContext *avctx;
-
-    if (stream_index < 0 || stream_index >= ic->nb_streams)
-        return;
-    avctx = ic->streams[stream_index]->codec;
-
-    switch (avctx->codec_type) {
-    case AVMEDIA_TYPE_AUDIO:
-        decoder_abort(&is->auddec, &is->sampq);
-        SDL_CloseAudio();
-        decoder_destroy(&is->auddec);
-        swr_free(&is->swr_ctx);
-        av_freep(&is->audio_buf1);
-        is->audio_buf1_size = 0;
-        is->audio_buf = NULL;
-
-        if (is->rdft) {
-            av_rdft_end(is->rdft);
-            av_freep(&is->rdft_data);
-            is->rdft = NULL;
-            is->rdft_bits = 0;
-        }
-        break;
-    case AVMEDIA_TYPE_VIDEO:
-        decoder_abort(&is->viddec, &is->pictq);
-        decoder_destroy(&is->viddec);
-        break;
-    case AVMEDIA_TYPE_SUBTITLE:
-        decoder_abort(&is->subdec, &is->subpq);
-        decoder_destroy(&is->subdec);
-        break;
-    default:
-        break;
-    }
-
-    ic->streams[stream_index]->discard = AVDISCARD_ALL;
-    avcodec_close(avctx);
-    switch (avctx->codec_type) {
-    case AVMEDIA_TYPE_AUDIO:
-        is->audio_st = NULL;
-        is->audio_stream = -1;
-        break;
-    case AVMEDIA_TYPE_VIDEO:
-        is->video_st = NULL;
-        is->video_stream = -1;
-        break;
-    case AVMEDIA_TYPE_SUBTITLE:
-        is->subtitle_st = NULL;
-        is->subtitle_stream = -1;
-        break;
-    default:
-        break;
-    }
-}
-
 static int decode_interrupt_cb(void *ctx)
 {
     VideoState *is = ctx;
@@ -2889,6 +2835,12 @@ static int read_thread(void *arg)
     int scan_all_pmts_set = 0;
     int64_t pkt_ts;
 
+    if (!wait_mutex) {
+        av_log(NULL, AV_LOG_FATAL, "SDL_CreateMutex(): %s\n", SDL_GetError());
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
     memset(st_index, -1, sizeof(st_index));
     is->last_video_stream = is->video_stream = -1;
     is->last_audio_stream = is->audio_stream = -1;
@@ -3170,27 +3122,14 @@ static int read_thread(void *arg)
         } else if (pkt->stream_index == is->subtitle_stream && pkt_in_play_range) {
             packet_queue_put(&is->subtitleq, pkt);
         } else {
-            av_free_packet(pkt);
+            av_packet_unref(pkt);
         }
     }
-    /* wait until the end */
-    while (!is->abort_request) {
-        SDL_Delay(100);
-    }
 
     ret = 0;
  fail:
-    /* close each stream */
-    if (is->audio_stream >= 0)
-        stream_component_close(is, is->audio_stream);
-    if (is->video_stream >= 0)
-        stream_component_close(is, is->video_stream);
-    if (is->subtitle_stream >= 0)
-        stream_component_close(is, is->subtitle_stream);
-    if (ic) {
+    if (ic && !is->ic)
         avformat_close_input(&ic);
-        is->ic = NULL;
-    }
 
     if (ret != 0) {
         SDL_Event event;
@@ -3210,7 +3149,9 @@ static VideoState *stream_open(const char *filename, AVInputFormat *iformat)
     is = av_mallocz(sizeof(VideoState));
     if (!is)
         return NULL;
-    av_strlcpy(is->filename, filename, sizeof(is->filename));
+    is->filename = av_strdup(filename);
+    if (!is->filename)
+        goto fail;
     is->iformat = iformat;
     is->ytop    = 0;
     is->xleft   = 0;
@@ -3223,19 +3164,26 @@ static VideoState *stream_open(const char *filename, AVInputFormat *iformat)
     if (frame_queue_init(&is->sampq, &is->audioq, SAMPLE_QUEUE_SIZE, 1) < 0)
         goto fail;
 
-    packet_queue_init(&is->videoq);
-    packet_queue_init(&is->audioq);
-    packet_queue_init(&is->subtitleq);
+    if (packet_queue_init(&is->videoq) < 0 ||
+        packet_queue_init(&is->audioq) < 0 ||
+        packet_queue_init(&is->subtitleq) < 0)
+        goto fail;
 
-    is->continue_read_thread = SDL_CreateCond();
+    if (!(is->continue_read_thread = SDL_CreateCond())) {
+        av_log(NULL, AV_LOG_FATAL, "SDL_CreateCond(): %s\n", SDL_GetError());
+        goto fail;
+    }
 
     init_clock(&is->vidclk, &is->videoq.serial);
     init_clock(&is->audclk, &is->audioq.serial);
     init_clock(&is->extclk, &is->extclk.serial);
     is->audio_clock_serial = -1;
+    is->audio_volume = SDL_MIX_MAXVOLUME;
+    is->muted = 0;
     is->av_sync_type = av_sync_type;
     is->read_tid     = SDL_CreateThread(read_thread, is);
     if (!is->read_tid) {
+        av_log(NULL, AV_LOG_FATAL, "SDL_CreateThread(): %s\n", SDL_GetError());
 fail:
         stream_close(is);
         return NULL;
@@ -3422,6 +3370,17 @@ static void event_loop(VideoState *cur_stream)
             case SDLK_SPACE:
                 toggle_pause(cur_stream);
                 break;
+            case SDLK_m:
+                toggle_mute(cur_stream);
+                break;
+            case SDLK_KP_MULTIPLY:
+            case SDLK_0:
+                update_volume(cur_stream, 1, SDL_VOLUME_STEP);
+                break;
+            case SDLK_KP_DIVIDE:
+            case SDLK_9:
+                update_volume(cur_stream, -1, SDL_VOLUME_STEP);
+                break;
             case SDLK_s: // S: Step to next frame
                 step_to_next_frame(cur_stream);
                 break;
@@ -3514,6 +3473,16 @@ static void event_loop(VideoState *cur_stream)
                 do_exit(cur_stream);
                 break;
             }
+            if (event.button.button == SDL_BUTTON_LEFT) {
+                static int64_t last_mouse_left_click = 0;
+                if (av_gettime_relative() - last_mouse_left_click <= 500000) {
+                    toggle_full_screen(cur_stream);
+                    cur_stream->force_refresh = 1;
+                    last_mouse_left_click = 0;
+                } else {
+                    last_mouse_left_click = av_gettime_relative();
+                }
+            }
         case SDL_MOUSEMOTION:
             if (cursor_hidden) {
                 SDL_ShowCursor(1);
@@ -3521,9 +3490,11 @@ static void event_loop(VideoState *cur_stream)
             }
             cursor_last_shown = av_gettime_relative();
             if (event.type == SDL_MOUSEBUTTONDOWN) {
+                if (event.button.button != SDL_BUTTON_RIGHT)
+                    break;
                 x = event.button.x;
             } else {
-                if (event.motion.state != SDL_PRESSED)
+                if (!(event.motion.state & SDL_BUTTON_RMASK))
                     break;
                 x = event.motion.x;
             }
@@ -3755,6 +3726,9 @@ void show_help_default(const char *opt, const char *arg)
            "q, ESC              quit\n"
            "f                   toggle full screen\n"
            "p, SPC              pause\n"
+           "m                   toggle mute\n"
+           "9, 0                decrease and increase volume respectively\n"
+           "/, *                decrease and increase volume respectively\n"
            "a                   cycle audio channel in the current program\n"
            "v                   cycle video channel\n"
            "t                   cycle subtitle channel in the current program\n"
@@ -3764,7 +3738,8 @@ void show_help_default(const char *opt, const char *arg)
            "left/right          seek backward/forward 10 seconds\n"
            "down/up             seek backward/forward 1 minute\n"
            "page down/page up   seek backward/forward 10 minutes\n"
-           "mouse click         seek to percentage in file corresponding to fraction of width\n"
+           "right mouse click   seek to percentage in file corresponding to fraction of width\n"
+           "left double-click   toggle full screen\n"
            );
 }
 
@@ -3773,8 +3748,10 @@ static int lockmgr(void **mtx, enum AVLockOp op)
    switch(op) {
       case AV_LOCK_CREATE:
           *mtx = SDL_CreateMutex();
-          if(!*mtx)
+          if(!*mtx) {
+              av_log(NULL, AV_LOG_FATAL, "SDL_CreateMutex(): %s\n", SDL_GetError());
               return 1;
+          }
           return 0;
       case AV_LOCK_OBTAIN:
           return !!SDL_LockMutex(*mtx);
@@ -3851,6 +3828,8 @@ int main(int argc, char **argv)
     SDL_EventState(SDL_SYSWMEVENT, SDL_IGNORE);
     SDL_EventState(SDL_USEREVENT, SDL_IGNORE);
 
+    SDL_EnableKeyRepeat(SDL_DEFAULT_REPEAT_DELAY, SDL_DEFAULT_REPEAT_INTERVAL);
+
     if (av_lockmgr_register(lockmgr)) {
         av_log(NULL, AV_LOG_FATAL, "Could not initialize lock manager!\n");
         do_exit(NULL);
diff --git a/ffprobe.c b/ffprobe.c
index 415836b7..f7b51add 100644
--- a/ffprobe.c
+++ b/ffprobe.c
@@ -77,6 +77,7 @@ static int do_show_format_tags = 0;
 static int do_show_frame_tags = 0;
 static int do_show_program_tags = 0;
 static int do_show_stream_tags = 0;
+static int do_show_packet_tags = 0;
 
 static int show_value_unit              = 0;
 static int use_value_prefix             = 0;
@@ -135,6 +136,7 @@ typedef enum {
     SECTION_ID_LIBRARY_VERSION,
     SECTION_ID_LIBRARY_VERSIONS,
     SECTION_ID_PACKET,
+    SECTION_ID_PACKET_TAGS,
     SECTION_ID_PACKETS,
     SECTION_ID_PACKETS_AND_FRAMES,
     SECTION_ID_PACKET_SIDE_DATA_LIST,
@@ -178,7 +180,8 @@ static struct section sections[] = {
     [SECTION_ID_LIBRARY_VERSION] =    { SECTION_ID_LIBRARY_VERSION, "library_version", 0, { -1 } },
     [SECTION_ID_PACKETS] =            { SECTION_ID_PACKETS, "packets", SECTION_FLAG_IS_ARRAY, { SECTION_ID_PACKET, -1} },
     [SECTION_ID_PACKETS_AND_FRAMES] = { SECTION_ID_PACKETS_AND_FRAMES, "packets_and_frames", SECTION_FLAG_IS_ARRAY, { SECTION_ID_PACKET, -1} },
-    [SECTION_ID_PACKET] =             { SECTION_ID_PACKET, "packet", 0, { SECTION_ID_PACKET_SIDE_DATA_LIST, -1 } },
+    [SECTION_ID_PACKET] =             { SECTION_ID_PACKET, "packet", 0, { SECTION_ID_PACKET_TAGS, SECTION_ID_PACKET_SIDE_DATA_LIST, -1 } },
+    [SECTION_ID_PACKET_TAGS] =        { SECTION_ID_PACKET_TAGS, "tags", SECTION_FLAG_HAS_VARIABLE_FIELDS, { -1 }, .element_name = "tag", .unique_name = "packet_tags" },
     [SECTION_ID_PACKET_SIDE_DATA_LIST] ={ SECTION_ID_PACKET_SIDE_DATA_LIST, "side_data_list", SECTION_FLAG_IS_ARRAY, { SECTION_ID_PACKET_SIDE_DATA, -1 } },
     [SECTION_ID_PACKET_SIDE_DATA] =     { SECTION_ID_PACKET_SIDE_DATA, "side_data", 0, { -1 } },
     [SECTION_ID_PIXEL_FORMATS] =      { SECTION_ID_PIXEL_FORMATS, "pixel_formats", SECTION_FLAG_IS_ARRAY, { SECTION_ID_PIXEL_FORMAT, -1 } },
@@ -215,8 +218,19 @@ static AVInputFormat *iformat = NULL;
 
 static struct AVHashContext *hash;
 
-static const char *const binary_unit_prefixes [] = { "", "Ki", "Mi", "Gi", "Ti", "Pi" };
-static const char *const decimal_unit_prefixes[] = { "", "K" , "M" , "G" , "T" , "P"  };
+static const struct {
+    double bin_val;
+    double dec_val;
+    const char *bin_str;
+    const char *dec_str;
+} si_prefixes[] = {
+    { 1.0, 1.0, "", "" },
+    { 1.024e3, 1e3, "Ki", "K" },
+    { 1.048576e6, 1e6, "Mi", "M" },
+    { 1.073741824e9, 1e9, "Gi", "G" },
+    { 1.099511627776e12, 1e12, "Ti", "T" },
+    { 1.125899906842624e15, 1e15, "Pi", "P" },
+};
 
 static const char unit_second_str[]         = "s"    ;
 static const char unit_hertz_str[]          = "Hz"   ;
@@ -270,14 +284,14 @@ static char *value_string(char *buf, int buf_size, struct unit_value uv)
 
             if (uv.unit == unit_byte_str && use_byte_value_binary_prefix) {
                 index = (long long int) (log2(vald)) / 10;
-                index = av_clip(index, 0, FF_ARRAY_ELEMS(binary_unit_prefixes) - 1);
-                vald /= exp2(index * 10);
-                prefix_string = binary_unit_prefixes[index];
+                index = av_clip(index, 0, FF_ARRAY_ELEMS(si_prefixes) - 1);
+                vald /= si_prefixes[index].bin_val;
+                prefix_string = si_prefixes[index].bin_str;
             } else {
                 index = (long long int) (log10(vald)) / 3;
-                index = av_clip(index, 0, FF_ARRAY_ELEMS(decimal_unit_prefixes) - 1);
-                vald /= pow(10, index * 3);
-                prefix_string = decimal_unit_prefixes[index];
+                index = av_clip(index, 0, FF_ARRAY_ELEMS(si_prefixes) - 1);
+                vald /= si_prefixes[index].dec_val;
+                prefix_string = si_prefixes[index].dec_str;
             }
             vali = vald;
         }
@@ -807,10 +821,10 @@ typedef struct DefaultContext {
 #define OFFSET(x) offsetof(DefaultContext, x)
 
 static const AVOption default_options[] = {
-    { "noprint_wrappers", "do not print headers and footers", OFFSET(noprint_wrappers), AV_OPT_TYPE_INT, {.i64=0}, 0, 1 },
-    { "nw",               "do not print headers and footers", OFFSET(noprint_wrappers), AV_OPT_TYPE_INT, {.i64=0}, 0, 1 },
-    { "nokey",          "force no key printing",     OFFSET(nokey),          AV_OPT_TYPE_INT, {.i64=0}, 0, 1 },
-    { "nk",             "force no key printing",     OFFSET(nokey),          AV_OPT_TYPE_INT, {.i64=0}, 0, 1 },
+    { "noprint_wrappers", "do not print headers and footers", OFFSET(noprint_wrappers), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1 },
+    { "nw",               "do not print headers and footers", OFFSET(noprint_wrappers), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1 },
+    { "nokey",          "force no key printing",     OFFSET(nokey),          AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1 },
+    { "nk",             "force no key printing",     OFFSET(nokey),          AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1 },
     {NULL},
 };
 
@@ -963,12 +977,12 @@ typedef struct CompactContext {
 static const AVOption compact_options[]= {
     {"item_sep", "set item separator",    OFFSET(item_sep_str),    AV_OPT_TYPE_STRING, {.str="|"},  CHAR_MIN, CHAR_MAX },
     {"s",        "set item separator",    OFFSET(item_sep_str),    AV_OPT_TYPE_STRING, {.str="|"},  CHAR_MIN, CHAR_MAX },
-    {"nokey",    "force no key printing", OFFSET(nokey),           AV_OPT_TYPE_INT,    {.i64=0},    0,        1        },
-    {"nk",       "force no key printing", OFFSET(nokey),           AV_OPT_TYPE_INT,    {.i64=0},    0,        1        },
+    {"nokey",    "force no key printing", OFFSET(nokey),           AV_OPT_TYPE_BOOL,   {.i64=0},    0,        1        },
+    {"nk",       "force no key printing", OFFSET(nokey),           AV_OPT_TYPE_BOOL,   {.i64=0},    0,        1        },
     {"escape",   "set escape mode",       OFFSET(escape_mode_str), AV_OPT_TYPE_STRING, {.str="c"},  CHAR_MIN, CHAR_MAX },
     {"e",        "set escape mode",       OFFSET(escape_mode_str), AV_OPT_TYPE_STRING, {.str="c"},  CHAR_MIN, CHAR_MAX },
-    {"print_section", "print section name", OFFSET(print_section), AV_OPT_TYPE_INT,    {.i64=1},    0,        1        },
-    {"p",             "print section name", OFFSET(print_section), AV_OPT_TYPE_INT,    {.i64=1},    0,        1        },
+    {"print_section", "print section name", OFFSET(print_section), AV_OPT_TYPE_BOOL,   {.i64=1},    0,        1        },
+    {"p",             "print section name", OFFSET(print_section), AV_OPT_TYPE_BOOL,   {.i64=1},    0,        1        },
     {NULL},
 };
 
@@ -1079,12 +1093,12 @@ static const Writer compact_writer = {
 static const AVOption csv_options[] = {
     {"item_sep", "set item separator",    OFFSET(item_sep_str),    AV_OPT_TYPE_STRING, {.str=","},  CHAR_MIN, CHAR_MAX },
     {"s",        "set item separator",    OFFSET(item_sep_str),    AV_OPT_TYPE_STRING, {.str=","},  CHAR_MIN, CHAR_MAX },
-    {"nokey",    "force no key printing", OFFSET(nokey),           AV_OPT_TYPE_INT,    {.i64=1},    0,        1        },
-    {"nk",       "force no key printing", OFFSET(nokey),           AV_OPT_TYPE_INT,    {.i64=1},    0,        1        },
+    {"nokey",    "force no key printing", OFFSET(nokey),           AV_OPT_TYPE_BOOL,   {.i64=1},    0,        1        },
+    {"nk",       "force no key printing", OFFSET(nokey),           AV_OPT_TYPE_BOOL,   {.i64=1},    0,        1        },
     {"escape",   "set escape mode",       OFFSET(escape_mode_str), AV_OPT_TYPE_STRING, {.str="csv"}, CHAR_MIN, CHAR_MAX },
     {"e",        "set escape mode",       OFFSET(escape_mode_str), AV_OPT_TYPE_STRING, {.str="csv"}, CHAR_MIN, CHAR_MAX },
-    {"print_section", "print section name", OFFSET(print_section), AV_OPT_TYPE_INT,    {.i64=1},    0,        1        },
-    {"p",             "print section name", OFFSET(print_section), AV_OPT_TYPE_INT,    {.i64=1},    0,        1        },
+    {"print_section", "print section name", OFFSET(print_section), AV_OPT_TYPE_BOOL,   {.i64=1},    0,        1        },
+    {"p",             "print section name", OFFSET(print_section), AV_OPT_TYPE_BOOL,   {.i64=1},    0,        1        },
     {NULL},
 };
 
@@ -1117,8 +1131,8 @@ typedef struct FlatContext {
 static const AVOption flat_options[]= {
     {"sep_char", "set separator",    OFFSET(sep_str),    AV_OPT_TYPE_STRING, {.str="."},  CHAR_MIN, CHAR_MAX },
     {"s",        "set separator",    OFFSET(sep_str),    AV_OPT_TYPE_STRING, {.str="."},  CHAR_MIN, CHAR_MAX },
-    {"hierarchical", "specify if the section specification should be hierarchical", OFFSET(hierarchical), AV_OPT_TYPE_INT, {.i64=1}, 0, 1 },
-    {"h",           "specify if the section specification should be hierarchical", OFFSET(hierarchical), AV_OPT_TYPE_INT, {.i64=1}, 0, 1 },
+    {"hierarchical", "specify if the section specification should be hierarchical", OFFSET(hierarchical), AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1 },
+    {"h",            "specify if the section specification should be hierarchical", OFFSET(hierarchical), AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1 },
     {NULL},
 };
 
@@ -1237,8 +1251,8 @@ typedef struct INIContext {
 #define OFFSET(x) offsetof(INIContext, x)
 
 static const AVOption ini_options[] = {
-    {"hierarchical", "specify if the section specification should be hierarchical", OFFSET(hierarchical), AV_OPT_TYPE_INT, {.i64=1}, 0, 1 },
-    {"h",           "specify if the section specification should be hierarchical", OFFSET(hierarchical), AV_OPT_TYPE_INT, {.i64=1}, 0, 1 },
+    {"hierarchical", "specify if the section specification should be hierarchical", OFFSET(hierarchical), AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1 },
+    {"h",            "specify if the section specification should be hierarchical", OFFSET(hierarchical), AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1 },
     {NULL},
 };
 
@@ -1343,8 +1357,8 @@ typedef struct JSONContext {
 #define OFFSET(x) offsetof(JSONContext, x)
 
 static const AVOption json_options[]= {
-    { "compact", "enable compact output", OFFSET(compact), AV_OPT_TYPE_INT, {.i64=0}, 0, 1 },
-    { "c",       "enable compact output", OFFSET(compact), AV_OPT_TYPE_INT, {.i64=0}, 0, 1 },
+    { "compact", "enable compact output", OFFSET(compact), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1 },
+    { "c",       "enable compact output", OFFSET(compact), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1 },
     { NULL }
 };
 
@@ -1506,10 +1520,10 @@ typedef struct XMLContext {
 #define OFFSET(x) offsetof(XMLContext, x)
 
 static const AVOption xml_options[] = {
-    {"fully_qualified", "specify if the output should be fully qualified", OFFSET(fully_qualified), AV_OPT_TYPE_INT, {.i64=0},  0, 1 },
-    {"q",               "specify if the output should be fully qualified", OFFSET(fully_qualified), AV_OPT_TYPE_INT, {.i64=0},  0, 1 },
-    {"xsd_strict",      "ensure that the output is XSD compliant",         OFFSET(xsd_strict),      AV_OPT_TYPE_INT, {.i64=0},  0, 1 },
-    {"x",               "ensure that the output is XSD compliant",         OFFSET(xsd_strict),      AV_OPT_TYPE_INT, {.i64=0},  0, 1 },
+    {"fully_qualified", "specify if the output should be fully qualified", OFFSET(fully_qualified), AV_OPT_TYPE_BOOL, {.i64=0},  0, 1 },
+    {"q",               "specify if the output should be fully qualified", OFFSET(fully_qualified), AV_OPT_TYPE_BOOL, {.i64=0},  0, 1 },
+    {"xsd_strict",      "ensure that the output is XSD compliant",         OFFSET(xsd_strict),      AV_OPT_TYPE_BOOL, {.i64=0},  0, 1 },
+    {"x",               "ensure that the output is XSD compliant",         OFFSET(xsd_strict),      AV_OPT_TYPE_BOOL, {.i64=0},  0, 1 },
     {NULL},
 };
 
@@ -1762,6 +1776,16 @@ static void show_packet(WriterContext *w, AVFormatContext *fmt_ctx, AVPacket *pk
 
     if (pkt->side_data_elems) {
         int i;
+        int size;
+        const uint8_t *side_metadata;
+
+        side_metadata = av_packet_get_side_data(pkt, AV_PKT_DATA_STRINGS_METADATA, &size);
+        if (side_metadata && size && do_show_packet_tags) {
+            AVDictionary *dict = NULL;
+            if (av_packet_unpack_dictionary(side_metadata, size, &dict) >= 0)
+                show_tags(w, dict, SECTION_ID_PACKET_TAGS);
+            av_dict_free(&dict);
+        }
         writer_print_section_header(w, SECTION_ID_PACKET_SIDE_DATA_LIST);
         for (i = 0; i < pkt->side_data_elems; i++) {
             AVPacketSideData *sd = &pkt->side_data[i];
@@ -1814,6 +1838,7 @@ static void show_frame(WriterContext *w, AVFrame *frame, AVStream *stream,
                        AVFormatContext *fmt_ctx)
 {
     AVBPrint pbuf;
+    char val_str[128];
     const char *s;
     int i;
 
@@ -1836,7 +1861,7 @@ static void show_frame(WriterContext *w, AVFrame *frame, AVStream *stream,
     print_duration_time("pkt_duration_time", av_frame_get_pkt_duration(frame), &stream->time_base);
     if (av_frame_get_pkt_pos (frame) != -1) print_fmt    ("pkt_pos", "%"PRId64, av_frame_get_pkt_pos(frame));
     else                      print_str_opt("pkt_pos", "N/A");
-    if (av_frame_get_pkt_size(frame) != -1) print_fmt    ("pkt_size", "%d", av_frame_get_pkt_size(frame));
+    if (av_frame_get_pkt_size(frame) != -1) print_val    ("pkt_size", av_frame_get_pkt_size(frame), unit_byte_str);
     else                       print_str_opt("pkt_size", "N/A");
 
     switch (stream->codec->codec_type) {
@@ -1890,9 +1915,12 @@ static void show_frame(WriterContext *w, AVFrame *frame, AVStream *stream,
             print_str("side_data_type", name ? name : "unknown");
             print_int("side_data_size", sd->size);
             if (sd->type == AV_FRAME_DATA_DISPLAYMATRIX && sd->size >= 9*4) {
-                abort();
                 writer_print_integers(w, "displaymatrix", sd->data, 9, " %11d", 3, 4, 1);
                 print_int("rotation", av_display_rotation_get((int32_t *)sd->data));
+            } else if (sd->type == AV_FRAME_DATA_GOP_TIMECODE && sd->size >= 8) {
+                char tcbuf[AV_TIMECODE_STR_SIZE];
+                av_timecode_make_mpeg_tc_string(tcbuf, *(int64_t *)(sd->data));
+                print_str("timecode", tcbuf);
             }
             writer_print_section_footer(w);
         }
@@ -2056,7 +2084,7 @@ static int read_interval_packets(WriterContext *w, AVFormatContext *fmt_ctx,
                 while (pkt1.size && process_frame(w, fmt_ctx, frame, &pkt1) > 0);
             }
         }
-        av_free_packet(&pkt);
+        av_packet_unref(&pkt);
     }
     av_init_packet(&pkt);
     pkt.data = NULL;
@@ -2136,10 +2164,16 @@ static int show_stream(WriterContext *w, AVFormatContext *fmt_ctx, int stream_id
             }
         }
 
-        if (dec && (profile = av_get_profile_name(dec, dec_ctx->profile)))
+        if (!do_bitexact && dec && (profile = av_get_profile_name(dec, dec_ctx->profile)))
             print_str("profile", profile);
-        else
-            print_str_opt("profile", "unknown");
+        else {
+            if (dec_ctx->profile != FF_PROFILE_UNKNOWN) {
+                char profile_num[12];
+                snprintf(profile_num, sizeof(profile_num), "%d", dec_ctx->profile);
+                print_str("profile", profile_num);
+            } else
+                print_str_opt("profile", "unknown");
+        }
 
         s = av_get_media_type_string(dec_ctx->codec_type);
         if (s) print_str    ("codec_type", s);
@@ -2197,6 +2231,7 @@ static int show_stream(WriterContext *w, AVFormatContext *fmt_ctx, int stream_id
             else
                 print_str_opt("chroma_location", av_chroma_location_name(dec_ctx->chroma_sample_location));
 
+#if FF_API_PRIVATE_OPT
             if (dec_ctx->timecode_frame_start >= 0) {
                 char tcbuf[AV_TIMECODE_STR_SIZE];
                 av_timecode_make_mpeg_tc_string(tcbuf, dec_ctx->timecode_frame_start);
@@ -2204,6 +2239,7 @@ static int show_stream(WriterContext *w, AVFormatContext *fmt_ctx, int stream_id
             } else {
                 print_str_opt("timecode", "N/A");
             }
+#endif
             print_int("refs", dec_ctx->refs);
             break;
 
@@ -2722,7 +2758,7 @@ static void ffprobe_show_pixel_formats(WriterContext *w)
             for (i = 0; i < pixdesc->nb_components; i++) {
                 writer_print_section_header(w, SECTION_ID_PIXEL_FORMAT_COMPONENT);
                 print_int("index", i + 1);
-                print_int("bit_depth", pixdesc->comp[i].depth_minus1 + 1);
+                print_int("bit_depth", pixdesc->comp[i].depth);
                 writer_print_section_footer(w);
             }
             writer_print_section_footer(w);
@@ -2831,6 +2867,9 @@ static int opt_show_format_entry(void *optctx, const char *opt, const char *arg)
     char *buf = av_asprintf("format=%s", arg);
     int ret;
 
+    if (!buf)
+        return AVERROR(ENOMEM);
+
     av_log(NULL, AV_LOG_WARNING,
            "Option '%s' is deprecated, use '-show_entries format=%s' instead\n",
            opt, arg);
@@ -3059,16 +3098,16 @@ static int opt_show_versions(const char *opt, const char *arg)
         return 0;                                                       \
     }
 
-DEFINE_OPT_SHOW_SECTION(chapters,         CHAPTERS);
-DEFINE_OPT_SHOW_SECTION(error,            ERROR);
-DEFINE_OPT_SHOW_SECTION(format,           FORMAT);
-DEFINE_OPT_SHOW_SECTION(frames,           FRAMES);
-DEFINE_OPT_SHOW_SECTION(library_versions, LIBRARY_VERSIONS);
-DEFINE_OPT_SHOW_SECTION(packets,          PACKETS);
-DEFINE_OPT_SHOW_SECTION(pixel_formats,    PIXEL_FORMATS);
-DEFINE_OPT_SHOW_SECTION(program_version,  PROGRAM_VERSION);
-DEFINE_OPT_SHOW_SECTION(streams,          STREAMS);
-DEFINE_OPT_SHOW_SECTION(programs,         PROGRAMS);
+DEFINE_OPT_SHOW_SECTION(chapters,         CHAPTERS)
+DEFINE_OPT_SHOW_SECTION(error,            ERROR)
+DEFINE_OPT_SHOW_SECTION(format,           FORMAT)
+DEFINE_OPT_SHOW_SECTION(frames,           FRAMES)
+DEFINE_OPT_SHOW_SECTION(library_versions, LIBRARY_VERSIONS)
+DEFINE_OPT_SHOW_SECTION(packets,          PACKETS)
+DEFINE_OPT_SHOW_SECTION(pixel_formats,    PIXEL_FORMATS)
+DEFINE_OPT_SHOW_SECTION(program_version,  PROGRAM_VERSION)
+DEFINE_OPT_SHOW_SECTION(streams,          STREAMS)
+DEFINE_OPT_SHOW_SECTION(programs,         PROGRAMS)
 
 static const OptionDef real_options[] = {
 #include "cmdutils_common_opts.h"
@@ -3175,6 +3214,7 @@ int main(int argc, char **argv)
     SET_DO_SHOW(FRAME_TAGS, frame_tags);
     SET_DO_SHOW(PROGRAM_TAGS, program_tags);
     SET_DO_SHOW(STREAM_TAGS, stream_tags);
+    SET_DO_SHOW(PACKET_TAGS, packet_tags);
 
     if (do_bitexact && (do_show_program_version || do_show_library_versions)) {
         av_log(NULL, AV_LOG_ERROR,
diff --git a/ffserver.c b/ffserver.c
index 2b19bba9..374af99c 100644
--- a/ffserver.c
+++ b/ffserver.c
@@ -31,7 +31,7 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include "libavformat/avformat.h"
-// FIXME those are internal headers, ffserver _really_ shouldn't use them
+/* FIXME: those are internal headers, ffserver _really_ shouldn't use them */
 #include "libavformat/ffm.h"
 #include "libavformat/network.h"
 #include "libavformat/os_support.h"
@@ -71,6 +71,8 @@
 #include "cmdutils.h"
 #include "ffserver_config.h"
 
+#define PATH_LENGTH 1024
+
 const char program_name[] = "ffserver";
 const int program_birth_year = 2000;
 
@@ -209,6 +211,7 @@ static void close_connection(HTTPContext *c);
 
 /* HTTP handling */
 static int handle_connection(HTTPContext *c);
+static inline void print_stream_params(AVIOContext *pb, FFServerStream *stream);
 static void compute_status(HTTPContext *c);
 static int open_input_stream(HTTPContext *c, const char *info);
 static int http_parse_request(HTTPContext *c);
@@ -239,6 +242,11 @@ static HTTPContext *rtp_new_connection(struct sockaddr_in *from_addr,
 static int rtp_new_av_stream(HTTPContext *c,
                              int stream_index, struct sockaddr_in *dest_addr,
                              HTTPContext *rtsp_c);
+/* utils */
+static size_t htmlencode (const char *src, char **dest);
+static inline void cp_html_entity (char *buffer, const char *entity);
+static inline int check_codec_match(AVCodecContext *ccf, AVCodecContext *ccs,
+                                    int stream);
 
 static const char *my_program_name;
 
@@ -250,18 +258,86 @@ static unsigned int nb_connections;
 
 static uint64_t current_bandwidth;
 
-static int64_t cur_time;           // Making this global saves on passing it around everywhere
+/* Making this global saves on passing it around everywhere */
+static int64_t cur_time;
 
 static AVLFG random_state;
 
 static FILE *logfile = NULL;
 
-static void htmlstrip(char *s) {
-    while (s && *s) {
-        s += strspn(s, "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ,. ");
-        if (*s)
-            *s++ = '?';
+static inline void cp_html_entity (char *buffer, const char *entity) {
+    if (!buffer || !entity) /* silently do nothing on a NULL argument */
+        return;
+    while (*entity) /* copies the entity bytes unbounded; no NUL is appended */
+        *buffer++ = *entity++;
+}
+
+/**
+ * Copy src into a new av_mallocz() buffer stored in *dest, replacing '&', '<'
+ * and '>' with "&amp;", "&lt;" and "&gt;". The buffer is not freed here.
+ *
+ * Returns the encoded length in bytes, excluding the terminating NUL; 0 when
+ * src is NULL (*dest untouched), empty, or when the allocation fails.
+ */
+static size_t htmlencode (const char *src, char **dest) {
+    const char *amp = "&amp;";
+    const char *lt  = "&lt;";
+    const char *gt  = "&gt;";
+    const char *start;
+    char *tmp;
+    size_t final_size = 0;
+
+    if (!src)
+        return 0; /* *dest is left untouched on this path */
+
+    start = src;
+
+    /* First pass: compute the encoded size, NUL excluded */
+    while (*src != '\0') {
+        switch(*src) {
+            case 38: /* & */
+                final_size += 5; /* length of "&amp;" */
+                break;
+            case 60: /* < */
+            case 62: /* > */
+                final_size += 4; /* length of "&lt;" and "&gt;" */
+                break;
+            default:
+                final_size++;
+        }
+        src++;
+    }
+
+    src = start;
+    *dest = av_mallocz(final_size + 1);
+    if (!*dest)
+        return 0; /* allocation failed */
+
+    /* Second pass: write the encoded text into *dest */
+    tmp = *dest;
+    while (*src != '\0') {
+        switch(*src) {
+            case 38: /* & */
+                cp_html_entity (tmp, amp);
+                tmp += 5;
+                break;
+            case 60: /* < */
+                cp_html_entity (tmp, lt);
+                tmp += 4;
+                break;
+            case 62: /* > */
+                cp_html_entity (tmp, gt);
+                tmp += 4;
+                break;
+            default:
+                *tmp = *src;
+                tmp += 1;
+        }
+        src++;
     }
+    *tmp = '\0';
+
+    return final_size;
 }
 
 static int64_t ffm_read_write_index(int fd)
@@ -283,29 +359,37 @@ static int ffm_write_write_index(int fd, int64_t pos)
     for(i=0;i<8;i++)
         buf[i] = (pos >> (56 - i * 8)) & 0xff;
     if (lseek(fd, 8, SEEK_SET) < 0)
-        return AVERROR(EIO);
+        goto bail_eio;
     if (write(fd, buf, 8) != 8)
-        return AVERROR(EIO);
+        goto bail_eio;
+
     return 8;
+
+bail_eio:
+    return AVERROR(EIO);
 }
 
 static void ffm_set_write_index(AVFormatContext *s, int64_t pos,
                                 int64_t file_size)
 {
-    FFMContext *ffm = s->priv_data;
-    ffm->write_index = pos;
-    ffm->file_size = file_size;
+    av_opt_set_int(s, "server_attached", 1, AV_OPT_SEARCH_CHILDREN); /* result unchecked */
+    av_opt_set_int(s, "ffm_write_index", pos, AV_OPT_SEARCH_CHILDREN); /* result unchecked */
+    av_opt_set_int(s, "ffm_file_size", file_size, AV_OPT_SEARCH_CHILDREN); /* result unchecked */
 }
 
-static char *ctime1(char *buf2, int buf_size)
+static char *ctime1(char *buf2, size_t buf_size)
 {
     time_t ti;
     char *p;
 
     ti = time(NULL);
     p = ctime(&ti);
+    if (!p || !*p) {
+        *buf2 = '\0';
+        return buf2;
+    }
     av_strlcpy(buf2, p, buf_size);
-    p = buf2 + strlen(p) - 1;
+    p = buf2 + strlen(buf2) - 1;
     if (*p == '\n')
         *p = '\0';
     return buf2;
@@ -314,12 +398,12 @@ static char *ctime1(char *buf2, int buf_size)
 static void http_vlog(const char *fmt, va_list vargs)
 {
     static int print_prefix = 1;
+    char buf[32];
 
     if (!logfile)
         return;
 
     if (print_prefix) {
-        char buf[32];
         ctime1(buf, sizeof(buf));
         fprintf(logfile, "%s ", buf);
     }
@@ -386,16 +470,33 @@ static int compute_datarate(DataRateData *drd, int64_t count)
 
 static void start_children(FFServerStream *feed)
 {
-    char pathname[1024];
+    char *pathname;
     char *slash;
     int i;
+    size_t cmd_length;
 
     if (no_launch)
         return;
 
+    cmd_length = strlen(my_program_name);
+
+   /**
+    * FIXME: WIP Safeguard. Remove after clearing all hardcoded
+    * '1024' path lengths
+    */
+    if (cmd_length > PATH_LENGTH - 1) {
+        http_log("Could not start children. Command line: '%s' exceeds "
+                    "path length limit (%d)\n", my_program_name, PATH_LENGTH);
+        return;
+    }
+
+    pathname = av_strdup (my_program_name);
+    if (!pathname) {
+        http_log("Could not allocate memory for children cmd line\n");
+        return;
+    }
    /* replace "ffserver" with "ffmpeg" in the path of current
     * program. Ignore user provided path */
-    av_strlcpy(pathname, my_program_name, sizeof(pathname));
 
     slash = strrchr(pathname, '/');
     if (!slash)
@@ -413,8 +514,9 @@ static void start_children(FFServerStream *feed)
 
         feed->pid = fork();
         if (feed->pid < 0) {
-            http_log("Unable to create children\n");
-            exit(1);
+            http_log("Unable to create children: %s\n", strerror(errno));
+            av_free (pathname);
+            exit(EXIT_FAILURE);
         }
 
         if (feed->pid)
@@ -443,8 +545,10 @@ static void start_children(FFServerStream *feed)
 
         signal(SIGPIPE, SIG_DFL);
         execvp(pathname, feed->child_argv);
+        av_free (pathname);
         _exit(1);
     }
+    av_free (pathname);
 }
 
 /* open a listening socket */
@@ -468,20 +572,22 @@ static int socket_open_listen(struct sockaddr_in *my_addr)
         snprintf(bindmsg, sizeof(bindmsg), "bind(port %d)",
                  ntohs(my_addr->sin_port));
         perror (bindmsg);
-        closesocket(server_fd);
-        return -1;
+        goto fail;
     }
 
     if (listen (server_fd, 5) < 0) {
         perror ("listen");
-        closesocket(server_fd);
-        return -1;
+        goto fail;
     }
 
     if (ff_socket_nonblock(server_fd, 1) < 0)
         av_log(NULL, AV_LOG_WARNING, "ff_socket_nonblock failed\n");
 
     return server_fd;
+
+fail:
+    closesocket(server_fd);
+    return -1;
 }
 
 /* start all multicast streams */
@@ -504,8 +610,7 @@ static void start_multicast(void)
         random1 = av_lfg_get(&random_state);
 
         /* open the RTP connection */
-        snprintf(session_id, sizeof(session_id), "%08x%08x",
-                 random0, random1);
+        snprintf(session_id, sizeof(session_id), "%08x%08x", random0, random1);
 
         /* choose a port if none given */
         if (stream->multicast_port == 0) {
@@ -563,25 +668,21 @@ static int http_server(void)
 
     if (config.http_addr.sin_port) {
         server_fd = socket_open_listen(&config.http_addr);
-        if (server_fd < 0) {
-            av_free(poll_table);
-            return -1;
-        }
+        if (server_fd < 0)
+            goto quit;
     }
 
     if (config.rtsp_addr.sin_port) {
         rtsp_server_fd = socket_open_listen(&config.rtsp_addr);
         if (rtsp_server_fd < 0) {
-            av_free(poll_table);
             closesocket(server_fd);
-            return -1;
+            goto quit;
         }
     }
 
     if (!rtsp_server_fd && !server_fd) {
         http_log("HTTP and RTSP disabled.\n");
-        av_free(poll_table);
-        return -1;
+        goto quit;
     }
 
     http_log("FFserver started.\n");
@@ -630,9 +731,8 @@ static int http_server(void)
                     poll_entry++;
                 } else {
                     /* when ffserver is doing the timing, we work by
-                       looking at which packet needs to be sent every
-                       10 ms */
-                    /* one tick wait XXX: 10 ms assumed */
+                     * looking at which packet needs to be sent every
+                     * 10 ms (one tick wait XXX: 10 ms assumed) */
                     if (delay > 10)
                         delay = 10;
                 }
@@ -655,13 +755,12 @@ static int http_server(void)
         }
 
         /* wait for an event on one connection. We poll at least every
-           second to handle timeouts */
+         * second to handle timeouts */
         do {
             ret = poll(poll_table, poll_entry - poll_table, delay);
             if (ret < 0 && ff_neterrno() != AVERROR(EAGAIN) &&
                 ff_neterrno() != AVERROR(EINTR)) {
-                av_free(poll_table);
-                return -1;
+                goto quit;
             }
         } while (ret < 0);
 
@@ -695,6 +794,10 @@ static int http_server(void)
                 new_connection(rtsp_server_fd, 1);
         }
     }
+
+quit:
+    av_free(poll_table);
+    return -1;
 }
 
 /* start waiting for a new HTTP/RTSP request */
@@ -703,13 +806,9 @@ static void start_wait_request(HTTPContext *c, int is_rtsp)
     c->buffer_ptr = c->buffer;
     c->buffer_end = c->buffer + c->buffer_size - 1; /* leave room for '\0' */
 
-    if (is_rtsp) {
-        c->timeout = cur_time + RTSP_REQUEST_TIMEOUT;
-        c->state = RTSPSTATE_WAIT_REQUEST;
-    } else {
-        c->timeout = cur_time + HTTP_REQUEST_TIMEOUT;
-        c->state = HTTPSTATE_WAIT_REQUEST;
-    }
+    c->state = is_rtsp ? RTSPSTATE_WAIT_REQUEST : HTTPSTATE_WAIT_REQUEST;
+    c->timeout = cur_time +
+                 (is_rtsp ? RTSP_REQUEST_TIMEOUT : HTTP_REQUEST_TIMEOUT);
 }
 
 static void http_send_too_busy_reply(int fd)
@@ -719,9 +818,12 @@ static void http_send_too_busy_reply(int fd)
                        "HTTP/1.0 503 Server too busy\r\n"
                        "Content-type: text/html\r\n"
                        "\r\n"
+                       "<!DOCTYPE html>\n"
                        "<html><head><title>Too busy</title></head><body>\r\n"
-                       "<p>The server is too busy to serve your request at this time.</p>\r\n"
-                       "<p>The number of current connections is %u, and this exceeds the limit of %u.</p>\r\n"
+                       "<p>The server is too busy to serve your request at "
+                       "this time.</p>\r\n"
+                       "<p>The number of current connections is %u, and this "
+                       "exceeds the limit of %u.</p>\r\n"
                        "</body></html>\r\n",
                        nb_connections, config.nb_max_connections);
     av_assert0(len < sizeof(buffer));
@@ -787,7 +889,6 @@ static void close_connection(HTTPContext *c)
     HTTPContext **cp, *c1;
     int i, nb_streams;
     AVFormatContext *ctx;
-    URLContext *h;
     AVStream *st;
 
     /* remove connection from list */
@@ -832,9 +933,7 @@ static void close_connection(HTTPContext *c)
             av_freep(&ctx->streams[0]);
             av_freep(&ctx);
         }
-        h = c->rtp_handles[i];
-        if (h)
-            ffurl_close(h);
+        ffurl_close(c->rtp_handles[i]);
     }
 
     ctx = &c->fmt_ctx;
@@ -903,11 +1002,11 @@ static int handle_connection(HTTPContext *c)
         if ((ptr >= c->buffer + 2 && !memcmp(ptr-2, "\n\n", 2)) ||
             (ptr >= c->buffer + 4 && !memcmp(ptr-4, "\r\n\r\n", 4))) {
             /* request found : parse it and reply */
-            if (c->state == HTTPSTATE_WAIT_REQUEST) {
+            if (c->state == HTTPSTATE_WAIT_REQUEST)
                 ret = http_parse_request(c);
-            } else {
+            else
                 ret = rtsp_parse_request(c);
-            }
+
             if (ret < 0)
                 return -1;
         } else if (ptr >= c->buffer_end) {
@@ -952,8 +1051,8 @@ static int handle_connection(HTTPContext *c)
     case HTTPSTATE_SEND_DATA_HEADER:
     case HTTPSTATE_SEND_DATA_TRAILER:
         /* for packetized output, we consider we can always write (the
-           input streams set the speed). It may be better to verify
-           that we do not rely too much on the kernel queues */
+         * input streams set the speed). It may be better to verify
+         * that we do not rely too much on the kernel queues */
         if (!c->is_packetized) {
             if (c->poll_entry->revents & (POLLERR | POLLHUP))
                 return -1;
@@ -1166,8 +1265,10 @@ static int modify_current_stream(HTTPContext *c, char *rates)
                 break;
         }
 
-        if (c->switch_feed_streams[i] >= 0 && c->switch_feed_streams[i] != c->feed_streams[i])
+        if (c->switch_feed_streams[i] >= 0 &&
+            c->switch_feed_streams[i] != c->feed_streams[i]) {
             action_required = 1;
+        }
     }
 
     return action_required;
@@ -1271,17 +1372,17 @@ static int validate_acl(FFServerStream *stream, HTTPContext *c)
 
     if (stream->dynamic_acl[0]) {
         acl = parse_dynamic_acl(stream, c);
-
         ret = validate_acl_list(acl, c);
-
         free_acl_list(acl);
     }
 
     return ret;
 }
 
-/* compute the real filename of a file by matching it without its
-   extensions to all the stream's filenames */
+/**
+ * compute the real filename of a file by matching it without its
+ * extensions to all the stream's filenames
+ */
 static void compute_real_filename(char *filename, int max_size)
 {
     char file1[1024];
@@ -1289,7 +1390,6 @@ static void compute_real_filename(char *filename, int max_size)
     char *p;
     FFServerStream *stream;
 
-    /* compute filename by matching without the file extensions */
     av_strlcpy(file1, filename, sizeof(file1));
     p = strrchr(file1, '.');
     if (p)
@@ -1326,6 +1426,7 @@ static int http_parse_request(HTTPContext *c)
     char url[1024], *q;
     char protocol[32];
     char msg[1024];
+    char *encoded_msg = NULL;
     const char *mime_type;
     FFServerStream *stream;
     int i;
@@ -1399,7 +1500,7 @@ static int http_parse_request(HTTPContext *c)
         compute_real_filename(filename, sizeof(filename) - 1);
     }
 
-    // "redirect" / request to index.html
+    /* "redirect" request to index.html */
     if (!strlen(filename))
         av_strlcpy(filename, "index.html", sizeof(filename) - 1);
 
@@ -1427,6 +1528,7 @@ static int http_parse_request(HTTPContext *c)
                       "Location: %s\r\n"
                       "Content-type: text/html\r\n"
                       "\r\n"
+                      "<!DOCTYPE html>\n"
                       "<html><head><title>Moved</title></head><body>\r\n"
                       "You should be <a href=\"%s\">redirected</a>.\r\n"
                       "</body></html>\r\n",
@@ -1452,7 +1554,7 @@ static int http_parse_request(HTTPContext *c)
     if (c->post == 0 && stream->stream_type == STREAM_TYPE_LIVE)
         current_bandwidth += stream->bandwidth;
 
-    /* If already streaming this feed, do not let start another feeder. */
+    /* If already streaming this feed, do not let another feeder start */
     if (stream->feed_opened) {
         snprintf(msg, sizeof(msg), "This feed is already being received.");
         http_log("Feed '%s' already being received\n", stream->feed_filename);
@@ -1466,10 +1568,13 @@ static int http_parse_request(HTTPContext *c)
                       "HTTP/1.0 503 Server too busy\r\n"
                       "Content-type: text/html\r\n"
                       "\r\n"
+                      "<!DOCTYPE html>\n"
                       "<html><head><title>Too busy</title></head><body>\r\n"
-                      "<p>The server is too busy to serve your request at this time.</p>\r\n"
-                      "<p>The bandwidth being served (including your stream) is %"PRIu64"kbit/sec, "
-                      "and this exceeds the limit of %"PRIu64"kbit/sec.</p>\r\n"
+                      "<p>The server is too busy to serve your request at "
+                      "this time.</p>\r\n"
+                      "<p>The bandwidth being served (including your stream) "
+                      "is %"PRIu64"kbit/s, and this exceeds the limit of "
+                      "%"PRIu64"kbit/s.</p>\r\n"
                       "</body></html>\r\n",
                  current_bandwidth, config.max_bandwidth);
         q += strlen(q);
@@ -1721,25 +1826,33 @@ static int http_parse_request(HTTPContext *c)
  send_error:
     c->http_error = 404;
     q = c->buffer;
-    htmlstrip(msg);
+    if (!htmlencode(msg, &encoded_msg)) {
+        http_log("Could not encode filename '%s' as HTML\n", msg);
+    }
     snprintf(q, c->buffer_size,
                   "HTTP/1.0 404 Not Found\r\n"
                   "Content-type: text/html\r\n"
                   "\r\n"
+                  "<!DOCTYPE html>\n"
                   "<html>\n"
-                  "<head><title>404 Not Found</title></head>\n"
+                  "<head>\n"
+                  "<meta charset=\"UTF-8\">\n"
+                  "<title>404 Not Found</title>\n"
+                  "</head>\n"
                   "<body>%s</body>\n"
-                  "</html>\n", msg);
+                  "</html>\n", encoded_msg? encoded_msg : "File not found");
     q += strlen(q);
     /* prepare output buffer */
     c->buffer_ptr = c->buffer;
     c->buffer_end = q;
     c->state = HTTPSTATE_SEND_HEADER;
+    av_freep(&encoded_msg);
     return 0;
  send_status:
     compute_status(c);
-    c->http_error = 200; /* horrible : we use this value to avoid
-                            going to the send data state */
+    /* HACK: http_error is set to 200 here so that we avoid
+     * going to the send data state */
+    c->http_error = 200;
     c->state = HTTPSTATE_SEND_HEADER;
     return 0;
 }
@@ -1754,6 +1867,52 @@ static void fmt_bytecount(AVIOContext *pb, int64_t count)
     avio_printf(pb, "%"PRId64"%c", count, *s);
 }
 
+static inline void print_stream_params(AVIOContext *pb, FFServerStream *stream)
+{
+    int i, stream_no;
+    const char *type = "unknown";
+    char parameters[64];
+    AVStream *st;
+    AVCodec *codec;
+
+    stream_no = stream->nb_streams;
+
+    avio_printf(pb, "<table cellspacing=0 cellpadding=4><tr><th>Stream<th>"
+                    "type<th>kbit/s<th align=left>codec<th align=left>"
+                    "Parameters\n");
+
+    for (i = 0; i < stream_no; i++) {
+        st = stream->streams[i];
+        codec = avcodec_find_encoder(st->codec->codec_id);
+
+        parameters[0] = 0;
+
+        switch(st->codec->codec_type) {
+        case AVMEDIA_TYPE_AUDIO:
+            type = "audio";
+            snprintf(parameters, sizeof(parameters), "%d channel(s), %d Hz",
+                     st->codec->channels, st->codec->sample_rate);
+            break;
+        case AVMEDIA_TYPE_VIDEO:
+            type = "video";
+            snprintf(parameters, sizeof(parameters),
+                     "%dx%d, q=%d-%d, fps=%d", st->codec->width,
+                     st->codec->height, st->codec->qmin, st->codec->qmax,
+                     st->codec->time_base.den / st->codec->time_base.num);
+            break;
+        default:
+            abort();
+        }
+
+        avio_printf(pb, "<tr><td align=right>%d<td>%s<td align=right>%"PRId64
+                        "<td>%s<td>%s\n",
+                    i, type, (int64_t)st->codec->bit_rate/1000,
+                    codec ? codec->name : "", parameters);
+     }
+
+     avio_printf(pb, "</table>\n");
+}
+
 static void compute_status(HTTPContext *c)
 {
     HTTPContext *c1;
@@ -1775,6 +1934,7 @@ static void compute_status(HTTPContext *c)
     avio_printf(pb, "Pragma: no-cache\r\n");
     avio_printf(pb, "\r\n");
 
+    avio_printf(pb, "<!DOCTYPE html>\n");
     avio_printf(pb, "<html><head><title>%s Status</title>\n", program_name);
     if (c->stream->feed_filename[0])
         avio_printf(pb, "<link rel=\"shortcut icon\" href=\"%s\">\n",
@@ -1784,7 +1944,7 @@ static void compute_status(HTTPContext *c)
     /* format status */
     avio_printf(pb, "<h2>Available Streams</h2>\n");
     avio_printf(pb, "<table cellspacing=0 cellpadding=4>\n");
-    avio_printf(pb, "<tr><th valign=top>Path<th align=left>Served<br>Conns<th><br>bytes<th valign=top>Format<th>Bit rate<br>kbits/s<th align=left>Video<br>kbits/s<th><br>Codec<th align=left>Audio<br>kbits/s<th><br>Codec<th align=left valign=top>Feed\n");
+    avio_printf(pb, "<tr><th valign=top>Path<th align=left>Served<br>Conns<th><br>bytes<th valign=top>Format<th>Bit rate<br>kbit/s<th align=left>Video<br>kbit/s<th><br>Codec<th align=left>Audio<br>kbit/s<th><br>Codec<th align=left valign=top>Feed\n");
     stream = config.first_stream;
     while (stream) {
         char sfilename[1024];
@@ -1804,8 +1964,8 @@ static void compute_status(HTTPContext *c)
                 strcpy(eosf - 3, ".ram");
             else if (stream->fmt && !strcmp(stream->fmt->name, "rtp")) {
                 /* generate a sample RTSP director if
-                   unicast. Generate an SDP redirector if
-                   multicast */
+                 * unicast. Generate an SDP redirector if
+                 * multicast */
                 eosf = strrchr(sfilename, '.');
                 if (!eosf)
                     eosf = sfilename + strlen(sfilename);
@@ -1894,7 +2054,7 @@ static void compute_status(HTTPContext *c)
 
         avio_printf(pb, "<h2>Feed %s</h2>", stream->filename);
         if (stream->pid) {
-            avio_printf(pb, "Running as pid %d.\n", stream->pid);
+            avio_printf(pb, "Running as pid %"PRId64".\n", (int64_t) stream->pid);
 
 #if defined(linux)
             {
@@ -1903,8 +2063,8 @@ static void compute_status(HTTPContext *c)
 
                 /* This is somewhat linux specific I guess */
                 snprintf(ps_cmd, sizeof(ps_cmd),
-                         "ps -o \"%%cpu,cputime\" --no-headers %d",
-                         stream->pid);
+                         "ps -o \"%%cpu,cputime\" --no-headers %"PRId64"",
+                         (int64_t) stream->pid);
 
                  pid_stat = popen(ps_cmd, "r");
                  if (pid_stat) {
@@ -1924,42 +2084,7 @@ static void compute_status(HTTPContext *c)
             avio_printf(pb, "<p>");
         }
 
-        avio_printf(pb, "<table cellspacing=0 cellpadding=4><tr><th>Stream<th>"
-                        "type<th>kbits/s<th align=left>codec<th align=left>"
-                        "Parameters\n");
-
-        for (i = 0; i < stream->nb_streams; i++) {
-            AVStream *st = stream->streams[i];
-            AVCodec *codec = avcodec_find_encoder(st->codec->codec_id);
-            const char *type = "unknown";
-            char parameters[64];
-
-            parameters[0] = 0;
-
-            switch(st->codec->codec_type) {
-            case AVMEDIA_TYPE_AUDIO:
-                type = "audio";
-                snprintf(parameters, sizeof(parameters), "%d channel(s), %d Hz",
-                         st->codec->channels, st->codec->sample_rate);
-                break;
-            case AVMEDIA_TYPE_VIDEO:
-                type = "video";
-                snprintf(parameters, sizeof(parameters),
-                         "%dx%d, q=%d-%d, fps=%d", st->codec->width,
-                         st->codec->height, st->codec->qmin, st->codec->qmax,
-                         st->codec->time_base.den / st->codec->time_base.num);
-                break;
-            default:
-                abort();
-            }
-
-            avio_printf(pb, "<tr><td align=right>%d<td>%s<td align=right>%d"
-                            "<td>%s<td>%s\n",
-                        i, type, st->codec->bit_rate/1000,
-                        codec ? codec->name : "", parameters);
-        }
-
-        avio_printf(pb, "</table>\n");
+        print_stream_params(pb, stream);
         stream = stream->next;
     }
 
@@ -1974,7 +2099,7 @@ static void compute_status(HTTPContext *c)
 
     avio_printf(pb, "<table>\n");
     avio_printf(pb, "<tr><th>#<th>File<th>IP<th>Proto<th>State<th>Target "
-                    "bits/sec<th>Actual bits/sec<th>Bytes transferred\n");
+                    "bit/s<th>Actual bit/s<th>Bytes transferred\n");
     c1 = first_http_ctx;
     i = 0;
     while (c1) {
@@ -2111,8 +2236,7 @@ static int64_t get_server_clock(HTTPContext *c)
     return (cur_time - c->start_time) * 1000;
 }
 
-/* return the estimated time at which the current packet must be sent
-   (in us) */
+/* return the estimated time (in us) at which the current packet must be sent */
 static int64_t get_packet_send_clock(HTTPContext *c)
 {
     int bytes_left, bytes_sent, frame_bytes;
@@ -2120,11 +2244,10 @@ static int64_t get_packet_send_clock(HTTPContext *c)
     frame_bytes = c->cur_frame_bytes;
     if (frame_bytes <= 0)
         return c->cur_pts;
-    else {
-        bytes_left = c->buffer_end - c->buffer_ptr;
-        bytes_sent = frame_bytes - bytes_left;
-        return c->cur_pts + (c->cur_frame_duration * bytes_sent) / frame_bytes;
-    }
+
+    bytes_left = c->buffer_end - c->buffer_ptr;
+    bytes_sent = frame_bytes - bytes_left;
+    return c->cur_pts + (c->cur_frame_duration * bytes_sent) / frame_bytes;
 }
 
 
@@ -2151,7 +2274,8 @@ static int http_prepare_data(HTTPContext *c)
             AVStream *src;
             c->fmt_ctx.streams[i] = av_mallocz(sizeof(AVStream));
 
-            /* if file or feed, then just take streams from FFServerStream struct */
+            /* if file or feed, then just take streams from FFServerStream
+             * struct */
             if (!c->stream->feed ||
                 c->stream->feed == c->stream)
                 src = c->stream->streams[i];
@@ -2216,28 +2340,28 @@ static int http_prepare_data(HTTPContext *c)
             if (ret < 0) {
                 if (c->stream->feed) {
                     /* if coming from feed, it means we reached the end of the
-                       ffm file, so must wait for more data */
+                     * ffm file, so must wait for more data */
                     c->state = HTTPSTATE_WAIT_FEED;
                     return 1; /* state changed */
-                } else if (ret == AVERROR(EAGAIN)) {
+                }
+                if (ret == AVERROR(EAGAIN)) {
                     /* input not ready, come back later */
                     return 0;
+                }
+                if (c->stream->loop) {
+                    avformat_close_input(&c->fmt_in);
+                    if (open_input_stream(c, "") < 0)
+                        goto no_loop;
+                    goto redo;
                 } else {
-                    if (c->stream->loop) {
-                        avformat_close_input(&c->fmt_in);
-                        if (open_input_stream(c, "") < 0)
-                            goto no_loop;
-                        goto redo;
-                    } else {
                     no_loop:
                         /* must send trailer now because EOF or error */
                         c->state = HTTPSTATE_SEND_DATA_TRAILER;
-                    }
                 }
             } else {
                 int source_index = pkt.stream_index;
                 /* update first pts if needed */
-                if (c->first_pts == AV_NOPTS_VALUE) {
+                if (c->first_pts == AV_NOPTS_VALUE && pkt.dts != AV_NOPTS_VALUE) {
                     c->first_pts = av_rescale_q(pkt.dts, c->fmt_in->streams[pkt.stream_index]->time_base, AV_TIME_BASE_Q);
                     c->start_time = cur_time;
                 }
@@ -2276,14 +2400,16 @@ static int http_prepare_data(HTTPContext *c)
                      * XXX: need more abstract handling */
                     if (c->is_packetized) {
                         /* compute send time and duration */
-                        c->cur_pts = av_rescale_q(pkt.dts, ist->time_base, AV_TIME_BASE_Q);
-                        c->cur_pts -= c->first_pts;
+                        if (pkt.dts != AV_NOPTS_VALUE) {
+                            c->cur_pts = av_rescale_q(pkt.dts, ist->time_base, AV_TIME_BASE_Q);
+                            c->cur_pts -= c->first_pts;
+                        }
                         c->cur_frame_duration = av_rescale_q(pkt.duration, ist->time_base, AV_TIME_BASE_Q);
                         /* find RTP context */
                         c->packet_stream_index = pkt.stream_index;
                         ctx = c->rtp_ctx[c->packet_stream_index];
                         if(!ctx) {
-                            av_free_packet(&pkt);
+                            av_packet_unref(&pkt);
                             break;
                         }
                         codec = ctx->streams[0]->codec;
@@ -2303,9 +2429,9 @@ static int http_prepare_data(HTTPContext *c)
                             max_packet_size = c->rtp_handles[c->packet_stream_index]->max_packet_size;
                         ret = ffio_open_dyn_packet_buf(&ctx->pb,
                                                        max_packet_size);
-                    } else {
+                    } else
                         ret = avio_open_dyn_buf(&ctx->pb);
-                    }
+
                     if (ret < 0) {
                         /* XXX: potential leak */
                         return -1;
@@ -2329,17 +2455,18 @@ static int http_prepare_data(HTTPContext *c)
 
                     av_freep(&c->pb_buffer);
                     len = avio_close_dyn_buf(ctx->pb, &c->pb_buffer);
+                    ctx->pb = NULL;
                     c->cur_frame_bytes = len;
                     c->buffer_ptr = c->pb_buffer;
                     c->buffer_end = c->pb_buffer + len;
 
                     codec->frame_number++;
                     if (len == 0) {
-                        av_free_packet(&pkt);
+                        av_packet_unref(&pkt);
                         goto redo;
                     }
                 }
-                av_free_packet(&pkt);
+                av_packet_unref(&pkt);
             }
         }
         break;
@@ -2368,7 +2495,8 @@ static int http_prepare_data(HTTPContext *c)
 
 /* should convert the format at the same time */
 /* send data starting at c->buffer_ptr to the output connection
- * (either UDP or TCP) */
+ * (either UDP or TCP)
+ */
 static int http_send_data(HTTPContext *c)
 {
     int len, ret;
@@ -2449,8 +2577,8 @@ static int http_send_data(HTTPContext *c)
                         rtsp_c->packet_buffer_ptr += len;
                     if (rtsp_c->packet_buffer_ptr < rtsp_c->packet_buffer_end) {
                         /* if we could not send all the data, we will
-                           send it later, so a new state is needed to
-                           "lock" the RTSP TCP connection */
+                         * send it later, so a new state is needed to
+                         * "lock" the RTSP TCP connection */
                         rtsp_c->state = RTSPSTATE_SEND_PACKET;
                         break;
                     } else
@@ -2534,9 +2662,8 @@ static int http_start_receive_data(HTTPContext *c)
             http_log("Error reading write index from feed file '%s': %s\n",
                      c->stream->feed_filename, strerror(errno));
             return ret;
-        } else {
-            c->stream->feed_write_index = ret;
         }
+        c->stream->feed_write_index = ret;
     }
 
     c->stream->feed_write_index = FFMAX(ffm_read_write_index(fd),
@@ -2578,12 +2705,11 @@ static int http_receive_data(HTTPContext *c)
                 goto fail;
             c->buffer_ptr = c->buffer;
             break;
-        } else if (++loop_run > 10) {
+        } else if (++loop_run > 10)
             /* no chunk header, abort */
             goto fail;
-        } else {
+        else
             c->buffer_ptr++;
-        }
     }
 
     if (c->buffer_end > c->buffer_ptr) {
@@ -2616,7 +2742,7 @@ static int http_receive_data(HTTPContext *c)
     if (c->buffer_ptr >= c->buffer_end) {
         FFServerStream *feed = c->stream;
         /* a packet has been received : write it in the store, except
-           if header */
+         * if header */
         if (c->data_count > FFM_PACKET_SIZE) {
             /* XXX: use llseek or url_seek
              * XXX: Should probably fail? */
@@ -2797,7 +2923,7 @@ static int rtsp_parse_request(HTTPContext *c)
             len = sizeof(line) - 1;
         memcpy(line, p, len);
         line[len] = '\0';
-        ff_rtsp_parse_line(header, line, NULL, NULL);
+        ff_rtsp_parse_line(NULL, header, line, NULL, NULL);
         p = p1 + 1;
     }
 
@@ -2822,10 +2948,10 @@ static int rtsp_parse_request(HTTPContext *c)
  the_end:
     len = avio_close_dyn_buf(c->pb, &c->pb_buffer);
     c->pb = NULL; /* safety */
-    if (len < 0) {
+    if (len < 0)
         /* XXX: cannot do more */
         return -1;
-    }
+
     c->buffer_ptr = c->pb_buffer;
     c->buffer_end = c->pb_buffer + len;
     c->state = RTSPSTATE_SEND_REPLY;
@@ -2844,9 +2970,9 @@ static int prepare_sdp_description(FFServerStream *stream, uint8_t **pbuffer,
     *pbuffer = NULL;
 
     avc =  avformat_alloc_context();
-    if (!avc || !rtp_format) {
+    if (!avc || !rtp_format)
         return -1;
-    }
+
     avc->oformat = rtp_format;
     av_dict_set(&avc->metadata, "title",
                 entry ? entry->value : "No Title", 0);
@@ -2855,9 +2981,8 @@ static int prepare_sdp_description(FFServerStream *stream, uint8_t **pbuffer,
         snprintf(avc->filename, 1024, "rtp://%s:%d?multicast=1?ttl=%d",
                  inet_ntoa(stream->multicast_ip),
                  stream->multicast_port, stream->multicast_ttl);
-    } else {
+    } else
         snprintf(avc->filename, 1024, "rtp://0.0.0.0");
-    }
 
     avc->streams = av_malloc_array(avc->nb_streams, sizeof(*avc->streams));
     if (!avc->streams)
@@ -2887,7 +3012,7 @@ static int prepare_sdp_description(FFServerStream *stream, uint8_t **pbuffer,
 
 static void rtsp_cmd_options(HTTPContext *c, const char *url)
 {
-//    rtsp_reply_header(c, RTSP_STATUS_OK);
+    /* rtsp_reply_header(c, RTSP_STATUS_OK); */
     avio_printf(c->pb, "RTSP/1.0 %d %s\r\n", RTSP_STATUS_OK, "OK");
     avio_printf(c->pb, "CSeq: %d\r\n", c->seq);
     avio_printf(c->pb, "Public: %s\r\n",
@@ -3054,7 +3179,7 @@ static void rtsp_cmd_setup(HTTPContext *c, const char *url,
     }
 
     /* test if stream is OK (test needed because several SETUP needs
-       to be done for a given file) */
+     * to be done for a given file) */
     if (rtp_c->stream != stream) {
         rtsp_reply_error(c, RTSP_STATUS_SERVICE);
         return;
@@ -3115,8 +3240,10 @@ static void rtsp_cmd_setup(HTTPContext *c, const char *url,
 }
 
 
-/* find an RTP connection by using the session ID. Check consistency
-   with filename */
+/**
+ * Find an RTP connection by using the session ID. Check that it is
+ * consistent with the filename.
+ */
 static HTTPContext *find_rtp_session_with_url(const char *url,
                                               const char *session_id)
 {
@@ -3139,10 +3266,10 @@ static HTTPContext *find_rtp_session_with_url(const char *url,
     for(s=0; s<rtp_c->stream->nb_streams; ++s) {
       snprintf(buf, sizeof(buf), "%s/streamid=%d",
         rtp_c->stream->filename, s);
-      if(!strncmp(path, buf, sizeof(buf))) {
-    // XXX: Should we reply with RTSP_STATUS_ONLY_AGGREGATE if nb_streams>1?
+      if(!strncmp(path, buf, sizeof(buf)))
+        /* XXX: Should we reply with RTSP_STATUS_ONLY_AGGREGATE
+         * if nb_streams>1? */
         return rtp_c;
-      }
     }
     len = strlen(path);
     if (len > 0 && path[len - 1] == '/' &&
@@ -3220,7 +3347,7 @@ static HTTPContext *rtp_new_connection(struct sockaddr_in *from_addr,
     const char *proto_str;
 
     /* XXX: should output a warning page when coming
-       close to the connection limit */
+     * close to the connection limit */
     if (nb_connections >= config.nb_max_connections)
         goto fail;
 
@@ -3275,9 +3402,11 @@ static HTTPContext *rtp_new_connection(struct sockaddr_in *from_addr,
     return NULL;
 }
 
-/* add a new RTP stream in an RTP connection (used in RTSP SETUP
-   command). If RTP/TCP protocol is used, TCP connection 'rtsp_c' is
-   used. */
+/**
+ * Add a new RTP stream to an RTP connection (used by the RTSP SETUP
+ * command). If the RTP/TCP protocol is used, the TCP connection
+ * 'rtsp_c' is used.
+ */
 static int rtp_new_av_stream(HTTPContext *c,
                              int stream_index, struct sockaddr_in *dest_addr,
                              HTTPContext *rtsp_c)
@@ -3288,6 +3417,7 @@ static int rtp_new_av_stream(HTTPContext *c,
     URLContext *h = NULL;
     uint8_t *dummy_buf;
     int max_packet_size;
+    void *st_internal;
 
     /* now we can open the relevant output stream */
     ctx = avformat_alloc_context();
@@ -3295,14 +3425,13 @@ static int rtp_new_av_stream(HTTPContext *c,
         return -1;
     ctx->oformat = av_guess_format("rtp", NULL, NULL);
 
-    st = av_mallocz(sizeof(AVStream));
+    st = avformat_new_stream(ctx, NULL);
     if (!st)
         goto fail;
-    ctx->nb_streams = 1;
-    ctx->streams = av_mallocz_array(ctx->nb_streams, sizeof(AVStream *));
-    if (!ctx->streams)
-      goto fail;
-    ctx->streams[0] = st;
+
+    av_freep(&st->codec);
+    av_freep(&st->info);
+    st_internal = st->internal;
 
     if (!c->stream->feed ||
         c->stream->feed == c->stream)
@@ -3312,6 +3441,7 @@ static int rtp_new_av_stream(HTTPContext *c,
                c->stream->feed->streams[c->stream->feed_streams[stream_index]],
                sizeof(AVStream));
     st->priv_data = NULL;
+    st->internal = st_internal;
 
     /* build destination RTP address */
     ipaddr = inet_ntoa(dest_addr->sin_addr);
@@ -3355,10 +3485,10 @@ static int rtp_new_av_stream(HTTPContext *c,
 
     /* normally, no packets should be output here, but the packet size may
      * be checked */
-    if (ffio_open_dyn_packet_buf(&ctx->pb, max_packet_size) < 0) {
+    if (ffio_open_dyn_packet_buf(&ctx->pb, max_packet_size) < 0)
         /* XXX: close stream */
         goto fail;
-    }
+
     if (avformat_write_header(ctx, NULL) < 0) {
     fail:
         if (h)
@@ -3368,6 +3498,7 @@ static int rtp_new_av_stream(HTTPContext *c,
         return -1;
     }
     avio_close_dyn_buf(ctx->pb, &dummy_buf);
+    ctx->pb = NULL;
     av_free(dummy_buf);
 
     c->rtp_ctx[stream_index] = ctx;
@@ -3377,6 +3508,7 @@ static int rtp_new_av_stream(HTTPContext *c,
 /********************************************************************/
 /* ffserver initialization */
 
+/* FIXME: This code should use avformat_new_stream() */
 static AVStream *add_av_stream1(FFServerStream *stream,
                                 AVCodecContext *codec, int copy)
 {
@@ -3395,13 +3527,14 @@ static AVStream *add_av_stream1(FFServerStream *stream,
             return NULL;
         }
         avcodec_copy_context(fst->codec, codec);
-    } else {
+    } else
         /* live streams must use the actual feed's codec since it may be
          * updated later to carry extradata needed by them.
          */
         fst->codec = codec;
-    }
+
     fst->priv_data = av_mallocz(sizeof(FeedData));
+    fst->internal = av_mallocz(sizeof(*fst->internal));
     fst->index = stream->nb_streams;
     avpriv_set_pts_info(fst, 33, 1, 90000);
     fst->sample_aspect_ratio = codec->sample_aspect_ratio;
@@ -3502,7 +3635,7 @@ static void extract_mpeg4_header(AVFormatContext *infile)
                 if (p[0] == 0x00 && p[1] == 0x00 &&
                     p[2] == 0x01 && p[3] == 0xb6) {
                     size = p - pkt.data;
-                    st->codec->extradata = av_mallocz(size + FF_INPUT_BUFFER_PADDING_SIZE);
+                    st->codec->extradata = av_mallocz(size + AV_INPUT_BUFFER_PADDING_SIZE);
                     st->codec->extradata_size = size;
                     memcpy(st->codec->extradata, pkt.data, size);
                     break;
@@ -3511,199 +3644,222 @@ static void extract_mpeg4_header(AVFormatContext *infile)
             }
             mpeg4_count--;
         }
-        av_free_packet(&pkt);
+        av_packet_unref(&pkt);
     }
 }
 
 /* compute the needed AVStream for each file */
 static void build_file_streams(void)
 {
-    FFServerStream *stream, *stream_next;
+    FFServerStream *stream;
+    AVFormatContext *infile;
     int i, ret;
 
     /* gather all streams */
-    for(stream = config.first_stream; stream; stream = stream_next) {
-        AVFormatContext *infile = NULL;
-        stream_next = stream->next;
-        if (stream->stream_type == STREAM_TYPE_LIVE &&
-            !stream->feed) {
-            /* the stream comes from a file */
-            /* try to open the file */
-            /* open stream */
-            if (stream->fmt && !strcmp(stream->fmt->name, "rtp")) {
-                /* specific case : if transport stream output to RTP,
-                   we use a raw transport stream reader */
-                av_dict_set(&stream->in_opts, "mpeg2ts_compute_pcr", "1", 0);
-            }
+    for(stream = config.first_stream; stream; stream = stream->next) {
+        infile = NULL;
 
-            if (!stream->feed_filename[0]) {
-                http_log("Unspecified feed file for stream '%s'\n",
-                         stream->filename);
-                goto fail;
-            }
+        if (stream->stream_type != STREAM_TYPE_LIVE || stream->feed)
+            continue;
 
-            http_log("Opening feed file '%s' for stream '%s'\n",
-                     stream->feed_filename, stream->filename);
-            ret = avformat_open_input(&infile, stream->feed_filename,
-                                      stream->ifmt, &stream->in_opts);
-            if (ret < 0) {
-                http_log("Could not open '%s': %s\n", stream->feed_filename,
-                         av_err2str(ret));
-                /* remove stream (no need to spend more time on it) */
-            fail:
-                remove_stream(stream);
-            } else {
-                /* find all the AVStreams inside and reference them in
-                   'stream' */
-                if (avformat_find_stream_info(infile, NULL) < 0) {
-                    http_log("Could not find codec parameters from '%s'\n",
-                             stream->feed_filename);
-                    avformat_close_input(&infile);
-                    goto fail;
-                }
-                extract_mpeg4_header(infile);
+        /* The stream comes from a file (it is live and has no feed):
+         * try to open that file and reference its AVStreams in 'stream'.
+         */
 
-                for(i=0;i<infile->nb_streams;i++)
-                    add_av_stream1(stream, infile->streams[i]->codec, 1);
 
+        /* specific case: if transport stream output to RTP,
+         * we use a raw transport stream reader */
+        if (stream->fmt && !strcmp(stream->fmt->name, "rtp"))
+            av_dict_set(&stream->in_opts, "mpeg2ts_compute_pcr", "1", 0);
+
+        if (!stream->feed_filename[0]) {
+            http_log("Unspecified feed file for stream '%s'\n",
+                     stream->filename);
+            goto fail;
+        }
+
+        http_log("Opening feed file '%s' for stream '%s'\n",
+                 stream->feed_filename, stream->filename);
+
+        ret = avformat_open_input(&infile, stream->feed_filename,
+                                  stream->ifmt, &stream->in_opts);
+        if (ret < 0) {
+            http_log("Could not open '%s': %s\n", stream->feed_filename,
+                     av_err2str(ret));
+            /* remove stream (no need to spend more time on it) */
+        fail:
+            remove_stream(stream);
+        } else {
+            /* find all the AVStreams inside and reference them in
+             * 'stream' */
+            if (avformat_find_stream_info(infile, NULL) < 0) {
+                http_log("Could not find codec parameters from '%s'\n",
+                         stream->feed_filename);
                 avformat_close_input(&infile);
+                goto fail;
             }
+            extract_mpeg4_header(infile);
+
+            for(i=0;i<infile->nb_streams;i++)
+                add_av_stream1(stream, infile->streams[i]->codec, 1);
+
+            avformat_close_input(&infile);
         }
     }
 }
 
+static inline
+int check_codec_match(AVCodecContext *ccf, AVCodecContext *ccs, int stream)
+{
+    int matches = 1;
+
+#define CHECK_CODEC(x)  (ccf->x != ccs->x)
+    if (CHECK_CODEC(codec_id) || CHECK_CODEC(codec_type)) {
+        http_log("Codecs do not match for stream %d\n", stream);
+        matches = 0;
+    } else if (CHECK_CODEC(bit_rate) || CHECK_CODEC(flags)) {
+        http_log("Codec bitrates do not match for stream %d\n", stream);
+        matches = 0;
+    } else if (ccf->codec_type == AVMEDIA_TYPE_VIDEO) {
+        if (CHECK_CODEC(time_base.den) ||
+            CHECK_CODEC(time_base.num) ||
+            CHECK_CODEC(width) ||
+            CHECK_CODEC(height)) {
+            http_log("Codec width, height or framerate do not match for stream %d\n", stream);
+            matches = 0;
+        }
+    } else if (ccf->codec_type == AVMEDIA_TYPE_AUDIO) {
+        if (CHECK_CODEC(sample_rate) ||
+            CHECK_CODEC(channels) ||
+            CHECK_CODEC(frame_size)) {
+            http_log("Codec sample_rate, channels, frame_size do not match for stream %d\n", stream);
+            matches = 0;
+        }
+    } else {
+        http_log("Unknown codec type for stream %d\n", stream);
+        matches = 0;
+    }
+
+    return matches;
+}
+
 /* compute the needed AVStream for each feed */
-static void build_feed_streams(void)
+static int build_feed_streams(void)
 {
     FFServerStream *stream, *feed;
-    int i;
+    int i, fd;
 
     /* gather all streams */
     for(stream = config.first_stream; stream; stream = stream->next) {
         feed = stream->feed;
-        if (feed) {
-            if (stream->is_feed) {
-                for(i=0;i<stream->nb_streams;i++)
-                    stream->feed_streams[i] = i;
-            } else {
-                /* we handle a stream coming from a feed */
-                for(i=0;i<stream->nb_streams;i++)
-                    stream->feed_streams[i] = add_av_stream(feed,
-                                                            stream->streams[i]);
-            }
+        if (!feed)
+            continue;
+
+        if (stream->is_feed) {
+            for(i=0;i<stream->nb_streams;i++)
+                stream->feed_streams[i] = i;
+            continue;
         }
+        /* we handle a stream coming from a feed */
+        for(i=0;i<stream->nb_streams;i++)
+            stream->feed_streams[i] = add_av_stream(feed, stream->streams[i]);
     }
 
     /* create feed files if needed */
     for(feed = config.first_feed; feed; feed = feed->next_feed) {
-        int fd;
 
         if (avio_check(feed->feed_filename, AVIO_FLAG_READ) > 0) {
-            /* See if it matches */
             AVFormatContext *s = NULL;
             int matches = 0;
 
-            if (avformat_open_input(&s, feed->feed_filename, NULL, NULL) >= 0) {
-                /* set buffer size */
-                int ret = ffio_set_buf_size(s->pb, FFM_PACKET_SIZE);
-                if (ret < 0) {
-                    http_log("Failed to set buffer size\n");
-                    exit(1);
-                }
+            /* See if it matches */
 
-                /* Now see if it matches */
-                if (s->nb_streams == feed->nb_streams) {
-                    matches = 1;
-                    for(i=0;i<s->nb_streams;i++) {
-                        AVStream *sf, *ss;
-                        sf = feed->streams[i];
-                        ss = s->streams[i];
-
-                        if (sf->index != ss->index ||
-                            sf->id != ss->id) {
-                            http_log("Index & Id do not match for stream %d (%s)\n",
-                                   i, feed->feed_filename);
-                            matches = 0;
-                        } else {
-                            AVCodecContext *ccf, *ccs;
-
-                            ccf = sf->codec;
-                            ccs = ss->codec;
-#define CHECK_CODEC(x)  (ccf->x != ccs->x)
+            if (avformat_open_input(&s, feed->feed_filename, NULL, NULL) < 0) {
+                http_log("Deleting feed file '%s' as it appears "
+                            "to be corrupt\n",
+                         feed->feed_filename);
+                goto drop;
+            }
 
-                            if (CHECK_CODEC(codec_id) || CHECK_CODEC(codec_type)) {
-                                http_log("Codecs do not match for stream %d\n", i);
-                                matches = 0;
-                            } else if (CHECK_CODEC(bit_rate) || CHECK_CODEC(flags)) {
-                                http_log("Codec bitrates do not match for stream %d\n", i);
-                                matches = 0;
-                            } else if (ccf->codec_type == AVMEDIA_TYPE_VIDEO) {
-                                if (CHECK_CODEC(time_base.den) ||
-                                    CHECK_CODEC(time_base.num) ||
-                                    CHECK_CODEC(width) ||
-                                    CHECK_CODEC(height)) {
-                                    http_log("Codec width, height and framerate do not match for stream %d\n", i);
-                                    matches = 0;
-                                }
-                            } else if (ccf->codec_type == AVMEDIA_TYPE_AUDIO) {
-                                if (CHECK_CODEC(sample_rate) ||
-                                    CHECK_CODEC(channels) ||
-                                    CHECK_CODEC(frame_size)) {
-                                    http_log("Codec sample_rate, channels, frame_size do not match for stream %d\n", i);
-                                    matches = 0;
-                                }
-                            } else {
-                                http_log("Unknown codec type\n");
-                                matches = 0;
-                            }
-                        }
-                        if (!matches)
-                            break;
-                    }
-                } else
-                    http_log("Deleting feed file '%s' as stream counts differ (%d != %d)\n",
-                        feed->feed_filename, s->nb_streams, feed->nb_streams);
+            /* set buffer size */
+            if (ffio_set_buf_size(s->pb, FFM_PACKET_SIZE) < 0) {
+                http_log("Failed to set buffer size\n");
+                avformat_close_input(&s);
+                goto bail;
+            }
+
+            /* Now see if it matches */
+            if (s->nb_streams != feed->nb_streams) {
+                http_log("Deleting feed file '%s' as stream counts "
+                            "differ (%d != %d)\n",
+                         feed->feed_filename, s->nb_streams, feed->nb_streams);
+                goto drop;
+            }
+
+            matches = 1;
+            for(i=0;i<s->nb_streams;i++) {
+                AVStream *sf, *ss;
 
+                sf = feed->streams[i];
+                ss = s->streams[i];
+
+                if (sf->index != ss->index || sf->id != ss->id) {
+                    http_log("Index & Id do not match for stream %d (%s)\n",
+                             i, feed->feed_filename);
+                    matches = 0;
+                    break;
+                }
+
+                matches = check_codec_match (sf->codec, ss->codec, i);
+                if (!matches)
+                    break;
+            }
+
+drop:
+            if (s)
                 avformat_close_input(&s);
-            } else
-                http_log("Deleting feed file '%s' as it appears to be corrupt\n",
-                        feed->feed_filename);
 
             if (!matches) {
                 if (feed->readonly) {
-                    http_log("Unable to delete feed file '%s' as it is marked readonly\n",
-                        feed->feed_filename);
-                    exit(1);
+                    http_log("Unable to delete read-only feed file '%s'\n",
+                             feed->feed_filename);
+                    goto bail;
                 }
                 unlink(feed->feed_filename);
             }
         }
+
         if (avio_check(feed->feed_filename, AVIO_FLAG_WRITE) <= 0) {
             AVFormatContext *s = avformat_alloc_context();
 
             if (!s) {
                 http_log("Failed to allocate context\n");
-                exit(1);
+                goto bail;
             }
 
             if (feed->readonly) {
-                http_log("Unable to create feed file '%s' as it is marked readonly\n",
-                    feed->feed_filename);
-                exit(1);
+                http_log("Unable to create feed file '%s' as it is "
+                            "marked readonly\n",
+                         feed->feed_filename);
+                avformat_free_context(s);
+                goto bail;
             }
 
             /* only write the header of the ffm file */
             if (avio_open(&s->pb, feed->feed_filename, AVIO_FLAG_WRITE) < 0) {
                 http_log("Could not open output feed file '%s'\n",
                          feed->feed_filename);
-                exit(1);
+                avformat_free_context(s);
+                goto bail;
             }
             s->oformat = feed->fmt;
             s->nb_streams = feed->nb_streams;
             s->streams = feed->streams;
             if (avformat_write_header(s, NULL) < 0) {
                 http_log("Container doesn't support the required parameters\n");
-                exit(1);
+                avio_closep(&s->pb);
+                avformat_free_context(s);
+                goto bail;
             }
             /* XXX: need better API */
             av_freep(&s->priv_data);
@@ -3712,15 +3868,17 @@ static void build_feed_streams(void)
             s->nb_streams = 0;
             avformat_free_context(s);
         }
+
         /* get feed size and write index */
         fd = open(feed->feed_filename, O_RDONLY);
         if (fd < 0) {
             http_log("Could not open output feed file '%s'\n",
                     feed->feed_filename);
-            exit(1);
+            goto bail;
         }
 
-        feed->feed_write_index = FFMAX(ffm_read_write_index(fd), FFM_PACKET_SIZE);
+        feed->feed_write_index = FFMAX(ffm_read_write_index(fd),
+                                       FFM_PACKET_SIZE);
         feed->feed_size = lseek(fd, 0, SEEK_END);
         /* ensure that we do not wrap before the end of file */
         if (feed->feed_max_size && feed->feed_max_size < feed->feed_size)
@@ -3728,6 +3886,10 @@ static void build_feed_streams(void)
 
         close(fd);
     }
+    return 0;
+
+bail:
+    return -1;
 }
 
 /* compute the bandwidth used by each stream */
@@ -3758,23 +3920,25 @@ static void handle_child_exit(int sig)
 {
     pid_t pid;
     int status;
+    time_t uptime;
 
     while ((pid = waitpid(-1, &status, WNOHANG)) > 0) {
         FFServerStream *feed;
 
         for (feed = config.first_feed; feed; feed = feed->next) {
-            if (feed->pid == pid) {
-                int uptime = time(0) - feed->pid_start;
+            if (feed->pid != pid)
+                continue;
 
-                feed->pid = 0;
-                fprintf(stderr,
-                        "%s: Pid %d exited with status %d after %d seconds\n",
-                        feed->filename, pid, status, uptime);
+            uptime = time(0) - feed->pid_start;
+            feed->pid = 0;
+            fprintf(stderr,
+                    "%s: Pid %"PRId64" exited with status %d after %"PRId64" "
+                        "seconds\n",
+                    feed->filename, (int64_t) pid, status, (int64_t)uptime);
 
-                if (uptime < 30)
-                    /* Turn off any more restarts */
-                    ffserver_free_child_args(&feed->child_argv);
-            }
+            if (uptime < 30)
+                /* Turn off any more restarts */
+                ffserver_free_child_args(&feed->child_argv);
         }
     }
 
@@ -3806,7 +3970,9 @@ static const OptionDef options[] = {
 int main(int argc, char **argv)
 {
     struct sigaction sigact = { { 0 } };
-    int ret = 0;
+    int cfg_parsed;
+    int ret = EXIT_FAILURE;
+
 
     config.filename = av_strdup("/etc/ffserver.conf");
 
@@ -3828,12 +3994,11 @@ int main(int argc, char **argv)
     sigact.sa_flags = SA_NOCLDSTOP | SA_RESTART;
     sigaction(SIGCHLD, &sigact, 0);
 
-    if ((ret = ffserver_parse_ffconfig(config.filename, &config)) < 0) {
+    if ((cfg_parsed = ffserver_parse_ffconfig(config.filename, &config)) < 0) {
         fprintf(stderr, "Error reading configuration file '%s': %s\n",
-                config.filename, av_err2str(ret));
-        exit(1);
+                config.filename, av_err2str(cfg_parsed));
+        goto bail;
     }
-    av_freep(&config.filename);
 
     /* open log file if needed */
     if (config.logfilename[0] != '\0') {
@@ -3846,7 +4011,10 @@ int main(int argc, char **argv)
 
     build_file_streams();
 
-    build_feed_streams();
+    if (build_feed_streams() < 0) {
+        http_log("Could not setup feed streams\n");
+        goto bail;
+    }
 
     compute_bandwidth();
 
@@ -3855,8 +4023,13 @@ int main(int argc, char **argv)
 
     if (http_server() < 0) {
         http_log("Could not start server\n");
-        exit(1);
+        goto bail;
     }
 
-    return 0;
+    ret=EXIT_SUCCESS;
+
+bail:
+    av_freep (&config.filename);
+    avformat_network_deinit();
+    return ret;
 }
diff --git a/ffserver_config.c b/ffserver_config.c
index 06bd8ac4..9fc1f003 100644
--- a/ffserver_config.c
+++ b/ffserver_config.c
@@ -42,8 +42,8 @@ static void report_config_error(const char *filename, int line_num,
                                 int log_level, int *errors, const char *fmt,
                                 ...);
 
-#define ERROR(...)   report_config_error(config->filename, config->line_num,\
-                                         AV_LOG_ERROR, &config->errors,  __VA_ARGS__)
+#define ERROR(...) report_config_error(config->filename, config->line_num,\
+                                       AV_LOG_ERROR, &config->errors, __VA_ARGS__)
 #define WARNING(...) report_config_error(config->filename, config->line_num,\
                                          AV_LOG_WARNING, &config->warnings, __VA_ARGS__)
 
@@ -116,7 +116,8 @@ void ffserver_parse_acl_row(FFServerStream *stream, FFServerStream* feed,
 {
     char arg[1024];
     FFServerIPAddressACL acl;
-    int errors = 0;
+    FFServerIPAddressACL *nacl;
+    FFServerIPAddressACL **naclp;
 
     ffserver_get_arg(arg, sizeof(arg), &p);
     if (av_strcasecmp(arg, "allow") == 0)
@@ -126,7 +127,7 @@ void ffserver_parse_acl_row(FFServerStream *stream, FFServerStream* feed,
     else {
         fprintf(stderr, "%s:%d: ACL action '%s' should be ALLOW or DENY.\n",
                 filename, line_num, arg);
-        errors++;
+        goto bail;
     }
 
     ffserver_get_arg(arg, sizeof(arg), &p);
@@ -135,9 +136,10 @@ void ffserver_parse_acl_row(FFServerStream *stream, FFServerStream* feed,
         fprintf(stderr,
                 "%s:%d: ACL refers to invalid host or IP address '%s'\n",
                 filename, line_num, arg);
-        errors++;
-    } else
-        acl.last = acl.first;
+        goto bail;
+    }
+
+    acl.last = acl.first;
 
     ffserver_get_arg(arg, sizeof(arg), &p);
 
@@ -146,37 +148,43 @@ void ffserver_parse_acl_row(FFServerStream *stream, FFServerStream* feed,
             fprintf(stderr,
                     "%s:%d: ACL refers to invalid host or IP address '%s'\n",
                     filename, line_num, arg);
-            errors++;
+            goto bail;
         }
     }
 
-    if (!errors) {
-        FFServerIPAddressACL *nacl = av_mallocz(sizeof(*nacl));
-        FFServerIPAddressACL **naclp = 0;
+    nacl = av_mallocz(sizeof(*nacl));
+    /* av_mallocz() returns NULL on failure; do not dereference it below */
+    if (!nacl) {
+        fprintf(stderr, "%s:%d: Failed to allocate ACL entry\n",
+                filename, line_num);
+        goto bail;
+    }
+    naclp = 0;
 
-        acl.next = 0;
-        *nacl = acl;
+    acl.next = 0;
+    *nacl = acl;
 
-        if (stream)
-            naclp = &stream->acl;
-        else if (feed)
-            naclp = &feed->acl;
-        else if (ext_acl)
-            naclp = &ext_acl;
-        else {
-            fprintf(stderr, "%s:%d: ACL found not in <Stream> or <Feed>\n",
-                    filename, line_num);
-            errors++;
-        }
+    if (stream)
+        naclp = &stream->acl;
+    else if (feed)
+        naclp = &feed->acl;
+    else if (ext_acl)
+        naclp = &ext_acl;
+    else
+        fprintf(stderr, "%s:%d: ACL found not in <Stream> or <Feed>\n",
+                filename, line_num);
 
-        if (naclp) {
-            while (*naclp)
-                naclp = &(*naclp)->next;
+    if (naclp) {
+        while (*naclp)
+            naclp = &(*naclp)->next;
+
+        *naclp = nacl;
+    } else
+        av_free(nacl);
+
+bail:
+    return;
 
-            *naclp = nacl;
-        } else
-            av_free(nacl);
-    }
 }
 
 /* add a codec and set the default parameters */
@@ -230,9 +232,9 @@ static void add_codec(FFServerStream *stream, AVCodecContext *av,
     /* compute default parameters */
     switch(av->codec_type) {
     case AVMEDIA_TYPE_AUDIO:
-        if (!av_dict_get(recommended, "ab", NULL, 0)) {
+        if (!av_dict_get(recommended, "b", NULL, 0)) {
             av->bit_rate = 64000;
-            av_dict_set_int(&recommended, "ab", av->bit_rate, 0);
+            av_dict_set_int(&recommended, "b", av->bit_rate, 0);
             WARNING("Setting default value for audio bit rate = %d. "
                     "Use NoDefaults to disable it.\n",
                     av->bit_rate);
@@ -458,7 +460,7 @@ static int ffserver_set_int_param(int *dest, const char *value, int factor,
     if (tmp < min || tmp > max)
         goto error;
     if (factor) {
-        if (FFABS(tmp) > INT_MAX / FFABS(factor))
+        if (tmp == INT_MIN || FFABS(tmp) > INT_MAX / FFABS(factor))
             goto error;
         tmp *= factor;
     }
@@ -683,8 +685,8 @@ static int ffserver_parse_config_global(FFServerConfig *config, const char *cmd,
     return 0;
 }
 
-static int ffserver_parse_config_feed(FFServerConfig *config, const char *cmd, const char **p,
-                                      FFServerStream **pfeed)
+static int ffserver_parse_config_feed(FFServerConfig *config, const char *cmd,
+                                      const char **p, FFServerStream **pfeed)
 {
     FFServerStream *feed;
     char arg[1024];
@@ -791,7 +793,8 @@ static int ffserver_parse_config_feed(FFServerConfig *config, const char *cmd, c
     return 0;
 }
 
-static int ffserver_parse_config_stream(FFServerConfig *config, const char *cmd, const char **p,
+static int ffserver_parse_config_stream(FFServerConfig *config, const char *cmd,
+                                        const char **p,
                                         FFServerStream **pstream)
 {
     char arg[1024], arg2[1024];
@@ -923,7 +926,7 @@ static int ffserver_parse_config_stream(FFServerConfig *config, const char *cmd,
         ffserver_get_arg(arg, sizeof(arg), p);
         ffserver_set_float_param(&f, arg, 1000, -FLT_MAX, FLT_MAX, config,
                 "Invalid %s: '%s'\n", cmd, arg);
-        if (ffserver_save_avoption_int("ab", (int64_t)lrintf(f),
+        if (ffserver_save_avoption_int("b", (int64_t)lrintf(f),
                                        AV_OPT_FLAG_AUDIO_PARAM, config) < 0)
             goto nomem;
     } else if (!av_strcasecmp(cmd, "AudioChannels")) {
diff --git a/libavcodec/012v.c b/libavcodec/012v.c
index b87551e0..b5a40666 100644
--- a/libavcodec/012v.c
+++ b/libavcodec/012v.c
@@ -151,5 +151,5 @@ AVCodec ff_zero12v_decoder = {
     .id             = AV_CODEC_ID_012V,
     .init           = zero12v_decode_init,
     .decode         = zero12v_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/4xm.c b/libavcodec/4xm.c
index 3a256228..a7a757a0 100644
--- a/libavcodec/4xm.c
+++ b/libavcodec/4xm.c
@@ -559,7 +559,7 @@ static inline void idct_put(FourXContext *f, int x, int y)
         idct(block[i]);
     }
 
-    if (!(f->avctx->flags & CODEC_FLAG_GRAY)) {
+    if (!(f->avctx->flags & AV_CODEC_FLAG_GRAY)) {
         for (i = 4; i < 6; i++)
             idct(block[i]);
     }
@@ -883,11 +883,11 @@ static int decode_frame(AVCodecContext *avctx, void *data,
         }
         cfrm = &f->cfrm[i];
 
-        if (data_size > UINT_MAX -  cfrm->size - FF_INPUT_BUFFER_PADDING_SIZE)
+        if (data_size > UINT_MAX -  cfrm->size - AV_INPUT_BUFFER_PADDING_SIZE)
             return AVERROR_INVALIDDATA;
 
         cfrm->data = av_fast_realloc(cfrm->data, &cfrm->allocated_size,
-                                     cfrm->size + data_size + FF_INPUT_BUFFER_PADDING_SIZE);
+                                     cfrm->size + data_size + AV_INPUT_BUFFER_PADDING_SIZE);
         // explicit check needed as memcpy below might not catch a NULL
         if (!cfrm->data) {
             av_log(f->avctx, AV_LOG_ERROR, "realloc failure\n");
@@ -1026,5 +1026,5 @@ AVCodec ff_fourxm_decoder = {
     .init           = decode_init,
     .close          = decode_end,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/8bps.c b/libavcodec/8bps.c
index e00bdfc8..2e4464db 100644
--- a/libavcodec/8bps.c
+++ b/libavcodec/8bps.c
@@ -184,5 +184,5 @@ AVCodec ff_eightbps_decoder = {
     .priv_data_size = sizeof(EightBpsContext),
     .init           = decode_init,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/8svx.c b/libavcodec/8svx.c
index 26496e50..edc945c6 100644
--- a/libavcodec/8svx.c
+++ b/libavcodec/8svx.c
@@ -194,7 +194,7 @@ AVCodec ff_eightsvx_fib_decoder = {
   .init           = eightsvx_decode_init,
   .decode         = eightsvx_decode_frame,
   .close          = eightsvx_decode_close,
-  .capabilities   = CODEC_CAP_DR1,
+  .capabilities   = AV_CODEC_CAP_DR1,
   .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_U8P,
                                                     AV_SAMPLE_FMT_NONE },
 };
@@ -209,7 +209,7 @@ AVCodec ff_eightsvx_exp_decoder = {
   .init           = eightsvx_decode_init,
   .decode         = eightsvx_decode_frame,
   .close          = eightsvx_decode_close,
-  .capabilities   = CODEC_CAP_DR1,
+  .capabilities   = AV_CODEC_CAP_DR1,
   .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_U8P,
                                                     AV_SAMPLE_FMT_NONE },
 };
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index bfd6425f..f6a4fbbd 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -3,16 +3,18 @@ include $(SUBDIR)../config.mak
 NAME = avcodec
 
 HEADERS = avcodec.h                                                     \
+          avdct.h                                                       \
           avfft.h                                                       \
           dv_profile.h                                                  \
           d3d11va.h                                                     \
+          dirac.h                                                       \
           dxva2.h                                                       \
-          old_codec_ids.h                                               \
           qsv.h                                                         \
           vaapi.h                                                       \
           vda.h                                                         \
           vdpau.h                                                       \
           version.h                                                     \
+          videotoolbox.h                                                \
           vorbis_parser.h                                               \
           xvmc.h                                                        \
 
@@ -24,11 +26,14 @@ OBJS = allcodecs.o                                                      \
        bitstream.o                                                      \
        bitstream_filter.o                                               \
        codec_desc.o                                                     \
+       d3d11va.o                                                        \
+       dirac.o                                                          \
        dv_profile.o                                                     \
        imgconvert.o                                                     \
        mathtables.o                                                     \
        options.o                                                        \
        parser.o                                                         \
+       profiles.o                                                       \
        qsv_api.o                                                        \
        raw.o                                                            \
        resample.o                                                       \
@@ -56,6 +61,7 @@ FFT-OBJS-$(CONFIG_HARDCODED_TABLES)    += cos_tables.o cos_fixed_tables.o
 OBJS-$(CONFIG_FFT)                     += avfft.o fft_fixed.o fft_float.o \
                                           fft_fixed_32.o fft_init_table.o \
                                           $(FFT-OBJS-yes)
+OBJS-$(CONFIG_FLACDSP)                 += flacdsp.o
 OBJS-$(CONFIG_FMTCONVERT)              += fmtconvert.o
 OBJS-$(CONFIG_GOLOMB)                  += golomb.o
 OBJS-$(CONFIG_H263DSP)                 += h263dsp.o
@@ -71,12 +77,14 @@ OBJS-$(CONFIG_IDCTDSP)                 += idctdsp.o simple_idct.o jrevdct.o
 OBJS-$(CONFIG_IIRFILTER)               += iirfilter.o
 OBJS-$(CONFIG_IMDCT15)                 += imdct15.o
 OBJS-$(CONFIG_INTRAX8)                 += intrax8.o intrax8dsp.o
+OBJS-$(CONFIG_IVIDSP)                  += ivi_dsp.o
 OBJS-$(CONFIG_JPEGTABLES)              += jpegtables.o
 OBJS-$(CONFIG_LIBXVID)                 += libxvid_rc.o
 OBJS-$(CONFIG_LLAUDDSP)                += lossless_audiodsp.o
 OBJS-$(CONFIG_LLVIDDSP)                += lossless_videodsp.o
 OBJS-$(CONFIG_LPC)                     += lpc.o
 OBJS-$(CONFIG_LSP)                     += lsp.o
+OBJS-$(CONFIG_LZF)                     += lzf.o
 OBJS-$(CONFIG_MDCT)                    += mdct_fixed.o mdct_float.o mdct_fixed_32.o
 OBJS-$(CONFIG_ME_CMP)                  += me_cmp.o
 OBJS-$(CONFIG_MPEG_ER)                 += mpeg_er.o
@@ -88,10 +96,11 @@ OBJS-$(CONFIG_MPEGAUDIODSP)            += mpegaudiodsp.o                \
                                           mpegaudiodsp_float.o
 OBJS-$(CONFIG_MPEGVIDEO)               += mpegvideo.o mpegvideodsp.o rl.o \
                                           mpegvideo_motion.o mpegutils.o \
-                                          mpegvideodata.o
+                                          mpegvideodata.o mpegpicture.o
 OBJS-$(CONFIG_MPEGVIDEOENC)            += mpegvideo_enc.o mpeg12data.o  \
                                           motion_est.o ratecontrol.o    \
                                           mpegvideoencdsp.o
+OBJS-$(CONFIG_MSS34DSP)                += mss34dsp.o
 OBJS-$(CONFIG_NVENC)                   += nvenc.o
 OBJS-$(CONFIG_PIXBLOCKDSP)             += pixblockdsp.o
 OBJS-$(CONFIG_QPELDSP)                 += qpeldsp.o
@@ -101,23 +110,37 @@ OBJS-$(CONFIG_QSVENC)                  += qsvenc.o
 OBJS-$(CONFIG_RANGECODER)              += rangecoder.o
 RDFT-OBJS-$(CONFIG_HARDCODED_TABLES)   += sin_tables.o
 OBJS-$(CONFIG_RDFT)                    += rdft.o $(RDFT-OBJS-yes)
-OBJS-$(CONFIG_SHARED)                  += log2_tab.o
-OBJS-$(CONFIG_SINEWIN)                 += sinewin.o
+OBJS-$(CONFIG_RV34DSP)                 += rv34dsp.o
+OBJS-$(CONFIG_SHARED)                  += log2_tab.o reverse.o
+OBJS-$(CONFIG_SINEWIN)                 += sinewin.o sinewin_fixed.o
+OBJS-$(CONFIG_SNAPPY)                  += snappy.o
 OBJS-$(CONFIG_STARTCODE)               += startcode.o
+OBJS-$(CONFIG_TEXTUREDSP)              += texturedsp.o
+OBJS-$(CONFIG_TEXTUREDSPENC)           += texturedspenc.o
 OBJS-$(CONFIG_TPELDSP)                 += tpeldsp.o
 OBJS-$(CONFIG_VIDEODSP)                += videodsp.o
 OBJS-$(CONFIG_VP3DSP)                  += vp3dsp.o
+OBJS-$(CONFIG_VP56DSP)                 += vp56dsp.o
+OBJS-$(CONFIG_VP8DSP)                  += vp8dsp.o
 OBJS-$(CONFIG_WMA_FREQS)               += wma_freqs.o
+OBJS-$(CONFIG_WMV2DSP)                 += wmv2dsp.o
 
 # decoders/encoders
 OBJS-$(CONFIG_ZERO12V_DECODER)         += 012v.o
 OBJS-$(CONFIG_A64MULTI_ENCODER)        += a64multienc.o elbg.o
 OBJS-$(CONFIG_A64MULTI5_ENCODER)       += a64multienc.o elbg.o
-OBJS-$(CONFIG_AAC_DECODER)             += aacdec.o aactab.o aacsbr.o aacps.o \
+OBJS-$(CONFIG_AAC_DECODER)             += aacdec.o aactab.o aacsbr.o aacps_float.o \
                                           aacadtsdec.o mpeg4audio.o kbdwin.o \
-                                          sbrdsp.o aacpsdsp.o
-OBJS-$(CONFIG_AAC_ENCODER)             += aacenc.o aaccoder.o    \
+                                          sbrdsp.o aacpsdsp_float.o
+OBJS-$(CONFIG_AAC_FIXED_DECODER)       += aacdec_fixed.o aactab.o aacsbr_fixed.o aacps_fixed.o \
+                                          aacadtsdec.o mpeg4audio.o kbdwin.o \
+                                          sbrdsp_fixed.o aacpsdsp_fixed.o
+OBJS-$(CONFIG_AAC_ENCODER)             += aacenc.o aaccoder.o aacenctab.o    \
                                           aacpsy.o aactab.o      \
+                                          aacenc_is.o \
+                                          aacenc_tns.o \
+                                          aacenc_ltp.o \
+                                          aacenc_pred.o \
                                           psymodel.o mpeg4audio.o kbdwin.o
 OBJS-$(CONFIG_AASC_DECODER)            += aasc.o msrledec.o
 OBJS-$(CONFIG_AC3_DECODER)             += ac3dec_float.o ac3dec_data.o ac3.o kbdwin.o
@@ -126,7 +149,7 @@ OBJS-$(CONFIG_AC3_ENCODER)             += ac3enc_float.o ac3enc.o ac3tab.o \
                                           ac3.o kbdwin.o
 OBJS-$(CONFIG_AC3_FIXED_ENCODER)       += ac3enc_fixed.o ac3enc.o ac3tab.o ac3.o
 OBJS-$(CONFIG_AIC_DECODER)             += aic.o
-OBJS-$(CONFIG_ALAC_DECODER)            += alac.o alac_data.o
+OBJS-$(CONFIG_ALAC_DECODER)            += alac.o alac_data.o alacdsp.o
 OBJS-$(CONFIG_ALAC_ENCODER)            += alacenc.o alac_data.o
 OBJS-$(CONFIG_ALIAS_PIX_DECODER)       += aliaspixdec.o
 OBJS-$(CONFIG_ALIAS_PIX_ENCODER)       += aliaspixenc.o
@@ -187,6 +210,7 @@ OBJS-$(CONFIG_CAVS_DECODER)            += cavs.o cavsdec.o cavsdsp.o \
 OBJS-$(CONFIG_CCAPTION_DECODER)        += ccaption_dec.o
 OBJS-$(CONFIG_CDGRAPHICS_DECODER)      += cdgraphics.o
 OBJS-$(CONFIG_CDXL_DECODER)            += cdxl.o
+OBJS-$(CONFIG_CFHD_DECODER)            += cfhd.o cfhddata.o
 OBJS-$(CONFIG_CINEPAK_DECODER)         += cinepak.o
 OBJS-$(CONFIG_CINEPAK_ENCODER)         += cinepakenc.o elbg.o
 OBJS-$(CONFIG_CLJR_DECODER)            += cljrdec.o
@@ -198,11 +222,12 @@ OBJS-$(CONFIG_COMFORTNOISE_ENCODER)    += cngenc.o
 OBJS-$(CONFIG_CPIA_DECODER)            += cpia.o
 OBJS-$(CONFIG_CSCD_DECODER)            += cscd.o
 OBJS-$(CONFIG_CYUV_DECODER)            += cyuv.o
-OBJS-$(CONFIG_DCA_DECODER)             += dcadec.o dca.o dcadsp.o      \
-                                          dcadata.o dca_exss.o         \
-                                          dca_xll.o synth_filter.o
+OBJS-$(CONFIG_DCA_DECODER)             += dcadec.o dca.o dcadata.o        \
+                                          dca_core.o dca_exss.o dca_xll.o \
+                                          dcadsp.o dcadct.o synth_filter.o
 OBJS-$(CONFIG_DCA_ENCODER)             += dcaenc.o dca.o dcadata.o
-OBJS-$(CONFIG_DIRAC_DECODER)           += diracdec.o dirac.o diracdsp.o \
+OBJS-$(CONFIG_DDS_DECODER)             += dds.o
+OBJS-$(CONFIG_DIRAC_DECODER)           += diracdec.o dirac.o diracdsp.o diractab.o \
                                           dirac_arith.o mpeg12data.o dirac_dwt.o
 OBJS-$(CONFIG_DFA_DECODER)             += dfa.o
 OBJS-$(CONFIG_DNXHD_DECODER)           += dnxhddec.o dnxhddata.o
@@ -220,10 +245,12 @@ OBJS-$(CONFIG_DVBSUB_DECODER)          += dvbsubdec.o
 OBJS-$(CONFIG_DVBSUB_ENCODER)          += dvbsub.o
 OBJS-$(CONFIG_DVDSUB_DECODER)          += dvdsubdec.o
 OBJS-$(CONFIG_DVDSUB_ENCODER)          += dvdsubenc.o
+OBJS-$(CONFIG_DVAUDIO_DECODER)         += dvaudiodec.o
 OBJS-$(CONFIG_DVVIDEO_DECODER)         += dvdec.o dv.o dvdata.o
 OBJS-$(CONFIG_DVVIDEO_ENCODER)         += dvenc.o dv.o dvdata.o
 OBJS-$(CONFIG_DXA_DECODER)             += dxa.o
 OBJS-$(CONFIG_DXTORY_DECODER)          += dxtory.o
+OBJS-$(CONFIG_DXV_DECODER)             += dxv.o
 OBJS-$(CONFIG_EAC3_DECODER)            += eac3_data.o
 OBJS-$(CONFIG_EAC3_ENCODER)            += eac3enc.o eac3_data.o
 OBJS-$(CONFIG_EACMV_DECODER)           += eacmv.o
@@ -243,8 +270,8 @@ OBJS-$(CONFIG_FFV1_DECODER)            += ffv1dec.o ffv1.o
 OBJS-$(CONFIG_FFV1_ENCODER)            += ffv1enc.o ffv1.o
 OBJS-$(CONFIG_FFWAVESYNTH_DECODER)     += ffwavesynth.o
 OBJS-$(CONFIG_FIC_DECODER)             += fic.o
-OBJS-$(CONFIG_FLAC_DECODER)            += flacdec.o flacdata.o flac.o flacdsp.o
-OBJS-$(CONFIG_FLAC_ENCODER)            += flacenc.o flacdata.o flac.o flacdsp.o vorbis_data.o
+OBJS-$(CONFIG_FLAC_DECODER)            += flacdec.o flacdata.o flac.o
+OBJS-$(CONFIG_FLAC_ENCODER)            += flacenc.o flacdata.o flac.o vorbis_data.o
 OBJS-$(CONFIG_FLASHSV_DECODER)         += flashsv.o
 OBJS-$(CONFIG_FLASHSV_ENCODER)         += flashsvenc.o
 OBJS-$(CONFIG_FLASHSV2_ENCODER)        += flashsv2enc.o
@@ -253,10 +280,11 @@ OBJS-$(CONFIG_FLIC_DECODER)            += flicvideo.o
 OBJS-$(CONFIG_FOURXM_DECODER)          += 4xm.o
 OBJS-$(CONFIG_FRAPS_DECODER)           += fraps.o
 OBJS-$(CONFIG_FRWU_DECODER)            += frwu.o
-OBJS-$(CONFIG_G2M_DECODER)             += g2meet.o
-OBJS-$(CONFIG_G723_1_DECODER)          += g723_1.o acelp_vectors.o \
-                                          celp_filters.o celp_math.o
-OBJS-$(CONFIG_G723_1_ENCODER)          += g723_1.o acelp_vectors.o celp_math.o
+OBJS-$(CONFIG_G2M_DECODER)             += g2meet.o elsdec.o
+OBJS-$(CONFIG_G723_1_DECODER)          += g723_1dec.o g723_1.o \
+                                          acelp_vectors.o celp_filters.o celp_math.o
+OBJS-$(CONFIG_G723_1_ENCODER)          += g723_1enc.o g723_1.o \
+                                          acelp_vectors.o celp_filters.o celp_math.o
 OBJS-$(CONFIG_G729_DECODER)            += g729dec.o lsp.o celp_math.o acelp_filters.o acelp_pitch_delay.o acelp_vectors.o g729postfilter.o
 OBJS-$(CONFIG_GIF_DECODER)             += gifdec.o lzw.o
 OBJS-$(CONFIG_GIF_ENCODER)             += gif.o lzwenc.o
@@ -266,20 +294,24 @@ OBJS-$(CONFIG_H261_DECODER)            += h261dec.o h261data.o h261.o
 OBJS-$(CONFIG_H261_ENCODER)            += h261enc.o h261data.o h261.o
 OBJS-$(CONFIG_H263_DECODER)            += h263dec.o h263.o ituh263dec.o        \
                                           mpeg4video.o mpeg4videodec.o flvdec.o\
-                                          intelh263dec.o
+                                          intelh263dec.o h263data.o
 OBJS-$(CONFIG_H263_ENCODER)            += mpeg4videoenc.o mpeg4video.o  \
-                                          h263.o ituh263enc.o flvenc.o
+                                          h263.o h263data.o ituh263enc.o flvenc.o
 OBJS-$(CONFIG_H264_DECODER)            += h264.o h264_cabac.o h264_cavlc.o \
                                           h264_direct.o h264_loopfilter.o  \
                                           h264_mb.o h264_picture.o h264_ps.o \
                                           h264_refs.o h264_sei.o h264_slice.o
 OBJS-$(CONFIG_H264_MMAL_DECODER)       += mmaldec.o
 OBJS-$(CONFIG_H264_VDA_DECODER)        += vda_h264_dec.o
-OBJS-$(CONFIG_H264_QSV_DECODER)        += qsvdec_h264.o
+OBJS-$(CONFIG_H264_QSV_DECODER)        += qsvdec_h2645.o
 OBJS-$(CONFIG_H264_QSV_ENCODER)        += qsvenc_h264.o
+OBJS-$(CONFIG_HAP_DECODER)             += hapdec.o hap.o
+OBJS-$(CONFIG_HAP_ENCODER)             += hapenc.o hap.o
 OBJS-$(CONFIG_HEVC_DECODER)            += hevc.o hevc_mvs.o hevc_ps.o hevc_sei.o \
                                           hevc_cabac.o hevc_refs.o hevcpred.o    \
-                                          hevcdsp.o hevc_filter.o
+                                          hevcdsp.o hevc_filter.o hevc_parse.o hevc_data.o
+OBJS-$(CONFIG_HEVC_QSV_DECODER)        += qsvdec_h2645.o
+OBJS-$(CONFIG_HEVC_QSV_ENCODER)        += qsvenc_hevc.o hevc_ps_enc.o hevc_parse.o
 OBJS-$(CONFIG_HNM4_VIDEO_DECODER)      += hnm4video.o
 OBJS-$(CONFIG_HQ_HQA_DECODER)          += hq_hqa.o hq_hqadata.o hq_hqadsp.o \
                                           canopus.o
@@ -288,13 +320,13 @@ OBJS-$(CONFIG_HUFFYUV_DECODER)         += huffyuv.o huffyuvdec.o
 OBJS-$(CONFIG_HUFFYUV_ENCODER)         += huffyuv.o huffyuvenc.o
 OBJS-$(CONFIG_IDCIN_DECODER)           += idcinvideo.o
 OBJS-$(CONFIG_IDF_DECODER)             += bintext.o cga_data.o
-OBJS-$(CONFIG_IFF_BYTERUN1_DECODER)    += iff.o
 OBJS-$(CONFIG_IFF_ILBM_DECODER)        += iff.o
 OBJS-$(CONFIG_IMC_DECODER)             += imc.o
 OBJS-$(CONFIG_INDEO2_DECODER)          += indeo2.o
 OBJS-$(CONFIG_INDEO3_DECODER)          += indeo3.o
-OBJS-$(CONFIG_INDEO4_DECODER)          += indeo4.o ivi.o ivi_dsp.o
-OBJS-$(CONFIG_INDEO5_DECODER)          += indeo5.o ivi.o ivi_dsp.o
+OBJS-$(CONFIG_INDEO4_DECODER)          += indeo4.o ivi.o
+OBJS-$(CONFIG_INDEO5_DECODER)          += indeo5.o ivi.o
+OBJS-$(CONFIG_INTERPLAY_ACM_DECODER)   += interplayacm.o
 OBJS-$(CONFIG_INTERPLAY_DPCM_DECODER)  += dpcm.o
 OBJS-$(CONFIG_INTERPLAY_VIDEO_DECODER) += interplayvideo.o
 OBJS-$(CONFIG_JACOSUB_DECODER)         += jacosubdec.o ass.o
@@ -346,6 +378,9 @@ OBJS-$(CONFIG_MPEG1VIDEO_DECODER)      += mpeg12dec.o mpeg12.o mpeg12data.o
 OBJS-$(CONFIG_MPEG1VIDEO_ENCODER)      += mpeg12enc.o mpeg12.o
 OBJS-$(CONFIG_MPEG2VIDEO_DECODER)      += mpeg12dec.o mpeg12.o mpeg12data.o
 OBJS-$(CONFIG_MPEG2VIDEO_ENCODER)      += mpeg12enc.o mpeg12.o
+OBJS-$(CONFIG_MPEG2_MMAL_DECODER)      += mmaldec.o
+OBJS-$(CONFIG_MPEG2_QSV_DECODER)       += qsvdec_mpeg2.o
+OBJS-$(CONFIG_MPEG2_QSV_ENCODER)       += qsvenc_mpeg2.o
 OBJS-$(CONFIG_MPEG4_DECODER)           += xvididct.o
 OBJS-$(CONFIG_MPL2_DECODER)            += mpl2dec.o ass.o
 OBJS-$(CONFIG_MSMPEG4V1_DECODER)       += msmpeg4dec.o msmpeg4.o msmpeg4data.o
@@ -354,13 +389,13 @@ OBJS-$(CONFIG_MSMPEG4V2_ENCODER)       += msmpeg4enc.o msmpeg4.o msmpeg4data.o
 OBJS-$(CONFIG_MSMPEG4V3_DECODER)       += msmpeg4dec.o msmpeg4.o msmpeg4data.o
 OBJS-$(CONFIG_MSMPEG4V3_ENCODER)       += msmpeg4enc.o msmpeg4.o msmpeg4data.o
 OBJS-$(CONFIG_MSRLE_DECODER)           += msrle.o msrledec.o
-OBJS-$(CONFIG_MSA1_DECODER)            += mss3.o mss34dsp.o
+OBJS-$(CONFIG_MSA1_DECODER)            += mss3.o
 OBJS-$(CONFIG_MSS1_DECODER)            += mss1.o mss12.o
 OBJS-$(CONFIG_MSS2_DECODER)            += mss2.o mss12.o mss2dsp.o
 OBJS-$(CONFIG_MSVIDEO1_DECODER)        += msvideo1.o
 OBJS-$(CONFIG_MSVIDEO1_ENCODER)        += msvideo1enc.o elbg.o
 OBJS-$(CONFIG_MSZH_DECODER)            += lcldec.o
-OBJS-$(CONFIG_MTS2_DECODER)            += mss4.o mss34dsp.o
+OBJS-$(CONFIG_MTS2_DECODER)            += mss4.o
 OBJS-$(CONFIG_MVC1_DECODER)            += mvcdec.o
 OBJS-$(CONFIG_MVC2_DECODER)            += mvcdec.o
 OBJS-$(CONFIG_MXPEG_DECODER)           += mxpegdec.o
@@ -420,16 +455,19 @@ OBJS-$(CONFIG_ROQ_ENCODER)             += roqvideoenc.o roqvideo.o elbg.o
 OBJS-$(CONFIG_ROQ_DPCM_DECODER)        += dpcm.o
 OBJS-$(CONFIG_ROQ_DPCM_ENCODER)        += roqaudioenc.o
 OBJS-$(CONFIG_RPZA_DECODER)            += rpza.o
+OBJS-$(CONFIG_RSCC_DECODER)            += rscc.o
 OBJS-$(CONFIG_RV10_DECODER)            += rv10.o
 OBJS-$(CONFIG_RV10_ENCODER)            += rv10enc.o
 OBJS-$(CONFIG_RV20_DECODER)            += rv10.o
 OBJS-$(CONFIG_RV20_ENCODER)            += rv20enc.o
-OBJS-$(CONFIG_RV30_DECODER)            += rv30.o rv34.o rv30dsp.o rv34dsp.o
-OBJS-$(CONFIG_RV40_DECODER)            += rv40.o rv34.o rv34dsp.o rv40dsp.o
-OBJS-$(CONFIG_SAMI_DECODER)            += samidec.o ass.o
+OBJS-$(CONFIG_RV30_DECODER)            += rv30.o rv34.o rv30dsp.o
+OBJS-$(CONFIG_RV40_DECODER)            += rv40.o rv34.o rv40dsp.o
+OBJS-$(CONFIG_SAMI_DECODER)            += samidec.o ass.o htmlsubtitles.o
 OBJS-$(CONFIG_S302M_DECODER)           += s302m.o
 OBJS-$(CONFIG_S302M_ENCODER)           += s302menc.o
 OBJS-$(CONFIG_SANM_DECODER)            += sanm.o
+OBJS-$(CONFIG_SCREENPRESSO_DECODER)    += screenpresso.o
+OBJS-$(CONFIG_SDX2_DPCM_DECODER)       += dpcm.o
 OBJS-$(CONFIG_SGI_DECODER)             += sgidec.o
 OBJS-$(CONFIG_SGI_ENCODER)             += sgienc.o rle.o
 OBJS-$(CONFIG_SGIRLE_DECODER)          += sgirledec.o
@@ -450,10 +488,10 @@ OBJS-$(CONFIG_SONIC_DECODER)           += sonic.o
 OBJS-$(CONFIG_SONIC_ENCODER)           += sonic.o
 OBJS-$(CONFIG_SONIC_LS_ENCODER)        += sonic.o
 OBJS-$(CONFIG_SP5X_DECODER)            += sp5xdec.o
-OBJS-$(CONFIG_SRT_DECODER)             += srtdec.o ass.o
+OBJS-$(CONFIG_SRT_DECODER)             += srtdec.o ass.o htmlsubtitles.o
 OBJS-$(CONFIG_SRT_ENCODER)             += srtenc.o ass_split.o
 OBJS-$(CONFIG_STL_DECODER)             += textdec.o ass.o
-OBJS-$(CONFIG_SUBRIP_DECODER)          += srtdec.o ass.o
+OBJS-$(CONFIG_SUBRIP_DECODER)          += srtdec.o ass.o htmlsubtitles.o
 OBJS-$(CONFIG_SUBRIP_ENCODER)          += srtenc.o ass_split.o
 OBJS-$(CONFIG_SUBVIEWER1_DECODER)      += textdec.o ass.o
 OBJS-$(CONFIG_SUBVIEWER_DECODER)       += subviewerdec.o ass.o
@@ -464,7 +502,8 @@ OBJS-$(CONFIG_SVQ1_ENCODER)            += svq1enc.o svq1.o    \
                                           h263.o ituh263enc.o
 OBJS-$(CONFIG_SVQ3_DECODER)            += svq3.o svq13.o mpegutils.o
 OBJS-$(CONFIG_TEXT_DECODER)            += textdec.o ass.o
-OBJS-$(CONFIG_TAK_DECODER)             += takdec.o tak.o
+OBJS-$(CONFIG_TEXT_ENCODER)            += srtenc.o ass_split.o
+OBJS-$(CONFIG_TAK_DECODER)             += takdec.o tak.o takdsp.o
 OBJS-$(CONFIG_TARGA_DECODER)           += targa.o
 OBJS-$(CONFIG_TARGA_ENCODER)           += targaenc.o rle.o
 OBJS-$(CONFIG_TARGA_Y216_DECODER)      += targa_y216dec.o
@@ -482,7 +521,7 @@ OBJS-$(CONFIG_TSCC2_DECODER)           += tscc2.o
 OBJS-$(CONFIG_TTA_DECODER)             += tta.o ttadata.o ttadsp.o
 OBJS-$(CONFIG_TTA_ENCODER)             += ttaenc.o ttadata.o
 OBJS-$(CONFIG_TWINVQ_DECODER)          += twinvqdec.o twinvq.o
-OBJS-$(CONFIG_TXD_DECODER)             += txd.o s3tc.o
+OBJS-$(CONFIG_TXD_DECODER)             += txd.o
 OBJS-$(CONFIG_ULTI_DECODER)            += ulti.o
 OBJS-$(CONFIG_UTVIDEO_DECODER)         += utvideodec.o utvideo.o
 OBJS-$(CONFIG_UTVIDEO_ENCODER)         += utvideoenc.o utvideo.o
@@ -502,6 +541,9 @@ OBJS-$(CONFIG_VC1_DECODER)             += vc1dec.o vc1_block.o vc1_loopfilter.o
                                           vc1dsp.o \
                                           msmpeg4dec.o msmpeg4.o msmpeg4data.o \
                                           wmv2dsp.o
+OBJS-$(CONFIG_VC1_MMAL_DECODER)        += mmaldec.o
+OBJS-$(CONFIG_VC1_QSV_DECODER)         += qsvdec_vc1.o
+OBJS-$(CONFIG_VC2_ENCODER)             += vc2enc.o vc2enc_dwt.o diractab.o
 OBJS-$(CONFIG_VCR1_DECODER)            += vcr1.o
 OBJS-$(CONFIG_VMDAUDIO_DECODER)        += vmdaudio.o
 OBJS-$(CONFIG_VMDVIDEO_DECODER)        += vmdvideo.o
@@ -511,20 +553,18 @@ OBJS-$(CONFIG_VORBIS_DECODER)          += vorbisdec.o vorbisdsp.o vorbis.o \
 OBJS-$(CONFIG_VORBIS_ENCODER)          += vorbisenc.o vorbis.o \
                                           vorbis_data.o
 OBJS-$(CONFIG_VP3_DECODER)             += vp3.o
-OBJS-$(CONFIG_VP5_DECODER)             += vp5.o vp56.o vp56data.o vp56dsp.o \
-                                          vp56rac.o
-OBJS-$(CONFIG_VP6_DECODER)             += vp6.o vp56.o vp56data.o vp56dsp.o \
+OBJS-$(CONFIG_VP5_DECODER)             += vp5.o vp56.o vp56data.o vp56rac.o
+OBJS-$(CONFIG_VP6_DECODER)             += vp6.o vp56.o vp56data.o \
                                           vp6dsp.o vp56rac.o
-OBJS-$(CONFIG_VP7_DECODER)             += vp8.o vp8dsp.o vp56rac.o
-OBJS-$(CONFIG_VP8_DECODER)             += vp8.o vp8dsp.o vp56rac.o
+OBJS-$(CONFIG_VP7_DECODER)             += vp8.o vp56rac.o
+OBJS-$(CONFIG_VP8_DECODER)             += vp8.o vp56rac.o
 OBJS-$(CONFIG_VP9_DECODER)             += vp9.o vp9dsp.o vp56rac.o vp9dsp_8bpp.o \
                                           vp9dsp_10bpp.o vp9dsp_12bpp.o
 OBJS-$(CONFIG_VPLAYER_DECODER)         += textdec.o ass.o
 OBJS-$(CONFIG_VQA_DECODER)             += vqavideo.o
 OBJS-$(CONFIG_WAVPACK_DECODER)         += wavpack.o
 OBJS-$(CONFIG_WAVPACK_ENCODER)         += wavpackenc.o
-OBJS-$(CONFIG_WEBP_DECODER)            += vp8.o vp8dsp.o vp56rac.o
-OBJS-$(CONFIG_WEBP_DECODER)            += webp.o exif.o tiff_common.o
+OBJS-$(CONFIG_WEBP_DECODER)            += webp.o
 OBJS-$(CONFIG_WEBVTT_DECODER)          += webvttdec.o ass.o
 OBJS-$(CONFIG_WEBVTT_ENCODER)          += webvttenc.o ass_split.o
 OBJS-$(CONFIG_WMALOSSLESS_DECODER)     += wmalosslessdec.o wma_common.o
@@ -538,12 +578,13 @@ OBJS-$(CONFIG_WMAVOICE_DECODER)        += wmavoice.o \
                                           acelp_vectors.o acelp_filters.o
 OBJS-$(CONFIG_WMV1_DECODER)            += msmpeg4dec.o msmpeg4.o msmpeg4data.o
 OBJS-$(CONFIG_WMV1_ENCODER)            += msmpeg4enc.o
-OBJS-$(CONFIG_WMV2_DECODER)            += wmv2dec.o wmv2.o wmv2dsp.o \
+OBJS-$(CONFIG_WMV2_DECODER)            += wmv2dec.o wmv2.o \
                                           msmpeg4dec.o msmpeg4.o msmpeg4data.o
-OBJS-$(CONFIG_WMV2_ENCODER)            += wmv2enc.o wmv2.o wmv2dsp.o \
+OBJS-$(CONFIG_WMV2_ENCODER)            += wmv2enc.o wmv2.o \
                                           msmpeg4.o msmpeg4enc.o msmpeg4data.o
 OBJS-$(CONFIG_WNV1_DECODER)            += wnv1.o
 OBJS-$(CONFIG_WS_SND1_DECODER)         += ws-snd1.o
+OBJS-$(CONFIG_WRAPPED_AVFRAME_ENCODER) += wrapped_avframe.o
 OBJS-$(CONFIG_XAN_DPCM_DECODER)        += dpcm.o
 OBJS-$(CONFIG_XAN_WC3_DECODER)         += xan.o
 OBJS-$(CONFIG_XAN_WC4_DECODER)         += xxan.o
@@ -553,6 +594,8 @@ OBJS-$(CONFIG_XBM_ENCODER)             += xbmenc.o
 OBJS-$(CONFIG_XFACE_DECODER)           += xfacedec.o xface.o
 OBJS-$(CONFIG_XFACE_ENCODER)           += xfaceenc.o xface.o
 OBJS-$(CONFIG_XL_DECODER)              += xl.o
+OBJS-$(CONFIG_XMA1_DECODER)            += wmaprodec.o wma.o wma_common.o
+OBJS-$(CONFIG_XMA2_DECODER)            += wmaprodec.o wma.o wma_common.o
 OBJS-$(CONFIG_XSUB_DECODER)            += xsubdec.o
 OBJS-$(CONFIG_XSUB_ENCODER)            += xsubenc.o
 OBJS-$(CONFIG_XWD_DECODER)             += xwddec.o
@@ -630,6 +673,7 @@ OBJS-$(CONFIG_ADPCM_4XM_DECODER)          += adpcm.o adpcm_data.o
 OBJS-$(CONFIG_ADPCM_ADX_DECODER)          += adxdec.o adx.o
 OBJS-$(CONFIG_ADPCM_ADX_ENCODER)          += adxenc.o adx.o
 OBJS-$(CONFIG_ADPCM_AFC_DECODER)          += adpcm.o adpcm_data.o
+OBJS-$(CONFIG_ADPCM_AICA_DECODER)         += adpcm.o adpcm_data.o
 OBJS-$(CONFIG_ADPCM_CT_DECODER)           += adpcm.o adpcm_data.o
 OBJS-$(CONFIG_ADPCM_DTK_DECODER)          += adpcm.o adpcm_data.o
 OBJS-$(CONFIG_ADPCM_EA_DECODER)           += adpcm.o adpcm_data.o
@@ -660,6 +704,7 @@ OBJS-$(CONFIG_ADPCM_IMA_WAV_ENCODER)      += adpcmenc.o adpcm_data.o
 OBJS-$(CONFIG_ADPCM_IMA_WS_DECODER)       += adpcm.o adpcm_data.o
 OBJS-$(CONFIG_ADPCM_MS_DECODER)           += adpcm.o adpcm_data.o
 OBJS-$(CONFIG_ADPCM_MS_ENCODER)           += adpcmenc.o adpcm_data.o
+OBJS-$(CONFIG_ADPCM_PSX_DECODER)          += adpcm.o adpcm_data.o
 OBJS-$(CONFIG_ADPCM_SBPRO_2_DECODER)      += adpcm.o adpcm_data.o
 OBJS-$(CONFIG_ADPCM_SBPRO_3_DECODER)      += adpcm.o adpcm_data.o
 OBJS-$(CONFIG_ADPCM_SBPRO_4_DECODER)      += adpcm.o adpcm_data.o
@@ -670,40 +715,50 @@ OBJS-$(CONFIG_ADPCM_VIMA_DECODER)         += vima.o adpcm_data.o
 OBJS-$(CONFIG_ADPCM_XA_DECODER)           += adpcm.o adpcm_data.o
 OBJS-$(CONFIG_ADPCM_YAMAHA_DECODER)       += adpcm.o adpcm_data.o
 OBJS-$(CONFIG_ADPCM_YAMAHA_ENCODER)       += adpcmenc.o adpcm_data.o
-OBJS-$(CONFIG_VIMA_DECODER)               += vima.o adpcm_data.o
 
 # hardware accelerators
 OBJS-$(CONFIG_D3D11VA)                    += dxva2.o
 OBJS-$(CONFIG_DXVA2)                      += dxva2.o
 OBJS-$(CONFIG_VAAPI)                      += vaapi.o
-OBJS-$(CONFIG_VDA)                        += vda.o
+OBJS-$(CONFIG_VDA)                        += vda.o videotoolbox.o
+OBJS-$(CONFIG_VIDEOTOOLBOX)               += videotoolbox.o
 OBJS-$(CONFIG_VDPAU)                      += vdpau.o
 
 OBJS-$(CONFIG_H263_VAAPI_HWACCEL)         += vaapi_mpeg4.o
-OBJS-$(CONFIG_H263_VDPAU_HWACCEL)         += vdpau_mpeg4.o
+OBJS-$(CONFIG_H263_VIDEOTOOLBOX_HWACCEL)  += videotoolbox.o
 OBJS-$(CONFIG_H264_D3D11VA_HWACCEL)       += dxva2_h264.o
 OBJS-$(CONFIG_H264_DXVA2_HWACCEL)         += dxva2_h264.o
 OBJS-$(CONFIG_H264_VAAPI_HWACCEL)         += vaapi_h264.o
 OBJS-$(CONFIG_H264_VDA_HWACCEL)           += vda_h264.o
 OBJS-$(CONFIG_H264_VDPAU_HWACCEL)         += vdpau_h264.o
+OBJS-$(CONFIG_H264_VIDEOTOOLBOX_HWACCEL)  += videotoolbox.o
 OBJS-$(CONFIG_HEVC_D3D11VA_HWACCEL)       += dxva2_hevc.o
 OBJS-$(CONFIG_HEVC_DXVA2_HWACCEL)         += dxva2_hevc.o
+OBJS-$(CONFIG_HEVC_VAAPI_HWACCEL)         += vaapi_hevc.o
+OBJS-$(CONFIG_HEVC_VDPAU_HWACCEL)         += vdpau_hevc.o
 OBJS-$(CONFIG_MPEG1_VDPAU_HWACCEL)        += vdpau_mpeg12.o
+OBJS-$(CONFIG_MPEG1_VIDEOTOOLBOX_HWACCEL) += videotoolbox.o
 OBJS-$(CONFIG_MPEG1_XVMC_HWACCEL)         += mpegvideo_xvmc.o
 OBJS-$(CONFIG_MPEG2_D3D11VA_HWACCEL)      += dxva2_mpeg2.o
 OBJS-$(CONFIG_MPEG2_DXVA2_HWACCEL)        += dxva2_mpeg2.o
 OBJS-$(CONFIG_MPEG2_VAAPI_HWACCEL)        += vaapi_mpeg2.o
 OBJS-$(CONFIG_MPEG2_VDPAU_HWACCEL)        += vdpau_mpeg12.o
+OBJS-$(CONFIG_MPEG2_VIDEOTOOLBOX_HWACCEL) += videotoolbox.o
 OBJS-$(CONFIG_MPEG2_XVMC_HWACCEL)         += mpegvideo_xvmc.o
 OBJS-$(CONFIG_MPEG4_VAAPI_HWACCEL)        += vaapi_mpeg4.o
 OBJS-$(CONFIG_MPEG4_VDPAU_HWACCEL)        += vdpau_mpeg4.o
+OBJS-$(CONFIG_MPEG4_VIDEOTOOLBOX_HWACCEL) += videotoolbox.o
 OBJS-$(CONFIG_VC1_D3D11VA_HWACCEL)        += dxva2_vc1.o
 OBJS-$(CONFIG_VC1_DXVA2_HWACCEL)          += dxva2_vc1.o
 OBJS-$(CONFIG_VC1_VAAPI_HWACCEL)          += vaapi_vc1.o
 OBJS-$(CONFIG_VC1_VDPAU_HWACCEL)          += vdpau_vc1.o
+OBJS-$(CONFIG_VP9_D3D11VA_HWACCEL)        += dxva2_vp9.o
+OBJS-$(CONFIG_VP9_DXVA2_HWACCEL)          += dxva2_vp9.o
+OBJS-$(CONFIG_VP9_VAAPI_HWACCEL)          += vaapi_vp9.o
 
 # libavformat dependencies
 OBJS-$(CONFIG_ADTS_MUXER)              += mpeg4audio.o
+OBJS-$(CONFIG_AVI_DEMUXER)             += mpeg4audio.o mpegaudiodata.o
 OBJS-$(CONFIG_CAF_DEMUXER)             += mpeg4audio.o mpegaudiodata.o  \
                                           ac3tab.o
 OBJS-$(CONFIG_FLAC_DEMUXER)            += flac.o flacdata.o vorbis_data.o
@@ -745,7 +800,6 @@ OBJS-$(CONFIG_WTV_DEMUXER)             += mpeg4audio.o mpegaudiodata.o
 OBJS-$(CONFIG_ELBG_FILTER)             += elbg.o
 
 # external codec libraries
-OBJS-$(CONFIG_LIBAACPLUS_ENCODER)         += libaacplus.o
 OBJS-$(CONFIG_LIBCELT_DECODER)            += libcelt_dec.o
 OBJS-$(CONFIG_LIBDCADEC_DECODER)          += libdcadec.o dca.o
 OBJS-$(CONFIG_LIBFAAC_ENCODER)            += libfaac.o
@@ -757,6 +811,7 @@ OBJS-$(CONFIG_LIBGSM_MS_DECODER)          += libgsmdec.o
 OBJS-$(CONFIG_LIBGSM_MS_ENCODER)          += libgsmenc.o
 OBJS-$(CONFIG_LIBILBC_DECODER)            += libilbc.o
 OBJS-$(CONFIG_LIBILBC_ENCODER)            += libilbc.o
+OBJS-$(CONFIG_LIBKVAZAAR_ENCODER)         += libkvazaar.o
 OBJS-$(CONFIG_LIBMP3LAME_ENCODER)         += libmp3lame.o mpegaudiodecheader.o
 OBJS-$(CONFIG_LIBOPENCORE_AMRNB_DECODER)  += libopencore-amr.o
 OBJS-$(CONFIG_LIBOPENCORE_AMRNB_ENCODER)  += libopencore-amr.o
@@ -775,12 +830,10 @@ OBJS-$(CONFIG_LIBSCHROEDINGER_ENCODER)    += libschroedingerenc.o \
 OBJS-$(CONFIG_LIBSHINE_ENCODER)           += libshine.o
 OBJS-$(CONFIG_LIBSPEEX_DECODER)           += libspeexdec.o
 OBJS-$(CONFIG_LIBSPEEX_ENCODER)           += libspeexenc.o
-OBJS-$(CONFIG_LIBSTAGEFRIGHT_H264_DECODER)+= libstagefright.o
 OBJS-$(CONFIG_LIBTHEORA_ENCODER)          += libtheoraenc.o
 OBJS-$(CONFIG_LIBTWOLAME_ENCODER)         += libtwolame.o
 OBJS-$(CONFIG_LIBUTVIDEO_DECODER)         += libutvideodec.o
 OBJS-$(CONFIG_LIBUTVIDEO_ENCODER)         += libutvideoenc.o
-OBJS-$(CONFIG_LIBVO_AACENC_ENCODER)       += libvo-aacenc.o mpeg4audio.o
 OBJS-$(CONFIG_LIBVO_AMRWBENC_ENCODER)     += libvo-amrwbenc.o
 OBJS-$(CONFIG_LIBVORBIS_DECODER)          += libvorbisdec.o
 OBJS-$(CONFIG_LIBVORBIS_ENCODER)          += libvorbisenc.o \
@@ -792,6 +845,7 @@ OBJS-$(CONFIG_LIBVPX_VP9_ENCODER)         += libvpxenc.o libvpx.o
 OBJS-$(CONFIG_LIBWAVPACK_ENCODER)         += libwavpackenc.o
 OBJS-$(CONFIG_LIBWEBP_ENCODER)            += libwebpenc_common.o libwebpenc.o
 OBJS-$(CONFIG_LIBWEBP_ANIM_ENCODER)       += libwebpenc_common.o libwebpenc_animencoder.o
+OBJS-$(CONFIG_LIBX262_ENCODER)            += libx264.o
 OBJS-$(CONFIG_LIBX264_ENCODER)            += libx264.o
 OBJS-$(CONFIG_LIBX265_ENCODER)            += libx265.o
 OBJS-$(CONFIG_LIBXAVS_ENCODER)            += libxavs.o
@@ -812,21 +866,23 @@ OBJS-$(CONFIG_DCA_PARSER)              += dca_parser.o dca.o
 OBJS-$(CONFIG_DIRAC_PARSER)            += dirac_parser.o
 OBJS-$(CONFIG_DNXHD_PARSER)            += dnxhd_parser.o
 OBJS-$(CONFIG_DPX_PARSER)              += dpx_parser.o
+OBJS-$(CONFIG_DVAUDIO_PARSER)          += dvaudio_parser.o
 OBJS-$(CONFIG_DVBSUB_PARSER)           += dvbsub_parser.o
 OBJS-$(CONFIG_DVD_NAV_PARSER)          += dvd_nav_parser.o
 OBJS-$(CONFIG_DVDSUB_PARSER)           += dvdsub_parser.o
 OBJS-$(CONFIG_FLAC_PARSER)             += flac_parser.o flacdata.o flac.o \
                                           vorbis_data.o
+OBJS-$(CONFIG_G729_PARSER)             += g729_parser.o
 OBJS-$(CONFIG_GSM_PARSER)              += gsm_parser.o
 OBJS-$(CONFIG_H261_PARSER)             += h261_parser.o
 OBJS-$(CONFIG_H263_PARSER)             += h263_parser.o
 OBJS-$(CONFIG_H264_PARSER)             += h264_parser.o
-OBJS-$(CONFIG_HEVC_PARSER)             += hevc_parser.o
+OBJS-$(CONFIG_HEVC_PARSER)             += hevc_parser.o hevc_parse.o hevc_ps.o hevc_data.o
 OBJS-$(CONFIG_MJPEG_PARSER)            += mjpeg_parser.o
 OBJS-$(CONFIG_MLP_PARSER)              += mlp_parser.o mlp.o
 OBJS-$(CONFIG_MPEG4VIDEO_PARSER)       += mpeg4video_parser.o h263.o \
                                           mpeg4videodec.o mpeg4video.o \
-                                          ituh263dec.o h263dec.o
+                                          ituh263dec.o h263dec.o h263data.o
 OBJS-$(CONFIG_PNG_PARSER)              += png_parser.o
 OBJS-$(CONFIG_MPEGAUDIO_PARSER)        += mpegaudio_parser.o \
                                           mpegaudiodecheader.o mpegaudiodata.o
@@ -849,6 +905,7 @@ OBJS-$(CONFIG_AAC_ADTSTOASC_BSF)          += aac_adtstoasc_bsf.o aacadtsdec.o \
 OBJS-$(CONFIG_CHOMP_BSF)                  += chomp_bsf.o
 OBJS-$(CONFIG_DUMP_EXTRADATA_BSF)         += dump_extradata_bsf.o
 OBJS-$(CONFIG_H264_MP4TOANNEXB_BSF)       += h264_mp4toannexb_bsf.o
+OBJS-$(CONFIG_HEVC_MP4TOANNEXB_BSF)       += hevc_mp4toannexb_bsf.o
 OBJS-$(CONFIG_IMX_DUMP_HEADER_BSF)        += imx_dump_header_bsf.o
 OBJS-$(CONFIG_MJPEG2JPEG_BSF)             += mjpeg2jpeg_bsf.o
 OBJS-$(CONFIG_MJPEGA_DUMP_HEADER_BSF)     += mjpega_dump_header_bsf.o
@@ -871,33 +928,36 @@ SLIBOBJS-$(HAVE_GNU_WINDRES)           += avcodecres.o
 
 SKIPHEADERS                            += %_tablegen.h                  \
                                           %_tables.h                    \
-                                          aac_tablegen_decl.h           \
                                           fft-internal.h                \
-                                          libutvideo.h                  \
-                                          old_codec_ids.h               \
                                           tableprint.h                  \
                                           tableprint_vlc.h              \
+                                          aaccoder_twoloop.h            \
+                                          aaccoder_trellis.h            \
+                                          aacenc_quantization.h         \
+                                          aacenc_quantization_misc.h    \
                                           $(ARCH)/vp56_arith.h          \
 
 SKIPHEADERS-$(CONFIG_D3D11VA)          += d3d11va.h dxva2_internal.h
 SKIPHEADERS-$(CONFIG_DXVA2)            += dxva2.h dxva2_internal.h
 SKIPHEADERS-$(CONFIG_LIBSCHROEDINGER)  += libschroedinger.h
 SKIPHEADERS-$(CONFIG_LIBUTVIDEO)       += libutvideo.h
+SKIPHEADERS-$(CONFIG_LIBVPX)           += libvpx.h
+SKIPHEADERS-$(CONFIG_LIBWEBP_ENCODER)  += libwebpenc_common.h
 SKIPHEADERS-$(CONFIG_QSV)              += qsv.h qsv_internal.h
 SKIPHEADERS-$(CONFIG_QSVDEC)           += qsvdec.h
 SKIPHEADERS-$(CONFIG_QSVENC)           += qsvenc.h
 SKIPHEADERS-$(CONFIG_XVMC)             += xvmc.h
 SKIPHEADERS-$(CONFIG_VAAPI)            += vaapi_internal.h
-SKIPHEADERS-$(CONFIG_VDA)              += vda.h vda_internal.h
+SKIPHEADERS-$(CONFIG_VDA)              += vda.h vda_vt_internal.h
 SKIPHEADERS-$(CONFIG_VDPAU)            += vdpau.h vdpau_internal.h
+SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX)     += videotoolbox.h vda_vt_internal.h
 
 TESTPROGS = imgconvert                                                  \
+            jpeg2000dwt                                                 \
             mathops                                                    \
             options                                                     \
             avfft                                                       \
 
-TESTPROGS += api-flac
-
 TESTPROGS-$(CONFIG_CABAC)                 += cabac
 TESTPROGS-$(CONFIG_FFT)                   += fft fft-fixed fft-fixed32
 TESTPROGS-$(CONFIG_IDCTDSP)               += dct
@@ -911,19 +971,18 @@ TESTOBJS = dctref.o
 
 TOOLS = fourcc2pixfmt
 
-HOSTPROGS = aac_tablegen                                                \
-            aacps_tablegen                                              \
-            aacsbr_tablegen                                             \
-            cabac_tablegen                                              \
+HOSTPROGS = aacps_tablegen                                              \
+            aacps_fixed_tablegen                                        \
             cbrt_tablegen                                               \
+            cbrt_fixed_tablegen                                         \
             cos_tablegen                                                \
-            dsd_tablegen                                                \
             dv_tablegen                                                 \
             motionpixels_tablegen                                       \
             mpegaudio_tablegen                                          \
             pcm_tablegen                                                \
             qdm2_tablegen                                               \
             sinewin_tablegen                                            \
+            sinewin_fixed_tablegen                                      \
 
 CLEANFILES = *_tables.c *_tables.h *_tablegen$(HOSTEXESUF)
 
@@ -942,8 +1001,9 @@ else
 $(SUBDIR)%_tablegen$(HOSTEXESUF): HOSTCFLAGS += -DCONFIG_SMALL=0
 endif
 
-GEN_HEADERS = cabac_tables.h cbrt_tables.h aacps_tables.h aacsbr_tables.h aac_tables.h dsd_tables.h dv_tables.h     \
-              sinewin_tables.h mpegaudio_tables.h motionpixels_tables.h \
+GEN_HEADERS = cbrt_tables.h cbrt_fixed_tables.h aacps_tables.h aacps_fixed_tables.h \
+              dv_tables.h     \
+              sinewin_tables.h sinewin_fixed_tables.h mpegaudio_tables.h motionpixels_tables.h \
               pcm_tables.h qdm2_tables.h
 GEN_HEADERS := $(addprefix $(SUBDIR), $(GEN_HEADERS))
 
@@ -952,13 +1012,13 @@ $(GEN_HEADERS): $(SUBDIR)%_tables.h: $(SUBDIR)%_tablegen$(HOSTEXESUF)
 
 ifdef CONFIG_HARDCODED_TABLES
 $(SUBDIR)aacdec.o: $(SUBDIR)cbrt_tables.h
-$(SUBDIR)aacps.o: $(SUBDIR)aacps_tables.h
-$(SUBDIR)aacsbr.o: $(SUBDIR)aacsbr_tables.h
-$(SUBDIR)aactab.o: $(SUBDIR)aac_tables.h
-$(SUBDIR)cabac.o: $(SUBDIR)cabac_tables.h
-$(SUBDIR)dsddec.o: $(SUBDIR)dsd_tables.h
+$(SUBDIR)aacdec_fixed.o: $(SUBDIR)cbrt_fixed_tables.h
+$(SUBDIR)aacps_float.o: $(SUBDIR)aacps_tables.h
+$(SUBDIR)aacps_fixed.o: $(SUBDIR)aacps_fixed_tables.h
+$(SUBDIR)aactab_fixed.o: $(SUBDIR)aac_fixed_tables.h
 $(SUBDIR)dvenc.o: $(SUBDIR)dv_tables.h
 $(SUBDIR)sinewin.o: $(SUBDIR)sinewin_tables.h
+$(SUBDIR)sinewin_fixed.o: $(SUBDIR)sinewin_fixed_tables.h
 $(SUBDIR)mpegaudiodec_fixed.o: $(SUBDIR)mpegaudio_tables.h
 $(SUBDIR)mpegaudiodec_float.o: $(SUBDIR)mpegaudio_tables.h
 $(SUBDIR)motionpixels.o: $(SUBDIR)motionpixels_tables.h
diff --git a/libavcodec/a64multienc.c b/libavcodec/a64multienc.c
index f9c4f36e..91aac093 100644
--- a/libavcodec/a64multienc.c
+++ b/libavcodec/a64multienc.c
@@ -66,7 +66,8 @@ static const int mc_colors[5]={0x0,0xb,0xc,0xf,0x1};
 //static const int mc_colors[5]={0x0,0x8,0xa,0xf,0x7};
 //static const int mc_colors[5]={0x0,0x9,0x8,0xa,0x3};
 
-static void to_meta_with_crop(AVCodecContext *avctx, const AVFrame *p, int *dest)
+static void to_meta_with_crop(AVCodecContext *avctx,
+                              const AVFrame *p, int *dest)
 {
     int blockx, blocky, x, y;
     int luma = 0;
@@ -234,7 +235,7 @@ static av_cold int a64multi_encode_init(AVCodecContext *avctx)
     }
 
     /* set up extradata */
-    if (!(avctx->extradata = av_mallocz(8 * 4 + FF_INPUT_BUFFER_PADDING_SIZE))) {
+    if (!(avctx->extradata = av_mallocz(8 * 4 + AV_INPUT_BUFFER_PADDING_SIZE))) {
         av_log(avctx, AV_LOG_ERROR, "Failed to allocate memory for extradata.\n");
         return AVERROR(ENOMEM);
     }
@@ -327,7 +328,7 @@ static int a64multi_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         /* any frames to encode? */
         if (c->mc_lifetime) {
             int alloc_size = charset_size + c->mc_lifetime*(screen_size + colram_size);
-            if ((ret = ff_alloc_packet2(avctx, pkt, alloc_size)) < 0)
+            if ((ret = ff_alloc_packet2(avctx, pkt, alloc_size, 0)) < 0)
                 return ret;
             buf = pkt->data;
 
@@ -405,7 +406,7 @@ AVCodec ff_a64multi_encoder = {
     .encode2        = a64multi_encode_frame,
     .close          = a64multi_close_encoder,
     .pix_fmts       = (const enum AVPixelFormat[]) {AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE},
-    .capabilities   = CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_DELAY,
 };
 #endif
 #if CONFIG_A64MULTI5_ENCODER
@@ -419,6 +420,6 @@ AVCodec ff_a64multi5_encoder = {
     .encode2        = a64multi_encode_frame,
     .close          = a64multi_close_encoder,
     .pix_fmts       = (const enum AVPixelFormat[]) {AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE},
-    .capabilities   = CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_DELAY,
 };
 #endif
diff --git a/libavcodec/aac.h b/libavcodec/aac.h
index 23ec085d..b1f4aa74 100644
--- a/libavcodec/aac.h
+++ b/libavcodec/aac.h
@@ -30,9 +30,14 @@
 #ifndef AVCODEC_AAC_H
 #define AVCODEC_AAC_H
 
+
+#include "aac_defines.h"
 #include "libavutil/float_dsp.h"
+#include "libavutil/fixed_dsp.h"
 #include "avcodec.h"
+#if !USE_FIXED
 #include "imdct15.h"
+#endif
 #include "fft.h"
 #include "mpeg4audio.h"
 #include "sbr.h"
@@ -45,6 +50,8 @@
 #define TNS_MAX_ORDER 20
 #define MAX_LTP_LONG_SFB 40
 
+#define CLIP_AVOIDANCE_FACTOR 0.95f
+
 enum RawDataBlockType {
     TYPE_SCE,
     TYPE_CPE,
@@ -76,9 +83,10 @@ enum BandType {
     ZERO_BT        = 0,     ///< Scalefactors and spectral data are all zero.
     FIRST_PAIR_BT  = 5,     ///< This and later band types encode two values (rather than four) with one code word.
     ESC_BT         = 11,    ///< Spectral data are coded with an escape sequence.
+    RESERVED_BT    = 12,    ///< Band types following are encoded differently from others.
     NOISE_BT       = 13,    ///< Spectral data are scaled white noise not coded in the bitstream.
-    INTENSITY_BT2  = 14,    ///< Scalefactor data are intensity stereo positions.
-    INTENSITY_BT   = 15,    ///< Scalefactor data are intensity stereo positions.
+    INTENSITY_BT2  = 14,    ///< Scalefactor data are intensity stereo positions (out of phase).
+    INTENSITY_BT   = 15,    ///< Scalefactor data are intensity stereo positions (in phase).
 };
 
 #define IS_CODEBOOK_UNSIGNED(x) (((x) - 1) & 10)
@@ -125,12 +133,14 @@ typedef struct OutputConfiguration {
  * Predictor State
  */
 typedef struct PredictorState {
-    float cor0;
-    float cor1;
-    float var0;
-    float var1;
-    float r0;
-    float r1;
+    AAC_FLOAT cor0;
+    AAC_FLOAT cor1;
+    AAC_FLOAT var0;
+    AAC_FLOAT var1;
+    AAC_FLOAT r0;
+    AAC_FLOAT r1;
+    AAC_FLOAT k1;
+    AAC_FLOAT x_est;
 } PredictorState;
 
 #define MAX_PREDICTORS 672
@@ -141,6 +151,8 @@ typedef struct PredictorState {
 #define SCALE_MAX_DIFF   60    ///< maximum scalefactor difference allowed by standard
 #define SCALE_DIFF_ZERO  60    ///< codebook index corresponding to zero scalefactor indices difference
 
+#define POW_SF2_ZERO    200    ///< ff_aac_pow2sf_tab index corresponding to pow(2, 0)
+
 #define NOISE_PRE       256    ///< preamble for NOISE_BT, put in bitstream with the first noise band
 #define NOISE_PRE_BITS    9    ///< length of preamble
 #define NOISE_OFFSET     90    ///< subtracted from global gain, used as offset for the preamble
@@ -151,7 +163,8 @@ typedef struct PredictorState {
 typedef struct LongTermPrediction {
     int8_t present;
     int16_t lag;
-    float coef;
+    int coef_idx;
+    INTFLOAT coef;
     int8_t used[MAX_LTP_LONG_SFB];
 } LongTermPrediction;
 
@@ -173,7 +186,10 @@ typedef struct IndividualChannelStream {
     int predictor_present;
     int predictor_initialized;
     int predictor_reset_group;
+    int predictor_reset_count[31];  ///< used by encoder to count prediction resets
     uint8_t prediction_used[41];
+    uint8_t window_clipping[8]; ///< set if a certain window is near clipping
+    float clip_avoidance_factor; ///< set, if any window is near clipping, to the attenuation factor necessary to avoid it
 } IndividualChannelStream;
 
 /**
@@ -185,7 +201,8 @@ typedef struct TemporalNoiseShaping {
     int length[8][4];
     int direction[8][4];
     int order[8][4];
-    float coef[8][4][TNS_MAX_ORDER];
+    int coef_idx[8][4][TNS_MAX_ORDER];
+    INTFLOAT coef[8][4][TNS_MAX_ORDER];
 } TemporalNoiseShaping;
 
 /**
@@ -222,7 +239,7 @@ typedef struct ChannelCoupling {
     int ch_select[8];      /**< [0] shared list of gains; [1] list of gains for right channel;
                             *   [2] list of gains for left channel; [3] lists of gains for both channels
                             */
-    float gain[16][120];
+    INTFLOAT gain[16][120];
 } ChannelCoupling;
 
 /**
@@ -233,17 +250,23 @@ typedef struct SingleChannelElement {
     TemporalNoiseShaping tns;
     Pulse pulse;
     enum BandType band_type[128];                   ///< band types
+    enum BandType band_alt[128];                    ///< alternative band type (used by encoder)
     int band_type_run_end[120];                     ///< band type run end points
-    float sf[120];                                  ///< scalefactors
+    INTFLOAT sf[120];                               ///< scalefactors
     int sf_idx[128];                                ///< scalefactor indices (used by encoder)
     uint8_t zeroes[128];                            ///< band is not coded (used by encoder)
-    DECLARE_ALIGNED(32, float,   pcoeffs)[1024];    ///< coefficients for IMDCT, pristine
-    DECLARE_ALIGNED(32, float,   coeffs)[1024];     ///< coefficients for IMDCT, maybe processed
-    DECLARE_ALIGNED(32, float,   saved)[1536];      ///< overlap
-    DECLARE_ALIGNED(32, float,   ret_buf)[2048];    ///< PCM output buffer
-    DECLARE_ALIGNED(16, float,   ltp_state)[3072];  ///< time signal for LTP
+    uint8_t can_pns[128];                           ///< band is allowed to PNS (informative)
+    float  is_ener[128];                            ///< Intensity stereo pos (used by encoder)
+    float pns_ener[128];                            ///< Noise energy values (used by encoder)
+    DECLARE_ALIGNED(32, INTFLOAT, pcoeffs)[1024];   ///< coefficients for IMDCT, pristine
+    DECLARE_ALIGNED(32, INTFLOAT, coeffs)[1024];    ///< coefficients for IMDCT, maybe processed
+    DECLARE_ALIGNED(32, INTFLOAT, saved)[1536];     ///< overlap
+    DECLARE_ALIGNED(32, INTFLOAT, ret_buf)[2048];   ///< PCM output buffer
+    DECLARE_ALIGNED(16, INTFLOAT, ltp_state)[3072]; ///< time signal for LTP
+    DECLARE_ALIGNED(32, AAC_FLOAT, lcoeffs)[1024];  ///< MDCT of LTP coefficients (used by encoder)
+    DECLARE_ALIGNED(32, AAC_FLOAT, prcoeffs)[1024]; ///< Main prediction coefs (used by encoder)
     PredictorState predictor_state[MAX_PREDICTORS];
-    float *ret;                                     ///< PCM output
+    INTFLOAT *ret;                                  ///< PCM output
 } SingleChannelElement;
 
 /**
@@ -254,7 +277,9 @@ typedef struct ChannelElement {
     // CPE specific
     int common_window;        ///< Set if channels share a common 'IndividualChannelStream' in bitstream.
     int     ms_mode;          ///< Signals mid/side stereo flags coding mode (used by encoder)
+    uint8_t is_mode;          ///< Set if any bands have been encoded using intensity stereo (used by encoder)
     uint8_t ms_mask[128];     ///< Set if mid/side stereo is used for each scalefactor window band
+    uint8_t is_mask[128];     ///< Set if intensity stereo is used (used by encoder)
     // shared
     SingleChannelElement ch[2];
     // CCE specific
@@ -288,7 +313,7 @@ struct AACContext {
      * (We do not want to have these on the stack.)
      * @{
      */
-    DECLARE_ALIGNED(32, float, buf_mdct)[1024];
+    DECLARE_ALIGNED(32, INTFLOAT, buf_mdct)[1024];
     /** @} */
 
     /**
@@ -299,8 +324,12 @@ struct AACContext {
     FFTContext mdct_small;
     FFTContext mdct_ld;
     FFTContext mdct_ltp;
+#if USE_FIXED
+    AVFixedDSPContext *fdsp;
+#else
     IMDCT15Context *mdct480;
     AVFloatDSPContext *fdsp;
+#endif /* USE_FIXED */
     int random_state;
     /** @} */
 
@@ -320,7 +349,7 @@ struct AACContext {
     int dmono_mode;      ///< 0->not dmono, 1->use first channel, 2->use second channel
     /** @} */
 
-    DECLARE_ALIGNED(32, float, temp)[128];
+    DECLARE_ALIGNED(32, INTFLOAT, temp)[128];
 
     OutputConfiguration oc[2];
     int warned_num_aac_frames;
@@ -328,11 +357,13 @@ struct AACContext {
     /* aacdec functions pointers */
     void (*imdct_and_windowing)(AACContext *ac, SingleChannelElement *sce);
     void (*apply_ltp)(AACContext *ac, SingleChannelElement *sce);
-    void (*apply_tns)(float coef[1024], TemporalNoiseShaping *tns,
+    void (*apply_tns)(INTFLOAT coef[1024], TemporalNoiseShaping *tns,
                       IndividualChannelStream *ics, int decode);
-    void (*windowing_and_mdct_ltp)(AACContext *ac, float *out,
-                                   float *in, IndividualChannelStream *ics);
+    void (*windowing_and_mdct_ltp)(AACContext *ac, INTFLOAT *out,
+                                   INTFLOAT *in, IndividualChannelStream *ics);
     void (*update_ltp)(AACContext *ac, SingleChannelElement *sce);
+    void (*vector_pow43)(int *coefs, int len);
+    void (*subband_scale)(int *dst, int *src, int scale, int offset, int len);
 
 };
 
diff --git a/libavcodec/aac_ac3_parser.c b/libavcodec/aac_ac3_parser.c
index 7fefda5c..2f7d5680 100644
--- a/libavcodec/aac_ac3_parser.c
+++ b/libavcodec/aac_ac3_parser.c
@@ -84,14 +84,6 @@ int ff_aac_ac3_parse(AVCodecParserContext *s1,
         avctx->sample_rate = s->sample_rate;
 
         /* (E-)AC-3: allow downmixing to stereo or mono */
-#if FF_API_REQUEST_CHANNELS
-FF_DISABLE_DEPRECATION_WARNINGS
-        if (avctx->request_channels == 1)
-            avctx->request_channel_layout = AV_CH_LAYOUT_MONO;
-        else if (avctx->request_channels == 2)
-            avctx->request_channel_layout = AV_CH_LAYOUT_STEREO;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
         if (s->channels > 1 &&
             avctx->request_channel_layout == AV_CH_LAYOUT_MONO) {
             avctx->channels       = 1;
diff --git a/libavcodec/aac_adtstoasc_bsf.c b/libavcodec/aac_adtstoasc_bsf.c
index 1f11d024..9c117c60 100644
--- a/libavcodec/aac_adtstoasc_bsf.c
+++ b/libavcodec/aac_adtstoasc_bsf.c
@@ -89,7 +89,7 @@ static int aac_adtstoasc_filter(AVBitStreamFilterContext *bsfc,
         }
         av_free(avctx->extradata);
         avctx->extradata_size = 2 + pce_size;
-        avctx->extradata = av_mallocz(avctx->extradata_size + FF_INPUT_BUFFER_PADDING_SIZE);
+        avctx->extradata = av_mallocz(avctx->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
         if (!avctx->extradata) {
             avctx->extradata_size = 0;
             return AVERROR(ENOMEM);
diff --git a/libavcodec/aac_defines.h b/libavcodec/aac_defines.h
new file mode 100644
index 00000000..eff63b34
--- /dev/null
+++ b/libavcodec/aac_defines.h
@@ -0,0 +1,114 @@
+/*
+ * AAC defines
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AAC_DEFINES_H
+#define AVCODEC_AAC_DEFINES_H
+
+#ifndef USE_FIXED
+#define USE_FIXED 0
+#endif
+
+#if USE_FIXED
+
+#include "libavutil/softfloat.h"
+
+#define FFT_FLOAT    0      /* NOTE(review): presumably read by the FFT headers to pick the implementation -- confirm */
+#define FFT_FIXED_32 1
+
+#define AAC_RENAME(x)       x ## _fixed      /* pastes the suffix _fixed onto the identifier */
+#define AAC_RENAME_32(x)    x ## _fixed_32   /* pastes the suffix _fixed_32 onto the identifier */
+typedef int                 INTFLOAT;        /* int in this branch, float in the #else branch */
+typedef int64_t             INT64FLOAT;      /* int64_t in this branch, float in the #else branch */
+typedef int16_t             SHORTFLOAT;      /* int16_t in this branch, float in the #else branch */
+typedef SoftFloat           AAC_FLOAT;       /* SoftFloat in this branch (softfloat.h is included above), float otherwise */
+typedef int                 AAC_SIGNE;       /* signed int in this branch, unsigned in the #else branch */
+#define FIXR(a)             ((int)((a) * 1 + 0.5))             /* scale by 1, add 0.5, truncate to int */
+#define FIXR10(a)           ((int)((a) * 1024.0 + 0.5))        /* scale by 2^10, add 0.5, truncate to int */
+#define Q23(a)              ((int)((a) * 8388608.0 + 0.5))     /* scale by 2^23, add 0.5, truncate to int */
+#define Q30(x)              ((int)((x) * 1073741824.0 + 0.5))  /* scale by 2^30, add 0.5, truncate to int */
+#define Q31(x)              ((int)((x) * 2147483648.0 + 0.5))  /* scale by 2^31, add 0.5, truncate to int */
+#define RANGE15(x)          (x)                                /* identity here; the #else branch scales by 32768 */
+#define GET_GAIN(x, y)      ((-(y) * (1 << (x))) + 1024)       /* multiply instead of <<: left-shifting a negative value is undefined */
+#define AAC_MUL16(x, y)     (int)(((int64_t)(x) * (y) + 0x8000) >> 16)      /* 64-bit product + 2^15, then >> 16 */
+#define AAC_MUL26(x, y)     (int)(((int64_t)(x) * (y) + 0x2000000) >> 26)   /* 64-bit product + 2^25, then >> 26 */
+#define AAC_MUL30(x, y)     (int)(((int64_t)(x) * (y) + 0x20000000) >> 30)  /* 64-bit product + 2^29, then >> 30 */
+#define AAC_MUL31(x, y)     (int)(((int64_t)(x) * (y) + 0x40000000) >> 31)  /* 64-bit product + 2^30, then >> 31 */
+#define AAC_MADD28(x, y, a, b) (int)((((int64_t)(x) * (y)) + \
+                                      ((int64_t)(a) * (b)) + \
+                                        0x8000000) >> 28)  /* (x*y + a*b + 2^27) >> 28 */
+#define AAC_MADD30(x, y, a, b) (int)((((int64_t)(x) * (y)) + \
+                                      ((int64_t)(a) * (b)) + \
+                                        0x20000000) >> 30)  /* (x*y + a*b + 2^29) >> 30 */
+#define AAC_MADD30_V8(x, y, a, b, c, d, e, f) (int)((((int64_t)(x) * (y)) + \
+                                                     ((int64_t)(a) * (b)) + \
+                                                     ((int64_t)(c) * (d)) + \
+                                                     ((int64_t)(e) * (f)) + \
+                                                       0x20000000) >> 30)  /* (x*y + a*b + c*d + e*f + 2^29) >> 30 */
+#define AAC_MSUB30(x, y, a, b) (int)((((int64_t)(x) * (y)) - \
+                                      ((int64_t)(a) * (b)) + \
+                                        0x20000000) >> 30)  /* (x*y - a*b + 2^29) >> 30 */
+#define AAC_MSUB30_V8(x, y, a, b, c, d, e, f) (int)((((int64_t)(x) * (y)) + \
+                                                     ((int64_t)(a) * (b)) - \
+                                                     ((int64_t)(c) * (d)) - \
+                                                     ((int64_t)(e) * (f)) + \
+                                                       0x20000000) >> 30)  /* (x*y + a*b - c*d - e*f + 2^29) >> 30 */
+#define AAC_MSUB31_V3(x, y, z)    (int)((((int64_t)(x) * (z)) - \
+                                      ((int64_t)(y) * (z)) + \
+                                        0x40000000) >> 31)  /* (x*z - y*z + 2^30) >> 31 */
+#define AAC_HALF_SUM(x, y)  (((x) >> 1) + ((y) >> 1))  /* halve each operand, then add; explicit parentheses because + binds tighter than >> */
+#define AAC_SRA_R(x, y)     (int)(((x) + (1 << ((y) - 1))) >> (y))  /* right shift by y after adding 1 << (y-1); y must be >= 1 */
+
+#else
+
+#define FFT_FLOAT    1      /* NOTE(review): presumably read by the FFT headers to pick the implementation -- confirm */
+#define FFT_FIXED_32 0
+
+#define AAC_RENAME(x)       x                /* identifier is left unchanged in this branch */
+#define AAC_RENAME_32(x)    x                /* identifier is left unchanged in this branch */
+typedef float               INTFLOAT;        /* float in this branch, int in the USE_FIXED branch */
+typedef float               INT64FLOAT;      /* float in this branch, int64_t in the USE_FIXED branch */
+typedef float               SHORTFLOAT;      /* float in this branch, int16_t in the USE_FIXED branch */
+typedef float               AAC_FLOAT;       /* float in this branch, SoftFloat in the USE_FIXED branch */
+typedef unsigned            AAC_SIGNE;       /* unsigned in this branch, int in the USE_FIXED branch */
+#define FIXR(x)             ((float)(x))
+#define FIXR10(x)           ((float)(x))
+#define Q23(x)              (x)              /* value passes through unscaled in this branch */
+#define Q30(x)              (x)              /* value passes through unscaled in this branch */
+#define Q31(x)              (x)              /* value passes through unscaled in this branch */
+#define RANGE15(x)          (32768.0 * (x))
+#define GET_GAIN(x, y)      powf((x), -(y))
+#define AAC_MUL16(x, y)     ((x) * (y))
+#define AAC_MUL26(x, y)     ((x) * (y))
+#define AAC_MUL30(x, y)     ((x) * (y))
+#define AAC_MUL31(x, y)     ((x) * (y))
+#define AAC_MADD28(x, y, a, b) ((x) * (y) + (a) * (b))
+#define AAC_MADD30(x, y, a, b) ((x) * (y) + (a) * (b))
+#define AAC_MADD30_V8(x, y, a, b, c, d, e, f) ((x) * (y) + (a) * (b) + \
+                                               (c) * (d) + (e) * (f))
+#define AAC_MSUB30(x, y, a, b) ((x) * (y) - (a) * (b))
+#define AAC_MSUB30_V8(x, y, a, b, c, d, e, f) ((x) * (y) + (a) * (b) - \
+                                               (c) * (d) - (e) * (f))
+#define AAC_MSUB31_V3(x, y, z)    (((x) - (y)) * (z))   /* outer parentheses keep the product intact inside larger expressions */
+#define AAC_HALF_SUM(x, y)  (((x) + (y)) * 0.5f)        /* outer parentheses keep the product intact inside larger expressions */
+#define AAC_SRA_R(x, y)     (x)                         /* no shift in this branch; y is ignored */
+
+#endif /* USE_FIXED */
+
+#endif /* AVCODEC_AAC_DEFINES_H */
diff --git a/libavcodec/aac_parser.c b/libavcodec/aac_parser.c
index cb93ba94..0b868edc 100644
--- a/libavcodec/aac_parser.c
+++ b/libavcodec/aac_parser.c
@@ -34,7 +34,7 @@ static int aac_sync(uint64_t state, AACAC3ParseContext *hdr_info,
     int size;
     union {
         uint64_t u64;
-        uint8_t  u8[8 + FF_INPUT_BUFFER_PADDING_SIZE];
+        uint8_t  u8[8 + AV_INPUT_BUFFER_PADDING_SIZE];
     } tmp;
 
     tmp.u64 = av_be2ne64(state);
diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c
index 2929f3ac..bca1f597 100644
--- a/libavcodec/aaccoder.c
+++ b/libavcodec/aaccoder.c
@@ -33,296 +33,34 @@
 #include "libavutil/libm.h" // brought forward to work around cygwin header breakage
 
 #include <float.h>
+
 #include "libavutil/mathematics.h"
+#include "mathops.h"
 #include "avcodec.h"
 #include "put_bits.h"
 #include "aac.h"
 #include "aacenc.h"
 #include "aactab.h"
+#include "aacenctab.h"
+#include "aacenc_utils.h"
+#include "aacenc_quantization.h"
 
-/** Frequency in Hz for lower limit of noise substitution **/
-#define NOISE_LOW_LIMIT 4000
-
-/** Total number of usable codebooks **/
-#define CB_TOT 13
-
-/** bits needed to code codebook run value for long windows */
-static const uint8_t run_value_bits_long[64] = {
-     5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
-     5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5, 10,
-    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
-    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 15
-};
-
-/** bits needed to code codebook run value for short windows */
-static const uint8_t run_value_bits_short[16] = {
-    3, 3, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 9
-};
+#include "aacenc_is.h"
+#include "aacenc_tns.h"
+#include "aacenc_ltp.h"
+#include "aacenc_pred.h"
 
-static const uint8_t * const run_value_bits[2] = {
-    run_value_bits_long, run_value_bits_short
-};
-
-/** Map to convert values from BandCodingPath index to a codebook index **/
-static const uint8_t aac_cb_out_map[CB_TOT]  = {0,1,2,3,4,5,6,7,8,9,10,11,13};
-/** Inverse map to convert from codebooks to BandCodingPath indices **/
-static const uint8_t aac_cb_in_map[CB_TOT+1] = {0,1,2,3,4,5,6,7,8,9,10,11,0,12};
-
-/**
- * Quantize one coefficient.
- * @return absolute value of the quantized coefficient
- * @see 3GPP TS26.403 5.6.2 "Scalefactor determination"
- */
-static av_always_inline int quant(float coef, const float Q)
-{
-    float a = coef * Q;
-    return sqrtf(a * sqrtf(a)) + 0.4054;
-}
+#include "libavcodec/aaccoder_twoloop.h"
 
-static void quantize_bands(int *out, const float *in, const float *scaled,
-                           int size, float Q34, int is_signed, int maxval)
-{
-    int i;
-    double qc;
-    for (i = 0; i < size; i++) {
-        qc = scaled[i] * Q34;
-        out[i] = (int)FFMIN(qc + 0.4054, (double)maxval);
-        if (is_signed && in[i] < 0.0f) {
-            out[i] = -out[i];
-        }
-    }
-}
+/* Parameter of f(x) = a*(lambda/100), defines the maximum Fourier spread
+ * beyond which no PNS is used (since the SFBs contain tone rather than noise) */
+#define NOISE_SPREAD_THRESHOLD 0.9f
 
-static void abs_pow34_v(float *out, const float *in, const int size)
-{
-#ifndef USE_REALLY_FULL_SEARCH
-    int i;
-    for (i = 0; i < size; i++) {
-        float a = fabsf(in[i]);
-        out[i] = sqrtf(a * sqrtf(a));
-    }
-#endif /* USE_REALLY_FULL_SEARCH */
-}
+/* Parameter of f(x) = a*(100/lambda), defines how much PNS is allowed to
+ * replace low-energy non-zero bands */
+#define NOISE_LAMBDA_REPLACE 1.948f
 
-static const uint8_t aac_cb_range [12] = {0, 3, 3, 3, 3, 9, 9, 8, 8, 13, 13, 17};
-static const uint8_t aac_cb_maxval[12] = {0, 1, 1, 2, 2, 4, 4, 7, 7, 12, 12, 16};
-
-/**
- * Calculate rate distortion cost for quantizing with given codebook
- *
- * @return quantization distortion
- */
-static av_always_inline float quantize_and_encode_band_cost_template(
-                                struct AACEncContext *s,
-                                PutBitContext *pb, const float *in,
-                                const float *scaled, int size, int scale_idx,
-                                int cb, const float lambda, const float uplim,
-                                int *bits, int BT_ZERO, int BT_UNSIGNED,
-                                int BT_PAIR, int BT_ESC, int BT_NOISE)
-{
-    const int q_idx = POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512;
-    const float Q   = ff_aac_pow2sf_tab [q_idx];
-    const float Q34 = ff_aac_pow34sf_tab[q_idx];
-    const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
-    const float CLIPPED_ESCAPE = 165140.0f*IQ;
-    int i, j;
-    float cost = 0;
-    const int dim = BT_PAIR ? 2 : 4;
-    int resbits = 0;
-    int off;
-
-    if (BT_ZERO) {
-        for (i = 0; i < size; i++)
-            cost += in[i]*in[i];
-        if (bits)
-            *bits = 0;
-        return cost * lambda;
-    }
-    if (BT_NOISE) {
-        for (i = 0; i < size; i++)
-            cost += in[i]*in[i];
-        if (bits)
-            *bits = 0;
-        return cost * lambda;
-    }
-    if (!scaled) {
-        abs_pow34_v(s->scoefs, in, size);
-        scaled = s->scoefs;
-    }
-    quantize_bands(s->qcoefs, in, scaled, size, Q34, !BT_UNSIGNED, aac_cb_maxval[cb]);
-    if (BT_UNSIGNED) {
-        off = 0;
-    } else {
-        off = aac_cb_maxval[cb];
-    }
-    for (i = 0; i < size; i += dim) {
-        const float *vec;
-        int *quants = s->qcoefs + i;
-        int curidx = 0;
-        int curbits;
-        float rd = 0.0f;
-        for (j = 0; j < dim; j++) {
-            curidx *= aac_cb_range[cb];
-            curidx += quants[j] + off;
-        }
-        curbits =  ff_aac_spectral_bits[cb-1][curidx];
-        vec     = &ff_aac_codebook_vectors[cb-1][curidx*dim];
-        if (BT_UNSIGNED) {
-            for (j = 0; j < dim; j++) {
-                float t = fabsf(in[i+j]);
-                float di;
-                if (BT_ESC && vec[j] == 64.0f) { //FIXME: slow
-                    if (t >= CLIPPED_ESCAPE) {
-                        di = t - CLIPPED_ESCAPE;
-                        curbits += 21;
-                    } else {
-                        int c = av_clip_uintp2(quant(t, Q), 13);
-                        di = t - c*cbrtf(c)*IQ;
-                        curbits += av_log2(c)*2 - 4 + 1;
-                    }
-                } else {
-                    di = t - vec[j]*IQ;
-                }
-                if (vec[j] != 0.0f)
-                    curbits++;
-                rd += di*di;
-            }
-        } else {
-            for (j = 0; j < dim; j++) {
-                float di = in[i+j] - vec[j]*IQ;
-                rd += di*di;
-            }
-        }
-        cost    += rd * lambda + curbits;
-        resbits += curbits;
-        if (cost >= uplim)
-            return uplim;
-        if (pb) {
-            put_bits(pb, ff_aac_spectral_bits[cb-1][curidx], ff_aac_spectral_codes[cb-1][curidx]);
-            if (BT_UNSIGNED)
-                for (j = 0; j < dim; j++)
-                    if (ff_aac_codebook_vectors[cb-1][curidx*dim+j] != 0.0f)
-                        put_bits(pb, 1, in[i+j] < 0.0f);
-            if (BT_ESC) {
-                for (j = 0; j < 2; j++) {
-                    if (ff_aac_codebook_vectors[cb-1][curidx*2+j] == 64.0f) {
-                        int coef = av_clip_uintp2(quant(fabsf(in[i+j]), Q), 13);
-                        int len = av_log2(coef);
-
-                        put_bits(pb, len - 4 + 1, (1 << (len - 4 + 1)) - 2);
-                        put_sbits(pb, len, coef);
-                    }
-                }
-            }
-        }
-    }
-
-    if (bits)
-        *bits = resbits;
-    return cost;
-}
-
-static float quantize_and_encode_band_cost_NONE(struct AACEncContext *s, PutBitContext *pb,
-                                                const float *in, const float *scaled,
-                                                int size, int scale_idx, int cb,
-                                                const float lambda, const float uplim,
-                                                int *bits) {
-    av_assert0(0);
-    return 0.0f;
-}
-
-#define QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NAME, BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE) \
-static float quantize_and_encode_band_cost_ ## NAME(                                    \
-                                struct AACEncContext *s,                                \
-                                PutBitContext *pb, const float *in,                     \
-                                const float *scaled, int size, int scale_idx,           \
-                                int cb, const float lambda, const float uplim,          \
-                                int *bits) {                                            \
-    return quantize_and_encode_band_cost_template(                                      \
-                                s, pb, in, scaled, size, scale_idx,                     \
-                                BT_ESC ? ESC_BT : cb, lambda, uplim, bits,              \
-                                BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE);       \
-}
-
-QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ZERO,  1, 0, 0, 0, 0)
-QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SQUAD, 0, 0, 0, 0, 0)
-QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UQUAD, 0, 1, 0, 0, 0)
-QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SPAIR, 0, 0, 1, 0, 0)
-QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UPAIR, 0, 1, 1, 0, 0)
-QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ESC,   0, 1, 1, 1, 0)
-QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NOISE, 0, 0, 0, 0, 1)
-
-static float (*const quantize_and_encode_band_cost_arr[])(
-                                struct AACEncContext *s,
-                                PutBitContext *pb, const float *in,
-                                const float *scaled, int size, int scale_idx,
-                                int cb, const float lambda, const float uplim,
-                                int *bits) = {
-    quantize_and_encode_band_cost_ZERO,
-    quantize_and_encode_band_cost_SQUAD,
-    quantize_and_encode_band_cost_SQUAD,
-    quantize_and_encode_band_cost_UQUAD,
-    quantize_and_encode_band_cost_UQUAD,
-    quantize_and_encode_band_cost_SPAIR,
-    quantize_and_encode_band_cost_SPAIR,
-    quantize_and_encode_band_cost_UPAIR,
-    quantize_and_encode_band_cost_UPAIR,
-    quantize_and_encode_band_cost_UPAIR,
-    quantize_and_encode_band_cost_UPAIR,
-    quantize_and_encode_band_cost_ESC,
-    quantize_and_encode_band_cost_NONE,     /* CB 12 doesn't exist */
-    quantize_and_encode_band_cost_NOISE,
-};
-
-#define quantize_and_encode_band_cost(                                  \
-                                s, pb, in, scaled, size, scale_idx, cb, \
-                                lambda, uplim, bits)                    \
-    quantize_and_encode_band_cost_arr[cb](                              \
-                                s, pb, in, scaled, size, scale_idx, cb, \
-                                lambda, uplim, bits)
-
-static float quantize_band_cost(struct AACEncContext *s, const float *in,
-                                const float *scaled, int size, int scale_idx,
-                                int cb, const float lambda, const float uplim,
-                                int *bits)
-{
-    return quantize_and_encode_band_cost(s, NULL, in, scaled, size, scale_idx,
-                                         cb, lambda, uplim, bits);
-}
-
-static void quantize_and_encode_band(struct AACEncContext *s, PutBitContext *pb,
-                                     const float *in, int size, int scale_idx,
-                                     int cb, const float lambda)
-{
-    quantize_and_encode_band_cost(s, pb, in, NULL, size, scale_idx, cb, lambda,
-                                  INFINITY, NULL);
-}
-
-static float find_max_val(int group_len, int swb_size, const float *scaled) {
-    float maxval = 0.0f;
-    int w2, i;
-    for (w2 = 0; w2 < group_len; w2++) {
-        for (i = 0; i < swb_size; i++) {
-            maxval = FFMAX(maxval, scaled[w2*128+i]);
-        }
-    }
-    return maxval;
-}
-
-static int find_min_book(float maxval, int sf) {
-    float Q = ff_aac_pow2sf_tab[POW_SF2_ZERO - sf + SCALE_ONE_POS - SCALE_DIV_512];
-    float Q34 = sqrtf(Q * sqrtf(Q));
-    int qmaxval, cb;
-    qmaxval = maxval * Q34 + 0.4054f;
-    if      (qmaxval ==  0) cb = 0;
-    else if (qmaxval ==  1) cb = 1;
-    else if (qmaxval ==  2) cb = 3;
-    else if (qmaxval <=  4) cb = 5;
-    else if (qmaxval <=  7) cb = 7;
-    else if (qmaxval <= 12) cb = 9;
-    else                    cb = 11;
-    return cb;
-}
+#include "libavcodec/aaccoder_trellis.h"
 
 /**
  * structure used in optimal codebook search
@@ -339,7 +77,7 @@ typedef struct BandCodingPath {
 static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce,
                                      int win, int group_len, const float lambda)
 {
-    BandCodingPath path[120][CB_TOT];
+    BandCodingPath path[120][CB_TOT_ALL];
     int w, swb, cb, start, size;
     int i, j;
     const int max_sfb  = sce->ics.max_sfb;
@@ -352,7 +90,7 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce
 
     abs_pow34_v(s->scoefs, sce->coeffs, 1024);
     start = win*128;
-    for (cb = 0; cb < CB_TOT; cb++) {
+    for (cb = 0; cb < CB_TOT_ALL; cb++) {
         path[0][cb].cost     = 0.0f;
         path[0][cb].prev_idx = -1;
         path[0][cb].run      = 0;
@@ -360,7 +98,7 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce
     for (swb = 0; swb < max_sfb; swb++) {
         size = sce->ics.swb_sizes[swb];
         if (sce->zeroes[win*16 + swb]) {
-            for (cb = 0; cb < CB_TOT; cb++) {
+            for (cb = 0; cb < CB_TOT_ALL; cb++) {
                 path[swb+1][cb].prev_idx = cb;
                 path[swb+1][cb].cost     = path[swb][cb].cost;
                 path[swb+1][cb].run      = path[swb][cb].run + 1;
@@ -370,15 +108,22 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce
             int mincb = next_mincb;
             next_minrd = INFINITY;
             next_mincb = 0;
-            for (cb = 0; cb < CB_TOT; cb++) {
+            for (cb = 0; cb < CB_TOT_ALL; cb++) {
                 float cost_stay_here, cost_get_here;
                 float rd = 0.0f;
+                if (cb >= 12 && sce->band_type[win*16+swb] < aac_cb_out_map[cb] ||
+                    cb  < aac_cb_in_map[sce->band_type[win*16+swb]] && sce->band_type[win*16+swb] > aac_cb_out_map[cb]) {
+                    path[swb+1][cb].prev_idx = -1;
+                    path[swb+1][cb].cost     = INFINITY;
+                    path[swb+1][cb].run      = path[swb][cb].run + 1;
+                    continue;
+                }
                 for (w = 0; w < group_len; w++) {
                     FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(win+w)*16+swb];
-                    rd += quantize_band_cost(s, sce->coeffs + start + w*128,
-                                             s->scoefs + start + w*128, size,
+                    rd += quantize_band_cost(s, &sce->coeffs[start + w*128],
+                                             &s->scoefs[start + w*128], size,
                                              sce->sf_idx[(win+w)*16+swb], aac_cb_out_map[cb],
-                                             lambda / band->threshold, INFINITY, NULL);
+                                             lambda / band->threshold, INFINITY, NULL, NULL, 0);
                 }
                 cost_stay_here = path[swb][cb].cost + rd;
                 cost_get_here  = minrd              + rd + run_bits + 4;
@@ -406,138 +151,7 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce
     //convert resulting path from backward-linked list
     stack_len = 0;
     idx       = 0;
-    for (cb = 1; cb < CB_TOT; cb++)
-        if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
-            idx = cb;
-    ppos = max_sfb;
-    while (ppos > 0) {
-        cb = idx;
-        stackrun[stack_len] = path[ppos][cb].run;
-        stackcb [stack_len] = cb;
-        idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx;
-        ppos -= path[ppos][cb].run;
-        stack_len++;
-    }
-    //perform actual band info encoding
-    start = 0;
-    for (i = stack_len - 1; i >= 0; i--) {
-        cb = aac_cb_out_map[stackcb[i]];
-        put_bits(&s->pb, 4, cb);
-        count = stackrun[i];
-        memset(sce->zeroes + win*16 + start, !cb, count);
-        //XXX: memset when band_type is also uint8_t
-        for (j = 0; j < count; j++) {
-            sce->band_type[win*16 + start] = cb;
-            start++;
-        }
-        while (count >= run_esc) {
-            put_bits(&s->pb, run_bits, run_esc);
-            count -= run_esc;
-        }
-        put_bits(&s->pb, run_bits, count);
-    }
-}
-
-static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce,
-                                  int win, int group_len, const float lambda)
-{
-    BandCodingPath path[120][CB_TOT];
-    int w, swb, cb, start, size;
-    int i, j;
-    const int max_sfb  = sce->ics.max_sfb;
-    const int run_bits = sce->ics.num_windows == 1 ? 5 : 3;
-    const int run_esc  = (1 << run_bits) - 1;
-    int idx, ppos, count;
-    int stackrun[120], stackcb[120], stack_len;
-    float next_minbits = INFINITY;
-    int next_mincb = 0;
-
-    abs_pow34_v(s->scoefs, sce->coeffs, 1024);
-    start = win*128;
-    for (cb = 0; cb < CB_TOT; cb++) {
-        path[0][cb].cost     = run_bits+4;
-        path[0][cb].prev_idx = -1;
-        path[0][cb].run      = 0;
-    }
-    for (swb = 0; swb < max_sfb; swb++) {
-        size = sce->ics.swb_sizes[swb];
-        if (sce->zeroes[win*16 + swb]) {
-            float cost_stay_here = path[swb][0].cost;
-            float cost_get_here  = next_minbits + run_bits + 4;
-            if (   run_value_bits[sce->ics.num_windows == 8][path[swb][0].run]
-                != run_value_bits[sce->ics.num_windows == 8][path[swb][0].run+1])
-                cost_stay_here += run_bits;
-            if (cost_get_here < cost_stay_here) {
-                path[swb+1][0].prev_idx = next_mincb;
-                path[swb+1][0].cost     = cost_get_here;
-                path[swb+1][0].run      = 1;
-            } else {
-                path[swb+1][0].prev_idx = 0;
-                path[swb+1][0].cost     = cost_stay_here;
-                path[swb+1][0].run      = path[swb][0].run + 1;
-            }
-            next_minbits = path[swb+1][0].cost;
-            next_mincb = 0;
-            for (cb = 1; cb < CB_TOT; cb++) {
-                path[swb+1][cb].cost = 61450;
-                path[swb+1][cb].prev_idx = -1;
-                path[swb+1][cb].run = 0;
-            }
-        } else {
-            float minbits = next_minbits;
-            int mincb = next_mincb;
-            int startcb = sce->band_type[win*16+swb];
-            startcb = aac_cb_in_map[startcb];
-            next_minbits = INFINITY;
-            next_mincb = 0;
-            for (cb = 0; cb < startcb; cb++) {
-                path[swb+1][cb].cost = 61450;
-                path[swb+1][cb].prev_idx = -1;
-                path[swb+1][cb].run = 0;
-            }
-            for (cb = startcb; cb < CB_TOT; cb++) {
-                float cost_stay_here, cost_get_here;
-                float bits = 0.0f;
-                if (cb == 12 && sce->band_type[win*16+swb] != NOISE_BT) {
-                    path[swb+1][cb].cost = 61450;
-                    path[swb+1][cb].prev_idx = -1;
-                    path[swb+1][cb].run = 0;
-                    continue;
-                }
-                for (w = 0; w < group_len; w++) {
-                    bits += quantize_band_cost(s, sce->coeffs + start + w*128,
-                                               s->scoefs + start + w*128, size,
-                                               sce->sf_idx[(win+w)*16+swb],
-                                               aac_cb_out_map[cb],
-                                               0, INFINITY, NULL);
-                }
-                cost_stay_here = path[swb][cb].cost + bits;
-                cost_get_here  = minbits            + bits + run_bits + 4;
-                if (   run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run]
-                    != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1])
-                    cost_stay_here += run_bits;
-                if (cost_get_here < cost_stay_here) {
-                    path[swb+1][cb].prev_idx = mincb;
-                    path[swb+1][cb].cost     = cost_get_here;
-                    path[swb+1][cb].run      = 1;
-                } else {
-                    path[swb+1][cb].prev_idx = cb;
-                    path[swb+1][cb].cost     = cost_stay_here;
-                    path[swb+1][cb].run      = path[swb][cb].run + 1;
-                }
-                if (path[swb+1][cb].cost < next_minbits) {
-                    next_minbits = path[swb+1][cb].cost;
-                    next_mincb = cb;
-                }
-            }
-        }
-        start += sce->ics.swb_sizes[swb];
-    }
-
-    //convert resulting path from backward-linked list
-    stack_len = 0;
-    idx       = 0;
-    for (cb = 1; cb < CB_TOT; cb++)
+    for (cb = 1; cb < CB_TOT_ALL; cb++)
         if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
             idx = cb;
     ppos = max_sfb;
@@ -570,15 +184,6 @@ static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce,
     }
 }
 
-/** Return the minimum scalefactor where the quantized coef does not clip. */
-static av_always_inline uint8_t coef2minsf(float coef) {
-    return av_clip_uint8(log2f(coef)*4 - 69 + SCALE_ONE_POS - SCALE_DIV_512);
-}
-
-/** Return the maximum scalefactor where the quantized coef is not zero. */
-static av_always_inline uint8_t coef2maxsf(float coef) {
-    return av_clip_uint8(log2f(coef)*4 +  6 + SCALE_ONE_POS - SCALE_DIV_512);
-}
 
 typedef struct TrellisPath {
     float cost;
@@ -588,6 +193,45 @@ typedef struct TrellisPath {
 #define TRELLIS_STAGES 121
 #define TRELLIS_STATES (SCALE_MAX_DIFF+1)
 
+static void set_special_band_scalefactors(AACEncContext *s, SingleChannelElement *sce)
+{
+    int w, g;
+    int prevscaler_n = -255, prevscaler_i = 0;
+    int bands = 0;
+
+    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+        for (g = 0;  g < sce->ics.num_swb; g++) {
+            if (sce->zeroes[w*16+g])
+                continue;
+            if (sce->band_type[w*16+g] == INTENSITY_BT || sce->band_type[w*16+g] == INTENSITY_BT2) {
+                sce->sf_idx[w*16+g] = av_clip(roundf(log2f(sce->is_ener[w*16+g])*2), -155, 100);
+                bands++;
+            } else if (sce->band_type[w*16+g] == NOISE_BT) {
+                sce->sf_idx[w*16+g] = av_clip(3+ceilf(log2f(sce->pns_ener[w*16+g])*2), -100, 155);
+                if (prevscaler_n == -255)
+                    prevscaler_n = sce->sf_idx[w*16+g];
+                bands++;
+            }
+        }
+    }
+
+    if (!bands)
+        return;
+
+    /* Clip the scalefactor indices */
+    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+        for (g = 0;  g < sce->ics.num_swb; g++) {
+            if (sce->zeroes[w*16+g])
+                continue;
+            if (sce->band_type[w*16+g] == INTENSITY_BT || sce->band_type[w*16+g] == INTENSITY_BT2) {
+                sce->sf_idx[w*16+g] = prevscaler_i = av_clip(sce->sf_idx[w*16+g], prevscaler_i - SCALE_MAX_DIFF, prevscaler_i + SCALE_MAX_DIFF);
+            } else if (sce->band_type[w*16+g] == NOISE_BT) {
+                sce->sf_idx[w*16+g] = prevscaler_n = av_clip(sce->sf_idx[w*16+g], prevscaler_n - SCALE_MAX_DIFF, prevscaler_n + SCALE_MAX_DIFF);
+            }
+        }
+    }
+}
+
 static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s,
                                        SingleChannelElement *sce,
                                        const float lambda)
@@ -619,9 +263,9 @@ static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s,
     }
 
     //minimum scalefactor index is when minimum nonzero coefficient after quantizing is not clipped
-    q0 = coef2minsf(q0f);
+    q0 = av_clip(coef2minsf(q0f), 0, SCALE_MAX_POS-1);
     //maximum scalefactor index is when maximum coefficient after quantizing is still not zero
-    q1 = coef2maxsf(q1f);
+    q1 = av_clip(coef2maxsf(q1f), 1, SCALE_MAX_POS);
     if (q1 - q0 > 60) {
         int q0low  = q0;
         int q1high = q1;
@@ -637,6 +281,12 @@ static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s,
             q1  = q1high;
         }
     }
+    // q0 == q1 isn't really a legal situation
+    if (q0 == q1) {
+        // the following is indirect but guarantees q1 != q0 && q1 near q0
+        q1 = av_clip(q0+1, 1, SCALE_MAX_POS);
+        q0 = av_clip(q1-1, 0, SCALE_MAX_POS - 1);
+    }
 
     for (i = 0; i < TRELLIS_STATES; i++) {
         paths[0][i].cost    = 0.0f;
@@ -653,7 +303,7 @@ static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s,
     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
         start = w*128;
         for (g = 0; g < sce->ics.num_swb; g++) {
-            const float *coefs = sce->coeffs + start;
+            const float *coefs = &sce->coeffs[start];
             float qmin, qmax;
             int nz = 0;
 
@@ -685,6 +335,10 @@ static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s,
                 maxscale = coef2maxsf(qmax);
                 minscale = av_clip(minscale - q0, 0, TRELLIS_STATES - 1);
                 maxscale = av_clip(maxscale - q0, 0, TRELLIS_STATES);
+                if (minscale == maxscale) {
+                    maxscale = av_clip(minscale+1, 1, TRELLIS_STATES);
+                    minscale = av_clip(maxscale-1, 0, TRELLIS_STATES - 1);
+                }
                 maxval = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], s->scoefs+start);
                 for (q = minscale; q < maxscale; q++) {
                     float dist = 0;
@@ -692,7 +346,7 @@ static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s,
                     for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
                         FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
                         dist += quantize_band_cost(s, coefs + w2*128, s->scoefs + start + w2*128, sce->ics.swb_sizes[g],
-                                                   q + q0, cb, lambda / band->threshold, INFINITY, NULL);
+                                                   q + q0, cb, lambda / band->threshold, INFINITY, NULL, NULL, 0);
                     }
                     minrd = FFMIN(minrd, dist);
 
@@ -728,7 +382,7 @@ static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s,
     }
     while (idx) {
         sce->sf_idx[bandaddr[idx]] = minq + q0;
-        minq = paths[idx][minq].prev;
+        minq = FFMAX(paths[idx][minq].prev, 0);
         idx--;
     }
     //set the same quantizers inside window groups
@@ -738,463 +392,460 @@ static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s,
                 sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g];
 }
 
-/**
- * two-loop quantizers search taken from ISO 13818-7 Appendix C
- */
-static void search_for_quantizers_twoloop(AVCodecContext *avctx,
-                                          AACEncContext *s,
-                                          SingleChannelElement *sce,
-                                          const float lambda)
+static void search_for_quantizers_fast(AVCodecContext *avctx, AACEncContext *s,
+                                       SingleChannelElement *sce,
+                                       const float lambda)
 {
-    int start = 0, i, w, w2, g;
-    int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / avctx->channels * (lambda / 120.f);
-    const float freq_mult = avctx->sample_rate/(1024.0f/sce->ics.num_windows)/2.0f;
-    float dists[128] = { 0 }, uplims[128] = { 0 };
-    float maxvals[128];
-    int noise_sf[128] = { 0 };
-    int fflag, minscaler, minscaler_n;
-    int its  = 0;
-    int allz = 0;
-    float minthr = INFINITY;
+    int i, w, w2, g;
+    int minq = 255;
 
-    // for values above this the decoder might end up in an endless loop
-    // due to always having more bits than what can be encoded.
-    destbits = FFMIN(destbits, 5800);
-    //XXX: some heuristic to determine initial quantizers will reduce search time
-    //determine zero bands and upper limits
+    memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
-        start = 0;
-        for (g = 0;  g < sce->ics.num_swb; g++) {
-            int nz = 0;
-            float uplim = 0.0f, energy = 0.0f;
+        for (g = 0; g < sce->ics.num_swb; g++) {
             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
                 FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
-                uplim += band->threshold;
-                energy += band->energy;
-                if (band->energy <= band->threshold || band->threshold == 0.0f) {
+                if (band->energy <= band->threshold) {
+                    sce->sf_idx[(w+w2)*16+g] = 218;
                     sce->zeroes[(w+w2)*16+g] = 1;
-                    continue;
+                } else {
+                    sce->sf_idx[(w+w2)*16+g] = av_clip(SCALE_ONE_POS - SCALE_DIV_512 + log2f(band->threshold), 80, 218);
+                    sce->zeroes[(w+w2)*16+g] = 0;
                 }
-                nz = 1;
-            }
-            uplims[w*16+g] = uplim *512;
-            if (s->options.pns && start*freq_mult > NOISE_LOW_LIMIT && energy < uplim * 1.2f) {
-                noise_sf[w*16+g] = av_clip(4+FFMIN(log2f(energy)*2,255), -100, 155);
-                sce->band_type[w*16+g] = NOISE_BT;
-                nz= 1;
-            } else { /** Band type will be determined by the twoloop algorithm */
-                sce->band_type[w*16+g] = 0;
-            }
-            sce->zeroes[w*16+g] = !nz;
-            if (nz)
-                minthr = FFMIN(minthr, uplim);
-            allz |= nz;
-            start += sce->ics.swb_sizes[g];
-        }
-    }
-    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
-        for (g = 0;  g < sce->ics.num_swb; g++) {
-            if (sce->zeroes[w*16+g]) {
-                sce->sf_idx[w*16+g] = SCALE_ONE_POS;
-                continue;
+                minq = FFMIN(minq, sce->sf_idx[(w+w2)*16+g]);
             }
-            sce->sf_idx[w*16+g] = SCALE_ONE_POS + FFMIN(log2f(uplims[w*16+g]/minthr)*4,59);
         }
     }
-
-    if (!allz)
-        return;
-    abs_pow34_v(s->scoefs, sce->coeffs, 1024);
-
-    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
-        start = w*128;
-        for (g = 0;  g < sce->ics.num_swb; g++) {
-            const float *scaled = s->scoefs + start;
-            maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled);
-            start += sce->ics.swb_sizes[g];
-        }
+    for (i = 0; i < 128; i++) {
+        sce->sf_idx[i] = 140;
+        //av_clip(sce->sf_idx[i], minq, minq + SCALE_MAX_DIFF - 1);
     }
-
-    //perform two-loop search
-    //outer loop - improve quality
-    do {
-        int tbits, qstep;
-        minscaler = sce->sf_idx[0];
-        minscaler_n = sce->sf_idx[0];
-        //inner loop - quantize spectrum to fit into given number of bits
-        qstep = its ? 1 : 32;
-        do {
-            int prev = -1;
-            tbits = 0;
-            for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
-                start = w*128;
-                for (g = 0;  g < sce->ics.num_swb; g++) {
-                    const float *coefs = sce->coeffs + start;
-                    const float *scaled = s->scoefs + start;
-                    int bits = 0;
-                    int cb;
-                    float dist = 0.0f;
-
-                    if (sce->band_type[w*16+g] == NOISE_BT) {
-                        minscaler_n = FFMIN(minscaler_n, noise_sf[w*16+g]);
-                        start += sce->ics.swb_sizes[g];
-                        continue;
-                    } else if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
-                        start += sce->ics.swb_sizes[g];
-                        continue;
-                    }
-                    minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
-                    cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
-                    for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
-                        int b;
-                        dist += quantize_band_cost(s, coefs + w2*128,
-                                                   scaled + w2*128,
-                                                   sce->ics.swb_sizes[g],
-                                                   sce->sf_idx[w*16+g],
-                                                   cb,
-                                                   1.0f,
-                                                   INFINITY,
-                                                   &b);
-                        bits += b;
-                    }
-                    dists[w*16+g] = dist - bits;
-                    if (prev != -1) {
-                        bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
-                    }
-                    tbits += bits;
-                    start += sce->ics.swb_sizes[g];
-                    prev = sce->sf_idx[w*16+g];
-                }
-            }
-            if (tbits > destbits) {
-                for (i = 0; i < 128; i++)
-                    if (sce->sf_idx[i] < 218 - qstep)
-                        sce->sf_idx[i] += qstep;
-            } else {
-                for (i = 0; i < 128; i++)
-                    if (sce->sf_idx[i] > 60 - qstep)
-                        sce->sf_idx[i] -= qstep;
-            }
-            qstep >>= 1;
-            if (!qstep && tbits > destbits*1.02 && sce->sf_idx[0] < 217)
-                qstep = 1;
-        } while (qstep);
-
-        fflag = 0;
-        minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF);
-
-        for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
-            for (g = 0; g < sce->ics.num_swb; g++)
-                if (sce->band_type[w*16+g] == NOISE_BT)
-                    sce->sf_idx[w*16+g] = av_clip(noise_sf[w*16+g], minscaler_n, minscaler_n + SCALE_MAX_DIFF);
-
-        for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
-            for (g = 0; g < sce->ics.num_swb; g++) {
-                int prevsc = sce->sf_idx[w*16+g];
-                if (sce->band_type[w*16+g] == NOISE_BT)
-                    continue;
-                if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g] > 60) {
-                    if (find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1))
-                        sce->sf_idx[w*16+g]--;
-                    else //Try to make sure there is some energy in every band
-                        sce->sf_idx[w*16+g]-=2;
-                }
-                sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF);
-                sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], 219);
-                if (sce->sf_idx[w*16+g] != prevsc)
-                    fflag = 1;
-                sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
-            }
-        }
-        its++;
-    } while (fflag && its < 10);
+    //set the same quantizers inside window groups
+    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
+        for (g = 0;  g < sce->ics.num_swb; g++)
+            for (w2 = 1; w2 < sce->ics.group_len[w]; w2++)
+                sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g];
 }
 
-static void search_for_quantizers_faac(AVCodecContext *avctx, AACEncContext *s,
-                                       SingleChannelElement *sce,
-                                       const float lambda)
+static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce)
 {
-    int start = 0, i, w, w2, g;
-    float uplim[128], maxq[128];
-    int minq, maxsf;
-    float distfact = ((sce->ics.num_windows > 1) ? 85.80 : 147.84) / lambda;
-    int last = 0, lastband = 0, curband = 0;
-    float avg_energy = 0.0;
-    if (sce->ics.num_windows == 1) {
-        start = 0;
-        for (i = 0; i < 1024; i++) {
-            if (i - start >= sce->ics.swb_sizes[curband]) {
-                start += sce->ics.swb_sizes[curband];
-                curband++;
-            }
-            if (sce->coeffs[i]) {
-                avg_energy += sce->coeffs[i] * sce->coeffs[i];
-                last = i;
-                lastband = curband;
-            }
-        }
+    FFPsyBand *band;
+    int w, g, w2, i;
+    int wlen = 1024 / sce->ics.num_windows;
+    int bandwidth, cutoff;
+    float *PNS = &s->scoefs[0*128], *PNS34 = &s->scoefs[1*128];
+    float *NOR34 = &s->scoefs[3*128];
+    uint8_t nextband[128];
+    const float lambda = s->lambda;
+    const float freq_mult = avctx->sample_rate*0.5f/wlen;
+    const float thr_mult = NOISE_LAMBDA_REPLACE*(100.0f/lambda);
+    const float spread_threshold = FFMIN(0.75f, NOISE_SPREAD_THRESHOLD*FFMAX(0.5f, lambda/100.f));
+    const float dist_bias = av_clipf(4.f * 120 / lambda, 0.25f, 4.0f);
+    const float pns_transient_energy_r = FFMIN(0.7f, lambda / 140.f);
+
+    int refbits = avctx->bit_rate * 1024.0 / avctx->sample_rate
+        / ((avctx->flags & CODEC_FLAG_QSCALE) ? 2.0f : avctx->channels)
+        * (lambda / 120.f);
+
+    /** Keep this in sync with twoloop's cutoff selection */
+    float rate_bandwidth_multiplier = 1.5f;
+    int prev = -1000, prev_sf = -1;
+    int frame_bit_rate = (avctx->flags & CODEC_FLAG_QSCALE)
+        ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024)
+        : (avctx->bit_rate / avctx->channels);
+
+    frame_bit_rate *= 1.15f;
+
+    if (avctx->cutoff > 0) {
+        bandwidth = avctx->cutoff;
     } else {
-        for (w = 0; w < 8; w++) {
-            const float *coeffs = sce->coeffs + w*128;
-            curband = start = 0;
-            for (i = 0; i < 128; i++) {
-                if (i - start >= sce->ics.swb_sizes[curband]) {
-                    start += sce->ics.swb_sizes[curband];
-                    curband++;
-                }
-                if (coeffs[i]) {
-                    avg_energy += coeffs[i] * coeffs[i];
-                    last = FFMAX(last, i);
-                    lastband = FFMAX(lastband, curband);
-                }
-            }
-        }
-    }
-    last++;
-    avg_energy /= last;
-    if (avg_energy == 0.0f) {
-        for (i = 0; i < FF_ARRAY_ELEMS(sce->sf_idx); i++)
-            sce->sf_idx[i] = SCALE_ONE_POS;
-        return;
+        bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate));
     }
+
+    cutoff = bandwidth * 2 * wlen / avctx->sample_rate;
+
+    memcpy(sce->band_alt, sce->band_type, sizeof(sce->band_type));
+    ff_init_nextband_map(sce, nextband);
     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
-        start = w*128;
-        for (g = 0; g < sce->ics.num_swb; g++) {
-            float *coefs   = sce->coeffs + start;
-            const int size = sce->ics.swb_sizes[g];
-            int start2 = start, end2 = start + size, peakpos = start;
-            float maxval = -1, thr = 0.0f, t;
-            maxq[w*16+g] = 0.0f;
-            if (g > lastband) {
-                maxq[w*16+g] = 0.0f;
-                start += size;
-                for (w2 = 0; w2 < sce->ics.group_len[w]; w2++)
-                    memset(coefs + w2*128, 0, sizeof(coefs[0])*size);
+        int wstart = w*128;
+        for (g = 0;  g < sce->ics.num_swb; g++) {
+            int noise_sfi;
+            float dist1 = 0.0f, dist2 = 0.0f, noise_amp;
+            float pns_energy = 0.0f, pns_tgt_energy, energy_ratio, dist_thresh;
+            float sfb_energy = 0.0f, threshold = 0.0f, spread = 2.0f;
+            float min_energy = -1.0f, max_energy = 0.0f;
+            const int start = wstart+sce->ics.swb_offset[g];
+            const float freq = (start-wstart)*freq_mult;
+            const float freq_boost = FFMAX(0.88f*freq/NOISE_LOW_LIMIT, 1.0f);
+            if (freq < NOISE_LOW_LIMIT || (start-wstart) >= cutoff) {
+                if (!sce->zeroes[w*16+g])
+                    prev_sf = sce->sf_idx[w*16+g];
                 continue;
             }
             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
-                for (i = 0; i < size; i++) {
-                    float t = coefs[w2*128+i]*coefs[w2*128+i];
-                    maxq[w*16+g] = FFMAX(maxq[w*16+g], fabsf(coefs[w2*128 + i]));
-                    thr += t;
-                    if (sce->ics.num_windows == 1 && maxval < t) {
-                        maxval  = t;
-                        peakpos = start+i;
-                    }
+                band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
+                sfb_energy += band->energy;
+                spread     = FFMIN(spread, band->spread);
+                threshold  += band->threshold;
+                if (!w2) {
+                    min_energy = max_energy = band->energy;
+                } else {
+                    min_energy = FFMIN(min_energy, band->energy);
+                    max_energy = FFMAX(max_energy, band->energy);
                 }
             }
-            if (sce->ics.num_windows == 1) {
-                start2 = FFMAX(peakpos - 2, start2);
-                end2   = FFMIN(peakpos + 3, end2);
-            } else {
-                start2 -= start;
-                end2   -= start;
-            }
-            start += size;
-            thr = pow(thr / (avg_energy * (end2 - start2)), 0.3 + 0.1*(lastband - g) / lastband);
-            t   = 1.0 - (1.0 * start2 / last);
-            uplim[w*16+g] = distfact / (1.4 * thr + t*t*t + 0.075);
-        }
-    }
-    memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
-    abs_pow34_v(s->scoefs, sce->coeffs, 1024);
-    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
-        start = w*128;
-        for (g = 0;  g < sce->ics.num_swb; g++) {
-            const float *coefs  = sce->coeffs + start;
-            const float *scaled = s->scoefs   + start;
-            const int size      = sce->ics.swb_sizes[g];
-            int scf, prev_scf, step;
-            int min_scf = -1, max_scf = 256;
-            float curdiff;
-            if (maxq[w*16+g] < 21.544) {
-                sce->zeroes[w*16+g] = 1;
-                start += size;
+
+            /* Ramps down at ~8000Hz and loosens the dist threshold */
+            dist_thresh = av_clipf(2.5f*NOISE_LOW_LIMIT/freq, 0.5f, 2.5f) * dist_bias;
+
+            /* PNS is acceptable when all of these are true:
+             * 1. high spread energy (noise-like band)
+             * 2. near-threshold energy (high PE means the random nature of PNS content will be noticed)
+             * 3. on short window groups, all windows have similar energy (variations in energy would be destroyed by PNS)
+             *
+             * At this stage, point 2 is relaxed for zeroed bands near the noise threshold (hole avoidance is more important)
+             */
+            if ((!sce->zeroes[w*16+g] && !ff_sfdelta_can_remove_band(sce, nextband, prev_sf, w*16+g)) ||
+                ((sce->zeroes[w*16+g] || !sce->band_alt[w*16+g]) && sfb_energy < threshold*sqrtf(1.0f/freq_boost)) || spread < spread_threshold ||
+                (!sce->zeroes[w*16+g] && sce->band_alt[w*16+g] && sfb_energy > threshold*thr_mult*freq_boost) ||
+                min_energy < pns_transient_energy_r * max_energy ) {
+                sce->pns_ener[w*16+g] = sfb_energy;
+                if (!sce->zeroes[w*16+g])
+                    prev_sf = sce->sf_idx[w*16+g];
                 continue;
             }
-            sce->zeroes[w*16+g] = 0;
-            scf  = prev_scf = av_clip(SCALE_ONE_POS - SCALE_DIV_512 - log2f(1/maxq[w*16+g])*16/3, 60, 218);
-            for (;;) {
-                float dist = 0.0f;
-                int quant_max;
 
-                for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
-                    int b;
-                    dist += quantize_band_cost(s, coefs + w2*128,
-                                               scaled + w2*128,
-                                               sce->ics.swb_sizes[g],
-                                               scf,
-                                               ESC_BT,
-                                               lambda,
-                                               INFINITY,
-                                               &b);
-                    dist -= b;
-                }
-                dist *= 1.0f / 512.0f / lambda;
-                quant_max = quant(maxq[w*16+g], ff_aac_pow2sf_tab[POW_SF2_ZERO - scf + SCALE_ONE_POS - SCALE_DIV_512]);
-                if (quant_max >= 8191) { // too much, return to the previous quantizer
-                    sce->sf_idx[w*16+g] = prev_scf;
-                    break;
+            pns_tgt_energy = sfb_energy*FFMIN(1.0f, spread*spread);
+            noise_sfi = av_clip(roundf(log2f(pns_tgt_energy)*2), -100, 155); /* Quantize */
+            noise_amp = -ff_aac_pow2sf_tab[noise_sfi + POW_SF2_ZERO];    /* Dequantize */
+            if (prev != -1000) {
+                int noise_sfdiff = noise_sfi - prev + SCALE_DIFF_ZERO;
+                if (noise_sfdiff < 0 || noise_sfdiff > 2*SCALE_MAX_DIFF) {
+                    if (!sce->zeroes[w*16+g])
+                        prev_sf = sce->sf_idx[w*16+g];
+                    continue;
                 }
-                prev_scf = scf;
-                curdiff = fabsf(dist - uplim[w*16+g]);
-                if (curdiff <= 1.0f)
-                    step = 0;
-                else
-                    step = log2f(curdiff);
-                if (dist > uplim[w*16+g])
-                    step = -step;
-                scf += step;
-                scf = av_clip_uint8(scf);
-                step = scf - prev_scf;
-                if (FFABS(step) <= 1 || (step > 0 && scf >= max_scf) || (step < 0 && scf <= min_scf)) {
-                    sce->sf_idx[w*16+g] = av_clip(scf, min_scf, max_scf);
-                    break;
+            }
+            for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
+                float band_energy, scale, pns_senergy;
+                const int start_c = (w+w2)*128+sce->ics.swb_offset[g];
+                band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
+                for (i = 0; i < sce->ics.swb_sizes[g]; i+=2) {
+                    double rnd[2];
+                    av_bmg_get(&s->lfg, rnd);
+                    PNS[i+0] = (float)rnd[0];
+                    PNS[i+1] = (float)rnd[1];
                 }
-                if (step > 0)
-                    min_scf = prev_scf;
-                else
-                    max_scf = prev_scf;
+                band_energy = s->fdsp->scalarproduct_float(PNS, PNS, sce->ics.swb_sizes[g]);
+                scale = noise_amp/sqrtf(band_energy);
+                s->fdsp->vector_fmul_scalar(PNS, PNS, scale, sce->ics.swb_sizes[g]);
+                pns_senergy = s->fdsp->scalarproduct_float(PNS, PNS, sce->ics.swb_sizes[g]);
+                pns_energy += pns_senergy;
+                abs_pow34_v(NOR34, &sce->coeffs[start_c], sce->ics.swb_sizes[g]);
+                abs_pow34_v(PNS34, PNS, sce->ics.swb_sizes[g]);
+                dist1 += quantize_band_cost(s, &sce->coeffs[start_c],
+                                            NOR34,
+                                            sce->ics.swb_sizes[g],
+                                            sce->sf_idx[(w+w2)*16+g],
+                                            sce->band_alt[(w+w2)*16+g],
+                                            lambda/band->threshold, INFINITY, NULL, NULL, 0);
+                /* Estimate rd on average as 5 bits for SF, 4 for the CB, plus spread energy * lambda/thr */
+                dist2 += band->energy/(band->spread*band->spread)*lambda*dist_thresh/band->threshold;
+            }
+            if (g && sce->band_type[w*16+g-1] == NOISE_BT) {
+                dist2 += 5;
+            } else {
+                dist2 += 9;
+            }
+            energy_ratio = pns_tgt_energy/pns_energy; /* Compensates for quantization error */
+            sce->pns_ener[w*16+g] = energy_ratio*pns_tgt_energy;
+            if (sce->zeroes[w*16+g] || !sce->band_alt[w*16+g] || (energy_ratio > 0.85f && energy_ratio < 1.25f && dist2 < dist1)) {
+                sce->band_type[w*16+g] = NOISE_BT;
+                sce->zeroes[w*16+g] = 0;
+                prev = noise_sfi;
+            } else {
+                if (!sce->zeroes[w*16+g])
+                    prev_sf = sce->sf_idx[w*16+g];
             }
-            start += size;
         }
     }
-    minq = sce->sf_idx[0] ? sce->sf_idx[0] : INT_MAX;
-    for (i = 1; i < 128; i++) {
-        if (!sce->sf_idx[i])
-            sce->sf_idx[i] = sce->sf_idx[i-1];
-        else
-            minq = FFMIN(minq, sce->sf_idx[i]);
-    }
-    if (minq == INT_MAX)
-        minq = 0;
-    minq = FFMIN(minq, SCALE_MAX_POS);
-    maxsf = FFMIN(minq + SCALE_MAX_DIFF, SCALE_MAX_POS);
-    for (i = 126; i >= 0; i--) {
-        if (!sce->sf_idx[i])
-            sce->sf_idx[i] = sce->sf_idx[i+1];
-        sce->sf_idx[i] = av_clip(sce->sf_idx[i], minq, maxsf);
-    }
 }
 
-static void search_for_quantizers_fast(AVCodecContext *avctx, AACEncContext *s,
-                                       SingleChannelElement *sce,
-                                       const float lambda)
+static void mark_pns(AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce)
 {
-    int i, w, w2, g;
-    int minq = 255;
+    FFPsyBand *band;
+    int w, g, w2;
+    int wlen = 1024 / sce->ics.num_windows;
+    int bandwidth, cutoff;
+    const float lambda = s->lambda;
+    const float freq_mult = avctx->sample_rate*0.5f/wlen;
+    const float spread_threshold = FFMIN(0.75f, NOISE_SPREAD_THRESHOLD*FFMAX(0.5f, lambda/100.f));
+    const float pns_transient_energy_r = FFMIN(0.7f, lambda / 140.f);
+
+    int refbits = avctx->bit_rate * 1024.0 / avctx->sample_rate
+        / ((avctx->flags & CODEC_FLAG_QSCALE) ? 2.0f : avctx->channels)
+        * (lambda / 120.f);
+
+    /** Keep this in sync with twoloop's cutoff selection */
+    float rate_bandwidth_multiplier = 1.5f;
+    int frame_bit_rate = (avctx->flags & CODEC_FLAG_QSCALE)
+        ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024)
+        : (avctx->bit_rate / avctx->channels);
+
+    frame_bit_rate *= 1.15f;
+
+    if (avctx->cutoff > 0) {
+        bandwidth = avctx->cutoff;
+    } else {
+        bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate));
+    }
 
-    memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
+    cutoff = bandwidth * 2 * wlen / avctx->sample_rate;
+
+    memcpy(sce->band_alt, sce->band_type, sizeof(sce->band_type));
     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
-        for (g = 0; g < sce->ics.num_swb; g++) {
+        for (g = 0;  g < sce->ics.num_swb; g++) {
+            float sfb_energy = 0.0f, threshold = 0.0f, spread = 2.0f;
+            float min_energy = -1.0f, max_energy = 0.0f;
+            const int start = sce->ics.swb_offset[g];
+            const float freq = start*freq_mult;
+            const float freq_boost = FFMAX(0.88f*freq/NOISE_LOW_LIMIT, 1.0f);
+            if (freq < NOISE_LOW_LIMIT || start >= cutoff) {
+                sce->can_pns[w*16+g] = 0;
+                continue;
+            }
             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
-                FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
-                if (band->energy <= band->threshold) {
-                    sce->sf_idx[(w+w2)*16+g] = 218;
-                    sce->zeroes[(w+w2)*16+g] = 1;
+                band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
+                sfb_energy += band->energy;
+                spread     = FFMIN(spread, band->spread);
+                threshold  += band->threshold;
+                if (!w2) {
+                    min_energy = max_energy = band->energy;
                 } else {
-                    sce->sf_idx[(w+w2)*16+g] = av_clip(SCALE_ONE_POS - SCALE_DIV_512 + log2f(band->threshold), 80, 218);
-                    sce->zeroes[(w+w2)*16+g] = 0;
+                    min_energy = FFMIN(min_energy, band->energy);
+                    max_energy = FFMAX(max_energy, band->energy);
                 }
-                minq = FFMIN(minq, sce->sf_idx[(w+w2)*16+g]);
+            }
+
+            /* PNS is acceptable when all of these are true:
+             * 1. high spread energy (noise-like band)
+             * 2. near-threshold energy (high PE means the random nature of PNS content will be noticed)
+             * 3. on short window groups, all windows have similar energy (variations in energy would be destroyed by PNS)
+             */
+            sce->pns_ener[w*16+g] = sfb_energy;
+            if (sfb_energy < threshold*sqrtf(1.5f/freq_boost) || spread < spread_threshold || min_energy < pns_transient_energy_r * max_energy) {
+                sce->can_pns[w*16+g] = 0;
+            } else {
+                sce->can_pns[w*16+g] = 1;
             }
         }
     }
-    for (i = 0; i < 128; i++) {
-        sce->sf_idx[i] = 140;
-        //av_clip(sce->sf_idx[i], minq, minq + SCALE_MAX_DIFF - 1);
-    }
-    //set the same quantizers inside window groups
-    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
-        for (g = 0;  g < sce->ics.num_swb; g++)
-            for (w2 = 1; w2 < sce->ics.group_len[w]; w2++)
-                sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g];
 }
 
-static void search_for_ms(AACEncContext *s, ChannelElement *cpe,
-                          const float lambda)
+static void search_for_ms(AACEncContext *s, ChannelElement *cpe)
 {
-    int start = 0, i, w, w2, g;
+    int start = 0, i, w, w2, g, sid_sf_boost, prev_mid, prev_side;
+    uint8_t nextband0[128], nextband1[128];
     float M[128], S[128];
     float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3;
+    const float lambda = s->lambda;
+    const float mslambda = FFMIN(1.0f, lambda / 120.f); /* lambda/120 capped at 1; only used for the side-channel cost below */
     SingleChannelElement *sce0 = &cpe->ch[0];
     SingleChannelElement *sce1 = &cpe->ch[1];
     if (!cpe->common_window)
         return;
+    /* Per band: cost of coding L/R as-is vs. as mid/side; the winner decides cpe->ms_mask (and, for M/S, sf_idx/band_type) */
+    /** Scout out next nonzero bands */
+    ff_init_nextband_map(sce0, nextband0);
+    ff_init_nextband_map(sce1, nextband1);
+
+    prev_mid = sce0->sf_idx[0];
+    prev_side = sce1->sf_idx[0];
     for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
+        start = 0; /* coefficient offset restarts for every window group */
         for (g = 0;  g < sce0->ics.num_swb; g++) {
-            if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) {
-                float dist1 = 0.0f, dist2 = 0.0f;
+            float bmax = bval2bmax(g * 17.0f / sce0->ics.num_swb) / 0.0045f;
+            if (!cpe->is_mask[w*16+g])
+                cpe->ms_mask[w*16+g] = 0; /* bands flagged in is_mask keep their ms_mask; all others start cleared */
+            if (!sce0->zeroes[w*16+g] && !sce1->zeroes[w*16+g] && !cpe->is_mask[w*16+g]) {
+                float Mmax = 0.0f, Smax = 0.0f;
+
+                /* Must compute mid/side SF and book for the whole window group: Mmax/Smax are maxima over every window in it */
                 for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
-                    FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
-                    FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
-                    float minthr = FFMIN(band0->threshold, band1->threshold);
-                    float maxthr = FFMAX(band0->threshold, band1->threshold);
                     for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
-                        M[i] = (sce0->pcoeffs[start+w2*128+i]
-                              + sce1->pcoeffs[start+w2*128+i]) * 0.5;
+                        M[i] = (sce0->coeffs[start+(w+w2)*128+i]
+                              + sce1->coeffs[start+(w+w2)*128+i]) * 0.5;
                         S[i] =  M[i]
-                              - sce1->pcoeffs[start+w2*128+i];
+                              - sce1->coeffs[start+(w+w2)*128+i];
+                    }
+                    abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
+                    abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
+                    for (i = 0; i < sce0->ics.swb_sizes[g]; i++ ) {
+                        Mmax = FFMAX(Mmax, M34[i]);
+                        Smax = FFMAX(Smax, S34[i]);
+                    }
+                }
+
+                for (sid_sf_boost = 0; sid_sf_boost < 4; sid_sf_boost++) { /* try the side scalefactor lowered by 0, 3, 6, 9 steps */
+                    float dist1 = 0.0f, dist2 = 0.0f;
+                    int B0 = 0, B1 = 0;
+                    int minidx;
+                    int mididx, sididx;
+                    int midcb, sidcb;
+
+                    minidx = FFMIN(sce0->sf_idx[w*16+g], sce1->sf_idx[w*16+g]);
+                    mididx = av_clip(minidx, 0, SCALE_MAX_POS - SCALE_DIV_512);
+                    sididx = av_clip(minidx - sid_sf_boost * 3, 0, SCALE_MAX_POS - SCALE_DIV_512);
+                    if (sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT
+                        && (   !ff_sfdelta_can_replace(sce0, nextband0, prev_mid, mididx, w*16+g)
+                            || !ff_sfdelta_can_replace(sce1, nextband1, prev_side, sididx, w*16+g))) {
+                        /* scalefactor range violation, bad stuff, will decrease quality unacceptably */
+                        continue;
+                    }
+
+                    midcb = find_min_book(Mmax, mididx);
+                    sidcb = find_min_book(Smax, sididx);
+
+                    /* No CB can be zero */
+                    midcb = FFMAX(1,midcb);
+                    sidcb = FFMAX(1,sidcb);
+
+                    for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
+                        FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
+                        FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
+                        float minthr = FFMIN(band0->threshold, band1->threshold);
+                        int b1,b2,b3,b4; /* filled in by quantize_band_cost; presumably bit counts - confirm against its declaration */
+                        for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
+                            M[i] = (sce0->coeffs[start+(w+w2)*128+i]
+                                  + sce1->coeffs[start+(w+w2)*128+i]) * 0.5;
+                            S[i] =  M[i]
+                                  - sce1->coeffs[start+(w+w2)*128+i];
+                        }
+
+                        abs_pow34_v(L34, sce0->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
+                        abs_pow34_v(R34, sce1->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
+                        abs_pow34_v(M34, M,                         sce0->ics.swb_sizes[g]);
+                        abs_pow34_v(S34, S,                         sce0->ics.swb_sizes[g]);
+                        dist1 += quantize_band_cost(s, &sce0->coeffs[start + (w+w2)*128],
+                                                    L34,
+                                                    sce0->ics.swb_sizes[g],
+                                                    sce0->sf_idx[w*16+g],
+                                                    sce0->band_type[w*16+g],
+                                                    lambda / band0->threshold, INFINITY, &b1, NULL, 0);
+                        dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128],
+                                                    R34,
+                                                    sce1->ics.swb_sizes[g],
+                                                    sce1->sf_idx[w*16+g],
+                                                    sce1->band_type[w*16+g],
+                                                    lambda / band1->threshold, INFINITY, &b2, NULL, 0);
+                        dist2 += quantize_band_cost(s, M,
+                                                    M34,
+                                                    sce0->ics.swb_sizes[g],
+                                                    mididx,
+                                                    midcb,
+                                                    lambda / minthr, INFINITY, &b3, NULL, 0);
+                        dist2 += quantize_band_cost(s, S,
+                                                    S34,
+                                                    sce1->ics.swb_sizes[g],
+                                                    sididx,
+                                                    sidcb,
+                                                    mslambda / (minthr * bmax), INFINITY, &b4, NULL, 0);
+                        B0 += b1+b2; /* L/R total of the reported b values */
+                        B1 += b3+b4; /* M/S total of the reported b values */
+                        dist1 -= b1+b2; /* take the b term back out of the returned cost */
+                        dist2 -= b3+b4;
+                    }
+                    cpe->ms_mask[w*16+g] = dist2 <= dist1 && B1 < B0; /* M/S must be no worse on dist and strictly smaller on B */
+                    if (cpe->ms_mask[w*16+g]) {
+                        if (sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT) {
+                            sce0->sf_idx[w*16+g] = mididx;
+                            sce1->sf_idx[w*16+g] = sididx;
+                            sce0->band_type[w*16+g] = midcb;
+                            sce1->band_type[w*16+g] = sidcb;
+                        } else if ((sce0->band_type[w*16+g] != NOISE_BT) ^ (sce1->band_type[w*16+g] != NOISE_BT)) {
+                            /* ms_mask unneeded, and it confuses some decoders */
+                            cpe->ms_mask[w*16+g] = 0;
+                        }
+                        break; /* the first boost that wins is kept */
+                    } else if (B1 > B0) {
+                        /* More boost won't fix this */
+                        break;
                     }
-                    abs_pow34_v(L34, sce0->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
-                    abs_pow34_v(R34, sce1->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
-                    abs_pow34_v(M34, M,                         sce0->ics.swb_sizes[g]);
-                    abs_pow34_v(S34, S,                         sce0->ics.swb_sizes[g]);
-                    dist1 += quantize_band_cost(s, sce0->coeffs + start + w2*128,
-                                                L34,
-                                                sce0->ics.swb_sizes[g],
-                                                sce0->sf_idx[(w+w2)*16+g],
-                                                sce0->band_type[(w+w2)*16+g],
-                                                lambda / band0->threshold, INFINITY, NULL);
-                    dist1 += quantize_band_cost(s, sce1->coeffs + start + w2*128,
-                                                R34,
-                                                sce1->ics.swb_sizes[g],
-                                                sce1->sf_idx[(w+w2)*16+g],
-                                                sce1->band_type[(w+w2)*16+g],
-                                                lambda / band1->threshold, INFINITY, NULL);
-                    dist2 += quantize_band_cost(s, M,
-                                                M34,
-                                                sce0->ics.swb_sizes[g],
-                                                sce0->sf_idx[(w+w2)*16+g],
-                                                sce0->band_type[(w+w2)*16+g],
-                                                lambda / maxthr, INFINITY, NULL);
-                    dist2 += quantize_band_cost(s, S,
-                                                S34,
-                                                sce1->ics.swb_sizes[g],
-                                                sce1->sf_idx[(w+w2)*16+g],
-                                                sce1->band_type[(w+w2)*16+g],
-                                                lambda / minthr, INFINITY, NULL);
                 }
-                cpe->ms_mask[w*16+g] = dist2 < dist1;
             }
+            if (!sce0->zeroes[w*16+g] && sce0->band_type[w*16+g] < RESERVED_BT)
+                prev_mid = sce0->sf_idx[w*16+g]; /* remember the last non-zero band's scalefactor with band_type below RESERVED_BT */
+            if (!sce1->zeroes[w*16+g] && !cpe->is_mask[w*16+g] && sce1->band_type[w*16+g] < RESERVED_BT)
+                prev_side = sce1->sf_idx[w*16+g];
             start += sce0->ics.swb_sizes[g];
         }
     }
 }
 
 AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
-    [AAC_CODER_FAAC] = {
-        search_for_quantizers_faac,
-        encode_window_bands_info,
-        quantize_and_encode_band,
-        search_for_ms,
-    },
     [AAC_CODER_ANMR] = {
         search_for_quantizers_anmr,
         encode_window_bands_info,
         quantize_and_encode_band,
+        ff_aac_encode_tns_info, /* positional initializers: each entry's order must follow the member order of AACCoefficientsEncoder (declared outside this view) */
+        ff_aac_encode_ltp_info,
+        ff_aac_encode_main_pred,
+        ff_aac_adjust_common_pred,
+        ff_aac_adjust_common_ltp,
+        ff_aac_apply_main_pred,
+        ff_aac_apply_tns,
+        ff_aac_update_ltp,
+        ff_aac_ltp_insert_new_frame,
+        set_special_band_scalefactors,
+        search_for_pns,
+        mark_pns,
+        ff_aac_search_for_tns,
+        ff_aac_search_for_ltp,
         search_for_ms,
+        ff_aac_search_for_is,
+        ff_aac_search_for_pred,
     },
     [AAC_CODER_TWOLOOP] = {
         search_for_quantizers_twoloop,
         codebook_trellis_rate,
         quantize_and_encode_band,
+        ff_aac_encode_tns_info,
+        ff_aac_encode_ltp_info,
+        ff_aac_encode_main_pred,
+        ff_aac_adjust_common_pred,
+        ff_aac_adjust_common_ltp,
+        ff_aac_apply_main_pred,
+        ff_aac_apply_tns,
+        ff_aac_update_ltp,
+        ff_aac_ltp_insert_new_frame,
+        set_special_band_scalefactors,
+        search_for_pns,
+        mark_pns,
+        ff_aac_search_for_tns,
+        ff_aac_search_for_ltp,
         search_for_ms,
+        ff_aac_search_for_is,
+        ff_aac_search_for_pred,
     },
     [AAC_CODER_FAST] = {
         search_for_quantizers_fast,
         encode_window_bands_info,
         quantize_and_encode_band,
+        ff_aac_encode_tns_info,
+        ff_aac_encode_ltp_info,
+        ff_aac_encode_main_pred,
+        ff_aac_adjust_common_pred,
+        ff_aac_adjust_common_ltp,
+        ff_aac_apply_main_pred,
+        ff_aac_apply_tns,
+        ff_aac_update_ltp,
+        ff_aac_ltp_insert_new_frame,
+        set_special_band_scalefactors,
+        search_for_pns,
+        mark_pns,
+        ff_aac_search_for_tns,
+        ff_aac_search_for_ltp,
         search_for_ms,
+        ff_aac_search_for_is,
+        ff_aac_search_for_pred,
     },
 };
diff --git a/libavcodec/aaccoder_trellis.h b/libavcodec/aaccoder_trellis.h
new file mode 100644
index 00000000..02300528
--- /dev/null
+++ b/libavcodec/aaccoder_trellis.h
@@ -0,0 +1,192 @@
+/*
+ * AAC encoder trellis codebook selector
+ * Copyright (C) 2008-2009 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * AAC encoder trellis codebook selector
+ * @author Konstantin Shishkov
+ */
+
+/**
+ * This file contains a template for the codebook_trellis_rate selector function.
+ * The including file must already have declared the following functions
+ * from aacenc_quantization/util.h. They are deliberately not included here,
+ * so that alternative implementations can be provided:
+ *  - quantize_band_cost_bits
+ *  - abs_pow34_v
+ */
+
+#ifndef AVCODEC_AACCODER_TRELLIS_H
+#define AVCODEC_AACCODER_TRELLIS_H
+
+#include <float.h>
+#include "libavutil/mathematics.h"
+#include "avcodec.h"
+#include "put_bits.h"
+#include "aac.h"
+#include "aacenc.h"
+#include "aactab.h"
+#include "aacenctab.h"
+
+/**
+ * One node of the codebook search trellis: the best path that ends in a given band with a given codebook
+ */
+typedef struct TrellisBandCodingPath {
+    int prev_idx; ///< codebook index of the preceding path point, -1 when there is none
+    float cost;   ///< accumulated cost of the path up to this point
+    int run;      ///< number of consecutive bands, ending at this point, that use this codebook
+} TrellisBandCodingPath;
+
+/** Trellis search for the cheapest codebook per band of the window group starting at win; writes the codebook/run pairs to s->pb and updates sce->band_type and sce->zeroes. */
+static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce,
+                                  int win, int group_len, const float lambda) /* lambda is not referenced in this body */
+{
+    TrellisBandCodingPath path[120][CB_TOT_ALL];
+    int w, swb, cb, start, size;
+    int i, j;
+    const int max_sfb  = sce->ics.max_sfb;
+    const int run_bits = sce->ics.num_windows == 1 ? 5 : 3; /* width of one run-length field */
+    const int run_esc  = (1 << run_bits) - 1;               /* all-ones run value: the run continues in the next field */
+    int idx, ppos, count;
+    int stackrun[120], stackcb[120], stack_len;
+    float next_minbits = INFINITY;
+    int next_mincb = 0;
+
+    abs_pow34_v(s->scoefs, sce->coeffs, 1024);
+    start = win*128; /* offset of this group's first coefficient */
+    for (cb = 0; cb < CB_TOT_ALL; cb++) {
+        path[0][cb].cost     = run_bits+4; /* every path starts by paying for one 4-bit codebook id plus one run field */
+        path[0][cb].prev_idx = -1;
+        path[0][cb].run      = 0;
+    }
+    for (swb = 0; swb < max_sfb; swb++) {
+        size = sce->ics.swb_sizes[swb];
+        if (sce->zeroes[win*16 + swb]) { /* zeroed band: only codebook index 0 is evaluated */
+            float cost_stay_here = path[swb][0].cost;
+            float cost_get_here  = next_minbits + run_bits + 4;
+            if (   run_value_bits[sce->ics.num_windows == 8][path[swb][0].run]
+                != run_value_bits[sce->ics.num_windows == 8][path[swb][0].run+1])
+                cost_stay_here += run_bits; /* table value changes at this run length: charge one more run field */
+            if (cost_get_here < cost_stay_here) {
+                path[swb+1][0].prev_idx = next_mincb;
+                path[swb+1][0].cost     = cost_get_here;
+                path[swb+1][0].run      = 1;
+            } else {
+                path[swb+1][0].prev_idx = 0;
+                path[swb+1][0].cost     = cost_stay_here;
+                path[swb+1][0].run      = path[swb][0].run + 1;
+            }
+            next_minbits = path[swb+1][0].cost;
+            next_mincb = 0;
+            for (cb = 1; cb < CB_TOT_ALL; cb++) {
+                path[swb+1][cb].cost = 61450; /* large constant cost, evidently meant to rule this codebook out */
+                path[swb+1][cb].prev_idx = -1;
+                path[swb+1][cb].run = 0;
+            }
+        } else {
+            float minbits = next_minbits;
+            int mincb = next_mincb;
+            int startcb = sce->band_type[win*16+swb];
+            startcb = aac_cb_in_map[startcb]; /* translate the band type into a trellis index; lower indices are ruled out below */
+            next_minbits = INFINITY;
+            next_mincb = 0;
+            for (cb = 0; cb < startcb; cb++) {
+                path[swb+1][cb].cost = 61450;
+                path[swb+1][cb].prev_idx = -1;
+                path[swb+1][cb].run = 0;
+            }
+            for (cb = startcb; cb < CB_TOT_ALL; cb++) {
+                float cost_stay_here, cost_get_here;
+                float bits = 0.0f;
+                if (cb >= 12 && sce->band_type[win*16+swb] != aac_cb_out_map[cb]) { /* indices >= 12 are only kept when they map to the band's current type */
+                    path[swb+1][cb].cost = 61450;
+                    path[swb+1][cb].prev_idx = -1;
+                    path[swb+1][cb].run = 0;
+                    continue;
+                }
+                for (w = 0; w < group_len; w++) { /* sum the cost over every window of the group */
+                    bits += quantize_band_cost_bits(s, &sce->coeffs[start + w*128],
+                                               &s->scoefs[start + w*128], size,
+                                               sce->sf_idx[win*16+swb],
+                                               aac_cb_out_map[cb],
+                                               0, INFINITY, NULL, NULL, 0);
+                }
+                cost_stay_here = path[swb][cb].cost + bits;
+                cost_get_here  = minbits            + bits + run_bits + 4; /* switching: cheapest predecessor plus a new codebook id and run field */
+                if (   run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run]
+                    != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1])
+                    cost_stay_here += run_bits;
+                if (cost_get_here < cost_stay_here) {
+                    path[swb+1][cb].prev_idx = mincb;
+                    path[swb+1][cb].cost     = cost_get_here;
+                    path[swb+1][cb].run      = 1;
+                } else {
+                    path[swb+1][cb].prev_idx = cb;
+                    path[swb+1][cb].cost     = cost_stay_here;
+                    path[swb+1][cb].run      = path[swb][cb].run + 1;
+                }
+                if (path[swb+1][cb].cost < next_minbits) {
+                    next_minbits = path[swb+1][cb].cost;
+                    next_mincb = cb;
+                }
+            }
+        }
+        start += sce->ics.swb_sizes[swb];
+    }
+
+    //convert resulting path from backward-linked list
+    stack_len = 0;
+    idx       = 0;
+    for (cb = 1; cb < CB_TOT_ALL; cb++) /* start from the cheapest end point */
+        if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
+            idx = cb;
+    ppos = max_sfb;
+    while (ppos > 0) {
+        av_assert1(idx >= 0);
+        cb = idx;
+        stackrun[stack_len] = path[ppos][cb].run;
+        stackcb [stack_len] = cb;
+        idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx; /* the predecessor is read from the first band of this run */
+        ppos -= path[ppos][cb].run;
+        stack_len++;
+    }
+    //perform actual band info encoding
+    start = 0;
+    for (i = stack_len - 1; i >= 0; i--) { /* the stack was filled back to front, so pop it in reverse */
+        cb = aac_cb_out_map[stackcb[i]];
+        put_bits(&s->pb, 4, cb);
+        count = stackrun[i];
+        memset(sce->zeroes + win*16 + start, !cb, count); /* a band is flagged zero exactly when its codebook is 0 */
+        //XXX: memset when band_type is also uint8_t
+        for (j = 0; j < count; j++) {
+            sce->band_type[win*16 + start] = cb;
+            start++;
+        }
+        while (count >= run_esc) { /* emit escape fields until the remainder fits in one field */
+            put_bits(&s->pb, run_bits, run_esc);
+            count -= run_esc;
+        }
+        put_bits(&s->pb, run_bits, count);
+    }
+}
+
+
+#endif /* AVCODEC_AACCODER_TRELLIS_H */
diff --git a/libavcodec/aaccoder_twoloop.h b/libavcodec/aaccoder_twoloop.h
new file mode 100644
index 00000000..397a4db5
--- /dev/null
+++ b/libavcodec/aaccoder_twoloop.h
@@ -0,0 +1,755 @@
+/*
+ * AAC encoder twoloop coder
+ * Copyright (C) 2008-2009 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * AAC encoder twoloop coder
+ * @author Konstantin Shishkov, Claudio Freire
+ */
+
+/**
+ * This file contains a template for the twoloop coder function.
+ * The including file must already have declared the following functions
+ * from aacenc_quantization/util.h. They are deliberately not included here,
+ * so that alternative implementations can be provided:
+ *  - quantize_band_cost
+ *  - abs_pow34_v
+ *  - find_max_val
+ *  - find_min_book
+ *  - find_form_factor
+ */
+
+#ifndef AVCODEC_AACCODER_TWOLOOP_H
+#define AVCODEC_AACCODER_TWOLOOP_H
+
+#include <float.h>
+#include "libavutil/mathematics.h"
+#include "mathops.h"
+#include "avcodec.h"
+#include "put_bits.h"
+#include "aac.h"
+#include "aacenc.h"
+#include "aactab.h"
+#include "aacenctab.h"
+
+/** Frequency in Hz for lower limit of noise substitution **/
+#define NOISE_LOW_LIMIT 4000
+
+#define sclip(x) av_clip(x,60,218)
+
+/* Reflects the cost to change codebooks: 5 only when band g-1 of window w is both zeroed and PNS-capable, otherwise 9 */
+static inline int ff_pns_bits(SingleChannelElement *sce, int w, int g)
+{
+    return (!g || !sce->zeroes[w*16+g-1] || !sce->can_pns[w*16+g-1]) ? 9 : 5; /* !g is tested first, so band g-1 is never read for g == 0 */
+}
+
+/**
+ * two-loop quantizers search taken from ISO 13818-7 Appendix C
+ */
+static void search_for_quantizers_twoloop(AVCodecContext *avctx,
+                                          AACEncContext *s,
+                                          SingleChannelElement *sce,
+                                          const float lambda)
+{
+    int start = 0, i, w, w2, g, recomprd;
+    int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate
+        / ((avctx->flags & CODEC_FLAG_QSCALE) ? 2.0f : avctx->channels)
+        * (lambda / 120.f);
+    int refbits = destbits;
+    int toomanybits, toofewbits;
+    char nzs[128];
+    uint8_t nextband[128];
+    int maxsf[128];
+    float dists[128] = { 0 }, qenergies[128] = { 0 }, uplims[128], euplims[128], energies[128];
+    float maxvals[128], spread_thr_r[128];
+    float min_spread_thr_r, max_spread_thr_r;
+
+    /**
+     * rdlambda controls the maximum tolerated distortion. Twoloop
+     * will keep iterating until it fails to lower it or it reaches
+     * ulimit * rdlambda. Keeping it low increases quality on difficult
+     * signals, but lower it too much, and bits will be taken from weak
+     * signals, creating "holes". A balance is necessary.
+     * rdmax and rdmin specify the relative deviation from rdlambda
+     * allowed for tonality compensation
+     */
+    float rdlambda = av_clipf(2.0f * 120.f / lambda, 0.0625f, 16.0f);
+    const float nzslope = 1.5f;
+    float rdmin = 0.03125f;
+    float rdmax = 1.0f;
+
+    /**
+     * sfoffs controls an offset of optimum allocation that will be
+     * applied based on lambda. Keep it real and modest, the loop
+     * will take care of the rest, this just accelerates convergence
+     */
+    float sfoffs = av_clipf(log2f(120.0f / lambda) * 4.0f, -5, 10);
+
+    int fflag, minscaler, maxscaler, nminscaler;
+    int its  = 0;
+    int maxits = 30;
+    int allz = 0;
+    int tbits;
+    int cutoff = 1024;
+    int pns_start_pos;
+    int prev;
+
+    /**
+     * zeroscale controls a multiplier of the threshold, if band energy
+     * is below this, a zero is forced. Keep it lower than 1, unless
+     * low lambda is used, because energy < threshold doesn't mean there's
+     * no audible signal outright, it's just energy. Also make it rise
+     * slower than rdlambda, as rdscale has due compensation with
+     * noisy band deprioritization below, whereas zeroing logic is rather dumb
+     */
+    float zeroscale;
+    if (lambda > 120.f) {
+        zeroscale = av_clipf(powf(120.f / lambda, 0.25f), 0.0625f, 1.0f);
+    } else {
+        zeroscale = 1.f;
+    }
+
+    if (s->psy.bitres.alloc >= 0) {
+        /**
+         * Psy granted us extra bits to use, from the reservoir;
+         * adjust for lambda except what psy already did
+         */
+        destbits = s->psy.bitres.alloc
+            * (lambda / (avctx->global_quality ? avctx->global_quality : 120));
+    }
+
+    if (avctx->flags & CODEC_FLAG_QSCALE) {
+        /**
+         * Constant Q-scale doesn't compensate MS coding on its own
+         * No need to be overly precise, this only controls RD
+         * adjustment CB limits when going overboard
+         */
+        if (s->options.mid_side && s->cur_type == TYPE_CPE)
+            destbits *= 2;
+
+        /**
+         * When using a constant Q-scale, don't adjust bits, just use RD
+         * Don't let it go overboard, though... 8x psy target is enough
+         */
+        toomanybits = 5800;
+        toofewbits = destbits / 16;
+
+        /** Don't offset scalers, just RD */
+        sfoffs = sce->ics.num_windows - 1;
+        rdlambda = sqrtf(rdlambda);
+
+        /** search further */
+        maxits *= 2;
+    } else {
+        /* When using ABR, be strict, but a reasonable leeway is
+         * critical to allow RC to smoothly track desired bitrate
+         * without sudden quality drops that cause audible artifacts.
+         * Symmetry is also desirable, to avoid systematic bias.
+         */
+        toomanybits = destbits + destbits/8;
+        toofewbits = destbits - destbits/8;
+
+        sfoffs = 0;
+        rdlambda = sqrtf(rdlambda);
+    }
+
+    /** and zero out above cutoff frequency */
+    {
+        int wlen = 1024 / sce->ics.num_windows;
+        int bandwidth;
+
+        /**
+         * Scale, psy gives us constant quality, this LP only scales
+         * bitrate by lambda, so we save bits on subjectively unimportant HF
+         * rather than increase quantization noise. Adjust nominal bitrate
+         * to effective bitrate according to encoding parameters,
+         * AAC_CUTOFF_FROM_BITRATE is calibrated for effective bitrate.
+         */
+        float rate_bandwidth_multiplier = 1.5f;
+        int frame_bit_rate = (avctx->flags & CODEC_FLAG_QSCALE)
+            ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024)
+            : (avctx->bit_rate / avctx->channels);
+
+        /** Compensate for extensions that increase efficiency */
+        if (s->options.pns || s->options.intensity_stereo)
+            frame_bit_rate *= 1.15f;
+
+        if (avctx->cutoff > 0) {
+            bandwidth = avctx->cutoff;
+        } else {
+            bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate));
+            s->psy.cutoff = bandwidth;
+        }
+
+        cutoff = bandwidth * 2 * wlen / avctx->sample_rate;
+        pns_start_pos = NOISE_LOW_LIMIT * 2 * wlen / avctx->sample_rate;
+    }
+
+    /**
+     * for values above this the decoder might end up in an endless loop
+     * due to always having more bits than what can be encoded.
+     */
+    destbits = FFMIN(destbits, 5800);
+    toomanybits = FFMIN(toomanybits, 5800);
+    toofewbits = FFMIN(toofewbits, 5800);
+    /**
+     * XXX: some heuristic to determine initial quantizers will reduce search time
+     * determine zero bands and upper distortion limits
+     */
+    min_spread_thr_r = -1;
+    max_spread_thr_r = -1;
+    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+        for (g = start = 0;  g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
+            int nz = 0;
+            float uplim = 0.0f, energy = 0.0f, spread = 0.0f;
+            for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
+                FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
+                if (start >= cutoff || band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f) {
+                    sce->zeroes[(w+w2)*16+g] = 1;
+                    continue;
+                }
+                nz = 1;
+            }
+            if (!nz) {
+                uplim = 0.0f;
+            } else {
+                nz = 0;
+                for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
+                    FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
+                    if (band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f)
+                        continue;
+                    uplim += band->threshold;
+                    energy += band->energy;
+                    spread += band->spread;
+                    nz++;
+                }
+            }
+            uplims[w*16+g] = uplim;
+            energies[w*16+g] = energy;
+            nzs[w*16+g] = nz;
+            sce->zeroes[w*16+g] = !nz;
+            allz |= nz;
+            if (nz && sce->can_pns[w*16+g]) {
+                spread_thr_r[w*16+g] = energy * nz / (uplim * spread);
+                if (min_spread_thr_r < 0) {
+                    min_spread_thr_r = max_spread_thr_r = spread_thr_r[w*16+g];
+                } else {
+                    min_spread_thr_r = FFMIN(min_spread_thr_r, spread_thr_r[w*16+g]);
+                    max_spread_thr_r = FFMAX(max_spread_thr_r, spread_thr_r[w*16+g]);
+                }
+            }
+        }
+    }
+
+    /** Compute initial scalers */
+    minscaler = 65535;
+    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+        for (g = 0;  g < sce->ics.num_swb; g++) {
+            if (sce->zeroes[w*16+g]) {
+                sce->sf_idx[w*16+g] = SCALE_ONE_POS;
+                continue;
+            }
+            /**
+             * log2f-to-distortion ratio is, technically, 2 (1.5db = 4, but it's power vs level so it's 2).
+             * But, as offsets are applied, low-frequency signals are too sensitive to the induced distortion,
+             * so we make scaling more conservative by choosing a lower log2f-to-distortion ratio, and thus
+             * more robust.
+             */
+            sce->sf_idx[w*16+g] = av_clip(
+                SCALE_ONE_POS
+                    + 1.75*log2f(FFMAX(0.00125f,uplims[w*16+g]) / sce->ics.swb_sizes[g])
+                    + sfoffs,
+                60, SCALE_MAX_POS);
+            minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
+        }
+    }
+
+    /** Clip */
+    minscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512);
+    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
+        for (g = 0;  g < sce->ics.num_swb; g++)
+            if (!sce->zeroes[w*16+g])
+                sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF - 1);
+
+    if (!allz)
+        return;
+    abs_pow34_v(s->scoefs, sce->coeffs, 1024);
+    ff_quantize_band_cost_cache_init(s);
+
+    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+        start = w*128;
+        for (g = 0;  g < sce->ics.num_swb; g++) {
+            const float *scaled = s->scoefs + start;
+            maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled);
+            start += sce->ics.swb_sizes[g];
+        }
+    }
+
+    /**
+     * Scale uplims to match rate distortion to quality
+     * by applying noisy band deprioritization and tonal band prioritization.
+     * Maxval-energy ratio gives us an idea of how noisy/tonal the band is.
+     * If maxval^2 ~ energy, then that band is mostly noise, and we can relax
+     * rate distortion requirements.
+     */
+    memcpy(euplims, uplims, sizeof(euplims));
+    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+        /** psy already prioritizes transients to some extent */
+        float de_psy_factor = (sce->ics.num_windows > 1) ? 8.0f / sce->ics.group_len[w] : 1.0f;
+        start = w*128;
+        for (g = 0;  g < sce->ics.num_swb; g++) {
+            if (nzs[g] > 0) {
+                float cleanup_factor = ff_sqrf(av_clipf(start / (cutoff * 0.75f), 1.0f, 2.0f));
+                float energy2uplim = find_form_factor(
+                    sce->ics.group_len[w], sce->ics.swb_sizes[g],
+                    uplims[w*16+g] / (nzs[g] * sce->ics.swb_sizes[w]),
+                    sce->coeffs + start,
+                    nzslope * cleanup_factor);
+                energy2uplim *= de_psy_factor;
+                if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
+                    /** In ABR, we need to prioritize less and let rate control do its thing */
+                    energy2uplim = sqrtf(energy2uplim);
+                }
+                energy2uplim = FFMAX(0.015625f, FFMIN(1.0f, energy2uplim));
+                uplims[w*16+g] *= av_clipf(rdlambda * energy2uplim, rdmin, rdmax)
+                                  * sce->ics.group_len[w];
+
+                energy2uplim = find_form_factor(
+                    sce->ics.group_len[w], sce->ics.swb_sizes[g],
+                    uplims[w*16+g] / (nzs[g] * sce->ics.swb_sizes[w]),
+                    sce->coeffs + start,
+                    2.0f);
+                energy2uplim *= de_psy_factor;
+                if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
+                    /** In ABR, we need to prioritize less and let rate control do its thing */
+                    energy2uplim = sqrtf(energy2uplim);
+                }
+                energy2uplim = FFMAX(0.015625f, FFMIN(1.0f, energy2uplim));
+                euplims[w*16+g] *= av_clipf(rdlambda * energy2uplim * sce->ics.group_len[w],
+                    0.5f, 1.0f);
+            }
+            start += sce->ics.swb_sizes[g];
+        }
+    }
+
+    for (i = 0; i < sizeof(maxsf) / sizeof(maxsf[0]); ++i)
+        maxsf[i] = SCALE_MAX_POS;
+
+    //perform two-loop search
+    //outer loop - improve quality
+    do {
+        //inner loop - quantize spectrum to fit into given number of bits
+        int overdist;
+        int qstep = its ? 1 : 32;
+        do {
+            int changed = 0;
+            prev = -1;
+            recomprd = 0;
+            tbits = 0;
+            for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+                start = w*128;
+                for (g = 0;  g < sce->ics.num_swb; g++) {
+                    const float *coefs = &sce->coeffs[start];
+                    const float *scaled = &s->scoefs[start];
+                    int bits = 0;
+                    int cb;
+                    float dist = 0.0f;
+                    float qenergy = 0.0f;
+
+                    if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
+                        start += sce->ics.swb_sizes[g];
+                        if (sce->can_pns[w*16+g]) {
+                            /** PNS isn't free */
+                            tbits += ff_pns_bits(sce, w, g);
+                        }
+                        continue;
+                    }
+                    cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
+                    for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
+                        int b;
+                        float sqenergy;
+                        dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
+                                                   scaled + w2*128,
+                                                   sce->ics.swb_sizes[g],
+                                                   sce->sf_idx[w*16+g],
+                                                   cb,
+                                                   1.0f,
+                                                   INFINITY,
+                                                   &b, &sqenergy,
+                                                   0);
+                        bits += b;
+                        qenergy += sqenergy;
+                    }
+                    dists[w*16+g] = dist - bits;
+                    qenergies[w*16+g] = qenergy;
+                    if (prev != -1) {
+                        int sfdiff = av_clip(sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO, 0, 2*SCALE_MAX_DIFF);
+                        bits += ff_aac_scalefactor_bits[sfdiff];
+                    }
+                    tbits += bits;
+                    start += sce->ics.swb_sizes[g];
+                    prev = sce->sf_idx[w*16+g];
+                }
+            }
+            if (tbits > toomanybits) {
+                recomprd = 1;
+                for (i = 0; i < 128; i++) {
+                    if (sce->sf_idx[i] < (SCALE_MAX_POS - SCALE_DIV_512)) {
+                        int maxsf_i = (tbits > 5800) ? SCALE_MAX_POS : maxsf[i];
+                        int new_sf = FFMIN(maxsf_i, sce->sf_idx[i] + qstep);
+                        if (new_sf != sce->sf_idx[i]) {
+                            sce->sf_idx[i] = new_sf;
+                            changed = 1;
+                        }
+                    }
+                }
+            } else if (tbits < toofewbits) {
+                recomprd = 1;
+                for (i = 0; i < 128; i++) {
+                    if (sce->sf_idx[i] > SCALE_ONE_POS) {
+                        int new_sf = FFMAX(SCALE_ONE_POS, sce->sf_idx[i] - qstep);
+                        if (new_sf != sce->sf_idx[i]) {
+                            sce->sf_idx[i] = new_sf;
+                            changed = 1;
+                        }
+                    }
+                }
+            }
+            qstep >>= 1;
+            if (!qstep && tbits > toomanybits && sce->sf_idx[0] < 217 && changed)
+                qstep = 1;
+        } while (qstep);
+
+        overdist = 1;
+        fflag = tbits < toofewbits;
+        for (i = 0; i < 2 && (overdist || recomprd); ++i) {
+            if (recomprd) {
+                /** Must recompute distortion */
+                prev = -1;
+                tbits = 0;
+                for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+                    start = w*128;
+                    for (g = 0;  g < sce->ics.num_swb; g++) {
+                        const float *coefs = sce->coeffs + start;
+                        const float *scaled = s->scoefs + start;
+                        int bits = 0;
+                        int cb;
+                        float dist = 0.0f;
+                        float qenergy = 0.0f;
+
+                        if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
+                            start += sce->ics.swb_sizes[g];
+                            if (sce->can_pns[w*16+g]) {
+                                /** PNS isn't free */
+                                tbits += ff_pns_bits(sce, w, g);
+                            }
+                            continue;
+                        }
+                        cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
+                        for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
+                            int b;
+                            float sqenergy;
+                            dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
+                                                    scaled + w2*128,
+                                                    sce->ics.swb_sizes[g],
+                                                    sce->sf_idx[w*16+g],
+                                                    cb,
+                                                    1.0f,
+                                                    INFINITY,
+                                                    &b, &sqenergy,
+                                                    0);
+                            bits += b;
+                            qenergy += sqenergy;
+                        }
+                        dists[w*16+g] = dist - bits;
+                        qenergies[w*16+g] = qenergy;
+                        if (prev != -1) {
+                            int sfdiff = av_clip(sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO, 0, 2*SCALE_MAX_DIFF);
+                            bits += ff_aac_scalefactor_bits[sfdiff];
+                        }
+                        tbits += bits;
+                        start += sce->ics.swb_sizes[g];
+                        prev = sce->sf_idx[w*16+g];
+                    }
+                }
+            }
+            if (!i && s->options.pns && its > maxits/2 && tbits > toofewbits) {
+                float maxoverdist = 0.0f;
+                float ovrfactor = 1.f+(maxits-its)*16.f/maxits;
+                overdist = recomprd = 0;
+                for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+                    for (g = start = 0;  g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
+                        if (!sce->zeroes[w*16+g] && sce->sf_idx[w*16+g] > SCALE_ONE_POS && dists[w*16+g] > uplims[w*16+g]*ovrfactor) {
+                            float ovrdist = dists[w*16+g] / FFMAX(uplims[w*16+g],euplims[w*16+g]);
+                            maxoverdist = FFMAX(maxoverdist, ovrdist);
+                            overdist++;
+                        }
+                    }
+                }
+                if (overdist) {
+                    /* We have overdistorted bands, trade for zeroes (that can be noise)
+                     * Zero the bands in the lowest 1.25% spread-energy-threshold ranking
+                     */
+                    float minspread = max_spread_thr_r;
+                    float maxspread = min_spread_thr_r;
+                    float zspread;
+                    int zeroable = 0;
+                    int zeroed = 0;
+                    int maxzeroed, zloop;
+                    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+                        for (g = start = 0;  g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
+                            if (start >= pns_start_pos && !sce->zeroes[w*16+g] && sce->can_pns[w*16+g]) {
+                                minspread = FFMIN(minspread, spread_thr_r[w*16+g]);
+                                maxspread = FFMAX(maxspread, spread_thr_r[w*16+g]);
+                                zeroable++;
+                            }
+                        }
+                    }
+                    zspread = (maxspread-minspread) * 0.0125f + minspread;
+                    /* Don't PNS everything even if allowed. It suppresses bit starvation signals from RC,
+                     * and forces the hand of the later search_for_pns step.
+                     * Instead, PNS a fraction of the spread_thr_r range depending on how starved for bits we are,
+                     * and leave further PNSing to search_for_pns if worthwhile.
+                     */
+                    zspread = FFMIN3(min_spread_thr_r * 8.f, zspread,
+                        ((toomanybits - tbits) * min_spread_thr_r + (tbits - toofewbits) * max_spread_thr_r) / (toomanybits - toofewbits + 1));
+                    maxzeroed = FFMIN(zeroable, FFMAX(1, (zeroable * its + maxits - 1) / (2 * maxits)));
+                    for (zloop = 0; zloop < 2; zloop++) {
+                        /* Two passes: first distorted stuff - two birds in one shot and all that,
+                         * then anything viable. Viable means not zero, but either CB=zero-able
+                         * (too high SF), not SF <= 1 (that means we'd be operating at very high
+                         * quality, we don't want PNS when doing VHQ), PNS allowed, and within
+                         * the lowest ranking percentile.
+                         */
+                        float loopovrfactor = (zloop) ? 1.0f : ovrfactor;
+                        int loopminsf = (zloop) ? (SCALE_ONE_POS - SCALE_DIV_512) : SCALE_ONE_POS;
+                        int mcb;
+                        for (g = sce->ics.num_swb-1; g > 0 && zeroed < maxzeroed; g--) {
+                            if (sce->ics.swb_offset[g] < pns_start_pos)
+                                continue;
+                            for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+                                if (!sce->zeroes[w*16+g] && sce->can_pns[w*16+g] && spread_thr_r[w*16+g] <= zspread
+                                    && sce->sf_idx[w*16+g] > loopminsf
+                                    && (dists[w*16+g] > loopovrfactor*uplims[w*16+g] || !(mcb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]))
+                                        || (mcb <= 1 && dists[w*16+g] > FFMIN(uplims[w*16+g], euplims[w*16+g]))) ) {
+                                    sce->zeroes[w*16+g] = 1;
+                                    sce->band_type[w*16+g] = 0;
+                                    zeroed++;
+                                }
+                            }
+                        }
+                    }
+                    if (zeroed)
+                        recomprd = fflag = 1;
+                } else {
+                    overdist = 0;
+                }
+            }
+        }
+
+        minscaler = SCALE_MAX_POS;
+        maxscaler = 0;
+        for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+            for (g = 0;  g < sce->ics.num_swb; g++) {
+                if (!sce->zeroes[w*16+g]) {
+                    minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
+                    maxscaler = FFMAX(maxscaler, sce->sf_idx[w*16+g]);
+                }
+            }
+        }
+
+        minscaler = nminscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512);
+        prev = -1;
+        for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+            /** Start with big steps, end up fine-tuning */
+            int depth = (its > maxits/2) ? ((its > maxits*2/3) ? 1 : 3) : 10;
+            int edepth = depth+2;
+            float uplmax = its / (maxits*0.25f) + 1.0f;
+            uplmax *= (tbits > destbits) ? FFMIN(2.0f, tbits / (float)FFMAX(1,destbits)) : 1.0f;
+            start = w * 128;
+            for (g = 0; g < sce->ics.num_swb; g++) {
+                int prevsc = sce->sf_idx[w*16+g];
+                if (prev < 0 && !sce->zeroes[w*16+g])
+                    prev = sce->sf_idx[0];
+                if (!sce->zeroes[w*16+g]) {
+                    const float *coefs = sce->coeffs + start;
+                    const float *scaled = s->scoefs + start;
+                    int cmb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
+                    int mindeltasf = FFMAX(0, prev - SCALE_MAX_DIFF);
+                    int maxdeltasf = FFMIN(SCALE_MAX_POS - SCALE_DIV_512, prev + SCALE_MAX_DIFF);
+                    if ((!cmb || dists[w*16+g] > uplims[w*16+g]) && sce->sf_idx[w*16+g] > mindeltasf) {
+                        /* Try to make sure there is some energy in every nonzero band
+                         * NOTE: This algorithm must be forcibly imbalanced, pushing harder
+                         *  on holes or more distorted bands at first, otherwise there's
+                         *  no net gain (since the next iteration will offset all bands
+                         *  on the opposite direction to compensate for extra bits)
+                         */
+                        for (i = 0; i < edepth && sce->sf_idx[w*16+g] > mindeltasf; ++i) {
+                            int cb, bits;
+                            float dist, qenergy;
+                            int mb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1);
+                            cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
+                            dist = qenergy = 0.f;
+                            bits = 0;
+                            if (!cb) {
+                                maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g]-1, maxsf[w*16+g]);
+                            } else if (i >= depth && dists[w*16+g] < euplims[w*16+g]) {
+                                break;
+                            }
+                            /* !g is the DC band, it's important, since quantization error here
+                             * applies to less than a cycle, it creates horrible intermodulation
+                             * distortion if it doesn't stick to what psy requests
+                             */
+                            if (!g && sce->ics.num_windows > 1 && dists[w*16+g] >= euplims[w*16+g])
+                                maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g], maxsf[w*16+g]);
+                            for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
+                                int b;
+                                float sqenergy;
+                                dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
+                                                        scaled + w2*128,
+                                                        sce->ics.swb_sizes[g],
+                                                        sce->sf_idx[w*16+g]-1,
+                                                        cb,
+                                                        1.0f,
+                                                        INFINITY,
+                                                        &b, &sqenergy,
+                                                        0);
+                                bits += b;
+                                qenergy += sqenergy;
+                            }
+                            sce->sf_idx[w*16+g]--;
+                            dists[w*16+g] = dist - bits;
+                            qenergies[w*16+g] = qenergy;
+                            if (mb && (sce->sf_idx[w*16+g] < mindeltasf || (
+                                    (dists[w*16+g] < FFMIN(uplmax*uplims[w*16+g], euplims[w*16+g]))
+                                    && (fabsf(qenergies[w*16+g]-energies[w*16+g]) < euplims[w*16+g])
+                                ) )) {
+                                break;
+                            }
+                        }
+                    } else if (tbits > toofewbits && sce->sf_idx[w*16+g] < FFMIN(maxdeltasf, maxsf[w*16+g])
+                            && (dists[w*16+g] < FFMIN(euplims[w*16+g], uplims[w*16+g]))
+                            && (fabsf(qenergies[w*16+g]-energies[w*16+g]) < euplims[w*16+g])
+                        ) {
+                        /** Um... over target. Save bits for more important stuff. */
+                        for (i = 0; i < depth && sce->sf_idx[w*16+g] < maxdeltasf; ++i) {
+                            int cb, bits;
+                            float dist, qenergy;
+                            cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]+1);
+                            if (cb > 0) {
+                                dist = qenergy = 0.f;
+                                bits = 0;
+                                for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
+                                    int b;
+                                    float sqenergy;
+                                    dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
+                                                            scaled + w2*128,
+                                                            sce->ics.swb_sizes[g],
+                                                            sce->sf_idx[w*16+g]+1,
+                                                            cb,
+                                                            1.0f,
+                                                            INFINITY,
+                                                            &b, &sqenergy,
+                                                            0);
+                                    bits += b;
+                                    qenergy += sqenergy;
+                                }
+                                dist -= bits;
+                                if (dist < FFMIN(euplims[w*16+g], uplims[w*16+g])) {
+                                    sce->sf_idx[w*16+g]++;
+                                    dists[w*16+g] = dist;
+                                    qenergies[w*16+g] = qenergy;
+                                } else {
+                                    break;
+                                }
+                            } else {
+                                maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g], maxsf[w*16+g]);
+                                break;
+                            }
+                        }
+                    }
+                    prev = sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], mindeltasf, maxdeltasf);
+                    if (sce->sf_idx[w*16+g] != prevsc)
+                        fflag = 1;
+                    nminscaler = FFMIN(nminscaler, sce->sf_idx[w*16+g]);
+                    sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
+                }
+                start += sce->ics.swb_sizes[g];
+            }
+        }
+
+        /** SF difference limit violation risk. Must re-clamp. */
+        prev = -1;
+        for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+            for (g = 0; g < sce->ics.num_swb; g++) {
+                if (!sce->zeroes[w*16+g]) {
+                    int prevsf = sce->sf_idx[w*16+g];
+                    if (prev < 0)
+                        prev = prevsf;
+                    sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], prev - SCALE_MAX_DIFF, prev + SCALE_MAX_DIFF);
+                    sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
+                    prev = sce->sf_idx[w*16+g];
+                    if (!fflag && prevsf != sce->sf_idx[w*16+g])
+                        fflag = 1;
+                }
+            }
+        }
+
+        its++;
+    } while (fflag && its < maxits);
+
+    /** Scout out next nonzero bands */
+    ff_init_nextband_map(sce, nextband);
+
+    prev = -1;
+    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+        /** Make sure proper codebooks are set */
+        for (g = 0; g < sce->ics.num_swb; g++) {
+            if (!sce->zeroes[w*16+g]) {
+                sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
+                if (sce->band_type[w*16+g] <= 0) {
+                    if (!ff_sfdelta_can_remove_band(sce, nextband, prev, w*16+g)) {
+                        /** Cannot zero out, make sure it's not attempted */
+                        sce->band_type[w*16+g] = 1;
+                    } else {
+                        sce->zeroes[w*16+g] = 1;
+                        sce->band_type[w*16+g] = 0;
+                    }
+                }
+            } else {
+                sce->band_type[w*16+g] = 0;
+            }
+            /** Check that there are no SF delta range violations */
+            if (!sce->zeroes[w*16+g]) {
+                if (prev != -1) {
+                    av_unused int sfdiff = sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO;
+                    av_assert1(sfdiff >= 0 && sfdiff <= 2*SCALE_MAX_DIFF);
+                } else if (sce->zeroes[0]) {
+                    /** Set global gain to something useful */
+                    sce->sf_idx[0] = sce->sf_idx[w*16+g];
+                }
+                prev = sce->sf_idx[w*16+g];
+            }
+        }
+    }
+}
+
+#endif /* AVCODEC_AACCODER_TWOLOOP_H */
diff --git a/libavcodec/aacdec.c b/libavcodec/aacdec.c
index 622cc5c0..26bdea1e 100644
--- a/libavcodec/aacdec.c
+++ b/libavcodec/aacdec.c
@@ -32,54 +32,9 @@
  * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
  */
 
-/*
- * supported tools
- *
- * Support?             Name
- * N (code in SoC repo) gain control
- * Y                    block switching
- * Y                    window shapes - standard
- * N                    window shapes - Low Delay
- * Y                    filterbank - standard
- * N (code in SoC repo) filterbank - Scalable Sample Rate
- * Y                    Temporal Noise Shaping
- * Y                    Long Term Prediction
- * Y                    intensity stereo
- * Y                    channel coupling
- * Y                    frequency domain prediction
- * Y                    Perceptual Noise Substitution
- * Y                    Mid/Side stereo
- * N                    Scalable Inverse AAC Quantization
- * N                    Frequency Selective Switch
- * N                    upsampling filter
- * Y                    quantization & coding - AAC
- * N                    quantization & coding - TwinVQ
- * N                    quantization & coding - BSAC
- * N                    AAC Error Resilience tools
- * N                    Error Resilience payload syntax
- * N                    Error Protection tool
- * N                    CELP
- * N                    Silence Compression
- * N                    HVXC
- * N                    HVXC 4kbits/s VR
- * N                    Structured Audio tools
- * N                    Structured Audio Sample Bank Format
- * N                    MIDI
- * N                    Harmonic and Individual Lines plus Noise
- * N                    Text-To-Speech Interface
- * Y                    Spectral Band Replication
- * Y (not in this code) Layer-1
- * Y (not in this code) Layer-2
- * Y (not in this code) Layer-3
- * N                    SinuSoidal Coding (Transient, Sinusoid, Noise)
- * Y                    Parametric Stereo
- * N                    Direct Stream Transfer
- * Y                    Enhanced AAC Low Delay (ER AAC ELD)
- *
- * Note: - HE AAC v1 comprises LC AAC with Spectral Band Replication.
- *       - HE AAC v2 comprises LC AAC with Spectral Band Replication and
-           Parametric Stereo.
- */
+#define FFT_FLOAT 1
+#define FFT_FIXED_32 0
+#define USE_FIXED 0
 
 #include "libavutil/float_dsp.h"
 #include "libavutil/opt.h"
@@ -100,6 +55,7 @@
 #include "aacsbr.h"
 #include "mpeg4audio.h"
 #include "aacadtsdec.h"
+#include "profiles.h"
 #include "libavutil/intfloat.h"
 
 #include <errno.h>
@@ -108,1450 +64,19 @@
 #include <string.h>
 
 #if ARCH_ARM
-#   include "arm/aac.h"
-#elif ARCH_MIPS
-#   include "mips/aacdec_mips.h"
-#endif
-
-static VLC vlc_scalefactors;
-static VLC vlc_spectral[11];
-
-static int output_configure(AACContext *ac,
-                            uint8_t layout_map[MAX_ELEM_ID*4][3], int tags,
-                            enum OCStatus oc_type, int get_new_frame);
-
-#define overread_err "Input buffer exhausted before END element found\n"
-
-static int count_channels(uint8_t (*layout)[3], int tags)
-{
-    int i, sum = 0;
-    for (i = 0; i < tags; i++) {
-        int syn_ele = layout[i][0];
-        int pos     = layout[i][2];
-        sum += (1 + (syn_ele == TYPE_CPE)) *
-               (pos != AAC_CHANNEL_OFF && pos != AAC_CHANNEL_CC);
-    }
-    return sum;
-}
-
-/**
- * Check for the channel element in the current channel position configuration.
- * If it exists, make sure the appropriate element is allocated and map the
- * channel order to match the internal FFmpeg channel layout.
- *
- * @param   che_pos current channel position configuration
- * @param   type channel element type
- * @param   id channel element id
- * @param   channels count of the number of channels in the configuration
- *
- * @return  Returns error status. 0 - OK, !0 - error
- */
-static av_cold int che_configure(AACContext *ac,
-                                 enum ChannelPosition che_pos,
-                                 int type, int id, int *channels)
-{
-    if (*channels >= MAX_CHANNELS)
-        return AVERROR_INVALIDDATA;
-    if (che_pos) {
-        if (!ac->che[type][id]) {
-            if (!(ac->che[type][id] = av_mallocz(sizeof(ChannelElement))))
-                return AVERROR(ENOMEM);
-            ff_aac_sbr_ctx_init(ac, &ac->che[type][id]->sbr);
-        }
-        if (type != TYPE_CCE) {
-            if (*channels >= MAX_CHANNELS - (type == TYPE_CPE || (type == TYPE_SCE && ac->oc[1].m4ac.ps == 1))) {
-                av_log(ac->avctx, AV_LOG_ERROR, "Too many channels\n");
-                return AVERROR_INVALIDDATA;
-            }
-            ac->output_element[(*channels)++] = &ac->che[type][id]->ch[0];
-            if (type == TYPE_CPE ||
-                (type == TYPE_SCE && ac->oc[1].m4ac.ps == 1)) {
-                ac->output_element[(*channels)++] = &ac->che[type][id]->ch[1];
-            }
-        }
-    } else {
-        if (ac->che[type][id])
-            ff_aac_sbr_ctx_close(&ac->che[type][id]->sbr);
-        av_freep(&ac->che[type][id]);
-    }
-    return 0;
-}
-
-static int frame_configure_elements(AVCodecContext *avctx)
-{
-    AACContext *ac = avctx->priv_data;
-    int type, id, ch, ret;
-
-    /* set channel pointers to internal buffers by default */
-    for (type = 0; type < 4; type++) {
-        for (id = 0; id < MAX_ELEM_ID; id++) {
-            ChannelElement *che = ac->che[type][id];
-            if (che) {
-                che->ch[0].ret = che->ch[0].ret_buf;
-                che->ch[1].ret = che->ch[1].ret_buf;
-            }
-        }
-    }
-
-    /* get output buffer */
-    av_frame_unref(ac->frame);
-    if (!avctx->channels)
-        return 1;
-
-    ac->frame->nb_samples = 2048;
-    if ((ret = ff_get_buffer(avctx, ac->frame, 0)) < 0)
-        return ret;
-
-    /* map output channel pointers to AVFrame data */
-    for (ch = 0; ch < avctx->channels; ch++) {
-        if (ac->output_element[ch])
-            ac->output_element[ch]->ret = (float *)ac->frame->extended_data[ch];
-    }
-
-    return 0;
-}
-
-struct elem_to_channel {
-    uint64_t av_position;
-    uint8_t syn_ele;
-    uint8_t elem_id;
-    uint8_t aac_position;
-};
-
-static int assign_pair(struct elem_to_channel e2c_vec[MAX_ELEM_ID],
-                       uint8_t (*layout_map)[3], int offset, uint64_t left,
-                       uint64_t right, int pos)
-{
-    if (layout_map[offset][0] == TYPE_CPE) {
-        e2c_vec[offset] = (struct elem_to_channel) {
-            .av_position  = left | right,
-            .syn_ele      = TYPE_CPE,
-            .elem_id      = layout_map[offset][1],
-            .aac_position = pos
-        };
-        return 1;
-    } else {
-        e2c_vec[offset] = (struct elem_to_channel) {
-            .av_position  = left,
-            .syn_ele      = TYPE_SCE,
-            .elem_id      = layout_map[offset][1],
-            .aac_position = pos
-        };
-        e2c_vec[offset + 1] = (struct elem_to_channel) {
-            .av_position  = right,
-            .syn_ele      = TYPE_SCE,
-            .elem_id      = layout_map[offset + 1][1],
-            .aac_position = pos
-        };
-        return 2;
-    }
-}
-
-static int count_paired_channels(uint8_t (*layout_map)[3], int tags, int pos,
-                                 int *current)
-{
-    int num_pos_channels = 0;
-    int first_cpe        = 0;
-    int sce_parity       = 0;
-    int i;
-    for (i = *current; i < tags; i++) {
-        if (layout_map[i][2] != pos)
-            break;
-        if (layout_map[i][0] == TYPE_CPE) {
-            if (sce_parity) {
-                if (pos == AAC_CHANNEL_FRONT && !first_cpe) {
-                    sce_parity = 0;
-                } else {
-                    return -1;
-                }
-            }
-            num_pos_channels += 2;
-            first_cpe         = 1;
-        } else {
-            num_pos_channels++;
-            sce_parity ^= 1;
-        }
-    }
-    if (sce_parity &&
-        ((pos == AAC_CHANNEL_FRONT && first_cpe) || pos == AAC_CHANNEL_SIDE))
-        return -1;
-    *current = i;
-    return num_pos_channels;
-}
-
-static uint64_t sniff_channel_order(uint8_t (*layout_map)[3], int tags)
-{
-    int i, n, total_non_cc_elements;
-    struct elem_to_channel e2c_vec[4 * MAX_ELEM_ID] = { { 0 } };
-    int num_front_channels, num_side_channels, num_back_channels;
-    uint64_t layout;
-
-    if (FF_ARRAY_ELEMS(e2c_vec) < tags)
-        return 0;
-
-    i = 0;
-    num_front_channels =
-        count_paired_channels(layout_map, tags, AAC_CHANNEL_FRONT, &i);
-    if (num_front_channels < 0)
-        return 0;
-    num_side_channels =
-        count_paired_channels(layout_map, tags, AAC_CHANNEL_SIDE, &i);
-    if (num_side_channels < 0)
-        return 0;
-    num_back_channels =
-        count_paired_channels(layout_map, tags, AAC_CHANNEL_BACK, &i);
-    if (num_back_channels < 0)
-        return 0;
-
-    if (num_side_channels == 0 && num_back_channels >= 4) {
-        num_side_channels = 2;
-        num_back_channels -= 2;
-    }
-
-    i = 0;
-    if (num_front_channels & 1) {
-        e2c_vec[i] = (struct elem_to_channel) {
-            .av_position  = AV_CH_FRONT_CENTER,
-            .syn_ele      = TYPE_SCE,
-            .elem_id      = layout_map[i][1],
-            .aac_position = AAC_CHANNEL_FRONT
-        };
-        i++;
-        num_front_channels--;
-    }
-    if (num_front_channels >= 4) {
-        i += assign_pair(e2c_vec, layout_map, i,
-                         AV_CH_FRONT_LEFT_OF_CENTER,
-                         AV_CH_FRONT_RIGHT_OF_CENTER,
-                         AAC_CHANNEL_FRONT);
-        num_front_channels -= 2;
-    }
-    if (num_front_channels >= 2) {
-        i += assign_pair(e2c_vec, layout_map, i,
-                         AV_CH_FRONT_LEFT,
-                         AV_CH_FRONT_RIGHT,
-                         AAC_CHANNEL_FRONT);
-        num_front_channels -= 2;
-    }
-    while (num_front_channels >= 2) {
-        i += assign_pair(e2c_vec, layout_map, i,
-                         UINT64_MAX,
-                         UINT64_MAX,
-                         AAC_CHANNEL_FRONT);
-        num_front_channels -= 2;
-    }
-
-    if (num_side_channels >= 2) {
-        i += assign_pair(e2c_vec, layout_map, i,
-                         AV_CH_SIDE_LEFT,
-                         AV_CH_SIDE_RIGHT,
-                         AAC_CHANNEL_FRONT);
-        num_side_channels -= 2;
-    }
-    while (num_side_channels >= 2) {
-        i += assign_pair(e2c_vec, layout_map, i,
-                         UINT64_MAX,
-                         UINT64_MAX,
-                         AAC_CHANNEL_SIDE);
-        num_side_channels -= 2;
-    }
-
-    while (num_back_channels >= 4) {
-        i += assign_pair(e2c_vec, layout_map, i,
-                         UINT64_MAX,
-                         UINT64_MAX,
-                         AAC_CHANNEL_BACK);
-        num_back_channels -= 2;
-    }
-    if (num_back_channels >= 2) {
-        i += assign_pair(e2c_vec, layout_map, i,
-                         AV_CH_BACK_LEFT,
-                         AV_CH_BACK_RIGHT,
-                         AAC_CHANNEL_BACK);
-        num_back_channels -= 2;
-    }
-    if (num_back_channels) {
-        e2c_vec[i] = (struct elem_to_channel) {
-            .av_position  = AV_CH_BACK_CENTER,
-            .syn_ele      = TYPE_SCE,
-            .elem_id      = layout_map[i][1],
-            .aac_position = AAC_CHANNEL_BACK
-        };
-        i++;
-        num_back_channels--;
-    }
-
-    if (i < tags && layout_map[i][2] == AAC_CHANNEL_LFE) {
-        e2c_vec[i] = (struct elem_to_channel) {
-            .av_position  = AV_CH_LOW_FREQUENCY,
-            .syn_ele      = TYPE_LFE,
-            .elem_id      = layout_map[i][1],
-            .aac_position = AAC_CHANNEL_LFE
-        };
-        i++;
-    }
-    while (i < tags && layout_map[i][2] == AAC_CHANNEL_LFE) {
-        e2c_vec[i] = (struct elem_to_channel) {
-            .av_position  = UINT64_MAX,
-            .syn_ele      = TYPE_LFE,
-            .elem_id      = layout_map[i][1],
-            .aac_position = AAC_CHANNEL_LFE
-        };
-        i++;
-    }
-
-    // Must choose a stable sort
-    total_non_cc_elements = n = i;
-    do {
-        int next_n = 0;
-        for (i = 1; i < n; i++)
-            if (e2c_vec[i - 1].av_position > e2c_vec[i].av_position) {
-                FFSWAP(struct elem_to_channel, e2c_vec[i - 1], e2c_vec[i]);
-                next_n = i;
-            }
-        n = next_n;
-    } while (n > 0);
-
-    layout = 0;
-    for (i = 0; i < total_non_cc_elements; i++) {
-        layout_map[i][0] = e2c_vec[i].syn_ele;
-        layout_map[i][1] = e2c_vec[i].elem_id;
-        layout_map[i][2] = e2c_vec[i].aac_position;
-        if (e2c_vec[i].av_position != UINT64_MAX) {
-            layout |= e2c_vec[i].av_position;
-        }
-    }
-
-    return layout;
-}
-
-/**
- * Save current output configuration if and only if it has been locked.
- */
-static void push_output_configuration(AACContext *ac) {
-    if (ac->oc[1].status == OC_LOCKED || ac->oc[0].status == OC_NONE) {
-        ac->oc[0] = ac->oc[1];
-    }
-    ac->oc[1].status = OC_NONE;
-}
-
-/**
- * Restore the previous output configuration if and only if the current
- * configuration is unlocked.
- */
-static void pop_output_configuration(AACContext *ac) {
-    if (ac->oc[1].status != OC_LOCKED && ac->oc[0].status != OC_NONE) {
-        ac->oc[1] = ac->oc[0];
-        ac->avctx->channels = ac->oc[1].channels;
-        ac->avctx->channel_layout = ac->oc[1].channel_layout;
-        output_configure(ac, ac->oc[1].layout_map, ac->oc[1].layout_map_tags,
-                         ac->oc[1].status, 0);
-    }
-}
-
-/**
- * Configure output channel order based on the current program
- * configuration element.
- *
- * @return  Returns error status. 0 - OK, !0 - error
- */
-static int output_configure(AACContext *ac,
-                            uint8_t layout_map[MAX_ELEM_ID * 4][3], int tags,
-                            enum OCStatus oc_type, int get_new_frame)
-{
-    AVCodecContext *avctx = ac->avctx;
-    int i, channels = 0, ret;
-    uint64_t layout = 0;
-    uint8_t id_map[TYPE_END][MAX_ELEM_ID] = {{ 0 }};
-    uint8_t type_counts[TYPE_END] = { 0 };
-
-    if (ac->oc[1].layout_map != layout_map) {
-        memcpy(ac->oc[1].layout_map, layout_map, tags * sizeof(layout_map[0]));
-        ac->oc[1].layout_map_tags = tags;
-    }
-    for (i = 0; i < tags; i++) {
-        int type =         layout_map[i][0];
-        int id =           layout_map[i][1];
-        id_map[type][id] = type_counts[type]++;
-    }
-    // Try to sniff a reasonable channel order, otherwise output the
-    // channels in the order the PCE declared them.
-    if (avctx->request_channel_layout != AV_CH_LAYOUT_NATIVE)
-        layout = sniff_channel_order(layout_map, tags);
-    for (i = 0; i < tags; i++) {
-        int type =     layout_map[i][0];
-        int id =       layout_map[i][1];
-        int iid =      id_map[type][id];
-        int position = layout_map[i][2];
-        // Allocate or free elements depending on if they are in the
-        // current program configuration.
-        ret = che_configure(ac, position, type, iid, &channels);
-        if (ret < 0)
-            return ret;
-        ac->tag_che_map[type][id] = ac->che[type][iid];
-    }
-    if (ac->oc[1].m4ac.ps == 1 && channels == 2) {
-        if (layout == AV_CH_FRONT_CENTER) {
-            layout = AV_CH_FRONT_LEFT|AV_CH_FRONT_RIGHT;
-        } else {
-            layout = 0;
-        }
-    }
-
-    if (layout) avctx->channel_layout = layout;
-                            ac->oc[1].channel_layout = layout;
-    avctx->channels       = ac->oc[1].channels       = channels;
-    ac->oc[1].status = oc_type;
-
-    if (get_new_frame) {
-        if ((ret = frame_configure_elements(ac->avctx)) < 0)
-            return ret;
-    }
-
-    return 0;
-}
-
-static void flush(AVCodecContext *avctx)
-{
-    AACContext *ac= avctx->priv_data;
-    int type, i, j;
-
-    for (type = 3; type >= 0; type--) {
-        for (i = 0; i < MAX_ELEM_ID; i++) {
-            ChannelElement *che = ac->che[type][i];
-            if (che) {
-                for (j = 0; j <= 1; j++) {
-                    memset(che->ch[j].saved, 0, sizeof(che->ch[j].saved));
-                }
-            }
-        }
-    }
-}
-
-/**
- * Set up channel positions based on a default channel configuration
- * as specified in table 1.17.
- *
- * @return  Returns error status. 0 - OK, !0 - error
- */
-static int set_default_channel_config(AVCodecContext *avctx,
-                                      uint8_t (*layout_map)[3],
-                                      int *tags,
-                                      int channel_config)
-{
-    if (channel_config < 1 || (channel_config > 7 && channel_config < 11) ||
-        channel_config > 12) {
-        av_log(avctx, AV_LOG_ERROR,
-               "invalid default channel configuration (%d)\n",
-               channel_config);
-        return AVERROR_INVALIDDATA;
-    }
-    *tags = tags_per_config[channel_config];
-    memcpy(layout_map, aac_channel_layout_map[channel_config - 1],
-           *tags * sizeof(*layout_map));
-
-    /*
-     * AAC specification has 7.1(wide) as a default layout for 8-channel streams.
-     * However, at least Nero AAC encoder encodes 7.1 streams using the default
-     * channel config 7, mapping the side channels of the original audio stream
-     * to the second AAC_CHANNEL_FRONT pair in the AAC stream. Similarly, e.g. FAAD
-     * decodes the second AAC_CHANNEL_FRONT pair as side channels, therefore decoding
-     * the incorrect streams as if they were correct (and as the encoder intended).
-     *
-     * As actual intended 7.1(wide) streams are very rare, default to assuming a
-     * 7.1 layout was intended.
-     */
-    if (channel_config == 7 && avctx->strict_std_compliance < FF_COMPLIANCE_STRICT) {
-        av_log(avctx, AV_LOG_INFO, "Assuming an incorrectly encoded 7.1 channel layout"
-               " instead of a spec-compliant 7.1(wide) layout, use -strict %d to decode"
-               " according to the specification instead.\n", FF_COMPLIANCE_STRICT);
-        layout_map[2][2] = AAC_CHANNEL_SIDE;
-    }
-
-    return 0;
-}
-
-static ChannelElement *get_che(AACContext *ac, int type, int elem_id)
-{
-    /* For PCE based channel configurations map the channels solely based
-     * on tags. */
-    if (!ac->oc[1].m4ac.chan_config) {
-        return ac->tag_che_map[type][elem_id];
-    }
-    // Allow single CPE stereo files to be signalled with mono configuration.
-    if (!ac->tags_mapped && type == TYPE_CPE &&
-        ac->oc[1].m4ac.chan_config == 1) {
-        uint8_t layout_map[MAX_ELEM_ID*4][3];
-        int layout_map_tags;
-        push_output_configuration(ac);
-
-        av_log(ac->avctx, AV_LOG_DEBUG, "mono with CPE\n");
-
-        if (set_default_channel_config(ac->avctx, layout_map,
-                                       &layout_map_tags, 2) < 0)
-            return NULL;
-        if (output_configure(ac, layout_map, layout_map_tags,
-                             OC_TRIAL_FRAME, 1) < 0)
-            return NULL;
-
-        ac->oc[1].m4ac.chan_config = 2;
-        ac->oc[1].m4ac.ps = 0;
-    }
-    // And vice-versa
-    if (!ac->tags_mapped && type == TYPE_SCE &&
-        ac->oc[1].m4ac.chan_config == 2) {
-        uint8_t layout_map[MAX_ELEM_ID * 4][3];
-        int layout_map_tags;
-        push_output_configuration(ac);
-
-        av_log(ac->avctx, AV_LOG_DEBUG, "stereo with SCE\n");
-
-        if (set_default_channel_config(ac->avctx, layout_map,
-                                       &layout_map_tags, 1) < 0)
-            return NULL;
-        if (output_configure(ac, layout_map, layout_map_tags,
-                             OC_TRIAL_FRAME, 1) < 0)
-            return NULL;
-
-        ac->oc[1].m4ac.chan_config = 1;
-        if (ac->oc[1].m4ac.sbr)
-            ac->oc[1].m4ac.ps = -1;
-    }
-    /* For indexed channel configurations map the channels solely based
-     * on position. */
-    switch (ac->oc[1].m4ac.chan_config) {
-    case 12:
-    case 7:
-        if (ac->tags_mapped == 3 && type == TYPE_CPE) {
-            ac->tags_mapped++;
-            return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][2];
-        }
-    case 11:
-        if (ac->tags_mapped == 2 &&
-            ac->oc[1].m4ac.chan_config == 11 &&
-            type == TYPE_SCE) {
-            ac->tags_mapped++;
-            return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][1];
-        }
-    case 6:
-        /* Some streams incorrectly code 5.1 audio as
-         * SCE[0] CPE[0] CPE[1] SCE[1]
-         * instead of
-         * SCE[0] CPE[0] CPE[1] LFE[0].
-         * If we seem to have encountered such a stream, transfer
-         * the LFE[0] element to the SCE[1]'s mapping */
-        if (ac->tags_mapped == tags_per_config[ac->oc[1].m4ac.chan_config] - 1 && (type == TYPE_LFE || type == TYPE_SCE)) {
-            if (!ac->warned_remapping_once && (type != TYPE_LFE || elem_id != 0)) {
-                av_log(ac->avctx, AV_LOG_WARNING,
-                   "This stream seems to incorrectly report its last channel as %s[%d], mapping to LFE[0]\n",
-                   type == TYPE_SCE ? "SCE" : "LFE", elem_id);
-                ac->warned_remapping_once++;
-            }
-            ac->tags_mapped++;
-            return ac->tag_che_map[type][elem_id] = ac->che[TYPE_LFE][0];
-        }
-    case 5:
-        if (ac->tags_mapped == 2 && type == TYPE_CPE) {
-            ac->tags_mapped++;
-            return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][1];
-        }
-    case 4:
-        /* Some streams incorrectly code 4.0 audio as
-         * SCE[0] CPE[0] LFE[0]
-         * instead of
-         * SCE[0] CPE[0] SCE[1].
-         * If we seem to have encountered such a stream, transfer
-         * the SCE[1] element to the LFE[0]'s mapping */
-        if (ac->tags_mapped == tags_per_config[ac->oc[1].m4ac.chan_config] - 1 && (type == TYPE_LFE || type == TYPE_SCE)) {
-            if (!ac->warned_remapping_once && (type != TYPE_SCE || elem_id != 1)) {
-                av_log(ac->avctx, AV_LOG_WARNING,
-                   "This stream seems to incorrectly report its last channel as %s[%d], mapping to SCE[1]\n",
-                   type == TYPE_SCE ? "SCE" : "LFE", elem_id);
-                ac->warned_remapping_once++;
-            }
-            ac->tags_mapped++;
-            return ac->tag_che_map[type][elem_id] = ac->che[TYPE_SCE][1];
-        }
-        if (ac->tags_mapped == 2 &&
-            ac->oc[1].m4ac.chan_config == 4 &&
-            type == TYPE_SCE) {
-            ac->tags_mapped++;
-            return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][1];
-        }
-    case 3:
-    case 2:
-        if (ac->tags_mapped == (ac->oc[1].m4ac.chan_config != 2) &&
-            type == TYPE_CPE) {
-            ac->tags_mapped++;
-            return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][0];
-        } else if (ac->oc[1].m4ac.chan_config == 2) {
-            return NULL;
-        }
-    case 1:
-        if (!ac->tags_mapped && type == TYPE_SCE) {
-            ac->tags_mapped++;
-            return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][0];
-        }
-    default:
-        return NULL;
-    }
-}
-
-/**
- * Decode an array of 4 bit element IDs, optionally interleaved with a
- * stereo/mono switching bit.
- *
- * @param type speaker type/position for these channels
- */
-static void decode_channel_map(uint8_t layout_map[][3],
-                               enum ChannelPosition type,
-                               GetBitContext *gb, int n)
-{
-    while (n--) {
-        enum RawDataBlockType syn_ele;
-        switch (type) {
-        case AAC_CHANNEL_FRONT:
-        case AAC_CHANNEL_BACK:
-        case AAC_CHANNEL_SIDE:
-            syn_ele = get_bits1(gb);
-            break;
-        case AAC_CHANNEL_CC:
-            skip_bits1(gb);
-            syn_ele = TYPE_CCE;
-            break;
-        case AAC_CHANNEL_LFE:
-            syn_ele = TYPE_LFE;
-            break;
-        default:
-            // AAC_CHANNEL_OFF has no channel map
-            av_assert0(0);
-        }
-        layout_map[0][0] = syn_ele;
-        layout_map[0][1] = get_bits(gb, 4);
-        layout_map[0][2] = type;
-        layout_map++;
-    }
-}
-
-/**
- * Decode program configuration element; reference: table 4.2.
- *
- * @return  Returns error status. 0 - OK, !0 - error
- */
-static int decode_pce(AVCodecContext *avctx, MPEG4AudioConfig *m4ac,
-                      uint8_t (*layout_map)[3],
-                      GetBitContext *gb)
-{
-    int num_front, num_side, num_back, num_lfe, num_assoc_data, num_cc;
-    int sampling_index;
-    int comment_len;
-    int tags;
-
-    skip_bits(gb, 2);  // object_type
-
-    sampling_index = get_bits(gb, 4);
-    if (m4ac->sampling_index != sampling_index)
-        av_log(avctx, AV_LOG_WARNING,
-               "Sample rate index in program config element does not "
-               "match the sample rate index configured by the container.\n");
-
-    num_front       = get_bits(gb, 4);
-    num_side        = get_bits(gb, 4);
-    num_back        = get_bits(gb, 4);
-    num_lfe         = get_bits(gb, 2);
-    num_assoc_data  = get_bits(gb, 3);
-    num_cc          = get_bits(gb, 4);
-
-    if (get_bits1(gb))
-        skip_bits(gb, 4); // mono_mixdown_tag
-    if (get_bits1(gb))
-        skip_bits(gb, 4); // stereo_mixdown_tag
-
-    if (get_bits1(gb))
-        skip_bits(gb, 3); // mixdown_coeff_index and pseudo_surround
-
-    if (get_bits_left(gb) < 4 * (num_front + num_side + num_back + num_lfe + num_assoc_data + num_cc)) {
-        av_log(avctx, AV_LOG_ERROR, "decode_pce: " overread_err);
-        return -1;
-    }
-    decode_channel_map(layout_map       , AAC_CHANNEL_FRONT, gb, num_front);
-    tags = num_front;
-    decode_channel_map(layout_map + tags, AAC_CHANNEL_SIDE,  gb, num_side);
-    tags += num_side;
-    decode_channel_map(layout_map + tags, AAC_CHANNEL_BACK,  gb, num_back);
-    tags += num_back;
-    decode_channel_map(layout_map + tags, AAC_CHANNEL_LFE,   gb, num_lfe);
-    tags += num_lfe;
-
-    skip_bits_long(gb, 4 * num_assoc_data);
-
-    decode_channel_map(layout_map + tags, AAC_CHANNEL_CC,    gb, num_cc);
-    tags += num_cc;
-
-    align_get_bits(gb);
-
-    /* comment field, first byte is length */
-    comment_len = get_bits(gb, 8) * 8;
-    if (get_bits_left(gb) < comment_len) {
-        av_log(avctx, AV_LOG_ERROR, "decode_pce: " overread_err);
-        return AVERROR_INVALIDDATA;
-    }
-    skip_bits_long(gb, comment_len);
-    return tags;
-}
-
-/**
- * Decode GA "General Audio" specific configuration; reference: table 4.1.
- *
- * @param   ac          pointer to AACContext, may be null
- * @param   avctx       pointer to AVCCodecContext, used for logging
- *
- * @return  Returns error status. 0 - OK, !0 - error
- */
-static int decode_ga_specific_config(AACContext *ac, AVCodecContext *avctx,
-                                     GetBitContext *gb,
-                                     MPEG4AudioConfig *m4ac,
-                                     int channel_config)
-{
-    int extension_flag, ret, ep_config, res_flags;
-    uint8_t layout_map[MAX_ELEM_ID*4][3];
-    int tags = 0;
-
-    if (get_bits1(gb)) { // frameLengthFlag
-        avpriv_request_sample(avctx, "960/120 MDCT window");
-        return AVERROR_PATCHWELCOME;
-    }
-    m4ac->frame_length_short = 0;
-
-    if (get_bits1(gb))       // dependsOnCoreCoder
-        skip_bits(gb, 14);   // coreCoderDelay
-    extension_flag = get_bits1(gb);
-
-    if (m4ac->object_type == AOT_AAC_SCALABLE ||
-        m4ac->object_type == AOT_ER_AAC_SCALABLE)
-        skip_bits(gb, 3);     // layerNr
-
-    if (channel_config == 0) {
-        skip_bits(gb, 4);  // element_instance_tag
-        tags = decode_pce(avctx, m4ac, layout_map, gb);
-        if (tags < 0)
-            return tags;
-    } else {
-        if ((ret = set_default_channel_config(avctx, layout_map,
-                                              &tags, channel_config)))
-            return ret;
-    }
-
-    if (count_channels(layout_map, tags) > 1) {
-        m4ac->ps = 0;
-    } else if (m4ac->sbr == 1 && m4ac->ps == -1)
-        m4ac->ps = 1;
-
-    if (ac && (ret = output_configure(ac, layout_map, tags, OC_GLOBAL_HDR, 0)))
-        return ret;
-
-    if (extension_flag) {
-        switch (m4ac->object_type) {
-        case AOT_ER_BSAC:
-            skip_bits(gb, 5);    // numOfSubFrame
-            skip_bits(gb, 11);   // layer_length
-            break;
-        case AOT_ER_AAC_LC:
-        case AOT_ER_AAC_LTP:
-        case AOT_ER_AAC_SCALABLE:
-        case AOT_ER_AAC_LD:
-            res_flags = get_bits(gb, 3);
-            if (res_flags) {
-                avpriv_report_missing_feature(avctx,
-                                              "AAC data resilience (flags %x)",
-                                              res_flags);
-                return AVERROR_PATCHWELCOME;
-            }
-            break;
-        }
-        skip_bits1(gb);    // extensionFlag3 (TBD in version 3)
-    }
-    switch (m4ac->object_type) {
-    case AOT_ER_AAC_LC:
-    case AOT_ER_AAC_LTP:
-    case AOT_ER_AAC_SCALABLE:
-    case AOT_ER_AAC_LD:
-        ep_config = get_bits(gb, 2);
-        if (ep_config) {
-            avpriv_report_missing_feature(avctx,
-                                          "epConfig %d", ep_config);
-            return AVERROR_PATCHWELCOME;
-        }
-    }
-    return 0;
-}
-
-static int decode_eld_specific_config(AACContext *ac, AVCodecContext *avctx,
-                                     GetBitContext *gb,
-                                     MPEG4AudioConfig *m4ac,
-                                     int channel_config)
-{
-    int ret, ep_config, res_flags;
-    uint8_t layout_map[MAX_ELEM_ID*4][3];
-    int tags = 0;
-    const int ELDEXT_TERM = 0;
-
-    m4ac->ps  = 0;
-    m4ac->sbr = 0;
-
-    m4ac->frame_length_short = get_bits1(gb);
-    res_flags = get_bits(gb, 3);
-    if (res_flags) {
-        avpriv_report_missing_feature(avctx,
-                                      "AAC data resilience (flags %x)",
-                                      res_flags);
-        return AVERROR_PATCHWELCOME;
-    }
-
-    if (get_bits1(gb)) { // ldSbrPresentFlag
-        avpriv_report_missing_feature(avctx,
-                                      "Low Delay SBR");
-        return AVERROR_PATCHWELCOME;
-    }
-
-    while (get_bits(gb, 4) != ELDEXT_TERM) {
-        int len = get_bits(gb, 4);
-        if (len == 15)
-            len += get_bits(gb, 8);
-        if (len == 15 + 255)
-            len += get_bits(gb, 16);
-        if (get_bits_left(gb) < len * 8 + 4) {
-            av_log(avctx, AV_LOG_ERROR, overread_err);
-            return AVERROR_INVALIDDATA;
-        }
-        skip_bits_long(gb, 8 * len);
-    }
-
-    if ((ret = set_default_channel_config(avctx, layout_map,
-                                          &tags, channel_config)))
-        return ret;
-
-    if (ac && (ret = output_configure(ac, layout_map, tags, OC_GLOBAL_HDR, 0)))
-        return ret;
-
-    ep_config = get_bits(gb, 2);
-    if (ep_config) {
-        avpriv_report_missing_feature(avctx,
-                                      "epConfig %d", ep_config);
-        return AVERROR_PATCHWELCOME;
-    }
-    return 0;
-}
-
-/**
- * Decode audio specific configuration; reference: table 1.13.
- *
- * @param   ac          pointer to AACContext, may be null
- * @param   avctx       pointer to AVCCodecContext, used for logging
- * @param   m4ac        pointer to MPEG4AudioConfig, used for parsing
- * @param   data        pointer to buffer holding an audio specific config
- * @param   bit_size    size of audio specific config or data in bits
- * @param   sync_extension look for an appended sync extension
- *
- * @return  Returns error status or number of consumed bits. <0 - error
- */
-static int decode_audio_specific_config(AACContext *ac,
-                                        AVCodecContext *avctx,
-                                        MPEG4AudioConfig *m4ac,
-                                        const uint8_t *data, int bit_size,
-                                        int sync_extension)
-{
-    GetBitContext gb;
-    int i, ret;
-
-    ff_dlog(avctx, "audio specific config size %d\n", bit_size >> 3);
-    for (i = 0; i < bit_size >> 3; i++)
-        ff_dlog(avctx, "%02x ", data[i]);
-    ff_dlog(avctx, "\n");
-
-    if ((ret = init_get_bits(&gb, data, bit_size)) < 0)
-        return ret;
-
-    if ((i = avpriv_mpeg4audio_get_config(m4ac, data, bit_size,
-                                          sync_extension)) < 0)
-        return AVERROR_INVALIDDATA;
-    if (m4ac->sampling_index > 12) {
-        av_log(avctx, AV_LOG_ERROR,
-               "invalid sampling rate index %d\n",
-               m4ac->sampling_index);
-        return AVERROR_INVALIDDATA;
-    }
-    if (m4ac->object_type == AOT_ER_AAC_LD &&
-        (m4ac->sampling_index < 3 || m4ac->sampling_index > 7)) {
-        av_log(avctx, AV_LOG_ERROR,
-               "invalid low delay sampling rate index %d\n",
-               m4ac->sampling_index);
-        return AVERROR_INVALIDDATA;
-    }
-
-    skip_bits_long(&gb, i);
-
-    switch (m4ac->object_type) {
-    case AOT_AAC_MAIN:
-    case AOT_AAC_LC:
-    case AOT_AAC_LTP:
-    case AOT_ER_AAC_LC:
-    case AOT_ER_AAC_LD:
-        if ((ret = decode_ga_specific_config(ac, avctx, &gb,
-                                            m4ac, m4ac->chan_config)) < 0)
-            return ret;
-        break;
-    case AOT_ER_AAC_ELD:
-        if ((ret = decode_eld_specific_config(ac, avctx, &gb,
-                                              m4ac, m4ac->chan_config)) < 0)
-            return ret;
-        break;
-    default:
-        avpriv_report_missing_feature(avctx,
-                                      "Audio object type %s%d",
-                                      m4ac->sbr == 1 ? "SBR+" : "",
-                                      m4ac->object_type);
-        return AVERROR(ENOSYS);
-    }
-
-    ff_dlog(avctx,
-            "AOT %d chan config %d sampling index %d (%d) SBR %d PS %d\n",
-            m4ac->object_type, m4ac->chan_config, m4ac->sampling_index,
-            m4ac->sample_rate, m4ac->sbr,
-            m4ac->ps);
-
-    return get_bits_count(&gb);
-}
-
-/**
- * linear congruential pseudorandom number generator
- *
- * @param   previous_val    pointer to the current state of the generator
- *
- * @return  Returns a 32-bit pseudorandom integer
- */
-static av_always_inline int lcg_random(unsigned previous_val)
-{
-    union { unsigned u; int s; } v = { previous_val * 1664525u + 1013904223 };
-    return v.s;
-}
-
-static av_always_inline void reset_predict_state(PredictorState *ps)
-{
-    ps->r0   = 0.0f;
-    ps->r1   = 0.0f;
-    ps->cor0 = 0.0f;
-    ps->cor1 = 0.0f;
-    ps->var0 = 1.0f;
-    ps->var1 = 1.0f;
-}
-
-static void reset_all_predictors(PredictorState *ps)
-{
-    int i;
-    for (i = 0; i < MAX_PREDICTORS; i++)
-        reset_predict_state(&ps[i]);
-}
-
-static int sample_rate_idx (int rate)
-{
-         if (92017 <= rate) return 0;
-    else if (75132 <= rate) return 1;
-    else if (55426 <= rate) return 2;
-    else if (46009 <= rate) return 3;
-    else if (37566 <= rate) return 4;
-    else if (27713 <= rate) return 5;
-    else if (23004 <= rate) return 6;
-    else if (18783 <= rate) return 7;
-    else if (13856 <= rate) return 8;
-    else if (11502 <= rate) return 9;
-    else if (9391  <= rate) return 10;
-    else                    return 11;
-}
-
-static void reset_predictor_group(PredictorState *ps, int group_num)
-{
-    int i;
-    for (i = group_num - 1; i < MAX_PREDICTORS; i += 30)
-        reset_predict_state(&ps[i]);
-}
-
-#define AAC_INIT_VLC_STATIC(num, size)                                     \
-    INIT_VLC_STATIC(&vlc_spectral[num], 8, ff_aac_spectral_sizes[num],     \
-         ff_aac_spectral_bits[num], sizeof(ff_aac_spectral_bits[num][0]),  \
-                                    sizeof(ff_aac_spectral_bits[num][0]),  \
-        ff_aac_spectral_codes[num], sizeof(ff_aac_spectral_codes[num][0]), \
-                                    sizeof(ff_aac_spectral_codes[num][0]), \
-        size);
-
-static void aacdec_init(AACContext *ac);
-
-static av_cold int aac_decode_init(AVCodecContext *avctx)
-{
-    AACContext *ac = avctx->priv_data;
-    int ret;
-
-    ac->avctx = avctx;
-    ac->oc[1].m4ac.sample_rate = avctx->sample_rate;
-
-    aacdec_init(ac);
-
-    avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
-
-    if (avctx->extradata_size > 0) {
-        if ((ret = decode_audio_specific_config(ac, ac->avctx, &ac->oc[1].m4ac,
-                                                avctx->extradata,
-                                                avctx->extradata_size * 8,
-                                                1)) < 0)
-            return ret;
-    } else {
-        int sr, i;
-        uint8_t layout_map[MAX_ELEM_ID*4][3];
-        int layout_map_tags;
-
-        sr = sample_rate_idx(avctx->sample_rate);
-        ac->oc[1].m4ac.sampling_index = sr;
-        ac->oc[1].m4ac.channels = avctx->channels;
-        ac->oc[1].m4ac.sbr = -1;
-        ac->oc[1].m4ac.ps = -1;
-
-        for (i = 0; i < FF_ARRAY_ELEMS(ff_mpeg4audio_channels); i++)
-            if (ff_mpeg4audio_channels[i] == avctx->channels)
-                break;
-        if (i == FF_ARRAY_ELEMS(ff_mpeg4audio_channels)) {
-            i = 0;
-        }
-        ac->oc[1].m4ac.chan_config = i;
-
-        if (ac->oc[1].m4ac.chan_config) {
-            int ret = set_default_channel_config(avctx, layout_map,
-                &layout_map_tags, ac->oc[1].m4ac.chan_config);
-            if (!ret)
-                output_configure(ac, layout_map, layout_map_tags,
-                                 OC_GLOBAL_HDR, 0);
-            else if (avctx->err_recognition & AV_EF_EXPLODE)
-                return AVERROR_INVALIDDATA;
-        }
-    }
-
-    if (avctx->channels > MAX_CHANNELS) {
-        av_log(avctx, AV_LOG_ERROR, "Too many channels\n");
-        return AVERROR_INVALIDDATA;
-    }
-
-    AAC_INIT_VLC_STATIC( 0, 304);
-    AAC_INIT_VLC_STATIC( 1, 270);
-    AAC_INIT_VLC_STATIC( 2, 550);
-    AAC_INIT_VLC_STATIC( 3, 300);
-    AAC_INIT_VLC_STATIC( 4, 328);
-    AAC_INIT_VLC_STATIC( 5, 294);
-    AAC_INIT_VLC_STATIC( 6, 306);
-    AAC_INIT_VLC_STATIC( 7, 268);
-    AAC_INIT_VLC_STATIC( 8, 510);
-    AAC_INIT_VLC_STATIC( 9, 366);
-    AAC_INIT_VLC_STATIC(10, 462);
-
-    ff_aac_sbr_init();
-
-    ac->fdsp = avpriv_float_dsp_alloc(avctx->flags & CODEC_FLAG_BITEXACT);
-    if (!ac->fdsp) {
-        return AVERROR(ENOMEM);
-    }
-
-    ac->random_state = 0x1f2e3d4c;
-
-    ff_aac_tableinit();
-
-    INIT_VLC_STATIC(&vlc_scalefactors, 7,
-                    FF_ARRAY_ELEMS(ff_aac_scalefactor_code),
-                    ff_aac_scalefactor_bits,
-                    sizeof(ff_aac_scalefactor_bits[0]),
-                    sizeof(ff_aac_scalefactor_bits[0]),
-                    ff_aac_scalefactor_code,
-                    sizeof(ff_aac_scalefactor_code[0]),
-                    sizeof(ff_aac_scalefactor_code[0]),
-                    352);
-
-    ff_mdct_init(&ac->mdct,       11, 1, 1.0 / (32768.0 * 1024.0));
-    ff_mdct_init(&ac->mdct_ld,    10, 1, 1.0 / (32768.0 * 512.0));
-    ff_mdct_init(&ac->mdct_small,  8, 1, 1.0 / (32768.0 * 128.0));
-    ff_mdct_init(&ac->mdct_ltp,   11, 0, -2.0 * 32768.0);
-    ret = ff_imdct15_init(&ac->mdct480, 5);
-    if (ret < 0)
-        return ret;
-
-    // window initialization
-    ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
-    ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
-    ff_init_ff_sine_windows(10);
-    ff_init_ff_sine_windows( 9);
-    ff_init_ff_sine_windows( 7);
-
-    cbrt_tableinit();
-
-    return 0;
-}
-
-/**
- * Skip data_stream_element; reference: table 4.10.
- */
-static int skip_data_stream_element(AACContext *ac, GetBitContext *gb)
-{
-    int byte_align = get_bits1(gb);
-    int count = get_bits(gb, 8);
-    if (count == 255)
-        count += get_bits(gb, 8);
-    if (byte_align)
-        align_get_bits(gb);
-
-    if (get_bits_left(gb) < 8 * count) {
-        av_log(ac->avctx, AV_LOG_ERROR, "skip_data_stream_element: "overread_err);
-        return AVERROR_INVALIDDATA;
-    }
-    skip_bits_long(gb, 8 * count);
-    return 0;
-}
-
-static int decode_prediction(AACContext *ac, IndividualChannelStream *ics,
-                             GetBitContext *gb)
-{
-    int sfb;
-    if (get_bits1(gb)) {
-        ics->predictor_reset_group = get_bits(gb, 5);
-        if (ics->predictor_reset_group == 0 ||
-            ics->predictor_reset_group > 30) {
-            av_log(ac->avctx, AV_LOG_ERROR,
-                   "Invalid Predictor Reset Group.\n");
-            return AVERROR_INVALIDDATA;
-        }
-    }
-    for (sfb = 0; sfb < FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[ac->oc[1].m4ac.sampling_index]); sfb++) {
-        ics->prediction_used[sfb] = get_bits1(gb);
-    }
-    return 0;
-}
-
-/**
- * Decode Long Term Prediction data; reference: table 4.xx.
- */
-static void decode_ltp(LongTermPrediction *ltp,
-                       GetBitContext *gb, uint8_t max_sfb)
-{
-    int sfb;
-
-    ltp->lag  = get_bits(gb, 11);
-    ltp->coef = ltp_coef[get_bits(gb, 3)];
-    for (sfb = 0; sfb < FFMIN(max_sfb, MAX_LTP_LONG_SFB); sfb++)
-        ltp->used[sfb] = get_bits1(gb);
-}
-
-/**
- * Decode Individual Channel Stream info; reference: table 4.6.
- */
-static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics,
-                           GetBitContext *gb)
-{
-    const MPEG4AudioConfig *const m4ac = &ac->oc[1].m4ac;
-    const int aot = m4ac->object_type;
-    const int sampling_index = m4ac->sampling_index;
-    if (aot != AOT_ER_AAC_ELD) {
-        if (get_bits1(gb)) {
-            av_log(ac->avctx, AV_LOG_ERROR, "Reserved bit set.\n");
-            if (ac->avctx->err_recognition & AV_EF_BITSTREAM)
-                return AVERROR_INVALIDDATA;
-        }
-        ics->window_sequence[1] = ics->window_sequence[0];
-        ics->window_sequence[0] = get_bits(gb, 2);
-        if (aot == AOT_ER_AAC_LD &&
-            ics->window_sequence[0] != ONLY_LONG_SEQUENCE) {
-            av_log(ac->avctx, AV_LOG_ERROR,
-                   "AAC LD is only defined for ONLY_LONG_SEQUENCE but "
-                   "window sequence %d found.\n", ics->window_sequence[0]);
-            ics->window_sequence[0] = ONLY_LONG_SEQUENCE;
-            return AVERROR_INVALIDDATA;
-        }
-        ics->use_kb_window[1]   = ics->use_kb_window[0];
-        ics->use_kb_window[0]   = get_bits1(gb);
-    }
-    ics->num_window_groups  = 1;
-    ics->group_len[0]       = 1;
-    if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
-        int i;
-        ics->max_sfb = get_bits(gb, 4);
-        for (i = 0; i < 7; i++) {
-            if (get_bits1(gb)) {
-                ics->group_len[ics->num_window_groups - 1]++;
-            } else {
-                ics->num_window_groups++;
-                ics->group_len[ics->num_window_groups - 1] = 1;
-            }
-        }
-        ics->num_windows       = 8;
-        ics->swb_offset        =    ff_swb_offset_128[sampling_index];
-        ics->num_swb           =   ff_aac_num_swb_128[sampling_index];
-        ics->tns_max_bands     = ff_tns_max_bands_128[sampling_index];
-        ics->predictor_present = 0;
-    } else {
-        ics->max_sfb           = get_bits(gb, 6);
-        ics->num_windows       = 1;
-        if (aot == AOT_ER_AAC_LD || aot == AOT_ER_AAC_ELD) {
-            if (m4ac->frame_length_short) {
-                ics->swb_offset    =     ff_swb_offset_480[sampling_index];
-                ics->num_swb       =    ff_aac_num_swb_480[sampling_index];
-                ics->tns_max_bands =  ff_tns_max_bands_480[sampling_index];
-            } else {
-                ics->swb_offset    =     ff_swb_offset_512[sampling_index];
-                ics->num_swb       =    ff_aac_num_swb_512[sampling_index];
-                ics->tns_max_bands =  ff_tns_max_bands_512[sampling_index];
-            }
-            if (!ics->num_swb || !ics->swb_offset)
-                return AVERROR_BUG;
-        } else {
-            ics->swb_offset    =    ff_swb_offset_1024[sampling_index];
-            ics->num_swb       =   ff_aac_num_swb_1024[sampling_index];
-            ics->tns_max_bands = ff_tns_max_bands_1024[sampling_index];
-        }
-        if (aot != AOT_ER_AAC_ELD) {
-            ics->predictor_present     = get_bits1(gb);
-            ics->predictor_reset_group = 0;
-        }
-        if (ics->predictor_present) {
-            if (aot == AOT_AAC_MAIN) {
-                if (decode_prediction(ac, ics, gb)) {
-                    goto fail;
-                }
-            } else if (aot == AOT_AAC_LC ||
-                       aot == AOT_ER_AAC_LC) {
-                av_log(ac->avctx, AV_LOG_ERROR,
-                       "Prediction is not allowed in AAC-LC.\n");
-                goto fail;
-            } else {
-                if (aot == AOT_ER_AAC_LD) {
-                    av_log(ac->avctx, AV_LOG_ERROR,
-                           "LTP in ER AAC LD not yet implemented.\n");
-                    return AVERROR_PATCHWELCOME;
-                }
-                if ((ics->ltp.present = get_bits(gb, 1)))
-                    decode_ltp(&ics->ltp, gb, ics->max_sfb);
-            }
-        }
-    }
-
-    if (ics->max_sfb > ics->num_swb) {
-        av_log(ac->avctx, AV_LOG_ERROR,
-               "Number of scalefactor bands in group (%d) "
-               "exceeds limit (%d).\n",
-               ics->max_sfb, ics->num_swb);
-        goto fail;
-    }
-
-    return 0;
-fail:
-    ics->max_sfb = 0;
-    return AVERROR_INVALIDDATA;
-}
-
-/**
- * Decode band types (section_data payload); reference: table 4.46.
- *
- * @param   band_type           array of the used band type
- * @param   band_type_run_end   array of the last scalefactor band of a band type run
- *
- * @return  Returns error status. 0 - OK, !0 - error
- */
-static int decode_band_types(AACContext *ac, enum BandType band_type[120],
-                             int band_type_run_end[120], GetBitContext *gb,
-                             IndividualChannelStream *ics)
-{
-    int g, idx = 0;
-    const int bits = (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) ? 3 : 5;
-    for (g = 0; g < ics->num_window_groups; g++) {
-        int k = 0;
-        while (k < ics->max_sfb) {
-            uint8_t sect_end = k;
-            int sect_len_incr;
-            int sect_band_type = get_bits(gb, 4);
-            if (sect_band_type == 12) {
-                av_log(ac->avctx, AV_LOG_ERROR, "invalid band type\n");
-                return AVERROR_INVALIDDATA;
-            }
-            do {
-                sect_len_incr = get_bits(gb, bits);
-                sect_end += sect_len_incr;
-                if (get_bits_left(gb) < 0) {
-                    av_log(ac->avctx, AV_LOG_ERROR, "decode_band_types: "overread_err);
-                    return AVERROR_INVALIDDATA;
-                }
-                if (sect_end > ics->max_sfb) {
-                    av_log(ac->avctx, AV_LOG_ERROR,
-                           "Number of bands (%d) exceeds limit (%d).\n",
-                           sect_end, ics->max_sfb);
-                    return AVERROR_INVALIDDATA;
-                }
-            } while (sect_len_incr == (1 << bits) - 1);
-            for (; k < sect_end; k++) {
-                band_type        [idx]   = sect_band_type;
-                band_type_run_end[idx++] = sect_end;
-            }
-        }
-    }
-    return 0;
-}
-
-/**
- * Decode scalefactors; reference: table 4.47.
- *
- * @param   global_gain         first scalefactor value as scalefactors are differentially coded
- * @param   band_type           array of the used band type
- * @param   band_type_run_end   array of the last scalefactor band of a band type run
- * @param   sf                  array of scalefactors or intensity stereo positions
- *
- * @return  Returns error status. 0 - OK, !0 - error
- */
-static int decode_scalefactors(AACContext *ac, float sf[120], GetBitContext *gb,
-                               unsigned int global_gain,
-                               IndividualChannelStream *ics,
-                               enum BandType band_type[120],
-                               int band_type_run_end[120])
-{
-    int g, i, idx = 0;
-    int offset[3] = { global_gain, global_gain - NOISE_OFFSET, 0 };
-    int clipped_offset;
-    int noise_flag = 1;
-    for (g = 0; g < ics->num_window_groups; g++) {
-        for (i = 0; i < ics->max_sfb;) {
-            int run_end = band_type_run_end[idx];
-            if (band_type[idx] == ZERO_BT) {
-                for (; i < run_end; i++, idx++)
-                    sf[idx] = 0.0;
-            } else if ((band_type[idx] == INTENSITY_BT) ||
-                       (band_type[idx] == INTENSITY_BT2)) {
-                for (; i < run_end; i++, idx++) {
-                    offset[2] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - SCALE_DIFF_ZERO;
-                    clipped_offset = av_clip(offset[2], -155, 100);
-                    if (offset[2] != clipped_offset) {
-                        avpriv_request_sample(ac->avctx,
-                                              "If you heard an audible artifact, there may be a bug in the decoder. "
-                                              "Clipped intensity stereo position (%d -> %d)",
-                                              offset[2], clipped_offset);
-                    }
-                    sf[idx] = ff_aac_pow2sf_tab[-clipped_offset + POW_SF2_ZERO];
-                }
-            } else if (band_type[idx] == NOISE_BT) {
-                for (; i < run_end; i++, idx++) {
-                    if (noise_flag-- > 0)
-                        offset[1] += get_bits(gb, NOISE_PRE_BITS) - NOISE_PRE;
-                    else
-                        offset[1] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - SCALE_DIFF_ZERO;
-                    clipped_offset = av_clip(offset[1], -100, 155);
-                    if (offset[1] != clipped_offset) {
-                        avpriv_request_sample(ac->avctx,
-                                              "If you heard an audible artifact, there may be a bug in the decoder. "
-                                              "Clipped noise gain (%d -> %d)",
-                                              offset[1], clipped_offset);
-                    }
-                    sf[idx] = -ff_aac_pow2sf_tab[clipped_offset + POW_SF2_ZERO];
-                }
-            } else {
-                for (; i < run_end; i++, idx++) {
-                    offset[0] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - SCALE_DIFF_ZERO;
-                    if (offset[0] > 255U) {
-                        av_log(ac->avctx, AV_LOG_ERROR,
-                               "Scalefactor (%d) out of range.\n", offset[0]);
-                        return AVERROR_INVALIDDATA;
-                    }
-                    sf[idx] = -ff_aac_pow2sf_tab[offset[0] - 100 + POW_SF2_ZERO];
-                }
-            }
-        }
-    }
-    return 0;
-}
-
-/**
- * Decode pulse data; reference: table 4.7.
- */
-static int decode_pulses(Pulse *pulse, GetBitContext *gb,
-                         const uint16_t *swb_offset, int num_swb)
-{
-    int i, pulse_swb;
-    pulse->num_pulse = get_bits(gb, 2) + 1;
-    pulse_swb        = get_bits(gb, 6);
-    if (pulse_swb >= num_swb)
-        return -1;
-    pulse->pos[0]    = swb_offset[pulse_swb];
-    pulse->pos[0]   += get_bits(gb, 5);
-    if (pulse->pos[0] >= swb_offset[num_swb])
-        return -1;
-    pulse->amp[0]    = get_bits(gb, 4);
-    for (i = 1; i < pulse->num_pulse; i++) {
-        pulse->pos[i] = get_bits(gb, 5) + pulse->pos[i - 1];
-        if (pulse->pos[i] >= swb_offset[num_swb])
-            return -1;
-        pulse->amp[i] = get_bits(gb, 4);
-    }
-    return 0;
-}
-
-/**
- * Decode Temporal Noise Shaping data; reference: table 4.48.
- *
- * @return  Returns error status. 0 - OK, !0 - error
- */
-static int decode_tns(AACContext *ac, TemporalNoiseShaping *tns,
-                      GetBitContext *gb, const IndividualChannelStream *ics)
-{
-    int w, filt, i, coef_len, coef_res, coef_compress;
-    const int is8 = ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE;
-    const int tns_max_order = is8 ? 7 : ac->oc[1].m4ac.object_type == AOT_AAC_MAIN ? 20 : 12;
-    for (w = 0; w < ics->num_windows; w++) {
-        if ((tns->n_filt[w] = get_bits(gb, 2 - is8))) {
-            coef_res = get_bits1(gb);
-
-            for (filt = 0; filt < tns->n_filt[w]; filt++) {
-                int tmp2_idx;
-                tns->length[w][filt] = get_bits(gb, 6 - 2 * is8);
-
-                if ((tns->order[w][filt] = get_bits(gb, 5 - 2 * is8)) > tns_max_order) {
-                    av_log(ac->avctx, AV_LOG_ERROR,
-                           "TNS filter order %d is greater than maximum %d.\n",
-                           tns->order[w][filt], tns_max_order);
-                    tns->order[w][filt] = 0;
-                    return AVERROR_INVALIDDATA;
-                }
-                if (tns->order[w][filt]) {
-                    tns->direction[w][filt] = get_bits1(gb);
-                    coef_compress = get_bits1(gb);
-                    coef_len = coef_res + 3 - coef_compress;
-                    tmp2_idx = 2 * coef_compress + coef_res;
-
-                    for (i = 0; i < tns->order[w][filt]; i++)
-                        tns->coef[w][filt][i] = tns_tmp2_map[tmp2_idx][get_bits(gb, coef_len)];
-                }
-            }
-        }
-    }
-    return 0;
-}
+#   include "arm/aac.h"
+#elif ARCH_MIPS
+#   include "mips/aacdec_mips.h"
+#endif
 
-/**
- * Decode Mid/Side data; reference: table 4.54.
- *
- * @param   ms_present  Indicates mid/side stereo presence. [0] mask is all 0s;
- *                      [1] mask is decoded from bitstream; [2] mask is all 1s;
- *                      [3] reserved for scalable AAC
- */
-static void decode_mid_side_stereo(ChannelElement *cpe, GetBitContext *gb,
-                                   int ms_present)
+static av_always_inline void reset_predict_state(PredictorState *ps)
 {
-    int idx;
-    int max_idx = cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb;
-    if (ms_present == 1) {
-        for (idx = 0; idx < max_idx; idx++)
-            cpe->ms_mask[idx] = get_bits1(gb);
-    } else if (ms_present == 2) {
-        memset(cpe->ms_mask, 1, max_idx * sizeof(cpe->ms_mask[0]));
-    }
+    ps->r0   = 0.0f;
+    ps->r1   = 0.0f;
+    ps->cor0 = 0.0f;
+    ps->cor1 = 0.0f;
+    ps->var0 = 1.0f;
+    ps->var1 = 1.0f;
 }
 
 #ifndef VMUL2
@@ -1611,1062 +136,70 @@ static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
     *dst++ = v[idx>>2 & 3] * t.f;
 
     sign <<= nz & 1; nz >>= 1;
-    t.i = s.i ^ (sign & 1U<<31);
-    *dst++ = v[idx>>4 & 3] * t.f;
-
-    sign <<= nz & 1;
-    t.i = s.i ^ (sign & 1U<<31);
-    *dst++ = v[idx>>6 & 3] * t.f;
-
-    return dst;
-}
-#endif
-
-/**
- * Decode spectral data; reference: table 4.50.
- * Dequantize and scale spectral data; reference: 4.6.3.3.
- *
- * @param   coef            array of dequantized, scaled spectral data
- * @param   sf              array of scalefactors or intensity stereo positions
- * @param   pulse_present   set if pulses are present
- * @param   pulse           pointer to pulse data struct
- * @param   band_type       array of the used band type
- *
- * @return  Returns error status. 0 - OK, !0 - error
- */
-static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
-                                       GetBitContext *gb, const float sf[120],
-                                       int pulse_present, const Pulse *pulse,
-                                       const IndividualChannelStream *ics,
-                                       enum BandType band_type[120])
-{
-    int i, k, g, idx = 0;
-    const int c = 1024 / ics->num_windows;
-    const uint16_t *offsets = ics->swb_offset;
-    float *coef_base = coef;
-
-    for (g = 0; g < ics->num_windows; g++)
-        memset(coef + g * 128 + offsets[ics->max_sfb], 0,
-               sizeof(float) * (c - offsets[ics->max_sfb]));
-
-    for (g = 0; g < ics->num_window_groups; g++) {
-        unsigned g_len = ics->group_len[g];
-
-        for (i = 0; i < ics->max_sfb; i++, idx++) {
-            const unsigned cbt_m1 = band_type[idx] - 1;
-            float *cfo = coef + offsets[i];
-            int off_len = offsets[i + 1] - offsets[i];
-            int group;
-
-            if (cbt_m1 >= INTENSITY_BT2 - 1) {
-                for (group = 0; group < g_len; group++, cfo+=128) {
-                    memset(cfo, 0, off_len * sizeof(float));
-                }
-            } else if (cbt_m1 == NOISE_BT - 1) {
-                for (group = 0; group < g_len; group++, cfo+=128) {
-                    float scale;
-                    float band_energy;
-
-                    for (k = 0; k < off_len; k++) {
-                        ac->random_state  = lcg_random(ac->random_state);
-                        cfo[k] = ac->random_state;
-                    }
-
-                    band_energy = ac->fdsp->scalarproduct_float(cfo, cfo, off_len);
-                    scale = sf[idx] / sqrtf(band_energy);
-                    ac->fdsp->vector_fmul_scalar(cfo, cfo, scale, off_len);
-                }
-            } else {
-                const float *vq = ff_aac_codebook_vector_vals[cbt_m1];
-                const uint16_t *cb_vector_idx = ff_aac_codebook_vector_idx[cbt_m1];
-                VLC_TYPE (*vlc_tab)[2] = vlc_spectral[cbt_m1].table;
-                OPEN_READER(re, gb);
-
-                switch (cbt_m1 >> 1) {
-                case 0:
-                    for (group = 0; group < g_len; group++, cfo+=128) {
-                        float *cf = cfo;
-                        int len = off_len;
-
-                        do {
-                            int code;
-                            unsigned cb_idx;
-
-                            UPDATE_CACHE(re, gb);
-                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
-                            cb_idx = cb_vector_idx[code];
-                            cf = VMUL4(cf, vq, cb_idx, sf + idx);
-                        } while (len -= 4);
-                    }
-                    break;
-
-                case 1:
-                    for (group = 0; group < g_len; group++, cfo+=128) {
-                        float *cf = cfo;
-                        int len = off_len;
-
-                        do {
-                            int code;
-                            unsigned nnz;
-                            unsigned cb_idx;
-                            uint32_t bits;
-
-                            UPDATE_CACHE(re, gb);
-                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
-                            cb_idx = cb_vector_idx[code];
-                            nnz = cb_idx >> 8 & 15;
-                            bits = nnz ? GET_CACHE(re, gb) : 0;
-                            LAST_SKIP_BITS(re, gb, nnz);
-                            cf = VMUL4S(cf, vq, cb_idx, bits, sf + idx);
-                        } while (len -= 4);
-                    }
-                    break;
-
-                case 2:
-                    for (group = 0; group < g_len; group++, cfo+=128) {
-                        float *cf = cfo;
-                        int len = off_len;
-
-                        do {
-                            int code;
-                            unsigned cb_idx;
-
-                            UPDATE_CACHE(re, gb);
-                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
-                            cb_idx = cb_vector_idx[code];
-                            cf = VMUL2(cf, vq, cb_idx, sf + idx);
-                        } while (len -= 2);
-                    }
-                    break;
-
-                case 3:
-                case 4:
-                    for (group = 0; group < g_len; group++, cfo+=128) {
-                        float *cf = cfo;
-                        int len = off_len;
-
-                        do {
-                            int code;
-                            unsigned nnz;
-                            unsigned cb_idx;
-                            unsigned sign;
-
-                            UPDATE_CACHE(re, gb);
-                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
-                            cb_idx = cb_vector_idx[code];
-                            nnz = cb_idx >> 8 & 15;
-                            sign = nnz ? SHOW_UBITS(re, gb, nnz) << (cb_idx >> 12) : 0;
-                            LAST_SKIP_BITS(re, gb, nnz);
-                            cf = VMUL2S(cf, vq, cb_idx, sign, sf + idx);
-                        } while (len -= 2);
-                    }
-                    break;
-
-                default:
-                    for (group = 0; group < g_len; group++, cfo+=128) {
-                        float *cf = cfo;
-                        uint32_t *icf = (uint32_t *) cf;
-                        int len = off_len;
-
-                        do {
-                            int code;
-                            unsigned nzt, nnz;
-                            unsigned cb_idx;
-                            uint32_t bits;
-                            int j;
-
-                            UPDATE_CACHE(re, gb);
-                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
-
-                            if (!code) {
-                                *icf++ = 0;
-                                *icf++ = 0;
-                                continue;
-                            }
-
-                            cb_idx = cb_vector_idx[code];
-                            nnz = cb_idx >> 12;
-                            nzt = cb_idx >> 8;
-                            bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
-                            LAST_SKIP_BITS(re, gb, nnz);
-
-                            for (j = 0; j < 2; j++) {
-                                if (nzt & 1<<j) {
-                                    uint32_t b;
-                                    int n;
-                                    /* The total length of escape_sequence must be < 22 bits according
-                                       to the specification (i.e. max is 111111110xxxxxxxxxxxx). */
-                                    UPDATE_CACHE(re, gb);
-                                    b = GET_CACHE(re, gb);
-                                    b = 31 - av_log2(~b);
-
-                                    if (b > 8) {
-                                        av_log(ac->avctx, AV_LOG_ERROR, "error in spectral data, ESC overflow\n");
-                                        return AVERROR_INVALIDDATA;
-                                    }
-
-                                    SKIP_BITS(re, gb, b + 1);
-                                    b += 4;
-                                    n = (1 << b) + SHOW_UBITS(re, gb, b);
-                                    LAST_SKIP_BITS(re, gb, b);
-                                    *icf++ = cbrt_tab[n] | (bits & 1U<<31);
-                                    bits <<= 1;
-                                } else {
-                                    unsigned v = ((const uint32_t*)vq)[cb_idx & 15];
-                                    *icf++ = (bits & 1U<<31) | v;
-                                    bits <<= !!v;
-                                }
-                                cb_idx >>= 4;
-                            }
-                        } while (len -= 2);
-
-                        ac->fdsp->vector_fmul_scalar(cfo, cfo, sf[idx], off_len);
-                    }
-                }
-
-                CLOSE_READER(re, gb);
-            }
-        }
-        coef += g_len << 7;
-    }
-
-    if (pulse_present) {
-        idx = 0;
-        for (i = 0; i < pulse->num_pulse; i++) {
-            float co = coef_base[ pulse->pos[i] ];
-            while (offsets[idx + 1] <= pulse->pos[i])
-                idx++;
-            if (band_type[idx] != NOISE_BT && sf[idx]) {
-                float ico = -pulse->amp[i];
-                if (co) {
-                    co /= sf[idx];
-                    ico = co / sqrtf(sqrtf(fabsf(co))) + (co > 0 ? -ico : ico);
-                }
-                coef_base[ pulse->pos[i] ] = cbrtf(fabsf(ico)) * ico * sf[idx];
-            }
-        }
-    }
-    return 0;
-}
-
-static av_always_inline float flt16_round(float pf)
-{
-    union av_intfloat32 tmp;
-    tmp.f = pf;
-    tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
-    return tmp.f;
-}
-
-static av_always_inline float flt16_even(float pf)
-{
-    union av_intfloat32 tmp;
-    tmp.f = pf;
-    tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
-    return tmp.f;
-}
-
-static av_always_inline float flt16_trunc(float pf)
-{
-    union av_intfloat32 pun;
-    pun.f = pf;
-    pun.i &= 0xFFFF0000U;
-    return pun.f;
-}
-
-static av_always_inline void predict(PredictorState *ps, float *coef,
-                                     int output_enable)
-{
-    const float a     = 0.953125; // 61.0 / 64
-    const float alpha = 0.90625;  // 29.0 / 32
-    float e0, e1;
-    float pv;
-    float k1, k2;
-    float   r0 = ps->r0,     r1 = ps->r1;
-    float cor0 = ps->cor0, cor1 = ps->cor1;
-    float var0 = ps->var0, var1 = ps->var1;
-
-    k1 = var0 > 1 ? cor0 * flt16_even(a / var0) : 0;
-    k2 = var1 > 1 ? cor1 * flt16_even(a / var1) : 0;
-
-    pv = flt16_round(k1 * r0 + k2 * r1);
-    if (output_enable)
-        *coef += pv;
-
-    e0 = *coef;
-    e1 = e0 - k1 * r0;
-
-    ps->cor1 = flt16_trunc(alpha * cor1 + r1 * e1);
-    ps->var1 = flt16_trunc(alpha * var1 + 0.5f * (r1 * r1 + e1 * e1));
-    ps->cor0 = flt16_trunc(alpha * cor0 + r0 * e0);
-    ps->var0 = flt16_trunc(alpha * var0 + 0.5f * (r0 * r0 + e0 * e0));
-
-    ps->r1 = flt16_trunc(a * (r0 - k1 * e0));
-    ps->r0 = flt16_trunc(a * e0);
-}
-
-/**
- * Apply AAC-Main style frequency domain prediction.
- */
-static void apply_prediction(AACContext *ac, SingleChannelElement *sce)
-{
-    int sfb, k;
-
-    if (!sce->ics.predictor_initialized) {
-        reset_all_predictors(sce->predictor_state);
-        sce->ics.predictor_initialized = 1;
-    }
-
-    if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
-        for (sfb = 0;
-             sfb < ff_aac_pred_sfb_max[ac->oc[1].m4ac.sampling_index];
-             sfb++) {
-            for (k = sce->ics.swb_offset[sfb];
-                 k < sce->ics.swb_offset[sfb + 1];
-                 k++) {
-                predict(&sce->predictor_state[k], &sce->coeffs[k],
-                        sce->ics.predictor_present &&
-                        sce->ics.prediction_used[sfb]);
-            }
-        }
-        if (sce->ics.predictor_reset_group)
-            reset_predictor_group(sce->predictor_state,
-                                  sce->ics.predictor_reset_group);
-    } else
-        reset_all_predictors(sce->predictor_state);
-}
-
-/**
- * Decode an individual_channel_stream payload; reference: table 4.44.
- *
- * @param   common_window   Channels have independent [0], or shared [1], Individual Channel Stream information.
- * @param   scale_flag      scalable [1] or non-scalable [0] AAC (Unused until scalable AAC is implemented.)
- *
- * @return  Returns error status. 0 - OK, !0 - error
- */
-static int decode_ics(AACContext *ac, SingleChannelElement *sce,
-                      GetBitContext *gb, int common_window, int scale_flag)
-{
-    Pulse pulse;
-    TemporalNoiseShaping    *tns = &sce->tns;
-    IndividualChannelStream *ics = &sce->ics;
-    float *out = sce->coeffs;
-    int global_gain, eld_syntax, er_syntax, pulse_present = 0;
-    int ret;
-
-    eld_syntax = ac->oc[1].m4ac.object_type == AOT_ER_AAC_ELD;
-    er_syntax  = ac->oc[1].m4ac.object_type == AOT_ER_AAC_LC ||
-                 ac->oc[1].m4ac.object_type == AOT_ER_AAC_LTP ||
-                 ac->oc[1].m4ac.object_type == AOT_ER_AAC_LD ||
-                 ac->oc[1].m4ac.object_type == AOT_ER_AAC_ELD;
-
-    /* This assignment is to silence a GCC warning about the variable being used
-     * uninitialized when in fact it always is.
-     */
-    pulse.num_pulse = 0;
-
-    global_gain = get_bits(gb, 8);
-
-    if (!common_window && !scale_flag) {
-        if (decode_ics_info(ac, ics, gb) < 0)
-            return AVERROR_INVALIDDATA;
-    }
-
-    if ((ret = decode_band_types(ac, sce->band_type,
-                                 sce->band_type_run_end, gb, ics)) < 0)
-        return ret;
-    if ((ret = decode_scalefactors(ac, sce->sf, gb, global_gain, ics,
-                                  sce->band_type, sce->band_type_run_end)) < 0)
-        return ret;
-
-    pulse_present = 0;
-    if (!scale_flag) {
-        if (!eld_syntax && (pulse_present = get_bits1(gb))) {
-            if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
-                av_log(ac->avctx, AV_LOG_ERROR,
-                       "Pulse tool not allowed in eight short sequence.\n");
-                return AVERROR_INVALIDDATA;
-            }
-            if (decode_pulses(&pulse, gb, ics->swb_offset, ics->num_swb)) {
-                av_log(ac->avctx, AV_LOG_ERROR,
-                       "Pulse data corrupt or invalid.\n");
-                return AVERROR_INVALIDDATA;
-            }
-        }
-        tns->present = get_bits1(gb);
-        if (tns->present && !er_syntax)
-            if (decode_tns(ac, tns, gb, ics) < 0)
-                return AVERROR_INVALIDDATA;
-        if (!eld_syntax && get_bits1(gb)) {
-            avpriv_request_sample(ac->avctx, "SSR");
-            return AVERROR_PATCHWELCOME;
-        }
-        // I see no textual basis in the spec for this occurring after SSR gain
-        // control, but this is what both reference and real implmentations do
-        if (tns->present && er_syntax)
-            if (decode_tns(ac, tns, gb, ics) < 0)
-                return AVERROR_INVALIDDATA;
-    }
-
-    if (decode_spectrum_and_dequant(ac, out, gb, sce->sf, pulse_present,
-                                    &pulse, ics, sce->band_type) < 0)
-        return AVERROR_INVALIDDATA;
-
-    if (ac->oc[1].m4ac.object_type == AOT_AAC_MAIN && !common_window)
-        apply_prediction(ac, sce);
-
-    return 0;
-}
-
-/**
- * Mid/Side stereo decoding; reference: 4.6.8.1.3.
- */
-static void apply_mid_side_stereo(AACContext *ac, ChannelElement *cpe)
-{
-    const IndividualChannelStream *ics = &cpe->ch[0].ics;
-    float *ch0 = cpe->ch[0].coeffs;
-    float *ch1 = cpe->ch[1].coeffs;
-    int g, i, group, idx = 0;
-    const uint16_t *offsets = ics->swb_offset;
-    for (g = 0; g < ics->num_window_groups; g++) {
-        for (i = 0; i < ics->max_sfb; i++, idx++) {
-            if (cpe->ms_mask[idx] &&
-                cpe->ch[0].band_type[idx] < NOISE_BT &&
-                cpe->ch[1].band_type[idx] < NOISE_BT) {
-                for (group = 0; group < ics->group_len[g]; group++) {
-                    ac->fdsp->butterflies_float(ch0 + group * 128 + offsets[i],
-                                               ch1 + group * 128 + offsets[i],
-                                               offsets[i+1] - offsets[i]);
-                }
-            }
-        }
-        ch0 += ics->group_len[g] * 128;
-        ch1 += ics->group_len[g] * 128;
-    }
-}
-
-/**
- * intensity stereo decoding; reference: 4.6.8.2.3
- *
- * @param   ms_present  Indicates mid/side stereo presence. [0] mask is all 0s;
- *                      [1] mask is decoded from bitstream; [2] mask is all 1s;
- *                      [3] reserved for scalable AAC
- */
-static void apply_intensity_stereo(AACContext *ac,
-                                   ChannelElement *cpe, int ms_present)
-{
-    const IndividualChannelStream *ics = &cpe->ch[1].ics;
-    SingleChannelElement         *sce1 = &cpe->ch[1];
-    float *coef0 = cpe->ch[0].coeffs, *coef1 = cpe->ch[1].coeffs;
-    const uint16_t *offsets = ics->swb_offset;
-    int g, group, i, idx = 0;
-    int c;
-    float scale;
-    for (g = 0; g < ics->num_window_groups; g++) {
-        for (i = 0; i < ics->max_sfb;) {
-            if (sce1->band_type[idx] == INTENSITY_BT ||
-                sce1->band_type[idx] == INTENSITY_BT2) {
-                const int bt_run_end = sce1->band_type_run_end[idx];
-                for (; i < bt_run_end; i++, idx++) {
-                    c = -1 + 2 * (sce1->band_type[idx] - 14);
-                    if (ms_present)
-                        c *= 1 - 2 * cpe->ms_mask[idx];
-                    scale = c * sce1->sf[idx];
-                    for (group = 0; group < ics->group_len[g]; group++)
-                        ac->fdsp->vector_fmul_scalar(coef1 + group * 128 + offsets[i],
-                                                    coef0 + group * 128 + offsets[i],
-                                                    scale,
-                                                    offsets[i + 1] - offsets[i]);
-                }
-            } else {
-                int bt_run_end = sce1->band_type_run_end[idx];
-                idx += bt_run_end - i;
-                i    = bt_run_end;
-            }
-        }
-        coef0 += ics->group_len[g] * 128;
-        coef1 += ics->group_len[g] * 128;
-    }
-}
-
-/**
- * Decode a channel_pair_element; reference: table 4.4.
- *
- * @return  Returns error status. 0 - OK, !0 - error
- */
-static int decode_cpe(AACContext *ac, GetBitContext *gb, ChannelElement *cpe)
-{
-    int i, ret, common_window, ms_present = 0;
-    int eld_syntax = ac->oc[1].m4ac.object_type == AOT_ER_AAC_ELD;
-
-    common_window = eld_syntax || get_bits1(gb);
-    if (common_window) {
-        if (decode_ics_info(ac, &cpe->ch[0].ics, gb))
-            return AVERROR_INVALIDDATA;
-        i = cpe->ch[1].ics.use_kb_window[0];
-        cpe->ch[1].ics = cpe->ch[0].ics;
-        cpe->ch[1].ics.use_kb_window[1] = i;
-        if (cpe->ch[1].ics.predictor_present &&
-            (ac->oc[1].m4ac.object_type != AOT_AAC_MAIN))
-            if ((cpe->ch[1].ics.ltp.present = get_bits(gb, 1)))
-                decode_ltp(&cpe->ch[1].ics.ltp, gb, cpe->ch[1].ics.max_sfb);
-        ms_present = get_bits(gb, 2);
-        if (ms_present == 3) {
-            av_log(ac->avctx, AV_LOG_ERROR, "ms_present = 3 is reserved.\n");
-            return AVERROR_INVALIDDATA;
-        } else if (ms_present)
-            decode_mid_side_stereo(cpe, gb, ms_present);
-    }
-    if ((ret = decode_ics(ac, &cpe->ch[0], gb, common_window, 0)))
-        return ret;
-    if ((ret = decode_ics(ac, &cpe->ch[1], gb, common_window, 0)))
-        return ret;
-
-    if (common_window) {
-        if (ms_present)
-            apply_mid_side_stereo(ac, cpe);
-        if (ac->oc[1].m4ac.object_type == AOT_AAC_MAIN) {
-            apply_prediction(ac, &cpe->ch[0]);
-            apply_prediction(ac, &cpe->ch[1]);
-        }
-    }
-
-    apply_intensity_stereo(ac, cpe, ms_present);
-    return 0;
-}
-
-static const float cce_scale[] = {
-    1.09050773266525765921, //2^(1/8)
-    1.18920711500272106672, //2^(1/4)
-    M_SQRT2,
-    2,
-};
-
-/**
- * Decode coupling_channel_element; reference: table 4.8.
- *
- * @return  Returns error status. 0 - OK, !0 - error
- */
-static int decode_cce(AACContext *ac, GetBitContext *gb, ChannelElement *che)
-{
-    int num_gain = 0;
-    int c, g, sfb, ret;
-    int sign;
-    float scale;
-    SingleChannelElement *sce = &che->ch[0];
-    ChannelCoupling     *coup = &che->coup;
-
-    coup->coupling_point = 2 * get_bits1(gb);
-    coup->num_coupled = get_bits(gb, 3);
-    for (c = 0; c <= coup->num_coupled; c++) {
-        num_gain++;
-        coup->type[c] = get_bits1(gb) ? TYPE_CPE : TYPE_SCE;
-        coup->id_select[c] = get_bits(gb, 4);
-        if (coup->type[c] == TYPE_CPE) {
-            coup->ch_select[c] = get_bits(gb, 2);
-            if (coup->ch_select[c] == 3)
-                num_gain++;
-        } else
-            coup->ch_select[c] = 2;
-    }
-    coup->coupling_point += get_bits1(gb) || (coup->coupling_point >> 1);
-
-    sign  = get_bits(gb, 1);
-    scale = cce_scale[get_bits(gb, 2)];
-
-    if ((ret = decode_ics(ac, sce, gb, 0, 0)))
-        return ret;
-
-    for (c = 0; c < num_gain; c++) {
-        int idx  = 0;
-        int cge  = 1;
-        int gain = 0;
-        float gain_cache = 1.0;
-        if (c) {
-            cge = coup->coupling_point == AFTER_IMDCT ? 1 : get_bits1(gb);
-            gain = cge ? get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60: 0;
-            gain_cache = powf(scale, -gain);
-        }
-        if (coup->coupling_point == AFTER_IMDCT) {
-            coup->gain[c][0] = gain_cache;
-        } else {
-            for (g = 0; g < sce->ics.num_window_groups; g++) {
-                for (sfb = 0; sfb < sce->ics.max_sfb; sfb++, idx++) {
-                    if (sce->band_type[idx] != ZERO_BT) {
-                        if (!cge) {
-                            int t = get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
-                            if (t) {
-                                int s = 1;
-                                t = gain += t;
-                                if (sign) {
-                                    s  -= 2 * (t & 0x1);
-                                    t >>= 1;
-                                }
-                                gain_cache = powf(scale, -t) * s;
-                            }
-                        }
-                        coup->gain[c][idx] = gain_cache;
-                    }
-                }
-            }
-        }
-    }
-    return 0;
-}
-
-/**
- * Parse whether channels are to be excluded from Dynamic Range Compression; reference: table 4.53.
- *
- * @return  Returns number of bytes consumed.
- */
-static int decode_drc_channel_exclusions(DynamicRangeControl *che_drc,
-                                         GetBitContext *gb)
-{
-    int i;
-    int num_excl_chan = 0;
-
-    do {
-        for (i = 0; i < 7; i++)
-            che_drc->exclude_mask[num_excl_chan++] = get_bits1(gb);
-    } while (num_excl_chan < MAX_CHANNELS - 7 && get_bits1(gb));
-
-    return num_excl_chan / 7;
-}
-
-/**
- * Decode dynamic range information; reference: table 4.52.
- *
- * @return  Returns number of bytes consumed.
- */
-static int decode_dynamic_range(DynamicRangeControl *che_drc,
-                                GetBitContext *gb)
-{
-    int n             = 1;
-    int drc_num_bands = 1;
-    int i;
-
-    /* pce_tag_present? */
-    if (get_bits1(gb)) {
-        che_drc->pce_instance_tag  = get_bits(gb, 4);
-        skip_bits(gb, 4); // tag_reserved_bits
-        n++;
-    }
-
-    /* excluded_chns_present? */
-    if (get_bits1(gb)) {
-        n += decode_drc_channel_exclusions(che_drc, gb);
-    }
-
-    /* drc_bands_present? */
-    if (get_bits1(gb)) {
-        che_drc->band_incr            = get_bits(gb, 4);
-        che_drc->interpolation_scheme = get_bits(gb, 4);
-        n++;
-        drc_num_bands += che_drc->band_incr;
-        for (i = 0; i < drc_num_bands; i++) {
-            che_drc->band_top[i] = get_bits(gb, 8);
-            n++;
-        }
-    }
-
-    /* prog_ref_level_present? */
-    if (get_bits1(gb)) {
-        che_drc->prog_ref_level = get_bits(gb, 7);
-        skip_bits1(gb); // prog_ref_level_reserved_bits
-        n++;
-    }
-
-    for (i = 0; i < drc_num_bands; i++) {
-        che_drc->dyn_rng_sgn[i] = get_bits1(gb);
-        che_drc->dyn_rng_ctl[i] = get_bits(gb, 7);
-        n++;
-    }
-
-    return n;
-}
-
-static int decode_fill(AACContext *ac, GetBitContext *gb, int len) {
-    uint8_t buf[256];
-    int i, major, minor;
-
-    if (len < 13+7*8)
-        goto unknown;
-
-    get_bits(gb, 13); len -= 13;
-
-    for(i=0; i+1<sizeof(buf) && len>=8; i++, len-=8)
-        buf[i] = get_bits(gb, 8);
-
-    buf[i] = 0;
-    if (ac->avctx->debug & FF_DEBUG_PICT_INFO)
-        av_log(ac->avctx, AV_LOG_DEBUG, "FILL:%s\n", buf);
-
-    if (sscanf(buf, "libfaac %d.%d", &major, &minor) == 2){
-        ac->avctx->internal->skip_samples = 1024;
-    }
-
-unknown:
-    skip_bits_long(gb, len);
-
-    return 0;
-}
-
-/**
- * Decode extension data (incomplete); reference: table 4.51.
- *
- * @param   cnt length of TYPE_FIL syntactic element in bytes
- *
- * @return Returns number of bytes consumed
- */
-static int decode_extension_payload(AACContext *ac, GetBitContext *gb, int cnt,
-                                    ChannelElement *che, enum RawDataBlockType elem_type)
-{
-    int crc_flag = 0;
-    int res = cnt;
-    int type = get_bits(gb, 4);
-
-    if (ac->avctx->debug & FF_DEBUG_STARTCODE)
-        av_log(ac->avctx, AV_LOG_DEBUG, "extension type: %d len:%d\n", type, cnt);
-
-    switch (type) { // extension type
-    case EXT_SBR_DATA_CRC:
-        crc_flag++;
-    case EXT_SBR_DATA:
-        if (!che) {
-            av_log(ac->avctx, AV_LOG_ERROR, "SBR was found before the first channel element.\n");
-            return res;
-        } else if (!ac->oc[1].m4ac.sbr) {
-            av_log(ac->avctx, AV_LOG_ERROR, "SBR signaled to be not-present but was found in the bitstream.\n");
-            skip_bits_long(gb, 8 * cnt - 4);
-            return res;
-        } else if (ac->oc[1].m4ac.sbr == -1 && ac->oc[1].status == OC_LOCKED) {
-            av_log(ac->avctx, AV_LOG_ERROR, "Implicit SBR was found with a first occurrence after the first frame.\n");
-            skip_bits_long(gb, 8 * cnt - 4);
-            return res;
-        } else if (ac->oc[1].m4ac.ps == -1 && ac->oc[1].status < OC_LOCKED && ac->avctx->channels == 1) {
-            ac->oc[1].m4ac.sbr = 1;
-            ac->oc[1].m4ac.ps = 1;
-            ac->avctx->profile = FF_PROFILE_AAC_HE_V2;
-            output_configure(ac, ac->oc[1].layout_map, ac->oc[1].layout_map_tags,
-                             ac->oc[1].status, 1);
-        } else {
-            ac->oc[1].m4ac.sbr = 1;
-            ac->avctx->profile = FF_PROFILE_AAC_HE;
-        }
-        res = ff_decode_sbr_extension(ac, &che->sbr, gb, crc_flag, cnt, elem_type);
-        break;
-    case EXT_DYNAMIC_RANGE:
-        res = decode_dynamic_range(&ac->che_drc, gb);
-        break;
-    case EXT_FILL:
-        decode_fill(ac, gb, 8 * cnt - 4);
-        break;
-    case EXT_FILL_DATA:
-    case EXT_DATA_ELEMENT:
-    default:
-        skip_bits_long(gb, 8 * cnt - 4);
-        break;
-    };
-    return res;
-}
+    t.i = s.i ^ (sign & 1U<<31);
+    *dst++ = v[idx>>4 & 3] * t.f;
 
-/**
- * Decode Temporal Noise Shaping filter coefficients and apply all-pole filters; reference: 4.6.9.3.
- *
- * @param   decode  1 if tool is used normally, 0 if tool is used in LTP.
- * @param   coef    spectral coefficients
- */
-static void apply_tns(float coef[1024], TemporalNoiseShaping *tns,
-                      IndividualChannelStream *ics, int decode)
-{
-    const int mmm = FFMIN(ics->tns_max_bands, ics->max_sfb);
-    int w, filt, m, i;
-    int bottom, top, order, start, end, size, inc;
-    float lpc[TNS_MAX_ORDER];
-    float tmp[TNS_MAX_ORDER+1];
-
-    for (w = 0; w < ics->num_windows; w++) {
-        bottom = ics->num_swb;
-        for (filt = 0; filt < tns->n_filt[w]; filt++) {
-            top    = bottom;
-            bottom = FFMAX(0, top - tns->length[w][filt]);
-            order  = tns->order[w][filt];
-            if (order == 0)
-                continue;
-
-            // tns_decode_coef
-            compute_lpc_coefs(tns->coef[w][filt], order, lpc, 0, 0, 0);
-
-            start = ics->swb_offset[FFMIN(bottom, mmm)];
-            end   = ics->swb_offset[FFMIN(   top, mmm)];
-            if ((size = end - start) <= 0)
-                continue;
-            if (tns->direction[w][filt]) {
-                inc = -1;
-                start = end - 1;
-            } else {
-                inc = 1;
-            }
-            start += w * 128;
+    sign <<= nz & 1;
+    t.i = s.i ^ (sign & 1U<<31);
+    *dst++ = v[idx>>6 & 3] * t.f;
 
-            if (decode) {
-                // ar filter
-                for (m = 0; m < size; m++, start += inc)
-                    for (i = 1; i <= FFMIN(m, order); i++)
-                        coef[start] -= coef[start - i * inc] * lpc[i - 1];
-            } else {
-                // ma filter
-                for (m = 0; m < size; m++, start += inc) {
-                    tmp[0] = coef[start];
-                    for (i = 1; i <= FFMIN(m, order); i++)
-                        coef[start] += tmp[i] * lpc[i - 1];
-                    for (i = order; i > 0; i--)
-                        tmp[i] = tmp[i - 1];
-                }
-            }
-        }
-    }
+    return dst;
 }
+#endif
 
-/**
- *  Apply windowing and MDCT to obtain the spectral
- *  coefficient from the predicted sample by LTP.
- */
-static void windowing_and_mdct_ltp(AACContext *ac, float *out,
-                                   float *in, IndividualChannelStream *ics)
+static av_always_inline float flt16_round(float pf)
 {
-    const float *lwindow      = ics->use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
-    const float *swindow      = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
-    const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
-    const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
-
-    if (ics->window_sequence[0] != LONG_STOP_SEQUENCE) {
-        ac->fdsp->vector_fmul(in, in, lwindow_prev, 1024);
-    } else {
-        memset(in, 0, 448 * sizeof(float));
-        ac->fdsp->vector_fmul(in + 448, in + 448, swindow_prev, 128);
-    }
-    if (ics->window_sequence[0] != LONG_START_SEQUENCE) {
-        ac->fdsp->vector_fmul_reverse(in + 1024, in + 1024, lwindow, 1024);
-    } else {
-        ac->fdsp->vector_fmul_reverse(in + 1024 + 448, in + 1024 + 448, swindow, 128);
-        memset(in + 1024 + 576, 0, 448 * sizeof(float));
-    }
-    ac->mdct_ltp.mdct_calc(&ac->mdct_ltp, out, in);
+    union av_intfloat32 tmp;
+    tmp.f = pf;
+    tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
+    return tmp.f;
 }
 
-/**
- * Apply the long term prediction
- */
-static void apply_ltp(AACContext *ac, SingleChannelElement *sce)
+static av_always_inline float flt16_even(float pf)
 {
-    const LongTermPrediction *ltp = &sce->ics.ltp;
-    const uint16_t *offsets = sce->ics.swb_offset;
-    int i, sfb;
-
-    if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
-        float *predTime = sce->ret;
-        float *predFreq = ac->buf_mdct;
-        int16_t num_samples = 2048;
-
-        if (ltp->lag < 1024)
-            num_samples = ltp->lag + 1024;
-        for (i = 0; i < num_samples; i++)
-            predTime[i] = sce->ltp_state[i + 2048 - ltp->lag] * ltp->coef;
-        memset(&predTime[i], 0, (2048 - i) * sizeof(float));
-
-        ac->windowing_and_mdct_ltp(ac, predFreq, predTime, &sce->ics);
-
-        if (sce->tns.present)
-            ac->apply_tns(predFreq, &sce->tns, &sce->ics, 0);
-
-        for (sfb = 0; sfb < FFMIN(sce->ics.max_sfb, MAX_LTP_LONG_SFB); sfb++)
-            if (ltp->used[sfb])
-                for (i = offsets[sfb]; i < offsets[sfb + 1]; i++)
-                    sce->coeffs[i] += predFreq[i];
-    }
+    union av_intfloat32 tmp;
+    tmp.f = pf;
+    tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
+    return tmp.f;
 }
 
-/**
- * Update the LTP buffer for next frame
- */
-static void update_ltp(AACContext *ac, SingleChannelElement *sce)
+static av_always_inline float flt16_trunc(float pf)
 {
-    IndividualChannelStream *ics = &sce->ics;
-    float *saved     = sce->saved;
-    float *saved_ltp = sce->coeffs;
-    const float *lwindow = ics->use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
-    const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
-    int i;
-
-    if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
-        memcpy(saved_ltp,       saved, 512 * sizeof(float));
-        memset(saved_ltp + 576, 0,     448 * sizeof(float));
-        ac->fdsp->vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960,     &swindow[64],      64);
-        for (i = 0; i < 64; i++)
-            saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * swindow[63 - i];
-    } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
-        memcpy(saved_ltp,       ac->buf_mdct + 512, 448 * sizeof(float));
-        memset(saved_ltp + 576, 0,                  448 * sizeof(float));
-        ac->fdsp->vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960,     &swindow[64],      64);
-        for (i = 0; i < 64; i++)
-            saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * swindow[63 - i];
-    } else { // LONG_STOP or ONLY_LONG
-        ac->fdsp->vector_fmul_reverse(saved_ltp,       ac->buf_mdct + 512,     &lwindow[512],     512);
-        for (i = 0; i < 512; i++)
-            saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * lwindow[511 - i];
-    }
-
-    memcpy(sce->ltp_state,      sce->ltp_state+1024, 1024 * sizeof(*sce->ltp_state));
-    memcpy(sce->ltp_state+1024, sce->ret,            1024 * sizeof(*sce->ltp_state));
-    memcpy(sce->ltp_state+2048, saved_ltp,           1024 * sizeof(*sce->ltp_state));
+    union av_intfloat32 pun;
+    pun.f = pf;
+    pun.i &= 0xFFFF0000U;
+    return pun.f;
 }
 
-/**
- * Conduct IMDCT and windowing.
- */
-static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce)
+static av_always_inline void predict(PredictorState *ps, float *coef,
+                                     int output_enable)
 {
-    IndividualChannelStream *ics = &sce->ics;
-    float *in    = sce->coeffs;
-    float *out   = sce->ret;
-    float *saved = sce->saved;
-    const float *swindow      = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
-    const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
-    const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
-    float *buf  = ac->buf_mdct;
-    float *temp = ac->temp;
-    int i;
-
-    // imdct
-    if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
-        for (i = 0; i < 1024; i += 128)
-            ac->mdct_small.imdct_half(&ac->mdct_small, buf + i, in + i);
-    } else
-        ac->mdct.imdct_half(&ac->mdct, buf, in);
-
-    /* window overlapping
-     * NOTE: To simplify the overlapping code, all 'meaningless' short to long
-     * and long to short transitions are considered to be short to short
-     * transitions. This leaves just two cases (long to long and short to short)
-     * with a little special sauce for EIGHT_SHORT_SEQUENCE.
-     */
-    if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
-            (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
-        ac->fdsp->vector_fmul_window(    out,               saved,            buf,         lwindow_prev, 512);
-    } else {
-        memcpy(                         out,               saved,            448 * sizeof(float));
-
-        if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
-            ac->fdsp->vector_fmul_window(out + 448 + 0*128, saved + 448,      buf + 0*128, swindow_prev, 64);
-            ac->fdsp->vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow,      64);
-            ac->fdsp->vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow,      64);
-            ac->fdsp->vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow,      64);
-            ac->fdsp->vector_fmul_window(temp,              buf + 3*128 + 64, buf + 4*128, swindow,      64);
-            memcpy(                     out + 448 + 4*128, temp, 64 * sizeof(float));
-        } else {
-            ac->fdsp->vector_fmul_window(out + 448,         saved + 448,      buf,         swindow_prev, 64);
-            memcpy(                     out + 576,         buf + 64,         448 * sizeof(float));
-        }
-    }
+    const float a     = 0.953125; // 61.0 / 64
+    const float alpha = 0.90625;  // 29.0 / 32
+    float e0, e1;
+    float pv;
+    float k1, k2;
+    float   r0 = ps->r0,     r1 = ps->r1;
+    float cor0 = ps->cor0, cor1 = ps->cor1;
+    float var0 = ps->var0, var1 = ps->var1;
 
-    // buffer update
-    if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
-        memcpy(                     saved,       temp + 64,         64 * sizeof(float));
-        ac->fdsp->vector_fmul_window(saved + 64,  buf + 4*128 + 64, buf + 5*128, swindow, 64);
-        ac->fdsp->vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64);
-        ac->fdsp->vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64);
-        memcpy(                     saved + 448, buf + 7*128 + 64,  64 * sizeof(float));
-    } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
-        memcpy(                     saved,       buf + 512,        448 * sizeof(float));
-        memcpy(                     saved + 448, buf + 7*128 + 64,  64 * sizeof(float));
-    } else { // LONG_STOP or ONLY_LONG
-        memcpy(                     saved,       buf + 512,        512 * sizeof(float));
-    }
-}
+    k1 = var0 > 1 ? cor0 * flt16_even(a / var0) : 0;
+    k2 = var1 > 1 ? cor1 * flt16_even(a / var1) : 0;
 
-static void imdct_and_windowing_ld(AACContext *ac, SingleChannelElement *sce)
-{
-    IndividualChannelStream *ics = &sce->ics;
-    float *in    = sce->coeffs;
-    float *out   = sce->ret;
-    float *saved = sce->saved;
-    float *buf  = ac->buf_mdct;
-
-    // imdct
-    ac->mdct.imdct_half(&ac->mdct_ld, buf, in);
-
-    // window overlapping
-    if (ics->use_kb_window[1]) {
-        // AAC LD uses a low overlap sine window instead of a KBD window
-        memcpy(out, saved, 192 * sizeof(float));
-        ac->fdsp->vector_fmul_window(out + 192, saved + 192, buf, ff_sine_128, 64);
-        memcpy(                     out + 320, buf + 64, 192 * sizeof(float));
-    } else {
-        ac->fdsp->vector_fmul_window(out, saved, buf, ff_sine_512, 256);
-    }
+    pv = flt16_round(k1 * r0 + k2 * r1);
+    if (output_enable)
+        *coef += pv;
 
-    // buffer update
-    memcpy(saved, buf + 256, 256 * sizeof(float));
-}
+    e0 = *coef;
+    e1 = e0 - k1 * r0;
 
-static void imdct_and_windowing_eld(AACContext *ac, SingleChannelElement *sce)
-{
-    float *in    = sce->coeffs;
-    float *out   = sce->ret;
-    float *saved = sce->saved;
-    float *buf  = ac->buf_mdct;
-    int i;
-    const int n  = ac->oc[1].m4ac.frame_length_short ? 480 : 512;
-    const int n2 = n >> 1;
-    const int n4 = n >> 2;
-    const float *const window = n == 480 ? ff_aac_eld_window_480 :
-                                           ff_aac_eld_window_512;
-
-    // Inverse transform, mapped to the conventional IMDCT by
-    // Chivukula, R.K.; Reznik, Y.A.; Devarajan, V.,
-    // "Efficient algorithms for MPEG-4 AAC-ELD, AAC-LD and AAC-LC filterbanks,"
-    // International Conference on Audio, Language and Image Processing, ICALIP 2008.
-    // URL: http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=4590245&isnumber=4589950
-    for (i = 0; i < n2; i+=2) {
-        float temp;
-        temp =  in[i    ]; in[i    ] = -in[n - 1 - i]; in[n - 1 - i] = temp;
-        temp = -in[i + 1]; in[i + 1] =  in[n - 2 - i]; in[n - 2 - i] = temp;
-    }
-    if (n == 480)
-        ac->mdct480->imdct_half(ac->mdct480, buf, in, 1, -1.f/(16*1024*960));
-    else
-        ac->mdct.imdct_half(&ac->mdct_ld, buf, in);
-    for (i = 0; i < n; i+=2) {
-        buf[i] = -buf[i];
-    }
-    // Like with the regular IMDCT at this point we still have the middle half
-    // of a transform but with even symmetry on the left and odd symmetry on
-    // the right
-
-    // window overlapping
-    // The spec says to use samples [0..511] but the reference decoder uses
-    // samples [128..639].
-    for (i = n4; i < n2; i ++) {
-        out[i - n4] =    buf[n2 - 1 - i]       * window[i       - n4] +
-                       saved[      i + n2]     * window[i +   n - n4] +
-                      -saved[  n + n2 - 1 - i] * window[i + 2*n - n4] +
-                      -saved[2*n + n2 + i]     * window[i + 3*n - n4];
-    }
-    for (i = 0; i < n2; i ++) {
-        out[n4 + i] =    buf[i]               * window[i + n2       - n4] +
-                      -saved[      n - 1 - i] * window[i + n2 +   n - n4] +
-                      -saved[  n + i]         * window[i + n2 + 2*n - n4] +
-                       saved[2*n + n - 1 - i] * window[i + n2 + 3*n - n4];
-    }
-    for (i = 0; i < n4; i ++) {
-        out[n2 + n4 + i] =    buf[      i + n2]     * window[i +   n - n4] +
-                           -saved[      n2 - 1 - i] * window[i + 2*n - n4] +
-                           -saved[  n + n2 + i]     * window[i + 3*n - n4];
-    }
+    ps->cor1 = flt16_trunc(alpha * cor1 + r1 * e1);
+    ps->var1 = flt16_trunc(alpha * var1 + 0.5f * (r1 * r1 + e1 * e1));
+    ps->cor0 = flt16_trunc(alpha * cor0 + r0 * e0);
+    ps->var0 = flt16_trunc(alpha * var0 + 0.5f * (r0 * r0 + e0 * e0));
 
-    // buffer update
-    memmove(saved + n, saved, 2 * n * sizeof(float));
-    memcpy( saved,       buf,     n * sizeof(float));
+    ps->r1 = flt16_trunc(a * (r0 - k1 * e0));
+    ps->r0 = flt16_trunc(a * e0);
 }
 
 /**
@@ -2724,506 +257,7 @@ static void apply_independent_coupling(AACContext *ac,
         dest[i] += gain * src[i];
 }
 
-/**
- * channel coupling transformation interface
- *
- * @param   apply_coupling_method   pointer to (in)dependent coupling function
- */
-static void apply_channel_coupling(AACContext *ac, ChannelElement *cc,
-                                   enum RawDataBlockType type, int elem_id,
-                                   enum CouplingPoint coupling_point,
-                                   void (*apply_coupling_method)(AACContext *ac, SingleChannelElement *target, ChannelElement *cce, int index))
-{
-    int i, c;
-
-    for (i = 0; i < MAX_ELEM_ID; i++) {
-        ChannelElement *cce = ac->che[TYPE_CCE][i];
-        int index = 0;
-
-        if (cce && cce->coup.coupling_point == coupling_point) {
-            ChannelCoupling *coup = &cce->coup;
-
-            for (c = 0; c <= coup->num_coupled; c++) {
-                if (coup->type[c] == type && coup->id_select[c] == elem_id) {
-                    if (coup->ch_select[c] != 1) {
-                        apply_coupling_method(ac, &cc->ch[0], cce, index);
-                        if (coup->ch_select[c] != 0)
-                            index++;
-                    }
-                    if (coup->ch_select[c] != 2)
-                        apply_coupling_method(ac, &cc->ch[1], cce, index++);
-                } else
-                    index += 1 + (coup->ch_select[c] == 3);
-            }
-        }
-    }
-}
-
-/**
- * Convert spectral data to float samples, applying all supported tools as appropriate.
- */
-static void spectral_to_sample(AACContext *ac)
-{
-    int i, type;
-    void (*imdct_and_window)(AACContext *ac, SingleChannelElement *sce);
-    switch (ac->oc[1].m4ac.object_type) {
-    case AOT_ER_AAC_LD:
-        imdct_and_window = imdct_and_windowing_ld;
-        break;
-    case AOT_ER_AAC_ELD:
-        imdct_and_window = imdct_and_windowing_eld;
-        break;
-    default:
-        imdct_and_window = ac->imdct_and_windowing;
-    }
-    for (type = 3; type >= 0; type--) {
-        for (i = 0; i < MAX_ELEM_ID; i++) {
-            ChannelElement *che = ac->che[type][i];
-            if (che && che->present) {
-                if (type <= TYPE_CPE)
-                    apply_channel_coupling(ac, che, type, i, BEFORE_TNS, apply_dependent_coupling);
-                if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) {
-                    if (che->ch[0].ics.predictor_present) {
-                        if (che->ch[0].ics.ltp.present)
-                            ac->apply_ltp(ac, &che->ch[0]);
-                        if (che->ch[1].ics.ltp.present && type == TYPE_CPE)
-                            ac->apply_ltp(ac, &che->ch[1]);
-                    }
-                }
-                if (che->ch[0].tns.present)
-                    ac->apply_tns(che->ch[0].coeffs, &che->ch[0].tns, &che->ch[0].ics, 1);
-                if (che->ch[1].tns.present)
-                    ac->apply_tns(che->ch[1].coeffs, &che->ch[1].tns, &che->ch[1].ics, 1);
-                if (type <= TYPE_CPE)
-                    apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, apply_dependent_coupling);
-                if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT) {
-                    imdct_and_window(ac, &che->ch[0]);
-                    if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP)
-                        ac->update_ltp(ac, &che->ch[0]);
-                    if (type == TYPE_CPE) {
-                        imdct_and_window(ac, &che->ch[1]);
-                        if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP)
-                            ac->update_ltp(ac, &che->ch[1]);
-                    }
-                    if (ac->oc[1].m4ac.sbr > 0) {
-                        ff_sbr_apply(ac, &che->sbr, type, che->ch[0].ret, che->ch[1].ret);
-                    }
-                }
-                if (type <= TYPE_CCE)
-                    apply_channel_coupling(ac, che, type, i, AFTER_IMDCT, apply_independent_coupling);
-                che->present = 0;
-            } else if (che) {
-                av_log(ac->avctx, AV_LOG_VERBOSE, "ChannelElement %d.%d missing \n", type, i);
-            }
-        }
-    }
-}
-
-static int parse_adts_frame_header(AACContext *ac, GetBitContext *gb)
-{
-    int size;
-    AACADTSHeaderInfo hdr_info;
-    uint8_t layout_map[MAX_ELEM_ID*4][3];
-    int layout_map_tags, ret;
-
-    size = avpriv_aac_parse_header(gb, &hdr_info);
-    if (size > 0) {
-        if (!ac->warned_num_aac_frames && hdr_info.num_aac_frames != 1) {
-            // This is 2 for "VLB " audio in NSV files.
-            // See samples/nsv/vlb_audio.
-            avpriv_report_missing_feature(ac->avctx,
-                                          "More than one AAC RDB per ADTS frame");
-            ac->warned_num_aac_frames = 1;
-        }
-        push_output_configuration(ac);
-        if (hdr_info.chan_config) {
-            ac->oc[1].m4ac.chan_config = hdr_info.chan_config;
-            if ((ret = set_default_channel_config(ac->avctx,
-                                                  layout_map,
-                                                  &layout_map_tags,
-                                                  hdr_info.chan_config)) < 0)
-                return ret;
-            if ((ret = output_configure(ac, layout_map, layout_map_tags,
-                                        FFMAX(ac->oc[1].status,
-                                              OC_TRIAL_FRAME), 0)) < 0)
-                return ret;
-        } else {
-            ac->oc[1].m4ac.chan_config = 0;
-            /**
-             * dual mono frames in Japanese DTV can have chan_config 0
-             * WITHOUT specifying PCE.
-             *  thus, set dual mono as default.
-             */
-            if (ac->dmono_mode && ac->oc[0].status == OC_NONE) {
-                layout_map_tags = 2;
-                layout_map[0][0] = layout_map[1][0] = TYPE_SCE;
-                layout_map[0][2] = layout_map[1][2] = AAC_CHANNEL_FRONT;
-                layout_map[0][1] = 0;
-                layout_map[1][1] = 1;
-                if (output_configure(ac, layout_map, layout_map_tags,
-                                     OC_TRIAL_FRAME, 0))
-                    return -7;
-            }
-        }
-        ac->oc[1].m4ac.sample_rate     = hdr_info.sample_rate;
-        ac->oc[1].m4ac.sampling_index  = hdr_info.sampling_index;
-        ac->oc[1].m4ac.object_type     = hdr_info.object_type;
-        ac->oc[1].m4ac.frame_length_short = 0;
-        if (ac->oc[0].status != OC_LOCKED ||
-            ac->oc[0].m4ac.chan_config != hdr_info.chan_config ||
-            ac->oc[0].m4ac.sample_rate != hdr_info.sample_rate) {
-            ac->oc[1].m4ac.sbr = -1;
-            ac->oc[1].m4ac.ps  = -1;
-        }
-        if (!hdr_info.crc_absent)
-            skip_bits(gb, 16);
-    }
-    return size;
-}
-
-static int aac_decode_er_frame(AVCodecContext *avctx, void *data,
-                               int *got_frame_ptr, GetBitContext *gb)
-{
-    AACContext *ac = avctx->priv_data;
-    const MPEG4AudioConfig *const m4ac = &ac->oc[1].m4ac;
-    ChannelElement *che;
-    int err, i;
-    int samples = m4ac->frame_length_short ? 960 : 1024;
-    int chan_config = m4ac->chan_config;
-    int aot = m4ac->object_type;
-
-    if (aot == AOT_ER_AAC_LD || aot == AOT_ER_AAC_ELD)
-        samples >>= 1;
-
-    ac->frame = data;
-
-    if ((err = frame_configure_elements(avctx)) < 0)
-        return err;
-
-    // The FF_PROFILE_AAC_* defines are all object_type - 1
-    // This may lead to an undefined profile being signaled
-    ac->avctx->profile = aot - 1;
-
-    ac->tags_mapped = 0;
-
-    if (chan_config < 0 || (chan_config >= 8 && chan_config < 11) || chan_config >= 13) {
-        avpriv_request_sample(avctx, "Unknown ER channel configuration %d",
-                              chan_config);
-        return AVERROR_INVALIDDATA;
-    }
-    for (i = 0; i < tags_per_config[chan_config]; i++) {
-        const int elem_type = aac_channel_layout_map[chan_config-1][i][0];
-        const int elem_id   = aac_channel_layout_map[chan_config-1][i][1];
-        if (!(che=get_che(ac, elem_type, elem_id))) {
-            av_log(ac->avctx, AV_LOG_ERROR,
-                   "channel element %d.%d is not allocated\n",
-                   elem_type, elem_id);
-            return AVERROR_INVALIDDATA;
-        }
-        che->present = 1;
-        if (aot != AOT_ER_AAC_ELD)
-            skip_bits(gb, 4);
-        switch (elem_type) {
-        case TYPE_SCE:
-            err = decode_ics(ac, &che->ch[0], gb, 0, 0);
-            break;
-        case TYPE_CPE:
-            err = decode_cpe(ac, gb, che);
-            break;
-        case TYPE_LFE:
-            err = decode_ics(ac, &che->ch[0], gb, 0, 0);
-            break;
-        }
-        if (err < 0)
-            return err;
-    }
-
-    spectral_to_sample(ac);
-
-    ac->frame->nb_samples = samples;
-    ac->frame->sample_rate = avctx->sample_rate;
-    *got_frame_ptr = 1;
-
-    skip_bits_long(gb, get_bits_left(gb));
-    return 0;
-}
-
-static int aac_decode_frame_int(AVCodecContext *avctx, void *data,
-                                int *got_frame_ptr, GetBitContext *gb, AVPacket *avpkt)
-{
-    AACContext *ac = avctx->priv_data;
-    ChannelElement *che = NULL, *che_prev = NULL;
-    enum RawDataBlockType elem_type, elem_type_prev = TYPE_END;
-    int err, elem_id;
-    int samples = 0, multiplier, audio_found = 0, pce_found = 0;
-    int is_dmono, sce_count = 0;
-
-    ac->frame = data;
-
-    if (show_bits(gb, 12) == 0xfff) {
-        if ((err = parse_adts_frame_header(ac, gb)) < 0) {
-            av_log(avctx, AV_LOG_ERROR, "Error decoding AAC frame header.\n");
-            goto fail;
-        }
-        if (ac->oc[1].m4ac.sampling_index > 12) {
-            av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->oc[1].m4ac.sampling_index);
-            err = AVERROR_INVALIDDATA;
-            goto fail;
-        }
-    }
-
-    if ((err = frame_configure_elements(avctx)) < 0)
-        goto fail;
-
-    // The FF_PROFILE_AAC_* defines are all object_type - 1
-    // This may lead to an undefined profile being signaled
-    ac->avctx->profile = ac->oc[1].m4ac.object_type - 1;
-
-    ac->tags_mapped = 0;
-    // parse
-    while ((elem_type = get_bits(gb, 3)) != TYPE_END) {
-        elem_id = get_bits(gb, 4);
-
-        if (avctx->debug & FF_DEBUG_STARTCODE)
-            av_log(avctx, AV_LOG_DEBUG, "Elem type:%x id:%x\n", elem_type, elem_id);
-
-        if (!avctx->channels && elem_type != TYPE_PCE) {
-            err = AVERROR_INVALIDDATA;
-            goto fail;
-        }
-
-        if (elem_type < TYPE_DSE) {
-            if (!(che=get_che(ac, elem_type, elem_id))) {
-                av_log(ac->avctx, AV_LOG_ERROR, "channel element %d.%d is not allocated\n",
-                       elem_type, elem_id);
-                err = AVERROR_INVALIDDATA;
-                goto fail;
-            }
-            samples = 1024;
-            che->present = 1;
-        }
-
-        switch (elem_type) {
-
-        case TYPE_SCE:
-            err = decode_ics(ac, &che->ch[0], gb, 0, 0);
-            audio_found = 1;
-            sce_count++;
-            break;
-
-        case TYPE_CPE:
-            err = decode_cpe(ac, gb, che);
-            audio_found = 1;
-            break;
-
-        case TYPE_CCE:
-            err = decode_cce(ac, gb, che);
-            break;
-
-        case TYPE_LFE:
-            err = decode_ics(ac, &che->ch[0], gb, 0, 0);
-            audio_found = 1;
-            break;
-
-        case TYPE_DSE:
-            err = skip_data_stream_element(ac, gb);
-            break;
-
-        case TYPE_PCE: {
-            uint8_t layout_map[MAX_ELEM_ID*4][3];
-            int tags;
-            push_output_configuration(ac);
-            tags = decode_pce(avctx, &ac->oc[1].m4ac, layout_map, gb);
-            if (tags < 0) {
-                err = tags;
-                break;
-            }
-            if (pce_found) {
-                av_log(avctx, AV_LOG_ERROR,
-                       "Not evaluating a further program_config_element as this construct is dubious at best.\n");
-            } else {
-                err = output_configure(ac, layout_map, tags, OC_TRIAL_PCE, 1);
-                if (!err)
-                    ac->oc[1].m4ac.chan_config = 0;
-                pce_found = 1;
-            }
-            break;
-        }
-
-        case TYPE_FIL:
-            if (elem_id == 15)
-                elem_id += get_bits(gb, 8) - 1;
-            if (get_bits_left(gb) < 8 * elem_id) {
-                    av_log(avctx, AV_LOG_ERROR, "TYPE_FIL: "overread_err);
-                    err = AVERROR_INVALIDDATA;
-                    goto fail;
-            }
-            while (elem_id > 0)
-                elem_id -= decode_extension_payload(ac, gb, elem_id, che_prev, elem_type_prev);
-            err = 0; /* FIXME */
-            break;
-
-        default:
-            err = AVERROR_BUG; /* should not happen, but keeps compiler happy */
-            break;
-        }
-
-        che_prev       = che;
-        elem_type_prev = elem_type;
-
-        if (err)
-            goto fail;
-
-        if (get_bits_left(gb) < 3) {
-            av_log(avctx, AV_LOG_ERROR, overread_err);
-            err = AVERROR_INVALIDDATA;
-            goto fail;
-        }
-    }
-
-    if (!avctx->channels) {
-        *got_frame_ptr = 0;
-        return 0;
-    }
-
-    spectral_to_sample(ac);
-
-    multiplier = (ac->oc[1].m4ac.sbr == 1) ? ac->oc[1].m4ac.ext_sample_rate > ac->oc[1].m4ac.sample_rate : 0;
-    samples <<= multiplier;
-
-    if (ac->oc[1].status && audio_found) {
-        avctx->sample_rate = ac->oc[1].m4ac.sample_rate << multiplier;
-        avctx->frame_size = samples;
-        ac->oc[1].status = OC_LOCKED;
-    }
-
-    if (multiplier) {
-        int side_size;
-        const uint8_t *side = av_packet_get_side_data(avpkt, AV_PKT_DATA_SKIP_SAMPLES, &side_size);
-        if (side && side_size>=4)
-            AV_WL32(side, 2*AV_RL32(side));
-    }
-
-    if (!ac->frame->data[0] && samples) {
-        av_log(avctx, AV_LOG_ERROR, "no frame data found\n");
-        err = AVERROR_INVALIDDATA;
-        goto fail;
-    }
-
-    if (samples) {
-        ac->frame->nb_samples = samples;
-        ac->frame->sample_rate = avctx->sample_rate;
-    } else
-        av_frame_unref(ac->frame);
-    *got_frame_ptr = !!samples;
-
-    /* for dual-mono audio (SCE + SCE) */
-    is_dmono = ac->dmono_mode && sce_count == 2 &&
-               ac->oc[1].channel_layout == (AV_CH_FRONT_LEFT | AV_CH_FRONT_RIGHT);
-    if (is_dmono) {
-        if (ac->dmono_mode == 1)
-            ((AVFrame *)data)->data[1] =((AVFrame *)data)->data[0];
-        else if (ac->dmono_mode == 2)
-            ((AVFrame *)data)->data[0] =((AVFrame *)data)->data[1];
-    }
-
-    return 0;
-fail:
-    pop_output_configuration(ac);
-    return err;
-}
-
-static int aac_decode_frame(AVCodecContext *avctx, void *data,
-                            int *got_frame_ptr, AVPacket *avpkt)
-{
-    AACContext *ac = avctx->priv_data;
-    const uint8_t *buf = avpkt->data;
-    int buf_size = avpkt->size;
-    GetBitContext gb;
-    int buf_consumed;
-    int buf_offset;
-    int err;
-    int new_extradata_size;
-    const uint8_t *new_extradata = av_packet_get_side_data(avpkt,
-                                       AV_PKT_DATA_NEW_EXTRADATA,
-                                       &new_extradata_size);
-    int jp_dualmono_size;
-    const uint8_t *jp_dualmono   = av_packet_get_side_data(avpkt,
-                                       AV_PKT_DATA_JP_DUALMONO,
-                                       &jp_dualmono_size);
-
-    if (new_extradata && 0) {
-        av_free(avctx->extradata);
-        avctx->extradata = av_mallocz(new_extradata_size +
-                                      FF_INPUT_BUFFER_PADDING_SIZE);
-        if (!avctx->extradata)
-            return AVERROR(ENOMEM);
-        avctx->extradata_size = new_extradata_size;
-        memcpy(avctx->extradata, new_extradata, new_extradata_size);
-        push_output_configuration(ac);
-        if (decode_audio_specific_config(ac, ac->avctx, &ac->oc[1].m4ac,
-                                         avctx->extradata,
-                                         avctx->extradata_size*8, 1) < 0) {
-            pop_output_configuration(ac);
-            return AVERROR_INVALIDDATA;
-        }
-    }
-
-    ac->dmono_mode = 0;
-    if (jp_dualmono && jp_dualmono_size > 0)
-        ac->dmono_mode =  1 + *jp_dualmono;
-    if (ac->force_dmono_mode >= 0)
-        ac->dmono_mode = ac->force_dmono_mode;
-
-    if (INT_MAX / 8 <= buf_size)
-        return AVERROR_INVALIDDATA;
-
-    if ((err = init_get_bits(&gb, buf, buf_size * 8)) < 0)
-        return err;
-
-    switch (ac->oc[1].m4ac.object_type) {
-    case AOT_ER_AAC_LC:
-    case AOT_ER_AAC_LTP:
-    case AOT_ER_AAC_LD:
-    case AOT_ER_AAC_ELD:
-        err = aac_decode_er_frame(avctx, data, got_frame_ptr, &gb);
-        break;
-    default:
-        err = aac_decode_frame_int(avctx, data, got_frame_ptr, &gb, avpkt);
-    }
-    if (err < 0)
-        return err;
-
-    buf_consumed = (get_bits_count(&gb) + 7) >> 3;
-    for (buf_offset = buf_consumed; buf_offset < buf_size; buf_offset++)
-        if (buf[buf_offset])
-            break;
-
-    return buf_size > buf_offset ? buf_consumed : buf_size;
-}
-
-static av_cold int aac_decode_close(AVCodecContext *avctx)
-{
-    AACContext *ac = avctx->priv_data;
-    int i, type;
-
-    for (i = 0; i < MAX_ELEM_ID; i++) {
-        for (type = 0; type < 4; type++) {
-            if (ac->che[type][i])
-                ff_aac_sbr_ctx_close(&ac->che[type][i]->sbr);
-            av_freep(&ac->che[type][i]);
-        }
-    }
-
-    ff_mdct_end(&ac->mdct);
-    ff_mdct_end(&ac->mdct_small);
-    ff_mdct_end(&ac->mdct_ld);
-    ff_mdct_end(&ac->mdct_ltp);
-    ff_imdct15_uninit(&ac->mdct480);
-    av_freep(&ac->fdsp);
-    return 0;
-}
-
+#include "aacdec_template.c"
 
 #define LOAS_SYNC_WORD   0x2b7       ///< 11 bits LOAS sync word
 
@@ -3289,14 +323,14 @@ static int latm_decode_audio_specific_config(struct LATMContext *latmctx,
 
         if (avctx->extradata_size < esize) {
             av_free(avctx->extradata);
-            avctx->extradata = av_malloc(esize + FF_INPUT_BUFFER_PADDING_SIZE);
+            avctx->extradata = av_malloc(esize + AV_INPUT_BUFFER_PADDING_SIZE);
             if (!avctx->extradata)
                 return AVERROR(ENOMEM);
         }
 
         avctx->extradata_size = esize;
         memcpy(avctx->extradata, gb->buffer + (config_start_bit/8), esize);
-        memset(avctx->extradata+esize, 0, FF_INPUT_BUFFER_PADDING_SIZE);
+        memset(avctx->extradata+esize, 0, AV_INPUT_BUFFER_PADDING_SIZE);
     }
     skip_bits_long(gb, bits_consumed);
 
@@ -3463,7 +497,7 @@ static int latm_decode_frame(AVCodecContext *avctx, void *out,
             push_output_configuration(&latmctx->aac_ctx);
             if ((err = decode_audio_specific_config(
                     &latmctx->aac_ctx, avctx, &latmctx->aac_ctx.oc[1].m4ac,
-                    avctx->extradata, avctx->extradata_size*8, 1)) < 0) {
+                    avctx->extradata, avctx->extradata_size*8LL, 1)) < 0) {
                 pop_output_configuration(&latmctx->aac_ctx);
                 return err;
             }
@@ -3505,53 +539,6 @@ static av_cold int latm_decode_init(AVCodecContext *avctx)
     return ret;
 }
 
-static void aacdec_init(AACContext *c)
-{
-    c->imdct_and_windowing                      = imdct_and_windowing;
-    c->apply_ltp                                = apply_ltp;
-    c->apply_tns                                = apply_tns;
-    c->windowing_and_mdct_ltp                   = windowing_and_mdct_ltp;
-    c->update_ltp                               = update_ltp;
-
-    if(ARCH_MIPS)
-        ff_aacdec_init_mips(c);
-}
-/**
- * AVOptions for Japanese DTV specific extensions (ADTS only)
- */
-#define AACDEC_FLAGS AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
-static const AVOption options[] = {
-    {"dual_mono_mode", "Select the channel to decode for dual mono",
-     offsetof(AACContext, force_dmono_mode), AV_OPT_TYPE_INT, {.i64=-1}, -1, 2,
-     AACDEC_FLAGS, "dual_mono_mode"},
-
-    {"auto", "autoselection",            0, AV_OPT_TYPE_CONST, {.i64=-1}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"},
-    {"main", "Select Main/Left channel", 0, AV_OPT_TYPE_CONST, {.i64= 1}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"},
-    {"sub" , "Select Sub/Right channel", 0, AV_OPT_TYPE_CONST, {.i64= 2}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"},
-    {"both", "Select both channels",     0, AV_OPT_TYPE_CONST, {.i64= 0}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"},
-
-    {NULL},
-};
-
-static const AVClass aac_decoder_class = {
-    .class_name = "AAC decoder",
-    .item_name  = av_default_item_name,
-    .option     = options,
-    .version    = LIBAVUTIL_VERSION_INT,
-};
-
-static const AVProfile profiles[] = {
-    { FF_PROFILE_AAC_MAIN,  "Main"     },
-    { FF_PROFILE_AAC_LOW,   "LC"       },
-    { FF_PROFILE_AAC_SSR,   "SSR"      },
-    { FF_PROFILE_AAC_LTP,   "LTP"      },
-    { FF_PROFILE_AAC_HE,    "HE-AAC"   },
-    { FF_PROFILE_AAC_HE_V2, "HE-AACv2" },
-    { FF_PROFILE_AAC_LD,    "LD"       },
-    { FF_PROFILE_AAC_ELD,   "ELD"      },
-    { FF_PROFILE_UNKNOWN },
-};
-
 AVCodec ff_aac_decoder = {
     .name            = "aac",
     .long_name       = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
@@ -3564,11 +551,12 @@ AVCodec ff_aac_decoder = {
     .sample_fmts     = (const enum AVSampleFormat[]) {
         AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE
     },
-    .capabilities    = CODEC_CAP_CHANNEL_CONF | CODEC_CAP_DR1,
+    .capabilities    = AV_CODEC_CAP_CHANNEL_CONF | AV_CODEC_CAP_DR1,
+    .caps_internal   = FF_CODEC_CAP_INIT_THREADSAFE,
     .channel_layouts = aac_channel_layout,
     .flush = flush,
     .priv_class      = &aac_decoder_class,
-    .profiles        = profiles,
+    .profiles        = NULL_IF_CONFIG_SMALL(ff_aac_profiles),
 };
 
 /*
@@ -3588,8 +576,9 @@ AVCodec ff_aac_latm_decoder = {
     .sample_fmts     = (const enum AVSampleFormat[]) {
         AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE
     },
-    .capabilities    = CODEC_CAP_CHANNEL_CONF | CODEC_CAP_DR1,
+    .capabilities    = AV_CODEC_CAP_CHANNEL_CONF | AV_CODEC_CAP_DR1,
+    .caps_internal   = FF_CODEC_CAP_INIT_THREADSAFE,
     .channel_layouts = aac_channel_layout,
     .flush = flush,
-    .profiles        = profiles,
+    .profiles        = NULL_IF_CONFIG_SMALL(ff_aac_profiles),
 };
diff --git a/libavcodec/aacdec_fixed.c b/libavcodec/aacdec_fixed.c
new file mode 100644
index 00000000..396a874d
--- /dev/null
+++ b/libavcodec/aacdec_fixed.c
@@ -0,0 +1,446 @@
+/*
+ * Copyright (c) 2013
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * AAC decoder fixed-point implementation
+ *
+ * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
+ * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * AAC decoder
+ * @author Oded Shimon  ( ods15 ods15 dyndns org )
+ * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
+ *
+ * Fixed point implementation
+ * @author Stanislav Ocovaj ( stanislav.ocovaj imgtec com )
+ */
+
+#define FFT_FLOAT 0
+#define FFT_FIXED_32 1
+#define USE_FIXED 1
+
+#include "libavutil/fixed_dsp.h"
+#include "libavutil/opt.h"
+#include "avcodec.h"
+#include "internal.h"
+#include "get_bits.h"
+#include "fft.h"
+#include "lpc.h"
+#include "kbdwin.h"
+#include "sinewin.h"
+
+#include "aac.h"
+#include "aactab.h"
+#include "aacdectab.h"
+#include "cbrt_tablegen.h"
+#include "sbr.h"
+#include "aacsbr.h"
+#include "mpeg4audio.h"
+#include "aacadtsdec.h"
+#include "profiles.h"
+#include "libavutil/intfloat.h"
+
+#include <math.h>
+#include <string.h>
+
+/*
+ * Start-up state of one predictor: r and cor zeroed, both variances { 0x20000000, 1 }.
+ */
+static av_always_inline void reset_predict_state(PredictorState *ps)
+{
+    const SoftFloat zero     = { .mant = 0,          .exp = 0 };
+    const SoftFloat var_init = { .mant = 0x20000000, .exp = 1 };
+
+    ps->r0   = zero;
+    ps->r1   = zero;
+    ps->cor0 = zero;
+    ps->cor1 = zero;
+    ps->var0 = var_init;
+    ps->var1 = var_init;
+}
+
+static const int exp2tab[4] = { Q31(1.0000000000/2), Q31(1.1892071150/2), Q31(1.4142135624/2), Q31(1.6817928305/2) };  // Q31() of 2^(k/4) / 2 for k = 0..3, i.e. 2^0, 2^0.25, 2^0.5, 2^0.75, each halved
+
+/* Unpack two values stored as 4-bit fields of idx, each biased by 4; returns dst + 2. */
+static inline int *DEC_SPAIR(int *dst, unsigned idx)
+{
+    *dst++ = (idx & 0xF) - 4;
+    *dst++ = ((idx >> 4) & 0xF) - 4;
+    return dst;
+}
+
+/* Unpack four values stored as 2-bit fields of idx (lowest field first), each biased by 1; returns dst + 4. */
+static inline int *DEC_SQUAD(int *dst, unsigned idx)
+{
+    int k;
+
+    for (k = 0; k < 4; k++, idx >>= 2)
+        dst[k] = (idx & 3) - 1;
+    return dst + 4;
+}
+
+/* Unpack two 4-bit magnitudes from idx; bit 1 of sign negates the first, bit 0 the second (factors are +-1 only for sign < 4). */
+static inline int *DEC_UPAIR(int *dst, unsigned idx, unsigned sign)
+{
+    *dst++ = (idx & 0xF)        * (1 - (sign & 0xFFFFFFFE));
+    *dst++ = ((idx >> 4) & 0xF) * (1 - ((sign & 1) << 1));
+    return dst;
+}
+
+/* Unpack four 2-bit magnitudes from idx and give each the sign found in bit 31 of sign (set = negative).
+ * idx >> 12 carries one flag per value: when it is set, sign moves up one bit after that value. */
+static inline int *DEC_UQUAD(int *dst, unsigned idx, unsigned sign)
+{
+    unsigned nz = idx >> 12;
+    dst[0] = (idx & 3) * (1 + (((int)sign >> 31) * 2));  /* * 2, not << 1: left-shifting -1 is undefined */
+    sign <<= nz & 1;
+    nz >>= 1;
+    dst[1] = (idx >> 2 & 3) * (1 + (((int)sign >> 31) * 2));
+    sign <<= nz & 1;
+    nz >>= 1;
+    dst[2] = (idx >> 4 & 3) * (1 + (((int)sign >> 31) * 2));
+    sign <<= nz & 1;
+    nz >>= 1;
+    dst[3] = (idx >> 6 & 3) * (1 + (((int)sign >> 31) * 2));
+    return dst + 4;
+}
+
+/* In place: coefs[i] becomes cbrt_tab[|coefs[i]|] carrying the sign of coefs[i], for i in [0, len). */
+static void vector_pow43(int *coefs, int len)
+{
+    int n;
+
+    for (n = 0; n < len; n++) {
+        const int v = coefs[n];
+
+        /* the table is indexed by magnitude; the sign is put back afterwards */
+        coefs[n] = v < 0 ? -(int)cbrt_tab[-v]
+                         :  (int)cbrt_tab[v];
+    }
+}
+
+/* Scale a band: dst[i] = src[i] * exp2tab[|scale| & 3] shifted right by 32 + offset - (|scale| >> 2)
+ * bits (left if that count is negative), rounded on the last right shift, negated when scale < 0. */
+static void subband_scale(int *dst, int *src, int scale, int offset, int len)
+{
+    int ssign = scale < 0 ? -1 : 1, s = FFABS(scale);
+    int i, out, c = exp2tab[s & 3];
+    s = offset - (s >> 2);
+    if (s > 31) {                       /* shifting an int by 32 or more is undefined; the value rounds to 0 */
+        for (i=0; i<len; i++)
+            dst[i] = 0;
+    } else if (s > 0) {
+        for (i=0; i<len; i++) {
+            out = (int)(((int64_t)src[i] * c) >> 32);
+            dst[i] = ((int)(out + (1U << (s-1))) >> s) * ssign;
+        }
+    } else if (s > -32) {               /* single 64-bit shift by s + 32, which is 1..32 here */
+        for (i=0; i<len; i++) {
+            out = (int)((int64_t)((int64_t)src[i] * c + (1U << (s+31))) >> (s+32));
+            dst[i] = out * ssign;
+        }
+    } else {                            /* s <= -32: left shift by -32 - s; 64 or more bits leaves nothing */
+        for (i=0; i<len; i++)
+            dst[i] = s < -95 ? 0 : (int)(((uint64_t)src[i] * c) << (-32 - s)) * ssign;
+    }
+}
+
+/* Scale noise in place: coefs[i] * (exp2tab[|scale| & 3] / band_energy), shifted right by
+ * 53 + nlz - (|scale| >> 2) bits (left if that count is negative), negated when scale < 0. */
+static void noise_scale(int *coefs, int scale, int band_energy, int len)
+{
+    int ssign = scale < 0 ? -1 : 1, s = FFABS(scale);
+    int i, out, nlz = 0, c = exp2tab[s & 3];
+
+    while (band_energy > 0x7fff) {          /* bring the divisor down to 15 bits ... */
+        band_energy >>= 1;
+        nlz++;                              /* ... and account for each halving in the shift count */
+    }
+    c = band_energy ? c / band_energy : 0;  /* dividing by a zero energy would trap; c = 0 gives all-zero output */
+    s = 21 + nlz - (s >> 2);
+    if (s > 31) {                           /* shifting an int by 32 or more is undefined; the value rounds to 0 */
+        for (i=0; i<len; i++)
+            coefs[i] = 0;
+    } else if (s > 0) {
+        for (i=0; i<len; i++) {
+            out = (int)(((int64_t)coefs[i] * c) >> 32);
+            coefs[i] = ((int)(out + (1U << (s-1))) >> s) * ssign;
+        }
+    } else if (s > -32) {                   /* single 64-bit shift by s + 32, which is 1..32 here */
+        for (i=0; i<len; i++) {
+            out = (int)((int64_t)((int64_t)coefs[i] * c + (1U << (s+31))) >> (s+32));
+            coefs[i] = out * ssign;
+        }
+    } else {                                /* s <= -32: left shift by -32 - s; 64 or more bits leaves nothing */
+        for (i=0; i<len; i++)
+            coefs[i] = s < -95 ? 0 : (int)(((uint64_t)coefs[i] * c) << (-32 - s)) * ssign;
+    }
+}
+
+/* Round the magnitude of pf.mant to a multiple of 0x00400000 (half of that is added before masking). */
+static av_always_inline SoftFloat flt16_round(SoftFloat pf)
+{
+    const int sign = pf.mant >> 31;                      /* sign mask of the mantissa */
+    SoftFloat res;
+
+    res.exp  = pf.exp;
+    res.mant = (pf.mant ^ sign) - sign;                  /* magnitude */
+    res.mant = (res.mant + 0x00200000U) & 0xFFC00000U;
+    res.mant = (res.mant ^ sign) - sign;                 /* sign put back */
+
+    return res;
+}
+
+/* Variant of flt16_round that adds 0x001FFFFF plus a tie-break term before masking to 0xFFC00000. */
+static av_always_inline SoftFloat flt16_even(SoftFloat pf)
+{
+    SoftFloat tmp;
+    int s;
+    tmp.exp = pf.exp;
+    s = pf.mant >> 31;                /* sign mask of the mantissa */
+    tmp.mant = (pf.mant ^ s) - s;     /* magnitude */
+    /* NOTE(review): '>>' binds tighter than '&', so the term below is tmp.mant & 0x40, not bit 22 -- confirm intent */
+    tmp.mant = (tmp.mant + 0x001FFFFFU + (tmp.mant & 0x00400000U >> 16)) & 0xFFC00000U;
+    tmp.mant = (tmp.mant ^ s) - s;    /* sign put back */
+    return tmp;
+}
+
+/* Truncate the magnitude of pf.mant to a multiple of 0x00400000; exponent and sign are kept. */
+static av_always_inline SoftFloat flt16_trunc(SoftFloat pf)
+{
+    const int sign = pf.mant >> 31;          /* sign mask of the mantissa */
+    SoftFloat res;
+
+    res.exp   = pf.exp;
+    res.mant  = (pf.mant ^ sign) - sign;     /* magnitude */
+    res.mant &= 0xFFC00000U;
+    res.mant  = (res.mant ^ sign) - sign;    /* sign put back */
+
+    return res;
+}
+
+/**
+ * Run one prediction step for a single coefficient.
+ *
+ * @param ps            predictor state; read first, then updated from the resulting *coef
+ * @param coef          coefficient; the rounded prediction is added to it if output_enable is set
+ * @param output_enable nonzero to apply the prediction, zero to only update the state
+ */
+static av_always_inline void predict(PredictorState *ps, int *coef,
+                                     int output_enable)
+{
+    const SoftFloat a     = { 1023410176, 0 };  // 61.0 / 64
+    const SoftFloat alpha = {  973078528, 0 };  // 29.0 / 32
+    SoftFloat e0, e1, pv, tmp;
+    SoftFloat k1 = { 0, 0 }, k2 = { 0, 0 };     // stay 0 unless the variance exceeds { 0x20000000, 1 }
+    SoftFloat   r0 = ps->r0,     r1 = ps->r1;
+    SoftFloat cor0 = ps->cor0, cor1 = ps->cor1;
+    SoftFloat var0 = ps->var0, var1 = ps->var1;
+
+    if (var0.exp > 1 || (var0.exp == 1 && var0.mant > 0x20000000))
+        k1 = av_mul_sf(cor0, flt16_even(av_div_sf(a, var0)));
+    if (var1.exp > 1 || (var1.exp == 1 && var1.mant > 0x20000000))
+        k2 = av_mul_sf(cor1, flt16_even(av_div_sf(a, var1)));
+
+    /* pv = k1 * r0 + k2 * r1, rounded; tmp keeps the k1 * r0 term for e1 below */
+    tmp = av_mul_sf(k1, r0);
+    pv = flt16_round(av_add_sf(tmp, av_mul_sf(k2, r1)));
+    if (output_enable) {
+        int shift = 28 - pv.exp;
+
+        /* A negative or oversized shift count is undefined in C, so each direction has its own
+         * in-range branch; adding as unsigned makes an overflowing sum wrap instead. */
+        if (shift > 0 && shift < 31)
+            *coef += (unsigned)((pv.mant + (1 << (shift - 1))) >> shift);
+        else if (shift <= 0 && shift > -32)
+            *coef += (unsigned)pv.mant << -shift;
+    }
+
+    e0 = av_int2sf(*coef, 2);
+    e1 = av_sub_sf(e0, tmp);
+
+    /* state update from the (possibly corrected) coefficient e0 and from e1 = e0 - k1 * r0 */
+    ps->cor1 = flt16_trunc(av_add_sf(av_mul_sf(alpha, cor1), av_mul_sf(r1, e1)));
+    tmp = av_add_sf(av_mul_sf(r1, r1), av_mul_sf(e1, e1));
+    tmp.exp--;                                  // one exponent step down, presumably the 0.5 factor
+    ps->var1 = flt16_trunc(av_add_sf(av_mul_sf(alpha, var1), tmp));
+    ps->cor0 = flt16_trunc(av_add_sf(av_mul_sf(alpha, cor0), av_mul_sf(r0, e0)));
+    tmp = av_add_sf(av_mul_sf(r0, r0), av_mul_sf(e0, e0));
+    tmp.exp--;
+    ps->var0 = flt16_trunc(av_add_sf(av_mul_sf(alpha, var0), tmp));
+
+    ps->r1 = flt16_trunc(av_mul_sf(a, av_sub_sf(r0, av_mul_sf(k1, e0))));
+    ps->r0 = flt16_trunc(av_mul_sf(a, e0));
+}
+
+
+static const int cce_scale_fixed[8] = { // Q30() of 2^(k/8) for k = 0..7; the coupling code looks it up with (gain & 7)
+    Q30(1.0),          //2^(0/8)
+    Q30(1.0905077327), //2^(1/8)
+    Q30(1.1892071150), //2^(2/8)
+    Q30(1.2968395547), //2^(3/8)
+    Q30(1.4142135624), //2^(4/8)
+    Q30(1.5422108254), //2^(5/8)
+    Q30(1.6817928305), //2^(6/8)
+    Q30(1.8340080864), //2^(7/8)
+};
+
+/**
+ * Apply dependent channel coupling (applied before IMDCT): add the gain-scaled
+ * spectrum of cce->ch[0] to target->coeffs for every band that is not ZERO_BT.
+ * @param   index   index into coupling gain array
+ */
+static void apply_dependent_coupling_fixed(AACContext *ac,
+                                     SingleChannelElement *target,
+                                     ChannelElement *cce, int index)
+{
+    IndividualChannelStream *ics = &cce->ch[0].ics;
+    const uint16_t *offsets = ics->swb_offset;
+    int *dest = target->coeffs;
+    const int *src = cce->ch[0].coeffs;
+    int g, i, group, k, idx = 0; // idx runs over (window group, band) pairs
+    if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) {
+        av_log(ac->avctx, AV_LOG_ERROR,
+               "Dependent coupling is not supported together with LTP\n");
+        return;
+    }
+    for (g = 0; g < ics->num_window_groups; g++) {
+        for (i = 0; i < ics->max_sfb; i++, idx++) {
+            if (cce->ch[0].band_type[idx] != ZERO_BT) {
+                const int gain = cce->coup.gain[index][idx];
+                int shift, round, c, tmp;
+
+                if (gain < 0) { // negative gain: negate the multiplier, use |gain| for the scale
+                    c = -cce_scale_fixed[-gain & 7];
+                    shift = (-gain-1024) >> 3;
+                } else {
+                    c = cce_scale_fixed[gain & 7];
+                    shift = (gain-1024) >> 3;
+                }
+
+                if (shift <= -32 || shift >= 32) // term is exactly 0 (<= -32) or wraps to 0 mod 2^32 (>= 32); such shift counts would be UB
+                    continue;
+                if (shift < 0) {
+                    shift = -shift;
+                    round = 1 << (shift - 1); // shift is 1..31 here
+
+                    for (group = 0; group < ics->group_len[g]; group++) {
+                        for (k = offsets[i]; k < offsets[i + 1]; k++) {
+                            tmp = (int)(((int64_t)src[group * 128 + k] * c + \
+                                       (int64_t)0x1000000000) >> 37);
+                            dest[group * 128 + k] += (int)((tmp + (int64_t)round) >> shift); // 64-bit add: tmp + round may exceed int
+                        }
+                    }
+                } else {
+                    for (group = 0; group < ics->group_len[g]; group++) {
+                        for (k = offsets[i]; k < offsets[i + 1]; k++) {
+                            tmp = (int)(((int64_t)src[group * 128 + k] * c + \
+                                        (int64_t)0x1000000000) >> 37);
+                            dest[group * 128 + k] += tmp * (1U << shift); // unsigned multiply: left-shifting a negative int is UB
+                        }
+                    }
+                }
+            }
+        }
+        dest += ics->group_len[g] * 128; // advance past this window group
+        src  += ics->group_len[g] * 128;
+    }
+}
+
+/**
+ * Apply independent channel coupling (applied after IMDCT): add the gain-scaled
+ * cce->ch[0].ret samples to target->ret, using only coup.gain[index][0].
+ * @param   index   index into coupling gain array
+ */
+static void apply_independent_coupling_fixed(AACContext *ac,
+                                       SingleChannelElement *target,
+                                       ChannelElement *cce, int index)
+{
+    int i, c, shift, round, tmp;
+    const int gain = cce->coup.gain[index][0];
+    const int *src = cce->ch[0].ret;
+    int *dest = target->ret;
+    const int len = 1024 << (ac->oc[1].m4ac.sbr == 1); // 2048 samples when m4ac.sbr == 1, else 1024
+
+    c = cce_scale_fixed[gain & 7]; // NOTE(review): unlike the dependent variant there is no gain < 0 branch - confirm
+    shift = (gain-1024) >> 3;
+    if (shift <= -32 || shift >= 32) // term is exactly 0 (<= -32) or wraps to 0 mod 2^32 (>= 32); such shift counts would be UB
+        return;
+    if (shift < 0) {
+        shift = -shift;
+        round = 1 << (shift - 1); // shift is 1..31 here
+        for (i = 0; i < len; i++) {
+            tmp = (int)(((int64_t)src[i] * c + (int64_t)0x1000000000) >> 37);
+            dest[i] += (int)((tmp + (int64_t)round) >> shift); // 64-bit add: tmp + round may exceed int
+        }
+    } else {
+      for (i = 0; i < len; i++) {
+          tmp = (int)(((int64_t)src[i] * c + (int64_t)0x1000000000) >> 37);
+          dest[i] += tmp * (1U << shift); // unsigned multiply: left-shifting a negative int is UB
+      }
+    }
+}
+
+#include "aacdec_template.c"
+
+AVCodec ff_aac_fixed_decoder = { // codec descriptor registered under the name "aac_fixed"
+    .name            = "aac_fixed",
+    .long_name       = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
+    .type            = AVMEDIA_TYPE_AUDIO,
+    .id              = AV_CODEC_ID_AAC,
+    .priv_data_size  = sizeof(AACContext), // private context is an AACContext (read back via avctx->priv_data)
+    .init            = aac_decode_init,
+    .close           = aac_decode_close,
+    .decode          = aac_decode_frame,
+    .sample_fmts     = (const enum AVSampleFormat[]) { // list terminated by AV_SAMPLE_FMT_NONE
+        AV_SAMPLE_FMT_S32P, AV_SAMPLE_FMT_NONE
+    },
+    .capabilities    = AV_CODEC_CAP_CHANNEL_CONF | AV_CODEC_CAP_DR1,
+    .caps_internal   = FF_CODEC_CAP_INIT_THREADSAFE,
+    .channel_layouts = aac_channel_layout,
+    .profiles        = NULL_IF_CONFIG_SMALL(ff_aac_profiles),
+    .flush = flush, // flush() is defined in the aacdec_template.c included above
+};
diff --git a/libavcodec/aacdec_template.c b/libavcodec/aacdec_template.c
new file mode 100644
index 00000000..6bc94c87
--- /dev/null
+++ b/libavcodec/aacdec_template.c
@@ -0,0 +1,3238 @@
+/*
+ * AAC decoder
+ * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
+ * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
+ * Copyright (c) 2008-2013 Alex Converse <alex.converse@gmail.com>
+ *
+ * AAC LATM decoder
+ * Copyright (c) 2008-2010 Paul Kendall <paul@kcbbs.gen.nz>
+ * Copyright (c) 2010      Janne Grunau <janne-libav@jannau.net>
+ *
+ * AAC decoder fixed-point implementation
+ * Copyright (c) 2013
+ *      MIPS Technologies, Inc., California.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * AAC decoder
+ * @author Oded Shimon  ( ods15 ods15 dyndns org )
+ * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
+ *
+ * AAC decoder fixed-point implementation
+ * @author Stanislav Ocovaj ( stanislav.ocovaj imgtec com )
+ * @author Nedeljko Babic ( nedeljko.babic imgtec com )
+ */
+
+/*
+ * supported tools
+ *
+ * Support?                     Name
+ * N (code in SoC repo)         gain control
+ * Y                            block switching
+ * Y                            window shapes - standard
+ * N                            window shapes - Low Delay
+ * Y                            filterbank - standard
+ * N (code in SoC repo)         filterbank - Scalable Sample Rate
+ * Y                            Temporal Noise Shaping
+ * Y                            Long Term Prediction
+ * Y                            intensity stereo
+ * Y                            channel coupling
+ * Y                            frequency domain prediction
+ * Y                            Perceptual Noise Substitution
+ * Y                            Mid/Side stereo
+ * N                            Scalable Inverse AAC Quantization
+ * N                            Frequency Selective Switch
+ * N                            upsampling filter
+ * Y                            quantization & coding - AAC
+ * N                            quantization & coding - TwinVQ
+ * N                            quantization & coding - BSAC
+ * N                            AAC Error Resilience tools
+ * N                            Error Resilience payload syntax
+ * N                            Error Protection tool
+ * N                            CELP
+ * N                            Silence Compression
+ * N                            HVXC
+ * N                            HVXC 4kbits/s VR
+ * N                            Structured Audio tools
+ * N                            Structured Audio Sample Bank Format
+ * N                            MIDI
+ * N                            Harmonic and Individual Lines plus Noise
+ * N                            Text-To-Speech Interface
+ * Y                            Spectral Band Replication
+ * Y (not in this code)         Layer-1
+ * Y (not in this code)         Layer-2
+ * Y (not in this code)         Layer-3
+ * N                            SinuSoidal Coding (Transient, Sinusoid, Noise)
+ * Y                            Parametric Stereo
+ * N                            Direct Stream Transfer
+ * Y  (not in fixed point code) Enhanced AAC Low Delay (ER AAC ELD)
+ *
+ * Note: - HE AAC v1 comprises LC AAC with Spectral Band Replication.
+ *       - HE AAC v2 comprises LC AAC with Spectral Band Replication and
+ *         Parametric Stereo.
+ */
+
+#include "libavutil/thread.h"
+
+static VLC vlc_scalefactors; // file-scope VLC object (presumably the scalefactor code table - confirm)
+static VLC vlc_spectral[11]; // eleven file-scope VLC objects (presumably one per spectral codebook - confirm)
+
+static int output_configure(AACContext *ac, // forward declaration: pop_output_configuration() calls it before the definition
+                            uint8_t layout_map[MAX_ELEM_ID*4][3], int tags,
+                            enum OCStatus oc_type, int get_new_frame);
+
+#define overread_err "Input buffer exhausted before END element found\n" // log text; decode_pce() appends it to a "decode_pce: " prefix
+
+static int count_channels(uint8_t (*layout)[3], int tags)
+{
+    int tag, total = 0;   // total: output channels counted so far
+    for (tag = 0; tag < tags; tag++) {
+        const int position = layout[tag][2];
+        if (position == AAC_CHANNEL_OFF || position == AAC_CHANNEL_CC)
+            continue;     // switched-off and coupling elements add no output channel
+        total += layout[tag][0] == TYPE_CPE ? 2 : 1; // a CPE counts as two channels
+    }
+    return total;
+}
+
+/**
+ * Check for the channel element in the current channel position configuration.
+ * If it exists, make sure the appropriate element is allocated and map the
+ * channel order to match the internal FFmpeg channel layout.
+ * A zero che_pos instead closes the element's SBR context and frees the element.
+ * @param   che_pos current channel position configuration
+ * @param   type channel element type
+ * @param   id channel element id
+ * @param   channels in/out: output channels assigned so far; incremented per channel added
+ *
+ * @return  0 on success, AVERROR_INVALIDDATA or AVERROR(ENOMEM) on failure
+ */
+static av_cold int che_configure(AACContext *ac,
+                                 enum ChannelPosition che_pos,
+                                 int type, int id, int *channels)
+{
+    if (*channels >= MAX_CHANNELS) // no room left for further output channels
+        return AVERROR_INVALIDDATA;
+    if (che_pos) {
+        if (!ac->che[type][id]) { // allocate the element lazily, zero-initialised
+            if (!(ac->che[type][id] = av_mallocz(sizeof(ChannelElement))))
+                return AVERROR(ENOMEM);
+            AAC_RENAME(ff_aac_sbr_ctx_init)(ac, &ac->che[type][id]->sbr);
+        }
+        if (type != TYPE_CCE) { // coupling elements get no output slot
+            if (*channels >= MAX_CHANNELS - (type == TYPE_CPE || (type == TYPE_SCE && ac->oc[1].m4ac.ps == 1))) { // one extra slot is needed when two channels are added
+                av_log(ac->avctx, AV_LOG_ERROR, "Too many channels\n");
+                return AVERROR_INVALIDDATA;
+            }
+            ac->output_element[(*channels)++] = &ac->che[type][id]->ch[0];
+            if (type == TYPE_CPE ||
+                (type == TYPE_SCE && ac->oc[1].m4ac.ps == 1)) { // second channel for a CPE, or an SCE when m4ac.ps == 1
+                ac->output_element[(*channels)++] = &ac->che[type][id]->ch[1];
+            }
+        }
+    } else {
+        if (ac->che[type][id])
+            AAC_RENAME(ff_aac_sbr_ctx_close)(&ac->che[type][id]->sbr);
+        av_freep(&ac->che[type][id]); // called even when the pointer is already NULL
+    }
+    return 0;
+}
+
+static int frame_configure_elements(AVCodecContext *avctx) // returns 0 on success, 1 if there are no channels, < 0 if ff_get_buffer() fails
+{
+    AACContext *ac = avctx->priv_data;
+    int type, id, ch, ret;
+
+    /* set channel pointers to internal buffers by default */
+    for (type = 0; type < 4; type++) {
+        for (id = 0; id < MAX_ELEM_ID; id++) {
+            ChannelElement *che = ac->che[type][id];
+            if (che) { // only allocated elements are touched
+                che->ch[0].ret = che->ch[0].ret_buf;
+                che->ch[1].ret = che->ch[1].ret_buf;
+            }
+        }
+    }
+
+    /* get output buffer */
+    av_frame_unref(ac->frame);
+    if (!avctx->channels) // nothing to output: leave the frame unreferenced
+        return 1;
+
+    ac->frame->nb_samples = 2048; // fixed request of 2048 samples
+    if ((ret = ff_get_buffer(avctx, ac->frame, 0)) < 0)
+        return ret;
+
+    /* map output channel pointers to AVFrame data */
+    for (ch = 0; ch < avctx->channels; ch++) {
+        if (ac->output_element[ch]) // unset slots keep the internal buffer assigned above
+            ac->output_element[ch]->ret = (INTFLOAT *)ac->frame->extended_data[ch];
+    }
+
+    return 0;
+}
+
+struct elem_to_channel { // scratch record used by sniff_channel_order() to sort layout entries
+    uint64_t av_position; // AV_CH_* bit(s) for the element, or UINT64_MAX when none is assigned; sort key
+    uint8_t syn_ele;      // syntactic element type (TYPE_SCE, TYPE_CPE or TYPE_LFE are stored)
+    uint8_t elem_id;      // element id copied from layout_map[][1]
+    uint8_t aac_position; // AAC_CHANNEL_* position written back to layout_map[][2]
+};
+
+static int assign_pair(struct elem_to_channel e2c_vec[MAX_ELEM_ID], // fills e2c_vec[offset] (and [offset + 1]); returns entries consumed
+                       uint8_t (*layout_map)[3], int offset, uint64_t left,
+                       uint64_t right, int pos)
+{
+    if (layout_map[offset][0] == TYPE_CPE) { // one CPE covers both channels of the pair
+        e2c_vec[offset] = (struct elem_to_channel) {
+            .av_position  = left | right,
+            .syn_ele      = TYPE_CPE,
+            .elem_id      = layout_map[offset][1],
+            .aac_position = pos
+        };
+        return 1;
+    } else { // otherwise two consecutive entries are used, both recorded as TYPE_SCE
+        e2c_vec[offset] = (struct elem_to_channel) {
+            .av_position  = left,
+            .syn_ele      = TYPE_SCE,
+            .elem_id      = layout_map[offset][1],
+            .aac_position = pos
+        };
+        e2c_vec[offset + 1] = (struct elem_to_channel) {
+            .av_position  = right,
+            .syn_ele      = TYPE_SCE,
+            .elem_id      = layout_map[offset + 1][1],
+            .aac_position = pos
+        };
+        return 2;
+    }
+}
+
+static int count_paired_channels(uint8_t (*layout_map)[3], int tags, int pos, // counts channels in the run of entries at position pos starting at *current
+                                 int *current) // advanced past the run on success; left untouched when -1 is returned
+{
+    int num_pos_channels = 0;
+    int first_cpe        = 0; // set once a CPE has been seen in this run
+    int sce_parity       = 0; // 1 while an odd number of non-CPE entries is pending
+    int i;
+    for (i = *current; i < tags; i++) {
+        if (layout_map[i][2] != pos) // run ends at the first entry with another position
+            break;
+        if (layout_map[i][0] == TYPE_CPE) {
+            if (sce_parity) {
+                if (pos == AAC_CHANNEL_FRONT && !first_cpe) { // an odd single-element count is tolerated only before the first front CPE
+                    sce_parity = 0;
+                } else {
+                    return -1;
+                }
+            }
+            num_pos_channels += 2;
+            first_cpe         = 1;
+        } else {
+            num_pos_channels++;
+            sce_parity ^= 1;
+        }
+    }
+    if (sce_parity &&
+        ((pos == AAC_CHANNEL_FRONT && first_cpe) || pos == AAC_CHANNEL_SIDE)) // trailing odd element rejected after a front CPE, and always for side
+        return -1;
+    *current = i;
+    return num_pos_channels;
+}
+
+static uint64_t sniff_channel_order(uint8_t (*layout_map)[3], int tags) // returns the OR of assigned AV_CH_* bits, or 0 if the map is rejected; rewrites layout_map in sorted order
+{
+    int i, n, total_non_cc_elements;
+    struct elem_to_channel e2c_vec[4 * MAX_ELEM_ID] = { { 0 } };
+    int num_front_channels, num_side_channels, num_back_channels;
+    uint64_t layout;
+
+    if (FF_ARRAY_ELEMS(e2c_vec) < tags) // more tags than scratch entries
+        return 0;
+
+    i = 0; // i walks layout_map across the three counting passes
+    num_front_channels =
+        count_paired_channels(layout_map, tags, AAC_CHANNEL_FRONT, &i);
+    if (num_front_channels < 0)
+        return 0;
+    num_side_channels =
+        count_paired_channels(layout_map, tags, AAC_CHANNEL_SIDE, &i);
+    if (num_side_channels < 0)
+        return 0;
+    num_back_channels =
+        count_paired_channels(layout_map, tags, AAC_CHANNEL_BACK, &i);
+    if (num_back_channels < 0)
+        return 0;
+
+    if (num_side_channels == 0 && num_back_channels >= 4) { // no side entries: treat the first two back channels as side
+        num_side_channels = 2;
+        num_back_channels -= 2;
+    }
+
+    i = 0; // restart: i now indexes both layout_map and e2c_vec
+    if (num_front_channels & 1) { // odd front count: the first entry becomes front center
+        e2c_vec[i] = (struct elem_to_channel) {
+            .av_position  = AV_CH_FRONT_CENTER,
+            .syn_ele      = TYPE_SCE,
+            .elem_id      = layout_map[i][1],
+            .aac_position = AAC_CHANNEL_FRONT
+        };
+        i++;
+        num_front_channels--;
+    }
+    if (num_front_channels >= 4) { // with four or more left, the first pair is left/right of center
+        i += assign_pair(e2c_vec, layout_map, i,
+                         AV_CH_FRONT_LEFT_OF_CENTER,
+                         AV_CH_FRONT_RIGHT_OF_CENTER,
+                         AAC_CHANNEL_FRONT);
+        num_front_channels -= 2;
+    }
+    if (num_front_channels >= 2) {
+        i += assign_pair(e2c_vec, layout_map, i,
+                         AV_CH_FRONT_LEFT,
+                         AV_CH_FRONT_RIGHT,
+                         AAC_CHANNEL_FRONT);
+        num_front_channels -= 2;
+    }
+    while (num_front_channels >= 2) { // remaining front pairs get no AV_CH_* position
+        i += assign_pair(e2c_vec, layout_map, i,
+                         UINT64_MAX,
+                         UINT64_MAX,
+                         AAC_CHANNEL_FRONT);
+        num_front_channels -= 2;
+    }
+
+    if (num_side_channels >= 2) {
+        i += assign_pair(e2c_vec, layout_map, i,
+                         AV_CH_SIDE_LEFT,
+                         AV_CH_SIDE_RIGHT,
+                         AAC_CHANNEL_FRONT); // NOTE(review): tagged FRONT although the loop below uses AAC_CHANNEL_SIDE - confirm intended
+        num_side_channels -= 2;
+    }
+    while (num_side_channels >= 2) { // remaining side pairs get no AV_CH_* position
+        i += assign_pair(e2c_vec, layout_map, i,
+                         UINT64_MAX,
+                         UINT64_MAX,
+                         AAC_CHANNEL_SIDE);
+        num_side_channels -= 2;
+    }
+
+    while (num_back_channels >= 4) { // leading back pairs beyond the last one get no AV_CH_* position
+        i += assign_pair(e2c_vec, layout_map, i,
+                         UINT64_MAX,
+                         UINT64_MAX,
+                         AAC_CHANNEL_BACK);
+        num_back_channels -= 2;
+    }
+    if (num_back_channels >= 2) {
+        i += assign_pair(e2c_vec, layout_map, i,
+                         AV_CH_BACK_LEFT,
+                         AV_CH_BACK_RIGHT,
+                         AAC_CHANNEL_BACK);
+        num_back_channels -= 2;
+    }
+    if (num_back_channels) { // a single leftover back channel becomes back center
+        e2c_vec[i] = (struct elem_to_channel) {
+            .av_position  = AV_CH_BACK_CENTER,
+            .syn_ele      = TYPE_SCE,
+            .elem_id      = layout_map[i][1],
+            .aac_position = AAC_CHANNEL_BACK
+        };
+        i++;
+        num_back_channels--;
+    }
+
+    if (i < tags && layout_map[i][2] == AAC_CHANNEL_LFE) { // only the first LFE entry gets AV_CH_LOW_FREQUENCY
+        e2c_vec[i] = (struct elem_to_channel) {
+            .av_position  = AV_CH_LOW_FREQUENCY,
+            .syn_ele      = TYPE_LFE,
+            .elem_id      = layout_map[i][1],
+            .aac_position = AAC_CHANNEL_LFE
+        };
+        i++;
+    }
+    while (i < tags && layout_map[i][2] == AAC_CHANNEL_LFE) { // further LFE entries get no AV_CH_* position
+        e2c_vec[i] = (struct elem_to_channel) {
+            .av_position  = UINT64_MAX,
+            .syn_ele      = TYPE_LFE,
+            .elem_id      = layout_map[i][1],
+            .aac_position = AAC_CHANNEL_LFE
+        };
+        i++;
+    }
+
+    // Must choose a stable sort
+    total_non_cc_elements = n = i; // entries from index i onwards are left as they were
+    do { // adjacent-swap passes ordered by av_position; only strictly greater neighbours are swapped
+        int next_n = 0;
+        for (i = 1; i < n; i++)
+            if (e2c_vec[i - 1].av_position > e2c_vec[i].av_position) {
+                FFSWAP(struct elem_to_channel, e2c_vec[i - 1], e2c_vec[i]);
+                next_n = i; // nothing past the last swap needs another pass
+            }
+        n = next_n;
+    } while (n > 0);
+
+    layout = 0;
+    for (i = 0; i < total_non_cc_elements; i++) { // write the sorted entries back and collect the mask
+        layout_map[i][0] = e2c_vec[i].syn_ele;
+        layout_map[i][1] = e2c_vec[i].elem_id;
+        layout_map[i][2] = e2c_vec[i].aac_position;
+        if (e2c_vec[i].av_position != UINT64_MAX) { // unpositioned entries contribute no bits
+            layout |= e2c_vec[i].av_position;
+        }
+    }
+
+    return layout;
+}
+
+/**
+ * Save the current configuration oc[1] into oc[0] if oc[1] is locked or oc[0] is still unset.
+ */
+static void push_output_configuration(AACContext *ac) {
+    if (ac->oc[1].status == OC_LOCKED || ac->oc[0].status == OC_NONE) { // also saves an unlocked oc[1] when nothing has been saved yet
+        ac->oc[0] = ac->oc[1];
+    }
+    ac->oc[1].status = OC_NONE; // the current configuration is always marked unset afterwards
+}
+
+/**
+ * Restore the previous output configuration if and only if the current
+ * configuration is unlocked.
+ * Nothing is restored when no configuration has been saved (oc[0] is OC_NONE).
+ */
+static void pop_output_configuration(AACContext *ac) {
+    if (ac->oc[1].status != OC_LOCKED && ac->oc[0].status != OC_NONE) {
+        ac->oc[1] = ac->oc[0];
+        ac->avctx->channels = ac->oc[1].channels;
+        ac->avctx->channel_layout = ac->oc[1].channel_layout;
+        output_configure(ac, ac->oc[1].layout_map, ac->oc[1].layout_map_tags, // return value is ignored here
+                         ac->oc[1].status, 0); // 0: do not request a new output frame
+    }
+}
+
+/**
+ * Configure output channel order based on the current program
+ * configuration element.
+ * Also stores the map, layout, channel count and oc_type in ac->oc[1].
+ * @return  0 on success; AVERROR_PATCHWELCOME or the failing callee's error code otherwise
+ */
+static int output_configure(AACContext *ac,
+                            uint8_t layout_map[MAX_ELEM_ID * 4][3], int tags,
+                            enum OCStatus oc_type, int get_new_frame)
+{
+    AVCodecContext *avctx = ac->avctx;
+    int i, channels = 0, ret;
+    uint64_t layout = 0;
+    uint8_t id_map[TYPE_END][MAX_ELEM_ID] = {{ 0 }}; // maps (type, stream id) to a per-type sequential index
+    uint8_t type_counts[TYPE_END] = { 0 };           // elements seen so far for each type
+
+    if (ac->oc[1].layout_map != layout_map) { // skip the copy when the caller passed oc[1]'s own map
+        memcpy(ac->oc[1].layout_map, layout_map, tags * sizeof(layout_map[0]));
+        ac->oc[1].layout_map_tags = tags;
+    }
+    for (i = 0; i < tags; i++) {
+        int type =         layout_map[i][0];
+        int id =           layout_map[i][1];
+        id_map[type][id] = type_counts[type]++; // ids are renumbered in order of appearance
+        if (id_map[type][id] >= MAX_ELEM_ID) {
+            avpriv_request_sample(ac->avctx, "Remapped id too large\n");
+            return AVERROR_PATCHWELCOME;
+        }
+    }
+    // Try to sniff a reasonable channel order, otherwise output the
+    // channels in the order the PCE declared them.
+    if (avctx->request_channel_layout != AV_CH_LAYOUT_NATIVE)
+        layout = sniff_channel_order(layout_map, tags); // may reorder layout_map in place
+    for (i = 0; i < tags; i++) {
+        int type =     layout_map[i][0];
+        int id =       layout_map[i][1];
+        int iid =      id_map[type][id];
+        int position = layout_map[i][2];
+        // Allocate or free elements depending on if they are in the
+        // current program configuration.
+        ret = che_configure(ac, position, type, iid, &channels);
+        if (ret < 0)
+            return ret;
+        ac->tag_che_map[type][id] = ac->che[type][iid]; // stream id -> element stored at the remapped index
+    }
+    if (ac->oc[1].m4ac.ps == 1 && channels == 2) { // m4ac.ps == 1 with two channels: accept only a center-only layout, reported as left/right
+        if (layout == AV_CH_FRONT_CENTER) {
+            layout = AV_CH_FRONT_LEFT|AV_CH_FRONT_RIGHT;
+        } else {
+            layout = 0;
+        }
+    }
+
+    if (layout) avctx->channel_layout = layout; // avctx keeps its previous layout when none was found
+                            ac->oc[1].channel_layout = layout; // NOTE: unconditional despite the indentation
+    avctx->channels       = ac->oc[1].channels       = channels;
+    ac->oc[1].status = oc_type;
+
+    if (get_new_frame) {
+        if ((ret = frame_configure_elements(ac->avctx)) < 0) // a positive return from the callee is not treated as an error
+            return ret;
+    }
+
+    return 0;
+}
+
+static void flush(AVCodecContext *avctx)
+{
+    AACContext *ac = avctx->priv_data;
+    int kind, elem;
+
+    // zero the `saved` buffer of both channels of every allocated element
+    for (kind = 3; kind >= 0; kind--) {
+        for (elem = 0; elem < MAX_ELEM_ID; elem++) {
+            ChannelElement *che = ac->che[kind][elem];
+            if (!che)
+                continue;
+            memset(che->ch[0].saved, 0, sizeof(che->ch[0].saved));
+            memset(che->ch[1].saved, 0, sizeof(che->ch[1].saved));
+        }
+    }
+}
+
+/**
+ * Set up channel positions based on a default channel configuration
+ * as specified in table 1.17.
+ * Accepts channel_config 1..7, 11 and 12; writes *tags and the first *tags rows of layout_map.
+ * @return  0 on success, AVERROR_INVALIDDATA for any other channel_config
+ */
+static int set_default_channel_config(AVCodecContext *avctx,
+                                      uint8_t (*layout_map)[3],
+                                      int *tags,
+                                      int channel_config)
+{
+    if (channel_config < 1 || (channel_config > 7 && channel_config < 11) ||
+        channel_config > 12) { // 0, 8..10 and anything above 12 are rejected
+        av_log(avctx, AV_LOG_ERROR,
+               "invalid default channel configuration (%d)\n",
+               channel_config);
+        return AVERROR_INVALIDDATA;
+    }
+    *tags = tags_per_config[channel_config]; // indexed by the configuration number itself
+    memcpy(layout_map, aac_channel_layout_map[channel_config - 1], // this table is indexed from configuration 1
+           *tags * sizeof(*layout_map));
+
+    /*
+     * AAC specification has 7.1(wide) as a default layout for 8-channel streams.
+     * However, at least Nero AAC encoder encodes 7.1 streams using the default
+     * channel config 7, mapping the side channels of the original audio stream
+     * to the second AAC_CHANNEL_FRONT pair in the AAC stream. Similarly, e.g. FAAD
+     * decodes the second AAC_CHANNEL_FRONT pair as side channels, therefore decoding
+     * the incorrect streams as if they were correct (and as the encoder intended).
+     *
+     * As actual intended 7.1(wide) streams are very rare, default to assuming a
+     * 7.1 layout was intended.
+     */
+    if (channel_config == 7 && avctx->strict_std_compliance < FF_COMPLIANCE_STRICT) {
+        av_log(avctx, AV_LOG_INFO, "Assuming an incorrectly encoded 7.1 channel layout"
+               " instead of a spec-compliant 7.1(wide) layout, use -strict %d to decode"
+               " according to the specification instead.\n", FF_COMPLIANCE_STRICT);
+        layout_map[2][2] = AAC_CHANNEL_SIDE; // retag the third entry's position as side
+    }
+
+    return 0;
+}
+
+static ChannelElement *get_che(AACContext *ac, int type, int elem_id) // returns the element mapped to (type, elem_id), or NULL when none applies
+{
+    /* For PCE based channel configurations map the channels solely based
+     * on tags. */
+    if (!ac->oc[1].m4ac.chan_config) {
+        return ac->tag_che_map[type][elem_id];
+    }
+    // Allow single CPE stereo files to be signalled with mono configuration.
+    if (!ac->tags_mapped && type == TYPE_CPE &&
+        ac->oc[1].m4ac.chan_config == 1) {
+        uint8_t layout_map[MAX_ELEM_ID*4][3];
+        int layout_map_tags;
+        push_output_configuration(ac); // save the current configuration before trying default config 2
+
+        av_log(ac->avctx, AV_LOG_DEBUG, "mono with CPE\n");
+
+        if (set_default_channel_config(ac->avctx, layout_map,
+                                       &layout_map_tags, 2) < 0)
+            return NULL;
+        if (output_configure(ac, layout_map, layout_map_tags,
+                             OC_TRIAL_FRAME, 1) < 0)
+            return NULL;
+
+        ac->oc[1].m4ac.chan_config = 2; // from here on the stream is handled as config 2
+        ac->oc[1].m4ac.ps = 0;
+    }
+    // And vice-versa
+    if (!ac->tags_mapped && type == TYPE_SCE &&
+        ac->oc[1].m4ac.chan_config == 2) {
+        uint8_t layout_map[MAX_ELEM_ID * 4][3];
+        int layout_map_tags;
+        push_output_configuration(ac); // save the current configuration before trying default config 1
+
+        av_log(ac->avctx, AV_LOG_DEBUG, "stereo with SCE\n");
+
+        if (set_default_channel_config(ac->avctx, layout_map,
+                                       &layout_map_tags, 1) < 0)
+            return NULL;
+        if (output_configure(ac, layout_map, layout_map_tags,
+                             OC_TRIAL_FRAME, 1) < 0)
+            return NULL;
+
+        ac->oc[1].m4ac.chan_config = 1; // from here on the stream is handled as config 1
+        if (ac->oc[1].m4ac.sbr)
+            ac->oc[1].m4ac.ps = -1;
+    }
+    /* For indexed channel configurations map the channels solely based
+     * on position. */
+    switch (ac->oc[1].m4ac.chan_config) { // cases fall through on purpose: each config also tries the checks of the smaller ones
+    case 12:
+    case 7:
+        if (ac->tags_mapped == 3 && type == TYPE_CPE) { // fourth mapped tag being a CPE -> CPE[2]
+            ac->tags_mapped++;
+            return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][2];
+        }
+    case 11: // entered directly or by fall-through from 12/7
+        if (ac->tags_mapped == 2 &&
+            ac->oc[1].m4ac.chan_config == 11 && // restricts this check to config 11 despite the fall-through
+            type == TYPE_SCE) {
+            ac->tags_mapped++;
+            return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][1];
+        }
+    case 6: // entered directly or by fall-through
+        /* Some streams incorrectly code 5.1 audio as
+         * SCE[0] CPE[0] CPE[1] SCE[1]
+         * instead of
+         * SCE[0] CPE[0] CPE[1] LFE[0].
+         * If we seem to have encountered such a stream, transfer
+         * the LFE[0] element to the SCE[1]'s mapping */
+        if (ac->tags_mapped == tags_per_config[ac->oc[1].m4ac.chan_config] - 1 && (type == TYPE_LFE || type == TYPE_SCE)) {
+            if (!ac->warned_remapping_once && (type != TYPE_LFE || elem_id != 0)) { // warn once, and only if the tag is not already LFE[0]
+                av_log(ac->avctx, AV_LOG_WARNING,
+                   "This stream seems to incorrectly report its last channel as %s[%d], mapping to LFE[0]\n",
+                   type == TYPE_SCE ? "SCE" : "LFE", elem_id);
+                ac->warned_remapping_once++;
+            }
+            ac->tags_mapped++;
+            return ac->tag_che_map[type][elem_id] = ac->che[TYPE_LFE][0];
+        }
+    case 5: // entered directly or by fall-through
+        if (ac->tags_mapped == 2 && type == TYPE_CPE) { // third mapped tag being a CPE -> CPE[1]
+            ac->tags_mapped++;
+            return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][1];
+        }
+    case 4: // entered directly or by fall-through
+        /* Some streams incorrectly code 4.0 audio as
+         * SCE[0] CPE[0] LFE[0]
+         * instead of
+         * SCE[0] CPE[0] SCE[1].
+         * If we seem to have encountered such a stream, transfer
+         * the SCE[1] element to the LFE[0]'s mapping */
+        if (ac->tags_mapped == tags_per_config[ac->oc[1].m4ac.chan_config] - 1 && (type == TYPE_LFE || type == TYPE_SCE)) {
+            if (!ac->warned_remapping_once && (type != TYPE_SCE || elem_id != 1)) { // warn once, and only if the tag is not already SCE[1]
+                av_log(ac->avctx, AV_LOG_WARNING,
+                   "This stream seems to incorrectly report its last channel as %s[%d], mapping to SCE[1]\n",
+                   type == TYPE_SCE ? "SCE" : "LFE", elem_id);
+                ac->warned_remapping_once++;
+            }
+            ac->tags_mapped++;
+            return ac->tag_che_map[type][elem_id] = ac->che[TYPE_SCE][1];
+        }
+        if (ac->tags_mapped == 2 &&
+            ac->oc[1].m4ac.chan_config == 4 && // restricts this check to config 4 despite the fall-through
+            type == TYPE_SCE) {
+            ac->tags_mapped++;
+            return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][1];
+        }
+    case 3:
+    case 2: // entered directly or by fall-through
+        if (ac->tags_mapped == (ac->oc[1].m4ac.chan_config != 2) && // CPE[0] is the first tag for config 2, the second tag otherwise
+            type == TYPE_CPE) {
+            ac->tags_mapped++;
+            return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][0];
+        } else if (ac->oc[1].m4ac.chan_config == 2) { // config 2 maps nothing but that CPE
+            return NULL;
+        }
+    case 1: // entered directly or by fall-through
+        if (!ac->tags_mapped && type == TYPE_SCE) { // an SCE as the very first tag -> SCE[0]
+            ac->tags_mapped++;
+            return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][0];
+        }
+    default: // also reached by fall-through when no check above matched
+        return NULL;
+    }
+}
+
+/**
+ * Decode an array of 4 bit element IDs, optionally interleaved with a
+ * stereo/mono switching bit.
+ * Writes n rows {syntactic element, element id, type} to layout_map.
+ * @param type speaker type/position for these channels
+ */
+static void decode_channel_map(uint8_t layout_map[][3],
+                               enum ChannelPosition type,
+                               GetBitContext *gb, int n)
+{
+    while (n--) {
+        enum RawDataBlockType syn_ele;
+        switch (type) {
+        case AAC_CHANNEL_FRONT:
+        case AAC_CHANNEL_BACK:
+        case AAC_CHANNEL_SIDE:
+            syn_ele = get_bits1(gb); // one bit read from the stream is used directly as the element type
+            break;
+        case AAC_CHANNEL_CC:
+            skip_bits1(gb); // one bit is consumed but ignored for coupling entries
+            syn_ele = TYPE_CCE;
+            break;
+        case AAC_CHANNEL_LFE:
+            syn_ele = TYPE_LFE; // no extra bit is read for LFE entries
+            break;
+        default:
+            // AAC_CHANNEL_OFF has no channel map
+            av_assert0(0);
+        }
+        layout_map[0][0] = syn_ele;
+        layout_map[0][1] = get_bits(gb, 4); // 4-bit element id
+        layout_map[0][2] = type;
+        layout_map++; // advance to the next output row
+    }
+}
+
+/**
+ * Decode program configuration element; reference: table 4.2.
+ * Fills layout_map with one {element type, element id, position} row per tag.
+ * @return  number of layout_map rows written, or AVERROR_INVALIDDATA on overread
+ */
+static int decode_pce(AVCodecContext *avctx, MPEG4AudioConfig *m4ac,
+                      uint8_t (*layout_map)[3],
+                      GetBitContext *gb)
+{
+    int num_front, num_side, num_back, num_lfe, num_assoc_data, num_cc;
+    int sampling_index;
+    int comment_len;
+    int tags;
+
+    skip_bits(gb, 2);  // object_type
+
+    sampling_index = get_bits(gb, 4);
+    if (m4ac->sampling_index != sampling_index) // mismatch is only reported, decoding continues
+        av_log(avctx, AV_LOG_WARNING,
+               "Sample rate index in program config element does not "
+               "match the sample rate index configured by the container.\n");
+
+    num_front       = get_bits(gb, 4);
+    num_side        = get_bits(gb, 4);
+    num_back        = get_bits(gb, 4);
+    num_lfe         = get_bits(gb, 2);
+    num_assoc_data  = get_bits(gb, 3);
+    num_cc          = get_bits(gb, 4);
+
+    if (get_bits1(gb))
+        skip_bits(gb, 4); // mono_mixdown_tag
+    if (get_bits1(gb))
+        skip_bits(gb, 4); // stereo_mixdown_tag
+
+    if (get_bits1(gb))
+        skip_bits(gb, 3); // mixdown_coeff_index and pseudo_surround
+
+    if (get_bits_left(gb) < 4 * (num_front + num_side + num_back + num_lfe + num_assoc_data + num_cc)) { // NOTE(review): lower bound only; front/side/back/CC rows consume 5 bits each below
+        av_log(avctx, AV_LOG_ERROR, "decode_pce: " overread_err);
+        return AVERROR_INVALIDDATA; // was a bare -1; now matches the comment-field overread path below
+    }
+    decode_channel_map(layout_map       , AAC_CHANNEL_FRONT, gb, num_front);
+    tags = num_front;
+    decode_channel_map(layout_map + tags, AAC_CHANNEL_SIDE,  gb, num_side);
+    tags += num_side;
+    decode_channel_map(layout_map + tags, AAC_CHANNEL_BACK,  gb, num_back);
+    tags += num_back;
+    decode_channel_map(layout_map + tags, AAC_CHANNEL_LFE,   gb, num_lfe);
+    tags += num_lfe;
+
+    skip_bits_long(gb, 4 * num_assoc_data); // associated data entries are skipped and produce no rows
+
+    decode_channel_map(layout_map + tags, AAC_CHANNEL_CC,    gb, num_cc);
+    tags += num_cc;
+
+    align_get_bits(gb);
+
+    /* comment field, first byte is length */
+    comment_len = get_bits(gb, 8) * 8; // byte count converted to bits
+    if (get_bits_left(gb) < comment_len) {
+        av_log(avctx, AV_LOG_ERROR, "decode_pce: " overread_err);
+        return AVERROR_INVALIDDATA;
+    }
+    skip_bits_long(gb, comment_len);
+    return tags;
+}
+
+/**
+ * Decode GA "General Audio" specific configuration; reference: table 4.1.
+ *
+ * @param   ac          pointer to AACContext, may be null; when null the
+ *                      parsed layout is not applied through output_configure()
+ * @param   avctx       pointer to AVCodecContext, used for logging
+ * @param   gb          bitstream reader positioned at the GA specific config
+ * @param   m4ac        audio config; object_type and sbr are read,
+ *                      frame_length_short and ps may be written
+ * @param   channel_config channel configuration index; 0 means the layout is
+ *                      read from a program config element in the stream
+ *
+ * @return  0 on success, otherwise the error code of the step that failed
+ */
+static int decode_ga_specific_config(AACContext *ac, AVCodecContext *avctx,
+                                     GetBitContext *gb,
+                                     MPEG4AudioConfig *m4ac,
+                                     int channel_config)
+{
+    int extension_flag, ret, ep_config, res_flags;
+    uint8_t layout_map[MAX_ELEM_ID*4][3];
+    int tags = 0;
+
+    if (get_bits1(gb)) { // frameLengthFlag
+        avpriv_request_sample(avctx, "960/120 MDCT window");
+        return AVERROR_PATCHWELCOME;
+    }
+    m4ac->frame_length_short = 0;
+
+    if (get_bits1(gb))       // dependsOnCoreCoder
+        skip_bits(gb, 14);   // coreCoderDelay
+    extension_flag = get_bits1(gb);
+
+    if (m4ac->object_type == AOT_AAC_SCALABLE ||
+        m4ac->object_type == AOT_ER_AAC_SCALABLE)
+        skip_bits(gb, 3);     // layerNr
+
+    // Channel layout: explicit PCE for configuration 0, a default map otherwise
+    if (channel_config == 0) {
+        skip_bits(gb, 4);  // element_instance_tag
+        tags = decode_pce(avctx, m4ac, layout_map, gb);
+        if (tags < 0)
+            return tags;
+    } else {
+        if ((ret = set_default_channel_config(avctx, layout_map,
+                                              &tags, channel_config)))
+            return ret;
+    }
+
+    // More than one channel forces ps to 0; otherwise an undecided ps (-1)
+    // combined with sbr == 1 is turned into ps = 1
+    if (count_channels(layout_map, tags) > 1) {
+        m4ac->ps = 0;
+    } else if (m4ac->sbr == 1 && m4ac->ps == -1)
+        m4ac->ps = 1;
+
+    if (ac && (ret = output_configure(ac, layout_map, tags, OC_GLOBAL_HDR, 0)))
+        return ret;
+
+    if (extension_flag) {
+        switch (m4ac->object_type) {
+        case AOT_ER_BSAC:
+            skip_bits(gb, 5);    // numOfSubFrame
+            skip_bits(gb, 11);   // layer_length
+            break;
+        case AOT_ER_AAC_LC:
+        case AOT_ER_AAC_LTP:
+        case AOT_ER_AAC_SCALABLE:
+        case AOT_ER_AAC_LD:
+            // any set resilience flag is reported as unsupported
+            res_flags = get_bits(gb, 3);
+            if (res_flags) {
+                avpriv_report_missing_feature(avctx,
+                                              "AAC data resilience (flags %x)",
+                                              res_flags);
+                return AVERROR_PATCHWELCOME;
+            }
+            break;
+        }
+        skip_bits1(gb);    // extensionFlag3 (TBD in version 3)
+    }
+    // The error resilient object types carry a 2 bit epConfig; only 0 is accepted
+    switch (m4ac->object_type) {
+    case AOT_ER_AAC_LC:
+    case AOT_ER_AAC_LTP:
+    case AOT_ER_AAC_SCALABLE:
+    case AOT_ER_AAC_LD:
+        ep_config = get_bits(gb, 2);
+        if (ep_config) {
+            avpriv_report_missing_feature(avctx,
+                                          "epConfig %d", ep_config);
+            return AVERROR_PATCHWELCOME;
+        }
+    }
+    return 0;
+}
+
+/**
+ * Decode the ELD specific configuration that follows the common part of an
+ * audio specific config.
+ *
+ * @param   ac          pointer to AACContext, may be null; when null the
+ *                      default layout is not applied through output_configure()
+ * @param   avctx       pointer to AVCodecContext, used for logging
+ * @param   gb          bitstream reader positioned at the ELD specific config
+ * @param   m4ac        audio config; ps and sbr are cleared, and
+ *                      frame_length_short is set in the non-fixed-point build
+ * @param   channel_config channel configuration index used to pick the
+ *                      default channel layout
+ *
+ * @return  0 on success, otherwise the error code of the step that failed
+ */
+static int decode_eld_specific_config(AACContext *ac, AVCodecContext *avctx,
+                                     GetBitContext *gb,
+                                     MPEG4AudioConfig *m4ac,
+                                     int channel_config)
+{
+    int ret, ep_config, res_flags;
+    uint8_t layout_map[MAX_ELEM_ID*4][3];
+    int tags = 0;
+    const int ELDEXT_TERM = 0;
+
+    m4ac->ps  = 0;
+    m4ac->sbr = 0;
+#if USE_FIXED
+    // the fixed-point build rejects a set frameLengthFlag
+    if (get_bits1(gb)) { // frameLengthFlag
+        avpriv_request_sample(avctx, "960/120 MDCT window");
+        return AVERROR_PATCHWELCOME;
+    }
+#else
+    m4ac->frame_length_short = get_bits1(gb);
+#endif
+    res_flags = get_bits(gb, 3);
+    if (res_flags) {
+        avpriv_report_missing_feature(avctx,
+                                      "AAC data resilience (flags %x)",
+                                      res_flags);
+        return AVERROR_PATCHWELCOME;
+    }
+
+    if (get_bits1(gb)) { // ldSbrPresentFlag
+        avpriv_report_missing_feature(avctx,
+                                      "Low Delay SBR");
+        return AVERROR_PATCHWELCOME;
+    }
+
+    // Skip all extension payloads up to the 4 bit terminator. The length is
+    // 4 bits, extended by 8 more when it reads 15 and by another 16 when the
+    // sum reaches 15 + 255.
+    while (get_bits(gb, 4) != ELDEXT_TERM) {
+        int len = get_bits(gb, 4);
+        if (len == 15)
+            len += get_bits(gb, 8);
+        if (len == 15 + 255)
+            len += get_bits(gb, 16);
+        // the payload plus the next 4 bit extension type must still fit
+        if (get_bits_left(gb) < len * 8 + 4) {
+            av_log(avctx, AV_LOG_ERROR, overread_err);
+            return AVERROR_INVALIDDATA;
+        }
+        skip_bits_long(gb, 8 * len);
+    }
+
+    if ((ret = set_default_channel_config(avctx, layout_map,
+                                          &tags, channel_config)))
+        return ret;
+
+    if (ac && (ret = output_configure(ac, layout_map, tags, OC_GLOBAL_HDR, 0)))
+        return ret;
+
+    // only epConfig 0 is accepted
+    ep_config = get_bits(gb, 2);
+    if (ep_config) {
+        avpriv_report_missing_feature(avctx,
+                                      "epConfig %d", ep_config);
+        return AVERROR_PATCHWELCOME;
+    }
+    return 0;
+}
+
+/**
+ * Decode audio specific configuration; reference: table 1.13.
+ *
+ * @param   ac          pointer to AACContext, may be null
+ * @param   avctx       pointer to AVCodecContext, used for logging
+ * @param   m4ac        pointer to MPEG4AudioConfig, used for parsing
+ * @param   data        pointer to buffer holding an audio specific config
+ * @param   bit_size    size of audio specific config or data in bits;
+ *                      must lie in [0, INT_MAX]
+ * @param   sync_extension look for an appended sync extension
+ *
+ * @return  Returns error status or number of consumed bits. <0 - error
+ */
+static int decode_audio_specific_config(AACContext *ac,
+                                        AVCodecContext *avctx,
+                                        MPEG4AudioConfig *m4ac,
+                                        const uint8_t *data, int64_t bit_size,
+                                        int sync_extension)
+{
+    GetBitContext gb;
+    int i, ret;
+
+    if (bit_size < 0 || bit_size > INT_MAX) {
+        av_log(avctx, AV_LOG_ERROR, "Audio specific config size is invalid\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    // debug dump of the raw configuration bytes
+    ff_dlog(avctx, "audio specific config size %d\n", (int)bit_size >> 3);
+    for (i = 0; i < bit_size >> 3; i++)
+        ff_dlog(avctx, "%02x ", data[i]);
+    ff_dlog(avctx, "\n");
+
+    if ((ret = init_get_bits(&gb, data, bit_size)) < 0)
+        return ret;
+
+    // The common header is parsed by the helper straight from data; its
+    // non-negative result is used below as the number of bits to skip in gb.
+    if ((i = avpriv_mpeg4audio_get_config(m4ac, data, bit_size,
+                                          sync_extension)) < 0)
+        return AVERROR_INVALIDDATA;
+    if (m4ac->sampling_index > 12) {
+        av_log(avctx, AV_LOG_ERROR,
+               "invalid sampling rate index %d\n",
+               m4ac->sampling_index);
+        return AVERROR_INVALIDDATA;
+    }
+    // AAC LD is only accepted with sampling indices 3..7
+    if (m4ac->object_type == AOT_ER_AAC_LD &&
+        (m4ac->sampling_index < 3 || m4ac->sampling_index > 7)) {
+        av_log(avctx, AV_LOG_ERROR,
+               "invalid low delay sampling rate index %d\n",
+               m4ac->sampling_index);
+        return AVERROR_INVALIDDATA;
+    }
+
+    skip_bits_long(&gb, i);
+
+    // dispatch on the object type; anything not listed is unsupported
+    switch (m4ac->object_type) {
+    case AOT_AAC_MAIN:
+    case AOT_AAC_LC:
+    case AOT_AAC_LTP:
+    case AOT_ER_AAC_LC:
+    case AOT_ER_AAC_LD:
+        if ((ret = decode_ga_specific_config(ac, avctx, &gb,
+                                            m4ac, m4ac->chan_config)) < 0)
+            return ret;
+        break;
+    case AOT_ER_AAC_ELD:
+        if ((ret = decode_eld_specific_config(ac, avctx, &gb,
+                                              m4ac, m4ac->chan_config)) < 0)
+            return ret;
+        break;
+    default:
+        avpriv_report_missing_feature(avctx,
+                                      "Audio object type %s%d",
+                                      m4ac->sbr == 1 ? "SBR+" : "",
+                                      m4ac->object_type);
+        return AVERROR(ENOSYS);
+    }
+
+    ff_dlog(avctx,
+            "AOT %d chan config %d sampling index %d (%d) SBR %d PS %d\n",
+            m4ac->object_type, m4ac->chan_config, m4ac->sampling_index,
+            m4ac->sample_rate, m4ac->sbr,
+            m4ac->ps);
+
+    return get_bits_count(&gb);
+}
+
+/**
+ * Advance a linear congruential pseudorandom number generator by one step.
+ *
+ * @param   previous_val    current state of the generator, passed by value
+ *
+ * @return  the next state, previous_val * 1664525 + 1013904223 computed in
+ *          wrapping unsigned arithmetic and reinterpreted as a signed integer
+ */
+static av_always_inline int lcg_random(unsigned previous_val)
+{
+    union { unsigned u; int s; } next;
+
+    next.u = previous_val * 1664525u + 1013904223;
+    return next.s;
+}
+
+/**
+ * Reset every predictor state of a channel.
+ *
+ * @param ps array of MAX_PREDICTORS predictor states
+ */
+static void reset_all_predictors(PredictorState *ps)
+{
+    PredictorState *const end = ps + MAX_PREDICTORS;
+
+    while (ps < end) {
+        reset_predict_state(ps);
+        ps++;
+    }
+}
+
+/**
+ * Map a sample rate in Hz to a sampling index in the range 0..11.
+ *
+ * The result is the first index whose lower bound does not exceed rate;
+ * rates below the last bound (including non-positive ones) map to 11.
+ */
+static int sample_rate_idx (int rate)
+{
+    static const int lower_bound[] = {
+        92017, 75132, 55426, 46009, 37566, 27713,
+        23004, 18783, 13856, 11502,  9391,
+    };
+    const int nb_bounds = sizeof(lower_bound) / sizeof(lower_bound[0]);
+    int idx;
+
+    for (idx = 0; idx < nb_bounds; idx++) {
+        if (rate >= lower_bound[idx])
+            return idx;
+    }
+    return 11;
+}
+
+/**
+ * Reset the predictor states belonging to one reset group.
+ *
+ * Starting at index group_num - 1, every 30th predictor state is reset.
+ *
+ * @param ps        array of MAX_PREDICTORS predictor states
+ * @param group_num 1-based reset group number
+ */
+static void reset_predictor_group(PredictorState *ps, int group_num)
+{
+    int pred = group_num - 1;
+
+    while (pred < MAX_PREDICTORS) {
+        reset_predict_state(ps + pred);
+        pred += 30;
+    }
+}
+
+/**
+ * Expand to an INIT_VLC_STATIC() call for spectral codebook num: the target
+ * is vlc_spectral[num] and the inputs are the matching entries of
+ * ff_aac_spectral_sizes, ff_aac_spectral_bits and ff_aac_spectral_codes.
+ * size is forwarded as the last argument (presumably the static table
+ * size; confirm against INIT_VLC_STATIC).
+ *
+ * The expansion already ends in a semicolon.
+ */
+#define AAC_INIT_VLC_STATIC(num, size)                                     \
+    INIT_VLC_STATIC(&vlc_spectral[num], 8, ff_aac_spectral_sizes[num],     \
+         ff_aac_spectral_bits[num], sizeof(ff_aac_spectral_bits[num][0]),  \
+                                    sizeof(ff_aac_spectral_bits[num][0]),  \
+        ff_aac_spectral_codes[num], sizeof(ff_aac_spectral_codes[num][0]), \
+                                    sizeof(ff_aac_spectral_codes[num][0]), \
+        size);
+
+/* Defined elsewhere in this file; declared here because aac_decode_init()
+ * calls it. */
+static void aacdec_init(AACContext *ac);
+
+/**
+ * Build the tables shared by all decoder instances: the spectral and
+ * scalefactor VLCs, the SBR tables, the tables set up by ff_aac_tableinit(),
+ * the KBD and sine windows and the cube root table.
+ *
+ * It takes no arguments and is run through ff_thread_once() from
+ * aac_decode_init().
+ */
+static av_cold void aac_static_table_init(void)
+{
+    // one VLC per spectral codebook (11 codebooks, indices 0..10)
+    AAC_INIT_VLC_STATIC( 0, 304);
+    AAC_INIT_VLC_STATIC( 1, 270);
+    AAC_INIT_VLC_STATIC( 2, 550);
+    AAC_INIT_VLC_STATIC( 3, 300);
+    AAC_INIT_VLC_STATIC( 4, 328);
+    AAC_INIT_VLC_STATIC( 5, 294);
+    AAC_INIT_VLC_STATIC( 6, 306);
+    AAC_INIT_VLC_STATIC( 7, 268);
+    AAC_INIT_VLC_STATIC( 8, 510);
+    AAC_INIT_VLC_STATIC( 9, 366);
+    AAC_INIT_VLC_STATIC(10, 462);
+
+    AAC_RENAME(ff_aac_sbr_init)();
+
+    ff_aac_tableinit();
+
+    // scalefactor VLC, read elsewhere in this file with get_vlc2(..., 7, 3)
+    INIT_VLC_STATIC(&vlc_scalefactors, 7,
+                    FF_ARRAY_ELEMS(ff_aac_scalefactor_code),
+                    ff_aac_scalefactor_bits,
+                    sizeof(ff_aac_scalefactor_bits[0]),
+                    sizeof(ff_aac_scalefactor_bits[0]),
+                    ff_aac_scalefactor_code,
+                    sizeof(ff_aac_scalefactor_code[0]),
+                    sizeof(ff_aac_scalefactor_code[0]),
+                    352);
+
+    // window initialization
+    AAC_RENAME(ff_kbd_window_init)(AAC_RENAME(ff_aac_kbd_long_1024), 4.0, 1024);
+    AAC_RENAME(ff_kbd_window_init)(AAC_RENAME(ff_aac_kbd_short_128), 6.0, 128);
+    AAC_RENAME(ff_init_ff_sine_windows)(10);
+    AAC_RENAME(ff_init_ff_sine_windows)( 9);
+    AAC_RENAME(ff_init_ff_sine_windows)( 7);
+
+    AAC_RENAME(cbrt_tableinit)();
+}
+
+/* Guards the one-time execution of aac_static_table_init(). */
+static AVOnce aac_table_init = AV_ONCE_INIT;
+
+/**
+ * Codec init callback: build the shared static tables once, derive the
+ * initial output configuration and set up the DSP and MDCT contexts.
+ *
+ * If extradata is present it is parsed as an audio specific config.
+ * Otherwise the sampling index is derived from avctx->sample_rate and the
+ * channel configuration from avctx->channels (0 if no entry of
+ * ff_mpeg4audio_channels matches).
+ *
+ * @param avctx codec context; priv_data is used as the AACContext
+ *
+ * @return 0 on success, otherwise the error code of the step that failed
+ */
+static av_cold int aac_decode_init(AVCodecContext *avctx)
+{
+    AACContext *ac = avctx->priv_data;
+    int ret;
+
+    ret = ff_thread_once(&aac_table_init, &aac_static_table_init);
+    if (ret != 0)
+        return AVERROR_UNKNOWN;
+
+    ac->avctx = avctx;
+    ac->oc[1].m4ac.sample_rate = avctx->sample_rate;
+
+    aacdec_init(ac);
+#if USE_FIXED
+    avctx->sample_fmt = AV_SAMPLE_FMT_S32P;
+#else
+    avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
+#endif /* USE_FIXED */
+
+    if (avctx->extradata_size > 0) {
+        if ((ret = decode_audio_specific_config(ac, ac->avctx, &ac->oc[1].m4ac,
+                                                avctx->extradata,
+                                                avctx->extradata_size * 8LL,
+                                                1)) < 0)
+            return ret;
+    } else {
+        int sr, i;
+        uint8_t layout_map[MAX_ELEM_ID*4][3];
+        int layout_map_tags;
+
+        sr = sample_rate_idx(avctx->sample_rate);
+        ac->oc[1].m4ac.sampling_index = sr;
+        ac->oc[1].m4ac.channels = avctx->channels;
+        ac->oc[1].m4ac.sbr = -1;
+        ac->oc[1].m4ac.ps = -1;
+
+        /* find the channel configuration matching the channel count,
+         * falling back to 0 when there is none */
+        for (i = 0; i < FF_ARRAY_ELEMS(ff_mpeg4audio_channels); i++)
+            if (ff_mpeg4audio_channels[i] == avctx->channels)
+                break;
+        if (i == FF_ARRAY_ELEMS(ff_mpeg4audio_channels)) {
+            i = 0;
+        }
+        ac->oc[1].m4ac.chan_config = i;
+
+        if (ac->oc[1].m4ac.chan_config) {
+            /* Best effort: a failure to build the default layout is only
+             * fatal when AV_EF_EXPLODE is requested, and the result of
+             * output_configure() is deliberately not checked. */
+            ret = set_default_channel_config(avctx, layout_map,
+                &layout_map_tags, ac->oc[1].m4ac.chan_config);
+            if (!ret)
+                output_configure(ac, layout_map, layout_map_tags,
+                                 OC_GLOBAL_HDR, 0);
+            else if (avctx->err_recognition & AV_EF_EXPLODE)
+                return AVERROR_INVALIDDATA;
+        }
+    }
+
+    if (avctx->channels > MAX_CHANNELS) {
+        av_log(avctx, AV_LOG_ERROR, "Too many channels\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+#if USE_FIXED
+    ac->fdsp = avpriv_alloc_fixed_dsp(avctx->flags & AV_CODEC_FLAG_BITEXACT);
+#else
+    ac->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
+#endif /* USE_FIXED */
+    if (!ac->fdsp) {
+        return AVERROR(ENOMEM);
+    }
+
+    ac->random_state = 0x1f2e3d4c;
+
+    /* Each MDCT setup can fail; stop at the first failure instead of
+     * decoding later with a context that was never initialized. */
+    ret = AAC_RENAME_32(ff_mdct_init)(&ac->mdct,       11, 1, 1.0 / RANGE15(1024.0));
+    if (ret < 0)
+        return ret;
+    ret = AAC_RENAME_32(ff_mdct_init)(&ac->mdct_ld,    10, 1, 1.0 / RANGE15(512.0));
+    if (ret < 0)
+        return ret;
+    ret = AAC_RENAME_32(ff_mdct_init)(&ac->mdct_small,  8, 1, 1.0 / RANGE15(128.0));
+    if (ret < 0)
+        return ret;
+    ret = AAC_RENAME_32(ff_mdct_init)(&ac->mdct_ltp,   11, 0, RANGE15(-2.0));
+    if (ret < 0)
+        return ret;
+#if !USE_FIXED
+    ret = ff_imdct15_init(&ac->mdct480, 5);
+    if (ret < 0)
+        return ret;
+#endif
+
+    return 0;
+}
+
+/**
+ * Skip data_stream_element; reference: table 4.10.
+ *
+ * @param ac decoder context, used for logging
+ * @param gb bitstream reader positioned at the byte-align flag
+ *
+ * @return 0 on success, AVERROR_INVALIDDATA if the payload exceeds the
+ *         remaining bits
+ */
+static int skip_data_stream_element(AACContext *ac, GetBitContext *gb)
+{
+    const int align_flag = get_bits1(gb);
+    int payload_bytes    = get_bits(gb, 8);
+
+    /* a count of 255 is extended by a second byte */
+    if (payload_bytes == 255)
+        payload_bytes += get_bits(gb, 8);
+    if (align_flag)
+        align_get_bits(gb);
+
+    if (8 * payload_bytes > get_bits_left(gb)) {
+        av_log(ac->avctx, AV_LOG_ERROR, "skip_data_stream_element: "overread_err);
+        return AVERROR_INVALIDDATA;
+    }
+
+    skip_bits_long(gb, 8 * payload_bytes);
+    return 0;
+}
+
+/**
+ * Read the prediction side info of one channel: an optional predictor reset
+ * group followed by one prediction_used flag per scalefactor band.
+ *
+ * The number of flags is the smaller of ics->max_sfb and the
+ * ff_aac_pred_sfb_max entry for the current sampling index.
+ *
+ * @return 0 on success, AVERROR_INVALIDDATA if the reset group is outside 1..30
+ */
+static int decode_prediction(AACContext *ac, IndividualChannelStream *ics,
+                             GetBitContext *gb)
+{
+    int band, nb_bands;
+    const int reset_present = get_bits1(gb);
+
+    if (reset_present) {
+        ics->predictor_reset_group = get_bits(gb, 5);
+        if (!ics->predictor_reset_group || ics->predictor_reset_group > 30) {
+            av_log(ac->avctx, AV_LOG_ERROR,
+                   "Invalid Predictor Reset Group.\n");
+            return AVERROR_INVALIDDATA;
+        }
+    }
+
+    nb_bands = FFMIN(ics->max_sfb,
+                     ff_aac_pred_sfb_max[ac->oc[1].m4ac.sampling_index]);
+    for (band = 0; band < nb_bands; band++)
+        ics->prediction_used[band] = get_bits1(gb);
+
+    return 0;
+}
+
+/**
+ * Decode Long Term Prediction data; reference: table 4.xx.
+ *
+ * Reads an 11 bit lag, a 3 bit index into ltp_coef and one "used" flag per
+ * scalefactor band, for at most MAX_LTP_LONG_SFB bands.
+ *
+ * @param ltp     destination for the decoded values
+ * @param gb      bitstream reader
+ * @param max_sfb number of scalefactor bands in use
+ */
+static void decode_ltp(LongTermPrediction *ltp,
+                       GetBitContext *gb, uint8_t max_sfb)
+{
+    const int nb_bands = FFMIN(max_sfb, MAX_LTP_LONG_SFB);
+    int coef_idx, band;
+
+    ltp->lag  = get_bits(gb, 11);
+    coef_idx  = get_bits(gb, 3);
+    ltp->coef = ltp_coef[coef_idx];
+
+    for (band = 0; band < nb_bands; band++)
+        ltp->used[band] = get_bits1(gb);
+}
+
+/**
+ * Decode Individual Channel Stream info; reference: table 4.6.
+ *
+ * Reads the window sequence and shape (not for ELD), the number of
+ * scalefactor bands, the window grouping for short windows and the
+ * prediction / LTP side info, and selects the band offset tables for the
+ * current sampling index.
+ *
+ * @param ac  decoder context; oc[1].m4ac supplies object type and sampling index
+ * @param ics channel stream state to update
+ * @param gb  bitstream reader
+ *
+ * @return 0 on success, otherwise an error code. Paths that leave through
+ *         the fail label also reset ics->max_sfb to 0; the direct returns
+ *         do not.
+ */
+static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics,
+                           GetBitContext *gb)
+{
+    const MPEG4AudioConfig *const m4ac = &ac->oc[1].m4ac;
+    const int aot = m4ac->object_type;
+    const int sampling_index = m4ac->sampling_index;
+    if (aot != AOT_ER_AAC_ELD) {
+        // a set reserved bit is only fatal with AV_EF_BITSTREAM
+        if (get_bits1(gb)) {
+            av_log(ac->avctx, AV_LOG_ERROR, "Reserved bit set.\n");
+            if (ac->avctx->err_recognition & AV_EF_BITSTREAM)
+                return AVERROR_INVALIDDATA;
+        }
+        // index 1 keeps the value held before this call, index 0 the new one
+        ics->window_sequence[1] = ics->window_sequence[0];
+        ics->window_sequence[0] = get_bits(gb, 2);
+        if (aot == AOT_ER_AAC_LD &&
+            ics->window_sequence[0] != ONLY_LONG_SEQUENCE) {
+            av_log(ac->avctx, AV_LOG_ERROR,
+                   "AAC LD is only defined for ONLY_LONG_SEQUENCE but "
+                   "window sequence %d found.\n", ics->window_sequence[0]);
+            ics->window_sequence[0] = ONLY_LONG_SEQUENCE;
+            return AVERROR_INVALIDDATA;
+        }
+        ics->use_kb_window[1]   = ics->use_kb_window[0];
+        ics->use_kb_window[0]   = get_bits1(gb);
+    }
+    ics->num_window_groups  = 1;
+    ics->group_len[0]       = 1;
+    if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
+        int i;
+        ics->max_sfb = get_bits(gb, 4);
+        // 7 grouping bits: a set bit extends the current group by one
+        // window, a clear bit starts a new group
+        for (i = 0; i < 7; i++) {
+            if (get_bits1(gb)) {
+                ics->group_len[ics->num_window_groups - 1]++;
+            } else {
+                ics->num_window_groups++;
+                ics->group_len[ics->num_window_groups - 1] = 1;
+            }
+        }
+        ics->num_windows       = 8;
+        ics->swb_offset        =    ff_swb_offset_128[sampling_index];
+        ics->num_swb           =   ff_aac_num_swb_128[sampling_index];
+        ics->tns_max_bands     = ff_tns_max_bands_128[sampling_index];
+        ics->predictor_present = 0;
+    } else {
+        ics->max_sfb           = get_bits(gb, 6);
+        ics->num_windows       = 1;
+        // LD and ELD use the 480 or 512 tables, everything else the 1024 ones
+        if (aot == AOT_ER_AAC_LD || aot == AOT_ER_AAC_ELD) {
+            if (m4ac->frame_length_short) {
+                ics->swb_offset    =     ff_swb_offset_480[sampling_index];
+                ics->num_swb       =    ff_aac_num_swb_480[sampling_index];
+                ics->tns_max_bands =  ff_tns_max_bands_480[sampling_index];
+            } else {
+                ics->swb_offset    =     ff_swb_offset_512[sampling_index];
+                ics->num_swb       =    ff_aac_num_swb_512[sampling_index];
+                ics->tns_max_bands =  ff_tns_max_bands_512[sampling_index];
+            }
+            // a missing table entry for this sampling index is treated as
+            // an internal error
+            if (!ics->num_swb || !ics->swb_offset)
+                return AVERROR_BUG;
+        } else {
+            ics->swb_offset    =    ff_swb_offset_1024[sampling_index];
+            ics->num_swb       =   ff_aac_num_swb_1024[sampling_index];
+            ics->tns_max_bands = ff_tns_max_bands_1024[sampling_index];
+        }
+        // for ELD no predictor bit is read and predictor_present is left
+        // as it was
+        if (aot != AOT_ER_AAC_ELD) {
+            ics->predictor_present     = get_bits1(gb);
+            ics->predictor_reset_group = 0;
+        }
+        if (ics->predictor_present) {
+            if (aot == AOT_AAC_MAIN) {
+                if (decode_prediction(ac, ics, gb)) {
+                    goto fail;
+                }
+            } else if (aot == AOT_AAC_LC ||
+                       aot == AOT_ER_AAC_LC) {
+                av_log(ac->avctx, AV_LOG_ERROR,
+                       "Prediction is not allowed in AAC-LC.\n");
+                goto fail;
+            } else {
+                if (aot == AOT_ER_AAC_LD) {
+                    av_log(ac->avctx, AV_LOG_ERROR,
+                           "LTP in ER AAC LD not yet implemented.\n");
+                    return AVERROR_PATCHWELCOME;
+                }
+                // remaining object types: one bit signals LTP data
+                if ((ics->ltp.present = get_bits(gb, 1)))
+                    decode_ltp(&ics->ltp, gb, ics->max_sfb);
+            }
+        }
+    }
+
+    // max_sfb must not exceed the band count of the selected table
+    if (ics->max_sfb > ics->num_swb) {
+        av_log(ac->avctx, AV_LOG_ERROR,
+               "Number of scalefactor bands in group (%d) "
+               "exceeds limit (%d).\n",
+               ics->max_sfb, ics->num_swb);
+        goto fail;
+    }
+
+    return 0;
+fail:
+    ics->max_sfb = 0;
+    return AVERROR_INVALIDDATA;
+}
+
+/**
+ * Decode band types (section_data payload); reference: table 4.46.
+ *
+ * For every window group the bands 0..max_sfb-1 are covered by sections;
+ * each section carries a 4 bit band type and a length coded in increments
+ * of 3 bits (eight short windows) or 5 bits (otherwise).
+ *
+ * @param   ac                  decoder context, used for logging
+ * @param   band_type           array of the used band type
+ * @param   band_type_run_end   array of the last scalefactor band of a band type run
+ * @param   gb                  bitstream reader
+ * @param   ics                 channel stream info (window sequence, groups, max_sfb)
+ *
+ * @return  Returns error status. 0 - OK, !0 - error
+ */
+static int decode_band_types(AACContext *ac, enum BandType band_type[120],
+                             int band_type_run_end[120], GetBitContext *gb,
+                             IndividualChannelStream *ics)
+{
+    int g, idx = 0;
+    const int bits = (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) ? 3 : 5;
+    for (g = 0; g < ics->num_window_groups; g++) {
+        int k = 0;
+        while (k < ics->max_sfb) {
+            uint8_t sect_end = k;
+            int sect_len_incr;
+            int sect_band_type = get_bits(gb, 4);
+            // band type 12 is rejected
+            if (sect_band_type == 12) {
+                av_log(ac->avctx, AV_LOG_ERROR, "invalid band type\n");
+                return AVERROR_INVALIDDATA;
+            }
+            // accumulate length increments; an all-ones increment means
+            // another increment follows
+            do {
+                sect_len_incr = get_bits(gb, bits);
+                sect_end += sect_len_incr;
+                if (get_bits_left(gb) < 0) {
+                    av_log(ac->avctx, AV_LOG_ERROR, "decode_band_types: "overread_err);
+                    return AVERROR_INVALIDDATA;
+                }
+                if (sect_end > ics->max_sfb) {
+                    av_log(ac->avctx, AV_LOG_ERROR,
+                           "Number of bands (%d) exceeds limit (%d).\n",
+                           sect_end, ics->max_sfb);
+                    return AVERROR_INVALIDDATA;
+                }
+            } while (sect_len_incr == (1 << bits) - 1);
+            // every band of the section gets the same type and run end
+            for (; k < sect_end; k++) {
+                band_type        [idx]   = sect_band_type;
+                band_type_run_end[idx++] = sect_end;
+            }
+        }
+    }
+    return 0;
+}
+
+/**
+ * Decode scalefactors; reference: table 4.47.
+ *
+ * Three running values are kept in offset[]: [0] for regular scalefactors
+ * (starting at global_gain), [1] for noise gains (starting at
+ * global_gain - NOISE_OFFSET) and [2] for intensity stereo positions
+ * (starting at 0). Each decoded value is a difference added to the
+ * running value of its kind.
+ *
+ * @param   ac                  decoder context, used for logging
+ * @param   sf                  array of scalefactors or intensity stereo positions
+ * @param   gb                  bitstream reader
+ * @param   global_gain         first scalefactor value as scalefactors are differentially coded
+ * @param   ics                 channel stream info (window groups, max_sfb)
+ * @param   band_type           array of the used band type
+ * @param   band_type_run_end   array of the last scalefactor band of a band type run
+ *
+ * @return  Returns error status. 0 - OK, !0 - error
+ */
+static int decode_scalefactors(AACContext *ac, INTFLOAT sf[120], GetBitContext *gb,
+                               unsigned int global_gain,
+                               IndividualChannelStream *ics,
+                               enum BandType band_type[120],
+                               int band_type_run_end[120])
+{
+    int g, i, idx = 0;
+    int offset[3] = { global_gain, global_gain - NOISE_OFFSET, 0 };
+    int clipped_offset;
+    int noise_flag = 1;
+    for (g = 0; g < ics->num_window_groups; g++) {
+        // i is advanced by the inner loops, one whole band type run at a time
+        for (i = 0; i < ics->max_sfb;) {
+            int run_end = band_type_run_end[idx];
+            if (band_type[idx] == ZERO_BT) {
+                // zero bands carry no scalefactor data
+                for (; i < run_end; i++, idx++)
+                    sf[idx] = FIXR(0.);
+            } else if ((band_type[idx] == INTENSITY_BT) ||
+                       (band_type[idx] == INTENSITY_BT2)) {
+                for (; i < run_end; i++, idx++) {
+                    offset[2] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - SCALE_DIFF_ZERO;
+                    // out-of-range positions are clipped and reported, not rejected
+                    clipped_offset = av_clip(offset[2], -155, 100);
+                    if (offset[2] != clipped_offset) {
+                        avpriv_request_sample(ac->avctx,
+                                              "If you heard an audible artifact, there may be a bug in the decoder. "
+                                              "Clipped intensity stereo position (%d -> %d)",
+                                              offset[2], clipped_offset);
+                    }
+#if USE_FIXED
+                    sf[idx] = 100 - clipped_offset;
+#else
+                    sf[idx] = ff_aac_pow2sf_tab[-clipped_offset + POW_SF2_ZERO];
+#endif /* USE_FIXED */
+                }
+            } else if (band_type[idx] == NOISE_BT) {
+                for (; i < run_end; i++, idx++) {
+                    // only the first noise value of the call is read as
+                    // NOISE_PRE_BITS raw bits; later ones use the VLC
+                    if (noise_flag-- > 0)
+                        offset[1] += get_bits(gb, NOISE_PRE_BITS) - NOISE_PRE;
+                    else
+                        offset[1] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - SCALE_DIFF_ZERO;
+                    clipped_offset = av_clip(offset[1], -100, 155);
+                    if (offset[1] != clipped_offset) {
+                        avpriv_request_sample(ac->avctx,
+                                              "If you heard an audible artifact, there may be a bug in the decoder. "
+                                              "Clipped noise gain (%d -> %d)",
+                                              offset[1], clipped_offset);
+                    }
+#if USE_FIXED
+                    sf[idx] = -(100 + clipped_offset);
+#else
+                    sf[idx] = -ff_aac_pow2sf_tab[clipped_offset + POW_SF2_ZERO];
+#endif /* USE_FIXED */
+                }
+            } else {
+                for (; i < run_end; i++, idx++) {
+                    offset[0] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - SCALE_DIFF_ZERO;
+                    // the unsigned comparison rejects negative values as
+                    // well as values above 255
+                    if (offset[0] > 255U) {
+                        av_log(ac->avctx, AV_LOG_ERROR,
+                               "Scalefactor (%d) out of range.\n", offset[0]);
+                        return AVERROR_INVALIDDATA;
+                    }
+#if USE_FIXED
+                    sf[idx] = -offset[0];
+#else
+                    sf[idx] = -ff_aac_pow2sf_tab[offset[0] - 100 + POW_SF2_ZERO];
+#endif /* USE_FIXED */
+                }
+            }
+        }
+    }
+    return 0;
+}
+
+/**
+ * Decode pulse data; reference: table 4.7.
+ *
+ * The first pulse position is the start of the signalled scalefactor band
+ * plus a 5 bit offset; every further position is the previous one plus a
+ * 5 bit offset. Each pulse has a 4 bit amplitude.
+ *
+ * @param pulse      destination for pulse count, positions and amplitudes
+ * @param gb         bitstream reader
+ * @param swb_offset scalefactor band offsets; index num_swb must be readable
+ * @param num_swb    number of scalefactor bands
+ *
+ * @return 0 on success, -1 if the start band or a position is out of range
+ */
+static int decode_pulses(Pulse *pulse, GetBitContext *gb,
+                         const uint16_t *swb_offset, int num_swb)
+{
+    int n, start_swb, limit;
+
+    pulse->num_pulse = get_bits(gb, 2) + 1;
+    start_swb        = get_bits(gb, 6);
+    if (start_swb >= num_swb)
+        return -1;
+
+    limit = swb_offset[num_swb];
+    for (n = 0; n < pulse->num_pulse; n++) {
+        const int base = n ? pulse->pos[n - 1] : swb_offset[start_swb];
+
+        pulse->pos[n] = base + get_bits(gb, 5);
+        if (pulse->pos[n] >= limit)
+            return -1;
+        pulse->amp[n] = get_bits(gb, 4);
+    }
+    return 0;
+}
+
+/**
+ * Decode Temporal Noise Shaping data; reference: table 4.48.
+ *
+ * Field widths depend on the window sequence: with eight short windows the
+ * filter count takes 1 bit, the length 4 bits and the order 3 bits;
+ * otherwise 2, 6 and 5 bits.
+ *
+ * @param   ac   decoder context; supplies the object type and is used for logging
+ * @param   tns  destination for the decoded filters
+ * @param   gb   bitstream reader
+ * @param   ics  channel stream info (window sequence and window count)
+ *
+ * @return  Returns error status. 0 - OK, !0 - error
+ */
+static int decode_tns(AACContext *ac, TemporalNoiseShaping *tns,
+                      GetBitContext *gb, const IndividualChannelStream *ics)
+{
+    int w, filt, i, coef_len, coef_res, coef_compress;
+    const int is8 = ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE;
+    // maximum order: 7 for short windows, else 20 for AAC Main and 12 otherwise
+    const int tns_max_order = is8 ? 7 : ac->oc[1].m4ac.object_type == AOT_AAC_MAIN ? 20 : 12;
+    for (w = 0; w < ics->num_windows; w++) {
+        if ((tns->n_filt[w] = get_bits(gb, 2 - is8))) {
+            // one coefficient resolution bit per window that has filters
+            coef_res = get_bits1(gb);
+
+            for (filt = 0; filt < tns->n_filt[w]; filt++) {
+                int tmp2_idx;
+                tns->length[w][filt] = get_bits(gb, 6 - 2 * is8);
+
+                if ((tns->order[w][filt] = get_bits(gb, 5 - 2 * is8)) > tns_max_order) {
+                    av_log(ac->avctx, AV_LOG_ERROR,
+                           "TNS filter order %d is greater than maximum %d.\n",
+                           tns->order[w][filt], tns_max_order);
+                    tns->order[w][filt] = 0;
+                    return AVERROR_INVALIDDATA;
+                }
+                if (tns->order[w][filt]) {
+                    tns->direction[w][filt] = get_bits1(gb);
+                    coef_compress = get_bits1(gb);
+                    // coefficients take 3 or 4 bits by resolution, one less
+                    // when compressed; both flags also select the map row
+                    coef_len = coef_res + 3 - coef_compress;
+                    tmp2_idx = 2 * coef_compress + coef_res;
+
+                    for (i = 0; i < tns->order[w][filt]; i++)
+                        tns->coef[w][filt][i] = tns_tmp2_map[tmp2_idx][get_bits(gb, coef_len)];
+                }
+            }
+        }
+    }
+    return 0;
+}
+
+/**
+ * Decode Mid/Side data; reference: table 4.54.
+ *
+ * The mask covers num_window_groups * max_sfb entries of the first channel
+ * of the pair.
+ *
+ * @param   cpe         channel pair element whose ms_mask is filled
+ * @param   gb          bitstream reader
+ * @param   ms_present  Indicates mid/side stereo presence. [0] mask is all 0s;
+ *                      [1] mask is decoded from bitstream; [2] mask is all 1s;
+ *                      [3] reserved for scalable AAC.
+ *                      For any value other than 1 and 2 the mask is not touched.
+ */
+static void decode_mid_side_stereo(ChannelElement *cpe, GetBitContext *gb,
+                                   int ms_present)
+{
+    const IndividualChannelStream *ics = &cpe->ch[0].ics;
+    const int mask_len = ics->num_window_groups * ics->max_sfb;
+    int band;
+
+    switch (ms_present) {
+    case 1:
+        for (band = 0; band < mask_len; band++)
+            cpe->ms_mask[band] = get_bits1(gb);
+        break;
+    case 2:
+        memset(cpe->ms_mask, 1, mask_len * sizeof(cpe->ms_mask[0]));
+        break;
+    default:
+        break;
+    }
+}
+
+/**
+ * Decode, dequantize and scale spectral data; reference: table 4.50, 4.6.3.3.
+ * @param   ac              decoder context (random state, DSP helpers, log target)
+ * @param   coef            array of dequantized, scaled spectral data
+ * @param   gb              bitstream reader the spectral data is read from
+ * @param   sf              array of scalefactors or intensity stereo positions
+ * @param   pulse_present   set if pulses are present
+ * @param   pulse           pointer to pulse data struct; only read if pulse_present
+ * @param   ics             window/group layout and scalefactor band offsets
+ * @param   band_type       array of the used band type
+ * @return  0 on success, AVERROR_INVALIDDATA if an escape prefix is too long
+ */
+static int decode_spectrum_and_dequant(AACContext *ac, INTFLOAT coef[1024],
+                                       GetBitContext *gb, const INTFLOAT sf[120],
+                                       int pulse_present, const Pulse *pulse,
+                                       const IndividualChannelStream *ics,
+                                       enum BandType band_type[120])
+{
+    int i, k, g, idx = 0;
+    const int c = 1024 / ics->num_windows; /* coefficients per window */
+    const uint16_t *offsets = ics->swb_offset;
+    INTFLOAT *coef_base = coef; /* coef itself is advanced per group below; keep the start for the later passes */
+
+    for (g = 0; g < ics->num_windows; g++) /* clear everything above the last coded band in each window */
+        memset(coef + g * 128 + offsets[ics->max_sfb], 0,
+               sizeof(INTFLOAT) * (c - offsets[ics->max_sfb]));
+
+    for (g = 0; g < ics->num_window_groups; g++) {
+        unsigned g_len = ics->group_len[g];
+
+        for (i = 0; i < ics->max_sfb; i++, idx++) { /* idx runs over (group, sfb) pairs and is never reset per group */
+            const unsigned cbt_m1 = band_type[idx] - 1; /* unsigned: a band type of 0 wraps to a huge value and lands in the first branch */
+            INTFLOAT *cfo = coef + offsets[i];
+            int off_len = offsets[i + 1] - offsets[i];
+            int group;
+
+            if (cbt_m1 >= INTENSITY_BT2 - 1) { /* band type 0 or >= INTENSITY_BT2: no spectral data, band is zeroed */
+                for (group = 0; group < (AAC_SIGNE)g_len; group++, cfo+=128) {
+                    memset(cfo, 0, off_len * sizeof(*cfo));
+                }
+            } else if (cbt_m1 == NOISE_BT - 1) { /* noise band: no bits are read, coefficients are synthesized */
+                for (group = 0; group < (AAC_SIGNE)g_len; group++, cfo+=128) {
+#if !USE_FIXED
+                    float scale;
+#endif /* !USE_FIXED */
+                    INTFLOAT band_energy;
+
+                    for (k = 0; k < off_len; k++) {
+                        ac->random_state  = lcg_random(ac->random_state); /* generator state lives in the context and carries over between bands */
+#if USE_FIXED
+                        cfo[k] = ac->random_state >> 3;
+#else
+                        cfo[k] = ac->random_state;
+#endif /* USE_FIXED */
+                    }
+
+#if USE_FIXED
+                    band_energy = ac->fdsp->scalarproduct_fixed(cfo, cfo, off_len);
+                    band_energy = fixed_sqrt(band_energy, 31);
+                    noise_scale(cfo, sf[idx], band_energy, off_len);
+#else
+                    band_energy = ac->fdsp->scalarproduct_float(cfo, cfo, off_len);
+                    scale = sf[idx] / sqrtf(band_energy); /* rescale so the band's sum of squares becomes sf[idx]^2 */
+                    ac->fdsp->vector_fmul_scalar(cfo, cfo, scale, off_len);
+#endif /* USE_FIXED */
+                }
+            } else { /* VLC-coded band: cbt_m1 selects the codebook tables */
+#if !USE_FIXED
+                const float *vq = ff_aac_codebook_vector_vals[cbt_m1];
+#endif /* !USE_FIXED */
+                const uint16_t *cb_vector_idx = ff_aac_codebook_vector_idx[cbt_m1];
+                VLC_TYPE (*vlc_tab)[2] = vlc_spectral[cbt_m1].table;
+                OPEN_READER(re, gb);
+
+                switch (cbt_m1 >> 1) { /* codebooks are dispatched in pairs: 0-1, 2-3, 4-5, 6-9, rest */
+                case 0: /* four coefficients per codeword, no extra bits read after the codeword */
+                    for (group = 0; group < (AAC_SIGNE)g_len; group++, cfo+=128) {
+                        INTFLOAT *cf = cfo;
+                        int len = off_len;
+
+                        do {
+                            int code;
+                            unsigned cb_idx;
+
+                            UPDATE_CACHE(re, gb);
+                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
+                            cb_idx = cb_vector_idx[code];
+#if USE_FIXED
+                            cf = DEC_SQUAD(cf, cb_idx);
+#else
+                            cf = VMUL4(cf, vq, cb_idx, sf + idx);
+#endif /* USE_FIXED */
+                        } while (len -= 4); /* ends only when len hits exactly 0, so off_len must be a multiple of 4 */
+                    }
+                    break;
+
+                case 1: /* four coefficients per codeword, followed by nnz extra bits */
+                    for (group = 0; group < (AAC_SIGNE)g_len; group++, cfo+=128) {
+                        INTFLOAT *cf = cfo;
+                        int len = off_len;
+
+                        do {
+                            int code;
+                            unsigned nnz;
+                            unsigned cb_idx;
+                            uint32_t bits;
+
+                            UPDATE_CACHE(re, gb);
+                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
+                            cb_idx = cb_vector_idx[code];
+                            nnz = cb_idx >> 8 & 15; /* count of extra bits, packed in bits 8..11 of the table entry */
+                            bits = nnz ? GET_CACHE(re, gb) : 0;
+                            LAST_SKIP_BITS(re, gb, nnz);
+#if USE_FIXED
+                            cf = DEC_UQUAD(cf, cb_idx, bits);
+#else
+                            cf = VMUL4S(cf, vq, cb_idx, bits, sf + idx);
+#endif /* USE_FIXED */
+                        } while (len -= 4); /* off_len must be a multiple of 4 here as well */
+                    }
+                    break;
+
+                case 2: /* two coefficients per codeword, no extra bits read after the codeword */
+                    for (group = 0; group < (AAC_SIGNE)g_len; group++, cfo+=128) {
+                        INTFLOAT *cf = cfo;
+                        int len = off_len;
+
+                        do {
+                            int code;
+                            unsigned cb_idx;
+
+                            UPDATE_CACHE(re, gb);
+                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
+                            cb_idx = cb_vector_idx[code];
+#if USE_FIXED
+                            cf = DEC_SPAIR(cf, cb_idx);
+#else
+                            cf = VMUL2(cf, vq, cb_idx, sf + idx);
+#endif /* USE_FIXED */
+                        } while (len -= 2); /* off_len must be even */
+                    }
+                    break;
+
+                case 3:
+                case 4: /* two coefficients per codeword, followed by nnz extra bits */
+                    for (group = 0; group < (AAC_SIGNE)g_len; group++, cfo+=128) {
+                        INTFLOAT *cf = cfo;
+                        int len = off_len;
+
+                        do {
+                            int code;
+                            unsigned nnz;
+                            unsigned cb_idx;
+                            unsigned sign;
+
+                            UPDATE_CACHE(re, gb);
+                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
+                            cb_idx = cb_vector_idx[code];
+                            nnz = cb_idx >> 8 & 15;
+                            sign = nnz ? SHOW_UBITS(re, gb, nnz) << (cb_idx >> 12) : 0; /* extra bits pre-shifted by the amount stored in the entry's top nibble */
+                            LAST_SKIP_BITS(re, gb, nnz);
+#if USE_FIXED
+                            cf = DEC_UPAIR(cf, cb_idx, sign);
+#else
+                            cf = VMUL2S(cf, vq, cb_idx, sign, sf + idx);
+#endif /* USE_FIXED */
+                        } while (len -= 2); /* off_len must be even */
+                    }
+                    break;
+
+                default: /* remaining codebook: pairs whose values may be extended by an escape sequence */
+                    for (group = 0; group < (AAC_SIGNE)g_len; group++, cfo+=128) {
+#if USE_FIXED
+                        int *icf = cfo;
+                        int v;
+#else
+                        float *cf = cfo;
+                        uint32_t *icf = (uint32_t *) cf; /* float build writes raw 32-bit patterns so the sign bit can be OR-ed in */
+#endif /* USE_FIXED */
+                        int len = off_len;
+
+                        do {
+                            int code;
+                            unsigned nzt, nnz;
+                            unsigned cb_idx;
+                            uint32_t bits;
+                            int j;
+
+                            UPDATE_CACHE(re, gb);
+                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
+
+                            if (!code) { /* code 0 stores a pair of zeros and reads nothing further */
+                                *icf++ = 0;
+                                *icf++ = 0;
+                                continue;
+                            }
+
+                            cb_idx = cb_vector_idx[code];
+                            nnz = cb_idx >> 12; /* number of sign bits that follow the codeword */
+                            nzt = cb_idx >> 8;  /* low two bits say which of the two values carries an escape */
+                            bits = SHOW_UBITS(re, gb, nnz) << (32-nnz); /* sign bits left-aligned so bit 31 is always the next one */
+                            LAST_SKIP_BITS(re, gb, nnz);
+
+                            for (j = 0; j < 2; j++) {
+                                if (nzt & 1<<j) {
+                                    uint32_t b;
+                                    int n;
+                                    /* The total length of escape_sequence must be < 22 bits according
+                                       to the specification (i.e. max is 111111110xxxxxxxxxxxx). */
+                                    UPDATE_CACHE(re, gb);
+                                    b = GET_CACHE(re, gb);
+                                    b = 31 - av_log2(~b); /* length of the run of leading 1 bits, assuming av_log2 yields the highest set bit index */
+
+                                    if (b > 8) {
+                                        av_log(ac->avctx, AV_LOG_ERROR, "error in spectral data, ESC overflow\n");
+                                        return AVERROR_INVALIDDATA; /* NOTE(review): returns without CLOSE_READER; presumably fine since decoding is abandoned */
+                                    }
+
+                                    SKIP_BITS(re, gb, b + 1); /* skip the prefix and its terminating bit */
+                                    b += 4;
+                                    n = (1 << b) + SHOW_UBITS(re, gb, b); /* escape value: implicit leading one plus b mantissa bits */
+                                    LAST_SKIP_BITS(re, gb, b);
+#if USE_FIXED
+                                    v = n;
+                                    if (bits & 1U<<31)
+                                        v = -v;
+                                    *icf++ = v;
+#else
+                                    *icf++ = cbrt_tab[n] | (bits & 1U<<31);
+#endif /* USE_FIXED */
+                                    bits <<= 1; /* an escaped value always consumes one sign bit */
+                                } else {
+#if USE_FIXED
+                                    v = cb_idx & 15;
+                                    if (bits & 1U<<31)
+                                        v = -v;
+                                    *icf++ = v;
+#else
+                                    unsigned v = ((const uint32_t*)vq)[cb_idx & 15];
+                                    *icf++ = (bits & 1U<<31) | v;
+#endif /* USE_FIXED */
+                                    bits <<= !!v; /* a sign bit is consumed only when the value is nonzero */
+                                }
+                                cb_idx >>= 4; /* move on to the second value's 4-bit field */
+                            }
+                        } while (len -= 2); /* off_len must be even */
+#if !USE_FIXED
+                        ac->fdsp->vector_fmul_scalar(cfo, cfo, sf[idx], off_len); /* float build applies the scalefactor once per band here */
+#endif /* !USE_FIXED */
+                    }
+                }
+
+                CLOSE_READER(re, gb);
+            }
+        }
+        coef += g_len << 7; /* advance by g_len windows of 128 coefficients */
+    }
+
+    if (pulse_present) {
+        idx = 0;
+        for (i = 0; i < pulse->num_pulse; i++) {
+            INTFLOAT co = coef_base[ pulse->pos[i] ];
+            while (offsets[idx + 1] <= pulse->pos[i]) /* idx is not reset between pulses, so this search only moves forward */
+                idx++;
+            if (band_type[idx] != NOISE_BT && sf[idx]) { /* pulses are skipped in noise bands and where the scalefactor is 0 */
+                INTFLOAT ico = -pulse->amp[i];
+#if USE_FIXED
+                if (co) {
+                    ico = co + (co > 0 ? -ico : ico); /* add the amplitude away from zero, keeping the sign of co */
+                }
+                coef_base[ pulse->pos[i] ] = ico;
+#else
+                if (co) {
+                    co /= sf[idx];
+                    ico = co / sqrtf(sqrtf(fabsf(co))) + (co > 0 ? -ico : ico); /* co/|co|^(1/4) undoes the 4/3 power before the amplitude is added */
+                }
+                coef_base[ pulse->pos[i] ] = cbrtf(fabsf(ico)) * ico * sf[idx]; /* reapply the signed 4/3 power and the scalefactor */
+#endif /* USE_FIXED */
+            }
+        }
+    }
+#if USE_FIXED
+    coef = coef_base; /* fixed build: second pass over all bands, run after the pulse stage */
+    idx = 0;
+    for (g = 0; g < ics->num_window_groups; g++) {
+        unsigned g_len = ics->group_len[g];
+
+        for (i = 0; i < ics->max_sfb; i++, idx++) {
+            const unsigned cbt_m1 = band_type[idx] - 1;
+            int *cfo = coef + offsets[i];
+            int off_len = offsets[i + 1] - offsets[i];
+            int group;
+
+            if (cbt_m1 < NOISE_BT - 1) { /* only bands below NOISE_BT; band type 0 is excluded by the unsigned wrap */
+                for (group = 0; group < (int)g_len; group++, cfo+=128) {
+                    ac->vector_pow43(cfo, off_len);
+                    ac->subband_scale(cfo, cfo, sf[idx], 34, off_len);
+                }
+            }
+        }
+        coef += g_len << 7;
+    }
+#endif /* USE_FIXED */
+    return 0;
+}
+
+/**
+ * Apply AAC-Main style frequency domain prediction to one channel (long windows only).
+ */
+static void apply_prediction(AACContext *ac, SingleChannelElement *sce)
+{
+    int sfb, k;
+
+    if (!sce->ics.predictor_initialized) { /* first use for this channel: start from a reset predictor state */
+        reset_all_predictors(sce->predictor_state);
+        sce->ics.predictor_initialized = 1;
+    }
+
+    if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
+        for (sfb = 0;
+             sfb < ff_aac_pred_sfb_max[ac->oc[1].m4ac.sampling_index]; /* upper band limit is looked up by sampling index */
+             sfb++) {
+            for (k = sce->ics.swb_offset[sfb];
+                 k < sce->ics.swb_offset[sfb + 1];
+                 k++) {
+                predict(&sce->predictor_state[k], &sce->coeffs[k], /* called for every coefficient in range, whatever the flag */
+                        sce->ics.predictor_present &&
+                        sce->ics.prediction_used[sfb]); /* flag is set only when prediction is present and enabled for this band */
+            }
+        }
+        if (sce->ics.predictor_reset_group)
+            reset_predictor_group(sce->predictor_state,
+                                  sce->ics.predictor_reset_group);
+    } else
+        reset_all_predictors(sce->predictor_state); /* eight-short frames run no prediction and only reset the state */
+}
+
+/**
+ * Decode an individual_channel_stream payload; reference: table 4.44.
+ *
+ * @param   common_window   Channels have independent [0], or shared [1], Individual Channel Stream information.
+ * @param   scale_flag      scalable [1] or non-scalable [0] AAC (Unused until scalable AAC is implemented.)
+ *
+ * @return  Returns error status. 0 - OK, !0 - error (a negative AVERROR code on every failure path)
+ */
+static int decode_ics(AACContext *ac, SingleChannelElement *sce,
+                      GetBitContext *gb, int common_window, int scale_flag)
+{
+    Pulse pulse;
+    TemporalNoiseShaping    *tns = &sce->tns;
+    IndividualChannelStream *ics = &sce->ics;
+    INTFLOAT *out = sce->coeffs;
+    int global_gain, eld_syntax, er_syntax, pulse_present = 0;
+    int ret;
+
+    eld_syntax = ac->oc[1].m4ac.object_type == AOT_ER_AAC_ELD;
+    er_syntax  = ac->oc[1].m4ac.object_type == AOT_ER_AAC_LC ||
+                 ac->oc[1].m4ac.object_type == AOT_ER_AAC_LTP ||
+                 ac->oc[1].m4ac.object_type == AOT_ER_AAC_LD ||
+                 ac->oc[1].m4ac.object_type == AOT_ER_AAC_ELD;
+
+    /* This assignment is to silence a GCC warning about the variable being used
+     * uninitialized when in fact it always is.
+     */
+    pulse.num_pulse = 0;
+
+    global_gain = get_bits(gb, 8);
+
+    if (!common_window && !scale_flag) { /* with a common window the caller has already filled in ics */
+        if (decode_ics_info(ac, ics, gb) < 0)
+            return AVERROR_INVALIDDATA;
+    }
+
+    if ((ret = decode_band_types(ac, sce->band_type,
+                                 sce->band_type_run_end, gb, ics)) < 0)
+        return ret;
+    if ((ret = decode_scalefactors(ac, sce->sf, gb, global_gain, ics,
+                                  sce->band_type, sce->band_type_run_end)) < 0)
+        return ret;
+
+    pulse_present = 0; /* redundant with the initializer above; kept as in the original */
+    if (!scale_flag) {
+        if (!eld_syntax && (pulse_present = get_bits1(gb))) { /* the pulse flag bit is not read at all for ELD */
+            if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
+                av_log(ac->avctx, AV_LOG_ERROR,
+                       "Pulse tool not allowed in eight short sequence.\n");
+                return AVERROR_INVALIDDATA;
+            }
+            if (decode_pulses(&pulse, gb, ics->swb_offset, ics->num_swb)) {
+                av_log(ac->avctx, AV_LOG_ERROR,
+                       "Pulse data corrupt or invalid.\n");
+                return AVERROR_INVALIDDATA;
+            }
+        }
+        tns->present = get_bits1(gb);
+        if (tns->present && !er_syntax) /* non-ER syntax: TNS data comes before the gain control flag */
+            if (decode_tns(ac, tns, gb, ics) < 0)
+                return AVERROR_INVALIDDATA;
+        if (!eld_syntax && get_bits1(gb)) { /* gain control (SSR) flag; also not read for ELD */
+            avpriv_request_sample(ac->avctx, "SSR");
+            return AVERROR_PATCHWELCOME;
+        }
+        // I see no textual basis in the spec for this occurring after SSR gain
+        // control, but this is what both reference and real implementations do
+        if (tns->present && er_syntax)
+            if (decode_tns(ac, tns, gb, ics) < 0)
+                return AVERROR_INVALIDDATA;
+    }
+
+    if (decode_spectrum_and_dequant(ac, out, gb, sce->sf, pulse_present,
+                                    &pulse, ics, sce->band_type) < 0)
+        return AVERROR_INVALIDDATA;
+
+    if (ac->oc[1].m4ac.object_type == AOT_AAC_MAIN && !common_window) /* with a common window, decode_cpe applies prediction after M/S instead */
+        apply_prediction(ac, sce);
+
+    return 0;
+}
+
+/**
+ * Mid/Side stereo decoding; reference: 4.6.8.1.3. Uses channel 0's ics for both channels.
+ */
+static void apply_mid_side_stereo(AACContext *ac, ChannelElement *cpe)
+{
+    const IndividualChannelStream *ics = &cpe->ch[0].ics;
+    INTFLOAT *ch0 = cpe->ch[0].coeffs;
+    INTFLOAT *ch1 = cpe->ch[1].coeffs;
+    int g, i, group, idx = 0;
+    const uint16_t *offsets = ics->swb_offset;
+    for (g = 0; g < ics->num_window_groups; g++) {
+        for (i = 0; i < ics->max_sfb; i++, idx++) { /* idx walks ms_mask/band_type across all groups */
+            if (cpe->ms_mask[idx] &&
+                cpe->ch[0].band_type[idx] < NOISE_BT &&
+                cpe->ch[1].band_type[idx] < NOISE_BT) { /* only bands where both channels have a band type below NOISE_BT */
+#if USE_FIXED
+                for (group = 0; group < ics->group_len[g]; group++) {
+                    ac->fdsp->butterflies_fixed(ch0 + group * 128 + offsets[i],
+                                                ch1 + group * 128 + offsets[i],
+                                                offsets[i+1] - offsets[i]);
+#else
+                for (group = 0; group < ics->group_len[g]; group++) {
+                    ac->fdsp->butterflies_float(ch0 + group * 128 + offsets[i],
+                                               ch1 + group * 128 + offsets[i],
+                                               offsets[i+1] - offsets[i]);
+#endif /* USE_FIXED */
+                } /* closes the for loop opened in whichever preprocessor branch was compiled */
+            }
+        }
+        ch0 += ics->group_len[g] * 128; /* step both channels past this group's windows */
+        ch1 += ics->group_len[g] * 128;
+    }
+}
+
+/**
+ * intensity stereo decoding; reference: 4.6.8.2.3 (channel 1 is rebuilt from scaled channel 0)
+ *
+ * @param   ms_present  Indicates mid/side stereo presence. [0] mask is all 0s;
+ *                      [1] mask is decoded from bitstream; [2] mask is all 1s;
+ *                      [3] reserved for scalable AAC
+ */
+static void apply_intensity_stereo(AACContext *ac,
+                                   ChannelElement *cpe, int ms_present)
+{
+    const IndividualChannelStream *ics = &cpe->ch[1].ics;
+    SingleChannelElement         *sce1 = &cpe->ch[1];
+    INTFLOAT *coef0 = cpe->ch[0].coeffs, *coef1 = cpe->ch[1].coeffs;
+    const uint16_t *offsets = ics->swb_offset;
+    int g, group, i, idx = 0;
+    int c;
+    INTFLOAT scale;
+    for (g = 0; g < ics->num_window_groups; g++) {
+        for (i = 0; i < ics->max_sfb;) { /* no increment here: i advances a whole band-type run at a time */
+            if (sce1->band_type[idx] == INTENSITY_BT ||
+                sce1->band_type[idx] == INTENSITY_BT2) {
+                const int bt_run_end = sce1->band_type_run_end[idx];
+                for (; i < bt_run_end; i++, idx++) {
+                    c = -1 + 2 * (sce1->band_type[idx] - 14); /* maps band types 14/15 to -1/+1 — NOTE(review): assumes INTENSITY_BT2 == 14 and INTENSITY_BT == 15 */
+                    if (ms_present)
+                        c *= 1 - 2 * cpe->ms_mask[idx]; /* a set mask entry flips the sign */
+                    scale = c * sce1->sf[idx];
+                    for (group = 0; group < ics->group_len[g]; group++)
+#if USE_FIXED
+                        ac->subband_scale(coef1 + group * 128 + offsets[i],
+                                      coef0 + group * 128 + offsets[i],
+                                      scale,
+                                      23,
+                                      offsets[i + 1] - offsets[i]);
+#else
+                        ac->fdsp->vector_fmul_scalar(coef1 + group * 128 + offsets[i],
+                                                    coef0 + group * 128 + offsets[i],
+                                                    scale,
+                                                    offsets[i + 1] - offsets[i]);
+#endif /* USE_FIXED */
+                }
+            } else { /* not an intensity run: skip it entirely */
+                int bt_run_end = sce1->band_type_run_end[idx];
+                idx += bt_run_end - i;
+                i    = bt_run_end;
+            }
+        }
+        coef0 += ics->group_len[g] * 128;
+        coef1 += ics->group_len[g] * 128;
+    }
+}
+
+/**
+ * Decode a channel_pair_element; reference: table 4.4.
+ *
+ * @return  Returns error status. 0 - OK, !0 - error
+ */
+static int decode_cpe(AACContext *ac, GetBitContext *gb, ChannelElement *cpe)
+{
+    int i, ret, common_window, ms_present = 0;
+    int eld_syntax = ac->oc[1].m4ac.object_type == AOT_ER_AAC_ELD;
+
+    common_window = eld_syntax || get_bits1(gb); /* for ELD the flag bit is not read and a common window is assumed */
+    if (common_window) {
+        if (decode_ics_info(ac, &cpe->ch[0].ics, gb))
+            return AVERROR_INVALIDDATA;
+        i = cpe->ch[1].ics.use_kb_window[0]; /* saved before the struct copy below overwrites it */
+        cpe->ch[1].ics = cpe->ch[0].ics;
+        cpe->ch[1].ics.use_kb_window[1] = i; /* channel 1's own old [0] entry becomes its [1] entry */
+        if (cpe->ch[1].ics.predictor_present &&
+            (ac->oc[1].m4ac.object_type != AOT_AAC_MAIN))
+            if ((cpe->ch[1].ics.ltp.present = get_bits(gb, 1)))
+                decode_ltp(&cpe->ch[1].ics.ltp, gb, cpe->ch[1].ics.max_sfb);
+        ms_present = get_bits(gb, 2);
+        if (ms_present == 3) {
+            av_log(ac->avctx, AV_LOG_ERROR, "ms_present = 3 is reserved.\n");
+            return AVERROR_INVALIDDATA;
+        } else if (ms_present)
+            decode_mid_side_stereo(cpe, gb, ms_present);
+    }
+    if ((ret = decode_ics(ac, &cpe->ch[0], gb, common_window, 0)))
+        return ret;
+    if ((ret = decode_ics(ac, &cpe->ch[1], gb, common_window, 0)))
+        return ret;
+
+    if (common_window) {
+        if (ms_present)
+            apply_mid_side_stereo(ac, cpe);
+        if (ac->oc[1].m4ac.object_type == AOT_AAC_MAIN) { /* decode_ics skips prediction when common_window is set, so it runs here after M/S */
+            apply_prediction(ac, &cpe->ch[0]);
+            apply_prediction(ac, &cpe->ch[1]);
+        }
+    }
+
+    apply_intensity_stereo(ac, cpe, ms_present); /* runs regardless of common_window; ms_present is 0 without one */
+    return 0;
+}
+
+static const float cce_scale[] = { /* indexed by the 2-bit field decode_cce() reads, through AAC_RENAME(cce_scale) */
+    1.09050773266525765921, //2^(1/8)
+    1.18920711500272106672, //2^(1/4)
+    M_SQRT2,                //2^(1/2)
+    2,                      //2^1
+};
+
+/**
+ * Decode coupling_channel_element; reference: table 4.8.
+ *
+ * @return  Returns error status. 0 - OK, !0 - error (the only failure source is decode_ics)
+ */
+static int decode_cce(AACContext *ac, GetBitContext *gb, ChannelElement *che)
+{
+    int num_gain = 0;
+    int c, g, sfb, ret;
+    int sign;
+    INTFLOAT scale;
+    SingleChannelElement *sce = &che->ch[0];
+    ChannelCoupling     *coup = &che->coup;
+
+    coup->coupling_point = 2 * get_bits1(gb); /* first bit contributes 0 or 2; completed after the target list below */
+    coup->num_coupled = get_bits(gb, 3);
+    for (c = 0; c <= coup->num_coupled; c++) { /* inclusive bound: num_coupled + 1 targets are read */
+        num_gain++;
+        coup->type[c] = get_bits1(gb) ? TYPE_CPE : TYPE_SCE;
+        coup->id_select[c] = get_bits(gb, 4);
+        if (coup->type[c] == TYPE_CPE) {
+            coup->ch_select[c] = get_bits(gb, 2);
+            if (coup->ch_select[c] == 3)
+                num_gain++; /* ch_select 3 on a pair needs a second gain list */
+        } else
+            coup->ch_select[c] = 2;
+    }
+    coup->coupling_point += get_bits1(gb) || (coup->coupling_point >> 1); /* the bit is always read; final value is 0, 1 or 3 */
+
+    sign  = get_bits(gb, 1);
+    scale = AAC_RENAME(cce_scale)[get_bits(gb, 2)];
+
+    if ((ret = decode_ics(ac, sce, gb, 0, 0)))
+        return ret;
+
+    for (c = 0; c < num_gain; c++) {
+        int idx  = 0;
+        int cge  = 1;
+        int gain = 0;
+        INTFLOAT gain_cache = FIXR10(1.);
+        if (c) { /* the first gain list reads no common gain and starts from FIXR10(1.) */
+            cge = coup->coupling_point == AFTER_IMDCT ? 1 : get_bits1(gb);
+            gain = cge ? get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60: 0;
+            gain_cache = GET_GAIN(scale, gain);
+        }
+        if (coup->coupling_point == AFTER_IMDCT) {
+            coup->gain[c][0] = gain_cache; /* a single gain for the whole element */
+        } else {
+            for (g = 0; g < sce->ics.num_window_groups; g++) {
+                for (sfb = 0; sfb < sce->ics.max_sfb; sfb++, idx++) {
+                    if (sce->band_type[idx] != ZERO_BT) { /* ZERO_BT bands read nothing and their gain entry is left unwritten */
+                        if (!cge) {
+                            int t = get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
+                            if (t) { /* a zero delta reuses the cached gain */
+                                int s = 1;
+                                t = gain += t; /* deltas accumulate into the running gain */
+                                if (sign) {
+                                    s  -= 2 * (t & 0x1); /* low bit of the accumulated gain selects a negative sign */
+                                    t >>= 1;
+                                }
+                                gain_cache = GET_GAIN(scale, t) * s;
+                            }
+                        }
+                        coup->gain[c][idx] = gain_cache;
+                    }
+                }
+            }
+        }
+    }
+    return 0;
+}
+
+/**
+ * Parse whether channels are to be excluded from Dynamic Range Compression; reference: table 4.53.
+ *
+ * @return  Returns number of bytes consumed (one per group of 7 exclusion flags read).
+ */
+static int decode_drc_channel_exclusions(DynamicRangeControl *che_drc,
+                                         GetBitContext *gb)
+{
+    int i;
+    int num_excl_chan = 0;
+
+    do {
+        for (i = 0; i < 7; i++)
+            che_drc->exclude_mask[num_excl_chan++] = get_bits1(gb);
+    } while (num_excl_chan < MAX_CHANNELS - 7 && get_bits1(gb)); /* continuation bit is read only while another 7 flags still fit */
+
+    return num_excl_chan / 7;
+}
+
+/**
+ * Decode dynamic range information; reference: table 4.52.
+ *
+ * @return  Returns number of bytes consumed.
+ */
+static int decode_dynamic_range(DynamicRangeControl *che_drc,
+                                GetBitContext *gb)
+{
+    int n             = 1; /* byte counter; starts at 1 before any optional field is added */
+    int drc_num_bands = 1;
+    int i;
+
+    /* pce_tag_present? */
+    if (get_bits1(gb)) {
+        che_drc->pce_instance_tag  = get_bits(gb, 4);
+        skip_bits(gb, 4); // tag_reserved_bits
+        n++;
+    }
+
+    /* excluded_chns_present? */
+    if (get_bits1(gb)) {
+        n += decode_drc_channel_exclusions(che_drc, gb);
+    }
+
+    /* drc_bands_present? */
+    if (get_bits1(gb)) {
+        che_drc->band_incr            = get_bits(gb, 4);
+        che_drc->interpolation_scheme = get_bits(gb, 4);
+        n++;
+        drc_num_bands += che_drc->band_incr; /* 4-bit increment, so at most 16 bands */
+        for (i = 0; i < drc_num_bands; i++) {
+            che_drc->band_top[i] = get_bits(gb, 8);
+            n++;
+        }
+    }
+
+    /* prog_ref_level_present? */
+    if (get_bits1(gb)) {
+        che_drc->prog_ref_level = get_bits(gb, 7);
+        skip_bits1(gb); // prog_ref_level_reserved_bits
+        n++;
+    }
+
+    for (i = 0; i < drc_num_bands; i++) { /* one sign bit plus a 7-bit control value per band */
+        che_drc->dyn_rng_sgn[i] = get_bits1(gb);
+        che_drc->dyn_rng_ctl[i] = get_bits(gb, 7);
+        n++;
+    }
+
+    return n;
+}
+
+static int decode_fill(AACContext *ac, GetBitContext *gb, int len) { /* scan a fill payload of len bits for a libfaac tag; always returns 0 */
+    uint8_t buf[256];
+    int i, major, minor;
+
+    if (len < 13+7*8) /* too short to inspect: skip the whole payload unread */
+        goto unknown;
+
+    get_bits(gb, 13); len -= 13; /* the first 13 bits are read and discarded */
+
+    for(i=0; i+1<sizeof(buf) && len>=8; i++, len-=8) /* copy whole bytes, leaving room for the terminator */
+        buf[i] = get_bits(gb, 8);
+
+    buf[i] = 0; /* NUL-terminate so buf can be used as a C string below */
+    if (ac->avctx->debug & FF_DEBUG_PICT_INFO)
+        av_log(ac->avctx, AV_LOG_DEBUG, "FILL:%s\n", buf);
+
+    if (sscanf(buf, "libfaac %d.%d", &major, &minor) == 2){ /* major/minor are parsed only to validate the tag; any version matches */
+        ac->avctx->internal->skip_samples = 1024;
+    }
+
+unknown:
+    skip_bits_long(gb, len); /* len now holds whatever was not consumed above */
+
+    return 0;
+}
+
+/**
+ * Decode extension data (incomplete); reference: table 4.51.
+ *
+ * @param   cnt length of TYPE_FIL syntactic element in bytes
+ *
+ * @return Returns number of bytes consumed
+ */
+static int decode_extension_payload(AACContext *ac, GetBitContext *gb, int cnt,
+                                    ChannelElement *che, enum RawDataBlockType elem_type)
+{
+    int crc_flag = 0;
+    int res = cnt; /* default: report the whole element as consumed */
+    int type = get_bits(gb, 4);
+
+    if (ac->avctx->debug & FF_DEBUG_STARTCODE)
+        av_log(ac->avctx, AV_LOG_DEBUG, "extension type: %d len:%d\n", type, cnt);
+
+    switch (type) { // extension type
+    case EXT_SBR_DATA_CRC:
+        crc_flag++; /* fall through: same handling as EXT_SBR_DATA, with the CRC flag set */
+    case EXT_SBR_DATA:
+        if (!che) {
+            av_log(ac->avctx, AV_LOG_ERROR, "SBR was found before the first channel element.\n");
+            return res; /* NOTE(review): unlike the branches below, this returns without skipping the payload bits */
+        } else if (!ac->oc[1].m4ac.sbr) {
+            av_log(ac->avctx, AV_LOG_ERROR, "SBR signaled to be not-present but was found in the bitstream.\n");
+            skip_bits_long(gb, 8 * cnt - 4); /* payload bits minus the 4-bit type already read */
+            return res;
+        } else if (ac->oc[1].m4ac.sbr == -1 && ac->oc[1].status == OC_LOCKED) {
+            av_log(ac->avctx, AV_LOG_ERROR, "Implicit SBR was found with a first occurrence after the first frame.\n");
+            skip_bits_long(gb, 8 * cnt - 4);
+            return res;
+        } else if (ac->oc[1].m4ac.ps == -1 && ac->oc[1].status < OC_LOCKED && ac->avctx->channels == 1) {
+            ac->oc[1].m4ac.sbr = 1; /* mono stream with ps == -1, not yet locked: enable SBR and PS, then reconfigure the output */
+            ac->oc[1].m4ac.ps = 1;
+            ac->avctx->profile = FF_PROFILE_AAC_HE_V2;
+            output_configure(ac, ac->oc[1].layout_map, ac->oc[1].layout_map_tags,
+                             ac->oc[1].status, 1);
+        } else {
+            ac->oc[1].m4ac.sbr = 1;
+            ac->avctx->profile = FF_PROFILE_AAC_HE;
+        }
+        res = AAC_RENAME(ff_decode_sbr_extension)(ac, &che->sbr, gb, crc_flag, cnt, elem_type);
+        break;
+    case EXT_DYNAMIC_RANGE:
+        res = decode_dynamic_range(&ac->che_drc, gb);
+        break;
+    case EXT_FILL:
+        decode_fill(ac, gb, 8 * cnt - 4); /* res stays cnt; decode_fill consumes the remaining bits itself */
+        break;
+    case EXT_FILL_DATA:
+    case EXT_DATA_ELEMENT:
+    default:
+        skip_bits_long(gb, 8 * cnt - 4); /* unhandled types are skipped wholesale */
+        break;
+    };
+    return res;
+}
+
+/**
+ * Decode Temporal Noise Shaping filter coefficients and apply the filters in place; reference: 4.6.9.3.
+ *
+ * @param   decode  1 if tool is used normally (all-pole filter), 0 if tool is used in LTP (moving-average filter).
+ * @param   coef    spectral coefficients
+ */
+static void apply_tns(INTFLOAT coef[1024], TemporalNoiseShaping *tns,
+                      IndividualChannelStream *ics, int decode)
+{
+    const int mmm = FFMIN(ics->tns_max_bands, ics->max_sfb); /* highest band index any filter may reach */
+    int w, filt, m, i;
+    int bottom, top, order, start, end, size, inc;
+    INTFLOAT lpc[TNS_MAX_ORDER];
+    INTFLOAT tmp[TNS_MAX_ORDER+1];
+
+    for (w = 0; w < ics->num_windows; w++) {
+        bottom = ics->num_swb; /* filters are laid out from the top band downwards */
+        for (filt = 0; filt < tns->n_filt[w]; filt++) {
+            top    = bottom;
+            bottom = FFMAX(0, top - tns->length[w][filt]);
+            order  = tns->order[w][filt];
+            if (order == 0) /* an order-0 filter still consumes its band range */
+                continue;
+
+            // tns_decode_coef
+            AAC_RENAME(compute_lpc_coefs)(tns->coef[w][filt], order, lpc, 0, 0, 0);
+
+            start = ics->swb_offset[FFMIN(bottom, mmm)];
+            end   = ics->swb_offset[FFMIN(   top, mmm)];
+            if ((size = end - start) <= 0) /* range lies entirely above the clamp */
+                continue;
+            if (tns->direction[w][filt]) { /* nonzero direction: filter from the top coefficient downwards */
+                inc = -1;
+                start = end - 1;
+            } else {
+                inc = 1;
+            }
+            start += w * 128; /* move to this window's block of coefficients */
+
+            if (decode) {
+                // ar filter
+                for (m = 0; m < size; m++, start += inc)
+                    for (i = 1; i <= FFMIN(m, order); i++) /* FFMIN(m, order): only taps that lie inside the range so far */
+                        coef[start] -= AAC_MUL26(coef[start - i * inc], lpc[i - 1]);
+            } else {
+                // ma filter
+                for (m = 0; m < size; m++, start += inc) {
+                    tmp[0] = coef[start]; /* keep the unfiltered input; tmp[] is the history of past inputs */
+                    for (i = 1; i <= FFMIN(m, order); i++)
+                        coef[start] += AAC_MUL26(tmp[i], lpc[i - 1]);
+                    for (i = order; i > 0; i--) /* shift the history by one; slots not yet filled are never read by the loop above */
+                        tmp[i] = tmp[i - 1];
+                }
+            }
+        }
+    }
+}
+
+/**
+ *  Apply windowing and MDCT to obtain the spectral
+ *  coefficient from the predicted sample by LTP. The input buffer is windowed in place.
+ */
+static void windowing_and_mdct_ltp(AACContext *ac, INTFLOAT *out,
+                                   INTFLOAT *in, IndividualChannelStream *ics)
+{
+    const INTFLOAT *lwindow      = ics->use_kb_window[0] ? AAC_RENAME(ff_aac_kbd_long_1024) : AAC_RENAME(ff_sine_1024);
+    const INTFLOAT *swindow      = ics->use_kb_window[0] ? AAC_RENAME(ff_aac_kbd_short_128) : AAC_RENAME(ff_sine_128);
+    const INTFLOAT *lwindow_prev = ics->use_kb_window[1] ? AAC_RENAME(ff_aac_kbd_long_1024) : AAC_RENAME(ff_sine_1024);
+    const INTFLOAT *swindow_prev = ics->use_kb_window[1] ? AAC_RENAME(ff_aac_kbd_short_128) : AAC_RENAME(ff_sine_128);
+
+    if (ics->window_sequence[0] != LONG_STOP_SEQUENCE) { /* first 1024 samples use the window chosen by use_kb_window[1] */
+        ac->fdsp->vector_fmul(in, in, lwindow_prev, 1024);
+    } else {
+        memset(in, 0, 448 * sizeof(*in)); /* LONG_STOP: zero 448 samples, short-window the next 128, leave the rest as is */
+        ac->fdsp->vector_fmul(in + 448, in + 448, swindow_prev, 128);
+    }
+    if (ics->window_sequence[0] != LONG_START_SEQUENCE) { /* second 1024 samples use the window chosen by use_kb_window[0], reversed */
+        ac->fdsp->vector_fmul_reverse(in + 1024, in + 1024, lwindow, 1024);
+    } else {
+        ac->fdsp->vector_fmul_reverse(in + 1024 + 448, in + 1024 + 448, swindow, 128); /* LONG_START: first 448 untouched, short window on 128, */
+        memset(in + 1024 + 576, 0, 448 * sizeof(*in));                                 /* and the final 448 samples zeroed */
+    }
+    ac->mdct_ltp.mdct_calc(&ac->mdct_ltp, out, in);
+}
+
+/**
+ * Apply the long term prediction (does nothing for eight-short window sequences)
+ */
+static void apply_ltp(AACContext *ac, SingleChannelElement *sce)
+{
+    const LongTermPrediction *ltp = &sce->ics.ltp;
+    const uint16_t *offsets = sce->ics.swb_offset;
+    int i, sfb;
+
+    if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
+        INTFLOAT *predTime = sce->ret;      /* sce->ret and ac->buf_mdct serve as scratch buffers here */
+        INTFLOAT *predFreq = ac->buf_mdct;
+        int16_t num_samples = 2048;
+
+        if (ltp->lag < 1024)
+            num_samples = ltp->lag + 1024; /* keeps the index i + 2048 - lag below 3072 */
+        for (i = 0; i < num_samples; i++)
+            predTime[i] = AAC_MUL30(sce->ltp_state[i + 2048 - ltp->lag], ltp->coef);
+        memset(&predTime[i], 0, (2048 - i) * sizeof(*predTime)); /* zero-pad up to 2048 samples */
+
+        ac->windowing_and_mdct_ltp(ac, predFreq, predTime, &sce->ics);
+
+        if (sce->tns.present)
+            ac->apply_tns(predFreq, &sce->tns, &sce->ics, 0); /* decode = 0 selects the LTP variant of the TNS filter */
+
+        for (sfb = 0; sfb < FFMIN(sce->ics.max_sfb, MAX_LTP_LONG_SFB); sfb++)
+            if (ltp->used[sfb]) /* add the prediction only in bands flagged as used */
+                for (i = offsets[sfb]; i < offsets[sfb + 1]; i++)
+                    sce->coeffs[i] += predFreq[i];
+    }
+}
+
+/**
+ * Update sce->ltp_state (three blocks of 1024 samples) for the next frame; sce->coeffs is overwritten as scratch.
+ */
+static void update_ltp(AACContext *ac, SingleChannelElement *sce)
+{
+    IndividualChannelStream *ics = &sce->ics;
+    INTFLOAT *saved     = sce->saved;
+    INTFLOAT *saved_ltp = sce->coeffs; // scratch: 1024 samples built below from saved / ac->buf_mdct
+    const INTFLOAT *lwindow = ics->use_kb_window[0] ? AAC_RENAME(ff_aac_kbd_long_1024) : AAC_RENAME(ff_sine_1024);
+    const INTFLOAT *swindow = ics->use_kb_window[0] ? AAC_RENAME(ff_aac_kbd_short_128) : AAC_RENAME(ff_sine_128);
+    int i;
+
+    if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
+        memcpy(saved_ltp,       saved, 512 * sizeof(*saved_ltp)); // 0..511 from the overlap buffer (448..511 are rewritten just below)
+        memset(saved_ltp + 576, 0,     448 * sizeof(*saved_ltp)); // 576..1023 cleared
+        ac->fdsp->vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960,     &swindow[64],      64); // 448..511: windowed buf_mdct[960..1023]
+
+        for (i = 0; i < 64; i++)
+            saved_ltp[i + 512] = AAC_MUL31(ac->buf_mdct[1023 - i], swindow[63 - i]); // 512..575: buf_mdct read backwards from 1023
+    } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
+        memcpy(saved_ltp,       ac->buf_mdct + 512, 448 * sizeof(*saved_ltp)); // 0..447 copied unwindowed from buf_mdct[512..959]
+        memset(saved_ltp + 576, 0,                  448 * sizeof(*saved_ltp)); // 576..1023 cleared
+        ac->fdsp->vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960,     &swindow[64],      64); // 448..511: windowed buf_mdct[960..1023]
+
+        for (i = 0; i < 64; i++)
+            saved_ltp[i + 512] = AAC_MUL31(ac->buf_mdct[1023 - i], swindow[63 - i]); // 512..575: buf_mdct read backwards from 1023
+    } else { // LONG_STOP or ONLY_LONG
+        ac->fdsp->vector_fmul_reverse(saved_ltp,       ac->buf_mdct + 512,     &lwindow[512],     512); // 0..511: windowed buf_mdct[512..1023]
+
+        for (i = 0; i < 512; i++)
+            saved_ltp[i + 512] = AAC_MUL31(ac->buf_mdct[1023 - i], lwindow[511 - i]); // 512..1023: buf_mdct read backwards from 1023
+    }
+
+    memcpy(sce->ltp_state,      sce->ltp_state+1024, 1024 * sizeof(*sce->ltp_state)); // shift: the middle block becomes the oldest
+    memcpy(sce->ltp_state+1024, sce->ret,            1024 * sizeof(*sce->ltp_state)); // middle block: this frame's output in sce->ret
+    memcpy(sce->ltp_state+2048, saved_ltp,           1024 * sizeof(*sce->ltp_state)); // last block: the samples assembled in saved_ltp
+}
+
+/**
+ * Conduct IMDCT and windowing: sce->coeffs is transformed into ac->buf_mdct, overlap-added with sce->saved into sce->ret, then sce->saved is refreshed.
+ */
+static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce)
+{
+    IndividualChannelStream *ics = &sce->ics;
+    INTFLOAT *in    = sce->coeffs;
+    INTFLOAT *out   = sce->ret;
+    INTFLOAT *saved = sce->saved;
+    const INTFLOAT *swindow      = ics->use_kb_window[0] ? AAC_RENAME(ff_aac_kbd_short_128) : AAC_RENAME(ff_sine_128);
+    const INTFLOAT *lwindow_prev = ics->use_kb_window[1] ? AAC_RENAME(ff_aac_kbd_long_1024) : AAC_RENAME(ff_sine_1024);
+    const INTFLOAT *swindow_prev = ics->use_kb_window[1] ? AAC_RENAME(ff_aac_kbd_short_128) : AAC_RENAME(ff_sine_128);
+    INTFLOAT *buf  = ac->buf_mdct;
+    INTFLOAT *temp = ac->temp;
+    int i;
+
+    // imdct
+    if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
+        for (i = 0; i < 1024; i += 128) // eight transforms of 128 coefficients each
+            ac->mdct_small.imdct_half(&ac->mdct_small, buf + i, in + i);
+    } else {
+        ac->mdct.imdct_half(&ac->mdct, buf, in); // one transform over all 1024 coefficients
+#if USE_FIXED
+        for (i=0; i<1024; i++)
+          buf[i] = (buf[i] + 4) >> 3; // fixed point only: rounding right shift by 3
+#endif /* USE_FIXED */
+    }
+
+    /* window overlapping
+     * NOTE: To simplify the overlapping code, all 'meaningless' short to long
+     * and long to short transitions are considered to be short to short
+     * transitions. This leaves just two cases (long to long and short to short)
+     * with a little special sauce for EIGHT_SHORT_SEQUENCE.
+     */
+    if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
+            (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
+        ac->fdsp->vector_fmul_window(    out,               saved,            buf,         lwindow_prev, 512); // long to long: one 1024-sample overlap
+    } else {
+        memcpy(                         out,               saved,            448 * sizeof(*out)); // first 448 output samples come straight from saved
+
+        if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
+            ac->fdsp->vector_fmul_window(out + 448 + 0*128, saved + 448,      buf + 0*128, swindow_prev, 64);
+            ac->fdsp->vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow,      64);
+            ac->fdsp->vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow,      64);
+            ac->fdsp->vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow,      64);
+            ac->fdsp->vector_fmul_window(temp,              buf + 3*128 + 64, buf + 4*128, swindow,      64); // this overlap straddles the frame end, so it goes to temp
+            memcpy(                     out + 448 + 4*128, temp, 64 * sizeof(*out)); // its first 64 samples finish this frame's output
+        } else {
+            ac->fdsp->vector_fmul_window(out + 448,         saved + 448,      buf,         swindow_prev, 64);
+            memcpy(                     out + 576,         buf + 64,         448 * sizeof(*out));
+        }
+    }
+
+    // buffer update
+    if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
+        memcpy(                     saved,       temp + 64,         64 * sizeof(*saved)); // second half of temp starts the next overlap buffer
+        ac->fdsp->vector_fmul_window(saved + 64,  buf + 4*128 + 64, buf + 5*128, swindow, 64);
+        ac->fdsp->vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64);
+        ac->fdsp->vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64);
+        memcpy(                     saved + 448, buf + 7*128 + 64,  64 * sizeof(*saved));
+    } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
+        memcpy(                     saved,       buf + 512,        448 * sizeof(*saved));
+        memcpy(                     saved + 448, buf + 7*128 + 64,  64 * sizeof(*saved));
+    } else { // LONG_STOP or ONLY_LONG
+        memcpy(                     saved,       buf + 512,        512 * sizeof(*saved));
+    }
+}
+/** IMDCT and windowed overlap-add used for AOT_ER_AAC_LD (selected in spectral_to_sample()): sce->coeffs -> sce->ret, overlap kept in sce->saved. */
+static void imdct_and_windowing_ld(AACContext *ac, SingleChannelElement *sce)
+{
+    IndividualChannelStream *ics = &sce->ics;
+    INTFLOAT *in    = sce->coeffs;
+    INTFLOAT *out   = sce->ret;
+    INTFLOAT *saved = sce->saved;
+    INTFLOAT *buf  = ac->buf_mdct;
+#if USE_FIXED
+    int i;
+#endif /* USE_FIXED */
+
+    // imdct: dispatch through the same context that is passed as the first argument
+    ac->mdct_ld.imdct_half(&ac->mdct_ld, buf, in);
+
+#if USE_FIXED
+    for (i = 0; i < 1024; i++)
+        buf[i] = (buf[i] + 2) >> 2; // fixed point only: rounding right shift by 2
+#endif /* USE_FIXED */
+
+    // window overlapping
+    if (ics->use_kb_window[1]) {
+        // AAC LD uses a low overlap sine window instead of a KBD window
+        memcpy(out, saved, 192 * sizeof(*out)); // 0..191 copied straight from the overlap buffer
+        ac->fdsp->vector_fmul_window(out + 192, saved + 192, buf, AAC_RENAME(ff_sine_128), 64); // 192..319: 128-sample overlap
+        memcpy(                     out + 320, buf + 64, 192 * sizeof(*out)); // 320..511 copied straight from the new block
+    } else {
+        ac->fdsp->vector_fmul_window(out, saved, buf, AAC_RENAME(ff_sine_512), 256); // full 512-sample sine overlap
+    }
+
+    // buffer update
+    memcpy(saved, buf + 256, 256 * sizeof(*saved)); // keep buf[256..511] as the next frame's overlap
+}
+/** IMDCT and windowed overlap-add used for AOT_ER_AAC_ELD (selected in spectral_to_sample()); n is 480 or 512 and sce->saved holds the three previous blocks of n samples. */
+static void imdct_and_windowing_eld(AACContext *ac, SingleChannelElement *sce)
+{
+    INTFLOAT *in    = sce->coeffs;
+    INTFLOAT *out   = sce->ret;
+    INTFLOAT *saved = sce->saved;
+    INTFLOAT *buf  = ac->buf_mdct;
+    int i;
+    const int n  = ac->oc[1].m4ac.frame_length_short ? 480 : 512;
+    const int n2 = n >> 1;
+    const int n4 = n >> 2;
+    const INTFLOAT *const window = n == 480 ? AAC_RENAME(ff_aac_eld_window_480) :
+                                           AAC_RENAME(ff_aac_eld_window_512);
+
+    // Inverse transform, mapped to the conventional IMDCT by
+    // Chivukula, R.K.; Reznik, Y.A.; Devarajan, V.,
+    // "Efficient algorithms for MPEG-4 AAC-ELD, AAC-LD and AAC-LC filterbanks,"
+    // International Conference on Audio, Language and Image Processing, ICALIP 2008.
+    // URL: http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=4590245&isnumber=4589950
+    for (i = 0; i < n2; i+=2) { // in-place swap of in[] front and back halves, with sign flips
+        INTFLOAT temp;
+        temp =  in[i    ]; in[i    ] = -in[n - 1 - i]; in[n - 1 - i] = temp;
+        temp = -in[i + 1]; in[i + 1] =  in[n - 2 - i]; in[n - 2 - i] = temp;
+    }
+#if !USE_FIXED
+    if (n == 480)
+        ac->mdct480->imdct_half(ac->mdct480, buf, in, 1, -1.f/(16*1024*960));
+    else
+#endif
+        ac->mdct_ld.imdct_half(&ac->mdct_ld, buf, in); // dispatch through the context that is passed as the first argument
+
+#if USE_FIXED
+    for (i = 0; i < 1024; i++)
+      buf[i] = (buf[i] + 1) >> 1; // fixed point only: rounding right shift by 1
+#endif /* USE_FIXED */
+
+    for (i = 0; i < n; i+=2) { // negate every even-indexed sample
+        buf[i] = -buf[i];
+    }
+    // Like with the regular IMDCT at this point we still have the middle half
+    // of a transform but with even symmetry on the left and odd symmetry on
+    // the right
+
+    // window overlapping
+    // The spec says to use samples [0..511] but the reference decoder uses
+    // samples [128..639].
+    for (i = n4; i < n2; i ++) { // output samples 0 .. n4-1
+        out[i - n4] = AAC_MUL31(   buf[    n2 - 1 - i] , window[i       - n4]) +
+                      AAC_MUL31( saved[        i + n2] , window[i +   n - n4]) +
+                      AAC_MUL31(-saved[n + n2 - 1 - i] , window[i + 2*n - n4]) +
+                      AAC_MUL31(-saved[  2*n + n2 + i] , window[i + 3*n - n4]);
+    }
+    for (i = 0; i < n2; i ++) { // output samples n4 .. n4+n2-1
+        out[n4 + i] = AAC_MUL31(   buf[              i] , window[i + n2       - n4]) +
+                      AAC_MUL31(-saved[      n - 1 - i] , window[i + n2 +   n - n4]) +
+                      AAC_MUL31(-saved[          n + i] , window[i + n2 + 2*n - n4]) +
+                      AAC_MUL31( saved[2*n + n - 1 - i] , window[i + n2 + 3*n - n4]);
+    }
+    for (i = 0; i < n4; i ++) { // output samples n2+n4 .. n-1 (three terms only)
+        out[n2 + n4 + i] = AAC_MUL31(   buf[    i + n2] , window[i +   n - n4]) +
+                           AAC_MUL31(-saved[n2 - 1 - i] , window[i + 2*n - n4]) +
+                           AAC_MUL31(-saved[n + n2 + i] , window[i + 3*n - n4]);
+    }
+
+    // buffer update
+    memmove(saved + n, saved, 2 * n * sizeof(*saved)); // age the history by one block (regions overlap, hence memmove)
+    memcpy( saved,       buf,     n * sizeof(*saved)); // newest block goes to the front
+}
+
+/**
+ * channel coupling transformation interface: for every CCE in ac->che[TYPE_CCE] with the given
+ * coupling_point, call apply_coupling_method on the channels of cc selected by an entry matching type and elem_id.
+ * @param   apply_coupling_method   pointer to (in)dependent coupling function
+ */
+static void apply_channel_coupling(AACContext *ac, ChannelElement *cc,
+                                   enum RawDataBlockType type, int elem_id,
+                                   enum CouplingPoint coupling_point,
+                                   void (*apply_coupling_method)(AACContext *ac, SingleChannelElement *target, ChannelElement *cce, int index))
+{
+    int i, c;
+
+    for (i = 0; i < MAX_ELEM_ID; i++) {
+        ChannelElement *cce = ac->che[TYPE_CCE][i];
+        int index = 0; // running index forwarded to apply_coupling_method; restarts for each CCE
+
+        if (cce && cce->coup.coupling_point == coupling_point) {
+            ChannelCoupling *coup = &cce->coup;
+
+            for (c = 0; c <= coup->num_coupled; c++) {
+                if (coup->type[c] == type && coup->id_select[c] == elem_id) {
+                    if (coup->ch_select[c] != 1) { // every ch_select value except 1 couples onto ch[0]
+                        apply_coupling_method(ac, &cc->ch[0], cce, index);
+                        if (coup->ch_select[c] != 0) // ch_select 0 reuses the same index for ch[1]
+                            index++;
+                    }
+                    if (coup->ch_select[c] != 2) // every ch_select value except 2 couples onto ch[1]
+                        apply_coupling_method(ac, &cc->ch[1], cce, index++);
+                } else
+                    index += 1 + (coup->ch_select[c] == 3); // entry for another element: skip its index (two when ch_select is 3)
+            }
+        }
+    }
+}
+
+/**
+ * Convert spectral data to samples, applying all supported tools as appropriate; samples is only read by the USE_FIXED output scaling loop.
+ */
+static void spectral_to_sample(AACContext *ac, int samples)
+{
+    int i, type;
+    void (*imdct_and_window)(AACContext *ac, SingleChannelElement *sce);
+    switch (ac->oc[1].m4ac.object_type) { // pick the synthesis filterbank for the object type
+    case AOT_ER_AAC_LD:
+        imdct_and_window = imdct_and_windowing_ld;
+        break;
+    case AOT_ER_AAC_ELD:
+        imdct_and_window = imdct_and_windowing_eld;
+        break;
+    default:
+        imdct_and_window = ac->imdct_and_windowing;
+    }
+    for (type = 3; type >= 0; type--) { // element types are visited from 3 down to 0
+        for (i = 0; i < MAX_ELEM_ID; i++) {
+            ChannelElement *che = ac->che[type][i];
+            if (che && che->present) {
+                if (type <= TYPE_CPE)
+                    apply_channel_coupling(ac, che, type, i, BEFORE_TNS, AAC_RENAME(apply_dependent_coupling));
+                if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) {
+                    if (che->ch[0].ics.predictor_present) { // ch[0]'s flag gates LTP for both channels
+                        if (che->ch[0].ics.ltp.present)
+                            ac->apply_ltp(ac, &che->ch[0]);
+                        if (che->ch[1].ics.ltp.present && type == TYPE_CPE)
+                            ac->apply_ltp(ac, &che->ch[1]);
+                    }
+                }
+                if (che->ch[0].tns.present)
+                    ac->apply_tns(che->ch[0].coeffs, &che->ch[0].tns, &che->ch[0].ics, 1);
+                if (che->ch[1].tns.present)
+                    ac->apply_tns(che->ch[1].coeffs, &che->ch[1].tns, &che->ch[1].ics, 1);
+                if (type <= TYPE_CPE)
+                    apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, AAC_RENAME(apply_dependent_coupling));
+                if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT) { // a CCE is only synthesized when it couples after the IMDCT
+                    imdct_and_window(ac, &che->ch[0]);
+                    if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP)
+                        ac->update_ltp(ac, &che->ch[0]);
+                    if (type == TYPE_CPE) {
+                        imdct_and_window(ac, &che->ch[1]);
+                        if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP)
+                            ac->update_ltp(ac, &che->ch[1]);
+                    }
+                    if (ac->oc[1].m4ac.sbr > 0) {
+                        AAC_RENAME(ff_sbr_apply)(ac, &che->sbr, type, che->ch[0].ret, che->ch[1].ret);
+                    }
+                }
+                if (type <= TYPE_CCE)
+                    apply_channel_coupling(ac, che, type, i, AFTER_IMDCT, AAC_RENAME(apply_independent_coupling));
+
+#if USE_FIXED
+                {
+                    int j;
+                    /* preparation for resampler: scale by 128 and add 0x8000, clipping so that neither step can overflow int32 */
+                    for(j = 0; j<samples; j++){
+                        che->ch[0].ret[j] = (int32_t)av_clip64((int64_t)che->ch[0].ret[j]*128, INT32_MIN, INT32_MAX-0x8000)+0x8000;
+                        if(type == TYPE_CPE)
+                            che->ch[1].ret[j] = (int32_t)av_clip64((int64_t)che->ch[1].ret[j]*128, INT32_MIN, INT32_MAX-0x8000)+0x8000;
+                    }
+                }
+#endif /* USE_FIXED */
+                che->present = 0; // consumed: the frame parser must set present again for the next frame
+            } else if (che) {
+                av_log(ac->avctx, AV_LOG_VERBOSE, "ChannelElement %d.%d missing \n", type, i);
+            }
+        }
+    }
+}
+/** Parse an ADTS frame header from gb and update ac->oc[1] from it; returns the avpriv_aac_parse_header() result, or a negative value when configuring the output fails. */
+static int parse_adts_frame_header(AACContext *ac, GetBitContext *gb)
+{
+    int size;
+    AACADTSHeaderInfo hdr_info;
+    uint8_t layout_map[MAX_ELEM_ID*4][3];
+    int layout_map_tags, ret;
+
+    size = avpriv_aac_parse_header(gb, &hdr_info);
+    if (size > 0) { // non-positive results are handed back to the caller unchanged
+        if (!ac->warned_num_aac_frames && hdr_info.num_aac_frames != 1) {
+            // This is 2 for "VLB " audio in NSV files.
+            // See samples/nsv/vlb_audio.
+            avpriv_report_missing_feature(ac->avctx,
+                                          "More than one AAC RDB per ADTS frame");
+            ac->warned_num_aac_frames = 1; // report only once per decoder instance
+        }
+        push_output_configuration(ac);
+        if (hdr_info.chan_config) {
+            ac->oc[1].m4ac.chan_config = hdr_info.chan_config;
+            if ((ret = set_default_channel_config(ac->avctx,
+                                                  layout_map,
+                                                  &layout_map_tags,
+                                                  hdr_info.chan_config)) < 0)
+                return ret;
+            if ((ret = output_configure(ac, layout_map, layout_map_tags,
+                                        FFMAX(ac->oc[1].status,
+                                              OC_TRIAL_FRAME), 0)) < 0)
+                return ret;
+        } else {
+            ac->oc[1].m4ac.chan_config = 0;
+            /**
+             * dual mono frames in Japanese DTV can have chan_config 0
+             * WITHOUT specifying PCE.
+             *  thus, set dual mono as default.
+             */
+            if (ac->dmono_mode && ac->oc[0].status == OC_NONE) {
+                layout_map_tags = 2;
+                layout_map[0][0] = layout_map[1][0] = TYPE_SCE; // two single channel elements ...
+                layout_map[0][2] = layout_map[1][2] = AAC_CHANNEL_FRONT; // ... both placed in front ...
+                layout_map[0][1] = 0; // ... with element ids 0 and 1
+                layout_map[1][1] = 1;
+                if (output_configure(ac, layout_map, layout_map_tags,
+                                     OC_TRIAL_FRAME, 0))
+                    return -7; // NOTE(review): bare -7 rather than the output_configure() result or an AVERROR code; confirm callers only test for < 0
+            }
+        }
+        ac->oc[1].m4ac.sample_rate     = hdr_info.sample_rate;
+        ac->oc[1].m4ac.sampling_index  = hdr_info.sampling_index;
+        ac->oc[1].m4ac.object_type     = hdr_info.object_type;
+        ac->oc[1].m4ac.frame_length_short = 0;
+        if (ac->oc[0].status != OC_LOCKED ||
+            ac->oc[0].m4ac.chan_config != hdr_info.chan_config ||
+            ac->oc[0].m4ac.sample_rate != hdr_info.sample_rate) {
+            ac->oc[1].m4ac.sbr = -1; // configuration not locked or changed: reset sbr/ps to -1
+            ac->oc[1].m4ac.ps  = -1;
+        }
+        if (!hdr_info.crc_absent)
+            skip_bits(gb, 16); // skip the 16 bits that are present when crc_absent is clear
+    }
+    return size;
+}
+/** Decode one frame of an AOT_ER_* stream: elements are read in the fixed order given by aac_channel_layout_map for m4ac->chan_config, with no element type read from the bitstream. */
+static int aac_decode_er_frame(AVCodecContext *avctx, void *data,
+                               int *got_frame_ptr, GetBitContext *gb)
+{
+    AACContext *ac = avctx->priv_data;
+    const MPEG4AudioConfig *const m4ac = &ac->oc[1].m4ac;
+    ChannelElement *che;
+    int err, i;
+    int samples = m4ac->frame_length_short ? 960 : 1024;
+    int chan_config = m4ac->chan_config;
+    int aot = m4ac->object_type;
+
+    if (aot == AOT_ER_AAC_LD || aot == AOT_ER_AAC_ELD)
+        samples >>= 1; // LD and ELD frames carry half as many samples
+
+    ac->frame = data;
+
+    if ((err = frame_configure_elements(avctx)) < 0)
+        return err;
+
+    // The FF_PROFILE_AAC_* defines are all object_type - 1
+    // This may lead to an undefined profile being signaled
+    ac->avctx->profile = aot - 1;
+
+    ac->tags_mapped = 0;
+
+    if (chan_config < 0 || (chan_config >= 8 && chan_config < 11) || chan_config >= 13) { // accepted values: 0..7, 11 and 12
+        avpriv_request_sample(avctx, "Unknown ER channel configuration %d",
+                              chan_config);
+        return AVERROR_INVALIDDATA;
+    }
+    for (i = 0; i < tags_per_config[chan_config]; i++) { // tags_per_config[0] is 0, so chan_config 0 never indexes the map at -1
+        const int elem_type = aac_channel_layout_map[chan_config-1][i][0];
+        const int elem_id   = aac_channel_layout_map[chan_config-1][i][1];
+        if (!(che=get_che(ac, elem_type, elem_id))) {
+            av_log(ac->avctx, AV_LOG_ERROR,
+                   "channel element %d.%d is not allocated\n",
+                   elem_type, elem_id);
+            return AVERROR_INVALIDDATA;
+        }
+        che->present = 1; // consumed and cleared again by spectral_to_sample()
+        if (aot != AOT_ER_AAC_ELD)
+            skip_bits(gb, 4); // NOTE(review): presumably the element instance tag; confirm against the spec
+        switch (elem_type) { // no default: for any other type err keeps its previous non-negative value
+        case TYPE_SCE:
+            err = decode_ics(ac, &che->ch[0], gb, 0, 0);
+            break;
+        case TYPE_CPE:
+            err = decode_cpe(ac, gb, che);
+            break;
+        case TYPE_LFE:
+            err = decode_ics(ac, &che->ch[0], gb, 0, 0);
+            break;
+        }
+        if (err < 0)
+            return err;
+    }
+
+    spectral_to_sample(ac, samples);
+
+    if (!ac->frame->data[0] && samples) {
+        av_log(avctx, AV_LOG_ERROR, "no frame data found\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    ac->frame->nb_samples = samples;
+    ac->frame->sample_rate = avctx->sample_rate;
+    *got_frame_ptr = 1;
+
+    skip_bits_long(gb, get_bits_left(gb)); // mark the rest of the buffer as consumed
+    return 0;
+}
+/** Decode one AAC frame (with an ADTS header when the 0xfff syncword is seen): parse syntactic elements until TYPE_END, then synthesize the output; on failure the pushed output configuration is popped. */
+static int aac_decode_frame_int(AVCodecContext *avctx, void *data,
+                                int *got_frame_ptr, GetBitContext *gb, AVPacket *avpkt)
+{
+    AACContext *ac = avctx->priv_data;
+    ChannelElement *che = NULL, *che_prev = NULL;
+    enum RawDataBlockType elem_type, elem_type_prev = TYPE_END;
+    int err, elem_id;
+    int samples = 0, multiplier, audio_found = 0, pce_found = 0;
+    int is_dmono, sce_count = 0;
+
+    ac->frame = data;
+
+    if (show_bits(gb, 12) == 0xfff) { // 12 set bits: treat as an ADTS syncword
+        if ((err = parse_adts_frame_header(ac, gb)) < 0) {
+            av_log(avctx, AV_LOG_ERROR, "Error decoding AAC frame header.\n");
+            goto fail;
+        }
+        if (ac->oc[1].m4ac.sampling_index > 12) {
+            av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->oc[1].m4ac.sampling_index);
+            err = AVERROR_INVALIDDATA;
+            goto fail;
+        }
+    }
+
+    if ((err = frame_configure_elements(avctx)) < 0)
+        goto fail;
+
+    // The FF_PROFILE_AAC_* defines are all object_type - 1
+    // This may lead to an undefined profile being signaled
+    ac->avctx->profile = ac->oc[1].m4ac.object_type - 1;
+
+    ac->tags_mapped = 0;
+    // parse
+    while ((elem_type = get_bits(gb, 3)) != TYPE_END) { // each element starts with a 3-bit type and a 4-bit id
+        elem_id = get_bits(gb, 4);
+
+        if (avctx->debug & FF_DEBUG_STARTCODE)
+            av_log(avctx, AV_LOG_DEBUG, "Elem type:%x id:%x\n", elem_type, elem_id);
+
+        if (!avctx->channels && elem_type != TYPE_PCE) { // with no channels configured only a PCE is acceptable
+            err = AVERROR_INVALIDDATA;
+            goto fail;
+        }
+
+        if (elem_type < TYPE_DSE) { // types below TYPE_DSE need an allocated channel element
+            if (!(che=get_che(ac, elem_type, elem_id))) {
+                av_log(ac->avctx, AV_LOG_ERROR, "channel element %d.%d is not allocated\n",
+                       elem_type, elem_id);
+                err = AVERROR_INVALIDDATA;
+                goto fail;
+            }
+            samples = 1024;
+            che->present = 1; // consumed and cleared again by spectral_to_sample()
+        }
+
+        switch (elem_type) {
+
+        case TYPE_SCE:
+            err = decode_ics(ac, &che->ch[0], gb, 0, 0);
+            audio_found = 1;
+            sce_count++; // counted for the dual-mono check after decoding
+            break;
+
+        case TYPE_CPE:
+            err = decode_cpe(ac, gb, che);
+            audio_found = 1;
+            break;
+
+        case TYPE_CCE:
+            err = decode_cce(ac, gb, che);
+            break;
+
+        case TYPE_LFE:
+            err = decode_ics(ac, &che->ch[0], gb, 0, 0);
+            audio_found = 1;
+            break;
+
+        case TYPE_DSE:
+            err = skip_data_stream_element(ac, gb);
+            break;
+
+        case TYPE_PCE: {
+            uint8_t layout_map[MAX_ELEM_ID*4][3];
+            int tags;
+            push_output_configuration(ac);
+            tags = decode_pce(avctx, &ac->oc[1].m4ac, layout_map, gb);
+            if (tags < 0) {
+                err = tags;
+                break;
+            }
+            if (pce_found) { // only the first PCE of a frame is applied
+                av_log(avctx, AV_LOG_ERROR,
+                       "Not evaluating a further program_config_element as this construct is dubious at best.\n");
+            } else {
+                err = output_configure(ac, layout_map, tags, OC_TRIAL_PCE, 1);
+                if (!err)
+                    ac->oc[1].m4ac.chan_config = 0;
+                pce_found = 1;
+            }
+            break;
+        }
+
+        case TYPE_FIL:
+            if (elem_id == 15) // for fill elements elem_id is the byte count; 15 escapes to an extra 8-bit count
+                elem_id += get_bits(gb, 8) - 1;
+            if (get_bits_left(gb) < 8 * elem_id) {
+                    av_log(avctx, AV_LOG_ERROR, "TYPE_FIL: "overread_err);
+                    err = AVERROR_INVALIDDATA;
+                    goto fail;
+            }
+            while (elem_id > 0) // the return value of decode_extension_payload() is subtracted from the remaining count
+                elem_id -= decode_extension_payload(ac, gb, elem_id, che_prev, elem_type_prev);
+            err = 0; /* FIXME */
+            break;
+
+        default:
+            err = AVERROR_BUG; /* should not happen, but keeps compiler happy */
+            break;
+        }
+
+        che_prev       = che; // passed to decode_extension_payload() by a following fill element
+        elem_type_prev = elem_type;
+
+        if (err)
+            goto fail;
+
+        if (get_bits_left(gb) < 3) { // not enough bits left for the next element type
+            av_log(avctx, AV_LOG_ERROR, overread_err);
+            err = AVERROR_INVALIDDATA;
+            goto fail;
+        }
+    }
+
+    if (!avctx->channels) {
+        *got_frame_ptr = 0;
+        return 0;
+    }
+
+    multiplier = (ac->oc[1].m4ac.sbr == 1) ? ac->oc[1].m4ac.ext_sample_rate > ac->oc[1].m4ac.sample_rate : 0; // 1 when SBR raises the output rate, else 0
+    samples <<= multiplier;
+
+    spectral_to_sample(ac, samples);
+
+    if (ac->oc[1].status && audio_found) {
+        avctx->sample_rate = ac->oc[1].m4ac.sample_rate << multiplier;
+        avctx->frame_size = samples;
+        ac->oc[1].status = OC_LOCKED; // the configuration produced audio, so lock it
+    }
+
+    if (multiplier) {
+        int side_size;
+        const uint8_t *side = av_packet_get_side_data(avpkt, AV_PKT_DATA_SKIP_SAMPLES, &side_size);
+        if (side && side_size>=4)
+            AV_WL32(side, 2*AV_RL32(side)); // doubles the first 32-bit LE field of the side data in place, matching the doubled sample count
+    }
+
+    if (!ac->frame->data[0] && samples) {
+        av_log(avctx, AV_LOG_ERROR, "no frame data found\n");
+        err = AVERROR_INVALIDDATA;
+        goto fail;
+    }
+
+    if (samples) {
+        ac->frame->nb_samples = samples;
+        ac->frame->sample_rate = avctx->sample_rate;
+    } else
+        av_frame_unref(ac->frame); // no channel element was decoded: hand back an empty frame
+    *got_frame_ptr = !!samples;
+
+    /* for dual-mono audio (SCE + SCE) */
+    is_dmono = ac->dmono_mode && sce_count == 2 &&
+               ac->oc[1].channel_layout == (AV_CH_FRONT_LEFT | AV_CH_FRONT_RIGHT);
+    if (is_dmono) {
+        if (ac->dmono_mode == 1)
+            ((AVFrame *)data)->data[1] =((AVFrame *)data)->data[0]; // mode 1: plane 1 aliases plane 0
+        else if (ac->dmono_mode == 2)
+            ((AVFrame *)data)->data[0] =((AVFrame *)data)->data[1]; // mode 2: plane 0 aliases plane 1
+    }
+
+    return 0;
+fail:
+    pop_output_configuration(ac); // undo any configuration pushed while parsing this frame
+    return err;
+}
+/** Packet-level entry point: sets ac->dmono_mode, dispatches to the ER or the regular frame decoder, and returns the number of bytes consumed or a negative error. */
+static int aac_decode_frame(AVCodecContext *avctx, void *data,
+                            int *got_frame_ptr, AVPacket *avpkt)
+{
+    AACContext *ac = avctx->priv_data;
+    const uint8_t *buf = avpkt->data;
+    int buf_size = avpkt->size;
+    GetBitContext gb;
+    int buf_consumed;
+    int buf_offset;
+    int err;
+    int new_extradata_size;
+    const uint8_t *new_extradata = av_packet_get_side_data(avpkt,
+                                       AV_PKT_DATA_NEW_EXTRADATA,
+                                       &new_extradata_size);
+    int jp_dualmono_size;
+    const uint8_t *jp_dualmono   = av_packet_get_side_data(avpkt,
+                                       AV_PKT_DATA_JP_DUALMONO,
+                                       &jp_dualmono_size);
+
+    if (new_extradata && 0) { // NOTE(review): "&& 0" makes this whole branch dead code; confirm in-band extradata updates are meant to stay disabled
+        av_free(avctx->extradata);
+        avctx->extradata = av_mallocz(new_extradata_size +
+                                      AV_INPUT_BUFFER_PADDING_SIZE);
+        if (!avctx->extradata)
+            return AVERROR(ENOMEM);
+        avctx->extradata_size = new_extradata_size;
+        memcpy(avctx->extradata, new_extradata, new_extradata_size);
+        push_output_configuration(ac);
+        if (decode_audio_specific_config(ac, ac->avctx, &ac->oc[1].m4ac,
+                                         avctx->extradata,
+                                         avctx->extradata_size*8LL, 1) < 0) {
+            pop_output_configuration(ac);
+            return AVERROR_INVALIDDATA;
+        }
+    }
+
+    ac->dmono_mode = 0;
+    if (jp_dualmono && jp_dualmono_size > 0)
+        ac->dmono_mode =  1 + *jp_dualmono; // first side-data byte plus one
+    if (ac->force_dmono_mode >= 0)
+        ac->dmono_mode = ac->force_dmono_mode; // the dual_mono_mode option overrides the side data
+
+    if (INT_MAX / 8 <= buf_size) // the size in bits must fit in an int
+        return AVERROR_INVALIDDATA;
+
+    if ((err = init_get_bits8(&gb, buf, buf_size)) < 0)
+        return err;
+
+    switch (ac->oc[1].m4ac.object_type) {
+    case AOT_ER_AAC_LC:
+    case AOT_ER_AAC_LTP:
+    case AOT_ER_AAC_LD:
+    case AOT_ER_AAC_ELD:
+        err = aac_decode_er_frame(avctx, data, got_frame_ptr, &gb);
+        break;
+    default:
+        err = aac_decode_frame_int(avctx, data, got_frame_ptr, &gb, avpkt);
+    }
+    if (err < 0)
+        return err;
+
+    buf_consumed = (get_bits_count(&gb) + 7) >> 3; // bits read, rounded up to whole bytes
+    for (buf_offset = buf_consumed; buf_offset < buf_size; buf_offset++)
+        if (buf[buf_offset]) // stop at the first non-zero trailing byte
+            break;
+
+    return buf_size > buf_offset ? buf_consumed : buf_size; // an all-zero tail counts as consumed together with the frame
+}
+/** Decoder close callback: frees every channel element with its SBR context, ends the MDCT contexts and frees ac->fdsp; always returns 0. */
+static av_cold int aac_decode_close(AVCodecContext *avctx)
+{
+    AACContext *ac = avctx->priv_data;
+    int i, type;
+
+    for (i = 0; i < MAX_ELEM_ID; i++) {
+        for (type = 0; type < 4; type++) {
+            if (ac->che[type][i]) // the SBR context lives inside the element, so close it before freeing
+                AAC_RENAME(ff_aac_sbr_ctx_close)(&ac->che[type][i]->sbr);
+            av_freep(&ac->che[type][i]);
+        }
+    }
+
+    ff_mdct_end(&ac->mdct);
+    ff_mdct_end(&ac->mdct_small);
+    ff_mdct_end(&ac->mdct_ld);
+    ff_mdct_end(&ac->mdct_ltp);
+#if !USE_FIXED
+    ff_imdct15_uninit(&ac->mdct480); // mdct480 is only used on the non-fixed build
+#endif
+    av_freep(&ac->fdsp);
+    return 0;
+}
+/** Install the default implementations of the function pointers held in AACContext; on ARCH_MIPS non-fixed builds ff_aacdec_init_mips() is called on the context afterwards. */
+static void aacdec_init(AACContext *c)
+{
+    c->imdct_and_windowing                      = imdct_and_windowing;
+    c->apply_ltp                                = apply_ltp;
+    c->apply_tns                                = apply_tns;
+    c->windowing_and_mdct_ltp                   = windowing_and_mdct_ltp;
+    c->update_ltp                               = update_ltp;
+#if USE_FIXED
+    c->vector_pow43                             = vector_pow43; // these two pointers are only set on the fixed-point build
+    c->subband_scale                            = subband_scale;
+#endif
+
+#if !USE_FIXED
+    if(ARCH_MIPS)
+        ff_aacdec_init_mips(c);
+#endif /* !USE_FIXED */
+}
+/**
+ * AVOptions for Japanese DTV specific extensions (ADTS only); AACDEC_FLAGS is parenthesized so it expands safely inside any expression.
+ */
+#define AACDEC_FLAGS (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM)
+static const AVOption options[] = {
+    {"dual_mono_mode", "Select the channel to decode for dual mono",
+     offsetof(AACContext, force_dmono_mode), AV_OPT_TYPE_INT, {.i64=-1}, -1, 2,
+     AACDEC_FLAGS, "dual_mono_mode"}, // stored in force_dmono_mode; default -1, range -1..2
+
+    {"auto", "autoselection",            0, AV_OPT_TYPE_CONST, {.i64=-1}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"},
+    {"main", "Select Main/Left channel", 0, AV_OPT_TYPE_CONST, {.i64= 1}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"},
+    {"sub" , "Select Sub/Right channel", 0, AV_OPT_TYPE_CONST, {.i64= 2}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"},
+    {"both", "Select both channels",     0, AV_OPT_TYPE_CONST, {.i64= 0}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"},
+
+    {NULL}, // end-of-table marker
+};
+/** AVClass for the AAC decoder; its option table is options[]. */
+static const AVClass aac_decoder_class = {
+    .class_name = "AAC decoder",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
diff --git a/libavcodec/aacdectab.h b/libavcodec/aacdectab.h
index 3e70e064..baf51a74 100644
--- a/libavcodec/aacdectab.h
+++ b/libavcodec/aacdectab.h
@@ -35,49 +35,6 @@
 
 #include <stdint.h>
 
-/* @name ltp_coef
- * Table of the LTP coefficients
- */
-static const float ltp_coef[8] = {
-    0.570829, 0.696616, 0.813004, 0.911304,
-    0.984900, 1.067894, 1.194601, 1.369533,
-};
-
-/* @name tns_tmp2_map
- * Tables of the tmp2[] arrays of LPC coefficients used for TNS.
- * The suffix _M_N[] indicate the values of coef_compress and coef_res
- * respectively.
- * @{
- */
-static const float tns_tmp2_map_1_3[4] = {
-     0.00000000, -0.43388373,  0.64278758,  0.34202015,
-};
-
-static const float tns_tmp2_map_0_3[8] = {
-     0.00000000, -0.43388373, -0.78183150, -0.97492790,
-     0.98480773,  0.86602539,  0.64278758,  0.34202015,
-};
-
-static const float tns_tmp2_map_1_4[8] = {
-     0.00000000, -0.20791170, -0.40673664, -0.58778524,
-     0.67369562,  0.52643216,  0.36124167,  0.18374951,
-};
-
-static const float tns_tmp2_map_0_4[16] = {
-     0.00000000, -0.20791170, -0.40673664, -0.58778524,
-    -0.74314481, -0.86602539, -0.95105654, -0.99452192,
-     0.99573416,  0.96182561,  0.89516330,  0.79801720,
-     0.67369562,  0.52643216,  0.36124167,  0.18374951,
-};
-
-static const float * const tns_tmp2_map[4] = {
-    tns_tmp2_map_0_3,
-    tns_tmp2_map_0_4,
-    tns_tmp2_map_1_3,
-    tns_tmp2_map_1_4
-};
-// @}
-
 static const int8_t tags_per_config[16] = { 0, 1, 1, 2, 3, 3, 4, 5, 0, 0, 0, 4, 5, 0, 5, 0 };
 
 static const uint8_t aac_channel_layout_map[16][5][3] = {
diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c
index 897c3a10..5a70da17 100644
--- a/libavcodec/aacenc.c
+++ b/libavcodec/aacenc.c
@@ -27,9 +27,10 @@
 /***********************************
  *              TODOs:
  * add sane pulse detection
- * add temporal noise shaping
  ***********************************/
 
+#include "libavutil/libm.h"
+#include "libavutil/thread.h"
 #include "libavutil/float_dsp.h"
 #include "libavutil/opt.h"
 #include "avcodec.h"
@@ -42,126 +43,12 @@
 #include "aac.h"
 #include "aactab.h"
 #include "aacenc.h"
+#include "aacenctab.h"
+#include "aacenc_utils.h"
 
 #include "psymodel.h"
 
-#define AAC_MAX_CHANNELS 6
-
-#define ERROR_IF(cond, ...) \
-    if (cond) { \
-        av_log(avctx, AV_LOG_ERROR, __VA_ARGS__); \
-        return AVERROR(EINVAL); \
-    }
-
-#define WARN_IF(cond, ...) \
-    if (cond) { \
-        av_log(avctx, AV_LOG_WARNING, __VA_ARGS__); \
-    }
-
-float ff_aac_pow34sf_tab[428];
-
-static const uint8_t swb_size_1024_96[] = {
-    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8,
-    12, 12, 12, 12, 12, 16, 16, 24, 28, 36, 44,
-    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
-};
-
-static const uint8_t swb_size_1024_64[] = {
-    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8,
-    12, 12, 12, 16, 16, 16, 20, 24, 24, 28, 36,
-    40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40
-};
-
-static const uint8_t swb_size_1024_48[] = {
-    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
-    12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
-    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
-    96
-};
-
-static const uint8_t swb_size_1024_32[] = {
-    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
-    12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
-    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
-};
-
-static const uint8_t swb_size_1024_24[] = {
-    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
-    12, 12, 12, 12, 16, 16, 16, 20, 20, 24, 24, 28, 28,
-    32, 36, 36, 40, 44, 48, 52, 52, 64, 64, 64, 64, 64
-};
-
-static const uint8_t swb_size_1024_16[] = {
-    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
-    12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 16, 16, 16, 20, 20, 20, 24, 24, 28, 28,
-    32, 36, 40, 40, 44, 48, 52, 56, 60, 64, 64, 64
-};
-
-static const uint8_t swb_size_1024_8[] = {
-    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
-    16, 16, 16, 16, 16, 16, 16, 20, 20, 20, 20, 24, 24, 24, 28, 28,
-    32, 36, 36, 40, 44, 48, 52, 56, 60, 64, 80
-};
-
-static const uint8_t *swb_size_1024[] = {
-    swb_size_1024_96, swb_size_1024_96, swb_size_1024_64,
-    swb_size_1024_48, swb_size_1024_48, swb_size_1024_32,
-    swb_size_1024_24, swb_size_1024_24, swb_size_1024_16,
-    swb_size_1024_16, swb_size_1024_16, swb_size_1024_8,
-    swb_size_1024_8
-};
-
-static const uint8_t swb_size_128_96[] = {
-    4, 4, 4, 4, 4, 4, 8, 8, 8, 16, 28, 36
-};
-
-static const uint8_t swb_size_128_48[] = {
-    4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 12, 16, 16, 16
-};
-
-static const uint8_t swb_size_128_24[] = {
-    4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 20
-};
-
-static const uint8_t swb_size_128_16[] = {
-    4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 12, 12, 16, 20, 20
-};
-
-static const uint8_t swb_size_128_8[] = {
-    4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 12, 16, 20, 20
-};
-
-static const uint8_t *swb_size_128[] = {
-    /* the last entry on the following row is swb_size_128_64 but is a
-       duplicate of swb_size_128_96 */
-    swb_size_128_96, swb_size_128_96, swb_size_128_96,
-    swb_size_128_48, swb_size_128_48, swb_size_128_48,
-    swb_size_128_24, swb_size_128_24, swb_size_128_16,
-    swb_size_128_16, swb_size_128_16, swb_size_128_8,
-    swb_size_128_8
-};
-
-/** default channel configurations */
-static const uint8_t aac_chan_configs[6][5] = {
- {1, TYPE_SCE},                               // 1 channel  - single channel element
- {1, TYPE_CPE},                               // 2 channels - channel pair
- {2, TYPE_SCE, TYPE_CPE},                     // 3 channels - center + stereo
- {3, TYPE_SCE, TYPE_CPE, TYPE_SCE},           // 4 channels - front center + stereo + back center
- {3, TYPE_SCE, TYPE_CPE, TYPE_CPE},           // 5 channels - front center + stereo + back stereo
- {4, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_LFE}, // 6 channels - front center + stereo + back stereo + LFE
-};
-
-/**
- * Table to remap channels from libavcodec's default order to AAC order.
- */
-static const uint8_t aac_chan_maps[AAC_MAX_CHANNELS][AAC_MAX_CHANNELS] = {
-    { 0 },
-    { 0, 1 },
-    { 2, 0, 1 },
-    { 2, 0, 1, 3 },
-    { 2, 0, 1, 3, 4 },
-    { 2, 0, 1, 4, 5, 3 },
-};
+static AVOnce aac_table_init = AV_ONCE_INIT; /* once-control object; NOTE(review): presumably guards the one-time table setup - confirm at its use site */
 
 /**
  * Make AAC audio config object.
@@ -171,11 +58,12 @@ static void put_audio_specific_config(AVCodecContext *avctx)
 {
     PutBitContext pb;
     AACEncContext *s = avctx->priv_data;
+    int channels = s->channels - (s->channels == 8 ? 1 : 0);
 
     init_put_bits(&pb, avctx->extradata, avctx->extradata_size);
-    put_bits(&pb, 5, 2); //object type - AAC-LC
+    put_bits(&pb, 5, s->profile+1); //profile
     put_bits(&pb, 4, s->samplerate_index); //sample rate index
-    put_bits(&pb, 4, s->channels);
+    put_bits(&pb, 4, channels);
     //GASpecificConfig
     put_bits(&pb, 1, 0); //frame length - 1024 samples
     put_bits(&pb, 1, 0); //does not depend on core coder
@@ -188,6 +76,16 @@ static void put_audio_specific_config(AVCodecContext *avctx)
     flush_put_bits(&pb);
 }
 
+/* Reset the whole 256x128 quantize-band-cost cache by setting every entry's bits to -1. */
+void ff_quantize_band_cost_cache_init(struct AACEncContext *s)
+{
+    int idx;
+
+    /* A single counter walks all 256 * 128 entries: row = idx / 128, column = idx % 128. */
+    for (idx = 0; idx < 256 * 128; idx++)
+        s->quantize_band_cost_cache[idx / 128][idx % 128].bits = -1;
+}
+
 #define WINDOW_FUNC(type) \
 static void apply_ ##type ##_window(AVFloatDSPContext *fdsp, \
                                     SingleChannelElement *sce, \
@@ -257,7 +155,7 @@ static void apply_window_and_mdct(AACEncContext *s, SingleChannelElement *sce,
                                   float *audio)
 {
     int i;
-    float *output = sce->ret_buf;
+    const float *output = sce->ret_buf;
 
     apply_window[sce->ics.window_sequence[0]](s->fdsp, sce, audio);
 
@@ -265,7 +163,7 @@ static void apply_window_and_mdct(AACEncContext *s, SingleChannelElement *sce,
         s->mdct1024.mdct_calc(&s->mdct1024, sce->coeffs, output);
     else
         for (i = 0; i < 1024; i += 128)
-            s->mdct128.mdct_calc(&s->mdct128, sce->coeffs + i, output + i*2);
+            s->mdct128.mdct_calc(&s->mdct128, &sce->coeffs[i], output + i*2);
     memcpy(audio, audio + 1024, sizeof(audio[0]) * 1024);
     memcpy(sce->pcoeffs, sce->coeffs, sizeof(sce->pcoeffs));
 }
@@ -283,7 +181,7 @@ static void put_ics_info(AACEncContext *s, IndividualChannelStream *info)
     put_bits(&s->pb, 1, info->use_kb_window[0]);
     if (info->window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
         put_bits(&s->pb, 6, info->max_sfb);
-        put_bits(&s->pb, 1, 0);            // no prediction
+        put_bits(&s->pb, 1, !!info->predictor_present);
     } else {
         put_bits(&s->pb, 4, info->max_sfb);
         for (w = 1; w < 8; w++)
@@ -312,26 +210,14 @@ static void encode_ms_info(PutBitContext *pb, ChannelElement *cpe)
 static void adjust_frame_information(ChannelElement *cpe, int chans)
 {
     int i, w, w2, g, ch;
-    int start, maxsfb, cmaxsfb;
+    int maxsfb, cmaxsfb;
 
     for (ch = 0; ch < chans; ch++) {
         IndividualChannelStream *ics = &cpe->ch[ch].ics;
-        start = 0;
         maxsfb = 0;
         cpe->ch[ch].pulse.num_pulse = 0;
         for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
-            for (w2 = 0; w2 < ics->group_len[w]; w2++) {
-                start = (w+w2) * 128;
-                for (g = 0; g < ics->num_swb; g++) {
-                    //apply M/S
-                    if (cpe->common_window && !ch && cpe->ms_mask[w*16 + g]) {
-                        for (i = 0; i < ics->swb_sizes[g]; i++) {
-                            cpe->ch[0].coeffs[start+i] = (cpe->ch[0].pcoeffs[start+i] + cpe->ch[1].pcoeffs[start+i]) * 0.5f;
-                            cpe->ch[1].coeffs[start+i] = cpe->ch[0].coeffs[start+i] - cpe->ch[1].pcoeffs[start+i];
-                        }
-                    }
-                    start += ics->swb_sizes[g];
-                }
+            for (w2 =  0; w2 < ics->group_len[w]; w2++) {
                 for (cmaxsfb = ics->num_swb; cmaxsfb > 0 && cpe->ch[ch].zeroes[w*16+cmaxsfb-1]; cmaxsfb--)
                     ;
                 maxsfb = FFMAX(maxsfb, cmaxsfb);
@@ -371,6 +257,67 @@ static void adjust_frame_information(ChannelElement *cpe, int chans)
     }
 }
 
+/* Fold the second channel of each intensity-coded band into the first one (common window only). */
+static void apply_intensity_stereo(ChannelElement *cpe)
+{
+    IndividualChannelStream *ics = &cpe->ch[0].ics;
+    int grp, win, band, k;
+    if (!cpe->common_window)
+        return;
+    for (grp = 0; grp < ics->num_windows; grp += ics->group_len[grp]) {
+        for (win = grp; win < grp + ics->group_len[grp]; win++) {
+            int pos = win * 128; /* coefficient index where the current band starts */
+            for (band = 0; band < ics->num_swb; pos += ics->swb_sizes[band], band++) {
+                const int idx = grp*16 + band; /* flags are stored per group and band */
+                float scale;
+                int p;
+                if (!cpe->is_mask[idx])
+                    continue;
+                p     = 2 * (cpe->ch[1].band_type[idx] - 14) - 1; /* type 14 -> -1, type 15 -> +1 */
+                scale = cpe->ch[0].is_ener[idx];
+                if (cpe->ms_mask[idx]) /* a set ms_mask inverts the sign */
+                    p = -p;
+                for (k = pos; k < pos + ics->swb_sizes[band]; k++) {
+                    cpe->ch[0].coeffs[k] = (cpe->ch[0].coeffs[k] + p*cpe->ch[1].coeffs[k])*scale;
+                    cpe->ch[1].coeffs[k] = 0.0f;
+                }
+            }
+        }
+    }
+}
+
+/* Convert the coefficients of every M/S-flagged band of a pair to mid/side in place. */
+static void apply_mid_side_stereo(ChannelElement *cpe)
+{
+    IndividualChannelStream *ics = &cpe->ch[0].ics;
+    int grp, win, band, k;
+    if (!cpe->common_window)
+        return;
+    for (grp = 0; grp < ics->num_windows; grp += ics->group_len[grp]) {
+        for (win = grp; win < grp + ics->group_len[grp]; win++) {
+            int pos = win * 128; /* coefficient index where the current band starts */
+            for (band = 0; band < ics->num_swb; pos += ics->swb_sizes[band], band++) {
+                const int idx = grp*16 + band;
+                /* A set ms_mask alone is not enough: the flag is reused by
+                 * intensity stereo and PNS, so a band is skipped when it is
+                 * intensity-coded or either channel's type is NOISE_BT or above.
+                 */
+                if (!cpe->ms_mask[idx] || cpe->is_mask[idx])
+                    continue;
+                if (cpe->ch[0].band_type[idx] >= NOISE_BT ||
+                    cpe->ch[1].band_type[idx] >= NOISE_BT)
+                    continue;
+                for (k = pos; k < pos + ics->swb_sizes[band]; k++) {
+                    const float mid  = (cpe->ch[0].coeffs[k] + cpe->ch[1].coeffs[k]) * 0.5f;
+                    const float side = mid - cpe->ch[1].coeffs[k];
+                    cpe->ch[0].coeffs[k] = mid;
+                    cpe->ch[1].coeffs[k] = side;
+                }
+            }
+        }
+    }
+}
+
 /**
  * Encode scalefactor band coding type.
  */
@@ -378,6 +325,9 @@ static void encode_band_info(AACEncContext *s, SingleChannelElement *sce)
 {
     int w;
 
+    if (s->coder->set_special_band_scalefactors)
+        s->coder->set_special_band_scalefactors(s, sce);
+
     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
         s->coder->encode_window_bands_info(s, sce, w, sce->ics.group_len[w], s->lambda);
 }
@@ -389,7 +339,7 @@ static void encode_scale_factors(AVCodecContext *avctx, AACEncContext *s,
                                  SingleChannelElement *sce)
 {
     int diff, off_sf = sce->sf_idx[0], off_pns = sce->sf_idx[0] - NOISE_OFFSET;
-    int noise_flag = 1;
+    int off_is = 0, noise_flag = 1;
     int i, w;
 
     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
@@ -402,6 +352,10 @@ static void encode_scale_factors(AVCodecContext *avctx, AACEncContext *s,
                         put_bits(&s->pb, NOISE_PRE_BITS, diff + NOISE_PRE);
                         continue;
                     }
+                } else if (sce->band_type[w*16 + i] == INTENSITY_BT  ||
+                           sce->band_type[w*16 + i] == INTENSITY_BT2) {
+                    diff = sce->sf_idx[w*16 + i] - off_is;
+                    off_is = sce->sf_idx[w*16 + i];
                 } else {
                     diff = sce->sf_idx[w*16 + i] - off_sf;
                     off_sf = sce->sf_idx[w*16 + i];
@@ -447,17 +401,40 @@ static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
                 start += sce->ics.swb_sizes[i];
                 continue;
             }
-            for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++)
-                s->coder->quantize_and_encode_band(s, &s->pb, sce->coeffs + start + w2*128,
-                                                   sce->ics.swb_sizes[i],
+            for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++) {
+                s->coder->quantize_and_encode_band(s, &s->pb,
+                                                   &sce->coeffs[start + w2*128],
+                                                   NULL, sce->ics.swb_sizes[i],
                                                    sce->sf_idx[w*16 + i],
                                                    sce->band_type[w*16 + i],
-                                                   s->lambda);
+                                                   s->lambda,
+                                                   sce->ics.window_clipping[w]);
+            }
             start += sce->ics.swb_sizes[i];
         }
     }
 }
 
+/**
+ * Multiply the coefficients of the first max_sfb bands of every window by
+ * clip_avoidance_factor; nothing is touched unless that factor is below 1.0.
+ */
+static void avoid_clipping(AACEncContext *s, SingleChannelElement *sce)
+{
+    const float factor = sce->ics.clip_avoidance_factor;
+    int win, band;
+    if (!(factor < 1.0f))
+        return;
+    for (win = 0; win < sce->ics.num_windows; win++) {
+        float *dst = &sce->coeffs[win*128]; /* each window starts 128 coefficients apart */
+        for (band = 0; band < sce->ics.max_sfb; band++) {
+            float *end = dst + sce->ics.swb_sizes[band];
+            while (dst < end)
+                *dst++ *= factor;
+        }
+    }
+}
+
 /**
  * Encode one channel of audio data.
  */
@@ -466,12 +443,19 @@ static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s,
                                      int common_window)
 {
     put_bits(&s->pb, 8, sce->sf_idx[0]);
-    if (!common_window)
+    if (!common_window) {
         put_ics_info(s, &sce->ics);
+        if (s->coder->encode_main_pred)
+            s->coder->encode_main_pred(s, sce);
+        if (s->coder->encode_ltp_info)
+            s->coder->encode_ltp_info(s, sce, 0);
+    }
     encode_band_info(s, sce);
     encode_scale_factors(avctx, s, sce);
     encode_pulses(s, &sce->pulse);
-    put_bits(&s->pb, 1, 0); //tns
+    put_bits(&s->pb, 1, !!sce->tns.present);
+    if (s->coder->encode_tns_info)
+        s->coder->encode_tns_info(s, sce);
     put_bits(&s->pb, 1, 0); //ssr
     encode_spectral_coeffs(s, sce);
     return 0;
@@ -529,7 +513,11 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     AACEncContext *s = avctx->priv_data;
     float **samples = s->planar_samples, *samples2, *la, *overlap;
     ChannelElement *cpe;
-    int i, ch, w, g, chans, tag, start_ch, ret, ms_mode = 0;
+    SingleChannelElement *sce;
+    IndividualChannelStream *ics;
+    int i, its, ch, w, chans, tag, start_ch, ret, frame_bits;
+    int target_bits, rate_bits, too_many_bits, too_few_bits;
+    int ms_mode = 0, is_mode = 0, tns_mode = 0, pred_mode = 0;
     int chan_el_counter[4];
     FFPsyWindowInfo windows[AAC_MAX_CHANNELS];
 
@@ -556,9 +544,12 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
         chans    = tag == TYPE_CPE ? 2 : 1;
         cpe      = &s->cpe[i];
         for (ch = 0; ch < chans; ch++) {
-            IndividualChannelStream *ics = &cpe->ch[ch].ics;
-            int cur_channel = start_ch + ch;
-            overlap  = &samples[cur_channel][0];
+            int k;
+            float clip_avoidance_factor;
+            sce = &cpe->ch[ch];
+            ics = &sce->ics;
+            s->cur_channel = start_ch + ch;
+            overlap  = &samples[s->cur_channel][0];
             samples2 = overlap + 1024;
             la       = samples2 + (448+64);
             if (!frame)
@@ -575,7 +566,7 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
                  */
                 ics->num_swb = s->samplerate_index >= 8 ? 1 : 3;
             } else {
-                wi[ch] = s->psy.model->window(&s->psy, samples2, la, cur_channel,
+                wi[ch] = s->psy.model->window(&s->psy, samples2, la, s->cur_channel,
                                               ics->window_sequence[0]);
             }
             ics->window_sequence[1] = ics->window_sequence[0];
@@ -585,27 +576,58 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
             ics->num_windows        = wi[ch].num_windows;
             ics->swb_sizes          = s->psy.bands    [ics->num_windows == 8];
             ics->num_swb            = tag == TYPE_LFE ? ics->num_swb : s->psy.num_bands[ics->num_windows == 8];
+            ics->max_sfb            = FFMIN(ics->max_sfb, ics->num_swb);
+            ics->swb_offset         = wi[ch].window_type[0] == EIGHT_SHORT_SEQUENCE ?
+                                        ff_swb_offset_128 [s->samplerate_index]:
+                                        ff_swb_offset_1024[s->samplerate_index];
+            ics->tns_max_bands      = wi[ch].window_type[0] == EIGHT_SHORT_SEQUENCE ?
+                                        ff_tns_max_bands_128 [s->samplerate_index]:
+                                        ff_tns_max_bands_1024[s->samplerate_index];
+            clip_avoidance_factor = 0.0f;
             for (w = 0; w < ics->num_windows; w++)
                 ics->group_len[w] = wi[ch].grouping[w];
+            for (w = 0; w < ics->num_windows; w++) {
+                if (wi[ch].clipping[w] > CLIP_AVOIDANCE_FACTOR) {
+                    ics->window_clipping[w] = 1;
+                    clip_avoidance_factor = FFMAX(clip_avoidance_factor, wi[ch].clipping[w]);
+                } else {
+                    ics->window_clipping[w] = 0;
+                }
+            }
+            if (clip_avoidance_factor > CLIP_AVOIDANCE_FACTOR) {
+                ics->clip_avoidance_factor = CLIP_AVOIDANCE_FACTOR / clip_avoidance_factor;
+            } else {
+                ics->clip_avoidance_factor = 1.0f;
+            }
 
-            apply_window_and_mdct(s, &cpe->ch[ch], overlap);
-            if (isnan(cpe->ch->coeffs[0])) {
-                av_log(avctx, AV_LOG_ERROR, "Input contains NaN\n");
-                return AVERROR(EINVAL);
+            apply_window_and_mdct(s, sce, overlap);
+
+            if (s->options.ltp && s->coder->update_ltp) {
+                s->coder->update_ltp(s, sce);
+                apply_window[sce->ics.window_sequence[0]](s->fdsp, sce, &sce->ltp_state[0]);
+                s->mdct1024.mdct_calc(&s->mdct1024, sce->lcoeffs, sce->ret_buf);
             }
+
+            for (k = 0; k < 1024; k++) {
+                if (!isfinite(cpe->ch[ch].coeffs[k])) {
+                    av_log(avctx, AV_LOG_ERROR, "Input contains NaN/+-Inf\n");
+                    return AVERROR(EINVAL);
+                }
+            }
+            avoid_clipping(s, sce);
         }
         start_ch += chans;
     }
-    if ((ret = ff_alloc_packet2(avctx, avpkt, 8192 * s->channels)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, avpkt, 8192 * s->channels, 0)) < 0)
         return ret;
+    frame_bits = its = 0;
     do {
-        int frame_bits;
-
         init_put_bits(&s->pb, avpkt->data, avpkt->size);
 
-        if ((avctx->frame_number & 0xFF)==1 && !(avctx->flags & CODEC_FLAG_BITEXACT))
+        if ((avctx->frame_number & 0xFF)==1 && !(avctx->flags & AV_CODEC_FLAG_BITEXACT))
             put_bitstream_info(s, LIBAVCODEC_IDENT);
         start_ch = 0;
+        target_bits = 0;
         memset(chan_el_counter, 0, sizeof(chan_el_counter));
         for (i = 0; i < s->chan_map[0]; i++) {
             FFPsyWindowInfo* wi = windows + start_ch;
@@ -613,16 +635,39 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
             tag      = s->chan_map[i+1];
             chans    = tag == TYPE_CPE ? 2 : 1;
             cpe      = &s->cpe[i];
+            cpe->common_window = 0;
+            memset(cpe->is_mask, 0, sizeof(cpe->is_mask));
+            memset(cpe->ms_mask, 0, sizeof(cpe->ms_mask));
             put_bits(&s->pb, 3, tag);
             put_bits(&s->pb, 4, chan_el_counter[tag]++);
-            for (ch = 0; ch < chans; ch++)
-                coeffs[ch] = cpe->ch[ch].coeffs;
+            for (ch = 0; ch < chans; ch++) {
+                sce = &cpe->ch[ch];
+                coeffs[ch] = sce->coeffs;
+                sce->ics.predictor_present = 0;
+                sce->ics.ltp.present = 0;
+                memset(sce->ics.ltp.used, 0, sizeof(sce->ics.ltp.used));
+                memset(sce->ics.prediction_used, 0, sizeof(sce->ics.prediction_used));
+                memset(&sce->tns, 0, sizeof(TemporalNoiseShaping));
+                for (w = 0; w < 128; w++)
+                    if (sce->band_type[w] > RESERVED_BT)
+                        sce->band_type[w] = 0;
+            }
+            s->psy.bitres.alloc = -1;
+            s->psy.bitres.bits = s->last_frame_pb_count / s->channels;
             s->psy.model->analyze(&s->psy, start_ch, coeffs, wi);
+            if (s->psy.bitres.alloc > 0) {
+                /* Lambda unused here on purpose, we need to take psy's unscaled allocation */
+                target_bits += s->psy.bitres.alloc
+                    * (s->lambda / (avctx->global_quality ? avctx->global_quality : 120));
+                s->psy.bitres.alloc /= chans;
+            }
+            s->cur_type = tag;
             for (ch = 0; ch < chans; ch++) {
                 s->cur_channel = start_ch + ch;
+                if (s->options.pns && s->coder->mark_pns)
+                    s->coder->mark_pns(s, avctx, &cpe->ch[ch]);
                 s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda);
             }
-            cpe->common_window = 0;
             if (chans > 1
                 && wi[0].window_type[0] == wi[1].window_type[0]
                 && wi[0].window_shape   == wi[1].window_shape) {
@@ -635,22 +680,71 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
                     }
                 }
             }
+            for (ch = 0; ch < chans; ch++) { /* TNS and PNS */
+                sce = &cpe->ch[ch];
+                s->cur_channel = start_ch + ch;
+                if (s->options.tns && s->coder->search_for_tns)
+                    s->coder->search_for_tns(s, sce);
+                if (s->options.tns && s->coder->apply_tns_filt)
+                    s->coder->apply_tns_filt(s, sce);
+                if (sce->tns.present)
+                    tns_mode = 1;
+                if (s->options.pns && s->coder->search_for_pns)
+                    s->coder->search_for_pns(s, avctx, sce);
+            }
             s->cur_channel = start_ch;
-            if (s->options.stereo_mode && cpe->common_window) {
-                if (s->options.stereo_mode > 0) {
-                    IndividualChannelStream *ics = &cpe->ch[0].ics;
-                    for (w = 0; w < ics->num_windows; w += ics->group_len[w])
-                        for (g = 0;  g < ics->num_swb; g++)
-                            cpe->ms_mask[w*16+g] = 1;
-                } else if (s->coder->search_for_ms) {
-                    s->coder->search_for_ms(s, cpe, s->lambda);
+            if (s->options.intensity_stereo) { /* Intensity Stereo */
+                if (s->coder->search_for_is)
+                    s->coder->search_for_is(s, avctx, cpe);
+                if (cpe->is_mode) is_mode = 1;
+                apply_intensity_stereo(cpe);
+            }
+            if (s->options.pred) { /* Prediction */
+                for (ch = 0; ch < chans; ch++) {
+                    sce = &cpe->ch[ch];
+                    s->cur_channel = start_ch + ch;
+                    if (s->options.pred && s->coder->search_for_pred)
+                        s->coder->search_for_pred(s, sce);
+                    if (cpe->ch[ch].ics.predictor_present) pred_mode = 1;
+                }
+                if (s->coder->adjust_common_pred)
+                    s->coder->adjust_common_pred(s, cpe);
+                for (ch = 0; ch < chans; ch++) {
+                    sce = &cpe->ch[ch];
+                    s->cur_channel = start_ch + ch;
+                    if (s->options.pred && s->coder->apply_main_pred)
+                        s->coder->apply_main_pred(s, sce);
                 }
+                s->cur_channel = start_ch;
+            }
+            if (s->options.mid_side) { /* Mid/Side stereo */
+                if (s->options.mid_side == -1 && s->coder->search_for_ms)
+                    s->coder->search_for_ms(s, cpe);
+                else if (cpe->common_window)
+                    memset(cpe->ms_mask, 1, sizeof(cpe->ms_mask));
+                apply_mid_side_stereo(cpe);
             }
             adjust_frame_information(cpe, chans);
+            if (s->options.ltp) { /* LTP */
+                for (ch = 0; ch < chans; ch++) {
+                    sce = &cpe->ch[ch];
+                    s->cur_channel = start_ch + ch;
+                    if (s->coder->search_for_ltp)
+                        s->coder->search_for_ltp(s, sce, cpe->common_window);
+                    if (sce->ics.ltp.present) pred_mode = 1;
+                }
+                s->cur_channel = start_ch;
+                if (s->coder->adjust_common_ltp)
+                    s->coder->adjust_common_ltp(s, cpe);
+            }
             if (chans == 2) {
                 put_bits(&s->pb, 1, cpe->common_window);
                 if (cpe->common_window) {
                     put_ics_info(s, &cpe->ch[0].ics);
+                    if (s->coder->encode_main_pred)
+                        s->coder->encode_main_pred(s, &cpe->ch[0]);
+                    if (s->coder->encode_ltp_info)
+                        s->coder->encode_ltp_info(s, &cpe->ch[0], 1);
                     encode_ms_info(&s->pb, cpe);
                     if (cpe->ms_mode) ms_mode = 1;
                 }
@@ -662,35 +756,77 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
             start_ch += chans;
         }
 
-        frame_bits = put_bits_count(&s->pb);
-        if (frame_bits <= 6144 * s->channels - 3) {
-            s->psy.bitres.bits = frame_bits / s->channels;
+        if (avctx->flags & CODEC_FLAG_QSCALE) {
+            /* When using a constant Q-scale, don't mess with lambda */
             break;
         }
-        if (ms_mode) {
-            for (i = 0; i < s->chan_map[0]; i++) {
-                // Must restore coeffs
-                chans = tag == TYPE_CPE ? 2 : 1;
-                cpe = &s->cpe[i];
-                for (ch = 0; ch < chans; ch++)
-                    memcpy(cpe->ch[ch].coeffs, cpe->ch[ch].pcoeffs, sizeof(cpe->ch[ch].coeffs));
-            }
-        }
 
-        s->lambda *= avctx->bit_rate * 1024.0f / avctx->sample_rate / frame_bits;
+        /* rate control stuff
+         * allow between the nominal bitrate, and what psy's bit reservoir says to target
+         * but drift towards the nominal bitrate always
+         */
+        frame_bits = put_bits_count(&s->pb);
+        rate_bits = avctx->bit_rate * 1024 / avctx->sample_rate;
+        rate_bits = FFMIN(rate_bits, 6144 * s->channels - 3);
+        too_many_bits = FFMAX(target_bits, rate_bits);
+        too_many_bits = FFMIN(too_many_bits, 6144 * s->channels - 3);
+        too_few_bits = FFMIN(FFMAX(rate_bits - rate_bits/4, target_bits), too_many_bits);
+
+        /* When using ABR, be strict (but only for increasing) */
+        too_few_bits = too_few_bits - too_few_bits/8;
+        too_many_bits = too_many_bits + too_many_bits/2;
+
+        if (   its == 0 /* for steady-state Q-scale tracking */
+            || (its < 5 && (frame_bits < too_few_bits || frame_bits > too_many_bits))
+            || frame_bits >= 6144 * s->channels - 3  )
+        {
+            float ratio = ((float)rate_bits) / frame_bits;
+
+            if (frame_bits >= too_few_bits && frame_bits <= too_many_bits) {
+                /*
+                 * This path is for steady-state Q-scale tracking
+                 * When frame bits fall within the stable range, we still need to adjust
+                 * lambda to maintain it like so in a stable fashion (large jumps in lambda
+                 * create artifacts and should be avoided), but slowly
+                 */
+                ratio = sqrtf(sqrtf(ratio));
+                ratio = av_clipf(ratio, 0.9f, 1.1f);
+            } else {
+                /* Not so fast though */
+                ratio = sqrtf(ratio);
+            }
+            s->lambda = FFMIN(s->lambda * ratio, 65536.f);
 
+            /* Keep iterating if we must reduce and lambda is in the sky */
+            if (ratio > 0.9f && ratio < 1.1f) {
+                break;
+            } else {
+                if (is_mode || ms_mode || tns_mode || pred_mode) {
+                    for (i = 0; i < s->chan_map[0]; i++) {
+                        // Must restore coeffs
+                        chans = tag == TYPE_CPE ? 2 : 1;
+                        cpe = &s->cpe[i];
+                        for (ch = 0; ch < chans; ch++)
+                            memcpy(cpe->ch[ch].coeffs, cpe->ch[ch].pcoeffs, sizeof(cpe->ch[ch].coeffs));
+                    }
+                }
+                its++;
+            }
+        } else {
+            break;
+        }
     } while (1);
 
+    if (s->options.ltp && s->coder->ltp_insert_new_frame)
+        s->coder->ltp_insert_new_frame(s);
+
     put_bits(&s->pb, 3, TYPE_END);
     flush_put_bits(&s->pb);
-    avctx->frame_bits = put_bits_count(&s->pb);
 
-    // rate control stuff
-    if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
-        float ratio = avctx->bit_rate * 1024.0f / avctx->sample_rate / avctx->frame_bits;
-        s->lambda *= ratio;
-        s->lambda = FFMIN(s->lambda, 65536.f);
-    }
+    s->last_frame_pb_count = put_bits_count(&s->pb);
+
+    s->lambda_sum += s->lambda;
+    s->lambda_count++;
 
     if (!frame)
         s->last_frame++;
@@ -707,9 +843,12 @@ static av_cold int aac_encode_end(AVCodecContext *avctx)
 {
     AACEncContext *s = avctx->priv_data;
 
+    av_log(avctx, AV_LOG_INFO, "Qavg: %.3f\n", s->lambda_sum / s->lambda_count);
+
     ff_mdct_end(&s->mdct1024);
     ff_mdct_end(&s->mdct128);
     ff_psy_end(&s->psy);
+    ff_lpc_end(&s->lpc);
     if (s->psypp)
         ff_psy_preprocess_end(s->psypp);
     av_freep(&s->buffer.samples);
@@ -723,7 +862,7 @@ static av_cold int dsp_init(AVCodecContext *avctx, AACEncContext *s)
 {
     int ret = 0;
 
-    s->fdsp = avpriv_float_dsp_alloc(avctx->flags & CODEC_FLAG_BITEXACT);
+    s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
     if (!s->fdsp)
         return AVERROR(ENOMEM);
 
@@ -746,7 +885,7 @@ static av_cold int alloc_buffers(AVCodecContext *avctx, AACEncContext *s)
     int ch;
     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->buffer.samples, s->channels, 3 * 1024 * sizeof(s->buffer.samples[0]), alloc_fail);
     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->cpe, s->chan_map[0], sizeof(ChannelElement), alloc_fail);
-    FF_ALLOCZ_OR_GOTO(avctx, avctx->extradata, 5 + FF_INPUT_BUFFER_PADDING_SIZE, alloc_fail);
+    FF_ALLOCZ_OR_GOTO(avctx, avctx->extradata, 5 + AV_INPUT_BUFFER_PADDING_SIZE, alloc_fail);
 
     for(ch = 0; ch < s->channels; ch++)
         s->planar_samples[ch] = s->buffer.samples + 3 * 1024 * ch;
@@ -756,6 +895,11 @@ static av_cold int alloc_buffers(AVCodecContext *avctx, AACEncContext *s)
     return AVERROR(ENOMEM);
 }
 
+static av_cold void aac_encode_init_tables(void)
+{
+    ff_aac_tableinit();
+}
+
 static av_cold int aac_encode_init(AVCodecContext *avctx)
 {
     AACEncContext *s = avctx->priv_data;
@@ -764,32 +908,96 @@ static av_cold int aac_encode_init(AVCodecContext *avctx)
     uint8_t grouping[AAC_MAX_CHANNELS];
     int lengths[2];
 
+    /* Constants */
+    s->last_frame_pb_count = 0;
+    avctx->extradata_size = 5;
     avctx->frame_size = 1024;
+    avctx->initial_padding = 1024;
+    s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120;
+
+    /* Channel map and unspecified bitrate guessing */
+    s->channels = avctx->channels;
+    ERROR_IF(s->channels > AAC_MAX_CHANNELS || s->channels == 7,
+             "Unsupported number of channels: %d\n", s->channels);
+    s->chan_map = aac_chan_configs[s->channels-1];
+    if (!avctx->bit_rate) {
+        for (i = 1; i <= s->chan_map[0]; i++) {
+            avctx->bit_rate += s->chan_map[i] == TYPE_CPE ? 128000 : /* Pair */
+                               s->chan_map[i] == TYPE_LFE ? 16000  : /* LFE  */
+                                                            69000  ; /* SCE  */
+        }
+    }
 
+    /* Samplerate */
     for (i = 0; i < 16; i++)
         if (avctx->sample_rate == avpriv_mpeg4audio_sample_rates[i])
             break;
-
-    s->channels = avctx->channels;
-
-    ERROR_IF(i == 16
-                || i >= (sizeof(swb_size_1024) / sizeof(*swb_size_1024))
-                || i >= (sizeof(swb_size_128) / sizeof(*swb_size_128)),
+    s->samplerate_index = i;
+    ERROR_IF(s->samplerate_index == 16 ||
+             s->samplerate_index >= ff_aac_swb_size_1024_len ||
+             s->samplerate_index >= ff_aac_swb_size_128_len,
              "Unsupported sample rate %d\n", avctx->sample_rate);
-    ERROR_IF(s->channels > AAC_MAX_CHANNELS,
-             "Unsupported number of channels: %d\n", s->channels);
-    ERROR_IF(avctx->profile != FF_PROFILE_UNKNOWN && avctx->profile != FF_PROFILE_AAC_LOW,
-             "Unsupported profile %d\n", avctx->profile);
-    WARN_IF(1024.0 * avctx->bit_rate / avctx->sample_rate > 6144 * s->channels,
-             "Too many bits per frame requested, clamping to max\n");
-
-    avctx->bit_rate = (int)FFMIN(
-        6144 * s->channels / 1024.0 * avctx->sample_rate,
-        avctx->bit_rate);
 
-    s->samplerate_index = i;
+    /* Bitrate limiting */
+    WARN_IF(1024.0 * avctx->bit_rate / avctx->sample_rate > 6144 * s->channels,
+             "Too many bits %f > %d per frame requested, clamping to max\n",
+             1024.0 * avctx->bit_rate / avctx->sample_rate,
+             6144 * s->channels);
+    avctx->bit_rate = (int64_t)FFMIN(6144 * s->channels / 1024.0 * avctx->sample_rate,
+                                     avctx->bit_rate);
+
+    /* Profile and option setting */
+    avctx->profile = avctx->profile == FF_PROFILE_UNKNOWN ? FF_PROFILE_AAC_LOW :
+                     avctx->profile;
+    for (i = 0; i < FF_ARRAY_ELEMS(aacenc_profiles); i++)
+        if (avctx->profile == aacenc_profiles[i])
+            break;
+    ERROR_IF(i == FF_ARRAY_ELEMS(aacenc_profiles), /* loop above found no match */
+             "Unsupported encoding profile: %d\n", avctx->profile);
+    if (avctx->profile == FF_PROFILE_MPEG2_AAC_LOW) {
+        avctx->profile = FF_PROFILE_AAC_LOW;
+        ERROR_IF(s->options.pred,
+                 "Main prediction unavailable in the \"mpeg2_aac_low\" profile\n");
+        ERROR_IF(s->options.ltp,
+                 "LTP prediction unavailable in the \"mpeg2_aac_low\" profile\n");
+        WARN_IF(s->options.pns,
+                "PNS unavailable in the \"mpeg2_aac_low\" profile, turning off\n");
+        s->options.pns = 0;
+    } else if (avctx->profile == FF_PROFILE_AAC_LTP) {
+        s->options.ltp = 1;
+        ERROR_IF(s->options.pred,
+                 "Main prediction unavailable in the \"aac_ltp\" profile\n");
+    } else if (avctx->profile == FF_PROFILE_AAC_MAIN) {
+        s->options.pred = 1;
+        ERROR_IF(s->options.ltp,
+                 "LTP prediction unavailable in the \"aac_main\" profile\n");
+    } else if (s->options.ltp) {
+        avctx->profile = FF_PROFILE_AAC_LTP;
+        WARN_IF(1, "Changing profile to \"aac_ltp\"\n");
+        ERROR_IF(s->options.pred,
+                 "Main prediction unavailable in the \"aac_ltp\" profile\n");
+    } else if (s->options.pred) {
+        avctx->profile = FF_PROFILE_AAC_MAIN;
+        WARN_IF(1, "Changing profile to \"aac_main\"\n");
+        ERROR_IF(s->options.ltp,
+                 "LTP prediction unavailable in the \"aac_main\" profile\n");
+    }
+    s->profile = avctx->profile;
+
+    /* Coder limitations */
+    s->coder = &ff_aac_coders[s->options.coder];
+    if (s->options.coder != AAC_CODER_TWOLOOP) {
+        ERROR_IF(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL,
+                 "Coders other than twoloop require -strict -2 and some may be removed in the future\n");
+        s->options.intensity_stereo = 0;
+        s->options.pns = 0;
+    }
+    ERROR_IF(s->options.ltp && avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL,
+             "The LTP profile requires experimental compliance, add -strict -2 to enable!\n");
 
-    s->chan_map = aac_chan_configs[s->channels-1];
+    /* M/S introduces horrible artifacts with multichannel files, this is temporary */
+    if (s->channels > 3)
+        s->options.mid_side = 0;
 
     if ((ret = dsp_init(avctx, s)) < 0)
         goto fail;
@@ -797,32 +1005,27 @@ static av_cold int aac_encode_init(AVCodecContext *avctx)
     if ((ret = alloc_buffers(avctx, s)) < 0)
         goto fail;
 
-    avctx->extradata_size = 5;
     put_audio_specific_config(avctx);
 
-    sizes[0]   = swb_size_1024[i];
-    sizes[1]   = swb_size_128[i];
-    lengths[0] = ff_aac_num_swb_1024[i];
-    lengths[1] = ff_aac_num_swb_128[i];
+    sizes[0]   = ff_aac_swb_size_1024[s->samplerate_index];
+    sizes[1]   = ff_aac_swb_size_128[s->samplerate_index];
+    lengths[0] = ff_aac_num_swb_1024[s->samplerate_index];
+    lengths[1] = ff_aac_num_swb_128[s->samplerate_index];
     for (i = 0; i < s->chan_map[0]; i++)
         grouping[i] = s->chan_map[i + 1] == TYPE_CPE;
     if ((ret = ff_psy_init(&s->psy, avctx, 2, sizes, lengths,
                            s->chan_map[0], grouping)) < 0)
         goto fail;
     s->psypp = ff_psy_preprocess_init(avctx);
-    s->coder = &ff_aac_coders[s->options.aac_coder];
+    ff_lpc_init(&s->lpc, 2*avctx->frame_size, TNS_MAX_ORDER, FF_LPC_TYPE_LEVINSON);
+    av_lfg_init(&s->lfg, 0x72adca55);
 
-    if (HAVE_MIPSDSPR1)
+    if (HAVE_MIPSDSP)
         ff_aac_coder_init_mips(s);
 
-    s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120;
+    if ((ret = ff_thread_once(&aac_table_init, &aac_encode_init_tables)) != 0)
+        return AVERROR_UNKNOWN;
 
-    ff_aac_tableinit();
-
-    for (i = 0; i < 428; i++)
-        ff_aac_pow34sf_tab[i] = sqrt(ff_aac_pow2sf_tab[i] * sqrt(ff_aac_pow2sf_tab[i]));
-
-    avctx->initial_padding = 1024;
     ff_af_queue_init(avctx, &s->afq);
 
     return 0;
@@ -833,18 +1036,16 @@ static av_cold int aac_encode_init(AVCodecContext *avctx)
 
 #define AACENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
 static const AVOption aacenc_options[] = {
-    {"stereo_mode", "Stereo coding method", offsetof(AACEncContext, options.stereo_mode), AV_OPT_TYPE_INT, {.i64 = 0}, -1, 1, AACENC_FLAGS, "stereo_mode"},
-        {"auto",     "Selected by the Encoder", 0, AV_OPT_TYPE_CONST, {.i64 = -1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
-        {"ms_off",   "Disable Mid/Side coding", 0, AV_OPT_TYPE_CONST, {.i64 =  0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
-        {"ms_force", "Force Mid/Side for the whole frame if possible", 0, AV_OPT_TYPE_CONST, {.i64 =  1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
-    {"aac_coder", "", offsetof(AACEncContext, options.aac_coder), AV_OPT_TYPE_INT, {.i64 = AAC_CODER_TWOLOOP}, 0, AAC_CODER_NB-1, AACENC_FLAGS, "aac_coder"},
-        {"faac",     "FAAC-inspired method",      0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAAC},    INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
-        {"anmr",     "ANMR method",               0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_ANMR},    INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
-        {"twoloop",  "Two loop searching method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_TWOLOOP}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
-        {"fast",     "Constant quantizer",        0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAST},    INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
-    {"aac_pns", "Perceptual Noise Substitution", offsetof(AACEncContext, options.pns), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, AACENC_FLAGS, "aac_pns"},
-        {"disable",  "Disable PNS", 0, AV_OPT_TYPE_CONST, {.i64 =  0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_pns"},
-        {"enable",   "Enable PNS (Proof of concept)",  0, AV_OPT_TYPE_CONST, {.i64 =  1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_pns"},
+    {"aac_coder", "Coding algorithm", offsetof(AACEncContext, options.coder), AV_OPT_TYPE_INT, {.i64 = AAC_CODER_TWOLOOP}, 0, AAC_CODER_NB-1, AACENC_FLAGS, "coder"},
+        {"anmr",     "ANMR method",               0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_ANMR},    INT_MIN, INT_MAX, AACENC_FLAGS, "coder"},
+        {"twoloop",  "Two loop searching method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_TWOLOOP}, INT_MIN, INT_MAX, AACENC_FLAGS, "coder"},
+        {"fast",     "Constant quantizer",        0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAST},    INT_MIN, INT_MAX, AACENC_FLAGS, "coder"},
+    {"aac_ms", "Force M/S stereo coding", offsetof(AACEncContext, options.mid_side), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, AACENC_FLAGS},
+    {"aac_is", "Intensity stereo coding", offsetof(AACEncContext, options.intensity_stereo), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS},
+    {"aac_pns", "Perceptual noise substitution", offsetof(AACEncContext, options.pns), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS},
+    {"aac_tns", "Temporal noise shaping", offsetof(AACEncContext, options.tns), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS},
+    {"aac_ltp", "Long term prediction", offsetof(AACEncContext, options.ltp), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS},
+    {"aac_pred", "AAC-Main prediction", offsetof(AACEncContext, options.pred), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS},
     {NULL}
 };
 
@@ -855,11 +1056,9 @@ static const AVClass aacenc_class = {
     LIBAVUTIL_VERSION_INT,
 };
 
-/* duplicated from avpriv_mpeg4audio_sample_rates to avoid shared build
- * failures */
-static const int mpeg4audio_sample_rates[16] = {
-    96000, 88200, 64000, 48000, 44100, 32000,
-    24000, 22050, 16000, 12000, 11025, 8000, 7350
+static const AVCodecDefault aac_encode_defaults[] = {
+    { "b", "0" },
+    { NULL }
 };
 
 AVCodec ff_aac_encoder = {
@@ -871,9 +1070,10 @@ AVCodec ff_aac_encoder = {
     .init           = aac_encode_init,
     .encode2        = aac_encode_frame,
     .close          = aac_encode_end,
+    .defaults       = aac_encode_defaults,
     .supported_samplerates = mpeg4audio_sample_rates,
-    .capabilities   = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY |
-                      CODEC_CAP_EXPERIMENTAL,
+    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE,
+    .capabilities   = AV_CODEC_CAP_SMALL_LAST_FRAME | AV_CODEC_CAP_DELAY,
     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
                                                      AV_SAMPLE_FMT_NONE },
     .priv_class     = &aacenc_class,
diff --git a/libavcodec/aacenc.h b/libavcodec/aacenc.h
index 7c1f277f..2252e298 100644
--- a/libavcodec/aacenc.h
+++ b/libavcodec/aacenc.h
@@ -23,6 +23,7 @@
 #define AVCODEC_AACENC_H
 
 #include "libavutil/float_dsp.h"
+#include "libavutil/lfg.h"
 #include "avcodec.h"
 #include "put_bits.h"
 
@@ -30,9 +31,10 @@
 #include "audio_frame_queue.h"
 #include "psymodel.h"
 
+#include "lpc.h"
+
 typedef enum AACCoder {
-    AAC_CODER_FAAC = 0,
-    AAC_CODER_ANMR,
+    AAC_CODER_ANMR = 0,
     AAC_CODER_TWOLOOP,
     AAC_CODER_FAST,
 
@@ -40,9 +42,13 @@ typedef enum AACCoder {
 }AACCoder;
 
 typedef struct AACEncOptions {
-    int stereo_mode;
-    int aac_coder;
+    int coder;
     int pns;
+    int tns;
+    int ltp;
+    int pred;
+    int mid_side;
+    int intensity_stereo;
 } AACEncOptions;
 
 struct AACEncContext;
@@ -52,13 +58,38 @@ typedef struct AACCoefficientsEncoder {
                                   SingleChannelElement *sce, const float lambda);
     void (*encode_window_bands_info)(struct AACEncContext *s, SingleChannelElement *sce,
                                      int win, int group_len, const float lambda);
-    void (*quantize_and_encode_band)(struct AACEncContext *s, PutBitContext *pb, const float *in, int size,
-                                     int scale_idx, int cb, const float lambda);
-    void (*search_for_ms)(struct AACEncContext *s, ChannelElement *cpe, const float lambda);
+    void (*quantize_and_encode_band)(struct AACEncContext *s, PutBitContext *pb, const float *in, float *out, int size,
+                                     int scale_idx, int cb, const float lambda, int rtz);
+    void (*encode_tns_info)(struct AACEncContext *s, SingleChannelElement *sce);
+    void (*encode_ltp_info)(struct AACEncContext *s, SingleChannelElement *sce, int common_window);
+    void (*encode_main_pred)(struct AACEncContext *s, SingleChannelElement *sce);
+    void (*adjust_common_pred)(struct AACEncContext *s, ChannelElement *cpe);
+    void (*adjust_common_ltp)(struct AACEncContext *s, ChannelElement *cpe);
+    void (*apply_main_pred)(struct AACEncContext *s, SingleChannelElement *sce);
+    void (*apply_tns_filt)(struct AACEncContext *s, SingleChannelElement *sce);
+    void (*update_ltp)(struct AACEncContext *s, SingleChannelElement *sce);
+    void (*ltp_insert_new_frame)(struct AACEncContext *s);
+    void (*set_special_band_scalefactors)(struct AACEncContext *s, SingleChannelElement *sce);
+    void (*search_for_pns)(struct AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce);
+    void (*mark_pns)(struct AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce);
+    void (*search_for_tns)(struct AACEncContext *s, SingleChannelElement *sce);
+    void (*search_for_ltp)(struct AACEncContext *s, SingleChannelElement *sce, int common_window);
+    void (*search_for_ms)(struct AACEncContext *s, ChannelElement *cpe);
+    void (*search_for_is)(struct AACEncContext *s, AVCodecContext *avctx, ChannelElement *cpe);
+    void (*search_for_pred)(struct AACEncContext *s, SingleChannelElement *sce);
 } AACCoefficientsEncoder;
 
 extern AACCoefficientsEncoder ff_aac_coders[];
 
+typedef struct AACQuantizeBandCostCacheEntry {
+    float rd;
+    float energy;
+    int bits; ///< -1 means uninitialized entry
+    char cb;
+    char rtz;
+    char padding[2]; ///< Keeps the entry size a multiple of 32 bits
+} AACQuantizeBandCostCacheEntry;
+
 /**
  * AAC encoder context
  */
@@ -69,8 +100,11 @@ typedef struct AACEncContext {
     FFTContext mdct1024;                         ///< long (1024 samples) frame transform context
     FFTContext mdct128;                          ///< short (128 samples) frame transform context
     AVFloatDSPContext *fdsp;
-    float *planar_samples[6];                    ///< saved preprocessed input
+    AVLFG lfg;                                   ///< PRNG needed for PNS
+    float *planar_samples[8];                    ///< saved preprocessed input
 
+    int profile;                                 ///< copied from avctx
+    LPCContext lpc;                              ///< used by TNS
     int samplerate_index;                        ///< MPEG-4 samplerate index
     int channels;                                ///< channel count
     const uint8_t *chan_map;                     ///< channel configuration map
@@ -79,20 +113,28 @@ typedef struct AACEncContext {
     FFPsyContext psy;
     struct FFPsyPreprocessContext* psypp;
     AACCoefficientsEncoder *coder;
-    int cur_channel;
+    int cur_channel;                             ///< current channel for coder context
     int last_frame;
+    int random_state;
     float lambda;
+    int last_frame_pb_count;                     ///< number of bits for the previous frame
+    float lambda_sum;                            ///< sum(lambda), for Qavg reporting
+    int lambda_count;                            ///< count(lambda), for Qavg reporting
+    enum RawDataBlockType cur_type;              ///< channel group type cur_channel belongs to
+
     AudioFrameQueue afq;
     DECLARE_ALIGNED(16, int,   qcoefs)[96];      ///< quantized coefficients
     DECLARE_ALIGNED(32, float, scoefs)[1024];    ///< scaled coefficients
 
+    AACQuantizeBandCostCacheEntry quantize_band_cost_cache[256][128]; ///< memoization area for quantize_band_cost
+
     struct {
         float *samples;
     } buffer;
 } AACEncContext;
 
-extern float ff_aac_pow34sf_tab[428];
-
 void ff_aac_coder_init_mips(AACEncContext *c);
+void ff_quantize_band_cost_cache_init(struct AACEncContext *s);
+
 
 #endif /* AVCODEC_AACENC_H */
diff --git a/libavcodec/aacenc_is.c b/libavcodec/aacenc_is.c
new file mode 100644
index 00000000..473897b1
--- /dev/null
+++ b/libavcodec/aacenc_is.c
@@ -0,0 +1,158 @@
+/*
+ * AAC encoder intensity stereo
+ * Copyright (C) 2015 Rostislav Pehlivanov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * AAC encoder Intensity Stereo
+ * @author Rostislav Pehlivanov ( atomnuker gmail com )
+ */
+
+#include "aacenc.h"
+#include "aacenc_utils.h"
+#include "aacenc_is.h"
+#include "aacenc_quantization.h"
+
+struct AACISError ff_aac_is_encoding_err(AACEncContext *s, ChannelElement *cpe,
+                                         int start, int w, int g, float ener0,
+                                         float ener1, float ener01,
+                                         int use_pcoeffs, int phase)
+{
+    int i, w2;
+    SingleChannelElement *sce0 = &cpe->ch[0];
+    SingleChannelElement *sce1 = &cpe->ch[1];
+    float *L = use_pcoeffs ? sce0->pcoeffs : sce0->coeffs;
+    float *R = use_pcoeffs ? sce1->pcoeffs : sce1->coeffs;
+    float *L34 = &s->scoefs[256*0], *R34 = &s->scoefs[256*1];
+    float *IS  = &s->scoefs[256*2], *I34 = &s->scoefs[256*3];
+    float dist1 = 0.0f, dist2 = 0.0f; /* accumulated cost: separate L/R vs. intensity stereo */
+    struct AACISError is_error = {0}; /* every field stays 0 on the early return below */
+
+    if (ener01 <= 0 || ener0 <= 0) { /* both are used as divisors further down */
+        is_error.pass = 0;
+        return is_error;
+    }
+
+    for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
+        FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
+        FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
+        int is_band_type, is_sf_idx = FFMAX(1, sce0->sf_idx[w*16+g]-4);
+        float e01_34 = phase*pos_pow34(ener1/ener0);
+        float maxval, dist_spec_err = 0.0f;
+        float minthr = FFMIN(band0->threshold, band1->threshold);
+        for (i = 0; i < sce0->ics.swb_sizes[g]; i++) /* build the candidate IS signal for this window */
+            IS[i] = (L[start+(w+w2)*128+i] + phase*R[start+(w+w2)*128+i])*sqrt(ener0/ener01);
+        abs_pow34_v(L34, &L[start+(w+w2)*128], sce0->ics.swb_sizes[g]);
+        abs_pow34_v(R34, &R[start+(w+w2)*128], sce0->ics.swb_sizes[g]);
+        abs_pow34_v(I34, IS,                   sce0->ics.swb_sizes[g]);
+        maxval = find_max_val(1, sce0->ics.swb_sizes[g], I34);
+        is_band_type = find_min_book(maxval, is_sf_idx);
+        dist1 += quantize_band_cost(s, &L[start + (w+w2)*128], L34, /* cost of L as currently coded */
+                                    sce0->ics.swb_sizes[g],
+                                    sce0->sf_idx[w*16+g],
+                                    sce0->band_type[w*16+g],
+                                    s->lambda / band0->threshold, INFINITY, NULL, NULL, 0);
+        dist1 += quantize_band_cost(s, &R[start + (w+w2)*128], R34, /* cost of R as currently coded */
+                                    sce1->ics.swb_sizes[g],
+                                    sce1->sf_idx[w*16+g],
+                                    sce1->band_type[w*16+g],
+                                    s->lambda / band1->threshold, INFINITY, NULL, NULL, 0);
+        dist2 += quantize_band_cost(s, IS, I34, sce0->ics.swb_sizes[g], /* cost of the IS signal */
+                                    is_sf_idx, is_band_type,
+                                    s->lambda / minthr, INFINITY, NULL, NULL, 0);
+        for (i = 0; i < sce0->ics.swb_sizes[g]; i++) { /* squared deviation of L34/R34 from the IS signal */
+            dist_spec_err += (L34[i] - I34[i])*(L34[i] - I34[i]);
+            dist_spec_err += (R34[i] - I34[i]*e01_34)*(R34[i] - I34[i]*e01_34);
+        }
+        dist_spec_err *= s->lambda / minthr;
+        dist2 += dist_spec_err;
+    }
+
+    is_error.pass = dist2 <= dist1; /* a tie counts as a pass */
+    is_error.phase = phase;
+    is_error.error = dist2 - dist1; /* signed: negative when the IS cost is lower */
+    is_error.dist1 = dist1;
+    is_error.dist2 = dist2;
+    is_error.ener01 = ener01;
+
+    return is_error;
+}
+
+void ff_aac_search_for_is(AACEncContext *s, AVCodecContext *avctx, ChannelElement *cpe)
+{
+    SingleChannelElement *sce0 = &cpe->ch[0];
+    SingleChannelElement *sce1 = &cpe->ch[1];
+    int start = 0, count = 0, w, w2, g, i, prev_sf1 = -1, prev_bt = -1, prev_is = 0;
+    const float freq_mult = avctx->sample_rate/(1024.0f/sce0->ics.num_windows)/2.0f;
+    uint8_t nextband1[128];
+
+    if (!cpe->common_window) /* nothing is searched without a common window */
+        return;
+
+    /** Scout out next nonzero bands */
+    ff_init_nextband_map(sce1, nextband1);
+
+    for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
+        start = 0;
+        for (g = 0;  g < sce0->ics.num_swb; g++) {
+            if (start*freq_mult > INT_STEREO_LOW_LIMIT*(s->lambda/170.0f) && /* above the lambda-scaled low limit */
+                cpe->ch[0].band_type[w*16+g] != NOISE_BT && !cpe->ch[0].zeroes[w*16+g] &&
+                cpe->ch[1].band_type[w*16+g] != NOISE_BT && !cpe->ch[1].zeroes[w*16+g] &&
+                ff_sfdelta_can_remove_band(sce1, nextband1, prev_sf1, w*16+g)) {
+                float ener0 = 0.0f, ener1 = 0.0f, ener01 = 0.0f, ener01p = 0.0f;
+                struct AACISError ph_err1, ph_err2, *best;
+                for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
+                    for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
+                        float coef0 = sce0->coeffs[start+(w+w2)*128+i];
+                        float coef1 = sce1->coeffs[start+(w+w2)*128+i];
+                        ener0  += coef0*coef0;
+                        ener1  += coef1*coef1;
+                        ener01 += (coef0 + coef1)*(coef0 + coef1);   /* energy of the sum */
+                        ener01p += (coef0 - coef1)*(coef0 - coef1);  /* energy of the difference */
+                    }
+                }
+                ph_err1 = ff_aac_is_encoding_err(s, cpe, start, w, g, /* phase -1 candidate */
+                                                 ener0, ener1, ener01p, 0, -1);
+                ph_err2 = ff_aac_is_encoding_err(s, cpe, start, w, g, /* phase +1 candidate */
+                                                 ener0, ener1, ener01, 0, +1);
+                best = (ph_err1.pass && ph_err1.error < ph_err2.error) ? &ph_err1 : &ph_err2;
+                if (best->pass) {
+                    cpe->is_mask[w*16+g] = 1;
+                    cpe->ms_mask[w*16+g] = 0;
+                    cpe->ch[0].is_ener[w*16+g] = sqrt(ener0 / best->ener01);
+                    cpe->ch[1].is_ener[w*16+g] = ener0/ener1; /* NOTE(review): ener1 is not checked for zero here - confirm it cannot be */
+                    cpe->ch[1].band_type[w*16+g] = (best->phase > 0) ? INTENSITY_BT : INTENSITY_BT2;
+                    if (prev_is && prev_bt != cpe->ch[1].band_type[w*16+g]) {
+                        /** Flip M/S mask and pick the other CB, since it encodes more efficiently */
+                        cpe->ms_mask[w*16+g] = 1;
+                        cpe->ch[1].band_type[w*16+g] = (best->phase > 0) ? INTENSITY_BT2 : INTENSITY_BT;
+                    }
+                    prev_bt = cpe->ch[1].band_type[w*16+g];
+                    count++;
+                }
+            }
+            if (!sce1->zeroes[w*16+g] && sce1->band_type[w*16+g] < RESERVED_BT)
+                prev_sf1 = sce1->sf_idx[w*16+g];
+            prev_is = cpe->is_mask[w*16+g];
+            start += sce0->ics.swb_sizes[g];
+        }
+    }
+    cpe->is_mode = !!count; /* set when at least one band was switched above */
+}
diff --git a/libavcodec/aacenc_is.h b/libavcodec/aacenc_is.h
new file mode 100644
index 00000000..269fd1a9
--- /dev/null
+++ b/libavcodec/aacenc_is.h
@@ -0,0 +1,51 @@
+/*
+ * AAC encoder intensity stereo
+ * Copyright (C) 2015 Rostislav Pehlivanov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * AAC encoder Intensity Stereo
+ * @author Rostislav Pehlivanov ( atomnuker gmail com )
+ */
+
+#ifndef AVCODEC_AACENC_IS_H
+#define AVCODEC_AACENC_IS_H
+
+#include "aacenc.h"
+
+/** Frequency in Hz for lower limit of intensity stereo **/
+#define INT_STEREO_LOW_LIMIT 6100
+
+struct AACISError {
+    int pass;    /* 1 if dist2 <= dist1  */
+    int phase;   /* -1 or +1             */
+    float error; /* dist2 - dist1, signed (<= 0 whenever pass is 1) */
+    float dist1; /* From original coeffs */
+    float dist2; /* From IS'd coeffs     */
+    float ener01; /* ener01 argument the result was computed with */
+};
+
+struct AACISError ff_aac_is_encoding_err(AACEncContext *s, ChannelElement *cpe,
+                                         int start, int w, int g, float ener0,
+                                         float ener1, float ener01,
+                                         int use_pcoeffs, int phase);
+void ff_aac_search_for_is(AACEncContext *s, AVCodecContext *avctx, ChannelElement *cpe);
+
+#endif /* AVCODEC_AACENC_IS_H */
diff --git a/libavcodec/aacenc_ltp.c b/libavcodec/aacenc_ltp.c
new file mode 100644
index 00000000..b9d43b47
--- /dev/null
+++ b/libavcodec/aacenc_ltp.c
@@ -0,0 +1,236 @@
+/*
+ * AAC encoder long term prediction extension
+ * Copyright (C) 2015 Rostislav Pehlivanov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * AAC encoder long term prediction extension
+ * @author Rostislav Pehlivanov ( atomnuker gmail com )
+ */
+
+#include "aacenc_ltp.h"
+#include "aacenc_quantization.h"
+#include "aacenc_utils.h"
+
+/**
+ * Encode LTP data.
+ */
+void ff_aac_encode_ltp_info(AACEncContext *s, SingleChannelElement *sce,
+                            int common_window)
+{
+    IndividualChannelStream *ics = &sce->ics;
+    int sfb;
+    if (s->profile != FF_PROFILE_AAC_LTP || !ics->predictor_present)
+        return;                                      /* nothing is written */
+    if (common_window)
+        put_bits(&s->pb, 1, 0);                      /* extra zero bit with a common window */
+    put_bits(&s->pb, 1, ics->ltp.present);
+    if (ics->ltp.present) {
+        put_bits(&s->pb, 11, ics->ltp.lag);
+        put_bits(&s->pb, 3,  ics->ltp.coef_idx);
+        for (sfb = 0; sfb < FFMIN(ics->max_sfb, MAX_LTP_LONG_SFB); sfb++)
+            put_bits(&s->pb, 1, ics->ltp.used[sfb]); /* one flag per band */
+    }
+}
+
+void ff_aac_ltp_insert_new_frame(AACEncContext *s)
+{
+    int elem, n, channel = 0;
+
+    for (elem = 0; elem < s->chan_map[0]; elem++) {
+        /* TYPE_CPE elements carry two channels, every other type one. */
+        const int nb_ch = s->chan_map[elem + 1] == TYPE_CPE ? 2 : 1;
+
+        for (n = 0; n < nb_ch; n++, channel++) {
+            SingleChannelElement *sce = &s->cpe[elem].ch[n];
+            float *state = sce->ltp_state;
+
+            /* Slide the middle third down, then append the samples at
+             * offset 2048 of this channel's input and the ret_buf contents. */
+            memcpy(state,        state + 1024, 1024 * sizeof(*state));
+            memcpy(state + 1024, s->planar_samples[channel] + 2048, 1024 * sizeof(*state));
+            memcpy(state + 2048, sce->ret_buf, 1024 * sizeof(*state));
+            sce->ics.ltp.lag = 0;
+        }
+    }
+}
+
+static void get_lag(float *buf, const float *new, LongTermPrediction *ltp)
+{
+    int i, j, lag = 0, max_corr = 0; /* lag 0 = nothing found; NOTE(review): int max_corr truncates the score - confirm intended */
+    float max_ratio = 0.0f;          /* stays 0 when no candidate scores above max_corr */
+    for (i = 0; i < 2048; i++) {     /* score every candidate lag */
+        float corr, s0 = 0.0f, s1 = 0.0f;
+        const int start = FFMAX(0, i - 1024);
+        for (j = start; j < 2048; j++) {
+            const int idx = j - i + 1024;
+            s0 += new[j]*buf[idx];
+            s1 += buf[idx]*buf[idx];
+        }
+        corr = s1 > 0.0f ? s0/sqrt(s1) : 0.0f; /* cross term over sqrt of buf energy */
+        if (corr > max_corr) {
+            max_corr = corr;
+            lag = i;
+            max_ratio = corr/(2048-start);
+        }
+    }
+    ltp->lag = FFMAX(av_clip_uintp2(lag, 11), 0);
+    ltp->coef_idx = quant_array_idx(max_ratio, ltp_coef, 8);
+    ltp->coef = ltp_coef[ltp->coef_idx];
+}
+
+static void generate_samples(float *buf, LongTermPrediction *ltp)
+{
+    int n, count;
+    if (ltp->lag == 0) { /* lag 0: mark LTP as not present and leave buf untouched */
+        ltp->present = 0;
+        return;
+    }
+    /* For lags below 1024 only lag + 1024 samples are generated. */
+    count = ltp->lag < 1024 ? ltp->lag + 1024 : 2048;
+    for (n = 0; n < count; n++)
+        buf[n] = ltp->coef * buf[n + 2048 - ltp->lag];
+    memset(buf + count, 0, (2048 - count) * sizeof(float)); /* zero the rest up to 2048 */
+}
+
+/**
+ * Process LTP parameters
+ * @see Patent WO2006070265A1
+ */
+void ff_aac_update_ltp(AACEncContext *s, SingleChannelElement *sce)
+{
+    LongTermPrediction *ltp = &sce->ics.ltp;
+    float *state = sce->ltp_state;
+
+    if (s->profile == FF_PROFILE_AAC_LTP) {
+        /* Derive lag and coefficient from the current input, then let
+         * generate_samples() rewrite the state buffer in place. */
+        get_lag(state, s->planar_samples[s->cur_channel] + 1024, ltp);
+        generate_samples(state, ltp);
+    }
+}
+
+void ff_aac_adjust_common_ltp(AACEncContext *s, ChannelElement *cpe)
+{
+    SingleChannelElement *first  = &cpe->ch[0];
+    SingleChannelElement *second = &cpe->ch[1];
+    int band, shared = 0;
+
+    /* Without a common window, or with short windows, clear ch[0]'s LTP flag. */
+    if (!cpe->common_window ||
+        first->ics.window_sequence[0]  == EIGHT_SHORT_SEQUENCE ||
+        second->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
+        first->ics.ltp.present = 0;
+        return;
+    }
+
+    /* Count bands whose two used[] flags sum to 2; clear ch[0]'s flag elsewhere. */
+    for (band = 0; band < FFMIN(first->ics.max_sfb, MAX_LTP_LONG_SFB); band++) {
+        if (first->ics.ltp.used[band] + second->ics.ltp.used[band] == 2)
+            shared++;
+        else
+            first->ics.ltp.used[band] = 0;
+    }
+
+    first->ics.ltp.present = !!shared;
+    first->ics.predictor_present = !!shared;
+}
+
+/**
+ * Mark the sfbs whose LTP residual costs fewer bits at lower distortion and
+ * subtract lcoeffs from coeffs there; undo it all if the bit balance is negative.
+ */
+void ff_aac_search_for_ltp(AACEncContext *s, SingleChannelElement *sce,
+                           int common_window)
+{
+    int w, g, w2, i, start = 0, count = 0;
+    const int max_ltp = FFMIN(sce->ics.max_sfb, MAX_LTP_LONG_SFB);
+    int saved_bits = -(15 + max_ltp); /* initial deficit, presumably the LTP side info */
+    float *C34 = &s->scoefs[128*0], *PCD = &s->scoefs[128*1];
+    float *PCD34 = &s->scoefs[128*2];
+
+    if (sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
+        if (sce->ics.ltp.lag) {
+            memset(&sce->ltp_state[0], 0, 3072*sizeof(sce->ltp_state[0]));
+            memset(&sce->ics.ltp, 0, sizeof(LongTermPrediction));
+        }
+        return;
+    }
+
+    if (!sce->ics.ltp.lag || s->lambda > 120.0f)
+        return;
+
+    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+        start = 0;
+        for (g = 0;  g < sce->ics.num_swb; g++) {
+            int bits1 = 0, bits2 = 0;
+            float dist1 = 0.0f, dist2 = 0.0f;
+            if (w*16+g >= max_ltp) { /* only sfbs below max_ltp can carry a flag */
+                start += sce->ics.swb_sizes[g];
+                continue;
+            }
+            for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
+                int bits_tmp1, bits_tmp2;
+                FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
+                for (i = 0; i < sce->ics.swb_sizes[g]; i++)
+                    PCD[i] = sce->coeffs[start+(w+w2)*128+i] - sce->lcoeffs[start+(w+w2)*128+i];
+                abs_pow34_v(C34,  &sce->coeffs[start+(w+w2)*128],  sce->ics.swb_sizes[g]);
+                abs_pow34_v(PCD34, PCD, sce->ics.swb_sizes[g]);
+                dist1 += quantize_band_cost(s, &sce->coeffs[start+(w+w2)*128], C34, sce->ics.swb_sizes[g],
+                                            sce->sf_idx[(w+w2)*16+g], sce->band_type[(w+w2)*16+g],
+                                            s->lambda/band->threshold, INFINITY, &bits_tmp1, NULL, 0);
+                dist2 += quantize_band_cost(s, PCD, PCD34, sce->ics.swb_sizes[g],
+                                            sce->sf_idx[(w+w2)*16+g],
+                                            sce->band_type[(w+w2)*16+g],
+                                            s->lambda/band->threshold, INFINITY, &bits_tmp2, NULL, 0);
+                bits1 += bits_tmp1;
+                bits2 += bits_tmp2;
+            }
+            if (dist2 < dist1 && bits2 < bits1) {
+                for (w2 = 0; w2 < sce->ics.group_len[w]; w2++)
+                    for (i = 0; i < sce->ics.swb_sizes[g]; i++)
+                        sce->coeffs[start+(w+w2)*128+i] -= sce->lcoeffs[start+(w+w2)*128+i];
+                sce->ics.ltp.used[w*16+g] = 1;
+                saved_bits += bits1 - bits2;
+                count++;
+            }
+            start += sce->ics.swb_sizes[g];
+        }
+    }
+
+    sce->ics.ltp.present = !!count && (saved_bits >= 0);
+    sce->ics.predictor_present = !!sce->ics.ltp.present;
+
+    /* Reset any marked sfbs: restore their coefficients and clear the marks */
+    if (!sce->ics.ltp.present && !!count) {
+        for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+            start = 0;
+            for (g = 0;  g < sce->ics.num_swb; g++) {
+                if (w*16+g < max_ltp && sce->ics.ltp.used[w*16+g]) {
+                    for (w2 = 0; w2 < sce->ics.group_len[w]; w2++)
+                        for (i = 0; i < sce->ics.swb_sizes[g]; i++)
+                            sce->coeffs[start+(w+w2)*128+i] += sce->lcoeffs[start+(w+w2)*128+i];
+                    sce->ics.ltp.used[w*16+g] = 0;
+                }
+                start += sce->ics.swb_sizes[g];
+            }
+        }
+    }
+}
diff --git a/libavcodec/aacenc_ltp.h b/libavcodec/aacenc_ltp.h
new file mode 100644
index 00000000..72768784
--- /dev/null
+++ b/libavcodec/aacenc_ltp.h
@@ -0,0 +1,41 @@
+/*
+ * AAC encoder long term prediction extension
+ * Copyright (C) 2015 Rostislav Pehlivanov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * AAC encoder long term prediction extension
+ * @author Rostislav Pehlivanov ( atomnuker gmail com )
+ */
+
+#ifndef AVCODEC_AACENC_LTP_H
+#define AVCODEC_AACENC_LTP_H
+
+#include "aacenc.h"
+
+void ff_aac_encode_ltp_info(AACEncContext *s, SingleChannelElement *sce,
+                            int common_window);
+void ff_aac_update_ltp(AACEncContext *s, SingleChannelElement *sce); /* recompute lag and predicted signal (LTP profile only) */
+void ff_aac_adjust_common_ltp(AACEncContext *s, ChannelElement *cpe); /* keep only the bands marked by both channels */
+void ff_aac_ltp_insert_new_frame(AACEncContext *s);
+void ff_aac_search_for_ltp(AACEncContext *s, SingleChannelElement *sce,
+                           int common_window); /* mark the bands where coding the LTP residual pays off */
+
+#endif /* AVCODEC_AACENC_LTP_H */
diff --git a/libavcodec/aacenc_pred.c b/libavcodec/aacenc_pred.c
new file mode 100644
index 00000000..e77a3de9
--- /dev/null
+++ b/libavcodec/aacenc_pred.c
@@ -0,0 +1,347 @@
+/*
+ * AAC encoder main-type prediction
+ * Copyright (C) 2015 Rostislav Pehlivanov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * AAC encoder main-type prediction
+ * @author Rostislav Pehlivanov ( atomnuker gmail com )
+ */
+
+#include "aactab.h"
+#include "aacenc_pred.h"
+#include "aacenc_utils.h"
+#include "aacenc_is.h"            /* <- Needed for common window distortions */
+#include "aacenc_quantization.h"
+
+#define RESTORE_PRED(sce, sfb) do { /* undo one band's prediction mark; safe as a single statement */ \
+        if ((sce)->ics.prediction_used[sfb]) {\
+            (sce)->ics.prediction_used[sfb] = 0;\
+            (sce)->band_type[sfb] = (sce)->band_alt[sfb];\
+        } } while (0)
+
+static inline float flt16_round(float pf)
+{
+    /* Add half of the dropped 16-bit range, then clear the low 16 bits. */
+    union av_intfloat32 bits = { .f = pf };
+    bits.i = (bits.i + 0x00008000U) & 0xFFFF0000U;
+    return bits.f;
+}
+
+static inline float flt16_even(float pf)
+{
+    /* '>>' binds tighter than '&', so the tie-break term is (i & 1). */
+    union av_intfloat32 bits = { .f = pf };
+    bits.i = (bits.i + 0x00007FFFU + (bits.i & (0x00010000U >> 16))) & 0xFFFF0000U;
+    return bits.f;
+}
+
+static inline float flt16_trunc(float pf)
+{
+    /* Truncate by zeroing the low 16 bits of the float's bit pattern. */
+    union av_intfloat32 bits = { .f = pf };
+    bits.i = bits.i & 0xFFFF0000U;
+    return bits.f;
+}
+
+static inline void predict(PredictorState *ps, float *coef, float *rcoef, int set)
+{ /* One predictor step: forms the residual of *coef, updates ps, and outputs the next estimate via *rcoef. */
+    float k2;
+    const float a     = 0.953125; // 61.0 / 64
+    const float alpha = 0.90625;  // 29.0 / 32
+    const float   k1 = ps->k1;
+    const float   r0 = ps->r0,     r1 = ps->r1;
+    const float cor0 = ps->cor0, cor1 = ps->cor1;
+    const float var0 = ps->var0, var1 = ps->var1;
+    const float e0 = *coef - ps->x_est; /* residual against the estimate stored by the previous call */
+    const float e1 = e0 - k1 * r0;
+
+    if (set) /* the caller chooses whether the coefficient is replaced by its residual */
+        *coef = e0;
+
+    ps->cor1 = flt16_trunc(alpha * cor1 + r1 * e1);
+    ps->var1 = flt16_trunc(alpha * var1 + 0.5f * (r1 * r1 + e1 * e1));
+    ps->cor0 = flt16_trunc(alpha * cor0 + r0 * e0);
+    ps->var0 = flt16_trunc(alpha * var0 + 0.5f * (r0 * r0 + e0 * e0));
+    ps->r1   = flt16_trunc(a * (r0 - k1 * e0));
+    ps->r0   = flt16_trunc(a * e0);
+
+    /* Prediction for next frame, kept in ps->x_est and also written to *rcoef */
+    ps->k1   = ps->var0 > 1 ? ps->cor0 * flt16_even(a / ps->var0) : 0;
+    k2       = ps->var1 > 1 ? ps->cor1 * flt16_even(a / ps->var1) : 0;
+    *rcoef   = ps->x_est = flt16_round(ps->k1*ps->r0 + k2*ps->r1);
+}
+
+static inline void reset_predict_state(PredictorState *ps)
+{
+    /* Clear the filter memory and the pending estimate */
+    ps->r0   = ps->r1   = 0.0f;
+    ps->cor0 = ps->cor1 = 0.0f;
+    ps->k1    = 0.0f;
+    ps->x_est = 0.0f;
+
+    /* Both variances restart at one instead of zero */
+    ps->var0 = ps->var1 = 1.0f;
+}
+
+static inline void reset_all_predictors(PredictorState *ps)
+{
+    PredictorState *const end = ps + MAX_PREDICTORS; /* one past the last state */
+    while (ps < end)
+        reset_predict_state(ps++);
+}
+
+static inline void reset_predictor_group(SingleChannelElement *sce, int group_num)
+{
+    /* A group covers predictors group_num-1, group_num+29, group_num+59, ... */
+    int idx = group_num - 1;
+    for (; idx < MAX_PREDICTORS; idx += 30)
+        reset_predict_state(sce->predictor_state + idx);
+}
+
+void ff_aac_apply_main_pred(AACEncContext *s, SingleChannelElement *sce)
+{
+    /* Advance every predictor by one frame; short windows reset them all. */
+    IndividualChannelStream *ics = &sce->ics;
+    const int pmax = FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]);
+    int sfb, k;
+    if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
+        reset_all_predictors(sce->predictor_state);
+        return;
+    }
+    for (sfb = 0; sfb < pmax; sfb++) {
+        /* Only bands flagged for prediction have coeffs replaced by residuals */
+        const int set = ics->predictor_present && ics->prediction_used[sfb];
+        for (k = ics->swb_offset[sfb]; k < ics->swb_offset[sfb + 1]; k++)
+            predict(&sce->predictor_state[k], &sce->coeffs[k], &sce->prcoeffs[k], set);
+    }
+    if (ics->predictor_reset_group)
+        reset_predictor_group(sce, ics->predictor_reset_group);
+}
+
+/* Adds inc to each group counter in turn; returns the first group past PRED_RESET_FRAME_MIN (stopping there), else 0 */
+static inline int update_counters(IndividualChannelStream *ics, int inc)
+{
+    int group = 1;
+    while (group < 31) {
+        if ((ics->predictor_reset_count[group] += inc) > PRED_RESET_FRAME_MIN)
+            return group; /* Reset this immediately */
+        group++;
+    }
+    return 0;
+}
+
+void ff_aac_adjust_common_pred(AACEncContext *s, ChannelElement *cpe)
+{ /* For a common-window pair, keep prediction only on bands both channels use and whose ff_aac_is_encoding_err() result has pass set. */
+    int start, w, w2, g, i, count = 0;
+    SingleChannelElement *sce0 = &cpe->ch[0];
+    SingleChannelElement *sce1 = &cpe->ch[1];
+    const int pmax0 = FFMIN(sce0->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]);
+    const int pmax1 = FFMIN(sce1->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]);
+    const int pmax  = FFMIN(pmax0, pmax1);
+
+    if (!cpe->common_window ||
+        sce0->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE ||
+        sce1->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE)
+        return;
+
+    for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
+        start = 0;
+        for (g = 0; g < sce0->ics.num_swb; g++) {
+            int sfb = w*16+g;
+            int sum = sce0->ics.prediction_used[sfb] + sce1->ics.prediction_used[sfb];
+            float ener0 = 0.0f, ener1 = 0.0f, ener01 = 0.0f;
+            struct AACISError ph_err1, ph_err2, *erf;
+            if (sfb < PRED_SFB_START || sfb > pmax || sum != 2) { /* NOTE(review): the other loops in this file stop at sfb < pmax; confirm '>' rather than '>=' is intended */
+                RESTORE_PRED(sce0, sfb);
+                RESTORE_PRED(sce1, sfb);
+                start += sce0->ics.swb_sizes[g];
+                continue;
+            }
+            for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
+                for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
+                    float coef0 = sce0->pcoeffs[start+(w+w2)*128+i];
+                    float coef1 = sce1->pcoeffs[start+(w+w2)*128+i];
+                    ener0  += coef0*coef0;
+                    ener1  += coef1*coef1;
+                    ener01 += (coef0 + coef1)*(coef0 + coef1);
+                }
+            }
+            ph_err1 = ff_aac_is_encoding_err(s, cpe, start, w, g,
+                                             ener0, ener1, ener01, 1, -1);
+            ph_err2 = ff_aac_is_encoding_err(s, cpe, start, w, g,
+                                             ener0, ener1, ener01, 1, +1);
+            erf = ph_err1.error < ph_err2.error ? &ph_err1 : &ph_err2; /* keep whichever of the -1 / +1 calls reports the smaller error */
+            if (erf->pass) {
+                sce0->ics.prediction_used[sfb] = 1;
+                sce1->ics.prediction_used[sfb] = 1;
+                count++;
+            } else {
+                RESTORE_PRED(sce0, sfb);
+                RESTORE_PRED(sce1, sfb);
+            }
+            start += sce0->ics.swb_sizes[g];
+        }
+    }
+
+    sce1->ics.predictor_present = sce0->ics.predictor_present = !!count; /* both channels share the outcome */
+}
+
+/**
+ * Pick the group to reset this frame into ics->predictor_reset_group (0 = none).
+ * NOTE(review): nothing in this file zeroes predictor_reset_count after a
+ * group is picked - confirm that happens elsewhere.
+ */
+static void update_pred_resets(SingleChannelElement *sce)
+{
+    int i, max_group_id_c = 0, max_frame = 0;
+    IndividualChannelStream *ics = &sce->ics;
+
+    /* Update the counters and immediately reset any group behind schedule */
+    if ((ics->predictor_reset_group = update_counters(&sce->ics, 1)))
+        return;
+
+    /* Otherwise find the group with the highest counter */
+    for (i = 1; i < 31; i++) {
+        if (ics->predictor_reset_count[i] > max_frame) {
+            max_group_id_c = i;
+            max_frame = ics->predictor_reset_count[i];
+        }
+    }
+
+    /* ...and reset it only once that counter exceeds PRED_RESET_MIN */
+    ics->predictor_reset_group = max_frame > PRED_RESET_MIN ? max_group_id_c : 0;
+}
+
+void ff_aac_search_for_pred(AACEncContext *s, SingleChannelElement *sce)
+{ /* Decide per band (PRED_SFB_START..pmax-1) whether to flag prediction_used, then set ics.predictor_present. */
+    int sfb, i, count = 0, cost_coeffs = 0, cost_pred = 0;
+    const int pmax = FFMIN(sce->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]);
+    float *O34  = &s->scoefs[128*0], *P34 = &s->scoefs[128*1];
+    float *SENT = &s->scoefs[128*2], *S34 = &s->scoefs[128*3];
+    float *QERR = &s->scoefs[128*4];
+
+    if (sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
+        sce->ics.predictor_present = 0;
+        return;
+    }
+
+    if (!sce->ics.predictor_initialized) { /* first long-window call for this channel */
+        reset_all_predictors(sce->predictor_state);
+        sce->ics.predictor_initialized = 1;
+        memcpy(sce->prcoeffs, sce->coeffs, 1024*sizeof(float));
+        for (i = 1; i < 31; i++)
+            sce->ics.predictor_reset_count[i] = i; /* each group starts from a different count */
+    }
+
+    update_pred_resets(sce);
+    memcpy(sce->band_alt, sce->band_type, sizeof(sce->band_type)); /* snapshot of the current band types */
+
+    for (sfb = PRED_SFB_START; sfb < pmax; sfb++) {
+        int cost1, cost2, cb_p;
+        float dist1, dist2, dist_spec_err = 0.0f;
+        const int cb_n = sce->zeroes[sfb] ? 0 : sce->band_type[sfb];
+        const int cb_min = sce->zeroes[sfb] ? 0 : 1;
+        const int cb_max = sce->zeroes[sfb] ? 0 : RESERVED_BT;
+        const int start_coef = sce->ics.swb_offset[sfb];
+        const int num_coeffs = sce->ics.swb_offset[sfb + 1] - start_coef;
+        const FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[sfb];
+
+        if (start_coef + num_coeffs > MAX_PREDICTORS || /* band reaches past the predictor array */
+            (s->cur_channel && sce->band_type[sfb] >= INTENSITY_BT2) ||
+            sce->band_type[sfb] == NOISE_BT)
+            continue;
+
+        /* Normal coefficients */
+        abs_pow34_v(O34, &sce->coeffs[start_coef], num_coeffs);
+        dist1 = quantize_and_encode_band_cost(s, NULL, &sce->coeffs[start_coef], NULL,
+                                              O34, num_coeffs, sce->sf_idx[sfb],
+                                              cb_n, s->lambda / band->threshold, INFINITY, &cost1, NULL, 0);
+        cost_coeffs += cost1;
+
+        /* Encoded coefficients - needed for #bits, band type and quant. error */
+        for (i = 0; i < num_coeffs; i++)
+            SENT[i] = sce->coeffs[start_coef + i] - sce->prcoeffs[start_coef + i];
+        abs_pow34_v(S34, SENT, num_coeffs);
+        if (cb_n < RESERVED_BT)
+            cb_p = av_clip(find_min_book(find_max_val(1, num_coeffs, S34), sce->sf_idx[sfb]), cb_min, cb_max);
+        else
+            cb_p = cb_n;
+        quantize_and_encode_band_cost(s, NULL, SENT, QERR, S34, num_coeffs,
+                                      sce->sf_idx[sfb], cb_p, s->lambda / band->threshold, INFINITY,
+                                      &cost2, NULL, 0);
+
+        /* Reconstructed coefficients - needed for distortion measurements */
+        for (i = 0; i < num_coeffs; i++)
+            sce->prcoeffs[start_coef + i] += QERR[i] != 0.0f ? (sce->prcoeffs[start_coef + i] - QERR[i]) : 0.0f;
+        abs_pow34_v(P34, &sce->prcoeffs[start_coef], num_coeffs);
+        if (cb_n < RESERVED_BT)
+            cb_p = av_clip(find_min_book(find_max_val(1, num_coeffs, P34), sce->sf_idx[sfb]), cb_min, cb_max);
+        else
+            cb_p = cb_n;
+        dist2 = quantize_and_encode_band_cost(s, NULL, &sce->prcoeffs[start_coef], NULL,
+                                              P34, num_coeffs, sce->sf_idx[sfb],
+                                              cb_p, s->lambda / band->threshold, INFINITY, NULL, NULL, 0);
+        for (i = 0; i < num_coeffs; i++)
+            dist_spec_err += (O34[i] - P34[i])*(O34[i] - P34[i]);
+        dist_spec_err *= s->lambda / band->threshold;
+        dist2 += dist_spec_err;
+
+        if (dist2 <= dist1 && cb_p <= cb_n) { /* prediction is no worse in distortion and codebook index */
+            cost_pred += cost2;
+            sce->ics.prediction_used[sfb] = 1;
+            sce->band_alt[sfb]  = cb_n;
+            sce->band_type[sfb] = cb_p;
+            count++;
+        } else {
+            cost_pred += cost1;
+            sce->band_alt[sfb] = cb_p;
+        }
+    }
+
+    if (count && cost_coeffs < cost_pred) { /* prediction costs more bits overall: drop it for every band */
+        count = 0;
+        for (sfb = PRED_SFB_START; sfb < pmax; sfb++)
+            RESTORE_PRED(sce, sfb);
+        memset(&sce->ics.prediction_used, 0, sizeof(sce->ics.prediction_used));
+    }
+
+    sce->ics.predictor_present = !!count;
+}
+
+/**
+ * Write the main-profile predictor data: reset flag, optional 5-bit reset
+ * group, then one prediction_used bit per band below pmax.
+ */
+void ff_aac_encode_main_pred(AACEncContext *s, SingleChannelElement *sce)
+{
+    const IndividualChannelStream *ics = &sce->ics;
+    const int reset_group = ics->predictor_reset_group;
+    const int pmax = FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]);
+    int band;
+
+    if (s->profile == FF_PROFILE_AAC_MAIN && ics->predictor_present) {
+        put_bits(&s->pb, 1, !!reset_group);
+        if (reset_group)
+            put_bits(&s->pb, 5, reset_group);
+        for (band = 0; band < pmax; band++)
+            put_bits(&s->pb, 1, ics->prediction_used[band]);
+    }
+}
diff --git a/libavcodec/aacenc_pred.h b/libavcodec/aacenc_pred.h
new file mode 100644
index 00000000..aa305f45
--- /dev/null
+++ b/libavcodec/aacenc_pred.h
@@ -0,0 +1,47 @@
+/*
+ * AAC encoder main-type prediction
+ * Copyright (C) 2015 Rostislav Pehlivanov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * AAC encoder main-type prediction
+ * @author Rostislav Pehlivanov ( atomnuker gmail com )
+ */
+
+#ifndef AVCODEC_AACENC_PRED_H
+#define AVCODEC_AACENC_PRED_H
+
+#include "aacenc.h"
+
+/* Every predictor group needs to get reset at least once in this many frames */
+#define PRED_RESET_FRAME_MIN 240
+
+/* A predictor group with no more than this many frames since its last reset need not be reset yet */
+#define PRED_RESET_MIN 64
+
+/* Raise to filter any low frequency artifacts due to prediction */
+#define PRED_SFB_START 10
+
+void ff_aac_apply_main_pred(AACEncContext *s, SingleChannelElement *sce); /* run the predictors for this frame, or reset them on short windows */
+void ff_aac_adjust_common_pred(AACEncContext *s, ChannelElement *cpe); /* reconcile the prediction flags of a common-window pair */
+void ff_aac_search_for_pred(AACEncContext *s, SingleChannelElement *sce); /* choose which bands use prediction */
+void ff_aac_encode_main_pred(AACEncContext *s, SingleChannelElement *sce); /* write the predictor data (main profile only) */
+
+#endif /* AVCODEC_AACENC_PRED_H */
diff --git a/libavcodec/aacenc_quantization.h b/libavcodec/aacenc_quantization.h
new file mode 100644
index 00000000..42504074
--- /dev/null
+++ b/libavcodec/aacenc_quantization.h
@@ -0,0 +1,283 @@
+/*
+ * AAC encoder quantizer
+ * Copyright (C) 2015 Rostislav Pehlivanov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * AAC encoder quantizer
+ * @author Rostislav Pehlivanov ( atomnuker gmail com )
+ */
+
+#ifndef AVCODEC_AACENC_QUANTIZATION_H
+#define AVCODEC_AACENC_QUANTIZATION_H
+
+#include "aactab.h"
+#include "aacenc.h"
+#include "aacenctab.h"
+#include "aacenc_utils.h"
+
+/**
+ * Calculate rate distortion cost for quantizing with given codebook;
+ * when pb is non-NULL the codewords are also written to it.
+ * @return lambda-weighted distortion plus bits, or uplim once the cost reaches it
+ */
+static av_always_inline float quantize_and_encode_band_cost_template(
+                                struct AACEncContext *s,
+                                PutBitContext *pb, const float *in, float *out,
+                                const float *scaled, int size, int scale_idx,
+                                int cb, const float lambda, const float uplim,
+                                int *bits, float *energy, int BT_ZERO, int BT_UNSIGNED,
+                                int BT_PAIR, int BT_ESC, int BT_NOISE, int BT_STEREO,
+                                const float ROUNDING)
+{
+    const int q_idx = POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512;
+    const float Q   = ff_aac_pow2sf_tab [q_idx];
+    const float Q34 = ff_aac_pow34sf_tab[q_idx];
+    const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
+    const float CLIPPED_ESCAPE = 165140.0f*IQ;
+    int i, j;
+    float cost = 0;
+    float qenergy = 0;
+    const int dim = BT_PAIR ? 2 : 4; /* coefficients covered by one codeword */
+    int resbits = 0;
+    int off;
+
+    if (BT_ZERO || BT_NOISE || BT_STEREO) { /* nothing is quantized: zero bits, cost = lambda * input energy */
+        for (i = 0; i < size; i++)
+            cost += in[i]*in[i];
+        if (bits)
+            *bits = 0;
+        if (energy)
+            *energy = qenergy;
+        if (out) {
+            for (i = 0; i < size; i += dim)
+                for (j = 0; j < dim; j++)
+                    out[i+j] = 0.0f;
+        }
+        return cost * lambda;
+    }
+    if (!scaled) { /* no scaled input supplied: compute abs_pow34_v() of in into s->scoefs */
+        abs_pow34_v(s->scoefs, in, size);
+        scaled = s->scoefs;
+    }
+    quantize_bands(s->qcoefs, in, scaled, size, Q34, !BT_UNSIGNED, aac_cb_maxval[cb], ROUNDING);
+    if (BT_UNSIGNED) {
+        off = 0;
+    } else {
+        off = aac_cb_maxval[cb]; /* shift signed values so the codebook index is non-negative */
+    }
+    for (i = 0; i < size; i += dim) {
+        const float *vec;
+        int *quants = s->qcoefs + i;
+        int curidx = 0;
+        int curbits;
+        float quantized, rd = 0.0f;
+        for (j = 0; j < dim; j++) {
+            curidx *= aac_cb_range[cb];
+            curidx += quants[j] + off;
+        }
+        curbits =  ff_aac_spectral_bits[cb-1][curidx];
+        vec     = &ff_aac_codebook_vectors[cb-1][curidx*dim];
+        if (BT_UNSIGNED) {
+            for (j = 0; j < dim; j++) {
+                float t = fabsf(in[i+j]);
+                float di;
+                if (BT_ESC && vec[j] == 64.0f) { //FIXME: slow
+                    if (t >= CLIPPED_ESCAPE) {
+                        quantized = CLIPPED_ESCAPE;
+                        curbits += 21;
+                    } else {
+                        int c = av_clip_uintp2(quant(t, Q, ROUNDING), 13);
+                        quantized = c*cbrtf(c)*IQ;
+                        curbits += av_log2(c)*2 - 4 + 1;
+                    }
+                } else {
+                    quantized = vec[j]*IQ;
+                }
+                di = t - quantized;
+                if (out)
+                    out[i+j] = in[i+j] >= 0 ? quantized : -quantized;
+                if (vec[j] != 0.0f)
+                    curbits++; /* one sign bit per non-zero value */
+                qenergy += quantized*quantized;
+                rd += di*di;
+            }
+        } else {
+            for (j = 0; j < dim; j++) {
+                quantized = vec[j]*IQ;
+                qenergy += quantized*quantized;
+                if (out)
+                    out[i+j] = quantized;
+                rd += (in[i+j] - quantized)*(in[i+j] - quantized);
+            }
+        }
+        cost    += rd * lambda + curbits;
+        resbits += curbits;
+        if (cost >= uplim) /* early out: *bits and *energy are left unwritten on this path */
+            return uplim;
+        if (pb) { /* emit the codeword, then sign bits, then escape sequences */
+            put_bits(pb, ff_aac_spectral_bits[cb-1][curidx], ff_aac_spectral_codes[cb-1][curidx]);
+            if (BT_UNSIGNED)
+                for (j = 0; j < dim; j++)
+                    if (ff_aac_codebook_vectors[cb-1][curidx*dim+j] != 0.0f)
+                        put_bits(pb, 1, in[i+j] < 0.0f);
+            if (BT_ESC) {
+                for (j = 0; j < 2; j++) {
+                    if (ff_aac_codebook_vectors[cb-1][curidx*2+j] == 64.0f) {
+                        int coef = av_clip_uintp2(quant(fabsf(in[i+j]), Q, ROUNDING), 13);
+                        int len = av_log2(coef);
+
+                        put_bits(pb, len - 4 + 1, (1 << (len - 4 + 1)) - 2);
+                        put_sbits(pb, len, coef);
+                    }
+                }
+            }
+        }
+    }
+
+    if (bits)
+        *bits = resbits;
+    if (energy)
+        *energy = qenergy;
+    return cost;
+}
+
+static inline float quantize_and_encode_band_cost_NONE(struct AACEncContext *s, PutBitContext *pb,
+                                                const float *in, float *quant, const float *scaled,
+                                                int size, int scale_idx, int cb,
+                                                const float lambda, const float uplim,
+                                                int *bits, float *energy) {
+    av_assert0(0); /* placeholder for the codebook slot that does not exist; must never be called */
+    return 0.0f;
+}
+
+#define QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NAME, BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE, BT_STEREO, ROUNDING) \
+static float quantize_and_encode_band_cost_ ## NAME(                                         \
+                                struct AACEncContext *s,                                     \
+                                PutBitContext *pb, const float *in, float *quant,            \
+                                const float *scaled, int size, int scale_idx,                \
+                                int cb, const float lambda, const float uplim,               \
+                                int *bits, float *energy) {                                  \
+    return quantize_and_encode_band_cost_template(                                           \
+                                s, pb, in, quant, scaled, size, scale_idx,                   \
+                                BT_ESC ? ESC_BT : cb, lambda, uplim, bits, energy,           \
+                                BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE, BT_STEREO,  \
+                                ROUNDING);                                                   \
+}
+
+QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ZERO,  1, 0, 0, 0, 0, 0, ROUND_STANDARD) /* no bits, energy-only cost */
+QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SQUAD, 0, 0, 0, 0, 0, 0, ROUND_STANDARD) /* signed, 4 values per codeword */
+QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UQUAD, 0, 1, 0, 0, 0, 0, ROUND_STANDARD) /* unsigned, 4 values per codeword */
+QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SPAIR, 0, 0, 1, 0, 0, 0, ROUND_STANDARD) /* signed, 2 values per codeword */
+QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UPAIR, 0, 1, 1, 0, 0, 0, ROUND_STANDARD) /* unsigned, 2 values per codeword */
+QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ESC,   0, 1, 1, 1, 0, 0, ROUND_STANDARD) /* unsigned pairs with escape coding; always uses ESC_BT */
+QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ESC_RTZ, 0, 1, 1, 1, 0, 0, ROUND_TO_ZERO) /* as ESC, but with ROUND_TO_ZERO */
+QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NOISE, 0, 0, 0, 0, 1, 0, ROUND_STANDARD) /* takes the same no-bits path as ZERO */
+QUANTIZE_AND_ENCODE_BAND_COST_FUNC(STEREO,0, 0, 0, 0, 0, 1, ROUND_STANDARD) /* takes the same no-bits path as ZERO */
+
+static float (*const quantize_and_encode_band_cost_arr[])( /* indexed by codebook number cb */
+                                struct AACEncContext *s,
+                                PutBitContext *pb, const float *in, float *quant,
+                                const float *scaled, int size, int scale_idx,
+                                int cb, const float lambda, const float uplim,
+                                int *bits, float *energy) = {
+    quantize_and_encode_band_cost_ZERO,     /* CB 0 */
+    quantize_and_encode_band_cost_SQUAD,
+    quantize_and_encode_band_cost_SQUAD,
+    quantize_and_encode_band_cost_UQUAD,
+    quantize_and_encode_band_cost_UQUAD,
+    quantize_and_encode_band_cost_SPAIR,
+    quantize_and_encode_band_cost_SPAIR,
+    quantize_and_encode_band_cost_UPAIR,
+    quantize_and_encode_band_cost_UPAIR,
+    quantize_and_encode_band_cost_UPAIR,
+    quantize_and_encode_band_cost_UPAIR,
+    quantize_and_encode_band_cost_ESC,      /* CB 11 */
+    quantize_and_encode_band_cost_NONE,     /* CB 12 doesn't exist */
+    quantize_and_encode_band_cost_NOISE,
+    quantize_and_encode_band_cost_STEREO,
+    quantize_and_encode_band_cost_STEREO,
+};
+
+static float (*const quantize_and_encode_band_cost_rtz_arr[])( /* same layout as the standard table; only the escape entry differs */
+                                struct AACEncContext *s,
+                                PutBitContext *pb, const float *in, float *quant,
+                                const float *scaled, int size, int scale_idx,
+                                int cb, const float lambda, const float uplim,
+                                int *bits, float *energy) = {
+    quantize_and_encode_band_cost_ZERO,
+    quantize_and_encode_band_cost_SQUAD,
+    quantize_and_encode_band_cost_SQUAD,
+    quantize_and_encode_band_cost_UQUAD,
+    quantize_and_encode_band_cost_UQUAD,
+    quantize_and_encode_band_cost_SPAIR,
+    quantize_and_encode_band_cost_SPAIR,
+    quantize_and_encode_band_cost_UPAIR,
+    quantize_and_encode_band_cost_UPAIR,
+    quantize_and_encode_band_cost_UPAIR,
+    quantize_and_encode_band_cost_UPAIR,
+    quantize_and_encode_band_cost_ESC_RTZ,  /* round-to-zero variant of the escape coder */
+    quantize_and_encode_band_cost_NONE,     /* CB 12 doesn't exist */
+    quantize_and_encode_band_cost_NOISE,
+    quantize_and_encode_band_cost_STEREO,
+    quantize_and_encode_band_cost_STEREO,
+};
+
+#define quantize_and_encode_band_cost(                                  \
+                                s, pb, in, quant, scaled, size, scale_idx, cb, \
+                                lambda, uplim, bits, energy, rtz)               \
+    ((rtz) ? quantize_and_encode_band_cost_rtz_arr : quantize_and_encode_band_cost_arr)[cb]( \
+                                s, pb, in, quant, scaled, size, scale_idx, cb, \
+                                lambda, uplim, bits, energy) /* cb is expanded twice (table index and argument): pass a side-effect-free expression */
+
+static inline float quantize_band_cost(struct AACEncContext *s, const float *in,
+                                const float *scaled, int size, int scale_idx,
+                                int cb, const float lambda, const float uplim,
+                                int *bits, float *energy, int rtz)
+{ /* Cost-only query: no bitstream writer and no reconstructed output are passed on. */
+    return quantize_and_encode_band_cost(s, NULL, in, NULL, scaled, size, scale_idx,
+                                         cb, lambda, uplim, bits, energy, rtz);
+}
+
+static inline int quantize_band_cost_bits(struct AACEncContext *s, const float *in,
+                                const float *scaled, int size, int scale_idx,
+                                int cb, const float lambda, const float uplim,
+                                int *bits, float *energy, int rtz)
+{ /* Returns only the bit count; the lambda argument is ignored and 0.0f is passed instead. */
+    int auxbits; /* NOTE(review): stays unset if the callee returns early on cost >= uplim - confirm callers cannot hit that */
+    quantize_and_encode_band_cost(s, NULL, in, NULL, scaled, size, scale_idx,
+                                         cb, 0.0f, uplim, &auxbits, energy, rtz);
+    if (bits) {
+        *bits = auxbits;
+    }
+    return auxbits;
+}
+
+static inline void quantize_and_encode_band(struct AACEncContext *s, PutBitContext *pb,
+                                            const float *in, float *out, int size, int scale_idx,
+                                            int cb, const float lambda, int rtz)
+{ /* Encoding entry point: writes to pb with no cost limit (INFINITY) and discards the bit/energy outputs. */
+    quantize_and_encode_band_cost(s, pb, in, out, NULL, size, scale_idx, cb, lambda,
+                                  INFINITY, NULL, NULL, rtz);
+}
+
+#include "aacenc_quantization_misc.h"
+
+#endif /* AVCODEC_AACENC_QUANTIZATION_H */
diff --git a/libavcodec/aacenc_quantization_misc.h b/libavcodec/aacenc_quantization_misc.h
new file mode 100644
index 00000000..eaa71c96
--- /dev/null
+++ b/libavcodec/aacenc_quantization_misc.h
@@ -0,0 +1,52 @@
+/*
+ * AAC encoder quantization
+ * Copyright (C) 2015 Claudio Freire
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * AAC encoder quantization misc reusable function templates
+ * @author Claudio Freire ( klaussfreire gmail com )
+ */
+
+#ifndef AVCODEC_AACENC_QUANTIZATION_MISC_H
+#define AVCODEC_AACENC_QUANTIZATION_MISC_H
+
+/* Memoized quantize_band_cost(): one cache slot per (scale_idx, w*16+g), refilled unless cb and rtz match. */
+static inline float quantize_band_cost_cached(struct AACEncContext *s, int w, int g, const float *in,
+                                const float *scaled, int size, int scale_idx,
+                                int cb, const float lambda, const float uplim,
+                                int *bits, float *energy, int rtz)
+{
+    AACQuantizeBandCostCacheEntry *slot;
+    av_assert1(scale_idx >= 0 && scale_idx < 256);
+    slot = &s->quantize_band_cost_cache[scale_idx][w*16+g];
+    /* a negative bits field always forces a refill; lambda and uplim are not part of the check */
+    if (!(slot->bits >= 0 && slot->cb == cb && slot->rtz == rtz)) {
+        slot->rd  = quantize_band_cost(s, in, scaled, size, scale_idx, cb, lambda, uplim,
+                                       &slot->bits, &slot->energy, rtz);
+        slot->cb  = cb;
+        slot->rtz = rtz;
+    }
+    if (bits)   *bits   = slot->bits;
+    if (energy) *energy = slot->energy;
+    return slot->rd;
+}
+
+#endif /* AVCODEC_AACENC_QUANTIZATION_MISC_H */
diff --git a/libavcodec/aacenc_tns.c b/libavcodec/aacenc_tns.c
new file mode 100644
index 00000000..2ffe1f8d
--- /dev/null
+++ b/libavcodec/aacenc_tns.c
@@ -0,0 +1,215 @@
+/*
+ * AAC encoder TNS
+ * Copyright (C) 2015 Rostislav Pehlivanov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * AAC encoder temporal noise shaping
+ * @author Rostislav Pehlivanov ( atomnuker gmail com )
+ */
+
+#include "libavutil/libm.h"
+#include "aacenc.h"
+#include "aacenc_tns.h"
+#include "aactab.h"
+#include "aacenc_utils.h"
+#include "aacenc_quantization.h"
+
+/* Long-window coefficient resolution (c_bits is set iff this is 4); 3 would save a bit at the cost of little quality */
+#define TNS_Q_BITS 4
+
+/* Coefficient resolution in short windows (c_bits is set iff this is 4) */
+#define TNS_Q_BITS_IS8 4
+
+/* We really need the bits we save here elsewhere; when undefined, compress_coeffs() always returns 0 */
+#define TNS_ENABLE_COEF_COMPRESSION
+
+/* TNS will only be used if the LPC gain is within these margins; HIGH is parenthesized so it expands as one value */
+#define TNS_GAIN_THRESHOLD_LOW      1.4f
+#define TNS_GAIN_THRESHOLD_HIGH     (1.16f*TNS_GAIN_THRESHOLD_LOW)
+
+/* Lower every index above hi by wrap and return 1, unless some index lies in [lo, hi] (then return 0, coef[] untouched). */
+static inline int compress_coeffs(int *coef, int order, int c_bits)
+{
+    const int lo = c_bits ? 4 : 2, hi = c_bits ? 11 : 5, wrap = c_bits ? 8 : 4;
+    int k;
+#ifndef TNS_ENABLE_COEF_COMPRESSION
+    return 0;
+#endif /* TNS_ENABLE_COEF_COMPRESSION */
+    for (k = 0; k < order; k++)
+        if (lo <= coef[k] && coef[k] <= hi)
+            return 0;
+    for (k = 0; k < order; k++)
+        if (coef[k] > hi)
+            coef[k] -= wrap;
+    return 1;
+}
+
+/**
+ * Write the TNS side info of one channel to s->pb (no-op unless tns.present).
+ * compress_coeffs() may rewrite tns->coef_idx in place, after which each index
+ * is written with one bit less. NOTE(review): confirm this runs once per frame.
+ */
+void ff_aac_encode_tns_info(AACEncContext *s, SingleChannelElement *sce)
+{
+    TemporalNoiseShaping *tns = &sce->tns;
+    PutBitContext *pb = &s->pb;
+    const int is8    = sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE;
+    const int c_bits = is8 ? TNS_Q_BITS_IS8 == 4 : TNS_Q_BITS == 4;
+    int win, filt, k, compressed;
+
+    if (!tns->present)
+        return;
+    for (win = 0; win < sce->ics.num_windows; win++) {
+        const int n_filt = tns->n_filt[win];
+        put_bits(pb, 2 - is8, n_filt);
+        if (n_filt)
+            put_bits(pb, 1, c_bits); /* coefficient resolution, once per window with filters */
+        for (filt = 0; filt < n_filt; filt++) {
+            int *idx = tns->coef_idx[win][filt];
+            const int order = tns->order[win][filt];
+            put_bits(pb, 6 - 2 * is8, tns->length[win][filt]);
+            put_bits(pb, 5 - 2 * is8, order);
+            if (!order)
+                continue;
+            put_bits(pb, 1, tns->direction[win][filt]);
+            compressed = compress_coeffs(idx, order, c_bits);
+            put_bits(pb, 1, compressed);
+            for (k = 0; k < order; k++)
+                put_bits(pb, c_bits + 3 - compressed, idx[k]);
+        }
+    }
+}
+
+/*
+ * Run the TNS filters of every window over sce->coeffs, taking the filter input from sce->pcoeffs.
+ */
+void ff_aac_apply_tns(AACEncContext *s, SingleChannelElement *sce)
+{
+    TemporalNoiseShaping *tns = &sce->tns;
+    IndividualChannelStream *ics = &sce->ics;
+    const int max_band = FFMIN(ics->tns_max_bands, ics->max_sfb);
+    float lpc[TNS_MAX_ORDER];
+    int w, filt;
+
+    for (w = 0; w < ics->num_windows; w++) {
+        int bottom = ics->num_swb; /* each filter covers the bands just below the previous one */
+        for (filt = 0; filt < tns->n_filt[w]; filt++) {
+            const int top   = bottom;
+            const int order = tns->order[w][filt];
+            int first, last, len, step, pos, n, tap;
+
+            bottom = FFMAX(0, top - tns->length[w][filt]);
+            if (!order)
+                continue;
+
+            /* lpc[] is produced by compute_lpc_coefs() from the stored tns->coef values */
+            compute_lpc_coefs(tns->coef[w][filt], order, lpc, 0, 0, 0);
+
+            first = ics->swb_offset[FFMIN(bottom, max_band)];
+            last  = ics->swb_offset[FFMIN(   top, max_band)];
+            len   = last - first;
+            if (len <= 0)
+                continue;
+            step = tns->direction[w][filt] ? -1 : 1;
+            pos  = w * 128 + (step < 0 ? last - 1 : first);
+
+            /* AR filter: the n-th sample uses at most n previously visited samples */
+            for (n = 0; n < len; n++, pos += step) {
+                const int taps = FFMIN(n, order);
+                for (tap = 1; tap <= taps; tap++)
+                    sce->coeffs[pos] += lpc[tap-1]*sce->pcoeffs[pos - tap*step];
+            }
+        }
+    }
+}
+
+/* Snap coef[0..order) to the nearest entries of tns_tmp2_map[c_bits]: idx[] receives the entry
+ * index and lpc[] the entry value. The search spans 16 entries if c_bits is set, 8 otherwise. */
+static inline void quantize_coefs(double *coef, int *idx, float *lpc, int order, int c_bits)
+{
+    const float *table   = tns_tmp2_map[c_bits];
+    const int    entries = c_bits ? 16 : 8;
+    int n;
+    for (n = 0; n < order; n++) {
+        const int best = quant_array_idx(coef[n], table, entries);
+        idx[n] = best;
+        lpc[n] = table[best];
+    }
+}
+
+/* Per-window TNS search: run LPC over the coefficients of bands [sfb_start, sfb_end) and, when the
+ * gain lies within [TNS_GAIN_THRESHOLD_LOW, TNS_GAIN_THRESHOLD_HIGH], fill n_filt, direction, order,
+ * length and the quantized coefficients in sce->tns. tns.present is set iff some window qualified. */
+void ff_aac_search_for_tns(AACEncContext *s, SingleChannelElement *sce)
+{
+    TemporalNoiseShaping *tns = &sce->tns;
+    int w, g, count = 0;
+    double gain, coefs[MAX_LPC_ORDER];
+    const int mmm = FFMIN(sce->ics.tns_max_bands, sce->ics.max_sfb);
+    const int is8 = sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE;
+    const int c_bits = is8 ? TNS_Q_BITS_IS8 == 4 : TNS_Q_BITS == 4;
+    const int sfb_start = av_clip(tns_min_sfb[is8][s->samplerate_index], 0, mmm);
+    const int sfb_end   = av_clip(sce->ics.num_swb, 0, mmm);
+    const int order = is8 ? 7 : s->profile == FF_PROFILE_AAC_LOW ? 12 : TNS_MAX_ORDER;
+    const int slant = sce->ics.window_sequence[0] == LONG_STOP_SEQUENCE  ? 1 :
+                      sce->ics.window_sequence[0] == LONG_START_SEQUENCE ? 0 : 2;
+    const int sfb_len = sfb_end - sfb_start;
+    const int coef_len = sce->ics.swb_offset[sfb_end] - sce->ics.swb_offset[sfb_start];
+
+    if (coef_len <= 0 || sfb_len <= 0) {
+        sce->tns.present = 0;
+        return;
+    }
+
+    for (w = 0; w < sce->ics.num_windows; w++) {
+        float en[2] = {0.0f, 0.0f}; /* psy band energy of the lower and upper part of the range */
+        int oc_start = 0, os_start = 0;
+        int coef_start = sce->ics.swb_offset[sfb_start];
+
+        for (g = sfb_start; g < sce->ics.num_swb && g <= sfb_end; g++) {
+            FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[w*16+g];
+            if (g > sfb_start + (sfb_len/2))
+                en[1] += band->energy;
+            else
+                en[0] += band->energy;
+        }
+
+        /* LPC */
+        gain = ff_lpc_calc_ref_coefs_f(&s->lpc, &sce->coeffs[w*128 + coef_start],
+                                       coef_len, order, coefs);
+
+        if (!order || !isfinite(gain) || gain < TNS_GAIN_THRESHOLD_LOW || gain > TNS_GAIN_THRESHOLD_HIGH)
+            continue; /* NOTE(review): n_filt[w] is left untouched for a skipped window */
+        /* NOTE(review): "g < n_filt" is always true inside the loop below, so its remainder branches never run */
+        tns->n_filt[w] = is8 ? 1 : order != TNS_MAX_ORDER ? 2 : 3;
+        for (g = 0; g < tns->n_filt[w]; g++) {
+            /* en[] has two entries only: a third filter (g == 2) reuses the upper one instead of reading en[2] */
+            tns->direction[w][g] = slant != 2 ? slant : en[FFMIN(g, 1)] < en[!g];
+            tns->order[w][g] = g < tns->n_filt[w] ? order/tns->n_filt[w] : order - oc_start;
+            tns->length[w][g] = g < tns->n_filt[w] ? sfb_len/tns->n_filt[w] : sfb_len - os_start;
+            quantize_coefs(&coefs[oc_start], tns->coef_idx[w][g], tns->coef[w][g],
+                            tns->order[w][g], c_bits);
+            oc_start += tns->order[w][g];
+            os_start += tns->length[w][g];
+        }
+        count++;
+    }
+    sce->tns.present = !!count;
+}
diff --git a/libavcodec/aacenc_tns.h b/libavcodec/aacenc_tns.h
new file mode 100644
index 00000000..466738dd
--- /dev/null
+++ b/libavcodec/aacenc_tns.h
@@ -0,0 +1,37 @@
+/*
+ * AAC encoder TNS
+ * Copyright (C) 2015 Rostislav Pehlivanov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * AAC encoder temporal noise shaping
+ * @author Rostislav Pehlivanov ( atomnuker gmail com )
+ */
+
+#ifndef AVCODEC_AACENC_TNS_H
+#define AVCODEC_AACENC_TNS_H
+
+#include "aacenc.h"
+
+void ff_aac_encode_tns_info(AACEncContext *s, SingleChannelElement *sce);
+void ff_aac_apply_tns(AACEncContext *s, SingleChannelElement *sce);
+void ff_aac_search_for_tns(AACEncContext *s, SingleChannelElement *sce);
+
+#endif /* AVCODEC_AACENC_TNS_H */
diff --git a/libavcodec/aacenc_utils.h b/libavcodec/aacenc_utils.h
new file mode 100644
index 00000000..07f73374
--- /dev/null
+++ b/libavcodec/aacenc_utils.h
@@ -0,0 +1,266 @@
+/*
+ * AAC encoder utilities
+ * Copyright (C) 2015 Rostislav Pehlivanov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * AAC encoder utilities
+ * @author Rostislav Pehlivanov ( atomnuker gmail com )
+ */
+
+#ifndef AVCODEC_AACENC_UTILS_H
+#define AVCODEC_AACENC_UTILS_H
+
+#include "libavutil/internal.h"
+#include "aac.h"
+#include "aacenctab.h"
+#include "aactab.h"
+
+#define ROUND_STANDARD 0.4054f /* rounding offset; not referenced inside this header */
+#define ROUND_TO_ZERO 0.1054f /* smaller rounding offset; not referenced inside this header */
+#define C_QUANT 0.4054f /* offset added before truncation in find_min_book() */
+
+static inline void abs_pow34_v(float *out, const float *in, const int size)
+{
+    int left;
+    for (left = size; left > 0; left--, in++, out++) { /* *out = |*in|^(3/4) */
+        const float mag = fabsf(*in);
+        *out = sqrtf(mag * sqrtf(mag));
+    }
+}
+
+static inline float pos_pow34(float a)
+{
+    return sqrtf(sqrtf(a) * a); /* a^(3/4), meant for a >= 0 */
+}
+
+/**
+ * Quantize a single coefficient: (coef * Q)^(3/4) + rounding, truncated to int.
+ * @return the quantized magnitude
+ * @see 3GPP TS26.403 5.6.2 "Scalefactor determination"
+ */
+static inline int quant(float coef, const float Q, const float rounding)
+{
+    const float scaled = Q * coef;
+    return (int)(rounding + sqrtf(sqrtf(scaled) * scaled));
+}
+
+/* out[n] = (int)min(scaled[n]*Q34 + rounding, maxval), negated where is_signed is set and in[n] < 0. */
+static inline void quantize_bands(int *out, const float *in, const float *scaled,
+                                  int size, float Q34, int is_signed, int maxval,
+                                  const float rounding)
+{
+    const float cap = (float)maxval;
+    int n;
+    for (n = 0; n < size; n++) {
+        /* in[] supplies the sign only and is not read unless is_signed is nonzero */
+        const float qc  = scaled[n] * Q34;
+        const int   mag = (int)FFMIN(qc + rounding, cap);
+        out[n] = (is_signed && in[n] < 0.0f) ? -mag : mag;
+    }
+}
+
+/* Largest scaled[] value over group_len windows (128 apart) of swb_size entries each; never below 0. */
+static inline float find_max_val(int group_len, int swb_size, const float *scaled)
+{
+    float peak = 0.0f;
+    int win, k;
+
+    for (win = 0; win < group_len; win++)
+        for (k = 0; k < swb_size; k++)
+            peak = FFMAX(peak, scaled[win*128 + k]);
+    return peak;
+}
+
+/* Quantize maxval at scalefactor sf (C_QUANT rounding) and look the result up in aac_maxval_cb. */
+static inline int find_min_book(float maxval, int sf)
+{
+    const float Q34     = ff_aac_pow34sf_tab[POW_SF2_ZERO - sf + SCALE_ONE_POS - SCALE_DIV_512];
+    const int   qmaxval = maxval * Q34 + C_QUANT;
+
+    /* anything at or past the end of the table selects codebook 11 */
+    if (qmaxval >= (FF_ARRAY_ELEMS(aac_maxval_cb)))
+        return 11;
+    return aac_maxval_cb[qmaxval];
+}
+/* Mean-square-weighted average, over the group's windows, of sqrt(frm) / max(0.5, nzl); 1.0f if no window has e2 > thresh. */
+static inline float find_form_factor(int group_len, int swb_size, float thresh,
+                                     const float *scaled, float nzslope) {
+    const float iswb_size = 1.0f / swb_size;
+    const float iswb_sizem1 = 1.0f / (swb_size - 1); /* NOTE(review): divides by zero if swb_size == 1 */
+    const float ethresh = thresh;
+    float form = 0.0f, weight = 0.0f;
+    int w2, i;
+    for (w2 = 0; w2 < group_len; w2++) { /* consecutive windows are 128 entries apart */
+        float e = 0.0f, e2 = 0.0f, var = 0.0f, maxval = 0.0f;
+        float nzl = 0; /* soft count of lines at or above the threshold */
+        for (i = 0; i < swb_size; i++) {
+            float s = fabsf(scaled[w2*128+i]);
+            maxval = FFMAX(maxval, s);
+            e += s;
+            e2 += s *= s; /* from here on s holds the squared magnitude */
+            /* We really don't want a hard non-zero-line count, since
+             * even below-threshold lines do add up towards band spectral power.
+             * So, fall steeply towards zero, but smoothly
+             */
+            if (s >= ethresh) {
+                nzl += 1.0f;
+            } else {
+                if (nzslope == 2.f)
+                    nzl += (s / ethresh) * (s / ethresh);
+                else
+                    nzl += ff_fast_powf(s / ethresh, nzslope);
+            }
+        }
+        if (e2 > thresh) { /* e2 is still the plain sum of squares at this point */
+            float frm;
+            e *= iswb_size; /* mean magnitude */
+
+            /** compute variance (var ends up as its square root, normalised by swb_size - 1) */
+            for (i = 0; i < swb_size; i++) {
+                float d = fabsf(scaled[w2*128+i]) - e;
+                var += d*d;
+            }
+            var = sqrtf(var * iswb_sizem1);
+
+            e2 *= iswb_size; /* mean square */
+            frm = e / FFMIN(e+4*var,maxval); /* mean over min(mean + 4*var, peak) */
+            form += e2 * sqrtf(frm) / FFMAX(0.5f,nzl);
+            weight += e2;
+        }
+    }
+    if (weight > 0) {
+        return form / weight;
+    } else {
+        return 1.0f; /* no window contributed */
+    }
+}
+
+/** Smallest scalefactor at which the quantized coef does not clip, passed through av_clip_uint8(). */
+static inline uint8_t coef2minsf(float coef)
+{
+    return av_clip_uint8(4*log2f(coef) - 69 + SCALE_ONE_POS - SCALE_DIV_512);
+}
+
+/** Largest scalefactor at which the quantized coef is not zero, passed through av_clip_uint8(). */
+static inline uint8_t coef2maxsf(float coef)
+{
+    return av_clip_uint8(4*log2f(coef) +  6 + SCALE_ONE_POS - SCALE_DIV_512);
+}
+
+/*
+ * Index of the arr[] entry with the smallest squared distance to val; ties keep the lowest index, 0 if num <= 0.
+ */
+static inline int quant_array_idx(const float val, const float *arr, const int num)
+{
+    float best_err = INFINITY;
+    int best = 0, k;
+    for (k = 0; k < num; k++) {
+        const float err = (val - arr[k])*(val - arr[k]);
+        if (!(err < best_err))
+            continue;
+        best_err = err;
+        best     = k;
+    }
+    return best;
+}
+
+/** Cubic stand-in for exp10f(-3.0f*(0.5f + 0.5f * cosf(FFMIN(b,15.5f) / 15.5f))):
+ *  0.001 + 0.0035 * b^3 / 15.5^3, evaluated in single precision. */
+static av_always_inline float bval2bmax(float b)
+{
+    const float span3 = 15.5f*15.5f*15.5f;
+    return 0.001f + 0.0035f * (b*b*b) / span3;
+}
+
+/*
+ * Build the nextband map used by the SF delta constraint helpers.
+ * nextband must hold 128 entries. Every entry starts out pointing at itself; the entries of
+ * bands that are not zeroed and whose band_type is below RESERVED_BT (index w*16+g, w being the
+ * first window of its group) are then chained in scan order, the last one pointing at itself.
+ */
+static inline void ff_init_nextband_map(const SingleChannelElement *sce, uint8_t *nextband)
+{
+    uint8_t tail = 0;
+    int w, g, band;
+    for (band = 0; band < 128; band++)      /* default: every entry is its own successor */
+        nextband[band] = band;
+    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+        for (g = 0; g < sce->ics.num_swb; g++) {
+            band = w*16+g;
+            if (sce->zeroes[band] || sce->band_type[band] >= RESERVED_BT)
+                continue;                   /* not part of the chain */
+            nextband[tail] = band;
+            tail           = band;
+        }
+    }
+    nextband[tail] = tail; /* terminate */
+}
+
+/* Unlink band from the chain (described as equivalent to rebuilding the map with
+ * ff_init_nextband_map after zeroing the band): prevband, the chain predecessor of band,
+ * now points at the successor of band. */
+static inline void ff_nextband_remove(uint8_t *nextband, int prevband, int band)
+{
+    const uint8_t successor = nextband[band];
+    nextband[prevband] = successor;
+}
+
+/*
+ * Tell whether band could be dropped without breaking the SF delta limit: the scalefactor of
+ * nextband[band] must lie within SCALE_MAX_DIFF of prev_sf, the scalefactor of the previous
+ * nonzero, nonspecial band in encoding order. A negative prev_sf (no such band) yields 0.
+ */
+static inline int ff_sfdelta_can_remove_band(const SingleChannelElement *sce,
+    const uint8_t *nextband, int prev_sf, int band)
+{
+    if (prev_sf < 0)
+        return 0;
+    return sce->sf_idx[nextband[band]] >= (prev_sf - SCALE_MAX_DIFF)
+        && sce->sf_idx[nextband[band]] <= (prev_sf + SCALE_MAX_DIFF);
+}
+
+/*
+ * Tell whether the scalefactor of band could become new_sf without breaking the SF delta
+ * limit: new_sf must lie within SCALE_MAX_DIFF of prev_sf (the scalefactor of the previous
+ * nonzero, nonspecial band in encoding order), and the scalefactor of nextband[band] within
+ * SCALE_MAX_DIFF of new_sf. A negative prev_sf gets no special treatment here.
+ */
+static inline int ff_sfdelta_can_replace(const SingleChannelElement *sce,
+    const uint8_t *nextband, int prev_sf, int new_sf, int band)
+{
+    if (new_sf < (prev_sf - SCALE_MAX_DIFF) || new_sf > (prev_sf + SCALE_MAX_DIFF))
+        return 0;
+    return sce->sf_idx[nextband[band]] >= (new_sf - SCALE_MAX_DIFF)
+        && sce->sf_idx[nextband[band]] <= (new_sf + SCALE_MAX_DIFF);
+}
+/* If cond holds: log an error on avctx (must be visible where the macro is used) and return AVERROR(EINVAL). Bare if-block: do not follow it with else. */
+#define ERROR_IF(cond, ...) \
+    if (cond) { \
+        av_log(avctx, AV_LOG_ERROR, __VA_ARGS__); \
+        return AVERROR(EINVAL); \
+    }
+/* If cond holds: log a warning on avctx (must be visible where the macro is used) and carry on. Bare if-block: do not follow it with else. */
+#define WARN_IF(cond, ...) \
+    if (cond) { \
+        av_log(avctx, AV_LOG_WARNING, __VA_ARGS__); \
+    }
+
+#endif /* AVCODEC_AACENC_UTILS_H */
diff --git a/libavcodec/aacenctab.c b/libavcodec/aacenctab.c
new file mode 100644
index 00000000..f3d70fbe
--- /dev/null
+++ b/libavcodec/aacenctab.c
@@ -0,0 +1,108 @@
+/*
+ * AAC encoder data
+ * Copyright (c) 2015 Rostislav Pehlivanov ( atomnuker gmail com )
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "aacenctab.h"
+
+static const uint8_t swb_size_128_96[] = { /* 12 entries, summing to 128 */
+    4, 4, 4, 4, 4, 4, 8, 8, 8, 16, 28, 36
+};
+
+static const uint8_t swb_size_128_64[] = { /* same contents as swb_size_128_96 */
+    4, 4, 4, 4, 4, 4, 8, 8, 8, 16, 28, 36
+};
+
+static const uint8_t swb_size_128_48[] = { /* 14 entries, summing to 128 */
+    4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 12, 16, 16, 16
+};
+
+static const uint8_t swb_size_128_24[] = { /* 15 entries, summing to 128 */
+    4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 20
+};
+
+static const uint8_t swb_size_128_16[] = { /* 15 entries, summing to 128 */
+    4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 12, 12, 16, 20, 20
+};
+
+static const uint8_t swb_size_128_8[] = { /* 15 entries, summing to 128 */
+    4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 12, 16, 20, 20
+};
+
+static const uint8_t swb_size_1024_96[] = { /* 41 entries, summing to 1024 */
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8,
+    12, 12, 12, 12, 12, 16, 16, 24, 28, 36, 44,
+    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
+};
+
+static const uint8_t swb_size_1024_64[] = { /* 47 entries, summing to 1024 */
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8,
+    12, 12, 12, 16, 16, 16, 20, 24, 24, 28, 36,
+    40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40
+};
+
+static const uint8_t swb_size_1024_48[] = { /* 49 entries, summing to 1024 */
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
+    12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
+    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+    96
+};
+
+static const uint8_t swb_size_1024_32[] = { /* 51 entries, summing to 1024; differs from the 48 table only in its tail */
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
+    12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
+    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
+};
+
+static const uint8_t swb_size_1024_24[] = { /* 47 entries, summing to 1024 */
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    12, 12, 12, 12, 16, 16, 16, 20, 20, 24, 24, 28, 28,
+    32, 36, 36, 40, 44, 48, 52, 52, 64, 64, 64, 64, 64
+};
+
+static const uint8_t swb_size_1024_16[] = { /* 43 entries, summing to 1024 */
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 16, 16, 16, 20, 20, 20, 24, 24, 28, 28,
+    32, 36, 40, 40, 44, 48, 52, 56, 60, 64, 64, 64
+};
+
+static const uint8_t swb_size_1024_8[] = { /* 40 entries, summing to 1024 */
+    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+    16, 16, 16, 16, 16, 16, 16, 20, 20, 20, 20, 24, 24, 24, 28, 28,
+    32, 36, 36, 40, 44, 48, 52, 56, 60, 64, 80
+};
+
+const uint8_t *ff_aac_swb_size_128[] = { /* 13 entries; NOTE(review): presumably one per sample-rate index - confirm at the users */
+    swb_size_128_96, swb_size_128_96, swb_size_128_64,
+    swb_size_128_48, swb_size_128_48, swb_size_128_48,
+    swb_size_128_24, swb_size_128_24, swb_size_128_16,
+    swb_size_128_16, swb_size_128_16, swb_size_128_8,
+    swb_size_128_8
+};
+
+const uint8_t *ff_aac_swb_size_1024[] = { /* 13 entries, laid out like ff_aac_swb_size_128 except for the separate 32 table */
+    swb_size_1024_96, swb_size_1024_96, swb_size_1024_64,
+    swb_size_1024_48, swb_size_1024_48, swb_size_1024_32,
+    swb_size_1024_24, swb_size_1024_24, swb_size_1024_16,
+    swb_size_1024_16, swb_size_1024_16, swb_size_1024_8,
+    swb_size_1024_8
+};
+
+const int ff_aac_swb_size_128_len  = FF_ARRAY_ELEMS(ff_aac_swb_size_128); /* number of pointers in the table above */
+const int ff_aac_swb_size_1024_len = FF_ARRAY_ELEMS(ff_aac_swb_size_1024); /* number of pointers in the table above */
diff --git a/libavcodec/aacenctab.h b/libavcodec/aacenctab.h
new file mode 100644
index 00000000..5fc94112
--- /dev/null
+++ b/libavcodec/aacenctab.h
@@ -0,0 +1,128 @@
+/*
+ * AAC encoder data
+ * Copyright (c) 2015 Rostislav Pehlivanov ( atomnuker gmail com )
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * AAC encoder data
+ * @author Rostislav Pehlivanov ( atomnuker gmail com )
+ */
+
+#ifndef AVCODEC_AACENCTAB_H
+#define AVCODEC_AACENCTAB_H
+
+#include "aac.h"
+
+/** Total number of usable codebooks **/
+#define CB_TOT 12
+
+/** Total number of codebooks, including special ones **/
+#define CB_TOT_ALL 15
+
+#define AAC_MAX_CHANNELS 8 /* row count of aac_chan_configs and aac_chan_maps */
+
+extern const uint8_t *ff_aac_swb_size_1024[]; /* defined in aacenctab.c */
+extern const int      ff_aac_swb_size_1024_len; /* element count of ff_aac_swb_size_1024 */
+extern const uint8_t *ff_aac_swb_size_128[]; /* defined in aacenctab.c */
+extern const int      ff_aac_swb_size_128_len; /* element count of ff_aac_swb_size_128 */
+
+/** default channel configurations: row n-1 serves n channels and reads {element count, element types...} */
+static const uint8_t aac_chan_configs[AAC_MAX_CHANNELS][6] = {
+    {1, TYPE_SCE},                                         // 1 channel  - single channel element
+    {1, TYPE_CPE},                                         // 2 channels - channel pair
+    {2, TYPE_SCE, TYPE_CPE},                               // 3 channels - center + stereo
+    {3, TYPE_SCE, TYPE_CPE, TYPE_SCE},                     // 4 channels - front center + stereo + back center
+    {3, TYPE_SCE, TYPE_CPE, TYPE_CPE},                     // 5 channels - front center + stereo + back stereo
+    {4, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_LFE},           // 6 channels - front center + stereo + back stereo + LFE
+    {0},                                                   // 7 channels - invalid without PCE
+    {5, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_CPE, TYPE_LFE}, // 8 channels - front center + front stereo + side stereo + back stereo + LFE
+};
+
+/**
+ * Table to remap channels from libavcodec's default order to AAC order.
+ * Row n-1 serves n channels; the 7-channel row is a placeholder ({ 0 }).
+ */
+static const uint8_t aac_chan_maps[AAC_MAX_CHANNELS][AAC_MAX_CHANNELS] = {
+    { 0 },
+    { 0, 1 },
+    { 2, 0, 1 },
+    { 2, 0, 1, 3 },
+    { 2, 0, 1, 3, 4 },
+    { 2, 0, 1, 4, 5, 3 },
+    { 0 },
+    { 2, 0, 1, 6, 7, 4, 5, 3 },
+};
+
+/* duplicated from avpriv_mpeg4audio_sample_rates to avoid shared build
+ * failures; 13 rates are listed, the remaining 3 entries are zero */
+static const int mpeg4audio_sample_rates[16] = {
+    96000, 88200, 64000, 48000, 44100, 32000,
+    24000, 22050, 16000, 12000, 11025, 8000, 7350
+};
+
+/** bits needed to code codebook run value for long windows */
+static const uint8_t run_value_bits_long[64] = {
+     5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+     5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5, 10,
+    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 15
+};
+
+/** bits needed to code codebook run value for short windows */
+static const uint8_t run_value_bits_short[16] = {
+    3, 3, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 9
+};
+
+/* TNS starting SFBs for short and long windows, looked up by sample-rate index in aacenc_tns.c */
+static const uint8_t tns_min_sfb_short[16] = {
+    2, 2, 2, 3, 3, 4, 6, 6, 8, 10, 10, 12, 12, 12, 12, 12
+};
+
+static const uint8_t tns_min_sfb_long[16] = {
+    12, 13, 15, 16, 17, 20, 25, 26, 24, 28, 30, 31, 31, 31, 31, 31
+};
+
+static const uint8_t * const tns_min_sfb[2] = { /* [0] long, [1] short: indexed with is8 in aacenc_tns.c */
+    tns_min_sfb_long, tns_min_sfb_short
+};
+
+static const uint8_t * const run_value_bits[2] = { /* [0] long, [1] short */
+    run_value_bits_long, run_value_bits_short
+};
+
+/** Map to convert values from BandCodingPath index to a codebook index (codebook 12 is skipped) **/
+static const uint8_t aac_cb_out_map[CB_TOT_ALL]  = {0,1,2,3,4,5,6,7,8,9,10,11,13,14,15};
+/** Inverse map to convert from codebooks to BandCodingPath indices (codebook 12 maps to 0) **/
+static const uint8_t aac_cb_in_map[CB_TOT_ALL+1] = {0,1,2,3,4,5,6,7,8,9,10,11,0,12,13,14};
+
+static const uint8_t aac_cb_range [12] = {0, 3, 3, 3, 3, 9, 9, 8, 8, 13, 13, 17}; /* one entry per codebook 0..11 */
+static const uint8_t aac_cb_maxval[12] = {0, 1, 1, 2, 2, 4, 4, 7, 7, 12, 12, 16}; /* one entry per codebook 0..11 */
+
+static const unsigned char aac_maxval_cb[] = { /* indexed by the quantized maximum in find_min_book(); 14 entries */
+    0, 1, 3, 5, 5, 7, 7, 7, 9, 9, 9, 9, 9, 11
+};
+
+static const int aacenc_profiles[] = { /* FF_PROFILE_* constants; NOTE(review): presumably the supported set - confirm at the users */
+    FF_PROFILE_AAC_MAIN,
+    FF_PROFILE_AAC_LOW,
+    FF_PROFILE_AAC_LTP,
+    FF_PROFILE_MPEG2_AAC_LOW,
+};
+
+#endif /* AVCODEC_AACENCTAB_H */
diff --git a/libavcodec/aacps.c b/libavcodec/aacps.c
index ea5a5d23..ccc79ffc 100644
--- a/libavcodec/aacps.c
+++ b/libavcodec/aacps.c
@@ -17,16 +17,23 @@
  * You should have received a copy of the GNU Lesser General Public
  * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Note: Rounding-to-nearest used unless otherwise stated
+ *
  */
 
 #include <stdint.h>
 #include "libavutil/common.h"
-#include "libavutil/internal.h"
 #include "libavutil/mathematics.h"
 #include "avcodec.h"
 #include "get_bits.h"
 #include "aacps.h"
+#if USE_FIXED
+#include "aacps_fixed_tablegen.h"
+#else
+#include "libavutil/internal.h"
 #include "aacps_tablegen.h"
+#endif /* USE_FIXED */
 #include "aacpsdata.c"
 
 #define PS_BASELINE 0  ///< Operate in Baseline PS mode
@@ -148,7 +155,7 @@ static void ipdopd_reset(int8_t *ipd_hist, int8_t *opd_hist)
     }
 }
 
-int ff_ps_read_data(AVCodecContext *avctx, GetBitContext *gb_host, PSContext *ps, int bits_left)
+int AAC_RENAME(ff_ps_read_data)(AVCodecContext *avctx, GetBitContext *gb_host, PSContext *ps, int bits_left)
 {
     int e;
     int bit_count_start = get_bits_count(gb_host);
@@ -302,35 +309,41 @@ int ff_ps_read_data(AVCodecContext *avctx, GetBitContext *gb_host, PSContext *ps
 
 /** Split one subband into 2 subsubbands with a symmetric real filter.
  * The filter must have its non-center even coefficients equal to zero. */
-static void hybrid2_re(float (*in)[2], float (*out)[32][2], const float filter[8], int len, int reverse)
+static void hybrid2_re(INTFLOAT (*in)[2], INTFLOAT (*out)[32][2], const INTFLOAT filter[8], int len, int reverse)
 {
     int i, j;
     for (i = 0; i < len; i++, in++) {
-        float re_in = filter[6] * in[6][0];          //real inphase
-        float re_op = 0.0f;                          //real out of phase
-        float im_in = filter[6] * in[6][1];          //imag inphase
-        float im_op = 0.0f;                          //imag out of phase
+        INT64FLOAT re_in = AAC_MUL31(filter[6], in[6][0]); //real inphase
+        INT64FLOAT re_op = 0.0f;                          //real out of phase
+        INT64FLOAT im_in = AAC_MUL31(filter[6], in[6][1]); //imag inphase
+        INT64FLOAT im_op = 0.0f;                          //imag out of phase
         for (j = 0; j < 6; j += 2) {
-            re_op += filter[j+1] * (in[j+1][0] + in[12-j-1][0]);
-            im_op += filter[j+1] * (in[j+1][1] + in[12-j-1][1]);
+            re_op += (INT64FLOAT)filter[j+1] * (in[j+1][0] + (INT64FLOAT)in[12-j-1][0]); //widen before adding the mirrored taps so the sum cannot overflow INTFLOAT
+            im_op += (INT64FLOAT)filter[j+1] * (in[j+1][1] + (INT64FLOAT)in[12-j-1][1]); //same for the imaginary part
         }
-        out[ reverse][i][0] = re_in + re_op;
-        out[ reverse][i][1] = im_in + im_op;
-        out[!reverse][i][0] = re_in - re_op;
-        out[!reverse][i][1] = im_in - im_op;
+
+#if USE_FIXED
+        re_op = (re_op + 0x40000000) >> 31; //add 2^30, then shift right by 31: round to nearest
+        im_op = (im_op + 0x40000000) >> 31;
+#endif /* USE_FIXED */
+
+        out[ reverse][i][0] = (INTFLOAT)(re_in + re_op); //output `reverse` gets in-phase + out-of-phase
+        out[ reverse][i][1] = (INTFLOAT)(im_in + im_op);
+        out[!reverse][i][0] = (INTFLOAT)(re_in - re_op); //the other output gets in-phase - out-of-phase
+        out[!reverse][i][1] = (INTFLOAT)(im_in - im_op);
     }
 }
 
 /** Split one subband into 6 subsubbands with a complex filter */
-static void hybrid6_cx(PSDSPContext *dsp, float (*in)[2], float (*out)[32][2],
-                       TABLE_CONST float (*filter)[8][2], int len)
+static void hybrid6_cx(PSDSPContext *dsp, INTFLOAT (*in)[2], INTFLOAT (*out)[32][2],
+                       TABLE_CONST INTFLOAT (*filter)[8][2], int len)
 {
     int i;
     int N = 8;
-    LOCAL_ALIGNED_16(float, temp, [8], [2]);
+    LOCAL_ALIGNED_16(INTFLOAT, temp, [8], [2]);
 
     for (i = 0; i < len; i++, in++) {
-        dsp->hybrid_analysis(temp, in, (const float (*)[8][2]) filter, 1, N);
+        dsp->hybrid_analysis(temp, in, (const INTFLOAT (*)[8][2]) filter, 1, N);
         out[0][i][0] = temp[6][0];
         out[0][i][1] = temp[6][1];
         out[1][i][0] = temp[7][0];
@@ -347,18 +360,18 @@ static void hybrid6_cx(PSDSPContext *dsp, float (*in)[2], float (*out)[32][2],
 }
 
 static void hybrid4_8_12_cx(PSDSPContext *dsp,
-                            float (*in)[2], float (*out)[32][2],
-                            TABLE_CONST float (*filter)[8][2], int N, int len)
+                            INTFLOAT (*in)[2], INTFLOAT (*out)[32][2],
+                            TABLE_CONST INTFLOAT (*filter)[8][2], int N, int len)
 {
     int i;
 
     for (i = 0; i < len; i++, in++) {
-        dsp->hybrid_analysis(out[0] + i, in, (const float (*)[8][2]) filter, 32, N);
+        dsp->hybrid_analysis(out[0] + i, in, (const INTFLOAT (*)[8][2]) filter, 32, N); //one DSP call per i: `in` slides by one element, output starts at out[0] + i, 32 and N are passed through
     }
 }
 
-static void hybrid_analysis(PSDSPContext *dsp, float out[91][32][2],
-                            float in[5][44][2], float L[2][38][64],
+static void hybrid_analysis(PSDSPContext *dsp, INTFLOAT out[91][32][2],
+                            INTFLOAT in[5][44][2], INTFLOAT L[2][38][64],
                             int is34, int len)
 {
     int i, j;
@@ -387,8 +400,8 @@ static void hybrid_analysis(PSDSPContext *dsp, float out[91][32][2],
     }
 }
 
-static void hybrid_synthesis(PSDSPContext *dsp, float out[2][38][64],
-                             float in[91][32][2], int is34, int len)
+static void hybrid_synthesis(PSDSPContext *dsp, INTFLOAT out[2][38][64],
+                             INTFLOAT in[91][32][2], int is34, int len)
 {
     int i, n;
     if (is34) {
@@ -429,7 +442,7 @@ static void hybrid_synthesis(PSDSPContext *dsp, float out[2][38][64],
 }
 
 /// All-pass filter decay slope
-#define DECAY_SLOPE      0.05f
+#define DECAY_SLOPE      Q30(0.05f)
 /// Number of frequency bands that can be addressed by the parameter index, b(k)
 static const int   NR_PAR_BANDS[]      = { 20, 34 };
 static const int   NR_IPDOPD_BANDS[]   = { 11, 17 };
@@ -483,28 +496,43 @@ static void map_idx_34_to_20(int8_t *par_mapped, const int8_t *par, int full)
     }
 }
 
-static void map_val_34_to_20(float par[PS_MAX_NR_IIDICC])
+static void map_val_34_to_20(INTFLOAT par[PS_MAX_NR_IIDICC])
 {
+#if USE_FIXED /* integer build: 1431655765/2^31 ~= 2/3, so each entry below is roughly (2*a + b)/3 or (a + 2*b)/3 */
+    par[ 0] = (int)((((int64_t)par[ 0] + (par[ 1]>>1)) * 1431655765 + \
+                      0x40000000) >> 31); //sum is formed in 64 bits so it cannot overflow int; +2^30 rounds to nearest
+    par[ 1] = (int)((((int64_t)(par[ 1]>>1) + par[ 2]) * 1431655765 + \
+                      0x40000000) >> 31);
+    par[ 2] = (int)((((int64_t)par[ 3] + (par[ 4]>>1)) * 1431655765 + \
+                      0x40000000) >> 31);
+    par[ 3] = (int)((((int64_t)(par[ 4]>>1) + par[ 5]) * 1431655765 + \
+                      0x40000000) >> 31);
+#else
     par[ 0] = (2*par[ 0] +   par[ 1]) * 0.33333333f;
     par[ 1] = (  par[ 1] + 2*par[ 2]) * 0.33333333f;
     par[ 2] = (2*par[ 3] +   par[ 4]) * 0.33333333f;
     par[ 3] = (  par[ 4] + 2*par[ 5]) * 0.33333333f;
-    par[ 4] = (  par[ 6] +   par[ 7]) * 0.5f;
-    par[ 5] = (  par[ 8] +   par[ 9]) * 0.5f;
+#endif /* USE_FIXED */
+    par[ 4] = AAC_HALF_SUM(par[ 6], par[ 7]);
+    par[ 5] = AAC_HALF_SUM(par[ 8], par[ 9]);
     par[ 6] =    par[10];
     par[ 7] =    par[11];
-    par[ 8] = (  par[12] +   par[13]) * 0.5f;
-    par[ 9] = (  par[14] +   par[15]) * 0.5f;
+    par[ 8] = AAC_HALF_SUM(par[12], par[13]);
+    par[ 9] = AAC_HALF_SUM(par[14], par[15]);
     par[10] =    par[16];
     par[11] =    par[17];
     par[12] =    par[18];
     par[13] =    par[19];
-    par[14] = (  par[20] +   par[21]) * 0.5f;
-    par[15] = (  par[22] +   par[23]) * 0.5f;
-    par[16] = (  par[24] +   par[25]) * 0.5f;
-    par[17] = (  par[26] +   par[27]) * 0.5f;
+    par[14] = AAC_HALF_SUM(par[20], par[21]);
+    par[15] = AAC_HALF_SUM(par[22], par[23]);
+    par[16] = AAC_HALF_SUM(par[24], par[25]);
+    par[17] = AAC_HALF_SUM(par[26], par[27]);
+#if USE_FIXED
+    par[18] = (((par[28]+2)>>2) + ((par[29]+2)>>2) + ((par[30]+2)>>2) + ((par[31]+2)>>2)); //each input is rounded and divided by 4 on its own before summing
+#else
     par[18] = (  par[28] +   par[29] +   par[30] +   par[31]) * 0.25f;
-    par[19] = (  par[32] +   par[33]) * 0.5f;
+#endif /* USE_FIXED */
+    par[19] = AAC_HALF_SUM(par[32], par[33]);
 }
 
 static void map_idx_10_to_34(int8_t *par_mapped, const int8_t *par, int full)
@@ -589,7 +617,7 @@ static void map_idx_20_to_34(int8_t *par_mapped, const int8_t *par, int full)
     par_mapped[ 0] =  par[ 0];
 }
 
-static void map_val_20_to_34(float par[PS_MAX_NR_IIDICC])
+static void map_val_20_to_34(INTFLOAT par[PS_MAX_NR_IIDICC])
 {
     par[33] =  par[19];
     par[32] =  par[19];
@@ -620,27 +648,29 @@ static void map_val_20_to_34(float par[PS_MAX_NR_IIDICC])
     par[ 7] =  par[ 4];
     par[ 6] =  par[ 4];
     par[ 5] =  par[ 3];
-    par[ 4] = (par[ 2] + par[ 3]) * 0.5f;
+    par[ 4] = AAC_HALF_SUM(par[ 2], par[ 3]);
     par[ 3] =  par[ 2];
     par[ 2] =  par[ 1];
-    par[ 1] = (par[ 0] + par[ 1]) * 0.5f;
+    par[ 1] = AAC_HALF_SUM(par[ 0], par[ 1]);
 }
 
-static void decorrelation(PSContext *ps, float (*out)[32][2], const float (*s)[32][2], int is34)
+static void decorrelation(PSContext *ps, INTFLOAT (*out)[32][2], const INTFLOAT (*s)[32][2], int is34)
 {
-    LOCAL_ALIGNED_16(float, power, [34], [PS_QMF_TIME_SLOTS]);
-    LOCAL_ALIGNED_16(float, transient_gain, [34], [PS_QMF_TIME_SLOTS]);
-    float *peak_decay_nrg = ps->peak_decay_nrg;
-    float *power_smooth = ps->power_smooth;
-    float *peak_decay_diff_smooth = ps->peak_decay_diff_smooth;
-    float (*delay)[PS_QMF_TIME_SLOTS + PS_MAX_DELAY][2] = ps->delay;
-    float (*ap_delay)[PS_AP_LINKS][PS_QMF_TIME_SLOTS + PS_MAX_AP_DELAY][2] = ps->ap_delay;
-    const int8_t *k_to_i = is34 ? k_to_i_34 : k_to_i_20;
-    const float peak_decay_factor = 0.76592833836465f;
+    LOCAL_ALIGNED_16(INTFLOAT, power, [34], [PS_QMF_TIME_SLOTS]);
+    LOCAL_ALIGNED_16(INTFLOAT, transient_gain, [34], [PS_QMF_TIME_SLOTS]);
+    INTFLOAT *peak_decay_nrg = ps->peak_decay_nrg;
+    INTFLOAT *power_smooth = ps->power_smooth;
+    INTFLOAT *peak_decay_diff_smooth = ps->peak_decay_diff_smooth;
+    INTFLOAT (*delay)[PS_QMF_TIME_SLOTS + PS_MAX_DELAY][2] = ps->delay;
+    INTFLOAT (*ap_delay)[PS_AP_LINKS][PS_QMF_TIME_SLOTS + PS_MAX_AP_DELAY][2] = ps->ap_delay;
+#if !USE_FIXED
     const float transient_impact  = 1.5f;
     const float a_smooth          = 0.25f; ///< Smoothing coefficient
+#endif /* USE_FIXED */
+    const int8_t *k_to_i = is34 ? k_to_i_34 : k_to_i_20;
     int i, k, m, n;
     int n0 = 0, nL = 32;
+    const INTFLOAT peak_decay_factor = Q31(0.76592833836465f);
 
     memset(power, 0, 34 * sizeof(*power));
 
@@ -658,6 +688,33 @@ static void decorrelation(PSContext *ps, float (*out)[32][2], const float (*s)[3
     }
 
     //Transient detection
+#if USE_FIXED
+    for (i = 0; i < NR_PAR_BANDS[is34]; i++) {
+        for (n = n0; n < nL; n++) {
+            int decayed_peak;
+            int denom;
+
+            decayed_peak = (int)(((int64_t)peak_decay_factor * \
+                                           peak_decay_nrg[i] + 0x40000000) >> 31);
+            peak_decay_nrg[i] = FFMAX(decayed_peak, power[i][n]);
+            power_smooth[i] += (power[i][n] - power_smooth[i] + 2) >> 2;
+            peak_decay_diff_smooth[i] += (peak_decay_nrg[i] - power[i][n] - \
+                                          peak_decay_diff_smooth[i] + 2) >> 2;
+            denom = peak_decay_diff_smooth[i] + (peak_decay_diff_smooth[i] >> 1);
+            if (denom > power_smooth[i]) {
+              int p = power_smooth[i];
+              while (denom < 0x40000000) {
+                denom <<= 1;
+                p <<= 1;
+              }
+              transient_gain[i][n] = p / (denom >> 16);
+            }
+            else {
+              transient_gain[i][n] = 1 << 16;
+            }
+        }
+    }
+#else
     for (i = 0; i < NR_PAR_BANDS[is34]; i++) {
         for (n = n0; n < nL; n++) {
             float decayed_peak = peak_decay_factor * peak_decay_nrg[i];
@@ -671,6 +728,7 @@ static void decorrelation(PSContext *ps, float (*out)[32][2], const float (*s)[3
         }
     }
 
+#endif /* USE_FIXED */
     //Decorrelation and transient reduction
     //                         PS_AP_LINKS - 1
     //                               -----
@@ -681,8 +739,22 @@ static void decorrelation(PSContext *ps, float (*out)[32][2], const float (*s)[3
     //d[k][z] (out) = transient_gain_mapped[k][z] * H[k][z] * s[k][z]
     for (k = 0; k < NR_ALLPASS_BANDS[is34]; k++) {
         int b = k_to_i[k];
+#if USE_FIXED
+        int g_decay_slope;
+
+        if (k - DECAY_CUTOFF[is34] <= 0) {
+          g_decay_slope = 1 << 30;
+        }
+        else if (k - DECAY_CUTOFF[is34] >= 20) {
+          g_decay_slope = 0;
+        }
+        else {
+          g_decay_slope = (1 << 30) - DECAY_SLOPE * (k - DECAY_CUTOFF[is34]);
+        }
+#else
         float g_decay_slope = 1.f - DECAY_SLOPE * (k - DECAY_CUTOFF[is34]);
         g_decay_slope = av_clipf(g_decay_slope, 0.f, 1.f);
+#endif /* USE_FIXED */
         memcpy(delay[k], delay[k]+nL, PS_MAX_DELAY*sizeof(delay[k][0]));
         memcpy(delay[k]+PS_MAX_DELAY, s[k], numQMFSlots*sizeof(delay[k][0]));
         for (m = 0; m < PS_AP_LINKS; m++) {
@@ -690,7 +762,7 @@ static void decorrelation(PSContext *ps, float (*out)[32][2], const float (*s)[3
         }
         ps->dsp.decorrelate(out[k], delay[k] + PS_MAX_DELAY - 2, ap_delay[k],
                             phi_fract[is34][k],
-                            (const float (*)[2]) Q_fract_allpass[is34][k],
+                            (const INTFLOAT (*)[2]) Q_fract_allpass[is34][k],
                             transient_gain[b], g_decay_slope, nL - n0);
     }
     for (; k < SHORT_DELAY_BAND[is34]; k++) {
@@ -749,14 +821,14 @@ static void remap20(int8_t (**p_par_mapped)[PS_MAX_NR_IIDICC],
     }
 }
 
-static void stereo_processing(PSContext *ps, float (*l)[32][2], float (*r)[32][2], int is34)
+static void stereo_processing(PSContext *ps, INTFLOAT (*l)[32][2], INTFLOAT (*r)[32][2], int is34)
 {
     int e, b, k;
 
-    float (*H11)[PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC] = ps->H11;
-    float (*H12)[PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC] = ps->H12;
-    float (*H21)[PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC] = ps->H21;
-    float (*H22)[PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC] = ps->H22;
+    INTFLOAT (*H11)[PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC] = ps->H11;
+    INTFLOAT (*H12)[PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC] = ps->H12;
+    INTFLOAT (*H21)[PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC] = ps->H21;
+    INTFLOAT (*H22)[PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC] = ps->H22;
     int8_t *opd_hist = ps->opd_hist;
     int8_t *ipd_hist = ps->ipd_hist;
     int8_t iid_mapped_buf[PS_MAX_NUM_ENV][PS_MAX_NR_IIDICC];
@@ -768,7 +840,7 @@ static void stereo_processing(PSContext *ps, float (*l)[32][2], float (*r)[32][2
     int8_t (*ipd_mapped)[PS_MAX_NR_IIDICC] = ipd_mapped_buf;
     int8_t (*opd_mapped)[PS_MAX_NR_IIDICC] = opd_mapped_buf;
     const int8_t *k_to_i = is34 ? k_to_i_34 : k_to_i_20;
-    TABLE_CONST float (*H_LUT)[8][4] = (PS_BASELINE || ps->icc_mode < 3) ? HA : HB;
+    TABLE_CONST INTFLOAT (*H_LUT)[8][4] = (PS_BASELINE || ps->icc_mode < 3) ? HA : HB;
 
     //Remapping
     if (ps->num_env_old) {
@@ -823,7 +895,7 @@ static void stereo_processing(PSContext *ps, float (*l)[32][2], float (*r)[32][2
     //Mixing
     for (e = 0; e < ps->num_env; e++) {
         for (b = 0; b < NR_PAR_BANDS[is34]; b++) {
-            float h11, h12, h21, h22;
+            INTFLOAT h11, h12, h21, h22;
             h11 = H_LUT[iid_mapped[e][b] + 7 + 23 * ps->iid_quant][icc_mapped[e][b]][0];
             h12 = H_LUT[iid_mapped[e][b] + 7 + 23 * ps->iid_quant][icc_mapped[e][b]][1];
             h21 = H_LUT[iid_mapped[e][b] + 7 + 23 * ps->iid_quant][icc_mapped[e][b]][2];
@@ -832,27 +904,27 @@ static void stereo_processing(PSContext *ps, float (*l)[32][2], float (*r)[32][2
             if (!PS_BASELINE && ps->enable_ipdopd && b < NR_IPDOPD_BANDS[is34]) {
                 //The spec say says to only run this smoother when enable_ipdopd
                 //is set but the reference decoder appears to run it constantly
-                float h11i, h12i, h21i, h22i;
-                float ipd_adj_re, ipd_adj_im;
+                INTFLOAT h11i, h12i, h21i, h22i;
+                INTFLOAT ipd_adj_re, ipd_adj_im;
                 int opd_idx = opd_hist[b] * 8 + opd_mapped[e][b];
                 int ipd_idx = ipd_hist[b] * 8 + ipd_mapped[e][b];
-                float opd_re = pd_re_smooth[opd_idx];
-                float opd_im = pd_im_smooth[opd_idx];
-                float ipd_re = pd_re_smooth[ipd_idx];
-                float ipd_im = pd_im_smooth[ipd_idx];
+                INTFLOAT opd_re = pd_re_smooth[opd_idx];
+                INTFLOAT opd_im = pd_im_smooth[opd_idx];
+                INTFLOAT ipd_re = pd_re_smooth[ipd_idx];
+                INTFLOAT ipd_im = pd_im_smooth[ipd_idx];
                 opd_hist[b] = opd_idx & 0x3F;
                 ipd_hist[b] = ipd_idx & 0x3F;
 
-                ipd_adj_re = opd_re*ipd_re + opd_im*ipd_im;
-                ipd_adj_im = opd_im*ipd_re - opd_re*ipd_im;
-                h11i = h11 * opd_im;
-                h11  = h11 * opd_re;
-                h12i = h12 * ipd_adj_im;
-                h12  = h12 * ipd_adj_re;
-                h21i = h21 * opd_im;
-                h21  = h21 * opd_re;
-                h22i = h22 * ipd_adj_im;
-                h22  = h22 * ipd_adj_re;
+                ipd_adj_re = AAC_MADD30(opd_re, ipd_re, opd_im, ipd_im);
+                ipd_adj_im = AAC_MSUB30(opd_im, ipd_re, opd_re, ipd_im);
+                h11i = AAC_MUL30(h11,  opd_im);
+                h11  = AAC_MUL30(h11,  opd_re);
+                h12i = AAC_MUL30(h12,  ipd_adj_im);
+                h12  = AAC_MUL30(h12,  ipd_adj_re);
+                h21i = AAC_MUL30(h21,  opd_im);
+                h21  = AAC_MUL30(h21,  opd_re);
+                h22i = AAC_MUL30(h22,  ipd_adj_im);
+                h22  = AAC_MUL30(h22,  ipd_adj_re);
                 H11[1][e+1][b] = h11i;
                 H12[1][e+1][b] = h12i;
                 H21[1][e+1][b] = h21i;
@@ -864,11 +936,14 @@ static void stereo_processing(PSContext *ps, float (*l)[32][2], float (*r)[32][2
             H22[0][e+1][b] = h22;
         }
         for (k = 0; k < NR_BANDS[is34]; k++) {
-            float h[2][4];
-            float h_step[2][4];
+            LOCAL_ALIGNED_16(INTFLOAT, h, [2], [4]);
+            LOCAL_ALIGNED_16(INTFLOAT, h_step, [2], [4]);
             int start = ps->border_position[e];
             int stop  = ps->border_position[e+1];
-            float width = 1.f / (stop - start);
+            INTFLOAT width = Q30(1.f) / ((stop - start) ? (stop - start) : 1);
+#if USE_FIXED
+            width <<= 1;
+#endif
             b = k_to_i[k];
             h[0][0] = H11[0][e][b];
             h[0][1] = H12[0][e][b];
@@ -889,15 +964,15 @@ static void stereo_processing(PSContext *ps, float (*l)[32][2], float (*r)[32][2
             }
             }
             //Interpolation
-            h_step[0][0] = (H11[0][e+1][b] - h[0][0]) * width;
-            h_step[0][1] = (H12[0][e+1][b] - h[0][1]) * width;
-            h_step[0][2] = (H21[0][e+1][b] - h[0][2]) * width;
-            h_step[0][3] = (H22[0][e+1][b] - h[0][3]) * width;
+            h_step[0][0] = AAC_MSUB31_V3(H11[0][e+1][b], h[0][0], width);
+            h_step[0][1] = AAC_MSUB31_V3(H12[0][e+1][b], h[0][1], width);
+            h_step[0][2] = AAC_MSUB31_V3(H21[0][e+1][b], h[0][2], width);
+            h_step[0][3] = AAC_MSUB31_V3(H22[0][e+1][b], h[0][3], width);
             if (!PS_BASELINE && ps->enable_ipdopd) {
-                h_step[1][0] = (H11[1][e+1][b] - h[1][0]) * width;
-                h_step[1][1] = (H12[1][e+1][b] - h[1][1]) * width;
-                h_step[1][2] = (H21[1][e+1][b] - h[1][2]) * width;
-                h_step[1][3] = (H22[1][e+1][b] - h[1][3]) * width;
+                h_step[1][0] = AAC_MSUB31_V3(H11[1][e+1][b], h[1][0], width);
+                h_step[1][1] = AAC_MSUB31_V3(H12[1][e+1][b], h[1][1], width);
+                h_step[1][2] = AAC_MSUB31_V3(H21[1][e+1][b], h[1][2], width);
+                h_step[1][3] = AAC_MSUB31_V3(H22[1][e+1][b], h[1][3], width);
             }
             ps->dsp.stereo_interpolate[!PS_BASELINE && ps->enable_ipdopd](
                 l[k] + start + 1, r[k] + start + 1,
@@ -906,10 +981,10 @@ static void stereo_processing(PSContext *ps, float (*l)[32][2], float (*r)[32][2
     }
 }
 
-int ff_ps_apply(AVCodecContext *avctx, PSContext *ps, float L[2][38][64], float R[2][38][64], int top)
+int AAC_RENAME(ff_ps_apply)(AVCodecContext *avctx, PSContext *ps, INTFLOAT L[2][38][64], INTFLOAT R[2][38][64], int top)
 {
-    float (*Lbuf)[32][2] = ps->Lbuf;
-    float (*Rbuf)[32][2] = ps->Rbuf;
+    INTFLOAT (*Lbuf)[32][2] = ps->Lbuf;
+    INTFLOAT (*Rbuf)[32][2] = ps->Rbuf;
     const int len = 32;
     int is34 = ps->is34bands;
 
@@ -919,7 +994,7 @@ int ff_ps_apply(AVCodecContext *avctx, PSContext *ps, float L[2][38][64], float
         memset(ps->ap_delay + top, 0, (NR_ALLPASS_BANDS[is34] - top)*sizeof(ps->ap_delay[0]));
 
     hybrid_analysis(&ps->dsp, Lbuf, ps->in_buf, L, is34, len);
-    decorrelation(ps, Rbuf, (const float (*)[32][2]) Lbuf, is34);
+    decorrelation(ps, Rbuf, (const INTFLOAT (*)[32][2]) Lbuf, is34);
     stereo_processing(ps, Lbuf, Rbuf, is34);
     hybrid_synthesis(&ps->dsp, L, Lbuf, is34, len);
     hybrid_synthesis(&ps->dsp, R, Rbuf, is34, len);
@@ -936,7 +1011,7 @@ int ff_ps_apply(AVCodecContext *avctx, PSContext *ps, float L[2][38][64], float
 #define PS_VLC_ROW(name) \
     { name ## _codes, name ## _bits, sizeof(name ## _codes), sizeof(name ## _codes[0]) }
 
-av_cold void ff_ps_init(void) {
+av_cold void AAC_RENAME(ff_ps_init)(void) {
     // Syntax initialization
     static const struct {
         const void *ps_codes, *ps_bits;
@@ -968,7 +1043,7 @@ av_cold void ff_ps_init(void) {
     ps_tableinit();
 }
 
-av_cold void ff_ps_ctx_init(PSContext *ps)
+av_cold void AAC_RENAME(ff_ps_ctx_init)(PSContext *ps)
 {
-    ff_psdsp_init(&ps->dsp);
+    AAC_RENAME(ff_psdsp_init)(&ps->dsp); //only work done here: pass the embedded dsp member to ff_psdsp_init (AAC_RENAME presumably selects the float/fixed symbol name - confirm)
 }
diff --git a/libavcodec/aacps.h b/libavcodec/aacps.h
index 174770d6..61edce35 100644
--- a/libavcodec/aacps.h
+++ b/libavcodec/aacps.h
@@ -19,8 +19,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef AVCODEC_PS_H
-#define AVCODEC_PS_H
+#ifndef AVCODEC_AACPS_H
+#define AVCODEC_AACPS_H
 
 #include <stdint.h>
 
@@ -61,26 +61,26 @@ typedef struct PSContext {
     int    is34bands;
     int    is34bands_old;
 
-    DECLARE_ALIGNED(16, float, in_buf)[5][44][2];
-    DECLARE_ALIGNED(16, float, delay)[PS_MAX_SSB][PS_QMF_TIME_SLOTS + PS_MAX_DELAY][2];
-    DECLARE_ALIGNED(16, float, ap_delay)[PS_MAX_AP_BANDS][PS_AP_LINKS][PS_QMF_TIME_SLOTS + PS_MAX_AP_DELAY][2];
-    DECLARE_ALIGNED(16, float, peak_decay_nrg)[34];
-    DECLARE_ALIGNED(16, float, power_smooth)[34];
-    DECLARE_ALIGNED(16, float, peak_decay_diff_smooth)[34];
-    DECLARE_ALIGNED(16, float, H11)[2][PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC];
-    DECLARE_ALIGNED(16, float, H12)[2][PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC];
-    DECLARE_ALIGNED(16, float, H21)[2][PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC];
-    DECLARE_ALIGNED(16, float, H22)[2][PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC];
-    DECLARE_ALIGNED(16, float, Lbuf)[91][32][2];
-    DECLARE_ALIGNED(16, float, Rbuf)[91][32][2];
+    DECLARE_ALIGNED(16, INTFLOAT, in_buf)[5][44][2];
+    DECLARE_ALIGNED(16, INTFLOAT, delay)[PS_MAX_SSB][PS_QMF_TIME_SLOTS + PS_MAX_DELAY][2];
+    DECLARE_ALIGNED(16, INTFLOAT, ap_delay)[PS_MAX_AP_BANDS][PS_AP_LINKS][PS_QMF_TIME_SLOTS + PS_MAX_AP_DELAY][2];
+    DECLARE_ALIGNED(16, INTFLOAT, peak_decay_nrg)[34];
+    DECLARE_ALIGNED(16, INTFLOAT, power_smooth)[34];
+    DECLARE_ALIGNED(16, INTFLOAT, peak_decay_diff_smooth)[34];
+    DECLARE_ALIGNED(16, INTFLOAT, H11)[2][PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC];
+    DECLARE_ALIGNED(16, INTFLOAT, H12)[2][PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC];
+    DECLARE_ALIGNED(16, INTFLOAT, H21)[2][PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC];
+    DECLARE_ALIGNED(16, INTFLOAT, H22)[2][PS_MAX_NUM_ENV+1][PS_MAX_NR_IIDICC];
+    DECLARE_ALIGNED(16, INTFLOAT, Lbuf)[91][32][2];
+    DECLARE_ALIGNED(16, INTFLOAT, Rbuf)[91][32][2];
     int8_t opd_hist[PS_MAX_NR_IIDICC];
     int8_t ipd_hist[PS_MAX_NR_IIDICC];
     PSDSPContext dsp;
 } PSContext;
 
-void ff_ps_init(void);
-void ff_ps_ctx_init(PSContext *ps);
-int ff_ps_read_data(AVCodecContext *avctx, GetBitContext *gb, PSContext *ps, int bits_left);
-int ff_ps_apply(AVCodecContext *avctx, PSContext *ps, float L[2][38][64], float R[2][38][64], int top);
+void AAC_RENAME(ff_ps_init)(void);
+void AAC_RENAME(ff_ps_ctx_init)(PSContext *ps);
+int AAC_RENAME(ff_ps_read_data)(AVCodecContext *avctx, GetBitContext *gb, PSContext *ps, int bits_left);
+int AAC_RENAME(ff_ps_apply)(AVCodecContext *avctx, PSContext *ps, INTFLOAT L[2][38][64], INTFLOAT R[2][38][64], int top);
 
-#endif /* AVCODEC_PS_H */
+#endif /* AVCODEC_AACPS_H */
diff --git a/libavcodec/aacps_fixed.c b/libavcodec/aacps_fixed.c
new file mode 100644
index 00000000..46af2133
--- /dev/null
+++ b/libavcodec/aacps_fixed.c
@@ -0,0 +1,24 @@
+/*
+ * MPEG-4 Parametric Stereo decoding functions
+ * Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define USE_FIXED 1
+
+#include "aacps.c"
diff --git a/libavcodec/aac_tablegen_decl.h b/libavcodec/aacps_fixed_tablegen.c
similarity index 64%
rename from libavcodec/aac_tablegen_decl.h
rename to libavcodec/aacps_fixed_tablegen.c
index 5105dae4..9e306991 100644
--- a/libavcodec/aac_tablegen_decl.h
+++ b/libavcodec/aacps_fixed_tablegen.c
@@ -1,5 +1,5 @@
 /*
- * Header file for hardcoded AAC tables
+ * Generate a header file for hardcoded Parametric Stereo tables
  *
  * Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
  *
@@ -20,17 +20,5 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef AVCODEC_AAC_TABLEGEN_DECL_H
-#define AVCODEC_AAC_TABLEGEN_DECL_H
-
-#define POW_SF2_ZERO    200    ///< ff_aac_pow2sf_tab index corresponding to pow(2, 0);
-
-#if CONFIG_HARDCODED_TABLES
-#define ff_aac_tableinit()
-extern const float ff_aac_pow2sf_tab[428];
-#else
-void ff_aac_tableinit(void);
-extern       float ff_aac_pow2sf_tab[428];
-#endif /* CONFIG_HARDCODED_TABLES */
-
-#endif /* AVCODEC_AAC_TABLEGEN_DECL_H */
+#define USE_FIXED 1
+#include "aacps_tablegen_template.c"
diff --git a/libavcodec/aacps_fixed_tablegen.h b/libavcodec/aacps_fixed_tablegen.h
new file mode 100644
index 00000000..8b82deb5
--- /dev/null
+++ b/libavcodec/aacps_fixed_tablegen.h
@@ -0,0 +1,403 @@
+/*
+ * Header file for hardcoded Parametric Stereo tables
+ *
+ * Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Note: Rounding-to-nearest used unless otherwise stated
+ *
+ */
+
+#ifndef AVCODEC_AACPS_FIXED_TABLEGEN_H
+#define AVCODEC_AACPS_FIXED_TABLEGEN_H
+
+#include <math.h>
+#include <stdint.h>
+
+#if CONFIG_HARDCODED_TABLES
+#define ps_tableinit()
+#define TABLE_CONST const
+#include "libavcodec/aacps_fixed_tables.h"
+#else
+#include "libavutil/common.h"
+#include "libavutil/mathematics.h"
+#include "libavutil/mem.h"
+
+#include "aac_defines.h"
+#include "libavutil/softfloat.h"
+#define NR_ALLPASS_BANDS20 30 /* bound of the first all-pass loop in ps_tableinit() */
+#define NR_ALLPASS_BANDS34 50 /* bound of the second all-pass loop in ps_tableinit() */
+#define PS_AP_LINKS 3 /* inner-loop bound over fractional_delay_links[] */
+#define TABLE_CONST /* expands to nothing in this branch; the other branch defines it as const */
+static int pd_re_smooth[8*8*8]; /* written by ps_tableinit() at index pd0*64 + pd1*8 + pd2 */
+static int pd_im_smooth[8*8*8]; /* same indexing as pd_re_smooth */
+static int HA[46][8][4]; /* indexed [iid][icc][0..3] by ps_tableinit() */
+static int HB[46][8][4]; /* indexed [iid][icc][0..3] by ps_tableinit() */
+static DECLARE_ALIGNED(16, int, f20_0_8) [ 8][8][2]; /* filled by make_filters_from_proto(f20_0_8, g0_Q8, 8) */
+static DECLARE_ALIGNED(16, int, f34_0_12)[12][8][2]; /* filled from g0_Q12 with 12 bands */
+static DECLARE_ALIGNED(16, int, f34_1_8) [ 8][8][2]; /* filled from g1_Q8 with 8 bands */
+static DECLARE_ALIGNED(16, int, f34_2_4) [ 4][8][2]; /* filled from g2_Q4 with 4 bands */
+static TABLE_CONST DECLARE_ALIGNED(16, int, Q_fract_allpass)[2][50][3][2]; /* [0 or 1][k][m][0] = c, [..][1] = s */
+static DECLARE_ALIGNED(16, int, phi_fract)[2][50][2]; /* [0 or 1][k][0] = c, [..][1] = s */
+/* Prototype coefficients for make_filters_from_proto(), seven per table. Q31() is defined outside this file. */
+static const int g0_Q8[] = {
+    Q31(0.00746082949812f), Q31(0.02270420949825f), Q31(0.04546865930473f), Q31(0.07266113929591f),
+    Q31(0.09885108575264f), Q31(0.11793710567217f), Q31(0.125f)
+};
+
+static const int g0_Q12[] = {
+    Q31(0.04081179924692f), Q31(0.03812810994926f), Q31(0.05144908135699f), Q31(0.06399831151592f),
+    Q31(0.07428313801106f), Q31(0.08100347892914f), Q31(0.08333333333333f)
+};
+
+static const int g1_Q8[] = {
+    Q31(0.01565675600122f), Q31(0.03752716391991f), Q31(0.05417891378782f), Q31(0.08417044116767f),
+    Q31(0.10307344158036f), Q31(0.12222452249753f), Q31(0.125f)
+};
+
+static const int g2_Q4[] = {
+    Q31(-0.05908211155639f), Q31(-0.04871498374946f), Q31(0.0f),   Q31(0.07778723915851f),
+    Q31( 0.16486303567403f), Q31( 0.23279856662996f), Q31(0.25f)
+};
+/* sintbl_N[k] / costbl_N[k]: sin / cos of k/N of a full turn, scaled so that 1.0 is 1073741824 (1 << 30). */
+static const int sintbl_4[4]   = {           0,  1073741824,           0, -1073741824 };
+static const int costbl_4[4]   = {  1073741824,           0, -1073741824,           0 };
+static const int sintbl_8[8]   = {           0,   759250125,  1073741824,   759250125,
+                                             0,  -759250125, -1073741824,  -759250125 };
+static const int costbl_8[8]   = {  1073741824,   759250125,           0,  -759250125,
+                                   -1073741824,  -759250125,           0,   759250125 };
+static const int sintbl_12[12] = {           0,   536870912,   929887697,  1073741824,
+                                     929887697,   536870912,           0,  -536870912,
+                                    -929887697, -1073741824,  -929887697,  -536870912 };
+static const int costbl_12[12] = {  1073741824,   929887697,   536870912,           0,
+                                    -536870912,  -929887697, -1073741824,  -929887697,
+                                    -536870912,           0,   536870912,   929887697 };
+
+static void make_filters_from_proto(int (*filter)[8][2], const int *proto, int bands)
+{
+    /* Fill filter[q][n] for q < bands, n = 0..6: proto[n] scaled by a cos / -sin pair taken from the lookup tables. */
+    const int *sinptr, *cosptr;
+    int s, c, sinhalf, coshalf;
+    int q, n;
+    /* Select the tables for this band count; sinhalf/coshalf are the extra rotation applied to odd n. */
+    if (bands == 4) {
+        sinptr = sintbl_4;
+        cosptr = costbl_4;
+        sinhalf = 759250125; /* ~= (1 << 30) / sqrt(2) */
+        coshalf = 759250125;
+    } else if (bands == 8) {
+        sinptr = sintbl_8;
+        cosptr = costbl_8;
+        sinhalf = 410903207; /* ~= (1 << 30) * sin(pi/8) */
+        coshalf = 992008094; /* ~= (1 << 30) * cos(pi/8) */
+    } else { /* any other value gets the 12-entry tables; this file only passes 4, 8 or 12 */
+        sinptr = sintbl_12;
+        cosptr = costbl_12;
+        sinhalf = 277904834; /* ~= (1 << 30) * sin(pi/12) */
+        coshalf = 1037154959; /* ~= (1 << 30) * cos(pi/12) */
+    }
+
+    for (q = 0; q < bands; q++) {
+        for (n = 0; n < 7; n++) { /* index 7 of filter[q] is never written here */
+            int theta = (q*(n-6) + (n>>1) - 3) % bands;
+            /* % can yield a negative value in C; wrap it into 0..bands-1 before indexing. */
+            if (theta < 0)
+                theta += bands;
+            s = sinptr[theta];
+            c = cosptr[theta];
+
+            if (n & 1) { /* odd n: rotate (c, s) by (coshalf, sinhalf); theta doubles as a temporary for the new c */
+                theta = (int)(((int64_t)c * coshalf - (int64_t)s * sinhalf + 0x20000000) >> 30);
+                s = (int)(((int64_t)s * coshalf + (int64_t)c * sinhalf + 0x20000000) >> 30);
+                c = theta;
+            }
+            filter[q][n][0] = (int)(((int64_t)proto[n] * c + 0x20000000) >> 30); /* 0x20000000 is half of 1 << 30: round to nearest */
+            filter[q][n][1] = -(int)(((int64_t)proto[n] * s + 0x20000000) >> 30); /* negated sine term */
+        }
+    }
+}
+
+static void ps_tableinit(void)
+{ /* Fills pd_re/im_smooth, HA, HB, Q_fract_allpass, phi_fract and the four f*_ filter tables with integer values. */
+    static const int ipdopd_sin[] = { Q30(0), Q30(M_SQRT1_2), Q30(1), Q30( M_SQRT1_2), Q30( 0), Q30(-M_SQRT1_2), Q30(-1), Q30(-M_SQRT1_2) };
+    static const int ipdopd_cos[] = { Q30(1), Q30(M_SQRT1_2), Q30(0), Q30(-M_SQRT1_2), Q30(-1), Q30(-M_SQRT1_2), Q30( 0), Q30( M_SQRT1_2) };
+    int pd0, pd1, pd2;
+    int idx;
+    /* alpha_tab / gamma_tab: 46 rows of 6 values, each written as an angle divided by M_PI; read through idx when HB is built. */
+    static const int alpha_tab[] =
+    {
+      Q30(1.5146213770f/M_PI), Q30(1.5181334019f/M_PI), Q30(1.5234849453f/M_PI), Q30(1.5369486809f/M_PI), Q30(1.5500687361f/M_PI), Q30(1.5679757595f/M_PI),
+      Q30(1.4455626011f/M_PI), Q30(1.4531552792f/M_PI), Q30(1.4648091793f/M_PI), Q30(1.4945238829f/M_PI), Q30(1.5239057541f/M_PI), Q30(1.5644006729f/M_PI),
+      Q30(1.3738563061f/M_PI), Q30(1.3851221800f/M_PI), Q30(1.4026404619f/M_PI), Q30(1.4484288692f/M_PI), Q30(1.4949874878f/M_PI), Q30(1.5604078770f/M_PI),
+      Q30(1.2645189762f/M_PI), Q30(1.2796478271f/M_PI), Q30(1.3038636446f/M_PI), Q30(1.3710125685f/M_PI), Q30(1.4443849325f/M_PI), Q30(1.5532352924f/M_PI),
+      Q30(1.1507037878f/M_PI), Q30(1.1669205427f/M_PI), Q30(1.1938756704f/M_PI), Q30(1.2754167318f/M_PI), Q30(1.3761177063f/M_PI), Q30(1.5429240465f/M_PI),
+      Q30(1.0079245567f/M_PI), Q30(1.0208238363f/M_PI), Q30(1.0433073044f/M_PI), Q30(1.1208510399f/M_PI), Q30(1.2424604893f/M_PI), Q30(1.5185726881f/M_PI),
+      Q30(0.8995233774f/M_PI), Q30(0.9069069624f/M_PI), Q30(0.9201194048f/M_PI), Q30(0.9698365927f/M_PI), Q30(1.0671583414f/M_PI), Q30(1.4647934437f/M_PI),
+      Q30(0.7853981853f/M_PI), Q30(0.7853981853f/M_PI), Q30(0.7853981853f/M_PI), Q30(0.7853981853f/M_PI), Q30(0.7853981853f/M_PI), Q30(0.7853981853f/M_PI),
+      Q30(0.6712729335f/M_PI), Q30(0.6638893485f/M_PI), Q30(0.6506769061f/M_PI), Q30(0.6009597182f/M_PI), Q30(0.5036380291f/M_PI), Q30(0.1060028747f/M_PI),
+      Q30(0.5628717542f/M_PI), Q30(0.5499725342f/M_PI), Q30(0.5274890065f/M_PI), Q30(0.4499453008f/M_PI), Q30(0.3283358216f/M_PI), Q30(0.0522236861f/M_PI),
+      Q30(0.4200925827f/M_PI), Q30(0.4038758278f/M_PI), Q30(0.3769206405f/M_PI), Q30(0.2953795493f/M_PI), Q30(0.1946786791f/M_PI), Q30(0.0278722942f/M_PI),
+      Q30(0.3062773645f/M_PI), Q30(0.2911485136f/M_PI), Q30(0.2669326365f/M_PI), Q30(0.1997837722f/M_PI), Q30(0.1264114529f/M_PI), Q30(0.0175609849f/M_PI),
+      Q30(0.1969399750f/M_PI), Q30(0.1856741160f/M_PI), Q30(0.1681558639f/M_PI), Q30(0.1223674342f/M_PI), Q30(0.0758088827f/M_PI), Q30(0.0103884479f/M_PI),
+      Q30(0.1252337098f/M_PI), Q30(0.1176410317f/M_PI), Q30(0.1059871912f/M_PI), Q30(0.0762724727f/M_PI), Q30(0.0468905345f/M_PI), Q30(0.0063956482f/M_PI),
+      Q30(0.0561749674f/M_PI), Q30(0.0526629239f/M_PI), Q30(0.0473113805f/M_PI), Q30(0.0338476151f/M_PI), Q30(0.0207276177f/M_PI), Q30(0.0028205961f/M_PI),
+      Q30(1.5676341057f/M_PI), Q30(1.5678333044f/M_PI), Q30(1.5681363344f/M_PI), Q30(1.5688960552f/M_PI), Q30(1.5696337223f/M_PI), Q30(1.5706381798f/M_PI),
+      Q30(1.5651730299f/M_PI), Q30(1.5655272007f/M_PI), Q30(1.5660660267f/M_PI), Q30(1.5674170256f/M_PI), Q30(1.5687289238f/M_PI), Q30(1.5705151558f/M_PI),
+      Q30(1.5607966185f/M_PI), Q30(1.5614265203f/M_PI), Q30(1.5623844862f/M_PI), Q30(1.5647867918f/M_PI), Q30(1.5671195984f/M_PI), Q30(1.5702962875f/M_PI),
+      Q30(1.5530153513f/M_PI), Q30(1.5541347265f/M_PI), Q30(1.5558375120f/M_PI), Q30(1.5601085424f/M_PI), Q30(1.5642569065f/M_PI), Q30(1.5699069500f/M_PI),
+      Q30(1.5391840935f/M_PI), Q30(1.5411708355f/M_PI), Q30(1.5441943407f/M_PI), Q30(1.5517836809f/M_PI), Q30(1.5591609478f/M_PI), Q30(1.5692136288f/M_PI),
+      Q30(1.5146213770f/M_PI), Q30(1.5181334019f/M_PI), Q30(1.5234849453f/M_PI), Q30(1.5369486809f/M_PI), Q30(1.5500687361f/M_PI), Q30(1.5679757595f/M_PI),
+      Q30(1.4915299416f/M_PI), Q30(1.4964480400f/M_PI), Q30(1.5039558411f/M_PI), Q30(1.5229074955f/M_PI), Q30(1.5414420366f/M_PI), Q30(1.5667995214f/M_PI),
+      Q30(1.4590617418f/M_PI), Q30(1.4658898115f/M_PI), Q30(1.4763505459f/M_PI), Q30(1.5029321909f/M_PI), Q30(1.5291173458f/M_PI), Q30(1.5651149750f/M_PI),
+      Q30(1.4136143923f/M_PI), Q30(1.4229322672f/M_PI), Q30(1.4373078346f/M_PI), Q30(1.4743183851f/M_PI), Q30(1.5113102198f/M_PI), Q30(1.5626684427f/M_PI),
+      Q30(1.3505556583f/M_PI), Q30(1.3628427982f/M_PI), Q30(1.3820509911f/M_PI), Q30(1.4327841997f/M_PI), Q30(1.4850014448f/M_PI), Q30(1.5590143204f/M_PI),
+      Q30(1.2645189762f/M_PI), Q30(1.2796478271f/M_PI), Q30(1.3038636446f/M_PI), Q30(1.3710125685f/M_PI), Q30(1.4443849325f/M_PI), Q30(1.5532352924f/M_PI),
+      Q30(1.1919227839f/M_PI), Q30(1.2081253529f/M_PI), Q30(1.2346779108f/M_PI), Q30(1.3123005629f/M_PI), Q30(1.4034168720f/M_PI), Q30(1.5471596718f/M_PI),
+      Q30(1.1061993837f/M_PI), Q30(1.1219338179f/M_PI), Q30(1.1484941244f/M_PI), Q30(1.2320860624f/M_PI), Q30(1.3421301842f/M_PI), Q30(1.5373806953f/M_PI),
+      Q30(1.0079245567f/M_PI), Q30(1.0208238363f/M_PI), Q30(1.0433073044f/M_PI), Q30(1.1208510399f/M_PI), Q30(1.2424604893f/M_PI), Q30(1.5185726881f/M_PI),
+      Q30(0.8995233774f/M_PI), Q30(0.9069069624f/M_PI), Q30(0.9201194048f/M_PI), Q30(0.9698365927f/M_PI), Q30(1.0671583414f/M_PI), Q30(1.4647934437f/M_PI),
+      Q30(0.7853981853f/M_PI), Q30(0.7853981853f/M_PI), Q30(0.7853981853f/M_PI), Q30(0.7853981853f/M_PI), Q30(0.7853981853f/M_PI), Q30(0.7853981853f/M_PI),
+      Q30(0.6712729335f/M_PI), Q30(0.6638893485f/M_PI), Q30(0.6506769061f/M_PI), Q30(0.6009597182f/M_PI), Q30(0.5036380291f/M_PI), Q30(0.1060028747f/M_PI),
+      Q30(0.5628717542f/M_PI), Q30(0.5499725342f/M_PI), Q30(0.5274890065f/M_PI), Q30(0.4499453008f/M_PI), Q30(0.3283358216f/M_PI), Q30(0.0522236861f/M_PI),
+      Q30(0.4645969570f/M_PI), Q30(0.4488625824f/M_PI), Q30(0.4223022461f/M_PI), Q30(0.3387103081f/M_PI), Q30(0.2286661267f/M_PI), Q30(0.0334156826f/M_PI),
+      Q30(0.3788735867f/M_PI), Q30(0.3626709878f/M_PI), Q30(0.3361184299f/M_PI), Q30(0.2584958076f/M_PI), Q30(0.1673794836f/M_PI), Q30(0.0236366931f/M_PI),
+      Q30(0.3062773645f/M_PI), Q30(0.2911485136f/M_PI), Q30(0.2669326365f/M_PI), Q30(0.1997837722f/M_PI), Q30(0.1264114529f/M_PI), Q30(0.0175609849f/M_PI),
+      Q30(0.2202406377f/M_PI), Q30(0.2079535723f/M_PI), Q30(0.1887452900f/M_PI), Q30(0.1380121708f/M_PI), Q30(0.0857949182f/M_PI), Q30(0.0117820343f/M_PI),
+      Q30(0.1571819335f/M_PI), Q30(0.1478640437f/M_PI), Q30(0.1334884763f/M_PI), Q30(0.0964778885f/M_PI), Q30(0.0594860613f/M_PI), Q30(0.0081279324f/M_PI),
+      Q30(0.1117345318f/M_PI), Q30(0.1049065739f/M_PI), Q30(0.0944457650f/M_PI), Q30(0.0678641573f/M_PI), Q30(0.0416790098f/M_PI), Q30(0.0056813755f/M_PI),
+      Q30(0.0792663917f/M_PI), Q30(0.0743482932f/M_PI), Q30(0.0668405443f/M_PI), Q30(0.0478888862f/M_PI), Q30(0.0293543357f/M_PI), Q30(0.0039967746f/M_PI),
+      Q30(0.0561749674f/M_PI), Q30(0.0526629239f/M_PI), Q30(0.0473113805f/M_PI), Q30(0.0338476151f/M_PI), Q30(0.0207276177f/M_PI), Q30(0.0028205961f/M_PI),
+      Q30(0.0316122435f/M_PI), Q30(0.0296254847f/M_PI), Q30(0.0266019460f/M_PI), Q30(0.0190126132f/M_PI), Q30(0.0116353342f/M_PI), Q30(0.0015827164f/M_PI),
+      Q30(0.0177809205f/M_PI), Q30(0.0166615788f/M_PI), Q30(0.0149587989f/M_PI), Q30(0.0106877899f/M_PI), Q30(0.0065393616f/M_PI), Q30(0.0008894200f/M_PI),
+      Q30(0.0099996664f/M_PI), Q30(0.0093698399f/M_PI), Q30(0.0084118480f/M_PI), Q30(0.0060095116f/M_PI), Q30(0.0036767013f/M_PI), Q30(0.0005000498f/M_PI),
+      Q30(0.0056233541f/M_PI), Q30(0.0052691097f/M_PI), Q30(0.0047303112f/M_PI), Q30(0.0033792770f/M_PI), Q30(0.0020674451f/M_PI), Q30(0.0002811795f/M_PI),
+      Q30(0.0031622672f/M_PI), Q30(0.0029630491f/M_PI), Q30(0.0026600463f/M_PI), Q30(0.0019002859f/M_PI), Q30(0.0011625893f/M_PI), Q30(0.0001581155f/M_PI)
+    };
+
+    static const int gamma_tab[] =
+    {
+      Q30(0.0000000000f/M_PI), Q30(0.0195873566f/M_PI), Q30(0.0303316917f/M_PI), Q30(0.0448668823f/M_PI), Q30(0.0522258915f/M_PI), Q30(0.0561044961f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.0433459543f/M_PI), Q30(0.0672172382f/M_PI), Q30(0.0997167900f/M_PI), Q30(0.1162951663f/M_PI), Q30(0.1250736862f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.0672341362f/M_PI), Q30(0.1045235619f/M_PI), Q30(0.1558904350f/M_PI), Q30(0.1824723780f/M_PI), Q30(0.1966800541f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.1011129096f/M_PI), Q30(0.1580764502f/M_PI), Q30(0.2387557179f/M_PI), Q30(0.2820728719f/M_PI), Q30(0.3058380187f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.1315985769f/M_PI), Q30(0.2072522491f/M_PI), Q30(0.3188187480f/M_PI), Q30(0.3825501204f/M_PI), Q30(0.4193951190f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.1603866369f/M_PI), Q30(0.2549437582f/M_PI), Q30(0.4029446840f/M_PI), Q30(0.4980689585f/M_PI), Q30(0.5615641475f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.1736015975f/M_PI), Q30(0.2773745656f/M_PI), Q30(0.4461984038f/M_PI), Q30(0.5666890144f/M_PI), Q30(0.6686112881f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.1784276664f/M_PI), Q30(0.2856673002f/M_PI), Q30(0.4630723596f/M_PI), Q30(0.5971632004f/M_PI), Q30(0.7603877187f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.1736015975f/M_PI), Q30(0.2773745656f/M_PI), Q30(0.4461984038f/M_PI), Q30(0.5666890144f/M_PI), Q30(0.6686112881f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.1603866369f/M_PI), Q30(0.2549437582f/M_PI), Q30(0.4029446840f/M_PI), Q30(0.4980689585f/M_PI), Q30(0.5615641475f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.1315985769f/M_PI), Q30(0.2072522491f/M_PI), Q30(0.3188187480f/M_PI), Q30(0.3825501204f/M_PI), Q30(0.4193951190f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.1011129096f/M_PI), Q30(0.1580764502f/M_PI), Q30(0.2387557179f/M_PI), Q30(0.2820728719f/M_PI), Q30(0.3058380187f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.0672341362f/M_PI), Q30(0.1045235619f/M_PI), Q30(0.1558904350f/M_PI), Q30(0.1824723780f/M_PI), Q30(0.1966800541f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.0433459543f/M_PI), Q30(0.0672172382f/M_PI), Q30(0.0997167900f/M_PI), Q30(0.1162951663f/M_PI), Q30(0.1250736862f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.0195873566f/M_PI), Q30(0.0303316917f/M_PI), Q30(0.0448668823f/M_PI), Q30(0.0522258915f/M_PI), Q30(0.0561044961f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.0011053939f/M_PI), Q30(0.0017089852f/M_PI), Q30(0.0025254129f/M_PI), Q30(0.0029398468f/M_PI), Q30(0.0031597170f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.0019607407f/M_PI), Q30(0.0030395309f/M_PI), Q30(0.0044951206f/M_PI), Q30(0.0052305623f/M_PI), Q30(0.0056152637f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.0034913034f/M_PI), Q30(0.0054070661f/M_PI), Q30(0.0079917293f/M_PI), Q30(0.0092999367f/M_PI), Q30(0.0099875759f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.0062100487f/M_PI), Q30(0.0096135242f/M_PI), Q30(0.0142110568f/M_PI), Q30(0.0165348612f/M_PI), Q30(0.0177587029f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.0110366223f/M_PI), Q30(0.0170863140f/M_PI), Q30(0.0252620988f/M_PI), Q30(0.0293955617f/M_PI), Q30(0.0315726399f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.0195873566f/M_PI), Q30(0.0303316917f/M_PI), Q30(0.0448668823f/M_PI), Q30(0.0522258915f/M_PI), Q30(0.0561044961f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.0275881495f/M_PI), Q30(0.0427365713f/M_PI), Q30(0.0632618815f/M_PI), Q30(0.0736731067f/M_PI), Q30(0.0791663304f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.0387469754f/M_PI), Q30(0.0600636788f/M_PI), Q30(0.0890387669f/M_PI), Q30(0.1037906483f/M_PI), Q30(0.1115923747f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.0541138873f/M_PI), Q30(0.0839984417f/M_PI), Q30(0.1248718798f/M_PI), Q30(0.1458375156f/M_PI), Q30(0.1569785923f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.0747506917f/M_PI), Q30(0.1163287833f/M_PI), Q30(0.1738867164f/M_PI), Q30(0.2038587779f/M_PI), Q30(0.2199459076f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.1011129096f/M_PI), Q30(0.1580764502f/M_PI), Q30(0.2387557179f/M_PI), Q30(0.2820728719f/M_PI), Q30(0.3058380187f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.1212290376f/M_PI), Q30(0.1903949380f/M_PI), Q30(0.2907958031f/M_PI), Q30(0.3466993868f/M_PI), Q30(0.3782821596f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.1418247074f/M_PI), Q30(0.2240308374f/M_PI), Q30(0.3474813402f/M_PI), Q30(0.4202919006f/M_PI), Q30(0.4637607038f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.1603866369f/M_PI), Q30(0.2549437582f/M_PI), Q30(0.4029446840f/M_PI), Q30(0.4980689585f/M_PI), Q30(0.5615641475f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.1736015975f/M_PI), Q30(0.2773745656f/M_PI), Q30(0.4461984038f/M_PI), Q30(0.5666890144f/M_PI), Q30(0.6686112881f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.1784276664f/M_PI), Q30(0.2856673002f/M_PI), Q30(0.4630723596f/M_PI), Q30(0.5971632004f/M_PI), Q30(0.7603877187f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.1736015975f/M_PI), Q30(0.2773745656f/M_PI), Q30(0.4461984038f/M_PI), Q30(0.5666890144f/M_PI), Q30(0.6686112881f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.1603866369f/M_PI), Q30(0.2549437582f/M_PI), Q30(0.4029446840f/M_PI), Q30(0.4980689585f/M_PI), Q30(0.5615641475f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.1418247074f/M_PI), Q30(0.2240308374f/M_PI), Q30(0.3474813402f/M_PI), Q30(0.4202919006f/M_PI), Q30(0.4637607038f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.1212290376f/M_PI), Q30(0.1903949380f/M_PI), Q30(0.2907958031f/M_PI), Q30(0.3466993868f/M_PI), Q30(0.3782821596f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.1011129096f/M_PI), Q30(0.1580764502f/M_PI), Q30(0.2387557179f/M_PI), Q30(0.2820728719f/M_PI), Q30(0.3058380187f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.0747506917f/M_PI), Q30(0.1163287833f/M_PI), Q30(0.1738867164f/M_PI), Q30(0.2038587779f/M_PI), Q30(0.2199459076f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.0541138873f/M_PI), Q30(0.0839984417f/M_PI), Q30(0.1248718798f/M_PI), Q30(0.1458375156f/M_PI), Q30(0.1569785923f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.0387469754f/M_PI), Q30(0.0600636788f/M_PI), Q30(0.0890387669f/M_PI), Q30(0.1037906483f/M_PI), Q30(0.1115923747f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.0275881495f/M_PI), Q30(0.0427365713f/M_PI), Q30(0.0632618815f/M_PI), Q30(0.0736731067f/M_PI), Q30(0.0791663304f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.0195873566f/M_PI), Q30(0.0303316917f/M_PI), Q30(0.0448668823f/M_PI), Q30(0.0522258915f/M_PI), Q30(0.0561044961f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.0110366223f/M_PI), Q30(0.0170863140f/M_PI), Q30(0.0252620988f/M_PI), Q30(0.0293955617f/M_PI), Q30(0.0315726399f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.0062100487f/M_PI), Q30(0.0096135242f/M_PI), Q30(0.0142110568f/M_PI), Q30(0.0165348612f/M_PI), Q30(0.0177587029f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.0034913034f/M_PI), Q30(0.0054070661f/M_PI), Q30(0.0079917293f/M_PI), Q30(0.0092999367f/M_PI), Q30(0.0099875759f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.0019607407f/M_PI), Q30(0.0030395309f/M_PI), Q30(0.0044951206f/M_PI), Q30(0.0052305623f/M_PI), Q30(0.0056152637f/M_PI),
+      Q30(0.0000000000f/M_PI), Q30(0.0011053939f/M_PI), Q30(0.0017089852f/M_PI), Q30(0.0025254129f/M_PI), Q30(0.0029398468f/M_PI), Q30(0.0031597170f/M_PI)
+    };
+    /* Two sub-tables stored back to back: 15 "default" entries (indices 0..14), then 31 "fine" entries (indices 15..45). */
+    static const int iid_par_dequant_c1[] = {
+        //iid_par_dequant_default
+        Q30(1.41198278375959f), Q30(1.40313815268360f), Q30(1.38687670404960f), Q30(1.34839972492648f),
+        Q30(1.29124937110028f), Q30(1.19603741667993f), Q30(1.10737240362323f), Q30(1),
+        Q30(0.87961716655242f), Q30(0.75464859232732f), Q30(0.57677990744575f), Q30(0.42640143271122f),
+        Q30(0.27671828230984f), Q30(0.17664462766713f), Q30(0.07940162697653f),
+        //iid_par_dequant_fine
+        Q30(1.41420649135832f), Q30(1.41419120222364f), Q30(1.41414285699784f), Q30(1.41399000859438f),
+        Q30(1.41350698548044f), Q30(1.41198278375959f), Q30(1.40977302262355f), Q30(1.40539479488545f),
+        Q30(1.39677960498402f), Q30(1.38005309967827f), Q30(1.34839972492648f), Q30(1.31392017367631f),
+        Q30(1.26431008149654f), Q30(1.19603741667993f), Q30(1.10737240362323f), Q30(1),
+        Q30(0.87961716655242f), Q30(0.75464859232732f), Q30(0.63365607219232f), Q30(0.52308104267543f),
+        Q30(0.42640143271122f), Q30(0.30895540465965f), Q30(0.22137464873077f), Q30(0.15768788954414f),
+        Q30(0.11198225164225f), Q30(0.07940162697653f), Q30(0.04469901562677f), Q30(0.02514469318284f),
+        Q30(0.01414142856998f), Q30(0.00795258154731f), Q30(0.00447211359449f),
+    };
+
+    static const int acos_icc_invq[] = {
+        Q31(0), Q31(0.178427635f/M_PI), Q31(0.28566733f/M_PI), Q31(0.46307236f/M_PI), Q31(0.59716315f/M_PI), Q31(0.78539816f/M_PI), Q31(1.10030855f/M_PI), Q31(1.57079633f/M_PI)
+    };
+    int iid, icc;
+
+    int k, m;
+    static const int8_t f_center_20[] = { /* 10 entries; later bands use (k << 3) - 52 in the loop below */
+        -3, -1, 1, 3, 5, 7, 10, 14, 18, 22,
+    };
+    static const int32_t f_center_34[] = { /* 32 entries; later bands use the shifted formula in the loop below */
+      Q31(  2/768.0),Q31(  6/768.0),Q31(10/768.0),Q31(14/768.0),Q31( 18/768.0),Q31( 22/768.0),Q31( 26/768.0),Q31(30/768.0),
+      Q31( 34/768.0),Q31(-10/768.0),Q31(-6/768.0),Q31(-2/768.0),Q31( 51/768.0),Q31( 57/768.0),Q31( 15/768.0),Q31(21/768.0),
+      Q31( 27/768.0),Q31( 33/768.0),Q31(39/768.0),Q31(45/768.0),Q31( 54/768.0),Q31( 66/768.0),Q31( 78/768.0),Q31(42/768.0),
+      Q31(102/768.0),Q31( 66/768.0),Q31(78/768.0),Q31(90/768.0),Q31(102/768.0),Q31(114/768.0),Q31(126/768.0),Q31(90/768.0)
+    };
+    static const int fractional_delay_links[] = { Q31(0.43f), Q31(0.75f), Q31(0.347f) };
+    const int fractional_delay_gain = Q31(0.39f);
+    /* pd_re/im_smooth: weighted sum (1/4, 1/2, 1) of three cos/sin table entries, rescaled by 1/sqrt(pd_mag), per (pd0, pd1, pd2). */
+    for (pd0 = 0; pd0 < 8; pd0++) {
+        int pd0_re = (ipdopd_cos[pd0]+2)>>2; /* weight 1/4, rounded */
+        int pd0_im = (ipdopd_sin[pd0]+2)>>2;
+        for (pd1 = 0; pd1 < 8; pd1++) {
+            int pd1_re = ipdopd_cos[pd1] >> 1; /* weight 1/2 */
+            int pd1_im = ipdopd_sin[pd1] >> 1;
+            for (pd2 = 0; pd2 < 8; pd2++) {
+                int shift, round;
+                int pd2_re = ipdopd_cos[pd2];
+                int pd2_im = ipdopd_sin[pd2];
+                int re_smooth = pd0_re + pd1_re + pd2_re;
+                int im_smooth = pd0_im + pd1_im + pd2_im;
+                /* 0x15000000 / 2^28 == 1.3125 == 1 + 1/4 + 1/16; presumably this sum is |re_smooth + i*im_smooth|^2 -- TODO confirm. */
+                SoftFloat pd_mag = av_int2sf(((ipdopd_cos[(pd0-pd1)&7]+8)>>4) + ((ipdopd_cos[(pd0-pd2)&7]+4)>>3) +
+                                               ((ipdopd_cos[(pd1-pd2)&7]+2)>>2) + 0x15000000, 28);
+                pd_mag = av_div_sf(FLOAT_1, av_sqrt_sf(pd_mag)); /* FLOAT_1 divided by the square root, still a SoftFloat */
+                shift = 30 - pd_mag.exp;
+                round = 1 << (shift-1); /* half of the range shifted out below: round to nearest */
+                pd_re_smooth[pd0*64+pd1*8+pd2] = (int)(((int64_t)re_smooth * pd_mag.mant + round) >> shift);
+                pd_im_smooth[pd0*64+pd1*8+pd2] = (int)(((int64_t)im_smooth * pd_mag.mant + round) >> shift);
+            }
+        }
+    }
+    /* HA / HB: four coefficients for every (iid, icc) pair; idx walks alpha_tab / gamma_tab. */
+    idx = 0;
+    for (iid = 0; iid < 46; iid++) {
+        int c1, c2;
+
+        c1 = iid_par_dequant_c1[iid];
+        if (iid < 15) /* c2 is the mirrored entry inside the same sub-table (0..14 or 15..45) */
+          c2 = iid_par_dequant_c1[14-iid];
+        else
+          c2 = iid_par_dequant_c1[60-iid];
+
+        for (icc = 0; icc < 8; icc++) { /* the if/else is commented out, so both brace blocks run on every pass */
+            /*if (PS_BASELINE || ps->icc_mode < 3)*/{
+                int alpha, beta;
+                int ca, sa, cb, sb;
+
+                alpha = acos_icc_invq[icc];
+                beta = (int)(((int64_t)alpha * 1518500250 + 0x40000000) >> 31); /* 1518500250 ~= 2^31 / sqrt(2) */
+                alpha >>= 1;
+                beta = (int)(((int64_t)beta * (c1 - c2) + 0x40000000) >> 31);
+                av_sincos_sf(beta + alpha, &sa, &ca);
+                av_sincos_sf(beta - alpha, &sb, &cb);
+                /* av_sincos_sf() is declared outside this file; presumably it takes the angle in pi-normalised fixed point -- TODO confirm. */
+                HA[iid][icc][0] = (int)(((int64_t)c2 * ca + 0x20000000) >> 30);
+                HA[iid][icc][1] = (int)(((int64_t)c1 * cb + 0x20000000) >> 30);
+                HA[iid][icc][2] = (int)(((int64_t)c2 * sa + 0x20000000) >> 30);
+                HA[iid][icc][3] = (int)(((int64_t)c1 * sb + 0x20000000) >> 30);
+            } /* else */ {
+                int alpha_int, gamma_int;
+                int alpha_c_int, alpha_s_int, gamma_c_int, gamma_s_int;
+
+                alpha_int = alpha_tab[idx];
+                gamma_int = gamma_tab[idx];
+
+                av_sincos_sf(alpha_int, &alpha_s_int, &alpha_c_int);
+                av_sincos_sf(gamma_int, &gamma_s_int, &gamma_c_int);
+
+                alpha_c_int = (int)(((int64_t)alpha_c_int * 1518500250 + 0x20000000) >> 30); /* >> 30 with this constant: times ~sqrt(2) */
+                alpha_s_int = (int)(((int64_t)alpha_s_int * 1518500250 + 0x20000000) >> 30);
+
+                HB[iid][icc][0] = (int)(((int64_t)alpha_c_int * gamma_c_int + 0x20000000) >> 30);
+                HB[iid][icc][1] = (int)(((int64_t)alpha_s_int * gamma_c_int + 0x20000000) >> 30);
+                HB[iid][icc][2] = -(int)(((int64_t)alpha_s_int * gamma_s_int + 0x20000000) >> 30);
+                HB[iid][icc][3] = (int)(((int64_t)alpha_c_int * gamma_s_int + 0x20000000) >> 30);
+            }
+            /* idx is not advanced after icc 5 and 6, so icc 5, 6 and 7 read the same entry: six entries per iid. */
+            if (icc < 5 || icc > 6)
+              idx++;
+        }
+    }
+    /* Q_fract_allpass[0] / phi_fract[0]: cos/sin pairs for NR_ALLPASS_BANDS20 bands. */
+    for (k = 0; k < NR_ALLPASS_BANDS20; k++) {
+        int theta;
+        int64_t f_center;
+        int c, s;
+
+        if (k < FF_ARRAY_ELEMS(f_center_20))
+          f_center = f_center_20[k];
+        else
+          f_center = (k << 3) - 52;
+
+        for (m = 0; m < PS_AP_LINKS; m++) {
+            theta = (int)(((int64_t)fractional_delay_links[m] * f_center + 8) >> 4); /* + 8 rounds the >> 4 */
+            av_sincos_sf(-theta, &s, &c);
+            Q_fract_allpass[0][k][m][0] = c;
+            Q_fract_allpass[0][k][m][1] = s;
+        }
+
+        theta = (int)(((int64_t)fractional_delay_gain * f_center + 8) >> 4);
+        av_sincos_sf(-theta, &s, &c);
+        phi_fract[0][k][0] = c;
+        phi_fract[0][k][1] = s;
+    }
+    /* Q_fract_allpass[1] / phi_fract[1]: same layout for NR_ALLPASS_BANDS34 bands, with f_center taken from Q31 values. */
+    for (k = 0; k < NR_ALLPASS_BANDS34; k++) {
+        int theta, f_center;
+        int c, s;
+
+        if (k < FF_ARRAY_ELEMS(f_center_34))
+            f_center = f_center_34[k];
+        else
+            f_center = ((int64_t)k << 26) - (53 << 25);
+
+        for (m = 0; m < PS_AP_LINKS; m++) {
+            theta = (int)(((int64_t)fractional_delay_links[m] * f_center + 0x10000000) >> 27); /* + 0x10000000 rounds the >> 27 */
+            av_sincos_sf(-theta, &s, &c);
+            Q_fract_allpass[1][k][m][0] = c;
+            Q_fract_allpass[1][k][m][1] = s;
+        }
+
+        theta = (int)(((int64_t)fractional_delay_gain * f_center + 0x10000000) >> 27);
+        av_sincos_sf(-theta, &s, &c);
+        phi_fract[1][k][0] = c;
+        phi_fract[1][k][1] = s;
+    }
+    /* Finally build the four f*_ filter tables from their prototype coefficient arrays. */
+    make_filters_from_proto(f20_0_8,  g0_Q8,   8);
+    make_filters_from_proto(f34_0_12, g0_Q12, 12);
+    make_filters_from_proto(f34_1_8,  g1_Q8,   8);
+    make_filters_from_proto(f34_2_4,  g2_Q4,   4);
+}
+#endif /* CONFIG_HARDCODED_TABLES */
+
+#endif /* AVCODEC_AACPS_FIXED_TABLEGEN_H */
diff --git a/libavcodec/aacps_float.c b/libavcodec/aacps_float.c
new file mode 100644
index 00000000..73259c10
--- /dev/null
+++ b/libavcodec/aacps_float.c
@@ -0,0 +1,24 @@
+/*
+ * MPEG-4 Parametric Stereo decoding functions
+ * Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define USE_FIXED 0
+
+#include "aacps.c"
diff --git a/libavcodec/aacps_tablegen.c b/libavcodec/aacps_tablegen.c
index f56930b9..26a6752f 100644
--- a/libavcodec/aacps_tablegen.c
+++ b/libavcodec/aacps_tablegen.c
@@ -20,74 +20,5 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include <stdlib.h>
-#define CONFIG_HARDCODED_TABLES 0
-#include "aacps_tablegen.h"
-#include "tableprint.h"
-
-void write_float_3d_array (const void *p, int b, int c, int d)
-{
-    int i;
-    const float *f = p;
-    for (i = 0; i < b; i++) {
-        printf("{\n");
-        write_float_2d_array(f, c, d);
-        printf("},\n");
-        f += c * d;
-    }
-}
-
-void write_float_4d_array (const void *p, int a, int b, int c, int d)
-{
-    int i;
-    const float *f = p;
-    for (i = 0; i < a; i++) {
-        printf("{\n");
-        write_float_3d_array(f, b, c, d);
-        printf("},\n");
-        f += b * c * d;
-    }
-}
-
-int main(void)
-{
-    ps_tableinit();
-
-    write_fileheader();
-
-    printf("static const float pd_re_smooth[8*8*8] = {\n");
-    write_float_array(pd_re_smooth, 8*8*8);
-    printf("};\n");
-    printf("static const float pd_im_smooth[8*8*8] = {\n");
-    write_float_array(pd_im_smooth, 8*8*8);
-    printf("};\n");
-
-    printf("static const float HA[46][8][4] = {\n");
-    write_float_3d_array(HA, 46, 8, 4);
-    printf("};\n");
-    printf("static const float HB[46][8][4] = {\n");
-    write_float_3d_array(HB, 46, 8, 4);
-    printf("};\n");
-
-    printf("static const DECLARE_ALIGNED(16, float, f20_0_8)[8][8][2] = {\n");
-    write_float_3d_array(f20_0_8, 8, 8, 2);
-    printf("};\n");
-    printf("static const DECLARE_ALIGNED(16, float, f34_0_12)[12][8][2] = {\n");
-    write_float_3d_array(f34_0_12, 12, 8, 2);
-    printf("};\n");
-    printf("static const DECLARE_ALIGNED(16, float, f34_1_8)[8][8][2] = {\n");
-    write_float_3d_array(f34_1_8, 8, 8, 2);
-    printf("};\n");
-    printf("static const DECLARE_ALIGNED(16, float, f34_2_4)[4][8][2] = {\n");
-    write_float_3d_array(f34_2_4, 4, 8, 2);
-    printf("};\n");
-
-    printf("static const DECLARE_ALIGNED(16, float, Q_fract_allpass)[2][50][3][2] = {\n");
-    write_float_4d_array(Q_fract_allpass, 2, 50, 3, 2);
-    printf("};\n");
-    printf("static const DECLARE_ALIGNED(16, float, phi_fract)[2][50][2] = {\n");
-    write_float_3d_array(phi_fract, 2, 50, 2);
-    printf("};\n");
-
-    return 0;
-}
+#define USE_FIXED 0
+#include "aacps_tablegen_template.c"
diff --git a/libavcodec/aacps_tablegen.h b/libavcodec/aacps_tablegen.h
index ca1112dd..0ac4f68d 100644
--- a/libavcodec/aacps_tablegen.h
+++ b/libavcodec/aacps_tablegen.h
@@ -20,8 +20,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef AACPS_TABLEGEN_H
-#define AACPS_TABLEGEN_H
+#ifndef AVCODEC_AACPS_TABLEGEN_H
+#define AVCODEC_AACPS_TABLEGEN_H
 
 #include <math.h>
 #include <stdint.h>
@@ -136,7 +136,7 @@ static av_cold void ps_tableinit(void)
                 float pd2_im = ipdopd_sin[pd2];
                 float re_smooth = 0.25f * pd0_re + 0.5f * pd1_re + pd2_re;
                 float im_smooth = 0.25f * pd0_im + 0.5f * pd1_im + pd2_im;
-                float pd_mag = 1 / sqrt(im_smooth * im_smooth + re_smooth * re_smooth);
+                float pd_mag = 1 / hypot(im_smooth, re_smooth);
                 pd_re_smooth[pd0*64+pd1*8+pd2] = re_smooth * pd_mag;
                 pd_im_smooth[pd0*64+pd1*8+pd2] = im_smooth * pd_mag;
             }
@@ -214,4 +214,4 @@ static av_cold void ps_tableinit(void)
 }
 #endif /* CONFIG_HARDCODED_TABLES */
 
-#endif /* AACPS_TABLEGEN_H */
+#endif /* AVCODEC_AACPS_TABLEGEN_H */
diff --git a/libavcodec/aacps_tablegen_template.c b/libavcodec/aacps_tablegen_template.c
new file mode 100644
index 00000000..341bd444
--- /dev/null
+++ b/libavcodec/aacps_tablegen_template.c
@@ -0,0 +1,107 @@
+/*
+ * Generate a header file for hardcoded Parametric Stereo tables
+ *
+ * Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdlib.h>
+#define CONFIG_HARDCODED_TABLES 0
+#include "aac_defines.h"
+
+#if USE_FIXED
+#define TYPE_NAME "int32_t"
+typedef int32_t INT32FLOAT;
+#define ARRAY_RENAME(x) write_int32_t_ ## x
+#define ARRAY_URENAME(x) write_uint32_t_ ## x
+#include "aacps_fixed_tablegen.h"
+#else
+#define TYPE_NAME "float"
+typedef float INT32FLOAT;
+#define ARRAY_RENAME(x) write_float_ ## x
+#define ARRAY_URENAME(x) write_float_ ## x
+#include "aacps_tablegen.h"
+#endif /* USE_FIXED */
+#include "tableprint.h"
+
+void ARRAY_RENAME(3d_array) (const void *p, int b, int c, int d)
+{
+    /* p is read as a flat INT32FLOAT array holding b consecutive runs of c * d
+     * values; each run is printed by the 2d writer inside its own "{ }," pair. */
+    const INT32FLOAT *run = p;
+    for (; b > 0; b--, run += c * d) {
+        printf("{\n");
+        ARRAY_URENAME(2d_array)(run, c, d);
+        printf("},\n");
+    }
+}
+
+void ARRAY_RENAME(4d_array) (const void *p, int a, int b, int c, int d)
+{
+    /* p is read as a flat INT32FLOAT array of a consecutive runs of b * c * d
+     * values; each run goes to the 3d writer and gets its own "{ }," pair. */
+    const INT32FLOAT *run = p;
+    for (; a > 0; a--, run += b * c * d) {
+        printf("{\n");
+        ARRAY_RENAME(3d_array)(run, b, c, d);
+        printf("},\n");
+    }
+}
+
+int main(void)
+{
+    ps_tableinit(); /* fill the tables at run time (CONFIG_HARDCODED_TABLES is defined as 0 above) */
+
+    write_fileheader(); /* helper from tableprint.h (not visible here); presumably emits the file preamble */
+    /* Each table is printed to stdout as a C initializer; TYPE_NAME is "float" or "int32_t". */
+    printf("static const %s pd_re_smooth[8*8*8] = {\n", TYPE_NAME);
+    ARRAY_RENAME(array)(pd_re_smooth, 8*8*8);
+    printf("};\n");
+    printf("static const %s pd_im_smooth[8*8*8] = {\n", TYPE_NAME);
+    ARRAY_RENAME(array)(pd_im_smooth, 8*8*8);
+    printf("};\n");
+
+    printf("static const %s HA[46][8][4] = {\n", TYPE_NAME);
+    ARRAY_RENAME(3d_array)(HA, 46, 8, 4);
+    printf("};\n");
+    printf("static const %s HB[46][8][4] = {\n", TYPE_NAME);
+    ARRAY_RENAME(3d_array)(HB, 46, 8, 4);
+    printf("};\n");
+    /* Every table from here on is declared through DECLARE_ALIGNED(16, ...). */
+    printf("static const DECLARE_ALIGNED(16, %s, f20_0_8)[8][8][2] = {\n", TYPE_NAME);
+    ARRAY_RENAME(3d_array)(f20_0_8, 8, 8, 2);
+    printf("};\n");
+    printf("static const DECLARE_ALIGNED(16, %s, f34_0_12)[12][8][2] = {\n", TYPE_NAME);
+    ARRAY_RENAME(3d_array)(f34_0_12, 12, 8, 2);
+    printf("};\n");
+    printf("static const DECLARE_ALIGNED(16, %s, f34_1_8)[8][8][2] = {\n", TYPE_NAME);
+    ARRAY_RENAME(3d_array)(f34_1_8, 8, 8, 2);
+    printf("};\n");
+    printf("static const DECLARE_ALIGNED(16, %s, f34_2_4)[4][8][2] = {\n", TYPE_NAME);
+    ARRAY_RENAME(3d_array)(f34_2_4, 4, 8, 2);
+    printf("};\n");
+
+    printf("static const DECLARE_ALIGNED(16, %s, Q_fract_allpass)[2][50][3][2] = {\n", TYPE_NAME);
+    ARRAY_RENAME(4d_array)(Q_fract_allpass, 2, 50, 3, 2);
+    printf("};\n");
+    printf("static const DECLARE_ALIGNED(16, %s, phi_fract)[2][50][2] = {\n", TYPE_NAME);
+    ARRAY_RENAME(3d_array)(phi_fract, 2, 50, 2);
+    printf("};\n");
+
+    return 0;
+}
diff --git a/libavcodec/aacpsdata.c b/libavcodec/aacpsdata.c
index 7431caeb..5c1a1b0f 100644
--- a/libavcodec/aacpsdata.c
+++ b/libavcodec/aacpsdata.c
@@ -157,7 +157,7 @@ static const int8_t k_to_i_34[] = {
     33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33
 };
 
-static const float g1_Q2[] = {
-    0.0f,  0.01899487526049f, 0.0f, -0.07293139167538f,
-    0.0f,  0.30596630545168f, 0.5f
+static const INTFLOAT g1_Q2[] = {
+    Q31(0.0f),  Q31(0.01899487526049f), Q31(0.0f), Q31(-0.07293139167538f),
+    Q31(0.0f),  Q31(0.30596630545168f), Q31(0.5f)
 };
diff --git a/libavcodec/aacpsdsp.c b/libavcodec/aacpsdsp.c
deleted file mode 100644
index 5dc1a6ab..00000000
--- a/libavcodec/aacpsdsp.c
+++ /dev/null
@@ -1,216 +0,0 @@
-/*
- * Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "config.h"
-#include "libavutil/attributes.h"
-#include "aacpsdsp.h"
-
-static void ps_add_squares_c(float *dst, const float (*src)[2], int n)
-{
-    int i;
-    for (i = 0; i < n; i++)
-        dst[i] += src[i][0] * src[i][0] + src[i][1] * src[i][1];
-}
-
-static void ps_mul_pair_single_c(float (*dst)[2], float (*src0)[2], float *src1,
-                                 int n)
-{
-    int i;
-    for (i = 0; i < n; i++) {
-        dst[i][0] = src0[i][0] * src1[i];
-        dst[i][1] = src0[i][1] * src1[i];
-    }
-}
-
-static void ps_hybrid_analysis_c(float (*out)[2], float (*in)[2],
-                                 const float (*filter)[8][2],
-                                 int stride, int n)
-{
-    int i, j;
-
-    for (i = 0; i < n; i++) {
-        float sum_re = filter[i][6][0] * in[6][0];
-        float sum_im = filter[i][6][0] * in[6][1];
-
-        for (j = 0; j < 6; j++) {
-            float in0_re = in[j][0];
-            float in0_im = in[j][1];
-            float in1_re = in[12-j][0];
-            float in1_im = in[12-j][1];
-            sum_re += filter[i][j][0] * (in0_re + in1_re) -
-                      filter[i][j][1] * (in0_im - in1_im);
-            sum_im += filter[i][j][0] * (in0_im + in1_im) +
-                      filter[i][j][1] * (in0_re - in1_re);
-        }
-        out[i * stride][0] = sum_re;
-        out[i * stride][1] = sum_im;
-    }
-}
-
-static void ps_hybrid_analysis_ileave_c(float (*out)[32][2], float L[2][38][64],
-                                        int i, int len)
-{
-    int j;
-
-    for (; i < 64; i++) {
-        for (j = 0; j < len; j++) {
-            out[i][j][0] = L[0][j][i];
-            out[i][j][1] = L[1][j][i];
-        }
-    }
-}
-
-static void ps_hybrid_synthesis_deint_c(float out[2][38][64],
-                                        float (*in)[32][2],
-                                        int i, int len)
-{
-    int n;
-
-    for (; i < 64; i++) {
-        for (n = 0; n < len; n++) {
-            out[0][n][i] = in[i][n][0];
-            out[1][n][i] = in[i][n][1];
-        }
-    }
-}
-
-static void ps_decorrelate_c(float (*out)[2], float (*delay)[2],
-                             float (*ap_delay)[PS_QMF_TIME_SLOTS + PS_MAX_AP_DELAY][2],
-                             const float phi_fract[2], const float (*Q_fract)[2],
-                             const float *transient_gain,
-                             float g_decay_slope,
-                             int len)
-{
-    static const float a[] = { 0.65143905753106f,
-                               0.56471812200776f,
-                               0.48954165955695f };
-    float ag[PS_AP_LINKS];
-    int m, n;
-
-    for (m = 0; m < PS_AP_LINKS; m++)
-        ag[m] = a[m] * g_decay_slope;
-
-    for (n = 0; n < len; n++) {
-        float in_re = delay[n][0] * phi_fract[0] - delay[n][1] * phi_fract[1];
-        float in_im = delay[n][0] * phi_fract[1] + delay[n][1] * phi_fract[0];
-        for (m = 0; m < PS_AP_LINKS; m++) {
-            float a_re                = ag[m] * in_re;
-            float a_im                = ag[m] * in_im;
-            float link_delay_re       = ap_delay[m][n+2-m][0];
-            float link_delay_im       = ap_delay[m][n+2-m][1];
-            float fractional_delay_re = Q_fract[m][0];
-            float fractional_delay_im = Q_fract[m][1];
-            float apd_re = in_re;
-            float apd_im = in_im;
-            in_re = link_delay_re * fractional_delay_re -
-                    link_delay_im * fractional_delay_im - a_re;
-            in_im = link_delay_re * fractional_delay_im +
-                    link_delay_im * fractional_delay_re - a_im;
-            ap_delay[m][n+5][0] = apd_re + ag[m] * in_re;
-            ap_delay[m][n+5][1] = apd_im + ag[m] * in_im;
-        }
-        out[n][0] = transient_gain[n] * in_re;
-        out[n][1] = transient_gain[n] * in_im;
-    }
-}
-
-static void ps_stereo_interpolate_c(float (*l)[2], float (*r)[2],
-                                    float h[2][4], float h_step[2][4],
-                                    int len)
-{
-    float h0 = h[0][0];
-    float h1 = h[0][1];
-    float h2 = h[0][2];
-    float h3 = h[0][3];
-    float hs0 = h_step[0][0];
-    float hs1 = h_step[0][1];
-    float hs2 = h_step[0][2];
-    float hs3 = h_step[0][3];
-    int n;
-
-    for (n = 0; n < len; n++) {
-        //l is s, r is d
-        float l_re = l[n][0];
-        float l_im = l[n][1];
-        float r_re = r[n][0];
-        float r_im = r[n][1];
-        h0 += hs0;
-        h1 += hs1;
-        h2 += hs2;
-        h3 += hs3;
-        l[n][0] = h0 * l_re + h2 * r_re;
-        l[n][1] = h0 * l_im + h2 * r_im;
-        r[n][0] = h1 * l_re + h3 * r_re;
-        r[n][1] = h1 * l_im + h3 * r_im;
-    }
-}
-
-static void ps_stereo_interpolate_ipdopd_c(float (*l)[2], float (*r)[2],
-                                           float h[2][4], float h_step[2][4],
-                                           int len)
-{
-    float h00  = h[0][0],      h10  = h[1][0];
-    float h01  = h[0][1],      h11  = h[1][1];
-    float h02  = h[0][2],      h12  = h[1][2];
-    float h03  = h[0][3],      h13  = h[1][3];
-    float hs00 = h_step[0][0], hs10 = h_step[1][0];
-    float hs01 = h_step[0][1], hs11 = h_step[1][1];
-    float hs02 = h_step[0][2], hs12 = h_step[1][2];
-    float hs03 = h_step[0][3], hs13 = h_step[1][3];
-    int n;
-
-    for (n = 0; n < len; n++) {
-        //l is s, r is d
-        float l_re = l[n][0];
-        float l_im = l[n][1];
-        float r_re = r[n][0];
-        float r_im = r[n][1];
-        h00 += hs00;
-        h01 += hs01;
-        h02 += hs02;
-        h03 += hs03;
-        h10 += hs10;
-        h11 += hs11;
-        h12 += hs12;
-        h13 += hs13;
-
-        l[n][0] = h00 * l_re + h02 * r_re - h10 * l_im - h12 * r_im;
-        l[n][1] = h00 * l_im + h02 * r_im + h10 * l_re + h12 * r_re;
-        r[n][0] = h01 * l_re + h03 * r_re - h11 * l_im - h13 * r_im;
-        r[n][1] = h01 * l_im + h03 * r_im + h11 * l_re + h13 * r_re;
-    }
-}
-
-av_cold void ff_psdsp_init(PSDSPContext *s)
-{
-    s->add_squares            = ps_add_squares_c;
-    s->mul_pair_single        = ps_mul_pair_single_c;
-    s->hybrid_analysis        = ps_hybrid_analysis_c;
-    s->hybrid_analysis_ileave = ps_hybrid_analysis_ileave_c;
-    s->hybrid_synthesis_deint = ps_hybrid_synthesis_deint_c;
-    s->decorrelate            = ps_decorrelate_c;
-    s->stereo_interpolate[0]  = ps_stereo_interpolate_c;
-    s->stereo_interpolate[1]  = ps_stereo_interpolate_ipdopd_c;
-
-    if (ARCH_ARM)
-        ff_psdsp_init_arm(s);
-    if (ARCH_MIPS)
-        ff_psdsp_init_mips(s);
-}
diff --git a/libavcodec/aacpsdsp.h b/libavcodec/aacpsdsp.h
index 0ef30236..ad9bbb81 100644
--- a/libavcodec/aacpsdsp.h
+++ b/libavcodec/aacpsdsp.h
@@ -18,37 +18,40 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef LIBAVCODEC_AACPSDSP_H
-#define LIBAVCODEC_AACPSDSP_H
+#ifndef AVCODEC_AACPSDSP_H
+#define AVCODEC_AACPSDSP_H
+
+#include "aac_defines.h"
 
 #define PS_QMF_TIME_SLOTS 32
 #define PS_AP_LINKS 3
 #define PS_MAX_AP_DELAY 5
 
 typedef struct PSDSPContext {
-    void (*add_squares)(float *dst, const float (*src)[2], int n);
-    void (*mul_pair_single)(float (*dst)[2], float (*src0)[2], float *src1,
+    void (*add_squares)(INTFLOAT *dst, const INTFLOAT (*src)[2], int n);
+    void (*mul_pair_single)(INTFLOAT (*dst)[2], INTFLOAT (*src0)[2], INTFLOAT *src1,
                             int n);
-    void (*hybrid_analysis)(float (*out)[2], float (*in)[2],
-                            const float (*filter)[8][2],
+    void (*hybrid_analysis)(INTFLOAT (*out)[2], INTFLOAT (*in)[2],
+                            const INTFLOAT (*filter)[8][2],
                             int stride, int n);
-    void (*hybrid_analysis_ileave)(float (*out)[32][2], float L[2][38][64],
+    void (*hybrid_analysis_ileave)(INTFLOAT (*out)[32][2], INTFLOAT L[2][38][64],
                                    int i, int len);
-    void (*hybrid_synthesis_deint)(float out[2][38][64], float (*in)[32][2],
+    void (*hybrid_synthesis_deint)(INTFLOAT out[2][38][64], INTFLOAT (*in)[32][2],
                                    int i, int len);
-    void (*decorrelate)(float (*out)[2], float (*delay)[2],
-                        float (*ap_delay)[PS_QMF_TIME_SLOTS+PS_MAX_AP_DELAY][2],
-                        const float phi_fract[2], const float (*Q_fract)[2],
-                        const float *transient_gain,
-                        float g_decay_slope,
+    void (*decorrelate)(INTFLOAT (*out)[2], INTFLOAT (*delay)[2],
+                        INTFLOAT (*ap_delay)[PS_QMF_TIME_SLOTS+PS_MAX_AP_DELAY][2],
+                        const INTFLOAT phi_fract[2], const INTFLOAT (*Q_fract)[2],
+                        const INTFLOAT *transient_gain,
+                        INTFLOAT g_decay_slope,
                         int len);
-    void (*stereo_interpolate[2])(float (*l)[2], float (*r)[2],
-                                  float h[2][4], float h_step[2][4],
+    void (*stereo_interpolate[2])(INTFLOAT (*l)[2], INTFLOAT (*r)[2],
+                                  INTFLOAT h[2][4], INTFLOAT h_step[2][4],
                                   int len);
 } PSDSPContext;
 
-void ff_psdsp_init(PSDSPContext *s);
+void AAC_RENAME(ff_psdsp_init)(PSDSPContext *s);
 void ff_psdsp_init_arm(PSDSPContext *s);
 void ff_psdsp_init_mips(PSDSPContext *s);
+void ff_psdsp_init_x86(PSDSPContext *s);
 
-#endif /* LIBAVCODEC_AACPSDSP_H */
+#endif /* AVCODEC_AACPSDSP_H */
diff --git a/libavcodec/aacpsdsp_fixed.c b/libavcodec/aacpsdsp_fixed.c
new file mode 100644
index 00000000..24132951
--- /dev/null
+++ b/libavcodec/aacpsdsp_fixed.c
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define USE_FIXED 1
+
+#include "aacpsdsp_template.c"
diff --git a/libavcodec/aacpsdsp_float.c b/libavcodec/aacpsdsp_float.c
new file mode 100644
index 00000000..99aa650a
--- /dev/null
+++ b/libavcodec/aacpsdsp_float.c
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define USE_FIXED 0
+
+#include "aacpsdsp_template.c"
diff --git a/libavcodec/aacpsdsp_template.c b/libavcodec/aacpsdsp_template.c
new file mode 100644
index 00000000..3049ce8b
--- /dev/null
+++ b/libavcodec/aacpsdsp_template.c
@@ -0,0 +1,230 @@
+/*
+ * Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Note: Rounding-to-nearest used unless otherwise stated
+ *
+ */
+#include <stdint.h>
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "aacpsdsp.h"
+
+static void ps_add_squares_c(INTFLOAT *dst, const INTFLOAT (*src)[2], int n)
+{
+    /* Add AAC_MADD28 of each pair's two components with themselves onto dst. */
+    for (; n > 0; n--, dst++, src++)
+        *dst += AAC_MADD28((*src)[0], (*src)[0], (*src)[1], (*src)[1]);
+}
+
+static void ps_mul_pair_single_c(INTFLOAT (*dst)[2], INTFLOAT (*src0)[2], INTFLOAT *src1,
+                                 int n)
+{
+    /* Both components of each src0 pair are multiplied by the matching src1 value. */
+    for (; n > 0; n--, dst++, src0++, src1++) {
+        (*dst)[0] = AAC_MUL16((*src0)[0], *src1);
+        (*dst)[1] = AAC_MUL16((*src0)[1], *src1);
+    }
+}
+
+static void ps_hybrid_analysis_c(INTFLOAT (*out)[2], INTFLOAT (*in)[2],
+                                 const INTFLOAT (*filter)[8][2],
+                                 int stride, int n)
+{
+    int i, j;
+    /* n outputs from in[0..12]: in[6] uses filter[i][6][0] only; in[j] and in[12-j] share filter[i][j]. */
+    for (i = 0; i < n; i++) {
+        INT64FLOAT sum_re = (INT64FLOAT)filter[i][6][0] * in[6][0];
+        INT64FLOAT sum_im = (INT64FLOAT)filter[i][6][0] * in[6][1];
+        /* Inputs are loaded as INT64FLOAT so in0 +/- in1 cannot overflow 32 bits in the fixed build. */
+        for (j = 0; j < 6; j++) {
+            INT64FLOAT in0_re = in[j][0];
+            INT64FLOAT in0_im = in[j][1];
+            INT64FLOAT in1_re = in[12-j][0];
+            INT64FLOAT in1_im = in[12-j][1];
+            sum_re += (INT64FLOAT)filter[i][j][0] * (in0_re + in1_re) -
+                      (INT64FLOAT)filter[i][j][1] * (in0_im - in1_im);
+            sum_im += (INT64FLOAT)filter[i][j][0] * (in0_im + in1_im) +
+                      (INT64FLOAT)filter[i][j][1] * (in0_re - in1_re);
+        }
+#if USE_FIXED
+        out[i * stride][0] = (int)((sum_re + 0x40000000) >> 31); /* add half, then drop 31 bits */
+        out[i * stride][1] = (int)((sum_im + 0x40000000) >> 31);
+#else
+        out[i * stride][0] = sum_re;
+        out[i * stride][1] = sum_im;
+#endif /* USE_FIXED */
+    }
+}
+static void ps_hybrid_analysis_ileave_c(INTFLOAT (*out)[32][2], INTFLOAT L[2][38][64],
+                                      int i, int len)
+{
+    int slot;
+    /* For every index from the incoming i up to 63, copy the first len values
+     * of column i from L[0] and L[1] into the pairs out[i][slot][0] and [1]. */
+    for (; i < 64; i++)
+        for (slot = 0; slot < len; slot++) {
+            out[i][slot][0] = L[0][slot][i];
+            out[i][slot][1] = L[1][slot][i];
+        }
+}
+
+static void ps_hybrid_synthesis_deint_c(INTFLOAT out[2][38][64],
+                                      INTFLOAT (*in)[32][2],
+                                      int i, int len)
+{
+    int slot;
+    /* For every index from the incoming i up to 63, split the first len pairs
+     * in[i][slot][0..1] back into column i of out[0] and out[1]. */
+    for (; i < 64; i++)
+        for (slot = 0; slot < len; slot++) {
+            out[0][slot][i] = in[i][slot][0];
+            out[1][slot][i] = in[i][slot][1];
+        }
+}
+
+static void ps_decorrelate_c(INTFLOAT (*out)[2], INTFLOAT (*delay)[2],
+                             INTFLOAT (*ap_delay)[PS_QMF_TIME_SLOTS + PS_MAX_AP_DELAY][2],
+                             const INTFLOAT phi_fract[2], const INTFLOAT (*Q_fract)[2],
+                             const INTFLOAT *transient_gain,
+                             INTFLOAT g_decay_slope,
+                             int len)
+{
+    static const INTFLOAT a[] = { Q31(0.65143905753106f), /* one base coefficient per link */
+                               Q31(0.56471812200776f),
+                               Q31(0.48954165955695f) };
+    INTFLOAT ag[PS_AP_LINKS]; /* a[m] combined with g_decay_slope, reused for every sample */
+    int m, n;
+    /* The AAC_* arithmetic macros are defined outside this file (NOTE(review): presumably aac_defines.h). */
+    for (m = 0; m < PS_AP_LINKS; m++)
+        ag[m] = AAC_MUL30(a[m], g_decay_slope);
+    /* Per sample: combine delay[n] with phi_fract, pass it through the PS_AP_LINKS links, scale into out[n]. */
+    for (n = 0; n < len; n++) {
+        INTFLOAT in_re = AAC_MSUB30(delay[n][0], phi_fract[0], delay[n][1], phi_fract[1]);
+        INTFLOAT in_im = AAC_MADD30(delay[n][0], phi_fract[1], delay[n][1], phi_fract[0]);
+        for (m = 0; m < PS_AP_LINKS; m++) {
+            INTFLOAT a_re                = AAC_MUL31(ag[m], in_re);
+            INTFLOAT a_im                = AAC_MUL31(ag[m], in_im);
+            INTFLOAT link_delay_re       = ap_delay[m][n+2-m][0]; /* read slot trails the n+5 write slot by 3+m */
+            INTFLOAT link_delay_im       = ap_delay[m][n+2-m][1];
+            INTFLOAT fractional_delay_re = Q_fract[m][0];
+            INTFLOAT fractional_delay_im = Q_fract[m][1];
+            INTFLOAT apd_re = in_re; /* this link's input, kept for the ap_delay write below */
+            INTFLOAT apd_im = in_im;
+            in_re = AAC_MSUB30(link_delay_re, fractional_delay_re,
+                    link_delay_im, fractional_delay_im);
+            in_re -= a_re;
+            in_im = AAC_MADD30(link_delay_re, fractional_delay_im,
+                    link_delay_im, fractional_delay_re);
+            in_im -= a_im; /* in_re/in_im now hold this link's output and feed the next link */
+            ap_delay[m][n+5][0] = apd_re + AAC_MUL31(ag[m], in_re);
+            ap_delay[m][n+5][1] = apd_im + AAC_MUL31(ag[m], in_im);
+        }
+        out[n][0] = AAC_MUL16(transient_gain[n], in_re); /* last link's output times transient_gain[n] */
+        out[n][1] = AAC_MUL16(transient_gain[n], in_im);
+    }
+}
+
+static void ps_stereo_interpolate_c(INTFLOAT (*l)[2], INTFLOAT (*r)[2],
+                                    INTFLOAT h[2][4], INTFLOAT h_step[2][4],
+                                    int len)
+{
+    /* Only row 0 of h and h_step is read here.  Each coefficient is advanced
+     * by its step before it is used, so sample 0 already sees h + h_step.
+     * l and r are overwritten in place. */
+    INTFLOAT co0 = h[0][0], st0 = h_step[0][0];
+    INTFLOAT co1 = h[0][1], st1 = h_step[0][1];
+    INTFLOAT co2 = h[0][2], st2 = h_step[0][2];
+    INTFLOAT co3 = h[0][3], st3 = h_step[0][3];
+    int k;
+
+    for (k = 0; k < len; k++) {
+        /* l is s, r is d; the inputs are saved since both outputs use both */
+        const INTFLOAT lre = l[k][0];
+        const INTFLOAT lim = l[k][1];
+        const INTFLOAT rre = r[k][0];
+        const INTFLOAT rim = r[k][1];
+        /* step the coefficients before applying them */
+        co0 += st0;
+        co1 += st1;
+        co2 += st2;
+        co3 += st3;
+        l[k][0] = AAC_MADD30(co0, lre, co2, rre);
+        l[k][1] = AAC_MADD30(co0, lim, co2, rim);
+        r[k][0] = AAC_MADD30(co1, lre, co3, rre);
+        r[k][1] = AAC_MADD30(co1, lim, co3, rim);
+    }
+}
+
+static void ps_stereo_interpolate_ipdopd_c(INTFLOAT (*l)[2], INTFLOAT (*r)[2],
+                                           INTFLOAT h[2][4], INTFLOAT h_step[2][4],
+                                           int len)
+{
+    INTFLOAT h00  = h[0][0],      h10  = h[1][0]; /* both rows of h are used: h0x and h1x */
+    INTFLOAT h01  = h[0][1],      h11  = h[1][1];
+    INTFLOAT h02  = h[0][2],      h12  = h[1][2];
+    INTFLOAT h03  = h[0][3],      h13  = h[1][3];
+    INTFLOAT hs00 = h_step[0][0], hs10 = h_step[1][0]; /* per-sample increment of each coefficient */
+    INTFLOAT hs01 = h_step[0][1], hs11 = h_step[1][1];
+    INTFLOAT hs02 = h_step[0][2], hs12 = h_step[1][2];
+    INTFLOAT hs03 = h_step[0][3], hs13 = h_step[1][3];
+    int n;
+    /* l and r are updated in place, one pair per iteration. */
+    for (n = 0; n < len; n++) {
+        //l is s, r is d
+        INTFLOAT l_re = l[n][0]; /* inputs saved first: every output below reads all four */
+        INTFLOAT l_im = l[n][1];
+        INTFLOAT r_re = r[n][0];
+        INTFLOAT r_im = r[n][1];
+        h00 += hs00; /* coefficients are stepped before use, so sample 0 sees h + h_step */
+        h01 += hs01;
+        h02 += hs02;
+        h03 += hs03;
+        h10 += hs10;
+        h11 += hs11;
+        h12 += hs12;
+        h13 += hs13;
+        /* NOTE(review): the *_V8 macros are defined elsewhere; presumably (a*b + c*d) -/+ (e*f + g*h). */
+        l[n][0] = AAC_MSUB30_V8(h00, l_re, h02, r_re, h10, l_im, h12, r_im);
+        l[n][1] = AAC_MADD30_V8(h00, l_im, h02, r_im, h10, l_re, h12, r_re);
+        r[n][0] = AAC_MSUB30_V8(h01, l_re, h03, r_re, h11, l_im, h13, r_im);
+        r[n][1] = AAC_MADD30_V8(h01, l_im, h03, r_im, h11, l_re, h13, r_re);
+    }
+}
+
+av_cold void AAC_RENAME(ff_psdsp_init)(PSDSPContext *s)
+{
+    /* C versions go in first; the per-arch init calls (float build only) run afterwards. */
+    s->stereo_interpolate[0]  = ps_stereo_interpolate_c;
+    s->stereo_interpolate[1]  = ps_stereo_interpolate_ipdopd_c;
+    s->decorrelate            = ps_decorrelate_c;
+    s->hybrid_synthesis_deint = ps_hybrid_synthesis_deint_c;
+    s->hybrid_analysis_ileave = ps_hybrid_analysis_ileave_c;
+    s->hybrid_analysis        = ps_hybrid_analysis_c;
+    s->mul_pair_single        = ps_mul_pair_single_c;
+    s->add_squares            = ps_add_squares_c;
+#if !USE_FIXED
+    if (ARCH_ARM)
+        ff_psdsp_init_arm(s);
+    if (ARCH_MIPS)
+        ff_psdsp_init_mips(s);
+    if (ARCH_X86)
+        ff_psdsp_init_x86(s);
+#endif /* !USE_FIXED */
+}
diff --git a/libavcodec/aacpsy.c b/libavcodec/aacpsy.c
index 3a661594..8643fe70 100644
--- a/libavcodec/aacpsy.c
+++ b/libavcodec/aacpsy.c
@@ -25,6 +25,7 @@
  */
 
 #include "libavutil/attributes.h"
+#include "libavutil/internal.h"
 #include "libavutil/libm.h"
 
 #include "avcodec.h"
@@ -80,6 +81,8 @@
 #define PSY_3GPP_AH_THR_LONG    0.5f
 #define PSY_3GPP_AH_THR_SHORT   0.63f
 
+#define PSY_PE_FORGET_SLOPE  511
+
 enum {
     PSY_3GPP_AH_NONE,
     PSY_3GPP_AH_INACTIVE,
@@ -87,6 +90,7 @@ enum {
 };
 
 #define PSY_3GPP_BITS_TO_PE(bits) ((bits) * 1.18f)
+#define PSY_3GPP_PE_TO_BITS(bits) ((bits) / 1.18f)
 
 /* LAME psy model constants */
 #define PSY_LAME_FIR_LEN 21         ///< LAME psy model FIR order
@@ -157,6 +161,7 @@ typedef struct AacPsyContext{
     } pe;
     AacPsyCoeffs psy_coef[2][64];
     AacPsyChannel *ch;
+    float global_quality; ///< normalized global quality taken from avctx
 }AacPsyContext;
 
 /**
@@ -262,7 +267,7 @@ static av_cold void lame_window_init(AacPsyContext *ctx, AVCodecContext *avctx)
     for (i = 0; i < avctx->channels; i++) {
         AacPsyChannel *pch = &ctx->ch[i];
 
-        if (avctx->flags & CODEC_FLAG_QSCALE)
+        if (avctx->flags & AV_CODEC_FLAG_QSCALE)
             pch->attack_threshold = psy_vbr_map[avctx->global_quality / FF_QP2LAMBDA].st_lrm;
         else
             pch->attack_threshold = lame_calc_attack_threshold(avctx->bit_rate / avctx->channels / 1000);
@@ -299,17 +304,24 @@ static av_cold int psy_3gpp_init(FFPsyContext *ctx) {
     float bark;
     int i, j, g, start;
     float prev, minscale, minath, minsnr, pe_min;
-    const int chan_bitrate = ctx->avctx->bit_rate / ctx->avctx->channels;
-    const int bandwidth    = ctx->avctx->cutoff ? ctx->avctx->cutoff : AAC_CUTOFF(ctx->avctx);
+    int chan_bitrate = ctx->avctx->bit_rate / ((ctx->avctx->flags & AV_CODEC_FLAG_QSCALE) ? 2.0f : ctx->avctx->channels);
+
+    const int bandwidth    = ctx->cutoff ? ctx->cutoff : AAC_CUTOFF(ctx->avctx);
     const float num_bark   = calc_bark((float)bandwidth);
 
     ctx->model_priv_data = av_mallocz(sizeof(AacPsyContext));
     if (!ctx->model_priv_data)
         return AVERROR(ENOMEM);
     pctx = (AacPsyContext*) ctx->model_priv_data;
+    pctx->global_quality = (ctx->avctx->global_quality ? ctx->avctx->global_quality : 120) * 0.01f;
+
+    if (ctx->avctx->flags & AV_CODEC_FLAG_QSCALE) {
+        /* Use the target average bitrate to compute spread parameters */
+        chan_bitrate = (int)(chan_bitrate / 120.0 * (ctx->avctx->global_quality ? ctx->avctx->global_quality : 120));
+    }
 
     pctx->chan_bitrate = chan_bitrate;
-    pctx->frame_bits   = chan_bitrate * AAC_BLOCK_SIZE_LONG / ctx->avctx->sample_rate;
+    pctx->frame_bits   = FFMIN(2560, chan_bitrate * AAC_BLOCK_SIZE_LONG / ctx->avctx->sample_rate);
     pctx->pe.min       =  8.0f * AAC_BLOCK_SIZE_LONG * bandwidth / (ctx->avctx->sample_rate * 2.0f);
     pctx->pe.max       = 12.0f * AAC_BLOCK_SIZE_LONG * bandwidth / (ctx->avctx->sample_rate * 2.0f);
     ctx->bitres.size   = 6144 - pctx->frame_bits;
@@ -338,10 +350,10 @@ static av_cold int psy_3gpp_init(FFPsyContext *ctx) {
         for (g = 0; g < ctx->num_bands[j] - 1; g++) {
             AacPsyCoeffs *coeff = &coeffs[g];
             float bark_width = coeffs[g+1].barks - coeffs->barks;
-            coeff->spread_low[0] = pow(10.0, -bark_width * PSY_3GPP_THR_SPREAD_LOW);
-            coeff->spread_hi [0] = pow(10.0, -bark_width * PSY_3GPP_THR_SPREAD_HI);
-            coeff->spread_low[1] = pow(10.0, -bark_width * en_spread_low);
-            coeff->spread_hi [1] = pow(10.0, -bark_width * en_spread_hi);
+            coeff->spread_low[0] = ff_exp10(-bark_width * PSY_3GPP_THR_SPREAD_LOW);
+            coeff->spread_hi [0] = ff_exp10(-bark_width * PSY_3GPP_THR_SPREAD_HI);
+            coeff->spread_low[1] = ff_exp10(-bark_width * en_spread_low);
+            coeff->spread_hi [1] = ff_exp10(-bark_width * en_spread_hi);
             pe_min = bark_pe * bark_width;
             minsnr = exp2(pe_min / band_sizes[g]) - 1.5f;
             coeff->min_snr = av_clipf(1.0f / minsnr, PSY_SNR_25DB, PSY_SNR_1DB);
@@ -397,7 +409,7 @@ static av_unused FFPsyWindowInfo psy_3gpp_window(FFPsyContext *ctx,
                                                  int channel, int prev_type)
 {
     int i, j;
-    int br               = ctx->avctx->bit_rate / ctx->avctx->channels;
+    int br               = ((AacPsyContext*)ctx->model_priv_data)->chan_bitrate;
     int attack_ratio     = br <= 16000 ? 18 : 10;
     AacPsyContext *pctx = (AacPsyContext*) ctx->model_priv_data;
     AacPsyChannel *pch  = &pctx->ch[channel];
@@ -486,7 +498,7 @@ static int calc_bit_demand(AacPsyContext *ctx, float pe, int bits, int size,
     const float bitspend_add   = short_window ? PSY_3GPP_SPEND_ADD_S   : PSY_3GPP_SPEND_ADD_L;
     const float clip_low       = short_window ? PSY_3GPP_CLIP_LO_S     : PSY_3GPP_CLIP_LO_L;
     const float clip_high      = short_window ? PSY_3GPP_CLIP_HI_S     : PSY_3GPP_CLIP_HI_L;
-    float clipped_pe, bit_save, bit_spend, bit_factor, fill_level;
+    float clipped_pe, bit_save, bit_spend, bit_factor, fill_level, forgetful_min_pe;
 
     ctx->fill_level += ctx->frame_bits - bits;
     ctx->fill_level  = av_clip(ctx->fill_level, 0, size);
@@ -503,11 +515,21 @@ static int calc_bit_demand(AacPsyContext *ctx, float pe, int bits, int size,
      * Hopefully below is correct.
      */
     bit_factor = 1.0f - bit_save + ((bit_spend - bit_save) / (ctx->pe.max - ctx->pe.min)) * (clipped_pe - ctx->pe.min);
-    /* NOTE: The reference encoder attempts to center pe max/min around the current pe. */
+    /* NOTE: The reference encoder attempts to center pe max/min around the current pe.
+     * Here we do that by slowly forgetting pe.min when pe stays in a range that makes
+     * it unlikely (ie: above the mean)
+     */
     ctx->pe.max = FFMAX(pe, ctx->pe.max);
-    ctx->pe.min = FFMIN(pe, ctx->pe.min);
+    forgetful_min_pe = ((ctx->pe.min * PSY_PE_FORGET_SLOPE)
+        + FFMAX(ctx->pe.min, pe * (pe / ctx->pe.max))) / (PSY_PE_FORGET_SLOPE + 1);
+    ctx->pe.min = FFMIN(pe, forgetful_min_pe);
 
-    return FFMIN(ctx->frame_bits * bit_factor, ctx->frame_bits + size - bits);
+    /* NOTE: allocate a minimum of 1/8th average frame bits, to avoid
+     *   reservoir starvation from producing zero-bit frames
+     */
+    return FFMIN(
+        ctx->frame_bits * bit_factor,
+        FFMAX(ctx->frame_bits + size - bits, ctx->frame_bits / 8));
 }
 
 static float calc_pe_3gpp(AacPsyBand *band)
@@ -574,26 +596,30 @@ static float calc_reduced_thr_3gpp(AacPsyBand *band, float min_snr,
 
 #ifndef calc_thr_3gpp
 static void calc_thr_3gpp(const FFPsyWindowInfo *wi, const int num_bands, AacPsyChannel *pch,
-                          const uint8_t *band_sizes, const float *coefs)
+                          const uint8_t *band_sizes, const float *coefs, const int cutoff)
 {
     int i, w, g;
-    int start = 0;
+    int start = 0, wstart = 0;
     for (w = 0; w < wi->num_windows*16; w += 16) {
+        wstart = 0;
         for (g = 0; g < num_bands; g++) {
             AacPsyBand *band = &pch->band[w+g];
 
             float form_factor = 0.0f;
             float Temp;
             band->energy = 0.0f;
-            for (i = 0; i < band_sizes[g]; i++) {
-                band->energy += coefs[start+i] * coefs[start+i];
-                form_factor  += sqrtf(fabs(coefs[start+i]));
+            if (wstart < cutoff) {
+                for (i = 0; i < band_sizes[g]; i++) {
+                    band->energy += coefs[start+i] * coefs[start+i];
+                    form_factor  += sqrtf(fabs(coefs[start+i]));
+                }
             }
             Temp = band->energy > 0 ? sqrtf((float)band_sizes[g] / band->energy) : 0;
             band->thr      = band->energy * 0.001258925f;
             band->nz_lines = form_factor * sqrtf(Temp);
 
             start += band_sizes[g];
+            wstart += band_sizes[g];
         }
     }
 }
@@ -634,9 +660,11 @@ static void psy_3gpp_analyze_channel(FFPsyContext *ctx, int channel,
     const uint8_t *band_sizes  = ctx->bands[wi->num_windows == 8];
     AacPsyCoeffs  *coeffs      = pctx->psy_coef[wi->num_windows == 8];
     const float avoid_hole_thr = wi->num_windows == 8 ? PSY_3GPP_AH_THR_SHORT : PSY_3GPP_AH_THR_LONG;
+    const int bandwidth        = ctx->cutoff ? ctx->cutoff : AAC_CUTOFF(ctx->avctx);
+    const int cutoff           = bandwidth * 2048 / wi->num_windows / ctx->avctx->sample_rate;
 
     //calculate energies, initial thresholds and related values - 5.4.2 "Threshold Calculation"
-    calc_thr_3gpp(wi, num_bands, pch, band_sizes, coefs);
+    calc_thr_3gpp(wi, num_bands, pch, band_sizes, coefs, cutoff);
 
     //modify thresholds and energies - spread, threshold in quiet, pre-echo control
     for (w = 0; w < wi->num_windows*16; w += 16) {
@@ -677,16 +705,36 @@ static void psy_3gpp_analyze_channel(FFPsyContext *ctx, int channel,
 
     /* 5.6.1.3.2 "Calculation of the desired perceptual entropy" */
     ctx->ch[channel].entropy = pe;
-    desired_bits = calc_bit_demand(pctx, pe, ctx->bitres.bits, ctx->bitres.size, wi->num_windows == 8);
-    desired_pe = PSY_3GPP_BITS_TO_PE(desired_bits);
-    /* NOTE: PE correction is kept simple. During initial testing it had very
-     *       little effect on the final bitrate. Probably a good idea to come
-     *       back and do more testing later.
-     */
-    if (ctx->bitres.bits > 0)
-        desired_pe *= av_clipf(pctx->pe.previous / PSY_3GPP_BITS_TO_PE(ctx->bitres.bits),
-                               0.85f, 1.15f);
+    if (ctx->avctx->flags & CODEC_FLAG_QSCALE) {
+        /* (2.5 * 120) achieves almost transparent rate, and we want to give
+         * ample room downwards, so we make that equivalent to QSCALE=2.4
+         */
+        desired_pe = pe * (ctx->avctx->global_quality ? ctx->avctx->global_quality : 120) / (2 * 2.5f * 120.0f);
+        desired_bits = FFMIN(2560, PSY_3GPP_PE_TO_BITS(desired_pe));
+        desired_pe = PSY_3GPP_BITS_TO_PE(desired_bits); // reflect clipping
+
+        /* PE slope smoothing (NOTE: currently this only repeats the clamp applied just above) */
+        if (ctx->bitres.bits > 0) {
+            desired_bits = FFMIN(2560, PSY_3GPP_PE_TO_BITS(desired_pe));
+            desired_pe = PSY_3GPP_BITS_TO_PE(desired_bits); // reflect clipping
+        }
+
+        pctx->pe.max = FFMAX(pe, pctx->pe.max);
+        pctx->pe.min = FFMIN(pe, pctx->pe.min);
+    } else {
+        desired_bits = calc_bit_demand(pctx, pe, ctx->bitres.bits, ctx->bitres.size, wi->num_windows == 8);
+        desired_pe = PSY_3GPP_BITS_TO_PE(desired_bits);
+
+        /* NOTE: PE correction is kept simple. During initial testing it had very
+         *       little effect on the final bitrate. Probably a good idea to come
+         *       back and do more testing later.
+         */
+        if (ctx->bitres.bits > 0)
+            desired_pe *= av_clipf(pctx->pe.previous / PSY_3GPP_BITS_TO_PE(ctx->bitres.bits),
+                                   0.85f, 1.15f);
+    }
     pctx->pe.previous = PSY_3GPP_BITS_TO_PE(desired_bits);
+    ctx->bitres.alloc = desired_bits;
 
     if (desired_pe < pe) {
         /* 5.6.1.3.4 "First Estimation of the reduction value" */
@@ -787,6 +835,8 @@ static void psy_3gpp_analyze_channel(FFPsyContext *ctx, int channel,
 
             psy_band->threshold = band->thr;
             psy_band->energy    = band->energy;
+            psy_band->spread    = band->active_lines * 2.0f / band_sizes[g];
+            psy_band->bits      = PSY_3GPP_PE_TO_BITS(band->pe);
         }
     }
 
@@ -836,6 +886,7 @@ static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, const float *audio,
     int grouping     = 0;
     int uselongblock = 1;
     int attacks[AAC_NUM_BLOCKS_SHORT + 1] = { 0 };
+    float clippings[AAC_NUM_BLOCKS_SHORT];
     int i;
     FFPsyWindowInfo wi = { { 0 } };
 
@@ -925,14 +976,35 @@ static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, const float *audio,
 
     lame_apply_block_type(pch, &wi, uselongblock);
 
+    /* Calculate input sample maximums and evaluate clipping risk */
+    if (audio) {
+        for (i = 0; i < AAC_NUM_BLOCKS_SHORT; i++) {
+            const float *wbuf = audio + i * AAC_BLOCK_SIZE_SHORT;
+            float max = 0;
+            int j;
+            for (j = 0; j < AAC_BLOCK_SIZE_SHORT; j++)
+                max = FFMAX(max, fabsf(wbuf[j]));
+            clippings[i] = max;
+        }
+    } else {
+        for (i = 0; i < 8; i++)
+            clippings[i] = 0;
+    }
+
     wi.window_type[1] = prev_type;
     if (wi.window_type[0] != EIGHT_SHORT_SEQUENCE) {
+        float clipping = 0.0f;
+
         wi.num_windows  = 1;
         wi.grouping[0]  = 1;
         if (wi.window_type[0] == LONG_START_SEQUENCE)
             wi.window_shape = 0;
         else
             wi.window_shape = 1;
+
+        for (i = 0; i < 8; i++)
+            clipping = FFMAX(clipping, clippings[i]);
+        wi.clipping[0] = clipping;
     } else {
         int lastgrp = 0;
 
@@ -943,6 +1015,14 @@ static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, const float *audio,
                 lastgrp = i;
             wi.grouping[lastgrp]++;
         }
+
+        for (i = 0; i < 8; i += wi.grouping[i]) {
+            int w;
+            float clipping = 0.0f;
+            for (w = 0; w < wi.grouping[i] && !clipping; w++)
+                clipping = FFMAX(clipping, clippings[i+w]);
+            wi.clipping[i] = clipping;
+        }
     }
 
     /* Determine grouping, based on the location of the first attack, and save for
diff --git a/libavcodec/aacsbr.c b/libavcodec/aacsbr.c
index a39b78de..15956e3f 100644
--- a/libavcodec/aacsbr.c
+++ b/libavcodec/aacsbr.c
@@ -25,6 +25,7 @@
  * AAC Spectral Band Replication decoding functions
  * @author Robert Swain ( rob opendot cl )
  */
+#define USE_FIXED 0
 
 #include "aac.h"
 #include "sbr.h"
@@ -32,6 +33,7 @@
 #include "aacsbrdata.h"
 #include "aacsbr_tablegen.h"
 #include "fft.h"
+#include "internal.h"
 #include "aacps.h"
 #include "sbrdsp.h"
 #include "libavutil/internal.h"
@@ -42,252 +44,13 @@
 #include <float.h>
 #include <math.h>
 
-#define ENVELOPE_ADJUSTMENT_OFFSET 2
-#define NOISE_FLOOR_OFFSET 6.0f
-
 #if ARCH_MIPS
 #include "mips/aacsbr_mips.h"
 #endif /* ARCH_MIPS */
 
-/**
- * SBR VLC tables
- */
-enum {
-    T_HUFFMAN_ENV_1_5DB,
-    F_HUFFMAN_ENV_1_5DB,
-    T_HUFFMAN_ENV_BAL_1_5DB,
-    F_HUFFMAN_ENV_BAL_1_5DB,
-    T_HUFFMAN_ENV_3_0DB,
-    F_HUFFMAN_ENV_3_0DB,
-    T_HUFFMAN_ENV_BAL_3_0DB,
-    F_HUFFMAN_ENV_BAL_3_0DB,
-    T_HUFFMAN_NOISE_3_0DB,
-    T_HUFFMAN_NOISE_BAL_3_0DB,
-};
-
-/**
- * bs_frame_class - frame class of current SBR frame (14496-3 sp04 p98)
- */
-enum {
-    FIXFIX,
-    FIXVAR,
-    VARFIX,
-    VARVAR,
-};
-
-enum {
-    EXTENSION_ID_PS = 2,
-};
-
 static VLC vlc_sbr[10];
-static const int8_t vlc_sbr_lav[10] =
-    { 60, 60, 24, 24, 31, 31, 12, 12, 31, 12 };
-
-#define SBR_INIT_VLC_STATIC(num, size) \
-    INIT_VLC_STATIC(&vlc_sbr[num], 9, sbr_tmp[num].table_size / sbr_tmp[num].elem_size,     \
-                    sbr_tmp[num].sbr_bits ,                      1,                      1, \
-                    sbr_tmp[num].sbr_codes, sbr_tmp[num].elem_size, sbr_tmp[num].elem_size, \
-                    size)
-
-#define SBR_VLC_ROW(name) \
-    { name ## _codes, name ## _bits, sizeof(name ## _codes), sizeof(name ## _codes[0]) }
-
 static void aacsbr_func_ptr_init(AACSBRContext *c);
 
-av_cold void ff_aac_sbr_init(void)
-{
-    static const struct {
-        const void *sbr_codes, *sbr_bits;
-        const unsigned int table_size, elem_size;
-    } sbr_tmp[] = {
-        SBR_VLC_ROW(t_huffman_env_1_5dB),
-        SBR_VLC_ROW(f_huffman_env_1_5dB),
-        SBR_VLC_ROW(t_huffman_env_bal_1_5dB),
-        SBR_VLC_ROW(f_huffman_env_bal_1_5dB),
-        SBR_VLC_ROW(t_huffman_env_3_0dB),
-        SBR_VLC_ROW(f_huffman_env_3_0dB),
-        SBR_VLC_ROW(t_huffman_env_bal_3_0dB),
-        SBR_VLC_ROW(f_huffman_env_bal_3_0dB),
-        SBR_VLC_ROW(t_huffman_noise_3_0dB),
-        SBR_VLC_ROW(t_huffman_noise_bal_3_0dB),
-    };
-
-    // SBR VLC table initialization
-    SBR_INIT_VLC_STATIC(0, 1098);
-    SBR_INIT_VLC_STATIC(1, 1092);
-    SBR_INIT_VLC_STATIC(2, 768);
-    SBR_INIT_VLC_STATIC(3, 1026);
-    SBR_INIT_VLC_STATIC(4, 1058);
-    SBR_INIT_VLC_STATIC(5, 1052);
-    SBR_INIT_VLC_STATIC(6, 544);
-    SBR_INIT_VLC_STATIC(7, 544);
-    SBR_INIT_VLC_STATIC(8, 592);
-    SBR_INIT_VLC_STATIC(9, 512);
-
-    aacsbr_tableinit();
-
-    ff_ps_init();
-}
-
-/** Places SBR in pure upsampling mode. */
-static void sbr_turnoff(SpectralBandReplication *sbr) {
-    sbr->start = 0;
-    // Init defults used in pure upsampling mode
-    sbr->kx[1] = 32; //Typo in spec, kx' inits to 32
-    sbr->m[1] = 0;
-    // Reset values for first SBR header
-    sbr->data[0].e_a[1] = sbr->data[1].e_a[1] = -1;
-    memset(&sbr->spectrum_params, -1, sizeof(SpectrumParameters));
-}
-
-av_cold void ff_aac_sbr_ctx_init(AACContext *ac, SpectralBandReplication *sbr)
-{
-    if(sbr->mdct.mdct_bits)
-        return;
-    sbr->kx[0] = sbr->kx[1];
-    sbr_turnoff(sbr);
-    sbr->data[0].synthesis_filterbank_samples_offset = SBR_SYNTHESIS_BUF_SIZE - (1280 - 128);
-    sbr->data[1].synthesis_filterbank_samples_offset = SBR_SYNTHESIS_BUF_SIZE - (1280 - 128);
-    /* SBR requires samples to be scaled to +/-32768.0 to work correctly.
-     * mdct scale factors are adjusted to scale up from +/-1.0 at analysis
-     * and scale back down at synthesis. */
-    ff_mdct_init(&sbr->mdct,     7, 1, 1.0 / (64 * 32768.0));
-    ff_mdct_init(&sbr->mdct_ana, 7, 1, -2.0 * 32768.0);
-    ff_ps_ctx_init(&sbr->ps);
-    ff_sbrdsp_init(&sbr->dsp);
-    aacsbr_func_ptr_init(&sbr->c);
-}
-
-av_cold void ff_aac_sbr_ctx_close(SpectralBandReplication *sbr)
-{
-    ff_mdct_end(&sbr->mdct);
-    ff_mdct_end(&sbr->mdct_ana);
-}
-
-static int qsort_comparison_function_int16(const void *a, const void *b)
-{
-    return *(const int16_t *)a - *(const int16_t *)b;
-}
-
-static inline int in_table_int16(const int16_t *table, int last_el, int16_t needle)
-{
-    int i;
-    for (i = 0; i <= last_el; i++)
-        if (table[i] == needle)
-            return 1;
-    return 0;
-}
-
-/// Limiter Frequency Band Table (14496-3 sp04 p198)
-static void sbr_make_f_tablelim(SpectralBandReplication *sbr)
-{
-    int k;
-    if (sbr->bs_limiter_bands > 0) {
-        static const float bands_warped[3] = { 1.32715174233856803909f,   //2^(0.49/1.2)
-                                               1.18509277094158210129f,   //2^(0.49/2)
-                                               1.11987160404675912501f }; //2^(0.49/3)
-        const float lim_bands_per_octave_warped = bands_warped[sbr->bs_limiter_bands - 1];
-        int16_t patch_borders[7];
-        uint16_t *in = sbr->f_tablelim + 1, *out = sbr->f_tablelim;
-
-        patch_borders[0] = sbr->kx[1];
-        for (k = 1; k <= sbr->num_patches; k++)
-            patch_borders[k] = patch_borders[k-1] + sbr->patch_num_subbands[k-1];
-
-        memcpy(sbr->f_tablelim, sbr->f_tablelow,
-               (sbr->n[0] + 1) * sizeof(sbr->f_tablelow[0]));
-        if (sbr->num_patches > 1)
-            memcpy(sbr->f_tablelim + sbr->n[0] + 1, patch_borders + 1,
-                   (sbr->num_patches - 1) * sizeof(patch_borders[0]));
-
-        qsort(sbr->f_tablelim, sbr->num_patches + sbr->n[0],
-              sizeof(sbr->f_tablelim[0]),
-              qsort_comparison_function_int16);
-
-        sbr->n_lim = sbr->n[0] + sbr->num_patches - 1;
-        while (out < sbr->f_tablelim + sbr->n_lim) {
-            if (*in >= *out * lim_bands_per_octave_warped) {
-                *++out = *in++;
-            } else if (*in == *out ||
-                !in_table_int16(patch_borders, sbr->num_patches, *in)) {
-                in++;
-                sbr->n_lim--;
-            } else if (!in_table_int16(patch_borders, sbr->num_patches, *out)) {
-                *out = *in++;
-                sbr->n_lim--;
-            } else {
-                *++out = *in++;
-            }
-        }
-    } else {
-        sbr->f_tablelim[0] = sbr->f_tablelow[0];
-        sbr->f_tablelim[1] = sbr->f_tablelow[sbr->n[0]];
-        sbr->n_lim = 1;
-    }
-}
-
-static unsigned int read_sbr_header(SpectralBandReplication *sbr, GetBitContext *gb)
-{
-    unsigned int cnt = get_bits_count(gb);
-    uint8_t bs_header_extra_1;
-    uint8_t bs_header_extra_2;
-    int old_bs_limiter_bands = sbr->bs_limiter_bands;
-    SpectrumParameters old_spectrum_params;
-
-    sbr->start = 1;
-
-    // Save last spectrum parameters variables to compare to new ones
-    memcpy(&old_spectrum_params, &sbr->spectrum_params, sizeof(SpectrumParameters));
-
-    sbr->bs_amp_res_header              = get_bits1(gb);
-    sbr->spectrum_params.bs_start_freq  = get_bits(gb, 4);
-    sbr->spectrum_params.bs_stop_freq   = get_bits(gb, 4);
-    sbr->spectrum_params.bs_xover_band  = get_bits(gb, 3);
-                                          skip_bits(gb, 2); // bs_reserved
-
-    bs_header_extra_1 = get_bits1(gb);
-    bs_header_extra_2 = get_bits1(gb);
-
-    if (bs_header_extra_1) {
-        sbr->spectrum_params.bs_freq_scale  = get_bits(gb, 2);
-        sbr->spectrum_params.bs_alter_scale = get_bits1(gb);
-        sbr->spectrum_params.bs_noise_bands = get_bits(gb, 2);
-    } else {
-        sbr->spectrum_params.bs_freq_scale  = 2;
-        sbr->spectrum_params.bs_alter_scale = 1;
-        sbr->spectrum_params.bs_noise_bands = 2;
-    }
-
-    // Check if spectrum parameters changed
-    if (memcmp(&old_spectrum_params, &sbr->spectrum_params, sizeof(SpectrumParameters)))
-        sbr->reset = 1;
-
-    if (bs_header_extra_2) {
-        sbr->bs_limiter_bands  = get_bits(gb, 2);
-        sbr->bs_limiter_gains  = get_bits(gb, 2);
-        sbr->bs_interpol_freq  = get_bits1(gb);
-        sbr->bs_smoothing_mode = get_bits1(gb);
-    } else {
-        sbr->bs_limiter_bands  = 2;
-        sbr->bs_limiter_gains  = 2;
-        sbr->bs_interpol_freq  = 1;
-        sbr->bs_smoothing_mode = 1;
-    }
-
-    if (sbr->bs_limiter_bands != old_bs_limiter_bands && !sbr->reset)
-        sbr_make_f_tablelim(sbr);
-
-    return get_bits_count(gb) - cnt;
-}
-
-static int array_min_int16(const int16_t *array, int nel)
-{
-    int i, min = array[0];
-    for (i = 1; i < nel; i++)
-        min = FFMIN(array[i], min);
-    return min;
-}
-
 static void make_bands(int16_t* bands, int start, int stop, int num_bands)
 {
     int k, previous, present;
@@ -306,837 +69,27 @@ static void make_bands(int16_t* bands, int start, int stop, int num_bands)
     bands[num_bands-1] = stop - previous;
 }
 
-static int check_n_master(AVCodecContext *avctx, int n_master, int bs_xover_band)
-{
-    // Requirements (14496-3 sp04 p205)
-    if (n_master <= 0) {
-        av_log(avctx, AV_LOG_ERROR, "Invalid n_master: %d\n", n_master);
-        return -1;
-    }
-    if (bs_xover_band >= n_master) {
-        av_log(avctx, AV_LOG_ERROR,
-               "Invalid bitstream, crossover band index beyond array bounds: %d\n",
-               bs_xover_band);
-        return -1;
-    }
-    return 0;
-}
-
-/// Master Frequency Band Table (14496-3 sp04 p194)
-static int sbr_make_f_master(AACContext *ac, SpectralBandReplication *sbr,
-                             SpectrumParameters *spectrum)
-{
-    unsigned int temp, max_qmf_subbands = 0;
-    unsigned int start_min, stop_min;
-    int k;
-    const int8_t *sbr_offset_ptr;
-    int16_t stop_dk[13];
-
-    if (sbr->sample_rate < 32000) {
-        temp = 3000;
-    } else if (sbr->sample_rate < 64000) {
-        temp = 4000;
-    } else
-        temp = 5000;
-
-    switch (sbr->sample_rate) {
-    case 16000:
-        sbr_offset_ptr = sbr_offset[0];
-        break;
-    case 22050:
-        sbr_offset_ptr = sbr_offset[1];
-        break;
-    case 24000:
-        sbr_offset_ptr = sbr_offset[2];
-        break;
-    case 32000:
-        sbr_offset_ptr = sbr_offset[3];
-        break;
-    case 44100: case 48000: case 64000:
-        sbr_offset_ptr = sbr_offset[4];
-        break;
-    case 88200: case 96000: case 128000: case 176400: case 192000:
-        sbr_offset_ptr = sbr_offset[5];
-        break;
-    default:
-        av_log(ac->avctx, AV_LOG_ERROR,
-               "Unsupported sample rate for SBR: %d\n", sbr->sample_rate);
-        return -1;
-    }
-
-    start_min = ((temp << 7) + (sbr->sample_rate >> 1)) / sbr->sample_rate;
-    stop_min  = ((temp << 8) + (sbr->sample_rate >> 1)) / sbr->sample_rate;
-
-    sbr->k[0] = start_min + sbr_offset_ptr[spectrum->bs_start_freq];
-
-    if (spectrum->bs_stop_freq < 14) {
-        sbr->k[2] = stop_min;
-        make_bands(stop_dk, stop_min, 64, 13);
-        qsort(stop_dk, 13, sizeof(stop_dk[0]), qsort_comparison_function_int16);
-        for (k = 0; k < spectrum->bs_stop_freq; k++)
-            sbr->k[2] += stop_dk[k];
-    } else if (spectrum->bs_stop_freq == 14) {
-        sbr->k[2] = 2*sbr->k[0];
-    } else if (spectrum->bs_stop_freq == 15) {
-        sbr->k[2] = 3*sbr->k[0];
-    } else {
-        av_log(ac->avctx, AV_LOG_ERROR,
-               "Invalid bs_stop_freq: %d\n", spectrum->bs_stop_freq);
-        return -1;
-    }
-    sbr->k[2] = FFMIN(64, sbr->k[2]);
-
-    // Requirements (14496-3 sp04 p205)
-    if (sbr->sample_rate <= 32000) {
-        max_qmf_subbands = 48;
-    } else if (sbr->sample_rate == 44100) {
-        max_qmf_subbands = 35;
-    } else if (sbr->sample_rate >= 48000)
-        max_qmf_subbands = 32;
-    else
-        av_assert0(0);
-
-    if (sbr->k[2] - sbr->k[0] > max_qmf_subbands) {
-        av_log(ac->avctx, AV_LOG_ERROR,
-               "Invalid bitstream, too many QMF subbands: %d\n", sbr->k[2] - sbr->k[0]);
-        return -1;
-    }
-
-    if (!spectrum->bs_freq_scale) {
-        int dk, k2diff;
-
-        dk = spectrum->bs_alter_scale + 1;
-        sbr->n_master = ((sbr->k[2] - sbr->k[0] + (dk&2)) >> dk) << 1;
-        if (check_n_master(ac->avctx, sbr->n_master, sbr->spectrum_params.bs_xover_band))
-            return -1;
-
-        for (k = 1; k <= sbr->n_master; k++)
-            sbr->f_master[k] = dk;
-
-        k2diff = sbr->k[2] - sbr->k[0] - sbr->n_master * dk;
-        if (k2diff < 0) {
-            sbr->f_master[1]--;
-            sbr->f_master[2]-= (k2diff < -1);
-        } else if (k2diff) {
-            sbr->f_master[sbr->n_master]++;
-        }
-
-        sbr->f_master[0] = sbr->k[0];
-        for (k = 1; k <= sbr->n_master; k++)
-            sbr->f_master[k] += sbr->f_master[k - 1];
-
-    } else {
-        int half_bands = 7 - spectrum->bs_freq_scale;      // bs_freq_scale  = {1,2,3}
-        int two_regions, num_bands_0;
-        int vdk0_max, vdk1_min;
-        int16_t vk0[49];
-
-        if (49 * sbr->k[2] > 110 * sbr->k[0]) {
-            two_regions = 1;
-            sbr->k[1] = 2 * sbr->k[0];
-        } else {
-            two_regions = 0;
-            sbr->k[1] = sbr->k[2];
-        }
-
-        num_bands_0 = lrintf(half_bands * log2f(sbr->k[1] / (float)sbr->k[0])) * 2;
-
-        if (num_bands_0 <= 0) { // Requirements (14496-3 sp04 p205)
-            av_log(ac->avctx, AV_LOG_ERROR, "Invalid num_bands_0: %d\n", num_bands_0);
-            return -1;
-        }
-
-        vk0[0] = 0;
-
-        make_bands(vk0+1, sbr->k[0], sbr->k[1], num_bands_0);
-
-        qsort(vk0 + 1, num_bands_0, sizeof(vk0[1]), qsort_comparison_function_int16);
-        vdk0_max = vk0[num_bands_0];
-
-        vk0[0] = sbr->k[0];
-        for (k = 1; k <= num_bands_0; k++) {
-            if (vk0[k] <= 0) { // Requirements (14496-3 sp04 p205)
-                av_log(ac->avctx, AV_LOG_ERROR, "Invalid vDk0[%d]: %d\n", k, vk0[k]);
-                return -1;
-            }
-            vk0[k] += vk0[k-1];
-        }
-
-        if (two_regions) {
-            int16_t vk1[49];
-            float invwarp = spectrum->bs_alter_scale ? 0.76923076923076923077f
-                                                     : 1.0f; // bs_alter_scale = {0,1}
-            int num_bands_1 = lrintf(half_bands * invwarp *
-                                     log2f(sbr->k[2] / (float)sbr->k[1])) * 2;
-
-            make_bands(vk1+1, sbr->k[1], sbr->k[2], num_bands_1);
-
-            vdk1_min = array_min_int16(vk1 + 1, num_bands_1);
-
-            if (vdk1_min < vdk0_max) {
-                int change;
-                qsort(vk1 + 1, num_bands_1, sizeof(vk1[1]), qsort_comparison_function_int16);
-                change = FFMIN(vdk0_max - vk1[1], (vk1[num_bands_1] - vk1[1]) >> 1);
-                vk1[1]           += change;
-                vk1[num_bands_1] -= change;
-            }
-
-            qsort(vk1 + 1, num_bands_1, sizeof(vk1[1]), qsort_comparison_function_int16);
-
-            vk1[0] = sbr->k[1];
-            for (k = 1; k <= num_bands_1; k++) {
-                if (vk1[k] <= 0) { // Requirements (14496-3 sp04 p205)
-                    av_log(ac->avctx, AV_LOG_ERROR, "Invalid vDk1[%d]: %d\n", k, vk1[k]);
-                    return -1;
-                }
-                vk1[k] += vk1[k-1];
-            }
-
-            sbr->n_master = num_bands_0 + num_bands_1;
-            if (check_n_master(ac->avctx, sbr->n_master, sbr->spectrum_params.bs_xover_band))
-                return -1;
-            memcpy(&sbr->f_master[0],               vk0,
-                   (num_bands_0 + 1) * sizeof(sbr->f_master[0]));
-            memcpy(&sbr->f_master[num_bands_0 + 1], vk1 + 1,
-                    num_bands_1      * sizeof(sbr->f_master[0]));
-
-        } else {
-            sbr->n_master = num_bands_0;
-            if (check_n_master(ac->avctx, sbr->n_master, sbr->spectrum_params.bs_xover_band))
-                return -1;
-            memcpy(sbr->f_master, vk0, (num_bands_0 + 1) * sizeof(sbr->f_master[0]));
-        }
-    }
-
-    return 0;
-}
-
-/// High Frequency Generation - Patch Construction (14496-3 sp04 p216 fig. 4.46)
-static int sbr_hf_calc_npatches(AACContext *ac, SpectralBandReplication *sbr)
-{
-    int i, k, last_k = -1, last_msb = -1, sb = 0;
-    int msb = sbr->k[0];
-    int usb = sbr->kx[1];
-    int goal_sb = ((1000 << 11) + (sbr->sample_rate >> 1)) / sbr->sample_rate;
-
-    sbr->num_patches = 0;
-
-    if (goal_sb < sbr->kx[1] + sbr->m[1]) {
-        for (k = 0; sbr->f_master[k] < goal_sb; k++) ;
-    } else
-        k = sbr->n_master;
-
-    do {
-        int odd = 0;
-        if (k == last_k && msb == last_msb) {
-            av_log(ac->avctx, AV_LOG_ERROR, "patch construction failed\n");
-            return AVERROR_INVALIDDATA;
-        }
-        last_k = k;
-        last_msb = msb;
-        for (i = k; i == k || sb > (sbr->k[0] - 1 + msb - odd); i--) {
-            sb = sbr->f_master[i];
-            odd = (sb + sbr->k[0]) & 1;
-        }
-
-        // Requirements (14496-3 sp04 p205) sets the maximum number of patches to 5.
-        // After this check the final number of patches can still be six which is
-        // illegal however the Coding Technologies decoder check stream has a final
-        // count of 6 patches
-        if (sbr->num_patches > 5) {
-            av_log(ac->avctx, AV_LOG_ERROR, "Too many patches: %d\n", sbr->num_patches);
-            return -1;
-        }
-
-        sbr->patch_num_subbands[sbr->num_patches]  = FFMAX(sb - usb, 0);
-        sbr->patch_start_subband[sbr->num_patches] = sbr->k[0] - odd - sbr->patch_num_subbands[sbr->num_patches];
-
-        if (sbr->patch_num_subbands[sbr->num_patches] > 0) {
-            usb = sb;
-            msb = sb;
-            sbr->num_patches++;
-        } else
-            msb = sbr->kx[1];
-
-        if (sbr->f_master[k] - sb < 3)
-            k = sbr->n_master;
-    } while (sb != sbr->kx[1] + sbr->m[1]);
-
-    if (sbr->num_patches > 1 &&
-        sbr->patch_num_subbands[sbr->num_patches - 1] < 3)
-        sbr->num_patches--;
-
-    return 0;
-}
-
-/// Derived Frequency Band Tables (14496-3 sp04 p197)
-static int sbr_make_f_derived(AACContext *ac, SpectralBandReplication *sbr)
-{
-    int k, temp;
-
-    sbr->n[1] = sbr->n_master - sbr->spectrum_params.bs_xover_band;
-    sbr->n[0] = (sbr->n[1] + 1) >> 1;
-
-    memcpy(sbr->f_tablehigh, &sbr->f_master[sbr->spectrum_params.bs_xover_band],
-           (sbr->n[1] + 1) * sizeof(sbr->f_master[0]));
-    sbr->m[1] = sbr->f_tablehigh[sbr->n[1]] - sbr->f_tablehigh[0];
-    sbr->kx[1] = sbr->f_tablehigh[0];
-
-    // Requirements (14496-3 sp04 p205)
-    if (sbr->kx[1] + sbr->m[1] > 64) {
-        av_log(ac->avctx, AV_LOG_ERROR,
-               "Stop frequency border too high: %d\n", sbr->kx[1] + sbr->m[1]);
-        return -1;
-    }
-    if (sbr->kx[1] > 32) {
-        av_log(ac->avctx, AV_LOG_ERROR, "Start frequency border too high: %d\n", sbr->kx[1]);
-        return -1;
-    }
-
-    sbr->f_tablelow[0] = sbr->f_tablehigh[0];
-    temp = sbr->n[1] & 1;
-    for (k = 1; k <= sbr->n[0]; k++)
-        sbr->f_tablelow[k] = sbr->f_tablehigh[2 * k - temp];
-
-    sbr->n_q = FFMAX(1, lrintf(sbr->spectrum_params.bs_noise_bands *
-                               log2f(sbr->k[2] / (float)sbr->kx[1]))); // 0 <= bs_noise_bands <= 3
-    if (sbr->n_q > 5) {
-        av_log(ac->avctx, AV_LOG_ERROR, "Too many noise floor scale factors: %d\n", sbr->n_q);
-        return -1;
-    }
-
-    sbr->f_tablenoise[0] = sbr->f_tablelow[0];
-    temp = 0;
-    for (k = 1; k <= sbr->n_q; k++) {
-        temp += (sbr->n[0] - temp) / (sbr->n_q + 1 - k);
-        sbr->f_tablenoise[k] = sbr->f_tablelow[temp];
-    }
-
-    if (sbr_hf_calc_npatches(ac, sbr) < 0)
-        return -1;
-
-    sbr_make_f_tablelim(sbr);
-
-    sbr->data[0].f_indexnoise = 0;
-    sbr->data[1].f_indexnoise = 0;
-
-    return 0;
-}
-
-static av_always_inline void get_bits1_vector(GetBitContext *gb, uint8_t *vec,
-                                              int elements)
-{
-    int i;
-    for (i = 0; i < elements; i++) {
-        vec[i] = get_bits1(gb);
-    }
-}
-
-/** ceil(log2(index+1)) */
-static const int8_t ceil_log2[] = {
-    0, 1, 2, 2, 3, 3,
-};
-
-static int read_sbr_grid(AACContext *ac, SpectralBandReplication *sbr,
-                         GetBitContext *gb, SBRData *ch_data)
-{
-    int i;
-    int bs_pointer = 0;
-    // frameLengthFlag ? 15 : 16; 960 sample length frames unsupported; this value is numTimeSlots
-    int abs_bord_trail = 16;
-    int num_rel_lead, num_rel_trail;
-    unsigned bs_num_env_old = ch_data->bs_num_env;
-
-    ch_data->bs_freq_res[0] = ch_data->bs_freq_res[ch_data->bs_num_env];
-    ch_data->bs_amp_res = sbr->bs_amp_res_header;
-    ch_data->t_env_num_env_old = ch_data->t_env[bs_num_env_old];
-
-    switch (ch_data->bs_frame_class = get_bits(gb, 2)) {
-    case FIXFIX:
-        ch_data->bs_num_env                 = 1 << get_bits(gb, 2);
-        num_rel_lead                        = ch_data->bs_num_env - 1;
-        if (ch_data->bs_num_env == 1)
-            ch_data->bs_amp_res = 0;
-
-        if (ch_data->bs_num_env > 4) {
-            av_log(ac->avctx, AV_LOG_ERROR,
-                   "Invalid bitstream, too many SBR envelopes in FIXFIX type SBR frame: %d\n",
-                   ch_data->bs_num_env);
-            return -1;
-        }
-
-        ch_data->t_env[0]                   = 0;
-        ch_data->t_env[ch_data->bs_num_env] = abs_bord_trail;
-
-        abs_bord_trail = (abs_bord_trail + (ch_data->bs_num_env >> 1)) /
-                   ch_data->bs_num_env;
-        for (i = 0; i < num_rel_lead; i++)
-            ch_data->t_env[i + 1] = ch_data->t_env[i] + abs_bord_trail;
-
-        ch_data->bs_freq_res[1] = get_bits1(gb);
-        for (i = 1; i < ch_data->bs_num_env; i++)
-            ch_data->bs_freq_res[i + 1] = ch_data->bs_freq_res[1];
-        break;
-    case FIXVAR:
-        abs_bord_trail                     += get_bits(gb, 2);
-        num_rel_trail                       = get_bits(gb, 2);
-        ch_data->bs_num_env                 = num_rel_trail + 1;
-        ch_data->t_env[0]                   = 0;
-        ch_data->t_env[ch_data->bs_num_env] = abs_bord_trail;
-
-        for (i = 0; i < num_rel_trail; i++)
-            ch_data->t_env[ch_data->bs_num_env - 1 - i] =
-                ch_data->t_env[ch_data->bs_num_env - i] - 2 * get_bits(gb, 2) - 2;
-
-        bs_pointer = get_bits(gb, ceil_log2[ch_data->bs_num_env]);
-
-        for (i = 0; i < ch_data->bs_num_env; i++)
-            ch_data->bs_freq_res[ch_data->bs_num_env - i] = get_bits1(gb);
-        break;
-    case VARFIX:
-        ch_data->t_env[0]                   = get_bits(gb, 2);
-        num_rel_lead                        = get_bits(gb, 2);
-        ch_data->bs_num_env                 = num_rel_lead + 1;
-        ch_data->t_env[ch_data->bs_num_env] = abs_bord_trail;
-
-        for (i = 0; i < num_rel_lead; i++)
-            ch_data->t_env[i + 1] = ch_data->t_env[i] + 2 * get_bits(gb, 2) + 2;
-
-        bs_pointer = get_bits(gb, ceil_log2[ch_data->bs_num_env]);
-
-        get_bits1_vector(gb, ch_data->bs_freq_res + 1, ch_data->bs_num_env);
-        break;
-    case VARVAR:
-        ch_data->t_env[0]                   = get_bits(gb, 2);
-        abs_bord_trail                     += get_bits(gb, 2);
-        num_rel_lead                        = get_bits(gb, 2);
-        num_rel_trail                       = get_bits(gb, 2);
-        ch_data->bs_num_env                 = num_rel_lead + num_rel_trail + 1;
-
-        if (ch_data->bs_num_env > 5) {
-            av_log(ac->avctx, AV_LOG_ERROR,
-                   "Invalid bitstream, too many SBR envelopes in VARVAR type SBR frame: %d\n",
-                   ch_data->bs_num_env);
-            return -1;
-        }
-
-        ch_data->t_env[ch_data->bs_num_env] = abs_bord_trail;
-
-        for (i = 0; i < num_rel_lead; i++)
-            ch_data->t_env[i + 1] = ch_data->t_env[i] + 2 * get_bits(gb, 2) + 2;
-        for (i = 0; i < num_rel_trail; i++)
-            ch_data->t_env[ch_data->bs_num_env - 1 - i] =
-                ch_data->t_env[ch_data->bs_num_env - i] - 2 * get_bits(gb, 2) - 2;
-
-        bs_pointer = get_bits(gb, ceil_log2[ch_data->bs_num_env]);
-
-        get_bits1_vector(gb, ch_data->bs_freq_res + 1, ch_data->bs_num_env);
-        break;
-    }
-
-    av_assert0(bs_pointer >= 0);
-    if (bs_pointer > ch_data->bs_num_env + 1) {
-        av_log(ac->avctx, AV_LOG_ERROR,
-               "Invalid bitstream, bs_pointer points to a middle noise border outside the time borders table: %d\n",
-               bs_pointer);
-        return -1;
-    }
-
-    for (i = 1; i <= ch_data->bs_num_env; i++) {
-        if (ch_data->t_env[i-1] > ch_data->t_env[i]) {
-            av_log(ac->avctx, AV_LOG_ERROR, "Non monotone time borders\n");
-            return -1;
-        }
-    }
-
-    ch_data->bs_num_noise = (ch_data->bs_num_env > 1) + 1;
-
-    ch_data->t_q[0]                     = ch_data->t_env[0];
-    ch_data->t_q[ch_data->bs_num_noise] = ch_data->t_env[ch_data->bs_num_env];
-    if (ch_data->bs_num_noise > 1) {
-        int idx;
-        if (ch_data->bs_frame_class == FIXFIX) {
-            idx = ch_data->bs_num_env >> 1;
-        } else if (ch_data->bs_frame_class & 1) { // FIXVAR or VARVAR
-            idx = ch_data->bs_num_env - FFMAX(bs_pointer - 1, 1);
-        } else { // VARFIX
-            if (!bs_pointer)
-                idx = 1;
-            else if (bs_pointer == 1)
-                idx = ch_data->bs_num_env - 1;
-            else // bs_pointer > 1
-                idx = bs_pointer - 1;
-        }
-        ch_data->t_q[1] = ch_data->t_env[idx];
-    }
-
-    ch_data->e_a[0] = -(ch_data->e_a[1] != bs_num_env_old); // l_APrev
-    ch_data->e_a[1] = -1;
-    if ((ch_data->bs_frame_class & 1) && bs_pointer) { // FIXVAR or VARVAR and bs_pointer != 0
-        ch_data->e_a[1] = ch_data->bs_num_env + 1 - bs_pointer;
-    } else if ((ch_data->bs_frame_class == 2) && (bs_pointer > 1)) // VARFIX and bs_pointer > 1
-        ch_data->e_a[1] = bs_pointer - 1;
-
-    return 0;
-}
-
-static void copy_sbr_grid(SBRData *dst, const SBRData *src) {
-    //These variables are saved from the previous frame rather than copied
-    dst->bs_freq_res[0]    = dst->bs_freq_res[dst->bs_num_env];
-    dst->t_env_num_env_old = dst->t_env[dst->bs_num_env];
-    dst->e_a[0]            = -(dst->e_a[1] != dst->bs_num_env);
-
-    //These variables are read from the bitstream and therefore copied
-    memcpy(dst->bs_freq_res+1, src->bs_freq_res+1, sizeof(dst->bs_freq_res)-sizeof(*dst->bs_freq_res));
-    memcpy(dst->t_env,         src->t_env,         sizeof(dst->t_env));
-    memcpy(dst->t_q,           src->t_q,           sizeof(dst->t_q));
-    dst->bs_num_env        = src->bs_num_env;
-    dst->bs_amp_res        = src->bs_amp_res;
-    dst->bs_num_noise      = src->bs_num_noise;
-    dst->bs_frame_class    = src->bs_frame_class;
-    dst->e_a[1]            = src->e_a[1];
-}
-
-/// Read how the envelope and noise floor data is delta coded
-static void read_sbr_dtdf(SpectralBandReplication *sbr, GetBitContext *gb,
-                          SBRData *ch_data)
-{
-    get_bits1_vector(gb, ch_data->bs_df_env,   ch_data->bs_num_env);
-    get_bits1_vector(gb, ch_data->bs_df_noise, ch_data->bs_num_noise);
-}
-
-/// Read inverse filtering data
-static void read_sbr_invf(SpectralBandReplication *sbr, GetBitContext *gb,
-                          SBRData *ch_data)
-{
-    int i;
-
-    memcpy(ch_data->bs_invf_mode[1], ch_data->bs_invf_mode[0], 5 * sizeof(uint8_t));
-    for (i = 0; i < sbr->n_q; i++)
-        ch_data->bs_invf_mode[0][i] = get_bits(gb, 2);
-}
-
-static void read_sbr_envelope(SpectralBandReplication *sbr, GetBitContext *gb,
-                              SBRData *ch_data, int ch)
-{
-    int bits;
-    int i, j, k;
-    VLC_TYPE (*t_huff)[2], (*f_huff)[2];
-    int t_lav, f_lav;
-    const int delta = (ch == 1 && sbr->bs_coupling == 1) + 1;
-    const int odd = sbr->n[1] & 1;
-
-    if (sbr->bs_coupling && ch) {
-        if (ch_data->bs_amp_res) {
-            bits   = 5;
-            t_huff = vlc_sbr[T_HUFFMAN_ENV_BAL_3_0DB].table;
-            t_lav  = vlc_sbr_lav[T_HUFFMAN_ENV_BAL_3_0DB];
-            f_huff = vlc_sbr[F_HUFFMAN_ENV_BAL_3_0DB].table;
-            f_lav  = vlc_sbr_lav[F_HUFFMAN_ENV_BAL_3_0DB];
-        } else {
-            bits   = 6;
-            t_huff = vlc_sbr[T_HUFFMAN_ENV_BAL_1_5DB].table;
-            t_lav  = vlc_sbr_lav[T_HUFFMAN_ENV_BAL_1_5DB];
-            f_huff = vlc_sbr[F_HUFFMAN_ENV_BAL_1_5DB].table;
-            f_lav  = vlc_sbr_lav[F_HUFFMAN_ENV_BAL_1_5DB];
-        }
-    } else {
-        if (ch_data->bs_amp_res) {
-            bits   = 6;
-            t_huff = vlc_sbr[T_HUFFMAN_ENV_3_0DB].table;
-            t_lav  = vlc_sbr_lav[T_HUFFMAN_ENV_3_0DB];
-            f_huff = vlc_sbr[F_HUFFMAN_ENV_3_0DB].table;
-            f_lav  = vlc_sbr_lav[F_HUFFMAN_ENV_3_0DB];
-        } else {
-            bits   = 7;
-            t_huff = vlc_sbr[T_HUFFMAN_ENV_1_5DB].table;
-            t_lav  = vlc_sbr_lav[T_HUFFMAN_ENV_1_5DB];
-            f_huff = vlc_sbr[F_HUFFMAN_ENV_1_5DB].table;
-            f_lav  = vlc_sbr_lav[F_HUFFMAN_ENV_1_5DB];
-        }
-    }
-
-    for (i = 0; i < ch_data->bs_num_env; i++) {
-        if (ch_data->bs_df_env[i]) {
-            // bs_freq_res[0] == bs_freq_res[bs_num_env] from prev frame
-            if (ch_data->bs_freq_res[i + 1] == ch_data->bs_freq_res[i]) {
-                for (j = 0; j < sbr->n[ch_data->bs_freq_res[i + 1]]; j++)
-                    ch_data->env_facs[i + 1][j] = ch_data->env_facs[i][j] + delta * (get_vlc2(gb, t_huff, 9, 3) - t_lav);
-            } else if (ch_data->bs_freq_res[i + 1]) {
-                for (j = 0; j < sbr->n[ch_data->bs_freq_res[i + 1]]; j++) {
-                    k = (j + odd) >> 1; // find k such that f_tablelow[k] <= f_tablehigh[j] < f_tablelow[k + 1]
-                    ch_data->env_facs[i + 1][j] = ch_data->env_facs[i][k] + delta * (get_vlc2(gb, t_huff, 9, 3) - t_lav);
-                }
-            } else {
-                for (j = 0; j < sbr->n[ch_data->bs_freq_res[i + 1]]; j++) {
-                    k = j ? 2*j - odd : 0; // find k such that f_tablehigh[k] == f_tablelow[j]
-                    ch_data->env_facs[i + 1][j] = ch_data->env_facs[i][k] + delta * (get_vlc2(gb, t_huff, 9, 3) - t_lav);
-                }
-            }
-        } else {
-            ch_data->env_facs[i + 1][0] = delta * get_bits(gb, bits); // bs_env_start_value_balance
-            for (j = 1; j < sbr->n[ch_data->bs_freq_res[i + 1]]; j++)
-                ch_data->env_facs[i + 1][j] = ch_data->env_facs[i + 1][j - 1] + delta * (get_vlc2(gb, f_huff, 9, 3) - f_lav);
-        }
-    }
-
-    //assign 0th elements of env_facs from last elements
-    memcpy(ch_data->env_facs[0], ch_data->env_facs[ch_data->bs_num_env],
-           sizeof(ch_data->env_facs[0]));
-}
-
-static void read_sbr_noise(SpectralBandReplication *sbr, GetBitContext *gb,
-                           SBRData *ch_data, int ch)
-{
-    int i, j;
-    VLC_TYPE (*t_huff)[2], (*f_huff)[2];
-    int t_lav, f_lav;
-    int delta = (ch == 1 && sbr->bs_coupling == 1) + 1;
-
-    if (sbr->bs_coupling && ch) {
-        t_huff = vlc_sbr[T_HUFFMAN_NOISE_BAL_3_0DB].table;
-        t_lav  = vlc_sbr_lav[T_HUFFMAN_NOISE_BAL_3_0DB];
-        f_huff = vlc_sbr[F_HUFFMAN_ENV_BAL_3_0DB].table;
-        f_lav  = vlc_sbr_lav[F_HUFFMAN_ENV_BAL_3_0DB];
-    } else {
-        t_huff = vlc_sbr[T_HUFFMAN_NOISE_3_0DB].table;
-        t_lav  = vlc_sbr_lav[T_HUFFMAN_NOISE_3_0DB];
-        f_huff = vlc_sbr[F_HUFFMAN_ENV_3_0DB].table;
-        f_lav  = vlc_sbr_lav[F_HUFFMAN_ENV_3_0DB];
-    }
-
-    for (i = 0; i < ch_data->bs_num_noise; i++) {
-        if (ch_data->bs_df_noise[i]) {
-            for (j = 0; j < sbr->n_q; j++)
-                ch_data->noise_facs[i + 1][j] = ch_data->noise_facs[i][j] + delta * (get_vlc2(gb, t_huff, 9, 2) - t_lav);
-        } else {
-            ch_data->noise_facs[i + 1][0] = delta * get_bits(gb, 5); // bs_noise_start_value_balance or bs_noise_start_value_level
-            for (j = 1; j < sbr->n_q; j++)
-                ch_data->noise_facs[i + 1][j] = ch_data->noise_facs[i + 1][j - 1] + delta * (get_vlc2(gb, f_huff, 9, 3) - f_lav);
-        }
-    }
-
-    //assign 0th elements of noise_facs from last elements
-    memcpy(ch_data->noise_facs[0], ch_data->noise_facs[ch_data->bs_num_noise],
-           sizeof(ch_data->noise_facs[0]));
-}
-
-static void read_sbr_extension(AACContext *ac, SpectralBandReplication *sbr,
-                               GetBitContext *gb,
-                               int bs_extension_id, int *num_bits_left)
-{
-    switch (bs_extension_id) {
-    case EXTENSION_ID_PS:
-        if (!ac->oc[1].m4ac.ps) {
-            av_log(ac->avctx, AV_LOG_ERROR, "Parametric Stereo signaled to be not-present but was found in the bitstream.\n");
-            skip_bits_long(gb, *num_bits_left); // bs_fill_bits
-            *num_bits_left = 0;
-        } else {
-#if 1
-            *num_bits_left -= ff_ps_read_data(ac->avctx, gb, &sbr->ps, *num_bits_left);
-            ac->avctx->profile = FF_PROFILE_AAC_HE_V2;
-#else
-            avpriv_report_missing_feature(ac->avctx, "Parametric Stereo");
-            skip_bits_long(gb, *num_bits_left); // bs_fill_bits
-            *num_bits_left = 0;
-#endif
-        }
-        break;
-    default:
-        // some files contain 0-padding
-        if (bs_extension_id || *num_bits_left > 16 || show_bits(gb, *num_bits_left))
-            avpriv_request_sample(ac->avctx, "Reserved SBR extensions");
-        skip_bits_long(gb, *num_bits_left); // bs_fill_bits
-        *num_bits_left = 0;
-        break;
-    }
-}
-
-static int read_sbr_single_channel_element(AACContext *ac,
-                                            SpectralBandReplication *sbr,
-                                            GetBitContext *gb)
-{
-    if (get_bits1(gb)) // bs_data_extra
-        skip_bits(gb, 4); // bs_reserved
-
-    if (read_sbr_grid(ac, sbr, gb, &sbr->data[0]))
-        return -1;
-    read_sbr_dtdf(sbr, gb, &sbr->data[0]);
-    read_sbr_invf(sbr, gb, &sbr->data[0]);
-    read_sbr_envelope(sbr, gb, &sbr->data[0], 0);
-    read_sbr_noise(sbr, gb, &sbr->data[0], 0);
-
-    if ((sbr->data[0].bs_add_harmonic_flag = get_bits1(gb)))
-        get_bits1_vector(gb, sbr->data[0].bs_add_harmonic, sbr->n[1]);
-
-    return 0;
-}
-
-static int read_sbr_channel_pair_element(AACContext *ac,
-                                          SpectralBandReplication *sbr,
-                                          GetBitContext *gb)
-{
-    if (get_bits1(gb))    // bs_data_extra
-        skip_bits(gb, 8); // bs_reserved
-
-    if ((sbr->bs_coupling = get_bits1(gb))) {
-        if (read_sbr_grid(ac, sbr, gb, &sbr->data[0]))
-            return -1;
-        copy_sbr_grid(&sbr->data[1], &sbr->data[0]);
-        read_sbr_dtdf(sbr, gb, &sbr->data[0]);
-        read_sbr_dtdf(sbr, gb, &sbr->data[1]);
-        read_sbr_invf(sbr, gb, &sbr->data[0]);
-        memcpy(sbr->data[1].bs_invf_mode[1], sbr->data[1].bs_invf_mode[0], sizeof(sbr->data[1].bs_invf_mode[0]));
-        memcpy(sbr->data[1].bs_invf_mode[0], sbr->data[0].bs_invf_mode[0], sizeof(sbr->data[1].bs_invf_mode[0]));
-        read_sbr_envelope(sbr, gb, &sbr->data[0], 0);
-        read_sbr_noise(sbr, gb, &sbr->data[0], 0);
-        read_sbr_envelope(sbr, gb, &sbr->data[1], 1);
-        read_sbr_noise(sbr, gb, &sbr->data[1], 1);
-    } else {
-        if (read_sbr_grid(ac, sbr, gb, &sbr->data[0]) ||
-            read_sbr_grid(ac, sbr, gb, &sbr->data[1]))
-            return -1;
-        read_sbr_dtdf(sbr, gb, &sbr->data[0]);
-        read_sbr_dtdf(sbr, gb, &sbr->data[1]);
-        read_sbr_invf(sbr, gb, &sbr->data[0]);
-        read_sbr_invf(sbr, gb, &sbr->data[1]);
-        read_sbr_envelope(sbr, gb, &sbr->data[0], 0);
-        read_sbr_envelope(sbr, gb, &sbr->data[1], 1);
-        read_sbr_noise(sbr, gb, &sbr->data[0], 0);
-        read_sbr_noise(sbr, gb, &sbr->data[1], 1);
-    }
-
-    if ((sbr->data[0].bs_add_harmonic_flag = get_bits1(gb)))
-        get_bits1_vector(gb, sbr->data[0].bs_add_harmonic, sbr->n[1]);
-    if ((sbr->data[1].bs_add_harmonic_flag = get_bits1(gb)))
-        get_bits1_vector(gb, sbr->data[1].bs_add_harmonic, sbr->n[1]);
-
-    return 0;
-}
-
-static unsigned int read_sbr_data(AACContext *ac, SpectralBandReplication *sbr,
-                                  GetBitContext *gb, int id_aac)
-{
-    unsigned int cnt = get_bits_count(gb);
-
-    sbr->id_aac = id_aac;
-
-    if (id_aac == TYPE_SCE || id_aac == TYPE_CCE) {
-        if (read_sbr_single_channel_element(ac, sbr, gb)) {
-            sbr_turnoff(sbr);
-            return get_bits_count(gb) - cnt;
-        }
-    } else if (id_aac == TYPE_CPE) {
-        if (read_sbr_channel_pair_element(ac, sbr, gb)) {
-            sbr_turnoff(sbr);
-            return get_bits_count(gb) - cnt;
-        }
-    } else {
-        av_log(ac->avctx, AV_LOG_ERROR,
-            "Invalid bitstream - cannot apply SBR to element type %d\n", id_aac);
-        sbr_turnoff(sbr);
-        return get_bits_count(gb) - cnt;
-    }
-    if (get_bits1(gb)) { // bs_extended_data
-        int num_bits_left = get_bits(gb, 4); // bs_extension_size
-        if (num_bits_left == 15)
-            num_bits_left += get_bits(gb, 8); // bs_esc_count
-
-        num_bits_left <<= 3;
-        while (num_bits_left > 7) {
-            num_bits_left -= 2;
-            read_sbr_extension(ac, sbr, gb, get_bits(gb, 2), &num_bits_left); // bs_extension_id
-        }
-        if (num_bits_left < 0) {
-            av_log(ac->avctx, AV_LOG_ERROR, "SBR Extension over read.\n");
-        }
-        if (num_bits_left > 0)
-            skip_bits(gb, num_bits_left);
-    }
-
-    return get_bits_count(gb) - cnt;
-}
-
-static void sbr_reset(AACContext *ac, SpectralBandReplication *sbr)
-{
-    int err;
-    err = sbr_make_f_master(ac, sbr, &sbr->spectrum_params);
-    if (err >= 0)
-        err = sbr_make_f_derived(ac, sbr);
-    if (err < 0) {
-        av_log(ac->avctx, AV_LOG_ERROR,
-               "SBR reset failed. Switching SBR to pure upsampling mode.\n");
-        sbr_turnoff(sbr);
-    }
-}
-
-/**
- * Decode Spectral Band Replication extension data; reference: table 4.55.
- *
- * @param   crc flag indicating the presence of CRC checksum
- * @param   cnt length of TYPE_FIL syntactic element in bytes
- *
- * @return  Returns number of bytes consumed from the TYPE_FIL element.
- */
-int ff_decode_sbr_extension(AACContext *ac, SpectralBandReplication *sbr,
-                            GetBitContext *gb_host, int crc, int cnt, int id_aac)
-{
-    unsigned int num_sbr_bits = 0, num_align_bits;
-    unsigned bytes_read;
-    GetBitContext gbc = *gb_host, *gb = &gbc;
-    skip_bits_long(gb_host, cnt*8 - 4);
-
-    sbr->reset = 0;
-
-    if (!sbr->sample_rate)
-        sbr->sample_rate = 2 * ac->oc[1].m4ac.sample_rate; //TODO use the nominal sample rate for arbitrary sample rate support
-    if (!ac->oc[1].m4ac.ext_sample_rate)
-        ac->oc[1].m4ac.ext_sample_rate = 2 * ac->oc[1].m4ac.sample_rate;
-
-    if (crc) {
-        skip_bits(gb, 10); // bs_sbr_crc_bits; TODO - implement CRC check
-        num_sbr_bits += 10;
-    }
-
-    //Save some state from the previous frame.
-    sbr->kx[0] = sbr->kx[1];
-    sbr->m[0] = sbr->m[1];
-    sbr->kx_and_m_pushed = 1;
-
-    num_sbr_bits++;
-    if (get_bits1(gb)) // bs_header_flag
-        num_sbr_bits += read_sbr_header(sbr, gb);
-
-    if (sbr->reset)
-        sbr_reset(ac, sbr);
-
-    if (sbr->start)
-        num_sbr_bits  += read_sbr_data(ac, sbr, gb, id_aac);
-
-    num_align_bits = ((cnt << 3) - 4 - num_sbr_bits) & 7;
-    bytes_read = ((num_sbr_bits + num_align_bits + 4) >> 3);
-
-    if (bytes_read > cnt) {
-        av_log(ac->avctx, AV_LOG_ERROR,
-               "Expected to read %d SBR bytes actually read %d.\n", cnt, bytes_read);
-    }
-    return cnt;
-}
-
 /// Dequantization and stereo decoding (14496-3 sp04 p203)
 static void sbr_dequant(SpectralBandReplication *sbr, int id_aac)
 {
     int k, e;
     int ch;
-
+    static const double exp2_tab[2] = {1, M_SQRT2};
     if (id_aac == TYPE_CPE && sbr->bs_coupling) {
-        float alpha      = sbr->data[0].bs_amp_res ?  1.0f :  0.5f;
-        float pan_offset = sbr->data[0].bs_amp_res ? 12.0f : 24.0f;
+        int pan_offset = sbr->data[0].bs_amp_res ? 12 : 24;
         for (e = 1; e <= sbr->data[0].bs_num_env; e++) {
             for (k = 0; k < sbr->n[sbr->data[0].bs_freq_res[e]]; k++) {
-                float temp1 = exp2f(sbr->data[0].env_facs[e][k] * alpha + 7.0f);
-                float temp2 = exp2f((pan_offset - sbr->data[1].env_facs[e][k]) * alpha);
-                float fac;
+                float temp1, temp2, fac;
+                if (sbr->data[0].bs_amp_res) {
+                    temp1 = ff_exp2fi(sbr->data[0].env_facs_q[e][k] + 7);
+                    temp2 = ff_exp2fi(pan_offset - sbr->data[1].env_facs_q[e][k]);
+                }
+                else {
+                    temp1 = ff_exp2fi((sbr->data[0].env_facs_q[e][k]>>1) + 7) *
+                            exp2_tab[sbr->data[0].env_facs_q[e][k] & 1];
+                    temp2 = ff_exp2fi((pan_offset - sbr->data[1].env_facs_q[e][k])>>1) *
+                            exp2_tab[(pan_offset - sbr->data[1].env_facs_q[e][k]) & 1];
+                }
                 if (temp1 > 1E20) {
                     av_log(NULL, AV_LOG_ERROR, "envelope scalefactor overflow in dequant\n");
                     temp1 = 1;
@@ -1148,13 +101,10 @@ static void sbr_dequant(SpectralBandReplication *sbr, int id_aac)
         }
         for (e = 1; e <= sbr->data[0].bs_num_noise; e++) {
             for (k = 0; k < sbr->n_q; k++) {
-                float temp1 = exp2f(NOISE_FLOOR_OFFSET - sbr->data[0].noise_facs[e][k] + 1);
-                float temp2 = exp2f(12 - sbr->data[1].noise_facs[e][k]);
+                float temp1 = ff_exp2fi(NOISE_FLOOR_OFFSET - sbr->data[0].noise_facs_q[e][k] + 1);
+                float temp2 = ff_exp2fi(12 - sbr->data[1].noise_facs_q[e][k]);
                 float fac;
-                if (temp1 > 1E20) {
-                    av_log(NULL, AV_LOG_ERROR, "envelope scalefactor overflow in dequant\n");
-                    temp1 = 1;
-                }
+                av_assert0(temp1 <= 1E20);
                 fac = temp1 / (1.0f + temp2);
                 sbr->data[0].noise_facs[e][k] = fac;
                 sbr->data[1].noise_facs[e][k] = fac * temp2;
@@ -1162,11 +112,13 @@ static void sbr_dequant(SpectralBandReplication *sbr, int id_aac)
         }
     } else { // SCE or one non-coupled CPE
         for (ch = 0; ch < (id_aac == TYPE_CPE) + 1; ch++) {
-            float alpha = sbr->data[ch].bs_amp_res ? 1.0f : 0.5f;
             for (e = 1; e <= sbr->data[ch].bs_num_env; e++)
                 for (k = 0; k < sbr->n[sbr->data[ch].bs_freq_res[e]]; k++){
-                    sbr->data[ch].env_facs[e][k] =
-                        exp2f(alpha * sbr->data[ch].env_facs[e][k] + 6.0f);
+                    if (sbr->data[ch].bs_amp_res)
+                        sbr->data[ch].env_facs[e][k] = ff_exp2fi(sbr->data[ch].env_facs_q[e][k] + 6);
+                    else
+                        sbr->data[ch].env_facs[e][k] = ff_exp2fi((sbr->data[ch].env_facs_q[e][k]>>1) + 6)
+                                                       * exp2_tab[sbr->data[ch].env_facs_q[e][k] & 1];
                     if (sbr->data[ch].env_facs[e][k] > 1E20) {
                         av_log(NULL, AV_LOG_ERROR, "envelope scalefactor overflow in dequant\n");
                         sbr->data[ch].env_facs[e][k] = 1;
@@ -1176,89 +128,11 @@ static void sbr_dequant(SpectralBandReplication *sbr, int id_aac)
             for (e = 1; e <= sbr->data[ch].bs_num_noise; e++)
                 for (k = 0; k < sbr->n_q; k++)
                     sbr->data[ch].noise_facs[e][k] =
-                        exp2f(NOISE_FLOOR_OFFSET - sbr->data[ch].noise_facs[e][k]);
+                        ff_exp2fi(NOISE_FLOOR_OFFSET - sbr->data[ch].noise_facs_q[e][k]);
         }
     }
 }
 
-/**
- * Analysis QMF Bank (14496-3 sp04 p206)
- *
- * @param   x       pointer to the beginning of the first sample window
- * @param   W       array of complex-valued samples split into subbands
- */
-#ifndef sbr_qmf_analysis
-static void sbr_qmf_analysis(AVFloatDSPContext *dsp, FFTContext *mdct,
-                             SBRDSPContext *sbrdsp, const float *in, float *x,
-                             float z[320], float W[2][32][32][2], int buf_idx)
-{
-    int i;
-    memcpy(x    , x+1024, (320-32)*sizeof(x[0]));
-    memcpy(x+288, in,         1024*sizeof(x[0]));
-    for (i = 0; i < 32; i++) { // numTimeSlots*RATE = 16*2 as 960 sample frames
-                               // are not supported
-        dsp->vector_fmul_reverse(z, sbr_qmf_window_ds, x, 320);
-        sbrdsp->sum64x5(z);
-        sbrdsp->qmf_pre_shuffle(z);
-        mdct->imdct_half(mdct, z, z+64);
-        sbrdsp->qmf_post_shuffle(W[buf_idx][i], z);
-        x += 32;
-    }
-}
-#endif
-
-/**
- * Synthesis QMF Bank (14496-3 sp04 p206) and Downsampled Synthesis QMF Bank
- * (14496-3 sp04 p206)
- */
-#ifndef sbr_qmf_synthesis
-static void sbr_qmf_synthesis(FFTContext *mdct,
-                              SBRDSPContext *sbrdsp, AVFloatDSPContext *dsp,
-                              float *out, float X[2][38][64],
-                              float mdct_buf[2][64],
-                              float *v0, int *v_off, const unsigned int div)
-{
-    int i, n;
-    const float *sbr_qmf_window = div ? sbr_qmf_window_ds : sbr_qmf_window_us;
-    const int step = 128 >> div;
-    float *v;
-    for (i = 0; i < 32; i++) {
-        if (*v_off < step) {
-            int saved_samples = (1280 - 128) >> div;
-            memcpy(&v0[SBR_SYNTHESIS_BUF_SIZE - saved_samples], v0, saved_samples * sizeof(float));
-            *v_off = SBR_SYNTHESIS_BUF_SIZE - saved_samples - step;
-        } else {
-            *v_off -= step;
-        }
-        v = v0 + *v_off;
-        if (div) {
-            for (n = 0; n < 32; n++) {
-                X[0][i][   n] = -X[0][i][n];
-                X[0][i][32+n] =  X[1][i][31-n];
-            }
-            mdct->imdct_half(mdct, mdct_buf[0], X[0][i]);
-            sbrdsp->qmf_deint_neg(v, mdct_buf[0]);
-        } else {
-            sbrdsp->neg_odd_64(X[1][i]);
-            mdct->imdct_half(mdct, mdct_buf[0], X[0][i]);
-            mdct->imdct_half(mdct, mdct_buf[1], X[1][i]);
-            sbrdsp->qmf_deint_bfly(v, mdct_buf[1], mdct_buf[0]);
-        }
-        dsp->vector_fmul    (out, v                , sbr_qmf_window                       , 64 >> div);
-        dsp->vector_fmul_add(out, v + ( 192 >> div), sbr_qmf_window + ( 64 >> div), out   , 64 >> div);
-        dsp->vector_fmul_add(out, v + ( 256 >> div), sbr_qmf_window + (128 >> div), out   , 64 >> div);
-        dsp->vector_fmul_add(out, v + ( 448 >> div), sbr_qmf_window + (192 >> div), out   , 64 >> div);
-        dsp->vector_fmul_add(out, v + ( 512 >> div), sbr_qmf_window + (256 >> div), out   , 64 >> div);
-        dsp->vector_fmul_add(out, v + ( 704 >> div), sbr_qmf_window + (320 >> div), out   , 64 >> div);
-        dsp->vector_fmul_add(out, v + ( 768 >> div), sbr_qmf_window + (384 >> div), out   , 64 >> div);
-        dsp->vector_fmul_add(out, v + ( 960 >> div), sbr_qmf_window + (448 >> div), out   , 64 >> div);
-        dsp->vector_fmul_add(out, v + (1024 >> div), sbr_qmf_window + (512 >> div), out   , 64 >> div);
-        dsp->vector_fmul_add(out, v + (1216 >> div), sbr_qmf_window + (576 >> div), out   , 64 >> div);
-        out += 64 >> div;
-    }
-}
-#endif
-
 /** High Frequency Generation (14496-3 sp04 p214+) and Inverse Filtering
  * (14496-3 sp04 p214)
  * Warning: This routine does not seem numerically stable.
@@ -1338,203 +212,6 @@ static void sbr_chirp(SpectralBandReplication *sbr, SBRData *ch_data)
     }
 }
 
-/// Generate the subband filtered lowband
-static int sbr_lf_gen(AACContext *ac, SpectralBandReplication *sbr,
-                      float X_low[32][40][2], const float W[2][32][32][2],
-                      int buf_idx)
-{
-    int i, k;
-    const int t_HFGen = 8;
-    const int i_f = 32;
-    memset(X_low, 0, 32*sizeof(*X_low));
-    for (k = 0; k < sbr->kx[1]; k++) {
-        for (i = t_HFGen; i < i_f + t_HFGen; i++) {
-            X_low[k][i][0] = W[buf_idx][i - t_HFGen][k][0];
-            X_low[k][i][1] = W[buf_idx][i - t_HFGen][k][1];
-        }
-    }
-    buf_idx = 1-buf_idx;
-    for (k = 0; k < sbr->kx[0]; k++) {
-        for (i = 0; i < t_HFGen; i++) {
-            X_low[k][i][0] = W[buf_idx][i + i_f - t_HFGen][k][0];
-            X_low[k][i][1] = W[buf_idx][i + i_f - t_HFGen][k][1];
-        }
-    }
-    return 0;
-}
-
-/// High Frequency Generator (14496-3 sp04 p215)
-static int sbr_hf_gen(AACContext *ac, SpectralBandReplication *sbr,
-                      float X_high[64][40][2], const float X_low[32][40][2],
-                      const float (*alpha0)[2], const float (*alpha1)[2],
-                      const float bw_array[5], const uint8_t *t_env,
-                      int bs_num_env)
-{
-    int j, x;
-    int g = 0;
-    int k = sbr->kx[1];
-    for (j = 0; j < sbr->num_patches; j++) {
-        for (x = 0; x < sbr->patch_num_subbands[j]; x++, k++) {
-            const int p = sbr->patch_start_subband[j] + x;
-            while (g <= sbr->n_q && k >= sbr->f_tablenoise[g])
-                g++;
-            g--;
-
-            if (g < 0) {
-                av_log(ac->avctx, AV_LOG_ERROR,
-                       "ERROR : no subband found for frequency %d\n", k);
-                return -1;
-            }
-
-            sbr->dsp.hf_gen(X_high[k] + ENVELOPE_ADJUSTMENT_OFFSET,
-                            X_low[p]  + ENVELOPE_ADJUSTMENT_OFFSET,
-                            alpha0[p], alpha1[p], bw_array[g],
-                            2 * t_env[0], 2 * t_env[bs_num_env]);
-        }
-    }
-    if (k < sbr->m[1] + sbr->kx[1])
-        memset(X_high + k, 0, (sbr->m[1] + sbr->kx[1] - k) * sizeof(*X_high));
-
-    return 0;
-}
-
-/// Generate the subband filtered lowband
-static int sbr_x_gen(SpectralBandReplication *sbr, float X[2][38][64],
-                     const float Y0[38][64][2], const float Y1[38][64][2],
-                     const float X_low[32][40][2], int ch)
-{
-    int k, i;
-    const int i_f = 32;
-    const int i_Temp = FFMAX(2*sbr->data[ch].t_env_num_env_old - i_f, 0);
-    memset(X, 0, 2*sizeof(*X));
-    for (k = 0; k < sbr->kx[0]; k++) {
-        for (i = 0; i < i_Temp; i++) {
-            X[0][i][k] = X_low[k][i + ENVELOPE_ADJUSTMENT_OFFSET][0];
-            X[1][i][k] = X_low[k][i + ENVELOPE_ADJUSTMENT_OFFSET][1];
-        }
-    }
-    for (; k < sbr->kx[0] + sbr->m[0]; k++) {
-        for (i = 0; i < i_Temp; i++) {
-            X[0][i][k] = Y0[i + i_f][k][0];
-            X[1][i][k] = Y0[i + i_f][k][1];
-        }
-    }
-
-    for (k = 0; k < sbr->kx[1]; k++) {
-        for (i = i_Temp; i < 38; i++) {
-            X[0][i][k] = X_low[k][i + ENVELOPE_ADJUSTMENT_OFFSET][0];
-            X[1][i][k] = X_low[k][i + ENVELOPE_ADJUSTMENT_OFFSET][1];
-        }
-    }
-    for (; k < sbr->kx[1] + sbr->m[1]; k++) {
-        for (i = i_Temp; i < i_f; i++) {
-            X[0][i][k] = Y1[i][k][0];
-            X[1][i][k] = Y1[i][k][1];
-        }
-    }
-    return 0;
-}
-
-/** High Frequency Adjustment (14496-3 sp04 p217) and Mapping
- * (14496-3 sp04 p217)
- */
-static int sbr_mapping(AACContext *ac, SpectralBandReplication *sbr,
-                        SBRData *ch_data, int e_a[2])
-{
-    int e, i, m;
-
-    memset(ch_data->s_indexmapped[1], 0, 7*sizeof(ch_data->s_indexmapped[1]));
-    for (e = 0; e < ch_data->bs_num_env; e++) {
-        const unsigned int ilim = sbr->n[ch_data->bs_freq_res[e + 1]];
-        uint16_t *table = ch_data->bs_freq_res[e + 1] ? sbr->f_tablehigh : sbr->f_tablelow;
-        int k;
-
-        if (sbr->kx[1] != table[0]) {
-            av_log(ac->avctx, AV_LOG_ERROR, "kx != f_table{high,low}[0]. "
-                   "Derived frequency tables were not regenerated.\n");
-            sbr_turnoff(sbr);
-            return AVERROR_BUG;
-        }
-        for (i = 0; i < ilim; i++)
-            for (m = table[i]; m < table[i + 1]; m++)
-                sbr->e_origmapped[e][m - sbr->kx[1]] = ch_data->env_facs[e+1][i];
-
-        // ch_data->bs_num_noise > 1 => 2 noise floors
-        k = (ch_data->bs_num_noise > 1) && (ch_data->t_env[e] >= ch_data->t_q[1]);
-        for (i = 0; i < sbr->n_q; i++)
-            for (m = sbr->f_tablenoise[i]; m < sbr->f_tablenoise[i + 1]; m++)
-                sbr->q_mapped[e][m - sbr->kx[1]] = ch_data->noise_facs[k+1][i];
-
-        for (i = 0; i < sbr->n[1]; i++) {
-            if (ch_data->bs_add_harmonic_flag) {
-                const unsigned int m_midpoint =
-                    (sbr->f_tablehigh[i] + sbr->f_tablehigh[i + 1]) >> 1;
-
-                ch_data->s_indexmapped[e + 1][m_midpoint - sbr->kx[1]] = ch_data->bs_add_harmonic[i] *
-                    (e >= e_a[1] || (ch_data->s_indexmapped[0][m_midpoint - sbr->kx[1]] == 1));
-            }
-        }
-
-        for (i = 0; i < ilim; i++) {
-            int additional_sinusoid_present = 0;
-            for (m = table[i]; m < table[i + 1]; m++) {
-                if (ch_data->s_indexmapped[e + 1][m - sbr->kx[1]]) {
-                    additional_sinusoid_present = 1;
-                    break;
-                }
-            }
-            memset(&sbr->s_mapped[e][table[i] - sbr->kx[1]], additional_sinusoid_present,
-                   (table[i + 1] - table[i]) * sizeof(sbr->s_mapped[e][0]));
-        }
-    }
-
-    memcpy(ch_data->s_indexmapped[0], ch_data->s_indexmapped[ch_data->bs_num_env], sizeof(ch_data->s_indexmapped[0]));
-    return 0;
-}
-
-/// Estimation of current envelope (14496-3 sp04 p218)
-static void sbr_env_estimate(float (*e_curr)[48], float X_high[64][40][2],
-                             SpectralBandReplication *sbr, SBRData *ch_data)
-{
-    int e, m;
-    int kx1 = sbr->kx[1];
-
-    if (sbr->bs_interpol_freq) {
-        for (e = 0; e < ch_data->bs_num_env; e++) {
-            const float recip_env_size = 0.5f / (ch_data->t_env[e + 1] - ch_data->t_env[e]);
-            int ilb = ch_data->t_env[e]     * 2 + ENVELOPE_ADJUSTMENT_OFFSET;
-            int iub = ch_data->t_env[e + 1] * 2 + ENVELOPE_ADJUSTMENT_OFFSET;
-
-            for (m = 0; m < sbr->m[1]; m++) {
-                float sum = sbr->dsp.sum_square(X_high[m+kx1] + ilb, iub - ilb);
-                e_curr[e][m] = sum * recip_env_size;
-            }
-        }
-    } else {
-        int k, p;
-
-        for (e = 0; e < ch_data->bs_num_env; e++) {
-            const int env_size = 2 * (ch_data->t_env[e + 1] - ch_data->t_env[e]);
-            int ilb = ch_data->t_env[e]     * 2 + ENVELOPE_ADJUSTMENT_OFFSET;
-            int iub = ch_data->t_env[e + 1] * 2 + ENVELOPE_ADJUSTMENT_OFFSET;
-            const uint16_t *table = ch_data->bs_freq_res[e + 1] ? sbr->f_tablehigh : sbr->f_tablelow;
-
-            for (p = 0; p < sbr->n[ch_data->bs_freq_res[e + 1]]; p++) {
-                float sum = 0.0f;
-                const int den = env_size * (table[p + 1] - table[p]);
-
-                for (k = table[p]; k < table[p + 1]; k++) {
-                    sum += sbr->dsp.sum_square(X_high[k] + ilb, iub - ilb);
-                }
-                sum /= den;
-                for (k = table[p]; k < table[p + 1]; k++) {
-                    e_curr[e][k - kx1] = sum;
-                }
-            }
-        }
-    }
-}
-
 /**
  * Calculation of levels of additional HF signal components (14496-3 sp04 p219)
  * and Calculation of gain (14496-3 sp04 p219)
@@ -1689,99 +366,4 @@ static void sbr_hf_assemble(float Y1[38][64][2],
     ch_data->f_indexsine  = indexsine;
 }
 
-void ff_sbr_apply(AACContext *ac, SpectralBandReplication *sbr, int id_aac,
-                  float* L, float* R)
-{
-    int downsampled = ac->oc[1].m4ac.ext_sample_rate < sbr->sample_rate;
-    int ch;
-    int nch = (id_aac == TYPE_CPE) ? 2 : 1;
-    int err;
-
-    if (id_aac != sbr->id_aac) {
-        av_log(ac->avctx, AV_LOG_ERROR,
-            "element type mismatch %d != %d\n", id_aac, sbr->id_aac);
-        sbr_turnoff(sbr);
-    }
-
-    if (!sbr->kx_and_m_pushed) {
-        sbr->kx[0] = sbr->kx[1];
-        sbr->m[0] = sbr->m[1];
-    } else {
-        sbr->kx_and_m_pushed = 0;
-    }
-
-    if (sbr->start) {
-        sbr_dequant(sbr, id_aac);
-    }
-    for (ch = 0; ch < nch; ch++) {
-        /* decode channel */
-        sbr_qmf_analysis(ac->fdsp, &sbr->mdct_ana, &sbr->dsp, ch ? R : L, sbr->data[ch].analysis_filterbank_samples,
-                         (float*)sbr->qmf_filter_scratch,
-                         sbr->data[ch].W, sbr->data[ch].Ypos);
-        sbr->c.sbr_lf_gen(ac, sbr, sbr->X_low,
-                          (const float (*)[32][32][2]) sbr->data[ch].W,
-                          sbr->data[ch].Ypos);
-        sbr->data[ch].Ypos ^= 1;
-        if (sbr->start) {
-            sbr->c.sbr_hf_inverse_filter(&sbr->dsp, sbr->alpha0, sbr->alpha1,
-                                         (const float (*)[40][2]) sbr->X_low, sbr->k[0]);
-            sbr_chirp(sbr, &sbr->data[ch]);
-            av_assert0(sbr->data[ch].bs_num_env > 0);
-            sbr_hf_gen(ac, sbr, sbr->X_high,
-                       (const float (*)[40][2]) sbr->X_low,
-                       (const float (*)[2]) sbr->alpha0,
-                       (const float (*)[2]) sbr->alpha1,
-                       sbr->data[ch].bw_array, sbr->data[ch].t_env,
-                       sbr->data[ch].bs_num_env);
-
-            // hf_adj
-            err = sbr_mapping(ac, sbr, &sbr->data[ch], sbr->data[ch].e_a);
-            if (!err) {
-                sbr_env_estimate(sbr->e_curr, sbr->X_high, sbr, &sbr->data[ch]);
-                sbr_gain_calc(ac, sbr, &sbr->data[ch], sbr->data[ch].e_a);
-                sbr->c.sbr_hf_assemble(sbr->data[ch].Y[sbr->data[ch].Ypos],
-                                (const float (*)[40][2]) sbr->X_high,
-                                sbr, &sbr->data[ch],
-                                sbr->data[ch].e_a);
-            }
-        }
-
-        /* synthesis */
-        sbr->c.sbr_x_gen(sbr, sbr->X[ch],
-                  (const float (*)[64][2]) sbr->data[ch].Y[1-sbr->data[ch].Ypos],
-                  (const float (*)[64][2]) sbr->data[ch].Y[  sbr->data[ch].Ypos],
-                  (const float (*)[40][2]) sbr->X_low, ch);
-    }
-
-    if (ac->oc[1].m4ac.ps == 1) {
-        if (sbr->ps.start) {
-            ff_ps_apply(ac->avctx, &sbr->ps, sbr->X[0], sbr->X[1], sbr->kx[1] + sbr->m[1]);
-        } else {
-            memcpy(sbr->X[1], sbr->X[0], sizeof(sbr->X[0]));
-        }
-        nch = 2;
-    }
-
-    sbr_qmf_synthesis(&sbr->mdct, &sbr->dsp, ac->fdsp,
-                      L, sbr->X[0], sbr->qmf_filter_scratch,
-                      sbr->data[0].synthesis_filterbank_samples,
-                      &sbr->data[0].synthesis_filterbank_samples_offset,
-                      downsampled);
-    if (nch == 2)
-        sbr_qmf_synthesis(&sbr->mdct, &sbr->dsp, ac->fdsp,
-                          R, sbr->X[1], sbr->qmf_filter_scratch,
-                          sbr->data[1].synthesis_filterbank_samples,
-                          &sbr->data[1].synthesis_filterbank_samples_offset,
-                          downsampled);
-}
-
-static void aacsbr_func_ptr_init(AACSBRContext *c)
-{
-    c->sbr_lf_gen            = sbr_lf_gen;
-    c->sbr_hf_assemble       = sbr_hf_assemble;
-    c->sbr_x_gen             = sbr_x_gen;
-    c->sbr_hf_inverse_filter = sbr_hf_inverse_filter;
-
-    if(ARCH_MIPS)
-        ff_aacsbr_func_ptr_init_mips(c);
-}
+#include "aacsbr_template.c"
diff --git a/libavcodec/aacsbr.h b/libavcodec/aacsbr.h
index f5e33ab6..88c4d8a9 100644
--- a/libavcodec/aacsbr.h
+++ b/libavcodec/aacsbr.h
@@ -33,18 +33,63 @@
 #include "aac.h"
 #include "sbr.h"
 
+#define ENVELOPE_ADJUSTMENT_OFFSET 2
+#define NOISE_FLOOR_OFFSET 6
+
+/**
+ * SBR VLC tables
+ */
+enum {
+    T_HUFFMAN_ENV_1_5DB,
+    F_HUFFMAN_ENV_1_5DB,
+    T_HUFFMAN_ENV_BAL_1_5DB,
+    F_HUFFMAN_ENV_BAL_1_5DB,
+    T_HUFFMAN_ENV_3_0DB,
+    F_HUFFMAN_ENV_3_0DB,
+    T_HUFFMAN_ENV_BAL_3_0DB,
+    F_HUFFMAN_ENV_BAL_3_0DB,
+    T_HUFFMAN_NOISE_3_0DB,
+    T_HUFFMAN_NOISE_BAL_3_0DB,
+};
+
+/**
+ * bs_frame_class - frame class of current SBR frame (14496-3 sp04 p98)
+ */
+enum {
+    FIXFIX,
+    FIXVAR,
+    VARFIX,
+    VARVAR,
+};
+
+enum {
+    EXTENSION_ID_PS = 2, ///< presumably the SBR extension id that announces Parametric Stereo data -- TODO confirm against the spec
+};
+
+static const int8_t vlc_sbr_lav[10] = /* ten entries, presumably indexed by the SBR VLC table ids; likely the largest absolute value each table codes -- TODO confirm */
+    { 60, 60, 24, 24, 31, 31, 12, 12, 31, 12 };
+
+#define SBR_INIT_VLC_STATIC(num, size) /* initializes vlc_sbr[num] from sbr_tmp[num]; both names must be visible where the macro is expanded */ \
+    INIT_VLC_STATIC(&vlc_sbr[num], 9, sbr_tmp[num].table_size / sbr_tmp[num].elem_size,     \
+                    sbr_tmp[num].sbr_bits ,                      1,                      1, \
+                    sbr_tmp[num].sbr_codes, sbr_tmp[num].elem_size, sbr_tmp[num].elem_size, \
+                    size)
+
+#define SBR_VLC_ROW(name) /* initializer row: name_codes, name_bits, byte size of name_codes, byte size of one name_codes element */ \
+    { name ## _codes, name ## _bits, sizeof(name ## _codes), sizeof(name ## _codes[0]) }
+
 /** Initialize SBR. */
-void ff_aac_sbr_init(void);
+void AAC_RENAME(ff_aac_sbr_init)(void);
 /** Initialize one SBR context. */
-void ff_aac_sbr_ctx_init(AACContext *ac, SpectralBandReplication *sbr);
+void AAC_RENAME(ff_aac_sbr_ctx_init)(AACContext *ac, SpectralBandReplication *sbr);
 /** Close one SBR context. */
-void ff_aac_sbr_ctx_close(SpectralBandReplication *sbr);
+void AAC_RENAME(ff_aac_sbr_ctx_close)(SpectralBandReplication *sbr);
 /** Decode one SBR element. */
-int ff_decode_sbr_extension(AACContext *ac, SpectralBandReplication *sbr,
+int AAC_RENAME(ff_decode_sbr_extension)(AACContext *ac, SpectralBandReplication *sbr,
                             GetBitContext *gb, int crc, int cnt, int id_aac);
 /** Apply one SBR element to one AAC element. */
-void ff_sbr_apply(AACContext *ac, SpectralBandReplication *sbr, int id_aac,
-                  float* L, float *R);
+void AAC_RENAME(ff_sbr_apply)(AACContext *ac, SpectralBandReplication *sbr, int id_aac,
+                  INTFLOAT* L, INTFLOAT *R);
 
 void ff_aacsbr_func_ptr_init_mips(AACSBRContext *c);
 
diff --git a/libavcodec/aacsbr_fixed.c b/libavcodec/aacsbr_fixed.c
new file mode 100644
index 00000000..b26314a7
--- /dev/null
+++ b/libavcodec/aacsbr_fixed.c
@@ -0,0 +1,594 @@
+/*
+ * Copyright (c) 2013
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * AAC Spectral Band Replication decoding functions (fixed-point)
+ * Copyright (c) 2008-2009 Robert Swain ( rob opendot cl )
+ * Copyright (c) 2009-2010 Alex Converse <alex.converse@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * AAC Spectral Band Replication decoding functions (fixed-point)
+ * Note: Rounding-to-nearest used unless otherwise stated
+ * @author Robert Swain ( rob opendot cl )
+ * @author Stanislav Ocovaj ( stanislav.ocovaj imgtec com )
+ */
+#define USE_FIXED 1
+
+#include "aac.h"
+#include "sbr.h"
+#include "aacsbr.h"
+#include "aacsbrdata.h"
+#include "aacsbr_fixed_tablegen.h"
+#include "fft.h"
+#include "aacps.h"
+#include "sbrdsp.h"
+#include "libavutil/internal.h"
+#include "libavutil/libm.h"
+#include "libavutil/avassert.h"
+
+#include <stdint.h>
+#include <float.h>
+#include <math.h>
+
+static VLC vlc_sbr[10];
+static void aacsbr_func_ptr_init(AACSBRContext *c);
+static const int CONST_LN2       = Q31(0.6931471806/256);  // ln(2)/256
+static const int CONST_RECIP_LN2 = Q31(0.7213475204);      // 0.5/ln(2)
+static const int CONST_076923    = Q31(0.76923076923076923077f);  // 10/13
+
+static const int fixed_log_table[10] = // 1/2 .. 1/11: the coefficients fixed_log() applies to x^2 .. x^11
+{
+    Q31(1.0/2), Q31(1.0/3), Q31(1.0/4), Q31(1.0/5), Q31(1.0/6),
+    Q31(1.0/7), Q31(1.0/8), Q31(1.0/9), Q31(1.0/10), Q31(1.0/11)
+};
+
+static int fixed_log(int x)
+{
+    const int *coef = fixed_log_table;
+    int sum = x, power = x, pair;
+
+    /* Truncated series x - x^2/2 + x^3/3 - ... + x^11/11 with every product
+     * rounded and shifted right by 31; each pass handles one (minus, plus) pair. */
+    for (pair = 0; pair < 5; pair++, coef += 2) {
+        /* even power of x: subtract its term */
+        power = (int)(((int64_t)power * x + 0x40000000) >> 31);
+        sum  -= (int)(((int64_t)power * coef[0] + 0x40000000) >> 31);
+        /* odd power of x: add its term */
+        power = (int)(((int64_t)power * x + 0x40000000) >> 31);
+        sum  += (int)(((int64_t)power * coef[1] + 0x40000000) >> 31);
+    }
+
+    return sum;
+}
+
+static const int fixed_exp_table[7] = // 1/2! .. 1/8!: the coefficients fixed_exp() applies to x^2 .. x^8
+{
+    Q31(1.0/2), Q31(1.0/6), Q31(1.0/24), Q31(1.0/120),
+    Q31(1.0/720), Q31(1.0/5040), Q31(1.0/40320)
+};
+
+static int fixed_exp(int x)
+{
+    int acc = 0x800000 + x;     /* leading terms 1 + x, where 1.0 is 1 << 23 */
+    int power = x;
+    const int *coef;
+
+    /* add x^n / n! for n = 2 .. 8; powers keep 23 fractional bits */
+    for (coef = fixed_exp_table; coef < fixed_exp_table + 7; coef++) {
+        power = (int)(((int64_t)power * x + 0x400000) >> 23);
+        acc  += (int)(((int64_t)power * *coef + 0x40000000) >> 31);
+    }
+
+    return acc;
+}
+
+static void make_bands(int16_t* bands, int start, int stop, int num_bands)
+{
+    int k, previous, present;
+    int base, prod, nz = 0;
+    // Fills bands[0 .. num_bands-1] with band widths that add up to stop - start; band edges grow by a constant factor.
+    base = (stop << 23) / start;            // ratio stop/start with 23 fractional bits
+    while (base < 0x40000000){              // shift left until bit 30 is set; nz counts the shifts
+        base <<= 1;
+        nz++;
+    }
+    base = fixed_log(base - 0x80000000);    // presumably maps base in [0x40000000, 0x80000000) to x in [-0.5, 0) for the ln(1 + x) series -- TODO confirm
+    base = (((base + 0x80) >> 8) + (8-nz)*CONST_LN2) / num_bands; // rounded >> 8, plus (8-nz) times CONST_LN2, split evenly over num_bands
+    base = fixed_exp(base);                 // per-band growth factor, used below with 23 fractional bits
+
+    previous = start;
+    prod = start << 23;
+
+    for (k = 0; k < num_bands-1; k++) {
+        prod = (int)(((int64_t)prod * base + 0x400000) >> 23); // next band edge, 23 fractional bits
+        present = (prod + 0x400000) >> 23;                     // rounded to an integer edge
+        bands[k] = present - previous;
+        previous = present;
+    }
+    bands[num_bands-1] = stop - previous;   // the last band takes whatever remains up to stop
+}
+
+/// Dequantization and stereo decoding (14496-3 sp04 p203)
+static void sbr_dequant(SpectralBandReplication *sbr, int id_aac)
+{
+    int k, e;
+    int ch;
+    // Coupled CPE: channel 0 receives fac = temp1 / (1 + temp2) and channel 1 receives fac * temp2; otherwise each channel is dequantized on its own.
+    if (id_aac == TYPE_CPE && sbr->bs_coupling) {
+        int alpha      = sbr->data[0].bs_amp_res ?  2 :  1;
+        int pan_offset = sbr->data[0].bs_amp_res ? 12 : 24;
+        for (e = 1; e <= sbr->data[0].bs_num_env; e++) {
+            for (k = 0; k < sbr->n[sbr->data[0].bs_freq_res[e]]; k++) {
+                SoftFloat temp1, temp2, fac;
+
+                temp1.exp = sbr->data[0].env_facs_q[e][k] * alpha + 14; // exponent counted in half steps
+                if (temp1.exp & 1)
+                  temp1.mant = 759250125; // odd half step: roughly 0x20000000 * sqrt(2)
+                else
+                  temp1.mant = 0x20000000;
+                temp1.exp = (temp1.exp >> 1) + 1; // halve the half-step count to get the SoftFloat exponent
+                if (temp1.exp > 66) { // temp1 > 1E20
+                    av_log(NULL, AV_LOG_ERROR, "envelope scalefactor overflow in dequant\n");
+                    temp1 = FLOAT_1; // overflow fallback
+                }
+
+                temp2.exp = (pan_offset - sbr->data[1].env_facs_q[e][k]) * alpha; // same half-step scheme as temp1
+                if (temp2.exp & 1)
+                  temp2.mant = 759250125;
+                else
+                  temp2.mant = 0x20000000;
+                temp2.exp = (temp2.exp >> 1) + 1;
+                fac   = av_div_sf(temp1, av_add_sf(FLOAT_1, temp2));
+                sbr->data[0].env_facs[e][k] = fac;
+                sbr->data[1].env_facs[e][k] = av_mul_sf(fac, temp2);
+            }
+        }
+        for (e = 1; e <= sbr->data[0].bs_num_noise; e++) {
+            for (k = 0; k < sbr->n_q; k++) {
+                SoftFloat temp1, temp2, fac;
+
+                temp1.exp = NOISE_FLOOR_OFFSET - \
+                    sbr->data[0].noise_facs_q[e][k] + 2;
+                temp1.mant = 0x20000000; // noise factors only use whole exponent steps
+                av_assert0(temp1.exp <= 66);
+                temp2.exp = 12 - sbr->data[1].noise_facs_q[e][k] + 1;
+                temp2.mant = 0x20000000;
+                fac   = av_div_sf(temp1, av_add_sf(FLOAT_1, temp2));
+                sbr->data[0].noise_facs[e][k] = fac;
+                sbr->data[1].noise_facs[e][k] = av_mul_sf(fac, temp2);
+            }
+        }
+    } else { // SCE or one non-coupled CPE
+        for (ch = 0; ch < (id_aac == TYPE_CPE) + 1; ch++) {
+            int alpha = sbr->data[ch].bs_amp_res ? 2 : 1;
+            for (e = 1; e <= sbr->data[ch].bs_num_env; e++)
+                for (k = 0; k < sbr->n[sbr->data[ch].bs_freq_res[e]]; k++){
+                    SoftFloat temp1;
+
+                    temp1.exp = alpha * sbr->data[ch].env_facs_q[e][k] + 12; // exponent counted in half steps
+                    if (temp1.exp & 1)
+                        temp1.mant = 759250125; // odd half step: roughly 0x20000000 * sqrt(2)
+                    else
+                        temp1.mant = 0x20000000;
+                    temp1.exp = (temp1.exp >> 1) + 1;
+                    if (temp1.exp > 66) { // temp1 > 1E20
+                        av_log(NULL, AV_LOG_ERROR, "envelope scalefactor overflow in dequant\n");
+                        temp1 = FLOAT_1; // overflow fallback
+                    }
+                    sbr->data[ch].env_facs[e][k] = temp1;
+                }
+            for (e = 1; e <= sbr->data[ch].bs_num_noise; e++)
+                for (k = 0; k < sbr->n_q; k++){
+                    sbr->data[ch].noise_facs[e][k].exp = NOISE_FLOOR_OFFSET - \
+                        sbr->data[ch].noise_facs_q[e][k] + 1;
+                    sbr->data[ch].noise_facs[e][k].mant = 0x20000000;
+                }
+        }
+    }
+}
+
+/**
+ * Convert a SoftFloat prediction coefficient to the fixed-point integer layout
+ * stored in alpha0[] / alpha1[].
+ *
+ * The result is v.mant * 2^(v.exp - 1) rounded to nearest, i.e. the same value
+ * as the open-coded sequence this helper replaces,
+ *     mant <<= 1; shift = 2 - exp; (mant + (1 << (shift - 1))) >> shift;
+ * wherever that sequence was well defined. Unlike that sequence, this helper
+ *  - never shifts by 32 or more (exp <= -30 used to yield shift >= 32),
+ *  - never left-shifts a negative mantissa, and
+ *  - does not double the mantissa before adding the rounding term, which
+ *    could overflow a signed int for small exponents.
+ *
+ * NOTE(review): assumes |v.mant| < 2^30 (a normalized SoftFloat, as av_div_sf()
+ * presumably returns) -- confirm against libavutil/softfloat.h.
+ *
+ * @param v  coefficient to convert
+ * @return   0x7fffffff when v.exp >= 3 (the sign is ignored, as before),
+ *           0 when v.exp <= -30 (magnitude below half an LSB of the result),
+ *           the rounded fixed-point value otherwise
+ */
+static int sbr_sf_to_alpha(SoftFloat v)
+{
+    int shift;
+
+    if (v.exp >= 3)
+        return 0x7fffffff;
+    if (v.exp <= -30)
+        return 0;
+
+    shift = 1 - v.exp;                  // now within [-1, 30]
+    if (shift <= 0)                     // exp is 1 or 2: scale up, nothing to round
+        return v.mant * (1 << -shift);
+
+    return (v.mant + (1 << (shift - 1))) >> shift;
+}
+
+/** High Frequency Generation (14496-3 sp04 p214+) and Inverse Filtering
+ * (14496-3 sp04 p214)
+ * Warning: This routine does not seem numerically stable.
+ *
+ * For each of the first k0 subbands, the terms returned by dsp->autocorrelate()
+ * are combined into two complex coefficients, which are stored as integers.
+ *
+ * @param dsp     DSP context providing autocorrelate()
+ * @param alpha0  output: [k][0] holds the real, [k][1] the imaginary part
+ * @param alpha1  output, same layout as alpha0
+ * @param X_low   input subbands; only entries 0 .. k0-1 are read
+ * @param k0      number of subbands to process
+ */
+static void sbr_hf_inverse_filter(SBRDSPContext *dsp,
+                                  int (*alpha0)[2], int (*alpha1)[2],
+                                  const int X_low[32][40][2], int k0)
+{
+    int k;
+    int shift;
+
+    for (k = 0; k < k0; k++) {
+        SoftFloat phi[3][2][2];
+        SoftFloat a00, a01, a10, a11;
+        SoftFloat dk;
+
+        dsp->autocorrelate(X_low[k], phi);
+
+        dk = av_sub_sf(av_mul_sf(phi[2][1][0], phi[1][0][0]),
+             av_mul_sf(av_add_sf(av_mul_sf(phi[1][1][0], phi[1][1][0]),
+             av_mul_sf(phi[1][1][1], phi[1][1][1])), FLOAT_0999999));
+
+        // a10/a11 (stored in alpha1[k]) are set to FLOAT_0 when dk is zero.
+        if (!dk.mant) {
+            a10 = FLOAT_0;
+            a11 = FLOAT_0;
+        } else {
+            SoftFloat temp_real, temp_im;
+            temp_real = av_sub_sf(av_sub_sf(av_mul_sf(phi[0][0][0], phi[1][1][0]),
+                                            av_mul_sf(phi[0][0][1], phi[1][1][1])),
+                                  av_mul_sf(phi[0][1][0], phi[1][0][0]));
+            temp_im   = av_sub_sf(av_add_sf(av_mul_sf(phi[0][0][0], phi[1][1][1]),
+                                            av_mul_sf(phi[0][0][1], phi[1][1][0])),
+                                  av_mul_sf(phi[0][1][1], phi[1][0][0]));
+
+            a10 = av_div_sf(temp_real, dk);
+            a11 = av_div_sf(temp_im,   dk);
+        }
+
+        // a00/a01 (stored in alpha0[k]) are set to FLOAT_0 when phi[1][0][0] is zero.
+        if (!phi[1][0][0].mant) {
+            a00 = FLOAT_0;
+            a01 = FLOAT_0;
+        } else {
+            SoftFloat temp_real, temp_im;
+            temp_real = av_add_sf(phi[0][0][0],
+                                  av_add_sf(av_mul_sf(a10, phi[1][1][0]),
+                                            av_mul_sf(a11, phi[1][1][1])));
+            temp_im   = av_add_sf(phi[0][0][1],
+                                  av_sub_sf(av_mul_sf(a11, phi[1][1][0]),
+                                            av_mul_sf(a10, phi[1][1][1])));
+
+            temp_real.mant = -temp_real.mant;
+            temp_im.mant   = -temp_im.mant;
+            a00 = av_div_sf(temp_real, phi[1][0][0]);
+            a01 = av_div_sf(temp_im,   phi[1][0][0]);
+        }
+
+        // Store the coefficients as fixed-point integers.
+        alpha0[k][0] = sbr_sf_to_alpha(a00);
+        alpha0[k][1] = sbr_sf_to_alpha(a01);
+        alpha1[k][0] = sbr_sf_to_alpha(a10);
+        alpha1[k][1] = sbr_sf_to_alpha(a11);
+
+        // Zero all four coefficients when either squared magnitude reaches the threshold.
+        shift = (int)(((int64_t)(alpha1[k][0]>>1) * (alpha1[k][0]>>1) + \
+                       (int64_t)(alpha1[k][1]>>1) * (alpha1[k][1]>>1) + \
+                       0x40000000) >> 31);
+        if (shift >= 0x20000000){
+            alpha1[k][0] = 0;
+            alpha1[k][1] = 0;
+            alpha0[k][0] = 0;
+            alpha0[k][1] = 0;
+        }
+
+        shift = (int)(((int64_t)(alpha0[k][0]>>1) * (alpha0[k][0]>>1) + \
+                       (int64_t)(alpha0[k][1]>>1) * (alpha0[k][1]>>1) + \
+                       0x40000000) >> 31);
+        if (shift >= 0x20000000){
+            alpha1[k][0] = 0;
+            alpha1[k][1] = 0;
+            alpha0[k][0] = 0;
+            alpha0[k][1] = 0;
+        }
+    }
+}
+
+/// Chirp Factors (14496-3 sp04 p214)
+static void sbr_chirp(SpectralBandReplication *sbr, SBRData *ch_data)
+{
+    static const int bw_tab[] = { 0, 1610612736, 1932735283, 2104533975 };
+    int band;
+
+    for (band = 0; band < sbr->n_q; band++) {
+        const int mode_sum = ch_data->bs_invf_mode[0][band] + ch_data->bs_invf_mode[1][band];
+        int target = mode_sum == 1 ? 1288490189 : bw_tab[ch_data->bs_invf_mode[0][band]];
+        int w_target, w_prev;
+        int64_t blend;
+
+        /* the weight pair depends on whether the target lies below the previous value */
+        if (target < ch_data->bw_array[band]) {
+            w_target = 1610612736;
+            w_prev   = 0x20000000;
+        } else {
+            w_target = 1946157056;
+            w_prev   = 201326592;
+        }
+        blend  = (int64_t)target * w_target + (int64_t)ch_data->bw_array[band] * w_prev;
+        target = (int)((blend + 0x40000000) >> 31);
+
+        ch_data->bw_array[band] = target < 0x2000000 ? 0 : target;
+    }
+}
+
+/**
+ * Calculation of levels of additional HF signal components (14496-3 sp04 p219)
+ * and Calculation of gain (14496-3 sp04 p219)
+ */
+static void sbr_gain_calc(AACContext *ac, SpectralBandReplication *sbr,
+                          SBRData *ch_data, const int e_a[2])
+{
+    int e, k, m;
+    // max gain limits : -3dB, 0dB, 3dB, inf dB (limiter off)
+    static const SoftFloat limgain[4] = { { 760155524,  0 }, { 0x20000000,  1 },
+                                            { 758351638,  1 }, { 625000000, 34 } };
+
+    for (e = 0; e < ch_data->bs_num_env; e++) {
+        int delta = !((e == e_a[1]) || (e == e_a[0])); // 0 when e equals one of the two indices in e_a, 1 otherwise
+        for (k = 0; k < sbr->n_lim; k++) { // one pass per range [f_tablelim[k], f_tablelim[k + 1]), indexed relative to kx[1]
+            SoftFloat gain_boost, gain_max;
+            SoftFloat sum[2];
+            sum[0] = sum[1] = FLOAT_0;
+            for (m = sbr->f_tablelim[k] - sbr->kx[1]; m < sbr->f_tablelim[k + 1] - sbr->kx[1]; m++) { // q_m, s_m and the unlimited gain
+                const SoftFloat temp = av_div_sf(sbr->e_origmapped[e][m],
+                                            av_add_sf(FLOAT_1, sbr->q_mapped[e][m]));
+                sbr->q_m[e][m] = av_sqrt_sf(av_mul_sf(temp, sbr->q_mapped[e][m]));
+                sbr->s_m[e][m] = av_sqrt_sf(av_mul_sf(temp, av_int2sf(ch_data->s_indexmapped[e + 1][m], 0)));
+                if (!sbr->s_mapped[e][m]) {
+                    if (delta) {
+                      sbr->gain[e][m] = av_sqrt_sf(av_div_sf(sbr->e_origmapped[e][m],
+                                            av_mul_sf(av_add_sf(FLOAT_1, sbr->e_curr[e][m]),
+                                            av_add_sf(FLOAT_1, sbr->q_mapped[e][m]))));
+                    } else {
+                      sbr->gain[e][m] = av_sqrt_sf(av_div_sf(sbr->e_origmapped[e][m],
+                                            av_add_sf(FLOAT_1, sbr->e_curr[e][m])));
+                    }
+                } else {
+                    sbr->gain[e][m] = av_sqrt_sf(
+                                        av_div_sf(
+                                            av_mul_sf(sbr->e_origmapped[e][m], sbr->q_mapped[e][m]),
+                                            av_mul_sf(
+                                                av_add_sf(FLOAT_1, sbr->e_curr[e][m]),
+                                                av_add_sf(FLOAT_1, sbr->q_mapped[e][m]))));
+                }
+            }
+            for (m = sbr->f_tablelim[k] - sbr->kx[1]; m < sbr->f_tablelim[k + 1] - sbr->kx[1]; m++) { // totals of e_origmapped (sum[0]) and e_curr (sum[1]) over the range
+                sum[0] = av_add_sf(sum[0], sbr->e_origmapped[e][m]);
+                sum[1] = av_add_sf(sum[1], sbr->e_curr[e][m]);
+            }
+            gain_max = av_mul_sf(limgain[sbr->bs_limiter_gains],
+                            av_sqrt_sf(
+                                av_div_sf(
+                                    av_add_sf(FLOAT_EPSILON, sum[0]),
+                                    av_add_sf(FLOAT_EPSILON, sum[1]))));
+            if (av_gt_sf(gain_max, FLOAT_100000)) // clamp gain_max to FLOAT_100000
+              gain_max = FLOAT_100000;
+            for (m = sbr->f_tablelim[k] - sbr->kx[1]; m < sbr->f_tablelim[k + 1] - sbr->kx[1]; m++) { // limit q_m and gain using gain_max
+                SoftFloat q_m_max = av_div_sf(
+                                        av_mul_sf(sbr->q_m[e][m], gain_max),
+                                        sbr->gain[e][m]);
+                if (av_gt_sf(sbr->q_m[e][m], q_m_max))
+                  sbr->q_m[e][m] = q_m_max;
+                if (av_gt_sf(sbr->gain[e][m], gain_max))
+                  sbr->gain[e][m] = gain_max;
+            }
+            sum[0] = sum[1] = FLOAT_0;
+            for (m = sbr->f_tablelim[k] - sbr->kx[1]; m < sbr->f_tablelim[k + 1] - sbr->kx[1]; m++) { // sum[1]: e_curr * gain^2 + s_m^2, plus q_m^2 when delta is set and s_m is zero
+                sum[0] = av_add_sf(sum[0], sbr->e_origmapped[e][m]);
+                sum[1] = av_add_sf(sum[1],
+                            av_mul_sf(
+                                av_mul_sf(sbr->e_curr[e][m],
+                                          sbr->gain[e][m]),
+                                sbr->gain[e][m]));
+                sum[1] = av_add_sf(sum[1],
+                            av_mul_sf(sbr->s_m[e][m], sbr->s_m[e][m]));
+                if (delta && !sbr->s_m[e][m].mant)
+                  sum[1] = av_add_sf(sum[1],
+                                av_mul_sf(sbr->q_m[e][m], sbr->q_m[e][m]));
+            }
+            gain_boost = av_sqrt_sf(
+                            av_div_sf(
+                                av_add_sf(FLOAT_EPSILON, sum[0]),
+                                av_add_sf(FLOAT_EPSILON, sum[1])));
+            if (av_gt_sf(gain_boost, FLOAT_1584893192)) // clamp gain_boost to FLOAT_1584893192
+              gain_boost = FLOAT_1584893192;
+
+            for (m = sbr->f_tablelim[k] - sbr->kx[1]; m < sbr->f_tablelim[k + 1] - sbr->kx[1]; m++) { // apply gain_boost to gain, q_m and s_m
+                sbr->gain[e][m] = av_mul_sf(sbr->gain[e][m], gain_boost);
+                sbr->q_m[e][m]  = av_mul_sf(sbr->q_m[e][m], gain_boost);
+                sbr->s_m[e][m]  = av_mul_sf(sbr->s_m[e][m], gain_boost);
+            }
+        }
+    }
+}
+
+/**
+ * Add the sinusoid level s, times sign (+1 or -1), to *out as an integer:
+ * (s.mant * sign) >> (22 - s.exp), rounded. A shift >= 32 adds nothing.
+ * @return 0 on success, -1 (leaving *out untouched) when 22 - s.exp < 1
+ */
+static int sbr_add_sine(int *out, SoftFloat s, int sign)
+{
+    if (s.exp > 21)
+        return -1;
+    if (s.exp > -10)
+        *out += (s.mant * sign + (1 << (21 - s.exp))) >> (22 - s.exp);
+    return 0;
+}
+
+/// Assembling HF Signals (14496-3 sp04 p220)
+/// Returns early, after logging an error, when a sinusoid level is too large for the integer output.
+static void sbr_hf_assemble(int Y1[38][64][2],
+                            const int X_high[64][40][2],
+                            SpectralBandReplication *sbr, SBRData *ch_data,
+                            const int e_a[2])
+{
+    int e, i, j, m;
+    const int h_SL = 4 * !sbr->bs_smoothing_mode;
+    const int kx = sbr->kx[1];
+    const int m_max = sbr->m[1];
+    static const SoftFloat h_smooth[5] = {
+      { 715827883, -1 }, { 647472402, -1 }, { 937030863, -2 },
+      { 989249804, -3 }, { 546843842, -4 },
+    };
+    SoftFloat (*g_temp)[48] = ch_data->g_temp, (*q_temp)[48] = ch_data->q_temp;
+    int indexnoise = ch_data->f_indexnoise;
+    int indexsine  = ch_data->f_indexsine;
+
+    if (sbr->reset) {
+        for (i = 0; i < h_SL; i++) {
+            memcpy(g_temp[i + 2*ch_data->t_env[0]], sbr->gain[0], m_max * sizeof(sbr->gain[0][0]));
+            memcpy(q_temp[i + 2*ch_data->t_env[0]], sbr->q_m[0],  m_max * sizeof(sbr->q_m[0][0]));
+        }
+    } else if (h_SL) {
+        for (i = 0; i < 4; i++) {
+            memcpy(g_temp[i + 2 * ch_data->t_env[0]],
+                   g_temp[i + 2 * ch_data->t_env_num_env_old],
+                   sizeof(g_temp[0]));
+            memcpy(q_temp[i + 2 * ch_data->t_env[0]],
+                   q_temp[i + 2 * ch_data->t_env_num_env_old],
+                   sizeof(q_temp[0]));
+        }
+    }
+
+    for (e = 0; e < ch_data->bs_num_env; e++) {
+        for (i = 2 * ch_data->t_env[e]; i < 2 * ch_data->t_env[e + 1]; i++) {
+            memcpy(g_temp[h_SL + i], sbr->gain[e], m_max * sizeof(sbr->gain[0][0]));
+            memcpy(q_temp[h_SL + i], sbr->q_m[e],  m_max * sizeof(sbr->q_m[0][0]));
+        }
+    }
+
+    for (e = 0; e < ch_data->bs_num_env; e++) {
+        for (i = 2 * ch_data->t_env[e]; i < 2 * ch_data->t_env[e + 1]; i++) {
+            SoftFloat g_filt_tab[48];
+            SoftFloat q_filt_tab[48];
+            SoftFloat *g_filt, *q_filt;
+
+            if (h_SL && e != e_a[0] && e != e_a[1]) {
+                g_filt = g_filt_tab;
+                q_filt = q_filt_tab;
+                for (m = 0; m < m_max; m++) {
+                    const int idx1 = i + h_SL;
+                    g_filt[m].mant = g_filt[m].exp = 0;
+                    q_filt[m].mant = q_filt[m].exp = 0;
+                    for (j = 0; j <= h_SL; j++) {
+                        g_filt[m] = av_add_sf(g_filt[m],
+                                        av_mul_sf(g_temp[idx1 - j][m],
+                                            h_smooth[j]));
+                        q_filt[m] = av_add_sf(q_filt[m],
+                                        av_mul_sf(q_temp[idx1 - j][m],
+                                            h_smooth[j]));
+                    }
+                }
+            } else {
+                g_filt = g_temp[i + h_SL];
+                q_filt = q_temp[i];
+            }
+
+            sbr->dsp.hf_g_filt(Y1[i] + kx, X_high + kx, g_filt, m_max,
+                               i + ENVELOPE_ADJUSTMENT_OFFSET);
+
+            if (e != e_a[0] && e != e_a[1]) {
+                sbr->dsp.hf_apply_noise[indexsine](Y1[i] + kx, sbr->s_m[e],
+                                                   q_filt, indexnoise,
+                                                   kx, m_max);
+            } else {
+                int idx = indexsine&1;
+                int A = (1-((indexsine+(kx & 1))&2));
+                int B = (A^(-idx)) + idx;
+                int *out = &Y1[i][kx][idx];
+                SoftFloat *in  = sbr->s_m[e];
+                for (m = 0; m < m_max; m++) { // even m carries sign A, odd m sign B
+                    if (sbr_add_sine(&out[2*m], in[m], (m & 1) ? B : A) < 0) {
+                        av_log(NULL, AV_LOG_ERROR, "Overflow in sbr_hf_assemble\n");
+                        return;
+                    }
+                }
+            }
+            indexnoise = (indexnoise + m_max) & 0x1ff;
+            indexsine = (indexsine + 1) & 3;
+        }
+    }
+    ch_data->f_indexnoise = indexnoise;
+    ch_data->f_indexsine  = indexsine;
+}
+
+#include "aacsbr_template.c"
diff --git a/libavcodec/aacsbr_tablegen.c b/libavcodec/aacsbr_fixed_tablegen.h
similarity index 70%
rename from libavcodec/aacsbr_tablegen.c
rename to libavcodec/aacsbr_fixed_tablegen.h
index c3c0f0ce..3fcf0204 100644
--- a/libavcodec/aacsbr_tablegen.c
+++ b/libavcodec/aacsbr_fixed_tablegen.h
@@ -20,20 +20,9 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include <stdlib.h>
-#define CONFIG_HARDCODED_TABLES 0
-#include "libavutil/common.h"
-#include "aacsbr_tablegen.h"
-#include "tableprint.h"
+#ifndef AVCODEC_AACSBR_FIXED_TABLEGEN_H
+#define AVCODEC_AACSBR_FIXED_TABLEGEN_H
 
-int main(void)
-{
-    aacsbr_tableinit();
+#include "aacsbr_tablegen_common.h"
 
-    write_fileheader();
-
-    WRITE_ARRAY_ALIGNED("static const", 32, float, sbr_qmf_window_ds);
-    WRITE_ARRAY_ALIGNED("static const", 32, float, sbr_qmf_window_us);
-
-    return 0;
-}
+#endif /* AVCODEC_AACSBR_FIXED_TABLEGEN_H */
diff --git a/libavcodec/aacsbr_tablegen.h b/libavcodec/aacsbr_tablegen.h
index 56fdccce..242a9635 100644
--- a/libavcodec/aacsbr_tablegen.h
+++ b/libavcodec/aacsbr_tablegen.h
@@ -23,107 +23,6 @@
 #ifndef AVCODEC_AACSBR_TABLEGEN_H
 #define AVCODEC_AACSBR_TABLEGEN_H
 
-#if CONFIG_HARDCODED_TABLES
-#define aacsbr_tableinit()
-#include "libavcodec/aacsbr_tables.h"
-#else
-///< window coefficients for analysis/synthesis QMF banks
-static DECLARE_ALIGNED(32, float, sbr_qmf_window_ds)[320];
-static DECLARE_ALIGNED(32, float, sbr_qmf_window_us)[640] = {
-     0.0000000000, -0.0005525286, -0.0005617692, -0.0004947518,
-    -0.0004875227, -0.0004893791, -0.0005040714, -0.0005226564,
-    -0.0005466565, -0.0005677802, -0.0005870930, -0.0006132747,
-    -0.0006312493, -0.0006540333, -0.0006777690, -0.0006941614,
-    -0.0007157736, -0.0007255043, -0.0007440941, -0.0007490598,
-    -0.0007681371, -0.0007724848, -0.0007834332, -0.0007779869,
-    -0.0007803664, -0.0007801449, -0.0007757977, -0.0007630793,
-    -0.0007530001, -0.0007319357, -0.0007215391, -0.0006917937,
-    -0.0006650415, -0.0006341594, -0.0005946118, -0.0005564576,
-    -0.0005145572, -0.0004606325, -0.0004095121, -0.0003501175,
-    -0.0002896981, -0.0002098337, -0.0001446380, -0.0000617334,
-     0.0000134949,  0.0001094383,  0.0002043017,  0.0002949531,
-     0.0004026540,  0.0005107388,  0.0006239376,  0.0007458025,
-     0.0008608443,  0.0009885988,  0.0011250155,  0.0012577884,
-     0.0013902494,  0.0015443219,  0.0016868083,  0.0018348265,
-     0.0019841140,  0.0021461583,  0.0023017254,  0.0024625616,
-     0.0026201758,  0.0027870464,  0.0029469447,  0.0031125420,
-     0.0032739613,  0.0034418874,  0.0036008268,  0.0037603922,
-     0.0039207432,  0.0040819753,  0.0042264269,  0.0043730719,
-     0.0045209852,  0.0046606460,  0.0047932560,  0.0049137603,
-     0.0050393022,  0.0051407353,  0.0052461166,  0.0053471681,
-     0.0054196775,  0.0054876040,  0.0055475714,  0.0055938023,
-     0.0056220643,  0.0056455196,  0.0056389199,  0.0056266114,
-     0.0055917128,  0.0055404363,  0.0054753783,  0.0053838975,
-     0.0052715758,  0.0051382275,  0.0049839687,  0.0048109469,
-     0.0046039530,  0.0043801861,  0.0041251642,  0.0038456408,
-     0.0035401246,  0.0032091885,  0.0028446757,  0.0024508540,
-     0.0020274176,  0.0015784682,  0.0010902329,  0.0005832264,
-     0.0000276045, -0.0005464280, -0.0011568135, -0.0018039472,
-    -0.0024826723, -0.0031933778, -0.0039401124, -0.0047222596,
-    -0.0055337211, -0.0063792293, -0.0072615816, -0.0081798233,
-    -0.0091325329, -0.0101150215, -0.0111315548, -0.0121849995,
-     0.0132718220,  0.0143904666,  0.0155405553,  0.0167324712,
-     0.0179433381,  0.0191872431,  0.0204531793,  0.0217467550,
-     0.0230680169,  0.0244160992,  0.0257875847,  0.0271859429,
-     0.0286072173,  0.0300502657,  0.0315017608,  0.0329754081,
-     0.0344620948,  0.0359697560,  0.0374812850,  0.0390053679,
-     0.0405349170,  0.0420649094,  0.0436097542,  0.0451488405,
-     0.0466843027,  0.0482165720,  0.0497385755,  0.0512556155,
-     0.0527630746,  0.0542452768,  0.0557173648,  0.0571616450,
-     0.0585915683,  0.0599837480,  0.0613455171,  0.0626857808,
-     0.0639715898,  0.0652247106,  0.0664367512,  0.0676075985,
-     0.0687043828,  0.0697630244,  0.0707628710,  0.0717002673,
-     0.0725682583,  0.0733620255,  0.0741003642,  0.0747452558,
-     0.0753137336,  0.0758008358,  0.0761992479,  0.0764992170,
-     0.0767093490,  0.0768173975,  0.0768230011,  0.0767204924,
-     0.0765050718,  0.0761748321,  0.0757305756,  0.0751576255,
-     0.0744664394,  0.0736406005,  0.0726774642,  0.0715826364,
-     0.0703533073,  0.0689664013,  0.0674525021,  0.0657690668,
-     0.0639444805,  0.0619602779,  0.0598166570,  0.0575152691,
-     0.0550460034,  0.0524093821,  0.0495978676,  0.0466303305,
-     0.0434768782,  0.0401458278,  0.0366418116,  0.0329583930,
-     0.0290824006,  0.0250307561,  0.0207997072,  0.0163701258,
-     0.0117623832,  0.0069636862,  0.0019765601, -0.0032086896,
-    -0.0085711749, -0.0141288827, -0.0198834129, -0.0258227288,
-    -0.0319531274, -0.0382776572, -0.0447806821, -0.0514804176,
-    -0.0583705326, -0.0654409853, -0.0726943300, -0.0801372934,
-    -0.0877547536, -0.0955533352, -0.1035329531, -0.1116826931,
-    -0.1200077984, -0.1285002850, -0.1371551761, -0.1459766491,
-    -0.1549607071, -0.1640958855, -0.1733808172, -0.1828172548,
-    -0.1923966745, -0.2021250176, -0.2119735853, -0.2219652696,
-    -0.2320690870, -0.2423016884, -0.2526480309, -0.2631053299,
-    -0.2736634040, -0.2843214189, -0.2950716717, -0.3059098575,
-    -0.3168278913, -0.3278113727, -0.3388722693, -0.3499914122,
-     0.3611589903,  0.3723795546,  0.3836350013,  0.3949211761,
-     0.4062317676,  0.4175696896,  0.4289119920,  0.4402553754,
-     0.4515996535,  0.4629308085,  0.4742453214,  0.4855253091,
-     0.4967708254,  0.5079817500,  0.5191234970,  0.5302240895,
-     0.5412553448,  0.5522051258,  0.5630789140,  0.5738524131,
-     0.5845403235,  0.5951123086,  0.6055783538,  0.6159109932,
-     0.6261242695,  0.6361980107,  0.6461269695,  0.6559016302,
-     0.6655139880,  0.6749663190,  0.6842353293,  0.6933282376,
-     0.7022388719,  0.7109410426,  0.7194462634,  0.7277448900,
-     0.7358211758,  0.7436827863,  0.7513137456,  0.7587080760,
-     0.7658674865,  0.7727780881,  0.7794287519,  0.7858353120,
-     0.7919735841,  0.7978466413,  0.8034485751,  0.8087695004,
-     0.8138191270,  0.8185776004,  0.8230419890,  0.8272275347,
-     0.8311038457,  0.8346937361,  0.8379717337,  0.8409541392,
-     0.8436238281,  0.8459818469,  0.8480315777,  0.8497805198,
-     0.8511971524,  0.8523047035,  0.8531020949,  0.8535720573,
-     0.8537385600,
-};
-
-static av_cold void aacsbr_tableinit(void)
-{
-    int n;
-    for (n = 1; n < 320; n++)
-        sbr_qmf_window_us[320 + n] = sbr_qmf_window_us[320 - n];
-    sbr_qmf_window_us[384] = -sbr_qmf_window_us[384];
-    sbr_qmf_window_us[512] = -sbr_qmf_window_us[512];
-
-    for (n = 0; n < 320; n++)
-        sbr_qmf_window_ds[n] = sbr_qmf_window_us[2*n];
-}
-#endif /* CONFIG_HARDCODED_TABLES */
+#include "aacsbr_tablegen_common.h"
 
 #endif /* AVCODEC_AACSBR_TABLEGEN_H */
diff --git a/libavcodec/aacsbr_tablegen_common.h b/libavcodec/aacsbr_tablegen_common.h
new file mode 100644
index 00000000..8c8f6eff
--- /dev/null
+++ b/libavcodec/aacsbr_tablegen_common.h
@@ -0,0 +1,126 @@
+/*
+ * Header file for hardcoded AAC SBR windows
+ *
+ * Copyright (c) 2014 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AACSBR_TABLEGEN_COMMON_H
+#define AVCODEC_AACSBR_TABLEGEN_COMMON_H
+#include "aac_defines.h"
+#include "libavutil/mem.h"
+
+/// window coefficients for analysis/synthesis QMF banks
+static DECLARE_ALIGNED(32, INTFLOAT, sbr_qmf_window_ds)[320];
+static DECLARE_ALIGNED(32, INTFLOAT, sbr_qmf_window_us)[640] = {
+    Q31( 0.0000000000f), Q31(-0.0005525286f), Q31(-0.0005617692f), Q31(-0.0004947518f),
+    Q31(-0.0004875227f), Q31(-0.0004893791f), Q31(-0.0005040714f), Q31(-0.0005226564f),
+    Q31(-0.0005466565f), Q31(-0.0005677802f), Q31(-0.0005870930f), Q31(-0.0006132747f),
+    Q31(-0.0006312493f), Q31(-0.0006540333f), Q31(-0.0006777690f), Q31(-0.0006941614f),
+    Q31(-0.0007157736f), Q31(-0.0007255043f), Q31(-0.0007440941f), Q31(-0.0007490598f),
+    Q31(-0.0007681371f), Q31(-0.0007724848f), Q31(-0.0007834332f), Q31(-0.0007779869f),
+    Q31(-0.0007803664f), Q31(-0.0007801449f), Q31(-0.0007757977f), Q31(-0.0007630793f),
+    Q31(-0.0007530001f), Q31(-0.0007319357f), Q31(-0.0007215391f), Q31(-0.0006917937f),
+    Q31(-0.0006650415f), Q31(-0.0006341594f), Q31(-0.0005946118f), Q31(-0.0005564576f),
+    Q31(-0.0005145572f), Q31(-0.0004606325f), Q31(-0.0004095121f), Q31(-0.0003501175f),
+    Q31(-0.0002896981f), Q31(-0.0002098337f), Q31(-0.0001446380f), Q31(-0.0000617334f),
+    Q31( 0.0000134949f), Q31( 0.0001094383f), Q31( 0.0002043017f), Q31( 0.0002949531f),
+    Q31( 0.0004026540f), Q31( 0.0005107388f), Q31( 0.0006239376f), Q31( 0.0007458025f),
+    Q31( 0.0008608443f), Q31( 0.0009885988f), Q31( 0.0011250155f), Q31( 0.0012577884f),
+    Q31( 0.0013902494f), Q31( 0.0015443219f), Q31( 0.0016868083f), Q31( 0.0018348265f),
+    Q31( 0.0019841140f), Q31( 0.0021461583f), Q31( 0.0023017254f), Q31( 0.0024625616f),
+    Q31( 0.0026201758f), Q31( 0.0027870464f), Q31( 0.0029469447f), Q31( 0.0031125420f),
+    Q31( 0.0032739613f), Q31( 0.0034418874f), Q31( 0.0036008268f), Q31( 0.0037603922f),
+    Q31( 0.0039207432f), Q31( 0.0040819753f), Q31( 0.0042264269f), Q31( 0.0043730719f),
+    Q31( 0.0045209852f), Q31( 0.0046606460f), Q31( 0.0047932560f), Q31( 0.0049137603f),
+    Q31( 0.0050393022f), Q31( 0.0051407353f), Q31( 0.0052461166f), Q31( 0.0053471681f),
+    Q31( 0.0054196775f), Q31( 0.0054876040f), Q31( 0.0055475714f), Q31( 0.0055938023f),
+    Q31( 0.0056220643f), Q31( 0.0056455196f), Q31( 0.0056389199f), Q31( 0.0056266114f),
+    Q31( 0.0055917128f), Q31( 0.0055404363f), Q31( 0.0054753783f), Q31( 0.0053838975f),
+    Q31( 0.0052715758f), Q31( 0.0051382275f), Q31( 0.0049839687f), Q31( 0.0048109469f),
+    Q31( 0.0046039530f), Q31( 0.0043801861f), Q31( 0.0041251642f), Q31( 0.0038456408f),
+    Q31( 0.0035401246f), Q31( 0.0032091885f), Q31( 0.0028446757f), Q31( 0.0024508540f),
+    Q31( 0.0020274176f), Q31( 0.0015784682f), Q31( 0.0010902329f), Q31( 0.0005832264f),
+    Q31( 0.0000276045f), Q31(-0.0005464280f), Q31(-0.0011568135f), Q31(-0.0018039472f),
+    Q31(-0.0024826723f), Q31(-0.0031933778f), Q31(-0.0039401124f), Q31(-0.0047222596f),
+    Q31(-0.0055337211f), Q31(-0.0063792293f), Q31(-0.0072615816f), Q31(-0.0081798233f),
+    Q31(-0.0091325329f), Q31(-0.0101150215f), Q31(-0.0111315548f), Q31(-0.0121849995f),
+    Q31( 0.0132718220f), Q31( 0.0143904666f), Q31( 0.0155405553f), Q31( 0.0167324712f),
+    Q31( 0.0179433381f), Q31( 0.0191872431f), Q31( 0.0204531793f), Q31( 0.0217467550f),
+    Q31( 0.0230680169f), Q31( 0.0244160992f), Q31( 0.0257875847f), Q31( 0.0271859429f),
+    Q31( 0.0286072173f), Q31( 0.0300502657f), Q31( 0.0315017608f), Q31( 0.0329754081f),
+    Q31( 0.0344620948f), Q31( 0.0359697560f), Q31( 0.0374812850f), Q31( 0.0390053679f),
+    Q31( 0.0405349170f), Q31( 0.0420649094f), Q31( 0.0436097542f), Q31( 0.0451488405f),
+    Q31( 0.0466843027f), Q31( 0.0482165720f), Q31( 0.0497385755f), Q31( 0.0512556155f),
+    Q31( 0.0527630746f), Q31( 0.0542452768f), Q31( 0.0557173648f), Q31( 0.0571616450f),
+    Q31( 0.0585915683f), Q31( 0.0599837480f), Q31( 0.0613455171f), Q31( 0.0626857808f),
+    Q31( 0.0639715898f), Q31( 0.0652247106f), Q31( 0.0664367512f), Q31( 0.0676075985f),
+    Q31( 0.0687043828f), Q31( 0.0697630244f), Q31( 0.0707628710f), Q31( 0.0717002673f),
+    Q31( 0.0725682583f), Q31( 0.0733620255f), Q31( 0.0741003642f), Q31( 0.0747452558f),
+    Q31( 0.0753137336f), Q31( 0.0758008358f), Q31( 0.0761992479f), Q31( 0.0764992170f),
+    Q31( 0.0767093490f), Q31( 0.0768173975f), Q31( 0.0768230011f), Q31( 0.0767204924f),
+    Q31( 0.0765050718f), Q31( 0.0761748321f), Q31( 0.0757305756f), Q31( 0.0751576255f),
+    Q31( 0.0744664394f), Q31( 0.0736406005f), Q31( 0.0726774642f), Q31( 0.0715826364f),
+    Q31( 0.0703533073f), Q31( 0.0689664013f), Q31( 0.0674525021f), Q31( 0.0657690668f),
+    Q31( 0.0639444805f), Q31( 0.0619602779f), Q31( 0.0598166570f), Q31( 0.0575152691f),
+    Q31( 0.0550460034f), Q31( 0.0524093821f), Q31( 0.0495978676f), Q31( 0.0466303305f),
+    Q31( 0.0434768782f), Q31( 0.0401458278f), Q31( 0.0366418116f), Q31( 0.0329583930f),
+    Q31( 0.0290824006f), Q31( 0.0250307561f), Q31( 0.0207997072f), Q31( 0.0163701258f),
+    Q31( 0.0117623832f), Q31( 0.0069636862f), Q31( 0.0019765601f), Q31(-0.0032086896f),
+    Q31(-0.0085711749f), Q31(-0.0141288827f), Q31(-0.0198834129f), Q31(-0.0258227288f),
+    Q31(-0.0319531274f), Q31(-0.0382776572f), Q31(-0.0447806821f), Q31(-0.0514804176f),
+    Q31(-0.0583705326f), Q31(-0.0654409853f), Q31(-0.0726943300f), Q31(-0.0801372934f),
+    Q31(-0.0877547536f), Q31(-0.0955533352f), Q31(-0.1035329531f), Q31(-0.1116826931f),
+    Q31(-0.1200077984f), Q31(-0.1285002850f), Q31(-0.1371551761f), Q31(-0.1459766491f),
+    Q31(-0.1549607071f), Q31(-0.1640958855f), Q31(-0.1733808172f), Q31(-0.1828172548f),
+    Q31(-0.1923966745f), Q31(-0.2021250176f), Q31(-0.2119735853f), Q31(-0.2219652696f),
+    Q31(-0.2320690870f), Q31(-0.2423016884f), Q31(-0.2526480309f), Q31(-0.2631053299f),
+    Q31(-0.2736634040f), Q31(-0.2843214189f), Q31(-0.2950716717f), Q31(-0.3059098575f),
+    Q31(-0.3168278913f), Q31(-0.3278113727f), Q31(-0.3388722693f), Q31(-0.3499914122f),
+    Q31( 0.3611589903f), Q31( 0.3723795546f), Q31( 0.3836350013f), Q31( 0.3949211761f),
+    Q31( 0.4062317676f), Q31( 0.4175696896f), Q31( 0.4289119920f), Q31( 0.4402553754f),
+    Q31( 0.4515996535f), Q31( 0.4629308085f), Q31( 0.4742453214f), Q31( 0.4855253091f),
+    Q31( 0.4967708254f), Q31( 0.5079817500f), Q31( 0.5191234970f), Q31( 0.5302240895f),
+    Q31( 0.5412553448f), Q31( 0.5522051258f), Q31( 0.5630789140f), Q31( 0.5738524131f),
+    Q31( 0.5845403235f), Q31( 0.5951123086f), Q31( 0.6055783538f), Q31( 0.6159109932f),
+    Q31( 0.6261242695f), Q31( 0.6361980107f), Q31( 0.6461269695f), Q31( 0.6559016302f),
+    Q31( 0.6655139880f), Q31( 0.6749663190f), Q31( 0.6842353293f), Q31( 0.6933282376f),
+    Q31( 0.7022388719f), Q31( 0.7109410426f), Q31( 0.7194462634f), Q31( 0.7277448900f),
+    Q31( 0.7358211758f), Q31( 0.7436827863f), Q31( 0.7513137456f), Q31( 0.7587080760f),
+    Q31( 0.7658674865f), Q31( 0.7727780881f), Q31( 0.7794287519f), Q31( 0.7858353120f),
+    Q31( 0.7919735841f), Q31( 0.7978466413f), Q31( 0.8034485751f), Q31( 0.8087695004f),
+    Q31( 0.8138191270f), Q31( 0.8185776004f), Q31( 0.8230419890f), Q31( 0.8272275347f),
+    Q31( 0.8311038457f), Q31( 0.8346937361f), Q31( 0.8379717337f), Q31( 0.8409541392f),
+    Q31( 0.8436238281f), Q31( 0.8459818469f), Q31( 0.8480315777f), Q31( 0.8497805198f),
+    Q31( 0.8511971524f), Q31( 0.8523047035f), Q31( 0.8531020949f), Q31( 0.8535720573f),
+    Q31( 0.8537385600f),
+};
+
+static av_cold void aacsbr_tableinit(void)
+{
+    int i; // the initializer supplies entries 0..320; mirror 1..319 into 639..321
+    for (i = 319; i > 0; i--)
+        sbr_qmf_window_us[640 - i] = sbr_qmf_window_us[i];
+    sbr_qmf_window_us[512] = -sbr_qmf_window_us[512]; // these two mirrored taps take
+    sbr_qmf_window_us[384] = -sbr_qmf_window_us[384]; // the opposite sign of their source
+
+    for (i = 0; i < 640; i += 2) // the _ds window is every second tap of the _us window
+        sbr_qmf_window_ds[i >> 1] = sbr_qmf_window_us[i];
+}
+
+#endif /* AVCODEC_AACSBR_TABLEGEN_COMMON_H */
diff --git a/libavcodec/aacsbr_template.c b/libavcodec/aacsbr_template.c
new file mode 100644
index 00000000..733e619b
--- /dev/null
+++ b/libavcodec/aacsbr_template.c
@@ -0,0 +1,1571 @@
+/*
+ * AAC Spectral Band Replication decoding functions
+ * Copyright (c) 2008-2009 Robert Swain ( rob opendot cl )
+ * Copyright (c) 2009-2010 Alex Converse <alex.converse@gmail.com>
+ *
+ * Fixed point code
+ * Copyright (c) 2013
+ *      MIPS Technologies, Inc., California.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * AAC Spectral Band Replication decoding functions
+ * @author Robert Swain ( rob opendot cl )
+ * @author Stanislav Ocovaj ( stanislav.ocovaj@imgtec.com )
+ * @author Zoran Basaric ( zoran.basaric@imgtec.com )
+ */
+
+#include "libavutil/qsort.h"
+
+av_cold void AAC_RENAME(ff_aac_sbr_init)(void) // sets up the SBR VLC tables and QMF windows, then runs the ps init
+{
+    static const struct {
+        const void *sbr_codes, *sbr_bits;
+        const unsigned int table_size, elem_size;
+    } sbr_tmp[] = { // ten rows; presumably consumed by SBR_INIT_VLC_STATIC below -- macro not visible here
+        SBR_VLC_ROW(t_huffman_env_1_5dB),
+        SBR_VLC_ROW(f_huffman_env_1_5dB),
+        SBR_VLC_ROW(t_huffman_env_bal_1_5dB),
+        SBR_VLC_ROW(f_huffman_env_bal_1_5dB),
+        SBR_VLC_ROW(t_huffman_env_3_0dB),
+        SBR_VLC_ROW(f_huffman_env_3_0dB),
+        SBR_VLC_ROW(t_huffman_env_bal_3_0dB),
+        SBR_VLC_ROW(f_huffman_env_bal_3_0dB),
+        SBR_VLC_ROW(t_huffman_noise_3_0dB),
+        SBR_VLC_ROW(t_huffman_noise_bal_3_0dB),
+    };
+
+    // SBR VLC table initialization
+    SBR_INIT_VLC_STATIC(0, 1098); // NOTE(review): first argument looks like the sbr_tmp row index and the
+    SBR_INIT_VLC_STATIC(1, 1092); // second like a static table size -- confirm against the macro definition
+    SBR_INIT_VLC_STATIC(2, 768);
+    SBR_INIT_VLC_STATIC(3, 1026);
+    SBR_INIT_VLC_STATIC(4, 1058);
+    SBR_INIT_VLC_STATIC(5, 1052);
+    SBR_INIT_VLC_STATIC(6, 544);
+    SBR_INIT_VLC_STATIC(7, 544);
+    SBR_INIT_VLC_STATIC(8, 592);
+    SBR_INIT_VLC_STATIC(9, 512);
+
+    aacsbr_tableinit(); // fill in the QMF window tables
+
+    AAC_RENAME(ff_ps_init)();
+}
+
+/** Puts the SBR context into pure upsampling mode. */
+static void sbr_turnoff(SpectralBandReplication *sbr) {
+    // Reset the values compared against the first SBR header
+    memset(&sbr->spectrum_params, -1, sizeof(SpectrumParameters));
+    sbr->data[1].e_a[1] = sbr->data[0].e_a[1] = -1;
+    // Defaults used in pure upsampling mode
+    sbr->m[1]  = 0;
+    sbr->kx[1] = 32; // typo in spec, kx' inits to 32
+    sbr->ready_for_dequant = 0;
+    sbr->start = 0;
+}
+
+av_cold void AAC_RENAME(ff_aac_sbr_ctx_init)(AACContext *ac, SpectralBandReplication *sbr) // ac is not used in this body
+{
+    if(sbr->mdct.mdct_bits) // presumably non-zero once the MDCT below has been initialised, making repeat calls a no-op -- TODO confirm
+        return;
+    sbr->kx[0] = sbr->kx[1]; // copied before sbr_turnoff() overwrites kx[1] with 32
+    sbr_turnoff(sbr);
+    sbr->data[0].synthesis_filterbank_samples_offset = SBR_SYNTHESIS_BUF_SIZE - (1280 - 128);
+    sbr->data[1].synthesis_filterbank_samples_offset = SBR_SYNTHESIS_BUF_SIZE - (1280 - 128);
+    /* SBR requires samples to be scaled to +/-32768.0 to work correctly.
+     * mdct scale factors are adjusted to scale up from +/-1.0 at analysis
+     * and scale back down at synthesis. */
+    AAC_RENAME_32(ff_mdct_init)(&sbr->mdct,     7, 1, 1.0 / (64 * 32768.0)); // NOTE(review): any result of the two
+    AAC_RENAME_32(ff_mdct_init)(&sbr->mdct_ana, 7, 1, -2.0 * 32768.0);       // mdct init calls is ignored here
+    AAC_RENAME(ff_ps_ctx_init)(&sbr->ps);
+    AAC_RENAME(ff_sbrdsp_init)(&sbr->dsp);
+    aacsbr_func_ptr_init(&sbr->c);
+}
+
+av_cold void AAC_RENAME(ff_aac_sbr_ctx_close)(SpectralBandReplication *sbr) // tears down the two MDCT contexts set up by the ctx init
+{
+    AAC_RENAME_32(ff_mdct_end)(&sbr->mdct);
+    AAC_RENAME_32(ff_mdct_end)(&sbr->mdct_ana);
+}
+
+static int qsort_comparison_function_int16(const void *a, const void *b)
+{
+    return ((const int16_t *)a)[0] - ((const int16_t *)b)[0]; // operands promote to int, so the difference cannot overflow
+}
+
+static inline int in_table_int16(const int16_t *table, int last_el, int16_t needle)
+{
+    int idx = last_el; // last_el is inclusive: the index of the final entry to inspect
+    while (idx >= 0)
+        if (table[idx--] == needle)
+            return 1;
+    return 0;
+}
+
+/// Limiter Frequency Band Table (14496-3 sp04 p198); fills sbr->f_tablelim[] and sbr->n_lim
+static void sbr_make_f_tablelim(SpectralBandReplication *sbr)
+{
+    int k;
+    if (sbr->bs_limiter_bands > 0) {
+        static const INTFLOAT bands_warped[3] = { Q23(1.32715174233856803909f),   //2^(0.49/1.2)
+                                               Q23(1.18509277094158210129f),   //2^(0.49/2)
+                                               Q23(1.11987160404675912501f) }; //2^(0.49/3)
+        const INTFLOAT lim_bands_per_octave_warped = bands_warped[sbr->bs_limiter_bands - 1];
+        int16_t patch_borders[7];
+        uint16_t *in = sbr->f_tablelim + 1, *out = sbr->f_tablelim; // in: next candidate border, out: last border kept
+
+        patch_borders[0] = sbr->kx[1]; // borders are a running sum of the patch widths, starting at kx[1]
+        for (k = 1; k <= sbr->num_patches; k++)
+            patch_borders[k] = patch_borders[k-1] + sbr->patch_num_subbands[k-1];
+
+        memcpy(sbr->f_tablelim, sbr->f_tablelow, // seed with the n[0] + 1 entries of f_tablelow
+               (sbr->n[0] + 1) * sizeof(sbr->f_tablelow[0]));
+        if (sbr->num_patches > 1)
+            memcpy(sbr->f_tablelim + sbr->n[0] + 1, patch_borders + 1, // append patch_borders[1 .. num_patches - 1]
+                   (sbr->num_patches - 1) * sizeof(patch_borders[0]));
+
+        AV_QSORT(sbr->f_tablelim, sbr->num_patches + sbr->n[0], // sort the merged num_patches + n[0] entries
+              uint16_t,
+              qsort_comparison_function_int16);
+
+        sbr->n_lim = sbr->n[0] + sbr->num_patches - 1;
+        while (out < sbr->f_tablelim + sbr->n_lim) { // compact in place; n_lim shrinks whenever a border is dropped
+#if USE_FIXED
+            if ((*in << 23) >= *out * lim_bands_per_octave_warped) { // the shift puts *in on the scale of the Q23() threshold
+#else
+            if (*in >= *out * lim_bands_per_octave_warped) {
+#endif /* USE_FIXED */
+                *++out = *in++; // candidate is at least the warped ratio above *out: keep it
+            } else if (*in == *out ||
+                !in_table_int16(patch_borders, sbr->num_patches, *in)) {
+                in++; // duplicate, or candidate is not a patch border: drop the candidate
+                sbr->n_lim--;
+            } else if (!in_table_int16(patch_borders, sbr->num_patches, *out)) {
+                *out = *in++; // candidate is a patch border but *out is not: candidate replaces *out
+                sbr->n_lim--;
+            } else {
+                *++out = *in++; // both are patch borders: keep both
+            }
+        }
+    } else {
+        sbr->f_tablelim[0] = sbr->f_tablelow[0]; // bs_limiter_bands == 0: one band from the first
+        sbr->f_tablelim[1] = sbr->f_tablelow[sbr->n[0]]; // to the last entry of f_tablelow
+        sbr->n_lim = 1;
+    }
+}
+
+static unsigned int read_sbr_header(SpectralBandReplication *sbr, GetBitContext *gb)
+{
+    unsigned int cnt = get_bits_count(gb); // bit position on entry; the return value is the number of bits consumed
+    uint8_t bs_header_extra_1;
+    uint8_t bs_header_extra_2;
+    int old_bs_limiter_bands = sbr->bs_limiter_bands;
+    SpectrumParameters old_spectrum_params;
+
+    sbr->start = 1;
+    sbr->ready_for_dequant = 0;
+
+    // Save last spectrum parameters variables to compare to new ones
+    memcpy(&old_spectrum_params, &sbr->spectrum_params, sizeof(SpectrumParameters));
+
+    sbr->bs_amp_res_header              = get_bits1(gb);
+    sbr->spectrum_params.bs_start_freq  = get_bits(gb, 4);
+    sbr->spectrum_params.bs_stop_freq   = get_bits(gb, 4);
+    sbr->spectrum_params.bs_xover_band  = get_bits(gb, 3);
+                                          skip_bits(gb, 2); // bs_reserved
+
+    bs_header_extra_1 = get_bits1(gb); // both flags are read before either optional group
+    bs_header_extra_2 = get_bits1(gb);
+
+    if (bs_header_extra_1) { // optional group 1 present in the bitstream
+        sbr->spectrum_params.bs_freq_scale  = get_bits(gb, 2);
+        sbr->spectrum_params.bs_alter_scale = get_bits1(gb);
+        sbr->spectrum_params.bs_noise_bands = get_bits(gb, 2);
+    } else { // group 1 absent: use these defaults
+        sbr->spectrum_params.bs_freq_scale  = 2;
+        sbr->spectrum_params.bs_alter_scale = 1;
+        sbr->spectrum_params.bs_noise_bands = 2;
+    }
+
+    // Check if spectrum parameters changed
+    if (memcmp(&old_spectrum_params, &sbr->spectrum_params, sizeof(SpectrumParameters)))
+        sbr->reset = 1; // only ever set here, never cleared by this function
+
+    if (bs_header_extra_2) { // optional group 2 present in the bitstream
+        sbr->bs_limiter_bands  = get_bits(gb, 2);
+        sbr->bs_limiter_gains  = get_bits(gb, 2);
+        sbr->bs_interpol_freq  = get_bits1(gb);
+        sbr->bs_smoothing_mode = get_bits1(gb);
+    } else { // group 2 absent: use these defaults
+        sbr->bs_limiter_bands  = 2;
+        sbr->bs_limiter_gains  = 2;
+        sbr->bs_interpol_freq  = 1;
+        sbr->bs_smoothing_mode = 1;
+    }
+
+    if (sbr->bs_limiter_bands != old_bs_limiter_bands && !sbr->reset) // presumably a flagged reset rebuilds the table elsewhere -- TODO confirm
+        sbr_make_f_tablelim(sbr);
+
+    return get_bits_count(gb) - cnt;
+}
+
+static int array_min_int16(const int16_t *array, int nel)
+{
+    int smallest = array[0], pos = 0; // array[0] is read unconditionally, whatever nel is
+    while (++pos < nel)
+        smallest = FFMIN(smallest, array[pos]);
+    return smallest;
+}
+
+static int check_n_master(AVCodecContext *avctx, int n_master, int bs_xover_band)
+{
+    // Requirements (14496-3 sp04 p205): returns 0 when both hold, -1 otherwise
+    int ret = 0;
+    if (n_master <= 0) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid n_master: %d\n", n_master);
+        ret = -1;
+    } else if (n_master <= bs_xover_band) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Invalid bitstream, crossover band index beyond array bounds: %d\n",
+               bs_xover_band);
+        ret = -1;
+    }
+    return ret;
+}
+
+/// Master Frequency Band Table (14496-3 sp04 p194): fills sbr->k[], sbr->n_master and sbr->f_master[]; returns 0, or -1 on error
+static int sbr_make_f_master(AACContext *ac, SpectralBandReplication *sbr,
+                             SpectrumParameters *spectrum)
+{
+    unsigned int temp, max_qmf_subbands = 0;
+    unsigned int start_min, stop_min;
+    int k;
+    const int8_t *sbr_offset_ptr;
+    int16_t stop_dk[13];
+
+    if (sbr->sample_rate < 32000) { // temp: rate-dependent constant used for start_min / stop_min below
+        temp = 3000;
+    } else if (sbr->sample_rate < 64000) {
+        temp = 4000;
+    } else
+        temp = 5000;
+
+    switch (sbr->sample_rate) { // choose the sbr_offset row; any other rate is rejected
+    case 16000:
+        sbr_offset_ptr = sbr_offset[0];
+        break;
+    case 22050:
+        sbr_offset_ptr = sbr_offset[1];
+        break;
+    case 24000:
+        sbr_offset_ptr = sbr_offset[2];
+        break;
+    case 32000:
+        sbr_offset_ptr = sbr_offset[3];
+        break;
+    case 44100: case 48000: case 64000:
+        sbr_offset_ptr = sbr_offset[4];
+        break;
+    case 88200: case 96000: case 128000: case 176400: case 192000:
+        sbr_offset_ptr = sbr_offset[5];
+        break;
+    default:
+        av_log(ac->avctx, AV_LOG_ERROR,
+               "Unsupported sample rate for SBR: %d\n", sbr->sample_rate);
+        return -1;
+    }
+
+    start_min = ((temp << 7) + (sbr->sample_rate >> 1)) / sbr->sample_rate; // temp * 128 / rate, rounded to nearest
+    stop_min  = ((temp << 8) + (sbr->sample_rate >> 1)) / sbr->sample_rate; // temp * 256 / rate, rounded to nearest
+
+    sbr->k[0] = start_min + sbr_offset_ptr[spectrum->bs_start_freq];
+
+    if (spectrum->bs_stop_freq < 14) {
+        sbr->k[2] = stop_min;
+        make_bands(stop_dk, stop_min, 64, 13);
+        AV_QSORT(stop_dk, 13, int16_t, qsort_comparison_function_int16);
+        for (k = 0; k < spectrum->bs_stop_freq; k++) // add the first bs_stop_freq entries of the sorted stop_dk
+            sbr->k[2] += stop_dk[k];
+    } else if (spectrum->bs_stop_freq == 14) {
+        sbr->k[2] = 2*sbr->k[0];
+    } else if (spectrum->bs_stop_freq == 15) {
+        sbr->k[2] = 3*sbr->k[0];
+    } else {
+        av_log(ac->avctx, AV_LOG_ERROR,
+               "Invalid bs_stop_freq: %d\n", spectrum->bs_stop_freq);
+        return -1;
+    }
+    sbr->k[2] = FFMIN(64, sbr->k[2]); // k[2] is capped at 64
+
+    // Requirements (14496-3 sp04 p205)
+    if (sbr->sample_rate <= 32000) {
+        max_qmf_subbands = 48;
+    } else if (sbr->sample_rate == 44100) {
+        max_qmf_subbands = 35;
+    } else if (sbr->sample_rate >= 48000)
+        max_qmf_subbands = 32;
+    else
+        av_assert0(0); // not reachable: the switch above only lets the listed rates through
+
+    if (sbr->k[2] - sbr->k[0] > max_qmf_subbands) {
+        av_log(ac->avctx, AV_LOG_ERROR,
+               "Invalid bitstream, too many QMF subbands: %d\n", sbr->k[2] - sbr->k[0]);
+        return -1;
+    }
+
+    if (!spectrum->bs_freq_scale) { // bs_freq_scale == 0: bands of width dk, adjusted at the ends
+        int dk, k2diff;
+
+        dk = spectrum->bs_alter_scale + 1;
+        sbr->n_master = ((sbr->k[2] - sbr->k[0] + (dk&2)) >> dk) << 1; // always even
+        if (check_n_master(ac->avctx, sbr->n_master, sbr->spectrum_params.bs_xover_band))
+            return -1;
+
+        for (k = 1; k <= sbr->n_master; k++) // f_master[1..] temporarily holds band widths
+            sbr->f_master[k] = dk;
+
+        k2diff = sbr->k[2] - sbr->k[0] - sbr->n_master * dk; // subbands left over (or missing) after n_master bands of dk
+        if (k2diff < 0) { // too wide: narrow the first band, and the second when short by more than one
+            sbr->f_master[1]--;
+            sbr->f_master[2]-= (k2diff < -1);
+        } else if (k2diff) { // too narrow: widen the last band by one
+            sbr->f_master[sbr->n_master]++;
+        }
+
+        sbr->f_master[0] = sbr->k[0];
+        for (k = 1; k <= sbr->n_master; k++) // prefix sum: widths become band borders starting at k[0]
+            sbr->f_master[k] += sbr->f_master[k - 1];
+
+    } else {
+        int half_bands = 7 - spectrum->bs_freq_scale;      // bs_freq_scale  = {1,2,3}
+        int two_regions, num_bands_0;
+        int vdk0_max, vdk1_min;
+        int16_t vk0[49];
+#if USE_FIXED
+        int tmp, nz = 0;
+#endif /* USE_FIXED */
+
+        if (49 * sbr->k[2] > 110 * sbr->k[0]) { // i.e. k[2] / k[0] > 110 / 49: split at k[1] = 2 * k[0]
+            two_regions = 1;
+            sbr->k[1] = 2 * sbr->k[0];
+        } else {
+            two_regions = 0;
+            sbr->k[1] = sbr->k[2];
+        }
+
+#if USE_FIXED
+        tmp = (sbr->k[1] << 23) / sbr->k[0]; // ratio k[1] / k[0], scaled by 2^23
+        while (tmp < 0x40000000) { // shift left until >= 2^30, counting the shifts in nz
+          tmp <<= 1;
+          nz++;
+        }
+        tmp = fixed_log(tmp - 0x80000000);
+        tmp = (int)(((int64_t)tmp * CONST_RECIP_LN2 + 0x20000000) >> 30); // presumably ln -> log2; the constant is defined elsewhere
+        tmp = (((tmp + 0x80) >> 8) + ((8 - nz) << 23)) * half_bands;
+        num_bands_0 = ((tmp + 0x400000) >> 23) * 2; // round at 23 fractional bits, then double (counterpart of the float line below)
+#else
+        num_bands_0 = lrintf(half_bands * log2f(sbr->k[1] / (float)sbr->k[0])) * 2;
+#endif /* USE_FIXED */
+
+        if (num_bands_0 <= 0) { // Requirements (14496-3 sp04 p205)
+            av_log(ac->avctx, AV_LOG_ERROR, "Invalid num_bands_0: %d\n", num_bands_0);
+            return -1;
+        }
+
+        vk0[0] = 0;
+
+        make_bands(vk0+1, sbr->k[0], sbr->k[1], num_bands_0); // vk0[1..num_bands_0] are treated as band widths below
+
+        AV_QSORT(vk0 + 1, num_bands_0, int16_t, qsort_comparison_function_int16);
+        vdk0_max = vk0[num_bands_0]; // last element after the sort
+
+        vk0[0] = sbr->k[0];
+        for (k = 1; k <= num_bands_0; k++) { // prefix sum: widths become borders starting at k[0]
+            if (vk0[k] <= 0) { // Requirements (14496-3 sp04 p205)
+                av_log(ac->avctx, AV_LOG_ERROR, "Invalid vDk0[%d]: %d\n", k, vk0[k]);
+                return -1;
+            }
+            vk0[k] += vk0[k-1];
+        }
+
+        if (two_regions) {
+            int16_t vk1[49];
+#if USE_FIXED
+            int num_bands_1;
+
+            tmp = (sbr->k[2] << 23) / sbr->k[1]; // same computation as for num_bands_0, on the ratio k[2] / k[1]
+            nz = 0;
+            while (tmp < 0x40000000) {
+              tmp <<= 1;
+              nz++;
+            }
+            tmp = fixed_log(tmp - 0x80000000);
+            tmp = (int)(((int64_t)tmp * CONST_RECIP_LN2 + 0x20000000) >> 30);
+            tmp = (((tmp + 0x80) >> 8) + ((8 - nz) << 23)) * half_bands;
+            if (spectrum->bs_alter_scale)
+                tmp = (int)(((int64_t)tmp * CONST_076923 + 0x40000000) >> 31); // presumably the fixed-point form of invwarp in the float branch
+            num_bands_1 = ((tmp + 0x400000) >> 23) * 2;
+#else
+            float invwarp = spectrum->bs_alter_scale ? 0.76923076923076923077f
+                                                     : 1.0f; // bs_alter_scale = {0,1}
+            int num_bands_1 = lrintf(half_bands * invwarp *
+                                     log2f(sbr->k[2] / (float)sbr->k[1])) * 2;
+#endif /* USE_FIXED */
+            make_bands(vk1+1, sbr->k[1], sbr->k[2], num_bands_1);
+
+            vdk1_min = array_min_int16(vk1 + 1, num_bands_1);
+
+            if (vdk1_min < vdk0_max) { // smallest upper-region width is below the largest lower-region width
+                int change;
+                AV_QSORT(vk1 + 1, num_bands_1, int16_t, qsort_comparison_function_int16);
+                change = FFMIN(vdk0_max - vk1[1], (vk1[num_bands_1] - vk1[1]) >> 1);
+                vk1[1]           += change; // move `change` subbands from the last sorted entry to the first
+                vk1[num_bands_1] -= change;
+            }
+
+            AV_QSORT(vk1 + 1, num_bands_1, int16_t, qsort_comparison_function_int16);
+
+            vk1[0] = sbr->k[1];
+            for (k = 1; k <= num_bands_1; k++) { // prefix sum: widths become borders starting at k[1]
+                if (vk1[k] <= 0) { // Requirements (14496-3 sp04 p205)
+                    av_log(ac->avctx, AV_LOG_ERROR, "Invalid vDk1[%d]: %d\n", k, vk1[k]);
+                    return -1;
+                }
+                vk1[k] += vk1[k-1];
+            }
+
+            sbr->n_master = num_bands_0 + num_bands_1;
+            if (check_n_master(ac->avctx, sbr->n_master, sbr->spectrum_params.bs_xover_band))
+                return -1;
+            memcpy(&sbr->f_master[0],               vk0, // lower region, including its start border
+                   (num_bands_0 + 1) * sizeof(sbr->f_master[0]));
+            memcpy(&sbr->f_master[num_bands_0 + 1], vk1 + 1, // upper region, skipping vk1[0]
+                    num_bands_1      * sizeof(sbr->f_master[0]));
+
+        } else {
+            sbr->n_master = num_bands_0;
+            if (check_n_master(ac->avctx, sbr->n_master, sbr->spectrum_params.bs_xover_band))
+                return -1;
+            memcpy(sbr->f_master, vk0, (num_bands_0 + 1) * sizeof(sbr->f_master[0]));
+        }
+    }
+
+    return 0;
+}
+
+/// High Frequency Generation - Patch Construction (14496-3 sp04 p216 fig. 4.46): fills sbr->patch_num_subbands[], sbr->patch_start_subband[] and sbr->num_patches; returns 0 on success, a negative value on failure.
+static int sbr_hf_calc_npatches(AACContext *ac, SpectralBandReplication *sbr)
+{
+    int i, k, last_k = -1, last_msb = -1, sb = 0;
+    int msb = sbr->k[0];
+    int usb = sbr->kx[1];
+    int goal_sb = ((1000 << 11) + (sbr->sample_rate >> 1)) / sbr->sample_rate; // 2048000 / sample_rate, rounded to nearest
+
+    sbr->num_patches = 0;
+
+    if (goal_sb < sbr->kx[1] + sbr->m[1]) {
+        for (k = 0; sbr->f_master[k] < goal_sb; k++) ; // k = first f_master index whose entry is >= goal_sb
+    } else
+        k = sbr->n_master;
+
+    do {
+        int odd = 0;
+        if (k == last_k && msb == last_msb) { // k and msb are unchanged since the previous pass: report failure
+            av_log(ac->avctx, AV_LOG_ERROR, "patch construction failed\n");
+            return AVERROR_INVALIDDATA;
+        }
+        last_k = k;
+        last_msb = msb;
+        for (i = k; i == k || sb > (sbr->k[0] - 1 + msb - odd); i--) { // always runs once for i == k, then walks down f_master while sb is still too large
+            sb = sbr->f_master[i];
+            odd = (sb + sbr->k[0]) & 1; // parity of sb + k[0]
+        }
+
+        // Requirements (14496-3 sp04 p205) sets the maximum number of patches to 5.
+        // After this check the final number of patches can still be six, which is
+        // illegal; however, the Coding Technologies decoder check stream has a final
+        // count of 6 patches
+        if (sbr->num_patches > 5) {
+            av_log(ac->avctx, AV_LOG_ERROR, "Too many patches: %d\n", sbr->num_patches);
+            return -1;
+        }
+
+        sbr->patch_num_subbands[sbr->num_patches]  = FFMAX(sb - usb, 0);
+        sbr->patch_start_subband[sbr->num_patches] = sbr->k[0] - odd - sbr->patch_num_subbands[sbr->num_patches];
+
+        if (sbr->patch_num_subbands[sbr->num_patches] > 0) { // only a non-empty patch is kept
+            usb = sb;
+            msb = sb;
+            sbr->num_patches++;
+        } else
+            msb = sbr->kx[1];
+
+        if (sbr->f_master[k] - sb < 3)
+            k = sbr->n_master;
+    } while (sb != sbr->kx[1] + sbr->m[1]); // stop once sb equals kx[1] + m[1]
+
+    if (sbr->num_patches > 1 &&
+        sbr->patch_num_subbands[sbr->num_patches - 1] < 3) // drop a last patch narrower than 3 subbands (never the only patch)
+        sbr->num_patches--;
+
+    return 0;
+}
+
+/// Derived Frequency Band Tables (14496-3 sp04 p197): fills f_tablehigh, f_tablelow and f_tablenoise from f_master, then calls sbr_hf_calc_npatches() and sbr_make_f_tablelim(); returns 0 on success, -1 on failure.
+static int sbr_make_f_derived(AACContext *ac, SpectralBandReplication *sbr)
+{
+    int k, temp;
+#if USE_FIXED
+    int nz = 0;
+#endif /* USE_FIXED */
+
+    sbr->n[1] = sbr->n_master - sbr->spectrum_params.bs_xover_band;
+    sbr->n[0] = (sbr->n[1] + 1) >> 1; // half of n[1], rounded up
+
+    memcpy(sbr->f_tablehigh, &sbr->f_master[sbr->spectrum_params.bs_xover_band],
+           (sbr->n[1] + 1) * sizeof(sbr->f_master[0]));
+    sbr->m[1] = sbr->f_tablehigh[sbr->n[1]] - sbr->f_tablehigh[0]; // last entry minus first entry of f_tablehigh
+    sbr->kx[1] = sbr->f_tablehigh[0];
+
+    // Requirements (14496-3 sp04 p205)
+    if (sbr->kx[1] + sbr->m[1] > 64) {
+        av_log(ac->avctx, AV_LOG_ERROR,
+               "Stop frequency border too high: %d\n", sbr->kx[1] + sbr->m[1]);
+        return -1;
+    }
+    if (sbr->kx[1] > 32) {
+        av_log(ac->avctx, AV_LOG_ERROR, "Start frequency border too high: %d\n", sbr->kx[1]);
+        return -1;
+    }
+
+    sbr->f_tablelow[0] = sbr->f_tablehigh[0];
+    temp = sbr->n[1] & 1; // 1 when n[1] is odd
+    for (k = 1; k <= sbr->n[0]; k++)
+        sbr->f_tablelow[k] = sbr->f_tablehigh[2 * k - temp]; // take every second entry of f_tablehigh
+#if USE_FIXED
+    temp = (sbr->k[2] << 23) / sbr->kx[1]; // k[2] / kx[1] with 23 fractional bits
+    while (temp < 0x40000000) { // shift left until bit 30 is set, counting the shifts in nz
+        temp <<= 1;
+        nz++;
+    }
+    temp = fixed_log(temp - 0x80000000); // NOTE(review): this branch appears to mirror the float branch's log2f(k[2] / kx[1]) in fixed point - confirm against fixed_log()
+    temp = (int)(((int64_t)temp * CONST_RECIP_LN2 + 0x20000000) >> 30);
+    temp = (((temp + 0x80) >> 8) + ((8 - nz) << 23)) * sbr->spectrum_params.bs_noise_bands;
+
+    sbr->n_q = (temp + 0x400000) >> 23; // add half an LSB, then drop the 23 fractional bits
+    if (sbr->n_q < 1)
+        sbr->n_q = 1;
+#else
+    sbr->n_q = FFMAX(1, lrintf(sbr->spectrum_params.bs_noise_bands *
+                               log2f(sbr->k[2] / (float)sbr->kx[1]))); // 0 <= bs_noise_bands <= 3
+#endif /* USE_FIXED */
+
+    if (sbr->n_q > 5) {
+        av_log(ac->avctx, AV_LOG_ERROR, "Too many noise floor scale factors: %d\n", sbr->n_q);
+        return -1;
+    }
+
+    sbr->f_tablenoise[0] = sbr->f_tablelow[0];
+    temp = 0;
+    for (k = 1; k <= sbr->n_q; k++) {
+        temp += (sbr->n[0] - temp) / (sbr->n_q + 1 - k); // spread the remaining f_tablelow indices over the remaining noise bands
+        sbr->f_tablenoise[k] = sbr->f_tablelow[temp];
+    }
+
+    if (sbr_hf_calc_npatches(ac, sbr) < 0)
+        return -1;
+
+    sbr_make_f_tablelim(sbr);
+
+    sbr->data[0].f_indexnoise = 0;
+    sbr->data[1].f_indexnoise = 0;
+
+    return 0;
+}
+
+/// Read `elements` consecutive single bits from gb, storing one bit per entry of vec in order.
+static av_always_inline void get_bits1_vector(GetBitContext *gb, uint8_t *vec,
+                                              int elements)
+{
+    int remaining;
+    for (remaining = elements; remaining > 0; remaining--)
+        *vec++ = get_bits1(gb);
+}
+
+/** ceil(log2(index+1)) for index 0..5; read_sbr_grid() indexes it with bs_num_env to get the bit width of bs_pointer */
+static const int8_t ceil_log2[] = {
+    0, 1, 2, 2, 3, 3,
+};
+
+/// Read the SBR time/frequency grid of one channel into ch_data; returns 0 on success, -1 on an invalid grid. bs_num_env is only stored in ch_data once it is known to be in range.
+static int read_sbr_grid(AACContext *ac, SpectralBandReplication *sbr, GetBitContext *gb, SBRData *ch_data)
+{
+    int i;
+    int bs_pointer = 0;
+    // frameLengthFlag ? 15 : 16; 960 sample length frames unsupported; this value is numTimeSlots
+    int abs_bord_trail = 16;
+    int num_rel_lead, num_rel_trail, bs_num_env;
+    unsigned bs_num_env_old = ch_data->bs_num_env;
+
+    ch_data->bs_freq_res[0] = ch_data->bs_freq_res[ch_data->bs_num_env]; // carry the previous frame's last entries over; these
+    ch_data->bs_amp_res = sbr->bs_amp_res_header;
+    ch_data->t_env_num_env_old = ch_data->t_env[bs_num_env_old];         // index with the stored count, so it must always be valid
+
+    switch (ch_data->bs_frame_class = get_bits(gb, 2)) {
+    case FIXFIX:
+        bs_num_env                          = 1 << get_bits(gb, 2);
+        if (bs_num_env > 4) { // reject before committing: a stale out-of-range count would be used as an array index on the next call
+            av_log(ac->avctx, AV_LOG_ERROR,
+                   "Invalid bitstream, too many SBR envelopes in FIXFIX type SBR frame: %d\n",
+                   bs_num_env);
+            return -1;
+        }
+        ch_data->bs_num_env                 = bs_num_env;
+        num_rel_lead                        = ch_data->bs_num_env - 1;
+        if (ch_data->bs_num_env == 1)
+            ch_data->bs_amp_res = 0;
+
+        ch_data->t_env[0]                   = 0;
+        ch_data->t_env[ch_data->bs_num_env] = abs_bord_trail;
+
+        abs_bord_trail = (abs_bord_trail + (ch_data->bs_num_env >> 1)) /
+                   ch_data->bs_num_env; // reused as the (rounded) envelope length
+        for (i = 0; i < num_rel_lead; i++)
+            ch_data->t_env[i + 1] = ch_data->t_env[i] + abs_bord_trail;
+
+        ch_data->bs_freq_res[1] = get_bits1(gb);
+        for (i = 1; i < ch_data->bs_num_env; i++)
+            ch_data->bs_freq_res[i + 1] = ch_data->bs_freq_res[1]; // one resolution bit shared by all envelopes
+        break;
+    case FIXVAR:
+        abs_bord_trail                     += get_bits(gb, 2);
+        num_rel_trail                       = get_bits(gb, 2);
+        ch_data->bs_num_env                 = num_rel_trail + 1; // at most 4, always in range
+        ch_data->t_env[0]                   = 0;
+        ch_data->t_env[ch_data->bs_num_env] = abs_bord_trail;
+
+        for (i = 0; i < num_rel_trail; i++)
+            ch_data->t_env[ch_data->bs_num_env - 1 - i] =
+                ch_data->t_env[ch_data->bs_num_env - i] - 2 * get_bits(gb, 2) - 2;
+
+        bs_pointer = get_bits(gb, ceil_log2[ch_data->bs_num_env]);
+
+        for (i = 0; i < ch_data->bs_num_env; i++)
+            ch_data->bs_freq_res[ch_data->bs_num_env - i] = get_bits1(gb);
+        break;
+    case VARFIX:
+        ch_data->t_env[0]                   = get_bits(gb, 2);
+        num_rel_lead                        = get_bits(gb, 2);
+        ch_data->bs_num_env                 = num_rel_lead + 1; // at most 4, always in range
+        ch_data->t_env[ch_data->bs_num_env] = abs_bord_trail;
+
+        for (i = 0; i < num_rel_lead; i++)
+            ch_data->t_env[i + 1] = ch_data->t_env[i] + 2 * get_bits(gb, 2) + 2;
+
+        bs_pointer = get_bits(gb, ceil_log2[ch_data->bs_num_env]);
+
+        get_bits1_vector(gb, ch_data->bs_freq_res + 1, ch_data->bs_num_env);
+        break;
+    case VARVAR:
+        ch_data->t_env[0]                   = get_bits(gb, 2);
+        abs_bord_trail                     += get_bits(gb, 2);
+        num_rel_lead                        = get_bits(gb, 2);
+        num_rel_trail                       = get_bits(gb, 2);
+        bs_num_env                          = num_rel_lead + num_rel_trail + 1;
+
+        if (bs_num_env > 5) { // validate before committing, for the same reason as in FIXFIX
+            av_log(ac->avctx, AV_LOG_ERROR,
+                   "Invalid bitstream, too many SBR envelopes in VARVAR type SBR frame: %d\n",
+                   bs_num_env);
+            return -1;
+        }
+        ch_data->bs_num_env                 = bs_num_env;
+        ch_data->t_env[ch_data->bs_num_env] = abs_bord_trail;
+
+        for (i = 0; i < num_rel_lead; i++)
+            ch_data->t_env[i + 1] = ch_data->t_env[i] + 2 * get_bits(gb, 2) + 2;
+        for (i = 0; i < num_rel_trail; i++)
+            ch_data->t_env[ch_data->bs_num_env - 1 - i] =
+                ch_data->t_env[ch_data->bs_num_env - i] - 2 * get_bits(gb, 2) - 2;
+
+        bs_pointer = get_bits(gb, ceil_log2[ch_data->bs_num_env]);
+
+        get_bits1_vector(gb, ch_data->bs_freq_res + 1, ch_data->bs_num_env);
+        break;
+    }
+
+    av_assert0(bs_pointer >= 0);
+    if (bs_pointer > ch_data->bs_num_env + 1) {
+        av_log(ac->avctx, AV_LOG_ERROR,
+               "Invalid bitstream, bs_pointer points to a middle noise border outside the time borders table: %d\n",
+               bs_pointer);
+        return -1;
+    }
+
+    for (i = 1; i <= ch_data->bs_num_env; i++) {
+        if (ch_data->t_env[i-1] >= ch_data->t_env[i]) {
+            av_log(ac->avctx, AV_LOG_ERROR, "Not strictly monotone time borders\n");
+            return -1;
+        }
+    }
+
+    ch_data->bs_num_noise = (ch_data->bs_num_env > 1) + 1; // 1 noise floor for a single envelope, otherwise 2
+
+    ch_data->t_q[0]                     = ch_data->t_env[0];
+    ch_data->t_q[ch_data->bs_num_noise] = ch_data->t_env[ch_data->bs_num_env];
+    if (ch_data->bs_num_noise > 1) { // pick the envelope border used as the middle noise border
+        int idx;
+        if (ch_data->bs_frame_class == FIXFIX) {
+            idx = ch_data->bs_num_env >> 1;
+        } else if (ch_data->bs_frame_class & 1) { // FIXVAR or VARVAR
+            idx = ch_data->bs_num_env - FFMAX(bs_pointer - 1, 1);
+        } else { // VARFIX
+            if (!bs_pointer)
+                idx = 1;
+            else if (bs_pointer == 1)
+                idx = ch_data->bs_num_env - 1;
+            else // bs_pointer > 1
+                idx = bs_pointer - 1;
+        }
+        ch_data->t_q[1] = ch_data->t_env[idx];
+    }
+
+    ch_data->e_a[0] = -(ch_data->e_a[1] != bs_num_env_old); // l_APrev
+    ch_data->e_a[1] = -1;
+    if ((ch_data->bs_frame_class & 1) && bs_pointer) { // FIXVAR or VARVAR and bs_pointer != 0
+        ch_data->e_a[1] = ch_data->bs_num_env + 1 - bs_pointer;
+    } else if ((ch_data->bs_frame_class == 2) && (bs_pointer > 1)) // VARFIX and bs_pointer > 1
+        ch_data->e_a[1] = bs_pointer - 1;
+
+    return 0;
+}
+
+/// Give dst the same grid as src (used for a coupled channel pair), after rolling dst's own end-of-frame state into its previous-frame slots.
+static void copy_sbr_grid(SBRData *dst, const SBRData *src)
+{
+    // Saved from dst's previous frame, not copied; must run before bs_num_env and e_a[1] are overwritten below
+    dst->e_a[0]            = (dst->e_a[1] != dst->bs_num_env) ? -1 : 0;
+    dst->t_env_num_env_old = dst->t_env[dst->bs_num_env];
+    dst->bs_freq_res[0]    = dst->bs_freq_res[dst->bs_num_env];
+    dst->bs_frame_class    = src->bs_frame_class; // from here on: values read from the bitstream, taken over from src
+    dst->bs_num_env        = src->bs_num_env;
+    dst->bs_num_noise      = src->bs_num_noise;
+    dst->bs_amp_res        = src->bs_amp_res;
+    dst->e_a[1]            = src->e_a[1];
+    memcpy(&dst->bs_freq_res[1], &src->bs_freq_res[1], sizeof(dst->bs_freq_res) - sizeof(dst->bs_freq_res[0])); // entry 0 stays
+    memcpy(dst->t_q,             src->t_q,             sizeof(dst->t_q));
+    memcpy(dst->t_env,           src->t_env,           sizeof(dst->t_env));
+}
+
+/// Read the delta-coding flags: one bit per envelope into bs_df_env, then one bit per noise floor into bs_df_noise.
+static void read_sbr_dtdf(SpectralBandReplication *sbr, GetBitContext *gb, SBRData *ch_data)
+{
+    // sbr is not used here
+    get_bits1_vector(gb, &ch_data->bs_df_env[0],   ch_data->bs_num_env);
+    get_bits1_vector(gb, &ch_data->bs_df_noise[0], ch_data->bs_num_noise);
+}
+
+/// Read inverse filtering data: row 0 of bs_invf_mode is saved into row 1, then sbr->n_q new 2-bit modes are read into row 0.
+static void read_sbr_invf(SpectralBandReplication *sbr, GetBitContext *gb, SBRData *ch_data)
+{
+    int band = 0;
+
+    // Keep the previous modes in row 1 before row 0 is refilled
+    memcpy(ch_data->bs_invf_mode[1], ch_data->bs_invf_mode[0], 5 * sizeof(uint8_t));
+    while (band < sbr->n_q)
+        ch_data->bs_invf_mode[0][band++] = get_bits(gb, 2);
+}
+
+static int read_sbr_envelope(AACContext *ac, SpectralBandReplication *sbr, GetBitContext *gb,
+                              SBRData *ch_data, int ch)
+{ // Read the envelope scale factors of channel ch into env_facs_q[1..bs_num_env]; returns 0, or AVERROR_INVALIDDATA when a delta-coded value ends up above 127
+    int bits; // width of the start value read with get_bits() when an envelope is coded in the frequency direction
+    int i, j, k;
+    VLC_TYPE (*t_huff)[2], (*f_huff)[2];
+    int t_lav, f_lav;
+    const int delta = (ch == 1 && sbr->bs_coupling == 1) + 1; // 2 for the second channel of a coupled pair, otherwise 1
+    const int odd = sbr->n[1] & 1;
+
+    if (sbr->bs_coupling && ch) { // second channel of a coupled pair: "balance" tables
+        if (ch_data->bs_amp_res) {
+            bits   = 5;
+            t_huff = vlc_sbr[T_HUFFMAN_ENV_BAL_3_0DB].table;
+            t_lav  = vlc_sbr_lav[T_HUFFMAN_ENV_BAL_3_0DB];
+            f_huff = vlc_sbr[F_HUFFMAN_ENV_BAL_3_0DB].table;
+            f_lav  = vlc_sbr_lav[F_HUFFMAN_ENV_BAL_3_0DB];
+        } else {
+            bits   = 6;
+            t_huff = vlc_sbr[T_HUFFMAN_ENV_BAL_1_5DB].table;
+            t_lav  = vlc_sbr_lav[T_HUFFMAN_ENV_BAL_1_5DB];
+            f_huff = vlc_sbr[F_HUFFMAN_ENV_BAL_1_5DB].table;
+            f_lav  = vlc_sbr_lav[F_HUFFMAN_ENV_BAL_1_5DB];
+        }
+    } else {
+        if (ch_data->bs_amp_res) {
+            bits   = 6;
+            t_huff = vlc_sbr[T_HUFFMAN_ENV_3_0DB].table;
+            t_lav  = vlc_sbr_lav[T_HUFFMAN_ENV_3_0DB];
+            f_huff = vlc_sbr[F_HUFFMAN_ENV_3_0DB].table;
+            f_lav  = vlc_sbr_lav[F_HUFFMAN_ENV_3_0DB];
+        } else {
+            bits   = 7;
+            t_huff = vlc_sbr[T_HUFFMAN_ENV_1_5DB].table;
+            t_lav  = vlc_sbr_lav[T_HUFFMAN_ENV_1_5DB];
+            f_huff = vlc_sbr[F_HUFFMAN_ENV_1_5DB].table;
+            f_lav  = vlc_sbr_lav[F_HUFFMAN_ENV_1_5DB];
+        }
+    }
+
+    for (i = 0; i < ch_data->bs_num_env; i++) {
+        if (ch_data->bs_df_env[i]) { // delta against the previous envelope (row i), using the t_huff table
+            // bs_freq_res[0] == bs_freq_res[bs_num_env] from prev frame
+            if (ch_data->bs_freq_res[i + 1] == ch_data->bs_freq_res[i]) { // same resolution: bands map one to one
+                for (j = 0; j < sbr->n[ch_data->bs_freq_res[i + 1]]; j++) {
+                    ch_data->env_facs_q[i + 1][j] = ch_data->env_facs_q[i][j] + delta * (get_vlc2(gb, t_huff, 9, 3) - t_lav);
+                    if (ch_data->env_facs_q[i + 1][j] > 127U) { // the value has already been stored when this check runs
+                        av_log(ac->avctx, AV_LOG_ERROR, "env_facs_q %d is invalid\n", ch_data->env_facs_q[i + 1][j]);
+                        return AVERROR_INVALIDDATA;
+                    }
+                }
+            } else if (ch_data->bs_freq_res[i + 1]) { // previous envelope low resolution, this one high
+                for (j = 0; j < sbr->n[ch_data->bs_freq_res[i + 1]]; j++) {
+                    k = (j + odd) >> 1; // find k such that f_tablelow[k] <= f_tablehigh[j] < f_tablelow[k + 1]
+                    ch_data->env_facs_q[i + 1][j] = ch_data->env_facs_q[i][k] + delta * (get_vlc2(gb, t_huff, 9, 3) - t_lav);
+                    if (ch_data->env_facs_q[i + 1][j] > 127U) {
+                        av_log(ac->avctx, AV_LOG_ERROR, "env_facs_q %d is invalid\n", ch_data->env_facs_q[i + 1][j]);
+                        return AVERROR_INVALIDDATA;
+                    }
+                }
+            } else { // previous envelope high resolution, this one low
+                for (j = 0; j < sbr->n[ch_data->bs_freq_res[i + 1]]; j++) {
+                    k = j ? 2*j - odd : 0; // find k such that f_tablehigh[k] == f_tablelow[j]
+                    ch_data->env_facs_q[i + 1][j] = ch_data->env_facs_q[i][k] + delta * (get_vlc2(gb, t_huff, 9, 3) - t_lav);
+                    if (ch_data->env_facs_q[i + 1][j] > 127U) {
+                        av_log(ac->avctx, AV_LOG_ERROR, "env_facs_q %d is invalid\n", ch_data->env_facs_q[i + 1][j]);
+                        return AVERROR_INVALIDDATA;
+                    }
+                }
+            }
+        } else { // start value, then deltas against the previous band (j - 1) using the f_huff table
+            ch_data->env_facs_q[i + 1][0] = delta * get_bits(gb, bits); // bs_env_start_value_balance
+            for (j = 1; j < sbr->n[ch_data->bs_freq_res[i + 1]]; j++) {
+                ch_data->env_facs_q[i + 1][j] = ch_data->env_facs_q[i + 1][j - 1] + delta * (get_vlc2(gb, f_huff, 9, 3) - f_lav);
+                if (ch_data->env_facs_q[i + 1][j] > 127U) {
+                    av_log(ac->avctx, AV_LOG_ERROR, "env_facs_q %d is invalid\n", ch_data->env_facs_q[i + 1][j]);
+                    return AVERROR_INVALIDDATA;
+                }
+            }
+        }
+    }
+
+    //assign 0th elements of env_facs_q from last elements
+    memcpy(ch_data->env_facs_q[0], ch_data->env_facs_q[ch_data->bs_num_env],
+           sizeof(ch_data->env_facs_q[0])); // not reached on the error returns above
+
+    return 0;
+}
+
+static int read_sbr_noise(AACContext *ac, SpectralBandReplication *sbr, GetBitContext *gb,
+                           SBRData *ch_data, int ch)
+{ // Read the noise floor scale factors of channel ch into noise_facs_q[1..bs_num_noise]; returns 0, or AVERROR_INVALIDDATA when a delta-coded value ends up above 30
+    int i, j;
+    VLC_TYPE (*t_huff)[2], (*f_huff)[2];
+    int t_lav, f_lav;
+    int delta = (ch == 1 && sbr->bs_coupling == 1) + 1; // 2 for the second channel of a coupled pair, otherwise 1
+
+    if (sbr->bs_coupling && ch) { // NOTE(review): both branches pair a NOISE time table with an ENV frequency table - presumably intentional; confirm against the spec
+        t_huff = vlc_sbr[T_HUFFMAN_NOISE_BAL_3_0DB].table;
+        t_lav  = vlc_sbr_lav[T_HUFFMAN_NOISE_BAL_3_0DB];
+        f_huff = vlc_sbr[F_HUFFMAN_ENV_BAL_3_0DB].table;
+        f_lav  = vlc_sbr_lav[F_HUFFMAN_ENV_BAL_3_0DB];
+    } else {
+        t_huff = vlc_sbr[T_HUFFMAN_NOISE_3_0DB].table;
+        t_lav  = vlc_sbr_lav[T_HUFFMAN_NOISE_3_0DB];
+        f_huff = vlc_sbr[F_HUFFMAN_ENV_3_0DB].table;
+        f_lav  = vlc_sbr_lav[F_HUFFMAN_ENV_3_0DB];
+    }
+
+    for (i = 0; i < ch_data->bs_num_noise; i++) {
+        if (ch_data->bs_df_noise[i]) { // delta against the previous noise floor (row i), using the t_huff table
+            for (j = 0; j < sbr->n_q; j++) {
+                ch_data->noise_facs_q[i + 1][j] = ch_data->noise_facs_q[i][j] + delta * (get_vlc2(gb, t_huff, 9, 2) - t_lav);
+                if (ch_data->noise_facs_q[i + 1][j] > 30U) { // the value has already been stored when this check runs
+                    av_log(ac->avctx, AV_LOG_ERROR, "noise_facs_q %d is invalid\n", ch_data->noise_facs_q[i + 1][j]);
+                    return AVERROR_INVALIDDATA;
+                }
+            }
+        } else { // 5-bit start value (not range-checked here), then deltas against the previous band using the f_huff table
+            ch_data->noise_facs_q[i + 1][0] = delta * get_bits(gb, 5); // bs_noise_start_value_balance or bs_noise_start_value_level
+            for (j = 1; j < sbr->n_q; j++) {
+                ch_data->noise_facs_q[i + 1][j] = ch_data->noise_facs_q[i + 1][j - 1] + delta * (get_vlc2(gb, f_huff, 9, 3) - f_lav);
+                if (ch_data->noise_facs_q[i + 1][j] > 30U) {
+                    av_log(ac->avctx, AV_LOG_ERROR, "noise_facs_q %d is invalid\n", ch_data->noise_facs_q[i + 1][j]);
+                    return AVERROR_INVALIDDATA;
+                }
+            }
+        }
+    }
+
+    //assign 0th elements of noise_facs_q from last elements
+    memcpy(ch_data->noise_facs_q[0], ch_data->noise_facs_q[ch_data->bs_num_noise],
+           sizeof(ch_data->noise_facs_q[0])); // not reached on the error returns above
+    return 0;
+}
+
+static void read_sbr_extension(AACContext *ac, SpectralBandReplication *sbr,
+                               GetBitContext *gb,
+                               int bs_extension_id, int *num_bits_left)
+{ // Consume one SBR extension payload from gb and update *num_bits_left with the number of bits still unread
+    switch (bs_extension_id) {
+    case EXTENSION_ID_PS:
+        if (!ac->oc[1].m4ac.ps) { // PS data present although the configuration says there is none: log and skip the rest
+            av_log(ac->avctx, AV_LOG_ERROR, "Parametric Stereo signaled to be not-present but was found in the bitstream.\n");
+            skip_bits_long(gb, *num_bits_left); // bs_fill_bits
+            *num_bits_left = 0;
+        } else {
+#if 1
+            *num_bits_left -= AAC_RENAME(ff_ps_read_data)(ac->avctx, gb, &sbr->ps, *num_bits_left); // may drive *num_bits_left negative; the caller logs that as an over-read
+            ac->avctx->profile = FF_PROFILE_AAC_HE_V2;
+#else
+            avpriv_report_missing_feature(ac->avctx, "Parametric Stereo");
+            skip_bits_long(gb, *num_bits_left); // bs_fill_bits
+            *num_bits_left = 0;
+#endif
+        }
+        break;
+    default:
+        // some files contain 0-padding: stay quiet only for id 0 with at most 16 remaining bits that are all zero
+        if (bs_extension_id || *num_bits_left > 16 || show_bits(gb, *num_bits_left))
+            avpriv_request_sample(ac->avctx, "Reserved SBR extensions");
+        skip_bits_long(gb, *num_bits_left); // bs_fill_bits
+        *num_bits_left = 0;
+        break;
+    }
+}
+
+/// Parse the SBR payload of a single-channel element into sbr->data[0]; returns 0 on success, a negative value on invalid data.
+static int read_sbr_single_channel_element(AACContext *ac, SpectralBandReplication *sbr, GetBitContext *gb)
+{
+    SBRData *const ch = &sbr->data[0];
+    int ret;
+
+    if (get_bits1(gb))    // bs_data_extra
+        skip_bits(gb, 4); // bs_reserved
+    if (read_sbr_grid(ac, sbr, gb, ch))
+        return -1;
+    read_sbr_dtdf(sbr, gb, ch);
+    read_sbr_invf(sbr, gb, ch);
+    ret = read_sbr_envelope(ac, sbr, gb, ch, 0);
+    if (ret >= 0)
+        ret = read_sbr_noise(ac, sbr, gb, ch, 0); // noise is only read when the envelope parsed cleanly
+    if (ret < 0)
+        return ret;
+
+    ch->bs_add_harmonic_flag = get_bits1(gb);
+    if (ch->bs_add_harmonic_flag)
+        get_bits1_vector(gb, ch->bs_add_harmonic, sbr->n[1]);
+    return 0;
+}
+
+static int read_sbr_channel_pair_element(AACContext *ac,
+                                          SpectralBandReplication *sbr,
+                                          GetBitContext *gb)
+{ // Parse the SBR payload of a channel pair into sbr->data[0] and sbr->data[1]; returns 0 on success, a negative value on invalid data
+    int ret;
+
+    if (get_bits1(gb))    // bs_data_extra
+        skip_bits(gb, 8); // bs_reserved
+
+    if ((sbr->bs_coupling = get_bits1(gb))) { // coupled: one grid and one set of invf modes, shared by both channels
+        if (read_sbr_grid(ac, sbr, gb, &sbr->data[0]))
+            return -1;
+        copy_sbr_grid(&sbr->data[1], &sbr->data[0]);
+        read_sbr_dtdf(sbr, gb, &sbr->data[0]);
+        read_sbr_dtdf(sbr, gb, &sbr->data[1]);
+        read_sbr_invf(sbr, gb, &sbr->data[0]);
+        memcpy(sbr->data[1].bs_invf_mode[1], sbr->data[1].bs_invf_mode[0], sizeof(sbr->data[1].bs_invf_mode[0])); // channel 1: save its previous modes ...
+        memcpy(sbr->data[1].bs_invf_mode[0], sbr->data[0].bs_invf_mode[0], sizeof(sbr->data[1].bs_invf_mode[0])); // ... then take channel 0's new ones
+        if((ret = read_sbr_envelope(ac, sbr, gb, &sbr->data[0], 0)) < 0) // coupled read order: env0, noise0, env1, noise1
+            return ret;
+        if((ret = read_sbr_noise(ac, sbr, gb, &sbr->data[0], 0)) < 0)
+            return ret;
+        if((ret = read_sbr_envelope(ac, sbr, gb, &sbr->data[1], 1)) < 0)
+            return ret;
+        if((ret = read_sbr_noise(ac, sbr, gb, &sbr->data[1], 1)) < 0)
+            return ret;
+    } else { // independent channels: every element is read once per channel
+        if (read_sbr_grid(ac, sbr, gb, &sbr->data[0]) ||
+            read_sbr_grid(ac, sbr, gb, &sbr->data[1]))
+            return -1;
+        read_sbr_dtdf(sbr, gb, &sbr->data[0]);
+        read_sbr_dtdf(sbr, gb, &sbr->data[1]);
+        read_sbr_invf(sbr, gb, &sbr->data[0]);
+        read_sbr_invf(sbr, gb, &sbr->data[1]);
+        if((ret = read_sbr_envelope(ac, sbr, gb, &sbr->data[0], 0)) < 0) // uncoupled read order: env0, env1, noise0, noise1
+            return ret;
+        if((ret = read_sbr_envelope(ac, sbr, gb, &sbr->data[1], 1)) < 0)
+            return ret;
+        if((ret = read_sbr_noise(ac, sbr, gb, &sbr->data[0], 0)) < 0)
+            return ret;
+        if((ret = read_sbr_noise(ac, sbr, gb, &sbr->data[1], 1)) < 0)
+            return ret;
+    }
+
+    if ((sbr->data[0].bs_add_harmonic_flag = get_bits1(gb)))
+        get_bits1_vector(gb, sbr->data[0].bs_add_harmonic, sbr->n[1]);
+    if ((sbr->data[1].bs_add_harmonic_flag = get_bits1(gb)))
+        get_bits1_vector(gb, sbr->data[1].bs_add_harmonic, sbr->n[1]);
+
+    return 0;
+}
+
+/// Parse the SBR data of one element and return the number of bits consumed from gb; SBR is turned off when the element cannot be parsed.
+static unsigned int read_sbr_data(AACContext *ac, SpectralBandReplication *sbr, GetBitContext *gb, int id_aac)
+{
+    const unsigned int start_bits = get_bits_count(gb);
+    int failed;
+
+    sbr->id_aac = id_aac;
+    sbr->ready_for_dequant = 1;
+
+    if (id_aac == TYPE_SCE || id_aac == TYPE_CCE) {
+        failed = read_sbr_single_channel_element(ac, sbr, gb);
+    } else if (id_aac == TYPE_CPE) {
+        failed = read_sbr_channel_pair_element(ac, sbr, gb);
+    } else {
+        av_log(ac->avctx, AV_LOG_ERROR,
+            "Invalid bitstream - cannot apply SBR to element type %d\n", id_aac);
+        failed = 1;
+    }
+    // Any failure disables SBR; the bits consumed so far are still reported
+    if (failed) {
+        sbr_turnoff(sbr);
+        return get_bits_count(gb) - start_bits;
+    }
+
+    if (get_bits1(gb)) { // bs_extended_data
+        int bits_remaining = get_bits(gb, 4); // bs_extension_size
+        if (bits_remaining == 15)
+            bits_remaining += get_bits(gb, 8); // bs_esc_count
+
+        bits_remaining *= 8; // the size is coded in bytes
+        while (bits_remaining > 7) {
+            const int ext_id = get_bits(gb, 2); // bs_extension_id
+            bits_remaining -= 2;
+            read_sbr_extension(ac, sbr, gb, ext_id, &bits_remaining);
+        }
+        if (bits_remaining < 0)
+            av_log(ac->avctx, AV_LOG_ERROR, "SBR Extension over read.\n");
+        else if (bits_remaining > 0)
+            skip_bits(gb, bits_remaining);
+    }
+
+    return get_bits_count(gb) - start_bits;
+}
+
+/// Rebuild the master and derived frequency tables; on failure, log it and turn SBR off.
+static void sbr_reset(AACContext *ac, SpectralBandReplication *sbr)
+{
+    // The derived tables are only built when the master table succeeded
+    if (sbr_make_f_master(ac, sbr, &sbr->spectrum_params) >= 0 &&
+        sbr_make_f_derived(ac, sbr) >= 0)
+        return;
+
+    av_log(ac->avctx, AV_LOG_ERROR,
+           "SBR reset failed. Switching SBR to pure upsampling mode.\n");
+    sbr_turnoff(sbr);
+}
+
+/**
+ * Decode Spectral Band Replication extension data; reference: table 4.55.
+ *
+ * @param   crc     flag indicating the presence of CRC checksum
+ * @param   cnt     length of TYPE_FIL syntactic element in bytes
+ * @param   id_aac  type of the syntactic element the SBR data is attached to
+ * @return  cnt, unconditionally: gb_host is skipped by cnt*8 - 4 bits up front and parsing uses a copy
+ */
+int AAC_RENAME(ff_decode_sbr_extension)(AACContext *ac, SpectralBandReplication *sbr,
+                            GetBitContext *gb_host, int crc, int cnt, int id_aac)
+{
+    unsigned int num_sbr_bits = 0, num_align_bits;
+    unsigned bytes_read;
+    GetBitContext gbc = *gb_host, *gb = &gbc; // parse from a private copy of the reader
+    skip_bits_long(gb_host, cnt*8 - 4);
+
+    sbr->reset = 0;
+
+    if (!sbr->sample_rate)
+        sbr->sample_rate = 2 * ac->oc[1].m4ac.sample_rate; //TODO use the nominal sample rate for arbitrary sample rate support
+    if (!ac->oc[1].m4ac.ext_sample_rate)
+        ac->oc[1].m4ac.ext_sample_rate = 2 * ac->oc[1].m4ac.sample_rate;
+
+    if (crc) {
+        skip_bits(gb, 10); // bs_sbr_crc_bits; TODO - implement CRC check
+        num_sbr_bits += 10;
+    }
+
+    //Save some state from the previous frame.
+    sbr->kx[0] = sbr->kx[1];
+    sbr->m[0] = sbr->m[1];
+    sbr->kx_and_m_pushed = 1;
+
+    num_sbr_bits++;
+    if (get_bits1(gb)) // bs_header_flag
+        num_sbr_bits += read_sbr_header(sbr, gb);
+
+    if (sbr->reset)
+        sbr_reset(ac, sbr);
+
+    if (sbr->start)
+        num_sbr_bits  += read_sbr_data(ac, sbr, gb, id_aac);
+
+    num_align_bits = ((cnt << 3) - 4 - num_sbr_bits) & 7;
+    bytes_read = ((num_sbr_bits + num_align_bits + 4) >> 3); // only used for the over-read diagnostic below
+
+    if (bytes_read > cnt) {
+        av_log(ac->avctx, AV_LOG_ERROR,
+               "Expected to read %d SBR bytes actually read %u.\n", cnt, bytes_read); // bytes_read is unsigned, hence %u
+    }
+    return cnt;
+}
+
+/**
+ * Analysis QMF Bank (14496-3 sp04 p206): processes 32 slots, writing W[buf_idx][slot] for each.
+ * @param   in      1024 new input samples, copied behind the 288 samples kept from x + 1024
+ * @param   x       pointer to the beginning of the first sample window
+ * @param   W       array of complex-valued samples split into subbands
+ */
+#ifndef sbr_qmf_analysis
+#if USE_FIXED
+static void sbr_qmf_analysis(AVFixedDSPContext *dsp, FFTContext *mdct,
+#else
+static void sbr_qmf_analysis(AVFloatDSPContext *dsp, FFTContext *mdct,
+#endif /* USE_FIXED */
+                             SBRDSPContext *sbrdsp, const INTFLOAT *in, INTFLOAT *x,
+                             INTFLOAT z[320], INTFLOAT W[2][32][32][2], int buf_idx)
+{
+    int i;
+#if USE_FIXED
+    int j;
+#endif
+    memcpy(x    , x+1024, (320-32)*sizeof(x[0])); // move the 288 samples at x + 1024 to the front
+    memcpy(x+288, in,         1024*sizeof(x[0])); // and append the new input after them
+    for (i = 0; i < 32; i++) { // numTimeSlots*RATE = 16*2 as 960 sample frames
+                               // are not supported
+        dsp->vector_fmul_reverse(z, sbr_qmf_window_ds, x, 320);
+        sbrdsp->sum64x5(z);
+        sbrdsp->qmf_pre_shuffle(z);
+#if USE_FIXED
+        for (j = 64; j < 128; j++) { // clamp z[64..127] to [-(1<<24), 1<<24] before the transform, warning on each clamp
+            if (z[j] > 1<<24) {
+                av_log(NULL, AV_LOG_WARNING,
+                       "sbr_qmf_analysis: value %09d too large, setting to %09d\n",
+                       z[j], 1<<24);
+                z[j] = 1<<24;
+            } else if (z[j] < -(1<<24)) {
+                av_log(NULL, AV_LOG_WARNING,
+                       "sbr_qmf_analysis: value %09d too small, setting to %09d\n",
+                       z[j], -(1<<24));
+                z[j] = -(1<<24);
+            }
+        }
+#endif
+        mdct->imdct_half(mdct, z, z+64);
+        sbrdsp->qmf_post_shuffle(W[buf_idx][i], z);
+        x += 32; // advance the window by 32 samples per slot
+    }
+}
+#endif
+
+/**
+ * Synthesis QMF Bank (14496-3 sp04 p206) and Downsampled Synthesis QMF Bank
+ * (14496-3 sp04 p206); a non-zero div selects sbr_qmf_window_ds, and all sizes and offsets are shifted right by div
+ */
+#ifndef sbr_qmf_synthesis
+static void sbr_qmf_synthesis(FFTContext *mdct,
+#if USE_FIXED
+                              SBRDSPContext *sbrdsp, AVFixedDSPContext *dsp,
+#else
+                              SBRDSPContext *sbrdsp, AVFloatDSPContext *dsp,
+#endif /* USE_FIXED */
+                              INTFLOAT *out, INTFLOAT X[2][38][64],
+                              INTFLOAT mdct_buf[2][64],
+                              INTFLOAT *v0, int *v_off, const unsigned int div)
+{
+    int i, n;
+    const INTFLOAT *sbr_qmf_window = div ? sbr_qmf_window_ds : sbr_qmf_window_us;
+    const int step = 128 >> div; // samples added to the v buffer per slot
+    INTFLOAT *v;
+    for (i = 0; i < 32; i++) {
+        if (*v_off < step) { // no room left below the current offset: copy the saved samples to the end of v0 and restart from there
+            int saved_samples = (1280 - 128) >> div;
+            memcpy(&v0[SBR_SYNTHESIS_BUF_SIZE - saved_samples], v0, saved_samples * sizeof(INTFLOAT));
+            *v_off = SBR_SYNTHESIS_BUF_SIZE - saved_samples - step;
+        } else {
+            *v_off -= step;
+        }
+        v = v0 + *v_off;
+        if (div) {
+            for (n = 0; n < 32; n++) { // note: modifies X[0][i] in place
+                X[0][i][   n] = -X[0][i][n];
+                X[0][i][32+n] =  X[1][i][31-n];
+            }
+            mdct->imdct_half(mdct, mdct_buf[0], X[0][i]);
+            sbrdsp->qmf_deint_neg(v, mdct_buf[0]);
+        } else {
+            sbrdsp->neg_odd_64(X[1][i]);
+            mdct->imdct_half(mdct, mdct_buf[0], X[0][i]);
+            mdct->imdct_half(mdct, mdct_buf[1], X[1][i]);
+            sbrdsp->qmf_deint_bfly(v, mdct_buf[1], mdct_buf[0]);
+        }
+        dsp->vector_fmul    (out, v                , sbr_qmf_window                       , 64 >> div); // first of ten windowed segments; the rest accumulate into out
+        dsp->vector_fmul_add(out, v + ( 192 >> div), sbr_qmf_window + ( 64 >> div), out   , 64 >> div);
+        dsp->vector_fmul_add(out, v + ( 256 >> div), sbr_qmf_window + (128 >> div), out   , 64 >> div);
+        dsp->vector_fmul_add(out, v + ( 448 >> div), sbr_qmf_window + (192 >> div), out   , 64 >> div);
+        dsp->vector_fmul_add(out, v + ( 512 >> div), sbr_qmf_window + (256 >> div), out   , 64 >> div);
+        dsp->vector_fmul_add(out, v + ( 704 >> div), sbr_qmf_window + (320 >> div), out   , 64 >> div);
+        dsp->vector_fmul_add(out, v + ( 768 >> div), sbr_qmf_window + (384 >> div), out   , 64 >> div);
+        dsp->vector_fmul_add(out, v + ( 960 >> div), sbr_qmf_window + (448 >> div), out   , 64 >> div);
+        dsp->vector_fmul_add(out, v + (1024 >> div), sbr_qmf_window + (512 >> div), out   , 64 >> div);
+        dsp->vector_fmul_add(out, v + (1216 >> div), sbr_qmf_window + (576 >> div), out   , 64 >> div);
+        out += 64 >> div; // 64 >> div output samples per slot
+    }
+}
+#endif
+
+/// Generate the subband filtered lowband: slots 8..39 come from W[buf_idx], slots 0..7 from the last 8 slots of the other W buffer.
+static int sbr_lf_gen(AACContext *ac, SpectralBandReplication *sbr,
+                      INTFLOAT X_low[32][40][2], const INTFLOAT W[2][32][32][2],
+                      int buf_idx)
+{
+    enum { T_HF_GEN = 8, SLOTS = 32 };
+    const int other_idx = 1 - buf_idx;
+    int band, slot;
+
+    memset(X_low, 0, 32*sizeof(*X_low));
+    for (band = 0; band < sbr->kx[1]; band++) {
+        for (slot = 0; slot < SLOTS; slot++) {
+            X_low[band][slot + T_HF_GEN][0] = W[buf_idx][slot][band][0];
+            X_low[band][slot + T_HF_GEN][1] = W[buf_idx][slot][band][1];
+        }
+    }
+    for (band = 0; band < sbr->kx[0]; band++) {
+        for (slot = 0; slot < T_HF_GEN; slot++) {
+            X_low[band][slot][0] = W[other_idx][slot + SLOTS - T_HF_GEN][band][0];
+            X_low[band][slot][1] = W[other_idx][slot + SLOTS - T_HF_GEN][band][1];
+        }
+    }
+    return 0;
+}
+
+/// High Frequency Generator (14496-3 sp04 p215): runs dsp.hf_gen for every patched subband, then zeroes the X_high rows between the last patched band and kx[1] + m[1]. Returns 0, or -1 if no noise band matches.
+static int sbr_hf_gen(AACContext *ac, SpectralBandReplication *sbr,
+                      INTFLOAT X_high[64][40][2], const INTFLOAT X_low[32][40][2],
+                      const INTFLOAT (*alpha0)[2], const INTFLOAT (*alpha1)[2],
+                      const INTFLOAT bw_array[5], const uint8_t *t_env,
+                      int bs_num_env)
+{
+    int dst = sbr->kx[1];    // row of X_high being produced; advances with every patched band
+    int noise_band = 0;      // index into f_tablenoise, also selects the bw_array entry
+    int patch, n;
+
+    for (patch = 0; patch < sbr->num_patches; patch++) {
+        for (n = 0; n < sbr->patch_num_subbands[patch]; n++, dst++) {
+            const int src = sbr->patch_start_subband[patch] + n;
+
+            // step past every table entry <= dst, then back up to the last one passed
+            for (; noise_band <= sbr->n_q; noise_band++)
+                if (dst < sbr->f_tablenoise[noise_band])
+                    break;
+            if (--noise_band < 0) {
+                av_log(ac->avctx, AV_LOG_ERROR,
+                       "ERROR : no subband found for frequency %d\n", dst);
+                return -1;
+            }
+            sbr->dsp.hf_gen(X_high[dst] + ENVELOPE_ADJUSTMENT_OFFSET,
+                            X_low[src]  + ENVELOPE_ADJUSTMENT_OFFSET,
+                            alpha0[src], alpha1[src], bw_array[noise_band],
+                            2 * t_env[0], 2 * t_env[bs_num_env]);
+        }
+    }
+    if (dst < sbr->m[1] + sbr->kx[1])
+        memset(X_high + dst, 0, (sbr->m[1] + sbr->kx[1] - dst) * sizeof(*X_high));
+    return 0;
+}
+
+/// Assemble X for channel ch: component 0 of each sample goes to X[0], component 1 to X[1]; low bands come from X_low, high bands from Y0/Y1, and everything not written is zero.
+static int sbr_x_gen(SpectralBandReplication *sbr, INTFLOAT X[2][38][64],
+                     const INTFLOAT Y0[38][64][2], const INTFLOAT Y1[38][64][2],
+                     const INTFLOAT X_low[32][40][2], int ch)
+{
+    enum { NUM_SLOTS = 32, X_SLOTS = 38 };             // i_f, and the slot dimension of X
+    const int split = FFMAX(2 * sbr->data[ch].t_env_num_env_old - NUM_SLOTS, 0); // i_Temp
+    INTFLOAT (*const re)[64] = X[0];
+    INTFLOAT (*const im)[64] = X[1];
+    int band, slot;
+
+    memset(X, 0, 2 * sizeof(*X));
+    // slots [0, split): bands below kx[0] from X_low, the next m[0] bands from Y0
+    for (band = 0; band < sbr->kx[0]; band++)
+        for (slot = 0; slot < split; slot++) {
+            re[slot][band] = X_low[band][slot + ENVELOPE_ADJUSTMENT_OFFSET][0];
+            im[slot][band] = X_low[band][slot + ENVELOPE_ADJUSTMENT_OFFSET][1];
+        }
+    for (; band < sbr->kx[0] + sbr->m[0]; band++)
+        for (slot = 0; slot < split; slot++) {
+            re[slot][band] = Y0[slot + NUM_SLOTS][band][0];
+            im[slot][band] = Y0[slot + NUM_SLOTS][band][1];
+        }
+    // slots from split on: bands below kx[1] from X_low (up to slot 37), the next m[1] bands from Y1 (up to slot 31)
+    for (band = 0; band < sbr->kx[1]; band++)
+        for (slot = split; slot < X_SLOTS; slot++) {
+            re[slot][band] = X_low[band][slot + ENVELOPE_ADJUSTMENT_OFFSET][0];
+            im[slot][band] = X_low[band][slot + ENVELOPE_ADJUSTMENT_OFFSET][1];
+        }
+    for (; band < sbr->kx[1] + sbr->m[1]; band++)
+        for (slot = split; slot < NUM_SLOTS; slot++) {
+            re[slot][band] = Y1[slot][band][0];
+            im[slot][band] = Y1[slot][band][1];
+        }
+    return 0;
+}
+
+/** High Frequency Adjustment (14496-3 sp04 p217) and Mapping
+ * (14496-3 sp04 p217): for each envelope e, expands env_facs, noise_facs and bs_add_harmonic into the per-subband arrays e_origmapped[e], q_mapped[e], s_indexmapped[e + 1] and s_mapped[e], all indexed relative to kx[1].
+ * @return 0 on success; AVERROR_BUG, after logging and sbr_turnoff(), if kx[1] differs from the first entry of the selected frequency table. */
+static int sbr_mapping(AACContext *ac, SpectralBandReplication *sbr,
+                        SBRData *ch_data, int e_a[2])
+{
+    int e, i, m;
+
+    memset(ch_data->s_indexmapped[1], 0, 7*sizeof(ch_data->s_indexmapped[1])); // clears rows 1..7 only; row 0 is read in the loop below and rewritten at the end
+    for (e = 0; e < ch_data->bs_num_env; e++) {
+        const unsigned int ilim = sbr->n[ch_data->bs_freq_res[e + 1]]; // loop bound for table, picked by the same bs_freq_res selector
+        uint16_t *table = ch_data->bs_freq_res[e + 1] ? sbr->f_tablehigh : sbr->f_tablelow;
+        int k; // 0 or 1, selects the noise_facs row (k + 1) used for this envelope
+
+        if (sbr->kx[1] != table[0]) {
+            av_log(ac->avctx, AV_LOG_ERROR, "kx != f_table{high,low}[0]. "
+                   "Derived frequency tables were not regenerated.\n");
+            sbr_turnoff(sbr);
+            return AVERROR_BUG;
+        }
+        for (i = 0; i < ilim; i++) // copy factor i to every subband in [table[i], table[i + 1])
+            for (m = table[i]; m < table[i + 1]; m++)
+                sbr->e_origmapped[e][m - sbr->kx[1]] = ch_data->env_facs[e+1][i];
+
+        // k is 1 only when bs_num_noise > 1 and t_env[e] >= t_q[1]; otherwise 0
+        k = (ch_data->bs_num_noise > 1) && (ch_data->t_env[e] >= ch_data->t_q[1]);
+        for (i = 0; i < sbr->n_q; i++) // same expansion, over the f_tablenoise bands
+            for (m = sbr->f_tablenoise[i]; m < sbr->f_tablenoise[i + 1]; m++)
+                sbr->q_mapped[e][m - sbr->kx[1]] = ch_data->noise_facs[k+1][i];
+
+        for (i = 0; i < sbr->n[1]; i++) {
+            if (ch_data->bs_add_harmonic_flag) {
+                const unsigned int m_midpoint =
+                    (sbr->f_tablehigh[i] + sbr->f_tablehigh[i + 1]) >> 1; // midpoint (rounded down) of f_tablehigh band i
+
+                ch_data->s_indexmapped[e + 1][m_midpoint - sbr->kx[1]] = ch_data->bs_add_harmonic[i] *
+                    (e >= e_a[1] || (ch_data->s_indexmapped[0][m_midpoint - sbr->kx[1]] == 1)); // bs_add_harmonic[i] is kept only if e >= e_a[1] or row 0 already holds 1 there
+            }
+        }
+
+        for (i = 0; i < ilim; i++) { // a band of s_mapped[e] is filled with 1 if any of its subbands is set in s_indexmapped[e + 1], else with 0
+            int additional_sinusoid_present = 0;
+            for (m = table[i]; m < table[i + 1]; m++) {
+                if (ch_data->s_indexmapped[e + 1][m - sbr->kx[1]]) {
+                    additional_sinusoid_present = 1;
+                    break;
+                }
+            }
+            memset(&sbr->s_mapped[e][table[i] - sbr->kx[1]], additional_sinusoid_present,
+                   (table[i + 1] - table[i]) * sizeof(sbr->s_mapped[e][0])); // memset writes the flag byte-wise; assumes s_mapped elements are one byte wide -- TODO confirm
+        }
+    }
+
+    memcpy(ch_data->s_indexmapped[0], ch_data->s_indexmapped[ch_data->bs_num_env], sizeof(ch_data->s_indexmapped[0])); // row of the last envelope becomes row 0, which the next call reads before overwriting
+    return 0;
+}
+
+/// Estimation of current envelope (14496-3 sp04 p218): fills e_curr[e][...] for each of the bs_num_env envelopes from sbr->dsp.sum_square() over the X_high slots of that envelope, scaled by the number of values summed.
+static void sbr_env_estimate(AAC_FLOAT (*e_curr)[48], INTFLOAT X_high[64][40][2],
+                             SpectralBandReplication *sbr, SBRData *ch_data)
+{
+    int e, m;
+    int kx1 = sbr->kx[1]; // X_high rows are indexed with kx1 added, e_curr columns without it
+
+    if (sbr->bs_interpol_freq) { // one estimate per subband m
+        for (e = 0; e < ch_data->bs_num_env; e++) {
+#if USE_FIXED
+            const SoftFloat recip_env_size = av_int2sf(0x20000000 / (ch_data->t_env[e + 1] - ch_data->t_env[e]), 30); // presumably the SoftFloat form of the float expression below -- confirm against av_int2sf()
+#else
+            const float recip_env_size = 0.5f / (ch_data->t_env[e + 1] - ch_data->t_env[e]); // equals 1 / (iub - ilb)
+#endif /* USE_FIXED */
+            int ilb = ch_data->t_env[e]     * 2 + ENVELOPE_ADJUSTMENT_OFFSET; // start offset into an X_high row
+            int iub = ch_data->t_env[e + 1] * 2 + ENVELOPE_ADJUSTMENT_OFFSET; // end offset (exclusive); iub - ilb is the count given to sum_square
+
+            for (m = 0; m < sbr->m[1]; m++) {
+                AAC_FLOAT sum = sbr->dsp.sum_square(X_high[m+kx1] + ilb, iub - ilb);
+#if USE_FIXED
+                e_curr[e][m] = av_mul_sf(sum, recip_env_size);
+#else
+                e_curr[e][m] = sum * recip_env_size;
+#endif /* USE_FIXED */
+            }
+        }
+    } else { // one estimate per frequency band, written to every subband of that band
+        int k, p;
+
+        for (e = 0; e < ch_data->bs_num_env; e++) {
+            const int env_size = 2 * (ch_data->t_env[e + 1] - ch_data->t_env[e]); // same value as iub - ilb
+            int ilb = ch_data->t_env[e]     * 2 + ENVELOPE_ADJUSTMENT_OFFSET;
+            int iub = ch_data->t_env[e + 1] * 2 + ENVELOPE_ADJUSTMENT_OFFSET;
+            const uint16_t *table = ch_data->bs_freq_res[e + 1] ? sbr->f_tablehigh : sbr->f_tablelow; // band borders, chosen by bs_freq_res[e + 1]
+
+            for (p = 0; p < sbr->n[ch_data->bs_freq_res[e + 1]]; p++) {
+#if USE_FIXED
+                SoftFloat sum = FLOAT_0;
+                const SoftFloat den = av_int2sf(0x20000000 / (env_size * (table[p + 1] - table[p])), 29); // NOTE(review): named den but multiplied below; presumably a reciprocal of the float branch's den -- confirm
+                for (k = table[p]; k < table[p + 1]; k++) {
+                    sum = av_add_sf(sum, sbr->dsp.sum_square(X_high[k] + ilb, iub - ilb));
+                }
+                sum = av_mul_sf(sum, den);
+#else
+                float sum = 0.0f;
+                const int den = env_size * (table[p + 1] - table[p]); // slot count times number of subbands in band p
+
+                for (k = table[p]; k < table[p + 1]; k++) {
+                    sum += sbr->dsp.sum_square(X_high[k] + ilb, iub - ilb);
+                }
+                sum /= den;
+#endif /* USE_FIXED */
+                for (k = table[p]; k < table[p + 1]; k++) { // table holds absolute subbands, so kx1 is subtracted for e_curr
+                    e_curr[e][k - kx1] = sum;
+                }
+            }
+        }
+    }
+}
+
+void AAC_RENAME(ff_sbr_apply)(AACContext *ac, SpectralBandReplication *sbr, int id_aac,
+                  INTFLOAT* L, INTFLOAT* R)
+{
+    int downsampled = ac->oc[1].m4ac.ext_sample_rate < sbr->sample_rate; // passed unchanged to both sbr_qmf_synthesis() calls
+    int ch;
+    int nch = (id_aac == TYPE_CPE) ? 2 : 1; // two channels for TYPE_CPE, one otherwise; forced to 2 below when PS is signalled
+    int err;
+    /* Per channel: sbr_qmf_analysis of L/R -> X_low -> (only if sbr->start) X_high and Y -> X[ch]; then optional ff_ps_apply and sbr_qmf_synthesis back into L/R. */
+    if (id_aac != sbr->id_aac) {
+        av_log(ac->avctx, AV_LOG_ERROR,
+            "element type mismatch %d != %d\n", id_aac, sbr->id_aac);
+        sbr_turnoff(sbr); // no return: processing continues after the turn-off
+    }
+
+    if (sbr->start && !sbr->ready_for_dequant) {
+        av_log(ac->avctx, AV_LOG_ERROR,
+               "No quantized data read for sbr_dequant.\n");
+        sbr_turnoff(sbr); // likewise continues; presumably sbr_turnoff() clears sbr->start so the branches below are skipped -- confirm
+    }
+
+    if (!sbr->kx_and_m_pushed) { // flag clear: make slot 0 of kx/m mirror slot 1
+        sbr->kx[0] = sbr->kx[1];
+        sbr->m[0] = sbr->m[1];
+    } else { // flag set: keep kx[0]/m[0] as they are and consume the flag
+        sbr->kx_and_m_pushed = 0;
+    }
+
+    if (sbr->start) {
+        sbr_dequant(sbr, id_aac);
+        sbr->ready_for_dequant = 0;
+    }
+    for (ch = 0; ch < nch; ch++) {
+        /* decode channel */
+        sbr_qmf_analysis(ac->fdsp, &sbr->mdct_ana, &sbr->dsp, ch ? R : L, sbr->data[ch].analysis_filterbank_samples,
+                         (INTFLOAT*)sbr->qmf_filter_scratch,
+                         sbr->data[ch].W, sbr->data[ch].Ypos);
+        sbr->c.sbr_lf_gen(ac, sbr, sbr->X_low,
+                          (const INTFLOAT (*)[32][32][2]) sbr->data[ch].W,
+                          sbr->data[ch].Ypos);
+        sbr->data[ch].Ypos ^= 1; // from here on Y[Ypos] is the buffer sbr_hf_assemble writes and Y[1 - Ypos] the other one
+        if (sbr->start) {
+            sbr->c.sbr_hf_inverse_filter(&sbr->dsp, sbr->alpha0, sbr->alpha1,
+                                         (const INTFLOAT (*)[40][2]) sbr->X_low, sbr->k[0]);
+            sbr_chirp(sbr, &sbr->data[ch]);
+            av_assert0(sbr->data[ch].bs_num_env > 0);
+            sbr_hf_gen(ac, sbr, sbr->X_high, // NOTE(review): the return value (-1 on failure) is ignored here
+                       (const INTFLOAT (*)[40][2]) sbr->X_low,
+                       (const INTFLOAT (*)[2]) sbr->alpha0,
+                       (const INTFLOAT (*)[2]) sbr->alpha1,
+                       sbr->data[ch].bw_array, sbr->data[ch].t_env,
+                       sbr->data[ch].bs_num_env);
+
+            // hf_adj: envelope estimate, gain calculation and assembly are skipped for this channel when sbr_mapping() fails
+            err = sbr_mapping(ac, sbr, &sbr->data[ch], sbr->data[ch].e_a);
+            if (!err) {
+                sbr_env_estimate(sbr->e_curr, sbr->X_high, sbr, &sbr->data[ch]);
+                sbr_gain_calc(ac, sbr, &sbr->data[ch], sbr->data[ch].e_a);
+                sbr->c.sbr_hf_assemble(sbr->data[ch].Y[sbr->data[ch].Ypos],
+                                (const INTFLOAT (*)[40][2]) sbr->X_high,
+                                sbr, &sbr->data[ch],
+                                sbr->data[ch].e_a);
+            }
+        }
+
+        /* synthesis: build X[ch] from Y[1 - Ypos] (passed as Y0), Y[Ypos] (passed as Y1) and X_low */
+        sbr->c.sbr_x_gen(sbr, sbr->X[ch],
+                  (const INTFLOAT (*)[64][2]) sbr->data[ch].Y[1-sbr->data[ch].Ypos],
+                  (const INTFLOAT (*)[64][2]) sbr->data[ch].Y[  sbr->data[ch].Ypos],
+                  (const INTFLOAT (*)[40][2]) sbr->X_low, ch);
+    }
+
+    if (ac->oc[1].m4ac.ps == 1) { // PS signalled: output is always two channels
+        if (sbr->ps.start) {
+            AAC_RENAME(ff_ps_apply)(ac->avctx, &sbr->ps, sbr->X[0], sbr->X[1], sbr->kx[1] + sbr->m[1]);
+        } else { // ps.start not set: duplicate X[0] into X[1]
+            memcpy(sbr->X[1], sbr->X[0], sizeof(sbr->X[0]));
+        }
+        nch = 2;
+    }
+
+    sbr_qmf_synthesis(&sbr->mdct, &sbr->dsp, ac->fdsp, // X[0] -> L, using the state kept in data[0]
+                      L, sbr->X[0], sbr->qmf_filter_scratch,
+                      sbr->data[0].synthesis_filterbank_samples,
+                      &sbr->data[0].synthesis_filterbank_samples_offset,
+                      downsampled);
+    if (nch == 2) // X[1] -> R, using the state kept in data[1]
+        sbr_qmf_synthesis(&sbr->mdct, &sbr->dsp, ac->fdsp,
+                          R, sbr->X[1], sbr->qmf_filter_scratch,
+                          sbr->data[1].synthesis_filterbank_samples,
+                          &sbr->data[1].synthesis_filterbank_samples_offset,
+                          downsampled);
+}
+
+static void aacsbr_func_ptr_init(AACSBRContext *c)
+{
+    /* Install the default implementations into the function-pointer table c. */
+    c->sbr_hf_inverse_filter = sbr_hf_inverse_filter;
+    c->sbr_hf_assemble       = sbr_hf_assemble;
+    c->sbr_lf_gen            = sbr_lf_gen;
+    c->sbr_x_gen             = sbr_x_gen;
+#if !USE_FIXED
+    if (ARCH_MIPS)           /* non-fixed build only; presumably replaces some entries -- confirm */
+        ff_aacsbr_func_ptr_init_mips(c);
+#endif
+}
diff --git a/libavcodec/aacsbrdata.h b/libavcodec/aacsbrdata.h
index c667e0b4..4ff8fae9 100644
--- a/libavcodec/aacsbrdata.h
+++ b/libavcodec/aacsbrdata.h
@@ -30,6 +30,7 @@
 
 #include <stdint.h>
 #include "libavutil/mem.h"
+#include "aac_defines.h"
 
 ///< Huffman tables for SBR
 
@@ -267,268 +268,268 @@ static const int8_t sbr_offset[6][16] = {
 };
 
 /* First eight entries repeated at end to simplify SIMD implementations. */
-const DECLARE_ALIGNED(16, float, ff_sbr_noise_table)[][2] = {
-{-0.99948153278296, -0.59483417516607}, { 0.97113454393991, -0.67528515225647},
-{ 0.14130051758487, -0.95090983575689}, {-0.47005496701697, -0.37340549728647},
-{ 0.80705063769351,  0.29653668284408}, {-0.38981478896926,  0.89572605717087},
-{-0.01053049862020, -0.66959058036166}, {-0.91266367957293, -0.11522938140034},
-{ 0.54840422910309,  0.75221367176302}, { 0.40009252867955, -0.98929400334421},
-{-0.99867974711855, -0.88147068645358}, {-0.95531076805040,  0.90908757154593},
-{-0.45725933317144, -0.56716323646760}, {-0.72929675029275, -0.98008272727324},
-{ 0.75622801399036,  0.20950329995549}, { 0.07069442601050, -0.78247898470706},
-{ 0.74496252926055, -0.91169004445807}, {-0.96440182703856, -0.94739918296622},
-{ 0.30424629369539, -0.49438267012479}, { 0.66565033746925,  0.64652935542491},
-{ 0.91697008020594,  0.17514097332009}, {-0.70774918760427,  0.52548653416543},
-{-0.70051415345560, -0.45340028808763}, {-0.99496513054797, -0.90071908066973},
-{ 0.98164490790123, -0.77463155528697}, {-0.54671580548181, -0.02570928536004},
-{-0.01689629065389,  0.00287506445732}, {-0.86110349531986,  0.42548583726477},
-{-0.98892980586032, -0.87881132267556}, { 0.51756627678691,  0.66926784710139},
-{-0.99635026409640, -0.58107730574765}, {-0.99969370862163,  0.98369989360250},
-{ 0.55266258627194,  0.59449057465591}, { 0.34581177741673,  0.94879421061866},
-{ 0.62664209577999, -0.74402970906471}, {-0.77149701404973, -0.33883658042801},
-{-0.91592244254432,  0.03687901376713}, {-0.76285492357887, -0.91371867919124},
-{ 0.79788337195331, -0.93180971199849}, { 0.54473080610200, -0.11919206037186},
-{-0.85639281671058,  0.42429854760451}, {-0.92882402971423,  0.27871809078609},
-{-0.11708371046774, -0.99800843444966}, { 0.21356749817493, -0.90716295627033},
-{-0.76191692573909,  0.99768118356265}, { 0.98111043100884, -0.95854459734407},
-{-0.85913269895572,  0.95766566168880}, {-0.93307242253692,  0.49431757696466},
-{ 0.30485754879632, -0.70540034357529}, { 0.85289650925190,  0.46766131791044},
-{ 0.91328082618125, -0.99839597361769}, {-0.05890199924154,  0.70741827819497},
-{ 0.28398686150148,  0.34633555702188}, { 0.95258164539612, -0.54893416026939},
-{-0.78566324168507, -0.75568541079691}, {-0.95789495447877, -0.20423194696966},
-{ 0.82411158711197,  0.96654618432562}, {-0.65185446735885, -0.88734990773289},
-{-0.93643603134666,  0.99870790442385}, { 0.91427159529618, -0.98290505544444},
-{-0.70395684036886,  0.58796798221039}, { 0.00563771969365,  0.61768196727244},
-{ 0.89065051931895,  0.52783352697585}, {-0.68683707712762,  0.80806944710339},
-{ 0.72165342518718, -0.69259857349564}, {-0.62928247730667,  0.13627037407335},
-{ 0.29938434065514, -0.46051329682246}, {-0.91781958879280, -0.74012716684186},
-{ 0.99298717043688,  0.40816610075661}, { 0.82368298622748, -0.74036047190173},
-{-0.98512833386833, -0.99972330709594}, {-0.95915368242257, -0.99237800466040},
-{-0.21411126572790, -0.93424819052545}, {-0.68821476106884, -0.26892306315457},
-{ 0.91851997982317,  0.09358228901785}, {-0.96062769559127,  0.36099095133739},
-{ 0.51646184922287, -0.71373332873917}, { 0.61130721139669,  0.46950141175917},
-{ 0.47336129371299, -0.27333178296162}, { 0.90998308703519,  0.96715662938132},
-{ 0.44844799194357,  0.99211574628306}, { 0.66614891079092,  0.96590176169121},
-{ 0.74922239129237, -0.89879858826087}, {-0.99571588506485,  0.52785521494349},
-{ 0.97401082477563, -0.16855870075190}, { 0.72683747733879, -0.48060774432251},
-{ 0.95432193457128,  0.68849603408441}, {-0.72962208425191, -0.76608443420917},
-{-0.85359479233537,  0.88738125901579}, {-0.81412430338535, -0.97480768049637},
-{-0.87930772356786,  0.74748307690436}, {-0.71573331064977, -0.98570608178923},
-{ 0.83524300028228,  0.83702537075163}, {-0.48086065601423, -0.98848504923531},
-{ 0.97139128574778,  0.80093621198236}, { 0.51992825347895,  0.80247631400510},
-{-0.00848591195325, -0.76670128000486}, {-0.70294374303036,  0.55359910445577},
-{-0.95894428168140, -0.43265504344783}, { 0.97079252950321,  0.09325857238682},
-{-0.92404293670797,  0.85507704027855}, {-0.69506469500450,  0.98633412625459},
-{ 0.26559203620024,  0.73314307966524}, { 0.28038443336943,  0.14537913654427},
-{-0.74138124825523,  0.99310339807762}, {-0.01752795995444, -0.82616635284178},
-{-0.55126773094930, -0.98898543862153}, { 0.97960898850996, -0.94021446752851},
-{-0.99196309146936,  0.67019017358456}, {-0.67684928085260,  0.12631491649378},
-{ 0.09140039465500, -0.20537731453108}, {-0.71658965751996, -0.97788200391224},
-{ 0.81014640078925,  0.53722648362443}, { 0.40616991671205, -0.26469008598449},
-{-0.67680188682972,  0.94502052337695}, { 0.86849774348749, -0.18333598647899},
-{-0.99500381284851, -0.02634122068550}, { 0.84329189340667,  0.10406957462213},
-{-0.09215968531446,  0.69540012101253}, { 0.99956173327206, -0.12358542001404},
-{-0.79732779473535, -0.91582524736159}, { 0.96349973642406,  0.96640458041000},
-{-0.79942778496547,  0.64323902822857}, {-0.11566039853896,  0.28587846253726},
-{-0.39922954514662,  0.94129601616966}, { 0.99089197565987, -0.92062625581587},
-{ 0.28631285179909, -0.91035047143603}, {-0.83302725605608, -0.67330410892084},
-{ 0.95404443402072,  0.49162765398743}, {-0.06449863579434,  0.03250560813135},
-{-0.99575054486311,  0.42389784469507}, {-0.65501142790847,  0.82546114655624},
-{-0.81254441908887, -0.51627234660629}, {-0.99646369485481,  0.84490533520752},
-{ 0.00287840603348,  0.64768261158166}, { 0.70176989408455, -0.20453028573322},
-{ 0.96361882270190,  0.40706967140989}, {-0.68883758192426,  0.91338958840772},
-{-0.34875585502238,  0.71472290693300}, { 0.91980081243087,  0.66507455644919},
-{-0.99009048343881,  0.85868021604848}, { 0.68865791458395,  0.55660316809678},
-{-0.99484402129368, -0.20052559254934}, { 0.94214511408023, -0.99696425367461},
-{-0.67414626793544,  0.49548221180078}, {-0.47339353684664, -0.85904328834047},
-{ 0.14323651387360, -0.94145598222488}, {-0.29268293575672,  0.05759224927952},
-{ 0.43793861458754, -0.78904969892724}, {-0.36345126374441,  0.64874435357162},
-{-0.08750604656825,  0.97686944362527}, {-0.96495267812511, -0.53960305946511},
-{ 0.55526940659947,  0.78891523734774}, { 0.73538215752630,  0.96452072373404},
-{-0.30889773919437, -0.80664389776860}, { 0.03574995626194, -0.97325616900959},
-{ 0.98720684660488,  0.48409133691962}, {-0.81689296271203, -0.90827703628298},
-{ 0.67866860118215,  0.81284503870856}, {-0.15808569732583,  0.85279555024382},
-{ 0.80723395114371, -0.24717418514605}, { 0.47788757329038, -0.46333147839295},
-{ 0.96367554763201,  0.38486749303242}, {-0.99143875716818, -0.24945277239809},
-{ 0.83081876925833, -0.94780851414763}, {-0.58753191905341,  0.01290772389163},
-{ 0.95538108220960, -0.85557052096538}, {-0.96490920476211, -0.64020970923102},
-{-0.97327101028521,  0.12378128133110}, { 0.91400366022124,  0.57972471346930},
-{-0.99925837363824,  0.71084847864067}, {-0.86875903507313, -0.20291699203564},
-{-0.26240034795124, -0.68264554369108}, {-0.24664412953388, -0.87642273115183},
-{ 0.02416275806869,  0.27192914288905}, { 0.82068619590515, -0.85087787994476},
-{ 0.88547373760759, -0.89636802901469}, {-0.18173078152226, -0.26152145156800},
-{ 0.09355476558534,  0.54845123045604}, {-0.54668414224090,  0.95980774020221},
-{ 0.37050990604091, -0.59910140383171}, {-0.70373594262891,  0.91227665827081},
-{-0.34600785879594, -0.99441426144200}, {-0.68774481731008, -0.30238837956299},
-{-0.26843291251234,  0.83115668004362}, { 0.49072334613242, -0.45359708737775},
-{ 0.38975993093975,  0.95515358099121}, {-0.97757125224150,  0.05305894580606},
-{-0.17325552859616, -0.92770672250494}, { 0.99948035025744,  0.58285545563426},
-{-0.64946246527458,  0.68645507104960}, {-0.12016920576437, -0.57147322153312},
-{-0.58947456517751, -0.34847132454388}, {-0.41815140454465,  0.16276422358861},
-{ 0.99885650204884,  0.11136095490444}, {-0.56649614128386, -0.90494866361587},
-{ 0.94138021032330,  0.35281916733018}, {-0.75725076534641,  0.53650549640587},
-{ 0.20541973692630, -0.94435144369918}, { 0.99980371023351,  0.79835913565599},
-{ 0.29078277605775,  0.35393777921520}, {-0.62858772103030,  0.38765693387102},
-{ 0.43440904467688, -0.98546330463232}, {-0.98298583762390,  0.21021524625209},
-{ 0.19513029146934, -0.94239832251867}, {-0.95476662400101,  0.98364554179143},
-{ 0.93379635304810, -0.70881994583682}, {-0.85235410573336, -0.08342347966410},
-{-0.86425093011245, -0.45795025029466}, { 0.38879779059045,  0.97274429344593},
-{ 0.92045124735495, -0.62433652524220}, { 0.89162532251878,  0.54950955570563},
-{-0.36834336949252,  0.96458298020975}, { 0.93891760988045, -0.89968353740388},
-{ 0.99267657565094, -0.03757034316958}, {-0.94063471614176,  0.41332338538963},
-{ 0.99740224117019, -0.16830494996370}, {-0.35899413170555, -0.46633226649613},
-{ 0.05237237274947, -0.25640361602661}, { 0.36703583957424, -0.38653265641875},
-{ 0.91653180367913, -0.30587628726597}, { 0.69000803499316,  0.90952171386132},
-{-0.38658751133527,  0.99501571208985}, {-0.29250814029851,  0.37444994344615},
-{-0.60182204677608,  0.86779651036123}, {-0.97418588163217,  0.96468523666475},
-{ 0.88461574003963,  0.57508405276414}, { 0.05198933055162,  0.21269661669964},
-{-0.53499621979720,  0.97241553731237}, {-0.49429560226497,  0.98183865291903},
-{-0.98935142339139, -0.40249159006933}, {-0.98081380091130, -0.72856895534041},
-{-0.27338148835532,  0.99950922447209}, { 0.06310802338302, -0.54539587529618},
-{-0.20461677199539, -0.14209977628489}, { 0.66223843141647,  0.72528579940326},
-{-0.84764345483665,  0.02372316801261}, {-0.89039863483811,  0.88866581484602},
-{ 0.95903308477986,  0.76744927173873}, { 0.73504123909879, -0.03747203173192},
-{-0.31744434966056, -0.36834111883652}, {-0.34110827591623,  0.40211222807691},
-{ 0.47803883714199, -0.39423219786288}, { 0.98299195879514,  0.01989791390047},
-{-0.30963073129751, -0.18076720599336}, { 0.99992588229018, -0.26281872094289},
-{-0.93149731080767, -0.98313162570490}, { 0.99923472302773, -0.80142993767554},
-{-0.26024169633417, -0.75999759855752}, {-0.35712514743563,  0.19298963768574},
-{-0.99899084509530,  0.74645156992493}, { 0.86557171579452,  0.55593866696299},
-{ 0.33408042438752,  0.86185953874709}, { 0.99010736374716,  0.04602397576623},
-{-0.66694269691195, -0.91643611810148}, { 0.64016792079480,  0.15649530836856},
-{ 0.99570534804836,  0.45844586038111}, {-0.63431466947340,  0.21079116459234},
-{-0.07706847005931, -0.89581437101329}, { 0.98590090577724,  0.88241721133981},
-{ 0.80099335254678, -0.36851896710853}, { 0.78368131392666,  0.45506999802597},
-{ 0.08707806671691,  0.80938994918745}, {-0.86811883080712,  0.39347308654705},
-{-0.39466529740375, -0.66809432114456}, { 0.97875325649683, -0.72467840967746},
-{-0.95038560288864,  0.89563219587625}, { 0.17005239424212,  0.54683053962658},
-{-0.76910792026848, -0.96226617549298}, { 0.99743281016846,  0.42697157037567},
-{ 0.95437383549973,  0.97002324109952}, { 0.99578905365569, -0.54106826257356},
-{ 0.28058259829990, -0.85361420634036}, { 0.85256524470573, -0.64567607735589},
-{-0.50608540105128, -0.65846015480300}, {-0.97210735183243, -0.23095213067791},
-{ 0.95424048234441, -0.99240147091219}, {-0.96926570524023,  0.73775654896574},
-{ 0.30872163214726,  0.41514960556126}, {-0.24523839572639,  0.63206633394807},
-{-0.33813265086024, -0.38661779441897}, {-0.05826828420146, -0.06940774188029},
-{-0.22898461455054,  0.97054853316316}, {-0.18509915019881,  0.47565762892084},
-{-0.10488238045009, -0.87769947402394}, {-0.71886586182037,  0.78030982480538},
-{ 0.99793873738654,  0.90041310491497}, { 0.57563307626120, -0.91034337352097},
-{ 0.28909646383717,  0.96307783970534}, { 0.42188998312520,  0.48148651230437},
-{ 0.93335049681047, -0.43537023883588}, {-0.97087374418267,  0.86636445711364},
-{ 0.36722871286923,  0.65291654172961}, {-0.81093025665696,  0.08778370229363},
-{-0.26240603062237, -0.92774095379098}, { 0.83996497984604,  0.55839849139647},
-{-0.99909615720225, -0.96024605713970}, { 0.74649464155061,  0.12144893606462},
-{-0.74774595569805, -0.26898062008959}, { 0.95781667469567, -0.79047927052628},
-{ 0.95472308713099, -0.08588776019550}, { 0.48708332746299,  0.99999041579432},
-{ 0.46332038247497,  0.10964126185063}, {-0.76497004940162,  0.89210929242238},
-{ 0.57397389364339,  0.35289703373760}, { 0.75374316974495,  0.96705214651335},
-{-0.59174397685714, -0.89405370422752}, { 0.75087906691890, -0.29612672982396},
-{-0.98607857336230,  0.25034911730023}, {-0.40761056640505, -0.90045573444695},
-{ 0.66929266740477,  0.98629493401748}, {-0.97463695257310, -0.00190223301301},
-{ 0.90145509409859,  0.99781390365446}, {-0.87259289048043,  0.99233587353666},
-{-0.91529461447692, -0.15698707534206}, {-0.03305738840705, -0.37205262859764},
-{ 0.07223051368337, -0.88805001733626}, { 0.99498012188353,  0.97094358113387},
-{-0.74904939500519,  0.99985483641521}, { 0.04585228574211,  0.99812337444082},
-{-0.89054954257993, -0.31791913188064}, {-0.83782144651251,  0.97637632547466},
-{ 0.33454804933804, -0.86231516800408}, {-0.99707579362824,  0.93237990079441},
-{-0.22827527843994,  0.18874759397997}, { 0.67248046289143, -0.03646211390569},
-{-0.05146538187944, -0.92599700120679}, { 0.99947295749905,  0.93625229707912},
-{ 0.66951124390363,  0.98905825623893}, {-0.99602956559179, -0.44654715757688},
-{ 0.82104905483590,  0.99540741724928}, { 0.99186510988782,  0.72023001312947},
-{-0.65284592392918,  0.52186723253637}, { 0.93885443798188, -0.74895312615259},
-{ 0.96735248738388,  0.90891816978629}, {-0.22225968841114,  0.57124029781228},
-{-0.44132783753414, -0.92688840659280}, {-0.85694974219574,  0.88844532719844},
-{ 0.91783042091762, -0.46356892383970}, { 0.72556974415690, -0.99899555770747},
-{-0.99711581834508,  0.58211560180426}, { 0.77638976371966,  0.94321834873819},
-{ 0.07717324253925,  0.58638399856595}, {-0.56049829194163,  0.82522301569036},
-{ 0.98398893639988,  0.39467440420569}, { 0.47546946844938,  0.68613044836811},
-{ 0.65675089314631,  0.18331637134880}, { 0.03273375457980, -0.74933109564108},
-{-0.38684144784738,  0.51337349030406}, {-0.97346267944545, -0.96549364384098},
-{-0.53282156061942, -0.91423265091354}, { 0.99817310731176,  0.61133572482148},
-{-0.50254500772635, -0.88829338134294}, { 0.01995873238855,  0.85223515096765},
-{ 0.99930381973804,  0.94578896296649}, { 0.82907767600783, -0.06323442598128},
-{-0.58660709669728,  0.96840773806582}, {-0.17573736667267, -0.48166920859485},
-{ 0.83434292401346, -0.13023450646997}, { 0.05946491307025,  0.20511047074866},
-{ 0.81505484574602, -0.94685947861369}, {-0.44976380954860,  0.40894572671545},
-{-0.89746474625671,  0.99846578838537}, { 0.39677256130792, -0.74854668609359},
-{-0.07588948563079,  0.74096214084170}, { 0.76343198951445,  0.41746629422634},
-{-0.74490104699626,  0.94725911744610}, { 0.64880119792759,  0.41336660830571},
-{ 0.62319537462542, -0.93098313552599}, { 0.42215817594807, -0.07712787385208},
-{ 0.02704554141885, -0.05417518053666}, { 0.80001773566818,  0.91542195141039},
-{-0.79351832348816, -0.36208897989136}, { 0.63872359151636,  0.08128252493444},
-{ 0.52890520960295,  0.60048872455592}, { 0.74238552914587,  0.04491915291044},
-{ 0.99096131449250, -0.19451182854402}, {-0.80412329643109, -0.88513818199457},
-{-0.64612616129736,  0.72198674804544}, { 0.11657770663191, -0.83662833815041},
-{-0.95053182488101, -0.96939905138082}, {-0.62228872928622,  0.82767262846661},
-{ 0.03004475787316, -0.99738896333384}, {-0.97987214341034,  0.36526129686425},
-{-0.99986980746200, -0.36021610299715}, { 0.89110648599879, -0.97894250343044},
-{ 0.10407960510582,  0.77357793811619}, { 0.95964737821728, -0.35435818285502},
-{ 0.50843233159162,  0.96107691266205}, { 0.17006334670615, -0.76854025314829},
-{ 0.25872675063360,  0.99893303933816}, {-0.01115998681937,  0.98496019742444},
-{-0.79598702973261,  0.97138411318894}, {-0.99264708948101, -0.99542822402536},
-{-0.99829663752818,  0.01877138824311}, {-0.70801016548184,  0.33680685948117},
-{-0.70467057786826,  0.93272777501857}, { 0.99846021905254, -0.98725746254433},
-{-0.63364968534650, -0.16473594423746}, {-0.16258217500792, -0.95939125400802},
-{-0.43645594360633, -0.94805030113284}, {-0.99848471702976,  0.96245166923809},
-{-0.16796458968998, -0.98987511890470}, {-0.87979225745213, -0.71725725041680},
-{ 0.44183099021786, -0.93568974498761}, { 0.93310180125532, -0.99913308068246},
-{-0.93941931782002, -0.56409379640356}, {-0.88590003188677,  0.47624600491382},
-{ 0.99971463703691, -0.83889954253462}, {-0.75376385639978,  0.00814643438625},
-{ 0.93887685615875, -0.11284528204636}, { 0.85126435782309,  0.52349251543547},
-{ 0.39701421446381,  0.81779634174316}, {-0.37024464187437, -0.87071656222959},
-{-0.36024828242896,  0.34655735648287}, {-0.93388812549209, -0.84476541096429},
-{-0.65298804552119, -0.18439575450921}, { 0.11960319006843,  0.99899346780168},
-{ 0.94292565553160,  0.83163906518293}, { 0.75081145286948, -0.35533223142265},
-{ 0.56721979748394, -0.24076836414499}, { 0.46857766746029, -0.30140233457198},
-{ 0.97312313923635, -0.99548191630031}, {-0.38299976567017,  0.98516909715427},
-{ 0.41025800019463,  0.02116736935734}, { 0.09638062008048,  0.04411984381457},
-{-0.85283249275397,  0.91475563922421}, { 0.88866808958124, -0.99735267083226},
-{-0.48202429536989, -0.96805608884164}, { 0.27572582416567,  0.58634753335832},
-{-0.65889129659168,  0.58835634138583}, { 0.98838086953732,  0.99994349600236},
-{-0.20651349620689,  0.54593044066355}, {-0.62126416356920, -0.59893681700392},
-{ 0.20320105410437, -0.86879180355289}, {-0.97790548600584,  0.96290806999242},
-{ 0.11112534735126,  0.21484763313301}, {-0.41368337314182,  0.28216837680365},
-{ 0.24133038992960,  0.51294362630238}, {-0.66393410674885, -0.08249679629081},
-{-0.53697829178752, -0.97649903936228}, {-0.97224737889348,  0.22081333579837},
-{ 0.87392477144549, -0.12796173740361}, { 0.19050361015753,  0.01602615387195},
-{-0.46353441212724, -0.95249041539006}, {-0.07064096339021, -0.94479803205886},
-{-0.92444085484466, -0.10457590187436}, {-0.83822593578728, -0.01695043208885},
-{ 0.75214681811150, -0.99955681042665}, {-0.42102998829339,  0.99720941999394},
-{-0.72094786237696, -0.35008961934255}, { 0.78843311019251,  0.52851398958271},
-{ 0.97394027897442, -0.26695944086561}, { 0.99206463477946, -0.57010120849429},
-{ 0.76789609461795, -0.76519356730966}, {-0.82002421836409, -0.73530179553767},
-{ 0.81924990025724,  0.99698425250579}, {-0.26719850873357,  0.68903369776193},
-{-0.43311260380975,  0.85321815947490}, { 0.99194979673836,  0.91876249766422},
-{-0.80692001248487, -0.32627540663214}, { 0.43080003649976, -0.21919095636638},
-{ 0.67709491937357, -0.95478075822906}, { 0.56151770568316, -0.70693811747778},
-{ 0.10831862810749, -0.08628837174592}, { 0.91229417540436, -0.65987351408410},
-{-0.48972893932274,  0.56289246362686}, {-0.89033658689697, -0.71656563987082},
-{ 0.65269447475094,  0.65916004833932}, { 0.67439478141121, -0.81684380846796},
-{-0.47770832416973, -0.16789556203025}, {-0.99715979260878, -0.93565784007648},
-{-0.90889593602546,  0.62034397054380}, {-0.06618622548177, -0.23812217221359},
-{ 0.99430266919728,  0.18812555317553}, { 0.97686402381843, -0.28664534366620},
-{ 0.94813650221268, -0.97506640027128}, {-0.95434497492853, -0.79607978501983},
-{-0.49104783137150,  0.32895214359663}, { 0.99881175120751,  0.88993983831354},
-{ 0.50449166760303, -0.85995072408434}, { 0.47162891065108, -0.18680204049569},
-{-0.62081581361840,  0.75000676218956}, {-0.43867015250812,  0.99998069244322},
-{ 0.98630563232075, -0.53578899600662}, {-0.61510362277374, -0.89515019899997},
-{-0.03841517601843, -0.69888815681179}, {-0.30102157304644, -0.07667808922205},
-{ 0.41881284182683,  0.02188098922282}, {-0.86135454941237,  0.98947480909359},
-{ 0.67226861393788, -0.13494389011014}, {-0.70737398842068, -0.76547349325992},
-{ 0.94044946687963,  0.09026201157416}, {-0.82386352534327,  0.08924768823676},
-{-0.32070666698656,  0.50143421908753}, { 0.57593163224487, -0.98966422921509},
-{-0.36326018419965,  0.07440243123228}, { 0.99979044674350, -0.14130287347405},
-{-0.92366023326932, -0.97979298068180}, {-0.44607178518598, -0.54233252016394},
-{ 0.44226800932956,  0.71326756742752}, { 0.03671907158312,  0.63606389366675},
-{ 0.52175424682195, -0.85396826735705}, {-0.94701139690956, -0.01826348194255},
-{-0.98759606946049,  0.82288714303073}, { 0.87434794743625,  0.89399495655433},
-{-0.93412041758744,  0.41374052024363}, { 0.96063943315511,  0.93116709541280},
-{ 0.97534253457837,  0.86150930812689}, { 0.99642466504163,  0.70190043427512},
-{-0.94705089665984, -0.29580042814306}, { 0.91599807087376, -0.98147830385781},
+const DECLARE_ALIGNED(16, INTFLOAT, AAC_RENAME(ff_sbr_noise_table))[][2] = {
+{Q31(-0.99948153278296f), Q31(-0.59483417516607f)}, {Q31( 0.97113454393991f), Q31(-0.67528515225647f)},
+{Q31( 0.14130051758487f), Q31(-0.95090983575689f)}, {Q31(-0.47005496701697f), Q31(-0.37340549728647f)},
+{Q31( 0.80705063769351f), Q31( 0.29653668284408f)}, {Q31(-0.38981478896926f), Q31( 0.89572605717087f)},
+{Q31(-0.01053049862020f), Q31(-0.66959058036166f)}, {Q31(-0.91266367957293f), Q31(-0.11522938140034f)},
+{Q31( 0.54840422910309f), Q31( 0.75221367176302f)}, {Q31( 0.40009252867955f), Q31(-0.98929400334421f)},
+{Q31(-0.99867974711855f), Q31(-0.88147068645358f)}, {Q31(-0.95531076805040f), Q31( 0.90908757154593f)},
+{Q31(-0.45725933317144f), Q31(-0.56716323646760f)}, {Q31(-0.72929675029275f), Q31(-0.98008272727324f)},
+{Q31( 0.75622801399036f), Q31( 0.20950329995549f)}, {Q31( 0.07069442601050f), Q31(-0.78247898470706f)},
+{Q31( 0.74496252926055f), Q31(-0.91169004445807f)}, {Q31(-0.96440182703856f), Q31(-0.94739918296622f)},
+{Q31( 0.30424629369539f), Q31(-0.49438267012479f)}, {Q31( 0.66565033746925f), Q31( 0.64652935542491f)},
+{Q31( 0.91697008020594f), Q31( 0.17514097332009f)}, {Q31(-0.70774918760427f), Q31( 0.52548653416543f)},
+{Q31(-0.70051415345560f), Q31(-0.45340028808763f)}, {Q31(-0.99496513054797f), Q31(-0.90071908066973f)},
+{Q31( 0.98164490790123f), Q31(-0.77463155528697f)}, {Q31(-0.54671580548181f), Q31(-0.02570928536004f)},
+{Q31(-0.01689629065389f), Q31( 0.00287506445732f)}, {Q31(-0.86110349531986f), Q31( 0.42548583726477f)},
+{Q31(-0.98892980586032f), Q31(-0.87881132267556f)}, {Q31( 0.51756627678691f), Q31( 0.66926784710139f)},
+{Q31(-0.99635026409640f), Q31(-0.58107730574765f)}, {Q31(-0.99969370862163f), Q31( 0.98369989360250f)},
+{Q31( 0.55266258627194f), Q31( 0.59449057465591f)}, {Q31( 0.34581177741673f), Q31( 0.94879421061866f)},
+{Q31( 0.62664209577999f), Q31(-0.74402970906471f)}, {Q31(-0.77149701404973f), Q31(-0.33883658042801f)},
+{Q31(-0.91592244254432f), Q31( 0.03687901376713f)}, {Q31(-0.76285492357887f), Q31(-0.91371867919124f)},
+{Q31( 0.79788337195331f), Q31(-0.93180971199849f)}, {Q31( 0.54473080610200f), Q31(-0.11919206037186f)},
+{Q31(-0.85639281671058f), Q31( 0.42429854760451f)}, {Q31(-0.92882402971423f), Q31( 0.27871809078609f)},
+{Q31(-0.11708371046774f), Q31(-0.99800843444966f)}, {Q31( 0.21356749817493f), Q31(-0.90716295627033f)},
+{Q31(-0.76191692573909f), Q31( 0.99768118356265f)}, {Q31( 0.98111043100884f), Q31(-0.95854459734407f)},
+{Q31(-0.85913269895572f), Q31( 0.95766566168880f)}, {Q31(-0.93307242253692f), Q31( 0.49431757696466f)},
+{Q31( 0.30485754879632f), Q31(-0.70540034357529f)}, {Q31( 0.85289650925190f), Q31( 0.46766131791044f)},
+{Q31( 0.91328082618125f), Q31(-0.99839597361769f)}, {Q31(-0.05890199924154f), Q31( 0.70741827819497f)},
+{Q31( 0.28398686150148f), Q31( 0.34633555702188f)}, {Q31( 0.95258164539612f), Q31(-0.54893416026939f)},
+{Q31(-0.78566324168507f), Q31(-0.75568541079691f)}, {Q31(-0.95789495447877f), Q31(-0.20423194696966f)},
+{Q31( 0.82411158711197f), Q31( 0.96654618432562f)}, {Q31(-0.65185446735885f), Q31(-0.88734990773289f)},
+{Q31(-0.93643603134666f), Q31( 0.99870790442385f)}, {Q31( 0.91427159529618f), Q31(-0.98290505544444f)},
+{Q31(-0.70395684036886f), Q31( 0.58796798221039f)}, {Q31( 0.00563771969365f), Q31( 0.61768196727244f)},
+{Q31( 0.89065051931895f), Q31( 0.52783352697585f)}, {Q31(-0.68683707712762f), Q31( 0.80806944710339f)},
+{Q31( 0.72165342518718f), Q31(-0.69259857349564f)}, {Q31(-0.62928247730667f), Q31( 0.13627037407335f)},
+{Q31( 0.29938434065514f), Q31(-0.46051329682246f)}, {Q31(-0.91781958879280f), Q31(-0.74012716684186f)},
+{Q31( 0.99298717043688f), Q31( 0.40816610075661f)}, {Q31( 0.82368298622748f), Q31(-0.74036047190173f)},
+{Q31(-0.98512833386833f), Q31(-0.99972330709594f)}, {Q31(-0.95915368242257f), Q31(-0.99237800466040f)},
+{Q31(-0.21411126572790f), Q31(-0.93424819052545f)}, {Q31(-0.68821476106884f), Q31(-0.26892306315457f)},
+{Q31( 0.91851997982317f), Q31( 0.09358228901785f)}, {Q31(-0.96062769559127f), Q31( 0.36099095133739f)},
+{Q31( 0.51646184922287f), Q31(-0.71373332873917f)}, {Q31( 0.61130721139669f), Q31( 0.46950141175917f)},
+{Q31( 0.47336129371299f), Q31(-0.27333178296162f)}, {Q31( 0.90998308703519f), Q31( 0.96715662938132f)},
+{Q31( 0.44844799194357f), Q31( 0.99211574628306f)}, {Q31( 0.66614891079092f), Q31( 0.96590176169121f)},
+{Q31( 0.74922239129237f), Q31(-0.89879858826087f)}, {Q31(-0.99571588506485f), Q31( 0.52785521494349f)},
+{Q31( 0.97401082477563f), Q31(-0.16855870075190f)}, {Q31( 0.72683747733879f), Q31(-0.48060774432251f)},
+{Q31( 0.95432193457128f), Q31( 0.68849603408441f)}, {Q31(-0.72962208425191f), Q31(-0.76608443420917f)},
+{Q31(-0.85359479233537f), Q31( 0.88738125901579f)}, {Q31(-0.81412430338535f), Q31(-0.97480768049637f)},
+{Q31(-0.87930772356786f), Q31( 0.74748307690436f)}, {Q31(-0.71573331064977f), Q31(-0.98570608178923f)},
+{Q31( 0.83524300028228f), Q31( 0.83702537075163f)}, {Q31(-0.48086065601423f), Q31(-0.98848504923531f)},
+{Q31( 0.97139128574778f), Q31( 0.80093621198236f)}, {Q31( 0.51992825347895f), Q31( 0.80247631400510f)},
+{Q31(-0.00848591195325f), Q31(-0.76670128000486f)}, {Q31(-0.70294374303036f), Q31( 0.55359910445577f)},
+{Q31(-0.95894428168140f), Q31(-0.43265504344783f)}, {Q31( 0.97079252950321f), Q31( 0.09325857238682f)},
+{Q31(-0.92404293670797f), Q31( 0.85507704027855f)}, {Q31(-0.69506469500450f), Q31( 0.98633412625459f)},
+{Q31( 0.26559203620024f), Q31( 0.73314307966524f)}, {Q31( 0.28038443336943f), Q31( 0.14537913654427f)},
+{Q31(-0.74138124825523f), Q31( 0.99310339807762f)}, {Q31(-0.01752795995444f), Q31(-0.82616635284178f)},
+{Q31(-0.55126773094930f), Q31(-0.98898543862153f)}, {Q31( 0.97960898850996f), Q31(-0.94021446752851f)},
+{Q31(-0.99196309146936f), Q31( 0.67019017358456f)}, {Q31(-0.67684928085260f), Q31( 0.12631491649378f)},
+{Q31( 0.09140039465500f), Q31(-0.20537731453108f)}, {Q31(-0.71658965751996f), Q31(-0.97788200391224f)},
+{Q31( 0.81014640078925f), Q31( 0.53722648362443f)}, {Q31( 0.40616991671205f), Q31(-0.26469008598449f)},
+{Q31(-0.67680188682972f), Q31( 0.94502052337695f)}, {Q31( 0.86849774348749f), Q31(-0.18333598647899f)},
+{Q31(-0.99500381284851f), Q31(-0.02634122068550f)}, {Q31( 0.84329189340667f), Q31( 0.10406957462213f)},
+{Q31(-0.09215968531446f), Q31( 0.69540012101253f)}, {Q31( 0.99956173327206f), Q31(-0.12358542001404f)},
+{Q31(-0.79732779473535f), Q31(-0.91582524736159f)}, {Q31( 0.96349973642406f), Q31( 0.96640458041000f)},
+{Q31(-0.79942778496547f), Q31( 0.64323902822857f)}, {Q31(-0.11566039853896f), Q31( 0.28587846253726f)},
+{Q31(-0.39922954514662f), Q31( 0.94129601616966f)}, {Q31( 0.99089197565987f), Q31(-0.92062625581587f)},
+{Q31( 0.28631285179909f), Q31(-0.91035047143603f)}, {Q31(-0.83302725605608f), Q31(-0.67330410892084f)},
+{Q31( 0.95404443402072f), Q31( 0.49162765398743f)}, {Q31(-0.06449863579434f), Q31( 0.03250560813135f)},
+{Q31(-0.99575054486311f), Q31( 0.42389784469507f)}, {Q31(-0.65501142790847f), Q31( 0.82546114655624f)},
+{Q31(-0.81254441908887f), Q31(-0.51627234660629f)}, {Q31(-0.99646369485481f), Q31( 0.84490533520752f)},
+{Q31( 0.00287840603348f), Q31( 0.64768261158166f)}, {Q31( 0.70176989408455f), Q31(-0.20453028573322f)},
+{Q31( 0.96361882270190f), Q31( 0.40706967140989f)}, {Q31(-0.68883758192426f), Q31( 0.91338958840772f)},
+{Q31(-0.34875585502238f), Q31( 0.71472290693300f)}, {Q31( 0.91980081243087f), Q31( 0.66507455644919f)},
+{Q31(-0.99009048343881f), Q31( 0.85868021604848f)}, {Q31( 0.68865791458395f), Q31( 0.55660316809678f)},
+{Q31(-0.99484402129368f), Q31(-0.20052559254934f)}, {Q31( 0.94214511408023f), Q31(-0.99696425367461f)},
+{Q31(-0.67414626793544f), Q31( 0.49548221180078f)}, {Q31(-0.47339353684664f), Q31(-0.85904328834047f)},
+{Q31( 0.14323651387360f), Q31(-0.94145598222488f)}, {Q31(-0.29268293575672f), Q31( 0.05759224927952f)},
+{Q31( 0.43793861458754f), Q31(-0.78904969892724f)}, {Q31(-0.36345126374441f), Q31( 0.64874435357162f)},
+{Q31(-0.08750604656825f), Q31( 0.97686944362527f)}, {Q31(-0.96495267812511f), Q31(-0.53960305946511f)},
+{Q31( 0.55526940659947f), Q31( 0.78891523734774f)}, {Q31( 0.73538215752630f), Q31( 0.96452072373404f)},
+{Q31(-0.30889773919437f), Q31(-0.80664389776860f)}, {Q31( 0.03574995626194f), Q31(-0.97325616900959f)},
+{Q31( 0.98720684660488f), Q31( 0.48409133691962f)}, {Q31(-0.81689296271203f), Q31(-0.90827703628298f)},
+{Q31( 0.67866860118215f), Q31( 0.81284503870856f)}, {Q31(-0.15808569732583f), Q31( 0.85279555024382f)},
+{Q31( 0.80723395114371f), Q31(-0.24717418514605f)}, {Q31( 0.47788757329038f), Q31(-0.46333147839295f)},
+{Q31( 0.96367554763201f), Q31( 0.38486749303242f)}, {Q31(-0.99143875716818f), Q31(-0.24945277239809f)},
+{Q31( 0.83081876925833f), Q31(-0.94780851414763f)}, {Q31(-0.58753191905341f), Q31( 0.01290772389163f)},
+{Q31( 0.95538108220960f), Q31(-0.85557052096538f)}, {Q31(-0.96490920476211f), Q31(-0.64020970923102f)},
+{Q31(-0.97327101028521f), Q31( 0.12378128133110f)}, {Q31( 0.91400366022124f), Q31( 0.57972471346930f)},
+{Q31(-0.99925837363824f), Q31( 0.71084847864067f)}, {Q31(-0.86875903507313f), Q31(-0.20291699203564f)},
+{Q31(-0.26240034795124f), Q31(-0.68264554369108f)}, {Q31(-0.24664412953388f), Q31(-0.87642273115183f)},
+{Q31( 0.02416275806869f), Q31( 0.27192914288905f)}, {Q31( 0.82068619590515f), Q31(-0.85087787994476f)},
+{Q31( 0.88547373760759f), Q31(-0.89636802901469f)}, {Q31(-0.18173078152226f), Q31(-0.26152145156800f)},
+{Q31( 0.09355476558534f), Q31( 0.54845123045604f)}, {Q31(-0.54668414224090f), Q31( 0.95980774020221f)},
+{Q31( 0.37050990604091f), Q31(-0.59910140383171f)}, {Q31(-0.70373594262891f), Q31( 0.91227665827081f)},
+{Q31(-0.34600785879594f), Q31(-0.99441426144200f)}, {Q31(-0.68774481731008f), Q31(-0.30238837956299f)},
+{Q31(-0.26843291251234f), Q31( 0.83115668004362f)}, {Q31( 0.49072334613242f), Q31(-0.45359708737775f)},
+{Q31( 0.38975993093975f), Q31( 0.95515358099121f)}, {Q31(-0.97757125224150f), Q31( 0.05305894580606f)},
+{Q31(-0.17325552859616f), Q31(-0.92770672250494f)}, {Q31( 0.99948035025744f), Q31( 0.58285545563426f)},
+{Q31(-0.64946246527458f), Q31( 0.68645507104960f)}, {Q31(-0.12016920576437f), Q31(-0.57147322153312f)},
+{Q31(-0.58947456517751f), Q31(-0.34847132454388f)}, {Q31(-0.41815140454465f), Q31( 0.16276422358861f)},
+{Q31( 0.99885650204884f), Q31( 0.11136095490444f)}, {Q31(-0.56649614128386f), Q31(-0.90494866361587f)},
+{Q31( 0.94138021032330f), Q31( 0.35281916733018f)}, {Q31(-0.75725076534641f), Q31( 0.53650549640587f)},
+{Q31( 0.20541973692630f), Q31(-0.94435144369918f)}, {Q31( 0.99980371023351f), Q31( 0.79835913565599f)},
+{Q31( 0.29078277605775f), Q31( 0.35393777921520f)}, {Q31(-0.62858772103030f), Q31( 0.38765693387102f)},
+{Q31( 0.43440904467688f), Q31(-0.98546330463232f)}, {Q31(-0.98298583762390f), Q31( 0.21021524625209f)},
+{Q31( 0.19513029146934f), Q31(-0.94239832251867f)}, {Q31(-0.95476662400101f), Q31( 0.98364554179143f)},
+{Q31( 0.93379635304810f), Q31(-0.70881994583682f)}, {Q31(-0.85235410573336f), Q31(-0.08342347966410f)},
+{Q31(-0.86425093011245f), Q31(-0.45795025029466f)}, {Q31( 0.38879779059045f), Q31( 0.97274429344593f)},
+{Q31( 0.92045124735495f), Q31(-0.62433652524220f)}, {Q31( 0.89162532251878f), Q31( 0.54950955570563f)},
+{Q31(-0.36834336949252f), Q31( 0.96458298020975f)}, {Q31( 0.93891760988045f), Q31(-0.89968353740388f)},
+{Q31( 0.99267657565094f), Q31(-0.03757034316958f)}, {Q31(-0.94063471614176f), Q31( 0.41332338538963f)},
+{Q31( 0.99740224117019f), Q31(-0.16830494996370f)}, {Q31(-0.35899413170555f), Q31(-0.46633226649613f)},
+{Q31( 0.05237237274947f), Q31(-0.25640361602661f)}, {Q31( 0.36703583957424f), Q31(-0.38653265641875f)},
+{Q31( 0.91653180367913f), Q31(-0.30587628726597f)}, {Q31( 0.69000803499316f), Q31( 0.90952171386132f)},
+{Q31(-0.38658751133527f), Q31( 0.99501571208985f)}, {Q31(-0.29250814029851f), Q31( 0.37444994344615f)},
+{Q31(-0.60182204677608f), Q31( 0.86779651036123f)}, {Q31(-0.97418588163217f), Q31( 0.96468523666475f)},
+{Q31( 0.88461574003963f), Q31( 0.57508405276414f)}, {Q31( 0.05198933055162f), Q31( 0.21269661669964f)},
+{Q31(-0.53499621979720f), Q31( 0.97241553731237f)}, {Q31(-0.49429560226497f), Q31( 0.98183865291903f)},
+{Q31(-0.98935142339139f), Q31(-0.40249159006933f)}, {Q31(-0.98081380091130f), Q31(-0.72856895534041f)},
+{Q31(-0.27338148835532f), Q31( 0.99950922447209f)}, {Q31( 0.06310802338302f), Q31(-0.54539587529618f)},
+{Q31(-0.20461677199539f), Q31(-0.14209977628489f)}, {Q31( 0.66223843141647f), Q31( 0.72528579940326f)},
+{Q31(-0.84764345483665f), Q31( 0.02372316801261f)}, {Q31(-0.89039863483811f), Q31( 0.88866581484602f)},
+{Q31( 0.95903308477986f), Q31( 0.76744927173873f)}, {Q31( 0.73504123909879f), Q31(-0.03747203173192f)},
+{Q31(-0.31744434966056f), Q31(-0.36834111883652f)}, {Q31(-0.34110827591623f), Q31( 0.40211222807691f)},
+{Q31( 0.47803883714199f), Q31(-0.39423219786288f)}, {Q31( 0.98299195879514f), Q31( 0.01989791390047f)},
+{Q31(-0.30963073129751f), Q31(-0.18076720599336f)}, {Q31( 0.99992588229018f), Q31(-0.26281872094289f)},
+{Q31(-0.93149731080767f), Q31(-0.98313162570490f)}, {Q31( 0.99923472302773f), Q31(-0.80142993767554f)},
+{Q31(-0.26024169633417f), Q31(-0.75999759855752f)}, {Q31(-0.35712514743563f), Q31( 0.19298963768574f)},
+{Q31(-0.99899084509530f), Q31( 0.74645156992493f)}, {Q31( 0.86557171579452f), Q31( 0.55593866696299f)},
+{Q31( 0.33408042438752f), Q31( 0.86185953874709f)}, {Q31( 0.99010736374716f), Q31( 0.04602397576623f)},
+{Q31(-0.66694269691195f), Q31(-0.91643611810148f)}, {Q31( 0.64016792079480f), Q31( 0.15649530836856f)},
+{Q31( 0.99570534804836f), Q31( 0.45844586038111f)}, {Q31(-0.63431466947340f), Q31( 0.21079116459234f)},
+{Q31(-0.07706847005931f), Q31(-0.89581437101329f)}, {Q31( 0.98590090577724f), Q31( 0.88241721133981f)},
+{Q31( 0.80099335254678f), Q31(-0.36851896710853f)}, {Q31( 0.78368131392666f), Q31( 0.45506999802597f)},
+{Q31( 0.08707806671691f), Q31( 0.80938994918745f)}, {Q31(-0.86811883080712f), Q31( 0.39347308654705f)},
+{Q31(-0.39466529740375f), Q31(-0.66809432114456f)}, {Q31( 0.97875325649683f), Q31(-0.72467840967746f)},
+{Q31(-0.95038560288864f), Q31( 0.89563219587625f)}, {Q31( 0.17005239424212f), Q31( 0.54683053962658f)},
+{Q31(-0.76910792026848f), Q31(-0.96226617549298f)}, {Q31( 0.99743281016846f), Q31( 0.42697157037567f)},
+{Q31( 0.95437383549973f), Q31( 0.97002324109952f)}, {Q31( 0.99578905365569f), Q31(-0.54106826257356f)},
+{Q31( 0.28058259829990f), Q31(-0.85361420634036f)}, {Q31( 0.85256524470573f), Q31(-0.64567607735589f)},
+{Q31(-0.50608540105128f), Q31(-0.65846015480300f)}, {Q31(-0.97210735183243f), Q31(-0.23095213067791f)},
+{Q31( 0.95424048234441f), Q31(-0.99240147091219f)}, {Q31(-0.96926570524023f), Q31( 0.73775654896574f)},
+{Q31( 0.30872163214726f), Q31( 0.41514960556126f)}, {Q31(-0.24523839572639f), Q31( 0.63206633394807f)},
+{Q31(-0.33813265086024f), Q31(-0.38661779441897f)}, {Q31(-0.05826828420146f), Q31(-0.06940774188029f)},
+{Q31(-0.22898461455054f), Q31( 0.97054853316316f)}, {Q31(-0.18509915019881f), Q31( 0.47565762892084f)},
+{Q31(-0.10488238045009f), Q31(-0.87769947402394f)}, {Q31(-0.71886586182037f), Q31( 0.78030982480538f)},
+{Q31( 0.99793873738654f), Q31( 0.90041310491497f)}, {Q31( 0.57563307626120f), Q31(-0.91034337352097f)},
+{Q31( 0.28909646383717f), Q31( 0.96307783970534f)}, {Q31( 0.42188998312520f), Q31( 0.48148651230437f)},
+{Q31( 0.93335049681047f), Q31(-0.43537023883588f)}, {Q31(-0.97087374418267f), Q31( 0.86636445711364f)},
+{Q31( 0.36722871286923f), Q31( 0.65291654172961f)}, {Q31(-0.81093025665696f), Q31( 0.08778370229363f)},
+{Q31(-0.26240603062237f), Q31(-0.92774095379098f)}, {Q31( 0.83996497984604f), Q31( 0.55839849139647f)},
+{Q31(-0.99909615720225f), Q31(-0.96024605713970f)}, {Q31( 0.74649464155061f), Q31( 0.12144893606462f)},
+{Q31(-0.74774595569805f), Q31(-0.26898062008959f)}, {Q31( 0.95781667469567f), Q31(-0.79047927052628f)},
+{Q31( 0.95472308713099f), Q31(-0.08588776019550f)}, {Q31( 0.48708332746299f), Q31( 0.99999041579432f)},
+{Q31( 0.46332038247497f), Q31( 0.10964126185063f)}, {Q31(-0.76497004940162f), Q31( 0.89210929242238f)},
+{Q31( 0.57397389364339f), Q31( 0.35289703373760f)}, {Q31( 0.75374316974495f), Q31( 0.96705214651335f)},
+{Q31(-0.59174397685714f), Q31(-0.89405370422752f)}, {Q31( 0.75087906691890f), Q31(-0.29612672982396f)},
+{Q31(-0.98607857336230f), Q31( 0.25034911730023f)}, {Q31(-0.40761056640505f), Q31(-0.90045573444695f)},
+{Q31( 0.66929266740477f), Q31( 0.98629493401748f)}, {Q31(-0.97463695257310f), Q31(-0.00190223301301f)},
+{Q31( 0.90145509409859f), Q31( 0.99781390365446f)}, {Q31(-0.87259289048043f), Q31( 0.99233587353666f)},
+{Q31(-0.91529461447692f), Q31(-0.15698707534206f)}, {Q31(-0.03305738840705f), Q31(-0.37205262859764f)},
+{Q31( 0.07223051368337f), Q31(-0.88805001733626f)}, {Q31( 0.99498012188353f), Q31( 0.97094358113387f)},
+{Q31(-0.74904939500519f), Q31( 0.99985483641521f)}, {Q31( 0.04585228574211f), Q31( 0.99812337444082f)},
+{Q31(-0.89054954257993f), Q31(-0.31791913188064f)}, {Q31(-0.83782144651251f), Q31( 0.97637632547466f)},
+{Q31( 0.33454804933804f), Q31(-0.86231516800408f)}, {Q31(-0.99707579362824f), Q31( 0.93237990079441f)},
+{Q31(-0.22827527843994f), Q31( 0.18874759397997f)}, {Q31( 0.67248046289143f), Q31(-0.03646211390569f)},
+{Q31(-0.05146538187944f), Q31(-0.92599700120679f)}, {Q31( 0.99947295749905f), Q31( 0.93625229707912f)},
+{Q31( 0.66951124390363f), Q31( 0.98905825623893f)}, {Q31(-0.99602956559179f), Q31(-0.44654715757688f)},
+{Q31( 0.82104905483590f), Q31( 0.99540741724928f)}, {Q31( 0.99186510988782f), Q31( 0.72023001312947f)},
+{Q31(-0.65284592392918f), Q31( 0.52186723253637f)}, {Q31( 0.93885443798188f), Q31(-0.74895312615259f)},
+{Q31( 0.96735248738388f), Q31( 0.90891816978629f)}, {Q31(-0.22225968841114f), Q31( 0.57124029781228f)},
+{Q31(-0.44132783753414f), Q31(-0.92688840659280f)}, {Q31(-0.85694974219574f), Q31( 0.88844532719844f)},
+{Q31( 0.91783042091762f), Q31(-0.46356892383970f)}, {Q31( 0.72556974415690f), Q31(-0.99899555770747f)},
+{Q31(-0.99711581834508f), Q31( 0.58211560180426f)}, {Q31( 0.77638976371966f), Q31( 0.94321834873819f)},
+{Q31( 0.07717324253925f), Q31( 0.58638399856595f)}, {Q31(-0.56049829194163f), Q31( 0.82522301569036f)},
+{Q31( 0.98398893639988f), Q31( 0.39467440420569f)}, {Q31( 0.47546946844938f), Q31( 0.68613044836811f)},
+{Q31( 0.65675089314631f), Q31( 0.18331637134880f)}, {Q31( 0.03273375457980f), Q31(-0.74933109564108f)},
+{Q31(-0.38684144784738f), Q31( 0.51337349030406f)}, {Q31(-0.97346267944545f), Q31(-0.96549364384098f)},
+{Q31(-0.53282156061942f), Q31(-0.91423265091354f)}, {Q31( 0.99817310731176f), Q31( 0.61133572482148f)},
+{Q31(-0.50254500772635f), Q31(-0.88829338134294f)}, {Q31( 0.01995873238855f), Q31( 0.85223515096765f)},
+{Q31( 0.99930381973804f), Q31( 0.94578896296649f)}, {Q31( 0.82907767600783f), Q31(-0.06323442598128f)},
+{Q31(-0.58660709669728f), Q31( 0.96840773806582f)}, {Q31(-0.17573736667267f), Q31(-0.48166920859485f)},
+{Q31( 0.83434292401346f), Q31(-0.13023450646997f)}, {Q31( 0.05946491307025f), Q31( 0.20511047074866f)},
+{Q31( 0.81505484574602f), Q31(-0.94685947861369f)}, {Q31(-0.44976380954860f), Q31( 0.40894572671545f)},
+{Q31(-0.89746474625671f), Q31( 0.99846578838537f)}, {Q31( 0.39677256130792f), Q31(-0.74854668609359f)},
+{Q31(-0.07588948563079f), Q31( 0.74096214084170f)}, {Q31( 0.76343198951445f), Q31( 0.41746629422634f)},
+{Q31(-0.74490104699626f), Q31( 0.94725911744610f)}, {Q31( 0.64880119792759f), Q31( 0.41336660830571f)},
+{Q31( 0.62319537462542f), Q31(-0.93098313552599f)}, {Q31( 0.42215817594807f), Q31(-0.07712787385208f)},
+{Q31( 0.02704554141885f), Q31(-0.05417518053666f)}, {Q31( 0.80001773566818f), Q31( 0.91542195141039f)},
+{Q31(-0.79351832348816f), Q31(-0.36208897989136f)}, {Q31( 0.63872359151636f), Q31( 0.08128252493444f)},
+{Q31( 0.52890520960295f), Q31( 0.60048872455592f)}, {Q31( 0.74238552914587f), Q31( 0.04491915291044f)},
+{Q31( 0.99096131449250f), Q31(-0.19451182854402f)}, {Q31(-0.80412329643109f), Q31(-0.88513818199457f)},
+{Q31(-0.64612616129736f), Q31( 0.72198674804544f)}, {Q31( 0.11657770663191f), Q31(-0.83662833815041f)},
+{Q31(-0.95053182488101f), Q31(-0.96939905138082f)}, {Q31(-0.62228872928622f), Q31( 0.82767262846661f)},
+{Q31( 0.03004475787316f), Q31(-0.99738896333384f)}, {Q31(-0.97987214341034f), Q31( 0.36526129686425f)},
+{Q31(-0.99986980746200f), Q31(-0.36021610299715f)}, {Q31( 0.89110648599879f), Q31(-0.97894250343044f)},
+{Q31( 0.10407960510582f), Q31( 0.77357793811619f)}, {Q31( 0.95964737821728f), Q31(-0.35435818285502f)},
+{Q31( 0.50843233159162f), Q31( 0.96107691266205f)}, {Q31( 0.17006334670615f), Q31(-0.76854025314829f)},
+{Q31( 0.25872675063360f), Q31( 0.99893303933816f)}, {Q31(-0.01115998681937f), Q31( 0.98496019742444f)},
+{Q31(-0.79598702973261f), Q31( 0.97138411318894f)}, {Q31(-0.99264708948101f), Q31(-0.99542822402536f)},
+{Q31(-0.99829663752818f), Q31( 0.01877138824311f)}, {Q31(-0.70801016548184f), Q31( 0.33680685948117f)},
+{Q31(-0.70467057786826f), Q31( 0.93272777501857f)}, {Q31( 0.99846021905254f), Q31(-0.98725746254433f)},
+{Q31(-0.63364968534650f), Q31(-0.16473594423746f)}, {Q31(-0.16258217500792f), Q31(-0.95939125400802f)},
+{Q31(-0.43645594360633f), Q31(-0.94805030113284f)}, {Q31(-0.99848471702976f), Q31( 0.96245166923809f)},
+{Q31(-0.16796458968998f), Q31(-0.98987511890470f)}, {Q31(-0.87979225745213f), Q31(-0.71725725041680f)},
+{Q31( 0.44183099021786f), Q31(-0.93568974498761f)}, {Q31( 0.93310180125532f), Q31(-0.99913308068246f)},
+{Q31(-0.93941931782002f), Q31(-0.56409379640356f)}, {Q31(-0.88590003188677f), Q31( 0.47624600491382f)},
+{Q31( 0.99971463703691f), Q31(-0.83889954253462f)}, {Q31(-0.75376385639978f), Q31( 0.00814643438625f)},
+{Q31( 0.93887685615875f), Q31(-0.11284528204636f)}, {Q31( 0.85126435782309f), Q31( 0.52349251543547f)},
+{Q31( 0.39701421446381f), Q31( 0.81779634174316f)}, {Q31(-0.37024464187437f), Q31(-0.87071656222959f)},
+{Q31(-0.36024828242896f), Q31( 0.34655735648287f)}, {Q31(-0.93388812549209f), Q31(-0.84476541096429f)},
+{Q31(-0.65298804552119f), Q31(-0.18439575450921f)}, {Q31( 0.11960319006843f), Q31( 0.99899346780168f)},
+{Q31( 0.94292565553160f), Q31( 0.83163906518293f)}, {Q31( 0.75081145286948f), Q31(-0.35533223142265f)},
+{Q31( 0.56721979748394f), Q31(-0.24076836414499f)}, {Q31( 0.46857766746029f), Q31(-0.30140233457198f)},
+{Q31( 0.97312313923635f), Q31(-0.99548191630031f)}, {Q31(-0.38299976567017f), Q31( 0.98516909715427f)},
+{Q31( 0.41025800019463f), Q31( 0.02116736935734f)}, {Q31( 0.09638062008048f), Q31( 0.04411984381457f)},
+{Q31(-0.85283249275397f), Q31( 0.91475563922421f)}, {Q31( 0.88866808958124f), Q31(-0.99735267083226f)},
+{Q31(-0.48202429536989f), Q31(-0.96805608884164f)}, {Q31( 0.27572582416567f), Q31( 0.58634753335832f)},
+{Q31(-0.65889129659168f), Q31( 0.58835634138583f)}, {Q31( 0.98838086953732f), Q31( 0.99994349600236f)},
+{Q31(-0.20651349620689f), Q31( 0.54593044066355f)}, {Q31(-0.62126416356920f), Q31(-0.59893681700392f)},
+{Q31( 0.20320105410437f), Q31(-0.86879180355289f)}, {Q31(-0.97790548600584f), Q31( 0.96290806999242f)},
+{Q31( 0.11112534735126f), Q31( 0.21484763313301f)}, {Q31(-0.41368337314182f), Q31( 0.28216837680365f)},
+{Q31( 0.24133038992960f), Q31( 0.51294362630238f)}, {Q31(-0.66393410674885f), Q31(-0.08249679629081f)},
+{Q31(-0.53697829178752f), Q31(-0.97649903936228f)}, {Q31(-0.97224737889348f), Q31( 0.22081333579837f)},
+{Q31( 0.87392477144549f), Q31(-0.12796173740361f)}, {Q31( 0.19050361015753f), Q31( 0.01602615387195f)},
+{Q31(-0.46353441212724f), Q31(-0.95249041539006f)}, {Q31(-0.07064096339021f), Q31(-0.94479803205886f)},
+{Q31(-0.92444085484466f), Q31(-0.10457590187436f)}, {Q31(-0.83822593578728f), Q31(-0.01695043208885f)},
+{Q31( 0.75214681811150f), Q31(-0.99955681042665f)}, {Q31(-0.42102998829339f), Q31( 0.99720941999394f)},
+{Q31(-0.72094786237696f), Q31(-0.35008961934255f)}, {Q31( 0.78843311019251f), Q31( 0.52851398958271f)},
+{Q31( 0.97394027897442f), Q31(-0.26695944086561f)}, {Q31( 0.99206463477946f), Q31(-0.57010120849429f)},
+{Q31( 0.76789609461795f), Q31(-0.76519356730966f)}, {Q31(-0.82002421836409f), Q31(-0.73530179553767f)},
+{Q31( 0.81924990025724f), Q31( 0.99698425250579f)}, {Q31(-0.26719850873357f), Q31( 0.68903369776193f)},
+{Q31(-0.43311260380975f), Q31( 0.85321815947490f)}, {Q31( 0.99194979673836f), Q31( 0.91876249766422f)},
+{Q31(-0.80692001248487f), Q31(-0.32627540663214f)}, {Q31( 0.43080003649976f), Q31(-0.21919095636638f)},
+{Q31( 0.67709491937357f), Q31(-0.95478075822906f)}, {Q31( 0.56151770568316f), Q31(-0.70693811747778f)},
+{Q31( 0.10831862810749f), Q31(-0.08628837174592f)}, {Q31( 0.91229417540436f), Q31(-0.65987351408410f)},
+{Q31(-0.48972893932274f), Q31( 0.56289246362686f)}, {Q31(-0.89033658689697f), Q31(-0.71656563987082f)},
+{Q31( 0.65269447475094f), Q31( 0.65916004833932f)}, {Q31( 0.67439478141121f), Q31(-0.81684380846796f)},
+{Q31(-0.47770832416973f), Q31(-0.16789556203025f)}, {Q31(-0.99715979260878f), Q31(-0.93565784007648f)},
+{Q31(-0.90889593602546f), Q31( 0.62034397054380f)}, {Q31(-0.06618622548177f), Q31(-0.23812217221359f)},
+{Q31( 0.99430266919728f), Q31( 0.18812555317553f)}, {Q31( 0.97686402381843f), Q31(-0.28664534366620f)},
+{Q31( 0.94813650221268f), Q31(-0.97506640027128f)}, {Q31(-0.95434497492853f), Q31(-0.79607978501983f)},
+{Q31(-0.49104783137150f), Q31( 0.32895214359663f)}, {Q31( 0.99881175120751f), Q31( 0.88993983831354f)},
+{Q31( 0.50449166760303f), Q31(-0.85995072408434f)}, {Q31( 0.47162891065108f), Q31(-0.18680204049569f)},
+{Q31(-0.62081581361840f), Q31( 0.75000676218956f)}, {Q31(-0.43867015250812f), Q31( 0.99998069244322f)},
+{Q31( 0.98630563232075f), Q31(-0.53578899600662f)}, {Q31(-0.61510362277374f), Q31(-0.89515019899997f)},
+{Q31(-0.03841517601843f), Q31(-0.69888815681179f)}, {Q31(-0.30102157304644f), Q31(-0.07667808922205f)},
+{Q31( 0.41881284182683f), Q31( 0.02188098922282f)}, {Q31(-0.86135454941237f), Q31( 0.98947480909359f)},
+{Q31( 0.67226861393788f), Q31(-0.13494389011014f)}, {Q31(-0.70737398842068f), Q31(-0.76547349325992f)},
+{Q31( 0.94044946687963f), Q31( 0.09026201157416f)}, {Q31(-0.82386352534327f), Q31( 0.08924768823676f)},
+{Q31(-0.32070666698656f), Q31( 0.50143421908753f)}, {Q31( 0.57593163224487f), Q31(-0.98966422921509f)},
+{Q31(-0.36326018419965f), Q31( 0.07440243123228f)}, {Q31( 0.99979044674350f), Q31(-0.14130287347405f)},
+{Q31(-0.92366023326932f), Q31(-0.97979298068180f)}, {Q31(-0.44607178518598f), Q31(-0.54233252016394f)},
+{Q31( 0.44226800932956f), Q31( 0.71326756742752f)}, {Q31( 0.03671907158312f), Q31( 0.63606389366675f)},
+{Q31( 0.52175424682195f), Q31(-0.85396826735705f)}, {Q31(-0.94701139690956f), Q31(-0.01826348194255f)},
+{Q31(-0.98759606946049f), Q31( 0.82288714303073f)}, {Q31( 0.87434794743625f), Q31( 0.89399495655433f)},
+{Q31(-0.93412041758744f), Q31( 0.41374052024363f)}, {Q31( 0.96063943315511f), Q31( 0.93116709541280f)},
+{Q31( 0.97534253457837f), Q31( 0.86150930812689f)}, {Q31( 0.99642466504163f), Q31( 0.70190043427512f)},
+{Q31(-0.94705089665984f), Q31(-0.29580042814306f)}, {Q31( 0.91599807087376f), Q31(-0.98147830385781f)},
 // Start of duplicated table
-{-0.99948153278296, -0.59483417516607}, { 0.97113454393991, -0.67528515225647},
-{ 0.14130051758487, -0.95090983575689}, {-0.47005496701697, -0.37340549728647},
-{ 0.80705063769351,  0.29653668284408}, {-0.38981478896926,  0.89572605717087},
-{-0.01053049862020, -0.66959058036166}, {-0.91266367957293, -0.11522938140034},
+{Q31(-0.99948153278296f), Q31(-0.59483417516607f)}, {Q31( 0.97113454393991f), Q31(-0.67528515225647f)},
+{Q31( 0.14130051758487f), Q31(-0.95090983575689f)}, {Q31(-0.47005496701697f), Q31(-0.37340549728647f)},
+{Q31( 0.80705063769351f), Q31( 0.29653668284408f)}, {Q31(-0.38981478896926f), Q31( 0.89572605717087f)},
+{Q31(-0.01053049862020f), Q31(-0.66959058036166f)}, {Q31(-0.91266367957293f), Q31(-0.11522938140034f)},
 };
 
 #endif /* AVCODEC_AACSBRDATA_H */
diff --git a/libavcodec/aactab.c b/libavcodec/aactab.c
index 25f6de29..77d8732c 100644
--- a/libavcodec/aactab.c
+++ b/libavcodec/aactab.c
@@ -29,12 +29,16 @@
 
 #include "libavutil/mem.h"
 #include "aac.h"
-#include "aac_tablegen.h"
 
 #include <stdint.h>
 
+float ff_aac_pow2sf_tab[428];
+float ff_aac_pow34sf_tab[428];
+
 DECLARE_ALIGNED(32, float,  ff_aac_kbd_long_1024)[1024];
 DECLARE_ALIGNED(32, float,  ff_aac_kbd_short_128)[128];
+DECLARE_ALIGNED(32, int,    ff_aac_kbd_long_1024_fixed)[1024];
+DECLARE_ALIGNED(32, int,    ff_aac_kbd_short_128_fixed)[128];
 
 const uint8_t ff_aac_num_swb_1024[] = {
     41, 41, 47, 49, 49, 51, 47, 47, 43, 43, 43, 40, 40
@@ -1767,6 +1771,490 @@ const DECLARE_ALIGNED(32, float, ff_aac_eld_window_512)[1920] = {
     -0.00111144, -0.00109764, -0.00108377, -0.00106989,
 };
 
+/* Q30 representation of ff_aac_eld_window_512 table */
+const DECLARE_ALIGNED(32, int, ff_aac_eld_window_512_fixed)[1920] = {
+    0x003783ba, 0x005d04f4, 0x008ae226, 0x00c02021,
+    0x00fb1804, 0x013a30a8, 0x017be9e6, 0x01bf296c,
+    0x02033204, 0x0247502c, 0x028adab0, 0x02cd9568,
+    0x030fa980, 0x03513dc0, 0x03927274, 0x03d363e0,
+    0x04142e40, 0x0454edc0, 0x0495bd48, 0x04d6a060,
+    0x051786d8, 0x05586548, 0x059935e8, 0x05d9feb0,
+    0x061acea0, 0x065bb680, 0x069cc800, 0x06de13f0,
+    0x071fa748, 0x07618b80, 0x07a3c7a8, 0x07e66da0,
+    0x082999d0, 0x086d6590, 0x08b1e640, 0x08f72850,
+    0x093d3120, 0x09840550, 0x09cba880, 0x0a1415f0,
+    0x0a5d41b0, 0x0aa720d0, 0x0af1a9a0, 0x0b3cce70,
+    0x0b887ec0, 0x0bd4ac10, 0x0c214a70, 0x0c6e5130,
+    0x0cbbba50, 0x0d098130, 0x0d57a240, 0x0da61a60,
+    0x0df4e620, 0x0e4401d0, 0x0e9369f0, 0x0ee31de0,
+    0x0f332000, 0x0f837180, 0x0fd412a0, 0x10250260,
+    0x10763f20, 0x10c7c660, 0x11199560, 0x116baa00,
+    0x11be0400, 0x1210a1c0, 0x12638180, 0x12b69ee0,
+    0x1309f3e0, 0x135d7ac0, 0x13b12dc0, 0x1404ffa0,
+    0x1458dd40, 0x14acb720, 0x15008120, 0x15543260,
+    0x15a7c460, 0x15fb3160, 0x164e7520, 0x16a193c0,
+    0x16f49740, 0x17478720, 0x179a6720, 0x17ed3720,
+    0x183ff460, 0x18929c20, 0x18e52b00, 0x19379c00,
+    0x1989e900, 0x19dc0ca0, 0x1a2e0280, 0x1a7fc400,
+    0x1ad14a00, 0x1b228ec0, 0x1b738ea0, 0x1bc44540,
+    0x1c14ada0, 0x1c64c380, 0x1cb48440, 0x1d03f420,
+    0x1d531c00, 0x1da20160, 0x1df0a660, 0x1e3f0860,
+    0x1e8d2340, 0x1edaf340, 0x1f2875e0, 0x1f75a700,
+    0x1fc281e0, 0x200f0380, 0x205b2ac0, 0x20a6f980,
+    0x20f27200, 0x213d9600, 0x21886580, 0x21d2e040,
+    0x221d0640, 0x2266d6c0, 0x22b05180, 0x22f97580,
+    0x23424280, 0x238ab880, 0x23d2d780, 0x241aa040,
+    0x246213c0, 0x24a93300, 0x24efff80, 0x25367b40,
+    0x256f68c0, 0x25b53580, 0x25faa580, 0x263fb940,
+    0x26847080, 0x26c8cbc0, 0x270ccb00, 0x27506e40,
+    0x2793b600, 0x27d6a200, 0x281932c0, 0x285b6880,
+    0x289d4400, 0x28dec5c0, 0x291feec0, 0x2960bf80,
+    0x29a137c0, 0x29e15800, 0x2a212000, 0x2a609080,
+    0x2a9fa980, 0x2ade6b40, 0x2b1cd600, 0x2b5aea00,
+    0x2b98a740, 0x2bd60d80, 0x2c131cc0, 0x2c4fd500,
+    0x2c8c3600, 0x2cc83f00, 0x2d03f040, 0x2d3f48c0,
+    0x2d7a48c0, 0x2db4ef40, 0x2def3c40, 0x2e292ec0,
+    0x2e62c700, 0x2e9c0400, 0x2ed4e580, 0x2f0d6ac0,
+    0x2f4592c0, 0x2f7d5c80, 0x2fb4c6c0, 0x2febd140,
+    0x30227b40, 0x3058c400, 0x308eab40, 0x30c43040,
+    0x30f95100, 0x312e0d00, 0x31626240, 0x31965040,
+    0x31c9d5c0, 0x31fcf240, 0x322fa480, 0x3261ec00,
+    0x3293c7c0, 0x32c53680, 0x32f63780, 0x3326c9c0,
+    0x3356ec00, 0x33869d00, 0x33b5db80, 0x33e4a700,
+    0x3412fdc0, 0x3440df40, 0x346e4a80, 0x349b3e40,
+    0x34c7ba00, 0x34f3bd80, 0x351f47c0, 0x354a5840,
+    0x3574ee40, 0x359f0900, 0x35c8a840, 0x35f1cb80,
+    0x361a71c0, 0x36429a80, 0x366a4580, 0x36917280,
+    0x36b82100, 0x36de5180, 0x37040340, 0x372936c0,
+    0x374dec40, 0x37722340, 0x3795dc40, 0x37b91780,
+    0x37dbd600, 0x37fe18c0, 0x381fe080, 0x38412e00,
+    0x38620280, 0x38825f40, 0x38a24540, 0x38c1b680,
+    0x38e0b5c0, 0x38ff4540, 0x391d6800, 0x393b20c0,
+    0x39587280, 0x39755fc0, 0x3991eb80, 0x39ae1a80,
+    0x39c9f280, 0x39e57980, 0x3a00b600, 0x3a1bae00,
+    0x3a366800, 0x3a50e9c0, 0x3a6b3a40, 0x3a8560c0,
+    0x3a9f6640, 0x3ab95400, 0x3ad332c0, 0x3aed0680,
+    0x3b06cf80, 0x3b208d40, 0x3b3a3e80, 0x3b53cb80,
+    0x3b6d0780, 0x3b85c380, 0x3b9dd0c0, 0x3bb4eb40,
+    0x3bcabac0, 0x3bdee680, 0x3bf11680, 0x3c011440,
+    0x3c179ac0, 0x3c1c4f00, 0x3c21aa40, 0x3c278880,
+    0x3c2dba80, 0x3c341140, 0x3c3a5e80, 0x3c409100,
+    0x3c46b480, 0x3c4cd5c0, 0x3c530180, 0x3c593cc0,
+    0x3c5f84c0, 0x3c65d640, 0x3c6c2e40, 0x3c728b40,
+    0x3c78ee80, 0x3c7f5840, 0x3c85c940, 0x3c8c4240,
+    0x3c92c380, 0x3c994cc0, 0x3c9fde40, 0x3ca67880,
+    0x3cad1ac0, 0x3cb3c540, 0x3cba7800, 0x3cc132c0,
+    0x3cc7f640, 0x3ccec280, 0x3cd59800, 0x3cdc76c0,
+    0x3ce35e80, 0x3cea4f00, 0x3cf147c0, 0x3cf84900,
+    0x3cff5340, 0x3d0666c0, 0x3d0d8400, 0x3d14ab40,
+    0x3d1bdc00, 0x3d2315c0, 0x3d2a5880, 0x3d31a440,
+    0x3d38f900, 0x3d405780, 0x3d47c040, 0x3d4f3300,
+    0x3d56af40, 0x3d5e3500, 0x3d65c380, 0x3d6d5ac0,
+    0x3d74fb40, 0x3d7ca540, 0x3d845900, 0x3d8c1680,
+    0x3d93dd00, 0x3d9bac80, 0x3da38400, 0x3dab6400,
+    0x3db34c80, 0x3dbb3dc0, 0x3dc33840, 0x3dcb3bc0,
+    0x3dd347c0, 0x3ddb5bc0, 0x3de37780, 0x3deb9b00,
+    0x3df3c600, 0x3dfbf940, 0x3e0434c0, 0x3e0c7840,
+    0x3e14c3c0, 0x3e1d1640, 0x3e256f80, 0x3e2dcf40,
+    0x3e363580, 0x3e3ea300, 0x3e4717c0, 0x3e4f9380,
+    0x3e581600, 0x3e609e40, 0x3e692c40, 0x3e71bf80,
+    0x3e7a5840, 0x3e82f740, 0x3e8b9c40, 0x3e944700,
+    0x3e9cf780, 0x3ea5ad00, 0x3eae66c0, 0x3eb72500,
+    0x3ebfe780, 0x3ec8af00, 0x3ed17b80, 0x3eda4d00,
+    0x3ee32340, 0x3eebfd40, 0x3ef4dac0, 0x3efdbbc0,
+    0x3f06a040, 0x3f0f88c0, 0x3f187540, 0x3f216600,
+    0x3f2a5a80, 0x3f335200, 0x3f3c4c40, 0x3f454940,
+    0x3f4e4940, 0x3f574c80, 0x3f605340, 0x3f695dc0,
+    0x3f726b40, 0x3f7b7b40, 0x3f848dc0, 0x3f8da240,
+    0x3f96b940, 0x3f9fd300, 0x3fa8f040, 0x3fb21080,
+    0x3fbb33c0, 0x3fc459c0, 0x3fcd81c0, 0x3fd6abc0,
+    0x3fdfd780, 0x3fe90480, 0x3ff23280, 0x3ffb6100,
+    0x40049f80, 0x400dd080, 0x40170400, 0x40203880,
+    0x40296f00, 0x4032a600, 0x403bde00, 0x40451680,
+    0x404e4f00, 0x40578700, 0x4060be80, 0x4069f500,
+    0x40732b80, 0x407c6280, 0x40859980, 0x408ed100,
+    0x40980800, 0x40a13f00, 0x40aa7500, 0x40b3a980,
+    0x40bcdd80, 0x40c61180, 0x40cf4500, 0x40d87800,
+    0x40e1ab00, 0x40eadc80, 0x40f40c80, 0x40fd3a80,
+    0x41066700, 0x410f9300, 0x4118bd80, 0x4121e700,
+    0x412b0f80, 0x41343580, 0x413d5880, 0x41467980,
+    0x414f9780, 0x4158b380, 0x4161cd80, 0x416ae580,
+    0x4173fb00, 0x417d0d00, 0x41861b80, 0x418f2600,
+    0x41982c80, 0x41a12f80, 0x41aa3000, 0x41b32c80,
+    0x41bc2580, 0x41c51a00, 0x41ce0900, 0x41d6f300,
+    0x41dfd800, 0x41e8b880, 0x41f19400, 0x41fa6b80,
+    0x42033d00, 0x420c0900, 0x4214cf00, 0x421d8e00,
+    0x42264680, 0x422ef980, 0x4237a680, 0x42404d80,
+    0x4248ee00, 0x42518780, 0x425a1a00, 0x4262a480,
+    0x426b2800, 0x4273a400, 0x427c1980, 0x42848880,
+    0x428cef80, 0x42954f00, 0x429da680, 0x42a5f500,
+    0x42ae3b80, 0x42b67a00, 0x42beb100, 0x42c6e080,
+    0x42cf0780, 0x42d72680, 0x42df3c00, 0x42e74880,
+    0x42ef4c80, 0x42f74880, 0x42ff3c80, 0x43072880,
+    0x430f0c80, 0x4316e800, 0x431eba00, 0x43268380,
+    0x432e4480, 0x4335fd00, 0x433dae80, 0x43455800,
+    0x434cfa00, 0x43549400, 0x435c2500, 0x4363ad80,
+    0x436b2e00, 0x4372a700, 0x437a1800, 0x43818200,
+    0x4388e400, 0x43903f00, 0x43979200, 0x439edd00,
+    0x43a62080, 0x43ad5c80, 0x43b49180, 0x43bbbf80,
+    0x43c2e800, 0x43ca0b00, 0x43d12980, 0x43d84280,
+    0x43df5200, 0x43e65500, 0x43ed4800, 0x43f43080,
+    0x43fb1c80, 0x44021b80, 0x44093a00, 0x44106480,
+    0x44176700, 0x441e0c00, 0x44241e00, 0x44297380,
+    0x4425dc00, 0x44240180, 0x441ff300, 0x4419e300,
+    0x44123f80, 0x44097500, 0x43ffe900, 0x43f5e700,
+    0x43eb9f00, 0x43e13f00, 0x43d6f200, 0x43ccbd80,
+    0x43c28400, 0x43b82780, 0x43ad8b00, 0x43a29c80,
+    0x43975180, 0x438ba080, 0x437f8180, 0x4372fd00,
+    0x43662b00, 0x43592480, 0x434c0000, 0x433ecd00,
+    0x43319180, 0x43245300, 0x43171700, 0x4309da80,
+    0x42fc9300, 0x42ef3500, 0x42e1b600, 0x42d40280,
+    0x42c60000, 0x42b79300, 0x42a8a180, 0x42991a00,
+    0x4288f200, 0x42782100, 0x42669e00, 0x42546880,
+    0x42418800, 0x422e0480, 0x4219e500, 0x42053680,
+    0x41f00980, 0x41da7080, 0x41c47b00, 0x41ae3600,
+    0x4197ab80, 0x4180e400, 0x4169e780, 0x4152bb00,
+    0x413b5e80, 0x4123d180, 0x410c1480, 0x40f42100,
+    0x40dbed00, 0x40c36c80, 0x40aa9600, 0x40915f80,
+    0x4077c100, 0x405db280, 0x40432c80, 0x40282580,
+    0x400c9280, 0x3ff068c0, 0x3fd39dc0, 0x3fb62bc0,
+    0x3f981200, 0x3f795080, 0x3f59e780, 0x3f39ebc0,
+    0x3f198680, 0x3ef8e100, 0x3ed82440, 0x3eb76c80,
+    0x3e96c940, 0x3e764900, 0x3e55f980, 0x3e35cb00,
+    0x3e1590c0, 0x3df51cc0, 0x3dd44200, 0x3db2e640,
+    0x3d910200, 0x3d6e8e40, 0x3d4b8480, 0x3d27e600,
+    0x3d03bc00, 0x3cdf0fc0, 0x3cb9eb80, 0x3c946240,
+    0x3c6e9180, 0x3c489700, 0x3c229000, 0x3bfc95c0,
+    0x3bd6bd00, 0x3bb11a80, 0x3b8bc180, 0x3b669bc0,
+    0x3b416a00, 0x3b1beb80, 0x3af5e140, 0x3acf3300,
+    0x3aa7ef80, 0x3a802780, 0x3a57eb80, 0x3a2f5880,
+    0x3a069640, 0x39ddcd40, 0x39b524c0, 0x398ca540,
+    0x39643800, 0x393bc540, 0x39133580, 0x38ea7ac0,
+    0x38c19040, 0x389871c0, 0x386f1b40, 0x38458e00,
+    0x381bd000, 0x37f1e780, 0x37c7db00, 0x379db080,
+    0x37736e80, 0x37491b00, 0x371ebcc0, 0x36f45980,
+    0x36c96600, 0x369ed300, 0x36740380, 0x3648ffc0,
+    0x361dcf40, 0x35f27a00, 0x35c70780, 0x359b7f80,
+    0x356fe9c0, 0x35444dc0, 0x3518b280, 0x34ed1940,
+    0x34c17c00, 0x3495d4c0, 0x346a1d40, 0x343e4300,
+    0x34122840, 0x33e5ae00, 0x33b8b780, 0x338b4dc0,
+    0x335d9f00, 0x332fdc00, 0x33023440, 0x32d4cc40,
+    0x32a7bc80, 0x327b1d40, 0x324f04c0, 0x32235280,
+    0x31f7b100, 0x31cbc7c0, 0x319f4140, 0x3171fb40,
+    0x31440840, 0x31157d00, 0x30e66e80, 0x30b6fc40,
+    0x30875080, 0x30579600, 0x3027f700, 0x2ff89140,
+    0x2fc976c0, 0x2f9ab880, 0x2f6c6780, 0x2f3e8780,
+    0x2f111000, 0x2ee3f800, 0x2eb73480, 0x2e8a9840,
+    0x2e5dd340, 0x2e3093c0, 0x2e028ac0, 0x2dd39680,
+    0x2da3c480, 0x2d732380, 0x2d41c400, 0x2d0fd300,
+    0x2cdd9ac0, 0x2cab6640, 0x2c797f00, 0x2c480d40,
+    0x2c171700, 0x2be6a0c0, 0x2bb6ae80, 0x2b8739c0,
+    0x2b583200, 0x2b298600, 0x2afb2400, 0x2accfa40,
+    0x2a9ef500, 0x2a710100, 0x2a430ac0, 0x2a14f9c0,
+    0x29e6b0c0, 0x29b81240, 0x29890140, 0x29596900,
+    0x29293e00, 0x28f87500, 0x28c70340, 0x2894efc0,
+    0x28625140, 0x282f4040, 0x27fbd5c0, 0x27c83540,
+    0x27948ec0, 0x27611240, 0x272def80, 0x26fb4cc0,
+    0x26c94780, 0x2697fcc0, 0x26678880, 0x2637f740,
+    0x26094540, 0x25db6dc0, 0x25ae6b40, 0x25821680,
+    0x255627c0, 0x252a55c0, 0x24fe5680, 0x24d1db40,
+    0x24a48fc0, 0x24761f40, 0x244637c0, 0x2414c900,
+    0x23e20240, 0x23ae1740, 0x23793bc0, 0x2343cc00,
+    0x230e4ac0, 0x22d93c80, 0x22a52400, 0x22725180,
+    0x2240e480, 0x2210f9c0, 0x21e2ab40, 0x21b5c7c0,
+    0x2189d2c0, 0x215e4d40, 0x2132b900, 0x2106ba80,
+    0x20da1940, 0x20ac9d80, 0x207e11c0, 0x204e77c0,
+    0x201e0880, 0x1fecfea0, 0x1fbb94e0, 0x1f8a0500,
+    0x1f59d340, 0x1f27ac20, 0x1ef67c60, 0x1ec64e40,
+    0x1e96fdc0, 0x1e686400, 0x1e3a5a00, 0x1e0cae80,
+    0x1ddf25e0, 0x1db18460, 0x1d839020, 0x1d5536e0,
+    0x1d268e80, 0x1cf7ae60, 0x1cc8aea0, 0x1c99af00,
+    0x1c6ad820, 0x1c3c5280, 0x1c0e4500, 0x1be0ab60,
+    0x1bb35620, 0x1b861400, 0x1b58b480, 0x1b2b1a00,
+    0x1afd39c0, 0x1acf09a0, 0x1aa080c0, 0x1a71b020,
+    0x1a42c2a0, 0x1a13e420, 0x19e53fc0, 0x19b6eb00,
+    0x1988e620, 0x195b3060, 0x192dc8a0, 0x1900a8a0,
+    0x18d3c4e0, 0x18a711e0, 0x187a83e0, 0x184e10e0,
+    0x1821b060, 0x17f55a00, 0x17c90580, 0x179cb100,
+    0x177060a0, 0x17441880, 0x1717dd20, 0x16ebb080,
+    0x16bf9260, 0x169382e0, 0x166781c0, 0x163b8f80,
+    0x160fade0, 0x15e3de40, 0x15b82220, 0x158c7ae0,
+    0x1560ea80, 0x15357240, 0x150a1400, 0x14ded020,
+    0x14b3a640, 0x148895a0, 0x145d9dc0, 0x1432bde0,
+    0x1407f540, 0x13dd4380, 0x13b2a860, 0x13882460,
+    0x135db880, 0x133365a0, 0x13092cc0, 0x12df0e60,
+    0x12b50aa0, 0x128b2120, 0x12615200, 0x12379da0,
+    0x120e04c0, 0x11e48820, 0x11bb2860, 0x1191e600,
+    0x1168c080, 0x113fb7a0, 0x1116cb40, 0x10edfba0,
+    0x10c54a00, 0x109cb7a0, 0x10744560, 0x104bf420,
+    0x1023c3e0, 0x0ffbb500, 0x0fd3c790, 0x0fabfbe0,
+    0x0f845290, 0x0f5ccc40, 0x0f356970, 0x0f0e2a60,
+    0x0ee70eb0, 0x0ec01610, 0x0e994040, 0x0e728d50,
+    0x0e4bfdf0, 0x0e2592c0, 0x0dff4c70, 0x0dd92af0,
+    0x0db32da0, 0x0d8d53e0, 0x0d679cf0, 0x0d420880,
+    0x0d1c9680, 0x0cf74700, 0x0cd219f0, 0x0cad0eb0,
+    0x0c882450, 0x0c6359a0, 0x0c3ead90, 0x0c1a1f80,
+    0x0bf5af40, 0x0bd15cf0, 0x0bad2870, 0x0b891440,
+    0x0b652530, 0x0b416020, 0x0b1dca30, 0x0afa6810,
+    0x0ad73ee0, 0x0ab45370, 0x0a91aac0, 0x0a6f49b0,
+    0x0a4da7f0, 0x0a2c7e20, 0x0a0ba310, 0x09eb1220,
+    0x09cac6e0, 0x09aabc70, 0x098aee40, 0x096b57a0,
+    0x094bf400, 0x092cbea0, 0x090db2e0, 0x08eecef0,
+    0x08d01360, 0x08b18110, 0x089318b0, 0x0874db00,
+    0x0856c880, 0x0838e1b0, 0x081b2730, 0x07fd99a8,
+    0x07e03a28, 0x07c309a8, 0x07a60910, 0x07893918,
+    0x076c99d0, 0x07502b90, 0x0733ee70, 0x0717e2f8,
+    0x06fc09b8, 0x06e06378, 0x06c4f0b8, 0x06a9b1c8,
+    0x068ea6a0, 0x0673cf18, 0x06592b18, 0x063ebad0,
+    0x06247ed0, 0x060a7780, 0x05f0a570, 0x05d708b8,
+    0x05bda128, 0x05a46e80, 0x058b7078, 0x0572a740,
+    0x055a1330, 0x0541b4d8, 0x05298c98, 0x05119a88,
+    0x04f9de50, 0x04e257a0, 0x04cb0630, 0x04b3ea00,
+    0x049d0378, 0x04865308, 0x046fd918, 0x045995a8,
+    0x04438860, 0x042db0d0, 0x04180ea0, 0x0402a1d0,
+    0x03ed6abc, 0x03d869b8, 0x03c39f28, 0x03af0af0,
+    0x039aaca0, 0x038683b4, 0x03728fc0, 0x035ed0b0,
+    0x034b46c4, 0x0337f254, 0x0324d3a0, 0x0311eab0,
+    0x02ff370c, 0x02ecb85c, 0x02da6e34, 0x02c858a8,
+    0x02b67820, 0x02a4cd28, 0x02935820, 0x02821920,
+    0x02710fac, 0x02603b54, 0x024f9bb4, 0x023f308c,
+    0x022ef9e8, 0x021ef7c8, 0x020f2a40, 0x01ff908e,
+    0x01f02974, 0x01e0f38a, 0x01d1ed94, 0x01c316d6,
+    0x01b46f5e, 0x01a5f720, 0x0197ae28, 0x018994ea,
+    0x017bac54, 0x016df546, 0x016070ae, 0x01532078,
+    0x01460760, 0x01392834, 0x012c85a4, 0x01201f7a,
+    0x0113f27c, 0x0107fb6c, 0x00fc36fd, 0x00f0a2d5,
+    0x00e53d51, 0x00da050f, 0x00cef88c, 0x00c41869,
+    0x00b9671f, 0x00aee754, 0x00a49b80, 0x009a8384,
+    0x00909ca6, 0x0086e400, 0x007d56e3, 0x0073f48e,
+    0x006abe70, 0x0061b5de, 0x0058dc65, 0x005033b4,
+    0x0047be30, 0x003f7e30, 0x00377619, 0x002fa4d4,
+    0x002805ee, 0x002094cb, 0x00194cb8, 0x00122856,
+    0x000b215c, 0x00043148, 0xfffd51f0, 0xfff683a0,
+    0xffefcd4d, 0xffe9362f, 0xffe2c57d, 0xffdc855c,
+    0xffd682c4, 0xffd0cad4, 0xffcb6a2c, 0xffc663bc,
+    0xffc1b06f, 0xffbd48e1, 0xffb92570, 0xffb53a54,
+    0xffb1779c, 0xffadcd38, 0xffaa2b42, 0xffa68855,
+    0xffa2e141, 0xff9f332c, 0xff9b7b9c, 0xff97bf2e,
+    0xff9409e2, 0xff9067e2, 0xff8ce556, 0xff898bf0,
+    0xff866306, 0xff8371d0, 0xff80bf63, 0xff7e4eba,
+    0xff7c1eaa, 0xff7a2e04, 0xff787b47, 0xff770280,
+    0xff75bd06, 0xff74a3f7, 0xff73b0b2, 0xff72dd02,
+    0xff72237e, 0xff717ebe, 0xff70e94c, 0xff705f59,
+    0xff6fde6a, 0xff6f6426, 0xff6eee40, 0xff6e7d0b,
+    0xff6e1359, 0xff6db403, 0xff6d61f8, 0xff6d2054,
+    0xff6cf267, 0xff6cdb76, 0xff6cdebb, 0xff6cff47,
+    0xff6d3fc9, 0xff6da306, 0xff6e2b82, 0xff6eda13,
+    0xff6fad6d, 0xff70a463, 0xff71bd9d, 0xff72f662,
+    0xff744a80, 0xff75b5c4, 0xff773409, 0xff78c0a6,
+    0xff7a5693, 0xff7bf0dc, 0xff7d8abb, 0xff7f2301,
+    0xff80bc08, 0xff825854, 0xff83fa56, 0xff85a55c,
+    0xff875d22, 0xff892598, 0xff8b025d, 0xff8cf53c,
+    0xff8efdf4, 0xff911c48, 0xff934fc9, 0xff959675,
+    0xff97ec86, 0xff9a4e35, 0xff9cb7d2, 0xff9f26cc,
+    0xffa199ce, 0xffa40f74, 0xffa6867c, 0xffa8feb2,
+    0xffab78e0, 0xffadf5c7, 0xffb07640, 0xffb2fba0,
+    0xffb587a2, 0xffb81bfb, 0xffbaba46, 0xffbd6236,
+    0xffc011a8, 0xffc2c679, 0xffc57e84, 0xffc83894,
+    0xffcaf41a, 0xffcdb0b8, 0xffd06e17, 0xffd32bf7,
+    0xffd5ea38, 0xffd8a8c3, 0xffdb6764, 0xffde25fb,
+    0xffe0e471, 0xffe3a2b2, 0xffe66087, 0xffe91da6,
+    0xffebd978, 0xffee9351, 0xfff14ab0, 0xfff3fef6,
+    0xfff6af94, 0xfff95c0c, 0xfffc03c7, 0xfffea659,
+    0x00015885, 0x0003f2e9, 0x00068a73, 0x00091e8d,
+    0x000bae7f, 0x000e39bf, 0x0010bf96, 0x00133f78,
+    0x0015b8c4, 0x00182ae4, 0x001a9558, 0x001cf7b2,
+    0x001f51e0, 0x0021a3b4, 0x0023ed25, 0x00262df2,
+    0x002865c5, 0x002a9469, 0x002cb967, 0x002ed4aa,
+    0x0030e607, 0x0032ed88, 0x0034eb2f, 0x0036de23,
+    0x0038c503, 0x003a9e4c, 0x003c68a6, 0x003e23dd,
+    0x003fd0db, 0x00417083, 0x0043038b, 0x00448adf,
+    0x00460740, 0x0047799c, 0x0048e2b2, 0x004a42af,
+    0x004b98fb, 0x004ce50b, 0x004e2654, 0x004f5b5d,
+    0x005081c3, 0x00519716, 0x00529920, 0x005386d0,
+    0x0054603f, 0x00552581, 0x0055d6cc, 0x00567558,
+    0x0057033c, 0x005782b4, 0x0057f5b6, 0x00585e46,
+    0x0058be68, 0x005917ff, 0x00596ce4, 0x0059bcc0,
+    0x005a053a, 0x005a43ee, 0x005a76ae, 0x005a9b37,
+    0x005aaf38, 0x005ab07a, 0x005a9cef, 0x005a7349,
+    0x005a3328, 0x0059dc0a, 0x00596db0, 0x0058e8e5,
+    0x00584f98, 0x0057a3c0, 0x0056e738, 0x00561bec,
+    0x005543df, 0x0054610b, 0x0053753e, 0x0052824e,
+    0x005189f6, 0x00508dec, 0x004f8fc0, 0x004e8fd0,
+    0x004d8d26, 0x004c86d7, 0x004b7c0a, 0x004a6b33,
+    0x00495239, 0x00482f0e, 0x0046ffc4, 0x0045c201,
+    0x00447337, 0x004310cc, 0x00419871, 0x004008e4,
+    0x003e6231, 0x003ca460, 0x003acf8a, 0x0038e57a,
+    0x0036e981, 0x0034defa, 0x0032c94b, 0x0030acc6,
+    0x002e8eb4, 0x002c7452, 0x002a62aa, 0x00285bbf,
+    0x00265eda, 0x00246b24, 0x00227f9c, 0x002098e7,
+    0x001eb13b, 0x001cc2ef, 0x001ac899, 0x0018be3d,
+    0x0016a198, 0x00147065, 0x00122897, 0x000fcbc5,
+    0x000d5f03, 0x000ae77a, 0x00086a52, 0x0005eb92,
+    0x00036e4a, 0x0000f57e, 0xfffe8414, 0xfffc1a78,
+    0xfff9b6bb, 0xfff756d9, 0xfff4f8d0, 0xfff29add,
+    0xfff03b87, 0xffedd94c, 0xffeb7295, 0xffe9072b,
+    0xffe6981a, 0xffe4265b, 0xffe1b30e, 0xffdf3f2b,
+    0xffdccb9e, 0xffda5993, 0xffd7ea0c, 0xffd57d60,
+    0xffd31302, 0xffd0aa27, 0xffce4243, 0xffcbdb40,
+    0xffc97595, 0xffc711a2, 0xffc4af9d, 0xffc24fa6,
+    0xffbff1de, 0xffbd9699, 0xffbb3e44, 0xffb8e8d5,
+    0xffb695f4, 0xffb44522, 0xffb1f627, 0xffafa8f0,
+    0xffad5d91, 0xffab140a, 0xffa8cc1c, 0xffa68590,
+    0xffa44066, 0xffa1fca0, 0xff9fba30, 0xff9d7902,
+    0xff9b3916, 0xff98fa6d, 0xff96bd06, 0xff9480b6,
+    0xff924532, 0xff900a24, 0xff8dcf41, 0xff8b9433,
+    0xff895884, 0xff871bd3, 0xff84dd8a, 0xff829d34,
+    0xff805a43, 0xff7e142d, 0xff7bca71, 0xff797c83,
+    0xff7729e3, 0xff74d204, 0xff727451, 0xff70101e,
+    0xff6da493, 0xff6b30d1, 0xff68b3f4, 0xff662d31,
+    0xff639bd1, 0xff60ff09, 0xff5e562c, 0xff5ba3e0,
+    0xff58ee39, 0xff563c22, 0xff5394f3, 0xff50fd1e,
+    0xff4e7599, 0xff4bff32, 0xff499ad4, 0xff47490a,
+    0xff450a36, 0xff42deb7, 0xff40c6cf, 0xff3ec2be,
+    0xff3cd299, 0xff3af681, 0xff392e6a, 0xff377a4a,
+    0xff35d9f7, 0xff344d44, 0xff32d3e8, 0xff316d96,
+    0xff3019d9, 0xff2ed83a, 0xff2da82f, 0xff2c88bf,
+    0xff2b78b4, 0xff2a76cc, 0xff298184, 0xff289890,
+    0xff27bc7d, 0xff26ee21, 0xff262e28, 0xff257cdc,
+    0xff24d9f4, 0xff244524, 0xff23be15, 0xff234488,
+    0xff22d852, 0xff227947, 0xff22273d, 0xff21e1d2,
+    0xff21a871, 0xff217a79, 0xff215748, 0xff213eca,
+    0xff21319e, 0xff21305c, 0xff213baf, 0xff2153c2,
+    0xff21782b, 0xff21a892, 0xff21e477, 0xff222bda,
+    0xff227f26, 0xff22debd, 0xff234b09, 0xff23c394,
+    0xff24471d, 0xff24d42b, 0xff25695c, 0xff260538,
+    0xff26a652, 0xff274b28, 0xff27f22d, 0xff2899d2,
+    0xff295975, 0xff29f2ad, 0xff2a96d7, 0xff2b45f4,
+    0xff2bffe3, 0xff2cc4ba, 0xff2d9458, 0xff2e6ede,
+    0xff2f544c, 0xff3044b7, 0xff314034, 0xff3246fa,
+    0xff33591e, 0xff3476e0, 0xff35a060, 0xff36d534,
+    0xff38148f, 0xff395daf, 0xff3aafd4, 0xff3c0ac8,
+    0xff3d6ed6, 0xff3edc54, 0xff405382, 0xff41d3f5,
+    0xff435ccc, 0xff44ed0f, 0xff4683d3, 0xff482080,
+    0xff49c297, 0xff4b69ab, 0xff4d1547, 0xff4ec4f5,
+    0xff50781d, 0xff522e20, 0xff53e692, 0xff55a15d,
+    0xff575f17, 0xff592022, 0xff5ae4de, 0xff5cacb4,
+    0xff5e75e2, 0xff603ee5, 0xff62062f, 0xff63caab,
+    0xff658b55, 0xff67476d, 0xff68fe11, 0xff6aaea0,
+    0xff6c5899, 0xff6dfb86, 0xff6f96e7, 0xff712a65,
+    0xff72b59f, 0xff74382b, 0xff75b1d3, 0xff772276,
+    0xff788a20, 0xff79e8e5, 0xff7b3ef0, 0xff7c8c98,
+    0xff7dd249, 0xff7f108c, 0xff804804, 0xff817d0e,
+    0xff82b74a, 0xff83fde6, 0xff855762, 0xff86c622,
+    0xff884904, 0xff89ded1, 0xff8b8646, 0xff8d3e4c,
+    0xff8f05cc, 0xff90dbc6, 0xff92bf2a, 0xff94af04,
+    0xff96aa26, 0xff98af9a, 0xff9abe48, 0xff9cd543,
+    0xff9ef3c1, 0xffa118ea, 0xffa343fd, 0xffa57423,
+    0xffa7a890, 0xffa9e084, 0xffac1b31, 0xffae5802,
+    0xffb09680, 0xffb2d621, 0xffb51678, 0xffb75704,
+    0xffb99726, 0xffbbd645, 0xffbe13d7, 0xffc04f26,
+    0xffc2879a, 0xffc4bc72, 0xffc6ed24, 0xffc918e3,
+    0xffcb3eb8, 0xffcd5dcc, 0xffcf7549, 0xffd184d8,
+    0xffd38c8f, 0xffd58ca4, 0xffd7854d, 0xffd97694,
+    0xffdb606e, 0xffdd42d1, 0xffdf1da8, 0xffe0f09b,
+    0xffe2bb00, 0xffe47c41, 0xffe633c6, 0xffe7e150,
+    0xffe98534, 0xffeb1fb4, 0xffecb10e, 0xffee3944,
+    0xffefb7e9, 0xfff12cbe, 0xfff29762, 0xfff3f789,
+    0xfff54cbe, 0xfff69695, 0xfff7d4b8, 0xfff90748,
+    0xfffa2ee5, 0xfffb4c3c, 0xfffc6003, 0xfffd6af0,
+    0xfffe6dda, 0xffff69b8, 0x00005f4b, 0x00014e7f,
+    0x00023646, 0x000315b4, 0x0003ebd3, 0x0004b74a,
+    0x00057677, 0x000627e2, 0x0006ca09, 0x00075ce1,
+    0x0007e196, 0x00085955, 0x0008c556, 0x00092751,
+    0x00098153, 0x0009d581, 0x000a25be, 0x000a732b,
+    0x000abe1f, 0x000b06e4, 0x000b4db1, 0x000b91fa,
+    0x000bd266, 0x000c0da0, 0x000c426e, 0x000c6ffb,
+    0x000c95b0, 0x000cb2f7, 0x000cc76e, 0x000cd317,
+    0x000cd647, 0x000cd17f, 0x000cc52b, 0x000cb1ea,
+    0x000c98c0, 0x000c7a62, 0x000c57c7, 0x000c3187,
+    0x000c0862, 0x000bdcd8, 0x000baf81, 0x000b80c7,
+    0x000b50ec, 0x000b202f, 0x000aeec6, 0x000abcb2,
+    0x000a89d2, 0x000a5605, 0x000a2116, 0x0009eafb,
+    0x0009b37d, 0x00097a9d, 0x00094030, 0x00090440,
+    0x0008c6b9, 0x000887ae, 0x0008470c, 0x00080512,
+    0x0007c1f6, 0x00077df9, 0x0007395a, 0x0006f45b,
+    0x0006af67, 0x00066abe, 0x000626b6, 0x0005e38f,
+    0x0005a1a0, 0x0005611e, 0x00052234, 0x0004e502,
+    0x0004a95d, 0x00046f46, 0x00043691, 0x0003ff33,
+    0x0003c90d, 0x0003941f, 0x00036047, 0x00032d9c,
+    0x0002fc1e, 0x0002cbed, 0x00029d1e, 0x00026fbc,
+    0x000243f2, 0x000219d6, 0x0001f17d, 0x0001caf1,
+    0x0001a63e, 0x00018363, 0x00016256, 0x00014316,
+    0x0001258f, 0x000109cb, 0x0000efaa, 0x0000d720,
+    0x0000c03a, 0x0000aacb, 0x000096de, 0x0000846a,
+    0x0000736d, 0x000063d3, 0x000055a6, 0x000048d0,
+    0x00003d47, 0x000032f6, 0x000029dc, 0x000021d9,
+    0x00001ae3, 0x000014ee, 0x00000fdb, 0x00000ba9,
+    0x00000839, 0x00000589, 0x00000370, 0x000001ee,
+    0x000000d7, 0x00000036, 0xffffffe0, 0xffffffc0,
+    0xffffffd5, 0xfffffff5, 0x0000000b, 0x0000000b,
+    0x0000000b, 0x0000000b, 0xfffffff5, 0xffffffd5,
+    0xffffffca, 0xffffffe0, 0x00000036, 0x000000d7,
+    0x000001ce, 0x0000033b, 0x00000529, 0x000007ad,
+    0x00000ac8, 0x00000e99, 0x00001316, 0x0000185e,
+    0x00001e7e, 0x00002575, 0x00002d4c, 0x0000361b,
+    0x00003fd6, 0x00004a93, 0x00005647, 0x00006312,
+    0x000070de, 0x00007fad, 0x00008f87, 0x0000a064,
+    0x0000b242, 0x0000c52d, 0x0000d919, 0x0000ee12,
+    0x0001040c, 0x00011b13, 0x0001331b, 0x00014c30,
+    0x0001663c, 0x0001814a, 0x00019d4f, 0x0001ba35,
+    0x0001d7e7, 0x0001f645, 0x00021544, 0x000234c3,
+    0x000254b9, 0x00027505, 0x000295a7, 0x0002b67e,
+    0x0002d7a1, 0x0002f904, 0x00031ab2, 0x00033ca0,
+    0x00035ee5, 0x0003818a, 0x0003a485, 0x0003c7e1,
+    0x0003eb72, 0x00040f0e, 0x0004329f, 0x000455e6,
+    0x000478c0, 0x00049aef, 0x0004bc52, 0x0004dca9,
+    0x0004fbde, 0x000519c5, 0x00053635, 0x0005512d,
+    0x00056aae, 0x000582a1, 0x00059927, 0x0005ae40,
+    0x0005c1f6, 0x0005d455, 0x0005e572, 0x0005f56d,
+    0x00060446, 0x0006121e, 0x00061f09, 0x00062b08,
+    0x00063605, 0x00063feb, 0x00064899, 0x00064ff0,
+    0x000655a5, 0x00065996, 0x00065b6f, 0x00065af8,
+    0x000657e9, 0x000651d4, 0x00064884, 0x00063bae,
+    0x00062b33, 0x00061706, 0x0005fefd, 0x0005e344,
+    0x0005c404, 0x0005a195, 0x00057c41, 0x00055473,
+    0x00052ac2, 0x0004ffc4, 0x0004d410, 0x0004a7e5,
+    0x00047b4f, 0x00044e39, 0x00042096, 0x0003f208,
+    0x0003c1e1, 0x00038f77, 0x00035a12, 0x00032127,
+    0x0002e476, 0x0002a389, 0x00025e29, 0x0002146d,
+    0x0001c700, 0x00017682, 0x000123a1, 0x0000cefd,
+    0x000078f7, 0x0000221a, 0xffffcad1, 0xffff7332,
+    0xffff1b1e, 0xfffec253, 0xfffe6891, 0xfffe0da2,
+    0xfffdb15c, 0xfffd5393, 0xfffcf412, 0xfffc92e3,
+    0xfffc3032, 0xfffbcc29, 0xfffb6714, 0xfffb0113,
+    0xfffa9a5b, 0xfffa3337, 0xfff9cbd4, 0xfff96450,
+    0xfff8fcac, 0xfff894dc, 0xfff82cd8, 0xfff7c4a8,
+    0xfff75c6d, 0xfff6f45e, 0xfff68c84, 0xfff62500,
+    0xfff5bde8, 0xfff5575a, 0xfff4f179, 0xfff48c64,
+    0xfff42810, 0xfff3c488, 0xfff361d7, 0xfff30008,
+    0xfff29f3a, 0xfff23f78, 0xfff1e0d8, 0xfff1835b,
+    0xfff1272a, 0xfff0cc46, 0xfff072cf, 0xfff01ad0,
+    0xffefc469, 0xffef6fa4, 0xffef1ca3, 0xffeecb7a,
+    0xffee7c1f, 0xffee2eb2, 0xffede33d, 0xffed99c1,
+    0xffed5249, 0xffed0cde, 0xffecc98d, 0xffec8849,
+    0xffec4934, 0xffec0c38, 0xffebd175, 0xffeb98eb,
+    0xffeb62a4, 0xffeb2ead, 0xffeafd19, 0xffeacdea,
+    0xffeaa129, 0xffea76cc, 0xffea4ef4, 0xffea299f,
+    0xffea06e5, 0xffe9e6ce, 0xffe9c97d, 0xffe9aebb,
+    0xffe99651, 0xffe97fd6, 0xffe96ad3, 0xffe95711,
+    0xffe9447d, 0xffe93315, 0xffe922ce, 0xffe913a0,
+    0xffe90588, 0xffe8f887, 0xffe8ec93, 0xffe8e1c1,
+    0xffe8d806, 0xffe8cf77, 0xffe8c816, 0xffe8c1eb,
+    0xffe8bd03, 0xffe8b967, 0xffe8b72e, 0xffe8b64d,
+    0xffe8b6d8, 0xffe8b8dc, 0xffe8bc6c, 0xffe8c18a,
+    0xffe8c840, 0xffe8d0a4, 0xffe8daca, 0xffe8e69e,
+    0xffe8f42a, 0xffe9035a, 0xffe9142b, 0xffe926a0,
+    0xffe93ab7, 0xffe95066, 0xffe967b8, 0xffe980ad,
+    0xffe99b3a, 0xffe9b754, 0xffe9d511, 0xffe9f45b,
+    0xffea1532, 0xffea3797, 0xffea5b89, 0xffea8108,
+    0xffeaa7ff, 0xffead079, 0xffeafa55, 0xffeb259e,
+    0xffeb5254, 0xffeb8061, 0xffebafdc, 0xffebe0ae,
+    0xffec12ce, 0xffec462f, 0xffec7add, 0xffecb0a3,
+    0xffece774, 0xffed1f32, 0xffed57a7, 0xffed90b2,
+    0xffedca48, 0xffee042a, 0xffee3e57, 0xffee788e,
+};
+
 const DECLARE_ALIGNED(32, float, ff_aac_eld_window_480)[1800] = {
      0.00101191,  0.00440397,  0.00718669,  0.01072130,
      0.01459757,  0.01875954,  0.02308987,  0.02751541,
@@ -2219,3 +2707,456 @@ const DECLARE_ALIGNED(32, float, ff_aac_eld_window_480)[1800] = {
     -0.00115988, -0.00114605, -0.00113200, -0.00111778,
     -0.00110343, -0.00108898, -0.00107448, -0.00105995,
 };
+
+const DECLARE_ALIGNED(32, int, ff_aac_eld_window_480_fixed)[1800] = {
+    0x00109442, 0x00482797, 0x0075bf2a, 0x00afa864,
+    0x00ef2aa5, 0x01335b36, 0x017a4df0, 0x01c2cffe,
+    0x020bfb4c, 0x0254fd74, 0x029d557c, 0x02e50574,
+    0x032c41a8, 0x03732c08, 0x03b9cb88, 0x040032e8,
+    0x044686f0, 0x048cd578, 0x04d30738, 0x05190500,
+    0x055ec210, 0x05a44750, 0x05e9aeb8, 0x062f0c80,
+    0x067477a0, 0x06ba1ac0, 0x07001998, 0x074680e0,
+    0x078d5ec0, 0x07d4d038, 0x081cf8f0, 0x0865f8b0,
+    0x08afe0e0, 0x08fab150, 0x09466cd0, 0x09931910,
+    0x09e0adb0, 0x0a2f1640, 0x0a7e43f0, 0x0ace2960,
+    0x0b1eb180, 0x0b6fc4b0, 0x0bc15050, 0x0c134710,
+    0x0c65a420, 0x0cb86340, 0x0d0b7df0, 0x0d5ef450,
+    0x0db2cb60, 0x0e070180, 0x0e5b91f0, 0x0eb07f20,
+    0x0f05d0a0, 0x0f5b8920, 0x0fb1a950, 0x10082e40,
+    0x105f1400, 0x10b65820, 0x110df780, 0x1165f120,
+    0x11be43e0, 0x1216eea0, 0x126feac0, 0x12c92b00,
+    0x1322a620, 0x137c55c0, 0x13d61ae0, 0x142fc940,
+    0x148949e0, 0x14e28da0, 0x153b9a80, 0x15947640,
+    0x15ed1840, 0x16458660, 0x169deb20, 0x16f663c0,
+    0x174ef8c0, 0x17a7a120, 0x180041c0, 0x1858d000,
+    0x18b14940, 0x1909a140, 0x1961c820, 0x19b9b620,
+    0x1a116480, 0x1a68c1a0, 0x1abfbd00, 0x1b164f60,
+    0x1b6c7580, 0x1bc23120, 0x1c1780e0, 0x1c6c5d00,
+    0x1cc0dbe0, 0x1d1532a0, 0x1d697660, 0x1dbdac20,
+    0x1e11b280, 0x1e655b80, 0x1eb89e80, 0x1f0b7720,
+    0x1f5dd680, 0x1fafaec0, 0x2000fb00, 0x2051c340,
+    0x20a22ac0, 0x20f24580, 0x214213c0, 0x21919140,
+    0x21e0b300, 0x222f7580, 0x227dd900, 0x22cbd880,
+    0x23196ec0, 0x23669b00, 0x23b35d80, 0x23ffb6c0,
+    0x244ba7c0, 0x249731c0, 0x24e25700, 0x252d1940,
+    0x2594ae40, 0x25deea40, 0x2628bd00, 0x26722680,
+    0x26bb2740, 0x2703bf40, 0x274beec0, 0x2793b600,
+    0x27db1500, 0x28220c00, 0x28689b80, 0x28aec4c0,
+    0x28f48800, 0x2939e680, 0x297ee080, 0x29c37600,
+    0x2a07a740, 0x2a4b74c0, 0x2a8ede80, 0x2ad1e500,
+    0x2b148880, 0x2b56c940, 0x2b98a740, 0x2bda2240,
+    0x2c1b3a80, 0x2c5bef80, 0x2c9c4100, 0x2cdc2e80,
+    0x2d1bb800, 0x2d5adc80, 0x2d999b80, 0x2dd7f500,
+    0x2e15e800, 0x2e537400, 0x2e9098c0, 0x2ecd5540,
+    0x2f09a900, 0x2f4592c0, 0x2f811140, 0x2fbc2340,
+    0x2ff6c7c0, 0x3030fe80, 0x306ac6c0, 0x30a41f80,
+    0x30dd07c0, 0x31157dc0, 0x314d7fc0, 0x31850c80,
+    0x31bc22c0, 0x31f2c1c0, 0x3228e840, 0x325e9540,
+    0x3293c7c0, 0x32c87e40, 0x32fcb800, 0x33307340,
+    0x3363aec0, 0x33966940, 0x33c8a140, 0x33fa5580,
+    0x342b84c0, 0x345c2dc0, 0x348c4f80, 0x34bbe900,
+    0x34eaf9c0, 0x35198080, 0x35477d00, 0x3574ee40,
+    0x35a1d340, 0x35ce2bc0, 0x35f9f6c0, 0x36253380,
+    0x364fe180, 0x367a0040, 0x36a38f80, 0x36cc8ec0,
+    0x36f4fe80, 0x371cde80, 0x37442e80, 0x376aef00,
+    0x37912000, 0x37b6c200, 0x37dbd600, 0x38005d00,
+    0x38245840, 0x3847c880, 0x386aaf80, 0x388d0e80,
+    0x38aee700, 0x38d03bc0, 0x38f11000, 0x39116700,
+    0x39314440, 0x3950ab00, 0x396f9e80, 0x398e22c0,
+    0x39ac3c40, 0x39c9f280, 0x39e74cc0, 0x3a045280,
+    0x3a210b40, 0x3a3d7ec0, 0x3a59b480, 0x3a75b480,
+    0x3a918900, 0x3aad3cc0, 0x3ac8db00, 0x3ae46bc0,
+    0x3afff080, 0x3b1b6840, 0x3b36d2c0, 0x3b521980,
+    0x3b6d0780, 0x3b876400, 0x3ba0f4c0, 0x3bb96740,
+    0x3bd03dc0, 0x3be56580, 0x3bf6dec0, 0x3c0c6140,
+    0x3c15a9c0, 0x3c1a5780, 0x3c1fd0c0, 0x3c25edc0,
+    0x3c2c78c0, 0x3c333880, 0x3c39f3c0, 0x3c409100,
+    0x3c471d00, 0x3c4da780, 0x3c543f40, 0x3c5ae880,
+    0x3c619f00, 0x3c685f00, 0x3c6f25c0, 0x3c75f280,
+    0x3c7cc6c0, 0x3c83a2c0, 0x3c8a87c0, 0x3c9175c0,
+    0x3c986d00, 0x3c9f6e00, 0x3ca67880, 0x3cad8c40,
+    0x3cb4a980, 0x3cbbd000, 0x3cc2ffc0, 0x3cca3940,
+    0x3cd17d40, 0x3cd8cb80, 0x3ce02480, 0x3ce78740,
+    0x3ceef3c0, 0x3cf66a00, 0x3cfdea00, 0x3d0574c0,
+    0x3d0d0a40, 0x3d14ab40, 0x3d1c5700, 0x3d240d00,
+    0x3d2bcd40, 0x3d3397c0, 0x3d3b6cc0, 0x3d434d00,
+    0x3d4b38c0, 0x3d532fc0, 0x3d5b3180, 0x3d633dc0,
+    0x3d6b53c0, 0x3d737400, 0x3d7b9f00, 0x3d83d540,
+    0x3d8c1680, 0x3d946200, 0x3d9cb780, 0x3da51680,
+    0x3dad7f00, 0x3db5f140, 0x3dbe6dc0, 0x3dc6f480,
+    0x3dcf8540, 0x3dd81fc0, 0x3de0c300, 0x3de96ec0,
+    0x3df22340, 0x3dfae0c0, 0x3e03a800, 0x3e0c7840,
+    0x3e155180, 0x3e1e32c0, 0x3e271bc0, 0x3e300c00,
+    0x3e390400, 0x3e420400, 0x3e4b0c40, 0x3e541c80,
+    0x3e5d33c0, 0x3e6651c0, 0x3e6f7580, 0x3e789fc0,
+    0x3e81d080, 0x3e8b0880, 0x3e944700, 0x3e9d8c00,
+    0x3ea6d680, 0x3eb02600, 0x3eb97a80, 0x3ec2d400,
+    0x3ecc3340, 0x3ed59880, 0x3edf0300, 0x3ee87280,
+    0x3ef1e600, 0x3efb5d40, 0x3f04d880, 0x3f0e5840,
+    0x3f17dcc0, 0x3f216600, 0x3f2af340, 0x3f348440,
+    0x3f3e1840, 0x3f47af40, 0x3f514a00, 0x3f5ae840,
+    0x3f648b00, 0x3f6e3140, 0x3f77db00, 0x3f818740,
+    0x3f8b3600, 0x3f94e780, 0x3f9e9c40, 0x3fa85480,
+    0x3fb21080, 0x3fbbcfc0, 0x3fc59200, 0x3fcf56c0,
+    0x3fd91dc0, 0x3fe2e640, 0x3fecb040, 0x3ff67b40,
+    0x40098600, 0x40135580, 0x401d2700, 0x4026fa00,
+    0x4030ce80, 0x403aa380, 0x40447900, 0x404e4f00,
+    0x40582400, 0x4061f900, 0x406bcd00, 0x4075a080,
+    0x407f7480, 0x40894900, 0x40931e00, 0x409cf280,
+    0x40a6c600, 0x40b09800, 0x40ba6980, 0x40c43a80,
+    0x40ce0b00, 0x40d7db00, 0x40e1ab00, 0x40eb7980,
+    0x40f54600, 0x40ff1080, 0x4108d980, 0x4112a100,
+    0x411c6800, 0x41262d80, 0x412ff080, 0x4139b180,
+    0x41436e80, 0x414d2980, 0x4156e100, 0x41609700,
+    0x416a4a80, 0x4173fb00, 0x417da800, 0x41875000,
+    0x4190f400, 0x419a9400, 0x41a43000, 0x41adc880,
+    0x41b75d00, 0x41c0ec80, 0x41ca7700, 0x41d3fb00,
+    0x41dd7980, 0x41e6f280, 0x41f06600, 0x41f9d480,
+    0x42033d00, 0x420c9f00, 0x4215f980, 0x421f4d00,
+    0x42289900, 0x4231de80, 0x423b1d00, 0x42445500,
+    0x424d8500, 0x4256ad00, 0x425fcc80, 0x4268e380,
+    0x4271f200, 0x427af900, 0x4283f880, 0x428cef80,
+    0x4295de00, 0x429ec280, 0x42a79d80, 0x42b06f00,
+    0x42b93800, 0x42c1f800, 0x42caaf80, 0x42d35d80,
+    0x42dc0100, 0x42e49b00, 0x42ed2a80, 0x42f5b080,
+    0x42fe2d80, 0x4306a180, 0x430f0c80, 0x43176d80,
+    0x431fc480, 0x43281100, 0x43305400, 0x43388e80,
+    0x4340c000, 0x4348e900, 0x43510900, 0x43591f00,
+    0x43612b80, 0x43692f00, 0x43712900, 0x43791a80,
+    0x43810380, 0x4388e400, 0x4390bc00, 0x43988b00,
+    0x43a05180, 0x43a80f00, 0x43afc480, 0x43b77180,
+    0x43bf1780, 0x43c6b700, 0x43ce5100, 0x43d5e580,
+    0x43dd7100, 0x43e4ef80, 0x43ec5b80, 0x43f3ba80,
+    0x43fb1c80, 0x44029400, 0x440a2e80, 0x4411d080,
+    0x44193800, 0x44202480, 0x44265880, 0x442ba780,
+    0x442d8680, 0x4428a500, 0x44241380, 0x441ccb00,
+    0x44140100, 0x440a1200, 0x43ff7280, 0x43f46980,
+    0x43e93200, 0x43ddff00, 0x43d2dc80, 0x43c7ac00,
+    0x43bc4900, 0x43b09400, 0x43a47d80, 0x4397fd80,
+    0x438b0780, 0x437d9b80, 0x436fd380, 0x4361cd80,
+    0x4353a800, 0x43457500, 0x43373c80, 0x43290500,
+    0x431ad400, 0x430ca280, 0x42fe6000, 0x42f00080,
+    0x42e17380, 0x42d29e00, 0x42c35d80, 0x42b39200,
+    0x42a32080, 0x4291fc00, 0x42801900, 0x426d6d80,
+    0x4259f680, 0x4245bd00, 0x4230ca80, 0x421b2900,
+    0x4204e800, 0x41ee1d00, 0x41d6dd80, 0x41bf3c80,
+    0x41a74680, 0x418f0680, 0x41768800, 0x415dd100,
+    0x4144e400, 0x412bbf80, 0x41126400, 0x40f8cc00,
+    0x40deea00, 0x40c4b100, 0x40aa1400, 0x408f0800,
+    0x40738380, 0x40577d80, 0x403aeb80, 0x401dc180,
+    0x3ffff240, 0x3fe170c0, 0x3fc232c0, 0x3fa23680,
+    0x3f817c40, 0x3f6002c0, 0x3f3ddec0, 0x3f1b4180,
+    0x3ef85d40, 0x3ed56340, 0x3eb27240, 0x3e8f9c40,
+    0x3e6cf400, 0x3e4a81c0, 0x3e282140, 0x3e059980,
+    0x3de2b280, 0x3dbf4100, 0x3d9b3640, 0x3d768b00,
+    0x3d513640, 0x3d2b3840, 0x3d049b80, 0x3cdd6b40,
+    0x3cb5b400, 0x3c8d8f40, 0x3c652080, 0x3c3c8c40,
+    0x3c13f480, 0x3beb7580, 0x3bc327c0, 0x3b9b2680,
+    0x3b737000, 0x3b4bc580, 0x3b23d740, 0x3afb5640,
+    0x3ad21c40, 0x3aa83780, 0x3a7dbc40, 0x3a52bf80,
+    0x3a276600, 0x39fbe0c0, 0x39d06140, 0x39a50ec0,
+    0x3979e300, 0x394ebf40, 0x392386c0, 0x38f82280,
+    0x38cc89c0, 0x38a0b7c0, 0x3874a740, 0x38485840,
+    0x381bd1c0, 0x37ef1b40, 0x37c23cc0, 0x37953dc0,
+    0x376825c0, 0x373afc80, 0x370dc980, 0x36e09440,
+    0x36b41dc0, 0x36862100, 0x3657e480, 0x36297240,
+    0x35fad380, 0x35cc1200, 0x359d36c0, 0x356e4b40,
+    0x353f5880, 0x35106780, 0x34e17780, 0x34b28240,
+    0x34838040, 0x345466c0, 0x34251940, 0x33f57280,
+    0x33c54bc0, 0x33949840, 0x33638380, 0x33324980,
+    0x33012500, 0x32d04480, 0x329fc7c0, 0x326fcbc0,
+    0x324068c0, 0x32116fc0, 0x31e27600, 0x31b30fc0,
+    0x3182e300, 0x3151e240, 0x312029c0, 0x30edd080,
+    0x30baf700, 0x3087cd00, 0x30548600, 0x30215680,
+    0x2fee65c0, 0x2fbbca40, 0x2f899980, 0x2f57e6c0,
+    0x2f26b540, 0x2ef5f980, 0x2ec5aa00, 0x2e95afc0,
+    0x2e65c180, 0x2e357b40, 0x2e047840, 0x2dd27380,
+    0x2d9f6c40, 0x2d6b7780, 0x2d36a6c0, 0x2d012940,
+    0x2ccb5680, 0x2c958a00, 0x2c601b80, 0x2c2b3640,
+    0x2bf6dfc0, 0x2bc31ec0, 0x2b8ff500, 0x2b5d5540,
+    0x2b2b2a00, 0x2af95e80, 0x2ac7dd80, 0x2a968f80,
+    0x2a655d40, 0x2a342f00, 0x2a02e8c0, 0x29d16700,
+    0x299f8640, 0x296d2380, 0x293a2740, 0x29068400,
+    0x28d22b40, 0x289d1540, 0x28675280, 0x28310180,
+    0x27fa3f00, 0x27c32f80, 0x278c08c0, 0x275505c0,
+    0x271e60c0, 0x26e84b00, 0x26b2e880, 0x267e5cc0,
+    0x264ac940, 0x26183a40, 0x25e6aa80, 0x25b615c0,
+    0x25866b80, 0x25576b40, 0x2528ba00, 0x24f9ffc0,
+    0x24cadfc0, 0x249af540, 0x2469da80, 0x24372780,
+    0x2402b800, 0x23ccbfc0, 0x23957cc0, 0x235d3140,
+    0x23245200, 0x22eb8000, 0x22b35cc0, 0x227c7940,
+    0x22471d40, 0x22136840, 0x21e18240, 0x21b15d80,
+    0x21827dc0, 0x21544600, 0x21261b00, 0x20f78600,
+    0x20c83e00, 0x20980000, 0x20668e00, 0x2033f300,
+    0x20007400, 0x1fcc64e0, 0x1f97d120, 0x1f642320,
+    0x1f2f49e0, 0x1efaa840, 0x1ec73580, 0x1e94d880,
+    0x1e636120, 0x1e32a160, 0x1e025ba0, 0x1dd24300,
+    0x1da20e60, 0x1d717940, 0x1d407560, 0x1d0f2040,
+    0x1cdd95c0, 0x1cabf500, 0x1c7a6940, 0x1c492340,
+    0x1c185680, 0x1be818c0, 0x1bb83f60, 0x1b888d20,
+    0x1b58c640, 0x1b28c240, 0x1af871e0, 0x1ac7c960,
+    0x1a96bf00, 0x1a656b60, 0x1a340360, 0x1a02bd20,
+    0x19d1c6c0, 0x19a12f40, 0x1970f480, 0x19411640,
+    0x19119000, 0x18e255a0, 0x18b358a0, 0x18848b20,
+    0x1855e040, 0x18274e00, 0x17f8c9e0, 0x17ca4a80,
+    0x179bce40, 0x176d5a60, 0x173ef400, 0x17109fe0,
+    0x16e25f60, 0x16b43240, 0x16861880, 0x16581220,
+    0x162a20c0, 0x15fc4620, 0x15ce8420, 0x15a0dca0,
+    0x157351c0, 0x1545e580, 0x151899a0, 0x14eb6ec0,
+    0x14be63a0, 0x14917a00, 0x14649ae0, 0x14377060,
+    0x1409d0c0, 0x13dbbb20, 0x13ad58e0, 0x137f0160,
+    0x1350cc80, 0x1322b8c0, 0x12f4ca60, 0x12c704e0,
+    0x129968a0, 0x126bf5c0, 0x123eade0, 0x12119300,
+    0x11e4a660, 0x11b7e860, 0x118b5940, 0x115ef8a0,
+    0x1132c600, 0x1106c1a0, 0x10daecc0, 0x10af4900,
+    0x1083d7a0, 0x10589c00, 0x102d9a00, 0x1002d1e0,
+    0x0fd842c0, 0x0fadde80, 0x0f839a50, 0x0f597700,
+    0x0f2f76e0, 0x0f05a170, 0x0edbf9c0, 0x0eb27f30,
+    0x0e8930d0, 0x0e600d70, 0x0e371550, 0x0e0e4950,
+    0x0de5ab50, 0x0dbd3d20, 0x0d94fe10, 0x0d6cecb0,
+    0x0d450220, 0x0d1d38f0, 0x0cf59130, 0x0cce0c30,
+    0x0ca6af10, 0x0c7f7b80, 0x0c587010, 0x0c318960,
+    0x0c0ac200, 0x0be418d0, 0x0bbd8da0, 0x0b9724e0,
+    0x0b70e6c0, 0x0b4ad970, 0x0b2502f0, 0x0aff6930,
+    0x0ada1250, 0x0ab50430, 0x0a9044d0, 0x0a6bda30,
+    0x0a3bedf0, 0x0a18be40, 0x09f5e530, 0x09d35cf0,
+    0x09b11ff0, 0x098f2890, 0x096d7120, 0x094bf400,
+    0x092aab80, 0x09099240, 0x08e8a620, 0x08c7e850,
+    0x08a75990, 0x0886fae0, 0x0866ccf0, 0x0846d070,
+    0x08270610, 0x08076e70, 0x07e80ac8, 0x07c8dc60,
+    0x07a9e440, 0x078b2348, 0x076c99d0, 0x074e4818,
+    0x07302e50, 0x07124d18, 0x06f4a530, 0x06d73778,
+    0x06ba0488, 0x069d0c88, 0x06804f68, 0x0663cce0,
+    0x06478528, 0x062b78a0, 0x060fa7e8, 0x05f413b8,
+    0x05d8bc38, 0x05bda128, 0x05a2c258, 0x05881f60,
+    0x056db888, 0x05538e60, 0x0539a170, 0x051ff218,
+    0x05068040, 0x04ed4b90, 0x04d45398, 0x04bb9820,
+    0x04a31988, 0x048ad860, 0x0472d528, 0x045b0ff0,
+    0x04438860, 0x042c3de8, 0x04153040, 0x03fe5f4c,
+    0x03e7cb98, 0x03d17580, 0x03bb5d64, 0x03a582e8,
+    0x038fe588, 0x037a8494, 0x03655fcc, 0x03507768,
+    0x033bcbb4, 0x03275d28, 0x03132bc0, 0x02ff370c,
+    0x02eb7e94, 0x02d801e8, 0x02c4c11c, 0x02b1bcbc,
+    0x029ef578, 0x028c6ba8, 0x027a1f20, 0x02680f54,
+    0x02563bac, 0x0244a3c8, 0x023347a0, 0x02222730,
+    0x0211429c, 0x02009938, 0x01f02974, 0x01dff1ae,
+    0x01cff058, 0x01c024c8, 0x01b08ef4, 0x01a12eda,
+    0x019204b0, 0x01831138, 0x01745588, 0x0165d2c2,
+    0x01578a96, 0x01497ffc, 0x013bb670, 0x012e3160,
+    0x0120f146, 0x0113f27c, 0x0107310c, 0x00faa909,
+    0x00ee57a1, 0x00e23b09, 0x00d6515b, 0x00ca9977,
+    0x00bf1509, 0x00b3c74d, 0x00a8b388, 0x009ddb3d,
+    0x00933bf2, 0x0088d22c, 0x007e9a70, 0x0074935a,
+    0x006abe70, 0x00611d5c, 0x0057b1f8, 0x004e7e73,
+    0x0045859b, 0x003cca96, 0x00344f32, 0x002c1074,
+    0x00240873, 0x001c31ba, 0x0014863f, 0x000cfe8b,
+    0x00059307, 0xfffe3b9a, 0xfff6f718, 0xffefcd4d,
+    0xffe8c6f4, 0xffe1ed10, 0xffdb4c57, 0xffd4f484,
+    0xffcef5dc, 0xffc95d0c, 0xffc4284e, 0xffbf4e14,
+    0xffbac5ae, 0xffb68360, 0xffb27548, 0xffae87be,
+    0xffaaa733, 0xffa6c67e, 0xffa2e141, 0xff9ef40c,
+    0xff9afc25, 0xff970058, 0xff930f7c, 0xff8f3857,
+    0xff8b8900, 0xff880bfe, 0xff84c9ea, 0xff81cbbd,
+    0xff7f17ad, 0xff7cadc6, 0xff7a8c4e, 0xff78b1cd,
+    0xff7719f3, 0xff75bd06, 0xff7492a4, 0xff7392bf,
+    0xff72b600, 0xff71f5c6, 0xff714b72, 0xff70b0ed,
+    0xff702232, 0xff6f9c90, 0xff6f1cee, 0xff6ea21f,
+    0xff6e2e9c, 0xff6dc617, 0xff6d6c09, 0xff6d2425,
+    0xff6cf267, 0xff6cdaca, 0xff6ce155, 0xff6d0983,
+    0xff6d56bb, 0xff6dcc4c, 0xff6e6cd0, 0xff6f3832,
+    0xff702cc4, 0xff71492e, 0xff728ae2, 0xff73ed63,
+    0xff756b7c, 0xff77001c, 0xff78a5d9, 0xff7a5693,
+    0xff7c0c40, 0xff7dc141, 0xff7f74aa, 0xff81298b,
+    0xff82e2de, 0xff84a3de, 0xff8670bd, 0xff884e42,
+    0xff8a410c, 0xff8c4c7f, 0xff8e70fc, 0xff90ae18,
+    0xff93037e, 0xff956f12, 0xff97ec86, 0xff9a7724,
+    0xff9d0a9d, 0xff9fa3ea, 0xffa2417e, 0xffa4e1ac,
+    0xffa78332, 0xffaa265a, 0xffaccc26, 0xffaf758e,
+    0xffb223d4, 0xffb4d906, 0xffb79726, 0xffba604e,
+    0xffbd349e, 0xffc011a8, 0xffc2f4d2, 0xffc5db82,
+    0xffc8c45f, 0xffcbaed5, 0xffce9a6d, 0xffd186c6,
+    0xffd473aa, 0xffd760e5, 0xffda4e55, 0xffdd3bd0,
+    0xffe0292b, 0xffe31645, 0xffe602ff, 0xffe8eef7,
+    0xffebd978, 0xffeec1bf, 0xfff1a72c, 0xfff488fe,
+    0xfff76689, 0xfffa3f2c, 0xfffd1245, 0xffffdf33,
+    0x000020ac, 0x0002e66f, 0x0005a937, 0x00086839,
+    0x000b22b3, 0x000dd7da, 0x001086ec, 0x00132f3c,
+    0x0015d001, 0x00186897, 0x001af849, 0x001d7eb6,
+    0x001ffbbe, 0x00226f41, 0x0024d8e8, 0x00273874,
+    0x00298d82, 0x002bd7aa, 0x002e16d4, 0x00304af6,
+    0x00327406, 0x00349203, 0x0036a416, 0x0038a893,
+    0x003a9da0, 0x003c8170, 0x003e53b8, 0x0040159a,
+    0x0041c816, 0x00436c92, 0x0045042c, 0x00468ff2,
+    0x00481106, 0x004987fe, 0x004af466, 0x004c5599,
+    0x004daae4, 0x004ef28c, 0x005029c4, 0x00514d9a,
+    0x00525b57, 0x005351f7, 0x00543190, 0x0054fa43,
+    0x0055ac2f, 0x00564938, 0x0056d3f7, 0x00574f3c,
+    0x0057bdd7, 0x00582260, 0x00587f28, 0x0058d6b1,
+    0x0059293c, 0x0059741a, 0x0059b472, 0x0059e73c,
+    0x005a0976, 0x005a1870, 0x005a116e, 0x0059f224,
+    0x0059b964, 0x005966ce, 0x0058f9e2, 0x005872e8,
+    0x0057d407, 0x00571f82, 0x005657b0, 0x00557ecd,
+    0x00549731, 0x0053a34b, 0x0052a56a, 0x00519fc6,
+    0x00509482, 0x004f85a4, 0x004e74ee, 0x004d6214,
+    0x004c4bd3, 0x004b314c, 0x004a1110, 0x0048e8c8,
+    0x0047b5f7, 0x00467626, 0x00452690, 0x0043c405,
+    0x00424b7f, 0x0040ba04, 0x003f0e53, 0x003d488b,
+    0x003b688c, 0x00396eb6, 0x00375dfb, 0x00353aaa,
+    0x003308ac, 0x0030ccb1, 0x002e8cf1, 0x002c4fd5,
+    0x002a1be8, 0x0027f486, 0x0025d90d, 0x0023c852,
+    0x0021c13b, 0x001fbf23, 0x001dbafc, 0x001badc6,
+    0x00199136, 0x00176150, 0x00151b86, 0x0012bcd1,
+    0x001044d1, 0x000db8d0, 0x000b1f43, 0x00087e89,
+    0x0005dbe2, 0x00033b1e, 0x00009fee, 0xfffe0d82,
+    0xfffb83cf, 0xfff90047, 0xfff6805a, 0xfff4019a,
+    0xfff18203, 0xffeeffb2, 0xffec78ba, 0xffe9ec4d,
+    0xffe75b4e, 0xffe4c71f, 0xffe23138, 0xffdf9ae6,
+    0xffdd0574, 0xffda723c, 0xffd7e24a, 0xffd55567,
+    0xffd2cabe, 0xffd04161, 0xffcdb890, 0xffcb306a,
+    0xffc8a95c, 0xffc62406, 0xffc3a140, 0xffc12188,
+    0xffbea542, 0xffbc2cc2, 0xffb9b7d2, 0xffb745f2,
+    0xffb4d6ac, 0xffb268fe, 0xffaffc72, 0xffad90e8,
+    0xffab263e, 0xffa8bcb8, 0xffa6547e, 0xffa3ed7b,
+    0xffa187ba, 0xff9f2351, 0xff9cc055, 0xff9a5ebc,
+    0xff97fe84, 0xff959f84, 0xff934146, 0xff90e37d,
+    0xff8e858a, 0xff8c26c0, 0xff89c69e, 0xff876483,
+    0xff84ffe4, 0xff82982b, 0xff802cb6, 0xff7dbccf,
+    0xff7b47b4, 0xff78ccd0, 0xff764b6c, 0xff73c2db,
+    0xff713227, 0xff6e9864, 0xff6bf470, 0xff694553,
+    0xff668a0d, 0xff63c1a6, 0xff60ec34, 0xff5e0e9e,
+    0xff5b30d3, 0xff585b8c, 0xff5595c9, 0xff52e1da,
+    0xff5040a0, 0xff4db31c, 0xff4b3a3b, 0xff48d67e,
+    0xff468850, 0xff445011, 0xff422ded, 0xff4021f9,
+    0xff3e2c56, 0xff3c4cf8, 0xff3a83df, 0xff38d0ec,
+    0xff3733c9, 0xff35ac14, 0xff343963, 0xff32db09,
+    0xff319066, 0xff305898, 0xff2f323d, 0xff2e1bb2,
+    0xff2d1369, 0xff2c18f8, 0xff2b2d2a, 0xff2a50e1,
+    0xff2984f4, 0xff28c978, 0xff281e01, 0xff278245,
+    0xff26f5c3, 0xff26785a, 0xff2609bf, 0xff25a9c8,
+    0xff255814, 0xff2513f6, 0xff24dcc4, 0xff24b1a6,
+    0xff2492b1, 0xff248093, 0xff247c0b, 0xff2485c6,
+    0xff249daf, 0xff24c359, 0xff24f639, 0xff253605,
+    0xff258312, 0xff25ddd5, 0xff2646e7, 0xff26be25,
+    0xff274264, 0xff27d1f6, 0xff286b19, 0xff290c13,
+    0xff29b30d, 0xff2a5e38, 0xff2b0bbd, 0xff2bb9a2,
+    0xff29a9d2, 0xff2a53dc, 0xff2b0a5a, 0xff2bcd43,
+    0xff2c9c76, 0xff2d7808, 0xff2e5ffa, 0xff2f544c,
+    0xff305528, 0xff316299, 0xff327ce0, 0xff33a432,
+    0xff34d8ba, 0xff361a8e, 0xff3768f8, 0xff38c2f5,
+    0xff3a2784, 0xff3b9623, 0xff3d0ef4, 0xff3e9277,
+    0xff4020ed, 0xff41ba14, 0xff435ccc, 0xff4507fd,
+    0xff46ba84, 0xff4873ac, 0xff4a32ea, 0xff4bf7bb,
+    0xff4dc17f, 0xff4f8fa0, 0xff516167, 0xff53361d,
+    0xff550d79, 0xff56e7ee, 0xff58c5ff, 0xff5aa84d,
+    0xff5c8e41, 0xff5e75e2, 0xff605d4d, 0xff6242b6,
+    0xff6424b8, 0xff66023d, 0xff67da44, 0xff69abd6,
+    0xff6b7646, 0xff6d38e8, 0xff6ef348, 0xff70a4ce,
+    0xff724d0f, 0xff73eb95, 0xff757fff, 0xff770a2d,
+    0xff788a20, 0xff79fff6, 0xff7b6be7, 0xff7cce52,
+    0xff7e27e4, 0xff7f78fc, 0xff80c38a, 0xff820e98,
+    0xff836378, 0xff84caaa, 0xff864990, 0xff87dff4,
+    0xff898c30, 0xff8b4cda, 0xff8d207a, 0xff8f05cc,
+    0xff90fb9b, 0xff930098, 0xff95138e, 0xff97332d,
+    0xff995e2a, 0xff9b934e, 0xff9dd18c, 0xffa017e3,
+    0xffa26550, 0xffa4b8e7, 0xffa711a8, 0xffa96eae,
+    0xffabcefc, 0xffae31cc, 0xffb09680, 0xffb2fc82,
+    0xffb5635a, 0xffb7ca52, 0xffba30a8, 0xffbc95a8,
+    0xffbef8a4, 0xffc158d0, 0xffc3b557, 0xffc60d6b,
+    0xffc86041, 0xffcaacb7, 0xffccf1cb, 0xffcf2e5c,
+    0xffd161e8, 0xffd38c8f, 0xffd5ae88, 0xffd7c808,
+    0xffd9d925, 0xffdbe1c8, 0xffdde1f3, 0xffdfd964,
+    0xffe1c79b, 0xffe3abcc, 0xffe5852a, 0xffe75341,
+    0xffe9162f, 0xffeace55, 0xffec7c15, 0xffee1f63,
+    0xffefb7e9, 0xfff1453d, 0xfff2c6fd, 0xfff43ca8,
+    0xfff5a5d4, 0xfff701ea, 0xfff850b4, 0xfff99288,
+    0xfffac853, 0xfffbf2d5, 0xfffd12e6, 0xfffe2991,
+    0xffff37e4, 0x00003eea, 0x00013ec4, 0x00023646,
+    0x0003244d, 0x00040797, 0x0004de8c, 0x0005a734,
+    0x00065fab, 0x0007068f, 0x00079c82, 0x000822fa,
+    0x00089b70, 0x000907a6, 0x00096a01, 0x0009c506,
+    0x000a1b37, 0x000a6e18, 0x000abe1f, 0x000b0bac,
+    0x000b5701, 0x000b9f3b, 0x000be2c2, 0x000c1fff,
+    0x000c5599, 0x000c829a, 0x000ca661, 0x000cc058,
+    0x000cd028, 0x000cd63d, 0x000cd317, 0x000cc739,
+    0x000cb36d, 0x000c98c0, 0x000c7833, 0x000c52df,
+    0x000c2984, 0x000bfcf9, 0x000bcdea, 0x000b9cf7,
+    0x000b6a97, 0x000b3700, 0x000b029d, 0x000acd79,
+    0x000a977e, 0x000a6076, 0x000a2838, 0x0009eea1,
+    0x0009b37d, 0x000976c2, 0x0009384e, 0x0008f816,
+    0x0008b612, 0x0008724a, 0x00082cd5, 0x0007e5e8,
+    0x00079dce, 0x000754de, 0x00070b62, 0x0006c1c6,
+    0x0006786a, 0x00062fba, 0x0005e801, 0x0005a1a0,
+    0x00055ce1, 0x000519fb, 0x0004d8f8, 0x000499b8,
+    0x00045c30, 0x00042040, 0x0003e5c8, 0x0003acb3,
+    0x000374df, 0x00033e59, 0x00030934, 0x0002d57d,
+    0x0002a348, 0x000272b6, 0x000243f2, 0x00021711,
+    0x0001ec3e, 0x0001c37a, 0x00019cc3, 0x00017830,
+    0x000155a0, 0x00013514, 0x0001168b, 0x0000f9e6,
+    0x0000df23, 0x0000c62e, 0x0000aef2, 0x00009978,
+    0x000085a1, 0x0000736d, 0x000062dc, 0x000053d8,
+    0x0000466c, 0x00003a62, 0x00002fd1, 0x00002681,
+    0x00001e73, 0x00001792, 0x000011c9, 0x00000cf6,
+    0x0000091a, 0x000005ff, 0x000003b1, 0x00000203,
+    0x000000d7, 0x0000002b, 0xffffffd5, 0xffffffc0,
+    0xffffffd5, 0x00000000, 0x00000015, 0x00000000,
+    0x00000000, 0x00000015, 0x00000000, 0xffffffd5,
+    0xffffffca, 0xffffffd5, 0x0000002b, 0x000000cc,
+    0x000001e3, 0x0000037b, 0x0000059f, 0x0000086e,
+    0x00000bf4, 0x0000103b, 0x00001564, 0x00001b6e,
+    0x0000226f, 0x00002a68, 0x00003377, 0x00003d93,
+    0x000048c5, 0x00005525, 0x000062a6, 0x00007155,
+    0x0000812f, 0x00009237, 0x0000a455, 0x0000b7ab,
+    0x0000cc18, 0x0000e1bd, 0x0000f878, 0x0001106c,
+    0x00012981, 0x000143c2, 0x00015f30, 0x00017bb6,
+    0x00019948, 0x0001b7e6, 0x0001d771, 0x0001f7bc,
+    0x000218b4, 0x00023a42, 0x00025c3b, 0x00027ea0,
+    0x0002a150, 0x0002c440, 0x0002e771, 0x00030aed,
+    0x00032eb4, 0x000352db, 0x00037759, 0x00039c4c,
+    0x0003c1ac, 0x0003e74b, 0x00040d00, 0x0004329f,
+    0x000457de, 0x00047c9c, 0x0004a083, 0x0004c35e,
+    0x0004e502, 0x00050543, 0x000523ec, 0x000540e7,
+    0x00055c2b, 0x000575c0, 0x00058da9, 0x0005a3e4,
+    0x0005b886, 0x0005cbb1, 0x0005dd65, 0x0005edcb,
+    0x0005fcfa, 0x00060afc, 0x00061808, 0x000623fc,
+    0x00062ec3, 0x00063849, 0x0006404b, 0x000646ac,
+    0x00064b13, 0x00064d37, 0x00064cd6, 0x0006497b,
+    0x000642c5, 0x0006385e, 0x000629f0, 0x00061766,
+    0x000600a0, 0x0005e57d, 0x0005c63e, 0x0005a322,
+    0x00057c97, 0x00055306, 0x00052711, 0x0004f96f,
+    0x0004caeb, 0x00049bfc, 0x00046c96, 0x00043cbb,
+    0x00040c3f, 0x0003daab, 0x0003a734, 0x000370f9,
+    0x0003372d, 0x0002f944, 0x0002b6d4, 0x00026f71,
+    0x000222fb, 0x0001d212, 0x00017d84, 0x00012630,
+    0x0000ccda, 0x00007200, 0x0000163b, 0xffffba15,
+    0xffff5da3, 0xffff0091, 0xfffea293, 0xfffe4367,
+    0xfffde2da, 0xfffd809f, 0xfffd1c81, 0xfffcb66a,
+    0xfffc4e90, 0xfffbe53e, 0xfffb7aa0, 0xfffb0f0a,
+    0xfffaa2c9, 0xfffa3612, 0xfff9c92f, 0xfff95c2d,
+    0xfff8eef4, 0xfff8817c, 0xfff813c3, 0xfff7a5d4,
+    0xfff737e5, 0xfff6ca17, 0xfff65c9e, 0xfff5efbc,
+    0xfff58390, 0xfff51830, 0xfff4adbc, 0xfff44435,
+    0xfff3db9a, 0xfff373d6, 0xfff30cfd, 0xfff2a71c,
+    0xfff24248, 0xfff1de9f, 0xfff17c44, 0xfff11b56,
+    0xfff0bbea, 0xfff05e17, 0xfff00206, 0xffefa7d9,
+    0xffef4f99, 0xffeef95d, 0xffeea53a, 0xffee533a,
+    0xffee035e, 0xffedb5b0, 0xffed6a3c, 0xffed20f5,
+    0xffecd9fe, 0xffec9555, 0xffec5305, 0xffec1319,
+    0xffebd591, 0xffeb9a83, 0xffeb61f9, 0xffeb2bfe,
+    0xffeaf89c, 0xffeac7ea, 0xffea99d2, 0xffea6e7e,
+    0xffea45ef, 0xffea203a, 0xffe9fda0, 0xffe9decc,
+    0xffe9c3de, 0xffe9ac56, 0xffe99789, 0xffe9845e,
+    0xffe97295, 0xffe96219, 0xffe952ea, 0xffe944f3,
+    0xffe93833, 0xffe92c9f, 0xffe92238, 0xffe918fe,
+    0xffe910fb, 0xffe90a3a, 0xffe904c6, 0xffe900a0,
+    0xffe8fddb, 0xffe8fc83, 0xffe8fca4, 0xffe8fe3c,
+    0xffe9016c, 0xffe9061e, 0xffe90c74, 0xffe9146c,
+    0xffe91e11, 0xffe929a5, 0xffe93731, 0xffe946c0,
+    0xffe95833, 0xffe96b7e, 0xffe98082, 0xffe9975e,
+    0xffe9affd, 0xffe9ca5e, 0xffe9e68e, 0xffea0481,
+    0xffea242b, 0xffea458e, 0xffea6894, 0xffea8d52,
+    0xffeab3c8, 0xffeadc0c, 0xffeb05fe, 0xffeb31a7,
+    0xffeb5ede, 0xffeb8da2, 0xffebbdf4, 0xffebefbd,
+    0xffec231f, 0xffec5802, 0xffec8e5e, 0xffecc61c,
+    0xffecff1c, 0xffed391e, 0xffed740c, 0xffedafb1,
+    0xffedebe1, 0xffee287d, 0xffee654e, 0xffeea23f,
+};
diff --git a/libavcodec/aactab.h b/libavcodec/aactab.h
index 9f5a7e47..b48e7da6 100644
--- a/libavcodec/aactab.h
+++ b/libavcodec/aactab.h
@@ -32,22 +32,117 @@
 
 #include "libavutil/mem.h"
 #include "aac.h"
-#include "aac_tablegen_decl.h"
 
 #include <stdint.h>
 
 /* NOTE:
- * Tables in this file are used by the AAC decoder and will be used by the AAC
- * encoder.
+ * Tables in this file are shared by the AAC decoders and encoder
  */
 
+extern float ff_aac_pow2sf_tab[428];
+extern float ff_aac_pow34sf_tab[428];
+
+static inline void ff_aac_tableinit(void) // fills ff_aac_pow2sf_tab[] and ff_aac_pow34sf_tab[]
+{
+    int i;
+
+    /* exp2_lut[k] = 2^(k/16) for 0 <= k <= 15 (fractional-exponent factors) */
+    static const float exp2_lut[] = {
+        1.00000000000000000000,
+        1.04427378242741384032,
+        1.09050773266525765921,
+        1.13878863475669165370,
+        1.18920711500272106672,
+        1.24185781207348404859,
+        1.29683955465100966593,
+        1.35425554693689272830,
+        1.41421356237309504880,
+        1.47682614593949931139,
+        1.54221082540794082361,
+        1.61049033194925430818,
+        1.68179283050742908606,
+        1.75625216037329948311,
+        1.83400808640934246349,
+        1.91520656139714729387,
+    };
+    float t1 = 8.8817841970012523233890533447265625e-16; // 2^(-50), power-of-two scale of ff_aac_pow2sf_tab[0]
+    float t2 = 3.63797880709171295166015625e-12; // 2^(-38), power-of-two scale of ff_aac_pow34sf_tab[0]
+    int t1_inc_cur, t2_inc_cur; // exp2_lut indices used for the current entry
+    int t1_inc_prev = 0; // equals the i == 0 index, so t1 is not doubled on the first pass
+    int t2_inc_prev = 8; // equals the i == 0 index, so t2 is not doubled on the first pass
+
+    for (i = 0; i < 428; i++) { // 428 = declared length of both tables
+        t1_inc_cur = 4 * (i % 4); // cycles 0, 4, 8, 12: exponent advances by 1/4 per entry
+        t2_inc_cur = (8 + 3*i) % 16; // starts at 8, exponent advances by 3/16 per entry
+        if (t1_inc_cur < t1_inc_prev) // index wrapped around: carry into the power-of-two scale
+            t1 *= 2;
+        if (t2_inc_cur < t2_inc_prev) // same carry for the 3/4-power table
+            t2 *= 2;
+        // Incremental, much more efficient and accurate equivalent of:
+        // ff_aac_pow2sf_tab[i] = pow(2, (i - POW_SF2_ZERO) / 4.0);
+        // ff_aac_pow34sf_tab[i] = pow(ff_aac_pow2sf_tab[i], 3.0/4.0);
+        ff_aac_pow2sf_tab[i] = t1 * exp2_lut[t1_inc_cur];
+        ff_aac_pow34sf_tab[i] = t2 * exp2_lut[t2_inc_cur];
+        t1_inc_prev = t1_inc_cur; // remembered to detect the next wrap-around
+        t2_inc_prev = t2_inc_cur;
+    }
+}
+
+/* @name ltp_coef
+ * Table of the 8 LTP coefficients, listed in ascending order.
+ * NOTE(review): Q30() presumably yields Q30 fixed point when INTFLOAT is an integer type; confirm at its definition. */
+static const INTFLOAT ltp_coef[8] = {
+    Q30(0.570829f), Q30(0.696616f), Q30(0.813004f), Q30(0.911304f),
+    Q30(0.984900f), Q30(1.067894f), Q30(1.194601f), Q30(1.369533f),
+};
+
+/* @name tns_tmp2_map
+ * Tables of the tmp2[] arrays of LPC coefficients used for TNS.
+ * The suffix _M_N indicates the values of coef_compress (M) and coef_res (N)
+ * respectively; tns_tmp2_map[] lists the tables in the order 0_3, 0_4, 1_3, 1_4.
+ * @{
+ */
+static const INTFLOAT tns_tmp2_map_1_3[4] = { // first two and last two entries of tns_tmp2_map_0_3
+    Q31(0.00000000f), Q31(-0.43388373f),  Q31(0.64278758f),  Q31(0.34202015f),
+};
+
+static const INTFLOAT tns_tmp2_map_0_3[8] = {
+    Q31(0.00000000f), Q31(-0.43388373f), Q31(-0.78183150f), Q31(-0.97492790f),
+    Q31(0.98480773f), Q31( 0.86602539f), Q31( 0.64278758f), Q31( 0.34202015f),
+};
+
+static const INTFLOAT tns_tmp2_map_1_4[8] = { // first four and last four entries of tns_tmp2_map_0_4
+    Q31(0.00000000f), Q31(-0.20791170f), Q31(-0.40673664f), Q31(-0.58778524f),
+    Q31(0.67369562f), Q31( 0.52643216f), Q31( 0.36124167f), Q31( 0.18374951f),
+};
+
+static const INTFLOAT tns_tmp2_map_0_4[16] = {
+    Q31( 0.00000000f), Q31(-0.20791170f), Q31(-0.40673664f), Q31(-0.58778524f),
+    Q31(-0.74314481f), Q31(-0.86602539f), Q31(-0.95105654f), Q31(-0.99452192f),
+    Q31( 0.99573416f), Q31( 0.96182561f), Q31( 0.89516330f), Q31( 0.79801720f),
+    Q31( 0.67369562f), Q31( 0.52643216f), Q31( 0.36124167f), Q31( 0.18374951f),
+};
+
+static const INTFLOAT * const tns_tmp2_map[4] = { // lookup of the four tables above
+    tns_tmp2_map_0_3, // [0]: coef_compress 0, coef_res 3 (8 entries)
+    tns_tmp2_map_0_4, // [1]: coef_compress 0, coef_res 4 (16 entries)
+    tns_tmp2_map_1_3, // [2]: coef_compress 1, coef_res 3 (4 entries)
+    tns_tmp2_map_1_4  // [3]: coef_compress 1, coef_res 4 (8 entries)
+};
+// @}
+
 /* @name window coefficients
  * @{
  */
 DECLARE_ALIGNED(32, extern float,  ff_aac_kbd_long_1024)[1024];
 DECLARE_ALIGNED(32, extern float,  ff_aac_kbd_short_128)[128];
+DECLARE_ALIGNED(32, extern int,    ff_aac_kbd_long_1024_fixed)[1024];
+DECLARE_ALIGNED(32, extern int,    ff_aac_kbd_long_512_fixed)[512];
+DECLARE_ALIGNED(32, extern int,    ff_aac_kbd_short_128_fixed)[128];
 const DECLARE_ALIGNED(32, extern float, ff_aac_eld_window_512)[1920];
+const DECLARE_ALIGNED(32, extern int,   ff_aac_eld_window_512_fixed)[1920];
 const DECLARE_ALIGNED(32, extern float, ff_aac_eld_window_480)[1800];
+const DECLARE_ALIGNED(32, extern int,   ff_aac_eld_window_480_fixed)[1800];
 // @}
 
 /* @name number of scalefactor window bands for long and short transform windows respectively
diff --git a/libavcodec/aarch64/Makefile b/libavcodec/aarch64/Makefile
index 2afff297..fd89035c 100644
--- a/libavcodec/aarch64/Makefile
+++ b/libavcodec/aarch64/Makefile
@@ -1,6 +1,9 @@
+OBJS-$(CONFIG_DCA_DECODER)              += aarch64/synth_filter_init.o
 OBJS-$(CONFIG_FFT)                      += aarch64/fft_init_aarch64.o
+OBJS-$(CONFIG_FMTCONVERT)               += aarch64/fmtconvert_init.o
 OBJS-$(CONFIG_H264CHROMA)               += aarch64/h264chroma_init_aarch64.o
 OBJS-$(CONFIG_H264DSP)                  += aarch64/h264dsp_init_aarch64.o
+OBJS-$(CONFIG_H264PRED)                 += aarch64/h264pred_init.o
 OBJS-$(CONFIG_H264QPEL)                 += aarch64/h264qpel_init_aarch64.o
 OBJS-$(CONFIG_HPELDSP)                  += aarch64/hpeldsp_init_aarch64.o
 OBJS-$(CONFIG_IMDCT15)                  += aarch64/imdct15_init.o
@@ -14,10 +17,13 @@ OBJS-$(CONFIG_VORBIS_DECODER)           += aarch64/vorbisdsp_init.o
 
 ARMV8-OBJS-$(CONFIG_VIDEODSP)           += aarch64/videodsp.o
 
+NEON-OBJS-$(CONFIG_DCA_DECODER)         += aarch64/synth_filter_neon.o
 NEON-OBJS-$(CONFIG_FFT)                 += aarch64/fft_neon.o
+NEON-OBJS-$(CONFIG_FMTCONVERT)          += aarch64/fmtconvert_neon.o
 NEON-OBJS-$(CONFIG_H264CHROMA)          += aarch64/h264cmc_neon.o
 NEON-OBJS-$(CONFIG_H264DSP)             += aarch64/h264dsp_neon.o              \
                                            aarch64/h264idct_neon.o
+NEON-OBJS-$(CONFIG_H264PRED)            += aarch64/h264pred_neon.o
 NEON-OBJS-$(CONFIG_H264QPEL)            += aarch64/h264qpel_neon.o             \
                                            aarch64/hpeldsp_neon.o
 NEON-OBJS-$(CONFIG_HPELDSP)             += aarch64/hpeldsp_neon.o
diff --git a/libavcodec/aarch64/asm-offsets.h b/libavcodec/aarch64/asm-offsets.h
index 8defd7c9..e05c5ad2 100644
--- a/libavcodec/aarch64/asm-offsets.h
+++ b/libavcodec/aarch64/asm-offsets.h
@@ -27,4 +27,7 @@
 #define CELT_TMP                        0x10
 #define CELT_TWIDDLE                    (CELT_TMP + 0x8)    // loaded as pair
 
+/* FFTContext */
+#define IMDCT_HALF                      0x48
+
 #endif /* AVCODEC_AARCH64_ASM_OFFSETS_H */
diff --git a/libavcodec/aarch64/fmtconvert_init.c b/libavcodec/aarch64/fmtconvert_init.c
new file mode 100644
index 00000000..210e74b6
--- /dev/null
+++ b/libavcodec/aarch64/fmtconvert_init.c
@@ -0,0 +1,43 @@
+/*
+ * ARM optimized Format Conversion Utils
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/aarch64/cpu.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/fmtconvert.h"
+
+void ff_int32_to_float_fmul_array8_neon(FmtConvertContext *c, float *dst,
+                                        const int32_t *src, const float *mul,
+                                        int len);
+void ff_int32_to_float_fmul_scalar_neon(float *dst, const int32_t *src,
+                                        float mul, int len);
+
+av_cold void ff_fmt_convert_init_aarch64(FmtConvertContext *c,
+                                         AVCodecContext *avctx) /* avctx is not read in this function */
+{   /* Installs the NEON int32->float routines into *c; *c is left untouched when have_neon() rejects the flags. */
+    int cpu_flags = av_get_cpu_flags(); /* CPU feature flags queried at call time */
+
+    if (have_neon(cpu_flags)) { /* only override the function pointers when NEON is reported */
+        c->int32_to_float_fmul_array8 = ff_int32_to_float_fmul_array8_neon;
+        c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_neon;
+    }
+}
diff --git a/libavcodec/aarch64/fmtconvert_neon.S b/libavcodec/aarch64/fmtconvert_neon.S
new file mode 100644
index 00000000..2161c3a8
--- /dev/null
+++ b/libavcodec/aarch64/fmtconvert_neon.S
@@ -0,0 +1,76 @@
+/*
+ * ARM NEON optimised Format Conversion Utils
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
+ * Copyright (c) 2015 Janne Grunau  <janne-libav@jannau.net>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/aarch64/asm.S"
+
+function ff_int32_to_float_fmul_scalar_neon, export=1
+        ld1             {v1.4s,v2.4s}, [x1], #32        // preload 8 ints from x1 (src in the C prototype; dst = x0, mul = s0, len = w2)
+        scvtf           v1.4s,  v1.4s                   // signed int32 -> float
+        scvtf           v2.4s,  v2.4s
+1:
+        subs            w2,  w2,  #8                    // 8 elements per pass; the flags are consumed by b.le below
+        fmul            v3.4s,  v1.4s,  v0.s[0]         // scale by lane 0 of v0
+        fmul            v4.4s,  v2.4s,  v0.s[0]
+        b.le            2f                              // count exhausted: store this batch and return
+        ld1             {v1.4s,v2.4s}, [x1], #32        // fetch the next batch before storing the current one
+        st1             {v3.4s,v4.4s}, [x0], #32
+        scvtf           v1.4s,  v1.4s
+        scvtf           v2.4s,  v2.4s
+        b               1b
+2:
+        st1             {v3.4s,v4.4s}, [x0]             // NOTE(review): one batch of 8 is always converted, so len is presumably a positive multiple of 8 - confirm
+        ret
+endfunc
+
+function ff_int32_to_float_fmul_array8_neon, export=1
+        lsr             w4,  w4,  #3                    // w4 = len / 8 groups (C prototype: x1 = dst, x2 = src, x3 = mul, w4 = len)
+        subs            w5,  w4,  #1                    // w5 = groups - 1
+        b.eq            1f                              // exactly one group: only the single-group tail runs
+2:
+        ld1             {v0.4s,v1.4s}, [x2], #32        // first group of 8 ints
+        ld1             {v2.4s,v3.4s}, [x2], #32        // second group of 8 ints
+        scvtf           v0.4s,  v0.4s                   // signed int32 -> float
+        scvtf           v1.4s,  v1.4s
+        ld1             {v16.2s},  [x3], #8             // two multipliers, one per group
+        scvtf           v2.4s,  v2.4s
+        scvtf           v3.4s,  v3.4s
+        fmul            v4.4s,  v0.4s,  v16.s[0]        // first group uses the first multiplier
+        fmul            v5.4s,  v1.4s,  v16.s[0]
+        fmul            v6.4s,  v2.4s,  v16.s[1]        // second group uses the second multiplier
+        fmul            v7.4s,  v3.4s,  v16.s[1]
+        st1             {v4.4s,v5.4s}, [x1], #32
+        st1             {v6.4s,v7.4s}, [x1], #32
+        subs            w5,  w5,  #2                    // two groups consumed; w5 = remaining groups - 1
+        b.gt            2b                              // at least two groups remain
+        b.eq            1f                              // exactly one group remains
+        ret
+1:
+        ld1             {v0.4s,v1.4s}, [x2]             // tail: a single group of 8
+        ld1             {v16.s}[0],  [x3]               // its one multiplier
+        scvtf           v0.4s,  v0.4s
+        scvtf           v1.4s,  v1.4s
+        fmul            v4.4s,  v0.4s,  v16.s[0]
+        fmul            v5.4s,  v1.4s,  v16.s[0]
+        st1             {v4.4s,v5.4s}, [x1]             // NOTE(review): len < 8 would enter the two-group loop above; presumably callers pass len >= 8 - confirm
+        ret
+endfunc
diff --git a/libavcodec/aarch64/h264idct_neon.S b/libavcodec/aarch64/h264idct_neon.S
index 04b5a47f..91f1e773 100644
--- a/libavcodec/aarch64/h264idct_neon.S
+++ b/libavcodec/aarch64/h264idct_neon.S
@@ -37,8 +37,8 @@ function ff_h264_idct_add_neon, export=1
         sub             v7.4H,  v16.4H, v3.4H
         add             v0.4H,  v4.4H,  v6.4H
         add             v1.4H,  v5.4H,  v7.4H
-        sub             v2.4H,  v4.4H,  v6.4H
-        sub             v3.4H,  v5.4H,  v7.4H
+        sub             v3.4H,  v4.4H,  v6.4H
+        sub             v2.4H,  v5.4H,  v7.4H
 
         transpose_4x4H  v0, v1, v2, v3, v4, v5, v6, v7
 
diff --git a/libavcodec/aarch64/h264pred_init.c b/libavcodec/aarch64/h264pred_init.c
new file mode 100644
index 00000000..b144376f
--- /dev/null
+++ b/libavcodec/aarch64/h264pred_init.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/aarch64/cpu.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/h264pred.h"
+
+void ff_pred16x16_vert_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred16x16_hor_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred16x16_plane_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred16x16_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred16x16_128_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred16x16_left_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred16x16_top_dc_neon(uint8_t *src, ptrdiff_t stride);
+
+void ff_pred8x8_vert_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_hor_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_plane_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_128_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_left_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_top_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_l0t_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_0lt_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_l00_dc_neon(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_0l0_dc_neon(uint8_t *src, ptrdiff_t stride);
+
+static av_cold void h264_pred_init_neon(H264PredContext *h, int codec_id,
+                                        const int bit_depth,
+                                        const int chroma_format_idc)
+{
+    /* Codec groups for which some existing predictors are left in place. */
+    const int is_vp78 = codec_id == AV_CODEC_ID_VP7 ||
+                        codec_id == AV_CODEC_ID_VP8;
+    const int is_rv40 = codec_id == AV_CODEC_ID_RV40;
+
+    if (bit_depth > 8)      /* nothing is installed above 8 bits */
+        return;
+
+    if (chroma_format_idc <= 1) {
+        h->pred8x8[VERT_PRED8x8   ] = ff_pred8x8_vert_neon;
+        h->pred8x8[HOR_PRED8x8    ] = ff_pred8x8_hor_neon;
+        h->pred8x8[DC_128_PRED8x8 ] = ff_pred8x8_128_dc_neon;
+        if (!is_vp78)
+            h->pred8x8[PLANE_PRED8x8] = ff_pred8x8_plane_neon;
+        if (!is_vp78 && !is_rv40) {
+            h->pred8x8[DC_PRED8x8     ] = ff_pred8x8_dc_neon;
+            h->pred8x8[LEFT_DC_PRED8x8] = ff_pred8x8_left_dc_neon;
+            h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_neon;
+            h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8] = ff_pred8x8_l0t_dc_neon;
+            h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8] = ff_pred8x8_0lt_dc_neon;
+            h->pred8x8[ALZHEIMER_DC_L00_PRED8x8] = ff_pred8x8_l00_dc_neon;
+            h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8] = ff_pred8x8_0l0_dc_neon;
+        }
+    }
+
+    h->pred16x16[VERT_PRED8x8   ] = ff_pred16x16_vert_neon;
+    h->pred16x16[HOR_PRED8x8    ] = ff_pred16x16_hor_neon;
+    h->pred16x16[DC_PRED8x8     ] = ff_pred16x16_dc_neon;
+    h->pred16x16[LEFT_DC_PRED8x8] = ff_pred16x16_left_dc_neon;
+    h->pred16x16[TOP_DC_PRED8x8 ] = ff_pred16x16_top_dc_neon;
+    h->pred16x16[DC_128_PRED8x8 ] = ff_pred16x16_128_dc_neon;
+    if (!is_vp78 && !is_rv40 && codec_id != AV_CODEC_ID_SVQ3)
+        h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_neon;
+}
+
+av_cold void ff_h264_pred_init_aarch64(H264PredContext *h, int codec_id,
+                                       int bit_depth, const int chroma_format_idc)
+{
+    const int flags = av_get_cpu_flags();
+    if (!have_neon(flags))
+        return;                 /* no NEON: h keeps its current predictors */
+    h264_pred_init_neon(h, codec_id, bit_depth, chroma_format_idc);
+}
diff --git a/libavcodec/aarch64/h264pred_neon.S b/libavcodec/aarch64/h264pred_neon.S
new file mode 100644
index 00000000..213b40b3
--- /dev/null
+++ b/libavcodec/aarch64/h264pred_neon.S
@@ -0,0 +1,361 @@
+/*
+ * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/aarch64/asm.S"
+
+.macro ldcol.8  rd,  rs,  rt,  n=8,  hi=0
+.if \n >= 8 || \hi == 0
+        ld1             {\rd\().b}[0],  [\rs], \rt      // gather one byte per load into successive lanes of rd; rs is post-incremented by rt after every load
+        ld1             {\rd\().b}[1],  [\rs], \rt      // lanes 0-3 are filled when n >= 8 or hi == 0
+        ld1             {\rd\().b}[2],  [\rs], \rt
+        ld1             {\rd\().b}[3],  [\rs], \rt
+.endif
+.if \n >= 8 || \hi == 1
+        ld1             {\rd\().b}[4],  [\rs], \rt      // lanes 4-7 are filled when n >= 8 or hi == 1
+        ld1             {\rd\().b}[5],  [\rs], \rt
+        ld1             {\rd\().b}[6],  [\rs], \rt
+        ld1             {\rd\().b}[7],  [\rs], \rt
+.endif
+.if \n == 16
+        ld1             {\rd\().b}[8],  [\rs], \rt      // lanes 8-15 are filled only when n == 16
+        ld1             {\rd\().b}[9],  [\rs], \rt
+        ld1             {\rd\().b}[10], [\rs], \rt
+        ld1             {\rd\().b}[11], [\rs], \rt
+        ld1             {\rd\().b}[12], [\rs], \rt
+        ld1             {\rd\().b}[13], [\rs], \rt
+        ld1             {\rd\().b}[14], [\rs], \rt
+        ld1             {\rd\().b}[15], [\rs], \rt
+.endif
+.endm
+
+function ff_pred16x16_128_dc_neon, export=1
+        movi            v0.16b,  #128                   // fill value 128 in all 16 byte lanes
+        b               .L_pred16x16_dc_end             // shared store loop inside ff_pred16x16_dc_neon
+endfunc
+
+function ff_pred16x16_top_dc_neon, export=1
+        sub             x2,  x0,  x1                    // x2 = x0 - x1: the row above the block (src - stride in the C prototype)
+        ld1             {v0.16b},  [x2]                 // 16 pixels above
+        uaddlv          h0,  v0.16b                     // sum of the 16 bytes
+        rshrn           v0.8b,  v0.8h,  #4              // lane 0 = (sum + 8) >> 4
+        dup             v0.16b, v0.b[0]                 // broadcast to the whole row vector
+        b               .L_pred16x16_dc_end             // shared store loop inside ff_pred16x16_dc_neon
+endfunc
+
+function ff_pred16x16_left_dc_neon, export=1
+        sub             x2,  x0,  #1                    // x2 = x0 - 1: start of the column left of the block; gathered into v0 below
+        ldcol.8         v0,  x2,  x1, 16
+        uaddlv          h0,  v0.16b                     // sum of the 16 left pixels
+        rshrn           v0.8b,  v0.8h,  #4              // lane 0 = (sum + 8) >> 4
+        dup             v0.16b, v0.b[0]                 // broadcast to the whole row vector
+        b               .L_pred16x16_dc_end             // shared store loop inside ff_pred16x16_dc_neon
+endfunc
+
+function ff_pred16x16_dc_neon, export=1
+        sub             x2,  x0,  x1                    // x2 = x0 - x1: the row above the block
+        sub             x3,  x0,  #1                    // x3 = x0 - 1: the column left of the block
+        ld1             {v0.16b}, [x2]                  // 16 top pixels; the 16 left pixels are gathered into v1 below
+        ldcol.8         v1,  x3,  x1, 16
+        uaddlv          h0,  v0.16b                     // sum of the top pixels
+        uaddlv          h1,  v1.16b                     // sum of the left pixels
+        add             v0.4h,  v0.4h,  v1.4h           // lane 0 = sum of all 32 neighbours
+        rshrn           v0.8b,  v0.8h,  #5              // lane 0 = (sum + 16) >> 5
+        dup             v0.16b, v0.b[0]
+.L_pred16x16_dc_end:
+        mov             w3,  #8                         // shared tail (also reached from the 128/top/left variants): 8 passes x 2 rows
+6:      st1             {v0.16b}, [x0], x1              // write v0 to one row, advance x0 by x1
+        st1             {v0.16b}, [x0], x1
+        subs            w3,  w3,  #1
+        b.ne            6b
+        ret
+endfunc
+
+function ff_pred16x16_hor_neon, export=1
+        sub             x2,  x0,  #1                    // x2 -> byte immediately left of the current row
+        mov             w3,  #16                        // 16 rows
+1:      ld1r            {v0.16b}, [x2], x1              // replicate that byte into all 16 lanes, step x2 down one row
+        st1             {v0.16b}, [x0], x1              // write the row, step x0 down one row
+        subs            w3,  w3,  #1
+        b.ne            1b
+        ret
+endfunc
+
+function ff_pred16x16_vert_neon, export=1
+        sub             x2,  x0,  x1                    // x2 = the row above the block
+        add             x1,  x1,  x1                    // x1 = 2 * original x1
+        ld1             {v0.16b}, [x2], x1              // load the row above; x2 now points one original row below x0
+        mov             w3,  #8                         // 8 passes x 2 rows
+1:      st1             {v0.16b}, [x0], x1              // rows 0, 2, 4, ... through x0
+        st1             {v0.16b}, [x2], x1              // rows 1, 3, 5, ... through x2
+        subs            w3,  w3,  #1
+        b.ne            1b
+        ret
+endfunc
+
+function ff_pred16x16_plane_neon, export=1
+        sub             x3,  x0,  x1                    // x3 = x0 - x1: the row above the block ("top" below)
+        movrel          x4,  p16weight
+        add             x2,  x3,  #8                    // x2 -> top[8]
+        sub             x3,  x3,  #1                    // x3 -> top[-1], the top-left corner
+        ld1             {v0.8b},  [x3]                  // top[-1..6]
+        ld1             {v2.8b},  [x2], x1              // top[8..15]
+        ldcol.8         v1,  x3,  x1
+        add             x3,  x3,  x1                    // skip one row between the two 8-row column gathers
+        ldcol.8         v3,  x3,  x1
+        rev64           v0.8b,  v0.8b                   // reverse to top[6..-1]
+        rev64           v1.8b,  v1.8b                   // same reversal for the first column gather
+        uaddl           v7.8h,  v2.8b,  v3.8b           // top[8+i] + second column gather[i]
+        usubl           v2.8h,  v2.8b,  v0.8b           // top[8+i] - top[6-i]
+        usubl           v3.8h,  v3.8b,  v1.8b           // the same differences down the left column
+        ld1             {v0.8h},     [x4]               // weights 1..8
+        mul             v2.8h,  v2.8h,  v0.8h
+        mul             v3.8h,  v3.8h,  v0.8h
+        addp            v2.8h,  v2.8h,  v3.8h
+        addp            v2.8h,  v2.8h,  v2.8h
+        addp            v2.4h,  v2.4h,  v2.4h           // lane 0 = weighted top sum, lane 1 = weighted left sum
+        sshll           v3.4s,  v2.4h,  #2
+        saddw           v2.4s,  v3.4s,  v2.4h           // 5 * sum
+        rshrn           v4.4h,  v2.4s,  #6              // (5 * sum + 32) >> 6: lane 0 = b, lane 1 = c
+        trn2            v5.4h,  v4.4h,  v4.4h           // lane 0 = c
+        add             v2.4h,  v4.4h,  v5.4h           // lane 0 = b + c
+        shl             v3.4h,  v2.4h,  #3
+        ext             v7.16b, v7.16b, v7.16b, #14     // rotate so lane 0 holds the former lane 7 (last top + last left pixel)
+        sub             v3.4h,  v3.4h,  v2.4h   // 7 * (b + c)
+        add             v7.4h,  v7.4h,  v0.4h           // lane 0: + 1 (the first weight)
+        shl             v2.4h,  v7.4h,  #4
+        sub             v2.4h,  v2.4h,  v3.4h           // lane 0 = 16 * (top[15] + left[15] + 1) - 7 * (b + c)
+        shl             v3.4h,  v4.4h,  #4              // lane 0 = 16 * b
+        ext             v0.16b, v0.16b, v0.16b, #14     // rotate the weights up by one lane
+        sub             v6.4h,  v5.4h,  v3.4h           // lane 0 = c - 16 * b
+        mov             v0.h[0],  wzr                   // weights are now 0..7
+        mul             v0.8h,  v0.8h,  v4.h[0]         // i * b for i = 0..7
+        dup             v1.8h,  v2.h[0]
+        dup             v2.8h,  v4.h[0]
+        dup             v3.8h,  v6.h[0]
+        shl             v2.8h,  v2.8h,  #3              // 8 * b: step from the left half of a row to the right half
+        add             v1.8h,  v1.8h,  v0.8h           // accumulators for pixels 0..7 of the first row
+        add             v3.8h,  v3.8h,  v2.8h           // c - 8 * b: step from the right half to the next row's left half
+        mov             w3,  #16                        // 16 rows
+1:
+        sqshrun         v0.8b,  v1.8h,  #5              // >> 5 with unsigned saturation: pixels 0..7
+        add             v1.8h,  v1.8h,  v2.8h
+        sqshrun2        v0.16b, v1.8h,  #5              // pixels 8..15
+        add             v1.8h,  v1.8h,  v3.8h
+        st1             {v0.16b}, [x0], x1
+        subs            w3,  w3,  #1
+        b.ne            1b
+        ret
+endfunc
+
+const   p16weight, align=4
+        .short          1,2,3,4,5,6,7,8                 // loaded as eight 16-bit lanes by ff_pred16x16_plane_neon and ff_pred8x8_plane_neon
+endconst
+const   p8weight, align=4
+        .short          1,2,3,4,1,2,3,4                 // loaded as eight 16-bit lanes by ff_pred8x8_plane_neon
+endconst
+
+function ff_pred8x8_hor_neon, export=1
+        sub             x2,  x0,  #1                    // x2 -> byte immediately left of the current row
+        mov             w3,  #8                         // 8 rows
+1:      ld1r            {v0.8b},  [x2], x1              // replicate that byte into all 8 lanes, step x2 down one row
+        st1             {v0.8b},  [x0], x1              // write the row, step x0 down one row
+        subs            w3,  w3,  #1
+        b.ne            1b
+        ret
+endfunc
+
+function ff_pred8x8_vert_neon, export=1
+        sub             x2,  x0,  x1                    // x2 = the row above the block
+        lsl             x1,  x1,  #1                    // x1 = 2 * original x1
+        ld1             {v0.8b},  [x2], x1              // load the row above; x2 now points one original row below x0
+        mov             w3,  #4                         // 4 passes x 2 rows
+1:      st1             {v0.8b},  [x0], x1              // rows 0, 2, 4, 6 through x0
+        st1             {v0.8b},  [x2], x1              // rows 1, 3, 5, 7 through x2
+        subs            w3,  w3,  #1
+        b.ne            1b
+        ret
+endfunc
+
+function ff_pred8x8_plane_neon, export=1
+        sub             x3,  x0,  x1                    // x3 = x0 - x1: the row above the block ("top" below)
+        movrel          x4,  p8weight
+        movrel          x5,  p16weight
+        add             x2,  x3,  #4                    // x2 -> top[4]
+        sub             x3,  x3,  #1                    // x3 -> top[-1], the top-left corner
+        ld1             {v0.s}[0],  [x3]                // top[-1..2] into bytes 0-3 of v0
+        ld1             {v2.s}[0],  [x2], x1            // top[4..7] into bytes 0-3 of v2
+        ldcol.8         v0,  x3,  x1,  4,  hi=1
+        add             x3,  x3,  x1                    // skip one row between the two 4-row column gathers
+        ldcol.8         v3,  x3,  x1,  4
+        uaddl           v7.8h,  v2.8b,  v3.8b           // lanes 0-3: top[4+i] + second column gather[i]
+        rev32           v0.8b,  v0.8b                   // reverse the bytes inside each 32-bit half of v0
+        trn1            v2.2s,  v2.2s,  v3.2s           // v2 = { top[4..7], second column gather }
+        usubl           v2.8h,  v2.8b,  v0.8b           // differences against the reversed first halves
+        ld1             {v6.8h},  [x4]                  // weights 1,2,3,4,1,2,3,4
+        mul             v2.8h,  v2.8h,  v6.8h
+        ld1             {v0.8h},  [x5]                  // weights 1..8
+        saddlp          v2.4s,  v2.8h
+        addp            v2.4s,  v2.4s,  v2.4s           // lane 0 = weighted top sum, lane 1 = weighted left sum
+        shl             v3.4s,  v2.4s,  #4
+        add             v2.4s,  v3.4s,  v2.4s           // 17 * sum
+        rshrn           v5.4h,  v2.4s,  #5              // (17 * sum + 16) >> 5: lane 0 = b, lane 1 = c
+        addp            v2.4h,  v5.4h,  v5.4h           // lane 0 = b + c
+        shl             v3.4h,  v2.4h,  #1
+        add             v3.4h,  v3.4h,  v2.4h           // 3 * (b + c)
+        rev64           v7.4h,  v7.4h                   // lane 0 now holds the former lane 3 (top[7] + last left pixel)
+        add             v7.4h,  v7.4h,  v0.4h           // lane 0: + 1 (the first weight)
+        shl             v2.4h,  v7.4h,  #4
+        sub             v2.4h,  v2.4h,  v3.4h           // lane 0 = 16 * (top[7] + left[7] + 1) - 3 * (b + c)
+        ext             v0.16b, v0.16b, v0.16b, #14     // rotate the weights up by one lane
+        mov             v0.h[0],  wzr                   // weights are now 0..7
+        mul             v0.8h,  v0.8h,  v5.h[0]         // i * b for i = 0..7
+        dup             v1.8h,  v2.h[0]
+        dup             v2.8h,  v5.h[1]                 // c: per-row step
+        add             v1.8h,  v1.8h,  v0.8h           // accumulators for the first row
+        mov             w3,  #8                         // 8 rows
+1:
+        sqshrun         v0.8b,  v1.8h,  #5              // >> 5 with unsigned saturation
+        add             v1.8h,  v1.8h,  v2.8h
+        st1             {v0.8b},  [x0], x1
+        subs            w3,  w3,  #1
+        b.ne            1b
+        ret
+endfunc
+
+function ff_pred8x8_128_dc_neon, export=1
+        movi            v0.8b,  #128                    // v0: row pattern written to rows 0-3 by the shared tail
+        movi            v1.8b,  #128                    // v1: row pattern written to rows 4-7 by the shared tail
+        b               .L_pred8x8_dc_end               // shared store loop inside ff_pred8x8_dc_neon
+endfunc
+
+function ff_pred8x8_top_dc_neon, export=1
+        sub             x2,  x0,  x1                    // x2 = x0 - x1: the row above the block
+        ld1             {v0.8b},  [x2]                  // 8 top pixels
+        uaddlp          v0.4h,  v0.8b                   // pairwise sums
+        addp            v0.4h,  v0.4h,  v0.4h           // lane 0 = sum of top[0..3], lane 1 = sum of top[4..7]
+        zip1            v0.8h,  v0.8h,  v0.8h           // duplicate every lane
+        rshrn           v2.8b,  v0.8h,  #2              // (sum + 2) >> 2
+        zip1            v0.8b,  v2.8b,  v2.8b           // rows 0-3: 4 x left-half average, 4 x right-half average
+        zip1            v1.8b,  v2.8b,  v2.8b           // rows 4-7: the same pattern
+        b               .L_pred8x8_dc_end               // shared store loop inside ff_pred8x8_dc_neon
+endfunc
+
+function ff_pred8x8_left_dc_neon, export=1
+        sub             x2,  x0,  #1                    // x2 = x0 - 1: start of the column left of the block; 8 rows gathered into v0 below
+        ldcol.8         v0,  x2,  x1
+        uaddlp          v0.4h,  v0.8b                   // pairwise sums
+        addp            v0.4h,  v0.4h,  v0.4h           // lane 0 = sum of left[0..3], lane 1 = sum of left[4..7]
+        rshrn           v2.8b,  v0.8h,  #2              // (sum + 2) >> 2
+        dup             v1.8b,  v2.b[1]                 // rows 4-7: average of left[4..7]
+        dup             v0.8b,  v2.b[0]                 // rows 0-3: average of left[0..3]
+        b               .L_pred8x8_dc_end               // shared store loop inside ff_pred8x8_dc_neon
+endfunc
+
+function ff_pred8x8_dc_neon, export=1
+        sub             x2,  x0,  x1                    // x2 = x0 - x1: the row above the block
+        sub             x3,  x0,  #1                    // x3 = x0 - 1: the column left of the block
+        ld1             {v0.8b}, [x2]                   // 8 top pixels; the 8 left pixels are gathered into v1 below
+        ldcol.8         v1,  x3,  x1
+        uaddlp          v0.4h,  v0.8b                   // pairwise sums of the top pixels
+        uaddlp          v1.4h,  v1.8b                   // pairwise sums of the left pixels
+        trn1            v2.2s,  v0.2s,  v1.2s           // pair sums of top[0..3] and left[0..3]
+        trn2            v3.2s,  v0.2s,  v1.2s           // pair sums of top[4..7] and left[4..7]
+        addp            v4.4h,  v2.4h,  v3.4h           // sums: top[0..3], left[0..3], top[4..7], left[4..7]
+        addp            v5.4h,  v4.4h,  v4.4h           // lane 0 = top[0..3] + left[0..3], lane 1 = top[4..7] + left[4..7]
+        rshrn           v6.8b,  v5.8h,  #3              // (sum + 4) >> 3 of the 8-pixel sums
+        rshrn           v7.8b,  v4.8h,  #2              // (sum + 2) >> 2 of the 4-pixel sums
+        dup             v0.8b,  v6.b[0]                 // top-left quadrant
+        dup             v2.8b,  v7.b[2]                 // top-right quadrant: average of top[4..7]
+        dup             v1.8b,  v7.b[3]                 // bottom-left quadrant: average of left[4..7]
+        dup             v3.8b,  v6.b[1]                 // bottom-right quadrant
+        zip1            v0.2s,  v0.2s,  v2.2s           // row pattern for rows 0-3
+        zip1            v1.2s,  v1.2s,  v3.2s           // row pattern for rows 4-7
+.L_pred8x8_dc_end:
+        mov             w3,  #4                         // shared tail (also reached from the other pred8x8 DC variants): 4 passes
+        add             x2,  x0,  x1,  lsl #2           // x2 = x0 + 4 * x1: start of row 4
+6:      st1             {v0.8b},  [x0], x1              // rows 0-3 receive v0
+        st1             {v1.8b},  [x2], x1              // rows 4-7 receive v1
+        subs            w3,  w3,  #1
+        b.ne            6b
+        ret
+endfunc
+
+function ff_pred8x8_l0t_dc_neon, export=1
+        sub             x2,  x0,  x1                    // x2 = x0 - x1: the row above the block
+        sub             x3,  x0,  #1                    // x3 = x0 - 1: the column left of the block
+        ld1             {v0.8b},  [x2]                  // 8 top pixels; left[0..3] go into bytes 0-3 of v1 below
+        ldcol.8         v1,  x3,  x1,  4
+        zip1            v0.4s,  v0.4s,  v1.4s           // words: top[0..3], left[0..3], top[4..7], (unloaded half of v1)
+        uaddlp          v0.8h,  v0.16b                  // pairwise sums
+        addp            v0.8h,  v0.8h,  v0.8h           // lanes 0-2 = sums of top[0..3], left[0..3], top[4..7]
+        addp            v1.4h,  v0.4h,  v0.4h           // lane 0 = top[0..3] + left[0..3]
+        rshrn           v2.8b,  v0.8h,  #2              // (sum + 2) >> 2 of the 4-pixel sums
+        rshrn           v3.8b,  v1.8h,  #3              // (sum + 4) >> 3 of the 8-pixel sum
+        dup             v4.8b,  v3.b[0]                 // average of top[0..3] and left[0..3]
+        dup             v6.8b,  v2.b[2]                 // average of top[4..7]
+        dup             v5.8b,  v2.b[0]                 // average of top[0..3]
+        zip1            v0.2s,  v4.2s,  v6.2s           // row pattern for rows 0-3
+        zip1            v1.2s,  v5.2s,  v6.2s           // row pattern for rows 4-7
+        b               .L_pred8x8_dc_end               // shared store loop inside ff_pred8x8_dc_neon
+endfunc
+
+function ff_pred8x8_l00_dc_neon, export=1
+        sub             x2,  x0,  #1                    // x2 = x0 - 1: the column left of the block; left[0..3] go into bytes 0-3 of v0 below
+        ldcol.8         v0,  x2,  x1,  4
+        uaddlp          v0.4h,  v0.8b                   // pairwise sums
+        addp            v0.4h,  v0.4h,  v0.4h           // lane 0 = sum of left[0..3]
+        rshrn           v0.8b,  v0.8h,  #2              // (sum + 2) >> 2
+        movi            v1.8b,  #128                    // rows 4-7: constant 128
+        dup             v0.8b,  v0.b[0]                 // rows 0-3: average of left[0..3]
+        b               .L_pred8x8_dc_end               // shared store loop inside ff_pred8x8_dc_neon
+endfunc
+
+function ff_pred8x8_0lt_dc_neon, export=1
+        add             x3,  x0,  x1,  lsl #2           // x3 = x0 + 4 * x1: start of row 4
+        sub             x2,  x0,  x1                    // x2 = x0 - x1: the row above the block
+        sub             x3,  x3,  #1                    // x3 -> left[4]
+        ld1             {v0.8b},  [x2]                  // 8 top pixels; left[4..7] go into bytes 4-7 of v1 below
+        ldcol.8         v1,  x3,  x1,  4,  hi=1
+        zip1            v0.4s,  v0.4s,  v1.4s           // words: top[0..3], (unloaded half of v1), top[4..7], left[4..7]
+        uaddlp          v0.8h,  v0.16b                  // pairwise sums
+        addp            v0.8h,  v0.8h,  v0.8h           // lanes 0, 2, 3 = sums of top[0..3], top[4..7], left[4..7]
+        addp            v1.4h,  v0.4h,  v0.4h           // lane 1 = top[4..7] + left[4..7]
+        rshrn           v2.8b,  v0.8h,  #2              // (sum + 2) >> 2 of the 4-pixel sums
+        rshrn           v3.8b,  v1.8h,  #3              // (sum + 4) >> 3 of the 8-pixel sum
+        dup             v4.8b,  v2.b[0]                 // average of top[0..3]
+        dup             v5.8b,  v2.b[3]                 // average of left[4..7]
+        dup             v6.8b,  v2.b[2]                 // average of top[4..7]
+        dup             v7.8b,  v3.b[1]                 // average of top[4..7] and left[4..7]
+        zip1            v0.2s,  v4.2s,  v6.2s           // row pattern for rows 0-3
+        zip1            v1.2s,  v5.2s,  v7.2s           // row pattern for rows 4-7
+        b               .L_pred8x8_dc_end               // shared store loop inside ff_pred8x8_dc_neon
+endfunc
+
+function ff_pred8x8_0l0_dc_neon, export=1
+        add             x2,  x0,  x1,  lsl #2           // x2 = x0 + 4 * x1: start of row 4
+        sub             x2,  x2,  #1                    // x2 -> left[4]; left[4..7] go into bytes 0-3 of v1 below
+        ldcol.8         v1,  x2,  x1,  4
+        uaddlp          v2.4h,  v1.8b                   // pairwise sums
+        addp            v2.4h,  v2.4h,  v2.4h           // lane 0 = sum of left[4..7]
+        rshrn           v1.8b,  v2.8h,  #2              // (sum + 2) >> 2
+        movi            v0.8b,  #128                    // rows 0-3: constant 128
+        dup             v1.8b,  v1.b[0]                 // rows 4-7: average of left[4..7]
+        b               .L_pred8x8_dc_end               // shared store loop inside ff_pred8x8_dc_neon
+endfunc
diff --git a/libavcodec/aarch64/neon.S b/libavcodec/aarch64/neon.S
index 619aec64..a227cbd3 100644
--- a/libavcodec/aarch64/neon.S
+++ b/libavcodec/aarch64/neon.S
@@ -107,8 +107,8 @@
 .macro  transpose_4x4H  r0, r1, r2, r3, r4, r5, r6, r7
         trn1            \r4\().4H,  \r0\().4H,  \r1\().4H
         trn2            \r5\().4H,  \r0\().4H,  \r1\().4H
-        trn1            \r7\().4H,  \r3\().4H,  \r2\().4H
-        trn2            \r6\().4H,  \r3\().4H,  \r2\().4H
+        trn1            \r7\().4H,  \r2\().4H,  \r3\().4H
+        trn2            \r6\().4H,  \r2\().4H,  \r3\().4H
         trn1            \r0\().2S,  \r4\().2S,  \r7\().2S
         trn2            \r3\().2S,  \r4\().2S,  \r7\().2S
         trn1            \r1\().2S,  \r5\().2S,  \r6\().2S
diff --git a/libavcodec/aarch64/synth_filter_init.c b/libavcodec/aarch64/synth_filter_init.c
new file mode 100644
index 00000000..767b0111
--- /dev/null
+++ b/libavcodec/aarch64/synth_filter_init.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include "libavutil/aarch64/cpu.h"
+#include "libavutil/attributes.h"
+#include "libavutil/internal.h"
+#include "libavcodec/fft.h"
+#include "libavcodec/synth_filter.h"
+
+#include "asm-offsets.h"
+
+#if HAVE_NEON || HAVE_VFP
+AV_CHECK_OFFSET(FFTContext, imdct_half, IMDCT_HALF);
+#endif
+
+void ff_synth_filter_float_neon(FFTContext *imdct,
+                                float *synth_buf_ptr, int *synth_buf_offset,
+                                float synth_buf2[32], const float window[512],
+                                float out[32], const float in[32],
+                                float scale);
+
+av_cold void ff_synth_filter_init_aarch64(SynthFilterContext *s)
+{
+    const int flags = av_get_cpu_flags();
+    if (!have_neon(flags))
+        return;                 /* no NEON: s keeps its current filter */
+    s->synth_filter_float = ff_synth_filter_float_neon;
+}
diff --git a/libavcodec/aarch64/synth_filter_neon.S b/libavcodec/aarch64/synth_filter_neon.S
new file mode 100644
index 00000000..65551cbf
--- /dev/null
+++ b/libavcodec/aarch64/synth_filter_neon.S
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
+ * Copyright (c) 2015 Janne Grunau <janne-libav@jannau.net>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "asm-offsets.h"
+
+#include "libavutil/aarch64/asm.S"
+
+.macro inner_loop
+        ld1             {v29.4s},  [x9],  x15           // four floats from each of x8-x11; every pointer is post-incremented by x15
+        ld1             {v28.4s},  [x8],  x15
+        ld1             {v30.4s},  [x10], x15
+        ld1             {v31.4s},  [x11], x15
+        rev64           v28.4s, v28.4s                  // rev64 + ext #8 below = full reversal of the four floats
+        ld1             {v24.4s},  [x4],  x15           // four floats from each of x4-x7, same post-increment
+        ld1             {v25.4s},  [x5],  x15
+        rev64           v31.4s, v31.4s
+        ld1             {v26.4s},  [x6],  x15
+        fmla            v5.4s,  v25.4s, v29.4s          // v5 += v25 * v29
+        ld1             {v27.4s},  [x7],  x15
+        ext             v28.16b, v28.16b, v28.16b, #8
+        ext             v31.16b, v31.16b, v31.16b, #8
+        fmla            v6.4s,  v26.4s, v30.4s          // v6 += v26 * v30
+        fmls            v4.4s,  v24.4s, v28.4s          // v4 -= v24 * reversed v28
+        fmla            v7.4s,  v27.4s, v31.4s          // v7 += v27 * reversed v31
+.endm
+
+function ff_synth_filter_float_neon, export=1
+        ldr             w7,  [x2]               // *synth_buf_offset
+        ldr             x9,  [x0, #IMDCT_HALF]  // imdct_half function pointer
+        sxtw            x7,  w7
+        stp             x3,  x4,  [sp, #-64]!   // reserve 64 bytes of stack; save synth_buf2, window
+        add             x1,  x1,  x7,  lsl #2   // synth_buf
+        sub             w8,  w7,  #32
+        stp             x5,  x1,  [sp, #16]     // save out, synth_buf
+        bic             x7,  x7,  #63           // offset with its low six bits cleared
+        and             w8,  w8,  #511          // (offset - 32) & 511
+        stp             x7,  x30, [sp, #32]     // save the masked offset and the return address
+        str             w8,  [x2]               // *synth_buf_offset = (offset - 32) & 511
+        str             s0,  [sp, #48]          // save scale across the call
+
+        mov             x2,  x6                 // in
+
+        blr             x9                      // call with x0 = imdct, x1 = synth_buf, x2 = in
+
+        ldp             x2,  x4,  [sp]          // synth_buf2, window
+        ldp             x13, x9,  [sp, #16]     // out, synth_buf
+        ldp             x0,  x30, [sp, #32]     // offset & ~63 (as saved above), return address
+        ldr             s0,  [sp, #48]          // scale
+
+        add             x3,  x2,  #16*4         // synth_buf2 + 16
+        add             x14, x13, #16*4         // out + 16
+        add             x8,  x9,  #12*4         // synth_buf + 12
+        mov             x15, #64*4              // post-increment used by inner_loop: 64 floats
+        mov             x1,  #4                 // 4 outer passes, 4 floats per output pointer each
+1:
+        add             x10, x9,  #16*4         // synth_buf
+        add             x11, x8,  #16*4
+        add             x5,  x4,  #16*4         // window
+        add             x6,  x4,  #32*4
+        add             x7,  x4,  #48*4
+
+        ld1             {v4.4s},   [x2]         // a
+        ld1             {v5.4s},   [x3]         // b
+        movi            v6.4s,  #0              // c
+        movi            v7.4s,  #0              // d
+
+        mov             x12, #512
+2:
+        sub             x12, x12, #64
+        cmp             x12, x0                 // flags survive inner_loop (loads and vector ops only)
+        inner_loop
+        b.gt            2b                      // repeat while x12 > masked offset
+
+        sub             x8,  x8,  #512*4        // step x8/x9 back by 512 floats
+        sub             x9,  x9,  #512*4
+        cbz             x12, 4f                 // x12 reached 0: no second loop needed
+        sub             x10, x10, #512*4        // step x10/x11 back by 512 floats as well
+        sub             x11, x11, #512*4
+3:
+        subs            x12, x12, #64
+        inner_loop
+        b.gt            3b                      // repeat until x12 reaches 0
+4:
+        subs            x1,  x1,  #1            // flags are consumed by b.le below
+        fmul            v4.4s,  v4.4s,  v0.s[0] // a * scale
+        fmul            v5.4s,  v5.4s,  v0.s[0] // b * scale
+        st1             {v6.4s},   [x2],  #16   // c -> synth_buf2
+        st1             {v7.4s},   [x3],  #16   // d -> synth_buf2 + 16
+        st1             {v4.4s},   [x13], #16   // scaled a -> out
+        st1             {v5.4s},   [x14], #16   // scaled b -> out + 16
+        b.le            10f                     // all 4 outer passes done
+
+        sub             x4,  x4,  #508*4        // window
+        add             x9,  x9,  #4*4          // synth_buf
+        sub             x8,  x8,  #4*4          // synth_buf
+        b               1b
+
+10:
+        add             sp,  sp,  #64           // release the 64-byte frame
+        ret
+endfunc
diff --git a/libavcodec/aasc.c b/libavcodec/aasc.c
index 469fc5ee..6abf3b7a 100644
--- a/libavcodec/aasc.c
+++ b/libavcodec/aasc.c
@@ -101,7 +101,7 @@ static int aasc_decode_frame(AVCodecContext *avctx,
     switch (avctx->codec_tag) {
     case MKTAG('A', 'A', 'S', '4'):
         bytestream2_init(&s->gb, buf - 4, buf_size + 4);
-        ff_msrle_decode(avctx, (AVPicture*)s->frame, 8, &s->gb);
+        ff_msrle_decode(avctx, s->frame, 8, &s->gb);
         break;
     case MKTAG('A', 'A', 'S', 'C'):
     switch (compr) {
@@ -117,7 +117,7 @@ static int aasc_decode_frame(AVCodecContext *avctx,
         break;
     case 1:
         bytestream2_init(&s->gb, buf, buf_size);
-        ff_msrle_decode(avctx, (AVPicture*)s->frame, 8, &s->gb);
+        ff_msrle_decode(avctx, s->frame, 8, &s->gb);
         break;
     default:
         av_log(avctx, AV_LOG_ERROR, "Unknown compression type %d\n", compr);
@@ -158,5 +158,5 @@ AVCodec ff_aasc_decoder = {
     .init           = aasc_decode_init,
     .close          = aasc_decode_end,
     .decode         = aasc_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/ac3.c b/libavcodec/ac3.c
index b54315dc..1d4eaa56 100644
--- a/libavcodec/ac3.c
+++ b/libavcodec/ac3.c
@@ -39,8 +39,6 @@ const uint8_t ff_ac3_band_start_tab[AC3_CRITICAL_BANDS+1] = {
      79,  85, 97, 109, 121, 133, 157, 181, 205, 229, 253
 };
 
-#if CONFIG_HARDCODED_TABLES
-
 /**
  * Map each frequency coefficient bin to the critical band that contains it.
  */
@@ -69,10 +67,6 @@ const uint8_t ff_ac3_bin_to_band_tab[253] = {
     49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49
 };
 
-#else /* CONFIG_HARDCODED_TABLES */
-uint8_t ff_ac3_bin_to_band_tab[253];
-#endif
-
 static inline int calc_lowcomp1(int a, int b0, int b1, int c)
 {
     if ((b0 + 256) == b1) {
@@ -214,21 +208,3 @@ int ff_ac3_bit_alloc_calc_mask(AC3BitAllocParameters *s, int16_t *band_psd,
     }
     return 0;
 }
-
-/**
- * Initialize some tables.
- * note: This function must remain thread safe because it is called by the
- *       AVParser init code.
- */
-av_cold void ff_ac3_common_init(void)
-{
-#if !CONFIG_HARDCODED_TABLES
-    /* compute ff_ac3_bin_to_band_tab from ff_ac3_band_start_tab */
-    int bin = 0, band;
-    for (band = 0; band < AC3_CRITICAL_BANDS; band++) {
-        int band_end = ff_ac3_band_start_tab[band+1];
-        while (bin < band_end)
-            ff_ac3_bin_to_band_tab[bin++] = band;
-    }
-#endif /* !CONFIG_HARDCODED_TABLES */
-}
diff --git a/libavcodec/ac3.h b/libavcodec/ac3.h
index 1fe30b95..747f2f56 100644
--- a/libavcodec/ac3.h
+++ b/libavcodec/ac3.h
@@ -73,8 +73,8 @@
 #define AC3_SPX_BLEND(x)        (x)
 #define AC3_DYNAMIC_RANGE1      0
 
-#define INTFLOAT                int
-#define SHORTFLOAT              int16_t
+typedef int                     INTFLOAT;
+typedef int16_t                 SHORTFLOAT;
 
 #else /* USE_FIXED */
 
@@ -92,18 +92,18 @@
 #define AC3_SPX_BLEND(x)        (x)* (1.0f/32)
 #define AC3_DYNAMIC_RANGE1      1.0f
 
-#define INTFLOAT                float
-#define SHORTFLOAT              float
+typedef float                   INTFLOAT;
+typedef float                   SHORTFLOAT;
 
 #endif /* USE_FIXED */
 
-#define AC3_LEVEL(x)            ROUND15((x) * FIXR15(0.7071067811865476))
+#define AC3_LEVEL(x)            ROUND15((x) * FIXR15(M_SQRT1_2))
 
 /* pre-defined gain values */
-#define LEVEL_PLUS_3DB          1.4142135623730950
+#define LEVEL_PLUS_3DB          M_SQRT2
 #define LEVEL_PLUS_1POINT5DB    1.1892071150027209
 #define LEVEL_MINUS_1POINT5DB   0.8408964152537145
-#define LEVEL_MINUS_3DB         0.7071067811865476
+#define LEVEL_MINUS_3DB         M_SQRT1_2
 #define LEVEL_MINUS_4POINT5DB   0.5946035575013605
 #define LEVEL_MINUS_6DB         0.5000000000000000
 #define LEVEL_MINUS_9DB         0.3535533905932738
@@ -190,9 +190,7 @@ typedef struct AC3HeaderInfo {
     int surround_mix_level;                 ///< Surround mix level index
     uint16_t channel_map;
     int num_blocks;                         ///< number of audio blocks
-#if AV_HAVE_INCOMPATIBLE_LIBAV_ABI
     int dolby_surround_mode;
-#endif
     /** @} */
 
     /** @name Derived values
@@ -205,9 +203,6 @@ typedef struct AC3HeaderInfo {
     uint16_t frame_size;
     uint64_t channel_layout;
     /** @} */
-#if !AV_HAVE_INCOMPATIBLE_LIBAV_ABI
-    int dolby_surround_mode;
-#endif
 } AC3HeaderInfo;
 
 typedef enum {
diff --git a/libavcodec/ac3_parser.c b/libavcodec/ac3_parser.c
index 678f08d2..83dd90ff 100644
--- a/libavcodec/ac3_parser.c
+++ b/libavcodec/ac3_parser.c
@@ -47,7 +47,7 @@ static const uint8_t center_levels[4] = { 4, 5, 6, 5 };
 static const uint8_t surround_levels[4] = { 4, 6, 7, 6 };
 
 
-int avpriv_ac3_parse_header2(GetBitContext *gbc, AC3HeaderInfo **phdr)
+int avpriv_ac3_parse_header(GetBitContext *gbc, AC3HeaderInfo **phdr)
 {
     int frame_size_code;
     AC3HeaderInfo *hdr;
@@ -151,28 +151,19 @@ int avpriv_ac3_parse_header2(GetBitContext *gbc, AC3HeaderInfo **phdr)
     return 0;
 }
 
-int avpriv_ac3_parse_header(GetBitContext *gbc, AC3HeaderInfo *hdr)
-{
-    AC3HeaderInfo tmp, *ptmp = &tmp;
-    int ret = avpriv_ac3_parse_header2(gbc, &ptmp);
-
-    memcpy(hdr, ptmp, ((intptr_t)&tmp.channel_layout) - ((intptr_t)&tmp) + sizeof(uint64_t));
-    return ret;
-}
-
 static int ac3_sync(uint64_t state, AACAC3ParseContext *hdr_info,
         int *need_next_header, int *new_frame_start)
 {
     int err;
     union {
         uint64_t u64;
-        uint8_t  u8[8 + FF_INPUT_BUFFER_PADDING_SIZE];
+        uint8_t  u8[8 + AV_INPUT_BUFFER_PADDING_SIZE];
     } tmp = { av_be2ne64(state) };
     AC3HeaderInfo hdr, *phdr = &hdr;
     GetBitContext gbc;
 
     init_get_bits(&gbc, tmp.u8+8-AC3_HEADER_SIZE, 54);
-    err = avpriv_ac3_parse_header2(&gbc, &phdr);
+    err = avpriv_ac3_parse_header(&gbc, &phdr);
 
     if(err < 0)
         return 0;
diff --git a/libavcodec/ac3_parser.h b/libavcodec/ac3_parser.h
index f37387d7..dc5d035e 100644
--- a/libavcodec/ac3_parser.h
+++ b/libavcodec/ac3_parser.h
@@ -37,8 +37,6 @@
  * -2 if the bsid (version) element is invalid, -3 if the fscod (sample rate)
  * element is invalid, or -4 if the frmsizecod (bit rate) element is invalid.
  */
-int avpriv_ac3_parse_header2(GetBitContext *gbc, AC3HeaderInfo **hdr);
-
-int avpriv_ac3_parse_header(GetBitContext *gbc, AC3HeaderInfo *hdr);
+int avpriv_ac3_parse_header(GetBitContext *gbc, AC3HeaderInfo **hdr);
 
 #endif /* AVCODEC_AC3_PARSER_H */
diff --git a/libavcodec/ac3dec.c b/libavcodec/ac3dec.c
index 234b469b..f82f3974 100644
--- a/libavcodec/ac3dec.c
+++ b/libavcodec/ac3dec.c
@@ -185,7 +185,6 @@ static av_cold int ac3_decode_init(AVCodecContext *avctx)
 
     s->avctx = avctx;
 
-    ff_ac3_common_init();
     ac3_tables_init();
     ff_mdct_init(&s->imdct_256, 8, 1, 1.0);
     ff_mdct_init(&s->imdct_512, 9, 1, 1.0);
@@ -193,13 +192,13 @@ static av_cold int ac3_decode_init(AVCodecContext *avctx)
     ff_bswapdsp_init(&s->bdsp);
 
 #if (USE_FIXED)
-    s->fdsp = avpriv_alloc_fixed_dsp(avctx->flags & CODEC_FLAG_BITEXACT);
+    s->fdsp = avpriv_alloc_fixed_dsp(avctx->flags & AV_CODEC_FLAG_BITEXACT);
 #else
-    s->fdsp = avpriv_float_dsp_alloc(avctx->flags & CODEC_FLAG_BITEXACT);
+    s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
     ff_fmt_convert_init(&s->fmt_conv, avctx);
 #endif
 
-    ff_ac3dsp_init(&s->ac3dsp, avctx->flags & CODEC_FLAG_BITEXACT);
+    ff_ac3dsp_init(&s->ac3dsp, avctx->flags & AV_CODEC_FLAG_BITEXACT);
     av_lfg_init(&s->dith_state, 0);
 
     if (USE_FIXED)
@@ -208,14 +207,6 @@ static av_cold int ac3_decode_init(AVCodecContext *avctx)
         avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
 
     /* allow downmixing to stereo or mono */
-#if FF_API_REQUEST_CHANNELS
-FF_DISABLE_DEPRECATION_WARNINGS
-    if (avctx->request_channels == 1)
-        avctx->request_channel_layout = AV_CH_LAYOUT_MONO;
-    else if (avctx->request_channels == 2)
-        avctx->request_channel_layout = AV_CH_LAYOUT_STEREO;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
     if (avctx->channels > 1 &&
         avctx->request_channel_layout == AV_CH_LAYOUT_MONO)
         avctx->channels = 1;
@@ -306,7 +297,7 @@ static int parse_frame_header(AC3DecodeContext *s)
     AC3HeaderInfo hdr, *phdr=&hdr;
     int err;
 
-    err = avpriv_ac3_parse_header2(&s->gbc, &phdr);
+    err = avpriv_ac3_parse_header(&s->gbc, &phdr);
     if (err)
         return err;
 
@@ -420,7 +411,8 @@ static void set_downmix_coeffs(AC3DecodeContext *s)
  * Decode the grouped exponents according to exponent strategy.
  * reference: Section 7.1.3 Exponent Decoding
  */
-static int decode_exponents(GetBitContext *gbc, int exp_strategy, int ngrps,
+static int decode_exponents(AC3DecodeContext *s,
+                            GetBitContext *gbc, int exp_strategy, int ngrps,
                             uint8_t absexp, int8_t *dexps)
 {
     int i, j, grp, group_size;
@@ -440,8 +432,10 @@ static int decode_exponents(GetBitContext *gbc, int exp_strategy, int ngrps,
     prevexp = absexp;
     for (i = 0, j = 0; i < ngrps * 3; i++) {
         prevexp += dexp[i] - 2;
-        if (prevexp > 24U)
+        if (prevexp > 24U) {
+            av_log(s->avctx, AV_LOG_ERROR, "exponent %d is out-of-range\n", prevexp);
             return -1;
+        }
         switch (group_size) {
         case 4: dexps[j++] = prevexp;
                 dexps[j++] = prevexp;
@@ -901,11 +895,13 @@ static int decode_audio_block(AC3DecodeContext *s, int blk)
                                   ff_eac3_default_spx_band_struct,
                                   &s->num_spx_bands,
                                   s->spx_band_sizes);
-        } else {
-            for (ch = 1; ch <= fbw_channels; ch++) {
-                s->channel_uses_spx[ch] = 0;
-                s->first_spx_coords[ch] = 1;
-            }
+        }
+    }
+    if (!s->eac3 || !s->spx_in_use) {
+        s->spx_in_use = 0;
+        for (ch = 1; ch <= fbw_channels; ch++) {
+            s->channel_uses_spx[ch] = 0;
+            s->first_spx_coords[ch] = 1;
         }
     }
 
@@ -1150,10 +1146,9 @@ static int decode_audio_block(AC3DecodeContext *s, int blk)
     for (ch = !cpl_in_use; ch <= s->channels; ch++) {
         if (s->exp_strategy[blk][ch] != EXP_REUSE) {
             s->dexps[ch][0] = get_bits(gbc, 4) << !ch;
-            if (decode_exponents(gbc, s->exp_strategy[blk][ch],
+            if (decode_exponents(s, gbc, s->exp_strategy[blk][ch],
                                  s->num_exp_groups[ch], s->dexps[ch][0],
                                  &s->dexps[ch][s->start_freq[ch]+!!ch])) {
-                av_log(s->avctx, AV_LOG_ERROR, "exponent out-of-range\n");
                 return AVERROR_INVALIDDATA;
             }
             if (ch != CPL_CH && ch != s->lfe_ch)
diff --git a/libavcodec/ac3dec.h b/libavcodec/ac3dec.h
index 5259c600..b3498fec 100644
--- a/libavcodec/ac3dec.h
+++ b/libavcodec/ac3dec.h
@@ -235,7 +235,7 @@ typedef struct AC3DecodeContext {
     DECLARE_ALIGNED(32, INTFLOAT, window)[AC3_BLOCK_SIZE];                              ///< window coefficients
     DECLARE_ALIGNED(32, INTFLOAT, tmp_output)[AC3_BLOCK_SIZE];                          ///< temporary storage for output before windowing
     DECLARE_ALIGNED(32, SHORTFLOAT, output)[AC3_MAX_CHANNELS][AC3_BLOCK_SIZE];            ///< output after imdct transform and windowing
-    DECLARE_ALIGNED(32, uint8_t, input_buffer)[AC3_FRAME_BUFFER_SIZE + FF_INPUT_BUFFER_PADDING_SIZE]; ///< temp buffer to prevent overread
+    DECLARE_ALIGNED(32, uint8_t, input_buffer)[AC3_FRAME_BUFFER_SIZE + AV_INPUT_BUFFER_PADDING_SIZE]; ///< temp buffer to prevent overread
 ///@}
 } AC3DecodeContext;
 
diff --git a/libavcodec/ac3dec_fixed.c b/libavcodec/ac3dec_fixed.c
index b4beee6d..6416da43 100644
--- a/libavcodec/ac3dec_fixed.c
+++ b/libavcodec/ac3dec_fixed.c
@@ -169,7 +169,7 @@ static void ac3_downmix_c_fixed16(int16_t **samples, int16_t (*matrix)[2],
 
 static const AVOption options[] = {
     { "drc_scale", "percentage of dynamic range compression to apply", OFFSET(drc_scale), AV_OPT_TYPE_FLOAT, {.dbl = 1.0}, 0.0, 6.0, PAR },
-    { "heavy_compr", "heavy dynamic range compression enabled", OFFSET(heavy_compression), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 1, PAR },
+    { "heavy_compr", "enable heavy dynamic range compression", OFFSET(heavy_compression), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, PAR },
     { NULL},
 };
 
@@ -188,7 +188,7 @@ AVCodec ff_ac3_fixed_decoder = {
     .init           = ac3_decode_init,
     .close          = ac3_decode_end,
     .decode         = ac3_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .long_name      = NULL_IF_CONFIG_SMALL("ATSC A/52A (AC-3)"),
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P,
                                                       AV_SAMPLE_FMT_NONE },
diff --git a/libavcodec/ac3dec_float.c b/libavcodec/ac3dec_float.c
index d74a0df6..0a5319a3 100644
--- a/libavcodec/ac3dec_float.c
+++ b/libavcodec/ac3dec_float.c
@@ -33,7 +33,7 @@
 
 static const AVOption options[] = {
     { "drc_scale", "percentage of dynamic range compression to apply", OFFSET(drc_scale), AV_OPT_TYPE_FLOAT, {.dbl = 1.0}, 0.0, 6.0, PAR },
-    { "heavy_compr", "heavy dynamic range compression enabled", OFFSET(heavy_compression), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 1, PAR },
+    { "heavy_compr", "enable heavy dynamic range compression", OFFSET(heavy_compression), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, PAR },
     { "target_level", "target level in -dBFS (0 not applied)", OFFSET(target_level), AV_OPT_TYPE_INT, {.i64 = 0 }, -31, 0, PAR },
 
 {"dmix_mode", "Preferred Stereo Downmix Mode", OFFSET(preferred_stereo_downmix), AV_OPT_TYPE_INT, {.i64 = -1 }, -1, 2, 0, "dmix_mode"},
@@ -60,7 +60,7 @@ AVCodec ff_ac3_decoder = {
     .init           = ac3_decode_init,
     .close          = ac3_decode_end,
     .decode         = ac3_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .long_name      = NULL_IF_CONFIG_SMALL("ATSC A/52A (AC-3)"),
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                       AV_SAMPLE_FMT_NONE },
@@ -83,7 +83,7 @@ AVCodec ff_eac3_decoder = {
     .init           = ac3_decode_init,
     .close          = ac3_decode_end,
     .decode         = ac3_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .long_name      = NULL_IF_CONFIG_SMALL("ATSC A/52B (AC-3, E-AC-3)"),
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                       AV_SAMPLE_FMT_NONE },
diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index 50803de3..636ca720 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -1183,7 +1183,7 @@ static inline int asym_quant(int c, int e, int qbits)
 {
     int m;
 
-    c = (((c << e) >> (24 - qbits)) + 1) >> 1;
+    c = (((c * (1<<e)) >> (24 - qbits)) + 1) >> 1;
     m = (1 << (qbits-1));
     if (c >= m)
         c = m - 1;
@@ -2153,8 +2153,9 @@ static av_cold int validate_options(AC3EncodeContext *s)
 
     /* validate bit rate */
     if (s->eac3) {
-        int max_br, min_br, wpf, min_br_dist, min_br_code;
+        int max_br, min_br, wpf, min_br_code;
         int num_blks_code, num_blocks, frame_samples;
+        long long min_br_dist;
 
         /* calculate min/max bitrate */
         /* TODO: More testing with 3 and 2 blocks. All E-AC-3 samples I've
@@ -2184,9 +2185,9 @@ static av_cold int validate_options(AC3EncodeContext *s)
            this is needed for lookup tables for bandwidth and coupling
            parameter selection */
         min_br_code = -1;
-        min_br_dist = INT_MAX;
+        min_br_dist = INT64_MAX;
         for (i = 0; i < 19; i++) {
-            int br_dist = abs(ff_ac3_bitrate_tab[i] * 1000 - avctx->bit_rate);
+            long long br_dist = llabs(ff_ac3_bitrate_tab[i] * 1000 - avctx->bit_rate);
             if (br_dist < min_br_dist) {
                 min_br_dist = br_dist;
                 min_br_code = i;
@@ -2199,10 +2200,11 @@ static av_cold int validate_options(AC3EncodeContext *s)
             wpf--;
         s->frame_size_min = 2 * wpf;
     } else {
-        int best_br = 0, best_code = 0, best_diff = INT_MAX;
+        int best_br = 0, best_code = 0;
+        long long best_diff = INT64_MAX;
         for (i = 0; i < 19; i++) {
             int br   = (ff_ac3_bitrate_tab[i] >> s->bit_alloc.sr_shift) * 1000;
-            int diff = abs(br - avctx->bit_rate);
+            long long diff = llabs(br - avctx->bit_rate);
             if (diff < best_diff) {
                 best_br   = br;
                 best_code = i;
@@ -2429,8 +2431,6 @@ av_cold int ff_ac3_encode_init(AVCodecContext *avctx)
 
     s->eac3 = avctx->codec_id == AV_CODEC_ID_EAC3;
 
-    ff_ac3_common_init();
-
     ret = validate_options(s);
     if (ret)
         return ret;
@@ -2484,7 +2484,7 @@ av_cold int ff_ac3_encode_init(AVCodecContext *avctx)
 
     ff_audiodsp_init(&s->adsp);
     ff_me_cmp_init(&s->mecc, avctx);
-    ff_ac3dsp_init(&s->ac3dsp, avctx->flags & CODEC_FLAG_BITEXACT);
+    ff_ac3dsp_init(&s->ac3dsp, avctx->flags & AV_CODEC_FLAG_BITEXACT);
 
     dprint_options(s);
 
diff --git a/libavcodec/ac3enc_float.c b/libavcodec/ac3enc_float.c
index 766b14ec..6c91f459 100644
--- a/libavcodec/ac3enc_float.c
+++ b/libavcodec/ac3enc_float.c
@@ -139,7 +139,7 @@ static CoefType calc_cpl_coord(CoefSumType energy_ch, CoefSumType energy_cpl)
 av_cold int ff_ac3_float_encode_init(AVCodecContext *avctx)
 {
     AC3EncodeContext *s = avctx->priv_data;
-    s->fdsp = avpriv_float_dsp_alloc(avctx->flags & CODEC_FLAG_BITEXACT);
+    s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
     if (!s->fdsp)
         return AVERROR(ENOMEM);
     return ff_ac3_encode_init(avctx);
diff --git a/libavcodec/ac3enc_opts_template.c b/libavcodec/ac3enc_opts_template.c
index 83113b8d..57b65a7a 100644
--- a/libavcodec/ac3enc_opts_template.c
+++ b/libavcodec/ac3enc_opts_template.c
@@ -25,7 +25,7 @@
 
 static const AVOption ac3_options[] = {
 /* Metadata Options */
-{"per_frame_metadata", "Allow Changing Metadata Per-Frame", OFFSET(allow_per_frame_metadata), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 1, AC3ENC_PARAM},
+{"per_frame_metadata", "Allow Changing Metadata Per-Frame", OFFSET(allow_per_frame_metadata), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, AC3ENC_PARAM},
 #if AC3ENC_TYPE != AC3ENC_TYPE_EAC3
 /* AC-3 downmix levels */
 {"center_mixlev", "Center Mix Level", OFFSET(center_mix_level), AV_OPT_TYPE_FLOAT, {.dbl = LEVEL_MINUS_4POINT5DB }, 0.0, 1.0, AC3ENC_PARAM},
@@ -68,7 +68,7 @@ static const AVOption ac3_options[] = {
     {"standard", "Standard (default)", 0, AV_OPT_TYPE_CONST, {.i64 = AC3ENC_OPT_ADCONV_STANDARD }, INT_MIN, INT_MAX, AC3ENC_PARAM, "ad_conv_type"},
     {"hdcd",     "HDCD",               0, AV_OPT_TYPE_CONST, {.i64 = AC3ENC_OPT_ADCONV_HDCD     }, INT_MIN, INT_MAX, AC3ENC_PARAM, "ad_conv_type"},
 /* Other Encoding Options */
-{"stereo_rematrixing", "Stereo Rematrixing", OFFSET(stereo_rematrixing), AV_OPT_TYPE_INT, {.i64 = AC3ENC_OPT_ON }, AC3ENC_OPT_OFF, AC3ENC_OPT_ON, AC3ENC_PARAM},
+{"stereo_rematrixing", "Stereo Rematrixing", OFFSET(stereo_rematrixing), AV_OPT_TYPE_BOOL, {.i64 = 1 }, 0, 1, AC3ENC_PARAM},
 {"channel_coupling",   "Channel Coupling",   OFFSET(channel_coupling),   AV_OPT_TYPE_INT, {.i64 = AC3ENC_OPT_AUTO }, AC3ENC_OPT_AUTO, AC3ENC_OPT_ON, AC3ENC_PARAM, "channel_coupling"},
     {"auto", "Selected by the Encoder", 0, AV_OPT_TYPE_CONST, {.i64 = AC3ENC_OPT_AUTO }, INT_MIN, INT_MAX, AC3ENC_PARAM, "channel_coupling"},
 {"cpl_start_band", "Coupling Start Band", OFFSET(cpl_start), AV_OPT_TYPE_INT, {.i64 = AC3ENC_OPT_AUTO }, AC3ENC_OPT_AUTO, 15, AC3ENC_PARAM, "cpl_start_band"},
diff --git a/libavcodec/ac3enc_template.c b/libavcodec/ac3enc_template.c
index c3ad76f4..9dec9ae9 100644
--- a/libavcodec/ac3enc_template.c
+++ b/libavcodec/ac3enc_template.c
@@ -438,7 +438,7 @@ int AC3_NAME(encode_frame)(AVCodecContext *avctx, AVPacket *avpkt,
 
     ff_ac3_quantize_mantissas(s);
 
-    if ((ret = ff_alloc_packet2(avctx, avpkt, s->frame_size)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, avpkt, s->frame_size, 0)) < 0)
         return ret;
     ff_ac3_output_frame(s, avpkt->data);
 
diff --git a/libavcodec/ac3tab.h b/libavcodec/ac3tab.h
index 74cbd9ed..f529fc80 100644
--- a/libavcodec/ac3tab.h
+++ b/libavcodec/ac3tab.h
@@ -27,12 +27,6 @@
 #include "libavutil/internal.h"
 #include "ac3.h"
 
-#if CONFIG_HARDCODED_TABLES
-#   define HCONST const
-#else
-#   define HCONST
-#endif
-
 extern const uint16_t ff_ac3_frame_size_tab[38][3];
 extern const uint8_t  ff_ac3_channels_tab[8];
 extern av_export const uint16_t avpriv_ac3_channel_layout_tab[8];
@@ -54,7 +48,7 @@ extern const int16_t  ff_ac3_floor_tab[8];
 extern const uint16_t ff_ac3_fast_gain_tab[8];
 extern const uint16_t ff_eac3_default_chmap[8];
 extern const uint8_t  ff_ac3_band_start_tab[AC3_CRITICAL_BANDS+1];
-extern HCONST uint8_t ff_ac3_bin_to_band_tab[253];
+extern const uint8_t  ff_ac3_bin_to_band_tab[253];
 
 /** Custom channel map locations bitmask
  *  Other channels described in documentation:
diff --git a/libavcodec/acelp_filters.c b/libavcodec/acelp_filters.c
index 9ab758b9..35aa863e 100644
--- a/libavcodec/acelp_filters.c
+++ b/libavcodec/acelp_filters.c
@@ -70,7 +70,7 @@ void ff_acelp_interpolate(int16_t* out, const int16_t* in,
             v += in[n - i] * filter_coeffs[idx - frac_pos];
         }
         if (av_clip_int16(v >> 15) != (v >> 15))
-            av_log(NULL, AV_LOG_WARNING, "overflow that would need cliping in ff_acelp_interpolate()\n");
+            av_log(NULL, AV_LOG_WARNING, "overflow that would need clipping in ff_acelp_interpolate()\n");
         out[n] = v >> 15;
     }
 }
diff --git a/libavcodec/acelp_pitch_delay.c b/libavcodec/acelp_pitch_delay.c
index 3ecec01c..c2533b1e 100644
--- a/libavcodec/acelp_pitch_delay.c
+++ b/libavcodec/acelp_pitch_delay.c
@@ -22,6 +22,7 @@
 
 #include "libavutil/common.h"
 #include "libavutil/float_dsp.h"
+#include "libavutil/internal.h"
 #include "libavutil/libm.h"
 #include "libavutil/mathematics.h"
 #include "avcodec.h"
@@ -132,7 +133,7 @@ float ff_amr_set_fixed_gain(float fixed_gain_factor, float fixed_mean_energy,
     // ^g_c = ^gamma_gc * 100.05 (predicted dB + mean dB - dB of fixed vector)
     // Note 10^(0.05 * -10log(average x2)) = 1/sqrt((average x2)).
     float val = fixed_gain_factor *
-        exp2f(M_LOG2_10 * 0.05 *
+        ff_exp10(0.05 *
               (avpriv_scalarproduct_float_c(pred_table, prediction_error, 4) +
                energy_mean)) /
         sqrtf(fixed_mean_energy);
diff --git a/libavcodec/adpcm.c b/libavcodec/adpcm.c
index 22b54684..c4a0a186 100644
--- a/libavcodec/adpcm.c
+++ b/libavcodec/adpcm.c
@@ -84,8 +84,9 @@ static const int swf_index_tables[4][16] = {
 /* end of tables */
 
 typedef struct ADPCMDecodeContext {
-    ADPCMChannelStatus status[6];
+    ADPCMChannelStatus status[14];
     int vqa_version;                /**< VQA version. Used for ADPCM_IMA_WS */
+    int has_status;
 } ADPCMDecodeContext;
 
 static av_cold int adpcm_decode_init(AVCodecContext * avctx)
@@ -104,9 +105,15 @@ static av_cold int adpcm_decode_init(AVCodecContext * avctx)
     case AV_CODEC_ID_ADPCM_EA_R2:
     case AV_CODEC_ID_ADPCM_EA_R3:
     case AV_CODEC_ID_ADPCM_EA_XAS:
-    case AV_CODEC_ID_ADPCM_THP:
         max_channels = 6;
         break;
+    case AV_CODEC_ID_ADPCM_PSX:
+        max_channels = 8;
+        break;
+    case AV_CODEC_ID_ADPCM_THP:
+    case AV_CODEC_ID_ADPCM_THP_LE:
+        max_channels = 14;
+        break;
     }
     if (avctx->channels < min_channels || avctx->channels > max_channels) {
         av_log(avctx, AV_LOG_ERROR, "Invalid number of channels\n");
@@ -136,6 +143,7 @@ static av_cold int adpcm_decode_init(AVCodecContext * avctx)
     }
 
     switch(avctx->codec->id) {
+        case AV_CODEC_ID_ADPCM_AICA:
         case AV_CODEC_ID_ADPCM_IMA_QT:
         case AV_CODEC_ID_ADPCM_IMA_WAV:
         case AV_CODEC_ID_ADPCM_4XM:
@@ -145,8 +153,10 @@ static av_cold int adpcm_decode_init(AVCodecContext * avctx)
         case AV_CODEC_ID_ADPCM_EA_R3:
         case AV_CODEC_ID_ADPCM_EA_XAS:
         case AV_CODEC_ID_ADPCM_THP:
+        case AV_CODEC_ID_ADPCM_THP_LE:
         case AV_CODEC_ID_ADPCM_AFC:
         case AV_CODEC_ID_ADPCM_DTK:
+        case AV_CODEC_ID_ADPCM_PSX:
             avctx->sample_fmt = AV_SAMPLE_FMT_S16P;
             break;
         case AV_CODEC_ID_ADPCM_IMA_WS:
@@ -512,6 +522,7 @@ static int get_nb_samples(AVCodecContext *avctx, GetByteContext *gb,
     case AV_CODEC_ID_ADPCM_IMA_OKI:
     case AV_CODEC_ID_ADPCM_IMA_WS:
     case AV_CODEC_ID_ADPCM_YAMAHA:
+    case AV_CODEC_ID_ADPCM_AICA:
         nb_samples = buf_size * 2 / ch;
         break;
     }
@@ -636,15 +647,22 @@ static int get_nb_samples(AVCodecContext *avctx, GetByteContext *gb,
         break;
     }
     case AV_CODEC_ID_ADPCM_THP:
+    case AV_CODEC_ID_ADPCM_THP_LE:
         if (avctx->extradata) {
-            nb_samples = buf_size / (8 * ch) * 14;
+            nb_samples = buf_size * 14 / (8 * ch);
             break;
         }
         has_coded_samples = 1;
         bytestream2_skip(gb, 4); // channel size
-        *coded_samples  = bytestream2_get_be32(gb);
-        *coded_samples -= *coded_samples % 14;
-        nb_samples      = (buf_size - (8 + 36 * ch)) / (8 * ch) * 14;
+        *coded_samples  = (avctx->codec->id == AV_CODEC_ID_ADPCM_THP_LE) ?
+                          bytestream2_get_le32(gb) :
+                          bytestream2_get_be32(gb);
+        buf_size       -= 8 + 36 * ch;
+        buf_size       /= ch;
+        nb_samples      = buf_size / 8 * 14;
+        if (buf_size % 8 > 1)
+            nb_samples     += (buf_size % 8 - 1) * 2;
+        *approx_nb_samples = 1;
         break;
     case AV_CODEC_ID_ADPCM_AFC:
         nb_samples = buf_size / (9 * ch) * 16;
@@ -653,6 +671,7 @@ static int get_nb_samples(AVCodecContext *avctx, GetByteContext *gb,
         nb_samples = (buf_size / 128) * 224 / ch;
         break;
     case AV_CODEC_ID_ADPCM_DTK:
+    case AV_CODEC_ID_ADPCM_PSX:
         nb_samples = buf_size / (16 * ch) * 28;
         break;
     }
@@ -763,7 +782,9 @@ static int adpcm_decode_frame(AVCodecContext *avctx, void *data,
             int samples_per_block = ff_adpcm_ima_block_samples[avctx->bits_per_coded_sample - 2];
             GetBitContext g;
 
-            init_get_bits8(&g, gb.buffer, bytestream2_get_bytes_left(&gb));
+            ret = init_get_bits8(&g, gb.buffer, bytestream2_get_bytes_left(&gb));
+            if (ret < 0)
+                return ret;
             for (n = 0; n < (nb_samples - 1) / samples_per_block; n++) {
                 for (i = 0; i < avctx->channels; i++) {
                     cs = &c->status[i];
@@ -1361,6 +1382,21 @@ static int adpcm_decode_frame(AVCodecContext *avctx, void *data,
             *samples++ = adpcm_yamaha_expand_nibble(&c->status[st], v >> 4  );
         }
         break;
+    case AV_CODEC_ID_ADPCM_AICA:
+        if (!c->has_status) { /* has_status is cleared by adpcm_flush(); reset each channel's step once */
+            for (channel = 0; channel < avctx->channels; channel++)
+                c->status[channel].step = 0;
+            c->has_status = 1;
+        }
+        for (channel = 0; channel < avctx->channels; channel++) {
+            samples = samples_p[channel]; /* each channel is written to its own output plane */
+            for (n = nb_samples >> 1; n > 0; n--) { /* one input byte yields two output samples */
+                int v = bytestream2_get_byteu(&gb);
+                *samples++ = adpcm_yamaha_expand_nibble(&c->status[channel], v & 0x0F); /* low nibble first */
+                *samples++ = adpcm_yamaha_expand_nibble(&c->status[channel], v >> 4  ); /* then high nibble */
+            }
+        }
+        break;
     case AV_CODEC_ID_ADPCM_AFC:
     {
         int samples_per_block;
@@ -1415,10 +1451,17 @@ static int adpcm_decode_frame(AVCodecContext *avctx, void *data,
         break;
     }
     case AV_CODEC_ID_ADPCM_THP:
+    case AV_CODEC_ID_ADPCM_THP_LE:
     {
-        int table[6][16];
+        int table[14][16];
         int ch;
 
+#define THP_GET16(g) \
+    sign_extend( \
+        avctx->codec->id == AV_CODEC_ID_ADPCM_THP_LE ? \
+        bytestream2_get_le16u(&(g)) : \
+        bytestream2_get_be16u(&(g)), 16)
+
         if (avctx->extradata) {
             GetByteContext tb;
             if (avctx->extradata_size < 32 * avctx->channels) {
@@ -1429,24 +1472,29 @@ static int adpcm_decode_frame(AVCodecContext *avctx, void *data,
             bytestream2_init(&tb, avctx->extradata, avctx->extradata_size);
             for (i = 0; i < avctx->channels; i++)
                 for (n = 0; n < 16; n++)
-                    table[i][n] = sign_extend(bytestream2_get_be16u(&tb), 16);
+                    table[i][n] = THP_GET16(tb);
         } else {
-        for (i = 0; i < avctx->channels; i++)
-            for (n = 0; n < 16; n++)
-                table[i][n] = sign_extend(bytestream2_get_be16u(&gb), 16);
+            for (i = 0; i < avctx->channels; i++)
+                for (n = 0; n < 16; n++)
+                    table[i][n] = THP_GET16(gb);
 
-        /* Initialize the previous sample.  */
-        for (i = 0; i < avctx->channels; i++) {
-            c->status[i].sample1 = sign_extend(bytestream2_get_be16u(&gb), 16);
-            c->status[i].sample2 = sign_extend(bytestream2_get_be16u(&gb), 16);
-        }
+            if (!c->has_status) {
+                /* Initialize the previous sample.  */
+                for (i = 0; i < avctx->channels; i++) {
+                    c->status[i].sample1 = THP_GET16(gb);
+                    c->status[i].sample2 = THP_GET16(gb);
+                }
+                c->has_status = 1;
+            } else {
+                bytestream2_skip(&gb, avctx->channels * 4);
+            }
         }
 
         for (ch = 0; ch < avctx->channels; ch++) {
             samples = samples_p[ch];
 
             /* Read in every sample for this channel.  */
-            for (i = 0; i < nb_samples / 14; i++) {
+            for (i = 0; i < (nb_samples + 13) / 14; i++) {
                 int byte = bytestream2_get_byteu(&gb);
                 int index = (byte >> 4) & 7;
                 unsigned int exp = byte & 0x0F;
@@ -1454,7 +1502,7 @@ static int adpcm_decode_frame(AVCodecContext *avctx, void *data,
                 int factor2 = table[ch][index * 2 + 1];
 
                 /* Decode 14 samples.  */
-                for (n = 0; n < 14; n++) {
+                for (n = 0; n < 14 && (i * 14 + n < nb_samples); n++) {
                     int32_t sampledat;
 
                     if (n & 1) {
@@ -1522,6 +1570,43 @@ static int adpcm_decode_frame(AVCodecContext *avctx, void *data,
                 bytestream2_seek(&gb, 0, SEEK_SET);
         }
         break;
+    case AV_CODEC_ID_ADPCM_PSX:
+        for (channel = 0; channel < avctx->channels; channel++) {
+            samples = samples_p[channel];
+
+            /* Decode nb_samples / 28 blocks for this channel; each block yields 28 samples.  */
+            for (i = 0; i < nb_samples / 28; i++) {
+                int filter, shift, flag, byte;
+
+                filter = bytestream2_get_byteu(&gb);
+                shift  = filter & 0xf;
+                filter = filter >> 4;
+                if (filter >= FF_ARRAY_ELEMS(xa_adpcm_table))
+                    return AVERROR_INVALIDDATA;
+                flag   = bytestream2_get_byteu(&gb);
+
+                /* Decode 28 samples; when flag >= 0x07 no data is read and sample stays 0.  */
+                for (n = 0; n < 28; n++) {
+                    int sample = 0, scale;
+
+                    if (flag < 0x07) {
+                        if (n & 1) {
+                            scale = sign_extend(byte >> 4, 4);
+                        } else {
+                            byte  = bytestream2_get_byteu(&gb);
+                            scale = sign_extend(byte, 4);
+                        }
+
+                        scale  = scale * (1 << 12); /* multiply: '<<' on a negative nibble is undefined */
+                        sample = (int)((scale >> shift) + (c->status[channel].sample1 * xa_adpcm_table[filter][0] + c->status[channel].sample2 * xa_adpcm_table[filter][1]) / 64);
+                    }
+                    *samples++ = av_clip_int16(sample);
+                    c->status[channel].sample2 = c->status[channel].sample1;
+                    c->status[channel].sample1 = sample;
+                }
+            }
+        }
+        break;
 
     default:
         return -1;
@@ -1542,6 +1627,12 @@ static int adpcm_decode_frame(AVCodecContext *avctx, void *data,
     return bytestream2_tell(&gb);
 }
 
+/* Flush callback: clear has_status so decode paths guarded by !has_status re-initialize channel state. */
+static void adpcm_flush(AVCodecContext *avctx)
+{
+    ((ADPCMDecodeContext *)avctx->priv_data)->has_status = 0;
+}
+
 
 static const enum AVSampleFormat sample_fmts_s16[]  = { AV_SAMPLE_FMT_S16,
                                                         AV_SAMPLE_FMT_NONE };
@@ -1560,13 +1651,15 @@ AVCodec ff_ ## name_ ## _decoder = {                        \
     .priv_data_size = sizeof(ADPCMDecodeContext),           \
     .init           = adpcm_decode_init,                    \
     .decode         = adpcm_decode_frame,                   \
-    .capabilities   = CODEC_CAP_DR1,                        \
+    .flush          = adpcm_flush,                          \
+    .capabilities   = AV_CODEC_CAP_DR1,                     \
     .sample_fmts    = sample_fmts_,                         \
 }
 
 /* Note: Do not forget to add new entries to the Makefile as well. */
 ADPCM_DECODER(AV_CODEC_ID_ADPCM_4XM,         sample_fmts_s16p, adpcm_4xm,         "ADPCM 4X Movie");
 ADPCM_DECODER(AV_CODEC_ID_ADPCM_AFC,         sample_fmts_s16p, adpcm_afc,         "ADPCM Nintendo Gamecube AFC");
+ADPCM_DECODER(AV_CODEC_ID_ADPCM_AICA,        sample_fmts_s16p, adpcm_aica,        "ADPCM Yamaha AICA");
 ADPCM_DECODER(AV_CODEC_ID_ADPCM_CT,          sample_fmts_s16,  adpcm_ct,          "ADPCM Creative Technology");
 ADPCM_DECODER(AV_CODEC_ID_ADPCM_DTK,         sample_fmts_s16p, adpcm_dtk,         "ADPCM Nintendo Gamecube DTK");
 ADPCM_DECODER(AV_CODEC_ID_ADPCM_EA,          sample_fmts_s16,  adpcm_ea,          "ADPCM Electronic Arts");
@@ -1589,10 +1682,12 @@ ADPCM_DECODER(AV_CODEC_ID_ADPCM_IMA_SMJPEG,  sample_fmts_s16,  adpcm_ima_smjpeg,
 ADPCM_DECODER(AV_CODEC_ID_ADPCM_IMA_WAV,     sample_fmts_s16p, adpcm_ima_wav,     "ADPCM IMA WAV");
 ADPCM_DECODER(AV_CODEC_ID_ADPCM_IMA_WS,      sample_fmts_both, adpcm_ima_ws,      "ADPCM IMA Westwood");
 ADPCM_DECODER(AV_CODEC_ID_ADPCM_MS,          sample_fmts_s16,  adpcm_ms,          "ADPCM Microsoft");
+ADPCM_DECODER(AV_CODEC_ID_ADPCM_PSX,         sample_fmts_s16p, adpcm_psx,         "ADPCM Playstation");
 ADPCM_DECODER(AV_CODEC_ID_ADPCM_SBPRO_2,     sample_fmts_s16,  adpcm_sbpro_2,     "ADPCM Sound Blaster Pro 2-bit");
 ADPCM_DECODER(AV_CODEC_ID_ADPCM_SBPRO_3,     sample_fmts_s16,  adpcm_sbpro_3,     "ADPCM Sound Blaster Pro 2.6-bit");
 ADPCM_DECODER(AV_CODEC_ID_ADPCM_SBPRO_4,     sample_fmts_s16,  adpcm_sbpro_4,     "ADPCM Sound Blaster Pro 4-bit");
 ADPCM_DECODER(AV_CODEC_ID_ADPCM_SWF,         sample_fmts_s16,  adpcm_swf,         "ADPCM Shockwave Flash");
-ADPCM_DECODER(AV_CODEC_ID_ADPCM_THP,         sample_fmts_s16p, adpcm_thp,         "ADPCM Nintendo Gamecube THP");
+ADPCM_DECODER(AV_CODEC_ID_ADPCM_THP_LE,      sample_fmts_s16p, adpcm_thp_le,      "ADPCM Nintendo THP (little-endian)");
+ADPCM_DECODER(AV_CODEC_ID_ADPCM_THP,         sample_fmts_s16p, adpcm_thp,         "ADPCM Nintendo THP");
 ADPCM_DECODER(AV_CODEC_ID_ADPCM_XA,          sample_fmts_s16p, adpcm_xa,          "ADPCM CDROM XA");
 ADPCM_DECODER(AV_CODEC_ID_ADPCM_YAMAHA,      sample_fmts_s16,  adpcm_yamaha,      "ADPCM Yamaha");
diff --git a/libavcodec/adpcmenc.c b/libavcodec/adpcmenc.c
index 50872c3e..9ceea094 100644
--- a/libavcodec/adpcmenc.c
+++ b/libavcodec/adpcmenc.c
@@ -113,7 +113,7 @@ static av_cold int adpcm_encode_init(AVCodecContext *avctx)
         avctx->frame_size = (BLKSIZE - 7 * avctx->channels) * 2 / avctx->channels + 2;
         avctx->bits_per_coded_sample = 4;
         avctx->block_align    = BLKSIZE;
-        if (!(avctx->extradata = av_malloc(32 + FF_INPUT_BUFFER_PADDING_SIZE)))
+        if (!(avctx->extradata = av_malloc(32 + AV_INPUT_BUFFER_PADDING_SIZE)))
             goto error;
         avctx->extradata_size = 32;
         extradata = avctx->extradata;
@@ -486,7 +486,7 @@ static int adpcm_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
         pkt_size = (2 + avctx->channels * (22 + 4 * (frame->nb_samples - 1)) + 7) / 8;
     else
         pkt_size = avctx->block_align;
-    if ((ret = ff_alloc_packet2(avctx, avpkt, pkt_size)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, avpkt, pkt_size, 0)) < 0)
         return ret;
     dst = avpkt->data;
 
diff --git a/libavcodec/adxdec.c b/libavcodec/adxdec.c
index 5115cede..32cc0f00 100644
--- a/libavcodec/adxdec.c
+++ b/libavcodec/adxdec.c
@@ -183,7 +183,7 @@ AVCodec ff_adpcm_adx_decoder = {
     .init           = adx_decode_init,
     .decode         = adx_decode_frame,
     .flush          = adx_decode_flush,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P,
                                                       AV_SAMPLE_FMT_NONE },
 };
diff --git a/libavcodec/adxenc.c b/libavcodec/adxenc.c
index 7736d09b..f1ba5911 100644
--- a/libavcodec/adxenc.c
+++ b/libavcodec/adxenc.c
@@ -146,7 +146,7 @@ static int adx_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     int ch, out_size, ret;
 
     out_size = BLOCK_SIZE * avctx->channels + !c->header_parsed * HEADER_SIZE;
-    if ((ret = ff_alloc_packet2(avctx, avpkt, out_size)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, avpkt, out_size, 0)) < 0)
         return ret;
     dst = avpkt->data;
 
diff --git a/libavcodec/aic.c b/libavcodec/aic.c
index 648ccba5..5decc787 100644
--- a/libavcodec/aic.c
+++ b/libavcodec/aic.c
@@ -387,8 +387,11 @@ static int aic_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         return AVERROR_INVALIDDATA;
     }
 
-    if ((ret = aic_decode_header(ctx, buf, buf_size)) < 0)
+    ret = aic_decode_header(ctx, buf, buf_size);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid header\n");
         return ret;
+    }
 
     if ((ret = ff_get_buffer(avctx, ctx->frame, 0)) < 0)
         return ret;
@@ -400,13 +403,17 @@ static int aic_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         for (x = 0; x < ctx->mb_width; x += ctx->slice_width) {
             slice_size = bytestream2_get_le16(&gb) * 4;
             if (slice_size + off > buf_size || !slice_size) {
-                av_log(avctx, AV_LOG_ERROR, "Incorrect slice size\n");
+                av_log(avctx, AV_LOG_ERROR,
+                       "Incorrect slice size %d at %d.%d\n", slice_size, x, y);
                 return AVERROR_INVALIDDATA;
             }
 
-            if ((ret = aic_decode_slice(ctx, x, y,
-                                        buf + off, slice_size)) < 0)
+            ret = aic_decode_slice(ctx, x, y, buf + off, slice_size);
+            if (ret < 0) {
+                av_log(avctx, AV_LOG_ERROR,
+                       "Error decoding slice at %d.%d\n", x, y);
                 return ret;
+            }
 
             off += slice_size;
         }
@@ -441,7 +448,7 @@ static av_cold int aic_decode_init(AVCodecContext *avctx)
     ctx->num_x_slices = (ctx->mb_width + 15) >> 4;
     ctx->slice_width  = 16;
     for (i = 1; i < 32; i++) {
-        if (!(ctx->mb_width % i) && (ctx->mb_width / i < 32)) {
+        if (!(ctx->mb_width % i) && (ctx->mb_width / i <= 32)) {
             ctx->slice_width  = ctx->mb_width / i;
             ctx->num_x_slices = i;
             break;
@@ -481,5 +488,5 @@ AVCodec ff_aic_decoder = {
     .init           = aic_decode_init,
     .close          = aic_decode_close,
     .decode         = aic_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/alac.c b/libavcodec/alac.c
index a5948bba..fc8bc968 100644
--- a/libavcodec/alac.c
+++ b/libavcodec/alac.c
@@ -57,6 +57,7 @@
 #include "unary.h"
 #include "mathops.h"
 #include "alac_data.h"
+#include "alacdsp.h"
 
 #define ALAC_EXTRADATA_SIZE 36
 
@@ -81,6 +82,8 @@ typedef struct ALACContext {
 
     int direct_output;
     int extra_bit_bug;
+
+    ALACDSPContext dsp;
 } ALACContext;
 
 static inline unsigned int decode_scalar(GetBitContext *gb, int k, int bps)
@@ -230,35 +233,6 @@ static void lpc_prediction(int32_t *error_buffer, int32_t *buffer_out,
     }
 }
 
-static void decorrelate_stereo(int32_t *buffer[2], int nb_samples,
-                               int decorr_shift, int decorr_left_weight)
-{
-    int i;
-
-    for (i = 0; i < nb_samples; i++) {
-        int32_t a, b;
-
-        a = buffer[0][i];
-        b = buffer[1][i];
-
-        a -= (b * decorr_left_weight) >> decorr_shift;
-        b += a;
-
-        buffer[0][i] = b;
-        buffer[1][i] = a;
-    }
-}
-
-static void append_extra_bits(int32_t *buffer[2], int32_t *extra_bits_buffer[2],
-                              int extra_bits, int channels, int nb_samples)
-{
-    int i, ch;
-
-    for (ch = 0; ch < channels; ch++)
-        for (i = 0; i < nb_samples; i++)
-            buffer[ch][i] = (buffer[ch][i] << extra_bits) | extra_bits_buffer[ch][i];
-}
-
 static int decode_element(AVCodecContext *avctx, AVFrame *frame, int ch_index,
                           int channels)
 {
@@ -389,22 +363,26 @@ static int decode_element(AVCodecContext *avctx, AVFrame *frame, int ch_index,
         decorr_left_weight = 0;
     }
 
-    if (alac->extra_bits && alac->extra_bit_bug) {
-        append_extra_bits(alac->output_samples_buffer, alac->extra_bits_buffer,
-                          alac->extra_bits, channels, alac->nb_samples);
-    }
+    if (channels == 2) {
+        if (alac->extra_bits && alac->extra_bit_bug) {
+            alac->dsp.append_extra_bits[1](alac->output_samples_buffer, alac->extra_bits_buffer,
+                                           alac->extra_bits, channels, alac->nb_samples);
+        }
 
-    if (channels == 2 && decorr_left_weight) {
-        decorrelate_stereo(alac->output_samples_buffer, alac->nb_samples,
-                           decorr_shift, decorr_left_weight);
-    }
+        if (decorr_left_weight) {
+            alac->dsp.decorrelate_stereo(alac->output_samples_buffer, alac->nb_samples,
+                                         decorr_shift, decorr_left_weight);
+        }
 
-    if (alac->extra_bits && !alac->extra_bit_bug) {
-        append_extra_bits(alac->output_samples_buffer, alac->extra_bits_buffer,
-                          alac->extra_bits, channels, alac->nb_samples);
+        if (alac->extra_bits && !alac->extra_bit_bug) {
+            alac->dsp.append_extra_bits[1](alac->output_samples_buffer, alac->extra_bits_buffer,
+                                           alac->extra_bits, channels, alac->nb_samples);
+        }
+    } else if (alac->extra_bits) {
+        alac->dsp.append_extra_bits[0](alac->output_samples_buffer, alac->extra_bits_buffer,
+                                       alac->extra_bits, channels, alac->nb_samples);
     }
 
-    if(av_sample_fmt_is_planar(avctx->sample_fmt)) {
     switch(alac->sample_size) {
     case 16: {
         for (ch = 0; ch < channels; ch++) {
@@ -420,37 +398,6 @@ static int decode_element(AVCodecContext *avctx, AVFrame *frame, int ch_index,
         }}
         break;
     }
-    }else{
-        switch(alac->sample_size) {
-        case 16: {
-            int16_t *outbuffer = ((int16_t *)frame->extended_data[0]) + ch_index;
-            for (i = 0; i < alac->nb_samples; i++) {
-                for (ch = 0; ch < channels; ch++)
-                    *outbuffer++ = alac->output_samples_buffer[ch][i];
-                outbuffer += alac->channels - channels;
-            }
-            }
-            break;
-        case 24: {
-            int32_t *outbuffer = ((int32_t *)frame->extended_data[0]) + ch_index;
-            for (i = 0; i < alac->nb_samples; i++) {
-                for (ch = 0; ch < channels; ch++)
-                    *outbuffer++ = alac->output_samples_buffer[ch][i] << 8;
-                outbuffer += alac->channels - channels;
-            }
-            }
-            break;
-        case 32: {
-            int32_t *outbuffer = ((int32_t *)frame->extended_data[0]) + ch_index;
-            for (i = 0; i < alac->nb_samples; i++) {
-                for (ch = 0; ch < channels; ch++)
-                    *outbuffer++ = alac->output_samples_buffer[ch][i];
-                outbuffer += alac->channels - channels;
-            }
-            }
-            break;
-        }
-    }
 
     return 0;
 }
@@ -544,14 +491,14 @@ static int allocate_buffers(ALACContext *alac)
         FF_ALLOC_OR_GOTO(alac->avctx, alac->predict_error_buffer[ch],
                          buf_size, buf_alloc_fail);
 
-        alac->direct_output = alac->sample_size > 16 && av_sample_fmt_is_planar(alac->avctx->sample_fmt);
+        alac->direct_output = alac->sample_size > 16;
         if (!alac->direct_output) {
             FF_ALLOC_OR_GOTO(alac->avctx, alac->output_samples_buffer[ch],
-                             buf_size, buf_alloc_fail);
+                             buf_size + AV_INPUT_BUFFER_PADDING_SIZE, buf_alloc_fail);
         }
 
         FF_ALLOC_OR_GOTO(alac->avctx, alac->extra_bits_buffer[ch],
-                         buf_size, buf_alloc_fail);
+                         buf_size + AV_INPUT_BUFFER_PADDING_SIZE, buf_alloc_fail);
     }
     return 0;
 buf_alloc_fail:
@@ -593,7 +540,6 @@ static int alac_set_info(ALACContext *alac)
 static av_cold int alac_decode_init(AVCodecContext * avctx)
 {
     int ret;
-    int req_packed;
     ALACContext *alac = avctx->priv_data;
     alac->avctx = avctx;
 
@@ -607,12 +553,11 @@ static av_cold int alac_decode_init(AVCodecContext * avctx)
         return -1;
     }
 
-    req_packed = LIBAVCODEC_VERSION_MAJOR < 55 && !av_sample_fmt_is_planar(avctx->request_sample_fmt);
     switch (alac->sample_size) {
-    case 16: avctx->sample_fmt = req_packed ? AV_SAMPLE_FMT_S16 : AV_SAMPLE_FMT_S16P;
+    case 16: avctx->sample_fmt = AV_SAMPLE_FMT_S16P;
              break;
     case 24:
-    case 32: avctx->sample_fmt = req_packed ? AV_SAMPLE_FMT_S32 : AV_SAMPLE_FMT_S32P;
+    case 32: avctx->sample_fmt = AV_SAMPLE_FMT_S32P;
              break;
     default: avpriv_request_sample(avctx, "Sample depth %d", alac->sample_size);
              return AVERROR_PATCHWELCOME;
@@ -640,19 +585,23 @@ static av_cold int alac_decode_init(AVCodecContext * avctx)
         return ret;
     }
 
+    ff_alacdsp_init(&alac->dsp);
+
     return 0;
 }
 
+#if HAVE_THREADS
 static int init_thread_copy(AVCodecContext *avctx)
 {
     ALACContext *alac = avctx->priv_data;
     alac->avctx = avctx;
     return allocate_buffers(alac);
 }
+#endif
 
 static const AVOption options[] = {
     { "extra_bits_bug", "Force non-standard decoding process",
-      offsetof(ALACContext, extra_bit_bug), AV_OPT_TYPE_INT, { .i64 = 0 },
+      offsetof(ALACContext, extra_bit_bug), AV_OPT_TYPE_BOOL, { .i64 = 0 },
       0, 1, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_DECODING_PARAM },
     { NULL },
 };
@@ -674,6 +623,6 @@ AVCodec ff_alac_decoder = {
     .close          = alac_decode_close,
     .decode         = alac_decode_frame,
     .init_thread_copy = ONLY_IF_THREADS_ENABLED(init_thread_copy),
-    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
     .priv_class     = &alac_class
 };
diff --git a/libavcodec/alacdsp.c b/libavcodec/alacdsp.c
new file mode 100644
index 00000000..ecbaedb0
--- /dev/null
+++ b/libavcodec/alacdsp.c
@@ -0,0 +1,63 @@
+/*
+ * ALAC (Apple Lossless Audio Codec) decoder
+ * Copyright (c) 2005 David Hammerton
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "alacdsp.h"
+#include "config.h"
+
+static void decorrelate_stereo(int32_t *buffer[2], int nb_samples,
+                               int decorr_shift, int decorr_left_weight)
+{
+    int i;
+
+    for (i = 0; i < nb_samples; i++) {
+        int32_t a, b;
+
+        a = buffer[0][i];
+        b = buffer[1][i];
+
+        a -= (b * decorr_left_weight) >> decorr_shift;
+        b += a;
+
+        buffer[0][i] = b;
+        buffer[1][i] = a;
+    }
+}
+
+static void append_extra_bits(int32_t *buffer[2], int32_t *extra_bits_buffer[2],
+                              int extra_bits, int channels, int nb_samples)
+{
+    int i, ch;
+
+    for (ch = 0; ch < channels; ch++)
+        for (i = 0; i < nb_samples; i++)
+            buffer[ch][i] = (buffer[ch][i] << extra_bits) | extra_bits_buffer[ch][i];
+}
+
+av_cold void ff_alacdsp_init(ALACDSPContext *c)
+{
+    c->decorrelate_stereo   = decorrelate_stereo;
+    c->append_extra_bits[0] =
+    c->append_extra_bits[1] = append_extra_bits;
+
+    if (ARCH_X86)
+        ff_alacdsp_init_x86(c);
+}
diff --git a/libavcodec/alacdsp.h b/libavcodec/alacdsp.h
new file mode 100644
index 00000000..f8b56dd5
--- /dev/null
+++ b/libavcodec/alacdsp.h
@@ -0,0 +1,34 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_ALACDSP_H
+#define AVCODEC_ALACDSP_H
+
+#include <stdint.h>
+
+typedef struct ALACDSPContext {
+    void (*decorrelate_stereo)(int32_t *buffer[2], int nb_samples,
+                               int decorr_shift, int decorr_left_weight);
+    void (*append_extra_bits[2])(int32_t *buffer[2], int32_t *extra_bits_buffer[2],
+                                 int extra_bits, int channels, int nb_samples);
+} ALACDSPContext;
+
+void ff_alacdsp_init(ALACDSPContext *c);
+void ff_alacdsp_init_x86(ALACDSPContext *c);
+
+#endif /* AVCODEC_ALACDSP_H */
diff --git a/libavcodec/alacenc.c b/libavcodec/alacenc.c
index ce63da66..c80c8876 100644
--- a/libavcodec/alacenc.c
+++ b/libavcodec/alacenc.c
@@ -19,6 +19,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "libavutil/opt.h"
+
 #include "avcodec.h"
 #include "put_bits.h"
 #include "internal.h"
@@ -57,6 +59,8 @@ typedef struct AlacLPCContext {
 } AlacLPCContext;
 
 typedef struct AlacEncodeContext {
+    const AVClass *class;
+    AVCodecContext *avctx;
     int frame_size;                     /**< current frame size               */
     int verbatim;                       /**< current frame verbatim mode flag */
     int compression_level;
@@ -73,7 +77,6 @@ typedef struct AlacEncodeContext {
     RiceContext rc;
     AlacLPCContext lpc[2];
     LPCContext lpc_ctx;
-    AVCodecContext *avctx;
 } AlacEncodeContext;
 
 
@@ -531,7 +534,7 @@ static av_cold int alac_encode_init(AVCodecContext *avctx)
                                                  avctx->channels,
                                                  avctx->bits_per_raw_sample);
 
-    avctx->extradata = av_mallocz(ALAC_EXTRADATA_SIZE + FF_INPUT_BUFFER_PADDING_SIZE);
+    avctx->extradata = av_mallocz(ALAC_EXTRADATA_SIZE + AV_INPUT_BUFFER_PADDING_SIZE);
     if (!avctx->extradata) {
         ret = AVERROR(ENOMEM);
         goto error;
@@ -556,7 +559,8 @@ static av_cold int alac_encode_init(AVCodecContext *avctx)
         AV_WB8(alac_extradata+20, s->rc.k_modifier);
     }
 
-    s->min_prediction_order = DEFAULT_MIN_PRED_ORDER;
+#if FF_API_PRIVATE_OPT
+FF_DISABLE_DEPRECATION_WARNINGS
     if (avctx->min_prediction_order >= 0) {
         if (avctx->min_prediction_order < MIN_LPC_ORDER ||
            avctx->min_prediction_order > ALAC_MAX_LPC_ORDER) {
@@ -569,7 +573,6 @@ static av_cold int alac_encode_init(AVCodecContext *avctx)
         s->min_prediction_order = avctx->min_prediction_order;
     }
 
-    s->max_prediction_order = DEFAULT_MAX_PRED_ORDER;
     if (avctx->max_prediction_order >= 0) {
         if (avctx->max_prediction_order < MIN_LPC_ORDER ||
             avctx->max_prediction_order > ALAC_MAX_LPC_ORDER) {
@@ -581,6 +584,8 @@ static av_cold int alac_encode_init(AVCodecContext *avctx)
 
         s->max_prediction_order = avctx->max_prediction_order;
     }
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
     if (s->max_prediction_order < s->min_prediction_order) {
         av_log(avctx, AV_LOG_ERROR,
@@ -618,7 +623,7 @@ static int alac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     else
         max_frame_size = s->max_coded_frame_size;
 
-    if ((ret = ff_alloc_packet2(avctx, avpkt, 2 * max_frame_size)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, avpkt, 2 * max_frame_size, 0)) < 0)
         return ret;
 
     /* use verbatim mode for compression_level 0 */
@@ -644,16 +649,33 @@ static int alac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     return 0;
 }
 
+#define OFFSET(x) offsetof(AlacEncodeContext, x)
+#define AE AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
+static const AVOption options[] = {
+    { "min_prediction_order", NULL, OFFSET(min_prediction_order), AV_OPT_TYPE_INT, { .i64 = DEFAULT_MIN_PRED_ORDER }, MIN_LPC_ORDER, ALAC_MAX_LPC_ORDER, AE },
+    { "max_prediction_order", NULL, OFFSET(max_prediction_order), AV_OPT_TYPE_INT, { .i64 = DEFAULT_MAX_PRED_ORDER }, MIN_LPC_ORDER, ALAC_MAX_LPC_ORDER, AE },
+
+    { NULL },
+};
+
+static const AVClass alacenc_class = {
+    .class_name = "alacenc",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
 AVCodec ff_alac_encoder = {
     .name           = "alac",
     .long_name      = NULL_IF_CONFIG_SMALL("ALAC (Apple Lossless Audio Codec)"),
     .type           = AVMEDIA_TYPE_AUDIO,
     .id             = AV_CODEC_ID_ALAC,
     .priv_data_size = sizeof(AlacEncodeContext),
+    .priv_class     = &alacenc_class,
     .init           = alac_encode_init,
     .encode2        = alac_encode_frame,
     .close          = alac_encode_close,
-    .capabilities   = CODEC_CAP_SMALL_LAST_FRAME,
+    .capabilities   = AV_CODEC_CAP_SMALL_LAST_FRAME,
     .channel_layouts = ff_alac_channel_layouts,
     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S32P,
                                                      AV_SAMPLE_FMT_S16P,
diff --git a/libavcodec/aliaspixdec.c b/libavcodec/aliaspixdec.c
index bdc4c72c..087b18fb 100644
--- a/libavcodec/aliaspixdec.c
+++ b/libavcodec/aliaspixdec.c
@@ -124,5 +124,5 @@ AVCodec ff_alias_pix_decoder = {
     .type         = AVMEDIA_TYPE_VIDEO,
     .id           = AV_CODEC_ID_ALIAS_PIX,
     .decode       = decode_frame,
-    .capabilities = CODEC_CAP_DR1,
+    .capabilities = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/aliaspixenc.c b/libavcodec/aliaspixenc.c
index 1fcea084..a9ba00cd 100644
--- a/libavcodec/aliaspixenc.c
+++ b/libavcodec/aliaspixenc.c
@@ -27,22 +27,18 @@
 
 #define ALIAS_HEADER_SIZE 10
 
-static av_cold int encode_init(AVCodecContext *avctx)
-{
-    avctx->coded_frame = av_frame_alloc();
-    if (!avctx->coded_frame)
-        return AVERROR(ENOMEM);
-    return 0;
-}
-
 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
                         const AVFrame *frame, int *got_packet)
 {
     int width, height, bits_pixel, i, j, length, ret;
     uint8_t *in_buf, *buf;
 
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
     avctx->coded_frame->key_frame = 1;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
     width  = avctx->width;
     height = avctx->height;
@@ -65,7 +61,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     }
 
     length = ALIAS_HEADER_SIZE + 4 * width * height; // max possible
-    if ((ret = ff_alloc_packet(pkt, length)) < 0) {
+    if ((ret = ff_alloc_packet2(avctx, pkt, length, ALIAS_HEADER_SIZE + height*2)) < 0) {
         av_log(avctx, AV_LOG_ERROR, "Error getting output packet of size %d.\n", length);
         return ret;
     }
@@ -114,20 +110,12 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     return 0;
 }
 
-static av_cold int encode_close(AVCodecContext *avctx)
-{
-    av_frame_free(&avctx->coded_frame);
-    return 0;
-}
-
 AVCodec ff_alias_pix_encoder = {
     .name      = "alias_pix",
     .long_name = NULL_IF_CONFIG_SMALL("Alias/Wavefront PIX image"),
     .type      = AVMEDIA_TYPE_VIDEO,
     .id        = AV_CODEC_ID_ALIAS_PIX,
-    .init      = encode_init,
     .encode2   = encode_frame,
-    .close     = encode_close,
     .pix_fmts  = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_BGR24, AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE
     },
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index ce977467..2097db03 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -75,7 +75,7 @@ void avcodec_register_all(void)
 
     /* hardware accelerators */
     REGISTER_HWACCEL(H263_VAAPI,        h263_vaapi);
-    REGISTER_HWACCEL(H263_VDPAU,        h263_vdpau);
+    REGISTER_HWACCEL(H263_VIDEOTOOLBOX, h263_videotoolbox);
     REGISTER_HWACCEL(H264_D3D11VA,      h264_d3d11va);
     REGISTER_HWACCEL(H264_DXVA2,        h264_dxva2);
     REGISTER_HWACCEL(H264_MMAL,         h264_mmal);
@@ -84,21 +84,36 @@ void avcodec_register_all(void)
     REGISTER_HWACCEL(H264_VDA,          h264_vda);
     REGISTER_HWACCEL(H264_VDA_OLD,      h264_vda_old);
     REGISTER_HWACCEL(H264_VDPAU,        h264_vdpau);
+    REGISTER_HWACCEL(H264_VIDEOTOOLBOX, h264_videotoolbox);
     REGISTER_HWACCEL(HEVC_D3D11VA,      hevc_d3d11va);
     REGISTER_HWACCEL(HEVC_DXVA2,        hevc_dxva2);
+    REGISTER_HWACCEL(HEVC_QSV,          hevc_qsv);
+    REGISTER_HWACCEL(HEVC_VAAPI,        hevc_vaapi);
+    REGISTER_HWACCEL(HEVC_VDPAU,        hevc_vdpau);
     REGISTER_HWACCEL(MPEG1_XVMC,        mpeg1_xvmc);
     REGISTER_HWACCEL(MPEG1_VDPAU,       mpeg1_vdpau);
+    REGISTER_HWACCEL(MPEG1_VIDEOTOOLBOX, mpeg1_videotoolbox);
     REGISTER_HWACCEL(MPEG2_XVMC,        mpeg2_xvmc);
     REGISTER_HWACCEL(MPEG2_D3D11VA,     mpeg2_d3d11va);
     REGISTER_HWACCEL(MPEG2_DXVA2,       mpeg2_dxva2);
+    REGISTER_HWACCEL(MPEG2_MMAL,        mpeg2_mmal);
+    REGISTER_HWACCEL(MPEG2_QSV,         mpeg2_qsv);
     REGISTER_HWACCEL(MPEG2_VAAPI,       mpeg2_vaapi);
     REGISTER_HWACCEL(MPEG2_VDPAU,       mpeg2_vdpau);
+    REGISTER_HWACCEL(MPEG2_VIDEOTOOLBOX, mpeg2_videotoolbox);
+    REGISTER_HWACCEL(MPEG4_MMAL,        mpeg4_mmal);
     REGISTER_HWACCEL(MPEG4_VAAPI,       mpeg4_vaapi);
     REGISTER_HWACCEL(MPEG4_VDPAU,       mpeg4_vdpau);
+    REGISTER_HWACCEL(MPEG4_VIDEOTOOLBOX, mpeg4_videotoolbox);
     REGISTER_HWACCEL(VC1_D3D11VA,       vc1_d3d11va);
     REGISTER_HWACCEL(VC1_DXVA2,         vc1_dxva2);
     REGISTER_HWACCEL(VC1_VAAPI,         vc1_vaapi);
     REGISTER_HWACCEL(VC1_VDPAU,         vc1_vdpau);
+    REGISTER_HWACCEL(VC1_MMAL,          vc1_mmal);
+    REGISTER_HWACCEL(VC1_QSV,           vc1_qsv);
+    REGISTER_HWACCEL(VP9_D3D11VA,       vp9_d3d11va);
+    REGISTER_HWACCEL(VP9_DXVA2,         vp9_dxva2);
+    REGISTER_HWACCEL(VP9_VAAPI,         vp9_vaapi);
     REGISTER_HWACCEL(WMV3_D3D11VA,      wmv3_d3d11va);
     REGISTER_HWACCEL(WMV3_DXVA2,        wmv3_dxva2);
     REGISTER_HWACCEL(WMV3_VAAPI,        wmv3_vaapi);
@@ -133,6 +148,7 @@ void avcodec_register_all(void)
     REGISTER_DECODER(CAVS,              cavs);
     REGISTER_DECODER(CDGRAPHICS,        cdgraphics);
     REGISTER_DECODER(CDXL,              cdxl);
+    REGISTER_DECODER(CFHD,              cfhd);
     REGISTER_ENCDEC (CINEPAK,           cinepak);
     REGISTER_ENCDEC (CLJR,              cljr);
     REGISTER_DECODER(CLLC,              cllc);
@@ -140,14 +156,17 @@ void avcodec_register_all(void)
     REGISTER_DECODER(CPIA,              cpia);
     REGISTER_DECODER(CSCD,              cscd);
     REGISTER_DECODER(CYUV,              cyuv);
+    REGISTER_DECODER(DDS,               dds);
     REGISTER_DECODER(DFA,               dfa);
     REGISTER_DECODER(DIRAC,             dirac);
     REGISTER_ENCDEC (DNXHD,             dnxhd);
     REGISTER_ENCDEC (DPX,               dpx);
     REGISTER_DECODER(DSICINVIDEO,       dsicinvideo);
+    REGISTER_DECODER(DVAUDIO,           dvaudio);
     REGISTER_ENCDEC (DVVIDEO,           dvvideo);
     REGISTER_DECODER(DXA,               dxa);
     REGISTER_DECODER(DXTORY,            dxtory);
+    REGISTER_DECODER(DXV,               dxv);
     REGISTER_DECODER(EACMV,             eacmv);
     REGISTER_DECODER(EAMAD,             eamad);
     REGISTER_DECODER(EATGQ,             eatgq);
@@ -180,14 +199,17 @@ void avcodec_register_all(void)
     REGISTER_DECODER(H264_MMAL,         h264_mmal);
     REGISTER_DECODER(H264_QSV,          h264_qsv);
     REGISTER_DECODER(H264_VDA,          h264_vda);
+#if FF_API_VDPAU
     REGISTER_DECODER(H264_VDPAU,        h264_vdpau);
+#endif
+    REGISTER_ENCDEC (HAP,               hap);
     REGISTER_DECODER(HEVC,              hevc);
+    REGISTER_DECODER(HEVC_QSV,          hevc_qsv);
     REGISTER_DECODER(HNM4_VIDEO,        hnm4_video);
     REGISTER_DECODER(HQ_HQA,            hq_hqa);
     REGISTER_DECODER(HQX,               hqx);
     REGISTER_ENCDEC (HUFFYUV,           huffyuv);
     REGISTER_DECODER(IDCIN,             idcin);
-    REGISTER_DECODER(IFF_BYTERUN1,      iff_byterun1);
     REGISTER_DECODER(IFF_ILBM,          iff_ilbm);
     REGISTER_DECODER(INDEO2,            indeo2);
     REGISTER_DECODER(INDEO3,            indeo3);
@@ -215,11 +237,18 @@ void avcodec_register_all(void)
     REGISTER_ENCDEC (MPEG2VIDEO,        mpeg2video);
     REGISTER_ENCDEC (MPEG4,             mpeg4);
     REGISTER_DECODER(MPEG4_CRYSTALHD,   mpeg4_crystalhd);
+    REGISTER_DECODER(MPEG4_MMAL,        mpeg4_mmal);
+#if FF_API_VDPAU
     REGISTER_DECODER(MPEG4_VDPAU,       mpeg4_vdpau);
+#endif
     REGISTER_DECODER(MPEGVIDEO,         mpegvideo);
+#if FF_API_VDPAU
     REGISTER_DECODER(MPEG_VDPAU,        mpeg_vdpau);
     REGISTER_DECODER(MPEG1_VDPAU,       mpeg1_vdpau);
+#endif
+    REGISTER_DECODER(MPEG2_MMAL,        mpeg2_mmal);
     REGISTER_DECODER(MPEG2_CRYSTALHD,   mpeg2_crystalhd);
+    REGISTER_DECODER(MPEG2_QSV,         mpeg2_qsv);
     REGISTER_DECODER(MSA1,              msa1);
     REGISTER_DECODER(MSMPEG4_CRYSTALHD, msmpeg4_crystalhd);
     REGISTER_DECODER(MSMPEG4V1,         msmpeg4v1);
@@ -258,12 +287,15 @@ void avcodec_register_all(void)
     REGISTER_DECODER(RL2,               rl2);
     REGISTER_ENCDEC (ROQ,               roq);
     REGISTER_DECODER(RPZA,              rpza);
+    REGISTER_DECODER(RSCC,              rscc);
     REGISTER_ENCDEC (RV10,              rv10);
     REGISTER_ENCDEC (RV20,              rv20);
     REGISTER_DECODER(RV30,              rv30);
     REGISTER_DECODER(RV40,              rv40);
     REGISTER_ENCDEC (S302M,             s302m);
     REGISTER_DECODER(SANM,              sanm);
+    REGISTER_DECODER(SCREENPRESSO,      screenpresso);
+    REGISTER_DECODER(SDX2_DPCM,         sdx2_dpcm);
     REGISTER_ENCDEC (SGI,               sgi);
     REGISTER_DECODER(SGIRLE,            sgirle);
     REGISTER_DECODER(SMACKER,           smacker);
@@ -298,8 +330,13 @@ void avcodec_register_all(void)
     REGISTER_DECODER(VBLE,              vble);
     REGISTER_DECODER(VC1,               vc1);
     REGISTER_DECODER(VC1_CRYSTALHD,     vc1_crystalhd);
+#if FF_API_VDPAU
     REGISTER_DECODER(VC1_VDPAU,         vc1_vdpau);
+#endif
     REGISTER_DECODER(VC1IMAGE,          vc1image);
+    REGISTER_DECODER(VC1_MMAL,          vc1_mmal);
+    REGISTER_DECODER(VC1_QSV,           vc1_qsv);
+    REGISTER_ENCODER(VC2,               vc2);
     REGISTER_DECODER(VCR1,              vcr1);
     REGISTER_DECODER(VMDVIDEO,          vmdvideo);
     REGISTER_DECODER(VMNC,              vmnc);
@@ -313,11 +350,14 @@ void avcodec_register_all(void)
     REGISTER_DECODER(VP9,               vp9);
     REGISTER_DECODER(VQA,               vqa);
     REGISTER_DECODER(WEBP,              webp);
+    REGISTER_ENCODER(WRAPPED_AVFRAME,   wrapped_avframe);
     REGISTER_ENCDEC (WMV1,              wmv1);
     REGISTER_ENCDEC (WMV2,              wmv2);
     REGISTER_DECODER(WMV3,              wmv3);
     REGISTER_DECODER(WMV3_CRYSTALHD,    wmv3_crystalhd);
+#if FF_API_VDPAU
     REGISTER_DECODER(WMV3_VDPAU,        wmv3_vdpau);
+#endif
     REGISTER_DECODER(WMV3IMAGE,         wmv3image);
     REGISTER_DECODER(WNV1,              wnv1);
     REGISTER_DECODER(XAN_WC3,           xan_wc3);
@@ -336,6 +376,7 @@ void avcodec_register_all(void)
 
     /* audio codecs */
     REGISTER_ENCDEC (AAC,               aac);
+    REGISTER_DECODER(AAC_FIXED,         aac_fixed);
     REGISTER_DECODER(AAC_LATM,          aac_latm);
     REGISTER_ENCDEC (AC3,               ac3);
     REGISTER_ENCDEC (AC3_FIXED,         ac3_fixed);
@@ -368,6 +409,7 @@ void avcodec_register_all(void)
     REGISTER_DECODER(GSM_MS,            gsm_ms);
     REGISTER_DECODER(IAC,               iac);
     REGISTER_DECODER(IMC,               imc);
+    REGISTER_DECODER(INTERPLAY_ACM,     interplay_acm);
     REGISTER_DECODER(MACE3,             mace3);
     REGISTER_DECODER(MACE6,             mace6);
     REGISTER_DECODER(METASOUND,         metasound);
@@ -413,6 +455,8 @@ void avcodec_register_all(void)
     REGISTER_ENCDEC (WMAV2,             wmav2);
     REGISTER_DECODER(WMAVOICE,          wmavoice);
     REGISTER_DECODER(WS_SND1,           ws_snd1);
+    REGISTER_DECODER(XMA1,              xma1);
+    REGISTER_DECODER(XMA2,              xma2);
 
     /* PCM codecs */
     REGISTER_ENCDEC (PCM_ALAW,          pcm_alaw);
@@ -456,6 +500,7 @@ void avcodec_register_all(void)
     REGISTER_DECODER(ADPCM_4XM,         adpcm_4xm);
     REGISTER_ENCDEC (ADPCM_ADX,         adpcm_adx);
     REGISTER_DECODER(ADPCM_AFC,         adpcm_afc);
+    REGISTER_DECODER(ADPCM_AICA,        adpcm_aica);
     REGISTER_DECODER(ADPCM_CT,          adpcm_ct);
     REGISTER_DECODER(ADPCM_DTK,         adpcm_dtk);
     REGISTER_DECODER(ADPCM_EA,          adpcm_ea);
@@ -481,17 +526,16 @@ void avcodec_register_all(void)
     REGISTER_ENCDEC (ADPCM_IMA_WAV,     adpcm_ima_wav);
     REGISTER_DECODER(ADPCM_IMA_WS,      adpcm_ima_ws);
     REGISTER_ENCDEC (ADPCM_MS,          adpcm_ms);
+    REGISTER_DECODER(ADPCM_PSX,         adpcm_psx);
     REGISTER_DECODER(ADPCM_SBPRO_2,     adpcm_sbpro_2);
     REGISTER_DECODER(ADPCM_SBPRO_3,     adpcm_sbpro_3);
     REGISTER_DECODER(ADPCM_SBPRO_4,     adpcm_sbpro_4);
     REGISTER_ENCDEC (ADPCM_SWF,         adpcm_swf);
     REGISTER_DECODER(ADPCM_THP,         adpcm_thp);
+    REGISTER_DECODER(ADPCM_THP_LE,      adpcm_thp_le);
     REGISTER_DECODER(ADPCM_VIMA,        adpcm_vima);
     REGISTER_DECODER(ADPCM_XA,          adpcm_xa);
     REGISTER_ENCDEC (ADPCM_YAMAHA,      adpcm_yamaha);
-#if FF_API_VIMA_DECODER
-    REGISTER_DECODER(VIMA,              vima);
-#endif
 
     /* subtitles */
     REGISTER_ENCDEC (SSA,               ssa);
@@ -512,7 +556,7 @@ void avcodec_register_all(void)
     REGISTER_ENCDEC (SUBRIP,            subrip);
     REGISTER_DECODER(SUBVIEWER,         subviewer);
     REGISTER_DECODER(SUBVIEWER1,        subviewer1);
-    REGISTER_DECODER(TEXT,              text);
+    REGISTER_ENCDEC (TEXT,              text);
     REGISTER_DECODER(VPLAYER,           vplayer);
     REGISTER_ENCDEC (WEBVTT,            webvtt);
     REGISTER_ENCDEC (XSUB,              xsub);
@@ -533,11 +577,9 @@ void avcodec_register_all(void)
     REGISTER_ENCDEC (LIBSCHROEDINGER,   libschroedinger);
     REGISTER_ENCODER(LIBSHINE,          libshine);
     REGISTER_ENCDEC (LIBSPEEX,          libspeex);
-    REGISTER_DECODER(LIBSTAGEFRIGHT_H264, libstagefright_h264);
     REGISTER_ENCODER(LIBTHEORA,         libtheora);
     REGISTER_ENCODER(LIBTWOLAME,        libtwolame);
     REGISTER_ENCDEC (LIBUTVIDEO,        libutvideo);
-    REGISTER_ENCODER(LIBVO_AACENC,      libvo_aacenc);
     REGISTER_ENCODER(LIBVO_AMRWBENC,    libvo_amrwbenc);
     REGISTER_ENCDEC (LIBVORBIS,         libvorbis);
     REGISTER_ENCDEC (LIBVPX_VP8,        libvpx_vp8);
@@ -545,13 +587,13 @@ void avcodec_register_all(void)
     REGISTER_ENCODER(LIBWAVPACK,        libwavpack);
     REGISTER_ENCODER(LIBWEBP_ANIM,      libwebp_anim);  /* preferred over libwebp */
     REGISTER_ENCODER(LIBWEBP,           libwebp);
+    REGISTER_ENCODER(LIBX262,           libx262);
     REGISTER_ENCODER(LIBX264,           libx264);
     REGISTER_ENCODER(LIBX264RGB,        libx264rgb);
     REGISTER_ENCODER(LIBX265,           libx265);
     REGISTER_ENCODER(LIBXAVS,           libxavs);
     REGISTER_ENCODER(LIBXVID,           libxvid);
     REGISTER_DECODER(LIBZVBI_TELETEXT,  libzvbi_teletext);
-    REGISTER_ENCODER(LIBAACPLUS,        libaacplus);
 
     /* text */
     REGISTER_DECODER(BINTEXT,           bintext);
@@ -565,6 +607,9 @@ void avcodec_register_all(void)
     REGISTER_ENCODER(NVENC,             nvenc);
     REGISTER_ENCODER(NVENC_H264,        nvenc_h264);
     REGISTER_ENCODER(NVENC_HEVC,        nvenc_hevc);
+    REGISTER_ENCODER(HEVC_QSV,          hevc_qsv);
+    REGISTER_ENCODER(LIBKVAZAAR,        libkvazaar);
+    REGISTER_ENCODER(MPEG2_QSV,         mpeg2_qsv);
 
     /* parsers */
     REGISTER_PARSER(AAC,                aac);
@@ -578,10 +623,12 @@ void avcodec_register_all(void)
     REGISTER_PARSER(DIRAC,              dirac);
     REGISTER_PARSER(DNXHD,              dnxhd);
     REGISTER_PARSER(DPX,                dpx);
+    REGISTER_PARSER(DVAUDIO,            dvaudio);
     REGISTER_PARSER(DVBSUB,             dvbsub);
     REGISTER_PARSER(DVDSUB,             dvdsub);
     REGISTER_PARSER(DVD_NAV,            dvd_nav);
     REGISTER_PARSER(FLAC,               flac);
+    REGISTER_PARSER(G729,               g729);
     REGISTER_PARSER(GSM,                gsm);
     REGISTER_PARSER(H261,               h261);
     REGISTER_PARSER(H263,               h263);
@@ -609,6 +656,7 @@ void avcodec_register_all(void)
     REGISTER_BSF(CHOMP,                 chomp);
     REGISTER_BSF(DUMP_EXTRADATA,        dump_extradata);
     REGISTER_BSF(H264_MP4TOANNEXB,      h264_mp4toannexb);
+    REGISTER_BSF(HEVC_MP4TOANNEXB,      hevc_mp4toannexb);
     REGISTER_BSF(IMX_DUMP_HEADER,       imx_dump_header);
     REGISTER_BSF(MJPEG2JPEG,            mjpeg2jpeg);
     REGISTER_BSF(MJPEGA_DUMP_HEADER,    mjpega_dump_header);
diff --git a/libavcodec/alpha/blockdsp_alpha.c b/libavcodec/alpha/blockdsp_alpha.c
index ded439d8..c6f09646 100644
--- a/libavcodec/alpha/blockdsp_alpha.c
+++ b/libavcodec/alpha/blockdsp_alpha.c
@@ -43,9 +43,7 @@ static void clear_blocks_axp(int16_t *blocks) {
     } while (n);
 }
 
-av_cold void ff_blockdsp_init_alpha(BlockDSPContext *c, unsigned high_bit_depth)
+av_cold void ff_blockdsp_init_alpha(BlockDSPContext *c)
 {
-    if (!high_bit_depth) {
-        c->clear_blocks = clear_blocks_axp;
-    }
+    c->clear_blocks = clear_blocks_axp;
 }
diff --git a/libavcodec/alsdec.c b/libavcodec/alsdec.c
index b7d147d6..ebd364e0 100644
--- a/libavcodec/alsdec.c
+++ b/libavcodec/alsdec.c
@@ -1853,5 +1853,5 @@ AVCodec ff_als_decoder = {
     .close          = decode_end,
     .decode         = decode_frame,
     .flush          = flush,
-    .capabilities   = CODEC_CAP_SUBFRAMES | CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/amr.h b/libavcodec/amr.h
index 1ac73abe..727f8c34 100644
--- a/libavcodec/amr.h
+++ b/libavcodec/amr.h
@@ -28,9 +28,9 @@
 #include "avcodec.h"
 
 #ifdef AMR_USE_16BIT_TABLES
-#define R_TABLE_TYPE uint16_t
+typedef uint16_t R_TABLE_TYPE;
 #else
-#define R_TABLE_TYPE uint8_t
+typedef uint8_t R_TABLE_TYPE;
 #endif
 
 /**
diff --git a/libavcodec/amrnbdec.c b/libavcodec/amrnbdec.c
index 3fa639de..2299a253 100644
--- a/libavcodec/amrnbdec.c
+++ b/libavcodec/amrnbdec.c
@@ -1088,7 +1088,7 @@ AVCodec ff_amrnb_decoder = {
     .priv_data_size = sizeof(AMRContext),
     .init           = amrnb_decode_init,
     .decode         = amrnb_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLT,
                                                      AV_SAMPLE_FMT_NONE },
 };
diff --git a/libavcodec/amrwbdec.c b/libavcodec/amrwbdec.c
index 8771a2af..a99dbd11 100644
--- a/libavcodec/amrwbdec.c
+++ b/libavcodec/amrwbdec.c
@@ -358,7 +358,7 @@ static void decode_pitch_vector(AMRWBContext *ctx,
 }
 
 /** Get x bits in the index interval [lsb,lsb+len-1] inclusive */
-#define BIT_STR(x,lsb,len) (((x) >> (lsb)) & ((1 << (len)) - 1))
+#define BIT_STR(x,lsb,len) av_mod_uintp2((x) >> (lsb), (len))
 
 /** Get the bit at specified position */
 #define BIT_POS(x, p) (((x) >> (p)) & 1)
@@ -1273,7 +1273,7 @@ AVCodec ff_amrwb_decoder = {
     .priv_data_size = sizeof(AMRWBContext),
     .init           = amrwb_decode_init,
     .decode         = amrwb_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLT,
                                                      AV_SAMPLE_FMT_NONE },
 };
diff --git a/libavcodec/anm.c b/libavcodec/anm.c
index 37275347..29d59fbc 100644
--- a/libavcodec/anm.c
+++ b/libavcodec/anm.c
@@ -198,5 +198,5 @@ AVCodec ff_anm_decoder = {
     .init           = decode_init,
     .close          = decode_end,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/ansi.c b/libavcodec/ansi.c
index 20233591..21d5ae1d 100644
--- a/libavcodec/ansi.c
+++ b/libavcodec/ansi.c
@@ -478,5 +478,5 @@ AVCodec ff_ansi_decoder = {
     .init           = decode_init,
     .close          = decode_close,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/apedec.c b/libavcodec/apedec.c
index 03afd756..de9d71ca 100644
--- a/libavcodec/apedec.c
+++ b/libavcodec/apedec.c
@@ -247,7 +247,7 @@ static av_cold int ape_decode_init(AVCodecContext *avctx)
     s->compression_level = AV_RL16(avctx->extradata + 2);
     s->flags             = AV_RL16(avctx->extradata + 4);
 
-    av_log(avctx, AV_LOG_DEBUG, "Compression Level: %d - Flags: %d\n",
+    av_log(avctx, AV_LOG_VERBOSE, "Compression Level: %d - Flags: %d\n",
            s->compression_level, s->flags);
     if (s->compression_level % 1000 || s->compression_level > COMPRESSION_LEVEL_INSANE ||
         !s->compression_level ||
@@ -892,6 +892,9 @@ static void long_filter_high_3800(int32_t *buffer, int order, int shift, int len
     int32_t dotprod, sign;
     int32_t coeffs[256], delay[256];
 
+    if (order >= length)
+        return;
+
     memset(coeffs, 0, order * sizeof(*coeffs));
     for (i = 0; i < order; i++)
         delay[i] = buffer[i];
@@ -1281,7 +1284,7 @@ static void do_apply_filter(APEContext *ctx, int version, APEFilter *f,
             /* Update the adaption coefficients */
             absres = FFABS(res);
             if (absres)
-                *f->adaptcoeffs = ((res & (-1<<31)) ^ (-1<<30)) >>
+                *f->adaptcoeffs = ((res & INT32_MIN) ^ (-(1<<30))) >>
                                   (25 + (absres <= f->avg*3) + (absres <= f->avg*4/3));
             else
                 *f->adaptcoeffs = 0;
@@ -1369,7 +1372,7 @@ static void ape_unpack_stereo(APEContext *ctx, int count)
     int32_t *decoded0 = ctx->decoded[0];
     int32_t *decoded1 = ctx->decoded[1];
 
-    if (ctx->frameflags & APE_FRAMECODE_STEREO_SILENCE) {
+    if ((ctx->frameflags & APE_FRAMECODE_STEREO_SILENCE) == APE_FRAMECODE_STEREO_SILENCE) {
         /* We are pure silence, so we're done. */
         av_log(ctx->avctx, AV_LOG_DEBUG, "pure silence stereo\n");
         return;
@@ -1570,7 +1573,8 @@ AVCodec ff_ape_decoder = {
     .init           = ape_decode_init,
     .close          = ape_decode_close,
     .decode         = ape_decode_frame,
-    .capabilities   = CODEC_CAP_SUBFRAMES | CODEC_CAP_DELAY | CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DELAY |
+                      AV_CODEC_CAP_DR1,
     .flush          = ape_flush,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_U8P,
                                                       AV_SAMPLE_FMT_S16P,
diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
index 2f873969..179c403b 100644
--- a/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile
@@ -18,6 +18,9 @@ OBJS-$(CONFIG_IDCTDSP)                 += arm/idctdsp_init_arm.o        \
                                           arm/idctdsp_arm.o             \
                                           arm/jrevdct_arm.o             \
                                           arm/simple_idct_arm.o
+OBJS-$(CONFIG_FLACDSP)                 += arm/flacdsp_init_arm.o        \
+                                          arm/flacdsp_arm.o
+OBJS-$(CONFIG_G722DSP)                 += arm/g722dsp_init_arm.o
 OBJS-$(CONFIG_LLAUDDSP)                += arm/lossless_audiodsp_init_arm.o
 OBJS-$(CONFIG_ME_CMP)                  += arm/me_cmp_init_arm.o
 OBJS-$(CONFIG_MPEGAUDIODSP)            += arm/mpegaudiodsp_init_arm.o
@@ -25,28 +28,21 @@ OBJS-$(CONFIG_MPEGVIDEO)               += arm/mpegvideo_arm.o
 OBJS-$(CONFIG_MPEGVIDEOENC)            += arm/mpegvideoencdsp_init_arm.o
 OBJS-$(CONFIG_NEON_CLOBBER_TEST)       += arm/neontest.o
 OBJS-$(CONFIG_PIXBLOCKDSP)             += arm/pixblockdsp_init_arm.o
+OBJS-$(CONFIG_RV34DSP)                 += arm/rv34dsp_init_arm.o
 OBJS-$(CONFIG_VIDEODSP)                += arm/videodsp_init_arm.o
 OBJS-$(CONFIG_VP3DSP)                  += arm/vp3dsp_init_arm.o
+OBJS-$(CONFIG_VP8DSP)                  += arm/vp8dsp_init_arm.o
 
 # decoders/encoders
 OBJS-$(CONFIG_AAC_DECODER)             += arm/aacpsdsp_init_arm.o       \
                                           arm/sbrdsp_init_arm.o
-OBJS-$(CONFIG_ADPCM_G722_DECODER)      += arm/g722dsp_init_arm.o
-OBJS-$(CONFIG_ADPCM_G722_ENCODER)      += arm/g722dsp_init_arm.o
-OBJS-$(CONFIG_DCA_DECODER)             += arm/dcadsp_init_arm.o
-OBJS-$(CONFIG_FLAC_DECODER)            += arm/flacdsp_init_arm.o        \
-                                          arm/flacdsp_arm.o
-OBJS-$(CONFIG_FLAC_ENCODER)            += arm/flacdsp_init_arm.o
+OBJS-$(CONFIG_DCA_DECODER)             += arm/synth_filter_init_arm.o
 OBJS-$(CONFIG_HEVC_DECODER)            += arm/hevcdsp_init_arm.o
 OBJS-$(CONFIG_MLP_DECODER)             += arm/mlpdsp_init_arm.o
+OBJS-$(CONFIG_RV40_DECODER)            += arm/rv40dsp_init_arm.o
 OBJS-$(CONFIG_VC1_DECODER)             += arm/vc1dsp_init_arm.o
 OBJS-$(CONFIG_VORBIS_DECODER)          += arm/vorbisdsp_init_arm.o
 OBJS-$(CONFIG_VP6_DECODER)             += arm/vp6dsp_init_arm.o
-OBJS-$(CONFIG_VP7_DECODER)             += arm/vp8dsp_init_arm.o
-OBJS-$(CONFIG_VP8_DECODER)             += arm/vp8dsp_init_arm.o
-OBJS-$(CONFIG_RV30_DECODER)            += arm/rv34dsp_init_arm.o
-OBJS-$(CONFIG_RV40_DECODER)            += arm/rv34dsp_init_arm.o        \
-                                          arm/rv40dsp_init_arm.o
 
 
 # ARMv5 optimizations
@@ -74,16 +70,13 @@ ARMV6-OBJS-$(CONFIG_ME_CMP)            += arm/me_cmp_armv6.o
 ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP)      += arm/mpegaudiodsp_fixed_armv6.o
 ARMV6-OBJS-$(CONFIG_MPEGVIDEOENC)      += arm/mpegvideoencdsp_armv6.o
 ARMV6-OBJS-$(CONFIG_PIXBLOCKDSP)       += arm/pixblockdsp_armv6.o
+ARMV6-OBJS-$(CONFIG_VP8DSP)            += arm/vp8_armv6.o               \
+                                          arm/vp8dsp_init_armv6.o       \
+                                          arm/vp8dsp_armv6.o
 
 # decoders/encoders
 ARMV6-OBJS-$(CONFIG_MLP_DECODER)       += arm/mlpdsp_armv6.o
 ARMV6-OBJS-$(CONFIG_STARTCODE)         += arm/startcode_armv6.o
-ARMV6-OBJS-$(CONFIG_VP7_DECODER)       += arm/vp8_armv6.o               \
-                                          arm/vp8dsp_init_armv6.o       \
-                                          arm/vp8dsp_armv6.o
-ARMV6-OBJS-$(CONFIG_VP8_DECODER)       += arm/vp8_armv6.o               \
-                                          arm/vp8dsp_init_armv6.o       \
-                                          arm/vp8dsp_armv6.o
 
 
 # VFP optimizations
@@ -94,8 +87,7 @@ VFP-OBJS-$(CONFIG_FMTCONVERT)          += arm/fmtconvert_vfp.o
 VFP-OBJS-$(CONFIG_MDCT)                += arm/mdct_vfp.o
 
 # decoders/encoders
-VFP-OBJS-$(CONFIG_DCA_DECODER)         += arm/dcadsp_vfp.o              \
-                                          arm/synth_filter_vfp.o
+VFP-OBJS-$(CONFIG_DCA_DECODER)         += arm/synth_filter_vfp.o
 
 
 # NEON optimizations
@@ -110,6 +102,7 @@ NEON-OBJS-$(CONFIG_BLOCKDSP)           += arm/blockdsp_init_neon.o      \
 NEON-OBJS-$(CONFIG_FFT)                += arm/fft_neon.o                \
                                           arm/fft_fixed_neon.o
 NEON-OBJS-$(CONFIG_FMTCONVERT)         += arm/fmtconvert_neon.o
+NEON-OBJS-$(CONFIG_G722DSP)            += arm/g722dsp_neon.o
 NEON-OBJS-$(CONFIG_H264CHROMA)         += arm/h264cmc_neon.o
 NEON-OBJS-$(CONFIG_H264DSP)            += arm/h264dsp_neon.o            \
                                           arm/h264idct_neon.o
@@ -126,15 +119,14 @@ NEON-OBJS-$(CONFIG_MDCT)               += arm/mdct_neon.o               \
 NEON-OBJS-$(CONFIG_MPEGVIDEO)          += arm/mpegvideo_neon.o
 NEON-OBJS-$(CONFIG_RDFT)               += arm/rdft_neon.o
 NEON-OBJS-$(CONFIG_VP3DSP)             += arm/vp3dsp_neon.o
+NEON-OBJS-$(CONFIG_VP8DSP)             += arm/vp8dsp_init_neon.o        \
+                                          arm/vp8dsp_neon.o
 
 # decoders/encoders
 NEON-OBJS-$(CONFIG_AAC_DECODER)        += arm/aacpsdsp_neon.o           \
                                           arm/sbrdsp_neon.o
-NEON-OBJS-$(CONFIG_ADPCM_G722_DECODER) += arm/g722dsp_neon.o
-NEON-OBJS-$(CONFIG_ADPCM_G722_ENCODER) += arm/g722dsp_neon.o
 NEON-OBJS-$(CONFIG_LLAUDDSP)           += arm/lossless_audiodsp_neon.o
-NEON-OBJS-$(CONFIG_DCA_DECODER)        += arm/dcadsp_neon.o             \
-                                          arm/synth_filter_neon.o
+NEON-OBJS-$(CONFIG_DCA_DECODER)        += arm/synth_filter_neon.o
 NEON-OBJS-$(CONFIG_HEVC_DECODER)       += arm/hevcdsp_init_neon.o       \
                                           arm/hevcdsp_deblock_neon.o    \
                                           arm/hevcdsp_idct_neon.o       \
@@ -146,7 +138,3 @@ NEON-OBJS-$(CONFIG_VC1_DECODER)        += arm/vc1dsp_init_neon.o        \
                                           arm/vc1dsp_neon.o
 NEON-OBJS-$(CONFIG_VORBIS_DECODER)     += arm/vorbisdsp_neon.o
 NEON-OBJS-$(CONFIG_VP6_DECODER)        += arm/vp6dsp_neon.o
-NEON-OBJS-$(CONFIG_VP7_DECODER)        += arm/vp8dsp_init_neon.o        \
-                                          arm/vp8dsp_neon.o
-NEON-OBJS-$(CONFIG_VP8_DECODER)        += arm/vp8dsp_init_neon.o        \
-                                          arm/vp8dsp_neon.o
diff --git a/libavcodec/arm/blockdsp_arm.h b/libavcodec/arm/blockdsp_arm.h
index 2688d362..59ebeb84 100644
--- a/libavcodec/arm/blockdsp_arm.h
+++ b/libavcodec/arm/blockdsp_arm.h
@@ -21,6 +21,6 @@
 
 #include "libavcodec/blockdsp.h"
 
-void ff_blockdsp_init_neon(BlockDSPContext *c, unsigned high_bit_depth);
+void ff_blockdsp_init_neon(BlockDSPContext *c);
 
 #endif /* AVCODEC_ARM_BLOCKDSP_ARM_H */
diff --git a/libavcodec/arm/blockdsp_init_arm.c b/libavcodec/arm/blockdsp_init_arm.c
index 3b86a710..2080d525 100644
--- a/libavcodec/arm/blockdsp_init_arm.c
+++ b/libavcodec/arm/blockdsp_init_arm.c
@@ -24,10 +24,10 @@
 #include "libavcodec/blockdsp.h"
 #include "blockdsp_arm.h"
 
-av_cold void ff_blockdsp_init_arm(BlockDSPContext *c, unsigned high_bit_depth)
+av_cold void ff_blockdsp_init_arm(BlockDSPContext *c)
 {
     int cpu_flags = av_get_cpu_flags();
 
     if (have_neon(cpu_flags))
-        ff_blockdsp_init_neon(c, high_bit_depth);
+        ff_blockdsp_init_neon(c);
 }
diff --git a/libavcodec/arm/blockdsp_init_neon.c b/libavcodec/arm/blockdsp_init_neon.c
index 62b51fc7..87c0d6d6 100644
--- a/libavcodec/arm/blockdsp_init_neon.c
+++ b/libavcodec/arm/blockdsp_init_neon.c
@@ -28,10 +28,8 @@
 void ff_clear_block_neon(int16_t *block);
 void ff_clear_blocks_neon(int16_t *blocks);
 
-av_cold void ff_blockdsp_init_neon(BlockDSPContext *c, unsigned high_bit_depth)
+av_cold void ff_blockdsp_init_neon(BlockDSPContext *c)
 {
-    if (!high_bit_depth) {
-        c->clear_block  = ff_clear_block_neon;
-        c->clear_blocks = ff_clear_blocks_neon;
-    }
+    c->clear_block  = ff_clear_block_neon;
+    c->clear_blocks = ff_clear_blocks_neon;
 }
diff --git a/libavcodec/arm/dca.h b/libavcodec/arm/dca.h
index 6e87111a..ae4b730a 100644
--- a/libavcodec/arm/dca.h
+++ b/libavcodec/arm/dca.h
@@ -24,7 +24,6 @@
 #include <stdint.h>
 
 #include "config.h"
-#include "libavcodec/dcadsp.h"
 #include "libavcodec/mathops.h"
 
 #if HAVE_ARMV6_INLINE && AV_GCC_VERSION_AT_LEAST(4,4) && !CONFIG_THUMB
diff --git a/libavcodec/arm/dcadsp_neon.S b/libavcodec/arm/dcadsp_neon.S
deleted file mode 100644
index cdc41367..00000000
--- a/libavcodec/arm/dcadsp_neon.S
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavutil/arm/asm.S"
-
-function ff_decode_hf_neon, export=1
-        push            {r4-r5,lr}
-        add             r2,  r2,  r3
-        ldr             r3,       [sp, #12]
-        ldrd            r4,  r5,  [sp, #16]
-        add             r3,  r3,  r4, lsl #3
-        add             r1,  r1,  r4, lsl #2
-        add             r0,  r0,  r4, lsl #5
-
-1:      ldr_post        lr,  r1,  #4
-        add             r4,  r4,  #1
-        add             lr,  r2,  lr, lsl #5
-        cmp             r4,  r5
-        vld1.32         {d7},     [r3]!
-        vld1.8          {d0},     [lr,:64]
-        vcvt.f32.s32    d7,  d7,  #4
-        vmovl.s8        q1,  d0
-        vmovl.s16       q0,  d2
-        vmovl.s16       q1,  d3
-        vcvt.f32.s32    q0,  q0
-        vcvt.f32.s32    q1,  q1
-        vmul.f32        q0,  q0,  d7[0]
-        vmul.f32        q1,  q1,  d7[0]
-        vst1.32         {q0-q1},  [r0,:128]!
-        bne             1b
-
-        pop             {r4-r5,pc}
-endfunc
-
-function ff_dca_lfe_fir0_neon, export=1
-        push            {r4-r6,lr}
-        mov             r3,  #32                @ decifactor
-        mov             r6,  #256/32
-        b               dca_lfe_fir
-endfunc
-
-function ff_dca_lfe_fir1_neon, export=1
-        push            {r4-r6,lr}
-        mov             r3,  #64                @ decifactor
-        mov             r6,  #256/64
-dca_lfe_fir:
-        add             r4,  r0,  r3,  lsl #2   @ out2
-        add             r5,  r2,  #256*4-16     @ cf1
-        sub             r1,  r1,  #12
-        mov             lr,  #-16
-1:
-        vmov.f32        q2,  #0.0               @ v0
-        vmov.f32        q3,  #0.0               @ v1
-        mov             r12, r6
-2:
-        vld1.32         {q8},     [r2,:128]!    @ cf0
-        vld1.32         {q9},     [r5,:128], lr @ cf1
-        vld1.32         {q1},     [r1], lr      @ in
-        subs            r12, r12, #4
-        vrev64.32       q10, q8
-        vmla.f32        q3,  q1,  q9
-        vmla.f32        d4,  d2,  d21
-        vmla.f32        d5,  d3,  d20
-        bne             2b
-
-        add             r1,  r1,  r6,  lsl #2
-        subs            r3,  r3,  #1
-        vadd.f32        d4,  d4,  d5
-        vadd.f32        d6,  d6,  d7
-        vpadd.f32       d5,  d4,  d6
-        vst1.32         {d5[0]},  [r0,:32]!
-        vst1.32         {d5[1]},  [r4,:32]!
-        bne             1b
-
-        pop             {r4-r6,pc}
-endfunc
diff --git a/libavcodec/arm/dcadsp_vfp.S b/libavcodec/arm/dcadsp_vfp.S
deleted file mode 100644
index 2e09f0ee..00000000
--- a/libavcodec/arm/dcadsp_vfp.S
+++ /dev/null
@@ -1,476 +0,0 @@
-/*
- * Copyright (c) 2013 RISC OS Open Ltd
- * Author: Ben Avison <bavison@riscosopen.org>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavutil/arm/asm.S"
-
-POUT          .req    a1
-PIN           .req    a2
-PCOEF         .req    a3
-OLDFPSCR      .req    a4
-COUNTER       .req    ip
-
-IN0           .req    s4
-IN1           .req    s5
-IN2           .req    s6
-IN3           .req    s7
-IN4           .req    s0
-IN5           .req    s1
-IN6           .req    s2
-IN7           .req    s3
-COEF0         .req    s8   @ coefficient elements
-COEF1         .req    s9
-COEF2         .req    s10
-COEF3         .req    s11
-COEF4         .req    s12
-COEF5         .req    s13
-COEF6         .req    s14
-COEF7         .req    s15
-ACCUM0        .req    s16  @ double-buffered multiply-accumulate results
-ACCUM4        .req    s20
-POST0         .req    s24  @ do long-latency post-multiply in this vector in parallel
-POST1         .req    s25
-POST2         .req    s26
-POST3         .req    s27
-
-
-.macro inner_loop  decifactor, dir, tail, head
- .ifc "\dir","up"
-  .set X, 0
-  .set Y, 4
- .else
-  .set X, 4*JMAX*4 - 4
-  .set Y, -4
- .endif
- .ifnc "\head",""
-        vldr    COEF0, [PCOEF, #X + (0*JMAX + 0) * Y]
-        vldr    COEF1, [PCOEF, #X + (1*JMAX + 0) * Y]
-        vldr    COEF2, [PCOEF, #X + (2*JMAX + 0) * Y]
-        vldr    COEF3, [PCOEF, #X + (3*JMAX + 0) * Y]
- .endif
- .ifnc "\tail",""
-        vadd.f  POST0, ACCUM0, ACCUM4   @ vector operation
- .endif
- .ifnc "\head",""
-        vmul.f  ACCUM0, COEF0, IN0      @ vector = vector * scalar
-        vldr    COEF4, [PCOEF, #X + (0*JMAX + 1) * Y]
-        vldr    COEF5, [PCOEF, #X + (1*JMAX + 1) * Y]
-        vldr    COEF6, [PCOEF, #X + (2*JMAX + 1) * Y]
- .endif
- .ifnc "\head",""
-        vldr    COEF7, [PCOEF, #X + (3*JMAX + 1) * Y]
-   .ifc "\tail",""
-        vmul.f  ACCUM4, COEF4, IN1      @ vector operation
-   .endif
-        vldr    COEF0, [PCOEF, #X + (0*JMAX + 2) * Y]
-        vldr    COEF1, [PCOEF, #X + (1*JMAX + 2) * Y]
-   .ifnc "\tail",""
-        vmul.f  ACCUM4, COEF4, IN1      @ vector operation
-   .endif
-        vldr    COEF2, [PCOEF, #X + (2*JMAX + 2) * Y]
-        vldr    COEF3, [PCOEF, #X + (3*JMAX + 2) * Y]
- .endif
- .ifnc "\tail",""
-        vstmia  POUT!, {POST0-POST3}
- .endif
- .ifnc "\head",""
-        vmla.f  ACCUM0, COEF0, IN2      @ vector = vector * scalar
-        vldr    COEF4, [PCOEF, #X + (0*JMAX + 3) * Y]
-        vldr    COEF5, [PCOEF, #X + (1*JMAX + 3) * Y]
-        vldr    COEF6, [PCOEF, #X + (2*JMAX + 3) * Y]
-        vldr    COEF7, [PCOEF, #X + (3*JMAX + 3) * Y]
-        vmla.f  ACCUM4, COEF4, IN3      @ vector = vector * scalar
-  .if \decifactor == 32
-        vldr    COEF0, [PCOEF, #X + (0*JMAX + 4) * Y]
-        vldr    COEF1, [PCOEF, #X + (1*JMAX + 4) * Y]
-        vldr    COEF2, [PCOEF, #X + (2*JMAX + 4) * Y]
-        vldr    COEF3, [PCOEF, #X + (3*JMAX + 4) * Y]
-        vmla.f  ACCUM0, COEF0, IN4      @ vector = vector * scalar
-        vldr    COEF4, [PCOEF, #X + (0*JMAX + 5) * Y]
-        vldr    COEF5, [PCOEF, #X + (1*JMAX + 5) * Y]
-        vldr    COEF6, [PCOEF, #X + (2*JMAX + 5) * Y]
-        vldr    COEF7, [PCOEF, #X + (3*JMAX + 5) * Y]
-        vmla.f  ACCUM4, COEF4, IN5      @ vector = vector * scalar
-        vldr    COEF0, [PCOEF, #X + (0*JMAX + 6) * Y]
-        vldr    COEF1, [PCOEF, #X + (1*JMAX + 6) * Y]
-        vldr    COEF2, [PCOEF, #X + (2*JMAX + 6) * Y]
-        vldr    COEF3, [PCOEF, #X + (3*JMAX + 6) * Y]
-        vmla.f  ACCUM0, COEF0, IN6      @ vector = vector * scalar
-        vldr    COEF4, [PCOEF, #X + (0*JMAX + 7) * Y]
-        vldr    COEF5, [PCOEF, #X + (1*JMAX + 7) * Y]
-        vldr    COEF6, [PCOEF, #X + (2*JMAX + 7) * Y]
-        vldr    COEF7, [PCOEF, #X + (3*JMAX + 7) * Y]
-        vmla.f  ACCUM4, COEF4, IN7      @ vector = vector * scalar
-  .endif
- .endif
-.endm
-
-.macro dca_lfe_fir  decifactor
-function ff_dca_lfe_fir\decifactor\()_vfp, export=1
-        fmrx    OLDFPSCR, FPSCR
-        ldr     ip, =0x03030000         @ RunFast mode, short vectors of length 4, stride 1
-        fmxr    FPSCR, ip
-        vldr    IN0, [PIN, #-0*4]
-        vldr    IN1, [PIN, #-1*4]
-        vldr    IN2, [PIN, #-2*4]
-        vldr    IN3, [PIN, #-3*4]
- .if \decifactor == 32
-  .set JMAX, 8
-        vpush   {s16-s31}
-        vldr    IN4, [PIN, #-4*4]
-        vldr    IN5, [PIN, #-5*4]
-        vldr    IN6, [PIN, #-6*4]
-        vldr    IN7, [PIN, #-7*4]
- .else
-  .set JMAX, 4
-        vpush   {s16-s27}
- .endif
-
-        mov     COUNTER, #\decifactor/4 - 1
-        inner_loop  \decifactor, up,, head
-1:      add     PCOEF, PCOEF, #4*JMAX*4
-        subs    COUNTER, COUNTER, #1
-        inner_loop  \decifactor, up, tail, head
-        bne     1b
-        inner_loop  \decifactor, up, tail
-
-        mov     COUNTER, #\decifactor/4 - 1
-        inner_loop  \decifactor, down,, head
-1:      sub     PCOEF, PCOEF, #4*JMAX*4
-        subs    COUNTER, COUNTER, #1
-        inner_loop  \decifactor, down, tail, head
-        bne     1b
-        inner_loop  \decifactor, down, tail
-
- .if \decifactor == 32
-        vpop    {s16-s31}
- .else
-        vpop    {s16-s27}
- .endif
-        fmxr    FPSCR, OLDFPSCR
-        bx      lr
-endfunc
-.endm
-
-        dca_lfe_fir  64
- .ltorg
-        dca_lfe_fir  32
-
-        .unreq  POUT
-        .unreq  PIN
-        .unreq  PCOEF
-        .unreq  OLDFPSCR
-        .unreq  COUNTER
-
-        .unreq  IN0
-        .unreq  IN1
-        .unreq  IN2
-        .unreq  IN3
-        .unreq  IN4
-        .unreq  IN5
-        .unreq  IN6
-        .unreq  IN7
-        .unreq  COEF0
-        .unreq  COEF1
-        .unreq  COEF2
-        .unreq  COEF3
-        .unreq  COEF4
-        .unreq  COEF5
-        .unreq  COEF6
-        .unreq  COEF7
-        .unreq  ACCUM0
-        .unreq  ACCUM4
-        .unreq  POST0
-        .unreq  POST1
-        .unreq  POST2
-        .unreq  POST3
-
-
-IN      .req    a1
-SBACT   .req    a2
-OLDFPSCR .req   a3
-IMDCT   .req    a4
-WINDOW  .req    v1
-OUT     .req    v2
-BUF     .req    v3
-SCALEINT .req   v4 @ only used in softfp case
-COUNT   .req    v5
-
-SCALE   .req    s0
-
-/* Stack layout differs in softfp and hardfp cases:
- *
- * hardfp
- *      fp -> 6 arg words saved by caller
- *            a3,a4,v1-v3,v5,fp,lr on entry (a3 just to pad to 8 bytes)
- *            s16-s23 on entry
- *            align 16
- *     buf -> 8*32*4 bytes buffer
- *            s0 on entry
- *      sp -> 3 arg words for callee
- *
- * softfp
- *      fp -> 7 arg words saved by caller
- *            a4,v1-v5,fp,lr on entry
- *            s16-s23 on entry
- *            align 16
- *     buf -> 8*32*4 bytes buffer
- *      sp -> 4 arg words for callee
- */
-
-/* void ff_dca_qmf_32_subbands_vfp(float samples_in[32][8], int sb_act,
- *                                 SynthFilterContext *synth, FFTContext *imdct,
- *                                 float (*synth_buf_ptr)[512],
- *                                 int *synth_buf_offset, float (*synth_buf2)[32],
- *                                 const float (*window)[512], float *samples_out,
- *                                 float (*raXin)[32], float scale);
- */
-function ff_dca_qmf_32_subbands_vfp, export=1
-VFP     push    {a3-a4,v1-v3,v5,fp,lr}
-NOVFP   push    {a4,v1-v5,fp,lr}
-        add     fp, sp, #8*4
-        vpush   {s16-s23}
-        @ The buffer pointed at by raXin isn't big enough for us to do a
-        @ complete matrix transposition as we want to, so allocate an
-        @ alternative buffer from the stack. Align to 4 words for speed.
-        sub     BUF, sp, #8*32*4
-        bic     BUF, BUF, #15
-        mov     sp, BUF
-        ldr     lr, =0x03330000     @ RunFast mode, short vectors of length 4, stride 2
-        fmrx    OLDFPSCR, FPSCR
-        fmxr    FPSCR, lr
-        @ COUNT is used to count down 2 things at once:
-        @ bits 0-4 are the number of word pairs remaining in the output row
-        @ bits 5-31 are the number of words to copy (with possible negation)
-        @   from the source matrix before we start zeroing the remainder
-        mov     COUNT, #(-4 << 5) + 16
-        adds    COUNT, COUNT, SBACT, lsl #5
-        bmi     2f
-1:
-        vldr    s8,  [IN, #(0*8+0)*4]
-        vldr    s10, [IN, #(0*8+1)*4]
-        vldr    s12, [IN, #(0*8+2)*4]
-        vldr    s14, [IN, #(0*8+3)*4]
-        vldr    s16, [IN, #(0*8+4)*4]
-        vldr    s18, [IN, #(0*8+5)*4]
-        vldr    s20, [IN, #(0*8+6)*4]
-        vldr    s22, [IN, #(0*8+7)*4]
-        vneg.f  s8, s8
-        vldr    s9,  [IN, #(1*8+0)*4]
-        vldr    s11, [IN, #(1*8+1)*4]
-        vldr    s13, [IN, #(1*8+2)*4]
-        vldr    s15, [IN, #(1*8+3)*4]
-        vneg.f  s16, s16
-        vldr    s17, [IN, #(1*8+4)*4]
-        vldr    s19, [IN, #(1*8+5)*4]
-        vldr    s21, [IN, #(1*8+6)*4]
-        vldr    s23, [IN, #(1*8+7)*4]
-        vstr    d4,  [BUF, #(0*32+0)*4]
-        vstr    d5,  [BUF, #(1*32+0)*4]
-        vstr    d6,  [BUF, #(2*32+0)*4]
-        vstr    d7,  [BUF, #(3*32+0)*4]
-        vstr    d8,  [BUF, #(4*32+0)*4]
-        vstr    d9,  [BUF, #(5*32+0)*4]
-        vstr    d10, [BUF, #(6*32+0)*4]
-        vstr    d11, [BUF, #(7*32+0)*4]
-        vldr    s9,  [IN, #(3*8+0)*4]
-        vldr    s11, [IN, #(3*8+1)*4]
-        vldr    s13, [IN, #(3*8+2)*4]
-        vldr    s15, [IN, #(3*8+3)*4]
-        vldr    s17, [IN, #(3*8+4)*4]
-        vldr    s19, [IN, #(3*8+5)*4]
-        vldr    s21, [IN, #(3*8+6)*4]
-        vldr    s23, [IN, #(3*8+7)*4]
-        vneg.f  s9, s9
-        vldr    s8,  [IN, #(2*8+0)*4]
-        vldr    s10, [IN, #(2*8+1)*4]
-        vldr    s12, [IN, #(2*8+2)*4]
-        vldr    s14, [IN, #(2*8+3)*4]
-        vneg.f  s17, s17
-        vldr    s16, [IN, #(2*8+4)*4]
-        vldr    s18, [IN, #(2*8+5)*4]
-        vldr    s20, [IN, #(2*8+6)*4]
-        vldr    s22, [IN, #(2*8+7)*4]
-        vstr    d4,  [BUF, #(0*32+2)*4]
-        vstr    d5,  [BUF, #(1*32+2)*4]
-        vstr    d6,  [BUF, #(2*32+2)*4]
-        vstr    d7,  [BUF, #(3*32+2)*4]
-        vstr    d8,  [BUF, #(4*32+2)*4]
-        vstr    d9,  [BUF, #(5*32+2)*4]
-        vstr    d10, [BUF, #(6*32+2)*4]
-        vstr    d11, [BUF, #(7*32+2)*4]
-        add     IN, IN, #4*8*4
-        add     BUF, BUF, #4*4
-        subs    COUNT, COUNT, #(4 << 5) + 2
-        bpl     1b
-2:      @ Now deal with trailing < 4 samples
-        adds    COUNT, COUNT, #3 << 5
-        bmi     4f  @ sb_act was a multiple of 4
-        bics    lr, COUNT, #0x1F
-        bne     3f
-        @ sb_act was n*4+1
-        vldr    s8,  [IN, #(0*8+0)*4]
-        vldr    s10, [IN, #(0*8+1)*4]
-        vldr    s12, [IN, #(0*8+2)*4]
-        vldr    s14, [IN, #(0*8+3)*4]
-        vldr    s16, [IN, #(0*8+4)*4]
-        vldr    s18, [IN, #(0*8+5)*4]
-        vldr    s20, [IN, #(0*8+6)*4]
-        vldr    s22, [IN, #(0*8+7)*4]
-        vneg.f  s8, s8
-        vldr    s9,  zero
-        vldr    s11, zero
-        vldr    s13, zero
-        vldr    s15, zero
-        vneg.f  s16, s16
-        vldr    s17, zero
-        vldr    s19, zero
-        vldr    s21, zero
-        vldr    s23, zero
-        vstr    d4,  [BUF, #(0*32+0)*4]
-        vstr    d5,  [BUF, #(1*32+0)*4]
-        vstr    d6,  [BUF, #(2*32+0)*4]
-        vstr    d7,  [BUF, #(3*32+0)*4]
-        vstr    d8,  [BUF, #(4*32+0)*4]
-        vstr    d9,  [BUF, #(5*32+0)*4]
-        vstr    d10, [BUF, #(6*32+0)*4]
-        vstr    d11, [BUF, #(7*32+0)*4]
-        add     BUF, BUF, #2*4
-        sub     COUNT, COUNT, #1
-        b       4f
-3:      @ sb_act was n*4+2 or n*4+3, so do the first 2
-        vldr    s8,  [IN, #(0*8+0)*4]
-        vldr    s10, [IN, #(0*8+1)*4]
-        vldr    s12, [IN, #(0*8+2)*4]
-        vldr    s14, [IN, #(0*8+3)*4]
-        vldr    s16, [IN, #(0*8+4)*4]
-        vldr    s18, [IN, #(0*8+5)*4]
-        vldr    s20, [IN, #(0*8+6)*4]
-        vldr    s22, [IN, #(0*8+7)*4]
-        vneg.f  s8, s8
-        vldr    s9,  [IN, #(1*8+0)*4]
-        vldr    s11, [IN, #(1*8+1)*4]
-        vldr    s13, [IN, #(1*8+2)*4]
-        vldr    s15, [IN, #(1*8+3)*4]
-        vneg.f  s16, s16
-        vldr    s17, [IN, #(1*8+4)*4]
-        vldr    s19, [IN, #(1*8+5)*4]
-        vldr    s21, [IN, #(1*8+6)*4]
-        vldr    s23, [IN, #(1*8+7)*4]
-        vstr    d4,  [BUF, #(0*32+0)*4]
-        vstr    d5,  [BUF, #(1*32+0)*4]
-        vstr    d6,  [BUF, #(2*32+0)*4]
-        vstr    d7,  [BUF, #(3*32+0)*4]
-        vstr    d8,  [BUF, #(4*32+0)*4]
-        vstr    d9,  [BUF, #(5*32+0)*4]
-        vstr    d10, [BUF, #(6*32+0)*4]
-        vstr    d11, [BUF, #(7*32+0)*4]
-        add     BUF, BUF, #2*4
-        sub     COUNT, COUNT, #(2 << 5) + 1
-        bics    lr, COUNT, #0x1F
-        bne     4f
-        @ sb_act was n*4+3
-        vldr    s8,  [IN, #(2*8+0)*4]
-        vldr    s10, [IN, #(2*8+1)*4]
-        vldr    s12, [IN, #(2*8+2)*4]
-        vldr    s14, [IN, #(2*8+3)*4]
-        vldr    s16, [IN, #(2*8+4)*4]
-        vldr    s18, [IN, #(2*8+5)*4]
-        vldr    s20, [IN, #(2*8+6)*4]
-        vldr    s22, [IN, #(2*8+7)*4]
-        vldr    s9,  zero
-        vldr    s11, zero
-        vldr    s13, zero
-        vldr    s15, zero
-        vldr    s17, zero
-        vldr    s19, zero
-        vldr    s21, zero
-        vldr    s23, zero
-        vstr    d4,  [BUF, #(0*32+0)*4]
-        vstr    d5,  [BUF, #(1*32+0)*4]
-        vstr    d6,  [BUF, #(2*32+0)*4]
-        vstr    d7,  [BUF, #(3*32+0)*4]
-        vstr    d8,  [BUF, #(4*32+0)*4]
-        vstr    d9,  [BUF, #(5*32+0)*4]
-        vstr    d10, [BUF, #(6*32+0)*4]
-        vstr    d11, [BUF, #(7*32+0)*4]
-        add     BUF, BUF, #2*4
-        sub     COUNT, COUNT, #1
-4:      @ Now fill the remainder with 0
-        vldr    s8, zero
-        vldr    s9, zero
-        ands    COUNT, COUNT, #0x1F
-        beq     6f
-5:      vstr    d4, [BUF, #(0*32+0)*4]
-        vstr    d4, [BUF, #(1*32+0)*4]
-        vstr    d4, [BUF, #(2*32+0)*4]
-        vstr    d4, [BUF, #(3*32+0)*4]
-        vstr    d4, [BUF, #(4*32+0)*4]
-        vstr    d4, [BUF, #(5*32+0)*4]
-        vstr    d4, [BUF, #(6*32+0)*4]
-        vstr    d4, [BUF, #(7*32+0)*4]
-        add     BUF, BUF, #2*4
-        subs    COUNT, COUNT, #1
-        bne     5b
-6:
-        fmxr    FPSCR, OLDFPSCR
-        ldr     WINDOW, [fp, #3*4]
-        ldr     OUT, [fp, #4*4]
-        sub     BUF, BUF, #32*4
-NOVFP   ldr     SCALEINT, [fp, #6*4]
-        mov     COUNT, #8
-VFP     vpush   {SCALE}
-VFP     sub     sp, sp, #3*4
-NOVFP   sub     sp, sp, #4*4
-7:
-VFP     ldr     a1, [fp, #-7*4]     @ imdct
-NOVFP   ldr     a1, [fp, #-8*4]
-        ldmia   fp, {a2-a4}
-VFP     stmia   sp, {WINDOW, OUT, BUF}
-NOVFP   stmia   sp, {WINDOW, OUT, BUF, SCALEINT}
-VFP     vldr    SCALE, [sp, #3*4]
-        bl      X(ff_synth_filter_float_vfp)
-        add     OUT, OUT, #32*4
-        add     BUF, BUF, #32*4
-        subs    COUNT, COUNT, #1
-        bne     7b
-
-A       sub     sp, fp, #(8+8)*4
-T       sub     fp, fp, #(8+8)*4
-T       mov     sp, fp
-        vpop    {s16-s23}
-VFP     pop     {a3-a4,v1-v3,v5,fp,pc}
-NOVFP   pop     {a4,v1-v5,fp,pc}
-endfunc
-
-        .unreq  IN
-        .unreq  SBACT
-        .unreq  OLDFPSCR
-        .unreq  IMDCT
-        .unreq  WINDOW
-        .unreq  OUT
-        .unreq  BUF
-        .unreq  SCALEINT
-        .unreq  COUNT
-
-        .unreq  SCALE
-
-        .align 2
-zero:   .word   0
diff --git a/libavcodec/arm/fft_init_arm.c b/libavcodec/arm/fft_init_arm.c
index 5087f5f6..055c2c1c 100644
--- a/libavcodec/arm/fft_init_arm.c
+++ b/libavcodec/arm/fft_init_arm.c
@@ -40,7 +40,7 @@ av_cold void ff_fft_init_arm(FFTContext *s)
 {
     int cpu_flags = av_get_cpu_flags();
 
-    if (have_vfp(cpu_flags) && !have_vfpv3(cpu_flags)) {
+    if (have_vfp_vm(cpu_flags)) {
         s->fft_calc     = ff_fft_calc_vfp;
 #if CONFIG_MDCT
         s->imdct_half   = ff_imdct_half_vfp;
diff --git a/libavcodec/arm/fmtconvert_init_arm.c b/libavcodec/arm/fmtconvert_init_arm.c
index 58589c46..a734dece 100644
--- a/libavcodec/arm/fmtconvert_init_arm.c
+++ b/libavcodec/arm/fmtconvert_init_arm.c
@@ -25,6 +25,9 @@
 #include "libavcodec/avcodec.h"
 #include "libavcodec/fmtconvert.h"
 
+void ff_int32_to_float_fmul_array8_neon(FmtConvertContext *c, float *dst,
+                                        const int32_t *src, const float *mul,
+                                        int len);
 void ff_int32_to_float_fmul_scalar_neon(float *dst, const int32_t *src,
                                         float mul, int len);
 
@@ -38,7 +41,7 @@ av_cold void ff_fmt_convert_init_arm(FmtConvertContext *c, AVCodecContext *avctx
 {
     int cpu_flags = av_get_cpu_flags();
 
-    if (have_vfp(cpu_flags)) {
+    if (have_vfp_vm(cpu_flags)) {
         if (!have_vfpv3(cpu_flags)) {
             c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_vfp;
             c->int32_to_float_fmul_array8 = ff_int32_to_float_fmul_array8_vfp;
@@ -46,6 +49,7 @@ av_cold void ff_fmt_convert_init_arm(FmtConvertContext *c, AVCodecContext *avctx
     }
 
     if (have_neon(cpu_flags)) {
+        c->int32_to_float_fmul_array8 = ff_int32_to_float_fmul_array8_neon;
         c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_neon;
     }
 }
diff --git a/libavcodec/arm/fmtconvert_neon.S b/libavcodec/arm/fmtconvert_neon.S
index a9ad57f7..738953e8 100644
--- a/libavcodec/arm/fmtconvert_neon.S
+++ b/libavcodec/arm/fmtconvert_neon.S
@@ -1,6 +1,7 @@
 /*
  * ARM NEON optimised Format Conversion Utils
  * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
+ * Copyright (c) 2015 Janne Grunau  <janne-libav@jannau.net>
  *
  * This file is part of FFmpeg.
  *
@@ -49,3 +50,39 @@ NOVFP   len     .req    r3
         bx              lr
         .unreq  len
 endfunc
+
+function ff_int32_to_float_fmul_array8_neon, export=1
+        ldr             r0,  [sp]                   @ overwrite r0 with the word at [sp]: len, the 5th argument of the C prototype
+        lsr             r0,  r0,  #3                @ r0 = len / 8 = number of 8-sample groups (low 3 bits dropped)
+        subs            r0,  r0,  #1                @ r0 = groups - 1, sets flags for the beq
+        beq             1f                          @ exactly one group: skip the loop, use the tail at 1:
+2:                                                  @ loop: 2 groups (16 samples) per pass; NOTE(review): also entered when len < 8 - presumably never passed, confirm
+        vld1.32         {q0-q1},   [r2,:128]!       @ load 8 int32 from r2 (src per prototype), post-increment
+        vld1.32         {q2-q3},   [r2,:128]!       @ load the next 8 int32
+        vld1.32         {d20},     [r3]!            @ load 2 floats from r3 (mul per prototype), one per group
+        subs            r0,  r0,  #2                @ two groups consumed; these flags drive the bgt/bxlt below
+        vcvt.f32.s32    q0,  q0                     @ signed int32 -> float, first group
+        vcvt.f32.s32    q1,  q1
+        vdup.32         q8,  d20[0]                 @ q8 = first multiplier in every lane
+        vcvt.f32.s32    q2,  q2                     @ signed int32 -> float, second group
+        vcvt.f32.s32    q3,  q3
+        vmul.f32        q0,  q0,  q8                @ scale first group by the first multiplier
+        vdup.32         q9,  d20[1]                 @ q9 = second multiplier in every lane
+        vmul.f32        q1,  q1,  q8
+        vmul.f32        q2,  q2,  q9                @ scale second group by the second multiplier
+        vmul.f32        q3,  q3,  q9
+        vst1.32         {q0-q1},   [r1,:128]!       @ store 8 floats to r1 (dst per prototype), post-increment
+        vst1.32         {q2-q3},   [r1,:128]!       @ store the next 8 floats
+        bgt             2b                          @ r0 > 0: at least two groups remain
+        it              lt                          @ IT block so the conditional return is valid in Thumb-2
+        bxlt            lr                          @ r0 < 0: everything processed, return
+1:                                                  @ r0 == 0: exactly one group left
+        vld1.32         {q0-q1},   [r2,:128]        @ load the last 8 int32 (no writeback)
+        vld1.32         {d16[],d17[]},  [r3]        @ load one float and replicate it to all lanes of q8
+        vcvt.f32.s32    q0,  q0                     @ signed int32 -> float
+        vcvt.f32.s32    q1,  q1
+        vmul.f32        q0,  q0,  q8                @ scale by the last multiplier
+        vmul.f32        q1,  q1,  q8
+        vst1.32         {q0-q1},   [r1,:128]        @ store the last 8 floats
+        bx              lr
+endfunc
diff --git a/libavcodec/arm/h264pred_init_arm.c b/libavcodec/arm/h264pred_init_arm.c
index 6ba7592f..cc324d7d 100644
--- a/libavcodec/arm/h264pred_init_arm.c
+++ b/libavcodec/arm/h264pred_init_arm.c
@@ -54,22 +54,23 @@ static av_cold void h264_pred_init_neon(H264PredContext *h, int codec_id,
 
     if (high_depth)
         return;
-    if(chroma_format_idc == 1){
-    h->pred8x8[VERT_PRED8x8     ] = ff_pred8x8_vert_neon;
-    h->pred8x8[HOR_PRED8x8      ] = ff_pred8x8_hor_neon;
-    if (codec_id != AV_CODEC_ID_VP7 && codec_id != AV_CODEC_ID_VP8)
-        h->pred8x8[PLANE_PRED8x8] = ff_pred8x8_plane_neon;
-    h->pred8x8[DC_128_PRED8x8   ] = ff_pred8x8_128_dc_neon;
-    if (codec_id != AV_CODEC_ID_RV40 && codec_id != AV_CODEC_ID_VP7 &&
-        codec_id != AV_CODEC_ID_VP8) {
-        h->pred8x8[DC_PRED8x8     ] = ff_pred8x8_dc_neon;
-        h->pred8x8[LEFT_DC_PRED8x8] = ff_pred8x8_left_dc_neon;
-        h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_neon;
-        h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8] = ff_pred8x8_l0t_dc_neon;
-        h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8] = ff_pred8x8_0lt_dc_neon;
-        h->pred8x8[ALZHEIMER_DC_L00_PRED8x8] = ff_pred8x8_l00_dc_neon;
-        h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8] = ff_pred8x8_0l0_dc_neon;
-    }
+
+    if (chroma_format_idc <= 1) {
+        h->pred8x8[VERT_PRED8x8     ] = ff_pred8x8_vert_neon;
+        h->pred8x8[HOR_PRED8x8      ] = ff_pred8x8_hor_neon;
+        if (codec_id != AV_CODEC_ID_VP7 && codec_id != AV_CODEC_ID_VP8)
+            h->pred8x8[PLANE_PRED8x8] = ff_pred8x8_plane_neon;
+        h->pred8x8[DC_128_PRED8x8   ] = ff_pred8x8_128_dc_neon;
+        if (codec_id != AV_CODEC_ID_RV40 && codec_id != AV_CODEC_ID_VP7 &&
+            codec_id != AV_CODEC_ID_VP8) {
+            h->pred8x8[DC_PRED8x8     ] = ff_pred8x8_dc_neon;
+            h->pred8x8[LEFT_DC_PRED8x8] = ff_pred8x8_left_dc_neon;
+            h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_neon;
+            h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8] = ff_pred8x8_l0t_dc_neon;
+            h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8] = ff_pred8x8_0lt_dc_neon;
+            h->pred8x8[ALZHEIMER_DC_L00_PRED8x8] = ff_pred8x8_l00_dc_neon;
+            h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8] = ff_pred8x8_0l0_dc_neon;
+        }
     }
 
     h->pred16x16[DC_PRED8x8     ] = ff_pred16x16_dc_neon;
diff --git a/libavcodec/arm/idctdsp_init_arm.c b/libavcodec/arm/idctdsp_init_arm.c
index da5da068..0068e3f8 100644
--- a/libavcodec/arm/idctdsp_init_arm.c
+++ b/libavcodec/arm/idctdsp_init_arm.c
@@ -64,7 +64,7 @@ av_cold void ff_idctdsp_init_arm(IDCTDSPContext *c, AVCodecContext *avctx,
     int cpu_flags = av_get_cpu_flags();
 
     if (!avctx->lowres && !high_bit_depth) {
-        if ((avctx->idct_algo == FF_IDCT_AUTO && !(avctx->flags & CODEC_FLAG_BITEXACT)) ||
+        if ((avctx->idct_algo == FF_IDCT_AUTO && !(avctx->flags & AV_CODEC_FLAG_BITEXACT)) ||
             avctx->idct_algo == FF_IDCT_ARM) {
             c->idct_put  = j_rev_dct_arm_put;
             c->idct_add  = j_rev_dct_arm_add;
diff --git a/libavcodec/arm/idctdsp_init_armv6.c b/libavcodec/arm/idctdsp_init_armv6.c
index a3470a83..edf3070e 100644
--- a/libavcodec/arm/idctdsp_init_armv6.c
+++ b/libavcodec/arm/idctdsp_init_armv6.c
@@ -33,7 +33,7 @@ av_cold void ff_idctdsp_init_armv6(IDCTDSPContext *c, AVCodecContext *avctx,
                                    unsigned high_bit_depth)
 {
     if (!avctx->lowres && !high_bit_depth) {
-        if ((avctx->idct_algo == FF_IDCT_AUTO && !(avctx->flags & CODEC_FLAG_BITEXACT)) ||
+        if ((avctx->idct_algo == FF_IDCT_AUTO && !(avctx->flags & AV_CODEC_FLAG_BITEXACT)) ||
             avctx->idct_algo == FF_IDCT_SIMPLEARMV6) {
             c->idct_put  = ff_simple_idct_put_armv6;
             c->idct_add  = ff_simple_idct_add_armv6;
diff --git a/libavcodec/arm/dcadsp_init_arm.c b/libavcodec/arm/synth_filter_init_arm.c
similarity index 52%
rename from libavcodec/arm/dcadsp_init_arm.c
rename to libavcodec/arm/synth_filter_init_arm.c
index a5495158..ea0ce148 100644
--- a/libavcodec/arm/dcadsp_init_arm.c
+++ b/libavcodec/arm/synth_filter_init_arm.c
@@ -22,20 +22,9 @@
 
 #include "libavutil/arm/cpu.h"
 #include "libavutil/attributes.h"
-#include "libavcodec/dcadsp.h"
-
-void ff_dca_lfe_fir0_neon(float *out, const float *in, const float *coefs);
-void ff_dca_lfe_fir1_neon(float *out, const float *in, const float *coefs);
-
-void ff_dca_lfe_fir32_vfp(float *out, const float *in, const float *coefs);
-void ff_dca_lfe_fir64_vfp(float *out, const float *in, const float *coefs);
-
-void ff_dca_qmf_32_subbands_vfp(float samples_in[32][8], int sb_act,
-                                SynthFilterContext *synth, FFTContext *imdct,
-                                float synth_buf_ptr[512],
-                                int *synth_buf_offset, float synth_buf2[32],
-                                const float window[512], float *samples_out,
-                                float raXin[32], float scale);
+#include "libavutil/internal.h"
+#include "libavcodec/fft.h"
+#include "libavcodec/synth_filter.h"
 
 void ff_synth_filter_float_vfp(FFTContext *imdct,
                                float *synth_buf_ptr, int *synth_buf_offset,
@@ -49,33 +38,11 @@ void ff_synth_filter_float_neon(FFTContext *imdct,
                                 float out[32], const float in[32],
                                 float scale);
 
-void ff_decode_hf_neon(float dst[DCA_SUBBANDS][8],
-                       const int32_t vq_num[DCA_SUBBANDS],
-                       const int8_t hf_vq[1024][32], intptr_t vq_offset,
-                       int32_t scale[DCA_SUBBANDS][2],
-                       intptr_t start, intptr_t end);
-
-av_cold void ff_dcadsp_init_arm(DCADSPContext *s)
-{
-    int cpu_flags = av_get_cpu_flags();
-
-    if (have_vfp(cpu_flags) && !have_vfpv3(cpu_flags)) {
-        s->lfe_fir[0]      = ff_dca_lfe_fir32_vfp;
-        s->lfe_fir[1]      = ff_dca_lfe_fir64_vfp;
-        s->qmf_32_subbands = ff_dca_qmf_32_subbands_vfp;
-    }
-    if (have_neon(cpu_flags)) {
-        s->lfe_fir[0] = ff_dca_lfe_fir0_neon;
-        s->lfe_fir[1] = ff_dca_lfe_fir1_neon;
-        s->decode_hf  = ff_decode_hf_neon;
-    }
-}
-
 av_cold void ff_synth_filter_init_arm(SynthFilterContext *s)
 {
     int cpu_flags = av_get_cpu_flags();
 
-    if (have_vfp(cpu_flags) && !have_vfpv3(cpu_flags))
+    if (have_vfp_vm(cpu_flags))
         s->synth_filter_float = ff_synth_filter_float_vfp;
     if (have_neon(cpu_flags))
         s->synth_filter_float = ff_synth_filter_float_neon;
diff --git a/libavcodec/ass.c b/libavcodec/ass.c
index 468b8bb6..56d452f7 100644
--- a/libavcodec/ass.c
+++ b/libavcodec/ass.c
@@ -30,7 +30,7 @@ int ff_ass_subtitle_header(AVCodecContext *avctx,
                            const char *font, int font_size,
                            int color, int back_color,
                            int bold, int italic, int underline,
-                           int alignment)
+                           int border_style, int alignment)
 {
     avctx->subtitle_header = av_asprintf(
              "[Script Info]\r\n"
@@ -59,17 +59,17 @@ int ff_ass_subtitle_header(AVCodecContext *avctx,
              "%d,%d,%d,0,"          /* Bold, Italic, Underline, StrikeOut */
              "100,100,"             /* Scale{X,Y} */
              "0,0,"                 /* Spacing, Angle */
-             "1,1,0,"               /* BorderStyle, Outline, Shadow */
+             "%d,1,0,"              /* BorderStyle, Outline, Shadow */
              "%d,10,10,10,"         /* Alignment, Margin[LRV] */
              "0\r\n"                /* Encoding */
 
              "\r\n"
              "[Events]\r\n"
              "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\r\n",
-             !(avctx->flags & CODEC_FLAG_BITEXACT) ? AV_STRINGIFY(LIBAVCODEC_VERSION) : "",
+             !(avctx->flags & AV_CODEC_FLAG_BITEXACT) ? AV_STRINGIFY(LIBAVCODEC_VERSION) : "",
              ASS_DEFAULT_PLAYRESX, ASS_DEFAULT_PLAYRESY,
              font, font_size, color, color, back_color, back_color,
-             -bold, -italic, -underline, alignment);
+             -bold, -italic, -underline, border_style, alignment);
 
     if (!avctx->subtitle_header)
         return AVERROR(ENOMEM);
@@ -86,6 +86,7 @@ int ff_ass_subtitle_header_default(AVCodecContext *avctx)
                                ASS_DEFAULT_BOLD,
                                ASS_DEFAULT_ITALIC,
                                ASS_DEFAULT_UNDERLINE,
+                               ASS_DEFAULT_BORDERSTYLE,
                                ASS_DEFAULT_ALIGNMENT);
 }
 
@@ -161,6 +162,8 @@ int ff_ass_add_rect(AVSubtitle *sub, const char *dialog,
     sub->rects = rects;
     sub->end_display_time = FFMAX(sub->end_display_time, 10 * duration);
     rects[sub->num_rects]       = av_mallocz(sizeof(*rects[0]));
+    if (!rects[sub->num_rects])
+        goto errnomem;
     rects[sub->num_rects]->type = SUBTITLE_ASS;
     ret = av_bprint_finalize(&buf, &rects[sub->num_rects]->ass);
     if (ret < 0)
diff --git a/libavcodec/ass.h b/libavcodec/ass.h
index f3046efb..621a7ba7 100644
--- a/libavcodec/ass.h
+++ b/libavcodec/ass.h
@@ -40,6 +40,7 @@
 #define ASS_DEFAULT_ITALIC      0
 #define ASS_DEFAULT_UNDERLINE   0
 #define ASS_DEFAULT_ALIGNMENT   2
+#define ASS_DEFAULT_BORDERSTYLE 1
 /** @} */
 
 /**
@@ -61,7 +62,7 @@ int ff_ass_subtitle_header(AVCodecContext *avctx,
                            const char *font, int font_size,
                            int color, int back_color,
                            int bold, int italic, int underline,
-                           int alignment);
+                           int border_style, int alignment);
 
 /**
  * Generate a suitable AVCodecContext.subtitle_header for SUBTITLE_ASS
@@ -95,7 +96,7 @@ int ff_ass_add_rect(AVSubtitle *sub, const char *dialog,
                     int ts_start, int duration, int raw);
 
 /**
- * Same as ff_ass_add_rect_bprint, but taking an AVBPrint buffer instead of a
+ * Same as ff_ass_add_rect, but taking an AVBPrint buffer instead of a
  * string, and assuming raw=0.
  */
 int ff_ass_add_rect_bprint(AVSubtitle *sub, AVBPrint *buf,
diff --git a/libavcodec/ass_split.c b/libavcodec/ass_split.c
index 2458cb92..f84a686b 100644
--- a/libavcodec/ass_split.c
+++ b/libavcodec/ass_split.c
@@ -356,6 +356,8 @@ static int ass_split(ASSSplitContext *ctx, const char *buf)
 ASSSplitContext *ff_ass_split(const char *buf)
 {
     ASSSplitContext *ctx = av_mallocz(sizeof(*ctx));
+    if (!ctx)
+        return NULL;
     ctx->current_section = -1;
     if (ass_split(ctx, buf) < 0) {
         ff_ass_split_free(ctx);
@@ -523,7 +525,7 @@ ASSStyle *ff_ass_style_get(ASSSplitContext *ctx, const char *style)
     if (!style || !*style)
         style = "Default";
     for (i=0; i<ass->styles_count; i++)
-        if (!strcmp(ass->styles[i].name, style))
+        if (ass->styles[i].name && !strcmp(ass->styles[i].name, style))
             return ass->styles + i;
     return NULL;
 }
diff --git a/libavcodec/asvdec.c b/libavcodec/asvdec.c
index 18e2faac..036d56ed 100644
--- a/libavcodec/asvdec.c
+++ b/libavcodec/asvdec.c
@@ -195,7 +195,7 @@ static inline void idct_put(ASV1Context *a, AVFrame *frame, int mb_x, int mb_y)
     a->idsp.idct_put(dest_y + 8 * linesize,     linesize, block[2]);
     a->idsp.idct_put(dest_y + 8 * linesize + 8, linesize, block[3]);
 
-    if (!(a->avctx->flags & CODEC_FLAG_GRAY)) {
+    if (!(a->avctx->flags & AV_CODEC_FLAG_GRAY)) {
         a->idsp.idct_put(dest_cb, frame->linesize[1], block[4]);
         a->idsp.idct_put(dest_cr, frame->linesize[2], block[5]);
     }
@@ -322,7 +322,7 @@ AVCodec ff_asv1_decoder = {
     .init           = decode_init,
     .close          = decode_end,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
 #endif
 
@@ -336,6 +336,6 @@ AVCodec ff_asv2_decoder = {
     .init           = decode_init,
     .close          = decode_end,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
 #endif
diff --git a/libavcodec/asvenc.c b/libavcodec/asvenc.c
index d78fa47e..ec98a0ce 100644
--- a/libavcodec/asvenc.c
+++ b/libavcodec/asvenc.c
@@ -207,7 +207,7 @@ static inline void dct_get(ASV1Context *a, const AVFrame *frame,
     for (i = 0; i < 4; i++)
         a->fdsp.fdct(block[i]);
 
-    if (!(a->avctx->flags & CODEC_FLAG_GRAY)) {
+    if (!(a->avctx->flags & AV_CODEC_FLAG_GRAY)) {
         a->pdsp.get_pixels(block[4], ptr_cb, frame->linesize[1]);
         a->pdsp.get_pixels(block[5], ptr_cr, frame->linesize[2]);
         for (i = 4; i < 6; i++)
@@ -245,10 +245,10 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 
         for (i = 0; i<3; i++) {
             int x, y;
-            int w  = FF_CEIL_RSHIFT(pict->width, !!i);
-            int h  = FF_CEIL_RSHIFT(pict->height, !!i);
-            int w2 = FF_CEIL_RSHIFT(clone->width, !!i);
-            int h2 = FF_CEIL_RSHIFT(clone->height, !!i);
+            int w  = AV_CEIL_RSHIFT(pict->width, !!i);
+            int h  = AV_CEIL_RSHIFT(pict->height, !!i);
+            int w2 = AV_CEIL_RSHIFT(clone->width, !!i);
+            int h2 = AV_CEIL_RSHIFT(clone->height, !!i);
             for (y=0; y<h; y++)
                 for (x=w; x<w2; x++)
                     clone->data[i][x + y*clone->linesize[i]] =
@@ -265,7 +265,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     }
 
     if ((ret = ff_alloc_packet2(avctx, pkt, a->mb_height * a->mb_width * MAX_MB_SIZE +
-                                FF_MIN_BUFFER_SIZE)) < 0)
+                                AV_INPUT_BUFFER_MIN_SIZE, 0)) < 0)
         return ret;
 
     init_put_bits(&a->pb, pkt->data, pkt->size);
@@ -363,8 +363,7 @@ AVCodec ff_asv1_encoder = {
     .encode2        = encode_frame,
     .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUV420P,
                                                      AV_PIX_FMT_NONE },
-    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE |
-                      FF_CODEC_CAP_INIT_CLEANUP,
+    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE,
 };
 #endif
 
@@ -379,7 +378,6 @@ AVCodec ff_asv2_encoder = {
     .encode2        = encode_frame,
     .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUV420P,
                                                      AV_PIX_FMT_NONE },
-    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE |
-                      FF_CODEC_CAP_INIT_CLEANUP,
+    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE,
 };
 #endif
diff --git a/libavcodec/atrac1.c b/libavcodec/atrac1.c
index f965dcc2..a8c8c91b 100644
--- a/libavcodec/atrac1.c
+++ b/libavcodec/atrac1.c
@@ -65,7 +65,7 @@ typedef struct AT1SUCtx {
     DECLARE_ALIGNED(32, float, spec2)[AT1_SU_SAMPLES];     ///< mdct buffer
     DECLARE_ALIGNED(32, float, fst_qmf_delay)[46];         ///< delay line for the 1st stacked QMF filter
     DECLARE_ALIGNED(32, float, snd_qmf_delay)[46];         ///< delay line for the 2nd stacked QMF filter
-    DECLARE_ALIGNED(32, float, last_qmf_delay)[256+23];    ///< delay line for the last stacked QMF filter
+    DECLARE_ALIGNED(32, float, last_qmf_delay)[256+39];    ///< delay line for the last stacked QMF filter
 } AT1SUCtx;
 
 /**
@@ -260,9 +260,9 @@ static void at1_subband_synthesis(AT1Ctx *q, AT1SUCtx* su, float *pOut)
     /* combine low and middle bands */
     ff_atrac_iqmf(q->bands[0], q->bands[1], 128, temp, su->fst_qmf_delay, iqmf_temp);
 
-    /* delay the signal of the high band by 23 samples */
-    memcpy( su->last_qmf_delay,    &su->last_qmf_delay[256], sizeof(float) *  23);
-    memcpy(&su->last_qmf_delay[23], q->bands[2],             sizeof(float) * 256);
+    /* delay the signal of the high band by 39 samples */
+    memcpy( su->last_qmf_delay,    &su->last_qmf_delay[256], sizeof(float) *  39);
+    memcpy(&su->last_qmf_delay[39], q->bands[2],             sizeof(float) * 256);
 
     /* combine (low + middle) and high bands */
     ff_atrac_iqmf(temp, su->last_qmf_delay, 256, pOut, su->snd_qmf_delay, iqmf_temp);
@@ -361,7 +361,7 @@ static av_cold int atrac1_decode_init(AVCodecContext *avctx)
 
     ff_atrac_generate_tables();
 
-    q->fdsp = avpriv_float_dsp_alloc(avctx->flags & CODEC_FLAG_BITEXACT);
+    q->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
 
     q->bands[0] = q->low;
     q->bands[1] = q->mid;
@@ -386,7 +386,7 @@ AVCodec ff_atrac1_decoder = {
     .init           = atrac1_decode_init,
     .close          = atrac1_decode_end,
     .decode         = atrac1_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                       AV_SAMPLE_FMT_NONE },
 };
diff --git a/libavcodec/atrac3.c b/libavcodec/atrac3.c
index 5aa3d8df..4bdb63f9 100644
--- a/libavcodec/atrac3.c
+++ b/libavcodec/atrac3.c
@@ -886,7 +886,7 @@ static av_cold int atrac3_decode_init(AVCodecContext *avctx)
         return AVERROR(EINVAL);
 
     q->decoded_bytes_buffer = av_mallocz(FFALIGN(avctx->block_align, 4) +
-                                         FF_INPUT_BUFFER_PADDING_SIZE);
+                                         AV_INPUT_BUFFER_PADDING_SIZE);
     if (!q->decoded_bytes_buffer)
         return AVERROR(ENOMEM);
 
@@ -914,7 +914,7 @@ static av_cold int atrac3_decode_init(AVCodecContext *avctx)
     }
 
     ff_atrac_init_gain_compensation(&q->gainc_ctx, 4, 3);
-    q->fdsp = avpriv_float_dsp_alloc(avctx->flags & CODEC_FLAG_BITEXACT);
+    q->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
 
     q->units = av_mallocz_array(avctx->channels, sizeof(*q->units));
     if (!q->units || !q->fdsp) {
@@ -934,7 +934,7 @@ AVCodec ff_atrac3_decoder = {
     .init             = atrac3_decode_init,
     .close            = atrac3_decode_close,
     .decode           = atrac3_decode_frame,
-    .capabilities     = CODEC_CAP_SUBFRAMES | CODEC_CAP_DR1,
+    .capabilities     = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1,
     .sample_fmts      = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                         AV_SAMPLE_FMT_NONE },
 };
diff --git a/libavcodec/atrac3plus.c b/libavcodec/atrac3plus.c
index b16a1397..46e0beaf 100644
--- a/libavcodec/atrac3plus.c
+++ b/libavcodec/atrac3plus.c
@@ -39,9 +39,6 @@ static VLC spec_vlc_tabs[112];
 static VLC gain_vlc_tabs[11];
 static VLC tone_vlc_tabs[7];
 
-#define GET_DELTA(gb, delta_bits) \
-    ((delta_bits) ? get_bits((gb), (delta_bits)) : 0)
-
 /**
  * Generate canonical VLC table from given descriptor.
  *
@@ -384,7 +381,7 @@ static int decode_channel_wordlen(GetBitContext *gb, Atrac3pChanUnitCtx *ctx,
                     chan->qu_wordlen[i] = get_bits(gb, 3);
 
                 for (i = pos; i < chan->num_coded_vals; i++)
-                    chan->qu_wordlen[i] = (min_val + GET_DELTA(gb, delta_bits)) & 7;
+                    chan->qu_wordlen[i] = (min_val + get_bitsz(gb, delta_bits)) & 7;
             }
         }
         break;
@@ -516,7 +513,7 @@ static int decode_channel_sf_idx(GetBitContext *gb, Atrac3pChanUnitCtx *ctx,
                 /* all others are: min_val + delta */
                 for (i = num_long_vals; i < ctx->used_quant_units; i++)
                     chan->qu_sf_idx[i] = (chan->qu_sf_idx[i] + min_val +
-                                          GET_DELTA(gb, delta_bits)) & 0x3F;
+                                          get_bitsz(gb, delta_bits)) & 0x3F;
             } else {
                 num_long_vals = get_bits(gb, 5);
                 delta_bits    = get_bits(gb, 3);
@@ -534,7 +531,7 @@ static int decode_channel_sf_idx(GetBitContext *gb, Atrac3pChanUnitCtx *ctx,
                 /* all others are: min_val + delta */
                 for (i = num_long_vals; i < ctx->used_quant_units; i++)
                     chan->qu_sf_idx[i] = (min_val +
-                                          GET_DELTA(gb, delta_bits)) & 0x3F;
+                                          get_bitsz(gb, delta_bits)) & 0x3F;
             }
         }
         break;
@@ -1014,7 +1011,7 @@ static int decode_gainc_npoints(GetBitContext *gb, Atrac3pChanUnitCtx *ctx,
             min_val    = get_bits(gb, 3);
 
             for (i = 0; i < coded_subbands; i++) {
-                chan->gain_data[i].num_points = min_val + GET_DELTA(gb, delta_bits);
+                chan->gain_data[i].num_points = min_val + get_bitsz(gb, delta_bits);
                 if (chan->gain_data[i].num_points > 7)
                     return AVERROR_INVALIDDATA;
             }
@@ -1134,7 +1131,7 @@ static int decode_gainc_levels(GetBitContext *gb, Atrac3pChanUnitCtx *ctx,
 
             for (sb = 0; sb < coded_subbands; sb++)
                 for (i = 0; i < chan->gain_data[sb].num_points; i++) {
-                    chan->gain_data[sb].lev_code[i] = min_val + GET_DELTA(gb, delta_bits);
+                    chan->gain_data[sb].lev_code[i] = min_val + get_bitsz(gb, delta_bits);
                     if (chan->gain_data[sb].lev_code[i] > 15)
                         return AVERROR_INVALIDDATA;
                 }
diff --git a/libavcodec/atrac3plusdec.c b/libavcodec/atrac3plusdec.c
index b4437f1a..ec2b1ad3 100644
--- a/libavcodec/atrac3plusdec.c
+++ b/libavcodec/atrac3plusdec.c
@@ -171,7 +171,7 @@ static av_cold int atrac3p_decode_init(AVCodecContext *avctx)
     ctx->my_channel_layout = avctx->channel_layout;
 
     ctx->ch_units = av_mallocz_array(ctx->num_channel_blocks, sizeof(*ctx->ch_units));
-    ctx->fdsp = avpriv_float_dsp_alloc(avctx->flags & CODEC_FLAG_BITEXACT);
+    ctx->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
 
     if (!ctx->ch_units || !ctx->fdsp) {
         atrac3p_decode_close(avctx);
@@ -392,6 +392,7 @@ AVCodec ff_atrac3p_decoder = {
     .long_name      = NULL_IF_CONFIG_SMALL("ATRAC3+ (Adaptive TRansform Acoustic Coding 3+)"),
     .type           = AVMEDIA_TYPE_AUDIO,
     .id             = AV_CODEC_ID_ATRAC3P,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .priv_data_size = sizeof(ATRAC3PContext),
     .init           = atrac3p_decode_init,
     .close          = atrac3p_decode_close,
diff --git a/libavcodec/atrac3plusdsp.c b/libavcodec/atrac3plusdsp.c
index 17c64377..d0895882 100644
--- a/libavcodec/atrac3plusdsp.c
+++ b/libavcodec/atrac3plusdsp.c
@@ -28,6 +28,7 @@
 #include <math.h>
 
 #include "libavutil/float_dsp.h"
+#include "libavutil/libm.h"
 #include "avcodec.h"
 #include "sinewin.h"
 #include "fft.h"
@@ -107,7 +108,7 @@ av_cold void ff_atrac3p_init_wave_synth(void)
 
     /* generate amplitude scalefactors table */
     for (i = 0; i < 64; i++)
-        amp_sf_tab[i] = pow(2.0f, ((double)i - 3) / 4.0f);
+        amp_sf_tab[i] = exp2f((i - 3) / 4.0f);
 }
 
 /**
diff --git a/libavcodec/audio_frame_queue.c b/libavcodec/audio_frame_queue.c
index 4f6bccce..f2ccd692 100644
--- a/libavcodec/audio_frame_queue.c
+++ b/libavcodec/audio_frame_queue.c
@@ -73,7 +73,7 @@ int ff_af_queue_add(AudioFrameQueue *afq, const AVFrame *f)
 }
 
 void ff_af_queue_remove(AudioFrameQueue *afq, int nb_samples, int64_t *pts,
-                        int *duration)
+                        int64_t *duration)
 {
     int64_t out_pts = AV_NOPTS_VALUE;
     int removed_samples = 0;
diff --git a/libavcodec/audio_frame_queue.h b/libavcodec/audio_frame_queue.h
index 2e317bbf..d8076eae 100644
--- a/libavcodec/audio_frame_queue.h
+++ b/libavcodec/audio_frame_queue.h
@@ -78,6 +78,6 @@ int ff_af_queue_add(AudioFrameQueue *afq, const AVFrame *f);
  * @param[out] duration output packet duration
  */
 void ff_af_queue_remove(AudioFrameQueue *afq, int nb_samples, int64_t *pts,
-                        int *duration);
+                        int64_t *duration);
 
 #endif /* AVCODEC_AUDIO_FRAME_QUEUE_H */
diff --git a/libavcodec/aura.c b/libavcodec/aura.c
index 8d0f16a4..5f84d957 100644
--- a/libavcodec/aura.c
+++ b/libavcodec/aura.c
@@ -104,5 +104,5 @@ AVCodec ff_aura2_decoder = {
     .id             = AV_CODEC_ID_AURA2,
     .init           = aura_decode_init,
     .decode         = aura_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 4aeb57ae..d8497655 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -93,8 +93,7 @@
  *
  * If you add a codec ID to this list, add it so that
  * 1. no value of a existing codec ID changes (that would break ABI),
- * 2. Give it a value which when taken as ASCII is recognized uniquely by a human as this specific codec.
- *    This ensures that 2 forks can independently add AVCodecIDs without producing conflicts.
+ * 2. it is as close as possible to similar codecs
  *
  * After adding new codec IDs, do not forget to add an entry to the codec
  * descriptor list and bump libavcodec minor version.
@@ -242,7 +241,7 @@ enum AVCodecID {
     AV_CODEC_ID_ANM,
     AV_CODEC_ID_BINKVIDEO,
     AV_CODEC_ID_IFF_ILBM,
-    AV_CODEC_ID_IFF_BYTERUN1,
+#define AV_CODEC_ID_IFF_BYTERUN1 AV_CODEC_ID_IFF_ILBM
     AV_CODEC_ID_KGV1,
     AV_CODEC_ID_YOP,
     AV_CODEC_ID_VP8,
@@ -275,54 +274,48 @@ enum AVCodecID {
     AV_CODEC_ID_MSS2,
     AV_CODEC_ID_VP9,
     AV_CODEC_ID_AIC,
-    AV_CODEC_ID_ESCAPE130_DEPRECATED,
-    AV_CODEC_ID_G2M_DEPRECATED,
-    AV_CODEC_ID_WEBP_DEPRECATED,
+    AV_CODEC_ID_ESCAPE130,
+    AV_CODEC_ID_G2M,
+    AV_CODEC_ID_WEBP,
     AV_CODEC_ID_HNM4_VIDEO,
-    AV_CODEC_ID_HEVC_DEPRECATED,
+    AV_CODEC_ID_HEVC,
+#define AV_CODEC_ID_H265 AV_CODEC_ID_HEVC
     AV_CODEC_ID_FIC,
     AV_CODEC_ID_ALIAS_PIX,
-    AV_CODEC_ID_BRENDER_PIX_DEPRECATED,
-    AV_CODEC_ID_PAF_VIDEO_DEPRECATED,
-    AV_CODEC_ID_EXR_DEPRECATED,
-    AV_CODEC_ID_VP7_DEPRECATED,
-    AV_CODEC_ID_SANM_DEPRECATED,
-    AV_CODEC_ID_SGIRLE_DEPRECATED,
-    AV_CODEC_ID_MVC1_DEPRECATED,
-    AV_CODEC_ID_MVC2_DEPRECATED,
+    AV_CODEC_ID_BRENDER_PIX,
+    AV_CODEC_ID_PAF_VIDEO,
+    AV_CODEC_ID_EXR,
+    AV_CODEC_ID_VP7,
+    AV_CODEC_ID_SANM,
+    AV_CODEC_ID_SGIRLE,
+    AV_CODEC_ID_MVC1,
+    AV_CODEC_ID_MVC2,
     AV_CODEC_ID_HQX,
     AV_CODEC_ID_TDSC,
     AV_CODEC_ID_HQ_HQA,
-
-    AV_CODEC_ID_BRENDER_PIX= MKBETAG('B','P','I','X'),
-    AV_CODEC_ID_Y41P       = MKBETAG('Y','4','1','P'),
-    AV_CODEC_ID_ESCAPE130  = MKBETAG('E','1','3','0'),
-    AV_CODEC_ID_EXR        = MKBETAG('0','E','X','R'),
-    AV_CODEC_ID_AVRP       = MKBETAG('A','V','R','P'),
-
-    AV_CODEC_ID_012V       = MKBETAG('0','1','2','V'),
-    AV_CODEC_ID_G2M        = MKBETAG( 0 ,'G','2','M'),
-    AV_CODEC_ID_AVUI       = MKBETAG('A','V','U','I'),
-    AV_CODEC_ID_AYUV       = MKBETAG('A','Y','U','V'),
-    AV_CODEC_ID_TARGA_Y216 = MKBETAG('T','2','1','6'),
-    AV_CODEC_ID_V308       = MKBETAG('V','3','0','8'),
-    AV_CODEC_ID_V408       = MKBETAG('V','4','0','8'),
-    AV_CODEC_ID_YUV4       = MKBETAG('Y','U','V','4'),
-    AV_CODEC_ID_SANM       = MKBETAG('S','A','N','M'),
-    AV_CODEC_ID_PAF_VIDEO  = MKBETAG('P','A','F','V'),
-    AV_CODEC_ID_AVRN       = MKBETAG('A','V','R','n'),
-    AV_CODEC_ID_CPIA       = MKBETAG('C','P','I','A'),
-    AV_CODEC_ID_XFACE      = MKBETAG('X','F','A','C'),
-    AV_CODEC_ID_SGIRLE     = MKBETAG('S','G','I','R'),
-    AV_CODEC_ID_MVC1       = MKBETAG('M','V','C','1'),
-    AV_CODEC_ID_MVC2       = MKBETAG('M','V','C','2'),
-    AV_CODEC_ID_SNOW       = MKBETAG('S','N','O','W'),
-    AV_CODEC_ID_WEBP       = MKBETAG('W','E','B','P'),
-    AV_CODEC_ID_SMVJPEG    = MKBETAG('S','M','V','J'),
-    AV_CODEC_ID_HEVC       = MKBETAG('H','2','6','5'),
-#define AV_CODEC_ID_H265 AV_CODEC_ID_HEVC
-    AV_CODEC_ID_VP7        = MKBETAG('V','P','7','0'),
-    AV_CODEC_ID_APNG       = MKBETAG('A','P','N','G'),
+    AV_CODEC_ID_HAP,
+    AV_CODEC_ID_DDS,
+    AV_CODEC_ID_DXV,
+    AV_CODEC_ID_SCREENPRESSO,
+    AV_CODEC_ID_RSCC,
+
+    AV_CODEC_ID_Y41P = 0x8000,
+    AV_CODEC_ID_AVRP,
+    AV_CODEC_ID_012V,
+    AV_CODEC_ID_AVUI,
+    AV_CODEC_ID_AYUV,
+    AV_CODEC_ID_TARGA_Y216,
+    AV_CODEC_ID_V308,
+    AV_CODEC_ID_V408,
+    AV_CODEC_ID_YUV4,
+    AV_CODEC_ID_AVRN,
+    AV_CODEC_ID_CPIA,
+    AV_CODEC_ID_XFACE,
+    AV_CODEC_ID_SNOW,
+    AV_CODEC_ID_SMVJPEG,
+    AV_CODEC_ID_APNG,
+    AV_CODEC_ID_DAALA,
+    AV_CODEC_ID_CFHD,
 
     /* various PCM "codecs" */
     AV_CODEC_ID_FIRST_AUDIO = 0x10000,     ///< A dummy id pointing at the start of audio codecs
@@ -354,11 +347,12 @@ enum AVCodecID {
     AV_CODEC_ID_PCM_LXF,
     AV_CODEC_ID_S302M,
     AV_CODEC_ID_PCM_S8_PLANAR,
-    AV_CODEC_ID_PCM_S24LE_PLANAR_DEPRECATED,
-    AV_CODEC_ID_PCM_S32LE_PLANAR_DEPRECATED,
-    AV_CODEC_ID_PCM_S24LE_PLANAR = MKBETAG(24,'P','S','P'),
-    AV_CODEC_ID_PCM_S32LE_PLANAR = MKBETAG(32,'P','S','P'),
-    AV_CODEC_ID_PCM_S16BE_PLANAR = MKBETAG('P','S','P',16),
+    AV_CODEC_ID_PCM_S24LE_PLANAR,
+    AV_CODEC_ID_PCM_S32LE_PLANAR,
+    AV_CODEC_ID_PCM_S16BE_PLANAR,
+    /* new PCM "codecs" should be added right below this line starting with
+     * an explicit value of, for example, 0x10800
+     */
 
     /* various ADPCM codecs */
     AV_CODEC_ID_ADPCM_IMA_QT = 0x11000,
@@ -391,16 +385,19 @@ enum AVCodecID {
     AV_CODEC_ID_ADPCM_IMA_ISS,
     AV_CODEC_ID_ADPCM_G722,
     AV_CODEC_ID_ADPCM_IMA_APC,
-    AV_CODEC_ID_ADPCM_VIMA_DEPRECATED,
-    AV_CODEC_ID_ADPCM_VIMA = MKBETAG('V','I','M','A'),
+    AV_CODEC_ID_ADPCM_VIMA,
 #if FF_API_VIMA_DECODER
-    AV_CODEC_ID_VIMA       = MKBETAG('V','I','M','A'),
+    AV_CODEC_ID_VIMA = AV_CODEC_ID_ADPCM_VIMA,
 #endif
-    AV_CODEC_ID_ADPCM_AFC  = MKBETAG('A','F','C',' '),
-    AV_CODEC_ID_ADPCM_IMA_OKI = MKBETAG('O','K','I',' '),
-    AV_CODEC_ID_ADPCM_DTK  = MKBETAG('D','T','K',' '),
-    AV_CODEC_ID_ADPCM_IMA_RAD = MKBETAG('R','A','D',' '),
-    AV_CODEC_ID_ADPCM_G726LE = MKBETAG('6','2','7','G'),
+
+    AV_CODEC_ID_ADPCM_AFC = 0x11800,
+    AV_CODEC_ID_ADPCM_IMA_OKI,
+    AV_CODEC_ID_ADPCM_DTK,
+    AV_CODEC_ID_ADPCM_IMA_RAD,
+    AV_CODEC_ID_ADPCM_G726LE,
+    AV_CODEC_ID_ADPCM_THP_LE,
+    AV_CODEC_ID_ADPCM_PSX,
+    AV_CODEC_ID_ADPCM_AICA,
 
     /* AMR */
     AV_CODEC_ID_AMR_NB = 0x12000,
@@ -416,6 +413,8 @@ enum AVCodecID {
     AV_CODEC_ID_XAN_DPCM,
     AV_CODEC_ID_SOL_DPCM,
 
+    AV_CODEC_ID_SDX2_DPCM = 0x14800,
+
     /* audio codecs */
     AV_CODEC_ID_MP2 = 0x15000,
     AV_CODEC_ID_MP3, ///< preferred ID for decoding MPEG audio layer 1, 2 or 3
@@ -480,25 +479,27 @@ enum AVCodecID {
     AV_CODEC_ID_RALF,
     AV_CODEC_ID_IAC,
     AV_CODEC_ID_ILBC,
-    AV_CODEC_ID_OPUS_DEPRECATED,
+    AV_CODEC_ID_OPUS,
     AV_CODEC_ID_COMFORT_NOISE,
-    AV_CODEC_ID_TAK_DEPRECATED,
+    AV_CODEC_ID_TAK,
     AV_CODEC_ID_METASOUND,
-    AV_CODEC_ID_PAF_AUDIO_DEPRECATED,
+    AV_CODEC_ID_PAF_AUDIO,
     AV_CODEC_ID_ON2AVC,
     AV_CODEC_ID_DSS_SP,
-    AV_CODEC_ID_FFWAVESYNTH = MKBETAG('F','F','W','S'),
-    AV_CODEC_ID_SONIC       = MKBETAG('S','O','N','C'),
-    AV_CODEC_ID_SONIC_LS    = MKBETAG('S','O','N','L'),
-    AV_CODEC_ID_PAF_AUDIO   = MKBETAG('P','A','F','A'),
-    AV_CODEC_ID_OPUS        = MKBETAG('O','P','U','S'),
-    AV_CODEC_ID_TAK         = MKBETAG('t','B','a','K'),
-    AV_CODEC_ID_EVRC        = MKBETAG('s','e','v','c'),
-    AV_CODEC_ID_SMV         = MKBETAG('s','s','m','v'),
-    AV_CODEC_ID_DSD_LSBF    = MKBETAG('D','S','D','L'),
-    AV_CODEC_ID_DSD_MSBF    = MKBETAG('D','S','D','M'),
-    AV_CODEC_ID_DSD_LSBF_PLANAR = MKBETAG('D','S','D','1'),
-    AV_CODEC_ID_DSD_MSBF_PLANAR = MKBETAG('D','S','D','8'),
+
+    AV_CODEC_ID_FFWAVESYNTH = 0x15800,
+    AV_CODEC_ID_SONIC,
+    AV_CODEC_ID_SONIC_LS,
+    AV_CODEC_ID_EVRC,
+    AV_CODEC_ID_SMV,
+    AV_CODEC_ID_DSD_LSBF,
+    AV_CODEC_ID_DSD_MSBF,
+    AV_CODEC_ID_DSD_LSBF_PLANAR,
+    AV_CODEC_ID_DSD_MSBF_PLANAR,
+    AV_CODEC_ID_4GV,
+    AV_CODEC_ID_INTERPLAY_ACM,
+    AV_CODEC_ID_XMA1,
+    AV_CODEC_ID_XMA2,
 
     /* subtitle codecs */
     AV_CODEC_ID_FIRST_SUBTITLE = 0x17000,          ///< A dummy ID pointing at the start of subtitle codecs.
@@ -511,32 +512,35 @@ enum AVCodecID {
     AV_CODEC_ID_HDMV_PGS_SUBTITLE,
     AV_CODEC_ID_DVB_TELETEXT,
     AV_CODEC_ID_SRT,
-    AV_CODEC_ID_MICRODVD   = MKBETAG('m','D','V','D'),
-    AV_CODEC_ID_EIA_608    = MKBETAG('c','6','0','8'),
-    AV_CODEC_ID_JACOSUB    = MKBETAG('J','S','U','B'),
-    AV_CODEC_ID_SAMI       = MKBETAG('S','A','M','I'),
-    AV_CODEC_ID_REALTEXT   = MKBETAG('R','T','X','T'),
-    AV_CODEC_ID_STL        = MKBETAG('S','p','T','L'),
-    AV_CODEC_ID_SUBVIEWER1 = MKBETAG('S','b','V','1'),
-    AV_CODEC_ID_SUBVIEWER  = MKBETAG('S','u','b','V'),
-    AV_CODEC_ID_SUBRIP     = MKBETAG('S','R','i','p'),
-    AV_CODEC_ID_WEBVTT     = MKBETAG('W','V','T','T'),
-    AV_CODEC_ID_MPL2       = MKBETAG('M','P','L','2'),
-    AV_CODEC_ID_VPLAYER    = MKBETAG('V','P','l','r'),
-    AV_CODEC_ID_PJS        = MKBETAG('P','h','J','S'),
-    AV_CODEC_ID_ASS        = MKBETAG('A','S','S',' '),  ///< ASS as defined in Matroska
+
+    AV_CODEC_ID_MICRODVD   = 0x17800,
+    AV_CODEC_ID_EIA_608,
+    AV_CODEC_ID_JACOSUB,
+    AV_CODEC_ID_SAMI,
+    AV_CODEC_ID_REALTEXT,
+    AV_CODEC_ID_STL,
+    AV_CODEC_ID_SUBVIEWER1,
+    AV_CODEC_ID_SUBVIEWER,
+    AV_CODEC_ID_SUBRIP,
+    AV_CODEC_ID_WEBVTT,
+    AV_CODEC_ID_MPL2,
+    AV_CODEC_ID_VPLAYER,
+    AV_CODEC_ID_PJS,
+    AV_CODEC_ID_ASS,
+    AV_CODEC_ID_HDMV_TEXT_SUBTITLE,
 
     /* other specific kind of codecs (generally used for attachments) */
     AV_CODEC_ID_FIRST_UNKNOWN = 0x18000,           ///< A dummy ID pointing at the start of various fake codecs.
     AV_CODEC_ID_TTF = 0x18000,
-    AV_CODEC_ID_BINTEXT    = MKBETAG('B','T','X','T'),
-    AV_CODEC_ID_XBIN       = MKBETAG('X','B','I','N'),
-    AV_CODEC_ID_IDF        = MKBETAG( 0 ,'I','D','F'),
-    AV_CODEC_ID_OTF        = MKBETAG( 0 ,'O','T','F'),
-    AV_CODEC_ID_SMPTE_KLV  = MKBETAG('K','L','V','A'),
-    AV_CODEC_ID_DVD_NAV    = MKBETAG('D','N','A','V'),
-    AV_CODEC_ID_TIMED_ID3  = MKBETAG('T','I','D','3'),
-    AV_CODEC_ID_BIN_DATA   = MKBETAG('D','A','T','A'),
+
+    AV_CODEC_ID_BINTEXT    = 0x18800,
+    AV_CODEC_ID_XBIN,
+    AV_CODEC_ID_IDF,
+    AV_CODEC_ID_OTF,
+    AV_CODEC_ID_SMPTE_KLV,
+    AV_CODEC_ID_DVD_NAV,
+    AV_CODEC_ID_TIMED_ID3,
+    AV_CODEC_ID_BIN_DATA,
 
 
     AV_CODEC_ID_PROBE = 0x19000, ///< codec_id is not known (like AV_CODEC_ID_NONE) but lavf should attempt to identify it
@@ -546,10 +550,7 @@ enum AVCodecID {
     AV_CODEC_ID_MPEG4SYSTEMS = 0x20001, /**< _FAKE_ codec to indicate a MPEG-4 Systems
                                 * stream (only used by libavformat) */
     AV_CODEC_ID_FFMETADATA = 0x21000,   ///< Dummy codec for streams containing only metadata information.
-
-#if FF_API_CODEC_ID
-#include "old_codec_ids.h"
-#endif
+    AV_CODEC_ID_WRAPPED_AVFRAME = 0x21001, ///< Passthrough codec, AVFrames wrapped in AVPacket
 };
 
 /**
@@ -574,13 +575,17 @@ typedef struct AVCodecDescriptor {
      * Codec properties, a combination of AV_CODEC_PROP_* flags.
      */
     int             props;
-
     /**
      * MIME type(s) associated with the codec.
      * May be NULL; if not, a NULL-terminated array of MIME types.
      * The first item is always non-NULL and is the preferred MIME type.
      */
     const char *const *mime_types;
+    /**
+     * If non-NULL, an array of profiles recognized for this codec.
+     * Terminated with FF_PROFILE_UNKNOWN.
+     */
+    const struct AVProfile *profiles;
 } AVCodecDescriptor;
 
 /**
@@ -627,20 +632,33 @@ typedef struct AVCodecDescriptor {
  * Note: If the first 23 bits of the additional bytes are not 0, then damaged
  * MPEG bitstreams could cause overread and segfault.
  */
-#define FF_INPUT_BUFFER_PADDING_SIZE 32
+#define AV_INPUT_BUFFER_PADDING_SIZE 32
 
 /**
  * @ingroup lavc_encoding
  * minimum encoding buffer size
  * Used to avoid some checks during header writing.
  */
-#define FF_MIN_BUFFER_SIZE 16384
+#define AV_INPUT_BUFFER_MIN_SIZE 16384
 
+#if FF_API_WITHOUT_PREFIX
+/**
+ * @deprecated use AV_INPUT_BUFFER_PADDING_SIZE instead
+ */
+#define FF_INPUT_BUFFER_PADDING_SIZE 32
+
+/**
+ * @deprecated use AV_INPUT_BUFFER_MIN_SIZE instead
+ */
+#define FF_MIN_BUFFER_SIZE 16384
+#endif /* FF_API_WITHOUT_PREFIX */
 
 /**
  * @ingroup lavc_encoding
  * motion estimation type.
+ * @deprecated use codec private option instead
  */
+#if FF_API_MOTION_EST
 enum Motion_Est_ID {
     ME_ZERO = 1,    ///< no search, that is use 0,0 vector whenever one is needed
     ME_FULL,
@@ -653,6 +671,7 @@ enum Motion_Est_ID {
     ME_TESA,        ///< transformed exhaustive search algorithm
     ME_ITER=50,     ///< iterative search
 };
+#endif
 
 /**
  * @ingroup lavc_decoding
@@ -708,11 +727,232 @@ typedef struct RcOverride{
  * Allow decoders to produce frames with data planes that are not aligned
  * to CPU requirements (e.g. due to cropping).
  */
-#define CODEC_FLAG_UNALIGNED 0x0001
-#define CODEC_FLAG_QSCALE 0x0002  ///< Use fixed qscale.
-#define CODEC_FLAG_4MV    0x0004  ///< 4 MV per MB allowed / advanced prediction for H.263.
-#define CODEC_FLAG_OUTPUT_CORRUPT 0x0008 ///< Output even those frames that might be corrupted
-#define CODEC_FLAG_QPEL   0x0010  ///< Use qpel MC.
+#define AV_CODEC_FLAG_UNALIGNED       (1 <<  0)
+/**
+ * Use fixed qscale.
+ */
+#define AV_CODEC_FLAG_QSCALE          (1 <<  1)
+/**
+ * 4 MV per MB allowed / advanced prediction for H.263.
+ */
+#define AV_CODEC_FLAG_4MV             (1 <<  2)
+/**
+ * Output even those frames that might be corrupted.
+ */
+#define AV_CODEC_FLAG_OUTPUT_CORRUPT  (1 <<  3)
+/**
+ * Use qpel MC.
+ */
+#define AV_CODEC_FLAG_QPEL            (1 <<  4)
+/**
+ * Use internal 2pass ratecontrol in first pass mode.
+ */
+#define AV_CODEC_FLAG_PASS1           (1 <<  9)
+/**
+ * Use internal 2pass ratecontrol in second pass mode.
+ */
+#define AV_CODEC_FLAG_PASS2           (1 << 10)
+/**
+ * loop filter.
+ */
+#define AV_CODEC_FLAG_LOOP_FILTER     (1 << 11)
+/**
+ * Only decode/encode grayscale.
+ */
+#define AV_CODEC_FLAG_GRAY            (1 << 13)
+/**
+ * error[?] variables will be set during encoding.
+ */
+#define AV_CODEC_FLAG_PSNR            (1 << 15)
+/**
+ * Input bitstream might be truncated at a random location
+ * instead of only at frame boundaries.
+ */
+#define AV_CODEC_FLAG_TRUNCATED       (1 << 16)
+/**
+ * Use interlaced DCT.
+ */
+#define AV_CODEC_FLAG_INTERLACED_DCT  (1 << 18)
+/**
+ * Force low delay.
+ */
+#define AV_CODEC_FLAG_LOW_DELAY       (1 << 19)
+/**
+ * Place global headers in extradata instead of every keyframe.
+ */
+#define AV_CODEC_FLAG_GLOBAL_HEADER   (1 << 22)
+/**
+ * Use only bitexact stuff (except (I)DCT).
+ */
+#define AV_CODEC_FLAG_BITEXACT        (1 << 23)
+/* Fx : Flag for h263+ extra options */
+/**
+ * H.263 advanced intra coding / MPEG-4 AC prediction
+ */
+#define AV_CODEC_FLAG_AC_PRED         (1 << 24)
+/**
+ * interlaced motion estimation
+ */
+#define AV_CODEC_FLAG_INTERLACED_ME   (1 << 29)
+#define AV_CODEC_FLAG_CLOSED_GOP      (1U << 31)
+
+/**
+ * Allow non spec compliant speedup tricks.
+ */
+#define AV_CODEC_FLAG2_FAST           (1 <<  0)
+/**
+ * Skip bitstream encoding.
+ */
+#define AV_CODEC_FLAG2_NO_OUTPUT      (1 <<  2)
+/**
+ * Place global headers at every keyframe instead of in extradata.
+ */
+#define AV_CODEC_FLAG2_LOCAL_HEADER   (1 <<  3)
+
+/**
+ * timecode is in drop frame format. DEPRECATED!!!!
+ */
+#define AV_CODEC_FLAG2_DROP_FRAME_TIMECODE (1 << 13)
+
+/**
+ * Input bitstream might be truncated at packet boundaries
+ * instead of only at frame boundaries.
+ */
+#define AV_CODEC_FLAG2_CHUNKS         (1 << 15)
+/**
+ * Discard cropping information from SPS.
+ */
+#define AV_CODEC_FLAG2_IGNORE_CROP    (1 << 16)
+
+/**
+ * Show all frames before the first keyframe
+ */
+#define AV_CODEC_FLAG2_SHOW_ALL       (1 << 22)
+/**
+ * Export motion vectors through frame side data
+ */
+#define AV_CODEC_FLAG2_EXPORT_MVS     (1 << 28)
+/**
+ * Do not skip samples and export skip information as frame side data
+ */
+#define AV_CODEC_FLAG2_SKIP_MANUAL    (1 << 29)
+
+/* Unsupported options :
+ *              Syntax Arithmetic coding (SAC)
+ *              Reference Picture Selection
+ *              Independent Segment Decoding */
+/* /Fx */
+/* codec capabilities */
+
+/**
+ * Decoder can use draw_horiz_band callback.
+ */
+#define AV_CODEC_CAP_DRAW_HORIZ_BAND     (1 <<  0)
+/**
+ * Codec uses get_buffer() for allocating buffers and supports custom allocators.
+ * If not set, it might not use get_buffer() at all or use operations that
+ * assume the buffer was allocated by avcodec_default_get_buffer.
+ */
+#define AV_CODEC_CAP_DR1                 (1 <<  1)
+#define AV_CODEC_CAP_TRUNCATED           (1 <<  3)
+/**
+ * Encoder or decoder requires flushing with NULL input at the end in order to
+ * give the complete and correct output.
+ *
+ * NOTE: If this flag is not set, the codec is guaranteed to never be fed with
+ *       NULL data. The user can still send NULL data to the public encode
+ *       or decode function, but libavcodec will not pass it along to the codec
+ *       unless this flag is set.
+ *
+ * Decoders:
+ * The decoder has a non-zero delay and needs to be fed with avpkt->data=NULL,
+ * avpkt->size=0 at the end to get the delayed data until the decoder no longer
+ * returns frames.
+ *
+ * Encoders:
+ * The encoder needs to be fed with NULL data at the end of encoding until the
+ * encoder no longer returns data.
+ *
+ * NOTE: For encoders implementing the AVCodec.encode2() function, setting this
+ *       flag also means that the encoder must set the pts and duration for
+ *       each output packet. If this flag is not set, the pts and duration will
+ *       be determined by libavcodec from the input frame.
+ */
+#define AV_CODEC_CAP_DELAY               (1 <<  5)
+/**
+ * Codec can be fed a final frame with a smaller size.
+ * This can be used to prevent truncation of the last audio samples.
+ */
+#define AV_CODEC_CAP_SMALL_LAST_FRAME    (1 <<  6)
+
+#if FF_API_CAP_VDPAU
+/**
+ * Codec can export data for HW decoding (VDPAU).
+ */
+#define AV_CODEC_CAP_HWACCEL_VDPAU       (1 <<  7)
+#endif
+
+/**
+ * Codec can output multiple frames per AVPacket
+ * Normally demuxers return one frame at a time, demuxers which do not do
+ * are connected to a parser to split what they return into proper frames.
+ * This flag is reserved to the very rare category of codecs which have a
+ * bitstream that cannot be split into frames without timeconsuming
+ * operations like full decoding. Demuxers carrying such bitstreams thus
+ * may return multiple frames in a packet. This has many disadvantages like
+ * prohibiting stream copy in many cases thus it should only be considered
+ * as a last resort.
+ */
+#define AV_CODEC_CAP_SUBFRAMES           (1 <<  8)
+/**
+ * Codec is experimental and is thus avoided in favor of non experimental
+ * encoders
+ */
+#define AV_CODEC_CAP_EXPERIMENTAL        (1 <<  9)
+/**
+ * Codec should fill in channel configuration and samplerate instead of container
+ */
+#define AV_CODEC_CAP_CHANNEL_CONF        (1 << 10)
+/**
+ * Codec supports frame-level multithreading.
+ */
+#define AV_CODEC_CAP_FRAME_THREADS       (1 << 12)
+/**
+ * Codec supports slice-based (or partition-based) multithreading.
+ */
+#define AV_CODEC_CAP_SLICE_THREADS       (1 << 13)
+/**
+ * Codec supports changed parameters at any point.
+ */
+#define AV_CODEC_CAP_PARAM_CHANGE        (1 << 14)
+/**
+ * Codec supports avctx->thread_count == 0 (auto).
+ */
+#define AV_CODEC_CAP_AUTO_THREADS        (1 << 15)
+/**
+ * Audio encoder supports receiving a different number of samples in each call.
+ */
+#define AV_CODEC_CAP_VARIABLE_FRAME_SIZE (1 << 16)
+/**
+ * Codec is intra only.
+ */
+#define AV_CODEC_CAP_INTRA_ONLY       0x40000000
+/**
+ * Codec is lossless.
+ */
+#define AV_CODEC_CAP_LOSSLESS         0x80000000
+
+
+#if FF_API_WITHOUT_PREFIX
+/**
+ * Allow decoders to produce frames with data planes that are not aligned
+ * to CPU requirements (e.g. due to cropping).
+ */
+#define CODEC_FLAG_UNALIGNED AV_CODEC_FLAG_UNALIGNED
+#define CODEC_FLAG_QSCALE AV_CODEC_FLAG_QSCALE
+#define CODEC_FLAG_4MV    AV_CODEC_FLAG_4MV
+#define CODEC_FLAG_OUTPUT_CORRUPT AV_CODEC_FLAG_OUTPUT_CORRUPT
+#define CODEC_FLAG_QPEL   AV_CODEC_FLAG_QPEL
 #if FF_API_GMC
 /**
  * @deprecated use the "gmc" private option of the libxvid encoder
@@ -733,9 +973,9 @@ typedef struct RcOverride{
  */
 #define CODEC_FLAG_INPUT_PRESERVED 0x0100
 #endif
-#define CODEC_FLAG_PASS1           0x0200   ///< Use internal 2pass ratecontrol in first pass mode.
-#define CODEC_FLAG_PASS2           0x0400   ///< Use internal 2pass ratecontrol in second pass mode.
-#define CODEC_FLAG_GRAY            0x2000   ///< Only decode/encode grayscale.
+#define CODEC_FLAG_PASS1           AV_CODEC_FLAG_PASS1
+#define CODEC_FLAG_PASS2           AV_CODEC_FLAG_PASS2
+#define CODEC_FLAG_GRAY            AV_CODEC_FLAG_GRAY
 #if FF_API_EMU_EDGE
 /**
  * @deprecated edges are not used/required anymore. I.e. this flag is now always
@@ -743,9 +983,9 @@ typedef struct RcOverride{
  */
 #define CODEC_FLAG_EMU_EDGE        0x4000
 #endif
-#define CODEC_FLAG_PSNR            0x8000   ///< error[?] variables will be set during encoding.
-#define CODEC_FLAG_TRUNCATED       0x00010000 /** Input bitstream might be truncated at a random
-                                                  location instead of only at frame boundaries. */
+#define CODEC_FLAG_PSNR            AV_CODEC_FLAG_PSNR
+#define CODEC_FLAG_TRUNCATED       AV_CODEC_FLAG_TRUNCATED
+
 #if FF_API_NORMALIZE_AQP
 /**
  * @deprecated use the flag "naq" in the "mpv_flags" private option of the
@@ -753,25 +993,24 @@ typedef struct RcOverride{
  */
 #define CODEC_FLAG_NORMALIZE_AQP  0x00020000
 #endif
-#define CODEC_FLAG_INTERLACED_DCT 0x00040000 ///< Use interlaced DCT.
-#define CODEC_FLAG_LOW_DELAY      0x00080000 ///< Force low delay.
-#define CODEC_FLAG_GLOBAL_HEADER  0x00400000 ///< Place global headers in extradata instead of every keyframe.
-#define CODEC_FLAG_BITEXACT       0x00800000 ///< Use only bitexact stuff (except (I)DCT).
-/* Fx : Flag for h263+ extra options */
-#define CODEC_FLAG_AC_PRED        0x01000000 ///< H.263 advanced intra coding / MPEG-4 AC prediction
-#define CODEC_FLAG_LOOP_FILTER    0x00000800 ///< loop filter
-#define CODEC_FLAG_INTERLACED_ME  0x20000000 ///< interlaced motion estimation
-#define CODEC_FLAG_CLOSED_GOP     0x80000000
-#define CODEC_FLAG2_FAST          0x00000001 ///< Allow non spec compliant speedup tricks.
-#define CODEC_FLAG2_NO_OUTPUT     0x00000004 ///< Skip bitstream encoding.
-#define CODEC_FLAG2_LOCAL_HEADER  0x00000008 ///< Place global headers at every keyframe instead of in extradata.
-#define CODEC_FLAG2_DROP_FRAME_TIMECODE 0x00002000 ///< timecode is in drop frame format. DEPRECATED!!!!
-#define CODEC_FLAG2_IGNORE_CROP   0x00010000 ///< Discard cropping information from SPS.
-
-#define CODEC_FLAG2_CHUNKS        0x00008000 ///< Input bitstream might be truncated at a packet boundaries instead of only at frame boundaries.
-#define CODEC_FLAG2_SHOW_ALL      0x00400000 ///< Show all frames before the first keyframe
-#define CODEC_FLAG2_EXPORT_MVS    0x10000000 ///< Export motion vectors through frame side data
-#define CODEC_FLAG2_SKIP_MANUAL   0x20000000 ///< Do not skip samples and export skip information as frame side data
+#define CODEC_FLAG_INTERLACED_DCT AV_CODEC_FLAG_INTERLACED_DCT
+#define CODEC_FLAG_LOW_DELAY      AV_CODEC_FLAG_LOW_DELAY
+#define CODEC_FLAG_GLOBAL_HEADER  AV_CODEC_FLAG_GLOBAL_HEADER
+#define CODEC_FLAG_BITEXACT       AV_CODEC_FLAG_BITEXACT
+#define CODEC_FLAG_AC_PRED        AV_CODEC_FLAG_AC_PRED
+#define CODEC_FLAG_LOOP_FILTER    AV_CODEC_FLAG_LOOP_FILTER
+#define CODEC_FLAG_INTERLACED_ME  AV_CODEC_FLAG_INTERLACED_ME
+#define CODEC_FLAG_CLOSED_GOP     AV_CODEC_FLAG_CLOSED_GOP
+#define CODEC_FLAG2_FAST          AV_CODEC_FLAG2_FAST
+#define CODEC_FLAG2_NO_OUTPUT     AV_CODEC_FLAG2_NO_OUTPUT
+#define CODEC_FLAG2_LOCAL_HEADER  AV_CODEC_FLAG2_LOCAL_HEADER
+#define CODEC_FLAG2_DROP_FRAME_TIMECODE AV_CODEC_FLAG2_DROP_FRAME_TIMECODE
+#define CODEC_FLAG2_IGNORE_CROP   AV_CODEC_FLAG2_IGNORE_CROP
+
+#define CODEC_FLAG2_CHUNKS        AV_CODEC_FLAG2_CHUNKS
+#define CODEC_FLAG2_SHOW_ALL      AV_CODEC_FLAG2_SHOW_ALL
+#define CODEC_FLAG2_EXPORT_MVS    AV_CODEC_FLAG2_EXPORT_MVS
+#define CODEC_FLAG2_SKIP_MANUAL   AV_CODEC_FLAG2_SKIP_MANUAL
 
 /* Unsupported options :
  *              Syntax Arithmetic coding (SAC)
@@ -780,14 +1019,14 @@ typedef struct RcOverride{
 /* /Fx */
 /* codec capabilities */
 
-#define CODEC_CAP_DRAW_HORIZ_BAND 0x0001 ///< Decoder can use draw_horiz_band callback.
+#define CODEC_CAP_DRAW_HORIZ_BAND AV_CODEC_CAP_DRAW_HORIZ_BAND ///< Decoder can use draw_horiz_band callback.
 /**
  * Codec uses get_buffer() for allocating buffers and supports custom allocators.
  * If not set, it might not use get_buffer() at all or use operations that
  * assume the buffer was allocated by avcodec_default_get_buffer.
  */
-#define CODEC_CAP_DR1             0x0002
-#define CODEC_CAP_TRUNCATED       0x0008
+#define CODEC_CAP_DR1             AV_CODEC_CAP_DR1
+#define CODEC_CAP_TRUNCATED       AV_CODEC_CAP_TRUNCATED
 #if FF_API_XVMC
 /* Codec can export data for HW decoding. This flag indicates that
  * the codec would call get_format() with list that might contain HW accelerated
@@ -821,17 +1060,17 @@ typedef struct RcOverride{
  *       each output packet. If this flag is not set, the pts and duration will
  *       be determined by libavcodec from the input frame.
  */
-#define CODEC_CAP_DELAY           0x0020
+#define CODEC_CAP_DELAY           AV_CODEC_CAP_DELAY
 /**
  * Codec can be fed a final frame with a smaller size.
  * This can be used to prevent truncation of the last audio samples.
  */
-#define CODEC_CAP_SMALL_LAST_FRAME 0x0040
+#define CODEC_CAP_SMALL_LAST_FRAME AV_CODEC_CAP_SMALL_LAST_FRAME
 #if FF_API_CAP_VDPAU
 /**
  * Codec can export data for HW decoding (VDPAU).
  */
-#define CODEC_CAP_HWACCEL_VDPAU    0x0080
+#define CODEC_CAP_HWACCEL_VDPAU    AV_CODEC_CAP_HWACCEL_VDPAU
 #endif
 /**
  * Codec can output multiple frames per AVPacket
@@ -844,16 +1083,16 @@ typedef struct RcOverride{
  * prohibiting stream copy in many cases thus it should only be considered
  * as a last resort.
  */
-#define CODEC_CAP_SUBFRAMES        0x0100
+#define CODEC_CAP_SUBFRAMES        AV_CODEC_CAP_SUBFRAMES
 /**
  * Codec is experimental and is thus avoided in favor of non experimental
  * encoders
  */
-#define CODEC_CAP_EXPERIMENTAL     0x0200
+#define CODEC_CAP_EXPERIMENTAL     AV_CODEC_CAP_EXPERIMENTAL
 /**
  * Codec should fill in channel configuration and samplerate instead of container
  */
-#define CODEC_CAP_CHANNEL_CONF     0x0400
+#define CODEC_CAP_CHANNEL_CONF     AV_CODEC_CAP_CHANNEL_CONF
 #if FF_API_NEG_LINESIZES
 /**
  * @deprecated no codecs use this capability
@@ -863,31 +1102,38 @@ typedef struct RcOverride{
 /**
  * Codec supports frame-level multithreading.
  */
-#define CODEC_CAP_FRAME_THREADS    0x1000
+#define CODEC_CAP_FRAME_THREADS    AV_CODEC_CAP_FRAME_THREADS
 /**
  * Codec supports slice-based (or partition-based) multithreading.
  */
-#define CODEC_CAP_SLICE_THREADS    0x2000
+#define CODEC_CAP_SLICE_THREADS    AV_CODEC_CAP_SLICE_THREADS
 /**
  * Codec supports changed parameters at any point.
  */
-#define CODEC_CAP_PARAM_CHANGE     0x4000
+#define CODEC_CAP_PARAM_CHANGE     AV_CODEC_CAP_PARAM_CHANGE
 /**
  * Codec supports avctx->thread_count == 0 (auto).
  */
-#define CODEC_CAP_AUTO_THREADS     0x8000
+#define CODEC_CAP_AUTO_THREADS     AV_CODEC_CAP_AUTO_THREADS
 /**
  * Audio encoder supports receiving a different number of samples in each call.
  */
-#define CODEC_CAP_VARIABLE_FRAME_SIZE 0x10000
+#define CODEC_CAP_VARIABLE_FRAME_SIZE AV_CODEC_CAP_VARIABLE_FRAME_SIZE
 /**
  * Codec is intra only.
  */
-#define CODEC_CAP_INTRA_ONLY       0x40000000
+#define CODEC_CAP_INTRA_ONLY       AV_CODEC_CAP_INTRA_ONLY
 /**
  * Codec is lossless.
  */
-#define CODEC_CAP_LOSSLESS         0x80000000
+#define CODEC_CAP_LOSSLESS         AV_CODEC_CAP_LOSSLESS
+
+/**
+ * HWAccel is experimental and is thus avoided in favor of non experimental
+ * codecs
+ */
+#define HWACCEL_CODEC_CAP_EXPERIMENTAL     0x0200
+#endif /* FF_API_WITHOUT_PREFIX */
 
 #if FF_API_MB_TYPE
 //The following defines may change, don't expect compatibility if you use them.
@@ -944,6 +1190,44 @@ typedef struct AVPanScan{
     int16_t position[3][2];
 }AVPanScan;
 
+/**
+ * This structure describes the bitrate properties of an encoded bitstream. It
+ * roughly corresponds to a subset of the VBV parameters for MPEG-2 or HRD
+ * parameters for H.264/HEVC.
+ */
+typedef struct AVCPBProperties {
+    /**
+     * Maximum bitrate of the stream, in bits per second.
+     * Zero if unknown or unspecified.
+     */
+    int max_bitrate;
+    /**
+     * Minimum bitrate of the stream, in bits per second.
+     * Zero if unknown or unspecified.
+     */
+    int min_bitrate;
+    /**
+     * Average bitrate of the stream, in bits per second.
+     * Zero if unknown or unspecified.
+     */
+    int avg_bitrate;
+
+    /**
+     * The size of the buffer to which the ratecontrol is applied, in bits.
+     * Zero if unknown or unspecified.
+     */
+    int buffer_size;
+
+    /**
+     * The delay between the time the packet this structure is associated with
+     * is received and the time when it should be decoded, in periods of a 27MHz
+     * clock.
+     *
+     * UINT64_MAX when unknown or unspecified.
+     */
+    uint64_t vbv_delay;
+} AVCPBProperties;
+
 #if FF_API_QSCALE_TYPE
 #define FF_QSCALE_TYPE_MPEG1 0
 #define FF_QSCALE_TYPE_MPEG2 1
@@ -951,18 +1235,6 @@ typedef struct AVPanScan{
 #define FF_QSCALE_TYPE_VP56  3
 #endif
 
-#if FF_API_GET_BUFFER
-#define FF_BUFFER_TYPE_INTERNAL 1
-#define FF_BUFFER_TYPE_USER     2 ///< direct rendering buffers (image is (de)allocated by user)
-#define FF_BUFFER_TYPE_SHARED   4 ///< Buffer from somewhere else; don't deallocate image (data/base), all other tables are not shared.
-#define FF_BUFFER_TYPE_COPY     8 ///< Just a (modified) copy of some other buffer, don't deallocate anything.
-
-#define FF_BUFFER_HINTS_VALID    0x01 // Buffer hints value is meaningful (if 0 ignore).
-#define FF_BUFFER_HINTS_READABLE 0x02 // Codec will read from buffer.
-#define FF_BUFFER_HINTS_PRESERVE 0x04 // User must not alter buffer content.
-#define FF_BUFFER_HINTS_REUSABLE 0x08 // Codec will reuse the buffer (update).
-#endif
-
 /**
  * The decoder will keep a reference to the frame and may reuse it later.
  */
@@ -1043,6 +1315,31 @@ enum AVPacketSideDataType {
      */
     AV_PKT_DATA_AUDIO_SERVICE_TYPE,
 
+    /**
+     * This side data contains quality related information from the encoder.
+     * @code
+     * u32le quality factor of the compressed frame. Allowed range is between 1 (good) and FF_LAMBDA_MAX (bad).
+     * u8    picture type
+     * u8    error count
+     * u16   reserved
+     * u64le[error count] sum of squared differences between encoder input and output
+     * @endcode
+     */
+    AV_PKT_DATA_QUALITY_STATS,
+
+    /**
+     * This side data contains an integer value representing the stream index
+     * of a "fallback" track.  A fallback track indicates an alternate
+     * track to use when the current track can not be decoded for some reason.
+     * e.g. no decoder available for codec.
+     */
+    AV_PKT_DATA_FALLBACK_TRACK,
+
+    /**
+     * This side data corresponds to the AVCPBProperties struct.
+     */
+    AV_PKT_DATA_CPB_PROPERTIES,
+
     /**
      * Recommmends skipping the specified number of samples
      * @code
@@ -1108,6 +1405,8 @@ enum AVPacketSideDataType {
     AV_PKT_DATA_METADATA_UPDATE,
 };
 
+#define AV_PKT_DATA_QUALITY_FACTOR AV_PKT_DATA_QUALITY_STATS //DEPRECATED
+
 typedef struct AVPacketSideData {
     uint8_t *data;
     int      size;
@@ -1120,21 +1419,27 @@ typedef struct AVPacketSideData {
  * then passed to muxers.
  *
  * For video, it should typically contain one compressed frame. For audio it may
- * contain several compressed frames.
+ * contain several compressed frames. Encoders are allowed to output empty
+ * packets, with no compressed data, containing only side data
+ * (e.g. to update some stream parameters at the end of encoding).
  *
  * AVPacket is one of the few structs in FFmpeg, whose size is a part of public
  * ABI. Thus it may be allocated on stack and no new fields can be added to it
  * without libavcodec and libavformat major bump.
  *
- * The semantics of data ownership depends on the buf or destruct (deprecated)
- * fields. If either is set, the packet data is dynamically allocated and is
- * valid indefinitely until av_free_packet() is called (which in turn calls
- * av_buffer_unref()/the destruct callback to free the data). If neither is set,
- * the packet data is typically backed by some static buffer somewhere and is
- * only valid for a limited time (e.g. until the next read call when demuxing).
+ * The semantics of data ownership depends on the buf field.
+ * If it is set, the packet data is dynamically allocated and is
+ * valid indefinitely until a call to av_packet_unref() reduces the
+ * reference count to 0.
+ *
+ * If the buf field is not set av_packet_ref() would make a copy instead
+ * of increasing the reference count.
+ *
+ * The side data is always allocated with av_malloc(), copied by
+ * av_packet_ref() and freed by av_packet_unref().
  *
- * The side data is always allocated with av_malloc() and is freed in
- * av_free_packet().
+ * @see av_packet_ref
+ * @see av_packet_unref
  */
 typedef struct AVPacket {
     /**
@@ -1177,33 +1482,19 @@ typedef struct AVPacket {
      * Duration of this packet in AVStream->time_base units, 0 if unknown.
      * Equals next_pts - this_pts in presentation order.
      */
-    int   duration;
-#if FF_API_DESTRUCT_PACKET
-    attribute_deprecated
-    void  (*destruct)(struct AVPacket *);
-    attribute_deprecated
-    void  *priv;
-#endif
+    int64_t duration;
+
     int64_t pos;                            ///< byte position in stream, -1 if unknown
 
+#if FF_API_CONVERGENCE_DURATION
     /**
-     * Time difference in AVStream->time_base units from the pts of this
-     * packet to the point at which the output from the decoder has converged
-     * independent from the availability of previous frames. That is, the
-     * frames are virtually identical no matter if decoding started from
-     * the very first frame or from this keyframe.
-     * Is AV_NOPTS_VALUE if unknown.
-     * This field is not the display duration of the current packet.
-     * This field has no meaning if the packet does not have AV_PKT_FLAG_KEY
-     * set.
-     *
-     * The purpose of this field is to allow seeking in streams that have no
-     * keyframes in the conventional sense. It corresponds to the
-     * recovery point SEI in H.264 and match_time_delta in NUT. It is also
-     * essential for some types of subtitle streams to ensure that all
-     * subtitles are correctly displayed after seeking.
+     * @deprecated Same as the duration field, but as int64_t. This was required
+     * for Matroska subtitles, whose duration values could overflow when the
+     * duration field was still an int.
      */
+    attribute_deprecated
     int64_t convergence_duration;
+#endif
 } AVPacket;
 #define AV_PKT_FLAG_KEY     0x0001 ///< The packet contains a keyframe
 #define AV_PKT_FLAG_CORRUPT 0x0002 ///< The packet content is corrupted
@@ -1300,9 +1591,10 @@ typedef struct AVCodecContext {
     /**
      * the average bitrate
      * - encoding: Set by user; unused for constant quantizer encoding.
-     * - decoding: Set by libavcodec. 0 or some bitrate if this info is available in the stream.
+     * - decoding: Set by user, may be overwritten by libavcodec
+     *             if this info is available in the stream
      */
-    int bit_rate;
+    int64_t bit_rate;
 
     /**
      * number of bits the bitstream is allowed to diverge from the reference.
@@ -1328,14 +1620,14 @@ typedef struct AVCodecContext {
 #define FF_COMPRESSION_DEFAULT -1
 
     /**
-     * CODEC_FLAG_*.
+     * AV_CODEC_FLAG_*.
      * - encoding: Set by user.
      * - decoding: Set by user.
      */
     int flags;
 
     /**
-     * CODEC_FLAG2_*
+     * AV_CODEC_FLAG2_*
      * - encoding: Set by user.
      * - decoding: Set by user.
      */
@@ -1346,7 +1638,7 @@ typedef struct AVCodecContext {
      * mjpeg: Huffman tables
      * rv10: additional flags
      * mpeg4: global headers (they can be in the bitstream or here)
-     * The allocated memory should be FF_INPUT_BUFFER_PADDING_SIZE bytes larger
+     * The allocated memory should be AV_INPUT_BUFFER_PADDING_SIZE bytes larger
      * than extradata_size to avoid problems if it is read with the bitstream reader.
      * The bytewise contents of extradata must not depend on the architecture or CPU endianness.
      * - encoding: Set/allocated/freed by libavcodec.
@@ -1405,21 +1697,31 @@ typedef struct AVCodecContext {
     /* video only */
     /**
      * picture width / height.
+     *
+     * @note Those fields may not match the values of the last
+     * AVFrame output by avcodec_decode_video2 due to frame
+     * reordering.
+     *
      * - encoding: MUST be set by user.
      * - decoding: May be set by the user before opening the decoder if known e.g.
      *             from the container. Some decoders will require the dimensions
      *             to be set by the caller. During decoding, the decoder may
-     *             overwrite those values as required.
+     *             overwrite those values as required while parsing the data.
      */
     int width, height;
 
     /**
      * Bitstream width / height, may be different from width/height e.g. when
      * the decoded frame is cropped before being output or lowres is enabled.
+     *
+     * @note Those fields may not match the values of the last
+     * AVFrame output by avcodec_decode_video2 due to frame
+     * reordering.
+     *
      * - encoding: unused
      * - decoding: May be set by the user before opening the decoder if known
      *             e.g. from the container. During decoding, the decoder may
-     *             overwrite those values as required.
+     *             overwrite those values as required while parsing the data.
      */
     int coded_width, coded_height;
 
@@ -1438,19 +1740,24 @@ typedef struct AVCodecContext {
      * Pixel format, see AV_PIX_FMT_xxx.
      * May be set by the demuxer if known from headers.
      * May be overridden by the decoder if it knows better.
+     *
+     * @note This field may not match the value of the last
+     * AVFrame output by avcodec_decode_video2 due to frame
+     * reordering.
+     *
      * - encoding: Set by user.
-     * - decoding: Set by user if known, overridden by libavcodec if known
+     * - decoding: Set by user if known, overridden by libavcodec while
+     *             parsing the data.
      */
     enum AVPixelFormat pix_fmt;
 
+#if FF_API_MOTION_EST
     /**
-     * Motion estimation algorithm used for video coding.
-     * 1 (zero), 2 (full), 3 (log), 4 (phods), 5 (epzs), 6 (x1), 7 (hex),
-     * 8 (umh), 9 (iter), 10 (tesa) [7, 8, 10 are x264 specific, 9 is snow specific]
-     * - encoding: MUST be set by user.
-     * - decoding: unused
+     * This option does nothing
+     * @deprecated use codec private options instead
      */
-    int me_method;
+    attribute_deprecated int me_method;
+#endif
 
     /**
      * If non NULL, 'draw_horiz_band' is called by the libavcodec
@@ -1511,11 +1818,17 @@ typedef struct AVCodecContext {
      */
     float b_quant_factor;
 
-    /** obsolete FIXME remove */
-    int rc_strategy;
+#if FF_API_RC_STRATEGY
+    /** @deprecated use codec private option instead */
+    attribute_deprecated int rc_strategy;
 #define FF_RC_STRATEGY_XVID 1
+#endif
 
+#if FF_API_PRIVATE_OPT
+    /** @deprecated use encoder private options instead */
+    attribute_deprecated
     int b_frame_strategy;
+#endif
 
     /**
      * qscale offset between IP and B-frames
@@ -1532,12 +1845,11 @@ typedef struct AVCodecContext {
      */
     int has_b_frames;
 
-    /**
-     * 0-> h263 quant 1-> mpeg quant
-     * - encoding: Set by user.
-     * - decoding: unused
-     */
+#if FF_API_PRIVATE_OPT
+    /** @deprecated use encoder private options instead */
+    attribute_deprecated
     int mpeg_quant;
+#endif
 
     /**
      * qscale factor between P and I-frames
@@ -1596,15 +1908,15 @@ typedef struct AVCodecContext {
      * - decoding: Set by user (or 0).
      */
     int slice_count;
-    /**
-     * prediction method (needed for huffyuv)
-     * - encoding: Set by user.
-     * - decoding: unused
-     */
+
+#if FF_API_PRIVATE_OPT
+    /** @deprecated use encoder private options instead */
+    attribute_deprecated
      int prediction_method;
 #define FF_PRED_LEFT   0
 #define FF_PRED_PLANE  1
 #define FF_PRED_MEDIAN 2
+#endif
 
     /**
      * slice offsets in the frame in bytes
@@ -1677,12 +1989,11 @@ typedef struct AVCodecContext {
      */
     int last_predictor_count;
 
-    /**
-     * prepass for motion estimation
-     * - encoding: Set by user.
-     * - decoding: unused
-     */
+#if FF_API_PRIVATE_OPT
+    /** @deprecated use encoder private options instead */
+    attribute_deprecated
     int pre_me;
+#endif
 
     /**
      * motion estimation prepass comparison function
@@ -1734,20 +2045,18 @@ typedef struct AVCodecContext {
      */
     int me_range;
 
+#if FF_API_QUANT_BIAS
     /**
-     * intra quantizer bias
-     * - encoding: Set by user.
-     * - decoding: unused
+     * @deprecated use encoder private option instead
      */
-    int intra_quant_bias;
+    attribute_deprecated int intra_quant_bias;
 #define FF_DEFAULT_QUANT_BIAS 999999
 
     /**
-     * inter quantizer bias
-     * - encoding: Set by user.
-     * - decoding: unused
+     * @deprecated use encoder private option instead
      */
-    int inter_quant_bias;
+    attribute_deprecated int inter_quant_bias;
+#endif
 
     /**
      * slice flags
@@ -1793,20 +2102,15 @@ typedef struct AVCodecContext {
      */
     uint16_t *inter_matrix;
 
-    /**
-     * scene change detection threshold
-     * 0 is default, larger means fewer detected scene changes.
-     * - encoding: Set by user.
-     * - decoding: unused
-     */
+#if FF_API_PRIVATE_OPT
+    /** @deprecated use encoder private options instead */
+    attribute_deprecated
     int scenechange_threshold;
 
-    /**
-     * noise reduction strength
-     * - encoding: Set by user.
-     * - decoding: unused
-     */
+    /** @deprecated use encoder private options instead */
+    attribute_deprecated
     int noise_reduction;
+#endif
 
 #if FF_API_MPV_OPT
     /**
@@ -1865,12 +2169,13 @@ typedef struct AVCodecContext {
      */
     int mb_lmax;
 
+#if FF_API_PRIVATE_OPT
     /**
-     *
-     * - encoding: Set by user.
-     * - decoding: unused
+     * @deprecated use encoder private options instead
      */
+    attribute_deprecated
     int me_penalty_compensation;
+#endif
 
     /**
      *
@@ -1879,12 +2184,11 @@ typedef struct AVCodecContext {
      */
     int bidir_refine;
 
-    /**
-     *
-     * - encoding: Set by user.
-     * - decoding: unused
-     */
+#if FF_API_PRIVATE_OPT
+    /** @deprecated use encoder private options instead */
+    attribute_deprecated
     int brd_scale;
+#endif
 
     /**
      * minimum GOP size
@@ -1900,12 +2204,11 @@ typedef struct AVCodecContext {
      */
     int refs;
 
-    /**
-     * chroma qp offset from luma
-     * - encoding: Set by user.
-     * - decoding: unused
-     */
+#if FF_API_PRIVATE_OPT
+    /** @deprecated use encoder private options instead */
+    attribute_deprecated
     int chromaoffset;
+#endif
 
 #if FF_API_UNUSED_MEMBERS
     /**
@@ -1924,12 +2227,11 @@ typedef struct AVCodecContext {
      */
     int mv0_threshold;
 
-    /**
-     * Adjust sensitivity of b_frame_strategy 1.
-     * - encoding: Set by user.
-     * - decoding: unused
-     */
+#if FF_API_PRIVATE_OPT
+    /** @deprecated use encoder private options instead */
+    attribute_deprecated
     int b_sensitivity;
+#endif
 
     /**
      * Chromaticity coordinates of the source primaries.
@@ -1998,7 +2300,7 @@ typedef struct AVCodecContext {
      *
      * - encoding: set by libavcodec in avcodec_open2(). Each submitted frame
      *   except the last must contain exactly frame_size samples per channel.
-     *   May be 0 when the codec has CODEC_CAP_VARIABLE_FRAME_SIZE set, then the
+     *   May be 0 when the codec has AV_CODEC_CAP_VARIABLE_FRAME_SIZE set, then the
      *   frame size is not restricted.
      * - decoding: may be set by some decoders to indicate constant frame size
      */
@@ -2028,16 +2330,6 @@ typedef struct AVCodecContext {
      */
     int cutoff;
 
-#if FF_API_REQUEST_CHANNELS
-    /**
-     * Decoder should decode to this many channels if it can (0 for default)
-     * - encoding: unused
-     * - decoding: Set by user.
-     * @deprecated Deprecated in favor of request_channel_layout.
-     */
-    attribute_deprecated int request_channels;
-#endif
-
     /**
      * Audio channel layout.
      * - encoding: set by user.
@@ -2060,108 +2352,12 @@ typedef struct AVCodecContext {
     enum AVAudioServiceType audio_service_type;
 
     /**
-     * desired sample format
-     * - encoding: Not used.
-     * - decoding: Set by user.
-     * Decoder will decode to this format if it can.
-     */
-    enum AVSampleFormat request_sample_fmt;
-
-#if FF_API_GET_BUFFER
-    /**
-     * Called at the beginning of each frame to get a buffer for it.
-     *
-     * The function will set AVFrame.data[], AVFrame.linesize[].
-     * AVFrame.extended_data[] must also be set, but it should be the same as
-     * AVFrame.data[] except for planar audio with more channels than can fit
-     * in AVFrame.data[]. In that case, AVFrame.data[] shall still contain as
-     * many data pointers as it can hold.
-     *
-     * if CODEC_CAP_DR1 is not set then get_buffer() must call
-     * avcodec_default_get_buffer() instead of providing buffers allocated by
-     * some other means.
-     *
-     * AVFrame.data[] should be 32- or 16-byte-aligned unless the CPU doesn't
-     * need it. avcodec_default_get_buffer() aligns the output buffer properly,
-     * but if get_buffer() is overridden then alignment considerations should
-     * be taken into account.
-     *
-     * @see avcodec_default_get_buffer()
-     *
-     * Video:
-     *
-     * If pic.reference is set then the frame will be read later by libavcodec.
-     * avcodec_align_dimensions2() should be used to find the required width and
-     * height, as they normally need to be rounded up to the next multiple of 16.
-     *
-     * If frame multithreading is used and thread_safe_callbacks is set,
-     * it may be called from a different thread, but not from more than one at
-     * once. Does not need to be reentrant.
-     *
-     * @see release_buffer(), reget_buffer()
-     * @see avcodec_align_dimensions2()
-     *
-     * Audio:
-     *
-     * Decoders request a buffer of a particular size by setting
-     * AVFrame.nb_samples prior to calling get_buffer(). The decoder may,
-     * however, utilize only part of the buffer by setting AVFrame.nb_samples
-     * to a smaller value in the output frame.
-     *
-     * Decoders cannot use the buffer after returning from
-     * avcodec_decode_audio4(), so they will not call release_buffer(), as it
-     * is assumed to be released immediately upon return. In some rare cases,
-     * a decoder may need to call get_buffer() more than once in a single
-     * call to avcodec_decode_audio4(). In that case, when get_buffer() is
-     * called again after it has already been called once, the previously
-     * acquired buffer is assumed to be released at that time and may not be
-     * reused by the decoder.
-     *
-     * As a convenience, av_samples_get_buffer_size() and
-     * av_samples_fill_arrays() in libavutil may be used by custom get_buffer()
-     * functions to find the required data size and to fill data pointers and
-     * linesize. In AVFrame.linesize, only linesize[0] may be set for audio
-     * since all planes must be the same size.
-     *
-     * @see av_samples_get_buffer_size(), av_samples_fill_arrays()
-     *
-     * - encoding: unused
-     * - decoding: Set by libavcodec, user can override.
-     *
-     * @deprecated use get_buffer2()
-     */
-    attribute_deprecated
-    int (*get_buffer)(struct AVCodecContext *c, AVFrame *pic);
-
-    /**
-     * Called to release buffers which were allocated with get_buffer.
-     * A released buffer can be reused in get_buffer().
-     * pic.data[*] must be set to NULL.
-     * May be called from a different thread if frame multithreading is used,
-     * but not by more than one thread at once, so does not need to be reentrant.
-     * - encoding: unused
-     * - decoding: Set by libavcodec, user can override.
-     *
-     * @deprecated custom freeing callbacks should be set from get_buffer2()
-     */
-    attribute_deprecated
-    void (*release_buffer)(struct AVCodecContext *c, AVFrame *pic);
-
-    /**
-     * Called at the beginning of a frame to get cr buffer for it.
-     * Buffer type (size, hints) must be the same. libavcodec won't check it.
-     * libavcodec will pass previous buffer in pic, function should return
-     * same buffer or new buffer with old frame "painted" into it.
-     * If pic.data[0] == NULL must behave like get_buffer().
-     * if CODEC_CAP_DR1 is not set then reget_buffer() must call
-     * avcodec_default_reget_buffer() instead of providing buffers allocated by
-     * some other means.
-     * - encoding: unused
-     * - decoding: Set by libavcodec, user can override.
+     * desired sample format
+     * - encoding: Not used.
+     * - decoding: Set by user.
+     * Decoder will decode to this format if it can.
      */
-    attribute_deprecated
-    int (*reget_buffer)(struct AVCodecContext *c, AVFrame *pic);
-#endif
+    enum AVSampleFormat request_sample_fmt;
 
     /**
      * This callback is called at the beginning of each frame to get data
@@ -2200,7 +2396,7 @@ typedef struct AVCodecContext {
      *   buffers than buf[] can hold. extended_buf will be freed in
      *   av_frame_unref().
      *
-     * If CODEC_CAP_DR1 is not set then get_buffer2() must call
+     * If AV_CODEC_CAP_DR1 is not set then get_buffer2() must call
      * avcodec_default_get_buffer2() instead of providing buffers allocated by
      * some other means.
      *
@@ -2322,16 +2518,16 @@ typedef struct AVCodecContext {
     /**
      * maximum bitrate
      * - encoding: Set by user.
-     * - decoding: Set by libavcodec.
+     * - decoding: Set by user, may be overwritten by libavcodec.
      */
-    int rc_max_rate;
+    int64_t rc_max_rate;
 
     /**
      * minimum bitrate
      * - encoding: Set by user.
      * - decoding: unused
      */
-    int rc_min_rate;
+    int64_t rc_min_rate;
 
 #if FF_API_MPV_OPT
     /**
@@ -2365,6 +2561,7 @@ typedef struct AVCodecContext {
      */
     int rc_initial_buffer_occupancy;
 
+#if FF_API_CODER_TYPE
 #define FF_CODER_TYPE_VLC       0
 #define FF_CODER_TYPE_AC        1
 #define FF_CODER_TYPE_RAW       2
@@ -2373,18 +2570,17 @@ typedef struct AVCodecContext {
 #define FF_CODER_TYPE_DEFLATE   4
 #endif /* FF_API_UNUSED_MEMBERS */
     /**
-     * coder type
-     * - encoding: Set by user.
-     * - decoding: unused
+     * @deprecated use encoder private options instead
      */
+    attribute_deprecated
     int coder_type;
+#endif /* FF_API_CODER_TYPE */
 
-    /**
-     * context model
-     * - encoding: Set by user.
-     * - decoding: unused
-     */
+#if FF_API_PRIVATE_OPT
+    /** @deprecated use encoder private options instead */
+    attribute_deprecated
     int context_model;
+#endif
 
 #if FF_API_MPV_OPT
     /**
@@ -2400,33 +2596,23 @@ typedef struct AVCodecContext {
     int lmax;
 #endif
 
-    /**
-     * frame skip threshold
-     * - encoding: Set by user.
-     * - decoding: unused
-     */
+#if FF_API_PRIVATE_OPT
+    /** @deprecated use encoder private options instead */
+    attribute_deprecated
     int frame_skip_threshold;
 
-    /**
-     * frame skip factor
-     * - encoding: Set by user.
-     * - decoding: unused
-     */
+    /** @deprecated use encoder private options instead */
+    attribute_deprecated
     int frame_skip_factor;
 
-    /**
-     * frame skip exponent
-     * - encoding: Set by user.
-     * - decoding: unused
-     */
+    /** @deprecated use encoder private options instead */
+    attribute_deprecated
     int frame_skip_exp;
 
-    /**
-     * frame skip comparison function
-     * - encoding: Set by user.
-     * - decoding: unused
-     */
+    /** @deprecated use encoder private options instead */
+    attribute_deprecated
     int frame_skip_cmp;
+#endif /* FF_API_PRIVATE_OPT */
 
     /**
      * trellis RD quantization
@@ -2435,56 +2621,68 @@ typedef struct AVCodecContext {
      */
     int trellis;
 
-    /**
-     * - encoding: Set by user.
-     * - decoding: unused
-     */
+#if FF_API_PRIVATE_OPT
+    /** @deprecated use encoder private options instead */
+    attribute_deprecated
     int min_prediction_order;
 
-    /**
-     * - encoding: Set by user.
-     * - decoding: unused
-     */
+    /** @deprecated use encoder private options instead */
+    attribute_deprecated
     int max_prediction_order;
 
-    /**
-     * GOP timecode frame start number
-     * - encoding: Set by user, in non drop frame format
-     * - decoding: Set by libavcodec (timecode in the 25 bits format, -1 if unset)
-     */
+    /** @deprecated use encoder private options instead */
+    attribute_deprecated
     int64_t timecode_frame_start;
+#endif
 
+#if FF_API_RTP_CALLBACK
+    /**
+     * @deprecated unused
+     */
     /* The RTP callback: This function is called    */
     /* every time the encoder has a packet to send. */
     /* It depends on the encoder if the data starts */
     /* with a Start Code (it should). H.263 does.   */
     /* mb_nb contains the number of macroblocks     */
     /* encoded in the RTP payload.                  */
+    attribute_deprecated
     void (*rtp_callback)(struct AVCodecContext *avctx, void *data, int size, int mb_nb);
+#endif
 
+#if FF_API_PRIVATE_OPT
+    /** @deprecated use encoder private options instead */
+    attribute_deprecated
     int rtp_payload_size;   /* The size of the RTP payload: the coder will  */
                             /* do its best to deliver a chunk with size     */
                             /* below rtp_payload_size, the chunk will start */
                             /* with a start code on some codecs like H.263. */
                             /* This doesn't take account of any particular  */
                             /* headers inside the transmitted RTP payload.  */
+#endif
 
+#if FF_API_STAT_BITS
     /* statistics, used for 2-pass encoding */
+    attribute_deprecated
     int mv_bits;
+    attribute_deprecated
     int header_bits;
+    attribute_deprecated
     int i_tex_bits;
+    attribute_deprecated
     int p_tex_bits;
+    attribute_deprecated
     int i_count;
+    attribute_deprecated
     int p_count;
+    attribute_deprecated
     int skip_count;
+    attribute_deprecated
     int misc_bits;
 
-    /**
-     * number of bits used for the previously encoded frame
-     * - encoding: Set by libavcodec.
-     * - decoding: unused
-     */
+    /** @deprecated this field is unused */
+    attribute_deprecated
     int frame_bits;
+#endif
 
     /**
      * pass1 encoding statistics output buffer
@@ -2589,6 +2787,7 @@ typedef struct AVCodecContext {
 #endif
 #define FF_DEBUG_BUFFERS     0x00008000
 #define FF_DEBUG_THREADS     0x00010000
+#define FF_DEBUG_GREEN_MD    0x00800000
 #define FF_DEBUG_NOMC        0x01000000
 
 #if FF_API_DEBUG_MV
@@ -2657,7 +2856,7 @@ typedef struct AVCodecContext {
 
     /**
      * error
-     * - encoding: Set by libavcodec if flags&CODEC_FLAG_PSNR.
+     * - encoding: Set by libavcodec if flags & AV_CODEC_FLAG_PSNR.
      * - decoding: unused
      */
     uint64_t error[AV_NUM_DATA_POINTERS];
@@ -2670,9 +2869,7 @@ typedef struct AVCodecContext {
     int dct_algo;
 #define FF_DCT_AUTO    0
 #define FF_DCT_FASTINT 1
-#if FF_API_UNUSED_MEMBERS
 #define FF_DCT_INT     2
-#endif /* FF_API_UNUSED_MEMBERS */
 #define FF_DCT_MMX     3
 #define FF_DCT_ALTIVEC 5
 #define FF_DCT_FAAN    6
@@ -2737,12 +2934,16 @@ typedef struct AVCodecContext {
      int lowres;
 #endif
 
+#if FF_API_CODED_FRAME
     /**
      * the picture in the bitstream
      * - encoding: Set by libavcodec.
      * - decoding: unused
+     *
+     * @deprecated use the quality factor packet side data instead
      */
-    AVFrame *coded_frame;
+    attribute_deprecated AVFrame *coded_frame;
+#endif
 
     /**
      * thread count
@@ -2812,14 +3013,6 @@ typedef struct AVCodecContext {
      */
     int (*execute2)(struct AVCodecContext *c, int (*func)(struct AVCodecContext *c2, void *arg, int jobnr, int threadnr), void *arg2, int *ret, int count);
 
-#if FF_API_THREAD_OPAQUE
-    /**
-     * @deprecated this field should not be used from outside of lavc
-     */
-    attribute_deprecated
-    void *thread_opaque;
-#endif
-
     /**
      * noise vs. sse weight for the nsse comparison function
      * - encoding: Set by user.
@@ -2906,17 +3099,16 @@ typedef struct AVCodecContext {
 #define FF_PROFILE_JPEG2000_DCINEMA_2K              3
 #define FF_PROFILE_JPEG2000_DCINEMA_4K              4
 
+#define FF_PROFILE_VP9_0                            0
+#define FF_PROFILE_VP9_1                            1
+#define FF_PROFILE_VP9_2                            2
+#define FF_PROFILE_VP9_3                            3
 
 #define FF_PROFILE_HEVC_MAIN                        1
 #define FF_PROFILE_HEVC_MAIN_10                     2
 #define FF_PROFILE_HEVC_MAIN_STILL_PICTURE          3
 #define FF_PROFILE_HEVC_REXT                        4
 
-#define FF_PROFILE_VP9_0                            0
-#define FF_PROFILE_VP9_1                            1
-#define FF_PROFILE_VP9_2                            2
-#define FF_PROFILE_VP9_3                            3
-
     /**
      * level
      * - encoding: Set by user.
@@ -2966,36 +3158,33 @@ typedef struct AVCodecContext {
     int error_rate;
 #endif
 
-#if FF_API_CODEC_PKT
-    /**
-     * @deprecated this field is not supposed to be accessed from outside lavc
-     */
-    attribute_deprecated
-    AVPacket *pkt;
-#endif
-
+#if FF_API_VBV_DELAY
     /**
      * VBV delay coded in the last frame (in periods of a 27 MHz clock).
      * Used for compliant TS muxing.
      * - encoding: Set by libavcodec.
      * - decoding: unused.
+     * @deprecated this value is now exported as a part of
+     * AV_PKT_DATA_CPB_PROPERTIES packet side data
      */
+    attribute_deprecated
     uint64_t vbv_delay;
+#endif
 
+#if FF_API_SIDEDATA_ONLY_PKT
     /**
-     * Encoding only. Allow encoders to output packets that do not contain any
-     * encoded data, only side data.
+     * Encoding only and set by default. Allow encoders to output packets
+     * that do not contain any encoded data, only side data.
      *
      * Some encoders need to output such packets, e.g. to update some stream
      * parameters at the end of encoding.
      *
-     * All callers are strongly recommended to set this option to 1 and update
-     * their code to deal with such packets, since this behaviour may become
-     * always enabled in the future (then this option will be deprecated and
-     * later removed). To avoid ABI issues when this happens, the callers should
-     * use AVOptions to set this field.
+     * @deprecated this field disables the default behaviour and
+     *             it is kept only for compatibility.
      */
+    attribute_deprecated
     int side_data_only_packets;
+#endif
 
     /**
      * Audio only. The number of "priming" samples (padding) inserted by the
@@ -3146,6 +3335,26 @@ typedef struct AVCodecContext {
      * - decoding: set by user through AVOPtions (NO direct access)
      */
     char *codec_whitelist;
+
+    /**
+     * Properties of the stream that gets decoded
+     * To be accessed through av_codec_get_codec_properties() (NO direct access)
+     * - encoding: unused
+     * - decoding: set by libavcodec
+     */
+    unsigned properties;
+#define FF_CODEC_PROPERTY_LOSSLESS        0x00000001
+#define FF_CODEC_PROPERTY_CLOSED_CAPTIONS 0x00000002
+
+    /**
+     * Additional data associated with the entire coded stream.
+     *
+     * - decoding: unused
+     * - encoding: may be set by libavcodec after avcodec_open2().
+     */
+    AVPacketSideData *coded_side_data;
+    int            nb_coded_side_data;
+
 } AVCodecContext;
 
 AVRational av_codec_get_pkt_timebase         (const AVCodecContext *avctx);
@@ -3154,6 +3363,8 @@ void       av_codec_set_pkt_timebase         (AVCodecContext *avctx, AVRational
 const AVCodecDescriptor *av_codec_get_codec_descriptor(const AVCodecContext *avctx);
 void                     av_codec_set_codec_descriptor(AVCodecContext *avctx, const AVCodecDescriptor *desc);
 
+unsigned av_codec_get_codec_properties(const AVCodecContext *avctx);
+
 int  av_codec_get_lowres(const AVCodecContext *avctx);
 void av_codec_set_lowres(AVCodecContext *avctx, int val);
 
@@ -3195,7 +3406,7 @@ typedef struct AVCodec {
     enum AVCodecID id;
     /**
      * Codec capabilities.
-     * see CODEC_CAP_*
+     * see AV_CODEC_CAP_*
      */
     int capabilities;
     const AVRational *supported_framerates; ///< array of supported framerates, or NULL if any, array is terminated by {0,0}
@@ -3203,9 +3414,7 @@ typedef struct AVCodec {
     const int *supported_samplerates;       ///< array of supported audio samplerates, or NULL if unknown, array is terminated by 0
     const enum AVSampleFormat *sample_fmts; ///< array of supported sample formats, or NULL if unknown, array is terminated by -1
     const uint64_t *channel_layouts;         ///< array of support channel layouts, or NULL if unknown. array is terminated by 0
-#if FF_API_LOWRES
     uint8_t max_lowres;                     ///< maximum value for lowres supported by the decoder, no direct access, use av_codec_get_max_lowres()
-#endif
     const AVClass *priv_class;              ///< AVClass for the private context
     const AVProfile *profiles;              ///< array of recognized profiles, or NULL if unknown, array is terminated by {FF_PROFILE_UNKNOWN}
 
@@ -3316,7 +3525,7 @@ typedef struct AVHWAccel {
 
     /**
      * Hardware accelerated codec capabilities.
-     * see FF_HWACCEL_CODEC_CAP_*
+     * see HWACCEL_CODEC_CAP_*
      */
     int capabilities;
 
@@ -3423,6 +3632,9 @@ typedef struct AVHWAccel {
  * Hardware acceleration should be used for decoding even if the codec level
  * used is unknown or higher than the maximum supported level reported by the
  * hardware driver.
+ *
+ * It's generally a good idea to pass this flag unless you have a specific
+ * reason not to, as hardware tends to under-report supported levels.
  */
 #define AV_HWACCEL_FLAG_IGNORE_LEVEL (1 << 0)
 
@@ -3436,6 +3648,7 @@ typedef struct AVHWAccel {
  * @}
  */
 
+#if FF_API_AVPICTURE
 /**
  * @defgroup lavc_picture AVPicture
  *
@@ -3448,15 +3661,19 @@ typedef struct AVHWAccel {
  *
  * Up to four components can be stored into it, the last component is
  * alpha.
+ * @deprecated use AVFrame or imgutils functions instead
  */
 typedef struct AVPicture {
+    attribute_deprecated
     uint8_t *data[AV_NUM_DATA_POINTERS];    ///< pointers to the image data planes
+    attribute_deprecated
     int linesize[AV_NUM_DATA_POINTERS];     ///< number of bytes per line
 } AVPicture;
 
 /**
  * @}
  */
+#endif
 
 enum AVSubtitleType {
     SUBTITLE_NONE,
@@ -3485,11 +3702,20 @@ typedef struct AVSubtitleRect {
     int h;         ///< height           of pict, undefined when pict is not set
     int nb_colors; ///< number of colors in pict, undefined when pict is not set
 
+#if FF_API_AVPICTURE
     /**
-     * data+linesize for the bitmap of this subtitle.
-     * can be set for text/ass as well once they are rendered
+     * @deprecated unused
      */
+    attribute_deprecated
     AVPicture pict;
+#endif
+    /**
+     * data+linesize for the bitmap of this subtitle.
+     * Can be set for text/ass as well once they are rendered.
+     */
+    uint8_t *data[4];
+    int linesize[4];
+
     enum AVSubtitleType type;
 
     char *text;                     ///< 0 terminated plain UTF-8 text
@@ -3627,39 +3853,6 @@ const AVClass *avcodec_get_subtitle_rect_class(void);
  */
 int avcodec_copy_context(AVCodecContext *dest, const AVCodecContext *src);
 
-#if FF_API_AVFRAME_LAVC
-/**
- * @deprecated use av_frame_alloc()
- */
-attribute_deprecated
-AVFrame *avcodec_alloc_frame(void);
-
-/**
- * Set the fields of the given AVFrame to default values.
- *
- * @param frame The AVFrame of which the fields should be set to default values.
- *
- * @deprecated use av_frame_unref()
- */
-attribute_deprecated
-void avcodec_get_frame_defaults(AVFrame *frame);
-
-/**
- * Free the frame and any dynamically allocated objects in it,
- * e.g. extended_data.
- *
- * @param frame frame to be freed. The pointer will be set to NULL.
- *
- * @warning this function does NOT free the data buffers themselves
- * (it does not know how, since they might have been allocated with
- *  a custom get_buffer()).
- *
- * @deprecated use av_frame_free()
- */
-attribute_deprecated
-void avcodec_free_frame(AVFrame **frame);
-#endif
-
 /**
  * Initialize the AVCodecContext to use the given AVCodec. Prior to using this
  * function the context has to be allocated with avcodec_alloc_context3().
@@ -3728,14 +3921,39 @@ void avsubtitle_free(AVSubtitle *sub);
  * @{
  */
 
-#if FF_API_DESTRUCT_PACKET
 /**
- * Default packet destructor.
- * @deprecated use the AVBuffer API instead
+ * Allocate an AVPacket and set its fields to default values.  The resulting
+ * struct must be freed using av_packet_free().
+ *
+ * @return An AVPacket filled with default values or NULL on failure.
+ *
+ * @note this only allocates the AVPacket itself, not the data buffers. Those
+ * must be allocated through other means such as av_new_packet.
+ *
+ * @see av_new_packet
+ */
+AVPacket *av_packet_alloc(void);
+
+/**
+ * Create a new packet that references the same data as src.
+ *
+ * This is a shortcut for av_packet_alloc()+av_packet_ref().
+ *
+ * @return newly created AVPacket on success, NULL on error.
+ *
+ * @see av_packet_alloc
+ * @see av_packet_ref
  */
-attribute_deprecated
-void av_destruct_packet(AVPacket *pkt);
-#endif
+AVPacket *av_packet_clone(AVPacket *src);
+
+/**
+ * Free the packet. If the packet is reference counted, it will be
+ * unreferenced first.
+ *
+ * @param pkt packet to be freed. The pointer will be set to NULL.
+ * @note passing NULL is a no-op.
+ */
+void av_packet_free(AVPacket **pkt);
 
 /**
  * Initialize optional fields of a packet with default values.
@@ -3782,18 +4000,21 @@ int av_grow_packet(AVPacket *pkt, int grow_by);
  *        function returns successfully, the data is owned by the underlying AVBuffer.
  *        The caller may not access the data through other means.
  * @param size size of data in bytes, without the padding. I.e. the full buffer
- *        size is assumed to be size + FF_INPUT_BUFFER_PADDING_SIZE.
+ *        size is assumed to be size + AV_INPUT_BUFFER_PADDING_SIZE.
  *
  * @return 0 on success, a negative AVERROR on error
  */
 int av_packet_from_data(AVPacket *pkt, uint8_t *data, int size);
 
+#if FF_API_AVPACKET_OLD_API
 /**
  * @warning This is a hack - the packet memory allocation stuff is broken. The
  * packet is allocated if it was not really allocated.
+ *
+ * @deprecated Use av_packet_ref
  */
+attribute_deprecated
 int av_dup_packet(AVPacket *pkt);
-
 /**
  * Copy packet, including contents
  *
@@ -3811,10 +4032,13 @@ int av_copy_packet_side_data(AVPacket *dst, const AVPacket *src);
 /**
  * Free a packet.
  *
+ * @deprecated Use av_packet_unref
+ *
  * @param pkt packet to free
  */
+attribute_deprecated
 void av_free_packet(AVPacket *pkt);
-
+#endif
 /**
  * Allocate new information of a packet.
  *
@@ -3826,6 +4050,22 @@ void av_free_packet(AVPacket *pkt);
 uint8_t* av_packet_new_side_data(AVPacket *pkt, enum AVPacketSideDataType type,
                                  int size);
 
+/**
+ * Wrap an existing array as a packet side data.
+ *
+ * @param pkt packet
+ * @param type side information type
+ * @param data the side data array. It must be allocated with the av_malloc()
+ *             family of functions. The ownership of the data is transferred to
+ *             pkt.
+ * @param size side information size
+ * @return a non-negative number on success, a negative AVERROR code on
+ *         failure. On failure, the packet is unchanged and the data remains
+ *         owned by the caller.
+ */
+int av_packet_add_side_data(AVPacket *pkt, enum AVPacketSideDataType type,
+                            uint8_t *data, size_t size);
+
 /**
  * Shrink the already allocated side data buffer
  *
@@ -3971,16 +4211,10 @@ AVCodec *avcodec_find_decoder(enum AVCodecID id);
  */
 AVCodec *avcodec_find_decoder_by_name(const char *name);
 
-#if FF_API_GET_BUFFER
-attribute_deprecated int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic);
-attribute_deprecated void avcodec_default_release_buffer(AVCodecContext *s, AVFrame *pic);
-attribute_deprecated int avcodec_default_reget_buffer(AVCodecContext *s, AVFrame *pic);
-#endif
-
 /**
  * The default callback for AVCodecContext.get_buffer2(). It is made public so
  * it can be called by custom get_buffer2() implementations for decoders without
- * CODEC_CAP_DR1 set.
+ * AV_CODEC_CAP_DR1 set.
  */
 int avcodec_default_get_buffer2(AVCodecContext *s, AVFrame *frame, int flags);
 
@@ -4004,7 +4238,7 @@ unsigned avcodec_get_edge_width(void);
  * buffer that is acceptable for the codec if you do not use any horizontal
  * padding.
  *
- * May only be used if a codec with CODEC_CAP_DR1 has been opened.
+ * May only be used if a codec with AV_CODEC_CAP_DR1 has been opened.
  */
 void avcodec_align_dimensions(AVCodecContext *s, int *width, int *height);
 
@@ -4013,7 +4247,7 @@ void avcodec_align_dimensions(AVCodecContext *s, int *width, int *height);
  * buffer that is acceptable for the codec if you also ensure that all
  * line sizes are a multiple of the respective linesize_align[i].
  *
- * May only be used if a codec with CODEC_CAP_DR1 has been opened.
+ * May only be used if a codec with AV_CODEC_CAP_DR1 has been opened.
  */
 void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height,
                                int linesize_align[AV_NUM_DATA_POINTERS]);
@@ -4040,66 +4274,6 @@ int avcodec_enum_to_chroma_pos(int *xpos, int *ypos, enum AVChromaLocation pos);
  */
 enum AVChromaLocation avcodec_chroma_pos_to_enum(int xpos, int ypos);
 
-#if FF_API_OLD_DECODE_AUDIO
-/**
- * Wrapper function which calls avcodec_decode_audio4.
- *
- * @deprecated Use avcodec_decode_audio4 instead.
- *
- * Decode the audio frame of size avpkt->size from avpkt->data into samples.
- * Some decoders may support multiple frames in a single AVPacket, such
- * decoders would then just decode the first frame. In this case,
- * avcodec_decode_audio3 has to be called again with an AVPacket that contains
- * the remaining data in order to decode the second frame etc.
- * If no frame
- * could be outputted, frame_size_ptr is zero. Otherwise, it is the
- * decompressed frame size in bytes.
- *
- * @warning You must set frame_size_ptr to the allocated size of the
- * output buffer before calling avcodec_decode_audio3().
- *
- * @warning The input buffer must be FF_INPUT_BUFFER_PADDING_SIZE larger than
- * the actual read bytes because some optimized bitstream readers read 32 or 64
- * bits at once and could read over the end.
- *
- * @warning The end of the input buffer avpkt->data should be set to 0 to ensure that
- * no overreading happens for damaged MPEG streams.
- *
- * @warning You must not provide a custom get_buffer() when using
- * avcodec_decode_audio3().  Doing so will override it with
- * avcodec_default_get_buffer.  Use avcodec_decode_audio4() instead,
- * which does allow the application to provide a custom get_buffer().
- *
- * @note You might have to align the input buffer avpkt->data and output buffer
- * samples. The alignment requirements depend on the CPU: On some CPUs it isn't
- * necessary at all, on others it won't work at all if not aligned and on others
- * it will work but it will have an impact on performance.
- *
- * In practice, avpkt->data should have 4 byte alignment at minimum and
- * samples should be 16 byte aligned unless the CPU doesn't need it
- * (AltiVec and SSE do).
- *
- * @note Codecs which have the CODEC_CAP_DELAY capability set have a delay
- * between input and output, these need to be fed with avpkt->data=NULL,
- * avpkt->size=0 at the end to return the remaining frames.
- *
- * @param avctx the codec context
- * @param[out] samples the output buffer, sample type in avctx->sample_fmt
- *                     If the sample format is planar, each channel plane will
- *                     be the same size, with no padding between channels.
- * @param[in,out] frame_size_ptr the output buffer size in bytes
- * @param[in] avpkt The input AVPacket containing the input buffer.
- *            You can create such packet with av_init_packet() and by then setting
- *            data and size, some decoders might in addition need other fields.
- *            All decoders are designed to use the least fields possible though.
- * @return On error a negative value is returned, otherwise the number of bytes
- * used or zero if no frame data was decompressed (used) from the input AVPacket.
- */
-attribute_deprecated int avcodec_decode_audio3(AVCodecContext *avctx, int16_t *samples,
-                         int *frame_size_ptr,
-                         AVPacket *avpkt);
-#endif
-
 /**
  * Decode the audio frame of size avpkt->size from avpkt->data into frame.
  *
@@ -4111,15 +4285,15 @@ attribute_deprecated int avcodec_decode_audio3(AVCodecContext *avctx, int16_t *s
  * needs to be fed to the decoder with remaining data until it is completely
  * consumed or an error occurs.
  *
- * Some decoders (those marked with CODEC_CAP_DELAY) have a delay between input
+ * Some decoders (those marked with AV_CODEC_CAP_DELAY) have a delay between input
  * and output. This means that for some packets they will not immediately
  * produce decoded output and need to be flushed at the end of decoding to get
  * all the decoded data. Flushing is done by calling this function with packets
  * with avpkt->data set to NULL and avpkt->size set to 0 until it stops
  * returning samples. It is safe to flush even those decoders that are not
- * marked with CODEC_CAP_DELAY, then no samples will be returned.
+ * marked with AV_CODEC_CAP_DELAY, then no samples will be returned.
  *
- * @warning The input buffer, avpkt->data must be FF_INPUT_BUFFER_PADDING_SIZE
+ * @warning The input buffer, avpkt->data must be AV_INPUT_BUFFER_PADDING_SIZE
  *          larger than the actual read bytes because some optimized bitstream
  *          readers read 32 or 64 bits at once and could read over the end.
  *
@@ -4142,7 +4316,7 @@ attribute_deprecated int avcodec_decode_audio3(AVCodecContext *avctx, int16_t *s
  * @param[out] got_frame_ptr Zero if no frame could be decoded, otherwise it is
  *                           non-zero. Note that this field being set to zero
  *                           does not mean that an error has occurred. For
- *                           decoders with CODEC_CAP_DELAY set, no given decode
+ *                           decoders with AV_CODEC_CAP_DELAY set, no given decode
  *                           call is guaranteed to produce a frame.
  * @param[in]  avpkt The input AVPacket containing the input buffer.
  *                   At least avpkt->data and avpkt->size should be set. Some
@@ -4159,14 +4333,14 @@ int avcodec_decode_audio4(AVCodecContext *avctx, AVFrame *frame,
  * Some decoders may support multiple frames in a single AVPacket, such
  * decoders would then just decode the first frame.
  *
- * @warning The input buffer must be FF_INPUT_BUFFER_PADDING_SIZE larger than
+ * @warning The input buffer must be AV_INPUT_BUFFER_PADDING_SIZE larger than
  * the actual read bytes because some optimized bitstream readers read 32 or 64
  * bits at once and could read over the end.
  *
  * @warning The end of the input buffer buf should be set to 0 to ensure that
  * no overreading happens for damaged MPEG streams.
  *
- * @note Codecs which have the CODEC_CAP_DELAY capability set have a delay
+ * @note Codecs which have the AV_CODEC_CAP_DELAY capability set have a delay
  * between input and output, these need to be fed with avpkt->data=NULL,
  * avpkt->size=0 at the end to return the remaining frames.
  *
@@ -4206,7 +4380,7 @@ int avcodec_decode_video2(AVCodecContext *avctx, AVFrame *picture,
  * Return a negative value on error, otherwise return the number of bytes used.
  * If no subtitle could be decompressed, got_sub_ptr is zero.
  * Otherwise, the subtitle is stored in *sub.
- * Note that CODEC_CAP_DR1 is not available for subtitle codecs. This is for
+ * Note that AV_CODEC_CAP_DR1 is not available for subtitle codecs. This is for
  * simplicity, because the performance difference is expect to be negligible
  * and reusing a get_buffer written for video codecs would probably perform badly
  * due to a potentially very different allocation pattern.
@@ -4295,24 +4469,13 @@ typedef struct AVCodecParserContext {
      */
     int key_frame;
 
+#if FF_API_CONVERGENCE_DURATION
     /**
-     * Time difference in stream time base units from the pts of this
-     * packet to the point at which the output from the decoder has converged
-     * independent from the availability of previous frames. That is, the
-     * frames are virtually identical no matter if decoding started from
-     * the very first frame or from this keyframe.
-     * Is AV_NOPTS_VALUE if unknown.
-     * This field is not the display duration of the current frame.
-     * This field has no meaning if the packet does not have AV_PKT_FLAG_KEY
-     * set.
-     *
-     * The purpose of this field is to allow seeking in streams that have no
-     * keyframes in the conventional sense. It corresponds to the
-     * recovery point SEI in H.264 and match_time_delta in NUT. It is also
-     * essential for some types of subtitle streams to ensure that all
-     * subtitles are correctly displayed after seeking.
+     * @deprecated unused
      */
+    attribute_deprecated
     int64_t convergence_duration;
+#endif
 
     // Timestamp generation support:
     /**
@@ -4513,36 +4676,6 @@ AVCodec *avcodec_find_encoder(enum AVCodecID id);
  */
 AVCodec *avcodec_find_encoder_by_name(const char *name);
 
-#if FF_API_OLD_ENCODE_AUDIO
-/**
- * Encode an audio frame from samples into buf.
- *
- * @deprecated Use avcodec_encode_audio2 instead.
- *
- * @note The output buffer should be at least FF_MIN_BUFFER_SIZE bytes large.
- * However, for codecs with avctx->frame_size equal to 0 (e.g. PCM) the user
- * will know how much space is needed because it depends on the value passed
- * in buf_size as described below. In that case a lower value can be used.
- *
- * @param avctx the codec context
- * @param[out] buf the output buffer
- * @param[in] buf_size the output buffer size
- * @param[in] samples the input buffer containing the samples
- * The number of samples read from this buffer is frame_size*channels,
- * both of which are defined in avctx.
- * For codecs which have avctx->frame_size equal to 0 (e.g. PCM) the number of
- * samples read from samples is equal to:
- * buf_size * 8 / (avctx->channels * av_get_bits_per_sample(avctx->codec_id))
- * This also implies that av_get_bits_per_sample() must not return 0 for these
- * codecs.
- * @return On error a negative value is returned, on success zero or the number
- * of bytes used to encode the data read from the input buffer.
- */
-int attribute_deprecated avcodec_encode_audio(AVCodecContext *avctx,
-                                              uint8_t *buf, int buf_size,
-                                              const short *samples);
-#endif
-
 /**
  * Encode a frame of audio.
  *
@@ -4564,12 +4697,11 @@ int attribute_deprecated avcodec_encode_audio(AVCodecContext *avctx,
  *                  of the output packet.
  *
  *                  If this function fails or produces no output, avpkt will be
- *                  freed using av_free_packet() (i.e. avpkt->destruct will be
- *                  called to free the user supplied buffer).
+ *                  freed using av_packet_unref().
  * @param[in] frame AVFrame containing the raw audio data to be encoded.
  *                  May be NULL when flushing an encoder that has the
- *                  CODEC_CAP_DELAY capability set.
- *                  If CODEC_CAP_VARIABLE_FRAME_SIZE is set, then each frame
+ *                  AV_CODEC_CAP_DELAY capability set.
+ *                  If AV_CODEC_CAP_VARIABLE_FRAME_SIZE is set, then each frame
  *                  can have any number of samples.
  *                  If it is not set, frame->nb_samples must be equal to
  *                  avctx->frame_size for all frames except the last.
@@ -4585,26 +4717,6 @@ int attribute_deprecated avcodec_encode_audio(AVCodecContext *avctx,
 int avcodec_encode_audio2(AVCodecContext *avctx, AVPacket *avpkt,
                           const AVFrame *frame, int *got_packet_ptr);
 
-#if FF_API_OLD_ENCODE_VIDEO
-/**
- * @deprecated use avcodec_encode_video2() instead.
- *
- * Encode a video frame from pict into buf.
- * The input picture should be
- * stored using a specific format, namely avctx.pix_fmt.
- *
- * @param avctx the codec context
- * @param[out] buf the output buffer for the bitstream of encoded frame
- * @param[in] buf_size the size of the output buffer in bytes
- * @param[in] pict the input picture to encode
- * @return On error a negative value is returned, on success zero or the number
- * of bytes used from the output buffer.
- */
-attribute_deprecated
-int avcodec_encode_video(AVCodecContext *avctx, uint8_t *buf, int buf_size,
-                         const AVFrame *pict);
-#endif
-
 /**
  * Encode a frame of video.
  *
@@ -4626,11 +4738,10 @@ int avcodec_encode_video(AVCodecContext *avctx, uint8_t *buf, int buf_size,
  *                  caller, he is responsible for freeing it.
  *
  *                  If this function fails or produces no output, avpkt will be
- *                  freed using av_free_packet() (i.e. avpkt->destruct will be
- *                  called to free the user supplied buffer).
+ *                  freed using av_packet_unref().
  * @param[in] frame AVFrame containing the raw video data to be encoded.
  *                  May be NULL when flushing an encoder that has the
- *                  CODEC_CAP_DELAY capability set.
+ *                  AV_CODEC_CAP_DELAY capability set.
  * @param[out] got_packet_ptr This field is set to 1 by libavcodec if the
  *                            output packet is non-empty, and to 0 if it is
  *                            empty. If the function returns an error, the
@@ -4747,129 +4858,70 @@ void av_resample_close(struct AVResampleContext *c);
  */
 #endif
 
+#if FF_API_AVPICTURE
 /**
  * @addtogroup lavc_picture
  * @{
  */
 
 /**
- * Allocate memory for the pixels of a picture and setup the AVPicture
- * fields for it.
- *
- * Call avpicture_free() to free it.
- *
- * @param picture            the picture structure to be filled in
- * @param pix_fmt            the pixel format of the picture
- * @param width              the width of the picture
- * @param height             the height of the picture
- * @return zero if successful, a negative error code otherwise
- *
- * @see av_image_alloc(), avpicture_fill()
+ * @deprecated unused
  */
+attribute_deprecated
 int avpicture_alloc(AVPicture *picture, enum AVPixelFormat pix_fmt, int width, int height);
 
 /**
- * Free a picture previously allocated by avpicture_alloc().
- * The data buffer used by the AVPicture is freed, but the AVPicture structure
- * itself is not.
- *
- * @param picture the AVPicture to be freed
+ * @deprecated unused
  */
+attribute_deprecated
 void avpicture_free(AVPicture *picture);
 
 /**
- * Setup the picture fields based on the specified image parameters
- * and the provided image data buffer.
- *
- * The picture fields are filled in by using the image data buffer
- * pointed to by ptr.
- *
- * If ptr is NULL, the function will fill only the picture linesize
- * array and return the required size for the image buffer.
- *
- * To allocate an image buffer and fill the picture data in one call,
- * use avpicture_alloc().
- *
- * @param picture       the picture to be filled in
- * @param ptr           buffer where the image data is stored, or NULL
- * @param pix_fmt       the pixel format of the image
- * @param width         the width of the image in pixels
- * @param height        the height of the image in pixels
- * @return the size in bytes required for src, a negative error code
- * in case of failure
- *
- * @see av_image_fill_arrays()
+ * @deprecated use av_image_fill_arrays() instead.
  */
+attribute_deprecated
 int avpicture_fill(AVPicture *picture, const uint8_t *ptr,
                    enum AVPixelFormat pix_fmt, int width, int height);
 
 /**
- * Copy pixel data from an AVPicture into a buffer.
- *
- * avpicture_get_size() can be used to compute the required size for
- * the buffer to fill.
- *
- * @param src        source picture with filled data
- * @param pix_fmt    picture pixel format
- * @param width      picture width
- * @param height     picture height
- * @param dest       destination buffer
- * @param dest_size  destination buffer size in bytes
- * @return the number of bytes written to dest, or a negative value
- * (error code) on error, for example if the destination buffer is not
- * big enough
- *
- * @see av_image_copy_to_buffer()
+ * @deprecated use av_image_copy_to_buffer() instead.
  */
+attribute_deprecated
 int avpicture_layout(const AVPicture *src, enum AVPixelFormat pix_fmt,
                      int width, int height,
                      unsigned char *dest, int dest_size);
 
 /**
- * Calculate the size in bytes that a picture of the given width and height
- * would occupy if stored in the given picture format.
- *
- * @param pix_fmt    picture pixel format
- * @param width      picture width
- * @param height     picture height
- * @return the computed picture buffer size or a negative error code
- * in case of error
- *
- * @see av_image_get_buffer_size().
+ * @deprecated use av_image_get_buffer_size() instead.
  */
+attribute_deprecated
 int avpicture_get_size(enum AVPixelFormat pix_fmt, int width, int height);
 
-#if FF_API_DEINTERLACE
 /**
- *  deinterlace - if not supported return -1
- *
- * @deprecated - use yadif (in libavfilter) instead
+ * @deprecated use av_image_copy() instead.
  */
 attribute_deprecated
-int avpicture_deinterlace(AVPicture *dst, const AVPicture *src,
-                          enum AVPixelFormat pix_fmt, int width, int height);
-#endif
-/**
- * Copy image src to dst. Wraps av_image_copy().
- */
 void av_picture_copy(AVPicture *dst, const AVPicture *src,
                      enum AVPixelFormat pix_fmt, int width, int height);
 
 /**
- * Crop image top and left side.
+ * @deprecated unused
  */
+attribute_deprecated
 int av_picture_crop(AVPicture *dst, const AVPicture *src,
                     enum AVPixelFormat pix_fmt, int top_band, int left_band);
 
 /**
- * Pad image.
+ * @deprecated unused
  */
+attribute_deprecated
 int av_picture_pad(AVPicture *dst, const AVPicture *src, int height, int width, enum AVPixelFormat pix_fmt,
             int padtop, int padbottom, int padleft, int padright, int *color);
 
 /**
  * @}
  */
+#endif
 
 /**
  * @defgroup lavc_misc Utility functions
@@ -4991,6 +5043,19 @@ void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode);
  */
 const char *av_get_profile_name(const AVCodec *codec, int profile);
 
+/**
+ * Return a name for the specified profile, if available.
+ *
+ * @param codec_id the ID of the codec to which the requested profile belongs
+ * @param profile the profile value for which a name is requested
+ * @return A name for the profile if found, NULL otherwise.
+ *
+ * @note unlike av_get_profile_name(), which searches a list of profiles
+ *       supported by a specific decoder or encoder implementation, this
+ *       function searches the list of profiles from the AVCodecDescriptor
+ */
+const char *avcodec_profile_name(enum AVCodecID codec_id, int profile);
+
 int avcodec_default_execute(AVCodecContext *c, int (*func)(AVCodecContext *c2, void *arg2),void *arg, int *ret, int count, int size);
 int avcodec_default_execute2(AVCodecContext *c, int (*func)(AVCodecContext *c2, void *arg2, int, int),void *arg, int *ret, int count);
 //FIXME func typedef
@@ -5075,6 +5140,11 @@ typedef struct AVBitStreamFilterContext {
     struct AVBitStreamFilter *filter;
     AVCodecParserContext *parser;
     struct AVBitStreamFilterContext *next;
+    /**
+     * Internal default arguments, used if NULL is passed to av_bitstream_filter_filter().
+     * Not for access by library users.
+     */
+    char *args;
 } AVBitStreamFilterContext;
 
 
@@ -5167,7 +5237,7 @@ AVBitStreamFilter *av_bitstream_filter_next(const AVBitStreamFilter *f);
 
 /**
  * Same behaviour av_fast_malloc but the buffer has additional
- * FF_INPUT_BUFFER_PADDING_SIZE at the end which will always be 0.
+ * AV_INPUT_BUFFER_PADDING_SIZE at the end which will always be 0.
  *
  * In addition the whole buffer will initially and after resizes
  * be 0-initialized so that no uninitialized data will ever appear.
@@ -5314,6 +5384,17 @@ const AVCodecDescriptor *avcodec_descriptor_next(const AVCodecDescriptor *prev);
  */
 const AVCodecDescriptor *avcodec_descriptor_get_by_name(const char *name);
 
+/**
+ * Allocate a CPB properties structure and initialize its fields to default
+ * values.
+ *
+ * @param size if non-NULL, the size of the allocated struct will be written
+ *             here. This is useful for embedding it in side data.
+ *
+ * @return the newly allocated struct or NULL on failure
+ */
+AVCPBProperties *av_cpb_properties_alloc(size_t *size);
+
 /**
  * @}
  */
diff --git a/libavcodec/avdct.c b/libavcodec/avdct.c
index f92c691a..80aca887 100644
--- a/libavcodec/avdct.c
+++ b/libavcodec/avdct.c
@@ -34,7 +34,7 @@
 
 static const AVOption avdct_options[] = {
 {"dct", "DCT algorithm", OFFSET(dct_algo), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, INT_MAX, V|E, "dct"},
-{"auto", "autoselect a good one (default)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_AUTO }, INT_MIN, INT_MAX, V|E, "dct"},
+{"auto", "autoselect a good one", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_AUTO }, INT_MIN, INT_MAX, V|E, "dct"},
 {"fastint", "fast integer (experimental / for debugging)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_FASTINT }, INT_MIN, INT_MAX, V|E, "dct"},
 {"int", "accurate integer", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_INT }, INT_MIN, INT_MAX, V|E, "dct"},
 {"mmx", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_MMX }, INT_MIN, INT_MAX, V|E, "dct"},
@@ -42,7 +42,7 @@ static const AVOption avdct_options[] = {
 {"faan", "floating point AAN DCT (experimental / for debugging)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_FAAN }, INT_MIN, INT_MAX, V|E, "dct"},
 
 {"idct", "select IDCT implementation", OFFSET(idct_algo), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, INT_MAX, V|E|D, "idct"},
-{"auto", "autoselect a good one (default)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_AUTO }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"auto", "autoselect a good one", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_AUTO }, INT_MIN, INT_MAX, V|E|D, "idct"},
 {"int", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_INT }, INT_MIN, INT_MAX, V|E|D, "idct"},
 {"simple", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLE }, INT_MIN, INT_MAX, V|E|D, "idct"},
 {"simplemmx", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEMMX }, INT_MIN, INT_MAX, V|E|D, "idct"},
@@ -58,7 +58,9 @@ static const AVOption avdct_options[] = {
 #if FF_API_ARCH_ALPHA
 {"simplealpha", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEALPHA }, INT_MIN, INT_MAX, V|E|D, "idct"},
 #endif
+#if FF_API_UNUSED_MEMBERS
 {"ipp", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_IPP }, INT_MIN, INT_MAX, V|E|D, "idct"},
+#endif
 {"xvid", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_XVID }, INT_MIN, INT_MAX, V|E|D, "idct"},
 {"xvidmmx", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_XVID }, INT_MIN, INT_MAX, V|E|D, "idct"},
 {"faani", "floating point AAN IDCT (experimental / for debugging)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_FAAN }, INT_MIN, INT_MAX, V|D|E, "idct"},
diff --git a/libavcodec/avpacket.c b/libavcodec/avpacket.c
index aae67c5a..b2079f61 100644
--- a/libavcodec/avpacket.c
+++ b/libavcodec/avpacket.c
@@ -30,52 +30,55 @@
 #include "bytestream.h"
 #include "internal.h"
 
-#if FF_API_DESTRUCT_PACKET
-
-void av_destruct_packet(AVPacket *pkt)
-{
-    av_freep(&pkt->data);
-    pkt->size = 0;
-}
-
-/* a dummy destruct callback for the callers that assume AVPacket.destruct ==
- * NULL => static data */
-static void dummy_destruct_packet(AVPacket *pkt)
-{
-    av_assert0(0);
-}
-#endif
-
 void av_init_packet(AVPacket *pkt)
 {
     pkt->pts                  = AV_NOPTS_VALUE;
     pkt->dts                  = AV_NOPTS_VALUE;
     pkt->pos                  = -1;
     pkt->duration             = 0;
-    pkt->convergence_duration = 0;
-    pkt->flags                = 0;
-    pkt->stream_index         = 0;
-#if FF_API_DESTRUCT_PACKET
+#if FF_API_CONVERGENCE_DURATION
 FF_DISABLE_DEPRECATION_WARNINGS
-    pkt->destruct             = NULL;
+    pkt->convergence_duration = 0;
 FF_ENABLE_DEPRECATION_WARNINGS
 #endif
+    pkt->flags                = 0;
+    pkt->stream_index         = 0;
     pkt->buf                  = NULL;
     pkt->side_data            = NULL;
     pkt->side_data_elems      = 0;
 }
 
+AVPacket *av_packet_alloc(void)
+{
+    AVPacket *pkt = av_mallocz(sizeof(AVPacket));
+    if (!pkt)
+        return pkt;
+
+    av_packet_unref(pkt);
+
+    return pkt;
+}
+
+void av_packet_free(AVPacket **pkt)
+{
+    if (!pkt || !*pkt)
+        return;
+
+    av_packet_unref(*pkt);
+    av_freep(pkt);
+}
+
 static int packet_alloc(AVBufferRef **buf, int size)
 {
     int ret;
-    if ((unsigned)size >= (unsigned)size + FF_INPUT_BUFFER_PADDING_SIZE)
+    if (size < 0 || size >= INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE)
         return AVERROR(EINVAL);
 
-    ret = av_buffer_realloc(buf, size + FF_INPUT_BUFFER_PADDING_SIZE);
+    ret = av_buffer_realloc(buf, size + AV_INPUT_BUFFER_PADDING_SIZE);
     if (ret < 0)
         return ret;
 
-    memset((*buf)->data + size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
+    memset((*buf)->data + size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
 
     return 0;
 }
@@ -91,11 +94,6 @@ int av_new_packet(AVPacket *pkt, int size)
     pkt->buf      = buf;
     pkt->data     = buf->data;
     pkt->size     = size;
-#if FF_API_DESTRUCT_PACKET
-FF_DISABLE_DEPRECATION_WARNINGS
-    pkt->destruct = dummy_destruct_packet;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
 
     return 0;
 }
@@ -105,20 +103,20 @@ void av_shrink_packet(AVPacket *pkt, int size)
     if (pkt->size <= size)
         return;
     pkt->size = size;
-    memset(pkt->data + size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
+    memset(pkt->data + size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
 }
 
 int av_grow_packet(AVPacket *pkt, int grow_by)
 {
     int new_size;
-    av_assert0((unsigned)pkt->size <= INT_MAX - FF_INPUT_BUFFER_PADDING_SIZE);
+    av_assert0((unsigned)pkt->size <= INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE);
     if (!pkt->size)
         return av_new_packet(pkt, grow_by);
     if ((unsigned)grow_by >
-        INT_MAX - (pkt->size + FF_INPUT_BUFFER_PADDING_SIZE))
+        INT_MAX - (pkt->size + AV_INPUT_BUFFER_PADDING_SIZE))
         return -1;
 
-    new_size = pkt->size + grow_by + FF_INPUT_BUFFER_PADDING_SIZE;
+    new_size = pkt->size + grow_by + AV_INPUT_BUFFER_PADDING_SIZE;
     if (pkt->buf) {
         int ret = av_buffer_realloc(&pkt->buf, new_size);
         if (ret < 0)
@@ -128,40 +126,32 @@ int av_grow_packet(AVPacket *pkt, int grow_by)
         if (!pkt->buf)
             return AVERROR(ENOMEM);
         memcpy(pkt->buf->data, pkt->data, FFMIN(pkt->size, pkt->size + grow_by));
-#if FF_API_DESTRUCT_PACKET
-FF_DISABLE_DEPRECATION_WARNINGS
-        pkt->destruct = dummy_destruct_packet;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
     }
     pkt->data  = pkt->buf->data;
     pkt->size += grow_by;
-    memset(pkt->data + pkt->size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
+    memset(pkt->data + pkt->size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
 
     return 0;
 }
 
 int av_packet_from_data(AVPacket *pkt, uint8_t *data, int size)
 {
-    if (size >= INT_MAX - FF_INPUT_BUFFER_PADDING_SIZE)
+    if (size >= INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE)
         return AVERROR(EINVAL);
 
-    pkt->buf = av_buffer_create(data, size + FF_INPUT_BUFFER_PADDING_SIZE,
+    pkt->buf = av_buffer_create(data, size + AV_INPUT_BUFFER_PADDING_SIZE,
                                 av_buffer_default_free, NULL, 0);
     if (!pkt->buf)
         return AVERROR(ENOMEM);
 
     pkt->data = data;
     pkt->size = size;
-#if FF_API_DESTRUCT_PACKET
-FF_DISABLE_DEPRECATION_WARNINGS
-    pkt->destruct = dummy_destruct_packet;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
 
     return 0;
 }
 
+#if FF_API_AVPACKET_OLD_API
+FF_DISABLE_DEPRECATION_WARNINGS
 #define ALLOC_MALLOC(data, size) data = av_malloc(size)
 #define ALLOC_BUF(data, size)                \
 do {                                         \
@@ -174,9 +164,9 @@ do {                                         \
         void *data;                                                     \
         if (padding) {                                                  \
             if ((unsigned)(size) >                                      \
-                (unsigned)(size) + FF_INPUT_BUFFER_PADDING_SIZE)        \
+                (unsigned)(size) + AV_INPUT_BUFFER_PADDING_SIZE)        \
                 goto failed_alloc;                                      \
-            ALLOC(data, size + FF_INPUT_BUFFER_PADDING_SIZE);           \
+            ALLOC(data, size + AV_INPUT_BUFFER_PADDING_SIZE);           \
         } else {                                                        \
             ALLOC(data, size);                                          \
         }                                                               \
@@ -185,7 +175,7 @@ do {                                         \
         memcpy(data, src, size);                                        \
         if (padding)                                                    \
             memset((uint8_t *)data + size, 0,                           \
-                   FF_INPUT_BUFFER_PADDING_SIZE);                       \
+                   AV_INPUT_BUFFER_PADDING_SIZE);                       \
         dst = data;                                                     \
     } while (0)
 
@@ -203,11 +193,6 @@ static int copy_packet_data(AVPacket *pkt, const AVPacket *src, int dup)
     } else {
         DUP_DATA(pkt->data, src->data, pkt->size, 1, ALLOC_BUF);
     }
-#if FF_API_DESTRUCT_PACKET
-FF_DISABLE_DEPRECATION_WARNINGS
-    pkt->destruct = dummy_destruct_packet;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
     if (pkt->side_data_elems && dup)
         pkt->side_data = src->side_data;
     if (pkt->side_data_elems && !dup) {
@@ -216,7 +201,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
     return 0;
 
 failed_alloc:
-    av_free_packet(pkt);
+    av_packet_unref(pkt);
     return AVERROR(ENOMEM);
 }
 
@@ -241,21 +226,17 @@ int av_copy_packet_side_data(AVPacket *pkt, const AVPacket *src)
     return 0;
 
 failed_alloc:
-    av_free_packet(pkt);
+    av_packet_unref(pkt);
     return AVERROR(ENOMEM);
 }
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
 int av_dup_packet(AVPacket *pkt)
 {
     AVPacket tmp_pkt;
 
-FF_DISABLE_DEPRECATION_WARNINGS
-    if (!pkt->buf && pkt->data
-#if FF_API_DESTRUCT_PACKET
-        && !pkt->destruct
-#endif
-        ) {
-FF_ENABLE_DEPRECATION_WARNINGS
+    if (!pkt->buf && pkt->data) {
         tmp_pkt = *pkt;
         return copy_packet_data(pkt, &tmp_pkt, 1);
     }
@@ -277,48 +258,63 @@ void av_packet_free_side_data(AVPacket *pkt)
     pkt->side_data_elems = 0;
 }
 
+#if FF_API_AVPACKET_OLD_API
+FF_DISABLE_DEPRECATION_WARNINGS
 void av_free_packet(AVPacket *pkt)
 {
     if (pkt) {
-FF_DISABLE_DEPRECATION_WARNINGS
         if (pkt->buf)
             av_buffer_unref(&pkt->buf);
-#if FF_API_DESTRUCT_PACKET
-        else if (pkt->destruct)
-            pkt->destruct(pkt);
-        pkt->destruct = NULL;
-#endif
-FF_ENABLE_DEPRECATION_WARNINGS
         pkt->data            = NULL;
         pkt->size            = 0;
 
         av_packet_free_side_data(pkt);
     }
 }
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
-uint8_t *av_packet_new_side_data(AVPacket *pkt, enum AVPacketSideDataType type,
-                                 int size)
+int av_packet_add_side_data(AVPacket *pkt, enum AVPacketSideDataType type,
+                            uint8_t *data, size_t size)
 {
     int elems = pkt->side_data_elems;
 
     if ((unsigned)elems + 1 > INT_MAX / sizeof(*pkt->side_data))
-        return NULL;
-    if ((unsigned)size > INT_MAX - FF_INPUT_BUFFER_PADDING_SIZE)
-        return NULL;
+        return AVERROR(ERANGE);
 
     pkt->side_data = av_realloc(pkt->side_data,
                                 (elems + 1) * sizeof(*pkt->side_data));
     if (!pkt->side_data)
-        return NULL;
+        return AVERROR(ENOMEM);
 
-    pkt->side_data[elems].data = av_mallocz(size + FF_INPUT_BUFFER_PADDING_SIZE);
-    if (!pkt->side_data[elems].data)
-        return NULL;
+    pkt->side_data[elems].data = data;
     pkt->side_data[elems].size = size;
     pkt->side_data[elems].type = type;
     pkt->side_data_elems++;
 
-    return pkt->side_data[elems].data;
+    return 0;
+}
+
+
+uint8_t *av_packet_new_side_data(AVPacket *pkt, enum AVPacketSideDataType type,
+                                 int size)
+{
+    int ret;
+    uint8_t *data;
+
+    if ((unsigned)size > INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE)
+        return NULL;
+    data = av_mallocz(size + AV_INPUT_BUFFER_PADDING_SIZE);
+    if (!data)
+        return NULL;
+
+    ret = av_packet_add_side_data(pkt, type, data, size);
+    if (ret < 0) {
+        av_freep(&data);
+        return NULL;
+    }
+
+    return data;
 }
 
 uint8_t *av_packet_get_side_data(AVPacket *pkt, enum AVPacketSideDataType type,
@@ -366,7 +362,7 @@ int av_packet_merge_side_data(AVPacket *pkt){
         AVBufferRef *buf;
         int i;
         uint8_t *p;
-        uint64_t size= pkt->size + 8LL + FF_INPUT_BUFFER_PADDING_SIZE;
+        uint64_t size= pkt->size + 8LL + AV_INPUT_BUFFER_PADDING_SIZE;
         AVPacket old= *pkt;
         for (i=0; i<old.side_data_elems; i++) {
             size += old.side_data[i].size + 5LL;
@@ -378,12 +374,7 @@ int av_packet_merge_side_data(AVPacket *pkt){
             return AVERROR(ENOMEM);
         pkt->buf = buf;
         pkt->data = p = buf->data;
-#if FF_API_DESTRUCT_PACKET
-FF_DISABLE_DEPRECATION_WARNINGS
-        pkt->destruct = dummy_destruct_packet;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
-        pkt->size = size - FF_INPUT_BUFFER_PADDING_SIZE;
+        pkt->size = size - AV_INPUT_BUFFER_PADDING_SIZE;
         bytestream_put_buffer(&p, old.data, old.size);
         for (i=old.side_data_elems-1; i>=0; i--) {
             bytestream_put_buffer(&p, old.side_data[i].data, old.side_data[i].size);
@@ -392,8 +383,8 @@ FF_ENABLE_DEPRECATION_WARNINGS
         }
         bytestream_put_be64(&p, FF_MERGE_MARKER);
         av_assert0(p-pkt->data == pkt->size);
-        memset(p, 0, FF_INPUT_BUFFER_PADDING_SIZE);
-        av_free_packet(&old);
+        memset(p, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+        av_packet_unref(&old);
         pkt->side_data_elems = 0;
         pkt->side_data = NULL;
         return 1;
@@ -410,10 +401,12 @@ int av_packet_split_side_data(AVPacket *pkt){
         p = pkt->data + pkt->size - 8 - 5;
         for (i=1; ; i++){
             size = AV_RB32(p);
-            if (size>INT_MAX || p - pkt->data < size)
+            if (size>INT_MAX - 5 || p - pkt->data < size)
                 return 0;
             if (p[4]&128)
                 break;
+            if (p - pkt->data < size + 5)
+                return 0;
             p-= size+5;
         }
 
@@ -424,8 +417,8 @@ int av_packet_split_side_data(AVPacket *pkt){
         p= pkt->data + pkt->size - 8 - 5;
         for (i=0; ; i++){
             size= AV_RB32(p);
-            av_assert0(size<=INT_MAX && p - pkt->data >= size);
-            pkt->side_data[i].data = av_mallocz(size + FF_INPUT_BUFFER_PADDING_SIZE);
+            av_assert0(size<=INT_MAX - 5 && p - pkt->data >= size);
+            pkt->side_data[i].data = av_mallocz(size + AV_INPUT_BUFFER_PADDING_SIZE);
             pkt->side_data[i].size = size;
             pkt->side_data[i].type = p[4]&127;
             if (!pkt->side_data[i].data)
@@ -527,7 +520,11 @@ int av_packet_copy_props(AVPacket *dst, const AVPacket *src)
     dst->dts                  = src->dts;
     dst->pos                  = src->pos;
     dst->duration             = src->duration;
+#if FF_API_CONVERGENCE_DURATION
+FF_DISABLE_DEPRECATION_WARNINGS
     dst->convergence_duration = src->convergence_duration;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
     dst->flags                = src->flags;
     dst->stream_index         = src->stream_index;
 
@@ -585,6 +582,19 @@ int av_packet_ref(AVPacket *dst, const AVPacket *src)
     return ret;
 }
 
+AVPacket *av_packet_clone(AVPacket *src)
+{
+    AVPacket *ret = av_packet_alloc();
+
+    if (!ret)
+        return ret;
+
+    if (av_packet_ref(ret, src))
+        av_packet_free(&ret);
+
+    return ret;
+}
+
 void av_packet_move_ref(AVPacket *dst, AVPacket *src)
 {
     *dst = *src;
@@ -599,6 +609,35 @@ void av_packet_rescale_ts(AVPacket *pkt, AVRational src_tb, AVRational dst_tb)
         pkt->dts = av_rescale_q(pkt->dts, src_tb, dst_tb);
     if (pkt->duration > 0)
         pkt->duration = av_rescale_q(pkt->duration, src_tb, dst_tb);
+#if FF_API_CONVERGENCE_DURATION
+FF_DISABLE_DEPRECATION_WARNINGS
     if (pkt->convergence_duration > 0)
         pkt->convergence_duration = av_rescale_q(pkt->convergence_duration, src_tb, dst_tb);
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+}
+
+int ff_side_data_set_encoder_stats(AVPacket *pkt, int quality, int64_t *error, int error_count, int pict_type)
+{
+    uint8_t *side_data;
+    int side_data_size;
+    int i;
+
+    side_data = av_packet_get_side_data(pkt, AV_PKT_DATA_QUALITY_STATS, &side_data_size);
+    if (!side_data) {
+        side_data_size = 4+4+8*error_count;
+        side_data = av_packet_new_side_data(pkt, AV_PKT_DATA_QUALITY_STATS,
+                                            side_data_size);
+    }
+
+    if (!side_data || side_data_size < 4+4+8*error_count)
+        return AVERROR(ENOMEM);
+
+    AV_WL32(side_data   , quality  );
+    side_data[4] = pict_type;
+    side_data[5] = error_count;
+    for (i = 0; i<error_count; i++)
+        AV_WL64(side_data+8 + 8*i , error[i]);
+
+    return 0;
 }
diff --git a/libavcodec/avpicture.c b/libavcodec/avpicture.c
index 0484dc3f..56435f4f 100644
--- a/libavcodec/avpicture.c
+++ b/libavcodec/avpicture.c
@@ -29,8 +29,11 @@
 #include "libavutil/common.h"
 #include "libavutil/pixdesc.h"
 #include "libavutil/imgutils.h"
+#include "libavutil/internal.h"
 #include "libavutil/colorspace.h"
 
+#if FF_API_AVPICTURE
+FF_DISABLE_DEPRECATION_WARNINGS
 int avpicture_fill(AVPicture *picture, const uint8_t *ptr,
                    enum AVPixelFormat pix_fmt, int width, int height)
 {
@@ -75,4 +78,5 @@ void av_picture_copy(AVPicture *dst, const AVPicture *src,
     av_image_copy(dst->data, dst->linesize, (const uint8_t **)src->data,
                   src->linesize, pix_fmt, width, height);
 }
-
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif /* FF_API_AVPICTURE */
diff --git a/libavcodec/avrndec.c b/libavcodec/avrndec.c
index 7a50a5c3..695c4912 100644
--- a/libavcodec/avrndec.c
+++ b/libavcodec/avrndec.c
@@ -26,7 +26,7 @@
 #include "libavutil/imgutils.h"
 
 typedef struct {
-    MJpegDecodeContext mjpeg_ctx;
+    AVCodecContext *mjpeg_avctx;
     int is_mjpeg;
     int interlace; //FIXME use frame.interlaced_frame
     int tff;
@@ -45,8 +45,31 @@ static av_cold int init(AVCodecContext *avctx)
         return AVERROR(EINVAL);
     }
 
-    if(a->is_mjpeg)
-        return ff_mjpeg_decode_init(avctx);
+    if(a->is_mjpeg) {
+        AVCodec *codec = avcodec_find_decoder(AV_CODEC_ID_MJPEG);
+        AVDictionary *thread_opt = NULL;
+        if (!codec) {
+            av_log(avctx, AV_LOG_ERROR, "MJPEG codec not found\n");
+            return AVERROR_DECODER_NOT_FOUND;
+        }
+
+        a->mjpeg_avctx = avcodec_alloc_context3(codec);
+
+        av_dict_set(&thread_opt, "threads", "1", 0); // Is this needed ?
+        a->mjpeg_avctx->refcounted_frames = 1;
+        a->mjpeg_avctx->flags = avctx->flags;
+        a->mjpeg_avctx->idct_algo = avctx->idct_algo;
+        a->mjpeg_avctx->lowres = avctx->lowres;
+        a->mjpeg_avctx->width = avctx->width;
+        a->mjpeg_avctx->height = avctx->height;
+
+        if ((ret = ff_codec_open2_recursive(a->mjpeg_avctx, codec, &thread_opt)) < 0) {
+            av_log(avctx, AV_LOG_ERROR, "MJPEG codec failed to open\n");
+        }
+        av_dict_free(&thread_opt);
+
+        return ret;
+    }
 
     if ((ret = av_image_check_size(avctx->width, avctx->height, 0, avctx)) < 0)
         return ret;
@@ -68,8 +91,8 @@ static av_cold int end(AVCodecContext *avctx)
 {
     AVRnContext *a = avctx->priv_data;
 
-    if(a->is_mjpeg)
-        ff_mjpeg_decode_end(avctx);
+    avcodec_close(a->mjpeg_avctx);
+    av_freep(&a->mjpeg_avctx);
 
     return 0;
 }
@@ -83,8 +106,27 @@ static int decode_frame(AVCodecContext *avctx, void *data,
     int buf_size       = avpkt->size;
     int y, ret, true_height;
 
-    if(a->is_mjpeg)
-        return ff_mjpeg_decode_frame(avctx, data, got_frame, avpkt);
+    if(a->is_mjpeg) {
+        ret = avcodec_decode_video2(a->mjpeg_avctx, data, got_frame, avpkt);
+
+        if (ret >= 0 && *got_frame && avctx->width <= p->width && avctx->height <= p->height) {
+            int shift = p->height - avctx->height;
+            int subsample_h, subsample_v;
+
+            av_pix_fmt_get_chroma_sub_sample(p->format, &subsample_h, &subsample_v);
+
+            p->data[0] += p->linesize[0] * shift;
+            if (p->data[2]) {
+                p->data[1] += p->linesize[1] * (shift>>subsample_v);
+                p->data[2] += p->linesize[2] * (shift>>subsample_v);
+            }
+
+            p->width  = avctx->width;
+            p->height = avctx->height;
+        }
+        avctx->pix_fmt = a->mjpeg_avctx->pix_fmt;
+        return ret;
+    }
 
     true_height    = buf_size / (2*avctx->width);
 
@@ -126,6 +168,6 @@ AVCodec ff_avrn_decoder = {
     .init           = init,
     .close          = end,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .max_lowres     = 3,
 };
diff --git a/libavcodec/avs.c b/libavcodec/avs.c
index 82056896..345d628d 100644
--- a/libavcodec/avs.c
+++ b/libavcodec/avs.c
@@ -185,5 +185,5 @@ AVCodec ff_avs_decoder = {
     .init           = avs_decode_init,
     .decode         = avs_decode_frame,
     .close          = avs_decode_end,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/avuidec.c b/libavcodec/avuidec.c
index 7fb644cc..5117844f 100644
--- a/libavcodec/avuidec.c
+++ b/libavcodec/avuidec.c
@@ -126,5 +126,5 @@ AVCodec ff_avui_decoder = {
     .id           = AV_CODEC_ID_AVUI,
     .init         = avui_decode_init,
     .decode       = avui_decode_frame,
-    .capabilities = CODEC_CAP_DR1,
+    .capabilities = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/avuienc.c b/libavcodec/avuienc.c
index b91f7829..b2199067 100644
--- a/libavcodec/avuienc.c
+++ b/libavcodec/avuienc.c
@@ -30,7 +30,7 @@ static av_cold int avui_encode_init(AVCodecContext *avctx)
         av_log(avctx, AV_LOG_ERROR, "Only 720x486 and 720x576 are supported.\n");
         return AVERROR(EINVAL);
     }
-    if (!(avctx->extradata = av_mallocz(144 + FF_INPUT_BUFFER_PADDING_SIZE)))
+    if (!(avctx->extradata = av_mallocz(144 + AV_INPUT_BUFFER_PADDING_SIZE)))
         return AVERROR(ENOMEM);
     avctx->extradata_size = 144;
     memcpy(avctx->extradata, "\0\0\0\x18""APRGAPRG0001", 16);
@@ -44,11 +44,6 @@ static av_cold int avui_encode_init(AVCodecContext *avctx)
     AV_WB32(avctx->extradata + 48, avctx->height);
     memcpy(avctx->extradata + 52, "\0\0\0\x1\0\0\0\x20\0\0\0\x2", 12);
 
-    avctx->coded_frame = av_frame_alloc();
-    if (!avctx->coded_frame) {
-        av_log(avctx, AV_LOG_ERROR, "Could not allocate frame.\n");
-        return AVERROR(ENOMEM);
-    }
 
     return 0;
 }
@@ -67,16 +62,14 @@ static int avui_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         skip = 16;
     }
     size = 2 * avctx->width * (avctx->height + skip) + 8 * interlaced;
-    if ((ret = ff_alloc_packet2(avctx, pkt, size)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, pkt, size, size)) < 0)
         return ret;
     dst = pkt->data;
     if (!interlaced) {
+        memset(dst, 0, avctx->width * skip);
         dst += avctx->width * skip;
     }
 
-    avctx->coded_frame->key_frame = 1;
-    avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
-
     for (i = 0; i <= interlaced; i++) {
         uint8_t *src;
         if (interlaced && avctx->height == 486) {
@@ -84,6 +77,7 @@ static int avui_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         } else {
             src = pic->data[0] + i * pic->linesize[0];
         }
+        memset(dst, 0, avctx->width * skip + 4 * i);
         dst += avctx->width * skip + 4 * i;
         for (j = 0; j < avctx->height; j += interlaced + 1) {
             memcpy(dst, src, avctx->width * 2);
@@ -97,13 +91,6 @@ static int avui_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     return 0;
 }
 
-static av_cold int avui_encode_close(AVCodecContext *avctx)
-{
-    av_frame_free(&avctx->coded_frame);
-
-    return 0;
-}
-
 AVCodec ff_avui_encoder = {
     .name         = "avui",
     .long_name    = NULL_IF_CONFIG_SMALL("Avid Meridien Uncompressed"),
@@ -111,7 +98,6 @@ AVCodec ff_avui_encoder = {
     .id           = AV_CODEC_ID_AVUI,
     .init         = avui_encode_init,
     .encode2      = avui_encode_frame,
-    .close        = avui_encode_close,
-    .capabilities = CODEC_CAP_EXPERIMENTAL,
+    .capabilities = AV_CODEC_CAP_EXPERIMENTAL | AV_CODEC_CAP_INTRA_ONLY,
     .pix_fmts     = (const enum AVPixelFormat[]){ AV_PIX_FMT_UYVY422, AV_PIX_FMT_NONE },
 };
diff --git a/libavcodec/bethsoftvideo.c b/libavcodec/bethsoftvideo.c
index 37cd22eb..97b745d3 100644
--- a/libavcodec/bethsoftvideo.c
+++ b/libavcodec/bethsoftvideo.c
@@ -162,5 +162,5 @@ AVCodec ff_bethsoftvid_decoder = {
     .init           = bethsoftvid_decode_init,
     .close          = bethsoftvid_decode_end,
     .decode         = bethsoftvid_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/bfi.c b/libavcodec/bfi.c
index 77bca490..6727629b 100644
--- a/libavcodec/bfi.c
+++ b/libavcodec/bfi.c
@@ -184,5 +184,5 @@ AVCodec ff_bfi_decoder = {
     .init           = bfi_decode_init,
     .close          = bfi_decode_close,
     .decode         = bfi_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/bink.c b/libavcodec/bink.c
index c793fa79..131eaa16 100644
--- a/libavcodec/bink.c
+++ b/libavcodec/bink.c
@@ -1354,5 +1354,5 @@ AVCodec ff_bink_decoder = {
     .close          = decode_end,
     .decode         = decode_frame,
     .flush          = flush,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/binkaudio.c b/libavcodec/binkaudio.c
index f30ea123..5cc23311 100644
--- a/libavcodec/binkaudio.c
+++ b/libavcodec/binkaudio.c
@@ -302,10 +302,10 @@ static int decode_frame(AVCodecContext *avctx, void *data,
             av_log(avctx, AV_LOG_ERROR, "Packet is too small\n");
             return AVERROR_INVALIDDATA;
         }
-        buf = av_realloc(s->packet_buffer, avpkt->size + FF_INPUT_BUFFER_PADDING_SIZE);
+        buf = av_realloc(s->packet_buffer, avpkt->size + AV_INPUT_BUFFER_PADDING_SIZE);
         if (!buf)
             return AVERROR(ENOMEM);
-        memset(buf + avpkt->size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
+        memset(buf + avpkt->size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
         s->packet_buffer = buf;
         memcpy(s->packet_buffer, avpkt->data, avpkt->size);
         if ((ret = init_get_bits8(gb, s->packet_buffer, avpkt->size)) < 0)
@@ -343,7 +343,7 @@ AVCodec ff_binkaudio_rdft_decoder = {
     .init           = decode_init,
     .close          = decode_end,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DELAY | CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_DR1,
 };
 
 AVCodec ff_binkaudio_dct_decoder = {
@@ -355,5 +355,5 @@ AVCodec ff_binkaudio_dct_decoder = {
     .init           = decode_init,
     .close          = decode_end,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DELAY | CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/bintext.c b/libavcodec/bintext.c
index 97fceb12..90bbe67b 100644
--- a/libavcodec/bintext.c
+++ b/libavcodec/bintext.c
@@ -227,7 +227,7 @@ AVCodec ff_bintext_decoder = {
     .init           = decode_init,
     .close          = decode_end,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
 #endif
 #if CONFIG_XBIN_DECODER
@@ -240,7 +240,7 @@ AVCodec ff_xbin_decoder = {
     .init           = decode_init,
     .close          = decode_end,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
 #endif
 #if CONFIG_IDF_DECODER
@@ -253,6 +253,6 @@ AVCodec ff_idf_decoder = {
     .init           = decode_init,
     .close          = decode_end,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
 #endif
diff --git a/libavcodec/bitstream.c b/libavcodec/bitstream.c
index 924cc519..1acb7a33 100644
--- a/libavcodec/bitstream.c
+++ b/libavcodec/bitstream.c
@@ -30,6 +30,7 @@
 
 #include "libavutil/atomic.h"
 #include "libavutil/avassert.h"
+#include "libavutil/qsort.h"
 #include "avcodec.h"
 #include "internal.h"
 #include "mathops.h"
@@ -333,7 +334,7 @@ int ff_init_vlc_sparse(VLC *vlc_arg, int nb_bits, int nb_codes,
     }
     COPY(buf[j].bits > nb_bits);
     // qsort is the slowest part of init_vlc, and could probably be improved or avoided
-    qsort(buf, j, sizeof(VLCcode), compare_vlcspec);
+    AV_QSORT(buf, j, struct VLCcode, compare_vlcspec);
     COPY(buf[j].bits && buf[j].bits <= nb_bits);
     nb_codes = j;
 
diff --git a/libavcodec/bitstream_filter.c b/libavcodec/bitstream_filter.c
index a4e437df..fb690b68 100644
--- a/libavcodec/bitstream_filter.c
+++ b/libavcodec/bitstream_filter.c
@@ -73,6 +73,7 @@ void av_bitstream_filter_close(AVBitStreamFilterContext *bsfc)
     if (bsfc->filter->close)
         bsfc->filter->close(bsfc);
     av_freep(&bsfc->priv_data);
+    av_freep(&bsfc->args);
     av_parser_close(bsfc->parser);
     av_free(bsfc);
 }
@@ -84,6 +85,6 @@ int av_bitstream_filter_filter(AVBitStreamFilterContext *bsfc,
 {
     *poutbuf      = (uint8_t *)buf;
     *poutbuf_size = buf_size;
-    return bsfc->filter->filter(bsfc, avctx, args, poutbuf, poutbuf_size,
-                                buf, buf_size, keyframe);
+    return bsfc->filter->filter(bsfc, avctx, args ? args : bsfc->args,
+                                poutbuf, poutbuf_size, buf, buf_size, keyframe);
 }
diff --git a/libavcodec/blockdsp.c b/libavcodec/blockdsp.c
index f5259f63..a5c527a4 100644
--- a/libavcodec/blockdsp.c
+++ b/libavcodec/blockdsp.c
@@ -25,12 +25,12 @@
 #include "blockdsp.h"
 #include "version.h"
 
-static void clear_block_8_c(int16_t *block)
+static void clear_block_c(int16_t *block)
 {
     memset(block, 0, sizeof(int16_t) * 64);
 }
 
-static void clear_blocks_8_c(int16_t *blocks)
+static void clear_blocks_c(int16_t *blocks)
 {
     memset(blocks, 0, sizeof(int16_t) * 6 * 64);
 }
@@ -57,24 +57,20 @@ static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
 
 av_cold void ff_blockdsp_init(BlockDSPContext *c, AVCodecContext *avctx)
 {
-    const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8;
-
-    c->clear_block  = clear_block_8_c;
-    c->clear_blocks = clear_blocks_8_c;
+    c->clear_block  = clear_block_c;
+    c->clear_blocks = clear_blocks_c;
 
     c->fill_block_tab[0] = fill_block16_c;
     c->fill_block_tab[1] = fill_block8_c;
 
     if (ARCH_ALPHA)
-        ff_blockdsp_init_alpha(c, high_bit_depth);
+        ff_blockdsp_init_alpha(c);
     if (ARCH_ARM)
-        ff_blockdsp_init_arm(c, high_bit_depth);
+        ff_blockdsp_init_arm(c);
     if (ARCH_PPC)
-        ff_blockdsp_init_ppc(c, high_bit_depth);
+        ff_blockdsp_init_ppc(c);
     if (ARCH_X86)
-#if FF_API_XVMC
-        ff_blockdsp_init_x86(c, high_bit_depth, avctx);
-#else
-        ff_blockdsp_init_x86(c, high_bit_depth);
-#endif /* FF_API_XVMC */
+        ff_blockdsp_init_x86(c, avctx);
+    if (ARCH_MIPS)
+        ff_blockdsp_init_mips(c);
 }
diff --git a/libavcodec/blockdsp.h b/libavcodec/blockdsp.h
index c7ad265d..95e1d0f3 100644
--- a/libavcodec/blockdsp.h
+++ b/libavcodec/blockdsp.h
@@ -40,14 +40,10 @@ typedef struct BlockDSPContext {
 
 void ff_blockdsp_init(BlockDSPContext *c, AVCodecContext *avctx);
 
-void ff_blockdsp_init_alpha(BlockDSPContext *c, unsigned high_bit_depth);
-void ff_blockdsp_init_arm(BlockDSPContext *c, unsigned high_bit_depth);
-void ff_blockdsp_init_ppc(BlockDSPContext *c, unsigned high_bit_depth);
-#if FF_API_XVMC
-void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned high_bit_depth,
-                          AVCodecContext *avctx);
-#else
-void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned high_bit_depth);
-#endif /* FF_API_XVMC */
+void ff_blockdsp_init_alpha(BlockDSPContext *c);
+void ff_blockdsp_init_arm(BlockDSPContext *c);
+void ff_blockdsp_init_ppc(BlockDSPContext *c);
+void ff_blockdsp_init_x86(BlockDSPContext *c, AVCodecContext *avctx);
+void ff_blockdsp_init_mips(BlockDSPContext *c);
 
 #endif /* AVCODEC_BLOCKDSP_H */
diff --git a/libavcodec/bmp.c b/libavcodec/bmp.c
index a35ed1ac..fa1d6a53 100644
--- a/libavcodec/bmp.c
+++ b/libavcodec/bmp.c
@@ -276,7 +276,7 @@ static int bmp_decode_frame(AVCodecContext *avctx,
             p->linesize[0] = -p->linesize[0];
         }
         bytestream2_init(&gb, buf, dsize);
-        ff_msrle_decode(avctx, (AVPicture*)p, depth, &gb);
+        ff_msrle_decode(avctx, p, depth, &gb);
         if (height < 0) {
             p->data[0]    +=  p->linesize[0] * (avctx->height - 1);
             p->linesize[0] = -p->linesize[0];
@@ -337,6 +337,20 @@ static int bmp_decode_frame(AVCodecContext *avctx,
             return AVERROR_INVALIDDATA;
         }
     }
+    if (avctx->pix_fmt == AV_PIX_FMT_BGRA) {
+        for (i = 0; i < avctx->height; i++) {
+            int j;
+            uint8_t *ptr = p->data[0] + p->linesize[0]*i + 3;
+            for (j = 0; j < avctx->width; j++) {
+                if (ptr[4*j])
+                    break;
+            }
+            if (j < avctx->width)
+                break;
+        }
+        if (i == avctx->height)
+            avctx->pix_fmt = p->format = AV_PIX_FMT_BGR0;
+    }
 
     *got_frame = 1;
 
@@ -349,5 +363,5 @@ AVCodec ff_bmp_decoder = {
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_BMP,
     .decode         = bmp_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/bmp_parser.c b/libavcodec/bmp_parser.c
index c9493dc3..7ab32a0b 100644
--- a/libavcodec/bmp_parser.c
+++ b/libavcodec/bmp_parser.c
@@ -63,7 +63,7 @@ static int bmp_parse(AVCodecParserContext *s, AVCodecContext *avctx,
                     continue;
                 }
                 bpc->pc.frame_start_found++;
-                bpc->remaining_size = bpc->fsize + i - 17;
+                bpc->remaining_size = bpc->fsize + FFMAX(i - 17, 0);
 
                 if (bpc->pc.index + i > 17) {
                     next = i - 17;
diff --git a/libavcodec/bmpenc.c b/libavcodec/bmpenc.c
index 2a1956dc..e829d684 100644
--- a/libavcodec/bmpenc.c
+++ b/libavcodec/bmpenc.c
@@ -60,10 +60,6 @@ static av_cold int bmp_encode_init(AVCodecContext *avctx){
         return AVERROR(EINVAL);
     }
 
-    avctx->coded_frame = av_frame_alloc();
-    if (!avctx->coded_frame)
-        return AVERROR(ENOMEM);
-
     return 0;
 }
 
@@ -78,8 +74,12 @@ static int bmp_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     int bit_count = avctx->bits_per_coded_sample;
     uint8_t *ptr, *buf;
 
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
     avctx->coded_frame->key_frame = 1;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
     switch (avctx->pix_fmt) {
     case AV_PIX_FMT_RGB444:
         compression = BMP_BITFIELDS;
@@ -118,7 +118,7 @@ static int bmp_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 #define SIZE_BITMAPINFOHEADER 40
     hsize = SIZE_BITMAPFILEHEADER + SIZE_BITMAPINFOHEADER + (pal_entries << 2);
     n_bytes = n_bytes_image + hsize;
-    if ((ret = ff_alloc_packet2(avctx, pkt, n_bytes)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, pkt, n_bytes, 0)) < 0)
         return ret;
     buf = pkt->data;
     bytestream_put_byte(&buf, 'B');                   // BITMAPFILEHEADER.bfType
@@ -163,12 +163,6 @@ static int bmp_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     return 0;
 }
 
-static av_cold int bmp_encode_close(AVCodecContext *avctx)
-{
-    av_frame_free(&avctx->coded_frame);
-    return 0;
-}
-
 AVCodec ff_bmp_encoder = {
     .name           = "bmp",
     .long_name      = NULL_IF_CONFIG_SMALL("BMP (Windows and OS/2 bitmap)"),
@@ -176,7 +170,6 @@ AVCodec ff_bmp_encoder = {
     .id             = AV_CODEC_ID_BMP,
     .init           = bmp_encode_init,
     .encode2        = bmp_encode_frame,
-    .close          = bmp_encode_close,
     .pix_fmts       = (const enum AVPixelFormat[]){
         AV_PIX_FMT_BGRA, AV_PIX_FMT_BGR24,
         AV_PIX_FMT_RGB565, AV_PIX_FMT_RGB555, AV_PIX_FMT_RGB444,
diff --git a/libavcodec/bmvaudio.c b/libavcodec/bmvaudio.c
index 0e473df9..b1587ab3 100644
--- a/libavcodec/bmvaudio.c
+++ b/libavcodec/bmvaudio.c
@@ -85,5 +85,5 @@ AVCodec ff_bmv_audio_decoder = {
     .id             = AV_CODEC_ID_BMV_AUDIO,
     .init           = bmv_aud_decode_init,
     .decode         = bmv_aud_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/bmvvideo.c b/libavcodec/bmvvideo.c
index 76a3d6f1..97f850db 100644
--- a/libavcodec/bmvvideo.c
+++ b/libavcodec/bmvvideo.c
@@ -294,5 +294,5 @@ AVCodec ff_bmv_video_decoder = {
     .priv_data_size = sizeof(BMVDecContext),
     .init           = decode_init,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/brenderpix.c b/libavcodec/brenderpix.c
index 02d922f8..0556858d 100644
--- a/libavcodec/brenderpix.c
+++ b/libavcodec/brenderpix.c
@@ -288,5 +288,5 @@ AVCodec ff_brender_pix_decoder = {
     .type         = AVMEDIA_TYPE_VIDEO,
     .id           = AV_CODEC_ID_BRENDER_PIX,
     .decode       = pix_decode_frame,
-    .capabilities = CODEC_CAP_DR1,
+    .capabilities = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/bswapdsp.h b/libavcodec/bswapdsp.h
index f167d77f..4d190922 100644
--- a/libavcodec/bswapdsp.h
+++ b/libavcodec/bswapdsp.h
@@ -16,8 +16,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef AVCODEC_BSWAP_BUF_H
-#define AVCODEC_BSWAP_BUF_H
+#ifndef AVCODEC_BSWAPDSP_H
+#define AVCODEC_BSWAPDSP_H
 
 #include <stdint.h>
 
@@ -29,4 +29,4 @@ typedef struct BswapDSPContext {
 void ff_bswapdsp_init(BswapDSPContext *c);
 void ff_bswapdsp_init_x86(BswapDSPContext *c);
 
-#endif /* AVCODEC_BSWAP_BUF_H */
+#endif /* AVCODEC_BSWAPDSP_H */
diff --git a/libavcodec/c93.c b/libavcodec/c93.c
index eff8887c..fd026acb 100644
--- a/libavcodec/c93.c
+++ b/libavcodec/c93.c
@@ -182,7 +182,7 @@ static int decode_frame(AVCodecContext *avctx, void *data,
                         int from_y = offset / WIDTH;
                         if (block_type == C93_4X4_FROM_CURR && from_y == y+j &&
                             (FFABS(from_x - x-i) < 4 || FFABS(from_x - x-i) > WIDTH-4)) {
-                            avpriv_request_sample(avctx, "block overlap %d %d %d %d\n", from_x, x+i, from_y, y+j);
+                            avpriv_request_sample(avctx, "block overlap %d %d %d %d", from_x, x+i, from_y, y+j);
                             return AVERROR_INVALIDDATA;
                         }
                         if ((ret = copy_block(avctx, &out[j*stride+i],
@@ -268,5 +268,5 @@ AVCodec ff_c93_decoder = {
     .init           = decode_init,
     .close          = decode_end,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/cabac.c b/libavcodec/cabac.c
index 8cc9333e..a9fafbdd 100644
--- a/libavcodec/cabac.c
+++ b/libavcodec/cabac.c
@@ -32,7 +32,131 @@
 #include "cabac.h"
 #include "cabac_functions.h"
 
-#include "cabac_tablegen.h"
+const uint8_t ff_h264_cabac_tables[512 + 4*2*64 + 4*64 + 63] = {
+    9,8,7,7,6,6,6,6,5,5,5,5,5,5,5,5,
+    4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    // LPS range
+    -128,    -128,    -128,    -128,    -128,    -128,    123,     123,
+    116,     116,     111,     111,     105,     105,     100,     100,
+    95,      95,      90,      90,      85,      85,      81,      81,
+    77,      77,      73,      73,      69,      69,      66,      66,
+    62,      62,      59,      59,      56,      56,      53,      53,
+    51,      51,      48,      48,      46,      46,      43,      43,
+    41,      41,      39,      39,      37,      37,      35,      35,
+    33,      33,      32,      32,      30,      30,      29,      29,
+    27,      27,      26,      26,      24,      24,      23,      23,
+    22,      22,      21,      21,      20,      20,      19,      19,
+    18,      18,      17,      17,      16,      16,      15,      15,
+    14,      14,      14,      14,      13,      13,      12,      12,
+    12,      12,      11,      11,      11,      11,      10,      10,
+    10,      10,      9,       9,       9,       9,       8,       8,
+    8,       8,       7,       7,       7,       7,       7,       7,
+    6,       6,       6,       6,       6,       6,       2,       2,
+    -80,     -80,     -89,     -89,     -98,     -98,     -106,    -106,
+    -114,    -114,    -121,    -121,    -128,    -128,    122,     122,
+    116,     116,     110,     110,     104,     104,     99,      99,
+    94,      94,      89,      89,      85,      85,      80,      80,
+    76,      76,      72,      72,      69,      69,      65,      65,
+    62,      62,      59,      59,      56,      56,      53,      53,
+    50,      50,      48,      48,      45,      45,      43,      43,
+    41,      41,      39,      39,      37,      37,      35,      35,
+    33,      33,      31,      31,      30,      30,      28,      28,
+    27,      27,      26,      26,      24,      24,      23,      23,
+    22,      22,      21,      21,      20,      20,      19,      19,
+    18,      18,      17,      17,      16,      16,      15,      15,
+    14,      14,      14,      14,      13,      13,      12,      12,
+    12,      12,      11,      11,      11,      11,      10,      10,
+    9,       9,       9,       9,       9,       9,       8,       8,
+    8,       8,       7,       7,       7,       7,       2,       2,
+    -48,     -48,     -59,     -59,     -69,     -69,     -78,     -78,
+    -87,     -87,     -96,     -96,     -104,    -104,    -112,    -112,
+    -119,    -119,    -126,    -126,    123,     123,     117,     117,
+    111,     111,     105,     105,     100,     100,     95,      95,
+    90,      90,      86,      86,      81,      81,      77,      77,
+    73,      73,      69,      69,      66,      66,      63,      63,
+    59,      59,      56,      56,      54,      54,      51,      51,
+    48,      48,      46,      46,      43,      43,      41,      41,
+    39,      39,      37,      37,      35,      35,      33,      33,
+    32,      32,      30,      30,      29,      29,      27,      27,
+    26,      26,      25,      25,      23,      23,      22,      22,
+    21,      21,      20,      20,      19,      19,      18,      18,
+    17,      17,      16,      16,      15,      15,      15,      15,
+    14,      14,      13,      13,      12,      12,      12,      12,
+    11,      11,      11,      11,      10,      10,      10,      10,
+    9,       9,       9,       9,       8,       8,       2,       2,
+    -16,     -16,     -29,     -29,     -40,     -40,     -51,     -51,
+    -61,     -61,     -71,     -71,     -81,     -81,     -90,     -90,
+    -98,     -98,     -106,    -106,    -114,    -114,    -121,    -121,
+    -128,    -128,    122,     122,     116,     116,     110,     110,
+    104,     104,     99,      99,      94,      94,      89,      89,
+    85,      85,      80,      80,      76,      76,      72,      72,
+    69,      69,      65,      65,      62,      62,      59,      59,
+    56,      56,      53,      53,      50,      50,      48,      48,
+    45,      45,      43,      43,      41,      41,      39,      39,
+    37,      37,      35,      35,      33,      33,      31,      31,
+    30,      30,      28,      28,      27,      27,      25,      25,
+    24,      24,      23,      23,      22,      22,      21,      21,
+    20,      20,      19,      19,      18,      18,      17,      17,
+    16,      16,      15,      15,      14,      14,      14,      14,
+    13,      13,      12,      12,      12,      12,      11,      11,
+    11,      11,      10,      10,      9,       9,       2,       2,
+    // mlps state
+    127,     126,     77,      76,      77,      76,      75,      74,
+    75,      74,      75,      74,      73,      72,      73,      72,
+    73,      72,      71,      70,      71,      70,      71,      70,
+    69,      68,      69,      68,      67,      66,      67,      66,
+    67,      66,      65,      64,      65,      64,      63,      62,
+    61,      60,      61,      60,      61,      60,      59,      58,
+    59,      58,      57,      56,      55,      54,      55,      54,
+    53,      52,      53,      52,      51,      50,      49,      48,
+    49,      48,      47,      46,      45,      44,      45,      44,
+    43,      42,      43,      42,      39,      38,      39,      38,
+    37,      36,      37,      36,      33,      32,      33,      32,
+    31,      30,      31,      30,      27,      26,      27,      26,
+    25,      24,      23,      22,      23,      22,      19,      18,
+    19,      18,      17,      16,      15,      14,      13,      12,
+    11,      10,      9,       8,       9,       8,       5,       4,
+    5,       4,       3,       2,       1,       0,       0,       1,
+    2,       3,       4,       5,       6,       7,       8,       9,
+    10,      11,      12,      13,      14,      15,      16,      17,
+    18,      19,      20,      21,      22,      23,      24,      25,
+    26,      27,      28,      29,      30,      31,      32,      33,
+    34,      35,      36,      37,      38,      39,      40,      41,
+    42,      43,      44,      45,      46,      47,      48,      49,
+    50,      51,      52,      53,      54,      55,      56,      57,
+    58,      59,      60,      61,      62,      63,      64,      65,
+    66,      67,      68,      69,      70,      71,      72,      73,
+    74,      75,      76,      77,      78,      79,      80,      81,
+    82,      83,      84,      85,      86,      87,      88,      89,
+    90,      91,      92,      93,      94,      95,      96,      97,
+    98,      99,      100,     101,     102,     103,     104,     105,
+    106,     107,     108,     109,     110,     111,     112,     113,
+    114,     115,     116,     117,     118,     119,     120,     121,
+    122,     123,     124,     125,     124,     125,     126,     127,
+    // last_coeff_flag_offset_8x8
+    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
+    5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
+};
 
 /**
  *
@@ -51,7 +175,7 @@ void ff_init_cabac_encoder(CABACContext *c, uint8_t *buf, int buf_size){
  *
  * @param buf_size size of buf in bits
  */
-void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size){
+int ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size){
     c->bytestream_start=
     c->bytestream= buf;
     c->bytestream_end= buf + buf_size;
@@ -59,23 +183,23 @@ void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size){
 #if CABAC_BITS == 16
     c->low =  (*c->bytestream++)<<18;
     c->low+=  (*c->bytestream++)<<10;
+    // Keep our fetches on a 2-byte boundary as this should avoid ever having to
+    // do unaligned loads if the compiler (or asm) optimises the double byte
+    // load into a single instruction
+    if(((uintptr_t)c->bytestream & 1) == 0) {
+        c->low += (1 << 9);
+    }
+    else {
+        c->low += ((*c->bytestream++) << 2) + 2;
+    }
 #else
     c->low =  (*c->bytestream++)<<10;
-#endif
     c->low+= ((*c->bytestream++)<<2) + 2;
+#endif
     c->range= 0x1FE;
-}
-
-void ff_init_cabac_states(void)
-{
-    static int initialized = 0;
-
-    if (initialized)
-        return;
-
-    cabac_tableinit();
-
-    initialized = 1;
+    if ((c->range<<(CABAC_BITS+1)) < c->low)
+        return AVERROR_INVALIDDATA;
+    return 0;
 }
 
 #ifdef TEST
@@ -180,7 +304,6 @@ int main(void){
 
     av_lfg_init(&prng, 1);
     ff_init_cabac_encoder(&c, b, SIZE);
-    ff_init_cabac_states();
 
     for(i=0; i<SIZE; i++){
         if(2*i<SIZE) r[i] = av_lfg_get(&prng) % 7;
@@ -195,7 +318,9 @@ int main(void){
         put_cabac(&c, state, r[i]&1);
     }
 
-    put_cabac_terminate(&c, 1);
+    i= put_cabac_terminate(&c, 1);
+    b[i++] = av_lfg_get(&prng);
+    b[i  ] = av_lfg_get(&prng);
 
     ff_init_cabac_decoder(&c, b, SIZE);
 
diff --git a/libavcodec/cabac.h b/libavcodec/cabac.h
index f9eafed1..1bf1c620 100644
--- a/libavcodec/cabac.h
+++ b/libavcodec/cabac.h
@@ -31,12 +31,7 @@
 
 #include "put_bits.h"
 
-#if CONFIG_HARDCODED_TABLES
-#define CABAC_TABLE_CONST const
-#else
-#define CABAC_TABLE_CONST
-#endif
-extern CABAC_TABLE_CONST uint8_t ff_h264_cabac_tables[512 + 4*2*64 + 4*64 + 63];
+extern const uint8_t ff_h264_cabac_tables[512 + 4*2*64 + 4*64 + 63];
 #define H264_NORM_SHIFT_OFFSET 0
 #define H264_LPS_RANGE_OFFSET 512
 #define H264_MLPS_STATE_OFFSET 1024
@@ -56,7 +51,6 @@ typedef struct CABACContext{
 }CABACContext;
 
 void ff_init_cabac_encoder(CABACContext *c, uint8_t *buf, int buf_size);
-void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size);
-void ff_init_cabac_states(void);
+int ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size);
 
 #endif /* AVCODEC_CABAC_H */
diff --git a/libavcodec/cabac_functions.h b/libavcodec/cabac_functions.h
index 15dba29f..cbb186bc 100644
--- a/libavcodec/cabac_functions.h
+++ b/libavcodec/cabac_functions.h
@@ -46,11 +46,12 @@
 #   include "x86/cabac.h"
 #endif
 
-static CABAC_TABLE_CONST uint8_t * const ff_h264_norm_shift = ff_h264_cabac_tables + H264_NORM_SHIFT_OFFSET;
-static CABAC_TABLE_CONST uint8_t * const ff_h264_lps_range = ff_h264_cabac_tables + H264_LPS_RANGE_OFFSET;
-static CABAC_TABLE_CONST uint8_t * const ff_h264_mlps_state = ff_h264_cabac_tables + H264_MLPS_STATE_OFFSET;
-static CABAC_TABLE_CONST uint8_t * const ff_h264_last_coeff_flag_offset_8x8 = ff_h264_cabac_tables + H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET;
+static const uint8_t * const ff_h264_norm_shift = ff_h264_cabac_tables + H264_NORM_SHIFT_OFFSET;
+static const uint8_t * const ff_h264_lps_range = ff_h264_cabac_tables + H264_LPS_RANGE_OFFSET;
+static const uint8_t * const ff_h264_mlps_state = ff_h264_cabac_tables + H264_MLPS_STATE_OFFSET;
+static const uint8_t * const ff_h264_last_coeff_flag_offset_8x8 = ff_h264_cabac_tables + H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET;
 
+#if !defined(get_cabac_bypass) || !defined(get_cabac_terminate)
 static void refill(CABACContext *c){
 #if CABAC_BITS == 16
         c->low+= (c->bytestream[0]<<9) + (c->bytestream[1]<<1);
@@ -63,7 +64,9 @@ static void refill(CABACContext *c){
 #endif
         c->bytestream += CABAC_BITS / 8;
 }
+#endif
 
+#ifndef get_cabac_terminate
 static inline void renorm_cabac_decoder_once(CABACContext *c){
     int shift= (uint32_t)(c->range - 0x100)>>31;
     c->range<<= shift;
@@ -71,13 +74,18 @@ static inline void renorm_cabac_decoder_once(CABACContext *c){
     if(!(c->low & CABAC_MASK))
         refill(c);
 }
+#endif
 
 #ifndef get_cabac_inline
 static void refill2(CABACContext *c){
-    int i, x;
-
+    int i;
+    unsigned x;
+#if !HAVE_FAST_CLZ
     x= c->low ^ (c->low-1);
     i= 7 - ff_h264_norm_shift[x>>(CABAC_BITS-1)];
+#else
+    i = ff_ctz(c->low) - CABAC_BITS;
+#endif
 
     x= -CABAC_MASK;
 
@@ -93,7 +101,9 @@ static void refill2(CABACContext *c){
 #endif
         c->bytestream += CABAC_BITS/8;
 }
+#endif
 
+#ifndef get_cabac_inline
 static av_always_inline int get_cabac_inline(CABACContext *c, uint8_t * const state){
     int s = *state;
     int RangeLPS= ff_h264_lps_range[2*(c->range&0xC0) + s];
@@ -165,6 +175,7 @@ static av_always_inline int get_cabac_bypass_sign(CABACContext *c, int val){
  *
  * @return the number of bytes read or 0 if no end
  */
+#ifndef get_cabac_terminate
 static int av_unused get_cabac_terminate(CABACContext *c){
     c->range -= 2;
     if(c->low < c->range<<(CABAC_BITS+1)){
@@ -174,11 +185,13 @@ static int av_unused get_cabac_terminate(CABACContext *c){
         return c->bytestream - c->bytestream_start;
     }
 }
+#endif
 
 /**
  * Skip @p n bytes and reset the decoder.
  * @return the address of the first skipped byte or NULL if there's less than @p n bytes left
  */
+#ifndef skip_bytes
 static av_unused const uint8_t* skip_bytes(CABACContext *c, int n) {
     const uint8_t *ptr = c->bytestream;
 
@@ -190,9 +203,11 @@ static av_unused const uint8_t* skip_bytes(CABACContext *c, int n) {
 #endif
     if ((int) (c->bytestream_end - ptr) < n)
         return NULL;
-    ff_init_cabac_decoder(c, ptr + n, c->bytestream_end - ptr - n);
+    if (ff_init_cabac_decoder(c, ptr + n, c->bytestream_end - ptr - n) < 0)
+        return NULL;
 
     return ptr;
 }
+#endif
 
 #endif /* AVCODEC_CABAC_FUNCTIONS_H */
diff --git a/libavcodec/cabac_tablegen.h b/libavcodec/cabac_tablegen.h
deleted file mode 100644
index a6379122..00000000
--- a/libavcodec/cabac_tablegen.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Header file for hardcoded CABAC table
- *
- * Copyright (c) 2014 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVCODEC_CABAC_TABLEGEN_H
-#define AVCODEC_CABAC_TABLEGEN_H
-
-#if CONFIG_HARDCODED_TABLES
-#define cabac_tableinit()
-#include "libavcodec/cabac_tables.h"
-#else
-uint8_t ff_h264_cabac_tables[512 + 4*2*64 + 4*64 + 63];
-
-static const uint8_t lps_range[64][4]= {
-{128,176,208,240}, {128,167,197,227}, {128,158,187,216}, {123,150,178,205},
-{116,142,169,195}, {111,135,160,185}, {105,128,152,175}, {100,122,144,166},
-{ 95,116,137,158}, { 90,110,130,150}, { 85,104,123,142}, { 81, 99,117,135},
-{ 77, 94,111,128}, { 73, 89,105,122}, { 69, 85,100,116}, { 66, 80, 95,110},
-{ 62, 76, 90,104}, { 59, 72, 86, 99}, { 56, 69, 81, 94}, { 53, 65, 77, 89},
-{ 51, 62, 73, 85}, { 48, 59, 69, 80}, { 46, 56, 66, 76}, { 43, 53, 63, 72},
-{ 41, 50, 59, 69}, { 39, 48, 56, 65}, { 37, 45, 54, 62}, { 35, 43, 51, 59},
-{ 33, 41, 48, 56}, { 32, 39, 46, 53}, { 30, 37, 43, 50}, { 29, 35, 41, 48},
-{ 27, 33, 39, 45}, { 26, 31, 37, 43}, { 24, 30, 35, 41}, { 23, 28, 33, 39},
-{ 22, 27, 32, 37}, { 21, 26, 30, 35}, { 20, 24, 29, 33}, { 19, 23, 27, 31},
-{ 18, 22, 26, 30}, { 17, 21, 25, 28}, { 16, 20, 23, 27}, { 15, 19, 22, 25},
-{ 14, 18, 21, 24}, { 14, 17, 20, 23}, { 13, 16, 19, 22}, { 12, 15, 18, 21},
-{ 12, 14, 17, 20}, { 11, 14, 16, 19}, { 11, 13, 15, 18}, { 10, 12, 15, 17},
-{ 10, 12, 14, 16}, {  9, 11, 13, 15}, {  9, 11, 12, 14}, {  8, 10, 12, 14},
-{  8,  9, 11, 13}, {  7,  9, 11, 12}, {  7,  9, 10, 12}, {  7,  8, 10, 11},
-{  6,  8,  9, 11}, {  6,  7,  9, 10}, {  6,  7,  8,  9}, {  2,  2,  2,  2},
-};
-
-static const uint8_t mps_state[64]= {
-  1, 2, 3, 4, 5, 6, 7, 8,
-  9,10,11,12,13,14,15,16,
- 17,18,19,20,21,22,23,24,
- 25,26,27,28,29,30,31,32,
- 33,34,35,36,37,38,39,40,
- 41,42,43,44,45,46,47,48,
- 49,50,51,52,53,54,55,56,
- 57,58,59,60,61,62,62,63,
-};
-
-static const uint8_t lps_state[64]= {
-  0, 0, 1, 2, 2, 4, 4, 5,
-  6, 7, 8, 9, 9,11,11,12,
- 13,13,15,15,16,16,18,18,
- 19,19,21,21,22,22,23,24,
- 24,25,26,26,27,27,28,29,
- 29,30,30,30,31,32,32,33,
- 33,33,34,34,35,35,35,36,
- 36,36,37,37,37,38,38,63,
-};
-
-static const uint8_t last_coeff_flag_offset_8x8[63] = {
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
- 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
-};
-
-static av_cold void cabac_tableinit(void)
-{
-    int i, j;
-    for (i = 0; i < 512; i++)
-        ff_h264_norm_shift[i] = i ? 8 - av_log2(i) : 9;
-
-    for(i=0; i<64; i++){
-        for(j=0; j<4; j++){ //FIXME check if this is worth the 1 shift we save
-            ff_h264_lps_range[j*2*64+2*i+0]=
-            ff_h264_lps_range[j*2*64+2*i+1]= lps_range[i][j];
-        }
-        ff_h264_mlps_state[128 + 2 * i + 0] = 2 * mps_state[i] + 0;
-        ff_h264_mlps_state[128 + 2 * i + 1] = 2 * mps_state[i] + 1;
-
-        if( i ){
-            ff_h264_mlps_state[128-2*i-1]= 2*lps_state[i]+0;
-            ff_h264_mlps_state[128-2*i-2]= 2*lps_state[i]+1;
-        }else{
-            ff_h264_mlps_state[128-2*i-1]= 1;
-            ff_h264_mlps_state[128-2*i-2]= 0;
-        }
-    }
-    for(i=0; i< 63; i++){
-      ff_h264_last_coeff_flag_offset_8x8[i] = last_coeff_flag_offset_8x8[i];
-    }
-}
-#endif /* CONFIG_HARDCODED_TABLES */
-
-#endif /* AVCODEC_CABAC_TABLEGEN_H */
diff --git a/libavcodec/cavsdec.c b/libavcodec/cavsdec.c
index bf8c301e..70ac6f8a 100644
--- a/libavcodec/cavsdec.c
+++ b/libavcodec/cavsdec.c
@@ -32,7 +32,6 @@
 #include "cavs.h"
 #include "internal.h"
 #include "mpeg12data.h"
-#include "mpegvideo.h"
 
 static const uint8_t mv_scan[4] = {
     MV_FWD_X0, MV_FWD_X1,
@@ -1262,6 +1261,6 @@ AVCodec ff_cavs_decoder = {
     .init           = ff_cavs_init,
     .close          = ff_cavs_end,
     .decode         = cavs_decode_frame,
-    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
     .flush          = cavs_flush,
 };
diff --git a/libavcodec/cbrt_fixed_tablegen.c b/libavcodec/cbrt_fixed_tablegen.c
new file mode 100644
index 00000000..24d2fbb7
--- /dev/null
+++ b/libavcodec/cbrt_fixed_tablegen.c
@@ -0,0 +1,24 @@
+/*
+ * Generate a header file for hardcoded AAC cube-root table
+ *
+ * Copyright (c) 2010 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define USE_FIXED 1
+#include "cbrt_tablegen_template.c"
diff --git a/libavcodec/cbrt_tablegen.c b/libavcodec/cbrt_tablegen.c
index e0a8e63a..8c2235e9 100644
--- a/libavcodec/cbrt_tablegen.c
+++ b/libavcodec/cbrt_tablegen.c
@@ -20,18 +20,5 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include <stdlib.h>
-#define CONFIG_HARDCODED_TABLES 0
-#include "cbrt_tablegen.h"
-#include "tableprint.h"
-
-int main(void)
-{
-    cbrt_tableinit();
-
-    write_fileheader();
-
-    WRITE_ARRAY("static const", uint32_t, cbrt_tab);
-
-    return 0;
-}
+#define USE_FIXED 0
+#include "cbrt_tablegen_template.c"
diff --git a/libavcodec/cbrt_tablegen.h b/libavcodec/cbrt_tablegen.h
index d8c77c22..21e4b9a1 100644
--- a/libavcodec/cbrt_tablegen.h
+++ b/libavcodec/cbrt_tablegen.h
@@ -26,26 +26,56 @@
 #include <stdint.h>
 #include <math.h>
 #include "libavutil/attributes.h"
+#include "libavutil/intfloat.h"
+#include "libavcodec/aac_defines.h"
+
+#if USE_FIXED
+#define CBRT(x) lrint((x) * 8192)
+#else
+#define CBRT(x) av_float2int((float)(x))
+#endif
 
 #if CONFIG_HARDCODED_TABLES
+#if USE_FIXED
+#define cbrt_tableinit_fixed()
+#include "libavcodec/cbrt_fixed_tables.h"
+#else
 #define cbrt_tableinit()
 #include "libavcodec/cbrt_tables.h"
+#endif
 #else
 static uint32_t cbrt_tab[1 << 13];
 
-static av_cold void cbrt_tableinit(void)
+static av_cold void AAC_RENAME(cbrt_tableinit)(void)
 {
+    static double cbrt_tab_dbl[1 << 13];
     if (!cbrt_tab[(1<<13) - 1]) {
-        int i;
-        /* cbrtf() isn't available on all systems, so we use powf(). */
-        for (i = 0; i < 1<<13; i++) {
-            union {
-                float f;
-                uint32_t i;
-            } f;
-            f.f = pow(i, 1.0 / 3.0) * i;
-            cbrt_tab[i] = f.i;
+        int i, j, k;
+        double cbrt_val;
+
+        for (i = 1; i < 1<<13; i++)
+            cbrt_tab_dbl[i] = 1;
+
+        /* have to take care of non-squarefree numbers */
+        for (i = 2; i < 90; i++) {
+            if (cbrt_tab_dbl[i] == 1) {
+                cbrt_val = i * cbrt(i);
+                for (k = i; k < 1<<13; k *= i)
+                    for (j = k; j < 1<<13; j += k)
+                        cbrt_tab_dbl[j] *= cbrt_val;
+            }
+        }
+
+        for (i = 91; i <= 8191; i+= 2) {
+            if (cbrt_tab_dbl[i] == 1) {
+                cbrt_val = i * cbrt(i);
+                for (j = i; j < 1<<13; j += i)
+                    cbrt_tab_dbl[j] *= cbrt_val;
+            }
         }
+
+        for (i = 0; i < 1<<13; i++)
+            cbrt_tab[i] = CBRT(cbrt_tab_dbl[i]);
     }
 }
 #endif /* CONFIG_HARDCODED_TABLES */
diff --git a/libavcodec/aac_tablegen.c b/libavcodec/cbrt_tablegen_template.c
similarity index 76%
rename from libavcodec/aac_tablegen.c
rename to libavcodec/cbrt_tablegen_template.c
index 33a179f5..7dcab911 100644
--- a/libavcodec/aac_tablegen.c
+++ b/libavcodec/cbrt_tablegen_template.c
@@ -1,7 +1,7 @@
 /*
- * Generate a header file for hardcoded AAC tables
+ * Generate a header file for hardcoded AAC cube-root table
  *
- * Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
+ * Copyright (c) 2010 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
  *
  * This file is part of FFmpeg.
  *
@@ -22,16 +22,17 @@
 
 #include <stdlib.h>
 #define CONFIG_HARDCODED_TABLES 0
-#include "aac_tablegen.h"
+#include "libavutil/tablegen.h"
+#include "cbrt_tablegen.h"
 #include "tableprint.h"
 
 int main(void)
 {
-    ff_aac_tableinit();
+    AAC_RENAME(cbrt_tableinit)();
 
     write_fileheader();
 
-    WRITE_ARRAY("const", float, ff_aac_pow2sf_tab);
+    WRITE_ARRAY("static const", uint32_t, cbrt_tab);
 
     return 0;
 }
diff --git a/libavcodec/ccaption_dec.c b/libavcodec/ccaption_dec.c
index 264d21cf..790f0718 100644
--- a/libavcodec/ccaption_dec.c
+++ b/libavcodec/ccaption_dec.c
@@ -30,6 +30,8 @@
 #define UNSET_FLAG(var, val) ( (var) &=  ~( 1 << (val)) )
 #define CHECK_FLAG(var, val) ( (var) &    ( 1 << (val)) )
 
+static const AVRational ass_tb = {1, 100};
+
 /*
  * TODO list
  * 1) handle font and color completely
@@ -37,9 +39,7 @@
 enum cc_mode {
     CCMODE_POPON,
     CCMODE_PAINTON,
-    CCMODE_ROLLUP_2,
-    CCMODE_ROLLUP_3,
-    CCMODE_ROLLUP_4,
+    CCMODE_ROLLUP,
     CCMODE_TEXT,
 };
 
@@ -100,40 +100,6 @@ static const unsigned char pac2_attribs[32][3] = // Color, font, ident
     /* total 32 entries */
 };
 
-/* 0-255 needs 256 spaces */
-static const uint8_t parity_table[256] = { 0, 1, 1, 0, 1, 0, 0, 1,
-                                           1, 0, 0, 1, 0, 1, 1, 0,
-                                           1, 0, 0, 1, 0, 1, 1, 0,
-                                           0, 1, 1, 0, 1, 0, 0, 1,
-                                           1, 0, 0, 1, 0, 1, 1, 0,
-                                           0, 1, 1, 0, 1, 0, 0, 1,
-                                           0, 1, 1, 0, 1, 0, 0, 1,
-                                           1, 0, 0, 1, 0, 1, 1, 0,
-                                           1, 0, 0, 1, 0, 1, 1, 0,
-                                           0, 1, 1, 0, 1, 0, 0, 1,
-                                           0, 1, 1, 0, 1, 0, 0, 1,
-                                           1, 0, 0, 1, 0, 1, 1, 0,
-                                           0, 1, 1, 0, 1, 0, 0, 1,
-                                           1, 0, 0, 1, 0, 1, 1, 0,
-                                           1, 0, 0, 1, 0, 1, 1, 0,
-                                           0, 1, 1, 0, 1, 0, 0, 1,
-                                           1, 0, 0, 1, 0, 1, 1, 0,
-                                           0, 1, 1, 0, 1, 0, 0, 1,
-                                           0, 1, 1, 0, 1, 0, 0, 1,
-                                           1, 0, 0, 1, 0, 1, 1, 0,
-                                           0, 1, 1, 0, 1, 0, 0, 1,
-                                           1, 0, 0, 1, 0, 1, 1, 0,
-                                           1, 0, 0, 1, 0, 1, 1, 0,
-                                           0, 1, 1, 0, 1, 0, 0, 1,
-                                           0, 1, 1, 0, 1, 0, 0, 1,
-                                           1, 0, 0, 1, 0, 1, 1, 0,
-                                           1, 0, 0, 1, 0, 1, 1, 0,
-                                           0, 1, 1, 0, 1, 0, 0, 1,
-                                           1, 0, 0, 1, 0, 1, 1, 0,
-                                           0, 1, 1, 0, 1, 0, 0, 1,
-                                           0, 1, 1, 0, 1, 0, 0, 1,
-                                           1, 0, 0, 1, 0, 1, 1, 0 };
-
 struct Screen {
     /* +1 is used to compensate null character of string */
     uint8_t characters[SCREEN_ROWS][SCREEN_COLUMNS+1];
@@ -145,12 +111,12 @@ struct Screen {
      * for setting row 1  use row | (1 << 0)
      * for setting row 15 use row | (1 << 14)
      */
-    int16_t  row_used;
+    int16_t row_used;
 };
 
-
 typedef struct CCaptionSubContext {
     AVClass *class;
+    int real_time;
     struct Screen screen[2];
     int active_screen;
     uint8_t cursor_row;
@@ -158,17 +124,19 @@ typedef struct CCaptionSubContext {
     uint8_t cursor_color;
     uint8_t cursor_font;
     AVBPrint buffer;
-    int screen_changed;
+    int buffer_changed;
     int rollup;
-    enum  cc_mode mode;
+    enum cc_mode mode;
     int64_t start_time;
     /* visible screen time */
     int64_t startv_time;
     int64_t end_time;
+    int screen_touched;
+    int64_t last_real_time;
     char prev_cmd[2];
     /* buffer to store pkt data */
     AVBufferRef *pktbuf;
-}CCaptionSubContext;
+} CCaptionSubContext;
 
 
 static av_cold int init_decoder(AVCodecContext *avctx)
@@ -178,15 +146,23 @@ static av_cold int init_decoder(AVCodecContext *avctx)
 
     av_bprint_init(&ctx->buffer, 0, AV_BPRINT_SIZE_UNLIMITED);
     /* taking by default roll up to 2 */
-    ctx->mode = CCMODE_ROLLUP_2;
+    ctx->mode = CCMODE_ROLLUP;
     ctx->rollup = 2;
-    ret = ff_ass_subtitle_header_default(avctx);
-    if(ret < 0) {
+    ret = ff_ass_subtitle_header(avctx, "Monospace",
+                                 ASS_DEFAULT_FONT_SIZE,
+                                 ASS_DEFAULT_COLOR,
+                                 ASS_DEFAULT_BACK_COLOR,
+                                 ASS_DEFAULT_BOLD,
+                                 ASS_DEFAULT_ITALIC,
+                                 ASS_DEFAULT_UNDERLINE,
+                                 3,
+                                 ASS_DEFAULT_ALIGNMENT);
+    if (ret < 0) {
         return ret;
     }
     /* allocate pkt buffer */
     ctx->pktbuf = av_buffer_alloc(128);
-    if( !ctx->pktbuf) {
+    if (!ctx->pktbuf) {
         ret = AVERROR(ENOMEM);
     }
     return ret;
@@ -195,27 +171,53 @@ static av_cold int init_decoder(AVCodecContext *avctx)
 static av_cold int close_decoder(AVCodecContext *avctx)
 {
     CCaptionSubContext *ctx = avctx->priv_data;
-    av_bprint_finalize( &ctx->buffer, NULL);
+    av_bprint_finalize(&ctx->buffer, NULL);
     av_buffer_unref(&ctx->pktbuf);
     return 0;
 }
 
+/* Flush callback: drop all caption state so decoding restarts from a clean slate. */
+static void flush_decoder(AVCodecContext *avctx)
+{
+    CCaptionSubContext *cc = avctx->priv_data;
+
+    /* mark both screens empty and discard any text already captured for output */
+    cc->screen[0].row_used = cc->screen[1].row_used = 0;
+    cc->active_screen      = 0;
+    av_bprint_clear(&cc->buffer);
+    cc->buffer_changed = cc->screen_touched = 0;
+    cc->last_real_time = 0;
+    /* same mode defaults as init_decoder(): roll-up captioning with two rows */
+    cc->mode   = CCMODE_ROLLUP;
+    cc->rollup = 2;
+    /* cursor back to row/column 0 with zeroed attributes; forget the last command pair */
+    cc->cursor_row  = cc->cursor_column = 0;
+    cc->cursor_font = cc->cursor_color  = 0;
+    cc->prev_cmd[0] = cc->prev_cmd[1]   = 0;
+}
+
 /**
  * @param ctx closed caption context just to print log
  */
-static int write_char (CCaptionSubContext *ctx, char *row,uint8_t col, char ch)
+static int write_char(CCaptionSubContext *ctx, struct Screen *screen, char ch)
 {
-    if(col < SCREEN_COLUMNS) {
+    uint8_t col = ctx->cursor_column;
+    char *row = screen->characters[ctx->cursor_row];
+    char *font = screen->fonts[ctx->cursor_row];
+
+    if (col < SCREEN_COLUMNS) {
         row[col] = ch;
+        font[col] = ctx->cursor_font;
+        if (ch) ctx->cursor_column++;
         return 0;
     }
     /* We have extra space at end only for null character */
-    else if ( col == SCREEN_COLUMNS && ch == 0) {
+    else if (col == SCREEN_COLUMNS && ch == 0) {
         row[col] = ch;
         return 0;
     }
     else {
-        av_log(ctx, AV_LOG_WARNING,"Data Ignored since exceeding screen width\n");
+        av_log(ctx, AV_LOG_WARNING, "Data Ignored since exceeding screen width\n");
         return AVERROR_INVALIDDATA;
     }
 }
@@ -227,7 +229,7 @@ static int write_char (CCaptionSubContext *ctx, char *row,uint8_t col, char ch)
  * If the second byte doesn't pass parity, it returns INVALIDDATA
  * user can ignore the whole pair and pass the other pair.
  */
-static int validate_cc_data_pair (uint8_t *cc_data_pair)
+static int validate_cc_data_pair(uint8_t *cc_data_pair)
 {
     uint8_t cc_valid = (*cc_data_pair & 4) >>2;
     uint8_t cc_type = *cc_data_pair & 3;
@@ -237,30 +239,28 @@ static int validate_cc_data_pair (uint8_t *cc_data_pair)
 
     // if EIA-608 data then verify parity.
     if (cc_type==0 || cc_type==1) {
-        if (!parity_table[cc_data_pair[2]]) {
+        if (!av_parity(cc_data_pair[2])) {
             return AVERROR_INVALIDDATA;
         }
-        if (!parity_table[cc_data_pair[1]]) {
+        if (!av_parity(cc_data_pair[1])) {
             cc_data_pair[1]=0x7F;
         }
     }
 
     //Skip non-data
-    if( (cc_data_pair[0] == 0xFA || cc_data_pair[0] == 0xFC || cc_data_pair[0] == 0xFD )
+    if ((cc_data_pair[0] == 0xFA || cc_data_pair[0] == 0xFC || cc_data_pair[0] == 0xFD)
          && (cc_data_pair[1] & 0x7F) == 0 && (cc_data_pair[2] & 0x7F) == 0)
         return AVERROR_PATCHWELCOME;
 
     //skip 708 data
-    if(cc_type == 3 || cc_type == 2 )
+    if (cc_type == 3 || cc_type == 2)
         return AVERROR_PATCHWELCOME;
 
     /* remove parity bit */
     cc_data_pair[1] &= 0x7F;
     cc_data_pair[2] &= 0x7F;
 
-
     return 0;
-
 }
 
 static struct Screen *get_writing_screen(CCaptionSubContext *ctx)
@@ -270,9 +270,7 @@ static struct Screen *get_writing_screen(CCaptionSubContext *ctx)
         // use Inactive screen
         return ctx->screen + !ctx->active_screen;
     case CCMODE_PAINTON:
-    case CCMODE_ROLLUP_2:
-    case CCMODE_ROLLUP_3:
-    case CCMODE_ROLLUP_4:
+    case CCMODE_ROLLUP:
     case CCMODE_TEXT:
         // use active screen
         return ctx->screen + ctx->active_screen;
@@ -286,7 +284,7 @@ static void roll_up(CCaptionSubContext *ctx)
     struct Screen *screen;
     int i, keep_lines;
 
-    if(ctx->mode == CCMODE_TEXT)
+    if (ctx->mode == CCMODE_TEXT)
         return;
 
     screen = get_writing_screen(ctx);
@@ -296,88 +294,123 @@ static void roll_up(CCaptionSubContext *ctx)
      */
     keep_lines = FFMIN(ctx->cursor_row + 1, ctx->rollup);
 
-    for( i = 0; i < ctx->cursor_row - keep_lines; i++ )
+    for (i = 0; i < SCREEN_ROWS; i++) {
+        if (i > ctx->cursor_row - keep_lines && i <= ctx->cursor_row)
+            continue;
         UNSET_FLAG(screen->row_used, i);
+    }
 
-
-    for( i = 0; i < keep_lines && screen->row_used; i++ ) {
+    for (i = 0; i < keep_lines && screen->row_used; i++) {
         const int i_row = ctx->cursor_row - keep_lines + i + 1;
 
-        memcpy( screen->characters[i_row], screen->characters[i_row+1], SCREEN_COLUMNS );
-        memcpy( screen->colors[i_row], screen->colors[i_row+1], SCREEN_COLUMNS);
-        memcpy( screen->fonts[i_row], screen->fonts[i_row+1], SCREEN_COLUMNS);
-        if(CHECK_FLAG(screen->row_used, i_row + 1))
+        memcpy(screen->characters[i_row], screen->characters[i_row+1], SCREEN_COLUMNS);
+        memcpy(screen->colors[i_row], screen->colors[i_row+1], SCREEN_COLUMNS);
+        memcpy(screen->fonts[i_row], screen->fonts[i_row+1], SCREEN_COLUMNS);
+        if (CHECK_FLAG(screen->row_used, i_row + 1))
             SET_FLAG(screen->row_used, i_row);
-
     }
-    UNSET_FLAG(screen->row_used, ctx->cursor_row);
 
+    UNSET_FLAG(screen->row_used, ctx->cursor_row);
 }
 
-static int reap_screen(CCaptionSubContext *ctx, int64_t pts)
+static int capture_screen(CCaptionSubContext *ctx)
 {
     int i;
-    int ret = 0;
     struct Screen *screen = ctx->screen + ctx->active_screen;
-    ctx->start_time = ctx->startv_time;
+    enum cc_font prev_font = CCFONT_REGULAR;
+    av_bprint_clear(&ctx->buffer);
 
-    for( i = 0; screen->row_used && i < SCREEN_ROWS; i++)
+    for (i = 0; screen->row_used && i < SCREEN_ROWS; i++)
     {
-        if(CHECK_FLAG(screen->row_used,i)) {
-            char *str = screen->characters[i];
-            /* skip space */
-            while (*str == ' ')
-                str++;
-
-            av_bprintf(&ctx->buffer, "%s\\N", str);
-            ret = av_bprint_is_complete(&ctx->buffer);
-            if( ret == 0) {
-                ret = AVERROR(ENOMEM);
-                break;
+        if (CHECK_FLAG(screen->row_used, i)) {
+            const char *row = screen->characters[i];
+            const char *font = screen->fonts[i];
+            int j = 0;
+
+            /* skip leading space */
+            while (row[j] == ' ')
+                j++;
+
+            for (; j < SCREEN_COLUMNS; j++) {
+                const char *e_tag = "", *s_tag = "";
+
+                if (row[j] == 0)
+                    break;
+
+                if (prev_font != font[j]) {
+                    switch (prev_font) {
+                    case CCFONT_ITALICS:
+                        e_tag = "{\\i0}";
+                        break;
+                    case CCFONT_UNDERLINED:
+                        e_tag = "{\\u0}";
+                        break;
+                    case CCFONT_UNDERLINED_ITALICS:
+                        e_tag = "{\\u0}{\\i0}";
+                        break;
+                    }
+                    switch (font[j]) {
+                    case CCFONT_ITALICS:
+                        s_tag = "{\\i1}";
+                        break;
+                    case CCFONT_UNDERLINED:
+                        s_tag = "{\\u1}";
+                        break;
+                    case CCFONT_UNDERLINED_ITALICS:
+                        s_tag = "{\\u1}{\\i1}";
+                        break;
+                    }
+                }
+                prev_font = font[j];
+                av_bprintf(&ctx->buffer, "%s%s%c", e_tag, s_tag, row[j]);
             }
+            av_bprintf(&ctx->buffer, "\\N");
         }
-
     }
-    if(screen->row_used && ctx->buffer.len >= 2 ) {
+    if (!av_bprint_is_complete(&ctx->buffer))
+        return AVERROR(ENOMEM);
+    if (screen->row_used && ctx->buffer.len >= 2) {
         ctx->buffer.len -= 2;
         ctx->buffer.str[ctx->buffer.len] = 0;
     }
+    ctx->buffer_changed = 1;
+    return 0;
+}
+
+static int reap_screen(CCaptionSubContext *ctx, int64_t pts)
+{
+    ctx->start_time = ctx->startv_time;
     ctx->startv_time = pts;
     ctx->end_time = pts;
-    return ret;
+    return capture_screen(ctx);
 }
 
-static void handle_textattr( CCaptionSubContext *ctx, uint8_t hi, uint8_t lo )
+static void handle_textattr(CCaptionSubContext *ctx, uint8_t hi, uint8_t lo)
 {
     int i = lo - 0x20;
-    int ret;
     struct Screen *screen = get_writing_screen(ctx);
-    char *row = screen->characters[ctx->cursor_row];
 
-    if( i >= 32)
+    if (i >= 32)
         return;
 
-    ctx->cursor_color =  pac2_attribs[i][0];
+    ctx->cursor_color = pac2_attribs[i][0];
     ctx->cursor_font = pac2_attribs[i][1];
 
-    SET_FLAG(screen->row_used,ctx->cursor_row);
-    ret = write_char(ctx, row, ctx->cursor_column, ' ');
-    if(ret == 0)
-        ctx->cursor_column++;
+    SET_FLAG(screen->row_used, ctx->cursor_row);
+    write_char(ctx, screen, ' ');
 }
 
-static void handle_pac( CCaptionSubContext *ctx, uint8_t hi, uint8_t lo )
+static void handle_pac(CCaptionSubContext *ctx, uint8_t hi, uint8_t lo)
 {
     static const int8_t row_map[] = {
         11, -1, 1, 2, 3, 4, 12, 13, 14, 15, 5, 6, 7, 8, 9, 10
     };
     const int index = ( (hi<<1) & 0x0e) | ( (lo>>5) & 0x01 );
     struct Screen *screen = get_writing_screen(ctx);
-    char *row;
-    int indent,i,ret;
+    int indent, i;
 
-    if( row_map[index] <= 0 ) {
-        av_log(ctx, AV_LOG_DEBUG,"Invalid pac index encountered\n");
+    if (row_map[index] <= 0) {
+        av_log(ctx, AV_LOG_DEBUG, "Invalid pac index encountered\n");
         return;
     }
 
@@ -388,135 +421,152 @@ static void handle_pac( CCaptionSubContext *ctx, uint8_t hi, uint8_t lo )
     ctx->cursor_font = pac2_attribs[lo][1];
     ctx->cursor_column = 0;
     indent = pac2_attribs[lo][2];
-    row = screen->characters[ctx->cursor_row];
-    for(i = 0;i < indent; i++) {
-        ret = write_char(ctx, row, ctx->cursor_column, ' ');
-        if(  ret == 0 )
-            ctx->cursor_column++;
+    for (i = 0; i < indent; i++) {
+        write_char(ctx, screen, ' ');
     }
-
 }
 
 /**
  * @param pts it is required to set end time
  */
-static int handle_edm(CCaptionSubContext *ctx,int64_t pts)
+static void handle_edm(CCaptionSubContext *ctx, int64_t pts)
 {
-    int ret = 0;
     struct Screen *screen = ctx->screen + ctx->active_screen;
 
-    reap_screen(ctx, pts);
+    // In buffered mode, keep writing to screen until it is wiped.
+    // Before wiping the display, capture contents to emit subtitle.
+    if (!ctx->real_time)
+        reap_screen(ctx, pts);
+
     screen->row_used = 0;
-    ctx->screen_changed = 1;
-    return ret;
+
+    // In realtime mode, emit an empty caption so the last one doesn't
+    // stay on the screen.
+    if (ctx->real_time)
+        reap_screen(ctx, pts);
 }
 
-static int handle_eoc(CCaptionSubContext *ctx, int64_t pts)
+static void handle_eoc(CCaptionSubContext *ctx, int64_t pts)
 {
-    int ret;
-    ret = handle_edm(ctx,pts);
+    // In buffered mode, we wait til the *next* EOC and
+    // reap what was already on the screen since the last EOC.
+    if (!ctx->real_time)
+        handle_edm(ctx,pts);
+
     ctx->active_screen = !ctx->active_screen;
     ctx->cursor_column = 0;
-    return ret;
+
+    // In realtime mode, we display the buffered contents (after
+    // flipping the buffer to active above) as soon as EOC arrives.
+    if (ctx->real_time)
+        reap_screen(ctx, pts);
 }
 
-static void handle_delete_end_of_row( CCaptionSubContext *ctx, char hi, char lo)
+static void handle_delete_end_of_row(CCaptionSubContext *ctx, char hi, char lo)
 {
     struct Screen *screen = get_writing_screen(ctx);
-    char *row = screen->characters[ctx->cursor_row];
-    write_char(ctx, row, ctx->cursor_column, 0);
-
+    write_char(ctx, screen, 0);
 }
 
 static void handle_char(CCaptionSubContext *ctx, char hi, char lo, int64_t pts)
 {
     struct Screen *screen = get_writing_screen(ctx);
-    char *row = screen->characters[ctx->cursor_row];
-    int ret;
 
-    SET_FLAG(screen->row_used,ctx->cursor_row);
+    SET_FLAG(screen->row_used, ctx->cursor_row);
 
-    ret = write_char(ctx, row, ctx->cursor_column, hi);
-    if( ret == 0 )
-        ctx->cursor_column++;
+    write_char(ctx, screen, hi);
 
-    if(lo) {
-        ret = write_char(ctx, row, ctx->cursor_column, lo);
-        if ( ret == 0 )
-            ctx->cursor_column++;
+    if (lo) {
+        write_char(ctx, screen, lo);
     }
-    write_char(ctx, row, ctx->cursor_column, 0);
+    write_char(ctx, screen, 0);
+
+    if (ctx->mode != CCMODE_POPON)
+        ctx->screen_touched = 1;
 
     /* reset prev command since character can repeat */
     ctx->prev_cmd[0] = 0;
     ctx->prev_cmd[1] = 0;
     if (lo)
-       av_dlog(ctx, "(%c,%c)\n",hi,lo);
+       ff_dlog(ctx, "(%c,%c)\n", hi, lo);
     else
-       av_dlog(ctx, "(%c)\n",hi);
+       ff_dlog(ctx, "(%c)\n", hi);
 }
 
-static int process_cc608(CCaptionSubContext *ctx, int64_t pts, uint8_t hi, uint8_t lo)
+static void process_cc608(CCaptionSubContext *ctx, int64_t pts, uint8_t hi, uint8_t lo)
 {
-    int ret = 0;
-#define COR3(var, with1, with2, with3)  ( (var) == (with1) ||  (var) == (with2) || (var) == (with3) )
-    if ( hi == ctx->prev_cmd[0] && lo == ctx->prev_cmd[1]) {
-    /* ignore redundant command */
-    } else if ( (hi == 0x10 && (lo >= 0x40 || lo <= 0x5f)) ||
+    if (hi == ctx->prev_cmd[0] && lo == ctx->prev_cmd[1]) {
+        /* ignore redundant command */
+    } else if ( (hi == 0x10 && (lo >= 0x40 && lo <= 0x5f)) ||
               ( (hi >= 0x11 && hi <= 0x17) && (lo >= 0x40 && lo <= 0x7f) ) ) {
         handle_pac(ctx, hi, lo);
     } else if ( ( hi == 0x11 && lo >= 0x20 && lo <= 0x2f ) ||
                 ( hi == 0x17 && lo >= 0x2e && lo <= 0x2f) ) {
         handle_textattr(ctx, hi, lo);
-    } else if ( COR3(hi, 0x14, 0x15, 0x1C) && lo == 0x20 ) {
-    /* resume caption loading */
-        ctx->mode = CCMODE_POPON;
-    } else if ( COR3(hi, 0x14, 0x15, 0x1C) && lo == 0x24 ) {
-        handle_delete_end_of_row(ctx, hi, lo);
-    } else if ( COR3(hi, 0x14, 0x15, 0x1C) && lo == 0x25 ) {
-        ctx->rollup = 2;
-        ctx->mode = CCMODE_ROLLUP_2;
-    } else if ( COR3(hi, 0x14, 0x15, 0x1C) && lo == 0x26 ) {
-        ctx->rollup = 3;
-        ctx->mode = CCMODE_ROLLUP_3;
-    } else if ( COR3(hi, 0x14, 0x15, 0x1C) && lo == 0x27 ) {
-        ctx->rollup = 4;
-        ctx->mode = CCMODE_ROLLUP_4;
-    } else if ( COR3(hi, 0x14, 0x15, 0x1C) && lo == 0x29 ) {
-    /* resume direct captioning */
-        ctx->mode = CCMODE_PAINTON;
-    } else if ( COR3(hi, 0x14, 0x15, 0x1C) && lo == 0x2B ) {
-    /* resume text display */
-        ctx->mode = CCMODE_TEXT;
-    } else if ( COR3(hi, 0x14, 0x15, 0x1C) && lo == 0x2C ) {
-    /* erase display memory */
-        ret = handle_edm(ctx, pts);
-    } else if ( COR3(hi, 0x14, 0x15, 0x1C) && lo == 0x2D ) {
-    /* carriage return */
-        av_dlog(ctx, "carriage return\n");
-        reap_screen(ctx, pts);
-        roll_up(ctx);
-        ctx->screen_changed = 1;
-        ctx->cursor_column = 0;
-    } else if ( COR3(hi, 0x14, 0x15, 0x1C) && lo == 0x2F ) {
-    /* end of caption */
-        av_dlog(ctx, "handle_eoc\n");
-        ret = handle_eoc(ctx, pts);
-    } else if (hi>=0x20) {
-    /* Standard characters (always in pairs) */
+    } else if (hi == 0x14 || hi == 0x15 || hi == 0x1c) {
+        switch (lo) {
+        case 0x20:
+            /* resume caption loading */
+            ctx->mode = CCMODE_POPON;
+            break;
+        case 0x24:
+            handle_delete_end_of_row(ctx, hi, lo);
+            break;
+        case 0x25:
+        case 0x26:
+        case 0x27:
+            ctx->rollup = lo - 0x23;
+            ctx->mode = CCMODE_ROLLUP;
+            break;
+        case 0x29:
+            /* resume direct captioning */
+            ctx->mode = CCMODE_PAINTON;
+            break;
+        case 0x2b:
+            /* resume text display */
+            ctx->mode = CCMODE_TEXT;
+            break;
+        case 0x2c:
+            /* erase display memory */
+            handle_edm(ctx, pts);
+            break;
+        case 0x2d:
+            /* carriage return */
+            ff_dlog(ctx, "carriage return\n");
+            if (!ctx->real_time)
+                reap_screen(ctx, pts);
+            roll_up(ctx);
+            ctx->cursor_column = 0;
+            break;
+        case 0x2e:
+            /* erase buffered (non displayed) memory */
+            // Only in realtime mode. In buffered mode, we re-use the inactive screen
+            // for our own buffering.
+            if (ctx->real_time) {
+                struct Screen *screen = ctx->screen + !ctx->active_screen;
+                screen->row_used = 0;
+            }
+            break;
+        case 0x2f:
+            /* end of caption */
+            ff_dlog(ctx, "handle_eoc\n");
+            handle_eoc(ctx, pts);
+            break;
+        default:
+            ff_dlog(ctx, "Unknown command 0x%hhx 0x%hhx\n", hi, lo);
+            break;
+        }
+    } else if (hi >= 0x20) {
+        /* Standard characters (always in pairs) */
         handle_char(ctx, hi, lo, pts);
     } else {
-    /* Ignoring all other non data code */
-        av_dlog(ctx, "Unknown command 0x%hhx 0x%hhx\n", hi, lo);
+        /* Ignoring all other non data code */
+        ff_dlog(ctx, "Unknown command 0x%hhx 0x%hhx\n", hi, lo);
     }
 
     /* set prev command */
-     ctx->prev_cmd[0] = hi;
-     ctx->prev_cmd[1] = lo;
-
-#undef COR3
-    return ret;
-
+    ctx->prev_cmd[0] = hi;
+    ctx->prev_cmd[1] = lo;
 }
 
 static int decode(AVCodecContext *avctx, void *data, int *got_sub, AVPacket *avpkt)
@@ -528,10 +578,10 @@ static int decode(AVCodecContext *avctx, void *data, int *got_sub, AVPacket *avp
     int ret = 0;
     int i;
 
-    if ( ctx->pktbuf->size < len) {
+    if (ctx->pktbuf->size < len) {
         ret = av_buffer_realloc(&ctx->pktbuf, len);
-         if(ret < 0) {
-            av_log(ctx, AV_LOG_WARNING, "Insufficient Memory of %d truncated to %d\n",len, ctx->pktbuf->size);
+         if (ret < 0) {
+            av_log(ctx, AV_LOG_WARNING, "Insufficient Memory of %d truncated to %d\n", len, ctx->pktbuf->size);
             len = ctx->pktbuf->size;
             ret = 0;
         }
@@ -539,35 +589,64 @@ static int decode(AVCodecContext *avctx, void *data, int *got_sub, AVPacket *avp
     memcpy(ctx->pktbuf->data, avpkt->data, len);
     bptr = ctx->pktbuf->data;
 
-
     for (i  = 0; i < len; i += 3) {
         uint8_t cc_type = *(bptr + i) & 3;
-        if (validate_cc_data_pair( bptr + i) )
+        if (validate_cc_data_pair(bptr + i))
             continue;
         /* ignoring data field 1 */
         if(cc_type == 1)
             continue;
         else
             process_cc608(ctx, avpkt->pts, *(bptr + i + 1) & 0x7f, *(bptr + i + 2) & 0x7f);
-        if(ctx->screen_changed && *ctx->buffer.str)
+
+        if (!ctx->buffer_changed)
+            continue;
+        ctx->buffer_changed = 0;
+
+        if (*ctx->buffer.str || ctx->real_time)
         {
-            int start_time = av_rescale_q(ctx->start_time, avctx->time_base, (AVRational){ 1, 100 });
-            int end_time = av_rescale_q(ctx->end_time, avctx->time_base, (AVRational){ 1, 100 });
-            av_dlog(ctx, "cdp writing data (%s)\n",ctx->buffer.str);
-            ret = ff_ass_add_rect_bprint(sub, &ctx->buffer, start_time, end_time - start_time);
+            int64_t sub_pts = ctx->real_time ? avpkt->pts : ctx->start_time;
+            int start_time = av_rescale_q(sub_pts, avctx->time_base, ass_tb);
+            int duration = -1;
+            if (!ctx->real_time) {
+                int end_time = av_rescale_q(ctx->end_time, avctx->time_base, ass_tb);
+                duration = end_time - start_time;
+            }
+            ff_dlog(ctx, "cdp writing data (%s)\n",ctx->buffer.str);
+            ret = ff_ass_add_rect_bprint(sub, &ctx->buffer, start_time, duration);
             if (ret < 0)
                 return ret;
-            sub->pts = av_rescale_q(ctx->start_time, avctx->time_base, AV_TIME_BASE_Q);
-            ctx->screen_changed = 0;
-            av_bprint_clear(&ctx->buffer);
+            sub->pts = av_rescale_q(sub_pts, avctx->time_base, AV_TIME_BASE_Q);
+            ctx->buffer_changed = 0;
+            ctx->last_real_time = avpkt->pts;
+            ctx->screen_touched = 0;
         }
     }
 
+    if (ctx->real_time && ctx->screen_touched &&
+        avpkt->pts > ctx->last_real_time + av_rescale_q(20, ass_tb, avctx->time_base)) {
+        int start_time;
+        ctx->last_real_time = avpkt->pts;
+        ctx->screen_touched = 0;
+
+        capture_screen(ctx);
+        ctx->buffer_changed = 0;
+
+        start_time = av_rescale_q(avpkt->pts, avctx->time_base, ass_tb);
+        ret = ff_ass_add_rect_bprint(sub, &ctx->buffer, start_time, -1);
+        if (ret < 0)
+            return ret;
+        sub->pts = av_rescale_q(avpkt->pts, avctx->time_base, AV_TIME_BASE_Q);
+    }
+
     *got_sub = sub->num_rects > 0;
     return ret;
 }
 
+#define OFFSET(x) offsetof(CCaptionSubContext, x) /* byte offset of the context field backing an option */
+#define SD (AV_OPT_FLAG_SUBTITLE_PARAM | AV_OPT_FLAG_DECODING_PARAM) /* parenthesized so the OR survives any surrounding operator */
 static const AVOption options[] = {
+    { "real_time", "emit subtitle events as they are decoded for real-time display", OFFSET(real_time), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, SD },
     {NULL}
 };
 
@@ -586,6 +665,7 @@ AVCodec ff_ccaption_decoder = {
     .priv_data_size = sizeof(CCaptionSubContext),
     .init           = init_decoder,
     .close          = close_decoder,
+    .flush          = flush_decoder,
     .decode         = decode,
     .priv_class     = &ccaption_dec_class,
 };
diff --git a/libavcodec/cdgraphics.c b/libavcodec/cdgraphics.c
index 340df448..87ad5e79 100644
--- a/libavcodec/cdgraphics.c
+++ b/libavcodec/cdgraphics.c
@@ -49,6 +49,7 @@
 #define CDG_INST_TILE_BLOCK        6
 #define CDG_INST_SCROLL_PRESET    20
 #define CDG_INST_SCROLL_COPY      24
+#define CDG_INST_TRANSPARENT_COL  28
 #define CDG_INST_LOAD_PAL_LO      30
 #define CDG_INST_LOAD_PAL_HIGH    31
 #define CDG_INST_TILE_BLOCK_XOR   38
@@ -67,6 +68,7 @@ typedef struct CDGraphicsContext {
     AVFrame *frame;
     int hscroll;
     int vscroll;
+    int transparency;
 } CDGraphicsContext;
 
 static av_cold int cdg_decode_init(AVCodecContext *avctx)
@@ -76,6 +78,7 @@ static av_cold int cdg_decode_init(AVCodecContext *avctx)
     cc->frame = av_frame_alloc();
     if (!cc->frame)
         return AVERROR(ENOMEM);
+    cc->transparency = -1;
 
     avctx->width   = CDG_FULL_WIDTH;
     avctx->height  = CDG_FULL_HEIGHT;
@@ -120,6 +123,8 @@ static void cdg_load_palette(CDGraphicsContext *cc, uint8_t *data, int low)
         g = ((color >> 4) & 0x000F) * 17;
         b = ((color     ) & 0x000F) * 17;
         palette[i + array_offset] = 0xFFU << 24 | r << 16 | g << 8 | b;
+        if (cc->transparency >= 0)
+            palette[cc->transparency] &= 0xFFFFFF;
     }
     cc->frame->palette_has_changed = 1;
 }
@@ -341,6 +346,9 @@ static int cdg_decode_frame(AVCodecContext *avctx,
             if (ret < 0)
                 return ret;
             break;
+        case CDG_INST_TRANSPARENT_COL:
+            cc->transparency = cdg_data[0] & 0xF;
+            break;
         default:
             break;
         }
@@ -376,5 +384,5 @@ AVCodec ff_cdgraphics_decoder = {
     .init           = cdg_decode_init,
     .close          = cdg_decode_end,
     .decode         = cdg_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/cdxl.c b/libavcodec/cdxl.c
index adccd52e..50d514b2 100644
--- a/libavcodec/cdxl.c
+++ b/libavcodec/cdxl.c
@@ -272,7 +272,7 @@ static int cdxl_decode_frame(AVCodecContext *avctx, void *data,
 
     if (encoding) {
         av_fast_padded_malloc(&c->new_video, &c->new_video_size,
-                              h * w + FF_INPUT_BUFFER_PADDING_SIZE);
+                              h * w + AV_INPUT_BUFFER_PADDING_SIZE);
         if (!c->new_video)
             return AVERROR(ENOMEM);
         if (c->bpp == 8)
@@ -305,5 +305,5 @@ AVCodec ff_cdxl_decoder = {
     .init           = cdxl_decode_init,
     .close          = cdxl_decode_end,
     .decode         = cdxl_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/celp_math.h b/libavcodec/celp_math.h
index 18d3ad94..18888a42 100644
--- a/libavcodec/celp_math.h
+++ b/libavcodec/celp_math.h
@@ -61,6 +61,16 @@ int ff_exp2(uint16_t power);
  */
 int ff_log2_q15(uint32_t value);
 
+/**
+ * Calculate the dot product of 2 int16_t vectors.
+ * @param a input data array
+ * @param b input data array
+ * @param length number of elements
+ *
+ * @return dot product = sum of elementwise products
+ */
+int64_t ff_dot_product(const int16_t *a, const int16_t *b, int length);
+
 /**
  * Shift value left or right depending on sign of offset parameter.
  * @param value value to shift
@@ -74,16 +84,6 @@ static inline int bidir_sal(int value, int offset)
     else           return value <<  offset;
 }
 
-/**
- * returns the dot product of 2 int16_t vectors.
- * @param a input data array
- * @param b input data array
- * @param length number of elements
- *
- * @return dot product = sum of elementwise products
- */
-int64_t ff_dot_product(const int16_t *a, const int16_t *b, int length);
-
 /**
  * Return the dot product.
  * @param a input data array
diff --git a/libavcodec/cfhd.c b/libavcodec/cfhd.c
new file mode 100644
index 00000000..d6d831b6
--- /dev/null
+++ b/libavcodec/cfhd.c
@@ -0,0 +1,761 @@
+/*
+ * Copyright (c) 2015-2016 Kieran Kunhya <kieran@kunhya.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * CFHD Video Decoder
+ */
+
+#include "libavutil/buffer.h"
+#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/opt.h"
+
+#include "avcodec.h"
+#include "internal.h"
+#include "bytestream.h"
+#include "thread.h"
+#include "cfhd.h"
+
+#define SUBBAND_COUNT 10 /* the only value cfhd_decode() accepts for tag 14 (subband count) */
+
+static av_cold int cfhd_decode_init(AVCodecContext *avctx)
+{
+    CFHDContext *ctx = avctx->priv_data;
+    /* Dimensions start at zero; alloc_buffers() sets them via ff_set_dimensions(). */
+    avctx->width               = 0;
+    avctx->height              = 0;
+    avctx->bits_per_raw_sample = 10;
+    ctx->avctx                 = avctx;
+
+    return ff_cfhd_init_vlcs(ctx);
+}
+
+static void init_plane_defaults(CFHDContext *s)
+{
+    /* Restart level/subband tracking (run per frame and on each channel-number tag). */
+    s->subband_num_actual = 0;
+    s->level = s->subband_num = 0;
+}
+
+static void init_frame_defaults(CFHDContext *s)
+{
+    /* Per-frame header state; cfhd_decode() overrides some of it while parsing tags. */
+    init_plane_defaults(s);
+    s->coded_width       = s->coded_height = 0;
+    s->channel_cnt       = 4;
+    s->channel_num       = 0;
+    s->subband_cnt       = 10;
+    s->wavelet_depth     = 3;
+    s->bpc               = 10;
+    s->lowpass_precision = 16;
+    s->quantisation      = 1;
+    s->pshift            = 1;
+    s->codebook          = 0;
+}
+
+/* Adds a cubic term to |level|, restores the sign, scales by quantisation. TODO: merge with VLC tables or use LUT */
+static inline int dequant_and_decompand(int level, int quantisation)
+{
+    const int64_t mag = abs(level), cubic = (768 * mag * mag * mag) / (255 * 255 * 255);
+    return (mag + cubic) * FFSIGN(level) * quantisation;
+}
+
+static inline void filter(int16_t *output, ptrdiff_t out_stride, int16_t *low, ptrdiff_t low_stride,
+                          int16_t *high, ptrdiff_t high_stride, int len, uint8_t clip)
+{
+    /* Builds 2*len strided outputs from len low and len high samples; a non-zero */
+    /* clip additionally routes every stored value through av_clip_uintp2_c().    */
+    int16_t tmp;
+    int i;
+
+    for (i = 0; i < len; i++) {
+        int16_t *even = &output[(2*i+0)*out_stride], *odd = &output[(2*i+1)*out_stride];
+        if (i == 0) {
+            tmp   = (11*low[0*low_stride] - 4*low[1*low_stride] + low[2*low_stride] + 4) >> 3;
+            *even = (tmp + high[0*high_stride]) >> 1;
+            if (clip)
+                *even = av_clip_uintp2_c(*even, clip);
+            tmp  = ( 5*low[0*low_stride] + 4*low[1*low_stride] - low[2*low_stride] + 4) >> 3;
+            *odd = (tmp - high[0*high_stride]) >> 1;
+            if (clip)
+                *odd = av_clip_uintp2_c(*odd, clip);
+        } else if (i == len-1) {
+            tmp   = ( 5*low[i*low_stride] + 4*low[(i-1)*low_stride] - low[(i-2)*low_stride] + 4) >> 3;
+            *even = (tmp + high[i*high_stride]) >> 1;
+            if (clip)
+                *even = av_clip_uintp2_c(*even, clip);
+            tmp  = (11*low[i*low_stride] - 4*low[(i-1)*low_stride] + low[(i-2)*low_stride] + 4) >> 3;
+            *odd = (tmp - high[i*high_stride]) >> 1;
+            if (clip)
+                *odd = av_clip_uintp2_c(*odd, clip);
+        } else {
+            tmp   = (low[(i-1)*low_stride] - low[(i+1)*low_stride] + 4) >> 3;
+            *even = (tmp + low[i*low_stride] + high[i*high_stride]) >> 1;
+            if (clip)
+                *even = av_clip_uintp2_c(*even, clip);
+            tmp  = (low[(i+1)*low_stride] - low[(i-1)*low_stride] + 4) >> 3;
+            *odd = (tmp + low[i*low_stride] - high[i*high_stride]) >> 1;
+            if (clip)
+                *odd = av_clip_uintp2_c(*odd, clip);
+        }
+    }
+}
+
+static void horiz_filter(int16_t *output, int16_t *low, int16_t *high, int width)
+{
+    filter(output, 1, low, 1, high, 1, width, 0); /* unit strides on all three arrays, clip == 0 */
+}
+
+static void horiz_filter_clip(int16_t *output, int16_t *low, int16_t *high, int width, uint8_t clip)
+{
+    filter(output, 1, low, 1, high, 1, width, clip); /* unit strides; clip is forwarded unchanged */
+}
+
+static void vert_filter(int16_t *output, int out_stride, int16_t *low, int low_stride,
+                        int16_t *high, int high_stride, int len)
+{
+    filter(output, out_stride, low, low_stride, high, high_stride, len, 0); /* caller-supplied strides, clip == 0 */
+}
+
+static void free_buffers(AVCodecContext *avctx)
+{
+    CFHDContext *ctx = avctx->priv_data;
+    int p = 3;
+
+    /* Zeroed allocated dimensions make cfhd_decode() call alloc_buffers() again. */
+    ctx->a_width = ctx->a_height = 0;
+    while (p--) {
+        av_freep(&ctx->plane[p].idwt_tmp);
+        av_freep(&ctx->plane[p].idwt_buf);
+    }
+}
+
+static int alloc_buffers(AVCodecContext *avctx)
+{
+    CFHDContext *s = avctx->priv_data;
+    int i, j, k, ret;
+    /* Allocate idwt_buf/idwt_tmp for each of the 3 planes at the current coded size and point subband[]/l_h[] into them. */
+    if ((ret = ff_set_dimensions(avctx, s->coded_width, s->coded_height)) < 0)
+        return ret;
+
+    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_x_shift, &s->chroma_y_shift);
+
+    for (i = 0; i < 3; i++) {
+        int width = i ? avctx->width >> s->chroma_x_shift : avctx->width; /* planes 1 and 2 use the chroma-shifted size */
+        int height = i ? avctx->height >> s->chroma_y_shift : avctx->height;
+        int stride = FFALIGN(width / 8, 8) * 8;
+        int w8, h8, w4, h4, w2, h2;
+        height = FFALIGN(height / 8, 2) * 8; /* from here on 'height' is the padded value */
+        s->plane[i].width = width;
+        s->plane[i].height = height;
+        s->plane[i].stride = stride;
+
+        w8 = FFALIGN(s->plane[i].width / 8, 8); /* 1/8-scale band size; w4/h4 and w2/h2 double it each time */
+        h8 = FFALIGN(s->plane[i].height / 8, 2);
+        w4 = w8 * 2;
+        h4 = h8 * 2;
+        w2 = w4 * 2;
+        h2 = h4 * 2;
+
+        s->plane[i].idwt_buf = av_malloc_array(height * stride, sizeof(*s->plane[i].idwt_buf));
+        s->plane[i].idwt_tmp = av_malloc_array(height * stride, sizeof(*s->plane[i].idwt_tmp));
+        if (!s->plane[i].idwt_buf || !s->plane[i].idwt_tmp) {
+            return AVERROR(ENOMEM); /* nothing is freed here; cfhd_decode() calls free_buffers() on failure */
+        }
+
+        s->plane[i].subband[0] = s->plane[i].idwt_buf; /* all ten subband pointers are offsets into idwt_buf */
+        s->plane[i].subband[1] = s->plane[i].idwt_buf + 2 * w8 * h8;
+        s->plane[i].subband[2] = s->plane[i].idwt_buf + 1 * w8 * h8;
+        s->plane[i].subband[3] = s->plane[i].idwt_buf + 3 * w8 * h8;
+        s->plane[i].subband[4] = s->plane[i].idwt_buf + 2 * w4 * h4;
+        s->plane[i].subband[5] = s->plane[i].idwt_buf + 1 * w4 * h4;
+        s->plane[i].subband[6] = s->plane[i].idwt_buf + 3 * w4 * h4;
+        s->plane[i].subband[7] = s->plane[i].idwt_buf + 2 * w2 * h2;
+        s->plane[i].subband[8] = s->plane[i].idwt_buf + 1 * w2 * h2;
+        s->plane[i].subband[9] = s->plane[i].idwt_buf + 3 * w2 * h2;
+
+        for (j = 0; j < DWT_LEVELS; j++) {
+            for(k = 0; k < 4; k++) {
+                s->plane[i].band[j][k].a_width  = w8 << j; /* allocated band size doubles per level */
+                s->plane[i].band[j][k].a_height = h8 << j;
+            }
+        }
+
+        /* ll2 and ll1 commented out because they are done in-place */
+        s->plane[i].l_h[0] = s->plane[i].idwt_tmp; /* l_h[0], l_h[3] and l_h[6] all alias the start of idwt_tmp */
+        s->plane[i].l_h[1] = s->plane[i].idwt_tmp + 2 * w8 * h8;
+        //s->plane[i].l_h[2] = ll2;
+        s->plane[i].l_h[3] = s->plane[i].idwt_tmp;
+        s->plane[i].l_h[4] = s->plane[i].idwt_tmp + 2 * w4 * h4;
+        //s->plane[i].l_h[5] = ll1;
+        s->plane[i].l_h[6] = s->plane[i].idwt_tmp;
+        s->plane[i].l_h[7] = s->plane[i].idwt_tmp + 2 * w2 * h2;
+    }
+
+    s->a_height = s->coded_height; /* remember which coded size the buffers were built for */
+    s->a_width  = s->coded_width;
+
+    return 0;
+}
+
+static int cfhd_decode(AVCodecContext *avctx, void *data, int *got_frame,
+                       AVPacket *avpkt)
+{
+    CFHDContext *s = avctx->priv_data;
+    GetByteContext gb;
+    ThreadFrame frame = { .f = data };
+    AVFrame *pic = data;
+    int ret = 0, i, j, plane, got_buffer = 0;
+    int16_t *coeff_data;
+    /* Parse 16-bit tag/value pairs, load coefficients, rebuild 3 planes; returns avpkt->size (and sets *got_frame) or <0. */
+    avctx->pix_fmt = AV_PIX_FMT_YUV422P10;
+    init_frame_defaults(s);
+
+    bytestream2_init(&gb, avpkt->data, avpkt->size);
+
+    while (bytestream2_get_bytes_left(&gb) > 4) {
+        /* Bit weird but implement the tag parsing as the spec says */
+        uint16_t tagu   = bytestream2_get_be16(&gb);
+        int16_t tag     = (int16_t)tagu;
+        int8_t tag8     = (int8_t)(tagu >> 8);
+        uint16_t abstag = abs(tag);
+        int8_t abs_tag8 = abs(tag8);
+        uint16_t data   = bytestream2_get_be16(&gb);
+        if (abs_tag8 >= 0x60 && abs_tag8 <= 0x6f) {
+            av_log(avctx, AV_LOG_DEBUG, "large len %x\n", ((tagu & 0xff) << 16) | data);
+        } else if (tag == 20) {
+            av_log(avctx, AV_LOG_DEBUG, "Width %"PRIu16"\n", data);
+            s->coded_width = data;
+        } else if (tag == 21) {
+            av_log(avctx, AV_LOG_DEBUG, "Height %"PRIu16"\n", data);
+            s->coded_height = data;
+        } else if (tag == 101) {
+            av_log(avctx, AV_LOG_DEBUG, "Bits per component: %"PRIu16"\n", data);
+            s->bpc = data;
+        } else if (tag == 12) {
+            av_log(avctx, AV_LOG_DEBUG, "Channel Count: %"PRIu16"\n", data);
+            s->channel_cnt = data;
+            if (data != 3) {
+                av_log(avctx, AV_LOG_ERROR, "Channel Count of %"PRIu16" is unsupported\n", data);
+                ret = AVERROR_PATCHWELCOME;
+                break;
+            }
+        } else if (tag == 14) {
+            av_log(avctx, AV_LOG_DEBUG, "Subband Count: %"PRIu16"\n", data);
+            if (data != SUBBAND_COUNT) {
+                av_log(avctx, AV_LOG_ERROR, "Subband Count of %"PRIu16" is unsupported\n", data);
+                ret = AVERROR_PATCHWELCOME;
+                break;
+            }
+        } else if (tag == 62) {
+            s->channel_num = data;
+            av_log(avctx, AV_LOG_DEBUG, "Channel number %"PRIu16"\n", data);
+            if (s->channel_num > 2) {
+                av_log(avctx, AV_LOG_ERROR, "Invalid channel number\n");
+                ret = AVERROR(EINVAL);
+                break;
+            }
+            init_plane_defaults(s);
+        } else if (tag == 48) {
+            if (s->subband_num != 0 && data == 1)  // hack
+                s->level++;
+            av_log(avctx, AV_LOG_DEBUG, "Subband number %"PRIu16"\n", data);
+            s->subband_num = data;
+            if (s->level >= DWT_LEVELS) {
+                av_log(avctx, AV_LOG_ERROR, "Invalid level\n");
+                ret = AVERROR(EINVAL);
+                break;
+            }
+            if (s->subband_num > 3) {
+                av_log(avctx, AV_LOG_ERROR, "Invalid subband number\n");
+                ret = AVERROR(EINVAL);
+                break;
+            }
+        } else if (tag == 51) {
+            av_log(avctx, AV_LOG_DEBUG, "Subband number actual %"PRIu16"\n", data);
+            s->subband_num_actual = data;
+            if (s->subband_num_actual >= 10) {
+                av_log(avctx, AV_LOG_ERROR, "Invalid subband number actual\n");
+                ret = AVERROR(EINVAL);
+                break;
+            }
+        } else if (tag == 35)
+            av_log(avctx, AV_LOG_DEBUG, "Lowpass precision bits: %"PRIu16"\n", data);
+        else if (tag == 53) {
+            s->quantisation = data;
+            av_log(avctx, AV_LOG_DEBUG, "Quantisation: %"PRIu16"\n", data);
+        } else if (tag == 109) {
+            s->prescale_shift[0] = (data >> 0) & 0x7;
+            s->prescale_shift[1] = (data >> 3) & 0x7;
+            s->prescale_shift[2] = (data >> 6) & 0x7;
+            av_log(avctx, AV_LOG_DEBUG, "Prescale shift (VC-5): %x\n", data);
+        } else if (tag == 27) {
+            s->plane[s->channel_num].band[0][0].width  = data;
+            s->plane[s->channel_num].band[0][0].stride = data;
+            av_log(avctx, AV_LOG_DEBUG, "Lowpass width %"PRIu16"\n", data);
+            if (data < 2 || data > s->plane[s->channel_num].band[0][0].a_width) {
+                av_log(avctx, AV_LOG_ERROR, "Invalid lowpass width\n");
+                ret = AVERROR(EINVAL);
+                break;
+            }
+        } else if (tag == 28) {
+            s->plane[s->channel_num].band[0][0].height = data;
+            av_log(avctx, AV_LOG_DEBUG, "Lowpass height %"PRIu16"\n", data);
+            if (data < 2 || data > s->plane[s->channel_num].band[0][0].a_height) { /* bound by the allocated height, like the width check */
+                av_log(avctx, AV_LOG_ERROR, "Invalid lowpass height\n");
+                ret = AVERROR(EINVAL);
+                break;
+            }
+        } else if (tag == 1)
+            av_log(avctx, AV_LOG_DEBUG, "Sample type? %"PRIu16"\n", data);
+        else if (tag == 10) {
+            if (data != 0) {
+                avpriv_report_missing_feature(avctx, "Transform type of %"PRIu16" is unsupported\n", data);
+                ret = AVERROR_PATCHWELCOME;
+                break;
+            }
+            av_log(avctx, AV_LOG_DEBUG, "Transform-type? %"PRIu16"\n", data);
+        } else if (abstag >= 0x4000 && abstag <= 0x40ff) {
+            av_log(avctx, AV_LOG_DEBUG, "Small chunk length %"PRIu16" %s\n", data * 4, tag < 0 ? "optional" : "required");
+            bytestream2_skipu(&gb, data * 4);
+        } else if (tag == 23) {
+            av_log(avctx, AV_LOG_DEBUG, "Skip frame\n");
+            avpriv_report_missing_feature(avctx, "Skip frame not supported\n");
+            ret = AVERROR_PATCHWELCOME;
+            break;
+        } else if (tag == 2) {
+            av_log(avctx, AV_LOG_DEBUG, "tag=2 header - skipping %i tag/value pairs\n", data);
+            if (data > bytestream2_get_bytes_left(&gb) / 4) {
+                av_log(avctx, AV_LOG_ERROR, "too many tag/value pairs (%d)\n", data);
+                ret = AVERROR_INVALIDDATA;
+                break;
+            }
+            for (i = 0; i < data; i++) {
+                uint16_t tag2 = bytestream2_get_be16(&gb);
+                uint16_t val2 = bytestream2_get_be16(&gb);
+                av_log(avctx, AV_LOG_DEBUG, "Tag/Value = %x %x\n", tag2, val2);
+            }
+        } else if (tag == 41) {
+            s->plane[s->channel_num].band[s->level][s->subband_num].width  = data;
+            s->plane[s->channel_num].band[s->level][s->subband_num].stride = FFALIGN(data, 8);
+            av_log(avctx, AV_LOG_DEBUG, "Highpass width %i channel %i level %i subband %i\n", data, s->channel_num, s->level, s->subband_num);
+            if (data < 2) {
+                av_log(avctx, AV_LOG_ERROR, "Invalid highpass width\n");
+                ret = AVERROR(EINVAL);
+                break;
+            }
+        } else if (tag == 42) {
+            s->plane[s->channel_num].band[s->level][s->subband_num].height = data;
+            av_log(avctx, AV_LOG_DEBUG, "Highpass height %i\n", data);
+            if (data < 2) {
+                av_log(avctx, AV_LOG_ERROR, "Invalid highpass height\n");
+                ret = AVERROR(EINVAL);
+                break;
+            }
+        } else if (tag == 49) {
+            s->plane[s->channel_num].band[s->level][s->subband_num].width  = data;
+            s->plane[s->channel_num].band[s->level][s->subband_num].stride = FFALIGN(data, 8);
+            av_log(avctx, AV_LOG_DEBUG, "Highpass width2 %i\n", data);
+            if (data < 2) {
+                av_log(avctx, AV_LOG_ERROR, "Invalid highpass width2\n");
+                ret = AVERROR(EINVAL);
+                break;
+            }
+        } else if (tag == 50) {
+            s->plane[s->channel_num].band[s->level][s->subband_num].height = data;
+            av_log(avctx, AV_LOG_DEBUG, "Highpass height2 %i\n", data);
+            if (data < 2) {
+                av_log(avctx, AV_LOG_ERROR, "Invalid highpass height2\n");
+                ret = AVERROR(EINVAL);
+                break;
+            }
+        } else if (tag == 71) {
+            s->codebook = data;
+            av_log(avctx, AV_LOG_DEBUG, "Codebook %i\n", s->codebook);
+        } else if (tag == 72) {
+            s->codebook = data;
+            av_log(avctx, AV_LOG_DEBUG, "Other codebook? %i\n", s->codebook);
+        } else if (tag == 70) {
+            av_log(avctx, AV_LOG_DEBUG, "Subsampling or bit-depth flag? %i\n", data);
+            s->bpc = data;
+            if (!(s->bpc == 10 || s->bpc == 12)) {
+                av_log(avctx, AV_LOG_ERROR, "Invalid bits per channel\n");
+                ret = AVERROR(EINVAL);
+                break;
+            }
+        } else if (tag == 84) {
+            av_log(avctx, AV_LOG_DEBUG, "Sample format? %i\n", data);
+            if (data == 1)
+                avctx->pix_fmt = AV_PIX_FMT_YUV422P10;
+            else if (data == 3)
+                avctx->pix_fmt = AV_PIX_FMT_GBRP12;
+            else {
+                avpriv_report_missing_feature(avctx, "Sample format of %"PRIu16" is unsupported\n", data);
+                ret = AVERROR_PATCHWELCOME;
+                break;
+            }
+        } else
+            av_log(avctx, AV_LOG_DEBUG,  "Unknown tag %i data %x\n", tag, data);
+
+        /* Some kind of end of header tag */
+        if (tag == 4 && data == 0x1a4a && s->coded_width && s->coded_height && avctx->pix_fmt != AV_PIX_FMT_NONE) {
+            if (s->a_width != s->coded_width || s->a_height != s->coded_height) { /* buffers match a different size */
+                free_buffers(avctx);
+                if ((ret = alloc_buffers(avctx)) < 0) {
+                    free_buffers(avctx);
+                    return ret;
+                }
+            }
+
+            if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0)
+                return ret;
+
+            s->coded_width = 0; /* consumed; a non-zero value after the loop is reported as "Invalid dimensions" */
+            s->coded_height = 0;
+            got_buffer = 1;
+        }
+        coeff_data = s->plane[s->channel_num].subband[s->subband_num_actual]; /* destination for coefficients carried by this tag */
+
+        /* Lowpass coefficients */
+        if (tag == 4 && data == 0xf0f && s->a_width && s->a_height) {
+            int lowpass_height = s->plane[s->channel_num].band[0][0].height;
+            int lowpass_width  = s->plane[s->channel_num].band[0][0].width;
+            int lowpass_a_height = s->plane[s->channel_num].band[0][0].a_height;
+            int lowpass_a_width  = s->plane[s->channel_num].band[0][0].a_width;
+
+            if (lowpass_height > lowpass_a_height || lowpass_width > lowpass_a_width ||
+                lowpass_a_width * lowpass_a_height * sizeof(int16_t) > bytestream2_get_bytes_left(&gb)) {
+                av_log(avctx, AV_LOG_ERROR, "Too many lowpass coefficients\n");
+                ret = AVERROR(EINVAL);
+                goto end;
+            }
+
+            av_log(avctx, AV_LOG_DEBUG, "Start of lowpass coeffs component %"PRIu16" height:%d, width:%d\n", s->channel_num, lowpass_height, lowpass_width);
+            for (i = 0; i < lowpass_height; i++) {
+                for (j = 0; j < lowpass_width; j++)
+                    coeff_data[j] = bytestream2_get_be16u(&gb);
+
+                coeff_data += lowpass_width;
+            }
+
+            /* Copy last line of coefficients if odd height. NOTE(review): coeff_data was advanced by the loop above, so these indices start past the rows just written - confirm. */
+            if (lowpass_height & 1) {
+                memcpy(&coeff_data[lowpass_height * lowpass_width],
+                       &coeff_data[(lowpass_height - 1) * lowpass_width],
+                       lowpass_width * sizeof(*coeff_data));
+            }
+
+            av_log(avctx, AV_LOG_DEBUG, "Lowpass coefficients %"PRIu16"\n", lowpass_width * lowpass_height);
+        }
+
+        if (tag == 55 && s->subband_num_actual != 255 && s->a_width && s->a_height) {
+            int highpass_height = s->plane[s->channel_num].band[s->level][s->subband_num].height;
+            int highpass_width  = s->plane[s->channel_num].band[s->level][s->subband_num].width;
+            int highpass_a_width = s->plane[s->channel_num].band[s->level][s->subband_num].a_width;
+            int highpass_a_height = s->plane[s->channel_num].band[s->level][s->subband_num].a_height;
+            int highpass_stride = s->plane[s->channel_num].band[s->level][s->subband_num].stride;
+            int expected = highpass_height * highpass_stride;
+            int a_expected = highpass_a_height * highpass_a_width;
+            int level, run, coeff;
+            int count = 0, bytes;
+
+            if (highpass_height > highpass_a_height || highpass_width > highpass_a_width || a_expected < expected) {
+                av_log(avctx, AV_LOG_ERROR, "Too many highpass coefficents\n");
+                ret = AVERROR(EINVAL);
+                goto end;
+            }
+
+            av_log(avctx, AV_LOG_DEBUG, "Start subband coeffs plane %i level %i codebook %i expected %i\n", s->channel_num, s->level, s->codebook, expected);
+
+            init_get_bits(&s->gb, gb.buffer, bytestream2_get_bytes_left(&gb) * 8);
+            {
+                OPEN_READER(re, &s->gb);
+                if (!s->codebook) {
+                    while (1) {
+                        UPDATE_CACHE(re, &s->gb);
+                        GET_RL_VLC(level, run, re, &s->gb, s->table_9_rl_vlc,
+                                   VLC_BITS, 3, 1);
+
+                        /* escape */
+                        if (level == 64)
+                            break;
+
+                        count += run;
+
+                        if (count > expected)
+                            break;
+
+                        coeff = dequant_and_decompand(level, s->quantisation);
+                        for (i = 0; i < run; i++)
+                            *coeff_data++ = coeff;
+                    }
+                } else {
+                    while (1) {
+                        UPDATE_CACHE(re, &s->gb);
+                        GET_RL_VLC(level, run, re, &s->gb, s->table_18_rl_vlc,
+                                   VLC_BITS, 3, 1);
+
+                        /* escape */
+                        if (level == 255 && run == 2)
+                            break;
+
+                        count += run;
+
+                        if (count > expected)
+                            break;
+
+                        coeff = dequant_and_decompand(level, s->quantisation);
+                        for (i = 0; i < run; i++)
+                            *coeff_data++ = coeff;
+                    }
+                }
+                CLOSE_READER(re, &s->gb);
+            }
+
+            if (count > expected) {
+                av_log(avctx, AV_LOG_ERROR, "Escape codeword not found, probably corrupt data\n");
+                ret = AVERROR(EINVAL);
+                goto end;
+            }
+
+            bytes = FFALIGN(FF_CEIL_RSHIFT(get_bits_count(&s->gb), 3), 4);
+            if (bytes > bytestream2_get_bytes_left(&gb)) {
+                av_log(avctx, AV_LOG_ERROR, "Bitstream overread error\n");
+                ret = AVERROR(EINVAL);
+                goto end;
+            } else
+                bytestream2_seek(&gb, bytes, SEEK_CUR);
+
+            av_log(avctx, AV_LOG_DEBUG, "End subband coeffs %i extra %i\n", count, count - expected);
+            s->codebook = 0; /* the codebook choice is reset after every coefficient block */
+
+            /* Copy last line of coefficients if odd height. NOTE(review): coeff_data was advanced by the VLC loop above, so these indices start past the values just written - confirm. */
+            if (highpass_height & 1) {
+                memcpy(&coeff_data[highpass_height * highpass_stride],
+                       &coeff_data[(highpass_height - 1) * highpass_stride],
+                       highpass_stride * sizeof(*coeff_data));
+            }
+        }
+    }
+
+    if (!s->a_width || !s->a_height || s->coded_width || s->coded_height) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid dimensions\n");
+        ret = AVERROR(EINVAL);
+        goto end;
+    }
+
+    if (!got_buffer) {
+        av_log(avctx, AV_LOG_ERROR, "No end of header tag found\n");
+        ret = AVERROR(EINVAL);
+        goto end;
+    }
+
+    for (plane = 0; plane < 3 && !ret; plane++) { /* skipped entirely when the tag loop left an error in ret */
+        /* level 1 */
+        int lowpass_height  = s->plane[plane].band[0][0].height;
+        int lowpass_width   = s->plane[plane].band[0][0].width;
+        int highpass_stride = s->plane[plane].band[0][1].stride;
+        int act_plane = plane == 1 ? 2 : plane == 2 ? 1 : 0; /* planes 1 and 2 are swapped when writing to pic->data[] */
+        int16_t *low, *high, *output, *dst;
+
+        if (lowpass_height > s->plane[plane].band[0][0].a_height || lowpass_width > s->plane[plane].band[0][0].a_width ||
+            !highpass_stride || s->plane[plane].band[0][1].width > s->plane[plane].band[0][1].a_width) {
+            av_log(avctx, AV_LOG_ERROR, "Invalid plane dimensions\n");
+            ret = AVERROR(EINVAL);
+            goto end;
+        }
+
+        av_log(avctx, AV_LOG_DEBUG, "Decoding level 1 plane %i %i %i %i\n", plane, lowpass_height, lowpass_width, highpass_stride);
+
+        low    = s->plane[plane].subband[0];
+        high   = s->plane[plane].subband[2];
+        output = s->plane[plane].l_h[0];
+        for (i = 0; i < lowpass_width; i++) {
+            vert_filter(output, lowpass_width, low, lowpass_width, high, highpass_stride, lowpass_height);
+            low++;
+            high++;
+            output++;
+        }
+
+        low    = s->plane[plane].subband[1];
+        high   = s->plane[plane].subband[3];
+        output = s->plane[plane].l_h[1];
+
+        for (i = 0; i < lowpass_width; i++) {
+            // note the stride of "low" is highpass_stride
+            vert_filter(output, lowpass_width, low, highpass_stride, high, highpass_stride, lowpass_height);
+            low++;
+            high++;
+            output++;
+        }
+
+        low    = s->plane[plane].l_h[0];
+        high   = s->plane[plane].l_h[1];
+        output = s->plane[plane].subband[0];
+        for (i = 0; i < lowpass_height * 2; i++) {
+            horiz_filter(output, low, high, lowpass_width);
+            low    += lowpass_width;
+            high   += lowpass_width;
+            output += lowpass_width * 2;
+        }
+        if (avctx->pix_fmt == AV_PIX_FMT_GBRP12) {
+            output = s->plane[plane].subband[0];
+            for (i = 0; i < lowpass_height * 2; i++) {
+                for (j = 0; j < lowpass_width * 2; j++)
+                    output[j] <<= 2;
+
+                output += lowpass_width * 2;
+            }
+        }
+
+        /* level 2 */
+        lowpass_height  = s->plane[plane].band[1][1].height;
+        lowpass_width   = s->plane[plane].band[1][1].width;
+        highpass_stride = s->plane[plane].band[1][1].stride;
+
+        if (lowpass_height > s->plane[plane].band[1][1].a_height || lowpass_width > s->plane[plane].band[1][1].a_width ||
+            !highpass_stride || s->plane[plane].band[1][1].width > s->plane[plane].band[1][1].a_width) {
+            av_log(avctx, AV_LOG_ERROR, "Invalid plane dimensions\n");
+            ret = AVERROR(EINVAL);
+            goto end;
+        }
+
+        av_log(avctx, AV_LOG_DEBUG, "Level 2 plane %i %i %i %i\n", plane, lowpass_height, lowpass_width, highpass_stride);
+
+        low    = s->plane[plane].subband[0];
+        high   = s->plane[plane].subband[5];
+        output = s->plane[plane].l_h[3];
+        for (i = 0; i < lowpass_width; i++) {
+            vert_filter(output, lowpass_width, low, lowpass_width, high, highpass_stride, lowpass_height);
+            low++;
+            high++;
+            output++;
+        }
+
+        low    = s->plane[plane].subband[4];
+        high   = s->plane[plane].subband[6];
+        output = s->plane[plane].l_h[4];
+        for (i = 0; i < lowpass_width; i++) {
+            vert_filter(output, lowpass_width, low, highpass_stride, high, highpass_stride, lowpass_height);
+            low++;
+            high++;
+            output++;
+        }
+
+        low    = s->plane[plane].l_h[3];
+        high   = s->plane[plane].l_h[4];
+        output = s->plane[plane].subband[0];
+        for (i = 0; i < lowpass_height * 2; i++) {
+            horiz_filter(output, low, high, lowpass_width);
+            low    += lowpass_width;
+            high   += lowpass_width;
+            output += lowpass_width * 2;
+        }
+
+        output = s->plane[plane].subband[0];
+        for (i = 0; i < lowpass_height * 2; i++) {
+            for (j = 0; j < lowpass_width * 2; j++)
+                output[j] <<= 2;
+
+            output += lowpass_width * 2;
+        }
+
+        /* level 3 */
+        lowpass_height  = s->plane[plane].band[2][1].height;
+        lowpass_width   = s->plane[plane].band[2][1].width;
+        highpass_stride = s->plane[plane].band[2][1].stride;
+
+        if (lowpass_height > s->plane[plane].band[2][1].a_height || lowpass_width > s->plane[plane].band[2][1].a_width ||
+            !highpass_stride || s->plane[plane].band[2][1].width > s->plane[plane].band[2][1].a_width) {
+            av_log(avctx, AV_LOG_ERROR, "Invalid plane dimensions\n");
+            ret = AVERROR(EINVAL);
+            goto end;
+        }
+
+        av_log(avctx, AV_LOG_DEBUG, "Level 3 plane %i %i %i %i\n", plane, lowpass_height, lowpass_width, highpass_stride);
+
+        low    = s->plane[plane].subband[0];
+        high   = s->plane[plane].subband[8];
+        output = s->plane[plane].l_h[6];
+        for (i = 0; i < lowpass_width; i++) {
+            vert_filter(output, lowpass_width, low, lowpass_width, high, highpass_stride, lowpass_height);
+            low++;
+            high++;
+            output++;
+        }
+
+        low    = s->plane[plane].subband[7];
+        high   = s->plane[plane].subband[9];
+        output = s->plane[plane].l_h[7];
+        for (i = 0; i < lowpass_width; i++) {
+            vert_filter(output, lowpass_width, low, highpass_stride, high, highpass_stride, lowpass_height);
+            low++;
+            high++;
+            output++;
+        }
+
+        dst = (int16_t *)pic->data[act_plane];
+        low  = s->plane[plane].l_h[6];
+        high = s->plane[plane].l_h[7];
+        for (i = 0; i < lowpass_height * 2; i++) {
+            horiz_filter_clip(dst, low, high, lowpass_width, s->bpc); /* final pass writes into the frame, clipped with s->bpc */
+            low  += lowpass_width;
+            high += lowpass_width;
+            dst  += pic->linesize[act_plane] / 2;
+        }
+    }
+
+
+end:
+    if (ret < 0)
+        return ret;
+
+    *got_frame = 1;
+    return avpkt->size;
+}
+
+static av_cold int cfhd_close_decoder(AVCodecContext *avctx)
+{
+    CFHDContext *ctx = avctx->priv_data;
+
+    free_buffers(avctx);
+    /* The VLC tables are released only when this context is not a copy. */
+    if (avctx->internal->is_copy)
+        return 0;
+
+    ff_free_vlc(&ctx->vlc_9);
+    ff_free_vlc(&ctx->vlc_18);
+    return 0;
+}
+
+AVCodec ff_cfhd_decoder = {
+    .name           = "cfhd",
+    .long_name      = NULL_IF_CONFIG_SMALL("Cineform HD"),
+    .id             = AV_CODEC_ID_CFHD,
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .capabilities   = AV_CODEC_CAP_DR1,
+    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
+    .priv_data_size = sizeof(CFHDContext), /* avctx->priv_data is used as a CFHDContext throughout */
+    .init           = cfhd_decode_init,
+    .decode         = cfhd_decode,
+    .close          = cfhd_close_decoder,
+};
diff --git a/libavcodec/cfhd.h b/libavcodec/cfhd.h
new file mode 100644
index 00000000..a2631240
--- /dev/null
+++ b/libavcodec/cfhd.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2015 Kieran Kunhya
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_CFHD_H
+#define AVCODEC_CFHD_H
+
+#include <stdint.h>
+
+#include "libavutil/avassert.h"
+
+#include "avcodec.h"
+#include "get_bits.h"
+
+#define VLC_BITS 9 /* second argument of both init_vlc() calls in ff_cfhd_init_vlcs() */
+#define NB_VLC_TABLE_9 (71+3) /* number of entries in each table_9_vlc_* array */
+#define NB_VLC_TABLE_18 (263+1) /* number of entries in each table_18_vlc_* array */
+
+typedef struct CFHD_RL_VLC_ELEM { /* one entry of the run/level tables built by ff_cfhd_init_vlcs() */
+    int16_t level; /* signed level; holds the raw VLC table code when len < 0 */
+    int8_t len; /* length field copied from the VLC table; < 0 means more bits are needed */
+    uint16_t run; /* run for this code; 0 when len < 0 */
+} CFHD_RL_VLC_ELEM;
+
+#define DWT_LEVELS 3 /* first dimension of Plane.band[][] */
+
+typedef struct SubBand { /* element type of Plane.band[DWT_LEVELS][4] */
+    int level; /* NOTE(review): field meanings are not established by the code visible here - confirm in cfhd.c */
+    int orientation;
+    int stride;
+    int a_width;
+    int width;
+    int a_height;
+    int height;
+    int pshift;
+    int quant;
+    uint8_t *ibuf;
+} SubBand;
+
+typedef struct Plane { /* per-plane state; CFHDContext holds an array of four */
+    int width;
+    int height;
+    ptrdiff_t stride;
+
+    int16_t *idwt_buf;
+    int16_t *idwt_tmp;
+
+    /* TODO: merge this into SubBand structure */
+    int16_t *subband[10]; /* the decoder passes e.g. [7] and [9] to vert_filter() as low/high inputs */
+    int16_t *l_h[8]; /* [6] and [7] receive vert_filter() output and then feed horiz_filter_clip() */
+
+    SubBand band[DWT_LEVELS][4];
+} Plane;
+
+typedef struct CFHDContext { /* decoder private context (priv_data of ff_cfhd_decoder) */
+    AVCodecContext *avctx;
+
+    CFHD_RL_VLC_ELEM table_9_rl_vlc[2088]; /* filled from vlc_9 by ff_cfhd_init_vlcs() */
+    VLC vlc_9;
+
+    CFHD_RL_VLC_ELEM table_18_rl_vlc[4572]; /* filled from vlc_18; ff_cfhd_init_vlcs() asserts table_size == 4572 */
+    VLC vlc_18;
+
+    GetBitContext gb;
+
+    int chroma_x_shift;
+    int chroma_y_shift;
+
+    int coded_width;
+    int coded_height;
+
+    int a_width;
+    int a_height;
+
+    int bpc; /* passed to horiz_filter_clip() when the output picture rows are written */
+    int channel_cnt;
+    int subband_cnt;
+    int channel_num;
+    uint8_t lowpass_precision;
+    uint16_t quantisation;
+    int wavelet_depth;
+    int pshift;
+
+    int codebook;
+    int subband_num;
+    int level;
+    int subband_num_actual;
+
+    uint8_t prescale_shift[3];
+    Plane plane[4];
+
+} CFHDContext;
+
+int ff_cfhd_init_vlcs(CFHDContext *s);
+
+#endif /* AVCODEC_CFHD_H */
diff --git a/libavcodec/cfhddata.c b/libavcodec/cfhddata.c
new file mode 100644
index 00000000..9330d346
--- /dev/null
+++ b/libavcodec/cfhddata.c
@@ -0,0 +1,323 @@
+/*
+ * Copyright (c) 2015 Kieran Kunhya
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "stdint.h"
+#include "cfhd.h"
+
+/* some special codewords (the last three table_9 entries), not sure what they all mean */
+#define TABLE_9_BAND_END1 0x1C7859E
+#define TABLE_9_BAND_END_LEN1 25
+#define TABLE_9_BAND_END2 0x38F0B3F
+#define TABLE_9_BAND_END_LEN2 26
+#define TABLE_9_BAND_END3 0x38F0B3E
+#define TABLE_9_BAND_END_LEN3 26
+
+static const uint8_t table_9_vlc_len[NB_VLC_TABLE_9] = { /* length in bits of each table_9_vlc_bits code */
+     1,    2,    4,    5,    5,    5,    6,    6,
+     6,    7,    7,    8,    8,    8,    8,    9,
+     9,    9,    9,    9,   10,   10,   11,   11,
+    11,   11,   12,   12,   12,   12,   13,   13,
+    13,   14,   14,   14,   14,   14,   14,   15,
+    15,   15,   15,   16,   16,   16,   16,   17,
+    17,   17,   17,   17,   18,   18,   18,   19,
+    19,   19,   20,   20,   20,   20,   20,   22,
+    23,   23,   23,   23,   24,   24,   24,   25,
+    26,   26,
+};
+
+static const uint32_t table_9_vlc_bits[NB_VLC_TABLE_9] = { /* unsigned codewords; ff_cfhd_init_vlcs() appends the sign bit */
+            0,       0x2,       0xc,      0x1a,      0x1d,      0x1e,      0x39,      0x3e,
+         0x37,      0x7e,      0x6c,      0xe2,      0xfe,      0xdb,      0xe0,     0x1c3,
+        0x1c6,     0x1ff,     0x1fe,     0x1b5,     0x369,     0x385,     0x71d,     0x6d0,
+        0x708,     0x71f,     0xe3d,     0xe39,     0xe13,     0xe12,    0x1c71,    0x1b45,
+       0x1b47,    0x3689,    0x38f2,    0x38e1,    0x38e0,    0x38f1,    0x3688,    0x6d1b,
+       0x71e0,    0x6d19,    0x71e7,    0xe3cd,    0xda35,    0xda30,    0xe3c3,   0x1b469,
+      0x1b462,   0x1c798,   0x1b463,   0x1c799,   0x38f08,   0x38f09,   0x38f0a,   0x6d1a0,
+      0x6d1a3,   0x6d1a1,   0xda345,   0xda344,   0xe3c2d,   0xe3c2f,   0xe3c2e,  0x38f0b2,
+     0x71e160,  0x71e162,  0x71e166,  0x71e161,  0xe3c2ce,  0xe3c2c6,  0xe3c2c7, 0x1C7859E,
+    0x38F0B3F, 0x38F0B3E,
+};
+
+static const uint16_t table_9_vlc_run[NB_VLC_TABLE_9] = { /* run stored for each table_9_vlc_bits code */
+    1,    1,    1,    1,   12,    1,   32,  160,
+    1,    1,    1,  320,    1,    1,   80,  120,
+    1,    1,  100,    1,    1,    1,    1,    1,
+    1,    1,    1,    1,    1,    1,    1,    1,
+    1,    1,    1,    1,    1,    1,    1,    1,
+    1,    1,    1,    1,    1,    1,    1,    1,
+    1,    1,    1,    1,    1,    1,    1,    1,
+    1,    1,    1,    1,    1,    1,    1,    1,
+    1,    1,    1,    1,    1,    1,    1,    1,
+    1,    1
+};
+
+static const uint8_t table_9_vlc_level[NB_VLC_TABLE_9] = { /* level magnitude per code; level 0 gets no sign bit */
+     0,    1,    2,    3,    0,    4,    0,    0,
+     5,    7,    6,    0,    9,    8,    0,    0,
+    11,   12,    0,   10,   13,   14,   17,   15,
+    16,   18,   22,   21,   20,   19,   25,   23,
+    24,   27,   31,   29,   28,   30,   26,   33,
+    34,   32,   35,   39,   37,   36,   38,   42,
+    40,   43,   41,   44,   45,   46,   47,   48,
+    50,   49,   52,   51,   53,   55,   54,   56,
+    57,   59,   60,   58,   61,   62,   63,   64,
+    64,   64,
+};
+
+static const uint32_t table_18_vlc_bits[NB_VLC_TABLE_18] = { /* unsigned codewords; ff_cfhd_init_vlcs() appends the sign bit */
+            0,       0x2,       0x7,      0x19,      0x30,      0x36,      0x6f,      0x63,
+         0x69,      0x6b,      0xd1,      0xd4,      0xdc,     0x189,     0x18a,     0x1a0,
+        0x1ab,     0x377,     0x310,     0x316,     0x343,     0x354,     0x375,     0x623,
+        0x684,     0x685,     0x6ab,     0x6ec,     0xddb,     0xc5c,     0xc5e,     0xc44,
+        0xd55,     0xdd1,     0xdd3,    0x1bb5,    0x188b,    0x18bb,    0x18bf,    0x1aa8,
+       0x1ba0,    0x1ba5,    0x1ba4,    0x3115,    0x3175,    0x317d,    0x3553,    0x3768,
+       0x6e87,    0x6ed3,    0x62e8,    0x62f8,    0x6228,    0x6aa4,    0x6e85,    0xc453,
+       0xc5d3,    0xc5f3,    0xdda4,    0xdd08,    0xdd0c,   0x1bb4b,   0x1bb4a,   0x18ba5,
+      0x18be5,   0x1aa95,   0x1aa97,   0x188a4,   0x1ba13,   0x31748,   0x317c8,   0x35528,
+      0x3552c,   0x37424,   0x37434,   0x37436,   0x62294,   0x62e92,   0x62f92,   0x6aa52,
+      0x6aa5a,   0x6e86a,   0x6e86e,   0x6e84a,   0xc452a,   0xc5d27,   0xc5f26,   0xd54a6,
+      0xd54b6,   0xdd096,   0xdd0d6,   0xdd0de,  0x188a56,  0x18ba4d,  0x18be4e,  0x18be4f,
+     0x1aa96e,  0x1ba12e,  0x1ba12f,  0x1ba1af,  0x1ba1bf,  0x37435d,  0x37437d,  0x317498,
+     0x35529c,  0x35529d,  0x3552de,  0x3552df,  0x62e933,  0x62295d,  0x6aa53d,  0x6aa53f,
+     0x6aa53e,  0x6e86b9,  0x6e86f8,  0xd54a79,  0xc5d265,  0xc452b8,  0xdd0d71,  0xd54a78,
+     0xdd0d70,  0xdd0df2,  0xdd0df3, 0x188a5f6, 0x188a5f5, 0x188a5f4, 0x188a5f3, 0x188a5f2,
+    0x188a5f1, 0x188a5f0, 0x188a5ef, 0x188a5ee, 0x188a5ed, 0x188a5aa, 0x188a5e3, 0x188a5df,
+    0x188a589, 0x188a5dd, 0x188a578, 0x188a5e0, 0x188a588, 0x188a5d6, 0x188a5db, 0x188a5e1,
+    0x188a587, 0x188a59a, 0x188a5c4, 0x188a5ec, 0x188a586, 0x188a573, 0x188a59c, 0x188a5c8,
+    0x188a5fb, 0x188a5a1, 0x188a5eb, 0x188a5a8, 0x188a584, 0x188a5d2, 0x188a599, 0x188a598,
+    0x188a583, 0x18ba4c9, 0x188a5d0, 0x188a594, 0x188a582, 0x188a5cb, 0x188a5d8, 0x188a5e7,
+    0x188a581, 0x188a5ea, 0x188a5a9, 0x188a5a6, 0x188a580, 0x188a5a0, 0x188a59d, 0x188a5c3,
+    0x188a57f, 0x188a5c0, 0x188a5de, 0x188a5d4, 0x188a57e, 0x188a5c2, 0x188a592, 0x188a5cd,
+    0x188a57d, 0x188a5a3, 0x188a5e8, 0x188a5a2, 0x188a57c, 0x188a58e, 0x188a5b3, 0x188a5b2,
+    0x188a5b1, 0x188a5b0, 0x188a5af, 0x188a5ae, 0x188a5ad, 0x188a5ac, 0x188a5ab, 0x188a5da,
+    0x188a5e4, 0x188a5e5, 0x188a5d9, 0x188a5b5, 0x188a5bc, 0x188a5bd, 0x188a5e9, 0x188a5cc,
+    0x188a585, 0x188a5d3, 0x188a5e2, 0x188a595, 0x188a596, 0x188a5b8, 0x188a590, 0x188a5c9,
+    0x188a5a4, 0x188a5e6, 0x188a5a5, 0x188a5ce, 0x188a5bf, 0x188a572, 0x188a59b, 0x188a5be,
+    0x188a5c7, 0x188a5ca, 0x188a5d5, 0x188a57b, 0x188a58d, 0x188a58c, 0x188a58b, 0x188a58a,
+    0x18ba4c8, 0x188a5c5, 0x188a5fa, 0x188a5bb, 0x188a5c1, 0x188a5cf, 0x188a5b9, 0x188a5b6,
+    0x188a597, 0x188a5fe, 0x188a5d7, 0x188a5ba, 0x188a591, 0x188a5c6, 0x188a5dc, 0x188a57a,
+    0x188a59f, 0x188a5f9, 0x188a5b4, 0x188a5a7, 0x188a58f, 0x188a5fd, 0x188a5b7, 0x188a593,
+    0x188a59e, 0x188a5f8, 0x188a5ff, 0x188a5fc, 0x188a579, 0x188a5f7, 0x3114ba2, 0x3114ba3,
+};
+
+static const uint8_t table_18_vlc_len[NB_VLC_TABLE_18] = { /* length in bits of each table_18_vlc_bits code */
+     1,  2,  3,  5,  6,  6,  7,  7,
+     7,  7,  8,  8,  8,  9,  9,  9,
+     9, 10, 10, 10, 10, 10, 10, 11,
+    11, 11, 11, 11, 12, 12, 12, 12,
+    12, 12, 12, 13, 13, 13, 13, 13,
+    13, 13, 13, 14, 14, 14, 14, 14,
+    15, 15, 15, 15, 15, 15, 15, 16,
+    16, 16, 16, 16, 16, 17, 17, 17,
+    17, 17, 17, 17, 17, 18, 18, 18,
+    18, 18, 18, 18, 19, 19, 19, 19,
+    19, 19, 19, 19, 20, 20, 20, 20,
+    20, 20, 20, 20, 21, 21, 21, 21,
+    21, 21, 21, 21, 21, 22, 22, 22,
+    22, 22, 22, 22, 23, 23, 23, 23,
+    23, 23, 23, 24, 24, 24, 24, 24,
+    24, 24, 24, 25, 25, 25, 25, 25,
+    25, 25, 25, 25, 25, 25, 25, 25,
+    25, 25, 25, 25, 25, 25, 25, 25,
+    25, 25, 25, 25, 25, 25, 25, 25,
+    25, 25, 25, 25, 25, 25, 25, 25,
+    25, 25, 25, 25, 25, 25, 25, 25,
+    25, 25, 25, 25, 25, 25, 25, 25,
+    25, 25, 25, 25, 25, 25, 25, 25,
+    25, 25, 25, 25, 25, 25, 25, 25,
+    25, 25, 25, 25, 25, 25, 25, 25,
+    25, 25, 25, 25, 25, 25, 25, 25,
+    25, 25, 25, 25, 25, 25, 25, 25,
+    25, 25, 25, 25, 25, 25, 25, 25,
+    25, 25, 25, 25, 25, 25, 25, 25,
+    25, 25, 25, 25, 25, 25, 25, 25,
+    25, 25, 25, 25, 25, 25, 25, 25,
+    25, 25, 25, 25, 25, 25, 25, 25,
+    25, 25, 25, 25, 25, 25, 26, 26,
+};
+
+static const uint16_t table_18_vlc_run[NB_VLC_TABLE_18] = { /* run stored for each table_18_vlc_bits code */
+    1,    1,    1,    1,    1,    1,    1,    1,
+   12,    1,   20,    1,    1,    1,   32,    1,
+    1,    1,    1,    1,   60,    1,    1,    1,
+    1,  100,    1,    1,    1,    1,    1,    1,
+    1,    1,    1,    1,    1,    1,  180,    1,
+    1,  320,    1,    1,    1,    1,    1,    1,
+    1,    1,    1,    1,    1,    1,    1,    1,
+    1,    1,    1,    1,    1,    1,    1,    1,
+    1,    1,    1,    1,    1,    1,    1,    1,
+    1,    1,    1,    1,    1,    1,    1,    1,
+    1,    1,    1,    1,    1,    1,    1,    1,
+    1,    1,    1,    1,    1,    1,    1,    1,
+    1,    1,    1,    1,    1,    1,    1,    1,
+    1,    1,    1,    1,    1,    1,    1,    1,
+    1,    1,    1,    1,    1,    1,    1,    1,
+    1,    1,    1,    1,    1,    1,    1,    1,
+    1,    1,    1,    1,    1,    1,    1,    1,
+    1,    1,    1,    1,    1,    1,    1,    1,
+    1,    1,    1,    1,    1,    1,    1,    1,
+    1,    1,    1,    1,    1,    1,    1,    1,
+    1,    1,    1,    1,    1,    1,    1,    1,
+    1,    1,    1,    1,    1,    1,    1,    1,
+    1,    1,    1,    1,    1,    1,    1,    1,
+    1,    1,    1,    1,    1,    1,    1,    1,
+    1,    1,    1,    1,    1,    1,    1,    1,
+    1,    1,    1,    1,    1,    1,    1,    1,
+    1,    1,    1,    1,    1,    1,    1,    1,
+    1,    1,    1,    1,    1,    1,    1,    1,
+    1,    1,    1,    1,    1,    1,    1,    1,
+    1,    1,    1,    1,    1,    1,    1,    1,
+    1,    1,    1,    1,    1,    1,    1,    1,
+    1,    1,    1,    1,    1,    1,    1,    1,
+    1,    1,    1,    1,    1,    1,    1,    2,
+};
+
+static const uint8_t table_18_vlc_level[NB_VLC_TABLE_18] = { /* level magnitude per code; level 0 gets no sign bit */
+      0,    1,    2,    3,    4,    5,    8,    6,
+      0,    7,    0,    9,   10,   11,    0,   12,
+     13,   18,   14,   15,    0,   16,   17,   19,
+     20,    0,   21,   22,   29,   24,   25,   23,
+     26,   27,   28,   35,   30,   31,    0,   32,
+     33,    0,   34,   36,   37,   38,   39,   40,
+     46,   47,   42,   43,   41,   44,   45,   48,
+     49,   50,   53,   51,   52,   61,   60,   55,
+     56,   57,   58,   54,   59,   62,   63,   64,
+     65,   66,   67,   68,   69,   70,   71,   72,
+     73,   75,   76,   74,   77,   78,   79,   80,
+     81,   82,   83,   84,   85,   86,   87,   88,
+     89,   90,   91,   92,   93,   99,  100,   94,
+     95,   96,   97,   98,  102,  101,  103,  105,
+    104,  106,  107,  111,  109,  108,  113,  110,
+    112,  114,  115,  225,  189,  188,  203,  202,
+    197,  207,  169,  223,  159,  235,  152,  192,
+    179,  201,  172,  149,  178,  120,  219,  150,
+    127,  211,  125,  158,  247,  238,  163,  228,
+    183,  217,  168,  122,  128,  249,  187,  186,
+    136,  181,  255,  230,  135,  233,  222,  145,
+    134,  167,  248,  209,  243,  216,  164,  140,
+    157,  239,  191,  251,  156,  139,  242,  133,
+    162,  213,  165,  212,  227,  198,  236,  234,
+    117,  215,  124,  123,  254,  253,  148,  218,
+    146,  147,  224,  143,  184,  185,  166,  132,
+    129,  250,  151,  119,  193,  176,  245,  229,
+    206,  144,  208,  137,  241,  237,  190,  240,
+    131,  232,  252,  171,  205,  204,  118,  214,
+    180,  126,  182,  175,  141,  138,  177,  153,
+    194,  160,  121,  174,  246,  130,  200,  170,
+    221,  196,  142,  210,  199,  155,  154,  244,
+    220,  195,  161,  231,  173,  226,  116,  255,
+};
+
+/** Build the signed VLCs and run/level lookup tables for table 9 and table 18 (similar to dv.c). */
+av_cold int ff_cfhd_init_vlcs(CFHDContext *s)
+{
+    int i, j, ret = 0;
+    uint32_t new_cfhd_vlc_bits[NB_VLC_TABLE_18 * 2];
+    uint8_t  new_cfhd_vlc_len[NB_VLC_TABLE_18 * 2];
+    uint16_t new_cfhd_vlc_run[NB_VLC_TABLE_18 * 2];
+    int16_t  new_cfhd_vlc_level[NB_VLC_TABLE_18 * 2];
+
+    /* Table 9 */
+    for (i = 0, j = 0; i < NB_VLC_TABLE_9; i++, j++) {
+        new_cfhd_vlc_bits[j]  = table_9_vlc_bits[i];
+        new_cfhd_vlc_len[j]   = table_9_vlc_len[i];
+        new_cfhd_vlc_run[j]   = table_9_vlc_run[i];
+        new_cfhd_vlc_level[j] = table_9_vlc_level[i];
+
+        /* Don't include the zero level nor escape bits */
+        if (table_9_vlc_level[i] &&
+            new_cfhd_vlc_bits[j] != table_9_vlc_bits[NB_VLC_TABLE_9-1]) {
+            new_cfhd_vlc_bits[j] <<= 1;
+            new_cfhd_vlc_len[j]++;
+            j++;
+            new_cfhd_vlc_bits[j]  = (table_9_vlc_bits[i] << 1) | 1;
+            new_cfhd_vlc_len[j]   =  table_9_vlc_len[i] + 1;
+            new_cfhd_vlc_run[j]   =  table_9_vlc_run[i];
+            new_cfhd_vlc_level[j] = -table_9_vlc_level[i];
+        }
+    }
+
+    ret = init_vlc(&s->vlc_9, VLC_BITS, j, new_cfhd_vlc_len,
+                   1, 1, new_cfhd_vlc_bits, 4, 4, 0);
+    if (ret < 0)
+        return ret;
+    /* table_9_rl_vlc has a fixed size; the loop below must never write past its end */
+    av_assert0(s->vlc_9.table_size <= sizeof(s->table_9_rl_vlc) / sizeof(s->table_9_rl_vlc[0]));
+    for (i = 0; i < s->vlc_9.table_size; i++) {
+        int code = s->vlc_9.table[i][0];
+        int len  = s->vlc_9.table[i][1];
+        int level, run;
+
+        if (len < 0) { // more bits needed
+            run   = 0;
+            level = code;
+        } else {
+            run   = new_cfhd_vlc_run[code];
+            level = new_cfhd_vlc_level[code];
+        }
+        s->table_9_rl_vlc[i].len   = len;
+        s->table_9_rl_vlc[i].level = level;
+        s->table_9_rl_vlc[i].run   = run;
+    }
+
+    /* Table 18 */
+    for (i = 0, j = 0; i < NB_VLC_TABLE_18; i++, j++) {
+        new_cfhd_vlc_bits[j]  = table_18_vlc_bits[i];
+        new_cfhd_vlc_len[j]   = table_18_vlc_len[i];
+        new_cfhd_vlc_run[j]   = table_18_vlc_run[i];
+        new_cfhd_vlc_level[j] = table_18_vlc_level[i];
+
+        /* Don't include the zero level nor escape bits */
+        if (table_18_vlc_level[i] &&
+            new_cfhd_vlc_bits[j] != table_18_vlc_bits[NB_VLC_TABLE_18-1]) {
+            new_cfhd_vlc_bits[j] <<= 1;
+            new_cfhd_vlc_len[j]++;
+            j++;
+            new_cfhd_vlc_bits[j]  = (table_18_vlc_bits[i] << 1) | 1;
+            new_cfhd_vlc_len[j]   =  table_18_vlc_len[i] + 1;
+            new_cfhd_vlc_run[j]   =  table_18_vlc_run[i];
+            new_cfhd_vlc_level[j] = -table_18_vlc_level[i];
+        }
+    }
+
+    ret = init_vlc(&s->vlc_18, VLC_BITS, j, new_cfhd_vlc_len,
+                   1, 1, new_cfhd_vlc_bits, 4, 4, 0);
+    if (ret < 0)
+        return ret;
+    av_assert0(s->vlc_18.table_size == 4572);
+    for (i = 0; i < s->vlc_18.table_size; i++) {
+        int code = s->vlc_18.table[i][0];
+        int len  = s->vlc_18.table[i][1];
+        int level, run;
+
+        if (len < 0) { // more bits needed
+            run   = 0;
+            level = code;
+        } else {
+            run   = new_cfhd_vlc_run[code];
+            level = new_cfhd_vlc_level[code];
+        }
+        s->table_18_rl_vlc[i].len   = len;
+        s->table_18_rl_vlc[i].level = level;
+        s->table_18_rl_vlc[i].run   = run;
+    }
+
+    return ret;
+}
diff --git a/libavcodec/cinepak.c b/libavcodec/cinepak.c
index 1a6d4f55..f1a46563 100644
--- a/libavcodec/cinepak.c
+++ b/libavcodec/cinepak.c
@@ -484,5 +484,5 @@ AVCodec ff_cinepak_decoder = {
     .init           = cinepak_decode_init,
     .close          = cinepak_decode_end,
     .decode         = cinepak_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/cinepakenc.c b/libavcodec/cinepakenc.c
index 72773451..06b06da9 100644
--- a/libavcodec/cinepakenc.c
+++ b/libavcodec/cinepakenc.c
@@ -169,7 +169,7 @@ typedef struct {
 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 static const AVOption options[] = {
     { "max_extra_cb_iterations", "Max extra codebook recalculation passes, more is better and slower", OFFSET(max_extra_cb_iterations), AV_OPT_TYPE_INT, { .i64 = 2 }, 0, INT_MAX, VE },
-    { "skip_empty_cb", "Avoid wasting bytes, ignore vintage MacOS decoder", OFFSET(skip_empty_cb), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
+    { "skip_empty_cb", "Avoid wasting bytes, ignore vintage MacOS decoder", OFFSET(skip_empty_cb), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
     { "max_strips", "Limit strips/frame, vintage compatible is 1..3, otherwise the more the better", OFFSET(max_max_strips), AV_OPT_TYPE_INT, { .i64 = 3 }, MIN_STRIPS, MAX_STRIPS, VE },
     { "min_strips", "Enforce min strips/frame, more is worse and faster, must be <= max_strips", OFFSET(min_min_strips), AV_OPT_TYPE_INT, { .i64 = MIN_STRIPS }, MIN_STRIPS, MAX_STRIPS, VE },
     { "strip_number_adaptivity", "How fast the strip number adapts, more is slightly better, much slower", OFFSET(strip_number_delta_range), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, MAX_STRIPS-MIN_STRIPS, VE },
@@ -327,7 +327,7 @@ static int64_t calculate_mode_score(CinepakEncContext *s, int h, strip_info *inf
                    (info->v4_size ? CHUNK_HEADER_SIZE + info->v4_size * entry_size : 0) +
                    CHUNK_HEADER_SIZE) << 3;
 
-    //av_log(s->avctx, AV_LOG_INFO, "sizes %3i %3i -> %9lli score mb_count %i", info->v1_size, info->v4_size, (long long int)ret, mb_count);
+    //av_log(s->avctx, AV_LOG_INFO, "sizes %3i %3i -> %9"PRId64" score mb_count %i", info->v1_size, info->v4_size, ret, mb_count);
 
 #ifdef CINEPAK_REPORT_SERR
     *serr = 0;
@@ -525,105 +525,115 @@ static int encode_codebook(CinepakEncContext *s, int *codebook, int size, int ch
 }
 
 //sets out to the sub picture starting at (x,y) in in
-static void get_sub_picture(CinepakEncContext *s, int x, int y, AVPicture *in, AVPicture *out)
+static void get_sub_picture(CinepakEncContext *s, int x, int y,
+                            uint8_t * in_data[4], int  in_linesize[4],
+                            uint8_t *out_data[4], int out_linesize[4])
 {
-    out->data[0] = in->data[0] + x + y * in->linesize[0];
-    out->linesize[0] = in->linesize[0];
+    out_data[0] = in_data[0] + x + y * in_linesize[0];
+    out_linesize[0] = in_linesize[0];
 
     if(s->pix_fmt == AV_PIX_FMT_RGB24) {
-        out->data[1] = in->data[1] + (x >> 1) + (y >> 1) * in->linesize[1];
-        out->linesize[1] = in->linesize[1];
+        out_data[1] = in_data[1] + (x >> 1) + (y >> 1) * in_linesize[1];
+        out_linesize[1] = in_linesize[1];
 
-        out->data[2] = in->data[2] + (x >> 1) + (y >> 1) * in->linesize[2];
-        out->linesize[2] = in->linesize[2];
+        out_data[2] = in_data[2] + (x >> 1) + (y >> 1) * in_linesize[2];
+        out_linesize[2] = in_linesize[2];
     }
 }
 
-//decodes the V1 vector in mb into the 4x4 MB pointed to by sub_pict
-static void decode_v1_vector(CinepakEncContext *s, AVPicture *sub_pict, int v1_vector, strip_info *info)
+//decodes the V1 vector in mb into the 4x4 MB pointed to by data
+static void decode_v1_vector(CinepakEncContext *s, uint8_t *data[4],
+                             int linesize[4], int v1_vector, strip_info *info)
 {
     int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
 
-    sub_pict->data[0][0] =
-            sub_pict->data[0][1] =
-            sub_pict->data[0][    sub_pict->linesize[0]] =
-            sub_pict->data[0][1+  sub_pict->linesize[0]] = info->v1_codebook[v1_vector*entry_size];
+    data[0][0] =
+            data[0][1] =
+            data[0][    linesize[0]] =
+            data[0][1+  linesize[0]] = info->v1_codebook[v1_vector*entry_size];
 
-    sub_pict->data[0][2] =
-            sub_pict->data[0][3] =
-            sub_pict->data[0][2+  sub_pict->linesize[0]] =
-            sub_pict->data[0][3+  sub_pict->linesize[0]] = info->v1_codebook[v1_vector*entry_size+1];
+    data[0][2] =
+            data[0][3] =
+            data[0][2+  linesize[0]] =
+            data[0][3+  linesize[0]] = info->v1_codebook[v1_vector*entry_size+1];
 
-    sub_pict->data[0][2*sub_pict->linesize[0]] =
-            sub_pict->data[0][1+2*sub_pict->linesize[0]] =
-            sub_pict->data[0][  3*sub_pict->linesize[0]] =
-            sub_pict->data[0][1+3*sub_pict->linesize[0]] = info->v1_codebook[v1_vector*entry_size+2];
+    data[0][2*linesize[0]] =
+            data[0][1+2*linesize[0]] =
+            data[0][  3*linesize[0]] =
+            data[0][1+3*linesize[0]] = info->v1_codebook[v1_vector*entry_size+2];
 
-    sub_pict->data[0][2+2*sub_pict->linesize[0]] =
-            sub_pict->data[0][3+2*sub_pict->linesize[0]] =
-            sub_pict->data[0][2+3*sub_pict->linesize[0]] =
-            sub_pict->data[0][3+3*sub_pict->linesize[0]] = info->v1_codebook[v1_vector*entry_size+3];
+    data[0][2+2*linesize[0]] =
+            data[0][3+2*linesize[0]] =
+            data[0][2+3*linesize[0]] =
+            data[0][3+3*linesize[0]] = info->v1_codebook[v1_vector*entry_size+3];
 
     if(s->pix_fmt == AV_PIX_FMT_RGB24) {
-        sub_pict->data[1][0] =
-            sub_pict->data[1][1] =
-            sub_pict->data[1][    sub_pict->linesize[1]] =
-            sub_pict->data[1][1+  sub_pict->linesize[1]] = info->v1_codebook[v1_vector*entry_size+4];
-
-        sub_pict->data[2][0] =
-            sub_pict->data[2][1] =
-            sub_pict->data[2][    sub_pict->linesize[2]] =
-            sub_pict->data[2][1+  sub_pict->linesize[2]] = info->v1_codebook[v1_vector*entry_size+5];
+        data[1][0] =
+            data[1][1] =
+            data[1][    linesize[1]] =
+            data[1][1+  linesize[1]] = info->v1_codebook[v1_vector*entry_size+4];
+
+        data[2][0] =
+            data[2][1] =
+            data[2][    linesize[2]] =
+            data[2][1+  linesize[2]] = info->v1_codebook[v1_vector*entry_size+5];
     }
 }
 
-//decodes the V4 vectors in mb into the 4x4 MB pointed to by sub_pict
-static void decode_v4_vector(CinepakEncContext *s, AVPicture *sub_pict, int *v4_vector, strip_info *info)
+//decodes the V4 vectors in mb into the 4x4 MB pointed to by data
+static void decode_v4_vector(CinepakEncContext *s, uint8_t *data[4],
+                             int linesize[4], int *v4_vector, strip_info *info)
 {
     int i, x, y, entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
 
     for(i = y = 0; y < 4; y += 2) {
         for(x = 0; x < 4; x += 2, i++) {
-            sub_pict->data[0][x   +     y*sub_pict->linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size];
-            sub_pict->data[0][x+1 +     y*sub_pict->linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+1];
-            sub_pict->data[0][x   + (y+1)*sub_pict->linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+2];
-            sub_pict->data[0][x+1 + (y+1)*sub_pict->linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+3];
+            data[0][x   +     y*linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size];
+            data[0][x+1 +     y*linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+1];
+            data[0][x   + (y+1)*linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+2];
+            data[0][x+1 + (y+1)*linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+3];
 
             if(s->pix_fmt == AV_PIX_FMT_RGB24) {
-                sub_pict->data[1][(x>>1) + (y>>1)*sub_pict->linesize[1]] = info->v4_codebook[v4_vector[i]*entry_size+4];
-                sub_pict->data[2][(x>>1) + (y>>1)*sub_pict->linesize[2]] = info->v4_codebook[v4_vector[i]*entry_size+5];
+                data[1][(x>>1) + (y>>1)*linesize[1]] = info->v4_codebook[v4_vector[i]*entry_size+4];
+                data[2][(x>>1) + (y>>1)*linesize[2]] = info->v4_codebook[v4_vector[i]*entry_size+5];
             }
         }
     }
 }
 
-static void copy_mb(CinepakEncContext *s, AVPicture *a, AVPicture *b)
+static void copy_mb(CinepakEncContext *s,
+                    uint8_t *a_data[4], int a_linesize[4],
+                    uint8_t *b_data[4], int b_linesize[4])
 {
     int y, p;
 
     for(y = 0; y < MB_SIZE; y++) {
-        memcpy(a->data[0]+y*a->linesize[0], b->data[0]+y*b->linesize[0],
+        memcpy(a_data[0]+y*a_linesize[0], b_data[0]+y*b_linesize[0],
                MB_SIZE);
     }
 
     if(s->pix_fmt == AV_PIX_FMT_RGB24) {
         for(p = 1; p <= 2; p++) {
             for(y = 0; y < MB_SIZE/2; y++) {
-                memcpy(a->data[p] + y*a->linesize[p],
-                       b->data[p] + y*b->linesize[p],
+                memcpy(a_data[p] + y*a_linesize[p],
+                       b_data[p] + y*b_linesize[p],
                        MB_SIZE/2);
             }
         }
     }
 }
 
-static int encode_mode(CinepakEncContext *s, int h, AVPicture *scratch_pict, AVPicture *last_pict, strip_info *info, unsigned char *buf)
+static int encode_mode(CinepakEncContext *s, int h,
+                       uint8_t *scratch_data[4], int scratch_linesize[4],
+                       uint8_t *last_data[4], int last_linesize[4],
+                       strip_info *info, unsigned char *buf)
 {
     int x, y, z, flags, bits, temp_size, header_ofs, ret = 0, mb_count = s->w * h / MB_AREA;
     int needs_extra_bit, should_write_temp;
     unsigned char temp[64]; //32/2 = 16 V4 blocks at 4 B each -> 64 B
     mb_info *mb;
-    AVPicture sub_scratch = {{0}}, sub_last = {{0}};
+    uint8_t *sub_scratch_data[4] = {0}, *sub_last_data[4] = {0};
+    int sub_scratch_linesize[4] = {0}, sub_last_linesize[4] = {0};
 
     //encode codebooks
 ////// MacOS vintage decoder compatibility dictates the presence of
@@ -640,15 +650,21 @@ static int encode_mode(CinepakEncContext *s, int h, AVPicture *scratch_pict, AVP
         for(x = 0; x < s->w; x += MB_SIZE, z++) {
             mb = &s->mb[z];
 
-            get_sub_picture(s, x, y, scratch_pict, &sub_scratch);
+            get_sub_picture(s, x, y, scratch_data, scratch_linesize,
+                            sub_scratch_data, sub_scratch_linesize);
 
             if(info->mode == MODE_MC && mb->best_encoding == ENC_SKIP) {
-                get_sub_picture(s, x, y, last_pict, &sub_last);
-                copy_mb(s, &sub_scratch, &sub_last);
+                get_sub_picture(s, x, y,
+                                last_data, last_linesize,
+                                sub_last_data, sub_last_linesize);
+                copy_mb(s, sub_scratch_data, sub_scratch_linesize,
+                        sub_last_data, sub_last_linesize);
             } else if(info->mode == MODE_V1_ONLY || mb->best_encoding == ENC_V1)
-                decode_v1_vector(s, &sub_scratch, mb->v1_vector, info);
+                decode_v1_vector(s, sub_scratch_data, sub_scratch_linesize,
+                                 mb->v1_vector, info);
             else
-                decode_v4_vector(s, &sub_scratch, mb->v4_vector, info);
+                decode_v4_vector(s, sub_scratch_data, sub_scratch_linesize,
+                                 mb->v4_vector, info);
         }
     }
 
@@ -755,13 +771,15 @@ static int encode_mode(CinepakEncContext *s, int h, AVPicture *scratch_pict, AVP
 }
 
 //computes distortion of 4x4 MB in b compared to a
-static int compute_mb_distortion(CinepakEncContext *s, AVPicture *a, AVPicture *b)
+static int compute_mb_distortion(CinepakEncContext *s,
+                                 uint8_t *a_data[4], int a_linesize[4],
+                                 uint8_t *b_data[4], int b_linesize[4])
 {
     int x, y, p, d, ret = 0;
 
     for(y = 0; y < MB_SIZE; y++) {
         for(x = 0; x < MB_SIZE; x++) {
-            d = a->data[0][x + y*a->linesize[0]] - b->data[0][x + y*b->linesize[0]];
+            d = a_data[0][x + y*a_linesize[0]] - b_data[0][x + y*b_linesize[0]];
             ret += d*d;
         }
     }
@@ -770,7 +788,7 @@ static int compute_mb_distortion(CinepakEncContext *s, AVPicture *a, AVPicture *
         for(p = 1; p <= 2; p++) {
             for(y = 0; y < MB_SIZE/2; y++) {
                 for(x = 0; x < MB_SIZE/2; x++) {
-                    d = a->data[p][x + y*a->linesize[p]] - b->data[p][x + y*b->linesize[p]];
+                    d = a_data[p][x + y*a_linesize[p]] - b_data[p][x + y*b_linesize[p]];
                     ret += d*d;
                 }
             }
@@ -782,7 +800,8 @@ static int compute_mb_distortion(CinepakEncContext *s, AVPicture *a, AVPicture *
 
 // return the possibly adjusted size of the codebook
 #define CERTAIN(x) ((x)!=ENC_UNCERTAIN)
-static int quantize(CinepakEncContext *s, int h, AVPicture *pict,
+static int quantize(CinepakEncContext *s, int h,
+                    uint8_t *data[4], int linesize[4],
                     int v1mode, strip_info *info,
                     mb_encoding encoding)
 {
@@ -792,7 +811,8 @@ static int quantize(CinepakEncContext *s, int h, AVPicture *pict,
     int size = v1mode ? info->v1_size : info->v4_size;
     int64_t total_error = 0;
     uint8_t vq_pict_buf[(MB_AREA*3)/2];
-    AVPicture sub_pict, vq_pict;
+    uint8_t *sub_data    [4], *vq_data    [4];
+    int      sub_linesize[4],  vq_linesize[4];
 
     for(mbn = i = y = 0; y < h; y += MB_SIZE) {
         for(x = 0; x < s->w; x += MB_SIZE, ++mbn) {
@@ -812,10 +832,10 @@ static int quantize(CinepakEncContext *s, int h, AVPicture *pict,
                         shift = y2 < 4 ? 0 : 1;
                         x3 = shift ? 0 : x2;
                         y3 = shift ? 0 : y2;
-                        base[j] = (pict->data[plane][((x+x3) >> shift) +      ((y+y3) >> shift)      * pict->linesize[plane]] +
-                                   pict->data[plane][((x+x3) >> shift) + 1 +  ((y+y3) >> shift)      * pict->linesize[plane]] +
-                                   pict->data[plane][((x+x3) >> shift) +     (((y+y3) >> shift) + 1) * pict->linesize[plane]] +
-                                   pict->data[plane][((x+x3) >> shift) + 1 + (((y+y3) >> shift) + 1) * pict->linesize[plane]]) >> 2;
+                        base[j] = (data[plane][((x+x3) >> shift) +      ((y+y3) >> shift)      * linesize[plane]] +
+                                   data[plane][((x+x3) >> shift) + 1 +  ((y+y3) >> shift)      * linesize[plane]] +
+                                   data[plane][((x+x3) >> shift) +     (((y+y3) >> shift) + 1) * linesize[plane]] +
+                                   data[plane][((x+x3) >> shift) + 1 + (((y+y3) >> shift) + 1) * linesize[plane]]) >> 2;
                     }
                 }
             } else {
@@ -833,7 +853,7 @@ static int quantize(CinepakEncContext *s, int h, AVPicture *pict,
                                 y3 = y + y2 + (k >> 1);
                             }
 
-                            base[j] = pict->data[plane][x3 + y3*pict->linesize[plane]];
+                            base[j] = data[plane][x3 + y3*linesize[plane]];
                         }
                     }
                 }
@@ -855,12 +875,12 @@ static int quantize(CinepakEncContext *s, int h, AVPicture *pict,
     avpriv_init_elbg(s->codebook_input, entry_size, i, codebook, size, 1, s->codebook_closest, &s->randctx);
     avpriv_do_elbg(s->codebook_input, entry_size, i, codebook, size, 1, s->codebook_closest, &s->randctx);
 
-    //setup vq_pict, which contains a single MB
-    vq_pict.data[0] = vq_pict_buf;
-    vq_pict.linesize[0] = MB_SIZE;
-    vq_pict.data[1] = &vq_pict_buf[MB_AREA];
-    vq_pict.data[2] = vq_pict.data[1] + (MB_AREA >> 2);
-    vq_pict.linesize[1] = vq_pict.linesize[2] = MB_SIZE >> 1;
+    //setup vq_data, which contains a single MB
+    vq_data[0] = vq_pict_buf;
+    vq_linesize[0] = MB_SIZE;
+    vq_data[1] = &vq_pict_buf[MB_AREA];
+    vq_data[2] = vq_data[1] + (MB_AREA >> 2);
+    vq_linesize[1] = vq_linesize[2] = MB_SIZE >> 1;
 
     //copy indices
     for(i = j = y = 0; y < h; y += MB_SIZE) {
@@ -870,25 +890,27 @@ static int quantize(CinepakEncContext *s, int h, AVPicture *pict,
             if(CERTAIN(encoding) && mb->best_encoding != encoding)
                 continue;
 
-            //point sub_pict to current MB
-            get_sub_picture(s, x, y, pict, &sub_pict);
+            //point sub_data to current MB
+            get_sub_picture(s, x, y, data, linesize, sub_data, sub_linesize);
 
             if(v1mode) {
                 mb->v1_vector = s->codebook_closest[i];
 
-                //fill in vq_pict with V1 data
-                decode_v1_vector(s, &vq_pict, mb->v1_vector, info);
+                //fill in vq_data with V1 data
+                decode_v1_vector(s, vq_data, vq_linesize, mb->v1_vector, info);
 
-                mb->v1_error = compute_mb_distortion(s, &sub_pict, &vq_pict);
+                mb->v1_error = compute_mb_distortion(s, sub_data, sub_linesize,
+                                                     vq_data, vq_linesize);
                 total_error += mb->v1_error;
             } else {
                 for(k = 0; k < 4; k++)
                     mb->v4_vector[k] = s->codebook_closest[i+k];
 
-                //fill in vq_pict with V4 data
-                decode_v4_vector(s, &vq_pict, mb->v4_vector, info);
+                //fill in vq_data with V4 data
+                decode_v4_vector(s, vq_data, vq_linesize, mb->v4_vector, info);
 
-                mb->v4_error = compute_mb_distortion(s, &sub_pict, &vq_pict);
+                mb->v4_error = compute_mb_distortion(s, sub_data, sub_linesize,
+                                                     vq_data, vq_linesize);
                 total_error += mb->v4_error;
             }
             i += v1mode ? 1 : 4;
@@ -897,22 +919,30 @@ static int quantize(CinepakEncContext *s, int h, AVPicture *pict,
 // check that we did it right in the beginning of the function
     av_assert0(i >= size); // training set is no smaller than the codebook
 
-    //av_log(s->avctx, AV_LOG_INFO, "isv1 %i size= %i i= %i error %lli\n", v1mode, size, i, (long long int)total_error);
+    //av_log(s->avctx, AV_LOG_INFO, "isv1 %i size= %i i= %i error %"PRId64"\n", v1mode, size, i, total_error);
 
     return size;
 }
 
-static void calculate_skip_errors(CinepakEncContext *s, int h, AVPicture *last_pict, AVPicture *pict, strip_info *info)
+static void calculate_skip_errors(CinepakEncContext *s, int h,
+                                  uint8_t *last_data[4], int last_linesize[4],
+                                  uint8_t *data[4], int linesize[4],
+                                  strip_info *info)
 {
     int x, y, i;
-    AVPicture sub_last, sub_pict;
+    uint8_t *sub_last_data    [4], *sub_pict_data    [4];
+    int      sub_last_linesize[4],  sub_pict_linesize[4];
 
     for(i = y = 0; y < h; y += MB_SIZE) {
         for(x = 0; x < s->w; x += MB_SIZE, i++) {
-            get_sub_picture(s, x, y, last_pict, &sub_last);
-            get_sub_picture(s, x, y, pict,      &sub_pict);
-
-            s->mb[i].skip_error = compute_mb_distortion(s, &sub_last, &sub_pict);
+            get_sub_picture(s, x, y, last_data,     last_linesize,
+                                 sub_last_data, sub_last_linesize);
+            get_sub_picture(s, x, y,      data,          linesize,
+                                 sub_pict_data, sub_pict_linesize);
+
+            s->mb[i].skip_error = compute_mb_distortion(s,
+                                            sub_last_data, sub_last_linesize,
+                                            sub_pict_data, sub_pict_linesize);
         }
     }
 }
@@ -935,7 +965,11 @@ static void write_strip_header(CinepakEncContext *s, int y, int h, int keyframe,
     //av_log(s->avctx, AV_LOG_INFO, "write_strip_header() %x keyframe=%d\n", buf[0], keyframe);
 }
 
-static int rd_strip(CinepakEncContext *s, int y, int h, int keyframe, AVPicture *last_pict, AVPicture *pict, AVPicture *scratch_pict, unsigned char *buf, int64_t *best_score
+static int rd_strip(CinepakEncContext *s, int y, int h, int keyframe,
+                    uint8_t *last_data[4], int last_linesize[4],
+                    uint8_t *data[4], int linesize[4],
+                    uint8_t *scratch_data[4], int scratch_linesize[4],
+                    unsigned char *buf, int64_t *best_score
 #ifdef CINEPAK_REPORT_SERR
 , int64_t *best_serr
 #endif
@@ -953,7 +987,8 @@ static int rd_strip(CinepakEncContext *s, int y, int h, int keyframe, AVPicture
     int v1shrunk, v4shrunk;
 
     if(!keyframe)
-        calculate_skip_errors(s, h, last_pict, pict, &info);
+        calculate_skip_errors(s, h, last_data, last_linesize, data, linesize,
+                              &info);
 
     //try some powers of 4 for the size of the codebooks
     //constraint the v4 codebook to be no bigger than v1 one,
@@ -971,7 +1006,8 @@ static int rd_strip(CinepakEncContext *s, int y, int h, int keyframe, AVPicture
                 if(mode == MODE_V1_ONLY) {
                     info.v1_size = v1_size;
 // the size may shrink even before optimizations if the input is short:
-                    info.v1_size = quantize(s, h, pict, 1, &info, ENC_UNCERTAIN);
+                    info.v1_size = quantize(s, h, data, linesize, 1,
+                                            &info, ENC_UNCERTAIN);
                     if(info.v1_size < v1_size)
 // too few eligible blocks, no sense in trying bigger sizes
                         v1enough = 1;
@@ -984,7 +1020,8 @@ static int rd_strip(CinepakEncContext *s, int y, int h, int keyframe, AVPicture
 
                     if(mode == MODE_V1_V4) {
                         info.v4_size = v4_size;
-                        info.v4_size = quantize(s, h, pict, 0, &info, ENC_UNCERTAIN);
+                        info.v4_size = quantize(s, h, data, linesize, 0,
+                                                &info, ENC_UNCERTAIN);
                         if(info.v4_size < v4_size)
 // too few eligible blocks, no sense in trying bigger sizes
                             v4enough = 1;
@@ -1005,14 +1042,14 @@ static int rd_strip(CinepakEncContext *s, int y, int h, int keyframe, AVPicture
 // recompute the codebooks, omitting the extra blocks
 // we assume we _may_ come here with more blocks to encode than before
                     info.v1_size = v1_size;
-                    new_v1_size = quantize(s, h, pict, 1, &info, ENC_V1);
+                    new_v1_size = quantize(s, h, data, linesize, 1, &info, ENC_V1);
                     if(new_v1_size < info.v1_size){
                         //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v1 codebook to %i entries\n", mode, v1_size, v4_size, new_v1_size);
                         info.v1_size = new_v1_size;
                     }
 // we assume we _may_ come here with more blocks to encode than before
                     info.v4_size = v4_size;
-                    new_v4_size = quantize(s, h, pict, 0, &info, ENC_V4);
+                    new_v4_size = quantize(s, h, data, linesize, 0, &info, ENC_V4);
                     if(new_v4_size < info.v4_size) {
                         //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v4 codebook to %i entries at first iteration\n", mode, v1_size, v4_size, new_v4_size);
                         info.v4_size = new_v4_size;
@@ -1033,7 +1070,7 @@ static int rd_strip(CinepakEncContext *s, int y, int h, int keyframe, AVPicture
 // recompute the codebooks, omitting the extra blocks
                         if(v1shrunk) {
                             info.v1_size = v1_size;
-                            new_v1_size = quantize(s, h, pict, 1, &info, ENC_V1);
+                            new_v1_size = quantize(s, h, data, linesize, 1, &info, ENC_V1);
                             if(new_v1_size < info.v1_size){
                                 //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v1 codebook to %i entries\n", mode, v1_size, v4_size, new_v1_size);
                                 info.v1_size = new_v1_size;
@@ -1041,7 +1078,7 @@ static int rd_strip(CinepakEncContext *s, int y, int h, int keyframe, AVPicture
                         }
                         if(v4shrunk) {
                             info.v4_size = v4_size;
-                            new_v4_size = quantize(s, h, pict, 0, &info, ENC_V4);
+                            new_v4_size = quantize(s, h, data, linesize, 0, &info, ENC_V4);
                             if(new_v4_size < info.v4_size) {
                                 //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v4 codebook to %i entries\n", mode, v1_size, v4_size, new_v4_size);
                                 info.v4_size = new_v4_size;
@@ -1050,7 +1087,7 @@ static int rd_strip(CinepakEncContext *s, int y, int h, int keyframe, AVPicture
                     }
                 }
 
-                //av_log(s->avctx, AV_LOG_INFO, "%3i %3i score = %lli\n", v1_size, v4_size, (long long int)score);
+                //av_log(s->avctx, AV_LOG_INFO, "%3i %3i score = %"PRId64"\n", v1_size, v4_size, score);
 
                 if(best_size == 0 || score < *best_score) {
 
@@ -1058,12 +1095,15 @@ static int rd_strip(CinepakEncContext *s, int y, int h, int keyframe, AVPicture
 #ifdef CINEPAK_REPORT_SERR
                     *best_serr = serr;
 #endif
-                    best_size = encode_mode(s, h, scratch_pict, last_pict, &info, s->strip_buf + STRIP_HEADER_SIZE);
+                    best_size = encode_mode(s, h,
+                                            scratch_data, scratch_linesize,
+                                            last_data, last_linesize, &info,
+                                            s->strip_buf + STRIP_HEADER_SIZE);
 
-                    //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: %18lli %i B", mode, info.v1_size, info.v4_size, (long long int)score, best_size);
+                    //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: %18"PRId64" %i B", mode, info.v1_size, info.v4_size, score, best_size);
                     //av_log(s->avctx, AV_LOG_INFO, "\n");
 #ifdef CINEPAK_REPORT_SERR
-                    av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: %18lli %i B\n", mode, v1_size, v4_size, (long long int)serr, best_size);
+                    av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: %18"PRId64" %i B\n", mode, v1_size, v4_size, serr, best_size);
 #endif
 
 #ifdef CINEPAKENC_DEBUG
@@ -1118,10 +1158,12 @@ static int write_cvid_header(CinepakEncContext *s, unsigned char *buf, int num_s
     return CVID_HEADER_SIZE;
 }
 
-static int rd_frame(CinepakEncContext *s, const AVFrame *frame, int isakeyframe, unsigned char *buf, int buf_size)
+static int rd_frame(CinepakEncContext *s, const AVFrame *frame,
+                    int isakeyframe, unsigned char *buf, int buf_size)
 {
     int num_strips, strip, i, y, nexty, size, temp_size;
-    AVPicture last_pict, pict, scratch_pict;
+    uint8_t *last_data    [4], *data    [4], *scratch_data    [4];
+    int      last_linesize[4],  linesize[4],  scratch_linesize[4];
     int64_t best_score = 0, score, score_temp;
 #ifdef CINEPAK_REPORT_SERR
     int64_t best_serr = 0, serr, serr_temp;
@@ -1135,9 +1177,11 @@ static int rd_frame(CinepakEncContext *s, const AVFrame *frame, int isakeyframe,
         for(y = 0; y < s->h; y += 2) {
             for(x = 0; x < s->w; x += 2) {
                 uint8_t *ir[2]; int32_t r, g, b, rr, gg, bb;
-                ir[0] = ((AVPicture*)frame)->data[0] + x*3 + y*((AVPicture*)frame)->linesize[0];
-                ir[1] = ir[0] + ((AVPicture*)frame)->linesize[0];
-                get_sub_picture(s, x, y, (AVPicture*)s->input_frame, &scratch_pict);
+                ir[0] = frame->data[0] + x*3 + y*frame->linesize[0];
+                ir[1] = ir[0] + frame->linesize[0];
+                get_sub_picture(s, x, y,
+                                s->input_frame->data, s->input_frame->linesize,
+                                scratch_data, scratch_linesize);
                 r = g = b = 0;
                 for(i=0; i<4; ++i) {
                     int i1, i2;
@@ -1152,7 +1196,7 @@ static int rd_frame(CinepakEncContext *s, const AVFrame *frame, int isakeyframe,
                     rr = (2396625*rr + 4793251*gg + 1198732*bb) >> 23;
                     if(      rr <   0) rr =   0;
                     else if (rr > 255) rr = 255;
-                    scratch_pict.data[0][i1 + i2*scratch_pict.linesize[0]] = rr;
+                    scratch_data[0][i1 + i2*scratch_linesize[0]] = rr;
                 }
 // let us scale down as late as possible
 //                r /= 4; g /= 4; b /= 4;
@@ -1161,13 +1205,13 @@ static int rd_frame(CinepakEncContext *s, const AVFrame *frame, int isakeyframe,
                 rr = (-299683*r - 599156*g + 898839*b) >> 23;
                 if(      rr < -128) rr = -128;
                 else if (rr >  127) rr =  127;
-                scratch_pict.data[1][0] = rr + 128; // quantize needs unsigned
+                scratch_data[1][0] = rr + 128; // quantize needs unsigned
 // "V"
 //                rr = 0.3571*r - 0.2857*g - 0.0714*b;
                 rr = (748893*r - 599156*g - 149737*b) >> 23;
                 if(      rr < -128) rr = -128;
                 else if (rr >  127) rr =  127;
-                scratch_pict.data[2][0] = rr + 128; // quantize needs unsigned
+                scratch_data[2][0] = rr + 128; // quantize needs unsigned
             }
         }
     }
@@ -1196,13 +1240,24 @@ static int rd_frame(CinepakEncContext *s, const AVFrame *frame, int isakeyframe,
             }
 
             if(s->pix_fmt == AV_PIX_FMT_RGB24)
-                get_sub_picture(s, 0, y, (AVPicture*)s->input_frame,    &pict);
+                get_sub_picture(s, 0, y,
+                                s->input_frame->data, s->input_frame->linesize,
+                                data, linesize);
             else
-                get_sub_picture(s, 0, y, (AVPicture*)frame,              &pict);
-            get_sub_picture(s, 0, y, (AVPicture*)s->last_frame,    &last_pict);
-            get_sub_picture(s, 0, y, (AVPicture*)s->scratch_frame, &scratch_pict);
-
-            if((temp_size = rd_strip(s, y, strip_height, isakeyframe, &last_pict, &pict, &scratch_pict, s->frame_buf + size + CVID_HEADER_SIZE, &score_temp
+                get_sub_picture(s, 0, y,
+                                (uint8_t **)frame->data, (int*)frame->linesize,
+                                data, linesize);
+            get_sub_picture(s, 0, y,
+                            s->last_frame->data, s->last_frame->linesize,
+                            last_data, last_linesize);
+            get_sub_picture(s, 0, y,
+                            s->scratch_frame->data, s->scratch_frame->linesize,
+                            scratch_data, scratch_linesize);
+
+            if((temp_size = rd_strip(s, y, strip_height, isakeyframe,
+                                     last_data, last_linesize, data, linesize,
+                                     scratch_data, scratch_linesize,
+                                     s->frame_buf + size + CVID_HEADER_SIZE, &score_temp
 #ifdef CINEPAK_REPORT_SERR
 , &serr_temp
 #endif
@@ -1224,9 +1279,9 @@ static int rd_frame(CinepakEncContext *s, const AVFrame *frame, int isakeyframe,
             best_serr = serr;
 #endif
             best_size = size + write_cvid_header(s, s->frame_buf, num_strips, size, isakeyframe);
-            //av_log(s->avctx, AV_LOG_INFO, "best number of strips so far: %2i, %12lli, %i B\n", num_strips, (long long int)score, best_size);
+            //av_log(s->avctx, AV_LOG_INFO, "best number of strips so far: %2i, %12"PRId64", %i B\n", num_strips, score, best_size);
 #ifdef CINEPAK_REPORT_SERR
-            av_log(s->avctx, AV_LOG_INFO, "best number of strips so far: %2i, %12lli, %i B\n", num_strips, (long long int)serr, best_size);
+            av_log(s->avctx, AV_LOG_INFO, "best number of strips so far: %2i, %12"PRId64", %i B\n", num_strips, serr, best_size);
 #endif
 
             FFSWAP(AVFrame *, s->best_frame, s->scratch_frame);
@@ -1275,7 +1330,7 @@ static int cinepak_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 
     s->lambda = frame->quality ? frame->quality - 1 : 2 * FF_LAMBDA_SCALE;
 
-    if ((ret = ff_alloc_packet2(avctx, pkt, s->frame_buf_size)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, pkt, s->frame_buf_size, 0)) < 0)
         return ret;
     ret = rd_frame(s, frame, (s->curframe == 0), pkt->data, s->frame_buf_size);
     pkt->size = ret;
diff --git a/libavcodec/cljrdec.c b/libavcodec/cljrdec.c
index 68c87717..4b187f8c 100644
--- a/libavcodec/cljrdec.c
+++ b/libavcodec/cljrdec.c
@@ -89,6 +89,6 @@ AVCodec ff_cljr_decoder = {
     .id             = AV_CODEC_ID_CLJR,
     .init           = decode_init,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
 
diff --git a/libavcodec/cljrenc.c b/libavcodec/cljrenc.c
index c672f800..a3718259 100644
--- a/libavcodec/cljrenc.c
+++ b/libavcodec/cljrenc.c
@@ -56,7 +56,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
          return AVERROR_EXPERIMENTAL;
     }
 
-    if ((ret = ff_alloc_packet2(avctx, pkt, 32*avctx->height*avctx->width/4)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, pkt, 32*avctx->height*avctx->width/4, 0)) < 0)
         return ret;
 
     init_put_bits(&pb, pkt->data, pkt->size);
diff --git a/libavcodec/cllc.c b/libavcodec/cllc.c
index c9ab8b93..1c6902af 100644
--- a/libavcodec/cllc.c
+++ b/libavcodec/cllc.c
@@ -495,5 +495,5 @@ AVCodec ff_cllc_decoder = {
     .init           = cllc_decode_init,
     .decode         = cllc_decode_frame,
     .close          = cllc_decode_close,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/cngdec.c b/libavcodec/cngdec.c
index c49e903c..02510860 100644
--- a/libavcodec/cngdec.c
+++ b/libavcodec/cngdec.c
@@ -22,6 +22,7 @@
 #include <math.h>
 
 #include "libavutil/common.h"
+#include "libavutil/internal.h"
 #include "avcodec.h"
 #include "celp_filters.h"
 #include "internal.h"
@@ -112,7 +113,7 @@ static int cng_decode_frame(AVCodecContext *avctx, void *data,
 
     if (avpkt->size) {
         int dbov = -avpkt->data[0];
-        p->target_energy = 1081109975 * pow(10, dbov / 10.0) * 0.75;
+        p->target_energy = 1081109975 * ff_exp10(dbov / 10.0) * 0.75;
         memset(p->target_refl_coef, 0, p->order * sizeof(*p->target_refl_coef));
         for (i = 0; i < FFMIN(avpkt->size - 1, p->order); i++) {
             p->target_refl_coef[i] = (avpkt->data[1 + i] - 127) / 128.0;
@@ -167,5 +168,5 @@ AVCodec ff_comfortnoise_decoder = {
     .close          = cng_decode_close,
     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
                                                      AV_SAMPLE_FMT_NONE },
-    .capabilities   = CODEC_CAP_DELAY | CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/cngenc.c b/libavcodec/cngenc.c
index 58918aa9..302c703f 100644
--- a/libavcodec/cngenc.c
+++ b/libavcodec/cngenc.c
@@ -75,7 +75,7 @@ static int cng_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     int qdbov;
     int16_t *samples = (int16_t*) frame->data[0];
 
-    if ((ret = ff_alloc_packet(avpkt, 1 + p->order))) {
+    if ((ret = ff_alloc_packet2(avctx, avpkt, 1 + p->order, 1 + p->order))) {
         av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
         return ret;
     }
@@ -97,7 +97,7 @@ static int cng_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
         avpkt->data[1 + i] = p->ref_coef[i] * 127 + 127;
 
     *got_packet_ptr = 1;
-    avpkt->size = 1 + p->order;
+    av_assert1(avpkt->size == 1 + p->order);
 
     return 0;
 }
diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c
index c1694f33..672bf3ff 100644
--- a/libavcodec/codec_desc.c
+++ b/libavcodec/codec_desc.c
@@ -24,6 +24,7 @@
 #include "libavutil/common.h"
 #include "libavutil/internal.h"
 #include "avcodec.h"
+#include "profiles.h"
 #include "version.h"
 
 #define MT(...) (const char *const[]){ __VA_ARGS__, NULL }
@@ -43,6 +44,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .name      = "mpeg2video",
         .long_name = NULL_IF_CONFIG_SMALL("MPEG-2 video"),
         .props     = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER,
+        .profiles  = NULL_IF_CONFIG_SMALL(ff_mpeg2_video_profiles),
     },
 #if FF_API_XVMC
     {
@@ -102,6 +104,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .name      = "mpeg4",
         .long_name = NULL_IF_CONFIG_SMALL("MPEG-4 part 2"),
         .props     = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER,
+        .profiles  = NULL_IF_CONFIG_SMALL(ff_mpeg4_video_profiles),
     },
     {
         .id        = AV_CODEC_ID_RAWVIDEO,
@@ -207,6 +210,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .name      = "h264",
         .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
         .props     = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_LOSSLESS | AV_CODEC_PROP_REORDER,
+        .profiles  = NULL_IF_CONFIG_SMALL(ff_h264_profiles),
     },
     {
         .id        = AV_CODEC_ID_INDEO3,
@@ -473,6 +477,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .name      = "vc1",
         .long_name = NULL_IF_CONFIG_SMALL("SMPTE VC-1"),
         .props     = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER,
+        .profiles  = NULL_IF_CONFIG_SMALL(ff_vc1_profiles),
     },
     {
         .id        = AV_CODEC_ID_WMV3,
@@ -480,6 +485,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .name      = "wmv3",
         .long_name = NULL_IF_CONFIG_SMALL("Windows Media Video 9"),
         .props     = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER,
+        .profiles  = NULL_IF_CONFIG_SMALL(ff_vc1_profiles),
     },
     {
         .id        = AV_CODEC_ID_LOCO,
@@ -602,6 +608,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY |
                      AV_CODEC_PROP_LOSSLESS,
         .mime_types= MT("image/jp2"),
+        .profiles  = NULL_IF_CONFIG_SMALL(ff_jpeg2000_profiles),
     },
     {
         .id        = AV_CODEC_ID_VMNC,
@@ -736,6 +743,13 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .long_name = NULL_IF_CONFIG_SMALL("Escape 124"),
         .props     = AV_CODEC_PROP_LOSSY,
     },
+    {
+        .id        = AV_CODEC_ID_DAALA,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "daala",
+        .long_name = NULL_IF_CONFIG_SMALL("Daala"),
+        .props     = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_LOSSLESS,
+    },
     {
         .id        = AV_CODEC_ID_DIRAC,
         .type      = AVMEDIA_TYPE_VIDEO,
@@ -876,13 +890,6 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .long_name = NULL_IF_CONFIG_SMALL("IFF ILBM"),
         .props     = AV_CODEC_PROP_LOSSY,
     },
-    {
-        .id        = AV_CODEC_ID_IFF_BYTERUN1,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "iff_byterun1",
-        .long_name = NULL_IF_CONFIG_SMALL("IFF ByteRun1"),
-        .props     = AV_CODEC_PROP_LOSSY,
-    },
     {
         .id        = AV_CODEC_ID_KGV1,
         .type      = AVMEDIA_TYPE_VIDEO,
@@ -910,6 +917,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .name      = "vp9",
         .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
         .props     = AV_CODEC_PROP_LOSSY,
+        .profiles  = NULL_IF_CONFIG_SMALL(ff_vp9_profiles),
     },
     {
         .id        = AV_CODEC_ID_PICTOR,
@@ -1178,7 +1186,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
     {
         .id        = AV_CODEC_ID_SMVJPEG,
         .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "smv",
+        .name      = "smvjpeg",
         .long_name = NULL_IF_CONFIG_SMALL("Sigmatel Motion Video"),
     },
 
@@ -1202,6 +1210,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .name      = "hevc",
         .long_name = NULL_IF_CONFIG_SMALL("H.265 / HEVC (High Efficiency Video Coding)"),
         .props     = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER,
+        .profiles  = NULL_IF_CONFIG_SMALL(ff_hevc_profiles),
     },
     {
         .id        = AV_CODEC_ID_FIC,
@@ -1252,6 +1261,41 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .long_name = NULL_IF_CONFIG_SMALL("Canopus HQ/HQA"),
         .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
     },
+    {
+        .id        = AV_CODEC_ID_HAP,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "hap",
+        .long_name = NULL_IF_CONFIG_SMALL("Vidvox Hap decoder"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_DXV,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "dxv",
+        .long_name = NULL_IF_CONFIG_SMALL("Resolume DXV"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_SCREENPRESSO,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "screenpresso",
+        .long_name = NULL_IF_CONFIG_SMALL("Screenpresso"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_WRAPPED_AVFRAME,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "wrapped_avframe",
+        .long_name = NULL_IF_CONFIG_SMALL("AVFrame to AVPacket passthrough"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_RSCC,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "rscc",
+        .long_name = NULL_IF_CONFIG_SMALL("innoHeim/Rsupport Screen Capture Codec"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+    },
 
     /* image codecs */
     {
@@ -1275,6 +1319,14 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .long_name = NULL_IF_CONFIG_SMALL("BRender PIX image"),
         .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
     },
+    {
+        .id        = AV_CODEC_ID_DDS,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "dds",
+        .long_name = NULL_IF_CONFIG_SMALL("DirectDraw Surface image decoder"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY |
+                     AV_CODEC_PROP_LOSSLESS,
+    },
     {
         .id        = AV_CODEC_ID_DPX,
         .type      = AVMEDIA_TYPE_VIDEO,
@@ -1469,6 +1521,13 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .props     = AV_CODEC_PROP_LOSSLESS,
         .mime_types= MT("image/png"),
     },
+    {
+        .id        = AV_CODEC_ID_CFHD,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "cfhd",
+        .long_name = NULL_IF_CONFIG_SMALL("Cineform HD"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
 
     /* various PCM "codecs" */
     {
@@ -1820,7 +1879,14 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .id        = AV_CODEC_ID_ADPCM_THP,
         .type      = AVMEDIA_TYPE_AUDIO,
         .name      = "adpcm_thp",
-        .long_name = NULL_IF_CONFIG_SMALL("ADPCM Nintendo Gamecube THP"),
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM Nintendo THP"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_THP_LE,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_thp_le",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM Nintendo THP (Little-Endian)"),
         .props     = AV_CODEC_PROP_LOSSY,
     },
     {
@@ -1942,6 +2008,20 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .long_name = NULL_IF_CONFIG_SMALL("LucasArts VIMA audio"),
         .props     = AV_CODEC_PROP_LOSSY,
     },
+    {
+        .id        = AV_CODEC_ID_ADPCM_PSX,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_psx",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM Playstation"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_AICA,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_aica",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM Yamaha AICA"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
 
     /* AMR */
     {
@@ -2004,6 +2084,13 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .long_name = NULL_IF_CONFIG_SMALL("DPCM Sol"),
         .props     = AV_CODEC_PROP_LOSSY,
     },
+    {
+        .id        = AV_CODEC_ID_SDX2_DPCM,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "sdx2_dpcm",
+        .long_name = NULL_IF_CONFIG_SMALL("DPCM Squareroot-Delta-Exact"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
 
     /* audio codecs */
     {
@@ -2026,6 +2113,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .name      = "aac",
         .long_name = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
         .props     = AV_CODEC_PROP_LOSSY,
+        .profiles  = NULL_IF_CONFIG_SMALL(ff_aac_profiles),
     },
     {
         .id        = AV_CODEC_ID_AC3,
@@ -2040,6 +2128,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .name      = "dts",
         .long_name = NULL_IF_CONFIG_SMALL("DCA (DTS Coherent Acoustics)"),
         .props     = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_LOSSLESS,
+        .profiles  = NULL_IF_CONFIG_SMALL(ff_dca_profiles),
     },
     {
         .id        = AV_CODEC_ID_VORBIS,
@@ -2364,6 +2453,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .name      = "aac_latm",
         .long_name = NULL_IF_CONFIG_SMALL("AAC LATM (Advanced Audio Coding LATM syntax)"),
         .props     = AV_CODEC_PROP_LOSSY,
+        .profiles  = NULL_IF_CONFIG_SMALL(ff_aac_profiles),
     },
     {
         .id        = AV_CODEC_ID_QDMC,
@@ -2516,6 +2606,13 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .long_name = NULL_IF_CONFIG_SMALL("SMV (Selectable Mode Vocoder)"),
         .props     = AV_CODEC_PROP_LOSSY,
     },
+    {
+        .id        = AV_CODEC_ID_4GV,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "4gv",
+        .long_name = NULL_IF_CONFIG_SMALL("4GV (Fourth Generation Vocoder)"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
     {
         .id        = AV_CODEC_ID_DSD_LSBF,
         .type      = AVMEDIA_TYPE_AUDIO,
@@ -2544,6 +2641,27 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .long_name = NULL_IF_CONFIG_SMALL("DSD (Direct Stream Digital), most significant bit first, planar"),
         .props     = AV_CODEC_PROP_LOSSY,
     },
+    {
+        .id        = AV_CODEC_ID_INTERPLAY_ACM,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "interplayacm",
+        .long_name = NULL_IF_CONFIG_SMALL("Interplay ACM"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_XMA1,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "xma1",
+        .long_name = NULL_IF_CONFIG_SMALL("Xbox Media Audio 1"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_XMA2,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "xma2",
+        .long_name = NULL_IF_CONFIG_SMALL("Xbox Media Audio 2"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
 
     /* subtitle codecs */
     {
@@ -2641,6 +2759,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .type      = AVMEDIA_TYPE_SUBTITLE,
         .name      = "eia_608",
         .long_name = NULL_IF_CONFIG_SMALL("EIA-608 closed captions"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
     },
     {
         .id        = AV_CODEC_ID_JACOSUB,
@@ -2705,6 +2824,13 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .long_name = NULL_IF_CONFIG_SMALL("WebVTT subtitle"),
         .props     = AV_CODEC_PROP_TEXT_SUB,
     },
+    {
+        .id        = AV_CODEC_ID_HDMV_TEXT_SUBTITLE,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "hdmv_text_subtitle",
+        .long_name = NULL_IF_CONFIG_SMALL("HDMV Text subtitle"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
+    },
 
     /* other kind of codecs and pseudo-codecs */
     {
@@ -2769,123 +2895,6 @@ static const AVCodecDescriptor codec_descriptors[] = {
     },
 
     /* deprecated codec ids */
-    {
-        .id        = AV_CODEC_ID_BRENDER_PIX_DEPRECATED,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "brender_pix_deprecated",
-        .long_name = NULL_IF_CONFIG_SMALL("BRender PIX image (deprecated id)"),
-        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
-    },
-    {
-        .id        = AV_CODEC_ID_ESCAPE130_DEPRECATED,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "escape130_deprecated",
-        .long_name = NULL_IF_CONFIG_SMALL("Escape 130 (deprecated id)"),
-        .props     = AV_CODEC_PROP_LOSSY,
-    },
-    {
-        .id        = AV_CODEC_ID_EXR_DEPRECATED,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "exr_deprecated",
-        .long_name = NULL_IF_CONFIG_SMALL("OpenEXR image (deprecated id)"),
-        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY |
-                     AV_CODEC_PROP_LOSSLESS,
-    },
-    {
-        .id        = AV_CODEC_ID_G2M_DEPRECATED,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "g2m_deprecated",
-        .long_name = NULL_IF_CONFIG_SMALL("Go2Meeting (deprecated id)"),
-        .props     = AV_CODEC_PROP_LOSSY,
-    },
-    {
-        .id        = AV_CODEC_ID_HEVC_DEPRECATED,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "hevc_deprecated",
-        .long_name = NULL_IF_CONFIG_SMALL("H.265 / HEVC (High Efficiency Video Coding) (deprecated id)"),
-        .props     = AV_CODEC_PROP_LOSSY,
-    },
-    {
-        .id        = AV_CODEC_ID_PAF_VIDEO_DEPRECATED,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "paf_video_deprecated",
-        .long_name = NULL_IF_CONFIG_SMALL("Amazing Studio Packed Animation File Video (deprecated id)"),
-        .props     = AV_CODEC_PROP_LOSSY,
-    },
-    {
-        .id        = AV_CODEC_ID_SANM_DEPRECATED,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "sanm_deprecated",
-        .long_name = NULL_IF_CONFIG_SMALL("LucasArts SANM/SMUSH video (deprecated id)"),
-        .props     = AV_CODEC_PROP_LOSSY,
-    },
-    {
-        .id        = AV_CODEC_ID_VP7_DEPRECATED,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "vp7_deprecated",
-        .long_name = NULL_IF_CONFIG_SMALL("On2 VP7 (deprecated id)"),
-        .props     = AV_CODEC_PROP_LOSSY,
-    },
-    {
-        .id        = AV_CODEC_ID_WEBP_DEPRECATED,
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .name      = "webp_deprecated",
-        .long_name = NULL_IF_CONFIG_SMALL("WebP (deprecated id)"),
-        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY |
-                     AV_CODEC_PROP_LOSSLESS,
-    },
-
-#if FF_API_VIMA_DECODER
-    {
-        .id        = AV_CODEC_ID_VIMA,
-        .type      = AVMEDIA_TYPE_AUDIO,
-        .name      = "vima",
-        .long_name = NULL_IF_CONFIG_SMALL("LucasArts VIMA audio (deprecated id)"),
-        .props     = AV_CODEC_PROP_LOSSY,
-    },
-#endif
-    {
-        .id        = AV_CODEC_ID_ADPCM_VIMA_DEPRECATED,
-        .type      = AVMEDIA_TYPE_AUDIO,
-        .name      = "adpcm_vima_deprecated",
-        .long_name = NULL_IF_CONFIG_SMALL("LucasArts VIMA audio (deprecated id)"),
-        .props     = AV_CODEC_PROP_LOSSY,
-    },
-    {
-        .id        = AV_CODEC_ID_OPUS_DEPRECATED,
-        .type      = AVMEDIA_TYPE_AUDIO,
-        .name      = "opus_deprecated",
-        .long_name = NULL_IF_CONFIG_SMALL("Opus (Opus Interactive Audio Codec) (deprecated id)"),
-        .props     = AV_CODEC_PROP_LOSSY,
-    },
-    {
-        .id        = AV_CODEC_ID_PAF_AUDIO_DEPRECATED,
-        .type      = AVMEDIA_TYPE_AUDIO,
-        .name      = "paf_audio_deprecated",
-        .long_name = NULL_IF_CONFIG_SMALL("Amazing Studio Packed Animation File Audio (deprecated id)"),
-        .props     = AV_CODEC_PROP_LOSSY,
-    },
-    {
-        .id        = AV_CODEC_ID_PCM_S24LE_PLANAR_DEPRECATED,
-        .type      = AVMEDIA_TYPE_AUDIO,
-        .name      = "pcm_s24le_planar_deprecated",
-        .long_name = NULL_IF_CONFIG_SMALL("PCM signed 24-bit little-endian planar (deprecated id)"),
-        .props     = AV_CODEC_PROP_LOSSLESS,
-    },
-    {
-        .id        = AV_CODEC_ID_PCM_S32LE_PLANAR_DEPRECATED,
-        .type      = AVMEDIA_TYPE_AUDIO,
-        .name      = "pcm_s32le_planar_deprecated",
-        .long_name = NULL_IF_CONFIG_SMALL("PCM signed 32-bit little-endian planar (deprecated id)"),
-        .props     = AV_CODEC_PROP_LOSSLESS,
-    },
-    {
-        .id        = AV_CODEC_ID_TAK_DEPRECATED,
-        .type      = AVMEDIA_TYPE_AUDIO,
-        .name      = "tak_deprecated",
-        .long_name = NULL_IF_CONFIG_SMALL("TAK (Tom's lossless Audio Kompressor) (deprecated id)"),
-        .props     = AV_CODEC_PROP_LOSSLESS,
-    },
 };
 
 const AVCodecDescriptor *avcodec_descriptor_get(enum AVCodecID id)
@@ -2916,3 +2925,9 @@ const AVCodecDescriptor *avcodec_descriptor_get_by_name(const char *name)
             return desc;
     return NULL;
 }
+
+enum AVMediaType avcodec_get_type(enum AVCodecID codec_id)
+{
+    const AVCodecDescriptor *desc = avcodec_descriptor_get(codec_id);
+    return desc ? desc->type : AVMEDIA_TYPE_UNKNOWN;
+}
diff --git a/libavcodec/cook.c b/libavcodec/cook.c
index 75b184a9..1b38019a 100644
--- a/libavcodec/cook.c
+++ b/libavcodec/cook.c
@@ -166,10 +166,17 @@ static float rootpow2tab[127];
 /* table generator */
 static av_cold void init_pow2table(void)
 {
+    /* fast way of computing 2^i and 2^(0.5*i) for -63 <= i < 64 */
     int i;
+    static const float exp2_tab[2] = {1, M_SQRT2};
+    float exp2_val = powf(2, -63);
+    float root_val = powf(2, -32);
     for (i = -63; i < 64; i++) {
-        pow2tab[63 + i] = pow(2, i);
-        rootpow2tab[63 + i] = sqrt(pow(2, i));
+        if (!(i & 1))
+            root_val *= 2;
+        pow2tab[63 + i] = exp2_val;
+        rootpow2tab[63 + i] = root_val * exp2_tab[i & 1];
+        exp2_val *= 2;
     }
 }
 
@@ -1028,7 +1035,7 @@ static void dump_cook_context(COOKContext *q)
     }
     ff_dlog(q->avctx, "COOKContext\n");
     PRINT("nb_channels", q->avctx->channels);
-    PRINT("bit_rate", q->avctx->bit_rate);
+    PRINT("bit_rate", (int)q->avctx->bit_rate);
     PRINT("sample_rate", q->avctx->sample_rate);
     PRINT("samples_per_channel", q->subpacket[0].samples_per_channel);
     PRINT("subbands", q->subpacket[0].subbands);
@@ -1232,11 +1239,11 @@ static av_cold int cook_decode_init(AVCodecContext *avctx)
 
     /* Pad the databuffer with:
        DECODE_BYTES_PAD1 or DECODE_BYTES_PAD2 for decode_bytes(),
-       FF_INPUT_BUFFER_PADDING_SIZE, for the bitstreamreader. */
+       AV_INPUT_BUFFER_PADDING_SIZE, for the bitstreamreader. */
     q->decoded_bytes_buffer =
         av_mallocz(avctx->block_align
                    + DECODE_BYTES_PAD1(avctx->block_align)
-                   + FF_INPUT_BUFFER_PADDING_SIZE);
+                   + AV_INPUT_BUFFER_PADDING_SIZE);
     if (!q->decoded_bytes_buffer)
         return AVERROR(ENOMEM);
 
@@ -1282,7 +1289,7 @@ AVCodec ff_cook_decoder = {
     .init           = cook_decode_init,
     .close          = cook_decode_close,
     .decode         = cook_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                       AV_SAMPLE_FMT_NONE },
 };
diff --git a/libavcodec/cos_tablegen.c b/libavcodec/cos_tablegen.c
index 9af83f4d..dbd0cc0d 100644
--- a/libavcodec/cos_tablegen.c
+++ b/libavcodec/cos_tablegen.c
@@ -24,6 +24,8 @@
 #include <string.h>
 #include <math.h>
 
+#include "libavutil/mathematics.h"
+
 #define BITS 16
 #define FLOATFMT "%.18e"
 #define FIXEDFMT "%6d"
@@ -61,7 +63,7 @@ int main(int argc, char *argv[])
     printf("#include \"libavcodec/%s\"\n", do_sin ? "rdft.h" : "fft.h");
     for (i = 4; i <= BITS; i++) {
         int m = 1 << i;
-        double freq = 2*3.14159265358979323846/m;
+        double freq = 2*M_PI/m;
         printf("%s(%i) = {\n   ", do_sin ? "SINTABLE" : "COSTABLE", m);
         for (j = 0; j < m/2 - 1; j++) {
             int idx = j > m/4 ? m/2 - j : j;
diff --git a/libavcodec/cpia.c b/libavcodec/cpia.c
index 9036cb37..6b784b20 100644
--- a/libavcodec/cpia.c
+++ b/libavcodec/cpia.c
@@ -229,5 +229,5 @@ AVCodec ff_cpia_decoder = {
     .init           = cpia_decode_init,
     .close          = cpia_decode_end,
     .decode         = cpia_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/crystalhd.c b/libavcodec/crystalhd.c
index 001afa4f..3cb32a8c 100644
--- a/libavcodec/crystalhd.c
+++ b/libavcodec/crystalhd.c
@@ -422,7 +422,7 @@ static av_cold int init(AVCodecContext *avctx)
             int dummy_int;
 
             /* Back up the extradata so it can be restored at close time. */
-            priv->orig_extradata = av_malloc(avctx->extradata_size + FF_INPUT_BUFFER_PADDING_SIZE);
+            priv->orig_extradata = av_malloc(avctx->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
             if (!priv->orig_extradata) {
                 av_log(avctx, AV_LOG_ERROR,
                        "Failed to allocate copy of extradata\n");
@@ -1098,7 +1098,7 @@ AVCodec ff_h264_crystalhd_decoder = {
     .init           = init,
     .close          = uninit,
     .decode         = decode,
-    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
     .flush          = flush,
     .pix_fmts       = (const enum AVPixelFormat[]){AV_PIX_FMT_YUYV422, AV_PIX_FMT_NONE},
     .priv_class     = &h264_class,
@@ -1122,7 +1122,7 @@ AVCodec ff_mpeg2_crystalhd_decoder = {
     .init           = init,
     .close          = uninit,
     .decode         = decode,
-    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
     .flush          = flush,
     .pix_fmts       = (const enum AVPixelFormat[]){AV_PIX_FMT_YUYV422, AV_PIX_FMT_NONE},
     .priv_class     = &mpeg2_class,
@@ -1146,7 +1146,7 @@ AVCodec ff_mpeg4_crystalhd_decoder = {
     .init           = init,
     .close          = uninit,
     .decode         = decode,
-    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
     .flush          = flush,
     .pix_fmts       = (const enum AVPixelFormat[]){AV_PIX_FMT_YUYV422, AV_PIX_FMT_NONE},
     .priv_class     = &mpeg4_class,
@@ -1170,7 +1170,7 @@ AVCodec ff_msmpeg4_crystalhd_decoder = {
     .init           = init,
     .close          = uninit,
     .decode         = decode,
-    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_EXPERIMENTAL,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY | AV_CODEC_CAP_EXPERIMENTAL,
     .flush          = flush,
     .pix_fmts       = (const enum AVPixelFormat[]){AV_PIX_FMT_YUYV422, AV_PIX_FMT_NONE},
     .priv_class     = &msmpeg4_class,
@@ -1194,7 +1194,7 @@ AVCodec ff_vc1_crystalhd_decoder = {
     .init           = init,
     .close          = uninit,
     .decode         = decode,
-    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
     .flush          = flush,
     .pix_fmts       = (const enum AVPixelFormat[]){AV_PIX_FMT_YUYV422, AV_PIX_FMT_NONE},
     .priv_class     = &vc1_class,
@@ -1218,7 +1218,7 @@ AVCodec ff_wmv3_crystalhd_decoder = {
     .init           = init,
     .close          = uninit,
     .decode         = decode,
-    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
     .flush          = flush,
     .pix_fmts       = (const enum AVPixelFormat[]){AV_PIX_FMT_YUYV422, AV_PIX_FMT_NONE},
     .priv_class     = &wmv3_class,
diff --git a/libavcodec/cscd.c b/libavcodec/cscd.c
index 07f2f5ed..9e1dec9d 100644
--- a/libavcodec/cscd.c
+++ b/libavcodec/cscd.c
@@ -127,7 +127,7 @@ static av_cold int decode_init(AVCodecContext *avctx) {
     switch (avctx->bits_per_coded_sample) {
         case 16: avctx->pix_fmt = AV_PIX_FMT_RGB555LE; break;
         case 24: avctx->pix_fmt = AV_PIX_FMT_BGR24; break;
-        case 32: avctx->pix_fmt = AV_PIX_FMT_BGRA; break;
+        case 32: avctx->pix_fmt = AV_PIX_FMT_BGR0; break;
         default:
             av_log(avctx, AV_LOG_ERROR,
                    "CamStudio codec error: invalid depth %i bpp\n",
@@ -166,5 +166,5 @@ AVCodec ff_cscd_decoder = {
     .init           = decode_init,
     .close          = decode_end,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/cyuv.c b/libavcodec/cyuv.c
index 6e8e461a..c7ec0085 100644
--- a/libavcodec/cyuv.c
+++ b/libavcodec/cyuv.c
@@ -184,7 +184,7 @@ AVCodec ff_aura_decoder = {
     .priv_data_size = sizeof(CyuvDecodeContext),
     .init           = cyuv_decode_init,
     .decode         = cyuv_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
 #endif
 
@@ -197,6 +197,6 @@ AVCodec ff_cyuv_decoder = {
     .priv_data_size = sizeof(CyuvDecodeContext),
     .init           = cyuv_decode_init,
     .decode         = cyuv_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
 #endif
diff --git a/libavcodec/aac_tablegen.h b/libavcodec/d3d11va.c
similarity index 57%
rename from libavcodec/aac_tablegen.h
rename to libavcodec/d3d11va.c
index bf71e59c..9967f322 100644
--- a/libavcodec/aac_tablegen.h
+++ b/libavcodec/d3d11va.c
@@ -1,7 +1,7 @@
 /*
- * Header file for hardcoded AAC tables
+ * Direct3D11 HW acceleration
  *
- * Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
+ * copyright (c) 2015 Steve Lhomme
  *
  * This file is part of FFmpeg.
  *
@@ -20,23 +20,29 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef AVCODEC_AAC_TABLEGEN_H
-#define AVCODEC_AAC_TABLEGEN_H
+#include <stddef.h>
 
-#include "aac_tablegen_decl.h"
+#include "config.h"
 
-#if CONFIG_HARDCODED_TABLES
-#include "libavcodec/aac_tables.h"
-#else
-#include "libavutil/mathematics.h"
-float ff_aac_pow2sf_tab[428];
+#if CONFIG_D3D11VA
+#include "libavutil/error.h"
+#include "libavutil/mem.h"
+
+#include "d3d11va.h"
 
-av_cold void ff_aac_tableinit(void)
+AVD3D11VAContext *av_d3d11va_alloc_context(void)
 {
-    int i;
-    for (i = 0; i < 428; i++)
-        ff_aac_pow2sf_tab[i] = pow(2, (i - POW_SF2_ZERO) / 4.0);
+    AVD3D11VAContext* res = av_mallocz(sizeof(AVD3D11VAContext));
+    if (!res)
+        return NULL;
+    res->context_mutex = INVALID_HANDLE_VALUE;
+    return res;
 }
-#endif /* CONFIG_HARDCODED_TABLES */
+#else
+struct AVD3D11VAContext *av_d3d11va_alloc_context(void);
 
-#endif /* AVCODEC_AAC_TABLEGEN_H */
+struct AVD3D11VAContext *av_d3d11va_alloc_context(void)
+{
+    return NULL;
+}
+#endif /* CONFIG_D3D11VA */
diff --git a/libavcodec/d3d11va.h b/libavcodec/d3d11va.h
index d51e2ff8..6816b6c1 100644
--- a/libavcodec/d3d11va.h
+++ b/libavcodec/d3d11va.h
@@ -30,9 +30,9 @@
  * Public libavcodec D3D11VA header.
  */
 
-#if !defined(_WIN32_WINNT) || _WIN32_WINNT < 0x0600
+#if !defined(_WIN32_WINNT) || _WIN32_WINNT < 0x0602
 #undef _WIN32_WINNT
-#define _WIN32_WINNT 0x0600
+#define _WIN32_WINNT 0x0602
 #endif
 
 #include <stdint.h>
@@ -53,8 +53,10 @@
  * to the Direct3D11 FFmpeg HWAccel implementation.
  *
  * The application must make it available as AVCodecContext.hwaccel_context.
+ *
+ * Use av_d3d11va_alloc_context() exclusively to allocate an AVD3D11VAContext.
  */
-struct AVD3D11VAContext {
+typedef struct AVD3D11VAContext {
     /**
      * D3D11 decoder object
      */
@@ -89,7 +91,19 @@ struct AVD3D11VAContext {
      * Private to the FFmpeg AVHWAccel implementation
      */
     unsigned report_id;
-};
+
+    /**
+     * Mutex to access video_context
+     */
+    HANDLE  context_mutex;
+} AVD3D11VAContext;
+
+/**
+ * Allocate an AVD3D11VAContext.
+ *
+ * @return Newly-allocated AVD3D11VAContext or NULL on failure.
+ */
+AVD3D11VAContext *av_d3d11va_alloc_context(void);
 
 /**
  * @}
diff --git a/libavcodec/dca.c b/libavcodec/dca.c
index 8dd04308..714509b2 100644
--- a/libavcodec/dca.c
+++ b/libavcodec/dca.c
@@ -41,8 +41,6 @@ int avpriv_dca_convert_bitstream(const uint8_t *src, int src_size, uint8_t *dst,
 {
     uint32_t mrk;
     int i, tmp;
-    const uint16_t *ssrc = (const uint16_t *) src;
-    uint16_t *sdst = (uint16_t *) dst;
     PutBitContext pb;
 
     if ((unsigned) src_size > (unsigned) max_size)
@@ -54,8 +52,11 @@ int avpriv_dca_convert_bitstream(const uint8_t *src, int src_size, uint8_t *dst,
         memcpy(dst, src, src_size);
         return src_size;
     case DCA_SYNCWORD_CORE_LE:
-        for (i = 0; i < (src_size + 1) >> 1; i++)
-            *sdst++ = av_bswap16(*ssrc++);
+        for (i = 0; i < (src_size + 1) >> 1; i++) {
+            AV_WB16(dst, AV_RL16(src));
+            src += 2;
+            dst += 2;
+        }
         return src_size;
     case DCA_SYNCWORD_CORE_14B_BE:
     case DCA_SYNCWORD_CORE_14B_LE:
diff --git a/libavcodec/dca.h b/libavcodec/dca.h
index 897ebf4b..ccb02af9 100644
--- a/libavcodec/dca.h
+++ b/libavcodec/dca.h
@@ -4,6 +4,7 @@
  * Copyright (C) 2004 Benjamin Zores
  * Copyright (C) 2006 Benjamin Larsson
  * Copyright (C) 2007 Konstantin Shishkov
+ * Copyright (C) 2016 foo86
  *
  * This file is part of FFmpeg.
  *
@@ -27,264 +28,101 @@
 
 #include <stdint.h>
 
-#include "libavutil/float_dsp.h"
 #include "libavutil/internal.h"
+#include "libavutil/intreadwrite.h"
+
+enum DCASpeaker {
+    DCA_SPEAKER_C,    DCA_SPEAKER_L,    DCA_SPEAKER_R,    DCA_SPEAKER_Ls,
+    DCA_SPEAKER_Rs,   DCA_SPEAKER_LFE1, DCA_SPEAKER_Cs,   DCA_SPEAKER_Lsr,
+    DCA_SPEAKER_Rsr,  DCA_SPEAKER_Lss,  DCA_SPEAKER_Rss,  DCA_SPEAKER_Lc,
+    DCA_SPEAKER_Rc,   DCA_SPEAKER_Lh,   DCA_SPEAKER_Ch,   DCA_SPEAKER_Rh,
+    DCA_SPEAKER_LFE2, DCA_SPEAKER_Lw,   DCA_SPEAKER_Rw,   DCA_SPEAKER_Oh,
+    DCA_SPEAKER_Lhs,  DCA_SPEAKER_Rhs,  DCA_SPEAKER_Chr,  DCA_SPEAKER_Lhr,
+    DCA_SPEAKER_Rhr,  DCA_SPEAKER_Cl,   DCA_SPEAKER_Ll,   DCA_SPEAKER_Rl,
+    DCA_SPEAKER_RSV1, DCA_SPEAKER_RSV2, DCA_SPEAKER_RSV3, DCA_SPEAKER_RSV4,
+
+    DCA_SPEAKER_COUNT
+};
 
-#include "avcodec.h"
-#include "dcadsp.h"
-#include "fmtconvert.h"
-#include "get_bits.h"
-
-#define DCA_PRIM_CHANNELS_MAX  (7)
-#define DCA_ABITS_MAX         (32)      /* Should be 28 */
-#define DCA_SUBSUBFRAMES_MAX   (4)
-#define DCA_SUBFRAMES_MAX     (16)
-#define DCA_BLOCKS_MAX        (16)
-#define DCA_LFE_MAX            (3)
-#define DCA_CHSETS_MAX         (4)
-#define DCA_CHSET_CHANS_MAX    (8)
-
-#define DCA_PRIM_CHANNELS_MAX  (7)
-#define DCA_ABITS_MAX         (32)      /* Should be 28 */
-#define DCA_SUBSUBFRAMES_MAX   (4)
-#define DCA_SUBFRAMES_MAX     (16)
-#define DCA_BLOCKS_MAX        (16)
-#define DCA_LFE_MAX            (3)
-#define DCA_XLL_FBANDS_MAX     (4)
-#define DCA_XLL_SEGMENTS_MAX  (16)
-#define DCA_XLL_CHSETS_MAX    (16)
-#define DCA_XLL_CHANNELS_MAX  (16)
-#define DCA_XLL_AORDER_MAX    (15)
-
-/* Arbitrary limit; not sure what the maximum really is, but much larger. */
-#define DCA_XLL_DMIX_NCOEFFS_MAX (18)
-
-#define DCA_MAX_FRAME_SIZE       16384
-#define DCA_MAX_EXSS_HEADER_SIZE  4096
+enum DCASpeakerMask {
+    DCA_SPEAKER_MASK_C     = 0x00000001,
+    DCA_SPEAKER_MASK_L     = 0x00000002,
+    DCA_SPEAKER_MASK_R     = 0x00000004,
+    DCA_SPEAKER_MASK_Ls    = 0x00000008,
+    DCA_SPEAKER_MASK_Rs    = 0x00000010,
+    DCA_SPEAKER_MASK_LFE1  = 0x00000020,
+    DCA_SPEAKER_MASK_Cs    = 0x00000040,
+    DCA_SPEAKER_MASK_Lsr   = 0x00000080,
+    DCA_SPEAKER_MASK_Rsr   = 0x00000100,
+    DCA_SPEAKER_MASK_Lss   = 0x00000200,
+    DCA_SPEAKER_MASK_Rss   = 0x00000400,
+    DCA_SPEAKER_MASK_Lc    = 0x00000800,
+    DCA_SPEAKER_MASK_Rc    = 0x00001000,
+    DCA_SPEAKER_MASK_Lh    = 0x00002000,
+    DCA_SPEAKER_MASK_Ch    = 0x00004000,
+    DCA_SPEAKER_MASK_Rh    = 0x00008000,
+    DCA_SPEAKER_MASK_LFE2  = 0x00010000,
+    DCA_SPEAKER_MASK_Lw    = 0x00020000,
+    DCA_SPEAKER_MASK_Rw    = 0x00040000,
+    DCA_SPEAKER_MASK_Oh    = 0x00080000,
+    DCA_SPEAKER_MASK_Lhs   = 0x00100000,
+    DCA_SPEAKER_MASK_Rhs   = 0x00200000,
+    DCA_SPEAKER_MASK_Chr   = 0x00400000,
+    DCA_SPEAKER_MASK_Lhr   = 0x00800000,
+    DCA_SPEAKER_MASK_Rhr   = 0x01000000,
+    DCA_SPEAKER_MASK_Cl    = 0x02000000,
+    DCA_SPEAKER_MASK_Ll    = 0x04000000,
+    DCA_SPEAKER_MASK_Rl    = 0x08000000,
+};
 
-#define DCA_BUFFER_PADDING_SIZE   1024
+#define DCA_SPEAKER_LAYOUT_MONO         (DCA_SPEAKER_MASK_C)
+#define DCA_SPEAKER_LAYOUT_STEREO       (DCA_SPEAKER_MASK_L | DCA_SPEAKER_MASK_R)
+#define DCA_SPEAKER_LAYOUT_2POINT1      (DCA_SPEAKER_LAYOUT_STEREO | DCA_SPEAKER_MASK_LFE1)
+#define DCA_SPEAKER_LAYOUT_3_0          (DCA_SPEAKER_LAYOUT_STEREO | DCA_SPEAKER_MASK_C)
+#define DCA_SPEAKER_LAYOUT_2_1          (DCA_SPEAKER_LAYOUT_STEREO | DCA_SPEAKER_MASK_Cs)
+#define DCA_SPEAKER_LAYOUT_3_1          (DCA_SPEAKER_LAYOUT_3_0 | DCA_SPEAKER_MASK_Cs)
+#define DCA_SPEAKER_LAYOUT_2_2          (DCA_SPEAKER_LAYOUT_STEREO | DCA_SPEAKER_MASK_Ls | DCA_SPEAKER_MASK_Rs)
+#define DCA_SPEAKER_LAYOUT_5POINT0      (DCA_SPEAKER_LAYOUT_3_0 | DCA_SPEAKER_MASK_Ls | DCA_SPEAKER_MASK_Rs)
+#define DCA_SPEAKER_LAYOUT_5POINT1      (DCA_SPEAKER_LAYOUT_5POINT0 | DCA_SPEAKER_MASK_LFE1)
+#define DCA_SPEAKER_LAYOUT_7POINT0_WIDE (DCA_SPEAKER_LAYOUT_5POINT0 | DCA_SPEAKER_MASK_Lw | DCA_SPEAKER_MASK_Rw)
+#define DCA_SPEAKER_LAYOUT_7POINT1_WIDE (DCA_SPEAKER_LAYOUT_7POINT0_WIDE | DCA_SPEAKER_MASK_LFE1)
+
+#define DCA_HAS_STEREO(mask) /* nonzero iff both L and R bits are set */ \
+    (((mask) & DCA_SPEAKER_LAYOUT_STEREO) == DCA_SPEAKER_LAYOUT_STEREO)
+
+enum DCARepresentationType {
+    DCA_REPR_TYPE_LtRt = 2,
+    DCA_REPR_TYPE_LhRh = 3
+};
 
 enum DCAExtensionMask {
-    DCA_EXT_CORE       = 0x001, ///< core in core substream
-    DCA_EXT_XXCH       = 0x002, ///< XXCh channels extension in core substream
-    DCA_EXT_X96        = 0x004, ///< 96/24 extension in core substream
-    DCA_EXT_XCH        = 0x008, ///< XCh channel extension in core substream
-    DCA_EXT_EXSS_CORE  = 0x010, ///< core in ExSS (extension substream)
-    DCA_EXT_EXSS_XBR   = 0x020, ///< extended bitrate extension in ExSS
-    DCA_EXT_EXSS_XXCH  = 0x040, ///< XXCh channels extension in ExSS
-    DCA_EXT_EXSS_X96   = 0x080, ///< 96/24 extension in ExSS
-    DCA_EXT_EXSS_LBR   = 0x100, ///< low bitrate component in ExSS
-    DCA_EXT_EXSS_XLL   = 0x200, ///< lossless extension in ExSS
+    DCA_CSS_CORE   = 0x001,
+    DCA_CSS_XXCH   = 0x002,
+    DCA_CSS_X96    = 0x004,
+    DCA_CSS_XCH    = 0x008,
+    DCA_CSS_MASK   = 0x00f,
+    DCA_EXSS_CORE  = 0x010,
+    DCA_EXSS_XBR   = 0x020,
+    DCA_EXSS_XXCH  = 0x040,
+    DCA_EXSS_X96   = 0x080,
+    DCA_EXSS_LBR   = 0x100,
+    DCA_EXSS_XLL   = 0x200,
+    DCA_EXSS_RSV1  = 0x400,
+    DCA_EXSS_RSV2  = 0x800,
+    DCA_EXSS_MASK  = 0xff0,
 };
 
-typedef struct XllChSetSubHeader {
-    int channels;               ///< number of channels in channel set, at most 16
-    int residual_encode;        ///< residual channel encoding
-    int bit_resolution;         ///< input sample bit-width
-    int bit_width;              ///< original input sample bit-width
-    int sampling_frequency;     ///< sampling frequency
-    int samp_freq_interp;       ///< sampling frequency interpolation multiplier
-    int replacement_set;        ///< replacement channel set group
-    int active_replace_set;     ///< current channel set is active channel set
-    int primary_ch_set;
-    int downmix_coeff_code_embedded;
-    int downmix_embedded;
-    int downmix_type;
-    int hier_chset;             ///< hierarchical channel set
-    int downmix_ncoeffs;
-    int downmix_coeffs[DCA_XLL_DMIX_NCOEFFS_MAX];
-    int ch_mask_enabled;
-    int ch_mask;
-    int mapping_coeffs_present;
-    int num_freq_bands;
-
-    /* m_nOrigChanOrder */
-    uint8_t orig_chan_order[DCA_XLL_FBANDS_MAX][DCA_XLL_CHANNELS_MAX];
-    uint8_t orig_chan_order_inv[DCA_XLL_FBANDS_MAX][DCA_XLL_CHANNELS_MAX];
-    /* Coefficients for channel pairs (at most 8), m_anPWChPairsCoeffs */
-    int8_t pw_ch_pairs_coeffs[DCA_XLL_FBANDS_MAX][DCA_XLL_CHANNELS_MAX/2];
-    /* m_nCurrHighestLPCOrder */
-    uint8_t adapt_order_max[DCA_XLL_FBANDS_MAX];
-    /* m_pnAdaptPredOrder */
-    uint8_t adapt_order[DCA_XLL_FBANDS_MAX][DCA_XLL_CHANNELS_MAX];
-    /* m_pnFixedPredOrder */
-    uint8_t fixed_order[DCA_XLL_FBANDS_MAX][DCA_XLL_CHANNELS_MAX];
-    /* m_pnLPCReflCoeffsQInd, unsigned version */
-    uint8_t lpc_refl_coeffs_q_ind[DCA_XLL_FBANDS_MAX]
-                                 [DCA_XLL_CHANNELS_MAX][DCA_XLL_AORDER_MAX];
-
-    int lsb_fsize[DCA_XLL_FBANDS_MAX];
-    int8_t scalable_lsbs[DCA_XLL_FBANDS_MAX][DCA_XLL_CHANNELS_MAX];
-    int8_t bit_width_adj_per_ch[DCA_XLL_FBANDS_MAX][DCA_XLL_CHANNELS_MAX];
-} XllChSetSubHeader;
-
-typedef struct XllNavi {
-    GetBitContext gb;  // Context for parsing the data segments
-    unsigned band_size[DCA_XLL_FBANDS_MAX];
-    unsigned segment_size[DCA_XLL_FBANDS_MAX][DCA_XLL_SEGMENTS_MAX];
-    unsigned chset_size[DCA_XLL_FBANDS_MAX][DCA_XLL_SEGMENTS_MAX][DCA_XLL_CHSETS_MAX];
-} XllNavi;
-
-typedef struct QMF64_table {
-    float dct4_coeff[32][32];
-    float dct2_coeff[32][32];
-    float rcos[32];
-    float rsin[32];
-} QMF64_table;
-
-typedef struct DCAContext {
-    const AVClass *class;       ///< class for AVOptions
-    AVCodecContext *avctx;
-    /* Frame header */
-    int frame_type;             ///< type of the current frame
-    int samples_deficit;        ///< deficit sample count
-    int crc_present;            ///< crc is present in the bitstream
-    int sample_blocks;          ///< number of PCM sample blocks
-    int frame_size;             ///< primary frame byte size
-    int amode;                  ///< audio channels arrangement
-    int sample_rate;            ///< audio sampling rate
-    int bit_rate;               ///< transmission bit rate
-    int bit_rate_index;         ///< transmission bit rate index
-
-    int dynrange;               ///< embedded dynamic range flag
-    int timestamp;              ///< embedded time stamp flag
-    int aux_data;               ///< auxiliary data flag
-    int hdcd;                   ///< source material is mastered in HDCD
-    int ext_descr;              ///< extension audio descriptor flag
-    int ext_coding;             ///< extended coding flag
-    int aspf;                   ///< audio sync word insertion flag
-    int lfe;                    ///< low frequency effects flag
-    int predictor_history;      ///< predictor history flag
-    int header_crc;             ///< header crc check bytes
-    int multirate_inter;        ///< multirate interpolator switch
-    int version;                ///< encoder software revision
-    int copy_history;           ///< copy history
-    int source_pcm_res;         ///< source pcm resolution
-    int front_sum;              ///< front sum/difference flag
-    int surround_sum;           ///< surround sum/difference flag
-    int dialog_norm;            ///< dialog normalisation parameter
-
-    /* Primary audio coding header */
-    int subframes;              ///< number of subframes
-    int total_channels;         ///< number of channels including extensions
-    int prim_channels;          ///< number of primary audio channels
-    int subband_activity[DCA_PRIM_CHANNELS_MAX];    ///< subband activity count
-    int vq_start_subband[DCA_PRIM_CHANNELS_MAX];    ///< high frequency vq start subband
-    int joint_intensity[DCA_PRIM_CHANNELS_MAX];     ///< joint intensity coding index
-    int transient_huffman[DCA_PRIM_CHANNELS_MAX];   ///< transient mode code book
-    int scalefactor_huffman[DCA_PRIM_CHANNELS_MAX]; ///< scale factor code book
-    int bitalloc_huffman[DCA_PRIM_CHANNELS_MAX];    ///< bit allocation quantizer select
-    int quant_index_huffman[DCA_PRIM_CHANNELS_MAX][DCA_ABITS_MAX]; ///< quantization index codebook select
-    float scalefactor_adj[DCA_PRIM_CHANNELS_MAX][DCA_ABITS_MAX];   ///< scale factor adjustment
-
-    /* Primary audio coding side information */
-    int subsubframes[DCA_SUBFRAMES_MAX];                         ///< number of subsubframes
-    int partial_samples[DCA_SUBFRAMES_MAX];                      ///< partial subsubframe samples count
-    int prediction_mode[DCA_PRIM_CHANNELS_MAX][DCA_SUBBANDS];    ///< prediction mode (ADPCM used or not)
-    int prediction_vq[DCA_PRIM_CHANNELS_MAX][DCA_SUBBANDS];      ///< prediction VQ coefs
-    int bitalloc[DCA_PRIM_CHANNELS_MAX][DCA_SUBBANDS];           ///< bit allocation index
-    int transition_mode[DCA_PRIM_CHANNELS_MAX][DCA_SUBBANDS];    ///< transition mode (transients)
-    int32_t scale_factor[DCA_PRIM_CHANNELS_MAX][DCA_SUBBANDS][2];///< scale factors (2 if transient)
-    int joint_huff[DCA_PRIM_CHANNELS_MAX];                       ///< joint subband scale factors codebook
-    int joint_scale_factor[DCA_PRIM_CHANNELS_MAX][DCA_SUBBANDS]; ///< joint subband scale factors
-    float downmix_coef[DCA_PRIM_CHANNELS_MAX + 1][2];            ///< stereo downmix coefficients
-    int dynrange_coef;                                           ///< dynamic range coefficient
-
-    /* Core substream's embedded downmix coefficients (cf. ETSI TS 102 114 V1.4.1)
-     * Input:  primary audio channels (incl. LFE if present)
-     * Output: downmix audio channels (up to 4, no LFE) */
-    uint8_t  core_downmix;                                       ///< embedded downmix coefficients available
-    uint8_t  core_downmix_amode;                                 ///< audio channel arrangement of embedded downmix
-    uint16_t core_downmix_codes[DCA_PRIM_CHANNELS_MAX + 1][4];   ///< embedded downmix coefficients (9-bit codes)
-
-    int32_t  high_freq_vq[DCA_PRIM_CHANNELS_MAX][DCA_SUBBANDS];  ///< VQ encoded high frequency subbands
-
-    float lfe_data[2 * DCA_LFE_MAX * (DCA_BLOCKS_MAX + 4)];      ///< Low frequency effect data
-    int lfe_scale_factor;
+enum DCADownMixType {
+    DCA_DMIX_TYPE_1_0,
+    DCA_DMIX_TYPE_LoRo,
+    DCA_DMIX_TYPE_LtRt,
+    DCA_DMIX_TYPE_3_0,
+    DCA_DMIX_TYPE_2_1,
+    DCA_DMIX_TYPE_2_2,
+    DCA_DMIX_TYPE_3_1,
 
-    /* Subband samples history (for ADPCM) */
-    DECLARE_ALIGNED(16, float, subband_samples_hist)[DCA_PRIM_CHANNELS_MAX][DCA_SUBBANDS][4];
-    /* Half size is sufficient for core decoding, but for 96 kHz data
-     * we need QMF with 64 subbands and 1024 samples. */
-    DECLARE_ALIGNED(32, float, subband_fir_hist)[DCA_PRIM_CHANNELS_MAX][1024];
-    DECLARE_ALIGNED(32, float, subband_fir_noidea)[DCA_PRIM_CHANNELS_MAX][64];
-    int hist_index[DCA_PRIM_CHANNELS_MAX];
-    DECLARE_ALIGNED(32, float, raXin)[32];
-
-    int output;                 ///< type of output
-
-    DECLARE_ALIGNED(32, float, subband_samples)[DCA_BLOCKS_MAX][DCA_PRIM_CHANNELS_MAX][DCA_SUBBANDS][8];
-    float *samples_chanptr[DCA_PRIM_CHANNELS_MAX + 1];
-    float *extra_channels[DCA_PRIM_CHANNELS_MAX + 1];
-    uint8_t *extra_channels_buffer;
-    unsigned int extra_channels_buffer_size;
-
-    uint8_t dca_buffer[DCA_MAX_FRAME_SIZE + DCA_MAX_EXSS_HEADER_SIZE + DCA_BUFFER_PADDING_SIZE];
-    int dca_buffer_size;        ///< how much data is in the dca_buffer
-
-    const int8_t *channel_order_tab;  ///< channel reordering table, lfe and non lfe
-    GetBitContext gb;
-    /* Current position in DCA frame */
-    int current_subframe;
-    int current_subsubframe;
-
-    int core_ext_mask;          ///< present extensions in the core substream
-    int exss_ext_mask;          ///< Non-core extensions
-
-    /* XCh extension information */
-    int xch_present;            ///< XCh extension present and valid
-    int xch_base_channel;       ///< index of first (only) channel containing XCH data
-    int xch_disable;            ///< whether the XCh extension should be decoded or not
-
-    /* XXCH extension information */
-    int xxch_chset;
-    int xxch_nbits_spk_mask;
-    uint32_t xxch_core_spkmask;
-    uint32_t xxch_spk_masks[4]; /* speaker masks, last element is core mask */
-    int xxch_chset_nch[4];
-    float xxch_dmix_sf[DCA_CHSETS_MAX];
-
-    uint32_t xxch_dmix_embedded;  /* lower layer has mix pre-embedded, per chset */
-    float xxch_dmix_coeff[DCA_PRIM_CHANNELS_MAX][32]; /* worst case sizing */
-
-    int8_t xxch_order_tab[32];
-    int8_t lfe_index;
-
-    /* XLL extension information */
-    int xll_disable;
-    int xll_nch_sets;           ///< number of channel sets per frame
-    int xll_channels;           ///< total number of channels (in all channel sets)
-    int xll_residual_channels;  ///< number of residual channels
-    int xll_segments;           ///< number of segments per frame
-    int xll_log_smpl_in_seg;    ///< supposedly this is "nBits4SamplLoci"
-    int xll_smpl_in_seg;        ///< samples in segment per one frequency band for the first channel set
-    int xll_bits4seg_size;      ///< number of bits used to read segment size
-    int xll_banddata_crc;       ///< presence of CRC16 within each frequency band
-    int xll_scalable_lsb;
-    int xll_bits4ch_mask;       ///< channel position mask
-    int xll_fixed_lsb_width;
-    XllChSetSubHeader xll_chsets[DCA_XLL_CHSETS_MAX];
-    XllNavi xll_navi;
-    int *xll_sample_buf;
-    unsigned int xll_sample_buf_size;
-
-    /* ExSS header parser */
-    int static_fields;          ///< static fields present
-    int mix_metadata;           ///< mixing metadata present
-    int num_mix_configs;        ///< number of mix out configurations
-    int mix_config_num_ch[4];   ///< number of channels in each mix out configuration
-
-    int profile;
-    int one2one_map_chtospkr;
-
-    int debug_flag;             ///< used for suppressing repeated error messages output
-    AVFloatDSPContext *fdsp;
-    FFTContext imdct;
-    SynthFilterContext synth;
-    DCADSPContext dcadsp;
-    QMF64_table *qmf64_table;
-    FmtConvertContext fmt_conv;
-} DCAContext;
+    DCA_DMIX_TYPE_COUNT
+};
 
 extern av_export const uint32_t avpriv_dca_sample_rates[16];
 
@@ -292,15 +130,6 @@ extern av_export const uint32_t avpriv_dca_sample_rates[16];
  * Convert bitstream to one representation based on sync marker
  */
 int avpriv_dca_convert_bitstream(const uint8_t *src, int src_size, uint8_t *dst,
-                             int max_size);
-
-int ff_dca_xbr_parse_frame(DCAContext *s);
-int ff_dca_xxch_decode_frame(DCAContext *s);
-
-void ff_dca_exss_parse_header(DCAContext *s);
-
-int ff_dca_xll_decode_header(DCAContext *s);
-int ff_dca_xll_decode_navi(DCAContext *s, int asset_end);
-int ff_dca_xll_decode_audio(DCAContext *s, AVFrame *frame);
+                                 int max_size);
 
 #endif /* AVCODEC_DCA_H */
diff --git a/libavcodec/dca_core.c b/libavcodec/dca_core.c
new file mode 100644
index 00000000..d9f1a4ca
--- /dev/null
+++ b/libavcodec/dca_core.c
@@ -0,0 +1,2612 @@
+/*
+ * Copyright (C) 2016 foo86
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "dcadec.h"
+#include "dcadata.h"
+#include "dcahuff.h"
+#include "dcamath.h"
+#include "dca_syncwords.h"
+
+#if ARCH_ARM
+#include "arm/dca.h"
+#endif
+
+enum HeaderType {   // which header layout parse_coding_header() and friends read
+    HEADER_CORE,    // core: subframe and channel counts are read from the stream
+    HEADER_XCH,     // XCH: channel count is the core audio mode's count plus one
+    HEADER_XXCH     // XXCH: channel set header with explicit size, mask and downmix
+};
+
+enum AudioMode {            // audio channel arrangement field of the frame header
+    AMODE_MONO,             // Mode 0: A (mono)
+    AMODE_MONO_DUAL,        // Mode 1: A + B (dual mono)
+    AMODE_STEREO,           // Mode 2: L + R (stereo)
+    AMODE_STEREO_SUMDIFF,   // Mode 3: (L+R) + (L-R) (sum-diff)
+    AMODE_STEREO_TOTAL,     // Mode 4: LT + RT (left and right total)
+    AMODE_3F,               // Mode 5: C + L + R
+    AMODE_2F1R,             // Mode 6: L + R + S
+    AMODE_3F1R,             // Mode 7: C + L + R + S
+    AMODE_2F2R,             // Mode 8: L + R + SL + SR
+    AMODE_3F2R,             // Mode 9: C + L + R + SL + SR
+
+    AMODE_COUNT             // header values >= this are rejected as unsupported
+};
+
+enum ExtAudioType {         // presumably matched against the 3-bit ext_audio_type header field — confirm
+    EXT_AUDIO_XCH   = 0,
+    EXT_AUDIO_X96   = 2,
+    EXT_AUDIO_XXCH  = 6
+};
+
+enum LFEFlag {          // values of the 2-bit low frequency effects flag in the frame header
+    LFE_FLAG_NONE,      // zero: no LFE speaker is added to the channel mask
+    LFE_FLAG_128,
+    LFE_FLAG_64,
+    LFE_FLAG_INVALID    // rejected by parse_frame_header()
+};
+
+static const int8_t prm_ch_to_spkr_map[AMODE_COUNT][5] = {  // one row per AudioMode; -1 pads rows with fewer than 5 channels
+    [AMODE_MONO]           = { DCA_SPEAKER_C, -1, -1, -1, -1 },
+    [AMODE_MONO_DUAL]      = { DCA_SPEAKER_L, DCA_SPEAKER_R, -1, -1, -1 },
+    [AMODE_STEREO]         = { DCA_SPEAKER_L, DCA_SPEAKER_R, -1, -1, -1 },
+    [AMODE_STEREO_SUMDIFF] = { DCA_SPEAKER_L, DCA_SPEAKER_R, -1, -1, -1 },
+    [AMODE_STEREO_TOTAL]   = { DCA_SPEAKER_L, DCA_SPEAKER_R, -1, -1, -1 },
+    [AMODE_3F]             = { DCA_SPEAKER_C, DCA_SPEAKER_L, DCA_SPEAKER_R, -1, -1 },
+    [AMODE_2F1R]           = { DCA_SPEAKER_L, DCA_SPEAKER_R, DCA_SPEAKER_Cs, -1, -1 },
+    [AMODE_3F1R]           = { DCA_SPEAKER_C, DCA_SPEAKER_L, DCA_SPEAKER_R, DCA_SPEAKER_Cs, -1 },
+    [AMODE_2F2R]           = { DCA_SPEAKER_L, DCA_SPEAKER_R, DCA_SPEAKER_Ls, DCA_SPEAKER_Rs, -1 },
+    [AMODE_3F2R]           = { DCA_SPEAKER_C, DCA_SPEAKER_L, DCA_SPEAKER_R, DCA_SPEAKER_Ls, DCA_SPEAKER_Rs }
+};
+
+static const uint8_t audio_mode_ch_mask[AMODE_COUNT] = {    // speaker layout per AudioMode; the LFE bit is added separately
+    [AMODE_MONO]           = DCA_SPEAKER_LAYOUT_MONO,
+    [AMODE_MONO_DUAL]      = DCA_SPEAKER_LAYOUT_STEREO,
+    [AMODE_STEREO]         = DCA_SPEAKER_LAYOUT_STEREO,
+    [AMODE_STEREO_SUMDIFF] = DCA_SPEAKER_LAYOUT_STEREO,
+    [AMODE_STEREO_TOTAL]   = DCA_SPEAKER_LAYOUT_STEREO,
+    [AMODE_3F]             = DCA_SPEAKER_LAYOUT_3_0,
+    [AMODE_2F1R]           = DCA_SPEAKER_LAYOUT_2_1,
+    [AMODE_3F1R]           = DCA_SPEAKER_LAYOUT_3_1,
+    [AMODE_2F2R]           = DCA_SPEAKER_LAYOUT_2_2,
+    [AMODE_3F2R]           = DCA_SPEAKER_LAYOUT_5POINT0
+};
+
+static const uint8_t block_code_nbits[7] = {    // width in bits of each block code, indexed by abits - 1
+    7, 10, 12, 13, 15, 17, 19
+};
+
+static const uint8_t quant_index_sel_nbits[DCA_CODE_BOOKS] = {  // bits read for the table selector of each code book
+    1, 2, 2, 2, 2, 3, 3, 3, 3, 3
+};
+
+static const uint8_t quant_index_group_size[DCA_CODE_BOOKS] = { // VLC tables per code book; selectors >= this mean no Huffman coding
+    1, 3, 3, 3, 3, 7, 7, 7, 7, 7
+};
+
+typedef struct DCAVLC { // group of VLC tables sharing one value bias and lookup depth
+    int offset;         ///< Added to every symbol returned by dca_get_vlc()
+    int max_depth;      ///< Passed as the max_depth argument of get_vlc2()
+    VLC vlc[7];         ///< Tables of the group; dca_get_vlc() picks one by index
+} DCAVLC;
+
+static DCAVLC   vlc_bit_allocation;                 // used to read bit allocation indices
+static DCAVLC   vlc_transition_mode;                // used to read transition modes
+static DCAVLC   vlc_scale_factor;                   // used to read scale factors and joint scale factors
+static DCAVLC   vlc_quant_index[DCA_CODE_BOOKS];    // used to read Huffman coded samples, one group per code book
+
+static av_cold void dca_init_vlcs(void)    // builds every static VLC group; repeat calls return immediately
+{
+    static VLC_TYPE dca_table[23622][2];
+    static int vlcs_initialized = 0;   // NOTE(review): plain flag without locking — confirm callers serialize initialization
+    int i, j, k;                       // k counts tables across all groups and indexes ff_dca_vlc_offs
+
+    if (vlcs_initialized)
+        return;
+    // Table k takes the slice of dca_table between two consecutive ff_dca_vlc_offs entries
+#define DCA_INIT_VLC(vlc, a, b, c, d)                                      \
+    do {                                                                   \
+        vlc.table           = &dca_table[ff_dca_vlc_offs[k]];              \
+        vlc.table_allocated = ff_dca_vlc_offs[k + 1] - ff_dca_vlc_offs[k]; \
+        init_vlc(&vlc, a, b, c, 1, 1, d, 2, 2, INIT_VLC_USE_NEW_STATIC);   \
+    } while (0)
+    // Bit allocation: 5 tables, decoded symbols are biased by +1
+    vlc_bit_allocation.offset    = 1;
+    vlc_bit_allocation.max_depth = 2;
+    for (i = 0, k = 0; i < 5; i++, k++)
+        DCA_INIT_VLC(vlc_bit_allocation.vlc[i], bitalloc_12_vlc_bits[i], 12,
+                     bitalloc_12_bits[i], bitalloc_12_codes[i]);
+    // Scale factors: 5 tables, decoded symbols are biased by -64
+    vlc_scale_factor.offset    = -64;
+    vlc_scale_factor.max_depth = 2;
+    for (i = 0; i < 5; i++, k++)
+        DCA_INIT_VLC(vlc_scale_factor.vlc[i], SCALES_VLC_BITS, 129,
+                     scales_bits[i], scales_codes[i]);
+    // Transition modes: 4 tables, no bias
+    vlc_transition_mode.offset    = 0;
+    vlc_transition_mode.max_depth = 1;
+    for (i = 0; i < 4; i++, k++)
+        DCA_INIT_VLC(vlc_transition_mode.vlc[i], tmode_vlc_bits[i], 4,
+                     tmode_bits[i], tmode_codes[i]);
+    // Quantization indices: quant_index_group_size[i] tables for code book i
+    for (i = 0; i < DCA_CODE_BOOKS; i++) {
+        vlc_quant_index[i].offset    = bitalloc_offsets[i];
+        vlc_quant_index[i].max_depth = 1 + (i > 4);
+        for (j = 0; j < quant_index_group_size[i]; j++, k++)
+            DCA_INIT_VLC(vlc_quant_index[i].vlc[j], bitalloc_maxbits[i][j],
+                         bitalloc_sizes[i], bitalloc_bits[i][j], bitalloc_codes[i][j]);
+    }
+
+    vlcs_initialized = 1;
+}
+
+static int dca_get_vlc(GetBitContext *s, DCAVLC *v, int i)    // reads one symbol with table i of group v
+{
+    return get_vlc2(s, v->vlc[i].table, v->vlc[i].bits, v->max_depth) + v->offset;  // group bias applied to the raw symbol
+}
+
+static void get_array(GetBitContext *s, int32_t *array, int size, int n)
+{
+    // Store `size` consecutive signed n-bit fields in stream order
+    int32_t *dst = array;
+    for (; size > 0; size--)
+        *dst++ = get_sbits(s, n);
+}
+
+// 5.3.1 - Bit stream header: reads the frame-level fields from s->gb into s; returns 0 or a negative AVERROR code
+static int parse_frame_header(DCACoreDecoder *s)
+{
+    int normal_frame, pcmr_index;
+
+    // Frame type
+    normal_frame = get_bits1(&s->gb);
+
+    // Deficit sample count
+    if (get_bits(&s->gb, 5) != DCA_PCMBLOCK_SAMPLES - 1) {
+        av_log(s->avctx, AV_LOG_ERROR, "Deficit samples are not supported\n");
+        return normal_frame ? AVERROR_INVALIDDATA : AVERROR_PATCHWELCOME;  // invalid for normal frames, merely unsupported otherwise
+    }
+
+    // CRC present flag
+    s->crc_present = get_bits1(&s->gb);
+
+    // Number of PCM sample blocks
+    s->npcmblocks = get_bits(&s->gb, 7) + 1;
+    if (s->npcmblocks & (DCA_SUBBAND_SAMPLES - 1)) {    // mask test for a multiple; assumes DCA_SUBBAND_SAMPLES is a power of two
+        av_log(s->avctx, AV_LOG_ERROR, "Unsupported number of PCM sample blocks (%d)\n", s->npcmblocks);
+        return (s->npcmblocks < 6 || normal_frame) ? AVERROR_INVALIDDATA : AVERROR_PATCHWELCOME;
+    }
+
+    // Primary frame byte size
+    s->frame_size = get_bits(&s->gb, 14) + 1;
+    if (s->frame_size < 96) {
+        av_log(s->avctx, AV_LOG_ERROR, "Invalid core frame size (%d bytes)\n", s->frame_size);
+        return AVERROR_INVALIDDATA;
+    }
+
+    // Audio channel arrangement
+    s->audio_mode = get_bits(&s->gb, 6);
+    if (s->audio_mode >= AMODE_COUNT) {
+        av_log(s->avctx, AV_LOG_ERROR, "Unsupported audio channel arrangement (%d)\n", s->audio_mode);
+        return AVERROR_PATCHWELCOME;
+    }
+
+    // Core audio sampling frequency
+    s->sample_rate = avpriv_dca_sample_rates[get_bits(&s->gb, 4)];
+    if (!s->sample_rate) {     // a zero table entry marks an invalid frequency code
+        av_log(s->avctx, AV_LOG_ERROR, "Invalid core audio sampling frequency\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    // Transmission bit rate
+    s->bit_rate = ff_dca_bit_rates[get_bits(&s->gb, 5)];
+
+    // Reserved field
+    skip_bits1(&s->gb);
+
+    // Embedded dynamic range flag
+    s->drc_present = get_bits1(&s->gb);
+
+    // Embedded time stamp flag
+    s->ts_present = get_bits1(&s->gb);
+
+    // Auxiliary data flag
+    s->aux_present = get_bits1(&s->gb);
+
+    // HDCD mastering flag
+    skip_bits1(&s->gb);
+
+    // Extension audio descriptor flag
+    s->ext_audio_type = get_bits(&s->gb, 3);
+
+    // Extended coding flag
+    s->ext_audio_present = get_bits1(&s->gb);
+
+    // Audio sync word insertion flag
+    s->sync_ssf = get_bits1(&s->gb);
+
+    // Low frequency effects flag
+    s->lfe_present = get_bits(&s->gb, 2);
+    if (s->lfe_present == LFE_FLAG_INVALID) {
+        av_log(s->avctx, AV_LOG_ERROR, "Invalid low frequency effects flag\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    // Predictor history flag switch
+    s->predictor_history = get_bits1(&s->gb);
+
+    // Header CRC check bytes (skipped, not verified here)
+    if (s->crc_present)
+        skip_bits(&s->gb, 16);
+
+    // Multirate interpolator switch
+    s->filter_perfect = get_bits1(&s->gb);
+
+    // Encoder software revision
+    skip_bits(&s->gb, 4);
+
+    // Copy history
+    skip_bits(&s->gb, 2);
+
+    // Source PCM resolution
+    s->source_pcm_res = ff_dca_bits_per_sample[pcmr_index = get_bits(&s->gb, 3)];   // raw index is kept for es_format below
+    if (!s->source_pcm_res) {  // a zero table entry marks an invalid resolution code
+        av_log(s->avctx, AV_LOG_ERROR, "Invalid source PCM resolution\n");
+        return AVERROR_INVALIDDATA;
+    }
+    s->es_format = pcmr_index & 1;     // low bit of the resolution index
+
+    // Front sum/difference flag
+    s->sumdiff_front = get_bits1(&s->gb);
+
+    // Surround sum/difference flag
+    s->sumdiff_surround = get_bits1(&s->gb);
+
+    // Dialog normalization / unspecified
+    skip_bits(&s->gb, 4);
+
+    return 0;
+}
+
+// 5.3.2 - Primary audio coding header: reads per-channel coding parameters for channels xch_base..nchannels-1; returns 0 or a negative AVERROR code
+static int parse_coding_header(DCACoreDecoder *s, enum HeaderType header, int xch_base)
+{
+    int n, ch, nchannels, header_size = 0, header_pos = get_bits_count(&s->gb);    // header_pos/header_size delimit the XXCH channel set header
+    unsigned int mask, index;
+
+    if (get_bits_left(&s->gb) < 0)
+        return AVERROR_INVALIDDATA;
+
+    switch (header) {
+    case HEADER_CORE:
+        // Number of subframes
+        s->nsubframes = get_bits(&s->gb, 4) + 1;
+
+        // Number of primary audio channels
+        s->nchannels = get_bits(&s->gb, 3) + 1;
+        if (s->nchannels != ff_dca_channels[s->audio_mode]) {  // must agree with the arrangement from the frame header
+            av_log(s->avctx, AV_LOG_ERROR, "Invalid number of primary audio channels (%d) for audio channel arrangement (%d)\n", s->nchannels, s->audio_mode);
+            return AVERROR_INVALIDDATA;
+        }
+        av_assert1(s->nchannels <= DCA_CHANNELS - 2);
+
+        s->ch_mask = audio_mode_ch_mask[s->audio_mode];
+
+        // Add LFE channel if present
+        if (s->lfe_present)
+            s->ch_mask |= DCA_SPEAKER_MASK_LFE1;
+        break;
+
+    case HEADER_XCH:
+        s->nchannels = ff_dca_channels[s->audio_mode] + 1;     // nothing is read here: one channel on top of the core count
+        av_assert1(s->nchannels <= DCA_CHANNELS - 1);
+        s->ch_mask |= DCA_SPEAKER_MASK_Cs;
+        break;
+
+    case HEADER_XXCH:
+        // Channel set header length
+        header_size = get_bits(&s->gb, 7) + 1;
+
+        // Check CRC
+        if (s->xxch_crc_present
+            && (s->avctx->err_recognition & (AV_EF_CRCCHECK | AV_EF_CAREFUL))
+            && ff_dca_check_crc(&s->gb, header_pos, header_pos + header_size * 8)) {
+            av_log(s->avctx, AV_LOG_ERROR, "Invalid XXCH channel set header checksum\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        // Number of channels in a channel set
+        nchannels = get_bits(&s->gb, 3) + 1;
+        if (nchannels > DCA_XXCH_CHANNELS_MAX) {
+            avpriv_request_sample(s->avctx, "%d XXCH channels", nchannels);
+            return AVERROR_PATCHWELCOME;
+        }
+        s->nchannels = ff_dca_channels[s->audio_mode] + nchannels;
+        av_assert1(s->nchannels <= DCA_CHANNELS);
+
+        // Loudspeaker layout mask
+        mask = get_bits_long(&s->gb, s->xxch_mask_nbits - DCA_SPEAKER_Cs);     // the field omits the bits below DCA_SPEAKER_Cs
+        s->xxch_spkr_mask = mask << DCA_SPEAKER_Cs;
+
+        if (av_popcount(s->xxch_spkr_mask) != nchannels) {     // exactly one speaker bit per channel of the set
+            av_log(s->avctx, AV_LOG_ERROR, "Invalid XXCH speaker layout mask (%#x)\n", s->xxch_spkr_mask);
+            return AVERROR_INVALIDDATA;
+        }
+
+        if (s->xxch_core_mask & s->xxch_spkr_mask) {
+            av_log(s->avctx, AV_LOG_ERROR, "XXCH speaker layout mask (%#x) overlaps with core (%#x)\n", s->xxch_spkr_mask, s->xxch_core_mask);
+            return AVERROR_INVALIDDATA;
+        }
+
+        // Combine core and XXCH masks together
+        s->ch_mask = s->xxch_core_mask | s->xxch_spkr_mask;
+
+        // Downmix coefficients present in stream
+        if (get_bits1(&s->gb)) {
+            int *coeff_ptr = s->xxch_dmix_coeff;   // coefficients are stored back to back, in read order
+
+            // Downmix already performed by encoder
+            s->xxch_dmix_embedded = get_bits1(&s->gb);
+
+            // Downmix scale factor
+            index = get_bits(&s->gb, 6) * 4 - FF_DCA_DMIXTABLE_OFFSET - 3;     // unsigned: codes below the table start wrap and fail the check
+            if (index >= FF_DCA_INV_DMIXTABLE_SIZE) {
+                av_log(s->avctx, AV_LOG_ERROR, "Invalid XXCH downmix scale index (%d)\n", index);
+                return AVERROR_INVALIDDATA;
+            }
+            s->xxch_dmix_scale_inv = ff_dca_inv_dmixtable[index];
+
+            // Downmix channel mapping mask
+            for (ch = 0; ch < nchannels; ch++) {
+                mask = get_bits_long(&s->gb, s->xxch_mask_nbits);
+                if ((mask & s->xxch_core_mask) != mask) {  // only speakers of the core mask may be mapped
+                    av_log(s->avctx, AV_LOG_ERROR, "Invalid XXCH downmix channel mapping mask (%#x)\n", mask);
+                    return AVERROR_INVALIDDATA;
+                }
+                s->xxch_dmix_mask[ch] = mask;
+            }
+
+            // Downmix coefficients
+            for (ch = 0; ch < nchannels; ch++) {
+                for (n = 0; n < s->xxch_mask_nbits; n++) {
+                    if (s->xxch_dmix_mask[ch] & (1U << n)) {   // one 7-bit code per speaker set in the mapping mask
+                        int code = get_bits(&s->gb, 7);
+                        int sign = (code >> 6) - 1;            // 0 when bit 6 is set, -1 when it is clear
+                        if (code &= 63) {
+                            index = code * 4 - 3;
+                            if (index >= FF_DCA_DMIXTABLE_SIZE) {
+                                av_log(s->avctx, AV_LOG_ERROR, "Invalid XXCH downmix coefficient index (%d)\n", index);
+                                return AVERROR_INVALIDDATA;
+                            }
+                            *coeff_ptr++ = (ff_dca_dmixtable[index] ^ sign) - sign;    // negates the table value when sign is -1
+                        } else {
+                            *coeff_ptr++ = 0;
+                        }
+                    }
+                }
+            }
+        } else {
+            s->xxch_dmix_embedded = 0;
+        }
+
+        break;
+    }
+
+    // Subband activity count
+    for (ch = xch_base; ch < s->nchannels; ch++) {
+        s->nsubbands[ch] = get_bits(&s->gb, 5) + 2;
+        if (s->nsubbands[ch] > DCA_SUBBANDS) {
+            av_log(s->avctx, AV_LOG_ERROR, "Invalid subband activity count\n");
+            return AVERROR_INVALIDDATA;
+        }
+    }
+
+    // High frequency VQ start subband
+    for (ch = xch_base; ch < s->nchannels; ch++)
+        s->subband_vq_start[ch] = get_bits(&s->gb, 5) + 1;
+
+    // Joint intensity coding index
+    for (ch = xch_base; ch < s->nchannels; ch++) {
+        if ((n = get_bits(&s->gb, 3)) && header == HEADER_XXCH)
+            n += xch_base - 1;     // non-zero XXCH indices are rebased by xch_base - 1
+        if (n > s->nchannels) {
+            av_log(s->avctx, AV_LOG_ERROR, "Invalid joint intensity coding index\n");
+            return AVERROR_INVALIDDATA;
+        }
+        s->joint_intensity_index[ch] = n;
+    }
+
+    // Transient mode code book
+    for (ch = xch_base; ch < s->nchannels; ch++)
+        s->transition_mode_sel[ch] = get_bits(&s->gb, 2);
+
+    // Scale factor code book
+    for (ch = xch_base; ch < s->nchannels; ch++) {
+        s->scale_factor_sel[ch] = get_bits(&s->gb, 3);
+        if (s->scale_factor_sel[ch] == 7) {
+            av_log(s->avctx, AV_LOG_ERROR, "Invalid scale factor code book\n");
+            return AVERROR_INVALIDDATA;
+        }
+    }
+
+    // Bit allocation quantizer select
+    for (ch = xch_base; ch < s->nchannels; ch++) {
+        s->bit_allocation_sel[ch] = get_bits(&s->gb, 3);
+        if (s->bit_allocation_sel[ch] == 7) {
+            av_log(s->avctx, AV_LOG_ERROR, "Invalid bit allocation quantizer select\n");
+            return AVERROR_INVALIDDATA;
+        }
+    }
+
+    // Quantization index codebook select (code book major, channel minor order)
+    for (n = 0; n < DCA_CODE_BOOKS; n++)
+        for (ch = xch_base; ch < s->nchannels; ch++)
+            s->quant_index_sel[ch][n] = get_bits(&s->gb, quant_index_sel_nbits[n]);
+
+    // Scale factor adjustment index (present only where the selector picks a Huffman table)
+    for (n = 0; n < DCA_CODE_BOOKS; n++)
+        for (ch = xch_base; ch < s->nchannels; ch++)
+            if (s->quant_index_sel[ch][n] < quant_index_group_size[n])
+                s->scale_factor_adj[ch][n] = ff_dca_scale_factor_adj[get_bits(&s->gb, 2)];
+
+    if (header == HEADER_XXCH) {
+        // Reserved
+        // Byte align
+        // CRC16 of channel set header
+        if (ff_dca_seek_bits(&s->gb, header_pos + header_size * 8)) {  // jump to the end declared by the header length field
+            av_log(s->avctx, AV_LOG_ERROR, "Read past end of XXCH channel set header\n");
+            return AVERROR_INVALIDDATA;
+        }
+    } else {
+        // Audio header CRC check word
+        if (s->crc_present)
+            skip_bits(&s->gb, 16);
+    }
+
+    return 0;
+}
+
+static inline int parse_scale(DCACoreDecoder *s, int *scale_index, int sel)
+{
+    // Selectors above 5 look up the quant7 table, all others the quant6 table
+    const int wide = sel > 5;
+    const uint32_t *table = wide ? ff_dca_scale_factor_quant7
+                                 : ff_dca_scale_factor_quant6;
+    unsigned int count    = wide ? FF_ARRAY_ELEMS(ff_dca_scale_factor_quant7)
+                                 : FF_ARRAY_ELEMS(ff_dca_scale_factor_quant6);
+    int idx;
+
+    // Selectors below 5 are Huffman coded and carry a difference that is
+    // applied to the caller's running index; the rest store the index itself
+    if (sel >= 5)
+        idx = get_bits(&s->gb, sel + 1);
+    else
+        idx = *scale_index + dca_get_vlc(&s->gb, &vlc_scale_factor, sel);
+
+    // The running index is updated even when the range check below fails
+    *scale_index = idx;
+
+    // Unsigned compare also rejects negative indices
+    if ((unsigned int)idx >= count) {
+        av_log(s->avctx, AV_LOG_ERROR, "Invalid scale factor index\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    return table[idx];
+}
+
+static inline int parse_joint_scale(DCACoreDecoder *s, int sel)
+{
+    int idx;
+
+    // The value is absolute for every selector; selectors below 5 only
+    // change how it is coded (Huffman instead of a plain sel + 1 bit field)
+    if (sel >= 5)
+        idx = get_bits(&s->gb, sel + 1);
+    else
+        idx = dca_get_vlc(&s->gb, &vlc_scale_factor, sel);
+
+    // Bias by 64 before indexing the table
+    idx += 64;
+
+    // Unsigned compare also rejects negative indices
+    if ((unsigned int)idx < FF_ARRAY_ELEMS(ff_dca_joint_scale_factors))
+        return ff_dca_joint_scale_factors[idx];
+
+    av_log(s->avctx, AV_LOG_ERROR, "Invalid joint scale factor index\n");
+    return AVERROR_INVALIDDATA;
+}
+
+// 5.4.1 - Primary audio coding side information for subframe sf, channels xch_base..nchannels-1; returns 0 or a negative AVERROR code
+static int parse_subframe_header(DCACoreDecoder *s, int sf,
+                                 enum HeaderType header, int xch_base)
+{
+    int ch, band, ret;
+
+    if (get_bits_left(&s->gb) < 0)
+        return AVERROR_INVALIDDATA;
+
+    if (header == HEADER_CORE) {   // the extension passes reuse the count stored by the core pass
+        // Subsubframe count
+        s->nsubsubframes[sf] = get_bits(&s->gb, 2) + 1;
+
+        // Partial subsubframe sample count
+        skip_bits(&s->gb, 3);
+    }
+
+    // Prediction mode
+    for (ch = xch_base; ch < s->nchannels; ch++)
+        for (band = 0; band < s->nsubbands[ch]; band++)
+            s->prediction_mode[ch][band] = get_bits1(&s->gb);
+
+    // Prediction coefficients VQ address (only for bands with prediction enabled)
+    for (ch = xch_base; ch < s->nchannels; ch++)
+        for (band = 0; band < s->nsubbands[ch]; band++)
+            if (s->prediction_mode[ch][band])
+                s->prediction_vq_index[ch][band] = get_bits(&s->gb, 12);
+
+    // Bit allocation index
+    for (ch = xch_base; ch < s->nchannels; ch++) {
+        int sel = s->bit_allocation_sel[ch];
+
+        for (band = 0; band < s->subband_vq_start[ch]; band++) {
+            int abits;
+
+            if (sel < 5)
+                abits = dca_get_vlc(&s->gb, &vlc_bit_allocation, sel);
+            else
+                abits = get_bits(&s->gb, sel - 1);     // plain field of sel - 1 bits for selectors 5 and 6
+
+            if (abits > DCA_ABITS_MAX) {
+                av_log(s->avctx, AV_LOG_ERROR, "Invalid bit allocation index\n");
+                return AVERROR_INVALIDDATA;
+            }
+
+            s->bit_allocation[ch][band] = abits;
+        }
+    }
+
+    // Transition mode
+    for (ch = xch_base; ch < s->nchannels; ch++) {
+        // Clear transition mode for all subbands
+        memset(s->transition_mode[sf][ch], 0, sizeof(s->transition_mode[0][0]));
+
+        // Transient possible only if more than one subsubframe
+        if (s->nsubsubframes[sf] > 1) {
+            int sel = s->transition_mode_sel[ch];
+            for (band = 0; band < s->subband_vq_start[ch]; band++)
+                if (s->bit_allocation[ch][band])   // bands without allocated bits keep mode 0
+                    s->transition_mode[sf][ch][band] = dca_get_vlc(&s->gb, &vlc_transition_mode, sel);
+        }
+    }
+
+    // Scale factors
+    for (ch = xch_base; ch < s->nchannels; ch++) {
+        int sel = s->scale_factor_sel[ch];
+        int scale_index = 0;   // running index: Huffman coded scales are differences against it
+
+        // Extract scales for subbands up to VQ
+        for (band = 0; band < s->subband_vq_start[ch]; band++) {
+            if (s->bit_allocation[ch][band]) {
+                if ((ret = parse_scale(s, &scale_index, sel)) < 0)
+                    return ret;
+                s->scale_factors[ch][band][0] = ret;
+                if (s->transition_mode[sf][ch][band]) {    // a second scale follows when a transition is flagged
+                    if ((ret = parse_scale(s, &scale_index, sel)) < 0)
+                        return ret;
+                    s->scale_factors[ch][band][1] = ret;
+                }
+            } else {
+                s->scale_factors[ch][band][0] = 0;
+            }
+        }
+
+        // High frequency VQ subbands
+        for (band = s->subband_vq_start[ch]; band < s->nsubbands[ch]; band++) {
+            if ((ret = parse_scale(s, &scale_index, sel)) < 0)
+                return ret;
+            s->scale_factors[ch][band][0] = ret;
+        }
+    }
+
+    // Joint subband codebook select
+    for (ch = xch_base; ch < s->nchannels; ch++) {
+        if (s->joint_intensity_index[ch]) {
+            s->joint_scale_sel[ch] = get_bits(&s->gb, 3);
+            if (s->joint_scale_sel[ch] == 7) {
+                av_log(s->avctx, AV_LOG_ERROR, "Invalid joint scale factor code book\n");
+                return AVERROR_INVALIDDATA;
+            }
+        }
+    }
+
+    // Scale factors for joint subband coding
+    for (ch = xch_base; ch < s->nchannels; ch++) {
+        int src_ch = s->joint_intensity_index[ch] - 1;     // index 0 means no joint coding, otherwise source channel + 1
+        if (src_ch >= 0) {
+            int sel = s->joint_scale_sel[ch];
+            for (band = s->nsubbands[ch]; band < s->nsubbands[src_ch]; band++) {   // bands the source channel has beyond this one
+                if ((ret = parse_joint_scale(s, sel)) < 0)
+                    return ret;
+                s->joint_scale_factors[ch][band] = ret;
+            }
+        }
+    }
+
+    // Dynamic range coefficient (skipped, core header only)
+    if (s->drc_present && header == HEADER_CORE)
+        skip_bits(&s->gb, 8);
+
+    // Side information CRC check word (skipped, not verified here)
+    if (s->crc_present)
+        skip_bits(&s->gb, 16);
+
+    return 0;
+}
+
+#ifndef decode_blockcodes
+static inline int decode_blockcodes(int code1, int code2, int levels, int32_t *audio)
+{
+    const int half = DCA_SUBBAND_SAMPLES / 2, bias = (levels - 1) / 2;
+    int i, q;
+    // Each code packs `half` base-`levels` digits, least significant first
+    for (i = 0; i < half; i++) {
+        q = FASTDIV(code1, levels);
+        audio[i] = code1 - q * levels - bias;
+        code1 = q;
+    }
+    for (i = half; i < DCA_SUBBAND_SAMPLES; i++) {
+        q = FASTDIV(code2, levels);
+        audio[i] = code2 - q * levels - bias;
+        code2 = q;
+    }
+    // Whatever is left of either code after the last digit is returned
+    return code1 | code2;
+}
+#endif
+
+static inline int parse_block_codes(DCACoreDecoder *s, int32_t *audio, int abits)
+{
+    // Both codes have the same width, selected by the bit allocation index
+    const int nbits = block_code_nbits[abits - 1];
+    int lo = get_bits(&s->gb, nbits);
+    int hi = get_bits(&s->gb, nbits);
+    int levels = ff_dca_quant_levels[abits];
+
+    // A zero result from the decoder means both codes were consumed exactly
+    if (!decode_blockcodes(lo, hi, levels, audio))
+        return 0;
+
+    av_log(s->avctx, AV_LOG_ERROR, "Failed to decode block code(s)\n");
+    return AVERROR_INVALIDDATA;
+}
+
+static inline int parse_huffman_codes(DCACoreDecoder *s, int32_t *audio, int abits, int sel)
+{
+    // All samples of the block use table `sel` of code book abits - 1
+    DCAVLC *book = &vlc_quant_index[abits - 1];
+    int32_t *end = audio + DCA_SUBBAND_SAMPLES;
+    while (audio < end)
+        *audio++ = dca_get_vlc(&s->gb, book, sel);
+    // Returns 1, whereas the other extract_audio() paths return 0 on success
+    return 1;
+}
+
+static inline int extract_audio(DCACoreDecoder *s, int32_t *audio, int abits, int ch)
+{
+    av_assert1(abits >= 0 && abits <= DCA_ABITS_MAX);
+
+    // No bits allocated: nothing is read and the block is zeroed
+    if (!abits) {
+        memset(audio, 0, DCA_SUBBAND_SAMPLES * sizeof(*audio));
+        return 0;
+    }
+
+    // Only the first DCA_CODE_BOOKS allocation indices have code books
+    if (abits <= DCA_CODE_BOOKS) {
+        int book = abits - 1;
+        int sel  = s->quant_index_sel[ch][book];
+        // A selector inside the table group picks a Huffman table
+        if (sel < quant_index_group_size[book])
+            return parse_huffman_codes(s, audio, abits, sel);
+        // Otherwise allocation indices up to 7 use block codes
+        if (abits <= 7)
+            return parse_block_codes(s, audio, abits);
+    }
+
+    // No further encoding: plain signed fields of abits - 3 bits each
+    get_array(&s->gb, audio, DCA_SUBBAND_SAMPLES, abits - 3);
+    return 0;
+}
+
+static inline void dequantize(int32_t *output, const int32_t *input,
+                              int32_t step_size, int32_t scale, int residual)
+{
+    // Combined multiplier: quantizer step size times scale factor
+    int64_t mult = (int64_t)step_size * scale;
+    int i, excess = 0;
+
+    // Shift the multiplier down once it exceeds 1 << 23
+    if (mult > (1 << 23)) {
+        excess = av_log2(mult >> 23) + 1;
+        mult >>= excess;
+    }
+
+    // The bits dropped above are returned by shortening the final shift
+    if (!residual) {
+        for (i = 0; i < DCA_SUBBAND_SAMPLES; i++)
+            output[i]  = clip23(norm__(input[i] * mult, 22 - excess));
+    } else {
+        for (i = 0; i < DCA_SUBBAND_SAMPLES; i++)
+            output[i] += clip23(norm__(input[i] * mult, 22 - excess));
+    }
+}
+
+// Applies inverse ADPCM to subbands [sb_start, sb_end) that have prediction
+// enabled, updating 'len' samples at offset 'ofs' from the preceding history.
+static inline void inverse_adpcm(int32_t **subband_samples, const int16_t *vq_index,
+                                 const int8_t *prediction_mode, int sb_start,
+                                 int sb_end, int ofs, int len)
+{
+    int band, n, tap;
+
+    for (band = sb_start; band < sb_end; band++) {
+        if (prediction_mode[band]) {
+            const int16_t *coeff = ff_dca_adpcm_vb[vq_index[band]];
+            int32_t *cur = subband_samples[band] + ofs;
+            for (n = 0; n < len; n++, cur++) {
+                int64_t pred = 0;
+                for (tap = 1; tap <= DCA_ADPCM_COEFFS; tap++)
+                    pred += (int64_t)cur[-tap] * coeff[tap - 1];
+                *cur = clip23(*cur + clip23(norm13(pred)));
+            }
+        }
+    }
+}
+
+// 5.5 - Primary audio data arrays
+// Decodes the audio data of subframe 'sf' for channels xch_base and up into
+// s->subband_samples (plus s->lfe_samples when 'header' is HEADER_CORE),
+// starting at *sub_pos / *lfe_pos, which are advanced. Returns 0 or AVERROR.
+static int parse_subframe_audio(DCACoreDecoder *s, int sf, enum HeaderType header,
+                                int xch_base, int *sub_pos, int *lfe_pos)
+{
+    int32_t audio[16], scale;
+    int n, ssf, ofs, ch, band;
+
+    // Check number of subband samples in this subframe
+    int nsamples = s->nsubsubframes[sf] * DCA_SUBBAND_SAMPLES;
+    if (*sub_pos + nsamples > s->npcmblocks) {
+        av_log(s->avctx, AV_LOG_ERROR, "Subband sample buffer overflow\n");
+        return AVERROR_INVALIDDATA;
+    }
+    if (get_bits_left(&s->gb) < 0)
+        return AVERROR_INVALIDDATA;
+
+    // VQ encoded subbands: bands from subband_vq_start[ch] up to nsubbands[ch]
+    for (ch = xch_base; ch < s->nchannels; ch++) {
+        int32_t vq_index[DCA_SUBBANDS];
+
+        for (band = s->subband_vq_start[ch]; band < s->nsubbands[ch]; band++)
+            // Extract the VQ address from the bit stream
+            vq_index[band] = get_bits(&s->gb, 10);
+
+        if (s->subband_vq_start[ch] < s->nsubbands[ch]) {
+            s->dcadsp->decode_hf(s->subband_samples[ch], vq_index,
+                                 ff_dca_high_freq_vq, s->scale_factors[ch],
+                                 s->subband_vq_start[ch], s->nsubbands[ch],
+                                 *sub_pos, nsamples);
+        }
+    }
+
+    // Low frequency effect data, only read for a core header
+    if (s->lfe_present && header == HEADER_CORE) {
+        unsigned int index;
+
+        // Determine number of LFE samples in this subframe
+        int nlfesamples = 2 * s->lfe_present * s->nsubsubframes[sf];
+        av_assert1((unsigned int)nlfesamples <= FF_ARRAY_ELEMS(audio));
+
+        // Extract LFE samples from the bit stream
+        get_array(&s->gb, audio, nlfesamples, 8);
+        // Extract scale factor index from the bit stream
+        index = get_bits(&s->gb, 8);
+        if (index >= FF_ARRAY_ELEMS(ff_dca_scale_factor_quant7)) {
+            av_log(s->avctx, AV_LOG_ERROR, "Invalid LFE scale factor index\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        // Look up the 7-bit root square quantization table
+        scale = ff_dca_scale_factor_quant7[index];
+        // Account for quantizer step size which is 0.035
+        scale = mul23(4697620 /* 0.035 * (1 << 27) */, scale);
+
+        // Scale and take the LFE samples
+        for (n = 0, ofs = *lfe_pos; n < nlfesamples; n++, ofs++)
+            s->lfe_samples[ofs] = clip23(audio[n] * scale >> 4);
+
+        // Advance LFE sample pointer for the next subframe
+        *lfe_pos = ofs;
+    }
+
+    // Audio data, one block of DCA_SUBBAND_SAMPLES per subsubframe and subband
+    for (ssf = 0, ofs = *sub_pos; ssf < s->nsubsubframes[sf]; ssf++) {
+        for (ch = xch_base; ch < s->nchannels; ch++) {
+            if (get_bits_left(&s->gb) < 0)
+                return AVERROR_INVALIDDATA;
+
+            // Not high frequency VQ subbands
+            for (band = 0; band < s->subband_vq_start[ch]; band++) {
+                int ret, trans_ssf, abits = s->bit_allocation[ch][band];
+                int32_t step_size;
+
+                // Extract bits from the bit stream
+                if ((ret = extract_audio(s, audio, abits, ch)) < 0)
+                    return ret;
+
+                // Select quantization step size table and look up
+                // quantization step size
+                if (s->bit_rate == 3)
+                    step_size = ff_dca_lossless_quant[abits];
+                else
+                    step_size = ff_dca_lossy_quant[abits];
+
+                // Identify transient location
+                trans_ssf = s->transition_mode[sf][ch][band];
+
+                // Determine proper scale factor
+                if (trans_ssf == 0 || ssf < trans_ssf)
+                    scale = s->scale_factors[ch][band][0];
+                else
+                    scale = s->scale_factors[ch][band][1];
+
+                // Adjust scale factor for Huffman codes (extract_audio() returned 1)
+                if (ret > 0) {
+                    int64_t adj = s->scale_factor_adj[ch][abits - 1];
+                    scale = clip23(adj * scale >> 22);
+                }
+                // Overwrite (residual = 0) this block with the dequantized samples
+                dequantize(s->subband_samples[ch][band] + ofs,
+                           audio, step_size, scale, 0);
+            }
+        }
+
+        // DSYNC: expect 0xffff after the last subsubframe, or after each if sync_ssf
+        if ((ssf == s->nsubsubframes[sf] - 1 || s->sync_ssf) && get_bits(&s->gb, 16) != 0xffff) {
+            av_log(s->avctx, AV_LOG_ERROR, "DSYNC check failed\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        ofs += DCA_SUBBAND_SAMPLES;
+    }
+
+    // Inverse ADPCM
+    for (ch = xch_base; ch < s->nchannels; ch++) {
+        inverse_adpcm(s->subband_samples[ch], s->prediction_vq_index[ch],
+                      s->prediction_mode[ch], 0, s->nsubbands[ch],
+                      *sub_pos, nsamples);
+    }
+
+    // Joint subband coding; joint_intensity_index is 1-based, 0 disables it
+    for (ch = xch_base; ch < s->nchannels; ch++) {
+        int src_ch = s->joint_intensity_index[ch] - 1;
+        if (src_ch >= 0) {
+            s->dcadsp->decode_joint(s->subband_samples[ch], s->subband_samples[src_ch],
+                                    s->joint_scale_factors[ch], s->nsubbands[ch],
+                                    s->nsubbands[src_ch], *sub_pos, nsamples);
+        }
+    }
+
+    // Advance subband sample pointer for the next subframe
+    *sub_pos = ofs;
+    return 0;
+}
+
+// Clears the ADPCM history stored just before each core subband's samples,
+// so that nothing from the previous frame feeds the predictor.
+static void erase_adpcm_history(DCACoreDecoder *s)
+{
+    int i;
+
+    for (i = 0; i < DCA_CHANNELS * DCA_SUBBANDS; i++) {
+        int32_t *first = s->subband_samples[i / DCA_SUBBANDS][i % DCA_SUBBANDS];
+        AV_ZERO128(first - DCA_ADPCM_COEFFS);
+    }
+    emms_c();
+}
+
+// Ensures the core subband/LFE buffer is large enough for s->npcmblocks and,
+// if its size changed, re-derives the per-subband and LFE sample pointers.
+// Returns 0, or AVERROR(ENOMEM) if the buffer could not be allocated.
+static int alloc_sample_buffer(DCACoreDecoder *s)
+{
+    const int stride       = DCA_ADPCM_COEFFS + s->npcmblocks;
+    const int band_total   = stride * DCA_CHANNELS * DCA_SUBBANDS;
+    const int lfe_total    = DCA_LFE_HISTORY + s->npcmblocks / 2;
+    const unsigned int old = s->subband_size;
+    int i;
+
+    av_fast_mallocz(&s->subband_buffer, &s->subband_size,
+                    (band_total + lfe_total) * sizeof(int32_t));
+    if (!s->subband_buffer)
+        return AVERROR(ENOMEM);
+
+    // Subbands lie back to back, each after its ADPCM history; LFE comes last
+    if (old != s->subband_size) {
+        for (i = 0; i < DCA_CHANNELS * DCA_SUBBANDS; i++)
+            s->subband_samples[i / DCA_SUBBANDS][i % DCA_SUBBANDS] =
+                s->subband_buffer + i * stride + DCA_ADPCM_COEFFS;
+        s->lfe_samples = s->subband_buffer + band_total;
+    }
+    if (!s->predictor_history)
+        erase_adpcm_history(s);
+    return 0;
+}
+
+// Parses the coding header and all subframes for channels 'xch_base' and up,
+// then refreshes the ADPCM history. Returns 0 or a sub-parser's error code.
+static int parse_frame_data(DCACoreDecoder *s, enum HeaderType header, int xch_base)
+{
+    int sf, ch, band, ret, sub_pos = 0, lfe_pos = DCA_LFE_HISTORY;
+
+    ret = parse_coding_header(s, header, xch_base);
+    if (ret < 0)
+        return ret;
+
+    for (sf = 0; sf < s->nsubframes; sf++) {
+        ret = parse_subframe_header(s, sf, header, xch_base);
+        if (ret >= 0)
+            ret = parse_subframe_audio(s, sf, header, xch_base, &sub_pos, &lfe_pos);
+        if (ret < 0)
+            return ret;
+    }
+
+    for (ch = xch_base; ch < s->nchannels; ch++) {
+        const int joint  = s->joint_intensity_index[ch];
+        const int active = !joint ? s->nsubbands[ch]
+                         : FFMAX(s->nsubbands[ch], s->nsubbands[joint - 1]);
+
+        // Active subbands: the end of this frame becomes the ADPCM history
+        for (band = 0; band < active; band++) {
+            int32_t *hist = s->subband_samples[ch][band] - DCA_ADPCM_COEFFS;
+            AV_COPY128(hist, hist + s->npcmblocks);
+        }
+        // Inactive subbands: clear history and samples alike
+        for (; band < DCA_SUBBANDS; band++)
+            memset(s->subband_samples[ch][band] - DCA_ADPCM_COEFFS, 0,
+                   (DCA_ADPCM_COEFFS + s->npcmblocks) * sizeof(int32_t));
+    }
+
+    emms_c();
+    return 0;
+}
+
+// Decodes the XCH extension with s->nchannels as the base channel index;
+// rejected if the core already has a Cs speaker. Returns 0 or an AVERROR.
+static int parse_xch_frame(DCACoreDecoder *s)
+{
+    int ret;
+
+    if (s->ch_mask & DCA_SPEAKER_MASK_Cs) {
+        av_log(s->avctx, AV_LOG_ERROR, "XCH with Cs speaker already present\n");
+        return AVERROR_INVALIDDATA;
+    }
+    ret = parse_frame_data(s, HEADER_XCH, s->nchannels);
+    if (ret < 0)
+        return ret;
+
+    // The XCH frame size is not trusted: continue from the end of core frame
+    if (!ff_dca_seek_bits(&s->gb, s->frame_size * 8))
+        return 0;
+    av_log(s->avctx, AV_LOG_ERROR, "Read past end of XCH frame\n");
+    return AVERROR_INVALIDDATA;
+}
+
+// Parses an XXCH extension frame: validates the sync word, optional header
+// CRC and core speaker mask, then decodes channel set 0 with s->nchannels as
+// the base channel. Returns 0 or a negative AVERROR code.
+static int parse_xxch_frame(DCACoreDecoder *s)
+{
+    int xxch_nchsets, xxch_frame_size;
+    int ret, mask, header_size, header_pos = get_bits_count(&s->gb);
+
+    // XXCH sync word
+    if (get_bits_long(&s->gb, 32) != DCA_SYNCWORD_XXCH) {
+        av_log(s->avctx, AV_LOG_ERROR, "Invalid XXCH sync word\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    // XXCH frame header length
+    header_size = get_bits(&s->gb, 6) + 1;
+
+    // Check XXCH frame header CRC (only when CRC checking is requested)
+    if ((s->avctx->err_recognition & (AV_EF_CRCCHECK | AV_EF_CAREFUL))
+        && ff_dca_check_crc(&s->gb, header_pos + 32, header_pos + header_size * 8)) {
+        av_log(s->avctx, AV_LOG_ERROR, "Invalid XXCH frame header checksum\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    // CRC presence flag for channel set header
+    s->xxch_crc_present = get_bits1(&s->gb);
+
+    // Number of bits for loudspeaker mask
+    s->xxch_mask_nbits = get_bits(&s->gb, 5) + 1;
+    if (s->xxch_mask_nbits <= DCA_SPEAKER_Cs) {
+        av_log(s->avctx, AV_LOG_ERROR, "Invalid number of bits for XXCH speaker mask (%d)\n", s->xxch_mask_nbits);
+        return AVERROR_INVALIDDATA;
+    }
+
+    // Number of channel sets (only a single set is supported)
+    xxch_nchsets = get_bits(&s->gb, 2) + 1;
+    if (xxch_nchsets > 1) {
+        avpriv_request_sample(s->avctx, "%d XXCH channel sets", xxch_nchsets);
+        return AVERROR_PATCHWELCOME;
+    }
+
+    // Channel set 0 data byte size
+    xxch_frame_size = get_bits(&s->gb, 14) + 1;
+
+    // Core loudspeaker activity mask
+    s->xxch_core_mask = get_bits_long(&s->gb, s->xxch_mask_nbits);
+
+    // Validate the core mask; core Ls/Rs may be signalled as Lss/Rss here
+    mask = s->ch_mask;
+    if ((mask & DCA_SPEAKER_MASK_Ls) && (s->xxch_core_mask & DCA_SPEAKER_MASK_Lss))
+        mask = (mask & ~DCA_SPEAKER_MASK_Ls) | DCA_SPEAKER_MASK_Lss;
+    if ((mask & DCA_SPEAKER_MASK_Rs) && (s->xxch_core_mask & DCA_SPEAKER_MASK_Rss))
+        mask = (mask & ~DCA_SPEAKER_MASK_Rs) | DCA_SPEAKER_MASK_Rss;
+    if (mask != s->xxch_core_mask) {
+        av_log(s->avctx, AV_LOG_ERROR, "XXCH core speaker activity mask (%#x) disagrees with core (%#x)\n", s->xxch_core_mask, mask);
+        return AVERROR_INVALIDDATA;
+    }
+
+    // Reserved
+    // Byte align
+    // CRC16 of XXCH frame header
+    if (ff_dca_seek_bits(&s->gb, header_pos + header_size * 8)) {
+        av_log(s->avctx, AV_LOG_ERROR, "Read past end of XXCH frame header\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    // Parse XXCH channel set 0
+    if ((ret = parse_frame_data(s, HEADER_XXCH, s->nchannels)) < 0)
+        return ret;
+
+    if (ff_dca_seek_bits(&s->gb, header_pos + header_size * 8 + xxch_frame_size * 8)) {
+        av_log(s->avctx, AV_LOG_ERROR, "Read past end of XXCH channel set\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    return 0;
+}
+
+// Decodes one XBR subframe for channels xbr_base_ch <= ch < xbr_nchannels and
+// adds the result onto the samples already in s->subband_samples, starting
+// at *sub_pos, which is advanced on success. Returns 0 or a negative AVERROR.
+static int parse_xbr_subframe(DCACoreDecoder *s, int xbr_base_ch, int xbr_nchannels,
+                              int *xbr_nsubbands, int xbr_transition_mode, int sf, int *sub_pos)
+{
+    int     xbr_nabits[DCA_CHANNELS];
+    int     xbr_bit_allocation[DCA_CHANNELS][DCA_SUBBANDS];
+    int     xbr_scale_nbits[DCA_CHANNELS];
+    int32_t xbr_scale_factors[DCA_CHANNELS][DCA_SUBBANDS][2];
+    int     ssf, ch, band, ofs;
+
+    // Check number of subband samples in this subframe
+    if (*sub_pos + s->nsubsubframes[sf] * DCA_SUBBAND_SAMPLES > s->npcmblocks) {
+        av_log(s->avctx, AV_LOG_ERROR, "Subband sample buffer overflow\n");
+        return AVERROR_INVALIDDATA;
+    }
+    if (get_bits_left(&s->gb) < 0)
+        return AVERROR_INVALIDDATA;
+
+    // Number of bits for XBR bit allocation index
+    for (ch = xbr_base_ch; ch < xbr_nchannels; ch++)
+        xbr_nabits[ch] = get_bits(&s->gb, 2) + 2;
+
+    // XBR bit allocation index
+    for (ch = xbr_base_ch; ch < xbr_nchannels; ch++) {
+        for (band = 0; band < xbr_nsubbands[ch]; band++) {
+            xbr_bit_allocation[ch][band] = get_bits(&s->gb, xbr_nabits[ch]);
+            if (xbr_bit_allocation[ch][band] > DCA_ABITS_MAX) {
+                av_log(s->avctx, AV_LOG_ERROR, "Invalid XBR bit allocation index\n");
+                return AVERROR_INVALIDDATA;
+            }
+        }
+    }
+
+    // Number of bits for scale indices (zero is rejected)
+    for (ch = xbr_base_ch; ch < xbr_nchannels; ch++) {
+        xbr_scale_nbits[ch] = get_bits(&s->gb, 3);
+        if (!xbr_scale_nbits[ch]) {
+            av_log(s->avctx, AV_LOG_ERROR, "Invalid number of bits for XBR scale factor index\n");
+            return AVERROR_INVALIDDATA;
+        }
+    }
+
+    // XBR scale factors
+    for (ch = xbr_base_ch; ch < xbr_nchannels; ch++) {
+        const uint32_t *scale_table;
+        int scale_size;
+        // Select the root square table
+        if (s->scale_factor_sel[ch] > 5) {
+            scale_table = ff_dca_scale_factor_quant7;
+            scale_size = FF_ARRAY_ELEMS(ff_dca_scale_factor_quant7);
+        } else {
+            scale_table = ff_dca_scale_factor_quant6;
+            scale_size = FF_ARRAY_ELEMS(ff_dca_scale_factor_quant6);
+        }
+        // Parse scale factor indices and look up scale factors from the root
+        // square table; only subbands with a non-zero allocation carry them
+        for (band = 0; band < xbr_nsubbands[ch]; band++) {
+            if (xbr_bit_allocation[ch][band]) {
+                int scale_index = get_bits(&s->gb, xbr_scale_nbits[ch]);
+                if (scale_index >= scale_size) {
+                    av_log(s->avctx, AV_LOG_ERROR, "Invalid XBR scale factor index\n");
+                    return AVERROR_INVALIDDATA;
+                }
+                xbr_scale_factors[ch][band][0] = scale_table[scale_index];
+                if (xbr_transition_mode && s->transition_mode[sf][ch][band]) {
+                    scale_index = get_bits(&s->gb, xbr_scale_nbits[ch]);
+                    if (scale_index >= scale_size) {
+                        av_log(s->avctx, AV_LOG_ERROR, "Invalid XBR scale factor index\n");
+                        return AVERROR_INVALIDDATA;
+                    }
+                    xbr_scale_factors[ch][band][1] = scale_table[scale_index];
+                }
+            }
+        }
+    }
+
+    // Audio data
+    for (ssf = 0, ofs = *sub_pos; ssf < s->nsubsubframes[sf]; ssf++) {
+        for (ch = xbr_base_ch; ch < xbr_nchannels; ch++) {
+            if (get_bits_left(&s->gb) < 0)
+                return AVERROR_INVALIDDATA;
+
+            for (band = 0; band < xbr_nsubbands[ch]; band++) {
+                int ret, trans_ssf, abits = xbr_bit_allocation[ch][band];
+                int32_t audio[DCA_SUBBAND_SAMPLES], step_size, scale;
+
+                // Extract bits from the bit stream
+                if (abits > 7) {
+                    // No further encoding
+                    get_array(&s->gb, audio, DCA_SUBBAND_SAMPLES, abits - 3);
+                } else if (abits > 0) {
+                    // Block codes
+                    if ((ret = parse_block_codes(s, audio, abits)) < 0)
+                        return ret;
+                } else {
+                    // No bits allocated: leave the existing samples untouched
+                    continue;
+                }
+
+                // Look up quantization step size (always the lossless table here)
+                step_size = ff_dca_lossless_quant[abits];
+
+                // Identify transient location
+                if (xbr_transition_mode)
+                    trans_ssf = s->transition_mode[sf][ch][band];
+                else
+                    trans_ssf = 0;
+
+                // Determine proper scale factor
+                if (trans_ssf == 0 || ssf < trans_ssf)
+                    scale = xbr_scale_factors[ch][band][0];
+                else
+                    scale = xbr_scale_factors[ch][band][1];
+                // Add (residual = 1) the XBR data onto the samples already decoded
+                dequantize(s->subband_samples[ch][band] + ofs,
+                           audio, step_size, scale, 1);
+            }
+        }
+
+        // DSYNC: expect 0xffff after the last subsubframe, or after each if sync_ssf
+        if ((ssf == s->nsubsubframes[sf] - 1 || s->sync_ssf) && get_bits(&s->gb, 16) != 0xffff) {
+            av_log(s->avctx, AV_LOG_ERROR, "XBR-DSYNC check failed\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        ofs += DCA_SUBBAND_SAMPLES;
+    }
+
+    // Advance subband sample pointer for the next subframe
+    *sub_pos = ofs;
+    return 0;
+}
+
+// Parses an XBR extension frame: sync word, optional header CRC and the
+// per channel set sizes and subband counts, then every channel set's
+// subframes. Returns 0 or a negative AVERROR code.
+static int parse_xbr_frame(DCACoreDecoder *s)
+{
+    int     xbr_frame_size[DCA_EXSS_CHSETS_MAX];
+    int     xbr_nchannels[DCA_EXSS_CHSETS_MAX];
+    int     xbr_nsubbands[DCA_EXSS_CHSETS_MAX * DCA_EXSS_CHANNELS_MAX];
+    int     xbr_nchsets, xbr_transition_mode, xbr_band_nbits, xbr_base_ch;
+    int     i, ch1, ch2, ret, header_size, header_pos = get_bits_count(&s->gb);
+
+    // XBR sync word
+    if (get_bits_long(&s->gb, 32) != DCA_SYNCWORD_XBR) {
+        av_log(s->avctx, AV_LOG_ERROR, "Invalid XBR sync word\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    // XBR frame header length
+    header_size = get_bits(&s->gb, 6) + 1;
+
+    // Check XBR frame header CRC (only when CRC checking is requested)
+    if ((s->avctx->err_recognition & (AV_EF_CRCCHECK | AV_EF_CAREFUL))
+        && ff_dca_check_crc(&s->gb, header_pos + 32, header_pos + header_size * 8)) {
+        av_log(s->avctx, AV_LOG_ERROR, "Invalid XBR frame header checksum\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    // Number of channel sets
+    xbr_nchsets = get_bits(&s->gb, 2) + 1;
+    // Channel set data byte size
+    for (i = 0; i < xbr_nchsets; i++)
+        xbr_frame_size[i] = get_bits(&s->gb, 14) + 1;
+    // Transition mode flag
+    xbr_transition_mode = get_bits1(&s->gb);
+
+    // Channel set headers; ch2 counts channels across all channel sets
+    for (i = 0, ch2 = 0; i < xbr_nchsets; i++) {
+        xbr_nchannels[i] = get_bits(&s->gb, 3) + 1;
+        xbr_band_nbits = get_bits(&s->gb, 2) + 5;
+        for (ch1 = 0; ch1 < xbr_nchannels[i]; ch1++, ch2++) {
+            xbr_nsubbands[ch2] = get_bits(&s->gb, xbr_band_nbits) + 1;
+            if (xbr_nsubbands[ch2] > DCA_SUBBANDS) {
+                av_log(s->avctx, AV_LOG_ERROR, "Invalid number of active XBR subbands (%d)\n", xbr_nsubbands[ch2]);
+                return AVERROR_INVALIDDATA;
+            }
+        }
+    }
+
+    // Reserved
+    // Byte align
+    // CRC16 of XBR frame header
+    if (ff_dca_seek_bits(&s->gb, header_pos + header_size * 8)) {
+        av_log(s->avctx, AV_LOG_ERROR, "Read past end of XBR frame header\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    // Channel set data
+    for (i = 0, xbr_base_ch = 0; i < xbr_nchsets; i++) {
+        header_pos = get_bits_count(&s->gb);
+        // Channel sets reaching beyond s->nchannels are not decoded, only skipped
+        if (xbr_base_ch + xbr_nchannels[i] <= s->nchannels) {
+            int sf, sub_pos;
+
+            for (sf = 0, sub_pos = 0; sf < s->nsubframes; sf++) {
+                if ((ret = parse_xbr_subframe(s, xbr_base_ch,
+                                              xbr_base_ch + xbr_nchannels[i],
+                                              xbr_nsubbands, xbr_transition_mode,
+                                              sf, &sub_pos)) < 0)
+                    return ret;
+            }
+        }
+
+        xbr_base_ch += xbr_nchannels[i];
+        if (ff_dca_seek_bits(&s->gb, header_pos + xbr_frame_size[i] * 8)) {
+            av_log(s->avctx, AV_LOG_ERROR, "Read past end of XBR channel set\n");
+            return AVERROR_INVALIDDATA;
+        }
+    }
+
+    return 0;
+}
+
+// Linear congruential generator modified from the ISO/IEC 9899 example.
+// Advances s->x96_rand and returns a pseudorandom value in [-2^30, 2^30 - 1].
+static int rand_x96(DCACoreDecoder *s)
+{
+    s->x96_rand = s->x96_rand * 1103515245U + 12345U;
+    return (s->x96_rand & 0x7fffffff) - 0x40000000;
+}
+
+// Decodes the X96 audio data of subframe 'sf' for channels xch_base and up
+// into s->x96_subband_samples, starting at *sub_pos, which is advanced on
+// success. Returns 0 or a negative AVERROR code.
+static int parse_x96_subframe_audio(DCACoreDecoder *s, int sf, int xch_base, int *sub_pos)
+{
+    int n, ssf, ch, band, ofs;
+
+    // Check number of subband samples in this subframe
+    int nsamples = s->nsubsubframes[sf] * DCA_SUBBAND_SAMPLES;
+    if (*sub_pos + nsamples > s->npcmblocks) {
+        av_log(s->avctx, AV_LOG_ERROR, "Subband sample buffer overflow\n");
+        return AVERROR_INVALIDDATA;
+    }
+    if (get_bits_left(&s->gb) < 0)
+        return AVERROR_INVALIDDATA;
+
+    // VQ encoded or unallocated subbands
+    for (ch = xch_base; ch < s->x96_nchannels; ch++) {
+        for (band = s->x96_subband_start; band < s->nsubbands[ch]; band++) {
+            // Get the sample pointer and scale factor
+            int32_t *samples = s->x96_subband_samples[ch][band] + *sub_pos;
+            int32_t scale    = s->scale_factors[ch][band >> 1][band & 1];
+
+            switch (s->bit_allocation[ch][band]) {
+            case 0: // No bits allocated for subband
+                if (scale <= 1)
+                    memset(samples, 0, nsamples * sizeof(int32_t));
+                else for (n = 0; n < nsamples; n++)
+                    // Generate scaled random samples
+                    samples[n] = mul31(rand_x96(s), scale);
+                break;
+
+            case 1: // VQ encoded subband
+                for (ssf = 0; ssf < (s->nsubsubframes[sf] + 1) / 2; ssf++) {
+                    // Extract the VQ address from the bit stream and look up
+                    // the VQ code book for up to 16 subband samples
+                    const int8_t *vq_samples = ff_dca_high_freq_vq[get_bits(&s->gb, 10)];
+                    // Scale and take the samples
+                    for (n = 0; n < FFMIN(nsamples - ssf * 16, 16); n++)
+                        *samples++ = clip23(vq_samples[n] * scale + (1 << 3) >> 4);
+                }
+                break;
+            }
+        }
+    }
+
+    // Audio data
+    for (ssf = 0, ofs = *sub_pos; ssf < s->nsubsubframes[sf]; ssf++) {
+        for (ch = xch_base; ch < s->x96_nchannels; ch++) {
+            if (get_bits_left(&s->gb) < 0)
+                return AVERROR_INVALIDDATA;
+
+            for (band = s->x96_subband_start; band < s->nsubbands[ch]; band++) {
+                int ret, abits = s->bit_allocation[ch][band] - 1;
+                int32_t audio[DCA_SUBBAND_SAMPLES], step_size, scale;
+
+                // Allocation 0 (unallocated) and 1 (VQ) were handled above
+                if (abits < 1)
+                    continue;
+
+                // Extract bits from the bit stream
+                if ((ret = extract_audio(s, audio, abits, ch)) < 0)
+                    return ret;
+                // Select quantization step size table and look up quantization
+                // step size
+                if (s->bit_rate == 3)
+                    step_size = ff_dca_lossless_quant[abits];
+                else
+                    step_size = ff_dca_lossy_quant[abits];
+                // Get the scale factor
+                scale = s->scale_factors[ch][band >> 1][band & 1];
+
+                dequantize(s->x96_subband_samples[ch][band] + ofs,
+                           audio, step_size, scale, 0);
+            }
+        }
+
+        // DSYNC: expect 0xffff after the last subsubframe, or after each if sync_ssf
+        if ((ssf == s->nsubsubframes[sf] - 1 || s->sync_ssf) && get_bits(&s->gb, 16) != 0xffff) {
+            av_log(s->avctx, AV_LOG_ERROR, "X96-DSYNC check failed\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        ofs += DCA_SUBBAND_SAMPLES;
+    }
+
+    // Inverse ADPCM
+    for (ch = xch_base; ch < s->x96_nchannels; ch++) {
+        inverse_adpcm(s->x96_subband_samples[ch], s->prediction_vq_index[ch],
+                      s->prediction_mode[ch], s->x96_subband_start, s->nsubbands[ch],
+                      *sub_pos, nsamples);
+    }
+
+    // Joint subband coding; joint_intensity_index is 1-based, 0 disables it
+    for (ch = xch_base; ch < s->x96_nchannels; ch++) {
+        int src_ch = s->joint_intensity_index[ch] - 1;
+        if (src_ch >= 0) {
+            s->dcadsp->decode_joint(s->x96_subband_samples[ch], s->x96_subband_samples[src_ch],
+                                    s->joint_scale_factors[ch], s->nsubbands[ch],
+                                    s->nsubbands[src_ch], *sub_pos, nsamples);
+        }
+    }
+
+    // Advance subband sample pointer for the next subframe
+    *sub_pos = ofs;
+    return 0;
+}
+
+// Clears the ADPCM history stored just before each X96 subband's samples,
+// so that nothing from the previous frame feeds the predictor.
+static void erase_x96_adpcm_history(DCACoreDecoder *s)
+{
+    int i;
+
+    for (i = 0; i < DCA_CHANNELS * DCA_SUBBANDS_X96; i++) {
+        int32_t *first = s->x96_subband_samples[i / DCA_SUBBANDS_X96][i % DCA_SUBBANDS_X96];
+        AV_ZERO128(first - DCA_ADPCM_COEFFS);
+    }
+    emms_c();
+}
+
+// Ensures the X96 subband buffer is large enough for s->npcmblocks and, if
+// its size changed, re-derives the per-subband sample pointers.
+// Returns 0, or AVERROR(ENOMEM) if the buffer could not be allocated.
+static int alloc_x96_sample_buffer(DCACoreDecoder *s)
+{
+    const int stride       = DCA_ADPCM_COEFFS + s->npcmblocks;
+    const int total        = stride * DCA_CHANNELS * DCA_SUBBANDS_X96;
+    const unsigned int old = s->x96_subband_size;
+    int i;
+
+    av_fast_mallocz(&s->x96_subband_buffer, &s->x96_subband_size,
+                    total * sizeof(int32_t));
+    if (!s->x96_subband_buffer)
+        return AVERROR(ENOMEM);
+
+    // Subbands lie back to back, each preceded by its ADPCM history
+    if (old != s->x96_subband_size) {
+        for (i = 0; i < DCA_CHANNELS * DCA_SUBBANDS_X96; i++)
+            s->x96_subband_samples[i / DCA_SUBBANDS_X96][i % DCA_SUBBANDS_X96] =
+                s->x96_subband_buffer + i * stride + DCA_ADPCM_COEFFS;
+    }
+    if (!s->predictor_history)
+        erase_x96_adpcm_history(s);
+    return 0;
+}
+
+// Parses the X96 subframe side information (prediction modes, bit allocation,
+// scale factors, joint coding scales) for channels xch_base and up into the
+// decoder state. Returns 0 or a negative AVERROR code.
+static int parse_x96_subframe_header(DCACoreDecoder *s, int xch_base)
+{
+    int ch, band, ret;
+
+    if (get_bits_left(&s->gb) < 0)
+        return AVERROR_INVALIDDATA;
+
+    // Prediction mode
+    for (ch = xch_base; ch < s->x96_nchannels; ch++)
+        for (band = s->x96_subband_start; band < s->nsubbands[ch]; band++)
+            s->prediction_mode[ch][band] = get_bits1(&s->gb);
+
+    // Prediction coefficients VQ address
+    for (ch = xch_base; ch < s->x96_nchannels; ch++)
+        for (band = s->x96_subband_start; band < s->nsubbands[ch]; band++)
+            if (s->prediction_mode[ch][band])
+                s->prediction_vq_index[ch][band] = get_bits(&s->gb, 12);
+
+    // Bit allocation index
+    for (ch = xch_base; ch < s->x96_nchannels; ch++) {
+        int sel = s->bit_allocation_sel[ch];
+        int abits = 0;
+
+        for (band = s->x96_subband_start; band < s->nsubbands[ch]; band++) {
+            // If Huffman code was used, the difference of abits was encoded
+            if (sel < 7)
+                abits += dca_get_vlc(&s->gb, &vlc_quant_index[5 + 2 * s->x96_high_res], sel);
+            else
+                abits = get_bits(&s->gb, 3 + s->x96_high_res);
+            if (abits < 0 || abits > 7 + 8 * s->x96_high_res) {
+                av_log(s->avctx, AV_LOG_ERROR, "Invalid X96 bit allocation index\n");
+                return AVERROR_INVALIDDATA;
+            }
+            s->bit_allocation[ch][band] = abits;
+        }
+    }
+
+    // Scale factors
+    for (ch = xch_base; ch < s->x96_nchannels; ch++) {
+        int sel = s->scale_factor_sel[ch];
+        int scale_index = 0;
+        // Scales are transmitted even for unallocated subbands; X96 band b is
+        // stored in scale_factors[ch][b >> 1][b & 1]
+        for (band = s->x96_subband_start; band < s->nsubbands[ch]; band++) {
+            if ((ret = parse_scale(s, &scale_index, sel)) < 0)
+                return ret;
+            s->scale_factors[ch][band >> 1][band & 1] = ret;
+        }
+    }
+
+    // Joint subband codebook select
+    for (ch = xch_base; ch < s->x96_nchannels; ch++) {
+        if (s->joint_intensity_index[ch]) {
+            s->joint_scale_sel[ch] = get_bits(&s->gb, 3);
+            if (s->joint_scale_sel[ch] == 7) {
+                av_log(s->avctx, AV_LOG_ERROR, "Invalid X96 joint scale factor code book\n");
+                return AVERROR_INVALIDDATA;
+            }
+        }
+    }
+
+    // Scale factors for joint subband coding
+    for (ch = xch_base; ch < s->x96_nchannels; ch++) {
+        int src_ch = s->joint_intensity_index[ch] - 1;
+        if (src_ch >= 0) {
+            int sel = s->joint_scale_sel[ch];
+            for (band = s->nsubbands[ch]; band < s->nsubbands[src_ch]; band++) {
+                if ((ret = parse_joint_scale(s, sel)) < 0)
+                    return ret;
+                s->joint_scale_factors[ch][band] = ret;
+            }
+        }
+    }
+
+    // Side information CRC check word (skipped, not verified)
+    if (s->crc_present)
+        skip_bits(&s->gb, 16);
+
+    return 0;
+}
+
+// Parses the X96 coding header for channels xch_base and up; a non-zero
+// 'exss' selects the channel set variant with its own length field and
+// optional CRC. Returns 0 or AVERROR_INVALIDDATA.
+static int parse_x96_coding_header(DCACoreDecoder *s, int exss, int xch_base)
+{
+    int n, ch, header_size = 0, header_pos = get_bits_count(&s->gb);
+
+    if (get_bits_left(&s->gb) < 0)
+        return AVERROR_INVALIDDATA;
+
+    if (exss) {
+        // Channel set header length
+        header_size = get_bits(&s->gb, 7) + 1;
+        // Check CRC
+        if (s->x96_crc_present
+            && (s->avctx->err_recognition & (AV_EF_CRCCHECK | AV_EF_CAREFUL))
+            && ff_dca_check_crc(&s->gb, header_pos, header_pos + header_size * 8)) {
+            av_log(s->avctx, AV_LOG_ERROR, "Invalid X96 channel set header checksum\n");
+            return AVERROR_INVALIDDATA;
+        }
+    }
+
+    // High resolution flag
+    s->x96_high_res = get_bits1(&s->gb);
+
+    // First encoded subband (fixed to DCA_SUBBANDS from revision 8 on)
+    if (s->x96_rev_no < 8) {
+        s->x96_subband_start = get_bits(&s->gb, 5);
+        if (s->x96_subband_start > 27) {
+            av_log(s->avctx, AV_LOG_ERROR, "Invalid X96 subband start index (%d)\n", s->x96_subband_start);
+            return AVERROR_INVALIDDATA;
+        }
+    } else {
+        s->x96_subband_start = DCA_SUBBANDS;
+    }
+
+    // Subband activity count (must reach at least DCA_SUBBANDS)
+    for (ch = xch_base; ch < s->x96_nchannels; ch++) {
+        s->nsubbands[ch] = get_bits(&s->gb, 6) + 1;
+        if (s->nsubbands[ch] < DCA_SUBBANDS) {
+            av_log(s->avctx, AV_LOG_ERROR, "Invalid X96 subband activity count (%d)\n", s->nsubbands[ch]);
+            return AVERROR_INVALIDDATA;
+        }
+    }
+
+    // Joint intensity coding index, rebased by xch_base - 1 when non-zero
+    for (ch = xch_base; ch < s->x96_nchannels; ch++) {
+        if ((n = get_bits(&s->gb, 3)) && xch_base)
+            n += xch_base - 1;
+        if (n > s->x96_nchannels) {
+            av_log(s->avctx, AV_LOG_ERROR, "Invalid X96 joint intensity coding index\n");
+            return AVERROR_INVALIDDATA;
+        }
+        s->joint_intensity_index[ch] = n;
+    }
+
+    // Scale factor code book
+    for (ch = xch_base; ch < s->x96_nchannels; ch++) {
+        s->scale_factor_sel[ch] = get_bits(&s->gb, 3);
+        if (s->scale_factor_sel[ch] >= 6) {
+            av_log(s->avctx, AV_LOG_ERROR, "Invalid X96 scale factor code book\n");
+            return AVERROR_INVALIDDATA;
+        }
+    }
+    // Bit allocation quantizer select
+    for (ch = xch_base; ch < s->x96_nchannels; ch++)
+        s->bit_allocation_sel[ch] = get_bits(&s->gb, 3);
+    // Quantization index codebook select
+    for (n = 0; n < 6 + 4 * s->x96_high_res; n++)
+        for (ch = xch_base; ch < s->x96_nchannels; ch++)
+            s->quant_index_sel[ch][n] = get_bits(&s->gb, quant_index_sel_nbits[n]);
+
+    if (exss) {
+        // Reserved
+        // Byte align
+        // CRC16 of channel set header
+        if (ff_dca_seek_bits(&s->gb, header_pos + header_size * 8)) {
+            av_log(s->avctx, AV_LOG_ERROR, "Read past end of X96 channel set header\n");
+            return AVERROR_INVALIDDATA;
+        }
+    } else {
+        if (s->crc_present)
+            skip_bits(&s->gb, 16);
+    }
+
+    return 0;
+}
+
+// Parse X96 header and subframes for channels from xch_base, then update ADPCM history.
+static int parse_x96_frame_data(DCACoreDecoder *s, int exss, int xch_base)
+{
+    int err, subframe, chan, sb, sample_pos = 0;
+
+    err = parse_x96_coding_header(s, exss, xch_base);
+    if (err < 0)
+        return err;
+
+    for (subframe = 0; subframe < s->nsubframes; subframe++) {
+        err = parse_x96_subframe_header(s, xch_base);
+        if (err >= 0)
+            err = parse_x96_subframe_audio(s, subframe, xch_base, &sample_pos);
+        if (err < 0)
+            return err;
+    }
+
+    for (chan = xch_base; chan < s->x96_nchannels; chan++) {
+        // Active band count is the larger of this channel's and its joint source's
+        int joint  = s->joint_intensity_index[chan];
+        int active = joint ? FFMAX(s->nsubbands[chan], s->nsubbands[joint - 1]) : s->nsubbands[chan];
+        for (sb = 0; sb < DCA_SUBBANDS_X96; sb++) {
+            // ADPCM history occupies the DCA_ADPCM_COEFFS samples ahead of each band
+            int32_t *hist = s->x96_subband_samples[chan][sb] - DCA_ADPCM_COEFFS;
+            if (sb < s->x96_subband_start || sb >= active)
+                memset(hist, 0, (DCA_ADPCM_COEFFS + s->npcmblocks) * sizeof(int32_t));
+            else
+                AV_COPY128(hist, hist + s->npcmblocks);
+        }
+    }
+
+    emms_c();
+    return 0;
+}
+
+// Parse an X96 frame embedded in the core frame: no header CRC, all core channels.
+static int parse_x96_frame(DCACoreDecoder *s)
+{
+    int err;
+
+    // 4-bit revision number; only 1..8 are accepted
+    s->x96_rev_no = get_bits(&s->gb, 4);
+    if (!(s->x96_rev_no >= 1 && s->x96_rev_no <= 8)) {
+        av_log(s->avctx, AV_LOG_ERROR, "Invalid X96 revision (%d)\n", s->x96_rev_no);
+        return AVERROR_INVALIDDATA;
+    }
+
+    s->x96_nchannels   = s->nchannels;
+    s->x96_crc_present = 0;
+
+    err = alloc_x96_sample_buffer(s);
+    if (err >= 0)
+        err = parse_x96_frame_data(s, 0, 0);
+    if (err < 0)
+        return err;
+
+    // Skip whatever remains up to the end of the core frame
+    if (!ff_dca_seek_bits(&s->gb, s->frame_size * 8))
+        return 0;
+
+    av_log(s->avctx, AV_LOG_ERROR, "Read past end of X96 frame\n");
+    return AVERROR_INVALIDDATA;
+}
+
+static int parse_x96_frame_exss(DCACoreDecoder *s)
+{   // Parse an X96 frame with its own sync word and header, then each channel set; 0 or negative AVERROR
+    int     x96_frame_size[DCA_EXSS_CHSETS_MAX];
+    int     x96_nchannels[DCA_EXSS_CHSETS_MAX];
+    int     x96_nchsets, x96_base_ch;
+    int     i, ret, header_size, header_pos = get_bits_count(&s->gb);
+
+    // X96 sync word
+    if (get_bits_long(&s->gb, 32) != DCA_SYNCWORD_X96) {
+        av_log(s->avctx, AV_LOG_ERROR, "Invalid X96 sync word\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    // X96 frame header length in bytes (the 6-bit field stores length - 1)
+    header_size = get_bits(&s->gb, 6) + 1;
+
+    // Check X96 frame header CRC (covers the header after the 32-bit sync word)
+    if ((s->avctx->err_recognition & (AV_EF_CRCCHECK | AV_EF_CAREFUL))
+        && ff_dca_check_crc(&s->gb, header_pos + 32, header_pos + header_size * 8)) {
+        av_log(s->avctx, AV_LOG_ERROR, "Invalid X96 frame header checksum\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    // Revision number (4 bits; only 1..8 are accepted)
+    s->x96_rev_no = get_bits(&s->gb, 4);
+    if (s->x96_rev_no < 1 || s->x96_rev_no > 8) {
+        av_log(s->avctx, AV_LOG_ERROR, "Invalid X96 revision (%d)\n", s->x96_rev_no);
+        return AVERROR_INVALIDDATA;
+    }
+
+    // CRC presence flag for channel set header
+    s->x96_crc_present = get_bits1(&s->gb);
+
+    // Number of channel sets (2-bit field + 1, so the local arrays cannot overflow)
+    x96_nchsets = get_bits(&s->gb, 2) + 1;
+
+    // Channel set data byte size (the 12-bit field stores size - 1)
+    for (i = 0; i < x96_nchsets; i++)
+        x96_frame_size[i] = get_bits(&s->gb, 12) + 1;
+
+    // Number of channels in channel set (the 3-bit field stores count - 1)
+    for (i = 0; i < x96_nchsets; i++)
+        x96_nchannels[i] = get_bits(&s->gb, 3) + 1;
+
+    // Reserved
+    // Byte align
+    // CRC16 of X96 frame header (all three are skipped by seeking to the header end)
+    if (ff_dca_seek_bits(&s->gb, header_pos + header_size * 8)) {
+        av_log(s->avctx, AV_LOG_ERROR, "Read past end of X96 frame header\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    if ((ret = alloc_x96_sample_buffer(s)) < 0)
+        return ret;
+
+    // Channel set data; x96_base_ch is the first channel index of the current set
+    s->x96_nchannels = 0;
+    for (i = 0, x96_base_ch = 0; i < x96_nchsets; i++) {
+        header_pos = get_bits_count(&s->gb);
+
+        if (x96_base_ch + x96_nchannels[i] <= s->nchannels) { // sets exceeding the decoded channels are skipped
+            s->x96_nchannels = x96_base_ch + x96_nchannels[i];
+            if ((ret = parse_x96_frame_data(s, 1, x96_base_ch)) < 0)
+                return ret;
+        }
+
+        x96_base_ch += x96_nchannels[i]; // advances even when the set was skipped
+
+        if (ff_dca_seek_bits(&s->gb, header_pos + x96_frame_size[i] * 8)) {
+            av_log(s->avctx, AV_LOG_ERROR, "Read past end of X96 channel set\n");
+            return AVERROR_INVALIDDATA;
+        }
+    }
+
+    return 0;
+}
+
+// Parse auxiliary data that follows the core audio: optional time stamp and
+// embedded primary downmix coefficients. Returns 0 or AVERROR_INVALIDDATA.
+static int parse_aux_data(DCACoreDecoder *s)
+{
+    int aux_pos;
+
+    if (get_bits_left(&s->gb) < 0)
+        return AVERROR_INVALIDDATA;
+
+    // Auxiliary data byte count (can't be trusted)
+    skip_bits(&s->gb, 6);
+
+    // 4-byte align
+    skip_bits_long(&s->gb, -get_bits_count(&s->gb) & 31);
+
+    // Auxiliary data sync word
+    if (get_bits_long(&s->gb, 32) != DCA_SYNCWORD_REV1AUX) {
+        av_log(s->avctx, AV_LOG_ERROR, "Invalid auxiliary data sync word\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    aux_pos = get_bits_count(&s->gb); // CRC coverage starts right after the sync word
+
+    // Auxiliary decode time stamp flag
+    if (get_bits1(&s->gb))
+        skip_bits_long(&s->gb, 47);
+
+    // Auxiliary dynamic downmix flag (extra parentheses mark the assignment as intended)
+    if ((s->prim_dmix_embedded = get_bits1(&s->gb))) {
+        int i, m, n;
+
+        // Auxiliary primary channel downmix type
+        s->prim_dmix_type = get_bits(&s->gb, 3);
+        if (s->prim_dmix_type >= DCA_DMIX_TYPE_COUNT) {
+            av_log(s->avctx, AV_LOG_ERROR, "Invalid primary channel set downmix type\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        // Size of downmix coefficients matrix: m table entries per downmix type, n source channels incl. LFE
+        m = ff_dca_dmix_primary_nch[s->prim_dmix_type];
+        n = ff_dca_channels[s->audio_mode] + !!s->lfe_present;
+
+        // Dynamic downmix code coefficients: 8-bit table index plus a sign bit
+        for (i = 0; i < m * n; i++) {
+            int code = get_bits(&s->gb, 9);
+            int sign = (code >> 8) - 1; // 0 keeps the table value, -1 negates it below
+            unsigned int index = code & 0xff;
+            if (index >= FF_DCA_DMIXTABLE_SIZE) {
+                av_log(s->avctx, AV_LOG_ERROR, "Invalid downmix coefficient index\n");
+                return AVERROR_INVALIDDATA;
+            }
+            s->prim_dmix_coeff[i] = (ff_dca_dmixtable[index] ^ sign) - sign;
+        }
+    }
+
+    // Byte align, then skip the CRC16 of auxiliary data
+    skip_bits(&s->gb, -get_bits_count(&s->gb) & 7);
+    skip_bits(&s->gb, 16);
+
+    // Check CRC over everything from aux_pos up to and including the CRC16
+    if ((s->avctx->err_recognition & (AV_EF_CRCCHECK | AV_EF_CAREFUL))
+        && ff_dca_check_crc(&s->gb, aux_pos, get_bits_count(&s->gb))) {
+        av_log(s->avctx, AV_LOG_ERROR, "Invalid auxiliary data checksum\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    return 0;
+}
+
+// Parse the optional part of a core frame: time code, auxiliary data, and the
+// position of an embedded XCH/X96/XXCH extension. Returns 0 or a negative AVERROR.
+static int parse_optional_info(DCACoreDecoder *s)
+{
+    DCAContext *dca = s->avctx->priv_data;
+    int ret = -1;
+
+    // Time code stamp
+    if (s->ts_present)
+        skip_bits_long(&s->gb, 32);
+
+    // Auxiliary data; a parse failure is fatal only in explode mode
+    if (s->aux_present && (ret = parse_aux_data(s)) < 0
+        && (s->avctx->err_recognition & AV_EF_EXPLODE))
+        return ret;
+    if (ret < 0)
+        s->prim_dmix_embedded = 0; // absent or invalid aux data: no embedded downmix
+
+    // Core extensions
+    if (s->ext_audio_present && !dca->core_only) {
+        int sync_pos = FFMIN(s->frame_size / 4, s->gb.size_in_bits / 32) - 1;
+        int last_pos = get_bits_count(&s->gb) / 32;
+        int size, dist;
+        // Search for extension sync words aligned on 4-byte boundary. Search
+        // must be done backwards from the end of core frame to work around
+        // sync word aliasing issues.
+        switch (s->ext_audio_type) {
+        case EXT_AUDIO_XCH:
+            if (dca->request_channel_layout)
+                break;
+
+            // The distance between XCH sync word and end of the core frame
+            // must be equal to XCH frame size. Off by one error is allowed for
+            // compatibility with legacy bitstreams. Minimum XCH frame size is
+            // 96 bytes. AMODE and PCHS are further checked to reduce
+            // probability of alias sync detection.
+            for (; sync_pos >= last_pos; sync_pos--) {
+                if (AV_RB32(s->gb.buffer + sync_pos * 4) == DCA_SYNCWORD_XCH) {
+                    s->gb.index = (sync_pos + 1) * 32;
+                    size = get_bits(&s->gb, 10) + 1;
+                    dist = s->frame_size - sync_pos * 4;
+                    if (size >= 96
+                        && (size == dist || size - 1 == dist)
+                        && get_bits(&s->gb, 7) == 0x08) {
+                        s->xch_pos = get_bits_count(&s->gb);
+                        break;
+                    }
+                }
+            }
+
+            if (!s->xch_pos && (s->avctx->err_recognition & AV_EF_EXPLODE)) { // still 0: nothing matched
+                av_log(s->avctx, AV_LOG_ERROR, "XCH sync word not found\n");
+                return AVERROR_INVALIDDATA;
+            }
+            break;
+
+        case EXT_AUDIO_X96:
+            // The distance between X96 sync word and end of the core frame
+            // must be equal to X96 frame size. Minimum X96 frame size is 96
+            // bytes.
+            for (; sync_pos >= last_pos; sync_pos--) {
+                if (AV_RB32(s->gb.buffer + sync_pos * 4) == DCA_SYNCWORD_X96) {
+                    s->gb.index = (sync_pos + 1) * 32;
+                    size = get_bits(&s->gb, 12) + 1;
+                    dist = s->frame_size - sync_pos * 4;
+                    if (size >= 96 && size == dist) {
+                        s->x96_pos = get_bits_count(&s->gb);
+                        break;
+                    }
+                }
+            }
+
+            if (!s->x96_pos && (s->avctx->err_recognition & AV_EF_EXPLODE)) { // still 0: nothing matched
+                av_log(s->avctx, AV_LOG_ERROR, "X96 sync word not found\n");
+                return AVERROR_INVALIDDATA;
+            }
+            break;
+
+        case EXT_AUDIO_XXCH:
+            if (dca->request_channel_layout)
+                break;
+
+            // XXCH frame header CRC must be valid. Minimum XXCH frame header
+            // size is 11 bytes.
+            for (; sync_pos >= last_pos; sync_pos--) {
+                if (AV_RB32(s->gb.buffer + sync_pos * 4) == DCA_SYNCWORD_XXCH) {
+                    s->gb.index = (sync_pos + 1) * 32;
+                    size = get_bits(&s->gb, 6) + 1;
+                    if (size >= 11 &&
+                        !ff_dca_check_crc(&s->gb, (sync_pos + 1) * 32,
+                                          sync_pos * 32 + size * 8)) {
+                        s->xxch_pos = sync_pos * 32;
+                        break;
+                    }
+                }
+            }
+
+            if (!s->xxch_pos && (s->avctx->err_recognition & AV_EF_EXPLODE)) { // still 0: nothing matched
+                av_log(s->avctx, AV_LOG_ERROR, "XXCH sync word not found\n");
+                return AVERROR_INVALIDDATA;
+            }
+            break;
+        }
+    }
+
+    return 0;
+}
+
+// Parse one core frame of `size` bytes at `data`; returns 0 or a negative AVERROR.
+int ff_dca_core_parse(DCACoreDecoder *s, uint8_t *data, int size)
+{
+    int err;
+
+    // Reset per-frame extension state
+    s->x96_pos = 0;
+    s->xxch_pos = 0;
+    s->xch_pos = 0;
+    s->ext_audio_mask = 0;
+
+    err = init_get_bits8(&s->gb, data, size);
+    if (err < 0)
+        return err;
+
+    // Skip the first 32 bits (presumably the sync word), then parse in stream order
+    skip_bits_long(&s->gb, 32);
+    if ((err = parse_frame_header(s)) < 0 ||
+        (err = alloc_sample_buffer(s)) < 0 ||
+        (err = parse_frame_data(s, HEADER_CORE, 0)) < 0 ||
+        (err = parse_optional_info(s)) < 0)
+        return err;
+
+    // Workaround for DTS in WAV: accept a frame size up to 3 bytes past the buffer
+    if (size < s->frame_size && s->frame_size < size + 4) {
+        av_log(s->avctx, AV_LOG_DEBUG, "Working around excessive core frame size (%d > %d)\n", s->frame_size, size);
+        s->frame_size = size;
+    }
+
+    if (!ff_dca_seek_bits(&s->gb, s->frame_size * 8))
+        return 0;
+    av_log(s->avctx, AV_LOG_ERROR, "Read past end of core frame\n");
+    return (s->avctx->err_recognition & AV_EF_EXPLODE) ? AVERROR_INVALIDDATA : 0;
+}
+
+int ff_dca_core_parse_exss(DCACoreDecoder *s, uint8_t *data, DCAExssAsset *asset)
+{   // Parse core extensions from the EXSS asset (data + offsets) or from positions found in the core frame
+    AVCodecContext *avctx = s->avctx;
+    DCAContext *dca = avctx->priv_data;
+    GetBitContext gb = s->gb; // copy of the incoming bit reader, restored before reading X96 at x96_pos
+    int exss_mask = asset ? asset->extension_mask : 0; // asset may be NULL: only core extensions then
+    int ret = 0, ext = 0;
+
+    // Parse (X)XCH unless downmixing; priority: EXSS XXCH, then core XXCH, then core XCH
+    if (!dca->request_channel_layout) {
+        if (exss_mask & DCA_EXSS_XXCH) {
+            if ((ret = init_get_bits8(&s->gb, data + asset->xxch_offset, asset->xxch_size)) < 0)
+                return ret;
+            ret = parse_xxch_frame(s);
+            ext = DCA_EXSS_XXCH;
+        } else if (s->xxch_pos) {
+            s->gb.index = s->xxch_pos;
+            ret = parse_xxch_frame(s);
+            ext = DCA_CSS_XXCH;
+        } else if (s->xch_pos) {
+            s->gb.index = s->xch_pos;
+            ret = parse_xch_frame(s);
+            ext = DCA_CSS_XCH;
+        }
+
+        // Revert to primary channel set in case (X)XCH parsing fails
+        if (ret < 0) {
+            if (avctx->err_recognition & AV_EF_EXPLODE)
+                return ret;
+            s->nchannels = ff_dca_channels[s->audio_mode];
+            s->ch_mask = audio_mode_ch_mask[s->audio_mode];
+            if (s->lfe_present)
+                s->ch_mask |= DCA_SPEAKER_MASK_LFE1;
+        } else {
+            s->ext_audio_mask |= ext; // ext is 0 when no (X)XCH extension was present
+        }
+    }
+
+    // Parse XBR (only available from the EXSS asset; failure is fatal only in explode mode)
+    if (exss_mask & DCA_EXSS_XBR) {
+        if ((ret = init_get_bits8(&s->gb, data + asset->xbr_offset, asset->xbr_size)) < 0)
+            return ret;
+        if ((ret = parse_xbr_frame(s)) < 0) {
+            if (avctx->err_recognition & AV_EF_EXPLODE)
+                return ret;
+        } else {
+            s->ext_audio_mask |= DCA_EXSS_XBR;
+        }
+    }
+
+    // Parse X96 unless decoding XLL; out of memory is always fatal, other errors only in explode mode
+    if (!(dca->packet & DCA_PACKET_XLL)) {
+        if (exss_mask & DCA_EXSS_X96) {
+            if ((ret = init_get_bits8(&s->gb, data + asset->x96_offset, asset->x96_size)) < 0)
+                return ret;
+            if ((ret = parse_x96_frame_exss(s)) < 0) {
+                if (ret == AVERROR(ENOMEM) || (avctx->err_recognition & AV_EF_EXPLODE))
+                    return ret;
+            } else {
+                s->ext_audio_mask |= DCA_EXSS_X96;
+            }
+        } else if (s->x96_pos) {
+            s->gb = gb; // s->gb may have been re-initialized on EXSS data above
+            s->gb.index = s->x96_pos;
+            if ((ret = parse_x96_frame(s)) < 0) {
+                if (ret == AVERROR(ENOMEM) || (avctx->err_recognition & AV_EF_EXPLODE))
+                    return ret;
+            } else {
+                s->ext_audio_mask |= DCA_CSS_X96;
+            }
+        }
+    }
+
+    return 0;
+}
+
+// Map primary channel index ch to a speaker position; -1 if there is none.
+static int map_prm_ch_to_spkr(DCACoreDecoder *s, int ch)
+{
+    const int have_xxch  = (s->ext_audio_mask & (DCA_CSS_XXCH | DCA_EXSS_XXCH)) != 0;
+    const int core_count = ff_dca_channels[s->audio_mode];
+    int next = core_count, id;
+
+    // Channels below core_count come from the core audio mode table
+    if (ch < core_count) {
+        id = prm_ch_to_spkr_map[s->audio_mode][ch];
+        if (!have_xxch)
+            return id;
+        // With XXCH the speaker must be in the XXCH core mask; Lss/Rss may replace Ls/Rs
+        if (s->xxch_core_mask & (1U << id))
+            return id;
+        if (id == DCA_SPEAKER_Ls && (s->xxch_core_mask & DCA_SPEAKER_MASK_Lss))
+            return DCA_SPEAKER_Lss;
+        if (id == DCA_SPEAKER_Rs && (s->xxch_core_mask & DCA_SPEAKER_MASK_Rss))
+            return DCA_SPEAKER_Rss;
+        return -1;
+    }
+
+    // The first extra channel of an XCH stream is Cs
+    if (ch == core_count && (s->ext_audio_mask & DCA_CSS_XCH))
+        return DCA_SPEAKER_Cs;
+
+    // XXCH channels follow the set bits of the speaker mask in ascending order
+    if (have_xxch) {
+        for (id = DCA_SPEAKER_Cs; id < s->xxch_mask_nbits; id++)
+            if ((s->xxch_spkr_mask & (1U << id)) && next++ == ch)
+                return id;
+    }
+
+    return -1;
+}
+
+// Clear the synthesis filter state and both LFE output histories.
+static void erase_dsp_history(DCACoreDecoder *s)
+{
+    s->output_history_lfe_float = s->output_history_lfe_fixed = 0;
+    memset(s->dcadsp_data, 0, sizeof(s->dcadsp_data));
+}
+
+static void set_filter_mode(DCACoreDecoder *s, int mode)
+{
+    if (s->filter_mode == mode) // unchanged mode: keep the existing filter history
+        return;
+    erase_dsp_history(s);
+    s->filter_mode = mode;
+}
+
+int ff_dca_core_filter_fixed(DCACoreDecoder *s, int x96_synth)
+{   // Fixed point synthesis of all primary channels and LFE into s->output_samples; 0 or negative AVERROR
+    int n, ch, spkr, nsamples, x96_nchannels = 0;
+    const int32_t *filter_coeff;
+    int32_t *ptr;
+
+    // Externally set x96_synth flag implies that X96 synthesis should be
+    // enabled, yet actual X96 subband data should be discarded. This is a
+    // special case for lossless residual decoder that ignores X96 data if
+    // present.
+    if (!x96_synth && (s->ext_audio_mask & (DCA_CSS_X96 | DCA_EXSS_X96))) {
+        x96_nchannels = s->x96_nchannels;
+        x96_synth = 1;
+    }
+    if (x96_synth < 0) // negative flag: plain synthesis, parsed X96 data is not used
+        x96_synth = 0;
+
+    s->output_rate = s->sample_rate << x96_synth; // doubled when X96 synthesis is on
+    s->npcmsamples = nsamples = (s->npcmblocks * DCA_PCMBLOCK_SAMPLES) << x96_synth;
+
+    // Reallocate PCM output buffer (one plane of nsamples per speaker in ch_mask)
+    av_fast_malloc(&s->output_buffer, &s->output_size,
+                   nsamples * av_popcount(s->ch_mask) * sizeof(int32_t));
+    if (!s->output_buffer)
+        return AVERROR(ENOMEM);
+
+    ptr = (int32_t *)s->output_buffer;
+    for (spkr = 0; spkr < DCA_SPEAKER_COUNT; spkr++) { // planes are laid out in ascending speaker order
+        if (s->ch_mask & (1U << spkr)) {
+            s->output_samples[spkr] = ptr;
+            ptr += nsamples;
+        } else {
+            s->output_samples[spkr] = NULL;
+        }
+    }
+
+    // Handle change of filtering mode (erases filter history when the mode differs)
+    set_filter_mode(s, x96_synth | DCA_FILTER_MODE_FIXED);
+
+    // Select filter
+    if (x96_synth)
+        filter_coeff = ff_dca_fir_64bands_fixed;
+    else if (s->filter_perfect)
+        filter_coeff = ff_dca_fir_32bands_perfect_fixed;
+    else
+        filter_coeff = ff_dca_fir_32bands_nonperfect_fixed;
+
+    // Filter primary channels
+    for (ch = 0; ch < s->nchannels; ch++) {
+        // Map this primary channel to speaker
+        spkr = map_prm_ch_to_spkr(s, ch);
+        if (spkr < 0)
+            return AVERROR(EINVAL);
+
+        // Filter bank reconstruction (X96 subband data is passed only for channels that have it)
+        s->dcadsp->sub_qmf_fixed[x96_synth](
+            &s->synth,
+            &s->dcadct,
+            s->output_samples[spkr],
+            s->subband_samples[ch],
+            ch < x96_nchannels ? s->x96_subband_samples[ch] : NULL,
+            s->dcadsp_data[ch].u.fix.hist1,
+            &s->dcadsp_data[ch].offset,
+            s->dcadsp_data[ch].u.fix.hist2,
+            filter_coeff,
+            s->npcmblocks);
+    }
+
+    // Filter LFE channel
+    if (s->lfe_present) {
+        int32_t *samples = s->output_samples[DCA_SPEAKER_LFE1];
+        int nlfesamples = s->npcmblocks >> 1; // LFE samples decoded for this frame
+
+        // Check LFF
+        if (s->lfe_present == LFE_FLAG_128) {
+            av_log(s->avctx, AV_LOG_ERROR, "Fixed point mode doesn't support LFF=1\n");
+            return AVERROR(EINVAL);
+        }
+
+        // Offset intermediate buffer for X96 (interpolate into the second half of the LFE plane)
+        if (x96_synth)
+            samples += nsamples / 2;
+
+        // Interpolate LFE channel
+        s->dcadsp->lfe_fir_fixed(samples, s->lfe_samples + DCA_LFE_HISTORY,
+                                 ff_dca_lfe_fir_64_fixed, s->npcmblocks);
+
+        if (x96_synth) {
+            // Filter 96 kHz oversampled LFE PCM to attenuate high frequency
+            // (47.6 - 48.0 kHz) components of interpolation image
+            s->dcadsp->lfe_x96_fixed(s->output_samples[DCA_SPEAKER_LFE1],
+                                     samples, &s->output_history_lfe_fixed,
+                                     nsamples / 2);
+
+        }
+
+        // Update LFE history (copy the last DCA_LFE_HISTORY samples to the buffer start)
+        for (n = DCA_LFE_HISTORY - 1; n >= 0; n--)
+            s->lfe_samples[n] = s->lfe_samples[nlfesamples + n];
+    }
+
+    return 0;
+}
+
+static int filter_frame_fixed(DCACoreDecoder *s, AVFrame *frame)
+{   // Fixed point output path: synthesize, undo embedded downmixes, optionally downmix to stereo, emit S32P
+    AVCodecContext *avctx = s->avctx;
+    DCAContext *dca = avctx->priv_data;
+    int i, n, ch, ret, spkr, nsamples;
+
+    // Don't filter twice when falling back from XLL
+    if (!(dca->packet & DCA_PACKET_XLL) && (ret = ff_dca_core_filter_fixed(s, 0)) < 0)
+        return ret;
+
+    avctx->sample_rate = s->output_rate;
+    avctx->sample_fmt = AV_SAMPLE_FMT_S32P;
+    avctx->bits_per_raw_sample = 24;
+
+    frame->nb_samples = nsamples = s->npcmsamples;
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
+        return ret;
+
+    // Undo embedded XCH downmix (applied to Ls/Rs using the decoded Cs channel)
+    if (s->es_format && (s->ext_audio_mask & DCA_CSS_XCH)
+        && s->audio_mode >= AMODE_2F2R) {
+        s->dcadsp->dmix_sub_xch(s->output_samples[DCA_SPEAKER_Ls],
+                                s->output_samples[DCA_SPEAKER_Rs],
+                                s->output_samples[DCA_SPEAKER_Cs],
+                                nsamples);
+
+    }
+
+    // Undo embedded XXCH downmix
+    if ((s->ext_audio_mask & (DCA_CSS_XXCH | DCA_EXSS_XXCH))
+        && s->xxch_dmix_embedded) {
+        int scale_inv   = s->xxch_dmix_scale_inv;
+        int *coeff_ptr  = s->xxch_dmix_coeff; // consumed sequentially, one entry per set mask bit
+        int xch_base    = ff_dca_channels[s->audio_mode];
+        av_assert1(s->nchannels - xch_base <= DCA_XXCH_CHANNELS_MAX);
+
+        // Undo embedded core downmix pre-scaling
+        for (spkr = 0; spkr < s->xxch_mask_nbits; spkr++) {
+            if (s->xxch_core_mask & (1U << spkr)) {
+                s->dcadsp->dmix_scale_inv(s->output_samples[spkr],
+                                          scale_inv, nsamples);
+            }
+        }
+
+        // Undo downmix: subtract each extension channel from the speakers it was mixed into
+        for (ch = xch_base; ch < s->nchannels; ch++) {
+            int src_spkr = map_prm_ch_to_spkr(s, ch);
+            if (src_spkr < 0)
+                return AVERROR(EINVAL);
+            for (spkr = 0; spkr < s->xxch_mask_nbits; spkr++) {
+                if (s->xxch_dmix_mask[ch - xch_base] & (1U << spkr)) {
+                    int coeff = mul16(*coeff_ptr++, scale_inv);
+                    if (coeff) {
+                        s->dcadsp->dmix_sub(s->output_samples[spkr    ],
+                                            s->output_samples[src_spkr],
+                                            coeff, nsamples);
+                    }
+                }
+            }
+        }
+    }
+
+    if (!(s->ext_audio_mask & (DCA_CSS_XXCH | DCA_CSS_XCH | DCA_EXSS_XXCH))) { // only without (X)XCH
+        // Front sum/difference decoding
+        if ((s->sumdiff_front && s->audio_mode > AMODE_MONO)
+            || s->audio_mode == AMODE_STEREO_SUMDIFF) {
+            s->fixed_dsp->butterflies_fixed(s->output_samples[DCA_SPEAKER_L],
+                                            s->output_samples[DCA_SPEAKER_R],
+                                            nsamples);
+        }
+
+        // Surround sum/difference decoding
+        if (s->sumdiff_surround && s->audio_mode >= AMODE_2F2R) {
+            s->fixed_dsp->butterflies_fixed(s->output_samples[DCA_SPEAKER_Ls],
+                                            s->output_samples[DCA_SPEAKER_Rs],
+                                            nsamples);
+        }
+    }
+
+    // Downmix primary channel set to stereo (request_mask differs from ch_mask only then)
+    if (s->request_mask != s->ch_mask) {
+        ff_dca_downmix_to_stereo_fixed(s->dcadsp,
+                                       s->output_samples,
+                                       s->prim_dmix_coeff,
+                                       nsamples, s->ch_mask);
+    }
+
+    for (i = 0; i < avctx->channels; i++) { // copy each output channel from its speaker plane
+        int32_t *samples = s->output_samples[s->ch_remap[i]];
+        int32_t *plane = (int32_t *)frame->extended_data[i];
+        for (n = 0; n < nsamples; n++)
+            plane[n] = clip23(samples[n]) * (1 << 8); // clip (presumably to 24-bit range), scale up by 256
+    }
+
+    return 0;
+}
+
+static int filter_frame_float(DCACoreDecoder *s, AVFrame *frame)
+{   // Float output path: synthesize directly into the FLTP frame planes, then undo embedded downmixes
+    AVCodecContext *avctx = s->avctx;
+    int x96_nchannels = 0, x96_synth = 0;
+    int i, n, ch, ret, spkr, nsamples, nchannels;
+    float *output_samples[DCA_SPEAKER_COUNT] = { NULL }, *ptr;
+    const float *filter_coeff;
+
+    if (s->ext_audio_mask & (DCA_CSS_X96 | DCA_EXSS_X96)) { // X96 data parsed: use 64-band synthesis
+        x96_nchannels = s->x96_nchannels;
+        x96_synth = 1;
+    }
+
+    avctx->sample_rate = s->sample_rate << x96_synth;
+    avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
+    avctx->bits_per_raw_sample = 0;
+
+    frame->nb_samples = nsamples = (s->npcmblocks * DCA_PCMBLOCK_SAMPLES) << x96_synth;
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
+        return ret;
+
+    // Build reverse speaker to channel mapping
+    for (i = 0; i < avctx->channels; i++)
+        output_samples[s->ch_remap[i]] = (float *)frame->extended_data[i];
+
+    // Allocate space for extra channels (speakers in ch_mask that have no frame plane)
+    nchannels = av_popcount(s->ch_mask) - avctx->channels;
+    if (nchannels > 0) {
+        av_fast_malloc(&s->output_buffer, &s->output_size,
+                       nsamples * nchannels * sizeof(float));
+        if (!s->output_buffer)
+            return AVERROR(ENOMEM);
+
+        ptr = (float *)s->output_buffer;
+        for (spkr = 0; spkr < DCA_SPEAKER_COUNT; spkr++) {
+            if (!(s->ch_mask & (1U << spkr)))
+                continue;
+            if (output_samples[spkr]) // already backed by a frame plane
+                continue;
+            output_samples[spkr] = ptr;
+            ptr += nsamples;
+        }
+    }
+
+    // Handle change of filtering mode (erases filter history when the mode differs)
+    set_filter_mode(s, x96_synth);
+
+    // Select filter
+    if (x96_synth)
+        filter_coeff = ff_dca_fir_64bands;
+    else if (s->filter_perfect)
+        filter_coeff = ff_dca_fir_32bands_perfect;
+    else
+        filter_coeff = ff_dca_fir_32bands_nonperfect;
+
+    // Filter primary channels
+    for (ch = 0; ch < s->nchannels; ch++) {
+        // Map this primary channel to speaker
+        spkr = map_prm_ch_to_spkr(s, ch);
+        if (spkr < 0)
+            return AVERROR(EINVAL);
+
+        // Filter bank reconstruction (X96 subband data is passed only for channels that have it)
+        s->dcadsp->sub_qmf_float[x96_synth](
+            &s->synth,
+            &s->imdct[x96_synth],
+            output_samples[spkr],
+            s->subband_samples[ch],
+            ch < x96_nchannels ? s->x96_subband_samples[ch] : NULL,
+            s->dcadsp_data[ch].u.flt.hist1,
+            &s->dcadsp_data[ch].offset,
+            s->dcadsp_data[ch].u.flt.hist2,
+            filter_coeff,
+            s->npcmblocks,
+            1.0f / (1 << (17 - x96_synth))); // scale factor: 2^-17, or 2^-16 with X96 synthesis
+    }
+
+    // Filter LFE channel
+    if (s->lfe_present) {
+        int dec_select = (s->lfe_present == LFE_FLAG_128); // 1 selects the 128-tap LFE filter table
+        float *samples = output_samples[DCA_SPEAKER_LFE1];
+        int nlfesamples = s->npcmblocks >> (dec_select + 1);
+
+        // Offset intermediate buffer for X96 (interpolate into the second half of the LFE plane)
+        if (x96_synth)
+            samples += nsamples / 2;
+
+        // Select filter
+        if (dec_select)
+            filter_coeff = ff_dca_lfe_fir_128;
+        else
+            filter_coeff = ff_dca_lfe_fir_64;
+
+        // Interpolate LFE channel
+        s->dcadsp->lfe_fir_float[dec_select](
+            samples, s->lfe_samples + DCA_LFE_HISTORY,
+            filter_coeff, s->npcmblocks);
+
+        if (x96_synth) {
+            // Filter 96 kHz oversampled LFE PCM to attenuate high frequency
+            // (47.6 - 48.0 kHz) components of interpolation image
+            s->dcadsp->lfe_x96_float(output_samples[DCA_SPEAKER_LFE1],
+                                     samples, &s->output_history_lfe_float,
+                                     nsamples / 2);
+        }
+
+        // Update LFE history (copy the last DCA_LFE_HISTORY samples to the buffer start)
+        for (n = DCA_LFE_HISTORY - 1; n >= 0; n--)
+            s->lfe_samples[n] = s->lfe_samples[nlfesamples + n];
+    }
+
+    // Undo embedded XCH downmix (subtract Cs * sqrt(1/2) from Ls and Rs)
+    if (s->es_format && (s->ext_audio_mask & DCA_CSS_XCH)
+        && s->audio_mode >= AMODE_2F2R) {
+        s->float_dsp->vector_fmac_scalar(output_samples[DCA_SPEAKER_Ls],
+                                         output_samples[DCA_SPEAKER_Cs],
+                                         -M_SQRT1_2, nsamples);
+        s->float_dsp->vector_fmac_scalar(output_samples[DCA_SPEAKER_Rs],
+                                         output_samples[DCA_SPEAKER_Cs],
+                                         -M_SQRT1_2, nsamples);
+    }
+
+    // Undo embedded XXCH downmix
+    if ((s->ext_audio_mask & (DCA_CSS_XXCH | DCA_EXSS_XXCH))
+        && s->xxch_dmix_embedded) {
+        float scale_inv = s->xxch_dmix_scale_inv * (1.0f / (1 << 16));
+        int *coeff_ptr  = s->xxch_dmix_coeff; // consumed sequentially, one entry per set mask bit
+        int xch_base    = ff_dca_channels[s->audio_mode];
+        av_assert1(s->nchannels - xch_base <= DCA_XXCH_CHANNELS_MAX);
+
+        // Undo downmix: subtract each extension channel from the speakers it was mixed into
+        for (ch = xch_base; ch < s->nchannels; ch++) {
+            int src_spkr = map_prm_ch_to_spkr(s, ch);
+            if (src_spkr < 0)
+                return AVERROR(EINVAL);
+            for (spkr = 0; spkr < s->xxch_mask_nbits; spkr++) {
+                if (s->xxch_dmix_mask[ch - xch_base] & (1U << spkr)) {
+                    int coeff = *coeff_ptr++;
+                    if (coeff) {
+                        s->float_dsp->vector_fmac_scalar(output_samples[    spkr],
+                                                         output_samples[src_spkr],
+                                                         coeff * (-1.0f / (1 << 15)),
+                                                         nsamples);
+                    }
+                }
+            }
+        }
+
+        // Undo embedded core downmix pre-scaling (done after the subtraction in this float path)
+        for (spkr = 0; spkr < s->xxch_mask_nbits; spkr++) {
+            if (s->xxch_core_mask & (1U << spkr)) {
+                s->float_dsp->vector_fmul_scalar(output_samples[spkr],
+                                                 output_samples[spkr],
+                                                 scale_inv, nsamples);
+            }
+        }
+    }
+
+    if (!(s->ext_audio_mask & (DCA_CSS_XXCH | DCA_CSS_XCH | DCA_EXSS_XXCH))) { // only without (X)XCH
+        // Front sum/difference decoding
+        if ((s->sumdiff_front && s->audio_mode > AMODE_MONO)
+            || s->audio_mode == AMODE_STEREO_SUMDIFF) {
+            s->float_dsp->butterflies_float(output_samples[DCA_SPEAKER_L],
+                                            output_samples[DCA_SPEAKER_R],
+                                            nsamples);
+        }
+
+        // Surround sum/difference decoding
+        if (s->sumdiff_surround && s->audio_mode >= AMODE_2F2R) {
+            s->float_dsp->butterflies_float(output_samples[DCA_SPEAKER_Ls],
+                                            output_samples[DCA_SPEAKER_Rs],
+                                            nsamples);
+        }
+    }
+
+    // Downmix primary channel set to stereo (request_mask differs from ch_mask only then)
+    if (s->request_mask != s->ch_mask) {
+        ff_dca_downmix_to_stereo_float(s->float_dsp, output_samples,
+                                       s->prim_dmix_coeff,
+                                       nsamples, s->ch_mask);
+    }
+
+    return 0;
+}
+
+// Turn the parsed core frame into PCM stored in frame, then export profile,
+// bit rate and matrix encoding. Returns 0 or a negative AVERROR code.
+int ff_dca_core_filter_frame(DCACoreDecoder *s, AVFrame *frame)
+{
+    AVCodecContext *avctx = s->avctx;
+    DCAContext *dca = avctx->priv_data;
+    DCAExssAsset *asset = &dca->exss.assets[0];
+    int want_stereo, use_fixed, has_exss;
+    int ret;
+
+    // A stereo request is honoured only with embedded LoRo/LtRt coefficients
+    want_stereo = dca->request_channel_layout == DCA_SPEAKER_LAYOUT_STEREO
+               && s->audio_mode > AMODE_MONO && s->prim_dmix_embedded
+               && (s->prim_dmix_type == DCA_DMIX_TYPE_LoRo ||
+                   s->prim_dmix_type == DCA_DMIX_TYPE_LtRt);
+    s->request_mask = want_stereo ? DCA_SPEAKER_LAYOUT_STEREO : s->ch_mask;
+
+    if (!ff_dca_set_channel_layout(avctx, s->ch_remap, s->request_mask))
+        return AVERROR(EINVAL);
+
+    // Bit-exact decoding and XLL fallback both use the fixed point path
+    use_fixed = (avctx->flags & AV_CODEC_FLAG_BITEXACT)
+             || ((dca->packet & DCA_PACKET_EXSS) && (asset->extension_mask & DCA_EXSS_XLL));
+    if (use_fixed)
+        ret = filter_frame_fixed(s, frame);
+    else
+        ret = filter_frame_float(s, frame);
+    if (ret < 0)
+        return ret;
+
+    // Profile: any EXSS extension takes precedence over the core-substream ones
+    has_exss = (s->ext_audio_mask & DCA_EXSS_MASK) != 0;
+    if (has_exss)
+        avctx->profile = FF_PROFILE_DTS_HD_HRA;
+    else if (s->ext_audio_mask & (DCA_CSS_XXCH | DCA_CSS_XCH))
+        avctx->profile = FF_PROFILE_DTS_ES;
+    else if (s->ext_audio_mask & DCA_CSS_X96)
+        avctx->profile = FF_PROFILE_DTS_96_24;
+    else
+        avctx->profile = FF_PROFILE_DTS;
+
+    // Bit rate is exported only without EXSS extensions and when bit_rate > 3
+    avctx->bit_rate = (!has_exss && s->bit_rate > 3) ? s->bit_rate : 0;
+
+    // Signal Dolby matrix encoding for total stereo or an LtRt downmix
+    if (s->audio_mode == AMODE_STEREO_TOTAL ||
+        (s->request_mask != s->ch_mask && s->prim_dmix_type == DCA_DMIX_TYPE_LtRt))
+        ret = ff_side_data_update_matrix_encoding(frame, AV_MATRIX_ENCODING_DOLBY);
+    else
+        ret = ff_side_data_update_matrix_encoding(frame, AV_MATRIX_ENCODING_NONE);
+
+    return ret < 0 ? ret : 0;
+}
+
+// Drop inter-frame state: ADPCM and LFE history, then synthesis filter history.
+av_cold void ff_dca_core_flush(DCACoreDecoder *s)
+{
+    // Nothing to erase for buffers that were never allocated
+    if (s->subband_buffer != NULL) {
+        erase_adpcm_history(s);
+        memset(s->lfe_samples, 0, sizeof(int32_t) * DCA_LFE_HISTORY);
+    }
+    if (s->x96_subband_buffer != NULL)
+        erase_x96_adpcm_history(s);
+    erase_dsp_history(s);
+}
+
+// Initialize tables, DSP contexts and transforms; 0 on success, AVERROR(ENOMEM) on failure.
+av_cold int ff_dca_core_init(DCACoreDecoder *s)
+{
+    dca_init_vlcs();
+
+    if (!(s->float_dsp = avpriv_float_dsp_alloc(0)))
+        return AVERROR(ENOMEM);
+    if (!(s->fixed_dsp = avpriv_alloc_fixed_dsp(0)))
+        return AVERROR(ENOMEM);
+
+    ff_dcadct_init(&s->dcadct);
+    // One IMDCT per synthesis mode, indexed by x96_synth in the float filter
+    if (ff_mdct_init(&s->imdct[0], 6, 1, 1.0) < 0 ||
+        ff_mdct_init(&s->imdct[1], 7, 1, 1.0) < 0)
+        return AVERROR(ENOMEM);
+    ff_synth_filter_init(&s->synth);
+    s->x96_rand = 1;
+    return 0;
+}
+
+// Release everything owned by the core decoder context.
+av_cold void ff_dca_core_close(DCACoreDecoder *s)
+{
+    int i;
+
+    av_freep(&s->float_dsp);
+    av_freep(&s->fixed_dsp);
+    for (i = 0; i < 2; i++)
+        ff_mdct_end(&s->imdct[i]);
+
+    // Free each buffer and reset its recorded size
+    av_freep(&s->subband_buffer);
+    av_freep(&s->x96_subband_buffer);
+    av_freep(&s->output_buffer);
+    s->subband_size = s->x96_subband_size = 0;
+    s->output_size = 0;
+}
diff --git a/libavcodec/dca_core.h b/libavcodec/dca_core.h
new file mode 100644
index 00000000..112b72ba
--- /dev/null
+++ b/libavcodec/dca_core.h
@@ -0,0 +1,206 @@
+/*
+ * Copyright (C) 2016 foo86
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_DCA_CORE_H
+#define AVCODEC_DCA_CORE_H
+
+#include "libavutil/common.h"
+#include "libavutil/float_dsp.h"
+#include "libavutil/fixed_dsp.h"
+#include "libavutil/mem.h"
+
+#include "avcodec.h"
+#include "internal.h"
+#include "get_bits.h"
+#include "dca.h"
+#include "dca_exss.h"
+#include "dcadsp.h"
+#include "dcadct.h"
+#include "fft.h"
+#include "synth_filter.h"
+
+#define DCA_CHANNELS            7   // first dimension of the per-channel arrays in DCACoreDecoder
+#define DCA_SUBBANDS            32  // subband dimension of the core arrays
+#define DCA_SUBBANDS_X96        64  // subband dimension of the X96 arrays
+#define DCA_SUBFRAMES           16  // dimension of nsubsubframes[] and transition_mode[]
+#define DCA_SUBBAND_SAMPLES     8
+#define DCA_PCMBLOCK_SAMPLES    32
+#define DCA_ADPCM_COEFFS        4
+#define DCA_LFE_HISTORY         8   // number of LFE samples zeroed by ff_dca_core_flush()
+#define DCA_CODE_BOOKS          10  // second dimension of quant_index_sel[] / scale_factor_adj[]
+#define DCA_ABITS_MAX           26
+// Upper bounds used to size the downmix and extension arrays in DCACoreDecoder
+#define DCA_CORE_CHANNELS_MAX       6
+#define DCA_DMIX_CHANNELS_MAX       4
+#define DCA_XXCH_CHANNELS_MAX       2
+#define DCA_EXSS_CHANNELS_MAX       8
+#define DCA_EXSS_CHSETS_MAX         4
+// Filter mode bits; presumably combined in DCACoreDecoder.filter_mode — TODO confirm
+#define DCA_FILTER_MODE_X96     0x01
+#define DCA_FILTER_MODE_FIXED   0x02
+
+typedef struct DCADSPData {     // One element per channel of DCACoreDecoder.dcadsp_data[] (FIR history buffers)
+    union {                     // float and fixed-point layouts share the same storage
+        struct {
+            DECLARE_ALIGNED(32, float, hist1)[1024];
+            DECLARE_ALIGNED(32, float, hist2)[64];
+        } flt;                  // float variant
+        struct {
+            DECLARE_ALIGNED(32, int32_t, hist1)[1024];
+            DECLARE_ALIGNED(32, int32_t, hist2)[64];
+        } fix;                  // int32_t variant
+    } u;
+    int offset;                 // NOTE(review): presumably the current position within hist1 — confirm against the filter code
+} DCADSPData;
+
+typedef struct DCACoreDecoder {
+    AVCodecContext  *avctx;
+    GetBitContext   gb;         ///< Bitstream reader state
+
+    // Bit stream header
+    int     crc_present;        ///< CRC present flag
+    int     npcmblocks;         ///< Number of PCM sample blocks
+    int     frame_size;         ///< Primary frame byte size
+    int     audio_mode;         ///< Audio channel arrangement
+    int     sample_rate;        ///< Core audio sampling frequency
+    int     bit_rate;           ///< Transmission bit rate
+    int     drc_present;        ///< Embedded dynamic range flag
+    int     ts_present;         ///< Embedded time stamp flag
+    int     aux_present;        ///< Auxiliary data flag
+    int     ext_audio_type;     ///< Extension audio descriptor flag
+    int     ext_audio_present;  ///< Extended coding flag
+    int     sync_ssf;           ///< Audio sync word insertion flag
+    int     lfe_present;        ///< Low frequency effects flag
+    int     predictor_history;  ///< Predictor history flag switch
+    int     filter_perfect;     ///< Multirate interpolator switch
+    int     source_pcm_res;     ///< Source PCM resolution
+    int     es_format;          ///< Extended surround (ES) mastering flag
+    int     sumdiff_front;      ///< Front sum/difference flag
+    int     sumdiff_surround;   ///< Surround sum/difference flag
+
+    // Primary audio coding header
+    int         nsubframes;     ///< Number of subframes
+    int         nchannels;      ///< Number of primary audio channels (incl. extension channels)
+    int         ch_mask;        ///< Speaker layout mask (incl. LFE and extension channels)
+    int8_t      nsubbands[DCA_CHANNELS];                ///< Subband activity count
+    int8_t      subband_vq_start[DCA_CHANNELS];         ///< High frequency VQ start subband
+    int8_t      joint_intensity_index[DCA_CHANNELS];    ///< Joint intensity coding index
+    int8_t      transition_mode_sel[DCA_CHANNELS];      ///< Transient mode code book
+    int8_t      scale_factor_sel[DCA_CHANNELS];         ///< Scale factor code book
+    int8_t      bit_allocation_sel[DCA_CHANNELS];       ///< Bit allocation quantizer select
+    int8_t      quant_index_sel[DCA_CHANNELS][DCA_CODE_BOOKS];  ///< Quantization index codebook select
+    int32_t     scale_factor_adj[DCA_CHANNELS][DCA_CODE_BOOKS]; ///< Scale factor adjustment
+
+    // Primary audio coding side information
+    int8_t      nsubsubframes[DCA_SUBFRAMES];   ///< Subsubframe count for each subframe
+    int8_t      prediction_mode[DCA_CHANNELS][DCA_SUBBANDS_X96];            ///< Prediction mode
+    int16_t     prediction_vq_index[DCA_CHANNELS][DCA_SUBBANDS_X96];        ///< Prediction coefficients VQ address
+    int8_t      bit_allocation[DCA_CHANNELS][DCA_SUBBANDS_X96];             ///< Bit allocation index
+    int8_t      transition_mode[DCA_SUBFRAMES][DCA_CHANNELS][DCA_SUBBANDS]; ///< Transition mode
+    int32_t     scale_factors[DCA_CHANNELS][DCA_SUBBANDS][2];               ///< Scale factors (2x for transients and X96)
+    int8_t      joint_scale_sel[DCA_CHANNELS];                              ///< Joint subband codebook select
+    int32_t     joint_scale_factors[DCA_CHANNELS][DCA_SUBBANDS_X96];        ///< Scale factors for joint subband coding
+
+    // Auxiliary data
+    int     prim_dmix_embedded; ///< Auxiliary dynamic downmix flag
+    int     prim_dmix_type;     ///< Auxiliary primary channel downmix type
+    int     prim_dmix_coeff[DCA_DMIX_CHANNELS_MAX * DCA_CORE_CHANNELS_MAX]; ///< Dynamic downmix code coefficients
+
+    // Core extensions
+    int     ext_audio_mask;     ///< Bit mask of fully decoded core extensions
+
+    // XCH extension data
+    int     xch_pos;    ///< Bit position of XCH frame in core substream
+
+    // XXCH extension data
+    int     xxch_crc_present;       ///< CRC presence flag for XXCH channel set header
+    int     xxch_mask_nbits;        ///< Number of bits for loudspeaker mask
+    int     xxch_core_mask;         ///< Core loudspeaker activity mask
+    int     xxch_spkr_mask;         ///< Loudspeaker layout mask
+    int     xxch_dmix_embedded;     ///< Downmix already performed by encoder
+    int     xxch_dmix_scale_inv;    ///< Downmix scale factor
+    int     xxch_dmix_mask[DCA_XXCH_CHANNELS_MAX];  ///< Downmix channel mapping mask
+    int     xxch_dmix_coeff[DCA_XXCH_CHANNELS_MAX * DCA_CORE_CHANNELS_MAX];     ///< Downmix coefficients
+    int     xxch_pos;   ///< Bit position of XXCH frame in core substream
+
+    // X96 extension data
+    int     x96_rev_no;         ///< X96 revision number
+    int     x96_crc_present;    ///< CRC presence flag for X96 channel set header
+    int     x96_nchannels;      ///< Number of primary channels in X96 extension
+    int     x96_high_res;       ///< X96 high resolution flag
+    int     x96_subband_start;  ///< First encoded subband in X96 extension
+    int     x96_rand;           ///< Random seed for generating samples for unallocated X96 subbands
+    int     x96_pos;            ///< Bit position of X96 frame in core substream
+
+    // Sample buffers
+    unsigned int    x96_subband_size;       ///< Reset to 0 when x96_subband_buffer is freed; presumably its allocated size — TODO confirm
+    int32_t         *x96_subband_buffer;    ///< X96 subband sample buffer base
+    int32_t         *x96_subband_samples[DCA_CHANNELS][DCA_SUBBANDS_X96];   ///< X96 subband samples
+
+    unsigned int    subband_size;       ///< Reset to 0 when subband_buffer is freed; presumably its allocated size — TODO confirm
+    int32_t         *subband_buffer;    ///< Subband sample buffer base
+    int32_t         *subband_samples[DCA_CHANNELS][DCA_SUBBANDS];   ///< Subband samples
+    int32_t         *lfe_samples;    ///< Decimated LFE samples
+
+    // DSP contexts
+    DCADSPData              dcadsp_data[DCA_CHANNELS];    ///< FIR history buffers
+    DCADSPContext           *dcadsp;
+    DCADCTContext           dcadct;         ///< Initialized by ff_dcadct_init() in ff_dca_core_init()
+    FFTContext              imdct[2];       ///< Initialized by ff_mdct_init() with second argument 6 ([0]) and 7 ([1])
+    SynthFilterContext      synth;          ///< Initialized by ff_synth_filter_init() in ff_dca_core_init()
+    AVFloatDSPContext       *float_dsp;     ///< Allocated in ff_dca_core_init(), freed in ff_dca_core_close()
+    AVFixedDSPContext       *fixed_dsp;     ///< Allocated in ff_dca_core_init(), freed in ff_dca_core_close()
+
+    // PCM output data
+    unsigned int    output_size;                            ///< Reset to 0 when output_buffer is freed; presumably its allocated size — TODO confirm
+    void            *output_buffer;                         ///< PCM output buffer base
+    int32_t         *output_samples[DCA_SPEAKER_COUNT];     ///< PCM output for fixed point mode
+    int32_t         output_history_lfe_fixed;               ///< LFE PCM history for X96 filter
+    float           output_history_lfe_float;               ///< LFE PCM history for X96 filter
+
+    int     ch_remap[DCA_SPEAKER_COUNT];   ///< Channel to speaker map
+    int     request_mask;   ///< Requested channel layout (for stereo downmix)
+
+    int     npcmsamples;    ///< Number of PCM samples per channel
+    int     output_rate;    ///< Output sample rate (1x or 2x header rate)
+
+    int     filter_mode;    ///< Previous filtering mode for detecting changes
+} DCACoreDecoder;
+
+static inline int ff_dca_core_map_spkr(DCACoreDecoder *core, int spkr)
+{   // Maps speaker index spkr to one present in core->ch_mask; returns -1 when there is none
+    if (core->ch_mask & (1U << spkr))   // the speaker itself is present (spkr must be a valid shift count)
+        return spkr;
+    if (spkr == DCA_SPEAKER_Lss && (core->ch_mask & DCA_SPEAKER_MASK_Ls))   // Lss falls back to Ls
+        return DCA_SPEAKER_Ls;
+    if (spkr == DCA_SPEAKER_Rss && (core->ch_mask & DCA_SPEAKER_MASK_Rs))   // Rss falls back to Rs
+        return DCA_SPEAKER_Rs;
+    return -1;
+}
+
+int ff_dca_core_parse(DCACoreDecoder *s, uint8_t *data, int size);
+int ff_dca_core_parse_exss(DCACoreDecoder *s, uint8_t *data, DCAExssAsset *asset);
+int ff_dca_core_filter_fixed(DCACoreDecoder *s, int x96_synth);
+int ff_dca_core_filter_frame(DCACoreDecoder *s, AVFrame *frame); // returns 0 on success, a negative value on error
+av_cold void ff_dca_core_flush(DCACoreDecoder *s);  // clears ADPCM, LFE and DSP history
+av_cold int ff_dca_core_init(DCACoreDecoder *s);    // returns 0 on success, -1 on failure
+av_cold void ff_dca_core_close(DCACoreDecoder *s);  // frees DSP/MDCT contexts and sample buffers
+
+#endif
diff --git a/libavcodec/dca_exss.c b/libavcodec/dca_exss.c
index ed014906..4579f235 100644
--- a/libavcodec/dca_exss.c
+++ b/libavcodec/dca_exss.c
@@ -1,5 +1,5 @@
 /*
- * DCA ExSS extension
+ * Copyright (C) 2016 foo86
  *
  * This file is part of FFmpeg.
  *
@@ -18,356 +18,497 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include "libavutil/common.h"
-#include "libavutil/log.h"
-
-#include "dca.h"
-#include "dca_syncwords.h"
-#include "get_bits.h"
-
-/* extensions that reside in core substream */
-#define DCA_CORE_EXTS (DCA_EXT_XCH | DCA_EXT_XXCH | DCA_EXT_X96)
-
-/* these are unconfirmed but should be mostly correct */
-enum DCAExSSSpeakerMask {
-    DCA_EXSS_FRONT_CENTER          = 0x0001,
-    DCA_EXSS_FRONT_LEFT_RIGHT      = 0x0002,
-    DCA_EXSS_SIDE_REAR_LEFT_RIGHT  = 0x0004,
-    DCA_EXSS_LFE                   = 0x0008,
-    DCA_EXSS_REAR_CENTER           = 0x0010,
-    DCA_EXSS_FRONT_HIGH_LEFT_RIGHT = 0x0020,
-    DCA_EXSS_REAR_LEFT_RIGHT       = 0x0040,
-    DCA_EXSS_FRONT_HIGH_CENTER     = 0x0080,
-    DCA_EXSS_OVERHEAD              = 0x0100,
-    DCA_EXSS_CENTER_LEFT_RIGHT     = 0x0200,
-    DCA_EXSS_WIDE_LEFT_RIGHT       = 0x0400,
-    DCA_EXSS_SIDE_LEFT_RIGHT       = 0x0800,
-    DCA_EXSS_LFE2                  = 0x1000,
-    DCA_EXSS_SIDE_HIGH_LEFT_RIGHT  = 0x2000,
-    DCA_EXSS_REAR_HIGH_CENTER      = 0x4000,
-    DCA_EXSS_REAR_HIGH_LEFT_RIGHT  = 0x8000,
-};
-
-/**
- * Return the number of channels in an ExSS speaker mask (HD)
- */
-static int dca_exss_mask2count(int mask)
+#include "dcadec.h"
+#include "dcadata.h"
+
+static int count_chs_for_mask(int mask)
 {
-    /* count bits that mean speaker pairs twice */
-    return av_popcount(mask) +
-           av_popcount(mask & (DCA_EXSS_CENTER_LEFT_RIGHT      |
-                               DCA_EXSS_FRONT_LEFT_RIGHT       |
-                               DCA_EXSS_FRONT_HIGH_LEFT_RIGHT  |
-                               DCA_EXSS_WIDE_LEFT_RIGHT        |
-                               DCA_EXSS_SIDE_LEFT_RIGHT        |
-                               DCA_EXSS_SIDE_HIGH_LEFT_RIGHT   |
-                               DCA_EXSS_SIDE_REAR_LEFT_RIGHT   |
-                               DCA_EXSS_REAR_LEFT_RIGHT        |
-                               DCA_EXSS_REAR_HIGH_LEFT_RIGHT));
+    return av_popcount(mask) + av_popcount(mask & 0xae66); // 0xae66 = the mask bits that denote left/right speaker pairs; those bits count twice
 }
 
-/**
- * Skip mixing coefficients of a single mix out configuration (HD)
- */
-static void dca_exss_skip_mix_coeffs(GetBitContext *gb, int channels, int out_ch)
+static void parse_xll_parameters(DCAExssParser *s, DCAExssAsset *asset)
 {
-    int i;
+    // Reads the XLL navigation fields from s->gb into *asset, starting with the XLL data size
+    asset->xll_size = get_bits(&s->gb, s->exss_size_nbits) + 1; // coded as size - 1
+
+    // XLL sync word present flag (assignment inside the condition is intentional)
+    if (asset->xll_sync_present = get_bits1(&s->gb)) {
+        int xll_delay_nbits;
+
+        // Peak bit rate smoothing buffer size
+        skip_bits(&s->gb, 4);
+
+        // Number of bits for XLL decoding delay (1..32, hence get_bits_long below)
+        xll_delay_nbits = get_bits(&s->gb, 5) + 1;
 
-    for (i = 0; i < channels; i++) {
-        int mix_map_mask = get_bits(gb, out_ch);
-        int num_coeffs = av_popcount(mix_map_mask);
-        skip_bits_long(gb, num_coeffs * 6);
+        // Initial XLL decoding delay in frames
+        asset->xll_delay_nframes = get_bits_long(&s->gb, xll_delay_nbits);
+
+        // Number of bytes offset to XLL sync
+        asset->xll_sync_offset = get_bits(&s->gb, s->exss_size_nbits);
+    } else { // no sync word: delay and sync offset are reset to 0
+        asset->xll_delay_nframes = 0;
+        asset->xll_sync_offset = 0;
     }
 }
 
-/**
- * Parse extension substream asset header (HD)
- */
-static int dca_exss_parse_asset_header(DCAContext *s)
+static void parse_lbr_parameters(DCAExssParser *s, DCAExssAsset *asset)
 {
-    int header_pos = get_bits_count(&s->gb);
-    int header_size;
-    int channels = 0;
-    int embedded_stereo = 0;
-    int embedded_6ch    = 0;
-    int drc_code_present;
-    int extensions_mask = 0;
-    int i, j;
-
-    if (get_bits_left(&s->gb) < 16)
-        return AVERROR_INVALIDDATA;
+    // Reads the LBR navigation fields from s->gb; only the component size (coded as size - 1) is stored
+    asset->lbr_size = get_bits(&s->gb, 14) + 1;
+
+    // LBR sync word present flag
+    if (get_bits1(&s->gb))
+        // LBR sync distance (skipped, not stored)
+        skip_bits(&s->gb, 2);
+}
 
-    /* We will parse just enough to get to the extensions bitmask with which
-     * we can set the profile value. */
+static int parse_descriptor(DCAExssParser *s, DCAExssAsset *asset)
+{
+    int i, j, drc_present, descr_size, descr_pos = get_bits_count(&s->gb);
+    // Parses one audio asset descriptor from s->gb into *asset; returns 0 or AVERROR_INVALIDDATA
+    // Size of audio asset descriptor in bytes
+    descr_size = get_bits(&s->gb, 9) + 1;
+
+    // Audio asset identifier
+    asset->asset_index = get_bits(&s->gb, 3);
 
-    header_size = get_bits(&s->gb, 9) + 1;
-    skip_bits(&s->gb, 3); // asset index
+    //
+    // Per stream static metadata
+    //
 
-    if (s->static_fields) {
+    if (s->static_fields_present) {
+        // Asset type descriptor presence
         if (get_bits1(&s->gb))
-            skip_bits(&s->gb, 4); // asset type descriptor
+            // Asset type descriptor
+            skip_bits(&s->gb, 4);
+
+        // Language descriptor presence
         if (get_bits1(&s->gb))
-            skip_bits_long(&s->gb, 24); // language descriptor
+            // Language descriptor
+            skip_bits(&s->gb, 24);
 
+        // Additional textual information presence
         if (get_bits1(&s->gb)) {
-            /* How can one fit 1024 bytes of text here if the maximum value
-             * for the asset header size field above was 512 bytes? */
-            int text_length = get_bits(&s->gb, 10) + 1;
-            if (get_bits_left(&s->gb) < text_length * 8)
+            // Byte size of additional text info
+            int text_size = get_bits(&s->gb, 10) + 1;
+
+            // Sanity check available size
+            if (get_bits_left(&s->gb) < text_size * 8)
                 return AVERROR_INVALIDDATA;
-            skip_bits_long(&s->gb, text_length * 8); // info text
+
+            // Additional textual information string
+            skip_bits_long(&s->gb, text_size * 8);
         }
 
-        skip_bits(&s->gb, 5); // bit resolution - 1
-        skip_bits(&s->gb, 4); // max sample rate code
-        channels = get_bits(&s->gb, 8) + 1;
+        // PCM bit resolution
+        asset->pcm_bit_res = get_bits(&s->gb, 5) + 1;
 
-        s->one2one_map_chtospkr = get_bits1(&s->gb);
-        if (s->one2one_map_chtospkr) {
-            int spkr_remap_sets;
-            int spkr_mask_size = 16;
-            int num_spkrs[7];
+        // Maximum sample rate
+        asset->max_sample_rate = ff_dca_sampling_freqs[get_bits(&s->gb, 4)];
 
-            if (channels > 2)
-                embedded_stereo = get_bits1(&s->gb);
-            if (channels > 6)
-                embedded_6ch = get_bits1(&s->gb);
+        // Total number of channels
+        asset->nchannels_total = get_bits(&s->gb, 8) + 1;
 
-            if (get_bits1(&s->gb)) {
-                spkr_mask_size = (get_bits(&s->gb, 2) + 1) << 2;
-                skip_bits(&s->gb, spkr_mask_size); // spkr activity mask
-            }
+        // One to one map channel to speakers
+        if (asset->one_to_one_map_ch_to_spkr = get_bits1(&s->gb)) {
+            int spkr_mask_nbits = 0;
+            int spkr_remap_nsets;
+            int nspeakers[8];
+
+            // Embedded stereo flag: only coded for more than 2 channels, and
+            // cleared otherwise so a value left in *asset by an earlier call cannot leak
+            asset->embedded_stereo = asset->nchannels_total > 2 && get_bits1(&s->gb);
+
+            // Embedded 6 channels flag: only coded for more than 6 channels, and
+            // cleared otherwise for the same reason
+            asset->embedded_6ch = asset->nchannels_total > 6 && get_bits1(&s->gb);
 
-            spkr_remap_sets = get_bits(&s->gb, 3);
+            // Speaker mask enabled flag
+            if (asset->spkr_mask_enabled = get_bits1(&s->gb)) {
+                // Number of bits for speaker activity mask
+                spkr_mask_nbits = (get_bits(&s->gb, 2) + 1) << 2;
 
-            for (i = 0; i < spkr_remap_sets; i++) {
-                /* std layout mask for each remap set */
-                num_spkrs[i] = dca_exss_mask2count(get_bits(&s->gb, spkr_mask_size));
+                // Loudspeaker activity mask
+                asset->spkr_mask = get_bits(&s->gb, spkr_mask_nbits);
+            }
+
+            // Number of speaker remapping sets
+            if ((spkr_remap_nsets = get_bits(&s->gb, 3)) && !spkr_mask_nbits) {
+                av_log(s->avctx, AV_LOG_ERROR, "Speaker mask disabled yet there are remapping sets\n");
+                return AVERROR_INVALIDDATA;
             }
 
-            for (i = 0; i < spkr_remap_sets; i++) {
-                int num_dec_ch_remaps = get_bits(&s->gb, 5) + 1;
-                if (get_bits_left(&s->gb) < 0)
-                    return AVERROR_INVALIDDATA;
+            // Standard loudspeaker layout mask
+            for (i = 0; i < spkr_remap_nsets; i++)
+                nspeakers[i] = count_chs_for_mask(get_bits(&s->gb, spkr_mask_nbits));
 
-                for (j = 0; j < num_spkrs[i]; j++) {
-                    int remap_dec_ch_mask = get_bits_long(&s->gb, num_dec_ch_remaps);
-                    int num_dec_ch = av_popcount(remap_dec_ch_mask);
-                    skip_bits_long(&s->gb, num_dec_ch * 5); // remap codes
+            for (i = 0; i < spkr_remap_nsets; i++) {
+                // Number of channels to be decoded for speaker remapping
+                int nch_for_remaps = get_bits(&s->gb, 5) + 1;
+
+                for (j = 0; j < nspeakers[i]; j++) {
+                    // Decoded channels to output speaker mapping mask
+                    int remap_ch_mask = get_bits_long(&s->gb, nch_for_remaps);
+
+                    // Loudspeaker remapping codes
+                    skip_bits_long(&s->gb, av_popcount(remap_ch_mask) * 5);
                 }
             }
         } else {
-            skip_bits(&s->gb, 3); // representation type
+            asset->embedded_stereo = 0;
+            asset->embedded_6ch = 0;
+            asset->spkr_mask_enabled = 0;
+            asset->spkr_mask = 0;
+
+            // Representation type
+            asset->representation_type = get_bits(&s->gb, 3);
         }
     }
 
-    drc_code_present = get_bits1(&s->gb);
-    if (drc_code_present)
-        get_bits(&s->gb, 8); // drc code
+    //
+    // DRC, DNC and mixing metadata
+    //
 
+    // Dynamic range coefficient presence flag
+    drc_present = get_bits1(&s->gb);
+
+    // Code for dynamic range coefficient
+    if (drc_present)
+        skip_bits(&s->gb, 8);
+
+    // Dialog normalization presence flag
     if (get_bits1(&s->gb))
-        skip_bits(&s->gb, 5); // dialog normalization code
+        // Dialog normalization code
+        skip_bits(&s->gb, 5);
+
+    // DRC for stereo downmix
+    if (drc_present && asset->embedded_stereo)
+        skip_bits(&s->gb, 8);
 
-    if (drc_code_present && embedded_stereo)
-        get_bits(&s->gb, 8); // drc stereo code
+    // Mixing metadata presence flag
+    if (s->mix_metadata_enabled && get_bits1(&s->gb)) {
+        int nchannels_dmix;
 
-    if (s->mix_metadata && get_bits1(&s->gb)) {
-        skip_bits(&s->gb, 1); // external mix
-        skip_bits(&s->gb, 6); // post mix gain code
+        // External mixing flag
+        skip_bits1(&s->gb);
 
-        if (get_bits(&s->gb, 2) != 3) // mixer drc code
-            skip_bits(&s->gb, 3); // drc limit
+        // Post mixing / replacement gain adjustment
+        skip_bits(&s->gb, 6);
+
+        // DRC prior to mixing
+        if (get_bits(&s->gb, 2) == 3)
+            // Custom code for mixing DRC
+            skip_bits(&s->gb, 8);
         else
-            skip_bits(&s->gb, 8); // custom drc code
+            // Limit for mixing DRC
+            skip_bits(&s->gb, 3);
 
-        if (get_bits1(&s->gb)) // channel specific scaling
-            for (i = 0; i < s->num_mix_configs; i++)
-                skip_bits_long(&s->gb, s->mix_config_num_ch[i] * 6); // scale codes
+        // Scaling type for channels of main audio
+        // Scaling parameters of main audio
+        if (get_bits1(&s->gb))
+            for (i = 0; i < s->nmixoutconfigs; i++)
+                skip_bits_long(&s->gb, 6 * s->nmixoutchs[i]);
         else
-            skip_bits_long(&s->gb, s->num_mix_configs * 6); // scale codes
+            skip_bits_long(&s->gb, 6 * s->nmixoutconfigs);
+
+        nchannels_dmix = asset->nchannels_total;
+        if (asset->embedded_6ch)
+            nchannels_dmix += 6;
+        if (asset->embedded_stereo)
+            nchannels_dmix += 2;
 
-        for (i = 0; i < s->num_mix_configs; i++) {
-            if (get_bits_left(&s->gb) < 0)
+        for (i = 0; i < s->nmixoutconfigs; i++) {
+            if (!s->nmixoutchs[i]) {
+                av_log(s->avctx, AV_LOG_ERROR, "Invalid speaker layout mask for mixing configuration\n");
                 return AVERROR_INVALIDDATA;
-            dca_exss_skip_mix_coeffs(&s->gb, channels, s->mix_config_num_ch[i]);
-            if (embedded_6ch)
-                dca_exss_skip_mix_coeffs(&s->gb, 6, s->mix_config_num_ch[i]);
-            if (embedded_stereo)
-                dca_exss_skip_mix_coeffs(&s->gb, 2, s->mix_config_num_ch[i]);
+            }
+            for (j = 0; j < nchannels_dmix; j++) {
+                // Mix output mask
+                int mix_map_mask = get_bits(&s->gb, s->nmixoutchs[i]);
+
+                // Mixing coefficients
+                skip_bits_long(&s->gb, av_popcount(mix_map_mask) * 6);
+            }
         }
     }
 
-    switch (get_bits(&s->gb, 2)) {
-    case 0:
-        extensions_mask = get_bits(&s->gb, 12);
+    //
+    // Decoder navigation data
+    //
+
+    // Coding mode for the asset
+    asset->coding_mode = get_bits(&s->gb, 2);
+
+    // Coding components used in asset
+    switch (asset->coding_mode) {
+    case 0: // Coding mode that may contain multiple coding components
+        asset->extension_mask = get_bits(&s->gb, 12);
+
+        if (asset->extension_mask & DCA_EXSS_CORE) {
+            // Size of core component in extension substream
+            asset->core_size = get_bits(&s->gb, 14) + 1;
+            // Core sync word present flag
+            if (get_bits1(&s->gb))
+                // Core sync distance
+                skip_bits(&s->gb, 2);
+        }
+
+        if (asset->extension_mask & DCA_EXSS_XBR)
+            // Size of XBR extension in extension substream
+            asset->xbr_size = get_bits(&s->gb, 14) + 1;
+
+        if (asset->extension_mask & DCA_EXSS_XXCH)
+            // Size of XXCH extension in extension substream
+            asset->xxch_size = get_bits(&s->gb, 14) + 1;
+
+        if (asset->extension_mask & DCA_EXSS_X96)
+            // Size of X96 extension in extension substream
+            asset->x96_size = get_bits(&s->gb, 12) + 1;
+
+        if (asset->extension_mask & DCA_EXSS_LBR)
+            parse_lbr_parameters(s, asset);
+
+        if (asset->extension_mask & DCA_EXSS_XLL)
+            parse_xll_parameters(s, asset);
+
+        if (asset->extension_mask & DCA_EXSS_RSV1)
+            skip_bits(&s->gb, 16);
+
+        if (asset->extension_mask & DCA_EXSS_RSV2)
+            skip_bits(&s->gb, 16);
         break;
-    case 1:
-        extensions_mask = DCA_EXT_EXSS_XLL;
+
+    case 1: // Loss-less coding mode without CBR component
+        asset->extension_mask = DCA_EXSS_XLL;
+        parse_xll_parameters(s, asset);
         break;
-    case 2:
-        extensions_mask = DCA_EXT_EXSS_LBR;
+
+    case 2: // Low bit rate mode
+        asset->extension_mask = DCA_EXSS_LBR;
+        parse_lbr_parameters(s, asset);
         break;
-    case 3:
-        extensions_mask = 0; /* aux coding */
+
+    case 3: // Auxiliary coding mode
+        asset->extension_mask = 0;
+
+        // Size of auxiliary coded data
+        skip_bits(&s->gb, 14);
+
+        // Auxiliary codec identification
+        skip_bits(&s->gb, 8);
+
+        // Aux sync word present flag
+        if (get_bits1(&s->gb))
+            // Aux sync distance
+            skip_bits(&s->gb, 3);
         break;
     }
 
-    /* not parsed further, we were only interested in the extensions mask */
-
-    if (get_bits_left(&s->gb) < 0)
+    if (asset->extension_mask & DCA_EXSS_XLL)
+        // DTS-HD stream ID
+        asset->hd_stream_id = get_bits(&s->gb, 3);
+
+    // One to one mixing flag
+    // Per channel main audio scaling flag
+    // Main audio scaling codes
+    // Decode asset in secondary decoder flag
+    // Revision 2 DRC metadata
+    // Reserved
+    // Zero pad
+    if (ff_dca_seek_bits(&s->gb, descr_pos + descr_size * 8)) {
+        av_log(s->avctx, AV_LOG_ERROR, "Read past end of EXSS asset descriptor\n");
         return AVERROR_INVALIDDATA;
+    }
 
-    if (get_bits_count(&s->gb) - header_pos > header_size * 8) {
-        av_log(s->avctx, AV_LOG_WARNING, "Asset header size mismatch.\n");
-        return AVERROR_INVALIDDATA;
+    return 0;
+}
+
+static int set_exss_offsets(DCAExssAsset *asset)
+{
+    int offs = asset->asset_offset; // running offset of the next component
+    int size = asset->asset_size;   // bytes of the asset not yet claimed by a component
+    // Components flagged in extension_mask are laid out back to back in the order CORE, XBR, XXCH, X96, LBR, XLL; fails if a size exceeds what is left
+    if (asset->extension_mask & DCA_EXSS_CORE) {
+        asset->core_offset = offs;
+        if (asset->core_size > size)
+            return AVERROR_INVALIDDATA;
+        offs += asset->core_size;
+        size -= asset->core_size;
+    }
+
+    if (asset->extension_mask & DCA_EXSS_XBR) {
+        asset->xbr_offset = offs;
+        if (asset->xbr_size > size)
+            return AVERROR_INVALIDDATA;
+        offs += asset->xbr_size;
+        size -= asset->xbr_size;
     }
-    skip_bits_long(&s->gb, header_pos + header_size * 8 - get_bits_count(&s->gb));
 
-    if (extensions_mask & DCA_EXT_EXSS_XLL)
-        s->profile = FF_PROFILE_DTS_HD_MA;
-    else if (extensions_mask & (DCA_EXT_EXSS_XBR | DCA_EXT_EXSS_X96 |
-                                DCA_EXT_EXSS_XXCH))
-        s->profile = FF_PROFILE_DTS_HD_HRA;
+    if (asset->extension_mask & DCA_EXSS_XXCH) {
+        asset->xxch_offset = offs;
+        if (asset->xxch_size > size)
+            return AVERROR_INVALIDDATA;
+        offs += asset->xxch_size;
+        size -= asset->xxch_size;
+    }
+
+    if (asset->extension_mask & DCA_EXSS_X96) {
+        asset->x96_offset = offs;
+        if (asset->x96_size > size)
+            return AVERROR_INVALIDDATA;
+        offs += asset->x96_size;
+        size -= asset->x96_size;
+    }
 
-    if (!(extensions_mask & DCA_EXT_CORE))
-        av_log(s->avctx, AV_LOG_WARNING, "DTS core detection mismatch.\n");
-    if ((extensions_mask & DCA_CORE_EXTS) != s->core_ext_mask)
-        av_log(s->avctx, AV_LOG_WARNING,
-               "DTS extensions detection mismatch (%d, %d)\n",
-               extensions_mask & DCA_CORE_EXTS, s->core_ext_mask);
+    if (asset->extension_mask & DCA_EXSS_LBR) {
+        asset->lbr_offset = offs;
+        if (asset->lbr_size > size)
+            return AVERROR_INVALIDDATA;
+        offs += asset->lbr_size;
+        size -= asset->lbr_size;
+    }
+
+    if (asset->extension_mask & DCA_EXSS_XLL) {
+        asset->xll_offset = offs;
+        if (asset->xll_size > size)
+            return AVERROR_INVALIDDATA;
+        offs += asset->xll_size; // last component: offs and size are not read after this point
+        size -= asset->xll_size;
+    }
 
     return 0;
 }
 
-/**
- * Parse extension substream header (HD)
- */
-void ff_dca_exss_parse_header(DCAContext *s)
+int ff_dca_exss_parse(DCAExssParser *s, uint8_t *data, int size)
 {
-    int asset_size[8];
-    int ss_index;
-    int blownup;
-    int num_audiop = 1;
-    int num_assets = 1;
-    int active_ss_mask[8];
-    int i, j;
-    int start_pos;
-    int hdrsize;
-    uint32_t mkr;
-
-    if (get_bits_left(&s->gb) < 52)
-        return;
-
-    start_pos = get_bits_count(&s->gb) - 32;
-
-    skip_bits(&s->gb, 8); // user data
-    ss_index = get_bits(&s->gb, 2);
-
-    blownup = get_bits1(&s->gb);
-    hdrsize = get_bits(&s->gb,  8 + 4 * blownup) + 1; // header_size
-    skip_bits(&s->gb, 16 + 4 * blownup); // hd_size
-
-    s->static_fields = get_bits1(&s->gb);
-    if (s->static_fields) {
-        skip_bits(&s->gb, 2); // reference clock code
-        skip_bits(&s->gb, 3); // frame duration code
+    int i, ret, offset, wide_hdr, header_size;
 
-        if (get_bits1(&s->gb))
-            skip_bits_long(&s->gb, 36); // timestamp
+    if ((ret = init_get_bits8(&s->gb, data, size)) < 0)
+        return ret;
+
+    // Extension substream sync word
+    skip_bits_long(&s->gb, 32);
+
+    // User defined bits
+    skip_bits(&s->gb, 8);
+
+    // Extension substream index
+    s->exss_index = get_bits(&s->gb, 2);
+
+    // Flag indicating short or long header size
+    wide_hdr = get_bits1(&s->gb);
 
-        /* a single stream can contain multiple audio assets that can be
-         * combined to form multiple audio presentations */
+    // Extension substream header length
+    header_size = get_bits(&s->gb, 8 + 4 * wide_hdr) + 1;
 
-        num_audiop = get_bits(&s->gb, 3) + 1;
-        if (num_audiop > 1) {
-            avpriv_request_sample(s->avctx,
-                                  "Multiple DTS-HD audio presentations");
-            /* ignore such streams for now */
-            return;
+    // Check CRC
+    if ((s->avctx->err_recognition & (AV_EF_CRCCHECK | AV_EF_CAREFUL))
+        && ff_dca_check_crc(&s->gb, 32 + 8, header_size * 8)) {
+        av_log(s->avctx, AV_LOG_ERROR, "Invalid EXSS header checksum\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    s->exss_size_nbits = 16 + 4 * wide_hdr;
+
+    // Number of bytes of extension substream
+    s->exss_size = get_bits(&s->gb, s->exss_size_nbits) + 1;
+    if (s->exss_size > size) {
+        av_log(s->avctx, AV_LOG_ERROR, "Packet too short for EXSS frame\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    // Per stream static fields presence flag
+    if (s->static_fields_present = get_bits1(&s->gb)) {
+        int active_exss_mask[8];
+
+        // Reference clock code
+        skip_bits(&s->gb, 2);
+
+        // Extension substream frame duration
+        skip_bits(&s->gb, 3);
+
+        // Timecode presence flag
+        if (get_bits1(&s->gb))
+            // Timecode data
+            skip_bits_long(&s->gb, 36);
+
+        // Number of defined audio presentations
+        s->npresents = get_bits(&s->gb, 3) + 1;
+        if (s->npresents > 1) {
+            avpriv_request_sample(s->avctx, "%d audio presentations", s->npresents);
+            return AVERROR_PATCHWELCOME;
         }
 
-        num_assets = get_bits(&s->gb, 3) + 1;
-        if (num_assets > 1) {
-            avpriv_request_sample(s->avctx, "Multiple DTS-HD audio assets");
-            /* ignore such streams for now */
-            return;
+        // Number of audio assets in extension substream
+        s->nassets = get_bits(&s->gb, 3) + 1;
+        if (s->nassets > 1) {
+            avpriv_request_sample(s->avctx, "%d audio assets", s->nassets);
+            return AVERROR_PATCHWELCOME;
         }
 
-        for (i = 0; i < num_audiop; i++)
-            active_ss_mask[i] = get_bits(&s->gb, ss_index + 1);
+        // Active extension substream mask for audio presentation
+        for (i = 0; i < s->npresents; i++)
+            active_exss_mask[i] = get_bits(&s->gb, s->exss_index + 1);
 
-        for (i = 0; i < num_audiop; i++)
-            for (j = 0; j <= ss_index; j++)
-                if (active_ss_mask[i] & (1 << j))
-                    skip_bits(&s->gb, 8); // active asset mask
+        // Active audio asset mask
+        for (i = 0; i < s->npresents; i++)
+            skip_bits_long(&s->gb, av_popcount(active_exss_mask[i]) * 8);
 
-        s->mix_metadata = get_bits1(&s->gb);
-        if (s->mix_metadata) {
-            int mix_out_mask_size;
+        // Mixing metadata enable flag
+        if (s->mix_metadata_enabled = get_bits1(&s->gb)) {
+            int spkr_mask_nbits;
 
-            skip_bits(&s->gb, 2); // adjustment level
-            mix_out_mask_size  = (get_bits(&s->gb, 2) + 1) << 2;
-            s->num_mix_configs =  get_bits(&s->gb, 2) + 1;
+            // Mixing metadata adjustment level
+            skip_bits(&s->gb, 2);
 
-            for (i = 0; i < s->num_mix_configs; i++) {
-                int mix_out_mask        = get_bits(&s->gb, mix_out_mask_size);
-                s->mix_config_num_ch[i] = dca_exss_mask2count(mix_out_mask);
-            }
+            // Number of bits for mixer output speaker activity mask
+            spkr_mask_nbits = (get_bits(&s->gb, 2) + 1) << 2;
+
+            // Number of mixing configurations
+            s->nmixoutconfigs = get_bits(&s->gb, 2) + 1;
+
+            // Speaker layout mask for mixer output channels
+            for (i = 0; i < s->nmixoutconfigs; i++)
+                s->nmixoutchs[i] = count_chs_for_mask(get_bits(&s->gb, spkr_mask_nbits));
         }
+    } else {
+        s->npresents = 1;
+        s->nassets = 1;
     }
 
-    av_assert0(num_assets > 0); // silence a warning
-
-    for (i = 0; i < num_assets; i++)
-        asset_size[i] = get_bits_long(&s->gb, 16 + 4 * blownup) + 1;
+    // Size of encoded asset data in bytes
+    offset = header_size;
+    for (i = 0; i < s->nassets; i++) {
+        s->assets[i].asset_offset = offset;
+        s->assets[i].asset_size = get_bits(&s->gb, s->exss_size_nbits) + 1;
+        offset += s->assets[i].asset_size;
+        if (offset > s->exss_size) {
+            av_log(s->avctx, AV_LOG_ERROR, "EXSS asset out of bounds\n");
+            return AVERROR_INVALIDDATA;
+        }
+    }
 
-    for (i = 0; i < num_assets; i++) {
-        if (dca_exss_parse_asset_header(s))
-            return;
+    // Audio asset descriptor
+    for (i = 0; i < s->nassets; i++) {
+        if ((ret = parse_descriptor(s, &s->assets[i])) < 0)
+            return ret;
+        if ((ret = set_exss_offsets(&s->assets[i])) < 0) {
+            av_log(s->avctx, AV_LOG_ERROR, "Invalid extension size in EXSS asset descriptor\n");
+            return ret;
+        }
     }
 
-        j = get_bits_count(&s->gb);
-        if (start_pos + hdrsize * 8 > j)
-            skip_bits_long(&s->gb, start_pos + hdrsize * 8 - j);
-
-        for (i = 0; i < num_assets; i++) {
-            int end_pos;
-            start_pos = get_bits_count(&s->gb);
-            end_pos   = start_pos + asset_size[i] * 8;
-            mkr       = get_bits_long(&s->gb, 32);
-
-            /* parse extensions that we know about */
-            switch (mkr) {
-            case DCA_SYNCWORD_XBR:
-                ff_dca_xbr_parse_frame(s);
-                break;
-            case DCA_SYNCWORD_XXCH:
-                ff_dca_xxch_decode_frame(s);
-                s->core_ext_mask |= DCA_EXT_XXCH; /* xxx use for chan reordering */
-                break;
-            case DCA_SYNCWORD_XLL:
-                if (s->xll_disable) {
-                    av_log(s->avctx, AV_LOG_DEBUG,
-                           "DTS-XLL: ignoring XLL extension\n");
-                    break;
-                }
-                av_log(s->avctx, AV_LOG_DEBUG,
-                       "DTS-XLL: decoding XLL extension\n");
-                if (ff_dca_xll_decode_header(s)        == 0 &&
-                    ff_dca_xll_decode_navi(s, end_pos) == 0)
-                    s->exss_ext_mask |= DCA_EXT_EXSS_XLL;
-                break;
-            default:
-                av_log(s->avctx, AV_LOG_DEBUG,
-                       "DTS-ExSS: unknown marker = 0x%08x\n", mkr);
-            }
+    // Backward compatible core present
+    // Backward compatible core substream index
+    // Backward compatible core asset index
+    // Reserved
+    // Byte align
+    // CRC16 of extension substream header
+    if (ff_dca_seek_bits(&s->gb, header_size * 8)) {
+        av_log(s->avctx, AV_LOG_ERROR, "Read past end of EXSS header\n");
+        return AVERROR_INVALIDDATA;
+    }
 
-            /* skip to end of block */
-            j = get_bits_count(&s->gb);
-            if (j > end_pos)
-                av_log(s->avctx, AV_LOG_ERROR,
-                       "DTS-ExSS: Processed asset too long.\n");
-            if (j < end_pos)
-                skip_bits_long(&s->gb, end_pos - j);
-        }
+    return 0;
 }
diff --git a/libavcodec/dca_exss.h b/libavcodec/dca_exss.h
new file mode 100644
index 00000000..323063aa
--- /dev/null
+++ b/libavcodec/dca_exss.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2016 foo86
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_DCA_EXSS_H
+#define AVCODEC_DCA_EXSS_H
+
+#include "libavutil/common.h"
+
+#include "avcodec.h"
+#include "get_bits.h"
+
+typedef struct DCAExssAsset {
+    int     asset_offset;   ///< Offset in bytes to asset data from start of substream
+    int     asset_size;     ///< Size of encoded asset data in bytes
+    int     asset_index;    ///< Audio asset identifier
+
+    int     pcm_bit_res;                ///< PCM bit resolution
+    int     max_sample_rate;            ///< Maximum sample rate
+    int     nchannels_total;            ///< Total number of channels
+    int     one_to_one_map_ch_to_spkr;  ///< One to one channel to speaker mapping flag
+    int     embedded_stereo;            ///< Embedded stereo flag
+    int     embedded_6ch;               ///< Embedded 6 channels flag
+    int     spkr_mask_enabled;          ///< Speaker mask enabled flag
+    int     spkr_mask;                  ///< Loudspeaker activity mask
+    int     representation_type;        ///< Representation type
+
+    int     coding_mode;        ///< Coding mode for the asset
+    int     extension_mask;     ///< Coding components used in asset
+
+    int     core_offset;    ///< Offset to core component from start of substream
+    int     core_size;      ///< Size of core component in extension substream
+
+    int     xbr_offset;     ///< Offset to XBR extension from start of substream
+    int     xbr_size;       ///< Size of XBR extension in extension substream
+
+    int     xxch_offset;    ///< Offset to XXCH extension from start of substream
+    int     xxch_size;      ///< Size of XXCH extension in extension substream
+
+    int     x96_offset;     ///< Offset to X96 extension from start of substream
+    int     x96_size;       ///< Size of X96 extension in extension substream
+
+    int     lbr_offset;     ///< Offset to LBR component from start of substream
+    int     lbr_size;       ///< Size of LBR component in extension substream
+
+    int     xll_offset;         ///< Offset to XLL data from start of substream
+    int     xll_size;           ///< Size of XLL data in extension substream
+    int     xll_sync_present;   ///< XLL sync word present flag
+    int     xll_delay_nframes;  ///< Initial XLL decoding delay in frames
+    int     xll_sync_offset;    ///< Number of bytes offset to XLL sync
+
+    int     hd_stream_id;   ///< DTS-HD stream ID
+} DCAExssAsset;
+
+typedef struct DCAExssParser {
+    AVCodecContext  *avctx;
+    GetBitContext   gb;
+
+    int     exss_index;         ///< Extension substream index
+    int     exss_size_nbits;    ///< Number of bits for extension substream size
+    int     exss_size;          ///< Number of bytes of extension substream
+
+    int     static_fields_present;  ///< Per stream static fields presence flag
+    int     npresents;  ///< Number of defined audio presentations
+    int     nassets;    ///< Number of audio assets in extension substream
+
+    int     mix_metadata_enabled;   ///< Mixing metadata enable flag
+    int     nmixoutconfigs;         ///< Number of mixing configurations (1 to 4)
+    int     nmixoutchs[4];          ///< Mixer output channel count per mixing configuration, derived from its speaker layout mask
+
+    DCAExssAsset   assets[1];    ///< Audio asset descriptors (the parser rejects streams with more than one asset)
+} DCAExssParser;
+
+int ff_dca_exss_parse(DCAExssParser *s, uint8_t *data, int size);
+
+#endif
diff --git a/libavcodec/dca_parser.c b/libavcodec/dca_parser.c
index 9dafe706..bde7dfe8 100644
--- a/libavcodec/dca_parser.c
+++ b/libavcodec/dca_parser.c
@@ -113,7 +113,7 @@ static int dca_parse_params(const uint8_t *buf, int buf_size, int *duration,
                             int *sample_rate, int *framesize)
 {
     GetBitContext gb;
-    uint8_t hdr[12 + FF_INPUT_BUFFER_PADDING_SIZE] = { 0 };
+    uint8_t hdr[12 + AV_INPUT_BUFFER_PADDING_SIZE] = { 0 };
     int ret, sample_blocks, sr_code;
 
     if (buf_size < 12)
@@ -165,8 +165,9 @@ static int dca_parse(AVCodecParserContext *s, AVCodecContext *avctx,
 
     /* read the duration and sample rate from the frame header */
     if (!dca_parse_params(buf, buf_size, &duration, &sample_rate, &pc1->framesize)) {
-        s->duration        = duration;
-        avctx->sample_rate = sample_rate;
+        if (!avctx->sample_rate)
+            avctx->sample_rate = sample_rate;
+        s->duration = av_rescale(duration, avctx->sample_rate, sample_rate);
     } else
         s->duration = 0;
 
diff --git a/libavcodec/dca_syncwords.h b/libavcodec/dca_syncwords.h
index 3466b6bc..4d2cd5f5 100644
--- a/libavcodec/dca_syncwords.h
+++ b/libavcodec/dca_syncwords.h
@@ -19,19 +19,18 @@
 #ifndef AVCODEC_DCA_SYNCWORDS_H
 #define AVCODEC_DCA_SYNCWORDS_H
 
-enum DCASyncwords {
-    DCA_SYNCWORD_CORE_BE        = 0x7FFE8001U,
-    DCA_SYNCWORD_CORE_LE        = 0xFE7F0180U,
-    DCA_SYNCWORD_CORE_14B_BE    = 0x1FFFE800U,
-    DCA_SYNCWORD_CORE_14B_LE    = 0xFF1F00E8U,
-    DCA_SYNCWORD_XCH            = 0x5A5A5A5AU,
-    DCA_SYNCWORD_XXCH           = 0x47004A03U,
-    DCA_SYNCWORD_X96            = 0x1D95F262U,
-    DCA_SYNCWORD_XBR            = 0x655E315EU,
-    DCA_SYNCWORD_LBR            = 0x0A801921U,
-    DCA_SYNCWORD_XLL            = 0x41A29547U,
-    DCA_SYNCWORD_SUBSTREAM      = 0x64582025U,
-    DCA_SYNCWORD_SUBSTREAM_CORE = 0x02B09261U,
-};
+#define    DCA_SYNCWORD_CORE_BE              0x7FFE8001U
+#define    DCA_SYNCWORD_CORE_LE              0xFE7F0180U
+#define    DCA_SYNCWORD_CORE_14B_BE          0x1FFFE800U
+#define    DCA_SYNCWORD_CORE_14B_LE          0xFF1F00E8U
+#define    DCA_SYNCWORD_XCH                  0x5A5A5A5AU
+#define    DCA_SYNCWORD_XXCH                 0x47004A03U
+#define    DCA_SYNCWORD_X96                  0x1D95F262U
+#define    DCA_SYNCWORD_XBR                  0x655E315EU
+#define    DCA_SYNCWORD_LBR                  0x0A801921U
+#define    DCA_SYNCWORD_XLL                  0x41A29547U
+#define    DCA_SYNCWORD_SUBSTREAM            0x64582025U
+#define    DCA_SYNCWORD_SUBSTREAM_CORE       0x02B09261U
+#define    DCA_SYNCWORD_REV1AUX              0x9A1105A0U
 
 #endif /* AVCODEC_DCA_SYNCWORDS_H */
diff --git a/libavcodec/dca_xll.c b/libavcodec/dca_xll.c
index 98fd4c8e..cd1af81d 100644
--- a/libavcodec/dca_xll.c
+++ b/libavcodec/dca_xll.c
@@ -1,8 +1,5 @@
 /*
- * DCA XLL extension
- *
- * Copyright (C) 2012 Paul B Mahol
- * Copyright (C) 2014 Niels Möller
+ * Copyright (C) 2016 foo86
  *
  * This file is part of FFmpeg.
  *
@@ -21,727 +18,1482 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include "libavutil/attributes.h"
-#include "libavutil/common.h"
-#include "libavutil/internal.h"
-
-#include "avcodec.h"
-#include "dca.h"
+#include "dcadec.h"
 #include "dcadata.h"
-#include "get_bits.h"
+#include "dcamath.h"
+#include "dca_syncwords.h"
 #include "unary.h"
 
-/* Sign as bit 0 */
-static inline int get_bits_sm(GetBitContext *s, unsigned n)
+static int get_linear(GetBitContext *gb, int n)
 {
-    int x = get_bits(s, n);
-    if (x & 1)
-        return -(x >> 1) - 1;
-    else
-        return x >> 1;
-}
-
-/* Return -1 on error. */
-static int32_t get_dmix_coeff(DCAContext *s, int inverse)
-{
-    unsigned code = get_bits(&s->gb, 9);
-    int32_t sign = (int32_t) (code >> 8) - 1;
-    unsigned idx = code & 0xff;
-    int inv_offset = FF_DCA_DMIXTABLE_SIZE -FF_DCA_INV_DMIXTABLE_SIZE;
-    if (idx >= FF_DCA_DMIXTABLE_SIZE) {
-        av_log(s->avctx, AV_LOG_ERROR,
-               "XLL: Invalid channel set downmix code %x\n", code);
-        return -1;
-    } else if (!inverse) {
-        return (ff_dca_dmixtable[idx] ^ sign) - sign;
-    } else if (idx < inv_offset) {
-        av_log(s->avctx, AV_LOG_ERROR,
-               "XLL: Invalid channel set inverse downmix code %x\n", code);
-        return -1;
-    } else {
-        return (ff_dca_inv_dmixtable[idx - inv_offset] ^ sign) - sign;
-    }
+    unsigned int v = get_bits_long(gb, n);  // n-bit field carrying the sign in bit 0
+    return (v >> 1) ^ -(v & 1);             // even v -> v >> 1, odd v -> -(v >> 1) - 1
+}
+
+static int get_rice_un(GetBitContext *gb, int k)
+{
+    unsigned int v = get_unary(gb, 1, 128);  // unary-coded prefix; NOTE(review): 128 looks like a cap on the prefix length - confirm in unary.h
+    return (v << k) | get_bits_long(gb, k);  // prefix forms the high bits, the next k bits form the low bits
 }
 
-static int32_t dca_get_dmix_coeff(DCAContext *s)
+static int get_rice(GetBitContext *gb, int k)
 {
-    return get_dmix_coeff(s, 0);
+    unsigned int v = get_rice_un(gb, k);  // unsigned Rice code with parameter k
+    return (v >> 1) ^ -(v & 1);           // same sign-in-bit-0 mapping as get_linear()
 }
 
-static int32_t dca_get_inv_dmix_coeff(DCAContext *s)
+static void get_array(GetBitContext *gb, int32_t *array, int size, int n)
 {
-    return get_dmix_coeff(s, 1);
+    int i;
+
+    for (i = 0; i < size; i++)
+        array[i] = get_bits(gb, n);  // raw n-bit value, stored without any sign mapping
 }
 
-/* parse XLL header */
-int ff_dca_xll_decode_header(DCAContext *s)
+static void get_linear_array(GetBitContext *gb, int32_t *array, int size, int n)
 {
-    int hdr_pos, hdr_size;
-    av_unused int version, frame_size;
-    int i, chset_index;
+    int i;
 
-    /* get bit position of sync header */
-    hdr_pos    = get_bits_count(&s->gb) - 32;
+    if (n == 0)  // zero-width fields: nothing is read from the bitstream, all elements become 0
+        memset(array, 0, sizeof(*array) * size);
+    else for (i = 0; i < size; i++)
+        array[i] = get_linear(gb, n);  // signed n-bit value per element
+}
 
-    version    = get_bits(&s->gb, 4) + 1;
-    hdr_size   = get_bits(&s->gb, 8) + 1;
+static void get_rice_array(GetBitContext *gb, int32_t *array, int size, int k)
+{
+    int i;
 
-    frame_size = get_bits_long(&s->gb, get_bits(&s->gb, 5) + 1) + 1;
+    for (i = 0; i < size; i++)
+        array[i] = get_rice(gb, k);  // signed Rice code per element, all sharing parameter k
+}
 
-    s->xll_channels          =
-    s->xll_residual_channels = 0;
-    s->xll_nch_sets          = get_bits(&s->gb, 4) + 1;
-    s->xll_segments          = 1 << get_bits(&s->gb, 4);
-    s->xll_log_smpl_in_seg   = get_bits(&s->gb, 4);
-    s->xll_smpl_in_seg       = 1 << s->xll_log_smpl_in_seg;
-    s->xll_bits4seg_size     = get_bits(&s->gb, 5) + 1;
-    s->xll_banddata_crc      = get_bits(&s->gb, 2);
-    s->xll_scalable_lsb      = get_bits1(&s->gb);
-    s->xll_bits4ch_mask      = get_bits(&s->gb, 5) + 1;
+static int parse_dmix_coeffs(DCAXllDecoder *s, DCAXllChSet *c)
+{
+    // Number of rows in the downmix coefficient matrix; each row holds c->nchannels coefficients
+    int m = c->primary_chset ? ff_dca_dmix_primary_nch[c->dmix_type] : c->hier_ofs;
+    int i, j, *coeff_ptr = c->dmix_coeff;
+
+    for (i = 0; i < m; i++) {
+        int code, sign, coeff, scale, scale_inv = 0;
+        unsigned int index;
+
+        // Downmix scale (only for non-primary channel sets)
+        if (!c->primary_chset) {
+            code = get_bits(&s->gb, 9);
+            sign = (code >> 8) - 1;  // 0 when bit 8 is set, -1 when it is clear
+            index = (code & 0xff) - FF_DCA_DMIXTABLE_OFFSET;  // unsigned: codes below the offset wrap around and fail the check below
+            if (index >= FF_DCA_INV_DMIXTABLE_SIZE) {
+                av_log(s->avctx, AV_LOG_ERROR, "Invalid XLL downmix scale index\n");
+                return AVERROR_INVALIDDATA;
+            }
+            scale = ff_dca_dmixtable[index + FF_DCA_DMIXTABLE_OFFSET];
+            scale_inv = ff_dca_inv_dmixtable[index];
+            c->dmix_scale[i] = (scale ^ sign) - sign;  // negates when sign == -1, unchanged when sign == 0
+            c->dmix_scale_inv[i] = (scale_inv ^ sign) - sign;
+        }
 
-    if (s->xll_scalable_lsb) {
-        s->xll_fixed_lsb_width = get_bits(&s->gb, 4);
-        if (s->xll_fixed_lsb_width)
-            av_log(s->avctx, AV_LOG_WARNING,
-                   "XLL: fixed lsb width = %d, non-zero not supported.\n",
-                   s->xll_fixed_lsb_width);
+        // Downmix coefficients
+        for (j = 0; j < c->nchannels; j++) {
+            code = get_bits(&s->gb, 9);
+            sign = (code >> 8) - 1;  // 0 when bit 8 is set, -1 when it is clear
+            index = code & 0xff;
+            if (index >= FF_DCA_DMIXTABLE_SIZE) {
+                av_log(s->avctx, AV_LOG_ERROR, "Invalid XLL downmix coefficient index\n");
+                return AVERROR_INVALIDDATA;
+            }
+            coeff = ff_dca_dmixtable[index];
+            if (!c->primary_chset)
+                // Multiply by |InvDmixScale| to get |UndoDmixScale|
+                coeff = mul16(scale_inv, coeff);
+            *coeff_ptr++ = (coeff ^ sign) - sign;  // apply the coefficient's own sign and append to c->dmix_coeff
+        }
     }
-    /* skip to the end of the common header */
-    i = get_bits_count(&s->gb);
-    if (hdr_pos + hdr_size * 8 > i)
-        skip_bits_long(&s->gb, hdr_pos + hdr_size * 8 - i);
 
-    for (chset_index = 0; chset_index < s->xll_nch_sets; chset_index++) {
-        XllChSetSubHeader *chset = &s->xll_chsets[chset_index];
-        hdr_pos  = get_bits_count(&s->gb);
-        hdr_size = get_bits(&s->gb, 10) + 1;
+    return 0;
+}
+
+static int chs_parse_header(DCAXllDecoder *s, DCAXllChSet *c, DCAExssAsset *asset)
+{
+    int i, j, k, ret, band, header_size, header_pos = get_bits_count(&s->gb);
+    DCAXllChSet *p = &s->chset[0];
+    DCAXllBand *b;
 
-        chset->channels           = get_bits(&s->gb, 4) + 1;
-        chset->residual_encode    = get_bits(&s->gb, chset->channels);
-        chset->bit_resolution     = get_bits(&s->gb, 5) + 1;
-        chset->bit_width          = get_bits(&s->gb, 5) + 1;
-        chset->sampling_frequency = ff_dca_sampling_freqs[get_bits(&s->gb, 4)];
-        chset->samp_freq_interp   = get_bits(&s->gb, 2);
-        chset->replacement_set    = get_bits(&s->gb, 2);
-        if (chset->replacement_set)
-            chset->active_replace_set = get_bits(&s->gb, 1);
+    // Size of channel set sub-header
+    header_size = get_bits(&s->gb, 10) + 1;
 
-        if (s->one2one_map_chtospkr) {
-            chset->primary_ch_set              = get_bits(&s->gb, 1);
-            chset->downmix_coeff_code_embedded = get_bits(&s->gb, 1);
-            if (chset->downmix_coeff_code_embedded) {
-                chset->downmix_embedded = get_bits(&s->gb, 1);
-                if (chset->primary_ch_set) {
-                    chset->downmix_type = get_bits(&s->gb, 3);
-                    if (chset->downmix_type > 6) {
-                        av_log(s->avctx, AV_LOG_ERROR,
-                               "XLL: Invalid channel set downmix type\n");
-                        return AVERROR_INVALIDDATA;
-                    }
-                }
+    // Check CRC
+    if ((s->avctx->err_recognition & (AV_EF_CRCCHECK | AV_EF_CAREFUL))
+        && ff_dca_check_crc(&s->gb, header_pos, header_pos + header_size * 8)) {
+        av_log(s->avctx, AV_LOG_ERROR, "Invalid XLL sub-header checksum\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    // Number of channels in the channel set
+    c->nchannels = get_bits(&s->gb, 4) + 1;
+    if (c->nchannels > DCA_XLL_CHANNELS_MAX) {
+        avpriv_request_sample(s->avctx, "%d XLL channels", c->nchannels);
+        return AVERROR_PATCHWELCOME;
+    }
+
+    // Residual type
+    c->residual_encode = get_bits(&s->gb, c->nchannels);
+
+    // PCM bit resolution
+    c->pcm_bit_res = get_bits(&s->gb, 5) + 1;
+
+    // Storage unit width
+    c->storage_bit_res = get_bits(&s->gb, 5) + 1;
+    if (c->storage_bit_res != 16 && c->storage_bit_res != 24) {
+        avpriv_request_sample(s->avctx, "%d-bit XLL storage resolution", c->storage_bit_res);
+        return AVERROR_PATCHWELCOME;
+    }
+
+    if (c->pcm_bit_res > c->storage_bit_res) {
+        av_log(s->avctx, AV_LOG_ERROR, "Invalid PCM bit resolution for XLL channel set (%d > %d)\n", c->pcm_bit_res, c->storage_bit_res);
+        return AVERROR_INVALIDDATA;
+    }
+
+    // Original sampling frequency
+    c->freq = ff_dca_sampling_freqs[get_bits(&s->gb, 4)];
+    if (c->freq > 192000) {
+        avpriv_request_sample(s->avctx, "%d Hz XLL sampling frequency", c->freq);
+        return AVERROR_PATCHWELCOME;
+    }
+
+    // Sampling frequency modifier
+    if (get_bits(&s->gb, 2)) {
+        avpriv_request_sample(s->avctx, "XLL sampling frequency modifier");
+        return AVERROR_PATCHWELCOME;
+    }
+
+    // Which replacement set this channel set is member of
+    if (get_bits(&s->gb, 2)) {
+        avpriv_request_sample(s->avctx, "XLL replacement set");
+        return AVERROR_PATCHWELCOME;
+    }
+
+    if (asset->one_to_one_map_ch_to_spkr) {
+        // Primary channel set flag
+        c->primary_chset = get_bits1(&s->gb);
+        if (c->primary_chset != (c == p)) {
+            av_log(s->avctx, AV_LOG_ERROR, "The first (and only) XLL channel set must be primary\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        // Downmix coefficients present in stream
+        c->dmix_coeffs_present = get_bits1(&s->gb);
+
+        // Downmix already performed by encoder
+        c->dmix_embedded = c->dmix_coeffs_present && get_bits1(&s->gb);
+
+        // Downmix type
+        if (c->dmix_coeffs_present && c->primary_chset) {
+            c->dmix_type = get_bits(&s->gb, 3);
+            if (c->dmix_type >= DCA_DMIX_TYPE_COUNT) {
+                av_log(s->avctx, AV_LOG_ERROR, "Invalid XLL primary channel set downmix type\n");
+                return AVERROR_INVALIDDATA;
             }
-            chset->hier_chset = get_bits(&s->gb, 1);
-
-            if (chset->downmix_coeff_code_embedded) {
-                /* nDownmixCoeffs is specified as N * M. For a primary
-                 * channel set, it appears that N = number of
-                 * channels, and M is the number of downmix channels.
-                 *
-                 * For a non-primary channel set, N is specified as
-                 * number of channels + 1, and M is derived from the
-                 * channel set hierarchy, and at least in simple cases
-                 * M is the number of channels in preceding channel
-                 * sets. */
-                if (chset->primary_ch_set) {
-                    static const char dmix_table[7] = { 1, 2, 2, 3, 3, 4, 4 };
-                    chset->downmix_ncoeffs = chset->channels * dmix_table[chset->downmix_type];
-                } else
-                    chset->downmix_ncoeffs = (chset->channels + 1) * s->xll_channels;
-
-                if (chset->downmix_ncoeffs > DCA_XLL_DMIX_NCOEFFS_MAX) {
-                    avpriv_request_sample(s->avctx,
-                                          "XLL: More than %d downmix coefficients",
-                                          DCA_XLL_DMIX_NCOEFFS_MAX);
-                    return AVERROR_PATCHWELCOME;
-                } else if (chset->primary_ch_set) {
-                    for (i = 0; i < chset->downmix_ncoeffs; i++)
-                        if ((chset->downmix_coeffs[i] = dca_get_dmix_coeff(s)) == -1)
-                            return AVERROR_INVALIDDATA;
-                } else {
-                    unsigned c, r;
-                    for (c = 0, i = 0; c < s->xll_channels; c++, i += chset->channels + 1) {
-                        if ((chset->downmix_coeffs[i] = dca_get_inv_dmix_coeff(s)) == -1)
-                            return AVERROR_INVALIDDATA;
-                        for (r = 1; r <= chset->channels; r++) {
-                            int32_t coeff = dca_get_dmix_coeff(s);
-                            if (coeff == -1)
-                                return AVERROR_INVALIDDATA;
-                            chset->downmix_coeffs[i + r] =
-                                (chset->downmix_coeffs[i] * (int64_t) coeff + (1 << 15)) >> 16;
-                        }
-                    }
+        }
+
+        // Whether the channel set is part of a hierarchy
+        c->hier_chset = get_bits1(&s->gb);
+        if (!c->hier_chset && s->nchsets != 1) {
+            avpriv_request_sample(s->avctx, "XLL channel set outside of hierarchy");
+            return AVERROR_PATCHWELCOME;
+        }
+
+        // Downmix coefficients
+        if (c->dmix_coeffs_present && (ret = parse_dmix_coeffs(s, c)) < 0)
+            return ret;
+
+        // Channel mask enabled
+        if (!get_bits1(&s->gb)) {
+            avpriv_request_sample(s->avctx, "Disabled XLL channel mask");
+            return AVERROR_PATCHWELCOME;
+        }
+
+        // Channel mask for set
+        c->ch_mask = get_bits_long(&s->gb, s->ch_mask_nbits);
+        if (av_popcount(c->ch_mask) != c->nchannels) {
+            av_log(s->avctx, AV_LOG_ERROR, "Invalid XLL channel mask\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        // Build the channel to speaker map
+        for (i = 0, j = 0; i < s->ch_mask_nbits; i++)
+            if (c->ch_mask & (1U << i))
+                c->ch_remap[j++] = i;
+    } else {
+        // Mapping coeffs present flag
+        if (c->nchannels != 2 || s->nchsets != 1 || get_bits1(&s->gb)) {
+            avpriv_request_sample(s->avctx, "Custom XLL channel to speaker mapping");
+            return AVERROR_PATCHWELCOME;
+        }
+
+        // Setup for LtRt decoding
+        c->primary_chset = 1;
+        c->dmix_coeffs_present = 0;
+        c->dmix_embedded = 0;
+        c->hier_chset = 0;
+        c->ch_mask = DCA_SPEAKER_LAYOUT_STEREO;
+        c->ch_remap[0] = DCA_SPEAKER_L;
+        c->ch_remap[1] = DCA_SPEAKER_R;
+    }
+
+    if (c->freq > 96000) {
+        // Extra frequency bands flag
+        if (get_bits1(&s->gb)) {
+            avpriv_request_sample(s->avctx, "Extra XLL frequency bands");
+            return AVERROR_PATCHWELCOME;
+        }
+        c->nfreqbands = 2;
+    } else {
+        c->nfreqbands = 1;
+    }
+
+    // Set the sampling frequency to that of the first frequency band.
+    // Frequency will be doubled again after bands assembly.
+    c->freq >>= c->nfreqbands - 1;
+
+    // Verify that all channel sets have the same audio characteristics
+    if (c != p && (c->nfreqbands != p->nfreqbands || c->freq != p->freq
+                   || c->pcm_bit_res != p->pcm_bit_res
+                   || c->storage_bit_res != p->storage_bit_res)) {
+        avpriv_request_sample(s->avctx, "Different XLL audio characteristics");
+        return AVERROR_PATCHWELCOME;
+    }
+
+    // Determine number of bits to read bit allocation coding parameter
+    if (c->storage_bit_res > 16)
+        c->nabits = 5;
+    else if (c->storage_bit_res > 8)
+        c->nabits = 4;
+    else
+        c->nabits = 3;
+
+    // Account for embedded downmix and decimator saturation
+    if ((s->nchsets > 1 || c->nfreqbands > 1) && c->nabits < 5)
+        c->nabits++;
+
+    for (band = 0, b = c->bands; band < c->nfreqbands; band++, b++) {
+        // Pairwise channel decorrelation
+        if ((b->decor_enabled = get_bits1(&s->gb)) && c->nchannels > 1) {
+            int ch_nbits = av_ceil_log2(c->nchannels);
+
+            // Original channel order
+            for (i = 0; i < c->nchannels; i++) {
+                b->orig_order[i] = get_bits(&s->gb, ch_nbits);
+                if (b->orig_order[i] >= c->nchannels) {
+                    av_log(s->avctx, AV_LOG_ERROR, "Invalid XLL original channel order\n");
+                    return AVERROR_INVALIDDATA;
                 }
             }
-            chset->ch_mask_enabled = get_bits(&s->gb, 1);
-            if (chset->ch_mask_enabled)
-                chset->ch_mask = get_bits(&s->gb, s->xll_bits4ch_mask);
-            else
-                /* Skip speaker configuration bits */
-                skip_bits_long(&s->gb, 25 * chset->channels);
+
+            // Pairwise channel coefficients
+            for (i = 0; i < c->nchannels / 2; i++)
+                b->decor_coeff[i] = get_bits1(&s->gb) ? get_linear(&s->gb, 7) : 0;
         } else {
-            chset->primary_ch_set              = 1;
-            chset->downmix_coeff_code_embedded = 0;
-            /* Spec: NumChHierChSet = 0, NumDwnMixCodeCoeffs = 0, whatever that means. */
-            chset->mapping_coeffs_present = get_bits(&s->gb, 1);
-            if (chset->mapping_coeffs_present) {
-                avpriv_report_missing_feature(s->avctx, "XLL: mapping coefficients");
-                return AVERROR_PATCHWELCOME;
-            }
+            for (i = 0; i < c->nchannels; i++)
+                b->orig_order[i] = i;
+            for (i = 0; i < c->nchannels / 2; i++)
+                b->decor_coeff[i] = 0;
         }
-        if (chset->sampling_frequency > 96000)
-            chset->num_freq_bands = 2 * (1 + get_bits(&s->gb, 1));
-        else
-            chset->num_freq_bands = 1;
 
-        if (chset->num_freq_bands > 1) {
-            avpriv_report_missing_feature(s->avctx, "XLL: num_freq_bands > 1");
-            return AVERROR_PATCHWELCOME;
+        // Adaptive predictor order
+        b->highest_pred_order = 0;
+        for (i = 0; i < c->nchannels; i++) {
+            b->adapt_pred_order[i] = get_bits(&s->gb, 4);
+            if (b->adapt_pred_order[i] > b->highest_pred_order)
+                b->highest_pred_order = b->adapt_pred_order[i];
+        }
+        if (b->highest_pred_order > s->nsegsamples) {
+            av_log(s->avctx, AV_LOG_ERROR, "Invalid XLL adaptive predicition order\n");
+            return AVERROR_INVALIDDATA;
         }
 
-        if (get_bits(&s->gb, 1)) { /* pw_ch_decor_enabled */
-            int bits = av_ceil_log2(chset->channels);
-            for (i = 0; i < chset->channels; i++) {
-                unsigned j = get_bits(&s->gb, bits);
-                if (j >= chset->channels) {
-                    av_log(s->avctx, AV_LOG_ERROR,
-                           "Original channel order value %u too large, only %d channels.\n",
-                           j, chset->channels);
+        // Fixed predictor order
+        for (i = 0; i < c->nchannels; i++)
+            b->fixed_pred_order[i] = b->adapt_pred_order[i] ? 0 : get_bits(&s->gb, 2);
+
+        // Adaptive predictor quantized reflection coefficients
+        for (i = 0; i < c->nchannels; i++) {
+            for (j = 0; j < b->adapt_pred_order[i]; j++) {
+                k = get_linear(&s->gb, 8);
+                if (k == -128) {
+                    av_log(s->avctx, AV_LOG_ERROR, "Invalid XLL reflection coefficient index\n");
                     return AVERROR_INVALIDDATA;
                 }
-                chset->orig_chan_order[0][i]     = j;
-                chset->orig_chan_order_inv[0][j] = i;
-            }
-            for (i = 0; i < chset->channels / 2; i++) {
-                if (get_bits(&s->gb, 1)) /* bChPFlag */
-                    chset->pw_ch_pairs_coeffs[0][i] = get_bits_sm(&s->gb, 7);
+                if (k < 0)
+                    b->adapt_refl_coeff[i][j] = -(int)ff_dca_xll_refl_coeff[-k];
                 else
-                    chset->pw_ch_pairs_coeffs[0][i] = 0;
+                    b->adapt_refl_coeff[i][j] =  (int)ff_dca_xll_refl_coeff[ k];
             }
-        } else {
-            for (i = 0; i < chset->channels; i++)
-                chset->orig_chan_order[0][i]     =
-                chset->orig_chan_order_inv[0][i] = i;
-            for (i = 0; i < chset->channels / 2; i++)
-                chset->pw_ch_pairs_coeffs[0][i] = 0;
-        }
-        /* Adaptive prediction order */
-        chset->adapt_order_max[0] = 0;
-        for (i = 0; i < chset->channels; i++) {
-            chset->adapt_order[0][i] = get_bits(&s->gb, 4);
-            if (chset->adapt_order_max[0] < chset->adapt_order[0][i])
-                chset->adapt_order_max[0] = chset->adapt_order[0][i];
-        }
-        /* Fixed prediction order, used in case the adaptive order
-         * above is zero */
-        for (i = 0; i < chset->channels; i++)
-            chset->fixed_order[0][i] =
-                chset->adapt_order[0][i] ? 0 : get_bits(&s->gb, 2);
-
-        for (i = 0; i < chset->channels; i++) {
-            unsigned j;
-            for (j = 0; j < chset->adapt_order[0][i]; j++)
-                chset->lpc_refl_coeffs_q_ind[0][i][j] = get_bits(&s->gb, 8);
-        }
-
-        if (s->xll_scalable_lsb) {
-            chset->lsb_fsize[0] = get_bits(&s->gb, s->xll_bits4seg_size);
-
-            for (i = 0; i < chset->channels; i++)
-                chset->scalable_lsbs[0][i] = get_bits(&s->gb, 4);
-            for (i = 0; i < chset->channels; i++)
-                chset->bit_width_adj_per_ch[0][i] = get_bits(&s->gb, 4);
-        } else {
-            memset(chset->scalable_lsbs[0], 0,
-                   chset->channels * sizeof(chset->scalable_lsbs[0][0]));
-            memset(chset->bit_width_adj_per_ch[0], 0,
-                   chset->channels * sizeof(chset->bit_width_adj_per_ch[0][0]));
         }
 
-        s->xll_channels          += chset->channels;
-        s->xll_residual_channels += chset->channels -
-                                    av_popcount(chset->residual_encode);
+        // Downmix performed by encoder in extension frequency band
+        b->dmix_embedded = c->dmix_embedded && (band == 0 || get_bits1(&s->gb));
 
-        /* FIXME: Parse header data for extra frequency bands. */
+        // MSB/LSB split flag in extension frequency band
+        if ((band == 0 && s->scalable_lsbs) || (band != 0 && get_bits1(&s->gb))) {
+            // Size of LSB section in any segment
+            b->lsb_section_size = get_bits_long(&s->gb, s->seg_size_nbits);
+            if (b->lsb_section_size < 0 || b->lsb_section_size > s->frame_size) {
+                av_log(s->avctx, AV_LOG_ERROR, "Invalid LSB section size\n");
+                return AVERROR_INVALIDDATA;
+            }
 
-        /* Skip to end of channel set sub header. */
-        i = get_bits_count(&s->gb);
-        if (hdr_pos + 8 * hdr_size < i) {
-            av_log(s->avctx, AV_LOG_ERROR,
-                   "chset header too large, %d bits, should be <= %d bits\n",
-                   i - hdr_pos, 8 * hdr_size);
-            return AVERROR_INVALIDDATA;
+            // Account for optional CRC bytes after LSB section
+            if (b->lsb_section_size && (s->band_crc_present > 2 ||
+                                        (band == 0 && s->band_crc_present > 1)))
+                b->lsb_section_size += 2;
+
+            // Number of bits to represent the samples in LSB part
+            for (i = 0; i < c->nchannels; i++) {
+                b->nscalablelsbs[i] = get_bits(&s->gb, 4);
+                if (b->nscalablelsbs[i] && !b->lsb_section_size) {
+                    av_log(s->avctx, AV_LOG_ERROR, "LSB section missing with non-zero LSB width\n");
+                    return AVERROR_INVALIDDATA;
+                }
+            }
+        } else {
+            b->lsb_section_size = 0;
+            for (i = 0; i < c->nchannels; i++)
+                b->nscalablelsbs[i] = 0;
         }
-        if (hdr_pos + 8 * hdr_size > i)
-            skip_bits_long(&s->gb, hdr_pos + 8 * hdr_size - i);
+
+        // Scalable resolution flag in extension frequency band
+        if ((band == 0 && s->scalable_lsbs) || (band != 0 && get_bits1(&s->gb))) {
+            // Number of bits discarded by authoring
+            for (i = 0; i < c->nchannels; i++)
+                b->bit_width_adjust[i] = get_bits(&s->gb, 4);
+        } else {
+            for (i = 0; i < c->nchannels; i++)
+                b->bit_width_adjust[i] = 0;
+        }
+    }
+
+    // Reserved
+    // Byte align
+    // CRC16 of channel set sub-header
+    if (ff_dca_seek_bits(&s->gb, header_pos + header_size * 8)) {
+        av_log(s->avctx, AV_LOG_ERROR, "Read past end of XLL sub-header\n");
+        return AVERROR_INVALIDDATA;
     }
+
     return 0;
 }
 
-/* parse XLL navigation table */
-int ff_dca_xll_decode_navi(DCAContext *s, int asset_end)
+static int chs_alloc_msb_band_data(DCAXllDecoder *s, DCAXllChSet *c)
 {
-    int nbands, band, chset, seg, data_start;
+    int ndecisamples = c->nfreqbands > 1 ? DCA_XLL_DECI_HISTORY_MAX : 0; // headroom only with more than one band
+    int nchsamples = s->nframesamples + ndecisamples; // stride of one channel slice
+    int i, j, nsamples = nchsamples * c->nchannels * c->nfreqbands;
+    int32_t *ptr;
+    // Carves c->sample_buffer[0] into per-band, per-channel MSB slices; returns 0 or AVERROR(ENOMEM)
+    // (Re)allocate the single MSB sample buffer shared by all bands and channels of this set
+    av_fast_malloc(&c->sample_buffer[0], &c->sample_size[0], nsamples * sizeof(int32_t));
+    if (!c->sample_buffer[0])
+        return AVERROR(ENOMEM);
 
-    /* FIXME: Supports only a single frequency band */
-    nbands = 1;
+    ptr = c->sample_buffer[0] + ndecisamples; // every slice pointer has ndecisamples elements in front of it
+    for (i = 0; i < c->nfreqbands; i++) {
+        for (j = 0; j < c->nchannels; j++) {
+            c->bands[i].msb_sample_buffer[j] = ptr; // layout is band-major, then channel
+            ptr += nchsamples;
+        }
+    }
 
-    for (band = 0; band < nbands; band++) {
-        s->xll_navi.band_size[band] = 0;
-        for (seg = 0; seg < s->xll_segments; seg++) {
-            /* Note: The spec, ETSI TS 102 114 V1.4.1 (2012-09), says
-             * we should read a base value for segment_size from the
-             * stream, before reading the sizes of the channel sets.
-             * But that's apparently incorrect. */
-            s->xll_navi.segment_size[band][seg] = 0;
+    return 0;
+}
 
-            for (chset = 0; chset < s->xll_nch_sets; chset++)
-                if (band < s->xll_chsets[chset].num_freq_bands) {
-                    s->xll_navi.chset_size[band][seg][chset] =
-                        get_bits(&s->gb, s->xll_bits4seg_size) + 1;
-                    s->xll_navi.segment_size[band][seg] +=
-                        s->xll_navi.chset_size[band][seg][chset];
-                }
-            s->xll_navi.band_size[band] += s->xll_navi.segment_size[band][seg];
+static int chs_alloc_lsb_band_data(DCAXllDecoder *s, DCAXllChSet *c)
+{
+    int i, j, nsamples = 0;
+    int32_t *ptr;
+    // Carves c->sample_buffer[1] into LSB slices for bands with an LSB section; returns 0 or AVERROR(ENOMEM)
+    // Count the samples needed by the frequency bands that have MSB/LSB split
+    for (i = 0; i < c->nfreqbands; i++)
+        if (c->bands[i].lsb_section_size)
+            nsamples += s->nframesamples * c->nchannels;
+    if (!nsamples)
+        return 0; // no band has an LSB section: nothing is allocated and the band pointers are left untouched
+
+    // (Re)allocate the LSB sample buffer
+    av_fast_malloc(&c->sample_buffer[1], &c->sample_size[1], nsamples * sizeof(int32_t));
+    if (!c->sample_buffer[1])
+        return AVERROR(ENOMEM);
+
+    ptr = c->sample_buffer[1];
+    for (i = 0; i < c->nfreqbands; i++) {
+        if (c->bands[i].lsb_section_size) {
+            for (j = 0; j < c->nchannels; j++) {
+                c->bands[i].lsb_sample_buffer[j] = ptr; // one frame of samples per channel, packed back to back
+                ptr += s->nframesamples;
+            }
+        } else {
+            for (j = 0; j < c->nchannels; j++)
+                c->bands[i].lsb_sample_buffer[j] = NULL; // band without LSB section gets no storage
         }
     }
-    /* Align to 8 bits and skip 16-bit CRC. */
-    skip_bits_long(&s->gb, 16 + ((-get_bits_count(&s->gb)) & 7));
 
-    data_start = get_bits_count(&s->gb);
-    if (data_start + 8 * s->xll_navi.band_size[0] > asset_end) {
-        av_log(s->avctx, AV_LOG_ERROR,
-               "XLL: Data in NAVI table exceeds containing asset\n"
-               "start: %d (bit), size %u (bytes), end %d (bit), error %u\n",
-               data_start, s->xll_navi.band_size[0], asset_end,
-               data_start + 8 * s->xll_navi.band_size[0] - asset_end);
-        return AVERROR_INVALIDDATA;
-    }
-    init_get_bits(&s->xll_navi.gb, s->gb.buffer + data_start / 8,
-                  8 * s->xll_navi.band_size[0]);
     return 0;
 }
 
-static void dca_xll_inv_adapt_pred(int *samples, int nsamples, unsigned order,
-                                   const int *prev, const uint8_t *q_ind)
-{
-    static const uint16_t table[0x81] = {
-            0,  3070,  5110,  7140,  9156, 11154, 13132, 15085,
-        17010, 18904, 20764, 22588, 24373, 26117, 27818, 29474,
-        31085, 32648, 34164, 35631, 37049, 38418, 39738, 41008,
-        42230, 43404, 44530, 45609, 46642, 47630, 48575, 49477,
-        50337, 51157, 51937, 52681, 53387, 54059, 54697, 55302,
-        55876, 56421, 56937, 57426, 57888, 58326, 58741, 59132,
-        59502, 59852, 60182, 60494, 60789, 61066, 61328, 61576,
-        61809, 62029, 62236, 62431, 62615, 62788, 62951, 63105,
-        63250, 63386, 63514, 63635, 63749, 63855, 63956, 64051,
-        64140, 64224, 64302, 64376, 64446, 64512, 64573, 64631,
-        64686, 64737, 64785, 64830, 64873, 64913, 64950, 64986,
-        65019, 65050, 65079, 65107, 65133, 65157, 65180, 65202,
-        65222, 65241, 65259, 65275, 65291, 65306, 65320, 65333,
-        65345, 65357, 65368, 65378, 65387, 65396, 65405, 65413,
-        65420, 65427, 65434, 65440, 65446, 65451, 65456, 65461,
-        65466, 65470, 65474, 65478, 65481, 65485, 65488, 65491,
-        65535, /* Final value is for the -128 corner case, see below. */
-    };
-    int c[DCA_XLL_AORDER_MAX];
-    int64_t s;
-    unsigned i, j;
-
-    for (i = 0; i < order; i++) {
-        if (q_ind[i] & 1)
-            /* The index value 0xff corresponds to a lookup of entry 0x80 in
-             * the table, and no value is provided in the specification. */
-            c[i] = -table[(q_ind[i] >> 1) + 1];
-        else
-            c[i] = table[q_ind[i] >> 1];
-    }
-    /* The description in the spec is a bit convoluted. We can convert
-     * the reflected values to direct values in place, using a
-     * sequence of reflections operating on two values. */
-    for (i = 1; i < order; i++) {
-        /* i = 1: scale c[0]
-         * i = 2: reflect c[0] <-> c[1]
-         * i = 3: scale c[1], reflect c[0] <-> c[2]
-         * i = 4: reflect c[0] <-> c[3] reflect c[1] <-> c[2]
-         * ... */
-        if (i & 1)
-            c[i / 2] += ((int64_t) c[i] * c[i / 2] + 0x8000) >> 16;
-        for (j = 0; j < i / 2; j++) {
-            int r0 = c[j];
-            int r1 = c[i - j - 1];
-            c[j]         += ((int64_t) c[i] * r1 + 0x8000) >> 16;
-            c[i - j - 1] += ((int64_t) c[i] * r0 + 0x8000) >> 16;
-        }
-    }
-    /* Apply predictor. */
-    /* NOTE: Processing samples in this order means that the
-     * predictor is applied to the newly reconstructed samples. */
-    if (prev) {
-        for (i = 0; i < order; i++) {
-            for (j = s = 0; j < i; j++)
-                s += (int64_t) c[j] * samples[i - 1 - j];
-            for (; j < order; j++)
-                s += (int64_t) c[j] * prev[DCA_XLL_AORDER_MAX + i - 1 - j];
-
-            samples[i] -= av_clip_intp2((s + 0x8000) >> 16, 24);
-        }
-    }
-    for (i = order; i < nsamples; i++) {
-        for (j = s = 0; j < order; j++)
-            s += (int64_t) c[j] * samples[i - 1 - j];
-
-        /* NOTE: Equations seem to imply addition, while the
-         * pseudocode seems to use subtraction.*/
-        samples[i] -= av_clip_intp2((s + 0x8000) >> 16, 24);
-    }
-}
-
-int ff_dca_xll_decode_audio(DCAContext *s, AVFrame *frame)
-{
-    /* FIXME: Decodes only the first frequency band. */
-    int seg, chset_i;
-
-    /* Coding parameters for each channel set. */
-    struct coding_params {
-        int seg_type;
-        int rice_code_flag[16];
-        int pancAuxABIT[16];
-        int pancABIT0[16];  /* Not sure what this is */
-        int pancABIT[16];   /* Not sure what this is */
-        int nSamplPart0[16];
-    } param_state[16];
-
-    GetBitContext *gb = &s->xll_navi.gb;
-    int *history;
-
-    /* Layout: First the sample buffer for one segment per channel,
-     * followed by history buffers of DCA_XLL_AORDER_MAX samples for
-     * each channel. */
-    av_fast_malloc(&s->xll_sample_buf, &s->xll_sample_buf_size,
-                   (s->xll_smpl_in_seg + DCA_XLL_AORDER_MAX) *
-                   s->xll_channels * sizeof(*s->xll_sample_buf));
-    if (!s->xll_sample_buf)
-        return AVERROR(ENOMEM);
+static int chs_parse_band_data(DCAXllDecoder *s, DCAXllChSet *c, int band, int seg, int band_data_end)
+{
+    DCAXllBand *b = &c->bands[band];
+    int i, j, k;
+    // Parses one segment of one band from s->gb; returns 0 or AVERROR_INVALIDDATA
+    // Start unpacking MSB portion of the segment
+    if (!(seg && get_bits1(&s->gb))) { // segment 0 always carries parameters; later segments read a reuse flag first
+        // Unpack segment type
+        // 0 - distinct coding parameters for each channel
+        // 1 - common coding parameters for all channels
+        c->seg_common = get_bits1(&s->gb);
+
+        // Determine number of coding parameters encoded in segment
+        k = c->seg_common ? 1 : c->nchannels;
+
+        // Unpack Rice coding parameters
+        for (i = 0; i < k; i++) {
+            // Unpack Rice coding flag
+            // 0 - linear code, 1 - Rice code
+            c->rice_code_flag[i] = get_bits1(&s->gb);
+            if (!c->seg_common && c->rice_code_flag[i]) {
+                // Unpack Hybrid Rice coding flag
+                // 0 - Rice code, 1 - Hybrid Rice code
+                if (get_bits1(&s->gb))
+                    // Unpack binary code length for isolated samples
+                    c->bitalloc_hybrid_linear[i] = get_bits(&s->gb, c->nabits) + 1;
+                else
+                    // 0 indicates no Hybrid Rice coding
+                    c->bitalloc_hybrid_linear[i] = 0;
+            } else {
+                // 0 indicates no Hybrid Rice coding
+                c->bitalloc_hybrid_linear[i] = 0;
+            }
+        }
+
+        // Unpack coding parameters
+        for (i = 0; i < k; i++) {
+            if (seg == 0) {
+                // Unpack coding parameter for part A of segment 0
+                c->bitalloc_part_a[i] = get_bits(&s->gb, c->nabits);
 
-    history = s->xll_sample_buf + s->xll_smpl_in_seg * s->xll_channels;
-
-    for (seg = 0; seg < s->xll_segments; seg++) {
-        unsigned in_channel;
-
-        for (chset_i = in_channel = 0; chset_i < s->xll_nch_sets; chset_i++) {
-            /* The spec isn't very explicit, but I think the NAVI sizes are in bytes. */
-            int end_pos = get_bits_count(gb) +
-                          8 * s->xll_navi.chset_size[0][seg][chset_i];
-            int i, j;
-            struct coding_params *params = &param_state[chset_i];
-            /* I think this flag means that we should keep seg_type and
-             * other parameters from the previous segment. */
-            int use_seg_state_code_param;
-            XllChSetSubHeader *chset = &s->xll_chsets[chset_i];
-            if (in_channel >= s->avctx->channels)
-                /* FIXME: Could go directly to next segment */
-                goto next_chset;
-
-            if (s->avctx->sample_rate != chset->sampling_frequency) {
-                av_log(s->avctx, AV_LOG_WARNING,
-                       "XLL: unexpected chset sample rate %d, expected %d\n",
-                       chset->sampling_frequency, s->avctx->sample_rate);
-                goto next_chset;
+                // Adjust for the linear code
+                if (!c->rice_code_flag[i] && c->bitalloc_part_a[i])
+                    c->bitalloc_part_a[i]++;
+
+                if (!c->seg_common)
+                    c->nsamples_part_a[i] = b->adapt_pred_order[i];
+                else
+                    c->nsamples_part_a[i] = b->highest_pred_order;
+            } else {
+                c->bitalloc_part_a[i] = 0;
+                c->nsamples_part_a[i] = 0;
             }
-            if (seg != 0)
-                use_seg_state_code_param = get_bits(gb, 1);
-            else
-                use_seg_state_code_param = 0;
-
-            if (!use_seg_state_code_param) {
-                int num_param_sets, i;
-                unsigned bits4ABIT;
-
-                params->seg_type = get_bits(gb, 1);
-                num_param_sets   = params->seg_type ? 1 : chset->channels;
-
-                if (chset->bit_width > 16) {
-                    bits4ABIT = 5;
-                } else {
-                    if (chset->bit_width > 8)
-                        bits4ABIT = 4;
-                    else
-                        bits4ABIT = 3;
-                    if (s->xll_nch_sets > 1)
-                        bits4ABIT++;
+
+            // Unpack coding parameter for part B of segment
+            c->bitalloc_part_b[i] = get_bits(&s->gb, c->nabits);
+
+            // Adjust for the linear code
+            if (!c->rice_code_flag[i] && c->bitalloc_part_b[i])
+                c->bitalloc_part_b[i]++;
+        }
+    }
+
+    // Unpack entropy codes
+    for (i = 0; i < c->nchannels; i++) {
+        int32_t *part_a, *part_b;
+        int nsamples_part_b;
+
+        // Select index of coding parameters
+        k = c->seg_common ? 0 : i;
+
+        // Slice the segment into parts A and B
+        part_a = b->msb_sample_buffer[i] + seg * s->nsegsamples;
+        part_b = part_a + c->nsamples_part_a[k];
+        nsamples_part_b = s->nsegsamples - c->nsamples_part_a[k];
+
+        if (get_bits_left(&s->gb) < 0) // stop as soon as a previous channel has overread the input
+            return AVERROR_INVALIDDATA;
+
+        if (!c->rice_code_flag[k]) {
+            // Linear codes
+            // Unpack all residuals of part A of segment 0
+            get_linear_array(&s->gb, part_a, c->nsamples_part_a[k],
+                             c->bitalloc_part_a[k]);
+
+            // Unpack all residuals of part B of segment 0 and others
+            get_linear_array(&s->gb, part_b, nsamples_part_b,
+                             c->bitalloc_part_b[k]);
+        } else {
+            // Rice codes
+            // Unpack all residuals of part A of segment 0
+            get_rice_array(&s->gb, part_a, c->nsamples_part_a[k],
+                           c->bitalloc_part_a[k]);
+
+            if (c->bitalloc_hybrid_linear[k]) {
+                // Hybrid Rice codes
+                // Unpack the number of isolated samples
+                int nisosamples = get_bits(&s->gb, s->nsegsamples_log2);
+
+                // Set all locations to 0
+                memset(part_b, 0, sizeof(*part_b) * nsamples_part_b);
+
+                // Extract the locations of isolated samples and flag by -1
+                for (j = 0; j < nisosamples; j++) {
+                    int loc = get_bits(&s->gb, s->nsegsamples_log2);
+                    if (loc >= nsamples_part_b) {
+                        av_log(s->avctx, AV_LOG_ERROR, "Invalid isolated sample location\n");
+                        return AVERROR_INVALIDDATA;
+                    }
+                    part_b[loc] = -1;
                 }
 
-                for (i = 0; i < num_param_sets; i++) {
-                    params->rice_code_flag[i] = get_bits(gb, 1);
-                    if (!params->seg_type && params->rice_code_flag[i] && get_bits(gb, 1))
-                        params->pancAuxABIT[i] = get_bits(gb, bits4ABIT) + 1;
+                // Unpack all residuals of part B of segment 0 and others
+                for (j = 0; j < nsamples_part_b; j++) {
+                    if (part_b[j])
+                        part_b[j] = get_linear(&s->gb, c->bitalloc_hybrid_linear[k]);
                     else
-                        params->pancAuxABIT[i] = 0;
+                        part_b[j] = get_rice(&s->gb, c->bitalloc_part_b[k]);
                 }
+            } else {
+                // Rice codes
+                // Unpack all residuals of part B of segment 0 and others
+                get_rice_array(&s->gb, part_b, nsamples_part_b, c->bitalloc_part_b[k]);
+            }
+        }
+    }
 
-                for (i = 0; i < num_param_sets; i++) {
-                    if (!seg) {
-                        /* Parameters for part 1 */
-                        params->pancABIT0[i] = get_bits(gb, bits4ABIT);
-                        if (params->rice_code_flag[i] == 0 && params->pancABIT0[i] > 0)
-                            /* For linear code */
-                            params->pancABIT0[i]++;
-
-                        /* NOTE: In the spec, not indexed by band??? */
-                        if (params->seg_type == 0)
-                            params->nSamplPart0[i] = chset->adapt_order[0][i];
-                        else
-                            params->nSamplPart0[i] = chset->adapt_order_max[0];
-                    } else
-                        params->nSamplPart0[i] = 0;
-
-                    /* Parameters for part 2 */
-                    params->pancABIT[i] = get_bits(gb, bits4ABIT);
-                    if (params->rice_code_flag[i] == 0 && params->pancABIT[i] > 0)
-                        /* For linear code */
-                        params->pancABIT[i]++;
-                }
+    // Unpack decimator history for frequency band 1
+    if (seg == 0 && band == 1) {
+        int nbits = get_bits(&s->gb, 5) + 1;
+        for (i = 0; i < c->nchannels; i++)
+            for (j = 1; j < DCA_XLL_DECI_HISTORY_MAX; j++) // index 0 is not read from the stream here
+                c->deci_history[i][j] = get_sbits_long(&s->gb, nbits);
+    }
+
+    // Start unpacking LSB portion of the segment
+    if (b->lsb_section_size) {
+        // Skip to the start of LSB portion, i.e. the last lsb_section_size bytes of the band data
+        if (ff_dca_seek_bits(&s->gb, band_data_end - b->lsb_section_size * 8)) {
+            av_log(s->avctx, AV_LOG_ERROR, "Read past end of XLL band data\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        // Unpack all LSB parts of residuals of this segment
+        for (i = 0; i < c->nchannels; i++) {
+            if (b->nscalablelsbs[i]) {
+                get_array(&s->gb,
+                          b->lsb_sample_buffer[i] + seg * s->nsegsamples,
+                          s->nsegsamples, b->nscalablelsbs[i]);
             }
-            for (i = 0; i < chset->channels; i++) {
-                int param_index = params->seg_type ? 0 : i;
-                int part0       = params->nSamplPart0[param_index];
-                int bits        = part0 ? params->pancABIT0[param_index] : 0;
-                int *sample_buf = s->xll_sample_buf +
-                                  (in_channel + i) * s->xll_smpl_in_seg;
-
-                if (!params->rice_code_flag[param_index]) {
-                    /* Linear code */
-                    if (bits)
-                        for (j = 0; j < part0; j++)
-                            sample_buf[j] = get_bits_sm(gb, bits);
-                    else
-                        memset(sample_buf, 0, part0 * sizeof(sample_buf[0]));
+        }
+    }
 
-                    /* Second part */
-                    bits = params->pancABIT[param_index];
-                    if (bits)
-                        for (j = part0; j < s->xll_smpl_in_seg; j++)
-                            sample_buf[j] = get_bits_sm(gb, bits);
-                    else
-                        memset(sample_buf + part0, 0,
-                               (s->xll_smpl_in_seg - part0) * sizeof(sample_buf[0]));
-                } else {
-                    int aux_bits = params->pancAuxABIT[param_index];
-
-                    for (j = 0; j < part0; j++) {
-                        /* FIXME: Is this identical to Golomb code? */
-                        int t = get_unary(gb, 1, 33) << bits;
-                        /* FIXME: Could move this test outside of the loop, for efficiency. */
-                        if (bits)
-                            t |= get_bits(gb, bits);
-                        sample_buf[j] = (t & 1) ? -(t >> 1) - 1 : (t >> 1);
-                    }
+    // Skip to the end of band data
+    if (ff_dca_seek_bits(&s->gb, band_data_end)) {
+        av_log(s->avctx, AV_LOG_ERROR, "Read past end of XLL band data\n");
+        return AVERROR_INVALIDDATA;
+    }
 
-                    /* Second part */
-                    bits = params->pancABIT[param_index];
-
-                    /* Follow the spec's suggestion of using the
-                     * buffer also to store the hybrid-rice flags. */
-                    memset(sample_buf + part0, 0,
-                           (s->xll_smpl_in_seg - part0) * sizeof(sample_buf[0]));
-
-                    if (aux_bits > 0) {
-                        /* For hybrid rice encoding, some samples are linearly
-                         * coded. According to the spec, "nBits4SamplLoci" bits
-                         * are used for each index, but this value is not
-                         * defined. I guess we should use log2(xll_smpl_in_seg)
-                         * bits. */
-                        int count = get_bits(gb, s->xll_log_smpl_in_seg);
-                        av_log(s->avctx, AV_LOG_DEBUG, "aux count %d (bits %d)\n",
-                               count, s->xll_log_smpl_in_seg);
-
-                        for (j = 0; j < count; j++)
-                            sample_buf[get_bits(gb, s->xll_log_smpl_in_seg)] = 1;
-                    }
-                    for (j = part0; j < s->xll_smpl_in_seg; j++) {
-                        if (!sample_buf[j]) {
-                            int t = get_unary(gb, 1, 33);
-                            if (bits)
-                                t = (t << bits) | get_bits(gb, bits);
-                            sample_buf[j] = (t & 1) ? -(t >> 1) - 1 : (t >> 1);
-                        } else
-                            sample_buf[j] = get_bits_sm(gb, aux_bits);
-                    }
+    return 0;
+}
+
+// Zero the MSB (and, where present, LSB) sample data of one band:
+// a single segment when seg >= 0, the whole frame when seg < 0.
+static void av_cold chs_clear_band_data(DCAXllDecoder *s, DCAXllChSet *c, int band, int seg)
+{
+    DCAXllBand *b = &c->bands[band];
+    const int whole_frame = seg < 0;
+    const int first = whole_frame ? 0 : seg * s->nsegsamples;
+    const int count = whole_frame ? s->nframesamples : s->nsegsamples;
+    int ch;
+
+    for (ch = 0; ch < c->nchannels; ch++) {
+        memset(b->msb_sample_buffer[ch] + first, 0, count * sizeof(int32_t));
+        // LSB buffers are cleared only for bands that have an LSB section
+        if (b->lsb_section_size)
+            memset(b->lsb_sample_buffer[ch] + first, 0, count * sizeof(int32_t));
+    }
+
+    // Decimator history is reset for a non-zero band on segment 0 or a full clear
+    if (band && seg <= 0)
+        memset(c->deci_history, 0, sizeof(c->deci_history));
+
+    // A full clear also drops the per-channel LSB widths and bit width adjustments
+    if (whole_frame) {
+        memset(b->nscalablelsbs, 0, sizeof(b->nscalablelsbs));
+        memset(b->bit_width_adjust, 0, sizeof(b->bit_width_adjust));
+    }
+}
+
+static void chs_filter_band_data(DCAXllDecoder *s, DCAXllChSet *c, int band)
+{
+    DCAXllBand *b = &c->bands[band];
+    int nsamples = s->nframesamples;
+    int i, j, k;
+    // Undoes prediction and pairwise decorrelation in place on the MSB buffers of one band
+    // Inverse adaptive or fixed prediction
+    for (i = 0; i < c->nchannels; i++) {
+        int32_t *buf = b->msb_sample_buffer[i];
+        int order = b->adapt_pred_order[i];
+        if (order > 0) {
+            int coeff[DCA_XLL_ADAPT_PRED_ORDER_MAX];
+            // Conversion from reflection coefficients to direct form coefficients
+            for (j = 0; j < order; j++) {
+                int rc = b->adapt_refl_coeff[i][j];
+                for (k = 0; k < (j + 1) / 2; k++) {
+                    int tmp1 = coeff[    k    ];
+                    int tmp2 = coeff[j - k - 1];
+                    coeff[    k    ] = tmp1 + mul16(rc, tmp2);
+                    coeff[j - k - 1] = tmp2 + mul16(rc, tmp1);
                 }
+                coeff[j] = rc;
+            }
+            // Inverse adaptive prediction; the first 'order' samples are left as decoded
+            for (j = 0; j < nsamples - order; j++) {
+                int64_t err = 0;
+                for (k = 0; k < order; k++)
+                    err += (int64_t)buf[j + k] * coeff[order - k - 1];
+                buf[j + k] -= (unsigned)clip23(norm16(err)); // k == order here; unsigned avoids signed-overflow UB
             }
+        } else {
+            // Inverse fixed coefficient prediction: one running sum per prediction order
+            for (j = 0; j < b->fixed_pred_order[i]; j++)
+                for (k = 1; k < nsamples; k++)
+                    buf[k] += (unsigned)buf[k - 1]; // unsigned add: crafted residuals must not trigger signed-overflow UB
+        }
+    }
 
-            for (i = 0; i < chset->channels; i++) {
-                unsigned adapt_order = chset->adapt_order[0][i];
-                int *sample_buf = s->xll_sample_buf +
-                                  (in_channel + i) * s->xll_smpl_in_seg;
-                int *prev = history + (in_channel + i) * DCA_XLL_AORDER_MAX;
-
-                if (!adapt_order) {
-                    unsigned order;
-                    for (order = chset->fixed_order[0][i]; order > 0; order--) {
-                        unsigned j;
-                        for (j = 1; j < s->xll_smpl_in_seg; j++)
-                            sample_buf[j] += sample_buf[j - 1];
-                    }
-                } else
-                    /* Inverse adaptive prediction, in place. */
-                    dca_xll_inv_adapt_pred(sample_buf, s->xll_smpl_in_seg,
-                                           adapt_order, seg ? prev : NULL,
-                                           chset->lpc_refl_coeffs_q_ind[0][i]);
-                memcpy(prev, sample_buf + s->xll_smpl_in_seg - DCA_XLL_AORDER_MAX,
-                       DCA_XLL_AORDER_MAX * sizeof(*prev));
+    // Inverse pairwise channel decorrelation
+    if (b->decor_enabled) {
+        int32_t *tmp[DCA_XLL_CHANNELS_MAX];
+
+        for (i = 0; i < c->nchannels / 2; i++) {
+            int coeff = b->decor_coeff[i];
+            if (coeff) {
+                s->dcadsp->decor(b->msb_sample_buffer[i * 2 + 1],
+                                 b->msb_sample_buffer[i * 2    ],
+                                 coeff, nsamples);
             }
-            for (i = 1; i < chset->channels; i += 2) {
-                int coeff = chset->pw_ch_pairs_coeffs[0][i / 2];
-                if (coeff != 0) {
-                    int *sample_buf = s->xll_sample_buf +
-                                      (in_channel + i) * s->xll_smpl_in_seg;
-                    int *prev = sample_buf - s->xll_smpl_in_seg;
-                    unsigned j;
-                    for (j = 0; j < s->xll_smpl_in_seg; j++)
-                        /* Shift is unspecified, but should apparently be 3. */
-                        sample_buf[j] += ((int64_t) coeff * prev[j] + 4) >> 3;
-                }
+        }
+
+        // Reorder channel pointers to the original order (pointers are permuted, samples are not copied)
+        for (i = 0; i < c->nchannels; i++)
+            tmp[i] = b->msb_sample_buffer[i];
+
+        for (i = 0; i < c->nchannels; i++)
+            b->msb_sample_buffer[b->orig_order[i]] = tmp[i];
+    }
+
+    // Map output channel pointers when the channel set has a single frequency band
+    if (c->nfreqbands == 1)
+        for (i = 0; i < c->nchannels; i++)
+            s->output_samples[c->ch_remap[i]] = b->msb_sample_buffer[i];
+}
+
+static int chs_get_lsb_width(DCAXllDecoder *s, DCAXllChSet *c, int band, int ch)
+{
+    int adj = c->bands[band].bit_width_adjust[ch];
+    int shift = c->bands[band].nscalablelsbs[ch];
+
+    if (s->fixed_lsb_width)
+        shift = s->fixed_lsb_width;
+    else if (shift && adj)
+        shift += adj - 1;
+    else
+        shift += adj;
+
+    return shift;
+}
+
+static void chs_assemble_msbs_lsbs(DCAXllDecoder *s, DCAXllChSet *c, int band)
+{
+    DCAXllBand *b = &c->bands[band];
+    int n, ch, nsamples = s->nframesamples;
+
+    for (ch = 0; ch < c->nchannels; ch++) {
+        int shift = chs_get_lsb_width(s, c, band, ch);
+        if (shift) {
+            int32_t *msb = b->msb_sample_buffer[ch];
+            if (b->nscalablelsbs[ch]) {
+                int32_t *lsb = b->lsb_sample_buffer[ch];
+                int adj = b->bit_width_adjust[ch];
+                for (n = 0; n < nsamples; n++)
+                    msb[n] = msb[n] * (1 << shift) + (lsb[n] << adj);
+            } else {
+                for (n = 0; n < nsamples; n++)
+                    msb[n] = msb[n] * (1 << shift);
             }
+        }
+    }
+}
 
-            if (s->xll_scalable_lsb) {
-                int lsb_start = end_pos - 8 * chset->lsb_fsize[0] -
-                                8 * (s->xll_banddata_crc & 2);
-                int done;
-                i = get_bits_count(gb);
-                if (i > lsb_start) {
-                    av_log(s->avctx, AV_LOG_ERROR,
-                           "chset data lsb exceeds NAVI size, end_pos %d, lsb_start %d, pos %d\n",
-                           end_pos, lsb_start, i);
-                    return AVERROR_INVALIDDATA;
-                }
-                if (i < lsb_start)
-                    skip_bits_long(gb, lsb_start - i);
-
-                for (i = done = 0; i < chset->channels; i++) {
-                    int bits = chset->scalable_lsbs[0][i];
-                    if (bits > 0) {
-                        /* The channel reordering is conceptually done
-                         * before adding the lsb:s, so we need to do
-                         * the inverse permutation here. */
-                        unsigned pi = chset->orig_chan_order_inv[0][i];
-                        int *sample_buf = s->xll_sample_buf +
-                                          (in_channel + pi) * s->xll_smpl_in_seg;
-                        int adj = chset->bit_width_adj_per_ch[0][i];
-                        int msb_shift = bits;
-                        unsigned j;
-
-                        if (adj > 0)
-                            msb_shift += adj - 1;
-
-                        for (j = 0; j < s->xll_smpl_in_seg; j++)
-                            sample_buf[j] = (sample_buf[j] << msb_shift) +
-                                            (get_bits(gb, bits) << adj);
-
-                        done += bits * s->xll_smpl_in_seg;
+static int chs_assemble_freq_bands(DCAXllDecoder *s, DCAXllChSet *c)
+{
+    int ch, nsamples = s->nframesamples;
+    int32_t *ptr;
+
+    av_assert1(c->nfreqbands > 1);
+
+    // Reallocate frequency band assembly buffer
+    av_fast_malloc(&c->sample_buffer[2], &c->sample_size[2],
+                   2 * nsamples * c->nchannels * sizeof(int32_t));
+    if (!c->sample_buffer[2])
+        return AVERROR(ENOMEM);
+
+    // Assemble frequency bands 0 and 1
+    ptr = c->sample_buffer[2];
+    for (ch = 0; ch < c->nchannels; ch++) {
+        int32_t *band0 = c->bands[0].msb_sample_buffer[ch];
+        int32_t *band1 = c->bands[1].msb_sample_buffer[ch];
+
+        // Copy decimator history
+        memcpy(band0 - DCA_XLL_DECI_HISTORY_MAX,
+               c->deci_history[ch], sizeof(c->deci_history[0]));
+
+        // Filter
+        s->dcadsp->assemble_freq_bands(ptr, band0, band1,
+                                       ff_dca_xll_band_coeff,
+                                       nsamples);
+
+        // Remap output channel pointer to assembly buffer
+        s->output_samples[c->ch_remap[ch]] = ptr;
+        ptr += nsamples * 2;
+    }
+
+    return 0;
+}
+
+static int parse_common_header(DCAXllDecoder *s)
+{
+    int stream_ver, header_size, frame_size_nbits, nframesegs_log2;
+
+    // XLL extension sync word
+    if (get_bits_long(&s->gb, 32) != DCA_SYNCWORD_XLL) {
+        av_log(s->avctx, AV_LOG_VERBOSE, "Invalid XLL sync word\n");
+        return AVERROR(EAGAIN);
+    }
+
+    // Version number
+    stream_ver = get_bits(&s->gb, 4) + 1;
+    if (stream_ver > 1) {
+        avpriv_request_sample(s->avctx, "XLL stream version %d", stream_ver);
+        return AVERROR_PATCHWELCOME;
+    }
+
+    // Lossless frame header length
+    header_size = get_bits(&s->gb, 8) + 1;
+
+    // Check CRC
+    if ((s->avctx->err_recognition & (AV_EF_CRCCHECK | AV_EF_CAREFUL))
+        && ff_dca_check_crc(&s->gb, 32, header_size * 8)) {
+        av_log(s->avctx, AV_LOG_ERROR, "Invalid XLL common header checksum\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    // Number of bits used to read frame size
+    frame_size_nbits = get_bits(&s->gb, 5) + 1;
+
+    // Number of bytes in a lossless frame
+    s->frame_size = get_bits_long(&s->gb, frame_size_nbits);
+    if (s->frame_size < 0 || s->frame_size >= DCA_XLL_PBR_BUFFER_MAX) {
+        av_log(s->avctx, AV_LOG_ERROR, "Invalid XLL frame size (%d bytes)\n", s->frame_size);
+        return AVERROR_INVALIDDATA;
+    }
+    s->frame_size++;
+
+    // Number of channel sets per frame
+    s->nchsets = get_bits(&s->gb, 4) + 1;
+    if (s->nchsets > DCA_XLL_CHSETS_MAX) {
+        avpriv_request_sample(s->avctx, "%d XLL channel sets", s->nchsets);
+        return AVERROR_PATCHWELCOME;
+    }
+
+    // Number of segments per frame
+    nframesegs_log2 = get_bits(&s->gb, 4);
+    s->nframesegs = 1 << nframesegs_log2;
+    if (s->nframesegs > 1024) {
+        av_log(s->avctx, AV_LOG_ERROR, "Too many segments per XLL frame\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    // Samples in segment per one frequency band for the first channel set
+    // Maximum value is 256 for sampling frequencies <= 48 kHz
+    // Maximum value is 512 for sampling frequencies > 48 kHz
+    s->nsegsamples_log2 = get_bits(&s->gb, 4);
+    if (!s->nsegsamples_log2) {
+        av_log(s->avctx, AV_LOG_ERROR, "Too few samples per XLL segment\n");
+        return AVERROR_INVALIDDATA;
+    }
+    s->nsegsamples = 1 << s->nsegsamples_log2;
+    if (s->nsegsamples > 512) {
+        av_log(s->avctx, AV_LOG_ERROR, "Too many samples per XLL segment\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    // Samples in frame per one frequency band for the first channel set
+    s->nframesamples_log2 = s->nsegsamples_log2 + nframesegs_log2;
+    s->nframesamples = 1 << s->nframesamples_log2;
+    if (s->nframesamples > 65536) {
+        av_log(s->avctx, AV_LOG_ERROR, "Too many samples per XLL frame\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    // Number of bits used to read segment size
+    s->seg_size_nbits = get_bits(&s->gb, 5) + 1;
+
+    // Presence of CRC16 within each frequency band
+    // 0 - No CRC16 within band
+    // 1 - CRC16 placed at the end of MSB0
+    // 2 - CRC16 placed at the end of MSB0 and LSB0
+    // 3 - CRC16 placed at the end of MSB0 and LSB0 and other frequency bands
+    s->band_crc_present = get_bits(&s->gb, 2);
+
+    // MSB/LSB split flag
+    s->scalable_lsbs = get_bits1(&s->gb);
+
+    // Channel position mask
+    s->ch_mask_nbits = get_bits(&s->gb, 5) + 1;
+
+    // Fixed LSB width
+    if (s->scalable_lsbs)
+        s->fixed_lsb_width = get_bits(&s->gb, 4);
+    else
+        s->fixed_lsb_width = 0;
+
+    // Reserved
+    // Byte align
+    // Header CRC16 protection
+    if (ff_dca_seek_bits(&s->gb, header_size * 8)) {
+        av_log(s->avctx, AV_LOG_ERROR, "Read past end of XLL common header\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    return 0;
+}
+
+static int is_hier_dmix_chset(DCAXllChSet *c)
+{
+    return !c->primary_chset && c->dmix_embedded && c->hier_chset;
+}
+
+static DCAXllChSet *find_next_hier_dmix_chset(DCAXllDecoder *s, DCAXllChSet *c)
+{
+    if (c->hier_chset)
+        while (++c < &s->chset[s->nchsets])
+            if (is_hier_dmix_chset(c))
+                return c;
+
+    return NULL;
+}
+
+static void prescale_down_mix(DCAXllChSet *c, DCAXllChSet *o)
+{
+    int i, j, *coeff_ptr = c->dmix_coeff;
+
+    for (i = 0; i < c->hier_ofs; i++) {
+        int scale = o->dmix_scale[i];
+        int scale_inv = o->dmix_scale_inv[i];
+        c->dmix_scale[i] = mul15(c->dmix_scale[i], scale);
+        c->dmix_scale_inv[i] = mul16(c->dmix_scale_inv[i], scale_inv);
+        for (j = 0; j < c->nchannels; j++) {
+            int coeff = mul16(*coeff_ptr, scale_inv);
+            *coeff_ptr++ = mul15(coeff, o->dmix_scale[c->hier_ofs + j]);
+        }
+    }
+}
+
+static int parse_sub_headers(DCAXllDecoder *s, DCAExssAsset *asset)
+{
+    DCAContext *dca = s->avctx->priv_data;
+    DCAXllChSet *c;
+    int i, ret;
+
+    // Parse channel set headers
+    s->nfreqbands = 0;
+    s->nchannels = 0;
+    s->nreschsets = 0;
+    for (i = 0, c = s->chset; i < s->nchsets; i++, c++) {
+        c->hier_ofs = s->nchannels;
+        if ((ret = chs_parse_header(s, c, asset)) < 0)
+            return ret;
+        if (c->nfreqbands > s->nfreqbands)
+            s->nfreqbands = c->nfreqbands;
+        if (c->hier_chset)
+            s->nchannels += c->nchannels;
+        if (c->residual_encode != (1 << c->nchannels) - 1)
+            s->nreschsets++;
+    }
+
+    // Pre-scale downmixing coefficients for all non-primary channel sets
+    for (i = s->nchsets - 1, c = &s->chset[i]; i > 0; i--, c--) {
+        if (is_hier_dmix_chset(c)) {
+            DCAXllChSet *o = find_next_hier_dmix_chset(s, c);
+            if (o)
+                prescale_down_mix(c, o);
+        }
+    }
+
+    // Determine number of active channel sets to decode
+    switch (dca->request_channel_layout) {
+    case DCA_SPEAKER_LAYOUT_STEREO:
+        s->nactivechsets = 1;
+        break;
+    case DCA_SPEAKER_LAYOUT_5POINT0:
+    case DCA_SPEAKER_LAYOUT_5POINT1:
+        s->nactivechsets = (s->chset[0].nchannels < 5 && s->nchsets > 1) ? 2 : 1;
+        break;
+    default:
+        s->nactivechsets = s->nchsets;
+        break;
+    }
+
+    return 0;
+}
+
+static int parse_navi_table(DCAXllDecoder *s)
+{
+    int chs, seg, band, navi_nb, navi_pos, *navi_ptr;
+    DCAXllChSet *c;
+
+    // Determine size of NAVI table
+    navi_nb = s->nfreqbands * s->nframesegs * s->nchsets;
+    if (navi_nb > 1024) {
+        av_log(s->avctx, AV_LOG_ERROR, "Too many NAVI entries (%d)\n", navi_nb);
+        return AVERROR_INVALIDDATA;
+    }
+
+    // Reallocate NAVI table
+    av_fast_malloc(&s->navi, &s->navi_size, navi_nb * sizeof(*s->navi));
+    if (!s->navi)
+        return AVERROR(ENOMEM);
+
+    // Parse NAVI
+    navi_pos = get_bits_count(&s->gb);
+    navi_ptr = s->navi;
+    for (band = 0; band < s->nfreqbands; band++) {
+        for (seg = 0; seg < s->nframesegs; seg++) {
+            for (chs = 0, c = s->chset; chs < s->nchsets; chs++, c++) {
+                int size = 0;
+                if (c->nfreqbands > band) {
+                    size = get_bits_long(&s->gb, s->seg_size_nbits);
+                    if (size < 0 || size >= s->frame_size) {
+                        av_log(s->avctx, AV_LOG_ERROR, "Invalid NAVI segment size (%d bytes)\n", size);
+                        return AVERROR_INVALIDDATA;
                     }
+                    size++;
                 }
-                if (done > 8 * chset->lsb_fsize[0]) {
-                    av_log(s->avctx, AV_LOG_ERROR,
-                           "chset lsb exceeds lsb_size\n");
-                    return AVERROR_INVALIDDATA;
-                }
+                *navi_ptr++ = size;
             }
+        }
+    }
+
+    // Byte align
+    // CRC16
+    skip_bits(&s->gb, -get_bits_count(&s->gb) & 7);
+    skip_bits(&s->gb, 16);
+
+    // Check CRC
+    if ((s->avctx->err_recognition & (AV_EF_CRCCHECK | AV_EF_CAREFUL))
+        && ff_dca_check_crc(&s->gb, navi_pos, get_bits_count(&s->gb))) {
+        av_log(s->avctx, AV_LOG_ERROR, "Invalid NAVI checksum\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    return 0;
+}
 
-            /* Store output. */
-            for (i = 0; i < chset->channels; i++) {
-                int *sample_buf = s->xll_sample_buf +
-                                  (in_channel + i) * s->xll_smpl_in_seg;
-                int shift = 1 - chset->bit_resolution;
-                int out_channel = chset->orig_chan_order[0][i];
-                float *out;
-
-                /* XLL uses the channel order C, L, R, and we want L,
-                 * R, C. FIXME: Generalize. */
-                if (chset->ch_mask_enabled &&
-                    (chset->ch_mask & 7) == 7 && out_channel < 3)
-                    out_channel = out_channel ? out_channel - 1 : 2;
-
-                out_channel += in_channel;
-                if (out_channel >= s->avctx->channels)
-                    continue;
-
-                out  = (float *) frame->extended_data[out_channel];
-                out += seg * s->xll_smpl_in_seg;
-
-                /* NOTE: A one bit means residual encoding is *not* used. */
-                if ((chset->residual_encode >> i) & 1) {
-                    /* Replace channel samples.
-                     * FIXME: Most likely not the right thing to do. */
-                    for (j = 0; j < s->xll_smpl_in_seg; j++)
-                        out[j] = ldexpf(sample_buf[j], shift);
-                } else {
-                    /* Add residual signal to core channel */
-                    for (j = 0; j < s->xll_smpl_in_seg; j++)
-                        out[j] += ldexpf(sample_buf[j], shift);
+static int parse_band_data(DCAXllDecoder *s)
+{
+    int ret, chs, seg, band, navi_pos, *navi_ptr;
+    DCAXllChSet *c;
+
+    for (chs = 0, c = s->chset; chs < s->nactivechsets; chs++, c++) {
+        if ((ret = chs_alloc_msb_band_data(s, c)) < 0)
+            return ret;
+        if ((ret = chs_alloc_lsb_band_data(s, c)) < 0)
+            return ret;
+    }
+
+    navi_pos = get_bits_count(&s->gb);
+    navi_ptr = s->navi;
+    for (band = 0; band < s->nfreqbands; band++) {
+        for (seg = 0; seg < s->nframesegs; seg++) {
+            for (chs = 0, c = s->chset; chs < s->nchsets; chs++, c++) {
+                if (c->nfreqbands > band) {
+                    navi_pos += *navi_ptr * 8;
+                    if (navi_pos > s->gb.size_in_bits) {
+                        av_log(s->avctx, AV_LOG_ERROR, "Invalid NAVI position\n");
+                        return AVERROR_INVALIDDATA;
+                    }
+                    if (chs < s->nactivechsets &&
+                        (ret = chs_parse_band_data(s, c, band, seg, navi_pos)) < 0) {
+                        if (s->avctx->err_recognition & AV_EF_EXPLODE)
+                            return ret;
+                        chs_clear_band_data(s, c, band, seg);
+                    }
+                    s->gb.index = navi_pos;
                 }
+                navi_ptr++;
             }
+        }
+    }
 
-            if (chset->downmix_coeff_code_embedded &&
-                !chset->primary_ch_set && chset->hier_chset) {
-                /* Undo hierarchical downmix of earlier channels. */
-                unsigned mix_channel;
-                for (mix_channel = 0; mix_channel < in_channel; mix_channel++) {
-                    float *mix_buf;
-                    const int *col;
-                    float coeff;
-                    unsigned row;
-                    /* Similar channel reorder C, L, R vs L, R, C reorder. */
-                    if (chset->ch_mask_enabled &&
-                        (chset->ch_mask & 7) == 7 && mix_channel < 3)
-                        mix_buf = (float *) frame->extended_data[mix_channel ? mix_channel - 1 : 2];
-                    else
-                        mix_buf = (float *) frame->extended_data[mix_channel];
-
-                    mix_buf += seg * s->xll_smpl_in_seg;
-                    col = &chset->downmix_coeffs[mix_channel * (chset->channels + 1)];
-
-                    /* Scale */
-                    coeff = ldexpf(col[0], -16);
-                    for (j = 0; j < s->xll_smpl_in_seg; j++)
-                        mix_buf[j] *= coeff;
-
-                    for (row = 0;
-                         row < chset->channels && in_channel + row < s->avctx->channels;
-                         row++)
-                        if (col[row + 1]) {
-                            const float *new_channel =
-                                (const float *) frame->extended_data[in_channel + row];
-                            new_channel += seg * s->xll_smpl_in_seg;
-                            coeff        = ldexpf(col[row + 1], -15);
-                            for (j = 0; j < s->xll_smpl_in_seg; j++)
-                                mix_buf[j] -= coeff * new_channel[j];
-                        }
+    return 0;
+}
+
+static int parse_frame(DCAXllDecoder *s, uint8_t *data, int size, DCAExssAsset *asset)
+{
+    int ret;
+
+    if ((ret = init_get_bits8(&s->gb, data, size)) < 0)
+        return ret;
+    if ((ret = parse_common_header(s)) < 0)
+        return ret;
+    if ((ret = parse_sub_headers(s, asset)) < 0)
+        return ret;
+    if ((ret = parse_navi_table(s)) < 0)
+        return ret;
+    if ((ret = parse_band_data(s)) < 0)
+        return ret;
+    if (ff_dca_seek_bits(&s->gb, s->frame_size * 8)) {
+        av_log(s->avctx, AV_LOG_ERROR, "Read past end of XLL frame\n");
+        return AVERROR_INVALIDDATA;
+    }
+    return ret;
+}
+
+static void clear_pbr(DCAXllDecoder *s)
+{
+    s->pbr_length = 0;
+    s->pbr_delay = 0;
+}
+
+static int copy_to_pbr(DCAXllDecoder *s, uint8_t *data, int size, int delay)
+{
+    if (size > DCA_XLL_PBR_BUFFER_MAX)
+        return AVERROR(ENOSPC);
+
+    if (!s->pbr_buffer && !(s->pbr_buffer = av_malloc(DCA_XLL_PBR_BUFFER_MAX + DCA_BUFFER_PADDING_SIZE)))
+        return AVERROR(ENOMEM);
+
+    memcpy(s->pbr_buffer, data, size);
+    s->pbr_length = size;
+    s->pbr_delay = delay;
+    return 0;
+}
+
+static int parse_frame_no_pbr(DCAXllDecoder *s, uint8_t *data, int size, DCAExssAsset *asset)
+{
+    int ret = parse_frame(s, data, size, asset);
+
+    // If XLL packet data didn't start with a sync word, we must have jumped
+    // right into the middle of PBR smoothing period
+    if (ret == AVERROR(EAGAIN) && asset->xll_sync_present && asset->xll_sync_offset < size) {
+        // Skip to the next sync word in this packet
+        data += asset->xll_sync_offset;
+        size -= asset->xll_sync_offset;
+
+        // If decoding delay is set, put the frame into PBR buffer and return
+        // failure code. Higher level decoder is expected to switch to lossy
+        // core decoding or mute its output until decoding delay expires.
+        if (asset->xll_delay_nframes > 0) {
+            if ((ret = copy_to_pbr(s, data, size, asset->xll_delay_nframes)) < 0)
+                return ret;
+            return AVERROR(EAGAIN);
+        }
+
+        // No decoding delay, just parse the frame in place
+        ret = parse_frame(s, data, size, asset);
+    }
+
+    if (ret < 0)
+        return ret;
+
+    if (s->frame_size > size)
+        return AVERROR(EINVAL);
+
+    // If the XLL decoder didn't consume full packet, start PBR smoothing period
+    if (s->frame_size < size)
+        if ((ret = copy_to_pbr(s, data + s->frame_size, size - s->frame_size, 0)) < 0)
+            return ret;
+
+    return 0;
+}
+
+static int parse_frame_pbr(DCAXllDecoder *s, uint8_t *data, int size, DCAExssAsset *asset)
+{
+    int ret;
+
+    if (size > DCA_XLL_PBR_BUFFER_MAX - s->pbr_length) {
+        ret = AVERROR(ENOSPC);
+        goto fail;
+    }
+
+    memcpy(s->pbr_buffer + s->pbr_length, data, size);
+    s->pbr_length += size;
+
+    // Respect decoding delay after synchronization error
+    if (s->pbr_delay > 0 && --s->pbr_delay)
+        return AVERROR(EAGAIN);
+
+    if ((ret = parse_frame(s, s->pbr_buffer, s->pbr_length, asset)) < 0)
+        goto fail;
+
+    if (s->frame_size > s->pbr_length) {
+        ret = AVERROR(EINVAL);
+        goto fail;
+    }
+
+    if (s->frame_size == s->pbr_length) {
+        // End of PBR smoothing period
+        clear_pbr(s);
+    } else {
+        s->pbr_length -= s->frame_size;
+        memmove(s->pbr_buffer, s->pbr_buffer + s->frame_size, s->pbr_length);
+    }
+
+    return 0;
+
+fail:
+    // For now, throw out all PBR state on failure.
+    // Perhaps we can be smarter and try to resync somehow.
+    clear_pbr(s);
+    return ret;
+}
+
+int ff_dca_xll_parse(DCAXllDecoder *s, uint8_t *data, DCAExssAsset *asset)
+{
+    int ret;
+
+    if (s->hd_stream_id != asset->hd_stream_id) {
+        clear_pbr(s);
+        s->hd_stream_id = asset->hd_stream_id;
+    }
+
+    if (s->pbr_length)
+        ret = parse_frame_pbr(s, data + asset->xll_offset, asset->xll_size, asset);
+    else
+        ret = parse_frame_no_pbr(s, data + asset->xll_offset, asset->xll_size, asset);
+
+    return ret;
+}
+
+static void undo_down_mix(DCAXllDecoder *s, DCAXllChSet *o, int band)
+{
+    int i, j, k, nchannels = 0, *coeff_ptr = o->dmix_coeff;
+    DCAXllChSet *c;
+
+    for (i = 0, c = s->chset; i < s->nactivechsets; i++, c++) {
+        if (!c->hier_chset)
+            continue;
+
+        av_assert1(band < c->nfreqbands);
+        for (j = 0; j < c->nchannels; j++) {
+            for (k = 0; k < o->nchannels; k++) {
+                int coeff = *coeff_ptr++;
+                if (coeff) {
+                    s->dcadsp->dmix_sub(c->bands[band].msb_sample_buffer[j],
+                                        o->bands[band].msb_sample_buffer[k],
+                                        coeff, s->nframesamples);
+                    if (band)
+                        s->dcadsp->dmix_sub(c->deci_history[j],
+                                            o->deci_history[k],
+                                            coeff, DCA_XLL_DECI_HISTORY_MAX);
                 }
             }
+        }
 
-next_chset:
-            in_channel += chset->channels;
-            /* Skip to next channel set using the NAVI info. */
-            i = get_bits_count(gb);
-            if (i > end_pos) {
-                av_log(s->avctx, AV_LOG_ERROR,
-                       "chset data exceeds NAVI size\n");
-                return AVERROR_INVALIDDATA;
+        nchannels += c->nchannels;
+        if (nchannels >= o->hier_ofs)
+            break;
+    }
+}
+
+static void scale_down_mix(DCAXllDecoder *s, DCAXllChSet *o, int band)
+{
+    int i, j, nchannels = 0;
+    DCAXllChSet *c;
+
+    for (i = 0, c = s->chset; i < s->nactivechsets; i++, c++) {
+        if (!c->hier_chset)
+            continue;
+
+        av_assert1(band < c->nfreqbands);
+        for (j = 0; j < c->nchannels; j++) {
+            int scale = o->dmix_scale[nchannels++];
+            if (scale != (1 << 15)) {
+                s->dcadsp->dmix_scale(c->bands[band].msb_sample_buffer[j],
+                                      scale, s->nframesamples);
+                if (band)
+                    s->dcadsp->dmix_scale(c->deci_history[j],
+                                          scale, DCA_XLL_DECI_HISTORY_MAX);
             }
-            if (i < end_pos)
-                skip_bits_long(gb, end_pos - i);
+        }
+
+        if (nchannels >= o->hier_ofs)
+            break;
+    }
+}
+
+// Clear all band data and replace non-residual encoded channels with lossy
+// counterparts
+static void av_cold force_lossy_output(DCAXllDecoder *s, DCAXllChSet *c)
+{
+    DCAContext *dca = s->avctx->priv_data;
+    int band, ch;
+
+    for (band = 0; band < c->nfreqbands; band++)
+        chs_clear_band_data(s, c, band, -1);
+
+    for (ch = 0; ch < c->nchannels; ch++) {
+        if (!(c->residual_encode & (1 << ch)))
+            continue;
+        if (ff_dca_core_map_spkr(&dca->core, c->ch_remap[ch]) < 0)
+            continue;
+        c->residual_encode &= ~(1 << ch);
+    }
+}
+
+static int combine_residual_frame(DCAXllDecoder *s, DCAXllChSet *c)
+{
+    DCAContext *dca = s->avctx->priv_data;
+    int ch, nsamples = s->nframesamples;
+    DCAXllChSet *o;
+
+    // Verify that core is compatible
+    if (!(dca->packet & DCA_PACKET_CORE)) {
+        av_log(s->avctx, AV_LOG_ERROR, "Residual encoded channels are present without core\n");
+        return AVERROR(EINVAL);
+    }
+
+    if (c->freq != dca->core.output_rate) {
+        av_log(s->avctx, AV_LOG_WARNING, "Sample rate mismatch between core (%d Hz) and XLL (%d Hz)\n", dca->core.output_rate, c->freq);
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (nsamples != dca->core.npcmsamples) {
+        av_log(s->avctx, AV_LOG_WARNING, "Number of samples per frame mismatch between core (%d) and XLL (%d)\n", dca->core.npcmsamples, nsamples);
+        return AVERROR_INVALIDDATA;
+    }
+
+    // See if this channel set is downmixed and find the next channel set in
+    // hierarchy. If downmixed, undo core pre-scaling before combining with
+    // residual (residual is not scaled).
+    o = find_next_hier_dmix_chset(s, c);
+
+    // Reduce core bit width and combine with residual
+    for (ch = 0; ch < c->nchannels; ch++) {
+        int n, spkr, shift, round;
+        int32_t *src, *dst;
+
+        if (c->residual_encode & (1 << ch))
+            continue;
+
+        // Map this channel to core speaker
+        spkr = ff_dca_core_map_spkr(&dca->core, c->ch_remap[ch]);
+        if (spkr < 0) {
+            av_log(s->avctx, AV_LOG_WARNING, "Residual encoded channel (%d) references unavailable core channel\n", c->ch_remap[ch]);
+            return AVERROR_INVALIDDATA;
+        }
+
+        // Account for LSB width
+        shift = 24 - c->pcm_bit_res + chs_get_lsb_width(s, c, 0, ch);
+        if (shift > 24) {
+            av_log(s->avctx, AV_LOG_WARNING, "Invalid core shift (%d bits)\n", shift);
+            return AVERROR_INVALIDDATA;
+        }
+
+        round = shift > 0 ? 1 << (shift - 1) : 0;
+
+        src = dca->core.output_samples[spkr];
+        dst = c->bands[0].msb_sample_buffer[ch];
+        if (o) {
+            // Undo embedded core downmix pre-scaling
+            int scale_inv = o->dmix_scale_inv[c->hier_ofs + ch];
+            for (n = 0; n < nsamples; n++)
+                dst[n] += clip23((mul16(src[n], scale_inv) + round) >> shift);
+        } else {
+            // No downmix scaling
+            for (n = 0; n < nsamples; n++)
+                dst[n] += (src[n] + round) >> shift;
         }
     }
+
     return 0;
 }
+
+int ff_dca_xll_filter_frame(DCAXllDecoder *s, AVFrame *frame)
+{
+    AVCodecContext *avctx = s->avctx;
+    DCAContext *dca = avctx->priv_data;
+    DCAExssAsset *asset = &dca->exss.assets[0];
+    DCAXllChSet *p = &s->chset[0], *c;
+    enum AVMatrixEncoding matrix_encoding = AV_MATRIX_ENCODING_NONE;
+    int i, j, k, ret, shift, nsamples, request_mask;
+    int ch_remap[DCA_SPEAKER_COUNT];
+
+    // Force lossy downmixed output during recovery
+    if (dca->packet & DCA_PACKET_RECOVERY) {
+        for (i = 0, c = s->chset; i < s->nchsets; i++, c++) {
+            if (i < s->nactivechsets)
+                force_lossy_output(s, c);
+
+            if (!c->primary_chset)
+                c->dmix_embedded = 0;
+        }
+
+        s->scalable_lsbs = 0;
+        s->fixed_lsb_width = 0;
+    }
+
+    // Filter frequency bands for active channel sets
+    s->output_mask = 0;
+    for (i = 0, c = s->chset; i < s->nactivechsets; i++, c++) {
+        chs_filter_band_data(s, c, 0);
+
+        if (c->residual_encode != (1 << c->nchannels) - 1
+            && (ret = combine_residual_frame(s, c)) < 0)
+            return ret;
+
+        if (s->scalable_lsbs)
+            chs_assemble_msbs_lsbs(s, c, 0);
+
+        if (c->nfreqbands > 1) {
+            chs_filter_band_data(s, c, 1);
+            chs_assemble_msbs_lsbs(s, c, 1);
+        }
+
+        s->output_mask |= c->ch_mask;
+    }
+
+    // Undo hierarchical downmix and/or apply scaling
+    for (i = 1, c = &s->chset[1]; i < s->nchsets; i++, c++) {
+        if (!is_hier_dmix_chset(c))
+            continue;
+
+        if (i >= s->nactivechsets) {
+            for (j = 0; j < c->nfreqbands; j++)
+                if (c->bands[j].dmix_embedded)
+                    scale_down_mix(s, c, j);
+            break;
+        }
+
+        for (j = 0; j < c->nfreqbands; j++)
+            if (c->bands[j].dmix_embedded)
+                undo_down_mix(s, c, j);
+    }
+
+    // Assemble frequency bands for active channel sets
+    if (s->nfreqbands > 1) {
+        for (i = 0; i < s->nactivechsets; i++)
+            if ((ret = chs_assemble_freq_bands(s, &s->chset[i])) < 0)
+                return ret;
+    }
+
+    // Normalize to regular 5.1 layout if downmixing
+    if (dca->request_channel_layout) {
+        if (s->output_mask & DCA_SPEAKER_MASK_Lss) {
+            s->output_samples[DCA_SPEAKER_Ls] = s->output_samples[DCA_SPEAKER_Lss];
+            s->output_mask = (s->output_mask & ~DCA_SPEAKER_MASK_Lss) | DCA_SPEAKER_MASK_Ls;
+        }
+        if (s->output_mask & DCA_SPEAKER_MASK_Rss) {
+            s->output_samples[DCA_SPEAKER_Rs] = s->output_samples[DCA_SPEAKER_Rss];
+            s->output_mask = (s->output_mask & ~DCA_SPEAKER_MASK_Rss) | DCA_SPEAKER_MASK_Rs;
+        }
+    }
+
+    // Handle downmixing to stereo request
+    if (dca->request_channel_layout == DCA_SPEAKER_LAYOUT_STEREO
+        && DCA_HAS_STEREO(s->output_mask) && p->dmix_embedded
+        && (p->dmix_type == DCA_DMIX_TYPE_LoRo ||
+            p->dmix_type == DCA_DMIX_TYPE_LtRt))
+        request_mask = DCA_SPEAKER_LAYOUT_STEREO;
+    else
+        request_mask = s->output_mask;
+    if (!ff_dca_set_channel_layout(avctx, ch_remap, request_mask))
+        return AVERROR(EINVAL);
+
+    avctx->sample_rate = p->freq << (s->nfreqbands - 1);
+
+    switch (p->storage_bit_res) {
+    case 16:
+        avctx->sample_fmt = AV_SAMPLE_FMT_S16P;
+        break;
+    case 24:
+        avctx->sample_fmt = AV_SAMPLE_FMT_S32P;
+        break;
+    default:
+        return AVERROR(EINVAL);
+    }
+
+    avctx->bits_per_raw_sample = p->storage_bit_res;
+    avctx->profile = FF_PROFILE_DTS_HD_MA;
+    avctx->bit_rate = 0;
+
+    frame->nb_samples = nsamples = s->nframesamples << (s->nfreqbands - 1);
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
+        return ret;
+
+    // Downmix primary channel set to stereo
+    if (request_mask != s->output_mask) {
+        ff_dca_downmix_to_stereo_fixed(s->dcadsp, s->output_samples,
+                                       p->dmix_coeff, nsamples,
+                                       s->output_mask);
+    }
+
+    shift = p->storage_bit_res - p->pcm_bit_res;
+    for (i = 0; i < avctx->channels; i++) {
+        int32_t *samples = s->output_samples[ch_remap[i]];
+        if (frame->format == AV_SAMPLE_FMT_S16P) {
+            int16_t *plane = (int16_t *)frame->extended_data[i];
+            for (k = 0; k < nsamples; k++)
+                plane[k] = av_clip_int16(samples[k] * (1 << shift));
+        } else {
+            int32_t *plane = (int32_t *)frame->extended_data[i];
+            for (k = 0; k < nsamples; k++)
+                plane[k] = clip23(samples[k] * (1 << shift)) * (1 << 8);
+        }
+    }
+
+    if (!asset->one_to_one_map_ch_to_spkr) {
+        if (asset->representation_type == DCA_REPR_TYPE_LtRt)
+            matrix_encoding = AV_MATRIX_ENCODING_DOLBY;
+        else if (asset->representation_type == DCA_REPR_TYPE_LhRh)
+            matrix_encoding = AV_MATRIX_ENCODING_DOLBYHEADPHONE;
+    } else if (request_mask != s->output_mask && p->dmix_type == DCA_DMIX_TYPE_LtRt) {
+        matrix_encoding = AV_MATRIX_ENCODING_DOLBY;
+    }
+    if ((ret = ff_side_data_update_matrix_encoding(frame, matrix_encoding)) < 0)
+        return ret;
+
+    return 0;
+}
+
+av_cold void ff_dca_xll_flush(DCAXllDecoder *s)
+{
+    clear_pbr(s);
+}
+
+av_cold void ff_dca_xll_close(DCAXllDecoder *s)
+{
+    DCAXllChSet *c;
+    int i, j;
+
+    for (i = 0, c = s->chset; i < DCA_XLL_CHSETS_MAX; i++, c++) {
+        for (j = 0; j < DCA_XLL_SAMPLE_BUFFERS_MAX; j++) {
+            av_freep(&c->sample_buffer[j]);
+            c->sample_size[j] = 0;
+        }
+    }
+
+    av_freep(&s->navi);
+    s->navi_size = 0;
+
+    av_freep(&s->pbr_buffer);
+    clear_pbr(s);
+}
diff --git a/libavcodec/dca_xll.h b/libavcodec/dca_xll.h
new file mode 100644
index 00000000..bc0aa65b
--- /dev/null
+++ b/libavcodec/dca_xll.h
@@ -0,0 +1,149 @@
+/*
+ * Copyright (C) 2016 foo86
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_DCA_XLL_H
+#define AVCODEC_DCA_XLL_H
+
+#include "libavutil/common.h"
+#include "libavutil/mem.h"
+
+#include "avcodec.h"
+#include "internal.h"
+#include "get_bits.h"
+#include "dca.h"
+#include "dcadsp.h"
+#include "dca_exss.h"
+
+#define DCA_XLL_CHSETS_MAX              3
+#define DCA_XLL_CHANNELS_MAX            8
+#define DCA_XLL_BANDS_MAX               2
+#define DCA_XLL_ADAPT_PRED_ORDER_MAX    16
+#define DCA_XLL_DECI_HISTORY_MAX        8
+#define DCA_XLL_DMIX_SCALES_MAX         ((DCA_XLL_CHSETS_MAX - 1) * DCA_XLL_CHANNELS_MAX)
+#define DCA_XLL_DMIX_COEFFS_MAX         (DCA_XLL_DMIX_SCALES_MAX * DCA_XLL_CHANNELS_MAX)
+#define DCA_XLL_PBR_BUFFER_MAX          (240 << 10)
+#define DCA_XLL_SAMPLE_BUFFERS_MAX      3
+
+typedef struct DCAXllBand { // Per-frequency-band state; embedded as DCAXllChSet.bands[]
+    int     decor_enabled;                          ///< Pairwise channel decorrelation flag
+    int     orig_order[DCA_XLL_CHANNELS_MAX];       ///< Original channel order
+    int     decor_coeff[DCA_XLL_CHANNELS_MAX / 2];  ///< Pairwise channel coefficients (one slot per channel pair)
+
+    int     adapt_pred_order[DCA_XLL_CHANNELS_MAX]; ///< Adaptive predictor order
+    int     highest_pred_order;                     ///< Highest adaptive predictor order
+    int     fixed_pred_order[DCA_XLL_CHANNELS_MAX]; ///< Fixed predictor order
+    int     adapt_refl_coeff[DCA_XLL_CHANNELS_MAX][DCA_XLL_ADAPT_PRED_ORDER_MAX];   ///< Adaptive predictor reflection coefficients
+
+    int     dmix_embedded;  ///< Downmix performed by encoder in frequency band
+
+    int     lsb_section_size;                       ///< Size of LSB section in any segment
+    int     nscalablelsbs[DCA_XLL_CHANNELS_MAX];    ///< Number of bits to represent the samples in LSB part
+    int     bit_width_adjust[DCA_XLL_CHANNELS_MAX]; ///< Number of bits discarded by authoring
+
+    int32_t *msb_sample_buffer[DCA_XLL_CHANNELS_MAX];   ///< MSB sample buffer pointers
+    int32_t *lsb_sample_buffer[DCA_XLL_CHANNELS_MAX];   ///< LSB sample buffer pointers or NULL
+} DCAXllBand;
+
+typedef struct DCAXllChSet { // One channel set; DCAXllDecoder.chset[] holds DCA_XLL_CHSETS_MAX of them
+    // Channel set header
+    int     nchannels;          ///< Number of channels in the channel set (N)
+    int     residual_encode;    ///< Residual encoding mask (0 - residual, 1 - full channel)
+    int     pcm_bit_res;        ///< PCM bit resolution (variable)
+    int     storage_bit_res;    ///< Storage bit resolution (16 or 24)
+    int     freq;               ///< Original sampling frequency (max. 96000 Hz)
+
+    int     primary_chset;          ///< Primary channel set flag
+    int     dmix_coeffs_present;    ///< Downmix coefficients present in stream
+    int     dmix_embedded;          ///< Downmix already performed by encoder
+    int     dmix_type;              ///< Primary channel set downmix type
+    int     hier_chset;             ///< Whether the channel set is part of a hierarchy
+    int     hier_ofs;               ///< Number of preceding channels in a hierarchy (M)
+    int     dmix_coeff[DCA_XLL_DMIX_COEFFS_MAX];       ///< Downmixing coefficients
+    int     dmix_scale[DCA_XLL_DMIX_SCALES_MAX];       ///< Downmixing scales
+    int     dmix_scale_inv[DCA_XLL_DMIX_SCALES_MAX];   ///< Inverse downmixing scales
+    int     ch_mask;                ///< Channel mask for set
+    int     ch_remap[DCA_XLL_CHANNELS_MAX];    ///< Channel to speaker map
+
+    int     nfreqbands; ///< Number of frequency bands (1 or 2)
+    int     nabits;     ///< Number of bits to read bit allocation coding parameter
+
+    DCAXllBand     bands[DCA_XLL_BANDS_MAX];   ///< Frequency bands
+
+    // Frequency band coding parameters
+    int     seg_common;                                     ///< Segment type
+    int     rice_code_flag[DCA_XLL_CHANNELS_MAX];           ///< Rice coding flag
+    int     bitalloc_hybrid_linear[DCA_XLL_CHANNELS_MAX];   ///< Binary code length for isolated samples
+    int     bitalloc_part_a[DCA_XLL_CHANNELS_MAX];          ///< Coding parameter for part A of segment
+    int     bitalloc_part_b[DCA_XLL_CHANNELS_MAX];          ///< Coding parameter for part B of segment
+    int     nsamples_part_a[DCA_XLL_CHANNELS_MAX];          ///< Number of samples in part A of segment
+
+    // Decimator history
+    DECLARE_ALIGNED(32, int32_t, deci_history)[DCA_XLL_CHANNELS_MAX][DCA_XLL_DECI_HISTORY_MAX]; ///< Decimator history for frequency band 1
+
+    // Sample buffers
+    unsigned int    sample_size[DCA_XLL_SAMPLE_BUFFERS_MAX];    ///< Size tracked for sample_buffer[i]; zeroed by ff_dca_xll_close()
+    int32_t         *sample_buffer[DCA_XLL_SAMPLE_BUFFERS_MAX]; ///< Heap buffers released with av_freep() in ff_dca_xll_close()
+} DCAXllChSet;
+
+typedef struct DCAXllDecoder { // Top-level XLL decoder state passed to the ff_dca_xll_*() functions
+    AVCodecContext  *avctx;
+    GetBitContext   gb;
+
+    int     frame_size;             ///< Number of bytes in a lossless frame
+    int     nchsets;                ///< Number of channel sets per frame
+    int     nframesegs;             ///< Number of segments per frame
+    int     nsegsamples_log2;       ///< log2(nsegsamples)
+    int     nsegsamples;            ///< Samples in segment per one frequency band
+    int     nframesamples_log2;     ///< log2(nframesamples)
+    int     nframesamples;          ///< Samples in frame per one frequency band
+    int     seg_size_nbits;         ///< Number of bits used to read segment size
+    int     band_crc_present;       ///< Presence of CRC16 within each frequency band
+    int     scalable_lsbs;          ///< MSB/LSB split flag
+    int     ch_mask_nbits;          ///< Number of bits used to read channel mask
+    int     fixed_lsb_width;        ///< Fixed LSB width
+
+    DCAXllChSet    chset[DCA_XLL_CHSETS_MAX]; ///< Channel sets
+
+    int             *navi;          ///< NAVI table
+    unsigned int    navi_size;      ///< Size tracked for navi; zeroed by ff_dca_xll_close()
+
+    int     nfreqbands;     ///< Highest number of frequency bands
+    int     nchannels;      ///< Total number of channels in a hierarchy
+    int     nreschsets;     ///< Number of channel sets that have residual encoded channels
+    int     nactivechsets;  ///< Number of active channel sets to decode
+
+    int     hd_stream_id;   ///< Previous DTS-HD stream ID for detecting changes
+
+    uint8_t     *pbr_buffer;        ///< Peak bit rate (PBR) smoothing buffer
+    int         pbr_length;         ///< Length in bytes of data currently buffered
+    int         pbr_delay;          ///< Delay in frames before decoding buffered data
+
+    DCADSPContext   *dcadsp;        ///< DSP context, e.g. handed to ff_dca_downmix_to_stereo_fixed()
+
+    int     output_mask;                        ///< NOTE(review): presumably the speaker mask describing output_samples -- confirm
+    int32_t *output_samples[DCA_SPEAKER_COUNT]; ///< Output sample pointers (DCA_SPEAKER_COUNT slots), read through a channel remap when filling the frame
+} DCAXllDecoder;
+
+int ff_dca_xll_parse(DCAXllDecoder *s, uint8_t *data, DCAExssAsset *asset);
+int ff_dca_xll_filter_frame(DCAXllDecoder *s, AVFrame *frame);
+av_cold void ff_dca_xll_flush(DCAXllDecoder *s); // Flush entry point (implemented as a single clear_pbr() call)
+av_cold void ff_dca_xll_close(DCAXllDecoder *s); // Frees the sample buffers, the NAVI table and the PBR buffer
+
+#endif
diff --git a/libavcodec/dcadata.c b/libavcodec/dcadata.c
index 5d7d5943..e9911347 100644
--- a/libavcodec/dcadata.c
+++ b/libavcodec/dcadata.c
@@ -22,7 +22,6 @@
 
 #include <stdint.h>
 
-#include "libavutil/channel_layout.h"
 #include "libavutil/mem.h"
 
 #include "dca.h"
@@ -43,8 +42,12 @@ const uint8_t ff_dca_channels[16] = {
     1, 2, 2, 2, 2, 3, 3, 4, 4, 5, 6, 6, 6, 7, 8, 8
 };
 
-const uint8_t ff_dca_bits_per_sample[7] = {
-    16, 16, 20, 20, 0, 24, 24
+const uint8_t ff_dca_bits_per_sample[8] = { // NOTE(review): entries 4 and 7 are 0 -- presumably unused/invalid codes; confirm callers check for 0
+    16, 16, 20, 20, 0, 24, 24, 0
+};
+
+const uint8_t ff_dca_dmix_primary_nch[8] = { // NOTE(review): presumably channel count per primary downmix type; entry 7 is 0 -- confirm
+    1, 2, 2, 3, 3, 4, 4, 0
 };
 
 /* ADPCM data */
@@ -4181,6 +4184,37 @@ const uint32_t ff_dca_scale_factor_quant7[128] = {
     5011872, 5688529, 6456542, 7328245, 8317638,       0,       0,       0
 };
 
+const uint32_t ff_dca_joint_scale_factors[129] = { // entry 64 is 131072 (2^17) and entry 104 is 1310720 (10 * 2^17)
+       3288,    3490,    3691,    3909,    4144,    4387,    4647,    4924,
+       5218,    5528,    5855,    6199,    6568,    6963,    7374,    7810,
+       8271,    8758,    9278,    9831,   10410,   11031,   11685,   12373,
+      13103,   13883,   14705,   15578,   16500,   17482,   18514,   19613,
+      20770,   22003,   23312,   24688,   26156,   27699,   29343,   31080,
+      32925,   34871,   36943,   39133,   41448,   43906,   46506,   49258,
+      52177,   55273,   58544,   62017,   65691,   69584,   73711,   78073,
+      82703,   87602,   92795,   98289,  104111,  110285,  116820,  123740,
+     131072,  138840,  147069,  155776,  165012,  174785,  185145,  196117,
+     207735,  220042,  233086,  246894,  261523,  277017,  293434,  310823,
+     329236,  348748,  369409,  391303,  414490,  439043,  465064,  492621,
+     521805,  552725,  585475,  620170,  656920,  695843,  737073,  780745,
+     827008,  876014,  927923,  982902, 1041144, 1102834, 1168181, 1237404,
+    1310720, 1388382, 1470649, 1557790, 1650098, 1747876, 1851441, 1961147,
+    2077355, 2200441, 2330825, 2468935, 2615232, 2770195, 2934335, 3108206,
+    3292378, 3487463, 3694108, 3913000, 4144862, 4390455, 4650611, 4926176,
+    5218066
+};
+
+const uint32_t ff_dca_scale_factor_adj[4] = { // equal to 1.0, 1.125, 1.25 and 1.4375 multiplied by 2^22
+    4194304, 4718592, 5242880, 6029312
+};
+
+const uint32_t ff_dca_quant_levels[32] = { // entries 8..26 are powers of two (32 .. 8388608); the last five entries are 0
+          1,       3,       5,     7,      9,     13,     17,      25,
+         32,      64,     128,   256,    512,   1024,   2048,    4096,
+       8192,   16384,   32768, 65536, 131072, 262144, 524288, 1048576,
+    2097152, 4194304, 8388608,     0,      0,      0,      0,       0
+};
+
 /* 20bits unsigned fractional binary codes */
 const uint32_t ff_dca_lossy_quant[32] = {
          0, 6710886, 4194304, 3355443, 2474639, 2097152, 1761608, 1426063,
@@ -4189,13 +4223,6 @@ const uint32_t ff_dca_lossy_quant[32] = {
         84,      42,      21,       0,       0,       0,       0,       0
 };
 
-const float ff_dca_lossy_quant_d[32] = {
-          0,     1.6,      1.0,     0.8,    0.59,    0.50,    0.42,    0.34,
-       0.19,    0.11,     0.06,   0.035,   0.019,   0.011,  0.0065,  0.0040,
-     0.0025,  0.0014,   0.0008, 0.00045, 0.00030, 0.00017, 0.00008, 0.00004,
-    0.00002, 0.00001, 0.000005,       0,       0,       0,       0,       0
-};
-
 /* 20bits unsigned fractional binary codes */
 const uint32_t ff_dca_lossless_quant[32] = {
          0, 4194304, 2097152, 1384120, 1048576, 696254, 524288, 348127,
@@ -4204,14 +4231,6 @@ const uint32_t ff_dca_lossless_quant[32] = {
          4,       2,       1,       0,       0,      0,      0,      0
 };
 
-const float ff_dca_lossless_quant_d[32] = {
-           0,      1.0,      0.5,     0.33,     0.25,    0.166,    0.125,
-       0.083,   0.0625,  0.03125,   0.0156, 7.874E-3, 3.922E-3, 1.957E-3,
-    9.775E-4, 4.885E-4, 2.442E-4, 1.221E-4, 6.104E-5, 3.052E-5, 1.526E-5,
-    7.629E-6, 3.815E-6, 1.907E-6, 9.537E-7, 4.768E-7, 2.384E-7,        0,
-           0,        0,        0,        0
-};
-
 /* Vector quantization tables */
 
 DECLARE_ALIGNED(8, const int8_t, ff_dca_high_freq_vq)[1024][32] = {
@@ -7524,76 +7543,6 @@ DECLARE_ALIGNED(16, const float, ff_dca_lfe_fir_128)[256] = {
 };
 #undef SCALE
 
-
-#define SCALE(c) ((float)(c) / (256.0f * 32768.0f * 8388608.0f))
-DECLARE_ALIGNED(16, const float, ff_dca_lfe_xll_fir_64)[256] = {
-    SCALE(   6103), SCALE(  52170), SCALE(-558064), SCALE(1592440),
-    SCALE(6290049), SCALE(1502534), SCALE(-546669), SCALE(  53047),
-    SCALE(   1930), SCALE(  51089), SCALE(-568920), SCALE(1683709),
-    SCALE(6286575), SCALE(1414057), SCALE(-534782), SCALE(  53729),
-    SCALE(   2228), SCALE(  49794), SCALE(-579194), SCALE(1776276),
-    SCALE(6279634), SCALE(1327070), SCALE(-522445), SCALE(  54228),
-    SCALE(   2552), SCALE(  48275), SCALE(-588839), SCALE(1870070),
-    SCALE(6269231), SCALE(1241632), SCALE(-509702), SCALE(  54550),
-    SCALE(   2904), SCALE(  46523), SCALE(-597808), SCALE(1965017),
-    SCALE(6255380), SCALE(1157798), SCALE(-496595), SCALE(  54708),
-    SCALE(   3287), SCALE(  44529), SCALE(-606054), SCALE(2061044),
-    SCALE(6238099), SCALE(1075621), SCALE(-483164), SCALE(  54710),
-    SCALE(   3704), SCALE(  42282), SCALE(-613529), SCALE(2158071),
-    SCALE(6217408), SCALE( 995149), SCALE(-469451), SCALE(  54566),
-    SCALE(   4152), SCALE(  39774), SCALE(-620186), SCALE(2256019),
-    SCALE(6193332), SCALE( 916430), SCALE(-455494), SCALE(  54285),
-    SCALE(   4631), SCALE(  36995), SCALE(-625976), SCALE(2354805),
-    SCALE(6165900), SCALE( 839507), SCALE(-441330), SCALE(  53876),
-    SCALE(   5139), SCALE(  33937), SCALE(-630850), SCALE(2454343),
-    SCALE(6135146), SCALE( 764419), SCALE(-426998), SCALE(  53348),
-    SCALE(   5682), SCALE(  30591), SCALE(-634759), SCALE(2554547),
-    SCALE(6101107), SCALE( 691203), SCALE(-412531), SCALE(  52711),
-    SCALE(   6264), SCALE(  26948), SCALE(-637655), SCALE(2655326),
-    SCALE(6063824), SCALE( 619894), SCALE(-397966), SCALE(  51972),
-    SCALE(   6886), SCALE(  23001), SCALE(-639488), SCALE(2756591),
-    SCALE(6023343), SCALE( 550521), SCALE(-383335), SCALE(  51140),
-    SCALE(   7531), SCALE(  18741), SCALE(-640210), SCALE(2858248),
-    SCALE(5979711), SCALE( 483113), SCALE(-368671), SCALE(  50224),
-    SCALE(   8230), SCALE(  14162), SCALE(-639772), SCALE(2960201),
-    SCALE(5932981), SCALE( 417692), SCALE(-354003), SCALE(  49231),
-    SCALE(   8959), SCALE(   9257), SCALE(-638125), SCALE(3062355),
-    SCALE(5883210), SCALE( 354281), SCALE(-339362), SCALE(  48168),
-    SCALE(   9727), SCALE(   4018), SCALE(-635222), SCALE(3164612),
-    SCALE(5830457), SCALE( 292897), SCALE(-324777), SCALE(  47044),
-    SCALE(  10535), SCALE(  -1558), SCALE(-631014), SCALE(3266872),
-    SCALE(5774785), SCALE( 233555), SCALE(-310273), SCALE(  45866),
-    SCALE(  11381), SCALE(  -7480), SCALE(-625455), SCALE(3369035),
-    SCALE(5716260), SCALE( 176267), SCALE(-295877), SCALE(  44640),
-    SCALE(  12267), SCALE( -13750), SCALE(-618499), SCALE(3471000),
-    SCALE(5654952), SCALE( 121042), SCALE(-281613), SCALE(  43373),
-    SCALE(  13190), SCALE( -20372), SCALE(-610098), SCALE(3572664),
-    SCALE(5590933), SCALE(  67886), SCALE(-267505), SCALE(  42072),
-    SCALE(  14152), SCALE( -27352), SCALE(-600209), SCALE(3673924),
-    SCALE(5524280), SCALE(  16800), SCALE(-253574), SCALE(  40743),
-    SCALE(  15153), SCALE( -34691), SCALE(-588788), SCALE(3774676),
-    SCALE(5455069), SCALE( -32214), SCALE(-239840), SCALE(  39391),
-    SCALE(  16192), SCALE( -42390), SCALE(-575791), SCALE(3874816),
-    SCALE(5383383), SCALE( -79159), SCALE(-226323), SCALE(  38022),
-    SCALE(  17267), SCALE( -50453), SCALE(-561178), SCALE(3974239),
-    SCALE(5309305), SCALE(-124041), SCALE(-213041), SCALE(  36642),
-    SCALE(  18377), SCALE( -58879), SCALE(-544906), SCALE(4072841),
-    SCALE(5232922), SCALE(-166869), SCALE(-200010), SCALE(  35256),
-    SCALE(  19525), SCALE( -67667), SCALE(-526937), SCALE(4170517),
-    SCALE(5154321), SCALE(-207653), SCALE(-187246), SCALE(  33866),
-    SCALE(  20704), SCALE( -76817), SCALE(-507233), SCALE(4267162),
-    SCALE(5073593), SCALE(-246406), SCALE(-174764), SCALE(  32480),
-    SCALE(  21915), SCALE( -86327), SCALE(-485757), SCALE(4362672),
-    SCALE(4990831), SCALE(-283146), SCALE(-162575), SCALE(  31101),
-    SCALE(  23157), SCALE( -96193), SCALE(-462476), SCALE(4456942),
-    SCALE(4906129), SCALE(-317890), SCALE(-150692), SCALE(  29732),
-    SCALE(  24426), SCALE(-106412), SCALE(-437356), SCALE(4549871),
-    SCALE(4819584), SCALE(-350658), SCALE(-139125), SCALE(  28376),
-    SCALE(  25721), SCALE(-116977), SCALE(-410365), SCALE(4641355),
-    SCALE(4731293), SCALE(-381475), SCALE(-127884), SCALE(  27038),
-};
-#undef SCALE
-
 DECLARE_ALIGNED(16, const float, ff_dca_fir_64bands)[1024] = {
     /* Bank 0 */
     -7.1279389866041690e-8, -7.0950903150874990e-8,
@@ -8118,6 +8067,562 @@ DECLARE_ALIGNED(16, const float, ff_dca_fir_64bands)[1024] = {
      7.0950903150874990e-8,  7.1279389866041690e-8,
 };
 
+DECLARE_ALIGNED(16, const int32_t, ff_dca_fir_32bands_perfect_fixed)[512] = {
+           0,        0,       -3,      -10,
+         -35,     -105,     -218,     -141,
+        -170,     -216,     -239,     -254,
+        -257,     -251,     -235,     -212,
+        -267,     -317,     -362,     -400,
+        -425,     -434,     -427,     -373,
+        -339,     -593,     -321,     -120,
+         -39,      -16,        0,        1,
+           1,        1,       -3,       -1,
+          -6,      -38,      -93,     -496,
+        -723,     -970,    -1235,    -1501,
+       -1753,    -1978,    -2163,    -2295,
+       -2891,    -2915,    -2860,    -2726,
+       -2517,    -2243,    -1915,    -1590,
+       -1192,     -252,     -117,      -22,
+          -6,      -13,       12,       14,
+          32,       25,      469,      942,
+        1403,     1421,     1239,     2838,
+        3539,     4259,     5002,     5716,
+        6365,     6908,     7311,     7545,
+       11680,    12355,    12785,    12951,
+       12841,    12453,    11803,    10864,
+        9762,     7099,     6725,     5954,
+        4284,     2584,      215,      379,
+         557,      701,      -29,     -687,
+       -1578,    -2749,    -4076,    -7933,
+      -10049,   -12133,   -14039,   -15752,
+      -17213,   -18400,   -19291,   -19878,
+       -1444,    -3005,    -4523,    -5927,
+       -7143,    -8093,    -8713,    -8939,
+       -8700,    -9481,    -7515,    -5279,
+       -2725,       61,     5763,     6113,
+        7571,     6735,    17126,    20165,
+       23328,    26775,    30310,    32639,
+       35464,    38064,    40423,    42512,
+       44261,    45632,    46578,    46974,
+      -45572,   -45008,   -43753,   -41661,
+      -38655,   -34660,   -29587,   -23375,
+      -15998,    -7631,     2472,    13757,
+       26188,    39942,    49789,    67293,
+       84699,   101701,   127325,   148404,
+      170391,   193280,   217044,   241451,
+      266537,   292144,   318161,   344417,
+      370786,   397082,   423133,   448757,
+      475085,   499136,   522007,   543516,
+      563424,   581467,   597422,   611005,
+      621975,   630083,   634996,   636457,
+      634311,   628147,   619871,   604524,
+      585077,   561301,   529204,   494129,
+      453552,   407189,   354920,   296502,
+      231916,   161012,    83700,      -86,
+      -90377,  -187193,  -290528,  -400329,
+      516487,   639054,   767835,   902710,
+     1043512,  1190048,  1342100,  1499418,
+     1661729,  1828700,  2000071,  2175433,
+     2354437,  2536630,  2721120,  2908704,
+     3098059,  3288764,  3480801,  3672922,
+     3864970,  4056432,  4246767,  4435454,
+     4621921,  4805642,  4986073,  5162677,
+     5334921,  5502279,  5664239,  5820300,
+     5969913,  6112723,  6248225,  6375985,
+     6495593,  6606663,  6708832,  6801769,
+     6885168,  6958762,  7022294,  7075566,
+     7118382,  7150633,  7172314,  7183082,
+     7183082,  7172314,  7150633,  7118382,
+     7075566,  7022294,  6958762,  6885168,
+     6801769,  6708832,  6606663,  6495593,
+     6375985,  6248225,  6112723,  5969913,
+    -5820300, -5664239, -5502279, -5334921,
+    -5162677, -4986073, -4805642, -4621921,
+    -4435454, -4246767, -4056432, -3864970,
+    -3672922, -3480801, -3288764, -3098059,
+    -2908704, -2721120, -2536630, -2354437,
+    -2175433, -2000071, -1828700, -1661729,
+    -1499418, -1342100, -1190048, -1043512,
+     -902710,  -767835,  -639054,  -516487,
+     -400329,  -290528,  -187193,   -90377,
+         -86,    83700,   161012,   231916,
+      296502,   354920,   407189,   453552,
+      494129,   529204,   561301,   585077,
+      604524,   619871,   628147,   634311,
+      636457,   634996,   630083,   621975,
+      611005,   597422,   581467,   563424,
+      543516,   522007,   499136,   475085,
+     -448757,  -423133,  -397082,  -370786,
+     -344417,  -318161,  -292144,  -266537,
+     -241451,  -217044,  -193280,  -170391,
+     -148404,  -127325,  -101701,   -84699,
+      -67293,   -49789,   -39942,   -26188,
+      -13757,    -2472,     7631,    15998,
+       23375,    29587,    34660,    38655,
+       41661,    43753,    45008,    45572,
+       46974,    46578,    45632,    44261,
+       42512,    40423,    38064,    35464,
+       32639,    30310,    26775,    23328,
+       20165,    17126,     6735,     7571,
+        6113,     5763,       61,    -2725,
+       -5279,    -7515,    -9481,    -8700,
+       -8939,    -8713,    -8093,    -7143,
+       -5927,    -4523,    -3005,    -1444,
+       19878,    19291,    18400,    17213,
+       15752,    14039,    12133,    10049,
+        7933,     4076,     2749,     1578,
+         687,       29,     -701,     -557,
+        -379,     -215,    -2584,    -4284,
+       -5954,    -6725,    -7099,    -9762,
+      -10864,   -11803,   -12453,   -12841,
+      -12951,   -12785,   -12355,   -11680,
+        7545,     7311,     6908,     6365,
+        5716,     5002,     4259,     3539,
+        2838,     1239,     1421,     1403,
+         942,      469,       25,       32,
+          14,       12,      -13,       -6,
+         -22,     -117,     -252,    -1192,
+       -1590,    -1915,    -2243,    -2517,
+       -2726,    -2860,    -2915,    -2891,
+        2295,     2163,     1978,     1753,
+        1501,     1235,      970,      723,
+         496,       93,       38,        6,
+           1,        3,       -1,       -1,
+          -1,        0,       16,       39,
+         120,      321,      593,      339,
+         373,      427,      434,      425,
+         400,      362,      317,      267,
+        -212,     -235,     -251,     -257,
+        -254,     -239,     -216,     -170,
+        -141,     -218,     -105,      -35,
+         -10,       -3,        0,        0
+};
+
+DECLARE_ALIGNED(16, const int32_t, ff_dca_fir_32bands_nonperfect_fixed)[512] = {
+         -53,      -64,      -77,      -91,
+        -107,     -124,     -144,     -165,
+        -189,     -215,     -244,     -277,
+        -313,     -353,     -397,     -447,
+         502,      563,      631,      706,
+         789,      881,      983,     1095,
+        1218,     1354,     1502,     1665,
+        1843,     2036,     2247,     2475,
+        2723,     2990,     3277,     3586,
+        3916,     4270,     4646,     5046,
+        5470,     5918,     6390,     6886,
+        7405,     7947,     8510,     9094,
+        9698,    10319,    10955,    11605,
+       12265,    12933,    13605,    14277,
+       14945,    15604,    16250,    16877,
+       17480,    18051,    18585,    19075,
+       19513,    19891,    20202,    20436,
+       20587,    20643,    20597,    20439,
+       20160,    19749,    19198,    18496,
+       17634,    16603,    15393,    13996,
+      -12403,   -10605,    -8595,    -6366,
+       -3911,    -1225,     1697,     4860,
+        8265,    11916,    15812,    19953,
+       24337,    28961,    33819,    38904,
+       44210,    49725,    55437,    61334,
+       67398,    73614,    79961,    86417,
+       92960,    99563,   106198,   112837,
+      119446,   125994,   132443,   138758,
+      144898,   150823,   156491,   161858,
+      166879,   171507,   175697,   179400,
+      182566,   185149,   187097,   188363,
+      188899,   188654,   187581,   185635,
+      182770,   178943,   174112,   168238,
+      161285,   153218,   144007,   133624,
+      122046,   109254,    95232,    79969,
+       63462,    45709,    26715,     6492,
+       14943,    37567,    61350,    86256,
+      112242,   139258,   167246,   196143,
+      225877,   256368,   287532,   319275,
+      351496,   384088,   416936,   449919,
+      482909,   515770,   548362,   580539,
+      612148,   643030,   673024,   701963,
+      729674,   755985,   780717,   803690,
+      824721,   843628,   860226,   874332,
+      885761,   894330,   899861,   902174,
+      901096,   896456,   888088,   875832,
+      859535,   839050,   814237,   784966,
+      751116,   712574,   669239,   621021,
+      567840,   509632,   446341,   377927,
+      304365,   225641,   141757,    52732,
+      -41403,  -140599,  -244793,  -353905,
+     -467840,  -586486,  -709716,  -837385,
+      969336,  1105393,  1245366,  1389049,
+     1536224,  1686655,  1840096,  1996285,
+     2154949,  2315802,  2478547,  2642877,
+     2808475,  2975015,  3142163,  3309579,
+     3476914,  3643818,  3809934,  3974901,
+     4138360,  4299948,  4459303,  4616064,
+     4769873,  4920374,  5067219,  5210063,
+     5348569,  5482406,  5611255,  5734805,
+     5852757,  5964823,  6070729,  6170216,
+     6263037,  6348961,  6427777,  6499286,
+     6563310,  6619688,  6668279,  6708963,
+     6741632,  6766206,  6782623,  6790843,
+     6790843,  6782623,  6766206,  6741632,
+     6708963,  6668279,  6619688,  6563310,
+     6499286,  6427777,  6348961,  6263037,
+     6170216,  6070729,  5964823,  5852757,
+    -5734805, -5611255, -5482406, -5348569,
+    -5210063, -5067219, -4920374, -4769873,
+    -4616064, -4459303, -4299948, -4138360,
+    -3974901, -3809934, -3643818, -3476914,
+    -3309579, -3142163, -2975015, -2808475,
+    -2642877, -2478547, -2315802, -2154949,
+    -1996285, -1840096, -1686655, -1536224,
+    -1389049, -1245366, -1105393,  -969336,
+     -837385,  -709716,  -586486,  -467840,
+     -353905,  -244793,  -140599,   -41403,
+       52732,   141757,   225641,   304365,
+      377927,   446341,   509632,   567840,
+      621021,   669239,   712574,   751116,
+      784966,   814237,   839050,   859535,
+      875832,   888088,   896456,   901096,
+      902174,   899861,   894330,   885761,
+     -874332,  -860226,  -843628,  -824721,
+     -803690,  -780717,  -755985,  -729674,
+     -701963,  -673024,  -643030,  -612148,
+     -580539,  -548362,  -515770,  -482909,
+     -449919,  -416936,  -384088,  -351496,
+     -319275,  -287532,  -256368,  -225877,
+     -196143,  -167246,  -139258,  -112242,
+      -86256,   -61350,   -37567,   -14943,
+        6492,    26715,    45709,    63462,
+       79969,    95232,   109254,   122046,
+      133624,   144007,   153218,   161285,
+      168238,   174112,   178943,   182770,
+      185635,   187581,   188654,   188899,
+      188363,   187097,   185149,   182566,
+      179400,   175697,   171507,   166879,
+      161858,   156491,   150823,   144898,
+     -138758,  -132443,  -125994,  -119446,
+     -112837,  -106198,   -99563,   -92960,
+      -86417,   -79961,   -73614,   -67398,
+      -61334,   -55437,   -49725,   -44210,
+      -38904,   -33819,   -28961,   -24337,
+      -19953,   -15812,   -11916,    -8265,
+       -4860,    -1697,     1225,     3911,
+        6366,     8595,    10605,    12403,
+       13996,    15393,    16603,    17634,
+       18496,    19198,    19749,    20160,
+       20439,    20597,    20643,    20587,
+       20436,    20202,    19891,    19513,
+       19075,    18585,    18051,    17480,
+       16877,    16250,    15604,    14945,
+       14277,    13605,    12933,    12265,
+       11605,    10955,    10319,     9698,
+       -9094,    -8510,    -7947,    -7405,
+       -6886,    -6390,    -5918,    -5470,
+       -5046,    -4646,    -4270,    -3916,
+       -3586,    -3277,    -2990,    -2723,
+       -2475,    -2247,    -2036,    -1843,
+       -1665,    -1502,    -1354,    -1218,
+       -1095,     -983,     -881,     -789,
+        -706,     -631,     -563,     -502,
+        -447,     -397,     -353,     -313,
+        -277,     -244,     -215,     -189,
+        -165,     -144,     -124,     -107,
+         -91,      -77,      -64,      -53
+};
+
+DECLARE_ALIGNED(16, const int32_t, ff_dca_lfe_fir_64_fixed)[256] = {
+     6103,   52170, -558064, 1592440, 6290049, 1502534, -546669, 53047,
+     1930,   51089, -568920, 1683709, 6286575, 1414057, -534782, 53729,
+     2228,   49794, -579194, 1776276, 6279634, 1327070, -522445, 54228,
+     2552,   48275, -588839, 1870070, 6269231, 1241632, -509702, 54550,
+     2904,   46523, -597808, 1965017, 6255380, 1157798, -496595, 54708,
+     3287,   44529, -606054, 2061044, 6238099, 1075621, -483164, 54710,
+     3704,   42282, -613529, 2158071, 6217408,  995149, -469451, 54566,
+     4152,   39774, -620186, 2256019, 6193332,  916430, -455494, 54285,
+     4631,   36995, -625976, 2354805, 6165900,  839507, -441330, 53876,
+     5139,   33937, -630850, 2454343, 6135146,  764419, -426998, 53348,
+     5682,   30591, -634759, 2554547, 6101107,  691203, -412531, 52711,
+     6264,   26948, -637655, 2655326, 6063824,  619894, -397966, 51972,
+     6886,   23001, -639488, 2756591, 6023343,  550521, -383335, 51140,
+     7531,   18741, -640210, 2858248, 5979711,  483113, -368671, 50224,
+     8230,   14162, -639772, 2960201, 5932981,  417692, -354003, 49231,
+     8959,    9257, -638125, 3062355, 5883210,  354281, -339362, 48168,
+     9727,    4018, -635222, 3164612, 5830457,  292897, -324777, 47044,
+    10535,   -1558, -631014, 3266872, 5774785,  233555, -310273, 45866,
+    11381,   -7480, -625455, 3369035, 5716260,  176267, -295877, 44640,
+    12267,  -13750, -618499, 3471000, 5654952,  121042, -281613, 43373,
+    13190,  -20372, -610098, 3572664, 5590933,   67886, -267505, 42072,
+    14152,  -27352, -600209, 3673924, 5524280,   16800, -253574, 40743,
+    15153,  -34691, -588788, 3774676, 5455069,  -32214, -239840, 39391,
+    16192,  -42390, -575791, 3874816, 5383383,  -79159, -226323, 38022,
+    17267,  -50453, -561178, 3974239, 5309305, -124041, -213041, 36642,
+    18377,  -58879, -544906, 4072841, 5232922, -166869, -200010, 35256,
+    19525,  -67667, -526937, 4170517, 5154321, -207653, -187246, 33866,
+    20704,  -76817, -507233, 4267162, 5073593, -246406, -174764, 32480,
+    21915,  -86327, -485757, 4362672, 4990831, -283146, -162575, 31101,
+    23157,  -96193, -462476, 4456942, 4906129, -317890, -150692, 29732,
+    24426, -106412, -437356, 4549871, 4819584, -350658, -139125, 28376,
+    25721, -116977, -410365, 4641355, 4731293, -381475, -127884, 27038
+};
+
+DECLARE_ALIGNED(16, const int32_t, ff_dca_fir_64bands_fixed)[1024] = {
+         -38,      -38,      -43,      -48,
+         -52,      -57,      -62,      -67,
+         -73,      -79,      -85,      -91,
+         -98,     -105,     -113,     -121,
+        -129,     -138,     -147,     -157,
+        -167,     -178,     -190,     -202,
+        -215,     -228,     -242,     -257,
+        -273,     -289,     -307,     -325,
+         345,      365,      387,      410,
+         433,      459,      485,      513,
+         543,      574,      606,      640,
+         676,      714,      753,      795,
+         839,      884,      932,      983,
+        1035,     1090,     1148,     1208,
+        1271,     1336,     1405,     1476,
+        1550,     1628,     1709,     1793,
+        1880,     1971,     2065,     2163,
+        2265,     2370,     2479,     2592,
+        2709,     2830,     2955,     3084,
+        3217,     3354,     3496,     3642,
+        3791,     3946,     4104,     4267,
+        4433,     4604,     4780,     4959,
+        5143,     5330,     5522,     5717,
+        5916,     6119,     6326,     6536,
+        6749,     6966,     7186,     7408,
+        7633,     7861,     8090,     8322,
+        8556,     8791,     9027,     9264,
+        9501,     9739,     9977,    10214,
+       10450,    10685,    10918,    11149,
+       11377,    11603,    11825,    12042,
+       12255,    12463,    12665,    12861,
+       13050,    13231,    13405,    13569,
+       13724,    13869,    14002,    14125,
+       14235,    14331,    14415,    14483,
+       14536,    14573,    14594,    14596,
+       14580,    14544,    14488,    14412,
+       14313,    14191,    14046,    13877,
+       13682,    13461,    13213,    12937,
+       12632,    12298,    11934,    11538,
+       11111,    10650,    10156,     9628,
+       -9065,    -8466,    -7830,    -7158,
+       -6447,    -5698,    -4910,    -4083,
+       -3215,    -2306,    -1357,     -366,
+         668,     1743,     2861,     4022,
+        5226,     6474,     7764,     9098,
+       10476,    11897,    13361,    14868,
+       16418,    18011,    19645,    21322,
+       23039,    24798,    26596,    28433,
+       30309,    32222,    34172,    36158,
+       38177,    40231,    42315,    44431,
+       46575,    48747,    50945,    53167,
+       55411,    57676,    59959,    62258,
+       64571,    66897,    69231,    71573,
+       73919,    76268,    78615,    80959,
+       83296,    85624,    87939,    90239,
+       92519,    94778,    97011,    99215,
+      101386,   103521,   105616,   107668,
+      109673,   111626,   113524,   115362,
+      117138,   118847,   120484,   122045,
+      123527,   124925,   126234,   127451,
+      128571,   129591,   130504,   131308,
+      131997,   132568,   133016,   133338,
+      133528,   133582,   133495,   133265,
+      132886,   132355,   131668,   130820,
+      129807,   128626,   127274,   125746,
+      124038,   122148,   120071,   117806,
+      115348,   112694,   109843,   106790,
+      103534,   100071,    96401,    92520,
+       88426,    84119,    79597,    74857,
+       69900,    64723,    59327,    53711,
+       47875,    41818,    35542,    29045,
+       22330,    15397,     8247,      881,
+        6697,    14487,    22487,    30692,
+       39101,    47711,    56517,    65516,
+       74704,    84076,    93628,   103355,
+      113251,   123311,   133528,   143897,
+      154410,   165061,   175843,   186747,
+      197766,   208892,   220116,   231429,
+      242822,   254285,   265810,   277384,
+      288999,   300644,   312306,   323976,
+      335641,   347289,   358909,   370488,
+      382013,   393471,   404848,   416133,
+      427310,   438366,   449286,   460057,
+      470663,   481090,   491323,   501347,
+      511147,   520707,   530011,   539044,
+      547790,   556233,   564357,   572146,
+      579584,   586654,   593341,   599627,
+      605498,   610936,   615925,   620449,
+      624491,   628037,   631069,   633571,
+      635529,   636925,   637745,   637972,
+      637593,   636592,   634953,   632662,
+      629705,   626068,   621737,   616698,
+      610938,   604443,   597202,   589202,
+      580431,   570877,   560530,   549378,
+      537411,   524620,   510994,   496525,
+      481205,   465026,   447979,   430058,
+      411256,   391569,   370989,   349514,
+      327137,   303857,   279670,   254573,
+      228564,   201644,   173811,   145065,
+      115408,    84840,    53365,    20984,
+      -12298,   -46478,   -81550,  -117508,
+     -154347,  -192060,  -230638,  -270073,
+     -310356,  -351478,  -393427,  -436192,
+     -479762,  -524124,  -569264,  -615168,
+      661821,   709209,   757314,   806121,
+      855611,   905766,   956569,  1007998,
+     1060035,  1112658,  1165846,  1219578,
+     1273830,  1328580,  1383805,  1439479,
+     1495578,  1552077,  1608950,  1666171,
+     1723714,  1781550,  1839653,  1897995,
+     1956546,  2015279,  2074163,  2133170,
+     2192270,  2251432,  2310626,  2369822,
+     2428988,  2488093,  2547106,  2605996,
+     2664731,  2723279,  2781607,  2839685,
+     2897481,  2954962,  3012096,  3068851,
+     3125195,  3181097,  3236524,  3291445,
+     3345829,  3399643,  3452858,  3505441,
+     3557362,  3608591,  3659098,  3708853,
+     3757825,  3805987,  3853309,  3899763,
+     3945322,  3989957,  4033642,  4076350,
+     4118055,  4158733,  4198357,  4236904,
+     4274351,  4310673,  4345850,  4379859,
+     4412678,  4444289,  4474670,  4503803,
+     4531671,  4558255,  4583539,  4607508,
+     4630146,  4651438,  4671373,  4689936,
+     4707117,  4722905,  4737290,  4750262,
+     4761813,  4771936,  4780625,  4787874,
+     4793679,  4798036,  4800943,  4802396,
+     4802396,  4800943,  4798036,  4793679,
+     4787874,  4780625,  4771936,  4761813,
+     4750262,  4737290,  4722905,  4707117,
+     4689936,  4671373,  4651438,  4630146,
+     4607508,  4583539,  4558255,  4531671,
+     4503803,  4474670,  4444289,  4412678,
+     4379859,  4345850,  4310673,  4274351,
+     4236904,  4198357,  4158733,  4118055,
+    -4076350, -4033642, -3989957, -3945322,
+    -3899763, -3853309, -3805987, -3757825,
+    -3708853, -3659098, -3608591, -3557362,
+    -3505441, -3452858, -3399643, -3345829,
+    -3291445, -3236524, -3181097, -3125195,
+    -3068851, -3012096, -2954962, -2897481,
+    -2839685, -2781607, -2723279, -2664731,
+    -2605996, -2547106, -2488093, -2428988,
+    -2369822, -2310626, -2251432, -2192270,
+    -2133170, -2074163, -2015279, -1956546,
+    -1897995, -1839653, -1781550, -1723714,
+    -1666171, -1608950, -1552077, -1495578,
+    -1439479, -1383805, -1328580, -1273830,
+    -1219578, -1165846, -1112658, -1060035,
+    -1007998,  -956569,  -905766,  -855611,
+     -806121,  -757314,  -709209,  -661821,
+     -615168,  -569264,  -524124,  -479762,
+     -436192,  -393427,  -351478,  -310356,
+     -270073,  -230638,  -192060,  -154347,
+     -117508,   -81550,   -46478,   -12298,
+       20984,    53365,    84840,   115408,
+      145065,   173811,   201644,   228564,
+      254573,   279670,   303857,   327137,
+      349514,   370989,   391569,   411256,
+      430058,   447979,   465026,   481205,
+      496525,   510994,   524620,   537411,
+      549378,   560530,   570877,   580431,
+      589202,   597202,   604443,   610938,
+      616698,   621737,   626068,   629705,
+      632662,   634953,   636592,   637593,
+      637972,   637745,   636925,   635529,
+      633571,   631069,   628037,   624491,
+     -620449,  -615925,  -610936,  -605498,
+     -599627,  -593341,  -586654,  -579584,
+     -572146,  -564357,  -556233,  -547790,
+     -539044,  -530011,  -520707,  -511147,
+     -501347,  -491323,  -481090,  -470663,
+     -460057,  -449286,  -438366,  -427310,
+     -416133,  -404848,  -393471,  -382013,
+     -370488,  -358909,  -347289,  -335641,
+     -323976,  -312306,  -300644,  -288999,
+     -277384,  -265810,  -254285,  -242822,
+     -231429,  -220116,  -208892,  -197766,
+     -186747,  -175843,  -165061,  -154410,
+     -143897,  -133528,  -123311,  -113251,
+     -103355,   -93628,   -84076,   -74704,
+      -65516,   -56517,   -47711,   -39101,
+      -30692,   -22487,   -14487,    -6697,
+         881,     8247,    15397,    22330,
+       29045,    35542,    41818,    47875,
+       53711,    59327,    64723,    69900,
+       74857,    79597,    84119,    88426,
+       92520,    96401,   100071,   103534,
+      106790,   109843,   112694,   115348,
+      117806,   120071,   122148,   124038,
+      125746,   127274,   128626,   129807,
+      130820,   131668,   132355,   132886,
+      133265,   133495,   133582,   133528,
+      133338,   133016,   132568,   131997,
+      131308,   130504,   129591,   128571,
+      127451,   126234,   124925,   123527,
+      122045,   120484,   118847,   117138,
+      115362,   113524,   111626,   109673,
+      107668,   105616,   103521,   101386,
+      -99215,   -97011,   -94778,   -92519,
+      -90239,   -87939,   -85624,   -83296,
+      -80959,   -78615,   -76268,   -73919,
+      -71573,   -69231,   -66897,   -64571,
+      -62258,   -59959,   -57676,   -55411,
+      -53167,   -50945,   -48747,   -46575,
+      -44431,   -42315,   -40231,   -38177,
+      -36158,   -34172,   -32222,   -30309,
+      -28433,   -26596,   -24798,   -23039,
+      -21322,   -19645,   -18011,   -16418,
+      -14868,   -13361,   -11897,   -10476,
+       -9098,    -7764,    -6474,    -5226,
+       -4022,    -2861,    -1743,     -668,
+         366,     1357,     2306,     3215,
+        4083,     4910,     5698,     6447,
+        7158,     7830,     8466,     9065,
+        9628,    10156,    10650,    11111,
+       11538,    11934,    12298,    12632,
+       12937,    13213,    13461,    13682,
+       13877,    14046,    14191,    14313,
+       14412,    14488,    14544,    14580,
+       14596,    14594,    14573,    14536,
+       14483,    14415,    14331,    14235,
+       14125,    14002,    13869,    13724,
+       13569,    13405,    13231,    13050,
+       12861,    12665,    12463,    12255,
+       12042,    11825,    11603,    11377,
+       11149,    10918,    10685,    10450,
+       10214,     9977,     9739,     9501,
+        9264,     9027,     8791,     8556,
+        8322,     8090,     7861,     7633,
+        7408,     7186,     6966,     6749,
+       -6536,    -6326,    -6119,    -5916,
+       -5717,    -5522,    -5330,    -5143,
+       -4959,    -4780,    -4604,    -4433,
+       -4267,    -4104,    -3946,    -3791,
+       -3642,    -3496,    -3354,    -3217,
+       -3084,    -2955,    -2830,    -2709,
+       -2592,    -2479,    -2370,    -2265,
+       -2163,    -2065,    -1971,    -1880,
+       -1793,    -1709,    -1628,    -1550,
+       -1476,    -1405,    -1336,    -1271,
+       -1208,    -1148,    -1090,    -1035,
+        -983,     -932,     -884,     -839,
+        -795,     -753,     -714,     -676,
+        -640,     -606,     -574,     -543,
+        -513,     -485,     -459,     -433,
+        -410,     -387,     -365,     -345,
+        -325,     -307,     -289,     -273,
+        -257,     -242,     -228,     -215,
+        -202,     -190,     -178,     -167,
+        -157,     -147,     -138,     -129,
+        -121,     -113,     -105,      -98,
+         -91,      -85,      -79,      -73,
+         -67,      -62,      -57,      -52,
+         -48,      -43,      -38,      -38
+};
+
 /*
  * D.11 Look-up Table for Downmix Scale Factors
  *
@@ -8193,17 +8698,31 @@ const uint32_t ff_dca_inv_dmixtable[FF_DCA_INV_DMIXTABLE_SIZE] = {
       65536,
 };
 
-const float ff_dca_default_coeffs[10][6][2] = {
-    { { 0.707107, 0.707107 }, { 0.000000, 0.000000 },                                                                                                 }, // A [LFE]
-    { { 1.000000, 0.000000 }, { 0.000000, 1.000000 }, { 0.000000, 0.000000 },                                                                         }, // A + B (dual mono) [LFE]
-    { { 1.000000, 0.000000 }, { 0.000000, 1.000000 }, { 0.000000, 0.000000 },                                                                         }, // L + R (stereo) [LFE]
-    { { 1.000000, 0.000000 }, { 0.000000, 1.000000 }, { 0.000000, 0.000000 },                                                                         }, // (L+R) + (L-R) (sum-difference) [LFE]
-    { { 1.000000, 0.000000 }, { 0.000000, 1.000000 }, { 0.000000, 0.000000 },                                                                         }, // LT + RT (left and right total) [LFE]
-    { { 0.501187, 0.501187 }, { 0.707107, 0.000000 }, { 0.000000, 0.707107 }, { 0.000000, 0.000000 },                                                 }, // C + L + R [LFE]
-    { { 0.707107, 0.000000 }, { 0.000000, 0.707107 }, { 0.501187, 0.501187 }, { 0.000000, 0.000000 },                                                 }, // L + R + S [LFE]
-    { { 0.501187, 0.501187 }, { 0.707107, 0.000000 }, { 0.000000, 0.707107 }, { 0.501187, 0.501187 }, { 0.000000, 0.000000 },                         }, // C + L + R + S [LFE]
-    { { 0.707107, 0.000000 }, { 0.000000, 0.707107 }, { 0.501187, 0.000000 }, { 0.000000, 0.501187 }, { 0.000000, 0.000000 },                         }, // L + R + SL + SR [LFE]
-    { { 0.501187, 0.501187 }, { 0.707107, 0.000000 }, { 0.000000, 0.707107 }, { 0.501187, 0.000000 }, { 0.000000, 0.501187 }, { 0.000000, 0.000000 }, }, // C + L + R + SL + SR [LFE]
+const uint16_t ff_dca_xll_refl_coeff[128] = { /* strictly increasing, 0 .. 65491; NOTE(review): looks like a Q16 magnitude table -- confirm with the XLL decoder */
+        0,  3070,  5110,  7140,  9156, 11154, 13132, 15085,
+    17010, 18904, 20764, 22588, 24373, 26117, 27818, 29474,
+    31085, 32648, 34164, 35631, 37049, 38418, 39738, 41008,
+    42230, 43404, 44530, 45609, 46642, 47630, 48575, 49477,
+    50337, 51157, 51937, 52681, 53387, 54059, 54697, 55302,
+    55876, 56421, 56937, 57426, 57888, 58326, 58741, 59132,
+    59502, 59852, 60182, 60494, 60789, 61066, 61328, 61576,
+    61809, 62029, 62236, 62431, 62615, 62788, 62951, 63105,
+    63250, 63386, 63514, 63635, 63749, 63855, 63956, 64051,
+    64140, 64224, 64302, 64376, 64446, 64512, 64573, 64631,
+    64686, 64737, 64785, 64830, 64873, 64913, 64950, 64986,
+    65019, 65050, 65079, 65107, 65133, 65157, 65180, 65202,
+    65222, 65241, 65259, 65275, 65291, 65306, 65320, 65333,
+    65345, 65357, 65368, 65378, 65387, 65396, 65405, 65413,
+    65420, 65427, 65434, 65440, 65446, 65451, 65456, 65461,
+    65466, 65470, 65474, 65478, 65481, 65485, 65488, 65491
+};
+
+const int32_t ff_dca_xll_band_coeff[20] = { /* every magnitude is below 2^23 and 4194304 == 2^22; NOTE(review): suggests Q23 fixed point -- confirm against the code that reads it */
+      868669, -5931642, -1228483,  4194304,
+      -20577,   122631,  -393647,   904476,
+    -1696305,  2825313, -4430736,  6791313,
+       41153,  -245210,   785564, -1788164,
+     3259333, -5074941,  6928550, -8204883
 };
 
 const int32_t ff_dca_sampling_freqs[16] = {
@@ -8211,202 +8730,6 @@ const int32_t ff_dca_sampling_freqs[16] = {
     176400, 352800, 12000, 24000,  48000, 96000, 192000, 384000,
 };
 
-/* downmix coeffs
- *
- * TABLE 9
- * ______________________________________
- * Down-mix coefficients for 8-channel source
- * audio (5 + 3 format)
- * lt
- * cen- rt lt ctr rt
- * lt ter ctr center
- * rt srd srd srd
- * ______________________________________
- * 1 0.71 0.74 1.0 0.71 0.71 0.58 0.58 0.58
- * 2 left 1.0 0.89 0.71 0.46 0.71 0.50
- * rt 0.45 0.71 0.89 1.0 0.50 0.71
- * 3 lt 1.0 0.89 0.71 0.45
- * rt 0.45 0.71 0.89 1.0
- * srd 0.71 0.71 0.71
- * 4 lt 1.0 0.89 0.71 0.45
- * rt 0.45 0.71 0.89 1.0
- * lt srd 1.0 0.71
- * rt srd 0.71 0.71
- * 4 lt 1.0 0.5
- * ctr 0.87 1.0 0.87
- * rt 0.5 1.0
- * srd 0.71 0.71 0.71
- * 5 lt 1.0 0.5
- * ctr 0.87 1.0 0.87
- * rt 0.5 1.0
- * lt srd 1.0 0.71
- * rt srd 0.71 1.0
- * 6 lt 1.0 0.5
- * lt ctr 0.87 0.71
- * rt ctr 0.71 0.87
- * rt 0.5 1.0
- * lt srd 1.0 0.71
- * rt srd 0.71 1.0
- * 6 lt 1.0 0.5
- * ctr 0.86 1.0 0.86
- * rt 0.5 1.0
- * lt srd 1.0
- * ctr srd 1.0
- * rt srd 1.0
- * 7 lt 1.0
- * lt ctr 1.0
- * ctr 1.0
- * rt ctr 1.0
- * rt 1.0
- * lt srd 1.0 0.71
- * rt srd 0.71 1.0
- * 7 lt 1.0 0.5
- * lt ctr 0.87 0.71
- * rt ctr 0.71 0.87
- * rt 0.5 1.0
- * lt srd 1.0
- * ctr srd 1.0
- * rt srd 1.0
- * 8 lt 1.0 0.5
- * lt ctr 0.87 0.71
- * rt ctr 0.71 0.87
- * rt 0.5 1.0
- * lt 1 srd 0.87 0.35
- * lt 2 srd 0.5 0.61
- * rt 2 srd 0.61 0.50
- * rt 2 srd 0.35 0.87
- *
- * Generation of Lt Rt
- *
- * In the case when the playback system has analog or digital surround
- * multi-channel capability, a down matrix from 5, 4, or 3 channel to
- * Lt Rt may be desirable. In the case when the number of decoded audio
- * channels exceeds 5, 4 or 3 respectively a first stage down mix to 5,
- * 4 or 3 chs should be used as described above.
- *
- * The down matrixing equations for 5-channel source audio to a
- * two-channel Lt Rt playback system are given by:
- *
- * Left  = left  + 0.7 * center - 0.7 * (lt surround + rt surround)
- *
- * Right = right + 0.7 * center + 0.7 * (lt surround + rt surround)
- *
- * Embedded mixing to 2-channel
- *
- * One concern arising from the proliferation of multi-channel audio
- * systems is that most home systems presently have only two channel
- * playback capability. To accommodate this a fixed 2-channel down
- * matrix processes is commonly used following the multi-channel
- * decoding stage. However, for music only applications the image
- * quality etc. of the down matrixed signal may not match that of an
- * equivalent stereo recording found on CD.
- *
- * The concept of embedded mixing is to allow the producer to
- * dynamically specify the matrixing coefficients within the audio
- * frame itself. In this way the stereo down mix at the decoder may be
- * better matched to a 2-channel playback environment.
- *
- * CHS*2, 7-bit down mix indexes (MCOEFFS) are transmitted along with
- * the multi-channel audio once in every frame. The indexes are
- * converted to attenuation factors using a 7 bit LUT. The 2-ch down
- * mix equations are as follows,
- *
- * Left Ch  = sum (MCOEFF[n]       * Ch[n]) for n=1, CHS
- *
- * Right Ch = sum (MCOEFF[n + CHS] * Ch[n]) for n=1, CHS
- *
- * where Ch(n) represents the subband samples in the (n)th audio channel.
- */
-
-const uint32_t ff_dca_map_xxch_to_native[28] = {
-    AV_CH_FRONT_CENTER,
-    AV_CH_FRONT_LEFT,
-    AV_CH_FRONT_RIGHT,
-    AV_CH_SIDE_LEFT,
-    AV_CH_SIDE_RIGHT,
-    AV_CH_LOW_FREQUENCY,
-    AV_CH_BACK_CENTER,
-    AV_CH_BACK_LEFT,
-    AV_CH_BACK_RIGHT,
-    AV_CH_SIDE_LEFT,           /* side surround left -- dup sur side L */
-    AV_CH_SIDE_RIGHT,          /* side surround right -- dup sur side R */
-    AV_CH_FRONT_LEFT_OF_CENTER,
-    AV_CH_FRONT_RIGHT_OF_CENTER,
-    AV_CH_TOP_FRONT_LEFT,
-    AV_CH_TOP_FRONT_CENTER,
-    AV_CH_TOP_FRONT_RIGHT,
-    AV_CH_LOW_FREQUENCY,        /* lfe2 -- duplicate lfe1 position */
-    AV_CH_FRONT_LEFT_OF_CENTER, /* side front left -- dup front cntr L */
-    AV_CH_FRONT_RIGHT_OF_CENTER,/* side front right -- dup front cntr R */
-    AV_CH_TOP_CENTER,           /* overhead */
-    AV_CH_TOP_FRONT_LEFT,       /* side high left -- dup */
-    AV_CH_TOP_FRONT_RIGHT,      /* side high right -- dup */
-    AV_CH_TOP_BACK_CENTER,
-    AV_CH_TOP_BACK_LEFT,
-    AV_CH_TOP_BACK_RIGHT,
-    AV_CH_BACK_CENTER,          /* rear low center -- dup */
-    AV_CH_BACK_LEFT,            /* rear low left -- dup */
-    AV_CH_BACK_RIGHT            /* read low right -- dup  */
-};
-
-/* -1 are reserved or unknown */
-const int ff_dca_ext_audio_descr_mask[8] = {
-    DCA_EXT_XCH,
-    -1,
-    DCA_EXT_X96,
-    DCA_EXT_XCH | DCA_EXT_X96,
-    -1,
-    -1,
-    DCA_EXT_XXCH,
-    -1,
-};
-
-/* Tables for mapping dts channel configurations to libavcodec multichannel api.
- * Some compromises have been made for special configurations. Most configurations
- * are never used so complete accuracy is not needed.
- *
- * L = left, R = right, C = center, S = surround, F = front, R = rear, T = total, OV = overhead.
- * S  -> side, when both rear and back are configured move one of them to the side channel
- * OV -> center back
- * All 2 channel configurations -> AV_CH_LAYOUT_STEREO
- */
-const uint64_t ff_dca_core_channel_layout[16] = {
-    AV_CH_FRONT_CENTER,                                                     ///< 1, A
-    AV_CH_LAYOUT_STEREO,                                                    ///< 2, A + B (dual mono)
-    AV_CH_LAYOUT_STEREO,                                                    ///< 2, L + R (stereo)
-    AV_CH_LAYOUT_STEREO,                                                    ///< 2, (L + R) + (L - R) (sum-difference)
-    AV_CH_LAYOUT_STEREO,                                                    ///< 2, LT + RT (left and right total)
-    AV_CH_LAYOUT_STEREO | AV_CH_FRONT_CENTER,                               ///< 3, C + L + R
-    AV_CH_LAYOUT_STEREO | AV_CH_BACK_CENTER,                                ///< 3, L + R + S
-    AV_CH_LAYOUT_STEREO | AV_CH_FRONT_CENTER | AV_CH_BACK_CENTER,           ///< 4, C + L + R + S
-    AV_CH_LAYOUT_STEREO | AV_CH_SIDE_LEFT | AV_CH_SIDE_RIGHT,               ///< 4, L + R + SL + SR
-
-    AV_CH_LAYOUT_STEREO | AV_CH_FRONT_CENTER | AV_CH_SIDE_LEFT |
-    AV_CH_SIDE_RIGHT,                                                       ///< 5, C + L + R + SL + SR
-
-    AV_CH_LAYOUT_STEREO | AV_CH_SIDE_LEFT | AV_CH_SIDE_RIGHT |
-    AV_CH_FRONT_LEFT_OF_CENTER | AV_CH_FRONT_RIGHT_OF_CENTER,               ///< 6, CL + CR + L + R + SL + SR
-
-    AV_CH_LAYOUT_STEREO | AV_CH_BACK_LEFT | AV_CH_BACK_RIGHT |
-    AV_CH_FRONT_CENTER  | AV_CH_BACK_CENTER,                                ///< 6, C + L + R + LR + RR + OV
-
-    AV_CH_FRONT_CENTER | AV_CH_FRONT_RIGHT_OF_CENTER |
-    AV_CH_FRONT_LEFT_OF_CENTER | AV_CH_BACK_CENTER   |
-    AV_CH_BACK_LEFT | AV_CH_BACK_RIGHT,                                     ///< 6, CF + CR + LF + RF + LR + RR
-
-    AV_CH_FRONT_LEFT_OF_CENTER | AV_CH_FRONT_CENTER   |
-    AV_CH_FRONT_RIGHT_OF_CENTER | AV_CH_LAYOUT_STEREO |
-    AV_CH_SIDE_LEFT | AV_CH_SIDE_RIGHT,                                     ///< 7, CL + C + CR + L + R + SL + SR
-
-    AV_CH_FRONT_LEFT_OF_CENTER | AV_CH_FRONT_RIGHT_OF_CENTER |
-    AV_CH_LAYOUT_STEREO | AV_CH_SIDE_LEFT | AV_CH_SIDE_RIGHT |
-    AV_CH_BACK_LEFT | AV_CH_BACK_RIGHT,                                     ///< 8, CL + CR + L + R + SL1 + SL2 + SR1 + SR2
-
-    AV_CH_FRONT_LEFT_OF_CENTER | AV_CH_FRONT_CENTER   |
-    AV_CH_FRONT_RIGHT_OF_CENTER | AV_CH_LAYOUT_STEREO |
-    AV_CH_SIDE_LEFT | AV_CH_BACK_CENTER | AV_CH_SIDE_RIGHT,                 ///< 8, CL + C + CR + L + R + SL + S + SR
-};
-
 const int8_t ff_dca_lfe_index[16] = {
     1, 2, 2, 2, 2, 3, 2, 3, 2, 3, 2, 3, 1, 3, 2, 3
 };
@@ -8430,25 +8753,6 @@ const int8_t ff_dca_channel_reorder_lfe[16][9] = {
     { 4,  2,  5,  0,  1,  6,  8,  7, -1 },
 };
 
-const int8_t ff_dca_channel_reorder_lfe_xch[16][9] = {
-    { 0,  2, -1, -1, -1, -1, -1, -1, -1 },
-    { 0,  1,  3, -1, -1, -1, -1, -1, -1 },
-    { 0,  1,  3, -1, -1, -1, -1, -1, -1 },
-    { 0,  1,  3, -1, -1, -1, -1, -1, -1 },
-    { 0,  1,  3, -1, -1, -1, -1, -1, -1 },
-    { 2,  0,  1,  4, -1, -1, -1, -1, -1 },
-    { 0,  1,  3,  4, -1, -1, -1, -1, -1 },
-    { 2,  0,  1,  4,  5, -1, -1, -1, -1 },
-    { 0,  1,  4,  5,  3, -1, -1, -1, -1 },
-    { 2,  0,  1,  5,  6,  4, -1, -1, -1 },
-    { 3,  4,  0,  1,  6,  7,  5, -1, -1 },
-    { 2,  0,  1,  4,  5,  6,  7, -1, -1 },
-    { 0,  6,  4,  5,  2,  3,  7, -1, -1 },
-    { 4,  2,  5,  0,  1,  7,  8,  6, -1 },
-    { 5,  6,  0,  1,  8,  3,  9,  4,  7 },
-    { 4,  2,  5,  0,  1,  6,  9,  8,  7 },
-};
-
 const int8_t ff_dca_channel_reorder_nolfe[16][9] = {
     { 0, -1, -1, -1, -1, -1, -1, -1, -1 },
     { 0,  1, -1, -1, -1, -1, -1, -1, -1 },
@@ -8468,25 +8772,6 @@ const int8_t ff_dca_channel_reorder_nolfe[16][9] = {
     { 3,  2,  4,  0,  1,  5,  7,  6, -1 },
 };
 
-const int8_t ff_dca_channel_reorder_nolfe_xch[16][9] = {
-    { 0,  1, -1, -1, -1, -1, -1, -1, -1 },
-    { 0,  1,  2, -1, -1, -1, -1, -1, -1 },
-    { 0,  1,  2, -1, -1, -1, -1, -1, -1 },
-    { 0,  1,  2, -1, -1, -1, -1, -1, -1 },
-    { 0,  1,  2, -1, -1, -1, -1, -1, -1 },
-    { 2,  0,  1,  3, -1, -1, -1, -1, -1 },
-    { 0,  1,  2,  3, -1, -1, -1, -1, -1 },
-    { 2,  0,  1,  3,  4, -1, -1, -1, -1 },
-    { 0,  1,  3,  4,  2, -1, -1, -1, -1 },
-    { 2,  0,  1,  4,  5,  3, -1, -1, -1 },
-    { 2,  3,  0,  1,  5,  6,  4, -1, -1 },
-    { 2,  0,  1,  3,  4,  5,  6, -1, -1 },
-    { 0,  5,  3,  4,  1,  2,  6, -1, -1 },
-    { 3,  2,  4,  0,  1,  6,  7,  5, -1 },
-    { 4,  5,  0,  1,  7,  2,  8,  3,  6 },
-    { 3,  2,  4,  0,  1,  5,  8,  7,  6 },
-};
-
 const uint16_t ff_dca_vlc_offs[63] = {
         0,   512,   640,   768,  1282,  1794,  2436,  3080,  3770,  4454,  5364,
      5372,  5380,  5388,  5392,  5396,  5412,  5420,  5428,  5460,  5492,  5508,
diff --git a/libavcodec/dcadata.h b/libavcodec/dcadata.h
index 1d3d605b..d864251f 100644
--- a/libavcodec/dcadata.h
+++ b/libavcodec/dcadata.h
@@ -27,18 +27,24 @@ extern const uint32_t ff_dca_bit_rates[32];
 
 extern const uint8_t ff_dca_channels[16];
 
-extern const uint8_t ff_dca_bits_per_sample[7];
+extern const uint8_t ff_dca_bits_per_sample[8];
+
+extern const uint8_t ff_dca_dmix_primary_nch[8];
 
 extern const int16_t ff_dca_adpcm_vb[4096][4];
 
 extern const uint32_t ff_dca_scale_factor_quant6[64];
 extern const uint32_t ff_dca_scale_factor_quant7[128];
 
+extern const uint32_t ff_dca_joint_scale_factors[129];
+
+extern const uint32_t ff_dca_scale_factor_adj[4];
+
+extern const uint32_t ff_dca_quant_levels[32];
+
 extern const uint32_t ff_dca_lossy_quant[32];
-extern const float ff_dca_lossy_quant_d[32];
 
 extern const uint32_t ff_dca_lossless_quant[32];
-extern const float ff_dca_lossless_quant_d[32];
 
 extern const int8_t ff_dca_high_freq_vq[1024][32];
 
@@ -47,30 +53,30 @@ extern const float ff_dca_fir_32bands_nonperfect[512];
 
 extern const float ff_dca_lfe_fir_64[256];
 extern const float ff_dca_lfe_fir_128[256];
-extern const float ff_dca_lfe_xll_fir_64[256];
 extern const float ff_dca_fir_64bands[1024];
 
-#define FF_DCA_DMIXTABLE_SIZE      242
-#define FF_DCA_INV_DMIXTABLE_SIZE  201
+extern const int32_t ff_dca_fir_32bands_perfect_fixed[512];
+extern const int32_t ff_dca_fir_32bands_nonperfect_fixed[512];
+extern const int32_t ff_dca_lfe_fir_64_fixed[256];
+extern const int32_t ff_dca_fir_64bands_fixed[1024];
+
+#define FF_DCA_DMIXTABLE_SIZE       242U
+#define FF_DCA_INV_DMIXTABLE_SIZE   201U
+#define FF_DCA_DMIXTABLE_OFFSET     (FF_DCA_DMIXTABLE_SIZE - FF_DCA_INV_DMIXTABLE_SIZE)
 
 extern const uint16_t ff_dca_dmixtable[FF_DCA_DMIXTABLE_SIZE];
 extern const uint32_t ff_dca_inv_dmixtable[FF_DCA_INV_DMIXTABLE_SIZE];
 
-extern const float ff_dca_default_coeffs[10][6][2];
-
-extern const uint32_t ff_dca_map_xxch_to_native[28];
-extern const int ff_dca_ext_audio_descr_mask[8];
+extern const uint16_t ff_dca_xll_refl_coeff[128];
 
-extern const uint64_t ff_dca_core_channel_layout[16];
+extern const int32_t ff_dca_xll_band_coeff[20];
 
 extern const int32_t ff_dca_sampling_freqs[16];
 
 extern const int8_t ff_dca_lfe_index[16];
 
 extern const int8_t ff_dca_channel_reorder_lfe[16][9];
-extern const int8_t ff_dca_channel_reorder_lfe_xch[16][9];
 extern const int8_t ff_dca_channel_reorder_nolfe[16][9];
-extern const int8_t ff_dca_channel_reorder_nolfe_xch[16][9];
 
 extern const uint16_t ff_dca_vlc_offs[63];
 
diff --git a/libavcodec/dcadct.c b/libavcodec/dcadct.c
new file mode 100644
index 00000000..1082aa88
--- /dev/null
+++ b/libavcodec/dcadct.c
@@ -0,0 +1,362 @@
+/*
+ * Copyright (C) 2016 foo86
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/common.h"
+
+#include "dcadct.h"
+#include "dcamath.h"
+
+static void sum_a(const int *input, int *output, int len)
+{
+    int i;
+    /* Add each adjacent pair: output[i] = input[2i] + input[2i + 1]; reads 2 * len inputs, writes len outputs. */
+    for (i = 0; i < len; i++)
+        output[i] = input[2 * i] + input[2 * i + 1];
+}
+
+static void sum_b(const int *input, int *output, int len)
+{
+    int i;
+    /* Each even-indexed input plus the odd-indexed one just before it; index 0 has no predecessor, so output[0] is input[0] alone. */
+    output[0] = input[0];
+    for (i = 1; i < len; i++)
+        output[i] = input[2 * i] + input[2 * i - 1];
+}
+
+static void sum_c(const int *input, int *output, int len)
+{
+    int i;
+    /* No addition despite the name: copies the even-indexed inputs, output[i] = input[2i]. */
+    for (i = 0; i < len; i++)
+        output[i] = input[2 * i];
+}
+
+static void sum_d(const int *input, int *output, int len)
+{
+    int i;
+    /* Sum of consecutive odd-indexed inputs: output[i] = input[2i - 1] + input[2i + 1]; output[0] is input[1] alone. */
+    output[0] = input[1];
+    for (i = 1; i < len; i++)
+        output[i] = input[2 * i - 1] + input[2 * i + 1];
+}
+
+static void dct_a(const int *input, int *output)
+{
+    static const int cos_mod[8][8] = {
+         { 8348215,  8027397,  7398092,  6484482,  5321677,  3954362,  2435084,   822227 },
+         { 8027397,  5321677,   822227, -3954362, -7398092, -8348215, -6484482, -2435084 },
+         { 7398092,   822227, -6484482, -8027397, -2435084,  5321677,  8348215,  3954362 },
+         { 6484482, -3954362, -8027397,   822227,  8348215,  2435084, -7398092, -5321677 },
+         { 5321677, -7398092, -2435084,  8348215,  -822227, -8027397,  3954362,  6484482 },
+         { 3954362, -8348215,  5321677,  2435084, -8027397,  6484482,   822227, -7398092 },
+         { 2435084, -6484482,  8348215, -7398092,  3954362,   822227, -5321677,  8027397 },
+         {  822227, -2435084,  3954362, -5321677,  6484482, -7398092,  8027397, -8348215 }
+    };
+    /* All entries are below 2^23 in magnitude; NOTE(review): presumably Q23 cosine constants -- confirm. */
+    int i, j;
+    /* 8x8 matrix-vector product: output[i] = norm23(sum over j of cos_mod[i][j] * input[j]), accumulated in 64 bits. */
+    for (i = 0; i < 8; i++) {
+        int64_t res = 0;
+        for (j = 0; j < 8; j++)
+            res += (int64_t)cos_mod[i][j] * input[j];
+        output[i] = norm23(res);
+    }
+}
+
+static void dct_b(const int *input, int *output)
+{
+    static const int cos_mod[8][7] = {
+        {  8227423,  7750063,  6974873,  5931642,  4660461,  3210181,  1636536 },
+        {  6974873,  3210181, -1636536, -5931642, -8227423, -7750063, -4660461 },
+        {  4660461, -3210181, -8227423, -5931642,  1636536,  7750063,  6974873 },
+        {  1636536, -7750063, -4660461,  5931642,  6974873, -3210181, -8227423 },
+        { -1636536, -7750063,  4660461,  5931642, -6974873, -3210181,  8227423 },
+        { -4660461, -3210181,  8227423, -5931642, -1636536,  7750063, -6974873 },
+        { -6974873,  3210181,  1636536, -5931642,  8227423, -7750063,  4660461 },
+        { -8227423,  7750063, -6974873,  5931642, -4660461,  3210181, -1636536 }
+    };
+    /* Only 7 columns: input[0] bypasses the table and is weighted by exactly 1 << 23 in every output. */
+    int i, j;
+    /* output[i] = norm23(input[0] * 2^23 + sum over j of cos_mod[i][j] * input[1 + j]), accumulated in 64 bits. */
+    for (i = 0; i < 8; i++) {
+        int64_t res = input[0] * (INT64_C(1) << 23);
+        for (j = 0; j < 7; j++)
+            res += (int64_t)cos_mod[i][j] * input[1 + j];
+        output[i] = norm23(res);
+    }
+}
+
+static void mod_a(const int *input, int *output)
+{
+    static const int cos_mod[16] = {
+          4199362,   4240198,   4323885,   4454708,
+          4639772,   4890013,   5221943,   5660703,
+         -6245623,  -7040975,  -8158494,  -9809974,
+        -12450076, -17261920, -28585092, -85479984
+    };
+    /* 16 inputs -> 16 outputs; every output is scaled through mul23() (declared in dcamath.h). */
+    int i, k;
+    /* First half: scaled sums of input[i] and input[8 + i]. */
+    for (i = 0; i < 8; i++)
+        output[i] = mul23(cos_mod[i], input[i] + input[8 + i]);
+    /* Second half: scaled differences, with k walking 7 .. 0 so the pairs are taken in reverse order. */
+    for (i = 8, k = 7; i < 16; i++, k--)
+        output[i] = mul23(cos_mod[i], input[k] - input[8 + k]);
+}
+
+static void mod_b(int *input, int *output)
+{
+    static const int cos_mod[8] = {
+        4214598,  4383036,  4755871,  5425934,
+        6611520,  8897610, 14448934, 42791536
+    };
+    /* Unlike mod_a(), input is not const: input[8..15] is overwritten below. */
+    int i, k;
+    /* Scale the upper half of the input in place before combining it with the lower half. */
+    for (i = 0; i < 8; i++)
+        input[8 + i] = mul23(cos_mod[i], input[8 + i]);
+
+    for (i = 0; i < 8; i++)
+        output[i] = input[i] + input[8 + i];
+    /* Differences are written with k walking 7 .. 0, i.e. in reverse pair order. */
+    for (i = 8, k = 7; i < 16; i++, k--)
+        output[i] = input[k] - input[8 + k];
+}
+
+static void mod_c(const int *input, int *output)
+{
+    static const int cos_mod[32] = {
+         1048892,  1051425,   1056522,   1064244,
+         1074689,  1087987,   1104313,   1123884,
+         1146975,  1173922,   1205139,   1241133,
+         1282529,  1330095,   1384791,   1447815,
+        -1520688, -1605358,  -1704360,  -1821051,
+        -1959964, -2127368,  -2332183,  -2587535,
+        -2913561, -3342802,  -3931480,  -4785806,
+        -6133390, -8566050, -14253820, -42727120
+    };
+    /* Same shape as mod_a() but over 32 values: 16 scaled sums followed by 16 scaled differences. */
+    int i, k;
+    /* Sums of input[i] and input[16 + i]. */
+    for (i = 0; i < 16; i++)
+        output[i] = mul23(cos_mod[i], input[i] + input[16 + i]);
+    /* Differences, with k walking 15 .. 0 so the pairs are taken in reverse order. */
+    for (i = 16, k = 15; i < 32; i++, k--)
+        output[i] = mul23(cos_mod[i], input[k] - input[16 + k]);
+}
+
+static void clp_v(int *input, int len)
+{
+    int i;
+    /* Apply clip23() (declared in dcamath.h) to each of the len values in place. */
+    for (i = 0; i < len; i++)
+        input[i] = clip23(input[i]);
+}
+
+static void imdct_half_32(int32_t *output, const int32_t *input)
+{
+    int buf_a[32], buf_b[32];
+    int i, k, mag, shift, round;
+    /* Sum the absolute values of the 32 inputs to decide whether to pre-scale. */
+    mag = 0;
+    for (i = 0; i < 32; i++)
+        mag += abs(input[i]);
+    /* Above 0x400000 the inputs are shifted right by 2 (round = 2 is added first); the shift is undone after mod_c(). */
+    shift = mag > 0x400000 ? 2 : 0;
+    round = shift > 0 ? 1 << (shift - 1) : 0;
+
+    for (i = 0; i < 32; i++)
+        buf_a[i] = (input[i] + round) >> shift;
+    /* The stages below ping-pong between buf_a and buf_b, with clp_v() after each stage. */
+    sum_a(buf_a, buf_b +  0, 16);
+    sum_b(buf_a, buf_b + 16, 16);
+    clp_v(buf_b, 32);
+
+    sum_a(buf_b +  0, buf_a +  0, 8);
+    sum_b(buf_b +  0, buf_a +  8, 8);
+    sum_c(buf_b + 16, buf_a + 16, 8);
+    sum_d(buf_b + 16, buf_a + 24, 8);
+    clp_v(buf_a, 32);
+
+    dct_a(buf_a +  0, buf_b +  0);
+    dct_b(buf_a +  8, buf_b +  8);
+    dct_b(buf_a + 16, buf_b + 16);
+    dct_b(buf_a + 24, buf_b + 24);
+    clp_v(buf_b, 32);
+
+    mod_a(buf_b +  0, buf_a +  0);
+    mod_b(buf_b + 16, buf_a + 16);
+    clp_v(buf_a, 32);
+
+    mod_c(buf_a, buf_b);
+    /* Undo the pre-scale; written as a multiply rather than a left shift of a possibly negative value. */
+    for (i = 0; i < 32; i++)
+        buf_b[i] = clip23(buf_b[i] * (1 << shift));
+    /* k == 31 - i: output[0..15] gets the clipped differences, output[16..31] the clipped sums of each mirrored pair. */
+    for (i = 0, k = 31; i < 16; i++, k--) {
+        output[     i] = clip23(buf_b[i] - buf_b[k]);
+        output[16 + i] = clip23(buf_b[i] + buf_b[k]);
+    }
+}
+
+static void mod64_a(const int *input, int *output)
+{
+    /* Folds 32 inputs into 32 outputs: scaled pair sums, then pair differences. */
+    static const int factors[32] = {
+          4195568,   4205700,   4226086,    4256977,
+          4298755,   4351949,   4417251,    4495537,
+          4587901,   4695690,   4820557,    4964534,
+          5130115,   5320382,   5539164,    5791261,
+         -6082752,  -6421430,  -6817439,   -7284203,
+         -7839855,  -8509474,  -9328732,  -10350140,
+        -11654242, -13371208, -15725922,  -19143224,
+        -24533560, -34264200, -57015280, -170908480
+    };
+    const int *const upper = input + 16;
+    int n;
+    /* Pairs are (input[n], input[16 + n]); differences are emitted for n = 15..0. */
+    for (n = 0; n < 16; n++)
+        output[n] = mul23(factors[n], input[n] + upper[n]);
+    for (n = 15; n >= 0; n--)
+        output[31 - n] = mul23(factors[31 - n], input[n] - upper[n]);
+}
+
+static void mod64_b(int *input, int *output)
+{
+    /* Constants multiplied into input[16..31] through mul23(). */
+    static const int factors[16] = {
+         4199362,  4240198,  4323885,  4454708,
+         4639772,  4890013,  5221943,  5660703,
+         6245623,  7040975,  8158494,  9809974,
+        12450076, 17261920, 28585092, 85479984
+    };
+    int *const upper = input + 16;
+    int n;
+    /* Pass 1 rescales the upper half in place, so the caller's buffer changes. */
+    for (n = 0; n < 16; n++)
+        upper[n] = mul23(factors[n], upper[n]);
+    /* Pass 2 stores sums in output[0..15]; pass 3 stores differences in [16..31]. */
+    for (n = 0; n < 16; n++)
+        output[n] = input[n] + upper[n];
+    for (n = 15; n >= 0; n--)
+        output[31 - n] = input[n] - upper[n];
+}
+
+static void mod64_c(const int *input, int *output)
+{
+    /* Folds 64 inputs into 64 outputs: scaled pair sums, then pair differences. */
+    static const int factors[64] = {
+          741511,    741958,    742853,    744199,
+          746001,    748262,    750992,    754197,
+          757888,    762077,    766777,    772003,
+          777772,    784105,    791021,    798546,
+          806707,    815532,    825054,    835311,
+          846342,    858193,    870912,    884554,
+          899181,    914860,    931667,    949686,
+          969011,    989747,   1012012,   1035941,
+        -1061684,  -1089412,  -1119320,  -1151629,
+        -1186595,  -1224511,  -1265719,  -1310613,
+        -1359657,  -1413400,  -1472490,  -1537703,
+        -1609974,  -1690442,  -1780506,  -1881904,
+        -1996824,  -2128058,  -2279225,  -2455101,
+        -2662128,  -2909200,  -3208956,  -3579983,
+        -4050785,  -4667404,  -5509372,  -6726913,
+        -8641940, -12091426, -20144284, -60420720
+    };
+    const int *const upper = input + 32;
+    int n;
+    /* Pairs are (input[n], input[32 + n]); differences are emitted for n = 31..0. */
+    for (n = 0; n < 32; n++)
+        output[n] = mul23(factors[n], input[n] + upper[n]);
+    for (n = 31; n >= 0; n--)
+        output[63 - n] = mul23(factors[63 - n], input[n] - upper[n]);
+}
+
+static void imdct_half_64(int32_t *output, const int32_t *input)
+{
+    int buf_a[64], buf_b[64]; /* scratch: each stage reads one, writes the other */
+    int i, k, mag, shift, round;
+    /* Reads input[0..63] (never written here) and produces output[0..63]. */
+    mag = 0; /* sum of absolute input values; only used to pick the scaling */
+    for (i = 0; i < 64; i++)
+        mag += abs(input[i]);
+    /* Large totals are processed at 1/4 scale; round is then 2, otherwise 0. */
+    shift = mag > 0x400000 ? 2 : 0;
+    round = shift > 0 ? 1 << (shift - 1) : 0;
+    /* Load the working buffer with the rounded, right-shifted input. */
+    for (i = 0; i < 64; i++)
+        buf_a[i] = (input[i] + round) >> shift;
+    /* Stage 1, buf_a -> buf_b: sum_a/sum_b with length 32, then clip all 64. */
+    sum_a(buf_a, buf_b +  0, 32);
+    sum_b(buf_a, buf_b + 32, 32);
+    clp_v(buf_b, 64);
+    /* Stage 2, buf_b -> buf_a: four length-16 sum_* passes over the two halves. */
+    sum_a(buf_b +  0, buf_a +  0, 16);
+    sum_b(buf_b +  0, buf_a + 16, 16);
+    sum_c(buf_b + 32, buf_a + 32, 16);
+    sum_d(buf_b + 32, buf_a + 48, 16);
+    clp_v(buf_a, 64);
+    /* Stage 3, buf_a -> buf_b: eight length-8 passes, one pair per 16-value group. */
+    sum_a(buf_a +  0, buf_b +  0, 8);
+    sum_b(buf_a +  0, buf_b +  8, 8);
+    sum_c(buf_a + 16, buf_b + 16, 8);
+    sum_d(buf_a + 16, buf_b + 24, 8);
+    sum_c(buf_a + 32, buf_b + 32, 8);
+    sum_d(buf_a + 32, buf_b + 40, 8);
+    sum_c(buf_a + 48, buf_b + 48, 8);
+    sum_d(buf_a + 48, buf_b + 56, 8);
+    clp_v(buf_b, 64);
+    /* Stage 4, buf_b -> buf_a: dct_a at offset 0, dct_b at offsets 8 through 56. */
+    dct_a(buf_b +  0, buf_a +  0);
+    dct_b(buf_b +  8, buf_a +  8);
+    dct_b(buf_b + 16, buf_a + 16);
+    dct_b(buf_b + 24, buf_a + 24);
+    dct_b(buf_b + 32, buf_a + 32);
+    dct_b(buf_b + 40, buf_a + 40);
+    dct_b(buf_b + 48, buf_a + 48);
+    dct_b(buf_b + 56, buf_a + 56);
+    clp_v(buf_a, 64);
+    /* Stage 5, buf_a -> buf_b: each mod_b call also rewrites 8 values of buf_a. */
+    mod_a(buf_a +  0, buf_b +  0);
+    mod_b(buf_a + 16, buf_b + 16);
+    mod_b(buf_a + 32, buf_b + 32);
+    mod_b(buf_a + 48, buf_b + 48);
+    clp_v(buf_b, 64);
+    /* Stage 6, buf_b -> buf_a: mod64_b also rewrites buf_b[48..63] in place. */
+    mod64_a(buf_b +  0, buf_a +  0);
+    mod64_b(buf_b + 32, buf_a + 32);
+    clp_v(buf_a, 64);
+    /* Stage 7, buf_a -> buf_b: mod64_c over all 64 values (not clipped here). */
+    mod64_c(buf_a, buf_b);
+    /* Undo the pre-scaling; a multiply is used instead of << on signed values. */
+    for (i = 0; i < 64; i++)
+        buf_b[i] = clip23(buf_b[i] * (1 << shift));
+    /* Pair buf_b[i] with buf_b[63 - i]: difference -> output[i], sum -> output[32 + i]. */
+    for (i = 0, k = 63; i < 32; i++, k--) {
+        output[     i] = clip23(buf_b[i] - buf_b[k]);
+        output[32 + i] = clip23(buf_b[i] + buf_b[k]);
+    }
+}
+
+av_cold void ff_dcadct_init(DCADCTContext *c)
+{   /* Fills both entries of the context's imdct_half function table. */
+    c->imdct_half[0] = imdct_half_32; /* entry 0: routine for 32 coefficients */
+    c->imdct_half[1] = imdct_half_64; /* entry 1: routine for 64 coefficients */
+}
diff --git a/libavcodec/dsd_tablegen.c b/libavcodec/dcadct.h
similarity index 66%
rename from libavcodec/dsd_tablegen.c
rename to libavcodec/dcadct.h
index dbeb9fe2..518c9f90 100644
--- a/libavcodec/dsd_tablegen.c
+++ b/libavcodec/dcadct.h
@@ -1,5 +1,5 @@
 /*
- * Generate a header file for hardcoded DSD tables
+ * Copyright (C) 2016 foo86
  *
  * This file is part of FFmpeg.
  *
@@ -18,21 +18,15 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include <stdlib.h>
-#define CONFIG_HARDCODED_TABLES 0
-#include "dsd_tablegen.h"
-#include "tableprint.h"
-#include <inttypes.h>
+#ifndef AVCODEC_DCADCT_H
+#define AVCODEC_DCADCT_H
 
-int main(void)
-{
-    dsd_ctables_tableinit();
+#include "libavutil/common.h"
 
-    write_fileheader();
+typedef struct DCADCTContext {
+    void (*imdct_half[2])(int32_t *output, const int32_t *input);
+} DCADCTContext;
 
-    printf("static const double ctables[CTABLES][256] = {\n");
-    write_float_2d_array(ctables, CTABLES, 256);
-    printf("};\n");
+av_cold void ff_dcadct_init(DCADCTContext *c);
 
-    return 0;
-}
+#endif
diff --git a/libavcodec/dcadec.c b/libavcodec/dcadec.c
index 3ea1bcfc..f3c39725 100644
--- a/libavcodec/dcadec.c
+++ b/libavcodec/dcadec.c
@@ -1,11 +1,5 @@
 /*
- * DCA compatible decoder
- * Copyright (C) 2004 Gildas Bazin
- * Copyright (C) 2004 Benjamin Zores
- * Copyright (C) 2006 Benjamin Larsson
- * Copyright (C) 2007 Konstantin Shishkov
- * Copyright (C) 2012 Paul B Mahol
- * Copyright (C) 2014 Niels Möller
+ * Copyright (C) 2016 foo86
  *
  * This file is part of FFmpeg.
  *
@@ -24,2041 +18,400 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include <math.h>
-#include <stddef.h>
-#include <stdio.h>
-
-#include "libavutil/attributes.h"
-#include "libavutil/channel_layout.h"
-#include "libavutil/common.h"
-#include "libavutil/float_dsp.h"
-#include "libavutil/internal.h"
-#include "libavutil/intreadwrite.h"
-#include "libavutil/mathematics.h"
 #include "libavutil/opt.h"
-#include "libavutil/samplefmt.h"
+#include "libavutil/channel_layout.h"
 
-#include "avcodec.h"
-#include "dca.h"
+#include "dcadec.h"
+#include "dcamath.h"
 #include "dca_syncwords.h"
-#include "dcadata.h"
-#include "dcadsp.h"
-#include "dcahuff.h"
-#include "fft.h"
-#include "fmtconvert.h"
-#include "get_bits.h"
-#include "internal.h"
-#include "mathops.h"
-#include "synth_filter.h"
-
-#if ARCH_ARM
-#   include "arm/dca.h"
-#endif
-
-enum DCAMode {
-    DCA_MONO = 0,
-    DCA_CHANNEL,
-    DCA_STEREO,
-    DCA_STEREO_SUMDIFF,
-    DCA_STEREO_TOTAL,
-    DCA_3F,
-    DCA_2F1R,
-    DCA_3F1R,
-    DCA_2F2R,
-    DCA_3F2R,
-    DCA_4F2R
-};
-
-
-enum DCAXxchSpeakerMask {
-    DCA_XXCH_FRONT_CENTER          = 0x0000001,
-    DCA_XXCH_FRONT_LEFT            = 0x0000002,
-    DCA_XXCH_FRONT_RIGHT           = 0x0000004,
-    DCA_XXCH_SIDE_REAR_LEFT        = 0x0000008,
-    DCA_XXCH_SIDE_REAR_RIGHT       = 0x0000010,
-    DCA_XXCH_LFE1                  = 0x0000020,
-    DCA_XXCH_REAR_CENTER           = 0x0000040,
-    DCA_XXCH_SURROUND_REAR_LEFT    = 0x0000080,
-    DCA_XXCH_SURROUND_REAR_RIGHT   = 0x0000100,
-    DCA_XXCH_SIDE_SURROUND_LEFT    = 0x0000200,
-    DCA_XXCH_SIDE_SURROUND_RIGHT   = 0x0000400,
-    DCA_XXCH_FRONT_CENTER_LEFT     = 0x0000800,
-    DCA_XXCH_FRONT_CENTER_RIGHT    = 0x0001000,
-    DCA_XXCH_FRONT_HIGH_LEFT       = 0x0002000,
-    DCA_XXCH_FRONT_HIGH_CENTER     = 0x0004000,
-    DCA_XXCH_FRONT_HIGH_RIGHT      = 0x0008000,
-    DCA_XXCH_LFE2                  = 0x0010000,
-    DCA_XXCH_SIDE_FRONT_LEFT       = 0x0020000,
-    DCA_XXCH_SIDE_FRONT_RIGHT      = 0x0040000,
-    DCA_XXCH_OVERHEAD              = 0x0080000,
-    DCA_XXCH_SIDE_HIGH_LEFT        = 0x0100000,
-    DCA_XXCH_SIDE_HIGH_RIGHT       = 0x0200000,
-    DCA_XXCH_REAR_HIGH_CENTER      = 0x0400000,
-    DCA_XXCH_REAR_HIGH_LEFT        = 0x0800000,
-    DCA_XXCH_REAR_HIGH_RIGHT       = 0x1000000,
-    DCA_XXCH_REAR_LOW_CENTER       = 0x2000000,
-    DCA_XXCH_REAR_LOW_LEFT         = 0x4000000,
-    DCA_XXCH_REAR_LOW_RIGHT        = 0x8000000,
-};
-
-#define DCA_DOLBY                  101           /* FIXME */
-
-#define DCA_CHANNEL_BITS             6
-#define DCA_CHANNEL_MASK          0x3F
-
-#define DCA_LFE                   0x80
-
-#define HEADER_SIZE                 14
-
-#define DCA_NSYNCAUX        0x9A1105A0
-
+#include "profiles.h"
 
-/** Bit allocation */
-typedef struct BitAlloc {
-    int offset;                 ///< code values offset
-    int maxbits[8];             ///< max bits in VLC
-    int wrap;                   ///< wrap for get_vlc2()
-    VLC vlc[8];                 ///< actual codes
-} BitAlloc;
-
-static BitAlloc dca_bitalloc_index;    ///< indexes for samples VLC select
-static BitAlloc dca_tmode;             ///< transition mode VLCs
-static BitAlloc dca_scalefactor;       ///< scalefactor VLCs
-static BitAlloc dca_smpl_bitalloc[11]; ///< samples VLCs
-
-static av_always_inline int get_bitalloc(GetBitContext *gb, BitAlloc *ba,
-                                         int idx)
-{
-    return get_vlc2(gb, ba->vlc[idx].table, ba->vlc[idx].bits, ba->wrap) +
-           ba->offset;
-}
-
-static float dca_dmix_code(unsigned code);
-
-static av_cold void dca_init_vlcs(void)
-{
-    static int vlcs_initialized = 0;
-    int i, j, c = 14;
-    static VLC_TYPE dca_table[23622][2];
-
-    if (vlcs_initialized)
-        return;
-
-    dca_bitalloc_index.offset = 1;
-    dca_bitalloc_index.wrap   = 2;
-    for (i = 0; i < 5; i++) {
-        dca_bitalloc_index.vlc[i].table           = &dca_table[ff_dca_vlc_offs[i]];
-        dca_bitalloc_index.vlc[i].table_allocated = ff_dca_vlc_offs[i + 1] - ff_dca_vlc_offs[i];
-        init_vlc(&dca_bitalloc_index.vlc[i], bitalloc_12_vlc_bits[i], 12,
-                 bitalloc_12_bits[i], 1, 1,
-                 bitalloc_12_codes[i], 2, 2, INIT_VLC_USE_NEW_STATIC);
-    }
-    dca_scalefactor.offset = -64;
-    dca_scalefactor.wrap   = 2;
-    for (i = 0; i < 5; i++) {
-        dca_scalefactor.vlc[i].table           = &dca_table[ff_dca_vlc_offs[i + 5]];
-        dca_scalefactor.vlc[i].table_allocated = ff_dca_vlc_offs[i + 6] - ff_dca_vlc_offs[i + 5];
-        init_vlc(&dca_scalefactor.vlc[i], SCALES_VLC_BITS, 129,
-                 scales_bits[i], 1, 1,
-                 scales_codes[i], 2, 2, INIT_VLC_USE_NEW_STATIC);
-    }
-    dca_tmode.offset = 0;
-    dca_tmode.wrap   = 1;
-    for (i = 0; i < 4; i++) {
-        dca_tmode.vlc[i].table           = &dca_table[ff_dca_vlc_offs[i + 10]];
-        dca_tmode.vlc[i].table_allocated = ff_dca_vlc_offs[i + 11] - ff_dca_vlc_offs[i + 10];
-        init_vlc(&dca_tmode.vlc[i], tmode_vlc_bits[i], 4,
-                 tmode_bits[i], 1, 1,
-                 tmode_codes[i], 2, 2, INIT_VLC_USE_NEW_STATIC);
-    }
-
-    for (i = 0; i < 10; i++)
-        for (j = 0; j < 7; j++) {
-            if (!bitalloc_codes[i][j])
-                break;
-            dca_smpl_bitalloc[i + 1].offset                 = bitalloc_offsets[i];
-            dca_smpl_bitalloc[i + 1].wrap                   = 1 + (j > 4);
-            dca_smpl_bitalloc[i + 1].vlc[j].table           = &dca_table[ff_dca_vlc_offs[c]];
-            dca_smpl_bitalloc[i + 1].vlc[j].table_allocated = ff_dca_vlc_offs[c + 1] - ff_dca_vlc_offs[c];
-
-            init_vlc(&dca_smpl_bitalloc[i + 1].vlc[j], bitalloc_maxbits[i][j],
-                     bitalloc_sizes[i],
-                     bitalloc_bits[i][j], 1, 1,
-                     bitalloc_codes[i][j], 2, 2, INIT_VLC_USE_NEW_STATIC);
-            c++;
-        }
-    vlcs_initialized = 1;
-}
+#define MIN_PACKET_SIZE     16
+#define MAX_PACKET_SIZE     0x104000
 
-static inline void get_array(GetBitContext *gb, int *dst, int len, int bits)
+int ff_dca_set_channel_layout(AVCodecContext *avctx, int *ch_remap, int dca_mask)
 {
-    while (len--)
-        *dst++ = get_bits(gb, bits);
-}
-
-static inline int dca_xxch2index(DCAContext *s, int xxch_ch)
-{
-    int i, base, mask;
-
-    /* locate channel set containing the channel */
-    for (i = -1, base = 0, mask = (s->xxch_core_spkmask & ~DCA_XXCH_LFE1);
-         i <= s->xxch_chset && !(mask & xxch_ch); mask = s->xxch_spk_masks[++i])
-        base += av_popcount(mask);
-
-    return base + av_popcount(mask & (xxch_ch - 1));
-}
-
-static int dca_parse_audio_coding_header(DCAContext *s, int base_channel,
-                                         int xxch)
-{
-    int i, j;
-    static const float adj_table[4] = { 1.0, 1.1250, 1.2500, 1.4375 };
-    static const int bitlen[11] = { 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3 };
-    static const int thr[11]    = { 0, 1, 3, 3, 3, 3, 7, 7, 7, 7, 7 };
-    int hdr_pos = 0, hdr_size = 0;
-    float scale_factor;
-    int this_chans, acc_mask;
-    int embedded_downmix;
-    int nchans, mask[8];
-    int coeff, ichan;
-
-    /* xxch has arbitrary sized audio coding headers */
-    if (xxch) {
-        hdr_pos  = get_bits_count(&s->gb);
-        hdr_size = get_bits(&s->gb, 7) + 1;
-    }
-
-    nchans = get_bits(&s->gb, 3) + 1;
-    if (xxch && nchans >= 3) {
-        av_log(s->avctx, AV_LOG_ERROR, "nchans %d is too large\n", nchans);
-        return AVERROR_INVALIDDATA;
-    } else if (nchans + base_channel > DCA_PRIM_CHANNELS_MAX) {
-        av_log(s->avctx, AV_LOG_ERROR, "channel sum %d + %d is too large\n", nchans, base_channel);
-        return AVERROR_INVALIDDATA;
-    }
-
-    s->total_channels = nchans + base_channel;
-    s->prim_channels  = s->total_channels;
-
-    /* obtain speaker layout mask & downmix coefficients for XXCH */
-    if (xxch) {
-        acc_mask = s->xxch_core_spkmask;
-
-        this_chans = get_bits(&s->gb, s->xxch_nbits_spk_mask - 6) << 6;
-        s->xxch_spk_masks[s->xxch_chset] = this_chans;
-        s->xxch_chset_nch[s->xxch_chset] = nchans;
-
-        for (i = 0; i <= s->xxch_chset; i++)
-            acc_mask |= s->xxch_spk_masks[i];
-
-        /* check for downmixing information */
-        if (get_bits1(&s->gb)) {
-            embedded_downmix = get_bits1(&s->gb);
-            coeff            = get_bits(&s->gb, 6);
-
-            if (coeff<1 || coeff>61) {
-                av_log(s->avctx, AV_LOG_ERROR, "6bit coeff %d is out of range\n", coeff);
-                return AVERROR_INVALIDDATA;
-            }
-
-            scale_factor     = -1.0f / dca_dmix_code((coeff<<2)-3);
-
-            s->xxch_dmix_sf[s->xxch_chset] = scale_factor;
-
-            for (i = base_channel; i < s->prim_channels; i++) {
-                mask[i] = get_bits(&s->gb, s->xxch_nbits_spk_mask);
-            }
-
-            for (j = base_channel; j < s->prim_channels; j++) {
-                memset(s->xxch_dmix_coeff[j], 0, sizeof(s->xxch_dmix_coeff[0]));
-                s->xxch_dmix_embedded |= (embedded_downmix << j);
-                for (i = 0; i < s->xxch_nbits_spk_mask; i++) {
-                    if (mask[j] & (1 << i)) {
-                        if ((1 << i) == DCA_XXCH_LFE1) {
-                            av_log(s->avctx, AV_LOG_WARNING,
-                                   "DCA-XXCH: dmix to LFE1 not supported.\n");
-                            continue;
-                        }
-
-                        coeff = get_bits(&s->gb, 7);
-                        ichan = dca_xxch2index(s, 1 << i);
-                        if ((coeff&63)<1 || (coeff&63)>61) {
-                            av_log(s->avctx, AV_LOG_ERROR, "7bit coeff %d is out of range\n", coeff);
-                            return AVERROR_INVALIDDATA;
-                        }
-                        s->xxch_dmix_coeff[j][ichan] = dca_dmix_code((coeff<<2)-3);
-                    }
+    static const uint8_t dca2wav_norm[28] = {
+         2,  0, 1, 9, 10,  3,  8,  4,  5,  9, 10, 6, 7, 12,
+        13, 14, 3, 6,  7, 11, 12, 14, 16, 15, 17, 8, 4,  5,
+    };
+
+    static const uint8_t dca2wav_wide[28] = {
+         2,  0, 1, 4,  5,  3,  8,  4,  5,  9, 10, 6, 7, 12,
+        13, 14, 3, 9, 10, 11, 12, 14, 16, 15, 17, 8, 4,  5,
+    };
+
+    int dca_ch, wav_ch, nchannels = 0;
+
+    if (avctx->request_channel_layout & AV_CH_LAYOUT_NATIVE) {
+        for (dca_ch = 0; dca_ch < DCA_SPEAKER_COUNT; dca_ch++)
+            if (dca_mask & (1U << dca_ch))
+                ch_remap[nchannels++] = dca_ch;
+        avctx->channel_layout = dca_mask;
+    } else {
+        int wav_mask = 0;
+        int wav_map[18];
+        const uint8_t *dca2wav;
+        if (dca_mask == DCA_SPEAKER_LAYOUT_7POINT0_WIDE ||
+            dca_mask == DCA_SPEAKER_LAYOUT_7POINT1_WIDE)
+            dca2wav = dca2wav_wide;
+        else
+            dca2wav = dca2wav_norm;
+        for (dca_ch = 0; dca_ch < 28; dca_ch++) {
+            if (dca_mask & (1 << dca_ch)) {
+                wav_ch = dca2wav[dca_ch];
+                if (!(wav_mask & (1 << wav_ch))) {
+                    wav_map[wav_ch] = dca_ch;
+                    wav_mask |= 1 << wav_ch;
                 }
             }
         }
+        for (wav_ch = 0; wav_ch < 18; wav_ch++)
+            if (wav_mask & (1 << wav_ch))
+                ch_remap[nchannels++] = wav_map[wav_ch];
+        avctx->channel_layout = wav_mask;
     }
 
-    if (s->prim_channels > DCA_PRIM_CHANNELS_MAX)
-        s->prim_channels = DCA_PRIM_CHANNELS_MAX;
-
-    for (i = base_channel; i < s->prim_channels; i++) {
-        s->subband_activity[i] = get_bits(&s->gb, 5) + 2;
-        if (s->subband_activity[i] > DCA_SUBBANDS)
-            s->subband_activity[i] = DCA_SUBBANDS;
-    }
-    for (i = base_channel; i < s->prim_channels; i++) {
-        s->vq_start_subband[i] = get_bits(&s->gb, 5) + 1;
-        if (s->vq_start_subband[i] > DCA_SUBBANDS)
-            s->vq_start_subband[i] = DCA_SUBBANDS;
-    }
-    get_array(&s->gb, s->joint_intensity + base_channel,     s->prim_channels - base_channel, 3);
-    get_array(&s->gb, s->transient_huffman + base_channel,   s->prim_channels - base_channel, 2);
-    get_array(&s->gb, s->scalefactor_huffman + base_channel, s->prim_channels - base_channel, 3);
-    get_array(&s->gb, s->bitalloc_huffman + base_channel,    s->prim_channels - base_channel, 3);
-
-    /* Get codebooks quantization indexes */
-    if (!base_channel)
-        memset(s->quant_index_huffman, 0, sizeof(s->quant_index_huffman));
-    for (j = 1; j < 11; j++)
-        for (i = base_channel; i < s->prim_channels; i++)
-            s->quant_index_huffman[i][j] = get_bits(&s->gb, bitlen[j]);
-
-    /* Get scale factor adjustment */
-    for (j = 0; j < 11; j++)
-        for (i = base_channel; i < s->prim_channels; i++)
-            s->scalefactor_adj[i][j] = 1;
-
-    for (j = 1; j < 11; j++)
-        for (i = base_channel; i < s->prim_channels; i++)
-            if (s->quant_index_huffman[i][j] < thr[j])
-                s->scalefactor_adj[i][j] = adj_table[get_bits(&s->gb, 2)];
-
-    if (!xxch) {
-        if (s->crc_present) {
-            /* Audio header CRC check */
-            get_bits(&s->gb, 16);
-        }
-    } else {
-        /* Skip to the end of the header, also ignore CRC if present  */
-        i = get_bits_count(&s->gb);
-        if (hdr_pos + 8 * hdr_size > i)
-            skip_bits_long(&s->gb, hdr_pos + 8 * hdr_size - i);
-    }
-
-    s->current_subframe    = 0;
-    s->current_subsubframe = 0;
-
-    return 0;
+    avctx->channels = nchannels;
+    return nchannels;
 }
 
-static int dca_parse_frame_header(DCAContext *s)
+static uint16_t crc16(const uint8_t *data, int size)
 {
-    init_get_bits(&s->gb, s->dca_buffer, s->dca_buffer_size * 8);
-
-    /* Sync code */
-    skip_bits_long(&s->gb, 32);
-
-    /* Frame header */
-    s->frame_type        = get_bits(&s->gb, 1);
-    s->samples_deficit   = get_bits(&s->gb, 5) + 1;
-    s->crc_present       = get_bits(&s->gb, 1);
-    s->sample_blocks     = get_bits(&s->gb, 7) + 1;
-    s->frame_size        = get_bits(&s->gb, 14) + 1;
-    if (s->frame_size < 95)
-        return AVERROR_INVALIDDATA;
-    s->amode             = get_bits(&s->gb, 6);
-    s->sample_rate       = avpriv_dca_sample_rates[get_bits(&s->gb, 4)];
-    if (!s->sample_rate)
-        return AVERROR_INVALIDDATA;
-    s->bit_rate_index    = get_bits(&s->gb, 5);
-    s->bit_rate          = ff_dca_bit_rates[s->bit_rate_index];
-    if (!s->bit_rate)
-        return AVERROR_INVALIDDATA;
-
-    skip_bits1(&s->gb); // always 0 (reserved, cf. ETSI TS 102 114 V1.4.1)
-    s->dynrange          = get_bits(&s->gb, 1);
-    s->timestamp         = get_bits(&s->gb, 1);
-    s->aux_data          = get_bits(&s->gb, 1);
-    s->hdcd              = get_bits(&s->gb, 1);
-    s->ext_descr         = get_bits(&s->gb, 3);
-    s->ext_coding        = get_bits(&s->gb, 1);
-    s->aspf              = get_bits(&s->gb, 1);
-    s->lfe               = get_bits(&s->gb, 2);
-    s->predictor_history = get_bits(&s->gb, 1);
-
-    if (s->lfe > 2) {
-        s->lfe = 0;
-        av_log(s->avctx, AV_LOG_ERROR, "Invalid LFE value: %d\n", s->lfe);
-        return AVERROR_INVALIDDATA;
-    }
-
-    /* TODO: check CRC */
-    if (s->crc_present)
-        s->header_crc    = get_bits(&s->gb, 16);
-
-    s->multirate_inter   = get_bits(&s->gb, 1);
-    s->version           = get_bits(&s->gb, 4);
-    s->copy_history      = get_bits(&s->gb, 2);
-    s->source_pcm_res    = get_bits(&s->gb, 3);
-    s->front_sum         = get_bits(&s->gb, 1);
-    s->surround_sum      = get_bits(&s->gb, 1);
-    s->dialog_norm       = get_bits(&s->gb, 4);
+    static const uint16_t crctab[16] = {
+        0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
+        0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
+    };
 
-    /* FIXME: channels mixing levels */
-    s->output = s->amode;
-    if (s->lfe)
-        s->output |= DCA_LFE;
+    uint16_t res = 0xffff;
+    int i;
 
-    /* Primary audio coding header */
-    s->subframes = get_bits(&s->gb, 4) + 1;
+    for (i = 0; i < size; i++) {
+        res = (res << 4) ^ crctab[(data[i] >> 4) ^ (res >> 12)];
+        res = (res << 4) ^ crctab[(data[i] & 15) ^ (res >> 12)];
+    }
 
-    return dca_parse_audio_coding_header(s, 0, 0);
+    return res;
 }
 
-static inline int get_scale(GetBitContext *gb, int level, int value, int log2range)
+int ff_dca_check_crc(GetBitContext *s, int p1, int p2)
 {
-    if (level < 5) {
-        /* huffman encoded */
-        value += get_bitalloc(gb, &dca_scalefactor, level);
-        value  = av_clip(value, 0, (1 << log2range) - 1);
-    } else if (level < 8) {
-        if (level + 1 > log2range) {
-            skip_bits(gb, level + 1 - log2range);
-            value = get_bits(gb, log2range);
-        } else {
-            value = get_bits(gb, level + 1);
-        }
-    }
-    return value;
+    if (((p1 | p2) & 7) || p1 < 0 || p2 > s->size_in_bits || p2 - p1 < 16)
+        return -1;
+    if (crc16(s->buffer + p1 / 8, (p2 - p1) / 8))
+        return -1;
+    return 0;
 }
 
-static int dca_subframe_header(DCAContext *s, int base_channel, int block_index)
+void ff_dca_downmix_to_stereo_fixed(DCADSPContext *dcadsp, int32_t **samples,
+                                    int *coeff_l, int nsamples, int ch_mask)
 {
-    /* Primary audio coding side information */
-    int j, k;
-
-    if (get_bits_left(&s->gb) < 0)
-        return AVERROR_INVALIDDATA;
-
-    if (!base_channel) {
-        s->subsubframes[s->current_subframe]    = get_bits(&s->gb, 2) + 1;
-        if (block_index + s->subsubframes[s->current_subframe] > s->sample_blocks/8) {
-            s->subsubframes[s->current_subframe] = 1;
-            return AVERROR_INVALIDDATA;
-        }
-        s->partial_samples[s->current_subframe] = get_bits(&s->gb, 3);
-    }
-
-    for (j = base_channel; j < s->prim_channels; j++) {
-        for (k = 0; k < s->subband_activity[j]; k++)
-            s->prediction_mode[j][k] = get_bits(&s->gb, 1);
-    }
-
-    /* Get prediction codebook */
-    for (j = base_channel; j < s->prim_channels; j++) {
-        for (k = 0; k < s->subband_activity[j]; k++) {
-            if (s->prediction_mode[j][k] > 0) {
-                /* (Prediction coefficient VQ address) */
-                s->prediction_vq[j][k] = get_bits(&s->gb, 12);
-            }
-        }
-    }
-
-    /* Bit allocation index */
-    for (j = base_channel; j < s->prim_channels; j++) {
-        for (k = 0; k < s->vq_start_subband[j]; k++) {
-            if (s->bitalloc_huffman[j] == 6)
-                s->bitalloc[j][k] = get_bits(&s->gb, 5);
-            else if (s->bitalloc_huffman[j] == 5)
-                s->bitalloc[j][k] = get_bits(&s->gb, 4);
-            else if (s->bitalloc_huffman[j] == 7) {
-                av_log(s->avctx, AV_LOG_ERROR,
-                       "Invalid bit allocation index\n");
-                return AVERROR_INVALIDDATA;
-            } else {
-                s->bitalloc[j][k] =
-                    get_bitalloc(&s->gb, &dca_bitalloc_index, s->bitalloc_huffman[j]);
-            }
-
-            if (s->bitalloc[j][k] > 26) {
-                ff_dlog(s->avctx, "bitalloc index [%i][%i] too big (%i)\n",
-                        j, k, s->bitalloc[j][k]);
-                return AVERROR_INVALIDDATA;
-            }
-        }
-    }
-
-    /* Transition mode */
-    for (j = base_channel; j < s->prim_channels; j++) {
-        for (k = 0; k < s->subband_activity[j]; k++) {
-            s->transition_mode[j][k] = 0;
-            if (s->subsubframes[s->current_subframe] > 1 &&
-                k < s->vq_start_subband[j] && s->bitalloc[j][k] > 0) {
-                s->transition_mode[j][k] =
-                    get_bitalloc(&s->gb, &dca_tmode, s->transient_huffman[j]);
-            }
-        }
-    }
-
-    if (get_bits_left(&s->gb) < 0)
-        return AVERROR_INVALIDDATA;
-
-    for (j = base_channel; j < s->prim_channels; j++) {
-        const uint32_t *scale_table;
-        int scale_sum, log_size;
-
-        memset(s->scale_factor[j], 0,
-               s->subband_activity[j] * sizeof(s->scale_factor[0][0][0]) * 2);
+    int pos, spkr, max_spkr = av_log2(ch_mask);
+    int *coeff_r = coeff_l + av_popcount(ch_mask);
 
-        if (s->scalefactor_huffman[j] == 6) {
-            scale_table = ff_dca_scale_factor_quant7;
-            log_size    = 7;
-        } else {
-            scale_table = ff_dca_scale_factor_quant6;
-            log_size    = 6;
-        }
-
-        /* When huffman coded, only the difference is encoded */
-        scale_sum = 0;
-
-        for (k = 0; k < s->subband_activity[j]; k++) {
-            if (k >= s->vq_start_subband[j] || s->bitalloc[j][k] > 0) {
-                scale_sum = get_scale(&s->gb, s->scalefactor_huffman[j], scale_sum, log_size);
-                s->scale_factor[j][k][0] = scale_table[scale_sum];
-            }
-
-            if (k < s->vq_start_subband[j] && s->transition_mode[j][k]) {
-                /* Get second scale factor */
-                scale_sum = get_scale(&s->gb, s->scalefactor_huffman[j], scale_sum, log_size);
-                s->scale_factor[j][k][1] = scale_table[scale_sum];
-            }
-        }
-    }
-
-    /* Joint subband scale factor codebook select */
-    for (j = base_channel; j < s->prim_channels; j++) {
-        /* Transmitted only if joint subband coding enabled */
-        if (s->joint_intensity[j] > 0)
-            s->joint_huff[j] = get_bits(&s->gb, 3);
-    }
-
-    if (get_bits_left(&s->gb) < 0)
-        return AVERROR_INVALIDDATA;
-
-    /* Scale factors for joint subband coding */
-    for (j = base_channel; j < s->prim_channels; j++) {
-        int source_channel;
+    av_assert0(DCA_HAS_STEREO(ch_mask));
 
-        /* Transmitted only if joint subband coding enabled */
-        if (s->joint_intensity[j] > 0) {
-            int scale = 0;
-            source_channel = s->joint_intensity[j] - 1;
+    // Scale left and right channels
+    pos = (ch_mask & DCA_SPEAKER_MASK_C);
+    dcadsp->dmix_scale(samples[DCA_SPEAKER_L], coeff_l[pos    ], nsamples);
+    dcadsp->dmix_scale(samples[DCA_SPEAKER_R], coeff_r[pos + 1], nsamples);
 
-            /* When huffman coded, only the difference is encoded
-             * (is this valid as well for joint scales ???) */
+    // Downmix remaining channels
+    for (spkr = 0; spkr <= max_spkr; spkr++) {
+        if (!(ch_mask & (1U << spkr)))
+            continue;
 
-            for (k = s->subband_activity[j]; k < s->subband_activity[source_channel]; k++) {
-                scale = get_scale(&s->gb, s->joint_huff[j], 64 /* bias */, 7);
-                s->joint_scale_factor[j][k] = scale;    /*joint_scale_table[scale]; */
-            }
+        if (*coeff_l && spkr != DCA_SPEAKER_L)
+            dcadsp->dmix_add(samples[DCA_SPEAKER_L], samples[spkr],
+                             *coeff_l, nsamples);
 
-            if (!(s->debug_flag & 0x02)) {
-                av_log(s->avctx, AV_LOG_DEBUG,
-                       "Joint stereo coding not supported\n");
-                s->debug_flag |= 0x02;
-            }
-        }
-    }
+        if (*coeff_r && spkr != DCA_SPEAKER_R)
+            dcadsp->dmix_add(samples[DCA_SPEAKER_R], samples[spkr],
+                             *coeff_r, nsamples);
 
-    /* Dynamic range coefficient */
-    if (!base_channel && s->dynrange)
-        s->dynrange_coef = get_bits(&s->gb, 8);
-
-    /* Side information CRC check word */
-    if (s->crc_present) {
-        get_bits(&s->gb, 16);
+        coeff_l++;
+        coeff_r++;
     }
-
-    /*
-     * Primary audio data arrays
-     */
-
-    /* VQ encoded high frequency subbands */
-    for (j = base_channel; j < s->prim_channels; j++)
-        for (k = s->vq_start_subband[j]; k < s->subband_activity[j]; k++)
-            /* 1 vector -> 32 samples */
-            s->high_freq_vq[j][k] = get_bits(&s->gb, 10);
-
-    /* Low frequency effect data */
-    if (!base_channel && s->lfe) {
-        int quant7;
-        /* LFE samples */
-        int lfe_samples    = 2 * s->lfe * (4 + block_index);
-        int lfe_end_sample = 2 * s->lfe * (4 + block_index + s->subsubframes[s->current_subframe]);
-        float lfe_scale;
-
-        for (j = lfe_samples; j < lfe_end_sample; j++) {
-            /* Signed 8 bits int */
-            s->lfe_data[j] = get_sbits(&s->gb, 8);
-        }
-
-        /* Scale factor index */
-        quant7 = get_bits(&s->gb, 8);
-        if (quant7 > 127) {
-            avpriv_request_sample(s->avctx, "LFEScaleIndex larger than 127");
-            return AVERROR_INVALIDDATA;
-        }
-        s->lfe_scale_factor = ff_dca_scale_factor_quant7[quant7];
-
-        /* Quantization step size * scale factor */
-        lfe_scale = 0.035 * s->lfe_scale_factor;
-
-        for (j = lfe_samples; j < lfe_end_sample; j++)
-            s->lfe_data[j] *= lfe_scale;
-    }
-
-    return 0;
 }
 
-static void qmf_32_subbands(DCAContext *s, int chans,
-                            float samples_in[32][8], float *samples_out,
-                            float scale)
+void ff_dca_downmix_to_stereo_float(AVFloatDSPContext *fdsp, float **samples,
+                                    int *coeff_l, int nsamples, int ch_mask)
 {
-    const float *prCoeff;
+    int pos, spkr, max_spkr = av_log2(ch_mask);
+    int *coeff_r = coeff_l + av_popcount(ch_mask);
+    const float scale = 1.0f / (1 << 15);
 
-    int sb_act = s->subband_activity[chans];
+    av_assert0(DCA_HAS_STEREO(ch_mask));
 
-    scale *= sqrt(1 / 8.0);
+    // Scale left and right channels
+    pos = (ch_mask & DCA_SPEAKER_MASK_C);
+    fdsp->vector_fmul_scalar(samples[DCA_SPEAKER_L], samples[DCA_SPEAKER_L],
+                             coeff_l[pos    ] * scale, nsamples);
+    fdsp->vector_fmul_scalar(samples[DCA_SPEAKER_R], samples[DCA_SPEAKER_R],
+                             coeff_r[pos + 1] * scale, nsamples);
 
-    /* Select filter */
-    if (!s->multirate_inter)    /* Non-perfect reconstruction */
-        prCoeff = ff_dca_fir_32bands_nonperfect;
-    else                        /* Perfect reconstruction */
-        prCoeff = ff_dca_fir_32bands_perfect;
+    // Downmix remaining channels
+    for (spkr = 0; spkr <= max_spkr; spkr++) {
+        if (!(ch_mask & (1U << spkr)))
+            continue;
 
-    s->dcadsp.qmf_32_subbands(samples_in, sb_act, &s->synth, &s->imdct,
-                              s->subband_fir_hist[chans],
-                              &s->hist_index[chans],
-                              s->subband_fir_noidea[chans], prCoeff,
-                              samples_out, s->raXin, scale);
-}
+        if (*coeff_l && spkr != DCA_SPEAKER_L)
+            fdsp->vector_fmac_scalar(samples[DCA_SPEAKER_L], samples[spkr],
+                                     *coeff_l * scale, nsamples);
 
-static QMF64_table *qmf64_precompute(void)
-{
-    unsigned i, j;
-    QMF64_table *table = av_malloc(sizeof(*table));
-    if (!table)
-        return NULL;
-
-    for (i = 0; i < 32; i++)
-        for (j = 0; j < 32; j++)
-            table->dct4_coeff[i][j] = cos((2 * i + 1) * (2 * j + 1) * M_PI / 128);
-    for (i = 0; i < 32; i++)
-        for (j = 0; j < 32; j++)
-            table->dct2_coeff[i][j] = cos((2 * i + 1) *      j      * M_PI /  64);
-
-    /* FIXME: Is the factor 0.125 = 1/8 right? */
-    for (i = 0; i < 32; i++)
-        table->rcos[i] =  0.125 / cos((2 * i + 1) * M_PI / 256);
-    for (i = 0; i < 32; i++)
-        table->rsin[i] = -0.125 / sin((2 * i + 1) * M_PI / 256);
-
-    return table;
-}
+        if (*coeff_r && spkr != DCA_SPEAKER_R)
+            fdsp->vector_fmac_scalar(samples[DCA_SPEAKER_R], samples[spkr],
+                                     *coeff_r * scale, nsamples);
 
-/* FIXME: Totally unoptimized. Based on the reference code and
- * http://multimedia.cx/mirror/dca-transform.pdf, with guessed tweaks
- * for doubling the size. */
-static void qmf_64_subbands(DCAContext *s, int chans, float samples_in[64][8],
-                            float *samples_out, float scale)
-{
-    float raXin[64];
-    float A[32], B[32];
-    float *raX = s->subband_fir_hist[chans];
-    float *raZ = s->subband_fir_noidea[chans];
-    unsigned i, j, k, subindex;
-
-    for (i = s->subband_activity[chans]; i < 64; i++)
-        raXin[i] = 0.0;
-    for (subindex = 0; subindex < 8; subindex++) {
-        for (i = 0; i < s->subband_activity[chans]; i++)
-            raXin[i] = samples_in[i][subindex];
-
-        for (k = 0; k < 32; k++) {
-            A[k] = 0.0;
-            for (i = 0; i < 32; i++)
-                A[k] += (raXin[2 * i] + raXin[2 * i + 1]) * s->qmf64_table->dct4_coeff[k][i];
-        }
-        for (k = 0; k < 32; k++) {
-            B[k] = raXin[0] * s->qmf64_table->dct2_coeff[k][0];
-            for (i = 1; i < 32; i++)
-                B[k] += (raXin[2 * i] + raXin[2 * i - 1]) * s->qmf64_table->dct2_coeff[k][i];
-        }
-        for (k = 0; k < 32; k++) {
-            raX[k]      = s->qmf64_table->rcos[k] * (A[k] + B[k]);
-            raX[63 - k] = s->qmf64_table->rsin[k] * (A[k] - B[k]);
-        }
-
-        for (i = 0; i < 64; i++) {
-            float out = raZ[i];
-            for (j = 0; j < 1024; j += 128)
-                out += ff_dca_fir_64bands[j + i] * (raX[j + i] - raX[j + 63 - i]);
-            *samples_out++ = out * scale;
-        }
-
-        for (i = 0; i < 64; i++) {
-            float hist = 0.0;
-            for (j = 0; j < 1024; j += 128)
-                hist += ff_dca_fir_64bands[64 + j + i] * (-raX[i + j] - raX[j + 63 - i]);
-
-            raZ[i] = hist;
-        }
-
-        /* FIXME: Make buffer circular, to avoid this move. */
-        memmove(raX + 64, raX, (1024 - 64) * sizeof(*raX));
-    }
-}
-
-static void lfe_interpolation_fir(DCAContext *s, const float *samples_in,
-                                  float *samples_out)
-{
-    /* samples_in: An array holding decimated samples.
-     *   Samples in current subframe starts from samples_in[0],
-     *   while samples_in[-1], samples_in[-2], ..., stores samples
-     *   from last subframe as history.
-     *
-     * samples_out: An array holding interpolated samples
-     */
-
-    int idx;
-    const float *prCoeff;
-    int deciindex;
-
-    /* Select decimation filter */
-    if (s->lfe == 1) {
-        idx     = 1;
-        prCoeff = ff_dca_lfe_fir_128;
-    } else {
-        idx = 0;
-        if (s->exss_ext_mask & DCA_EXT_EXSS_XLL)
-            prCoeff = ff_dca_lfe_xll_fir_64;
-        else
-            prCoeff = ff_dca_lfe_fir_64;
-    }
-    /* Interpolation */
-    for (deciindex = 0; deciindex < 2 * s->lfe; deciindex++) {
-        s->dcadsp.lfe_fir[idx](samples_out, samples_in, prCoeff);
-        samples_in++;
-        samples_out += 2 * 32 * (1 + idx);
+        coeff_l++;
+        coeff_r++;
     }
 }
 
-/* downmixing routines */
-#define MIX_REAR1(samples, s1, rs, coef)            \
-    samples[0][i] += samples[s1][i] * coef[rs][0];  \
-    samples[1][i] += samples[s1][i] * coef[rs][1];
-
-#define MIX_REAR2(samples, s1, s2, rs, coef)                                          \
-    samples[0][i] += samples[s1][i] * coef[rs][0] + samples[s2][i] * coef[rs + 1][0]; \
-    samples[1][i] += samples[s1][i] * coef[rs][1] + samples[s2][i] * coef[rs + 1][1];
-
-#define MIX_FRONT3(samples, coef)                                      \
-    t = samples[c][i];                                                 \
-    u = samples[l][i];                                                 \
-    v = samples[r][i];                                                 \
-    samples[0][i] = t * coef[0][0] + u * coef[1][0] + v * coef[2][0];  \
-    samples[1][i] = t * coef[0][1] + u * coef[1][1] + v * coef[2][1];
-
-#define DOWNMIX_TO_STEREO(op1, op2)             \
-    for (i = 0; i < 256; i++) {                 \
-        op1                                     \
-        op2                                     \
-    }
-
-static void dca_downmix(float **samples, int srcfmt, int lfe_present,
-                        float coef[DCA_PRIM_CHANNELS_MAX + 1][2],
-                        const int8_t *channel_mapping)
+static int convert_bitstream(const uint8_t *src, int src_size, uint8_t *dst, int max_size)
 {
-    int c, l, r, sl, sr, s;
-    int i;
-    float t, u, v;
-
-    switch (srcfmt) {
-    case DCA_MONO:
-    case DCA_4F2R:
-        av_log(NULL, AV_LOG_ERROR, "Not implemented!\n");
-        break;
-    case DCA_CHANNEL:
-    case DCA_STEREO:
-    case DCA_STEREO_TOTAL:
-    case DCA_STEREO_SUMDIFF:
-        break;
-    case DCA_3F:
-        c = channel_mapping[0];
-        l = channel_mapping[1];
-        r = channel_mapping[2];
-        DOWNMIX_TO_STEREO(MIX_FRONT3(samples, coef), );
-        break;
-    case DCA_2F1R:
-        s = channel_mapping[2];
-        DOWNMIX_TO_STEREO(MIX_REAR1(samples, s, 2, coef), );
-        break;
-    case DCA_3F1R:
-        c = channel_mapping[0];
-        l = channel_mapping[1];
-        r = channel_mapping[2];
-        s = channel_mapping[3];
-        DOWNMIX_TO_STEREO(MIX_FRONT3(samples, coef),
-                          MIX_REAR1(samples, s, 3, coef));
-        break;
-    case DCA_2F2R:
-        sl = channel_mapping[2];
-        sr = channel_mapping[3];
-        DOWNMIX_TO_STEREO(MIX_REAR2(samples, sl, sr, 2, coef), );
-        break;
-    case DCA_3F2R:
-        c  = channel_mapping[0];
-        l  = channel_mapping[1];
-        r  = channel_mapping[2];
-        sl = channel_mapping[3];
-        sr = channel_mapping[4];
-        DOWNMIX_TO_STEREO(MIX_FRONT3(samples, coef),
-                          MIX_REAR2(samples, sl, sr, 3, coef));
-        break;
-    }
-    if (lfe_present) {
-        int lf_buf = ff_dca_lfe_index[srcfmt];
-        int lf_idx =  ff_dca_channels[srcfmt];
-        for (i = 0; i < 256; i++) {
-            samples[0][i] += samples[lf_buf][i] * coef[lf_idx][0];
-            samples[1][i] += samples[lf_buf][i] * coef[lf_idx][1];
-        }
+    switch (AV_RB32(src)) {
+    case DCA_SYNCWORD_CORE_BE:
+    case DCA_SYNCWORD_SUBSTREAM:
+        memcpy(dst, src, src_size);
+        return src_size;
+    case DCA_SYNCWORD_CORE_LE:
+    case DCA_SYNCWORD_CORE_14B_BE:
+    case DCA_SYNCWORD_CORE_14B_LE:
+        return avpriv_dca_convert_bitstream(src, src_size, dst, max_size);
+    default:
+        return AVERROR_INVALIDDATA;
     }
 }
 
-#ifndef decode_blockcodes
-/* Very compact version of the block code decoder that does not use table
- * look-up but is slightly slower */
-static int decode_blockcode(int code, int levels, int32_t *values)
+static int dcadec_decode_frame(AVCodecContext *avctx, void *data,
+                               int *got_frame_ptr, AVPacket *avpkt)
 {
-    int i;
-    int offset = (levels - 1) >> 1;
+    DCAContext *s = avctx->priv_data;
+    AVFrame *frame = data;
+    uint8_t *input = avpkt->data;
+    int input_size = avpkt->size;
+    int i, ret, prev_packet = s->packet;
 
-    for (i = 0; i < 4; i++) {
-        int div = FASTDIV(code, levels);
-        values[i] = code - offset - div * levels;
-        code      = div;
+    if (input_size < MIN_PACKET_SIZE || input_size > MAX_PACKET_SIZE) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid packet size\n");
+        return AVERROR_INVALIDDATA;
     }
 
-    return code;
-}
-
-static int decode_blockcodes(int code1, int code2, int levels, int32_t *values)
-{
-    return decode_blockcode(code1, levels, values) |
-           decode_blockcode(code2, levels, values + 4);
-}
-#endif
-
-static const uint8_t abits_sizes[7]  = { 7, 10, 12, 13, 15, 17, 19 };
-static const uint8_t abits_levels[7] = { 3,  5,  7,  9, 13, 17, 25 };
-
-static int dca_subsubframe(DCAContext *s, int base_channel, int block_index)
-{
-    int k, l;
-    int subsubframe = s->current_subsubframe;
-
-    const float *quant_step_table;
-
-    /* FIXME */
-    float (*subband_samples)[DCA_SUBBANDS][8] = s->subband_samples[block_index];
-    LOCAL_ALIGNED_16(int32_t, block, [8 * DCA_SUBBANDS]);
-
-    /*
-     * Audio data
-     */
-
-    /* Select quantization step size table */
-    if (s->bit_rate_index == 0x1f)
-        quant_step_table = ff_dca_lossless_quant_d;
-    else
-        quant_step_table = ff_dca_lossy_quant_d;
-
-    for (k = base_channel; k < s->prim_channels; k++) {
-        float rscale[DCA_SUBBANDS];
-
-        if (get_bits_left(&s->gb) < 0)
-            return AVERROR_INVALIDDATA;
+    av_fast_malloc(&s->buffer, &s->buffer_size,
+                   FFALIGN(input_size, 4096) + DCA_BUFFER_PADDING_SIZE);
+    if (!s->buffer)
+        return AVERROR(ENOMEM);
 
-        for (l = 0; l < s->vq_start_subband[k]; l++) {
-            int m;
+    for (i = 0, ret = AVERROR_INVALIDDATA; i < input_size - MIN_PACKET_SIZE + 1 && ret < 0; i++)
+        ret = convert_bitstream(input + i, input_size - i, s->buffer, s->buffer_size);
 
-            /* Select the mid-tread linear quantizer */
-            int abits = s->bitalloc[k][l];
+    if (ret < 0)
+        return ret;
 
-            float quant_step_size = quant_step_table[abits];
+    input      = s->buffer;
+    input_size = ret;
 
-            /*
-             * Determine quantization index code book and its type
-             */
+    s->packet = 0;
 
-            /* Select quantization index code book */
-            int sel = s->quant_index_huffman[k][abits];
+    // Parse backward compatible core sub-stream
+    if (AV_RB32(input) == DCA_SYNCWORD_CORE_BE) {
+        int frame_size;
 
-            /*
-             * Extract bits from the bit stream
-             */
-            if (!abits) {
-                rscale[l] = 0;
-                memset(block + 8 * l, 0, 8 * sizeof(block[0]));
-            } else {
-                /* Deal with transients */
-                int sfi = s->transition_mode[k][l] && subsubframe >= s->transition_mode[k][l];
-                rscale[l] = quant_step_size * s->scale_factor[k][l][sfi] *
-                            s->scalefactor_adj[k][sel];
-
-                if (abits >= 11 || !dca_smpl_bitalloc[abits].vlc[sel].table) {
-                    if (abits <= 7) {
-                        /* Block code */
-                        int block_code1, block_code2, size, levels, err;
-
-                        size   = abits_sizes[abits - 1];
-                        levels = abits_levels[abits - 1];
-
-                        block_code1 = get_bits(&s->gb, size);
-                        block_code2 = get_bits(&s->gb, size);
-                        err         = decode_blockcodes(block_code1, block_code2,
-                                                        levels, block + 8 * l);
-                        if (err) {
-                            av_log(s->avctx, AV_LOG_ERROR,
-                                   "ERROR: block code look-up failed\n");
-                            return AVERROR_INVALIDDATA;
-                        }
-                    } else {
-                        /* no coding */
-                        for (m = 0; m < 8; m++)
-                            block[8 * l + m] = get_sbits(&s->gb, abits - 3);
-                    }
-                } else {
-                    /* Huffman coded */
-                    for (m = 0; m < 8; m++)
-                        block[8 * l + m] = get_bitalloc(&s->gb,
-                                                        &dca_smpl_bitalloc[abits], sel);
-                }
-            }
+        if ((ret = ff_dca_core_parse(&s->core, input, input_size)) < 0) {
+            s->core_residual_valid = 0;
+            return ret;
         }
 
-        s->fmt_conv.int32_to_float_fmul_array8(&s->fmt_conv, subband_samples[k][0],
-                                               block, rscale, 8 * s->vq_start_subband[k]);
-
-        for (l = 0; l < s->vq_start_subband[k]; l++) {
-            int m;
-            /*
-             * Inverse ADPCM if in prediction mode
-             */
-            if (s->prediction_mode[k][l]) {
-                int n;
-                if (s->predictor_history)
-                    subband_samples[k][l][0] += (ff_dca_adpcm_vb[s->prediction_vq[k][l]][0] *
-                                                 s->subband_samples_hist[k][l][3] +
-                                                 ff_dca_adpcm_vb[s->prediction_vq[k][l]][1] *
-                                                 s->subband_samples_hist[k][l][2] +
-                                                 ff_dca_adpcm_vb[s->prediction_vq[k][l]][2] *
-                                                 s->subband_samples_hist[k][l][1] +
-                                                 ff_dca_adpcm_vb[s->prediction_vq[k][l]][3] *
-                                                 s->subband_samples_hist[k][l][0]) *
-                                                (1.0f / 8192);
-                for (m = 1; m < 8; m++) {
-                    float sum = ff_dca_adpcm_vb[s->prediction_vq[k][l]][0] *
-                                subband_samples[k][l][m - 1];
-                    for (n = 2; n <= 4; n++)
-                        if (m >= n)
-                            sum += ff_dca_adpcm_vb[s->prediction_vq[k][l]][n - 1] *
-                                   subband_samples[k][l][m - n];
-                        else if (s->predictor_history)
-                            sum += ff_dca_adpcm_vb[s->prediction_vq[k][l]][n - 1] *
-                                   s->subband_samples_hist[k][l][m - n + 4];
-                    subband_samples[k][l][m] += sum * (1.0f / 8192);
-                }
-            }
-        }
+        s->packet |= DCA_PACKET_CORE;
 
-        /*
-         * Decode VQ encoded high frequencies
-         */
-        if (s->subband_activity[k] > s->vq_start_subband[k]) {
-            if (!(s->debug_flag & 0x01)) {
-                av_log(s->avctx, AV_LOG_DEBUG,
-                       "Stream with high frequencies VQ coding\n");
-                s->debug_flag |= 0x01;
-            }
-            s->dcadsp.decode_hf(subband_samples[k], s->high_freq_vq[k],
-                                ff_dca_high_freq_vq, subsubframe * 8,
-                                s->scale_factor[k], s->vq_start_subband[k],
-                                s->subband_activity[k]);
+        // EXSS data must be aligned on 4-byte boundary
+        frame_size = FFALIGN(s->core.frame_size, 4);
+        if (input_size - 4 > frame_size) {
+            input      += frame_size;
+            input_size -= frame_size;
         }
     }
 
-    /* Check for DSYNC after subsubframe */
-    if (s->aspf || subsubframe == s->subsubframes[s->current_subframe] - 1) {
-        if (get_bits(&s->gb, 16) != 0xFFFF) {
-            av_log(s->avctx, AV_LOG_ERROR, "Didn't get subframe DSYNC\n");
-            return AVERROR_INVALIDDATA;
-        }
-    }
-
-    /* Backup predictor history for adpcm */
-    for (k = base_channel; k < s->prim_channels; k++)
-        for (l = 0; l < s->vq_start_subband[k]; l++)
-            AV_COPY128(s->subband_samples_hist[k][l], &subband_samples[k][l][4]);
-
-    return 0;
-}
-
-static int dca_filter_channels(DCAContext *s, int block_index, int upsample)
-{
-    float (*subband_samples)[DCA_SUBBANDS][8] = s->subband_samples[block_index];
-    int k;
-
-    if (upsample) {
-        if (!s->qmf64_table) {
-            s->qmf64_table = qmf64_precompute();
-            if (!s->qmf64_table)
-                return AVERROR(ENOMEM);
-        }
+    if (!s->core_only) {
+        DCAExssAsset *asset = NULL;
 
-        /* 64 subbands QMF */
-        for (k = 0; k < s->prim_channels; k++) {
-            if (s->channel_order_tab[k] >= 0)
-                qmf_64_subbands(s, k, subband_samples[k],
-                                s->samples_chanptr[s->channel_order_tab[k]],
-                                /* Upsampling needs a factor 2 here. */
-                                M_SQRT2 / 32768.0);
-        }
-    } else {
-        /* 32 subbands QMF */
-        for (k = 0; k < s->prim_channels; k++) {
-            if (s->channel_order_tab[k] >= 0)
-                qmf_32_subbands(s, k, subband_samples[k],
-                                s->samples_chanptr[s->channel_order_tab[k]],
-                                M_SQRT1_2 / 32768.0);
-        }
-    }
-
-    /* Generate LFE samples for this subsubframe FIXME!!! */
-    if (s->lfe) {
-        float *samples = s->samples_chanptr[s->lfe_index];
-        lfe_interpolation_fir(s,
-                              s->lfe_data + 2 * s->lfe * (block_index + 4),
-                              samples);
-        if (upsample) {
-            unsigned i;
-            /* Should apply the filter in Table 6-11 when upsampling. For
-             * now, just duplicate. */
-            for (i = 255; i > 0; i--) {
-                samples[2 * i]     =
-                samples[2 * i + 1] = samples[i];
+        // Parse extension sub-stream (EXSS)
+        if (AV_RB32(input) == DCA_SYNCWORD_SUBSTREAM) {
+            if ((ret = ff_dca_exss_parse(&s->exss, input, input_size)) < 0) {
+                if (avctx->err_recognition & AV_EF_EXPLODE)
+                    return ret;
+            } else {
+                s->packet |= DCA_PACKET_EXSS;
+                asset = &s->exss.assets[0];
             }
-            samples[1] = samples[0];
         }
-    }
-
-    /* FIXME: This downmixing is probably broken with upsample.
-     * Probably totally broken also with XLL in general. */
-    /* Downmixing to Stereo */
-    if (s->prim_channels + !!s->lfe > 2 &&
-        s->avctx->request_channel_layout == AV_CH_LAYOUT_STEREO) {
-        dca_downmix(s->samples_chanptr, s->amode, !!s->lfe, s->downmix_coef,
-                    s->channel_order_tab);
-    }
-
-    return 0;
-}
-
-static int dca_subframe_footer(DCAContext *s, int base_channel)
-{
-    int in, out, aux_data_count, aux_data_end, reserved;
-    uint32_t nsyncaux;
-
-    /*
-     * Unpack optional information
-     */
-
-    /* presumably optional information only appears in the core? */
-    if (!base_channel) {
-        if (s->timestamp)
-            skip_bits_long(&s->gb, 32);
-
-        if (s->aux_data) {
-            aux_data_count = get_bits(&s->gb, 6);
 
-            // align (32-bit)
-            skip_bits_long(&s->gb, (-get_bits_count(&s->gb)) & 31);
-
-            aux_data_end = 8 * aux_data_count + get_bits_count(&s->gb);
-
-            if ((nsyncaux = get_bits_long(&s->gb, 32)) != DCA_NSYNCAUX) {
-                av_log(s->avctx, AV_LOG_ERROR, "nSYNCAUX mismatch %#"PRIx32"\n",
-                       nsyncaux);
-                return AVERROR_INVALIDDATA;
-            }
-
-            if (get_bits1(&s->gb)) { // bAUXTimeStampFlag
-                avpriv_request_sample(s->avctx,
-                                      "Auxiliary Decode Time Stamp Flag");
-                // align (4-bit)
-                skip_bits(&s->gb, (-get_bits_count(&s->gb)) & 4);
-                // 44 bits: nMSByte (8), nMarker (4), nLSByte (28), nMarker (4)
-                skip_bits_long(&s->gb, 44);
-            }
-
-            if ((s->core_downmix = get_bits1(&s->gb))) {
-                int am = get_bits(&s->gb, 3);
-                switch (am) {
-                case 0:
-                    s->core_downmix_amode = DCA_MONO;
-                    break;
-                case 1:
-                    s->core_downmix_amode = DCA_STEREO;
-                    break;
-                case 2:
-                    s->core_downmix_amode = DCA_STEREO_TOTAL;
-                    break;
-                case 3:
-                    s->core_downmix_amode = DCA_3F;
-                    break;
-                case 4:
-                    s->core_downmix_amode = DCA_2F1R;
-                    break;
-                case 5:
-                    s->core_downmix_amode = DCA_2F2R;
-                    break;
-                case 6:
-                    s->core_downmix_amode = DCA_3F1R;
-                    break;
-                default:
-                    av_log(s->avctx, AV_LOG_ERROR,
-                           "Invalid mode %d for embedded downmix coefficients\n",
-                           am);
-                    return AVERROR_INVALIDDATA;
-                }
-                for (out = 0; out < ff_dca_channels[s->core_downmix_amode]; out++) {
-                    for (in = 0; in < s->prim_channels + !!s->lfe; in++) {
-                        uint16_t tmp = get_bits(&s->gb, 9);
-                        if ((tmp & 0xFF) > 241) {
-                            av_log(s->avctx, AV_LOG_ERROR,
-                                   "Invalid downmix coefficient code %"PRIu16"\n",
-                                   tmp);
-                            return AVERROR_INVALIDDATA;
-                        }
-                        s->core_downmix_codes[in][out] = tmp;
-                    }
-                }
-            }
-
-            align_get_bits(&s->gb); // byte align
-            skip_bits(&s->gb, 16);  // nAUXCRC16
-
-            // additional data (reserved, cf. ETSI TS 102 114 V1.4.1)
-            if ((reserved = (aux_data_end - get_bits_count(&s->gb))) < 0) {
-                av_log(s->avctx, AV_LOG_ERROR,
-                       "Overread auxiliary data by %d bits\n", -reserved);
-                return AVERROR_INVALIDDATA;
-            } else if (reserved) {
-                avpriv_request_sample(s->avctx,
-                                      "Core auxiliary data reserved content");
-                skip_bits_long(&s->gb, reserved);
+        // Parse XLL component in EXSS
+        if (asset && (asset->extension_mask & DCA_EXSS_XLL)) {
+            if ((ret = ff_dca_xll_parse(&s->xll, input, asset)) < 0) {
+                // Conceal XLL synchronization error
+                if (ret == AVERROR(EAGAIN)
+                    && (prev_packet & DCA_PACKET_XLL)
+                    && (s->packet & DCA_PACKET_CORE))
+                    s->packet |= DCA_PACKET_XLL | DCA_PACKET_RECOVERY;
+                else if (ret == AVERROR(ENOMEM) || (avctx->err_recognition & AV_EF_EXPLODE))
+                    return ret;
+            } else {
+                s->packet |= DCA_PACKET_XLL;
             }
         }
 
-        if (s->crc_present && s->dynrange)
-            get_bits(&s->gb, 16);
-    }
-
-    return 0;
-}
-
-/**
- * Decode a dca frame block
- *
- * @param s     pointer to the DCAContext
- */
-
-static int dca_decode_block(DCAContext *s, int base_channel, int block_index)
-{
-    int ret;
-
-    /* Sanity check */
-    if (s->current_subframe >= s->subframes) {
-        av_log(s->avctx, AV_LOG_DEBUG, "check failed: %i>%i",
-               s->current_subframe, s->subframes);
-        return AVERROR_INVALIDDATA;
-    }
-
-    if (!s->current_subsubframe) {
-        /* Read subframe header */
-        if ((ret = dca_subframe_header(s, base_channel, block_index)))
+        // Parse core extensions in EXSS or backward compatible core sub-stream
+        if ((s->packet & DCA_PACKET_CORE)
+            && (ret = ff_dca_core_parse_exss(&s->core, input, asset)) < 0)
             return ret;
     }
 
-    /* Read subsubframe */
-    if ((ret = dca_subsubframe(s, base_channel, block_index)))
-        return ret;
+    // Filter the frame
+    if (s->packet & DCA_PACKET_XLL) {
+        if (s->packet & DCA_PACKET_CORE) {
+            int x96_synth = -1;
 
-    /* Update state */
-    s->current_subsubframe++;
-    if (s->current_subsubframe >= s->subsubframes[s->current_subframe]) {
-        s->current_subsubframe = 0;
-        s->current_subframe++;
-    }
-    if (s->current_subframe >= s->subframes) {
-        /* Read subframe footer */
-        if ((ret = dca_subframe_footer(s, base_channel)))
-            return ret;
-    }
-
-    return 0;
-}
+            // Enable X96 synthesis if needed
+            if (s->xll.chset[0].freq == 96000 && s->core.sample_rate == 48000)
+                x96_synth = 1;
 
-int ff_dca_xbr_parse_frame(DCAContext *s)
-{
-    int scale_table_high[DCA_CHSET_CHANS_MAX][DCA_SUBBANDS][2];
-    int active_bands[DCA_CHSETS_MAX][DCA_CHSET_CHANS_MAX];
-    int abits_high[DCA_CHSET_CHANS_MAX][DCA_SUBBANDS];
-    int anctemp[DCA_CHSET_CHANS_MAX];
-    int chset_fsize[DCA_CHSETS_MAX];
-    int n_xbr_ch[DCA_CHSETS_MAX];
-    int hdr_size, num_chsets, xbr_tmode, hdr_pos;
-    int i, j, k, l, chset, chan_base;
-
-    av_log(s->avctx, AV_LOG_DEBUG, "DTS-XBR: decoding XBR extension\n");
-
-    /* get bit position of sync header */
-    hdr_pos = get_bits_count(&s->gb) - 32;
-
-    hdr_size = get_bits(&s->gb, 6) + 1;
-    num_chsets = get_bits(&s->gb, 2) + 1;
-
-    for(i = 0; i < num_chsets; i++)
-        chset_fsize[i] = get_bits(&s->gb, 14) + 1;
-
-    xbr_tmode = get_bits1(&s->gb);
-
-    for(i = 0; i < num_chsets; i++) {
-        n_xbr_ch[i] = get_bits(&s->gb, 3) + 1;
-        k = get_bits(&s->gb, 2) + 5;
-        for(j = 0; j < n_xbr_ch[i]; j++) {
-            active_bands[i][j] = get_bits(&s->gb, k) + 1;
-            if (active_bands[i][j] > DCA_SUBBANDS) {
-                av_log(s->avctx, AV_LOG_ERROR, "too many active subbands (%d)\n", active_bands[i][j]);
-                return AVERROR_INVALIDDATA;
+            if ((ret = ff_dca_core_filter_fixed(&s->core, x96_synth)) < 0) {
+                s->core_residual_valid = 0;
+                return ret;
             }
-        }
-    }
-
-    /* skip to the end of the header */
-    i = get_bits_count(&s->gb);
-    if(hdr_pos + hdr_size * 8 > i)
-        skip_bits_long(&s->gb, hdr_pos + hdr_size * 8 - i);
-
-    /* loop over the channel data sets */
-    /* only decode as many channels as we've decoded base data for */
-    for(chset = 0, chan_base = 0;
-        chset < num_chsets && chan_base + n_xbr_ch[chset] <= s->prim_channels;
-        chan_base += n_xbr_ch[chset++]) {
-        int start_posn = get_bits_count(&s->gb);
-        int subsubframe = 0;
-        int subframe = 0;
-
-        /* loop over subframes */
-        for (k = 0; k < (s->sample_blocks / 8); k++) {
-            /* parse header if we're on first subsubframe of a block */
-            if(subsubframe == 0) {
-                /* Parse subframe header */
-                for(i = 0; i < n_xbr_ch[chset]; i++) {
-                    anctemp[i] = get_bits(&s->gb, 2) + 2;
-                }
-
-                for(i = 0; i < n_xbr_ch[chset]; i++) {
-                    get_array(&s->gb, abits_high[i], active_bands[chset][i], anctemp[i]);
-                }
 
-                for(i = 0; i < n_xbr_ch[chset]; i++) {
-                    anctemp[i] = get_bits(&s->gb, 3);
-                    if(anctemp[i] < 1) {
-                        av_log(s->avctx, AV_LOG_ERROR, "DTS-XBR: SYNC ERROR\n");
-                        return AVERROR_INVALIDDATA;
-                    }
-                }
-
-                /* generate scale factors */
-                for(i = 0; i < n_xbr_ch[chset]; i++) {
-                    const uint32_t *scale_table;
-                    int nbits;
-                    int scale_table_size;
-
-                    if (s->scalefactor_huffman[chan_base+i] == 6) {
-                        scale_table = ff_dca_scale_factor_quant7;
-                        scale_table_size = FF_ARRAY_ELEMS(ff_dca_scale_factor_quant7);
-                    } else {
-                        scale_table = ff_dca_scale_factor_quant6;
-                        scale_table_size = FF_ARRAY_ELEMS(ff_dca_scale_factor_quant6);
-                    }
-
-                    nbits = anctemp[i];
-
-                    for(j = 0; j < active_bands[chset][i]; j++) {
-                        if(abits_high[i][j] > 0) {
-                            int index = get_bits(&s->gb, nbits);
-                            if (index >= scale_table_size) {
-                                av_log(s->avctx, AV_LOG_ERROR, "scale table index %d invalid\n", index);
-                                return AVERROR_INVALIDDATA;
-                            }
-                            scale_table_high[i][j][0] = scale_table[index];
-
-                            if(xbr_tmode && s->transition_mode[i][j]) {
-                                int index = get_bits(&s->gb, nbits);
-                                if (index >= scale_table_size) {
-                                    av_log(s->avctx, AV_LOG_ERROR, "scale table index %d invalid\n", index);
-                                    return AVERROR_INVALIDDATA;
-                                }
-                                scale_table_high[i][j][1] = scale_table[index];
-                            }
-                        }
-                    }
-                }
-            }
-
-            /* decode audio array for this block */
-            for(i = 0; i < n_xbr_ch[chset]; i++) {
-                for(j = 0; j < active_bands[chset][i]; j++) {
-                    const int xbr_abits = abits_high[i][j];
-                    const float quant_step_size = ff_dca_lossless_quant_d[xbr_abits];
-                    const int sfi = xbr_tmode && s->transition_mode[i][j] && subsubframe >= s->transition_mode[i][j];
-                    const float rscale = quant_step_size * scale_table_high[i][j][sfi];
-                    float *subband_samples = s->subband_samples[k][chan_base+i][j];
-                    int block[8];
-
-                    if(xbr_abits <= 0)
-                        continue;
-
-                    if(xbr_abits > 7) {
-                        get_array(&s->gb, block, 8, xbr_abits - 3);
-                    } else {
-                        int block_code1, block_code2, size, levels, err;
-
-                        size   = abits_sizes[xbr_abits - 1];
-                        levels = abits_levels[xbr_abits - 1];
-
-                        block_code1 = get_bits(&s->gb, size);
-                        block_code2 = get_bits(&s->gb, size);
-                        err = decode_blockcodes(block_code1, block_code2,
-                                                levels, block);
-                        if (err) {
-                            av_log(s->avctx, AV_LOG_ERROR,
-                                   "ERROR: DTS-XBR: block code look-up failed\n");
-                            return AVERROR_INVALIDDATA;
-                        }
-                    }
-
-                    /* scale & sum into subband */
-                    for(l = 0; l < 8; l++)
-                        subband_samples[l] += (float)block[l] * rscale;
-                }
-            }
-
-            /* check DSYNC marker */
-            if(s->aspf || subsubframe == s->subsubframes[subframe] - 1) {
-                if(get_bits(&s->gb, 16) != 0xffff) {
-                    av_log(s->avctx, AV_LOG_ERROR, "DTS-XBR: Didn't get subframe DSYNC\n");
-                    return AVERROR_INVALIDDATA;
-                }
+            // Force lossy downmixed output on the first core frame filtered.
+            // This prevents audible clicks when seeking and is consistent with
+            // what reference decoder does when there are multiple channel sets.
+            if (!s->core_residual_valid) {
+                if (s->xll.nreschsets > 0 && s->xll.nchsets > 1)
+                    s->packet |= DCA_PACKET_RECOVERY;
+                s->core_residual_valid = 1;
             }
+        }
 
-            /* advance sub-sub-frame index */
-            if(++subsubframe >= s->subsubframes[subframe]) {
-                subsubframe = 0;
-                subframe++;
+        if ((ret = ff_dca_xll_filter_frame(&s->xll, frame)) < 0) {
+            // Fall back to core unless hard error
+            if (!(s->packet & DCA_PACKET_CORE))
+                return ret;
+            if (ret != AVERROR_INVALIDDATA || (avctx->err_recognition & AV_EF_EXPLODE))
+                return ret;
+            if ((ret = ff_dca_core_filter_frame(&s->core, frame)) < 0) {
+                s->core_residual_valid = 0;
+                return ret;
             }
         }
-
-        /* skip to next channel set */
-        i = get_bits_count(&s->gb);
-        if(start_posn + chset_fsize[chset] * 8 != i) {
-            j = start_posn + chset_fsize[chset] * 8 - i;
-            if(j < 0 || j >= 8)
-                av_log(s->avctx, AV_LOG_ERROR, "DTS-XBR: end of channel set,"
-                       " skipping further than expected (%d bits)\n", j);
-            skip_bits_long(&s->gb, j);
+    } else if (s->packet & DCA_PACKET_CORE) {
+        if ((ret = ff_dca_core_filter_frame(&s->core, frame)) < 0) {
+            s->core_residual_valid = 0;
+            return ret;
         }
+        s->core_residual_valid = !!(s->core.filter_mode & DCA_FILTER_MODE_FIXED);
+    } else {
+        return AVERROR_INVALIDDATA;
     }
 
-    return 0;
-}
+    *got_frame_ptr = 1;
 
+    return avpkt->size;
+}
 
-/* parse initial header for XXCH and dump details */
-int ff_dca_xxch_decode_frame(DCAContext *s)
+static av_cold void dcadec_flush(AVCodecContext *avctx) // AVCodec.flush callback: reset state kept between packets
 {
-    int hdr_size, spkmsk_bits, num_chsets, core_spk, hdr_pos;
-    int i, chset, base_channel, chstart, fsize[8];
-
-    /* assume header word has already been parsed */
-    hdr_pos     = get_bits_count(&s->gb) - 32;
-    hdr_size    = get_bits(&s->gb, 6) + 1;
-  /*chhdr_crc   =*/ skip_bits1(&s->gb);
-    spkmsk_bits = get_bits(&s->gb, 5) + 1;
-    num_chsets  = get_bits(&s->gb, 2) + 1;
-
-    for (i = 0; i < num_chsets; i++)
-        fsize[i] = get_bits(&s->gb, 14) + 1;
-
-    core_spk               = get_bits(&s->gb, spkmsk_bits);
-    s->xxch_core_spkmask   = core_spk;
-    s->xxch_nbits_spk_mask = spkmsk_bits;
-    s->xxch_dmix_embedded  = 0;
-
-    /* skip to the end of the header */
-    i = get_bits_count(&s->gb);
-    if (hdr_pos + hdr_size * 8 > i)
-        skip_bits_long(&s->gb, hdr_pos + hdr_size * 8 - i);
-
-    for (chset = 0; chset < num_chsets; chset++) {
-        chstart       = get_bits_count(&s->gb);
-        base_channel  = s->prim_channels;
-        s->xxch_chset = chset;
-
-        /* XXCH and Core headers differ, see 6.4.2 "XXCH Channel Set Header" vs.
-           5.3.2 "Primary Audio Coding Header", DTS Spec 1.3.1 */
-        dca_parse_audio_coding_header(s, base_channel, 1);
-
-        /* decode channel data */
-        for (i = 0; i < (s->sample_blocks / 8); i++) {
-            if (dca_decode_block(s, base_channel, i)) {
-                av_log(s->avctx, AV_LOG_ERROR,
-                       "Error decoding DTS-XXCH extension\n");
-                continue;
-            }
-        }
-
-        /* skip to end of this section */
-        i = get_bits_count(&s->gb);
-        if (chstart + fsize[chset] * 8 > i)
-            skip_bits_long(&s->gb, chstart + fsize[chset] * 8 - i);
-    }
-    s->xxch_chset = num_chsets;
+    DCAContext *s = avctx->priv_data;
 
-    return 0;
-}
+    ff_dca_core_flush(&s->core);    // flush the core decoder context
+    ff_dca_xll_flush(&s->xll);      // flush the XLL decoder context
 
-static float dca_dmix_code(unsigned code)
-{
-    int sign = (code >> 8) - 1;
-    code &= 0xff;
-    return ((ff_dca_dmixtable[code] ^ sign) - sign) * (1.0 / (1 << 15));
+    s->core_residual_valid = 0;     // core is no longer valid for residual decoding; the decode path re-checks for recovery
 }
 
-/**
- * Main frame decoding function
- * FIXME add arguments
- */
-static int dca_decode_frame(AVCodecContext *avctx, void *data,
-                            int *got_frame_ptr, AVPacket *avpkt)
+static av_cold int dcadec_close(AVCodecContext *avctx)
 {
-    AVFrame *frame     = data;
-    const uint8_t *buf = avpkt->data;
-    int buf_size       = avpkt->size;
-    int channel_mask;
-    int channel_layout;
-    int lfe_samples;
-    int num_core_channels = 0;
-    int i, ret;
-    float **samples_flt;
-    float *src_chan;
-    float *dst_chan;
     DCAContext *s = avctx->priv_data;
-    int core_ss_end;
-    int channels, full_channels;
-    float scale;
-    int achan;
-    int chset;
-    int mask;
-    int lavc;
-    int posn;
-    int j, k;
-    int endch;
-    int upsample = 0;
-
-    s->exss_ext_mask = 0;
-    s->xch_present   = 0;
-
-    s->dca_buffer_size = AVERROR_INVALIDDATA;
-    for (i = 0; i < buf_size - 3 && s->dca_buffer_size == AVERROR_INVALIDDATA; i++)
-        s->dca_buffer_size = avpriv_dca_convert_bitstream(buf + i, buf_size - i, s->dca_buffer,
-                                                          DCA_MAX_FRAME_SIZE + DCA_MAX_EXSS_HEADER_SIZE);
-
-    if (s->dca_buffer_size == AVERROR_INVALIDDATA) {
-        av_log(avctx, AV_LOG_ERROR, "Not a valid DCA frame\n");
-        return AVERROR_INVALIDDATA;
-    }
-
-    if ((ret = dca_parse_frame_header(s)) < 0) {
-        // seems like the frame is corrupt, try with the next one
-        return ret;
-    }
-    // set AVCodec values with parsed data
-    avctx->sample_rate = s->sample_rate;
-
-    s->profile = FF_PROFILE_DTS;
-
-    for (i = 0; i < (s->sample_blocks / 8); i++) {
-        if ((ret = dca_decode_block(s, 0, i))) {
-            av_log(avctx, AV_LOG_ERROR, "error decoding block\n");
-            return ret;
-        }
-    }
-
-    /* record number of core channels incase less than max channels are requested */
-    num_core_channels = s->prim_channels;
-
-    if (s->prim_channels + !!s->lfe > 2 &&
-        avctx->request_channel_layout == AV_CH_LAYOUT_STEREO) {
-            /* Stereo downmix coefficients
-             *
-             * The decoder can only downmix to 2-channel, so we need to ensure
-             * embedded downmix coefficients are actually targeting 2-channel.
-             */
-            if (s->core_downmix && (s->core_downmix_amode == DCA_STEREO ||
-                                    s->core_downmix_amode == DCA_STEREO_TOTAL)) {
-                for (i = 0; i < num_core_channels + !!s->lfe; i++) {
-                    /* Range checked earlier */
-                    s->downmix_coef[i][0] = dca_dmix_code(s->core_downmix_codes[i][0]);
-                    s->downmix_coef[i][1] = dca_dmix_code(s->core_downmix_codes[i][1]);
-                }
-                s->output = s->core_downmix_amode;
-            } else {
-                int am = s->amode & DCA_CHANNEL_MASK;
-                if (am >= FF_ARRAY_ELEMS(ff_dca_default_coeffs)) {
-                    av_log(s->avctx, AV_LOG_ERROR,
-                           "Invalid channel mode %d\n", am);
-                    return AVERROR_INVALIDDATA;
-                }
-                if (num_core_channels + !!s->lfe >
-                    FF_ARRAY_ELEMS(ff_dca_default_coeffs[0])) {
-                    avpriv_request_sample(s->avctx, "Downmixing %d channels",
-                                          s->prim_channels + !!s->lfe);
-                    return AVERROR_PATCHWELCOME;
-                }
-                for (i = 0; i < num_core_channels + !!s->lfe; i++) {
-                    s->downmix_coef[i][0] = ff_dca_default_coeffs[am][i][0];
-                    s->downmix_coef[i][1] = ff_dca_default_coeffs[am][i][1];
-                }
-            }
-            ff_dlog(s->avctx, "Stereo downmix coeffs:\n");
-            for (i = 0; i < num_core_channels + !!s->lfe; i++) {
-                ff_dlog(s->avctx, "L, input channel %d = %f\n", i,
-                        s->downmix_coef[i][0]);
-                ff_dlog(s->avctx, "R, input channel %d = %f\n", i,
-                        s->downmix_coef[i][1]);
-            }
-            ff_dlog(s->avctx, "\n");
-    }
-
-    if (s->ext_coding)
-        s->core_ext_mask = ff_dca_ext_audio_descr_mask[s->ext_descr];
-    else
-        s->core_ext_mask = 0;
-
-    core_ss_end = FFMIN(s->frame_size, s->dca_buffer_size) * 8;
-
-    /* only scan for extensions if ext_descr was unknown or indicated a
-     * supported XCh extension */
-    if (s->core_ext_mask < 0 || s->core_ext_mask & (DCA_EXT_XCH | DCA_EXT_XXCH)) {
-        /* if ext_descr was unknown, clear s->core_ext_mask so that the
-         * extensions scan can fill it up */
-        s->core_ext_mask = FFMAX(s->core_ext_mask, 0);
-
-        /* extensions start at 32-bit boundaries into bitstream */
-        skip_bits_long(&s->gb, (-get_bits_count(&s->gb)) & 31);
 
-        while (core_ss_end - get_bits_count(&s->gb) >= 32) {
-            uint32_t bits = get_bits_long(&s->gb, 32);
+    ff_dca_core_close(&s->core);    // close the core decoder context
+    ff_dca_xll_close(&s->xll);      // close the XLL decoder context
 
-            switch (bits) {
-            case DCA_SYNCWORD_XCH: {
-                int ext_amode, xch_fsize;
+    av_freep(&s->buffer);           // free the packet buffer
+    s->buffer_size = 0;             // keep the recorded size consistent with the freed buffer
 
-                s->xch_base_channel = s->prim_channels;
-
-                /* validate sync word using XCHFSIZE field */
-                xch_fsize = show_bits(&s->gb, 10);
-                if ((s->frame_size != (get_bits_count(&s->gb) >> 3) - 4 + xch_fsize) &&
-                    (s->frame_size != (get_bits_count(&s->gb) >> 3) - 4 + xch_fsize + 1))
-                    continue;
-
-                /* skip length-to-end-of-frame field for the moment */
-                skip_bits(&s->gb, 10);
-
-                s->core_ext_mask |= DCA_EXT_XCH;
-
-                /* extension amode(number of channels in extension) should be 1 */
-                /* AFAIK XCh is not used for more channels */
-                if ((ext_amode = get_bits(&s->gb, 4)) != 1) {
-                    av_log(avctx, AV_LOG_ERROR,
-                           "XCh extension amode %d not supported!\n",
-                           ext_amode);
-                    continue;
-                }
-
-                if (s->xch_base_channel < 2) {
-                    avpriv_request_sample(avctx, "XCh with fewer than 2 base channels");
-                    continue;
-                }
-
-                /* much like core primary audio coding header */
-                dca_parse_audio_coding_header(s, s->xch_base_channel, 0);
-
-                for (i = 0; i < (s->sample_blocks / 8); i++)
-                    if ((ret = dca_decode_block(s, s->xch_base_channel, i))) {
-                        av_log(avctx, AV_LOG_ERROR, "error decoding XCh extension\n");
-                        continue;
-                    }
-
-                s->xch_present = 1;
-                break;
-            }
-            case DCA_SYNCWORD_XXCH:
-                /* XXCh: extended channels */
-                /* usually found either in core or HD part in DTS-HD HRA streams,
-                 * but not in DTS-ES which contains XCh extensions instead */
-                s->core_ext_mask |= DCA_EXT_XXCH;
-                ff_dca_xxch_decode_frame(s);
-                break;
-
-            case 0x1d95f262: {
-                int fsize96 = show_bits(&s->gb, 12) + 1;
-                if (s->frame_size != (get_bits_count(&s->gb) >> 3) - 4 + fsize96)
-                    continue;
-
-                av_log(avctx, AV_LOG_DEBUG, "X96 extension found at %d bits\n",
-                       get_bits_count(&s->gb));
-                skip_bits(&s->gb, 12);
-                av_log(avctx, AV_LOG_DEBUG, "FSIZE96 = %d bytes\n", fsize96);
-                av_log(avctx, AV_LOG_DEBUG, "REVNO = %d\n", get_bits(&s->gb, 4));
-
-                s->core_ext_mask |= DCA_EXT_X96;
-                break;
-            }
-            }
-
-            skip_bits_long(&s->gb, (-get_bits_count(&s->gb)) & 31);
-        }
-    } else {
-        /* no supported extensions, skip the rest of the core substream */
-        skip_bits_long(&s->gb, core_ss_end - get_bits_count(&s->gb));
-    }
-
-    if (s->core_ext_mask & DCA_EXT_X96)
-        s->profile = FF_PROFILE_DTS_96_24;
-    else if (s->core_ext_mask & (DCA_EXT_XCH | DCA_EXT_XXCH))
-        s->profile = FF_PROFILE_DTS_ES;
-
-    /* check for ExSS (HD part) */
-    if (s->dca_buffer_size - s->frame_size > 32 &&
-        get_bits_long(&s->gb, 32) == DCA_SYNCWORD_SUBSTREAM)
-        ff_dca_exss_parse_header(s);
-
-    avctx->profile = s->profile;
-
-    full_channels = channels = s->prim_channels + !!s->lfe;
-
-    /* If we have XXCH then the channel layout is managed differently */
-    /* note that XLL will also have another way to do things */
-    if (!(s->core_ext_mask & DCA_EXT_XXCH)
-        || (s->core_ext_mask & DCA_EXT_XXCH && avctx->request_channels > 0
-            && avctx->request_channels
-            < num_core_channels + !!s->lfe + s->xxch_chset_nch[0]))
-    { /* xxx should also do MA extensions */
-        if (s->amode < 16) {
-            avctx->channel_layout = ff_dca_core_channel_layout[s->amode];
-
-            if (s->prim_channels + !!s->lfe > 2 &&
-                avctx->request_channel_layout == AV_CH_LAYOUT_STEREO) {
-                /*
-                 * Neither the core's auxiliary data nor our default tables contain
-                 * downmix coefficients for the additional channel coded in the XCh
-                 * extension, so when we're doing a Stereo downmix, don't decode it.
-                 */
-                s->xch_disable = 1;
-            }
-
-#if FF_API_REQUEST_CHANNELS
-FF_DISABLE_DEPRECATION_WARNINGS
-            if (s->xch_present && !s->xch_disable &&
-                (!avctx->request_channels ||
-                 avctx->request_channels > num_core_channels + !!s->lfe)) {
-FF_ENABLE_DEPRECATION_WARNINGS
-#else
-            if (s->xch_present && !s->xch_disable) {
-#endif
-                if (avctx->channel_layout & AV_CH_BACK_CENTER) {
-                    avpriv_request_sample(avctx, "XCh with Back center channel");
-                    return AVERROR_INVALIDDATA;
-                }
-                avctx->channel_layout |= AV_CH_BACK_CENTER;
-                if (s->lfe) {
-                    avctx->channel_layout |= AV_CH_LOW_FREQUENCY;
-                    s->channel_order_tab = ff_dca_channel_reorder_lfe_xch[s->amode];
-                } else {
-                    s->channel_order_tab = ff_dca_channel_reorder_nolfe_xch[s->amode];
-                }
-                if (s->channel_order_tab[s->xch_base_channel] < 0)
-                    return AVERROR_INVALIDDATA;
-            } else {
-                channels       = num_core_channels + !!s->lfe;
-                s->xch_present = 0; /* disable further xch processing */
-                if (s->lfe) {
-                    avctx->channel_layout |= AV_CH_LOW_FREQUENCY;
-                    s->channel_order_tab = ff_dca_channel_reorder_lfe[s->amode];
-                } else
-                    s->channel_order_tab = ff_dca_channel_reorder_nolfe[s->amode];
-            }
-
-            if (channels > !!s->lfe &&
-                s->channel_order_tab[channels - 1 - !!s->lfe] < 0)
-                return AVERROR_INVALIDDATA;
-
-            if (av_get_channel_layout_nb_channels(avctx->channel_layout) != channels) {
-                av_log(avctx, AV_LOG_ERROR, "Number of channels %d mismatches layout %d\n", channels, av_get_channel_layout_nb_channels(avctx->channel_layout));
-                return AVERROR_INVALIDDATA;
-            }
-
-            if (num_core_channels + !!s->lfe > 2 &&
-                avctx->request_channel_layout == AV_CH_LAYOUT_STEREO) {
-                channels              = 2;
-                s->output             = s->prim_channels == 2 ? s->amode : DCA_STEREO;
-                avctx->channel_layout = AV_CH_LAYOUT_STEREO;
-            }
-            else if (avctx->request_channel_layout & AV_CH_LAYOUT_NATIVE) {
-                static const int8_t dca_channel_order_native[9] = { 0, 1, 2, 3, 4, 5, 6, 7, 8 };
-                s->channel_order_tab = dca_channel_order_native;
-            }
-            s->lfe_index = ff_dca_lfe_index[s->amode];
-        } else {
-            av_log(avctx, AV_LOG_ERROR,
-                   "Non standard configuration %d !\n", s->amode);
-            return AVERROR_INVALIDDATA;
-        }
-
-        s->xxch_dmix_embedded = 0;
-    } else {
-        /* we only get here if an XXCH channel set can be added to the mix */
-        channel_mask = s->xxch_core_spkmask;
-
-        if (avctx->request_channels > 0
-            && avctx->request_channels < s->prim_channels) {
-            channels = num_core_channels + !!s->lfe;
-            for (i = 0; i < s->xxch_chset && channels + s->xxch_chset_nch[i]
-                                              <= avctx->request_channels; i++) {
-                channels += s->xxch_chset_nch[i];
-                channel_mask |= s->xxch_spk_masks[i];
-            }
-        } else {
-            channels = s->prim_channels + !!s->lfe;
-            for (i = 0; i < s->xxch_chset; i++) {
-                channel_mask |= s->xxch_spk_masks[i];
-            }
-        }
-
-        /* Given the DTS spec'ed channel mask, generate an avcodec version */
-        channel_layout = 0;
-        for (i = 0; i < s->xxch_nbits_spk_mask; ++i) {
-            if (channel_mask & (1 << i)) {
-                channel_layout |= ff_dca_map_xxch_to_native[i];
-            }
-        }
-
-        /* make sure that we have managed to get equivalent dts/avcodec channel
-         * masks in some sense -- unfortunately some channels could overlap */
-        if (av_popcount(channel_mask) != av_popcount(channel_layout)) {
-            av_log(avctx, AV_LOG_DEBUG,
-                   "DTS-XXCH: Inconsistent avcodec/dts channel layouts\n");
-            return AVERROR_INVALIDDATA;
-        }
-
-        avctx->channel_layout = channel_layout;
-
-        if (!(avctx->request_channel_layout & AV_CH_LAYOUT_NATIVE)) {
-            /* Estimate DTS --> avcodec ordering table */
-            for (chset = -1, j = 0; chset < s->xxch_chset; ++chset) {
-                mask = chset >= 0 ? s->xxch_spk_masks[chset]
-                                  : s->xxch_core_spkmask;
-                for (i = 0; i < s->xxch_nbits_spk_mask; i++) {
-                    if (mask & ~(DCA_XXCH_LFE1 | DCA_XXCH_LFE2) & (1 << i)) {
-                        lavc = ff_dca_map_xxch_to_native[i];
-                        posn = av_popcount(channel_layout & (lavc - 1));
-                        s->xxch_order_tab[j++] = posn;
-                    }
-                }
-
-            }
-
-            s->lfe_index = av_popcount(channel_layout & (AV_CH_LOW_FREQUENCY-1));
-        } else { /* native ordering */
-            for (i = 0; i < channels; i++)
-                s->xxch_order_tab[i] = i;
-
-            s->lfe_index = channels - 1;
-        }
-
-        s->channel_order_tab = s->xxch_order_tab;
-    }
-
-    /* get output buffer */
-    frame->nb_samples = 256 * (s->sample_blocks / 8);
-    if (s->exss_ext_mask & DCA_EXT_EXSS_XLL) {
-        int xll_nb_samples = s->xll_segments * s->xll_smpl_in_seg;
-        /* Check for invalid/unsupported conditions first */
-        if (s->xll_residual_channels > channels) {
-            av_log(s->avctx, AV_LOG_WARNING,
-                   "DCA: too many residual channels (%d, core channels %d). Disabling XLL\n",
-                   s->xll_residual_channels, channels);
-            s->exss_ext_mask &= ~DCA_EXT_EXSS_XLL;
-        } else if (xll_nb_samples != frame->nb_samples &&
-                   2 * frame->nb_samples != xll_nb_samples) {
-            av_log(s->avctx, AV_LOG_WARNING,
-                   "DCA: unsupported upsampling (%d XLL samples, %d core samples). Disabling XLL\n",
-                   xll_nb_samples, frame->nb_samples);
-            s->exss_ext_mask &= ~DCA_EXT_EXSS_XLL;
-        } else {
-            if (2 * frame->nb_samples == xll_nb_samples) {
-                av_log(s->avctx, AV_LOG_INFO,
-                       "XLL: upsampling core channels by a factor of 2\n");
-                upsample = 1;
-
-                frame->nb_samples = xll_nb_samples;
-                // FIXME: Is it good enough to copy from the first channel set?
-                avctx->sample_rate = s->xll_chsets[0].sampling_frequency;
-            }
-            /* If downmixing to stereo, don't decode additional channels.
-             * FIXME: Using the xch_disable flag for this doesn't seem right. */
-            if (!s->xch_disable)
-                channels = s->xll_channels;
-        }
-    }
-
-    if (avctx->channels != channels) {
-        if (avctx->channels)
-            av_log(avctx, AV_LOG_INFO, "Number of channels changed in DCA decoder (%d -> %d)\n", avctx->channels, channels);
-        avctx->channels = channels;
-    }
-
-    /* FIXME: This is an ugly hack, to just revert to the default
-     * layout if we have additional channels. Need to convert the XLL
-     * channel masks to ffmpeg channel_layout mask. */
-    if (av_get_channel_layout_nb_channels(avctx->channel_layout) != avctx->channels)
-        avctx->channel_layout = 0;
-
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
-        return ret;
-    samples_flt = (float **) frame->extended_data;
-
-    /* allocate buffer for extra channels if downmixing */
-    if (avctx->channels < full_channels) {
-        ret = av_samples_get_buffer_size(NULL, full_channels - channels,
-                                         frame->nb_samples,
-                                         avctx->sample_fmt, 0);
-        if (ret < 0)
-            return ret;
-
-        av_fast_malloc(&s->extra_channels_buffer,
-                       &s->extra_channels_buffer_size, ret);
-        if (!s->extra_channels_buffer)
-            return AVERROR(ENOMEM);
-
-        ret = av_samples_fill_arrays((uint8_t **) s->extra_channels, NULL,
-                                     s->extra_channels_buffer,
-                                     full_channels - channels,
-                                     frame->nb_samples, avctx->sample_fmt, 0);
-        if (ret < 0)
-            return ret;
-    }
-
-    /* filter to get final output */
-    for (i = 0; i < (s->sample_blocks / 8); i++) {
-        int ch;
-        unsigned block = upsample ? 512 : 256;
-        for (ch = 0; ch < channels; ch++)
-            s->samples_chanptr[ch] = samples_flt[ch] + i * block;
-        for (; ch < full_channels; ch++)
-            s->samples_chanptr[ch] = s->extra_channels[ch - channels] + i * block;
-
-        dca_filter_channels(s, i, upsample);
-
-        /* If this was marked as a DTS-ES stream we need to subtract back- */
-        /* channel from SL & SR to remove matrixed back-channel signal */
-        if ((s->source_pcm_res & 1) && s->xch_present) {
-            float *back_chan = s->samples_chanptr[s->channel_order_tab[s->xch_base_channel]];
-            float *lt_chan   = s->samples_chanptr[s->channel_order_tab[s->xch_base_channel - 2]];
-            float *rt_chan   = s->samples_chanptr[s->channel_order_tab[s->xch_base_channel - 1]];
-            s->fdsp->vector_fmac_scalar(lt_chan, back_chan, -M_SQRT1_2, 256);
-            s->fdsp->vector_fmac_scalar(rt_chan, back_chan, -M_SQRT1_2, 256);
-        }
-
-        /* If stream contains XXCH, we might need to undo an embedded downmix */
-        if (s->xxch_dmix_embedded) {
-            /* Loop over channel sets in turn */
-            ch = num_core_channels;
-            for (chset = 0; chset < s->xxch_chset; chset++) {
-                endch = ch + s->xxch_chset_nch[chset];
-                mask = s->xxch_dmix_embedded;
-
-                /* undo downmix */
-                for (j = ch; j < endch; j++) {
-                    if (mask & (1 << j)) { /* this channel has been mixed-out */
-                        src_chan = s->samples_chanptr[s->channel_order_tab[j]];
-                        for (k = 0; k < endch; k++) {
-                            achan = s->channel_order_tab[k];
-                            scale = s->xxch_dmix_coeff[j][k];
-                            if (scale != 0.0) {
-                                dst_chan = s->samples_chanptr[achan];
-                                s->fdsp->vector_fmac_scalar(dst_chan, src_chan,
-                                                           -scale, 256);
-                            }
-                        }
-                    }
-                }
-
-                /* if a downmix has been embedded then undo the pre-scaling */
-                if ((mask & (1 << ch)) && s->xxch_dmix_sf[chset] != 1.0f) {
-                    scale = s->xxch_dmix_sf[chset];
-
-                    for (j = 0; j < ch; j++) {
-                        src_chan = s->samples_chanptr[s->channel_order_tab[j]];
-                        for (k = 0; k < 256; k++)
-                            src_chan[k] *= scale;
-                    }
-
-                    /* LFE channel is always part of core, scale if it exists */
-                    if (s->lfe) {
-                        src_chan = s->samples_chanptr[s->lfe_index];
-                        for (k = 0; k < 256; k++)
-                            src_chan[k] *= scale;
-                    }
-                }
-
-                ch = endch;
-            }
-
-        }
-    }
-
-    /* update lfe history */
-    lfe_samples = 2 * s->lfe * (s->sample_blocks / 8);
-    for (i = 0; i < 2 * s->lfe * 4; i++)
-        s->lfe_data[i] = s->lfe_data[i + lfe_samples];
-
-    if (s->exss_ext_mask & DCA_EXT_EXSS_XLL) {
-        ret = ff_dca_xll_decode_audio(s, frame);
-        if (ret < 0)
-            return ret;
-    }
-    /* AVMatrixEncoding
-     *
-     * DCA_STEREO_TOTAL (Lt/Rt) is equivalent to Dolby Surround */
-    ret = ff_side_data_update_matrix_encoding(frame,
-                                              (s->output & ~DCA_LFE) == DCA_STEREO_TOTAL ?
-                                              AV_MATRIX_ENCODING_DOLBY : AV_MATRIX_ENCODING_NONE);
-    if (ret < 0)
-        return ret;
-
-    if (   avctx->profile != FF_PROFILE_DTS_HD_MA
-        && avctx->profile != FF_PROFILE_DTS_HD_HRA)
-        avctx->bit_rate = s->bit_rate;
-    *got_frame_ptr = 1;
-
-    return buf_size;
+    return 0;
 }
 
-/**
- * DCA initialization
- *
- * @param avctx     pointer to the AVCodecContext
- */
-
-static av_cold int dca_decode_init(AVCodecContext *avctx)
+static av_cold int dcadec_init(AVCodecContext *avctx) // AVCodec.init callback: wire sub-contexts, translate the requested layout
 {
     DCAContext *s = avctx->priv_data;
 
     s->avctx = avctx;
-    dca_init_vlcs();
+    s->core.avctx = avctx;          // core, EXSS and XLL contexts all point back at the same codec context
+    s->exss.avctx = avctx;
+    s->xll.avctx = avctx;
 
-    s->fdsp = avpriv_float_dsp_alloc(avctx->flags & CODEC_FLAG_BITEXACT);
-    if (!s->fdsp)
+    if (ff_dca_core_init(&s->core) < 0) // any core init failure is reported as ENOMEM
         return AVERROR(ENOMEM);
 
-    ff_mdct_init(&s->imdct, 6, 1, 1.0);
-    ff_synth_filter_init(&s->synth);
     ff_dcadsp_init(&s->dcadsp);
-    ff_fmt_convert_init(&s->fmt_conv, avctx);
+    s->core.dcadsp = &s->dcadsp;    // core and XLL share the single DSP context owned by DCAContext
+    s->xll.dcadsp = &s->dcadsp;
 
-    avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
-
-    /* allow downmixing to stereo */
-#if FF_API_REQUEST_CHANNELS
-FF_DISABLE_DEPRECATION_WARNINGS
-    if (avctx->request_channels == 2)
-        avctx->request_channel_layout = AV_CH_LAYOUT_STEREO;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
-    if (avctx->channels > 2 &&
-        avctx->request_channel_layout == AV_CH_LAYOUT_STEREO)
-        avctx->channels = 2;
+    switch (avctx->request_channel_layout & ~AV_CH_LAYOUT_NATIVE) { // NATIVE flag is masked out before matching
+    case 0:
+        s->request_channel_layout = 0;
+        break;
+    case AV_CH_LAYOUT_STEREO:
+    case AV_CH_LAYOUT_STEREO_DOWNMIX: // both stereo variants map to the same DCA speaker layout
+        s->request_channel_layout = DCA_SPEAKER_LAYOUT_STEREO;
+        break;
+    case AV_CH_LAYOUT_5POINT0:
+        s->request_channel_layout = DCA_SPEAKER_LAYOUT_5POINT0;
+        break;
+    case AV_CH_LAYOUT_5POINT1:
+        s->request_channel_layout = DCA_SPEAKER_LAYOUT_5POINT1;
+        break;
+    default: // unsupported request: warn only, init still succeeds and s->request_channel_layout is left untouched
+        av_log(avctx, AV_LOG_WARNING, "Invalid request_channel_layout\n");
+        break;
+    }
 
-    return 0;
-}
+    avctx->sample_fmt = AV_SAMPLE_FMT_S32P; // initial output format: planar signed 32-bit
+    avctx->bits_per_raw_sample = 24;        // initial raw sample depth reported to the caller
 
-static av_cold int dca_decode_end(AVCodecContext *avctx)
-{
-    DCAContext *s = avctx->priv_data;
-    ff_mdct_end(&s->imdct);
-    av_freep(&s->extra_channels_buffer);
-    av_freep(&s->fdsp);
-    av_freep(&s->xll_sample_buf);
-    av_freep(&s->qmf64_table);
     return 0;
 }
 
-static const AVProfile profiles[] = {
-    { FF_PROFILE_DTS,        "DTS"        },
-    { FF_PROFILE_DTS_ES,     "DTS-ES"     },
-    { FF_PROFILE_DTS_96_24,  "DTS 96/24"  },
-    { FF_PROFILE_DTS_HD_HRA, "DTS-HD HRA" },
-    { FF_PROFILE_DTS_HD_MA,  "DTS-HD MA"  },
-    { FF_PROFILE_UNKNOWN },
-};
+#define OFFSET(x) offsetof(DCAContext, x) // byte offset of an option field inside DCAContext
+#define PARAM AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_DECODING_PARAM // unparenthesized: only use as a complete initializer field
 
-static const AVOption options[] = {
-    { "disable_xch", "disable decoding of the XCh extension", offsetof(DCAContext, xch_disable), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM },
-    { "disable_xll", "disable decoding of the XLL extension", offsetof(DCAContext, xll_disable), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM },
-    { NULL },
+static const AVOption dcadec_options[] = { // private decoder options, referenced by dcadec_class.option
+    { "core_only", "Decode core only without extensions", OFFSET(core_only), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, PARAM },
+    { NULL } // table terminator
 };
 
-static const AVClass dca_decoder_class = {
+static const AVClass dcadec_class = { // AVClass for the decoder private context, attached via ff_dca_decoder.priv_class
     .class_name = "DCA decoder",
     .item_name  = av_default_item_name,
-    .option     = options,
+    .option     = dcadec_options, // exposes the core_only option
     .version    = LIBAVUTIL_VERSION_INT,
     .category   = AV_CLASS_CATEGORY_DECODER,
 };
 
 AVCodec ff_dca_decoder = {
-    .name            = "dca",
-    .long_name       = NULL_IF_CONFIG_SMALL("DCA (DTS Coherent Acoustics)"),
-    .type            = AVMEDIA_TYPE_AUDIO,
-    .id              = AV_CODEC_ID_DTS,
-    .priv_data_size  = sizeof(DCAContext),
-    .init            = dca_decode_init,
-    .decode          = dca_decode_frame,
-    .close           = dca_decode_end,
-    .capabilities    = CODEC_CAP_CHANNEL_CONF | CODEC_CAP_DR1,
-    .sample_fmts     = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
-                                                       AV_SAMPLE_FMT_NONE },
-    .profiles        = NULL_IF_CONFIG_SMALL(profiles),
-    .priv_class      = &dca_decoder_class,
+    .name           = "dca",
+    .long_name      = NULL_IF_CONFIG_SMALL("DCA (DTS Coherent Acoustics)"),
+    .type           = AVMEDIA_TYPE_AUDIO,
+    .id             = AV_CODEC_ID_DTS,
+    .priv_data_size = sizeof(DCAContext),
+    .init           = dcadec_init,
+    .decode         = dcadec_decode_frame,
+    .close          = dcadec_close,
+    .flush          = dcadec_flush, // flushes core and XLL contexts and clears core_residual_valid
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_CHANNEL_CONF,
+    .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S32P, // dcadec_init() starts with S32P
+                                                      AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE },
+    .priv_class     = &dcadec_class,
+    .profiles       = NULL_IF_CONFIG_SMALL(ff_dca_profiles),
+    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP, // NOTE(review): presumably lets dcadec_close() run after a failed init - confirm
 };
diff --git a/libavcodec/dcadec.h b/libavcodec/dcadec.h
new file mode 100644
index 00000000..6726121d
--- /dev/null
+++ b/libavcodec/dcadec.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2016 foo86
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_DCADEC_H
+#define AVCODEC_DCADEC_H
+
+#include "libavutil/common.h"
+#include "libavutil/float_dsp.h"
+
+#include "avcodec.h"
+#include "get_bits.h"
+#include "dca.h"
+#include "dcadsp.h"
+#include "dca_core.h"
+#include "dca_exss.h"
+#include "dca_xll.h"
+
+#define DCA_BUFFER_PADDING_SIZE     1024
+
+#define DCA_PACKET_CORE         0x01
+#define DCA_PACKET_EXSS         0x02
+#define DCA_PACKET_XLL          0x04
+#define DCA_PACKET_RECOVERY     0x08
+
+typedef struct DCAContext {
+    const AVClass   *class;       ///< class for AVOptions
+    AVCodecContext  *avctx;       ///< presumably the owning codec context -- TODO confirm in dcadec.c
+
+    DCACoreDecoder core;  ///< Core decoder context
+    DCAExssParser  exss;  ///< EXSS parser context
+    DCAXllDecoder  xll;   ///< XLL decoder context
+
+    DCADSPContext   dcadsp;     ///< DSP function-pointer table (declared in dcadsp.h)
+
+    uint8_t         *buffer;    ///< Packet buffer
+    unsigned int    buffer_size;    ///< presumably the allocated size of buffer -- TODO confirm
+
+    int     packet; ///< Packet flags (presumably a mask of the DCA_PACKET_* bits -- confirm)
+
+    int     core_residual_valid;    ///< Core valid for residual decoding
+
+    int     request_channel_layout; ///< Converted from avctx.request_channel_layout
+    int     core_only;              ///< Core only decoding flag
+} DCAContext;
+
+int ff_dca_set_channel_layout(AVCodecContext *avctx, int *ch_remap, int dca_mask);
+
+int ff_dca_check_crc(GetBitContext *s, int p1, int p2);
+
+void ff_dca_downmix_to_stereo_fixed(DCADSPContext *dcadsp, int32_t **samples,
+                                    int *coeff_l, int nsamples, int ch_mask);
+void ff_dca_downmix_to_stereo_float(AVFloatDSPContext *fdsp, float **samples,
+                                    int *coeff_l, int nsamples, int ch_mask);
+
+static inline int ff_dca_seek_bits(GetBitContext *s, int p)   // set the read index to bit p; 0 on success, -1 if rejected
+{
+    if (p < s->index || p > s->size_in_bits)   // rejects positions behind the current index or past size_in_bits
+        return -1;   // s is left unmodified on failure
+    s->index = p;
+    return 0;
+}
+
+#endif
diff --git a/libavcodec/dcadsp.c b/libavcodec/dcadsp.c
index b32d962c..09faee51 100644
--- a/libavcodec/dcadsp.c
+++ b/libavcodec/dcadsp.c
@@ -1,6 +1,5 @@
 /*
- * Copyright (c) 2004 Gildas Bazin
- * Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
+ * Copyright (C) 2016 foo86
  *
  * This file is part of FFmpeg.
  *
@@ -19,99 +18,399 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include "config.h"
-
-#include "libavutil/attributes.h"
-#include "libavutil/intreadwrite.h"
+#include "libavutil/mem.h"
 
 #include "dcadsp.h"
+#include "dcamath.h"
+
+static void decode_hf_c(int32_t **dst,   // fills dst[i][ofs .. ofs+len-1] for each subband i in [sb_start, sb_end)
+                        const int32_t *vq_index,
+                        const int8_t hf_vq[1024][32],
+                        int32_t scale_factors[32][2],
+                        ptrdiff_t sb_start, ptrdiff_t sb_end,
+                        ptrdiff_t ofs, ptrdiff_t len)
+{
+    int i, j;
+
+    for (i = sb_start; i < sb_end; i++) {
+        const int8_t *coeff = hf_vq[vq_index[i]];   // codebook row chosen by this subband's index
+        int32_t scale = scale_factors[i][0];   // only element [0] of the pair is read here
+        for (j = 0; j < len; j++)
+            dst[i][j + ofs] = clip23(coeff[j] * scale + (1 << 3) >> 4);   // '+' binds before '>>': add 8, shift by 4, then clip
+    }
+}
+
+static void decode_joint_c(int32_t **dst, int32_t **src,   // dst[i][ofs+j] = clipped, scaled copy of src[i][ofs+j]
+                           const int32_t *scale_factors,
+                           ptrdiff_t sb_start, ptrdiff_t sb_end,
+                           ptrdiff_t ofs, ptrdiff_t len)
+{
+    int i, j;
 
-static void decode_hf_c(float dst[DCA_SUBBANDS][8],
-                        const int32_t vq_num[DCA_SUBBANDS],
-                        const int8_t hf_vq[1024][32], intptr_t vq_offset,
-                        int32_t scale[DCA_SUBBANDS][2],
-                        intptr_t start, intptr_t end)
-{
-    int i, l;
-
-    for (l = start; l < end; l++) {
-        /* 1 vector -> 32 samples but we only need the 8 samples
-         * for this subsubframe. */
-        const int8_t *ptr = &hf_vq[vq_num[l]][vq_offset];
-        float fscale = scale[l][0] * (1 / 16.0);
-        for (i = 0; i < 8; i++)
-            dst[l][i] = ptr[i] * fscale;
+    for (i = sb_start; i < sb_end; i++) {   // subbands in [sb_start, sb_end)
+        int32_t scale = scale_factors[i];   // one scale per subband
+        for (j = 0; j < len; j++)
+            dst[i][j + ofs] = clip23(mul17(src[i][j + ofs], scale));   // mul17: 64-bit product, rounded shift by 17
     }
 }
 
-static inline void dca_lfe_fir(float *out, const float *in, const float *coefs,
-                               int decifactor)
+static void lfe_fir_float_c(float *pcm_samples, int32_t *lfe_samples,   // shared worker behind lfe_fir0/lfe_fir1_float_c
+                            const float *filter_coeff, ptrdiff_t npcmblocks,
+                            int dec_select)
 {
-    float *out2    = out + 2 * decifactor - 1;
-    int num_coeffs = 256 / decifactor;
-    int j, k;
+    // Select decimation factor
+    int factor = 64 << dec_select;   // outputs per LFE sample: 64 (dec_select 0) or 128 (dec_select 1)
+    int ncoeffs = 8 >> dec_select;   // taps per output: 8 or 4
+    int nlfesamples = npcmblocks >> (dec_select + 1);
+    int i, j, k;
+
+    for (i = 0; i < nlfesamples; i++) {
+        // One decimated sample generates 64 or 128 interpolated ones
+        for (j = 0; j < factor / 2; j++) {
+            float a = 0;
+            float b = 0;
 
-    /* One decimated sample generates 2*decifactor interpolated ones */
-    for (k = 0; k < decifactor; k++) {
-        float v0 = 0.0;
-        float v1 = 0.0;
-        for (j = 0; j < num_coeffs; j++, coefs++) {
-            v0 += in[-j]                 * *coefs;
-            v1 += in[j + 1 - num_coeffs] * *coefs;
+            for (k = 0; k < ncoeffs; k++) {
+                a += filter_coeff[      j * ncoeffs + k] * lfe_samples[-k];   // negative index: reads samples before the pointer
+                b += filter_coeff[255 - j * ncoeffs - k] * lfe_samples[-k];   // same inputs, table indexed from its end (255) downward
+            }
+
+            pcm_samples[             j] = a;   // first half of this output group
+            pcm_samples[factor / 2 + j] = b;   // second half of this output group
         }
-        *out++  = v0;
-        *out2-- = v1;
+
+        lfe_samples++;
+        pcm_samples += factor;
     }
 }
 
-static void dca_qmf_32_subbands(float samples_in[32][8], int sb_act,
-                                SynthFilterContext *synth, FFTContext *imdct,
-                                float synth_buf_ptr[512],
-                                int *synth_buf_offset, float synth_buf2[32],
-                                const float window[512], float *samples_out,
-                                float raXin[32], float scale)
+static void lfe_fir0_float_c(float *pcm_samples, int32_t *lfe_samples,   // wrapper fixing dec_select = 0
+                             const float *filter_coeff, ptrdiff_t npcmblocks)
+{
+    lfe_fir_float_c(pcm_samples, lfe_samples, filter_coeff, npcmblocks, 0);
+}
+
+static void lfe_fir1_float_c(float *pcm_samples, int32_t *lfe_samples,   // wrapper fixing dec_select = 1
+                             const float *filter_coeff, ptrdiff_t npcmblocks)
 {
+    lfe_fir_float_c(pcm_samples, lfe_samples, filter_coeff, npcmblocks, 1);
+}
+
+static void lfe_x96_float_c(float *dst, const float *src,   // writes 2 * len outputs to dst from len inputs in src
+                            float *hist, ptrdiff_t len)
+{
+    float prev = *hist;   // initial "previous sample" is supplied through *hist
     int i;
-    int subindex;
-
-    for (i = sb_act; i < 32; i++)
-        raXin[i] = 0.0;
-
-    /* Reconstructed channel sample index */
-    for (subindex = 0; subindex < 8; subindex++) {
-        /* Load in one sample from each subband and clear inactive subbands */
-        for (i = 0; i < sb_act; i++) {
-            unsigned sign = (i - 1) & 2;
-            uint32_t v    = AV_RN32A(&samples_in[i][subindex]) ^ sign << 30;
-            AV_WN32A(&raXin[i], v);
+
+    for (i = 0; i < len; i++) {
+        float a = 0.25f * src[i] + 0.75f * prev;   // weighted 1/4 current, 3/4 previous
+        float b = 0.75f * src[i] + 0.25f * prev;   // weighted 3/4 current, 1/4 previous
+        prev = src[i];
+        *dst++ = a;
+        *dst++ = b;
+    }
+
+    *hist = prev;   // stores the last input consumed back into *hist
+}
+
+static void sub_qmf32_float_c(SynthFilterContext *synth,   // runs synth_filter_float once per PCM block, 32 outputs each
+                              FFTContext *imdct,
+                              float *pcm_samples,
+                              int32_t **subband_samples_lo,
+                              int32_t **subband_samples_hi,   // not referenced by this 32-band variant
+                              float *hist1, int *offset, float *hist2,
+                              const float *filter_coeff, ptrdiff_t npcmblocks,
+                              float scale)
+{
+    LOCAL_ALIGNED(32, float, input, [32]);
+    int i, j;
+
+    for (j = 0; j < npcmblocks; j++) {
+        // Load in one sample from each subband
+        for (i = 0; i < 32; i++) {
+            if ((i - 1) & 2)   // non-zero when i % 4 is 0 or 3: those subbands are negated
+                input[i] = -subband_samples_lo[i][j];
+            else
+                input[i] =  subband_samples_lo[i][j];
+        }
+
+        // One subband sample generates 32 interpolated ones
+        synth->synth_filter_float(imdct, hist1, offset,
+                                  hist2, filter_coeff,
+                                  pcm_samples, input, scale);
+        pcm_samples += 32;
+    }
+}
+
+static void sub_qmf64_float_c(SynthFilterContext *synth,   // runs synth_filter_float_64 once per PCM block, 64 outputs each
+                              FFTContext *imdct,
+                              float *pcm_samples,
+                              int32_t **subband_samples_lo,
+                              int32_t **subband_samples_hi,   // may be NULL: then only the lower 32 bands carry data
+                              float *hist1, int *offset, float *hist2,
+                              const float *filter_coeff, ptrdiff_t npcmblocks,
+                              float scale)
+{
+    LOCAL_ALIGNED(32, float, input, [64]);
+    int i, j;
+
+    if (!subband_samples_hi)
+        memset(&input[32], 0, sizeof(input[0]) * 32);   // zero the upper half once; the loop below never writes it in this case
+
+    for (j = 0; j < npcmblocks; j++) {
+        // Load in one sample from each subband
+        if (subband_samples_hi) {
+            // Full 64 subbands, first 32 are residual coded
+            for (i =  0; i < 32; i++) {
+                if ((i - 1) & 2)   // non-zero when i % 4 is 0 or 3: sum is negated
+                    input[i] = -subband_samples_lo[i][j] - subband_samples_hi[i][j];
+                else
+                    input[i] =  subband_samples_lo[i][j] + subband_samples_hi[i][j];
+            }
+            for (i = 32; i < 64; i++) {   // upper bands come from the hi array alone
+                if ((i - 1) & 2)
+                    input[i] = -subband_samples_hi[i][j];
+                else
+                    input[i] =  subband_samples_hi[i][j];
+            }
+        } else {
+            // Only first 32 subbands
+            for (i =  0; i < 32; i++) {
+                if ((i - 1) & 2)
+                    input[i] = -subband_samples_lo[i][j];
+                else
+                    input[i] =  subband_samples_lo[i][j];
+            }
+        }
+
+        // One subband sample generates 64 interpolated ones
+        synth->synth_filter_float_64(imdct, hist1, offset,
+                                     hist2, filter_coeff,
+                                     pcm_samples, input, scale);
+        pcm_samples += 64;
+    }
+}
+
+static void lfe_fir_fixed_c(int32_t *pcm_samples, int32_t *lfe_samples,   // integer LFE interpolation, 64 outputs per input
+                            const int32_t *filter_coeff, ptrdiff_t npcmblocks)
+{
+    // Decimation factor is fixed here: one LFE sample per two PCM blocks
+    int nlfesamples = npcmblocks >> 1;
+    int i, j, k;
+
+    for (i = 0; i < nlfesamples; i++) {
+        // One decimated sample generates 64 interpolated ones
+        for (j = 0; j < 32; j++) {
+            int64_t a = 0;   // 64-bit accumulators for the 32x32-bit products
+            int64_t b = 0;
+
+            for (k = 0; k < 8; k++) {
+                a += (int64_t)filter_coeff[      j * 8 + k] * lfe_samples[-k];   // negative index: reads samples before the pointer
+                b += (int64_t)filter_coeff[255 - j * 8 - k] * lfe_samples[-k];   // same inputs, table indexed from its end (255) downward
+            }
+
+            pcm_samples[     j] = clip23(norm23(a));   // rounded shift by 23, then clip
+            pcm_samples[32 + j] = clip23(norm23(b));
         }
 
-        synth->synth_filter_float(imdct, synth_buf_ptr, synth_buf_offset,
-                                  synth_buf2, window, samples_out, raXin,
-                                  scale);
-        samples_out += 32;
+        lfe_samples++;
+        pcm_samples += 64;
+    }
+}
+
+static void lfe_x96_fixed_c(int32_t *dst, const int32_t *src,   // integer counterpart of lfe_x96_float_c: 2 * len outputs
+                            int32_t *hist, ptrdiff_t len)
+{
+    int32_t prev = *hist;   // initial "previous sample" is supplied through *hist
+    int i;
+
+    for (i = 0; i < len; i++) {
+        int64_t a = INT64_C(2097471) * src[i] + INT64_C(6291137) * prev;   // weights are ~0.25 and ~0.75 once norm23 divides by 2^23
+        int64_t b = INT64_C(6291137) * src[i] + INT64_C(2097471) * prev;   // mirrored weights: ~0.75 current, ~0.25 previous
+        prev = src[i];
+        *dst++ = clip23(norm23(a));
+        *dst++ = clip23(norm23(b));
     }
+
+    *hist = prev;   // stores the last input consumed back into *hist
 }
 
-static void dca_lfe_fir0_c(float *out, const float *in, const float *coefs)
+static void sub_qmf32_fixed_c(SynthFilterContext *synth,   // runs synth_filter_fixed once per PCM block, 32 outputs each
+                              DCADCTContext *imdct,
+                              int32_t *pcm_samples,
+                              int32_t **subband_samples_lo,
+                              int32_t **subband_samples_hi,   // not referenced by this 32-band variant
+                              int32_t *hist1, int *offset, int32_t *hist2,
+                              const int32_t *filter_coeff, ptrdiff_t npcmblocks)
 {
-    dca_lfe_fir(out, in, coefs, 32);
+    LOCAL_ALIGNED(32, int32_t, input, [32]);
+    int i, j;
+
+    for (j = 0; j < npcmblocks; j++) {
+        // Load in one sample from each subband
+        for (i = 0; i < 32; i++)
+            input[i] = subband_samples_lo[i][j];   // plain copy: unlike the float variant, no sign flip is applied here
+
+        // One subband sample generates 32 interpolated ones
+        synth->synth_filter_fixed(imdct, hist1, offset,
+                                  hist2, filter_coeff,
+                                  pcm_samples, input);
+        pcm_samples += 32;
+    }
 }
 
-static void dca_lfe_fir1_c(float *out, const float *in, const float *coefs)
+static void sub_qmf64_fixed_c(SynthFilterContext *synth,   // runs synth_filter_fixed_64 once per PCM block, 64 outputs each
+                              DCADCTContext *imdct,
+                              int32_t *pcm_samples,
+                              int32_t **subband_samples_lo,
+                              int32_t **subband_samples_hi,   // may be NULL: then only the lower 32 bands carry data
+                              int32_t *hist1, int *offset, int32_t *hist2,
+                              const int32_t *filter_coeff, ptrdiff_t npcmblocks)
 {
-    dca_lfe_fir(out, in, coefs, 64);
+    LOCAL_ALIGNED(32, int32_t, input, [64]);
+    int i, j;
+
+    if (!subband_samples_hi)
+        memset(&input[32], 0, sizeof(input[0]) * 32);   // zero the upper half once; the loop below never writes it in this case
+
+    for (j = 0; j < npcmblocks; j++) {
+        // Load in one sample from each subband
+        if (subband_samples_hi) {
+            // Full 64 subbands, first 32 are residual coded
+            for (i =  0; i < 32; i++)
+                input[i] = subband_samples_lo[i][j] + subband_samples_hi[i][j];   // plain 32-bit add, no clipping at this point
+            for (i = 32; i < 64; i++)
+                input[i] = subband_samples_hi[i][j];
+        } else {
+            // Only first 32 subbands
+            for (i =  0; i < 32; i++)
+                input[i] = subband_samples_lo[i][j];
+        }
+
+        // One subband sample generates 64 interpolated ones
+        synth->synth_filter_fixed_64(imdct, hist1, offset,
+                                     hist2, filter_coeff,
+                                     pcm_samples, input);
+        pcm_samples += 64;
+    }
+}
+
+static void decor_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)   // dst[i] += round(src[i] * coeff / 8)
+{
+    int i;
+
+    for (i = 0; i < len; i++)
+        dst[i] += src[i] * coeff + (1 << 2) >> 3;   // '+' binds before '>>': add 4, then shift by 3; product is computed in int
+}
+
+static void dmix_sub_xch_c(int32_t *dst1, int32_t *dst2,   // subtracts the same scaled src from both dst1 and dst2
+                           const int32_t *src, ptrdiff_t len)
+{
+    int i;
+
+    for (i = 0; i < len; i++) {
+        int32_t cs = mul23(src[i], 5931520 /* M_SQRT1_2 * (1 << 23) */);   // NOTE(review): 5931520 / 2^23 is ~0.70709, just under M_SQRT1_2; presumably intentional -- confirm before "fixing"
+        dst1[i] -= cs;
+        dst2[i] -= cs;
+    }
+}
+
+static void dmix_sub_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)   // dst[i] -= src[i] * coeff, scaled down by 2^15
+{
+    int i;
+
+    for (i = 0; i < len; i++)
+        dst[i] -= mul15(src[i], coeff);   // mul15: 64-bit product with rounded shift by 15
+}
+
+static void dmix_add_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)   // dst[i] += src[i] * coeff, scaled down by 2^15
+{
+    int i;
+
+    for (i = 0; i < len; i++)
+        dst[i] += mul15(src[i], coeff);   // mul15: 64-bit product with rounded shift by 15
+}
+
+static void dmix_scale_c(int32_t *dst, int scale, ptrdiff_t len)   // in-place scaling of dst by scale / 2^15
+{
+    int i;
+
+    for (i = 0; i < len; i++)
+        dst[i] = mul15(dst[i], scale);   // mul15: 64-bit product with rounded shift by 15
+}
+
+static void dmix_scale_inv_c(int32_t *dst, int scale_inv, ptrdiff_t len)   // in-place scaling of dst by scale_inv / 2^16
+{
+    int i;
+
+    for (i = 0; i < len; i++)
+        dst[i] = mul16(dst[i], scale_inv);   // note the 16-bit shift here versus 15 in dmix_scale_c
+}
+
+static void filter0(int32_t *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)   // dst[i] -= src[i] * coeff / 2^22 (rounded)
+{
+    int i;
+
+    for (i = 0; i < len; i++)
+        dst[i] -= mul22(src[i], coeff);   // helper of assemble_freq_bands_c; 22-bit shift
+}
+
+static void filter1(int32_t *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)   // dst[i] -= src[i] * coeff / 2^23 (rounded)
+{
+    int i;
+
+    for (i = 0; i < len; i++)
+        dst[i] -= mul23(src[i], coeff);   // helper of assemble_freq_bands_c; 23-bit shift
+}
+
+static void assemble_freq_bands_c(int32_t *dst, int32_t *src0, int32_t *src1,   // modifies src0/src1 in place, then interleaves them into dst
+                                  const int32_t *coeff, ptrdiff_t len)   // reads coeff[0..19]
+{
+    int i;
+
+    filter0(src0, src1, coeff[0], len);   // four alternating passes, each updating one band from the other
+    filter0(src1, src0, coeff[1], len);
+    filter0(src0, src1, coeff[2], len);
+    filter0(src1, src0, coeff[3], len);
+
+    for (i = 0; i < 8; i++, src0--) {   // src0 steps back one sample per round, so up to 7 samples before the original src0 are touched
+        filter1(src0, src1, coeff[i +  4], len);
+        filter1(src1, src0, coeff[i + 12], len);
+        filter1(src0, src1, coeff[i +  4], len);   // same coefficient index as the first pass of this round
+    }
+
+    for (i = 0; i < len; i++) {   // dst receives 2 * len samples
+        *dst++ = *src1++;
+        *dst++ = *++src0;   // src0 ends the loop above 8 back, so this starts 7 before the original src0
+    }
 }
 
 av_cold void ff_dcadsp_init(DCADSPContext *s)
 {
-    s->lfe_fir[0]      = dca_lfe_fir0_c;
-    s->lfe_fir[1]      = dca_lfe_fir1_c;
-    s->qmf_32_subbands = dca_qmf_32_subbands;
-    s->decode_hf       = decode_hf_c;
+    s->decode_hf     = decode_hf_c;   // C implementations are installed first; the x86 init below runs afterwards
+    s->decode_joint  = decode_joint_c;
+
+    s->lfe_fir_float[0] = lfe_fir0_float_c;   // array index equals the dec_select the wrapper passes on
+    s->lfe_fir_float[1] = lfe_fir1_float_c;
+    s->lfe_x96_float    = lfe_x96_float_c;
+    s->sub_qmf_float[0] = sub_qmf32_float_c;   // [0] = 32-band variant, [1] = 64-band variant
+    s->sub_qmf_float[1] = sub_qmf64_float_c;
+
+    s->lfe_fir_fixed    = lfe_fir_fixed_c;   // single entry: the fixed-point version has no dec_select
+    s->lfe_x96_fixed    = lfe_x96_fixed_c;
+    s->sub_qmf_fixed[0] = sub_qmf32_fixed_c;   // [0] = 32-band variant, [1] = 64-band variant
+    s->sub_qmf_fixed[1] = sub_qmf64_fixed_c;
+
+    s->decor   = decor_c;
+
+    s->dmix_sub_xch   = dmix_sub_xch_c;
+    s->dmix_sub       = dmix_sub_c;
+    s->dmix_add       = dmix_add_c;
+    s->dmix_scale     = dmix_scale_c;
+    s->dmix_scale_inv = dmix_scale_inv_c;
+
+    s->assemble_freq_bands = assemble_freq_bands_c;
 
-    if (ARCH_ARM)
-        ff_dcadsp_init_arm(s);
     if (ARCH_X86)
         ff_dcadsp_init_x86(s);
 }
diff --git a/libavcodec/dcadsp.h b/libavcodec/dcadsp.h
index abf577b6..c82b7b10 100644
--- a/libavcodec/dcadsp.h
+++ b/libavcodec/dcadsp.h
@@ -1,4 +1,6 @@
 /*
+ * Copyright (C) 2016 foo86
+ *
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
@@ -19,28 +21,72 @@
 #ifndef AVCODEC_DCADSP_H
 #define AVCODEC_DCADSP_H
 
-#include "avfft.h"
-#include "synth_filter.h"
+#include "libavutil/common.h"
 
-#define DCA_SUBBANDS 64
+#include "fft.h"
+#include "dcadct.h"
+#include "synth_filter.h"
 
 typedef struct DCADSPContext {
-    void (*lfe_fir[2])(float *out, const float *in, const float *coefs);
-    void (*qmf_32_subbands)(float samples_in[32][8], int sb_act,
-                            SynthFilterContext *synth, FFTContext *imdct,
-                            float synth_buf_ptr[512],
-                            int *synth_buf_offset, float synth_buf2[32],
-                            const float window[512], float *samples_out,
-                            float raXin[32], float scale);
-    void (*decode_hf)(float dst[DCA_SUBBANDS][8],
-                      const int32_t vq_num[DCA_SUBBANDS],
-                      const int8_t hf_vq[1024][32], intptr_t vq_offset,
-                      int32_t scale[DCA_SUBBANDS][2],
-                      intptr_t start, intptr_t end);
+    void (*decode_hf)(int32_t **dst,   // fill dst[sb][ofs..ofs+len) from the hf_vq row picked by vq_index[sb]
+                      const int32_t *vq_index,
+                      const int8_t hf_vq[1024][32],
+                      int32_t scale_factors[32][2],
+                      ptrdiff_t sb_start, ptrdiff_t sb_end,
+                      ptrdiff_t ofs, ptrdiff_t len);
+
+    void (*decode_joint)(int32_t **dst, int32_t **src,   // dst[sb] = scaled, clipped copy of src[sb] over [ofs, ofs+len)
+                         const int32_t *scale_factors,
+                         ptrdiff_t sb_start, ptrdiff_t sb_end,
+                         ptrdiff_t ofs, ptrdiff_t len);
+
+    void (*lfe_fir_float[2])(float *pcm_samples, int32_t *lfe_samples,   // LFE interpolation; the C init maps index to dec_select 0/1
+                             const float *filter_coeff, ptrdiff_t npcmblocks);
+
+    void (*lfe_x96_float)(float *dst, const float *src,   // 2x expansion of src into dst; *hist carries one sample of state
+                          float *hist, ptrdiff_t len);
+
+    void (*sub_qmf_float[2])(SynthFilterContext *synth,   // subband synthesis; the C init puts 32-band at [0], 64-band at [1]
+                             FFTContext *imdct,
+                             float *pcm_samples,
+                             int32_t **subband_samples_lo,
+                             int32_t **subband_samples_hi,
+                             float *hist1, int *offset, float *hist2,
+                             const float *filter_coeff, ptrdiff_t npcmblocks,
+                             float scale);
+
+    void (*lfe_fir_fixed)(int32_t *pcm_samples, int32_t *lfe_samples,   // integer LFE interpolation, 64 outputs per LFE sample in the C version
+                          const int32_t *filter_coeff, ptrdiff_t npcmblocks);
+
+    void (*lfe_x96_fixed)(int32_t *dst, const int32_t *src,   // integer counterpart of lfe_x96_float
+                          int32_t *hist, ptrdiff_t len);
+
+    void (*sub_qmf_fixed[2])(SynthFilterContext *synth,   // integer subband synthesis; same [0]/[1] split as sub_qmf_float
+                             DCADCTContext *imdct,
+                             int32_t *pcm_samples,
+                             int32_t **subband_samples_lo,
+                             int32_t **subband_samples_hi,
+                             int32_t *hist1, int *offset, int32_t *hist2,
+                             const int32_t *filter_coeff, ptrdiff_t npcmblocks);
+
+    void (*decor)(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len);   // dst += src * coeff, rounded shift by 3
+
+    void (*dmix_sub_xch)(int32_t *dst1, int32_t *dst2,   // subtract one scaled src from both destinations
+                         const int32_t *src, ptrdiff_t len);
+
+    void (*dmix_sub)(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len);   // dst -= src * coeff, rounded shift by 15
+
+    void (*dmix_add)(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len);   // dst += src * coeff, rounded shift by 15
+
+    void (*dmix_scale)(int32_t *dst, int scale, ptrdiff_t len);   // dst *= scale, rounded shift by 15
+
+    void (*dmix_scale_inv)(int32_t *dst, int scale_inv, ptrdiff_t len);   // dst *= scale_inv, rounded shift by 16
+
+    void (*assemble_freq_bands)(int32_t *dst, int32_t *src0, int32_t *src1,   // filters src0/src1 in place, then interleaves into dst
+                                const int32_t *coeff, ptrdiff_t len);
 } DCADSPContext;
 
-void ff_dcadsp_init(DCADSPContext *s);
-void ff_dcadsp_init_arm(DCADSPContext *s);
-void ff_dcadsp_init_x86(DCADSPContext *s);
+av_cold void ff_dcadsp_init(DCADSPContext *s);
+av_cold void ff_dcadsp_init_x86(DCADSPContext *s);
 
-#endif /* AVCODEC_DCADSP_H */
+#endif
diff --git a/libavcodec/dcaenc.c b/libavcodec/dcaenc.c
index c8a215c2..70e92300 100644
--- a/libavcodec/dcaenc.c
+++ b/libavcodec/dcaenc.c
@@ -24,6 +24,7 @@
 #include "libavutil/avassert.h"
 #include "libavutil/channel_layout.h"
 #include "libavutil/common.h"
+#include "libavutil/internal.h"
 #include "avcodec.h"
 #include "dca.h"
 #include "dcadata.h"
@@ -58,6 +59,7 @@ typedef struct DCAEncContext {
     int lfe_scale_factor;
     softfloat lfe_quant;
     int32_t lfe_peak_cb;
+    const int8_t *channel_order_tab;  ///< channel reordering table, lfe and non lfe
 
     int32_t history[512][MAX_CHANNELS]; /* This is a circular buffer */
     int32_t subband[SUBBAND_SAMPLES][DCAENC_SUBBANDS][MAX_CHANNELS];
@@ -133,8 +135,12 @@ static int encode_init(AVCodecContext *avctx)
         return AVERROR_PATCHWELCOME;
     }
 
-    if (c->lfe_channel)
+    if (c->lfe_channel) {
         c->fullband_channels--;
+        c->channel_order_tab = ff_dca_channel_reorder_lfe[c->channel_config];
+    } else {
+        c->channel_order_tab = ff_dca_channel_reorder_nolfe[c->channel_config];
+    }
 
     for (i = 0; i < 9; i++) {
         if (sample_rates[i] == avctx->sample_rate)
@@ -145,13 +151,12 @@ static int encode_init(AVCodecContext *avctx)
     c->samplerate_index = i;
 
     if (avctx->bit_rate < 32000 || avctx->bit_rate > 3840000) {
-        av_log(avctx, AV_LOG_ERROR, "Bit rate %i not supported.", avctx->bit_rate);
+        av_log(avctx, AV_LOG_ERROR, "Bit rate %"PRId64" not supported.", (int64_t)avctx->bit_rate);
         return AVERROR(EINVAL);
     }
     for (i = 0; ff_dca_bit_rates[i] < avctx->bit_rate; i++)
         ;
     c->bitrate_index = i;
-    avctx->bit_rate = ff_dca_bit_rates[i];
     c->frame_bits = FFALIGN((avctx->bit_rate * 512 + avctx->sample_rate - 1) / avctx->sample_rate, 32);
     min_frame_bits = 132 + (493 + 28 * 32) * c->fullband_channels + c->lfe_channel * 72;
     if (c->frame_bits < min_frame_bits || c->frame_bits > (DCA_MAX_FRAME_SIZE << 3))
@@ -164,15 +169,24 @@ static int encode_init(AVCodecContext *avctx)
     if (!cos_table[0]) {
         int j, k;
 
-        for (i = 0; i < 2048; i++) {
+        cos_table[0] = 0x7fffffff;
+        cos_table[512] = 0;
+        cos_table[1024] = -cos_table[0];
+        for (i = 1; i < 512; i++) {
             cos_table[i]   = (int32_t)(0x7fffffff * cos(M_PI * i / 1024));
-            cb_to_level[i] = (int32_t)(0x7fffffff * pow(10, -0.005 * i));
+            cos_table[1024-i] = -cos_table[i];
+            cos_table[1024+i] = -cos_table[i];
+            cos_table[2048-i] = cos_table[i];
+        }
+        for (i = 0; i < 2048; i++) {
+            cb_to_level[i] = (int32_t)(0x7fffffff * ff_exp10(-0.005 * i));
         }
 
-        /* FIXME: probably incorrect */
-        for (i = 0; i < 256; i++) {
-            lfe_fir_64i[i] = (int32_t)(0x01ffffff * ff_dca_lfe_fir_64[i]);
-            lfe_fir_64i[511 - i] = (int32_t)(0x01ffffff * ff_dca_lfe_fir_64[i]);
+        for (k = 0; k < 32; k++) {
+            for (j = 0; j < 8; j++) {
+                lfe_fir_64i[64 * j + k] = (int32_t)(0xffffff800000ULL * ff_dca_lfe_fir_64[8 * k + j]);
+                lfe_fir_64i[64 * (7-j) + (63 - k)] = (int32_t)(0xffffff800000ULL * ff_dca_lfe_fir_64[8 * k + j]);
+            }
         }
 
         for (i = 0; i < 512; i++) {
@@ -191,7 +205,7 @@ static int encode_init(AVCodecContext *avctx)
         }
 
         for (i = 0; i < 256; i++) {
-            double add = 1 + pow(10, -0.01 * i);
+            double add = 1 + ff_exp10(-0.01 * i);
             cb_to_add[i] = (int32_t)(100 * log10(add));
         }
         for (j = 0; j < 8; j++) {
@@ -243,6 +257,7 @@ static void subband_transform(DCAEncContext *c, const int32_t *input)
         /* History is copied because it is also needed for PSY */
         int32_t hist[512];
         int hist_start = 0;
+        const int chi = c->channel_order_tab[ch];
 
         for (i = 0; i < 512; i++)
             hist[i] = c->history[i][ch];
@@ -279,7 +294,7 @@ static void subband_transform(DCAEncContext *c, const int32_t *input)
 
             /* Copy in 32 new samples from input */
             for (i = 0; i < 32; i++)
-                hist[i + hist_start] = input[(subs * 32 + i) * c->channels + ch];
+                hist[i + hist_start] = input[(subs * 32 + i) * c->channels + chi];
             hist_start = (hist_start + 32) & 511;
         }
     }
@@ -288,6 +303,7 @@ static void subband_transform(DCAEncContext *c, const int32_t *input)
 static void lfe_downsample(DCAEncContext *c, const int32_t *input)
 {
     /* FIXME: make 128x LFE downsampling possible */
+    const int lfech = ff_dca_lfe_index[c->channel_config];
     int i, j, lfes;
     int32_t hist[512];
     int32_t accum;
@@ -309,7 +325,7 @@ static void lfe_downsample(DCAEncContext *c, const int32_t *input)
 
         /* Copy in 64 new samples from input */
         for (i = 0; i < 64; i++)
-            hist[i + hist_start] = input[(lfes * 64 + i) * c->channels + c->channels - 1];
+            hist[i + hist_start] = input[(lfes * 64 + i) * c->channels + lfech];
 
         hist_start = (hist_start + 64) & 511;
     }
@@ -497,10 +513,12 @@ static void calc_masking(DCAEncContext *c, const int32_t *input)
 
     for (ssf = 0; ssf < SUBSUBFRAMES; ssf++)
         for (ch = 0; ch < c->fullband_channels; ch++) {
+            const int chi = c->channel_order_tab[ch];
+
             for (i = 0, k = 128 + 256 * ssf; k < 512; i++, k++)
                 data[i] = c->history[k][ch];
             for (k -= 512; i < 512; i++, k++)
-                data[i] = input[k * c->channels + ch];
+                data[i] = input[k * c->channels + chi];
             adjust_jnd(c->samplerate_index, data, c->masking_curve_cb[ssf]);
         }
     for (i = 0; i < 256; i++) {
@@ -632,8 +650,11 @@ static void shift_history(DCAEncContext *c, const int32_t *input)
     int k, ch;
 
     for (k = 0; k < 512; k++)
-        for (ch = 0; ch < c->channels; ch++)
-            c->history[k][ch] = input[k * c->channels + ch];
+        for (ch = 0; ch < c->channels; ch++) {
+            const int chi = c->channel_order_tab[ch];
+
+            c->history[k][ch] = input[k * c->channels + chi];
+        }
 }
 
 static int32_t quantize_value(int32_t value, softfloat quant)
@@ -916,7 +937,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     const int32_t *samples;
     int ret, i;
 
-    if ((ret = ff_alloc_packet2(avctx, avpkt, c->frame_size )) < 0)
+    if ((ret = ff_alloc_packet2(avctx, avpkt, c->frame_size, 0)) < 0)
         return ret;
 
     samples = (const int32_t *)frame->data[0];
@@ -938,11 +959,15 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     for (i = 0; i < SUBFRAMES; i++)
         put_subframe(c, i);
 
+
+    for (i = put_bits_count(&c->pb); i < 8*c->frame_size; i++)
+        put_bits(&c->pb, 1, 0);
+
     flush_put_bits(&c->pb);
 
     avpkt->pts      = frame->pts;
     avpkt->duration = ff_samples_to_time_base(avctx, frame->nb_samples);
-    avpkt->size     = c->frame_size + 1;
+    avpkt->size     = put_bits_count(&c->pb) >> 3;
     *got_packet_ptr = 1;
     return 0;
 }
@@ -960,7 +985,7 @@ AVCodec ff_dca_encoder = {
     .priv_data_size        = sizeof(DCAEncContext),
     .init                  = encode_init,
     .encode2               = encode_frame,
-    .capabilities          = CODEC_CAP_EXPERIMENTAL,
+    .capabilities          = AV_CODEC_CAP_EXPERIMENTAL,
     .sample_fmts           = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S32,
                                                             AV_SAMPLE_FMT_NONE },
     .supported_samplerates = sample_rates,
diff --git a/libavcodec/dcamath.h b/libavcodec/dcamath.h
new file mode 100644
index 00000000..e0d6f4fd
--- /dev/null
+++ b/libavcodec/dcamath.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2016 foo86
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_DCAMATH_H
+#define AVCODEC_DCAMATH_H
+
+#include "libavutil/common.h"
+#include "libavutil/intmath.h"
+
+static inline int32_t norm__(int64_t a, int bits)   // shift a right by bits with rounding, then narrow to 32 bits
+{
+    if (bits > 0)
+        return (int32_t)((a + (INT64_C(1) << (bits - 1))) >> bits);   // adds half of 2^bits before the shift
+    else
+        return (int32_t)a;   // bits <= 0: no shift, just the narrowing cast
+}
+
+static inline int32_t mul__(int32_t a, int32_t b, int bits)   // a * b scaled down by 2^bits with rounding
+{
+    return norm__((int64_t)a * b, bits);   // widen first so the product is formed in 64 bits
+}
+
+static inline int32_t norm13(int64_t a) { return norm__(a, 13); }   // normN: rounded right shift by N bits
+static inline int32_t norm16(int64_t a) { return norm__(a, 16); }
+static inline int32_t norm20(int64_t a) { return norm__(a, 20); }
+static inline int32_t norm21(int64_t a) { return norm__(a, 21); }
+static inline int32_t norm23(int64_t a) { return norm__(a, 23); }
+
+static inline int32_t mul15(int32_t a, int32_t b) { return mul__(a, b, 15); }   // mulN: 64-bit product, rounded right shift by N bits
+static inline int32_t mul16(int32_t a, int32_t b) { return mul__(a, b, 16); }
+static inline int32_t mul17(int32_t a, int32_t b) { return mul__(a, b, 17); }
+static inline int32_t mul22(int32_t a, int32_t b) { return mul__(a, b, 22); }
+static inline int32_t mul23(int32_t a, int32_t b) { return mul__(a, b, 23); }
+static inline int32_t mul31(int32_t a, int32_t b) { return mul__(a, b, 31); }
+
+static inline int32_t clip23(int32_t a) { return av_clip_intp2(a, 23); }   // presumably clamps to a signed 24-bit range -- confirm against av_clip_intp2
+
+#endif
diff --git a/libavcodec/dct-test.c b/libavcodec/dct-test.c
index 56e1a629..e5ef8378 100644
--- a/libavcodec/dct-test.c
+++ b/libavcodec/dct-test.c
@@ -82,6 +82,8 @@ static const struct algo idct_tab[] = {
     { "REF-DBL",     ff_ref_idct,          FF_IDCT_PERM_NONE },
     { "INT",         ff_j_rev_dct,         FF_IDCT_PERM_LIBMPEG2 },
     { "SIMPLE-C",    ff_simple_idct_8,     FF_IDCT_PERM_NONE },
+    { "SIMPLE-C10",  ff_simple_idct_10,    FF_IDCT_PERM_NONE },
+    { "SIMPLE-C12",  ff_simple_idct_12,    FF_IDCT_PERM_NONE, 0, 1 },
     { "PR-C",        ff_prores_idct_wrap,  FF_IDCT_PERM_NONE, 0, 1 },
 #if CONFIG_FAANIDCT
     { "FAANI",       ff_faanidct,          FF_IDCT_PERM_NONE },
@@ -245,8 +247,10 @@ static int dct_error(const struct algo *dct, int test, int is_idct, int speed, c
            omse, ome, (double) sysErrMax / NB_ITS,
            maxout, blockSumErrMax);
 
-    if (spec_err && !dct->nonspec)
+    if (spec_err && !dct->nonspec) {
+        printf("Failed!\n");
         return 1;
+    }
 
     if (!speed)
         return 0;
diff --git a/libavcodec/dct32.h b/libavcodec/dct32.h
index f4b2471d..61bf223a 100644
--- a/libavcodec/dct32.h
+++ b/libavcodec/dct32.h
@@ -22,4 +22,4 @@
 void ff_dct32_float(float *dst, const float *src);
 void ff_dct32_fixed(int *dst, const int *src);
 
-#endif
+#endif /* AVCODEC_DCT32_H */
diff --git a/libavcodec/dct32_template.c b/libavcodec/dct32_template.c
index fb53d53a..c70396e5 100644
--- a/libavcodec/dct32_template.c
+++ b/libavcodec/dct32_template.c
@@ -73,7 +73,7 @@
 #define COS3_0 FIXHR(0.54119610014619698439/2)
 #define COS3_1 FIXHR(1.30656296487637652785/4)
 
-#define COS4_0 FIXHR(0.70710678118654752439/2)
+#define COS4_0 FIXHR(M_SQRT1_2/2)
 
 /* butterfly operator */
 #define BF(a, b, c, s)\
diff --git a/libavcodec/dds.c b/libavcodec/dds.c
new file mode 100644
index 00000000..9577b67f
--- /dev/null
+++ b/libavcodec/dds.c
@@ -0,0 +1,721 @@
+/*
+ * DirectDraw Surface image decoder
+ * Copyright (C) 2015 Vittorio Giovara <vittorio.giovara@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * DDS decoder
+ *
+ * https://msdn.microsoft.com/en-us/library/bb943982%28v=vs.85%29.aspx
+ */
+
+#include <stdint.h>
+
+#include "libavutil/libm.h"
+#include "libavutil/imgutils.h"
+
+#include "avcodec.h"
+#include "bytestream.h"
+#include "internal.h"
+#include "texturedsp.h"
+#include "thread.h"
+
+#define DDPF_FOURCC    (1 <<  2)  /* DDPF flag: format is described by the fourcc field */
+#define DDPF_PALETTE   (1 <<  5)  /* DDPF flag: pixels are palette indices */
+#define DDPF_NORMALMAP (1U << 31) /* DDPF flag: normal map; unsigned, 1 << 31 overflows int */
+
+enum DDSPostProc {
+    DDS_NONE = 0,       // no fix-up (run_postproc() has no case for it)
+    DDS_ALPHA_EXP,      // scale bytes 0-2 by byte 3 / 255, then set byte 3 to 255
+    DDS_NORMAL_MAP,     // store X, Y and a derived Z in bytes 0-2, set byte 3 to 255
+    DDS_RAW_YCOCG,      // convert stored Y-Co-Cg-A samples to RGBA
+    DDS_SWAP_ALPHA,     // swap the two bytes of each 2-byte pixel
+    DDS_SWIZZLE_A2XY,   // swap bytes 0 and 1
+    DDS_SWIZZLE_RBXG,   // swap bytes 1 and 3, then 2 and 3
+    DDS_SWIZZLE_RGXB,   // swap bytes 2 and 3
+    DDS_SWIZZLE_RXBG,   // swap bytes 1 and 3
+    DDS_SWIZZLE_RXGB,   // swap bytes 0 and 3
+    DDS_SWIZZLE_XGBR,   // swap bytes 2 and 3, then 0 and 3
+    DDS_SWIZZLE_XRBG,   // swap bytes 1 and 3, then 0 and 3
+    DDS_SWIZZLE_XGXR,   // swap bytes 1 and 3, then 0 and 3, then 0 and 1
+};
+
+enum DDSDXGIFormat { /* format codes read from the "DX10" extension header */
+    DXGI_FORMAT_R16G16B16A16_TYPELESS       =  9,
+    DXGI_FORMAT_R16G16B16A16_FLOAT          = 10,
+    DXGI_FORMAT_R16G16B16A16_UNORM          = 11,
+    DXGI_FORMAT_R16G16B16A16_UINT           = 12,
+    DXGI_FORMAT_R16G16B16A16_SNORM          = 13,
+    DXGI_FORMAT_R16G16B16A16_SINT           = 14,
+
+    DXGI_FORMAT_R8G8B8A8_TYPELESS           = 27,
+    DXGI_FORMAT_R8G8B8A8_UNORM              = 28,
+    DXGI_FORMAT_R8G8B8A8_UNORM_SRGB         = 29,
+    DXGI_FORMAT_R8G8B8A8_UINT               = 30,
+    DXGI_FORMAT_R8G8B8A8_SNORM              = 31,
+    DXGI_FORMAT_R8G8B8A8_SINT               = 32,
+    /* 70..84 (BC1..BC5) is the range parse_pixel_format() treats as compressed. */
+    DXGI_FORMAT_BC1_TYPELESS                = 70,
+    DXGI_FORMAT_BC1_UNORM                   = 71,
+    DXGI_FORMAT_BC1_UNORM_SRGB              = 72,
+    DXGI_FORMAT_BC2_TYPELESS                = 73,
+    DXGI_FORMAT_BC2_UNORM                   = 74,
+    DXGI_FORMAT_BC2_UNORM_SRGB              = 75,
+    DXGI_FORMAT_BC3_TYPELESS                = 76,
+    DXGI_FORMAT_BC3_UNORM                   = 77,
+    DXGI_FORMAT_BC3_UNORM_SRGB              = 78,
+    DXGI_FORMAT_BC4_TYPELESS                = 79,
+    DXGI_FORMAT_BC4_UNORM                   = 80,
+    DXGI_FORMAT_BC4_SNORM                   = 81,
+    DXGI_FORMAT_BC5_TYPELESS                = 82,
+    DXGI_FORMAT_BC5_UNORM                   = 83,
+    DXGI_FORMAT_BC5_SNORM                   = 84,
+    DXGI_FORMAT_B5G6R5_UNORM                = 85,
+    DXGI_FORMAT_B8G8R8A8_UNORM              = 87,
+    DXGI_FORMAT_B8G8R8X8_UNORM              = 88,
+    DXGI_FORMAT_B8G8R8A8_TYPELESS           = 90,
+    DXGI_FORMAT_B8G8R8A8_UNORM_SRGB         = 91,
+    DXGI_FORMAT_B8G8R8X8_TYPELESS           = 92,
+    DXGI_FORMAT_B8G8R8X8_UNORM_SRGB         = 93,
+};
+
+typedef struct DDSContext {
+    TextureDSPContext texdsp; // Block decoders, set up by dds_decode() on every call
+    GetByteContext gbc;       // Reader over the packet currently being decoded
+
+    int compressed;            // Nonzero: payload is decoded block by block via tex_funct
+    int paletted;              // Nonzero: a 256-entry palette precedes the pixel data
+    enum DDSPostProc postproc; // Fix-up run_postproc() applies after decoding
+
+    const uint8_t *tex_data; // Start of the compressed blocks inside the packet
+    int tex_ratio;           // Bytes per compressed block (8 or 16)
+    int slice_count;         // Number of slices for threaded operations
+
+    /* Block decoder chosen in parse_pixel_format(); called once per block. */
+    int (*tex_funct)(uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+} DDSContext;
+
+/* Parse the remainder of the DDS header from ctx->gbc (reserved1 tags, the
+ * DDPF block, caps and an optional DX10 header) and select pix_fmt, texture
+ * decoder and post-processing. Returns 0 or AVERROR_INVALIDDATA. */
+static int parse_pixel_format(AVCodecContext *avctx)
+{
+    DDSContext *ctx = avctx->priv_data;
+    GetByteContext *gbc = &ctx->gbc;
+    char buf[32];
+    uint32_t flags, fourcc, gimp_tag;
+    enum DDSDXGIFormat dxgi;
+    int size, bpp, r, g, b, a;
+    int alpha_exponent, ycocg_classic, ycocg_scaled, normal_map, array;
+
+    /* The context is reused for every packet: forget the previous fix-up. */
+    ctx->postproc = DDS_NONE;
+
+    /* Alternative DDS implementations use reserved1 as custom header. */
+    bytestream2_skip(gbc, 4 * 3);
+    gimp_tag = bytestream2_get_le32(gbc);
+    alpha_exponent = gimp_tag == MKTAG('A', 'E', 'X', 'P');
+    ycocg_classic  = gimp_tag == MKTAG('Y', 'C', 'G', '1');
+    ycocg_scaled   = gimp_tag == MKTAG('Y', 'C', 'G', '2');
+    bytestream2_skip(gbc, 4 * 7);
+
+    /* Now the real DDPF starts. */
+    size = bytestream2_get_le32(gbc);
+    if (size != 32) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid pixel format header %d.\n", size);
+        return AVERROR_INVALIDDATA;
+    }
+    flags = bytestream2_get_le32(gbc);
+    ctx->compressed = flags & DDPF_FOURCC;
+    ctx->paletted   = flags & DDPF_PALETTE;
+    normal_map      = flags & DDPF_NORMALMAP;
+    fourcc = bytestream2_get_le32(gbc);
+
+    if (ctx->compressed && ctx->paletted) {
+        av_log(avctx, AV_LOG_WARNING,
+               "Disabling invalid palette flag for compressed dds.\n");
+        ctx->paletted = 0;
+    }
+
+    bpp = bytestream2_get_le32(gbc); // rgbbitcount
+    r   = bytestream2_get_le32(gbc); // rbitmask
+    g   = bytestream2_get_le32(gbc); // gbitmask
+    b   = bytestream2_get_le32(gbc); // bbitmask
+    a   = bytestream2_get_le32(gbc); // abitmask
+
+    bytestream2_skip(gbc, 4 * 5); // caps, caps2, caps3, caps4, reserved2
+
+    av_get_codec_tag_string(buf, sizeof(buf), fourcc);
+    av_log(avctx, AV_LOG_VERBOSE, "fourcc %s bpp %d "
+           "r 0x%x g 0x%x b 0x%x a 0x%x\n", buf, bpp, r, g, b, a);
+    if (gimp_tag) {
+        av_get_codec_tag_string(buf, sizeof(buf), gimp_tag);
+        av_log(avctx, AV_LOG_VERBOSE, "and GIMP-DDS tag %s\n", buf);
+    }
+
+    if (ctx->compressed) {
+        avctx->pix_fmt = AV_PIX_FMT_RGBA; /* default; some cases below override it */
+        switch (fourcc) {
+        case MKTAG('D', 'X', 'T', '1'):
+            ctx->tex_ratio = 8;
+            ctx->tex_funct = ctx->texdsp.dxt1a_block;
+            break;
+        case MKTAG('D', 'X', 'T', '2'):
+            ctx->tex_ratio = 16;
+            ctx->tex_funct = ctx->texdsp.dxt2_block;
+            break;
+        case MKTAG('D', 'X', 'T', '3'):
+            ctx->tex_ratio = 16;
+            ctx->tex_funct = ctx->texdsp.dxt3_block;
+            break;
+        case MKTAG('D', 'X', 'T', '4'):
+            ctx->tex_ratio = 16;
+            ctx->tex_funct = ctx->texdsp.dxt4_block;
+            break;
+        case MKTAG('D', 'X', 'T', '5'):
+            ctx->tex_ratio = 16;
+            if (ycocg_scaled)
+                ctx->tex_funct = ctx->texdsp.dxt5ys_block;
+            else if (ycocg_classic)
+                ctx->tex_funct = ctx->texdsp.dxt5y_block;
+            else
+                ctx->tex_funct = ctx->texdsp.dxt5_block;
+            break;
+        case MKTAG('R', 'X', 'G', 'B'):
+            ctx->tex_ratio = 16;
+            ctx->tex_funct = ctx->texdsp.dxt5_block;
+            /* This format may be considered as a normal map,
+             * but it is handled differently in a separate postproc. */
+            ctx->postproc = DDS_SWIZZLE_RXGB;
+            normal_map = 0;
+            break;
+        case MKTAG('A', 'T', 'I', '1'):
+        case MKTAG('B', 'C', '4', 'U'):
+            ctx->tex_ratio = 8;
+            ctx->tex_funct = ctx->texdsp.rgtc1u_block;
+            break;
+        case MKTAG('B', 'C', '4', 'S'):
+            ctx->tex_ratio = 8;
+            ctx->tex_funct = ctx->texdsp.rgtc1s_block;
+            break;
+        case MKTAG('A', 'T', 'I', '2'):
+            /* RGTC2 variant with swapped R and G (3Dc) */
+            ctx->tex_ratio = 16;
+            ctx->tex_funct = ctx->texdsp.dxn3dc_block;
+            break;
+        case MKTAG('B', 'C', '5', 'U'):
+            ctx->tex_ratio = 16;
+            ctx->tex_funct = ctx->texdsp.rgtc2u_block;
+            break;
+        case MKTAG('B', 'C', '5', 'S'):
+            ctx->tex_ratio = 16;
+            ctx->tex_funct = ctx->texdsp.rgtc2s_block;
+            break;
+        case MKTAG('U', 'Y', 'V', 'Y'):
+            ctx->compressed = 0;
+            avctx->pix_fmt = AV_PIX_FMT_UYVY422;
+            break;
+        case MKTAG('Y', 'U', 'Y', '2'):
+            ctx->compressed = 0;
+            avctx->pix_fmt = AV_PIX_FMT_YUYV422;
+            break;
+        case MKTAG('P', '8', ' ', ' '):
+            /* ATI Palette8, same as normal palette */
+            ctx->compressed = 0;
+            ctx->paletted   = 1;
+            avctx->pix_fmt  = AV_PIX_FMT_PAL8;
+            break;
+        case MKTAG('D', 'X', '1', '0'):
+            /* DirectX 10 extra header */
+            dxgi = bytestream2_get_le32(gbc);
+            bytestream2_skip(gbc, 4); // resourceDimension
+            bytestream2_skip(gbc, 4); // miscFlag
+            array = bytestream2_get_le32(gbc);
+            bytestream2_skip(gbc, 4); // miscFlag2
+
+            if (array != 0)
+                av_log(avctx, AV_LOG_VERBOSE,
+                       "Found array of size %d (ignored).\n", array);
+
+            /* Only BC[1-5] are actually compressed. */
+            ctx->compressed = dxgi >= DXGI_FORMAT_BC1_TYPELESS && dxgi <= DXGI_FORMAT_BC5_SNORM;
+
+            av_log(avctx, AV_LOG_VERBOSE, "DXGI format %d.\n", dxgi);
+            switch (dxgi) {
+            /* RGB types. */
+            case DXGI_FORMAT_R16G16B16A16_TYPELESS:
+            case DXGI_FORMAT_R16G16B16A16_FLOAT:
+            case DXGI_FORMAT_R16G16B16A16_UNORM:
+            case DXGI_FORMAT_R16G16B16A16_UINT:
+            case DXGI_FORMAT_R16G16B16A16_SNORM:
+            case DXGI_FORMAT_R16G16B16A16_SINT:
+                avctx->pix_fmt = AV_PIX_FMT_BGRA64;
+                break;
+            case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB:
+                avctx->colorspace = AVCOL_SPC_RGB; /* fall through */
+            case DXGI_FORMAT_R8G8B8A8_TYPELESS:
+            case DXGI_FORMAT_R8G8B8A8_UNORM:
+            case DXGI_FORMAT_R8G8B8A8_UINT:
+            case DXGI_FORMAT_R8G8B8A8_SNORM:
+            case DXGI_FORMAT_R8G8B8A8_SINT:
+                avctx->pix_fmt = AV_PIX_FMT_BGRA;
+                break;
+            case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB:
+                avctx->colorspace = AVCOL_SPC_RGB; /* fall through */
+            case DXGI_FORMAT_B8G8R8A8_TYPELESS:
+            case DXGI_FORMAT_B8G8R8A8_UNORM:
+                avctx->pix_fmt = AV_PIX_FMT_RGBA;
+                break;
+            case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB:
+                avctx->colorspace = AVCOL_SPC_RGB; /* fall through */
+            case DXGI_FORMAT_B8G8R8X8_TYPELESS:
+            case DXGI_FORMAT_B8G8R8X8_UNORM:
+                avctx->pix_fmt = AV_PIX_FMT_RGBA; // opaque
+                break;
+            case DXGI_FORMAT_B5G6R5_UNORM:
+                avctx->pix_fmt = AV_PIX_FMT_RGB565LE;
+                break;
+            /* Texture types. */
+            case DXGI_FORMAT_BC1_UNORM_SRGB:
+                avctx->colorspace = AVCOL_SPC_RGB; /* fall through */
+            case DXGI_FORMAT_BC1_TYPELESS:
+            case DXGI_FORMAT_BC1_UNORM:
+                ctx->tex_ratio = 8;
+                ctx->tex_funct = ctx->texdsp.dxt1a_block;
+                break;
+            case DXGI_FORMAT_BC2_UNORM_SRGB:
+                avctx->colorspace = AVCOL_SPC_RGB; /* fall through */
+            case DXGI_FORMAT_BC2_TYPELESS:
+            case DXGI_FORMAT_BC2_UNORM:
+                ctx->tex_ratio = 16;
+                ctx->tex_funct = ctx->texdsp.dxt3_block;
+                break;
+            case DXGI_FORMAT_BC3_UNORM_SRGB:
+                avctx->colorspace = AVCOL_SPC_RGB; /* fall through */
+            case DXGI_FORMAT_BC3_TYPELESS:
+            case DXGI_FORMAT_BC3_UNORM:
+                ctx->tex_ratio = 16;
+                ctx->tex_funct = ctx->texdsp.dxt5_block;
+                break;
+            case DXGI_FORMAT_BC4_TYPELESS:
+            case DXGI_FORMAT_BC4_UNORM:
+                ctx->tex_ratio = 8;
+                ctx->tex_funct = ctx->texdsp.rgtc1u_block;
+                break;
+            case DXGI_FORMAT_BC4_SNORM:
+                ctx->tex_ratio = 8;
+                ctx->tex_funct = ctx->texdsp.rgtc1s_block;
+                break;
+            case DXGI_FORMAT_BC5_TYPELESS:
+            case DXGI_FORMAT_BC5_UNORM:
+                ctx->tex_ratio = 16;
+                ctx->tex_funct = ctx->texdsp.rgtc2u_block;
+                break;
+            case DXGI_FORMAT_BC5_SNORM:
+                ctx->tex_ratio = 16;
+                ctx->tex_funct = ctx->texdsp.rgtc2s_block;
+                break;
+            default:
+                av_log(avctx, AV_LOG_ERROR,
+                       "Unsupported DXGI format %d.\n", dxgi);
+                return AVERROR_INVALIDDATA;
+            }
+            break;
+        default:
+            av_log(avctx, AV_LOG_ERROR, "Unsupported %s fourcc.\n", buf);
+            return AVERROR_INVALIDDATA;
+        }
+    } else if (ctx->paletted) {
+        if (bpp == 8) {
+            avctx->pix_fmt = AV_PIX_FMT_PAL8;
+        } else {
+            av_log(avctx, AV_LOG_ERROR, "Unsupported palette bpp %d.\n", bpp);
+            return AVERROR_INVALIDDATA;
+        }
+    } else {
+        /*  8 bpp */
+        if (bpp == 8 && r == 0xff && g == 0 && b == 0 && a == 0)
+            avctx->pix_fmt = AV_PIX_FMT_GRAY8;
+        /* 16 bpp */
+        else if (bpp == 16 && r == 0xff && g == 0 && b == 0 && a == 0xff00)
+            avctx->pix_fmt = AV_PIX_FMT_YA8;
+        else if (bpp == 16 && r == 0xffff && g == 0 && b == 0 && a == 0)
+            avctx->pix_fmt = AV_PIX_FMT_GRAY16LE;
+        else if (bpp == 16 && r == 0xf800 && g == 0x7e0 && b == 0x1f && a == 0)
+            avctx->pix_fmt = AV_PIX_FMT_RGB565LE;
+        /* 24 bpp */
+        else if (bpp == 24 && r == 0xff0000 && g == 0xff00 && b == 0xff && a == 0)
+            avctx->pix_fmt = AV_PIX_FMT_BGR24;
+        /* 32 bpp */
+        else if (bpp == 32 && r == 0xff0000 && g == 0xff00 && b == 0xff && a == 0)
+            avctx->pix_fmt = AV_PIX_FMT_BGR0; // opaque
+        else if (bpp == 32 && r == 0xff && g == 0xff00 && b == 0xff0000 && a == 0)
+            avctx->pix_fmt = AV_PIX_FMT_RGB0; // opaque
+        else if (bpp == 32 && r == 0xff0000 && g == 0xff00 && b == 0xff && a == 0xff000000)
+            avctx->pix_fmt = AV_PIX_FMT_BGRA;
+        else if (bpp == 32 && r == 0xff && g == 0xff00 && b == 0xff0000 && a == 0xff000000)
+            avctx->pix_fmt = AV_PIX_FMT_RGBA;
+        /* give up */
+        else {
+            av_log(avctx, AV_LOG_ERROR, "Unknown pixel format "
+                   "[bpp %d r 0x%x g 0x%x b 0x%x a 0x%x].\n", bpp, r, g, b, a);
+            return AVERROR_INVALIDDATA;
+        }
+    }
+
+    /* Set any remaining post-proc that should happen before frame is ready. */
+    if (alpha_exponent)
+        ctx->postproc = DDS_ALPHA_EXP;
+    else if (normal_map)
+        ctx->postproc = DDS_NORMAL_MAP;
+    else if (ycocg_classic && !ctx->compressed)
+        ctx->postproc = DDS_RAW_YCOCG;
+    else if (avctx->pix_fmt == AV_PIX_FMT_YA8)
+        ctx->postproc = DDS_SWAP_ALPHA;
+
+    /* ATI/NVidia variants sometimes add swizzling in bpp. */
+    switch (bpp) {
+    case MKTAG('A', '2', 'X', 'Y'):
+        ctx->postproc = DDS_SWIZZLE_A2XY;
+        break;
+    case MKTAG('x', 'G', 'B', 'R'):
+        ctx->postproc = DDS_SWIZZLE_XGBR;
+        break;
+    case MKTAG('x', 'R', 'B', 'G'):
+        ctx->postproc = DDS_SWIZZLE_XRBG;
+        break;
+    case MKTAG('R', 'B', 'x', 'G'):
+        ctx->postproc = DDS_SWIZZLE_RBXG;
+        break;
+    case MKTAG('R', 'G', 'x', 'B'):
+        ctx->postproc = DDS_SWIZZLE_RGXB;
+        break;
+    case MKTAG('R', 'x', 'B', 'G'):
+        ctx->postproc = DDS_SWIZZLE_RXBG;
+        break;
+    case MKTAG('x', 'G', 'x', 'R'):
+        ctx->postproc = DDS_SWIZZLE_XGXR;
+        break;
+    case MKTAG('A', '2', 'D', '5'):
+        ctx->postproc = DDS_NORMAL_MAP;
+        break;
+    }
+
+    return 0;
+}
+
+/* Slice-threading worker: decompress one horizontal band of texture blocks.
+ * Always returns 0; thread_nb is unused. */
+static int decompress_texture_thread(AVCodecContext *avctx, void *arg,
+                                     int slice, int thread_nb)
+{
+    DDSContext *ctx = avctx->priv_data;
+    /* 'arg' is the output frame handed to execute2() by dds_decode(). */
+    AVFrame *frame = arg;
+
+    const int blocks_wide = avctx->coded_width  / TEXTURE_BLOCK_W;
+    const int blocks_high = avctx->coded_height / TEXTURE_BLOCK_H;
+    /* Every job gets 'share' block rows; the first 'extra' jobs take one
+     * more, so all blocks_high rows are covered exactly once. */
+    const int share = blocks_high / ctx->slice_count;
+    const int extra = blocks_high % ctx->slice_count;
+    const int first_row = slice * share + FFMIN(slice, extra);
+    const int last_row  = first_row + share + (slice < extra);
+    int row, col;
+
+    for (row = first_row; row < last_row; row++) {
+        /* One block row covers TEXTURE_BLOCK_H picture lines. */
+        uint8_t *dst = frame->data[0] +
+                       row * frame->linesize[0] * TEXTURE_BLOCK_H;
+        const uint8_t *blk = ctx->tex_data +
+                             row * blocks_wide * ctx->tex_ratio;
+
+        for (col = 0; col < blocks_wide; col++) {
+            /* Each block's output starts 16 bytes after the previous one. */
+            ctx->tex_funct(dst, frame->linesize[0], blk);
+            dst += 16;
+            blk += ctx->tex_ratio;
+        }
+    }
+
+    return 0;
+}
+
+static void do_swizzle(AVFrame *frame, int x, int y)
+{
+    /* Swap byte x with byte y in every 4-byte group of plane 0, padding included. */
+    uint8_t *px  = frame->data[0];
+    uint8_t *end = px + frame->linesize[0] * frame->height;
+    for (; px < end; px += 4)
+        FFSWAP(uint8_t, px[x], px[y]);
+}
+
+static void run_postproc(AVCodecContext *avctx, AVFrame *frame)
+{
+    DDSContext *ctx = avctx->priv_data;
+    int i, x_off;
+    /* Apply ctx->postproc to plane 0 in place; loops span linesize * height bytes. */
+    switch (ctx->postproc) { /* DDS_NONE has no case: nothing is done */
+    case DDS_ALPHA_EXP:
+        /* Alpha-exponential mode divides each channel by the maximum
+         * R, G or B value, and stores the multiplying factor in the
+         * alpha channel. */
+        av_log(avctx, AV_LOG_DEBUG, "Post-processing alpha exponent.\n");
+
+        for (i = 0; i < frame->linesize[0] * frame->height; i += 4) {
+            uint8_t *src = frame->data[0] + i;
+            int r = src[0];
+            int g = src[1];
+            int b = src[2];
+            int a = src[3];
+
+            src[0] = r * a / 255;
+            src[1] = g * a / 255;
+            src[2] = b * a / 255;
+            src[3] = 255;
+        }
+        break;
+    case DDS_NORMAL_MAP:
+        /* Normal maps work in the XYZ color space and they encode
+         * X in R or in A, depending on the texture type, Y in G and
+         * derive Z with a square root of the distance.
+         *
+         * http://www.realtimecollisiondetection.net/blog/?p=28 */
+        av_log(avctx, AV_LOG_DEBUG, "Post-processing normal map.\n");
+
+        x_off = ctx->tex_ratio == 8 ? 0 : 3; /* X is byte 0 for 8-byte blocks, else byte 3 */
+        for (i = 0; i < frame->linesize[0] * frame->height; i += 4) {
+            uint8_t *src = frame->data[0] + i;
+            int x = src[x_off];
+            int y = src[1];
+            int z = 127; /* kept when d is not positive */
+
+            int d = (255 * 255 - x * x - y * y) / 2;
+            if (d > 0)
+                z = lrint(sqrtf(d));
+
+            src[0] = x;
+            src[1] = y;
+            src[2] = z;
+            src[3] = 255;
+        }
+        break;
+    case DDS_RAW_YCOCG:
+        /* Data is Y-Co-Cg-A and not RGBA, but they are represented
+         * with the same masks in the DDPF header. */
+        av_log(avctx, AV_LOG_DEBUG, "Post-processing raw YCoCg.\n");
+
+        for (i = 0; i < frame->linesize[0] * frame->height; i += 4) {
+            uint8_t *src = frame->data[0] + i;
+            int a  = src[0];
+            int cg = src[1] - 128; /* chroma bytes are stored with a +128 bias */
+            int co = src[2] - 128;
+            int y  = src[3];
+
+            src[0] = av_clip_uint8(y + co - cg);
+            src[1] = av_clip_uint8(y + cg);
+            src[2] = av_clip_uint8(y - co - cg);
+            src[3] = a;
+        }
+        break;
+    case DDS_SWAP_ALPHA:
+        /* Alpha and Luma are stored swapped. */
+        av_log(avctx, AV_LOG_DEBUG, "Post-processing swapped Luma/Alpha.\n");
+
+        for (i = 0; i < frame->linesize[0] * frame->height; i += 2) { /* 2 bytes per pixel */
+            uint8_t *src = frame->data[0] + i;
+            FFSWAP(uint8_t, src[0], src[1]);
+        }
+        break;
+    case DDS_SWIZZLE_A2XY:
+        /* Swap R and G, often used to restore a standard RGTC2. */
+        av_log(avctx, AV_LOG_DEBUG, "Post-processing A2XY swizzle.\n");
+        do_swizzle(frame, 0, 1);
+        break;
+    case DDS_SWIZZLE_RBXG:
+        /* Swap G and A, then B and new A (G). */
+        av_log(avctx, AV_LOG_DEBUG, "Post-processing RBXG swizzle.\n");
+        do_swizzle(frame, 1, 3);
+        do_swizzle(frame, 2, 3);
+        break;
+    case DDS_SWIZZLE_RGXB:
+        /* Swap B and A. */
+        av_log(avctx, AV_LOG_DEBUG, "Post-processing RGXB swizzle.\n");
+        do_swizzle(frame, 2, 3);
+        break;
+    case DDS_SWIZZLE_RXBG:
+        /* Swap G and A. */
+        av_log(avctx, AV_LOG_DEBUG, "Post-processing RXBG swizzle.\n");
+        do_swizzle(frame, 1, 3);
+        break;
+    case DDS_SWIZZLE_RXGB:
+        /* Swap R and A (misleading name). */
+        av_log(avctx, AV_LOG_DEBUG, "Post-processing RXGB swizzle.\n");
+        do_swizzle(frame, 0, 3);
+        break;
+    case DDS_SWIZZLE_XGBR:
+        /* Swap B and A, then R and new A (B). */
+        av_log(avctx, AV_LOG_DEBUG, "Post-processing XGBR swizzle.\n");
+        do_swizzle(frame, 2, 3);
+        do_swizzle(frame, 0, 3);
+        break;
+    case DDS_SWIZZLE_XGXR:
+        /* Swap G and A, then R and new A (G), then new R (G) and new G (A).
+         * This variant does not store any B component. */
+        av_log(avctx, AV_LOG_DEBUG, "Post-processing XGXR swizzle.\n");
+        do_swizzle(frame, 1, 3);
+        do_swizzle(frame, 0, 3);
+        do_swizzle(frame, 0, 1);
+        break;
+    case DDS_SWIZZLE_XRBG:
+        /* Swap G and A, then R and new A (G). */
+        av_log(avctx, AV_LOG_DEBUG, "Post-processing XRBG swizzle.\n");
+        do_swizzle(frame, 1, 3);
+        do_swizzle(frame, 0, 3);
+        break;
+    }
+}
+
+/* Decode one packet holding a complete DDS file into the frame passed as data.
+ * Returns avpkt->size with *got_frame set, or a negative AVERROR on failure. */
+static int dds_decode(AVCodecContext *avctx, void *data,
+                      int *got_frame, AVPacket *avpkt)
+{
+    DDSContext *ctx = avctx->priv_data;
+    GetByteContext *gbc = &ctx->gbc;
+    AVFrame *frame = data;
+    int mipmap, ret;
+
+    ff_texturedsp_init(&ctx->texdsp);
+    bytestream2_init(gbc, avpkt->data, avpkt->size);
+
+    if (bytestream2_get_bytes_left(gbc) < 128) { /* 4-byte magic + 124-byte header */
+        av_log(avctx, AV_LOG_ERROR, "Frame is too small (%d).\n",
+               bytestream2_get_bytes_left(gbc));
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (bytestream2_get_le32(gbc) != MKTAG('D', 'D', 'S', ' ') ||
+        bytestream2_get_le32(gbc) != 124) { // header size
+        av_log(avctx, AV_LOG_ERROR, "Invalid DDS header.\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    bytestream2_skip(gbc, 4); // flags
+
+    avctx->height = bytestream2_get_le32(gbc);
+    avctx->width  = bytestream2_get_le32(gbc);
+    ret = av_image_check_size(avctx->width, avctx->height, 0, avctx);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid image size %dx%d.\n",
+               avctx->width, avctx->height);
+        return ret;
+    }
+
+    /* Since codec is based on 4x4 blocks, size is aligned to 4. */
+    avctx->coded_width  = FFALIGN(avctx->width,  TEXTURE_BLOCK_W);
+    avctx->coded_height = FFALIGN(avctx->height, TEXTURE_BLOCK_H);
+
+    bytestream2_skip(gbc, 4 * 2); // pitch, depth
+    mipmap = bytestream2_get_le32(gbc);
+    if (mipmap != 0)
+        av_log(avctx, AV_LOG_VERBOSE, "Found %d mipmaps (ignored).\n", mipmap);
+
+    /* Extract pixel format information, considering additional elements
+     * in reserved1 and reserved2. */
+    ret = parse_pixel_format(avctx);
+    if (ret < 0)
+        return ret;
+
+    ret = ff_get_buffer(avctx, frame, 0);
+    if (ret < 0)
+        return ret;
+
+    if (ctx->compressed) {
+        int size = (avctx->coded_height / TEXTURE_BLOCK_H) *
+                   (avctx->coded_width / TEXTURE_BLOCK_W) * ctx->tex_ratio;
+        ctx->slice_count = av_clip(avctx->thread_count, 1,
+                                   avctx->coded_height / TEXTURE_BLOCK_H);
+
+        if (bytestream2_get_bytes_left(gbc) < size) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "Compressed Buffer is too small (%d < %d).\n",
+                   bytestream2_get_bytes_left(gbc), size);
+            return AVERROR_INVALIDDATA;
+        }
+
+        /* Decompress the texture, one band of block rows per slice job. */
+        ctx->tex_data = gbc->buffer;
+        avctx->execute2(avctx, decompress_texture_thread, frame, NULL, ctx->slice_count);
+    } else {
+        int linesize = av_image_get_linesize(avctx->pix_fmt, frame->width, 0);
+
+        if (ctx->paletted) {
+            int i;
+            /* Use the first 1024 bytes as palette, then copy the rest. */
+            bytestream2_get_buffer(gbc, frame->data[1], 256 * 4);
+            for (i = 0; i < 256; i++)
+                AV_WN32(frame->data[1] + i*4,
+                        (frame->data[1][2+i*4]<<0)+
+                        (frame->data[1][1+i*4]<<8)+
+                        (frame->data[1][0+i*4]<<16)+
+                        ((unsigned)frame->data[1][3+i*4]<<24) /* int << 24 would overflow */
+                );
+
+            frame->palette_has_changed = 1;
+        }
+
+        if (bytestream2_get_bytes_left(gbc) < frame->height * linesize) {
+            av_log(avctx, AV_LOG_ERROR, "Buffer is too small (%d < %d).\n",
+                   bytestream2_get_bytes_left(gbc), frame->height * linesize);
+            return AVERROR_INVALIDDATA;
+        }
+
+        av_image_copy_plane(frame->data[0], frame->linesize[0],
+                            gbc->buffer, linesize,
+                            linesize, frame->height);
+    }
+
+    /* Run any post processing here if needed. */
+    if (avctx->pix_fmt == AV_PIX_FMT_BGRA ||
+        avctx->pix_fmt == AV_PIX_FMT_RGBA ||
+        avctx->pix_fmt == AV_PIX_FMT_RGB0 ||
+        avctx->pix_fmt == AV_PIX_FMT_BGR0 ||
+        avctx->pix_fmt == AV_PIX_FMT_YA8)
+        run_postproc(avctx, frame);
+
+    /* Frame is ready to be output. */
+    frame->pict_type = AV_PICTURE_TYPE_I;
+    frame->key_frame = 1;
+    *got_frame = 1;
+
+    return avpkt->size;
+}
+
+AVCodec ff_dds_decoder = {
+    .name           = "dds",
+    .long_name      = NULL_IF_CONFIG_SMALL("DirectDraw Surface image decoder"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_DDS,
+    .decode         = dds_decode, /* no .init is set: dds_decode() sets up texdsp on every call */
+    .priv_data_size = sizeof(DDSContext),
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_SLICE_THREADS, /* slices run via execute2() */
+    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE
+};
diff --git a/libavcodec/dfa.c b/libavcodec/dfa.c
index f13291ef..f45d019a 100644
--- a/libavcodec/dfa.c
+++ b/libavcodec/dfa.c
@@ -418,5 +418,5 @@ AVCodec ff_dfa_decoder = {
     .init           = dfa_decode_init,
     .close          = dfa_decode_end,
     .decode         = dfa_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/dirac.c b/libavcodec/dirac.c
index 07db919b..527f015e 100644
--- a/libavcodec/dirac.c
+++ b/libavcodec/dirac.c
@@ -34,6 +34,28 @@
 #include "internal.h"
 #include "mpeg12data.h"
 
+#if CONFIG_DIRAC_PARSE
+
+typedef struct dirac_source_params { /* order matters: the defaults table is positional */
+    unsigned width;
+    unsigned height;
+    uint8_t chroma_format;          ///< 0: 444  1: 422  2: 420
+
+    uint8_t interlaced;
+    uint8_t top_field_first;
+
+    uint8_t frame_rate_index;       ///< index into dirac_frame_rate[]
+    uint8_t aspect_ratio_index;     ///< index into dirac_aspect_ratio[]
+
+    uint16_t clean_width;
+    uint16_t clean_height;
+    uint16_t clean_left_offset;
+    uint16_t clean_right_offset;    // NOTE(review): name suggests a right offset; confirm it is not the top offset
+
+    uint8_t pixel_range_index;      ///< index into dirac_pixel_range_presets[]
+    uint8_t color_spec_index;       ///< index into dirac_color_spec_presets[]
+} dirac_source_params;
+
 /* defaults for source parameters */
 static const dirac_source_params dirac_source_parameters_defaults[] = {
     {  640,  480, 2, 0, 0,  1, 1,  640,  480, 0, 0, 1, 0 },
@@ -109,16 +131,17 @@ static const struct {
     { AVCOL_PRI_BT709,     AVCOL_SPC_BT709,   AVCOL_TRC_UNSPECIFIED /* DCinema */ },
 };
 
-/* [DIRAC_STD] Table 10.2 Supported chroma sampling formats + luma Offset */
-static const enum AVPixelFormat dirac_pix_fmt[2][3] = {
-    { AV_PIX_FMT_YUV444P,  AV_PIX_FMT_YUV422P,  AV_PIX_FMT_YUV420P  },
-    { AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ420P },
+/* [DIRAC_STD] Table 10.2 Supported chroma sampling formats */
+static const enum AVPixelFormat dirac_pix_fmt[][3] = { /* presumably [chroma_format][depth step] - confirm at the use site */
+    {AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV444P12}, /* 4:4:4 at 8, 10, 12 bits */
+    {AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV422P12}, /* 4:2:2 at 8, 10, 12 bits */
+    {AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV420P12}, /* 4:2:0 at 8, 10, 12 bits */
+};
 
 /* [DIRAC_STD] 10.3 Parse Source Parameters.
  * source_parameters(base_video_format) */
-static int parse_source_parameters(AVCodecContext *avctx, GetBitContext *gb,
-                                   dirac_source_params *source)
+static int parse_source_parameters(AVDiracSeqHeader *dsh, GetBitContext *gb,
+                                   void *log_ctx)
 {
     AVRational frame_rate = { 0, 0 };
     unsigned luma_depth = 8, luma_offset = 16;
@@ -128,8 +151,8 @@ static int parse_source_parameters(AVCodecContext *avctx, GetBitContext *gb,
     /* [DIRAC_STD] 10.3.2 Frame size. frame_size(video_params) */
     /* [DIRAC_STD] custom_dimensions_flag */
     if (get_bits1(gb)) {
-        source->width  = svq3_get_ue_golomb(gb); /* [DIRAC_STD] FRAME_WIDTH  */
-        source->height = svq3_get_ue_golomb(gb); /* [DIRAC_STD] FRAME_HEIGHT */
+        dsh->width  = svq3_get_ue_golomb(gb); /* [DIRAC_STD] FRAME_WIDTH  */
+        dsh->height = svq3_get_ue_golomb(gb); /* [DIRAC_STD] FRAME_HEIGHT */
     }
 
     /* [DIRAC_STD] 10.3.3 Chroma Sampling Format.
@@ -137,10 +160,11 @@ static int parse_source_parameters(AVCodecContext *avctx, GetBitContext *gb,
     /* [DIRAC_STD] custom_chroma_format_flag */
     if (get_bits1(gb))
         /* [DIRAC_STD] CHROMA_FORMAT_INDEX */
-        source->chroma_format = svq3_get_ue_golomb(gb);
-    if (source->chroma_format > 2U) {
-        av_log(avctx, AV_LOG_ERROR, "Unknown chroma format %d\n",
-               source->chroma_format);
+        dsh->chroma_format = svq3_get_ue_golomb(gb);
+    if (dsh->chroma_format > 2U) {
+        if (log_ctx)
+            av_log(log_ctx, AV_LOG_ERROR, "Unknown chroma format %d\n",
+                   dsh->chroma_format);
         return AVERROR_INVALIDDATA;
     }
 
@@ -148,18 +172,18 @@ static int parse_source_parameters(AVCodecContext *avctx, GetBitContext *gb,
     /* [DIRAC_STD] custom_scan_format_flag */
     if (get_bits1(gb))
         /* [DIRAC_STD] SOURCE_SAMPLING */
-        source->interlaced = svq3_get_ue_golomb(gb);
-    if (source->interlaced > 1U)
+        dsh->interlaced = svq3_get_ue_golomb(gb);
+    if (dsh->interlaced > 1U)
         return AVERROR_INVALIDDATA;
 
     /* [DIRAC_STD] 10.3.5 Frame Rate. frame_rate(video_params) */
     if (get_bits1(gb)) { /* [DIRAC_STD] custom_frame_rate_flag */
-        source->frame_rate_index = svq3_get_ue_golomb(gb);
+        dsh->frame_rate_index = svq3_get_ue_golomb(gb);
 
-        if (source->frame_rate_index > 10U)
+        if (dsh->frame_rate_index > 10U)
             return AVERROR_INVALIDDATA;
 
-        if (!source->frame_rate_index) {
+        if (!dsh->frame_rate_index) {
             /* [DIRAC_STD] FRAME_RATE_NUMER */
             frame_rate.num = svq3_get_ue_golomb(gb);
             /* [DIRAC_STD] FRAME_RATE_DENOM */
@@ -167,45 +191,45 @@ static int parse_source_parameters(AVCodecContext *avctx, GetBitContext *gb,
         }
     }
     /* [DIRAC_STD] preset_frame_rate(video_params, index) */
-    if (source->frame_rate_index > 0) {
-        if (source->frame_rate_index <= 8)
-            frame_rate = ff_mpeg12_frame_rate_tab[source->frame_rate_index];
+    if (dsh->frame_rate_index > 0) {
+        if (dsh->frame_rate_index <= 8)
+            frame_rate = ff_mpeg12_frame_rate_tab[dsh->frame_rate_index];
         else
             /* [DIRAC_STD] Table 10.3 values 9-10 */
-            frame_rate = dirac_frame_rate[source->frame_rate_index - 9];
+            frame_rate = dirac_frame_rate[dsh->frame_rate_index - 9];
     }
-    avctx->framerate = frame_rate;
+    dsh->framerate = frame_rate;
 
     /* [DIRAC_STD] 10.3.6 Pixel Aspect Ratio.
      * pixel_aspect_ratio(video_params) */
     if (get_bits1(gb)) { /* [DIRAC_STD] custom_pixel_aspect_ratio_flag */
         /* [DIRAC_STD] index */
-        source->aspect_ratio_index = svq3_get_ue_golomb(gb);
+        dsh->aspect_ratio_index = svq3_get_ue_golomb(gb);
 
-        if (source->aspect_ratio_index > 6U)
+        if (dsh->aspect_ratio_index > 6U)
             return AVERROR_INVALIDDATA;
 
-        if (!source->aspect_ratio_index) {
-            avctx->sample_aspect_ratio.num = svq3_get_ue_golomb(gb);
-            avctx->sample_aspect_ratio.den = svq3_get_ue_golomb(gb);
+        if (!dsh->aspect_ratio_index) {
+            dsh->sample_aspect_ratio.num = svq3_get_ue_golomb(gb);
+            dsh->sample_aspect_ratio.den = svq3_get_ue_golomb(gb);
         }
     }
     /* [DIRAC_STD] Take value from Table 10.4 Available preset pixel
      *  aspect ratio values */
-    if (source->aspect_ratio_index > 0)
-        avctx->sample_aspect_ratio =
-            dirac_preset_aspect_ratios[source->aspect_ratio_index - 1];
+    if (dsh->aspect_ratio_index > 0)
+        dsh->sample_aspect_ratio =
+            dirac_preset_aspect_ratios[dsh->aspect_ratio_index - 1];
 
     /* [DIRAC_STD] 10.3.7 Clean area. clean_area(video_params) */
     if (get_bits1(gb)) { /* [DIRAC_STD] custom_clean_area_flag */
         /* [DIRAC_STD] CLEAN_WIDTH */
-        source->clean_width = svq3_get_ue_golomb(gb);
+        dsh->clean_width = svq3_get_ue_golomb(gb);
         /* [DIRAC_STD] CLEAN_HEIGHT */
-        source->clean_height = svq3_get_ue_golomb(gb);
+        dsh->clean_height = svq3_get_ue_golomb(gb);
         /* [DIRAC_STD] CLEAN_LEFT_OFFSET */
-        source->clean_left_offset = svq3_get_ue_golomb(gb);
+        dsh->clean_left_offset = svq3_get_ue_golomb(gb);
         /* [DIRAC_STD] CLEAN_RIGHT_OFFSET */
-        source->clean_right_offset = svq3_get_ue_golomb(gb);
+        dsh->clean_right_offset = svq3_get_ue_golomb(gb);
     }
 
     /* [DIRAC_STD] 10.3.8 Signal range. signal_range(video_params)
@@ -213,127 +237,168 @@ static int parse_source_parameters(AVCodecContext *avctx, GetBitContext *gb,
      * AVCOL_RANGE_MPEG/JPEG values */
     if (get_bits1(gb)) { /* [DIRAC_STD] custom_signal_range_flag */
         /* [DIRAC_STD] index */
-        source->pixel_range_index = svq3_get_ue_golomb(gb);
+        dsh->pixel_range_index = svq3_get_ue_golomb(gb);
 
-        if (source->pixel_range_index > 4U)
+        if (dsh->pixel_range_index > 4U)
             return AVERROR_INVALIDDATA;
 
         /* This assumes either fullrange or MPEG levels only */
-        if (!source->pixel_range_index) {
+        if (!dsh->pixel_range_index) {
             luma_offset = svq3_get_ue_golomb(gb);
             luma_depth  = av_log2(svq3_get_ue_golomb(gb)) + 1;
             svq3_get_ue_golomb(gb); /* chroma offset    */
             svq3_get_ue_golomb(gb); /* chroma excursion */
-            avctx->color_range = luma_offset ? AVCOL_RANGE_MPEG
-                                             : AVCOL_RANGE_JPEG;
+            dsh->color_range = luma_offset ? AVCOL_RANGE_MPEG
+                                           : AVCOL_RANGE_JPEG;
         }
     }
     /* [DIRAC_STD] Table 10.5
      * Available signal range presets <--> pixel_range_presets */
-    if (source->pixel_range_index > 0) {
-        idx                = source->pixel_range_index - 1;
+    if (dsh->pixel_range_index > 0) {
+        idx                = dsh->pixel_range_index - 1;
         luma_depth         = pixel_range_presets[idx].bitdepth;
-        avctx->color_range = pixel_range_presets[idx].color_range;
+        dsh->color_range   = pixel_range_presets[idx].color_range;
     }
 
-    if (luma_depth > 8)
-        av_log(avctx, AV_LOG_WARNING, "Bitdepth greater than 8\n");
+    dsh->bit_depth = luma_depth;
 
-    avctx->pix_fmt = dirac_pix_fmt[!luma_offset][source->chroma_format];
-    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_x_shift, &chroma_y_shift);
-    if ((source->width % (1<<chroma_x_shift)) || (source->height % (1<<chroma_y_shift))) {
-        av_log(avctx, AV_LOG_ERROR, "Dimensions must be an integer multiple of the chroma subsampling\n");
+    /* Full range 8 bits uses the same pix_fmts as limited range 8 bits */
+    dsh->pixel_range_index += dsh->pixel_range_index == 1;
+
+    if (dsh->pixel_range_index < 2U)
         return AVERROR_INVALIDDATA;
-    }
 
+    dsh->pix_fmt = dirac_pix_fmt[dsh->chroma_format][dsh->pixel_range_index-2];
+    avcodec_get_chroma_sub_sample(dsh->pix_fmt, &chroma_x_shift, &chroma_y_shift);
+    if ((dsh->width % (1<<chroma_x_shift)) || (dsh->height % (1<<chroma_y_shift))) {
+        if (log_ctx)
+            av_log(log_ctx, AV_LOG_ERROR, "Dimensions must be an integer multiple of the chroma subsampling\n");
+        return AVERROR_INVALIDDATA;
+    }
 
     /* [DIRAC_STD] 10.3.9 Colour specification. colour_spec(video_params) */
     if (get_bits1(gb)) { /* [DIRAC_STD] custom_colour_spec_flag */
         /* [DIRAC_STD] index */
-        idx = source->color_spec_index = svq3_get_ue_golomb(gb);
+        idx = dsh->color_spec_index = svq3_get_ue_golomb(gb);
 
-        if (source->color_spec_index > 4U)
+        if (dsh->color_spec_index > 4U)
             return AVERROR_INVALIDDATA;
 
-        avctx->color_primaries = dirac_color_presets[idx].color_primaries;
-        avctx->colorspace      = dirac_color_presets[idx].colorspace;
-        avctx->color_trc       = dirac_color_presets[idx].color_trc;
+        dsh->color_primaries = dirac_color_presets[idx].color_primaries;
+        dsh->colorspace      = dirac_color_presets[idx].colorspace;
+        dsh->color_trc       = dirac_color_presets[idx].color_trc;
 
-        if (!source->color_spec_index) {
+        if (!dsh->color_spec_index) {
             /* [DIRAC_STD] 10.3.9.1 Colour primaries */
             if (get_bits1(gb)) {
                 idx = svq3_get_ue_golomb(gb);
                 if (idx < 3U)
-                    avctx->color_primaries = dirac_primaries[idx];
+                    dsh->color_primaries = dirac_primaries[idx];
             }
             /* [DIRAC_STD] 10.3.9.2 Colour matrix */
             if (get_bits1(gb)) {
                 idx = svq3_get_ue_golomb(gb);
                 if (!idx)
-                    avctx->colorspace = AVCOL_SPC_BT709;
+                    dsh->colorspace = AVCOL_SPC_BT709;
                 else if (idx == 1)
-                    avctx->colorspace = AVCOL_SPC_BT470BG;
+                    dsh->colorspace = AVCOL_SPC_BT470BG;
             }
             /* [DIRAC_STD] 10.3.9.3 Transfer function */
             if (get_bits1(gb) && !svq3_get_ue_golomb(gb))
-                avctx->color_trc = AVCOL_TRC_BT709;
+                dsh->color_trc = AVCOL_TRC_BT709;
         }
     } else {
-        idx                    = source->color_spec_index;
-        avctx->color_primaries = dirac_color_presets[idx].color_primaries;
-        avctx->colorspace      = dirac_color_presets[idx].colorspace;
-        avctx->color_trc       = dirac_color_presets[idx].color_trc;
+        idx                    = dsh->color_spec_index;
+        dsh->color_primaries = dirac_color_presets[idx].color_primaries;
+        dsh->colorspace      = dirac_color_presets[idx].colorspace;
+        dsh->color_trc       = dirac_color_presets[idx].color_trc;
     }
 
     return 0;
 }
 
 /* [DIRAC_STD] 10. Sequence Header. sequence_header() */
-int avpriv_dirac_parse_sequence_header(AVCodecContext *avctx, GetBitContext *gb,
-                                       dirac_source_params *source)
+int av_dirac_parse_sequence_header(AVDiracSeqHeader **pdsh,
+                                   const uint8_t *buf, size_t buf_size,
+                                   void *log_ctx)
 {
-    unsigned version_major;
+    AVDiracSeqHeader *dsh;
+    GetBitContext gb;
     unsigned video_format, picture_coding_mode;
     int ret;
 
+    dsh = av_mallocz(sizeof(*dsh));
+    if (!dsh)
+        return AVERROR(ENOMEM);
+
+    ret = init_get_bits8(&gb, buf, buf_size);
+    if (ret < 0)
+        goto fail;
+
     /* [DIRAC_SPEC] 10.1 Parse Parameters. parse_parameters() */
-    version_major  = svq3_get_ue_golomb(gb);
-    svq3_get_ue_golomb(gb); /* version_minor */
-    avctx->profile = svq3_get_ue_golomb(gb);
-    avctx->level   = svq3_get_ue_golomb(gb);
+    dsh->version.major = svq3_get_ue_golomb(&gb);
+    dsh->version.minor = svq3_get_ue_golomb(&gb);
+    dsh->profile   = svq3_get_ue_golomb(&gb);
+    dsh->level     = svq3_get_ue_golomb(&gb);
     /* [DIRAC_SPEC] sequence_header() -> base_video_format as defined in
      * 10.2 Base Video Format, table 10.1 Dirac predefined video formats */
-    video_format   = svq3_get_ue_golomb(gb);
+    video_format   = svq3_get_ue_golomb(&gb);
 
-    if (version_major < 2)
-        av_log(avctx, AV_LOG_WARNING, "Stream is old and may not work\n");
-    else if (version_major > 2)
-        av_log(avctx, AV_LOG_WARNING, "Stream may have unhandled features\n");
+    if (dsh->version.major < 2 && log_ctx)
+        av_log(log_ctx, AV_LOG_WARNING, "Stream is old and may not work\n");
+    else if (dsh->version.major > 2 && log_ctx)
+        av_log(log_ctx, AV_LOG_WARNING, "Stream may have unhandled features\n");
 
-    if (video_format > 20U)
-        return AVERROR_INVALIDDATA;
+    if (video_format > 20U) {
+        ret = AVERROR_INVALIDDATA;
+        goto fail;
+    }
 
     /* Fill in defaults for the source parameters. */
-    *source = dirac_source_parameters_defaults[video_format];
+    dsh->width              = dirac_source_parameters_defaults[video_format].width;
+    dsh->height             = dirac_source_parameters_defaults[video_format].height;
+    dsh->chroma_format      = dirac_source_parameters_defaults[video_format].chroma_format;
+    dsh->interlaced         = dirac_source_parameters_defaults[video_format].interlaced;
+    dsh->top_field_first    = dirac_source_parameters_defaults[video_format].top_field_first;
+    dsh->frame_rate_index   = dirac_source_parameters_defaults[video_format].frame_rate_index;
+    dsh->aspect_ratio_index = dirac_source_parameters_defaults[video_format].aspect_ratio_index;
+    dsh->clean_width        = dirac_source_parameters_defaults[video_format].clean_width;
+    dsh->clean_height       = dirac_source_parameters_defaults[video_format].clean_height;
+    dsh->clean_left_offset  = dirac_source_parameters_defaults[video_format].clean_left_offset;
+    dsh->clean_right_offset = dirac_source_parameters_defaults[video_format].clean_right_offset;
+    dsh->pixel_range_index  = dirac_source_parameters_defaults[video_format].pixel_range_index;
+    dsh->color_spec_index   = dirac_source_parameters_defaults[video_format].color_spec_index;
 
     /* [DIRAC_STD] 10.3 Source Parameters
      * Override the defaults. */
-    if (ret = parse_source_parameters(avctx, gb, source))
-        return ret;
-
-    ret = ff_set_dimensions(avctx, source->width, source->height);
+    ret = parse_source_parameters(dsh, &gb, log_ctx);
     if (ret < 0)
-        return ret;
-
-    ff_set_sar(avctx, avctx->sample_aspect_ratio);
+        goto fail;
 
     /* [DIRAC_STD] picture_coding_mode shall be 0 for fields and 1 for frames
      * currently only used to signal field coding */
-    picture_coding_mode = svq3_get_ue_golomb(gb);
+    picture_coding_mode = svq3_get_ue_golomb(&gb);
     if (picture_coding_mode != 0) {
-        av_log(avctx, AV_LOG_ERROR, "Unsupported picture coding mode %d\n",
-               picture_coding_mode);
-        return AVERROR_INVALIDDATA;
+        if (log_ctx) {
+            av_log(log_ctx, AV_LOG_ERROR, "Unsupported picture coding mode %d\n",
+                   picture_coding_mode);
+        }
+        ret = AVERROR_INVALIDDATA;
+        goto fail;
     }
+
+    *pdsh = dsh;
     return 0;
+fail:
+    av_freep(&dsh);
+    *pdsh = NULL;
+    return ret;
+}
+#else
+int av_dirac_parse_sequence_header(AVDiracSeqHeader **pdsh,
+                                   const uint8_t *buf, size_t buf_size,
+                                   void *log_ctx)
+{
+    return AVERROR(ENOSYS);
 }
+#endif
diff --git a/libavcodec/dirac.h b/libavcodec/dirac.h
index b0f955bf..e6d9d346 100644
--- a/libavcodec/dirac.h
+++ b/libavcodec/dirac.h
@@ -32,9 +32,53 @@
  */
 
 #include "avcodec.h"
-#include "get_bits.h"
 
-typedef struct dirac_source_params {
+/**
+ * The spec limits the number of wavelet decompositions to 4 for both
+ * level 1 (VC-2) and 128 (long-gop default).
+ * 5 decompositions is the maximum before >16-bit buffers are needed.
+ * Schroedinger allows this for DD 9,7 and 13,7 wavelets only, limiting
+ * the others to 4 decompositions (or 3 for the fidelity filter).
+ *
+ * We use this instead of MAX_DECOMPOSITIONS to save some memory.
+ */
+#define MAX_DWT_LEVELS 5
+
+/**
+ * Parse code values:
+ *
+ * Dirac Specification ->
+ * 9.6.1  Table 9.1
+ *
+ * VC-2 Specification  ->
+ * 10.4.1 Table 10.1
+ */
+
+enum DiracParseCodes {
+    DIRAC_PCODE_SEQ_HEADER      = 0x00,
+    DIRAC_PCODE_END_SEQ         = 0x10,
+    DIRAC_PCODE_AUX             = 0x20,
+    DIRAC_PCODE_PAD             = 0x30,
+    DIRAC_PCODE_PICTURE_CODED   = 0x08,
+    DIRAC_PCODE_PICTURE_RAW     = 0x48,
+    DIRAC_PCODE_PICTURE_LOW_DEL = 0xC8,
+    DIRAC_PCODE_PICTURE_HQ      = 0xE8,
+    DIRAC_PCODE_INTER_NOREF_CO1 = 0x0A,
+    DIRAC_PCODE_INTER_NOREF_CO2 = 0x09,
+    DIRAC_PCODE_INTER_REF_CO1   = 0x0D,
+    DIRAC_PCODE_INTER_REF_CO2   = 0x0E,
+    DIRAC_PCODE_INTRA_REF_CO    = 0x0C,
+    DIRAC_PCODE_INTRA_REF_RAW   = 0x4C,
+    DIRAC_PCODE_INTRA_REF_PICT  = 0xCC,
+    DIRAC_PCODE_MAGIC           = 0x42424344,
+};
+
+typedef struct DiracVersionInfo {
+    int major;
+    int minor;
+} DiracVersionInfo;
+
+typedef struct AVDiracSeqHeader {
     unsigned width;
     unsigned height;
     uint8_t chroma_format;          ///< 0: 444  1: 422  2: 420
@@ -52,9 +96,36 @@ typedef struct dirac_source_params {
 
     uint8_t pixel_range_index;      ///< index into dirac_pixel_range_presets[]
     uint8_t color_spec_index;       ///< index into dirac_color_spec_presets[]
-} dirac_source_params;
 
-int avpriv_dirac_parse_sequence_header(AVCodecContext *avctx, GetBitContext *gb,
-                                       dirac_source_params *source);
+    int profile;
+    int level;
+
+    AVRational framerate;
+    AVRational sample_aspect_ratio;
+
+    enum AVPixelFormat pix_fmt;
+    enum AVColorRange color_range;
+    enum AVColorPrimaries color_primaries;
+    enum AVColorTransferCharacteristic color_trc;
+    enum AVColorSpace colorspace;
+
+    DiracVersionInfo version;
+    int bit_depth;
+} AVDiracSeqHeader;
+
+/**
+ * Parse a Dirac sequence header.
+ *
+ * @param dsh this function will allocate and fill an AVDiracSeqHeader struct
+ *            and write it into this pointer. The caller must free it with
+ *            av_free().
+ * @param buf the data buffer
+ * @param buf_size the size of the data buffer in bytes
+ * @param log_ctx if non-NULL, this function will log errors here
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+int av_dirac_parse_sequence_header(AVDiracSeqHeader **dsh,
+                                   const uint8_t *buf, size_t buf_size,
+                                   void *log_ctx);
 
 #endif /* AVCODEC_DIRAC_H */
diff --git a/libavcodec/dirac_dwt.c b/libavcodec/dirac_dwt.c
index ee3665e7..cc08f886 100644
--- a/libavcodec/dirac_dwt.c
+++ b/libavcodec/dirac_dwt.c
@@ -23,525 +23,44 @@
 #include "libavutil/avassert.h"
 #include "libavutil/common.h"
 #include "dirac_dwt.h"
-#include "libavcodec/x86/dirac_dwt.h"
 
+#define TEMPLATE_8bit
+#include "dirac_dwt_template.c"
 
-static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
-                                  int width)
-{
-    int i;
-
-    for (i = 0; i < width; i++)
-        b1[i] -= (b0[i] + b2[i] + 2) >> 2;
-}
-
-
-static av_always_inline
-void interleave(IDWTELEM *dst, IDWTELEM *src0, IDWTELEM *src1, int w2, int add, int shift)
-{
-    int i;
-    for (i = 0; i < w2; i++) {
-        dst[2*i  ] = (src0[i] + add) >> shift;
-        dst[2*i+1] = (src1[i] + add) >> shift;
-    }
-}
-
-static void horizontal_compose_dirac53i(IDWTELEM *b, IDWTELEM *temp, int w)
-{
-    const int w2 = w >> 1;
-    int x;
-
-    temp[0] = COMPOSE_53iL0(b[w2], b[0], b[w2]);
-    for (x = 1; x < w2; x++) {
-        temp[x     ] = COMPOSE_53iL0     (b[x+w2-1], b[x     ], b[x+w2]);
-        temp[x+w2-1] = COMPOSE_DIRAC53iH0(temp[x-1], b[x+w2-1], temp[x]);
-    }
-    temp[w-1] = COMPOSE_DIRAC53iH0(temp[w2-1], b[w-1], temp[w2-1]);
-
-    interleave(b, temp, temp+w2, w2, 1, 1);
-}
-
-static void horizontal_compose_dd97i(IDWTELEM *b, IDWTELEM *tmp, int w)
-{
-    const int w2 = w >> 1;
-    int x;
-
-    tmp[0] = COMPOSE_53iL0(b[w2], b[0], b[w2]);
-    for (x = 1; x < w2; x++)
-        tmp[x] = COMPOSE_53iL0(b[x+w2-1], b[x], b[x+w2]);
-
-    // extend the edges
-    tmp[-1]   = tmp[0];
-    tmp[w2+1] = tmp[w2] = tmp[w2-1];
-
-    for (x = 0; x < w2; x++) {
-        b[2*x  ] = (tmp[x] + 1)>>1;
-        b[2*x+1] = (COMPOSE_DD97iH0(tmp[x-1], tmp[x], b[x+w2], tmp[x+1], tmp[x+2]) + 1)>>1;
-    }
-}
+#define TEMPLATE_10bit
+#include "dirac_dwt_template.c"
 
-static void horizontal_compose_dd137i(IDWTELEM *b, IDWTELEM *tmp, int w)
-{
-    const int w2 = w >> 1;
-    int x;
-
-    tmp[0] = COMPOSE_DD137iL0(b[w2], b[w2], b[0], b[w2  ], b[w2+1]);
-    tmp[1] = COMPOSE_DD137iL0(b[w2], b[w2], b[1], b[w2+1], b[w2+2]);
-    for (x = 2; x < w2-1; x++)
-        tmp[x] = COMPOSE_DD137iL0(b[x+w2-2], b[x+w2-1], b[x], b[x+w2], b[x+w2+1]);
-    tmp[w2-1] = COMPOSE_DD137iL0(b[w-3], b[w-2], b[w2-1], b[w-1], b[w-1]);
-
-    // extend the edges
-    tmp[-1]   = tmp[0];
-    tmp[w2+1] = tmp[w2] = tmp[w2-1];
-
-    for (x = 0; x < w2; x++) {
-        b[2*x  ] = (tmp[x] + 1)>>1;
-        b[2*x+1] = (COMPOSE_DD97iH0(tmp[x-1], tmp[x], b[x+w2], tmp[x+1], tmp[x+2]) + 1)>>1;
-    }
-}
+#define TEMPLATE_12bit
+#include "dirac_dwt_template.c"
 
-static av_always_inline
-void horizontal_compose_haari(IDWTELEM *b, IDWTELEM *temp, int w, int shift)
+int ff_spatial_idwt_init(DWTContext *d, DWTPlane *p, enum dwt_type type,
+                         int decomposition_count, int bit_depth)
 {
-    const int w2 = w >> 1;
-    int x;
+    int ret = 0;
 
-    for (x = 0; x < w2; x++) {
-        temp[x   ] = COMPOSE_HAARiL0(b[x   ], b[x+w2]);
-        temp[x+w2] = COMPOSE_HAARiH0(b[x+w2], temp[x]);
-    }
-
-    interleave(b, temp, temp+w2, w2, shift, shift);
-}
-
-static void horizontal_compose_haar0i(IDWTELEM *b, IDWTELEM *temp, int w)
-{
-    horizontal_compose_haari(b, temp, w, 0);
-}
-
-static void horizontal_compose_haar1i(IDWTELEM *b, IDWTELEM *temp, int w)
-{
-    horizontal_compose_haari(b, temp, w, 1);
-}
-
-static void horizontal_compose_fidelityi(IDWTELEM *b, IDWTELEM *tmp, int w)
-{
-    const int w2 = w >> 1;
-    int i, x;
-    IDWTELEM v[8];
-
-    for (x = 0; x < w2; x++) {
-        for (i = 0; i < 8; i++)
-            v[i] = b[av_clip(x-3+i, 0, w2-1)];
-        tmp[x] = COMPOSE_FIDELITYiH0(v[0], v[1], v[2], v[3], b[x+w2], v[4], v[5], v[6], v[7]);
-    }
-
-    for (x = 0; x < w2; x++) {
-        for (i = 0; i < 8; i++)
-            v[i] = tmp[av_clip(x-4+i, 0, w2-1)];
-        tmp[x+w2] = COMPOSE_FIDELITYiL0(v[0], v[1], v[2], v[3], b[x], v[4], v[5], v[6], v[7]);
-    }
-
-    interleave(b, tmp+w2, tmp, w2, 0, 0);
-}
-
-static void horizontal_compose_daub97i(IDWTELEM *b, IDWTELEM *temp, int w)
-{
-    const int w2 = w >> 1;
-    int x, b0, b1, b2;
-
-    temp[0] = COMPOSE_DAUB97iL1(b[w2], b[0], b[w2]);
-    for (x = 1; x < w2; x++) {
-        temp[x     ] = COMPOSE_DAUB97iL1(b[x+w2-1], b[x     ], b[x+w2]);
-        temp[x+w2-1] = COMPOSE_DAUB97iH1(temp[x-1], b[x+w2-1], temp[x]);
-    }
-    temp[w-1] = COMPOSE_DAUB97iH1(temp[w2-1], b[w-1], temp[w2-1]);
-
-    // second stage combined with interleave and shift
-    b0 = b2 = COMPOSE_DAUB97iL0(temp[w2], temp[0], temp[w2]);
-    b[0] = (b0 + 1) >> 1;
-    for (x = 1; x < w2; x++) {
-        b2 = COMPOSE_DAUB97iL0(temp[x+w2-1], temp[x     ], temp[x+w2]);
-        b1 = COMPOSE_DAUB97iH0(          b0, temp[x+w2-1], b2        );
-        b[2*x-1] = (b1 + 1) >> 1;
-        b[2*x  ] = (b2 + 1) >> 1;
-        b0 = b2;
-    }
-    b[w-1] = (COMPOSE_DAUB97iH0(b2, temp[w-1], b2) + 1) >> 1;
-}
-
-static void vertical_compose_dirac53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width)
-{
-    int i;
-
-    for(i=0; i<width; i++){
-        b1[i] = COMPOSE_DIRAC53iH0(b0[i], b1[i], b2[i]);
-    }
-}
-
-static void vertical_compose_dd97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
-                                  IDWTELEM *b3, IDWTELEM *b4, int width)
-{
-    int i;
-
-    for(i=0; i<width; i++){
-        b2[i] = COMPOSE_DD97iH0(b0[i], b1[i], b2[i], b3[i], b4[i]);
-    }
-}
-
-static void vertical_compose_dd137iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
-                                      IDWTELEM *b3, IDWTELEM *b4, int width)
-{
-    int i;
-
-    for(i=0; i<width; i++){
-        b2[i] = COMPOSE_DD137iL0(b0[i], b1[i], b2[i], b3[i], b4[i]);
-    }
-}
-
-static void vertical_compose_haar(IDWTELEM *b0, IDWTELEM *b1, int width)
-{
-    int i;
-
-    for (i = 0; i < width; i++) {
-        b0[i] = COMPOSE_HAARiL0(b0[i], b1[i]);
-        b1[i] = COMPOSE_HAARiH0(b1[i], b0[i]);
-    }
-}
-
-static void vertical_compose_fidelityiH0(IDWTELEM *dst, IDWTELEM *b[8], int width)
-{
-    int i;
-
-    for(i=0; i<width; i++){
-        dst[i] = COMPOSE_FIDELITYiH0(b[0][i], b[1][i], b[2][i], b[3][i], dst[i], b[4][i], b[5][i], b[6][i], b[7][i]);
-    }
-}
-
-static void vertical_compose_fidelityiL0(IDWTELEM *dst, IDWTELEM *b[8], int width)
-{
-    int i;
-
-    for(i=0; i<width; i++){
-        dst[i] = COMPOSE_FIDELITYiL0(b[0][i], b[1][i], b[2][i], b[3][i], dst[i], b[4][i], b[5][i], b[6][i], b[7][i]);
-    }
-}
-
-static void vertical_compose_daub97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width)
-{
-    int i;
-
-    for(i=0; i<width; i++){
-        b1[i] = COMPOSE_DAUB97iH0(b0[i], b1[i], b2[i]);
-    }
-}
-
-static void vertical_compose_daub97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width)
-{
-    int i;
-
-    for(i=0; i<width; i++){
-        b1[i] = COMPOSE_DAUB97iH1(b0[i], b1[i], b2[i]);
-    }
-}
-
-static void vertical_compose_daub97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width)
-{
-    int i;
-
-    for(i=0; i<width; i++){
-        b1[i] = COMPOSE_DAUB97iL0(b0[i], b1[i], b2[i]);
-    }
-}
-
-static void vertical_compose_daub97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width)
-{
-    int i;
-
-    for(i=0; i<width; i++){
-        b1[i] = COMPOSE_DAUB97iL1(b0[i], b1[i], b2[i]);
-    }
-}
-
-
-static void spatial_compose_dd97i_dy(DWTContext *d, int level, int width, int height, int stride)
-{
-    vertical_compose_3tap vertical_compose_l0 = (void*)d->vertical_compose_l0;
-    vertical_compose_5tap vertical_compose_h0 = (void*)d->vertical_compose_h0;
-    DWTCompose *cs = d->cs + level;
-
-    int i, y = cs->y;
-    IDWTELEM *b[8];
-    for (i = 0; i < 6; i++)
-        b[i] = cs->b[i];
-    b[6] = d->buffer + av_clip(y+5, 0, height-2)*stride;
-    b[7] = d->buffer + av_clip(y+6, 1, height-1)*stride;
-
-        if(y+5<(unsigned)height) vertical_compose_l0(      b[5], b[6], b[7],       width);
-        if(y+1<(unsigned)height) vertical_compose_h0(b[0], b[2], b[3], b[4], b[6], width);
-
-        if(y-1<(unsigned)height) d->horizontal_compose(b[0], d->temp, width);
-        if(y+0<(unsigned)height) d->horizontal_compose(b[1], d->temp, width);
-
-    for (i = 0; i < 6; i++)
-        cs->b[i] = b[i+2];
-    cs->y += 2;
-}
-
-static void spatial_compose_dirac53i_dy(DWTContext *d, int level, int width, int height, int stride)
-{
-    vertical_compose_3tap vertical_compose_l0 = (void*)d->vertical_compose_l0;
-    vertical_compose_3tap vertical_compose_h0 = (void*)d->vertical_compose_h0;
-    DWTCompose *cs = d->cs + level;
-
-    int y= cs->y;
-    IDWTELEM *b[4] = { cs->b[0], cs->b[1] };
-    b[2] = d->buffer + avpriv_mirror(y+1, height-1)*stride;
-    b[3] = d->buffer + avpriv_mirror(y+2, height-1)*stride;
-
-        if(y+1<(unsigned)height) vertical_compose_l0(b[1], b[2], b[3], width);
-        if(y+0<(unsigned)height) vertical_compose_h0(b[0], b[1], b[2], width);
-
-        if(y-1<(unsigned)height) d->horizontal_compose(b[0], d->temp, width);
-        if(y+0<(unsigned)height) d->horizontal_compose(b[1], d->temp, width);
-
-    cs->b[0] = b[2];
-    cs->b[1] = b[3];
-    cs->y += 2;
-}
-
-
-static void spatial_compose_dd137i_dy(DWTContext *d, int level, int width, int height, int stride)
-{
-    vertical_compose_5tap vertical_compose_l0 = (void*)d->vertical_compose_l0;
-    vertical_compose_5tap vertical_compose_h0 = (void*)d->vertical_compose_h0;
-    DWTCompose *cs = d->cs + level;
-
-    int i, y = cs->y;
-    IDWTELEM *b[10];
-    for (i = 0; i < 8; i++)
-        b[i] = cs->b[i];
-    b[8] = d->buffer + av_clip(y+7, 0, height-2)*stride;
-    b[9] = d->buffer + av_clip(y+8, 1, height-1)*stride;
-
-        if(y+5<(unsigned)height) vertical_compose_l0(b[3], b[5], b[6], b[7], b[9], width);
-        if(y+1<(unsigned)height) vertical_compose_h0(b[0], b[2], b[3], b[4], b[6], width);
-
-        if(y-1<(unsigned)height) d->horizontal_compose(b[0], d->temp, width);
-        if(y+0<(unsigned)height) d->horizontal_compose(b[1], d->temp, width);
-
-    for (i = 0; i < 8; i++)
-        cs->b[i] = b[i+2];
-    cs->y += 2;
-}
-
-// haar makes the assumption that height is even (always true for dirac)
-static void spatial_compose_haari_dy(DWTContext *d, int level, int width, int height, int stride)
-{
-    vertical_compose_2tap vertical_compose = (void*)d->vertical_compose;
-    int y = d->cs[level].y;
-    IDWTELEM *b0 = d->buffer + (y-1)*stride;
-    IDWTELEM *b1 = d->buffer + (y  )*stride;
-
-    vertical_compose(b0, b1, width);
-    d->horizontal_compose(b0, d->temp, width);
-    d->horizontal_compose(b1, d->temp, width);
-
-    d->cs[level].y += 2;
-}
-
-// Don't do sliced idwt for fidelity; the 9 tap filter makes it a bit annoying
-// Fortunately, this filter isn't used in practice.
-static void spatial_compose_fidelity(DWTContext *d, int level, int width, int height, int stride)
-{
-    vertical_compose_9tap vertical_compose_l0 = (void*)d->vertical_compose_l0;
-    vertical_compose_9tap vertical_compose_h0 = (void*)d->vertical_compose_h0;
-    int i, y;
-    IDWTELEM *b[8];
-
-    for (y = 1; y < height; y += 2) {
-        for (i = 0; i < 8; i++)
-            b[i] = d->buffer + av_clip((y-7 + 2*i), 0, height-2)*stride;
-        vertical_compose_h0(d->buffer + y*stride, b, width);
-    }
-
-    for (y = 0; y < height; y += 2) {
-        for (i = 0; i < 8; i++)
-            b[i] = d->buffer + av_clip((y-7 + 2*i), 1, height-1)*stride;
-        vertical_compose_l0(d->buffer + y*stride, b, width);
-    }
-
-    for (y = 0; y < height; y++)
-        d->horizontal_compose(d->buffer + y*stride, d->temp, width);
-
-    d->cs[level].y = height+1;
-}
-
-static void spatial_compose_daub97i_dy(DWTContext *d, int level, int width, int height, int stride)
-{
-    vertical_compose_3tap vertical_compose_l0 = (void*)d->vertical_compose_l0;
-    vertical_compose_3tap vertical_compose_h0 = (void*)d->vertical_compose_h0;
-    vertical_compose_3tap vertical_compose_l1 = (void*)d->vertical_compose_l1;
-    vertical_compose_3tap vertical_compose_h1 = (void*)d->vertical_compose_h1;
-    DWTCompose *cs = d->cs + level;
-
-    int i, y = cs->y;
-    IDWTELEM *b[6];
-    for (i = 0; i < 4; i++)
-        b[i] = cs->b[i];
-    b[4] = d->buffer + avpriv_mirror(y+3, height-1)*stride;
-    b[5] = d->buffer + avpriv_mirror(y+4, height-1)*stride;
-
-        if(y+3<(unsigned)height) vertical_compose_l1(b[3], b[4], b[5], width);
-        if(y+2<(unsigned)height) vertical_compose_h1(b[2], b[3], b[4], width);
-        if(y+1<(unsigned)height) vertical_compose_l0(b[1], b[2], b[3], width);
-        if(y+0<(unsigned)height) vertical_compose_h0(b[0], b[1], b[2], width);
-
-        if(y-1<(unsigned)height) d->horizontal_compose(b[0], d->temp, width);
-        if(y+0<(unsigned)height) d->horizontal_compose(b[1], d->temp, width);
-
-    for (i = 0; i < 4; i++)
-        cs->b[i] = b[i+2];
-    cs->y += 2;
-}
-
-
-static void spatial_compose97i_init2(DWTCompose *cs, IDWTELEM *buffer, int height, int stride)
-{
-    cs->b[0] = buffer + avpriv_mirror(-3-1, height-1)*stride;
-    cs->b[1] = buffer + avpriv_mirror(-3  , height-1)*stride;
-    cs->b[2] = buffer + avpriv_mirror(-3+1, height-1)*stride;
-    cs->b[3] = buffer + avpriv_mirror(-3+2, height-1)*stride;
-    cs->y = -3;
-}
-
-static void spatial_compose53i_init2(DWTCompose *cs, IDWTELEM *buffer, int height, int stride)
-{
-    cs->b[0] = buffer + avpriv_mirror(-1-1, height-1)*stride;
-    cs->b[1] = buffer + avpriv_mirror(-1  , height-1)*stride;
-    cs->y = -1;
-}
-
-static void spatial_compose_dd97i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride)
-{
-    cs->b[0] = buffer + av_clip(-5-1, 0, height-2)*stride;
-    cs->b[1] = buffer + av_clip(-5  , 1, height-1)*stride;
-    cs->b[2] = buffer + av_clip(-5+1, 0, height-2)*stride;
-    cs->b[3] = buffer + av_clip(-5+2, 1, height-1)*stride;
-    cs->b[4] = buffer + av_clip(-5+3, 0, height-2)*stride;
-    cs->b[5] = buffer + av_clip(-5+4, 1, height-1)*stride;
-    cs->y = -5;
-}
-
-static void spatial_compose_dd137i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride)
-{
-    cs->b[0] = buffer + av_clip(-5-1, 0, height-2)*stride;
-    cs->b[1] = buffer + av_clip(-5  , 1, height-1)*stride;
-    cs->b[2] = buffer + av_clip(-5+1, 0, height-2)*stride;
-    cs->b[3] = buffer + av_clip(-5+2, 1, height-1)*stride;
-    cs->b[4] = buffer + av_clip(-5+3, 0, height-2)*stride;
-    cs->b[5] = buffer + av_clip(-5+4, 1, height-1)*stride;
-    cs->b[6] = buffer + av_clip(-5+5, 0, height-2)*stride;
-    cs->b[7] = buffer + av_clip(-5+6, 1, height-1)*stride;
-    cs->y = -5;
-}
-
-int ff_spatial_idwt_init2(DWTContext *d, IDWTELEM *buffer, int width, int height,
-                          int stride, enum dwt_type type, int decomposition_count,
-                          IDWTELEM *temp)
-{
-    int level;
-
-    d->buffer = buffer;
-    d->width = width;
-    d->height = height;
-    d->stride = stride;
+    d->buffer = p->buf;
+    d->width  = p->width;
+    d->height = p->height;
+    d->stride = p->stride;
+    d->temp   = p->tmp;
     d->decomposition_count = decomposition_count;
-    d->temp = temp + 8;
-
-    for(level=decomposition_count-1; level>=0; level--){
-        int hl = height >> level;
-        int stride_l = stride << level;
 
-        switch(type){
-        case DWT_DIRAC_DD9_7:
-            spatial_compose_dd97i_init(d->cs+level, buffer, hl, stride_l);
-            break;
-        case DWT_DIRAC_LEGALL5_3:
-            spatial_compose53i_init2(d->cs+level, buffer, hl, stride_l);
-            break;
-        case DWT_DIRAC_DD13_7:
-            spatial_compose_dd137i_init(d->cs+level, buffer, hl, stride_l);
-            break;
-        case DWT_DIRAC_HAAR0:
-        case DWT_DIRAC_HAAR1:
-            d->cs[level].y = 1;
-            break;
-        case DWT_DIRAC_DAUB9_7:
-            spatial_compose97i_init2(d->cs+level, buffer, hl, stride_l);
-            break;
-        default:
-            d->cs[level].y = 0;
-            break;
-        }
-    }
+    if (bit_depth == 8)
+        ret = ff_spatial_idwt_init_8bit(d, type);
+    else if (bit_depth == 10)
+        ret = ff_spatial_idwt_init_10bit(d, type);
+    else if (bit_depth == 12)
+        ret = ff_spatial_idwt_init_12bit(d, type);
+    else
+        av_log(NULL, AV_LOG_WARNING, "Unsupported bit depth = %i\n", bit_depth);
 
-    switch (type) {
-    case DWT_DIRAC_DD9_7:
-        d->spatial_compose = spatial_compose_dd97i_dy;
-        d->vertical_compose_l0 = (void*)vertical_compose53iL0;
-        d->vertical_compose_h0 = (void*)vertical_compose_dd97iH0;
-        d->horizontal_compose = horizontal_compose_dd97i;
-        d->support = 7;
-        break;
-    case DWT_DIRAC_LEGALL5_3:
-        d->spatial_compose = spatial_compose_dirac53i_dy;
-        d->vertical_compose_l0 = (void*)vertical_compose53iL0;
-        d->vertical_compose_h0 = (void*)vertical_compose_dirac53iH0;
-        d->horizontal_compose = horizontal_compose_dirac53i;
-        d->support = 3;
-        break;
-    case DWT_DIRAC_DD13_7:
-        d->spatial_compose = spatial_compose_dd137i_dy;
-        d->vertical_compose_l0 = (void*)vertical_compose_dd137iL0;
-        d->vertical_compose_h0 = (void*)vertical_compose_dd97iH0;
-        d->horizontal_compose = horizontal_compose_dd137i;
-        d->support = 7;
-        break;
-    case DWT_DIRAC_HAAR0:
-    case DWT_DIRAC_HAAR1:
-        d->spatial_compose = spatial_compose_haari_dy;
-        d->vertical_compose = (void*)vertical_compose_haar;
-        if (type == DWT_DIRAC_HAAR0)
-            d->horizontal_compose = horizontal_compose_haar0i;
-        else
-            d->horizontal_compose = horizontal_compose_haar1i;
-        d->support = 1;
-        break;
-    case DWT_DIRAC_FIDELITY:
-        d->spatial_compose = spatial_compose_fidelity;
-        d->vertical_compose_l0 = (void*)vertical_compose_fidelityiL0;
-        d->vertical_compose_h0 = (void*)vertical_compose_fidelityiH0;
-        d->horizontal_compose = horizontal_compose_fidelityi;
-        d->support = 0; // not really used
-        break;
-    case DWT_DIRAC_DAUB9_7:
-        d->spatial_compose = spatial_compose_daub97i_dy;
-        d->vertical_compose_l0 = (void*)vertical_compose_daub97iL0;
-        d->vertical_compose_h0 = (void*)vertical_compose_daub97iH0;
-        d->vertical_compose_l1 = (void*)vertical_compose_daub97iL1;
-        d->vertical_compose_h1 = (void*)vertical_compose_daub97iH1;
-        d->horizontal_compose = horizontal_compose_daub97i;
-        d->support = 5;
-        break;
-    default:
+    if (ret) {
         av_log(NULL, AV_LOG_ERROR, "Unknown wavelet type %d\n", type);
         return AVERROR_INVALIDDATA;
     }
 
-    if (HAVE_MMX) ff_spatial_idwt_init_mmx(d, type);
-
+    if (ARCH_X86 && bit_depth == 8)
+        ff_spatial_idwt_init_x86(d, type);
     return 0;
 }
 
@@ -558,4 +77,3 @@ void ff_spatial_idwt_slice2(DWTContext *d, int y)
             d->spatial_compose(d, level, wl, hl, stride_l);
     }
 }
-
diff --git a/libavcodec/dirac_dwt.h b/libavcodec/dirac_dwt.h
index e5e447b0..4d338651 100644
--- a/libavcodec/dirac_dwt.h
+++ b/libavcodec/dirac_dwt.h
@@ -30,21 +30,30 @@ typedef short IDWTELEM;
 #define MAX_DECOMPOSITIONS 8
 
 typedef struct DWTCompose {
-    IDWTELEM *b[MAX_DWT_SUPPORT];
+    uint8_t *b[MAX_DWT_SUPPORT];
     int y;
 } DWTCompose;
 
+typedef struct DWTPlane {    /* per-plane geometry and buffers handed to ff_spatial_idwt_init() */
+    int width;               /* copied into DWTContext.width */
+    int height;              /* copied into DWTContext.height */
+    int stride;              /* copied into DWTContext.stride; rows are addressed on uint8_t pointers, i.e. in bytes */
+    uint8_t *buf;            /* coefficient buffer, becomes DWTContext.buffer */
+    uint8_t *buf_base;       /* NOTE(review): presumably the allocation buf points into - confirm against the decoder */
+    uint8_t *tmp;            /* scratch row, becomes DWTContext.temp */
+} DWTPlane;
+
 struct DWTContext;
 
 // Possible prototypes for vertical_compose functions
-typedef void (*vertical_compose_2tap)(IDWTELEM *b0, IDWTELEM *b1, int width);
-typedef void (*vertical_compose_3tap)(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width);
-typedef void (*vertical_compose_5tap)(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, int width);
-typedef void (*vertical_compose_9tap)(IDWTELEM *dst, IDWTELEM *b[8], int width);
+typedef void (*vertical_compose_2tap)(uint8_t *b0, uint8_t *b1, int width);
+typedef void (*vertical_compose_3tap)(uint8_t *b0, uint8_t *b1, uint8_t *b2, int width);
+typedef void (*vertical_compose_5tap)(uint8_t *b0, uint8_t *b1, uint8_t *b2, uint8_t *b3, uint8_t *b4, int width);
+typedef void (*vertical_compose_9tap)(uint8_t *dst, uint8_t *b[8], int width);
 
 typedef struct DWTContext {
-    IDWTELEM *buffer;
-    IDWTELEM *temp;
+    uint8_t *buffer;
+    uint8_t *temp;
     int width;
     int height;
     int stride;
@@ -57,7 +66,7 @@ typedef struct DWTContext {
     void (*vertical_compose_l1)(void);
     void (*vertical_compose_h1)(void);
     void (*vertical_compose)(void);     ///< one set of lowpass and highpass combined
-    void (*horizontal_compose)(IDWTELEM *b, IDWTELEM *tmp, int width);
+    void (*horizontal_compose)(uint8_t *b, uint8_t *tmp, int width);
 
     DWTCompose cs[MAX_DECOMPOSITIONS];
 } DWTContext;
@@ -76,9 +85,9 @@ enum dwt_type {
 };
 
 // -1 if an error occurred, e.g. the dwt_type isn't recognized
-int ff_spatial_idwt_init2(DWTContext *d, IDWTELEM *buffer, int width, int height,
-                          int stride, enum dwt_type type, int decomposition_count,
-                          IDWTELEM *temp);
+int ff_spatial_idwt_init(DWTContext *d, DWTPlane *p, enum dwt_type type,
+                         int decomposition_count, int bit_depth);
+void ff_spatial_idwt_init_x86(DWTContext *d, enum dwt_type type);
 
 void ff_spatial_idwt_slice2(DWTContext *d, int y);
 
diff --git a/libavcodec/dirac_dwt_template.c b/libavcodec/dirac_dwt_template.c
new file mode 100644
index 00000000..972c711c
--- /dev/null
+++ b/libavcodec/dirac_dwt_template.c
@@ -0,0 +1,608 @@
+/*
+ * Copyright (C) 2004-2010 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (C) 2008 David Conrad
+ * Copyright (C) 2015 Open Broadcast Systems Ltd.
+ * Author    (C) 2015 Rostislav Pehlivanov <atomnuker@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* The including file selects an instantiation by defining one TEMPLATE_<N>bit macro. */
+#if defined(TEMPLATE_8bit)
+#    define RENAME(N)   N ## _8bit
+#    define TYPE        int16_t
+#    undef  TEMPLATE_8bit
+
+#elif defined(TEMPLATE_10bit)
+#    define RENAME(N)   N ## _10bit
+#    define TYPE        int32_t
+#    undef  TEMPLATE_10bit
+
+#elif defined(TEMPLATE_12bit)
+#    define RENAME(N)   N ## _12bit
+#    define TYPE        int32_t
+#    undef  TEMPLATE_12bit
+
+#else
+#    error "dirac_dwt_template.c: define TEMPLATE_8bit, TEMPLATE_10bit or TEMPLATE_12bit before including"
+#endif
+
+/* Vertical "53iL0" lifting step: row _b1 is updated in place from rows _b0 and _b2. */
+static void RENAME(vertical_compose53iL0)(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2,
+                                          int width)
+{
+    TYPE *r0 = (TYPE *)_b0, *r1 = (TYPE *)_b1, *r2 = (TYPE *)_b2;
+    int col;
+
+    for (col = 0; col < width; col++)
+        r1[col] -= (r0[col] + r2[col] + 2) >> 2;
+}
+
+/* dst[2k] = (src0[k] + add) >> shift and dst[2k+1] = (src1[k] + add) >> shift for k in [0, w2). */
+static av_always_inline void RENAME(interleave)(TYPE *dst, TYPE *src0, TYPE *src1, int w2,
+                                                int add, int shift)
+{
+    for (; w2 > 0; w2--, dst += 2) {
+        dst[0] = (*src0++ + add) >> shift;
+        dst[1] = (*src1++ + add) >> shift;
+    }
+}
+
+/* Horizontal 5/3 compose of one row: lifted into _temp, then interleaved back into _b as (x + 1) >> 1. */
+static void RENAME(horizontal_compose_dirac53i)(uint8_t *_b, uint8_t *_temp, int w)
+{
+    TYPE *row = (TYPE *)_b, *scratch = (TYPE *)_temp;
+    const int half = w >> 1;
+    TYPE *high_in = row + half, *high_out = scratch + half;
+    int i;
+
+    scratch[0] = COMPOSE_53iL0(high_in[0], row[0], high_in[0]);
+    for (i = 1; i < half; i++) {
+        scratch[i]    = COMPOSE_53iL0(high_in[i-1], row[i], high_in[i]);
+        high_out[i-1] = COMPOSE_DIRAC53iH0(scratch[i-1], high_in[i-1], scratch[i]);
+    }
+    scratch[w-1] = COMPOSE_DIRAC53iH0(scratch[half-1], row[w-1], scratch[half-1]);
+    RENAME(interleave)(row, scratch, high_out, half, 1, 1);
+}
+
+/* Horizontal DD 9,7 compose of one row of w coefficients, written back into _b; _tmp is scratch. */
+static void RENAME(horizontal_compose_dd97i)(uint8_t *_b, uint8_t *_tmp, int w)
+{
+    TYPE *row = (TYPE *)_b, *even = (TYPE *)_tmp;
+    const int half = w >> 1;
+    TYPE *high = row + half;
+    int i;
+
+    even[0] = COMPOSE_53iL0(high[0], row[0], high[0]);
+    for (i = 1; i < half; i++)
+        even[i] = COMPOSE_53iL0(high[i-1], row[i], high[i]);
+    /* replicate the border samples: the 5-tap step below reads one before and two after */
+    even[-1]     = even[0];
+    even[half]   = even[half-1];
+    even[half+1] = even[half];
+    for (i = 0; i < half; i++) {
+        row[2*i  ] = (even[i] + 1) >> 1;
+        row[2*i+1] = (COMPOSE_DD97iH0(even[i-1], even[i], high[i], even[i+1], even[i+2]) + 1) >> 1;
+    }
+}
+
+/* Horizontal DD 13,7 compose of one row of w coefficients, written back into _b; _tmp is scratch. */
+static void RENAME(horizontal_compose_dd137i)(uint8_t *_b, uint8_t *_tmp, int w)
+{
+    TYPE *row = (TYPE *)_b, *even = (TYPE *)_tmp;
+    const int half = w >> 1;
+    TYPE *high = row + half;
+    int i;
+
+    /* the first two and the last output repeat edge samples instead of reading out of range */
+    even[0] = COMPOSE_DD137iL0(high[0], high[0], row[0], high[0], high[1]);
+    even[1] = COMPOSE_DD137iL0(high[0], high[0], row[1], high[1], high[2]);
+    for (i = 2; i < half-1; i++)
+        even[i] = COMPOSE_DD137iL0(high[i-2], high[i-1], row[i], high[i], high[i+1]);
+    even[half-1] = COMPOSE_DD137iL0(row[w-3], row[w-2], row[half-1], row[w-1], row[w-1]);
+    even[-1]     = even[0];
+    even[half]   = even[half-1];
+    even[half+1] = even[half];
+    for (i = 0; i < half; i++) {
+        row[2*i  ] = (even[i] + 1) >> 1;
+        row[2*i+1] = (COMPOSE_DD97iH0(even[i-1], even[i], high[i], even[i+1], even[i+2]) + 1) >> 1;
+    }
+}
+
+/* Haar row compose into temp, then interleave back into b using shift as both add and shift. */
+static av_always_inline void RENAME(horizontal_compose_haari)(TYPE *b, TYPE *temp,
+                                                              int w, int shift)
+{
+    const int half = w >> 1;
+    TYPE *hi_in = b + half, *hi_out = temp + half;
+    int i;
+    for (i = 0; i < half; i++) {
+        temp[i]   = COMPOSE_HAARiL0(b[i], hi_in[i]);
+        hi_out[i] = COMPOSE_HAARiH0(hi_in[i], temp[i]);
+    }
+    RENAME(interleave)(b, temp, hi_out, half, shift, shift);
+}
+
+/* Haar row compose with shift 0, i.e. the interleave stores the values unscaled. */
+static void RENAME(horizontal_compose_haar0i)(uint8_t *_b, uint8_t *_temp, int w)
+{
+    const int shift = 0;
+    RENAME(horizontal_compose_haari)((TYPE *)_b, (TYPE *)_temp, w, shift);
+}
+
+/* Haar row compose with shift 1, i.e. the interleave stores (x + 1) >> 1. */
+static void RENAME(horizontal_compose_haar1i)(uint8_t *_b, uint8_t *_temp, int w)
+{
+    const int shift = 1;
+    RENAME(horizontal_compose_haari)((TYPE *)_b, (TYPE *)_temp, w, shift);
+}
+
+/* Fidelity-filter row compose: two 8-tap passes into _tmp, then an unscaled interleave into _b. */
+static void RENAME(horizontal_compose_fidelityi)(uint8_t *_b, uint8_t *_tmp, int w)
+{
+    TYPE *row = (TYPE *)_b, *scratch = (TYPE *)_tmp;
+    const int half = w >> 1, last = half - 1;
+    TYPE *pass2 = scratch + half;
+    TYPE tap[8];
+    int pos, k;
+
+    for (pos = 0; pos < half; pos++) {
+        for (k = 0; k < 8; k++)
+            tap[k] = row[av_clip(pos - 3 + k, 0, last)];
+        scratch[pos] = COMPOSE_FIDELITYiH0(tap[0], tap[1], tap[2], tap[3], row[pos+half], tap[4], tap[5], tap[6], tap[7]);
+    }
+    /* the second pass takes its taps from the first-pass results in scratch[0..half-1] */
+    for (pos = 0; pos < half; pos++) {
+        for (k = 0; k < 8; k++)
+            tap[k] = scratch[av_clip(pos - 4 + k, 0, last)];
+        pass2[pos] = COMPOSE_FIDELITYiL0(tap[0], tap[1], tap[2], tap[3], row[pos], tap[4], tap[5], tap[6], tap[7]);
+    }
+    RENAME(interleave)(row, pass2, scratch, half, 0, 0);
+}
+
+/* Daubechies 9,7 row compose: an L1/H1 stage into _temp, then an L0/H0 stage written back into _b. */
+static void RENAME(horizontal_compose_daub97i)(uint8_t *_b, uint8_t *_temp, int w)
+{
+    TYPE *row = (TYPE *)_b, *scratch = (TYPE *)_temp;
+    const int half = w >> 1;
+    TYPE *hi_in = row + half, *hi_mid = scratch + half;
+    int i, prev_even, odd, cur_even;
+
+    scratch[0] = COMPOSE_DAUB97iL1(hi_in[0], row[0], hi_in[0]);
+    for (i = 1; i < half; i++) {
+        scratch[i]  = COMPOSE_DAUB97iL1(hi_in[i-1], row[i], hi_in[i]);
+        hi_mid[i-1] = COMPOSE_DAUB97iH1(scratch[i-1], hi_in[i-1], scratch[i]);
+    }
+    scratch[w-1] = COMPOSE_DAUB97iH1(scratch[half-1], row[w-1], scratch[half-1]);
+    /* second stage, fused with the interleave and the final (x + 1) >> 1 */
+    prev_even = cur_even = COMPOSE_DAUB97iL0(hi_mid[0], scratch[0], hi_mid[0]);
+    row[0] = (prev_even + 1) >> 1;
+    for (i = 1; i < half; i++) {
+        cur_even = COMPOSE_DAUB97iL0(hi_mid[i-1], scratch[i], hi_mid[i]);
+        odd      = COMPOSE_DAUB97iH0(prev_even, hi_mid[i-1], cur_even);
+        row[2*i-1] = (odd + 1) >> 1;
+        row[2*i  ] = (cur_even + 1) >> 1;
+        prev_even  = cur_even;
+    }
+    row[w-1] = (COMPOSE_DAUB97iH0(cur_even, scratch[w-1], cur_even) + 1) >> 1;
+}
+
+/* Per-column COMPOSE_DIRAC53iH0 over three rows; the middle row _b1 is overwritten. */
+static void RENAME(vertical_compose_dirac53iH0)(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2,
+                                                int width)
+{
+    TYPE *r0 = (TYPE *)_b0;
+    TYPE *r1 = (TYPE *)_b1;
+    TYPE *r2 = (TYPE *)_b2;
+    int col;
+    for (col = 0; col < width; col++)
+        r1[col] = COMPOSE_DIRAC53iH0(r0[col], r1[col], r2[col]);
+}
+
+/*
+ * Per-column COMPOSE_DD97iH0 over five rows; the middle row _b2 is overwritten.
+ */
+static void RENAME(vertical_compose_dd97iH0)(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2,
+                                             uint8_t *_b3, uint8_t *_b4, int width)
+{
+    TYPE *r0 = (TYPE *)_b0, *r1 = (TYPE *)_b1, *r2 = (TYPE *)_b2;
+    TYPE *r3 = (TYPE *)_b3, *r4 = (TYPE *)_b4;
+    int col;
+
+    for (col = 0; col < width; col++)
+        r2[col] = COMPOSE_DD97iH0(r0[col], r1[col], r2[col], r3[col], r4[col]);
+}
+
+/*
+ * Per-column COMPOSE_DD137iL0 over five rows; the middle row _b2 is overwritten.
+ */
+static void RENAME(vertical_compose_dd137iL0)(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2,
+                                              uint8_t *_b3, uint8_t *_b4, int width)
+{
+    TYPE *r0 = (TYPE *)_b0, *r1 = (TYPE *)_b1, *r2 = (TYPE *)_b2;
+    TYPE *r3 = (TYPE *)_b3, *r4 = (TYPE *)_b4;
+    int col;
+
+    for (col = 0; col < width; col++)
+        r2[col] = COMPOSE_DD137iL0(r0[col], r1[col], r2[col], r3[col], r4[col]);
+}
+
+/* Haar lifting on a row pair: _b0 is updated first, then _b1 from the updated _b0. */
+static void RENAME(vertical_compose_haar)(uint8_t *_b0, uint8_t *_b1, int width)
+{
+    TYPE *first = (TYPE *)_b0, *second = (TYPE *)_b1;
+    int col;
+
+    for (col = 0; col < width; col++) {
+        first[col]  = COMPOSE_HAARiL0(first[col], second[col]);
+        second[col] = COMPOSE_HAARiH0(second[col], first[col]);
+    }
+}
+
+/*
+ * Per-column COMPOSE_FIDELITYiH0: row _dst is rewritten from itself and the
+ * eight tap rows in _b[0..7].
+ */
+static void RENAME(vertical_compose_fidelityiH0)(uint8_t *_dst, uint8_t *_b[8], int width)
+{
+    TYPE *out = (TYPE *)_dst;
+    TYPE *tap[8];
+    int col, k;
+
+    for (k = 0; k < 8; k++)
+        tap[k] = (TYPE *)_b[k];
+    for (col = 0; col < width; col++)
+        out[col] = COMPOSE_FIDELITYiH0(tap[0][col], tap[1][col], tap[2][col], tap[3][col], out[col],
+                                       tap[4][col], tap[5][col], tap[6][col], tap[7][col]);
+}
+
+/*
+ * Per-column COMPOSE_FIDELITYiL0: row _dst is rewritten from itself and the
+ * eight tap rows in _b[0..7].
+ * Same shape as the H0 variant; only the lifting macro differs.
+ */
+static void RENAME(vertical_compose_fidelityiL0)(uint8_t *_dst, uint8_t *_b[8], int width)
+{
+    TYPE *out = (TYPE *)_dst;
+    TYPE *tap[8];
+    int col, k;
+
+    for (k = 0; k < 8; k++)
+        tap[k] = (TYPE *)_b[k];
+    for (col = 0; col < width; col++)
+        out[col] = COMPOSE_FIDELITYiL0(tap[0][col], tap[1][col], tap[2][col], tap[3][col], out[col],
+                                       tap[4][col], tap[5][col], tap[6][col], tap[7][col]);
+}
+
+/* Per-column COMPOSE_DAUB97iH0 over three rows; the middle row _b1 is overwritten. */
+static void RENAME(vertical_compose_daub97iH0)(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2, int width)
+{
+    TYPE *r0 = (TYPE *)_b0;
+    TYPE *r1 = (TYPE *)_b1;
+    TYPE *r2 = (TYPE *)_b2;
+    int col;
+
+    for (col = 0; col < width; col++)
+        r1[col] = COMPOSE_DAUB97iH0(r0[col], r1[col], r2[col]);
+}
+
+/* Per-column COMPOSE_DAUB97iH1 over three rows; the middle row _b1 is overwritten. */
+static void RENAME(vertical_compose_daub97iH1)(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2, int width)
+{
+    TYPE *r0 = (TYPE *)_b0;
+    TYPE *r1 = (TYPE *)_b1;
+    TYPE *r2 = (TYPE *)_b2;
+    int col;
+
+    for (col = 0; col < width; col++)
+        r1[col] = COMPOSE_DAUB97iH1(r0[col], r1[col], r2[col]);
+}
+
+/* Per-column COMPOSE_DAUB97iL0 over three rows; the middle row _b1 is overwritten. */
+static void RENAME(vertical_compose_daub97iL0)(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2, int width)
+{
+    TYPE *r0 = (TYPE *)_b0;
+    TYPE *r1 = (TYPE *)_b1;
+    TYPE *r2 = (TYPE *)_b2;
+    int col;
+
+    for (col = 0; col < width; col++)
+        r1[col] = COMPOSE_DAUB97iL0(r0[col], r1[col], r2[col]);
+}
+
+/* Per-column COMPOSE_DAUB97iL1 over three rows; the middle row _b1 is overwritten. */
+static void RENAME(vertical_compose_daub97iL1)(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2, int width)
+{
+    TYPE *r0 = (TYPE *)_b0;
+    TYPE *r1 = (TYPE *)_b1;
+    TYPE *r2 = (TYPE *)_b2;
+    int col;
+
+    for (col = 0; col < width; col++)
+        r1[col] = COMPOSE_DAUB97iL1(r0[col], r1[col], r2[col]);
+}
+
+/* Advance the DD 9,7 compose of one level by two rows, keeping a window of 8 row pointers. */
+static void RENAME(spatial_compose_dd97i_dy)(DWTContext *d, int level, int width, int height, int stride)
+{
+    vertical_compose_3tap lift_l0 = (void*)d->vertical_compose_l0;
+    vertical_compose_5tap lift_h0 = (void*)d->vertical_compose_h0;
+    DWTCompose *state = &d->cs[level];
+    const int y = state->y;
+    uint8_t *row[8];
+    int k;
+
+    for (k = 0; k < 6; k++)
+        row[k] = state->b[k];
+    row[6] = d->buffer + av_clip(y+5, 0, height-2)*stride;
+    row[7] = d->buffer + av_clip(y+6, 1, height-1)*stride;
+    /* the unsigned compares reject negative rows as well as rows >= height */
+    if (y+5 < (unsigned)height) lift_l0(row[5], row[6], row[7], width);
+    if (y+1 < (unsigned)height) lift_h0(row[0], row[2], row[3], row[4], row[6], width);
+    if (y-1 < (unsigned)height) d->horizontal_compose(row[0], d->temp, width);
+    if (y+0 < (unsigned)height) d->horizontal_compose(row[1], d->temp, width);
+    for (k = 0; k < 6; k++)
+        state->b[k] = row[k+2];
+    state->y += 2;
+}
+
+/* Advance the 5/3 compose of one level by two rows, keeping a window of 4 row pointers. */
+static void RENAME(spatial_compose_dirac53i_dy)(DWTContext *d, int level, int width, int height, int stride)
+{
+    vertical_compose_3tap lift_l0 = (void*)d->vertical_compose_l0;
+    vertical_compose_3tap lift_h0 = (void*)d->vertical_compose_h0;
+    DWTCompose *state = &d->cs[level];
+    const int y = state->y;
+    uint8_t *row0 = state->b[0];
+    uint8_t *row1 = state->b[1];
+    uint8_t *row2 = d->buffer + avpriv_mirror(y+1, height-1)*stride;
+    uint8_t *row3 = d->buffer + avpriv_mirror(y+2, height-1)*stride;
+
+    if (y+1 < (unsigned)height) lift_l0(row1, row2, row3, width);
+    if (y+0 < (unsigned)height) lift_h0(row0, row1, row2, width);
+    if (y-1 < (unsigned)height) d->horizontal_compose(row0, d->temp, width);
+    if (y+0 < (unsigned)height) d->horizontal_compose(row1, d->temp, width);
+    /* slide the window down by two rows for the next call */
+    state->b[0] = row2;
+    state->b[1] = row3;
+    state->y   += 2;
+}
+
+/* Advance the DD 13,7 compose of one level by two rows, keeping a window of 10 row pointers. */
+static void RENAME(spatial_compose_dd137i_dy)(DWTContext *d, int level, int width, int height, int stride)
+{
+    vertical_compose_5tap lift_l0 = (void*)d->vertical_compose_l0;
+    vertical_compose_5tap lift_h0 = (void*)d->vertical_compose_h0;
+    DWTCompose *state = &d->cs[level];
+    const int y = state->y;
+    uint8_t *row[10];
+    int k;
+
+    for (k = 0; k < 8; k++)
+        row[k] = state->b[k];
+    row[8] = d->buffer + av_clip(y+7, 0, height-2)*stride;
+    row[9] = d->buffer + av_clip(y+8, 1, height-1)*stride;
+    /* the unsigned compares reject negative rows as well as rows >= height */
+    if (y+5 < (unsigned)height) lift_l0(row[3], row[5], row[6], row[7], row[9], width);
+    if (y+1 < (unsigned)height) lift_h0(row[0], row[2], row[3], row[4], row[6], width);
+    if (y-1 < (unsigned)height) d->horizontal_compose(row[0], d->temp, width);
+    if (y+0 < (unsigned)height) d->horizontal_compose(row[1], d->temp, width);
+    for (k = 0; k < 8; k++)
+        state->b[k] = row[k+2];
+    state->y += 2;
+}
+
+// Haar handles rows y-1 and y of the level as a pair and then advances y by two;
+// it therefore assumes that height is even (always true for dirac)
+static void RENAME(spatial_compose_haari_dy)(DWTContext *d, int level, int width, int height, int stride)
+{
+    vertical_compose_2tap lift = (void*)d->vertical_compose;
+    DWTCompose *state = &d->cs[level];
+    uint8_t *upper = d->buffer + (state->y - 1)*stride;
+    uint8_t *lower = d->buffer + state->y*stride;
+
+    lift(upper, lower, width);
+    d->horizontal_compose(upper, d->temp, width);
+    d->horizontal_compose(lower, d->temp, width);
+    state->y += 2;
+}
+
+// Don't do sliced idwt for fidelity; the 9 tap filter makes it a bit annoying
+// Fortunately, this filter isn't used in practice.
+// The whole level is composed in a single call and cs[level].y is left at height+1.
+static void RENAME(spatial_compose_fidelity)(DWTContext *d, int level, int width, int height, int stride)
+{
+    vertical_compose_9tap lift_l0 = (void*)d->vertical_compose_l0;
+    vertical_compose_9tap lift_h0 = (void*)d->vertical_compose_h0;
+    uint8_t *const base = d->buffer;
+    uint8_t *tap[8];
+    int row, k;
+
+    for (row = 1; row < height; row += 2) {    /* odd rows first */
+        for (k = 0; k < 8; k++)
+            tap[k] = base + av_clip(row - 7 + 2*k, 0, height-2)*stride;
+        lift_h0(base + row*stride, tap, width);
+    }
+    for (row = 0; row < height; row += 2) {    /* then even rows */
+        for (k = 0; k < 8; k++)
+            tap[k] = base + av_clip(row - 7 + 2*k, 1, height-1)*stride;
+        lift_l0(base + row*stride, tap, width);
+    }
+    for (row = 0; row < height; row++)
+        d->horizontal_compose(base + row*stride, d->temp, width);
+
+    d->cs[level].y = height+1;
+}
+
+/* Advance the Daubechies 9,7 compose of one level by two rows, keeping a window of 6 row pointers. */
+static void RENAME(spatial_compose_daub97i_dy)(DWTContext *d, int level, int width, int height, int stride)
+{
+    vertical_compose_3tap lift_l0 = (void*)d->vertical_compose_l0;
+    vertical_compose_3tap lift_h0 = (void*)d->vertical_compose_h0;
+    vertical_compose_3tap lift_l1 = (void*)d->vertical_compose_l1;
+    vertical_compose_3tap lift_h1 = (void*)d->vertical_compose_h1;
+    DWTCompose *state = &d->cs[level];
+    const int y = state->y;
+    uint8_t *row[6];
+    int k;
+
+    for (k = 0; k < 4; k++)
+        row[k] = state->b[k];
+    row[4] = d->buffer + avpriv_mirror(y+3, height-1)*stride;
+    row[5] = d->buffer + avpriv_mirror(y+4, height-1)*stride;
+    /* the unsigned compares reject negative rows as well as rows >= height */
+    if (y+3 < (unsigned)height) lift_l1(row[3], row[4], row[5], width);
+    if (y+2 < (unsigned)height) lift_h1(row[2], row[3], row[4], width);
+    if (y+1 < (unsigned)height) lift_l0(row[1], row[2], row[3], width);
+    if (y+0 < (unsigned)height) lift_h0(row[0], row[1], row[2], width);
+    if (y-1 < (unsigned)height) d->horizontal_compose(row[0], d->temp, width);
+    if (y+0 < (unsigned)height) d->horizontal_compose(row[1], d->temp, width);
+    for (k = 0; k < 4; k++)
+        state->b[k] = row[k+2];
+    state->y += 2;
+}
+
+/* Prime the 4-row window with rows -4..-1 (each passed through avpriv_mirror) and start at y = -3. */
+static void RENAME(spatial_compose97i_init)(DWTCompose *cs, uint8_t *buffer, int height, int stride)
+{
+    int k;
+    for (k = 0; k < 4; k++)
+        cs->b[k] = buffer + avpriv_mirror(k - 4, height-1)*stride;
+    cs->y = -3;
+}
+
+static void RENAME(spatial_compose53i_init)(DWTCompose *cs, uint8_t *buffer, int height, int stride)
+{
+    cs->y    = -1;                                           /* starting row position of the window */
+    cs->b[0] = buffer + avpriv_mirror(-2, height-1)*stride; /* row y-1, passed through avpriv_mirror() */
+    cs->b[1] = buffer + avpriv_mirror(-1, height-1)*stride; /* row y,   passed through avpriv_mirror() */
+}
+
+/* Prime the 6-row window for the DD 9,7 compose and start at y = -5. */
+static void RENAME(spatial_compose_dd97i_init)(DWTCompose *cs, uint8_t *buffer, int height, int stride)
+{
+    int k;
+
+    /* rows -6..-1: even slots are clipped to [0, height-2], odd slots to [1, height-1] */
+    for (k = 0; k < 6; k++)
+        cs->b[k] = buffer + av_clip(k - 6, k & 1, height - 2 + (k & 1))*stride;
+    cs->y = -5;
+}
+
+/*
+ * Prime the 8-row window for the DD 13,7 compose and start at y = -5.
+ * Slot k holds row k-6; even slots are clipped to [0, height-2], odd to [1, height-1].
+ */
+static void RENAME(spatial_compose_dd137i_init)(DWTCompose *cs, uint8_t *buffer, int height, int stride)
+{
+    int k;
+
+    for (k = 0; k < 8; k++)
+        cs->b[k] = buffer + av_clip(k - 6, k & 1, height - 2 + (k & 1))*stride;
+    cs->y = -5;
+}
+
+/* Prime every level's row window and bind the TYPE-specific routines; 0 or AVERROR_INVALIDDATA. */
+static int RENAME(ff_spatial_idwt_init)(DWTContext *d, enum dwt_type type)
+{
+    int lvl;
+
+    /* skip 8 scratch elements: some horizontal filters write slightly before temp[0] */
+    d->temp = (uint8_t *)(((TYPE *)d->temp) + 8);
+
+    for (lvl = d->decomposition_count - 1; lvl >= 0; lvl--) {
+        DWTCompose *state      = d->cs + lvl;
+        const int level_height = d->height >> lvl;
+        const int level_stride = d->stride << lvl;
+
+        switch (type) {
+        case DWT_DIRAC_DD9_7:
+            RENAME(spatial_compose_dd97i_init)(state, d->buffer, level_height, level_stride);
+            break;
+        case DWT_DIRAC_LEGALL5_3:
+            RENAME(spatial_compose53i_init)(state, d->buffer, level_height, level_stride);
+            break;
+        case DWT_DIRAC_DD13_7:
+            RENAME(spatial_compose_dd137i_init)(state, d->buffer, level_height, level_stride);
+            break;
+        case DWT_DIRAC_HAAR0:
+        case DWT_DIRAC_HAAR1:
+            state->y = 1;
+            break;
+        case DWT_DIRAC_DAUB9_7:
+            RENAME(spatial_compose97i_init)(state, d->buffer, level_height, level_stride);
+            break;
+        default:
+            state->y = 0;
+            break;
+        }
+    }
+
+    switch (type) {
+    case DWT_DIRAC_DD9_7:
+        d->spatial_compose     = RENAME(spatial_compose_dd97i_dy);
+        d->vertical_compose_l0 = (void*)RENAME(vertical_compose53iL0);
+        d->vertical_compose_h0 = (void*)RENAME(vertical_compose_dd97iH0);
+        d->horizontal_compose  = RENAME(horizontal_compose_dd97i);
+        d->support             = 7;
+        break;
+    case DWT_DIRAC_LEGALL5_3:
+        d->spatial_compose     = RENAME(spatial_compose_dirac53i_dy);
+        d->vertical_compose_l0 = (void*)RENAME(vertical_compose53iL0);
+        d->vertical_compose_h0 = (void*)RENAME(vertical_compose_dirac53iH0);
+        d->horizontal_compose  = RENAME(horizontal_compose_dirac53i);
+        d->support             = 3;
+        break;
+    case DWT_DIRAC_DD13_7:
+        d->spatial_compose     = RENAME(spatial_compose_dd137i_dy);
+        d->vertical_compose_l0 = (void*)RENAME(vertical_compose_dd137iL0);
+        d->vertical_compose_h0 = (void*)RENAME(vertical_compose_dd97iH0);
+        d->horizontal_compose  = RENAME(horizontal_compose_dd137i);
+        d->support             = 7;
+        break;
+    case DWT_DIRAC_HAAR0:
+    case DWT_DIRAC_HAAR1:
+        d->spatial_compose    = RENAME(spatial_compose_haari_dy);
+        d->vertical_compose   = (void*)RENAME(vertical_compose_haar);
+        d->horizontal_compose = type == DWT_DIRAC_HAAR0 ? RENAME(horizontal_compose_haar0i)
+                                                        : RENAME(horizontal_compose_haar1i);
+        d->support            = 1;
+        break;
+    case DWT_DIRAC_FIDELITY:
+        d->spatial_compose     = RENAME(spatial_compose_fidelity);
+        d->vertical_compose_l0 = (void*)RENAME(vertical_compose_fidelityiL0);
+        d->vertical_compose_h0 = (void*)RENAME(vertical_compose_fidelityiH0);
+        d->horizontal_compose  = RENAME(horizontal_compose_fidelityi);
+        d->support             = 0; // not really used
+        break;
+    case DWT_DIRAC_DAUB9_7:
+        d->spatial_compose     = RENAME(spatial_compose_daub97i_dy);
+        d->vertical_compose_l0 = (void*)RENAME(vertical_compose_daub97iL0);
+        d->vertical_compose_h0 = (void*)RENAME(vertical_compose_daub97iH0);
+        d->vertical_compose_l1 = (void*)RENAME(vertical_compose_daub97iL1);
+        d->vertical_compose_h1 = (void*)RENAME(vertical_compose_daub97iH1);
+        d->horizontal_compose  = RENAME(horizontal_compose_daub97i);
+        d->support             = 5;
+        break;
+    default:
+        return AVERROR_INVALIDDATA;
+    }
+    return 0;
+}
+
+#undef RENAME
+#undef TYPE
diff --git a/libavcodec/dirac_parser.c b/libavcodec/dirac_parser.c
index 45ded5a7..1ade44a4 100644
--- a/libavcodec/dirac_parser.c
+++ b/libavcodec/dirac_parser.c
@@ -100,17 +100,36 @@ typedef struct DiracParseUnit {
 static int unpack_parse_unit(DiracParseUnit *pu, DiracParseContext *pc,
                              int offset)
 {
-    uint8_t *start = pc->buffer + offset;
-    uint8_t *end   = pc->buffer + pc->index;
-    if (start < pc->buffer || (start + 13 > end))
+    /*
+     * Decode the parse info header at pc->buffer + offset into *pu; return 1 on success,
+     * 0 if it does not fit in the buffer, has an unknown parse code or implausible offsets.
+     */
+    const uint8_t *start;
+    static const uint8_t valid_pu_types[] = {
+        0x00, 0x10, 0x20, 0x30, 0x08, 0x48, 0xC8, 0xE8, 0x0A, 0x0C, 0x0D, 0x0E,
+        0x4C, 0x09, 0xCC, 0x88, 0xCB
+    };
+
+    if (offset < 0 || pc->index - 13 < offset)
         return 0;
+
+    start = pc->buffer + offset;
     pu->pu_type = start[4];
 
     pu->next_pu_offset = AV_RB32(start + 5);
     pu->prev_pu_offset = AV_RB32(start + 9);
 
-    if (pu->pu_type == 0x10 && pu->next_pu_offset == 0)
-        pu->next_pu_offset = 13;
+    /* Check for valid parse code */
+    if (!memchr(valid_pu_types, pu->pu_type, sizeof(valid_pu_types)))
+        return 0;
+
+    if (pu->pu_type == 0x10 && pu->next_pu_offset == 0x00)
+        pu->next_pu_offset = 13; /* The length of a parse info header */
+
+    /* Check if the parse offsets are somewhat sane */
+    if ((pu->next_pu_offset && pu->next_pu_offset < 13) ||
+        (pu->prev_pu_offset && pu->prev_pu_offset < 13))
+        return 0;
 
     return 1;
 }
@@ -123,7 +142,7 @@ static int dirac_combine_frame(AVCodecParserContext *s, AVCodecContext *avctx,
     DiracParseContext *pc = s->priv_data;
 
     if (pc->overread_index) {
-        memcpy(pc->buffer, pc->buffer + pc->overread_index,
+        memmove(pc->buffer, pc->buffer + pc->overread_index,
                pc->index - pc->overread_index);
         pc->index         -= pc->overread_index;
         pc->overread_index = 0;
@@ -190,7 +209,7 @@ static int dirac_combine_frame(AVCodecParserContext *s, AVCodecContext *avctx,
         }
 
         /* Get the picture number to set the pts and dts*/
-        if (parse_timing_info) {
+        if (parse_timing_info && pu1.prev_pu_offset >= 13) {
             uint8_t *cur_pu = pc->buffer +
                               pc->index - 13 - pu1.prev_pu_offset;
             int pts = AV_RB32(cur_pu + 13);
diff --git a/libavcodec/diracdec.c b/libavcodec/diracdec.c
index 49a49525..05c79005 100644
--- a/libavcodec/diracdec.c
+++ b/libavcodec/diracdec.c
@@ -37,27 +37,17 @@
 #include "mpegvideoencdsp.h"
 #include "dirac_dwt.h"
 #include "dirac.h"
+#include "diractab.h"
 #include "diracdsp.h"
 #include "videodsp.h"
 
-/**
- * The spec limits the number of wavelet decompositions to 4 for both
- * level 1 (VC-2) and 128 (long-gop default).
- * 5 decompositions is the maximum before >16-bit buffers are needed.
- * Schroedinger allows this for DD 9,7 and 13,7 wavelets only, limiting
- * the others to 4 decompositions (or 3 for the fidelity filter).
- *
- * We use this instead of MAX_DECOMPOSITIONS to save some memory.
- */
-#define MAX_DWT_LEVELS 5
-
 /**
  * The spec limits this to 3 for frame coding, but in practice can be as high as 6
  */
 #define MAX_REFERENCE_FRAMES 8
 #define MAX_DELAY 5         /* limit for main profile for frame coding (TODO: field coding) */
 #define MAX_FRAMES (MAX_REFERENCE_FRAMES + MAX_DELAY + 1)
-#define MAX_QUANT 68        /* max quant for VC-2 */
+#define MAX_QUANT 255        /* max quant for VC-2 */
 #define MAX_BLOCKSIZE 32    /* maximum xblen/yblen we support */
 
 /**
@@ -83,6 +73,7 @@ typedef struct {
     int interpolated[3];    /* 1 if hpel[] is valid */
     uint8_t *hpel[3][4];
     uint8_t *hpel_base[3][4];
+    int reference;
 } DiracFrame;
 
 typedef struct {
@@ -96,11 +87,12 @@ typedef struct {
 typedef struct SubBand {
     int level;
     int orientation;
-    int stride;
+    int stride; /* in bytes */
     int width;
     int height;
+    int pshift;
     int quant;
-    IDWTELEM *ibuf;
+    uint8_t *ibuf;
     struct SubBand *parent;
 
     /* for low delay */
@@ -109,17 +101,12 @@ typedef struct SubBand {
 } SubBand;
 
 typedef struct Plane {
+    DWTPlane idwt;
+
     int width;
     int height;
     ptrdiff_t stride;
 
-    int idwt_width;
-    int idwt_height;
-    int idwt_stride;
-    IDWTELEM *idwt_buf;
-    IDWTELEM *idwt_buf_base;
-    IDWTELEM *idwt_tmp;
-
     /* block length */
     uint8_t xblen;
     uint8_t yblen;
@@ -138,17 +125,25 @@ typedef struct DiracContext {
     MpegvideoEncDSPContext mpvencdsp;
     VideoDSPContext vdsp;
     DiracDSPContext diracdsp;
+    DiracVersionInfo version;
     GetBitContext gb;
-    dirac_source_params source;
+    AVDiracSeqHeader seq;
     int seen_sequence_header;
     int frame_number;           /* number of the next frame to display       */
     Plane plane[3];
     int chroma_x_shift;
     int chroma_y_shift;
 
+    int bit_depth;              /* bit depth                                 */
+    int pshift;                 /* pixel shift = bit_depth > 8               */
+
     int zero_res;               /* zero residue flag                         */
     int is_arith;               /* whether coeffs use arith or golomb coding */
+    int core_syntax;            /* use core syntax only                      */
     int low_delay;              /* use the low delay syntax                  */
+    int hq_picture;             /* high quality picture, enables low_delay   */
+    int ld_picture;             /* use low delay picture, turns on low_delay */
+    int dc_prediction;          /* has dc prediction                         */
     int globalmc_flag;          /* use global motion compensation            */
     int num_refs;               /* number of reference pictures              */
 
@@ -163,18 +158,24 @@ typedef struct DiracContext {
     unsigned old_delta_quant;
     unsigned codeblock_mode;
 
+    unsigned num_x;              /* number of horizontal slices               */
+    unsigned num_y;              /* number of vertical slices                 */
+
     struct {
         unsigned width;
         unsigned height;
     } codeblock[MAX_DWT_LEVELS+1];
 
     struct {
-        unsigned num_x;         /* number of horizontal slices               */
-        unsigned num_y;         /* number of vertical slices                 */
         AVRational bytes;       /* average bytes per slice                   */
         uint8_t quant[MAX_DWT_LEVELS][4]; /* [DIRAC_STD] E.1 */
     } lowdelay;
 
+    struct {
+        unsigned prefix_bytes;
+        uint64_t size_scaler;
+    } highquality;
+
     struct {
         int pan_tilt[2];        /* pan/tilt vector                           */
         int zrs[2][2];          /* zoom/rotate/shear matrix                  */
@@ -219,17 +220,6 @@ typedef struct DiracContext {
     DiracFrame all_frames[MAX_FRAMES];
 } DiracContext;
 
-/**
- * Dirac Specification ->
- * Parse code values. 9.6.1 Table 9.1
- */
-enum dirac_parse_code {
-    pc_seq_header         = 0x00,
-    pc_eos                = 0x10,
-    pc_aux_data           = 0x20,
-    pc_padding            = 0x30,
-};
-
 enum dirac_subband {
     subband_ll = 0,
     subband_hl = 1,
@@ -238,49 +228,6 @@ enum dirac_subband {
     subband_nb,
 };
 
-static const uint8_t default_qmat[][4][4] = {
-    { { 5,  3,  3,  0}, { 0,  4,  4,  1}, { 0,  5,  5,  2}, { 0,  6,  6,  3} },
-    { { 4,  2,  2,  0}, { 0,  4,  4,  2}, { 0,  5,  5,  3}, { 0,  7,  7,  5} },
-    { { 5,  3,  3,  0}, { 0,  4,  4,  1}, { 0,  5,  5,  2}, { 0,  6,  6,  3} },
-    { { 8,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0} },
-    { { 8,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0} },
-    { { 0,  4,  4,  8}, { 0,  8,  8, 12}, { 0, 13, 13, 17}, { 0, 17, 17, 21} },
-    { { 3,  1,  1,  0}, { 0,  4,  4,  2}, { 0,  6,  6,  5}, { 0,  9,  9,  7} },
-};
-
-static const int qscale_tab[MAX_QUANT+1] = {
-    4,     5,     6,     7,     8,    10,    11,    13,
-    16,    19,    23,    27,    32,    38,    45,    54,
-    64,    76,    91,   108,   128,   152,   181,   215,
-    256,   304,   362,   431,   512,   609,   724,   861,
-    1024,  1218,  1448,  1722,  2048,  2435,  2896,  3444,
-    4096,  4871,  5793,  6889,  8192,  9742, 11585, 13777,
-    16384, 19484, 23170, 27554, 32768, 38968, 46341, 55109,
-    65536, 77936
-};
-
-static const int qoffset_intra_tab[MAX_QUANT+1] = {
-    1,     2,     3,     4,     4,     5,     6,     7,
-    8,    10,    12,    14,    16,    19,    23,    27,
-    32,    38,    46,    54,    64,    76,    91,   108,
-    128,   152,   181,   216,   256,   305,   362,   431,
-    512,   609,   724,   861,  1024,  1218,  1448,  1722,
-    2048,  2436,  2897,  3445,  4096,  4871,  5793,  6889,
-    8192,  9742, 11585, 13777, 16384, 19484, 23171, 27555,
-    32768, 38968
-};
-
-static const int qoffset_inter_tab[MAX_QUANT+1] = {
-    1,     2,     2,     3,     3,     4,     4,     5,
-    6,     7,     9,    10,    12,    14,    17,    20,
-    24,    29,    34,    41,    48,    57,    68,    81,
-    96,   114,   136,   162,   192,   228,   272,   323,
-    384,   457,   543,   646,   768,   913,  1086,  1292,
-    1536,  1827,  2172,  2583,  3072,  3653,  4344,  5166,
-    6144,  7307,  8689, 10333, 12288, 14613, 17378, 20666,
-    24576, 29226
-};
-
 /* magic number division by 3 from schroedinger */
 static inline int divide3(int x)
 {
@@ -318,16 +265,16 @@ static int add_frame(DiracFrame *framelist[], int maxframes, DiracFrame *frame)
 
 static int alloc_sequence_buffers(DiracContext *s)
 {
-    int sbwidth  = DIVRNDUP(s->source.width,  4);
-    int sbheight = DIVRNDUP(s->source.height, 4);
+    int sbwidth  = DIVRNDUP(s->seq.width,  4);
+    int sbheight = DIVRNDUP(s->seq.height, 4);
     int i, w, h, top_padding;
 
     /* todo: think more about this / use or set Plane here */
     for (i = 0; i < 3; i++) {
         int max_xblen = MAX_BLOCKSIZE >> (i ? s->chroma_x_shift : 0);
         int max_yblen = MAX_BLOCKSIZE >> (i ? s->chroma_y_shift : 0);
-        w = s->source.width  >> (i ? s->chroma_x_shift : 0);
-        h = s->source.height >> (i ? s->chroma_y_shift : 0);
+        w = s->seq.width  >> (i ? s->chroma_x_shift : 0);
+        h = s->seq.height >> (i ? s->chroma_y_shift : 0);
 
         /* we allocate the max we support here since num decompositions can
          * change from frame to frame. Stride is aligned to 16 for SIMD, and
@@ -338,10 +285,10 @@ static int alloc_sequence_buffers(DiracContext *s)
         w = FFALIGN(CALC_PADDING(w, MAX_DWT_LEVELS), 8); /* FIXME: Should this be 16 for SSE??? */
         h = top_padding + CALC_PADDING(h, MAX_DWT_LEVELS) + max_yblen/2;
 
-        s->plane[i].idwt_buf_base = av_mallocz_array((w+max_xblen), h * sizeof(IDWTELEM));
-        s->plane[i].idwt_tmp      = av_malloc_array((w+16), sizeof(IDWTELEM));
-        s->plane[i].idwt_buf      = s->plane[i].idwt_buf_base + top_padding*w;
-        if (!s->plane[i].idwt_buf_base || !s->plane[i].idwt_tmp)
+        s->plane[i].idwt.buf_base = av_mallocz_array((w+max_xblen), h * (2 << s->pshift));
+        s->plane[i].idwt.tmp      = av_malloc_array((w+16), 2 << s->pshift);
+        s->plane[i].idwt.buf      = s->plane[i].idwt.buf_base + (top_padding*w)*(2 << s->pshift);
+        if (!s->plane[i].idwt.buf_base || !s->plane[i].idwt.tmp)
             return AVERROR(ENOMEM);
     }
 
@@ -356,8 +303,8 @@ static int alloc_sequence_buffers(DiracContext *s)
 
 static int alloc_buffers(DiracContext *s, int stride)
 {
-    int w = s->source.width;
-    int h = s->source.height;
+    int w = s->seq.width;
+    int h = s->seq.height;
 
     av_assert0(stride >= w);
     stride += 64;
@@ -402,8 +349,8 @@ static void free_sequence_buffers(DiracContext *s)
     memset(s->delay_frames, 0, sizeof(s->delay_frames));
 
     for (i = 0; i < 3; i++) {
-        av_freep(&s->plane[i].idwt_buf_base);
-        av_freep(&s->plane[i].idwt_tmp);
+        av_freep(&s->plane[i].idwt.buf_base);
+        av_freep(&s->plane[i].idwt.tmp);
     }
 
     s->buffer_stride = 0;
@@ -461,51 +408,88 @@ static av_cold int dirac_decode_end(AVCodecContext *avctx)
 
 #define SIGN_CTX(x) (CTX_SIGN_ZERO + ((x) > 0) - ((x) < 0))
 
-static inline void coeff_unpack_arith(DiracArith *c, int qfactor, int qoffset,
-                                      SubBand *b, IDWTELEM *buf, int x, int y)
+static inline int coeff_unpack_golomb(GetBitContext *gb, int qfactor, int qoffset) /* reads one coefficient from gb and returns it dequantised and signed */
 {
-    int coeff, sign;
-    int sign_pred = 0;
-    int pred_ctx = CTX_ZPZN_F1;
-
-    /* Check if the parent subband has a 0 in the corresponding position */
-    if (b->parent)
-        pred_ctx += !!b->parent->ibuf[b->parent->stride * (y>>1) + (x>>1)] << 1;
-
-    if (b->orientation == subband_hl)
-        sign_pred = buf[-b->stride];
-
-    /* Determine if the pixel has only zeros in its neighbourhood */
-    if (x) {
-        pred_ctx += !(buf[-1] | buf[-b->stride] | buf[-1-b->stride]);
-        if (b->orientation == subband_lh)
-            sign_pred = buf[-1];
-    } else {
-        pred_ctx += !buf[-b->stride];
-    }
+    int sign, coeff;
+    uint32_t buf; /* 32 bits peeked from the reader cache; the tests below look at its top bits */
+
+    OPEN_READER(re, gb);
+    UPDATE_CACHE(re, gb);
+    buf = GET_CACHE(re, gb);
 
-    coeff = dirac_get_arith_uint(c, pred_ctx, CTX_COEFF_DATA);
-    if (coeff) {
-        coeff = (coeff * qfactor + qoffset + 2) >> 2;
-        sign  = dirac_get_arith_bit(c, SIGN_CTX(sign_pred));
-        coeff = (coeff ^ -sign) + sign;
+    if (buf & 0x80000000) { /* top bit set: the value is 0; one bit is consumed and no sign bit is read */
+        LAST_SKIP_BITS(re,gb,1);
+        CLOSE_READER(re, gb);
+        return 0;
     }
-    *buf = coeff;
-}
 
-static inline int coeff_unpack_golomb(GetBitContext *gb, int qfactor, int qoffset)
-{
-    int sign, coeff;
+    if (buf & 0xAA800000) { /* mask tests bits 31,29,27,25,23; if any is set a single lookup on the top 8 bits yields the value */
+        buf >>= 32 - 8;
+        SKIP_BITS(re, gb, ff_interleaved_golomb_vlc_len[buf]);
+
+        coeff = ff_interleaved_ue_golomb_vlc_code[buf];
+    } else {
+        unsigned ret = 1; /* accumulates value + 1; the 1 is subtracted again after the loop */
+
+        do {
+            buf >>= 32 - 8;
+            SKIP_BITS(re, gb,
+                           FFMIN(ff_interleaved_golomb_vlc_len[buf], 8));
 
-    coeff = svq3_get_ue_golomb(gb);
-    if (coeff) {
-        coeff = (coeff * qfactor + qoffset + 2) >> 2;
-        sign  = get_bits1(gb);
-        coeff = (coeff ^ -sign) + sign;
+            if (ff_interleaved_golomb_vlc_len[buf] != 9) { /* NOTE(review): a table length of 9 presumably marks a code that continues past these 8 bits -- confirm against the table definition */
+                ret <<= (ff_interleaved_golomb_vlc_len[buf] - 1) >> 1;
+                ret  |= ff_interleaved_dirac_golomb_vlc_code[buf];
+                break;
+            }
+            ret = (ret << 4) | ff_interleaved_dirac_golomb_vlc_code[buf];
+            UPDATE_CACHE(re, gb);
+            buf = GET_CACHE(re, gb);
+        } while (ret<0x8000000U && BITS_AVAILABLE(re, gb)); /* ret < 2^27 keeps the next 4-bit shift inside 32 bits; the loop also ends when BITS_AVAILABLE() is false */
+
+        coeff = ret - 1;
     }
+
+    coeff = (coeff * qfactor + qoffset) >> 2; /* dequantise; the callers in this file pass a qoffset that already includes the +2 rounding term */
+    sign  = SHOW_SBITS(re, gb, 1);
+    LAST_SKIP_BITS(re, gb, 1);
+    coeff = (coeff ^ sign) - sign; /* negates coeff when sign is -1 and leaves it unchanged when sign is 0 */
+
+    CLOSE_READER(re, gb);
     return coeff;
 }
 
+#define UNPACK_ARITH(n, type) /* defines coeff_unpack_arith_<n>(): decodes one arithmetic-coded coefficient at (x, y), dequantises it and stores it in *buf, whose element type is `type` */ \
+    static inline void coeff_unpack_arith_##n(DiracArith *c, int qfactor, int qoffset, \
+                                              SubBand *b, type *buf, int x, int y) \
+    { \
+        int coeff, sign, sign_pred = 0, pred_ctx = CTX_ZPZN_F1; \
+        const int mstride = -(b->stride >> (1+b->pshift)); /* b->stride is in bytes; the shift turns it into elements, negated so buf[mstride] is the sample one row up */ \
+        if (b->parent) { /* context depends on whether the parent subband is non-zero at the corresponding (halved) position */ \
+            const type *pbuf = (type *)b->parent->ibuf; \
+            const int stride = b->parent->stride >> (1+b->parent->pshift); \
+            pred_ctx += !!pbuf[stride * (y>>1) + (x>>1)] << 1; \
+        } \
+        if (b->orientation == subband_hl) /* HL band: sign is predicted from the sample above */ \
+            sign_pred = buf[mstride]; \
+        if (x) { /* context also depends on whether the left, upper and upper-left neighbours are all zero */ \
+            pred_ctx += !(buf[-1] | buf[mstride] | buf[-1 + mstride]); \
+            if (b->orientation == subband_lh) /* LH band: sign is predicted from the sample to the left */ \
+                sign_pred = buf[-1]; \
+        } else { /* first column: only the sample above is examined */ \
+            pred_ctx += !buf[mstride]; \
+        } \
+        coeff = dirac_get_arith_uint(c, pred_ctx, CTX_COEFF_DATA); \
+        if (coeff) { /* a sign bit is decoded only for non-zero magnitudes */ \
+            coeff = (coeff * qfactor + qoffset) >> 2; \
+            sign  = dirac_get_arith_bit(c, SIGN_CTX(sign_pred)); \
+            coeff = (coeff ^ -sign) + sign; /* negates coeff when sign is 1, leaves it unchanged when sign is 0 */ \
+        } \
+        *buf = coeff; \
+    } \
+
+UNPACK_ARITH(8, int16_t) /* coeff_unpack_arith_8: int16_t samples, selected by codeblock() when b->pshift is 0 */
+UNPACK_ARITH(10, int32_t) /* coeff_unpack_arith_10: int32_t samples, selected by codeblock() when b->pshift is non-zero */
+
 /**
  * Decode the coeffs in the rectangle defined by left, right, top, bottom
  * [DIRAC_STD] 13.4.3.2 Codeblock unpacking loop. codeblock()
@@ -517,7 +501,7 @@ static inline void codeblock(DiracContext *s, SubBand *b,
 {
     int x, y, zero_block;
     int qoffset, qfactor;
-    IDWTELEM *buf;
+    uint8_t *buf;
 
     /* check for any coded coefficients in this codeblock */
     if (!blockcnt_one) {
@@ -543,51 +527,73 @@ static inline void codeblock(DiracContext *s, SubBand *b,
         b->quant = quant;
     }
 
-    b->quant = FFMIN(b->quant, MAX_QUANT);
+    if (b->quant > 115) {
+        av_log(s->avctx, AV_LOG_ERROR, "Unsupported quant %d\n", b->quant);
+        b->quant = 0;
+        return;
+    }
 
-    qfactor = qscale_tab[b->quant];
+    qfactor = ff_dirac_qscale_tab[b->quant];
     /* TODO: context pointer? */
     if (!s->num_refs)
-        qoffset = qoffset_intra_tab[b->quant];
+        qoffset = ff_dirac_qoffset_intra_tab[b->quant] + 2;
     else
-        qoffset = qoffset_inter_tab[b->quant];
+        qoffset = ff_dirac_qoffset_inter_tab[b->quant] + 2;
 
     buf = b->ibuf + top * b->stride;
-    for (y = top; y < bottom; y++) {
-        for (x = left; x < right; x++) {
-            /* [DIRAC_STD] 13.4.4 Subband coefficients. coeff_unpack() */
-            if (is_arith)
-                coeff_unpack_arith(c, qfactor, qoffset, b, buf+x, x, y);
-            else
-                buf[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
+    if (is_arith) {
+        for (y = top; y < bottom; y++) {
+            for (x = left; x < right; x++) {
+                if (b->pshift) {
+                    coeff_unpack_arith_10(c, qfactor, qoffset, b, (int32_t*)(buf)+x, x, y);
+                } else {
+                    coeff_unpack_arith_8(c, qfactor, qoffset, b, (int16_t*)(buf)+x, x, y);
+                }
+            }
+            buf += b->stride;
         }
-        buf += b->stride;
-    }
+    } else {
+        for (y = top; y < bottom; y++) {
+            for (x = left; x < right; x++) {
+                int val = coeff_unpack_golomb(gb, qfactor, qoffset);
+                if (b->pshift) {
+                    AV_WN32(&buf[4*x], val);
+                } else {
+                    AV_WN16(&buf[2*x], val);
+                }
+            }
+            buf += b->stride;
+         }
+     }
 }
 
 /**
  * Dirac Specification ->
  * 13.3 intra_dc_prediction(band)
  */
-static inline void intra_dc_prediction(SubBand *b)
-{
-    IDWTELEM *buf = b->ibuf;
-    int x, y;
-
-    for (x = 1; x < b->width; x++)
-        buf[x] += buf[x-1];
-    buf += b->stride;
-
-    for (y = 1; y < b->height; y++) {
-        buf[0] += buf[-b->stride];
-
-        for (x = 1; x < b->width; x++) {
-            int pred = buf[x - 1] + buf[x - b->stride] + buf[x - b->stride-1];
-            buf[x]  += divide3(pred);
-        }
-        buf += b->stride;
-    }
-}
+#define INTRA_DC_PRED(n, type) /* defines intra_dc_prediction_<n>(): adds the DC prediction to every sample of subband b in place, treating b->ibuf as an array of `type` */ \
+    static inline void intra_dc_prediction_##n(SubBand *b) \
+    { \
+        type *buf = (type*)b->ibuf; \
+        int x, y; \
+        /* first row: each sample is predicted from its left neighbour only; sample (0,0) is left as is */ \
+        for (x = 1; x < b->width; x++) \
+            buf[x] += buf[x-1]; \
+        buf += (b->stride >> (1+b->pshift)); /* next row; b->stride is in bytes, the shift converts it to elements */ \
+        \
+        for (y = 1; y < b->height; y++) { \
+            buf[0] += buf[-(b->stride >> (1+b->pshift))]; /* first column: predicted from the sample above */ \
+            \
+            for (x = 1; x < b->width; x++) { \
+                int pred = buf[x - 1] + buf[x - (b->stride >> (1+b->pshift))] + buf[x - (b->stride >> (1+b->pshift))-1]; /* sum of the left, upper and upper-left neighbours */ \
+                buf[x]  += divide3(pred); /* the prediction is that sum divided by 3 */ \
+            } \
+            buf += (b->stride >> (1+b->pshift)); \
+        } \
+    } \
+
+INTRA_DC_PRED(8, int16_t) /* intra_dc_prediction_8: int16_t samples */
+INTRA_DC_PRED(10, int32_t) /* intra_dc_prediction_10: int32_t samples */
 
 /**
  * Dirac Specification ->
@@ -622,8 +628,13 @@ static av_always_inline void decode_subband_internal(DiracContext *s, SubBand *b
         top = bottom;
     }
 
-    if (b->orientation == subband_ll && s->num_refs == 0)
-        intra_dc_prediction(b);
+    if (b->orientation == subband_ll && s->num_refs == 0) {
+        if (s->pshift) {
+            intra_dc_prediction_10(b);
+        } else {
+            intra_dc_prediction_8(b);
+        }
+    }
 }
 
 static int decode_subband_arith(AVCodecContext *avctx, void *b)
@@ -679,51 +690,73 @@ static void decode_component(DiracContext *s, int comp)
         avctx->execute(avctx, decode_subband_golomb, bands, NULL, num_bands, sizeof(SubBand*));
 }
 
-/* [DIRAC_STD] 13.5.5.2 Luma slice subband data. luma_slice_band(level,orient,sx,sy) --> if b2 == NULL */
-/* [DIRAC_STD] 13.5.5.3 Chroma slice subband data. chroma_slice_band(level,orient,sx,sy) --> if b2 != NULL */
-static void lowdelay_subband(DiracContext *s, GetBitContext *gb, int quant,
-                             int slice_x, int slice_y, int bits_end,
-                             SubBand *b1, SubBand *b2)
+#define PARSE_VALUES(type, x, gb, ebits, buf1, buf2) /* unpacks the coefficient at column x into buf1 and, when buf2 is non-NULL, the next one into buf2; uses qfactor and qoffset from the expansion scope */ \
+    type *buf = (type *)buf1; /* declares a local, so each expansion needs a brace scope of its own */ \
+    buf[x] = coeff_unpack_golomb(gb, qfactor, qoffset); \
+    if (get_bits_count(gb) >= ebits) /* reached the end offset: stop decoding */ \
+        return; /* returns from the function that expands the macro */ \
+    if (buf2) { \
+        buf = (type *)buf2; \
+        buf[x] = coeff_unpack_golomb(gb, qfactor, qoffset); \
+        if (get_bits_count(gb) >= ebits) \
+            return; \
+    } \
+
+static void decode_subband(DiracContext *s, GetBitContext *gb, int quant,
+                           int slice_x, int slice_y, int bits_end,
+                           SubBand *b1, SubBand *b2) /* decodes the part of b1 (and of b2, if non-NULL, sample-interleaved with b1) that belongs to slice (slice_x, slice_y), stopping once gb reaches bits_end */
 {
-    int left   = b1->width  * slice_x    / s->lowdelay.num_x;
-    int right  = b1->width  *(slice_x+1) / s->lowdelay.num_x;
-    int top    = b1->height * slice_y    / s->lowdelay.num_y;
-    int bottom = b1->height *(slice_y+1) / s->lowdelay.num_y;
+    int left   = b1->width  * slice_x    / s->num_x; /* slice rectangle inside the subband: columns [left, right) ... */
+    int right  = b1->width  *(slice_x+1) / s->num_x;
+    int top    = b1->height * slice_y    / s->num_y; /* ... and rows [top, bottom) */
+    int bottom = b1->height *(slice_y+1) / s->num_y;
 
-    int qfactor = qscale_tab[FFMIN(quant, MAX_QUANT)];
-    int qoffset = qoffset_intra_tab[FFMIN(quant, MAX_QUANT)];
+    int qfactor, qoffset; /* assigned only after quant has been range-checked below */
 
-    IDWTELEM *buf1 =      b1->ibuf + top * b1->stride;
-    IDWTELEM *buf2 = b2 ? b2->ibuf + top * b2->stride : NULL;
+    uint8_t *buf1 =      b1->ibuf + top * b1->stride; /* byte pointers: SubBand.stride is in bytes */
+    uint8_t *buf2 = b2 ? b2->ibuf + top * b2->stride: NULL;
     int x, y;
+
+    if (quant > 115) { /* values above 115 are rejected; nothing is read from gb or written to the subband in that case */
+        av_log(s->avctx, AV_LOG_ERROR, "Unsupported quant %d\n", quant);
+        return;
+    }
+    qfactor = ff_dirac_qscale_tab[quant & 0x7f]; /* NOTE(review): for 0..115 the mask is a no-op; it only changes the index if quant is negative -- confirm whether that can happen */
+    qoffset = ff_dirac_qoffset_intra_tab[quant & 0x7f] + 2; /* +2 is the rounding term for the >> 2 in coeff_unpack_golomb() */
     /* we have to constantly check for overread since the spec explicitly
        requires this, with the meaning that all remaining coeffs are set to 0 */
     if (get_bits_count(gb) >= bits_end)
         return;
 
-    for (y = top; y < bottom; y++) {
-        for (x = left; x < right; x++) {
-            buf1[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
-            if (get_bits_count(gb) >= bits_end)
-                return;
-            if (buf2) {
-                buf2[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
-                if (get_bits_count(gb) >= bits_end)
-                    return;
+    if (s->pshift) { /* pshift set: coefficients are stored as int32_t */
+        for (y = top; y < bottom; y++) {
+            for (x = left; x < right; x++) {
+                PARSE_VALUES(int32_t, x, gb, bits_end, buf1, buf2); /* may return from this function once bits_end is reached */
+            }
+            buf1 += b1->stride;
+            if (buf2)
+                buf2 += b2->stride;
+        }
+    }
+    else { /* pshift clear: coefficients are stored as int16_t */
+        for (y = top; y < bottom; y++) {
+            for (x = left; x < right; x++) {
+                PARSE_VALUES(int16_t, x, gb, bits_end, buf1, buf2); /* may return from this function once bits_end is reached */
             }
+            buf1 += b1->stride;
+            if (buf2)
+                buf2 += b2->stride;
         }
-        buf1 += b1->stride;
-        if (buf2)
-            buf2 += b2->stride;
     }
 }
 
-struct lowdelay_slice {
+/* Per-slice work item used by the Low Delay and High Quality profiles: decode_lowdelay() fills one per slice and passes the array to avctx->execute() */
+typedef struct DiracSlice {
     GetBitContext gb;
     int slice_x;
     int slice_y;
     int bytes;
-};
+} DiracSlice;
 
 
 /**
@@ -733,7 +766,7 @@ struct lowdelay_slice {
 static int decode_lowdelay_slice(AVCodecContext *avctx, void *arg)
 {
     DiracContext *s = avctx->priv_data;
-    struct lowdelay_slice *slice = arg;
+    DiracSlice *slice = arg;
     GetBitContext *gb = &slice->gb;
     enum dirac_subband orientation;
     int level, quant, chroma_bits, chroma_end;
@@ -747,8 +780,8 @@ static int decode_lowdelay_slice(AVCodecContext *avctx, void *arg)
     for (level = 0; level < s->wavelet_depth; level++)
         for (orientation = !!level; orientation < 4; orientation++) {
             quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
-            lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, luma_end,
-                             &s->plane[0].band[level][orientation], NULL);
+            decode_subband(s, gb, quant, slice->slice_x, slice->slice_y, luma_end,
+                           &s->plane[0].band[level][orientation], NULL);
         }
 
     /* consume any unused bits from luma */
@@ -760,10 +793,56 @@ static int decode_lowdelay_slice(AVCodecContext *avctx, void *arg)
     for (level = 0; level < s->wavelet_depth; level++)
         for (orientation = !!level; orientation < 4; orientation++) {
             quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
-            lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, chroma_end,
-                             &s->plane[1].band[level][orientation],
-                             &s->plane[2].band[level][orientation]);
+            decode_subband(s, gb, quant, slice->slice_x, slice->slice_y, chroma_end,
+                           &s->plane[1].band[level][orientation],
+                           &s->plane[2].band[level][orientation]);
+        }
+
+    return 0;
+}
+
+/**
+ * VC-2 Specification -> 13.5.3 hq_slice(sx,sy). Worker run through avctx->execute(); arg points to a DiracSlice.
+ * Skips the slice prefix, reads the 8-bit quantiser index, then decodes the three planes, each from its own length-prefixed run of bits. Returns 0, or AVERROR_INVALIDDATA when a plane's end offset is INT_MAX or more.
+ */
+static int decode_hq_slice(AVCodecContext *avctx, void *arg)
+{
+    int i, quant, level, orientation, quant_idx;
+    uint8_t quants[MAX_DWT_LEVELS][4]; /* per-subband quantiser for this slice, indexed [level][orientation] */
+    DiracContext *s = avctx->priv_data;
+    DiracSlice *slice = arg;
+    GetBitContext *gb = &slice->gb;
+
+    skip_bits_long(gb, 8*s->highquality.prefix_bytes); /* the prefix bytes are skipped without being interpreted */
+    quant_idx = get_bits(gb, 8);
+
+    /* Slice quantization (slice_quantizers() in the specs) */
+    for (level = 0; level < s->wavelet_depth; level++) {
+        for (orientation = !!level; orientation < 4; orientation++) { /* level 0 covers orientations 0..3, higher levels 1..3 */
+            quant = FFMAX(quant_idx - s->lowdelay.quant[level][orientation], 0); /* uses the lowdelay.quant matrix; clamped so it never goes negative */
+            quants[level][orientation] = quant;
+        }
+    }
+
+    /* Luma + 2 Chroma planes */
+    for (i = 0; i < 3; i++) {
+        int64_t length = s->highquality.size_scaler * get_bits(gb, 8); /* coded plane length in bytes: an 8-bit count scaled by size_scaler */
+        int64_t bits_left = 8 * length;
+        int64_t bits_end = get_bits_count(gb) + bits_left; /* bit position in gb at which this plane's data ends */
+
+        if (bits_end >= INT_MAX) { /* decode_subband() takes bits_end as an int */
+            av_log(s->avctx, AV_LOG_ERROR, "end too far away\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        for (level = 0; level < s->wavelet_depth; level++) {
+            for (orientation = !!level; orientation < 4; orientation++) {
+                decode_subband(s, gb, quants[level][orientation], slice->slice_x, slice->slice_y, bits_end,
+                               &s->plane[i].band[level][orientation], NULL); /* each plane is decoded separately, so b2 is NULL */
+            }
         }
+        skip_bits_long(gb, bits_end - get_bits_count(gb)); /* move to this plane's end so the next plane's length byte is read from the right place */
+    }
 
     return 0;
 }
@@ -775,12 +854,13 @@ static int decode_lowdelay_slice(AVCodecContext *avctx, void *arg)
 static int decode_lowdelay(DiracContext *s)
 {
     AVCodecContext *avctx = s->avctx;
-    int slice_x, slice_y, bytes, bufsize;
+    int slice_x, slice_y, bufsize;
+    int64_t bytes = 0;
     const uint8_t *buf;
-    struct lowdelay_slice *slices;
+    DiracSlice *slices;
     int slice_num = 0;
 
-    slices = av_mallocz_array(s->lowdelay.num_x, s->lowdelay.num_y * sizeof(struct lowdelay_slice));
+    slices = av_mallocz_array(s->num_x, s->num_y * sizeof(DiracSlice));
     if (!slices)
         return AVERROR(ENOMEM);
 
@@ -789,29 +869,70 @@ static int decode_lowdelay(DiracContext *s)
     buf = s->gb.buffer + get_bits_count(&s->gb)/8;
     bufsize = get_bits_left(&s->gb);
 
-    for (slice_y = 0; bufsize > 0 && slice_y < s->lowdelay.num_y; slice_y++)
-        for (slice_x = 0; bufsize > 0 && slice_x < s->lowdelay.num_x; slice_x++) {
-            bytes = (slice_num+1) * s->lowdelay.bytes.num / s->lowdelay.bytes.den
-                - slice_num    * s->lowdelay.bytes.num / s->lowdelay.bytes.den;
+    if (s->hq_picture) {
+        int i;
 
-            slices[slice_num].bytes   = bytes;
-            slices[slice_num].slice_x = slice_x;
-            slices[slice_num].slice_y = slice_y;
-            init_get_bits(&slices[slice_num].gb, buf, bufsize);
-            slice_num++;
+        for (slice_y = 0; bufsize > 0 && slice_y < s->num_y; slice_y++) {
+            for (slice_x = 0; bufsize > 0 && slice_x < s->num_x; slice_x++) {
+                bytes = s->highquality.prefix_bytes + 1;
+                for (i = 0; i < 3; i++) {
+                    if (bytes <= bufsize/8)
+                        bytes += buf[bytes] * s->highquality.size_scaler + 1;
+                }
+                if (bytes >= INT_MAX) {
+                    av_log(s->avctx, AV_LOG_ERROR, "too many bytes\n");
+                    av_free(slices);
+                    return AVERROR_INVALIDDATA;
+                }
 
-            buf     += bytes;
-            if (bufsize/8 >= bytes)
-                bufsize -= bytes*8;
-            else
-                bufsize = 0;
+                slices[slice_num].bytes   = bytes;
+                slices[slice_num].slice_x = slice_x;
+                slices[slice_num].slice_y = slice_y;
+                init_get_bits(&slices[slice_num].gb, buf, bufsize);
+                slice_num++;
+
+                buf     += bytes;
+                if (bufsize/8 >= bytes)
+                    bufsize -= bytes*8;
+                else
+                    bufsize = 0;
+            }
+        }
+        avctx->execute(avctx, decode_hq_slice, slices, NULL, slice_num,
+                       sizeof(DiracSlice));
+    } else {
+        for (slice_y = 0; bufsize > 0 && slice_y < s->num_y; slice_y++) {
+            for (slice_x = 0; bufsize > 0 && slice_x < s->num_x; slice_x++) {
+                bytes = (slice_num+1) * s->lowdelay.bytes.num / s->lowdelay.bytes.den
+                    - slice_num    * s->lowdelay.bytes.num / s->lowdelay.bytes.den;
+                slices[slice_num].bytes   = bytes;
+                slices[slice_num].slice_x = slice_x;
+                slices[slice_num].slice_y = slice_y;
+                init_get_bits(&slices[slice_num].gb, buf, bufsize);
+                slice_num++;
+
+                buf     += bytes;
+                if (bufsize/8 >= bytes)
+                    bufsize -= bytes*8;
+                else
+                    bufsize = 0;
+            }
         }
+        avctx->execute(avctx, decode_lowdelay_slice, slices, NULL, slice_num,
+                       sizeof(DiracSlice)); /* [DIRAC_STD] 13.5.2 Slices */
+    }
 
-    avctx->execute(avctx, decode_lowdelay_slice, slices, NULL, slice_num,
-                   sizeof(struct lowdelay_slice)); /* [DIRAC_STD] 13.5.2 Slices */
-    intra_dc_prediction(&s->plane[0].band[0][0]);  /* [DIRAC_STD] 13.3 intra_dc_prediction() */
-    intra_dc_prediction(&s->plane[1].band[0][0]);  /* [DIRAC_STD] 13.3 intra_dc_prediction() */
-    intra_dc_prediction(&s->plane[2].band[0][0]);  /* [DIRAC_STD] 13.3 intra_dc_prediction() */
+    if (s->dc_prediction) {
+        if (s->pshift) {
+            intra_dc_prediction_10(&s->plane[0].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
+            intra_dc_prediction_10(&s->plane[1].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
+            intra_dc_prediction_10(&s->plane[2].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
+        } else {
+            intra_dc_prediction_8(&s->plane[0].band[0][0]);
+            intra_dc_prediction_8(&s->plane[1].band[0][0]);
+            intra_dc_prediction_8(&s->plane[2].band[0][0]);
+        }
+    }
     av_free(slices);
     return 0;
 }
@@ -823,11 +944,11 @@ static void init_planes(DiracContext *s)
     for (i = 0; i < 3; i++) {
         Plane *p = &s->plane[i];
 
-        p->width       = s->source.width  >> (i ? s->chroma_x_shift : 0);
-        p->height      = s->source.height >> (i ? s->chroma_y_shift : 0);
-        p->idwt_width  = w = CALC_PADDING(p->width , s->wavelet_depth);
-        p->idwt_height = h = CALC_PADDING(p->height, s->wavelet_depth);
-        p->idwt_stride = FFALIGN(p->idwt_width, 8);
+        p->width       = s->seq.width  >> (i ? s->chroma_x_shift : 0);
+        p->height      = s->seq.height >> (i ? s->chroma_y_shift : 0);
+        p->idwt.width  = w = CALC_PADDING(p->width , s->wavelet_depth);
+        p->idwt.height = h = CALC_PADDING(p->height, s->wavelet_depth);
+        p->idwt.stride = FFALIGN(p->idwt.width, 8) << (1 + s->pshift);
 
         for (level = s->wavelet_depth-1; level >= 0; level--) {
             w = w>>1;
@@ -835,17 +956,18 @@ static void init_planes(DiracContext *s)
             for (orientation = !!level; orientation < 4; orientation++) {
                 SubBand *b = &p->band[level][orientation];
 
-                b->ibuf   = p->idwt_buf;
+                b->pshift = s->pshift;
+                b->ibuf   = p->idwt.buf;
                 b->level  = level;
-                b->stride = p->idwt_stride << (s->wavelet_depth - level);
+                b->stride = p->idwt.stride << (s->wavelet_depth - level);
                 b->width  = w;
                 b->height = h;
                 b->orientation = orientation;
 
                 if (orientation & 1)
-                    b->ibuf += w;
+                    b->ibuf += w << (1+b->pshift);
                 if (orientation > 1)
-                    b->ibuf += b->stride>>1;
+                    b->ibuf += (b->stride>>1);
 
                 if (level)
                     b->parent = &p->band[level-1][orientation];
@@ -1022,20 +1144,29 @@ static int dirac_unpack_idwt_params(DiracContext *s)
             }
 
             CHECKEDREAD(s->codeblock_mode, tmp > 1, "unknown codeblock mode\n")
-        } else
+        }
+        else {
             for (i = 0; i <= s->wavelet_depth; i++)
                 s->codeblock[i].width = s->codeblock[i].height = 1;
-    } else {
-        /* Slice parameters + quantization matrix*/
-        /*[DIRAC_STD] 11.3.4 Slice coding Parameters (low delay syntax only). slice_parameters() */
-        s->lowdelay.num_x     = svq3_get_ue_golomb(gb);
-        s->lowdelay.num_y     = svq3_get_ue_golomb(gb);
-        s->lowdelay.bytes.num = svq3_get_ue_golomb(gb);
-        s->lowdelay.bytes.den = svq3_get_ue_golomb(gb);
-
-        if (s->lowdelay.bytes.den <= 0) {
-            av_log(s->avctx,AV_LOG_ERROR,"Invalid lowdelay.bytes.den\n");
-            return AVERROR_INVALIDDATA;
+        }
+    }
+    else {
+        s->num_x        = svq3_get_ue_golomb(gb);
+        s->num_y        = svq3_get_ue_golomb(gb);
+        if (s->ld_picture) {
+            s->lowdelay.bytes.num = svq3_get_ue_golomb(gb);
+            s->lowdelay.bytes.den = svq3_get_ue_golomb(gb);
+            if (s->lowdelay.bytes.den <= 0) {
+                av_log(s->avctx,AV_LOG_ERROR,"Invalid lowdelay.bytes.den\n");
+                return AVERROR_INVALIDDATA;
+            }
+        } else if (s->hq_picture) {
+            s->highquality.prefix_bytes = svq3_get_ue_golomb(gb);
+            s->highquality.size_scaler  = svq3_get_ue_golomb(gb);
+            if (s->highquality.prefix_bytes >= INT_MAX / 8) {
+                av_log(s->avctx,AV_LOG_ERROR,"too many prefix bytes\n");
+                return AVERROR_INVALIDDATA;
+            }
         }
 
         /* [DIRAC_STD] 11.3.5 Quantisation matrices (low-delay syntax). quant_matrix() */
@@ -1056,7 +1187,7 @@ static int dirac_unpack_idwt_params(DiracContext *s)
             /* default quantization matrix */
             for (level = 0; level < s->wavelet_depth; level++)
                 for (i = 0; i < 4; i++) {
-                    s->lowdelay.quant[level][i] = default_qmat[s->wavelet_idx][level][i];
+                    s->lowdelay.quant[level][i] = ff_dirac_default_qmat[s->wavelet_idx][level][i];
                     /* haar with no shift differs for different depths */
                     if (s->wavelet_idx == 3)
                         s->lowdelay.quant[level][i] += 4*(s->wavelet_depth-1 - level);
@@ -1250,8 +1381,8 @@ static int dirac_unpack_block_motion_data(DiracContext *s)
     align_get_bits(gb);
 
     /* [DIRAC_STD] 11.2.4 and 12.2.1 Number of blocks and superblocks */
-    s->sbwidth  = DIVRNDUP(s->source.width,  4*s->plane[0].xbsep);
-    s->sbheight = DIVRNDUP(s->source.height, 4*s->plane[0].ybsep);
+    s->sbwidth  = DIVRNDUP(s->seq.width,  4*s->plane[0].xbsep);
+    s->sbheight = DIVRNDUP(s->seq.height, 4*s->plane[0].ybsep);
     s->blwidth  = 4 * s->sbwidth;
     s->blheight = 4 * s->sbheight;
 
@@ -1614,7 +1745,7 @@ static int dirac_decode_frame_internal(DiracContext *s)
         /* [DIRAC_STD] 13.5.1 low_delay_transform_data() */
         for (comp = 0; comp < 3; comp++) {
             Plane *p = &s->plane[comp];
-            memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
+            memset(p->idwt.buf, 0, p->idwt.stride * p->idwt.height);
         }
         if (!s->zero_res) {
             if ((ret = decode_lowdelay(s)) < 0)
@@ -1632,19 +1763,22 @@ static int dirac_decode_frame_internal(DiracContext *s)
 
         if (!s->zero_res && !s->low_delay)
         {
-            memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
+            memset(p->idwt.buf, 0, p->idwt.stride * p->idwt.height);
             decode_component(s, comp); /* [DIRAC_STD] 13.4.1 core_transform_data() */
         }
-        ret = ff_spatial_idwt_init2(&d, p->idwt_buf, p->idwt_width, p->idwt_height, p->idwt_stride,
-                                    s->wavelet_idx+2, s->wavelet_depth, p->idwt_tmp);
+        ret = ff_spatial_idwt_init(&d, &p->idwt, s->wavelet_idx+2,
+                                   s->wavelet_depth, s->bit_depth);
         if (ret < 0)
             return ret;
 
         if (!s->num_refs) { /* intra */
             for (y = 0; y < p->height; y += 16) {
+                int idx = (s->bit_depth - 8) >> 1;
                 ff_spatial_idwt_slice2(&d, y+16); /* decode */
-                s->diracdsp.put_signed_rect_clamped(frame + y*p->stride, p->stride,
-                                                    p->idwt_buf + y*p->idwt_stride, p->idwt_stride, p->width, 16);
+                s->diracdsp.put_signed_rect_clamped[idx](frame + y*p->stride,
+                                                         p->stride,
+                                                         p->idwt.buf + y*p->idwt.stride,
+                                                         p->idwt.stride, p->width, 16);
             }
         } else { /* inter */
             int rowheight = p->ybsep*p->stride;
@@ -1680,8 +1814,10 @@ static int dirac_decode_frame_internal(DiracContext *s)
 
                 mctmp += (start - dsty)*p->stride + p->xoffset;
                 ff_spatial_idwt_slice2(&d, start + h); /* decode */
+                /* NOTE: add_rect_clamped hasn't been templated hence the shifts.
+                 * idwt.stride is passed as pixels, not in bytes as in the rest of the decoder */
                 s->diracdsp.add_rect_clamped(frame + start*p->stride, mctmp, p->stride,
-                                             p->idwt_buf + start*p->idwt_stride, p->idwt_stride, p->width, h);
+                                             (int16_t*)(p->idwt.buf) + start*(p->idwt.stride >> 1), (p->idwt.stride >> 1), p->width, h);
 
                 dsty += p->ybsep;
             }
@@ -1771,13 +1907,13 @@ static int dirac_decode_picture_header(DiracContext *s)
     }
 
     /* retire the reference frames that are not used anymore */
-    if (s->current_picture->avframe->reference) {
+    if (s->current_picture->reference) {
         retire = (picnum + dirac_get_se_golomb(gb)) & 0xFFFFFFFF;
         if (retire != picnum) {
             DiracFrame *retire_pic = remove_frame(s->ref_frames, retire);
 
             if (retire_pic)
-                retire_pic->avframe->reference &= DELAYED_PIC_REF;
+                retire_pic->reference &= DELAYED_PIC_REF;
             else
                 av_log(s->avctx, AV_LOG_DEBUG, "Frame to retire not found\n");
         }
@@ -1785,7 +1921,7 @@ static int dirac_decode_picture_header(DiracContext *s)
         /* if reference array is full, remove the oldest as per the spec */
         while (add_frame(s->ref_frames, MAX_REFERENCE_FRAMES, s->current_picture)) {
             av_log(s->avctx, AV_LOG_ERROR, "Reference frame overflow\n");
-            remove_frame(s->ref_frames, s->ref_frames[0]->avframe->display_picture_number)->avframe->reference &= DELAYED_PIC_REF;
+            remove_frame(s->ref_frames, s->ref_frames[0]->avframe->display_picture_number)->reference &= DELAYED_PIC_REF;
         }
     }
 
@@ -1822,7 +1958,7 @@ static int get_delayed_pic(DiracContext *s, AVFrame *picture, int *got_frame)
         s->delay_frames[i] = s->delay_frames[i+1];
 
     if (out) {
-        out->avframe->reference ^= DELAYED_PIC_REF;
+        out->reference ^= DELAYED_PIC_REF;
         *got_frame = 1;
         if((ret = av_frame_ref(picture, out->avframe)) < 0)
             return ret;
@@ -1844,7 +1980,9 @@ static int dirac_decode_data_unit(AVCodecContext *avctx, const uint8_t *buf, int
 {
     DiracContext *s   = avctx->priv_data;
     DiracFrame *pic   = NULL;
-    int ret, i, parse_code;
+    AVDiracSeqHeader *dsh;
+    int ret, i;
+    uint8_t parse_code;
     unsigned tmp;
 
     if (size < DATA_UNIT_HEADER_SIZE)
@@ -1854,14 +1992,39 @@ static int dirac_decode_data_unit(AVCodecContext *avctx, const uint8_t *buf, int
 
     init_get_bits(&s->gb, &buf[13], 8*(size - DATA_UNIT_HEADER_SIZE));
 
-    if (parse_code == pc_seq_header) {
+    if (parse_code == DIRAC_PCODE_SEQ_HEADER) {
         if (s->seen_sequence_header)
             return 0;
 
         /* [DIRAC_STD] 10. Sequence header */
-        ret = avpriv_dirac_parse_sequence_header(avctx, &s->gb, &s->source);
-        if (ret < 0)
+        ret = av_dirac_parse_sequence_header(&dsh, buf + DATA_UNIT_HEADER_SIZE, size - DATA_UNIT_HEADER_SIZE, avctx);
+        if (ret < 0) {
+            av_log(avctx, AV_LOG_ERROR, "error parsing sequence header\n"); /* newline-terminated like the other messages here */
             return ret;
+        }
+
+        ret = ff_set_dimensions(avctx, dsh->width, dsh->height);
+        if (ret < 0) {
+            av_freep(&dsh);
+            return ret;
+        }
+
+        ff_set_sar(avctx, dsh->sample_aspect_ratio);
+        avctx->pix_fmt         = dsh->pix_fmt;
+        avctx->color_range     = dsh->color_range;
+        avctx->color_trc       = dsh->color_trc;
+        avctx->color_primaries = dsh->color_primaries;
+        avctx->colorspace      = dsh->colorspace;
+        avctx->profile         = dsh->profile;
+        avctx->level           = dsh->level;
+        avctx->framerate       = dsh->framerate;
+        s->bit_depth           = dsh->bit_depth;
+        s->version.major       = dsh->version.major;
+        s->version.minor       = dsh->version.minor;
+        s->seq                 = *dsh;
+        av_freep(&dsh);
+
+        s->pshift = s->bit_depth > 8;
 
         avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_x_shift, &s->chroma_y_shift);
 
@@ -1870,10 +2033,10 @@ static int dirac_decode_data_unit(AVCodecContext *avctx, const uint8_t *buf, int
             return ret;
 
         s->seen_sequence_header = 1;
-    } else if (parse_code == pc_eos) { /* [DIRAC_STD] End of Sequence */
+    } else if (parse_code == DIRAC_PCODE_END_SEQ) { /* [DIRAC_STD] End of Sequence */
         free_sequence_buffers(s);
         s->seen_sequence_header = 0;
-    } else if (parse_code == pc_aux_data) {
+    } else if (parse_code == DIRAC_PCODE_AUX) {
         if (buf[13] == 1) {     /* encoder implementation/version */
             int ver[3];
             /* versions older than 1.0.8 don't store quant delta for
@@ -1905,12 +2068,25 @@ static int dirac_decode_data_unit(AVCodecContext *avctx, const uint8_t *buf, int
             av_log(avctx, AV_LOG_ERROR, "num_refs of 3\n");
             return AVERROR_INVALIDDATA;
         }
-        s->num_refs    = tmp;
-        s->is_arith    = (parse_code & 0x48) == 0x08;          /* [DIRAC_STD] using_ac()      */
-        s->low_delay   = (parse_code & 0x88) == 0x88;          /* [DIRAC_STD] is_low_delay()  */
-        pic->avframe->reference = (parse_code & 0x0C) == 0x0C;  /* [DIRAC_STD]  is_reference() */
-        pic->avframe->key_frame = s->num_refs == 0;             /* [DIRAC_STD] is_intra()      */
-        pic->avframe->pict_type = s->num_refs + 1;              /* Definition of AVPictureType in avutil.h */
+        s->num_refs      = tmp;
+        s->is_arith      = (parse_code & 0x48) == 0x08;          /* [DIRAC_STD] using_ac()            */
+        s->low_delay     = (parse_code & 0x88) == 0x88;          /* [DIRAC_STD] is_low_delay()        */
+        s->core_syntax   = (parse_code & 0x88) == 0x08;          /* [DIRAC_STD] is_core_syntax()      */
+        s->ld_picture    = (parse_code & 0xF8) == 0xC8;          /* [DIRAC_STD] is_ld_picture()       */
+        s->hq_picture    = (parse_code & 0xF8) == 0xE8;          /* [DIRAC_STD] is_hq_picture()       */
+        s->dc_prediction = (parse_code & 0x28) == 0x08;          /* [DIRAC_STD] using_dc_prediction() */
+        pic->reference   = (parse_code & 0x0C) == 0x0C;          /* [DIRAC_STD] is_reference()        */
+        pic->avframe->key_frame = s->num_refs == 0;              /* [DIRAC_STD] is_intra()            */
+        pic->avframe->pict_type = s->num_refs + 1;               /* Definition of AVPictureType in avutil.h */
+
+        /* VC-2 Low Delay has a different parse code than the Dirac Low Delay */
+        if (s->version.minor == 2 && parse_code == 0x88)
+            s->ld_picture = 1;
+
+        if (s->low_delay && !(s->ld_picture || s->hq_picture) ) {
+            av_log(avctx, AV_LOG_ERROR, "Invalid low delay flag\n");
+            return AVERROR_INVALIDDATA;
+        }
 
         if ((ret = get_buffer_with_edge(avctx, pic->avframe, (parse_code & 0x0C) == 0x0C ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
             return ret;
@@ -1947,7 +2123,7 @@ static int dirac_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
 
     /* release unused frames */
     for (i = 0; i < MAX_FRAMES; i++)
-        if (s->all_frames[i].avframe->data[0] && !s->all_frames[i].avframe->reference) {
+        if (s->all_frames[i].avframe->data[0] && !s->all_frames[i].reference) {
             av_frame_unref(s->all_frames[i].avframe);
             memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
         }
@@ -1997,7 +2173,7 @@ static int dirac_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     if (s->current_picture->avframe->display_picture_number > s->frame_number) {
         DiracFrame *delayed_frame = remove_frame(s->delay_frames, s->frame_number);
 
-        s->current_picture->avframe->reference |= DELAYED_PIC_REF;
+        s->current_picture->reference |= DELAYED_PIC_REF;
 
         if (add_frame(s->delay_frames, MAX_DELAY, s->current_picture)) {
             int min_num = s->delay_frames[0]->avframe->display_picture_number;
@@ -2013,7 +2189,7 @@ static int dirac_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         }
 
         if (delayed_frame) {
-            delayed_frame->avframe->reference ^= DELAYED_PIC_REF;
+            delayed_frame->reference ^= DELAYED_PIC_REF;
             if((ret=av_frame_ref(data, delayed_frame->avframe)) < 0)
                 return ret;
             *got_frame = 1;
@@ -2040,6 +2216,6 @@ AVCodec ff_dirac_decoder = {
     .init           = dirac_decode_init,
     .close          = dirac_decode_end,
     .decode         = dirac_decode_frame,
-    .capabilities   = CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_DR1,
     .flush          = dirac_decode_flush,
 };
diff --git a/libavcodec/diracdsp.c b/libavcodec/diracdsp.c
index 6b02779e..ab8d1497 100644
--- a/libavcodec/diracdsp.c
+++ b/libavcodec/diracdsp.c
@@ -20,7 +20,6 @@
 
 #include "avcodec.h"
 #include "diracdsp.h"
-#include "libavcodec/x86/diracdsp_mmx.h"
 
 #define FILTER(src, stride)                                     \
     ((21*((src)[ 0*stride] + (src)[1*stride])                   \
@@ -135,9 +134,10 @@ ADD_OBMC(8)
 ADD_OBMC(16)
 ADD_OBMC(32)
 
-static void put_signed_rect_clamped_c(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height)
+static void put_signed_rect_clamped_8bit_c(uint8_t *dst, int dst_stride, const uint8_t *_src, int src_stride, int width, int height)
 {
     int x, y;
+    int16_t *src = (int16_t *)_src;
     for (y = 0; y < height; y++) {
         for (x = 0; x < width; x+=4) {
             dst[x  ] = av_clip_uint8(src[x  ] + 128);
@@ -146,10 +146,32 @@ static void put_signed_rect_clamped_c(uint8_t *dst, int dst_stride, const int16_
             dst[x+3] = av_clip_uint8(src[x+3] + 128);
         }
         dst += dst_stride;
-        src += src_stride;
+        src += src_stride >> 1;
     }
 }
 
+#define PUT_SIGNED_RECT_CLAMPED(PX) /* PX-bit output: add 2^(PX-1) bias, clip to PX bits */             \
+static void put_signed_rect_clamped_ ## PX ## bit_c(uint8_t *_dst, int dst_stride, const uint8_t *_src, \
+                                                  int src_stride, int width, int height)                \
+{                                                                                                       \
+    int x, y;                                                                                           \
+    uint16_t *dst = (uint16_t *)_dst;                                                                   \
+    const int32_t *src = (const int32_t *)_src;                                                         \
+    for (y = 0; y < height; y++) {                                                                      \
+        for (x = 0; x < width; x+=4) {                                                                  \
+            dst[x  ] = av_clip_uintp2(src[x  ] + (1 << (PX - 1)), PX);                                  \
+            dst[x+1] = av_clip_uintp2(src[x+1] + (1 << (PX - 1)), PX);                                  \
+            dst[x+2] = av_clip_uintp2(src[x+2] + (1 << (PX - 1)), PX);                                  \
+            dst[x+3] = av_clip_uintp2(src[x+3] + (1 << (PX - 1)), PX);                                  \
+        }                                                                                               \
+        dst += dst_stride >> 1; /* byte stride -> uint16_t elements */                                  \
+        src += src_stride >> 2; /* byte stride -> int32_t elements */                                   \
+    }                                                                                                   \
+}
+
+PUT_SIGNED_RECT_CLAMPED(10)
+PUT_SIGNED_RECT_CLAMPED(12)
+
 static void add_rect_clamped_c(uint8_t *dst, const uint16_t *src, int stride,
                                const int16_t *idwt, int idwt_stride,
                                int width, int height)
@@ -177,7 +199,9 @@ av_cold void ff_diracdsp_init(DiracDSPContext *c)
 {
     c->dirac_hpel_filter = dirac_hpel_filter;
     c->add_rect_clamped = add_rect_clamped_c;
-    c->put_signed_rect_clamped = put_signed_rect_clamped_c;
+    c->put_signed_rect_clamped[0] = put_signed_rect_clamped_8bit_c;
+    c->put_signed_rect_clamped[1] = put_signed_rect_clamped_10bit_c;
+    c->put_signed_rect_clamped[2] = put_signed_rect_clamped_12bit_c;
 
     c->add_dirac_obmc[0] = add_obmc8_c;
     c->add_dirac_obmc[1] = add_obmc16_c;
@@ -197,5 +221,6 @@ av_cold void ff_diracdsp_init(DiracDSPContext *c)
     PIXFUNC(avg, 16);
     PIXFUNC(avg, 32);
 
-    if (HAVE_MMX && HAVE_YASM) ff_diracdsp_init_mmx(c);
+    if (ARCH_X86)
+        ff_diracdsp_init_x86(c);
 }
diff --git a/libavcodec/diracdsp.h b/libavcodec/diracdsp.h
index 613ca5bc..25a872d8 100644
--- a/libavcodec/diracdsp.h
+++ b/libavcodec/diracdsp.h
@@ -41,8 +41,8 @@ typedef struct {
     void (*put_dirac_pixels_tab[3][4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
     void (*avg_dirac_pixels_tab[3][4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
 
-    void (*put_signed_rect_clamped)(uint8_t *dst/*align 16*/, int dst_stride, const int16_t *src/*align 16*/, int src_stride, int width, int height/*mod 2*/);
-    void (*put_rect_clamped)(uint8_t *dst/*align 16*/, int dst_stride, const int16_t *src/*align 16*/, int src_stride, int width, int height/*mod 2*/);
+    void (*put_signed_rect_clamped[3])(uint8_t *dst/*align 16*/, int dst_stride, const uint8_t *src/*align 16*/, int src_stride, int width, int height/*mod 2*/);
+    void (*put_rect_clamped)(uint8_t *dst/*align 16*/, int dst_stride, const uint8_t *src/*align 16*/, int src_stride, int width, int height/*mod 2*/);
     void (*add_rect_clamped)(uint8_t *dst/*align 16*/, const uint16_t *src/*align 16*/, int stride, const int16_t *idwt/*align 16*/, int idwt_stride, int width, int height/*mod 2*/);
     void (*add_dirac_obmc[3])(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
 
@@ -63,5 +63,6 @@ DECL_DIRAC_PIXOP(put, l4_c);
 DECL_DIRAC_PIXOP(avg, l4_c);
 
 void ff_diracdsp_init(DiracDSPContext *c);
+void ff_diracdsp_init_x86(DiracDSPContext *c); /* called from ff_diracdsp_init() when ARCH_X86 is set */
 
 #endif /* AVCODEC_DIRACDSP_H */
diff --git a/libavcodec/diractab.c b/libavcodec/diractab.c
new file mode 100644
index 00000000..816b9393
--- /dev/null
+++ b/libavcodec/diractab.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2016 Open Broadcast Systems Ltd.
+ * Author    (C) 2016 Rostislav Pehlivanov <atomnuker@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "diractab.h"
+
+const uint8_t ff_dirac_default_qmat[7][4][4] = { /* read as [wavelet_idx][level][i]; i is presumably the subband orientation - confirm */
+    { { 5,  3,  3,  0}, { 0,  4,  4,  1}, { 0,  5,  5,  2}, { 0,  6,  6,  3} },
+    { { 4,  2,  2,  0}, { 0,  4,  4,  2}, { 0,  5,  5,  3}, { 0,  7,  7,  5} },
+    { { 5,  3,  3,  0}, { 0,  4,  4,  1}, { 0,  5,  5,  2}, { 0,  6,  6,  3} },
+    { { 8,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0} },
+    { { 8,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0} },
+    { { 0,  4,  4,  8}, { 0,  8,  8, 12}, { 0, 13, 13, 17}, { 0, 17, 17, 21} },
+    { { 3,  1,  1,  0}, { 0,  4,  4,  2}, { 0,  6,  6,  5}, { 0,  9,  9,  7} },
+};
+
+const int32_t ff_dirac_qscale_tab[116] = {
+    4,         5,         6,         7,         8,        10,        11,        13,
+    16,        19,        23,        27,        32,        38,        45,        54,
+    64,        76,        91,       108,       128,       152,       181,       215,
+    256,       304,       362,       431,       512,       609,       724,       861,
+    1024,      1218,      1448,      1722,      2048,      2435,      2896,      3444,
+    4096,      4871,      5793,      6889,      8192,      9742,     11585,     13777,
+    16384,     19484,     23170,     27554,     32768,     38968,     46341,     55109,
+    65536,     77936,     92682,    110218,    131072,    155872,    185364,    220436,
+    262144,    311744,    370728,    440872,    524288,    623487,    741455,    881744,
+    1048576,   1246974,   1482910,   1763488,   2097152,   2493948,   2965821,   3526975,
+    4194304,   4987896,   5931642,   7053950,   8388608,   9975792,  11863283,  14107901,
+    16777216,  19951585,  23726566,  28215802,  33554432,  39903169,  47453133,  56431603,
+    67108864,  79806339,  94906266, 112863206, 134217728, 159612677, 189812531, 225726413,
+    268435456, 319225354, 379625062, 451452825, 536870912, 638450708, 759250125, 902905651,
+    1073741824,1276901417,1518500250,1805811301,/*2147483648,2553802834,3037000500,3611622603,
+    4294967296*/
+};
+
+const int32_t ff_dirac_qoffset_intra_tab[120] = {
+    1,         2,         3,         4,         4,         5,         6,         7,
+    8,        10,        12,        14,        16,        19,        23,        27,
+    32,        38,        46,        54,        64,        76,        91,       108,
+    128,       152,       181,       216,       256,       305,       362,       431,
+    512,       609,       724,       861,      1024,      1218,      1448,      1722,
+    2048,      2436,      2897,      3445,      4096,      4871,      5793,      6889,
+    8192,      9742,     11585,     13777,     16384,     19484,     23171,     27555,
+    32768,     38968,     46341,     55109,     65536,     77936,     92682,    110218,
+    131072,    155872,    185364,    220436,    262144,    311744,    370728,    440872,
+    524288,    623487,    741455,    881744,   1048576,   1246974,   1482911,   1763488,
+    2097152,   2493948,   2965821,   3526975,   4194304,   4987896,   5931642,   7053951,
+    8388608,   9975793,  11863283,  14107901,  16777216,  19951585,  23726567,  28215802,
+    33554432,  39903170,  47453133,  56431603,  67108864,  79806339,  94906266, 112863207,
+    134217728, 159612677, 189812531, 225726413, 268435456, 319225354, 379625063, 451452826,
+    536870912, 638450709, 759250125, 902905651,1073741824,1276901417,1518500250,1805811302,
+    /*2147483648, 2553802834, 3037000500, 3611622603, 4294967296,*/
+};
+
+const int ff_dirac_qoffset_inter_tab[122] = {
+    1,         2,         2,         3,         3,         4,         4,         5,
+    6,         7,         9,        10,        12,        14,        17,        20,
+    24,        29,        34,        41,        48,        57,        68,        81,
+    96,       114,       136,       162,       192,       228,       272,       323,
+    384,       457,       543,       646,       768,       913,      1086,      1292,
+    1536,      1827,      2172,      2583,      3072,      3653,      4344,      5166,
+    6144,      7307,      8689,     10333,     12288,     14613,     17378,     20666,
+    24576,     29226,     34756,     41332,     49152,     58452,     69512,     82664,
+    98304,    116904,    139023,    165327,    196608,    233808,    278046,    330654,
+    393216,    467615,    556091,    661308,    786432,    935231,   1112183,   1322616,
+    1572864,   1870461,   2224366,   2645231,   3145728,   3740922,   4448731,   5290463,
+    6291456,   7481844,   8897462,  10580926,  12582912,  14963688,  17794925,  21161851,
+    25165824,  29927377,  35589850,  42323702,  50331648,  59854754,  71179699,  84647405,
+    100663296, 119709508, 142359398, 169294809, 201326592, 239419016, 284718797, 338589619,
+    402653184, 478838031, 569437594, 677179238, 805306368, 957676063,1138875188,1354358476,
+    1610612736, 1915352125, /*2277750375, 2708716952, 3221225472, 3830704250,*/
+};
diff --git a/libavcodec/diractab.h b/libavcodec/diractab.h
new file mode 100644
index 00000000..cd8b8ace
--- /dev/null
+++ b/libavcodec/diractab.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2016 Open Broadcast Systems Ltd.
+ * Author    (C) 2016 Rostislav Pehlivanov <atomnuker@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_DIRACTAB_H
+#define AVCODEC_DIRACTAB_H
+
+#include <stdint.h>
+
+/* Tables here are shared between the Dirac/VC-2 decoder and the VC-2 encoder */
+
+/* Default quantization tables for each wavelet transform */
+extern const uint8_t ff_dirac_default_qmat[7][4][4];
+
+/* Scaling factors needed for quantization/dequantization */
+extern const int32_t ff_dirac_qscale_tab[116];
+
+/* Scaling offsets needed for quantization/dequantization, for intra frames */
+extern const int32_t ff_dirac_qoffset_intra_tab[120];
+
+/* Scaling offsets needed for quantization/dequantization, for inter frames */
+extern const int ff_dirac_qoffset_inter_tab[122];
+
+#endif /* AVCODEC_DIRACTAB_H */
diff --git a/libavcodec/dnxhd_parser.c b/libavcodec/dnxhd_parser.c
index fffb98fa..033b8ee7 100644
--- a/libavcodec/dnxhd_parser.c
+++ b/libavcodec/dnxhd_parser.c
@@ -25,8 +25,7 @@
  */
 
 #include "parser.h"
-
-#define DNXHD_HEADER_PREFIX 0x000002800100
+#include "dnxhddata.h"
 
 typedef struct {
     ParseContext pc;
@@ -47,7 +46,7 @@ static int dnxhd_find_frame_end(DNXHDParserContext *dctx,
     if (!pic_found) {
         for (i = 0; i < buf_size; i++) {
             state = (state << 8) | buf[i];
-            if ((state & 0xffffffffff00LL) == DNXHD_HEADER_PREFIX) {
+            if (ff_dnxhd_check_header_prefix(state & 0xffffffffff00LL) != 0) {
                 i++;
                 pic_found = 1;
                 interlaced = (state&2)>>1; /* byte following the 5-byte header prefix */
@@ -62,7 +61,7 @@ static int dnxhd_find_frame_end(DNXHDParserContext *dctx,
             return 0;
         for (; i < buf_size; i++) {
             state = (state << 8) | buf[i];
-            if ((state & 0xffffffffff00LL) == DNXHD_HEADER_PREFIX) {
+            if (ff_dnxhd_check_header_prefix(state & 0xffffffffff00LL) != 0) {
                 if (!interlaced || dctx->cur_field) {
                     pc->frame_start_found = 0;
                     pc->state64 = -1;
diff --git a/libavcodec/dnxhddata.c b/libavcodec/dnxhddata.c
index ef918b02..7d935a3f 100644
--- a/libavcodec/dnxhddata.c
+++ b/libavcodec/dnxhddata.c
@@ -22,9 +22,11 @@
 #include "avcodec.h"
 #include "dnxhddata.h"
 #include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
 
 /* The quantization tables below are in zigzag order! */
 
+/* Used in CID 1235, 1256, 1270 */
 static const uint8_t dnxhd_1235_luma_weight[] = {
      0, 32, 32, 32, 33, 32, 32, 32,
     32, 31, 32, 33, 33, 33, 33, 35,
@@ -36,6 +38,7 @@ static const uint8_t dnxhd_1235_luma_weight[] = {
     50, 50, 53, 55, 55, 56, 60, 60,
 };
 
+/* Used in CID 1235, 1256 */
 static const uint8_t dnxhd_1235_chroma_weight[] = {
      0, 32, 33, 34, 34, 33, 34, 35,
     37, 40, 43, 42, 39, 38, 39, 41,
@@ -47,6 +50,7 @@ static const uint8_t dnxhd_1235_chroma_weight[] = {
     90, 90, 85, 79, 73, 73, 73, 73,
 };
 
+/* Used in CID 1237, 1253, 1259, 1273, 1274 */
 static const uint8_t dnxhd_1237_luma_weight[] = {
      0,  32,  33,  34, 34, 36, 37, 36,
     36,  37,  38,  38, 38, 39, 41, 44,
@@ -58,6 +62,7 @@ static const uint8_t dnxhd_1237_luma_weight[] = {
     97, 100, 104, 102, 98, 98, 99, 99,
 };
 
+/* Used in CID 1237, 1253, 1259, 1273, 1274 */
 static const uint8_t dnxhd_1237_chroma_weight[] = {
      0,  32,  36,  39, 39, 38, 39,  41,
     45,  51,  57,  58, 53, 48, 47,  51,
@@ -69,6 +74,7 @@ static const uint8_t dnxhd_1237_chroma_weight[] = {
     97, 100, 104, 102, 98, 98, 99,  99,
 };
 
+/* Used in CID 1238, 1272 */
 static const uint8_t dnxhd_1238_luma_weight[] = {
      0, 32, 32, 33, 34, 33, 33, 33,
     33, 33, 33, 33, 33, 35, 37, 37,
@@ -80,6 +86,7 @@ static const uint8_t dnxhd_1238_luma_weight[] = {
     51, 53, 55, 57, 58, 59, 57, 57,
 };
 
+/* Used in CID 1238, 1272 */
 static const uint8_t dnxhd_1238_chroma_weight[] = {
      0, 32, 35, 35, 35, 34, 34, 35,
     39, 43, 45, 45, 41, 39, 40, 41,
@@ -91,6 +98,7 @@ static const uint8_t dnxhd_1238_chroma_weight[] = {
     82, 77, 80, 86, 84, 82, 82, 82,
 };
 
+/* Used in CID 1241, 1271 */
 static const uint8_t dnxhd_1241_luma_weight[] = {
      0, 32, 33, 34, 34, 35, 36, 37,
     36, 37, 38, 38, 38, 39, 39, 40,
@@ -102,6 +110,7 @@ static const uint8_t dnxhd_1241_luma_weight[] = {
     48, 46, 47, 48, 48, 49, 49, 49,
 };
 
+/* Used in CID 1241, 1271 */
 static const uint8_t dnxhd_1241_chroma_weight[] = {
      0, 32, 36, 38, 37, 37, 40, 41,
     40, 40, 42, 42, 41, 41, 41, 41,
@@ -201,6 +210,7 @@ static const uint8_t dnxhd_1251_chroma_weight[] = {
     61, 59, 59, 59, 61, 62, 62, 62,
 };
 
+/* Used in CID 1252, 1258 */
 static const uint8_t dnxhd_1252_luma_weight[] = {
       0,  32,  34, 35, 36, 36, 36, 37,
      36,  37,  39, 40, 41, 40, 40, 40,
@@ -211,6 +221,8 @@ static const uint8_t dnxhd_1252_luma_weight[] = {
      71,  82,  90, 90, 88, 87, 90, 95,
     100, 107, 103, 97, 95, 93, 99, 99,
 };
+
+/* Used in CID 1252, 1258 */
 static const uint8_t dnxhd_1252_chroma_weight[] = {
       0,  32,  35,  36,  37,  37,  38,  40,
      42,  46,  49,  50,  50,  49,  49,  53,
@@ -222,47 +234,49 @@ static const uint8_t dnxhd_1252_chroma_weight[] = {
     114, 128, 125, 129, 134, 125, 116, 116,
 };
 
-static const uint8_t dnxhd_1256_chroma_weight[] = {
-     0, 32, 32, 32, 32, 32, 32, 33,
-    32, 32, 32, 32, 32, 32, 32, 34,
-    32, 32, 32, 32, 32, 32, 33, 37,
-    32, 32, 32, 32, 32, 32, 36, 39,
-    32, 32, 32, 32, 32, 34, 39, 44,
-    32, 37, 32, 32, 35, 40, 43, 49,
-    32, 33, 36, 36, 40, 43, 50, 60,
-    34, 37, 39, 44, 51, 56, 61, 70,
+static const uint8_t dnxhd_1260_luma_weight[] = {
+     0, 32, 33, 34, 36, 37, 37, 36,
+    34, 33, 34, 35, 37, 38, 40, 41,
+    40, 39, 38, 37, 34, 33, 34, 37,
+    40, 44, 48, 52, 53, 49, 47, 45,
+    42, 38, 36, 36, 38, 41, 43, 44,
+    46, 49, 52, 54, 54, 49, 44, 44,
+    44, 47, 51, 51, 52, 51, 48, 50,
+    52, 53, 53, 50, 50, 54, 54, 54,
+};
+
+static const uint8_t dnxhd_1260_chroma_weight[] = {
+     0, 32, 34, 38, 42, 40, 38, 36,
+    35, 35, 38, 42, 43, 43, 42, 40,
+    38, 39, 43, 43, 42, 41, 43, 43,
+    42, 44, 46, 45, 45, 46, 47, 46,
+    44, 44, 45, 46, 46, 46, 50, 50,
+    47, 47, 49, 49, 49, 49, 51, 53,
+    51, 49, 53, 57, 56, 52, 50, 52,
+    56, 56, 53, 53, 53, 54, 58, 58,
 };
 
-static const uint8_t dnxhd_1258_luma_weight[] = {
-     0, 32, 36, 36,  40,  40, 55, 60,
-    34, 36, 37, 40,  41,  48, 57, 82,
-    35, 36, 41, 41,  46,  52, 73, 82,
-    37, 40, 42, 45,  50,  65, 80, 87,
-    39, 41, 44, 49,  62,  78, 88, 90,
-    41, 44, 49, 58,  73,  90, 95, 95,
-    43, 52, 55, 68,  90, 100, 97, 93,
-    52, 53, 71, 82, 107, 103, 99, 99,
+/* Used in CID 1235, 1236, 1241, 1250, 1256, 1257, 1270, 1271 */
+static const uint8_t dnxhd_1235_dc_codes[14] = {
+    10, 62, 11, 12, 13, 0, 1, 2, 3, 4, 14, 30, 126, 127,
 };
 
-static const uint8_t dnxhd_1258_chroma_weight[] = {
-     0, 32, 37,  38,  49,  53,  65,  66,
-    35, 37, 40,  49,  56,  64,  65,  82,
-    36, 42, 50,  56,  64,  67,  73,  85,
-    46, 50, 57,  63,  71,  72,  89,  87,
-    49, 58, 65,  72,  78,  88,  88,  90,
-    60, 64, 74,  81,  84,  90,  95, 134,
-    62, 74, 77,  80,  90, 114, 129, 125,
-    74, 74, 90, 100, 128, 125, 116, 116,
+/* Used in CID 1235, 1236, 1241, 1250, 1256, 1257, 1270, 1271 */
+static const uint8_t dnxhd_1235_dc_bits[14] = {
+    4, 6, 4, 4, 4, 3, 3, 3, 3, 3, 4, 5, 7, 7,
 };
 
+/* Used in CID 1237, 1238, 1242, 1243, 1251, 1252, 1253, 1258, 1259, 1260, 1272, 1273, 1274 */
 static const uint8_t dnxhd_1237_dc_codes[12] = {
     0, 12, 13, 1, 2, 3, 4, 5, 14, 30, 62, 63,
 };
 
+/* Used in CID 1237, 1238, 1242, 1243, 1251, 1252, 1253, 1258, 1259, 1260, 1272, 1273, 1274 */
 static const uint8_t dnxhd_1237_dc_bits[12] = {
     3, 4, 4, 3, 3, 3, 3, 3, 4, 5, 6, 6,
 };
 
+/* Used in CID 1237, 1242, 1253, 1259, 1260, 1273, 1274 */
 static const uint16_t dnxhd_1237_ac_codes[257] = {
         0,     1,     4,     5,    12,    26,    27,    56,
        57,    58,    59,   120,   121,   244,   245,   246,
@@ -299,6 +313,7 @@ static const uint16_t dnxhd_1237_ac_codes[257] = {
     65535,
 };
 
+/* Used in CID 1237, 1242, 1253, 1259, 1260, 1273, 1274 */
 static const uint8_t dnxhd_1237_ac_bits[257] = {
      2,  2,  3,  3,  4,  5,  5,  6,  6,  6,  6,  7,  7,  8,  8,  8,
      8,  8,  9,  9,  9,  9,  9, 10, 10, 10, 10, 10, 10, 11, 11, 11,
@@ -319,79 +334,44 @@ static const uint8_t dnxhd_1237_ac_bits[257] = {
     16,
 };
 
-static const uint8_t dnxhd_1237_ac_level[257] = {
-      3,  3,  5,  0,  7,  9,  5, 11, 13, 15,  7, 17, 19, 21, 23, 25,
-      9, 11, 27, 29, 31, 33, 13, 35, 37, 39, 41, 43, 15, 45, 47, 49,
-     51, 53, 55, 17, 19, 57, 59, 61, 63, 65, 67, 69, 21, 23, 25, 71,
-     73, 75, 77, 79, 81, 83, 27, 29, 31, 33, 85, 87, 89, 91, 93, 95,
-     97, 99,101,103,105, 35, 37, 39, 41, 43,107,109,111,113,115,117,
-    119,121,123,129,  3, 45, 47, 49, 51, 53, 55,125,127,  5,  7,  9,
-     11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41,
-     43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73,
-     75, 77, 79, 81, 83, 85, 87, 89, 91, 93, 95, 97, 99,101,103,105,
-    107,109,111,113,115,117,119,121,123,125,127,129, 57, 59, 61, 63,
-     65, 67, 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91, 93, 95,
-     97, 99,101,103,105,107,109,111,113,115,117,119,121,123,125,127,
-    129,  3,  5,  7,  9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
-     33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63,
-     65, 67, 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91, 93, 95,
-     97, 99,101,103,105,107,109,111,113,115,117,119,121,123,125,127,
-    129,
-};
-
-static const uint8_t dnxhd_1237_ac_flags[257] = {
-    0, 2, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0,
-    2, 2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0,
-    0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 0,
-    0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 1, 2, 2, 2, 2, 2, 2, 0, 0, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2,
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    3,
-};
-
-static const uint16_t dnxhd_1237_run_codes[62] = {
-       0,    4,   10,   11,   24,   25,   26,   54,
-      55,   56,   57,   58,  118,  119,  240,  482,
-     483,  484,  485,  486,  487,  488,  489,  490,
-     491,  492,  493,  494,  990,  991,  992,  993,
-     994,  995,  996,  997,  998,  999, 1000, 1001,
-    1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009,
-    1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017,
-    1018, 1019, 1020, 1021, 1022, 1023,
-};
-
-static const uint8_t dnxhd_1237_run_bits[62] = {
-     1,  3,  4,  4,  5,  5,  5,  6,  6,  6,  6,  6,  7,  7,  8,  9,
-     9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9, 10, 10, 10, 10,
-    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
-    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
-};
-
-static const uint8_t dnxhd_1237_run[62] = {
-     1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
-    17, 18, 19, 20, 21, 53, 57, 58, 59, 60, 61, 62, 22, 23, 24, 25,
-    26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
-    42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56,
-};
-
-static const uint8_t dnxhd_1238_dc_codes[12] = {
-    0, 12, 13, 1, 2, 3, 4, 5, 14, 30, 62, 63,
-};
-
-static const uint8_t dnxhd_1238_dc_bits[12] = {
-    3, 4, 4, 3, 3, 3, 3, 3, 4, 5, 6, 6,
-};
-
+/* Used in CID 1237, 1242, 1253, 1259, 1260, 1273, 1274 */
+static const uint8_t dnxhd_1237_ac_info[2*257] = {
+      3, 0,   3, 2,   5, 0,   0, 0,   7, 0,   9, 0,   5, 2,  11, 0,
+     13, 0,  15, 0,   7, 2,  17, 0,  19, 0,  21, 0,  23, 0,  25, 0,
+      9, 2,  11, 2,  27, 0,  29, 0,  31, 0,  33, 0,  13, 2,  35, 0,
+     37, 0,  39, 0,  41, 0,  43, 0,  15, 2,  45, 0,  47, 0,  49, 0,
+     51, 0,  53, 0,  55, 0,  17, 2,  19, 2,  57, 0,  59, 0,  61, 0,
+     63, 0,  65, 0,  67, 0,  69, 0,  21, 2,  23, 2,  25, 2,  71, 0,
+     73, 0,  75, 0,  77, 0,  79, 0,  81, 0,  83, 0,  27, 2,  29, 2,
+     31, 2,  33, 2,  85, 0,  87, 0,  89, 0,  91, 0,  93, 0,  95, 0,
+     97, 0,  99, 0, 101, 0, 103, 0, 105, 0,  35, 2,  37, 2,  39, 2,
+     41, 2,  43, 2, 107, 0, 109, 0, 111, 0, 113, 0, 115, 0, 117, 0,
+    119, 0, 121, 0, 123, 0, 129, 0,   3, 1,  45, 2,  47, 2,  49, 2,
+     51, 2,  53, 2,  55, 2, 125, 0, 127, 0,   5, 1,   7, 1,   9, 1,
+     11, 1,  13, 1,  15, 1,  17, 1,  19, 1,  21, 1,  23, 1,  25, 1,
+     27, 1,  29, 1,  31, 1,  33, 1,  35, 1,  37, 1,  39, 1,  41, 1,
+     43, 1,  45, 1,  47, 1,  49, 1,  51, 1,  53, 1,  55, 1,  57, 1,
+     59, 1,  61, 1,  63, 1,  65, 1,  67, 1,  69, 1,  71, 1,  73, 1,
+     75, 1,  77, 1,  79, 1,  81, 1,  83, 1,  85, 1,  87, 1,  89, 1,
+     91, 1,  93, 1,  95, 1,  97, 1,  99, 1, 101, 1, 103, 1, 105, 1,
+    107, 1, 109, 1, 111, 1, 113, 1, 115, 1, 117, 1, 119, 1, 121, 1,
+    123, 1, 125, 1, 127, 1, 129, 1,  57, 2,  59, 2,  61, 2,  63, 2,
+     65, 2,  67, 2,  69, 2,  71, 2,  73, 2,  75, 2,  77, 2,  79, 2,
+     81, 2,  83, 2,  85, 2,  87, 2,  89, 2,  91, 2,  93, 2,  95, 2,
+     97, 2,  99, 2, 101, 2, 103, 2, 105, 2, 107, 2, 109, 2, 111, 2,
+    113, 2, 115, 2, 117, 2, 119, 2, 121, 2, 123, 2, 125, 2, 127, 2,
+    129, 2,   3, 3,   5, 3,   7, 3,   9, 3,  11, 3,  13, 3,  15, 3,
+     17, 3,  19, 3,  21, 3,  23, 3,  25, 3,  27, 3,  29, 3,  31, 3,
+     33, 3,  35, 3,  37, 3,  39, 3,  41, 3,  43, 3,  45, 3,  47, 3,
+     49, 3,  51, 3,  53, 3,  55, 3,  57, 3,  59, 3,  61, 3,  63, 3,
+     65, 3,  67, 3,  69, 3,  71, 3,  73, 3,  75, 3,  77, 3,  79, 3,
+     81, 3,  83, 3,  85, 3,  87, 3,  89, 3,  91, 3,  93, 3,  95, 3,
+     97, 3,  99, 3, 101, 3, 103, 3, 105, 3, 107, 3, 109, 3, 111, 3,
+    113, 3, 115, 3, 117, 3, 119, 3, 121, 3, 123, 3, 125, 3, 127, 3,
+    129, 3,
+};
+
+/* Used in CID 1238, 1240, 1243, 1272 */
 static const uint16_t dnxhd_1238_ac_codes[257] = {
         0,     1,     4,    10,    11,    24,    25,    26,
        54,    55,    56,    57,   116,   117,   118,   119,
@@ -428,6 +408,7 @@ static const uint16_t dnxhd_1238_ac_codes[257] = {
     65535,
 };
 
+/* Used in CID 1238, 1240, 1243, 1272 */
 static const uint8_t dnxhd_1238_ac_bits[257] = {
      2,  2,  3,  4,  4,  5,  5,  5,  6,  6,  6,  6,  7,  7,  7,  7,
      8,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,  9, 10, 10,
@@ -448,80 +429,45 @@ static const uint8_t dnxhd_1238_ac_bits[257] = {
     16,
 };
 
-static const uint8_t dnxhd_1238_ac_level[257] = {
-      3,  3,  5,  7,  0,  9, 11,  5, 13, 15, 17,  7, 19, 21, 23,  9,
-     25, 27, 29, 31, 33, 11, 35, 37, 39, 41, 43, 45, 13, 15, 47, 49,
-     51, 53, 55, 57, 59, 17, 19, 61, 63, 65, 67, 69, 71, 73, 75, 21,
-     23, 77, 79, 81, 83, 85, 87, 89, 91, 93, 95, 97, 25, 27, 29, 99,
-    101,103,105,107,109,111,113,115,117,119,121,123, 31, 33, 35, 37,
-    125,127,129,  3,  5,  7,  9, 11, 13, 15, 17, 19, 21, 23, 25, 27,
-     29, 31, 33, 39, 41, 43, 45, 47, 49, 35, 37, 39, 41, 43, 45, 47,
-     49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 81, 51,
-     53, 55, 57, 59, 61, 77, 79, 83, 85, 87, 89, 91, 93, 95, 97, 99,
-    101,103,105,107,109,111,113,115,117,119,121,123,125,127,129, 63,
-     65, 67, 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91, 93, 95,
-     97, 99,101,103,105,107,109,111,113,115,117,119,121,123,125,127,
-    129,  3,  5,  7,  9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
-     33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63,
-     65, 67, 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91, 93, 95,
-     97, 99,101,103,105,107,109,111,113,115,117,119,121,123,125,127,
-    129,
+/* Used in CID 1238, 1240, 1243, 1272 */
+static const uint8_t dnxhd_1238_ac_info[2*257] = {
+      3, 0,   3, 2,   5, 0,   7, 0,   0, 0,   9, 0,  11, 0,   5, 2,
+     13, 0,  15, 0,  17, 0,   7, 2,  19, 0,  21, 0,  23, 0,   9, 2,
+     25, 0,  27, 0,  29, 0,  31, 0,  33, 0,  11, 2,  35, 0,  37, 0,
+     39, 0,  41, 0,  43, 0,  45, 0,  13, 2,  15, 2,  47, 0,  49, 0,
+     51, 0,  53, 0,  55, 0,  57, 0,  59, 0,  17, 2,  19, 2,  61, 0,
+     63, 0,  65, 0,  67, 0,  69, 0,  71, 0,  73, 0,  75, 0,  21, 2,
+     23, 2,  77, 0,  79, 0,  81, 0,  83, 0,  85, 0,  87, 0,  89, 0,
+     91, 0,  93, 0,  95, 0,  97, 0,  25, 2,  27, 2,  29, 2,  99, 0,
+    101, 0, 103, 0, 105, 0, 107, 0, 109, 0, 111, 0, 113, 0, 115, 0,
+    117, 0, 119, 0, 121, 0, 123, 0,  31, 2,  33, 2,  35, 2,  37, 2,
+    125, 0, 127, 0, 129, 0,   3, 1,   5, 1,   7, 1,   9, 1,  11, 1,
+     13, 1,  15, 1,  17, 1,  19, 1,  21, 1,  23, 1,  25, 1,  27, 1,
+     29, 1,  31, 1,  33, 1,  39, 2,  41, 2,  43, 2,  45, 2,  47, 2,
+     49, 2,  35, 1,  37, 1,  39, 1,  41, 1,  43, 1,  45, 1,  47, 1,
+     49, 1,  51, 1,  53, 1,  55, 1,  57, 1,  59, 1,  61, 1,  63, 1,
+     65, 1,  67, 1,  69, 1,  71, 1,  73, 1,  75, 1,  81, 1,  51, 2,
+     53, 2,  55, 2,  57, 2,  59, 2,  61, 2,  77, 1,  79, 1,  83, 1,
+     85, 1,  87, 1,  89, 1,  91, 1,  93, 1,  95, 1,  97, 1,  99, 1,
+    101, 1, 103, 1, 105, 1, 107, 1, 109, 1, 111, 1, 113, 1, 115, 1,
+    117, 1, 119, 1, 121, 1, 123, 1, 125, 1, 127, 1, 129, 1,  63, 2,
+     65, 2,  67, 2,  69, 2,  71, 2,  73, 2,  75, 2,  77, 2,  79, 2,
+     81, 2,  83, 2,  85, 2,  87, 2,  89, 2,  91, 2,  93, 2,  95, 2,
+     97, 2,  99, 2, 101, 2, 103, 2, 105, 2, 107, 2, 109, 2, 111, 2,
+    113, 2, 115, 2, 117, 2, 119, 2, 121, 2, 123, 2, 125, 2, 127, 2,
+    129, 2,   3, 3,   5, 3,   7, 3,   9, 3,  11, 3,  13, 3,  15, 3,
+     17, 3,  19, 3,  21, 3,  23, 3,  25, 3,  27, 3,  29, 3,  31, 3,
+     33, 3,  35, 3,  37, 3,  39, 3,  41, 3,  43, 3,  45, 3,  47, 3,
+     49, 3,  51, 3,  53, 3,  55, 3,  57, 3,  59, 3,  61, 3,  63, 3,
+     65, 3,  67, 3,  69, 3,  71, 3,  73, 3,  75, 3,  77, 3,  79, 3,
+     81, 3,  83, 3,  85, 3,  87, 3,  89, 3,  91, 3,  93, 3,  95, 3,
+     97, 3,  99, 3, 101, 3, 103, 3, 105, 3, 107, 3, 109, 3, 111, 3,
+    113, 3, 115, 3, 117, 3, 119, 3, 121, 3, 123, 3, 125, 3, 127, 3,
+    129, 3,
 }; /* 0 is EOB */
 
-static const uint8_t dnxhd_1238_ac_flags[257] = {
-    0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2,
-    0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0,
-    0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 2,
-    2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2,
-    0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2,
-    2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2,
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    3,
-};
-
-static const uint16_t dnxhd_1235_1238_1241_run_codes[62] = {
-       0,    4,   10,   11,   24,   25,   26,   27,
-      56,   57,   58,   59,  120,  242,  486,  487,
-     488,  489,  980,  981,  982,  983,  984,  985,
-     986,  987,  988,  989,  990,  991,  992,  993,
-     994,  995,  996,  997,  998,  999, 1000, 1001,
-    1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009,
-    1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017,
-    1018, 1019, 1020, 1021, 1022, 1023,
-};
-
-static const uint8_t dnxhd_1235_1238_1241_run_bits[62] = {
-     1,  3,  4,  4,  5,  5,  5,  5,  6,  6,  6,  6,  7,  8,  9,  9,
-     9,  9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
-    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
-    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
-};
-
-static const uint8_t dnxhd_1238_run[62] = {
-     1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
-    20, 21, 17, 18, 19, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
-    33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
-    49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
-};
-
-static const uint8_t dnxhd_1235_1241_dc_codes[14] = {
-    10, 62, 11, 12, 13, 0, 1, 2, 3, 4, 14, 30, 126, 127,
-};
-
-static const uint8_t dnxhd_1235_1241_dc_bits[14] = {
-    4, 6, 4, 4, 4, 3, 3, 3, 3, 3, 4, 5, 7, 7,
-};
-
-static const uint16_t dnxhd_1235_1241_ac_codes[257] = {
+/* Used in CID 1235, 1236, 1241, 1256, 1257, 1270, 1271 */
+static const uint16_t dnxhd_1235_ac_codes[257] = {
         0,     1,     4,    10,    11,    24,    25,    26,
        54,    55,    56,    57,   116,   117,   118,   119,
       240,   241,   242,   243,   244,   245,   492,   493,
@@ -557,7 +503,8 @@ static const uint16_t dnxhd_1235_1241_ac_codes[257] = {
     65535,
 };
 
-static const uint8_t dnxhd_1235_1241_ac_bits[257] = {
+/* Used in CID 1235, 1236, 1241, 1256, 1257, 1270, 1271 */
+static const uint8_t dnxhd_1235_ac_bits[257] = {
      2,  2,  3,  4,  4,  5,  5,  5,  6,  6,  6,  6,  7,  7,  7,  7,
      8,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9, 10, 10, 10,
     10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11,
@@ -577,59 +524,43 @@ static const uint8_t dnxhd_1235_1241_ac_bits[257] = {
     16,
 };
 
-static const uint8_t dnxhd_1235_1241_ac_level[257] = {
-      3,  3,  5,  7,  0,  9, 11,  5, 13, 15, 17,  7, 19, 21, 23,  9,
-     25, 27, 29, 31, 33, 11, 35, 37, 39, 41, 43, 13, 15, 45, 47, 49,
-     51, 53, 55, 57, 59, 17, 19, 61, 63, 65, 67, 69, 71, 73, 75, 77,
-     21, 23, 79, 81, 83, 85, 87, 89, 91, 93, 95, 97, 99,101, 25, 27,
-     29, 31,103,105,107,109,111,113,115,117,119,121,123,125,127,  3,
-     33, 35, 37, 39,129,  5,  7,  9, 11, 13, 15, 17, 19, 21, 23, 25,
-     27, 29, 31, 33, 35, 41, 43, 45, 47, 49, 37, 39, 41, 43, 45, 47,
-     49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79,
-     81, 83, 85, 51, 53, 55, 57, 59, 61, 63, 65, 87, 89, 91, 93, 95,
-     97, 99,101,103,105,107,109,111,113,115,117,119,121,123,125,127,
-    129, 67, 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91, 93, 95,
-     97, 99,101,103,105,107,109,111,113,115,117,119,121,123,125,127,
-    129,  3,  5,  7,  9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
-     33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63,
-     65, 67, 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91, 93, 95,
-     97, 99,101,103,105,107,109,111,113,115,117,119,121,123,125,127,
-    129,
-};
-
-static const uint8_t dnxhd_1235_1241_ac_flags[257] = {
-    0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2,
-    0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0,
-    0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2,
-    2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
-    2, 2, 2, 2, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    3,
-};
-
-static const uint8_t dnxhd_1235_1241_run[62] = {
-     1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
-    18, 20, 17, 19, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
-    33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
-    49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
+/* Used in CID 1235, 1241, 1256, 1270, 1271 */
+static const uint8_t dnxhd_1235_ac_info[2*257] = {
+      3, 0,   3, 2,   5, 0,   7, 0,   0, 0,   9, 0,  11, 0,   5, 2,
+     13, 0,  15, 0,  17, 0,   7, 2,  19, 0,  21, 0,  23, 0,   9, 2,
+     25, 0,  27, 0,  29, 0,  31, 0,  33, 0,  11, 2,  35, 0,  37, 0,
+     39, 0,  41, 0,  43, 0,  13, 2,  15, 2,  45, 0,  47, 0,  49, 0,
+     51, 0,  53, 0,  55, 0,  57, 0,  59, 0,  17, 2,  19, 2,  61, 0,
+     63, 0,  65, 0,  67, 0,  69, 0,  71, 0,  73, 0,  75, 0,  77, 0,
+     21, 2,  23, 2,  79, 0,  81, 0,  83, 0,  85, 0,  87, 0,  89, 0,
+     91, 0,  93, 0,  95, 0,  97, 0,  99, 0, 101, 0,  25, 2,  27, 2,
+     29, 2,  31, 2, 103, 0, 105, 0, 107, 0, 109, 0, 111, 0, 113, 0,
+    115, 0, 117, 0, 119, 0, 121, 0, 123, 0, 125, 0, 127, 0,   3, 1,
+     33, 2,  35, 2,  37, 2,  39, 2, 129, 0,   5, 1,   7, 1,   9, 1,
+     11, 1,  13, 1,  15, 1,  17, 1,  19, 1,  21, 1,  23, 1,  25, 1,
+     27, 1,  29, 1,  31, 1,  33, 1,  35, 1,  41, 2,  43, 2,  45, 2,
+     47, 2,  49, 2,  37, 1,  39, 1,  41, 1,  43, 1,  45, 1,  47, 1,
+     49, 1,  51, 1,  53, 1,  55, 1,  57, 1,  59, 1,  61, 1,  63, 1,
+     65, 1,  67, 1,  69, 1,  71, 1,  73, 1,  75, 1,  77, 1,  79, 1,
+     81, 1,  83, 1,  85, 1,  51, 2,  53, 2,  55, 2,  57, 2,  59, 2,
+     61, 2,  63, 2,  65, 2,  87, 1,  89, 1,  91, 1,  93, 1,  95, 1,
+     97, 1,  99, 1, 101, 1, 103, 1, 105, 1, 107, 1, 109, 1, 111, 1,
+    113, 1, 115, 1, 117, 1, 119, 1, 121, 1, 123, 1, 125, 1, 127, 1,
+    129, 1,  67, 2,  69, 2,  71, 2,  73, 2,  75, 2,  77, 2,  79, 2,
+     81, 2,  83, 2,  85, 2,  87, 2,  89, 2,  91, 2,  93, 2,  95, 2,
+     97, 2,  99, 2, 101, 2, 103, 2, 105, 2, 107, 2, 109, 2, 111, 2,
+    113, 2, 115, 2, 117, 2, 119, 2, 121, 2, 123, 2, 125, 2, 127, 2,
+    129, 2,   3, 3,   5, 3,   7, 3,   9, 3,  11, 3,  13, 3,  15, 3,
+     17, 3,  19, 3,  21, 3,  23, 3,  25, 3,  27, 3,  29, 3,  31, 3,
+     33, 3,  35, 3,  37, 3,  39, 3,  41, 3,  43, 3,  45, 3,  47, 3,
+     49, 3,  51, 3,  53, 3,  55, 3,  57, 3,  59, 3,  61, 3,  63, 3,
+     65, 3,  67, 3,  69, 3,  71, 3,  73, 3,  75, 3,  77, 3,  79, 3,
+     81, 3,  83, 3,  85, 3,  87, 3,  89, 3,  91, 3,  93, 3,  95, 3,
+     97, 3,  99, 3, 101, 3, 103, 3, 105, 3, 107, 3, 109, 3, 111, 3,
+    113, 3, 115, 3, 117, 3, 119, 3, 121, 3, 123, 3, 125, 3, 127, 3,
+    129, 3,
 };
 
-static const uint8_t dnxhd_1250_dc_codes[14] = {
-    10, 62, 11, 12, 13, 0, 1, 2, 3, 4, 14, 30, 126, 127
-};
-static const uint8_t dnxhd_1250_dc_bits[14] = {
-    4, 6, 4, 4, 4, 3, 3, 3, 3, 3, 4, 5, 7, 7
-};
 static const uint16_t dnxhd_1250_ac_codes[257] = {
         0,     1,     4,    10,    11,    24,    25,    26,
        54,    55,    56,    57,   116,   117,   118,   119,
@@ -684,73 +615,41 @@ static const uint8_t dnxhd_1250_ac_bits[257] = {
     16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
     16
 };
-static const uint8_t dnxhd_1250_ac_level[257] = {
-      3,  3,  5,  7,  0,  9, 11,  5, 13, 15, 17,  7, 19, 21, 23,  9,
-     25, 27, 29, 31, 33, 11, 35, 37, 39, 41, 43, 45, 13, 47, 49, 51,
-     53, 55, 57, 59, 15, 17, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79,
-     19, 21, 81, 83, 85, 87, 89, 91, 93, 95, 97, 99,101,103,105, 23,
-     25, 27,107,109,111,113,115,117,119,121,123,125,127,129,  3,  5,
-      7,  9, 11, 29, 31, 33, 35, 13, 15, 17, 19, 21, 23, 25, 27, 29,
-     31, 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 37, 39, 41, 43,
-     55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79, 81, 83, 85,
-     87, 89, 91, 93, 95, 97, 99,101,103,105,107,111,113, 45, 47, 49,
-     51, 53, 55,109,115,117,119,121,123,125,127,129, 57, 59, 61, 63,
-     65, 67, 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91, 93, 95,
-     97, 99,101,103,105,107,109,111,113,115,117,119,121,123,125,127,
-    129,  3,  5,  7,  9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
-     33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63,
-     65, 67, 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91, 93, 95,
-     97, 99,101,103,105,107,109,111,113,115,117,119,121,123,125,127,
-    129
-};
-static const uint8_t dnxhd_1250_ac_flags[257] = {
-    0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2,
-    0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0,
-    0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
-    2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
-    1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2,
-    2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2,
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    3,
-};
-static const uint16_t dnxhd_1250_run_codes[62] = {
-       0,    4,    5,   12,   26,   27,   28,   58,
-     118,  119,  120,  242,  486,  487,  976,  977,
-     978,  979,  980,  981,  982,  983,  984,  985,
-     986,  987,  988,  989,  990,  991,  992,  993,
-     994,  995,  996,  997,  998,  999, 1000, 1001,
-    1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009,
-    1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017,
-    1018, 1019, 1020, 1021, 1022, 1023
-};
-static const uint8_t dnxhd_1250_run_bits[62] = {
-     1,  3,  3,  4,  5,  5,  5,  6,  7,  7,  7,  8,  9,  9, 10, 10,
-    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
-    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
-    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10
-};
-static const uint8_t dnxhd_1250_run[62] = {
-     1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
-    17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
-    33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
-    49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62
-};
 
-static const uint8_t dnxhd_1251_dc_codes[12] = {
-    0, 12, 13, 1, 2, 3, 4, 5, 14, 30, 62, 63,
-};
-
-static const uint8_t dnxhd_1251_dc_bits[12] = {
-    3, 4, 4, 3, 3, 3, 3, 3, 4, 5, 6, 6,
+static const uint8_t dnxhd_1250_ac_info[2*257] = {
+      3, 0,   3, 2,   5, 0,   7, 0,   0, 0,   9, 0,  11, 0,   5, 2,
+     13, 0,  15, 0,  17, 0,   7, 2,  19, 0,  21, 0,  23, 0,   9, 2,
+     25, 0,  27, 0,  29, 0,  31, 0,  33, 0,  11, 2,  35, 0,  37, 0,
+     39, 0,  41, 0,  43, 0,  45, 0,  13, 2,  47, 0,  49, 0,  51, 0,
+     53, 0,  55, 0,  57, 0,  59, 0,  15, 2,  17, 2,  61, 0,  63, 0,
+     65, 0,  67, 0,  69, 0,  71, 0,  73, 0,  75, 0,  77, 0,  79, 0,
+     19, 2,  21, 2,  81, 0,  83, 0,  85, 0,  87, 0,  89, 0,  91, 0,
+     93, 0,  95, 0,  97, 0,  99, 0, 101, 0, 103, 0, 105, 0,  23, 2,
+     25, 2,  27, 2, 107, 0, 109, 0, 111, 0, 113, 0, 115, 0, 117, 0,
+    119, 0, 121, 0, 123, 0, 125, 0, 127, 0, 129, 0,   3, 1,   5, 1,
+      7, 1,   9, 1,  11, 1,  29, 2,  31, 2,  33, 2,  35, 2,  13, 1,
+     15, 1,  17, 1,  19, 1,  21, 1,  23, 1,  25, 1,  27, 1,  29, 1,
+     31, 1,  33, 1,  35, 1,  37, 1,  39, 1,  41, 1,  43, 1,  45, 1,
+     47, 1,  49, 1,  51, 1,  53, 1,  37, 2,  39, 2,  41, 2,  43, 2,
+     55, 1,  57, 1,  59, 1,  61, 1,  63, 1,  65, 1,  67, 1,  69, 1,
+     71, 1,  73, 1,  75, 1,  77, 1,  79, 1,  81, 1,  83, 1,  85, 1,
+     87, 1,  89, 1,  91, 1,  93, 1,  95, 1,  97, 1,  99, 1, 101, 1,
+    103, 1, 105, 1, 107, 1, 111, 1, 113, 1,  45, 2,  47, 2,  49, 2,
+     51, 2,  53, 2,  55, 2, 109, 1, 115, 1, 117, 1, 119, 1, 121, 1,
+    123, 1, 125, 1, 127, 1, 129, 1,  57, 2,  59, 2,  61, 2,  63, 2,
+     65, 2,  67, 2,  69, 2,  71, 2,  73, 2,  75, 2,  77, 2,  79, 2,
+     81, 2,  83, 2,  85, 2,  87, 2,  89, 2,  91, 2,  93, 2,  95, 2,
+     97, 2,  99, 2, 101, 2, 103, 2, 105, 2, 107, 2, 109, 2, 111, 2,
+    113, 2, 115, 2, 117, 2, 119, 2, 121, 2, 123, 2, 125, 2, 127, 2,
+    129, 2,   3, 3,   5, 3,   7, 3,   9, 3,  11, 3,  13, 3,  15, 3,
+     17, 3,  19, 3,  21, 3,  23, 3,  25, 3,  27, 3,  29, 3,  31, 3,
+     33, 3,  35, 3,  37, 3,  39, 3,  41, 3,  43, 3,  45, 3,  47, 3,
+     49, 3,  51, 3,  53, 3,  55, 3,  57, 3,  59, 3,  61, 3,  63, 3,
+     65, 3,  67, 3,  69, 3,  71, 3,  73, 3,  75, 3,  77, 3,  79, 3,
+     81, 3,  83, 3,  85, 3,  87, 3,  89, 3,  91, 3,  93, 3,  95, 3,
+     97, 3,  99, 3, 101, 3, 103, 3, 105, 3, 107, 3, 109, 3, 111, 3,
+    113, 3, 115, 3, 117, 3, 119, 3, 121, 3, 123, 3, 125, 3, 127, 3,
+    129, 3,
 };
 
 static const uint16_t dnxhd_1251_ac_codes[257] = {
@@ -809,79 +708,43 @@ static const uint8_t dnxhd_1251_ac_bits[257] = {
     16,
 };
 
-static const uint8_t dnxhd_1251_ac_level[257] = {
-      3,  3,  5,  7,  0,  9, 11,  5, 13, 15, 17,  7, 19, 21, 23,  9,
-     25, 27, 29, 31, 33, 11, 35, 37, 39, 41, 43, 13, 45, 47, 49, 51,
-     53, 55, 57, 59, 15, 17, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79,
-     81, 19, 21, 23, 83, 85, 87, 89, 91, 93, 95, 97, 99,101,103,105,
-     25, 27, 29,107,109,111,113,115,117,119,121,123,125,127,129,  3,
-      5,  7,  9, 11, 13, 15, 17, 31, 33, 35, 19, 21, 23, 25, 27, 29,
-     31, 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 37,
-     39, 41, 43, 45, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79, 81, 83,
-     85, 87, 89, 91, 93, 95, 97, 99,101,103,105,107,109,111,113,115,
-    117, 47, 49, 51, 53, 55, 57,119,121,123,125,127,129, 59, 61, 63,
-     65, 67, 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91, 93, 95,
-     97, 99,101,103,105,107,109,111,113,115,117,119,121,123,125,127,
-    129,  3,  5,  7,  9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
-     33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63,
-     65, 67, 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91, 93, 95,
-     97, 99,101,103,105,107,109,111,113,115,117,119,121,123,125,127,
-    129,
-};
-
-static const uint8_t dnxhd_1251_ac_flags[257] = {
-    0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2,
-    0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0,
-    0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
-    1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2,
-    2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2, 2, 2,
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    3,
-};
-
-static const uint16_t dnxhd_1251_run_codes[62] = {
-       0,    4,    5,   12,   26,   27,   28,   58,
-     118,  119,  120,  242,  486,  487,  976,  977,
-     978,  979,  980,  981,  982,  983,  984,  985,
-     986,  987,  988,  989,  990,  991,  992,  993,
-     994,  995,  996,  997,  998,  999, 1000, 1001,
-    1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009,
-    1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017,
-    1018, 1019, 1020, 1021, 1022, 1023,
-};
-
-static const uint8_t dnxhd_1251_run_bits[62] = {
-     1,  3,  3,  4,  5,  5,  5,  6,  7,  7,  7,  8,  9,  9, 10, 10,
-    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
-    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
-    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
-};
-
-static const uint8_t dnxhd_1251_run[62] = {
-     1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
-    17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
-    33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
-    49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
-};
-
-static const uint8_t dnxhd_1252_dc_codes[12] = {
-    0, 12, 13, 1, 2, 3, 4, 5, 14, 30, 62, 63,
-};
-
-static const uint8_t dnxhd_1252_dc_bits[12] = {
-    3, 4, 4, 3, 3, 3, 3, 3, 4, 5, 6, 6,
-};
-
+static const uint8_t dnxhd_1251_ac_info[2*257] = {
+      3, 0,   3, 2,   5, 0,   7, 0,   0, 0,   9, 0,  11, 0,   5, 2,
+     13, 0,  15, 0,  17, 0,   7, 2,  19, 0,  21, 0,  23, 0,   9, 2,
+     25, 0,  27, 0,  29, 0,  31, 0,  33, 0,  11, 2,  35, 0,  37, 0,
+     39, 0,  41, 0,  43, 0,  13, 2,  45, 0,  47, 0,  49, 0,  51, 0,
+     53, 0,  55, 0,  57, 0,  59, 0,  15, 2,  17, 2,  61, 0,  63, 0,
+     65, 0,  67, 0,  69, 0,  71, 0,  73, 0,  75, 0,  77, 0,  79, 0,
+     81, 0,  19, 2,  21, 2,  23, 2,  83, 0,  85, 0,  87, 0,  89, 0,
+     91, 0,  93, 0,  95, 0,  97, 0,  99, 0, 101, 0, 103, 0, 105, 0,
+     25, 2,  27, 2,  29, 2, 107, 0, 109, 0, 111, 0, 113, 0, 115, 0,
+    117, 0, 119, 0, 121, 0, 123, 0, 125, 0, 127, 0, 129, 0,   3, 1,
+      5, 1,   7, 1,   9, 1,  11, 1,  13, 1,  15, 1,  17, 1,  31, 2,
+     33, 2,  35, 2,  19, 1,  21, 1,  23, 1,  25, 1,  27, 1,  29, 1,
+     31, 1,  33, 1,  35, 1,  37, 1,  39, 1,  41, 1,  43, 1,  45, 1,
+     47, 1,  49, 1,  51, 1,  53, 1,  55, 1,  57, 1,  59, 1,  37, 2,
+     39, 2,  41, 2,  43, 2,  45, 2,  61, 1,  63, 1,  65, 1,  67, 1,
+     69, 1,  71, 1,  73, 1,  75, 1,  77, 1,  79, 1,  81, 1,  83, 1,
+     85, 1,  87, 1,  89, 1,  91, 1,  93, 1,  95, 1,  97, 1,  99, 1,
+    101, 1, 103, 1, 105, 1, 107, 1, 109, 1, 111, 1, 113, 1, 115, 1,
+    117, 1,  47, 2,  49, 2,  51, 2,  53, 2,  55, 2,  57, 2, 119, 1,
+    121, 1, 123, 1, 125, 1, 127, 1, 129, 1,  59, 2,  61, 2,  63, 2,
+     65, 2,  67, 2,  69, 2,  71, 2,  73, 2,  75, 2,  77, 2,  79, 2,
+     81, 2,  83, 2,  85, 2,  87, 2,  89, 2,  91, 2,  93, 2,  95, 2,
+     97, 2,  99, 2, 101, 2, 103, 2, 105, 2, 107, 2, 109, 2, 111, 2,
+    113, 2, 115, 2, 117, 2, 119, 2, 121, 2, 123, 2, 125, 2, 127, 2,
+    129, 2,   3, 3,   5, 3,   7, 3,   9, 3,  11, 3,  13, 3,  15, 3,
+     17, 3,  19, 3,  21, 3,  23, 3,  25, 3,  27, 3,  29, 3,  31, 3,
+     33, 3,  35, 3,  37, 3,  39, 3,  41, 3,  43, 3,  45, 3,  47, 3,
+     49, 3,  51, 3,  53, 3,  55, 3,  57, 3,  59, 3,  61, 3,  63, 3,
+     65, 3,  67, 3,  69, 3,  71, 3,  73, 3,  75, 3,  77, 3,  79, 3,
+     81, 3,  83, 3,  85, 3,  87, 3,  89, 3,  91, 3,  93, 3,  95, 3,
+     97, 3,  99, 3, 101, 3, 103, 3, 105, 3, 107, 3, 109, 3, 111, 3,
+    113, 3, 115, 3, 117, 3, 119, 3, 121, 3, 123, 3, 125, 3, 127, 3,
+    129, 3,
+};
+
+/* Used in CID 1252, 1258 */
 static const uint16_t dnxhd_1252_ac_codes[257] = {
         0,     1,     4,    10,    11,    12,    26,    27,
        56,    57,    58,   118,   119,   120,   242,   243,
@@ -918,6 +781,7 @@ static const uint16_t dnxhd_1252_ac_codes[257] = {
     65535,
 };
 
+/* Used in CID 1252, 1258 */
 static const uint8_t dnxhd_1252_ac_bits[257] = {
      2,  2,  3,  4,  4,  4,  5,  5,  6,  6,  6,  7,  7,  7,  8,  8,
      8,  8,  8,  8,  9,  9,  9,  9,  9, 10, 10, 10, 10, 10, 10, 10,
@@ -938,151 +802,280 @@ static const uint8_t dnxhd_1252_ac_bits[257] = {
     16,
 };
 
-static const uint8_t dnxhd_1252_ac_level[257] = {
-      3,  3,  5,  7,  5,  0,  9, 11, 13, 15,  7, 17, 19, 21, 23, 25,
-     27, 29,  9, 11, 31, 33, 35, 37, 13, 39, 41, 43, 45, 47, 49, 15,
-     17, 51, 53, 55, 57, 59, 61, 63, 65, 19, 21, 67, 69, 71, 73, 75,
-     77, 79, 81, 83, 23, 25, 27, 85, 87, 89, 91, 93, 95, 97, 99,101,
-    103,105,107, 29, 31, 33,109,111,113,115,117,119,121,123,125,127,
-    129,  3,  5,  7, 35, 37, 39, 41,  9, 11, 13, 15, 17, 19, 21, 23,
-     25, 27, 29, 31, 33, 35, 37, 39, 41, 43, 43, 45, 47, 49, 51, 45,
-     47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77,
-     79, 81, 83, 85, 87, 89, 91, 93, 95, 97, 99,101,103,105,107,109,
-    111,113,115,117,119,121,123,125,127,129, 53, 55, 57, 59, 61, 63,
-     65, 67, 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91, 93, 95,
-     97, 99,101,103,105,107,109,111,113,115,117,119,121,123,125,127,
-    129,  3,  5,  7,  9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
-     33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63,
-     65, 67, 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91, 93, 95,
-     97, 99,101,103,105,107,109,111,113,115,117,119,121,123,125,127,
-    129,
+/* Used in CID 1252, 1258. Two bytes per AC code (hence 2*257 entries): the level value, then its flags byte. */
+static const uint8_t dnxhd_1252_ac_info[2*257] = {
+      3, 0,   3, 2,   5, 0,   7, 0,   5, 2,   0, 0,   9, 0,  11, 0,
+     13, 0,  15, 0,   7, 2,  17, 0,  19, 0,  21, 0,  23, 0,  25, 0,
+     27, 0,  29, 0,   9, 2,  11, 2,  31, 0,  33, 0,  35, 0,  37, 0,
+     13, 2,  39, 0,  41, 0,  43, 0,  45, 0,  47, 0,  49, 0,  15, 2,
+     17, 2,  51, 0,  53, 0,  55, 0,  57, 0,  59, 0,  61, 0,  63, 0,
+     65, 0,  19, 2,  21, 2,  67, 0,  69, 0,  71, 0,  73, 0,  75, 0,
+     77, 0,  79, 0,  81, 0,  83, 0,  23, 2,  25, 2,  27, 2,  85, 0,
+     87, 0,  89, 0,  91, 0,  93, 0,  95, 0,  97, 0,  99, 0, 101, 0,
+    103, 0, 105, 0, 107, 0,  29, 2,  31, 2,  33, 2, 109, 0, 111, 0,
+    113, 0, 115, 0, 117, 0, 119, 0, 121, 0, 123, 0, 125, 0, 127, 0,
+    129, 0,   3, 1,   5, 1,   7, 1,  35, 2,  37, 2,  39, 2,  41, 2,
+      9, 1,  11, 1,  13, 1,  15, 1,  17, 1,  19, 1,  21, 1,  23, 1,
+     25, 1,  27, 1,  29, 1,  31, 1,  33, 1,  35, 1,  37, 1,  39, 1,
+     41, 1,  43, 1,  43, 2,  45, 2,  47, 2,  49, 2,  51, 2,  45, 1,
+     47, 1,  49, 1,  51, 1,  53, 1,  55, 1,  57, 1,  59, 1,  61, 1,
+     63, 1,  65, 1,  67, 1,  69, 1,  71, 1,  73, 1,  75, 1,  77, 1,
+     79, 1,  81, 1,  83, 1,  85, 1,  87, 1,  89, 1,  91, 1,  93, 1,
+     95, 1,  97, 1,  99, 1, 101, 1, 103, 1, 105, 1, 107, 1, 109, 1,
+    111, 1, 113, 1, 115, 1, 117, 1, 119, 1, 121, 1, 123, 1, 125, 1,
+    127, 1, 129, 1,  53, 2,  55, 2,  57, 2,  59, 2,  61, 2,  63, 2,
+     65, 2,  67, 2,  69, 2,  71, 2,  73, 2,  75, 2,  77, 2,  79, 2,
+     81, 2,  83, 2,  85, 2,  87, 2,  89, 2,  91, 2,  93, 2,  95, 2,
+     97, 2,  99, 2, 101, 2, 103, 2, 105, 2, 107, 2, 109, 2, 111, 2,
+    113, 2, 115, 2, 117, 2, 119, 2, 121, 2, 123, 2, 125, 2, 127, 2,
+    129, 2,   3, 3,   5, 3,   7, 3,   9, 3,  11, 3,  13, 3,  15, 3,
+     17, 3,  19, 3,  21, 3,  23, 3,  25, 3,  27, 3,  29, 3,  31, 3,
+     33, 3,  35, 3,  37, 3,  39, 3,  41, 3,  43, 3,  45, 3,  47, 3,
+     49, 3,  51, 3,  53, 3,  55, 3,  57, 3,  59, 3,  61, 3,  63, 3,
+     65, 3,  67, 3,  69, 3,  71, 3,  73, 3,  75, 3,  77, 3,  79, 3,
+     81, 3,  83, 3,  85, 3,  87, 3,  89, 3,  91, 3,  93, 3,  95, 3,
+     97, 3,  99, 3, 101, 3, 103, 3, 105, 3, 107, 3, 109, 3, 111, 3,
+    113, 3, 115, 3, 117, 3, 119, 3, 121, 3, 123, 3, 125, 3, 127, 3,
+    129, 3,
+};
+
+/* Used in CID 1235, 1238, 1241, 1243, 1256, 1270, 1271, 1272. Codewords for the run VLC; their bit lengths are in dnxhd_1235_run_bits. */
+static const uint16_t dnxhd_1235_run_codes[62] = {
+       0,    4,   10,   11,   24,   25,   26,   27,
+      56,   57,   58,   59,  120,  242,  486,  487,
+     488,  489,  980,  981,  982,  983,  984,  985,
+     986,  987,  988,  989,  990,  991,  992,  993,
+     994,  995,  996,  997,  998,  999, 1000, 1001,
+    1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009,
+    1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017,
+    1018, 1019, 1020, 1021, 1022, 1023,
+};
+
+/* Used in CID 1235, 1238, 1241, 1243, 1256, 1270, 1271, 1272. Bit length of each run VLC codeword, same index as dnxhd_1235_run_codes. */
+static const uint8_t dnxhd_1235_run_bits[62] = {
+     1,  3,  4,  4,  5,  5,  5,  5,  6,  6,  6,  6,  7,  8,  9,  9,
+     9,  9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+};
+
+/* Used in CID 1235, 1241, 1256, 1270, 1271. NOTE(review): presumably the run length each run VLC codeword stands for -- confirm against the decoder. */
+static const uint8_t dnxhd_1235_run[62] = {
+     1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
+    18, 20, 17, 19, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+    33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
+    49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
+};
+
+/* Used in CID 1237, 1242, 1253, 1259, 1260, 1273, 1274. Codewords for the run VLC; their bit lengths are in dnxhd_1237_run_bits. */
+static const uint16_t dnxhd_1237_run_codes[62] = {
+       0,    4,   10,   11,   24,   25,   26,   54,
+      55,   56,   57,   58,  118,  119,  240,  482,
+     483,  484,  485,  486,  487,  488,  489,  490,
+     491,  492,  493,  494,  990,  991,  992,  993,
+     994,  995,  996,  997,  998,  999, 1000, 1001,
+    1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009,
+    1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017,
+    1018, 1019, 1020, 1021, 1022, 1023,
+};
+
+/* Used in CID 1237, 1242, 1253, 1259, 1260, 1273, 1274. Bit length of each run VLC codeword, same index as dnxhd_1237_run_codes. */
+static const uint8_t dnxhd_1237_run_bits[62] = {
+     1,  3,  4,  4,  5,  5,  5,  6,  6,  6,  6,  6,  7,  7,  8,  9,
+     9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9, 10, 10, 10, 10,
+    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+};
+
+/* Used in CID 1237, 1242, 1253, 1259, 1260, 1273, 1274. NOTE(review): presumably the run length each run VLC codeword stands for -- confirm against the decoder. */
+static const uint8_t dnxhd_1237_run[62] = {
+     1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
+    17, 18, 19, 20, 21, 53, 57, 58, 59, 60, 61, 62, 22, 23, 24, 25,
+    26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
+    42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56,
 };
 
-static const uint8_t dnxhd_1252_ac_flags[257] = {
-    0, 2, 0, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0,
-    0, 0, 2, 2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 2,
-    2, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    3,
+/* Used in CID 1238, 1243, 1272 (paired there with the dnxhd_1235 run codes/bits). NOTE(review): presumably the run length per codeword -- confirm. */
+static const uint8_t dnxhd_1238_run[62] = {
+     1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
+    20, 21, 17, 18, 19, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+    33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
+    49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
 };
 
-static const uint8_t dnxhd_1258_dc_codes[14] = {
-    0, 12, 13, 1, 2, 3, 4, 5, 14, 30, 62, 63, 0, 0,
+/* Used in CID 1250, 1251, 1252, 1258. Codewords for the run VLC; their bit lengths are in dnxhd_1250_run_bits. */
+static const uint16_t dnxhd_1250_run_codes[62] = {
+       0,    4,    5,   12,   26,   27,   28,   58,
+     118,  119,  120,  242,  486,  487,  976,  977,
+     978,  979,  980,  981,  982,  983,  984,  985,
+     986,  987,  988,  989,  990,  991,  992,  993,
+     994,  995,  996,  997,  998,  999, 1000, 1001,
+    1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009,
+    1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017,
+    1018, 1019, 1020, 1021, 1022, 1023,
 };
 
-static const uint8_t dnxhd_1258_dc_bits[14] = {
-    3, 4, 4, 3, 3, 3, 3, 3, 4, 5, 6, 6, 0, 0,
+/* Used in CID 1250, 1251, 1252, 1258. Bit length of each run VLC codeword, same index as dnxhd_1250_run_codes. */
+static const uint8_t dnxhd_1250_run_bits[62] = {
+     1,  3,  3,  4,  5,  5,  5,  6,  7,  7,  7,  8,  9,  9, 10, 10,
+    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+};
+
+/* Used in CID 1250, 1251, 1252, 1258. NOTE(review): presumably the run length each run VLC codeword stands for -- confirm against the decoder. */
+static const uint8_t dnxhd_1250_run[62] = {
+     1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
+    17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+    33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
+    49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
 };
 
 const CIDEntry ff_dnxhd_cid_table[] = {
-    { 1235, 1920, 1080, 0, 917504, 917504, 6, 10, 4,
+    { 1235, 1920, 1080, 917504, 917504,
+      0, 6, 10, 4,
       dnxhd_1235_luma_weight, dnxhd_1235_chroma_weight,
-      dnxhd_1235_1241_dc_codes, dnxhd_1235_1241_dc_bits,
-      dnxhd_1235_1241_ac_codes, dnxhd_1235_1241_ac_bits, dnxhd_1235_1241_ac_level,
-      dnxhd_1235_1241_ac_flags,
-      dnxhd_1235_1238_1241_run_codes, dnxhd_1235_1238_1241_run_bits, dnxhd_1235_1241_run,
+      dnxhd_1235_dc_codes, dnxhd_1235_dc_bits,
+      dnxhd_1235_ac_codes, dnxhd_1235_ac_bits, dnxhd_1235_ac_info,
+      dnxhd_1235_run_codes, dnxhd_1235_run_bits, dnxhd_1235_run,
       { 175, 185, 365, 440 },
       { { 24000, 1001 }, { 25, 1 }, { 50, 1 }, { 60000, 1001 } } },
-    { 1237, 1920, 1080, 0, 606208, 606208, 4, 8, 3,
+    { 1237, 1920, 1080, 606208, 606208,
+      0, 4, 8, 3,
       dnxhd_1237_luma_weight, dnxhd_1237_chroma_weight,
       dnxhd_1237_dc_codes, dnxhd_1237_dc_bits,
-      dnxhd_1237_ac_codes, dnxhd_1237_ac_bits, dnxhd_1237_ac_level,
-      dnxhd_1237_ac_flags,
+      dnxhd_1237_ac_codes, dnxhd_1237_ac_bits, dnxhd_1237_ac_info,
       dnxhd_1237_run_codes, dnxhd_1237_run_bits, dnxhd_1237_run,
       { 115, 120, 145, 240, 290 },
       { { 24000, 1001 }, { 25, 1 }, { 30000, 1001 }, { 50, 1 }, { 60000, 1001 } } },
-    { 1238, 1920, 1080, 0, 917504, 917504, 4, 8, 4,
+    { 1238, 1920, 1080, 917504, 917504,
+      0, 4, 8, 4,
       dnxhd_1238_luma_weight, dnxhd_1238_chroma_weight,
-      dnxhd_1238_dc_codes, dnxhd_1238_dc_bits,
-      dnxhd_1238_ac_codes, dnxhd_1238_ac_bits, dnxhd_1238_ac_level,
-      dnxhd_1238_ac_flags,
-      dnxhd_1235_1238_1241_run_codes, dnxhd_1235_1238_1241_run_bits, dnxhd_1238_run,
+      dnxhd_1237_dc_codes, dnxhd_1237_dc_bits,
+      dnxhd_1238_ac_codes, dnxhd_1238_ac_bits, dnxhd_1238_ac_info,
+      dnxhd_1235_run_codes, dnxhd_1235_run_bits, dnxhd_1238_run,
       { 175, 185, 220, 365, 440 },
       { { 24000, 1001 }, { 25, 1 }, { 30000, 1001 }, { 50, 1 }, { 60000, 1001 } } },
-    { 1241, 1920, 1080, 1, 917504, 458752, 6, 10, 4,
+    { 1241, 1920, 1080, 917504, 458752,
+      DNXHD_INTERLACED, 6, 10, 4,
       dnxhd_1241_luma_weight, dnxhd_1241_chroma_weight,
-      dnxhd_1235_1241_dc_codes, dnxhd_1235_1241_dc_bits,
-      dnxhd_1235_1241_ac_codes, dnxhd_1235_1241_ac_bits, dnxhd_1235_1241_ac_level,
-      dnxhd_1235_1241_ac_flags,
-      dnxhd_1235_1238_1241_run_codes, dnxhd_1235_1238_1241_run_bits, dnxhd_1235_1241_run,
+      dnxhd_1235_dc_codes, dnxhd_1235_dc_bits,
+      dnxhd_1235_ac_codes, dnxhd_1235_ac_bits, dnxhd_1235_ac_info,
+      dnxhd_1235_run_codes, dnxhd_1235_run_bits, dnxhd_1235_run,
       { 185, 220 },
       { { 25, 1 }, { 30000, 1001 } } },
-    { 1242, 1920, 1080, 1, 606208, 303104, 4, 8, 3,
+    { 1242, 1920, 1080, 606208, 303104,
+      DNXHD_INTERLACED, 4, 8, 3,
       dnxhd_1242_luma_weight, dnxhd_1242_chroma_weight,
       dnxhd_1237_dc_codes, dnxhd_1237_dc_bits,
-      dnxhd_1237_ac_codes, dnxhd_1237_ac_bits, dnxhd_1237_ac_level,
-      dnxhd_1237_ac_flags,
+      dnxhd_1237_ac_codes, dnxhd_1237_ac_bits, dnxhd_1237_ac_info,
       dnxhd_1237_run_codes, dnxhd_1237_run_bits, dnxhd_1237_run,
       { 120, 145 },
       { { 25, 1 }, { 30000, 1001 } } },
-    { 1243, 1920, 1080, 1, 917504, 458752, 4, 8, 4,
+    { 1243, 1920, 1080, 917504, 458752,
+      DNXHD_INTERLACED, 4, 8, 4,
       dnxhd_1243_luma_weight, dnxhd_1243_chroma_weight,
-      dnxhd_1238_dc_codes, dnxhd_1238_dc_bits,
-      dnxhd_1238_ac_codes, dnxhd_1238_ac_bits, dnxhd_1238_ac_level,
-      dnxhd_1238_ac_flags,
-      dnxhd_1235_1238_1241_run_codes, dnxhd_1235_1238_1241_run_bits, dnxhd_1238_run,
+      dnxhd_1237_dc_codes, dnxhd_1237_dc_bits,
+      dnxhd_1238_ac_codes, dnxhd_1238_ac_bits, dnxhd_1238_ac_info,
+      dnxhd_1235_run_codes, dnxhd_1235_run_bits, dnxhd_1238_run,
       { 185, 220 },
       { { 25, 1 }, { 30000, 1001 } } },
-    { 1250, 1280,  720, 0, 458752, 458752, 6, 10, 4,
+    { 1250, 1280,  720, 458752, 458752,
+      0, 6, 10, 4,
       dnxhd_1250_luma_weight, dnxhd_1250_chroma_weight,
-      dnxhd_1250_dc_codes, dnxhd_1250_dc_bits,
-      dnxhd_1250_ac_codes, dnxhd_1250_ac_bits, dnxhd_1250_ac_level,
-      dnxhd_1250_ac_flags,
+      dnxhd_1235_dc_codes, dnxhd_1235_dc_bits,
+      dnxhd_1250_ac_codes, dnxhd_1250_ac_bits, dnxhd_1250_ac_info,
       dnxhd_1250_run_codes, dnxhd_1250_run_bits, dnxhd_1250_run,
       { 90, 90, 180, 220 },
       { { 24000, 1001 }, { 25, 1 }, { 50, 1 }, { 60000, 1001 } } },
-    { 1251, 1280,  720, 0, 458752, 458752, 4, 8, 4,
+    { 1251, 1280,  720, 458752, 458752,
+      0, 4, 8, 4,
       dnxhd_1251_luma_weight, dnxhd_1251_chroma_weight,
-      dnxhd_1251_dc_codes, dnxhd_1251_dc_bits,
-      dnxhd_1251_ac_codes, dnxhd_1251_ac_bits, dnxhd_1251_ac_level,
-      dnxhd_1251_ac_flags,
-      dnxhd_1251_run_codes, dnxhd_1251_run_bits, dnxhd_1251_run,
+      dnxhd_1237_dc_codes, dnxhd_1237_dc_bits,
+      dnxhd_1251_ac_codes, dnxhd_1251_ac_bits, dnxhd_1251_ac_info,
+      dnxhd_1250_run_codes, dnxhd_1250_run_bits, dnxhd_1250_run,
       { 90, 90, 110, 180, 220 },
       { { 24000, 1001 }, { 25, 1 }, { 30000, 1001 }, { 50, 1 }, { 60000, 1001 } } },
-    { 1252, 1280,  720, 0, 303104, 303104, 4, 8, 5,
+    { 1252, 1280,  720, 303104, 303104,
+      0, 4, 8, 5,
       dnxhd_1252_luma_weight, dnxhd_1252_chroma_weight,
-      dnxhd_1252_dc_codes, dnxhd_1252_dc_bits,
-      dnxhd_1252_ac_codes, dnxhd_1252_ac_bits, dnxhd_1252_ac_level,
-      dnxhd_1252_ac_flags,
-      dnxhd_1251_run_codes, dnxhd_1251_run_bits, dnxhd_1251_run,
+      dnxhd_1237_dc_codes, dnxhd_1237_dc_bits,
+      dnxhd_1252_ac_codes, dnxhd_1252_ac_bits, dnxhd_1252_ac_info,
+      dnxhd_1250_run_codes, dnxhd_1250_run_bits, dnxhd_1250_run,
       { 60, 60, 75, 120, 145 },
       { { 24000, 1001 }, { 25, 1 }, { 30000, 1001 }, { 50, 1 }, { 60000, 1001 } } },
-    { 1253, 1920, 1080, 0, 188416, 188416, 4, 8, 3,
+    { 1253, 1920, 1080, 188416, 188416,
+      0, 4, 8, 3,
       dnxhd_1237_luma_weight, dnxhd_1237_chroma_weight,
       dnxhd_1237_dc_codes, dnxhd_1237_dc_bits,
-      dnxhd_1237_ac_codes, dnxhd_1237_ac_bits, dnxhd_1237_ac_level,
-      dnxhd_1237_ac_flags,
+      dnxhd_1237_ac_codes, dnxhd_1237_ac_bits, dnxhd_1237_ac_info,
       dnxhd_1237_run_codes, dnxhd_1237_run_bits, dnxhd_1237_run,
       { 36, 36, 45, 75, 90 },
       { { 24000, 1001 }, { 25, 1 }, { 30000, 1001 }, { 50, 1 }, { 60000, 1001 } } },
-    { 1256, 1920, 1080, 0, 1835008, 1835008, 6, 10, 4,
-      dnxhd_1235_luma_weight, dnxhd_1256_chroma_weight,
-      dnxhd_1235_1241_dc_codes, dnxhd_1235_1241_dc_bits,
-      dnxhd_1235_1241_ac_codes, dnxhd_1235_1241_ac_bits, dnxhd_1235_1241_ac_level,
-      dnxhd_1235_1241_ac_flags,
-      dnxhd_1235_1238_1241_run_codes, dnxhd_1235_1238_1241_run_bits, dnxhd_1235_1241_run,
+    { 1256, 1920, 1080, 1835008, 1835008,
+      DNXHD_444, 6, 10, 4,
+      dnxhd_1235_luma_weight, dnxhd_1235_luma_weight,
+      dnxhd_1235_dc_codes, dnxhd_1235_dc_bits,
+      dnxhd_1235_ac_codes, dnxhd_1235_ac_bits, dnxhd_1235_ac_info,
+      dnxhd_1235_run_codes, dnxhd_1235_run_bits, dnxhd_1235_run,
       { 350, 390, 440, 730, 880 },
       { { 24000, 1001 }, { 25, 1 }, { 30000, 1001 }, { 50, 1 }, { 60000, 1001 } } },
-    { 1258, 960, 720, 0, 212992, 212992, 4, 8, 5,
-      dnxhd_1258_luma_weight, dnxhd_1258_chroma_weight,
-      dnxhd_1258_dc_codes, dnxhd_1258_dc_bits,
-      dnxhd_1252_ac_codes, dnxhd_1252_ac_bits, dnxhd_1252_ac_level,
-      dnxhd_1252_ac_flags,
-      dnxhd_1251_run_codes, dnxhd_1251_run_bits, dnxhd_1251_run,
+    { 1258, 960, 720, 212992, 212992,
+      0, 4, 8, 5,
+      dnxhd_1252_luma_weight, dnxhd_1252_chroma_weight,
+      dnxhd_1237_dc_codes, dnxhd_1237_dc_bits,
+      dnxhd_1252_ac_codes, dnxhd_1252_ac_bits, dnxhd_1252_ac_info,
+      dnxhd_1250_run_codes, dnxhd_1250_run_bits, dnxhd_1250_run,
       { 42, 60, 75, 115 } },
-
+    { 1259, 1440, 1080, 417792, 417792,
+      0, 4, 8, 3,
+      dnxhd_1237_luma_weight, dnxhd_1237_chroma_weight,
+      dnxhd_1237_dc_codes, dnxhd_1237_dc_bits,
+      dnxhd_1237_ac_codes, dnxhd_1237_ac_bits, dnxhd_1237_ac_info,
+      dnxhd_1237_run_codes, dnxhd_1237_run_bits, dnxhd_1237_run,
+      { 63, 84, 100, 110 } },
+    { 1260, 1440, 1080, 835584, 417792,
+      DNXHD_INTERLACED | DNXHD_MBAFF, 4, 8, 3,
+      dnxhd_1260_luma_weight, dnxhd_1260_chroma_weight,
+      dnxhd_1237_dc_codes, dnxhd_1237_dc_bits,
+      dnxhd_1237_ac_codes, dnxhd_1237_ac_bits, dnxhd_1237_ac_info,
+      dnxhd_1237_run_codes, dnxhd_1237_run_bits, dnxhd_1237_run,
+      { 80, 90, 100, 110 } },
+    { 1270, DNXHD_VARIABLE, DNXHD_VARIABLE, DNXHD_VARIABLE, DNXHD_VARIABLE,
+      DNXHD_444, 6, DNXHD_VARIABLE, 4,
+      dnxhd_1235_luma_weight, dnxhd_1235_luma_weight,
+      dnxhd_1235_dc_codes, dnxhd_1235_dc_bits,
+      dnxhd_1235_ac_codes, dnxhd_1235_ac_bits, dnxhd_1235_ac_info,
+      dnxhd_1235_run_codes, dnxhd_1235_run_bits, dnxhd_1235_run,
+      { 0 } },
+    { 1271, DNXHD_VARIABLE, DNXHD_VARIABLE, DNXHD_VARIABLE, DNXHD_VARIABLE,
+      0, 6, DNXHD_VARIABLE, 4,
+      dnxhd_1241_luma_weight, dnxhd_1241_chroma_weight,
+      dnxhd_1235_dc_codes, dnxhd_1235_dc_bits,
+      dnxhd_1235_ac_codes, dnxhd_1235_ac_bits, dnxhd_1235_ac_info,
+      dnxhd_1235_run_codes, dnxhd_1235_run_bits, dnxhd_1235_run,
+      { 0 } },
+    { 1272, DNXHD_VARIABLE, DNXHD_VARIABLE, DNXHD_VARIABLE, DNXHD_VARIABLE,
+      0, 4, 8, 4,
+      dnxhd_1238_luma_weight, dnxhd_1238_chroma_weight,
+      dnxhd_1237_dc_codes, dnxhd_1237_dc_bits,
+      dnxhd_1238_ac_codes, dnxhd_1238_ac_bits, dnxhd_1238_ac_info,
+      dnxhd_1235_run_codes, dnxhd_1235_run_bits, dnxhd_1238_run,
+      { 0 } },
+    { 1273, DNXHD_VARIABLE, DNXHD_VARIABLE, DNXHD_VARIABLE, DNXHD_VARIABLE,
+      0, 4, 8, 3,
+      dnxhd_1237_luma_weight, dnxhd_1237_chroma_weight,
+      dnxhd_1237_dc_codes, dnxhd_1237_dc_bits,
+      dnxhd_1237_ac_codes, dnxhd_1237_ac_bits, dnxhd_1237_ac_info,
+      dnxhd_1237_run_codes, dnxhd_1237_run_bits, dnxhd_1237_run,
+      { 0 } },
+    { 1274, DNXHD_VARIABLE, DNXHD_VARIABLE, DNXHD_VARIABLE, DNXHD_VARIABLE,
+      0, 4, 8, 3,
+      dnxhd_1237_luma_weight, dnxhd_1237_chroma_weight,
+      dnxhd_1237_dc_codes, dnxhd_1237_dc_bits,
+      dnxhd_1237_ac_codes, dnxhd_1237_ac_bits, dnxhd_1237_ac_info,
+      dnxhd_1237_run_codes, dnxhd_1237_run_bits, dnxhd_1237_run,
+      { 0 } },
 };
 
 int ff_dnxhd_get_cid_table(int cid)
@@ -1107,7 +1100,14 @@ int avpriv_dnxhd_get_interlaced(int cid)
     int i = ff_dnxhd_get_cid_table(cid);
     if (i < 0)
         return i;
-    return ff_dnxhd_cid_table[i].interlaced;
+    return ff_dnxhd_cid_table[i].flags & DNXHD_INTERLACED ? 1 : 0;
+}
+
+uint64_t avpriv_dnxhd_parse_header_prefix(const uint8_t *buf)
+{ /* Reads buf[0..4]; returns the assembled prefix if it is a known frame header, 0 otherwise. Caller must supply at least 5 readable bytes. */
+    uint64_t prefix = AV_RB32(buf); /* header bytes 0-3 as one 32-bit value */
+    prefix = (prefix << 16) | buf[4] << 8; /* append byte 4 plus a trailing zero byte, the layout of the DNXHD_HEADER_* constants */
+    return ff_dnxhd_check_header_prefix(prefix);
 }
 
 int ff_dnxhd_find_cid(AVCodecContext *avctx, int bit_depth)
@@ -1118,9 +1118,15 @@ int ff_dnxhd_find_cid(AVCodecContext *avctx, int bit_depth)
         return 0;
     for (i = 0; i < FF_ARRAY_ELEMS(ff_dnxhd_cid_table); i++) {
         const CIDEntry *cid = &ff_dnxhd_cid_table[i];
+        int interlaced = cid->flags & DNXHD_INTERLACED ? 1 : 0;
         if (cid->width == avctx->width && cid->height == avctx->height &&
-            cid->interlaced == !!(avctx->flags & CODEC_FLAG_INTERLACED_DCT) &&
-            cid->bit_depth == bit_depth) {
+            interlaced == !!(avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) &&
+            !(cid->flags & DNXHD_444) && cid->bit_depth == bit_depth) {
+            if (avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL &&
+                cid->flags & DNXHD_MBAFF) {
+                av_log(avctx, AV_LOG_WARNING, "Profile selected is experimental\n");
+                continue;
+            }
             for (j = 0; j < FF_ARRAY_ELEMS(cid->bit_rates); j++) {
                 if (cid->bit_rates[j] == mbs)
                     return cid->cid;
@@ -1140,7 +1146,7 @@ void ff_dnxhd_print_profiles(AVCodecContext *avctx, int loglevel)
                 break;
 
             av_log(avctx, loglevel, "Frame size: %dx%d%c; bitrate: %dMbps; pixel format: %s; framerate: %d/%d\n",
-                   cid->width, cid->height, cid->interlaced ? 'i' : 'p', cid->bit_rates[j],
+                   cid->width, cid->height, cid->flags & DNXHD_INTERLACED ? 'i' : 'p', cid->bit_rates[j],
                    cid->bit_depth == 10 ? "yuv422p10" : "yuv422p", cid->frame_rates[j].num, cid->frame_rates[j].den);
         }
     }
diff --git a/libavcodec/dnxhddata.h b/libavcodec/dnxhddata.h
index 8cc27e88..3ae4683a 100644
--- a/libavcodec/dnxhddata.h
+++ b/libavcodec/dnxhddata.h
@@ -26,20 +26,33 @@
 #include "avcodec.h"
 #include "libavutil/internal.h"
 
+/** Additional profile info flags */
+#define DNXHD_INTERLACED   (1<<0)
+#define DNXHD_MBAFF        (1<<1)
+#define DNXHD_444          (1<<2)
+
+/** Frame headers, extra 0x00 added to end for parser */
+#define DNXHD_HEADER_INITIAL 0x000002800100
+#define DNXHD_HEADER_444     0x000002800200
+#define DNXHD_HEADER_HR1     0x000002800300
+#define DNXHD_HEADER_HR2     0x0000038C0300
+
+/** Indicates that a CIDEntry value is not fixed by the profile and must be read from the bitstream */
+#define DNXHD_VARIABLE 0
+
 typedef struct CIDEntry {
     int cid;
     unsigned int width, height;
-    int interlaced;
     unsigned int frame_size;
     unsigned int coding_unit_size;
+    uint16_t flags;
     int index_bits;
     int bit_depth;
     int eob_index;
     const uint8_t *luma_weight, *chroma_weight;
     const uint8_t *dc_codes, *dc_bits;
     const uint16_t *ac_codes;
-    const uint8_t *ac_bits, *ac_level;
-    const uint8_t *ac_flags;
+    const uint8_t *ac_bits, *ac_info;
     const uint16_t *run_codes;
     const uint8_t *run_bits, *run;
     int bit_rates[5]; ///< Helper to choose variants, rounded to nearest 5Mb/s
@@ -52,7 +65,17 @@ int ff_dnxhd_get_cid_table(int cid);
 int ff_dnxhd_find_cid(AVCodecContext *avctx, int bit_depth);
 void ff_dnxhd_print_profiles(AVCodecContext *avctx, int loglevel);
 
+static av_always_inline uint64_t ff_dnxhd_check_header_prefix(uint64_t prefix)
+{ /* Returns prefix unchanged when it equals one of the four DNXHD_HEADER_* constants, 0 otherwise. */
+    if (prefix == DNXHD_HEADER_INITIAL ||
+        prefix == DNXHD_HEADER_444     ||
+        prefix == DNXHD_HEADER_HR1     ||
+        prefix == DNXHD_HEADER_HR2)
+        return prefix;
+    return 0; /* not a recognized frame header; 0 is never a valid prefix, so it doubles as the failure value */
+}
+
 int avpriv_dnxhd_get_frame_size(int cid);
 int avpriv_dnxhd_get_interlaced(int cid);
-
+uint64_t avpriv_dnxhd_parse_header_prefix(const uint8_t *buf);
 #endif /* AVCODEC_DNXHDDATA_H */
diff --git a/libavcodec/dnxhddec.c b/libavcodec/dnxhddec.c
index a1376d30..18080803 100644
--- a/libavcodec/dnxhddec.c
+++ b/libavcodec/dnxhddec.c
@@ -2,8 +2,10 @@
  * VC3/DNxHD decoder.
  * Copyright (c) 2007 SmartJog S.A., Baptiste Coudurier <baptiste dot coudurier at smartjog dot com>
  * Copyright (c) 2011 MirriAd Ltd
+ * Copyright (c) 2015 Christophe Gisquet
  *
  * 10 bit support added by MirriAd Ltd, Joseph Artsimovich <joseph@mirriad.com>
+ * Slice multithreading and MB interlaced support added by Christophe Gisquet
  *
  * This file is part of FFmpeg.
  *
@@ -26,46 +28,63 @@
 #include "libavutil/timer.h"
 #include "avcodec.h"
 #include "blockdsp.h"
+#define  UNCHECKED_BITSTREAM_READER 1
 #include "get_bits.h"
 #include "dnxhddata.h"
 #include "idctdsp.h"
 #include "internal.h"
 #include "thread.h"
 
+typedef struct RowContext { /* element type of DNXHDContext.rows; the init functions allocate avctx->thread_count of these */
+    DECLARE_ALIGNED(16, int16_t, blocks)[12][64]; /* storage for 12 blocks of 64 int16_t values each */
+    int luma_scale[64];
+    int chroma_scale[64];
+    GetBitContext gb; /* NOTE(review): presumably this row's own bitstream reader -- confirm against the slice decoder */
+    int last_dc[3]; /* NOTE(review): presumably one DC predictor per colour component -- confirm */
+    int last_qscale;
+    int errors;
+    /** -1:not set yet  0:off=RGB  1:on=YUV  2:variable */
+    int format;
+} RowContext;
+
 typedef struct DNXHDContext {
     AVCodecContext *avctx;
-    GetBitContext gb;
+    RowContext *rows;
     BlockDSPContext bdsp;
+    const uint8_t* buf;
+    int buf_size;
     int64_t cid;                        ///< compression id
     unsigned int width, height;
     enum AVPixelFormat pix_fmt;
     unsigned int mb_width, mb_height;
-    uint32_t mb_scan_index[68];         /* max for 1080p */
+    uint32_t mb_scan_index[256];
+    int data_offset;                    // End of mb_scan_index, where macroblocks start
     int cur_field;                      ///< current interlaced field
     VLC ac_vlc, dc_vlc, run_vlc;
-    int last_dc[3];
     IDCTDSPContext idsp;
-    DECLARE_ALIGNED(16, int16_t, blocks)[12][64];
     ScanTable scantable;
     const CIDEntry *cid_table;
-    int bit_depth; // 8, 10 or 0 if not initialized at all.
+    int bit_depth; // 8, 10, 12 or 0 if not initialized at all.
     int is_444;
-    void (*decode_dct_block)(struct DNXHDContext *ctx, int16_t *block,
-                             int n, int qscale);
-    int last_qscale;
-    int luma_scale[64];
-    int chroma_scale[64];
+    int mbaff;
+    int act;
+    int (*decode_dct_block)(const struct DNXHDContext *ctx,
+                            RowContext *row, int n);
 } DNXHDContext;
 
 #define DNXHD_VLC_BITS 9
 #define DNXHD_DC_VLC_BITS 7
 
-static void dnxhd_decode_dct_block_8(DNXHDContext *ctx, int16_t *block,
-                                     int n, int qscale);
-static void dnxhd_decode_dct_block_10(DNXHDContext *ctx, int16_t *block,
-                                      int n, int qscale);
-static void dnxhd_decode_dct_block_10_444(DNXHDContext *ctx, int16_t *block,
-                                          int n, int qscale);
+static int dnxhd_decode_dct_block_8(const DNXHDContext *ctx,
+                                    RowContext *row, int n);
+static int dnxhd_decode_dct_block_10(const DNXHDContext *ctx,
+                                     RowContext *row, int n);
+static int dnxhd_decode_dct_block_10_444(const DNXHDContext *ctx,
+                                         RowContext *row, int n);
+static int dnxhd_decode_dct_block_12(const DNXHDContext *ctx,
+                                     RowContext *row, int n);
+static int dnxhd_decode_dct_block_12_444(const DNXHDContext *ctx,
+                                         RowContext *row, int n);
 
 static av_cold int dnxhd_decode_init(AVCodecContext *avctx)
 {
@@ -73,10 +92,19 @@ static av_cold int dnxhd_decode_init(AVCodecContext *avctx)
 
     ctx->avctx = avctx;
     ctx->cid = -1;
+    avctx->colorspace = AVCOL_SPC_BT709;
+
+    avctx->coded_width  = FFALIGN(avctx->width,  16);
+    avctx->coded_height = FFALIGN(avctx->height, 16);
+
+    ctx->rows = av_mallocz_array(avctx->thread_count, sizeof(RowContext));
+    if (!ctx->rows)
+        return AVERROR(ENOMEM);
+
     return 0;
 }
 
-static int dnxhd_init_vlc(DNXHDContext *ctx, uint32_t cid)
+static int dnxhd_init_vlc(DNXHDContext *ctx, uint32_t cid, int bitdepth)
 {
     if (cid != ctx->cid) {
         int index;
@@ -85,10 +113,16 @@ static int dnxhd_init_vlc(DNXHDContext *ctx, uint32_t cid)
             av_log(ctx->avctx, AV_LOG_ERROR, "unsupported cid %d\n", cid);
             return AVERROR(ENOSYS);
         }
-        if (ff_dnxhd_cid_table[index].bit_depth != ctx->bit_depth) {
-            av_log(ctx->avctx, AV_LOG_ERROR, "bit depth mismatches %d %d\n", ff_dnxhd_cid_table[index].bit_depth, ctx->bit_depth);
+        if (ff_dnxhd_cid_table[index].bit_depth != bitdepth &&
+            ff_dnxhd_cid_table[index].bit_depth != DNXHD_VARIABLE) {
+            av_log(ctx->avctx, AV_LOG_ERROR, "bit depth mismatches %d %d\n", ff_dnxhd_cid_table[index].bit_depth, bitdepth);
             return AVERROR_INVALIDDATA;
         }
+        if (bitdepth > 10) {
+            avpriv_request_sample(ctx->avctx, "DNXHR 12-bit");
+            if (ctx->avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL)
+                return AVERROR_PATCHWELCOME;
+        }
         ctx->cid_table = &ff_dnxhd_cid_table[index];
         av_log(ctx->avctx, AV_LOG_VERBOSE, "Profile cid %d.\n", cid);
 
@@ -99,36 +133,47 @@ static int dnxhd_init_vlc(DNXHDContext *ctx, uint32_t cid)
         init_vlc(&ctx->ac_vlc, DNXHD_VLC_BITS, 257,
                  ctx->cid_table->ac_bits, 1, 1,
                  ctx->cid_table->ac_codes, 2, 2, 0);
-        init_vlc(&ctx->dc_vlc, DNXHD_DC_VLC_BITS, ctx->bit_depth + 4,
+        init_vlc(&ctx->dc_vlc, DNXHD_DC_VLC_BITS, bitdepth + 4,
                  ctx->cid_table->dc_bits, 1, 1,
                  ctx->cid_table->dc_codes, 1, 1, 0);
         init_vlc(&ctx->run_vlc, DNXHD_VLC_BITS, 62,
                  ctx->cid_table->run_bits, 1, 1,
                  ctx->cid_table->run_codes, 2, 2, 0);
 
-        ff_init_scantable(ctx->idsp.idct_permutation, &ctx->scantable,
-                          ff_zigzag_direct);
         ctx->cid = cid;
     }
     return 0;
 }
 
+static av_cold int dnxhd_decode_init_thread_copy(AVCodecContext *avctx)
+{ /* Resets the cached cid and allocates ctx->rows; returns 0, or AVERROR(ENOMEM) if the allocation fails. */
+    DNXHDContext *ctx = avctx->priv_data;
+
+    // make sure VLC tables will be loaded when cid is parsed
+    ctx->cid = -1; // dnxhd_init_vlc() skips its table setup whenever the parsed cid equals ctx->cid
+
+    ctx->rows = av_mallocz_array(avctx->thread_count, sizeof(RowContext)); // thread_count RowContext entries
+    if (!ctx->rows)
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
 static int dnxhd_decode_header(DNXHDContext *ctx, AVFrame *frame,
                                const uint8_t *buf, int buf_size,
                                int first_field)
 {
-    static const uint8_t header_prefix[]    = { 0x00, 0x00, 0x02, 0x80, 0x01 };
-    static const uint8_t header_prefix444[] = { 0x00, 0x00, 0x02, 0x80, 0x02 };
     int i, cid, ret;
-    int old_bit_depth = ctx->bit_depth;
-
+    int old_bit_depth = ctx->bit_depth, bitdepth;
+    uint64_t header_prefix;
     if (buf_size < 0x280) {
-        av_log(ctx->avctx, AV_LOG_ERROR, "buffer too small (%d < 640).\n",
-               buf_size);
+        av_log(ctx->avctx, AV_LOG_ERROR,
+               "buffer too small (%d < 640).\n", buf_size);
         return AVERROR_INVALIDDATA;
     }
 
-    if (memcmp(buf, header_prefix, 5) && memcmp(buf, header_prefix444, 5)) {
+    header_prefix = avpriv_dnxhd_parse_header_prefix(buf);
+    if (header_prefix == 0) {
         av_log(ctx->avctx, AV_LOG_ERROR,
                "unknown header 0x%02X 0x%02X 0x%02X 0x%02X 0x%02X\n",
                buf[0], buf[1], buf[2], buf[3], buf[4]);
@@ -143,49 +188,70 @@ static int dnxhd_decode_header(DNXHDContext *ctx, AVFrame *frame,
     } else {
         ctx->cur_field = 0;
     }
+    ctx->mbaff = (buf[0x6] >> 5) & 1;
 
     ctx->height = AV_RB16(buf + 0x18);
     ctx->width  = AV_RB16(buf + 0x1a);
 
-    ff_dlog(ctx->avctx, "width %d, height %d\n", ctx->width, ctx->height);
+    switch(buf[0x21] >> 5) {
+    case 1: bitdepth = 8; break;
+    case 2: bitdepth = 10; break;
+    case 3: bitdepth = 12; break;
+    default:
+        av_log(ctx->avctx, AV_LOG_ERROR,
+               "Unknown bitdepth indicator (%d)\n", buf[0x21] >> 5);
+        return AVERROR_INVALIDDATA;
+    }
+
+    cid = AV_RB32(buf + 0x28);
+    if ((ret = dnxhd_init_vlc(ctx, cid, bitdepth)) < 0)
+        return ret;
+    if (ctx->mbaff && ctx->cid_table->cid != 1260)
+        av_log(ctx->avctx, AV_LOG_WARNING,
+               "Adaptive MB interlace flag in an unsupported profile.\n");
 
-    if (buf[0x21] == 0x58) { /* 10 bit */
-        ctx->bit_depth = ctx->avctx->bits_per_raw_sample = 10;
+    ctx->act = buf[0x2C] & 7;
+    if (ctx->act && ctx->cid_table->cid != 1256 && ctx->cid_table->cid != 1270)
+        av_log(ctx->avctx, AV_LOG_WARNING,
+               "Adaptive color transform in an unsupported profile.\n");
 
-        if (buf[0x4] == 0x2) {
+    ctx->is_444 = (buf[0x2C] >> 6) & 1;
+    if (ctx->is_444) {
+        if (bitdepth == 8) {
+            avpriv_request_sample(ctx->avctx, "4:4:4 8 bits\n");
+            return AVERROR_INVALIDDATA;
+        } else if (bitdepth == 10) {
             ctx->decode_dct_block = dnxhd_decode_dct_block_10_444;
-            ctx->pix_fmt = AV_PIX_FMT_YUV444P10;
-            ctx->is_444 = 1;
+            ctx->pix_fmt = ctx->act ? AV_PIX_FMT_YUV444P10
+                                    : AV_PIX_FMT_GBRP10;
         } else {
-            ctx->decode_dct_block = dnxhd_decode_dct_block_10;
-            ctx->pix_fmt = AV_PIX_FMT_YUV422P10;
-            ctx->is_444 = 0;
+            ctx->decode_dct_block = dnxhd_decode_dct_block_12_444;
+            ctx->pix_fmt = ctx->act ? AV_PIX_FMT_YUV444P12
+                                    : AV_PIX_FMT_GBRP12;
         }
-    } else if (buf[0x21] == 0x38) { /* 8 bit */
-        ctx->bit_depth = ctx->avctx->bits_per_raw_sample = 8;
-
-        ctx->pix_fmt = AV_PIX_FMT_YUV422P;
-        ctx->is_444 = 0;
-        ctx->decode_dct_block = dnxhd_decode_dct_block_8;
+    } else if (bitdepth == 12) {
+        ctx->decode_dct_block = dnxhd_decode_dct_block_12;
+        ctx->pix_fmt = AV_PIX_FMT_YUV422P12;
+    } else if (bitdepth == 10) {
+        ctx->decode_dct_block = dnxhd_decode_dct_block_10;
+        ctx->pix_fmt = AV_PIX_FMT_YUV422P10;
     } else {
-        av_log(ctx->avctx, AV_LOG_ERROR, "invalid bit depth value (%d).\n",
-               buf[0x21]);
-        return AVERROR_INVALIDDATA;
+        ctx->decode_dct_block = dnxhd_decode_dct_block_8;
+        ctx->pix_fmt = AV_PIX_FMT_YUV422P;
     }
+
+    ctx->avctx->bits_per_raw_sample = ctx->bit_depth = bitdepth;
     if (ctx->bit_depth != old_bit_depth) {
         ff_blockdsp_init(&ctx->bdsp, ctx->avctx);
         ff_idctdsp_init(&ctx->idsp, ctx->avctx);
+        ff_init_scantable(ctx->idsp.idct_permutation, &ctx->scantable,
+                          ff_zigzag_direct);
     }
 
-    cid = AV_RB32(buf + 0x28);
-    ff_dlog(ctx->avctx, "compression id %d\n", cid);
-
-    if ((ret = dnxhd_init_vlc(ctx, cid)) < 0)
-        return ret;
-
     // make sure profile size constraints are respected
     // DNx100 allows 1920->1440 and 1280->960 subsampling
-    if (ctx->width != ctx->cid_table->width) {
+    if (ctx->width != ctx->cid_table->width &&
+        ctx->cid_table->width != DNXHD_VARIABLE) {
         av_reduce(&ctx->avctx->sample_aspect_ratio.num,
                   &ctx->avctx->sample_aspect_ratio.den,
                   ctx->width, ctx->cid_table->width, 255);
@@ -198,29 +264,44 @@ static int dnxhd_decode_header(DNXHDContext *ctx, AVFrame *frame,
         return AVERROR_INVALIDDATA;
     }
 
-    ctx->mb_width  = ctx->width >> 4;
+    ctx->mb_width  = (ctx->width + 15)>> 4;
     ctx->mb_height = buf[0x16d];
 
-    ff_dlog(ctx->avctx,
-            "mb width %d, mb height %d\n", ctx->mb_width, ctx->mb_height);
-
     if ((ctx->height + 15) >> 4 == ctx->mb_height && frame->interlaced_frame)
         ctx->height <<= 1;
 
-    if (ctx->mb_height > 68 ||
-        (ctx->mb_height << frame->interlaced_frame) > (ctx->height + 15) >> 4) {
+    av_log(ctx->avctx, AV_LOG_VERBOSE, "%dx%d, 4:%s %d bits, MBAFF=%d ACT=%d\n",
+           ctx->width, ctx->height, ctx->is_444 ? "4:4" : "2:2",
+           ctx->bit_depth, ctx->mbaff, ctx->act);
+
+    // Newer format supports variable mb_scan_index sizes
+    if (header_prefix == DNXHD_HEADER_HR2) {
+        ctx->data_offset = 0x170 + (ctx->mb_height << 2);
+    } else {
+        if (ctx->mb_height > 68 ||
+            (ctx->mb_height << frame->interlaced_frame) > (ctx->height + 15) >> 4) {
+            av_log(ctx->avctx, AV_LOG_ERROR,
+                   "mb height too big: %d\n", ctx->mb_height);
+            return AVERROR_INVALIDDATA;
+        }
+        ctx->data_offset = 0x280;
+    }
+
+    if (buf_size < ctx->data_offset) {
         av_log(ctx->avctx, AV_LOG_ERROR,
-               "mb height too big: %d\n", ctx->mb_height);
+               "buffer too small (%d < %d).\n", buf_size, ctx->data_offset);
         return AVERROR_INVALIDDATA;
     }
 
+    av_assert0((unsigned)ctx->mb_height <= FF_ARRAY_ELEMS(ctx->mb_scan_index));
+
     for (i = 0; i < ctx->mb_height; i++) {
         ctx->mb_scan_index[i] = AV_RB32(buf + 0x170 + (i << 2));
-        ff_dlog(ctx->avctx, "mb scan index %d\n", ctx->mb_scan_index[i]);
-        if (buf_size < ctx->mb_scan_index[i] + 0x280LL) {
+        ff_dlog(ctx->avctx, "mb scan index %d, pos %d: %u\n", i, 0x170 + (i << 2), ctx->mb_scan_index[i]);
+        if (buf_size - ctx->data_offset < ctx->mb_scan_index[i]) {
             av_log(ctx->avctx, AV_LOG_ERROR,
-                   "invalid mb scan index (%d < %d).\n",
-                   buf_size, ctx->mb_scan_index[i] + 0x280);
+                   "invalid mb scan index (%u vs %u).\n",
+                   ctx->mb_scan_index[i], buf_size - ctx->data_offset);
             return AVERROR_INVALIDDATA;
         }
     }
@@ -228,148 +309,181 @@ static int dnxhd_decode_header(DNXHDContext *ctx, AVFrame *frame,
     return 0;
 }
 
-static av_always_inline void dnxhd_decode_dct_block(DNXHDContext *ctx,
-                                                    int16_t *block, int n,
-                                                    int qscale,
-                                                    int index_bits,
-                                                    int level_bias,
-                                                    int level_shift)
+static av_always_inline int dnxhd_decode_dct_block(const DNXHDContext *ctx,
+                                                   RowContext *row,
+                                                   int n,
+                                                   int index_bits,
+                                                   int level_bias,
+                                                   int level_shift,
+                                                   int dc_shift)
 {
     int i, j, index1, index2, len, flags;
     int level, component, sign;
     const int *scale;
     const uint8_t *weight_matrix;
-    const uint8_t *ac_level = ctx->cid_table->ac_level;
-    const uint8_t *ac_flags = ctx->cid_table->ac_flags;
+    const uint8_t *ac_info = ctx->cid_table->ac_info;
+    int16_t *block = row->blocks[n];
     const int eob_index     = ctx->cid_table->eob_index;
-    OPEN_READER(bs, &ctx->gb);
+    int ret = 0;
+    OPEN_READER(bs, &row->gb);
+
+    ctx->bdsp.clear_block(block);
 
     if (!ctx->is_444) {
         if (n & 2) {
             component     = 1 + (n & 1);
-            scale = ctx->chroma_scale;
+            scale = row->chroma_scale;
             weight_matrix = ctx->cid_table->chroma_weight;
         } else {
             component     = 0;
-            scale = ctx->luma_scale;
+            scale = row->luma_scale;
             weight_matrix = ctx->cid_table->luma_weight;
         }
     } else {
         component = (n >> 1) % 3;
         if (component) {
-            scale = ctx->chroma_scale;
+            scale = row->chroma_scale;
             weight_matrix = ctx->cid_table->chroma_weight;
         } else {
-            scale = ctx->luma_scale;
+            scale = row->luma_scale;
             weight_matrix = ctx->cid_table->luma_weight;
         }
     }
 
-    UPDATE_CACHE(bs, &ctx->gb);
-    GET_VLC(len, bs, &ctx->gb, ctx->dc_vlc.table, DNXHD_DC_VLC_BITS, 1);
+    UPDATE_CACHE(bs, &row->gb);
+    GET_VLC(len, bs, &row->gb, ctx->dc_vlc.table, DNXHD_DC_VLC_BITS, 1);
     if (len) {
-        level = GET_CACHE(bs, &ctx->gb);
-        LAST_SKIP_BITS(bs, &ctx->gb, len);
+        level = GET_CACHE(bs, &row->gb);
+        LAST_SKIP_BITS(bs, &row->gb, len);
         sign  = ~level >> 31;
         level = (NEG_USR32(sign ^ level, len) ^ sign) - sign;
-        ctx->last_dc[component] += level;
+        row->last_dc[component] += level * (1 << dc_shift);
     }
-    block[0] = ctx->last_dc[component];
+    block[0] = row->last_dc[component];
 
     i = 0;
 
-    UPDATE_CACHE(bs, &ctx->gb);
-    GET_VLC(index1, bs, &ctx->gb, ctx->ac_vlc.table,
+    UPDATE_CACHE(bs, &row->gb);
+    GET_VLC(index1, bs, &row->gb, ctx->ac_vlc.table,
             DNXHD_VLC_BITS, 2);
 
     while (index1 != eob_index) {
-        level = ac_level[index1];
-        flags = ac_flags[index1];
+        level = ac_info[2*index1+0];
+        flags = ac_info[2*index1+1];
 
-        sign = SHOW_SBITS(bs, &ctx->gb, 1);
-        SKIP_BITS(bs, &ctx->gb, 1);
+        sign = SHOW_SBITS(bs, &row->gb, 1);
+        SKIP_BITS(bs, &row->gb, 1);
 
         if (flags & 1) {
-            level += SHOW_UBITS(bs, &ctx->gb, index_bits) << 7;
-            SKIP_BITS(bs, &ctx->gb, index_bits);
+            level += SHOW_UBITS(bs, &row->gb, index_bits) << 7;
+            SKIP_BITS(bs, &row->gb, index_bits);
         }
 
         if (flags & 2) {
-            UPDATE_CACHE(bs, &ctx->gb);
-            GET_VLC(index2, bs, &ctx->gb, ctx->run_vlc.table,
+            UPDATE_CACHE(bs, &row->gb);
+            GET_VLC(index2, bs, &row->gb, ctx->run_vlc.table,
                     DNXHD_VLC_BITS, 2);
             i += ctx->cid_table->run[index2];
         }
 
         if (++i > 63) {
             av_log(ctx->avctx, AV_LOG_ERROR, "ac tex damaged %d, %d\n", n, i);
+            ret = -1;
             break;
         }
 
         j     = ctx->scantable.permutated[i];
         level *= scale[i];
+        level += scale[i] >> 1;
         if (level_bias < 32 || weight_matrix[i] != level_bias)
-            level += level_bias;
+            level += level_bias; // 1<<(level_shift-1)
         level >>= level_shift;
 
         block[j] = (level ^ sign) - sign;
 
-        UPDATE_CACHE(bs, &ctx->gb);
-        GET_VLC(index1, bs, &ctx->gb, ctx->ac_vlc.table,
+        UPDATE_CACHE(bs, &row->gb);
+        GET_VLC(index1, bs, &row->gb, ctx->ac_vlc.table,
                 DNXHD_VLC_BITS, 2);
     }
 
-    CLOSE_READER(bs, &ctx->gb);
+    CLOSE_READER(bs, &row->gb);
+    return ret;
+}
+
+static int dnxhd_decode_dct_block_8(const DNXHDContext *ctx,
+                                    RowContext *row, int n)
+{
+    return dnxhd_decode_dct_block(ctx, row, n, 4, 32, 6, 0);
+}
+
+static int dnxhd_decode_dct_block_10(const DNXHDContext *ctx,
+                                     RowContext *row, int n)
+{
+    return dnxhd_decode_dct_block(ctx, row, n, 6, 8, 4, 0);
 }
 
-static void dnxhd_decode_dct_block_8(DNXHDContext *ctx, int16_t *block,
-                                     int n, int qscale)
+static int dnxhd_decode_dct_block_10_444(const DNXHDContext *ctx,
+                                         RowContext *row, int n)
 {
-    dnxhd_decode_dct_block(ctx, block, n, qscale, 4, 32, 6);
+    return dnxhd_decode_dct_block(ctx, row, n, 6, 32, 6, 0);
 }
 
-static void dnxhd_decode_dct_block_10(DNXHDContext *ctx, int16_t *block,
-                                      int n, int qscale)
+static int dnxhd_decode_dct_block_12(const DNXHDContext *ctx,
+                                     RowContext *row, int n)
 {
-    dnxhd_decode_dct_block(ctx, block, n, qscale, 6, 8, 4);
+    return dnxhd_decode_dct_block(ctx, row, n, 6, 8, 4, 2);
 }
 
-static void dnxhd_decode_dct_block_10_444(DNXHDContext *ctx, int16_t *block,
-                                          int n, int qscale)
+static int dnxhd_decode_dct_block_12_444(const DNXHDContext *ctx,
+                                         RowContext *row, int n)
 {
-    dnxhd_decode_dct_block(ctx, block, n, qscale, 6, 32, 6);
+    return dnxhd_decode_dct_block(ctx, row, n, 6, 32, 4, 2);
 }
 
-static int dnxhd_decode_macroblock(DNXHDContext *ctx, AVFrame *frame,
-                                   int x, int y)
+static int dnxhd_decode_macroblock(const DNXHDContext *ctx, RowContext *row,
+                                   AVFrame *frame, int x, int y)
 {
-    int shift1 = ctx->bit_depth == 10;
+    int shift1 = ctx->bit_depth >= 10;
     int dct_linesize_luma   = frame->linesize[0];
     int dct_linesize_chroma = frame->linesize[1];
     uint8_t *dest_y, *dest_u, *dest_v;
     int dct_y_offset, dct_x_offset;
-    int qscale, i;
+    int qscale, i, act;
+    int interlaced_mb = 0;
 
-    qscale = get_bits(&ctx->gb, 11);
-    skip_bits1(&ctx->gb);
+    if (ctx->mbaff) {
+        interlaced_mb = get_bits1(&row->gb);
+        qscale = get_bits(&row->gb, 10);
+    } else {
+        qscale = get_bits(&row->gb, 11);
+    }
+    act = get_bits1(&row->gb);
+    if (act) {
+        if (!ctx->act) {
+            static int act_warned;
+            if (!act_warned) {
+                act_warned = 1;
+                av_log(ctx->avctx, AV_LOG_ERROR,
+                       "ACT flag set, in violation of frame header.\n");
+            }
+        } else if (row->format == -1) {
+            row->format = act;
+        } else if (row->format != act) {
+            row->format = 2; // Variable
+        }
+    }
 
-    if (qscale != ctx->last_qscale) {
+    if (qscale != row->last_qscale) {
         for (i = 0; i < 64; i++) {
-            ctx->luma_scale[i]   = qscale * ctx->cid_table->luma_weight[i];
-            ctx->chroma_scale[i] = qscale * ctx->cid_table->chroma_weight[i];
+            row->luma_scale[i]   = qscale * ctx->cid_table->luma_weight[i];
+            row->chroma_scale[i] = qscale * ctx->cid_table->chroma_weight[i];
         }
-        ctx->last_qscale = qscale;
+        row->last_qscale = qscale;
     }
 
-    for (i = 0; i < 8; i++) {
-        ctx->bdsp.clear_block(ctx->blocks[i]);
-        ctx->decode_dct_block(ctx, ctx->blocks[i], i, qscale);
-    }
-    if (ctx->is_444) {
-        for (; i < 12; i++) {
-            ctx->bdsp.clear_block(ctx->blocks[i]);
-            ctx->decode_dct_block(ctx, ctx->blocks[i], i, qscale);
-        }
+    for (i = 0; i < 8 + 4 * ctx->is_444; i++) {
+        if (ctx->decode_dct_block(ctx, row, i) < 0)
+            return AVERROR_INVALIDDATA;
     }
 
     if (frame->interlaced_frame) {
@@ -386,59 +500,70 @@ static int dnxhd_decode_macroblock(DNXHDContext *ctx, AVFrame *frame,
         dest_u += frame->linesize[1];
         dest_v += frame->linesize[2];
     }
+    if (interlaced_mb) {
+        dct_linesize_luma   <<= 1;
+        dct_linesize_chroma <<= 1;
+    }
 
-    dct_y_offset = dct_linesize_luma << 3;
+    dct_y_offset = interlaced_mb ? frame->linesize[0] : (dct_linesize_luma << 3);
     dct_x_offset = 8 << shift1;
     if (!ctx->is_444) {
-        ctx->idsp.idct_put(dest_y,                               dct_linesize_luma, ctx->blocks[0]);
-        ctx->idsp.idct_put(dest_y + dct_x_offset,                dct_linesize_luma, ctx->blocks[1]);
-        ctx->idsp.idct_put(dest_y + dct_y_offset,                dct_linesize_luma, ctx->blocks[4]);
-        ctx->idsp.idct_put(dest_y + dct_y_offset + dct_x_offset, dct_linesize_luma, ctx->blocks[5]);
-
-        if (!(ctx->avctx->flags & CODEC_FLAG_GRAY)) {
-            dct_y_offset = dct_linesize_chroma << 3;
-            ctx->idsp.idct_put(dest_u,                dct_linesize_chroma, ctx->blocks[2]);
-            ctx->idsp.idct_put(dest_v,                dct_linesize_chroma, ctx->blocks[3]);
-            ctx->idsp.idct_put(dest_u + dct_y_offset, dct_linesize_chroma, ctx->blocks[6]);
-            ctx->idsp.idct_put(dest_v + dct_y_offset, dct_linesize_chroma, ctx->blocks[7]);
+        ctx->idsp.idct_put(dest_y,                               dct_linesize_luma, row->blocks[0]);
+        ctx->idsp.idct_put(dest_y + dct_x_offset,                dct_linesize_luma, row->blocks[1]);
+        ctx->idsp.idct_put(dest_y + dct_y_offset,                dct_linesize_luma, row->blocks[4]);
+        ctx->idsp.idct_put(dest_y + dct_y_offset + dct_x_offset, dct_linesize_luma, row->blocks[5]);
+
+        if (!(ctx->avctx->flags & AV_CODEC_FLAG_GRAY)) {
+            dct_y_offset = interlaced_mb ? frame->linesize[1] : (dct_linesize_chroma << 3);
+            ctx->idsp.idct_put(dest_u,                dct_linesize_chroma, row->blocks[2]);
+            ctx->idsp.idct_put(dest_v,                dct_linesize_chroma, row->blocks[3]);
+            ctx->idsp.idct_put(dest_u + dct_y_offset, dct_linesize_chroma, row->blocks[6]);
+            ctx->idsp.idct_put(dest_v + dct_y_offset, dct_linesize_chroma, row->blocks[7]);
         }
     } else {
-        ctx->idsp.idct_put(dest_y,                               dct_linesize_luma, ctx->blocks[0]);
-        ctx->idsp.idct_put(dest_y + dct_x_offset,                dct_linesize_luma, ctx->blocks[1]);
-        ctx->idsp.idct_put(dest_y + dct_y_offset,                dct_linesize_luma, ctx->blocks[6]);
-        ctx->idsp.idct_put(dest_y + dct_y_offset + dct_x_offset, dct_linesize_luma, ctx->blocks[7]);
-
-        if (!(ctx->avctx->flags & CODEC_FLAG_GRAY)) {
-            dct_y_offset = dct_linesize_chroma << 3;
-            ctx->idsp.idct_put(dest_u,                               dct_linesize_chroma, ctx->blocks[2]);
-            ctx->idsp.idct_put(dest_u + dct_x_offset,                dct_linesize_chroma, ctx->blocks[3]);
-            ctx->idsp.idct_put(dest_u + dct_y_offset,                dct_linesize_chroma, ctx->blocks[8]);
-            ctx->idsp.idct_put(dest_u + dct_y_offset + dct_x_offset, dct_linesize_chroma, ctx->blocks[9]);
-            ctx->idsp.idct_put(dest_v,                               dct_linesize_chroma, ctx->blocks[4]);
-            ctx->idsp.idct_put(dest_v + dct_x_offset,                dct_linesize_chroma, ctx->blocks[5]);
-            ctx->idsp.idct_put(dest_v + dct_y_offset,                dct_linesize_chroma, ctx->blocks[10]);
-            ctx->idsp.idct_put(dest_v + dct_y_offset + dct_x_offset, dct_linesize_chroma, ctx->blocks[11]);
+        ctx->idsp.idct_put(dest_y,                               dct_linesize_luma, row->blocks[0]);
+        ctx->idsp.idct_put(dest_y + dct_x_offset,                dct_linesize_luma, row->blocks[1]);
+        ctx->idsp.idct_put(dest_y + dct_y_offset,                dct_linesize_luma, row->blocks[6]);
+        ctx->idsp.idct_put(dest_y + dct_y_offset + dct_x_offset, dct_linesize_luma, row->blocks[7]);
+
+        if (!(ctx->avctx->flags & AV_CODEC_FLAG_GRAY)) {
+            dct_y_offset = interlaced_mb ? frame->linesize[1] : (dct_linesize_chroma << 3);
+            ctx->idsp.idct_put(dest_u,                               dct_linesize_chroma, row->blocks[2]);
+            ctx->idsp.idct_put(dest_u + dct_x_offset,                dct_linesize_chroma, row->blocks[3]);
+            ctx->idsp.idct_put(dest_u + dct_y_offset,                dct_linesize_chroma, row->blocks[8]);
+            ctx->idsp.idct_put(dest_u + dct_y_offset + dct_x_offset, dct_linesize_chroma, row->blocks[9]);
+            ctx->idsp.idct_put(dest_v,                               dct_linesize_chroma, row->blocks[4]);
+            ctx->idsp.idct_put(dest_v + dct_x_offset,                dct_linesize_chroma, row->blocks[5]);
+            ctx->idsp.idct_put(dest_v + dct_y_offset,                dct_linesize_chroma, row->blocks[10]);
+            ctx->idsp.idct_put(dest_v + dct_y_offset + dct_x_offset, dct_linesize_chroma, row->blocks[11]);
         }
     }
 
     return 0;
 }
 
-static int dnxhd_decode_macroblocks(DNXHDContext *ctx, AVFrame *frame,
-                                    const uint8_t *buf, int buf_size)
+static int dnxhd_decode_row(AVCodecContext *avctx, void *data,
+                            int rownb, int threadnb)
 {
-    int x, y;
-    for (y = 0; y < ctx->mb_height; y++) {
-        ctx->last_dc[0] =
-        ctx->last_dc[1] =
-        ctx->last_dc[2] = 1 << (ctx->bit_depth + 2); // for levels +2^(bitdepth-1)
-        init_get_bits(&ctx->gb, buf + ctx->mb_scan_index[y], (buf_size - ctx->mb_scan_index[y]) << 3);
-        for (x = 0; x < ctx->mb_width; x++) {
-            //START_TIMER;
-            dnxhd_decode_macroblock(ctx, frame, x, y);
-            //STOP_TIMER("decode macroblock");
+    const DNXHDContext *ctx = avctx->priv_data;
+    uint32_t offset = ctx->mb_scan_index[rownb];
+    RowContext *row = ctx->rows + threadnb;
+    int x;
+
+    row->last_dc[0] =
+    row->last_dc[1] =
+    row->last_dc[2] = 1 << (ctx->bit_depth + 2); // for levels +2^(bitdepth-1)
+    init_get_bits(&row->gb, ctx->buf + offset, (ctx->buf_size - offset) << 3);
+    for (x = 0; x < ctx->mb_width; x++) {
+        //START_TIMER;
+        int ret = dnxhd_decode_macroblock(ctx, row, data, x, rownb);
+        if (ret < 0) {
+            row->errors++;
+            return ret;
         }
+        //STOP_TIMER("decode macroblock");
     }
+
     return 0;
 }
 
@@ -451,10 +576,13 @@ static int dnxhd_decode_frame(AVCodecContext *avctx, void *data,
     ThreadFrame frame = { .f = data };
     AVFrame *picture = data;
     int first_field = 1;
-    int ret;
+    int ret, i;
 
     ff_dlog(avctx, "frame size %d\n", buf_size);
 
+    for (i = 0; i < avctx->thread_count; i++)
+        ctx->rows[i].format = -1;
+
 decode_coding_unit:
     if ((ret = dnxhd_decode_header(ctx, picture, buf, buf_size, first_field)) < 0)
         return ret;
@@ -483,7 +611,9 @@ static int dnxhd_decode_frame(AVCodecContext *avctx, void *data,
         picture->key_frame = 1;
     }
 
-    dnxhd_decode_macroblocks(ctx, picture, buf + 0x280, buf_size - 0x280);
+    ctx->buf_size = buf_size - ctx->data_offset;
+    ctx->buf = buf + ctx->data_offset;
+    avctx->execute2(avctx, dnxhd_decode_row, picture, NULL, ctx->mb_height);
 
     if (first_field && picture->interlaced_frame) {
         buf      += ctx->cid_table->coding_unit_size;
@@ -492,6 +622,47 @@ static int dnxhd_decode_frame(AVCodecContext *avctx, void *data,
         goto decode_coding_unit;
     }
 
+    ret = 0;
+    for (i = 0; i < avctx->thread_count; i++) {
+        ret += ctx->rows[i].errors;
+        ctx->rows[i].errors = 0;
+    }
+
+    if (ctx->act) {
+        static int act_warned;
+        int format = ctx->rows[0].format;
+        for (i = 1; i < avctx->thread_count; i++) {
+            if (ctx->rows[i].format != format &&
+                ctx->rows[i].format != -1 /* not run */) {
+                format = 2;
+                break;
+            }
+        }
+        switch (format) {
+        case -1:
+        case 2:
+            if (!act_warned) {
+                act_warned = 1;
+                av_log(ctx->avctx, AV_LOG_ERROR,
+                       "Unsupported: variable ACT flag.\n");
+            }
+            break;
+        case 0:
+            ctx->pix_fmt = ctx->bit_depth==10
+                         ? AV_PIX_FMT_GBRP10 : AV_PIX_FMT_GBRP12;
+            break;
+        case 1:
+            ctx->pix_fmt = ctx->bit_depth==10
+                         ? AV_PIX_FMT_YUV444P10 : AV_PIX_FMT_YUV444P12;
+            break;
+        }
+    }
+    avctx->pix_fmt = ctx->pix_fmt;
+    if (ret) {
+        av_log(ctx->avctx, AV_LOG_ERROR, "%d lines with errors\n", ret);
+        return AVERROR_INVALIDDATA;
+    }
+
     *got_frame = 1;
     return avpkt->size;
 }
@@ -503,6 +674,9 @@ static av_cold int dnxhd_decode_close(AVCodecContext *avctx)
     ff_free_vlc(&ctx->ac_vlc);
     ff_free_vlc(&ctx->dc_vlc);
     ff_free_vlc(&ctx->run_vlc);
+
+    av_freep(&ctx->rows);
+
     return 0;
 }
 
@@ -515,5 +689,7 @@ AVCodec ff_dnxhd_decoder = {
     .init           = dnxhd_decode_init,
     .close          = dnxhd_decode_close,
     .decode         = dnxhd_decode_frame,
-    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS |
+                      AV_CODEC_CAP_SLICE_THREADS,
+    .init_thread_copy = ONLY_IF_THREADS_ENABLED(dnxhd_decode_init_thread_copy),
 };
diff --git a/libavcodec/dnxhdenc.c b/libavcodec/dnxhdenc.c
index 90d51ffb..e2ebeeb4 100644
--- a/libavcodec/dnxhdenc.c
+++ b/libavcodec/dnxhdenc.c
@@ -45,7 +45,10 @@
 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 static const AVOption options[] = {
     { "nitris_compat", "encode with Avid Nitris compatibility",
-        offsetof(DNXHDEncContext, nitris_compat), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
+        offsetof(DNXHDEncContext, nitris_compat), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
+    { "ibias", "intra quant bias",
+        offsetof(DNXHDEncContext, intra_quant_bias), AV_OPT_TYPE_INT,
+        { .i64 = 0 }, INT_MIN, INT_MAX, VE },
     { NULL }
 };
 
@@ -84,22 +87,14 @@ void dnxhd_10bit_get_pixels_8x4_sym(int16_t *av_restrict block,
                                     const uint8_t *pixels,
                                     ptrdiff_t line_size)
 {
-    int i;
-    const uint16_t* pixels16 = (const uint16_t*)pixels;
-    line_size >>= 1;
-
-    for (i = 0; i < 4; i++) {
-        block[0] = pixels16[0]; block[1] = pixels16[1];
-        block[2] = pixels16[2]; block[3] = pixels16[3];
-        block[4] = pixels16[4]; block[5] = pixels16[5];
-        block[6] = pixels16[6]; block[7] = pixels16[7];
-        pixels16 += line_size;
-        block += 8;
-    }
-    memcpy(block,      block -  8, sizeof(*block) * 8);
-    memcpy(block +  8, block - 16, sizeof(*block) * 8);
-    memcpy(block + 16, block - 24, sizeof(*block) * 8);
-    memcpy(block + 24, block - 32, sizeof(*block) * 8);
+    memcpy(block + 0 * 8, pixels + 0 * line_size, 8 * sizeof(*block));
+    memcpy(block + 7 * 8, pixels + 0 * line_size, 8 * sizeof(*block));
+    memcpy(block + 1 * 8, pixels + 1 * line_size, 8 * sizeof(*block));
+    memcpy(block + 6 * 8, pixels + 1 * line_size, 8 * sizeof(*block));
+    memcpy(block + 2 * 8, pixels + 2 * line_size, 8 * sizeof(*block));
+    memcpy(block + 5 * 8, pixels + 2 * line_size, 8 * sizeof(*block));
+    memcpy(block + 3 * 8, pixels + 3 * line_size, 8 * sizeof(*block));
+    memcpy(block + 4 * 8, pixels + 3 * line_size, 8 * sizeof(*block));
 }
 
 static int dnxhd_10bit_dct_quantize(MpegEncContext *ctx, int16_t *block,
@@ -125,6 +120,11 @@ static int dnxhd_10bit_dct_quantize(MpegEncContext *ctx, int16_t *block,
             last_non_zero = i;
     }
 
+    /* This permutation is needed so that the IDCT gives correct results; only the nonzero elements are permuted. */
+    if (ctx->idsp.perm_type != FF_IDCT_PERM_NONE)
+        ff_block_permute(block, ctx->idsp.idct_permutation,
+                         scantable, last_non_zero);
+
     return last_non_zero;
 }
 
@@ -155,9 +155,9 @@ static av_cold int dnxhd_init_vlc(DNXHDEncContext *ctx)
                 alevel -= offset << 6;
             }
             for (j = 0; j < 257; j++) {
-                if (ctx->cid_table->ac_level[j] >> 1 == alevel &&
-                    (!offset || (ctx->cid_table->ac_flags[j] & 1) && offset) &&
-                    (!run    || (ctx->cid_table->ac_flags[j] & 2) && run)) {
+                if (ctx->cid_table->ac_info[2*j+0] >> 1 == alevel &&
+                    (!offset || (ctx->cid_table->ac_info[2*j+1] & 1) && offset) &&
+                    (!run    || (ctx->cid_table->ac_info[2*j+1] & 2) && run)) {
                     av_assert1(!ctx->vlc_codes[index]);
                     if (alevel) {
                         ctx->vlc_codes[index] =
@@ -214,14 +214,14 @@ static av_cold int dnxhd_init_qmat(DNXHDEncContext *ctx, int lbias, int cbias)
             weight_matrix[j] = ctx->cid_table->luma_weight[i];
         }
         ff_convert_matrix(&ctx->m, ctx->qmatrix_l, ctx->qmatrix_l16,
-                          weight_matrix, ctx->m.intra_quant_bias, 1,
+                          weight_matrix, ctx->intra_quant_bias, 1,
                           ctx->m.avctx->qmax, 1);
         for (i = 1; i < 64; i++) {
             int j = ctx->m.idsp.idct_permutation[ff_zigzag_direct[i]];
             weight_matrix[j] = ctx->cid_table->chroma_weight[i];
         }
         ff_convert_matrix(&ctx->m, ctx->qmatrix_c, ctx->qmatrix_c16,
-                          weight_matrix, ctx->m.intra_quant_bias, 1,
+                          weight_matrix, ctx->intra_quant_bias, 1,
                           ctx->m.avctx->qmax, 1);
 
         for (qscale = 1; qscale <= ctx->m.avctx->qmax; qscale++) {
@@ -238,7 +238,7 @@ static av_cold int dnxhd_init_qmat(DNXHDEncContext *ctx, int lbias, int cbias)
         // 10-bit
         for (qscale = 1; qscale <= ctx->m.avctx->qmax; qscale++) {
             for (i = 1; i < 64; i++) {
-                int j = ctx->m.idsp.idct_permutation[ff_zigzag_direct[i]];
+                int j = ff_zigzag_direct[i];
 
                 /* The quantization formula from the VC-3 standard is:
                  * quantized = sign(block[i]) * floor(abs(block[i]/s) * p /
@@ -348,17 +348,21 @@ static av_cold int dnxhd_encode_init(AVCodecContext *avctx)
     ctx->m.mb_height = (avctx->height + 15) / 16;
     ctx->m.mb_width  = (avctx->width  + 15) / 16;
 
-    if (avctx->flags & CODEC_FLAG_INTERLACED_DCT) {
+    if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
         ctx->interlaced   = 1;
         ctx->m.mb_height /= 2;
     }
 
     ctx->m.mb_num = ctx->m.mb_height * ctx->m.mb_width;
 
+#if FF_API_QUANT_BIAS
+FF_DISABLE_DEPRECATION_WARNINGS
     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
-        ctx->m.intra_quant_bias = avctx->intra_quant_bias;
+        ctx->intra_quant_bias = avctx->intra_quant_bias;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
     // XXX tune lbias/cbias
-    if ((ret = dnxhd_init_qmat(ctx, ctx->m.intra_quant_bias, 0)) < 0)
+    if ((ret = dnxhd_init_qmat(ctx, ctx->intra_quant_bias, 0)) < 0)
         return ret;
 
     /* Avid Nitris hardware decoder requires a minimum amount of padding
@@ -380,12 +384,12 @@ static av_cold int dnxhd_encode_init(AVCodecContext *avctx)
     FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->mb_qscale,
                       ctx->m.mb_num * sizeof(uint8_t), fail);
 
-    avctx->coded_frame = av_frame_alloc();
-    if (!avctx->coded_frame)
-        return AVERROR(ENOMEM);
-
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
     avctx->coded_frame->key_frame = 1;
     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
     if (avctx->thread_count > MAX_THREADS) {
         av_log(avctx, AV_LOG_ERROR, "too many threads\n");
@@ -764,11 +768,13 @@ static int dnxhd_mb_var_thread(AVCodecContext *avctx, void *arg,
             unsigned mb  = mb_y * ctx->m.mb_width + mb_x;
             int sum = 0;
             int sqsum = 0;
+            int bw = FFMIN(avctx->width - 16 * mb_x, 16);
+            int bh = FFMIN((avctx->height >> ctx->interlaced) - 16 * mb_y, 16);
             int mean, sqmean;
             int i, j;
             // Macroblocks are 16x16 pixels, unlike DCT blocks which are 8x8.
-            for (i = 0; i < 16; ++i) {
-                for (j = 0; j < 16; ++j) {
+            for (i = 0; i < bh; ++i) {
+                for (j = 0; j < bw; ++j) {
                     // Turn 16-bit pixels into 10-bit ones.
                     int const sample = (unsigned) pix[j] >> 6;
                     sum   += sample;
@@ -1036,7 +1042,11 @@ static void dnxhd_load_picture(DNXHDEncContext *ctx, const AVFrame *frame)
         ctx->thread[i]->dct_uv_offset = ctx->m.uvlinesize*8;
     }
 
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
     ctx->m.avctx->coded_frame->interlaced_frame = frame->interlaced_frame;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
     ctx->cur_field = frame->interlaced_frame && !frame->top_field_first;
 }
 
@@ -1048,7 +1058,7 @@ static int dnxhd_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
     int offset, i, ret;
     uint8_t *buf;
 
-    if ((ret = ff_alloc_packet2(avctx, pkt, ctx->cid_table->frame_size)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, pkt, ctx->cid_table->frame_size, 0)) < 0)
         return ret;
     buf = pkt->data;
 
@@ -1097,7 +1107,13 @@ static int dnxhd_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
         goto encode_coding_unit;
     }
 
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
     avctx->coded_frame->quality = ctx->qscale * FF_QP2LAMBDA;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+    ff_side_data_set_encoder_stats(pkt, ctx->qscale * FF_QP2LAMBDA, NULL, 0, AV_PICTURE_TYPE_I);
 
     pkt->flags |= AV_PKT_FLAG_KEY;
     *got_packet = 1;
@@ -1130,8 +1146,6 @@ static av_cold int dnxhd_encode_end(AVCodecContext *avctx)
     for (i = 1; i < avctx->thread_count; i++)
         av_freep(&ctx->thread[i]);
 
-    av_frame_free(&avctx->coded_frame);
-
     return 0;
 }
 
@@ -1149,7 +1163,7 @@ AVCodec ff_dnxhd_encoder = {
     .init           = dnxhd_encode_init,
     .encode2        = dnxhd_encode_picture,
     .close          = dnxhd_encode_end,
-    .capabilities   = CODEC_CAP_SLICE_THREADS,
+    .capabilities   = AV_CODEC_CAP_SLICE_THREADS,
     .pix_fmts       = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_YUV422P,
         AV_PIX_FMT_YUV422P10,
diff --git a/libavcodec/dnxhdenc.h b/libavcodec/dnxhdenc.h
index 7ef0b964..3f531efc 100644
--- a/libavcodec/dnxhdenc.h
+++ b/libavcodec/dnxhdenc.h
@@ -63,6 +63,7 @@ typedef struct DNXHDEncContext {
 
     int nitris_compat;
     unsigned min_padding;
+    int intra_quant_bias;
 
     DECLARE_ALIGNED(16, int16_t, blocks)[8][64];
 
diff --git a/libavcodec/dpcm.c b/libavcodec/dpcm.c
index ecc7a291..52a2c616 100644
--- a/libavcodec/dpcm.c
+++ b/libavcodec/dpcm.c
@@ -44,7 +44,7 @@
 #include "mathops.h"
 
 typedef struct DPCMContext {
-    int16_t roq_square_array[256];
+    int16_t square_array[256];
     int sample[2];                  ///< previous sample (for SOL_DPCM)
     const int8_t *sol_table;        ///< delta table for SOL_DPCM
 } DPCMContext;
@@ -130,8 +130,8 @@ static av_cold int dpcm_decode_init(AVCodecContext *avctx)
         /* initialize square table */
         for (i = 0; i < 128; i++) {
             int16_t square = i * i;
-            s->roq_square_array[i      ] =  square;
-            s->roq_square_array[i + 128] = -square;
+            s->square_array[i      ] =  square;
+            s->square_array[i + 128] = -square;
         }
         break;
 
@@ -153,6 +153,13 @@ static av_cold int dpcm_decode_init(AVCodecContext *avctx)
         }
         break;
 
+    case AV_CODEC_ID_SDX2_DPCM:
+        for (i = -128; i < 128; i++) {
+            int16_t square = i * i * 2;
+            s->square_array[i+128] = i < 0 ? -square: square;
+        }
+        break;
+
     default:
         break;
     }
@@ -200,6 +207,9 @@ static int dpcm_decode_frame(AVCodecContext *avctx, void *data,
         else
             out = buf_size;
         break;
+    case AV_CODEC_ID_SDX2_DPCM:
+        out = buf_size;
+        break;
     }
     if (out <= 0) {
         av_log(avctx, AV_LOG_ERROR, "packet is too small\n");
@@ -230,7 +240,7 @@ static int dpcm_decode_frame(AVCodecContext *avctx, void *data,
 
         /* decode the samples */
         while (output_samples < samples_end) {
-            predictor[ch] += s->roq_square_array[bytestream2_get_byteu(&gb)];
+            predictor[ch] += s->square_array[bytestream2_get_byteu(&gb)];
             predictor[ch]  = av_clip_int16(predictor[ch]);
             *output_samples++ = predictor[ch];
 
@@ -318,6 +328,19 @@ static int dpcm_decode_frame(AVCodecContext *avctx, void *data,
             }
         }
         break;
+
+    case AV_CODEC_ID_SDX2_DPCM:
+        while (output_samples < samples_end) {
+            int8_t n = bytestream2_get_byteu(&gb);
+
+            if (!(n & 1))
+                s->sample[ch] = 0;
+            s->sample[ch] += s->square_array[n + 128];
+            s->sample[ch]  = av_clip_int16(s->sample[ch]);
+            *output_samples++ = s->sample[ch];
+            ch ^= stereo;
+        }
+        break;
     }
 
     *got_frame_ptr = 1;
@@ -334,10 +357,11 @@ AVCodec ff_ ## name_ ## _decoder = {                        \
     .priv_data_size = sizeof(DPCMContext),                  \
     .init           = dpcm_decode_init,                     \
     .decode         = dpcm_decode_frame,                    \
-    .capabilities   = CODEC_CAP_DR1,                        \
+    .capabilities   = AV_CODEC_CAP_DR1,                     \
 }
 
 DPCM_DECODER(AV_CODEC_ID_INTERPLAY_DPCM, interplay_dpcm, "DPCM Interplay");
 DPCM_DECODER(AV_CODEC_ID_ROQ_DPCM,       roq_dpcm,       "DPCM id RoQ");
+DPCM_DECODER(AV_CODEC_ID_SDX2_DPCM,      sdx2_dpcm,      "DPCM Squareroot-Delta-Exact");
 DPCM_DECODER(AV_CODEC_ID_SOL_DPCM,       sol_dpcm,       "DPCM Sol");
 DPCM_DECODER(AV_CODEC_ID_XAN_DPCM,       xan_dpcm,       "DPCM Xan");
diff --git a/libavcodec/dpx.c b/libavcodec/dpx.c
index 66d84289..af7276ad 100644
--- a/libavcodec/dpx.c
+++ b/libavcodec/dpx.c
@@ -348,11 +348,11 @@ static int decode_frame(AVCodecContext *avctx,
                 // For 12 bit, ignore alpha
                 if (elements == 4)
                     buf += 2;
-                // Jump to next aligned position
-                buf += need_align;
             }
             for (i = 0; i < 3; i++)
                 ptr[i] += p->linesize[i];
+            // Jump to next aligned position
+            buf += need_align;
         }
         break;
     case 16:
@@ -392,5 +392,5 @@ AVCodec ff_dpx_decoder = {
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_DPX,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/dpxenc.c b/libavcodec/dpxenc.c
index 76aa0cc4..a5960334 100644
--- a/libavcodec/dpxenc.c
+++ b/libavcodec/dpxenc.c
@@ -39,7 +39,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
 
     s->big_endian         = !!(desc->flags & AV_PIX_FMT_FLAG_BE);
-    s->bits_per_component = desc->comp[0].depth_minus1 + 1;
+    s->bits_per_component = desc->comp[0].depth;
     s->num_components     = desc->nb_components;
     s->descriptor         = (desc->flags & AV_PIX_FMT_FLAG_ALPHA) ? 51 : 50;
     s->planar             = !!(desc->flags & AV_PIX_FMT_FLAG_PLANAR);
@@ -90,7 +90,8 @@ static av_always_inline void write32_internal(int big_endian, void *p, int value
 #define write16(p, value) write16_internal(s->big_endian, p, value)
 #define write32(p, value) write32_internal(s->big_endian, p, value)
 
-static void encode_rgb48_10bit(AVCodecContext *avctx, const AVPicture *pic, uint8_t *dst)
+static void encode_rgb48_10bit(AVCodecContext *avctx, const AVFrame *pic,
+                               uint8_t *dst)
 {
     DPXContext *s = avctx->priv_data;
     const uint8_t *src = pic->data[0];
@@ -115,7 +116,7 @@ static void encode_rgb48_10bit(AVCodecContext *avctx, const AVPicture *pic, uint
     }
 }
 
-static void encode_gbrp10(AVCodecContext *avctx, const AVPicture *pic, uint8_t *dst)
+static void encode_gbrp10(AVCodecContext *avctx, const AVFrame *pic, uint8_t *dst)
 {
     DPXContext *s = avctx->priv_data;
     const uint8_t *src[3] = {pic->data[0], pic->data[1], pic->data[2]};
@@ -141,7 +142,7 @@ static void encode_gbrp10(AVCodecContext *avctx, const AVPicture *pic, uint8_t *
     }
 }
 
-static void encode_gbrp12(AVCodecContext *avctx, const AVPicture *pic, uint16_t *dst)
+static void encode_gbrp12(AVCodecContext *avctx, const AVFrame *pic, uint16_t *dst)
 {
     DPXContext *s = avctx->priv_data;
     const uint16_t *src[3] = {(uint16_t*)pic->data[0],
@@ -195,7 +196,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         need_align = size - len;
         size *= avctx->height;
     }
-    if ((ret = ff_alloc_packet2(avctx, pkt, size + HEADER_SIZE)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, pkt, size + HEADER_SIZE, 0)) < 0)
         return ret;
     buf = pkt->data;
 
@@ -207,7 +208,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     memcpy (buf +   8, "V1.0", 4);
     write32(buf +  20, 1); /* new image */
     write32(buf +  24, HEADER_SIZE);
-    if (!(avctx->flags & CODEC_FLAG_BITEXACT))
+    if (!(avctx->flags & AV_CODEC_FLAG_BITEXACT))
         memcpy (buf + 160, LIBAVCODEC_IDENT, FFMIN(sizeof(LIBAVCODEC_IDENT), 100));
     write32(buf + 660, 0xFFFFFFFF); /* unencrypted */
 
@@ -243,21 +244,22 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
                 src += frame->linesize[0];
             }
         } else {
-            size = avpicture_layout((const AVPicture*)frame, avctx->pix_fmt,
-                                    avctx->width, avctx->height,
-                                    buf + HEADER_SIZE, pkt->size - HEADER_SIZE);
+            size = av_image_copy_to_buffer(buf + HEADER_SIZE, pkt->size - HEADER_SIZE,
+                                           (const uint8_t**)frame->data, frame->linesize,
+                                           avctx->pix_fmt,
+                                           avctx->width, avctx->height, 1);
         }
         if (size < 0)
             return size;
         break;
     case 10:
         if (s->planar)
-            encode_gbrp10(avctx, (const AVPicture*)frame, buf + HEADER_SIZE);
+            encode_gbrp10(avctx, frame, buf + HEADER_SIZE);
         else
-            encode_rgb48_10bit(avctx, (const AVPicture*)frame, buf + HEADER_SIZE);
+            encode_rgb48_10bit(avctx, frame, buf + HEADER_SIZE);
         break;
     case 12:
-        encode_gbrp12(avctx, (const AVPicture*)frame, (uint16_t*)(buf + HEADER_SIZE));
+        encode_gbrp12(avctx, frame, (uint16_t*)(buf + HEADER_SIZE));
         break;
     default:
         av_log(avctx, AV_LOG_ERROR, "Unsupported bit depth: %d\n", s->bits_per_component);
diff --git a/libavcodec/dsd_tablegen.h b/libavcodec/dsd_tablegen.h
index 6afb4167..990d57a5 100644
--- a/libavcodec/dsd_tablegen.h
+++ b/libavcodec/dsd_tablegen.h
@@ -29,10 +29,6 @@
 #define HTAPS   48                /** number of FIR constants */
 #define CTABLES ((HTAPS + 7) / 8) /** number of "8 MACs" lookup tables */
 
-#if CONFIG_HARDCODED_TABLES
-#define dsd_ctables_tableinit()
-#include "libavcodec/dsd_tables.h"
-#else
 #include "libavutil/common.h"
 
 /*
@@ -78,18 +74,18 @@ static float ctables[CTABLES][256];
 
 static av_cold void dsd_ctables_tableinit(void)
 {
-    int t, e, m, k;
-    double acc;
-    for (t = 0; t < CTABLES; ++t) {
-        k = FFMIN(HTAPS - t * 8, 8);
-        for (e = 0; e < 256; ++e) {
-            acc = 0.0;
-            for (m = 0; m < k; ++m)
-                acc += (((e >> (7 - m)) & 1) * 2 - 1) * htaps[t * 8 + m];
-            ctables[CTABLES - 1 - t][e] = (float)acc;
+    int t, e, m, sign;
+    double acc[CTABLES];
+    for (e = 0; e < 256; ++e) {
+        memset(acc, 0, sizeof(acc));
+        for (m = 0; m < 8; ++m) {
+            sign = (((e >> (7 - m)) & 1) * 2 - 1);
+            for (t = 0; t < CTABLES; ++t)
+                acc[t] += sign * htaps[t * 8 + m];
         }
+        for (t = 0; t < CTABLES; ++t)
+            ctables[CTABLES - 1 - t][e] = acc[t];
     }
 }
-#endif /* CONFIG_HARDCODED_TABLES */
 
 #endif /* AVCODEC_DSD_TABLEGEN_H */
diff --git a/libavcodec/dsicinaudio.c b/libavcodec/dsicinaudio.c
index b336d2c5..290dab41 100644
--- a/libavcodec/dsicinaudio.c
+++ b/libavcodec/dsicinaudio.c
@@ -129,5 +129,5 @@ AVCodec ff_dsicinaudio_decoder = {
     .priv_data_size = sizeof(CinAudioContext),
     .init           = cinaudio_decode_init,
     .decode         = cinaudio_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/dsicinvideo.c b/libavcodec/dsicinvideo.c
index 48fb635c..f95cbc74 100644
--- a/libavcodec/dsicinvideo.c
+++ b/libavcodec/dsicinvideo.c
@@ -313,5 +313,5 @@ AVCodec ff_dsicinvideo_decoder = {
     .init           = cinvideo_decode_init,
     .close          = cinvideo_decode_end,
     .decode         = cinvideo_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/dss_sp.c b/libavcodec/dss_sp.c
index 909ad1f2..7cf84899 100644
--- a/libavcodec/dss_sp.c
+++ b/libavcodec/dss_sp.c
@@ -66,7 +66,7 @@ typedef struct DssSpContext {
     int pulse_dec_mode;
 
     DECLARE_ALIGNED(16, uint8_t, bits)[DSS_SP_FRAME_SIZE +
-                                       FF_INPUT_BUFFER_PADDING_SIZE];
+                                       AV_INPUT_BUFFER_PADDING_SIZE];
 } DssSpContext;
 
 /*
@@ -783,5 +783,5 @@ AVCodec ff_dss_sp_decoder = {
     .priv_data_size = sizeof(DssSpContext),
     .init           = dss_sp_decode_init,
     .decode         = dss_sp_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/dump_extradata_bsf.c b/libavcodec/dump_extradata_bsf.c
index 568f9209..08c42270 100644
--- a/libavcodec/dump_extradata_bsf.c
+++ b/libavcodec/dump_extradata_bsf.c
@@ -30,18 +30,18 @@ static int dump_extradata(AVBitStreamFilterContext *bsfc, AVCodecContext *avctx,
     int cmd= args ? *args : 0;
     /* cast to avoid warning about discarding qualifiers */
     if(avctx->extradata){
-        if(  (keyframe && (avctx->flags2 & CODEC_FLAG2_LOCAL_HEADER) && cmd=='a')
+        if(  (keyframe && (avctx->flags2 & AV_CODEC_FLAG2_LOCAL_HEADER) && cmd == 'a')
            ||(keyframe && (cmd=='k' || !cmd))
            ||(cmd=='e')
             /*||(? && (s->flags & PARSER_FLAG_DUMP_EXTRADATA_AT_BEGIN)*/){
             int size= buf_size + avctx->extradata_size;
             *poutbuf_size= size;
-            *poutbuf= av_malloc(size + FF_INPUT_BUFFER_PADDING_SIZE);
+            *poutbuf= av_malloc(size + AV_INPUT_BUFFER_PADDING_SIZE);
             if (!*poutbuf)
                 return AVERROR(ENOMEM);
 
             memcpy(*poutbuf, avctx->extradata, avctx->extradata_size);
-            memcpy((*poutbuf) + avctx->extradata_size, buf, buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
+            memcpy((*poutbuf) + avctx->extradata_size, buf, buf_size + AV_INPUT_BUFFER_PADDING_SIZE);
             return 1;
         }
     }
diff --git a/libavcodec/dv.h b/libavcodec/dv.h
index 5d282633..af506ebb 100644
--- a/libavcodec/dv.h
+++ b/libavcodec/dv.h
@@ -39,7 +39,7 @@ typedef struct DVwork_chunk {
 
 typedef struct DVVideoContext {
     const AVDVProfile *sys;
-    AVFrame         *frame;
+    const AVFrame   *frame;
     AVCodecContext  *avctx;
     uint8_t         *buf;
 
diff --git a/libavcodec/dv_profile.c b/libavcodec/dv_profile.c
index e336e081..66505c88 100644
--- a/libavcodec/dv_profile.c
+++ b/libavcodec/dv_profile.c
@@ -297,14 +297,6 @@ const AVDVProfile* ff_dv_frame_profile(AVCodecContext* codec, const AVDVProfile
     return NULL;
 }
 
-#if FF_API_DV_FRAME_PROFILE
-const AVDVProfile* avpriv_dv_frame_profile2(AVCodecContext* codec, const AVDVProfile *sys,
-                                            const uint8_t *frame, unsigned buf_size)
-{
-    return ff_dv_frame_profile(codec, sys, frame, buf_size);
-}
-#endif
-
 const AVDVProfile *av_dv_frame_profile(const AVDVProfile *sys,
                                        const uint8_t *frame, unsigned buf_size)
 {
diff --git a/libavcodec/dv_profile.h b/libavcodec/dv_profile.h
index d22ad266..9380a66f 100644
--- a/libavcodec/dv_profile.h
+++ b/libavcodec/dv_profile.h
@@ -58,15 +58,6 @@ typedef struct AVDVProfile {
     const uint8_t  (*audio_shuffle)[9];     /* PCM shuffling table */
 } AVDVProfile;
 
-#if FF_API_DV_FRAME_PROFILE
-/**
- * @deprecated use av_dv_frame_profile()
- */
-attribute_deprecated
-const AVDVProfile* avpriv_dv_frame_profile2(AVCodecContext* codec, const AVDVProfile *sys,
-                                            const uint8_t* frame, unsigned buf_size);
-#endif
-
 /**
  * Get a DV profile for the provided compressed frame.
  *
diff --git a/libavcodec/dvaudio.h b/libavcodec/dvaudio.h
new file mode 100644
index 00000000..e7f70c58
--- /dev/null
+++ b/libavcodec/dvaudio.h
@@ -0,0 +1,39 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_DVAUDIO_H
+#define AVCODEC_DVAUDIO_H
+
+#include <stdint.h>
+
+/* Frame sample count: 6-bit excess from buffer[0] plus a minimum picked by
+ * bits 3..5 of buffer[3] and by dsf (non-zero dsf picks the larger minimum). */
+static inline int dv_get_audio_sample_count(const uint8_t *buffer, int dsf)
+{
+    /* minimum sample counts, indexed by [code][dsf != 0] */
+    static const int min_samples[3][2] = {
+        { 1580, 1896 }, { 1452, 1742 }, { 1053, 1264 },
+    };
+    const int excess = buffer[0] & 0x3f; /* samples in this frame - min samples */
+    const int code   = (buffer[3] >> 3) & 0x07;
+
+    /* code 2 and all higher codes share the last row */
+    return excess + min_samples[code < 2 ? code : 2][!!dsf];
+}
+
+#endif /* AVCODEC_DVAUDIO_H */
diff --git a/libavcodec/dvaudio_parser.c b/libavcodec/dvaudio_parser.c
new file mode 100644
index 00000000..160faafd
--- /dev/null
+++ b/libavcodec/dvaudio_parser.c
@@ -0,0 +1,46 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Ulead DV audio parser
+ *
+ * Determines the duration for each packet.
+ */
+
+#include "parser.h"
+#include "dvaudio.h"
+
+/* Set s1->duration from bytes 244 and 247 of the packet when it is long
+ * enough, then pass the packet through untouched (no splitting or combining). */
+static int dvaudio_parse(AVCodecParserContext *s1, AVCodecContext *avctx,
+                        const uint8_t **poutbuf, int *poutbuf_size,
+                        const uint8_t *buf, int buf_size)
+{
+    *poutbuf      = buf;
+    *poutbuf_size = buf_size;
+    if (buf_size < 248)
+        return buf_size; /* too short to carry the sample-count bytes */
+    s1->duration = dv_get_audio_sample_count(buf + 244, avctx->block_align == 8640);
+    return buf_size;
+}
+
+AVCodecParser ff_dvaudio_parser = { /* AV_CODEC_ID_DVAUDIO; only parser_parse is set */
+    .codec_ids      = { AV_CODEC_ID_DVAUDIO },
+    .parser_parse   = dvaudio_parse,
+};
diff --git a/libavcodec/dvaudiodec.c b/libavcodec/dvaudiodec.c
new file mode 100644
index 00000000..faa9e5fb
--- /dev/null
+++ b/libavcodec/dvaudiodec.c
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2012 Laurent Aimar
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/intreadwrite.h"
+#include "avcodec.h"
+#include "internal.h"
+#include "dvaudio.h"
+
+typedef struct DVAudioContext {
+    int block_size;         ///< bytes consumed per packet (7200 or 8640)
+    int is_12bit;           ///< set when bits_per_raw_sample == 12
+    int is_pal;             ///< set when block_size == 8640
+    int16_t shuffle[2000];  ///< per-sample byte offsets into the packet
+} DVAudioContext;
+
+/* Validate the stream parameters, derive the block size and sample layout,
+ * and precompute the byte offset of every sample within a packet.
+ * Returns 0 on success, AVERROR(EINVAL) for a non-stereo stream or when
+ * neither codec_tag nor block_align identifies a known block size. */
+static av_cold int decode_init(AVCodecContext *avctx)
+{
+    DVAudioContext *s = avctx->priv_data;
+    unsigned a, b;
+    int i;
+
+    if (avctx->channels != 2) {
+        av_log(avctx, AV_LOG_ERROR, "invalid number of channels\n");
+        return AVERROR(EINVAL);
+    }
+
+    /* codec_tag takes precedence; block_align is only the fallback */
+    if (avctx->codec_tag == 0x0215)
+        s->block_size = 7200;
+    else if (avctx->codec_tag == 0x0216)
+        s->block_size = 8640;
+    else if (avctx->block_align == 7200 || avctx->block_align == 8640)
+        s->block_size = avctx->block_align;
+    else
+        return AVERROR(EINVAL);
+
+    avctx->sample_fmt     = AV_SAMPLE_FMT_S16;
+    avctx->channel_layout = AV_CH_LAYOUT_STEREO;
+    s->is_12bit = avctx->bits_per_raw_sample == 12;
+    s->is_pal   = s->block_size == 8640;
+    a = s->is_pal ? 18 : 15;
+    b = 3 * a;
+    for (i = 0; i < FF_ARRAY_ELEMS(s->shuffle); i++)
+        s->shuffle[i] = 80 * ((21 * (i % 3) + 9 * (i / 3) + ((i / a) % 3)) % b) +
+                         (2 + s->is_12bit) * (i / b) + 8;
+    return 0;
+}
+
+/* Expand a 12-bit companded DV audio sample to 16 bits. Bits 8..11 of the
+ * sign-extended input select the scale; the two outermost scale codes at
+ * each end (0, 1, 0xe, 0xf) pass the value through unscaled. */
+static inline uint16_t dv_audio_12to16(uint16_t sample)
+{
+    uint16_t shift;
+
+    if (sample >= 0x800)
+        sample |= 0xf000; /* sign-extend from 12 to 16 bits */
+    shift = (sample & 0xf00) >> 8;
+    if (shift < 0x2 || shift > 0xd)
+        return sample;
+    if (shift < 0x8) {
+        shift--;
+        return (sample - (256 * shift)) << shift;
+    }
+    shift = 0xe - shift;
+    return ((sample + ((256 * shift) + 1)) << shift) - 1;
+}
+
+/* Decode one block: read the sample count from the packet, then gather the
+ * shuffled samples into interleaved 16-bit pairs in frame->data[0].
+ * Returns the block size consumed, or a negative error code. */
+static int decode_frame(AVCodecContext *avctx, void *data,
+                        int *got_frame_ptr, AVPacket *pkt)
+{
+    DVAudioContext *s = avctx->priv_data;
+    AVFrame *frame = data;
+    int16_t *out;
+    int err, n;
+
+    if (pkt->size < s->block_size)
+        return AVERROR_INVALIDDATA;
+
+    frame->nb_samples = dv_get_audio_sample_count(pkt->data + 244, s->is_pal);
+    err = ff_get_buffer(avctx, frame, 0);
+    if (err < 0)
+        return err;
+    out = (int16_t *)frame->data[0];
+    for (n = 0; n < frame->nb_samples; n++, out += 2) {
+        const uint8_t *v = pkt->data + s->shuffle[n];
+
+        if (s->is_12bit) {
+            out[0] = dv_audio_12to16((v[0] << 4) | ((v[2] >> 4) & 0x0f));
+            out[1] = dv_audio_12to16((v[1] << 4) | ((v[2] >> 0) & 0x0f));
+        } else {
+            out[0] = AV_RB16(&v[0]);
+            out[1] = AV_RB16(&v[s->is_pal ? 4320 : 3600]);
+        }
+    }
+    *got_frame_ptr = 1;
+    return s->block_size;
+}
+
+AVCodec ff_dvaudio_decoder = { /* decoder entry for AV_CODEC_ID_DVAUDIO */
+    .name           = "dvaudio",
+    .long_name      = NULL_IF_CONFIG_SMALL("Ulead DV Audio"),
+    .type           = AVMEDIA_TYPE_AUDIO,
+    .id             = AV_CODEC_ID_DVAUDIO,
+    .init           = decode_init,
+    .decode         = decode_frame,
+    .capabilities   = AV_CODEC_CAP_DR1,
+    .priv_data_size = sizeof(DVAudioContext),
+};
diff --git a/libavcodec/dvbsub.c b/libavcodec/dvbsub.c
index dd84a076..3cdbade9 100644
--- a/libavcodec/dvbsub.c
+++ b/libavcodec/dvbsub.c
@@ -315,7 +315,7 @@ static int encode_dvb_subtitles(DVBSubtitleContext *s,
                 *q++ = (1 << (7 - bpp_index)) | (0xf << 1) | 1; /* 2 bits/pixel full range */
                 {
                     int a, r, g, b;
-                    uint32_t x= ((uint32_t*)h->rects[clut_id]->pict.data[1])[i];
+                    uint32_t x= ((uint32_t*)h->rects[clut_id]->data[1])[i];
                     a = (x >> 24) & 0xff;
                     r = (x >> 16) & 0xff;
                     g = (x >>  8) & 0xff;
@@ -410,10 +410,10 @@ static int encode_dvb_subtitles(DVBSubtitleContext *s,
                 q += 2;
 
                 top_ptr = q;
-                dvb_encode_rle(&q, h->rects[object_id]->pict.data[0], h->rects[object_id]->w * 2,
+                dvb_encode_rle(&q, h->rects[object_id]->data[0], h->rects[object_id]->w * 2,
                                     h->rects[object_id]->w, h->rects[object_id]->h >> 1);
                 bottom_ptr = q;
-                dvb_encode_rle(&q, h->rects[object_id]->pict.data[0] + h->rects[object_id]->w,
+                dvb_encode_rle(&q, h->rects[object_id]->data[0] + h->rects[object_id]->w,
                                     h->rects[object_id]->w * 2, h->rects[object_id]->w,
                                     h->rects[object_id]->h >> 1);
 
diff --git a/libavcodec/dvbsubdec.c b/libavcodec/dvbsubdec.c
index e268e2a3..a4663d99 100644
--- a/libavcodec/dvbsubdec.c
+++ b/libavcodec/dvbsubdec.c
@@ -237,6 +237,8 @@ typedef struct DVBSubContext {
     int time_out;
     int compute_edt; /**< if 1 end display time calculated using pts
                           if 0 (Default) calculated using time out */
+    int compute_clut;
+    int substream;
     int64_t prev_start;
     DVBSubRegion *region_list;
     DVBSubCLUT   *clut_list;
@@ -367,17 +369,22 @@ static av_cold int dvbsub_init_decoder(AVCodecContext *avctx)
     int i, r, g, b, a = 0;
     DVBSubContext *ctx = avctx->priv_data;
 
-    if (!avctx->extradata || (avctx->extradata_size < 4) || ((avctx->extradata_size % 5 != 0) && (avctx->extradata_size != 4))) {
+    if (ctx->substream < 0) {
+        ctx->composition_id = -1;
+        ctx->ancillary_id   = -1;
+    } else if (!avctx->extradata || (avctx->extradata_size < 4) || ((avctx->extradata_size % 5 != 0) && (avctx->extradata_size != 4))) {
         av_log(avctx, AV_LOG_WARNING, "Invalid DVB subtitles stream extradata!\n");
         ctx->composition_id = -1;
         ctx->ancillary_id   = -1;
     } else {
-        if (avctx->extradata_size > 5) {
-            av_log(avctx, AV_LOG_WARNING, "Decoding first DVB subtitles sub-stream\n");
+        if (avctx->extradata_size > 5*ctx->substream + 2) {
+            ctx->composition_id = AV_RB16(avctx->extradata + 5*ctx->substream);
+            ctx->ancillary_id   = AV_RB16(avctx->extradata + 5*ctx->substream + 2);
+        } else {
+            av_log(avctx, AV_LOG_WARNING, "Selected DVB subtitles sub-stream %d is not available\n", ctx->substream);
+            ctx->composition_id = AV_RB16(avctx->extradata);
+            ctx->ancillary_id   = AV_RB16(avctx->extradata + 2);
         }
-
-        ctx->composition_id = AV_RB16(avctx->extradata);
-        ctx->ancillary_id   = AV_RB16(avctx->extradata + 2);
     }
 
     ctx->version = -1;
@@ -754,6 +761,63 @@ static int dvbsub_read_8bit_string(AVCodecContext *avctx,
     return pixels_read;
 }
 
+/* Fill the palette at rect->data[1] for the w x h indexed bitmap at
+ * rect->data[0]: colours are ranked greedily by how strongly they border
+ * the image edge or already-ranked colours, then given a ramp of levels. */
+static void compute_default_clut(AVSubtitleRect *rect, int w, int h)
+{
+    uint8_t list[256] = {0}, list_inv[256];
+    int counttab[256] = {0};
+    int count, i, x, y;
+
+#define V(x,y) rect->data[0][(x) + (y)*rect->linesize[0]]
+    /* counttab[c]: pixels of colour c with a differing (or missing) neighbour */
+    for (y = 0; y<h; y++) {
+        for (x = 0; x<w; x++) {
+            int v = V(x,y) + 1;
+            int vl = x     ? V(x-1,y) + 1 : 0;
+            int vr = x+1<w ? V(x+1,y) + 1 : 0;
+            int vt = y     ? V(x,y-1) + 1 : 0;
+            int vb = y+1<h ? V(x,y+1) + 1 : 0;
+            counttab[v-1] += !!((v!=vl) + (v!=vr) + (v!=vt) + (v!=vb));
+        }
+    }
+#define L(x,y) list[ rect->data[0][(x) + (y)*rect->linesize[0]] ]
+    for (i = 0; i<256; i++) {
+        int scoretab[256] = {0};
+        int bestscore = 0, bestv = 0;
+        for (y = 0; y<h; y++) {
+            for (x = 0; x<w; x++) {
+                int v = rect->data[0][x + y*rect->linesize[0]];
+                int l_m = list[v];
+                int l_l = x     ? L(x-1, y) : 1;
+                int l_r = x+1<w ? L(x+1, y) : 1;
+                int l_t = y     ? L(x, y-1) : 1;
+                int l_b = y+1<h ? L(x, y+1) : 1;
+                int score;
+                if (l_m)
+                    continue;
+                scoretab[v] += l_l + l_r + l_t + l_b;
+                score = 1024LL*scoretab[v] / counttab[v];
+                if (score > bestscore) {
+                    bestscore = score;
+                    bestv = v;
+                }
+            }
+        }
+        if (!bestscore)
+            break;
+        list    [ bestv ] = 1;
+        list_inv[     i ] = bestv;
+    }
+    /* a single ranked colour gives i - 1 == 0: clamp to avoid dividing by 0 */
+    count = i > 1 ? i - 1 : 1;
+    for (i--; i>=0; i--) {
+        int v = i*255/count;
+        AV_WN32(rect->data[1] + 4*list_inv[i], RGBA(v/2,v,v/2,v));
+    }
+}
+
 static int save_subtitle_set(AVCodecContext *avctx, AVSubtitle *sub, int *got_output)
 {
     DVBSubContext *ctx = avctx->priv_data;
@@ -763,7 +827,7 @@ static int save_subtitle_set(AVCodecContext *avctx, AVSubtitle *sub, int *got_ou
     AVSubtitleRect *rect;
     DVBSubCLUT *clut;
     uint32_t *clut_table;
-    int i;
+    int i,j;
     int offset_x=0, offset_y=0;
     int ret = 0;
 
@@ -775,7 +839,7 @@ static int save_subtitle_set(AVCodecContext *avctx, AVSubtitle *sub, int *got_ou
 
     /* Not touching AVSubtitles again*/
     if(sub->num_rects) {
-        avpriv_request_sample(ctx, "Different Version of Segment asked Twice\n");
+        avpriv_request_sample(ctx, "Different Version of Segment asked Twice");
         return AVERROR_PATCHWELCOME;
     }
     for (display = ctx->display_list; display; display = display->next) {
@@ -820,7 +884,7 @@ static int save_subtitle_set(AVCodecContext *avctx, AVSubtitle *sub, int *got_ou
             rect->h = region->height;
             rect->nb_colors = (1 << region->depth);
             rect->type      = SUBTITLE_BITMAP;
-            rect->pict.linesize[0] = region->width;
+            rect->linesize[0] = region->width;
 
             clut = get_clut(ctx, region->clut);
 
@@ -840,20 +904,32 @@ static int save_subtitle_set(AVCodecContext *avctx, AVSubtitle *sub, int *got_ou
                 break;
             }
 
-            rect->pict.data[1] = av_mallocz(AVPALETTE_SIZE);
-            if (!rect->pict.data[1]) {
+            rect->data[1] = av_mallocz(AVPALETTE_SIZE);
+            if (!rect->data[1]) {
                 ret = AVERROR(ENOMEM);
                 goto fail;
             }
-            memcpy(rect->pict.data[1], clut_table, (1 << region->depth) * sizeof(uint32_t));
+            memcpy(rect->data[1], clut_table, (1 << region->depth) * sizeof(uint32_t));
 
-            rect->pict.data[0] = av_malloc(region->buf_size);
-            if (!rect->pict.data[0]) {
+            rect->data[0] = av_malloc(region->buf_size);
+            if (!rect->data[0]) {
                 ret = AVERROR(ENOMEM);
                 goto fail;
             }
 
-            memcpy(rect->pict.data[0], region->pbuf, region->buf_size);
+            memcpy(rect->data[0], region->pbuf, region->buf_size);
+
+            if ((clut == &default_clut && ctx->compute_clut == -1) || ctx->compute_clut == 1)
+                compute_default_clut(rect, rect->w, rect->h);
+
+#if FF_API_AVPICTURE
+FF_DISABLE_DEPRECATION_WARNINGS
+            for (j = 0; j < 4; j++) {
+                rect->pict.data[j] = rect->data[j];
+                rect->pict.linesize[j] = rect->linesize[j];
+            }
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
             i++;
         }
@@ -865,8 +941,8 @@ static int save_subtitle_set(AVCodecContext *avctx, AVSubtitle *sub, int *got_ou
         for(i=0; i<sub->num_rects; i++) {
             rect = sub->rects[i];
             if (rect) {
-                av_freep(&rect->pict.data[0]);
-                av_freep(&rect->pict.data[1]);
+                av_freep(&rect->data[0]);
+                av_freep(&rect->data[1]);
             }
             av_freep(&sub->rects[i]);
         }
@@ -1023,7 +1099,7 @@ static int dvbsub_parse_object_segment(AVCodecContext *avctx,
         buf += 2;
 
         if (buf + top_field_len + bottom_field_len > buf_end) {
-            av_log(avctx, AV_LOG_ERROR, "Field data size too large\n");
+            av_log(avctx, AV_LOG_ERROR, "Field data size %d+%d too large\n", top_field_len, bottom_field_len);
             return AVERROR_INVALIDDATA;
         }
 
@@ -1107,7 +1183,6 @@ static int dvbsub_parse_clut_segment(AVCodecContext *avctx,
 
         if (depth == 0) {
             av_log(avctx, AV_LOG_ERROR, "Invalid clut depth 0x%x!\n", *buf);
-            return AVERROR_INVALIDDATA;
         }
 
         full_range = (*buf++) & 1;
@@ -1369,7 +1444,7 @@ static int dvbsub_parse_page_segment(AVCodecContext *avctx,
 
 
 #ifdef DEBUG
-static void save_display_set(DVBSubContext *ctx)
+static int save_display_set(DVBSubContext *ctx)
 {
     DVBSubRegion *region;
     DVBSubRegionDisplay *display;
@@ -1390,7 +1465,7 @@ static void save_display_set(DVBSubContext *ctx)
         region = get_region(ctx, display->region_id);
 
         if (!region)
-            return;
+            return -1;
 
         if (x_pos == -1) {
             x_pos = display->x_pos;
@@ -1422,13 +1497,13 @@ static void save_display_set(DVBSubContext *ctx)
 
         pbuf = av_malloc(width * height * 4);
         if (!pbuf)
-            return;
+            return -1;
 
         for (display = ctx->display_list; display; display = display->next) {
             region = get_region(ctx, display->region_id);
 
             if (!region)
-                return;
+                return -1;
 
             x_off = display->x_pos - x_pos;
             y_off = display->y_pos - y_pos;
@@ -1468,6 +1543,7 @@ static void save_display_set(DVBSubContext *ctx)
     }
 
     fileno_index++;
+    return 0;
 }
 #endif
 
@@ -1545,6 +1621,7 @@ static int dvbsub_decode(AVCodecContext *avctx,
     int i;
     int ret = 0;
     int got_segment = 0;
+    int got_dds = 0;
 
     ff_dlog(avctx, "DVB sub packet:\n");
 
@@ -1607,9 +1684,15 @@ static int dvbsub_decode(AVCodecContext *avctx,
             case DVBSUB_DISPLAYDEFINITION_SEGMENT:
                 ret = dvbsub_parse_display_definition_segment(avctx, p,
                                                               segment_length);
+                got_dds = 1;
                 break;
             case DVBSUB_DISPLAY_SEGMENT:
                 ret = dvbsub_display_end_segment(avctx, p, segment_length, sub, data_size);
+                if (got_segment == 15 && !got_dds && !avctx->width && !avctx->height) {
+                    // Default from ETSI EN 300 743 V1.3.1 (7.2.1)
+                    avctx->width  = 720;
+                    avctx->height = 576;
+                }
                 got_segment |= 16;
                 break;
             default:
@@ -1645,7 +1728,9 @@ static int dvbsub_decode(AVCodecContext *avctx,
 
 #define DS AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_SUBTITLE_PARAM
 static const AVOption options[] = {
-    {"compute_edt", "compute end of time using pts or timeout", offsetof(DVBSubContext, compute_edt), FF_OPT_TYPE_INT, {.i64 = 0}, 0, 1, DS},
+    {"compute_edt", "compute end of time using pts or timeout", offsetof(DVBSubContext, compute_edt), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, DS},
+    {"compute_clut", "compute clut when not available(-1) or always(1) or never(0)", offsetof(DVBSubContext, compute_clut), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, DS},
+    {"dvb_substream", "", offsetof(DVBSubContext, substream), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 63, DS},
     {NULL}
 };
 static const AVClass dvbsubdec_class = {
diff --git a/libavcodec/dvdec.c b/libavcodec/dvdec.c
index fbd6bf50..0b4c1bc2 100644
--- a/libavcodec/dvdec.c
+++ b/libavcodec/dvdec.c
@@ -287,14 +287,20 @@ static int dv_decode_video_segment(AVCodecContext *avctx, void *arg)
     GetBitContext gb;
     BlockInfo mb_data[5 * DV_MAX_BPM], *mb, *mb1;
     LOCAL_ALIGNED_16(int16_t, sblock, [5 * DV_MAX_BPM], [64]);
-    LOCAL_ALIGNED_16(uint8_t, mb_bit_buffer, [80     + FF_INPUT_BUFFER_PADDING_SIZE]); /* allow some slack */
-    LOCAL_ALIGNED_16(uint8_t, vs_bit_buffer, [80 * 5 + FF_INPUT_BUFFER_PADDING_SIZE]); /* allow some slack */
+    LOCAL_ALIGNED_16(uint8_t, mb_bit_buffer, [80     + AV_INPUT_BUFFER_PADDING_SIZE]); /* allow some slack */
+    LOCAL_ALIGNED_16(uint8_t, vs_bit_buffer, [80 * 5 + AV_INPUT_BUFFER_PADDING_SIZE]); /* allow some slack */
     const int log2_blocksize = 3-s->avctx->lowres;
     int is_field_mode[5];
+    int vs_bit_buffer_damaged = 0;
+    int mb_bit_buffer_damaged[5] = {0};
+    int retried = 0;
+    int sta;
 
     av_assert1((((int) mb_bit_buffer) & 7) == 0);
     av_assert1((((int) vs_bit_buffer) & 7) == 0);
 
+retry:
+
     memset(sblock, 0, 5 * DV_MAX_BPM * sizeof(*sblock));
 
     /* pass 1: read DC and AC coefficients in blocks */
@@ -305,6 +311,14 @@ static int dv_decode_video_segment(AVCodecContext *avctx, void *arg)
     for (mb_index = 0; mb_index < 5; mb_index++, mb1 += s->sys->bpm, block1 += s->sys->bpm * 64) {
         /* skip header */
         quant    = buf_ptr[3] & 0x0f;
+        if (avctx->error_concealment) {
+            if ((buf_ptr[3] >> 4) == 0x0E)
+                vs_bit_buffer_damaged = 1;
+            if (!mb_index) {
+                sta = buf_ptr[3] >> 4;
+            } else if (sta != (buf_ptr[3] >> 4))
+                vs_bit_buffer_damaged = 1;
+        }
         buf_ptr += 4;
         init_put_bits(&pb, mb_bit_buffer, 80);
         mb    = mb1;
@@ -333,7 +347,7 @@ static int dv_decode_video_segment(AVCodecContext *avctx, void *arg)
                                     dct_mode                        * 22 * 64 +
                                     (quant + ff_dv_quant_offset[class1]) * 64];
             }
-            dc = dc << 2;
+            dc = dc * 4;
             /* convert to unsigned because 128 is not added in the
              * standard IDCT */
             dc                   += 1024;
@@ -349,11 +363,16 @@ static int dv_decode_video_segment(AVCodecContext *avctx, void *arg)
              * block is finished */
             if (mb->pos >= 64)
                 bit_copy(&pb, &gb);
+            if (mb->pos >= 64 && mb->pos < 127)
+                vs_bit_buffer_damaged = mb_bit_buffer_damaged[mb_index] = 1;
 
             block += 64;
             mb++;
         }
 
+        if (mb_bit_buffer_damaged[mb_index] > 0)
+            continue;
+
         /* pass 2: we can do it just after */
         ff_dlog(avctx, "***pass 2 size=%d MB#=%d\n", put_bits_count(&pb), mb_index);
         block = block1;
@@ -367,6 +386,8 @@ static int dv_decode_video_segment(AVCodecContext *avctx, void *arg)
                 /* if still not finished, no need to parse other blocks */
                 if (mb->pos < 64)
                     break;
+                if (mb->pos < 127)
+                    vs_bit_buffer_damaged = mb_bit_buffer_damaged[mb_index] = 1;
             }
         }
         /* all blocks are finished, so the extra bytes can be used at
@@ -384,17 +405,25 @@ static int dv_decode_video_segment(AVCodecContext *avctx, void *arg)
     flush_put_bits(&vs_pb);
     for (mb_index = 0; mb_index < 5; mb_index++) {
         for (j = 0; j < s->sys->bpm; j++) {
-            if (mb->pos < 64 && get_bits_left(&gb) > 0) {
+            if (mb->pos < 64 && get_bits_left(&gb) > 0 && !vs_bit_buffer_damaged) {
                 ff_dlog(avctx, "start %d:%d\n", mb_index, j);
                 dv_decode_ac(&gb, mb, block);
             }
-            if (mb->pos >= 64 && mb->pos < 127)
+
+            if (mb->pos >= 64 && mb->pos < 127) {
                 av_log(avctx, AV_LOG_ERROR,
                        "AC EOB marker is absent pos=%d\n", mb->pos);
+                vs_bit_buffer_damaged = 1;
+            }
             block += 64;
             mb++;
         }
     }
+    if (vs_bit_buffer_damaged && !retried) {
+        av_log(avctx, AV_LOG_ERROR, "Concealing bitstream errors\n");
+        retried = 1;
+        goto retry;
+    }
 
     /* compute idct and place blocks */
     block = &sblock[0][0];
@@ -470,6 +499,7 @@ static int dvvideo_decode_frame(AVCodecContext *avctx, void *data,
     uint8_t *buf = avpkt->data;
     int buf_size = avpkt->size;
     DVVideoContext *s = avctx->priv_data;
+    AVFrame *frame = data;
     const uint8_t *vsc_pack;
     int apt, is16_9, ret;
     const AVDVProfile *sys;
@@ -490,9 +520,9 @@ static int dvvideo_decode_frame(AVCodecContext *avctx, void *data,
         s->sys = sys;
     }
 
-    s->frame            = data;
-    s->frame->key_frame = 1;
-    s->frame->pict_type = AV_PICTURE_TYPE_I;
+    s->frame            = frame;
+    frame->key_frame    = 1;
+    frame->pict_type    = AV_PICTURE_TYPE_I;
     avctx->pix_fmt      = s->sys->pix_fmt;
     avctx->framerate    = av_inv_q(s->sys->time_base);
 
@@ -509,14 +539,14 @@ static int dvvideo_decode_frame(AVCodecContext *avctx, void *data,
         ff_set_sar(avctx, s->sys->sar[is16_9]);
     }
 
-    if ((ret = ff_get_buffer(avctx, s->frame, 0)) < 0)
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    s->frame->interlaced_frame = 1;
-    s->frame->top_field_first  = 0;
+    frame->interlaced_frame = 1;
+    frame->top_field_first  = 0;
 
     /* Determine the codec's field order from the packet */
     if ( *vsc_pack == dv_video_control ) {
-        s->frame->top_field_first = !(vsc_pack[3] & 0x40);
+        frame->top_field_first = !(vsc_pack[3] & 0x40);
     }
 
     s->buf = buf;
@@ -539,6 +569,6 @@ AVCodec ff_dvvideo_decoder = {
     .priv_data_size = sizeof(DVVideoContext),
     .init           = dvvideo_decode_init,
     .decode         = dvvideo_decode_frame,
-    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_SLICE_THREADS,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_SLICE_THREADS,
     .max_lowres     = 3,
 };
diff --git a/libavcodec/dvdsubdec.c b/libavcodec/dvdsubdec.c
index 7120f10a..19f25f0e 100644
--- a/libavcodec/dvdsubdec.c
+++ b/libavcodec/dvdsubdec.c
@@ -206,8 +206,8 @@ static void reset_rects(AVSubtitle *sub_header)
 
     if (sub_header->rects) {
         for (i = 0; i < sub_header->num_rects; i++) {
-            av_freep(&sub_header->rects[i]->pict.data[0]);
-            av_freep(&sub_header->rects[i]->pict.data[1]);
+            av_freep(&sub_header->rects[i]->data[0]);
+            av_freep(&sub_header->rects[i]->data[1]);
             av_freep(&sub_header->rects[i]);
         }
         av_freep(&sub_header->rects);
@@ -220,13 +220,15 @@ static void reset_rects(AVSubtitle *sub_header)
 static int decode_dvd_subtitles(DVDSubContext *ctx, AVSubtitle *sub_header,
                                 const uint8_t *buf, int buf_size)
 {
-    int cmd_pos, pos, cmd, x1, y1, x2, y2, offset1, offset2, next_cmd_pos;
+    int cmd_pos, pos, cmd, x1, y1, x2, y2, next_cmd_pos;
     int big_offsets, offset_size, is_8bit = 0;
     const uint8_t *yuv_palette = NULL;
     uint8_t *colormap = ctx->colormap, *alpha = ctx->alpha;
     int date;
     int i;
     int is_menu = 0;
+    uint32_t size;
+    int64_t offset1, offset2;
 
     if (buf_size < 10)
         return -1;
@@ -241,10 +243,16 @@ static int decode_dvd_subtitles(DVDSubContext *ctx, AVSubtitle *sub_header,
         cmd_pos = 2;
     }
 
+    size = READ_OFFSET(buf + (big_offsets ? 2 : 0));
     cmd_pos = READ_OFFSET(buf + cmd_pos);
 
-    if (cmd_pos < 0 || cmd_pos > buf_size - 2 - offset_size)
+    if (cmd_pos < 0 || cmd_pos > buf_size - 2 - offset_size) {
+        if (cmd_pos > size) {
+            av_log(ctx, AV_LOG_ERROR, "Discarding invalid packet\n");
+            return 0;
+        }
         return AVERROR(EAGAIN);
+    }
 
     while (cmd_pos > 0 && cmd_pos < buf_size - 2 - offset_size) {
         date = AV_RB16(buf + cmd_pos);
@@ -290,7 +298,7 @@ static int decode_dvd_subtitles(DVDSubContext *ctx, AVSubtitle *sub_header,
                 alpha[1] = buf[pos + 1] >> 4;
                 alpha[0] = buf[pos + 1] & 0x0f;
                 pos += 2;
-            ff_dlog(NULL, "alpha=%x%x%x%x\n", alpha[0],alpha[1],alpha[2],alpha[3]);
+                ff_dlog(NULL, "alpha=%x%x%x%x\n", alpha[0],alpha[1],alpha[2],alpha[3]);
                 break;
             case 0x05:
             case 0x85:
@@ -310,7 +318,7 @@ static int decode_dvd_subtitles(DVDSubContext *ctx, AVSubtitle *sub_header,
                     goto fail;
                 offset1 = AV_RB16(buf + pos);
                 offset2 = AV_RB16(buf + pos + 2);
-                ff_dlog(NULL, "offset1=0x%04x offset2=0x%04x\n", offset1, offset2);
+                ff_dlog(NULL, "offset1=0x%04"PRIx64" offset2=0x%04"PRIx64"\n", offset1, offset2);
                 pos += 4;
                 break;
             case 0x86:
@@ -318,7 +326,7 @@ static int decode_dvd_subtitles(DVDSubContext *ctx, AVSubtitle *sub_header,
                     goto fail;
                 offset1 = AV_RB32(buf + pos);
                 offset2 = AV_RB32(buf + pos + 4);
-                ff_dlog(NULL, "offset1=0x%04x offset2=0x%04x\n", offset1, offset2);
+                ff_dlog(NULL, "offset1=0x%04"PRIx64" offset2=0x%04"PRIx64"\n", offset1, offset2);
                 pos += 8;
                 break;
 
@@ -346,7 +354,10 @@ static int decode_dvd_subtitles(DVDSubContext *ctx, AVSubtitle *sub_header,
             }
         }
     the_end:
-        if (offset1 >= 0) {
+        if (offset1 >= buf_size || offset2 >= buf_size)
+            goto fail;
+
+        if (offset1 >= 0 && offset2 >= 0) {
             int w, h;
             uint8_t *bitmap;
 
@@ -367,7 +378,7 @@ static int decode_dvd_subtitles(DVDSubContext *ctx, AVSubtitle *sub_header,
                 if (!sub_header->rects[0])
                     goto fail;
                 sub_header->num_rects = 1;
-                bitmap = sub_header->rects[0]->pict.data[0] = av_malloc(w * h);
+                bitmap = sub_header->rects[0]->data[0] = av_malloc(w * h);
                 if (!bitmap)
                     goto fail;
                 if (decode_rle(bitmap, w * 2, w, (h + 1) / 2,
@@ -376,17 +387,19 @@ static int decode_dvd_subtitles(DVDSubContext *ctx, AVSubtitle *sub_header,
                 if (decode_rle(bitmap + w, w * 2, w, h / 2,
                                buf, offset2, buf_size, is_8bit) < 0)
                     goto fail;
-                sub_header->rects[0]->pict.data[1] = av_mallocz(AVPALETTE_SIZE);
-                if (!sub_header->rects[0]->pict.data[1])
+                sub_header->rects[0]->data[1] = av_mallocz(AVPALETTE_SIZE);
+                if (!sub_header->rects[0]->data[1])
                     goto fail;
                 if (is_8bit) {
                     if (!yuv_palette)
                         goto fail;
                     sub_header->rects[0]->nb_colors = 256;
-                    yuv_a_to_rgba(yuv_palette, alpha, (uint32_t*)sub_header->rects[0]->pict.data[1], 256);
+                    yuv_a_to_rgba(yuv_palette, alpha,
+                                  (uint32_t *)sub_header->rects[0]->data[1],
+                                  256);
                 } else {
                     sub_header->rects[0]->nb_colors = 4;
-                    guess_palette(ctx, (uint32_t*)sub_header->rects[0]->pict.data[1],
+                    guess_palette(ctx, (uint32_t*)sub_header->rects[0]->data[1],
                                   0xffff00);
                 }
                 sub_header->rects[0]->x = x1;
@@ -394,12 +407,21 @@ static int decode_dvd_subtitles(DVDSubContext *ctx, AVSubtitle *sub_header,
                 sub_header->rects[0]->w = w;
                 sub_header->rects[0]->h = h;
                 sub_header->rects[0]->type = SUBTITLE_BITMAP;
-                sub_header->rects[0]->pict.linesize[0] = w;
+                sub_header->rects[0]->linesize[0] = w;
                 sub_header->rects[0]->flags = is_menu ? AV_SUBTITLE_FLAG_FORCED : 0;
+
+#if FF_API_AVPICTURE
+FF_DISABLE_DEPRECATION_WARNINGS
+                for (i = 0; i < 4; i++) {
+                    sub_header->rects[0]->pict.data[i] = sub_header->rects[0]->data[i];
+                    sub_header->rects[0]->pict.linesize[i] = sub_header->rects[0]->linesize[i];
+                }
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
             }
         }
         if (next_cmd_pos < cmd_pos) {
-            av_log(NULL, AV_LOG_ERROR, "Invalid command offset\n");
+            av_log(ctx, AV_LOG_ERROR, "Invalid command offset\n");
             break;
         }
         if (next_cmd_pos == cmd_pos)
@@ -436,29 +458,29 @@ static int find_smallest_bounding_rectangle(AVSubtitle *s)
         return 0;
 
     for(i = 0; i < s->rects[0]->nb_colors; i++) {
-        if ((((uint32_t*)s->rects[0]->pict.data[1])[i] >> 24) == 0)
+        if ((((uint32_t *)s->rects[0]->data[1])[i] >> 24) == 0)
             transp_color[i] = 1;
     }
     y1 = 0;
-    while (y1 < s->rects[0]->h && is_transp(s->rects[0]->pict.data[0] + y1 * s->rects[0]->pict.linesize[0],
+    while (y1 < s->rects[0]->h && is_transp(s->rects[0]->data[0] + y1 * s->rects[0]->linesize[0],
                                   1, s->rects[0]->w, transp_color))
         y1++;
     if (y1 == s->rects[0]->h) {
-        av_freep(&s->rects[0]->pict.data[0]);
+        av_freep(&s->rects[0]->data[0]);
         s->rects[0]->w = s->rects[0]->h = 0;
         return 0;
     }
 
     y2 = s->rects[0]->h - 1;
-    while (y2 > 0 && is_transp(s->rects[0]->pict.data[0] + y2 * s->rects[0]->pict.linesize[0], 1,
+    while (y2 > 0 && is_transp(s->rects[0]->data[0] + y2 * s->rects[0]->linesize[0], 1,
                                s->rects[0]->w, transp_color))
         y2--;
     x1 = 0;
-    while (x1 < (s->rects[0]->w - 1) && is_transp(s->rects[0]->pict.data[0] + x1, s->rects[0]->pict.linesize[0],
+    while (x1 < (s->rects[0]->w - 1) && is_transp(s->rects[0]->data[0] + x1, s->rects[0]->linesize[0],
                                         s->rects[0]->h, transp_color))
         x1++;
     x2 = s->rects[0]->w - 1;
-    while (x2 > 0 && is_transp(s->rects[0]->pict.data[0] + x2, s->rects[0]->pict.linesize[0], s->rects[0]->h,
+    while (x2 > 0 && is_transp(s->rects[0]->data[0] + x2, s->rects[0]->linesize[0], s->rects[0]->h,
                                   transp_color))
         x2--;
     w = x2 - x1 + 1;
@@ -467,15 +489,25 @@ static int find_smallest_bounding_rectangle(AVSubtitle *s)
     if (!bitmap)
         return 1;
     for(y = 0; y < h; y++) {
-        memcpy(bitmap + w * y, s->rects[0]->pict.data[0] + x1 + (y1 + y) * s->rects[0]->pict.linesize[0], w);
+        memcpy(bitmap + w * y, s->rects[0]->data[0] + x1 + (y1 + y) * s->rects[0]->linesize[0], w);
     }
-    av_freep(&s->rects[0]->pict.data[0]);
-    s->rects[0]->pict.data[0] = bitmap;
-    s->rects[0]->pict.linesize[0] = w;
+    av_freep(&s->rects[0]->data[0]);
+    s->rects[0]->data[0] = bitmap;
+    s->rects[0]->linesize[0] = w;
     s->rects[0]->w = w;
     s->rects[0]->h = h;
     s->rects[0]->x += x1;
     s->rects[0]->y += y1;
+
+#if FF_API_AVPICTURE
+FF_DISABLE_DEPRECATION_WARNINGS
+    for (i = 0; i < 4; i++) {
+        s->rects[0]->pict.data[i] = s->rects[0]->data[i];
+        s->rects[0]->pict.linesize[i] = s->rects[0]->linesize[i];
+    }
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
     return 1;
 }
 
@@ -535,6 +567,7 @@ static int dvdsub_decode(AVCodecContext *avctx,
     const uint8_t *buf = avpkt->data;
     int buf_size = avpkt->size;
     AVSubtitle *sub = data;
+    int appended = 0;
     int is_menu;
 
     if (ctx->buf_size) {
@@ -545,12 +578,13 @@ static int dvdsub_decode(AVCodecContext *avctx,
         }
         buf = ctx->buf;
         buf_size = ctx->buf_size;
+        appended = 1;
     }
 
     is_menu = decode_dvd_subtitles(ctx, sub, buf, buf_size);
     if (is_menu == AVERROR(EAGAIN)) {
         *data_size = 0;
-        return append_to_cached_buf(avctx, buf, buf_size);
+        return appended ? 0 : append_to_cached_buf(avctx, buf, buf_size);
     }
 
     if (is_menu < 0) {
@@ -574,8 +608,8 @@ static int dvdsub_decode(AVCodecContext *avctx,
     ff_dlog(NULL, "start=%d ms end =%d ms\n",
             sub->start_display_time,
             sub->end_display_time);
-    ppm_save(ppm_name, sub->rects[0]->pict.data[0],
-             sub->rects[0]->w, sub->rects[0]->h, (uint32_t*) sub->rects[0]->pict.data[1]);
+    ppm_save(ppm_name, sub->rects[0]->data[0],
+             sub->rects[0]->w, sub->rects[0]->h, (uint32_t*) sub->rects[0]->data[1]);
     }
 #endif
 
@@ -736,7 +770,7 @@ static av_cold int dvdsub_close(AVCodecContext *avctx)
 static const AVOption options[] = {
     { "palette", "set the global palette", OFFSET(palette_str), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, SD },
     { "ifo_palette", "obtain the global palette from .IFO file", OFFSET(ifo_str), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, SD },
-    { "forced_subs_only", "Only show forced subtitles", OFFSET(forced_subs_only), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, SD},
+    { "forced_subs_only", "Only show forced subtitles", OFFSET(forced_subs_only), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, SD},
     { NULL }
 };
 static const AVClass dvdsub_class = {
diff --git a/libavcodec/dvdsubenc.c b/libavcodec/dvdsubenc.c
index 425f0af9..29e0322d 100644
--- a/libavcodec/dvdsubenc.c
+++ b/libavcodec/dvdsubenc.c
@@ -118,15 +118,15 @@ static void count_colors(AVCodecContext *avctx, unsigned hits[33],
 {
     DVDSubtitleContext *dvdc = avctx->priv_data;
     unsigned count[256] = { 0 };
-    uint32_t *palette = (uint32_t *)r->pict.data[1];
+    uint32_t *palette = (uint32_t *)r->data[1];
     uint32_t color;
     int x, y, i, j, match, d, best_d, av_uninit(best_j);
-    uint8_t *p = r->pict.data[0];
+    uint8_t *p = r->data[0];
 
     for (y = 0; y < r->h; y++) {
         for (x = 0; x < r->w; x++)
             count[*(p++)]++;
-        p += r->pict.linesize[0] - r->w;
+        p += r->linesize[0] - r->w;
     }
     for (i = 0; i < 256; i++) {
         if (!count[i]) /* avoid useless search */
@@ -236,14 +236,14 @@ static void copy_rectangle(AVSubtitleRect *dst, AVSubtitleRect *src, int cmap[])
     int x, y;
     uint8_t *p, *q;
 
-    p = src->pict.data[0];
-    q = dst->pict.data[0] + (src->x - dst->x) +
-                            (src->y - dst->y) * dst->pict.linesize[0];
+    p = src->data[0];
+    q = dst->data[0] + (src->x - dst->x) +
+                            (src->y - dst->y) * dst->linesize[0];
     for (y = 0; y < src->h; y++) {
         for (x = 0; x < src->w; x++)
             *(q++) = cmap[*(p++)];
-        p += src->pict.linesize[0] - src->w;
-        q += dst->pict.linesize[0] - src->w;
+        p += src->linesize[0] - src->w;
+        q += dst->linesize[0] - src->w;
     }
 }
 
@@ -277,6 +277,21 @@ static int encode_dvd_subtitles(AVCodecContext *avctx,
             forced = 1;
             break;
         }
+
+#if FF_API_AVPICTURE
+FF_DISABLE_DEPRECATION_WARNINGS
+    for (i = 0; i < rects; i++)
+        if (!h->rects[i]->data[0]) {
+            AVSubtitleRect *rect = h->rects[i];
+            int j;
+            for (j = 0; j < 4; j++) {
+                rect->data[j] = rect->pict.data[j];
+                rect->linesize[j] = rect->pict.linesize[j];
+            }
+        }
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
     vrect = *h->rects[0];
 
     if (rects > 1) {
@@ -312,17 +327,17 @@ static int encode_dvd_subtitles(AVCodecContext *avctx,
     if (rects > 1) {
         if (!(vrect_data = av_calloc(vrect.w, vrect.h)))
             return AVERROR(ENOMEM);
-        vrect.pict.data    [0] = vrect_data;
-        vrect.pict.linesize[0] = vrect.w;
+        vrect.data    [0] = vrect_data;
+        vrect.linesize[0] = vrect.w;
         for (i = 0; i < rects; i++) {
-            build_color_map(avctx, cmap, (uint32_t *)h->rects[i]->pict.data[1],
+            build_color_map(avctx, cmap, (uint32_t *)h->rects[i]->data[1],
                             out_palette, out_alpha);
             copy_rectangle(&vrect, h->rects[i], cmap);
         }
         for (i = 0; i < 4; i++)
             cmap[i] = i;
     } else {
-        build_color_map(avctx, cmap, (uint32_t *)h->rects[0]->pict.data[1],
+        build_color_map(avctx, cmap, (uint32_t *)h->rects[0]->data[1],
                         out_palette, out_alpha);
     }
 
@@ -342,10 +357,10 @@ static int encode_dvd_subtitles(AVCodecContext *avctx,
         ret = AVERROR_BUFFER_TOO_SMALL;
         goto fail;
     }
-    dvd_encode_rle(&q, vrect.pict.data[0], vrect.w * 2,
+    dvd_encode_rle(&q, vrect.data[0], vrect.w * 2,
                    vrect.w, (vrect.h + 1) >> 1, cmap);
     offset2 = q - outbuf;
-    dvd_encode_rle(&q, vrect.pict.data[0] + vrect.w, vrect.w * 2,
+    dvd_encode_rle(&q, vrect.data[0] + vrect.w, vrect.w * 2,
                    vrect.w, vrect.h >> 1, cmap);
 
     if (dvdc->even_rows_fix && (vrect.h & 1)) {
@@ -452,7 +467,7 @@ static int dvdsub_encode(AVCodecContext *avctx,
 #define OFFSET(x) offsetof(DVDSubtitleContext, x)
 #define SE AV_OPT_FLAG_SUBTITLE_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 static const AVOption options[] = {
-    {"even_rows_fix", "Make number of rows even (workaround for some players)", OFFSET(even_rows_fix), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, SE},
+    {"even_rows_fix", "Make number of rows even (workaround for some players)", OFFSET(even_rows_fix), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, SE},
     { NULL },
 };
 
diff --git a/libavcodec/dvenc.c b/libavcodec/dvenc.c
index 9ce72732..5de12cc8 100644
--- a/libavcodec/dvenc.c
+++ b/libavcodec/dvenc.c
@@ -65,10 +65,6 @@ static av_cold int dvvideo_encode_init(AVCodecContext *avctx)
         return ret;
     }
 
-    avctx->coded_frame = av_frame_alloc();
-    if (!avctx->coded_frame)
-        return AVERROR(ENOMEM);
-
     dv_vlc_map_tableinit();
 
     memset(&fdsp,0, sizeof(fdsp));
@@ -208,7 +204,7 @@ static av_always_inline PutBitContext *dv_encode_ac(EncBlockInfo *bi,
 static av_always_inline int dv_guess_dct_mode(DVVideoContext *s, uint8_t *data,
                                               int linesize)
 {
-    if (s->avctx->flags & CODEC_FLAG_INTERLACED_DCT) {
+    if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
         int ps = s->ildct_cmp(NULL, data, NULL, linesize, 8) - 400;
         if (ps > 0) {
             int is = s->ildct_cmp(NULL, data,            NULL, linesize << 1, 4) +
@@ -721,13 +717,17 @@ static int dvvideo_encode_frame(AVCodecContext *c, AVPacket *pkt,
     DVVideoContext *s = c->priv_data;
     int ret;
 
-    if ((ret = ff_alloc_packet2(c, pkt, s->sys->frame_size)) < 0)
+    if ((ret = ff_alloc_packet2(c, pkt, s->sys->frame_size, 0)) < 0)
         return ret;
 
     c->pix_fmt                = s->sys->pix_fmt;
     s->frame                  = frame;
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
     c->coded_frame->key_frame = 1;
     c->coded_frame->pict_type = AV_PICTURE_TYPE_I;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
     s->buf = pkt->data;
     c->execute(c, dv_encode_video_segment, s->work_chunks, NULL,
@@ -743,12 +743,6 @@ static int dvvideo_encode_frame(AVCodecContext *c, AVPacket *pkt,
     return 0;
 }
 
-static int dvvideo_encode_close(AVCodecContext *avctx)
-{
-    av_frame_free(&avctx->coded_frame);
-    return 0;
-}
-
 AVCodec ff_dvvideo_encoder = {
     .name           = "dvvideo",
     .long_name      = NULL_IF_CONFIG_SMALL("DV (Digital Video)"),
@@ -757,8 +751,7 @@ AVCodec ff_dvvideo_encoder = {
     .priv_data_size = sizeof(DVVideoContext),
     .init           = dvvideo_encode_init,
     .encode2        = dvvideo_encode_frame,
-    .close          = dvvideo_encode_close,
-    .capabilities   = CODEC_CAP_SLICE_THREADS | CODEC_CAP_FRAME_THREADS | CODEC_CAP_INTRA_ONLY,
+    .capabilities   = AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_INTRA_ONLY,
     .pix_fmts       = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV422P,
         AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE
diff --git a/libavcodec/dxa.c b/libavcodec/dxa.c
index c8e3f713..f6edc03e 100644
--- a/libavcodec/dxa.c
+++ b/libavcodec/dxa.c
@@ -284,7 +284,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac
     case 5:
         if (!tmpptr) {
             av_log(avctx, AV_LOG_ERROR, "Missing reference frame.\n");
-            if (!(avctx->flags2 & CODEC_FLAG2_SHOW_ALL))
+            if (!(avctx->flags2 & AV_CODEC_FLAG2_SHOW_ALL))
                 return AVERROR_INVALIDDATA;
         }
         frame->key_frame = 0;
@@ -370,5 +370,5 @@ AVCodec ff_dxa_decoder = {
     .init           = decode_init,
     .close          = decode_end,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/dxtory.c b/libavcodec/dxtory.c
index 22e7b2f4..fc193698 100644
--- a/libavcodec/dxtory.c
+++ b/libavcodec/dxtory.c
@@ -65,7 +65,7 @@ static int dxtory_decode_v1_410(AVCodecContext *avctx, AVFrame *pic,
     uint8_t *Y1, *Y2, *Y3, *Y4, *U, *V;
     int ret;
 
-    if (src_size < avctx->width * avctx->height * 9LL / 8) {
+    if (src_size < FFALIGN(avctx->width, 4) * FFALIGN(avctx->height, 4) * 9LL / 8) {
         av_log(avctx, AV_LOG_ERROR, "packet too small\n");
         return AVERROR_INVALIDDATA;
     }
@@ -108,7 +108,7 @@ static int dxtory_decode_v1_420(AVCodecContext *avctx, AVFrame *pic,
     uint8_t *Y1, *Y2, *U, *V;
     int ret;
 
-    if (src_size < avctx->width * avctx->height * 3LL / 2) {
+    if (src_size < FFALIGN(avctx->width, 2) * FFALIGN(avctx->height, 2) * 3LL / 2) {
         av_log(avctx, AV_LOG_ERROR, "packet too small\n");
         return AVERROR_INVALIDDATA;
     }
@@ -192,6 +192,56 @@ static inline uint8_t decode_sym(GetBitContext *gb, uint8_t lru[8])
     return val;
 }
 
+static int check_slice_size(AVCodecContext *avctx,
+                            const uint8_t *src, int src_size,
+                            int slice_size, int off)
+{
+    int cur_slice_size;
+
+    if (slice_size > src_size - off) {
+        av_log(avctx, AV_LOG_ERROR,
+               "invalid slice size %"PRIu32" (only %"PRIu32" bytes left)\n",
+               slice_size, src_size - off);
+        return AVERROR_INVALIDDATA;
+    }
+    if (slice_size <= 16) {
+        av_log(avctx, AV_LOG_ERROR, "invalid slice size %"PRIu32"\n",
+               slice_size);
+        return AVERROR_INVALIDDATA;
+    }
+
+    cur_slice_size = AV_RL32(src + off);
+    if (cur_slice_size != slice_size - 16) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Slice sizes mismatch: got %"PRIu32" instead of %"PRIu32"\n",
+               cur_slice_size, slice_size - 16);
+    }
+
+    return 0;
+}
+
+static int load_buffer(AVCodecContext *avctx,
+                       const uint8_t *src, int src_size,
+                       GetByteContext *gb,
+                       int *nslices, int *off)
+{
+    bytestream2_init(gb, src, src_size);
+    *nslices = bytestream2_get_le16(gb);
+    *off = FFALIGN(*nslices * 4 + 2, 16);
+    if (src_size < *off) {
+        av_log(avctx, AV_LOG_ERROR, "no slice data\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (!*nslices) {
+        avpriv_request_sample(avctx, "%d slices for %dx%d", *nslices,
+                              avctx->width, avctx->height);
+        return AVERROR_PATCHWELCOME;
+    }
+
+    return 0;
+}
+
 static inline uint8_t decode_sym_565(GetBitContext *gb, uint8_t lru[8],
                                      int bits)
 {
@@ -210,187 +260,183 @@ static inline uint8_t decode_sym_565(GetBitContext *gb, uint8_t lru[8],
     return val;
 }
 
-static int dx2_decode_slice_565(GetBitContext *gb, int width, int height,
-                                uint8_t *dst, int stride, int is_565)
-{
-    int x, y;
-    int r, g, b;
-    uint8_t lru[3][8];
+typedef int (*decode_slice_func)(GetBitContext *gb, AVFrame *frame,
+                                 int line, int height, uint8_t lru[3][8]);
 
-    memcpy(lru[0], def_lru_555, 8 * sizeof(*def_lru));
-    memcpy(lru[1], is_565 ? def_lru_565 : def_lru_555, 8 * sizeof(*def_lru));
-    memcpy(lru[2], def_lru_555, 8 * sizeof(*def_lru));
+typedef void (*setup_lru_func)(uint8_t lru[3][8]);
 
-    for (y = 0; y < height; y++) {
-        for (x = 0; x < width; x++) {
-            b = decode_sym_565(gb, lru[0], 5);
-            g = decode_sym_565(gb, lru[1], is_565 ? 6 : 5);
-            r = decode_sym_565(gb, lru[2], 5);
-            dst[x * 3 + 0] = (r << 3) | (r >> 2);
-            dst[x * 3 + 1] = is_565 ? (g << 2) | (g >> 4) : (g << 3) | (g >> 2);
-            dst[x * 3 + 2] = (b << 3) | (b >> 2);
-        }
-
-        dst += stride;
-    }
-
-    return 0;
-}
-
-static int dxtory_decode_v2_565(AVCodecContext *avctx, AVFrame *pic,
-                                const uint8_t *src, int src_size, int is_565)
+static int dxtory_decode_v2(AVCodecContext *avctx, AVFrame *pic,
+                            const uint8_t *src, int src_size,
+                            decode_slice_func decode_slice,
+                            setup_lru_func setup_lru,
+                            enum AVPixelFormat fmt) /* shared slice loop; decode_slice, setup_lru and fmt select the pixel layout */
 {
     GetByteContext gb;
     GetBitContext  gb2;
-    int nslices, slice, slice_height;
+    int nslices, slice, line = 0; /* line: number of picture rows decoded so far */
     uint32_t off, slice_size;
-    uint8_t *dst;
+    uint8_t lru[3][8]; /* tables handed to decode_slice, reset by setup_lru for each slice */
     int ret;
 
-    bytestream2_init(&gb, src, src_size);
-    nslices = bytestream2_get_le16(&gb);
-    off = FFALIGN(nslices * 4 + 2, 16);
-    if (src_size < off) {
-        av_log(avctx, AV_LOG_ERROR, "no slice data\n");
-        return AVERROR_INVALIDDATA;
-    }
-
-    if (!nslices || avctx->height % nslices) {
-        avpriv_request_sample(avctx, "%d slices for %dx%d", nslices,
-                              avctx->width, avctx->height);
-        return AVERROR_PATCHWELCOME;
-    }
+    ret = load_buffer(avctx, src, src_size, &gb, &nslices, &off); /* NOTE(review): off is declared uint32_t but load_buffer() takes int * */
+    if (ret < 0)
+        return ret;
 
-    slice_height = avctx->height / nslices;
-    avctx->pix_fmt = AV_PIX_FMT_RGB24;
+    avctx->pix_fmt = fmt; /* output pixel format chosen by the caller */
     if ((ret = ff_get_buffer(avctx, pic, 0)) < 0)
         return ret;
 
-    dst = pic->data[0];
     for (slice = 0; slice < nslices; slice++) {
         slice_size = bytestream2_get_le32(&gb);
-        if (slice_size > src_size - off) {
-            av_log(avctx, AV_LOG_ERROR,
-                   "invalid slice size %"PRIu32" (only %"PRIu32" bytes left)\n",
-                   slice_size, src_size - off);
-            return AVERROR_INVALIDDATA;
-        }
-        if (slice_size <= 16) {
-            av_log(avctx, AV_LOG_ERROR, "invalid slice size %"PRIu32"\n", slice_size);
-            return AVERROR_INVALIDDATA;
-        }
 
-        if (AV_RL32(src + off) != slice_size - 16) {
-            av_log(avctx, AV_LOG_ERROR,
-                   "Slice sizes mismatch: got %"PRIu32" instead of %"PRIu32"\n",
-                   AV_RL32(src + off), slice_size - 16);
-        }
+        setup_lru(lru); /* every slice starts from freshly initialised tables */
+
+        ret = check_slice_size(avctx, src, src_size, slice_size, off); /* rejects slices that do not fit in the packet or are 16 bytes or smaller */
+        if (ret < 0)
+            return ret;
+
         if ((ret = init_get_bits8(&gb2, src + off + 16, slice_size - 16)) < 0)
             return ret;
-        dx2_decode_slice_565(&gb2, avctx->width, slice_height, dst,
-                             pic->linesize[0], is_565);
 
-        dst += pic->linesize[0] * slice_height;
+        line += decode_slice(&gb2, pic, line, avctx->height - line, lru); /* decode_slice returns the number of rows it produced */
+
         off += slice_size;
     }
 
+    if (avctx->height - line) { /* the slices produced fewer rows than the frame height */
+        av_log(avctx, AV_LOG_VERBOSE,
+               "Not enough slice data available, "
+               "cropping the frame by %d pixels\n",
+                avctx->height - line);
+        avctx->height = line; /* shrink the codec height to the rows actually decoded */
+    }
+
     return 0;
 }
 
-static int dx2_decode_slice_rgb(GetBitContext *gb, int width, int height,
-                                uint8_t *dst, int stride)
+av_always_inline
+static int dx2_decode_slice_5x5(GetBitContext *gb, AVFrame *frame,
+                                int line, int left, uint8_t lru[3][8],
+                                int is_565) /* decodes at most left rows starting at row line; is_565 selects a 6-bit middle component */
 {
-    int x, y, i;
-    uint8_t lru[3][8];
-
-    for (i = 0; i < 3; i++)
-        memcpy(lru[i], def_lru, 8 * sizeof(*def_lru));
+    int x, y;
+    int r, g, b;
+    int width    = frame->width;
+    int stride   = frame->linesize[0];
+    uint8_t *dst = frame->data[0] + stride * line; /* first output row of this slice */
 
-    for (y = 0; y < height; y++) {
+    for (y = 0; y < left && get_bits_left(gb) > 16; y++) { /* stop after left rows or once 16 or fewer bits remain */
         for (x = 0; x < width; x++) {
-            dst[x * 3 + 0] = decode_sym(gb, lru[0]);
-            dst[x * 3 + 1] = decode_sym(gb, lru[1]);
-            dst[x * 3 + 2] = decode_sym(gb, lru[2]);
+            b = decode_sym_565(gb, lru[0], 5);
+            g = decode_sym_565(gb, lru[1], is_565 ? 6 : 5);
+            r = decode_sym_565(gb, lru[2], 5);
+            dst[x * 3 + 0] = (r << 3) | (r >> 2); /* widen 5 bits to 8 by repeating the top bits */
+            dst[x * 3 + 1] = is_565 ? (g << 2) | (g >> 4) : (g << 3) | (g >> 2); /* 6-bit or 5-bit widening */
+            dst[x * 3 + 2] = (b << 3) | (b >> 2);
         }
 
         dst += stride;
     }
 
-    return 0;
+    return y; /* number of rows written */
 }
 
-static int dxtory_decode_v2_rgb(AVCodecContext *avctx, AVFrame *pic,
-                                const uint8_t *src, int src_size)
+static void setup_lru_555(uint8_t lru[3][8]) /* initialise all three tables from def_lru_555 */
 {
-    GetByteContext gb;
-    GetBitContext  gb2;
-    int nslices, slice, slice_height;
-    uint32_t off, slice_size;
-    uint8_t *dst;
-    int ret;
+    memcpy(lru[0], def_lru_555, 8 * sizeof(*def_lru));
+    memcpy(lru[1], def_lru_555, 8 * sizeof(*def_lru));
+    memcpy(lru[2], def_lru_555, 8 * sizeof(*def_lru));
+}
 
-    bytestream2_init(&gb, src, src_size);
-    nslices = bytestream2_get_le16(&gb);
-    off = FFALIGN(nslices * 4 + 2, 16);
-    if (src_size < off) {
-        av_log(avctx, AV_LOG_ERROR, "no slice data\n");
-        return AVERROR_INVALIDDATA;
-    }
+static void setup_lru_565(uint8_t lru[3][8]) /* tables 0 and 2 from def_lru_555, table 1 from def_lru_565 */
+{
+    memcpy(lru[0], def_lru_555, 8 * sizeof(*def_lru));
+    memcpy(lru[1], def_lru_565, 8 * sizeof(*def_lru)); /* table 1 feeds the component decoded with 6 bits in 565 mode */
+    memcpy(lru[2], def_lru_555, 8 * sizeof(*def_lru));
+}
 
-    if (!nslices || avctx->height % nslices) {
-        avpriv_request_sample(avctx, "%d slices for %dx%d", nslices,
-                              avctx->width, avctx->height);
-        return AVERROR_PATCHWELCOME;
-    }
+static int dx2_decode_slice_555(GetBitContext *gb, AVFrame *frame,
+                                int line, int left, uint8_t lru[3][8])
+{ /* decode_slice_func wrapper around dx2_decode_slice_5x5() with is_565 = 0 */
+    return dx2_decode_slice_5x5(gb, frame, line, left, lru, 0);
+}
 
-    slice_height = avctx->height / nslices;
-    avctx->pix_fmt = AV_PIX_FMT_BGR24;
-    if ((ret = ff_get_buffer(avctx, pic, 0)) < 0)
-        return ret;
+static int dx2_decode_slice_565(GetBitContext *gb, AVFrame *frame,
+                                int line, int left, uint8_t lru[3][8])
+{ /* decode_slice_func wrapper around dx2_decode_slice_5x5() with is_565 = 1 */
+    return dx2_decode_slice_5x5(gb, frame, line, left, lru, 1);
+}
 
-    dst = pic->data[0];
-    for (slice = 0; slice < nslices; slice++) {
-        slice_size = bytestream2_get_le32(&gb);
-        if (slice_size > src_size - off) {
-            av_log(avctx, AV_LOG_ERROR,
-                   "invalid slice size %"PRIu32" (only %"PRIu32" bytes left)\n",
-                   slice_size, src_size - off);
-            return AVERROR_INVALIDDATA;
-        }
-        if (slice_size <= 16) {
-            av_log(avctx, AV_LOG_ERROR, "invalid slice size %"PRIu32"\n",
-                   slice_size);
-            return AVERROR_INVALIDDATA;
-        }
+static int dxtory_decode_v2_565(AVCodecContext *avctx, AVFrame *pic,
+                                const uint8_t *src, int src_size, int is_565)
+{ /* Run dxtory_decode_v2() with the 565 or 555 slice decoder and table setup; both variants output AV_PIX_FMT_RGB24. */
+    enum AVPixelFormat fmt = AV_PIX_FMT_RGB24;
+    if (is_565) /* non-zero selects the 565 helpers */
+        return dxtory_decode_v2(avctx, pic, src, src_size,
+                                dx2_decode_slice_565,
+                                setup_lru_565,
+                                fmt);
+    else
+        return dxtory_decode_v2(avctx, pic, src, src_size,
+                                dx2_decode_slice_555,
+                                setup_lru_555,
+                                fmt);
+}
 
-        if (AV_RL32(src + off) != slice_size - 16) {
-            av_log(avctx, AV_LOG_ERROR,
-                   "Slice sizes mismatch: got %"PRIu32" instead of %"PRIu32"\n",
-                   AV_RL32(src + off), slice_size - 16);
+static int dx2_decode_slice_rgb(GetBitContext *gb, AVFrame *frame,
+                                int line, int left, uint8_t lru[3][8])
+{ /* Decode at most left rows of three-byte pixels starting at row line; returns the number of rows written. */
+    int x, y;
+    int width    = frame->width;
+    int stride   = frame->linesize[0];
+    uint8_t *dst = frame->data[0] + stride * line; /* first output row of this slice */
+
+    for (y = 0; y < left && get_bits_left(gb) > 16; y++) { /* stop after left rows or once 16 or fewer bits remain */
+        for (x = 0; x < width; x++) {
+            dst[x * 3 + 0] = decode_sym(gb, lru[0]); /* one table per byte position */
+            dst[x * 3 + 1] = decode_sym(gb, lru[1]);
+            dst[x * 3 + 2] = decode_sym(gb, lru[2]);
         }
-        if ((ret = init_get_bits8(&gb2, src + off + 16, slice_size - 16)) < 0)
-            return ret;
-        dx2_decode_slice_rgb(&gb2, avctx->width, slice_height, dst,
-                             pic->linesize[0]);
 
-        dst += pic->linesize[0] * slice_height;
-        off += slice_size;
+        dst += stride;
     }
 
-    return 0;
+    return y; /* number of rows written */
 }
 
-static int dx2_decode_slice_410(GetBitContext *gb, int width, int height,
-                                uint8_t *Y, uint8_t *U, uint8_t *V,
-                                int ystride, int ustride, int vstride)
+static void default_setup_lru(uint8_t lru[3][8]) /* initialise all three tables from def_lru */
 {
-    int x, y, i, j;
-    uint8_t lru[3][8];
+    int i;
 
     for (i = 0; i < 3; i++)
         memcpy(lru[i], def_lru, 8 * sizeof(*def_lru));
+}
+
+static int dxtory_decode_v2_rgb(AVCodecContext *avctx, AVFrame *pic,
+                                const uint8_t *src, int src_size)
+{ /* Run dxtory_decode_v2() with the plain RGB slice decoder; the output format is AV_PIX_FMT_BGR24. */
+    return dxtory_decode_v2(avctx, pic, src, src_size,
+                            dx2_decode_slice_rgb,
+                            default_setup_lru,
+                            AV_PIX_FMT_BGR24);
+}
+
+static int dx2_decode_slice_410(GetBitContext *gb, AVFrame *frame,
+                                int line, int left,
+                                uint8_t lru[3][8])
+{
+    int x, y, i, j;
+    int width   = frame->width;
+
+    int ystride = frame->linesize[0];
+    int ustride = frame->linesize[1];
+    int vstride = frame->linesize[2];
+
+    uint8_t *Y  = frame->data[0] + ystride * line;
+    uint8_t *U  = frame->data[1] + (ustride >> 2) * line;
+    uint8_t *V  = frame->data[2] + (vstride >> 2) * line;
 
-    for (y = 0; y < height; y += 4) {
+    for (y = 0; y < left - 3 && get_bits_left(gb) > 16; y += 4) {
         for (x = 0; x < width; x += 4) {
             for (j = 0; j < 4; j++)
                 for (i = 0; i < 4; i++)
@@ -404,95 +450,37 @@ static int dx2_decode_slice_410(GetBitContext *gb, int width, int height,
         V += vstride;
     }
 
-    return 0;
+    return y;
 }
 
+
 static int dxtory_decode_v2_410(AVCodecContext *avctx, AVFrame *pic,
                                 const uint8_t *src, int src_size)
 {
-    GetByteContext gb;
-    GetBitContext  gb2;
-    int nslices, slice, slice_height;
-    int cur_y, next_y;
-    uint32_t off, slice_size;
-    uint8_t *Y, *U, *V;
-    int ret;
-
-    bytestream2_init(&gb, src, src_size);
-    nslices = bytestream2_get_le16(&gb);
-    off = FFALIGN(nslices * 4 + 2, 16);
-    if (src_size < off) {
-        av_log(avctx, AV_LOG_ERROR, "no slice data\n");
-        return AVERROR_INVALIDDATA;
-    }
-
-    if (!nslices) {
-        avpriv_request_sample(avctx, "%d slices for %dx%d", nslices,
-                              avctx->width, avctx->height);
-        return AVERROR_PATCHWELCOME;
-    }
-
-    if ((avctx->width & 3) || (avctx->height & 3)) {
-        avpriv_request_sample(avctx, "Frame dimensions %dx%d",
-                              avctx->width, avctx->height);
-    }
-
-    avctx->pix_fmt = AV_PIX_FMT_YUV410P;
-    if ((ret = ff_get_buffer(avctx, pic, 0)) < 0)
-        return ret;
-
-    Y = pic->data[0];
-    U = pic->data[1];
-    V = pic->data[2];
+    return dxtory_decode_v2(avctx, pic, src, src_size,
+                            dx2_decode_slice_410,
+                            default_setup_lru,
+                            AV_PIX_FMT_YUV410P);
+}
 
-    cur_y  = 0;
-    for (slice = 0; slice < nslices; slice++) {
-        slice_size   = bytestream2_get_le32(&gb);
-        next_y = ((slice + 1) * avctx->height) / nslices;
-        slice_height = (next_y & ~3) - (cur_y & ~3);
-        if (slice_size > src_size - off) {
-            av_log(avctx, AV_LOG_ERROR,
-                   "invalid slice size %"PRIu32" (only %"PRIu32" bytes left)\n",
-                   slice_size, src_size - off);
-            return AVERROR_INVALIDDATA;
-        }
-        if (slice_size <= 16) {
-            av_log(avctx, AV_LOG_ERROR, "invalid slice size %"PRIu32"\n", slice_size);
-            return AVERROR_INVALIDDATA;
-        }
+static int dx2_decode_slice_420(GetBitContext *gb, AVFrame *frame,
+                                int line, int left,
+                                uint8_t lru[3][8])
+{
+    int x, y;
 
-        if (AV_RL32(src + off) != slice_size - 16) {
-            av_log(avctx, AV_LOG_ERROR,
-                   "Slice sizes mismatch: got %"PRIu32" instead of %"PRIu32"\n",
-                   AV_RL32(src + off), slice_size - 16);
-        }
-        if ((ret = init_get_bits8(&gb2, src + off + 16, slice_size - 16)) < 0)
-            return ret;
-        dx2_decode_slice_410(&gb2, avctx->width, slice_height, Y, U, V,
-                             pic->linesize[0], pic->linesize[1],
-                             pic->linesize[2]);
+    int width    = frame->width;
 
-        Y += pic->linesize[0] *  slice_height;
-        U += pic->linesize[1] * (slice_height >> 2);
-        V += pic->linesize[2] * (slice_height >> 2);
-        off += slice_size;
-        cur_y   = next_y;
-    }
+    int ystride = frame->linesize[0];
+    int ustride = frame->linesize[1];
+    int vstride = frame->linesize[2];
 
-    return 0;
-}
+    uint8_t *Y  = frame->data[0] + ystride * line;
+    uint8_t *U  = frame->data[1] + (ustride >> 1) * line;
+    uint8_t *V  = frame->data[2] + (vstride >> 1) * line;
 
-static int dx2_decode_slice_420(GetBitContext *gb, int width, int height,
-                                uint8_t *Y, uint8_t *U, uint8_t *V,
-                                int ystride, int ustride, int vstride)
-{
-    int x, y, i;
-    uint8_t lru[3][8];
 
-    for (i = 0; i < 3; i++)
-        memcpy(lru[i], def_lru, 8 * sizeof(*def_lru));
-
-    for (y = 0; y < height; y+=2) {
+    for (y = 0; y < left - 1 && get_bits_left(gb) > 16; y += 2) {
         for (x = 0; x < width; x += 2) {
             Y[x + 0 + 0 * ystride] = decode_sym(gb, lru[0]);
             Y[x + 1 + 0 * ystride] = decode_sym(gb, lru[0]);
@@ -507,95 +495,35 @@ static int dx2_decode_slice_420(GetBitContext *gb, int width, int height,
         V += vstride;
     }
 
-    return 0;
+    return y;
 }
 
 static int dxtory_decode_v2_420(AVCodecContext *avctx, AVFrame *pic,
                                 const uint8_t *src, int src_size)
 {
-    GetByteContext gb;
-    GetBitContext  gb2;
-    int nslices, slice, slice_height;
-    int cur_y, next_y;
-    uint32_t off, slice_size;
-    uint8_t *Y, *U, *V;
-    int ret;
-
-    bytestream2_init(&gb, src, src_size);
-    nslices = bytestream2_get_le16(&gb);
-    off = FFALIGN(nslices * 4 + 2, 16);
-    if (src_size < off) {
-        av_log(avctx, AV_LOG_ERROR, "no slice data\n");
-        return AVERROR_INVALIDDATA;
-    }
-
-    if (!nslices) {
-        avpriv_request_sample(avctx, "%d slices for %dx%d", nslices,
-                              avctx->width, avctx->height);
-        return AVERROR_PATCHWELCOME;
-    }
-
-    if ((avctx->width & 1) || (avctx->height & 1)) {
-        avpriv_request_sample(avctx, "Frame dimensions %dx%d",
-                              avctx->width, avctx->height);
-    }
-
-    avctx->pix_fmt = AV_PIX_FMT_YUV420P;
-    if ((ret = ff_get_buffer(avctx, pic, 0)) < 0)
-        return ret;
-
-    Y = pic->data[0];
-    U = pic->data[1];
-    V = pic->data[2];
-
-    cur_y  = 0;
-    for (slice = 0; slice < nslices; slice++) {
-        slice_size   = bytestream2_get_le32(&gb);
-        next_y = ((slice + 1) * avctx->height) / nslices;
-        slice_height = (next_y & ~1) - (cur_y & ~1);
-        if (slice_size > src_size - off) {
-            av_log(avctx, AV_LOG_ERROR,
-                   "invalid slice size %"PRIu32" (only %"PRIu32" bytes left)\n",
-                   slice_size, src_size - off);
-            return AVERROR_INVALIDDATA;
-        }
-        if (slice_size <= 16) {
-            av_log(avctx, AV_LOG_ERROR, "invalid slice size %"PRIu32"\n", slice_size);
-            return AVERROR_INVALIDDATA;
-        }
-
-        if (AV_RL32(src + off) != slice_size - 16) {
-            av_log(avctx, AV_LOG_ERROR,
-                   "Slice sizes mismatch: got %"PRIu32" instead of %"PRIu32"\n",
-                   AV_RL32(src + off), slice_size - 16);
-        }
-        if ((ret = init_get_bits8(&gb2, src + off + 16, slice_size - 16)) < 0)
-            return ret;
-        dx2_decode_slice_420(&gb2, avctx->width, slice_height, Y, U, V,
-                             pic->linesize[0], pic->linesize[1],
-                             pic->linesize[2]);
-
-        Y += pic->linesize[0] *  slice_height;
-        U += pic->linesize[1] * (slice_height >> 1);
-        V += pic->linesize[2] * (slice_height >> 1);
-        off += slice_size;
-        cur_y   = next_y;
-    }
-
-    return 0;
+    return dxtory_decode_v2(avctx, pic, src, src_size,
+                            dx2_decode_slice_420,
+                            default_setup_lru,
+                            AV_PIX_FMT_YUV420P);
 }
 
-static int dx2_decode_slice_444(GetBitContext *gb, int width, int height,
-                                uint8_t *Y, uint8_t *U, uint8_t *V,
-                                int ystride, int ustride, int vstride)
+static int dx2_decode_slice_444(GetBitContext *gb, AVFrame *frame,
+                                int line, int left,
+                                uint8_t lru[3][8])
 {
-    int x, y, i;
-    uint8_t lru[3][8];
+    int x, y;
 
-    for (i = 0; i < 3; i++)
-        memcpy(lru[i], def_lru, 8 * sizeof(*def_lru));
+    int width   = frame->width;
+
+    int ystride = frame->linesize[0];
+    int ustride = frame->linesize[1];
+    int vstride = frame->linesize[2];
+
+    uint8_t *Y  = frame->data[0] + ystride * line;
+    uint8_t *U  = frame->data[1] + ustride * line;
+    uint8_t *V  = frame->data[2] + vstride * line;
 
-    for (y = 0; y < height; y++) {
+    for (y = 0; y < left && get_bits_left(gb) > 16; y++) {
         for (x = 0; x < width; x++) {
             Y[x] = decode_sym(gb, lru[0]);
             U[x] = decode_sym(gb, lru[1]) ^ 0x80;
@@ -607,74 +535,16 @@ static int dx2_decode_slice_444(GetBitContext *gb, int width, int height,
         V += vstride;
     }
 
-    return 0;
+    return y;
 }
 
 static int dxtory_decode_v2_444(AVCodecContext *avctx, AVFrame *pic,
                                 const uint8_t *src, int src_size)
 {
-    GetByteContext gb;
-    GetBitContext  gb2;
-    int nslices, slice, slice_height;
-    uint32_t off, slice_size;
-    uint8_t *Y, *U, *V;
-    int ret;
-
-    bytestream2_init(&gb, src, src_size);
-    nslices = bytestream2_get_le16(&gb);
-    off = FFALIGN(nslices * 4 + 2, 16);
-    if (src_size < off) {
-        av_log(avctx, AV_LOG_ERROR, "no slice data\n");
-        return AVERROR_INVALIDDATA;
-    }
-
-    if (!nslices || avctx->height % nslices) {
-        avpriv_request_sample(avctx, "%d slices for %dx%d", nslices,
-                              avctx->width, avctx->height);
-        return AVERROR_PATCHWELCOME;
-    }
-
-    slice_height = avctx->height / nslices;
-
-    avctx->pix_fmt = AV_PIX_FMT_YUV444P;
-    if ((ret = ff_get_buffer(avctx, pic, 0)) < 0)
-        return ret;
-
-    Y = pic->data[0];
-    U = pic->data[1];
-    V = pic->data[2];
-
-    for (slice = 0; slice < nslices; slice++) {
-        slice_size = bytestream2_get_le32(&gb);
-        if (slice_size > src_size - off) {
-            av_log(avctx, AV_LOG_ERROR,
-                   "invalid slice size %"PRIu32" (only %"PRIu32" bytes left)\n",
-                   slice_size, src_size - off);
-            return AVERROR_INVALIDDATA;
-        }
-        if (slice_size <= 16) {
-            av_log(avctx, AV_LOG_ERROR, "invalid slice size %"PRIu32"\n", slice_size);
-            return AVERROR_INVALIDDATA;
-        }
-
-        if (AV_RL32(src + off) != slice_size - 16) {
-            av_log(avctx, AV_LOG_ERROR,
-                   "Slice sizes mismatch: got %"PRIu32" instead of %"PRIu32"\n",
-                   AV_RL32(src + off), slice_size - 16);
-        }
-        if ((ret = init_get_bits8(&gb2, src + off + 16, slice_size - 16)) < 0)
-            return ret;
-        dx2_decode_slice_444(&gb2, avctx->width, slice_height, Y, U, V,
-                             pic->linesize[0], pic->linesize[1],
-                             pic->linesize[2]);
-
-        Y += pic->linesize[0] * slice_height;
-        U += pic->linesize[1] * slice_height;
-        V += pic->linesize[2] * slice_height;
-        off += slice_size;
-    }
-
-    return 0;
+    return dxtory_decode_v2(avctx, pic, src, src_size,
+                            dx2_decode_slice_444,
+                            default_setup_lru,
+                            AV_PIX_FMT_YUV444P);
 }
 
 static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
@@ -752,5 +622,5 @@ AVCodec ff_dxtory_decoder = {
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_DXTORY,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/dxv.c b/libavcodec/dxv.c
new file mode 100644
index 00000000..05a9aadd
--- /dev/null
+++ b/libavcodec/dxv.c
@@ -0,0 +1,507 @@
+/*
+ * Resolume DXV decoder
+ * Copyright (C) 2015 Vittorio Giovara <vittorio.giovara@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/imgutils.h"
+
+#include "avcodec.h"
+#include "bytestream.h"
+#include "internal.h"
+#include "lzf.h"
+#include "texturedsp.h"
+#include "thread.h"
+
+typedef struct DXVContext {
+    TextureDSPContext texdsp;
+    GetByteContext gbc; // Byte reader consumed by the dxv_decompress_*() functions
+
+    uint8_t *tex_data;  // Compressed texture, written by dxv_decompress_*() and read by the slice threads
+    int tex_rat;        // Compression ratio
+    int tex_step;       // Distance in bytes between consecutive blocks in tex_data
+    int64_t tex_size;   // Texture size in bytes
+
+    /* Optimal number of slices for parallel decoding; the block rows are divided between this many slices */
+    int slice_count;
+
+    /* Pointer to the selected decompression function, called once per block */
+    int (*tex_funct)(uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+} DXVContext;
+
+static int decompress_texture_thread(AVCodecContext *avctx, void *arg,
+                                     int slice, int thread_nb)
+{ /* Decode the block rows that belong to slice from ctx->tex_data into the AVFrame passed as arg; thread_nb is unused. Always returns 0. */
+    DXVContext *ctx = avctx->priv_data;
+    AVFrame *frame = arg;
+    const uint8_t *d = ctx->tex_data;
+    int w_block = avctx->coded_width / TEXTURE_BLOCK_W; /* blocks per block row */
+    int h_block = avctx->coded_height / TEXTURE_BLOCK_H; /* number of block rows */
+    int x, y;
+    int start_slice, end_slice; /* half-open range of block rows handled by this call */
+    int base_blocks_per_slice = h_block / ctx->slice_count;
+    int remainder_blocks = h_block % ctx->slice_count;
+
+    /* When the frame height (in blocks) doesn't divide evenly between the
+     * number of slices, each of the first remainder_blocks slices takes one
+     * extra block row */
+    start_slice = slice * base_blocks_per_slice;
+    /* Add any extra blocks (one per slice) that have been added
+     * before this slice */
+    start_slice += FFMIN(slice, remainder_blocks);
+
+    end_slice = start_slice + base_blocks_per_slice;
+    /* Add an extra block if there are remainder blocks to be accounted for */
+    if (slice < remainder_blocks)
+        end_slice++;
+
+    for (y = start_slice; y < end_slice; y++) {
+        uint8_t *p = frame->data[0] + y * frame->linesize[0] * TEXTURE_BLOCK_H; /* first pixel row of block row y */
+        int off  = y * w_block; /* index of the first block of this row in tex_data */
+        for (x = 0; x < w_block; x++) {
+            ctx->tex_funct(p + x * 16, frame->linesize[0], /* each block advances the destination by 16 bytes */
+                           d + (off + x) * ctx->tex_step);
+        }
+    }
+
+    return 0;
+}
+
+/* This scheme addresses already decoded elements depending on 2-bit status:
+ *   0 -> copy new element
+ *   1 -> copy one element from position -x
+ *   2 -> copy one element from position -(get_byte() + 2) * x
+ *   3 -> copy one element from position -(get_16le() + 0x102) * x
+ * x is always 2 for dxt1 and 4 for dxt5. The macro uses gbc, state, value, op, idx, pos and avctx from the enclosing function, leaves idx untouched for status 0, and makes that function return AVERROR_INVALIDDATA when idx > pos. */
+#define CHECKPOINT(x)                                                         \
+    do {                                                                      \
+        if (state == 0) {                                                     \
+            value = bytestream2_get_le32(gbc);                                \
+            state = 16;                                                       \
+        }                                                                     \
+        op = value & 0x3;                                                     \
+        value >>= 2;                                                          \
+        state--;                                                              \
+        switch (op) {                                                         \
+        case 1:                                                               \
+            idx = x;                                                          \
+            break;                                                            \
+        case 2:                                                               \
+            idx = (bytestream2_get_byte(gbc) + 2) * x;                        \
+            if (idx > pos) {                                                  \
+                av_log(avctx, AV_LOG_ERROR, "idx %d > %d\n", idx, pos);       \
+                return AVERROR_INVALIDDATA;                                   \
+            }                                                                 \
+            break;                                                            \
+        case 3:                                                               \
+            idx = (bytestream2_get_le16(gbc) + 0x102) * x;                    \
+            if (idx > pos) {                                                  \
+                av_log(avctx, AV_LOG_ERROR, "idx %d > %d\n", idx, pos);       \
+                return AVERROR_INVALIDDATA;                                   \
+            }                                                                 \
+            break;                                                            \
+        }                                                                     \
+    } while(0)
+
+static int dxv_decompress_dxt1(AVCodecContext *avctx)
+{ /* Expand the input read through ctx->gbc into ctx->tex_data; returns 0, or AVERROR_INVALIDDATA when CHECKPOINT rejects a back-reference. */
+    DXVContext *ctx = avctx->priv_data;
+    GetByteContext *gbc = &ctx->gbc;
+    uint32_t value, prev, op; /* value and op are driven by CHECKPOINT */
+    int idx = 0, state = 0; /* state: 2-bit codes still available in value */
+    int pos = 2; /* index of the next 32-bit element to write in tex_data */
+
+    /* Copy the first two elements */
+    AV_WL32(ctx->tex_data, bytestream2_get_le32(gbc));
+    AV_WL32(ctx->tex_data + 4, bytestream2_get_le32(gbc));
+
+    /* Process input until the whole texture has been filled */
+    while (pos < ctx->tex_size / 4) { /* NOTE(review): every pass writes two elements; presumably tex_size / 4 is even or tex_data has slack - confirm */
+        CHECKPOINT(2);
+
+        /* Copy two elements from a previous offset or from the input buffer */
+        if (op) { /* non-zero code: both elements come from idx elements back */
+            prev = AV_RL32(ctx->tex_data + 4 * (pos - idx));
+            AV_WL32(ctx->tex_data + 4 * pos, prev);
+            pos++;
+
+            prev = AV_RL32(ctx->tex_data + 4 * (pos - idx));
+            AV_WL32(ctx->tex_data + 4 * pos, prev);
+            pos++;
+        } else { /* zero code: each of the two elements gets its own code */
+            CHECKPOINT(2);
+
+            if (op)
+                prev = AV_RL32(ctx->tex_data + 4 * (pos - idx));
+            else
+                prev = bytestream2_get_le32(gbc); /* literal element taken from the input */
+            AV_WL32(ctx->tex_data + 4 * pos, prev);
+            pos++;
+
+            CHECKPOINT(2);
+
+            if (op)
+                prev = AV_RL32(ctx->tex_data + 4 * (pos - idx));
+            else
+                prev = bytestream2_get_le32(gbc);
+            AV_WL32(ctx->tex_data + 4 * pos, prev);
+            pos++;
+        }
+    }
+
+    return 0;
+}
+
+static int dxv_decompress_dxt5(AVCodecContext *avctx)
+{
+    DXVContext *ctx = avctx->priv_data;
+    GetByteContext *gbc = &ctx->gbc;
+    uint32_t value, op;
+    int idx, prev, state = 0;
+    int pos = 4;
+    int run = 0;
+    int probe, check;
+
+    /* Copy the first four elements */
+    AV_WL32(ctx->tex_data +  0, bytestream2_get_le32(gbc));
+    AV_WL32(ctx->tex_data +  4, bytestream2_get_le32(gbc));
+    AV_WL32(ctx->tex_data +  8, bytestream2_get_le32(gbc));
+    AV_WL32(ctx->tex_data + 12, bytestream2_get_le32(gbc));
+
+    /* Process input until the whole texture has been filled */
+    while (pos < ctx->tex_size / 4) {
+        if (run) {
+            run--;
+
+            prev = AV_RL32(ctx->tex_data + 4 * (pos - 4));
+            AV_WL32(ctx->tex_data + 4 * pos, prev);
+            pos++;
+            prev = AV_RL32(ctx->tex_data + 4 * (pos - 4));
+            AV_WL32(ctx->tex_data + 4 * pos, prev);
+            pos++;
+        } else {
+            if (state == 0) {
+                value = bytestream2_get_le32(gbc);
+                state = 16;
+            }
+            op = value & 0x3;
+            value >>= 2;
+            state--;
+
+            switch (op) {
+            case 0:
+                /* Long copy */
+                check = bytestream2_get_byte(gbc) + 1;
+                if (check == 256) {
+                    do {
+                        probe = bytestream2_get_le16(gbc);
+                        check += probe;
+                    } while (probe == 0xFFFF);
+                }
+                while (check && pos < ctx->tex_size / 4) {
+                    prev = AV_RL32(ctx->tex_data + 4 * (pos - 4));
+                    AV_WL32(ctx->tex_data + 4 * pos, prev);
+                    pos++;
+
+                    prev = AV_RL32(ctx->tex_data + 4 * (pos - 4));
+                    AV_WL32(ctx->tex_data + 4 * pos, prev);
+                    pos++;
+
+                    prev = AV_RL32(ctx->tex_data + 4 * (pos - 4));
+                    AV_WL32(ctx->tex_data + 4 * pos, prev);
+                    pos++;
+
+                    prev = AV_RL32(ctx->tex_data + 4 * (pos - 4));
+                    AV_WL32(ctx->tex_data + 4 * pos, prev);
+                    pos++;
+
+                    check--;
+                }
+
+                /* Restart (or exit) the loop */
+                continue;
+                break;
+            case 1:
+                /* Load new run value */
+                run = bytestream2_get_byte(gbc);
+                if (run == 255) {
+                    do {
+                        probe = bytestream2_get_le16(gbc);
+                        run += probe;
+                    } while (probe == 0xFFFF);
+                }
+
+                /* Copy two dwords from previous data */
+                prev = AV_RL32(ctx->tex_data + 4 * (pos - 4));
+                AV_WL32(ctx->tex_data + 4 * pos, prev);
+                pos++;
+
+                prev = AV_RL32(ctx->tex_data + 4 * (pos - 4));
+                AV_WL32(ctx->tex_data + 4 * pos, prev);
+                pos++;
+                break;
+            case 2:
+                /* Copy two dwords from a previous index */
+                idx = 8 + bytestream2_get_le16(gbc);
+                if (idx > pos) {
+                    av_log(avctx, AV_LOG_ERROR, "idx %d > %d\n", idx, pos);
+                    return AVERROR_INVALIDDATA;
+                }
+                prev = AV_RL32(ctx->tex_data + 4 * (pos - idx));
+                AV_WL32(ctx->tex_data + 4 * pos, prev);
+                pos++;
+
+                prev = AV_RL32(ctx->tex_data + 4 * (pos - idx));
+                AV_WL32(ctx->tex_data + 4 * pos, prev);
+                pos++;
+                break;
+            case 3:
+                /* Copy two dwords from input */
+                prev = bytestream2_get_le32(gbc);
+                AV_WL32(ctx->tex_data + 4 * pos, prev);
+                pos++;
+
+                prev = bytestream2_get_le32(gbc);
+                AV_WL32(ctx->tex_data + 4 * pos, prev);
+                pos++;
+                break;
+            }
+        }
+
+        CHECKPOINT(4);
+
+        /* Copy two elements from a previous offset or from the input buffer */
+        if (op) {
+            prev = AV_RL32(ctx->tex_data + 4 * (pos - idx));
+            AV_WL32(ctx->tex_data + 4 * pos, prev);
+            pos++;
+
+            prev = AV_RL32(ctx->tex_data + 4 * (pos - idx));
+            AV_WL32(ctx->tex_data + 4 * pos, prev);
+            pos++;
+        } else {
+            CHECKPOINT(4);
+
+            if (op)
+                prev = AV_RL32(ctx->tex_data + 4 * (pos - idx));
+            else
+                prev = bytestream2_get_le32(gbc);
+            AV_WL32(ctx->tex_data + 4 * pos, prev);
+            pos++;
+
+            CHECKPOINT(4);
+
+            if (op)
+                prev = AV_RL32(ctx->tex_data + 4 * (pos - idx));
+            else
+                prev = bytestream2_get_le32(gbc);
+            AV_WL32(ctx->tex_data + 4 * pos, prev);
+            pos++;
+        }
+    }
+
+    return 0;
+}
+
+static int dxv_decompress_lzf(AVCodecContext *avctx)
+{
+    DXVContext *dxv = avctx->priv_data; /* LZF path: unpack the packet payload into the texture buffer */
+    return ff_lzf_uncompress(&dxv->gbc, &dxv->tex_data, &dxv->tex_size); /* helper's status is returned as is */
+}
+
+static int dxv_decompress_raw(AVCodecContext *avctx)
+{
+    DXVContext *ctx = avctx->priv_data; /* raw path: the payload is copied straight into tex_data */
+    if (bytestream2_get_bytes_left(&ctx->gbc) < ctx->tex_size)
+        return AVERROR_INVALIDDATA; /* a short payload would leave the tail of tex_data unwritten */
+    bytestream2_get_buffer(&ctx->gbc, ctx->tex_data, ctx->tex_size);
+    return 0;
+}
+
+static int dxv_decode(AVCodecContext *avctx, void *data,
+                      int *got_frame, AVPacket *avpkt)
+{
+    DXVContext *ctx = avctx->priv_data;
+    ThreadFrame tframe;
+    GetByteContext *gbc = &ctx->gbc;
+    int (*decompress_tex)(AVCodecContext *avctx); /* first-stage unpacker picked from the header */
+    const char *msgcomp, *msgtext;                /* labels used only by the debug log */
+    uint32_t tag;
+    int version_major, version_minor = 0;
+    int size = 0, old_type = 0;                   /* old_type is only set for headerless packets */
+    int ret;
+    /* Decode one packet into the frame passed as data; returns avpkt->size or a negative error. */
+    bytestream2_init(gbc, avpkt->data, avpkt->size);
+
+    tag = bytestream2_get_le32(gbc);
+    switch (tag) {
+    case MKBETAG('D', 'X', 'T', '1'):
+        decompress_tex = dxv_decompress_dxt1;
+        ctx->tex_funct = ctx->texdsp.dxt1_block;
+        ctx->tex_rat   = 8; /* divisor applied to the RGBA byte count when sizing tex_data */
+        ctx->tex_step  = 8;
+        msgcomp = "DXTR1";
+        msgtext = "DXT1";
+        break;
+    case MKBETAG('D', 'X', 'T', '5'):
+        decompress_tex = dxv_decompress_dxt5;
+        ctx->tex_funct = ctx->texdsp.dxt5_block;
+        ctx->tex_rat   = 4;
+        ctx->tex_step  = 16;
+        msgcomp = "DXTR5";
+        msgtext = "DXT5";
+        break;
+    case MKBETAG('Y', 'C', 'G', '6'):
+    case MKBETAG('Y', 'G', '1', '0'):
+        avpriv_report_missing_feature(avctx, "Tag 0x%08X", tag);
+        return AVERROR_PATCHWELCOME;
+    default:
+        /* Old version does not have a real header, just size and type. */
+        size = tag & 0x00FFFFFF; /* low 24 bits */
+        old_type = tag >> 24;    /* top byte */
+        version_major = (old_type & 0x0F) - 1;
+        /* Bit 0x80 marks an uncompressed payload; otherwise it is LZF. */
+        if (old_type & 0x80) {
+            msgcomp = "RAW";
+            decompress_tex = dxv_decompress_raw;
+        } else {
+            msgcomp = "LZF";
+            decompress_tex = dxv_decompress_lzf;
+        }
+        /* Bit 0x40 selects DXT5; bit 0x20 or a computed major version of 1 selects DXT1. */
+        if (old_type & 0x40) {
+            msgtext = "DXT5";
+
+            ctx->tex_funct = ctx->texdsp.dxt5_block;
+            ctx->tex_step  = 16;
+        } else if (old_type & 0x20 || version_major == 1) {
+            msgtext = "DXT1";
+
+            ctx->tex_funct = ctx->texdsp.dxt1_block;
+            ctx->tex_step  = 8;
+        } else {
+            av_log(avctx, AV_LOG_ERROR, "Unsupported header (0x%08X).\n", tag);
+            return AVERROR_INVALIDDATA;
+        }
+        ctx->tex_rat = 1;
+        break;
+    }
+
+    /* New header is 12 bytes long: tag, two version bytes, raw flag, one unknown byte, size. */
+    if (!old_type) {
+        version_major = bytestream2_get_byte(gbc) - 1;
+        version_minor = bytestream2_get_byte(gbc);
+
+        /* Encoder copies texture data when compression is not advantageous. */
+        if (bytestream2_get_byte(gbc)) {
+            msgcomp = "RAW";
+            ctx->tex_rat = 1;
+            decompress_tex = dxv_decompress_raw;
+        }
+
+        bytestream2_skip(gbc, 1); // unknown
+        size = bytestream2_get_le32(gbc);
+    }
+    av_log(avctx, AV_LOG_DEBUG,
+           "%s compression with %s texture (version %d.%d)\n",
+           msgcomp, msgtext, version_major, version_minor);
+    /* The size declared in the header must equal what is left in the packet. */
+    if (size != bytestream2_get_bytes_left(gbc)) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Incomplete or invalid file (header %d, left %d).\n",
+               size, bytestream2_get_bytes_left(gbc));
+        return AVERROR_INVALIDDATA;
+    }
+    /* Texture bytes: 4 per coded pixel, divided by the ratio chosen above. */
+    ctx->tex_size = avctx->coded_width * avctx->coded_height * 4 / ctx->tex_rat;
+    ret = av_reallocp(&ctx->tex_data, ctx->tex_size);
+    if (ret < 0)
+        return ret;
+
+    /* Decompress texture out of the intermediate compression. */
+    ret = decompress_tex(avctx);
+    if (ret < 0)
+        return ret;
+    /* The caller's frame (data) receives its buffer only after the texture unpacked cleanly. */
+    tframe.f = data;
+    ret = ff_thread_get_buffer(avctx, &tframe, 0);
+    if (ret < 0)
+        return ret;
+
+    /* Now decompress the texture with the standard functions, as ctx->slice_count jobs. */
+    avctx->execute2(avctx, decompress_texture_thread,
+                    tframe.f, NULL, ctx->slice_count);
+
+    /* Frame is ready to be output. */
+    tframe.f->pict_type = AV_PICTURE_TYPE_I;
+    tframe.f->key_frame = 1;
+    *got_frame = 1;
+
+    return avpkt->size; /* the whole packet is reported as consumed */
+}
+
+static int dxv_init(AVCodecContext *avctx)
+{
+    DXVContext *dxv = avctx->priv_data;
+    int max_slices;
+    int err = av_image_check_size(avctx->width, avctx->height, 0, avctx);
+
+    /* Bail out when the generic image-size check rejects the dimensions. */
+    if (err < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid image size %dx%d.\n",
+               avctx->width, avctx->height);
+        return err;
+    }
+    /* Pad the coded size up to multiples of 16, as the codec requires. */
+    avctx->coded_width  = FFALIGN(avctx->width,  16);
+    avctx->coded_height = FFALIGN(avctx->height, 16);
+    avctx->pix_fmt      = AV_PIX_FMT_RGBA;
+    ff_texturedsp_init(&dxv->texdsp);
+
+    /* Slice jobs: the thread count, kept between 1 and coded_height / TEXTURE_BLOCK_H. */
+    max_slices       = avctx->coded_height / TEXTURE_BLOCK_H;
+    dxv->slice_count = av_clip(avctx->thread_count, 1, max_slices);
+    return 0;
+}
+
+static int dxv_close(AVCodecContext *avctx)
+{
+    DXVContext *dxv = avctx->priv_data;
+    /* Release the intermediate texture buffer held in the private context. */
+    av_freep(&dxv->tex_data);
+
+    return 0;
+}
+
+AVCodec ff_dxv_decoder = {
+    /* Codec table entry for the DXV video decoder: identity, then callbacks, then flags. */
+    .id             = AV_CODEC_ID_DXV,
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .name           = "dxv",
+    .long_name      = NULL_IF_CONFIG_SMALL("Resolume DXV"),
+    .priv_data_size = sizeof(DXVContext),
+    .init           = dxv_init,
+    .decode         = dxv_decode,
+    .close          = dxv_close,
+    /* Advertises the DR1 capability plus slice- and frame-threading. */
+    .capabilities   = AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_SLICE_THREADS |
+                      AV_CODEC_CAP_DR1,
+    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP | FF_CODEC_CAP_INIT_THREADSAFE,
+};
diff --git a/libavcodec/dxva2.c b/libavcodec/dxva2.c
index f31c5e26..2cf57ad6 100644
--- a/libavcodec/dxva2.c
+++ b/libavcodec/dxva2.c
@@ -27,7 +27,6 @@
 #include "libavutil/time.h"
 
 #include "avcodec.h"
-#include "mpegvideo.h"
 #include "dxva2_internal.h"
 
 void *ff_dxva2_get_surface(const AVFrame *frame)
@@ -145,10 +144,13 @@ int ff_dxva2_common_end_frame(AVCodecContext *avctx, AVFrame *frame,
 
     do {
 #if CONFIG_D3D11VA
-        if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD)
+        if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD) {
+            if (D3D11VA_CONTEXT(ctx)->context_mutex != INVALID_HANDLE_VALUE)
+                WaitForSingleObjectEx(D3D11VA_CONTEXT(ctx)->context_mutex, INFINITE, FALSE);
             hr = ID3D11VideoContext_DecoderBeginFrame(D3D11VA_CONTEXT(ctx)->video_context, D3D11VA_CONTEXT(ctx)->decoder,
                                                       ff_dxva2_get_surface(frame),
                                                       0, NULL);
+        }
 #endif
 #if CONFIG_DXVA2
         if (avctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD)
@@ -162,6 +164,11 @@ int ff_dxva2_common_end_frame(AVCodecContext *avctx, AVFrame *frame,
 
     if (FAILED(hr)) {
         av_log(avctx, AV_LOG_ERROR, "Failed to begin frame: 0x%lx\n", hr);
+#if CONFIG_D3D11VA
+        if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD)
+            if (D3D11VA_CONTEXT(ctx)->context_mutex != INVALID_HANDLE_VALUE)
+                ReleaseMutex(D3D11VA_CONTEXT(ctx)->context_mutex);
+#endif
         return -1;
     }
 
@@ -261,8 +268,11 @@ int ff_dxva2_common_end_frame(AVCodecContext *avctx, AVFrame *frame,
 
 end:
 #if CONFIG_D3D11VA
-    if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD)
+    if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD) {
         hr = ID3D11VideoContext_DecoderEndFrame(D3D11VA_CONTEXT(ctx)->video_context, D3D11VA_CONTEXT(ctx)->decoder);
+        if (D3D11VA_CONTEXT(ctx)->context_mutex != INVALID_HANDLE_VALUE)
+            ReleaseMutex(D3D11VA_CONTEXT(ctx)->context_mutex);
+    }
 #endif
 #if CONFIG_DXVA2
     if (avctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD)
diff --git a/libavcodec/dxva2.h b/libavcodec/dxva2.h
index be246d71..22c93992 100644
--- a/libavcodec/dxva2.h
+++ b/libavcodec/dxva2.h
@@ -20,8 +20,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef AVCODEC_DXVA_H
-#define AVCODEC_DXVA_H
+#ifndef AVCODEC_DXVA2_H
+#define AVCODEC_DXVA2_H
 
 /**
  * @file
@@ -29,9 +29,9 @@
  * Public libavcodec DXVA2 header.
  */
 
-#if !defined(_WIN32_WINNT) || _WIN32_WINNT < 0x0600
+#if !defined(_WIN32_WINNT) || _WIN32_WINNT < 0x0602
 #undef _WIN32_WINNT
-#define _WIN32_WINNT 0x0600
+#define _WIN32_WINNT 0x0602
 #endif
 
 #include <stdint.h>
@@ -90,4 +90,4 @@ struct dxva_context {
  * @}
  */
 
-#endif /* AVCODEC_DXVA_H */
+#endif /* AVCODEC_DXVA2_H */
diff --git a/libavcodec/dxva2_h264.c b/libavcodec/dxva2_h264.c
index 99b80ba3..61cce3ae 100644
--- a/libavcodec/dxva2_h264.c
+++ b/libavcodec/dxva2_h264.c
@@ -20,11 +20,15 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include "dxva2_internal.h"
 #include "h264.h"
 #include "h264data.h"
 #include "mpegutils.h"
 
+// The headers above may include w32threads.h, which uses the original
+// _WIN32_WINNT define, while dxva2_internal.h redefines it to target a
+// potentially newer version.
+#include "dxva2_internal.h"
+
 struct dxva2_picture_context {
     DXVA_PicParams_H264   pp;
     DXVA_Qmatrix_H264     qm;
@@ -248,7 +252,7 @@ static void fill_slice_long(AVCodecContext *avctx, DXVA_Slice_H264_Long *slice,
                 else
                     index = get_refpic_index(pp, ff_dxva2_get_surface_index(avctx, ctx, r->f));
                 fill_picture_entry(&slice->RefPicList[list][i], index,
-                                   r->reference == PICT_BOTTOM_FIELD);
+                                   sl->ref_list[list][i].reference == PICT_BOTTOM_FIELD);
                 for (plane = 0; plane < 3; plane++) {
                     int w, o;
                     if (plane == 0 && sl->luma_weight_flag[list]) {
diff --git a/libavcodec/dxva2_hevc.c b/libavcodec/dxva2_hevc.c
index 5f5134b6..5a312eaf 100644
--- a/libavcodec/dxva2_hevc.c
+++ b/libavcodec/dxva2_hevc.c
@@ -22,9 +22,13 @@
 
 #include "libavutil/avassert.h"
 
-#include "dxva2_internal.h"
 #include "hevc.h"
 
+// The headers above may include w32threads.h, which uses the original
+// _WIN32_WINNT define, while dxva2_internal.h redefines it to target a
+// potentially newer version.
+#include "dxva2_internal.h"
+
 #define MAX_SLICES 256
 
 struct hevc_dxva2_picture_context {
@@ -57,102 +61,104 @@ static void fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext *
                                     DXVA_PicParams_HEVC *pp)
 {
     const HEVCFrame *current_picture = h->ref;
+    const HEVCSPS *sps = h->ps.sps;
+    const HEVCPPS *pps = h->ps.pps;
     int i, j;
 
     memset(pp, 0, sizeof(*pp));
 
-    pp->PicWidthInMinCbsY  = h->sps->min_cb_width;
-    pp->PicHeightInMinCbsY = h->sps->min_cb_height;
+    pp->PicWidthInMinCbsY  = sps->min_cb_width;
+    pp->PicHeightInMinCbsY = sps->min_cb_height;
 
-    pp->wFormatAndSequenceInfoFlags = (h->sps->chroma_format_idc          <<  0) |
-                                      (h->sps->separate_colour_plane_flag <<  2) |
-                                      ((h->sps->bit_depth - 8)            <<  3) |
-                                      ((h->sps->bit_depth - 8)            <<  6) |
-                                      ((h->sps->log2_max_poc_lsb - 4)     <<  9) |
+    pp->wFormatAndSequenceInfoFlags = (sps->chroma_format_idc             <<  0) |
+                                      (sps->separate_colour_plane_flag    <<  2) |
+                                      ((sps->bit_depth - 8)               <<  3) |
+                                      ((sps->bit_depth - 8)               <<  6) |
+                                      ((sps->log2_max_poc_lsb - 4)        <<  9) |
                                       (0                                  << 13) |
                                       (0                                  << 14) |
                                       (0                                  << 15);
 
     fill_picture_entry(&pp->CurrPic, ff_dxva2_get_surface_index(avctx, ctx, current_picture->frame), 0);
 
-    pp->sps_max_dec_pic_buffering_minus1         = h->sps->temporal_layer[h->sps->max_sub_layers - 1].max_dec_pic_buffering - 1;
-    pp->log2_min_luma_coding_block_size_minus3   = h->sps->log2_min_cb_size - 3;
-    pp->log2_diff_max_min_luma_coding_block_size = h->sps->log2_diff_max_min_coding_block_size;
-    pp->log2_min_transform_block_size_minus2     = h->sps->log2_min_tb_size - 2;
-    pp->log2_diff_max_min_transform_block_size   = h->sps->log2_max_trafo_size  - h->sps->log2_min_tb_size;
-    pp->max_transform_hierarchy_depth_inter      = h->sps->max_transform_hierarchy_depth_inter;
-    pp->max_transform_hierarchy_depth_intra      = h->sps->max_transform_hierarchy_depth_intra;
-    pp->num_short_term_ref_pic_sets              = h->sps->nb_st_rps;
-    pp->num_long_term_ref_pics_sps               = h->sps->num_long_term_ref_pics_sps;
+    pp->sps_max_dec_pic_buffering_minus1         = sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering - 1;
+    pp->log2_min_luma_coding_block_size_minus3   = sps->log2_min_cb_size - 3;
+    pp->log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_coding_block_size;
+    pp->log2_min_transform_block_size_minus2     = sps->log2_min_tb_size - 2;
+    pp->log2_diff_max_min_transform_block_size   = sps->log2_max_trafo_size  - sps->log2_min_tb_size;
+    pp->max_transform_hierarchy_depth_inter      = sps->max_transform_hierarchy_depth_inter;
+    pp->max_transform_hierarchy_depth_intra      = sps->max_transform_hierarchy_depth_intra;
+    pp->num_short_term_ref_pic_sets              = sps->nb_st_rps;
+    pp->num_long_term_ref_pics_sps               = sps->num_long_term_ref_pics_sps;
 
-    pp->num_ref_idx_l0_default_active_minus1     = h->pps->num_ref_idx_l0_default_active - 1;
-    pp->num_ref_idx_l1_default_active_minus1     = h->pps->num_ref_idx_l1_default_active - 1;
-    pp->init_qp_minus26                          = h->pps->pic_init_qp_minus26;
+    pp->num_ref_idx_l0_default_active_minus1     = pps->num_ref_idx_l0_default_active - 1;
+    pp->num_ref_idx_l1_default_active_minus1     = pps->num_ref_idx_l1_default_active - 1;
+    pp->init_qp_minus26                          = pps->pic_init_qp_minus26;
 
     if (h->sh.short_term_ref_pic_set_sps_flag == 0 && h->sh.short_term_rps) {
-        pp->ucNumDeltaPocsOfRefRpsIdx            = h->sh.short_term_rps->num_delta_pocs;
+        pp->ucNumDeltaPocsOfRefRpsIdx            = h->sh.short_term_rps->rps_idx_num_delta_pocs;
         pp->wNumBitsForShortTermRPSInSlice       = h->sh.short_term_ref_pic_set_size;
     }
 
-    pp->dwCodingParamToolFlags = (h->sps->scaling_list_enable_flag               <<  0) |
-                                 (h->sps->amp_enabled_flag                       <<  1) |
-                                 (h->sps->sao_enabled                            <<  2) |
-                                 (h->sps->pcm_enabled_flag                       <<  3) |
-                                 ((h->sps->pcm_enabled_flag ? (h->sps->pcm.bit_depth - 1) : 0)            <<  4) |
-                                 ((h->sps->pcm_enabled_flag ? (h->sps->pcm.bit_depth_chroma - 1) : 0)     <<  8) |
-                                 ((h->sps->pcm_enabled_flag ? (h->sps->pcm.log2_min_pcm_cb_size - 3) : 0) << 12) |
-                                 ((h->sps->pcm_enabled_flag ? (h->sps->pcm.log2_max_pcm_cb_size - h->sps->pcm.log2_min_pcm_cb_size) : 0) << 14) |
-                                 (h->sps->pcm.loop_filter_disable_flag           << 16) |
-                                 (h->sps->long_term_ref_pics_present_flag        << 17) |
-                                 (h->sps->sps_temporal_mvp_enabled_flag          << 18) |
-                                 (h->sps->sps_strong_intra_smoothing_enable_flag << 19) |
-                                 (h->pps->dependent_slice_segments_enabled_flag  << 20) |
-                                 (h->pps->output_flag_present_flag               << 21) |
-                                 (h->pps->num_extra_slice_header_bits            << 22) |
-                                 (h->pps->sign_data_hiding_flag                  << 25) |
-                                 (h->pps->cabac_init_present_flag                << 26) |
+    pp->dwCodingParamToolFlags = (sps->scaling_list_enable_flag                  <<  0) |
+                                 (sps->amp_enabled_flag                          <<  1) |
+                                 (sps->sao_enabled                               <<  2) |
+                                 (sps->pcm_enabled_flag                          <<  3) |
+                                 ((sps->pcm_enabled_flag ? (sps->pcm.bit_depth - 1) : 0)            <<  4) |
+                                 ((sps->pcm_enabled_flag ? (sps->pcm.bit_depth_chroma - 1) : 0)     <<  8) |
+                                 ((sps->pcm_enabled_flag ? (sps->pcm.log2_min_pcm_cb_size - 3) : 0) << 12) |
+                                 ((sps->pcm_enabled_flag ? (sps->pcm.log2_max_pcm_cb_size - sps->pcm.log2_min_pcm_cb_size) : 0) << 14) |
+                                 (sps->pcm.loop_filter_disable_flag              << 16) |
+                                 (sps->long_term_ref_pics_present_flag           << 17) |
+                                 (sps->sps_temporal_mvp_enabled_flag             << 18) |
+                                 (sps->sps_strong_intra_smoothing_enable_flag    << 19) |
+                                 (pps->dependent_slice_segments_enabled_flag     << 20) |
+                                 (pps->output_flag_present_flag                  << 21) |
+                                 (pps->num_extra_slice_header_bits               << 22) |
+                                 (pps->sign_data_hiding_flag                     << 25) |
+                                 (pps->cabac_init_present_flag                   << 26) |
                                  (0                                              << 27);
 
-    pp->dwCodingSettingPicturePropertyFlags = (h->pps->constrained_intra_pred_flag                <<  0) |
-                                              (h->pps->transform_skip_enabled_flag                <<  1) |
-                                              (h->pps->cu_qp_delta_enabled_flag                   <<  2) |
-                                              (h->pps->pic_slice_level_chroma_qp_offsets_present_flag <<  3) |
-                                              (h->pps->weighted_pred_flag                         <<  4) |
-                                              (h->pps->weighted_bipred_flag                       <<  5) |
-                                              (h->pps->transquant_bypass_enable_flag              <<  6) |
-                                              (h->pps->tiles_enabled_flag                         <<  7) |
-                                              (h->pps->entropy_coding_sync_enabled_flag           <<  8) |
-                                              (h->pps->uniform_spacing_flag                       <<  9) |
-                                              ((h->pps->tiles_enabled_flag ? h->pps->loop_filter_across_tiles_enabled_flag : 0) << 10) |
-                                              (h->pps->seq_loop_filter_across_slices_enabled_flag << 11) |
-                                              (h->pps->deblocking_filter_override_enabled_flag    << 12) |
-                                              (h->pps->disable_dbf                                << 13) |
-                                              (h->pps->lists_modification_present_flag            << 14) |
-                                              (h->pps->slice_header_extension_present_flag        << 15) |
+    pp->dwCodingSettingPicturePropertyFlags = (pps->constrained_intra_pred_flag                   <<  0) |
+                                              (pps->transform_skip_enabled_flag                   <<  1) |
+                                              (pps->cu_qp_delta_enabled_flag                      <<  2) |
+                                              (pps->pic_slice_level_chroma_qp_offsets_present_flag <<  3) |
+                                              (pps->weighted_pred_flag                            <<  4) |
+                                              (pps->weighted_bipred_flag                          <<  5) |
+                                              (pps->transquant_bypass_enable_flag                 <<  6) |
+                                              (pps->tiles_enabled_flag                            <<  7) |
+                                              (pps->entropy_coding_sync_enabled_flag              <<  8) |
+                                              (pps->uniform_spacing_flag                          <<  9) |
+                                              ((pps->tiles_enabled_flag ? pps->loop_filter_across_tiles_enabled_flag : 0) << 10) |
+                                              (pps->seq_loop_filter_across_slices_enabled_flag    << 11) |
+                                              (pps->deblocking_filter_override_enabled_flag       << 12) |
+                                              (pps->disable_dbf                                   << 13) |
+                                              (pps->lists_modification_present_flag               << 14) |
+                                              (pps->slice_header_extension_present_flag           << 15) |
                                               (IS_IRAP(h)                                         << 16) |
                                               (IS_IDR(h)                                          << 17) |
                                               /* IntraPicFlag */
                                               (IS_IRAP(h)                                         << 18) |
                                               (0                                                  << 19);
-    pp->pps_cb_qp_offset            = h->pps->cb_qp_offset;
-    pp->pps_cr_qp_offset            = h->pps->cr_qp_offset;
-    if (h->pps->tiles_enabled_flag) {
-        pp->num_tile_columns_minus1 = h->pps->num_tile_columns - 1;
-        pp->num_tile_rows_minus1    = h->pps->num_tile_rows - 1;
-
-        if (!h->pps->uniform_spacing_flag) {
-            for (i = 0; i < h->pps->num_tile_columns; i++)
-                pp->column_width_minus1[i] = h->pps->column_width[i] - 1;
-
-            for (i = 0; i < h->pps->num_tile_rows; i++)
-                pp->row_height_minus1[i] = h->pps->row_height[i] - 1;
+    pp->pps_cb_qp_offset            = pps->cb_qp_offset;
+    pp->pps_cr_qp_offset            = pps->cr_qp_offset;
+    if (pps->tiles_enabled_flag) {
+        pp->num_tile_columns_minus1 = pps->num_tile_columns - 1;
+        pp->num_tile_rows_minus1    = pps->num_tile_rows - 1;
+
+        if (!pps->uniform_spacing_flag) {
+            for (i = 0; i < pps->num_tile_columns; i++)
+                pp->column_width_minus1[i] = pps->column_width[i] - 1;
+
+            for (i = 0; i < pps->num_tile_rows; i++)
+                pp->row_height_minus1[i] = pps->row_height[i] - 1;
         }
     }
 
-    pp->diff_cu_qp_delta_depth           = h->pps->diff_cu_qp_delta_depth;
-    pp->pps_beta_offset_div2             = h->pps->beta_offset / 2;
-    pp->pps_tc_offset_div2               = h->pps->tc_offset / 2;
-    pp->log2_parallel_merge_level_minus2 = h->pps->log2_parallel_merge_level - 2;
+    pp->diff_cu_qp_delta_depth           = pps->diff_cu_qp_delta_depth;
+    pp->pps_beta_offset_div2             = pps->beta_offset / 2;
+    pp->pps_tc_offset_div2               = pps->tc_offset / 2;
+    pp->log2_parallel_merge_level_minus2 = pps->log2_parallel_merge_level - 2;
     pp->CurrPicOrderCntVal               = h->poc;
 
     // fill RefPicList from the DPB
@@ -197,8 +203,8 @@ static void fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext *
 static void fill_scaling_lists(AVDXVAContext *ctx, const HEVCContext *h, DXVA_Qmatrix_HEVC *qm)
 {
     unsigned i, j, pos;
-    const ScalingList *sl = h->pps->scaling_list_data_present_flag ?
-                            &h->pps->scaling_list : &h->sps->scaling_list;
+    const ScalingList *sl = h->ps.pps->scaling_list_data_present_flag ?
+                            &h->ps.pps->scaling_list : &h->ps.sps->scaling_list;
 
     memset(qm, 0, sizeof(*qm));
     for (i = 0; i < 6; i++) {
diff --git a/libavcodec/dxva2_internal.h b/libavcodec/dxva2_internal.h
index 426de9dc..ad89f829 100644
--- a/libavcodec/dxva2_internal.h
+++ b/libavcodec/dxva2_internal.h
@@ -20,13 +20,16 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef AVCODEC_DXVA_INTERNAL_H
-#define AVCODEC_DXVA_INTERNAL_H
+#ifndef AVCODEC_DXVA2_INTERNAL_H
+#define AVCODEC_DXVA2_INTERNAL_H
 
 #define COBJMACROS
 
 #include "config.h"
 
+/* define the proper COM entries before forcing desktop APIs */
+#include <objbase.h>
+
 #if CONFIG_DXVA2
 #include "dxva2.h"
 #endif
@@ -35,11 +38,16 @@
 #endif
 
 #if HAVE_DXVA_H
+/* When targeting WINAPI_FAMILY_PHONE_APP or WINAPI_FAMILY_APP, dxva.h
+ * defines nothing. Force the struct definitions to be visible. */
+#undef WINAPI_FAMILY
+#define WINAPI_FAMILY WINAPI_FAMILY_DESKTOP_APP
+#undef _CRT_BUILD_DESKTOP_APP
+#define _CRT_BUILD_DESKTOP_APP 0
 #include <dxva.h>
 #endif
 
 #include "avcodec.h"
-#include "mpegvideo.h"
 
 typedef void DECODER_BUFFER_DESC;
 
@@ -110,4 +118,4 @@ int ff_dxva2_common_end_frame(AVCodecContext *, AVFrame *,
                                                   DECODER_BUFFER_DESC *bs,
                                                   DECODER_BUFFER_DESC *slice));
 
-#endif /* AVCODEC_DXVA_INTERNAL_H */
+#endif /* AVCODEC_DXVA2_INTERNAL_H */
diff --git a/libavcodec/dxva2_mpeg2.c b/libavcodec/dxva2_mpeg2.c
index 89c43e7f..c2f0b58b 100644
--- a/libavcodec/dxva2_mpeg2.c
+++ b/libavcodec/dxva2_mpeg2.c
@@ -21,8 +21,13 @@
  */
 
 #include "libavutil/log.h"
-#include "dxva2_internal.h"
 #include "mpegutils.h"
+#include "mpegvideo.h"
+
+// The headers above may include w32threads.h, which uses the original
+// _WIN32_WINNT define, while dxva2_internal.h redefines it to target a
+// potentially newer version.
+#include "dxva2_internal.h"
 
 #define MAX_SLICES 1024
 struct dxva2_picture_context {
diff --git a/libavcodec/dxva2_vc1.c b/libavcodec/dxva2_vc1.c
index 1eb42538..7cbbc7ef 100644
--- a/libavcodec/dxva2_vc1.c
+++ b/libavcodec/dxva2_vc1.c
@@ -20,11 +20,15 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include "dxva2_internal.h"
 #include "mpegutils.h"
 #include "vc1.h"
 #include "vc1data.h"
 
+// The headers above may include w32threads.h, which uses the original
+// _WIN32_WINNT define, while dxva2_internal.h redefines it to target a
+// potentially newer version.
+#include "dxva2_internal.h"
+
 struct dxva2_picture_context {
     DXVA_PictureParameters pp;
     DXVA_SliceInfo         si;
diff --git a/libavcodec/dxva2_vp9.c b/libavcodec/dxva2_vp9.c
new file mode 100644
index 00000000..0c4996c0
--- /dev/null
+++ b/libavcodec/dxva2_vp9.c
@@ -0,0 +1,337 @@
+/*
+ * DXVA2 VP9 HW acceleration.
+ *
+ * copyright (c) 2015 Hendrik Leppkes
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avassert.h"
+#include "libavutil/pixdesc.h"
+
+#include "vp9.h"
+
+// The headers above may include w32threads.h, which uses the original
+// _WIN32_WINNT define, while dxva2_internal.h redefines it to target a
+// potentially newer version.
+#include "dxva2_internal.h"
+
+struct vp9_dxva2_picture_context {
+    DXVA_PicParams_VP9    pp;             /* picture parameters, filled in by start_frame */
+    DXVA_Slice_VPx_Short  slice;          /* slice descriptor, rewritten on every decode_slice call */
+    const uint8_t         *bitstream;     /* first buffer handed to decode_slice for this frame */
+    unsigned              bitstream_size; /* running sum of the sizes handed to decode_slice */
+};
+
+/* Pack a 7-bit surface index (low bits) and a 1-bit flag (bit 7) into pic. */
+static void fill_picture_entry(DXVA_PicEntry_VPx *pic, unsigned index, unsigned flag)
+{
+    av_assert0((index & 0x7f) == index && (flag & 0x01) == flag);
+    pic->bPicEntry = (flag << 7) | index;
+}
+
+static int fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext *ctx, const VP9SharedContext *h,
+                                    DXVA_PicParams_VP9 *pp)
+{
+    int i;
+    const AVPixFmtDescriptor * pixdesc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
+    /* Fills *pp from the frame header in h; returns 0, or -1 if sw_pix_fmt has no descriptor. */
+    if (!pixdesc)
+        return -1;
+
+    memset(pp, 0, sizeof(*pp));
+    /* surface index of the frame currently being decoded */
+    fill_picture_entry(&pp->CurrPic, ff_dxva2_get_surface_index(avctx, ctx, h->frames[CUR_FRAME].tf.f), 0);
+
+    pp->profile = h->h.profile;
+    pp->wFormatAndPictureInfoFlags = ((h->h.keyframe == 0)   <<  0) |
+                                     ((h->h.invisible == 0)  <<  1) |
+                                     (h->h.errorres          <<  2) |
+                                     (pixdesc->log2_chroma_w <<  3) | /* subsampling_x */
+                                     (pixdesc->log2_chroma_h <<  4) | /* subsampling_y */
+                                     (0                      <<  5) | /* extra_plane */
+                                     (h->h.refreshctx        <<  6) |
+                                     (h->h.parallelmode      <<  7) |
+                                     (h->h.intraonly         <<  8) |
+                                     (h->h.framectxid        <<  9) |
+                                     (h->h.resetctx          << 11) |
+                                     ((h->h.keyframe ? 0 : h->h.highprecisionmvs) << 13) |
+                                     (0                      << 14);  /* ReservedFormatInfo2Bits */
+
+    pp->width  = avctx->width;
+    pp->height = avctx->height;
+    pp->BitDepthMinus8Luma   = pixdesc->comp[0].depth - 8;
+    pp->BitDepthMinus8Chroma = pixdesc->comp[1].depth - 8;
+    /* swap 0/1 to match the reference */
+    pp->interp_filter = h->h.filtermode ^ (h->h.filtermode <= 1);
+    pp->Reserved8Bits = 0;
+    /* all 8 reference slots; 0xFF marks a slot that holds no buffer */
+    for (i = 0; i < 8; i++) {
+        if (h->refs[i].f->buf[0]) {
+            fill_picture_entry(&pp->ref_frame_map[i], ff_dxva2_get_surface_index(avctx, ctx, h->refs[i].f), 0);
+            pp->ref_frame_coded_width[i]  = h->refs[i].f->width;
+            pp->ref_frame_coded_height[i] = h->refs[i].f->height;
+        } else
+            pp->ref_frame_map[i].bPicEntry = 0xFF;
+    }
+    /* the 3 references this frame selects through refidx[], plus their sign bias */
+    for (i = 0; i < 3; i++) {
+        uint8_t refidx = h->h.refidx[i];
+        if (h->refs[refidx].f->buf[0])
+            fill_picture_entry(&pp->frame_refs[i], ff_dxva2_get_surface_index(avctx, ctx, h->refs[refidx].f), 0);
+        else
+            pp->frame_refs[i].bPicEntry = 0xFF;
+
+        pp->ref_frame_sign_bias[i + 1] = h->h.signbias[i];
+    }
+
+    pp->filter_level    = h->h.filter.level;
+    pp->sharpness_level = h->h.filter.sharpness;
+
+    pp->wControlInfoFlags = (h->h.lf_delta.enabled   << 0) |
+                            (h->h.lf_delta.updated   << 1) |
+                            (h->h.use_last_frame_mvs << 2) |
+                            (0                       << 3);  /* ReservedControlInfo5Bits */
+
+    for (i = 0; i < 4; i++)
+        pp->ref_deltas[i]  = h->h.lf_delta.ref[i];
+
+    for (i = 0; i < 2; i++)
+        pp->mode_deltas[i]  = h->h.lf_delta.mode[i];
+
+    pp->base_qindex   = h->h.yac_qi;
+    pp->y_dc_delta_q  = h->h.ydc_qdelta;
+    pp->uv_dc_delta_q = h->h.uvdc_qdelta;
+    pp->uv_ac_delta_q = h->h.uvac_qdelta;
+
+    /* segmentation data */
+    pp->stVP9Segments.wSegmentInfoFlags = (h->h.segmentation.enabled       << 0) |
+                                          (h->h.segmentation.update_map    << 1) |
+                                          (h->h.segmentation.temporal      << 2) |
+                                          (h->h.segmentation.absolute_vals << 3) |
+                                          (0                               << 4);  /* ReservedSegmentFlags4Bits */
+
+    for (i = 0; i < 7; i++)
+        pp->stVP9Segments.tree_probs[i] = h->h.segmentation.prob[i];
+    /* without temporal segmentation every pred_probs byte is set to 255 */
+    if (h->h.segmentation.temporal)
+        for (i = 0; i < 3; i++)
+            pp->stVP9Segments.pred_probs[i] = h->h.segmentation.pred_prob[i];
+    else
+        memset(pp->stVP9Segments.pred_probs, 255, sizeof(pp->stVP9Segments.pred_probs));
+    /* per-segment feature enable bits and feature values */
+    for (i = 0; i < 8; i++) {
+        pp->stVP9Segments.feature_mask[i] = (h->h.segmentation.feat[i].q_enabled    << 0) |
+                                            (h->h.segmentation.feat[i].lf_enabled   << 1) |
+                                            (h->h.segmentation.feat[i].ref_enabled  << 2) |
+                                            (h->h.segmentation.feat[i].skip_enabled << 3);
+
+        pp->stVP9Segments.feature_data[i][0] = h->h.segmentation.feat[i].q_val;
+        pp->stVP9Segments.feature_data[i][1] = h->h.segmentation.feat[i].lf_val;
+        pp->stVP9Segments.feature_data[i][2] = h->h.segmentation.feat[i].ref_val;
+        pp->stVP9Segments.feature_data[i][3] = 0; /* no data for skip */
+    }
+
+    pp->log2_tile_cols = h->h.tiling.log2_tile_cols;
+    pp->log2_tile_rows = h->h.tiling.log2_tile_rows;
+
+    pp->uncompressed_header_size_byte_aligned = h->h.uncompressed_header_size;
+    pp->first_partition_size = h->h.compressed_header_size;
+    /* post-increments the context's report id; the value stored is the old id plus one */
+    pp->StatusReportFeedbackNumber = 1 + DXVA_CONTEXT_REPORT_ID(avctx, ctx)++;
+    return 0;
+}
+
+/* Reset *slice to all zeroes, then record the data offset and byte count.
+ * wBadSliceChopping needs no explicit store: the memset already leaves it 0. */
+static void fill_slice_short(DXVA_Slice_VPx_Short *slice, unsigned position, unsigned size)
+{
+    memset(slice, 0, sizeof(*slice));
+    slice->SliceBytesInBuffer    = size;
+    slice->BSNALunitDataLocation = position;
+}
+
+/* Copy the frame data into the accelerator's bitstream buffer, zero-pad it
+ * (at most to the next 128-byte boundary), describe it in *bs and commit the
+ * slice control buffer through *sc. Returns a negative value on failure. */
+static int commit_bitstream_and_slice_buffer(AVCodecContext *avctx,
+                                             DECODER_BUFFER_DESC *bs,
+                                             DECODER_BUFFER_DESC *sc)
+{
+    const VP9SharedContext *h = avctx->priv_data;
+    AVDXVAContext *ctx = avctx->hwaccel_context;
+    struct vp9_dxva2_picture_context *ctx_pic = h->frames[CUR_FRAME].hwaccel_picture_private;
+    void     *dxva_data_ptr;
+    uint8_t  *dxva_data;
+    unsigned dxva_size;
+    unsigned padding;
+    unsigned type;
+    int      too_big;
+
+#if CONFIG_D3D11VA
+    if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD) {
+        type = D3D11_VIDEO_DECODER_BUFFER_BITSTREAM;
+        if (FAILED(ID3D11VideoContext_GetDecoderBuffer(D3D11VA_CONTEXT(ctx)->video_context,
+                                                       D3D11VA_CONTEXT(ctx)->decoder,
+                                                       type,
+                                                       &dxva_size, &dxva_data_ptr)))
+            return -1;
+    }
+#endif
+#if CONFIG_DXVA2
+    if (avctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD) {
+        type = DXVA2_BitStreamDateBufferType;
+        if (FAILED(IDirectXVideoDecoder_GetBuffer(DXVA2_CONTEXT(ctx)->decoder,
+                                                  type,
+                                                  &dxva_data_ptr, &dxva_size)))
+            return -1;
+    }
+#endif
+
+    dxva_data = dxva_data_ptr;
+    too_big   = ctx_pic->slice.SliceBytesInBuffer > dxva_size;
+    if (too_big) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to build bitstream\n");
+    } else {
+        memcpy(dxva_data, ctx_pic->bitstream, ctx_pic->slice.SliceBytesInBuffer);
+        padding = FFMIN(128 - ((ctx_pic->slice.SliceBytesInBuffer) & 127), dxva_size - ctx_pic->slice.SliceBytesInBuffer);
+        memset(dxva_data + ctx_pic->slice.SliceBytesInBuffer, 0, padding);
+        ctx_pic->slice.SliceBytesInBuffer += padding;
+    }
+
+    /* Hand the buffer back even when the data did not fit and nothing was copied. */
+#if CONFIG_D3D11VA
+    if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD)
+        if (FAILED(ID3D11VideoContext_ReleaseDecoderBuffer(D3D11VA_CONTEXT(ctx)->video_context, D3D11VA_CONTEXT(ctx)->decoder, type)))
+            return -1;
+#endif
+#if CONFIG_DXVA2
+    if (avctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD)
+        if (FAILED(IDirectXVideoDecoder_ReleaseBuffer(DXVA2_CONTEXT(ctx)->decoder, type)))
+            return -1;
+#endif
+    if (too_big)
+        return -1;
+
+#if CONFIG_D3D11VA
+    if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD) {
+        D3D11_VIDEO_DECODER_BUFFER_DESC *dsc11 = bs;
+        memset(dsc11, 0, sizeof(*dsc11));
+        dsc11->BufferType           = type;
+        dsc11->DataSize             = ctx_pic->slice.SliceBytesInBuffer;
+        dsc11->NumMBsInBuffer       = 0;
+        type = D3D11_VIDEO_DECODER_BUFFER_SLICE_CONTROL;
+    }
+#endif
+#if CONFIG_DXVA2
+    if (avctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD) {
+        DXVA2_DecodeBufferDesc *dsc2 = bs;
+        memset(dsc2, 0, sizeof(*dsc2));
+        dsc2->CompressedBufferType = type;
+        dsc2->DataSize             = ctx_pic->slice.SliceBytesInBuffer;
+        dsc2->NumMBsInBuffer       = 0;
+        type = DXVA2_SliceControlBufferType;
+    }
+#endif
+
+    return ff_dxva2_commit_buffer(avctx, ctx, sc, type,
+                                  &ctx_pic->slice, sizeof(ctx_pic->slice), 0);
+}
+
+
+/* hwaccel start_frame callback: requires a decoder, a configuration and a
+ * positive count in the DXVA context, then builds the picture parameters for
+ * the current frame and clears the bitstream bookkeeping. Returns 0 or -1. */
+static int dxva2_vp9_start_frame(AVCodecContext *avctx,
+                                 av_unused const uint8_t *buffer,
+                                 av_unused uint32_t size)
+{
+    const VP9SharedContext *h = avctx->priv_data;
+    AVDXVAContext *ctx = avctx->hwaccel_context;
+    struct vp9_dxva2_picture_context *ctx_pic = h->frames[CUR_FRAME].hwaccel_picture_private;
+
+    if (DXVA_CONTEXT_DECODER(avctx, ctx) == NULL || DXVA_CONTEXT_CFG(avctx, ctx) == NULL ||
+        DXVA_CONTEXT_COUNT(avctx, ctx) <= 0)
+        return -1;
+    av_assert0(ctx_pic);
+
+    if (fill_picture_parameters(avctx, ctx, h, &ctx_pic->pp) < 0)
+        return -1;
+    ctx_pic->bitstream      = NULL;
+    ctx_pic->bitstream_size = 0;
+    return 0;
+}
+
+/* hwaccel decode_slice callback: remember the first buffer of the frame,
+ * accumulate the total size and describe this chunk in the slice descriptor. */
+static int dxva2_vp9_decode_slice(AVCodecContext *avctx,
+                                  const uint8_t *buffer,
+                                  uint32_t size)
+{
+    const VP9SharedContext *h = avctx->priv_data;
+    struct vp9_dxva2_picture_context *ctx_pic = h->frames[CUR_FRAME].hwaccel_picture_private;
+
+    if (!ctx_pic->bitstream)
+        ctx_pic->bitstream = buffer;
+    ctx_pic->bitstream_size += size;
+
+    /* the position is this chunk's offset from the first buffer of the frame */
+    fill_slice_short(&ctx_pic->slice, buffer - ctx_pic->bitstream, size);
+    return 0;
+}
+
+/* hwaccel end_frame callback: submit the picture parameters and the collected
+ * bitstream for the current frame; returns -1 if no frame data was received. */
+static int dxva2_vp9_end_frame(AVCodecContext *avctx)
+{
+    VP9SharedContext *h = avctx->priv_data;
+    struct vp9_dxva2_picture_context *ctx_pic = h->frames[CUR_FRAME].hwaccel_picture_private;
+
+    if (ctx_pic->bitstream_size <= 0)
+        return -1;
+
+    return ff_dxva2_common_end_frame(avctx, h->frames[CUR_FRAME].tf.f,
+                                     &ctx_pic->pp, sizeof(ctx_pic->pp),
+                                     NULL, 0,
+                                     commit_bitstream_and_slice_buffer);
+}
+
+#if CONFIG_VP9_DXVA2_HWACCEL
+AVHWAccel ff_vp9_dxva2_hwaccel = { /* VP9 through DXVA2; same callbacks as the D3D11VA entry */
+    .name           = "vp9_dxva2",
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_VP9,
+    .pix_fmt        = AV_PIX_FMT_DXVA2_VLD,
+    .start_frame    = dxva2_vp9_start_frame,
+    .decode_slice   = dxva2_vp9_decode_slice,
+    .end_frame      = dxva2_vp9_end_frame,
+    .frame_priv_data_size = sizeof(struct vp9_dxva2_picture_context), /* reached via hwaccel_picture_private */
+};
+#endif
+
+#if CONFIG_VP9_D3D11VA_HWACCEL
+AVHWAccel ff_vp9_d3d11va_hwaccel = { /* VP9 through D3D11VA; same callbacks as the DXVA2 entry */
+    .name           = "vp9_d3d11va",
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_VP9,
+    .pix_fmt        = AV_PIX_FMT_D3D11VA_VLD,
+    .start_frame    = dxva2_vp9_start_frame,
+    .decode_slice   = dxva2_vp9_decode_slice,
+    .end_frame      = dxva2_vp9_end_frame,
+    .frame_priv_data_size = sizeof(struct vp9_dxva2_picture_context), /* reached via hwaccel_picture_private */
+};
+#endif
diff --git a/libavcodec/eacmv.c b/libavcodec/eacmv.c
index d1b7c685..047be813 100644
--- a/libavcodec/eacmv.c
+++ b/libavcodec/eacmv.c
@@ -242,5 +242,5 @@ AVCodec ff_eacmv_decoder = {
     .init           = cmv_decode_init,
     .close          = cmv_decode_end,
     .decode         = cmv_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/eamad.c b/libavcodec/eamad.c
index 6b7134a9..4e202f98 100644
--- a/libavcodec/eamad.c
+++ b/libavcodec/eamad.c
@@ -29,6 +29,7 @@
  */
 
 #include "avcodec.h"
+#include "blockdsp.h"
 #include "bytestream.h"
 #include "bswapdsp.h"
 #include "get_bits.h"
@@ -36,9 +37,8 @@
 #include "eaidct.h"
 #include "idctdsp.h"
 #include "internal.h"
-#include "mpeg12.h"
 #include "mpeg12data.h"
-#include "libavutil/imgutils.h"
+#include "mpeg12vlc.h"
 
 #define EA_PREAMBLE_SIZE    8
 #define MADk_TAG MKTAG('M', 'A', 'D', 'k')    /* MAD i-frame */
@@ -101,7 +101,7 @@ static inline void comp_block(MadContext *t, AVFrame *frame,
              frame->linesize[0],
              t->last_frame->data[0] + offset,
              t->last_frame->linesize[0], add);
-    } else if (!(t->avctx->flags & CODEC_FLAG_GRAY)) {
+    } else if (!(t->avctx->flags & AV_CODEC_FLAG_GRAY)) {
         int index = j - 3;
         unsigned offset = (mb_y * 8 + (mv_y/2))*t->last_frame->linesize[index] + mb_x * 8 + (mv_x/2);
         if (offset >= (t->avctx->height/2 - 7) * t->last_frame->linesize[index] - 7)
@@ -120,7 +120,7 @@ static inline void idct_put(MadContext *t, AVFrame *frame, int16_t *block,
         ff_ea_idct_put_c(
             frame->data[0] + (mb_y*16 + ((j&2)<<2))*frame->linesize[0] + mb_x*16 + ((j&1)<<3),
             frame->linesize[0], block);
-    } else if (!(t->avctx->flags & CODEC_FLAG_GRAY)) {
+    } else if (!(t->avctx->flags & AV_CODEC_FLAG_GRAY)) {
         int index = j - 3;
         ff_ea_idct_put_c(
             frame->data[index] + (mb_y*8)*frame->linesize[index] + mb_x*8,
@@ -312,7 +312,7 @@ static int decode_frame(AVCodecContext *avctx,
         return AVERROR(ENOMEM);
     s->bbdsp.bswap16_buf(s->bitstream_buf, (const uint16_t *)(buf + bytestream2_tell(&gb)),
                          bytestream2_get_bytes_left(&gb) / 2);
-    memset((uint8_t*)s->bitstream_buf + bytestream2_get_bytes_left(&gb), 0, FF_INPUT_BUFFER_PADDING_SIZE);
+    memset((uint8_t*)s->bitstream_buf + bytestream2_get_bytes_left(&gb), 0, AV_INPUT_BUFFER_PADDING_SIZE);
     init_get_bits(&s->gb, s->bitstream_buf, 8*(bytestream2_get_bytes_left(&gb)));
 
     for (s->mb_y=0; s->mb_y < (avctx->height+15)/16; s->mb_y++)
@@ -348,5 +348,5 @@ AVCodec ff_eamad_decoder = {
     .init           = decode_init,
     .close          = decode_end,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/eatgq.c b/libavcodec/eatgq.c
index 771dc2fb..f8a47cb1 100644
--- a/libavcodec/eatgq.c
+++ b/libavcodec/eatgq.c
@@ -116,7 +116,7 @@ static void tgq_idct_put_mb(TgqContext *s, int16_t (*block)[64], AVFrame *frame,
     ff_ea_idct_put_c(dest_y                + 8, linesize, block[1]);
     ff_ea_idct_put_c(dest_y + 8 * linesize    , linesize, block[2]);
     ff_ea_idct_put_c(dest_y + 8 * linesize + 8, linesize, block[3]);
-    if (!(s->avctx->flags & CODEC_FLAG_GRAY)) {
+    if (!(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
          ff_ea_idct_put_c(dest_cb, frame->linesize[1], block[4]);
          ff_ea_idct_put_c(dest_cr, frame->linesize[2], block[5]);
     }
@@ -142,7 +142,7 @@ static void tgq_idct_put_mb_dconly(TgqContext *s, AVFrame *frame,
     tgq_dconly(s, dest_y                + 8, linesize, dc[1]);
     tgq_dconly(s, dest_y + 8 * linesize,     linesize, dc[2]);
     tgq_dconly(s, dest_y + 8 * linesize + 8, linesize, dc[3]);
-    if (!(s->avctx->flags & CODEC_FLAG_GRAY)) {
+    if (!(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
         tgq_dconly(s, dest_cb, frame->linesize[1], dc[4]);
         tgq_dconly(s, dest_cr, frame->linesize[2], dc[5]);
     }
@@ -157,7 +157,10 @@ static int tgq_decode_mb(TgqContext *s, AVFrame *frame, int mb_y, int mb_x)
     mode = bytestream2_get_byte(&s->gb);
     if (mode > 12) {
         GetBitContext gb;
-        init_get_bits8(&gb, s->gb.buffer, FFMIN(bytestream2_get_bytes_left(&s->gb), mode));
+        int ret = init_get_bits8(&gb, s->gb.buffer, FFMIN(bytestream2_get_bytes_left(&s->gb), mode));
+        if (ret < 0)
+            return ret;
+
         for (i = 0; i < 6; i++)
             tgq_decode_block(s, s->block[i], &gb);
         tgq_idct_put_mb(s, s->block, frame, mb_x, mb_y);
@@ -249,5 +252,5 @@ AVCodec ff_eatgq_decoder = {
     .priv_data_size = sizeof(TgqContext),
     .init           = tgq_decode_init,
     .decode         = tgq_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/eatgv.c b/libavcodec/eatgv.c
index 835ff151..882bf077 100644
--- a/libavcodec/eatgv.c
+++ b/libavcodec/eatgv.c
@@ -364,5 +364,5 @@ AVCodec ff_eatgv_decoder = {
     .init           = tgv_decode_init,
     .close          = tgv_decode_end,
     .decode         = tgv_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/eatqi.c b/libavcodec/eatqi.c
index 34fc30db..2423e21a 100644
--- a/libavcodec/eatqi.c
+++ b/libavcodec/eatqi.c
@@ -85,7 +85,7 @@ static inline void tqi_idct_put(TqiContext *t, AVFrame *frame, int16_t (*block)[
     ff_ea_idct_put_c(dest_y              + 8, linesize, block[1]);
     ff_ea_idct_put_c(dest_y + 8*linesize    , linesize, block[2]);
     ff_ea_idct_put_c(dest_y + 8*linesize + 8, linesize, block[3]);
-    if(!(s->avctx->flags&CODEC_FLAG_GRAY)) {
+    if(!(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
         ff_ea_idct_put_c(dest_cb, frame->linesize[1], block[4]);
         ff_ea_idct_put_c(dest_cr, frame->linesize[2], block[5]);
     }
@@ -162,5 +162,5 @@ AVCodec ff_eatqi_decoder = {
     .init           = tqi_decode_init,
     .close          = tqi_decode_end,
     .decode         = tqi_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/elsdec.c b/libavcodec/elsdec.c
new file mode 100644
index 00000000..47979654
--- /dev/null
+++ b/libavcodec/elsdec.c
@@ -0,0 +1,422 @@
+/*
+ * ELS (Entropy Logarithmic-Scale) decoder
+ *
+ * Copyright (c) 2013 Maxim Poliakovski
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Entropy Logarithmic-Scale binary arithmetic decoder
+ */
+
+#include <math.h>
+#include <stdint.h>
+
+#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
+
+#include "avcodec.h"
+#include "elsdec.h"
+
+/* ELS coder constants and structures. */
+#define ELS_JOTS_PER_BYTE   36
+#define ELS_MAX             (1 << 24)
+#define RUNG_SPACE          (64 * sizeof(ElsRungNode))
+
+/* ELS coder tables. Ladder[]: per rung, the adjustments added to j on the MPS/LPS paths and the successor rungs. */
+static const struct Ladder {
+    int8_t  AMps;
+    int8_t  ALps;
+    uint8_t next0;
+    uint8_t next1;
+} Ladder[174] = {
+    { -6,   -5,   2,   1 },
+    { -2,  -12,   3,   6 },
+    { -2,  -12,   4,   6 },
+    { -1,  -16,   7,   5 },
+    { -1,  -16,   8,  10 },
+    { -5,   -6,  11,   9 },
+    { -6,   -5,  10,   5 },
+    { -1,  -18,  13,  11 },
+    { -1,  -18,  12,  14 },
+    { -6,   -5,  15,  18 },
+    { -5,   -6,  14,   9 },
+    { -3,   -8,  17,  15 },
+    { -1,  -20,  20,  16 },
+    { -1,  -20,  23,  17 },
+    { -3,   -8,  16,  18 },
+    { -5,   -6,  19,  26 },
+    { -3,   -9,  22,  24 },
+    { -3,   -9,  21,  19 },
+    { -5,   -6,  24,  26 },
+    { -4,   -7,  27,  25 },
+    { -1,  -22,  34,  28 },
+    { -2,  -11,  29,  27 },
+    { -2,  -11,  28,  30 },
+    { -1,  -22,  39,  29 },
+    { -4,   -7,  30,  32 },
+    { -6,   -5,  33,  31 },
+    { -6,   -5,  32,  25 },
+    { -3,   -8,  35,  33 },
+    { -2,  -12,  36,  38 },
+    { -2,  -12,  37,  35 },
+    { -3,   -8,  38,  40 },
+    { -6,   -5,  41,  48 },
+    { -6,   -5,  40,  31 },
+    { -5,   -6,  43,  41 },
+    { -1,  -24,  94,  42 },
+    { -3,   -8,  45,  43 },
+    { -2,  -12,  42,  44 },
+    { -2,  -12,  47,  45 },
+    { -3,   -8,  44,  46 },
+    { -1,  -24, 125,  47 },
+    { -5,   -6,  46,  48 },
+    { -6,   -5,  49,  49 },
+    { -2,  -13, 152, 164 },
+    { -4,   -7,  51,  49 },
+    { -3,   -9, 164, 168 },
+    { -3,   -9,  55,  51 },
+    { -4,   -7, 168, 170 },
+    { -2,  -13,  67,  55 },
+    { -6,   -5, 170,  49 },
+    { -6,   -5,  51, 170 },
+    { -1,  -72,  50,  74 },
+    { -4,   -7,  53,  49 },
+    { -1,  -61,  50,  74 },
+    { -3,   -8,  55,  49 },
+    { -1,  -51,  52,  76 },
+    { -3,   -9,  57,  51 },
+    { -1,  -46,  54,  76 },
+    { -2,  -10,  59,  53 },
+    { -1,  -43,  56,  78 },
+    { -2,  -11,  61,  53 },
+    { -1,  -41,  58,  80 },
+    { -2,  -12,  63,  55 },
+    { -1,  -39,  60,  82 },
+    { -2,  -12,  65,  55 },
+    { -1,  -37,  62,  84 },
+    { -2,  -13,  67,  57 },
+    { -1,  -36,  64,  86 },
+    { -1,  -14,  69,  59 },
+    { -1,  -35,  66,  88 },
+    { -1,  -14,  71,  59 },
+    { -1,  -34,  68,  90 },
+    { -1,  -15,  73,  61 },
+    { -1,  -33,  70,  92 },
+    { -1,  -15,  75,  61 },
+    { -1,  -32,  72,  94 },
+    { -1,  -15,  77,  63 },
+    { -1,  -31,  74,  96 },
+    { -1,  -16,  79,  65 },
+    { -1,  -31,  76,  98 },
+    { -1,  -16,  81,  67 },
+    { -1,  -30,  78, 100 },
+    { -1,  -17,  83,  67 },
+    { -1,  -29,  80, 102 },
+    { -1,  -17,  85,  69 },
+    { -1,  -29,  82, 104 },
+    { -1,  -18,  87,  71 },
+    { -1,  -28,  84, 104 },
+    { -1,  -18,  89,  73 },
+    { -1,  -28,  86, 108 },
+    { -1,  -18,  91,  73 },
+    { -1,  -27,  88, 108 },
+    { -1,  -19,  93,  75 },
+    { -1,  -27,  90, 112 },
+    { -1,  -19,  95,  77 },
+    { -1,  -26,  92, 112 },
+    { -1,  -20,  97,  79 },
+    { -1,  -26,  94, 114 },
+    { -1,  -20,  99,  81 },
+    { -1,  -25,  96, 116 },
+    { -1,  -20, 101,  83 },
+    { -1,  -25,  98, 118 },
+    { -1,  -21, 103,  83 },
+    { -1,  -24, 100, 120 },
+    { -1,  -21, 105,  85 },
+    { -1,  -24, 102, 122 },
+    { -1,  -22, 107,  87 },
+    { -1,  -23, 104, 124 },
+    { -1,  -22, 109,  89 },
+    { -1,  -23, 106, 126 },
+    { -1,  -22, 111,  91 },
+    { -1,  -22, 108, 128 },
+    { -1,  -23, 113,  93 },
+    { -1,  -22, 110, 130 },
+    { -1,  -23, 115,  95 },
+    { -1,  -22, 112, 132 },
+    { -1,  -24, 117,  97 },
+    { -1,  -21, 114, 134 },
+    { -1,  -24, 119,  99 },
+    { -1,  -21, 116, 136 },
+    { -1,  -25, 121, 101 },
+    { -1,  -20, 118, 136 },
+    { -1,  -25, 123, 103 },
+    { -1,  -20, 120, 138 },
+    { -1,  -26, 125, 105 },
+    { -1,  -20, 122, 140 },
+    { -1,  -26, 127, 107 },
+    { -1,  -19, 124, 142 },
+    { -1,  -27, 129, 107 },
+    { -1,  -19, 126, 144 },
+    { -1,  -27, 131, 111 },
+    { -1,  -18, 128, 146 },
+    { -1,  -28, 133, 111 },
+    { -1,  -18, 130, 146 },
+    { -1,  -28, 135, 115 },
+    { -1,  -18, 132, 148 },
+    { -1,  -29, 137, 115 },
+    { -1,  -17, 134, 150 },
+    { -1,  -29, 139, 117 },
+    { -1,  -17, 136, 152 },
+    { -1,  -30, 141, 119 },
+    { -1,  -16, 138, 152 },
+    { -1,  -31, 143, 121 },
+    { -1,  -16, 140, 154 },
+    { -1,  -31, 145, 123 },
+    { -1,  -15, 142, 156 },
+    { -1,  -32, 147, 125 },
+    { -1,  -15, 144, 158 },
+    { -1,  -33, 149, 127 },
+    { -1,  -15, 146, 158 },
+    { -1,  -34, 151, 129 },
+    { -1,  -14, 148, 160 },
+    { -1,  -35, 153, 131 },
+    { -1,  -14, 150, 160 },
+    { -1,  -36, 155, 133 },
+    { -2,  -13, 152, 162 },
+    { -1,  -37, 157, 135 },
+    { -2,  -12, 154, 164 },
+    { -1,  -39, 159, 137 },
+    { -2,  -12, 156, 164 },
+    { -1,  -41, 161, 139 },
+    { -2,  -11, 158, 166 },
+    { -1,  -43, 163, 141 },
+    { -2,  -10, 160, 166 },
+    { -1,  -46, 165, 143 },
+    { -3,   -9, 162, 168 },
+    { -1,  -51, 167, 143 },
+    { -3,   -8, 164, 170 },
+    { -1,  -61, 169, 145 },
+    { -4,   -7, 166, 170 },
+    { -1,  -72, 169, 145 },
+    { -6,   -5, 168,  49 },
+    {  0, -108, 171, 171 },
+    {  0, -108, 172, 172 },
+    { -6,   -5, 173, 173 },
+};
+
+static const uint32_t els_exp_tab[ELS_JOTS_PER_BYTE * 4 + 1] = {
+           0,        0,       0,       0,       0,       0,         0,        0,
+           0,        0,       0,       0,       0,       0,         0,        0,
+           0,        0,       0,       0,       0,       0,         0,        0,
+           0,        0,       0,       0,       0,       0,         0,        0,
+           0,        0,       0,       0,       1,       1,         1,        1,
+           1,        2,       2,       2,       3,       4,         4,        5,
+           6,        7,       8,      10,      11,      13,        16,       18,
+          21,       25,      29,      34,      40,      47,        54,       64,
+          74,       87,     101,     118,     138,      161,      188,      219,
+         256,      298,     348,     406,     474,      552,      645,      752,
+         877,     1024,    1194,    1393,    1625,     1896,     2211,     2580,
+        3010,     3511,    4096,    4778,    5573,     6501,     7584,     8847,
+       10321,    12040,   14045,   16384,   19112,    22295,    26007,    30339,
+       35391,    41285,   48160,   56180,   65536,    76288,    89088,   103936,
+      121344,   141312,  165120,  192512,  224512,   262144,   305664,   356608,
+      416000,   485376,  566016,  660480,  770560,   898816,  1048576,  1223168,
+     1426688,  1664256, 1941504, 2264832, 2642176,  3082240,  3595520,  4194304,
+     4892672,  5707520, 6657792, 7766784, 9060096, 10568960, 12328960, 14382080,
+    16777216,
+};
+
+/* Prime the decoder state from up to the first three input bytes.
+ * Empty input is not read at all and leaves ctx->err set to AVERROR_EOF. */
+void ff_els_decoder_init(ElsDecCtx *ctx, const uint8_t *in, size_t data_size)
+{
+    int nbytes;
+
+    if (data_size >= 3) {
+        ctx->x = AV_RB24(in);
+        nbytes = 3;
+    } else if (data_size == 2) {
+        ctx->x = AV_RB16(in);
+        nbytes = 2;
+    } else {
+        nbytes = data_size;   /* 0 or 1: never read past the input */
+        ctx->x = nbytes ? *in : 0;
+    }
+    ctx->in_buf    = in + nbytes;
+    ctx->data_size = data_size - nbytes;
+    ctx->err       = data_size ? 0 : AVERROR_EOF;
+    ctx->j         = ELS_JOTS_PER_BYTE;
+    ctx->t         = ELS_MAX;
+    ctx->diff      = FFMIN(ELS_MAX - ctx->x,
+                           ELS_MAX - els_exp_tab[ELS_JOTS_PER_BYTE * 4 - 1]);
+}
+
+void ff_els_decoder_uninit(ElsUnsignedRung *rung)
+{
+    av_freep(&rung->rem_rung_list); /* frees and NULLs the list, so a reused rung allocates afresh */
+}
+
+/* Shift the next input byte into x and rescale j and t to match.
+ * Records and returns AVERROR_EOF when the input is exhausted, else returns 0. */
+static int els_import_byte(ElsDecCtx *ctx)
+{
+    if (ctx->data_size == 0)
+        return ctx->err = AVERROR_EOF;
+
+    ctx->data_size--;
+    ctx->x   = (ctx->x << 8) | *ctx->in_buf++;
+    ctx->t <<= 8;
+    ctx->j  += ELS_JOTS_PER_BYTE;
+    return 0;
+}
+
+int ff_els_decode_bit(ElsDecCtx *ctx, uint8_t *rung)
+{
+    int z, bit, ret;
+    const uint32_t *pAllowable = &els_exp_tab[ELS_JOTS_PER_BYTE * 3];
+    /* Decodes one bit with the adaptive state *rung; once ctx->err is set every call yields 0. */
+    if (ctx->err)
+        return 0;
+    /* Take the table entry selected by j plus this rung's ALps off both t and diff. */
+    z          = pAllowable[ctx->j + Ladder[*rung].ALps];
+    ctx->t    -= z;
+    ctx->diff -= z;
+    if (ctx->diff > 0)
+        return *rung & 1;   /* shortcut for x < t > pAllowable[j - 1] */
+    /* Slow path: t versus x selects MPS or LPS; *rung then moves to next0 or next1. */
+    if (ctx->t > ctx->x) {  /* decode most probable symbol (MPS) */
+        ctx->j += Ladder[*rung].AMps;
+        while (ctx->t > pAllowable[ctx->j])
+            ctx->j++;
+
+        if (ctx->j <= 0) { /* MPS: import one byte from bytestream. */
+            ret = els_import_byte(ctx);
+            if (ret < 0)
+                return ret;
+        }
+
+        z     = ctx->t;
+        bit   = *rung & 1;
+        *rung = Ladder[*rung].next0;
+    } else { /* decode less probable symbol (LPS) */
+        ctx->x -= ctx->t;
+        ctx->t  = z;
+
+        ctx->j += Ladder[*rung].ALps;
+        if (ctx->j <= 0) {
+            /* LPS: import one byte from bytestream. */
+            z <<= 8;
+            ret = els_import_byte(ctx);
+            if (ret < 0)
+                return ret;
+            if (ctx->j <= 0) {
+                /* LPS: import second byte from bytestream. */
+                z <<= 8;
+                ret = els_import_byte(ctx);
+                if (ret < 0)
+                    return ret;
+                while (pAllowable[ctx->j - 1] >= z)
+                    ctx->j--;
+            }
+        }
+
+        bit   = !(*rung & 1);
+        *rung = Ladder[*rung].next1;
+    }
+    /* Recompute diff, the margin tested by the shortcut at the top of the next call. */
+    ctx->diff = FFMIN(z - ctx->x, z - pAllowable[ctx->j - 1]);
+
+    return bit;
+}
+
+/* Decode an unsigned value: a unary prefix that yields n, then n remainder bits
+ * whose adaptive states live in a node list that grows on demand. On failure
+ * ctx->err is set and the return value carries no meaning. */
+unsigned ff_els_decode_unsigned(ElsDecCtx *ctx, ElsUnsignedRung *ur)
+{
+    int i, n, r, bit;
+    ElsRungNode *rung_node;
+
+    if (ctx->err)
+        return 0;
+
+    /* decode unary prefix */
+    for (n = 0; n < ELS_EXPGOLOMB_LEN + 1; n++)
+        if (ff_els_decode_bit(ctx, &ur->prefix_rung[n]))
+            break;
+
+    /* handle the error/overflow case */
+    if (ctx->err || n >= ELS_EXPGOLOMB_LEN) {
+        ctx->err = AVERROR_INVALIDDATA;
+        return 0;
+    }
+
+    /* handle the zero case */
+    if (!n)
+        return 0;
+
+    /* initialize probability tree */
+    if (!ur->rem_rung_list) {
+        ur->rem_rung_list = av_realloc(NULL, RUNG_SPACE);
+        if (!ur->rem_rung_list) {
+            ctx->err = AVERROR(ENOMEM);
+            return 0;
+        }
+        memset(ur->rem_rung_list, 0, RUNG_SPACE);
+        ur->rung_list_size = RUNG_SPACE;
+        ur->avail_index    = ELS_EXPGOLOMB_LEN;
+    }
+
+    /* decode the remainder */
+    for (i = 0, r = 0, bit = 0; i < n; i++) {
+        if (!i)
+            rung_node = &ur->rem_rung_list[n];
+        else {
+            if (!rung_node->next_index) {
+                if (ur->rung_list_size <= (ur->avail_index + 2) * sizeof(ElsRungNode)) {
+                    // remember rung_node position
+                    ptrdiff_t pos = rung_node - ur->rem_rung_list;
+                    // on failure av_reallocp() frees the list and NULLs the pointer
+                    ctx->err = av_reallocp(&ur->rem_rung_list,
+                                           ur->rung_list_size + RUNG_SPACE);
+                    if (ctx->err < 0)
+                        return 0;
+                    memset((uint8_t *) ur->rem_rung_list + ur->rung_list_size, 0,
+                           RUNG_SPACE);
+                    ur->rung_list_size += RUNG_SPACE;
+                    // restore rung_node position in the new list
+                    rung_node = &ur->rem_rung_list[pos];
+                }
+                rung_node->next_index = ur->avail_index;
+                ur->avail_index      += 2;
+            }
+            rung_node = &ur->rem_rung_list[rung_node->next_index + bit];
+        }
+
+        bit = ff_els_decode_bit(ctx, &rung_node->rung);
+        if (ctx->err)
+            return bit;
+
+        r = (r << 1) + bit;
+    }
+
+    return (1 << n) - 1 + r; /* make value from exp golomb code */
+}
diff --git a/libavcodec/elsdec.h b/libavcodec/elsdec.h
new file mode 100644
index 00000000..139a24ab
--- /dev/null
+++ b/libavcodec/elsdec.h
@@ -0,0 +1,60 @@
+/*
+ * ELS (Entropy Logarithmic-Scale) decoder
+ *
+ * Copyright (c) 2013 Maxim Poliakovski
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Entropy Logarithmic-Scale binary arithmetic coder
+ */
+
+#ifndef AVCODEC_ELSDEC_H
+#define AVCODEC_ELSDEC_H
+
+#include <stdint.h>
+#include <sys/types.h>
+
+#define ELS_EXPGOLOMB_LEN   10
+
+typedef struct ElsDecCtx { /* decoder context taken by ff_els_decoder_init() and the ff_els_decode_*() calls */
+    const uint8_t *in_buf;  /* presumably the input buffer given to ff_els_decoder_init() - confirm in elsdec.c */
+    unsigned x;             /* coder state; exact meaning is not visible from this header */
+    size_t data_size;       /* presumably tied to the data_size argument of ff_els_decoder_init() - confirm */
+    int j, t, diff, err;    /* err: non-zero once decoding failed (e.g. AVERROR(ENOMEM)); j, t, diff: coder state */
+} ElsDecCtx;
+
+typedef struct ElsRungNode { /* one node of the binary tree walked while decoding the remainder bits */
+    uint8_t  rung;           /* state handed to ff_els_decode_bit() when a bit is decoded at this node */
+    uint16_t next_index;     /* index in rem_rung_list of this node's child pair (child = next_index + bit); 0 = none yet */
+} ElsRungNode;
+
+typedef struct ElsUnsignedRung { /* state passed to ff_els_decode_unsigned() and ff_els_decoder_uninit() */
+    uint8_t      prefix_rung[ELS_EXPGOLOMB_LEN + 1]; /* NOTE(review): presumably rungs for the code prefix - confirm in elsdec.c */
+    ElsRungNode  *rem_rung_list;  /* heap array of nodes for the remainder bits, (re)allocated with av_realloc() */
+    size_t       rung_list_size;  /* allocated size of rem_rung_list in bytes */
+    uint16_t     avail_index;     /* next unused node index; set to ELS_EXPGOLOMB_LEN on allocation, advanced by 2 */
+} ElsUnsignedRung;
+
+void ff_els_decoder_init(ElsDecCtx *ctx, const uint8_t *in, size_t data_size);
+void ff_els_decoder_uninit(ElsUnsignedRung *rung);
+int  ff_els_decode_bit(ElsDecCtx *ctx, unsigned char *rung);
+unsigned ff_els_decode_unsigned(ElsDecCtx *ctx, ElsUnsignedRung *ur);
+
+#endif /* AVCODEC_ELSDEC_H */
diff --git a/libavcodec/error_resilience.c b/libavcodec/error_resilience.c
index df4a64d1..5a75fadd 100644
--- a/libavcodec/error_resilience.c
+++ b/libavcodec/error_resilience.c
@@ -381,14 +381,19 @@ static void guess_mv(ERContext *s)
 #define MV_UNCHANGED 1
     const int mb_stride = s->mb_stride;
     const int mb_width  = s->mb_width;
-    const int mb_height = s->mb_height;
+    int mb_height = s->mb_height;
     int i, depth, num_avail;
     int mb_x, mb_y, mot_step, mot_stride;
 
+    if (s->last_pic.f && s->last_pic.f->data[0])
+        mb_height = FFMIN(mb_height, (s->last_pic.f->height+15)>>4);
+    if (s->next_pic.f && s->next_pic.f->data[0])
+        mb_height = FFMIN(mb_height, (s->next_pic.f->height+15)>>4);
+
     set_mv_strides(s, &mot_step, &mot_stride);
 
     num_avail = 0;
-    for (i = 0; i < s->mb_num; i++) {
+    for (i = 0; i < mb_width * mb_height; i++) {
         const int mb_xy = s->mb_index2xy[i];
         int f = 0;
         int error = s->error_status_table[mb_xy];
@@ -413,7 +418,7 @@ static void guess_mv(ERContext *s)
 
     if ((!(s->avctx->error_concealment&FF_EC_GUESS_MVS)) ||
         num_avail <= mb_width / 2) {
-        for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
+        for (mb_y = 0; mb_y < mb_height; mb_y++) {
             for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
                 const int mb_xy = mb_x + mb_y * s->mb_stride;
                 int mv_dir = (s->last_pic.f && s->last_pic.f->data[0]) ? MV_DIR_FORWARD : MV_DIR_BACKWARD;
@@ -442,7 +447,7 @@ static void guess_mv(ERContext *s)
             int score_sum = 0;
 
             changed = 0;
-            for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
+            for (mb_y = 0; mb_y < mb_height; mb_y++) {
                 for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
                     const int mb_xy        = mb_x + mb_y * s->mb_stride;
                     int mv_predictor[8][2] = { { 0 } };
@@ -675,7 +680,7 @@ static void guess_mv(ERContext *s)
         if (none_left)
             return;
 
-        for (i = 0; i < s->mb_num; i++) {
+        for (i = 0; i < mb_width * mb_height; i++) {
             int mb_xy = s->mb_index2xy[i];
             if (fixed[mb_xy])
                 fixed[mb_xy] = MV_FROZEN;
@@ -777,7 +782,9 @@ void ff_er_frame_start(ERContext *s)
 static int er_supported(ERContext *s)
 {
     if(s->avctx->hwaccel && s->avctx->hwaccel->decode_slice           ||
-       s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU          ||
+#if FF_API_CAP_VDPAU
+       s->avctx->codec->capabilities&AV_CODEC_CAP_HWACCEL_VDPAU          ||
+#endif
        !s->cur_pic.f                                                  ||
        s->cur_pic.field_picture
     )
diff --git a/libavcodec/escape124.c b/libavcodec/escape124.c
index 28167419..50a86c83 100644
--- a/libavcodec/escape124.c
+++ b/libavcodec/escape124.c
@@ -155,7 +155,7 @@ static MacroBlock decode_macroblock(Escape124Context* s, GetBitContext* gb,
     // depth = 0 means that this shouldn't read any bits;
     // in theory, this is the same as get_bits(gb, 0), but
     // that doesn't actually work.
-    block_index = depth ? get_bits(gb, depth) : 0;
+    block_index = get_bitsz(gb, depth);
 
     if (*codebook_index == 1) {
         block_index += superblock_index << s->codebooks[1].depth;
@@ -373,5 +373,5 @@ AVCodec ff_escape124_decoder = {
     .init           = escape124_decode_init,
     .close          = escape124_decode_close,
     .decode         = escape124_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/escape130.c b/libavcodec/escape130.c
index 129011b9..f4f64d84 100644
--- a/libavcodec/escape130.c
+++ b/libavcodec/escape130.c
@@ -356,5 +356,5 @@ AVCodec ff_escape130_decoder = {
     .init           = escape130_decode_init,
     .close          = escape130_decode_close,
     .decode         = escape130_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/evrcdec.c b/libavcodec/evrcdec.c
index c605a13d..8728c02a 100644
--- a/libavcodec/evrcdec.c
+++ b/libavcodec/evrcdec.c
@@ -26,6 +26,7 @@
  */
 
 #include "libavutil/mathematics.h"
+#include "libavutil/opt.h"
 #include "avcodec.h"
 #include "internal.h"
 #include "get_bits.h"
@@ -66,6 +67,10 @@ typedef struct EVRCAFrame {
 } EVRCAFrame;
 
 typedef struct EVRCContext {
+    AVClass *class;
+
+    int              postfilter;
+
     GetBitContext    gb;
     evrc_packet_rate bitrate;
     evrc_packet_rate last_valid_bitrate;
@@ -876,9 +881,11 @@ static int evrc_decode_frame(AVCodecContext *avctx, void *data,
         memmove(e->pitch, e->pitch + subframe_size, ACB_SIZE * sizeof(float));
 
         synthesis_filter(e->pitch + ACB_SIZE, ilpc,
-                         e->synthesis, subframe_size, tmp);
-        postfilter(e, tmp, ilpc, samples, pitch_lag,
-                   &postfilter_coeffs[e->bitrate], subframe_size);
+                         e->synthesis, subframe_size,
+                         e->postfilter ? tmp : samples);
+        if (e->postfilter)
+            postfilter(e, tmp, ilpc, samples, pitch_lag,
+                       &postfilter_coeffs[e->bitrate], subframe_size);
 
         samples += subframe_size;
     }
@@ -906,6 +913,21 @@ static int evrc_decode_frame(AVCodecContext *avctx, void *data,
     return avpkt->size;
 }
 
+#define OFFSET(x) offsetof(EVRCContext, x) /* byte offset of field x inside EVRCContext */
+#define AD (AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_DECODING_PARAM) /* parenthesized so it expands as a single operand */
+
+static const AVOption options[] = { /* private decoder options; hooked up through evrcdec_class below */
+    { "postfilter", "enable postfilter", OFFSET(postfilter), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, AD }, /* default 1, range 0..1 */
+    { NULL } /* terminator entry */
+};
+
+static const AVClass evrcdec_class = { /* referenced from ff_evrc_decoder.priv_class */
+    .class_name = "evrc",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
 AVCodec ff_evrc_decoder = {
     .name           = "evrc",
     .long_name      = NULL_IF_CONFIG_SMALL("EVRC (Enhanced Variable Rate Codec)"),
@@ -913,6 +935,7 @@ AVCodec ff_evrc_decoder = {
     .id             = AV_CODEC_ID_EVRC,
     .init           = evrc_decode_init,
     .decode         = evrc_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .priv_data_size = sizeof(EVRCContext),
+    .priv_class     = &evrcdec_class,
 };
diff --git a/libavcodec/exr.c b/libavcodec/exr.c
index 9308ae03..9ec99d65 100644
--- a/libavcodec/exr.c
+++ b/libavcodec/exr.c
@@ -37,6 +37,7 @@
 #include "libavutil/imgutils.h"
 #include "libavutil/intfloat.h"
 #include "libavutil/opt.h"
+#include "libavutil/color_utils.h"
 
 #include "avcodec.h"
 #include "bytestream.h"
@@ -110,6 +111,7 @@ typedef struct EXRContext {
 
     const char *layer;
 
+    enum AVColorTransferCharacteristic apply_trc_type;
     float gamma;
     uint16_t gamma_table[65536];
 } EXRContext;
@@ -459,7 +461,7 @@ static int huf_build_dec_table(const uint64_t *hcode, int im,
         lc += 8;                                                              \
 }
 
-#define get_code(po, rlc, c, lc, gb, out, oe)                                 \
+#define get_code(po, rlc, c, lc, gb, out, oe, outb)                           \
 {                                                                             \
         if (po == rlc) {                                                      \
             if (lc < 8)                                                       \
@@ -468,7 +470,7 @@ static int huf_build_dec_table(const uint64_t *hcode, int im,
                                                                               \
             cs = c >> lc;                                                     \
                                                                               \
-            if (out + cs > oe)                                                \
+            if (out + cs > oe || out == outb)                                 \
                 return AVERROR_INVALIDDATA;                                   \
                                                                               \
             s = out[-1];                                                      \
@@ -501,7 +503,7 @@ static int huf_decode(const uint64_t *hcode, const HufDec *hdecod,
 
             if (pl.len) {
                 lc -= pl.len;
-                get_code(pl.lit, rlc, c, lc, gb, out, oe);
+                get_code(pl.lit, rlc, c, lc, gb, out, oe, outb);
             } else {
                 int j;
 
@@ -518,7 +520,7 @@ static int huf_decode(const uint64_t *hcode, const HufDec *hdecod,
                         if ((hcode[pl.p[j]] >> 6) ==
                             ((c >> (lc - l)) & ((1LL << l) - 1))) {
                             lc -= l;
-                            get_code(pl.p[j], rlc, c, lc, gb, out, oe);
+                            get_code(pl.p[j], rlc, c, lc, gb, out, oe, outb);
                             break;
                         }
                     }
@@ -539,7 +541,7 @@ static int huf_decode(const uint64_t *hcode, const HufDec *hdecod,
 
         if (pl.len) {
             lc -= pl.len;
-            get_code(pl.lit, rlc, c, lc, gb, out, oe);
+            get_code(pl.lit, rlc, c, lc, gb, out, oe, outb);
         } else {
             return AVERROR_INVALIDDATA;
         }
@@ -842,6 +844,7 @@ static int decode_block(AVCodecContext *avctx, void *tdata,
     int bxmin = s->xmin * 2 * s->desc->nb_components;
     int i, x, buf_size = s->buf_size;
     float one_gamma = 1.0f / s->gamma;
+    avpriv_trc_function trc_func = avpriv_get_trc_function_from_trc(s->apply_trc_type);
     int ret;
 
     line_offset = AV_RL64(s->gb.buffer + jobnr * 8);
@@ -921,24 +924,43 @@ static int decode_block(AVCodecContext *avctx, void *tdata,
         ptr_x += s->xmin * s->desc->nb_components;
         if (s->pixel_type == EXR_FLOAT) {
             // 32-bit
-            for (x = 0; x < xdelta; x++) {
-                union av_intfloat32 t;
-                t.i = bytestream_get_le32(&r);
-                if (t.f > 0.0f)  /* avoid negative values */
-                    t.f = powf(t.f, one_gamma);
-                *ptr_x++ = exr_flt2uint(t.i);
-
-                t.i = bytestream_get_le32(&g);
-                if (t.f > 0.0f)
-                    t.f = powf(t.f, one_gamma);
-                *ptr_x++ = exr_flt2uint(t.i);
-
-                t.i = bytestream_get_le32(&b);
-                if (t.f > 0.0f)
-                    t.f = powf(t.f, one_gamma);
-                *ptr_x++ = exr_flt2uint(t.i);
-                if (channel_buffer[3])
-                    *ptr_x++ = exr_flt2uint(bytestream_get_le32(&a));
+            if (trc_func) {
+                for (x = 0; x < xdelta; x++) {
+                    union av_intfloat32 t;
+                    t.i = bytestream_get_le32(&r);
+                    t.f = trc_func(t.f);
+                    *ptr_x++ = exr_flt2uint(t.i);
+
+                    t.i = bytestream_get_le32(&g);
+                    t.f = trc_func(t.f);
+                    *ptr_x++ = exr_flt2uint(t.i);
+
+                    t.i = bytestream_get_le32(&b);
+                    t.f = trc_func(t.f);
+                    *ptr_x++ = exr_flt2uint(t.i);
+                    if (channel_buffer[3])
+                        *ptr_x++ = exr_flt2uint(bytestream_get_le32(&a));
+                }
+            } else {
+                for (x = 0; x < xdelta; x++) {
+                    union av_intfloat32 t;
+                    t.i = bytestream_get_le32(&r);
+                    if (t.f > 0.0f)  /* avoid negative values */
+                        t.f = powf(t.f, one_gamma);
+                    *ptr_x++ = exr_flt2uint(t.i);
+
+                    t.i = bytestream_get_le32(&g);
+                    if (t.f > 0.0f)
+                        t.f = powf(t.f, one_gamma);
+                    *ptr_x++ = exr_flt2uint(t.i);
+
+                    t.i = bytestream_get_le32(&b);
+                    if (t.f > 0.0f)
+                        t.f = powf(t.f, one_gamma);
+                    *ptr_x++ = exr_flt2uint(t.i);
+                    if (channel_buffer[3])
+                        *ptr_x++ = exr_flt2uint(bytestream_get_le32(&a));
+                }
             }
         } else {
             // 16-bit
@@ -1008,7 +1030,7 @@ static int check_header_variable(EXRContext *s,
 static int decode_header(EXRContext *s)
 {
     int current_channel_offset = 0;
-    int magic_number, version, flags, i;
+    int magic_number, version, flags, i, sar = 0;
 
     s->xmin               = ~0;
     s->xmax               = ~0;
@@ -1108,8 +1130,7 @@ static int decode_header(EXRContext *s)
 
                 current_pixel_type = bytestream2_get_le32(&ch_gb);
                 if (current_pixel_type >= EXR_UNKNOWN) {
-                    avpriv_report_missing_feature(s->avctx,
-                                                  "Pixel type %d.\n",
+                    avpriv_report_missing_feature(s->avctx, "Pixel type %d",
                                                   current_pixel_type);
                     return AVERROR_PATCHWELCOME;
                 }
@@ -1206,8 +1227,7 @@ static int decode_header(EXRContext *s)
             if (!var_size)
                 return AVERROR_INVALIDDATA;
 
-            ff_set_sar(s->avctx,
-                       av_d2q(av_int2float(bytestream2_get_le32(&s->gb)), 255));
+            sar = bytestream2_get_le32(&s->gb);
 
             continue;
         } else if ((var_size = check_header_variable(s, "compression",
@@ -1238,6 +1258,8 @@ static int decode_header(EXRContext *s)
         bytestream2_skip(&s->gb, bytestream2_get_le32(&s->gb));
     }
 
+    ff_set_sar(s->avctx, av_d2q(av_int2float(sar), 255));
+
     if (s->compression == EXR_UNKN) {
         av_log(s->avctx, AV_LOG_ERROR, "Missing compression attribute.\n");
         return AVERROR_INVALIDDATA;
@@ -1287,6 +1309,9 @@ static int decode_frame(AVCodecContext *avctx, void *data,
         return AVERROR_INVALIDDATA;
     }
 
+    if (s->apply_trc_type != AVCOL_TRC_UNSPECIFIED)
+        avctx->color_trc = s->apply_trc_type;
+
     switch (s->compression) {
     case EXR_RAW:
     case EXR_RLE:
@@ -1364,21 +1389,31 @@ static av_cold int decode_init(AVCodecContext *avctx)
     uint32_t i;
     union av_intfloat32 t;
     float one_gamma = 1.0f / s->gamma;
+    avpriv_trc_function trc_func = NULL;
 
     s->avctx              = avctx;
 
-    if (one_gamma > 0.9999f && one_gamma < 1.0001f) {
-        for (i = 0; i < 65536; ++i)
-            s->gamma_table[i] = exr_halflt2uint(i);
-    } else {
+    trc_func = avpriv_get_trc_function_from_trc(s->apply_trc_type);
+    if (trc_func) {
         for (i = 0; i < 65536; ++i) {
             t = exr_half2float(i);
-            /* If negative value we reuse half value */
-            if (t.f <= 0.0f) {
+            t.f = trc_func(t.f);
+            s->gamma_table[i] = exr_flt2uint(t.i);
+        }
+    } else {
+        if (one_gamma > 0.9999f && one_gamma < 1.0001f) {
+            for (i = 0; i < 65536; ++i)
                 s->gamma_table[i] = exr_halflt2uint(i);
-            } else {
-                t.f = powf(t.f, one_gamma);
-                s->gamma_table[i] = exr_flt2uint(t.i);
+        } else {
+            for (i = 0; i < 65536; ++i) {
+                t = exr_half2float(i);
+                /* If negative value we reuse half value */
+                if (t.f <= 0.0f) {
+                    s->gamma_table[i] = exr_halflt2uint(i);
+                } else {
+                    t.f = powf(t.f, one_gamma);
+                    s->gamma_table[i] = exr_flt2uint(t.i);
+                }
             }
         }
     }
@@ -1391,6 +1426,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
     return 0;
 }
 
+#if HAVE_THREADS
 static int decode_init_thread_copy(AVCodecContext *avctx)
 {    EXRContext *s = avctx->priv_data;
 
@@ -1401,6 +1437,7 @@ static int decode_init_thread_copy(AVCodecContext *avctx)
 
     return 0;
 }
+#endif
 
 static av_cold int decode_end(AVCodecContext *avctx)
 {
@@ -1427,6 +1464,43 @@ static const AVOption options[] = {
         AV_OPT_TYPE_STRING, { .str = "" }, 0, 0, VD },
     { "gamma", "Set the float gamma value when decoding", OFFSET(gamma),
         AV_OPT_TYPE_FLOAT, { .dbl = 1.0f }, 0.001, FLT_MAX, VD },
+
+    // XXX: Note the abuse of the enum using AVCOL_TRC_UNSPECIFIED to subsume the existing gamma option
+    { "apply_trc", "color transfer characteristics to apply to EXR linear input", OFFSET(apply_trc_type),
+        AV_OPT_TYPE_INT, {.i64 = AVCOL_TRC_UNSPECIFIED }, 1, AVCOL_TRC_NB-1, VD, "apply_trc_type"},
+    { "bt709",        "BT.709",           0,
+        AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT709 },        INT_MIN, INT_MAX, VD, "apply_trc_type"},
+    { "gamma",        "gamma",            0,
+        AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_UNSPECIFIED },  INT_MIN, INT_MAX, VD, "apply_trc_type"},
+    { "gamma22",      "BT.470 M",         0,
+        AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_GAMMA22 },      INT_MIN, INT_MAX, VD, "apply_trc_type"},
+    { "gamma28",      "BT.470 BG",        0,
+        AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_GAMMA28 },      INT_MIN, INT_MAX, VD, "apply_trc_type"},
+    { "smpte170m",    "SMPTE 170 M",      0,
+        AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_SMPTE170M },    INT_MIN, INT_MAX, VD, "apply_trc_type"},
+    { "smpte240m",    "SMPTE 240 M",      0,
+        AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_SMPTE240M },    INT_MIN, INT_MAX, VD, "apply_trc_type"},
+    { "linear",       "Linear",           0,
+        AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_LINEAR },       INT_MIN, INT_MAX, VD, "apply_trc_type"},
+    { "log",          "Log",              0,
+        AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_LOG },          INT_MIN, INT_MAX, VD, "apply_trc_type"},
+    { "log_sqrt",     "Log square root",  0,
+        AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_LOG_SQRT },     INT_MIN, INT_MAX, VD, "apply_trc_type"},
+    { "iec61966_2_4", "IEC 61966-2-4",    0,
+        AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_IEC61966_2_4 }, INT_MIN, INT_MAX, VD, "apply_trc_type"},
+    { "bt1361",       "BT.1361",          0,
+        AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT1361_ECG },   INT_MIN, INT_MAX, VD, "apply_trc_type"},
+    { "iec61966_2_1", "IEC 61966-2-1",    0,
+        AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_IEC61966_2_1 }, INT_MIN, INT_MAX, VD, "apply_trc_type"},
+    { "bt2020_10bit", "BT.2020 - 10 bit", 0,
+        AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT2020_10 },    INT_MIN, INT_MAX, VD, "apply_trc_type"},
+    { "bt2020_12bit", "BT.2020 - 12 bit", 0,
+        AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT2020_12 },    INT_MIN, INT_MAX, VD, "apply_trc_type"},
+    { "smpte2084",    "SMPTE ST 2084",    0,
+        AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_SMPTEST2084 },  INT_MIN, INT_MAX, VD, "apply_trc_type"},
+    { "smpte428_1",   "SMPTE ST 428-1",   0,
+        AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_SMPTEST428_1 }, INT_MIN, INT_MAX, VD, "apply_trc_type"},
+
     { NULL },
 };
 
@@ -1447,7 +1521,7 @@ AVCodec ff_exr_decoder = {
     .init_thread_copy = ONLY_IF_THREADS_ENABLED(decode_init_thread_copy),
     .close            = decode_end,
     .decode           = decode_frame,
-    .capabilities     = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS |
-                        CODEC_CAP_SLICE_THREADS,
+    .capabilities     = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS |
+                        AV_CODEC_CAP_SLICE_THREADS,
     .priv_class       = &exr_class,
 };
diff --git a/libavcodec/faandct.c b/libavcodec/faandct.c
index 4053d69d..b6830725 100644
--- a/libavcodec/faandct.c
+++ b/libavcodec/faandct.c
@@ -29,25 +29,24 @@
 #include "libavutil/internal.h"
 #include "libavutil/libm.h"
 
-#define FLOAT float
+typedef float FLOAT;
 
-//numbers generated by simple c code (not as accurate as they could be)
-/*
-for(i=0; i<8; i++){
-    printf("#define B%d %1.20llf\n", i, (long double)1.0/(cosl(i*acosl(-1.0)/(long double)16.0)*sqrtl(2)));
-}
+/* numbers generated by arbitrary precision arithmetic followed by truncation
+to 36 fractional digits (enough for a 128-bit IEEE quad, see /usr/include/math.h
+for this approach). Unfortunately, long double is not always available correctly,
+e.g. ppc has issues.
+TODO: add L suffixes when ppc and toolchains sort out their stuff.
 */
-#define B0 1.00000000000000000000
-#define B1 0.72095982200694791383 // (cos(pi*1/16)sqrt(2))^-1
-#define B2 0.76536686473017954350 // (cos(pi*2/16)sqrt(2))^-1
-#define B3 0.85043009476725644878 // (cos(pi*3/16)sqrt(2))^-1
-#define B4 1.00000000000000000000 // (cos(pi*4/16)sqrt(2))^-1
-#define B5 1.27275858057283393842 // (cos(pi*5/16)sqrt(2))^-1
-#define B6 1.84775906502257351242 // (cos(pi*6/16)sqrt(2))^-1
-#define B7 3.62450978541155137218 // (cos(pi*7/16)sqrt(2))^-1
-
-
-#define A1 0.70710678118654752438 // cos(pi*4/16)
+#define B0 1.000000000000000000000000000000000000
+#define B1 0.720959822006947913789091890943021267 // (cos(pi*1/16)sqrt(2))^-1
+#define B2 0.765366864730179543456919968060797734 // (cos(pi*2/16)sqrt(2))^-1
+#define B3 0.850430094767256448766702844371412325 // (cos(pi*3/16)sqrt(2))^-1
+#define B4 1.000000000000000000000000000000000000 // (cos(pi*4/16)sqrt(2))^-1
+#define B5 1.272758580572833938461007018281767032 // (cos(pi*5/16)sqrt(2))^-1
+#define B6 1.847759065022573512256366378793576574 // (cos(pi*6/16)sqrt(2))^-1
+#define B7 3.624509785411551372409941227504289587 // (cos(pi*7/16)sqrt(2))^-1
+
+#define A1 M_SQRT1_2              // cos(pi*4/16)
 #define A2 0.54119610014619698435 // cos(pi*6/16)sqrt(2)
 #define A5 0.38268343236508977170 // cos(pi*6/16)
 #define A4 1.30656296487637652774 // cos(pi*2/16)sqrt(2)
diff --git a/libavcodec/faanidct.c b/libavcodec/faanidct.c
index ca82f778..bbaaa3fd 100644
--- a/libavcodec/faanidct.c
+++ b/libavcodec/faanidct.c
@@ -22,7 +22,7 @@
 #include "libavutil/common.h"
 
 /* To allow switching to double. */
-#define FLOAT float
+typedef float FLOAT;
 
 #define B0 1.0000000000000000000000
 #define B1 1.3870398453221474618216 // cos(pi*1/16)sqrt(2)
diff --git a/libavcodec/faxcompr.c b/libavcodec/faxcompr.c
index eb39ae0b..2a1d2bc3 100644
--- a/libavcodec/faxcompr.c
+++ b/libavcodec/faxcompr.c
@@ -122,6 +122,81 @@ av_cold void ff_ccitt_unpack_init(void)
     initialized = 1;
 }
 
+static int decode_uncompressed(AVCodecContext *avctx, GetBitContext *gb, /* avctx: logging context; gb: bit reader */
+                               unsigned int *pix_left, int **runs,       /* in/out: pixels left on the line; run write cursor */
+                               const int *runend, int *mode)             /* runend: end of the run buffer; mode: in/out, toggled per run */
+{   /* Decode one "uncompressed" segment into run lengths. Returns 0, 1 if the last run fills the line exactly, or <0 on error. */
+    int eob = 0;       /* set once an exit codeword (more than 5 leading zero bits) has been read */
+    int newmode;       /* bit read right after the exit codeword; always assigned, since the loop ends only with eob set */
+    int saved_run = 0; /* length of the run currently being accumulated */
+
+    do {
+        int cwi, k;
+        int cw = 0;    /* sum of the counts of one codeword chain */
+        int codes[2];  /* codes[0] is added to a mode-0 run, codes[1] to a mode-1 run */
+        do {
+            cwi = show_bits(gb, 11);
+            if (!cwi) { /* no 1 bit inside the 11-bit window */
+                av_log(avctx, AV_LOG_ERROR, "Invalid uncompressed codeword\n");
+                return AVERROR_INVALIDDATA;
+            }
+            cwi = 10 - av_log2(cwi); /* zero bits preceding the first 1 bit of the window */
+            skip_bits(gb, cwi + 1);  /* consume those zeros plus the terminating 1 */
+            if (cwi > 5) {           /* exit codeword: one more bit follows, then the segment ends */
+                newmode = get_bits1(gb);
+                eob = 1;
+                cwi -= 6;            /* what remains is the count carried by the exit codeword */
+            }
+            cw += cwi;
+        } while(cwi == 5); /* a count of 5 chains into the next codeword */
+
+        codes[0] = cw;
+        codes[1] = !eob;   /* a non-exit codeword adds one unit to a mode-1 run */
+
+        for (k = 0; k < 2; k++) {
+            if (codes[k]) {
+                if (*mode == !k) { /* the pending run belongs to the other mode: store it and switch */
+                    *(*runs)++ = saved_run;
+                    if (*runs >= runend) {
+                        av_log(avctx, AV_LOG_ERROR, "uncompressed run overrun\n");
+                        return AVERROR_INVALIDDATA;
+                    }
+                    if (*pix_left <= saved_run) { /* a run reaching the end of the line is rejected here */
+                        av_log(avctx, AV_LOG_ERROR, "uncompressed run went out of bounds\n");
+                        return AVERROR_INVALIDDATA;
+                    }
+                    *pix_left -= saved_run;
+                    saved_run = 0;
+                    *mode = !*mode;
+                }
+                saved_run += codes[k];
+            }
+        }
+    } while (!eob);
+    *(*runs)++ = saved_run; /* store the run still pending at the end of the segment */
+    if (*runs >= runend) {
+        av_log(avctx, AV_LOG_ERROR, "uncompressed run overrun\n");
+        return AVERROR_INVALIDDATA;
+    }
+    if (*pix_left <= saved_run) {
+        if (*pix_left == saved_run) /* the run ends exactly at the line end; *pix_left is left unchanged */
+            return 1;
+        av_log(avctx, AV_LOG_ERROR, "uncompressed run went out of boundsE\n"); /* NOTE(review): trailing "E" may be a typo or an exit-path marker - confirm */
+        return AVERROR_INVALIDDATA;
+    }
+    *pix_left -= saved_run;
+    saved_run = 0;
+    *mode = !*mode;
+    if (newmode != *mode) { //FIXME CHECK
+        *(*runs)++ = 0; /* zero-length run so the mode becomes the one requested by the exit codeword */
+        if (*runs >= runend) {
+            av_log(avctx, AV_LOG_ERROR, "uncompressed run overrun\n");
+            return AVERROR_INVALIDDATA;
+        }
+        *mode = newmode;
+    }
+    return 0;
+}
 
 static int decode_group3_1d_line(AVCodecContext *avctx, GetBitContext *gb,
                                  unsigned int pix_left, int *runs,
@@ -149,8 +224,18 @@ static int decode_group3_1d_line(AVCodecContext *avctx, GetBitContext *gb,
             run       = 0;
             mode      = !mode;
         } else if ((int)t == -1) {
-            av_log(avctx, AV_LOG_ERROR, "Incorrect code\n");
-            return AVERROR_INVALIDDATA;
+            if (show_bits(gb, 12) == 15) {
+                int ret;
+                skip_bits(gb, 12);
+                ret = decode_uncompressed(avctx, gb, &pix_left, &runs, runend, &mode);
+                if (ret < 0) {
+                    return ret;
+                } else if (ret)
+                    break;
+            } else {
+                av_log(avctx, AV_LOG_ERROR, "Incorrect code\n");
+                return AVERROR_INVALIDDATA;
+            }
         }
     }
     *runs++ = 0;
@@ -211,8 +296,25 @@ static int decode_group3_2d_line(AVCodecContext *avctx, GetBitContext *gb,
                 mode = !mode;
             }
         } else if (cmode == 9 || cmode == 10) {
-            avpriv_report_missing_feature(avctx, "Special modes support");
-            return AVERROR_PATCHWELCOME;
+            int xxx = get_bits(gb, 3);
+            if (cmode == 9 && xxx == 7) {
+                int ret;
+                int pix_left = width - offs;
+
+                if (saved_run) {
+                    av_log(avctx, AV_LOG_ERROR, "saved run %d on entering uncompressed mode\n", saved_run);
+                    return AVERROR_INVALIDDATA;
+                }
+                ret = decode_uncompressed(avctx, gb, &pix_left, &runs, runend, &mode);
+                offs = width - pix_left;
+                if (ret < 0) {
+                    return ret;
+                } else if (ret)
+                    break;
+            } else {
+                avpriv_report_missing_feature(avctx, "Special mode %d xxx=%d support", cmode, xxx);
+                return AVERROR_PATCHWELCOME;
+            }
         } else { //vertical mode
             run      = run_off - offs + (cmode - 5);
             run_off -= *--ref;
diff --git a/libavcodec/fft-test.c b/libavcodec/fft-test.c
index ba26f81f..d647fde1 100644
--- a/libavcodec/fft-test.c
+++ b/libavcodec/fft-test.c
@@ -197,7 +197,7 @@ static int check_diff(FFTSample *tab1, FFTSample *tab2, int n, double scale)
     double error = 0, max = 0;
 
     for (i = 0; i < n; i++) {
-        double e = fabsf(tab1[i] - (tab2[i] / scale)) / RANGE;
+        double e = fabs(tab1[i] - (tab2[i] / scale)) / RANGE;
         if (e >= 1e-3) {
             av_log(NULL, AV_LOG_ERROR, "ERROR %5d: "FMT" "FMT"\n",
                    i, tab1[i], tab2[i]);
@@ -285,7 +285,7 @@ int main(int argc, char **argv)
             break;
         case 'c':
         {
-            int cpuflags = av_get_cpu_flags();
+            unsigned cpuflags = av_get_cpu_flags();
 
             if (av_parse_cpu_caps(&cpuflags, optarg) < 0)
                 return 1;
diff --git a/libavcodec/ffv1.c b/libavcodec/ffv1.c
index 7a38bf92..537409e4 100644
--- a/libavcodec/ffv1.c
+++ b/libavcodec/ffv1.c
@@ -39,7 +39,7 @@
 #include "mathops.h"
 #include "ffv1.h"
 
-av_cold int ffv1_common_init(AVCodecContext *avctx)
+av_cold int ff_ffv1_common_init(AVCodecContext *avctx)
 {
     FFV1Context *s = avctx->priv_data;
 
@@ -64,30 +64,35 @@ av_cold int ffv1_common_init(AVCodecContext *avctx)
     return 0;
 }
 
-av_cold int ffv1_init_slice_state(FFV1Context *f, FFV1Context *fs)
+av_cold int ff_ffv1_init_slice_state(FFV1Context *f, FFV1Context *fs)
 {
-    int j;
+    int j, i;
 
     fs->plane_count  = f->plane_count;
     fs->transparency = f->transparency;
     for (j = 0; j < f->plane_count; j++) {
         PlaneContext *const p = &fs->plane[j];
 
-        if (fs->ac) {
+        if (fs->ac != AC_GOLOMB_RICE) {
             if (!p->state)
                 p->state = av_malloc_array(p->context_count, CONTEXT_SIZE *
                                      sizeof(uint8_t));
             if (!p->state)
                 return AVERROR(ENOMEM);
         } else {
-            if (!p->vlc_state)
-                p->vlc_state = av_malloc_array(p->context_count, sizeof(VlcState));
-            if (!p->vlc_state)
-                return AVERROR(ENOMEM);
+            if (!p->vlc_state) {
+                p->vlc_state = av_mallocz_array(p->context_count, sizeof(VlcState));
+                if (!p->vlc_state)
+                    return AVERROR(ENOMEM);
+                for (i = 0; i < p->context_count; i++) {
+                    p->vlc_state[i].error_sum = 4;
+                    p->vlc_state[i].count     = 1;
+                }
+            }
         }
     }
 
-    if (fs->ac > 1) {
+    if (fs->ac == AC_RANGE_CUSTOM_TAB) {
         //FIXME only redo if state_transition changed
         for (j = 1; j < 256; j++) {
             fs->c. one_state[      j] = f->state_transition[j];
@@ -98,25 +103,25 @@ av_cold int ffv1_init_slice_state(FFV1Context *f, FFV1Context *fs)
     return 0;
 }
 
-av_cold int ffv1_init_slices_state(FFV1Context *f)
+av_cold int ff_ffv1_init_slices_state(FFV1Context *f)
 {
     int i, ret;
-    for (i = 0; i < f->slice_count; i++) {
+    for (i = 0; i < f->max_slice_count; i++) {
         FFV1Context *fs = f->slice_context[i];
-        if ((ret = ffv1_init_slice_state(f, fs)) < 0)
+        if ((ret = ff_ffv1_init_slice_state(f, fs)) < 0)
             return AVERROR(ENOMEM);
     }
     return 0;
 }
 
-av_cold int ffv1_init_slice_contexts(FFV1Context *f)
+av_cold int ff_ffv1_init_slice_contexts(FFV1Context *f)
 {
     int i;
 
-    f->slice_count = f->num_h_slices * f->num_v_slices;
-    av_assert0(f->slice_count > 0);
+    f->max_slice_count = f->num_h_slices * f->num_v_slices;
+    av_assert0(f->max_slice_count > 0);
 
-    for (i = 0; i < f->slice_count; i++) {
+    for (i = 0; i < f->max_slice_count; i++) {
         int sx          = i % f->num_h_slices;
         int sy          = i / f->num_h_slices;
         int sxs         = f->avctx->width  *  sx      / f->num_h_slices;
@@ -154,7 +159,7 @@ av_cold int ffv1_init_slice_contexts(FFV1Context *f)
     return AVERROR(ENOMEM);
 }
 
-int ffv1_allocate_initial_states(FFV1Context *f)
+int ff_ffv1_allocate_initial_states(FFV1Context *f)
 {
     int i;
 
@@ -169,7 +174,7 @@ int ffv1_allocate_initial_states(FFV1Context *f)
     return 0;
 }
 
-void ffv1_clear_slice_state(FFV1Context *f, FFV1Context *fs)
+void ff_ffv1_clear_slice_state(FFV1Context *f, FFV1Context *fs)
 {
     int i, j;
 
@@ -179,7 +184,7 @@ void ffv1_clear_slice_state(FFV1Context *f, FFV1Context *fs)
         p->interlace_bit_state[0] = 128;
         p->interlace_bit_state[1] = 128;
 
-        if (fs->ac) {
+        if (fs->ac != AC_GOLOMB_RICE) {
             if (f->initial_states[p->quant_table_index]) {
                 memcpy(p->state, f->initial_states[p->quant_table_index],
                        CONTEXT_SIZE * p->context_count);
@@ -197,7 +202,7 @@ void ffv1_clear_slice_state(FFV1Context *f, FFV1Context *fs)
 }
 
 
-av_cold int ffv1_close(AVCodecContext *avctx)
+av_cold int ff_ffv1_close(AVCodecContext *avctx)
 {
     FFV1Context *s = avctx->priv_data;
     int i, j;
@@ -210,7 +215,7 @@ av_cold int ffv1_close(AVCodecContext *avctx)
         ff_thread_release_buffer(avctx, &s->last_picture);
     av_frame_free(&s->last_picture.f);
 
-    for (j = 0; j < s->slice_count; j++) {
+    for (j = 0; j < s->max_slice_count; j++) {
         FFV1Context *fs = s->slice_context[j];
         for (i = 0; i < s->plane_count; i++) {
             PlaneContext *p = &fs->plane[i];
@@ -224,14 +229,14 @@ av_cold int ffv1_close(AVCodecContext *avctx)
     av_freep(&avctx->stats_out);
     for (j = 0; j < s->quant_table_count; j++) {
         av_freep(&s->initial_states[j]);
-        for (i = 0; i < s->slice_count; i++) {
+        for (i = 0; i < s->max_slice_count; i++) {
             FFV1Context *sf = s->slice_context[i];
             av_freep(&sf->rc_stat2[j]);
         }
         av_freep(&s->rc_stat2[j]);
     }
 
-    for (i = 0; i < s->slice_count; i++)
+    for (i = 0; i < s->max_slice_count; i++)
         av_freep(&s->slice_context[i]);
 
     return 0;
diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
index bfc4d71e..d9398e55 100644
--- a/libavcodec/ffv1.h
+++ b/libavcodec/ffv1.h
@@ -53,6 +53,11 @@
 #define MAX_QUANT_TABLES 8
 #define MAX_CONTEXT_INPUTS 5
 
+#define AC_GOLOMB_RICE          0
+#define AC_RANGE_DEFAULT_TAB    1
+#define AC_RANGE_CUSTOM_TAB     2
+#define AC_RANGE_DEFAULT_TAB_FORCE -2
+
 typedef struct VlcState {
     int16_t drift;
     uint16_t error_sum;
@@ -87,6 +92,7 @@ typedef struct FFV1Context {
     int transparency;
     int flags;
     int picture_number;
+    int key_frame;
     ThreadFrame picture, last_picture;
     struct FFV1Context *fsrc;
 
@@ -108,6 +114,7 @@ typedef struct FFV1Context {
     int intra;
     int slice_damaged;
     int key_frame_ok;
+    int context_model;
 
     int bits_per_raw_sample;
     int packed_at_lsb;
@@ -117,6 +124,7 @@ typedef struct FFV1Context {
 
     struct FFV1Context *slice_context[MAX_SLICES];
     int slice_count;
+    int max_slice_count;
     int num_v_slices;
     int num_h_slices;
     int slice_width;
@@ -129,13 +137,13 @@ typedef struct FFV1Context {
     int slice_rct_ry_coef;
 } FFV1Context;
 
-int ffv1_common_init(AVCodecContext *avctx);
-int ffv1_init_slice_state(FFV1Context *f, FFV1Context *fs);
-int ffv1_init_slices_state(FFV1Context *f);
-int ffv1_init_slice_contexts(FFV1Context *f);
-int ffv1_allocate_initial_states(FFV1Context *f);
-void ffv1_clear_slice_state(FFV1Context *f, FFV1Context *fs);
-int ffv1_close(AVCodecContext *avctx);
+int ff_ffv1_common_init(AVCodecContext *avctx);
+int ff_ffv1_init_slice_state(FFV1Context *f, FFV1Context *fs);
+int ff_ffv1_init_slices_state(FFV1Context *f);
+int ff_ffv1_init_slice_contexts(FFV1Context *f);
+int ff_ffv1_allocate_initial_states(FFV1Context *f);
+void ff_ffv1_clear_slice_state(FFV1Context *f, FFV1Context *fs);
+int ff_ffv1_close(AVCodecContext *avctx);
 
 static av_always_inline int fold(int diff, int bits)
 {
diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c
index cc7c605c..d2bf3a89 100644
--- a/libavcodec/ffv1dec.c
+++ b/libavcodec/ffv1dec.c
@@ -47,8 +47,11 @@ static inline av_flatten int get_symbol_inline(RangeCoder *c, uint8_t *state,
     else {
         int i, e, a;
         e = 0;
-        while (get_rac(c, state + 1 + FFMIN(e, 9))) // 1..10
+        while (get_rac(c, state + 1 + FFMIN(e, 9))) { // 1..10
             e++;
+            if (e > 31)
+                return AVERROR_INVALIDDATA;
+        }
 
         a = 1;
         for (i = e - 1; i >= 0; i--)
@@ -130,7 +133,7 @@ static av_always_inline void decode_line(FFV1Context *s, int w,
 
         av_assert2(context < p->context_count);
 
-        if (s->ac) {
+        if (s->ac != AC_GOLOMB_RICE) {
             diff = get_symbol_inline(c, p->state[context], 1);
         } else {
             if (context == 0 && run_mode == 0)
@@ -178,7 +181,8 @@ static av_always_inline void decode_line(FFV1Context *s, int w,
 }
 
 static void decode_plane(FFV1Context *s, uint8_t *src,
-                         int w, int h, int stride, int plane_index)
+                         int w, int h, int stride, int plane_index,
+                         int pixel_stride)
 {
     int x, y;
     int16_t *sample[2];
@@ -202,16 +206,16 @@ static void decode_plane(FFV1Context *s, uint8_t *src,
         if (s->avctx->bits_per_raw_sample <= 8) {
             decode_line(s, w, sample, plane_index, 8);
             for (x = 0; x < w; x++)
-                src[x + stride * y] = sample[1][x];
+                src[x*pixel_stride + stride * y] = sample[1][x];
         } else {
             decode_line(s, w, sample, plane_index, s->avctx->bits_per_raw_sample);
             if (s->packed_at_lsb) {
                 for (x = 0; x < w; x++) {
-                    ((uint16_t*)(src + stride*y))[x] = sample[1][x];
+                    ((uint16_t*)(src + stride*y))[x*pixel_stride] = sample[1][x];
                 }
             } else {
                 for (x = 0; x < w; x++) {
-                    ((uint16_t*)(src + stride*y))[x] = sample[1][x] << (16 - s->avctx->bits_per_raw_sample);
+                    ((uint16_t*)(src + stride*y))[x*pixel_stride] = sample[1][x] << (16 - s->avctx->bits_per_raw_sample);
                 }
             }
         }
@@ -302,7 +306,7 @@ static int decode_slice_header(FFV1Context *f, FFV1Context *fs)
     for (i = 0; i < f->plane_count; i++) {
         PlaneContext * const p = &fs->plane[i];
         int idx = get_symbol(c, state, 0);
-        if (idx > (unsigned)f->quant_table_count) {
+        if (idx >= (unsigned)f->quant_table_count) {
             av_log(f->avctx, AV_LOG_ERROR, "quant_table_index out of range\n");
             return -1;
         }
@@ -359,7 +363,7 @@ static int decode_slice(AVCodecContext *c, void *arg)
     FFV1Context *fs   = *(void **)arg;
     FFV1Context *f    = fs->avctx->priv_data;
     int width, height, x, y, ret;
-    const int ps      = av_pix_fmt_desc_get(c->pix_fmt)->comp[0].step_minus1 + 1;
+    const int ps      = av_pix_fmt_desc_get(c->pix_fmt)->comp[0].step;
     AVFrame * const p = f->cur;
     int i, si;
 
@@ -402,24 +406,25 @@ static int decode_slice(AVCodecContext *c, void *arg)
     fs->slice_rct_ry_coef = 1;
 
     if (f->version > 2) {
-        if (ffv1_init_slice_state(f, fs) < 0)
+        if (ff_ffv1_init_slice_state(f, fs) < 0)
             return AVERROR(ENOMEM);
         if (decode_slice_header(f, fs) < 0) {
+            fs->slice_x = fs->slice_y = fs->slice_height = fs->slice_width = 0;
             fs->slice_damaged = 1;
             return AVERROR_INVALIDDATA;
         }
     }
-    if ((ret = ffv1_init_slice_state(f, fs)) < 0)
+    if ((ret = ff_ffv1_init_slice_state(f, fs)) < 0)
         return ret;
     if (f->cur->key_frame || fs->slice_reset_contexts)
-        ffv1_clear_slice_state(f, fs);
+        ff_ffv1_clear_slice_state(f, fs);
 
     width  = fs->slice_width;
     height = fs->slice_height;
     x      = fs->slice_x;
     y      = fs->slice_y;
 
-    if (!fs->ac) {
+    if (fs->ac == AC_GOLOMB_RICE) {
         if (f->version == 3 && f->micro_version > 1 || f->version > 3)
             get_rac(&fs->c, (uint8_t[]) { 129 });
         fs->ac_byte_count = f->version > 2 || (!x && !y) ? fs->c.bytestream - fs->c.bytestream_start - 1 : 0;
@@ -429,26 +434,29 @@ static int decode_slice(AVCodecContext *c, void *arg)
     }
 
     av_assert1(width && height);
-    if (f->colorspace == 0) {
-        const int chroma_width  = FF_CEIL_RSHIFT(width,  f->chroma_h_shift);
-        const int chroma_height = FF_CEIL_RSHIFT(height, f->chroma_v_shift);
+    if (f->colorspace == 0 && (f->chroma_planes || !fs->transparency)) {
+        const int chroma_width  = AV_CEIL_RSHIFT(width,  f->chroma_h_shift);
+        const int chroma_height = AV_CEIL_RSHIFT(height, f->chroma_v_shift);
         const int cx            = x >> f->chroma_h_shift;
         const int cy            = y >> f->chroma_v_shift;
-        decode_plane(fs, p->data[0] + ps*x + y*p->linesize[0], width, height, p->linesize[0], 0);
+        decode_plane(fs, p->data[0] + ps*x + y*p->linesize[0], width, height, p->linesize[0], 0, 1);
 
         if (f->chroma_planes) {
-            decode_plane(fs, p->data[1] + ps*cx+cy*p->linesize[1], chroma_width, chroma_height, p->linesize[1], 1);
-            decode_plane(fs, p->data[2] + ps*cx+cy*p->linesize[2], chroma_width, chroma_height, p->linesize[2], 1);
+            decode_plane(fs, p->data[1] + ps*cx+cy*p->linesize[1], chroma_width, chroma_height, p->linesize[1], 1, 1);
+            decode_plane(fs, p->data[2] + ps*cx+cy*p->linesize[2], chroma_width, chroma_height, p->linesize[2], 1, 1);
         }
         if (fs->transparency)
-            decode_plane(fs, p->data[3] + ps*x + y*p->linesize[3], width, height, p->linesize[3], (f->version >= 4 && !f->chroma_planes) ? 1 : 2);
+            decode_plane(fs, p->data[3] + ps*x + y*p->linesize[3], width, height, p->linesize[3], (f->version >= 4 && !f->chroma_planes) ? 1 : 2, 1);
+    } else if (f->colorspace == 0) {
+         decode_plane(fs, p->data[0] + ps*x + y*p->linesize[0]    , width, height, p->linesize[0], 0, 2);
+         decode_plane(fs, p->data[0] + ps*x + y*p->linesize[0] + 1, width, height, p->linesize[0], 1, 2);
     } else {
         uint8_t *planes[3] = { p->data[0] + ps * x + y * p->linesize[0],
                                p->data[1] + ps * x + y * p->linesize[1],
                                p->data[2] + ps * x + y * p->linesize[2] };
         decode_rgb_frame(fs, planes, width, height, p->linesize);
     }
-    if (fs->ac && f->version > 2) {
+    if (fs->ac != AC_GOLOMB_RICE && f->version > 2) {
         int v;
         get_rac(&fs->c, (uint8_t[]) { 129 });
         v = fs->c.bytestream_end - fs->c.bytestream - 2 - 5*f->ec;
@@ -499,7 +507,10 @@ static int read_quant_tables(RangeCoder *c,
     int context_count = 1;
 
     for (i = 0; i < 5; i++) {
-        context_count *= read_quant_table(c, quant_table[i], context_count);
+        int ret = read_quant_table(c, quant_table[i], context_count);
+        if (ret < 0)
+            return ret;
+        context_count *= ret;
         if (context_count > 32768U) {
             return AVERROR_INVALIDDATA;
         }
@@ -513,6 +524,7 @@ static int read_extra_header(FFV1Context *f)
     uint8_t state[CONTEXT_SIZE];
     int i, j, k, ret;
     uint8_t state2[32][CONTEXT_SIZE];
+    unsigned crc = 0;
 
     memset(state2, 128, sizeof(state2));
     memset(state, 128, sizeof(state));
@@ -531,8 +543,9 @@ static int read_extra_header(FFV1Context *f)
         if (f->micro_version < 0)
             return AVERROR_INVALIDDATA;
     }
-    f->ac = f->avctx->coder_type = get_symbol(c, state, 0);
-    if (f->ac > 1) {
+    f->ac = get_symbol(c, state, 0);
+
+    if (f->ac == AC_RANGE_CUSTOM_TAB) {
         for (i = 1; i < 256; i++)
             f->state_transition[i] = get_symbol(c, state, 1) + c->one_state[i];
     }
@@ -561,8 +574,11 @@ static int read_extra_header(FFV1Context *f)
     }
 
     f->quant_table_count = get_symbol(c, state, 0);
-    if (f->quant_table_count > (unsigned)MAX_QUANT_TABLES)
+    if (f->quant_table_count > (unsigned)MAX_QUANT_TABLES || !f->quant_table_count) {
+        av_log(f->avctx, AV_LOG_ERROR, "quant table count %d is invalid\n", f->quant_table_count);
+        f->quant_table_count = 0;
         return AVERROR_INVALIDDATA;
+    }
 
     for (i = 0; i < f->quant_table_count; i++) {
         f->context_count[i] = read_quant_tables(c, f->quant_tables[i]);
@@ -571,7 +587,7 @@ static int read_extra_header(FFV1Context *f)
             return AVERROR_INVALIDDATA;
         }
     }
-    if ((ret = ffv1_allocate_initial_states(f)) < 0)
+    if ((ret = ff_ffv1_allocate_initial_states(f)) < 0)
         return ret;
 
     for (i = 0; i < f->quant_table_count; i++)
@@ -594,15 +610,16 @@ static int read_extra_header(FFV1Context *f)
         unsigned v;
         v = av_crc(av_crc_get_table(AV_CRC_32_IEEE), 0,
                    f->avctx->extradata, f->avctx->extradata_size);
-        if (v) {
+        if (v || f->avctx->extradata_size < 4) {
             av_log(f->avctx, AV_LOG_ERROR, "CRC mismatch %X!\n", v);
             return AVERROR_INVALIDDATA;
         }
+        crc = AV_RB32(f->avctx->extradata + f->avctx->extradata_size - 4);
     }
 
     if (f->avctx->debug & FF_DEBUG_PICT_INFO)
         av_log(f->avctx, AV_LOG_DEBUG,
-               "global: ver:%d.%d, coder:%d, colorspace: %d bpr:%d chroma:%d(%d:%d), alpha:%d slices:%dx%d qtabs:%d ec:%d intra:%d\n",
+               "global: ver:%d.%d, coder:%d, colorspace: %d bpr:%d chroma:%d(%d:%d), alpha:%d slices:%dx%d qtabs:%d ec:%d intra:%d CRC:0x%08X\n",
                f->version, f->micro_version,
                f->ac,
                f->colorspace,
@@ -612,7 +629,8 @@ static int read_extra_header(FFV1Context *f)
                f->num_h_slices, f->num_v_slices,
                f->quant_table_count,
                f->ec,
-               f->intra
+               f->intra,
+               crc
               );
     return 0;
 }
@@ -633,8 +651,9 @@ static int read_header(FFV1Context *f)
             return AVERROR_INVALIDDATA;
         }
         f->version = v;
-        f->ac      = f->avctx->coder_type = get_symbol(c, state, 0);
-        if (f->ac > 1) {
+        f->ac = get_symbol(c, state, 0);
+
+        if (f->ac == AC_RANGE_CUSTOM_TAB) {
             for (i = 1; i < 256; i++)
                 f->state_transition[i] = get_symbol(c, state, 1) + c->one_state[i];
         }
@@ -682,6 +701,11 @@ static int read_header(FFV1Context *f)
                 f->avctx->pix_fmt = AV_PIX_FMT_GRAY8;
             else
                 f->avctx->pix_fmt = AV_PIX_FMT_GRAY16;
+        } else if (f->transparency && !f->chroma_planes) {
+            if (f->avctx->bits_per_raw_sample <= 8)
+                f->avctx->pix_fmt = AV_PIX_FMT_YA8;
+            else
+                return AVERROR(ENOSYS);
         } else if (f->avctx->bits_per_raw_sample<=8 && !f->transparency) {
             switch(16 * f->chroma_h_shift + f->chroma_v_shift) {
             case 0x00: f->avctx->pix_fmt = AV_PIX_FMT_YUV444P; break;
@@ -772,6 +796,7 @@ static int read_header(FFV1Context *f)
             av_log(f->avctx, AV_LOG_ERROR, "read_quant_table error\n");
             return AVERROR_INVALIDDATA;
         }
+        f->slice_count = f->max_slice_count;
     } else if (f->version < 3) {
         f->slice_count = get_symbol(c, state, 0);
     } else {
@@ -786,8 +811,8 @@ static int read_header(FFV1Context *f)
             p -= size + trailer;
         }
     }
-    if (f->slice_count > (unsigned)MAX_SLICES || f->slice_count <= 0) {
-        av_log(f->avctx, AV_LOG_ERROR, "slice count %d is invalid\n", f->slice_count);
+    if (f->slice_count > (unsigned)MAX_SLICES || f->slice_count <= 0 || f->slice_count > f->max_slice_count) {
+        av_log(f->avctx, AV_LOG_ERROR, "slice count %d is invalid (max=%d)\n", f->slice_count, f->max_slice_count);
         return AVERROR_INVALIDDATA;
     }
 
@@ -852,13 +877,13 @@ static av_cold int decode_init(AVCodecContext *avctx)
     FFV1Context *f = avctx->priv_data;
     int ret;
 
-    if ((ret = ffv1_common_init(avctx)) < 0)
+    if ((ret = ff_ffv1_common_init(avctx)) < 0)
         return ret;
 
     if (avctx->extradata && (ret = read_extra_header(f)) < 0)
         return ret;
 
-    if ((ret = ffv1_init_slice_contexts(f)) < 0)
+    if ((ret = ff_ffv1_init_slice_contexts(f)) < 0)
         return ret;
 
     avctx->internal->allocate_progress = 1;
@@ -929,6 +954,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac
         else                     v = buf_p - c->bytestream_start;
         if (buf_p - c->bytestream_start < v) {
             av_log(avctx, AV_LOG_ERROR, "Slice pointer chain broken\n");
+            ff_thread_report_progress(&f->picture, INT_MAX, 0);
             return AVERROR_INVALIDDATA;
         }
         buf_p -= v;
@@ -947,6 +973,9 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac
                 }
                 fs->slice_damaged = 1;
             }
+            if (avctx->debug & FF_DEBUG_PICT_INFO) {
+                av_log(avctx, AV_LOG_DEBUG, "slice %d, CRC: 0x%08X\n", i, AV_RB32(buf_p + v - 4));
+            }
         }
 
         if (i) {
@@ -969,16 +998,18 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac
         FFV1Context *fs = f->slice_context[i];
         int j;
         if (fs->slice_damaged && f->last_picture.f->data[0]) {
+            const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
             const uint8_t *src[4];
             uint8_t *dst[4];
             ff_thread_await_progress(&f->last_picture, INT_MAX, 0);
             for (j = 0; j < 4; j++) {
+                int pixshift = desc->comp[j].depth > 8;
                 int sh = (j == 1 || j == 2) ? f->chroma_h_shift : 0;
                 int sv = (j == 1 || j == 2) ? f->chroma_v_shift : 0;
                 dst[j] = p->data[j] + p->linesize[j] *
-                         (fs->slice_y >> sv) + (fs->slice_x >> sh);
+                         (fs->slice_y >> sv) + ((fs->slice_x >> sh) << pixshift);
                 src[j] = f->last_picture.f->data[j] + f->last_picture.f->linesize[j] *
-                         (fs->slice_y >> sv) + (fs->slice_x >> sh);
+                         (fs->slice_y >> sv) + ((fs->slice_x >> sh) << pixshift);
             }
             av_image_copy(dst, p->linesize, src,
                           f->last_picture.f->linesize,
@@ -1002,6 +1033,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac
     return buf_size;
 }
 
+#if HAVE_THREADS
 static int init_thread_copy(AVCodecContext *avctx)
 {
     FFV1Context *f = avctx->priv_data;
@@ -1010,6 +1042,7 @@ static int init_thread_copy(AVCodecContext *avctx)
     f->picture.f      = NULL;
     f->last_picture.f = NULL;
     f->sample_buffer  = NULL;
+    f->max_slice_count = 0;
     f->slice_count = 0;
 
     for (i = 0; i < f->quant_table_count; i++) {
@@ -1021,11 +1054,12 @@ static int init_thread_copy(AVCodecContext *avctx)
     f->picture.f      = av_frame_alloc();
     f->last_picture.f = av_frame_alloc();
 
-    if ((ret = ffv1_init_slice_contexts(f)) < 0)
+    if ((ret = ff_ffv1_init_slice_contexts(f)) < 0)
         return ret;
 
     return 0;
 }
+#endif
 
 static void copy_fields(FFV1Context *fsdst, FFV1Context *fssrc, FFV1Context *fsrc)
 {
@@ -1055,6 +1089,7 @@ static void copy_fields(FFV1Context *fsdst, FFV1Context *fssrc, FFV1Context *fsr
     }
 }
 
+#if HAVE_THREADS
 static int update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
 {
     FFV1Context *fsrc = src->priv_data;
@@ -1085,7 +1120,7 @@ static int update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
         av_assert0(!fdst->sample_buffer);
     }
 
-    av_assert1(fdst->slice_count == fsrc->slice_count);
+    av_assert1(fdst->max_slice_count == fsrc->max_slice_count);
 
 
     ff_thread_release_buffer(dst, &fdst->picture);
@@ -1098,6 +1133,7 @@ static int update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
 
     return 0;
 }
+#endif
 
 AVCodec ff_ffv1_decoder = {
     .name           = "ffv1",
@@ -1106,10 +1142,11 @@ AVCodec ff_ffv1_decoder = {
     .id             = AV_CODEC_ID_FFV1,
     .priv_data_size = sizeof(FFV1Context),
     .init           = decode_init,
-    .close          = ffv1_close,
+    .close          = ff_ffv1_close,
     .decode         = decode_frame,
     .init_thread_copy = ONLY_IF_THREADS_ENABLED(init_thread_copy),
     .update_thread_context = ONLY_IF_THREADS_ENABLED(update_thread_context),
-    .capabilities   = CODEC_CAP_DR1 /*| CODEC_CAP_DRAW_HORIZ_BAND*/ |
-                      CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
+    .capabilities   = AV_CODEC_CAP_DR1 /*| AV_CODEC_CAP_DRAW_HORIZ_BAND*/ |
+                      AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_SLICE_THREADS,
+    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP
 };
diff --git a/libavcodec/ffv1enc.c b/libavcodec/ffv1enc.c
index 45ab3a39..9ee99214 100644
--- a/libavcodec/ffv1enc.c
+++ b/libavcodec/ffv1enc.c
@@ -155,6 +155,10 @@ static void find_best_state(uint8_t best_state[256][256],
             double occ[256] = { 0 };
             double len      = 0;
             occ[j] = 1.0;
+
+            if (!one_state[j])
+                continue;
+
             for (k = 0; k < 256; k++) {
                 double newocc[256] = { 0 };
                 for (m = 1; m < 256; m++)
@@ -274,7 +278,7 @@ static av_always_inline int encode_line(FFV1Context *s, int w,
     int run_count = 0;
     int run_mode  = 0;
 
-    if (s->ac) {
+    if (s->ac != AC_GOLOMB_RICE) {
         if (c->bytestream_end - c->bytestream < w * 35) {
             av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
             return AVERROR_INVALIDDATA;
@@ -311,8 +315,8 @@ static av_always_inline int encode_line(FFV1Context *s, int w,
 
         diff = fold(diff, bits);
 
-        if (s->ac) {
-            if (s->flags & CODEC_FLAG_PASS1) {
+        if (s->ac != AC_GOLOMB_RICE) {
+            if (s->flags & AV_CODEC_FLAG_PASS1) {
                 put_symbol_inline(c, p->state[context], diff, 1, s->rc_stat,
                                   s->rc_stat2[p->quant_table_index][context]);
             } else {
@@ -366,10 +370,10 @@ static av_always_inline int encode_line(FFV1Context *s, int w,
 }
 
 static int encode_plane(FFV1Context *s, uint8_t *src, int w, int h,
-                         int stride, int plane_index)
+                         int stride, int plane_index, int pixel_stride)
 {
     int x, y, i, ret;
-    const int ring_size = s->avctx->context_model ? 3 : 2;
+    const int ring_size = s->context_model ? 3 : 2;
     int16_t *sample[3];
     s->run_index = 0;
 
@@ -384,7 +388,7 @@ static int encode_plane(FFV1Context *s, uint8_t *src, int w, int h,
 // { START_TIMER
         if (s->bits_per_raw_sample <= 8) {
             for (x = 0; x < w; x++)
-                sample[0][x] = src[x + stride * y];
+                sample[0][x] = src[x * pixel_stride + stride * y];
             if((ret = encode_line(s, w, sample, plane_index, 8)) < 0)
                 return ret;
         } else {
@@ -409,7 +413,7 @@ static int encode_rgb_frame(FFV1Context *s, const uint8_t *src[3],
                              int w, int h, const int stride[3])
 {
     int x, y, p, i;
-    const int ring_size = s->avctx->context_model ? 3 : 2;
+    const int ring_size = s->context_model ? 3 : 2;
     int16_t *sample[4][3];
     int lbd    = s->bits_per_raw_sample <= 8;
     int bits   = s->bits_per_raw_sample > 0 ? s->bits_per_raw_sample : 8;
@@ -501,7 +505,7 @@ static void write_header(FFV1Context *f)
     if (f->version < 2) {
         put_symbol(c, state, f->version, 0);
         put_symbol(c, state, f->ac, 0);
-        if (f->ac > 1) {
+        if (f->ac == AC_RANGE_CUSTOM_TAB) {
             for (i = 1; i < 256; i++)
                 put_symbol(c, state,
                            f->state_transition[i] - c->one_state[i], 1);
@@ -531,7 +535,7 @@ static void write_header(FFV1Context *f)
                        0);
             for (j = 0; j < f->plane_count; j++) {
                 put_symbol(c, state, f->plane[j].quant_table_index, 0);
-                av_assert0(f->plane[j].quant_table_index == f->avctx->context_model);
+                av_assert0(f->plane[j].quant_table_index == f->context_model);
             }
         }
     }
@@ -550,7 +554,7 @@ static int write_extradata(FFV1Context *f)
 
     f->avctx->extradata_size = 10000 + 4 +
                                     (11 * 11 * 5 * 5 * 5 + 11 * 11 * 11) * 32;
-    f->avctx->extradata = av_malloc(f->avctx->extradata_size + FF_INPUT_BUFFER_PADDING_SIZE);
+    f->avctx->extradata = av_malloc(f->avctx->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
     if (!f->avctx->extradata)
         return AVERROR(ENOMEM);
     ff_init_range_encoder(c, f->avctx->extradata, f->avctx->extradata_size);
@@ -566,7 +570,7 @@ static int write_extradata(FFV1Context *f)
     }
 
     put_symbol(c, state, f->ac, 0);
-    if (f->ac > 1)
+    if (f->ac == AC_RANGE_CUSTOM_TAB)
         for (i = 1; i < 256; i++)
             put_symbol(c, state, f->state_transition[i] - c->one_state[i], 1);
 
@@ -667,12 +671,13 @@ static av_cold int encode_init(AVCodecContext *avctx)
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
     int i, j, k, m, ret;
 
-    if ((ret = ffv1_common_init(avctx)) < 0)
+    if ((ret = ff_ffv1_common_init(avctx)) < 0)
         return ret;
 
     s->version = 0;
 
-    if ((avctx->flags & (CODEC_FLAG_PASS1|CODEC_FLAG_PASS2)) || avctx->slices>1)
+    if ((avctx->flags & (AV_CODEC_FLAG_PASS1 | AV_CODEC_FLAG_PASS2)) ||
+        avctx->slices > 1)
         s->version = FFMAX(s->version, 2);
 
     // Unspecified level & slices, we choose version 1.2+ to ensure multithreaded decodability
@@ -682,8 +687,13 @@ static av_cold int encode_init(AVCodecContext *avctx)
     if (avctx->level <= 0 && s->version == 2) {
         s->version = 3;
     }
-    if (avctx->level >= 0 && avctx->level <= 4)
-        s->version = FFMAX(s->version, avctx->level);
+    if (avctx->level >= 0 && avctx->level <= 4) {
+        if (avctx->level < s->version) {
+            av_log(avctx, AV_LOG_ERROR, "Version %d needed for requested features but %d requested\n", s->version, avctx->level);
+            return AVERROR(EINVAL);
+        }
+        s->version = avctx->level;
+    }
 
     if (s->ec < 0) {
         s->ec = (s->version >= 3);
@@ -694,7 +704,17 @@ static av_cold int encode_init(AVCodecContext *avctx)
         return AVERROR_INVALIDDATA;
     }
 
-    s->ac = avctx->coder_type > 0 ? 2 : 0;
+#if FF_API_CODER_TYPE
+FF_DISABLE_DEPRECATION_WARNINGS
+    if (avctx->coder_type != -1)
+        s->ac = avctx->coder_type > 0 ? AC_RANGE_CUSTOM_TAB : AC_GOLOMB_RICE;
+    else
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+    if (s->ac == 1) // Compatbility with common command line usage
+        s->ac = AC_RANGE_CUSTOM_TAB;
+    else if (s->ac == AC_RANGE_DEFAULT_TAB_FORCE)
+        s->ac = AC_RANGE_DEFAULT_TAB;
 
     s->plane_count = 3;
     switch(avctx->pix_fmt) {
@@ -731,16 +751,14 @@ static av_cold int encode_init(AVCodecContext *avctx)
             av_log(avctx, AV_LOG_ERROR, "bits_per_raw_sample invalid\n");
             return AVERROR_INVALIDDATA;
         }
-        if (!s->ac && avctx->coder_type == -1) {
-            av_log(avctx, AV_LOG_INFO, "bits_per_raw_sample > 8, forcing coder 1\n");
-            s->ac = 2;
-        }
-        if (!s->ac) {
-            av_log(avctx, AV_LOG_ERROR, "bits_per_raw_sample of more than 8 needs -coder 1 currently\n");
-            return AVERROR(ENOSYS);
+        if (s->ac == AC_GOLOMB_RICE) {
+            av_log(avctx, AV_LOG_INFO,
+                   "bits_per_raw_sample > 8, forcing range coder\n");
+            s->ac = AC_RANGE_CUSTOM_TAB;
         }
         s->version = FFMAX(s->version, 1);
     case AV_PIX_FMT_GRAY8:
+    case AV_PIX_FMT_YA8:
     case AV_PIX_FMT_YUV444P:
     case AV_PIX_FMT_YUV440P:
     case AV_PIX_FMT_YUV422P:
@@ -752,9 +770,11 @@ static av_cold int encode_init(AVCodecContext *avctx)
     case AV_PIX_FMT_YUVA420P:
         s->chroma_planes = desc->nb_components < 3 ? 0 : 1;
         s->colorspace = 0;
-        s->transparency = desc->nb_components == 4;
+        s->transparency = desc->nb_components == 4 || desc->nb_components == 2;
         if (!avctx->bits_per_raw_sample && !s->bits_per_raw_sample)
             s->bits_per_raw_sample = 8;
+        else if (!s->bits_per_raw_sample)
+            s->bits_per_raw_sample = 8;
         break;
     case AV_PIX_FMT_RGB32:
         s->colorspace = 1;
@@ -786,13 +806,10 @@ static av_cold int encode_init(AVCodecContext *avctx)
         s->colorspace = 1;
         s->chroma_planes = 1;
         s->version = FFMAX(s->version, 1);
-        if (!s->ac && avctx->coder_type == -1) {
-            av_log(avctx, AV_LOG_INFO, "bits_per_raw_sample > 8, forcing coder 1\n");
-            s->ac = 2;
-        }
-        if (!s->ac) {
-            av_log(avctx, AV_LOG_ERROR, "bits_per_raw_sample of more than 8 needs -coder 1 currently\n");
-            return AVERROR(ENOSYS);
+        if (s->ac == AC_GOLOMB_RICE) {
+            av_log(avctx, AV_LOG_INFO,
+                   "bits_per_raw_sample > 8, forcing coder 1\n");
+            s->ac = AC_RANGE_CUSTOM_TAB;
         }
         break;
     default:
@@ -804,14 +821,26 @@ static av_cold int encode_init(AVCodecContext *avctx)
     if (s->transparency) {
         av_log(avctx, AV_LOG_WARNING, "Storing alpha plane, this will require a recent FFV1 decoder to playback!\n");
     }
+#if FF_API_PRIVATE_OPT
+FF_DISABLE_DEPRECATION_WARNINGS
+    if (avctx->context_model)
+        s->context_model = avctx->context_model;
     if (avctx->context_model > 1U) {
         av_log(avctx, AV_LOG_ERROR, "Invalid context model %d, valid values are 0 and 1\n", avctx->context_model);
         return AVERROR(EINVAL);
     }
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
-    if (s->ac > 1)
+    if (s->ac == AC_RANGE_CUSTOM_TAB) {
         for (i = 1; i < 256; i++)
             s->state_transition[i] = ver2_state[i];
+    } else {
+        RangeCoder c;
+        ff_build_rac_states(&c, 0.05 * (1LL << 32), 256 - 8);
+        for (i = 1; i < 256; i++)
+            s->state_transition[i] = c.one_state[i];
+    }
 
     for (i = 0; i < 256; i++) {
         s->quant_table_count = 2;
@@ -837,25 +866,25 @@ static av_cold int encode_init(AVCodecContext *avctx)
     }
     s->context_count[0] = (11 * 11 * 11        + 1) / 2;
     s->context_count[1] = (11 * 11 * 5 * 5 * 5 + 1) / 2;
-    memcpy(s->quant_table, s->quant_tables[avctx->context_model],
+    memcpy(s->quant_table, s->quant_tables[s->context_model],
            sizeof(s->quant_table));
 
     for (i = 0; i < s->plane_count; i++) {
         PlaneContext *const p = &s->plane[i];
 
         memcpy(p->quant_table, s->quant_table, sizeof(p->quant_table));
-        p->quant_table_index = avctx->context_model;
+        p->quant_table_index = s->context_model;
         p->context_count     = s->context_count[p->quant_table_index];
     }
 
-    if ((ret = ffv1_allocate_initial_states(s)) < 0)
+    if ((ret = ff_ffv1_allocate_initial_states(s)) < 0)
         return ret;
 
-    avctx->coded_frame = av_frame_alloc();
-    if (!avctx->coded_frame)
-        return AVERROR(ENOMEM);
-
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
     if (!s->transparency)
         s->plane_count = 2;
@@ -865,7 +894,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
     s->picture_number = 0;
 
-    if (avctx->flags & (CODEC_FLAG_PASS1 | CODEC_FLAG_PASS2)) {
+    if (avctx->flags & (AV_CODEC_FLAG_PASS1 | AV_CODEC_FLAG_PASS2)) {
         for (i = 0; i < s->quant_table_count; i++) {
             s->rc_stat2[i] = av_mallocz(s->context_count[i] *
                                         sizeof(*s->rc_stat2[i]));
@@ -922,7 +951,8 @@ static av_cold int encode_init(AVCodecContext *avctx)
             if (p[0] == 0)
                 break;
         }
-        sort_stt(s, s->state_transition);
+        if (s->ac == AC_RANGE_CUSTOM_TAB)
+            sort_stt(s, s->state_transition);
 
         find_best_state(best_state, s->state_transition);
 
@@ -972,18 +1002,19 @@ static av_cold int encode_init(AVCodecContext *avctx)
             return ret;
     }
 
-    if ((ret = ffv1_init_slice_contexts(s)) < 0)
+    if ((ret = ff_ffv1_init_slice_contexts(s)) < 0)
         return ret;
-    if ((ret = ffv1_init_slices_state(s)) < 0)
+    s->slice_count = s->max_slice_count;
+    if ((ret = ff_ffv1_init_slices_state(s)) < 0)
         return ret;
 
 #define STATS_OUT_SIZE 1024 * 1024 * 6
-    if (avctx->flags & CODEC_FLAG_PASS1) {
+    if (avctx->flags & AV_CODEC_FLAG_PASS1) {
         avctx->stats_out = av_mallocz(STATS_OUT_SIZE);
         if (!avctx->stats_out)
             return AVERROR(ENOMEM);
         for (i = 0; i < s->quant_table_count; i++)
-            for (j = 0; j < s->slice_count; j++) {
+            for (j = 0; j < s->max_slice_count; j++) {
                 FFV1Context *sf = s->slice_context[j];
                 av_assert0(!sf->rc_stat2[i]);
                 sf->rc_stat2[i] = av_mallocz(s->context_count[i] *
@@ -1009,7 +1040,7 @@ static void encode_slice_header(FFV1Context *f, FFV1Context *fs)
     put_symbol(c, state, (fs->slice_height+1)*f->num_v_slices / f->height-1, 0);
     for (j=0; j<f->plane_count; j++) {
         put_symbol(c, state, f->plane[j].quant_table_index, 0);
-        av_assert0(f->plane[j].quant_table_index == f->avctx->context_model);
+        av_assert0(f->plane[j].quant_table_index == f->context_model);
     }
     if (!f->picture.f->interlaced_frame)
         put_symbol(c, state, 3, 0);
@@ -1020,7 +1051,7 @@ static void encode_slice_header(FFV1Context *f, FFV1Context *fs)
     if (f->version > 3) {
         put_rac(c, state, fs->slice_coding_mode == 1);
         if (fs->slice_coding_mode == 1)
-            ffv1_clear_slice_state(f, fs);
+            ff_ffv1_clear_slice_state(f, fs);
         put_symbol(c, state, fs->slice_coding_mode, 0);
         if (fs->slice_coding_mode != 1) {
             put_symbol(c, state, fs->slice_rct_by_coef, 0);
@@ -1120,7 +1151,7 @@ static int encode_slice(AVCodecContext *c, void *arg)
     int x            = fs->slice_x;
     int y            = fs->slice_y;
     const AVFrame *const p = f->picture.f;
-    const int ps     = av_pix_fmt_desc_get(c->pix_fmt)->comp[0].step_minus1 + 1;
+    const int ps     = av_pix_fmt_desc_get(c->pix_fmt)->comp[0].step;
     int ret;
     RangeCoder c_bak = fs->c;
     const uint8_t *planes[3] = {p->data[0] + ps*x + y*p->linesize[0],
@@ -1136,12 +1167,12 @@ static int encode_slice(AVCodecContext *c, void *arg)
     }
 
 retry:
-    if (c->coded_frame->key_frame)
-        ffv1_clear_slice_state(f, fs);
+    if (f->key_frame)
+        ff_ffv1_clear_slice_state(f, fs);
     if (f->version > 2) {
         encode_slice_header(f, fs);
     }
-    if (!fs->ac) {
+    if (fs->ac == AC_GOLOMB_RICE) {
         if (f->version > 2)
             put_rac(&fs->c, (uint8_t[]) { 129 }, 0);
         fs->ac_byte_count = f->version > 2 || (!x && !y) ? ff_rac_terminate(&fs->c) : 0;
@@ -1150,20 +1181,23 @@ static int encode_slice(AVCodecContext *c, void *arg)
                       fs->c.bytestream_end - fs->c.bytestream_start - fs->ac_byte_count);
     }
 
-    if (f->colorspace == 0) {
-        const int chroma_width  = FF_CEIL_RSHIFT(width,  f->chroma_h_shift);
-        const int chroma_height = FF_CEIL_RSHIFT(height, f->chroma_v_shift);
+    if (f->colorspace == 0 && c->pix_fmt != AV_PIX_FMT_YA8) {
+        const int chroma_width  = AV_CEIL_RSHIFT(width,  f->chroma_h_shift);
+        const int chroma_height = AV_CEIL_RSHIFT(height, f->chroma_v_shift);
         const int cx            = x >> f->chroma_h_shift;
         const int cy            = y >> f->chroma_v_shift;
 
-        ret = encode_plane(fs, p->data[0] + ps*x + y*p->linesize[0], width, height, p->linesize[0], 0);
+        ret = encode_plane(fs, p->data[0] + ps*x + y*p->linesize[0], width, height, p->linesize[0], 0, 1);
 
         if (f->chroma_planes) {
-            ret |= encode_plane(fs, p->data[1] + ps*cx+cy*p->linesize[1], chroma_width, chroma_height, p->linesize[1], 1);
-            ret |= encode_plane(fs, p->data[2] + ps*cx+cy*p->linesize[2], chroma_width, chroma_height, p->linesize[2], 1);
+            ret |= encode_plane(fs, p->data[1] + ps*cx+cy*p->linesize[1], chroma_width, chroma_height, p->linesize[1], 1, 1);
+            ret |= encode_plane(fs, p->data[2] + ps*cx+cy*p->linesize[2], chroma_width, chroma_height, p->linesize[2], 1, 1);
         }
         if (fs->transparency)
-            ret |= encode_plane(fs, p->data[3] + ps*x + y*p->linesize[3], width, height, p->linesize[3], 2);
+            ret |= encode_plane(fs, p->data[3] + ps*x + y*p->linesize[3], width, height, p->linesize[3], 2, 1);
+    } else if (c->pix_fmt == AV_PIX_FMT_YA8) {
+        ret  = encode_plane(fs, p->data[0] +     ps*x + y*p->linesize[0], width, height, p->linesize[0], 0, 2);
+        ret |= encode_plane(fs, p->data[0] + 1 + ps*x + y*p->linesize[0], width, height, p->linesize[0], 1, 2);
     } else {
         ret = encode_rgb_frame(fs, planes, width, height, p->linesize);
     }
@@ -1194,11 +1228,11 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     uint8_t keystate    = 128;
     uint8_t *buf_p;
     int i, ret;
-    int64_t maxsize =   FF_MIN_BUFFER_SIZE
+    int64_t maxsize =   AV_INPUT_BUFFER_MIN_SIZE
                       + avctx->width*avctx->height*35LL*4;
 
     if(!pict) {
-        if (avctx->flags & CODEC_FLAG_PASS1) {
+        if (avctx->flags & AV_CODEC_FLAG_PASS1) {
             int j, k, m;
             char *p   = avctx->stats_out;
             char *end = p + STATS_OUT_SIZE;
@@ -1207,6 +1241,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
             for (i = 0; i < f->quant_table_count; i++)
                 memset(f->rc_stat2[i], 0, f->context_count[i] * sizeof(*f->rc_stat2[i]));
 
+            av_assert0(f->slice_count == f->max_slice_count);
             for (j = 0; j < f->slice_count; j++) {
                 FFV1Context *fs = f->slice_context[j];
                 for (i = 0; i < 256; i++) {
@@ -1243,9 +1278,9 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     }
 
     if (f->version > 3)
-        maxsize = FF_MIN_BUFFER_SIZE + avctx->width*avctx->height*3LL*4;
+        maxsize = AV_INPUT_BUFFER_MIN_SIZE + avctx->width*avctx->height*3LL*4;
 
-    if ((ret = ff_alloc_packet2(avctx, pkt, maxsize)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, pkt, maxsize, 0)) < 0)
         return ret;
 
     ff_init_range_encoder(c, pkt->data, pkt->size);
@@ -1258,15 +1293,15 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 
     if (avctx->gop_size == 0 || f->picture_number % avctx->gop_size == 0) {
         put_rac(c, &keystate, 1);
-        avctx->coded_frame->key_frame = 1;
+        f->key_frame = 1;
         f->gob_count++;
         write_header(f);
     } else {
         put_rac(c, &keystate, 0);
-        avctx->coded_frame->key_frame = 0;
+        f->key_frame = 0;
     }
 
-    if (f->ac > 1) {
+    if (f->ac == AC_RANGE_CUSTOM_TAB) {
         int i;
         for (i = 1; i < 256; i++) {
             c->one_state[i]        = f->state_transition[i];
@@ -1288,7 +1323,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         FFV1Context *fs = f->slice_context[i];
         int bytes;
 
-        if (fs->ac) {
+        if (fs->ac != AC_GOLOMB_RICE) {
             uint8_t state = 129;
             put_rac(&fs->c, &state, 0);
             bytes = ff_rac_terminate(&fs->c);
@@ -1313,14 +1348,20 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         buf_p += bytes;
     }
 
-    if (avctx->flags & CODEC_FLAG_PASS1)
+    if (avctx->flags & AV_CODEC_FLAG_PASS1)
         avctx->stats_out[0] = '\0';
 
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
+    avctx->coded_frame->key_frame = f->key_frame;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
     f->picture_number++;
     pkt->size   = buf_p - pkt->data;
     pkt->pts    =
     pkt->dts    = pict->pts;
-    pkt->flags |= AV_PKT_FLAG_KEY * avctx->coded_frame->key_frame;
+    pkt->flags |= AV_PKT_FLAG_KEY * f->key_frame;
     *got_packet = 1;
 
     return 0;
@@ -1328,15 +1369,27 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 
 static av_cold int encode_close(AVCodecContext *avctx)
 {
-    av_frame_free(&avctx->coded_frame);
-    ffv1_close(avctx);
+    ff_ffv1_close(avctx);
     return 0;
 }
 
 #define OFFSET(x) offsetof(FFV1Context, x)
 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 static const AVOption options[] = {
-    { "slicecrc", "Protect slices with CRCs", OFFSET(ec), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1, VE },
+    { "slicecrc", "Protect slices with CRCs", OFFSET(ec), AV_OPT_TYPE_BOOL, { .i64 = -1 }, -1, 1, VE },
+    { "coder", "Coder type", OFFSET(ac), AV_OPT_TYPE_INT,
+            { .i64 = 0 }, -2, 2, VE, "coder" },
+        { "rice", "Golomb rice", 0, AV_OPT_TYPE_CONST,
+            { .i64 = AC_GOLOMB_RICE }, INT_MIN, INT_MAX, VE, "coder" },
+        { "range_def", "Range with default table", 0, AV_OPT_TYPE_CONST,
+            { .i64 = AC_RANGE_DEFAULT_TAB_FORCE }, INT_MIN, INT_MAX, VE, "coder" },
+        { "range_tab", "Range with custom table", 0, AV_OPT_TYPE_CONST,
+            { .i64 = AC_RANGE_CUSTOM_TAB }, INT_MIN, INT_MAX, VE, "coder" },
+        { "ac", "Range with custom table (the ac option exists for compatibility and is deprecated)", 0, AV_OPT_TYPE_CONST,
+            { .i64 = 1 }, INT_MIN, INT_MAX, VE, "coder" },
+    { "context", "Context model", OFFSET(context_model), AV_OPT_TYPE_INT,
+            { .i64 = 0 }, 0, 1, VE },
+
     { NULL }
 };
 
@@ -1347,10 +1400,12 @@ static const AVClass ffv1_class = {
     .version    = LIBAVUTIL_VERSION_INT,
 };
 
+#if FF_API_CODER_TYPE
 static const AVCodecDefault ffv1_defaults[] = {
     { "coder", "-1" },
     { NULL },
 };
+#endif
 
 AVCodec ff_ffv1_encoder = {
     .name           = "ffv1",
@@ -1361,7 +1416,7 @@ AVCodec ff_ffv1_encoder = {
     .init           = encode_init,
     .encode2        = encode_frame,
     .close          = encode_close,
-    .capabilities   = CODEC_CAP_SLICE_THREADS | CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_DELAY,
     .pix_fmts       = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_YUV420P,   AV_PIX_FMT_YUVA420P,  AV_PIX_FMT_YUVA422P,  AV_PIX_FMT_YUV444P,
         AV_PIX_FMT_YUVA444P,  AV_PIX_FMT_YUV440P,   AV_PIX_FMT_YUV422P,   AV_PIX_FMT_YUV411P,
@@ -1373,9 +1428,12 @@ AVCodec ff_ffv1_encoder = {
         AV_PIX_FMT_YUVA444P9, AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA420P9,
         AV_PIX_FMT_GRAY16,    AV_PIX_FMT_GRAY8,     AV_PIX_FMT_GBRP9,     AV_PIX_FMT_GBRP10,
         AV_PIX_FMT_GBRP12,    AV_PIX_FMT_GBRP14,
+        AV_PIX_FMT_YA8,
         AV_PIX_FMT_NONE
 
     },
+#if FF_API_CODER_TYPE
     .defaults       = ffv1_defaults,
+#endif
     .priv_class     = &ffv1_class,
 };
diff --git a/libavcodec/ffwavesynth.c b/libavcodec/ffwavesynth.c
index e835934f..9d055e40 100644
--- a/libavcodec/ffwavesynth.c
+++ b/libavcodec/ffwavesynth.c
@@ -477,5 +477,5 @@ AVCodec ff_ffwavesynth_decoder = {
     .init           = wavesynth_init,
     .close          = wavesynth_close,
     .decode         = wavesynth_decode,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/fic.c b/libavcodec/fic.c
index 48e7a6ea..7d698f08 100644
--- a/libavcodec/fic.c
+++ b/libavcodec/fic.c
@@ -22,6 +22,7 @@
  */
 
 #include "libavutil/common.h"
+#include "libavutil/opt.h"
 #include "avcodec.h"
 #include "internal.h"
 #include "get_bits.h"
@@ -36,6 +37,7 @@ typedef struct FICThreadContext {
 } FICThreadContext;
 
 typedef struct FICContext {
+    AVClass *class;
     AVCodecContext *avctx;
     AVFrame *frame;
     AVFrame *final_frame;
@@ -51,6 +53,7 @@ typedef struct FICContext {
     int num_slices, slice_h;
 
     uint8_t cursor_buf[4096];
+    int skip_cursor;
 } FICContext;
 
 static const uint8_t fic_qmat_hq[64] = {
@@ -263,7 +266,7 @@ static int fic_decode_frame(AVCodecContext *avctx, void *data,
     int msize;
     int tsize;
     int cur_x, cur_y;
-    int skip_cursor = 0;
+    int skip_cursor = ctx->skip_cursor;
     uint8_t *sdata;
 
     if ((ret = ff_reget_buffer(avctx, ctx->frame)) < 0)
@@ -452,6 +455,18 @@ static av_cold int fic_decode_init(AVCodecContext *avctx)
     return 0;
 }
 
+static const AVOption options[] = {
+{ "skip_cursor", "skip the cursor", offsetof(FICContext, skip_cursor), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM },
+{ NULL },
+};
+
+static const AVClass fic_decoder_class = {
+    .class_name = "FIC decoder",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
 AVCodec ff_fic_decoder = {
     .name           = "fic",
     .long_name      = NULL_IF_CONFIG_SMALL("Mirillis FIC"),
@@ -461,5 +476,6 @@ AVCodec ff_fic_decoder = {
     .init           = fic_decode_init,
     .decode         = fic_decode_frame,
     .close          = fic_decode_close,
-    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_SLICE_THREADS,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_SLICE_THREADS,
+    .priv_class     = &fic_decoder_class,
 };
diff --git a/libavcodec/flac.c b/libavcodec/flac.c
index 5ff004e0..f5154b91 100644
--- a/libavcodec/flac.c
+++ b/libavcodec/flac.c
@@ -235,18 +235,3 @@ void ff_flac_parse_streaminfo(AVCodecContext *avctx, struct FLACStreaminfo *s,
     skip_bits_long(&gb, 64); /* md5 sum */
     skip_bits_long(&gb, 64); /* md5 sum */
 }
-
-#if LIBAVCODEC_VERSION_MAJOR < 57
-void avpriv_flac_parse_streaminfo(AVCodecContext *avctx, struct FLACStreaminfo *s,
-                              const uint8_t *buffer)
-{
-    ff_flac_parse_streaminfo(avctx, s, buffer);
-}
-
-int avpriv_flac_is_extradata_valid(AVCodecContext *avctx,
-                               enum FLACExtradataFormat *format,
-                               uint8_t **streaminfo_start)
-{
-    return ff_flac_is_extradata_valid(avctx, format, streaminfo_start);
-}
-#endif
diff --git a/libavcodec/flac.h b/libavcodec/flac.h
index f1307c7f..96d971c9 100644
--- a/libavcodec/flac.h
+++ b/libavcodec/flac.h
@@ -99,14 +99,6 @@ typedef struct FLACFrameInfo {
 void ff_flac_parse_streaminfo(AVCodecContext *avctx, struct FLACStreaminfo *s,
                               const uint8_t *buffer);
 
-#if LIBAVCODEC_VERSION_MAJOR < 57
-void avpriv_flac_parse_streaminfo(AVCodecContext *avctx, struct FLACStreaminfo *s,
-                                  const uint8_t *buffer);
-int avpriv_flac_is_extradata_valid(AVCodecContext *avctx,
-                                   enum FLACExtradataFormat *format,
-                                   uint8_t **streaminfo_start);
-#endif
-
 /**
  * Validate the FLAC extradata.
  * @param[in]  avctx codec context containing the extradata.
diff --git a/libavcodec/flacdec.c b/libavcodec/flacdec.c
index 30fe4161..b7237e18 100644
--- a/libavcodec/flacdec.c
+++ b/libavcodec/flacdec.c
@@ -623,6 +623,7 @@ static int flac_decode_frame(AVCodecContext *avctx, void *data,
     return bytes_read;
 }
 
+#if HAVE_THREADS
 static int init_thread_copy(AVCodecContext *avctx)
 {
     FLACContext *s = avctx->priv_data;
@@ -633,6 +634,7 @@ static int init_thread_copy(AVCodecContext *avctx)
         return allocate_buffers(s);
     return 0;
 }
+#endif
 
 static av_cold int flac_decode_close(AVCodecContext *avctx)
 {
@@ -644,7 +646,7 @@ static av_cold int flac_decode_close(AVCodecContext *avctx)
 }
 
 static const AVOption options[] = {
-{ "use_buggy_lpc", "emulate old buggy lavc behavior", offsetof(FLACContext, buggy_lpc), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 1, AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM },
+{ "use_buggy_lpc", "emulate old buggy lavc behavior", offsetof(FLACContext, buggy_lpc), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM },
 { NULL },
 };
 
@@ -665,7 +667,7 @@ AVCodec ff_flac_decoder = {
     .close          = flac_decode_close,
     .decode         = flac_decode_frame,
     .init_thread_copy = ONLY_IF_THREADS_ENABLED(init_thread_copy),
-    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16,
                                                       AV_SAMPLE_FMT_S16P,
                                                       AV_SAMPLE_FMT_S32,
diff --git a/libavcodec/flacenc.c b/libavcodec/flacenc.c
index 29bd9999..a91ed197 100644
--- a/libavcodec/flacenc.c
+++ b/libavcodec/flacenc.c
@@ -315,8 +315,10 @@ static av_cold int flac_encode_init(AVCodecContext *avctx)
                                          FF_LPC_TYPE_LEVINSON, FF_LPC_TYPE_LEVINSON, FF_LPC_TYPE_LEVINSON,
                                          FF_LPC_TYPE_LEVINSON})[level];
 
-    s->options.min_prediction_order = ((int[]){  2,  0,  0,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1})[level];
-    s->options.max_prediction_order = ((int[]){  3,  4,  4,  6,  8,  8,  8,  8, 12, 12, 12, 32, 32})[level];
+    if (s->options.min_prediction_order < 0)
+        s->options.min_prediction_order = ((int[]){  2,  0,  0,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1})[level];
+    if (s->options.max_prediction_order < 0)
+        s->options.max_prediction_order = ((int[]){  3,  4,  4,  6,  8,  8,  8,  8, 12, 12, 12, 32, 32})[level];
 
     if (s->options.prediction_order_method < 0)
         s->options.prediction_order_method = ((int[]){ ORDER_METHOD_EST,    ORDER_METHOD_EST,    ORDER_METHOD_EST,
@@ -335,14 +337,15 @@ static av_cold int flac_encode_init(AVCodecContext *avctx)
     if (s->options.max_partition_order < 0)
         s->options.max_partition_order = ((int[]){  2,  2,  3,  3,  3,  8,  8,  8,  8,  8,  8,  8,  8})[level];
 
-    if (s->options.lpc_type == FF_LPC_TYPE_NONE) {
-        s->options.min_prediction_order = 0;
-    } else if (avctx->min_prediction_order >= 0) {
+#if FF_API_PRIVATE_OPT
+FF_DISABLE_DEPRECATION_WARNINGS
+    if (avctx->min_prediction_order >= 0) {
         if (s->options.lpc_type == FF_LPC_TYPE_FIXED) {
             if (avctx->min_prediction_order > MAX_FIXED_ORDER) {
-                av_log(avctx, AV_LOG_ERROR, "invalid min prediction order: %d\n",
-                       avctx->min_prediction_order);
-                return AVERROR(EINVAL);
+                av_log(avctx, AV_LOG_WARNING,
+                       "invalid min prediction order %d, clamped to %d\n",
+                       avctx->min_prediction_order, MAX_FIXED_ORDER);
+                avctx->min_prediction_order = MAX_FIXED_ORDER;
             }
         } else if (avctx->min_prediction_order < MIN_LPC_ORDER ||
                    avctx->min_prediction_order > MAX_LPC_ORDER) {
@@ -352,14 +355,13 @@ static av_cold int flac_encode_init(AVCodecContext *avctx)
         }
         s->options.min_prediction_order = avctx->min_prediction_order;
     }
-    if (s->options.lpc_type == FF_LPC_TYPE_NONE) {
-        s->options.max_prediction_order = 0;
-    } else if (avctx->max_prediction_order >= 0) {
+    if (avctx->max_prediction_order >= 0) {
         if (s->options.lpc_type == FF_LPC_TYPE_FIXED) {
             if (avctx->max_prediction_order > MAX_FIXED_ORDER) {
-                av_log(avctx, AV_LOG_ERROR, "invalid max prediction order: %d\n",
-                       avctx->max_prediction_order);
-                return AVERROR(EINVAL);
+                av_log(avctx, AV_LOG_WARNING,
+                       "invalid max prediction order %d, clamped to %d\n",
+                       avctx->max_prediction_order, MAX_FIXED_ORDER);
+                avctx->max_prediction_order = MAX_FIXED_ORDER;
             }
         } else if (avctx->max_prediction_order < MIN_LPC_ORDER ||
                    avctx->max_prediction_order > MAX_LPC_ORDER) {
@@ -369,6 +371,26 @@ static av_cold int flac_encode_init(AVCodecContext *avctx)
         }
         s->options.max_prediction_order = avctx->max_prediction_order;
     }
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+    if (s->options.lpc_type == FF_LPC_TYPE_NONE) {
+        s->options.min_prediction_order = 0;
+        s->options.max_prediction_order = 0;
+    } else if (s->options.lpc_type == FF_LPC_TYPE_FIXED) {
+        if (s->options.min_prediction_order > MAX_FIXED_ORDER) {
+            av_log(avctx, AV_LOG_WARNING,
+                   "invalid min prediction order %d, clamped to %d\n",
+                   s->options.min_prediction_order, MAX_FIXED_ORDER);
+            s->options.min_prediction_order = MAX_FIXED_ORDER;
+        }
+        if (s->options.max_prediction_order > MAX_FIXED_ORDER) {
+            av_log(avctx, AV_LOG_WARNING,
+                   "invalid max prediction order %d, clamped to %d\n",
+                   s->options.max_prediction_order, MAX_FIXED_ORDER);
+            s->options.max_prediction_order = MAX_FIXED_ORDER;
+        }
+    }
+
     if (s->options.max_prediction_order < s->options.min_prediction_order) {
         av_log(avctx, AV_LOG_ERROR, "invalid prediction orders: min=%d max=%d\n",
                s->options.min_prediction_order, s->options.max_prediction_order);
@@ -1021,7 +1043,7 @@ static int count_frame_header(FlacEncodeContext *s)
         count += 16;
 
     /* explicit sample rate */
-    count += ((s->sr_code[0] == 12) + (s->sr_code[0] > 12)) * 8;
+    count += ((s->sr_code[0] == 12) + (s->sr_code[0] > 12) * 2) * 8;
 
     /* frame header CRC-8 */
     count += 8;
@@ -1065,7 +1087,7 @@ static void remove_wasted_bits(FlacEncodeContext *s)
         }
 
         if (v && !(v & 1)) {
-            v = av_ctz(v);
+            v = ff_ctz(v);
 
             for (i = 0; i < s->frame.blocksize; i++)
                 sub->samples[i] >>= v;
@@ -1346,7 +1368,13 @@ static int flac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
         av_md5_final(s->md5ctx, s->md5sum);
         write_streaminfo(s, avctx->extradata);
 
+#if FF_API_SIDEDATA_ONLY_PKT
+FF_DISABLE_DEPRECATION_WARNINGS
         if (avctx->side_data_only_packets && !s->flushed) {
+FF_ENABLE_DEPRECATION_WARNINGS
+#else
+        if (!s->flushed) {
+#endif
             uint8_t *side_data = av_packet_new_side_data(avpkt, AV_PKT_DATA_NEW_EXTRADATA,
                                                          avctx->extradata_size);
             if (!side_data)
@@ -1390,7 +1418,7 @@ static int flac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
         }
     }
 
-    if ((ret = ff_alloc_packet2(avctx, avpkt, frame_bytes)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, avpkt, frame_bytes, 0)) < 0)
         return ret;
 
     out_bytes = write_frame(s, avpkt);
@@ -1454,16 +1482,19 @@ static const AVOption options[] = {
 { "left_side",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FLAC_CHMODE_LEFT_SIDE   }, INT_MIN, INT_MAX, FLAGS, "ch_mode" },
 { "right_side", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FLAC_CHMODE_RIGHT_SIDE  }, INT_MIN, INT_MAX, FLAGS, "ch_mode" },
 { "mid_side",   NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FLAC_CHMODE_MID_SIDE    }, INT_MIN, INT_MAX, FLAGS, "ch_mode" },
-{ "exact_rice_parameters", "Calculate rice parameters exactly", offsetof(FlacEncodeContext, options.exact_rice_parameters), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, FLAGS },
-{ "multi_dim_quant",       "Multi-dimensional quantization",    offsetof(FlacEncodeContext, options.multi_dim_quant),       AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, FLAGS },
+{ "exact_rice_parameters", "Calculate rice parameters exactly", offsetof(FlacEncodeContext, options.exact_rice_parameters), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
+{ "multi_dim_quant",       "Multi-dimensional quantization",    offsetof(FlacEncodeContext, options.multi_dim_quant),       AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
+{ "min_prediction_order", NULL, offsetof(FlacEncodeContext, options.min_prediction_order), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, MAX_LPC_ORDER, FLAGS },
+{ "max_prediction_order", NULL, offsetof(FlacEncodeContext, options.max_prediction_order), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, MAX_LPC_ORDER, FLAGS },
+
 { NULL },
 };
 
 static const AVClass flac_encoder_class = {
-    "FLAC encoder",
-    av_default_item_name,
-    options,
-    LIBAVUTIL_VERSION_INT,
+    .class_name = "FLAC encoder",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
 };
 
 AVCodec ff_flac_encoder = {
@@ -1475,7 +1506,7 @@ AVCodec ff_flac_encoder = {
     .init           = flac_encode_init,
     .encode2        = flac_encode_frame,
     .close          = flac_encode_close,
-    .capabilities   = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY | CODEC_CAP_LOSSLESS,
+    .capabilities   = AV_CODEC_CAP_SMALL_LAST_FRAME | AV_CODEC_CAP_DELAY | AV_CODEC_CAP_LOSSLESS,
     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
                                                      AV_SAMPLE_FMT_S32,
                                                      AV_SAMPLE_FMT_NONE },
diff --git a/libavcodec/flashsv.c b/libavcodec/flashsv.c
index f429167f..90e1d43c 100644
--- a/libavcodec/flashsv.c
+++ b/libavcodec/flashsv.c
@@ -413,6 +413,10 @@ static int flashsv_decode_frame(AVCodecContext *avctx, void *data,
                 }
 
                 if (has_diff) {
+                    if (size < 3) {
+                        av_log(avctx, AV_LOG_ERROR, "size too small for diff\n");
+                        return AVERROR_INVALIDDATA;
+                    }
                     if (!s->keyframe) {
                         av_log(avctx, AV_LOG_ERROR,
                                "Inter frame without keyframe\n");
@@ -440,6 +444,10 @@ static int flashsv_decode_frame(AVCodecContext *avctx, void *data,
                     int row = get_bits(&gb, 8);
                     av_log(avctx, AV_LOG_DEBUG, "%dx%d zlibprime_curr %dx%d\n",
                            i, j, col, row);
+                    if (size < 3) {
+                        av_log(avctx, AV_LOG_ERROR, "size too small for zlibprime_curr\n");
+                        return AVERROR_INVALIDDATA;
+                    }
                     size -= 2;
                     avpriv_request_sample(avctx, "zlibprime_curr");
                     return AVERROR_PATCHWELCOME;
@@ -509,7 +517,7 @@ AVCodec ff_flashsv_decoder = {
     .init           = flashsv_decode_init,
     .close          = flashsv_decode_end,
     .decode         = flashsv_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_BGR24, AV_PIX_FMT_NONE },
 };
 #endif /* CONFIG_FLASHSV_DECODER */
@@ -572,7 +580,7 @@ AVCodec ff_flashsv2_decoder = {
     .init           = flashsv2_decode_init,
     .close          = flashsv2_decode_end,
     .decode         = flashsv_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_BGR24, AV_PIX_FMT_NONE },
 };
 #endif /* CONFIG_FLASHSV2_DECODER */
diff --git a/libavcodec/flashsv2enc.c b/libavcodec/flashsv2enc.c
index a8bcaa24..65db1126 100644
--- a/libavcodec/flashsv2enc.c
+++ b/libavcodec/flashsv2enc.c
@@ -412,12 +412,14 @@ static inline unsigned pixel_color15(const uint8_t * src)
 
 static inline unsigned int chroma_diff(unsigned int c1, unsigned int c2)
 {
+#define ABSDIFF(a,b) (abs((int)(a)-(int)(b)))
+
     unsigned int t1 = (c1 & 0x000000ff) + ((c1 & 0x0000ff00) >> 8) + ((c1 & 0x00ff0000) >> 16);
     unsigned int t2 = (c2 & 0x000000ff) + ((c2 & 0x0000ff00) >> 8) + ((c2 & 0x00ff0000) >> 16);
 
-    return abs(t1 - t2) + abs((c1 & 0x000000ff) - (c2 & 0x000000ff)) +
-        abs(((c1 & 0x0000ff00) >> 8) - ((c2 & 0x0000ff00) >> 8)) +
-        abs(((c1 & 0x00ff0000) >> 16) - ((c2 & 0x00ff0000) >> 16));
+    return ABSDIFF(t1, t2) + ABSDIFF(c1 & 0x000000ff, c2 & 0x000000ff) +
+        ABSDIFF((c1 & 0x0000ff00) >> 8 , (c2 & 0x0000ff00) >> 8) +
+        ABSDIFF((c1 & 0x00ff0000) >> 16, (c2 & 0x00ff0000) >> 16);
 }
 
 static inline int pixel_color7_fast(Palette * palette, unsigned c15)
@@ -854,7 +856,7 @@ static int flashsv2_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     int res;
     int keyframe = 0;
 
-    if ((res = ff_alloc_packet2(avctx, pkt, s->frame_size + FF_MIN_BUFFER_SIZE)) < 0)
+    if ((res = ff_alloc_packet2(avctx, pkt, s->frame_size + AV_INPUT_BUFFER_MIN_SIZE, 0)) < 0)
         return res;
 
     /* First frame needs to be a keyframe */
diff --git a/libavcodec/flashsvenc.c b/libavcodec/flashsvenc.c
index acbc1348..f7f98efd 100644
--- a/libavcodec/flashsvenc.c
+++ b/libavcodec/flashsvenc.c
@@ -98,8 +98,6 @@ static av_cold int flashsv_encode_end(AVCodecContext *avctx)
     av_freep(&s->previous_frame);
     av_freep(&s->tmpblock);
 
-    av_frame_free(&avctx->coded_frame);
-
     return 0;
 }
 
@@ -111,7 +109,7 @@ static av_cold int flashsv_encode_init(AVCodecContext *avctx)
 
     if (avctx->width > 4095 || avctx->height > 4095) {
         av_log(avctx, AV_LOG_ERROR,
-               "Input dimensions too large, input must be max 4096x4096 !\n");
+               "Input dimensions too large, input must be max 4095x4095 !\n");
         return AVERROR_INVALIDDATA;
     }
 
@@ -131,12 +129,6 @@ static av_cold int flashsv_encode_init(AVCodecContext *avctx)
         return AVERROR(ENOMEM);
     }
 
-    avctx->coded_frame = av_frame_alloc();
-    if (!avctx->coded_frame) {
-        flashsv_encode_end(avctx);
-        return AVERROR(ENOMEM);
-    }
-
     return 0;
 }
 
@@ -246,7 +238,7 @@ static int flashsv_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         I_frame = 1;
     }
 
-    if ((res = ff_alloc_packet2(avctx, pkt, s->image_width * s->image_height * 3)) < 0)
+    if ((res = ff_alloc_packet2(avctx, pkt, s->image_width * s->image_height * 3, 0)) < 0)
         return res;
 
     pkt->size = encode_bitstream(s, p, pkt->data, pkt->size, opt_w * 16, opt_h * 16,
@@ -262,16 +254,24 @@ static int flashsv_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 
     //mark the frame type so the muxer can mux it correctly
     if (I_frame) {
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
         avctx->coded_frame->pict_type      = AV_PICTURE_TYPE_I;
         avctx->coded_frame->key_frame      = 1;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
         s->last_key_frame = avctx->frame_number;
         ff_dlog(avctx, "Inserting keyframe at frame %d\n", avctx->frame_number);
     } else {
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
         avctx->coded_frame->pict_type = AV_PICTURE_TYPE_P;
         avctx->coded_frame->key_frame = 0;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
     }
 
-    if (avctx->coded_frame->key_frame)
+    if (I_frame)
         pkt->flags |= AV_PKT_FLAG_KEY;
     *got_packet = 1;
 
diff --git a/libavcodec/flicvideo.c b/libavcodec/flicvideo.c
index 08dd98b5..3e0573af 100644
--- a/libavcodec/flicvideo.c
+++ b/libavcodec/flicvideo.c
@@ -193,7 +193,7 @@ static int flic_decode_frame_8BPP(AVCodecContext *avctx,
 
     pixels = s->frame->data[0];
     pixel_limit = s->avctx->height * s->frame->linesize[0];
-    if (buf_size < 16 || buf_size > INT_MAX - (3 * 256 + FF_INPUT_BUFFER_PADDING_SIZE))
+    if (buf_size < 16 || buf_size > INT_MAX - (3 * 256 + AV_INPUT_BUFFER_PADDING_SIZE))
         return AVERROR_INVALIDDATA;
     frame_size = bytestream2_get_le32(&g2);
     if (frame_size > buf_size)
@@ -814,5 +814,5 @@ AVCodec ff_flic_decoder = {
     .init           = flic_decode_init,
     .close          = flic_decode_end,
     .decode         = flic_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/flvdec.c b/libavcodec/flvdec.c
index fd1971b2..f74ba3f0 100644
--- a/libavcodec/flvdec.c
+++ b/libavcodec/flvdec.c
@@ -122,7 +122,7 @@ AVCodec ff_flv_decoder = {
     .init           = ff_h263_decode_init,
     .close          = ff_h263_decode_end,
     .decode         = ff_h263_decode_frame,
-    .capabilities   = CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DRAW_HORIZ_BAND | AV_CODEC_CAP_DR1,
     .max_lowres     = 3,
     .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUV420P,
                                                      AV_PIX_FMT_NONE },
diff --git a/libavcodec/flvenc.c b/libavcodec/flvenc.c
index a54f5a81..a2cd3995 100644
--- a/libavcodec/flvenc.c
+++ b/libavcodec/flvenc.c
@@ -20,6 +20,7 @@
 
 #include "flv.h"
 #include "h263.h"
+#include "h263data.h"
 #include "mpegvideo.h"
 #include "mpegvideodata.h"
 
@@ -90,7 +91,12 @@ void ff_flv2_encode_ac_esc(PutBitContext *pb, int slevel, int level,
     }
 }
 
-FF_MPV_GENERIC_CLASS(flv)
+static const AVClass flv_class = {
+    .class_name = "flv encoder",
+    .item_name  = av_default_item_name,
+    .option     = ff_mpv_generic_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
 
 AVCodec ff_flv_encoder = {
     .name           = "flv",
diff --git a/libavcodec/fmtconvert.c b/libavcodec/fmtconvert.c
index e6fc509a..3b33af61 100644
--- a/libavcodec/fmtconvert.c
+++ b/libavcodec/fmtconvert.c
@@ -32,6 +32,14 @@ static void int32_to_float_fmul_scalar_c(float *dst, const int32_t *src,
         dst[i] = src[i] * mul;
 }
 
+static void int32_to_float_c(float *dst, const int32_t *src, intptr_t len)
+{
+    /* Plain C path: dst[i] = (float)src[i] for every i in [0, len). */
+    intptr_t i; /* same type as len, so the index cannot overflow before len */
+    for (i = 0; i < len; i++)
+        dst[i] = (float)src[i];
+}
+
 static void int32_to_float_fmul_array8_c(FmtConvertContext *c, float *dst,
                                          const int32_t *src, const float *mul,
                                          int len)
@@ -43,42 +51,18 @@ static void int32_to_float_fmul_array8_c(FmtConvertContext *c, float *dst,
 
 av_cold void ff_fmt_convert_init(FmtConvertContext *c, AVCodecContext *avctx)
 {
+    c->int32_to_float             = int32_to_float_c;
     c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c;
     c->int32_to_float_fmul_array8 = int32_to_float_fmul_array8_c;
 
-    if (ARCH_ARM) ff_fmt_convert_init_arm(c, avctx);
-    if (ARCH_PPC) ff_fmt_convert_init_ppc(c, avctx);
-    if (ARCH_X86) ff_fmt_convert_init_x86(c, avctx);
-    if (HAVE_MIPSFPU) ff_fmt_convert_init_mips(c);
-}
-
-/* ffdshow custom code */
-void float_interleave(float *dst, const float **src, long len, int channels)
-{
-    int i,j,c;
-    if(channels==2){
-        for(i=0; i<len; i++){
-            dst[2*i]   = src[0][i] / 32768.0f;
-            dst[2*i+1] = src[1][i] / 32768.0f;
-        }
-    }else{
-        for(c=0; c<channels; c++)
-            for(i=0, j=c; i<len; i++, j+=channels)
-                dst[j] = src[c][i] / 32768.0f;
-    }
-}
-
-void float_interleave_noscale(float *dst, const float **src, long len, int channels)
-{
-    int i,j,c;
-    if(channels==2){
-        for(i=0; i<len; i++){
-            dst[2*i]   = src[0][i];
-            dst[2*i+1] = src[1][i];
-        }
-    }else{
-        for(c=0; c<channels; c++)
-            for(i=0, j=c; i<len; i++, j+=channels)
-                dst[j] = src[c][i];
-    }
+    if (ARCH_AARCH64)
+        ff_fmt_convert_init_aarch64(c, avctx);
+    if (ARCH_ARM)
+        ff_fmt_convert_init_arm(c, avctx);
+    if (ARCH_PPC)
+        ff_fmt_convert_init_ppc(c, avctx);
+    if (ARCH_X86)
+        ff_fmt_convert_init_x86(c, avctx);
+    if (HAVE_MIPSFPU)
+        ff_fmt_convert_init_mips(c);
 }
diff --git a/libavcodec/fmtconvert.h b/libavcodec/fmtconvert.h
index 401ac3a1..a1b17e4f 100644
--- a/libavcodec/fmtconvert.h
+++ b/libavcodec/fmtconvert.h
@@ -37,6 +37,16 @@ typedef struct FmtConvertContext {
      */
     void (*int32_to_float_fmul_scalar)(float *dst, const int32_t *src,
                                        float mul, int len);
+    /**
+     * Convert an array of int32_t to float.
+     * @param dst destination array of float.
+     *            constraints: 32-byte aligned
+     * @param src source array of int32_t.
+     *            constraints: 32-byte aligned
+     * @param len number of elements to convert.
+     *            constraints: multiple of 8
+     */
+    void (*int32_to_float)(float *dst, const int32_t *src, intptr_t len);
 
     /**
      * Convert an array of int32_t to float and multiply by a float value from another array,
@@ -58,13 +68,10 @@ typedef struct FmtConvertContext {
 
 void ff_fmt_convert_init(FmtConvertContext *c, AVCodecContext *avctx);
 
+void ff_fmt_convert_init_aarch64(FmtConvertContext *c, AVCodecContext *avctx);
 void ff_fmt_convert_init_arm(FmtConvertContext *c, AVCodecContext *avctx);
 void ff_fmt_convert_init_ppc(FmtConvertContext *c, AVCodecContext *avctx);
 void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx);
 void ff_fmt_convert_init_mips(FmtConvertContext *c);
 
-/* ffdshow custom code */
-void float_interleave(float *dst, const float **src, long len, int channels);
-void float_interleave_noscale(float *dst, const float **src, long len, int channels);
-
 #endif /* AVCODEC_FMTCONVERT_H */
diff --git a/libavcodec/frame_thread_encoder.c b/libavcodec/frame_thread_encoder.c
index 9a49fea5..04c9a0eb 100644
--- a/libavcodec/frame_thread_encoder.c
+++ b/libavcodec/frame_thread_encoder.c
@@ -23,18 +23,11 @@
 #include "libavutil/fifo.h"
 #include "libavutil/avassert.h"
 #include "libavutil/imgutils.h"
+#include "libavutil/thread.h"
 #include "avcodec.h"
 #include "internal.h"
 #include "thread.h"
 
-#if HAVE_PTHREADS
-#include <pthread.h>
-#elif HAVE_W32THREADS
-#include "compat/w32pthreads.h"
-#elif HAVE_OS2THREADS
-#include "compat/os2threads.h"
-#endif
-
 #define MAX_THREADS 64
 #define BUFFER_SIZE (2*MAX_THREADS)
 
@@ -122,12 +115,12 @@ int ff_frame_thread_encoder_init(AVCodecContext *avctx, AVDictionary *options){
 
 
     if(   !(avctx->thread_type & FF_THREAD_FRAME)
-       || !(avctx->codec->capabilities & CODEC_CAP_INTRA_ONLY))
+       || !(avctx->codec->capabilities & AV_CODEC_CAP_INTRA_ONLY))
         return 0;
 
     if(   !avctx->thread_count
        && avctx->codec_id == AV_CODEC_ID_MJPEG
-       && !(avctx->flags & CODEC_FLAG_QSCALE)) {
+       && !(avctx->flags & AV_CODEC_FLAG_QSCALE)) {
         av_log(avctx, AV_LOG_DEBUG,
                "Forcing thread count to 1 for MJPEG encoding, use -thread_type slice "
                "or a constant quantizer if you want to use multiple cpu cores\n");
@@ -135,7 +128,7 @@ int ff_frame_thread_encoder_init(AVCodecContext *avctx, AVDictionary *options){
     }
     if(   avctx->thread_count > 1
        && avctx->codec_id == AV_CODEC_ID_MJPEG
-       && !(avctx->flags & CODEC_FLAG_QSCALE))
+       && !(avctx->flags & AV_CODEC_FLAG_QSCALE))
         av_log(avctx, AV_LOG_WARNING,
                "MJPEG CBR encoding works badly with frame multi-threading, consider "
                "using -threads 1, -thread_type slice or a constant quantizer.\n");
@@ -143,9 +136,15 @@ int ff_frame_thread_encoder_init(AVCodecContext *avctx, AVDictionary *options){
     if (avctx->codec_id == AV_CODEC_ID_HUFFYUV ||
         avctx->codec_id == AV_CODEC_ID_FFVHUFF) {
         int warn = 0;
-        if (avctx->flags & CODEC_FLAG_PASS1)
+        int context_model = 0;
+        AVDictionaryEntry *con = av_dict_get(options, "context", NULL, AV_DICT_MATCH_CASE);
+
+        if (con && con->value)
+            context_model = atoi(con->value);
+
+        if (avctx->flags & AV_CODEC_FLAG_PASS1)
             warn = 1;
-        else if(avctx->context_model > 0) {
+        else if(context_model > 0) {
             AVDictionaryEntry *t = av_dict_get(options, "non_deterministic",
                                                NULL, AV_DICT_MATCH_CASE);
             warn = !t || !t->value || !atoi(t->value) ? 1 : 0;
diff --git a/libavcodec/frame_thread_encoder.h b/libavcodec/frame_thread_encoder.h
index 1da0ce18..1f79553f 100644
--- a/libavcodec/frame_thread_encoder.h
+++ b/libavcodec/frame_thread_encoder.h
@@ -18,9 +18,13 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#ifndef AVCODEC_FRAME_THREAD_ENCODER_H
+#define AVCODEC_FRAME_THREAD_ENCODER_H
+
 #include "avcodec.h"
 
 int ff_frame_thread_encoder_init(AVCodecContext *avctx, AVDictionary *options);
 void ff_frame_thread_encoder_free(AVCodecContext *avctx);
 int ff_thread_video_encode_frame(AVCodecContext *avctx, AVPacket *pkt, const AVFrame *frame, int *got_packet_ptr);
 
+#endif /* AVCODEC_FRAME_THREAD_ENCODER_H */
diff --git a/libavcodec/fraps.c b/libavcodec/fraps.c
index c49866e7..57e13f26 100644
--- a/libavcodec/fraps.c
+++ b/libavcodec/fraps.c
@@ -105,7 +105,9 @@ static int fraps2_decode_plane(FrapsContext *s, uint8_t *dst, int stride, int w,
     s->bdsp.bswap_buf((uint32_t *) s->tmpbuf,
                       (const uint32_t *) src, size >> 2);
 
-    init_get_bits(&gb, s->tmpbuf, size * 8);
+    if ((ret = init_get_bits8(&gb, s->tmpbuf, size)) < 0)
+        return ret;
+
     for (j = 0; j < h; j++) {
         for (i = 0; i < w*step; i += step) {
             dst[i] = get_vlc2(&gb, vlc.table, VLC_BITS, 3);
@@ -186,13 +188,13 @@ static int decode_frame(AVCodecContext *avctx,
             return buf_size;
         }
         if (AV_RL32(buf) != FPS_TAG || buf_size < planes*1024 + 24) {
-            av_log(avctx, AV_LOG_ERROR, "Fraps: error in data stream\n");
+            av_log(avctx, AV_LOG_ERROR, "error in data stream\n");
             return AVERROR_INVALIDDATA;
         }
         for (i = 0; i < planes; i++) {
             offs[i] = AV_RL32(buf + 4 + i * 4);
             if (offs[i] >= buf_size - header_size || (i && offs[i] <= offs[i - 1] + 1024)) {
-                av_log(avctx, AV_LOG_ERROR, "Fraps: plane %i offset is out of bounds\n", i);
+                av_log(avctx, AV_LOG_ERROR, "plane %i offset is out of bounds\n", i);
                 return AVERROR_INVALIDDATA;
             }
         }
@@ -322,5 +324,5 @@ AVCodec ff_fraps_decoder = {
     .init           = decode_init,
     .close          = decode_end,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
 };
diff --git a/libavcodec/frwu.c b/libavcodec/frwu.c
index c778dbde..e68fda96 100644
--- a/libavcodec/frwu.c
+++ b/libavcodec/frwu.c
@@ -103,7 +103,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
 }
 
 static const AVOption frwu_options[] = {
-    {"change_field_order", "Change field order", offsetof(FRWUContext, change_field_order), FF_OPT_TYPE_INT,
+    {"change_field_order", "Change field order", offsetof(FRWUContext, change_field_order), AV_OPT_TYPE_BOOL,
      {.i64 = 0}, 0, 1, AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM},
     {NULL}
 };
@@ -123,6 +123,6 @@ AVCodec ff_frwu_decoder = {
     .priv_data_size = sizeof(FRWUContext),
     .init           = decode_init,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .priv_class     = &frwu_class,
 };
diff --git a/libavcodec/g2meet.c b/libavcodec/g2meet.c
index 64824834..51e8bbcd 100644
--- a/libavcodec/g2meet.c
+++ b/libavcodec/g2meet.c
@@ -1,6 +1,7 @@
 /*
- * Go2Webinar decoder
+ * Go2Webinar / Go2Meeting decoder
  * Copyright (c) 2012 Konstantin Shishkov
+ * Copyright (c) 2013 Maxim Poliakovski
  *
  * This file is part of FFmpeg.
  *
@@ -21,7 +22,7 @@
 
 /**
  * @file
- * Go2Webinar decoder
+ * Go2Webinar / Go2Meeting decoder
  */
 
 #include <inttypes.h>
@@ -32,12 +33,16 @@
 #include "avcodec.h"
 #include "blockdsp.h"
 #include "bytestream.h"
+#include "elsdec.h"
 #include "get_bits.h"
 #include "idctdsp.h"
 #include "internal.h"
 #include "jpegtables.h"
 #include "mjpeg.h"
 
+#define EPIC_PIX_STACK_SIZE 1024
+#define EPIC_PIX_STACK_MAX  (EPIC_PIX_STACK_SIZE - 1)
+
 enum ChunkType {
     DISPLAY_INFO = 0xC8,
     TILE_DATA,
@@ -74,6 +79,42 @@ static const uint8_t chroma_quant[64] = {
     50, 50, 50, 50, 50, 50, 50, 50,
 };
 
+typedef struct ePICPixListElem { /* one cached candidate pixel in a per-key list */
+    struct ePICPixListElem *next;  /* following candidate, NULL at the tail */
+    uint32_t               pixel;  /* candidate pixel value */
+    uint8_t                rung;   /* passed to ff_els_decode_bit() when this candidate is tested */
+} ePICPixListElem;
+
+typedef struct ePICPixHashElem { /* hash entry: one key plus its candidate list */
+    uint32_t                pix_id; /* key compared by epic_hash_find() */
+    struct ePICPixListElem  *list;  /* head of the candidate list, NULL while empty */
+} ePICPixHashElem;
+
+#define EPIC_HASH_SIZE 256 /* bucket count; djb2_hash() masks with EPIC_HASH_SIZE - 1 */
+typedef struct ePICPixHash {
+    ePICPixHashElem *bucket[EPIC_HASH_SIZE];      /* per-bucket entry arrays, grown via av_realloc() */
+    int              bucket_size[EPIC_HASH_SIZE]; /* allocated entries per bucket */
+    int              bucket_fill[EPIC_HASH_SIZE]; /* used entries per bucket */
+} ePICPixHash;
+
+typedef struct ePICContext {
+    ElsDecCtx        els_ctx;            /* decoder state handed to every ff_els_decode_*() call */
+    int              next_run_pos;       /* column where the previous run ended; reset to 0 per row */
+    ElsUnsignedRung  unsigned_rung;      /* state used by ff_els_decode_unsigned() */
+    uint8_t          W_flag_rung;        /* rung for the W candidate in epic_handle_edges() */
+    uint8_t          N_flag_rung;        /* rung for the N candidate in epic_handle_edges() */
+    uint8_t          W_ctx_rung[256];    /* indexed by the 8-bit neighbour context */
+    uint8_t          N_ctx_rung[512];    /* indexed by the 9-bit neighbour context */
+    uint8_t          nw_pred_rung[256];  /* indexed by the low byte of the NW pixel */
+    uint8_t          ne_pred_rung[256];  /* indexed by the low byte of the NE pixel */
+    uint8_t          prev_row_rung[14];  /* indexed by av_ceil_log2() of the run found in the row above */
+    uint8_t          runlen_zeroes[14];  /* run-length bits read before the first set bit, by bit position */
+    uint8_t          runlen_one;         /* run-length bits read after the first set bit */
+    int              stack_pos;          /* number of pushes since the stack was last emptied */
+    uint32_t         stack[EPIC_PIX_STACK_SIZE]; /* rejected candidates, written at stack_pos & EPIC_PIX_STACK_MAX */
+    ePICPixHash      hash;               /* pixel cache searched by epic_decode_from_cache() */
+} ePICContext;
+
 typedef struct JPGContext {
     BlockDSPContext bdsp;
     IDCTDSPContext idsp;
@@ -87,7 +128,9 @@ typedef struct JPGContext {
 } JPGContext;
 
 typedef struct G2MContext {
+    ePICContext ec;
     JPGContext jc;
+
     int        version;
 
     int        compression;
@@ -101,8 +144,9 @@ typedef struct G2MContext {
     uint8_t    *framebuf;
     int        framebuf_stride, old_width, old_height;
 
-    uint8_t    *synth_tile, *jpeg_tile;
-    int        tile_stride, old_tile_w, old_tile_h;
+    uint8_t    *synth_tile, *jpeg_tile, *epic_buf, *epic_buf_base;
+    int        tile_stride, epic_buf_stride, old_tile_w, old_tile_h;
+    int        swapuv;
 
     uint8_t    *kempf_buf, *kempf_flags;
 
@@ -229,11 +273,11 @@ static int jpg_decode_block(JPGContext *c, GetBitContext *gb,
     return 0;
 }
 
-static inline void yuv2rgb(uint8_t *out, int Y, int U, int V)
+static inline void yuv2rgb(uint8_t *out, int ridx, int Y, int U, int V)
 {
-    out[0] = av_clip_uint8(Y + (             91881 * V + 32768 >> 16));
-    out[1] = av_clip_uint8(Y + (-22554 * U - 46802 * V + 32768 >> 16));
-    out[2] = av_clip_uint8(Y + (116130 * U             + 32768 >> 16));
+    out[ridx]     = av_clip_uint8(Y +              (91881 * V + 32768 >> 16));
+    out[1]        = av_clip_uint8(Y + (-22554 * U - 46802 * V + 32768 >> 16));
+    out[2 - ridx] = av_clip_uint8(Y + (116130 * U             + 32768 >> 16));
 }
 
 static int jpg_decode_data(JPGContext *c, int width, int height,
@@ -247,12 +291,13 @@ static int jpg_decode_data(JPGContext *c, int width, int height,
     int bx, by;
     int unesc_size;
     int ret;
+    const int ridx = swapuv ? 2 : 0;
 
     if ((ret = av_reallocp(&c->buf,
-                           src_size + FF_INPUT_BUFFER_PADDING_SIZE)) < 0)
+                           src_size + AV_INPUT_BUFFER_PADDING_SIZE)) < 0)
         return ret;
     jpg_unescape(src, src_size, c->buf, &unesc_size);
-    memset(c->buf + unesc_size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
+    memset(c->buf + unesc_size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
     if((ret = init_get_bits8(&gb, c->buf, unesc_size)) < 0)
         return ret;
 
@@ -299,9 +344,9 @@ static int jpg_decode_data(JPGContext *c, int width, int height,
                     int Y, U, V;
 
                     Y = c->block[(j >> 3) * 2 + (i >> 3)][(i & 7) + (j & 7) * 8];
-                    U = c->block[4 ^ swapuv][(i >> 1) + (j >> 1) * 8] - 128;
-                    V = c->block[5 ^ swapuv][(i >> 1) + (j >> 1) * 8] - 128;
-                    yuv2rgb(out + i * 3, Y, U, V);
+                    U = c->block[4][(i >> 1) + (j >> 1) * 8] - 128;
+                    V = c->block[5][(i >> 1) + (j >> 1) * 8] - 128;
+                    yuv2rgb(out + i * 3, ridx, Y, U, V);
                 }
             }
 
@@ -318,6 +363,659 @@ static int jpg_decode_data(JPGContext *c, int width, int height,
     return 0;
 }
 
+#define LOAD_NEIGHBOURS(x) /* load W, WW and the pixels of the two rows above around column x */ \
+    W   = curr_row[(x)   - 1];  \
+    N   = above_row[(x)];       \
+    WW  = curr_row[(x)   - 2];  \
+    NW  = above_row[(x)  - 1];  \
+    NE  = above_row[(x)  + 1];  \
+    NN  = above2_row[(x)];      \
+    NNW = above2_row[(x) - 1];  \
+    NWW = above_row[(x)  - 2];  \
+    NNE = above2_row[(x) + 1]
+/* slide the neighbours of the two rows above one column right; W and WW are not touched */
+#define UPDATE_NEIGHBOURS(x)    \
+    NNW = NN;                   \
+    NN  = NNE;                  \
+    NWW = NW;                   \
+    NW  = N;                    \
+    N   = NE;                   \
+    NE  = above_row[(x)  + 1];  \
+    NNE = above2_row[(x) + 1]
+
+#define R_shift 16
+#define G_shift  8
+#define B_shift  0
+
+/* improved djb2 hash from http://www.cse.yorku.ca/~oz/hash.html */
+static int djb2_hash(uint32_t key)
+{
+    uint32_t acc = 5381;
+    int shift;
+
+    /* mix the key in one byte at a time, most significant byte first */
+    for (shift = 24; shift >= 0; shift -= 8)
+        acc = (acc * 33) ^ ((key >> shift) & 0xFF);
+
+    return acc & (EPIC_HASH_SIZE - 1); /* fold into a bucket index */
+}
+
+static void epic_hash_init(ePICPixHash *hash)
+{
+    memset(hash, 0, sizeof(ePICPixHash)); /* all bucket pointers NULL, all counts 0 */
+}
+
+static ePICPixHashElem *epic_hash_find(const ePICPixHash *hash, uint32_t key)
+{
+    const int slot = djb2_hash(key);
+    int pos, used  = hash->bucket_fill[slot];
+
+    /* linear scan of the occupied entries of the bucket; the first match wins */
+    for (pos = 0; pos < used; pos++)
+        if (hash->bucket[slot][pos].pix_id == key)
+            return &hash->bucket[slot][pos];
+    return NULL;
+}
+
+static ePICPixHashElem *epic_hash_add(ePICPixHash *hash, uint32_t key)
+{
+    const int slot = djb2_hash(key); /* bucket that key maps to */
+    ePICPixHashElem *entry;
+
+    if (hash->bucket_size[slot] > INT_MAX / sizeof(**hash->bucket))
+        return NULL;
+
+    if (hash->bucket_fill[slot] >= hash->bucket_size[slot]) {
+        const int grown        = hash->bucket_size[slot] + 16;
+        ePICPixHashElem *moved = av_realloc(hash->bucket[slot], grown * sizeof(*moved));
+        if (!moved)
+            return NULL;
+        hash->bucket[slot]      = moved;
+        hash->bucket_size[slot] = grown;
+    }
+
+    entry = &hash->bucket[slot][hash->bucket_fill[slot]++];
+    memset(entry, 0, sizeof(*entry));
+    entry->pix_id = key;
+    return entry;
+}
+
+static int epic_add_pixel_to_cache(ePICPixHash *hash, uint32_t key, uint32_t pix)
+{
+    ePICPixHashElem *slot = epic_hash_find(hash, key);
+    ePICPixListElem *node;
+
+    /* no entry for this key yet: create one */
+    if (!slot)
+        slot = epic_hash_add(hash, key);
+    if (!slot)
+        return AVERROR(ENOMEM);
+
+    node = av_mallocz(sizeof(*node));
+    if (!node)
+        return AVERROR(ENOMEM);
+    /* push the new candidate at the head of the key's list */
+    node->pixel = pix;
+    node->next  = slot->list;
+    slot->list  = node;
+    return 0;
+}
+
+static inline int epic_cache_entries_for_pixel(const ePICPixHash *hash,
+                                               uint32_t pix)
+{
+    const ePICPixHashElem *entry = epic_hash_find(hash, pix);
+
+    /*
+     * Yields 1 when pix has a hash entry with a non-empty list, else 0.
+     */
+    return entry && entry->list;
+}
+
+static void epic_free_pixel_cache(ePICPixHash *hash)
+{
+    int slot, entry;
+
+    for (slot = 0; slot < EPIC_HASH_SIZE; slot++) {
+        for (entry = 0; entry < hash->bucket_fill[slot]; entry++) {
+            ePICPixListElem *node, *following;
+            for (node = hash->bucket[slot][entry].list; node; node = following) {
+                following = node->next; /* read the link before freeing the node */
+                av_free(node);
+            }
+        }
+        /* the lists are gone: drop the bucket array and reset its bookkeeping */
+        av_freep(&hash->bucket[slot]);
+        hash->bucket_fill[slot] = 0;
+        hash->bucket_size[slot] = 0;
+    }
+}
+
+static inline int is_pixel_on_stack(const ePICContext *dc, uint32_t pix)
+{
+    int depth = 0;
+
+    /* walk the stack until pix is met or stack_pos entries have been checked */
+    while (depth < dc->stack_pos && dc->stack[depth] != pix)
+        depth++;
+
+    return depth != dc->stack_pos; /* stopped early => pix was found */
+}
+
+#define TOSIGNED(val) (((val) >> 1) ^ -((val) & 1))
+
+static inline int epic_decode_component_pred(ePICContext *dc, int N, int W, int NW)
+{
+    /* mid_pred() of N, W and the gradient N + W - NW, minus the coded residual */
+    unsigned coded = ff_els_decode_unsigned(&dc->els_ctx, &dc->unsigned_rung);
+    return mid_pred(N, N + W - NW, W) - TOSIGNED(coded);
+}
+
+static uint32_t epic_decode_pixel_pred(ePICContext *dc, int x, int y,
+                                       const uint32_t *curr_row,
+                                       const uint32_t *above_row)
+{
+    uint32_t N, W, NW, pred;
+    unsigned delta;
+    int GN, GW, GNW, R, G, B;
+    /* Decode one pixel as prediction plus coded residuals; returns packed RGB, or 0 if out of range. */
+    if (x && y) {
+        W  = curr_row[x  - 1];
+        N  = above_row[x];
+        NW = above_row[x - 1];
+
+        GN  = (N  >> G_shift) & 0xFF;
+        GW  = (W  >> G_shift) & 0xFF;
+        GNW = (NW >> G_shift) & 0xFF;
+        /* green is predicted directly, red and blue as differences to green */
+        G = epic_decode_component_pred(dc, GN, GW, GNW);
+
+        R = G + epic_decode_component_pred(dc,
+                                           ((N  >> R_shift) & 0xFF) - GN,
+                                           ((W  >> R_shift) & 0xFF) - GW,
+                                           ((NW >> R_shift) & 0xFF) - GNW);
+
+        B = G + epic_decode_component_pred(dc,
+                                           ((N  >> B_shift) & 0xFF) - GN,
+                                           ((W  >> B_shift) & 0xFF) - GW,
+                                           ((NW >> B_shift) & 0xFF) - GNW);
+    } else {
+        if (x)
+            pred = curr_row[x - 1];
+        else
+            pred = above_row[x];
+        /* x == 0 or y == 0: each component is the single neighbour's value minus a residual */
+        delta = ff_els_decode_unsigned(&dc->els_ctx, &dc->unsigned_rung);
+        R     = ((pred >> R_shift) & 0xFF) - TOSIGNED(delta);
+
+        delta = ff_els_decode_unsigned(&dc->els_ctx, &dc->unsigned_rung);
+        G     = ((pred >> G_shift) & 0xFF) - TOSIGNED(delta);
+
+        delta = ff_els_decode_unsigned(&dc->els_ctx, &dc->unsigned_rung);
+        B     = ((pred >> B_shift) & 0xFF) - TOSIGNED(delta);
+    }
+    /* a component above 255 would spill into the neighbouring byte when packed below */
+    if (R < 0 || G < 0 || B < 0 || R > 255 || G > 255 || B > 255) {
+        av_log(NULL, AV_LOG_ERROR, "RGB %d %d %d is out of range\n", R, G, B);
+        return 0;
+    }
+
+    return (R << R_shift) | (G << G_shift) | (B << B_shift);
+}
+
+static int epic_predict_pixel(ePICContext *dc, uint8_t *rung,
+                              uint32_t *pPix, uint32_t pix)
+{
+    if (ff_els_decode_bit(&dc->els_ctx, rung)) { /* non-zero: candidate rejected */
+        dc->stack[dc->stack_pos++ & EPIC_PIX_STACK_MAX] = pix;
+        return 0;
+    }
+    *pPix = pix; /* a zero bit accepts the candidate */
+    return 1;
+}
+
+static int epic_handle_edges(ePICContext *dc, int x, int y,
+                             const uint32_t *curr_row,
+                             const uint32_t *above_row, uint32_t *pPix)
+{
+    uint32_t pix;
+    /* Try W, then N, as the pixel value: returns 1 with *pPix set, or 0 if neither was accepted. */
+    if (!x && !y) { /* special case: top-left pixel */
+        /* coded as 3 unsigned numbers; separate statements pin the R, G, B read order */
+        *pPix  = ff_els_decode_unsigned(&dc->els_ctx, &dc->unsigned_rung) << R_shift;
+        *pPix |= ff_els_decode_unsigned(&dc->els_ctx, &dc->unsigned_rung) << G_shift;
+        *pPix |= ff_els_decode_unsigned(&dc->els_ctx, &dc->unsigned_rung) << B_shift;
+        return 1;
+    }
+
+    if (x) { /* predict from W first */
+        pix = curr_row[x - 1];
+        if (epic_predict_pixel(dc, &dc->W_flag_rung, pPix, pix))
+            return 1;
+    }
+
+    if (y) { /* then try to predict from N */
+        pix = above_row[x];
+        if (!dc->stack_pos || dc->stack[0] != pix) {
+            if (epic_predict_pixel(dc, &dc->N_flag_rung, pPix, pix))
+                return 1;
+        }
+    }
+
+    return 0;
+}
+
+static int epic_decode_run_length(ePICContext *dc, int x, int y, int tile_width,
+                                  const uint32_t *curr_row,
+                                  const uint32_t *above_row,
+                                  const uint32_t *above2_row,
+                                  uint32_t *pPix, int *pRun)
+{
+    int idx, got_pixel = 0, WWneW, old_WWneW = 0;
+    uint32_t W, WW, N, NN, NW, NE, NWW, NNW, NNE;
+    /* Result: 1 = *pPix was copied from W or N, 0 = value still unknown, <0 = negative ff_els_decode_bit() result */
+    *pRun = 0;
+    /* the run length starting at column x is accumulated in *pRun */
+    LOAD_NEIGHBOURS(x);
+
+    if (dc->next_run_pos == x) {
+        /* can't reuse W for the new pixel in this case */
+        WWneW = 1;
+    } else {
+        idx = (WW  != W)  << 7 |
+              (NW  != W)  << 6 |
+              (N   != NE) << 5 |
+              (NW  != N)  << 4 |
+              (NWW != NW) << 3 |
+              (NNE != NE) << 2 |
+              (NN  != N)  << 1 |
+              (NNW != NW);
+        WWneW = ff_els_decode_bit(&dc->els_ctx, &dc->W_ctx_rung[idx]);
+        if (WWneW < 0)
+            return WWneW;
+    }
+
+    if (WWneW)
+        dc->stack[dc->stack_pos++ & EPIC_PIX_STACK_MAX] = W;
+    else {
+        *pPix     = W;
+        got_pixel = 1;
+    }
+    /* extend the run until a W-context bit is set, the last column is reached, or an explicit length is read */
+    do {
+        int NWneW = 1;
+        if (got_pixel) // pixel value already known (derived from either W or N)
+            NWneW = *pPix != N;
+        else { // pixel value is unknown and will be decoded later
+            NWneW = *pRun ? NWneW : NW != W;
+
+            /* TODO: RFC this mess! */
+            switch (((NW != N) << 2) | (NWneW << 1) | WWneW) {
+            case 0:
+                break; // do nothing here
+            case 3:
+            case 5:
+            case 6:
+            case 7:
+                if (!is_pixel_on_stack(dc, N)) {
+                    idx = WWneW       << 8 |
+                          (*pRun ? old_WWneW : WW != W) << 7 |
+                          NWneW       << 6 |
+                          (N   != NE) << 5 |
+                          (NW  != N)  << 4 |
+                          (NWW != NW) << 3 |
+                          (NNE != NE) << 2 |
+                          (NN  != N)  << 1 |
+                          (NNW != NW);
+                    if (!ff_els_decode_bit(&dc->els_ctx, &dc->N_ctx_rung[idx])) {
+                        NWneW = 0;
+                        *pPix = N;
+                        got_pixel = 1;
+                        break;
+                    }
+                }
+                /* fall through */
+            default:
+                NWneW = 1;
+                old_WWneW = WWneW;
+                if (!is_pixel_on_stack(dc, N))
+                    dc->stack[dc->stack_pos++ & EPIC_PIX_STACK_MAX] = N;
+            }
+        }
+
+        (*pRun)++;
+        if (x + *pRun >= tile_width - 1)
+            break;
+
+        UPDATE_NEIGHBOURS(x + *pRun);
+
+        if (!NWneW && NW == N && N == NE) {
+            int pos, run, rle;
+            int start_pos = x + *pRun;
+
+            /* scan for a run of pix in the line above */
+            uint32_t pix = above_row[start_pos + 1];
+            for (pos = start_pos + 2; pos < tile_width; pos++)
+                if (!(above_row[pos] == pix))
+                    break;
+            run = pos - start_pos - 1;
+            idx = av_ceil_log2(run);
+            if (ff_els_decode_bit(&dc->els_ctx, &dc->prev_row_rung[idx]))
+                *pRun += run;
+            else {
+                int flag;
+                /* run-length is coded as plain binary number of idx - 1 bits */
+                for (pos = idx - 1, rle = 0, flag = 0; pos >= 0; pos--) {
+                    if ((1 << pos) + rle < run &&
+                        ff_els_decode_bit(&dc->els_ctx,
+                                          flag ? &dc->runlen_one
+                                               : &dc->runlen_zeroes[pos])) {
+                        flag = 1;
+                        rle |= 1 << pos;
+                    }
+                }
+                *pRun += rle;
+                break; // return immediately
+            }
+            if (x + *pRun >= tile_width - 1)
+                break;
+
+            LOAD_NEIGHBOURS(x + *pRun);
+            WWneW = 0;
+            NWneW = 0;
+        }
+
+        idx = WWneW       << 7 |
+              NWneW       << 6 |
+              (N   != NE) << 5 |
+              (NW  != N)  << 4 |
+              (NWW != NW) << 3 |
+              (NNE != NE) << 2 |
+              (NN  != N)  << 1 |
+              (NNW != NW);
+        WWneW = ff_els_decode_bit(&dc->els_ctx, &dc->W_ctx_rung[idx]);
+    } while (!WWneW);
+
+    dc->next_run_pos = x + *pRun;
+    return got_pixel;
+}
+
+static int epic_predict_pixel2(ePICContext *dc, uint8_t *rung,
+                               uint32_t *pPix, uint32_t pix)
+{
+    if (!ff_els_decode_bit(&dc->els_ctx, rung)) { /* zero: candidate rejected */
+        dc->stack[dc->stack_pos++ & EPIC_PIX_STACK_MAX] = pix;
+        return 0;
+    }
+    *pPix = pix; /* a non-zero bit accepts the candidate */
+    return 1;
+}
+
+static int epic_predict_from_NW_NE(ePICContext *dc, int x, int y, int run,
+                                   int tile_width, const uint32_t *curr_row,
+                                   const uint32_t *above_row, uint32_t *pPix)
+{
+    const int last = x + run - 1; /* last column covered by the run */
+
+    /*
+     * First candidate: NW, unless it equals W or N or is already on the stack.
+     */
+    if (x && y) {
+        const uint32_t nw = above_row[x - 1];
+        if (nw != curr_row[x - 1] && nw != above_row[x] &&
+            !is_pixel_on_stack(dc, nw) &&
+            epic_predict_pixel2(dc, &dc->nw_pred_rung[nw & 0xFF], pPix, nw))
+            return 1;
+    }
+
+    /* second candidate: the pixel above and to the right of the run's last column */
+    if (last < tile_width - 1 && y) {
+        const uint32_t ne = above_row[last + 1];
+        if (ne != above_row[last] && !is_pixel_on_stack(dc, ne) &&
+            epic_predict_pixel2(dc, &dc->ne_pred_rung[ne & 0xFF], pPix, ne))
+            return 1;
+    }
+
+    return 0;
+}
+
+static int epic_decode_from_cache(ePICContext *dc, uint32_t W, uint32_t *pPix)
+{
+    ePICPixHashElem *entry = epic_hash_find(&dc->hash, W);
+    ePICPixListElem *node, *before = NULL;
+
+    if (!entry)
+        return 0;
+
+    for (node = entry->list; node; before = node, node = node->next) {
+        if (is_pixel_on_stack(dc, node->pixel))
+            continue; /* candidates already on the stack are not tested again */
+        if (!ff_els_decode_bit(&dc->els_ctx, &node->rung)) {
+            /* candidate rejected: remember it and try the next one */
+            dc->stack[dc->stack_pos++ & EPIC_PIX_STACK_MAX] = node->pixel;
+            continue;
+        }
+        *pPix = node->pixel;
+        if (node != entry->list) {
+            /* move the accepted candidate to the front of the list */
+            before->next = node->next;
+            node->next   = entry->list;
+            entry->list  = node;
+        }
+        return 1;
+    }
+
+    return 0;
+}
+
+static int epic_decode_tile(ePICContext *dc, uint8_t *out, int tile_height,
+                            int tile_width, int stride)
+{ /* Decode tile_height rows of tile_width 32-bit pixels into out; 0 on success. */
+    int x, y;
+    uint32_t pix;
+    uint32_t *curr_row = NULL, *above_row = NULL, *above2_row;
+
+    for (y = 0; y < tile_height; y++, out += stride) { // stride is in bytes
+        above2_row = above_row; // shift row pointers: two rows up, one row up, current
+        above_row  = curr_row;
+        curr_row   = (uint32_t *) out;
+
+        for (x = 0, dc->next_run_pos = 0; x < tile_width;) {
+            if (dc->els_ctx.err)
+                return AVERROR_INVALIDDATA; // bail out in the case of ELS overflow
+
+            pix = curr_row[x - 1]; // get W pixel (index -1 when x == 0: read before row start)
+
+            if (y >= 1 && x >= 2 && // W differs from WW, NW, NWW, N and has no cache entries
+                pix != curr_row[x - 2]  && pix != above_row[x - 1] &&
+                pix != above_row[x - 2] && pix != above_row[x] &&
+                !epic_cache_entries_for_pixel(&dc->hash, pix)) {
+                curr_row[x] = epic_decode_pixel_pred(dc, x, y, curr_row, above_row);
+                x++;
+            } else {
+                int got_pixel, run;
+                dc->stack_pos = 0; // empty stack
+
+                if (y < 2 || x < 2 || x == tile_width - 1) { // near a tile edge: one pixel
+                    run       = 1;
+                    got_pixel = epic_handle_edges(dc, x, y, curr_row, above_row, &pix);
+                } else {
+                    got_pixel = epic_decode_run_length(dc, x, y, tile_width,
+                                                       curr_row, above_row,
+                                                       above2_row, &pix, &run);
+                    if (got_pixel < 0) // negative result is passed up as the error
+                        return got_pixel;
+                }
+
+                if (!got_pixel && !epic_predict_from_NW_NE(dc, x, y, run,
+                                                           tile_width, curr_row,
+                                                           above_row, &pix)) {
+                    uint32_t ref_pix = curr_row[x - 1]; // W pixel, the cache lookup key
+                    if (!x || !epic_decode_from_cache(dc, ref_pix, &pix)) {
+                        pix = epic_decode_pixel_pred(dc, x, y, curr_row, above_row);
+                        if (x) { // column 0 is excluded from the cache update
+                            int ret = epic_add_pixel_to_cache(&dc->hash,
+                                                              ref_pix,
+                                                              pix);
+                            if (ret)
+                                return ret;
+                        }
+                    }
+                }
+                for (; run > 0; x++, run--) // write pix to the next run columns
+                    curr_row[x] = pix;
+            }
+        }
+    }
+
+    return 0;
+}
+
+/* Decode one ePIC tile: an ELS-coded layer, plus optional JPEG data that replaces
+ * pixels equal to the transparent colour. JPEG-only when the ELS size is zero. */
+static int epic_jb_decode_tile(G2MContext *c, int tile_x, int tile_y,
+                               const uint8_t *src, size_t src_size,
+                               AVCodecContext *avctx)
+{
+    uint8_t prefix, mask = 0x80, *dst;
+    int extrabytes, tile_width, tile_height, awidth, aheight;
+    size_t els_dsize;
+
+    if (!src_size) // empty input: nothing to decode
+        return 0;
+
+    /* get data size of the ELS partition as unsigned variable-length integer */
+    prefix = *src++;
+    src_size--;
+    for (extrabytes = 0; (prefix & mask) && (extrabytes < 7); extrabytes++)
+        mask >>= 1; // count the leading 1 bits of the prefix byte
+    if (extrabytes > 3 || src_size < extrabytes) {
+        av_log(avctx, AV_LOG_ERROR, "ePIC: invalid data size VLI\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    els_dsize = prefix & ((0x80 >> extrabytes) - 1); // mask out the length prefix
+    while (extrabytes-- > 0) {
+        els_dsize = (els_dsize << 8) | *src++;
+        src_size--;
+    }
+
+    if (src_size < els_dsize) {
+        av_log(avctx, AV_LOG_ERROR, "ePIC: data too short, needed %zu, got %zu\n",
+               els_dsize, src_size);
+        return AVERROR_INVALIDDATA;
+    }
+
+    tile_width  = FFMIN(c->width  - tile_x * c->tile_width,  c->tile_width);
+    tile_height = FFMIN(c->height - tile_y * c->tile_height, c->tile_height);
+    awidth      = FFALIGN(tile_width,  16);
+    aheight     = FFALIGN(tile_height, 16);
+
+    if (els_dsize) {
+        int ret, i, j, k;
+        uint8_t tr_r, tr_g, tr_b, *buf;
+        uint32_t *in;
+        memset(&c->ec, 0, sizeof(c->ec)); /* ELS decoder initializations */
+        ff_els_decoder_init(&c->ec.els_ctx, src, els_dsize);
+        epic_hash_init(&c->ec.hash);
+
+        /* decode transparent pixel value */
+        tr_r = ff_els_decode_unsigned(&c->ec.els_ctx, &c->ec.unsigned_rung);
+        tr_g = ff_els_decode_unsigned(&c->ec.els_ctx, &c->ec.unsigned_rung);
+        tr_b = ff_els_decode_unsigned(&c->ec.els_ctx, &c->ec.unsigned_rung);
+        if (c->ec.els_ctx.err != 0) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "ePIC: couldn't decode transparency pixel!\n");
+            ff_els_decoder_uninit(&c->ec.unsigned_rung); // as on the normal path below
+            return AVERROR_INVALIDDATA;
+        }
+
+        ret = epic_decode_tile(&c->ec, c->epic_buf, tile_height, tile_width,
+                               c->epic_buf_stride);
+
+        epic_free_pixel_cache(&c->ec.hash);
+        ff_els_decoder_uninit(&c->ec.unsigned_rung);
+
+        if (ret) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "ePIC: tile decoding failed, frame=%d, tile_x=%d, tile_y=%d\n",
+                   avctx->frame_number, tile_x, tile_y);
+            return AVERROR_INVALIDDATA;
+        }
+
+        buf = c->epic_buf;
+        dst = c->framebuf + tile_x * c->tile_width * 3 +
+              tile_y * c->tile_height * c->framebuf_stride;
+
+        for (j = 0; j < tile_height; j++) { // unpack 32-bit pixels to 3 bytes each
+            uint8_t *out = dst;
+            in  = (uint32_t *) buf;
+            for (i = 0; i < tile_width; i++) {
+                out[0] = (in[i] >> R_shift) & 0xFF;
+                out[1] = (in[i] >> G_shift) & 0xFF;
+                out[2] = (in[i] >> B_shift) & 0xFF;
+                out   += 3;
+            }
+            buf += c->epic_buf_stride;
+            dst += c->framebuf_stride;
+        }
+
+        if (src_size > els_dsize) { // bytes left after the ELS partition: JPEG data
+            uint8_t *jpg;
+            uint32_t tr;
+            int nblocks = 0, bstride = FFALIGN(tile_width, 16) >> 3;
+            int estride = c->epic_buf_stride >> 2; // epic_buf stride in 32-bit pixels
+
+            src      += els_dsize;
+            src_size -= els_dsize;
+
+            in = (uint32_t *) c->epic_buf;
+            tr = (tr_r << R_shift) | (tr_g << G_shift) | (tr_b << B_shift);
+
+            memset(c->kempf_flags, 0,
+                   (aheight >> 3) * bstride * sizeof(*c->kempf_flags));
+            for (j = 0; j < tile_height; j += 8) { // flag 8x8 blocks holding a tr pixel
+                for (i = 0; i < tile_width; i += 8) {
+                    c->kempf_flags[(i >> 3) + (j >> 3) * bstride] = 0;
+                    for (k = 0; k < 8 * 8; k++) {
+                        if (in[i + (k & 7) + (k >> 3) * estride] == tr) {
+                            c->kempf_flags[(i >> 3) + (j >> 3) * bstride] = 1;
+                            nblocks++;
+                            break;
+                        }
+                    }
+                }
+                in += 8 * estride;
+            }
+
+            memset(c->jpeg_tile, 0, c->tile_stride * aheight);
+            jpg_decode_data(&c->jc, awidth, aheight, src, src_size,
+                            c->jpeg_tile, c->tile_stride,
+                            c->kempf_flags, bstride, nblocks, c->swapuv);
+
+            in  = (uint32_t *) c->epic_buf;
+            dst = c->framebuf + tile_x * c->tile_width * 3 +
+                  tile_y * c->tile_height * c->framebuf_stride;
+            jpg = c->jpeg_tile;
+            for (j = 0; j < tile_height; j++) { // copy JPEG pixels over tr pixels only
+                for (i = 0; i < tile_width; i++)
+                    if (in[i] == tr)
+                        memcpy(dst + i * 3, jpg + i * 3, 3);
+                in  += c->epic_buf_stride >> 2;
+                dst += c->framebuf_stride;
+                jpg += c->tile_stride;
+            }
+        }
+    } else { // zero-sized ELS partition: decode JPEG straight into the framebuffer
+        dst = c->framebuf + tile_x * c->tile_width * 3 +
+              tile_y * c->tile_height * c->framebuf_stride;
+        return jpg_decode_data(&c->jc, tile_width, tile_height, src, src_size,
+                               dst, c->framebuf_stride, NULL, 0, 0, c->swapuv);
+    }
+
+    return 0;
+}
+
 static int kempf_restore_buf(const uint8_t *src, int len,
                               uint8_t *dst, int stride,
                               const uint8_t *jpeg_tile, int tile_stride,
@@ -327,6 +1025,7 @@ static int kempf_restore_buf(const uint8_t *src, int len,
     GetBitContext gb;
     int i, j, nb, col;
     int ret;
+    int align_width = FFALIGN(width, 16);
 
     if ((ret = init_get_bits8(&gb, src, len)) < 0)
         return ret;
@@ -346,6 +1045,7 @@ static int kempf_restore_buf(const uint8_t *src, int len,
             else
                 memcpy(dst + i * 3, jpeg_tile + i * 3, 3);
         }
+        skip_bits_long(&gb, nb * (align_width - width));
     }
 
     return 0;
@@ -475,22 +1175,32 @@ static int g2m_init_buffers(G2MContext *c)
             return AVERROR(ENOMEM);
     }
     if (!c->synth_tile || !c->jpeg_tile ||
+        (c->compression == 2 && !c->epic_buf_base) ||
         c->old_tile_w < c->tile_width ||
         c->old_tile_h < c->tile_height) {
-        c->tile_stride = FFALIGN(c->tile_width, 16) * 3;
-        aligned_height = FFALIGN(c->tile_height,    16);
-        av_free(c->synth_tile);
-        av_free(c->jpeg_tile);
-        av_free(c->kempf_buf);
-        av_free(c->kempf_flags);
+        c->tile_stride     = FFALIGN(c->tile_width, 16) * 3;
+        c->epic_buf_stride = FFALIGN(c->tile_width * 4, 16);
+        aligned_height     = FFALIGN(c->tile_height,    16);
+        av_freep(&c->synth_tile);
+        av_freep(&c->jpeg_tile);
+        av_freep(&c->kempf_buf);
+        av_freep(&c->kempf_flags);
+        av_freep(&c->epic_buf_base);
+        c->epic_buf    = NULL;
         c->synth_tile  = av_mallocz(c->tile_stride      * aligned_height);
         c->jpeg_tile   = av_mallocz(c->tile_stride      * aligned_height);
-        c->kempf_buf   = av_mallocz((c->tile_width + 1) * aligned_height
-                                    + FF_INPUT_BUFFER_PADDING_SIZE);
-        c->kempf_flags = av_mallocz( c->tile_width      * aligned_height);
+        c->kempf_buf   = av_mallocz((c->tile_width + 1) * aligned_height +
+                                    AV_INPUT_BUFFER_PADDING_SIZE);
+        c->kempf_flags = av_mallocz(c->tile_width       * aligned_height);
         if (!c->synth_tile || !c->jpeg_tile ||
             !c->kempf_buf || !c->kempf_flags)
             return AVERROR(ENOMEM);
+        if (c->compression == 2) {
+            c->epic_buf_base = av_mallocz(c->epic_buf_stride * aligned_height + 4);
+            if (!c->epic_buf_base)
+                return AVERROR(ENOMEM);
+            c->epic_buf = c->epic_buf_base + 4;
+        }
     }
 
     return 0;
@@ -696,10 +1406,7 @@ static int g2m_decode_frame(AVCodecContext *avctx, void *data,
         return AVERROR_INVALIDDATA;
     }
 
-    if ((magic & 0xF) < 4) {
-        av_log(avctx, AV_LOG_ERROR, "G2M2 and G2M3 are not yet supported\n");
-        return AVERROR(ENOSYS);
-    }
+    c->swapuv = magic == MKBETAG('G', '2', 'M', '2');
 
     while (bytestream2_get_bytes_left(&bc) > 5) {
         chunk_size  = bytestream2_get_le32(&bc) - 1;
@@ -721,8 +1428,7 @@ static int g2m_decode_frame(AVCodecContext *avctx, void *data,
             }
             c->width  = bytestream2_get_be32(&bc);
             c->height = bytestream2_get_be32(&bc);
-            if (c->width  < 16 || c->width  > c->orig_width ||
-                c->height < 16 || c->height > c->orig_height) {
+            if (c->width < 16 || c->height < 16) {
                 av_log(avctx, AV_LOG_ERROR,
                        "Invalid frame dimensions %dx%d\n",
                        c->width, c->height);
@@ -746,7 +1452,7 @@ static int g2m_decode_frame(AVCodecContext *avctx, void *data,
             c->tile_height = bytestream2_get_be32(&bc);
             if (c->tile_width <= 0 || c->tile_height <= 0 ||
                 ((c->tile_width | c->tile_height) & 0xF) ||
-                c->tile_width * 4LL * c->tile_height >= INT_MAX
+                c->tile_width * (uint64_t)c->tile_height >= INT_MAX / 4
             ) {
                 av_log(avctx, AV_LOG_ERROR,
                        "Invalid tile dimensions %dx%d\n",
@@ -808,9 +1514,10 @@ static int g2m_decode_frame(AVCodecContext *avctx, void *data,
             ret = 0;
             switch (c->compression) {
             case COMPR_EPIC_J_B:
-                av_log(avctx, AV_LOG_ERROR,
-                       "ePIC j-b compression is not implemented yet\n");
-                return AVERROR(ENOSYS);
+                ret = epic_jb_decode_tile(c, c->tile_x, c->tile_y,
+                                          buf + bytestream2_tell(&bc),
+                                          chunk_size - 2, avctx);
+                break;
             case COMPR_KEMPF_J_B:
                 ret = kempf_decode_tile(c, c->tile_x, c->tile_y,
                                         buf + bytestream2_tell(&bc),
@@ -877,6 +1584,8 @@ static int g2m_decode_frame(AVCodecContext *avctx, void *data,
     c->height  = 0;
     c->tiles_x =
     c->tiles_y = 0;
+    c->tile_width =
+    c->tile_height = 0;
     return ret;
 }
 
@@ -906,6 +1615,8 @@ static av_cold int g2m_decode_end(AVCodecContext *avctx)
 
     jpg_free_context(&c->jc);
 
+    av_freep(&c->epic_buf_base);
+    c->epic_buf = NULL;
     av_freep(&c->kempf_buf);
     av_freep(&c->kempf_flags);
     av_freep(&c->synth_tile);
@@ -925,5 +1636,6 @@ AVCodec ff_g2m_decoder = {
     .init           = g2m_decode_init,
     .close          = g2m_decode_end,
     .decode         = g2m_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
+    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE,
 };
diff --git a/libavcodec/g722dec.c b/libavcodec/g722dec.c
index 22e90a30..0bfa82a3 100644
--- a/libavcodec/g722dec.c
+++ b/libavcodec/g722dec.c
@@ -145,6 +145,6 @@ AVCodec ff_adpcm_g722_decoder = {
     .priv_data_size = sizeof(G722Context),
     .init           = g722_decode_init,
     .decode         = g722_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .priv_class     = &g722_decoder_class,
 };
diff --git a/libavcodec/g722enc.c b/libavcodec/g722enc.c
index 38432f50..01a3db26 100644
--- a/libavcodec/g722enc.c
+++ b/libavcodec/g722enc.c
@@ -358,7 +358,7 @@ static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     int nb_samples, out_size, ret;
 
     out_size = (frame->nb_samples + 1) / 2;
-    if ((ret = ff_alloc_packet2(avctx, avpkt, out_size)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, avpkt, out_size, 0)) < 0)
         return ret;
 
     nb_samples = frame->nb_samples - (frame->nb_samples & 1);
@@ -389,7 +389,7 @@ AVCodec ff_adpcm_g722_encoder = {
     .init           = g722_encode_init,
     .close          = g722_encode_close,
     .encode2        = g722_encode_frame,
-    .capabilities   = CODEC_CAP_SMALL_LAST_FRAME,
+    .capabilities   = AV_CODEC_CAP_SMALL_LAST_FRAME,
     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
                                                      AV_SAMPLE_FMT_NONE },
 };
diff --git a/libavcodec/g723_1.c b/libavcodec/g723_1.c
index 66afd6af..a11fec8a 100644
--- a/libavcodec/g723_1.c
+++ b/libavcodec/g723_1.c
@@ -20,328 +20,96 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-/**
- * @file
- * G.723.1 compatible decoder
- */
+#include <stdint.h>
+
+#include "libavutil/common.h"
 
-#define BITSTREAM_READER_LE
-#include "libavutil/channel_layout.h"
-#include "libavutil/mem.h"
-#include "libavutil/opt.h"
-#include "avcodec.h"
-#include "get_bits.h"
 #include "acelp_vectors.h"
-#include "celp_filters.h"
+#include "avcodec.h"
 #include "celp_math.h"
-#include "g723_1_data.h"
-#include "internal.h"
-
-#define CNG_RANDOM_SEED 12345
-
-typedef struct g723_1_context {
-    AVClass *class;
-
-    G723_1_Subframe subframe[4];
-    enum FrameType cur_frame_type;
-    enum FrameType past_frame_type;
-    enum Rate cur_rate;
-    uint8_t lsp_index[LSP_BANDS];
-    int pitch_lag[2];
-    int erased_frames;
-
-    int16_t prev_lsp[LPC_ORDER];
-    int16_t sid_lsp[LPC_ORDER];
-    int16_t prev_excitation[PITCH_MAX];
-    int16_t excitation[PITCH_MAX + FRAME_LEN + 4];
-    int16_t synth_mem[LPC_ORDER];
-    int16_t fir_mem[LPC_ORDER];
-    int     iir_mem[LPC_ORDER];
-
-    int random_seed;
-    int cng_random_seed;
-    int interp_index;
-    int interp_gain;
-    int sid_gain;
-    int cur_gain;
-    int reflection_coef;
-    int pf_gain;                 ///< formant postfilter
-                                 ///< gain scaling unit memory
-    int postfilter;
-
-    int16_t audio[FRAME_LEN + LPC_ORDER + PITCH_MAX + 4];
-    int16_t prev_data[HALF_FRAME_LEN];
-    int16_t prev_weight_sig[PITCH_MAX];
-
-
-    int16_t hpf_fir_mem;                   ///< highpass filter fir
-    int     hpf_iir_mem;                   ///< and iir memories
-    int16_t perf_fir_mem[LPC_ORDER];       ///< perceptual filter fir
-    int16_t perf_iir_mem[LPC_ORDER];       ///< and iir memories
-
-    int16_t harmonic_mem[PITCH_MAX];
-} G723_1_Context;
-
-static av_cold int g723_1_decode_init(AVCodecContext *avctx)
-{
-    G723_1_Context *p = avctx->priv_data;
-
-    avctx->channel_layout = AV_CH_LAYOUT_MONO;
-    avctx->sample_fmt     = AV_SAMPLE_FMT_S16;
-    avctx->channels       = 1;
-    p->pf_gain            = 1 << 12;
-
-    memcpy(p->prev_lsp, dc_lsp, LPC_ORDER * sizeof(*p->prev_lsp));
-    memcpy(p->sid_lsp,  dc_lsp, LPC_ORDER * sizeof(*p->sid_lsp));
-
-    p->cng_random_seed = CNG_RANDOM_SEED;
-    p->past_frame_type = SID_FRAME;
+#include "g723_1.h"
 
-    return 0;
-}
-
-/**
- * Unpack the frame into parameters.
- *
- * @param p           the context
- * @param buf         pointer to the input buffer
- * @param buf_size    size of the input buffer
- */
-static int unpack_bitstream(G723_1_Context *p, const uint8_t *buf,
-                            int buf_size)
+int ff_g723_1_scale_vector(int16_t *dst, const int16_t *vector, int length)
 {
-    GetBitContext gb;
-    int ad_cb_len;
-    int temp, info_bits, i;
-
-    init_get_bits(&gb, buf, buf_size * 8);
-
-    /* Extract frame type and rate info */
-    info_bits = get_bits(&gb, 2);
-
-    if (info_bits == 3) {
-        p->cur_frame_type = UNTRANSMITTED_FRAME;
-        return 0;
-    }
-
-    /* Extract 24 bit lsp indices, 8 bit for each band */
-    p->lsp_index[2] = get_bits(&gb, 8);
-    p->lsp_index[1] = get_bits(&gb, 8);
-    p->lsp_index[0] = get_bits(&gb, 8);
-
-    if (info_bits == 2) {
-        p->cur_frame_type = SID_FRAME;
-        p->subframe[0].amp_index = get_bits(&gb, 6);
-        return 0;
-    }
-
-    /* Extract the info common to both rates */
-    p->cur_rate       = info_bits ? RATE_5300 : RATE_6300;
-    p->cur_frame_type = ACTIVE_FRAME;
-
-    p->pitch_lag[0] = get_bits(&gb, 7);
-    if (p->pitch_lag[0] > 123)       /* test if forbidden code */
-        return -1;
-    p->pitch_lag[0] += PITCH_MIN;
-    p->subframe[1].ad_cb_lag = get_bits(&gb, 2);
-
-    p->pitch_lag[1] = get_bits(&gb, 7);
-    if (p->pitch_lag[1] > 123)
-        return -1;
-    p->pitch_lag[1] += PITCH_MIN;
-    p->subframe[3].ad_cb_lag = get_bits(&gb, 2);
-    p->subframe[0].ad_cb_lag = 1;
-    p->subframe[2].ad_cb_lag = 1;
-
-    for (i = 0; i < SUBFRAMES; i++) {
-        /* Extract combined gain */
-        temp = get_bits(&gb, 12);
-        ad_cb_len = 170;
-        p->subframe[i].dirac_train = 0;
-        if (p->cur_rate == RATE_6300 && p->pitch_lag[i >> 1] < SUBFRAME_LEN - 2) {
-            p->subframe[i].dirac_train = temp >> 11;
-            temp &= 0x7FF;
-            ad_cb_len = 85;
-        }
-        p->subframe[i].ad_cb_gain = FASTDIV(temp, GAIN_LEVELS);
-        if (p->subframe[i].ad_cb_gain < ad_cb_len) {
-            p->subframe[i].amp_index = temp - p->subframe[i].ad_cb_gain *
-                                       GAIN_LEVELS;
-        } else {
-            return -1;
-        }
-    }
-
-    p->subframe[0].grid_index = get_bits1(&gb);
-    p->subframe[1].grid_index = get_bits1(&gb);
-    p->subframe[2].grid_index = get_bits1(&gb);
-    p->subframe[3].grid_index = get_bits1(&gb);
-
-    if (p->cur_rate == RATE_6300) {
-        skip_bits1(&gb);  /* skip reserved bit */
-
-        /* Compute pulse_pos index using the 13-bit combined position index */
-        temp = get_bits(&gb, 13);
-        p->subframe[0].pulse_pos = temp / 810;
-
-        temp -= p->subframe[0].pulse_pos * 810;
-        p->subframe[1].pulse_pos = FASTDIV(temp, 90);
-
-        temp -= p->subframe[1].pulse_pos * 90;
-        p->subframe[2].pulse_pos = FASTDIV(temp, 9);
-        p->subframe[3].pulse_pos = temp - p->subframe[2].pulse_pos * 9;
+    int bits, max = 0;
+    int i;
 
-        p->subframe[0].pulse_pos = (p->subframe[0].pulse_pos << 16) +
-                                   get_bits(&gb, 16);
-        p->subframe[1].pulse_pos = (p->subframe[1].pulse_pos << 14) +
-                                   get_bits(&gb, 14);
-        p->subframe[2].pulse_pos = (p->subframe[2].pulse_pos << 16) +
-                                   get_bits(&gb, 16);
-        p->subframe[3].pulse_pos = (p->subframe[3].pulse_pos << 14) +
-                                   get_bits(&gb, 14);
+    for (i = 0; i < length; i++)
+        max |= FFABS(vector[i]);
 
-        p->subframe[0].pulse_sign = get_bits(&gb, 6);
-        p->subframe[1].pulse_sign = get_bits(&gb, 5);
-        p->subframe[2].pulse_sign = get_bits(&gb, 6);
-        p->subframe[3].pulse_sign = get_bits(&gb, 5);
-    } else { /* 5300 bps */
-        p->subframe[0].pulse_pos  = get_bits(&gb, 12);
-        p->subframe[1].pulse_pos  = get_bits(&gb, 12);
-        p->subframe[2].pulse_pos  = get_bits(&gb, 12);
-        p->subframe[3].pulse_pos  = get_bits(&gb, 12);
+    bits= 14 - av_log2_16bit(max);
+    bits= FFMAX(bits, 0);
 
-        p->subframe[0].pulse_sign = get_bits(&gb, 4);
-        p->subframe[1].pulse_sign = get_bits(&gb, 4);
-        p->subframe[2].pulse_sign = get_bits(&gb, 4);
-        p->subframe[3].pulse_sign = get_bits(&gb, 4);
-    }
+    for (i = 0; i < length; i++)
+        dst[i] = vector[i] << bits >> 3;
 
-    return 0;
+    return bits - 3;
 }
 
-/**
- * Bitexact implementation of sqrt(val/2).
- */
-static int16_t square_root(unsigned val)
+int ff_g723_1_normalize_bits(int num, int width)
 {
-    av_assert2(!(val & 0x80000000));
-
-    return (ff_sqrt(val << 1) >> 1) & (~1);
+    return width - av_log2(num) - 1;
 }
 
-/**
- * Calculate the number of left-shifts required for normalizing the input.
- *
- * @param num   input number
- * @param width width of the input, 15 or 31 bits
- */
-static int normalize_bits(int num, int width)
+int ff_g723_1_dot_product(const int16_t *a, const int16_t *b, int length)
 {
-    return width - av_log2(num) - 1;
+    int sum = ff_dot_product(a, b, length);
+    return av_sat_add32(sum, sum);
 }
 
-#define normalize_bits_int16(num) normalize_bits(num, 15)
-#define normalize_bits_int32(num) normalize_bits(num, 31)
-
-/**
- * Scale vector contents based on the largest of their absolutes.
- */
-static int scale_vector(int16_t *dst, const int16_t *vector, int length)
+void ff_g723_1_get_residual(int16_t *residual, int16_t *prev_excitation,
+                            int lag)
 {
-    int bits, max = 0;
+    int offset = PITCH_MAX - PITCH_ORDER / 2 - lag;
     int i;
 
-    for (i = 0; i < length; i++)
-        max |= FFABS(vector[i]);
-
-    bits= 14 - av_log2_16bit(max);
-    bits= FFMAX(bits, 0);
-
-    for (i = 0; i < length; i++)
-        dst[i] = vector[i] << bits >> 3;
+    residual[0] = prev_excitation[offset];
+    residual[1] = prev_excitation[offset + 1];
 
-    return bits - 3;
+    offset += 2;
+    for (i = 2; i < SUBFRAME_LEN + PITCH_ORDER - 1; i++)
+        residual[i] = prev_excitation[offset + (i - 2) % lag];
 }
 
-/**
- * Perform inverse quantization of LSP frequencies.
- *
- * @param cur_lsp    the current LSP vector
- * @param prev_lsp   the previous LSP vector
- * @param lsp_index  VQ indices
- * @param bad_frame  bad frame flag
- */
-static void inverse_quant(int16_t *cur_lsp, int16_t *prev_lsp,
-                          uint8_t *lsp_index, int bad_frame)
+void ff_g723_1_gen_dirac_train(int16_t *buf, int pitch_lag)
 {
-    int min_dist, pred;
-    int i, j, temp, stable;
+    int16_t vector[SUBFRAME_LEN];
+    int i, j;
 
-    /* Check for frame erasure */
-    if (!bad_frame) {
-        min_dist     = 0x100;
-        pred         = 12288;
-    } else {
-        min_dist     = 0x200;
-        pred         = 23552;
-        lsp_index[0] = lsp_index[1] = lsp_index[2] = 0;
+    memcpy(vector, buf, SUBFRAME_LEN * sizeof(*vector));
+    for (i = pitch_lag; i < SUBFRAME_LEN; i += pitch_lag) {
+        for (j = 0; j < SUBFRAME_LEN - i; j++)
+            buf[i + j] += vector[j];
     }
+}
 
-    /* Get the VQ table entry corresponding to the transmitted index */
-    cur_lsp[0] = lsp_band0[lsp_index[0]][0];
-    cur_lsp[1] = lsp_band0[lsp_index[0]][1];
-    cur_lsp[2] = lsp_band0[lsp_index[0]][2];
-    cur_lsp[3] = lsp_band1[lsp_index[1]][0];
-    cur_lsp[4] = lsp_band1[lsp_index[1]][1];
-    cur_lsp[5] = lsp_band1[lsp_index[1]][2];
-    cur_lsp[6] = lsp_band2[lsp_index[2]][0];
-    cur_lsp[7] = lsp_band2[lsp_index[2]][1];
-    cur_lsp[8] = lsp_band2[lsp_index[2]][2];
-    cur_lsp[9] = lsp_band2[lsp_index[2]][3];
+void ff_g723_1_gen_acb_excitation(int16_t *vector, int16_t *prev_excitation,
+                                  int pitch_lag, G723_1_Subframe *subfrm,
+                                  enum Rate cur_rate)
+{
+    int16_t residual[SUBFRAME_LEN + PITCH_ORDER - 1];
+    const int16_t *cb_ptr;
+    int lag = pitch_lag + subfrm->ad_cb_lag - 1;
 
-    /* Add predicted vector & DC component to the previously quantized vector */
-    for (i = 0; i < LPC_ORDER; i++) {
-        temp        = ((prev_lsp[i] - dc_lsp[i]) * pred + (1 << 14)) >> 15;
-        cur_lsp[i] += dc_lsp[i] + temp;
-    }
+    int i;
+    int sum;
 
-    for (i = 0; i < LPC_ORDER; i++) {
-        cur_lsp[0]             = FFMAX(cur_lsp[0],  0x180);
-        cur_lsp[LPC_ORDER - 1] = FFMIN(cur_lsp[LPC_ORDER - 1], 0x7e00);
+    ff_g723_1_get_residual(residual, prev_excitation, lag);
 
-        /* Stability check */
-        for (j = 1; j < LPC_ORDER; j++) {
-            temp = min_dist + cur_lsp[j - 1] - cur_lsp[j];
-            if (temp > 0) {
-                temp >>= 1;
-                cur_lsp[j - 1] -= temp;
-                cur_lsp[j]     += temp;
-            }
-        }
-        stable = 1;
-        for (j = 1; j < LPC_ORDER; j++) {
-            temp = cur_lsp[j - 1] + min_dist - cur_lsp[j] - 4;
-            if (temp > 0) {
-                stable = 0;
-                break;
-            }
-        }
-        if (stable)
-            break;
+    /* Select quantization table */
+    if (cur_rate == RATE_6300 && pitch_lag < SUBFRAME_LEN - 2) {
+        cb_ptr = adaptive_cb_gain85;
+    } else
+        cb_ptr = adaptive_cb_gain170;
+
+    /* Calculate adaptive vector */
+    cb_ptr += subfrm->ad_cb_gain * 20;
+    for (i = 0; i < SUBFRAME_LEN; i++) {
+        sum = ff_dot_product(residual + i, cb_ptr, PITCH_ORDER);
+        vector[i] = av_sat_dadd32(1 << 15, av_sat_add32(sum, sum)) >> 16;
     }
-    if (!stable)
-        memcpy(cur_lsp, prev_lsp, LPC_ORDER * sizeof(*cur_lsp));
 }
 
-/**
- * Bitexact implementation of 2ab scaled by 1/2^16.
- *
- * @param a 32 bit multiplicand
- * @param b 16 bit multiplier
- */
-#define MULL2(a, b) \
-        MULL(a,b,15)
-
 /**
  * Convert LSP frequencies to LPC coefficients.
  *
@@ -409,15 +177,8 @@ static void lsp2lpc(int16_t *lpc)
     }
 }
 
-/**
- * Quantize LSP frequencies by interpolation and convert them to
- * the corresponding LPC coefficients.
- *
- * @param lpc      buffer for LPC coefficients
- * @param cur_lsp  the current LSP vector
- * @param prev_lsp the previous LSP vector
- */
-static void lsp_interpolate(int16_t *lpc, int16_t *cur_lsp, int16_t *prev_lsp)
+void ff_g723_1_lsp_interpolate(int16_t *lpc, int16_t *cur_lsp,
+                               int16_t *prev_lsp)
 {
     int i;
     int16_t *lpc_ptr = lpc;
@@ -437,2048 +198,64 @@ static void lsp_interpolate(int16_t *lpc, int16_t *cur_lsp, int16_t *prev_lsp)
     }
 }
 
-/**
- * Generate a train of dirac functions with period as pitch lag.
- */
-static void gen_dirac_train(int16_t *buf, int pitch_lag)
+void ff_g723_1_inverse_quant(int16_t *cur_lsp, int16_t *prev_lsp,
+                             uint8_t *lsp_index, int bad_frame)
 {
-    int16_t vector[SUBFRAME_LEN];
-    int i, j;
+    int min_dist, pred;
+    int i, j, temp, stable;
 
-    memcpy(vector, buf, SUBFRAME_LEN * sizeof(*vector));
-    for (i = pitch_lag; i < SUBFRAME_LEN; i += pitch_lag) {
-        for (j = 0; j < SUBFRAME_LEN - i; j++)
-            buf[i + j] += vector[j];
+    /* Check for frame erasure */
+    if (!bad_frame) {
+        min_dist     = 0x100;
+        pred         = 12288;
+    } else {
+        min_dist     = 0x200;
+        pred         = 23552;
+        lsp_index[0] = lsp_index[1] = lsp_index[2] = 0;
     }
-}
 
-/**
- * Generate fixed codebook excitation vector.
- *
- * @param vector    decoded excitation vector
- * @param subfrm    current subframe
- * @param cur_rate  current bitrate
- * @param pitch_lag closed loop pitch lag
- * @param index     current subframe index
- */
-static void gen_fcb_excitation(int16_t *vector, G723_1_Subframe *subfrm,
-                               enum Rate cur_rate, int pitch_lag, int index)
-{
-    int temp, i, j;
+    /* Get the VQ table entry corresponding to the transmitted index */
+    cur_lsp[0] = lsp_band0[lsp_index[0]][0];
+    cur_lsp[1] = lsp_band0[lsp_index[0]][1];
+    cur_lsp[2] = lsp_band0[lsp_index[0]][2];
+    cur_lsp[3] = lsp_band1[lsp_index[1]][0];
+    cur_lsp[4] = lsp_band1[lsp_index[1]][1];
+    cur_lsp[5] = lsp_band1[lsp_index[1]][2];
+    cur_lsp[6] = lsp_band2[lsp_index[2]][0];
+    cur_lsp[7] = lsp_band2[lsp_index[2]][1];
+    cur_lsp[8] = lsp_band2[lsp_index[2]][2];
+    cur_lsp[9] = lsp_band2[lsp_index[2]][3];
 
-    memset(vector, 0, SUBFRAME_LEN * sizeof(*vector));
+    /* Add predicted vector & DC component to the previously quantized vector */
+    for (i = 0; i < LPC_ORDER; i++) {
+        temp        = ((prev_lsp[i] - dc_lsp[i]) * pred + (1 << 14)) >> 15;
+        cur_lsp[i] += dc_lsp[i] + temp;
+    }
 
-    if (cur_rate == RATE_6300) {
-        if (subfrm->pulse_pos >= max_pos[index])
-            return;
+    for (i = 0; i < LPC_ORDER; i++) {
+        cur_lsp[0]             = FFMAX(cur_lsp[0],  0x180);
+        cur_lsp[LPC_ORDER - 1] = FFMIN(cur_lsp[LPC_ORDER - 1], 0x7e00);
 
-        /* Decode amplitudes and positions */
-        j = PULSE_MAX - pulses[index];
-        temp = subfrm->pulse_pos;
-        for (i = 0; i < SUBFRAME_LEN / GRID_SIZE; i++) {
-            temp -= combinatorial_table[j][i];
-            if (temp >= 0)
-                continue;
-            temp += combinatorial_table[j++][i];
-            if (subfrm->pulse_sign & (1 << (PULSE_MAX - j))) {
-                vector[subfrm->grid_index + GRID_SIZE * i] =
-                                        -fixed_cb_gain[subfrm->amp_index];
-            } else {
-                vector[subfrm->grid_index + GRID_SIZE * i] =
-                                         fixed_cb_gain[subfrm->amp_index];
+        /* Stability check */
+        for (j = 1; j < LPC_ORDER; j++) {
+            temp = min_dist + cur_lsp[j - 1] - cur_lsp[j];
+            if (temp > 0) {
+                temp >>= 1;
+                cur_lsp[j - 1] -= temp;
+                cur_lsp[j]     += temp;
             }
-            if (j == PULSE_MAX)
-                break;
-        }
-        if (subfrm->dirac_train == 1)
-            gen_dirac_train(vector, pitch_lag);
-    } else { /* 5300 bps */
-        int cb_gain  = fixed_cb_gain[subfrm->amp_index];
-        int cb_shift = subfrm->grid_index;
-        int cb_sign  = subfrm->pulse_sign;
-        int cb_pos   = subfrm->pulse_pos;
-        int offset, beta, lag;
-
-        for (i = 0; i < 8; i += 2) {
-            offset         = ((cb_pos & 7) << 3) + cb_shift + i;
-            vector[offset] = (cb_sign & 1) ? cb_gain : -cb_gain;
-            cb_pos  >>= 3;
-            cb_sign >>= 1;
         }
-
-        /* Enhance harmonic components */
-        lag  = pitch_contrib[subfrm->ad_cb_gain << 1] + pitch_lag +
-               subfrm->ad_cb_lag - 1;
-        beta = pitch_contrib[(subfrm->ad_cb_gain << 1) + 1];
-
-        if (lag < SUBFRAME_LEN - 2) {
-            for (i = lag; i < SUBFRAME_LEN; i++)
-                vector[i] += beta * vector[i - lag] >> 15;
+        stable = 1;
+        for (j = 1; j < LPC_ORDER; j++) {
+            temp = cur_lsp[j - 1] + min_dist - cur_lsp[j] - 4;
+            if (temp > 0) {
+                stable = 0;
+                break;
+            }
         }
+        if (stable)
+            break;
     }
+    if (!stable)
+        memcpy(cur_lsp, prev_lsp, LPC_ORDER * sizeof(*cur_lsp));
 }
-
-/**
- * Get delayed contribution from the previous excitation vector.
- */
-static void get_residual(int16_t *residual, int16_t *prev_excitation, int lag)
-{
-    int offset = PITCH_MAX - PITCH_ORDER / 2 - lag;
-    int i;
-
-    residual[0] = prev_excitation[offset];
-    residual[1] = prev_excitation[offset + 1];
-
-    offset += 2;
-    for (i = 2; i < SUBFRAME_LEN + PITCH_ORDER - 1; i++)
-        residual[i] = prev_excitation[offset + (i - 2) % lag];
-}
-
-static int dot_product(const int16_t *a, const int16_t *b, int length)
-{
-    int sum = ff_dot_product(a,b,length);
-    return av_sat_add32(sum, sum);
-}
-
-/**
- * Generate adaptive codebook excitation.
- */
-static void gen_acb_excitation(int16_t *vector, int16_t *prev_excitation,
-                               int pitch_lag, G723_1_Subframe *subfrm,
-                               enum Rate cur_rate)
-{
-    int16_t residual[SUBFRAME_LEN + PITCH_ORDER - 1];
-    const int16_t *cb_ptr;
-    int lag = pitch_lag + subfrm->ad_cb_lag - 1;
-
-    int i;
-    int sum;
-
-    get_residual(residual, prev_excitation, lag);
-
-    /* Select quantization table */
-    if (cur_rate == RATE_6300 && pitch_lag < SUBFRAME_LEN - 2) {
-        cb_ptr = adaptive_cb_gain85;
-    } else
-        cb_ptr = adaptive_cb_gain170;
-
-    /* Calculate adaptive vector */
-    cb_ptr += subfrm->ad_cb_gain * 20;
-    for (i = 0; i < SUBFRAME_LEN; i++) {
-        sum = ff_dot_product(residual + i, cb_ptr, PITCH_ORDER);
-        vector[i] = av_sat_dadd32(1 << 15, av_sat_add32(sum, sum)) >> 16;
-    }
-}
-
-/**
- * Estimate maximum auto-correlation around pitch lag.
- *
- * @param buf       buffer with offset applied
- * @param offset    offset of the excitation vector
- * @param ccr_max   pointer to the maximum auto-correlation
- * @param pitch_lag decoded pitch lag
- * @param length    length of autocorrelation
- * @param dir       forward lag(1) / backward lag(-1)
- */
-static int autocorr_max(const int16_t *buf, int offset, int *ccr_max,
-                        int pitch_lag, int length, int dir)
-{
-    int limit, ccr, lag = 0;
-    int i;
-
-    pitch_lag = FFMIN(PITCH_MAX - 3, pitch_lag);
-    if (dir > 0)
-        limit = FFMIN(FRAME_LEN + PITCH_MAX - offset - length, pitch_lag + 3);
-    else
-        limit = pitch_lag + 3;
-
-    for (i = pitch_lag - 3; i <= limit; i++) {
-        ccr = dot_product(buf, buf + dir * i, length);
-
-        if (ccr > *ccr_max) {
-            *ccr_max = ccr;
-            lag = i;
-        }
-    }
-    return lag;
-}
-
-/**
- * Calculate pitch postfilter optimal and scaling gains.
- *
- * @param lag      pitch postfilter forward/backward lag
- * @param ppf      pitch postfilter parameters
- * @param cur_rate current bitrate
- * @param tgt_eng  target energy
- * @param ccr      cross-correlation
- * @param res_eng  residual energy
- */
-static void comp_ppf_gains(int lag, PPFParam *ppf, enum Rate cur_rate,
-                           int tgt_eng, int ccr, int res_eng)
-{
-    int pf_residual;     /* square of postfiltered residual */
-    int temp1, temp2;
-
-    ppf->index = lag;
-
-    temp1 = tgt_eng * res_eng >> 1;
-    temp2 = ccr * ccr << 1;
-
-    if (temp2 > temp1) {
-        if (ccr >= res_eng) {
-            ppf->opt_gain = ppf_gain_weight[cur_rate];
-        } else {
-            ppf->opt_gain = (ccr << 15) / res_eng *
-                            ppf_gain_weight[cur_rate] >> 15;
-        }
-        /* pf_res^2 = tgt_eng + 2*ccr*gain + res_eng*gain^2 */
-        temp1       = (tgt_eng << 15) + (ccr * ppf->opt_gain << 1);
-        temp2       = (ppf->opt_gain * ppf->opt_gain >> 15) * res_eng;
-        pf_residual = av_sat_add32(temp1, temp2 + (1 << 15)) >> 16;
-
-        if (tgt_eng >= pf_residual << 1) {
-            temp1 = 0x7fff;
-        } else {
-            temp1 = (tgt_eng << 14) / pf_residual;
-        }
-
-        /* scaling_gain = sqrt(tgt_eng/pf_res^2) */
-        ppf->sc_gain = square_root(temp1 << 16);
-    } else {
-        ppf->opt_gain = 0;
-        ppf->sc_gain  = 0x7fff;
-    }
-
-    ppf->opt_gain = av_clip_int16(ppf->opt_gain * ppf->sc_gain >> 15);
-}
-
-/**
- * Calculate pitch postfilter parameters.
- *
- * @param p         the context
- * @param offset    offset of the excitation vector
- * @param pitch_lag decoded pitch lag
- * @param ppf       pitch postfilter parameters
- * @param cur_rate  current bitrate
- */
-static void comp_ppf_coeff(G723_1_Context *p, int offset, int pitch_lag,
-                           PPFParam *ppf, enum Rate cur_rate)
-{
-
-    int16_t scale;
-    int i;
-    int temp1, temp2;
-
-    /*
-     * 0 - target energy
-     * 1 - forward cross-correlation
-     * 2 - forward residual energy
-     * 3 - backward cross-correlation
-     * 4 - backward residual energy
-     */
-    int energy[5] = {0, 0, 0, 0, 0};
-    int16_t *buf  = p->audio + LPC_ORDER + offset;
-    int fwd_lag   = autocorr_max(buf, offset, &energy[1], pitch_lag,
-                                 SUBFRAME_LEN, 1);
-    int back_lag  = autocorr_max(buf, offset, &energy[3], pitch_lag,
-                                 SUBFRAME_LEN, -1);
-
-    ppf->index    = 0;
-    ppf->opt_gain = 0;
-    ppf->sc_gain  = 0x7fff;
-
-    /* Case 0, Section 3.6 */
-    if (!back_lag && !fwd_lag)
-        return;
-
-    /* Compute target energy */
-    energy[0] = dot_product(buf, buf, SUBFRAME_LEN);
-
-    /* Compute forward residual energy */
-    if (fwd_lag)
-        energy[2] = dot_product(buf + fwd_lag, buf + fwd_lag, SUBFRAME_LEN);
-
-    /* Compute backward residual energy */
-    if (back_lag)
-        energy[4] = dot_product(buf - back_lag, buf - back_lag, SUBFRAME_LEN);
-
-    /* Normalize and shorten */
-    temp1 = 0;
-    for (i = 0; i < 5; i++)
-        temp1 = FFMAX(energy[i], temp1);
-
-    scale = normalize_bits(temp1, 31);
-    for (i = 0; i < 5; i++)
-        energy[i] = (energy[i] << scale) >> 16;
-
-    if (fwd_lag && !back_lag) {  /* Case 1 */
-        comp_ppf_gains(fwd_lag,  ppf, cur_rate, energy[0], energy[1],
-                       energy[2]);
-    } else if (!fwd_lag) {       /* Case 2 */
-        comp_ppf_gains(-back_lag, ppf, cur_rate, energy[0], energy[3],
-                       energy[4]);
-    } else {                     /* Case 3 */
-
-        /*
-         * Select the largest of energy[1]^2/energy[2]
-         * and energy[3]^2/energy[4]
-         */
-        temp1 = energy[4] * ((energy[1] * energy[1] + (1 << 14)) >> 15);
-        temp2 = energy[2] * ((energy[3] * energy[3] + (1 << 14)) >> 15);
-        if (temp1 >= temp2) {
-            comp_ppf_gains(fwd_lag, ppf, cur_rate, energy[0], energy[1],
-                           energy[2]);
-        } else {
-            comp_ppf_gains(-back_lag, ppf, cur_rate, energy[0], energy[3],
-                           energy[4]);
-        }
-    }
-}
-
-/**
- * Classify frames as voiced/unvoiced.
- *
- * @param p         the context
- * @param pitch_lag decoded pitch_lag
- * @param exc_eng   excitation energy estimation
- * @param scale     scaling factor of exc_eng
- *
- * @return residual interpolation index if voiced, 0 otherwise
- */
-static int comp_interp_index(G723_1_Context *p, int pitch_lag,
-                             int *exc_eng, int *scale)
-{
-    int offset = PITCH_MAX + 2 * SUBFRAME_LEN;
-    int16_t *buf = p->audio + LPC_ORDER;
-
-    int index, ccr, tgt_eng, best_eng, temp;
-
-    *scale = scale_vector(buf, p->excitation, FRAME_LEN + PITCH_MAX);
-    buf   += offset;
-
-    /* Compute maximum backward cross-correlation */
-    ccr   = 0;
-    index = autocorr_max(buf, offset, &ccr, pitch_lag, SUBFRAME_LEN * 2, -1);
-    ccr   = av_sat_add32(ccr, 1 << 15) >> 16;
-
-    /* Compute target energy */
-    tgt_eng  = dot_product(buf, buf, SUBFRAME_LEN * 2);
-    *exc_eng = av_sat_add32(tgt_eng, 1 << 15) >> 16;
-
-    if (ccr <= 0)
-        return 0;
-
-    /* Compute best energy */
-    best_eng = dot_product(buf - index, buf - index, SUBFRAME_LEN * 2);
-    best_eng = av_sat_add32(best_eng, 1 << 15) >> 16;
-
-    temp = best_eng * *exc_eng >> 3;
-
-    if (temp < ccr * ccr) {
-        return index;
-    } else
-        return 0;
-}
-
-/**
- * Peform residual interpolation based on frame classification.
- *
- * @param buf   decoded excitation vector
- * @param out   output vector
- * @param lag   decoded pitch lag
- * @param gain  interpolated gain
- * @param rseed seed for random number generator
- */
-static void residual_interp(int16_t *buf, int16_t *out, int lag,
-                            int gain, int *rseed)
-{
-    int i;
-    if (lag) { /* Voiced */
-        int16_t *vector_ptr = buf + PITCH_MAX;
-        /* Attenuate */
-        for (i = 0; i < lag; i++)
-            out[i] = vector_ptr[i - lag] * 3 >> 2;
-        av_memcpy_backptr((uint8_t*)(out + lag), lag * sizeof(*out),
-                          (FRAME_LEN - lag) * sizeof(*out));
-    } else {  /* Unvoiced */
-        for (i = 0; i < FRAME_LEN; i++) {
-            *rseed = *rseed * 521 + 259;
-            out[i] = gain * *rseed >> 15;
-        }
-        memset(buf, 0, (FRAME_LEN + PITCH_MAX) * sizeof(*buf));
-    }
-}
-
-/**
- * Perform IIR filtering.
- *
- * @param fir_coef FIR coefficients
- * @param iir_coef IIR coefficients
- * @param src      source vector
- * @param dest     destination vector
- * @param width    width of the output, 16 bits(0) / 32 bits(1)
- */
-#define iir_filter(fir_coef, iir_coef, src, dest, width)\
-{\
-    int m, n;\
-    int res_shift = 16 & ~-(width);\
-    int in_shift  = 16 - res_shift;\
-\
-    for (m = 0; m < SUBFRAME_LEN; m++) {\
-        int64_t filter = 0;\
-        for (n = 1; n <= LPC_ORDER; n++) {\
-            filter -= (fir_coef)[n - 1] * (src)[m - n] -\
-                      (iir_coef)[n - 1] * ((dest)[m - n] >> in_shift);\
-        }\
-\
-        (dest)[m] = av_clipl_int32(((src)[m] << 16) + (filter << 3) +\
-                                   (1 << 15)) >> res_shift;\
-    }\
-}
-
-/**
- * Adjust gain of postfiltered signal.
- *
- * @param p      the context
- * @param buf    postfiltered output vector
- * @param energy input energy coefficient
- */
-static void gain_scale(G723_1_Context *p, int16_t * buf, int energy)
-{
-    int num, denom, gain, bits1, bits2;
-    int i;
-
-    num   = energy;
-    denom = 0;
-    for (i = 0; i < SUBFRAME_LEN; i++) {
-        int temp = buf[i] >> 2;
-        temp *= temp;
-        denom = av_sat_dadd32(denom, temp);
-    }
-
-    if (num && denom) {
-        bits1   = normalize_bits(num,   31);
-        bits2   = normalize_bits(denom, 31);
-        num     = num << bits1 >> 1;
-        denom <<= bits2;
-
-        bits2 = 5 + bits1 - bits2;
-        bits2 = FFMAX(0, bits2);
-
-        gain = (num >> 1) / (denom >> 16);
-        gain = square_root(gain << 16 >> bits2);
-    } else {
-        gain = 1 << 12;
-    }
-
-    for (i = 0; i < SUBFRAME_LEN; i++) {
-        p->pf_gain = (15 * p->pf_gain + gain + (1 << 3)) >> 4;
-        buf[i]     = av_clip_int16((buf[i] * (p->pf_gain + (p->pf_gain >> 4)) +
-                                   (1 << 10)) >> 11);
-    }
-}
-
-/**
- * Perform formant filtering.
- *
- * @param p   the context
- * @param lpc quantized lpc coefficients
- * @param buf input buffer
- * @param dst output buffer
- */
-static void formant_postfilter(G723_1_Context *p, int16_t *lpc,
-                               int16_t *buf, int16_t *dst)
-{
-    int16_t filter_coef[2][LPC_ORDER];
-    int filter_signal[LPC_ORDER + FRAME_LEN], *signal_ptr;
-    int i, j, k;
-
-    memcpy(buf, p->fir_mem, LPC_ORDER * sizeof(*buf));
-    memcpy(filter_signal, p->iir_mem, LPC_ORDER * sizeof(*filter_signal));
-
-    for (i = LPC_ORDER, j = 0; j < SUBFRAMES; i += SUBFRAME_LEN, j++) {
-        for (k = 0; k < LPC_ORDER; k++) {
-            filter_coef[0][k] = (-lpc[k] * postfilter_tbl[0][k] +
-                                 (1 << 14)) >> 15;
-            filter_coef[1][k] = (-lpc[k] * postfilter_tbl[1][k] +
-                                 (1 << 14)) >> 15;
-        }
-        iir_filter(filter_coef[0], filter_coef[1], buf + i,
-                   filter_signal + i, 1);
-        lpc += LPC_ORDER;
-    }
-
-    memcpy(p->fir_mem, buf + FRAME_LEN, LPC_ORDER * sizeof(int16_t));
-    memcpy(p->iir_mem, filter_signal + FRAME_LEN, LPC_ORDER * sizeof(int));
-
-    buf += LPC_ORDER;
-    signal_ptr = filter_signal + LPC_ORDER;
-    for (i = 0; i < SUBFRAMES; i++) {
-        int temp;
-        int auto_corr[2];
-        int scale, energy;
-
-        /* Normalize */
-        scale = scale_vector(dst, buf, SUBFRAME_LEN);
-
-        /* Compute auto correlation coefficients */
-        auto_corr[0] = dot_product(dst, dst + 1, SUBFRAME_LEN - 1);
-        auto_corr[1] = dot_product(dst, dst,     SUBFRAME_LEN);
-
-        /* Compute reflection coefficient */
-        temp = auto_corr[1] >> 16;
-        if (temp) {
-            temp = (auto_corr[0] >> 2) / temp;
-        }
-        p->reflection_coef = (3 * p->reflection_coef + temp + 2) >> 2;
-        temp = -p->reflection_coef >> 1 & ~3;
-
-        /* Compensation filter */
-        for (j = 0; j < SUBFRAME_LEN; j++) {
-            dst[j] = av_sat_dadd32(signal_ptr[j],
-                                   (signal_ptr[j - 1] >> 16) * temp) >> 16;
-        }
-
-        /* Compute normalized signal energy */
-        temp = 2 * scale + 4;
-        if (temp < 0) {
-            energy = av_clipl_int32((int64_t)auto_corr[1] << -temp);
-        } else
-            energy = auto_corr[1] >> temp;
-
-        gain_scale(p, dst, energy);
-
-        buf        += SUBFRAME_LEN;
-        signal_ptr += SUBFRAME_LEN;
-        dst        += SUBFRAME_LEN;
-    }
-}
-
-static int sid_gain_to_lsp_index(int gain)
-{
-    if (gain < 0x10)
-        return gain << 6;
-    else if (gain < 0x20)
-        return gain - 8 << 7;
-    else
-        return gain - 20 << 8;
-}
-
-static inline int cng_rand(int *state, int base)
-{
-    *state = (*state * 521 + 259) & 0xFFFF;
-    return (*state & 0x7FFF) * base >> 15;
-}
-
-static int estimate_sid_gain(G723_1_Context *p)
-{
-    int i, shift, seg, seg2, t, val, val_add, x, y;
-
-    shift = 16 - p->cur_gain * 2;
-    if (shift > 0)
-        t = p->sid_gain << shift;
-    else
-        t = p->sid_gain >> -shift;
-    x = t * cng_filt[0] >> 16;
-
-    if (x >= cng_bseg[2])
-        return 0x3F;
-
-    if (x >= cng_bseg[1]) {
-        shift = 4;
-        seg   = 3;
-    } else {
-        shift = 3;
-        seg   = (x >= cng_bseg[0]);
-    }
-    seg2 = FFMIN(seg, 3);
-
-    val     = 1 << shift;
-    val_add = val >> 1;
-    for (i = 0; i < shift; i++) {
-        t = seg * 32 + (val << seg2);
-        t *= t;
-        if (x >= t)
-            val += val_add;
-        else
-            val -= val_add;
-        val_add >>= 1;
-    }
-
-    t = seg * 32 + (val << seg2);
-    y = t * t - x;
-    if (y <= 0) {
-        t = seg * 32 + (val + 1 << seg2);
-        t = t * t - x;
-        val = (seg2 - 1 << 4) + val;
-        if (t >= y)
-            val++;
-    } else {
-        t = seg * 32 + (val - 1 << seg2);
-        t = t * t - x;
-        val = (seg2 - 1 << 4) + val;
-        if (t >= y)
-            val--;
-    }
-
-    return val;
-}
-
-static void generate_noise(G723_1_Context *p)
-{
-    int i, j, idx, t;
-    int off[SUBFRAMES];
-    int signs[SUBFRAMES / 2 * 11], pos[SUBFRAMES / 2 * 11];
-    int tmp[SUBFRAME_LEN * 2];
-    int16_t *vector_ptr;
-    int64_t sum;
-    int b0, c, delta, x, shift;
-
-    p->pitch_lag[0] = cng_rand(&p->cng_random_seed, 21) + 123;
-    p->pitch_lag[1] = cng_rand(&p->cng_random_seed, 19) + 123;
-
-    for (i = 0; i < SUBFRAMES; i++) {
-        p->subframe[i].ad_cb_gain = cng_rand(&p->cng_random_seed, 50) + 1;
-        p->subframe[i].ad_cb_lag  = cng_adaptive_cb_lag[i];
-    }
-
-    for (i = 0; i < SUBFRAMES / 2; i++) {
-        t = cng_rand(&p->cng_random_seed, 1 << 13);
-        off[i * 2]     =   t       & 1;
-        off[i * 2 + 1] = ((t >> 1) & 1) + SUBFRAME_LEN;
-        t >>= 2;
-        for (j = 0; j < 11; j++) {
-            signs[i * 11 + j] = (t & 1) * 2 - 1 << 14;
-            t >>= 1;
-        }
-    }
-
-    idx = 0;
-    for (i = 0; i < SUBFRAMES; i++) {
-        for (j = 0; j < SUBFRAME_LEN / 2; j++)
-            tmp[j] = j;
-        t = SUBFRAME_LEN / 2;
-        for (j = 0; j < pulses[i]; j++, idx++) {
-            int idx2 = cng_rand(&p->cng_random_seed, t);
-
-            pos[idx]  = tmp[idx2] * 2 + off[i];
-            tmp[idx2] = tmp[--t];
-        }
-    }
-
-    vector_ptr = p->audio + LPC_ORDER;
-    memcpy(vector_ptr, p->prev_excitation,
-           PITCH_MAX * sizeof(*p->excitation));
-    for (i = 0; i < SUBFRAMES; i += 2) {
-        gen_acb_excitation(vector_ptr, vector_ptr,
-                           p->pitch_lag[i >> 1], &p->subframe[i],
-                           p->cur_rate);
-        gen_acb_excitation(vector_ptr + SUBFRAME_LEN,
-                           vector_ptr + SUBFRAME_LEN,
-                           p->pitch_lag[i >> 1], &p->subframe[i + 1],
-                           p->cur_rate);
-
-        t = 0;
-        for (j = 0; j < SUBFRAME_LEN * 2; j++)
-            t |= FFABS(vector_ptr[j]);
-        t = FFMIN(t, 0x7FFF);
-        if (!t) {
-            shift = 0;
-        } else {
-            shift = -10 + av_log2(t);
-            if (shift < -2)
-                shift = -2;
-        }
-        sum = 0;
-        if (shift < 0) {
-           for (j = 0; j < SUBFRAME_LEN * 2; j++) {
-               t      = vector_ptr[j] << -shift;
-               sum   += t * t;
-               tmp[j] = t;
-           }
-        } else {
-           for (j = 0; j < SUBFRAME_LEN * 2; j++) {
-               t      = vector_ptr[j] >> shift;
-               sum   += t * t;
-               tmp[j] = t;
-           }
-        }
-
-        b0 = 0;
-        for (j = 0; j < 11; j++)
-            b0 += tmp[pos[(i / 2) * 11 + j]] * signs[(i / 2) * 11 + j];
-        b0 = b0 * 2 * 2979LL + (1 << 29) >> 30; // approximated division by 11
-
-        c = p->cur_gain * (p->cur_gain * SUBFRAME_LEN >> 5);
-        if (shift * 2 + 3 >= 0)
-            c >>= shift * 2 + 3;
-        else
-            c <<= -(shift * 2 + 3);
-        c = (av_clipl_int32(sum << 1) - c) * 2979LL >> 15;
-
-        delta = b0 * b0 * 2 - c;
-        if (delta <= 0) {
-            x = -b0;
-        } else {
-            delta = square_root(delta);
-            x     = delta - b0;
-            t     = delta + b0;
-            if (FFABS(t) < FFABS(x))
-                x = -t;
-        }
-        shift++;
-        if (shift < 0)
-           x >>= -shift;
-        else
-           x <<= shift;
-        x = av_clip(x, -10000, 10000);
-
-        for (j = 0; j < 11; j++) {
-            idx = (i / 2) * 11 + j;
-            vector_ptr[pos[idx]] = av_clip_int16(vector_ptr[pos[idx]] +
-                                                 (x * signs[idx] >> 15));
-        }
-
-        /* copy decoded data to serve as a history for the next decoded subframes */
-        memcpy(vector_ptr + PITCH_MAX, vector_ptr,
-               sizeof(*vector_ptr) * SUBFRAME_LEN * 2);
-        vector_ptr += SUBFRAME_LEN * 2;
-    }
-    /* Save the excitation for the next frame */
-    memcpy(p->prev_excitation, p->audio + LPC_ORDER + FRAME_LEN,
-           PITCH_MAX * sizeof(*p->excitation));
-}
-
-static int g723_1_decode_frame(AVCodecContext *avctx, void *data,
-                               int *got_frame_ptr, AVPacket *avpkt)
-{
-    G723_1_Context *p  = avctx->priv_data;
-    AVFrame *frame     = data;
-    const uint8_t *buf = avpkt->data;
-    int buf_size       = avpkt->size;
-    int dec_mode       = buf[0] & 3;
-
-    PPFParam ppf[SUBFRAMES];
-    int16_t cur_lsp[LPC_ORDER];
-    int16_t lpc[SUBFRAMES * LPC_ORDER];
-    int16_t acb_vector[SUBFRAME_LEN];
-    int16_t *out;
-    int bad_frame = 0, i, j, ret;
-    int16_t *audio = p->audio;
-
-    if (buf_size < frame_size[dec_mode]) {
-        if (buf_size)
-            av_log(avctx, AV_LOG_WARNING,
-                   "Expected %d bytes, got %d - skipping packet\n",
-                   frame_size[dec_mode], buf_size);
-        *got_frame_ptr = 0;
-        return buf_size;
-    }
-
-    if (unpack_bitstream(p, buf, buf_size) < 0) {
-        bad_frame = 1;
-        if (p->past_frame_type == ACTIVE_FRAME)
-            p->cur_frame_type = ACTIVE_FRAME;
-        else
-            p->cur_frame_type = UNTRANSMITTED_FRAME;
-    }
-
-    frame->nb_samples = FRAME_LEN;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
-        return ret;
-
-    out = (int16_t *)frame->data[0];
-
-    if (p->cur_frame_type == ACTIVE_FRAME) {
-        if (!bad_frame)
-            p->erased_frames = 0;
-        else if (p->erased_frames != 3)
-            p->erased_frames++;
-
-        inverse_quant(cur_lsp, p->prev_lsp, p->lsp_index, bad_frame);
-        lsp_interpolate(lpc, cur_lsp, p->prev_lsp);
-
-        /* Save the lsp_vector for the next frame */
-        memcpy(p->prev_lsp, cur_lsp, LPC_ORDER * sizeof(*p->prev_lsp));
-
-        /* Generate the excitation for the frame */
-        memcpy(p->excitation, p->prev_excitation,
-               PITCH_MAX * sizeof(*p->excitation));
-        if (!p->erased_frames) {
-            int16_t *vector_ptr = p->excitation + PITCH_MAX;
-
-            /* Update interpolation gain memory */
-            p->interp_gain = fixed_cb_gain[(p->subframe[2].amp_index +
-                                            p->subframe[3].amp_index) >> 1];
-            for (i = 0; i < SUBFRAMES; i++) {
-                gen_fcb_excitation(vector_ptr, &p->subframe[i], p->cur_rate,
-                                   p->pitch_lag[i >> 1], i);
-                gen_acb_excitation(acb_vector, &p->excitation[SUBFRAME_LEN * i],
-                                   p->pitch_lag[i >> 1], &p->subframe[i],
-                                   p->cur_rate);
-                /* Get the total excitation */
-                for (j = 0; j < SUBFRAME_LEN; j++) {
-                    int v = av_clip_int16(vector_ptr[j] << 1);
-                    vector_ptr[j] = av_clip_int16(v + acb_vector[j]);
-                }
-                vector_ptr += SUBFRAME_LEN;
-            }
-
-            vector_ptr = p->excitation + PITCH_MAX;
-
-            p->interp_index = comp_interp_index(p, p->pitch_lag[1],
-                                                &p->sid_gain, &p->cur_gain);
-
-            /* Peform pitch postfiltering */
-            if (p->postfilter) {
-                i = PITCH_MAX;
-                for (j = 0; j < SUBFRAMES; i += SUBFRAME_LEN, j++)
-                    comp_ppf_coeff(p, i, p->pitch_lag[j >> 1],
-                                   ppf + j, p->cur_rate);
-
-                for (i = 0, j = 0; j < SUBFRAMES; i += SUBFRAME_LEN, j++)
-                    ff_acelp_weighted_vector_sum(p->audio + LPC_ORDER + i,
-                                                 vector_ptr + i,
-                                                 vector_ptr + i + ppf[j].index,
-                                                 ppf[j].sc_gain,
-                                                 ppf[j].opt_gain,
-                                                 1 << 14, 15, SUBFRAME_LEN);
-            } else {
-                audio = vector_ptr - LPC_ORDER;
-            }
-
-            /* Save the excitation for the next frame */
-            memcpy(p->prev_excitation, p->excitation + FRAME_LEN,
-                   PITCH_MAX * sizeof(*p->excitation));
-        } else {
-            p->interp_gain = (p->interp_gain * 3 + 2) >> 2;
-            if (p->erased_frames == 3) {
-                /* Mute output */
-                memset(p->excitation, 0,
-                       (FRAME_LEN + PITCH_MAX) * sizeof(*p->excitation));
-                memset(p->prev_excitation, 0,
-                       PITCH_MAX * sizeof(*p->excitation));
-                memset(frame->data[0], 0,
-                       (FRAME_LEN + LPC_ORDER) * sizeof(int16_t));
-            } else {
-                int16_t *buf = p->audio + LPC_ORDER;
-
-                /* Regenerate frame */
-                residual_interp(p->excitation, buf, p->interp_index,
-                                p->interp_gain, &p->random_seed);
-
-                /* Save the excitation for the next frame */
-                memcpy(p->prev_excitation, buf + (FRAME_LEN - PITCH_MAX),
-                       PITCH_MAX * sizeof(*p->excitation));
-            }
-        }
-        p->cng_random_seed = CNG_RANDOM_SEED;
-    } else {
-        if (p->cur_frame_type == SID_FRAME) {
-            p->sid_gain = sid_gain_to_lsp_index(p->subframe[0].amp_index);
-            inverse_quant(p->sid_lsp, p->prev_lsp, p->lsp_index, 0);
-        } else if (p->past_frame_type == ACTIVE_FRAME) {
-            p->sid_gain = estimate_sid_gain(p);
-        }
-
-        if (p->past_frame_type == ACTIVE_FRAME)
-            p->cur_gain = p->sid_gain;
-        else
-            p->cur_gain = (p->cur_gain * 7 + p->sid_gain) >> 3;
-        generate_noise(p);
-        lsp_interpolate(lpc, p->sid_lsp, p->prev_lsp);
-        /* Save the lsp_vector for the next frame */
-        memcpy(p->prev_lsp, p->sid_lsp, LPC_ORDER * sizeof(*p->prev_lsp));
-    }
-
-    p->past_frame_type = p->cur_frame_type;
-
-    memcpy(p->audio, p->synth_mem, LPC_ORDER * sizeof(*p->audio));
-    for (i = LPC_ORDER, j = 0; j < SUBFRAMES; i += SUBFRAME_LEN, j++)
-        ff_celp_lp_synthesis_filter(p->audio + i, &lpc[j * LPC_ORDER],
-                                    audio + i, SUBFRAME_LEN, LPC_ORDER,
-                                    0, 1, 1 << 12);
-    memcpy(p->synth_mem, p->audio + FRAME_LEN, LPC_ORDER * sizeof(*p->audio));
-
-    if (p->postfilter) {
-        formant_postfilter(p, lpc, p->audio, out);
-    } else { // if output is not postfiltered it should be scaled by 2
-        for (i = 0; i < FRAME_LEN; i++)
-            out[i] = av_clip_int16(p->audio[LPC_ORDER + i] << 1);
-    }
-
-    *got_frame_ptr = 1;
-
-    return frame_size[dec_mode];
-}
-
-#define OFFSET(x) offsetof(G723_1_Context, x)
-#define AD     AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_DECODING_PARAM
-
-static const AVOption options[] = {
-    { "postfilter", "postfilter on/off", OFFSET(postfilter), AV_OPT_TYPE_INT,
-      { .i64 = 1 }, 0, 1, AD },
-    { NULL }
-};
-
-
-static const AVClass g723_1dec_class = {
-    .class_name = "G.723.1 decoder",
-    .item_name  = av_default_item_name,
-    .option     = options,
-    .version    = LIBAVUTIL_VERSION_INT,
-};
-
-AVCodec ff_g723_1_decoder = {
-    .name           = "g723_1",
-    .long_name      = NULL_IF_CONFIG_SMALL("G.723.1"),
-    .type           = AVMEDIA_TYPE_AUDIO,
-    .id             = AV_CODEC_ID_G723_1,
-    .priv_data_size = sizeof(G723_1_Context),
-    .init           = g723_1_decode_init,
-    .decode         = g723_1_decode_frame,
-    .capabilities   = CODEC_CAP_SUBFRAMES | CODEC_CAP_DR1,
-    .priv_class     = &g723_1dec_class,
-};
-
-#if CONFIG_G723_1_ENCODER
-#define BITSTREAM_WRITER_LE
-#include "put_bits.h"
-
-static av_cold int g723_1_encode_init(AVCodecContext *avctx)
-{
-    G723_1_Context *p = avctx->priv_data;
-
-    if (avctx->sample_rate != 8000) {
-        av_log(avctx, AV_LOG_ERROR, "Only 8000Hz sample rate supported\n");
-        return -1;
-    }
-
-    if (avctx->channels != 1) {
-        av_log(avctx, AV_LOG_ERROR, "Only mono supported\n");
-        return AVERROR(EINVAL);
-    }
-
-    if (avctx->bit_rate == 6300) {
-        p->cur_rate = RATE_6300;
-    } else if (avctx->bit_rate == 5300) {
-        av_log(avctx, AV_LOG_ERROR, "Bitrate not supported yet, use 6.3k\n");
-        return AVERROR_PATCHWELCOME;
-    } else {
-        av_log(avctx, AV_LOG_ERROR,
-               "Bitrate not supported, use 6.3k\n");
-        return AVERROR(EINVAL);
-    }
-    avctx->frame_size = 240;
-    memcpy(p->prev_lsp, dc_lsp, LPC_ORDER * sizeof(int16_t));
-
-    return 0;
-}
-
-/**
- * Remove DC component from the input signal.
- *
- * @param buf input signal
- * @param fir zero memory
- * @param iir pole memory
- */
-static void highpass_filter(int16_t *buf, int16_t *fir, int *iir)
-{
-    int i;
-    for (i = 0; i < FRAME_LEN; i++) {
-        *iir   = (buf[i] << 15) + ((-*fir) << 15) + MULL2(*iir, 0x7f00);
-        *fir   = buf[i];
-        buf[i] = av_clipl_int32((int64_t)*iir + (1 << 15)) >> 16;
-    }
-}
-
-/**
- * Estimate autocorrelation of the input vector.
- *
- * @param buf      input buffer
- * @param autocorr autocorrelation coefficients vector
- */
-static void comp_autocorr(int16_t *buf, int16_t *autocorr)
-{
-    int i, scale, temp;
-    int16_t vector[LPC_FRAME];
-
-    scale_vector(vector, buf, LPC_FRAME);
-
-    /* Apply the Hamming window */
-    for (i = 0; i < LPC_FRAME; i++)
-        vector[i] = (vector[i] * hamming_window[i] + (1 << 14)) >> 15;
-
-    /* Compute the first autocorrelation coefficient */
-    temp = ff_dot_product(vector, vector, LPC_FRAME);
-
-    /* Apply a white noise correlation factor of (1025/1024) */
-    temp += temp >> 10;
-
-    /* Normalize */
-    scale = normalize_bits_int32(temp);
-    autocorr[0] = av_clipl_int32((int64_t)(temp << scale) +
-                                 (1 << 15)) >> 16;
-
-    /* Compute the remaining coefficients */
-    if (!autocorr[0]) {
-        memset(autocorr + 1, 0, LPC_ORDER * sizeof(int16_t));
-    } else {
-        for (i = 1; i <= LPC_ORDER; i++) {
-           temp = ff_dot_product(vector, vector + i, LPC_FRAME - i);
-           temp = MULL2((temp << scale), binomial_window[i - 1]);
-           autocorr[i] = av_clipl_int32((int64_t)temp + (1 << 15)) >> 16;
-        }
-    }
-}
-
-/**
- * Use Levinson-Durbin recursion to compute LPC coefficients from
- * autocorrelation values.
- *
- * @param lpc      LPC coefficients vector
- * @param autocorr autocorrelation coefficients vector
- * @param error    prediction error
- */
-static void levinson_durbin(int16_t *lpc, int16_t *autocorr, int16_t error)
-{
-    int16_t vector[LPC_ORDER];
-    int16_t partial_corr;
-    int i, j, temp;
-
-    memset(lpc, 0, LPC_ORDER * sizeof(int16_t));
-
-    for (i = 0; i < LPC_ORDER; i++) {
-        /* Compute the partial correlation coefficient */
-        temp = 0;
-        for (j = 0; j < i; j++)
-            temp -= lpc[j] * autocorr[i - j - 1];
-        temp = ((autocorr[i] << 13) + temp) << 3;
-
-        if (FFABS(temp) >= (error << 16))
-            break;
-
-        partial_corr = temp / (error << 1);
-
-        lpc[i] = av_clipl_int32((int64_t)(partial_corr << 14) +
-                                (1 << 15)) >> 16;
-
-        /* Update the prediction error */
-        temp  = MULL2(temp, partial_corr);
-        error = av_clipl_int32((int64_t)(error << 16) - temp +
-                                (1 << 15)) >> 16;
-
-        memcpy(vector, lpc, i * sizeof(int16_t));
-        for (j = 0; j < i; j++) {
-            temp = partial_corr * vector[i - j - 1] << 1;
-            lpc[j] = av_clipl_int32((int64_t)(lpc[j] << 16) - temp +
-                                    (1 << 15)) >> 16;
-        }
-    }
-}
-
-/**
- * Calculate LPC coefficients for the current frame.
- *
- * @param buf       current frame
- * @param prev_data 2 trailing subframes of the previous frame
- * @param lpc       LPC coefficients vector
- */
-static void comp_lpc_coeff(int16_t *buf, int16_t *lpc)
-{
-    int16_t autocorr[(LPC_ORDER + 1) * SUBFRAMES];
-    int16_t *autocorr_ptr = autocorr;
-    int16_t *lpc_ptr      = lpc;
-    int i, j;
-
-    for (i = 0, j = 0; j < SUBFRAMES; i += SUBFRAME_LEN, j++) {
-        comp_autocorr(buf + i, autocorr_ptr);
-        levinson_durbin(lpc_ptr, autocorr_ptr + 1, autocorr_ptr[0]);
-
-        lpc_ptr += LPC_ORDER;
-        autocorr_ptr += LPC_ORDER + 1;
-    }
-}
-
-static void lpc2lsp(int16_t *lpc, int16_t *prev_lsp, int16_t *lsp)
-{
-    int f[LPC_ORDER + 2]; ///< coefficients of the sum and difference
-                          ///< polynomials (F1, F2) ordered as
-                          ///< f1[0], f2[0], ...., f1[5], f2[5]
-
-    int max, shift, cur_val, prev_val, count, p;
-    int i, j;
-    int64_t temp;
-
-    /* Initialize f1[0] and f2[0] to 1 in Q25 */
-    for (i = 0; i < LPC_ORDER; i++)
-        lsp[i] = (lpc[i] * bandwidth_expand[i] + (1 << 14)) >> 15;
-
-    /* Apply bandwidth expansion on the LPC coefficients */
-    f[0] = f[1] = 1 << 25;
-
-    /* Compute the remaining coefficients */
-    for (i = 0; i < LPC_ORDER / 2; i++) {
-        /* f1 */
-        f[2 * i + 2] = -f[2 * i] - ((lsp[i] + lsp[LPC_ORDER - 1 - i]) << 12);
-        /* f2 */
-        f[2 * i + 3] = f[2 * i + 1] - ((lsp[i] - lsp[LPC_ORDER - 1 - i]) << 12);
-    }
-
-    /* Divide f1[5] and f2[5] by 2 for use in polynomial evaluation */
-    f[LPC_ORDER] >>= 1;
-    f[LPC_ORDER + 1] >>= 1;
-
-    /* Normalize and shorten */
-    max = FFABS(f[0]);
-    for (i = 1; i < LPC_ORDER + 2; i++)
-        max = FFMAX(max, FFABS(f[i]));
-
-    shift = normalize_bits_int32(max);
-
-    for (i = 0; i < LPC_ORDER + 2; i++)
-        f[i] = av_clipl_int32((int64_t)(f[i] << shift) + (1 << 15)) >> 16;
-
-    /**
-     * Evaluate F1 and F2 at uniform intervals of pi/256 along the
-     * unit circle and check for zero crossings.
-     */
-    p    = 0;
-    temp = 0;
-    for (i = 0; i <= LPC_ORDER / 2; i++)
-        temp += f[2 * i] * cos_tab[0];
-    prev_val = av_clipl_int32(temp << 1);
-    count    = 0;
-    for ( i = 1; i < COS_TBL_SIZE / 2; i++) {
-        /* Evaluate */
-        temp = 0;
-        for (j = 0; j <= LPC_ORDER / 2; j++)
-            temp += f[LPC_ORDER - 2 * j + p] * cos_tab[i * j % COS_TBL_SIZE];
-        cur_val = av_clipl_int32(temp << 1);
-
-        /* Check for sign change, indicating a zero crossing */
-        if ((cur_val ^ prev_val) < 0) {
-            int abs_cur  = FFABS(cur_val);
-            int abs_prev = FFABS(prev_val);
-            int sum      = abs_cur + abs_prev;
-
-            shift        = normalize_bits_int32(sum);
-            sum          <<= shift;
-            abs_prev     = abs_prev << shift >> 8;
-            lsp[count++] = ((i - 1) << 7) + (abs_prev >> 1) / (sum >> 16);
-
-            if (count == LPC_ORDER)
-                break;
-
-            /* Switch between sum and difference polynomials */
-            p ^= 1;
-
-            /* Evaluate */
-            temp = 0;
-            for (j = 0; j <= LPC_ORDER / 2; j++){
-                temp += f[LPC_ORDER - 2 * j + p] *
-                        cos_tab[i * j % COS_TBL_SIZE];
-            }
-            cur_val = av_clipl_int32(temp<<1);
-        }
-        prev_val = cur_val;
-    }
-
-    if (count != LPC_ORDER)
-        memcpy(lsp, prev_lsp, LPC_ORDER * sizeof(int16_t));
-}
-
-/**
- * Quantize the current LSP subvector.
- *
- * @param num    band number
- * @param offset offset of the current subvector in an LPC_ORDER vector
- * @param size   size of the current subvector
- */
-#define get_index(num, offset, size) \
-{\
-    int error, max = -1;\
-    int16_t temp[4];\
-    int i, j;\
-    for (i = 0; i < LSP_CB_SIZE; i++) {\
-        for (j = 0; j < size; j++){\
-            temp[j] = (weight[j + (offset)] * lsp_band##num[i][j] +\
-                      (1 << 14)) >> 15;\
-        }\
-        error =  dot_product(lsp + (offset), temp, size) << 1;\
-        error -= dot_product(lsp_band##num[i], temp, size);\
-        if (error > max) {\
-            max = error;\
-            lsp_index[num] = i;\
-        }\
-    }\
-}
-
-/**
- * Vector quantize the LSP frequencies.
- *
- * @param lsp      the current lsp vector
- * @param prev_lsp the previous lsp vector
- */
-static void lsp_quantize(uint8_t *lsp_index, int16_t *lsp, int16_t *prev_lsp)
-{
-    int16_t weight[LPC_ORDER];
-    int16_t min, max;
-    int shift, i;
-
-    /* Calculate the VQ weighting vector */
-    weight[0] = (1 << 20) / (lsp[1] - lsp[0]);
-    weight[LPC_ORDER - 1] = (1 << 20) /
-                            (lsp[LPC_ORDER - 1] - lsp[LPC_ORDER - 2]);
-
-    for (i = 1; i < LPC_ORDER - 1; i++) {
-        min  = FFMIN(lsp[i] - lsp[i - 1], lsp[i + 1] - lsp[i]);
-        if (min > 0x20)
-            weight[i] = (1 << 20) / min;
-        else
-            weight[i] = INT16_MAX;
-    }
-
-    /* Normalize */
-    max = 0;
-    for (i = 0; i < LPC_ORDER; i++)
-        max = FFMAX(weight[i], max);
-
-    shift = normalize_bits_int16(max);
-    for (i = 0; i < LPC_ORDER; i++) {
-        weight[i] <<= shift;
-    }
-
-    /* Compute the VQ target vector */
-    for (i = 0; i < LPC_ORDER; i++) {
-        lsp[i] -= dc_lsp[i] +
-                  (((prev_lsp[i] - dc_lsp[i]) * 12288 + (1 << 14)) >> 15);
-    }
-
-    get_index(0, 0, 3);
-    get_index(1, 3, 3);
-    get_index(2, 6, 4);
-}
-
-/**
- * Apply the formant perceptual weighting filter.
- *
- * @param flt_coef filter coefficients
- * @param unq_lpc  unquantized lpc vector
- */
-static void perceptual_filter(G723_1_Context *p, int16_t *flt_coef,
-                              int16_t *unq_lpc, int16_t *buf)
-{
-    int16_t vector[FRAME_LEN + LPC_ORDER];
-    int i, j, k, l = 0;
-
-    memcpy(buf, p->iir_mem, sizeof(int16_t) * LPC_ORDER);
-    memcpy(vector, p->fir_mem, sizeof(int16_t) * LPC_ORDER);
-    memcpy(vector + LPC_ORDER, buf + LPC_ORDER, sizeof(int16_t) * FRAME_LEN);
-
-    for (i = LPC_ORDER, j = 0; j < SUBFRAMES; i += SUBFRAME_LEN, j++) {
-        for (k = 0; k < LPC_ORDER; k++) {
-            flt_coef[k + 2 * l] = (unq_lpc[k + l] * percept_flt_tbl[0][k] +
-                                  (1 << 14)) >> 15;
-            flt_coef[k + 2 * l + LPC_ORDER] = (unq_lpc[k + l] *
-                                             percept_flt_tbl[1][k] +
-                                             (1 << 14)) >> 15;
-        }
-        iir_filter(flt_coef + 2 * l, flt_coef + 2 * l + LPC_ORDER, vector + i,
-                   buf + i, 0);
-        l += LPC_ORDER;
-    }
-    memcpy(p->iir_mem, buf + FRAME_LEN, sizeof(int16_t) * LPC_ORDER);
-    memcpy(p->fir_mem, vector + FRAME_LEN, sizeof(int16_t) * LPC_ORDER);
-}
-
-/**
- * Estimate the open loop pitch period.
- *
- * @param buf   perceptually weighted speech
- * @param start estimation is carried out from this position
- */
-static int estimate_pitch(int16_t *buf, int start)
-{
-    int max_exp = 32;
-    int max_ccr = 0x4000;
-    int max_eng = 0x7fff;
-    int index   = PITCH_MIN;
-    int offset  = start - PITCH_MIN + 1;
-
-    int ccr, eng, orig_eng, ccr_eng, exp;
-    int diff, temp;
-
-    int i;
-
-    orig_eng = ff_dot_product(buf + offset, buf + offset, HALF_FRAME_LEN);
-
-    for (i = PITCH_MIN; i <= PITCH_MAX - 3; i++) {
-        offset--;
-
-        /* Update energy and compute correlation */
-        orig_eng += buf[offset] * buf[offset] -
-                    buf[offset + HALF_FRAME_LEN] * buf[offset + HALF_FRAME_LEN];
-        ccr      =  ff_dot_product(buf + start, buf + offset, HALF_FRAME_LEN);
-        if (ccr <= 0)
-            continue;
-
-        /* Split into mantissa and exponent to maintain precision */
-        exp  =   normalize_bits_int32(ccr);
-        ccr  =   av_clipl_int32((int64_t)(ccr << exp) + (1 << 15)) >> 16;
-        exp  <<= 1;
-        ccr  *=  ccr;
-        temp =   normalize_bits_int32(ccr);
-        ccr  =   ccr << temp >> 16;
-        exp  +=  temp;
-
-        temp =   normalize_bits_int32(orig_eng);
-        eng  =   av_clipl_int32((int64_t)(orig_eng << temp) + (1 << 15)) >> 16;
-        exp  -=  temp;
-
-        if (ccr >= eng) {
-            exp--;
-            ccr >>= 1;
-        }
-        if (exp > max_exp)
-            continue;
-
-        if (exp + 1 < max_exp)
-            goto update;
-
-        /* Equalize exponents before comparison */
-        if (exp + 1 == max_exp)
-            temp = max_ccr >> 1;
-        else
-            temp = max_ccr;
-        ccr_eng = ccr * max_eng;
-        diff    = ccr_eng - eng * temp;
-        if (diff > 0 && (i - index < PITCH_MIN || diff > ccr_eng >> 2)) {
-update:
-            index   = i;
-            max_exp = exp;
-            max_ccr = ccr;
-            max_eng = eng;
-        }
-    }
-    return index;
-}
-
-/**
- * Compute harmonic noise filter parameters.
- *
- * @param buf       perceptually weighted speech
- * @param pitch_lag open loop pitch period
- * @param hf        harmonic filter parameters
- */
-static void comp_harmonic_coeff(int16_t *buf, int16_t pitch_lag, HFParam *hf)
-{
-    int ccr, eng, max_ccr, max_eng;
-    int exp, max, diff;
-    int energy[15];
-    int i, j;
-
-    for (i = 0, j = pitch_lag - 3; j <= pitch_lag + 3; i++, j++) {
-        /* Compute residual energy */
-        energy[i << 1] = ff_dot_product(buf - j, buf - j, SUBFRAME_LEN);
-        /* Compute correlation */
-        energy[(i << 1) + 1] = ff_dot_product(buf, buf - j, SUBFRAME_LEN);
-    }
-
-    /* Compute target energy */
-    energy[14] = ff_dot_product(buf, buf, SUBFRAME_LEN);
-
-    /* Normalize */
-    max = 0;
-    for (i = 0; i < 15; i++)
-        max = FFMAX(max, FFABS(energy[i]));
-
-    exp = normalize_bits_int32(max);
-    for (i = 0; i < 15; i++) {
-        energy[i] = av_clipl_int32((int64_t)(energy[i] << exp) +
-                                   (1 << 15)) >> 16;
-    }
-
-    hf->index = -1;
-    hf->gain  =  0;
-    max_ccr   =  1;
-    max_eng   =  0x7fff;
-
-    for (i = 0; i <= 6; i++) {
-        eng = energy[i << 1];
-        ccr = energy[(i << 1) + 1];
-
-        if (ccr <= 0)
-            continue;
-
-        ccr  = (ccr * ccr + (1 << 14)) >> 15;
-        diff = ccr * max_eng - eng * max_ccr;
-        if (diff > 0) {
-            max_ccr   = ccr;
-            max_eng   = eng;
-            hf->index = i;
-        }
-    }
-
-    if (hf->index == -1) {
-        hf->index = pitch_lag;
-        return;
-    }
-
-    eng = energy[14] * max_eng;
-    eng = (eng >> 2) + (eng >> 3);
-    ccr = energy[(hf->index << 1) + 1] * energy[(hf->index << 1) + 1];
-    if (eng < ccr) {
-        eng = energy[(hf->index << 1) + 1];
-
-        if (eng >= max_eng)
-            hf->gain = 0x2800;
-        else
-            hf->gain = ((eng << 15) / max_eng * 0x2800 + (1 << 14)) >> 15;
-    }
-    hf->index += pitch_lag - 3;
-}
-
-/**
- * Apply the harmonic noise shaping filter.
- *
- * @param hf filter parameters
- */
-static void harmonic_filter(HFParam *hf, const int16_t *src, int16_t *dest)
-{
-    int i;
-
-    for (i = 0; i < SUBFRAME_LEN; i++) {
-        int64_t temp = hf->gain * src[i - hf->index] << 1;
-        dest[i] = av_clipl_int32((src[i] << 16) - temp + (1 << 15)) >> 16;
-    }
-}
-
-static void harmonic_noise_sub(HFParam *hf, const int16_t *src, int16_t *dest)
-{
-    int i;
-    for (i = 0; i < SUBFRAME_LEN; i++) {
-        int64_t temp = hf->gain * src[i - hf->index] << 1;
-        dest[i] = av_clipl_int32(((dest[i] - src[i]) << 16) + temp +
-                                 (1 << 15)) >> 16;
-
-    }
-}
-
-/**
- * Combined synthesis and formant perceptual weighting filer.
- *
- * @param qnt_lpc  quantized lpc coefficients
- * @param perf_lpc perceptual filter coefficients
- * @param perf_fir perceptual filter fir memory
- * @param perf_iir perceptual filter iir memory
- * @param scale    the filter output will be scaled by 2^scale
- */
-static void synth_percept_filter(int16_t *qnt_lpc, int16_t *perf_lpc,
-                                 int16_t *perf_fir, int16_t *perf_iir,
-                                 const int16_t *src, int16_t *dest, int scale)
-{
-    int i, j;
-    int16_t buf_16[SUBFRAME_LEN + LPC_ORDER];
-    int64_t buf[SUBFRAME_LEN];
-
-    int16_t *bptr_16 = buf_16 + LPC_ORDER;
-
-    memcpy(buf_16, perf_fir, sizeof(int16_t) * LPC_ORDER);
-    memcpy(dest - LPC_ORDER, perf_iir, sizeof(int16_t) * LPC_ORDER);
-
-    for (i = 0; i < SUBFRAME_LEN; i++) {
-        int64_t temp = 0;
-        for (j = 1; j <= LPC_ORDER; j++)
-            temp -= qnt_lpc[j - 1] * bptr_16[i - j];
-
-        buf[i]     = (src[i] << 15) + (temp << 3);
-        bptr_16[i] = av_clipl_int32(buf[i] + (1 << 15)) >> 16;
-    }
-
-    for (i = 0; i < SUBFRAME_LEN; i++) {
-        int64_t fir = 0, iir = 0;
-        for (j = 1; j <= LPC_ORDER; j++) {
-            fir -= perf_lpc[j - 1] * bptr_16[i - j];
-            iir += perf_lpc[j + LPC_ORDER - 1] * dest[i - j];
-        }
-        dest[i] = av_clipl_int32(((buf[i] + (fir << 3)) << scale) + (iir << 3) +
-                                 (1 << 15)) >> 16;
-    }
-    memcpy(perf_fir, buf_16 + SUBFRAME_LEN, sizeof(int16_t) * LPC_ORDER);
-    memcpy(perf_iir, dest + SUBFRAME_LEN - LPC_ORDER,
-           sizeof(int16_t) * LPC_ORDER);
-}
-
-/**
- * Compute the adaptive codebook contribution.
- *
- * @param buf   input signal
- * @param index the current subframe index
- */
-static void acb_search(G723_1_Context *p, int16_t *residual,
-                       int16_t *impulse_resp, const int16_t *buf,
-                       int index)
-{
-
-    int16_t flt_buf[PITCH_ORDER][SUBFRAME_LEN];
-
-    const int16_t *cb_tbl = adaptive_cb_gain85;
-
-    int ccr_buf[PITCH_ORDER * SUBFRAMES << 2];
-
-    int pitch_lag = p->pitch_lag[index >> 1];
-    int acb_lag   = 1;
-    int acb_gain  = 0;
-    int odd_frame = index & 1;
-    int iter      = 3 + odd_frame;
-    int count     = 0;
-    int tbl_size  = 85;
-
-    int i, j, k, l, max;
-    int64_t temp;
-
-    if (!odd_frame) {
-        if (pitch_lag == PITCH_MIN)
-            pitch_lag++;
-        else
-            pitch_lag = FFMIN(pitch_lag, PITCH_MAX - 5);
-    }
-
-    for (i = 0; i < iter; i++) {
-        get_residual(residual, p->prev_excitation, pitch_lag + i - 1);
-
-        for (j = 0; j < SUBFRAME_LEN; j++) {
-            temp = 0;
-            for (k = 0; k <= j; k++)
-                temp += residual[PITCH_ORDER - 1 + k] * impulse_resp[j - k];
-            flt_buf[PITCH_ORDER - 1][j] = av_clipl_int32((temp << 1) +
-                                                         (1 << 15)) >> 16;
-        }
-
-        for (j = PITCH_ORDER - 2; j >= 0; j--) {
-            flt_buf[j][0] = ((residual[j] << 13) + (1 << 14)) >> 15;
-            for (k = 1; k < SUBFRAME_LEN; k++) {
-                temp = (flt_buf[j + 1][k - 1] << 15) +
-                       residual[j] * impulse_resp[k];
-                flt_buf[j][k] = av_clipl_int32((temp << 1) + (1 << 15)) >> 16;
-            }
-        }
-
-        /* Compute crosscorrelation with the signal */
-        for (j = 0; j < PITCH_ORDER; j++) {
-            temp = ff_dot_product(buf, flt_buf[j], SUBFRAME_LEN);
-            ccr_buf[count++] = av_clipl_int32(temp << 1);
-        }
-
-        /* Compute energies */
-        for (j = 0; j < PITCH_ORDER; j++) {
-            ccr_buf[count++] = dot_product(flt_buf[j], flt_buf[j],
-                                           SUBFRAME_LEN);
-        }
-
-        for (j = 1; j < PITCH_ORDER; j++) {
-            for (k = 0; k < j; k++) {
-                temp = ff_dot_product(flt_buf[j], flt_buf[k], SUBFRAME_LEN);
-                ccr_buf[count++] = av_clipl_int32(temp<<2);
-            }
-        }
-    }
-
-    /* Normalize and shorten */
-    max = 0;
-    for (i = 0; i < 20 * iter; i++)
-        max = FFMAX(max, FFABS(ccr_buf[i]));
-
-    temp = normalize_bits_int32(max);
-
-    for (i = 0; i < 20 * iter; i++){
-        ccr_buf[i] = av_clipl_int32((int64_t)(ccr_buf[i] << temp) +
-                                    (1 << 15)) >> 16;
-    }
-
-    max = 0;
-    for (i = 0; i < iter; i++) {
-        /* Select quantization table */
-        if (!odd_frame && pitch_lag + i - 1 >= SUBFRAME_LEN - 2 ||
-            odd_frame && pitch_lag >= SUBFRAME_LEN - 2) {
-            cb_tbl = adaptive_cb_gain170;
-            tbl_size = 170;
-        }
-
-        for (j = 0, k = 0; j < tbl_size; j++, k += 20) {
-            temp = 0;
-            for (l = 0; l < 20; l++)
-                temp += ccr_buf[20 * i + l] * cb_tbl[k + l];
-            temp =  av_clipl_int32(temp);
-
-            if (temp > max) {
-                max      = temp;
-                acb_gain = j;
-                acb_lag  = i;
-            }
-        }
-    }
-
-    if (!odd_frame) {
-        pitch_lag += acb_lag - 1;
-        acb_lag   =  1;
-    }
-
-    p->pitch_lag[index >> 1]      = pitch_lag;
-    p->subframe[index].ad_cb_lag  = acb_lag;
-    p->subframe[index].ad_cb_gain = acb_gain;
-}
-
-/**
- * Subtract the adaptive codebook contribution from the input
- * to obtain the residual.
- *
- * @param buf target vector
- */
-static void sub_acb_contrib(const int16_t *residual, const int16_t *impulse_resp,
-                            int16_t *buf)
-{
-    int i, j;
-    /* Subtract adaptive CB contribution to obtain the residual */
-    for (i = 0; i < SUBFRAME_LEN; i++) {
-        int64_t temp = buf[i] << 14;
-        for (j = 0; j <= i; j++)
-            temp -= residual[j] * impulse_resp[i - j];
-
-        buf[i] = av_clipl_int32((temp << 2) + (1 << 15)) >> 16;
-    }
-}
-
-/**
- * Quantize the residual signal using the fixed codebook (MP-MLQ).
- *
- * @param optim optimized fixed codebook parameters
- * @param buf   excitation vector
- */
-static void get_fcb_param(FCBParam *optim, int16_t *impulse_resp,
-                          int16_t *buf, int pulse_cnt, int pitch_lag)
-{
-    FCBParam param;
-    int16_t impulse_r[SUBFRAME_LEN];
-    int16_t temp_corr[SUBFRAME_LEN];
-    int16_t impulse_corr[SUBFRAME_LEN];
-
-    int ccr1[SUBFRAME_LEN];
-    int ccr2[SUBFRAME_LEN];
-    int amp, err, max, max_amp_index, min, scale, i, j, k, l;
-
-    int64_t temp;
-
-    /* Update impulse response */
-    memcpy(impulse_r, impulse_resp, sizeof(int16_t) * SUBFRAME_LEN);
-    param.dirac_train = 0;
-    if (pitch_lag < SUBFRAME_LEN - 2) {
-        param.dirac_train = 1;
-        gen_dirac_train(impulse_r, pitch_lag);
-    }
-
-    for (i = 0; i < SUBFRAME_LEN; i++)
-        temp_corr[i] = impulse_r[i] >> 1;
-
-    /* Compute impulse response autocorrelation */
-    temp = dot_product(temp_corr, temp_corr, SUBFRAME_LEN);
-
-    scale = normalize_bits_int32(temp);
-    impulse_corr[0] = av_clipl_int32((temp << scale) + (1 << 15)) >> 16;
-
-    for (i = 1; i < SUBFRAME_LEN; i++) {
-        temp = dot_product(temp_corr + i, temp_corr, SUBFRAME_LEN - i);
-        impulse_corr[i] = av_clipl_int32((temp << scale) + (1 << 15)) >> 16;
-    }
-
-    /* Compute crosscorrelation of impulse response with residual signal */
-    scale -= 4;
-    for (i = 0; i < SUBFRAME_LEN; i++){
-        temp = dot_product(buf + i, impulse_r, SUBFRAME_LEN - i);
-        if (scale < 0)
-            ccr1[i] = temp >> -scale;
-        else
-            ccr1[i] = av_clipl_int32(temp << scale);
-    }
-
-    /* Search loop */
-    for (i = 0; i < GRID_SIZE; i++) {
-        /* Maximize the crosscorrelation */
-        max = 0;
-        for (j = i; j < SUBFRAME_LEN; j += GRID_SIZE) {
-            temp = FFABS(ccr1[j]);
-            if (temp >= max) {
-                max = temp;
-                param.pulse_pos[0] = j;
-            }
-        }
-
-        /* Quantize the gain (max crosscorrelation/impulse_corr[0]) */
-        amp = max;
-        min = 1 << 30;
-        max_amp_index = GAIN_LEVELS - 2;
-        for (j = max_amp_index; j >= 2; j--) {
-            temp = av_clipl_int32((int64_t)fixed_cb_gain[j] *
-                                  impulse_corr[0] << 1);
-            temp = FFABS(temp - amp);
-            if (temp < min) {
-                min = temp;
-                max_amp_index = j;
-            }
-        }
-
-        max_amp_index--;
-        /* Select additional gain values */
-        for (j = 1; j < 5; j++) {
-            for (k = i; k < SUBFRAME_LEN; k += GRID_SIZE) {
-                temp_corr[k] = 0;
-                ccr2[k]      = ccr1[k];
-            }
-            param.amp_index = max_amp_index + j - 2;
-            amp = fixed_cb_gain[param.amp_index];
-
-            param.pulse_sign[0] = (ccr2[param.pulse_pos[0]] < 0) ? -amp : amp;
-            temp_corr[param.pulse_pos[0]] = 1;
-
-            for (k = 1; k < pulse_cnt; k++) {
-                max = -1 << 30;
-                for (l = i; l < SUBFRAME_LEN; l += GRID_SIZE) {
-                    if (temp_corr[l])
-                        continue;
-                    temp = impulse_corr[FFABS(l - param.pulse_pos[k - 1])];
-                    temp = av_clipl_int32((int64_t)temp *
-                                          param.pulse_sign[k - 1] << 1);
-                    ccr2[l] -= temp;
-                    temp = FFABS(ccr2[l]);
-                    if (temp > max) {
-                        max = temp;
-                        param.pulse_pos[k] = l;
-                    }
-                }
-
-                param.pulse_sign[k] = (ccr2[param.pulse_pos[k]] < 0) ?
-                                      -amp : amp;
-                temp_corr[param.pulse_pos[k]] = 1;
-            }
-
-            /* Create the error vector */
-            memset(temp_corr, 0, sizeof(int16_t) * SUBFRAME_LEN);
-
-            for (k = 0; k < pulse_cnt; k++)
-                temp_corr[param.pulse_pos[k]] = param.pulse_sign[k];
-
-            for (k = SUBFRAME_LEN - 1; k >= 0; k--) {
-                temp = 0;
-                for (l = 0; l <= k; l++) {
-                    int prod = av_clipl_int32((int64_t)temp_corr[l] *
-                                              impulse_r[k - l] << 1);
-                    temp     = av_clipl_int32(temp + prod);
-                }
-                temp_corr[k] = temp << 2 >> 16;
-            }
-
-            /* Compute square of error */
-            err = 0;
-            for (k = 0; k < SUBFRAME_LEN; k++) {
-                int64_t prod;
-                prod = av_clipl_int32((int64_t)buf[k] * temp_corr[k] << 1);
-                err  = av_clipl_int32(err - prod);
-                prod = av_clipl_int32((int64_t)temp_corr[k] * temp_corr[k]);
-                err  = av_clipl_int32(err + prod);
-            }
-
-            /* Minimize */
-            if (err < optim->min_err) {
-                optim->min_err     = err;
-                optim->grid_index  = i;
-                optim->amp_index   = param.amp_index;
-                optim->dirac_train = param.dirac_train;
-
-                for (k = 0; k < pulse_cnt; k++) {
-                    optim->pulse_sign[k] = param.pulse_sign[k];
-                    optim->pulse_pos[k]  = param.pulse_pos[k];
-                }
-            }
-        }
-    }
-}
-
-/**
- * Encode the pulse position and gain of the current subframe.
- *
- * @param optim optimized fixed CB parameters
- * @param buf   excitation vector
- */
-static void pack_fcb_param(G723_1_Subframe *subfrm, FCBParam *optim,
-                           int16_t *buf, int pulse_cnt)
-{
-    int i, j;
-
-    j = PULSE_MAX - pulse_cnt;
-
-    subfrm->pulse_sign = 0;
-    subfrm->pulse_pos  = 0;
-
-    for (i = 0; i < SUBFRAME_LEN >> 1; i++) {
-        int val = buf[optim->grid_index + (i << 1)];
-        if (!val) {
-            subfrm->pulse_pos += combinatorial_table[j][i];
-        } else {
-            subfrm->pulse_sign <<= 1;
-            if (val < 0) subfrm->pulse_sign++;
-            j++;
-
-            if (j == PULSE_MAX) break;
-        }
-    }
-    subfrm->amp_index   = optim->amp_index;
-    subfrm->grid_index  = optim->grid_index;
-    subfrm->dirac_train = optim->dirac_train;
-}
-
-/**
- * Compute the fixed codebook excitation.
- *
- * @param buf          target vector
- * @param impulse_resp impulse response of the combined filter
- */
-static void fcb_search(G723_1_Context *p, int16_t *impulse_resp,
-                       int16_t *buf, int index)
-{
-    FCBParam optim;
-    int pulse_cnt = pulses[index];
-    int i;
-
-    optim.min_err = 1 << 30;
-    get_fcb_param(&optim, impulse_resp, buf, pulse_cnt, SUBFRAME_LEN);
-
-    if (p->pitch_lag[index >> 1] < SUBFRAME_LEN - 2) {
-        get_fcb_param(&optim, impulse_resp, buf, pulse_cnt,
-                      p->pitch_lag[index >> 1]);
-    }
-
-    /* Reconstruct the excitation */
-    memset(buf, 0, sizeof(int16_t) * SUBFRAME_LEN);
-    for (i = 0; i < pulse_cnt; i++)
-        buf[optim.pulse_pos[i]] = optim.pulse_sign[i];
-
-    pack_fcb_param(&p->subframe[index], &optim, buf, pulse_cnt);
-
-    if (optim.dirac_train)
-        gen_dirac_train(buf, p->pitch_lag[index >> 1]);
-}
-
-/**
- * Pack the frame parameters into output bitstream.
- *
- * @param frame output buffer
- * @param size  size of the buffer
- */
-static int pack_bitstream(G723_1_Context *p, unsigned char *frame, int size)
-{
-    PutBitContext pb;
-    int info_bits, i, temp;
-
-    init_put_bits(&pb, frame, size);
-
-    if (p->cur_rate == RATE_6300) {
-        info_bits = 0;
-        put_bits(&pb, 2, info_bits);
-    }else
-        av_assert0(0);
-
-    put_bits(&pb, 8, p->lsp_index[2]);
-    put_bits(&pb, 8, p->lsp_index[1]);
-    put_bits(&pb, 8, p->lsp_index[0]);
-
-    put_bits(&pb, 7, p->pitch_lag[0] - PITCH_MIN);
-    put_bits(&pb, 2, p->subframe[1].ad_cb_lag);
-    put_bits(&pb, 7, p->pitch_lag[1] - PITCH_MIN);
-    put_bits(&pb, 2, p->subframe[3].ad_cb_lag);
-
-    /* Write 12 bit combined gain */
-    for (i = 0; i < SUBFRAMES; i++) {
-        temp = p->subframe[i].ad_cb_gain * GAIN_LEVELS +
-               p->subframe[i].amp_index;
-        if (p->cur_rate ==  RATE_6300)
-            temp += p->subframe[i].dirac_train << 11;
-        put_bits(&pb, 12, temp);
-    }
-
-    put_bits(&pb, 1, p->subframe[0].grid_index);
-    put_bits(&pb, 1, p->subframe[1].grid_index);
-    put_bits(&pb, 1, p->subframe[2].grid_index);
-    put_bits(&pb, 1, p->subframe[3].grid_index);
-
-    if (p->cur_rate == RATE_6300) {
-        skip_put_bits(&pb, 1); /* reserved bit */
-
-        /* Write 13 bit combined position index */
-        temp = (p->subframe[0].pulse_pos >> 16) * 810 +
-               (p->subframe[1].pulse_pos >> 14) *  90 +
-               (p->subframe[2].pulse_pos >> 16) *   9 +
-               (p->subframe[3].pulse_pos >> 14);
-        put_bits(&pb, 13, temp);
-
-        put_bits(&pb, 16, p->subframe[0].pulse_pos & 0xffff);
-        put_bits(&pb, 14, p->subframe[1].pulse_pos & 0x3fff);
-        put_bits(&pb, 16, p->subframe[2].pulse_pos & 0xffff);
-        put_bits(&pb, 14, p->subframe[3].pulse_pos & 0x3fff);
-
-        put_bits(&pb, 6, p->subframe[0].pulse_sign);
-        put_bits(&pb, 5, p->subframe[1].pulse_sign);
-        put_bits(&pb, 6, p->subframe[2].pulse_sign);
-        put_bits(&pb, 5, p->subframe[3].pulse_sign);
-    }
-
-    flush_put_bits(&pb);
-    return frame_size[info_bits];
-}
-
-static int g723_1_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
-                            const AVFrame *frame, int *got_packet_ptr)
-{
-    G723_1_Context *p = avctx->priv_data;
-    int16_t unq_lpc[LPC_ORDER * SUBFRAMES];
-    int16_t qnt_lpc[LPC_ORDER * SUBFRAMES];
-    int16_t cur_lsp[LPC_ORDER];
-    int16_t weighted_lpc[LPC_ORDER * SUBFRAMES << 1];
-    int16_t vector[FRAME_LEN + PITCH_MAX];
-    int offset, ret;
-    int16_t *in_orig = av_memdup(frame->data[0], frame->nb_samples * sizeof(int16_t));
-    int16_t *in = in_orig;
-
-    HFParam hf[4];
-    int i, j;
-
-    if (!in)
-        return AVERROR(ENOMEM);
-
-    highpass_filter(in, &p->hpf_fir_mem, &p->hpf_iir_mem);
-
-    memcpy(vector, p->prev_data, HALF_FRAME_LEN * sizeof(int16_t));
-    memcpy(vector + HALF_FRAME_LEN, in, FRAME_LEN * sizeof(int16_t));
-
-    comp_lpc_coeff(vector, unq_lpc);
-    lpc2lsp(&unq_lpc[LPC_ORDER * 3], p->prev_lsp, cur_lsp);
-    lsp_quantize(p->lsp_index, cur_lsp, p->prev_lsp);
-
-    /* Update memory */
-    memcpy(vector + LPC_ORDER, p->prev_data + SUBFRAME_LEN,
-           sizeof(int16_t) * SUBFRAME_LEN);
-    memcpy(vector + LPC_ORDER + SUBFRAME_LEN, in,
-           sizeof(int16_t) * (HALF_FRAME_LEN + SUBFRAME_LEN));
-    memcpy(p->prev_data, in + HALF_FRAME_LEN,
-           sizeof(int16_t) * HALF_FRAME_LEN);
-    memcpy(in, vector + LPC_ORDER, sizeof(int16_t) * FRAME_LEN);
-
-    perceptual_filter(p, weighted_lpc, unq_lpc, vector);
-
-    memcpy(in, vector + LPC_ORDER, sizeof(int16_t) * FRAME_LEN);
-    memcpy(vector, p->prev_weight_sig, sizeof(int16_t) * PITCH_MAX);
-    memcpy(vector + PITCH_MAX, in, sizeof(int16_t) * FRAME_LEN);
-
-    scale_vector(vector, vector, FRAME_LEN + PITCH_MAX);
-
-    p->pitch_lag[0] = estimate_pitch(vector, PITCH_MAX);
-    p->pitch_lag[1] = estimate_pitch(vector, PITCH_MAX + HALF_FRAME_LEN);
-
-    for (i = PITCH_MAX, j = 0; j < SUBFRAMES; i += SUBFRAME_LEN, j++)
-        comp_harmonic_coeff(vector + i, p->pitch_lag[j >> 1], hf + j);
-
-    memcpy(vector, p->prev_weight_sig, sizeof(int16_t) * PITCH_MAX);
-    memcpy(vector + PITCH_MAX, in, sizeof(int16_t) * FRAME_LEN);
-    memcpy(p->prev_weight_sig, vector + FRAME_LEN, sizeof(int16_t) * PITCH_MAX);
-
-    for (i = 0, j = 0; j < SUBFRAMES; i += SUBFRAME_LEN, j++)
-        harmonic_filter(hf + j, vector + PITCH_MAX + i, in + i);
-
-    inverse_quant(cur_lsp, p->prev_lsp, p->lsp_index, 0);
-    lsp_interpolate(qnt_lpc, cur_lsp, p->prev_lsp);
-
-    memcpy(p->prev_lsp, cur_lsp, sizeof(int16_t) * LPC_ORDER);
-
-    offset = 0;
-    for (i = 0; i < SUBFRAMES; i++) {
-        int16_t impulse_resp[SUBFRAME_LEN];
-        int16_t residual[SUBFRAME_LEN + PITCH_ORDER - 1];
-        int16_t flt_in[SUBFRAME_LEN];
-        int16_t zero[LPC_ORDER], fir[LPC_ORDER], iir[LPC_ORDER];
-
-        /**
-         * Compute the combined impulse response of the synthesis filter,
-         * formant perceptual weighting filter and harmonic noise shaping filter
-         */
-        memset(zero, 0, sizeof(int16_t) * LPC_ORDER);
-        memset(vector, 0, sizeof(int16_t) * PITCH_MAX);
-        memset(flt_in, 0, sizeof(int16_t) * SUBFRAME_LEN);
-
-        flt_in[0] = 1 << 13; /* Unit impulse */
-        synth_percept_filter(qnt_lpc + offset, weighted_lpc + (offset << 1),
-                             zero, zero, flt_in, vector + PITCH_MAX, 1);
-        harmonic_filter(hf + i, vector + PITCH_MAX, impulse_resp);
-
-         /* Compute the combined zero input response */
-        flt_in[0] = 0;
-        memcpy(fir, p->perf_fir_mem, sizeof(int16_t) * LPC_ORDER);
-        memcpy(iir, p->perf_iir_mem, sizeof(int16_t) * LPC_ORDER);
-
-        synth_percept_filter(qnt_lpc + offset, weighted_lpc + (offset << 1),
-                             fir, iir, flt_in, vector + PITCH_MAX, 0);
-        memcpy(vector, p->harmonic_mem, sizeof(int16_t) * PITCH_MAX);
-        harmonic_noise_sub(hf + i, vector + PITCH_MAX, in);
-
-        acb_search(p, residual, impulse_resp, in, i);
-        gen_acb_excitation(residual, p->prev_excitation,p->pitch_lag[i >> 1],
-                           &p->subframe[i], p->cur_rate);
-        sub_acb_contrib(residual, impulse_resp, in);
-
-        fcb_search(p, impulse_resp, in, i);
-
-        /* Reconstruct the excitation */
-        gen_acb_excitation(impulse_resp, p->prev_excitation, p->pitch_lag[i >> 1],
-                           &p->subframe[i], RATE_6300);
-
-        memmove(p->prev_excitation, p->prev_excitation + SUBFRAME_LEN,
-               sizeof(int16_t) * (PITCH_MAX - SUBFRAME_LEN));
-        for (j = 0; j < SUBFRAME_LEN; j++)
-            in[j] = av_clip_int16((in[j] << 1) + impulse_resp[j]);
-        memcpy(p->prev_excitation + PITCH_MAX - SUBFRAME_LEN, in,
-               sizeof(int16_t) * SUBFRAME_LEN);
-
-        /* Update filter memories */
-        synth_percept_filter(qnt_lpc + offset, weighted_lpc + (offset << 1),
-                             p->perf_fir_mem, p->perf_iir_mem,
-                             in, vector + PITCH_MAX, 0);
-        memmove(p->harmonic_mem, p->harmonic_mem + SUBFRAME_LEN,
-                sizeof(int16_t) * (PITCH_MAX - SUBFRAME_LEN));
-        memcpy(p->harmonic_mem + PITCH_MAX - SUBFRAME_LEN, vector + PITCH_MAX,
-               sizeof(int16_t) * SUBFRAME_LEN);
-
-        in += SUBFRAME_LEN;
-        offset += LPC_ORDER;
-    }
-
-    av_freep(&in_orig); in = NULL;
-
-    if ((ret = ff_alloc_packet2(avctx, avpkt, 24)) < 0)
-        return ret;
-
-    *got_packet_ptr = 1;
-    avpkt->size = pack_bitstream(p, avpkt->data, avpkt->size);
-    return 0;
-}
-
-AVCodec ff_g723_1_encoder = {
-    .name           = "g723_1",
-    .long_name      = NULL_IF_CONFIG_SMALL("G.723.1"),
-    .type           = AVMEDIA_TYPE_AUDIO,
-    .id             = AV_CODEC_ID_G723_1,
-    .priv_data_size = sizeof(G723_1_Context),
-    .init           = g723_1_encode_init,
-    .encode2        = g723_1_encode_frame,
-    .sample_fmts    = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_S16,
-                                                    AV_SAMPLE_FMT_NONE},
-};
-#endif
diff --git a/libavcodec/g723_1_data.h b/libavcodec/g723_1.h
similarity index 95%
rename from libavcodec/g723_1_data.h
rename to libavcodec/g723_1.h
index db7f6e4d..40d6e700 100644
--- a/libavcodec/g723_1_data.h
+++ b/libavcodec/g723_1.h
@@ -1,5 +1,5 @@
 /*
- * G723.1 compatible decoder data tables.
+ * G.723.1 common header and data tables
  * Copyright (c) 2006 Benjamin Larsson
  * Copyright (c) 2010 Mohamed Naufal Basheer
  *
@@ -22,14 +22,16 @@
 
 /**
  * @file
- * G723.1 compatible decoder data tables
+ * G.723.1 types, functions and data tables
  */
 
-#ifndef AVCODEC_G723_1_DATA_H
-#define AVCODEC_G723_1_DATA_H
+#ifndef AVCODEC_G723_1_H
+#define AVCODEC_G723_1_H
 
 #include <stdint.h>
 
+#include "libavutil/log.h"
+
 #define SUBFRAMES       4
 #define SUBFRAME_LEN    60
 #define FRAME_LEN       (SUBFRAME_LEN << 2)
@@ -46,21 +48,31 @@
 #define GAIN_LEVELS     24
 #define COS_TBL_SIZE    512
 
+/**
+ * Bitexact implementation of 2ab scaled by 1/2^16.
+ *
+ * @param a 32 bit multiplicand
+ * @param b 16 bit multiplier
+ */
+#define MULL2(a, b) \
+        ((((a) >> 16) * (b) << 1) + (((a) & 0xffff) * (b) >> 15))
+
 /**
  * G723.1 frame types
  */
-typedef enum FrameType {
+enum FrameType {
     ACTIVE_FRAME,        ///< Active speech
     SID_FRAME,           ///< Silence Insertion Descriptor frame
     UNTRANSMITTED_FRAME
-} FrameType;
-
-static const uint8_t frame_size[4] = { 24, 20, 4, 1 };
+};
 
-typedef enum Rate {
+/**
+ * G723.1 rate values
+ */
+enum Rate {
     RATE_6300,
     RATE_5300
-} Rate;
+};
 
 /**
  * G723.1 unpacked data subframe
@@ -104,6 +116,108 @@ typedef struct FCBParam {
     int pulse_sign[PULSE_MAX];
 } FCBParam;
 
+typedef struct g723_1_context {
+    AVClass *class;
+
+    G723_1_Subframe subframe[4];     ///< unpacked per-subframe parameters
+    enum FrameType cur_frame_type;
+    enum FrameType past_frame_type;
+    enum Rate cur_rate;
+    uint8_t lsp_index[LSP_BANDS];    ///< LSP VQ indices, read as 8 bits per band
+    int pitch_lag[2];                ///< one lag per pair of subframes (index: subframe >> 1)
+    int erased_frames;
+
+    int16_t prev_lsp[LPC_ORDER];     ///< LSP vector of the previous frame
+    int16_t sid_lsp[LPC_ORDER];
+    int16_t prev_excitation[PITCH_MAX];
+    int16_t excitation[PITCH_MAX + FRAME_LEN + 4];
+    int16_t synth_mem[LPC_ORDER];
+    int16_t fir_mem[LPC_ORDER];      ///< formant postfilter fir
+    int     iir_mem[LPC_ORDER];      ///< and iir memories (32-bit samples)
+
+    int random_seed;
+    int cng_random_seed;             ///< state handed to cng_rand()
+    int interp_index;
+    int interp_gain;
+    int sid_gain;
+    int cur_gain;
+    int reflection_coef;             ///< smoothed across subframes in formant_postfilter()
+    int pf_gain;                 ///< formant postfilter
+                                 ///< gain scaling unit memory
+    int postfilter;
+
+    int16_t audio[FRAME_LEN + LPC_ORDER + PITCH_MAX + 4]; ///< decoder working buffer
+
+    /* encoder */
+    int16_t prev_data[HALF_FRAME_LEN];
+    int16_t prev_weight_sig[PITCH_MAX];
+
+    int16_t hpf_fir_mem;                   ///< highpass filter fir
+    int     hpf_iir_mem;                   ///< and iir memories
+    int16_t perf_fir_mem[LPC_ORDER];       ///< perceptual filter fir
+    int16_t perf_iir_mem[LPC_ORDER];       ///< and iir memories
+
+    int16_t harmonic_mem[PITCH_MAX];
+} G723_1_Context;
+
+
+/**
+ * Scale vector contents based on the largest of their absolutes.
+ */
+int ff_g723_1_scale_vector(int16_t *dst, const int16_t *vector, int length);
+
+/**
+ * Calculate the number of left-shifts required for normalizing the input.
+ *
+ * @param num   input number
+ * @param width width of the input, 16 bits(0) / 32 bits(1)
+ */
+int ff_g723_1_normalize_bits(int num, int width);
+
+int ff_g723_1_dot_product(const int16_t *a, const int16_t *b, int length);
+
+/**
+ * Get delayed contribution from the previous excitation vector.
+ */
+void ff_g723_1_get_residual(int16_t *residual, int16_t *prev_excitation,
+                            int lag);
+
+/**
+ * Generate a train of dirac functions with period as pitch lag.
+ */
+void ff_g723_1_gen_dirac_train(int16_t *buf, int pitch_lag);
+
+
+/**
+ * Generate adaptive codebook excitation.
+ */
+void ff_g723_1_gen_acb_excitation(int16_t *vector, int16_t *prev_excitation,
+                                  int pitch_lag, G723_1_Subframe *subfrm,
+                                  enum Rate cur_rate);
+/**
+ * Quantize LSP frequencies by interpolation and convert them to
+ * the corresponding LPC coefficients.
+ *
+ * @param lpc      buffer for LPC coefficients
+ * @param cur_lsp  the current LSP vector
+ * @param prev_lsp the previous LSP vector
+ */
+void ff_g723_1_lsp_interpolate(int16_t *lpc, int16_t *cur_lsp,
+                               int16_t *prev_lsp);
+
+/**
+ * Perform inverse quantization of LSP frequencies.
+ *
+ * @param cur_lsp    the current LSP vector
+ * @param prev_lsp   the previous LSP vector
+ * @param lsp_index  VQ indices
+ * @param bad_frame  bad frame flag
+ */
+void ff_g723_1_inverse_quant(int16_t *cur_lsp, int16_t *prev_lsp,
+                             uint8_t *lsp_index, int bad_frame);
+
+static const uint8_t frame_size[4] = { 24, 20, 4, 1 };
+
 /**
  * Postfilter gain weighting factors scaled by 2^15
  */
@@ -125,10 +239,8 @@ static const int16_t dc_lsp[LPC_ORDER] = {
     0x6c46
 };
 
-/**
- * Cosine table scaled by 2^14
- */
-static const int16_t cos_tab[COS_TBL_SIZE+1] = {
+/* Cosine table scaled by 2^14 */
+static const int16_t cos_tab[COS_TBL_SIZE + 1] = {
     16384,  16383,  16379,  16373,  16364,  16353,  16340,  16324,
     16305,  16284,  16261,  16235,  16207,  16176,  16143,  16107,
     16069,  16029,  15986,  15941,  15893,  15843,  15791,  15736,
@@ -1326,4 +1438,4 @@ static const int cng_filt[4] = { 273, 998, 499, 333 };
 
 static const int cng_bseg[3] = { 2048, 18432, 231233 };
 
-#endif /* AVCODEC_G723_1_DATA_H */
+#endif /* AVCODEC_G723_1_H */
diff --git a/libavcodec/g723_1dec.c b/libavcodec/g723_1dec.c
new file mode 100644
index 00000000..3e8c4897
--- /dev/null
+++ b/libavcodec/g723_1dec.c
@@ -0,0 +1,1027 @@
+/*
+ * G.723.1 compatible decoder
+ * Copyright (c) 2006 Benjamin Larsson
+ * Copyright (c) 2010 Mohamed Naufal Basheer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * G.723.1 compatible decoder
+ */
+
+#define BITSTREAM_READER_LE
+#include "libavutil/channel_layout.h"
+#include "libavutil/mem.h"
+#include "libavutil/opt.h"
+#include "avcodec.h"
+#include "get_bits.h"
+#include "acelp_vectors.h"
+#include "celp_filters.h"
+#include "celp_math.h"
+#include "g723_1.h"
+#include "internal.h"
+
+#define CNG_RANDOM_SEED 12345
+
+static av_cold int g723_1_decode_init(AVCodecContext *avctx)
+{
+    G723_1_Context *ctx = avctx->priv_data;
+
+    /* Decoder state: gain memory, CNG seed, frame history, LSP history. */
+    ctx->pf_gain         = 1 << 12;
+    ctx->cng_random_seed = CNG_RANDOM_SEED;
+    ctx->past_frame_type = SID_FRAME;
+    memcpy(ctx->sid_lsp,  dc_lsp, sizeof(ctx->sid_lsp));
+    memcpy(ctx->prev_lsp, dc_lsp, sizeof(ctx->prev_lsp));
+
+    /* Output is always mono, signed 16-bit samples. */
+    avctx->channels       = 1;
+    avctx->channel_layout = AV_CH_LAYOUT_MONO;
+    avctx->sample_fmt     = AV_SAMPLE_FMT_S16;
+    return 0;
+}
+
+/**
+ * Unpack the frame into parameters.
+ * Returns 0 on success, -1 when a forbidden pitch lag or gain code is read.
+ * @param p           the context
+ * @param buf         pointer to the input buffer
+ * @param buf_size    size of the input buffer in bytes
+ */
+static int unpack_bitstream(G723_1_Context *p, const uint8_t *buf,
+                            int buf_size)
+{
+    GetBitContext gb;
+    int ad_cb_len;
+    int temp, info_bits, i;
+
+    init_get_bits(&gb, buf, buf_size * 8);
+
+    /* Extract frame type and rate info */
+    info_bits = get_bits(&gb, 2);
+
+    if (info_bits == 3) {
+        p->cur_frame_type = UNTRANSMITTED_FRAME;
+        return 0;                    /* nothing else is parsed for this type */
+    }
+
+    /* Extract 24 bit lsp indices, 8 bit for each band */
+    p->lsp_index[2] = get_bits(&gb, 8);
+    p->lsp_index[1] = get_bits(&gb, 8);
+    p->lsp_index[0] = get_bits(&gb, 8);
+
+    if (info_bits == 2) {
+        p->cur_frame_type = SID_FRAME;
+        p->subframe[0].amp_index = get_bits(&gb, 6);
+        return 0;                    /* SID frames end after the 6-bit index */
+    }
+
+    /* Extract the info common to both rates */
+    p->cur_rate       = info_bits ? RATE_5300 : RATE_6300;
+    p->cur_frame_type = ACTIVE_FRAME;
+
+    p->pitch_lag[0] = get_bits(&gb, 7);
+    if (p->pitch_lag[0] > 123)       /* test if forbidden code */
+        return -1;
+    p->pitch_lag[0] += PITCH_MIN;
+    p->subframe[1].ad_cb_lag = get_bits(&gb, 2);
+
+    p->pitch_lag[1] = get_bits(&gb, 7);
+    if (p->pitch_lag[1] > 123)       /* same forbidden-code test as above */
+        return -1;
+    p->pitch_lag[1] += PITCH_MIN;
+    p->subframe[3].ad_cb_lag = get_bits(&gb, 2);
+    p->subframe[0].ad_cb_lag = 1;    /* even subframes: constant, not read */
+    p->subframe[2].ad_cb_lag = 1;
+
+    for (i = 0; i < SUBFRAMES; i++) {
+        /* Extract combined gain */
+        temp = get_bits(&gb, 12);
+        ad_cb_len = 170;             /* exclusive upper bound for ad_cb_gain */
+        p->subframe[i].dirac_train = 0;
+        if (p->cur_rate == RATE_6300 && p->pitch_lag[i >> 1] < SUBFRAME_LEN - 2) {
+            p->subframe[i].dirac_train = temp >> 11; /* top bit of the field */
+            temp &= 0x7FF;
+            ad_cb_len = 85;
+        }
+        p->subframe[i].ad_cb_gain = FASTDIV(temp, GAIN_LEVELS);
+        if (p->subframe[i].ad_cb_gain < ad_cb_len) {
+            p->subframe[i].amp_index = temp - p->subframe[i].ad_cb_gain *
+                                       GAIN_LEVELS; /* temp % GAIN_LEVELS */
+        } else {
+            return -1;
+        }
+    }
+
+    p->subframe[0].grid_index = get_bits1(&gb);
+    p->subframe[1].grid_index = get_bits1(&gb);
+    p->subframe[2].grid_index = get_bits1(&gb);
+    p->subframe[3].grid_index = get_bits1(&gb);
+
+    if (p->cur_rate == RATE_6300) {
+        skip_bits1(&gb);  /* skip reserved bit */
+
+        /* Compute pulse_pos index using the 13-bit combined position index */
+        temp = get_bits(&gb, 13);
+        p->subframe[0].pulse_pos = temp / 810;
+
+        temp -= p->subframe[0].pulse_pos * 810;
+        p->subframe[1].pulse_pos = FASTDIV(temp, 90);
+
+        temp -= p->subframe[1].pulse_pos * 90;
+        p->subframe[2].pulse_pos = FASTDIV(temp, 9);
+        p->subframe[3].pulse_pos = temp - p->subframe[2].pulse_pos * 9;
+
+        /* Append the low 16/14/16/14 bits to the high parts decoded above */
+        p->subframe[0].pulse_pos = (p->subframe[0].pulse_pos << 16) +
+                                   get_bits(&gb, 16);
+        p->subframe[1].pulse_pos = (p->subframe[1].pulse_pos << 14) +
+                                   get_bits(&gb, 14);
+        p->subframe[2].pulse_pos = (p->subframe[2].pulse_pos << 16) +
+                                   get_bits(&gb, 16);
+        p->subframe[3].pulse_pos = (p->subframe[3].pulse_pos << 14) +
+                                   get_bits(&gb, 14);
+
+        p->subframe[0].pulse_sign = get_bits(&gb, 6);
+        p->subframe[1].pulse_sign = get_bits(&gb, 5);
+        p->subframe[2].pulse_sign = get_bits(&gb, 6);
+        p->subframe[3].pulse_sign = get_bits(&gb, 5);
+    } else { /* 5300 bps */
+        p->subframe[0].pulse_pos  = get_bits(&gb, 12);
+        p->subframe[1].pulse_pos  = get_bits(&gb, 12);
+        p->subframe[2].pulse_pos  = get_bits(&gb, 12);
+        p->subframe[3].pulse_pos  = get_bits(&gb, 12);
+
+        p->subframe[0].pulse_sign = get_bits(&gb, 4);
+        p->subframe[1].pulse_sign = get_bits(&gb, 4);
+        p->subframe[2].pulse_sign = get_bits(&gb, 4);
+        p->subframe[3].pulse_sign = get_bits(&gb, 4);
+    }
+
+    return 0;
+}
+
+/**
+ * Bitexact sqrt(val / 2); the lowest bit of the result is always cleared.
+ */
+static int16_t square_root(unsigned val)
+{
+    av_assert2(val < 0x80000000u); /* doubling val below must not wrap */
+
+    return (ff_sqrt(val * 2) >> 1) & ~1;
+}
+
+/**
+ * Generate fixed codebook excitation vector.
+ * The vector is zeroed first, then pulses of +/-fixed_cb_gain are placed.
+ * @param vector    decoded excitation vector
+ * @param subfrm    current subframe
+ * @param cur_rate  current bitrate
+ * @param pitch_lag closed loop pitch lag
+ * @param index     current subframe index
+ */
+static void gen_fcb_excitation(int16_t *vector, G723_1_Subframe *subfrm,
+                               enum Rate cur_rate, int pitch_lag, int index)
+{
+    int temp, i, j;
+
+    memset(vector, 0, SUBFRAME_LEN * sizeof(*vector));
+
+    if (cur_rate == RATE_6300) {
+        if (subfrm->pulse_pos >= max_pos[index])
+            return;                  /* out-of-range code: vector stays zero */
+
+        /* Decode amplitudes and positions */
+        j = PULSE_MAX - pulses[index];
+        temp = subfrm->pulse_pos;
+        for (i = 0; i < SUBFRAME_LEN / GRID_SIZE; i++) {
+            temp -= combinatorial_table[j][i];
+            if (temp >= 0)
+                continue;            /* no pulse at grid position i */
+            temp += combinatorial_table[j++][i]; /* undo, move to next pulse */
+            if (subfrm->pulse_sign & (1 << (PULSE_MAX - j))) {
+                vector[subfrm->grid_index + GRID_SIZE * i] =
+                                        -fixed_cb_gain[subfrm->amp_index];
+            } else {
+                vector[subfrm->grid_index + GRID_SIZE * i] =
+                                         fixed_cb_gain[subfrm->amp_index];
+            }
+            if (j == PULSE_MAX)
+                break;               /* all pulses placed */
+        }
+        if (subfrm->dirac_train == 1)
+            ff_g723_1_gen_dirac_train(vector, pitch_lag);
+    } else { /* 5300 bps */
+        int cb_gain  = fixed_cb_gain[subfrm->amp_index];
+        int cb_shift = subfrm->grid_index;
+        int cb_sign  = subfrm->pulse_sign;
+        int cb_pos   = subfrm->pulse_pos;
+        int offset, beta, lag;
+
+        /* Four pulses: each uses 3 position bits and 1 sign bit */
+        for (i = 0; i < 8; i += 2) {
+            offset         = ((cb_pos & 7) << 3) + cb_shift + i;
+            vector[offset] = (cb_sign & 1) ? cb_gain : -cb_gain;
+            cb_pos  >>= 3;
+            cb_sign >>= 1;
+        }
+
+        /* Enhance harmonic components */
+        lag  = pitch_contrib[subfrm->ad_cb_gain << 1] + pitch_lag +
+               subfrm->ad_cb_lag - 1;
+        beta = pitch_contrib[(subfrm->ad_cb_gain << 1) + 1];
+
+        if (lag < SUBFRAME_LEN - 2) {
+            for (i = lag; i < SUBFRAME_LEN; i++)
+                vector[i] += beta * vector[i - lag] >> 15;
+        }
+    }
+}
+
+/**
+ * Search the lags pitch_lag-3 .. pitch_lag+3 for the largest correlation.
+ *
+ * @param buf       buffer with offset applied
+ * @param offset    offset of the excitation vector
+ * @param ccr_max   in/out: best correlation so far, raised when beaten
+ * @param pitch_lag decoded pitch lag (capped at PITCH_MAX - 3)
+ * @param length    length of autocorrelation
+ * @param dir       forward lag(1) / backward lag(-1)
+ */
+static int autocorr_max(const int16_t *buf, int offset, int *ccr_max,
+                        int pitch_lag, int length, int dir)
+{
+    int best_lag = 0;
+    int last, cand;
+
+    pitch_lag = FFMIN(PITCH_MAX - 3, pitch_lag);
+    last      = pitch_lag + 3;
+    if (dir > 0) /* forward: cap at FRAME_LEN + PITCH_MAX - offset - length */
+        last = FFMIN(FRAME_LEN + PITCH_MAX - offset - length, last);
+
+    for (cand = pitch_lag - 3; cand <= last; cand++) {
+        int corr = ff_g723_1_dot_product(buf, buf + dir * cand, length);
+
+        if (corr <= *ccr_max)
+            continue;
+        *ccr_max = corr;
+        best_lag = cand;
+    }
+
+    return best_lag;
+}
+
+/**
+ * Calculate pitch postfilter optimal and scaling gains.
+ * Writes ppf->index, ppf->opt_gain and ppf->sc_gain.
+ * @param lag      pitch postfilter forward/backward lag
+ * @param ppf      pitch postfilter parameters
+ * @param cur_rate current bitrate
+ * @param tgt_eng  target energy
+ * @param ccr      cross-correlation
+ * @param res_eng  residual energy
+ */
+static void comp_ppf_gains(int lag, PPFParam *ppf, enum Rate cur_rate,
+                           int tgt_eng, int ccr, int res_eng)
+{
+    int pf_residual;     /* square of postfiltered residual */
+    int temp1, temp2;
+
+    ppf->index = lag;
+
+    temp1 = tgt_eng * res_eng >> 1;
+    temp2 = ccr * ccr << 1;
+
+    if (temp2 > temp1) {             /* i.e. 4 * ccr^2 > tgt_eng * res_eng */
+        if (ccr >= res_eng) {
+            ppf->opt_gain = ppf_gain_weight[cur_rate]; /* ratio capped at 1 */
+        } else {
+            ppf->opt_gain = (ccr << 15) / res_eng *
+                            ppf_gain_weight[cur_rate] >> 15;
+        }
+        /* pf_res^2 = tgt_eng + 2*ccr*gain + res_eng*gain^2 */
+        temp1       = (tgt_eng << 15) + (ccr * ppf->opt_gain << 1);
+        temp2       = (ppf->opt_gain * ppf->opt_gain >> 15) * res_eng;
+        pf_residual = av_sat_add32(temp1, temp2 + (1 << 15)) >> 16;
+
+        if (tgt_eng >= pf_residual << 1) {
+            temp1 = 0x7fff;          /* saturate the ratio */
+        } else {
+            temp1 = (tgt_eng << 14) / pf_residual;
+        }
+
+        /* scaling_gain = sqrt(tgt_eng/pf_res^2) */
+        ppf->sc_gain = square_root(temp1 << 16);
+    } else {
+        ppf->opt_gain = 0;           /* correlation too weak: no filtering */
+        ppf->sc_gain  = 0x7fff;
+    }
+
+    ppf->opt_gain = av_clip_int16(ppf->opt_gain * ppf->sc_gain >> 15);
+}
+
+/**
+ * Calculate pitch postfilter parameters.
+ * ppf is reset to "no filtering" first and left that way if no lag is found.
+ * @param p         the context
+ * @param offset    offset of the excitation vector
+ * @param pitch_lag decoded pitch lag
+ * @param ppf       pitch postfilter parameters
+ * @param cur_rate  current bitrate
+ */
+static void comp_ppf_coeff(G723_1_Context *p, int offset, int pitch_lag,
+                           PPFParam *ppf, enum Rate cur_rate)
+{
+
+    int16_t scale;
+    int i;
+    int temp1, temp2;
+
+    /*
+     * 0 - target energy
+     * 1 - forward cross-correlation
+     * 2 - forward residual energy
+     * 3 - backward cross-correlation
+     * 4 - backward residual energy
+     */
+    int energy[5] = {0, 0, 0, 0, 0};
+    int16_t *buf  = p->audio + LPC_ORDER + offset;
+    int fwd_lag   = autocorr_max(buf, offset, &energy[1], pitch_lag,
+                                 SUBFRAME_LEN, 1);
+    int back_lag  = autocorr_max(buf, offset, &energy[3], pitch_lag,
+                                 SUBFRAME_LEN, -1);
+
+    ppf->index    = 0;
+    ppf->opt_gain = 0;
+    ppf->sc_gain  = 0x7fff;
+
+    /* Case 0, Section 3.6 */
+    if (!back_lag && !fwd_lag)
+        return;
+
+    /* Compute target energy */
+    energy[0] = ff_g723_1_dot_product(buf, buf, SUBFRAME_LEN);
+
+    /* Compute forward residual energy */
+    if (fwd_lag)
+        energy[2] = ff_g723_1_dot_product(buf + fwd_lag, buf + fwd_lag,
+                                          SUBFRAME_LEN);
+
+    /* Compute backward residual energy */
+    if (back_lag)
+        energy[4] = ff_g723_1_dot_product(buf - back_lag, buf - back_lag,
+                                          SUBFRAME_LEN);
+
+    /* Normalize and shorten */
+    temp1 = 0;
+    for (i = 0; i < 5; i++)
+        temp1 = FFMAX(energy[i], temp1);
+
+    scale = ff_g723_1_normalize_bits(temp1, 31);
+    for (i = 0; i < 5; i++)
+        energy[i] = (energy[i] << scale) >> 16; /* keep the top 16 bits */
+
+    if (fwd_lag && !back_lag) {  /* Case 1 */
+        comp_ppf_gains(fwd_lag,  ppf, cur_rate, energy[0], energy[1],
+                       energy[2]);
+    } else if (!fwd_lag) {       /* Case 2 */
+        comp_ppf_gains(-back_lag, ppf, cur_rate, energy[0], energy[3],
+                       energy[4]);
+    } else {                     /* Case 3 */
+
+        /*
+         * Select the largest of energy[1]^2/energy[2]
+         * and energy[3]^2/energy[4]
+         */
+        temp1 = energy[4] * ((energy[1] * energy[1] + (1 << 14)) >> 15);
+        temp2 = energy[2] * ((energy[3] * energy[3] + (1 << 14)) >> 15);
+        if (temp1 >= temp2) {    /* cross-multiplied, so no division needed */
+            comp_ppf_gains(fwd_lag, ppf, cur_rate, energy[0], energy[1],
+                           energy[2]);
+        } else {
+            comp_ppf_gains(-back_lag, ppf, cur_rate, energy[0], energy[3],
+                           energy[4]);
+        }
+    }
+}
+
+/**
+ * Classify frames as voiced/unvoiced.
+ * Also rescales p->excitation into p->audio + LPC_ORDER as a side effect.
+ * @param p         the context
+ * @param pitch_lag decoded pitch_lag
+ * @param exc_eng   excitation energy estimation (output)
+ * @param scale     scaling factor of exc_eng (output)
+ *
+ * @return residual interpolation index if voiced, 0 otherwise
+ */
+static int comp_interp_index(G723_1_Context *p, int pitch_lag,
+                             int *exc_eng, int *scale)
+{
+    int offset = PITCH_MAX + 2 * SUBFRAME_LEN;
+    int16_t *buf = p->audio + LPC_ORDER;
+
+    int index, ccr, tgt_eng, best_eng, temp;
+
+    *scale = ff_g723_1_scale_vector(buf, p->excitation, FRAME_LEN + PITCH_MAX);
+    buf   += offset;                 /* analyse the last two subframes */
+
+    /* Compute maximum backward cross-correlation */
+    ccr   = 0;
+    index = autocorr_max(buf, offset, &ccr, pitch_lag, SUBFRAME_LEN * 2, -1);
+    ccr   = av_sat_add32(ccr, 1 << 15) >> 16; /* round to the top 16 bits */
+
+    /* Compute target energy */
+    tgt_eng  = ff_g723_1_dot_product(buf, buf, SUBFRAME_LEN * 2);
+    *exc_eng = av_sat_add32(tgt_eng, 1 << 15) >> 16;
+
+    if (ccr <= 0)
+        return 0;
+
+    /* Compute best energy */
+    best_eng = ff_g723_1_dot_product(buf - index, buf - index,
+                                     SUBFRAME_LEN * 2);
+    best_eng = av_sat_add32(best_eng, 1 << 15) >> 16;
+
+    temp = best_eng * *exc_eng >> 3;
+
+    if (temp < ccr * ccr) {          /* voiced: ccr^2 > best_eng * exc_eng / 8 */
+        return index;
+    } else
+        return 0;
+}
+
+/**
+ * Perform residual interpolation based on frame classification.
+ *
+ * @param buf   decoded excitation vector (zeroed on the unvoiced path)
+ * @param out   output vector, FRAME_LEN samples
+ * @param lag   interpolation lag; 0 selects the unvoiced (noise) path
+ * @param gain  interpolated gain
+ * @param rseed seed for random number generator, updated in place
+ */
+static void residual_interp(int16_t *buf, int16_t *out, int lag,
+                            int gain, int *rseed)
+{
+    int i;
+    if (lag) { /* Voiced */
+        int16_t *vector_ptr = buf + PITCH_MAX;
+        /* Attenuate: the last lag samples scaled by 3/4 */
+        for (i = 0; i < lag; i++)
+            out[i] = vector_ptr[i - lag] * 3 >> 2;
+        av_memcpy_backptr((uint8_t*)(out + lag), lag * sizeof(*out),
+                          (FRAME_LEN - lag) * sizeof(*out)); /* repeat period */
+    } else {  /* Unvoiced */
+        for (i = 0; i < FRAME_LEN; i++) {
+            *rseed = *rseed * 521 + 259; /* NOTE(review): can overflow int */
+            out[i] = gain * *rseed >> 15;
+        }
+        memset(buf, 0, (FRAME_LEN + PITCH_MAX) * sizeof(*buf));
+    }
+}
+
+/**
+ * Perform IIR filtering.
+ * src and dest must each hold LPC_ORDER valid samples before index 0.
+ * @param fir_coef FIR coefficients
+ * @param iir_coef IIR coefficients
+ * @param src      source vector
+ * @param dest     destination vector
+ * @param width    width of the output, 16 bits(0) / 32 bits(1)
+ */
+#define iir_filter(fir_coef, iir_coef, src, dest, width)\
+do {\
+    int m, n;\
+    int res_shift = 16 & ~-(width);  /* 16 when width == 0, 0 when 1 */\
+    int in_shift  = 16 - res_shift;\
+\
+    for (m = 0; m < SUBFRAME_LEN; m++) {\
+        int64_t filter = 0;\
+        for (n = 1; n <= LPC_ORDER; n++) {\
+            filter -= (fir_coef)[n - 1] * (src)[m - n] -\
+                      (iir_coef)[n - 1] * ((dest)[m - n] >> in_shift);\
+        }\
+        /* multiply, not shift: left-shifting a negative value is undefined */\
+        (dest)[m] = av_clipl_int32(((src)[m] * 65536) + (filter * 8) +\
+                                   (1 << 15)) >> res_shift;\
+    }\
+} while (0)
+
+/**
+ * Adjust gain of postfiltered signal.
+ * Scales buf in place and updates the smoothed gain p->pf_gain per sample.
+ * @param p      the context
+ * @param buf    postfiltered output vector, SUBFRAME_LEN samples
+ * @param energy input energy coefficient
+ */
+static void gain_scale(G723_1_Context *p, int16_t *buf, int energy)
+{
+    int num, denom, gain, bits1, bits2;
+    int i;
+
+    num   = energy;
+    denom = 0;                       /* energy of the postfiltered subframe */
+    for (i = 0; i < SUBFRAME_LEN; i++) {
+        int temp = buf[i] >> 2;
+        temp *= temp;
+        denom = av_sat_dadd32(denom, temp);
+    }
+
+    if (num && denom) {
+        bits1   = ff_g723_1_normalize_bits(num,   31);
+        bits2   = ff_g723_1_normalize_bits(denom, 31);
+        num     = num << bits1 >> 1;
+        denom <<= bits2;
+
+        bits2 = 5 + bits1 - bits2;
+        bits2 = av_clip(bits2, 0, 31); /* a shift by 32 or more is undefined */
+
+        gain = (num >> 1) / (denom >> 16);
+        gain = square_root(gain << 16 >> bits2);
+    } else {
+        gain = 1 << 12;              /* same value pf_gain is initialised to */
+    }
+
+    for (i = 0; i < SUBFRAME_LEN; i++) {
+        p->pf_gain = (15 * p->pf_gain + gain + (1 << 3)) >> 4;
+        buf[i]     = av_clip_int16((buf[i] * (p->pf_gain + (p->pf_gain >> 4)) +
+                                   (1 << 10)) >> 11);
+    }
+}
+
+/**
+ * Perform formant filtering.
+ * Updates p->fir_mem, p->iir_mem and p->reflection_coef for the next frame.
+ * @param p   the context
+ * @param lpc quantized lpc coefficients
+ * @param buf input buffer; its first LPC_ORDER samples are overwritten
+ * @param dst output buffer
+ */
+static void formant_postfilter(G723_1_Context *p, int16_t *lpc,
+                               int16_t *buf, int16_t *dst)
+{
+    int16_t filter_coef[2][LPC_ORDER];
+    int filter_signal[LPC_ORDER + FRAME_LEN], *signal_ptr;
+    int i, j, k;
+
+    /* Restore the filter histories saved at the end of the previous call */
+    memcpy(buf, p->fir_mem, LPC_ORDER * sizeof(*buf));
+    memcpy(filter_signal, p->iir_mem, LPC_ORDER * sizeof(*filter_signal));
+
+    for (i = LPC_ORDER, j = 0; j < SUBFRAMES; i += SUBFRAME_LEN, j++) {
+        for (k = 0; k < LPC_ORDER; k++) {
+            filter_coef[0][k] = (-lpc[k] * postfilter_tbl[0][k] +
+                                 (1 << 14)) >> 15;
+            filter_coef[1][k] = (-lpc[k] * postfilter_tbl[1][k] +
+                                 (1 << 14)) >> 15;
+        }
+        iir_filter(filter_coef[0], filter_coef[1], buf + i, filter_signal + i, 1);
+        lpc += LPC_ORDER;            /* next subframe's coefficient set */
+    }
+
+    memcpy(p->fir_mem, buf + FRAME_LEN, LPC_ORDER * sizeof(int16_t));
+    memcpy(p->iir_mem, filter_signal + FRAME_LEN, LPC_ORDER * sizeof(int));
+
+    buf += LPC_ORDER;
+    signal_ptr = filter_signal + LPC_ORDER;
+    for (i = 0; i < SUBFRAMES; i++) {
+        int temp;
+        int auto_corr[2];
+        int scale, energy;
+
+        /* Normalize */
+        scale = ff_g723_1_scale_vector(dst, buf, SUBFRAME_LEN);
+
+        /* Compute auto correlation coefficients */
+        auto_corr[0] = ff_g723_1_dot_product(dst, dst + 1, SUBFRAME_LEN - 1);
+        auto_corr[1] = ff_g723_1_dot_product(dst, dst,     SUBFRAME_LEN);
+
+        /* Compute reflection coefficient */
+        temp = auto_corr[1] >> 16;
+        if (temp) {                  /* avoid dividing by zero */
+            temp = (auto_corr[0] >> 2) / temp;
+        }
+        p->reflection_coef = (3 * p->reflection_coef + temp + 2) >> 2;
+        temp = -p->reflection_coef >> 1 & ~3; /* ((-coef) >> 1) & ~3 */
+
+        /* Compensation filter */
+        for (j = 0; j < SUBFRAME_LEN; j++) {
+            dst[j] = av_sat_dadd32(signal_ptr[j],
+                                   (signal_ptr[j - 1] >> 16) * temp) >> 16;
+        }
+
+        /* Compute normalized signal energy */
+        temp = 2 * scale + 4;
+        if (temp < 0) {              /* negative count: shift left instead */
+            energy = av_clipl_int32((int64_t)auto_corr[1] << -temp);
+        } else
+            energy = auto_corr[1] >> temp;
+
+        gain_scale(p, dst, energy);
+
+        buf        += SUBFRAME_LEN;
+        signal_ptr += SUBFRAME_LEN;
+        dst        += SUBFRAME_LEN;
+    }
+}
+
+static int sid_gain_to_lsp_index(int gain) /* expand a SID gain index with a 3-segment piecewise-linear map */
+{
+    if (gain < 0x10)
+        return gain << 6;         /* first segment: gain * 64 */
+    else if (gain < 0x20)
+        return gain - 8 << 7;     /* '-' binds tighter than '<<': (gain - 8) * 128 */
+    else
+        return gain - 20 << 8;    /* (gain - 20) * 256 */
+}
+
+static inline int cng_rand(int *state, int base) /* advance *state and return a pseudo-random value scaled by base */
+{
+    *state = (*state * 521 + 259) & 0xFFFF;   /* linear congruential step, state kept to 16 bits */
+    return (*state & 0x7FFF) * base >> 15;    /* (15-bit value * base) / 2^15 */
+}
+
+static int estimate_sid_gain(G723_1_Context *p) /* derive a SID gain index from p->sid_gain and p->cur_gain */
+{
+    int i, shift, seg, seg2, t, val, val_add, x, y;
+
+    shift = 16 - p->cur_gain * 2;
+    if (shift > 0)
+        t = p->sid_gain << shift; /* NOTE(review): shift amount is not range-checked here */
+    else
+        t = p->sid_gain >> -shift;
+    x = t * cng_filt[0] >> 16;
+
+    if (x >= cng_bseg[2])
+        return 0x3F; /* at or above the last segment boundary: largest index */
+
+    if (x >= cng_bseg[1]) {
+        shift = 4;
+        seg   = 3;
+    } else {
+        shift = 3;
+        seg   = (x >= cng_bseg[0]);
+    }
+    seg2 = FFMIN(seg, 3);
+
+    val     = 1 << shift;
+    val_add = val >> 1;
+    for (i = 0; i < shift; i++) { /* bisection on val: compare x with (seg * 32 + (val << seg2))^2 */
+        t = seg * 32 + (val << seg2);
+        t *= t;
+        if (x >= t)
+            val += val_add;
+        else
+            val -= val_add;
+        val_add >>= 1;
+    }
+
+    t = seg * 32 + (val << seg2);
+    y = t * t - x;
+    if (y <= 0) {
+        t = seg * 32 + (val + 1 << seg2);
+        t = t * t - x;
+        val = (seg2 - 1) * 16 + val; /* multiply instead of '<<': seg2 - 1 is -1 when seg == 0 */
+        if (t >= y)
+            val++;
+    } else {
+        t = seg * 32 + (val - 1 << seg2);
+        t = t * t - x;
+        val = (seg2 - 1) * 16 + val; /* multiply instead of '<<': seg2 - 1 is -1 when seg == 0 */
+        if (t >= y)
+            val--;
+    }
+
+    return val;
+}
+
+static void generate_noise(G723_1_Context *p) /* build one frame of comfort-noise excitation in p->audio + LPC_ORDER */
+{
+    int i, j, idx, t;
+    int off[SUBFRAMES];
+    int signs[SUBFRAMES / 2 * 11], pos[SUBFRAMES / 2 * 11];
+    int tmp[SUBFRAME_LEN * 2];
+    int16_t *vector_ptr;
+    int64_t sum;
+    int b0, c, delta, x, shift;
+
+    p->pitch_lag[0] = cng_rand(&p->cng_random_seed, 21) + 123;
+    p->pitch_lag[1] = cng_rand(&p->cng_random_seed, 19) + 123;
+
+    for (i = 0; i < SUBFRAMES; i++) {
+        p->subframe[i].ad_cb_gain = cng_rand(&p->cng_random_seed, 50) + 1;
+        p->subframe[i].ad_cb_lag  = cng_adaptive_cb_lag[i];
+    }
+
+    for (i = 0; i < SUBFRAMES / 2; i++) { /* one 13-bit draw per subframe pair: 2 grid bits + 11 sign bits */
+        t = cng_rand(&p->cng_random_seed, 1 << 13);
+        off[i * 2]     =   t       & 1;
+        off[i * 2 + 1] = ((t >> 1) & 1) + SUBFRAME_LEN;
+        t >>= 2;
+        for (j = 0; j < 11; j++) {
+            signs[i * 11 + j] = ((t & 1) * 2 - 1) * (1 << 14); /* +/-2^14; multiply avoids shifting -1 */
+            t >>= 1;
+        }
+    }
+
+    idx = 0;
+    for (i = 0; i < SUBFRAMES; i++) { /* draw pulses[i] distinct even-grid positions per subframe */
+        for (j = 0; j < SUBFRAME_LEN / 2; j++)
+            tmp[j] = j;
+        t = SUBFRAME_LEN / 2;
+        for (j = 0; j < pulses[i]; j++, idx++) {
+            int idx2 = cng_rand(&p->cng_random_seed, t);
+
+            pos[idx]  = tmp[idx2] * 2 + off[i];
+            tmp[idx2] = tmp[--t]; /* remove the chosen slot so it cannot be drawn again */
+        }
+    }
+
+    vector_ptr = p->audio + LPC_ORDER;
+    memcpy(vector_ptr, p->prev_excitation,
+           PITCH_MAX * sizeof(*p->excitation));
+    for (i = 0; i < SUBFRAMES; i += 2) { /* process the frame two subframes at a time */
+        ff_g723_1_gen_acb_excitation(vector_ptr, vector_ptr,
+                                     p->pitch_lag[i >> 1], &p->subframe[i],
+                                     p->cur_rate);
+        ff_g723_1_gen_acb_excitation(vector_ptr + SUBFRAME_LEN,
+                                     vector_ptr + SUBFRAME_LEN,
+                                     p->pitch_lag[i >> 1], &p->subframe[i + 1],
+                                     p->cur_rate);
+
+        t = 0;
+        for (j = 0; j < SUBFRAME_LEN * 2; j++)
+            t |= FFABS(vector_ptr[j]);
+        t = FFMIN(t, 0x7FFF);
+        if (!t) {
+            shift = 0;
+        } else {
+            shift = -10 + av_log2(t);
+            if (shift < -2)
+                shift = -2;
+        }
+        sum = 0;
+        if (shift < 0) {
+           for (j = 0; j < SUBFRAME_LEN * 2; j++) {
+               t      = vector_ptr[j] * (1 << -shift); /* scale up; multiply because samples may be negative */
+               sum   += t * t;
+               tmp[j] = t;
+           }
+        } else {
+           for (j = 0; j < SUBFRAME_LEN * 2; j++) {
+               t      = vector_ptr[j] >> shift;
+               sum   += t * t;
+               tmp[j] = t;
+           }
+        }
+
+        b0 = 0;
+        for (j = 0; j < 11; j++)
+            b0 += tmp[pos[(i / 2) * 11 + j]] * signs[(i / 2) * 11 + j];
+        b0 = b0 * 2 * 2979LL + (1 << 29) >> 30; // approximated division by 11
+
+        c = p->cur_gain * (p->cur_gain * SUBFRAME_LEN >> 5);
+        if (shift * 2 + 3 >= 0)
+            c >>= shift * 2 + 3;
+        else
+            c <<= -(shift * 2 + 3);
+        c = (av_clipl_int32(sum << 1) - c) * 2979LL >> 15;
+
+        delta = b0 * b0 * 2 - c;
+        if (delta <= 0) {
+            x = -b0;
+        } else {
+            delta = square_root(delta);
+            x     = delta - b0;
+            t     = delta + b0;
+            if (FFABS(t) < FFABS(x)) /* keep the candidate with the smaller magnitude */
+                x = -t;
+        }
+        shift++;
+        if (shift < 0)
+           x >>= -shift;
+        else
+           x *= 1 << shift; /* x may be negative: multiply instead of '<<' */
+        x = av_clip(x, -10000, 10000);
+
+        for (j = 0; j < 11; j++) {
+            idx = (i / 2) * 11 + j;
+            vector_ptr[pos[idx]] = av_clip_int16(vector_ptr[pos[idx]] +
+                                                 (x * signs[idx] >> 15));
+        }
+
+        /* copy decoded data to serve as a history for the next decoded subframes */
+        memcpy(vector_ptr + PITCH_MAX, vector_ptr,
+               sizeof(*vector_ptr) * SUBFRAME_LEN * 2);
+        vector_ptr += SUBFRAME_LEN * 2;
+    }
+    /* Save the excitation for the next frame */
+    memcpy(p->prev_excitation, p->audio + LPC_ORDER + FRAME_LEN,
+           PITCH_MAX * sizeof(*p->excitation));
+}
+
+static int g723_1_decode_frame(AVCodecContext *avctx, void *data, /* decode one packet into FRAME_LEN int16 samples */
+                               int *got_frame_ptr, AVPacket *avpkt)
+{
+    G723_1_Context *p  = avctx->priv_data;
+    AVFrame *frame     = data;
+    const uint8_t *buf = avpkt->data;
+    int buf_size       = avpkt->size;
+    int dec_mode       = buf[0] & 3; /* low two bits of the first byte select the expected frame size */
+
+    PPFParam ppf[SUBFRAMES];
+    int16_t cur_lsp[LPC_ORDER];
+    int16_t lpc[SUBFRAMES * LPC_ORDER];
+    int16_t acb_vector[SUBFRAME_LEN];
+    int16_t *out;
+    int bad_frame = 0, i, j, ret;
+    int16_t *audio = p->audio;
+
+    if (buf_size < frame_size[dec_mode]) { /* truncated packet: consume it without producing a frame */
+        if (buf_size)
+            av_log(avctx, AV_LOG_WARNING,
+                   "Expected %d bytes, got %d - skipping packet\n",
+                   frame_size[dec_mode], buf_size);
+        *got_frame_ptr = 0;
+        return buf_size;
+    }
+
+    if (unpack_bitstream(p, buf, buf_size) < 0) { /* unparsable frame: conceal using the previous frame type */
+        bad_frame = 1;
+        if (p->past_frame_type == ACTIVE_FRAME)
+            p->cur_frame_type = ACTIVE_FRAME;
+        else
+            p->cur_frame_type = UNTRANSMITTED_FRAME;
+    }
+
+    frame->nb_samples = FRAME_LEN;
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
+        return ret;
+
+    out = (int16_t *)frame->data[0];
+
+    if (p->cur_frame_type == ACTIVE_FRAME) {
+        if (!bad_frame)
+            p->erased_frames = 0;
+        else if (p->erased_frames != 3)
+            p->erased_frames++;
+
+        ff_g723_1_inverse_quant(cur_lsp, p->prev_lsp, p->lsp_index, bad_frame);
+        ff_g723_1_lsp_interpolate(lpc, cur_lsp, p->prev_lsp);
+
+        /* Save the lsp_vector for the next frame */
+        memcpy(p->prev_lsp, cur_lsp, LPC_ORDER * sizeof(*p->prev_lsp));
+
+        /* Generate the excitation for the frame */
+        memcpy(p->excitation, p->prev_excitation,
+               PITCH_MAX * sizeof(*p->excitation));
+        if (!p->erased_frames) {
+            int16_t *vector_ptr = p->excitation + PITCH_MAX;
+
+            /* Update interpolation gain memory */
+            p->interp_gain = fixed_cb_gain[(p->subframe[2].amp_index +
+                                            p->subframe[3].amp_index) >> 1];
+            for (i = 0; i < SUBFRAMES; i++) {
+                gen_fcb_excitation(vector_ptr, &p->subframe[i], p->cur_rate,
+                                   p->pitch_lag[i >> 1], i);
+                ff_g723_1_gen_acb_excitation(acb_vector,
+                                             &p->excitation[SUBFRAME_LEN * i],
+                                             p->pitch_lag[i >> 1],
+                                             &p->subframe[i], p->cur_rate);
+                /* Get the total excitation */
+                for (j = 0; j < SUBFRAME_LEN; j++) {
+                    int v = av_clip_int16(vector_ptr[j] << 1);
+                    vector_ptr[j] = av_clip_int16(v + acb_vector[j]);
+                }
+                vector_ptr += SUBFRAME_LEN;
+            }
+
+            vector_ptr = p->excitation + PITCH_MAX;
+
+            p->interp_index = comp_interp_index(p, p->pitch_lag[1],
+                                                &p->sid_gain, &p->cur_gain);
+
+            /* Perform pitch postfiltering */
+            if (p->postfilter) {
+                i = PITCH_MAX;
+                for (j = 0; j < SUBFRAMES; i += SUBFRAME_LEN, j++)
+                    comp_ppf_coeff(p, i, p->pitch_lag[j >> 1],
+                                   ppf + j, p->cur_rate);
+
+                for (i = 0, j = 0; j < SUBFRAMES; i += SUBFRAME_LEN, j++)
+                    ff_acelp_weighted_vector_sum(p->audio + LPC_ORDER + i,
+                                                 vector_ptr + i,
+                                                 vector_ptr + i + ppf[j].index,
+                                                 ppf[j].sc_gain,
+                                                 ppf[j].opt_gain,
+                                                 1 << 14, 15, SUBFRAME_LEN);
+            } else {
+                audio = vector_ptr - LPC_ORDER; /* synthesis below reads audio + LPC_ORDER, i.e. the raw excitation */
+            }
+
+            /* Save the excitation for the next frame */
+            memcpy(p->prev_excitation, p->excitation + FRAME_LEN,
+                   PITCH_MAX * sizeof(*p->excitation));
+        } else {
+            p->interp_gain = (p->interp_gain * 3 + 2) >> 2;
+            if (p->erased_frames == 3) {
+                /* Mute output */
+                memset(p->excitation, 0,
+                       (FRAME_LEN + PITCH_MAX) * sizeof(*p->excitation));
+                memset(p->prev_excitation, 0,
+                       PITCH_MAX * sizeof(*p->excitation));
+                memset(frame->data[0], 0, /* only FRAME_LEN samples were requested for this frame */
+                       FRAME_LEN * sizeof(int16_t));
+            } else {
+                int16_t *buf = p->audio + LPC_ORDER; /* shadows the packet pointer of the same name */
+
+                /* Regenerate frame */
+                residual_interp(p->excitation, buf, p->interp_index,
+                                p->interp_gain, &p->random_seed);
+
+                /* Save the excitation for the next frame */
+                memcpy(p->prev_excitation, buf + (FRAME_LEN - PITCH_MAX),
+                       PITCH_MAX * sizeof(*p->excitation));
+            }
+        }
+        p->cng_random_seed = CNG_RANDOM_SEED;
+    } else {
+        if (p->cur_frame_type == SID_FRAME) {
+            p->sid_gain = sid_gain_to_lsp_index(p->subframe[0].amp_index);
+            ff_g723_1_inverse_quant(p->sid_lsp, p->prev_lsp, p->lsp_index, 0);
+        } else if (p->past_frame_type == ACTIVE_FRAME) {
+            p->sid_gain = estimate_sid_gain(p);
+        }
+
+        if (p->past_frame_type == ACTIVE_FRAME)
+            p->cur_gain = p->sid_gain;
+        else
+            p->cur_gain = (p->cur_gain * 7 + p->sid_gain) >> 3; /* smooth: 7/8 old gain + 1/8 SID gain */
+        generate_noise(p);
+        ff_g723_1_lsp_interpolate(lpc, p->sid_lsp, p->prev_lsp);
+        /* Save the lsp_vector for the next frame */
+        memcpy(p->prev_lsp, p->sid_lsp, LPC_ORDER * sizeof(*p->prev_lsp));
+    }
+
+    p->past_frame_type = p->cur_frame_type;
+
+    memcpy(p->audio, p->synth_mem, LPC_ORDER * sizeof(*p->audio));
+    for (i = LPC_ORDER, j = 0; j < SUBFRAMES; i += SUBFRAME_LEN, j++)
+        ff_celp_lp_synthesis_filter(p->audio + i, &lpc[j * LPC_ORDER],
+                                    audio + i, SUBFRAME_LEN, LPC_ORDER,
+                                    0, 1, 1 << 12);
+    memcpy(p->synth_mem, p->audio + FRAME_LEN, LPC_ORDER * sizeof(*p->audio));
+
+    if (p->postfilter) {
+        formant_postfilter(p, lpc, p->audio, out);
+    } else { // if output is not postfiltered it should be scaled by 2
+        for (i = 0; i < FRAME_LEN; i++)
+            out[i] = av_clip_int16(p->audio[LPC_ORDER + i] << 1);
+    }
+
+    *got_frame_ptr = 1;
+
+    return frame_size[dec_mode]; /* bytes consumed from the packet */
+}
+
+#define OFFSET(x) offsetof(G723_1_Context, x)
+#define AD     (AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_DECODING_PARAM) /* parenthesized so the macro expands as one operand */
+
+static const AVOption options[] = { /* private decoder options */
+    { "postfilter", "enable postfilter", OFFSET(postfilter), AV_OPT_TYPE_BOOL,
+      { .i64 = 1 }, 0, 1, AD }, /* default 1, range 0..1 */
+    { NULL }
+};
+
+
+static const AVClass g723_1dec_class = { /* exposes options[] through priv_class below */
+    .class_name = "G.723.1 decoder",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+AVCodec ff_g723_1_decoder = { /* decoder registration entry */
+    .name           = "g723_1",
+    .long_name      = NULL_IF_CONFIG_SMALL("G.723.1"),
+    .type           = AVMEDIA_TYPE_AUDIO,
+    .id             = AV_CODEC_ID_G723_1,
+    .priv_data_size = sizeof(G723_1_Context),
+    .init           = g723_1_decode_init,
+    .decode         = g723_1_decode_frame,
+    .capabilities   = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1,
+    .priv_class     = &g723_1dec_class,
+};
diff --git a/libavcodec/g723_1enc.c b/libavcodec/g723_1enc.c
new file mode 100644
index 00000000..e7afa4d3
--- /dev/null
+++ b/libavcodec/g723_1enc.c
@@ -0,0 +1,1202 @@
+/*
+ * G.723.1 compatible encoder
+ * Copyright (c) Mohamed Naufal <naufal22@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * G.723.1 compatible encoder
+ */
+
+#include <stdint.h>
+#include <string.h>
+
+#include "libavutil/channel_layout.h"
+#include "libavutil/common.h"
+#include "libavutil/mem.h"
+#include "libavutil/opt.h"
+
+#include "avcodec.h"
+#include "celp_math.h"
+#include "g723_1.h"
+#include "internal.h"
+
+#define BITSTREAM_WRITER_LE
+#include "put_bits.h"
+
+static av_cold int g723_1_encode_init(AVCodecContext *avctx) /* validate encoder parameters and seed the LSP history */
+{
+    G723_1_Context *p = avctx->priv_data;
+
+    if (avctx->sample_rate != 8000) {
+        av_log(avctx, AV_LOG_ERROR, "Only 8000Hz sample rate supported\n");
+        return AVERROR(EINVAL);
+    }
+
+    if (avctx->channels != 1) {
+        av_log(avctx, AV_LOG_ERROR, "Only mono supported\n");
+        return AVERROR(EINVAL);
+    }
+
+    if (avctx->bit_rate == 6300) { /* only this rate is accepted; anything else is rejected below */
+        p->cur_rate = RATE_6300;
+    } else if (avctx->bit_rate == 5300) {
+        av_log(avctx, AV_LOG_ERROR, "Bitrate not supported yet, use 6300\n");
+        return AVERROR_PATCHWELCOME;
+    } else {
+        av_log(avctx, AV_LOG_ERROR, "Bitrate not supported, use 6300\n");
+        return AVERROR(EINVAL);
+    }
+    avctx->frame_size = 240; /* samples per frame requested from the caller */
+    memcpy(p->prev_lsp, dc_lsp, LPC_ORDER * sizeof(int16_t)); /* start the LSP history at dc_lsp */
+
+    return 0;
+}
+
+/**
+ * Remove DC component from the input signal (FRAME_LEN samples, filtered in place).
+ *
+ * @param buf input signal, overwritten with the filtered output
+ * @param fir zero memory: holds the previous input sample, updated per sample
+ * @param iir pole memory: holds the previous filter state, updated per sample
+ */
+static void highpass_filter(int16_t *buf, int16_t *fir, int *iir)
+{
+    int i;
+    for (i = 0; i < FRAME_LEN; i++) {
+        *iir   = (buf[i] - *fir) * (1 << 15) + MULL2(*iir, 0x7f00); /* multiply: the difference may be negative */
+        *fir   = buf[i];
+        buf[i] = av_clipl_int32((int64_t)*iir + (1 << 15)) >> 16; /* round, saturate, keep the high 16 bits */
+    }
+}
+
+/**
+ * Estimate autocorrelation of the input vector over a Hamming-windowed LPC_FRAME block.
+ *
+ * @param buf      input buffer (LPC_FRAME samples are read)
+ * @param autocorr autocorrelation coefficients vector (LPC_ORDER + 1 values are written)
+ */
+static void comp_autocorr(int16_t *buf, int16_t *autocorr)
+{
+    int i, scale, temp;
+    int16_t vector[LPC_FRAME];
+
+    ff_g723_1_scale_vector(vector, buf, LPC_FRAME); /* scaled copy of buf; the return value is not used */
+
+    /* Apply the Hamming window */
+    for (i = 0; i < LPC_FRAME; i++)
+        vector[i] = (vector[i] * hamming_window[i] + (1 << 14)) >> 15;
+
+    /* Compute the first autocorrelation coefficient */
+    temp = ff_dot_product(vector, vector, LPC_FRAME);
+
+    /* Apply a white noise correlation factor of (1025/1024) */
+    temp += temp >> 10;
+
+    /* Normalize */
+    scale       = ff_g723_1_normalize_bits(temp, 31);
+    autocorr[0] = av_clipl_int32((int64_t) (temp << scale) +
+                                 (1 << 15)) >> 16;
+
+    /* Compute the remaining coefficients */
+    if (!autocorr[0]) { /* zero energy: every remaining lag is zero as well */
+        memset(autocorr + 1, 0, LPC_ORDER * sizeof(int16_t));
+    } else {
+        for (i = 1; i <= LPC_ORDER; i++) {
+            temp        = ff_dot_product(vector, vector + i, LPC_FRAME - i);
+            temp        = MULL2((temp << scale), binomial_window[i - 1]); /* same scale as lag 0, then lag window */
+            autocorr[i] = av_clipl_int32((int64_t) temp + (1 << 15)) >> 16;
+        }
+    }
+}
+
+/**
+ * Use Levinson-Durbin recursion to compute LPC coefficients from
+ * autocorrelation values.
+ *
+ * @param lpc      LPC coefficients vector (LPC_ORDER values, zeroed first)
+ * @param autocorr autocorrelation coefficients vector (LPC_ORDER values are read)
+ * @param error    prediction error (passed by value, updated locally per step)
+ */
+static void levinson_durbin(int16_t *lpc, int16_t *autocorr, int16_t error)
+{
+    int16_t vector[LPC_ORDER];
+    int16_t partial_corr;
+    int i, j, temp;
+
+    memset(lpc, 0, LPC_ORDER * sizeof(int16_t));
+
+    for (i = 0; i < LPC_ORDER; i++) {
+        /* Compute the partial correlation coefficient */
+        temp = 0;
+        for (j = 0; j < i; j++)
+            temp -= lpc[j] * autocorr[i - j - 1];
+        temp = (autocorr[i] * (1 << 13) + temp) * (1 << 3); /* multiplies: the operands may be negative */
+
+        if (FFABS(temp) >= (error << 16)) /* also stops before the division when error is 0 */
+            break;
+
+        partial_corr = temp / (error << 1);
+
+        lpc[i] = av_clipl_int32((int64_t) (partial_corr * (1 << 14)) +
+                                (1 << 15)) >> 16;
+
+        /* Update the prediction error */
+        temp  = MULL2(temp, partial_corr);
+        error = av_clipl_int32((int64_t) (error << 16) - temp +
+                               (1 << 15)) >> 16;
+
+        memcpy(vector, lpc, i * sizeof(int16_t)); /* snapshot: the update below reads the old coefficients */
+        for (j = 0; j < i; j++) {
+            temp   = partial_corr * vector[i - j - 1] * 2;
+            lpc[j] = av_clipl_int32((int64_t) (lpc[j] * (1 << 16)) - temp +
+                                    (1 << 15)) >> 16;
+        }
+    }
+}
+
+/**
+ * Calculate LPC coefficients for the current frame.
+ *
+ * @param buf input samples; subframe j is analysed starting at buf + j * SUBFRAME_LEN
+ * @param lpc LPC coefficients vector, LPC_ORDER values written per subframe
+ *            (SUBFRAMES * LPC_ORDER values in total)
+ */
+static void comp_lpc_coeff(int16_t *buf, int16_t *lpc)
+{
+    int16_t autocorr[(LPC_ORDER + 1) * SUBFRAMES];
+    int16_t *autocorr_ptr = autocorr;
+    int16_t *lpc_ptr      = lpc;
+    int i, j;
+
+    for (i = 0, j = 0; j < SUBFRAMES; i += SUBFRAME_LEN, j++) {
+        comp_autocorr(buf + i, autocorr_ptr);
+        levinson_durbin(lpc_ptr, autocorr_ptr + 1, autocorr_ptr[0]); /* lag 0 is the initial prediction error */
+
+        lpc_ptr      += LPC_ORDER;
+        autocorr_ptr += LPC_ORDER + 1;
+    }
+}
+
+static void lpc2lsp(int16_t *lpc, int16_t *prev_lsp, int16_t *lsp) /* LPC -> LSP; copies prev_lsp if fewer than LPC_ORDER roots are found */
+{
+    int f[LPC_ORDER + 2]; ///< coefficients of the sum and difference
+                          ///< polynomials (F1, F2) ordered as
+                          ///< f1[0], f2[0], ...., f1[5], f2[5]
+
+    int max, shift, cur_val, prev_val, count, p;
+    int i, j;
+    int64_t temp;
+
+    /* Apply bandwidth expansion on the LPC coefficients */
+    for (i = 0; i < LPC_ORDER; i++)
+        lsp[i] = (lpc[i] * bandwidth_expand[i] + (1 << 14)) >> 15;
+
+    /* Initialize f1[0] and f2[0] to 1 in Q25 */
+    f[0] = f[1] = 1 << 25;
+
+    /* Compute the remaining coefficients */
+    for (i = 0; i < LPC_ORDER / 2; i++) {
+        /* f1 */
+        f[2 * i + 2] = -f[2 * i] - ((lsp[i] + lsp[LPC_ORDER - 1 - i]) << 12);
+        /* f2 */
+        f[2 * i + 3] = f[2 * i + 1] - ((lsp[i] - lsp[LPC_ORDER - 1 - i]) << 12);
+    }
+
+    /* Divide f1[5] and f2[5] by 2 for use in polynomial evaluation */
+    f[LPC_ORDER]     >>= 1;
+    f[LPC_ORDER + 1] >>= 1;
+
+    /* Normalize and shorten */
+    max = FFABS(f[0]);
+    for (i = 1; i < LPC_ORDER + 2; i++)
+        max = FFMAX(max, FFABS(f[i]));
+
+    shift = ff_g723_1_normalize_bits(max, 31);
+
+    for (i = 0; i < LPC_ORDER + 2; i++)
+        f[i] = av_clipl_int32((int64_t) (f[i] << shift) + (1 << 15)) >> 16;
+
+    /**
+     * Evaluate F1 and F2 at uniform intervals of pi/256 along the
+     * unit circle and check for zero crossings.
+     */
+    p    = 0; /* 0 selects the even (f1) entries of f[], 1 the odd (f2) entries */
+    temp = 0;
+    for (i = 0; i <= LPC_ORDER / 2; i++)
+        temp += f[2 * i] * cos_tab[0];
+    prev_val = av_clipl_int32(temp << 1);
+    count    = 0;
+    for (i = 1; i < COS_TBL_SIZE / 2; i++) {
+        /* Evaluate */
+        temp = 0;
+        for (j = 0; j <= LPC_ORDER / 2; j++)
+            temp += f[LPC_ORDER - 2 * j + p] * cos_tab[i * j % COS_TBL_SIZE];
+        cur_val = av_clipl_int32(temp << 1);
+
+        /* Check for sign change, indicating a zero crossing */
+        if ((cur_val ^ prev_val) < 0) {
+            int abs_cur  = FFABS(cur_val);
+            int abs_prev = FFABS(prev_val);
+            int sum      = abs_cur + abs_prev;
+
+            shift        = ff_g723_1_normalize_bits(sum, 31);
+            sum        <<= shift;
+            abs_prev     = abs_prev << shift >> 8;
+            lsp[count++] = ((i - 1) << 7) + (abs_prev >> 1) / (sum >> 16); /* grid point i - 1 plus an interpolated fraction */
+
+            if (count == LPC_ORDER)
+                break;
+
+            /* Switch between sum and difference polynomials */
+            p ^= 1;
+
+            /* Evaluate */
+            temp = 0;
+            for (j = 0; j <= LPC_ORDER / 2; j++)
+                temp += f[LPC_ORDER - 2 * j + p] *
+                        cos_tab[i * j % COS_TBL_SIZE];
+            cur_val = av_clipl_int32(temp << 1);
+        }
+        prev_val = cur_val;
+    }
+
+    if (count != LPC_ORDER) /* not all roots found: reuse the previous frame's LSP vector */
+        memcpy(lsp, prev_lsp, LPC_ORDER * sizeof(int16_t));
+}
+
+/**
+ * Quantize the current LSP subvector: store in lsp_index[num] the codebook row with the largest score.
+ *
+ * @param num    band number (selects the lsp_band##num table and the lsp_index[num] slot)
+ * @param offset offset of the current subvector in an LPC_ORDER vector
+ * @param size   size of the current subvector; weight, lsp and lsp_index must be in scope at the call site
+ */
+#define get_index(num, offset, size)                                          \
+{                                                                             \
+    int error, max = -1;                                                      \
+    int16_t temp[4];                                                          \
+    int i, j;                                                                 \
+                                                                              \
+    for (i = 0; i < LSP_CB_SIZE; i++) {                                       \
+        for (j = 0; j < size; j++){                                           \
+            temp[j] = (weight[j + (offset)] * lsp_band##num[i][j] +           \
+                      (1 << 14)) >> 15;                                       \
+        }                                                                     \
+        error  = ff_g723_1_dot_product(lsp + (offset), temp, size) << 1;      \
+        error -= ff_g723_1_dot_product(lsp_band##num[i], temp, size);         \
+        if (error > max) {                                                    \
+            max = error;                                                      \
+            lsp_index[num] = i;                                               \
+        }                                                                     \
+    }                                                                         \
+}
+
+/**
+ * Vector quantize the LSP frequencies; the three band indices are stored in lsp_index[0..2].
+ *
+ * @param lsp      the current lsp vector (overwritten with the VQ target vector)
+ * @param prev_lsp the previous lsp vector
+ */
+static void lsp_quantize(uint8_t *lsp_index, int16_t *lsp, int16_t *prev_lsp)
+{
+    int16_t weight[LPC_ORDER];
+    int16_t min, max;
+    int shift, i;
+
+    /* Calculate the VQ weighting vector; NOTE(review): the two edge gaps are divided by without a check */
+    weight[0]             = (1 << 20) / (lsp[1] - lsp[0]);
+    weight[LPC_ORDER - 1] = (1 << 20) /
+                            (lsp[LPC_ORDER - 1] - lsp[LPC_ORDER - 2]);
+
+    for (i = 1; i < LPC_ORDER - 1; i++) {
+        min = FFMIN(lsp[i] - lsp[i - 1], lsp[i + 1] - lsp[i]); /* smaller gap to either neighbour */
+        if (min > 0x20)
+            weight[i] = (1 << 20) / min;
+        else
+            weight[i] = INT16_MAX; /* gap of 0x20 or less: use the maximum weight */
+    }
+
+    /* Normalize */
+    max = 0;
+    for (i = 0; i < LPC_ORDER; i++)
+        max = FFMAX(weight[i], max);
+
+    shift = ff_g723_1_normalize_bits(max, 15);
+    for (i = 0; i < LPC_ORDER; i++) {
+        weight[i] <<= shift;
+    }
+
+    /* Compute the VQ target vector */
+    for (i = 0; i < LPC_ORDER; i++) {
+        lsp[i] -= dc_lsp[i] +
+                  (((prev_lsp[i] - dc_lsp[i]) * 12288 + (1 << 14)) >> 15); /* 12288 / 2^15 = 0.375 */
+    }
+
+    get_index(0, 0, 3); /* band 0: elements 0..2 */
+    get_index(1, 3, 3); /* band 1: elements 3..5 */
+    get_index(2, 6, 4); /* band 2: elements 6..9 */
+}
+
+/**
+ * Perform IIR filtering on one subframe (SUBFRAME_LEN samples).
+ *
+ * @param fir_coef FIR coefficients (LPC_ORDER values)
+ * @param iir_coef IIR coefficients (LPC_ORDER values)
+ * @param src      source vector; src[-LPC_ORDER..-1] are read as filter history
+ * @param dest     destination vector; dest[-LPC_ORDER..-1] are read as filter history
+ */
+static void iir_filter(int16_t *fir_coef, int16_t *iir_coef,
+                       int16_t *src, int16_t *dest)
+{
+    int m, n;
+
+    for (m = 0; m < SUBFRAME_LEN; m++) {
+        int64_t filter = 0;
+        for (n = 1; n <= LPC_ORDER; n++) {
+            filter -= fir_coef[n - 1] * src[m - n] -
+                      iir_coef[n - 1] * dest[m - n];
+        }
+
+        dest[m] = av_clipl_int32(src[m] * (1 << 16) + filter * 8 + /* multiplies: both terms may be negative */
+                                 (1 << 15)) >> 16;
+    }
+}
+
+/**
+ * Apply the formant perceptual weighting filter to buf in place, subframe by subframe.
+ *
+ * @param flt_coef filter coefficients (output: 2 * LPC_ORDER values written per subframe)
+ * @param unq_lpc  unquantized lpc vector (LPC_ORDER values read per subframe)
+ */
+static void perceptual_filter(G723_1_Context *p, int16_t *flt_coef,
+                              int16_t *unq_lpc, int16_t *buf)
+{
+    int16_t vector[FRAME_LEN + LPC_ORDER];
+    int i, j, k, l = 0;
+
+    memcpy(buf, p->iir_mem, sizeof(int16_t) * LPC_ORDER); /* first LPC_ORDER slots of buf hold the output history */
+    memcpy(vector, p->fir_mem, sizeof(int16_t) * LPC_ORDER); /* vector = input history followed by the frame */
+    memcpy(vector + LPC_ORDER, buf + LPC_ORDER, sizeof(int16_t) * FRAME_LEN);
+
+    for (i = LPC_ORDER, j = 0; j < SUBFRAMES; i += SUBFRAME_LEN, j++) {
+        for (k = 0; k < LPC_ORDER; k++) {
+            flt_coef[k + 2 * l] = (unq_lpc[k + l] * percept_flt_tbl[0][k] +
+                                   (1 << 14)) >> 15;
+            flt_coef[k + 2 * l + LPC_ORDER] = (unq_lpc[k + l] *
+                                               percept_flt_tbl[1][k] +
+                                               (1 << 14)) >> 15;
+        }
+        iir_filter(flt_coef + 2 * l, flt_coef + 2 * l + LPC_ORDER,
+                   vector + i, buf + i);
+        l += LPC_ORDER; /* l indexes unq_lpc; flt_coef advances by 2 * LPC_ORDER per subframe */
+    }
+    memcpy(p->iir_mem, buf + FRAME_LEN, sizeof(int16_t) * LPC_ORDER); /* save the last LPC_ORDER outputs */
+    memcpy(p->fir_mem, vector + FRAME_LEN, sizeof(int16_t) * LPC_ORDER); /* save the last LPC_ORDER inputs */
+}
+
+/**
+ * Estimate the open loop pitch period; returns a lag in [PITCH_MIN, PITCH_MAX - 3].
+ *
+ * @param buf   perceptually weighted speech
+ * @param start estimation is carried out from this position
+ */
+static int estimate_pitch(int16_t *buf, int start)
+{
+    int max_exp = 32;
+    int max_ccr = 0x4000;
+    int max_eng = 0x7fff;
+    int index   = PITCH_MIN; /* returned unchanged if no candidate is accepted */
+    int offset  = start - PITCH_MIN + 1;
+
+    int ccr, eng, orig_eng, ccr_eng, exp;
+    int diff, temp;
+
+    int i;
+
+    orig_eng = ff_dot_product(buf + offset, buf + offset, HALF_FRAME_LEN);
+
+    for (i = PITCH_MIN; i <= PITCH_MAX - 3; i++) {
+        offset--;
+
+        /* Update energy and compute correlation */
+        orig_eng += buf[offset] * buf[offset] -
+                    buf[offset + HALF_FRAME_LEN] * buf[offset + HALF_FRAME_LEN];
+        ccr = ff_dot_product(buf + start, buf + offset, HALF_FRAME_LEN);
+        if (ccr <= 0) /* non-positive correlation: not a candidate */
+            continue;
+
+        /* Split into mantissa and exponent to maintain precision */
+        exp   = ff_g723_1_normalize_bits(ccr, 31);
+        ccr   = av_clipl_int32((int64_t) (ccr << exp) + (1 << 15)) >> 16;
+        exp <<= 1;
+        ccr  *= ccr;
+        temp  = ff_g723_1_normalize_bits(ccr, 31);
+        ccr   = ccr << temp >> 16;
+        exp  += temp;
+
+        temp = ff_g723_1_normalize_bits(orig_eng, 31);
+        eng  = av_clipl_int32((int64_t) (orig_eng << temp) + (1 << 15)) >> 16;
+        exp -= temp;
+
+        if (ccr >= eng) {
+            exp--;
+            ccr >>= 1;
+        }
+        if (exp > max_exp)
+            continue;
+
+        if (exp + 1 < max_exp) /* exponent lower by 2 or more: accept without comparing mantissas */
+            goto update;
+
+        /* Equalize exponents before comparison */
+        if (exp + 1 == max_exp)
+            temp = max_ccr >> 1;
+        else
+            temp = max_ccr;
+        ccr_eng = ccr * max_eng;
+        diff    = ccr_eng - eng * temp;
+        if (diff > 0 && (i - index < PITCH_MIN || diff > ccr_eng >> 2)) {
+update:
+            index   = i;
+            max_exp = exp;
+            max_ccr = ccr;
+            max_eng = eng;
+        }
+    }
+    return index;
+}
+
+/**
+ * Compute harmonic noise filter parameters by searching lags pitch_lag - 3 .. pitch_lag + 3.
+ *
+ * @param buf       perceptually weighted speech (samples before buf are read)
+ * @param pitch_lag open loop pitch period
+ * @param hf        harmonic filter parameters (output: index and gain)
+ */
+static void comp_harmonic_coeff(int16_t *buf, int16_t pitch_lag, HFParam *hf)
+{
+    int ccr, eng, max_ccr, max_eng;
+    int exp, max, diff;
+    int energy[15]; /* 7 (energy, correlation) pairs followed by the target energy at [14] */
+    int i, j;
+
+    for (i = 0, j = pitch_lag - 3; j <= pitch_lag + 3; i++, j++) {
+        /* Compute residual energy */
+        energy[i << 1] = ff_dot_product(buf - j, buf - j, SUBFRAME_LEN);
+        /* Compute correlation */
+        energy[(i << 1) + 1] = ff_dot_product(buf, buf - j, SUBFRAME_LEN);
+    }
+
+    /* Compute target energy */
+    energy[14] = ff_dot_product(buf, buf, SUBFRAME_LEN);
+
+    /* Normalize */
+    max = 0;
+    for (i = 0; i < 15; i++)
+        max = FFMAX(max, FFABS(energy[i]));
+
+    exp = ff_g723_1_normalize_bits(max, 31);
+    for (i = 0; i < 15; i++) {
+        energy[i] = av_clipl_int32((int64_t)(energy[i] << exp) +
+                                   (1 << 15)) >> 16;
+    }
+
+    hf->index = -1;
+    hf->gain  =  0;
+    max_ccr   =  1;
+    max_eng   =  0x7fff;
+
+    for (i = 0; i <= 6; i++) {
+        eng = energy[i << 1];
+        ccr = energy[(i << 1) + 1];
+
+        if (ccr <= 0)
+            continue;
+
+        ccr  = (ccr * ccr + (1 << 14)) >> 15;
+        diff = ccr * max_eng - eng * max_ccr; /* cross-multiplied comparison of ccr/eng with max_ccr/max_eng */
+        if (diff > 0) {
+            max_ccr   = ccr;
+            max_eng   = eng;
+            hf->index = i;
+        }
+    }
+
+    if (hf->index == -1) { /* no lag had positive correlation: gain stays 0 */
+        hf->index = pitch_lag;
+        return;
+    }
+
+    eng = energy[14] * max_eng;
+    eng = (eng >> 2) + (eng >> 3); /* 0.375 * target energy * best lag energy */
+    ccr = energy[(hf->index << 1) + 1] * energy[(hf->index << 1) + 1];
+    if (eng < ccr) {
+        eng = energy[(hf->index << 1) + 1];
+
+        if (eng >= max_eng)
+            hf->gain = 0x2800;
+        else
+            hf->gain = ((eng << 15) / max_eng * 0x2800 + (1 << 14)) >> 15;
+    }
+    hf->index += pitch_lag - 3; /* convert the table position 0..6 back to a lag */
+}
+
+/**
+ * Apply the harmonic noise shaping filter to one subframe (SUBFRAME_LEN samples).
+ *
+ * @param hf filter parameters; src is read hf->index samples behind each output position
+ */
+static void harmonic_filter(HFParam *hf, const int16_t *src, int16_t *dest)
+{
+    int i;
+
+    for (i = 0; i < SUBFRAME_LEN; i++) {
+        int64_t temp = hf->gain * src[i - hf->index] * 2; /* multiply: the product may be negative */
+        dest[i] = av_clipl_int32(src[i] * (1 << 16) - temp + (1 << 15)) >> 16;
+    }
+}
+
+static void harmonic_noise_sub(HFParam *hf, const int16_t *src, int16_t *dest)
+{
+    int i;
+    for (i = 0; i < SUBFRAME_LEN; i++) {
+        int64_t temp = hf->gain * src[i - hf->index] << 1;
+        dest[i] = av_clipl_int32(((dest[i] - src[i]) << 16) + temp +
+                                 (1 << 15)) >> 16;
+    }
+}
+
+/**
+ * Combined synthesis and formant perceptual weighting filter.
+ *
+ * @param qnt_lpc  quantized lpc coefficients
+ * @param perf_lpc perceptual filter coefficients
+ * @param perf_fir perceptual filter fir memory
+ * @param perf_iir perceptual filter iir memory
+ * @param scale    the filter output will be scaled by 2^scale
+ */
+static void synth_percept_filter(int16_t *qnt_lpc, int16_t *perf_lpc,
+                                 int16_t *perf_fir, int16_t *perf_iir,
+                                 const int16_t *src, int16_t *dest, int scale)
+{
+    int i, j;
+    int16_t buf_16[SUBFRAME_LEN + LPC_ORDER];
+    int64_t buf[SUBFRAME_LEN];
+
+    int16_t *bptr_16 = buf_16 + LPC_ORDER;
+
+    memcpy(buf_16, perf_fir, sizeof(int16_t) * LPC_ORDER);
+    memcpy(dest - LPC_ORDER, perf_iir, sizeof(int16_t) * LPC_ORDER);
+
+    for (i = 0; i < SUBFRAME_LEN; i++) {
+        int64_t temp = 0;
+        for (j = 1; j <= LPC_ORDER; j++)
+            temp -= qnt_lpc[j - 1] * bptr_16[i - j];
+
+        buf[i]     = (src[i] << 15) + (temp << 3);
+        bptr_16[i] = av_clipl_int32(buf[i] + (1 << 15)) >> 16;
+    }
+
+    for (i = 0; i < SUBFRAME_LEN; i++) {
+        int64_t fir = 0, iir = 0;
+        for (j = 1; j <= LPC_ORDER; j++) {
+            fir -= perf_lpc[j - 1] * bptr_16[i - j];
+            iir += perf_lpc[j + LPC_ORDER - 1] * dest[i - j];
+        }
+        dest[i] = av_clipl_int32(((buf[i] + (fir << 3)) << scale) + (iir << 3) +
+                                 (1 << 15)) >> 16;
+    }
+    memcpy(perf_fir, buf_16 + SUBFRAME_LEN, sizeof(int16_t) * LPC_ORDER);
+    memcpy(perf_iir, dest + SUBFRAME_LEN - LPC_ORDER,
+           sizeof(int16_t) * LPC_ORDER);
+}
+
+/**
+ * Compute the adaptive codebook contribution.
+ *
+ * @param buf   input signal
+ * @param index the current subframe index
+ */
+static void acb_search(G723_1_Context *p, int16_t *residual,
+                       int16_t *impulse_resp, const int16_t *buf,
+                       int index)
+{
+    int16_t flt_buf[PITCH_ORDER][SUBFRAME_LEN];
+
+    const int16_t *cb_tbl = adaptive_cb_gain85;
+
+    int ccr_buf[PITCH_ORDER * SUBFRAMES << 2];
+
+    int pitch_lag = p->pitch_lag[index >> 1];
+    int acb_lag   = 1;
+    int acb_gain  = 0;
+    int odd_frame = index & 1;
+    int iter      = 3 + odd_frame;
+    int count     = 0;
+    int tbl_size  = 85;
+
+    int i, j, k, l, max;
+    int64_t temp;
+
+    if (!odd_frame) {
+        if (pitch_lag == PITCH_MIN)
+            pitch_lag++;
+        else
+            pitch_lag = FFMIN(pitch_lag, PITCH_MAX - 5);
+    }
+
+    for (i = 0; i < iter; i++) {
+        ff_g723_1_get_residual(residual, p->prev_excitation, pitch_lag + i - 1);
+
+        for (j = 0; j < SUBFRAME_LEN; j++) {
+            temp = 0;
+            for (k = 0; k <= j; k++)
+                temp += residual[PITCH_ORDER - 1 + k] * impulse_resp[j - k];
+            flt_buf[PITCH_ORDER - 1][j] = av_clipl_int32((temp << 1) +
+                                                         (1 << 15)) >> 16;
+        }
+
+        for (j = PITCH_ORDER - 2; j >= 0; j--) {
+            flt_buf[j][0] = ((residual[j] << 13) + (1 << 14)) >> 15;
+            for (k = 1; k < SUBFRAME_LEN; k++) {
+                temp = (flt_buf[j + 1][k - 1] << 15) +
+                       residual[j] * impulse_resp[k];
+                flt_buf[j][k] = av_clipl_int32((temp << 1) + (1 << 15)) >> 16;
+            }
+        }
+
+        /* Compute crosscorrelation with the signal */
+        for (j = 0; j < PITCH_ORDER; j++) {
+            temp             = ff_dot_product(buf, flt_buf[j], SUBFRAME_LEN);
+            ccr_buf[count++] = av_clipl_int32(temp << 1);
+        }
+
+        /* Compute energies */
+        for (j = 0; j < PITCH_ORDER; j++) {
+            ccr_buf[count++] = ff_g723_1_dot_product(flt_buf[j], flt_buf[j],
+                                                     SUBFRAME_LEN);
+        }
+
+        for (j = 1; j < PITCH_ORDER; j++) {
+            for (k = 0; k < j; k++) {
+                temp             = ff_dot_product(flt_buf[j], flt_buf[k], SUBFRAME_LEN);
+                ccr_buf[count++] = av_clipl_int32(temp << 2);
+            }
+        }
+    }
+
+    /* Normalize and shorten */
+    max = 0;
+    for (i = 0; i < 20 * iter; i++)
+        max = FFMAX(max, FFABS(ccr_buf[i]));
+
+    temp = ff_g723_1_normalize_bits(max, 31);
+
+    for (i = 0; i < 20 * iter; i++)
+        ccr_buf[i] = av_clipl_int32((int64_t) (ccr_buf[i] << temp) +
+                                    (1 << 15)) >> 16;
+
+    max = 0;
+    for (i = 0; i < iter; i++) {
+        /* Select quantization table */
+        if (!odd_frame && pitch_lag + i - 1 >= SUBFRAME_LEN - 2 ||
+            odd_frame && pitch_lag >= SUBFRAME_LEN - 2) {
+            cb_tbl   = adaptive_cb_gain170;
+            tbl_size = 170;
+        }
+
+        for (j = 0, k = 0; j < tbl_size; j++, k += 20) {
+            temp = 0;
+            for (l = 0; l < 20; l++)
+                temp += ccr_buf[20 * i + l] * cb_tbl[k + l];
+            temp = av_clipl_int32(temp);
+
+            if (temp > max) {
+                max      = temp;
+                acb_gain = j;
+                acb_lag  = i;
+            }
+        }
+    }
+
+    if (!odd_frame) {
+        pitch_lag += acb_lag - 1;
+        acb_lag    = 1;
+    }
+
+    p->pitch_lag[index >> 1]      = pitch_lag;
+    p->subframe[index].ad_cb_lag  = acb_lag;
+    p->subframe[index].ad_cb_gain = acb_gain;
+}
+
+/**
+ * Subtract the adaptive codebook contribution from the input
+ * to obtain the residual.
+ *
+ * @param buf target vector
+ */
+static void sub_acb_contrib(const int16_t *residual, const int16_t *impulse_resp,
+                            int16_t *buf)
+{
+    int i, j;
+    /* Subtract adaptive CB contribution to obtain the residual */
+    for (i = 0; i < SUBFRAME_LEN; i++) {
+        int64_t temp = buf[i] << 14;
+        for (j = 0; j <= i; j++)
+            temp -= residual[j] * impulse_resp[i - j];
+
+        buf[i] = av_clipl_int32((temp << 2) + (1 << 15)) >> 16;
+    }
+}
+
+/**
+ * Quantize the residual signal using the fixed codebook (MP-MLQ).
+ *
+ * @param optim optimized fixed codebook parameters
+ * @param buf   excitation vector
+ */
+static void get_fcb_param(FCBParam *optim, int16_t *impulse_resp,
+                          int16_t *buf, int pulse_cnt, int pitch_lag)
+{
+    FCBParam param;
+    int16_t impulse_r[SUBFRAME_LEN];
+    int16_t temp_corr[SUBFRAME_LEN];
+    int16_t impulse_corr[SUBFRAME_LEN];
+
+    int ccr1[SUBFRAME_LEN];
+    int ccr2[SUBFRAME_LEN];
+    int amp, err, max, max_amp_index, min, scale, i, j, k, l;
+
+    int64_t temp;
+
+    /* Update impulse response */
+    memcpy(impulse_r, impulse_resp, sizeof(int16_t) * SUBFRAME_LEN);
+    param.dirac_train = 0;
+    if (pitch_lag < SUBFRAME_LEN - 2) {
+        param.dirac_train = 1;
+        ff_g723_1_gen_dirac_train(impulse_r, pitch_lag);
+    }
+
+    for (i = 0; i < SUBFRAME_LEN; i++)
+        temp_corr[i] = impulse_r[i] >> 1;
+
+    /* Compute impulse response autocorrelation */
+    temp = ff_g723_1_dot_product(temp_corr, temp_corr, SUBFRAME_LEN);
+
+    scale           = ff_g723_1_normalize_bits(temp, 31);
+    impulse_corr[0] = av_clipl_int32((temp << scale) + (1 << 15)) >> 16;
+
+    for (i = 1; i < SUBFRAME_LEN; i++) {
+        temp = ff_g723_1_dot_product(temp_corr + i, temp_corr,
+                                     SUBFRAME_LEN - i);
+        impulse_corr[i] = av_clipl_int32((temp << scale) + (1 << 15)) >> 16;
+    }
+
+    /* Compute crosscorrelation of impulse response with residual signal */
+    scale -= 4;
+    for (i = 0; i < SUBFRAME_LEN; i++) {
+        temp = ff_g723_1_dot_product(buf + i, impulse_r, SUBFRAME_LEN - i);
+        if (scale < 0)
+            ccr1[i] = temp >> -scale;
+        else
+            ccr1[i] = av_clipl_int32(temp << scale);
+    }
+
+    /* Search loop */
+    for (i = 0; i < GRID_SIZE; i++) {
+        /* Maximize the crosscorrelation */
+        max = 0;
+        for (j = i; j < SUBFRAME_LEN; j += GRID_SIZE) {
+            temp = FFABS(ccr1[j]);
+            if (temp >= max) {
+                max                = temp;
+                param.pulse_pos[0] = j;
+            }
+        }
+
+        /* Quantize the gain (max crosscorrelation/impulse_corr[0]) */
+        amp           = max;
+        min           = 1 << 30;
+        max_amp_index = GAIN_LEVELS - 2;
+        for (j = max_amp_index; j >= 2; j--) {
+            temp = av_clipl_int32((int64_t) fixed_cb_gain[j] *
+                                  impulse_corr[0] << 1);
+            temp = FFABS(temp - amp);
+            if (temp < min) {
+                min           = temp;
+                max_amp_index = j;
+            }
+        }
+
+        max_amp_index--;
+        /* Select additional gain values */
+        for (j = 1; j < 5; j++) {
+            for (k = i; k < SUBFRAME_LEN; k += GRID_SIZE) {
+                temp_corr[k] = 0;
+                ccr2[k]      = ccr1[k];
+            }
+            param.amp_index = max_amp_index + j - 2;
+            amp             = fixed_cb_gain[param.amp_index];
+
+            param.pulse_sign[0] = (ccr2[param.pulse_pos[0]] < 0) ? -amp : amp;
+            temp_corr[param.pulse_pos[0]] = 1;
+
+            for (k = 1; k < pulse_cnt; k++) {
+                max = INT_MIN;
+                for (l = i; l < SUBFRAME_LEN; l += GRID_SIZE) {
+                    if (temp_corr[l])
+                        continue;
+                    temp = impulse_corr[FFABS(l - param.pulse_pos[k - 1])];
+                    temp = av_clipl_int32((int64_t) temp *
+                                          param.pulse_sign[k - 1] << 1);
+                    ccr2[l] -= temp;
+                    temp     = FFABS(ccr2[l]);
+                    if (temp > max) {
+                        max                = temp;
+                        param.pulse_pos[k] = l;
+                    }
+                }
+
+                param.pulse_sign[k] = (ccr2[param.pulse_pos[k]] < 0) ?
+                                      -amp : amp;
+                temp_corr[param.pulse_pos[k]] = 1;
+            }
+
+            /* Create the error vector */
+            memset(temp_corr, 0, sizeof(int16_t) * SUBFRAME_LEN);
+
+            for (k = 0; k < pulse_cnt; k++)
+                temp_corr[param.pulse_pos[k]] = param.pulse_sign[k];
+
+            for (k = SUBFRAME_LEN - 1; k >= 0; k--) {
+                temp = 0;
+                for (l = 0; l <= k; l++) {
+                    int prod = av_clipl_int32((int64_t) temp_corr[l] *
+                                              impulse_r[k - l] << 1);
+                    temp = av_clipl_int32(temp + prod);
+                }
+                temp_corr[k] = temp << 2 >> 16;
+            }
+
+            /* Compute square of error */
+            err = 0;
+            for (k = 0; k < SUBFRAME_LEN; k++) {
+                int64_t prod;
+                prod = av_clipl_int32((int64_t) buf[k] * temp_corr[k] << 1);
+                err  = av_clipl_int32(err - prod);
+                prod = av_clipl_int32((int64_t) temp_corr[k] * temp_corr[k]);
+                err  = av_clipl_int32(err + prod);
+            }
+
+            /* Minimize */
+            if (err < optim->min_err) {
+                optim->min_err     = err;
+                optim->grid_index  = i;
+                optim->amp_index   = param.amp_index;
+                optim->dirac_train = param.dirac_train;
+
+                for (k = 0; k < pulse_cnt; k++) {
+                    optim->pulse_sign[k] = param.pulse_sign[k];
+                    optim->pulse_pos[k]  = param.pulse_pos[k];
+                }
+            }
+        }
+    }
+}
+
+/**
+ * Encode the pulse position and gain of the current subframe.
+ *
+ * @param optim optimized fixed CB parameters
+ * @param buf   excitation vector
+ */
+static void pack_fcb_param(G723_1_Subframe *subfrm, FCBParam *optim,
+                           int16_t *buf, int pulse_cnt)
+{
+    int i, j;
+
+    j = PULSE_MAX - pulse_cnt;
+
+    subfrm->pulse_sign = 0;
+    subfrm->pulse_pos  = 0;
+
+    for (i = 0; i < SUBFRAME_LEN >> 1; i++) {
+        int val = buf[optim->grid_index + (i << 1)];
+        if (!val) {
+            subfrm->pulse_pos += combinatorial_table[j][i];
+        } else {
+            subfrm->pulse_sign <<= 1;
+            if (val < 0)
+                subfrm->pulse_sign++;
+            j++;
+
+            if (j == PULSE_MAX)
+                break;
+        }
+    }
+    subfrm->amp_index   = optim->amp_index;
+    subfrm->grid_index  = optim->grid_index;
+    subfrm->dirac_train = optim->dirac_train;
+}
+
+/**
+ * Compute the fixed codebook excitation.
+ *
+ * @param buf          target vector
+ * @param impulse_resp impulse response of the combined filter
+ */
+static void fcb_search(G723_1_Context *p, int16_t *impulse_resp,
+                       int16_t *buf, int index)
+{
+    FCBParam optim;
+    int pulse_cnt = pulses[index];
+    int i;
+
+    optim.min_err = 1 << 30;
+    get_fcb_param(&optim, impulse_resp, buf, pulse_cnt, SUBFRAME_LEN);
+
+    if (p->pitch_lag[index >> 1] < SUBFRAME_LEN - 2) {
+        get_fcb_param(&optim, impulse_resp, buf, pulse_cnt,
+                      p->pitch_lag[index >> 1]);
+    }
+
+    /* Reconstruct the excitation */
+    memset(buf, 0, sizeof(int16_t) * SUBFRAME_LEN);
+    for (i = 0; i < pulse_cnt; i++)
+        buf[optim.pulse_pos[i]] = optim.pulse_sign[i];
+
+    pack_fcb_param(&p->subframe[index], &optim, buf, pulse_cnt);
+
+    if (optim.dirac_train)
+        ff_g723_1_gen_dirac_train(buf, p->pitch_lag[index >> 1]);
+}
+
+/**
+ * Pack the frame parameters into output bitstream.
+ *
+ * @param p     encoder context holding the frame parameters to pack
+ * @param avpkt packet whose data buffer receives the bitstream
+ */
+static int pack_bitstream(G723_1_Context *p, AVPacket *avpkt)
+{
+    PutBitContext pb;
+    int info_bits = 0;
+    int i, temp;
+
+    init_put_bits(&pb, avpkt->data, avpkt->size);
+
+    put_bits(&pb, 2, info_bits);
+
+    put_bits(&pb, 8, p->lsp_index[2]);
+    put_bits(&pb, 8, p->lsp_index[1]);
+    put_bits(&pb, 8, p->lsp_index[0]);
+
+    put_bits(&pb, 7, p->pitch_lag[0] - PITCH_MIN);
+    put_bits(&pb, 2, p->subframe[1].ad_cb_lag);
+    put_bits(&pb, 7, p->pitch_lag[1] - PITCH_MIN);
+    put_bits(&pb, 2, p->subframe[3].ad_cb_lag);
+
+    /* Write 12 bit combined gain */
+    for (i = 0; i < SUBFRAMES; i++) {
+        temp = p->subframe[i].ad_cb_gain * GAIN_LEVELS +
+               p->subframe[i].amp_index;
+        if (p->cur_rate == RATE_6300)
+            temp += p->subframe[i].dirac_train << 11;
+        put_bits(&pb, 12, temp);
+    }
+
+    put_bits(&pb, 1, p->subframe[0].grid_index);
+    put_bits(&pb, 1, p->subframe[1].grid_index);
+    put_bits(&pb, 1, p->subframe[2].grid_index);
+    put_bits(&pb, 1, p->subframe[3].grid_index);
+
+    if (p->cur_rate == RATE_6300) {
+        skip_put_bits(&pb, 1); /* reserved bit */
+
+        /* Write 13 bit combined position index */
+        temp = (p->subframe[0].pulse_pos >> 16) * 810 +
+               (p->subframe[1].pulse_pos >> 14) *  90 +
+               (p->subframe[2].pulse_pos >> 16) *   9 +
+               (p->subframe[3].pulse_pos >> 14);
+        put_bits(&pb, 13, temp);
+
+        put_bits(&pb, 16, p->subframe[0].pulse_pos & 0xffff);
+        put_bits(&pb, 14, p->subframe[1].pulse_pos & 0x3fff);
+        put_bits(&pb, 16, p->subframe[2].pulse_pos & 0xffff);
+        put_bits(&pb, 14, p->subframe[3].pulse_pos & 0x3fff);
+
+        put_bits(&pb, 6, p->subframe[0].pulse_sign);
+        put_bits(&pb, 5, p->subframe[1].pulse_sign);
+        put_bits(&pb, 6, p->subframe[2].pulse_sign);
+        put_bits(&pb, 5, p->subframe[3].pulse_sign);
+    }
+
+    flush_put_bits(&pb);
+    return frame_size[info_bits];
+}
+
+static int g723_1_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
+                               const AVFrame *frame, int *got_packet_ptr)
+{
+    G723_1_Context *p = avctx->priv_data;
+    int16_t unq_lpc[LPC_ORDER * SUBFRAMES];
+    int16_t qnt_lpc[LPC_ORDER * SUBFRAMES];
+    int16_t cur_lsp[LPC_ORDER];
+    int16_t weighted_lpc[LPC_ORDER * SUBFRAMES << 1];
+    int16_t vector[FRAME_LEN + PITCH_MAX];
+    int offset, ret, i, j;
+    int16_t *in, *start;
+    HFParam hf[4];
+
+    /* duplicate input */
+    start = in = av_malloc(frame->nb_samples * sizeof(int16_t));
+    if (!in)
+        return AVERROR(ENOMEM);
+    memcpy(in, frame->data[0], frame->nb_samples * sizeof(int16_t));
+
+    highpass_filter(in, &p->hpf_fir_mem, &p->hpf_iir_mem);
+
+    memcpy(vector, p->prev_data, HALF_FRAME_LEN * sizeof(int16_t));
+    memcpy(vector + HALF_FRAME_LEN, in, FRAME_LEN * sizeof(int16_t));
+
+    comp_lpc_coeff(vector, unq_lpc);
+    lpc2lsp(&unq_lpc[LPC_ORDER * 3], p->prev_lsp, cur_lsp);
+    lsp_quantize(p->lsp_index, cur_lsp, p->prev_lsp);
+
+    /* Update memory */
+    memcpy(vector + LPC_ORDER, p->prev_data + SUBFRAME_LEN,
+           sizeof(int16_t) * SUBFRAME_LEN);
+    memcpy(vector + LPC_ORDER + SUBFRAME_LEN, in,
+           sizeof(int16_t) * (HALF_FRAME_LEN + SUBFRAME_LEN));
+    memcpy(p->prev_data, in + HALF_FRAME_LEN,
+           sizeof(int16_t) * HALF_FRAME_LEN);
+    memcpy(in, vector + LPC_ORDER, sizeof(int16_t) * FRAME_LEN);
+
+    perceptual_filter(p, weighted_lpc, unq_lpc, vector);
+
+    memcpy(in, vector + LPC_ORDER, sizeof(int16_t) * FRAME_LEN);
+    memcpy(vector, p->prev_weight_sig, sizeof(int16_t) * PITCH_MAX);
+    memcpy(vector + PITCH_MAX, in, sizeof(int16_t) * FRAME_LEN);
+
+    ff_g723_1_scale_vector(vector, vector, FRAME_LEN + PITCH_MAX);
+
+    p->pitch_lag[0] = estimate_pitch(vector, PITCH_MAX);
+    p->pitch_lag[1] = estimate_pitch(vector, PITCH_MAX + HALF_FRAME_LEN);
+
+    for (i = PITCH_MAX, j = 0; j < SUBFRAMES; i += SUBFRAME_LEN, j++)
+        comp_harmonic_coeff(vector + i, p->pitch_lag[j >> 1], hf + j);
+
+    memcpy(vector, p->prev_weight_sig, sizeof(int16_t) * PITCH_MAX);
+    memcpy(vector + PITCH_MAX, in, sizeof(int16_t) * FRAME_LEN);
+    memcpy(p->prev_weight_sig, vector + FRAME_LEN, sizeof(int16_t) * PITCH_MAX);
+
+    for (i = 0, j = 0; j < SUBFRAMES; i += SUBFRAME_LEN, j++)
+        harmonic_filter(hf + j, vector + PITCH_MAX + i, in + i);
+
+    ff_g723_1_inverse_quant(cur_lsp, p->prev_lsp, p->lsp_index, 0);
+    ff_g723_1_lsp_interpolate(qnt_lpc, cur_lsp, p->prev_lsp);
+
+    memcpy(p->prev_lsp, cur_lsp, sizeof(int16_t) * LPC_ORDER);
+
+    offset = 0;
+    for (i = 0; i < SUBFRAMES; i++) {
+        int16_t impulse_resp[SUBFRAME_LEN];
+        int16_t residual[SUBFRAME_LEN + PITCH_ORDER - 1];
+        int16_t flt_in[SUBFRAME_LEN];
+        int16_t zero[LPC_ORDER], fir[LPC_ORDER], iir[LPC_ORDER];
+
+        /**
+         * Compute the combined impulse response of the synthesis filter,
+         * formant perceptual weighting filter and harmonic noise shaping filter
+         */
+        memset(zero, 0, sizeof(int16_t) * LPC_ORDER);
+        memset(vector, 0, sizeof(int16_t) * PITCH_MAX);
+        memset(flt_in, 0, sizeof(int16_t) * SUBFRAME_LEN);
+
+        flt_in[0] = 1 << 13; /* Unit impulse */
+        synth_percept_filter(qnt_lpc + offset, weighted_lpc + (offset << 1),
+                             zero, zero, flt_in, vector + PITCH_MAX, 1);
+        harmonic_filter(hf + i, vector + PITCH_MAX, impulse_resp);
+
+        /* Compute the combined zero input response */
+        flt_in[0] = 0;
+        memcpy(fir, p->perf_fir_mem, sizeof(int16_t) * LPC_ORDER);
+        memcpy(iir, p->perf_iir_mem, sizeof(int16_t) * LPC_ORDER);
+
+        synth_percept_filter(qnt_lpc + offset, weighted_lpc + (offset << 1),
+                             fir, iir, flt_in, vector + PITCH_MAX, 0);
+        memcpy(vector, p->harmonic_mem, sizeof(int16_t) * PITCH_MAX);
+        harmonic_noise_sub(hf + i, vector + PITCH_MAX, in);
+
+        acb_search(p, residual, impulse_resp, in, i);
+        ff_g723_1_gen_acb_excitation(residual, p->prev_excitation,
+                                     p->pitch_lag[i >> 1], &p->subframe[i],
+                                     p->cur_rate);
+        sub_acb_contrib(residual, impulse_resp, in);
+
+        fcb_search(p, impulse_resp, in, i);
+
+        /* Reconstruct the excitation */
+        ff_g723_1_gen_acb_excitation(impulse_resp, p->prev_excitation,
+                                     p->pitch_lag[i >> 1], &p->subframe[i],
+                                     RATE_6300);
+
+        memmove(p->prev_excitation, p->prev_excitation + SUBFRAME_LEN,
+                sizeof(int16_t) * (PITCH_MAX - SUBFRAME_LEN));
+        for (j = 0; j < SUBFRAME_LEN; j++)
+            in[j] = av_clip_int16((in[j] << 1) + impulse_resp[j]);
+        memcpy(p->prev_excitation + PITCH_MAX - SUBFRAME_LEN, in,
+               sizeof(int16_t) * SUBFRAME_LEN);
+
+        /* Update filter memories */
+        synth_percept_filter(qnt_lpc + offset, weighted_lpc + (offset << 1),
+                             p->perf_fir_mem, p->perf_iir_mem,
+                             in, vector + PITCH_MAX, 0);
+        memmove(p->harmonic_mem, p->harmonic_mem + SUBFRAME_LEN,
+                sizeof(int16_t) * (PITCH_MAX - SUBFRAME_LEN));
+        memcpy(p->harmonic_mem + PITCH_MAX - SUBFRAME_LEN, vector + PITCH_MAX,
+               sizeof(int16_t) * SUBFRAME_LEN);
+
+        in     += SUBFRAME_LEN;
+        offset += LPC_ORDER;
+    }
+
+    av_free(start);
+
+    if ((ret = ff_alloc_packet2(avctx, avpkt, 24, 0)) < 0)
+        return ret;
+
+    *got_packet_ptr = 1;
+    avpkt->size = pack_bitstream(p, avpkt);
+    return 0;
+}
+
+AVCodec ff_g723_1_encoder = {
+    .name           = "g723_1",
+    .long_name      = NULL_IF_CONFIG_SMALL("G.723.1"),
+    .type           = AVMEDIA_TYPE_AUDIO,
+    .id             = AV_CODEC_ID_G723_1,
+    .priv_data_size = sizeof(G723_1_Context),
+    .init           = g723_1_encode_init,
+    .encode2        = g723_1_encode_frame,
+    .sample_fmts    = (const enum AVSampleFormat[]) {
+        AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE
+    },
+};
diff --git a/libavcodec/g726.c b/libavcodec/g726.c
index 5bbf8971..c3d018fd 100644
--- a/libavcodec/g726.c
+++ b/libavcodec/g726.c
@@ -23,7 +23,6 @@
  */
 #include <limits.h>
 
-#include "libavutil/avassert.h"
 #include "libavutil/channel_layout.h"
 #include "libavutil/opt.h"
 #include "avcodec.h"
@@ -33,7 +32,7 @@
 
 /**
  * G.726 11bit float.
- * G.726 Standard uses rather odd 11bit floating point arithmentic for
+ * G.726 Standard uses rather odd 11bit floating point arithmetic for
  * numerous occasions. It's a mystery to me why they did it this way
  * instead of simply using 32bit integer arithmetic.
  */
@@ -316,7 +315,11 @@ static av_cold int g726_encode_init(AVCodecContext *avctx)
                "Resample or reduce the compliance level.\n");
         return AVERROR(EINVAL);
     }
-    av_assert0(avctx->sample_rate > 0);
+    if (avctx->sample_rate <= 0) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid sample rate %d\n",
+               avctx->sample_rate);
+        return AVERROR(EINVAL);
+    }
 
     if(avctx->channels != 1){
         av_log(avctx, AV_LOG_ERROR, "Only mono is supported\n");
@@ -348,7 +351,7 @@ static int g726_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     int i, ret, out_size;
 
     out_size = (frame->nb_samples * c->code_size + 7) / 8;
-    if ((ret = ff_alloc_packet2(avctx, avpkt, out_size)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, avpkt, out_size, 0)) < 0)
         return ret;
     init_put_bits(&pb, avpkt->data, avpkt->size);
 
@@ -389,7 +392,7 @@ AVCodec ff_adpcm_g726_encoder = {
     .priv_data_size = sizeof(G726Context),
     .init           = g726_encode_init,
     .encode2        = g726_encode_frame,
-    .capabilities   = CODEC_CAP_SMALL_LAST_FRAME,
+    .capabilities   = AV_CODEC_CAP_SMALL_LAST_FRAME,
     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
                                                      AV_SAMPLE_FMT_NONE },
     .priv_class     = &g726_class,
@@ -474,7 +477,7 @@ AVCodec ff_adpcm_g726_decoder = {
     .init           = g726_decode_init,
     .decode         = g726_decode_frame,
     .flush          = g726_decode_flush,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
 #endif
 
@@ -487,7 +490,7 @@ AVCodec ff_adpcm_g726le_decoder = {
     .init           = g726_decode_init,
     .decode         = g726_decode_frame,
     .flush          = g726_decode_flush,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .long_name      = NULL_IF_CONFIG_SMALL("G.726 ADPCM little-endian"),
 };
 #endif
diff --git a/libavcodec/g729.h b/libavcodec/g729.h
index 61683130..7c5f693a 100644
--- a/libavcodec/g729.h
+++ b/libavcodec/g729.h
@@ -26,4 +26,8 @@
  */
 #define SUBFRAME_SIZE 40
 
+/* bytes per block */
+#define G729_8K_BLOCK_SIZE     10
+#define G729D_6K4_BLOCK_SIZE   8
+
 #endif // AVCODEC_G729_H
diff --git a/libavcodec/g729_parser.c b/libavcodec/g729_parser.c
new file mode 100644
index 00000000..d13c9908
--- /dev/null
+++ b/libavcodec/g729_parser.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2015  Ganesh Ajjanagadde
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * G.729 audio parser
+ *
+ * Splits packets into individual blocks.
+ */
+
+#include "libavutil/avassert.h"
+#include "parser.h"
+#include "g729.h"
+
+typedef struct G729ParseContext {
+    ParseContext pc;
+    int block_size;
+    int duration;
+    int remaining;
+} G729ParseContext;
+
+static int g729_parse(AVCodecParserContext *s1, AVCodecContext *avctx,
+                     const uint8_t **poutbuf, int *poutbuf_size,
+                     const uint8_t *buf, int buf_size)
+{
+    G729ParseContext *s = s1->priv_data;
+    ParseContext *pc = &s->pc;
+    int next;
+
+    if (!s->block_size) {
+        av_assert1(avctx->codec_id == AV_CODEC_ID_G729);
+        /* FIXME: replace this heuristic block_size with more precise estimate */
+        s->block_size = (avctx->bit_rate < 8000) ? G729D_6K4_BLOCK_SIZE : G729_8K_BLOCK_SIZE;
+        s->duration   = avctx->frame_size;
+    }
+
+    if (!s->remaining)
+        s->remaining = s->block_size;
+    if (s->remaining <= buf_size) {
+        next = s->remaining;
+        s->remaining = 0;
+    } else {
+        next = END_NOT_FOUND;
+        s->remaining -= buf_size;
+    }
+
+    if (ff_combine_frame(pc, next, &buf, &buf_size) < 0 || !buf_size) {
+        *poutbuf      = NULL;
+        *poutbuf_size = 0;
+        return buf_size;
+    }
+
+    s1->duration = s->duration;
+
+    *poutbuf      = buf;
+    *poutbuf_size = buf_size;
+    return next;
+}
+
+AVCodecParser ff_g729_parser = {
+    .codec_ids      = { AV_CODEC_ID_G729 },
+    .priv_data_size = sizeof(G729ParseContext),
+    .parser_parse   = g729_parse,
+    .parser_close   = ff_parse_close,
+};
diff --git a/libavcodec/g729dec.c b/libavcodec/g729dec.c
index ed717baf..2e1bf18e 100644
--- a/libavcodec/g729dec.c
+++ b/libavcodec/g729dec.c
@@ -180,14 +180,6 @@ static inline uint16_t g729_prng(uint16_t value)
     return 31821 * value + 13849;
 }
 
-/**
- * Get parity bit of bit 2..7
- */
-static inline int get_parity(uint8_t value)
-{
-   return (0x6996966996696996ULL >> (value >> 2)) & 1;
-}
-
 /**
  * Decodes LSF (Line Spectral Frequencies) from L0-L3 (3.2.4).
  * @param[out] lsfq (2.13) quantized LSF coefficients
@@ -480,7 +472,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame_ptr,
 
         ac_index      = get_bits(&gb, format->ac_index_bits[i]);
         if(!i && format->parity_bit)
-            bad_pitch = get_parity(ac_index) == get_bits1(&gb);
+            bad_pitch = av_parity(ac_index >> 2) == get_bits1(&gb);
         fc_indexes    = get_bits(&gb, format->fc_indexes_bits);
         pulses_signs  = get_bits(&gb, format->fc_signs_bits);
         gc_1st_index  = get_bits(&gb, format->gc_1st_index_bits);
@@ -722,5 +714,5 @@ AVCodec ff_g729_decoder = {
     .priv_data_size = sizeof(G729Context),
     .init           = decoder_init,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_SUBFRAMES | CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/g729postfilter.c b/libavcodec/g729postfilter.c
index 9a775c47..d9076ec7 100644
--- a/libavcodec/g729postfilter.c
+++ b/libavcodec/g729postfilter.c
@@ -165,7 +165,8 @@ static int16_t long_term_filter(AudioDSPContext *adsp, int pitch_delay_int,
                                     sig_scaled + RES_PREV_DATA_SIZE,
                                     subframe_size);
     if (ener) {
-        sh_ener = FFMAX(av_log2(ener) - 14, 0);
+        sh_ener = av_log2(ener) - 14;
+        sh_ener = FFMAX(sh_ener, 0);
         ener >>= sh_ener;
         /* Search for best pitch delay.
 
@@ -320,7 +321,8 @@ static int16_t long_term_filter(AudioDSPContext *adsp, int pitch_delay_int,
             gain_long_num = 0;
             sh_gain_long_num = 0;
         } else {
-            tmp = FFMAX(av_log2(sum) - 14, 0);
+            tmp = av_log2(sum) - 14;
+            tmp = FFMAX(tmp, 0);
             sum >>= tmp;
             gain_long_num = sum;
             sh_gain_long_num = tmp;
@@ -329,7 +331,8 @@ static int16_t long_term_filter(AudioDSPContext *adsp, int pitch_delay_int,
         /* Compute R'(k) correlation's denominator. */
         sum = adsp->scalarproduct_int16(residual_filt, residual_filt, subframe_size);
 
-        tmp = FFMAX(av_log2(sum) - 14, 0);
+        tmp = av_log2(sum) - 14;
+        tmp = FFMAX(tmp, 0);
         sum >>= tmp;
         gain_long_den = sum;
         sh_gain_long_den = tmp;
@@ -541,9 +544,10 @@ void ff_g729_postfilter(AudioDSPContext *adsp, int16_t* ht_prev_data, int* voici
 
     /* long-term filter. If long-term prediction gain is larger than 3dB (returned value is
        nonzero) then declare current subframe as periodic. */
-    *voicing = FFMAX(*voicing, long_term_filter(adsp, pitch_delay_int,
+    i = long_term_filter(adsp, pitch_delay_int,
                                                 residual, residual_filt_buf + 10,
-                                                subframe_size));
+                                                subframe_size);
+    *voicing = FFMAX(*voicing, i);
 
     /* shift residual for using in next subframe */
     memmove(residual, residual + subframe_size, RES_PREV_DATA_SIZE * sizeof(int16_t));
diff --git a/libavcodec/g729postfilter.h b/libavcodec/g729postfilter.h
index 89e3e40c..5c2aaf23 100644
--- a/libavcodec/g729postfilter.h
+++ b/libavcodec/g729postfilter.h
@@ -18,8 +18,8 @@
  * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
-#ifndef FFMPEG_G729POSTFILTER_H
-#define FFMPEG_G729POSTFILTER_H
+#ifndef AVCODEC_G729POSTFILTER_H
+#define AVCODEC_G729POSTFILTER_H
 
 #include <stdint.h>
 #include "audiodsp.h"
@@ -113,4 +113,4 @@ void ff_g729_postfilter(AudioDSPContext *adsp, int16_t* ht_prev_data, int* voici
 int16_t ff_g729_adaptive_gain_control(int gain_before, int gain_after, int16_t *speech,
                                    int subframe_size, int16_t gain_prev);
 
-#endif // FFMPEG_G729POSTFILTER_H
+#endif // AVCODEC_G729POSTFILTER_H
diff --git a/libavcodec/get_bits.h b/libavcodec/get_bits.h
index affaecb1..4cf61d6f 100644
--- a/libavcodec/get_bits.h
+++ b/libavcodec/get_bits.h
@@ -269,6 +269,14 @@ static inline unsigned int get_bits(GetBitContext *s, int n)
     return tmp;
 }
 
+/**
+ * Read 0-25 bits; when n is 0, nothing is read and 0 is returned.
+ */
+static av_always_inline int get_bitsz(GetBitContext *s, int n)
+{
+    return n ? get_bits(s, n) : 0;
+}
+
 static inline unsigned int get_bits_le(GetBitContext *s, int n)
 {
     register int tmp;
@@ -401,7 +409,7 @@ static inline int check_marker(GetBitContext *s, const char *msg)
 
 /**
  * Initialize GetBitContext.
- * @param buffer bitstream buffer, must be FF_INPUT_BUFFER_PADDING_SIZE bytes
+ * @param buffer bitstream buffer, must be AV_INPUT_BUFFER_PADDING_SIZE bytes
  *        larger than the actual read bits because some optimized bitstream
  *        readers read 32 or 64 bit at once and could read over the end
  * @param bit_size the size of the buffer in bits
@@ -432,7 +440,7 @@ static inline int init_get_bits(GetBitContext *s, const uint8_t *buffer,
 
 /**
  * Initialize GetBitContext.
- * @param buffer bitstream buffer, must be FF_INPUT_BUFFER_PADDING_SIZE bytes
+ * @param buffer bitstream buffer, must be AV_INPUT_BUFFER_PADDING_SIZE bytes
  *        larger than the actual read bits because some optimized bitstream
  *        readers read 32 or 64 bit at once and could read over the end
  * @param byte_size the size of the buffer in bytes
@@ -539,6 +547,17 @@ void ff_free_vlc(VLC *vlc);
             index = SHOW_UBITS(name, gb, nb_bits) + level;      \
             level = table[index].level;                         \
             n     = table[index].len;                           \
+            if (max_depth > 2 && n < 0) {                       \
+                LAST_SKIP_BITS(name, gb, nb_bits);              \
+                if (need_update) {                              \
+                    UPDATE_CACHE(name, gb);                     \
+                }                                               \
+                nb_bits = -n;                                   \
+                                                                \
+                index = SHOW_UBITS(name, gb, nb_bits) + level;  \
+                level = table[index].level;                     \
+                n     = table[index].len;                       \
+            }                                                   \
         }                                                       \
         run = table[index].run;                                 \
         SKIP_BITS(name, gb, n);                                 \
diff --git a/libavcodec/gif.c b/libavcodec/gif.c
index cf5d438a..6af1f4ab 100644
--- a/libavcodec/gif.c
+++ b/libavcodec/gif.c
@@ -43,6 +43,7 @@ typedef struct GIFContext {
     const AVClass *class;
     LZWState *lzw;
     uint8_t *buf;
+    int buf_size;
     AVFrame *last_frame;
     int flags;
     uint32_t palette[AVPALETTE_COUNT];  ///< local reference palette for !pal8
@@ -174,7 +175,7 @@ static int gif_image_write_image(AVCodecContext *avctx,
 
     bytestream_put_byte(bytestream, 0x08);
 
-    ff_lzw_encode_init(s->lzw, s->buf, 2 * width * height,
+    ff_lzw_encode_init(s->lzw, s->buf, s->buf_size,
                        12, FF_LZW_GIF, put_bits);
 
     ptr = buf + y_start*linesize + x_start;
@@ -221,18 +222,18 @@ static av_cold int gif_encode_init(AVCodecContext *avctx)
         av_log(avctx, AV_LOG_ERROR, "GIF does not support resolutions above 65535x65535\n");
         return AVERROR(EINVAL);
     }
-
-    avctx->coded_frame = av_frame_alloc();
-    if (!avctx->coded_frame)
-        return AVERROR(ENOMEM);
-
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
     avctx->coded_frame->key_frame = 1;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
     s->transparent_index = -1;
 
     s->lzw = av_mallocz(ff_lzw_encode_state_size);
-    s->buf = av_malloc(avctx->width*avctx->height*2);
+    s->buf_size = avctx->width*avctx->height*2 + 1000;
+    s->buf = av_malloc(s->buf_size);
     s->tmpl = av_malloc(avctx->width);
     if (!s->tmpl || !s->buf || !s->lzw)
         return AVERROR(ENOMEM);
@@ -270,7 +271,7 @@ static int gif_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     const uint32_t *palette = NULL;
     int ret;
 
-    if ((ret = ff_alloc_packet2(avctx, pkt, avctx->width*avctx->height*7/5 + FF_MIN_BUFFER_SIZE)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, pkt, avctx->width*avctx->height*7/5 + AV_INPUT_BUFFER_MIN_SIZE, 0)) < 0)
         return ret;
     outbuf_ptr = pkt->data;
     end        = pkt->data + pkt->size;
@@ -320,10 +321,9 @@ static int gif_encode_close(AVCodecContext *avctx)
 {
     GIFContext *s = avctx->priv_data;
 
-    av_frame_free(&avctx->coded_frame);
-
     av_freep(&s->lzw);
     av_freep(&s->buf);
+    s->buf_size = 0;
     av_frame_free(&s->last_frame);
     av_freep(&s->tmpl);
     return 0;
diff --git a/libavcodec/gif.h b/libavcodec/gif.h
index b4cf6654..9f357788 100644
--- a/libavcodec/gif.h
+++ b/libavcodec/gif.h
@@ -26,8 +26,8 @@
  * GIF format definitions.
  */
 
-#ifndef AVCODEC_GIFDEFS_H
-#define AVCODEC_GIFDEFS_H
+#ifndef AVCODEC_GIF_H
+#define AVCODEC_GIF_H
 
 #include <stdint.h>
 
@@ -46,4 +46,4 @@ static const uint8_t gif89a_sig[6] = "GIF89a";
 #define GIF_APP_EXT_LABEL           0xff
 #define NETSCAPE_EXT_STR            "NETSCAPE2.0"
 
-#endif /* AVCODEC_GIFDEFS_H */
+#endif /* AVCODEC_GIF_H */
diff --git a/libavcodec/gifdec.c b/libavcodec/gifdec.c
index c179f45c..20ae903a 100644
--- a/libavcodec/gifdec.c
+++ b/libavcodec/gifdec.c
@@ -130,7 +130,7 @@ static void gif_copy_img_rect(const uint32_t *src, uint32_t *dst,
 static int gif_read_image(GifState *s, AVFrame *frame)
 {
     int left, top, width, height, bits_per_pixel, code_size, flags, pw;
-    int is_interleaved, has_local_palette, y, pass, y1, linesize, pal_size;
+    int is_interleaved, has_local_palette, y, pass, y1, linesize, pal_size, lzwed_len;
     uint32_t *ptr, *pal, *px, *pr, *ptr1;
     int ret;
     uint8_t *idx;
@@ -293,7 +293,8 @@ static int gif_read_image(GifState *s, AVFrame *frame)
 
  decode_tail:
     /* read the garbage data until end marker is found */
-    ff_lzw_decode_tail(s->lzw);
+    lzwed_len = ff_lzw_decode_tail(s->lzw);
+    bytestream2_skipu(&s->gb, lzwed_len);
 
     /* Graphic Control Extension's scope is single frame.
      * Remove its influence. */
@@ -553,6 +554,6 @@ AVCodec ff_gif_decoder = {
     .init           = gif_decode_init,
     .close          = gif_decode_close,
     .decode         = gif_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .priv_class     = &decoder_class,
 };
diff --git a/libavcodec/golomb.h b/libavcodec/golomb.h
index d30bb6bc..d4df0b39 100644
--- a/libavcodec/golomb.h
+++ b/libavcodec/golomb.h
@@ -48,7 +48,7 @@ extern const  int8_t ff_interleaved_se_golomb_vlc_code[256];
 extern const uint8_t ff_interleaved_dirac_golomb_vlc_code[256];
 
 /**
- * read unsigned exp golomb code.
+ * Read an unsigned Exp-Golomb code in the range 0 to 8190.
  */
 static inline int get_ue_golomb(GetBitContext *gb)
 {
@@ -68,7 +68,7 @@ static inline int get_ue_golomb(GetBitContext *gb)
         int log = 2 * av_log2(buf) - 31;
         LAST_SKIP_BITS(re, gb, 32 - log);
         CLOSE_READER(re, gb);
-        if (CONFIG_FTRAPV && log < 0) {
+        if (log < 7) {
             av_log(NULL, AV_LOG_ERROR, "Invalid UE golomb code\n");
             return AVERROR_INVALIDDATA;
         }
diff --git a/libavcodec/gsm_parser.c b/libavcodec/gsm_parser.c
index 9a3b94ef..1054a30c 100644
--- a/libavcodec/gsm_parser.c
+++ b/libavcodec/gsm_parser.c
@@ -25,6 +25,7 @@
  * Splits packets into individual blocks.
  */
 
+#include "libavutil/avassert.h"
 #include "parser.h"
 #include "gsm.h"
 
@@ -55,10 +56,7 @@ static int gsm_parse(AVCodecParserContext *s1, AVCodecContext *avctx,
             s->duration   = GSM_FRAME_SIZE * 2;
             break;
         default:
-            *poutbuf      = buf;
-            *poutbuf_size = buf_size;
-            av_log(avctx, AV_LOG_ERROR, "Invalid codec_id\n");
-            return buf_size;
+            av_assert0(0);
         }
     }
 
diff --git a/libavcodec/gsmdec.c b/libavcodec/gsmdec.c
index c4cde929..cd569951 100644
--- a/libavcodec/gsmdec.c
+++ b/libavcodec/gsmdec.c
@@ -120,7 +120,7 @@ AVCodec ff_gsm_decoder = {
     .init           = gsm_init,
     .decode         = gsm_decode_frame,
     .flush          = gsm_flush,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
 #endif
 #if CONFIG_GSM_MS_DECODER
@@ -133,6 +133,6 @@ AVCodec ff_gsm_ms_decoder = {
     .init           = gsm_init,
     .decode         = gsm_decode_frame,
     .flush          = gsm_flush,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
 #endif
diff --git a/libavcodec/gsmdec_template.c b/libavcodec/gsmdec_template.c
index 0c60813a..4cb777c8 100644
--- a/libavcodec/gsmdec_template.c
+++ b/libavcodec/gsmdec_template.c
@@ -64,7 +64,7 @@ static inline int decode_log_area(int coded, int factor, int offset)
 {
     coded <<= 10;
     coded -= offset;
-    return gsm_mult(coded, factor) << 1;
+    return gsm_mult(coded, factor) * 2;
 }
 
 static av_noinline int get_rrp(int filtered)
@@ -121,7 +121,7 @@ static int postprocess(int16_t *data, int msr)
     int i;
     for (i = 0; i < 160; i++) {
         msr = av_clip_int16(data[i] + gsm_mult(msr, 28180));
-        data[i] = av_clip_int16(msr << 1) & ~7;
+        data[i] = av_clip_int16(msr * 2) & ~7;
     }
     return msr;
 }
diff --git a/libavcodec/h261dec.c b/libavcodec/h261dec.c
index 4f5994a6..df60ac5d 100644
--- a/libavcodec/h261dec.c
+++ b/libavcodec/h261dec.c
@@ -685,6 +685,6 @@ AVCodec ff_h261_decoder = {
     .init           = h261_decode_init,
     .close          = h261_decode_end,
     .decode         = h261_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .max_lowres     = 3,
 };
diff --git a/libavcodec/h261enc.c b/libavcodec/h261enc.c
index 8ef272db..24ef577a 100644
--- a/libavcodec/h261enc.c
+++ b/libavcodec/h261enc.c
@@ -68,14 +68,14 @@ void ff_h261_encode_picture_header(MpegEncContext *s, int picture_number)
 
     put_bits(&s->pb, 1, 0); /* split screen off */
     put_bits(&s->pb, 1, 0); /* camera  off */
-    put_bits(&s->pb, 1, 0); /* freeze picture release off */
+    put_bits(&s->pb, 1, s->pict_type == AV_PICTURE_TYPE_I); /* freeze picture release on/off */
 
     format = ff_h261_get_picture_format(s->width, s->height);
 
     put_bits(&s->pb, 1, format); /* 0 == QCIF, 1 == CIF */
 
-    put_bits(&s->pb, 1, 0); /* still image mode */
-    put_bits(&s->pb, 1, 0); /* reserved */
+    put_bits(&s->pb, 1, 1); /* still image mode off (0 = on, 1 = off) */
+    put_bits(&s->pb, 1, 1); /* reserved */
 
     put_bits(&s->pb, 1, 0); /* no PEI */
     if (format == 0)
@@ -378,7 +378,12 @@ av_cold void ff_h261_encode_init(MpegEncContext *s)
     s->intra_ac_vlc_last_length = s->inter_ac_vlc_last_length = uni_h261_rl_len + 128*64;
 }
 
-FF_MPV_GENERIC_CLASS(h261)
+static const AVClass h261_class = {
+    .class_name = "h261 encoder",
+    .item_name  = av_default_item_name,
+    .option     = ff_mpv_generic_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
 
 AVCodec ff_h261_encoder = {
     .name           = "h261",
diff --git a/libavcodec/h263.c b/libavcodec/h263.c
index 41836a80..800387d1 100644
--- a/libavcodec/h263.c
+++ b/libavcodec/h263.c
@@ -40,9 +40,6 @@
 #include "mpeg4video.h"
 
 
-uint8_t ff_h263_static_rl_table_store[2][2][2*MAX_RUN + MAX_LEVEL + 3];
-
-
 void ff_h263_update_motion_val(MpegEncContext * s){
     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
                //FIXME a lot of that is only needed for !low_delay
diff --git a/libavcodec/h263.h b/libavcodec/h263.h
index 1f954cdf..3c3f1698 100644
--- a/libavcodec/h263.h
+++ b/libavcodec/h263.h
@@ -24,6 +24,7 @@
 #include "libavutil/rational.h"
 #include "get_bits.h"
 #include "mpegvideo.h"
+#include "h263data.h"
 #include "rl.h"
 
 #if !FF_API_ASPECT_EXTENDED
@@ -42,39 +43,10 @@
 
 #define H263_GOB_HEIGHT(h) ((h) <= 400 ? 1 : (h) <= 800 ? 2 : 4)
 
-extern const AVRational ff_h263_pixel_aspect[16];
-extern const uint8_t ff_h263_cbpy_tab[16][2];
-
-extern const uint8_t ff_cbpc_b_tab[4][2];
-
-extern const uint8_t ff_mvtab[33][2];
-
-extern const uint8_t ff_h263_intra_MCBPC_code[9];
-extern const uint8_t ff_h263_intra_MCBPC_bits[9];
-
-extern const uint8_t ff_h263_inter_MCBPC_code[28];
-extern const uint8_t ff_h263_inter_MCBPC_bits[28];
-extern const uint8_t ff_h263_mbtype_b_tab[15][2];
-
 extern VLC ff_h263_intra_MCBPC_vlc;
 extern VLC ff_h263_inter_MCBPC_vlc;
 extern VLC ff_h263_cbpy_vlc;
 
-extern const uint16_t ff_inter_vlc[103][2];
-extern const int8_t ff_inter_level[102];
-extern const int8_t ff_inter_run[102];
-
-extern RLTable ff_h263_rl_inter;
-
-extern RLTable ff_rl_intra_aic;
-
-extern const uint16_t ff_h263_format[8][2];
-extern const uint8_t ff_modified_quant_tab[2][32];
-extern const uint16_t ff_mba_max[6];
-extern const uint8_t ff_mba_length[7];
-
-extern uint8_t ff_h263_static_rl_table_store[2][2][2*MAX_RUN + MAX_LEVEL + 3];
-
 extern const enum AVPixelFormat ff_h263_hwaccel_pixfmt_list_420[];
 
 
@@ -144,7 +116,7 @@ static inline int h263_get_motion_length(int val, int f_code){
 }
 
 static inline void ff_h263_encode_motion_vector(MpegEncContext * s, int x, int y, int f_code){
-    if (s->avctx->flags2 & CODEC_FLAG2_NO_OUTPUT) {
+    if (s->avctx->flags2 & AV_CODEC_FLAG2_NO_OUTPUT) {
         skip_put_bits(&s->pb,
             h263_get_motion_length(x, f_code)
            +h263_get_motion_length(y, f_code));
diff --git a/libavcodec/h263data.c b/libavcodec/h263data.c
new file mode 100644
index 00000000..ceda80fb
--- /dev/null
+++ b/libavcodec/h263data.c
@@ -0,0 +1,292 @@
+/*
+ * H263+ tables
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * H.263 tables.
+ */
+
+#include <stdint.h>
+
+#include "h263data.h"
+#include "mpegvideo.h"
+
+uint8_t ff_h263_static_rl_table_store[2][2][2 * MAX_RUN + MAX_LEVEL + 3];
+
+/* intra MCBPC, mb_type = (intra), then (intraq) */
+const uint8_t ff_h263_intra_MCBPC_code[9] = { 1, 1, 2, 3, 1, 1, 2, 3, 1 };
+const uint8_t ff_h263_intra_MCBPC_bits[9] = { 1, 3, 3, 3, 4, 6, 6, 6, 9 };
+
+/* inter MCBPC, mb_type = (inter), (intra), (interq), (intraq), (inter4v) */
+/* Changed the tables for interq and inter4v+q, following the standard
+ * ** Juanjo ** */
+const uint8_t ff_h263_inter_MCBPC_code[28] = {
+    1,  3,  2,  5,
+    3,  4,  3,  3,
+    3,  7,  6,  5,
+    4,  4,  3,  2,
+    2,  5,  4,  5,
+    1,  0,  0,  0,  /* Stuffing */
+    2, 12, 14, 15,
+};
+const uint8_t ff_h263_inter_MCBPC_bits[28] = {
+     1,  4,  4,  6,  /* inter  */
+     5,  8,  8,  7,  /* intra  */
+     3,  7,  7,  9,  /* interQ */
+     6,  9,  9,  9,  /* intraQ */
+     3,  7,  7,  8,  /* inter4 */
+     9,  0,  0,  0,  /* Stuffing */
+    11, 13, 13, 13,  /* inter4Q */
+};
+
+const uint8_t ff_h263_mbtype_b_tab[15][2] = {
+    { 1,  1 },
+    { 3,  3 },
+    { 1,  5 },
+    { 4,  4 },
+    { 5,  4 },
+    { 6,  6 },
+    { 2,  4 },
+    { 3,  4 },
+    { 7,  6 },
+    { 4,  6 },
+    { 5,  6 },
+    { 1,  6 },
+    { 1, 10 },
+    { 1,  7 },
+    { 1,  8 },
+};
+
+const uint8_t ff_cbpc_b_tab[4][2] = {
+    { 0, 1 },
+    { 2, 2 },
+    { 7, 3 },
+    { 6, 3 },
+};
+
+const uint8_t ff_h263_cbpy_tab[16][2] = {
+    {  3, 4 }, { 5, 5 }, { 4, 5 }, { 9, 4 }, {  3, 5 }, { 7, 4 }, { 2, 6 },
+    { 11, 4 }, { 2, 5 }, { 3, 6 }, { 5, 4 }, { 10, 4 }, { 4, 4 }, { 8, 4 },
+    {  6, 4 }, { 3, 2 }
+};
+
+const uint8_t ff_mvtab[33][2] = {
+    {  1,  1 }, {  1,  2 }, {  1,  3 }, {  1,  4 }, {  3,  6 }, {  5,  7 },
+    {  4,  7 }, {  3,  7 }, { 11,  9 }, { 10,  9 }, {  9,  9 }, { 17, 10 },
+    { 16, 10 }, { 15, 10 }, { 14, 10 }, { 13, 10 }, { 12, 10 }, { 11, 10 },
+    { 10, 10 }, {  9, 10 }, {  8, 10 }, {  7, 10 }, {  6, 10 }, {  5, 10 },
+    {  4, 10 }, {  7, 11 }, {  6, 11 }, {  5, 11 }, {  4, 11 }, {  3, 11 },
+    {  2, 11 }, {  3, 12 }, {  2, 12 }
+};
+
+/* third non intra table */
+const uint16_t ff_inter_vlc[103][2] = {
+    {  0x2,  2 }, {  0xf,  4 }, { 0x15,  6 }, { 0x17,  7 },
+    { 0x1f,  8 }, { 0x25,  9 }, { 0x24,  9 }, { 0x21, 10 },
+    { 0x20, 10 }, {  0x7, 11 }, {  0x6, 11 }, { 0x20, 11 },
+    {  0x6,  3 }, { 0x14,  6 }, { 0x1e,  8 }, {  0xf, 10 },
+    { 0x21, 11 }, { 0x50, 12 }, {  0xe,  4 }, { 0x1d,  8 },
+    {  0xe, 10 }, { 0x51, 12 }, {  0xd,  5 }, { 0x23,  9 },
+    {  0xd, 10 }, {  0xc,  5 }, { 0x22,  9 }, { 0x52, 12 },
+    {  0xb,  5 }, {  0xc, 10 }, { 0x53, 12 }, { 0x13,  6 },
+    {  0xb, 10 }, { 0x54, 12 }, { 0x12,  6 }, {  0xa, 10 },
+    { 0x11,  6 }, {  0x9, 10 }, { 0x10,  6 }, {  0x8, 10 },
+    { 0x16,  7 }, { 0x55, 12 }, { 0x15,  7 }, { 0x14,  7 },
+    { 0x1c,  8 }, { 0x1b,  8 }, { 0x21,  9 }, { 0x20,  9 },
+    { 0x1f,  9 }, { 0x1e,  9 }, { 0x1d,  9 }, { 0x1c,  9 },
+    { 0x1b,  9 }, { 0x1a,  9 }, { 0x22, 11 }, { 0x23, 11 },
+    { 0x56, 12 }, { 0x57, 12 }, {  0x7,  4 }, { 0x19,  9 },
+    {  0x5, 11 }, {  0xf,  6 }, {  0x4, 11 }, {  0xe,  6 },
+    {  0xd,  6 }, {  0xc,  6 }, { 0x13,  7 }, { 0x12,  7 },
+    { 0x11,  7 }, { 0x10,  7 }, { 0x1a,  8 }, { 0x19,  8 },
+    { 0x18,  8 }, { 0x17,  8 }, { 0x16,  8 }, { 0x15,  8 },
+    { 0x14,  8 }, { 0x13,  8 }, { 0x18,  9 }, { 0x17,  9 },
+    { 0x16,  9 }, { 0x15,  9 }, { 0x14,  9 }, { 0x13,  9 },
+    { 0x12,  9 }, { 0x11,  9 }, {  0x7, 10 }, {  0x6, 10 },
+    {  0x5, 10 }, {  0x4, 10 }, { 0x24, 11 }, { 0x25, 11 },
+    { 0x26, 11 }, { 0x27, 11 }, { 0x58, 12 }, { 0x59, 12 },
+    { 0x5a, 12 }, { 0x5b, 12 }, { 0x5c, 12 }, { 0x5d, 12 },
+    { 0x5e, 12 }, { 0x5f, 12 }, {  0x3,  7 },
+};
+
+const int8_t ff_inter_level[102] = {
+    1,  2,  3,  4, 5, 6, 7, 8,
+    9, 10, 11, 12, 1, 2, 3, 4,
+    5,  6,  1,  2, 3, 4, 1, 2,
+    3,  1,  2,  3, 1, 2, 3, 1,
+    2,  3,  1,  2, 1, 2, 1, 2,
+    1,  2,  1,  1, 1, 1, 1, 1,
+    1,  1,  1,  1, 1, 1, 1, 1,
+    1,  1,  1,  2, 3, 1, 2, 1,
+    1,  1,  1,  1, 1, 1, 1, 1,
+    1,  1,  1,  1, 1, 1, 1, 1,
+    1,  1,  1,  1, 1, 1, 1, 1,
+    1,  1,  1,  1, 1, 1, 1, 1,
+    1,  1,  1,  1, 1, 1,
+};
+
+const int8_t ff_inter_run[102] = {
+    0,   0,  0,  0,  0,  0,  0,  0,
+    0,   0,  0,  0,  1,  1,  1,  1,
+    1,   1,  2,  2,  2,  2,  3,  3,
+    3,   4,  4,  4,  5,  5,  5,  6,
+    6,   6,  7,  7,  8,  8,  9,  9,
+    10, 10, 11, 12, 13, 14, 15, 16,
+    17, 18, 19, 20, 21, 22, 23, 24,
+    25, 26,  0,  0,  0,  1,  1,  2,
+    3,   4,  5,  6,  7,  8,  9, 10,
+    11, 12, 13, 14, 15, 16, 17, 18,
+    19, 20, 21, 22, 23, 24, 25, 26,
+    27, 28, 29, 30, 31, 32, 33, 34,
+    35, 36, 37, 38, 39, 40,
+};
+
+RLTable ff_h263_rl_inter = {
+    102,
+    58,
+    ff_inter_vlc,
+    ff_inter_run,
+    ff_inter_level,
+};
+
+static const uint16_t intra_vlc_aic[103][2] = {
+    {  0x2,  2 }, {  0x6,  3 }, {  0xe,  4 }, {  0xc,  5 },
+    {  0xd,  5 }, { 0x10,  6 }, { 0x11,  6 }, { 0x12,  6 },
+    { 0x16,  7 }, { 0x1b,  8 }, { 0x20,  9 }, { 0x21,  9 },
+    { 0x1a,  9 }, { 0x1b,  9 }, { 0x1c,  9 }, { 0x1d,  9 },
+    { 0x1e,  9 }, { 0x1f,  9 }, { 0x23, 11 }, { 0x22, 11 },
+    { 0x57, 12 }, { 0x56, 12 }, { 0x55, 12 }, { 0x54, 12 },
+    { 0x53, 12 }, {  0xf,  4 }, { 0x14,  6 }, { 0x14,  7 },
+    { 0x1e,  8 }, {  0xf, 10 }, { 0x21, 11 }, { 0x50, 12 },
+    {  0xb,  5 }, { 0x15,  7 }, {  0xe, 10 }, {  0x9, 10 },
+    { 0x15,  6 }, { 0x1d,  8 }, {  0xd, 10 }, { 0x51, 12 },
+    { 0x13,  6 }, { 0x23,  9 }, {  0x7, 11 }, { 0x17,  7 },
+    { 0x22,  9 }, { 0x52, 12 }, { 0x1c,  8 }, {  0xc, 10 },
+    { 0x1f,  8 }, {  0xb, 10 }, { 0x25,  9 }, {  0xa, 10 },
+    { 0x24,  9 }, {  0x6, 11 }, { 0x21, 10 }, { 0x20, 10 },
+    {  0x8, 10 }, { 0x20, 11 }, {  0x7,  4 }, {  0xc,  6 },
+    { 0x10,  7 }, { 0x13,  8 }, { 0x11,  9 }, { 0x12,  9 },
+    {  0x4, 10 }, { 0x27, 11 }, { 0x26, 11 }, { 0x5f, 12 },
+    {  0xf,  6 }, { 0x13,  9 }, {  0x5, 10 }, { 0x25, 11 },
+    {  0xe,  6 }, { 0x14,  9 }, { 0x24, 11 }, {  0xd,  6 },
+    {  0x6, 10 }, { 0x5e, 12 }, { 0x11,  7 }, {  0x7, 10 },
+    { 0x13,  7 }, { 0x5d, 12 }, { 0x12,  7 }, { 0x5c, 12 },
+    { 0x14,  8 }, { 0x5b, 12 }, { 0x15,  8 }, { 0x1a,  8 },
+    { 0x19,  8 }, { 0x18,  8 }, { 0x17,  8 }, { 0x16,  8 },
+    { 0x19,  9 }, { 0x15,  9 }, { 0x16,  9 }, { 0x18,  9 },
+    { 0x17,  9 }, {  0x4, 11 }, {  0x5, 11 }, { 0x58, 12 },
+    { 0x59, 12 }, { 0x5a, 12 }, {  0x3,  7 },
+};
+
+static const int8_t intra_run_aic[102] = {
+    0,   0,  0,  0,  0,  0,  0,  0,
+    0,   0,  0,  0,  0,  0,  0,  0,
+    0,   0,  0,  0,  0,  0,  0,  0,
+    0,   1,  1,  1,  1,  1,  1,  1,
+    2,   2,  2,  2,  3,  3,  3,  3,
+    4,   4,  4,  5,  5,  5,  6,  6,
+    7,   7,  8,  8,  9,  9, 10, 11,
+    12, 13,  0,  0,  0,  0,  0,  0,
+    0,   0,  0,  0,  1,  1,  1,  1,
+    2,   2,  2,  3,  3,  3,  4,  4,
+    5,   5,  6,  6,  7,  7,  8,  9,
+    10, 11, 12, 13, 14, 15, 16, 17,
+    18, 19, 20, 21, 22, 23,
+};
+
+static const int8_t intra_level_aic[102] = {
+    1,   2,  3,  4,  5,  6,  7,  8,
+    9,  10, 11, 12, 13, 14, 15, 16,
+    17, 18, 19, 20, 21, 22, 23, 24,
+    25,  1,  2,  3,  4,  5,  6,  7,
+    1,   2,  3,  4,  1,  2,  3,  4,
+    1,   2,  3,  1,  2,  3,  1,  2,
+    1,   2,  1,  2,  1,  2,  1,  1,
+    1,   1,  1,  2,  3,  4,  5,  6,
+    7,   8,  9, 10,  1,  2,  3,  4,
+    1,   2,  3,  1,  2,  3,  1,  2,
+    1,   2,  1,  2,  1,  2,  1,  1,
+    1,   1,  1,  1,  1,  1,  1,  1,
+    1,   1,  1,  1,  1,  1,
+};
+
+RLTable ff_rl_intra_aic = {
+    102,
+    58,
+    intra_vlc_aic,
+    intra_run_aic,
+    intra_level_aic,
+};
+
+const uint16_t ff_h263_format[8][2] = {
+    {    0,    0 },
+    {  128,   96 },
+    {  176,  144 },
+    {  352,  288 },
+    {  704,  576 },
+    { 1408, 1152 },
+};
+
+const uint8_t ff_aic_dc_scale_table[32] = {
+//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+    0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62
+};
+
+const uint8_t ff_modified_quant_tab[2][32] = {
+//      0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+    {
+        0, 3, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28
+    },
+    {
+        0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 31, 31, 26
+    }
+};
+
+const uint8_t ff_h263_chroma_qscale_table[32] = {
+//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+    0, 1, 2, 3, 4, 5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 11, 12, 12, 12, 13, 13, 13, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15
+};
+
+const uint16_t ff_mba_max[6] = {
+    47, 98, 395, 1583, 6335, 9215
+};
+
+const uint8_t ff_mba_length[7] = {
+    6, 7, 9, 11, 13, 14, 14
+};
+
+const AVRational ff_h263_pixel_aspect[16] = {
+    {  0,  1 },
+    {  1,  1 },
+    { 12, 11 },
+    { 10, 11 },
+    { 16, 11 },
+    { 40, 33 },
+    {  0,  1 },
+    {  0,  1 },
+    {  0,  1 },
+    {  0,  1 },
+    {  0,  1 },
+    {  0,  1 },
+    {  0,  1 },
+    {  0,  1 },
+    {  0,  1 },
+    {  0,  1 },
+};
diff --git a/libavcodec/h263data.h b/libavcodec/h263data.h
index 1cd965f1..0ad2ef46 100644
--- a/libavcodec/h263data.h
+++ b/libavcodec/h263data.h
@@ -30,265 +30,48 @@
 #define AVCODEC_H263DATA_H
 
 #include <stdint.h>
-#include "mpegvideo.h"
+
+#include "libavutil/rational.h"
+
+#include "rl.h"
+
+extern const AVRational ff_h263_pixel_aspect[16];
 
 /* intra MCBPC, mb_type = (intra), then (intraq) */
-const uint8_t ff_h263_intra_MCBPC_code[9] = { 1, 1, 2, 3, 1, 1, 2, 3, 1 };
-const uint8_t ff_h263_intra_MCBPC_bits[9] = { 1, 3, 3, 3, 4, 6, 6, 6, 9 };
+extern const uint8_t ff_h263_intra_MCBPC_code[9];
+extern const uint8_t ff_h263_intra_MCBPC_bits[9];
 
 /* inter MCBPC, mb_type = (inter), (intra), (interq), (intraq), (inter4v) */
 /* Changed the tables for interq and inter4v+q, following the standard ** Juanjo ** */
-const uint8_t ff_h263_inter_MCBPC_code[28] = {
-    1, 3, 2, 5,
-    3, 4, 3, 3,
-    3, 7, 6, 5,
-    4, 4, 3, 2,
-    2, 5, 4, 5,
-    1, 0, 0, 0, /* Stuffing */
-    2, 12, 14, 15,
-};
-const uint8_t ff_h263_inter_MCBPC_bits[28] = {
-    1, 4, 4, 6, /* inter  */
-    5, 8, 8, 7, /* intra  */
-    3, 7, 7, 9, /* interQ */
-    6, 9, 9, 9, /* intraQ */
-    3, 7, 7, 8, /* inter4 */
-    9, 0, 0, 0, /* Stuffing */
-    11, 13, 13, 13,/* inter4Q*/
-};
+extern const uint8_t ff_h263_inter_MCBPC_code[28];
+extern const uint8_t ff_h263_inter_MCBPC_bits[28];
 
-const uint8_t ff_h263_mbtype_b_tab[15][2] = {
- {1, 1},
- {3, 3},
- {1, 5},
- {4, 4},
- {5, 4},
- {6, 6},
- {2, 4},
- {3, 4},
- {7, 6},
- {4, 6},
- {5, 6},
- {1, 6},
- {1,10},
- {1, 7},
- {1, 8},
-};
+extern const uint8_t ff_h263_mbtype_b_tab[15][2];
 
-const uint8_t ff_cbpc_b_tab[4][2] = {
-{0, 1},
-{2, 2},
-{7, 3},
-{6, 3},
-};
+extern const uint8_t ff_cbpc_b_tab[4][2];
+extern const uint8_t ff_h263_cbpy_tab[16][2];
 
-const uint8_t ff_h263_cbpy_tab[16][2] =
-{
-  {3,4}, {5,5}, {4,5}, {9,4}, {3,5}, {7,4}, {2,6}, {11,4},
-  {2,5}, {3,6}, {5,4}, {10,4}, {4,4}, {8,4}, {6,4}, {3,2}
-};
-
-const uint8_t ff_mvtab[33][2] =
-{
-  {1,1}, {1,2}, {1,3}, {1,4}, {3,6}, {5,7}, {4,7}, {3,7},
-  {11,9}, {10,9}, {9,9}, {17,10}, {16,10}, {15,10}, {14,10}, {13,10},
-  {12,10}, {11,10}, {10,10}, {9,10}, {8,10}, {7,10}, {6,10}, {5,10},
-  {4,10}, {7,11}, {6,11}, {5,11}, {4,11}, {3,11}, {2,11}, {3,12},
-  {2,12}
-};
+extern const uint8_t ff_mvtab[33][2];
 
 /* third non intra table */
-const uint16_t ff_inter_vlc[103][2] = {
-{ 0x2, 2 },{ 0xf, 4 },{ 0x15, 6 },{ 0x17, 7 },
-{ 0x1f, 8 },{ 0x25, 9 },{ 0x24, 9 },{ 0x21, 10 },
-{ 0x20, 10 },{ 0x7, 11 },{ 0x6, 11 },{ 0x20, 11 },
-{ 0x6, 3 },{ 0x14, 6 },{ 0x1e, 8 },{ 0xf, 10 },
-{ 0x21, 11 },{ 0x50, 12 },{ 0xe, 4 },{ 0x1d, 8 },
-{ 0xe, 10 },{ 0x51, 12 },{ 0xd, 5 },{ 0x23, 9 },
-{ 0xd, 10 },{ 0xc, 5 },{ 0x22, 9 },{ 0x52, 12 },
-{ 0xb, 5 },{ 0xc, 10 },{ 0x53, 12 },{ 0x13, 6 },
-{ 0xb, 10 },{ 0x54, 12 },{ 0x12, 6 },{ 0xa, 10 },
-{ 0x11, 6 },{ 0x9, 10 },{ 0x10, 6 },{ 0x8, 10 },
-{ 0x16, 7 },{ 0x55, 12 },{ 0x15, 7 },{ 0x14, 7 },
-{ 0x1c, 8 },{ 0x1b, 8 },{ 0x21, 9 },{ 0x20, 9 },
-{ 0x1f, 9 },{ 0x1e, 9 },{ 0x1d, 9 },{ 0x1c, 9 },
-{ 0x1b, 9 },{ 0x1a, 9 },{ 0x22, 11 },{ 0x23, 11 },
-{ 0x56, 12 },{ 0x57, 12 },{ 0x7, 4 },{ 0x19, 9 },
-{ 0x5, 11 },{ 0xf, 6 },{ 0x4, 11 },{ 0xe, 6 },
-{ 0xd, 6 },{ 0xc, 6 },{ 0x13, 7 },{ 0x12, 7 },
-{ 0x11, 7 },{ 0x10, 7 },{ 0x1a, 8 },{ 0x19, 8 },
-{ 0x18, 8 },{ 0x17, 8 },{ 0x16, 8 },{ 0x15, 8 },
-{ 0x14, 8 },{ 0x13, 8 },{ 0x18, 9 },{ 0x17, 9 },
-{ 0x16, 9 },{ 0x15, 9 },{ 0x14, 9 },{ 0x13, 9 },
-{ 0x12, 9 },{ 0x11, 9 },{ 0x7, 10 },{ 0x6, 10 },
-{ 0x5, 10 },{ 0x4, 10 },{ 0x24, 11 },{ 0x25, 11 },
-{ 0x26, 11 },{ 0x27, 11 },{ 0x58, 12 },{ 0x59, 12 },
-{ 0x5a, 12 },{ 0x5b, 12 },{ 0x5c, 12 },{ 0x5d, 12 },
-{ 0x5e, 12 },{ 0x5f, 12 },{ 0x3, 7 },
-};
-
-const int8_t ff_inter_level[102] = {
-  1,  2,  3,  4,  5,  6,  7,  8,
-  9, 10, 11, 12,  1,  2,  3,  4,
-  5,  6,  1,  2,  3,  4,  1,  2,
-  3,  1,  2,  3,  1,  2,  3,  1,
-  2,  3,  1,  2,  1,  2,  1,  2,
-  1,  2,  1,  1,  1,  1,  1,  1,
-  1,  1,  1,  1,  1,  1,  1,  1,
-  1,  1,  1,  2,  3,  1,  2,  1,
-  1,  1,  1,  1,  1,  1,  1,  1,
-  1,  1,  1,  1,  1,  1,  1,  1,
-  1,  1,  1,  1,  1,  1,  1,  1,
-  1,  1,  1,  1,  1,  1,  1,  1,
-  1,  1,  1,  1,  1,  1,
-};
-
-const int8_t ff_inter_run[102] = {
-  0,  0,  0,  0,  0,  0,  0,  0,
-  0,  0,  0,  0,  1,  1,  1,  1,
-  1,  1,  2,  2,  2,  2,  3,  3,
-  3,  4,  4,  4,  5,  5,  5,  6,
-  6,  6,  7,  7,  8,  8,  9,  9,
- 10, 10, 11, 12, 13, 14, 15, 16,
- 17, 18, 19, 20, 21, 22, 23, 24,
- 25, 26,  0,  0,  0,  1,  1,  2,
-  3,  4,  5,  6,  7,  8,  9, 10,
- 11, 12, 13, 14, 15, 16, 17, 18,
- 19, 20, 21, 22, 23, 24, 25, 26,
- 27, 28, 29, 30, 31, 32, 33, 34,
- 35, 36, 37, 38, 39, 40,
-};
-
-RLTable ff_h263_rl_inter = {
-    102,
-    58,
-    ff_inter_vlc,
-    ff_inter_run,
-    ff_inter_level,
-};
-
-static const uint16_t intra_vlc_aic[103][2] = {
-{  0x2,  2 }, {  0x6,  3 }, {  0xe,  4 }, {  0xc,  5 },
-{  0xd,  5 }, { 0x10,  6 }, { 0x11,  6 }, { 0x12,  6 },
-{ 0x16,  7 }, { 0x1b,  8 }, { 0x20,  9 }, { 0x21,  9 },
-{ 0x1a,  9 }, { 0x1b,  9 }, { 0x1c,  9 }, { 0x1d,  9 },
-{ 0x1e,  9 }, { 0x1f,  9 }, { 0x23, 11 }, { 0x22, 11 },
-{ 0x57, 12 }, { 0x56, 12 }, { 0x55, 12 }, { 0x54, 12 },
-{ 0x53, 12 }, {  0xf,  4 }, { 0x14,  6 }, { 0x14,  7 },
-{ 0x1e,  8 }, {  0xf, 10 }, { 0x21, 11 }, { 0x50, 12 },
-{  0xb,  5 }, { 0x15,  7 }, {  0xe, 10 }, {  0x9, 10 },
-{ 0x15,  6 }, { 0x1d,  8 }, {  0xd, 10 }, { 0x51, 12 },
-{ 0x13,  6 }, { 0x23,  9 }, {  0x7, 11 }, { 0x17,  7 },
-{ 0x22,  9 }, { 0x52, 12 }, { 0x1c,  8 }, {  0xc, 10 },
-{ 0x1f,  8 }, {  0xb, 10 }, { 0x25,  9 }, {  0xa, 10 },
-{ 0x24,  9 }, {  0x6, 11 }, { 0x21, 10 }, { 0x20, 10 },
-{  0x8, 10 }, { 0x20, 11 }, {  0x7,  4 }, {  0xc,  6 },
-{ 0x10,  7 }, { 0x13,  8 }, { 0x11,  9 }, { 0x12,  9 },
-{  0x4, 10 }, { 0x27, 11 }, { 0x26, 11 }, { 0x5f, 12 },
-{  0xf,  6 }, { 0x13,  9 }, {  0x5, 10 }, { 0x25, 11 },
-{  0xe,  6 }, { 0x14,  9 }, { 0x24, 11 }, {  0xd,  6 },
-{  0x6, 10 }, { 0x5e, 12 }, { 0x11,  7 }, {  0x7, 10 },
-{ 0x13,  7 }, { 0x5d, 12 }, { 0x12,  7 }, { 0x5c, 12 },
-{ 0x14,  8 }, { 0x5b, 12 }, { 0x15,  8 }, { 0x1a,  8 },
-{ 0x19,  8 }, { 0x18,  8 }, { 0x17,  8 }, { 0x16,  8 },
-{ 0x19,  9 }, { 0x15,  9 }, { 0x16,  9 }, { 0x18,  9 },
-{ 0x17,  9 }, {  0x4, 11 }, {  0x5, 11 }, { 0x58, 12 },
-{ 0x59, 12 }, { 0x5a, 12 }, {  0x3,  7 },
-};
-
-static const int8_t intra_run_aic[102] = {
- 0,  0,  0,  0,  0,  0,  0,  0,
- 0,  0,  0,  0,  0,  0,  0,  0,
- 0,  0,  0,  0,  0,  0,  0,  0,
- 0,  1,  1,  1,  1,  1,  1,  1,
- 2,  2,  2,  2,  3,  3,  3,  3,
- 4,  4,  4,  5,  5,  5,  6,  6,
- 7,  7,  8,  8,  9,  9, 10, 11,
-12, 13,  0,  0,  0,  0,  0,  0,
- 0,  0,  0,  0,  1,  1,  1,  1,
- 2,  2,  2,  3,  3,  3,  4,  4,
- 5,  5,  6,  6,  7,  7,  8,  9,
-10, 11, 12, 13, 14, 15, 16, 17,
-18, 19, 20, 21, 22, 23,
-};
-
-static const int8_t intra_level_aic[102] = {
- 1,  2,  3,  4,  5,  6,  7,  8,
- 9, 10, 11, 12, 13, 14, 15, 16,
-17, 18, 19, 20, 21, 22, 23, 24,
-25,  1,  2,  3,  4,  5,  6,  7,
- 1,  2,  3,  4,  1,  2,  3,  4,
- 1,  2,  3,  1,  2,  3,  1,  2,
- 1,  2,  1,  2,  1,  2,  1,  1,
- 1,  1,  1,  2,  3,  4,  5,  6,
- 7,  8,  9, 10,  1,  2,  3,  4,
- 1,  2,  3,  1,  2,  3,  1,  2,
- 1,  2,  1,  2,  1,  2,  1,  1,
- 1,  1,  1,  1,  1,  1,  1,  1,
- 1,  1,  1,  1,  1,  1,
-};
-
-RLTable ff_rl_intra_aic = {
-    102,
-    58,
-    intra_vlc_aic,
-    intra_run_aic,
-    intra_level_aic,
-};
+extern const uint16_t ff_inter_vlc[103][2];
 
-const uint16_t ff_h263_format[8][2] = {
-    { 0, 0 },
-    { 128, 96 },
-    { 176, 144 },
-    { 352, 288 },
-    { 704, 576 },
-    { 1408, 1152 },
-};
+extern const int8_t ff_inter_level[102];
+extern const int8_t ff_inter_run[102];
 
-const uint8_t ff_aic_dc_scale_table[32]={
-//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
-    0, 2, 4, 6, 8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40,42,44,46,48,50,52,54,56,58,60,62
-};
+extern RLTable ff_h263_rl_inter;
+extern RLTable ff_rl_intra_aic;
+extern uint8_t ff_h263_static_rl_table_store[2][2][2 * MAX_RUN + MAX_LEVEL + 3];
 
-const uint8_t ff_modified_quant_tab[2][32]={
-//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
-{
-    0, 3, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9,10,11,12,13,14,15,16,17,18,18,19,20,21,22,23,24,25,26,27,28
-},{
-    0, 2, 3, 4, 5, 6, 7, 8, 9,10,11,13,14,15,16,17,18,19,20,21,22,24,25,26,27,28,29,30,31,31,31,26
-}
-};
+extern const uint16_t ff_h263_format[8][2];
 
-const uint8_t ff_h263_chroma_qscale_table[32]={
-//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
-    0, 1, 2, 3, 4, 5, 6, 6, 7, 8, 9, 9,10,10,11,11,12,12,12,13,13,13,14,14,14,14,14,15,15,15,15,15
-};
+extern const uint8_t ff_aic_dc_scale_table[32];
 
-const uint16_t ff_mba_max[6]={
-     47,  98, 395,1583,6335,9215
-};
+extern const uint8_t ff_modified_quant_tab[2][32];
 
-const uint8_t ff_mba_length[7]={
-      6,   7,   9,  11,  13,  14,  14
-};
+extern const uint8_t ff_h263_chroma_qscale_table[32];
 
-const AVRational ff_h263_pixel_aspect[16]={
- {0, 1},
- {1, 1},
- {12, 11},
- {10, 11},
- {16, 11},
- {40, 33},
- {0, 1},
- {0, 1},
- {0, 1},
- {0, 1},
- {0, 1},
- {0, 1},
- {0, 1},
- {0, 1},
- {0, 1},
- {0, 1},
-};
+extern const uint16_t ff_mba_max[6];
+extern const uint8_t ff_mba_length[7];
 
 #endif /* AVCODEC_H263DATA_H */
diff --git a/libavcodec/h263dec.c b/libavcodec/h263dec.c
index 15f073ae..628546bb 100644
--- a/libavcodec/h263dec.c
+++ b/libavcodec/h263dec.c
@@ -50,7 +50,7 @@ static enum AVPixelFormat h263_get_format(AVCodecContext *avctx)
     if (avctx->codec->id == AV_CODEC_ID_MSS2)
         return AV_PIX_FMT_YUV420P;
 
-    if (CONFIG_GRAY && (avctx->flags & CODEC_FLAG_GRAY)) {
+    if (CONFIG_GRAY && (avctx->flags & AV_CODEC_FLAG_GRAY)) {
         if (avctx->color_range == AVCOL_RANGE_UNSPECIFIED)
             avctx->color_range = AVCOL_RANGE_MPEG;
         return AV_PIX_FMT_GRAY8;
@@ -165,7 +165,7 @@ static int get_consumed_bytes(MpegEncContext *s, int buf_size)
         /* We would have to scan through the whole buf to handle the weird
          * reordering ... */
         return buf_size;
-    } else if (s->avctx->flags & CODEC_FLAG_TRUNCATED) {
+    } else if (s->avctx->flags & AV_CODEC_FLAG_TRUNCATED) {
         pos -= s->parse_context.last_index;
         // padding is not really read so this might be -1
         if (pos < 0)
@@ -249,8 +249,8 @@ static int decode_slice(MpegEncContext *s)
 
             s->mv_dir  = MV_DIR_FORWARD;
             s->mv_type = MV_TYPE_16X16;
-            ff_dlog(s, "%d %d %06X\n",
-                    ret, get_bits_count(&s->gb), show_bits(&s->gb, 24));
+            ff_dlog(s, "%d %06X\n",
+                    get_bits_count(&s->gb), show_bits(&s->gb, 24));
 
             ff_tlog(NULL, "Decoding MB at %dx%d\n", s->mb_x, s->mb_y);
             ret = s->decode_mb(s, s->block);
@@ -430,7 +430,7 @@ int ff_h263_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         return 0;
     }
 
-    if (s->avctx->flags & CODEC_FLAG_TRUNCATED) {
+    if (s->avctx->flags & AV_CODEC_FLAG_TRUNCATED) {
         int next;
 
         if (CONFIG_MPEG4_DECODER && s->codec_id == AV_CODEC_ID_MPEG4) {
@@ -603,10 +603,12 @@ int ff_h263_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     if (!s->divx_packed && !avctx->hwaccel)
         ff_thread_finish_setup(avctx);
 
-    if (CONFIG_MPEG4_VDPAU_DECODER && (s->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU)) {
+#if FF_API_CAP_VDPAU
+    if (CONFIG_MPEG4_VDPAU_DECODER && (s->avctx->codec->capabilities & AV_CODEC_CAP_HWACCEL_VDPAU)) {
         ff_vdpau_mpeg4_decode_picture(avctx->priv_data, s->gb.buffer, s->gb.buffer_end - s->gb.buffer);
         goto frame_end;
     }
+#endif
 
     if (avctx->hwaccel) {
         ret = avctx->hwaccel->start_frame(avctx, s->gb.buffer,
@@ -697,8 +699,8 @@ int ff_h263_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
             int x, y, p;
             av_frame_make_writable(pict);
             for (p=0; p<3; p++) {
-                int w = FF_CEIL_RSHIFT(pict-> width, !!p);
-                int h = FF_CEIL_RSHIFT(pict->height, !!p);
+                int w = AV_CEIL_RSHIFT(pict-> width, !!p);
+                int h = AV_CEIL_RSHIFT(pict->height, !!p);
                 int linesize = pict->linesize[p];
                 for (y=0; y<(h>>1); y++)
                     for (x=0; x<w; x++)
@@ -718,10 +720,13 @@ int ff_h263_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
 
 const enum AVPixelFormat ff_h263_hwaccel_pixfmt_list_420[] = {
 #if CONFIG_H263_VAAPI_HWACCEL || CONFIG_MPEG4_VAAPI_HWACCEL
-    AV_PIX_FMT_VAAPI_VLD,
+    AV_PIX_FMT_VAAPI,
 #endif
-#if CONFIG_H263_VDPAU_HWACCEL || CONFIG_MPEG4_VDPAU_HWACCEL
+#if CONFIG_MPEG4_VDPAU_HWACCEL
     AV_PIX_FMT_VDPAU,
+#endif
+#if CONFIG_H263_VIDEOTOOLBOX_HWACCEL || CONFIG_MPEG4_VIDEOTOOLBOX_HWACCEL
+    AV_PIX_FMT_VIDEOTOOLBOX,
 #endif
     AV_PIX_FMT_YUV420P,
     AV_PIX_FMT_NONE
@@ -736,8 +741,8 @@ AVCodec ff_h263_decoder = {
     .init           = ff_h263_decode_init,
     .close          = ff_h263_decode_end,
     .decode         = ff_h263_decode_frame,
-    .capabilities   = CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 |
-                      CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_DRAW_HORIZ_BAND | AV_CODEC_CAP_DR1 |
+                      AV_CODEC_CAP_TRUNCATED | AV_CODEC_CAP_DELAY,
     .flush          = ff_mpeg_flush,
     .max_lowres     = 3,
     .pix_fmts       = ff_h263_hwaccel_pixfmt_list_420,
@@ -752,8 +757,8 @@ AVCodec ff_h263p_decoder = {
     .init           = ff_h263_decode_init,
     .close          = ff_h263_decode_end,
     .decode         = ff_h263_decode_frame,
-    .capabilities   = CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 |
-                      CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_DRAW_HORIZ_BAND | AV_CODEC_CAP_DR1 |
+                      AV_CODEC_CAP_TRUNCATED | AV_CODEC_CAP_DELAY,
     .flush          = ff_mpeg_flush,
     .max_lowres     = 3,
     .pix_fmts       = ff_h263_hwaccel_pixfmt_list_420,
diff --git a/libavcodec/h263dsp.c b/libavcodec/h263dsp.c
index a70ff24f..b3c0bcd4 100644
--- a/libavcodec/h263dsp.c
+++ b/libavcodec/h263dsp.c
@@ -121,4 +121,6 @@ av_cold void ff_h263dsp_init(H263DSPContext *ctx)
 
     if (ARCH_X86)
         ff_h263dsp_init_x86(ctx);
+    if (ARCH_MIPS)
+        ff_h263dsp_init_mips(ctx);
 }
diff --git a/libavcodec/h263dsp.h b/libavcodec/h263dsp.h
index d2cc2ffe..1abea3ca 100644
--- a/libavcodec/h263dsp.h
+++ b/libavcodec/h263dsp.h
@@ -30,5 +30,6 @@ typedef struct H263DSPContext {
 
 void ff_h263dsp_init(H263DSPContext *ctx);
 void ff_h263dsp_init_x86(H263DSPContext *ctx);
+void ff_h263dsp_init_mips(H263DSPContext *ctx);
 
 #endif /* AVCODEC_H263DSP_H */
diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 9a002142..88768af7 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -46,11 +46,14 @@
 #include "mathops.h"
 #include "me_cmp.h"
 #include "mpegutils.h"
+#include "profiles.h"
 #include "rectangle.h"
 #include "svq3.h"
 #include "thread.h"
 #include "vdpau_compat.h"
 
+static int h264_decode_end(AVCodecContext *avctx);
+
 const uint16_t ff_h264_mb_sizes[4] = { 256, 384, 512, 768 };
 
 int avpriv_h264_has_num_reorder_frames(AVCodecContext *avctx)
@@ -293,7 +296,7 @@ const uint8_t *ff_h264_decode_nal(H264Context *h, H264SliceContext *sl,
     if(i>=length-1){ //no escaped 0
         *dst_length= length;
         *consumed= length+1; //+1 for the header
-        if(h->avctx->flags2 & CODEC_FLAG2_FAST){
+        if(h->avctx->flags2 & AV_CODEC_FLAG2_FAST){
             return src;
         }else{
             memcpy(dst, src, length);
@@ -324,7 +327,7 @@ const uint8_t *ff_h264_decode_nal(H264Context *h, H264SliceContext *sl,
         dst[di++] = src[si++];
 
 nsc:
-    memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
+    memset(dst + di, 0, AV_INPUT_BUFFER_PADDING_SIZE);
 
     *dst_length = di;
     *consumed   = si + 1; // +1 for the header
@@ -591,6 +594,9 @@ static int h264_init_context(AVCodecContext *avctx, H264Context *h)
     int i;
 
     h->avctx                 = avctx;
+    h->backup_width          = -1;
+    h->backup_height         = -1;
+    h->backup_pix_fmt        = AV_PIX_FMT_NONE;
     h->dequant_coeff_pps     = -1;
     h->current_sps_id        = -1;
     h->cur_chroma_format_idc = -1;
@@ -641,6 +647,8 @@ static int h264_init_context(AVCodecContext *avctx, H264Context *h)
     return 0;
 }
 
+static AVOnce h264_vlc_init = AV_ONCE_INIT;
+
 av_cold int ff_h264_decode_init(AVCodecContext *avctx)
 {
     H264Context *h = avctx->priv_data;
@@ -654,9 +662,11 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx)
     if (!avctx->has_b_frames)
         h->low_delay = 1;
 
-    ff_h264_decode_init_vlc();
-
-    ff_init_cabac_states();
+    ret = ff_thread_once(&h264_vlc_init, ff_h264_decode_init_vlc);
+    if (ret != 0) { /* ff_thread_once() reported a failure */
+        av_log(avctx, AV_LOG_ERROR, "pthread_once has failed.\n");
+        return AVERROR_UNKNOWN;
+    }
 
     if (avctx->codec_id == AV_CODEC_ID_H264) {
         if (avctx->ticks_per_frame == 1) {
@@ -671,7 +681,7 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx)
     if (avctx->extradata_size > 0 && avctx->extradata) {
         ret = ff_h264_decode_extradata(h, avctx->extradata, avctx->extradata_size);
         if (ret < 0) {
-            ff_h264_free_context(h);
+            h264_decode_end(avctx);
             return ret;
         }
     }
@@ -698,6 +708,7 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx)
     return 0;
 }
 
+#if HAVE_THREADS
 static int decode_init_thread_copy(AVCodecContext *avctx)
 {
     H264Context *h = avctx->priv_data;
@@ -716,6 +727,7 @@ static int decode_init_thread_copy(AVCodecContext *avctx)
 
     return 0;
 }
+#endif
 
 /**
  * Run setup operations that must be run after slice header decoding.
@@ -800,7 +812,7 @@ static void decode_postinit(H264Context *h, int setup_finished)
         /* Derive top_field_first from field pocs. */
         cur->f->top_field_first = cur->field_poc[0] < cur->field_poc[1];
     } else {
-        if (cur->f->interlaced_frame || h->sps.pic_struct_present_flag) {
+        if (h->sps.pic_struct_present_flag) {
             /* Use picture timing SEI information. Even if it is a
              * information of a past frame, better than nothing. */
             if (h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM ||
@@ -808,6 +820,10 @@ static void decode_postinit(H264Context *h, int setup_finished)
                 cur->f->top_field_first = 1;
             else
                 cur->f->top_field_first = 0;
+        } else if (cur->f->interlaced_frame) {
+            /* Default to top field first when pic_struct_present_flag
+             * is not set but an interlaced frame is detected. */
+            cur->f->top_field_first = 1;
         } else {
             /* Most likely progressive */
             cur->f->top_field_first = 0;
@@ -866,24 +882,38 @@ static void decode_postinit(H264Context *h, int setup_finished)
         }
     }
 
+    if (h->sei_reguserdata_afd_present) {
+        AVFrameSideData *sd = av_frame_new_side_data(cur->f, AV_FRAME_DATA_AFD,
+                                                     sizeof(uint8_t));
+
+        if (sd) {
+            *sd->data = h->active_format_description;
+            h->sei_reguserdata_afd_present = 0;
+        }
+    }
+
+    if (h->a53_caption) {
+        AVFrameSideData *sd = av_frame_new_side_data(cur->f,
+                                                     AV_FRAME_DATA_A53_CC,
+                                                     h->a53_caption_size);
+        if (sd)
+            memcpy(sd->data, h->a53_caption, h->a53_caption_size);
+        av_freep(&h->a53_caption);
+        h->a53_caption_size = 0;
+        h->avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
+    }
+
     cur->mmco_reset = h->mmco_reset;
     h->mmco_reset = 0;
 
     // FIXME do something with unavailable reference frames
 
     /* Sort B-frames into display order */
-
-    if (h->sps.bitstream_restriction_flag &&
-        h->avctx->has_b_frames < h->sps.num_reorder_frames) {
-        h->avctx->has_b_frames = h->sps.num_reorder_frames;
-        h->low_delay           = 0;
-    }
-
-    if (h->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT &&
-        !h->sps.bitstream_restriction_flag) {
-        h->avctx->has_b_frames = MAX_DELAYED_PIC_COUNT - 1;
-        h->low_delay           = 0;
+    if (h->sps.bitstream_restriction_flag ||
+        h->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT) {
+        h->avctx->has_b_frames = FFMAX(h->avctx->has_b_frames, h->sps.num_reorder_frames);
     }
+    h->low_delay = !h->avctx->has_b_frames;
 
     for (i = 0; 1; i++) {
         if(i == MAX_DELAYED_PIC_COUNT || cur->poc < h->last_pocs[i]){
@@ -905,7 +935,7 @@ static void decode_postinit(H264Context *h, int setup_finished)
         h->last_pocs[0] = cur->poc;
         cur->mmco_reset = 1;
     } else if(h->avctx->has_b_frames < out_of_order && !h->sps.bitstream_restriction_flag){
-        av_log(h->avctx, AV_LOG_VERBOSE, "Increasing reorder buffer to %d\n", out_of_order);
+        av_log(h->avctx, AV_LOG_INFO, "Increasing reorder buffer to %d\n", out_of_order);
         h->avctx->has_b_frames = out_of_order;
         h->low_delay = 0;
     }
@@ -961,8 +991,12 @@ static void decode_postinit(H264Context *h, int setup_finished)
         h->next_output_pic->recovered |= !!(h->frame_recovered & FRAME_RECOVERED_SEI);
     }
 
-    if (setup_finished && !h->avctx->hwaccel)
+    if (setup_finished && !h->avctx->hwaccel) {
         ff_thread_finish_setup(h->avctx);
+
+        if (h->avctx->active_thread_type & FF_THREAD_FRAME)
+            h->setup_finished = 1;
+    }
 }
 
 int ff_pred_weight_table(H264Context *h, H264SliceContext *sl)
@@ -1334,7 +1368,7 @@ static int get_last_needed_nal(H264Context *h, const uint8_t *buf, int buf_size)
         case NAL_IDR_SLICE:
         case NAL_SLICE:
             init_get_bits(&gb, ptr, bit_length);
-            if (!get_ue_golomb(&gb) ||
+            if (!get_ue_golomb_long(&gb) ||  // first_mb_in_slice
                 !first_slice ||
                 first_slice != h->nal_unit_type)
                 nals_needed = nal_index;
@@ -1364,7 +1398,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size,
     if(!h->slice_context_count)
          h->slice_context_count= 1;
     h->max_contexts = h->slice_context_count;
-    if (!(avctx->flags2 & CODEC_FLAG2_CHUNKS)) {
+    if (!(avctx->flags2 & AV_CODEC_FLAG2_CHUNKS)) {
         h->current_slice = 0;
         if (!h->first_field)
             h->cur_pic_ptr = NULL;
@@ -1518,8 +1552,6 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size,
                 // "recovered".
                 if (h->nal_unit_type == NAL_IDR_SLICE)
                     h->frame_recovered |= FRAME_RECOVERED_IDR;
-                h->frame_recovered |= 3*!!(avctx->flags2 & CODEC_FLAG2_SHOW_ALL);
-                h->frame_recovered |= 3*!!(avctx->flags & CODEC_FLAG_OUTPUT_CORRUPT);
 #if 1
                 h->cur_pic_ptr->recovered |= h->frame_recovered;
 #else
@@ -1527,15 +1559,17 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size,
 #endif
 
                 if (h->current_slice == 1) {
-                    if (!(avctx->flags2 & CODEC_FLAG2_CHUNKS))
+                    if (!(avctx->flags2 & AV_CODEC_FLAG2_CHUNKS))
                         decode_postinit(h, nal_index >= nals_needed);
 
                     if (h->avctx->hwaccel &&
                         (ret = h->avctx->hwaccel->start_frame(h->avctx, buf, buf_size)) < 0)
                         goto end;
+#if FF_API_CAP_VDPAU
                     if (CONFIG_H264_VDPAU_DECODER &&
-                        h->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU)
+                        h->avctx->codec->capabilities & AV_CODEC_CAP_HWACCEL_VDPAU)
                         ff_vdpau_h264_picture_start(h);
+#endif
                 }
 
                 if (sl->redundant_pic_count == 0) {
@@ -1545,14 +1579,16 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size,
                                                            consumed);
                         if (ret < 0)
                             goto end;
+#if FF_API_CAP_VDPAU
                     } else if (CONFIG_H264_VDPAU_DECODER &&
-                               h->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU) {
+                               h->avctx->codec->capabilities & AV_CODEC_CAP_HWACCEL_VDPAU) {
                         ff_vdpau_add_data_chunk(h->cur_pic_ptr->f->data[0],
                                                 start_code,
                                                 sizeof(start_code));
                         ff_vdpau_add_data_chunk(h->cur_pic_ptr->f->data[0],
                                                 &buf[buf_index - consumed],
                                                 consumed);
+#endif
                     } else
                         context_count++;
                 }
@@ -1642,6 +1678,47 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size,
 
     ret = 0;
 end:
+
+#if CONFIG_ERROR_RESILIENCE
+    sl = h->slice_ctx;
+    /*
+     * FIXME: The error handling code does not seem to support interlaced
+     * content when slices span multiple rows.
+     * The ff_er_add_slice() calls don't work right for bottom
+     * fields; they cause massive erroneous error concealment.
+     * Error marking covers both fields (top and bottom).
+     * This causes a mismatched s->error_count
+     * and a bad error table. Further, the error count goes to
+     * INT_MAX when called for the bottom field, because mb_y is
+     * past the end by one (caller's fault), and resync_mb_y != 0
+     * causes problems for the first MB line, too.
+     */
+    if (!FIELD_PICTURE(h) && h->current_slice && !h->sps.new && h->enable_er) {
+        int use_last_pic = h->last_pic_for_ec.f->buf[0] && !sl->ref_count[0];
+
+        ff_h264_set_erpic(&sl->er.cur_pic, h->cur_pic_ptr);
+
+        if (use_last_pic) {
+            ff_h264_set_erpic(&sl->er.last_pic, &h->last_pic_for_ec);
+            sl->ref_list[0][0].parent = &h->last_pic_for_ec;
+            memcpy(sl->ref_list[0][0].data, h->last_pic_for_ec.f->data, sizeof(sl->ref_list[0][0].data));
+            memcpy(sl->ref_list[0][0].linesize, h->last_pic_for_ec.f->linesize, sizeof(sl->ref_list[0][0].linesize));
+            sl->ref_list[0][0].reference = h->last_pic_for_ec.reference;
+        } else if (sl->ref_count[0]) {
+            ff_h264_set_erpic(&sl->er.last_pic, sl->ref_list[0][0].parent);
+        } else
+            ff_h264_set_erpic(&sl->er.last_pic, NULL);
+
+        if (sl->ref_count[1])
+            ff_h264_set_erpic(&sl->er.next_pic, sl->ref_list[1][0].parent);
+
+        sl->er.ref_count = sl->ref_count[0];
+
+        ff_er_frame_end(&sl->er);
+        if (use_last_pic)
+            memset(&sl->ref_list[0][0], 0, sizeof(sl->ref_list[0][0]));
+    }
+#endif /* CONFIG_ERROR_RESILIENCE */
     /* clean up */
     if (h->cur_pic_ptr && !h->droppable) {
         ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX,
@@ -1675,6 +1752,14 @@ static int output_frame(H264Context *h, AVFrame *dst, H264Picture *srcp)
 
     av_dict_set(&dst->metadata, "stereo_mode", ff_h264_sei_stereo_mode(h), 0);
 
+    h->backup_width   = h->avctx->width;
+    h->backup_height  = h->avctx->height;
+    h->backup_pix_fmt = h->avctx->pix_fmt;
+
+    h->avctx->width   = dst->width;
+    h->avctx->height  = dst->height;
+    h->avctx->pix_fmt = dst->format;
+
     if (srcp->sei_recovery_frame_cnt == 0)
         dst->key_frame = 1;
     if (!srcp->crop)
@@ -1696,7 +1781,7 @@ static int is_extra(const uint8_t *buf, int buf_size)
     const uint8_t *p= buf+6;
     while(cnt--){
         int nalsize= AV_RB16(p) + 2;
-        if(nalsize > buf_size - (p-buf) || p[2]!=0x67)
+        if(nalsize > buf_size - (p-buf) || (p[2] & 0x9F) != 7)
             return 0;
         p += nalsize;
     }
@@ -1705,7 +1790,7 @@ static int is_extra(const uint8_t *buf, int buf_size)
         return 0;
     while(cnt--){
         int nalsize= AV_RB16(p) + 2;
-        if(nalsize > buf_size - (p-buf) || p[2]!=0x68)
+        if(nalsize > buf_size - (p-buf) || (p[2] & 0x9F) != 8)
             return 0;
         p += nalsize;
     }
@@ -1725,6 +1810,20 @@ static int h264_decode_frame(AVCodecContext *avctx, void *data,
     int ret;
 
     h->flags = avctx->flags;
+    h->setup_finished = 0;
+
+    if (h->backup_width != -1) {
+        avctx->width    = h->backup_width;
+        h->backup_width = -1;
+    }
+    if (h->backup_height != -1) {
+        avctx->height    = h->backup_height;
+        h->backup_height = -1;
+    }
+    if (h->backup_pix_fmt != AV_PIX_FMT_NONE) {
+        avctx->pix_fmt    = h->backup_pix_fmt;
+        h->backup_pix_fmt = AV_PIX_FMT_NONE;
+    }
 
     ff_h264_unref_picture(h, &h->last_pic_for_ec);
 
@@ -1781,7 +1880,7 @@ static int h264_decode_frame(AVCodecContext *avctx, void *data,
         goto out;
     }
 
-    if (!(avctx->flags2 & CODEC_FLAG2_CHUNKS) && !h->cur_pic_ptr) {
+    if (!(avctx->flags2 & AV_CODEC_FLAG2_CHUNKS) && !h->cur_pic_ptr) {
         if (avctx->skip_frame >= AVDISCARD_NONREF ||
             buf_size >= 4 && !memcmp("Q264", buf, 4))
             return buf_size;
@@ -1789,16 +1888,18 @@ static int h264_decode_frame(AVCodecContext *avctx, void *data,
         return AVERROR_INVALIDDATA;
     }
 
-    if (!(avctx->flags2 & CODEC_FLAG2_CHUNKS) ||
+    if (!(avctx->flags2 & AV_CODEC_FLAG2_CHUNKS) ||
         (h->mb_y >= h->mb_height && h->mb_height)) {
-        if (avctx->flags2 & CODEC_FLAG2_CHUNKS)
+        if (avctx->flags2 & AV_CODEC_FLAG2_CHUNKS)
             decode_postinit(h, 1);
 
-        ff_h264_field_end(h, &h->slice_ctx[0], 0);
+        if ((ret = ff_h264_field_end(h, &h->slice_ctx[0], 0)) < 0)
+            return ret;
 
         /* Wait for second field. */
         *got_frame = 0;
-        if (h->next_output_pic && (
+        if (h->next_output_pic && ((avctx->flags & AV_CODEC_FLAG_OUTPUT_CORRUPT) ||
+                                   (avctx->flags2 & AV_CODEC_FLAG2_SHOW_ALL) ||
                                    h->next_output_pic->recovered)) {
             if (!h->next_output_pic->recovered)
                 h->next_output_pic->f->flags |= AV_FRAME_FLAG_CORRUPT;
@@ -1867,6 +1968,9 @@ av_cold void ff_h264_free_context(H264Context *h)
     av_freep(&h->slice_ctx);
     h->nb_slice_ctx = 0;
 
+    h->a53_caption_size = 0;
+    av_freep(&h->a53_caption);
+
     for (i = 0; i < MAX_SPS_COUNT; i++)
         av_freep(h->sps_buffers + i);
 
@@ -1892,9 +1996,9 @@ static av_cold int h264_decode_end(AVCodecContext *avctx)
 #define OFFSET(x) offsetof(H264Context, x)
 #define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
 static const AVOption h264_options[] = {
-    {"is_avc", "is avc", offsetof(H264Context, is_avc), FF_OPT_TYPE_INT, {.i64 = 0}, 0, 1, 0},
-    {"nal_length_size", "nal_length_size", offsetof(H264Context, nal_length_size), FF_OPT_TYPE_INT, {.i64 = 0}, 0, 4, 0},
-    { "enable_er", "Enable error resilience on damaged frames (unsafe)", OFFSET(enable_er), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1, VD },
+    {"is_avc", "is avc", offsetof(H264Context, is_avc), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, 0},
+    {"nal_length_size", "nal_length_size", offsetof(H264Context, nal_length_size), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 4, 0},
+    { "enable_er", "Enable error resilience on damaged frames (unsafe)", OFFSET(enable_er), AV_OPT_TYPE_BOOL, { .i64 = -1 }, -1, 1, VD },
     { NULL },
 };
 
@@ -1905,23 +2009,6 @@ static const AVClass h264_class = {
     .version    = LIBAVUTIL_VERSION_INT,
 };
 
-static const AVProfile profiles[] = {
-    { FF_PROFILE_H264_BASELINE,             "Baseline"              },
-    { FF_PROFILE_H264_CONSTRAINED_BASELINE, "Constrained Baseline"  },
-    { FF_PROFILE_H264_MAIN,                 "Main"                  },
-    { FF_PROFILE_H264_EXTENDED,             "Extended"              },
-    { FF_PROFILE_H264_HIGH,                 "High"                  },
-    { FF_PROFILE_H264_HIGH_10,              "High 10"               },
-    { FF_PROFILE_H264_HIGH_10_INTRA,        "High 10 Intra"         },
-    { FF_PROFILE_H264_HIGH_422,             "High 4:2:2"            },
-    { FF_PROFILE_H264_HIGH_422_INTRA,       "High 4:2:2 Intra"      },
-    { FF_PROFILE_H264_HIGH_444,             "High 4:4:4"            },
-    { FF_PROFILE_H264_HIGH_444_PREDICTIVE,  "High 4:4:4 Predictive" },
-    { FF_PROFILE_H264_HIGH_444_INTRA,       "High 4:4:4 Intra"      },
-    { FF_PROFILE_H264_CAVLC_444,            "CAVLC 4:4:4"           },
-    { FF_PROFILE_UNKNOWN },
-};
-
 AVCodec ff_h264_decoder = {
     .name                  = "h264",
     .long_name             = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
@@ -1931,17 +2018,18 @@ AVCodec ff_h264_decoder = {
     .init                  = ff_h264_decode_init,
     .close                 = h264_decode_end,
     .decode                = h264_decode_frame,
-    .capabilities          = /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 |
-                             CODEC_CAP_DELAY | CODEC_CAP_SLICE_THREADS |
-                             CODEC_CAP_FRAME_THREADS,
+    .capabilities          = /*AV_CODEC_CAP_DRAW_HORIZ_BAND |*/ AV_CODEC_CAP_DR1 |
+                             AV_CODEC_CAP_DELAY | AV_CODEC_CAP_SLICE_THREADS |
+                             AV_CODEC_CAP_FRAME_THREADS,
+    .caps_internal         = FF_CODEC_CAP_INIT_THREADSAFE,
     .flush                 = flush_dpb,
     .init_thread_copy      = ONLY_IF_THREADS_ENABLED(decode_init_thread_copy),
     .update_thread_context = ONLY_IF_THREADS_ENABLED(ff_h264_update_thread_context),
-    .profiles              = NULL_IF_CONFIG_SMALL(profiles),
+    .profiles              = NULL_IF_CONFIG_SMALL(ff_h264_profiles),
     .priv_class            = &h264_class,
 };
 
-#if CONFIG_H264_VDPAU_DECODER
+#if CONFIG_H264_VDPAU_DECODER && FF_API_VDPAU
 static const AVClass h264_vdpau_class = {
     .class_name = "H264 VDPAU Decoder",
     .item_name  = av_default_item_name,
@@ -1958,11 +2046,11 @@ AVCodec ff_h264_vdpau_decoder = {
     .init           = ff_h264_decode_init,
     .close          = h264_decode_end,
     .decode         = h264_decode_frame,
-    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HWACCEL_VDPAU,
     .flush          = flush_dpb,
     .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_VDPAU_H264,
                                                      AV_PIX_FMT_NONE},
-    .profiles       = NULL_IF_CONFIG_SMALL(profiles),
+    .profiles       = NULL_IF_CONFIG_SMALL(ff_h264_profiles),
     .priv_class     = &h264_vdpau_class,
 };
 #endif
diff --git a/libavcodec/h264.h b/libavcodec/h264.h
index 95db9124..a5fc3a05 100644
--- a/libavcodec/h264.h
+++ b/libavcodec/h264.h
@@ -29,6 +29,7 @@
 #define AVCODEC_H264_H
 
 #include "libavutil/intreadwrite.h"
+#include "libavutil/thread.h"
 #include "cabac.h"
 #include "error_resilience.h"
 #include "get_bits.h"
@@ -132,11 +133,12 @@ enum {
 typedef enum {
     SEI_TYPE_BUFFERING_PERIOD       = 0,   ///< buffering period (H.264, D.1.1)
     SEI_TYPE_PIC_TIMING             = 1,   ///< picture timing
-    SEI_TYPE_USER_DATA_ITU_T_T35    = 4,   ///< user data registered by ITU-T Recommendation T.35
+    SEI_TYPE_USER_DATA_REGISTERED   = 4,   ///< registered user data as specified by Rec. ITU-T T.35
     SEI_TYPE_USER_DATA_UNREGISTERED = 5,   ///< unregistered user data
     SEI_TYPE_RECOVERY_POINT         = 6,   ///< recovery point (frame # to decoder sync)
     SEI_TYPE_FRAME_PACKING          = 45,  ///< frame packing arrangement
     SEI_TYPE_DISPLAY_ORIENTATION    = 47,  ///< display orientation
+    SEI_TYPE_GREEN_METADATA         = 56   ///< GreenMPEG information
 } SEI_Type;
 
 /**
@@ -228,6 +230,8 @@ typedef struct SPS {
     int residual_color_transform_flag;    ///< residual_colour_transform_flag
     int constraint_set_flags;             ///< constraint_set[0-3]_flag
     int new;                              ///< flag to keep track if the decoder context needs re-init due to changed SPS
+    uint8_t data[4096];
+    size_t data_size;
 } SPS;
 
 /**
@@ -253,6 +257,8 @@ typedef struct PPS {
     uint8_t scaling_matrix8[6][64];
     uint8_t chroma_qp_table[2][QP_MAX_NUM+1];  ///< pre-scaled (with chroma_qp_index_offset) version of qp_table
     int chroma_qp_diff;
+    uint8_t data[4096];
+    size_t data_size;
 } PPS;
 
 /**
@@ -267,6 +273,22 @@ typedef struct FPA {
     int         quincunx_sampling_flag;
 } FPA;
 
+/**
+ * Green Metadata information (see SEI_TYPE_GREEN_METADATA).
+ */
+typedef struct GreenMetaData {
+    uint8_t  green_metadata_type;
+    uint8_t  period_type;
+    uint16_t  num_seconds;
+    uint16_t  num_pictures;
+    uint8_t percent_non_zero_macroblocks;
+    uint8_t percent_intra_coded_macroblocks;
+    uint8_t percent_six_tap_filtering;
+    uint8_t percent_alpha_point_deblocking_instance;
+    uint8_t xsd_metric_type;
+    uint16_t xsd_metric_value;
+} GreenMetaData;
+
 /**
  * Memory management control operation opcode.
  */
@@ -409,7 +431,8 @@ typedef struct H264SliceContext {
     int mb_xy;
     int resync_mb_x;
     int resync_mb_y;
-    int mb_index_end;
+    // index of the first MB of the next slice
+    int next_slice_idx;
     int mb_skip_run;
     int is_complex;
 
@@ -519,6 +542,14 @@ typedef struct H264Context {
     int width, height;
     int chroma_x_shift, chroma_y_shift;
 
+    /**
+     * Backup of frame properties: needed because they can differ
+     * between the returned frame and the last decoded frame.
+     */
+    int backup_width;
+    int backup_height;
+    enum AVPixelFormat backup_pix_fmt;
+
     int droppable;
     int coded_picture_number;
     int low_delay;
@@ -638,7 +669,7 @@ typedef struct H264Context {
      */
     int max_pic_num;
 
-    H264Ref default_ref_list[2][32]; ///< base reference list for all slices of a coded picture
+    H264Ref default_ref[2];
     H264Picture *short_ref[32];
     H264Picture *long_ref[32];
     H264Picture *delayed_pic[MAX_DELAYED_PIC_COUNT + 2]; // FIXME size?
@@ -683,8 +714,6 @@ typedef struct H264Context {
 
     enum AVPictureType pict_type;
 
-    int last_slice_type;
-    unsigned int last_ref_count[2];
     /** @} */
 
     /**
@@ -715,6 +744,14 @@ typedef struct H264Context {
     int sei_anticlockwise_rotation;
     int sei_hflip, sei_vflip;
 
+    /**
+     * User data registered by Rec. ITU-T T.35 SEI
+     */
+    int sei_reguserdata_afd_present;
+    uint8_t active_format_description;
+    int a53_caption_size;
+    uint8_t *a53_caption;
+
     /**
      * Bit set of clock types for fields/frames in picture timing SEI message.
      * For each found ct_type, appropriate bit is set (e.g., bit 1 for
@@ -773,6 +810,11 @@ typedef struct H264Context {
 
     int missing_fields;
 
+    /* For frame threading, this is set to 1
+     * after ff_thread_finish_setup() has been called, so we cannot modify
+     * some context properties (which are supposed to stay constant between
+     * slices) anymore. */
+    int setup_finished;
 
     // Timestamp stuff
     int sei_buffering_period_present;   ///< Buffering period SEI flag
@@ -796,6 +838,10 @@ typedef struct H264Context {
     /* Motion Estimation */
     qpel_mc_func (*qpel_put)[16];
     qpel_mc_func (*qpel_avg)[16];
+
+    /* Green Metadata */
+    GreenMetaData sei_green_metadata;
+
 } H264Context;
 
 extern const uint8_t ff_h264_chroma_qp[7][QP_MAX_NUM + 1]; ///< One chroma qp table for each possible bit depth (8-14).
@@ -849,11 +895,6 @@ int ff_h264_get_slice_type(const H264SliceContext *sl);
  */
 int ff_h264_alloc_tables(H264Context *h);
 
-/**
- * Fill the default_ref_list.
- */
-int ff_h264_fill_default_ref_list(H264Context *h, H264SliceContext *sl);
-
 int ff_h264_decode_ref_pic_list_reordering(H264Context *h, H264SliceContext *sl);
 void ff_h264_fill_mbaff_ref_list(H264Context *h, H264SliceContext *sl);
 void ff_h264_remove_all_refs(H264Context *h);
@@ -1148,15 +1189,17 @@ static inline int get_avc_nalsize(H264Context *h, const uint8_t *buf,
 {
     int i, nalsize = 0;
 
-    if (*buf_index >= buf_size - h->nal_length_size)
-        return -1;
+    if (*buf_index >= buf_size - h->nal_length_size) {
+        // the end of the buffer is reached, refill it.
+        return AVERROR(EAGAIN);
+    }
 
     for (i = 0; i < h->nal_length_size; i++)
         nalsize = ((unsigned)nalsize << 8) | buf[(*buf_index)++];
     if (nalsize <= 0 || nalsize > buf_size - *buf_index) {
         av_log(h->avctx, AV_LOG_ERROR,
                "AVC: nal size %d\n", nalsize);
-        return -1;
+        return AVERROR_INVALIDDATA;
     }
     return nalsize;
 }
diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c
index c1c8b808..deab35a3 100644
--- a/libavcodec/h264_cabac.c
+++ b/libavcodec/h264_cabac.c
@@ -1540,8 +1540,12 @@ static int decode_cabac_mb_mvd(H264SliceContext *sl, int ctxbase, int amvd, int
     int amvd1 = sl->mvd_cache[list][scan8[n] - 1][1] +\
                 sl->mvd_cache[list][scan8[n] - 8][1];\
 \
-    mx += decode_cabac_mb_mvd(sl, 40, amvd0, &mpx);\
-    my += decode_cabac_mb_mvd(sl, 47, amvd1, &mpy);\
+    int mxd = decode_cabac_mb_mvd(sl, 40, amvd0, &mpx);\
+    int myd = decode_cabac_mb_mvd(sl, 47, amvd1, &mpy);\
+    if (mxd == INT_MIN || myd == INT_MIN) \
+        return AVERROR_INVALIDDATA; \
+    mx += mxd;\
+    my += myd;\
 }
 
 static av_always_inline int get_cabac_cbf_ctx(H264SliceContext *sl,
@@ -2026,6 +2030,7 @@ int ff_h264_decode_mb_cabac(const H264Context *h, H264SliceContext *sl)
         const int mb_size = ff_h264_mb_sizes[h->sps.chroma_format_idc] *
                             h->sps.bit_depth_luma >> 3;
         const uint8_t *ptr;
+        int ret;
 
         // We assume these blocks are very rare so we do not optimize it.
         // FIXME The two following lines get the bitstream position in the cabac
@@ -2042,7 +2047,9 @@ int ff_h264_decode_mb_cabac(const H264Context *h, H264SliceContext *sl)
         sl->intra_pcm_ptr = ptr;
         ptr += mb_size;
 
-        ff_init_cabac_decoder(&sl->cabac, ptr, sl->cabac.bytestream_end - ptr);
+        ret = ff_init_cabac_decoder(&sl->cabac, ptr, sl->cabac.bytestream_end - ptr);
+        if (ret < 0)
+            return ret;
 
         // All blocks are present
         h->cbp_table[mb_xy] = 0xf7ef;
diff --git a/libavcodec/h264_direct.c b/libavcodec/h264_direct.c
index 5756a7ba..5f66a67d 100644
--- a/libavcodec/h264_direct.c
+++ b/libavcodec/h264_direct.c
@@ -137,6 +137,10 @@ void ff_h264_direct_ref_list_init(const H264Context *const h, H264SliceContext *
     if (h->picture_structure == PICT_FRAME) {
         int cur_poc  = h->cur_pic_ptr->poc;
         int *col_poc = sl->ref_list[1][0].parent->field_poc;
+        if (col_poc[0] == INT_MAX && col_poc[1] == INT_MAX) {
+            av_log(h->avctx, AV_LOG_ERROR, "co located POCs unavailable\n");
+            sl->col_parity = 1;
+        } else
         sl->col_parity = (FFABS(col_poc[0] - cur_poc) >=
                           FFABS(col_poc[1] - cur_poc));
         ref1sidx =
diff --git a/libavcodec/h264_loopfilter.c b/libavcodec/h264_loopfilter.c
index cb911341..00149272 100644
--- a/libavcodec/h264_loopfilter.c
+++ b/libavcodec/h264_loopfilter.c
@@ -242,7 +242,7 @@ static av_always_inline void h264_filter_mb_fast_internal(const H264Context *h,
                                                           unsigned int uvlinesize,
                                                           int pixel_shift)
 {
-    int chroma = CHROMA(h) && !(CONFIG_GRAY && (h->flags&CODEC_FLAG_GRAY));
+    int chroma = CHROMA(h) && !(CONFIG_GRAY && (h->flags & AV_CODEC_FLAG_GRAY));
     int chroma444 = CHROMA444(h);
     int chroma422 = CHROMA422(h);
 
@@ -723,7 +723,7 @@ void ff_h264_filter_mb(const H264Context *h, H264SliceContext *sl,
     const int mb_type = h->cur_pic.mb_type[mb_xy];
     const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
     int first_vertical_edge_done = 0;
-    int chroma = CHROMA(h) && !(CONFIG_GRAY && (h->flags&CODEC_FLAG_GRAY));
+    int chroma = CHROMA(h) && !(CONFIG_GRAY && (h->flags & AV_CODEC_FLAG_GRAY));
     int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8);
     int a = 52 + sl->slice_alpha_c0_offset - qp_bd_offset;
     int b = 52 + sl->slice_beta_offset - qp_bd_offset;
diff --git a/libavcodec/h264_mb.c b/libavcodec/h264_mb.c
index 634d8144..8302de04 100644
--- a/libavcodec/h264_mb.c
+++ b/libavcodec/h264_mb.c
@@ -250,7 +250,7 @@ static av_always_inline void mc_dir_part(const H264Context *h, H264SliceContext
     if (!square)
         qpix_op[luma_xy](dest_y + delta, src_y + delta, sl->mb_linesize);
 
-    if (CONFIG_GRAY && h->flags & CODEC_FLAG_GRAY)
+    if (CONFIG_GRAY && h->flags & AV_CODEC_FLAG_GRAY)
         return;
 
     if (chroma_idc == 3 /* yuv444 */) {
@@ -425,7 +425,7 @@ static av_always_inline void mc_part_weighted(const H264Context *h, H264SliceCon
             int weight1 = 64 - weight0;
             luma_weight_avg(dest_y, tmp_y, sl->mb_linesize,
                             height, 5, weight0, weight1, 0);
-            if (!CONFIG_GRAY || !(h->flags & CODEC_FLAG_GRAY)) {
+            if (!CONFIG_GRAY || !(h->flags & AV_CODEC_FLAG_GRAY)) {
                 chroma_weight_avg(dest_cb, tmp_cb, sl->mb_uvlinesize,
                                   chroma_height, 5, weight0, weight1, 0);
                 chroma_weight_avg(dest_cr, tmp_cr, sl->mb_uvlinesize,
@@ -438,7 +438,7 @@ static av_always_inline void mc_part_weighted(const H264Context *h, H264SliceCon
                             sl->luma_weight[refn1][1][0],
                             sl->luma_weight[refn0][0][1] +
                             sl->luma_weight[refn1][1][1]);
-            if (!CONFIG_GRAY || !(h->flags & CODEC_FLAG_GRAY)) {
+            if (!CONFIG_GRAY || !(h->flags & AV_CODEC_FLAG_GRAY)) {
                 chroma_weight_avg(dest_cb, tmp_cb, sl->mb_uvlinesize, chroma_height,
                                   sl->chroma_log2_weight_denom,
                                   sl->chroma_weight[refn0][0][0][0],
@@ -465,7 +465,7 @@ static av_always_inline void mc_part_weighted(const H264Context *h, H264SliceCon
                        sl->luma_log2_weight_denom,
                        sl->luma_weight[refn][list][0],
                        sl->luma_weight[refn][list][1]);
-        if (!CONFIG_GRAY || !(h->flags & CODEC_FLAG_GRAY)) {
+        if (!CONFIG_GRAY || !(h->flags & AV_CODEC_FLAG_GRAY)) {
             if (sl->use_weight_chroma) {
                 chroma_weight_op(dest_cb, sl->mb_uvlinesize, chroma_height,
                                  sl->chroma_log2_weight_denom,
@@ -566,7 +566,7 @@ static av_always_inline void xchg_mb_border(const H264Context *h, H264SliceConte
             XCHG(sl->top_borders[top_idx][sl->mb_x + 1],
                  src_y + (17 << pixel_shift), 1);
         }
-        if (simple || !CONFIG_GRAY || !(h->flags & CODEC_FLAG_GRAY)) {
+        if (simple || !CONFIG_GRAY || !(h->flags & AV_CODEC_FLAG_GRAY)) {
             if (chroma444) {
                 if (deblock_topleft) {
                     XCHG(top_border_m1 + (24 << pixel_shift), src_cb - (7 << pixel_shift), 1);
diff --git a/libavcodec/h264_mb_template.c b/libavcodec/h264_mb_template.c
index bc68a738..54420b90 100644
--- a/libavcodec/h264_mb_template.c
+++ b/libavcodec/h264_mb_template.c
@@ -112,7 +112,7 @@ static av_noinline void FUNC(hl_decode_mb)(const H264Context *h, H264SliceContex
                 for (j = 0; j < 16; j++)
                     tmp_y[j] = get_bits(&gb, bit_depth);
             }
-            if (SIMPLE || !CONFIG_GRAY || !(h->flags & CODEC_FLAG_GRAY)) {
+            if (SIMPLE || !CONFIG_GRAY || !(h->flags & AV_CODEC_FLAG_GRAY)) {
                 if (!h->sps.chroma_format_idc) {
                     for (i = 0; i < block_h; i++) {
                         uint16_t *tmp_cb = (uint16_t *)(dest_cb + i * uvlinesize);
@@ -137,7 +137,7 @@ static av_noinline void FUNC(hl_decode_mb)(const H264Context *h, H264SliceContex
         } else {
             for (i = 0; i < 16; i++)
                 memcpy(dest_y + i * linesize, sl->intra_pcm_ptr + i * 16, 16);
-            if (SIMPLE || !CONFIG_GRAY || !(h->flags & CODEC_FLAG_GRAY)) {
+            if (SIMPLE || !CONFIG_GRAY || !(h->flags & AV_CODEC_FLAG_GRAY)) {
                 if (!h->sps.chroma_format_idc) {
                     for (i = 0; i < 8; i++) {
                         memset(dest_cb + i * uvlinesize, 1 << (bit_depth - 1), 8);
@@ -159,7 +159,7 @@ static av_noinline void FUNC(hl_decode_mb)(const H264Context *h, H264SliceContex
                 xchg_mb_border(h, sl, dest_y, dest_cb, dest_cr, linesize,
                                uvlinesize, 1, 0, SIMPLE, PIXEL_SHIFT);
 
-            if (SIMPLE || !CONFIG_GRAY || !(h->flags & CODEC_FLAG_GRAY)) {
+            if (SIMPLE || !CONFIG_GRAY || !(h->flags & AV_CODEC_FLAG_GRAY)) {
                 h->hpc.pred8x8[sl->chroma_pred_mode](dest_cb, uvlinesize);
                 h->hpc.pred8x8[sl->chroma_pred_mode](dest_cr, uvlinesize);
             }
@@ -190,7 +190,7 @@ static av_noinline void FUNC(hl_decode_mb)(const H264Context *h, H264SliceContex
         hl_decode_mb_idct_luma(h, sl, mb_type, is_h264, SIMPLE, transform_bypass,
                                PIXEL_SHIFT, block_offset, linesize, dest_y, 0);
 
-        if ((SIMPLE || !CONFIG_GRAY || !(h->flags & CODEC_FLAG_GRAY)) &&
+        if ((SIMPLE || !CONFIG_GRAY || !(h->flags & AV_CODEC_FLAG_GRAY)) &&
             (sl->cbp & 0x30)) {
             uint8_t *dest[2] = { dest_cb, dest_cr };
             if (transform_bypass) {
@@ -280,7 +280,7 @@ static av_noinline void FUNC(hl_decode_mb_444)(const H264Context *h, H264SliceCo
     int i, j, p;
     const int *block_offset = &h->block_offset[0];
     const int transform_bypass = !SIMPLE && (sl->qscale == 0 && h->sps.transform_bypass);
-    const int plane_count      = (SIMPLE || !CONFIG_GRAY || !(h->flags & CODEC_FLAG_GRAY)) ? 3 : 1;
+    const int plane_count      = (SIMPLE || !CONFIG_GRAY || !(h->flags & AV_CODEC_FLAG_GRAY)) ? 3 : 1;
 
     for (p = 0; p < plane_count; p++) {
         dest[p] = h->cur_pic.f->data[p] +
diff --git a/libavcodec/h264_mc_template.c b/libavcodec/h264_mc_template.c
index eaead35b..e4333a73 100644
--- a/libavcodec/h264_mc_template.c
+++ b/libavcodec/h264_mc_template.c
@@ -158,6 +158,7 @@ static void MCFUNC(hl_motion)(const H264Context *h, H264SliceContext *sl,
         }
     }
 
-    prefetch_motion(h, sl, 1, PIXEL_SHIFT, CHROMA_IDC);
+    if (USES_LIST(mb_type, 1))
+        prefetch_motion(h, sl, 1, PIXEL_SHIFT, CHROMA_IDC);
 }
 
diff --git a/libavcodec/h264_mp4toannexb_bsf.c b/libavcodec/h264_mp4toannexb_bsf.c
index ae96ee95..2d447f75 100644
--- a/libavcodec/h264_mp4toannexb_bsf.c
+++ b/libavcodec/h264_mp4toannexb_bsf.c
@@ -33,6 +33,18 @@ typedef struct H264BSFContext {
     uint8_t  idr_sps_seen;
     uint8_t  idr_pps_seen;
     int      extradata_parsed;
+
+    /* When private_spspps is zero, spspps_buf points to the global extradata
+       and the bsf replaces the global extradata with its own allocated
+       version (default behaviour).
+       When private_spspps is non-zero, the bsf uses a private spspps buffer.
+       This mode is necessary when the bsf is used inside a decoder; otherwise
+       the bsf has issues after decoder re-initialization. Use the
+       "private_spspps_buf" argument to activate this mode.
+     */
+    int      private_spspps;
+    uint8_t *spspps_buf;
+    uint32_t spspps_size;
 } H264BSFContext;
 
 static int alloc_and_copy(uint8_t **poutbuf, int *poutbuf_size,
@@ -45,7 +57,7 @@ static int alloc_and_copy(uint8_t **poutbuf, int *poutbuf_size,
 
     *poutbuf_size += sps_pps_size + in_size + nal_header_size;
     if ((err = av_reallocp(poutbuf,
-                           *poutbuf_size + FF_INPUT_BUFFER_PADDING_SIZE)) < 0) {
+                           *poutbuf_size + AV_INPUT_BUFFER_PADDING_SIZE)) < 0) {
         *poutbuf_size = 0;
         return err;
     }
@@ -110,7 +122,7 @@ static int h264_extradata_to_annexb(H264BSFContext *ctx, AVCodecContext *avctx,
         if (!unit_nb && !sps_done++) {
             unit_nb = *extradata++; /* number of pps unit(s) */
             if (unit_nb) {
-                ctx->pps_offset = (extradata - 1) - (avctx->extradata + 4);
+                ctx->pps_offset = total_size;
                 pps_seen = 1;
             }
         }
@@ -129,9 +141,13 @@ static int h264_extradata_to_annexb(H264BSFContext *ctx, AVCodecContext *avctx,
                "Warning: PPS NALU missing or invalid. "
                "The resulting stream may not play.\n");
 
-    av_free(avctx->extradata);
-    avctx->extradata      = out;
-    avctx->extradata_size = total_size;
+    if (!ctx->private_spspps) {
+        av_free(avctx->extradata);
+        avctx->extradata      = out;
+        avctx->extradata_size = total_size;
+    }
+    ctx->spspps_buf  = out;
+    ctx->spspps_size = total_size;
 
     return length_size;
 }
@@ -159,7 +175,10 @@ static int h264_mp4toannexb_filter(AVBitStreamFilterContext *bsfc,
 
     /* retrieve sps and pps NAL units from extradata */
     if (!ctx->extradata_parsed) {
-        ret = h264_extradata_to_annexb(ctx, avctx, FF_INPUT_BUFFER_PADDING_SIZE);
+        if (args && strstr(args, "private_spspps_buf"))
+            ctx->private_spspps = 1;
+
+        ret = h264_extradata_to_annexb(ctx, avctx, AV_INPUT_BUFFER_PADDING_SIZE);
         if (ret < 0)
             return ret;
         ctx->length_size      = ret;
@@ -182,7 +201,7 @@ static int h264_mp4toannexb_filter(AVBitStreamFilterContext *bsfc,
         buf      += ctx->length_size;
         unit_type = *buf & 0x1f;
 
-        if (buf + nal_size > buf_end || nal_size < 0)
+        if (nal_size > buf_end - buf || nal_size < 0)
             goto fail;
 
         if (unit_type == 7)
@@ -195,8 +214,8 @@ static int h264_mp4toannexb_filter(AVBitStreamFilterContext *bsfc,
                     av_log(avctx, AV_LOG_WARNING, "SPS not present in the stream, nor in AVCC, stream may be unreadable\n");
                 else {
                     if ((ret = alloc_and_copy(poutbuf, poutbuf_size,
-                                         avctx->extradata + ctx->sps_offset,
-                                         ctx->pps_offset != -1 ? ctx->pps_offset : avctx->extradata_size - ctx->sps_offset,
+                                         ctx->spspps_buf + ctx->sps_offset,
+                                         ctx->pps_offset != -1 ? ctx->pps_offset : ctx->spspps_size - ctx->sps_offset,
                                          buf, nal_size)) < 0)
                         goto fail;
                     ctx->idr_sps_seen = 1;
@@ -214,7 +233,7 @@ static int h264_mp4toannexb_filter(AVBitStreamFilterContext *bsfc,
         /* prepend only to the first type 5 NAL unit of an IDR picture, if no sps/pps are already present */
         if (ctx->new_idr && unit_type == 5 && !ctx->idr_sps_seen && !ctx->idr_pps_seen) {
             if ((ret=alloc_and_copy(poutbuf, poutbuf_size,
-                               avctx->extradata, avctx->extradata_size,
+                               ctx->spspps_buf, ctx->spspps_size,
                                buf, nal_size)) < 0)
                 goto fail;
             ctx->new_idr = 0;
@@ -226,7 +245,7 @@ static int h264_mp4toannexb_filter(AVBitStreamFilterContext *bsfc,
                                      NULL, 0, buf, nal_size)) < 0)
                     goto fail;
             } else if ((ret = alloc_and_copy(poutbuf, poutbuf_size,
-                                        avctx->extradata + ctx->pps_offset, avctx->extradata_size - ctx->pps_offset,
+                                        ctx->spspps_buf + ctx->pps_offset, ctx->spspps_size - ctx->pps_offset,
                                         buf, nal_size)) < 0)
                 goto fail;
         } else {
@@ -253,8 +272,16 @@ static int h264_mp4toannexb_filter(AVBitStreamFilterContext *bsfc,
     return ret;
 }
 
+static void h264_mp4toannexb_filter_close(AVBitStreamFilterContext *bsfc)
+{
+    H264BSFContext *ctx = bsfc->priv_data;
+    if (ctx->private_spspps) /* else spspps_buf aliases avctx->extradata */
+        av_freep(&ctx->spspps_buf);
+}
+
 AVBitStreamFilter ff_h264_mp4toannexb_bsf = {
     .name           = "h264_mp4toannexb",
     .priv_data_size = sizeof(H264BSFContext),
     .filter         = h264_mp4toannexb_filter,
+    .close          = h264_mp4toannexb_filter_close,
 };
diff --git a/libavcodec/h264_mvpred.h b/libavcodec/h264_mvpred.h
index 57fa9b90..763746cc 100644
--- a/libavcodec/h264_mvpred.h
+++ b/libavcodec/h264_mvpred.h
@@ -771,7 +771,7 @@ static void fill_decode_caches(const H264Context *h, H264SliceContext *sl, int m
 
 #define MAP_F2F(idx, mb_type)                                           \
     if (!IS_INTERLACED(mb_type) && sl->ref_cache[list][idx] >= 0) {     \
-        sl->ref_cache[list][idx]    <<= 1;                              \
+        sl->ref_cache[list][idx]     *= 2;                              \
         sl->mv_cache[list][idx][1]   /= 2;                              \
         sl->mvd_cache[list][idx][1] >>= 1;                              \
     }
@@ -783,7 +783,7 @@ static void fill_decode_caches(const H264Context *h, H264SliceContext *sl, int m
 #define MAP_F2F(idx, mb_type)                                           \
     if (IS_INTERLACED(mb_type) && sl->ref_cache[list][idx] >= 0) {      \
         sl->ref_cache[list][idx]    >>= 1;                              \
-        sl->mv_cache[list][idx][1]  <<= 1;                              \
+        sl->mv_cache[list][idx][1]   *= 2;                              \
         sl->mvd_cache[list][idx][1] <<= 1;                              \
     }
 
diff --git a/libavcodec/h264_parser.c b/libavcodec/h264_parser.c
index 19d1aa3f..12d6397f 100644
--- a/libavcodec/h264_parser.c
+++ b/libavcodec/h264_parser.c
@@ -152,7 +152,7 @@ static int scan_mmco_reset(AVCodecParserContext *s)
                     unsigned int reordering_of_pic_nums_idc = get_ue_golomb_31(&sl->gb);
 
                     if (reordering_of_pic_nums_idc < 3)
-                        get_ue_golomb(&sl->gb);
+                        get_ue_golomb_long(&sl->gb);
                     else if (reordering_of_pic_nums_idc > 3) {
                         av_log(h->avctx, AV_LOG_ERROR,
                                "illegal reordering_of_pic_nums_idc %d\n",
@@ -191,7 +191,7 @@ static int scan_mmco_reset(AVCodecParserContext *s)
                 return 1;
 
             if (opcode == MMCO_SHORT2UNUSED || opcode == MMCO_SHORT2LONG)
-                get_ue_golomb(&sl->gb);
+                get_ue_golomb_long(&sl->gb); // difference_of_pic_nums_minus1
             if (opcode == MMCO_SHORT2LONG || opcode == MMCO_LONG2UNUSED ||
                 opcode == MMCO_LONG || opcode == MMCO_SET_MAX_LONG)
                 get_ue_golomb_31(&sl->gb);
@@ -373,7 +373,7 @@ static inline int parse_nal_units(AVCodecParserContext *s,
             }
 
             if (h->nal_unit_type == NAL_IDR_SLICE)
-                get_ue_golomb(&sl->gb); /* idr_pic_id */
+                get_ue_golomb_long(&sl->gb); /* idr_pic_id */
             if (h->sps.poc_type == 0) {
                 h->poc_lsb = get_bits(&sl->gb, h->sps.log2_max_poc_lsb);
 
diff --git a/libavcodec/h264_picture.c b/libavcodec/h264_picture.c
index bf1471de..c4b17c03 100644
--- a/libavcodec/h264_picture.c
+++ b/libavcodec/h264_picture.c
@@ -157,9 +157,11 @@ int ff_h264_field_end(H264Context *h, H264SliceContext *sl, int in_setup)
     int err = 0;
     h->mb_y = 0;
 
+#if FF_API_CAP_VDPAU
     if (CONFIG_H264_VDPAU_DECODER &&
-        h->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU)
+        h->avctx->codec->capabilities & AV_CODEC_CAP_HWACCEL_VDPAU)
         ff_vdpau_h264_set_reference_frames(h);
+#endif
 
     if (in_setup || !(avctx->active_thread_type & FF_THREAD_FRAME)) {
         if (!h->droppable) {
@@ -172,55 +174,17 @@ int ff_h264_field_end(H264Context *h, H264SliceContext *sl, int in_setup)
     }
 
     if (avctx->hwaccel) {
-        if (avctx->hwaccel->end_frame(avctx) < 0)
+        err = avctx->hwaccel->end_frame(avctx);
+        if (err < 0)
             av_log(avctx, AV_LOG_ERROR,
                    "hardware accelerator failed to decode picture\n");
     }
 
+#if FF_API_CAP_VDPAU
     if (CONFIG_H264_VDPAU_DECODER &&
-        h->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU)
+        h->avctx->codec->capabilities & AV_CODEC_CAP_HWACCEL_VDPAU)
         ff_vdpau_h264_picture_complete(h);
-
-#if CONFIG_ERROR_RESILIENCE
-    av_assert0(sl == h->slice_ctx);
-    /*
-     * FIXME: Error handling code does not seem to support interlaced
-     * when slices span multiple rows
-     * The ff_er_add_slice calls don't work right for bottom
-     * fields; they cause massive erroneous error concealing
-     * Error marking covers both fields (top and bottom).
-     * This causes a mismatched s->error_count
-     * and a bad error table. Further, the error count goes to
-     * INT_MAX when called for bottom field, because mb_y is
-     * past end by one (callers fault) and resync_mb_y != 0
-     * causes problems for the first MB line, too.
-     */
-    if (!FIELD_PICTURE(h) && h->current_slice && !h->sps.new && h->enable_er) {
-        int use_last_pic = h->last_pic_for_ec.f->buf[0] && !sl->ref_count[0];
-
-        ff_h264_set_erpic(&sl->er.cur_pic, h->cur_pic_ptr);
-
-        if (use_last_pic) {
-            ff_h264_set_erpic(&sl->er.last_pic, &h->last_pic_for_ec);
-            sl->ref_list[0][0].parent = &h->last_pic_for_ec;
-            memcpy(sl->ref_list[0][0].data, h->last_pic_for_ec.f->data, sizeof(sl->ref_list[0][0].data));
-            memcpy(sl->ref_list[0][0].linesize, h->last_pic_for_ec.f->linesize, sizeof(sl->ref_list[0][0].linesize));
-            sl->ref_list[0][0].reference = h->last_pic_for_ec.reference;
-        } else if (sl->ref_count[0]) {
-            ff_h264_set_erpic(&sl->er.last_pic, sl->ref_list[0][0].parent);
-        } else
-            ff_h264_set_erpic(&sl->er.last_pic, NULL);
-
-        if (sl->ref_count[1])
-            ff_h264_set_erpic(&sl->er.next_pic, sl->ref_list[1][0].parent);
-
-        sl->er.ref_count = sl->ref_count[0];
-
-        ff_er_frame_end(&sl->er);
-        if (use_last_pic)
-            memset(&sl->ref_list[0][0], 0, sizeof(sl->ref_list[0][0]));
-    }
-#endif /* CONFIG_ERROR_RESILIENCE */
+#endif
 
     if (!in_setup && !h->droppable)
         ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX,
diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c
index ae1b60a4..0bca9c1a 100644
--- a/libavcodec/h264_ps.c
+++ b/libavcodec/h264_ps.c
@@ -107,6 +107,26 @@ static const uint8_t default_scaling8[2][64] = {
       24, 25, 27, 28, 30, 32, 33, 35 }
 };
 
+/* { level_idc, maximum number of MBs in the DPB for that level } */
+static const int level_max_dpb_mbs[][2] = {
+    { 10, 396       },
+    { 11, 900       },
+    { 12, 2376      },
+    { 13, 2376      },
+    { 20, 2376      },
+    { 21, 4752      },
+    { 22, 8100      },
+    { 30, 8100      },
+    { 31, 18000     },
+    { 32, 20480     },
+    { 40, 32768     },
+    { 41, 32768     },
+    { 42, 34816     },
+    { 50, 110400    },
+    { 51, 184320    },
+    { 52, 184320    },
+};
+
 static inline int decode_hrd_parameters(H264Context *h, SPS *sps)
 {
     int cpb_count, i;
@@ -307,6 +327,17 @@ int ff_h264_decode_seq_parameter_set(H264Context *h, int ignore_truncation)
     int i, log2_max_frame_num_minus4;
     SPS *sps;
 
+    sps = av_mallocz(sizeof(SPS));
+    if (!sps)
+        return AVERROR(ENOMEM);
+
+    sps->data_size = h->gb.buffer_end - h->gb.buffer;
+    if (sps->data_size > sizeof(sps->data)) {
+        av_log(h->avctx, AV_LOG_WARNING, "Truncating likely oversized SPS\n");
+        sps->data_size = sizeof(sps->data);
+    }
+    memcpy(sps->data, h->gb.buffer, sps->data_size);
+
     profile_idc           = get_bits(&h->gb, 8);
     constraint_set_flags |= get_bits1(&h->gb) << 0;   // constraint_set0_flag
     constraint_set_flags |= get_bits1(&h->gb) << 1;   // constraint_set1_flag
@@ -320,11 +351,8 @@ int ff_h264_decode_seq_parameter_set(H264Context *h, int ignore_truncation)
 
     if (sps_id >= MAX_SPS_COUNT) {
         av_log(h->avctx, AV_LOG_ERROR, "sps_id %u out of range\n", sps_id);
-        return AVERROR_INVALIDDATA;
+        goto fail;
     }
-    sps = av_mallocz(sizeof(SPS));
-    if (!sps)
-        return AVERROR(ENOMEM);
 
     sps->sps_id               = sps_id;
     sps->time_offset_length   = 24;
@@ -464,7 +492,7 @@ int ff_h264_decode_seq_parameter_set(H264Context *h, int ignore_truncation)
         int width  = 16 * sps->mb_width;
         int height = 16 * sps->mb_height * (2 - sps->frame_mbs_only_flag);
 
-        if (h->avctx->flags2 & CODEC_FLAG2_IGNORE_CROP) {
+        if (h->avctx->flags2 & AV_CODEC_FLAG2_IGNORE_CROP) {
             av_log(h->avctx, AV_LOG_DEBUG, "discarding sps cropping, original "
                                            "values are l:%d r:%d t:%d b:%d\n",
                    crop_left, crop_right, crop_top, crop_bottom);
@@ -481,7 +509,7 @@ int ff_h264_decode_seq_parameter_set(H264Context *h, int ignore_truncation)
             int step_y = (2 - sps->frame_mbs_only_flag) << vsub;
 
             if (crop_left & (0x1F >> (sps->bit_depth_luma > 8)) &&
-                !(h->avctx->flags & CODEC_FLAG_UNALIGNED)) {
+                !(h->avctx->flags & AV_CODEC_FLAG_UNALIGNED)) {
                 crop_left &= ~(0x1F >> (sps->bit_depth_luma > 8));
                 av_log(h->avctx, AV_LOG_WARNING,
                        "Reducing left cropping to %d "
@@ -527,6 +555,19 @@ int ff_h264_decode_seq_parameter_set(H264Context *h, int ignore_truncation)
             goto fail;
     }
 
+    /* if the maximum delay is not stored in the SPS, derive it based on the
+     * level */
+    if (!sps->bitstream_restriction_flag) {
+        sps->num_reorder_frames = MAX_DELAYED_PIC_COUNT - 1;
+        for (i = 0; i < FF_ARRAY_ELEMS(level_max_dpb_mbs); i++) {
+            if (level_max_dpb_mbs[i][0] == sps->level_idc) {
+                sps->num_reorder_frames = FFMIN(level_max_dpb_mbs[i][1] / (sps->mb_width * sps->mb_height),
+                                                sps->num_reorder_frames);
+                break;
+            }
+        }
+    }
+
     if (!sps->sar.den)
         sps->sar.den = 1;
 
@@ -603,6 +644,12 @@ int ff_h264_decode_picture_parameter_set(H264Context *h, int bit_length)
     pps = av_mallocz(sizeof(PPS));
     if (!pps)
         return AVERROR(ENOMEM);
+    pps->data_size = h->gb.buffer_end - h->gb.buffer;
+    if (pps->data_size > sizeof(pps->data)) {
+        av_log(h->avctx, AV_LOG_WARNING, "Truncating likely oversized PPS\n");
+        pps->data_size = sizeof(pps->data);
+    }
+    memcpy(pps->data, h->gb.buffer, pps->data_size);
     pps->sps_id = get_ue_golomb_31(&h->gb);
     if ((unsigned)pps->sps_id >= MAX_SPS_COUNT ||
         !h->sps_buffers[pps->sps_id]) {
diff --git a/libavcodec/h264_refs.c b/libavcodec/h264_refs.c
index 548a3ba8..d46d940d 100644
--- a/libavcodec/h264_refs.c
+++ b/libavcodec/h264_refs.c
@@ -122,7 +122,15 @@ static int add_sorted(H264Picture **sorted, H264Picture **src, int len, int limi
     return out_i;
 }
 
-int ff_h264_fill_default_ref_list(H264Context *h, H264SliceContext *sl)
+static int mismatches_ref(H264Context *h, H264Picture *pic)
+{
+    /* Nonzero if pic differs from the current picture in size or format. */
+    const AVFrame *cur = h->cur_pic_ptr->f, *ref = pic->f;
+    return cur->width  != ref->width  || cur->height != ref->height ||
+           cur->format != ref->format;
+}
+
+static void h264_initialise_ref_list(H264Context *h, H264SliceContext *sl)
 {
     int i, len;
     int j;
@@ -142,69 +150,66 @@ int ff_h264_fill_default_ref_list(H264Context *h, H264SliceContext *sl)
             len += add_sorted(sorted + len, h->short_ref, h->short_ref_count, cur_poc, 0 ^ list);
             av_assert0(len <= 32);
 
-            len  = build_def_list(h->default_ref_list[list], FF_ARRAY_ELEMS(h->default_ref_list[0]),
+            len  = build_def_list(sl->ref_list[list], FF_ARRAY_ELEMS(sl->ref_list[0]),
                                   sorted, len, 0, h->picture_structure);
-            len += build_def_list(h->default_ref_list[list] + len,
-                                  FF_ARRAY_ELEMS(h->default_ref_list[0]) - len,
+            len += build_def_list(sl->ref_list[list] + len,
+                                  FF_ARRAY_ELEMS(sl->ref_list[0]) - len,
                                   h->long_ref, 16, 1, h->picture_structure);
             av_assert0(len <= 32);
 
             if (len < sl->ref_count[list])
-                memset(&h->default_ref_list[list][len], 0, sizeof(H264Ref) * (sl->ref_count[list] - len));
+                memset(&sl->ref_list[list][len], 0, sizeof(H264Ref) * (sl->ref_count[list] - len));
             lens[list] = len;
         }
 
         if (lens[0] == lens[1] && lens[1] > 1) {
             for (i = 0; i < lens[0] &&
-                        h->default_ref_list[0][i].parent->f->buf[0]->buffer ==
-                        h->default_ref_list[1][i].parent->f->buf[0]->buffer; i++);
+                        sl->ref_list[0][i].parent->f->buf[0]->buffer ==
+                        sl->ref_list[1][i].parent->f->buf[0]->buffer; i++);
             if (i == lens[0]) {
-                FFSWAP(H264Ref, h->default_ref_list[1][0], h->default_ref_list[1][1]);
+                FFSWAP(H264Ref, sl->ref_list[1][0], sl->ref_list[1][1]);
             }
         }
     } else {
-        len  = build_def_list(h->default_ref_list[0], FF_ARRAY_ELEMS(h->default_ref_list[0]),
+        len  = build_def_list(sl->ref_list[0], FF_ARRAY_ELEMS(sl->ref_list[0]),
                               h->short_ref, h->short_ref_count, 0, h->picture_structure);
-        len += build_def_list(h->default_ref_list[0] + len,
-                              FF_ARRAY_ELEMS(h->default_ref_list[0]) - len,
+        len += build_def_list(sl->ref_list[0] + len,
+                              FF_ARRAY_ELEMS(sl->ref_list[0]) - len,
                               h-> long_ref, 16, 1, h->picture_structure);
         av_assert0(len <= 32);
 
         if (len < sl->ref_count[0])
-            memset(&h->default_ref_list[0][len], 0, sizeof(H264Ref) * (sl->ref_count[0] - len));
+            memset(&sl->ref_list[0][len], 0, sizeof(H264Ref) * (sl->ref_count[0] - len));
     }
 #ifdef TRACE
     for (i = 0; i < sl->ref_count[0]; i++) {
         ff_tlog(h->avctx, "List0: %s fn:%d 0x%p\n",
-                h->default_ref_list[0][i].parent ? (h->default_ref_list[0][i].parent->long_ref ? "LT" : "ST") : "NULL",
-                h->default_ref_list[0][i].pic_id,
-                h->default_ref_list[0][i].parent ? h->default_ref_list[0][i].parent->f->data[0] : 0);
+                (sl->ref_list[0][i].parent ? (sl->ref_list[0][i].parent->long_ref ? "LT" : "ST") : "??"),
+                sl->ref_list[0][i].pic_id,
+                sl->ref_list[0][i].data[0]);
     }
     if (sl->slice_type_nos == AV_PICTURE_TYPE_B) {
         for (i = 0; i < sl->ref_count[1]; i++) {
             ff_tlog(h->avctx, "List1: %s fn:%d 0x%p\n",
-                    h->default_ref_list[1][i].parent ? (h->default_ref_list[1][i].parent->long_ref ? "LT" : "ST") : "NULL",
-                    h->default_ref_list[1][i].pic_id,
-                    h->default_ref_list[1][i].parent ? h->default_ref_list[1][i].parent->f->data[0] : 0);
+                    (sl->ref_list[1][i].parent ? (sl->ref_list[1][i].parent->long_ref ? "LT" : "ST") : "??"),
+                    sl->ref_list[1][i].pic_id,
+                    sl->ref_list[1][i].data[0]);
         }
     }
 #endif
 
     for (j = 0; j<1+(sl->slice_type_nos == AV_PICTURE_TYPE_B); j++) {
         for (i = 0; i < sl->ref_count[j]; i++) {
-            if (h->default_ref_list[j][i].parent) {
-                AVFrame *f = h->default_ref_list[j][i].parent->f;
-                if (h->cur_pic_ptr->f->width  != f->width ||
-                    h->cur_pic_ptr->f->height != f->height ||
-                    h->cur_pic_ptr->f->format != f->format) {
+            if (sl->ref_list[j][i].parent) {
+                if (mismatches_ref(h, sl->ref_list[j][i].parent)) {
                     av_log(h->avctx, AV_LOG_ERROR, "Discarding mismatching reference\n");
-                    memset(&h->default_ref_list[j][i], 0, sizeof(h->default_ref_list[j][i]));
+                    memset(&sl->ref_list[j][i], 0, sizeof(sl->ref_list[j][i]));
                 }
             }
         }
     }
-
-    return 0;
+    for (i = 0; i < sl->list_count; i++)
+        h->default_ref[i] = sl->ref_list[i][0];
 }
 
 static void print_short_term(H264Context *h);
@@ -240,9 +245,9 @@ int ff_h264_decode_ref_pic_list_reordering(H264Context *h, H264SliceContext *sl)
     print_short_term(h);
     print_long_term(h);
 
-    for (list = 0; list < sl->list_count; list++) {
-        memcpy(sl->ref_list[list], h->default_ref_list[list], sl->ref_count[list] * sizeof(sl->ref_list[0][0]));
+    h264_initialise_ref_list(h, sl);
 
+    for (list = 0; list < sl->list_count; list++) {
         if (get_bits1(&sl->gb)) {    // ref_pic_list_modification_flag_l[01]
             int pred = h->curr_pic_num;
 
@@ -263,7 +268,7 @@ int ff_h264_decode_ref_pic_list_reordering(H264Context *h, H264SliceContext *sl)
                 switch (modification_of_pic_nums_idc) {
                 case 0:
                 case 1: {
-                    const unsigned int abs_diff_pic_num = get_ue_golomb(&sl->gb) + 1;
+                    const unsigned int abs_diff_pic_num = get_ue_golomb_long(&sl->gb) + 1;
                     int frame_num;
 
                     if (abs_diff_pic_num > h->max_pic_num) {
@@ -298,14 +303,14 @@ int ff_h264_decode_ref_pic_list_reordering(H264Context *h, H264SliceContext *sl)
 
                     long_idx = pic_num_extract(h, pic_id, &pic_structure);
 
-                    if (long_idx > 31) {
+                    if (long_idx > 31U) {
                         av_log(h->avctx, AV_LOG_ERROR,
                                "long_term_pic_idx overflow\n");
                         return AVERROR_INVALIDDATA;
                     }
                     ref = h->long_ref[long_idx];
                     assert(!(ref && !ref->reference));
-                    if (ref && (ref->reference & pic_structure)) {
+                    if (ref && (ref->reference & pic_structure) && !mismatches_ref(h, ref)) {
                         ref->pic_id = pic_id;
                         assert(ref->long_ref);
                         i = 0;
@@ -348,12 +353,12 @@ int ff_h264_decode_ref_pic_list_reordering(H264Context *h, H264SliceContext *sl)
             if (   !sl->ref_list[list][index].parent
                 || (!FIELD_PICTURE(h) && (sl->ref_list[list][index].reference&3) != 3)) {
                 int i;
-                av_log(h->avctx, AV_LOG_ERROR, "Missing reference picture, default is %d\n", h->default_ref_list[list][0].poc);
+                av_log(h->avctx, AV_LOG_ERROR, "Missing reference picture, default is %d\n", h->default_ref[list].poc);
                 for (i = 0; i < FF_ARRAY_ELEMS(h->last_pocs); i++)
                     h->last_pocs[i] = INT_MIN;
-                if (h->default_ref_list[list][0].parent
-                    && !(!FIELD_PICTURE(h) && (h->default_ref_list[list][0].reference&3) != 3))
-                    sl->ref_list[list][index] = h->default_ref_list[list][0];
+                if (h->default_ref[list].parent
+                    && !(!FIELD_PICTURE(h) && (h->default_ref[list].reference&3) != 3))
+                    sl->ref_list[list][index] = h->default_ref[list];
                 else
                     return -1;
             }
@@ -515,7 +520,8 @@ void ff_h264_remove_all_refs(H264Context *h)
 
     if (h->short_ref_count && !h->last_pic_for_ec.f->data[0]) {
         ff_h264_unref_picture(h, &h->last_pic_for_ec);
-        ff_h264_ref_picture(h, &h->last_pic_for_ec, h->short_ref[0]);
+        if (h->short_ref[0]->f->buf[0])
+            ff_h264_ref_picture(h, &h->last_pic_for_ec, h->short_ref[0]);
     }
 
     for (i = 0; i < h->short_ref_count; i++) {
@@ -524,7 +530,7 @@ void ff_h264_remove_all_refs(H264Context *h)
     }
     h->short_ref_count = 0;
 
-    memset(h->default_ref_list, 0, sizeof(h->default_ref_list));
+    memset(h->default_ref, 0, sizeof(h->default_ref));
     for (i = 0; i < h->nb_slice_ctx; i++) {
         H264SliceContext *sl = &h->slice_ctx[i];
         sl->list_count = sl->ref_count[0] = sl->ref_count[1] = 0;
@@ -617,6 +623,7 @@ int ff_h264_execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count)
 {
     int i, av_uninit(j);
     int pps_count;
+    int pps_ref_count[2] = {0};
     int current_ref_assigned = 0, err = 0;
     H264Picture *av_uninit(pic);
 
@@ -683,15 +690,18 @@ int ff_h264_execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count)
                 remove_short_at_index(h, 0);
             }
 
-            if (h->long_ref[mmco[i].long_arg] != h->cur_pic_ptr) {
-                if (h->cur_pic_ptr->long_ref) {
-                    for(j=0; j<16; j++) {
-                        if(h->long_ref[j] == h->cur_pic_ptr) {
-                            remove_long(h, j, 0);
+            /* make sure the current picture is not already assigned as a long ref */
+            if (h->cur_pic_ptr->long_ref) {
+                for (j = 0; j < FF_ARRAY_ELEMS(h->long_ref); j++) {
+                    if (h->long_ref[j] == h->cur_pic_ptr) {
+                        if (j != mmco[i].long_arg)
                             av_log(h->avctx, AV_LOG_ERROR, "mmco: cannot assign current picture to 2 long term references\n");
-                        }
+                        remove_long(h, j, 0);
                     }
                 }
+            }
+
+            if (h->long_ref[mmco[i].long_arg] != h->cur_pic_ptr) {
                 av_assert0(!h->cur_pic_ptr->long_ref);
                 remove_long(h, mmco[i].long_arg, 0);
 
@@ -798,13 +808,17 @@ int ff_h264_execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count)
     print_long_term(h);
 
     pps_count = 0;
-    for (i = 0; i < FF_ARRAY_ELEMS(h->pps_buffers); i++)
+    for (i = 0; i < FF_ARRAY_ELEMS(h->pps_buffers); i++) {
         pps_count += !!h->pps_buffers[i];
+        pps_ref_count[0] = FFMAX(pps_ref_count[0], h->pps.ref_count[0]);
+        pps_ref_count[1] = FFMAX(pps_ref_count[1], h->pps.ref_count[1]);
+    }
 
     if (   err >= 0
         && h->long_ref_count==0
-        && (h->short_ref_count<=2 || h->pps.ref_count[0] <= 1 && h->pps.ref_count[1] <= 1 && pps_count == 1)
-        && h->pps.ref_count[0]<=2 + (h->picture_structure != PICT_FRAME) + (2*!h->has_recovery_point)
+        && (   h->short_ref_count<=2
+            || pps_ref_count[0] <= 1 + (h->picture_structure != PICT_FRAME) && pps_ref_count[1] <= 1)
+        && pps_ref_count[0]<=2 + (h->picture_structure != PICT_FRAME) + (2*!h->has_recovery_point)
         && h->cur_pic_ptr->f->pict_type == AV_PICTURE_TYPE_I){
         h->cur_pic_ptr->recovered |= 1;
         if(!h->avctx->has_b_frames)
@@ -836,7 +850,7 @@ int ff_h264_decode_ref_pic_marking(H264Context *h, GetBitContext *gb,
                 mmco[i].opcode = opcode;
                 if (opcode == MMCO_SHORT2UNUSED || opcode == MMCO_SHORT2LONG) {
                     mmco[i].short_pic_num =
-                        (h->curr_pic_num - get_ue_golomb(gb) - 1) &
+                        (h->curr_pic_num - get_ue_golomb_long(gb) - 1) &
                             (h->max_pic_num - 1);
 #if 0
                     if (mmco[i].short_pic_num >= h->short_ref_count ||
diff --git a/libavcodec/h264_sei.c b/libavcodec/h264_sei.c
index 8e1697a3..77dd7b21 100644
--- a/libavcodec/h264_sei.c
+++ b/libavcodec/h264_sei.c
@@ -42,6 +42,10 @@ void ff_h264_reset_sei(H264Context *h)
     h->sei_buffering_period_present =  0;
     h->sei_frame_packing_present    =  0;
     h->sei_display_orientation_present = 0;
+    h->sei_reguserdata_afd_present  =  0;
+
+    h->a53_caption_size = 0;
+    av_freep(&h->a53_caption);
 }
 
 static int decode_picture_timing(H264Context *h)
@@ -108,35 +112,108 @@ static int decode_picture_timing(H264Context *h)
     return 0;
 }
 
-static int decode_user_data_itu_t_t35(H264Context *h, int size)
+static int decode_registered_user_data_afd(H264Context *h, int size)
+{
+    int flag;                           // active_format_flag: set when an AFD byte follows
+    /* Reads the AFD bytes from h->gb; returns 0, or AVERROR_INVALIDDATA when size runs out. */
+    if (size-- < 1)                     // one byte is needed for the flag byte
+        return AVERROR_INVALIDDATA;
+    skip_bits(&h->gb, 1);               // 0
+    flag = get_bits(&h->gb, 1);         // active_format_flag
+    skip_bits(&h->gb, 6);               // reserved
+
+    if (flag) {
+        if (size-- < 1)                 // one more byte is needed for the format nibble
+            return AVERROR_INVALIDDATA;
+        skip_bits(&h->gb, 4);           // reserved
+        h->active_format_description   = get_bits(&h->gb, 4);
+        h->sei_reguserdata_afd_present = 1;
+#if FF_API_AFD
+FF_DISABLE_DEPRECATION_WARNINGS
+        h->avctx->dtg_active_format = h->active_format_description;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif /* FF_API_AFD */
+    }
+
+    return 0;
+}
+
+static int decode_registered_user_data_closed_caption(H264Context *h, int size)
 {
+    int flag;                           // process_cc_data_flag
+    int user_data_type_code;            // first payload byte; selects the branch below
+    int cc_count;                       // number of 3-byte caption entries announced
+    /* Appends caption bytes from h->gb to h->a53_caption; returns 0, AVERROR(EINVAL) or av_reallocp()'s error. */
+    if (size < 3)                       // type code, flag byte and reserved byte are read below
+        return AVERROR(EINVAL);
+
+    user_data_type_code = get_bits(&h->gb, 8);
+    if (user_data_type_code == 0x3) {   // presumably the cc_data type code -- confirm against ATSC A/53
+        skip_bits(&h->gb, 1);           // reserved
+
+        flag = get_bits(&h->gb, 1);     // process_cc_data_flag
+        if (flag) {
+            skip_bits(&h->gb, 1);       // zero bit
+            cc_count = get_bits(&h->gb, 5);
+            skip_bits(&h->gb, 8);       // reserved
+            size -= 2;                  // NOTE(review): three bytes were read so far -- confirm 2 is intended
+
+            if (cc_count && size >= cc_count * 3) {
+                const uint64_t new_size = (h->a53_caption_size + cc_count
+                                           * UINT64_C(3));
+                int i, ret;
+
+                if (new_size > INT_MAX) // reject an accumulated size that exceeds INT_MAX
+                    return AVERROR(EINVAL);
+
+                /* Allow merging of the cc data from two fields. */
+                ret = av_reallocp(&h->a53_caption, new_size);
+                if (ret < 0)
+                    return ret;
+
+                for (i = 0; i < cc_count; i++) {    // copy the three bytes of every entry verbatim
+                    h->a53_caption[h->a53_caption_size++] = get_bits(&h->gb, 8);
+                    h->a53_caption[h->a53_caption_size++] = get_bits(&h->gb, 8);
+                    h->a53_caption[h->a53_caption_size++] = get_bits(&h->gb, 8);
+                }
+
+                skip_bits(&h->gb, 8);   // marker_bits
+            }
+        }
+    } else {                            // any other type code: skip size - 1 further bytes
+        int i;
+        for (i = 0; i < size - 1; i++)  // one byte (the type code) was already consumed
+            skip_bits(&h->gb, 8);
+    }
+
+    return 0;
+}
+
+static int decode_registered_user_data(H264Context *h, int size)
+{
+    uint32_t country_code;
     uint32_t user_identifier;
-    int dtg_active_format;
 
     if (size < 7)
-        return -1;
+        return AVERROR_INVALIDDATA;
     size -= 7;
 
-    skip_bits(&h->gb, 8);   // country_code
-    skip_bits(&h->gb, 16);  // provider_code
+    country_code = get_bits(&h->gb, 8); // itu_t_t35_country_code
+    if (country_code == 0xFF) {
+        skip_bits(&h->gb, 8);           // itu_t_t35_country_code_extension_byte
+        size--;
+    }
+
+    /* itu_t_t35_payload_byte follows */
+    skip_bits(&h->gb, 8);              // terminal provider code
+    skip_bits(&h->gb, 8);              // terminal provider oriented code
     user_identifier = get_bits_long(&h->gb, 32);
 
     switch (user_identifier) {
-        case 0x44544731:    // "DTG1" - AFD_data
-            if (size < 1)
-                return -1;
-            skip_bits(&h->gb, 1);
-            if (get_bits(&h->gb, 1)) {
-                skip_bits(&h->gb, 6);
-                if (size < 2)
-                    return -1;
-                skip_bits(&h->gb, 4);
-                dtg_active_format = get_bits(&h->gb, 4);
-                h->avctx->dtg_active_format = dtg_active_format;
-            } else {
-                skip_bits(&h->gb, 6);
-            }
-            break;
+        case MKBETAG('D', 'T', 'G', '1'):       // afd_data
+            return decode_registered_user_data_afd(h, size);
+        case MKBETAG('G', 'A', '9', '4'):       // closed captions
+            return decode_registered_user_data_closed_caption(h, size);
         default:
             skip_bits(&h->gb, size * 8);
             break;
@@ -147,13 +224,17 @@ static int decode_user_data_itu_t_t35(H264Context *h, int size)
 
 static int decode_unregistered_user_data(H264Context *h, int size)
 {
-    uint8_t user_data[16 + 256];
+    uint8_t *user_data;
     int e, build, i;
 
-    if (size < 16)
+    if (size < 16 || size >= INT_MAX - 16)
         return AVERROR_INVALIDDATA;
 
-    for (i = 0; i < sizeof(user_data) - 1 && i < size; i++)
+    user_data = av_malloc(16 + size + 1);
+    if (!user_data)
+        return AVERROR(ENOMEM);
+
+    for (i = 0; i < size + 16; i++)
         user_data[i] = get_bits(&h->gb, 8);
 
     user_data[i] = 0;
@@ -163,18 +244,16 @@ static int decode_unregistered_user_data(H264Context *h, int size)
     if (e == 1 && build == 1 && !strncmp(user_data+16, "x264 - core 0000", 16))
         h->x264_build = 67;
 
-    if (h->avctx->debug & FF_DEBUG_BUGS)
+    if (strlen(user_data + 16) > 0)
         av_log(h->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data + 16);
 
-    for (; i < size; i++)
-        skip_bits(&h->gb, 8);
-
+    av_free(user_data);
     return 0;
 }
 
 static int decode_recovery_point(H264Context *h)
 {
-    h->sei_recovery_frame_cnt = get_ue_golomb(&h->gb);
+    h->sei_recovery_frame_cnt = get_ue_golomb_long(&h->gb);
 
     /* 1b exact_match_flag,
      * 1b broken_link_flag,
@@ -227,7 +306,7 @@ static int decode_buffering_period(H264Context *h)
 
 static int decode_frame_packing_arrangement(H264Context *h)
 {
-    h->sei_fpa.frame_packing_arrangement_id          = get_ue_golomb(&h->gb);
+    h->sei_fpa.frame_packing_arrangement_id          = get_ue_golomb_long(&h->gb);
     h->sei_fpa.frame_packing_arrangement_cancel_flag = get_bits1(&h->gb);
     h->sei_frame_packing_present = !h->sei_fpa.frame_packing_arrangement_cancel_flag;
 
@@ -247,7 +326,7 @@ static int decode_frame_packing_arrangement(H264Context *h)
         if (!h->quincunx_subsampling && h->frame_packing_arrangement_type != 5)
             skip_bits(&h->gb, 16);      // frame[01]_grid_position_[xy]
         skip_bits(&h->gb, 8);           // frame_packing_arrangement_reserved_byte
-        h->sei_fpa.frame_packing_arrangement_repetition_period = get_ue_golomb(&h->gb) /* frame_packing_arrangement_repetition_period */;
+        h->sei_fpa.frame_packing_arrangement_repetition_period = get_ue_golomb_long(&h->gb);
     }
     skip_bits1(&h->gb);                 // frame_packing_arrangement_extension_flag
 
@@ -272,8 +351,68 @@ static int decode_display_orientation(H264Context *h)
         h->sei_vflip = get_bits1(&h->gb);     // ver_flip
 
         h->sei_anticlockwise_rotation = get_bits(&h->gb, 16);
-        get_ue_golomb(&h->gb);  // display_orientation_repetition_period
-        skip_bits1(&h->gb);     // display_orientation_extension_flag
+        get_ue_golomb_long(&h->gb);  // display_orientation_repetition_period
+        skip_bits1(&h->gb);          // display_orientation_extension_flag
+    }
+
+    return 0;
+}
+
+static int decode_GreenMetadata(H264Context *h)
+{
+    if (h->avctx->debug & FF_DEBUG_GREEN_MD)
+        av_log(h->avctx, AV_LOG_DEBUG,          "Green Metadata Info SEI message\n");
+    /* Fills h->sei_green_metadata from h->gb; values are logged only with FF_DEBUG_GREEN_MD. */
+    h->sei_green_metadata.green_metadata_type=get_bits(&h->gb, 8);
+
+    if (h->avctx->debug & FF_DEBUG_GREEN_MD)
+        av_log(h->avctx, AV_LOG_DEBUG,          "green_metadata_type                            = %d\n",
+               h->sei_green_metadata.green_metadata_type);
+
+    if (h->sei_green_metadata.green_metadata_type==0){
+        h->sei_green_metadata.period_type=get_bits(&h->gb, 8);
+
+        if (h->avctx->debug & FF_DEBUG_GREEN_MD)
+            av_log(h->avctx, AV_LOG_DEBUG,      "green_metadata_period_type                     = %d\n",
+                   h->sei_green_metadata.period_type);
+        /* The period length field depends on period_type (2: seconds, 3: pictures), not on the metadata type. */
+        if (h->sei_green_metadata.period_type==2){
+            h->sei_green_metadata.num_seconds = get_bits(&h->gb, 16);
+            if (h->avctx->debug & FF_DEBUG_GREEN_MD)
+                av_log(h->avctx, AV_LOG_DEBUG,  "green_metadata_num_seconds                     = %d\n",
+                       h->sei_green_metadata.num_seconds);
+        }
+        else if (h->sei_green_metadata.period_type==3){
+            h->sei_green_metadata.num_pictures = get_bits(&h->gb, 16);
+            if (h->avctx->debug & FF_DEBUG_GREEN_MD)
+                av_log(h->avctx, AV_LOG_DEBUG,  "green_metadata_num_pictures                    = %d\n",
+                       h->sei_green_metadata.num_pictures);
+        }
+
+        h->sei_green_metadata.percent_non_zero_macroblocks=get_bits(&h->gb, 8);
+        h->sei_green_metadata.percent_intra_coded_macroblocks=get_bits(&h->gb, 8);
+        h->sei_green_metadata.percent_six_tap_filtering=get_bits(&h->gb, 8);
+        h->sei_green_metadata.percent_alpha_point_deblocking_instance=get_bits(&h->gb, 8);
+
+        if (h->avctx->debug & FF_DEBUG_GREEN_MD)
+            av_log(h->avctx, AV_LOG_DEBUG,      "SEI GREEN Complexity Metrics                   = %f %f %f %f\n",
+                                           (float)h->sei_green_metadata.percent_non_zero_macroblocks/255,
+                                           (float)h->sei_green_metadata.percent_intra_coded_macroblocks/255,
+                                           (float)h->sei_green_metadata.percent_six_tap_filtering/255,
+                                           (float)h->sei_green_metadata.percent_alpha_point_deblocking_instance/255);
+
+    }else if( h->sei_green_metadata.green_metadata_type==1){
+        h->sei_green_metadata.xsd_metric_type=get_bits(&h->gb, 8);
+        h->sei_green_metadata.xsd_metric_value=get_bits(&h->gb, 16);
+
+        if (h->avctx->debug & FF_DEBUG_GREEN_MD)
+            av_log(h->avctx, AV_LOG_DEBUG,      "xsd_metric_type                                = %d\n",
+                   h->sei_green_metadata.xsd_metric_type);
+        if ( h->sei_green_metadata.xsd_metric_type==0){
+            if (h->avctx->debug & FF_DEBUG_GREEN_MD)
+                av_log(h->avctx, AV_LOG_DEBUG,  "xsd_metric_value                               = %f\n",
+                       (float)h->sei_green_metadata.xsd_metric_value/100);
+        }
     }
 
     return 0;
@@ -312,41 +451,34 @@ int ff_h264_decode_sei(H264Context *h)
         switch (type) {
         case SEI_TYPE_PIC_TIMING: // Picture timing SEI
             ret = decode_picture_timing(h);
-            if (ret < 0)
-                return ret;
             break;
-        case SEI_TYPE_USER_DATA_ITU_T_T35:
-            if (decode_user_data_itu_t_t35(h, size) < 0)
-                return -1;
+        case SEI_TYPE_USER_DATA_REGISTERED:
+            ret = decode_registered_user_data(h, size);
             break;
         case SEI_TYPE_USER_DATA_UNREGISTERED:
             ret = decode_unregistered_user_data(h, size);
-            if (ret < 0)
-                return ret;
             break;
         case SEI_TYPE_RECOVERY_POINT:
             ret = decode_recovery_point(h);
-            if (ret < 0)
-                return ret;
             break;
         case SEI_TYPE_BUFFERING_PERIOD:
             ret = decode_buffering_period(h);
-            if (ret < 0)
-                return ret;
             break;
         case SEI_TYPE_FRAME_PACKING:
             ret = decode_frame_packing_arrangement(h);
-            if (ret < 0)
-                return ret;
             break;
         case SEI_TYPE_DISPLAY_ORIENTATION:
             ret = decode_display_orientation(h);
-            if (ret < 0)
-                return ret;
+            break;
+        case SEI_TYPE_GREEN_METADATA:
+            ret = decode_GreenMetadata(h);
             break;
         default:
             av_log(h->avctx, AV_LOG_DEBUG, "unknown SEI type %d\n", type);
         }
+        if (ret < 0)
+            return ret;
+
         skip_bits_long(&h->gb, next - get_bits_count(&h->gb));
 
         // FIXME check bits here
diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index 968e3ecb..0b3e0406 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -171,9 +171,9 @@ static int alloc_scratch_buffers(H264SliceContext *sl, int linesize)
     // (= 21x21 for  h264)
     av_fast_malloc(&sl->edge_emu_buffer, &sl->edge_emu_buffer_allocated, alloc_size * 2 * 21);
 
-    av_fast_malloc(&sl->top_borders[0], &sl->top_borders_allocated[0],
+    av_fast_mallocz(&sl->top_borders[0], &sl->top_borders_allocated[0],
                    h->mb_width * 16 * 3 * sizeof(uint8_t) * 2);
-    av_fast_malloc(&sl->top_borders[1], &sl->top_borders_allocated[1],
+    av_fast_mallocz(&sl->top_borders[1], &sl->top_borders_allocated[1],
                    h->mb_width * 16 * 3 * sizeof(uint8_t) * 2);
 
     if (!sl->bipred_scratchpad || !sl->edge_emu_buffer ||
@@ -246,16 +246,16 @@ static int alloc_picture(H264Context *h, H264Picture *pic)
             pic->hwaccel_picture_private = pic->hwaccel_priv_buf->data;
         }
     }
-    if (CONFIG_GRAY && !h->avctx->hwaccel && h->flags & CODEC_FLAG_GRAY && pic->f->data[2]) {
+    if (CONFIG_GRAY && !h->avctx->hwaccel && h->flags & AV_CODEC_FLAG_GRAY && pic->f->data[2]) {
         int h_chroma_shift, v_chroma_shift;
         av_pix_fmt_get_chroma_sub_sample(pic->f->format,
                                          &h_chroma_shift, &v_chroma_shift);
 
-        for(i=0; i<FF_CEIL_RSHIFT(pic->f->height, v_chroma_shift); i++) {
+        for(i=0; i<AV_CEIL_RSHIFT(pic->f->height, v_chroma_shift); i++) {
             memset(pic->f->data[1] + pic->f->linesize[1]*i,
-                   0x80, FF_CEIL_RSHIFT(pic->f->width, h_chroma_shift));
+                   0x80, AV_CEIL_RSHIFT(pic->f->width, h_chroma_shift));
             memset(pic->f->data[2] + pic->f->linesize[2]*i,
-                   0x80, FF_CEIL_RSHIFT(pic->f->width, h_chroma_shift));
+                   0x80, AV_CEIL_RSHIFT(pic->f->width, h_chroma_shift));
         }
     }
 
@@ -383,7 +383,7 @@ void ff_h264_init_dequant_tables(H264Context *h)
     }
 }
 
-#define IN_RANGE(a, b, size) (((a) >= (b)) && ((a) < ((b) + (size))))
+#define IN_RANGE(a, b, size) (((void*)(a) >= (void*)(b)) && ((void*)(a) < (void*)((b) + (size))))
 
 #define REBASE_PICTURE(pic, new_ctx, old_ctx)             \
     (((pic) && (pic) >= (old_ctx)->DPB &&                       \
@@ -397,10 +397,9 @@ static void copy_picture_range(H264Picture **to, H264Picture **from, int count,
     int i;
 
     for (i = 0; i < count; i++) {
-        av_assert1((IN_RANGE(from[i], old_base, 1) ||
-                IN_RANGE(from[i], old_base->DPB,
-                         H264_MAX_PICTURE_COUNT) ||
-                !from[i]));
+        av_assert1(!from[i] ||
+                   IN_RANGE(from[i], old_base, 1) ||
+                   IN_RANGE(from[i], old_base->DPB, H264_MAX_PICTURE_COUNT));
         to[i] = REBASE_PICTURE(from[i], new_base, old_base);
     }
 }
@@ -497,6 +496,9 @@ int ff_h264_update_thread_context(AVCodecContext *dst,
     h->picture_structure    = h1->picture_structure;
     h->droppable            = h1->droppable;
     h->low_delay            = h1->low_delay;
+    h->backup_width         = h1->backup_width;
+    h->backup_height        = h1->backup_height;
+    h->backup_pix_fmt       = h1->backup_pix_fmt;
 
     for (i = 0; i < H264_MAX_PICTURE_COUNT; i++) {
         ff_h264_unref_picture(h, &h->DPB[i]);
@@ -538,10 +540,7 @@ int ff_h264_update_thread_context(AVCodecContext *dst,
     h->dequant_coeff_pps = h1->dequant_coeff_pps;
 
     // POC timing
-    copy_fields(h, h1, poc_lsb, default_ref_list);
-
-    // reference lists
-    copy_fields(h, h1, short_ref, current_slice);
+    copy_fields(h, h1, poc_lsb, current_slice);
 
     copy_picture_range(h->short_ref, h1->short_ref, 32, h, h1);
     copy_picture_range(h->long_ref, h1->long_ref, 32, h, h1);
@@ -596,6 +595,7 @@ static int h264_frame_start(H264Context *h)
     pic->reference              = h->droppable ? 0 : h->picture_structure;
     pic->f->coded_picture_number = h->coded_picture_number++;
     pic->field_picture          = h->picture_structure != PICT_FRAME;
+    pic->frame_num               = h->frame_num;
 
     /*
      * Zero key_frame here; IDR markings per slice in frame or fields are ORed
@@ -610,9 +610,12 @@ static int h264_frame_start(H264Context *h)
 
     if ((ret = alloc_picture(h, pic)) < 0)
         return ret;
-    if(!h->frame_recovered && !h->avctx->hwaccel &&
-       !(h->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU))
-        avpriv_color_frame(pic->f, c);
+    if(!h->frame_recovered && !h->avctx->hwaccel
+#if FF_API_CAP_VDPAU
+       && !(h->avctx->codec->capabilities & AV_CODEC_CAP_HWACCEL_VDPAU)
+#endif
+       )
+        ff_color_frame(pic->f, c);
 
     h->cur_pic_ptr = pic;
     ff_h264_unref_picture(h, &h->cur_pic);
@@ -683,7 +686,7 @@ static av_always_inline void backup_mb_border(const H264Context *h, H264SliceCon
                 AV_COPY128(top_border, src_y + 15 * linesize);
                 if (pixel_shift)
                     AV_COPY128(top_border + 16, src_y + 15 * linesize + 16);
-                if (simple || !CONFIG_GRAY || !(h->flags & CODEC_FLAG_GRAY)) {
+                if (simple || !CONFIG_GRAY || !(h->flags & AV_CODEC_FLAG_GRAY)) {
                     if (chroma444) {
                         if (pixel_shift) {
                             AV_COPY128(top_border + 32, src_cb + 15 * uvlinesize);
@@ -726,7 +729,7 @@ static av_always_inline void backup_mb_border(const H264Context *h, H264SliceCon
     if (pixel_shift)
         AV_COPY128(top_border + 16, src_y + 16 * linesize + 16);
 
-    if (simple || !CONFIG_GRAY || !(h->flags & CODEC_FLAG_GRAY)) {
+    if (simple || !CONFIG_GRAY || !(h->flags & AV_CODEC_FLAG_GRAY)) {
         if (chroma444) {
             if (pixel_shift) {
                 AV_COPY128(top_border + 32, src_cb + 16 * linesize);
@@ -778,7 +781,7 @@ static void implicit_weight_table(const H264Context *h, H264SliceContext *sl, in
             cur_poc = h->cur_pic_ptr->field_poc[h->picture_structure - 1];
         }
         if (sl->ref_count[0] == 1 && sl->ref_count[1] == 1 && !FRAME_MBAFF(h) &&
-            sl->ref_list[0][0].poc + sl->ref_list[1][0].poc == 2 * cur_poc) {
+            sl->ref_list[0][0].poc + (int64_t)sl->ref_list[1][0].poc == 2 * cur_poc) {
             sl->use_weight        = 0;
             sl->use_weight_chroma = 0;
             return;
@@ -799,7 +802,7 @@ static void implicit_weight_table(const H264Context *h, H264SliceContext *sl, in
     sl->chroma_log2_weight_denom = 5;
 
     for (ref0 = ref_start; ref0 < ref_count0; ref0++) {
-        int poc0 = sl->ref_list[0][ref0].poc;
+        int64_t poc0 = sl->ref_list[0][ref0].poc;
         for (ref1 = ref_start; ref1 < ref_count1; ref1++) {
             int w = 32;
             if (!sl->ref_list[0][ref0].parent->long_ref && !sl->ref_list[1][ref1].parent->long_ref) {
@@ -866,6 +869,7 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback)
                      CONFIG_H264_D3D11VA_HWACCEL + \
                      CONFIG_H264_VAAPI_HWACCEL + \
                      (CONFIG_H264_VDA_HWACCEL * 2) + \
+                     CONFIG_H264_VIDEOTOOLBOX_HWACCEL + \
                      CONFIG_H264_VDPAU_HWACCEL)
     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
     const enum AVPixelFormat *choices = pix_fmts;
@@ -940,11 +944,14 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback)
             *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
 #endif
 #if CONFIG_H264_VAAPI_HWACCEL
-            *fmt++ = AV_PIX_FMT_VAAPI_VLD;
+            *fmt++ = AV_PIX_FMT_VAAPI;
 #endif
 #if CONFIG_H264_VDA_HWACCEL
             *fmt++ = AV_PIX_FMT_VDA_VLD;
             *fmt++ = AV_PIX_FMT_VDA;
+#endif
+#if CONFIG_H264_VIDEOTOOLBOX_HWACCEL
+            *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
 #endif
             if (h->avctx->codec->pix_fmts)
                 choices = h->avctx->codec->pix_fmts;
@@ -973,15 +980,15 @@ static int init_dimensions(H264Context *h)
 {
     int width  = h->width  - (h->sps.crop_right + h->sps.crop_left);
     int height = h->height - (h->sps.crop_top   + h->sps.crop_bottom);
-    int crop_present = h->sps.crop_left  || h->sps.crop_top ||
-                       h->sps.crop_right || h->sps.crop_bottom;
     av_assert0(h->sps.crop_right + h->sps.crop_left < (unsigned)h->width);
     av_assert0(h->sps.crop_top + h->sps.crop_bottom < (unsigned)h->height);
 
     /* handle container cropping */
-    if (!crop_present &&
-        FFALIGN(h->avctx->width,  16) == h->width &&
-        FFALIGN(h->avctx->height, 16) == h->height) {
+    if (FFALIGN(h->avctx->width,  16) == FFALIGN(width,  16) &&
+        FFALIGN(h->avctx->height, 16) == FFALIGN(height, 16) &&
+        h->avctx->width  <= width &&
+        h->avctx->height <= height
+    ) {
         width  = h->avctx->width;
         height = h->avctx->height;
     }
@@ -1042,14 +1049,16 @@ static int h264_slice_header_init(H264Context *h)
         goto fail;
     }
 
+#if FF_API_CAP_VDPAU
     if (h->avctx->codec &&
-        h->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU &&
+        h->avctx->codec->capabilities & AV_CODEC_CAP_HWACCEL_VDPAU &&
         (h->sps.bit_depth_luma != 8 || h->sps.chroma_format_idc > 1)) {
         av_log(h->avctx, AV_LOG_ERROR,
                 "VDPAU decoding does not support video colorspace.\n");
         ret = AVERROR_INVALIDDATA;
         goto fail;
     }
+#endif
 
     if (h->sps.bit_depth_luma < 8 || h->sps.bit_depth_luma > 14 ||
         h->sps.bit_depth_luma == 11 || h->sps.bit_depth_luma == 13
@@ -1086,6 +1095,7 @@ static int h264_slice_header_init(H264Context *h)
         nb_slices = max_slices;
     }
     h->slice_context_count = nb_slices;
+    h->max_contexts = FFMIN(h->max_contexts, nb_slices);
 
     if (!HAVE_THREADS || !(h->avctx->active_thread_type & FF_THREAD_SLICE)) {
         ret = ff_h264_slice_context_init(h, &h->slice_ctx[0]);
@@ -1148,10 +1158,13 @@ int ff_h264_decode_slice_header(H264Context *h, H264SliceContext *sl)
     int needs_reinit = 0;
     int field_pic_flag, bottom_field_flag;
     int first_slice = sl == h->slice_ctx && !h->current_slice;
-    int frame_num, picture_structure, droppable;
+    int frame_num, droppable, picture_structure;
     int mb_aff_frame, last_mb_aff_frame;
     PPS *pps;
 
+    if (first_slice)
+        av_assert0(!h->setup_finished);
+
     h->qpel_put = h->h264qpel.put_h264_qpel_pixels_tab;
     h->qpel_avg = h->h264qpel.avg_h264_qpel_pixels_tab;
 
@@ -1159,16 +1172,33 @@ int ff_h264_decode_slice_header(H264Context *h, H264SliceContext *sl)
 
     if (first_mb_in_slice == 0) { // FIXME better field boundary detection
         if (h->current_slice) {
+            if (h->setup_finished) {
+                av_log(h->avctx, AV_LOG_ERROR, "Too many fields\n");
+                return AVERROR_INVALIDDATA;
+            }
+            if (h->max_contexts > 1) {
+                if (!h->single_decode_warning) {
+                    av_log(h->avctx, AV_LOG_WARNING, "Cannot decode multiple access units as slice threads\n");
+                    h->single_decode_warning = 1;
+                }
+                h->max_contexts = 1;
+                return SLICE_SINGLETHREAD;
+            }
+
             if (h->cur_pic_ptr && FIELD_PICTURE(h) && h->first_field) {
-                ff_h264_field_end(h, h->slice_ctx, 1);
+                ret = ff_h264_field_end(h, h->slice_ctx, 1);
                 h->current_slice = 0;
+                if (ret < 0)
+                    return ret;
             } else if (h->cur_pic_ptr && !FIELD_PICTURE(h) && !h->first_field && h->nal_unit_type  == NAL_IDR_SLICE) {
                 av_log(h, AV_LOG_WARNING, "Broken frame packetizing\n");
-                ff_h264_field_end(h, h->slice_ctx, 1);
+                ret = ff_h264_field_end(h, h->slice_ctx, 1);
                 h->current_slice = 0;
                 ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX, 0);
                 ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX, 1);
                 h->cur_pic_ptr = NULL;
+                if (ret < 0)
+                    return ret;
             } else
                 return AVERROR_INVALIDDATA;
         }
@@ -1182,6 +1212,9 @@ int ff_h264_decode_slice_header(H264Context *h, H264SliceContext *sl)
         }
     }
 
+    if (!h->current_slice)
+        av_assert0(sl == h->slice_ctx);
+
     slice_type = get_ue_golomb_31(&sl->gb);
     if (slice_type > 9) {
         av_log(h->avctx, AV_LOG_ERROR,
@@ -1196,7 +1229,6 @@ int ff_h264_decode_slice_header(H264Context *h, H264SliceContext *sl)
         sl->slice_type_fixed = 0;
 
     slice_type = golomb_to_pict_type[slice_type];
-
     sl->slice_type     = slice_type;
     sl->slice_type_nos = slice_type & 3;
 
@@ -1206,17 +1238,20 @@ int ff_h264_decode_slice_header(H264Context *h, H264SliceContext *sl)
         return AVERROR_INVALIDDATA;
     }
 
-    if (
-        (h->avctx->skip_frame >= AVDISCARD_NONREF && !h->nal_ref_idc) ||
-        (h->avctx->skip_frame >= AVDISCARD_BIDIR  && sl->slice_type_nos == AV_PICTURE_TYPE_B) ||
-        (h->avctx->skip_frame >= AVDISCARD_NONINTRA && sl->slice_type_nos != AV_PICTURE_TYPE_I) ||
-        (h->avctx->skip_frame >= AVDISCARD_NONKEY && h->nal_unit_type != NAL_IDR_SLICE) ||
-         h->avctx->skip_frame >= AVDISCARD_ALL) {
-         return SLICE_SKIPED;
-     }
+    if (h->current_slice == 0 && !h->first_field) {
+        if (
+            (h->avctx->skip_frame >= AVDISCARD_NONREF && !h->nal_ref_idc) ||
+            (h->avctx->skip_frame >= AVDISCARD_BIDIR  && sl->slice_type_nos == AV_PICTURE_TYPE_B) ||
+            (h->avctx->skip_frame >= AVDISCARD_NONINTRA && sl->slice_type_nos != AV_PICTURE_TYPE_I) ||
+            (h->avctx->skip_frame >= AVDISCARD_NONKEY && h->nal_unit_type != NAL_IDR_SLICE && h->sei_recovery_frame_cnt < 0) ||
+            h->avctx->skip_frame >= AVDISCARD_ALL) {
+            return SLICE_SKIPED;
+        }
+    }
 
     // to make a few old functions happy, it's wrong though
-    h->pict_type = sl->slice_type;
+    if (!h->setup_finished)
+        h->pict_type = sl->slice_type;
 
     pps_id = get_ue_golomb(&sl->gb);
     if (pps_id >= MAX_PPS_COUNT) {
@@ -1244,8 +1279,17 @@ int ff_h264_decode_slice_header(H264Context *h, H264SliceContext *sl)
                h->pps.sps_id);
         return AVERROR_INVALIDDATA;
     }
-    if (first_slice)
+
+    if (first_slice) {
         h->pps = *h->pps_buffers[pps_id];
+    } else {
+        if (h->pps.sps_id != pps->sps_id ||
+            h->pps.transform_8x8_mode != pps->transform_8x8_mode ||
+            (h->setup_finished && h->dequant_coeff_pps != pps_id)) {
+            av_log(h->avctx, AV_LOG_ERROR, "PPS changed between slices\n");
+            return AVERROR_INVALIDDATA;
+        }
+    }
 
     if (pps->sps_id != h->sps.sps_id ||
         pps->sps_id != h->current_sps_id ||
@@ -1270,7 +1314,7 @@ int ff_h264_decode_slice_header(H264Context *h, H264SliceContext *sl)
             h->chroma_format_idc != h->sps.chroma_format_idc)
             needs_reinit         = 1;
 
-        if (h->flags & CODEC_FLAG_LOW_DELAY ||
+        if (h->flags & AV_CODEC_FLAG_LOW_DELAY ||
             (h->sps.bitstream_restriction_flag &&
              !h->sps.num_reorder_frames)) {
             if (h->avctx->has_b_frames > 1 || h->delayed_pic[0])
@@ -1285,10 +1329,6 @@ int ff_h264_decode_slice_header(H264Context *h, H264SliceContext *sl)
 
     }
 
-    h->avctx->profile = ff_h264_get_profile(&h->sps);
-    h->avctx->level   = h->sps.level_idc;
-    h->avctx->refs    = h->sps.ref_frame_count;
-
     must_reinit = (h->context_initialized &&
                     (   16*h->sps.mb_width != h->avctx->coded_width
                      || 16*h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag) != h->avctx->coded_height
@@ -1304,31 +1344,37 @@ int ff_h264_decode_slice_header(H264Context *h, H264SliceContext *sl)
     if (first_slice && av_cmp_q(h->sps.sar, h->avctx->sample_aspect_ratio))
         must_reinit = 1;
 
-    h->mb_width  = h->sps.mb_width;
-    h->mb_height = h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
-    h->mb_num    = h->mb_width * h->mb_height;
-    h->mb_stride = h->mb_width + 1;
+    if (!h->setup_finished) {
+        h->avctx->profile = ff_h264_get_profile(&h->sps);
+        h->avctx->level   = h->sps.level_idc;
+        h->avctx->refs    = h->sps.ref_frame_count;
 
-    h->b_stride = h->mb_width * 4;
+        h->mb_width  = h->sps.mb_width;
+        h->mb_height = h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
+        h->mb_num    = h->mb_width * h->mb_height;
+        h->mb_stride = h->mb_width + 1;
 
-    h->chroma_y_shift = h->sps.chroma_format_idc <= 1; // 400 uses yuv420p
+        h->b_stride = h->mb_width * 4;
 
-    h->width  = 16 * h->mb_width;
-    h->height = 16 * h->mb_height;
+        h->chroma_y_shift = h->sps.chroma_format_idc <= 1; // 400 uses yuv420p
 
-    ret = init_dimensions(h);
-    if (ret < 0)
-        return ret;
+        h->width  = 16 * h->mb_width;
+        h->height = 16 * h->mb_height;
+
+        ret = init_dimensions(h);
+        if (ret < 0)
+            return ret;
 
-    if (h->sps.video_signal_type_present_flag) {
-        h->avctx->color_range = h->sps.full_range>0 ? AVCOL_RANGE_JPEG
-                                                    : AVCOL_RANGE_MPEG;
-        if (h->sps.colour_description_present_flag) {
-            if (h->avctx->colorspace != h->sps.colorspace)
-                needs_reinit = 1;
-            h->avctx->color_primaries = h->sps.color_primaries;
-            h->avctx->color_trc       = h->sps.color_trc;
-            h->avctx->colorspace      = h->sps.colorspace;
+        if (h->sps.video_signal_type_present_flag) {
+            h->avctx->color_range = h->sps.full_range>0 ? AVCOL_RANGE_JPEG
+                                                        : AVCOL_RANGE_MPEG;
+            if (h->sps.colour_description_present_flag) {
+                if (h->avctx->colorspace != h->sps.colorspace)
+                    needs_reinit = 1;
+                h->avctx->color_primaries = h->sps.color_primaries;
+                h->avctx->color_trc       = h->sps.color_trc;
+                h->avctx->colorspace      = h->sps.colorspace;
+            }
         }
     }
 
@@ -1380,7 +1426,7 @@ int ff_h264_decode_slice_header(H264Context *h, H264SliceContext *sl)
         }
     }
 
-    if (first_slice && h->dequant_coeff_pps != pps_id) {
+    if (!h->current_slice && h->dequant_coeff_pps != pps_id) {
         h->dequant_coeff_pps = pps_id;
         ff_h264_init_dequant_tables(h);
     }
@@ -1394,12 +1440,16 @@ int ff_h264_decode_slice_header(H264Context *h, H264SliceContext *sl)
         }
     }
 
+    if (!h->setup_finished)
+        h->frame_num = frame_num;
+
     sl->mb_mbaff       = 0;
     mb_aff_frame       = 0;
     last_mb_aff_frame  = h->mb_aff_frame;
     last_pic_structure = h->picture_structure;
     last_pic_droppable = h->droppable;
-    droppable          = h->nal_ref_idc == 0;
+
+    droppable = h->nal_ref_idc == 0;
     if (h->sps.frame_mbs_only_flag) {
         picture_structure = PICT_FRAME;
     } else {
@@ -1417,6 +1467,7 @@ int ff_h264_decode_slice_header(H264Context *h, H264SliceContext *sl)
             mb_aff_frame      = h->sps.mb_aff;
         }
     }
+
     if (h->current_slice) {
         if (last_pic_structure != picture_structure ||
             last_pic_droppable != droppable ||
@@ -1433,10 +1484,11 @@ int ff_h264_decode_slice_header(H264Context *h, H264SliceContext *sl)
         }
     }
 
-    h->picture_structure = picture_structure;
-    h->droppable         = droppable;
-    h->frame_num         = frame_num;
-    h->mb_aff_frame      = mb_aff_frame;
+    if (!h->setup_finished) {
+        h->droppable         = droppable;
+        h->picture_structure = picture_structure;
+        h->mb_aff_frame      = mb_aff_frame;
+    }
     sl->mb_field_decoding_flag = picture_structure != PICT_FRAME;
 
     if (h->current_slice == 0) {
@@ -1618,12 +1670,8 @@ int ff_h264_decode_slice_header(H264Context *h, H264SliceContext *sl)
             memset(h->slice_table, -1,
                 (h->mb_height * h->mb_stride - 1) * sizeof(*h->slice_table));
         }
-        h->last_slice_type = -1;
     }
 
-
-    h->cur_pic_ptr->frame_num = h->frame_num; // FIXME frame_num cleanup
-
     av_assert1(h->mb_num == h->mb_width * h->mb_height);
     if (first_mb_in_slice << FIELD_OR_MBAFF_PICTURE(h) >= h->mb_num ||
         first_mb_in_slice >= h->mb_num) {
@@ -1646,23 +1694,37 @@ int ff_h264_decode_slice_header(H264Context *h, H264SliceContext *sl)
     }
 
     if (h->nal_unit_type == NAL_IDR_SLICE)
-        get_ue_golomb(&sl->gb); /* idr_pic_id */
+        get_ue_golomb_long(&sl->gb); /* idr_pic_id */
 
     if (h->sps.poc_type == 0) {
-        h->poc_lsb = get_bits(&sl->gb, h->sps.log2_max_poc_lsb);
+        int poc_lsb = get_bits(&sl->gb, h->sps.log2_max_poc_lsb);
 
-        if (h->pps.pic_order_present == 1 && h->picture_structure == PICT_FRAME)
-            h->delta_poc_bottom = get_se_golomb(&sl->gb);
+        if (!h->setup_finished)
+            h->poc_lsb = poc_lsb;
+
+        if (h->pps.pic_order_present == 1 && h->picture_structure == PICT_FRAME) {
+            int delta_poc_bottom = get_se_golomb(&sl->gb);
+            if (!h->setup_finished)
+                h->delta_poc_bottom = delta_poc_bottom;
+        }
     }
 
     if (h->sps.poc_type == 1 && !h->sps.delta_pic_order_always_zero_flag) {
-        h->delta_poc[0] = get_se_golomb(&sl->gb);
+        int delta_poc = get_se_golomb(&sl->gb);
 
-        if (h->pps.pic_order_present == 1 && h->picture_structure == PICT_FRAME)
-            h->delta_poc[1] = get_se_golomb(&sl->gb);
+        if (!h->setup_finished)
+            h->delta_poc[0] = delta_poc;
+
+        if (h->pps.pic_order_present == 1 && h->picture_structure == PICT_FRAME) {
+            delta_poc = get_se_golomb(&sl->gb);
+
+            if (!h->setup_finished)
+                h->delta_poc[1] = delta_poc;
+        }
     }
 
-    ff_init_poc(h, h->cur_pic_ptr->field_poc, &h->cur_pic_ptr->poc);
+    if (!h->setup_finished)
+        ff_init_poc(h, h->cur_pic_ptr->field_poc, &h->cur_pic_ptr->poc);
 
     if (h->pps.redundant_pic_cnt_present)
         sl->redundant_pic_count = get_ue_golomb(&sl->gb);
@@ -1671,14 +1733,6 @@ int ff_h264_decode_slice_header(H264Context *h, H264SliceContext *sl)
     if (ret < 0)
         return ret;
 
-    if (slice_type != AV_PICTURE_TYPE_I &&
-        (h->current_slice == 0 ||
-         slice_type != h->last_slice_type ||
-         memcmp(h->last_ref_count, sl->ref_count, sizeof(sl->ref_count)))) {
-
-        ff_h264_fill_default_ref_list(h, sl);
-    }
-
     if (sl->slice_type_nos != AV_PICTURE_TYPE_I) {
        ret = ff_h264_decode_ref_pic_list_reordering(h, sl);
        if (ret < 0) {
@@ -1794,7 +1848,7 @@ int ff_h264_decode_slice_header(H264Context *h, H264SliceContext *sl)
         sl->deblocking_filter = 0;
 
     if (sl->deblocking_filter == 1 && h->max_contexts > 1) {
-        if (h->avctx->flags2 & CODEC_FLAG2_FAST) {
+        if (h->avctx->flags2 & AV_CODEC_FLAG2_FAST) {
             /* Cheat slightly for speed:
              * Do not bother to deblock across slices. */
             sl->deblocking_filter = 2;
@@ -1822,8 +1876,6 @@ int ff_h264_decode_slice_header(H264Context *h, H264SliceContext *sl)
                           h->pps.chroma_qp_index_offset[1]) +
                    6 * (h->sps.bit_depth_luma - 8);
 
-    h->last_slice_type = slice_type;
-    memcpy(h->last_ref_count, sl->ref_count, sizeof(h->last_ref_count));
     sl->slice_num       = ++h->current_slice;
 
     if (sl->slice_num)
@@ -1930,12 +1982,12 @@ static av_always_inline void fill_filter_caches_inter(const H264Context *h,
         if (USES_LIST(top_type, list)) {
             const int b_xy  = h->mb2b_xy[top_xy] + 3 * b_stride;
             const int b8_xy = 4 * top_xy + 2;
-            int (*ref2frm)[64] = (void*)(sl->ref2frm[h->slice_table[top_xy] & (MAX_SLICES - 1)][0] + (MB_MBAFF(sl) ? 20 : 2));
+            int *ref2frm = sl->ref2frm[h->slice_table[top_xy] & (MAX_SLICES - 1)][list] + (MB_MBAFF(sl) ? 20 : 2);
             AV_COPY128(mv_dst - 1 * 8, h->cur_pic.motion_val[list][b_xy + 0]);
             ref_cache[0 - 1 * 8] =
-            ref_cache[1 - 1 * 8] = ref2frm[list][h->cur_pic.ref_index[list][b8_xy + 0]];
+            ref_cache[1 - 1 * 8] = ref2frm[h->cur_pic.ref_index[list][b8_xy + 0]];
             ref_cache[2 - 1 * 8] =
-            ref_cache[3 - 1 * 8] = ref2frm[list][h->cur_pic.ref_index[list][b8_xy + 1]];
+            ref_cache[3 - 1 * 8] = ref2frm[h->cur_pic.ref_index[list][b8_xy + 1]];
         } else {
             AV_ZERO128(mv_dst - 1 * 8);
             AV_WN32A(&ref_cache[0 - 1 * 8], ((LIST_NOT_USED) & 0xFF) * 0x01010101u);
@@ -1945,15 +1997,15 @@ static av_always_inline void fill_filter_caches_inter(const H264Context *h,
             if (USES_LIST(left_type[LTOP], list)) {
                 const int b_xy  = h->mb2b_xy[left_xy[LTOP]] + 3;
                 const int b8_xy = 4 * left_xy[LTOP] + 1;
-                int (*ref2frm)[64] =(void*)( sl->ref2frm[h->slice_table[left_xy[LTOP]] & (MAX_SLICES - 1)][0] + (MB_MBAFF(sl) ? 20 : 2));
+                int *ref2frm = sl->ref2frm[h->slice_table[left_xy[LTOP]] & (MAX_SLICES - 1)][list] + (MB_MBAFF(sl) ? 20 : 2);
                 AV_COPY32(mv_dst - 1 +  0, h->cur_pic.motion_val[list][b_xy + b_stride * 0]);
                 AV_COPY32(mv_dst - 1 +  8, h->cur_pic.motion_val[list][b_xy + b_stride * 1]);
                 AV_COPY32(mv_dst - 1 + 16, h->cur_pic.motion_val[list][b_xy + b_stride * 2]);
                 AV_COPY32(mv_dst - 1 + 24, h->cur_pic.motion_val[list][b_xy + b_stride * 3]);
                 ref_cache[-1 +  0] =
-                ref_cache[-1 +  8] = ref2frm[list][h->cur_pic.ref_index[list][b8_xy + 2 * 0]];
+                ref_cache[-1 +  8] = ref2frm[h->cur_pic.ref_index[list][b8_xy + 2 * 0]];
                 ref_cache[-1 + 16] =
-                ref_cache[-1 + 24] = ref2frm[list][h->cur_pic.ref_index[list][b8_xy + 2 * 1]];
+                ref_cache[-1 + 24] = ref2frm[h->cur_pic.ref_index[list][b8_xy + 2 * 1]];
             } else {
                 AV_ZERO32(mv_dst - 1 +  0);
                 AV_ZERO32(mv_dst - 1 +  8);
@@ -1978,9 +2030,9 @@ static av_always_inline void fill_filter_caches_inter(const H264Context *h,
 
     {
         int8_t *ref = &h->cur_pic.ref_index[list][4 * mb_xy];
-        int (*ref2frm)[64] = (void*)(sl->ref2frm[sl->slice_num & (MAX_SLICES - 1)][0] + (MB_MBAFF(sl) ? 20 : 2));
-        uint32_t ref01 = (pack16to32(ref2frm[list][ref[0]], ref2frm[list][ref[1]]) & 0x00FF00FF) * 0x0101;
-        uint32_t ref23 = (pack16to32(ref2frm[list][ref[2]], ref2frm[list][ref[3]]) & 0x00FF00FF) * 0x0101;
+        int *ref2frm = sl->ref2frm[sl->slice_num & (MAX_SLICES - 1)][list] + (MB_MBAFF(sl) ? 20 : 2);
+        uint32_t ref01 = (pack16to32(ref2frm[ref[0]], ref2frm[ref[1]]) & 0x00FF00FF) * 0x0101;
+        uint32_t ref23 = (pack16to32(ref2frm[ref[2]], ref2frm[ref[3]]) & 0x00FF00FF) * 0x0101;
         AV_WN32A(&ref_cache[0 * 8], ref01);
         AV_WN32A(&ref_cache[1 * 8], ref01);
         AV_WN32A(&ref_cache[2 * 8], ref23);
@@ -2292,7 +2344,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg)
 
     sl->is_complex = FRAME_MBAFF(h) || h->picture_structure != PICT_FRAME ||
                      avctx->codec_id != AV_CODEC_ID_H264 ||
-                     (CONFIG_GRAY && (h->flags & CODEC_FLAG_GRAY));
+                     (CONFIG_GRAY && (h->flags & AV_CODEC_FLAG_GRAY));
 
     if (!(h->avctx->active_thread_type & FF_THREAD_SLICE) && h->picture_structure == PICT_FRAME && h->slice_ctx[0].er.error_status_table) {
         const int start_i  = av_clip(sl->resync_mb_x + sl->resync_mb_y * h->mb_width, 0, h->mb_num - 1);
@@ -2309,19 +2361,20 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg)
         align_get_bits(&sl->gb);
 
         /* init cabac */
-        ff_init_cabac_decoder(&sl->cabac,
+        ret = ff_init_cabac_decoder(&sl->cabac,
                               sl->gb.buffer + get_bits_count(&sl->gb) / 8,
                               (get_bits_left(&sl->gb) + 7) / 8);
+        if (ret < 0)
+            return ret;
 
         ff_h264_init_cabac_states(h, sl);
 
         for (;;) {
             // START_TIMER
             int ret, eos;
-
-            if (sl->mb_x + sl->mb_y * h->mb_width >= sl->mb_index_end) {
-                av_log(h->avctx, AV_LOG_ERROR, "Slice overlaps next at %d\n",
-                       sl->mb_index_end);
+            if (sl->mb_x + sl->mb_y * h->mb_width >= sl->next_slice_idx) {
+                av_log(h->avctx, AV_LOG_ERROR, "Slice overlaps with next at %d\n",
+                       sl->next_slice_idx);
                 er_add_slice(sl, sl->resync_mb_x, sl->resync_mb_y, sl->mb_x,
                              sl->mb_y, ER_MB_ERROR);
                 return AVERROR_INVALIDDATA;
@@ -2391,9 +2444,9 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg)
         for (;;) {
             int ret;
 
-            if (sl->mb_x + sl->mb_y * h->mb_width >= sl->mb_index_end) {
-                av_log(h->avctx, AV_LOG_ERROR, "Slice overlaps next at %d\n",
-                       sl->mb_index_end);
+            if (sl->mb_x + sl->mb_y * h->mb_width >= sl->next_slice_idx) {
+                av_log(h->avctx, AV_LOG_ERROR, "Slice overlaps with next at %d\n",
+                       sl->next_slice_idx);
                 er_add_slice(sl, sl->resync_mb_x, sl->resync_mb_y, sl->mb_x,
                              sl->mb_y, ER_MB_ERROR);
                 return AVERROR_INVALIDDATA;
@@ -2483,38 +2536,48 @@ int ff_h264_execute_decode_slices(H264Context *h, unsigned context_count)
 {
     AVCodecContext *const avctx = h->avctx;
     H264SliceContext *sl;
-    int i;
+    int i, j;
 
     av_assert0(context_count && h->slice_ctx[context_count - 1].mb_y < h->mb_height);
 
-    h->slice_ctx[0].mb_index_end = INT_MAX;
+    h->slice_ctx[0].next_slice_idx = INT_MAX;
 
-    if (h->avctx->hwaccel ||
-        h->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU)
+    if (h->avctx->hwaccel
+#if FF_API_CAP_VDPAU
+        || h->avctx->codec->capabilities & AV_CODEC_CAP_HWACCEL_VDPAU
+#endif
+        )
         return 0;
     if (context_count == 1) {
-        int ret = decode_slice(avctx, &h->slice_ctx[0]);
+        int ret;
+
+        h->slice_ctx[0].next_slice_idx = h->mb_width * h->mb_height;
+
+        ret = decode_slice(avctx, &h->slice_ctx[0]);
         h->mb_y = h->slice_ctx[0].mb_y;
         return ret;
     } else {
-        int j, mb_index;
         av_assert0(context_count > 0);
         for (i = 0; i < context_count; i++) {
-            int mb_index_end = h->mb_width * h->mb_height;
+            int next_slice_idx = h->mb_width * h->mb_height;
+            int slice_idx;
+
             sl                 = &h->slice_ctx[i];
-            mb_index = sl->resync_mb_x + sl->resync_mb_y * h->mb_width;
             if (CONFIG_ERROR_RESILIENCE) {
                 sl->er.error_count = 0;
             }
+
+            /* make sure none of those slices overlap */
+            slice_idx = sl->mb_y * h->mb_width + sl->mb_x;
             for (j = 0; j < context_count; j++) {
                 H264SliceContext *sl2 = &h->slice_ctx[j];
-                int mb_index2 = sl2->resync_mb_x + sl2->resync_mb_y * h->mb_width;
+                int        slice_idx2 = sl2->mb_y * h->mb_width + sl2->mb_x;
 
-                if (i==j || mb_index > mb_index2)
+                if (i == j || slice_idx2 < slice_idx)
                     continue;
-                mb_index_end = FFMIN(mb_index_end, mb_index2);
+                next_slice_idx = FFMIN(next_slice_idx, slice_idx2);
             }
-            sl->mb_index_end = mb_index_end;
+            sl->next_slice_idx = next_slice_idx;
         }
 
         avctx->execute(avctx, decode_slice, h->slice_ctx,
diff --git a/libavcodec/h264dsp_template.c b/libavcodec/h264dsp_template.c
index fa110196..9b2cc245 100644
--- a/libavcodec/h264dsp_template.c
+++ b/libavcodec/h264dsp_template.c
@@ -110,7 +110,7 @@ static av_always_inline av_flatten void FUNCC(h264_loop_filter_luma)(uint8_t *p_
     alpha <<= BIT_DEPTH - 8;
     beta  <<= BIT_DEPTH - 8;
     for( i = 0; i < 4; i++ ) {
-        const int tc_orig = tc0[i] << (BIT_DEPTH - 8);
+        const int tc_orig = tc0[i] * (1 << (BIT_DEPTH - 8));
         if( tc_orig < 0 ) {
             pix += inner_iters*ystride;
             continue;
@@ -141,7 +141,7 @@ static av_always_inline av_flatten void FUNCC(h264_loop_filter_luma)(uint8_t *p_
                     tc++;
                 }
 
-                i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
+                i_delta = av_clip( (((q0 - p0 ) * 4) + (p1 - q1) + 4) >> 3, -tc, tc );
                 pix[-xstride] = av_clip_pixel( p0 + i_delta );    /* p0' */
                 pix[0]        = av_clip_pixel( q0 - i_delta );    /* q0' */
             }
diff --git a/libavcodec/h264pred.c b/libavcodec/h264pred.c
index 044fc90c..8f15f71f 100644
--- a/libavcodec/h264pred.c
+++ b/libavcodec/h264pred.c
@@ -592,6 +592,12 @@ av_cold void ff_h264_pred_init(H264PredContext *h, int codec_id,
             break;
     }
 
-    if (ARCH_ARM) ff_h264_pred_init_arm(h, codec_id, bit_depth, chroma_format_idc);
-    if (ARCH_X86) ff_h264_pred_init_x86(h, codec_id, bit_depth, chroma_format_idc);
+    if (ARCH_AARCH64)
+        ff_h264_pred_init_aarch64(h, codec_id, bit_depth, chroma_format_idc);
+    if (ARCH_ARM)
+        ff_h264_pred_init_arm(h, codec_id, bit_depth, chroma_format_idc);
+    if (ARCH_X86)
+        ff_h264_pred_init_x86(h, codec_id, bit_depth, chroma_format_idc);
+    if (ARCH_MIPS)
+        ff_h264_pred_init_mips(h, codec_id, bit_depth, chroma_format_idc);
 }
diff --git a/libavcodec/h264pred.h b/libavcodec/h264pred.h
index 67082920..091dcbbf 100644
--- a/libavcodec/h264pred.h
+++ b/libavcodec/h264pred.h
@@ -113,9 +113,14 @@ typedef struct H264PredContext {
 
 void ff_h264_pred_init(H264PredContext *h, int codec_id,
                        const int bit_depth, const int chroma_format_idc);
+void ff_h264_pred_init_aarch64(H264PredContext *h, int codec_id,
+                               const int bit_depth,
+                               const int chroma_format_idc);
 void ff_h264_pred_init_arm(H264PredContext *h, int codec_id,
                            const int bit_depth, const int chroma_format_idc);
 void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
                            const int bit_depth, const int chroma_format_idc);
+void ff_h264_pred_init_mips(H264PredContext *h, int codec_id,
+                            const int bit_depth, const int chroma_format_idc);
 
 #endif /* AVCODEC_H264PRED_H */
diff --git a/libavcodec/h264qpel.c b/libavcodec/h264qpel.c
index 5f1bfa31..50e82e23 100644
--- a/libavcodec/h264qpel.c
+++ b/libavcodec/h264qpel.c
@@ -104,4 +104,6 @@ av_cold void ff_h264qpel_init(H264QpelContext *c, int bit_depth)
         ff_h264qpel_init_ppc(c, bit_depth);
     if (ARCH_X86)
         ff_h264qpel_init_x86(c, bit_depth);
+    if (ARCH_MIPS)
+        ff_h264qpel_init_mips(c, bit_depth);
 }
diff --git a/libavcodec/h264qpel.h b/libavcodec/h264qpel.h
index d71130d1..7c57ad00 100644
--- a/libavcodec/h264qpel.h
+++ b/libavcodec/h264qpel.h
@@ -35,5 +35,6 @@ void ff_h264qpel_init_aarch64(H264QpelContext *c, int bit_depth);
 void ff_h264qpel_init_arm(H264QpelContext *c, int bit_depth);
 void ff_h264qpel_init_ppc(H264QpelContext *c, int bit_depth);
 void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth);
+void ff_h264qpel_init_mips(H264QpelContext *c, int bit_depth);
 
 #endif /* AVCODEC_H264QPEL_H */
diff --git a/libavcodec/hap.c b/libavcodec/hap.c
new file mode 100644
index 00000000..5b3af5e1
--- /dev/null
+++ b/libavcodec/hap.c
@@ -0,0 +1,55 @@
+/*
+ * Vidvox Hap utility functions
+ * Copyright (C) 2015 Tom Butterworth <bangnoise@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Hap utilities
+ */
+#include "hap.h"
+
+int ff_hap_set_chunk_count(HapContext *ctx, int count, int first_in_frame)
+{
+    int ret = 0;
+    if (first_in_frame == 1 && ctx->chunk_count != count) {
+        /* Resize both per-chunk arrays. Assign the function-level ret here:
+         * a block-local declaration would shadow it and make the function
+         * return 0 even when an allocation failed. */
+        ret = av_reallocp_array(&ctx->chunks, count, sizeof(HapChunk));
+        if (ret == 0)
+            ret = av_reallocp_array(&ctx->chunk_results, count, sizeof(int));
+        /* On failure advertise zero chunks, otherwise the new count. */
+        ctx->chunk_count = ret < 0 ? 0 : count;
+    } else if (ctx->chunk_count != count) {
+        /* If this is not the first chunk count calculated for a frame and a
+         * different count has already been encountered, then reject the frame:
+         * each table in the Decode Instructions Container must describe the
+         * same number of chunks. */
+        ret = AVERROR_INVALIDDATA;
+    }
+    return ret;
+}
+
+av_cold void ff_hap_free_context(HapContext *ctx)
+{   /* Releases the three buffers held by the context; chunk_count is not modified here. */
+    av_freep(&ctx->tex_buf);       /* texture buffer */
+    av_freep(&ctx->chunks);        /* array sized by ff_hap_set_chunk_count() */
+    av_freep(&ctx->chunk_results); /* array sized by ff_hap_set_chunk_count() */
+}
diff --git a/libavcodec/hap.h b/libavcodec/hap.h
new file mode 100644
index 00000000..e4762ee4
--- /dev/null
+++ b/libavcodec/hap.h
@@ -0,0 +1,98 @@
+/*
+ * Vidvox Hap
+ * Copyright (C) 2015 Vittorio Giovara <vittorio.giovara@gmail.com>
+ * Copyright (C) 2015 Tom Butterworth <bangnoise@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_HAP_H
+#define AVCODEC_HAP_H
+
+#include <stdint.h>
+
+#include "libavutil/opt.h"
+
+#include "bytestream.h"
+#include "texturedsp.h"
+
+enum HapTextureFormat {
+    HAP_FMT_RGBDXT1   = 0x0B,
+    HAP_FMT_RGBADXT5  = 0x0E,
+    HAP_FMT_YCOCGDXT5 = 0x0F,
+};
+
+enum HapCompressor {
+    HAP_COMP_NONE    = 0xA0,
+    HAP_COMP_SNAPPY  = 0xB0,
+    HAP_COMP_COMPLEX = 0xC0,
+};
+
+enum HapSectionType {
+    HAP_ST_DECODE_INSTRUCTIONS = 0x01,
+    HAP_ST_COMPRESSOR_TABLE    = 0x02,
+    HAP_ST_SIZE_TABLE          = 0x03,
+    HAP_ST_OFFSET_TABLE        = 0x04,
+};
+
+typedef struct HapChunk {
+    enum HapCompressor compressor;
+    int compressed_offset;
+    size_t compressed_size;
+    int uncompressed_offset;
+    size_t uncompressed_size;
+} HapChunk;
+
+typedef struct HapContext {
+    AVClass *class;
+
+    TextureDSPContext dxtc;
+    GetByteContext gbc;
+
+    enum HapTextureFormat opt_tex_fmt; /* Texture type (encoder only) */
+    int opt_chunk_count; /* User-requested chunk count (encoder only) */
+
+    int chunk_count;         /* Entry count for chunks/chunk_results, set by ff_hap_set_chunk_count() */
+    HapChunk *chunks;        /* Array of HapChunk, resized by ff_hap_set_chunk_count() */
+    int *chunk_results;      /* Results from threaded operations */
+
+    int tex_rat;             /* Compression ratio */
+    const uint8_t *tex_data; /* Compressed texture */
+    uint8_t *tex_buf;        /* Buffer for compressed texture */
+    size_t tex_size;         /* Size of the compressed texture */
+
+    size_t max_snappy;       /* Maximum compressed size for snappy buffer */
+
+    int slice_count;         /* Number of slices for threaded operations */
+
+    /* Pointer to the selected compress or decompress function */
+    int (*tex_fun)(uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+} HapContext;
+
+/*
+ * Set the number of chunks in the frame. Returns 0 on success or an error if:
+ * - first_in_frame is 0 and the number of chunks has changed
+ * - any other error occurs
+ */
+int ff_hap_set_chunk_count(HapContext *ctx, int count, int first_in_frame);
+
+/*
+ * Free resources associated with the context
+ */
+av_cold void ff_hap_free_context(HapContext *ctx);
+
+#endif /* AVCODEC_HAP_H */
diff --git a/libavcodec/hapdec.c b/libavcodec/hapdec.c
new file mode 100644
index 00000000..5a399dcb
--- /dev/null
+++ b/libavcodec/hapdec.c
@@ -0,0 +1,441 @@
+/*
+ * Vidvox Hap decoder
+ * Copyright (C) 2015 Vittorio Giovara <vittorio.giovara@gmail.com>
+ * Copyright (C) 2015 Tom Butterworth <bangnoise@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Hap decoder
+ *
+ * Fourcc: Hap1, Hap5, HapY
+ *
+ * https://github.com/Vidvox/hap/blob/master/documentation/HapVideoDRAFT.md
+ */
+
+#include <stdint.h>
+
+#include "libavutil/imgutils.h"
+
+#include "avcodec.h"
+#include "bytestream.h"
+#include "hap.h"
+#include "internal.h"
+#include "memory.h"
+#include "snappy.h"
+#include "texturedsp.h"
+#include "thread.h"
+
+/* The first three bytes are the size of the section past the header, or zero
+ * if the length is stored in the next long word. The fourth byte in the first
+ * long word indicates the type of the current section. */
+static int parse_section_header(GetByteContext *gbc, int *section_size,
+                                enum HapSectionType *section_type)
+{
+    if (bytestream2_get_bytes_left(gbc) < 4)
+        return AVERROR_INVALIDDATA;
+
+    *section_size = bytestream2_get_le24(gbc);
+    *section_type = bytestream2_get_byte(gbc);
+
+    if (*section_size == 0) {
+        if (bytestream2_get_bytes_left(gbc) < 4)
+            return AVERROR_INVALIDDATA;
+
+        *section_size = bytestream2_get_le32(gbc);
+    }
+
+    if (*section_size > bytestream2_get_bytes_left(gbc) || *section_size < 0)
+        return AVERROR_INVALIDDATA;
+    else
+        return 0;
+}
+
+static int hap_parse_decode_instructions(HapContext *ctx, int size)
+{
+    GetByteContext *gbc = &ctx->gbc;
+    int section_size;
+    enum HapSectionType section_type;
+    int is_first_table = 1, had_offsets = 0, had_compressors = 0, had_sizes = 0;
+    int i, ret;
+
+    while (size > 0) {
+        int stream_remaining = bytestream2_get_bytes_left(gbc);
+        ret = parse_section_header(gbc, &section_size, &section_type);
+        if (ret != 0)
+            return ret;
+
+        size -= stream_remaining - bytestream2_get_bytes_left(gbc);
+
+        switch (section_type) {
+            case HAP_ST_COMPRESSOR_TABLE:
+                ret = ff_hap_set_chunk_count(ctx, section_size, is_first_table);
+                if (ret != 0)
+                    return ret;
+                for (i = 0; i < section_size; i++) {
+                    ctx->chunks[i].compressor = bytestream2_get_byte(gbc) << 4;
+                }
+                had_compressors = 1;
+                is_first_table = 0;
+                break;
+            case HAP_ST_SIZE_TABLE:
+                ret = ff_hap_set_chunk_count(ctx, section_size / 4, is_first_table);
+                if (ret != 0)
+                    return ret;
+                for (i = 0; i < section_size / 4; i++) {
+                    ctx->chunks[i].compressed_size = bytestream2_get_le32(gbc);
+                }
+                had_sizes = 1;
+                is_first_table = 0;
+                break;
+            case HAP_ST_OFFSET_TABLE:
+                ret = ff_hap_set_chunk_count(ctx, section_size / 4, is_first_table);
+                if (ret != 0)
+                    return ret;
+                for (i = 0; i < section_size / 4; i++) {
+                    ctx->chunks[i].compressed_offset = bytestream2_get_le32(gbc);
+                }
+                had_offsets = 1;
+                is_first_table = 0;
+                break;
+            default:
+                break;
+        }
+        size -= section_size;
+    }
+
+    if (!had_sizes || !had_compressors)
+        return AVERROR_INVALIDDATA;
+
+    /* The offsets table is optional. If not present then calculate offsets by
+     * summing the sizes of preceding chunks. */
+    if (!had_offsets) {
+        size_t running_size = 0;
+        for (i = 0; i < ctx->chunk_count; i++) {
+            ctx->chunks[i].compressed_offset = running_size;
+            running_size += ctx->chunks[i].compressed_size;
+        }
+    }
+
+    return 0;
+}
+
+static int hap_can_use_tex_in_place(HapContext *ctx)
+{
+    int i;
+    size_t running_offset = 0;
+    for (i = 0; i < ctx->chunk_count; i++) {
+        if (ctx->chunks[i].compressed_offset != running_offset
+            || ctx->chunks[i].compressor != HAP_COMP_NONE)
+            return 0;
+        running_offset += ctx->chunks[i].compressed_size;
+    }
+    return 1;
+}
+
+/* Parse the top-level section header and describe every chunk in ctx->chunks
+ * (compressor, compressed and uncompressed offset/size); ctx->tex_size is set
+ * to the total uncompressed size. Returns 0 on success or an error code. */
+static int hap_parse_frame_header(AVCodecContext *avctx)
+{
+    HapContext *ctx = avctx->priv_data;
+    GetByteContext *gbc = &ctx->gbc;
+    int section_size;
+    enum HapSectionType section_type;
+    const char *compressorstr;
+    int i, ret;
+
+    ret = parse_section_header(gbc, &section_size, &section_type);
+    if (ret != 0)
+        return ret;
+
+    if ((avctx->codec_tag == MKTAG('H','a','p','1') && (section_type & 0x0F) != HAP_FMT_RGBDXT1) ||
+        (avctx->codec_tag == MKTAG('H','a','p','5') && (section_type & 0x0F) != HAP_FMT_RGBADXT5) ||
+        (avctx->codec_tag == MKTAG('H','a','p','Y') && (section_type & 0x0F) != HAP_FMT_YCOCGDXT5)) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Invalid texture format %#04x.\n", section_type & 0x0F);
+        return AVERROR_INVALIDDATA;
+    }
+
+    switch (section_type & 0xF0) {
+        case HAP_COMP_NONE:
+        case HAP_COMP_SNAPPY:
+            ret = ff_hap_set_chunk_count(ctx, 1, 1);
+            if (ret == 0) {
+                ctx->chunks[0].compressor = section_type & 0xF0;
+                ctx->chunks[0].compressed_offset = 0;
+                ctx->chunks[0].compressed_size = section_size;
+            }
+            /* Name it from the header: ctx->chunks must not be read if the call failed */
+            compressorstr = (section_type & 0xF0) == HAP_COMP_NONE ? "none" : "snappy";
+            break;
+        case HAP_COMP_COMPLEX:
+            ret = parse_section_header(gbc, &section_size, &section_type);
+            if (ret == 0 && section_type != HAP_ST_DECODE_INSTRUCTIONS)
+                ret = AVERROR_INVALIDDATA;
+            if (ret == 0)
+                ret = hap_parse_decode_instructions(ctx, section_size);
+            compressorstr = "complex";
+            break;
+        default:
+            ret = AVERROR_INVALIDDATA;
+            break;
+    }
+
+    if (ret != 0)
+        return ret;
+
+    /* Check the frame is valid and read the uncompressed chunk sizes */
+    ctx->tex_size = 0;
+    for (i = 0; i < ctx->chunk_count; i++) {
+        HapChunk *chunk = &ctx->chunks[i];
+
+        /* Check the compressed range lies in the packet (no negative offset, no wrap) */
+        if (chunk->compressed_offset < 0 ||
+            chunk->compressed_offset + (uint64_t)chunk->compressed_size > bytestream2_get_bytes_left(gbc))
+            return AVERROR_INVALIDDATA;
+
+        /* Chunks are unpacked sequentially, ctx->tex_size is the uncompressed
+         * size thus far */
+        chunk->uncompressed_offset = ctx->tex_size;
+
+        /* Fill out uncompressed size */
+        if (chunk->compressor == HAP_COMP_SNAPPY) {
+            GetByteContext gbc_tmp;
+            int64_t uncompressed_size;
+            bytestream2_init(&gbc_tmp, gbc->buffer + chunk->compressed_offset,
+                             chunk->compressed_size);
+            uncompressed_size = ff_snappy_peek_uncompressed_length(&gbc_tmp);
+            if (uncompressed_size < 0)
+                return uncompressed_size;
+            chunk->uncompressed_size = uncompressed_size;
+        } else if (chunk->compressor == HAP_COMP_NONE) {
+            chunk->uncompressed_size = chunk->compressed_size;
+        } else {
+            return AVERROR_INVALIDDATA;
+        }
+        ctx->tex_size += chunk->uncompressed_size;
+    }
+
+    av_log(avctx, AV_LOG_DEBUG, "%s compressor\n", compressorstr);
+
+    return ret;
+}
+
+static int decompress_chunks_thread(AVCodecContext *avctx, void *arg,
+                                    int chunk_nb, int thread_nb)
+{
+    HapContext *ctx = avctx->priv_data;
+
+    HapChunk *chunk = &ctx->chunks[chunk_nb];
+    GetByteContext gbc;
+    uint8_t *dst = ctx->tex_buf + chunk->uncompressed_offset;
+
+    bytestream2_init(&gbc, ctx->gbc.buffer + chunk->compressed_offset, chunk->compressed_size);
+
+    if (chunk->compressor == HAP_COMP_SNAPPY) {
+        int ret;
+        int64_t uncompressed_size = ctx->tex_size;
+
+        /* Uncompress the frame */
+        ret = ff_snappy_uncompress(&gbc, dst, &uncompressed_size);
+        if (ret < 0) {
+             av_log(avctx, AV_LOG_ERROR, "Snappy uncompress error\n");
+             return ret;
+        }
+    } else if (chunk->compressor == HAP_COMP_NONE) {
+        bytestream2_get_buffer(&gbc, dst, chunk->compressed_size);
+    }
+
+    return 0;
+}
+
+static int decompress_texture_thread(AVCodecContext *avctx, void *arg,
+                                     int slice, int thread_nb)
+{
+    HapContext *ctx = avctx->priv_data;
+    AVFrame *frame = arg;
+    const uint8_t *d = ctx->tex_data;
+    int w_block = avctx->coded_width / TEXTURE_BLOCK_W;
+    int h_block = avctx->coded_height / TEXTURE_BLOCK_H;
+    int x, y;
+    int start_slice, end_slice;
+    int base_blocks_per_slice = h_block / ctx->slice_count;
+    int remainder_blocks = h_block % ctx->slice_count;
+
+    /* When the frame height (in blocks) doesn't divide evenly between the
+     * number of slices, spread the remaining blocks evenly between the first
+     * operations */
+    start_slice = slice * base_blocks_per_slice;
+    /* Add any extra blocks (one per slice) that have been added before this slice */
+    start_slice += FFMIN(slice, remainder_blocks);
+
+    end_slice = start_slice + base_blocks_per_slice;
+    /* Add an extra block if there are still remainder blocks to be accounted for */
+    if (slice < remainder_blocks)
+        end_slice++;
+
+    for (y = start_slice; y < end_slice; y++) {
+        uint8_t *p = frame->data[0] + y * frame->linesize[0] * TEXTURE_BLOCK_H;
+        int off  = y * w_block;
+        for (x = 0; x < w_block; x++) {
+            ctx->tex_fun(p + x * 16, frame->linesize[0],
+                         d + (off + x) * ctx->tex_rat);
+        }
+    }
+
+    return 0;
+}
+
+static int hap_decode(AVCodecContext *avctx, void *data,
+                      int *got_frame, AVPacket *avpkt)
+{
+    HapContext *ctx = avctx->priv_data;
+    ThreadFrame tframe;
+    int ret, i;
+    int tex_size;
+
+    bytestream2_init(&ctx->gbc, avpkt->data, avpkt->size);
+
+    /* Check for section header */
+    ret = hap_parse_frame_header(avctx);
+    if (ret < 0)
+        return ret;
+
+    /* Get the output frame ready to receive data */
+    tframe.f = data;
+    ret = ff_thread_get_buffer(avctx, &tframe, 0);
+    if (ret < 0)
+        return ret;
+    if (avctx->codec->update_thread_context)
+        ff_thread_finish_setup(avctx);
+
+    /* Unpack the DXT texture */
+    if (hap_can_use_tex_in_place(ctx)) {
+        /* Only DXTC texture compression in a contiguous block */
+        ctx->tex_data = ctx->gbc.buffer;
+        tex_size = bytestream2_get_bytes_left(&ctx->gbc);
+    } else {
+        /* Perform the second-stage decompression */
+        ret = av_reallocp(&ctx->tex_buf, ctx->tex_size);
+        if (ret < 0)
+            return ret;
+
+        avctx->execute2(avctx, decompress_chunks_thread, NULL,
+                        ctx->chunk_results, ctx->chunk_count);
+
+        for (i = 0; i < ctx->chunk_count; i++) {
+            if (ctx->chunk_results[i] < 0)
+                return ctx->chunk_results[i];
+        }
+
+        ctx->tex_data = ctx->tex_buf;
+        tex_size = ctx->tex_size;
+    }
+
+    if (tex_size < (avctx->coded_width  / TEXTURE_BLOCK_W)
+                  *(avctx->coded_height / TEXTURE_BLOCK_H)
+                  *ctx->tex_rat) {
+        av_log(avctx, AV_LOG_ERROR, "Insufficient data\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* Use the decompress function on the texture, one block per thread */
+    avctx->execute2(avctx, decompress_texture_thread, tframe.f, NULL, ctx->slice_count);
+
+    /* Frame is ready to be output */
+    tframe.f->pict_type = AV_PICTURE_TYPE_I;
+    tframe.f->key_frame = 1;
+    *got_frame = 1;
+
+    return avpkt->size;
+}
+
+static av_cold int hap_init(AVCodecContext *avctx)
+{
+    HapContext *ctx = avctx->priv_data;
+    const char *texture_name;
+    int ret = av_image_check_size(avctx->width, avctx->height, 0, avctx);
+
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid video size %dx%d.\n",
+               avctx->width, avctx->height);
+        return ret;
+    }
+
+    /* Since codec is based on 4x4 blocks, size is aligned to 4 */
+    avctx->coded_width  = FFALIGN(avctx->width,  TEXTURE_BLOCK_W);
+    avctx->coded_height = FFALIGN(avctx->height, TEXTURE_BLOCK_H);
+
+    ff_texturedsp_init(&ctx->dxtc);
+
+    switch (avctx->codec_tag) {
+    case MKTAG('H','a','p','1'):
+        texture_name = "DXT1";
+        ctx->tex_rat = 8;
+        ctx->tex_fun = ctx->dxtc.dxt1_block;
+        avctx->pix_fmt = AV_PIX_FMT_RGB0;
+        break;
+    case MKTAG('H','a','p','5'):
+        texture_name = "DXT5";
+        ctx->tex_rat = 16;
+        ctx->tex_fun = ctx->dxtc.dxt5_block;
+        avctx->pix_fmt = AV_PIX_FMT_RGBA;
+        break;
+    case MKTAG('H','a','p','Y'):
+        texture_name = "DXT5-YCoCg-scaled";
+        ctx->tex_rat = 16;
+        ctx->tex_fun = ctx->dxtc.dxt5ys_block;
+        avctx->pix_fmt = AV_PIX_FMT_RGB0;
+        break;
+    default:
+        return AVERROR_DECODER_NOT_FOUND;
+    }
+
+    av_log(avctx, AV_LOG_DEBUG, "%s texture\n", texture_name);
+
+    ctx->slice_count = av_clip(avctx->thread_count, 1,
+                               avctx->coded_height / TEXTURE_BLOCK_H);
+
+    return 0;
+}
+
+static av_cold int hap_close(AVCodecContext *avctx)
+{
+    HapContext *ctx = avctx->priv_data;
+
+    ff_hap_free_context(ctx);
+
+    return 0;
+}
+
+AVCodec ff_hap_decoder = {
+    .name           = "hap",
+    .long_name      = NULL_IF_CONFIG_SMALL("Vidvox Hap decoder"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_HAP,
+    .init           = hap_init,
+    .decode         = hap_decode,
+    .close          = hap_close,
+    .priv_data_size = sizeof(HapContext),
+    .capabilities   = AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_SLICE_THREADS |
+                      AV_CODEC_CAP_DR1,
+    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE |
+                      FF_CODEC_CAP_INIT_CLEANUP,
+};
diff --git a/libavcodec/hapenc.c b/libavcodec/hapenc.c
new file mode 100644
index 00000000..cb5dcfac
--- /dev/null
+++ b/libavcodec/hapenc.c
@@ -0,0 +1,334 @@
+/*
+ * Vidvox Hap encoder
+ * Copyright (C) 2015 Vittorio Giovara <vittorio.giovara@gmail.com>
+ * Copyright (C) 2015 Tom Butterworth <bangnoise@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Hap encoder
+ *
+ * Fourcc: Hap1, Hap5, HapY
+ *
+ * https://github.com/Vidvox/hap/blob/master/documentation/HapVideoDRAFT.md
+ */
+
+#include <stdint.h>
+#include "snappy-c.h"
+
+#include "libavutil/frame.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/opt.h"
+
+#include "avcodec.h"
+#include "bytestream.h"
+#include "hap.h"
+#include "internal.h"
+#include "texturedsp.h"
+
+#define HAP_MAX_CHUNKS 64
+
+enum HapHeaderLength {
+    /* Short header: four bytes with a 24 bit size value */
+    HAP_HDR_SHORT = 4,
+    /* Long header: eight bytes with a 32 bit size value */
+    HAP_HDR_LONG = 8,
+};
+
+static void compress_texture(AVCodecContext *avctx, const AVFrame *f)
+{
+    HapContext *ctx = avctx->priv_data;
+    uint8_t *out = ctx->tex_buf;
+    int i, j;
+
+    for (j = 0; j < avctx->height; j += 4) {
+        for (i = 0; i < avctx->width; i += 4) {
+            uint8_t *p = f->data[0] + i * 4 + j * f->linesize[0];
+            const int step = ctx->tex_fun(out, f->linesize[0], p);
+            out += step;
+        }
+    }
+}
+
+/* section_length does not include the header */
+static void hap_write_section_header(PutByteContext *pbc,
+                                     enum HapHeaderLength header_length,
+                                     int section_length,
+                                     enum HapSectionType section_type)
+{
+    /* The first three bytes are the length of the section (not including the
+     * header) or zero if using an eight-byte header.
+     * For an eight-byte header, the length is in the last four bytes.
+     * The fourth byte stores the section type. */
+    bytestream2_put_le24(pbc, header_length == HAP_HDR_LONG ? 0 : section_length);
+    bytestream2_put_byte(pbc, section_type);
+
+    if (header_length == HAP_HDR_LONG) {
+        bytestream2_put_le32(pbc, section_length);
+    }
+}
+
+/* Snappy-compress each chunk of ctx->tex_buf straight into dst, keeping the raw
+ * texture for chunks that snappy does not shrink, and record every chunk's
+ * compressor, offsets and sizes in ctx->chunks.
+ * Returns the total number of bytes written to dst, or AVERROR_BUG on failure. */
+static int hap_compress_frame(AVCodecContext *avctx, uint8_t *dst)
+{
+    HapContext *ctx = avctx->priv_data;
+    int i, final_size = 0;
+
+    for (i = 0; i < ctx->chunk_count; i++) {
+        HapChunk *chunk = &ctx->chunks[i];
+        uint8_t *chunk_src, *chunk_dst;
+        int ret;
+
+        chunk->compressed_offset = i == 0 ? 0 : ctx->chunks[i-1].compressed_offset
+                                                + ctx->chunks[i-1].compressed_size;
+        chunk->uncompressed_size = ctx->tex_size / ctx->chunk_count;
+        chunk->uncompressed_offset = i * chunk->uncompressed_size;
+        chunk->compressed_size = ctx->max_snappy;
+        chunk_src = ctx->tex_buf + chunk->uncompressed_offset;
+        chunk_dst = dst + chunk->compressed_offset;
+
+        /* Compress with snappy too, write directly on packet buffer. */
+        ret = snappy_compress(chunk_src, chunk->uncompressed_size,
+                              chunk_dst, &chunk->compressed_size);
+        if (ret != SNAPPY_OK) {
+            av_log(avctx, AV_LOG_ERROR, "Snappy compress error.\n");
+            return AVERROR_BUG;
+        }
+
+        /* If there is no gain from snappy, just use the raw texture. */
+        if (chunk->compressed_size >= chunk->uncompressed_size) {
+            av_log(avctx, AV_LOG_VERBOSE,
+                   "Snappy buffer bigger than uncompressed (%zu >= %zu bytes).\n",
+                   chunk->compressed_size, chunk->uncompressed_size);
+            memcpy(chunk_dst, chunk_src, chunk->uncompressed_size);
+            chunk->compressor = HAP_COMP_NONE;
+            chunk->compressed_size = chunk->uncompressed_size;
+        } else {
+            chunk->compressor = HAP_COMP_SNAPPY;
+        }
+
+        final_size += chunk->compressed_size;
+    }
+
+    return final_size;
+}
+
+static int hap_decode_instructions_length(HapContext *ctx)
+{
+    /*    Second-Stage Compressor Table (one byte per entry)
+     *  + Chunk Size Table (four bytes per entry)
+     *  + headers for both sections (short versions)
+     *  = chunk_count + (4 * chunk_count) + 4 + 4 */
+    return (5 * ctx->chunk_count) + 8;
+}
+
+static int hap_header_length(HapContext *ctx)
+{
+    /* Top section header (long version) */
+    int length = HAP_HDR_LONG;
+
+    if (ctx->chunk_count > 1) {
+        /* Decode Instructions header (short) + Decode Instructions Container */
+        length += HAP_HDR_SHORT + hap_decode_instructions_length(ctx);
+    }
+
+    return length;
+}
+
+static void hap_write_frame_header(HapContext *ctx, uint8_t *dst, int frame_length)
+{
+    PutByteContext pbc;
+    int i;
+
+    bytestream2_init_writer(&pbc, dst, frame_length);
+    if (ctx->chunk_count == 1) {
+        /* Write a simple header */
+        hap_write_section_header(&pbc, HAP_HDR_LONG, frame_length - 8,
+                                 ctx->chunks[0].compressor | ctx->opt_tex_fmt);
+    } else {
+        /* Write a complex header with Decode Instructions Container */
+        hap_write_section_header(&pbc, HAP_HDR_LONG, frame_length - 8,
+                                 HAP_COMP_COMPLEX | ctx->opt_tex_fmt);
+        hap_write_section_header(&pbc, HAP_HDR_SHORT, hap_decode_instructions_length(ctx),
+                                 HAP_ST_DECODE_INSTRUCTIONS);
+        hap_write_section_header(&pbc, HAP_HDR_SHORT, ctx->chunk_count,
+                                 HAP_ST_COMPRESSOR_TABLE);
+
+        for (i = 0; i < ctx->chunk_count; i++) {
+            bytestream2_put_byte(&pbc, ctx->chunks[i].compressor >> 4);
+        }
+
+        hap_write_section_header(&pbc, HAP_HDR_SHORT, ctx->chunk_count * 4,
+                                 HAP_ST_SIZE_TABLE);
+
+        for (i = 0; i < ctx->chunk_count; i++) {
+            bytestream2_put_le32(&pbc, ctx->chunks[i].compressed_size);
+        }
+    }
+}
+
+static int hap_encode(AVCodecContext *avctx, AVPacket *pkt,
+                      const AVFrame *frame, int *got_packet)
+{
+    HapContext *ctx = avctx->priv_data;
+    int header_length = hap_header_length(ctx);
+    int final_data_size, ret;
+    int pktsize = FFMAX(ctx->tex_size, ctx->max_snappy * ctx->chunk_count) + header_length;
+
+    /* Allocate maximum size packet, shrink later. */
+    ret = ff_alloc_packet2(avctx, pkt, pktsize, header_length);
+    if (ret < 0)
+        return ret;
+
+    /* DXTC compression. */
+    compress_texture(avctx, frame);
+
+    /* Compress (using Snappy) the frame */
+    final_data_size = hap_compress_frame(avctx, pkt->data + header_length);
+    if (final_data_size < 0)
+        return final_data_size;
+
+    /* Write header at the start. */
+    hap_write_frame_header(ctx, pkt->data, final_data_size + header_length);
+
+    av_shrink_packet(pkt, final_data_size + header_length);
+    pkt->flags |= AV_PKT_FLAG_KEY;
+    *got_packet = 1;
+    return 0;
+}
+
+static av_cold int hap_init(AVCodecContext *avctx)
+{
+    HapContext *ctx = avctx->priv_data;
+    int ratio;
+    int corrected_chunk_count;
+    int ret = av_image_check_size(avctx->width, avctx->height, 0, avctx);
+
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid video size %dx%d.\n",
+               avctx->width, avctx->height);
+        return ret;
+    }
+
+    if (avctx->width % 4 || avctx->height % 4) {
+        av_log(avctx, AV_LOG_ERROR, "Video size %dx%d is not multiple of 4.\n",
+               avctx->width, avctx->height);
+        return AVERROR_INVALIDDATA;
+    }
+
+    ff_texturedspenc_init(&ctx->dxtc);
+
+    switch (ctx->opt_tex_fmt) {
+    case HAP_FMT_RGBDXT1:
+        ratio = 8;
+        avctx->codec_tag = MKTAG('H', 'a', 'p', '1');
+        avctx->bits_per_coded_sample = 24;
+        ctx->tex_fun = ctx->dxtc.dxt1_block;
+        break;
+    case HAP_FMT_RGBADXT5:
+        ratio = 4;
+        avctx->codec_tag = MKTAG('H', 'a', 'p', '5');
+        avctx->bits_per_coded_sample = 32;
+        ctx->tex_fun = ctx->dxtc.dxt5_block;
+        break;
+    case HAP_FMT_YCOCGDXT5:
+        ratio = 4;
+        avctx->codec_tag = MKTAG('H', 'a', 'p', 'Y');
+        avctx->bits_per_coded_sample = 24;
+        ctx->tex_fun = ctx->dxtc.dxt5ys_block;
+        break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "Invalid format %02X\n", ctx->opt_tex_fmt);
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* Texture compression ratio is constant, so we can compute
+     * beforehand the final size of the uncompressed buffer. */
+    ctx->tex_size   = FFALIGN(avctx->width,  TEXTURE_BLOCK_W) *
+                      FFALIGN(avctx->height, TEXTURE_BLOCK_H) * 4 / ratio;
+
+    /* Round the chunk count to divide evenly on DXT block edges */
+    corrected_chunk_count = av_clip(ctx->opt_chunk_count, 1, HAP_MAX_CHUNKS);
+    while ((ctx->tex_size / (64 / ratio)) % corrected_chunk_count != 0) {
+        corrected_chunk_count--;
+    }
+    if (corrected_chunk_count != ctx->opt_chunk_count) {
+        av_log(avctx, AV_LOG_INFO, "%d chunks requested but %d used.\n",
+                                    ctx->opt_chunk_count, corrected_chunk_count);
+    }
+    ret = ff_hap_set_chunk_count(ctx, corrected_chunk_count, 1);
+    if (ret != 0)
+        return ret;
+
+    ctx->max_snappy = snappy_max_compressed_length(ctx->tex_size / corrected_chunk_count);
+
+    ctx->tex_buf  = av_malloc(ctx->tex_size);
+    if (!ctx->tex_buf)
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
+static av_cold int hap_close(AVCodecContext *avctx)
+{
+    HapContext *ctx = avctx->priv_data;
+
+    ff_hap_free_context(ctx);
+
+    return 0;
+}
+
+#define OFFSET(x) offsetof(HapContext, x)
+#define FLAGS     AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
+static const AVOption options[] = {
+    { "format", NULL, OFFSET(opt_tex_fmt), AV_OPT_TYPE_INT, { .i64 = HAP_FMT_RGBDXT1 }, HAP_FMT_RGBDXT1, HAP_FMT_YCOCGDXT5, FLAGS, "format" },
+        { "hap",       "Hap 1 (DXT1 textures)", 0, AV_OPT_TYPE_CONST, { .i64 = HAP_FMT_RGBDXT1   }, 0, 0, FLAGS, "format" },
+        { "hap_alpha", "Hap Alpha (DXT5 textures)", 0, AV_OPT_TYPE_CONST, { .i64 = HAP_FMT_RGBADXT5  }, 0, 0, FLAGS, "format" },
+        { "hap_q",     "Hap Q (DXT5-YCoCg textures)", 0, AV_OPT_TYPE_CONST, { .i64 = HAP_FMT_YCOCGDXT5 }, 0, 0, FLAGS, "format" },
+    { "chunks", "chunk count", OFFSET(opt_chunk_count), AV_OPT_TYPE_INT, {.i64 = 1 }, 1, HAP_MAX_CHUNKS, FLAGS, },
+    { NULL },
+};
+
+static const AVClass hapenc_class = {
+    .class_name = "Hap encoder",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+AVCodec ff_hap_encoder = {
+    .name           = "hap",
+    .long_name      = NULL_IF_CONFIG_SMALL("Vidvox Hap encoder"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_HAP,
+    .priv_data_size = sizeof(HapContext),
+    .priv_class     = &hapenc_class,
+    .init           = hap_init,
+    .encode2        = hap_encode,
+    .close          = hap_close,
+    .pix_fmts       = (const enum AVPixelFormat[]) {
+        AV_PIX_FMT_RGBA, AV_PIX_FMT_NONE,
+    },
+    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE |
+                      FF_CODEC_CAP_INIT_CLEANUP,
+};
diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c
index a56063f8..203f90ab 100644
--- a/libavcodec/hevc.c
+++ b/libavcodec/hevc.c
@@ -28,6 +28,7 @@
 #include "libavutil/common.h"
 #include "libavutil/display.h"
 #include "libavutil/internal.h"
+#include "libavutil/mastering_display_metadata.h"
 #include "libavutil/md5.h"
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
@@ -38,6 +39,7 @@
 #include "cabac_functions.h"
 #include "golomb.h"
 #include "hevc.h"
+#include "profiles.h"
 
 const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12] = 4, [16] = 5, [24] = 6, [32] = 7, [48] = 8, [64] = 9 };
 
@@ -149,7 +151,7 @@ static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
     if (luma_log2_weight_denom < 0 || luma_log2_weight_denom > 7)
         av_log(s->avctx, AV_LOG_ERROR, "luma_log2_weight_denom %d is invalid\n", luma_log2_weight_denom);
     s->sh.luma_log2_weight_denom = av_clip_uintp2(luma_log2_weight_denom, 3);
-    if (s->sps->chroma_format_idc != 0) {
+    if (s->ps.sps->chroma_format_idc != 0) {
         int delta = get_se_golomb(gb);
         s->sh.chroma_log2_weight_denom = av_clip_uintp2(s->sh.luma_log2_weight_denom + delta, 3);
     }
@@ -161,7 +163,7 @@ static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
             s->sh.luma_offset_l0[i] = 0;
         }
     }
-    if (s->sps->chroma_format_idc != 0) {
+    if (s->ps.sps->chroma_format_idc != 0) {
         for (i = 0; i < s->sh.nb_refs[L0]; i++)
             chroma_weight_l0_flag[i] = get_bits1(gb);
     } else {
@@ -197,7 +199,7 @@ static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
                 s->sh.luma_offset_l1[i] = 0;
             }
         }
-        if (s->sps->chroma_format_idc != 0) {
+        if (s->ps.sps->chroma_format_idc != 0) {
             for (i = 0; i < s->sh.nb_refs[L1]; i++)
                 chroma_weight_l1_flag[i] = get_bits1(gb);
         } else {
@@ -230,7 +232,7 @@ static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
 
 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
 {
-    const HEVCSPS *sps = s->sps;
+    const HEVCSPS *sps = s->ps.sps;
     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
     int prev_delta_msb = 0;
     unsigned int nb_sps = 0, nb_sh;
@@ -280,10 +282,10 @@ static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
     return 0;
 }
 
-static void export_stream_params(AVCodecContext *avctx,
-                                 const HEVCContext *s, const HEVCSPS *sps)
+static void export_stream_params(AVCodecContext *avctx, const HEVCParamSets *ps,
+                                 const HEVCSPS *sps)
 {
-    const HEVCVPS *vps = (const HEVCVPS*)s->vps_list[sps->vps_id]->data;
+    const HEVCVPS *vps = (const HEVCVPS*)ps->vps_list[sps->vps_id]->data;
     unsigned int num = 0, den = 0;
 
     avctx->pix_fmt             = sps->pix_fmt;
@@ -328,23 +330,35 @@ static void export_stream_params(AVCodecContext *avctx,
 
 static int set_sps(HEVCContext *s, const HEVCSPS *sps, enum AVPixelFormat pix_fmt)
 {
-    #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + CONFIG_HEVC_D3D11VA_HWACCEL)
+    #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + CONFIG_HEVC_D3D11VA_HWACCEL + CONFIG_HEVC_VAAPI_HWACCEL + CONFIG_HEVC_VDPAU_HWACCEL)
     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
     int ret, i;
 
-    export_stream_params(s->avctx, s, sps);
-
     pic_arrays_free(s);
+    s->ps.sps = NULL;
+    s->ps.vps = NULL;
+
+    if (!sps)
+        return 0;
+
     ret = pic_arrays_init(s, sps);
     if (ret < 0)
         goto fail;
 
+    export_stream_params(s->avctx, &s->ps, sps);
+
     if (sps->pix_fmt == AV_PIX_FMT_YUV420P || sps->pix_fmt == AV_PIX_FMT_YUVJ420P) {
 #if CONFIG_HEVC_DXVA2_HWACCEL
         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
 #endif
 #if CONFIG_HEVC_D3D11VA_HWACCEL
         *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
+#endif
+#if CONFIG_HEVC_VAAPI_HWACCEL
+        *fmt++ = AV_PIX_FMT_VAAPI;
+#endif
+#if CONFIG_HEVC_VDPAU_HWACCEL
+        *fmt++ = AV_PIX_FMT_VDPAU;
 #endif
     }
 
@@ -386,14 +400,14 @@ static int set_sps(HEVCContext *s, const HEVCSPS *sps, enum AVPixelFormat pix_fm
         }
     }
 
-    s->sps = sps;
-    s->vps = (HEVCVPS*) s->vps_list[s->sps->vps_id]->data;
+    s->ps.sps = sps;
+    s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data;
 
     return 0;
 
 fail:
     pic_arrays_free(s);
-    s->sps = NULL;
+    s->ps.sps = NULL;
     return ret;
 }
 
@@ -416,30 +430,30 @@ static int hls_slice_header(HEVCContext *s)
         sh->no_output_of_prior_pics_flag = get_bits1(gb);
 
     sh->pps_id = get_ue_golomb_long(gb);
-    if (sh->pps_id >= MAX_PPS_COUNT || !s->pps_list[sh->pps_id]) {
+    if (sh->pps_id >= MAX_PPS_COUNT || !s->ps.pps_list[sh->pps_id]) {
         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
         return AVERROR_INVALIDDATA;
     }
     if (!sh->first_slice_in_pic_flag &&
-        s->pps != (HEVCPPS*)s->pps_list[sh->pps_id]->data) {
+        s->ps.pps != (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data) {
         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
         return AVERROR_INVALIDDATA;
     }
-    s->pps = (HEVCPPS*)s->pps_list[sh->pps_id]->data;
+    s->ps.pps = (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data;
     if (s->nal_unit_type == NAL_CRA_NUT && s->last_eos == 1)
         sh->no_output_of_prior_pics_flag = 1;
 
-    if (s->sps != (HEVCSPS*)s->sps_list[s->pps->sps_id]->data) {
-        const HEVCSPS* last_sps = s->sps;
-        s->sps = (HEVCSPS*)s->sps_list[s->pps->sps_id]->data;
+    if (s->ps.sps != (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data) {
+        const HEVCSPS* last_sps = s->ps.sps;
+        s->ps.sps = (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data;
         if (last_sps && IS_IRAP(s) && s->nal_unit_type != NAL_CRA_NUT) {
-            if (s->sps->width !=  last_sps->width || s->sps->height != last_sps->height ||
-                s->sps->temporal_layer[s->sps->max_sub_layers - 1].max_dec_pic_buffering !=
+            if (s->ps.sps->width !=  last_sps->width || s->ps.sps->height != last_sps->height ||
+                s->ps.sps->temporal_layer[s->ps.sps->max_sub_layers - 1].max_dec_pic_buffering !=
                 last_sps->temporal_layer[last_sps->max_sub_layers - 1].max_dec_pic_buffering)
                 sh->no_output_of_prior_pics_flag = 0;
         }
         ff_hevc_clear_refs(s);
-        ret = set_sps(s, s->sps, AV_PIX_FMT_NONE);
+        ret = set_sps(s, s->ps.sps, AV_PIX_FMT_NONE);
         if (ret < 0)
             return ret;
 
@@ -451,13 +465,13 @@ static int hls_slice_header(HEVCContext *s)
     if (!sh->first_slice_in_pic_flag) {
         int slice_address_length;
 
-        if (s->pps->dependent_slice_segments_enabled_flag)
+        if (s->ps.pps->dependent_slice_segments_enabled_flag)
             sh->dependent_slice_segment_flag = get_bits1(gb);
 
-        slice_address_length = av_ceil_log2(s->sps->ctb_width *
-                                            s->sps->ctb_height);
-        sh->slice_segment_addr = slice_address_length ? get_bits(gb, slice_address_length) : 0;
-        if (sh->slice_segment_addr >= s->sps->ctb_width * s->sps->ctb_height) {
+        slice_address_length = av_ceil_log2(s->ps.sps->ctb_width *
+                                            s->ps.sps->ctb_height);
+        sh->slice_segment_addr = get_bitsz(gb, slice_address_length);
+        if (sh->slice_segment_addr >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
             av_log(s->avctx, AV_LOG_ERROR,
                    "Invalid slice segment address: %u.\n",
                    sh->slice_segment_addr);
@@ -477,7 +491,7 @@ static int hls_slice_header(HEVCContext *s)
     if (!sh->dependent_slice_segment_flag) {
         s->slice_initialized = 0;
 
-        for (i = 0; i < s->pps->num_extra_slice_header_bits; i++)
+        for (i = 0; i < s->ps.pps->num_extra_slice_header_bits; i++)
             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
 
         sh->slice_type = get_ue_golomb_long(gb);
@@ -495,16 +509,16 @@ static int hls_slice_header(HEVCContext *s)
 
         // when flag is not present, picture is inferred to be output
         sh->pic_output_flag = 1;
-        if (s->pps->output_flag_present_flag)
+        if (s->ps.pps->output_flag_present_flag)
             sh->pic_output_flag = get_bits1(gb);
 
-        if (s->sps->separate_colour_plane_flag)
+        if (s->ps.sps->separate_colour_plane_flag)
             sh->colour_plane_id = get_bits(gb, 2);
 
         if (!IS_IDR(s)) {
-            int poc;
+            int poc, pos;
 
-            sh->pic_order_cnt_lsb = get_bits(gb, s->sps->log2_max_poc_lsb);
+            sh->pic_order_cnt_lsb = get_bits(gb, s->ps.sps->log2_max_poc_lsb);
             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
                 av_log(s->avctx, AV_LOG_WARNING,
@@ -516,35 +530,37 @@ static int hls_slice_header(HEVCContext *s)
             s->poc = poc;
 
             sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
+            pos = get_bits_left(gb);
             if (!sh->short_term_ref_pic_set_sps_flag) {
-                int pos = get_bits_left(gb);
-                ret = ff_hevc_decode_short_term_rps(s, &sh->slice_rps, s->sps, 1);
+                ret = ff_hevc_decode_short_term_rps(gb, s->avctx, &sh->slice_rps, s->ps.sps, 1);
                 if (ret < 0)
                     return ret;
 
-                sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
                 sh->short_term_rps = &sh->slice_rps;
             } else {
                 int numbits, rps_idx;
 
-                if (!s->sps->nb_st_rps) {
+                if (!s->ps.sps->nb_st_rps) {
                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
                     return AVERROR_INVALIDDATA;
                 }
 
-                numbits = av_ceil_log2(s->sps->nb_st_rps);
+                numbits = av_ceil_log2(s->ps.sps->nb_st_rps);
                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
-                sh->short_term_rps = &s->sps->st_rps[rps_idx];
+                sh->short_term_rps = &s->ps.sps->st_rps[rps_idx];
             }
+            sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
 
+            pos = get_bits_left(gb);
             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
             if (ret < 0) {
                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
                     return AVERROR_INVALIDDATA;
             }
+            sh->long_term_ref_pic_set_size = pos - get_bits_left(gb);
 
-            if (s->sps->sps_temporal_mvp_enabled_flag)
+            if (s->ps.sps->sps_temporal_mvp_enabled_flag)
                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
             else
                 sh->slice_temporal_mvp_enabled_flag = 0;
@@ -564,9 +580,9 @@ static int hls_slice_header(HEVCContext *s)
             s->nal_unit_type != NAL_RASL_R)
             s->pocTid0 = s->poc;
 
-        if (s->sps->sao_enabled) {
+        if (s->ps.sps->sao_enabled) {
             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
-            if (s->sps->chroma_format_idc) {
+            if (s->ps.sps->chroma_format_idc) {
                 sh->slice_sample_adaptive_offset_flag[1] =
                 sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
             }
@@ -580,9 +596,9 @@ static int hls_slice_header(HEVCContext *s)
         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
             int nb_refs;
 
-            sh->nb_refs[L0] = s->pps->num_ref_idx_l0_default_active;
+            sh->nb_refs[L0] = s->ps.pps->num_ref_idx_l0_default_active;
             if (sh->slice_type == B_SLICE)
-                sh->nb_refs[L1] = s->pps->num_ref_idx_l1_default_active;
+                sh->nb_refs[L1] = s->ps.pps->num_ref_idx_l1_default_active;
 
             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
@@ -603,7 +619,7 @@ static int hls_slice_header(HEVCContext *s)
                 return AVERROR_INVALIDDATA;
             }
 
-            if (s->pps->lists_modification_present_flag && nb_refs > 1) {
+            if (s->ps.pps->lists_modification_present_flag && nb_refs > 1) {
                 sh->rpl_modification_flag[0] = get_bits1(gb);
                 if (sh->rpl_modification_flag[0]) {
                     for (i = 0; i < sh->nb_refs[L0]; i++)
@@ -621,7 +637,7 @@ static int hls_slice_header(HEVCContext *s)
             if (sh->slice_type == B_SLICE)
                 sh->mvd_l1_zero_flag = get_bits1(gb);
 
-            if (s->pps->cabac_init_present_flag)
+            if (s->ps.pps->cabac_init_present_flag)
                 sh->cabac_init_flag = get_bits1(gb);
             else
                 sh->cabac_init_flag = 0;
@@ -643,8 +659,8 @@ static int hls_slice_header(HEVCContext *s)
                 }
             }
 
-            if ((s->pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
-                (s->pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
+            if ((s->ps.pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
+                (s->ps.pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
                 pred_weight_table(s, gb);
             }
 
@@ -659,7 +675,7 @@ static int hls_slice_header(HEVCContext *s)
 
         sh->slice_qp_delta = get_se_golomb(gb);
 
-        if (s->pps->pic_slice_level_chroma_qp_offsets_present_flag) {
+        if (s->ps.pps->pic_slice_level_chroma_qp_offsets_present_flag) {
             sh->slice_cb_qp_offset = get_se_golomb(gb);
             sh->slice_cr_qp_offset = get_se_golomb(gb);
         } else {
@@ -667,15 +683,15 @@ static int hls_slice_header(HEVCContext *s)
             sh->slice_cr_qp_offset = 0;
         }
 
-        if (s->pps->chroma_qp_offset_list_enabled_flag)
+        if (s->ps.pps->chroma_qp_offset_list_enabled_flag)
             sh->cu_chroma_qp_offset_enabled_flag = get_bits1(gb);
         else
             sh->cu_chroma_qp_offset_enabled_flag = 0;
 
-        if (s->pps->deblocking_filter_control_present_flag) {
+        if (s->ps.pps->deblocking_filter_control_present_flag) {
             int deblocking_filter_override_flag = 0;
 
-            if (s->pps->deblocking_filter_override_enabled_flag)
+            if (s->ps.pps->deblocking_filter_override_enabled_flag)
                 deblocking_filter_override_flag = get_bits1(gb);
 
             if (deblocking_filter_override_flag) {
@@ -685,9 +701,9 @@ static int hls_slice_header(HEVCContext *s)
                     sh->tc_offset   = get_se_golomb(gb) * 2;
                 }
             } else {
-                sh->disable_deblocking_filter_flag = s->pps->disable_dbf;
-                sh->beta_offset                    = s->pps->beta_offset;
-                sh->tc_offset                      = s->pps->tc_offset;
+                sh->disable_deblocking_filter_flag = s->ps.pps->disable_dbf;
+                sh->beta_offset                    = s->ps.pps->beta_offset;
+                sh->tc_offset                      = s->ps.pps->tc_offset;
             }
         } else {
             sh->disable_deblocking_filter_flag = 0;
@@ -695,13 +711,13 @@ static int hls_slice_header(HEVCContext *s)
             sh->tc_offset                      = 0;
         }
 
-        if (s->pps->seq_loop_filter_across_slices_enabled_flag &&
+        if (s->ps.pps->seq_loop_filter_across_slices_enabled_flag &&
             (sh->slice_sample_adaptive_offset_flag[0] ||
              sh->slice_sample_adaptive_offset_flag[1] ||
              !sh->disable_deblocking_filter_flag)) {
             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
         } else {
-            sh->slice_loop_filter_across_slices_enabled_flag = s->pps->seq_loop_filter_across_slices_enabled_flag;
+            sh->slice_loop_filter_across_slices_enabled_flag = s->ps.pps->seq_loop_filter_across_slices_enabled_flag;
         }
     } else if (!s->slice_initialized) {
         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
@@ -709,7 +725,7 @@ static int hls_slice_header(HEVCContext *s)
     }
 
     sh->num_entry_point_offsets = 0;
-    if (s->pps->tiles_enabled_flag || s->pps->entropy_coding_sync_enabled_flag) {
+    if (s->ps.pps->tiles_enabled_flag || s->ps.pps->entropy_coding_sync_enabled_flag) {
         unsigned num_entry_point_offsets = get_ue_golomb_long(gb);
         // It would be possible to bound this tighter but this here is simpler
         if (num_entry_point_offsets > get_bits_left(gb)) {
@@ -730,7 +746,7 @@ static int hls_slice_header(HEVCContext *s)
             av_freep(&sh->entry_point_offset);
             av_freep(&sh->offset);
             av_freep(&sh->size);
-            sh->entry_point_offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
+            sh->entry_point_offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(unsigned));
             sh->offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
             sh->size = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
             if (!sh->entry_point_offset || !sh->offset || !sh->size) {
@@ -742,7 +758,7 @@ static int hls_slice_header(HEVCContext *s)
                 unsigned val = get_bits_long(gb, offset_len);
                 sh->entry_point_offset[i] = val + 1; // +1; // +1 to get the size
             }
-            if (s->threads_number > 1 && (s->pps->num_tile_rows > 1 || s->pps->num_tile_columns > 1)) {
+            if (s->threads_number > 1 && (s->ps.pps->num_tile_rows > 1 || s->ps.pps->num_tile_columns > 1)) {
                 s->enable_parallel_tiles = 0; // TODO: you can enable tiles in parallel here
                 s->threads_number = 1;
             } else
@@ -751,7 +767,7 @@ static int hls_slice_header(HEVCContext *s)
             s->enable_parallel_tiles = 0;
     }
 
-    if (s->pps->slice_header_extension_present_flag) {
+    if (s->ps.pps->slice_header_extension_present_flag) {
         unsigned int length = get_ue_golomb_long(gb);
         if (length*8LL > get_bits_left(gb)) {
             av_log(s->avctx, AV_LOG_ERROR, "too many slice_header_extension_data_bytes\n");
@@ -762,14 +778,14 @@ static int hls_slice_header(HEVCContext *s)
     }
 
     // Inferred parameters
-    sh->slice_qp = 26U + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
+    sh->slice_qp = 26U + s->ps.pps->pic_init_qp_minus26 + sh->slice_qp_delta;
     if (sh->slice_qp > 51 ||
-        sh->slice_qp < -s->sps->qp_bd_offset) {
+        sh->slice_qp < -s->ps.sps->qp_bd_offset) {
         av_log(s->avctx, AV_LOG_ERROR,
                "The slice_qp %d is outside the valid range "
                "[%d, 51].\n",
                sh->slice_qp,
-               -s->sps->qp_bd_offset);
+               -s->ps.sps->qp_bd_offset);
         return AVERROR_INVALIDDATA;
     }
 
@@ -788,17 +804,19 @@ static int hls_slice_header(HEVCContext *s)
 
     s->HEVClc->first_qp_group = !s->sh.dependent_slice_segment_flag;
 
-    if (!s->pps->cu_qp_delta_enabled_flag)
+    if (!s->ps.pps->cu_qp_delta_enabled_flag)
         s->HEVClc->qp_y = s->sh.slice_qp;
 
     s->slice_initialized = 1;
     s->HEVClc->tu.cu_qp_offset_cb = 0;
     s->HEVClc->tu.cu_qp_offset_cr = 0;
 
+    s->no_rasl_output_flag = IS_IDR(s) || IS_BLA(s) || (s->nal_unit_type == NAL_CRA_NUT && s->last_eos);
+
     return 0;
 }
 
-#define CTB(tab, x, y) ((tab)[(y) * s->sps->ctb_width + (x)])
+#define CTB(tab, x, y) ((tab)[(y) * s->ps.sps->ctb_width + (x)])
 
 #define SET_SAO(elem, value)                            \
 do {                                                    \
@@ -832,9 +850,9 @@ static void hls_sao_param(HEVCContext *s, int rx, int ry)
         }
     }
 
-    for (c_idx = 0; c_idx < (s->sps->chroma_format_idc ? 3 : 1); c_idx++) {
-        int log2_sao_offset_scale = c_idx == 0 ? s->pps->log2_sao_offset_scale_luma :
-                                                 s->pps->log2_sao_offset_scale_chroma;
+    for (c_idx = 0; c_idx < (s->ps.sps->chroma_format_idc ? 3 : 1); c_idx++) {
+        int log2_sao_offset_scale = c_idx == 0 ? s->ps.pps->log2_sao_offset_scale_luma :
+                                                 s->ps.pps->log2_sao_offset_scale_chroma;
 
         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
@@ -908,7 +926,7 @@ static int hls_transform_unit(HEVCContext *s, int x0, int y0,
                               int blk_idx, int cbf_luma, int *cbf_cb, int *cbf_cr)
 {
     HEVCLocalContext *lc = s->HEVClc;
-    const int log2_trafo_size_c = log2_trafo_size - s->sps->hshift[1];
+    const int log2_trafo_size_c = log2_trafo_size - s->ps.sps->hshift[1];
     int i;
 
     if (lc->cu.pred_mode == MODE_INTRA) {
@@ -919,28 +937,28 @@ static int hls_transform_unit(HEVCContext *s, int x0, int y0,
     }
 
     if (cbf_luma || cbf_cb[0] || cbf_cr[0] ||
-        (s->sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
+        (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
         int scan_idx   = SCAN_DIAG;
         int scan_idx_c = SCAN_DIAG;
         int cbf_chroma = cbf_cb[0] || cbf_cr[0] ||
-                         (s->sps->chroma_format_idc == 2 &&
+                         (s->ps.sps->chroma_format_idc == 2 &&
                          (cbf_cb[1] || cbf_cr[1]));
 
-        if (s->pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
+        if (s->ps.pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
             if (lc->tu.cu_qp_delta != 0)
                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
             lc->tu.is_cu_qp_delta_coded = 1;
 
-            if (lc->tu.cu_qp_delta < -(26 + s->sps->qp_bd_offset / 2) ||
-                lc->tu.cu_qp_delta >  (25 + s->sps->qp_bd_offset / 2)) {
+            if (lc->tu.cu_qp_delta < -(26 + s->ps.sps->qp_bd_offset / 2) ||
+                lc->tu.cu_qp_delta >  (25 + s->ps.sps->qp_bd_offset / 2)) {
                 av_log(s->avctx, AV_LOG_ERROR,
                        "The cu_qp_delta %d is outside the valid range "
                        "[%d, %d].\n",
                        lc->tu.cu_qp_delta,
-                       -(26 + s->sps->qp_bd_offset / 2),
-                        (25 + s->sps->qp_bd_offset / 2));
+                       -(26 + s->ps.sps->qp_bd_offset / 2),
+                        (25 + s->ps.sps->qp_bd_offset / 2));
                 return AVERROR_INVALIDDATA;
             }
 
@@ -952,13 +970,13 @@ static int hls_transform_unit(HEVCContext *s, int x0, int y0,
             int cu_chroma_qp_offset_flag = ff_hevc_cu_chroma_qp_offset_flag(s);
             if (cu_chroma_qp_offset_flag) {
                 int cu_chroma_qp_offset_idx  = 0;
-                if (s->pps->chroma_qp_offset_list_len_minus1 > 0) {
+                if (s->ps.pps->chroma_qp_offset_list_len_minus1 > 0) {
                     cu_chroma_qp_offset_idx = ff_hevc_cu_chroma_qp_offset_idx(s);
                     av_log(s->avctx, AV_LOG_ERROR,
                         "cu_chroma_qp_offset_idx not yet tested.\n");
                 }
-                lc->tu.cu_qp_offset_cb = s->pps->cb_qp_offset_list[cu_chroma_qp_offset_idx];
-                lc->tu.cu_qp_offset_cr = s->pps->cr_qp_offset_list[cu_chroma_qp_offset_idx];
+                lc->tu.cu_qp_offset_cb = s->ps.pps->cb_qp_offset_list[cu_chroma_qp_offset_idx];
+                lc->tu.cu_qp_offset_cr = s->ps.pps->cr_qp_offset_list[cu_chroma_qp_offset_idx];
             } else {
                 lc->tu.cu_qp_offset_cb = 0;
                 lc->tu.cu_qp_offset_cr = 0;
@@ -988,17 +1006,17 @@ static int hls_transform_unit(HEVCContext *s, int x0, int y0,
 
         if (cbf_luma)
             ff_hevc_hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
-        if (s->sps->chroma_format_idc && (log2_trafo_size > 2 || s->sps->chroma_format_idc == 3)) {
-            int trafo_size_h = 1 << (log2_trafo_size_c + s->sps->hshift[1]);
-            int trafo_size_v = 1 << (log2_trafo_size_c + s->sps->vshift[1]);
-            lc->tu.cross_pf  = (s->pps->cross_component_prediction_enabled_flag && cbf_luma &&
+        if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
+            int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
+            int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
+            lc->tu.cross_pf  = (s->ps.pps->cross_component_prediction_enabled_flag && cbf_luma &&
                                 (lc->cu.pred_mode == MODE_INTER ||
                                  (lc->tu.chroma_mode_c ==  4)));
 
             if (lc->tu.cross_pf) {
                 hls_cross_component_pred(s, 0);
             }
-            for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
+            for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
                 if (lc->cu.pred_mode == MODE_INTRA) {
                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 1);
@@ -1009,14 +1027,14 @@ static int hls_transform_unit(HEVCContext *s, int x0, int y0,
                 else
                     if (lc->tu.cross_pf) {
                         ptrdiff_t stride = s->frame->linesize[1];
-                        int hshift = s->sps->hshift[1];
-                        int vshift = s->sps->vshift[1];
+                        int hshift = s->ps.sps->hshift[1];
+                        int vshift = s->ps.sps->vshift[1];
                         int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
                         int16_t *coeffs   = (int16_t*)lc->edge_emu_buffer2;
                         int size = 1 << log2_trafo_size_c;
 
                         uint8_t *dst = &s->frame->data[1][(y0 >> vshift) * stride +
-                                                              ((x0 >> hshift) << s->sps->pixel_shift)];
+                                                              ((x0 >> hshift) << s->ps.sps->pixel_shift)];
                         for (i = 0; i < (size * size); i++) {
                             coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
                         }
@@ -1027,7 +1045,7 @@ static int hls_transform_unit(HEVCContext *s, int x0, int y0,
             if (lc->tu.cross_pf) {
                 hls_cross_component_pred(s, 1);
             }
-            for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
+            for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
                 if (lc->cu.pred_mode == MODE_INTRA) {
                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 2);
@@ -1038,24 +1056,24 @@ static int hls_transform_unit(HEVCContext *s, int x0, int y0,
                 else
                     if (lc->tu.cross_pf) {
                         ptrdiff_t stride = s->frame->linesize[2];
-                        int hshift = s->sps->hshift[2];
-                        int vshift = s->sps->vshift[2];
+                        int hshift = s->ps.sps->hshift[2];
+                        int vshift = s->ps.sps->vshift[2];
                         int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
                         int16_t *coeffs   = (int16_t*)lc->edge_emu_buffer2;
                         int size = 1 << log2_trafo_size_c;
 
                         uint8_t *dst = &s->frame->data[2][(y0 >> vshift) * stride +
-                                                          ((x0 >> hshift) << s->sps->pixel_shift)];
+                                                          ((x0 >> hshift) << s->ps.sps->pixel_shift)];
                         for (i = 0; i < (size * size); i++) {
                             coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
                         }
                         s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride);
                     }
             }
-        } else if (s->sps->chroma_format_idc && blk_idx == 3) {
+        } else if (s->ps.sps->chroma_format_idc && blk_idx == 3) {
             int trafo_size_h = 1 << (log2_trafo_size + 1);
-            int trafo_size_v = 1 << (log2_trafo_size + s->sps->vshift[1]);
-            for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
+            int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
+            for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
                 if (lc->cu.pred_mode == MODE_INTRA) {
                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
                                                     trafo_size_h, trafo_size_v);
@@ -1065,7 +1083,7 @@ static int hls_transform_unit(HEVCContext *s, int x0, int y0,
                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
                                                 log2_trafo_size, scan_idx_c, 1);
             }
-            for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
+            for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
                 if (lc->cu.pred_mode == MODE_INTRA) {
                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
                                                 trafo_size_h, trafo_size_v);
@@ -1076,14 +1094,14 @@ static int hls_transform_unit(HEVCContext *s, int x0, int y0,
                                                 log2_trafo_size, scan_idx_c, 2);
             }
         }
-    } else if (s->sps->chroma_format_idc && lc->cu.pred_mode == MODE_INTRA) {
-        if (log2_trafo_size > 2 || s->sps->chroma_format_idc == 3) {
-            int trafo_size_h = 1 << (log2_trafo_size_c + s->sps->hshift[1]);
-            int trafo_size_v = 1 << (log2_trafo_size_c + s->sps->vshift[1]);
+    } else if (s->ps.sps->chroma_format_idc && lc->cu.pred_mode == MODE_INTRA) {
+        if (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3) {
+            int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
+            int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size_h, trafo_size_v);
             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 1);
             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 2);
-            if (s->sps->chroma_format_idc == 2) {
+            if (s->ps.sps->chroma_format_idc == 2) {
                 ff_hevc_set_neighbour_available(s, x0, y0 + (1 << log2_trafo_size_c),
                                                 trafo_size_h, trafo_size_v);
                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 1);
@@ -1091,12 +1109,12 @@ static int hls_transform_unit(HEVCContext *s, int x0, int y0,
             }
         } else if (blk_idx == 3) {
             int trafo_size_h = 1 << (log2_trafo_size + 1);
-            int trafo_size_v = 1 << (log2_trafo_size + s->sps->vshift[1]);
+            int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
             ff_hevc_set_neighbour_available(s, xBase, yBase,
                                             trafo_size_h, trafo_size_v);
             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
-            if (s->sps->chroma_format_idc == 2) {
+            if (s->ps.sps->chroma_format_idc == 2) {
                 ff_hevc_set_neighbour_available(s, xBase, yBase + (1 << (log2_trafo_size)),
                                                 trafo_size_h, trafo_size_v);
                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 1);
@@ -1111,11 +1129,11 @@ static int hls_transform_unit(HEVCContext *s, int x0, int y0,
 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
 {
     int cb_size          = 1 << log2_cb_size;
-    int log2_min_pu_size = s->sps->log2_min_pu_size;
+    int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
 
-    int min_pu_width     = s->sps->min_pu_width;
-    int x_end = FFMIN(x0 + cb_size, s->sps->width);
-    int y_end = FFMIN(y0 + cb_size, s->sps->height);
+    int min_pu_width     = s->ps.sps->min_pu_width;
+    int x_end = FFMIN(x0 + cb_size, s->ps.sps->width);
+    int y_end = FFMIN(y0 + cb_size, s->ps.sps->height);
     int i, j;
 
     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
@@ -1143,7 +1161,7 @@ static int hls_transform_tree(HEVCContext *s, int x0, int y0,
     if (lc->cu.intra_split_flag) {
         if (trafo_depth == 1) {
             lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[blk_idx];
-            if (s->sps->chroma_format_idc == 3) {
+            if (s->ps.sps->chroma_format_idc == 3) {
                 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[blk_idx];
                 lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[blk_idx];
             } else {
@@ -1157,33 +1175,33 @@ static int hls_transform_tree(HEVCContext *s, int x0, int y0,
         lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
     }
 
-    if (log2_trafo_size <= s->sps->log2_max_trafo_size &&
-        log2_trafo_size >  s->sps->log2_min_tb_size    &&
+    if (log2_trafo_size <= s->ps.sps->log2_max_trafo_size &&
+        log2_trafo_size >  s->ps.sps->log2_min_tb_size    &&
         trafo_depth     < lc->cu.max_trafo_depth       &&
         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
     } else {
-        int inter_split = s->sps->max_transform_hierarchy_depth_inter == 0 &&
+        int inter_split = s->ps.sps->max_transform_hierarchy_depth_inter == 0 &&
                           lc->cu.pred_mode == MODE_INTER &&
                           lc->cu.part_mode != PART_2Nx2N &&
                           trafo_depth == 0;
 
-        split_transform_flag = log2_trafo_size > s->sps->log2_max_trafo_size ||
+        split_transform_flag = log2_trafo_size > s->ps.sps->log2_max_trafo_size ||
                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
                                inter_split;
     }
 
-    if (s->sps->chroma_format_idc && (log2_trafo_size > 2 || s->sps->chroma_format_idc == 3)) {
+    if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
         if (trafo_depth == 0 || cbf_cb[0]) {
             cbf_cb[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
-            if (s->sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
+            if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
                 cbf_cb[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
             }
         }
 
         if (trafo_depth == 0 || cbf_cr[0]) {
             cbf_cr[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
-            if (s->sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
+            if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
                 cbf_cr[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
             }
         }
@@ -1210,14 +1228,14 @@ do {
 
 #undef SUBDIVIDE
     } else {
-        int min_tu_size      = 1 << s->sps->log2_min_tb_size;
-        int log2_min_tu_size = s->sps->log2_min_tb_size;
-        int min_tu_width     = s->sps->min_tb_width;
+        int min_tu_size      = 1 << s->ps.sps->log2_min_tb_size;
+        int log2_min_tu_size = s->ps.sps->log2_min_tb_size;
+        int min_tu_width     = s->ps.sps->min_tb_width;
         int cbf_luma         = 1;
 
         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
             cbf_cb[0] || cbf_cr[0] ||
-            (s->sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
+            (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
             cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
         }
 
@@ -1238,7 +1256,7 @@ do {
         }
         if (!s->sh.disable_deblocking_filter_flag) {
             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
-            if (s->pps->transquant_bypass_enable_flag &&
+            if (s->ps.pps->transquant_bypass_enable_flag &&
                 lc->cu.cu_transquant_bypass_flag)
                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
         }
@@ -1252,16 +1270,16 @@ static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
     GetBitContext gb;
     int cb_size   = 1 << log2_cb_size;
     int stride0   = s->frame->linesize[0];
-    uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->sps->pixel_shift)];
+    uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->ps.sps->pixel_shift)];
     int   stride1 = s->frame->linesize[1];
-    uint8_t *dst1 = &s->frame->data[1][(y0 >> s->sps->vshift[1]) * stride1 + ((x0 >> s->sps->hshift[1]) << s->sps->pixel_shift)];
+    uint8_t *dst1 = &s->frame->data[1][(y0 >> s->ps.sps->vshift[1]) * stride1 + ((x0 >> s->ps.sps->hshift[1]) << s->ps.sps->pixel_shift)];
     int   stride2 = s->frame->linesize[2];
-    uint8_t *dst2 = &s->frame->data[2][(y0 >> s->sps->vshift[2]) * stride2 + ((x0 >> s->sps->hshift[2]) << s->sps->pixel_shift)];
+    uint8_t *dst2 = &s->frame->data[2][(y0 >> s->ps.sps->vshift[2]) * stride2 + ((x0 >> s->ps.sps->hshift[2]) << s->ps.sps->pixel_shift)];
 
-    int length         = cb_size * cb_size * s->sps->pcm.bit_depth +
-                         (((cb_size >> s->sps->hshift[1]) * (cb_size >> s->sps->vshift[1])) +
-                          ((cb_size >> s->sps->hshift[2]) * (cb_size >> s->sps->vshift[2]))) *
-                          s->sps->pcm.bit_depth_chroma;
+    int length         = cb_size * cb_size * s->ps.sps->pcm.bit_depth +
+                         (((cb_size >> s->ps.sps->hshift[1]) * (cb_size >> s->ps.sps->vshift[1])) +
+                          ((cb_size >> s->ps.sps->hshift[2]) * (cb_size >> s->ps.sps->vshift[2]))) *
+                          s->ps.sps->pcm.bit_depth_chroma;
     const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
     int ret;
 
@@ -1272,16 +1290,16 @@ static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
     if (ret < 0)
         return ret;
 
-    s->hevcdsp.put_pcm(dst0, stride0, cb_size, cb_size,     &gb, s->sps->pcm.bit_depth);
-    if (s->sps->chroma_format_idc) {
+    s->hevcdsp.put_pcm(dst0, stride0, cb_size, cb_size,     &gb, s->ps.sps->pcm.bit_depth);
+    if (s->ps.sps->chroma_format_idc) {
         s->hevcdsp.put_pcm(dst1, stride1,
-                           cb_size >> s->sps->hshift[1],
-                           cb_size >> s->sps->vshift[1],
-                           &gb, s->sps->pcm.bit_depth_chroma);
+                           cb_size >> s->ps.sps->hshift[1],
+                           cb_size >> s->ps.sps->vshift[1],
+                           &gb, s->ps.sps->pcm.bit_depth_chroma);
         s->hevcdsp.put_pcm(dst2, stride2,
-                           cb_size >> s->sps->hshift[2],
-                           cb_size >> s->sps->vshift[2],
-                           &gb, s->sps->pcm.bit_depth_chroma);
+                           cb_size >> s->ps.sps->hshift[2],
+                           cb_size >> s->ps.sps->vshift[2],
+                           &gb, s->ps.sps->pcm.bit_depth_chroma);
     }
 
     return 0;
@@ -1310,24 +1328,24 @@ static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
     HEVCLocalContext *lc = s->HEVClc;
     uint8_t *src         = ref->data[0];
     ptrdiff_t srcstride  = ref->linesize[0];
-    int pic_width        = s->sps->width;
-    int pic_height       = s->sps->height;
+    int pic_width        = s->ps.sps->width;
+    int pic_height       = s->ps.sps->height;
     int mx               = mv->x & 3;
     int my               = mv->y & 3;
-    int weight_flag      = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
-                           (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
+    int weight_flag      = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
+                           (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
     int idx              = ff_hevc_pel_weight[block_w];
 
     x_off += mv->x >> 2;
     y_off += mv->y >> 2;
-    src   += y_off * srcstride + x_off * (1 << s->sps->pixel_shift);
+    src   += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
 
     if (x_off < QPEL_EXTRA_BEFORE || y_off < QPEL_EXTRA_AFTER ||
         x_off >= pic_width - block_w - QPEL_EXTRA_AFTER ||
         y_off >= pic_height - block_h - QPEL_EXTRA_AFTER) {
-        const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
-        int offset     = QPEL_EXTRA_BEFORE * srcstride       + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
-        int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
+        const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
+        int offset     = QPEL_EXTRA_BEFORE * srcstride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
+        int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
 
         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
                                  edge_emu_stride, srcstride,
@@ -1371,29 +1389,29 @@ static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
     HEVCLocalContext *lc = s->HEVClc;
     ptrdiff_t src0stride  = ref0->linesize[0];
     ptrdiff_t src1stride  = ref1->linesize[0];
-    int pic_width        = s->sps->width;
-    int pic_height       = s->sps->height;
+    int pic_width        = s->ps.sps->width;
+    int pic_height       = s->ps.sps->height;
     int mx0              = mv0->x & 3;
     int my0              = mv0->y & 3;
     int mx1              = mv1->x & 3;
     int my1              = mv1->y & 3;
-    int weight_flag      = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
-                           (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
+    int weight_flag      = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
+                           (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
     int x_off0           = x_off + (mv0->x >> 2);
     int y_off0           = y_off + (mv0->y >> 2);
     int x_off1           = x_off + (mv1->x >> 2);
     int y_off1           = y_off + (mv1->y >> 2);
     int idx              = ff_hevc_pel_weight[block_w];
 
-    uint8_t *src0  = ref0->data[0] + y_off0 * src0stride + (int)((unsigned)x_off0 << s->sps->pixel_shift);
-    uint8_t *src1  = ref1->data[0] + y_off1 * src1stride + (int)((unsigned)x_off1 << s->sps->pixel_shift);
+    uint8_t *src0  = ref0->data[0] + y_off0 * src0stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
+    uint8_t *src1  = ref1->data[0] + y_off1 * src1stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);
 
     if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER ||
         x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
         y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
-        const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
-        int offset     = QPEL_EXTRA_BEFORE * src0stride       + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
-        int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
+        const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
+        int offset     = QPEL_EXTRA_BEFORE * src0stride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
+        int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
 
         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset,
                                  edge_emu_stride, src0stride,
@@ -1408,9 +1426,9 @@ static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
     if (x_off1 < QPEL_EXTRA_BEFORE || y_off1 < QPEL_EXTRA_AFTER ||
         x_off1 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
         y_off1 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
-        const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
-        int offset     = QPEL_EXTRA_BEFORE * src1stride       + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
-        int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
+        const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
+        int offset     = QPEL_EXTRA_BEFORE * src1stride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
+        int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
 
         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src1 - offset,
                                  edge_emu_stride, src1stride,
@@ -1460,14 +1478,14 @@ static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0,
                           int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset)
 {
     HEVCLocalContext *lc = s->HEVClc;
-    int pic_width        = s->sps->width >> s->sps->hshift[1];
-    int pic_height       = s->sps->height >> s->sps->vshift[1];
+    int pic_width        = s->ps.sps->width >> s->ps.sps->hshift[1];
+    int pic_height       = s->ps.sps->height >> s->ps.sps->vshift[1];
     const Mv *mv         = &current_mv->mv[reflist];
-    int weight_flag      = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
-                           (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
+    int weight_flag      = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
+                           (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
     int idx              = ff_hevc_pel_weight[block_w];
-    int hshift           = s->sps->hshift[1];
-    int vshift           = s->sps->vshift[1];
+    int hshift           = s->ps.sps->hshift[1];
+    int vshift           = s->ps.sps->vshift[1];
     intptr_t mx          = av_mod_uintp2(mv->x, 2 + hshift);
     intptr_t my          = av_mod_uintp2(mv->y, 2 + vshift);
     intptr_t _mx         = mx << (1 - hshift);
@@ -1475,15 +1493,15 @@ static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0,
 
     x_off += mv->x >> (2 + hshift);
     y_off += mv->y >> (2 + vshift);
-    src0  += y_off * srcstride + x_off * (1 << s->sps->pixel_shift);
+    src0  += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
 
     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
-        const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
-        int offset0 = EPEL_EXTRA_BEFORE * (srcstride + (1 << s->sps->pixel_shift));
+        const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
+        int offset0 = EPEL_EXTRA_BEFORE * (srcstride + (1 << s->ps.sps->pixel_shift));
         int buf_offset0 = EPEL_EXTRA_BEFORE *
-                          (edge_emu_stride + (1 << s->sps->pixel_shift));
+                          (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset0,
                                  edge_emu_stride, srcstride,
                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
@@ -1528,14 +1546,14 @@ static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVF
     uint8_t *src2        = ref1->data[cidx+1];
     ptrdiff_t src1stride = ref0->linesize[cidx+1];
     ptrdiff_t src2stride = ref1->linesize[cidx+1];
-    int weight_flag      = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
-                           (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
-    int pic_width        = s->sps->width >> s->sps->hshift[1];
-    int pic_height       = s->sps->height >> s->sps->vshift[1];
+    int weight_flag      = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
+                           (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
+    int pic_width        = s->ps.sps->width >> s->ps.sps->hshift[1];
+    int pic_height       = s->ps.sps->height >> s->ps.sps->vshift[1];
     Mv *mv0              = &current_mv->mv[0];
     Mv *mv1              = &current_mv->mv[1];
-    int hshift = s->sps->hshift[1];
-    int vshift = s->sps->vshift[1];
+    int hshift = s->ps.sps->hshift[1];
+    int vshift = s->ps.sps->vshift[1];
 
     intptr_t mx0 = av_mod_uintp2(mv0->x, 2 + hshift);
     intptr_t my0 = av_mod_uintp2(mv0->y, 2 + vshift);
@@ -1551,16 +1569,16 @@ static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVF
     int x_off1 = x_off + (mv1->x >> (2 + hshift));
     int y_off1 = y_off + (mv1->y >> (2 + vshift));
     int idx = ff_hevc_pel_weight[block_w];
-    src1  += y_off0 * src1stride + (int)((unsigned)x_off0 << s->sps->pixel_shift);
-    src2  += y_off1 * src2stride + (int)((unsigned)x_off1 << s->sps->pixel_shift);
+    src1  += y_off0 * src1stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
+    src2  += y_off1 * src2stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);
 
     if (x_off0 < EPEL_EXTRA_BEFORE || y_off0 < EPEL_EXTRA_AFTER ||
         x_off0 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
         y_off0 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
-        const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
-        int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift));
+        const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
+        int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->ps.sps->pixel_shift));
         int buf_offset1 = EPEL_EXTRA_BEFORE *
-                          (edge_emu_stride + (1 << s->sps->pixel_shift));
+                          (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
 
         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
                                  edge_emu_stride, src1stride,
@@ -1576,10 +1594,10 @@ static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVF
     if (x_off1 < EPEL_EXTRA_BEFORE || y_off1 < EPEL_EXTRA_AFTER ||
         x_off1 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
         y_off1 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
-        const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
-        int offset1 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift));
+        const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
+        int offset1 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->ps.sps->pixel_shift));
         int buf_offset1 = EPEL_EXTRA_BEFORE *
-                          (edge_emu_stride + (1 << s->sps->pixel_shift));
+                          (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
 
         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src2 - offset1,
                                  edge_emu_stride, src2stride,
@@ -1669,13 +1687,13 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
                                 int log2_cb_size, int partIdx, int idx)
 {
 #define POS(c_idx, x, y)                                                              \
-    &s->frame->data[c_idx][((y) >> s->sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
-                           (((x) >> s->sps->hshift[c_idx]) << s->sps->pixel_shift)]
+    &s->frame->data[c_idx][((y) >> s->ps.sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
+                           (((x) >> s->ps.sps->hshift[c_idx]) << s->ps.sps->pixel_shift)]
     HEVCLocalContext *lc = s->HEVClc;
     int merge_idx = 0;
     struct MvField current_mv = {{{ 0 }}};
 
-    int min_pu_width = s->sps->min_pu_width;
+    int min_pu_width = s->ps.sps->min_pu_width;
 
     MvField *tab_mvf = s->ref->tab_mvf;
     RefPicList  *refPicList = s->ref->refPicList;
@@ -1683,8 +1701,8 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
     uint8_t *dst0 = POS(0, x0, y0);
     uint8_t *dst1 = POS(1, x0, y0);
     uint8_t *dst2 = POS(2, x0, y0);
-    int log2_min_cb_size = s->sps->log2_min_cb_size;
-    int min_cb_width     = s->sps->min_cb_width;
+    int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
+    int min_cb_width     = s->ps.sps->min_cb_width;
     int x_cb             = x0 >> log2_min_cb_size;
     int y_cb             = y0 >> log2_min_cb_size;
     int x_pu, y_pu;
@@ -1708,11 +1726,11 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
                               partIdx, merge_idx, &current_mv);
     }
 
-    x_pu = x0 >> s->sps->log2_min_pu_size;
-    y_pu = y0 >> s->sps->log2_min_pu_size;
+    x_pu = x0 >> s->ps.sps->log2_min_pu_size;
+    y_pu = y0 >> s->ps.sps->log2_min_pu_size;
 
-    for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
-        for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
+    for (j = 0; j < nPbH >> s->ps.sps->log2_min_pu_size; j++)
+        for (i = 0; i < nPbW >> s->ps.sps->log2_min_pu_size; i++)
             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
 
     if (current_mv.pred_flag & PF_L0) {
@@ -1729,17 +1747,17 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
     }
 
     if (current_mv.pred_flag == PF_L0) {
-        int x0_c = x0 >> s->sps->hshift[1];
-        int y0_c = y0 >> s->sps->vshift[1];
-        int nPbW_c = nPbW >> s->sps->hshift[1];
-        int nPbH_c = nPbH >> s->sps->vshift[1];
+        int x0_c = x0 >> s->ps.sps->hshift[1];
+        int y0_c = y0 >> s->ps.sps->vshift[1];
+        int nPbW_c = nPbW >> s->ps.sps->hshift[1];
+        int nPbH_c = nPbH >> s->ps.sps->vshift[1];
 
         luma_mc_uni(s, dst0, s->frame->linesize[0], ref0->frame,
                     &current_mv.mv[0], x0, y0, nPbW, nPbH,
                     s->sh.luma_weight_l0[current_mv.ref_idx[0]],
                     s->sh.luma_offset_l0[current_mv.ref_idx[0]]);
 
-        if (s->sps->chroma_format_idc) {
+        if (s->ps.sps->chroma_format_idc) {
             chroma_mc_uni(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1],
                           0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0]);
@@ -1748,17 +1766,17 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1]);
         }
     } else if (current_mv.pred_flag == PF_L1) {
-        int x0_c = x0 >> s->sps->hshift[1];
-        int y0_c = y0 >> s->sps->vshift[1];
-        int nPbW_c = nPbW >> s->sps->hshift[1];
-        int nPbH_c = nPbH >> s->sps->vshift[1];
+        int x0_c = x0 >> s->ps.sps->hshift[1];
+        int y0_c = y0 >> s->ps.sps->vshift[1];
+        int nPbW_c = nPbW >> s->ps.sps->hshift[1];
+        int nPbH_c = nPbH >> s->ps.sps->vshift[1];
 
         luma_mc_uni(s, dst0, s->frame->linesize[0], ref1->frame,
                     &current_mv.mv[1], x0, y0, nPbW, nPbH,
                     s->sh.luma_weight_l1[current_mv.ref_idx[1]],
                     s->sh.luma_offset_l1[current_mv.ref_idx[1]]);
 
-        if (s->sps->chroma_format_idc) {
+        if (s->ps.sps->chroma_format_idc) {
             chroma_mc_uni(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1],
                           1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0]);
@@ -1768,16 +1786,16 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1]);
         }
     } else if (current_mv.pred_flag == PF_BI) {
-        int x0_c = x0 >> s->sps->hshift[1];
-        int y0_c = y0 >> s->sps->vshift[1];
-        int nPbW_c = nPbW >> s->sps->hshift[1];
-        int nPbH_c = nPbH >> s->sps->vshift[1];
+        int x0_c = x0 >> s->ps.sps->hshift[1];
+        int y0_c = y0 >> s->ps.sps->vshift[1];
+        int nPbW_c = nPbW >> s->ps.sps->hshift[1];
+        int nPbH_c = nPbH >> s->ps.sps->vshift[1];
 
         luma_mc_bi(s, dst0, s->frame->linesize[0], ref0->frame,
                    &current_mv.mv[0], x0, y0, nPbW, nPbH,
                    ref1->frame, &current_mv.mv[1], &current_mv);
 
-        if (s->sps->chroma_format_idc) {
+        if (s->ps.sps->chroma_format_idc) {
             chroma_mc_bi(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame,
                          x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 0);
 
@@ -1794,19 +1812,19 @@ static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
                                 int prev_intra_luma_pred_flag)
 {
     HEVCLocalContext *lc = s->HEVClc;
-    int x_pu             = x0 >> s->sps->log2_min_pu_size;
-    int y_pu             = y0 >> s->sps->log2_min_pu_size;
-    int min_pu_width     = s->sps->min_pu_width;
-    int size_in_pus      = pu_size >> s->sps->log2_min_pu_size;
-    int x0b              = av_mod_uintp2(x0, s->sps->log2_ctb_size);
-    int y0b              = av_mod_uintp2(y0, s->sps->log2_ctb_size);
+    int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
+    int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
+    int min_pu_width     = s->ps.sps->min_pu_width;
+    int size_in_pus      = pu_size >> s->ps.sps->log2_min_pu_size;
+    int x0b              = av_mod_uintp2(x0, s->ps.sps->log2_ctb_size);
+    int y0b              = av_mod_uintp2(y0, s->ps.sps->log2_ctb_size);
 
     int cand_up   = (lc->ctb_up_flag || y0b) ?
                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
     int cand_left = (lc->ctb_left_flag || x0b) ?
                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
 
-    int y_ctb = (y0 >> (s->sps->log2_ctb_size)) << (s->sps->log2_ctb_size);
+    int y_ctb = (y0 >> (s->ps.sps->log2_ctb_size)) << (s->ps.sps->log2_ctb_size);
 
     MvField *tab_mvf = s->ref->tab_mvf;
     int intra_pred_mode;
@@ -1873,13 +1891,13 @@ static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
                                           int log2_cb_size, int ct_depth)
 {
-    int length = (1 << log2_cb_size) >> s->sps->log2_min_cb_size;
-    int x_cb   = x0 >> s->sps->log2_min_cb_size;
-    int y_cb   = y0 >> s->sps->log2_min_cb_size;
+    int length = (1 << log2_cb_size) >> s->ps.sps->log2_min_cb_size;
+    int x_cb   = x0 >> s->ps.sps->log2_min_cb_size;
+    int y_cb   = y0 >> s->ps.sps->log2_min_cb_size;
     int y;
 
     for (y = 0; y < length; y++)
-        memset(&s->tab_ct_depth[(y_cb + y) * s->sps->min_cb_width + x_cb],
+        memset(&s->tab_ct_depth[(y_cb + y) * s->ps.sps->min_cb_width + x_cb],
                ct_depth, length);
 }
 
@@ -1916,7 +1934,7 @@ static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
         }
     }
 
-    if (s->sps->chroma_format_idc == 3) {
+    if (s->ps.sps->chroma_format_idc == 3) {
         for (i = 0; i < side; i++) {
             for (j = 0; j < side; j++) {
                 lc->pu.chroma_mode_c[2 * i + j] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
@@ -1930,7 +1948,7 @@ static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
                 }
             }
         }
-    } else if (s->sps->chroma_format_idc == 2) {
+    } else if (s->ps.sps->chroma_format_idc == 2) {
         int mode_idx;
         lc->pu.chroma_mode_c[0] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
         if (chroma_mode != 4) {
@@ -1942,7 +1960,7 @@ static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
             mode_idx = lc->pu.intra_pred_mode[0];
         }
         lc->pu.intra_pred_mode_c[0] = tab_mode_idx[mode_idx];
-    } else if (s->sps->chroma_format_idc != 0) {
+    } else if (s->ps.sps->chroma_format_idc != 0) {
         chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
         if (chroma_mode != 4) {
             if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
@@ -1961,11 +1979,11 @@ static void intra_prediction_unit_default_value(HEVCContext *s,
 {
     HEVCLocalContext *lc = s->HEVClc;
     int pb_size          = 1 << log2_cb_size;
-    int size_in_pus      = pb_size >> s->sps->log2_min_pu_size;
-    int min_pu_width     = s->sps->min_pu_width;
+    int size_in_pus      = pb_size >> s->ps.sps->log2_min_pu_size;
+    int min_pu_width     = s->ps.sps->min_pu_width;
     MvField *tab_mvf     = s->ref->tab_mvf;
-    int x_pu             = x0 >> s->sps->log2_min_pu_size;
-    int y_pu             = y0 >> s->sps->log2_min_pu_size;
+    int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
+    int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
     int j, k;
 
     if (size_in_pus == 0)
@@ -1982,13 +2000,13 @@ static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
 {
     int cb_size          = 1 << log2_cb_size;
     HEVCLocalContext *lc = s->HEVClc;
-    int log2_min_cb_size = s->sps->log2_min_cb_size;
+    int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
     int length           = cb_size >> log2_min_cb_size;
-    int min_cb_width     = s->sps->min_cb_width;
+    int min_cb_width     = s->ps.sps->min_cb_width;
     int x_cb             = x0 >> log2_min_cb_size;
     int y_cb             = y0 >> log2_min_cb_size;
     int idx              = log2_cb_size - 2;
-    int qp_block_mask    = (1<<(s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth)) - 1;
+    int qp_block_mask    = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
     int x, y, ret;
 
     lc->cu.x                = x0;
@@ -2000,7 +2018,7 @@ static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
     for (x = 0; x < 4; x++)
         lc->pu.intra_pred_mode[x] = 1;
-    if (s->pps->transquant_bypass_enable_flag) {
+    if (s->ps.pps->transquant_bypass_enable_flag) {
         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
         if (lc->cu.cu_transquant_bypass_flag)
             set_deblocking_bypass(s, x0, y0, log2_cb_size);
@@ -2036,22 +2054,22 @@ static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
         if (s->sh.slice_type != I_SLICE)
             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
         if (lc->cu.pred_mode != MODE_INTRA ||
-            log2_cb_size == s->sps->log2_min_cb_size) {
+            log2_cb_size == s->ps.sps->log2_min_cb_size) {
             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
                                       lc->cu.pred_mode == MODE_INTRA;
         }
 
         if (lc->cu.pred_mode == MODE_INTRA) {
-            if (lc->cu.part_mode == PART_2Nx2N && s->sps->pcm_enabled_flag &&
-                log2_cb_size >= s->sps->pcm.log2_min_pcm_cb_size &&
-                log2_cb_size <= s->sps->pcm.log2_max_pcm_cb_size) {
+            if (lc->cu.part_mode == PART_2Nx2N && s->ps.sps->pcm_enabled_flag &&
+                log2_cb_size >= s->ps.sps->pcm.log2_min_pcm_cb_size &&
+                log2_cb_size <= s->ps.sps->pcm.log2_max_pcm_cb_size) {
                 pcm_flag = ff_hevc_pcm_flag_decode(s);
             }
             if (pcm_flag) {
                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
-                if (s->sps->pcm.loop_filter_disable_flag)
+                if (s->ps.sps->pcm.loop_filter_disable_flag)
                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
 
                 if (ret < 0)
@@ -2108,8 +2126,8 @@ static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
             if (rqt_root_cbf) {
                 const static int cbf[2] = { 0 };
                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
-                                         s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
-                                         s->sps->max_transform_hierarchy_depth_inter;
+                                         s->ps.sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
+                                         s->ps.sps->max_transform_hierarchy_depth_inter;
                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
                                          log2_cb_size,
                                          log2_cb_size, 0, 0, cbf, cbf);
@@ -2122,7 +2140,7 @@ static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
         }
     }
 
-    if (s->pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
+    if (s->ps.pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
         ff_hevc_set_qPy(s, x0, y0, log2_cb_size);
 
     x = y_cb * min_cb_width + x_cb;
@@ -2150,26 +2168,26 @@ static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
     int split_cu;
 
     lc->ct_depth = cb_depth;
-    if (x0 + cb_size <= s->sps->width  &&
-        y0 + cb_size <= s->sps->height &&
-        log2_cb_size > s->sps->log2_min_cb_size) {
+    if (x0 + cb_size <= s->ps.sps->width  &&
+        y0 + cb_size <= s->ps.sps->height &&
+        log2_cb_size > s->ps.sps->log2_min_cb_size) {
         split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
     } else {
-        split_cu = (log2_cb_size > s->sps->log2_min_cb_size);
+        split_cu = (log2_cb_size > s->ps.sps->log2_min_cb_size);
     }
-    if (s->pps->cu_qp_delta_enabled_flag &&
-        log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) {
+    if (s->ps.pps->cu_qp_delta_enabled_flag &&
+        log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth) {
         lc->tu.is_cu_qp_delta_coded = 0;
         lc->tu.cu_qp_delta          = 0;
     }
 
     if (s->sh.cu_chroma_qp_offset_enabled_flag &&
-        log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_chroma_qp_offset_depth) {
+        log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_chroma_qp_offset_depth) {
         lc->tu.is_cu_chroma_qp_offset_coded = 0;
     }
 
     if (split_cu) {
-        int qp_block_mask = (1<<(s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth)) - 1;
+        int qp_block_mask = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
         const int cb_size_split = cb_size >> 1;
         const int x1 = x0 + cb_size_split;
         const int y1 = y0 + cb_size_split;
@@ -2180,18 +2198,18 @@ static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
         if (more_data < 0)
             return more_data;
 
-        if (more_data && x1 < s->sps->width) {
+        if (more_data && x1 < s->ps.sps->width) {
             more_data = hls_coding_quadtree(s, x1, y0, log2_cb_size - 1, cb_depth + 1);
             if (more_data < 0)
                 return more_data;
         }
-        if (more_data && y1 < s->sps->height) {
+        if (more_data && y1 < s->ps.sps->height) {
             more_data = hls_coding_quadtree(s, x0, y1, log2_cb_size - 1, cb_depth + 1);
             if (more_data < 0)
                 return more_data;
         }
-        if (more_data && x1 < s->sps->width &&
-            y1 < s->sps->height) {
+        if (more_data && x1 < s->ps.sps->width &&
+            y1 < s->ps.sps->height) {
             more_data = hls_coding_quadtree(s, x1, y1, log2_cb_size - 1, cb_depth + 1);
             if (more_data < 0)
                 return more_data;
@@ -2202,8 +2220,8 @@ static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
             lc->qPy_pred = lc->qp_y;
 
         if (more_data)
-            return ((x1 + cb_size_split) < s->sps->width ||
-                    (y1 + cb_size_split) < s->sps->height);
+            return ((x1 + cb_size_split) < s->ps.sps->width ||
+                    (y1 + cb_size_split) < s->ps.sps->height);
         else
             return 0;
     } else {
@@ -2211,11 +2229,11 @@ static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
         if (ret < 0)
             return ret;
         if ((!((x0 + cb_size) %
-               (1 << (s->sps->log2_ctb_size))) ||
-             (x0 + cb_size >= s->sps->width)) &&
+               (1 << (s->ps.sps->log2_ctb_size))) ||
+             (x0 + cb_size >= s->ps.sps->width)) &&
             (!((y0 + cb_size) %
-               (1 << (s->sps->log2_ctb_size))) ||
-             (y0 + cb_size >= s->sps->height))) {
+               (1 << (s->ps.sps->log2_ctb_size))) ||
+             (y0 + cb_size >= s->ps.sps->height))) {
             int end_of_slice_flag = ff_hevc_end_of_slice_flag_decode(s);
             return !end_of_slice_flag;
         } else {
@@ -2230,59 +2248,59 @@ static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
                                  int ctb_addr_ts)
 {
     HEVCLocalContext *lc  = s->HEVClc;
-    int ctb_size          = 1 << s->sps->log2_ctb_size;
-    int ctb_addr_rs       = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
+    int ctb_size          = 1 << s->ps.sps->log2_ctb_size;
+    int ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
 
     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
 
-    if (s->pps->entropy_coding_sync_enabled_flag) {
+    if (s->ps.pps->entropy_coding_sync_enabled_flag) {
         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
             lc->first_qp_group = 1;
-        lc->end_of_tiles_x = s->sps->width;
-    } else if (s->pps->tiles_enabled_flag) {
-        if (ctb_addr_ts && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]) {
-            int idxX = s->pps->col_idxX[x_ctb >> s->sps->log2_ctb_size];
-            lc->end_of_tiles_x   = x_ctb + (s->pps->column_width[idxX] << s->sps->log2_ctb_size);
+        lc->end_of_tiles_x = s->ps.sps->width;
+    } else if (s->ps.pps->tiles_enabled_flag) {
+        if (ctb_addr_ts && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[ctb_addr_ts - 1]) {
+            int idxX = s->ps.pps->col_idxX[x_ctb >> s->ps.sps->log2_ctb_size];
+            lc->end_of_tiles_x   = x_ctb + (s->ps.pps->column_width[idxX] << s->ps.sps->log2_ctb_size);
             lc->first_qp_group   = 1;
         }
     } else {
-        lc->end_of_tiles_x = s->sps->width;
+        lc->end_of_tiles_x = s->ps.sps->width;
     }
 
-    lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->sps->height);
+    lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->ps.sps->height);
 
     lc->boundary_flags = 0;
-    if (s->pps->tiles_enabled_flag) {
-        if (x_ctb > 0 && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
+    if (s->ps.pps->tiles_enabled_flag) {
+        if (x_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
             lc->boundary_flags |= BOUNDARY_LEFT_TILE;
         if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
-        if (y_ctb > 0 && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]])
+        if (y_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->ps.sps->ctb_width]])
             lc->boundary_flags |= BOUNDARY_UPPER_TILE;
-        if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width])
+        if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->ps.sps->ctb_width])
             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
     } else {
         if (ctb_addr_in_slice <= 0)
             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
-        if (ctb_addr_in_slice < s->sps->ctb_width)
+        if (ctb_addr_in_slice < s->ps.sps->ctb_width)
             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
     }
 
     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
-    lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
-    lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->sps->ctb_width]]));
-    lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->sps->ctb_width]]));
+    lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->ps.sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
+    lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->ps.sps->ctb_width]]));
+    lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->ps.sps->ctb_width]]));
 }
 
 static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread)
 {
     HEVCContext *s  = avctxt->priv_data;
-    int ctb_size    = 1 << s->sps->log2_ctb_size;
+    int ctb_size    = 1 << s->ps.sps->log2_ctb_size;
     int more_data   = 1;
     int x_ctb       = 0;
     int y_ctb       = 0;
-    int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
+    int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
 
     if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) {
         av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n");
@@ -2290,29 +2308,29 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread)
     }
 
     if (s->sh.dependent_slice_segment_flag) {
-        int prev_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
+        int prev_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
         if (s->tab_slice_address[prev_rs] != s->sh.slice_addr) {
             av_log(s->avctx, AV_LOG_ERROR, "Previous slice segment missing\n");
             return AVERROR_INVALIDDATA;
         }
     }
 
-    while (more_data && ctb_addr_ts < s->sps->ctb_size) {
-        int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
+    while (more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
+        int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
 
-        x_ctb = (ctb_addr_rs % ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
-        y_ctb = (ctb_addr_rs / ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
+        x_ctb = (ctb_addr_rs % ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
+        y_ctb = (ctb_addr_rs / ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
 
         ff_hevc_cabac_init(s, ctb_addr_ts);
 
-        hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
+        hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
 
         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
 
-        more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
+        more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
         if (more_data < 0) {
             s->tab_slice_address[ctb_addr_rs] = -1;
             return more_data;
@@ -2324,8 +2342,8 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread)
         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
     }
 
-    if (x_ctb + ctb_size >= s->sps->width &&
-        y_ctb + ctb_size >= s->sps->height)
+    if (x_ctb + ctb_size >= s->ps.sps->width &&
+        y_ctb + ctb_size >= s->ps.sps->height)
         ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
 
     return ctb_addr_ts;
@@ -2346,12 +2364,12 @@ static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int
 {
     HEVCContext *s1  = avctxt->priv_data, *s;
     HEVCLocalContext *lc;
-    int ctb_size    = 1<< s1->sps->log2_ctb_size;
+    int ctb_size    = 1<< s1->ps.sps->log2_ctb_size;
     int more_data   = 1;
     int *ctb_row_p    = input_ctb_row;
     int ctb_row = ctb_row_p[job];
-    int ctb_addr_rs = s1->sh.slice_ctb_addr_rs + ctb_row * ((s1->sps->width + ctb_size - 1) >> s1->sps->log2_ctb_size);
-    int ctb_addr_ts = s1->pps->ctb_addr_rs_to_ts[ctb_addr_rs];
+    int ctb_addr_rs = s1->sh.slice_ctb_addr_rs + ctb_row * ((s1->ps.sps->width + ctb_size - 1) >> s1->ps.sps->log2_ctb_size);
+    int ctb_addr_ts = s1->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs];
     int thread = ctb_row % s1->threads_number;
     int ret;
 
@@ -2366,9 +2384,9 @@ static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int
         ff_init_cabac_decoder(&lc->cc, s->data + s->sh.offset[(ctb_row)-1], s->sh.size[ctb_row - 1]);
     }
 
-    while(more_data && ctb_addr_ts < s->sps->ctb_size) {
-        int x_ctb = (ctb_addr_rs % s->sps->ctb_width) << s->sps->log2_ctb_size;
-        int y_ctb = (ctb_addr_rs / s->sps->ctb_width) << s->sps->log2_ctb_size;
+    while(more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
+        int x_ctb = (ctb_addr_rs % s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
+        int y_ctb = (ctb_addr_rs / s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
 
         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
 
@@ -2380,11 +2398,13 @@ static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int
         }
 
         ff_hevc_cabac_init(s, ctb_addr_ts);
-        hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
-        more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
+        hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
+        more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
 
         if (more_data < 0) {
             s->tab_slice_address[ctb_addr_rs] = -1;
+            avpriv_atomic_int_set(&s1->wpp_err,  1);
+            ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
             return more_data;
         }
 
@@ -2394,21 +2414,21 @@ static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int
         ff_thread_report_progress2(s->avctx, ctb_row, thread, 1);
         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
 
-        if (!more_data && (x_ctb+ctb_size) < s->sps->width && ctb_row != s->sh.num_entry_point_offsets) {
+        if (!more_data && (x_ctb+ctb_size) < s->ps.sps->width && ctb_row != s->sh.num_entry_point_offsets) {
             avpriv_atomic_int_set(&s1->wpp_err,  1);
             ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
             return 0;
         }
 
-        if ((x_ctb+ctb_size) >= s->sps->width && (y_ctb+ctb_size) >= s->sps->height ) {
+        if ((x_ctb+ctb_size) >= s->ps.sps->width && (y_ctb+ctb_size) >= s->ps.sps->height ) {
             ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
             return ctb_addr_ts;
         }
-        ctb_addr_rs       = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
+        ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
         x_ctb+=ctb_size;
 
-        if(x_ctb >= s->sps->width) {
+        if(x_ctb >= s->ps.sps->width) {
             break;
         }
     }
@@ -2417,13 +2437,15 @@ static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int
     return 0;
 }
 
-static int hls_slice_data_wpp(HEVCContext *s, const uint8_t *nal, int length)
+static int hls_slice_data_wpp(HEVCContext *s, const HEVCNAL *nal)
 {
+    const uint8_t *data = nal->data;
+    int length          = nal->size;
     HEVCLocalContext *lc = s->HEVClc;
     int *ret = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
     int *arg = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
-    int offset;
-    int startheader, cmpt = 0;
+    int64_t offset;
+    int64_t startheader, cmpt = 0;
     int i, j, res = 0;
 
     if (!ret || !arg) {
@@ -2432,11 +2454,18 @@ static int hls_slice_data_wpp(HEVCContext *s, const uint8_t *nal, int length)
         return AVERROR(ENOMEM);
     }
 
+    if (s->sh.slice_ctb_addr_rs + s->sh.num_entry_point_offsets * s->ps.sps->ctb_width >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
+        av_log(s->avctx, AV_LOG_ERROR, "WPP ctb addresses are wrong (%d %d %d %d)\n",
+            s->sh.slice_ctb_addr_rs, s->sh.num_entry_point_offsets,
+            s->ps.sps->ctb_width, s->ps.sps->ctb_height
+        );
+        res = AVERROR_INVALIDDATA;
+        goto error;
+    }
 
-    if (!s->sList[1]) {
-        ff_alloc_entries(s->avctx, s->sh.num_entry_point_offsets + 1);
-
+    ff_alloc_entries(s->avctx, s->sh.num_entry_point_offsets + 1);
 
+    if (!s->sList[1]) {
         for (i = 1; i < s->threads_number; i++) {
             s->sList[i] = av_malloc(sizeof(HEVCContext));
             memcpy(s->sList[i], s, sizeof(HEVCContext));
@@ -2447,8 +2476,8 @@ static int hls_slice_data_wpp(HEVCContext *s, const uint8_t *nal, int length)
 
     offset = (lc->gb.index >> 3);
 
-    for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < s->skipped_bytes; j++) {
-        if (s->skipped_bytes_pos[j] >= offset && s->skipped_bytes_pos[j] < startheader) {
+    for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < nal->skipped_bytes; j++) {
+        if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
             startheader--;
             cmpt++;
         }
@@ -2457,8 +2486,8 @@ static int hls_slice_data_wpp(HEVCContext *s, const uint8_t *nal, int length)
     for (i = 1; i < s->sh.num_entry_point_offsets; i++) {
         offset += (s->sh.entry_point_offset[i - 1] - cmpt);
         for (j = 0, cmpt = 0, startheader = offset
-             + s->sh.entry_point_offset[i]; j < s->skipped_bytes; j++) {
-            if (s->skipped_bytes_pos[j] >= offset && s->skipped_bytes_pos[j] < startheader) {
+             + s->sh.entry_point_offset[i]; j < nal->skipped_bytes; j++) {
+            if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
                 startheader--;
                 cmpt++;
             }
@@ -2469,11 +2498,16 @@ static int hls_slice_data_wpp(HEVCContext *s, const uint8_t *nal, int length)
     }
     if (s->sh.num_entry_point_offsets != 0) {
         offset += s->sh.entry_point_offset[s->sh.num_entry_point_offsets - 1] - cmpt;
+        if (length < offset) {
+            av_log(s->avctx, AV_LOG_ERROR, "entry_point_offset table is corrupted\n");
+            res = AVERROR_INVALIDDATA;
+            goto error;
+        }
         s->sh.size[s->sh.num_entry_point_offsets - 1] = length - offset;
         s->sh.offset[s->sh.num_entry_point_offsets - 1] = offset;
 
     }
-    s->data = nal;
+    s->data = data;
 
     for (i = 1; i < s->threads_number; i++) {
         s->sList[i]->HEVClc->first_qp_group = 1;
@@ -2490,42 +2524,17 @@ static int hls_slice_data_wpp(HEVCContext *s, const uint8_t *nal, int length)
         ret[i] = 0;
     }
 
-    if (s->pps->entropy_coding_sync_enabled_flag)
-        s->avctx->execute2(s->avctx, (void *) hls_decode_entry_wpp, arg, ret, s->sh.num_entry_point_offsets + 1);
+    if (s->ps.pps->entropy_coding_sync_enabled_flag)
+        s->avctx->execute2(s->avctx, hls_decode_entry_wpp, arg, ret, s->sh.num_entry_point_offsets + 1);
 
     for (i = 0; i <= s->sh.num_entry_point_offsets; i++)
         res += ret[i];
+error:
     av_free(ret);
     av_free(arg);
     return res;
 }
 
-/**
- * @return AVERROR_INVALIDDATA if the packet is not a valid NAL unit,
- * 0 if the unit should be skipped, 1 otherwise
- */
-static int hls_nal_unit(HEVCContext *s)
-{
-    GetBitContext *gb = &s->HEVClc->gb;
-    int nuh_layer_id;
-
-    if (get_bits1(gb) != 0)
-        return AVERROR_INVALIDDATA;
-
-    s->nal_unit_type = get_bits(gb, 6);
-
-    nuh_layer_id   = get_bits(gb, 6);
-    s->temporal_id = get_bits(gb, 3) - 1;
-    if (s->temporal_id < 0)
-        return AVERROR_INVALIDDATA;
-
-    av_log(s->avctx, AV_LOG_DEBUG,
-           "nal_unit_type: %d, nuh_layer_id: %d, temporal_id: %d\n",
-           s->nal_unit_type, nuh_layer_id, s->temporal_id);
-
-    return nuh_layer_id == 0;
-}
-
 static int set_side_data(HEVCContext *s)
 {
     AVFrame *out = s->ref->frame;
@@ -2572,27 +2581,89 @@ static int set_side_data(HEVCContext *s)
                                s->sei_hflip, s->sei_vflip);
     }
 
+    // Decrement the mastering display flag when IRAP frame has no_rasl_output_flag=1
+    // so the side data persists for the entire coded video sequence.
+    if (s->sei_mastering_display_info_present > 0 &&
+        IS_IRAP(s) && s->no_rasl_output_flag) {
+        s->sei_mastering_display_info_present--;
+    }
+    if (s->sei_mastering_display_info_present) {
+        // HEVC uses a g,b,r ordering, which we convert to a more natural r,g,b
+        const int mapping[3] = {2, 0, 1};
+        const int chroma_den = 50000;
+        const int luma_den = 10000;
+        int i;
+        AVMasteringDisplayMetadata *metadata =
+            av_mastering_display_metadata_create_side_data(out);
+        if (!metadata)
+            return AVERROR(ENOMEM);
+
+        for (i = 0; i < 3; i++) {
+            const int j = mapping[i];
+            metadata->display_primaries[i][0].num = s->display_primaries[j][0];
+            metadata->display_primaries[i][0].den = chroma_den;
+            metadata->display_primaries[i][1].num = s->display_primaries[j][1];
+            metadata->display_primaries[i][1].den = chroma_den;
+        }
+        metadata->white_point[0].num = s->white_point[0];
+        metadata->white_point[0].den = chroma_den;
+        metadata->white_point[1].num = s->white_point[1];
+        metadata->white_point[1].den = chroma_den;
+
+        metadata->max_luminance.num = s->max_mastering_luminance;
+        metadata->max_luminance.den = luma_den;
+        metadata->min_luminance.num = s->min_mastering_luminance;
+        metadata->min_luminance.den = luma_den;
+        metadata->has_luminance = 1;
+        metadata->has_primaries = 1;
+
+        av_log(s->avctx, AV_LOG_DEBUG, "Mastering Display Metadata:\n");
+        av_log(s->avctx, AV_LOG_DEBUG,
+               "r(%5.4f,%5.4f) g(%5.4f,%5.4f) b(%5.4f %5.4f) wp(%5.4f, %5.4f)\n",
+               av_q2d(metadata->display_primaries[0][0]),
+               av_q2d(metadata->display_primaries[0][1]),
+               av_q2d(metadata->display_primaries[1][0]),
+               av_q2d(metadata->display_primaries[1][1]),
+               av_q2d(metadata->display_primaries[2][0]),
+               av_q2d(metadata->display_primaries[2][1]),
+               av_q2d(metadata->white_point[0]), av_q2d(metadata->white_point[1]));
+        av_log(s->avctx, AV_LOG_DEBUG,
+               "min_luminance=%f, max_luminance=%f\n",
+               av_q2d(metadata->min_luminance), av_q2d(metadata->max_luminance));
+    }
+
+    if (s->a53_caption) {
+        AVFrameSideData* sd = av_frame_new_side_data(out,
+                                                     AV_FRAME_DATA_A53_CC,
+                                                     s->a53_caption_size);
+        if (sd)
+            memcpy(sd->data, s->a53_caption, s->a53_caption_size);
+        av_freep(&s->a53_caption);
+        s->a53_caption_size = 0;
+        s->avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
+    }
+
     return 0;
 }
 
 static int hevc_frame_start(HEVCContext *s)
 {
     HEVCLocalContext *lc = s->HEVClc;
-    int pic_size_in_ctb  = ((s->sps->width  >> s->sps->log2_min_cb_size) + 1) *
-                           ((s->sps->height >> s->sps->log2_min_cb_size) + 1);
+    int pic_size_in_ctb  = ((s->ps.sps->width  >> s->ps.sps->log2_min_cb_size) + 1) *
+                           ((s->ps.sps->height >> s->ps.sps->log2_min_cb_size) + 1);
     int ret;
 
     memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
     memset(s->vertical_bs,   0, s->bs_width * s->bs_height);
-    memset(s->cbf_luma,      0, s->sps->min_tb_width * s->sps->min_tb_height);
-    memset(s->is_pcm,        0, (s->sps->min_pu_width + 1) * (s->sps->min_pu_height + 1));
+    memset(s->cbf_luma,      0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
+    memset(s->is_pcm,        0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1));
     memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
 
     s->is_decoded        = 0;
     s->first_nal_type    = s->nal_unit_type;
 
-    if (s->pps->tiles_enabled_flag)
-        lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size;
+    if (s->ps.pps->tiles_enabled_flag)
+        lc->end_of_tiles_x = s->ps.pps->column_width[0] << s->ps.sps->log2_ctb_size;
 
     ret = ff_hevc_set_new_ref(s, &s->frame, s->poc);
     if (ret < 0)
@@ -2638,31 +2709,24 @@ static int decode_nal_unit(HEVCContext *s, const HEVCNAL *nal)
     GetBitContext *gb    = &lc->gb;
     int ctb_addr_ts, ret;
 
-    ret = init_get_bits8(gb, nal->data, nal->size);
-    if (ret < 0)
-        return ret;
-
-    ret = hls_nal_unit(s);
-    if (ret < 0) {
-        av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n",
-               s->nal_unit_type);
-        goto fail;
-    } else if (!ret)
-        return 0;
+    *gb              = nal->gb;
+    s->nal_unit_type = nal->type;
+    s->temporal_id   = nal->temporal_id;
 
     switch (s->nal_unit_type) {
     case NAL_VPS:
-        ret = ff_hevc_decode_nal_vps(s);
+        ret = ff_hevc_decode_nal_vps(gb, s->avctx, &s->ps);
         if (ret < 0)
             goto fail;
         break;
     case NAL_SPS:
-        ret = ff_hevc_decode_nal_sps(s);
+        ret = ff_hevc_decode_nal_sps(gb, s->avctx, &s->ps,
+                                     s->apply_defdispwin);
         if (ret < 0)
             goto fail;
         break;
     case NAL_PPS:
-        ret = ff_hevc_decode_nal_pps(s);
+        ret = ff_hevc_decode_nal_pps(gb, s->avctx, &s->ps);
         if (ret < 0)
             goto fail;
         break;
@@ -2748,10 +2812,10 @@ static int decode_nal_unit(HEVCContext *s, const HEVCNAL *nal)
                 goto fail;
         } else {
             if (s->threads_number > 1 && s->sh.num_entry_point_offsets > 0)
-                ctb_addr_ts = hls_slice_data_wpp(s, nal->data, nal->size);
+                ctb_addr_ts = hls_slice_data_wpp(s, nal);
             else
                 ctb_addr_ts = hls_slice_data(s);
-            if (ctb_addr_ts >= (s->sps->ctb_width * s->sps->ctb_height)) {
+            if (ctb_addr_ts >= (s->ps.sps->ctb_width * s->ps.sps->ctb_height)) {
                 s->is_decoded = 1;
             }
 
@@ -2781,122 +2845,9 @@ static int decode_nal_unit(HEVCContext *s, const HEVCNAL *nal)
     return 0;
 }
 
-/* FIXME: This is adapted from ff_h264_decode_nal, avoiding duplication
- * between these functions would be nice. */
-int ff_hevc_extract_rbsp(HEVCContext *s, const uint8_t *src, int length,
-                         HEVCNAL *nal)
-{
-    int i, si, di;
-    uint8_t *dst;
-
-    s->skipped_bytes = 0;
-#define STARTCODE_TEST                                                  \
-        if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) {     \
-            if (src[i + 2] != 3) {                                      \
-                /* startcode, so we must be past the end */             \
-                length = i;                                             \
-            }                                                           \
-            break;                                                      \
-        }
-#if HAVE_FAST_UNALIGNED
-#define FIND_FIRST_ZERO                                                 \
-        if (i > 0 && !src[i])                                           \
-            i--;                                                        \
-        while (src[i])                                                  \
-            i++
-#if HAVE_FAST_64BIT
-    for (i = 0; i + 1 < length; i += 9) {
-        if (!((~AV_RN64A(src + i) &
-               (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
-              0x8000800080008080ULL))
-            continue;
-        FIND_FIRST_ZERO;
-        STARTCODE_TEST;
-        i -= 7;
-    }
-#else
-    for (i = 0; i + 1 < length; i += 5) {
-        if (!((~AV_RN32A(src + i) &
-               (AV_RN32A(src + i) - 0x01000101U)) &
-              0x80008080U))
-            continue;
-        FIND_FIRST_ZERO;
-        STARTCODE_TEST;
-        i -= 3;
-    }
-#endif /* HAVE_FAST_64BIT */
-#else
-    for (i = 0; i + 1 < length; i += 2) {
-        if (src[i])
-            continue;
-        if (i > 0 && src[i - 1] == 0)
-            i--;
-        STARTCODE_TEST;
-    }
-#endif /* HAVE_FAST_UNALIGNED */
-
-    if (i >= length - 1) { // no escaped 0
-        nal->data     =
-        nal->raw_data = src;
-        nal->size     =
-        nal->raw_size = length;
-        return length;
-    }
-
-    av_fast_malloc(&nal->rbsp_buffer, &nal->rbsp_buffer_size,
-                   length + FF_INPUT_BUFFER_PADDING_SIZE);
-    if (!nal->rbsp_buffer)
-        return AVERROR(ENOMEM);
-
-    dst = nal->rbsp_buffer;
-
-    memcpy(dst, src, i);
-    si = di = i;
-    while (si + 2 < length) {
-        // remove escapes (very rare 1:2^22)
-        if (src[si + 2] > 3) {
-            dst[di++] = src[si++];
-            dst[di++] = src[si++];
-        } else if (src[si] == 0 && src[si + 1] == 0) {
-            if (src[si + 2] == 3) { // escape
-                dst[di++] = 0;
-                dst[di++] = 0;
-                si       += 3;
-
-                s->skipped_bytes++;
-                if (s->skipped_bytes_pos_size < s->skipped_bytes) {
-                    s->skipped_bytes_pos_size *= 2;
-                    av_reallocp_array(&s->skipped_bytes_pos,
-                            s->skipped_bytes_pos_size,
-                            sizeof(*s->skipped_bytes_pos));
-                    if (!s->skipped_bytes_pos)
-                        return AVERROR(ENOMEM);
-                }
-                if (s->skipped_bytes_pos)
-                    s->skipped_bytes_pos[s->skipped_bytes-1] = di - 1;
-                continue;
-            } else // next start code
-                goto nsc;
-        }
-
-        dst[di++] = src[si++];
-    }
-    while (si < length)
-        dst[di++] = src[si++];
-
-nsc:
-    memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
-
-    nal->data = dst;
-    nal->size = di;
-    nal->raw_data = src;
-    nal->raw_size = si;
-    return si;
-}
-
 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
 {
-    int i, consumed, ret = 0;
+    int i, ret = 0;
 
     s->ref = NULL;
     s->last_eos = s->eos;
@@ -2904,109 +2855,23 @@ static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
 
     /* split the input packet into NAL units, so we know the upper bound on the
      * number of slices in the frame */
-    s->nb_nals = 0;
-    while (length >= 4) {
-        HEVCNAL *nal;
-        int extract_length = 0;
-
-        if (s->is_nalff) {
-            int i;
-            for (i = 0; i < s->nal_length_size; i++)
-                extract_length = (extract_length << 8) | buf[i];
-            buf    += s->nal_length_size;
-            length -= s->nal_length_size;
-
-            if (extract_length > length) {
-                av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit size.\n");
-                ret = AVERROR_INVALIDDATA;
-                goto fail;
-            }
-        } else {
-            /* search start code */
-            while (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
-                ++buf;
-                --length;
-                if (length < 4) {
-                    av_log(s->avctx, AV_LOG_ERROR, "No start code is found.\n");
-                    ret = AVERROR_INVALIDDATA;
-                    goto fail;
-                }
-            }
-
-            buf           += 3;
-            length        -= 3;
-        }
-
-        if (!s->is_nalff)
-            extract_length = length;
-
-        if (s->nals_allocated < s->nb_nals + 1) {
-            int new_size = s->nals_allocated + 1;
-            void *tmp = av_realloc_array(s->nals, new_size, sizeof(*s->nals));
-            ret = AVERROR(ENOMEM);
-            if (!tmp) {
-                goto fail;
-            }
-            s->nals = tmp;
-            memset(s->nals + s->nals_allocated, 0,
-                   (new_size - s->nals_allocated) * sizeof(*s->nals));
-
-            tmp = av_realloc_array(s->skipped_bytes_nal, new_size, sizeof(*s->skipped_bytes_nal));
-            if (!tmp)
-                goto fail;
-            s->skipped_bytes_nal = tmp;
-
-            tmp = av_realloc_array(s->skipped_bytes_pos_size_nal, new_size, sizeof(*s->skipped_bytes_pos_size_nal));
-            if (!tmp)
-                goto fail;
-            s->skipped_bytes_pos_size_nal = tmp;
-
-            tmp = av_realloc_array(s->skipped_bytes_pos_nal, new_size, sizeof(*s->skipped_bytes_pos_nal));
-            if (!tmp)
-                goto fail;
-            s->skipped_bytes_pos_nal = tmp;
-
-            s->skipped_bytes_pos_size_nal[s->nals_allocated] = 1024; // initial buffer size
-            s->skipped_bytes_pos_nal[s->nals_allocated] = av_malloc_array(s->skipped_bytes_pos_size_nal[s->nals_allocated], sizeof(*s->skipped_bytes_pos));
-            if (!s->skipped_bytes_pos_nal[s->nals_allocated])
-                goto fail;
-            s->nals_allocated = new_size;
-        }
-        s->skipped_bytes_pos_size = s->skipped_bytes_pos_size_nal[s->nb_nals];
-        s->skipped_bytes_pos = s->skipped_bytes_pos_nal[s->nb_nals];
-        nal = &s->nals[s->nb_nals];
-
-        consumed = ff_hevc_extract_rbsp(s, buf, extract_length, nal);
-
-        s->skipped_bytes_nal[s->nb_nals] = s->skipped_bytes;
-        s->skipped_bytes_pos_size_nal[s->nb_nals] = s->skipped_bytes_pos_size;
-        s->skipped_bytes_pos_nal[s->nb_nals++] = s->skipped_bytes_pos;
-
-
-        if (consumed < 0) {
-            ret = consumed;
-            goto fail;
-        }
-
-        ret = init_get_bits8(&s->HEVClc->gb, nal->data, nal->size);
-        if (ret < 0)
-            goto fail;
-        hls_nal_unit(s);
+    ret = ff_hevc_split_packet(s, &s->pkt, buf, length, s->avctx, s->is_nalff,
+                               s->nal_length_size);
+    if (ret < 0) {
+        av_log(s->avctx, AV_LOG_ERROR,
+               "Error splitting the input into NAL units.\n");
+        return ret;
+    }
 
-        if (s->nal_unit_type == NAL_EOB_NUT ||
-            s->nal_unit_type == NAL_EOS_NUT)
+    for (i = 0; i < s->pkt.nb_nals; i++) {
+        if (s->pkt.nals[i].type == NAL_EOB_NUT ||
+            s->pkt.nals[i].type == NAL_EOS_NUT)
             s->eos = 1;
-
-        buf    += consumed;
-        length -= consumed;
     }
 
-    /* parse the NAL units */
-    for (i = 0; i < s->nb_nals; i++) {
-        s->skipped_bytes = s->skipped_bytes_nal[i];
-        s->skipped_bytes_pos = s->skipped_bytes_pos_nal[i];
-
-        ret = decode_nal_unit(s, &s->nals[i]);
+    /* decode the NAL units */
+    for (i = 0; i < s->pkt.nb_nals; i++) {
+        ret = decode_nal_unit(s, &s->pkt.nals[i]);
         if (ret < 0) {
             av_log(s->avctx, AV_LOG_WARNING,
                    "Error parsing NAL unit #%d.\n", i);
@@ -3037,7 +2902,7 @@ static int verify_md5(HEVCContext *s, AVFrame *frame)
     if (!desc)
         return AVERROR(EINVAL);
 
-    pixel_shift = desc->comp[0].depth_minus1 > 7;
+    pixel_shift = desc->comp[0].depth > 8;
 
     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
            s->poc);
@@ -3115,9 +2980,12 @@ static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
         return ret;
 
     if (avctx->hwaccel) {
-        if (s->ref && avctx->hwaccel->end_frame(avctx) < 0)
+        if (s->ref && (ret = avctx->hwaccel->end_frame(avctx)) < 0) {
             av_log(avctx, AV_LOG_ERROR,
                    "hardware accelerator failed to decode picture\n");
+            ff_hevc_unref_frame(s, s->ref, ~0);
+            return ret;
+        }
     } else {
         /* verify the SEI checksum */
         if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
@@ -3194,13 +3062,6 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx)
 
     av_freep(&s->md5_ctx);
 
-    for(i=0; i < s->nals_allocated; i++) {
-        av_freep(&s->skipped_bytes_pos_nal[i]);
-    }
-    av_freep(&s->skipped_bytes_pos_size_nal);
-    av_freep(&s->skipped_bytes_nal);
-    av_freep(&s->skipped_bytes_pos_nal);
-
     av_freep(&s->cabac_state);
 
     for (i = 0; i < 3; i++) {
@@ -3214,15 +3075,15 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx)
         av_frame_free(&s->DPB[i].frame);
     }
 
-    for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++)
-        av_buffer_unref(&s->vps_list[i]);
-    for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++)
-        av_buffer_unref(&s->sps_list[i]);
-    for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++)
-        av_buffer_unref(&s->pps_list[i]);
-    s->sps = NULL;
-    s->pps = NULL;
-    s->vps = NULL;
+    for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++)
+        av_buffer_unref(&s->ps.vps_list[i]);
+    for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++)
+        av_buffer_unref(&s->ps.sps_list[i]);
+    for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++)
+        av_buffer_unref(&s->ps.pps_list[i]);
+    s->ps.sps = NULL;
+    s->ps.pps = NULL;
+    s->ps.vps = NULL;
 
     av_freep(&s->sh.entry_point_offset);
     av_freep(&s->sh.offset);
@@ -3239,10 +3100,12 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx)
         s->HEVClc = NULL;
     av_freep(&s->HEVClcList[0]);
 
-    for (i = 0; i < s->nals_allocated; i++)
-        av_freep(&s->nals[i].rbsp_buffer);
-    av_freep(&s->nals);
-    s->nals_allocated = 0;
+    for (i = 0; i < s->pkt.nals_allocated; i++) {
+        av_freep(&s->pkt.nals[i].rbsp_buffer);
+        av_freep(&s->pkt.nals[i].skipped_bytes_pos);
+    }
+    av_freep(&s->pkt.nals);
+    s->pkt.nals_allocated = 0;
 
     return 0;
 }
@@ -3315,37 +3178,37 @@ static int hevc_update_thread_context(AVCodecContext *dst,
         }
     }
 
-    if (s->sps != s0->sps)
-        s->sps = NULL;
-    for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++) {
-        av_buffer_unref(&s->vps_list[i]);
-        if (s0->vps_list[i]) {
-            s->vps_list[i] = av_buffer_ref(s0->vps_list[i]);
-            if (!s->vps_list[i])
+    if (s->ps.sps != s0->ps.sps)
+        s->ps.sps = NULL;
+    for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++) {
+        av_buffer_unref(&s->ps.vps_list[i]);
+        if (s0->ps.vps_list[i]) {
+            s->ps.vps_list[i] = av_buffer_ref(s0->ps.vps_list[i]);
+            if (!s->ps.vps_list[i])
                 return AVERROR(ENOMEM);
         }
     }
 
-    for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
-        av_buffer_unref(&s->sps_list[i]);
-        if (s0->sps_list[i]) {
-            s->sps_list[i] = av_buffer_ref(s0->sps_list[i]);
-            if (!s->sps_list[i])
+    for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
+        av_buffer_unref(&s->ps.sps_list[i]);
+        if (s0->ps.sps_list[i]) {
+            s->ps.sps_list[i] = av_buffer_ref(s0->ps.sps_list[i]);
+            if (!s->ps.sps_list[i])
                 return AVERROR(ENOMEM);
         }
     }
 
-    for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++) {
-        av_buffer_unref(&s->pps_list[i]);
-        if (s0->pps_list[i]) {
-            s->pps_list[i] = av_buffer_ref(s0->pps_list[i]);
-            if (!s->pps_list[i])
+    for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++) {
+        av_buffer_unref(&s->ps.pps_list[i]);
+        if (s0->ps.pps_list[i]) {
+            s->ps.pps_list[i] = av_buffer_ref(s0->ps.pps_list[i]);
+            if (!s->ps.pps_list[i])
                 return AVERROR(ENOMEM);
         }
     }
 
-    if (s->sps != s0->sps)
-        if ((ret = set_sps(s, s0->sps, src->pix_fmt)) < 0)
+    if (s->ps.sps != s0->ps.sps)
+        if ((ret = set_sps(s, s0->ps.sps, src->pix_fmt)) < 0)
             return ret;
 
     s->seq_decode = s0->seq_decode;
@@ -3353,6 +3216,7 @@ static int hevc_update_thread_context(AVCodecContext *dst,
     s->pocTid0    = s0->pocTid0;
     s->max_ra     = s0->max_ra;
     s->eos        = s0->eos;
+    s->no_rasl_output_flag = s0->no_rasl_output_flag;
 
     s->is_nalff        = s0->is_nalff;
     s->nal_length_size = s0->nal_length_size;
@@ -3431,10 +3295,10 @@ static int hevc_decode_extradata(HEVCContext *s)
     }
 
     /* export stream parameters from the first SPS */
-    for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
-        if (s->sps_list[i]) {
-            const HEVCSPS *sps = (const HEVCSPS*)s->sps_list[i]->data;
-            export_stream_params(s->avctx, s, sps);
+    for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
+        if (s->ps.sps_list[i]) {
+            const HEVCSPS *sps = (const HEVCSPS*)s->ps.sps_list[i]->data;
+            export_stream_params(s->avctx, &s->ps, sps);
             break;
         }
     }
@@ -3447,8 +3311,6 @@ static av_cold int hevc_decode_init(AVCodecContext *avctx)
     HEVCContext *s = avctx->priv_data;
     int ret;
 
-    ff_init_cabac_states();
-
     avctx->internal->allocate_progress = 1;
 
     ret = hevc_init_context(avctx);
@@ -3457,6 +3319,7 @@ static av_cold int hevc_decode_init(AVCodecContext *avctx)
 
     s->enable_parallel_tiles = 0;
     s->picture_struct = 0;
+    s->eos = 1;
 
     if(avctx->active_thread_type & FF_THREAD_SLICE)
         s->threads_number = avctx->thread_count;
@@ -3498,24 +3361,17 @@ static void hevc_decode_flush(AVCodecContext *avctx)
     HEVCContext *s = avctx->priv_data;
     ff_hevc_flush_dpb(s);
     s->max_ra = INT_MAX;
+    s->eos = 1;
 }
 
 #define OFFSET(x) offsetof(HEVCContext, x)
 #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
 
-static const AVProfile profiles[] = {
-    { FF_PROFILE_HEVC_MAIN,                 "Main"                },
-    { FF_PROFILE_HEVC_MAIN_10,              "Main 10"             },
-    { FF_PROFILE_HEVC_MAIN_STILL_PICTURE,   "Main Still Picture"  },
-    { FF_PROFILE_HEVC_REXT,                 "Rext"  },
-    { FF_PROFILE_UNKNOWN },
-};
-
 static const AVOption options[] = {
     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
-        AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
+        AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, PAR },
     { "strict-displaywin", "stricly apply default display window size", OFFSET(apply_defdispwin),
-        AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
+        AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, PAR },
     { NULL },
 };
 
@@ -3539,7 +3395,7 @@ AVCodec ff_hevc_decoder = {
     .flush                 = hevc_decode_flush,
     .update_thread_context = hevc_update_thread_context,
     .init_thread_copy      = hevc_init_thread_copy,
-    .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
-                             CODEC_CAP_SLICE_THREADS | CODEC_CAP_FRAME_THREADS,
-    .profiles              = NULL_IF_CONFIG_SMALL(profiles),
+    .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
+                             AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS,
+    .profiles              = NULL_IF_CONFIG_SMALL(ff_hevc_profiles),
 };
diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h
index eecccb7f..c91f8158 100644
--- a/libavcodec/hevc.h
+++ b/libavcodec/hevc.h
@@ -276,6 +276,7 @@ enum ScanType {
 typedef struct ShortTermRPS {
     unsigned int num_negative_pics;
     int num_delta_pocs;
+    int rps_idx_num_delta_pocs;
     int32_t delta_poc[32];
     uint8_t used[32];
 } ShortTermRPS;
@@ -557,6 +558,17 @@ typedef struct HEVCPPS {
     int *min_tb_addr_zs_tab;///< MinTbAddrZS
 } HEVCPPS;
 
+typedef struct HEVCParamSets {
+    AVBufferRef *vps_list[MAX_VPS_COUNT]; ///< reference-counted VPS buffers; a slot may be NULL
+    AVBufferRef *sps_list[MAX_SPS_COUNT]; ///< reference-counted SPS buffers; ->data is read as HEVCSPS
+    AVBufferRef *pps_list[MAX_PPS_COUNT]; ///< reference-counted PPS buffers; a slot may be NULL
+
+    /* currently active parameter sets */
+    const HEVCVPS *vps;
+    const HEVCSPS *sps;
+    const HEVCPPS *pps;
+} HEVCParamSets;
+
 typedef struct SliceHeader {
     unsigned int pps_id;
 
@@ -579,6 +591,7 @@ typedef struct SliceHeader {
     int short_term_ref_pic_set_size;
     ShortTermRPS slice_rps;
     const ShortTermRPS *short_term_rps;
+    int long_term_ref_pic_set_size;
     LongTermRPS long_term_rps;
     unsigned int list_entry_lx[2][32];
 
@@ -609,7 +622,7 @@ typedef struct SliceHeader {
 
     unsigned int max_num_merge_cand; ///< 5 - 5_minus_max_num_merge_cand
 
-    int *entry_point_offset;
+    unsigned *entry_point_offset;
     int * offset;
     int * size;
     int num_entry_point_offsets;
@@ -742,8 +755,24 @@ typedef struct HEVCNAL {
 
     int raw_size;
     const uint8_t *raw_data;
+
+    GetBitContext gb;
+
+    enum NALUnitType type;
+    int temporal_id;
+
+    int skipped_bytes;
+    int skipped_bytes_pos_size;
+    int *skipped_bytes_pos;
 } HEVCNAL;
 
+/* an input packet split into unescaped NAL units */
+typedef struct HEVCPacket {
+    HEVCNAL *nals;       ///< NAL unit array; the decoder's free path releases each entry's rbsp_buffer and skipped_bytes_pos
+    int nb_nals;         ///< number of NAL units in the current packet (the decode loop visits nals[0..nb_nals-1])
+    int nals_allocated;  ///< number of entries allocated in nals (the free path walks this many)
+} HEVCPacket;
+
 typedef struct HEVCLocalContext {
     uint8_t cabac_state[HEVC_CONTEXTS];
 
@@ -812,12 +841,7 @@ typedef struct HEVCContext {
     uint8_t *sao_pixel_buffer_h[3];
     uint8_t *sao_pixel_buffer_v[3];
 
-    const HEVCVPS *vps;
-    const HEVCSPS *sps;
-    const HEVCPPS *pps;
-    AVBufferRef *vps_list[MAX_VPS_COUNT];
-    AVBufferRef *sps_list[MAX_SPS_COUNT];
-    AVBufferRef *pps_list[MAX_PPS_COUNT];
+    HEVCParamSets ps;
 
     AVBufferPool *tab_mvf_pool;
     AVBufferPool *rpl_tab_pool;
@@ -842,6 +866,7 @@ typedef struct HEVCContext {
     int bs_height;
 
     int is_decoded;
+    int no_rasl_output_flag;
 
     HEVCPredContext hpc;
     HEVCDSPContext hevcdsp;
@@ -878,19 +903,10 @@ typedef struct HEVCContext {
 
     int enable_parallel_tiles;
     int wpp_err;
-    int skipped_bytes;
-    int *skipped_bytes_pos;
-    int skipped_bytes_pos_size;
-
-    int *skipped_bytes_nal;
-    int **skipped_bytes_pos_nal;
-    int *skipped_bytes_pos_size_nal;
 
     const uint8_t *data;
 
-    HEVCNAL *nals;
-    int nb_nals;
-    int nals_allocated;
+    HEVCPacket pkt;
     // type of the first VCL NAL of the current frame
     enum NALUnitType first_nal_type;
 
@@ -921,17 +937,41 @@ typedef struct HEVCContext {
     int sei_hflip, sei_vflip;
 
     int picture_struct;
+
+    uint8_t* a53_caption;
+    int a53_caption_size;
+
+    /** mastering display */
+    int sei_mastering_display_info_present;
+    uint16_t display_primaries[3][2];
+    uint16_t white_point[2];
+    uint32_t max_mastering_luminance;
+    uint32_t min_mastering_luminance;
+
 } HEVCContext;
 
-int ff_hevc_decode_short_term_rps(HEVCContext *s, ShortTermRPS *rps,
-                                  const HEVCSPS *sps, int is_slice_header);
-int ff_hevc_decode_nal_vps(HEVCContext *s);
-int ff_hevc_decode_nal_sps(HEVCContext *s);
-int ff_hevc_decode_nal_pps(HEVCContext *s);
-int ff_hevc_decode_nal_sei(HEVCContext *s);
+int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx,
+                                  ShortTermRPS *rps, const HEVCSPS *sps, int is_slice_header);
 
-int ff_hevc_extract_rbsp(HEVCContext *s, const uint8_t *src, int length,
-                         HEVCNAL *nal);
+/**
+ * Parse the SPS from the bitstream into the provided HEVCSPS struct.
+ *
+ * @param sps_id the SPS id will be written here
+ * @param apply_defdispwin if set to 1, the default display window from the VUI
+ *                         will be applied to the video dimensions
+ * @param vps_list if non-NULL, this function will validate that the SPS refers
+ *                 to an existing VPS
+ */
+int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
+                      int apply_defdispwin, AVBufferRef **vps_list, AVCodecContext *avctx);
+
+int ff_hevc_decode_nal_vps(GetBitContext *gb, AVCodecContext *avctx,
+                           HEVCParamSets *ps);
+int ff_hevc_decode_nal_sps(GetBitContext *gb, AVCodecContext *avctx,
+                           HEVCParamSets *ps, int apply_defdispwin);
+int ff_hevc_decode_nal_pps(GetBitContext *gb, AVCodecContext *avctx,
+                           HEVCParamSets *ps);
+int ff_hevc_decode_nal_sei(HEVCContext *s);
 
 /**
  * Mark all frames in DPB as unused for reference.
@@ -1037,6 +1077,21 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0,
 void ff_hevc_hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size);
 
 
+/**
+ * Extract the raw (unescaped) HEVC bitstream.
+ */
+int ff_hevc_extract_rbsp(HEVCContext *s, const uint8_t *src, int length,
+                         HEVCNAL *nal);
+
+/**
+ * Split an input packet into NAL units.
+ */
+int ff_hevc_split_packet(HEVCContext *s, HEVCPacket *pkt, const uint8_t *buf, int length,
+                         AVCodecContext *avctx, int is_nalff, int nal_length_size);
+
+int ff_hevc_encode_nal_vps(HEVCVPS *vps, unsigned int id,
+                           uint8_t *buf, int buf_size);
+
 extern const uint8_t ff_hevc_qpel_extra_before[4];
 extern const uint8_t ff_hevc_qpel_extra_after[4];
 extern const uint8_t ff_hevc_qpel_extra[4];
diff --git a/libavcodec/hevc_cabac.c b/libavcodec/hevc_cabac.c
index 3d16896b..d1bef832 100644
--- a/libavcodec/hevc_cabac.c
+++ b/libavcodec/hevc_cabac.c
@@ -427,20 +427,6 @@ static const uint8_t diag_scan2x2_inv[2][2] = {
     { 1, 3, },
 };
 
-const uint8_t ff_hevc_diag_scan4x4_x[16] = {
-    0, 0, 1, 0,
-    1, 2, 0, 1,
-    2, 3, 1, 2,
-    3, 2, 3, 3,
-};
-
-const uint8_t ff_hevc_diag_scan4x4_y[16] = {
-    0, 1, 0, 2,
-    1, 0, 3, 2,
-    1, 0, 3, 2,
-    1, 3, 2, 3,
-};
-
 static const uint8_t diag_scan4x4_inv[4][4] = {
     { 0,  2,  5,  9, },
     { 1,  4,  8, 12, },
@@ -448,44 +434,6 @@ static const uint8_t diag_scan4x4_inv[4][4] = {
     { 6, 10, 13, 15, },
 };
 
-const uint8_t ff_hevc_diag_scan8x8_x[64] = {
-    0, 0, 1, 0,
-    1, 2, 0, 1,
-    2, 3, 0, 1,
-    2, 3, 4, 0,
-    1, 2, 3, 4,
-    5, 0, 1, 2,
-    3, 4, 5, 6,
-    0, 1, 2, 3,
-    4, 5, 6, 7,
-    1, 2, 3, 4,
-    5, 6, 7, 2,
-    3, 4, 5, 6,
-    7, 3, 4, 5,
-    6, 7, 4, 5,
-    6, 7, 5, 6,
-    7, 6, 7, 7,
-};
-
-const uint8_t ff_hevc_diag_scan8x8_y[64] = {
-    0, 1, 0, 2,
-    1, 0, 3, 2,
-    1, 0, 4, 3,
-    2, 1, 0, 5,
-    4, 3, 2, 1,
-    0, 6, 5, 4,
-    3, 2, 1, 0,
-    7, 6, 5, 4,
-    3, 2, 1, 0,
-    7, 6, 5, 4,
-    3, 2, 1, 7,
-    6, 5, 4, 3,
-    2, 7, 6, 5,
-    4, 3, 7, 6,
-    5, 4, 7, 6,
-    5, 7, 6, 7,
-};
-
 static const uint8_t diag_scan8x8_inv[8][8] = {
     {  0,  2,  5,  9, 14, 20, 27, 35, },
     {  1,  4,  8, 13, 19, 26, 34, 42, },
@@ -499,10 +447,10 @@ static const uint8_t diag_scan8x8_inv[8][8] = {
 
 void ff_hevc_save_states(HEVCContext *s, int ctb_addr_ts)
 {
-    if (s->pps->entropy_coding_sync_enabled_flag &&
-        (ctb_addr_ts % s->sps->ctb_width == 2 ||
-         (s->sps->ctb_width == 2 &&
-          ctb_addr_ts % s->sps->ctb_width == 0))) {
+    if (s->ps.pps->entropy_coding_sync_enabled_flag &&
+        (ctb_addr_ts % s->ps.sps->ctb_width == 2 ||
+         (s->ps.sps->ctb_width == 2 &&
+          ctb_addr_ts % s->ps.sps->ctb_width == 0))) {
         memcpy(s->cabac_state, s->HEVClc->cabac_state, HEVC_CONTEXTS);
     }
 }
@@ -553,40 +501,40 @@ static void cabac_init_state(HEVCContext *s)
 
 void ff_hevc_cabac_init(HEVCContext *s, int ctb_addr_ts)
 {
-    if (ctb_addr_ts == s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs]) {
+    if (ctb_addr_ts == s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs]) {
         cabac_init_decoder(s);
         if (s->sh.dependent_slice_segment_flag == 0 ||
-            (s->pps->tiles_enabled_flag &&
-             s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]))
+            (s->ps.pps->tiles_enabled_flag &&
+             s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[ctb_addr_ts - 1]))
             cabac_init_state(s);
 
         if (!s->sh.first_slice_in_pic_flag &&
-            s->pps->entropy_coding_sync_enabled_flag) {
-            if (ctb_addr_ts % s->sps->ctb_width == 0) {
-                if (s->sps->ctb_width == 1)
+            s->ps.pps->entropy_coding_sync_enabled_flag) {
+            if (ctb_addr_ts % s->ps.sps->ctb_width == 0) {
+                if (s->ps.sps->ctb_width == 1)
                     cabac_init_state(s);
                 else if (s->sh.dependent_slice_segment_flag == 1)
                     load_states(s);
             }
         }
     } else {
-        if (s->pps->tiles_enabled_flag &&
-            s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]) {
+        if (s->ps.pps->tiles_enabled_flag &&
+            s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[ctb_addr_ts - 1]) {
             if (s->threads_number == 1)
                 cabac_reinit(s->HEVClc);
             else
                 cabac_init_decoder(s);
             cabac_init_state(s);
         }
-        if (s->pps->entropy_coding_sync_enabled_flag) {
-            if (ctb_addr_ts % s->sps->ctb_width == 0) {
+        if (s->ps.pps->entropy_coding_sync_enabled_flag) {
+            if (ctb_addr_ts % s->ps.sps->ctb_width == 0) {
                 get_cabac_terminate(&s->HEVClc->cc);
                 if (s->threads_number == 1)
                     cabac_reinit(s->HEVClc);
                 else
                     cabac_init_decoder(s);
 
-                if (s->sps->ctb_width == 1)
+                if (s->ps.sps->ctb_width == 1)
                     cabac_init_state(s);
                 else
                     load_states(s);
@@ -625,7 +573,7 @@ int ff_hevc_sao_band_position_decode(HEVCContext *s)
 int ff_hevc_sao_offset_abs_decode(HEVCContext *s)
 {
     int i = 0;
-    int length = (1 << (FFMIN(s->sps->bit_depth, 10) - 5)) - 1;
+    int length = (1 << (FFMIN(s->ps.sps->bit_depth, 10) - 5)) - 1;
 
     while (i < length && get_cabac_bypass(&s->HEVClc->cc))
         i++;
@@ -656,10 +604,10 @@ int ff_hevc_cu_transquant_bypass_flag_decode(HEVCContext *s)
 
 int ff_hevc_skip_flag_decode(HEVCContext *s, int x0, int y0, int x_cb, int y_cb)
 {
-    int min_cb_width = s->sps->min_cb_width;
+    int min_cb_width = s->ps.sps->min_cb_width;
     int inc = 0;
-    int x0b = av_mod_uintp2(x0, s->sps->log2_ctb_size);
-    int y0b = av_mod_uintp2(y0, s->sps->log2_ctb_size);
+    int x0b = av_mod_uintp2(x0, s->ps.sps->log2_ctb_size);
+    int y0b = av_mod_uintp2(y0, s->ps.sps->log2_ctb_size);
 
     if (s->HEVClc->ctb_left_flag || x0b)
         inc = !!SAMPLE_CTB(s->skip_flag, x_cb - 1, y_cb);
@@ -706,7 +654,7 @@ int ff_hevc_cu_chroma_qp_offset_flag(HEVCContext *s)
 
 int ff_hevc_cu_chroma_qp_offset_idx(HEVCContext *s)
 {
-    int c_max= FFMAX(5, s->pps->chroma_qp_offset_list_len_minus1);
+    int c_max= FFMAX(5, s->ps.pps->chroma_qp_offset_list_len_minus1);
     int i = 0;
 
     while (i < c_max && GET_CABAC(elem_offset[CU_CHROMA_QP_OFFSET_IDX]))
@@ -723,15 +671,15 @@ int ff_hevc_pred_mode_decode(HEVCContext *s)
 int ff_hevc_split_coding_unit_flag_decode(HEVCContext *s, int ct_depth, int x0, int y0)
 {
     int inc = 0, depth_left = 0, depth_top = 0;
-    int x0b  = av_mod_uintp2(x0, s->sps->log2_ctb_size);
-    int y0b  = av_mod_uintp2(y0, s->sps->log2_ctb_size);
-    int x_cb = x0 >> s->sps->log2_min_cb_size;
-    int y_cb = y0 >> s->sps->log2_min_cb_size;
+    int x0b  = av_mod_uintp2(x0, s->ps.sps->log2_ctb_size);
+    int y0b  = av_mod_uintp2(y0, s->ps.sps->log2_ctb_size);
+    int x_cb = x0 >> s->ps.sps->log2_min_cb_size;
+    int y_cb = y0 >> s->ps.sps->log2_min_cb_size;
 
     if (s->HEVClc->ctb_left_flag || x0b)
-        depth_left = s->tab_ct_depth[(y_cb) * s->sps->min_cb_width + x_cb - 1];
+        depth_left = s->tab_ct_depth[(y_cb) * s->ps.sps->min_cb_width + x_cb - 1];
     if (s->HEVClc->ctb_up_flag || y0b)
-        depth_top = s->tab_ct_depth[(y_cb - 1) * s->sps->min_cb_width + x_cb];
+        depth_top = s->tab_ct_depth[(y_cb - 1) * s->ps.sps->min_cb_width + x_cb];
 
     inc += (depth_left > ct_depth);
     inc += (depth_top  > ct_depth);
@@ -743,7 +691,7 @@ int ff_hevc_part_mode_decode(HEVCContext *s, int log2_cb_size)
 {
     if (GET_CABAC(elem_offset[PART_MODE])) // 1
         return PART_2Nx2N;
-    if (log2_cb_size == s->sps->log2_min_cb_size) {
+    if (log2_cb_size == s->ps.sps->log2_min_cb_size) {
         if (s->HEVClc->cu.pred_mode == MODE_INTRA) // 0
             return PART_NxN;
         if (GET_CABAC(elem_offset[PART_MODE] + 1)) // 01
@@ -755,7 +703,7 @@ int ff_hevc_part_mode_decode(HEVCContext *s, int log2_cb_size)
         return PART_NxN; // 000
     }
 
-    if (!s->sps->amp_enabled_flag) {
+    if (!s->ps.sps->amp_enabled_flag) {
         if (GET_CABAC(elem_offset[PART_MODE] + 1)) // 01
             return PART_2NxN;
         return PART_Nx2N;
@@ -883,11 +831,13 @@ static av_always_inline int mvd_decode(HEVCContext *s)
     int k = 1;
 
     while (k < CABAC_MAX_BIN && get_cabac_bypass(&s->HEVClc->cc)) {
-        ret += 1 << k;
+        ret += 1U << k;
         k++;
     }
-    if (k == CABAC_MAX_BIN)
+    if (k == CABAC_MAX_BIN) {
         av_log(s->avctx, AV_LOG_ERROR, "CABAC_MAX_BIN : %d\n", k);
+        return 0;
+    }
     while (k--)
         ret += get_cabac_bypass(&s->HEVClc->cc) << k;
     return get_cabac_bypass_sign(&s->HEVClc->cc, -ret);
@@ -1025,8 +975,10 @@ static av_always_inline int coeff_abs_level_remaining_decode(HEVCContext *s, int
 
     while (prefix < CABAC_MAX_BIN && get_cabac_bypass(&s->HEVClc->cc))
         prefix++;
-    if (prefix == CABAC_MAX_BIN)
+    if (prefix == CABAC_MAX_BIN) {
         av_log(s->avctx, AV_LOG_ERROR, "CABAC_MAX_BIN : %d\n", prefix);
+        return 0;
+    }
     if (prefix < 3) {
         for (i = 0; i < rc_rice_param; i++)
             suffix = (suffix << 1) | get_cabac_bypass(&s->HEVClc->cc);
@@ -1075,10 +1027,10 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0,
     const uint8_t *scan_x_cg, *scan_y_cg, *scan_x_off, *scan_y_off;
 
     ptrdiff_t stride = s->frame->linesize[c_idx];
-    int hshift = s->sps->hshift[c_idx];
-    int vshift = s->sps->vshift[c_idx];
+    int hshift = s->ps.sps->hshift[c_idx];
+    int vshift = s->ps.sps->vshift[c_idx];
     uint8_t *dst = &s->frame->data[c_idx][(y0 >> vshift) * stride +
-                                          ((x0 >> hshift) << s->sps->pixel_shift)];
+                                          ((x0 >> hshift) << s->ps.sps->pixel_shift)];
     int16_t *coeffs = (int16_t*)(c_idx ? lc->edge_emu_buffer2 : lc->edge_emu_buffer);
     uint8_t significant_coeff_group_flag[8][8] = {{0}};
     int explicit_rdpcm_flag = 0;
@@ -1113,25 +1065,25 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0,
         };
         int qp_y = lc->qp_y;
 
-        if (s->pps->transform_skip_enabled_flag &&
-            log2_trafo_size <= s->pps->log2_max_transform_skip_block_size) {
+        if (s->ps.pps->transform_skip_enabled_flag &&
+            log2_trafo_size <= s->ps.pps->log2_max_transform_skip_block_size) {
             transform_skip_flag = hevc_transform_skip_flag_decode(s, c_idx);
         }
 
         if (c_idx == 0) {
-            qp = qp_y + s->sps->qp_bd_offset;
+            qp = qp_y + s->ps.sps->qp_bd_offset;
         } else {
             int qp_i, offset;
 
             if (c_idx == 1)
-                offset = s->pps->cb_qp_offset + s->sh.slice_cb_qp_offset +
+                offset = s->ps.pps->cb_qp_offset + s->sh.slice_cb_qp_offset +
                          lc->tu.cu_qp_offset_cb;
             else
-                offset = s->pps->cr_qp_offset + s->sh.slice_cr_qp_offset +
+                offset = s->ps.pps->cr_qp_offset + s->sh.slice_cr_qp_offset +
                          lc->tu.cu_qp_offset_cr;
 
-            qp_i = av_clip(qp_y + offset, - s->sps->qp_bd_offset, 57);
-            if (s->sps->chroma_format_idc == 1) {
+            qp_i = av_clip(qp_y + offset, - s->ps.sps->qp_bd_offset, 57);
+            if (s->ps.sps->chroma_format_idc == 1) {
                 if (qp_i < 30)
                     qp = qp_i;
                 else if (qp_i > 43)
@@ -1145,18 +1097,18 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0,
                     qp = qp_i;
             }
 
-            qp += s->sps->qp_bd_offset;
+            qp += s->ps.sps->qp_bd_offset;
         }
 
-        shift    = s->sps->bit_depth + log2_trafo_size - 5;
+        shift    = s->ps.sps->bit_depth + log2_trafo_size - 5;
         add      = 1 << (shift-1);
         scale    = level_scale[rem6[qp]] << (div6[qp]);
         scale_m  = 16; // default when no custom scaling lists.
         dc_scale = 16;
 
-        if (s->sps->scaling_list_enable_flag && !(transform_skip_flag && log2_trafo_size > 2)) {
-            const ScalingList *sl = s->pps->scaling_list_data_present_flag ?
-            &s->pps->scaling_list : &s->sps->scaling_list;
+        if (s->ps.sps->scaling_list_enable_flag && !(transform_skip_flag && log2_trafo_size > 2)) {
+            const ScalingList *sl = s->ps.pps->scaling_list_data_present_flag ?
+            &s->ps.pps->scaling_list : &s->ps.sps->scaling_list;
             int matrix_id = lc->cu.pred_mode != MODE_INTRA;
 
             matrix_id = 3 * matrix_id + c_idx;
@@ -1172,7 +1124,7 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0,
         dc_scale     = 0;
     }
 
-    if (lc->cu.pred_mode == MODE_INTER && s->sps->explicit_rdpcm_enabled_flag &&
+    if (lc->cu.pred_mode == MODE_INTER && s->ps.sps->explicit_rdpcm_enabled_flag &&
         (transform_skip_flag || lc->cu.cu_transquant_bypass_flag)) {
         explicit_rdpcm_flag = explicit_rdpcm_flag_decode(s, c_idx);
         if (explicit_rdpcm_flag) {
@@ -1303,7 +1255,7 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0,
             };
             const uint8_t *ctx_idx_map_p;
             int scf_offset = 0;
-            if (s->sps->transform_skip_context_enabled_flag &&
+            if (s->ps.sps->transform_skip_context_enabled_flag &&
                 (transform_skip_flag || lc->cu.cu_transquant_bypass_flag)) {
                 ctx_idx_map_p = (uint8_t*) &ctx_idx_map[4 * 16];
                 if (c_idx == 0) {
@@ -1344,7 +1296,7 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0,
                 }
             }
             if (implicit_non_zero_coeff == 0) {
-                if (s->sps->transform_skip_context_enabled_flag &&
+                if (s->ps.sps->transform_skip_context_enabled_flag &&
                     (transform_skip_flag || lc->cu.cu_transquant_bypass_flag)) {
                     if (c_idx == 0) {
                         scf_offset = 42;
@@ -1389,7 +1341,7 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0,
             // initialize first elem of coeff_bas_level_greater1_flag
             int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0;
 
-            if (s->sps->persistent_rice_adaptation_enabled_flag) {
+            if (s->ps.sps->persistent_rice_adaptation_enabled_flag) {
                 if (!transform_skip_flag && !lc->cu.cu_transquant_bypass_flag)
                     sb_type = 2 * (c_idx == 0 ? 1 : 0);
                 else
@@ -1418,7 +1370,7 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0,
 
             if (lc->cu.cu_transquant_bypass_flag ||
                 (lc->cu.pred_mode ==  MODE_INTRA  &&
-                 s->sps->implicit_rdpcm_enabled_flag  &&  transform_skip_flag  &&
+                 s->ps.sps->implicit_rdpcm_enabled_flag  &&  transform_skip_flag  &&
                  (pred_mode_intra == 10 || pred_mode_intra  ==  26 )) ||
                  explicit_rdpcm_flag)
                 sign_hidden = 0;
@@ -1428,7 +1380,7 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0,
             if (first_greater1_coeff_idx != -1) {
                 coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += coeff_abs_level_greater2_flag_decode(s, c_idx, ctx_set);
             }
-            if (!s->pps->sign_data_hiding_flag || !sign_hidden ) {
+            if (!s->ps.pps->sign_data_hiding_flag || !sign_hidden ) {
                 coeff_sign_flag = coeff_sign_flag_decode(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag);
             } else {
                 coeff_sign_flag = coeff_sign_flag_decode(s, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1));
@@ -1444,8 +1396,8 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0,
 
                         trans_coeff_level += last_coeff_abs_level_remaining;
                         if (trans_coeff_level > (3 << c_rice_param))
-                            c_rice_param = s->sps->persistent_rice_adaptation_enabled_flag ? c_rice_param + 1 : FFMIN(c_rice_param + 1, 4);
-                        if (s->sps->persistent_rice_adaptation_enabled_flag && !rice_init) {
+                            c_rice_param = s->ps.sps->persistent_rice_adaptation_enabled_flag ? c_rice_param + 1 : FFMIN(c_rice_param + 1, 4);
+                        if (s->ps.sps->persistent_rice_adaptation_enabled_flag && !rice_init) {
                             int c_rice_p_init = lc->stat_coeff[sb_type] / 4;
                             if (last_coeff_abs_level_remaining >= (3 << c_rice_p_init))
                                 lc->stat_coeff[sb_type]++;
@@ -1460,8 +1412,8 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0,
 
                     trans_coeff_level = 1 + last_coeff_abs_level_remaining;
                     if (trans_coeff_level > (3 << c_rice_param))
-                        c_rice_param = s->sps->persistent_rice_adaptation_enabled_flag ? c_rice_param + 1 : FFMIN(c_rice_param + 1, 4);
-                    if (s->sps->persistent_rice_adaptation_enabled_flag && !rice_init) {
+                        c_rice_param = s->ps.sps->persistent_rice_adaptation_enabled_flag ? c_rice_param + 1 : FFMIN(c_rice_param + 1, 4);
+                    if (s->ps.sps->persistent_rice_adaptation_enabled_flag && !rice_init) {
                         int c_rice_p_init = lc->stat_coeff[sb_type] / 4;
                         if (last_coeff_abs_level_remaining >= (3 << c_rice_p_init))
                             lc->stat_coeff[sb_type]++;
@@ -1471,7 +1423,7 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0,
                         rice_init = 1;
                     }
                 }
-                if (s->pps->sign_data_hiding_flag && sign_hidden) {
+                if (s->ps.pps->sign_data_hiding_flag && sign_hidden) {
                     sum_abs += trans_coeff_level;
                     if (n == first_nz_pos_in_cg && (sum_abs&1))
                         trans_coeff_level = -trans_coeff_level;
@@ -1480,7 +1432,7 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0,
                     trans_coeff_level = -trans_coeff_level;
                 coeff_sign_flag <<= 1;
                 if(!lc->cu.cu_transquant_bypass_flag) {
-                    if (s->sps->scaling_list_enable_flag && !(transform_skip_flag && log2_trafo_size > 2)) {
+                    if (s->ps.sps->scaling_list_enable_flag && !(transform_skip_flag && log2_trafo_size > 2)) {
                         if(y_c || x_c || log2_trafo_size < 4) {
                             switch(log2_trafo_size) {
                                 case 3: pos = (y_c << 3) + x_c; break;
@@ -1508,15 +1460,15 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0,
     }
 
     if (lc->cu.cu_transquant_bypass_flag) {
-        if (explicit_rdpcm_flag || (s->sps->implicit_rdpcm_enabled_flag &&
+        if (explicit_rdpcm_flag || (s->ps.sps->implicit_rdpcm_enabled_flag &&
                                     (pred_mode_intra == 10 || pred_mode_intra == 26))) {
-            int mode = s->sps->implicit_rdpcm_enabled_flag ? (pred_mode_intra == 26) : explicit_rdpcm_dir_flag;
+            int mode = s->ps.sps->implicit_rdpcm_enabled_flag ? (pred_mode_intra == 26) : explicit_rdpcm_dir_flag;
 
             s->hevcdsp.transform_rdpcm(coeffs, log2_trafo_size, mode);
         }
     } else {
         if (transform_skip_flag) {
-            int rot = s->sps->transform_skip_rotation_enabled_flag &&
+            int rot = s->ps.sps->transform_skip_rotation_enabled_flag &&
                       log2_trafo_size == 2 &&
                       lc->cu.pred_mode == MODE_INTRA;
             if (rot) {
@@ -1526,7 +1478,7 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0,
 
             s->hevcdsp.transform_skip(coeffs, log2_trafo_size);
 
-            if (explicit_rdpcm_flag || (s->sps->implicit_rdpcm_enabled_flag &&
+            if (explicit_rdpcm_flag || (s->ps.sps->implicit_rdpcm_enabled_flag &&
                                         lc->cu.pred_mode == MODE_INTRA &&
                                         (pred_mode_intra == 10 || pred_mode_intra == 26))) {
                 int mode = explicit_rdpcm_flag ? explicit_rdpcm_dir_flag : (pred_mode_intra == 26);
diff --git a/libavcodec/hevc_data.c b/libavcodec/hevc_data.c
new file mode 100644
index 00000000..f74f2725
--- /dev/null
+++ b/libavcodec/hevc_data.c
@@ -0,0 +1,75 @@
+/*
+ * HEVC shared tables
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "hevc.h"
+
+const uint8_t ff_hevc_diag_scan4x4_x[16] = { /* x coordinate of each scan position in a 4x4 block; pairs with ff_hevc_diag_scan4x4_y and walks each anti-diagonal from bottom-left to top-right */
+    0, 0, 1, 0,
+    1, 2, 0, 1,
+    2, 3, 1, 2,
+    3, 2, 3, 3,
+};
+
+const uint8_t ff_hevc_diag_scan4x4_y[16] = { /* y coordinate of each scan position in a 4x4 block; entry i pairs with ff_hevc_diag_scan4x4_x[i] */
+    0, 1, 0, 2,
+    1, 0, 3, 2,
+    1, 0, 3, 2,
+    1, 3, 2, 3,
+};
+
+const uint8_t ff_hevc_diag_scan8x8_x[64] = { /* x coordinate of each scan position in an 8x8 block; pairs with ff_hevc_diag_scan8x8_y and walks each anti-diagonal from bottom-left to top-right */
+    0, 0, 1, 0,
+    1, 2, 0, 1,
+    2, 3, 0, 1,
+    2, 3, 4, 0,
+    1, 2, 3, 4,
+    5, 0, 1, 2,
+    3, 4, 5, 6,
+    0, 1, 2, 3,
+    4, 5, 6, 7,
+    1, 2, 3, 4,
+    5, 6, 7, 2,
+    3, 4, 5, 6,
+    7, 3, 4, 5,
+    6, 7, 4, 5,
+    6, 7, 5, 6,
+    7, 6, 7, 7,
+};
+
+const uint8_t ff_hevc_diag_scan8x8_y[64] = { /* y coordinate of each scan position in an 8x8 block; entry i pairs with ff_hevc_diag_scan8x8_x[i] */
+    0, 1, 0, 2,
+    1, 0, 3, 2,
+    1, 0, 4, 3,
+    2, 1, 0, 5,
+    4, 3, 2, 1,
+    0, 6, 5, 4,
+    3, 2, 1, 0,
+    7, 6, 5, 4,
+    3, 2, 1, 0,
+    7, 6, 5, 4,
+    3, 2, 1, 7,
+    6, 5, 4, 3,
+    2, 7, 6, 5,
+    4, 3, 7, 6,
+    5, 4, 7, 6,
+    5, 7, 6, 7,
+};
diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c
index f50a640e..1f33b0cd 100644
--- a/libavcodec/hevc_filter.c
+++ b/libavcodec/hevc_filter.c
@@ -56,12 +56,12 @@ static int chroma_tc(HEVCContext *s, int qp_y, int c_idx, int tc_offset)
 
     // slice qp offset is not used for deblocking
     if (c_idx == 1)
-        offset = s->pps->cb_qp_offset;
+        offset = s->ps.pps->cb_qp_offset;
     else
-        offset = s->pps->cr_qp_offset;
+        offset = s->ps.pps->cr_qp_offset;
 
     qp_i = av_clip(qp_y + offset, 0, 57);
-    if (s->sps->chroma_format_idc == 1) {
+    if (s->ps.sps->chroma_format_idc == 1) {
         if (qp_i < 30)
             qp = qp_i;
         else if (qp_i > 43)
@@ -79,14 +79,14 @@ static int chroma_tc(HEVCContext *s, int qp_y, int c_idx, int tc_offset)
 static int get_qPy_pred(HEVCContext *s, int xBase, int yBase, int log2_cb_size)
 {
     HEVCLocalContext *lc     = s->HEVClc;
-    int ctb_size_mask        = (1 << s->sps->log2_ctb_size) - 1;
-    int MinCuQpDeltaSizeMask = (1 << (s->sps->log2_ctb_size -
-                                      s->pps->diff_cu_qp_delta_depth)) - 1;
+    int ctb_size_mask        = (1 << s->ps.sps->log2_ctb_size) - 1;
+    int MinCuQpDeltaSizeMask = (1 << (s->ps.sps->log2_ctb_size -
+                                      s->ps.pps->diff_cu_qp_delta_depth)) - 1;
     int xQgBase              = xBase - (xBase & MinCuQpDeltaSizeMask);
     int yQgBase              = yBase - (yBase & MinCuQpDeltaSizeMask);
-    int min_cb_width         = s->sps->min_cb_width;
-    int x_cb                 = xQgBase >> s->sps->log2_min_cb_size;
-    int y_cb                 = yQgBase >> s->sps->log2_min_cb_size;
+    int min_cb_width         = s->ps.sps->min_cb_width;
+    int x_cb                 = xQgBase >> s->ps.sps->log2_min_cb_size;
+    int y_cb                 = yQgBase >> s->ps.sps->log2_min_cb_size;
     int availableA           = (xBase   & ctb_size_mask) &&
                                (xQgBase & ctb_size_mask);
     int availableB           = (yBase   & ctb_size_mask) &&
@@ -113,8 +113,8 @@ static int get_qPy_pred(HEVCContext *s, int xBase, int yBase, int log2_cb_size)
     else
         qPy_b = s->qp_y_tab[x_cb + (y_cb - 1) * min_cb_width];
 
-    av_assert2(qPy_a >= -s->sps->qp_bd_offset && qPy_a < 52);
-    av_assert2(qPy_b >= -s->sps->qp_bd_offset && qPy_b < 52);
+    av_assert2(qPy_a >= -s->ps.sps->qp_bd_offset && qPy_a < 52);
+    av_assert2(qPy_b >= -s->ps.sps->qp_bd_offset && qPy_b < 52);
 
     return (qPy_a + qPy_b + 1) >> 1;
 }
@@ -124,7 +124,7 @@ void ff_hevc_set_qPy(HEVCContext *s, int xBase, int yBase, int log2_cb_size)
     int qp_y = get_qPy_pred(s, xBase, yBase, log2_cb_size);
 
     if (s->HEVClc->tu.cu_qp_delta != 0) {
-        int off = s->sps->qp_bd_offset;
+        int off = s->ps.sps->qp_bd_offset;
         s->HEVClc->qp_y = FFUMOD(qp_y + s->HEVClc->tu.cu_qp_delta + 52 + 2 * off,
                                  52 + off) - off;
     } else
@@ -133,10 +133,10 @@ void ff_hevc_set_qPy(HEVCContext *s, int xBase, int yBase, int log2_cb_size)
 
 static int get_qPy(HEVCContext *s, int xC, int yC)
 {
-    int log2_min_cb_size  = s->sps->log2_min_cb_size;
+    int log2_min_cb_size  = s->ps.sps->log2_min_cb_size;
     int x                 = xC >> log2_min_cb_size;
     int y                 = yC >> log2_min_cb_size;
-    return s->qp_y_tab[x + y * s->sps->min_cb_width];
+    return s->qp_y_tab[x + y * s->ps.sps->min_cb_width];
 }
 
 static void copy_CTB(uint8_t *dst, const uint8_t *src, int width, int height,
@@ -193,9 +193,9 @@ static void copy_CTB_to_hv(HEVCContext *s, const uint8_t *src,
                            int stride_src, int x, int y, int width, int height,
                            int c_idx, int x_ctb, int y_ctb)
 {
-    int sh = s->sps->pixel_shift;
-    int w = s->sps->width >> s->sps->hshift[c_idx];
-    int h = s->sps->height >> s->sps->vshift[c_idx];
+    int sh = s->ps.sps->pixel_shift;
+    int w = s->ps.sps->width >> s->ps.sps->hshift[c_idx];
+    int h = s->ps.sps->height >> s->ps.sps->vshift[c_idx];
 
     /* copy horizontal edges */
     memcpy(s->sao_pixel_buffer_h[c_idx] + (((2 * y_ctb) * w + x) << sh),
@@ -214,23 +214,23 @@ static void restore_tqb_pixels(HEVCContext *s,
                                ptrdiff_t stride_src, ptrdiff_t stride_dst,
                                int x0, int y0, int width, int height, int c_idx)
 {
-    if ( s->pps->transquant_bypass_enable_flag ||
-            (s->sps->pcm.loop_filter_disable_flag && s->sps->pcm_enabled_flag)) {
+    if ( s->ps.pps->transquant_bypass_enable_flag ||
+            (s->ps.sps->pcm.loop_filter_disable_flag && s->ps.sps->pcm_enabled_flag)) {
         int x, y;
-        int min_pu_size  = 1 << s->sps->log2_min_pu_size;
-        int hshift       = s->sps->hshift[c_idx];
-        int vshift       = s->sps->vshift[c_idx];
-        int x_min        = ((x0         ) >> s->sps->log2_min_pu_size);
-        int y_min        = ((y0         ) >> s->sps->log2_min_pu_size);
-        int x_max        = ((x0 + width ) >> s->sps->log2_min_pu_size);
-        int y_max        = ((y0 + height) >> s->sps->log2_min_pu_size);
-        int len          = (min_pu_size >> hshift) << s->sps->pixel_shift;
+        int min_pu_size  = 1 << s->ps.sps->log2_min_pu_size;
+        int hshift       = s->ps.sps->hshift[c_idx];
+        int vshift       = s->ps.sps->vshift[c_idx];
+        int x_min        = ((x0         ) >> s->ps.sps->log2_min_pu_size);
+        int y_min        = ((y0         ) >> s->ps.sps->log2_min_pu_size);
+        int x_max        = ((x0 + width ) >> s->ps.sps->log2_min_pu_size);
+        int y_max        = ((y0 + height) >> s->ps.sps->log2_min_pu_size);
+        int len          = (min_pu_size >> hshift) << s->ps.sps->pixel_shift;
         for (y = y_min; y < y_max; y++) {
             for (x = x_min; x < x_max; x++) {
-                if (s->is_pcm[y * s->sps->min_pu_width + x]) {
+                if (s->is_pcm[y * s->ps.sps->min_pu_width + x]) {
                     int n;
-                    uint8_t *src = src1 + (((y << s->sps->log2_min_pu_size) - y0) >> vshift) * stride_src + ((((x << s->sps->log2_min_pu_size) - x0) >> hshift) << s->sps->pixel_shift);
-                    const uint8_t *dst = dst1 + (((y << s->sps->log2_min_pu_size) - y0) >> vshift) * stride_dst + ((((x << s->sps->log2_min_pu_size) - x0) >> hshift) << s->sps->pixel_shift);
+                    uint8_t *src = src1 + (((y << s->ps.sps->log2_min_pu_size) - y0) >> vshift) * stride_src + ((((x << s->ps.sps->log2_min_pu_size) - x0) >> hshift) << s->ps.sps->pixel_shift);
+                    const uint8_t *dst = dst1 + (((y << s->ps.sps->log2_min_pu_size) - y0) >> vshift) * stride_dst + ((((x << s->ps.sps->log2_min_pu_size) - x0) >> hshift) << s->ps.sps->pixel_shift);
                     for (n = 0; n < (min_pu_size >> vshift); n++) {
                         memcpy(src, dst, len);
                         src += stride_src;
@@ -242,7 +242,7 @@ static void restore_tqb_pixels(HEVCContext *s,
     }
 }
 
-#define CTB(tab, x, y) ((tab)[(y) * s->sps->ctb_width + (x)])
+#define CTB(tab, x, y) ((tab)[(y) * s->ps.sps->ctb_width + (x)])
 
 static void sao_filter_CTB(HEVCContext *s, int x, int y)
 {
@@ -250,18 +250,18 @@ static void sao_filter_CTB(HEVCContext *s, int x, int y)
     HEVCLocalContext *lc = s->HEVClc;
     int c_idx;
     int edges[4];  // 0 left 1 top 2 right 3 bottom
-    int x_ctb                = x >> s->sps->log2_ctb_size;
-    int y_ctb                = y >> s->sps->log2_ctb_size;
-    int ctb_addr_rs          = y_ctb * s->sps->ctb_width + x_ctb;
-    int ctb_addr_ts          = s->pps->ctb_addr_rs_to_ts[ctb_addr_rs];
+    int x_ctb                = x >> s->ps.sps->log2_ctb_size;
+    int y_ctb                = y >> s->ps.sps->log2_ctb_size;
+    int ctb_addr_rs          = y_ctb * s->ps.sps->ctb_width + x_ctb;
+    int ctb_addr_ts          = s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs];
     SAOParams *sao           = &CTB(s->sao, x_ctb, y_ctb);
     // flags indicating unfilterable edges
     uint8_t vert_edge[]      = { 0, 0 };
     uint8_t horiz_edge[]     = { 0, 0 };
     uint8_t diag_edge[]      = { 0, 0, 0, 0 };
     uint8_t lfase            = CTB(s->filter_slice_edges, x_ctb, y_ctb);
-    uint8_t no_tile_filter   = s->pps->tiles_enabled_flag &&
-                               !s->pps->loop_filter_across_tiles_enabled_flag;
+    uint8_t no_tile_filter   = s->ps.pps->tiles_enabled_flag &&
+                               !s->ps.pps->loop_filter_across_tiles_enabled_flag;
     uint8_t restore          = no_tile_filter || !lfase;
     uint8_t left_tile_edge   = 0;
     uint8_t right_tile_edge  = 0;
@@ -270,24 +270,24 @@ static void sao_filter_CTB(HEVCContext *s, int x, int y)
 
     edges[0]   = x_ctb == 0;
     edges[1]   = y_ctb == 0;
-    edges[2]   = x_ctb == s->sps->ctb_width  - 1;
-    edges[3]   = y_ctb == s->sps->ctb_height - 1;
+    edges[2]   = x_ctb == s->ps.sps->ctb_width  - 1;
+    edges[3]   = y_ctb == s->ps.sps->ctb_height - 1;
 
     if (restore) {
         if (!edges[0]) {
-            left_tile_edge  = no_tile_filter && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1]];
+            left_tile_edge  = no_tile_filter && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1]];
             vert_edge[0]    = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb - 1, y_ctb)) || left_tile_edge;
         }
         if (!edges[2]) {
-            right_tile_edge = no_tile_filter && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1]];
+            right_tile_edge = no_tile_filter && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs+1]];
             vert_edge[1]    = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb + 1, y_ctb)) || right_tile_edge;
         }
         if (!edges[1]) {
-            up_tile_edge     = no_tile_filter && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]];
+            up_tile_edge     = no_tile_filter && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->ps.sps->ctb_width]];
             horiz_edge[0]    = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb, y_ctb - 1)) || up_tile_edge;
         }
         if (!edges[3]) {
-            bottom_tile_edge = no_tile_filter && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs + s->sps->ctb_width]];
+            bottom_tile_edge = no_tile_filter && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs + s->ps.sps->ctb_width]];
             horiz_edge[1]    = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb, y_ctb + 1)) || bottom_tile_edge;
         }
         if (!edges[0] && !edges[1]) {
@@ -304,16 +304,16 @@ static void sao_filter_CTB(HEVCContext *s, int x, int y)
         }
     }
 
-    for (c_idx = 0; c_idx < (s->sps->chroma_format_idc ? 3 : 1); c_idx++) {
-        int x0       = x >> s->sps->hshift[c_idx];
-        int y0       = y >> s->sps->vshift[c_idx];
+    for (c_idx = 0; c_idx < (s->ps.sps->chroma_format_idc ? 3 : 1); c_idx++) {
+        int x0       = x >> s->ps.sps->hshift[c_idx];
+        int y0       = y >> s->ps.sps->vshift[c_idx];
         int stride_src = s->frame->linesize[c_idx];
-        int ctb_size_h = (1 << (s->sps->log2_ctb_size)) >> s->sps->hshift[c_idx];
-        int ctb_size_v = (1 << (s->sps->log2_ctb_size)) >> s->sps->vshift[c_idx];
-        int width    = FFMIN(ctb_size_h, (s->sps->width  >> s->sps->hshift[c_idx]) - x0);
-        int height   = FFMIN(ctb_size_v, (s->sps->height >> s->sps->vshift[c_idx]) - y0);
+        int ctb_size_h = (1 << (s->ps.sps->log2_ctb_size)) >> s->ps.sps->hshift[c_idx];
+        int ctb_size_v = (1 << (s->ps.sps->log2_ctb_size)) >> s->ps.sps->vshift[c_idx];
+        int width    = FFMIN(ctb_size_h, (s->ps.sps->width  >> s->ps.sps->hshift[c_idx]) - x0);
+        int height   = FFMIN(ctb_size_v, (s->ps.sps->height >> s->ps.sps->vshift[c_idx]) - y0);
         int tab      = sao_tab[(FFALIGN(width, 8) >> 3) - 1];
-        uint8_t *src = &s->frame->data[c_idx][y0 * stride_src + (x0 << s->sps->pixel_shift)];
+        uint8_t *src = &s->frame->data[c_idx][y0 * stride_src + (x0 << s->ps.sps->pixel_shift)];
         int stride_dst;
         uint8_t *dst;
 
@@ -321,11 +321,11 @@ static void sao_filter_CTB(HEVCContext *s, int x, int y)
         case SAO_BAND:
             copy_CTB_to_hv(s, src, stride_src, x0, y0, width, height, c_idx,
                            x_ctb, y_ctb);
-            if (s->pps->transquant_bypass_enable_flag ||
-                (s->sps->pcm.loop_filter_disable_flag && s->sps->pcm_enabled_flag)) {
+            if (s->ps.pps->transquant_bypass_enable_flag ||
+                (s->ps.sps->pcm.loop_filter_disable_flag && s->ps.sps->pcm_enabled_flag)) {
             dst = lc->edge_emu_buffer;
             stride_dst = 2*MAX_PB_SIZE;
-            copy_CTB(dst, src, width << s->sps->pixel_shift, height, stride_dst, stride_src);
+            copy_CTB(dst, src, width << s->ps.sps->pixel_shift, height, stride_dst, stride_src);
             s->hevcdsp.sao_band_filter[tab](src, dst, stride_src, stride_dst,
                                             sao->offset_val[c_idx], sao->band_position[c_idx],
                                             width, height);
@@ -340,17 +340,17 @@ static void sao_filter_CTB(HEVCContext *s, int x, int y)
             break;
         case SAO_EDGE:
         {
-            int w = s->sps->width >> s->sps->hshift[c_idx];
-            int h = s->sps->height >> s->sps->vshift[c_idx];
+            int w = s->ps.sps->width >> s->ps.sps->hshift[c_idx];
+            int h = s->ps.sps->height >> s->ps.sps->vshift[c_idx];
             int left_edge = edges[0];
             int top_edge = edges[1];
             int right_edge = edges[2];
             int bottom_edge = edges[3];
-            int sh = s->sps->pixel_shift;
+            int sh = s->ps.sps->pixel_shift;
             int left_pixels, right_pixels;
 
-            stride_dst = 2*MAX_PB_SIZE + FF_INPUT_BUFFER_PADDING_SIZE;
-            dst = lc->edge_emu_buffer + stride_dst + FF_INPUT_BUFFER_PADDING_SIZE;
+            stride_dst = 2*MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE;
+            dst = lc->edge_emu_buffer + stride_dst + AV_INPUT_BUFFER_PADDING_SIZE;
 
             if (!top_edge) {
                 int left = 1 - left_edge;
@@ -455,7 +455,7 @@ static void sao_filter_CTB(HEVCContext *s, int x, int y)
 
 static int get_pcm(HEVCContext *s, int x, int y)
 {
-    int log2_min_pu_size = s->sps->log2_min_pu_size;
+    int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
     int x_pu, y_pu;
 
     if (x < 0 || y < 0)
@@ -464,9 +464,9 @@ static int get_pcm(HEVCContext *s, int x, int y)
     x_pu = x >> log2_min_pu_size;
     y_pu = y >> log2_min_pu_size;
 
-    if (x_pu >= s->sps->min_pu_width || y_pu >= s->sps->min_pu_height)
+    if (x_pu >= s->ps.sps->min_pu_width || y_pu >= s->ps.sps->min_pu_height)
         return 2;
-    return s->is_pcm[y_pu * s->sps->min_pu_width + x_pu];
+    return s->is_pcm[y_pu * s->ps.sps->min_pu_width + x_pu];
 }
 
 #define TC_CALC(qp, bs)                                                 \
@@ -483,18 +483,18 @@ static void deblocking_filter_CTB(HEVCContext *s, int x0, int y0)
     uint8_t no_p[2] = { 0 };
     uint8_t no_q[2] = { 0 };
 
-    int log2_ctb_size = s->sps->log2_ctb_size;
+    int log2_ctb_size = s->ps.sps->log2_ctb_size;
     int x_end, x_end2, y_end;
     int ctb_size        = 1 << log2_ctb_size;
     int ctb             = (x0 >> log2_ctb_size) +
-                          (y0 >> log2_ctb_size) * s->sps->ctb_width;
+                          (y0 >> log2_ctb_size) * s->ps.sps->ctb_width;
     int cur_tc_offset   = s->deblock[ctb].tc_offset;
     int cur_beta_offset = s->deblock[ctb].beta_offset;
     int left_tc_offset, left_beta_offset;
     int tc_offset, beta_offset;
-    int pcmf = (s->sps->pcm_enabled_flag &&
-                s->sps->pcm.loop_filter_disable_flag) ||
-               s->pps->transquant_bypass_enable_flag;
+    int pcmf = (s->ps.sps->pcm_enabled_flag &&
+                s->ps.sps->pcm.loop_filter_disable_flag) ||
+               s->ps.pps->transquant_bypass_enable_flag;
 
     if (x0) {
         left_tc_offset   = s->deblock[ctb - 1].tc_offset;
@@ -505,17 +505,17 @@ static void deblocking_filter_CTB(HEVCContext *s, int x0, int y0)
     }
 
     x_end = x0 + ctb_size;
-    if (x_end > s->sps->width)
-        x_end = s->sps->width;
+    if (x_end > s->ps.sps->width)
+        x_end = s->ps.sps->width;
     y_end = y0 + ctb_size;
-    if (y_end > s->sps->height)
-        y_end = s->sps->height;
+    if (y_end > s->ps.sps->height)
+        y_end = s->ps.sps->height;
 
     tc_offset   = cur_tc_offset;
     beta_offset = cur_beta_offset;
 
     x_end2 = x_end;
-    if (x_end2 != s->sps->width)
+    if (x_end2 != s->ps.sps->width)
         x_end2 -= 8;
     for (y = y0; y < y_end; y += 8) {
         // vertical filtering luma
@@ -529,7 +529,7 @@ static void deblocking_filter_CTB(HEVCContext *s, int x0, int y0)
 
                 tc[0]   = bs0 ? TC_CALC(qp, bs0) : 0;
                 tc[1]   = bs1 ? TC_CALC(qp, bs1) : 0;
-                src     = &s->frame->data[LUMA][y * s->frame->linesize[LUMA] + (x << s->sps->pixel_shift)];
+                src     = &s->frame->data[LUMA][y * s->frame->linesize[LUMA] + (x << s->ps.sps->pixel_shift)];
                 if (pcmf) {
                     no_p[0] = get_pcm(s, x - 1, y);
                     no_p[1] = get_pcm(s, x - 1, y + 4);
@@ -561,7 +561,7 @@ static void deblocking_filter_CTB(HEVCContext *s, int x0, int y0)
                 beta = betatable[av_clip(qp + beta_offset, 0, MAX_QP)];
                 tc[0]   = bs0 ? TC_CALC(qp, bs0) : 0;
                 tc[1]   = bs1 ? TC_CALC(qp, bs1) : 0;
-                src     = &s->frame->data[LUMA][y * s->frame->linesize[LUMA] + (x << s->sps->pixel_shift)];
+                src     = &s->frame->data[LUMA][y * s->frame->linesize[LUMA] + (x << s->ps.sps->pixel_shift)];
                 if (pcmf) {
                     no_p[0] = get_pcm(s, x, y - 1);
                     no_p[1] = get_pcm(s, x + 4, y - 1);
@@ -578,10 +578,10 @@ static void deblocking_filter_CTB(HEVCContext *s, int x0, int y0)
         }
     }
 
-    if (s->sps->chroma_format_idc) {
+    if (s->ps.sps->chroma_format_idc) {
         for (chroma = 1; chroma <= 2; chroma++) {
-            int h = 1 << s->sps->hshift[chroma];
-            int v = 1 << s->sps->vshift[chroma];
+            int h = 1 << s->ps.sps->hshift[chroma];
+            int v = 1 << s->ps.sps->vshift[chroma];
 
             // vertical filtering chroma
             for (y = y0; y < y_end; y += (8 * v)) {
@@ -595,7 +595,7 @@ static void deblocking_filter_CTB(HEVCContext *s, int x0, int y0)
 
                         c_tc[0] = (bs0 == 2) ? chroma_tc(s, qp0, chroma, tc_offset) : 0;
                         c_tc[1] = (bs1 == 2) ? chroma_tc(s, qp1, chroma, tc_offset) : 0;
-                        src       = &s->frame->data[chroma][(y >> s->sps->vshift[chroma]) * s->frame->linesize[chroma] + ((x >> s->sps->hshift[chroma]) << s->sps->pixel_shift)];
+                        src       = &s->frame->data[chroma][(y >> s->ps.sps->vshift[chroma]) * s->frame->linesize[chroma] + ((x >> s->ps.sps->hshift[chroma]) << s->ps.sps->pixel_shift)];
                         if (pcmf) {
                             no_p[0] = get_pcm(s, x - 1, y);
                             no_p[1] = get_pcm(s, x - 1, y + (4 * v));
@@ -617,7 +617,7 @@ static void deblocking_filter_CTB(HEVCContext *s, int x0, int y0)
                 // horizontal filtering chroma
                 tc_offset = x0 ? left_tc_offset : cur_tc_offset;
                 x_end2 = x_end;
-                if (x_end != s->sps->width)
+                if (x_end != s->ps.sps->width)
                     x_end2 = x_end - 8 * h;
                 for (x = x0 ? x0 - 8 * h : 0; x < x_end2; x += (8 * h)) {
                     const int bs0 = s->horizontal_bs[( x          + y * s->bs_width) >> 2];
@@ -628,7 +628,7 @@ static void deblocking_filter_CTB(HEVCContext *s, int x0, int y0)
 
                         c_tc[0]   = bs0 == 2 ? chroma_tc(s, qp0, chroma, tc_offset)     : 0;
                         c_tc[1]   = bs1 == 2 ? chroma_tc(s, qp1, chroma, cur_tc_offset) : 0;
-                        src       = &s->frame->data[chroma][(y >> s->sps->vshift[1]) * s->frame->linesize[chroma] + ((x >> s->sps->hshift[1]) << s->sps->pixel_shift)];
+                        src       = &s->frame->data[chroma][(y >> s->ps.sps->vshift[1]) * s->frame->linesize[chroma] + ((x >> s->ps.sps->hshift[1]) << s->ps.sps->pixel_shift)];
                         if (pcmf) {
                             no_p[0] = get_pcm(s, x,           y - 1);
                             no_p[1] = get_pcm(s, x + (4 * h), y - 1);
@@ -717,10 +717,10 @@ void ff_hevc_deblocking_boundary_strengths(HEVCContext *s, int x0, int y0,
 {
     HEVCLocalContext *lc = s->HEVClc;
     MvField *tab_mvf     = s->ref->tab_mvf;
-    int log2_min_pu_size = s->sps->log2_min_pu_size;
-    int log2_min_tu_size = s->sps->log2_min_tb_size;
-    int min_pu_width     = s->sps->min_pu_width;
-    int min_tu_width     = s->sps->min_tb_width;
+    int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
+    int log2_min_tu_size = s->ps.sps->log2_min_tb_size;
+    int min_pu_width     = s->ps.sps->min_pu_width;
+    int min_tu_width     = s->ps.sps->min_tb_width;
     int is_intra = tab_mvf[(y0 >> log2_min_pu_size) * min_pu_width +
                            (x0 >> log2_min_pu_size)].pred_flag == PF_INTRA;
     int boundary_upper, boundary_left;
@@ -730,10 +730,10 @@ void ff_hevc_deblocking_boundary_strengths(HEVCContext *s, int x0, int y0,
     if (boundary_upper &&
         ((!s->sh.slice_loop_filter_across_slices_enabled_flag &&
           lc->boundary_flags & BOUNDARY_UPPER_SLICE &&
-          (y0 % (1 << s->sps->log2_ctb_size)) == 0) ||
-         (!s->pps->loop_filter_across_tiles_enabled_flag &&
+          (y0 % (1 << s->ps.sps->log2_ctb_size)) == 0) ||
+         (!s->ps.pps->loop_filter_across_tiles_enabled_flag &&
           lc->boundary_flags & BOUNDARY_UPPER_TILE &&
-          (y0 % (1 << s->sps->log2_ctb_size)) == 0)))
+          (y0 % (1 << s->ps.sps->log2_ctb_size)) == 0)))
         boundary_upper = 0;
 
     if (boundary_upper) {
@@ -768,10 +768,10 @@ void ff_hevc_deblocking_boundary_strengths(HEVCContext *s, int x0, int y0,
     if (boundary_left &&
         ((!s->sh.slice_loop_filter_across_slices_enabled_flag &&
           lc->boundary_flags & BOUNDARY_LEFT_SLICE &&
-          (x0 % (1 << s->sps->log2_ctb_size)) == 0) ||
-         (!s->pps->loop_filter_across_tiles_enabled_flag &&
+          (x0 % (1 << s->ps.sps->log2_ctb_size)) == 0) ||
+         (!s->ps.pps->loop_filter_across_tiles_enabled_flag &&
           lc->boundary_flags & BOUNDARY_LEFT_TILE &&
-          (x0 % (1 << s->sps->log2_ctb_size)) == 0)))
+          (x0 % (1 << s->ps.sps->log2_ctb_size)) == 0)))
         boundary_left = 0;
 
     if (boundary_left) {
@@ -842,10 +842,11 @@ void ff_hevc_deblocking_boundary_strengths(HEVCContext *s, int x0, int y0,
 
 void ff_hevc_hls_filter(HEVCContext *s, int x, int y, int ctb_size)
 {
-    int x_end = x >= s->sps->width  - ctb_size;
-    deblocking_filter_CTB(s, x, y);
-    if (s->sps->sao_enabled) {
-        int y_end = y >= s->sps->height - ctb_size;
+    int x_end = x >= s->ps.sps->width  - ctb_size;
+    if (s->avctx->skip_loop_filter < AVDISCARD_ALL)
+        deblocking_filter_CTB(s, x, y);
+    if (s->ps.sps->sao_enabled) {
+        int y_end = y >= s->ps.sps->height - ctb_size;
         if (y && x)
             sao_filter_CTB(s, x - ctb_size, y - ctb_size);
         if (x && y_end)
@@ -866,8 +867,8 @@ void ff_hevc_hls_filter(HEVCContext *s, int x, int y, int ctb_size)
 
 void ff_hevc_hls_filters(HEVCContext *s, int x_ctb, int y_ctb, int ctb_size)
 {
-    int x_end = x_ctb >= s->sps->width  - ctb_size;
-    int y_end = y_ctb >= s->sps->height - ctb_size;
+    int x_end = x_ctb >= s->ps.sps->width  - ctb_size;
+    int y_end = y_ctb >= s->ps.sps->height - ctb_size;
     if (y_ctb && x_ctb)
         ff_hevc_hls_filter(s, x_ctb - ctb_size, y_ctb - ctb_size, ctb_size);
     if (y_ctb && x_end)
diff --git a/libavcodec/hevc_mp4toannexb_bsf.c b/libavcodec/hevc_mp4toannexb_bsf.c
new file mode 100644
index 00000000..d6feb998
--- /dev/null
+++ b/libavcodec/hevc_mp4toannexb_bsf.c
@@ -0,0 +1,212 @@
+/*
+ * HEVC MP4 to Annex B byte stream format filter
+ * copyright (c) 2015 Anton Khirnov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <string.h>
+
+#include "libavutil/intreadwrite.h"
+#include "libavutil/mem.h"
+
+#include "avcodec.h"
+#include "bytestream.h"
+#include "hevc.h"
+
+#define MIN_HEVCC_LENGTH 23
+
+typedef struct HEVCBSFContext {
+    uint8_t  length_size;      /* bytes in each NAL length prefix (1..4) */
+    int      extradata_parsed; /* non-zero once the extradata was converted */
+
+    int logged_nonmp4_warning; /* limits the "Annex B already" message to once */
+
+    /* When private_spspps is zero, spspps_buf aliases the global extradata:
+       the bsf replaces the global extradata with its own allocated Annex B
+       version (default behaviour).
+       When private_spspps is non-zero, the bsf keeps a private spspps buffer.
+       This mode is necessary when the bsf is used inside a decoder, otherwise
+       the bsf has issues after decoder re-initialization. Pass the
+       "private_spspps_buf" argument to activate this mode.
+     */
+    int      private_spspps;
+    uint8_t *spspps_buf;       /* Annex B parameter sets, prepended to IRAP NALs */
+    uint32_t spspps_size;      /* size of spspps_buf in bytes, without padding */
+} HEVCBSFContext;
+/* Convert MP4-style HEVC extradata to Annex B; returns the NAL length size or <0. */
+static int hevc_extradata_to_annexb(HEVCBSFContext* ctx, AVCodecContext *avctx)
+{
+    GetByteContext gb;
+    int length_size, num_arrays, i, j;
+    int ret = 0;
+
+    uint8_t *new_extradata = NULL;
+    size_t   new_extradata_size = 0;
+
+    bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
+
+    bytestream2_skip(&gb, 21); // the leading 21 bytes are not needed here
+    length_size = (bytestream2_get_byte(&gb) & 3) + 1; // low 2 bits hold size - 1
+    num_arrays  = bytestream2_get_byte(&gb);
+
+    for (i = 0; i < num_arrays; i++) {
+        int type = bytestream2_get_byte(&gb) & 0x3f; // NAL unit type of this array
+        int cnt  = bytestream2_get_be16(&gb);        // number of NAL units in it
+
+        if (!(type == NAL_VPS || type == NAL_SPS || type == NAL_PPS ||
+              type == NAL_SEI_PREFIX || type == NAL_SEI_SUFFIX)) {
+            av_log(avctx, AV_LOG_ERROR, "Invalid NAL unit type in extradata: %d\n",
+                   type);
+            ret = AVERROR_INVALIDDATA;
+            goto fail;
+        }
+
+        for (j = 0; j < cnt; j++) {
+            int nalu_len = bytestream2_get_be16(&gb); // 16-bit size prefix
+            /* refuse a unit whose startcode + payload + padding would overflow size_t */
+            if (4 + AV_INPUT_BUFFER_PADDING_SIZE + nalu_len > SIZE_MAX - new_extradata_size) {
+                ret = AVERROR_INVALIDDATA;
+                goto fail;
+            }
+            ret = av_reallocp(&new_extradata, new_extradata_size + nalu_len + 4 + AV_INPUT_BUFFER_PADDING_SIZE);
+            if (ret < 0)
+                goto fail;
+
+            AV_WB32(new_extradata + new_extradata_size, 1); // add the startcode
+            bytestream2_get_buffer(&gb, new_extradata + new_extradata_size + 4, nalu_len);
+            new_extradata_size += 4 + nalu_len;
+            memset(new_extradata + new_extradata_size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+        }
+    }
+
+    if (!ctx->private_spspps) { // default mode: the new buffer replaces avctx->extradata
+        av_freep(&avctx->extradata);
+        avctx->extradata      = new_extradata;
+        avctx->extradata_size = new_extradata_size;
+    }
+    ctx->spspps_buf  = new_extradata;
+    ctx->spspps_size = new_extradata_size;
+
+    if (!new_extradata_size)
+        av_log(avctx, AV_LOG_WARNING, "No parameter sets in the extradata\n");
+
+    return length_size;
+fail:
+    av_freep(&new_extradata);
+    return ret;
+}
+
+/* Convert one length-prefixed (MP4) HEVC packet to Annex B. Returns 1 with a
+ * newly allocated *poutbuf, 0 when buf is passed through unchanged (input is
+ * Annex B already), or a negative error code. */
+static int hevc_mp4toannexb_filter(AVBitStreamFilterContext *bsfc,
+                                   AVCodecContext *avctx, const char *args,
+                                   uint8_t **poutbuf, int *poutbuf_size,
+                                   const uint8_t *buf, int buf_size,
+                                   int keyframe)
+{
+    HEVCBSFContext *ctx = bsfc->priv_data;
+    GetByteContext gb;
+    uint8_t *out = NULL;
+    size_t   out_size = 0;
+    int i, ret = 0, got_irap = 0;
+
+    if (!ctx->extradata_parsed) {
+        if (avctx->extradata_size < MIN_HEVCC_LENGTH ||
+            AV_RB24(avctx->extradata) == 1           ||
+            AV_RB32(avctx->extradata) == 1) {
+            if (!ctx->logged_nonmp4_warning) {
+                av_log(avctx, AV_LOG_VERBOSE,
+                       "The input looks like it is Annex B already\n");
+                ctx->logged_nonmp4_warning = 1;
+            }
+            *poutbuf      = (uint8_t *)buf;
+            *poutbuf_size = buf_size;
+            return 0;
+        }
+        if (args && strstr(args, "private_spspps_buf"))
+            ctx->private_spspps = 1;
+
+        ret = hevc_extradata_to_annexb(ctx, avctx);
+        if (ret < 0)
+            return ret;
+        ctx->length_size      = ret;
+        ctx->extradata_parsed = 1;
+    }
+
+    *poutbuf_size = 0;
+    *poutbuf      = NULL;
+    bytestream2_init(&gb, buf, buf_size);
+    while (bytestream2_get_bytes_left(&gb)) {
+        uint32_t nalu_size = 0;
+        int nalu_type, is_irap, add_extradata, extra_size;
+
+        for (i = 0; i < ctx->length_size; i++)
+            nalu_size = (nalu_size << 8) | bytestream2_get_byte(&gb);
+        /* reject NAL sizes that run past the packet instead of trusting them */
+        if (nalu_size > bytestream2_get_bytes_left(&gb)) {
+            ret = AVERROR_INVALIDDATA;
+            goto fail;
+        }
+        nalu_type = (bytestream2_peek_byte(&gb) >> 1) & 0x3f;
+
+        /* prepend extradata to the first IRAP NAL unit (types 16..23) */
+        is_irap       = nalu_type >= 16 && nalu_type <= 23;
+        add_extradata = is_irap && !got_irap;
+        extra_size    = add_extradata * ctx->spspps_size;
+        got_irap     |= is_irap;
+
+        if (SIZE_MAX - out_size < 4             ||
+            SIZE_MAX - out_size - 4 < nalu_size ||
+            SIZE_MAX - out_size - 4 - nalu_size < extra_size) {
+            ret = AVERROR_INVALIDDATA;
+            goto fail;
+        }
+        ret = av_reallocp(&out, out_size + 4 + nalu_size + extra_size);
+        if (ret < 0)
+            goto fail;
+
+        if (add_extradata)
+            memcpy(out + out_size, ctx->spspps_buf, extra_size);
+        AV_WB32(out + out_size + extra_size, 1);
+        bytestream2_get_buffer(&gb, out + out_size + 4 + extra_size, nalu_size);
+        out_size += 4 + nalu_size + extra_size;
+    }
+
+    *poutbuf      = out;
+    *poutbuf_size = out_size;
+    return 1;
+
+fail:
+    av_freep(&out);
+    return ret;
+}
+
+static void hevc_mp4toannexb_close(AVBitStreamFilterContext *bsfc)
+{
+    HEVCBSFContext *const priv = bsfc->priv_data;
+    if (priv->private_spspps) /* otherwise the buffer was handed to avctx */
+        av_freep(&priv->spspps_buf);
+}
+
+AVBitStreamFilter ff_hevc_mp4toannexb_bsf = {
+    .name           = "hevc_mp4toannexb",
+    .priv_data_size = sizeof(HEVCBSFContext),
+    .filter         = hevc_mp4toannexb_filter,
+    .close          = hevc_mp4toannexb_close,
+};
diff --git a/libavcodec/hevc_mvs.c b/libavcodec/hevc_mvs.c
index e504257c..00da575a 100644
--- a/libavcodec/hevc_mvs.c
+++ b/libavcodec/hevc_mvs.c
@@ -42,14 +42,14 @@ void ff_hevc_set_neighbour_available(HEVCContext *s, int x0, int y0,
                                      int nPbW, int nPbH)
 {
     HEVCLocalContext *lc = s->HEVClc;
-    int x0b = av_mod_uintp2(x0, s->sps->log2_ctb_size);
-    int y0b = av_mod_uintp2(y0, s->sps->log2_ctb_size);
+    int x0b = av_mod_uintp2(x0, s->ps.sps->log2_ctb_size);
+    int y0b = av_mod_uintp2(y0, s->ps.sps->log2_ctb_size);
 
     lc->na.cand_up       = (lc->ctb_up_flag   || y0b);
     lc->na.cand_left     = (lc->ctb_left_flag || x0b);
     lc->na.cand_up_left  = (!x0b && !y0b) ? lc->ctb_up_left_flag : lc->na.cand_left && lc->na.cand_up;
     lc->na.cand_up_right_sap =
-            ((x0b + nPbW) == (1 << s->sps->log2_ctb_size)) ?
+            ((x0b + nPbW) == (1 << s->ps.sps->log2_ctb_size)) ?
                     lc->ctb_up_right_flag && !y0b : lc->na.cand_up;
     lc->na.cand_up_right =
             lc->na.cand_up_right_sap
@@ -64,19 +64,19 @@ static av_always_inline int z_scan_block_avail(HEVCContext *s, int xCurr, int yC
                               int xN, int yN)
 {
 #define MIN_TB_ADDR_ZS(x, y)                                            \
-    s->pps->min_tb_addr_zs[(y) * (s->sps->tb_mask+2) + (x)]
+    s->ps.pps->min_tb_addr_zs[(y) * (s->ps.sps->tb_mask+2) + (x)]
 
-    int xCurr_ctb = xCurr >> s->sps->log2_ctb_size;
-    int yCurr_ctb = yCurr >> s->sps->log2_ctb_size;
-    int xN_ctb    = xN    >> s->sps->log2_ctb_size;
-    int yN_ctb    = yN    >> s->sps->log2_ctb_size;
+    int xCurr_ctb = xCurr >> s->ps.sps->log2_ctb_size;
+    int yCurr_ctb = yCurr >> s->ps.sps->log2_ctb_size;
+    int xN_ctb    = xN    >> s->ps.sps->log2_ctb_size;
+    int yN_ctb    = yN    >> s->ps.sps->log2_ctb_size;
     if( yN_ctb < yCurr_ctb || xN_ctb < xCurr_ctb )
         return 1;
     else {
-        int Curr = MIN_TB_ADDR_ZS((xCurr >> s->sps->log2_min_tb_size) & s->sps->tb_mask,
-                (yCurr >> s->sps->log2_min_tb_size) & s->sps->tb_mask);
-        int N    = MIN_TB_ADDR_ZS((xN >> s->sps->log2_min_tb_size) & s->sps->tb_mask,
-                (yN >> s->sps->log2_min_tb_size) & s->sps->tb_mask);
+        int Curr = MIN_TB_ADDR_ZS((xCurr >> s->ps.sps->log2_min_tb_size) & s->ps.sps->tb_mask,
+                (yCurr >> s->ps.sps->log2_min_tb_size) & s->ps.sps->tb_mask);
+        int N    = MIN_TB_ADDR_ZS((xN >> s->ps.sps->log2_min_tb_size) & s->ps.sps->tb_mask,
+                (yN >> s->ps.sps->log2_min_tb_size) & s->ps.sps->tb_mask);
         return N <= Curr;
     }
 }
@@ -84,7 +84,7 @@ static av_always_inline int z_scan_block_avail(HEVCContext *s, int xCurr, int yC
 //check if the two luma locations belong to the same mostion estimation region
 static av_always_inline int is_diff_mer(HEVCContext *s, int xN, int yN, int xP, int yP)
 {
-    uint8_t plevel = s->pps->log2_parallel_merge_level;
+    uint8_t plevel = s->ps.pps->log2_parallel_merge_level;
 
     return xN >> plevel == xP >> plevel &&
            yN >> plevel == yP >> plevel;
@@ -203,8 +203,8 @@ static int derive_temporal_colocated_mvs(HEVCContext *s, MvField temp_col,
     tab_mvf[(y) * min_pu_width + x]
 
 #define TAB_MVF_PU(v)                                                   \
-    TAB_MVF(((x ## v) >> s->sps->log2_min_pu_size),                     \
-            ((y ## v) >> s->sps->log2_min_pu_size))
+    TAB_MVF(((x ## v) >> s->ps.sps->log2_min_pu_size),                     \
+            ((y ## v) >> s->ps.sps->log2_min_pu_size))
 
 #define DERIVE_TEMPORAL_COLOCATED_MVS                                   \
     derive_temporal_colocated_mvs(s, temp_col,                          \
@@ -221,7 +221,7 @@ static int temporal_luma_motion_vector(HEVCContext *s, int x0, int y0,
     MvField *tab_mvf;
     MvField temp_col;
     int x, y, x_pu, y_pu;
-    int min_pu_width = s->sps->min_pu_width;
+    int min_pu_width = s->ps.sps->min_pu_width;
     int availableFlagLXCol = 0;
     int colPic;
 
@@ -240,15 +240,15 @@ static int temporal_luma_motion_vector(HEVCContext *s, int x0, int y0,
     y = y0 + nPbH;
 
     if (tab_mvf &&
-        (y0 >> s->sps->log2_ctb_size) == (y >> s->sps->log2_ctb_size) &&
-        y < s->sps->height &&
-        x < s->sps->width) {
+        (y0 >> s->ps.sps->log2_ctb_size) == (y >> s->ps.sps->log2_ctb_size) &&
+        y < s->ps.sps->height &&
+        x < s->ps.sps->width) {
         x                 &= ~15;
         y                 &= ~15;
         if (s->threads_type == FF_THREAD_FRAME)
             ff_thread_await_progress(&ref->tf, y, 0);
-        x_pu               = x >> s->sps->log2_min_pu_size;
-        y_pu               = y >> s->sps->log2_min_pu_size;
+        x_pu               = x >> s->ps.sps->log2_min_pu_size;
+        y_pu               = y >> s->ps.sps->log2_min_pu_size;
         temp_col           = TAB_MVF(x_pu, y_pu);
         availableFlagLXCol = DERIVE_TEMPORAL_COLOCATED_MVS;
     }
@@ -261,8 +261,8 @@ static int temporal_luma_motion_vector(HEVCContext *s, int x0, int y0,
         y                 &= ~15;
         if (s->threads_type == FF_THREAD_FRAME)
             ff_thread_await_progress(&ref->tf, y, 0);
-        x_pu               = x >> s->sps->log2_min_pu_size;
-        y_pu               = y >> s->sps->log2_min_pu_size;
+        x_pu               = x >> s->ps.sps->log2_min_pu_size;
+        y_pu               = y >> s->ps.sps->log2_min_pu_size;
         temp_col           = TAB_MVF(x_pu, y_pu);
         availableFlagLXCol = DERIVE_TEMPORAL_COLOCATED_MVS;
     }
@@ -292,7 +292,7 @@ static void derive_spatial_merge_candidates(HEVCContext *s, int x0, int y0,
     RefPicList *refPicList = s->ref->refPicList;
     MvField *tab_mvf       = s->ref->tab_mvf;
 
-    const int min_pu_width = s->sps->min_pu_width;
+    const int min_pu_width = s->ps.sps->min_pu_width;
 
     const int cand_bottom_left = lc->na.cand_bottom_left;
     const int cand_left        = lc->na.cand_left;
@@ -365,7 +365,7 @@ static void derive_spatial_merge_candidates(HEVCContext *s, int x0, int y0,
 
     // above right spatial merge candidate
     is_available_b0 = AVAILABLE(cand_up_right, B0) &&
-                      xB0 < s->sps->width &&
+                      xB0 < s->ps.sps->width &&
                       PRED_BLOCK_AVAILABLE(B0) &&
                       !is_diff_mer(s, xB0, yB0, x0, y0);
 
@@ -379,7 +379,7 @@ static void derive_spatial_merge_candidates(HEVCContext *s, int x0, int y0,
 
     // left bottom spatial merge candidate
     is_available_a0 = AVAILABLE(cand_bottom_left, A0) &&
-                      yA0 < s->sps->height &&
+                      yA0 < s->ps.sps->height &&
                       PRED_BLOCK_AVAILABLE(A0) &&
                       !is_diff_mer(s, xA0, yA0, x0, y0);
 
@@ -486,7 +486,7 @@ void ff_hevc_luma_mv_merge_mode(HEVCContext *s, int x0, int y0, int nPbW,
     int nPbH2 = nPbH;
     HEVCLocalContext *lc = s->HEVClc;
 
-    if (s->pps->log2_parallel_merge_level > 2 && nCS == 8) {
+    if (s->ps.pps->log2_parallel_merge_level > 2 && nCS == 8) {
         singleMCLFlag = 1;
         x0            = lc->cu.x;
         y0            = lc->cu.y;
@@ -529,7 +529,7 @@ static int mv_mp_mode_mx(HEVCContext *s, int x, int y, int pred_flag_index,
                          Mv *mv, int ref_idx_curr, int ref_idx)
 {
     MvField *tab_mvf = s->ref->tab_mvf;
-    int min_pu_width = s->sps->min_pu_width;
+    int min_pu_width = s->ps.sps->min_pu_width;
 
     RefPicList *refPicList = s->ref->refPicList;
 
@@ -545,7 +545,7 @@ static int mv_mp_mode_mx_lt(HEVCContext *s, int x, int y, int pred_flag_index,
                             Mv *mv, int ref_idx_curr, int ref_idx)
 {
     MvField *tab_mvf = s->ref->tab_mvf;
-    int min_pu_width = s->sps->min_pu_width;
+    int min_pu_width = s->ps.sps->min_pu_width;
 
     RefPicList *refPicList = s->ref->refPicList;
 
@@ -568,14 +568,14 @@ static int mv_mp_mode_mx_lt(HEVCContext *s, int x, int y, int pred_flag_index,
 
 #define MP_MX(v, pred, mx)                                      \
     mv_mp_mode_mx(s,                                            \
-                  (x ## v) >> s->sps->log2_min_pu_size,         \
-                  (y ## v) >> s->sps->log2_min_pu_size,         \
+                  (x ## v) >> s->ps.sps->log2_min_pu_size,         \
+                  (y ## v) >> s->ps.sps->log2_min_pu_size,         \
                   pred, &mx, ref_idx_curr, ref_idx)
 
 #define MP_MX_LT(v, pred, mx)                                   \
     mv_mp_mode_mx_lt(s,                                         \
-                     (x ## v) >> s->sps->log2_min_pu_size,      \
-                     (y ## v) >> s->sps->log2_min_pu_size,      \
+                     (x ## v) >> s->ps.sps->log2_min_pu_size,      \
+                     (y ## v) >> s->ps.sps->log2_min_pu_size,      \
                      pred, &mx, ref_idx_curr, ref_idx)
 
 void ff_hevc_luma_mv_mvp_mode(HEVCContext *s, int x0, int y0, int nPbW,
@@ -589,7 +589,7 @@ void ff_hevc_luma_mv_mvp_mode(HEVCContext *s, int x0, int y0, int nPbW,
     int availableFlagLXA0 = 1;
     int availableFlagLXB0 = 1;
     int numMVPCandLX = 0;
-    int min_pu_width = s->sps->min_pu_width;
+    int min_pu_width = s->ps.sps->min_pu_width;
 
     int xA0, yA0;
     int is_available_a0;
@@ -625,7 +625,7 @@ void ff_hevc_luma_mv_mvp_mode(HEVCContext *s, int x0, int y0, int nPbW,
     yA0 = y0 + nPbH;
 
     is_available_a0 = AVAILABLE(cand_bottom_left, A0) &&
-                      yA0 < s->sps->height &&
+                      yA0 < s->ps.sps->height &&
                       PRED_BLOCK_AVAILABLE(A0);
 
     //left spatial merge candidate
@@ -680,7 +680,7 @@ void ff_hevc_luma_mv_mvp_mode(HEVCContext *s, int x0, int y0, int nPbW,
     yB0    = y0 - 1;
 
     is_available_b0 =  AVAILABLE(cand_up_right, B0) &&
-                       xB0 < s->sps->width &&
+                       xB0 < s->ps.sps->width &&
                        PRED_BLOCK_AVAILABLE(B0);
 
     // above spatial merge candidate
diff --git a/libavcodec/hevc_parse.c b/libavcodec/hevc_parse.c
new file mode 100644
index 00000000..d557cc7f
--- /dev/null
+++ b/libavcodec/hevc_parse.c
@@ -0,0 +1,297 @@
+/*
+ * HEVC common code
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <string.h>
+
+#include "config.h"
+
+#include "libavutil/intreadwrite.h"
+#include "libavutil/mem.h"
+
+#include "hevc.h"
+
+/* Unescape one NAL unit into nal; returns the src bytes consumed or <0.
+ * FIXME: adapted from ff_h264_decode_nal, the duplication should be avoided. */
+int ff_hevc_extract_rbsp(HEVCContext *s, const uint8_t *src, int length,
+                         HEVCNAL *nal)
+{
+    int i, si, di;
+    uint8_t *dst;
+    /* the skipped-byte bookkeeping is only touched when a context is passed */
+    if (s)
+        nal->skipped_bytes = 0;
+#define STARTCODE_TEST                                                  \
+        if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) {     \
+            if (src[i + 2] != 3) {                                      \
+                /* startcode, so we must be past the end */             \
+                length = i;                                             \
+            }                                                           \
+            break;                                                      \
+        }
+#if HAVE_FAST_UNALIGNED
+#define FIND_FIRST_ZERO                                                 \
+        if (i > 0 && !src[i])                                           \
+            i--;                                                        \
+        while (src[i])                                                  \
+            i++
+#if HAVE_FAST_64BIT
+    for (i = 0; i + 1 < length; i += 9) {
+        if (!((~AV_RN64A(src + i) &
+               (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
+              0x8000800080008080ULL))
+            continue;
+        FIND_FIRST_ZERO;
+        STARTCODE_TEST;
+        i -= 7;
+    }
+#else
+    for (i = 0; i + 1 < length; i += 5) {
+        if (!((~AV_RN32A(src + i) &
+               (AV_RN32A(src + i) - 0x01000101U)) &
+              0x80008080U))
+            continue;
+        FIND_FIRST_ZERO;
+        STARTCODE_TEST;
+        i -= 3;
+    }
+#endif /* HAVE_FAST_64BIT */
+#else
+    for (i = 0; i + 1 < length; i += 2) {
+        if (src[i])
+            continue;
+        if (i > 0 && src[i - 1] == 0)
+            i--;
+        STARTCODE_TEST;
+    }
+#endif /* HAVE_FAST_UNALIGNED */
+    /* after the scan, i < length - 1 only if an escape (00 00 03) starts at i */
+    if (i >= length - 1) { // no escaped 0
+        nal->data     =
+        nal->raw_data = src; // nothing to unescape: reference src directly
+        nal->size     =
+        nal->raw_size = length;
+        return length;
+    }
+
+    av_fast_malloc(&nal->rbsp_buffer, &nal->rbsp_buffer_size,
+                   length + AV_INPUT_BUFFER_PADDING_SIZE);
+    if (!nal->rbsp_buffer)
+        return AVERROR(ENOMEM);
+
+    dst = nal->rbsp_buffer;
+    /* everything before the first escape is copied verbatim */
+    memcpy(dst, src, i);
+    si = di = i;
+    while (si + 2 < length) {
+        // remove escapes (very rare 1:2^22)
+        if (src[si + 2] > 3) {
+            dst[di++] = src[si++];
+            dst[di++] = src[si++];
+        } else if (src[si] == 0 && src[si + 1] == 0) {
+            if (src[si + 2] == 3) { // escape
+                dst[di++] = 0;
+                dst[di++] = 0;
+                si       += 3; // the 03 byte is dropped from the output
+
+                if (s && nal->skipped_bytes_pos) {
+                    nal->skipped_bytes++;
+                    if (nal->skipped_bytes_pos_size < nal->skipped_bytes) {
+                        nal->skipped_bytes_pos_size *= 2; // grow the position table
+                        av_assert0(nal->skipped_bytes_pos_size >= nal->skipped_bytes);
+                        av_reallocp_array(&nal->skipped_bytes_pos,
+                                nal->skipped_bytes_pos_size,
+                                sizeof(*nal->skipped_bytes_pos));
+                        if (!nal->skipped_bytes_pos) {
+                            nal->skipped_bytes_pos_size = 0;
+                            return AVERROR(ENOMEM);
+                        }
+                    }
+                    if (nal->skipped_bytes_pos)
+                        nal->skipped_bytes_pos[nal->skipped_bytes-1] = di - 1;
+                }
+                continue;
+            } else // next start code
+                goto nsc;
+        }
+
+        dst[di++] = src[si++];
+    }
+    while (si < length) // copy the tail that is too short to hold an escape
+        dst[di++] = src[si++];
+
+nsc:
+    memset(dst + di, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+
+    nal->data = dst;
+    nal->size = di;
+    nal->raw_data = src;
+    nal->raw_size = si;
+    return si; // raw bytes consumed from src
+}
+
+static const char *nal_unit_name(int nal_type)
+{
+    static const char *const names[] = { // indexed by NAL type, gaps stay NULL
+        [NAL_TRAIL_N]    = "TRAIL_N",
+        [NAL_TRAIL_R]    = "TRAIL_R",
+        [NAL_TSA_N]      = "TSA_N",
+        [NAL_TSA_R]      = "TSA_R",
+        [NAL_STSA_N]     = "STSA_N",
+        [NAL_STSA_R]     = "STSA_R",
+        [NAL_RADL_N]     = "RADL_N",
+        [NAL_RADL_R]     = "RADL_R",
+        [NAL_RASL_N]     = "RASL_N",
+        [NAL_RASL_R]     = "RASL_R",
+        [NAL_BLA_W_LP]   = "BLA_W_LP",
+        [NAL_BLA_W_RADL] = "BLA_W_RADL",
+        [NAL_BLA_N_LP]   = "BLA_N_LP",
+        [NAL_IDR_W_RADL] = "IDR_W_RADL",
+        [NAL_IDR_N_LP]   = "IDR_N_LP",
+        [NAL_CRA_NUT]    = "CRA_NUT",
+        [NAL_VPS]        = "VPS",
+        [NAL_SPS]        = "SPS",
+        [NAL_PPS]        = "PPS",
+        [NAL_AUD]        = "AUD",
+        [NAL_EOS_NUT]    = "EOS_NUT",
+        [NAL_EOB_NUT]    = "EOB_NUT",
+        [NAL_FD_NUT]     = "FD_NUT",
+        [NAL_SEI_PREFIX] = "SEI_PREFIX",
+        [NAL_SEI_SUFFIX] = "SEI_SUFFIX",
+    };
+    return (unsigned)nal_type < sizeof(names) / sizeof(names[0]) && names[nal_type] ? names[nal_type] : "?";
+}
+
+/**
+ * Parse the two-byte NAL unit header from nal->gb into nal.
+ *
+ * @return AVERROR_INVALIDDATA if the header is invalid, 0 if the unit
+ *         should be skipped (non-zero layer id), 1 otherwise
+ */
+static int hls_nal_unit(HEVCNAL *nal, AVCodecContext *avctx)
+{
+    GetBitContext *bits = &nal->gb;
+    int layer_id;
+
+    if (get_bits1(bits)) // the first header bit must be zero
+        return AVERROR_INVALIDDATA;
+
+    nal->type        = get_bits(bits, 6);
+    layer_id         = get_bits(bits, 6);
+    nal->temporal_id = get_bits(bits, 3) - 1;
+    if (nal->temporal_id < 0)
+        return AVERROR_INVALIDDATA;
+
+    av_log(avctx, AV_LOG_DEBUG,
+           "nal_unit_type: %d(%s), nuh_layer_id: %d, temporal_id: %d\n",
+           nal->type, nal_unit_name(nal->type), layer_id, nal->temporal_id);
+    return !layer_id;
+}
+
+
+/* Split a packet into NAL units and parse each unit's header into pkt->nals;
+ * units for which hls_nal_unit() returns <= 0 are dropped from the list.
+ * Returns 0 on success, a negative error code otherwise. */
+int ff_hevc_split_packet(HEVCContext *s, HEVCPacket *pkt, const uint8_t *buf, int length,
+                         AVCodecContext *avctx, int is_nalff, int nal_length_size)
+{
+    int consumed, ret = 0;
+
+    pkt->nb_nals = 0;
+    while (length >= 4) {
+        HEVCNAL *nal;
+        int extract_length = 0;
+
+        if (is_nalff) {
+            unsigned int nal_size = 0; // unsigned: a 4-byte prefix cannot overflow
+            int i;
+            for (i = 0; i < nal_length_size; i++)
+                nal_size = (nal_size << 8) | buf[i];
+            buf    += nal_length_size;
+            length -= nal_length_size;
+
+            if (length < 0 || nal_size > (unsigned int)length) {
+                av_log(avctx, AV_LOG_ERROR, "Invalid NAL unit size.\n");
+                return AVERROR_INVALIDDATA;
+            }
+            extract_length = nal_size;
+        } else {
+            /* search start code */
+            while (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
+                ++buf;
+                --length;
+                if (length < 4) {
+                    // No more start codes: if NAL units were already found
+                    // we only discarded irrelevant bytes at the packet end.
+                    if (pkt->nb_nals > 0)
+                        return 0;
+                    av_log(avctx, AV_LOG_ERROR, "No start code is found.\n");
+                    return AVERROR_INVALIDDATA;
+                }
+            }
+
+            buf           += 3;
+            length        -= 3;
+            extract_length = length;
+        }
+
+        if (pkt->nals_allocated < pkt->nb_nals + 1) {
+            int new_size = pkt->nals_allocated + 1;
+            void *tmp = av_realloc_array(pkt->nals, new_size, sizeof(*pkt->nals));
+            if (!tmp)
+                return AVERROR(ENOMEM);
+            pkt->nals = tmp;
+            memset(pkt->nals + pkt->nals_allocated, 0,
+                   (new_size - pkt->nals_allocated) * sizeof(*pkt->nals));
+
+            nal = &pkt->nals[pkt->nb_nals];
+            nal->skipped_bytes_pos_size = 1024; // initial buffer size
+            nal->skipped_bytes_pos = av_malloc_array(nal->skipped_bytes_pos_size, sizeof(*nal->skipped_bytes_pos));
+            if (!nal->skipped_bytes_pos)
+                return AVERROR(ENOMEM);
+
+            pkt->nals_allocated = new_size;
+        }
+        nal = &pkt->nals[pkt->nb_nals];
+
+        consumed = ff_hevc_extract_rbsp(s, buf, extract_length, nal);
+        if (consumed < 0)
+            return consumed;
+
+        pkt->nb_nals++;
+
+        ret = init_get_bits8(&nal->gb, nal->data, nal->size);
+        if (ret < 0)
+            return ret;
+
+        ret = hls_nal_unit(nal, avctx);
+        if (ret <= 0) {
+            if (ret < 0)
+                av_log(avctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n",
+                       nal->type);
+            pkt->nb_nals--;
+        }
+
+        buf    += consumed;
+        length -= consumed;
+    }
+
+    return 0;
+}
+
diff --git a/libavcodec/hevc_parser.c b/libavcodec/hevc_parser.c
index 31452f20..4625e614 100644
--- a/libavcodec/hevc_parser.c
+++ b/libavcodec/hevc_parser.c
@@ -22,16 +22,112 @@
 
 #include "libavutil/common.h"
 
-#include "parser.h"
-#include "hevc.h"
 #include "golomb.h"
+#include "hevc.h"
+#include "parser.h"
 
 #define START_CODE 0x000001 ///< start_code_prefix_one_3bytes
 
-typedef struct HEVCParseContext {
-    HEVCContext  h;
+#define IS_IRAP_NAL(nal) (nal->type >= 16 && nal->type <= 23)
+
+#define ADVANCED_PARSER CONFIG_HEVC_DECODER
+
+typedef struct HEVCParserContext {
     ParseContext pc;
-} HEVCParseContext;
+
+    HEVCPacket pkt;
+    HEVCParamSets ps;
+
+    int parsed_extradata;
+
+#if ADVANCED_PARSER
+    HEVCContext h;
+#endif
+} HEVCParserContext;
+
+#if !ADVANCED_PARSER
+/* Read the PPS id at the start of a slice header and export the parameters
+ * of the SPS it refers to; returns 0 or AVERROR_INVALIDDATA. */
+static int hevc_parse_slice_header(AVCodecParserContext *s, HEVCNAL *nal,
+                                   AVCodecContext *avctx)
+{
+    HEVCParserContext *ctx = s->priv_data;
+    GetBitContext *gb = &nal->gb;
+    HEVCPPS *pps;
+    HEVCSPS *sps;
+    unsigned int pps_id;
+
+    get_bits1(gb);          // first slice in pic
+    if (IS_IRAP_NAL(nal))
+        get_bits1(gb);      // no output of prior pics
+
+    pps_id = get_ue_golomb_long(gb);
+    if (pps_id >= MAX_PPS_COUNT || !ctx->ps.pps_list[pps_id]) {
+        av_log(avctx, AV_LOG_ERROR, "PPS id out of range: %u\n", pps_id);
+        return AVERROR_INVALIDDATA;
+    }
+    pps = (HEVCPPS*)ctx->ps.pps_list[pps_id]->data;
+    sps = (HEVCSPS*)ctx->ps.sps_list[pps->sps_id]->data;
+
+    /* export the stream parameters */
+    s->coded_width  = sps->width;
+    s->coded_height = sps->height;
+    s->width        = sps->output_width;
+    s->height       = sps->output_height;
+    s->format       = sps->pix_fmt;
+    avctx->profile  = sps->ptl.general_ptl.profile_idc;
+    avctx->level    = sps->ptl.general_ptl.level_idc;
+
+    /* ignore the rest for now */
+    return 0;
+}
+
+/* Split buf into NAL units, hand VPS/SPS/PPS units to the parameter set
+ * decoders and slice units to the slice header parser; ignore the rest. */
+static int parse_nal_units(AVCodecParserContext *s, const uint8_t *buf,
+                           int buf_size, AVCodecContext *avctx)
+{
+    HEVCParserContext *priv = s->priv_data;
+    HEVCPacket *pkt = &priv->pkt;
+    int idx, err;
+
+    err = ff_hevc_split_packet(NULL, pkt, buf, buf_size, avctx, 0, 0);
+    if (err < 0)
+        return err;
+
+    for (idx = 0; idx < pkt->nb_nals; idx++) {
+        HEVCNAL *unit = &pkt->nals[idx];
+        switch (unit->type) {
+        case NAL_VPS:
+            ff_hevc_decode_nal_vps(&unit->gb, avctx, &priv->ps);
+            break;
+        case NAL_SPS:
+            ff_hevc_decode_nal_sps(&unit->gb, avctx, &priv->ps, 1);
+            break;
+        case NAL_PPS:
+            ff_hevc_decode_nal_pps(&unit->gb, avctx, &priv->ps);
+            break;
+        case NAL_TRAIL_N:    case NAL_TRAIL_R:
+        case NAL_TSA_N:      case NAL_TSA_R:
+        case NAL_STSA_N:     case NAL_STSA_R:
+        case NAL_RADL_N:     case NAL_RADL_R:
+        case NAL_RASL_N:     case NAL_RASL_R:
+        case NAL_BLA_W_LP:   case NAL_BLA_W_RADL:
+        case NAL_BLA_N_LP:   case NAL_CRA_NUT:
+        case NAL_IDR_W_RADL: case NAL_IDR_N_LP:
+            /* VCL units are rejected when the buffer is the extradata */
+            if (buf == avctx->extradata) {
+                av_log(avctx, AV_LOG_ERROR, "Invalid NAL unit: %d\n", unit->type);
+                return AVERROR_INVALIDDATA;
+            }
+            hevc_parse_slice_header(s, unit, avctx);
+            break;
+        }
+    }
+
+    return 0;
+}
+#endif
 
 /**
  * Find the end of the current frame in the bitstream.
@@ -41,7 +137,7 @@ static int hevc_find_frame_end(AVCodecParserContext *s, const uint8_t *buf,
                                int buf_size)
 {
     int i;
-    ParseContext *pc = &((HEVCParseContext *)s->priv_data)->pc;
+    ParseContext *pc = s->priv_data;
 
     for (i = 0; i < buf_size; i++) {
         int nut;
@@ -76,6 +172,7 @@ static int hevc_find_frame_end(AVCodecParserContext *s, const uint8_t *buf,
     return END_NOT_FOUND;
 }
 
+#if ADVANCED_PARSER
 /**
  * Parse NAL units of found picture and decode some basic information.
  *
@@ -84,15 +181,26 @@ static int hevc_find_frame_end(AVCodecParserContext *s, const uint8_t *buf,
  * @param buf buffer with field/frame data.
  * @param buf_size size of the buffer.
  */
-static inline int parse_nal_units(AVCodecParserContext *s, AVCodecContext *avctx,
-                      const uint8_t *buf, int buf_size)
+static inline int parse_nal_units(AVCodecParserContext *s, const uint8_t *buf,
+                           int buf_size, AVCodecContext *avctx)
 {
-    HEVCContext   *h  = &((HEVCParseContext *)s->priv_data)->h;
-    GetBitContext *gb = &h->HEVClc->gb;
-    SliceHeader   *sh = &h->sh;
+    HEVCParserContext *ctx = s->priv_data;
+    HEVCContext       *h   = &ctx->h;
+    GetBitContext      *gb;
+    SliceHeader        *sh = &h->sh;
+    HEVCParamSets *ps = &h->ps;
+    HEVCPacket   *pkt = &ctx->pkt;
     const uint8_t *buf_end = buf + buf_size;
     int state = -1, i;
     HEVCNAL *nal;
+    int is_global = buf == avctx->extradata;
+
+    if (!h->HEVClc)
+        h->HEVClc = av_mallocz(sizeof(HEVCLocalContext));
+    if (!h->HEVClc)
+        return AVERROR(ENOMEM);
+
+    gb = &h->HEVClc->gb;
 
     /* set some sane default values */
     s->pict_type         = AV_PICTURE_TYPE_I;
@@ -104,19 +212,20 @@ static inline int parse_nal_units(AVCodecParserContext *s, AVCodecContext *avctx
     if (!buf_size)
         return 0;
 
-    if (h->nals_allocated < 1) {
-        HEVCNAL *tmp = av_realloc_array(h->nals, 1, sizeof(*tmp));
+    if (pkt->nals_allocated < 1) {
+        HEVCNAL *tmp = av_realloc_array(pkt->nals, 1, sizeof(*tmp));
         if (!tmp)
             return AVERROR(ENOMEM);
-        h->nals = tmp;
-        memset(h->nals, 0, sizeof(*tmp));
-        h->nals_allocated = 1;
+        pkt->nals = tmp;
+        memset(pkt->nals, 0, sizeof(*tmp));
+        pkt->nals_allocated = 1;
     }
 
-    nal = &h->nals[0];
+    nal = &pkt->nals[0];
 
     for (;;) {
         int src_length, consumed;
+        int ret;
         buf = avpriv_find_start_code(buf, buf_end, &state);
         if (--buf + 2 >= buf_end)
             break;
@@ -130,20 +239,23 @@ static inline int parse_nal_units(AVCodecParserContext *s, AVCodecContext *avctx
                 src_length = 20;
         }
 
-        consumed = ff_hevc_extract_rbsp(h, buf, src_length, nal);
+        consumed = ff_hevc_extract_rbsp(NULL, buf, src_length, nal);
         if (consumed < 0)
             return consumed;
 
-        init_get_bits8(gb, nal->data + 2, nal->size);
+        ret = init_get_bits8(gb, nal->data + 2, nal->size);
+        if (ret < 0)
+            return ret;
+
         switch (h->nal_unit_type) {
         case NAL_VPS:
-            ff_hevc_decode_nal_vps(h);
+            ff_hevc_decode_nal_vps(gb, avctx, ps);
             break;
         case NAL_SPS:
-            ff_hevc_decode_nal_sps(h);
+            ff_hevc_decode_nal_sps(gb, avctx, ps, 1);
             break;
         case NAL_PPS:
-            ff_hevc_decode_nal_pps(h);
+            ff_hevc_decode_nal_pps(gb, avctx, ps);
             break;
         case NAL_SEI_PREFIX:
         case NAL_SEI_SUFFIX:
@@ -165,6 +277,12 @@ static inline int parse_nal_units(AVCodecParserContext *s, AVCodecContext *avctx
         case NAL_IDR_W_RADL:
         case NAL_IDR_N_LP:
         case NAL_CRA_NUT:
+
+            if (is_global) {
+                av_log(avctx, AV_LOG_ERROR, "Invalid NAL unit: %d\n", h->nal_unit_type);
+                return AVERROR_INVALIDDATA;
+            }
+
             sh->first_slice_in_pic_flag = get_bits1(gb);
             s->picture_structure = h->picture_struct;
             s->field_order = h->picture_struct;
@@ -175,34 +293,34 @@ static inline int parse_nal_units(AVCodecParserContext *s, AVCodecContext *avctx
             }
 
             sh->pps_id = get_ue_golomb(gb);
-            if (sh->pps_id >= MAX_PPS_COUNT || !h->pps_list[sh->pps_id]) {
-                av_log(h->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
+            if (sh->pps_id >= MAX_PPS_COUNT || !ps->pps_list[sh->pps_id]) {
+                av_log(avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
                 return AVERROR_INVALIDDATA;
             }
-            h->pps = (HEVCPPS*)h->pps_list[sh->pps_id]->data;
+            ps->pps = (HEVCPPS*)ps->pps_list[sh->pps_id]->data;
 
-            if (h->pps->sps_id >= MAX_SPS_COUNT || !h->sps_list[h->pps->sps_id]) {
-                av_log(h->avctx, AV_LOG_ERROR, "SPS id out of range: %d\n", h->pps->sps_id);
+            if (ps->pps->sps_id >= MAX_SPS_COUNT || !ps->sps_list[ps->pps->sps_id]) {
+                av_log(avctx, AV_LOG_ERROR, "SPS id out of range: %d\n", ps->pps->sps_id);
                 return AVERROR_INVALIDDATA;
             }
-            if (h->sps != (HEVCSPS*)h->sps_list[h->pps->sps_id]->data) {
-                h->sps = (HEVCSPS*)h->sps_list[h->pps->sps_id]->data;
-                h->vps = (HEVCVPS*)h->vps_list[h->sps->vps_id]->data;
+            if (ps->sps != (HEVCSPS*)ps->sps_list[ps->pps->sps_id]->data) {
+                ps->sps = (HEVCSPS*)ps->sps_list[ps->pps->sps_id]->data;
+                ps->vps = (HEVCVPS*)ps->vps_list[ps->sps->vps_id]->data;
             }
 
             if (!sh->first_slice_in_pic_flag) {
                 int slice_address_length;
 
-                if (h->pps->dependent_slice_segments_enabled_flag)
+                if (ps->pps->dependent_slice_segments_enabled_flag)
                     sh->dependent_slice_segment_flag = get_bits1(gb);
                 else
                     sh->dependent_slice_segment_flag = 0;
 
-                slice_address_length = av_ceil_log2_c(h->sps->ctb_width *
-                                                      h->sps->ctb_height);
-                sh->slice_segment_addr = slice_address_length ? get_bits(gb, slice_address_length) : 0;
-                if (sh->slice_segment_addr >= h->sps->ctb_width * h->sps->ctb_height) {
-                    av_log(h->avctx, AV_LOG_ERROR, "Invalid slice segment address: %u.\n",
+                slice_address_length = av_ceil_log2_c(ps->sps->ctb_width *
+                                                      ps->sps->ctb_height);
+                sh->slice_segment_addr = get_bitsz(gb, slice_address_length);
+                if (sh->slice_segment_addr >= ps->sps->ctb_width * ps->sps->ctb_height) {
+                    av_log(avctx, AV_LOG_ERROR, "Invalid slice segment address: %u.\n",
                            sh->slice_segment_addr);
                     return AVERROR_INVALIDDATA;
                 }
@@ -212,13 +330,13 @@ static inline int parse_nal_units(AVCodecParserContext *s, AVCodecContext *avctx
             if (sh->dependent_slice_segment_flag)
                 break;
 
-            for (i = 0; i < h->pps->num_extra_slice_header_bits; i++)
+            for (i = 0; i < ps->pps->num_extra_slice_header_bits; i++)
                 skip_bits(gb, 1); // slice_reserved_undetermined_flag[]
 
             sh->slice_type = get_ue_golomb(gb);
             if (!(sh->slice_type == I_SLICE || sh->slice_type == P_SLICE ||
                   sh->slice_type == B_SLICE)) {
-                av_log(h->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
+                av_log(avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
                        sh->slice_type);
                 return AVERROR_INVALIDDATA;
             }
@@ -226,14 +344,14 @@ static inline int parse_nal_units(AVCodecParserContext *s, AVCodecContext *avctx
                            sh->slice_type == P_SLICE ? AV_PICTURE_TYPE_P :
                                                        AV_PICTURE_TYPE_I;
 
-            if (h->pps->output_flag_present_flag)
+            if (ps->pps->output_flag_present_flag)
                 sh->pic_output_flag = get_bits1(gb);
 
-            if (h->sps->separate_colour_plane_flag)
+            if (ps->sps->separate_colour_plane_flag)
                 sh->colour_plane_id = get_bits(gb, 2);
 
             if (!IS_IDR(h)) {
-                sh->pic_order_cnt_lsb = get_bits(gb, h->sps->log2_max_poc_lsb);
+                sh->pic_order_cnt_lsb = get_bits(gb, ps->sps->log2_max_poc_lsb);
                 s->output_picture_number = h->poc = ff_hevc_compute_poc(h, sh->pic_order_cnt_lsb);
             } else
                 s->output_picture_number = h->poc = 0;
@@ -253,9 +371,11 @@ static inline int parse_nal_units(AVCodecParserContext *s, AVCodecContext *avctx
         buf += consumed;
     }
     /* didn't find a picture! */
-    av_log(h->avctx, AV_LOG_ERROR, "missing picture in access unit\n");
+    if (!is_global)
+        av_log(h->avctx, AV_LOG_ERROR, "missing picture in access unit\n");
     return -1;
 }
+#endif
 
 static int hevc_parse(AVCodecParserContext *s,
                       AVCodecContext *avctx,
@@ -263,7 +383,13 @@ static int hevc_parse(AVCodecParserContext *s,
                       const uint8_t *buf, int buf_size)
 {
     int next;
-    ParseContext *pc = &((HEVCParseContext *)s->priv_data)->pc;
+    HEVCParserContext *ctx = s->priv_data;
+    ParseContext *pc = &ctx->pc;
+
+    if (avctx->extradata && !ctx->parsed_extradata) {
+        parse_nal_units(s, avctx->extradata, avctx->extradata_size, avctx);
+        ctx->parsed_extradata = 1;
+    }
 
     if (s->flags & PARSER_FLAG_COMPLETE_FRAMES) {
         next = buf_size;
@@ -276,7 +402,7 @@ static int hevc_parse(AVCodecParserContext *s,
         }
     }
 
-    parse_nal_units(s, avctx, buf, buf_size);
+    parse_nal_units(s, buf, buf_size, avctx);
 
     *poutbuf      = buf;
     *poutbuf_size = buf_size;
@@ -288,64 +414,77 @@ static int hevc_split(AVCodecContext *avctx, const uint8_t *buf, int buf_size)
 {
     const uint8_t *ptr = buf, *end = buf + buf_size;
     uint32_t state = -1;
-    int has_ps = 0, nut;
+    int has_vps = 0;
+    int has_sps = 0;
+    int has_pps = 0;
+    int nut;
 
     while (ptr < end) {
         ptr = avpriv_find_start_code(ptr, end, &state);
         if ((state >> 8) != START_CODE)
             break;
         nut = (state >> 1) & 0x3F;
-        if (nut >= NAL_VPS && nut <= NAL_PPS)
-            has_ps = 1;
-        else if (has_ps)
-            return ptr - 4 - buf;
-        else // no parameter set at the beginning of the stream
-            return 0;
+        if (nut == NAL_VPS)
+            has_vps = 1;
+        else if (nut == NAL_SPS)
+            has_sps = 1;
+        else if (nut == NAL_PPS)
+            has_pps = 1;
+        else if ((nut != NAL_SEI_PREFIX || has_pps) &&
+                  nut != NAL_AUD) {
+            if (has_vps && has_sps) {
+                while (ptr - 4 > buf && ptr[-5] == 0)
+                    ptr--;
+                return ptr - 4 - buf;
+            }
+        }
     }
     return 0;
 }
 
-static int hevc_init(AVCodecParserContext *s)
+static void hevc_parser_close(AVCodecParserContext *s)
 {
-    HEVCContext  *h  = &((HEVCParseContext *)s->priv_data)->h;
-    h->HEVClc = av_mallocz(sizeof(HEVCLocalContext));
-    if (!h->HEVClc)
-        return AVERROR(ENOMEM);
-    h->skipped_bytes_pos_size = INT_MAX;
+    HEVCParserContext *ctx = s->priv_data;
+    int i;
 
-    return 0;
-}
+#if ADVANCED_PARSER
+    HEVCContext  *h  = &ctx->h;
 
-static void hevc_close(AVCodecParserContext *s)
-{
-    int i;
-    HEVCContext  *h  = &((HEVCParseContext *)s->priv_data)->h;
-    ParseContext *pc = &((HEVCParseContext *)s->priv_data)->pc;
+    for (i = 0; i < FF_ARRAY_ELEMS(h->ps.vps_list); i++)
+        av_buffer_unref(&h->ps.vps_list[i]);
+    for (i = 0; i < FF_ARRAY_ELEMS(h->ps.sps_list); i++)
+        av_buffer_unref(&h->ps.sps_list[i]);
+    for (i = 0; i < FF_ARRAY_ELEMS(h->ps.pps_list); i++)
+        av_buffer_unref(&h->ps.pps_list[i]);
+
+    h->ps.sps = NULL;
 
-    av_freep(&h->skipped_bytes_pos);
     av_freep(&h->HEVClc);
-    av_freep(&pc->buffer);
+#endif
 
-    for (i = 0; i < FF_ARRAY_ELEMS(h->vps_list); i++)
-        av_buffer_unref(&h->vps_list[i]);
-    for (i = 0; i < FF_ARRAY_ELEMS(h->sps_list); i++)
-        av_buffer_unref(&h->sps_list[i]);
-    for (i = 0; i < FF_ARRAY_ELEMS(h->pps_list); i++)
-        av_buffer_unref(&h->pps_list[i]);
+    for (i = 0; i < FF_ARRAY_ELEMS(ctx->ps.vps_list); i++)
+        av_buffer_unref(&ctx->ps.vps_list[i]);
+    for (i = 0; i < FF_ARRAY_ELEMS(ctx->ps.sps_list); i++)
+        av_buffer_unref(&ctx->ps.sps_list[i]);
+    for (i = 0; i < FF_ARRAY_ELEMS(ctx->ps.pps_list); i++)
+        av_buffer_unref(&ctx->ps.pps_list[i]);
 
-    h->sps = NULL;
+    ctx->ps.sps = NULL;
+
+    for (i = 0; i < ctx->pkt.nals_allocated; i++) {
+        av_freep(&ctx->pkt.nals[i].rbsp_buffer);
+        av_freep(&ctx->pkt.nals[i].skipped_bytes_pos);
+    }
+    av_freep(&ctx->pkt.nals);
+    ctx->pkt.nals_allocated = 0;
 
-    for (i = 0; i < h->nals_allocated; i++)
-        av_freep(&h->nals[i].rbsp_buffer);
-    av_freep(&h->nals);
-    h->nals_allocated = 0;
+    av_freep(&ctx->pc.buffer);
 }
 
 AVCodecParser ff_hevc_parser = {
     .codec_ids      = { AV_CODEC_ID_HEVC },
-    .priv_data_size = sizeof(HEVCParseContext),
-    .parser_init    = hevc_init,
+    .priv_data_size = sizeof(HEVCParserContext),
     .parser_parse   = hevc_parse,
-    .parser_close   = hevc_close,
+    .parser_close   = hevc_parser_close,
     .split          = hevc_split,
 };
diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c
index 757f6662..64d6e2f0 100644
--- a/libavcodec/hevc_ps.c
+++ b/libavcodec/hevc_ps.c
@@ -69,14 +69,14 @@ static const AVRational vui_sar[] = {
     {  2,   1 },
 };
 
-static void remove_pps(HEVCContext *s, int id)
+static void remove_pps(HEVCParamSets *s, int id)
 {
     if (s->pps_list[id] && s->pps == (const HEVCPPS*)s->pps_list[id]->data)
         s->pps = NULL;
     av_buffer_unref(&s->pps_list[id]);
 }
 
-static void remove_sps(HEVCContext *s, int id)
+static void remove_sps(HEVCParamSets *s, int id)
 {
     int i;
     if (s->sps_list[id]) {
@@ -93,7 +93,7 @@ static void remove_sps(HEVCContext *s, int id)
     av_buffer_unref(&s->sps_list[id]);
 }
 
-static void remove_vps(HEVCContext *s, int id)
+static void remove_vps(HEVCParamSets *s, int id)
 {
     int i;
     if (s->vps_list[id]) {
@@ -107,10 +107,9 @@ static void remove_vps(HEVCContext *s, int id)
     av_buffer_unref(&s->vps_list[id]);
 }
 
-int ff_hevc_decode_short_term_rps(HEVCContext *s, ShortTermRPS *rps,
-                                  const HEVCSPS *sps, int is_slice_header)
+int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx,
+                                  ShortTermRPS *rps, const HEVCSPS *sps, int is_slice_header)
 {
-    HEVCLocalContext *lc = s->HEVClc;
     uint8_t rps_predict = 0;
     int delta_poc;
     int k0 = 0;
@@ -118,8 +117,6 @@ int ff_hevc_decode_short_term_rps(HEVCContext *s, ShortTermRPS *rps,
     int k  = 0;
     int i;
 
-    GetBitContext *gb = &lc->gb;
-
     if (rps != sps->st_rps && sps->nb_st_rps)
         rps_predict = get_bits1(gb);
 
@@ -133,19 +130,20 @@ int ff_hevc_decode_short_term_rps(HEVCContext *s, ShortTermRPS *rps,
         if (is_slice_header) {
             unsigned int delta_idx = get_ue_golomb_long(gb) + 1;
             if (delta_idx > sps->nb_st_rps) {
-                av_log(s->avctx, AV_LOG_ERROR,
+                av_log(avctx, AV_LOG_ERROR,
                        "Invalid value of delta_idx in slice header RPS: %d > %d.\n",
                        delta_idx, sps->nb_st_rps);
                 return AVERROR_INVALIDDATA;
             }
             rps_ridx = &sps->st_rps[sps->nb_st_rps - delta_idx];
+            rps->rps_idx_num_delta_pocs = rps_ridx->num_delta_pocs;
         } else
             rps_ridx = &sps->st_rps[rps - sps->st_rps - 1];
 
         delta_rps_sign = get_bits1(gb);
         abs_delta_rps  = get_ue_golomb_long(gb) + 1;
         if (abs_delta_rps < 1 || abs_delta_rps > 32768) {
-            av_log(s->avctx, AV_LOG_ERROR,
+            av_log(avctx, AV_LOG_ERROR,
                    "Invalid value of abs_delta_rps: %d\n",
                    abs_delta_rps);
             return AVERROR_INVALIDDATA;
@@ -211,7 +209,7 @@ int ff_hevc_decode_short_term_rps(HEVCContext *s, ShortTermRPS *rps,
 
         if (rps->num_negative_pics >= MAX_REFS ||
             nb_positive_pics >= MAX_REFS) {
-            av_log(s->avctx, AV_LOG_ERROR, "Too many refs in a short term RPS.\n");
+            av_log(avctx, AV_LOG_ERROR, "Too many refs in a short term RPS.\n");
             return AVERROR_INVALIDDATA;
         }
 
@@ -237,11 +235,10 @@ int ff_hevc_decode_short_term_rps(HEVCContext *s, ShortTermRPS *rps,
 }
 
 
-static int decode_profile_tier_level(HEVCContext *s, PTLCommon *ptl)
+static int decode_profile_tier_level(GetBitContext *gb, AVCodecContext *avctx,
+                                      PTLCommon *ptl)
 {
     int i;
-    HEVCLocalContext *lc = s->HEVClc;
-    GetBitContext *gb = &lc->gb;
 
     if (get_bits_left(gb) < 2+1+5 + 32 + 4 + 16 + 16 + 12)
         return -1;
@@ -250,18 +247,22 @@ static int decode_profile_tier_level(HEVCContext *s, PTLCommon *ptl)
     ptl->tier_flag     = get_bits1(gb);
     ptl->profile_idc   = get_bits(gb, 5);
     if (ptl->profile_idc == FF_PROFILE_HEVC_MAIN)
-        av_log(s->avctx, AV_LOG_DEBUG, "Main profile bitstream\n");
+        av_log(avctx, AV_LOG_DEBUG, "Main profile bitstream\n");
     else if (ptl->profile_idc == FF_PROFILE_HEVC_MAIN_10)
-        av_log(s->avctx, AV_LOG_DEBUG, "Main 10 profile bitstream\n");
+        av_log(avctx, AV_LOG_DEBUG, "Main 10 profile bitstream\n");
     else if (ptl->profile_idc == FF_PROFILE_HEVC_MAIN_STILL_PICTURE)
-        av_log(s->avctx, AV_LOG_DEBUG, "Main Still Picture profile bitstream\n");
+        av_log(avctx, AV_LOG_DEBUG, "Main Still Picture profile bitstream\n");
     else if (ptl->profile_idc == FF_PROFILE_HEVC_REXT)
-        av_log(s->avctx, AV_LOG_DEBUG, "Range Extension profile bitstream\n");
+        av_log(avctx, AV_LOG_DEBUG, "Range Extension profile bitstream\n");
     else
-        av_log(s->avctx, AV_LOG_WARNING, "Unknown HEVC profile: %d\n", ptl->profile_idc);
+        av_log(avctx, AV_LOG_WARNING, "Unknown HEVC profile: %d\n", ptl->profile_idc);
 
-    for (i = 0; i < 32; i++)
+    for (i = 0; i < 32; i++) {
         ptl->profile_compatibility_flag[i] = get_bits1(gb);
+
+        if (ptl->profile_idc == 0 && i > 0 && ptl->profile_compatibility_flag[i])
+            ptl->profile_idc = i;
+    }
     ptl->progressive_source_flag    = get_bits1(gb);
     ptl->interlaced_source_flag     = get_bits1(gb);
     ptl->non_packed_constraint_flag = get_bits1(gb);
@@ -274,14 +275,13 @@ static int decode_profile_tier_level(HEVCContext *s, PTLCommon *ptl)
     return 0;
 }
 
-static int parse_ptl(HEVCContext *s, PTL *ptl, int max_num_sub_layers)
+static int parse_ptl(GetBitContext *gb, AVCodecContext *avctx,
+                      PTL *ptl, int max_num_sub_layers)
 {
     int i;
-    HEVCLocalContext *lc = s->HEVClc;
-    GetBitContext *gb = &lc->gb;
-    if (decode_profile_tier_level(s, &ptl->general_ptl) < 0 ||
+    if (decode_profile_tier_level(gb, avctx, &ptl->general_ptl) < 0 ||
         get_bits_left(gb) < 8 + 8*2) {
-        av_log(s->avctx, AV_LOG_ERROR, "PTL information too short\n");
+        av_log(avctx, AV_LOG_ERROR, "PTL information too short\n");
         return -1;
     }
 
@@ -297,14 +297,14 @@ static int parse_ptl(HEVCContext *s, PTL *ptl, int max_num_sub_layers)
             skip_bits(gb, 2); // reserved_zero_2bits[i]
     for (i = 0; i < max_num_sub_layers - 1; i++) {
         if (ptl->sub_layer_profile_present_flag[i] &&
-            decode_profile_tier_level(s, &ptl->sub_layer_ptl[i]) < 0) {
-            av_log(s->avctx, AV_LOG_ERROR,
+            decode_profile_tier_level(gb, avctx, &ptl->sub_layer_ptl[i]) < 0) {
+            av_log(avctx, AV_LOG_ERROR,
                    "PTL information for sublayer %i too short\n", i);
             return -1;
         }
         if (ptl->sub_layer_level_present_flag[i]) {
             if (get_bits_left(gb) < 8) {
-                av_log(s->avctx, AV_LOG_ERROR,
+                av_log(avctx, AV_LOG_ERROR,
                        "Not enough data for sublayer %i level_idc\n", i);
                 return -1;
             } else
@@ -315,10 +315,9 @@ static int parse_ptl(HEVCContext *s, PTL *ptl, int max_num_sub_layers)
     return 0;
 }
 
-static void decode_sublayer_hrd(HEVCContext *s, unsigned int nb_cpb,
+static void decode_sublayer_hrd(GetBitContext *gb, unsigned int nb_cpb,
                                 int subpic_params_present)
 {
-    GetBitContext *gb = &s->HEVClc->gb;
     int i;
 
     for (i = 0; i < nb_cpb; i++) {
@@ -333,10 +332,9 @@ static void decode_sublayer_hrd(HEVCContext *s, unsigned int nb_cpb,
     }
 }
 
-static int decode_hrd(HEVCContext *s, int common_inf_present,
+static int decode_hrd(GetBitContext *gb, int common_inf_present,
                        int max_sublayers)
 {
-    GetBitContext *gb = &s->HEVClc->gb;
     int nal_params_present = 0, vcl_params_present = 0;
     int subpic_params_present = 0;
     int i;
@@ -383,23 +381,23 @@ static int decode_hrd(HEVCContext *s, int common_inf_present,
         if (!low_delay) {
             nb_cpb = get_ue_golomb_long(gb) + 1;
             if (nb_cpb < 1 || nb_cpb > 32) {
-                av_log(s->avctx, AV_LOG_ERROR, "nb_cpb %d invalid\n", nb_cpb);
+                av_log(NULL, AV_LOG_ERROR, "nb_cpb %d invalid\n", nb_cpb);
                 return AVERROR_INVALIDDATA;
             }
         }
 
         if (nal_params_present)
-            decode_sublayer_hrd(s, nb_cpb, subpic_params_present);
+            decode_sublayer_hrd(gb, nb_cpb, subpic_params_present);
         if (vcl_params_present)
-            decode_sublayer_hrd(s, nb_cpb, subpic_params_present);
+            decode_sublayer_hrd(gb, nb_cpb, subpic_params_present);
     }
     return 0;
 }
 
-int ff_hevc_decode_nal_vps(HEVCContext *s)
+int ff_hevc_decode_nal_vps(GetBitContext *gb, AVCodecContext *avctx,
+                           HEVCParamSets *ps)
 {
     int i,j;
-    GetBitContext *gb = &s->HEVClc->gb;
     int vps_id = 0;
     HEVCVPS *vps;
     AVBufferRef *vps_buf = av_buffer_allocz(sizeof(*vps));
@@ -408,16 +406,16 @@ int ff_hevc_decode_nal_vps(HEVCContext *s)
         return AVERROR(ENOMEM);
     vps = (HEVCVPS*)vps_buf->data;
 
-    av_log(s->avctx, AV_LOG_DEBUG, "Decoding VPS\n");
+    av_log(avctx, AV_LOG_DEBUG, "Decoding VPS\n");
 
     vps_id = get_bits(gb, 4);
     if (vps_id >= MAX_VPS_COUNT) {
-        av_log(s->avctx, AV_LOG_ERROR, "VPS id out of range: %d\n", vps_id);
+        av_log(avctx, AV_LOG_ERROR, "VPS id out of range: %d\n", vps_id);
         goto err;
     }
 
     if (get_bits(gb, 2) != 3) { // vps_reserved_three_2bits
-        av_log(s->avctx, AV_LOG_ERROR, "vps_reserved_three_2bits is not three\n");
+        av_log(avctx, AV_LOG_ERROR, "vps_reserved_three_2bits is not three\n");
         goto err;
     }
 
@@ -426,17 +424,17 @@ int ff_hevc_decode_nal_vps(HEVCContext *s)
     vps->vps_temporal_id_nesting_flag = get_bits1(gb);
 
     if (get_bits(gb, 16) != 0xffff) { // vps_reserved_ffff_16bits
-        av_log(s->avctx, AV_LOG_ERROR, "vps_reserved_ffff_16bits is not 0xffff\n");
+        av_log(avctx, AV_LOG_ERROR, "vps_reserved_ffff_16bits is not 0xffff\n");
         goto err;
     }
 
     if (vps->vps_max_sub_layers > MAX_SUB_LAYERS) {
-        av_log(s->avctx, AV_LOG_ERROR, "vps_max_sub_layers out of range: %d\n",
+        av_log(avctx, AV_LOG_ERROR, "vps_max_sub_layers out of range: %d\n",
                vps->vps_max_sub_layers);
         goto err;
     }
 
-    if (parse_ptl(s, &vps->ptl, vps->vps_max_sub_layers) < 0)
+    if (parse_ptl(gb, avctx, &vps->ptl, vps->vps_max_sub_layers) < 0)
         goto err;
 
     vps->vps_sub_layer_ordering_info_present_flag = get_bits1(gb);
@@ -448,14 +446,14 @@ int ff_hevc_decode_nal_vps(HEVCContext *s)
         vps->vps_max_latency_increase[i]  = get_ue_golomb_long(gb) - 1;
 
         if (vps->vps_max_dec_pic_buffering[i] > MAX_DPB_SIZE || !vps->vps_max_dec_pic_buffering[i]) {
-            av_log(s->avctx, AV_LOG_ERROR, "vps_max_dec_pic_buffering_minus1 out of range: %d\n",
+            av_log(avctx, AV_LOG_ERROR, "vps_max_dec_pic_buffering_minus1 out of range: %d\n",
                    vps->vps_max_dec_pic_buffering[i] - 1);
             goto err;
         }
         if (vps->vps_num_reorder_pics[i] > vps->vps_max_dec_pic_buffering[i] - 1) {
-            av_log(s->avctx, AV_LOG_WARNING, "vps_max_num_reorder_pics out of range: %d\n",
+            av_log(avctx, AV_LOG_WARNING, "vps_max_num_reorder_pics out of range: %d\n",
                    vps->vps_num_reorder_pics[i]);
-            if (s->avctx->err_recognition & AV_EF_EXPLODE)
+            if (avctx->err_recognition & AV_EF_EXPLODE)
                 goto err;
         }
     }
@@ -464,7 +462,7 @@ int ff_hevc_decode_nal_vps(HEVCContext *s)
     vps->vps_num_layer_sets = get_ue_golomb_long(gb) + 1;
     if (vps->vps_num_layer_sets < 1 || vps->vps_num_layer_sets > 1024 ||
         (vps->vps_num_layer_sets - 1LL) * (vps->vps_max_layer_id + 1LL) > get_bits_left(gb)) {
-        av_log(s->avctx, AV_LOG_ERROR, "too many layer_id_included_flags\n");
+        av_log(avctx, AV_LOG_ERROR, "too many layer_id_included_flags\n");
         goto err;
     }
 
@@ -481,7 +479,7 @@ int ff_hevc_decode_nal_vps(HEVCContext *s)
             vps->vps_num_ticks_poc_diff_one = get_ue_golomb_long(gb) + 1;
         vps->vps_num_hrd_parameters = get_ue_golomb_long(gb);
         if (vps->vps_num_hrd_parameters > (unsigned)vps->vps_num_layer_sets) {
-            av_log(s->avctx, AV_LOG_ERROR,
+            av_log(avctx, AV_LOG_ERROR,
                    "vps_num_hrd_parameters %d is invalid\n", vps->vps_num_hrd_parameters);
             goto err;
         }
@@ -491,24 +489,24 @@ int ff_hevc_decode_nal_vps(HEVCContext *s)
             get_ue_golomb_long(gb); // hrd_layer_set_idx
             if (i)
                 common_inf_present = get_bits1(gb);
-            decode_hrd(s, common_inf_present, vps->vps_max_sub_layers);
+            decode_hrd(gb, common_inf_present, vps->vps_max_sub_layers);
         }
     }
     get_bits1(gb); /* vps_extension_flag */
 
     if (get_bits_left(gb) < 0) {
-        av_log(s->avctx, AV_LOG_ERROR,
+        av_log(avctx, AV_LOG_ERROR,
                "Overread VPS by %d bits\n", -get_bits_left(gb));
-        if (s->vps_list[vps_id])
+        if (ps->vps_list[vps_id])
             goto err;
     }
 
-    if (s->vps_list[vps_id] &&
-        !memcmp(s->vps_list[vps_id]->data, vps_buf->data, vps_buf->size)) {
+    if (ps->vps_list[vps_id] &&
+        !memcmp(ps->vps_list[vps_id]->data, vps_buf->data, vps_buf->size)) {
         av_buffer_unref(&vps_buf);
     } else {
-        remove_vps(s, vps_id);
-        s->vps_list[vps_id] = vps_buf;
+        remove_vps(ps, vps_id);
+        ps->vps_list[vps_id] = vps_buf;
     }
 
     return 0;
@@ -518,14 +516,14 @@ int ff_hevc_decode_nal_vps(HEVCContext *s)
     return AVERROR_INVALIDDATA;
 }
 
-static void decode_vui(HEVCContext *s, HEVCSPS *sps)
+static void decode_vui(GetBitContext *gb, AVCodecContext *avctx,
+                       int apply_defdispwin, HEVCSPS *sps)
 {
     VUI *vui          = &sps->vui;
-    GetBitContext *gb = &s->HEVClc->gb;
     GetBitContext backup;
     int sar_present, alt = 0;
 
-    av_log(s->avctx, AV_LOG_DEBUG, "Decoding VUI\n");
+    av_log(avctx, AV_LOG_DEBUG, "Decoding VUI\n");
 
     sar_present = get_bits1(gb);
     if (sar_present) {
@@ -536,7 +534,7 @@ static void decode_vui(HEVCContext *s, HEVCSPS *sps)
             vui->sar.num = get_bits(gb, 16);
             vui->sar.den = get_bits(gb, 16);
         } else
-            av_log(s->avctx, AV_LOG_WARNING,
+            av_log(avctx, AV_LOG_WARNING,
                    "Unknown SAR index: %u.\n", sar_idx);
     }
 
@@ -578,7 +576,7 @@ static void decode_vui(HEVCContext *s, HEVCSPS *sps)
 
     if (get_bits_left(gb) >= 68 && show_bits_long(gb, 21) == 0x100000) {
         vui->default_display_window_flag = 0;
-        av_log(s->avctx, AV_LOG_WARNING, "Invalid default display window\n");
+        av_log(avctx, AV_LOG_WARNING, "Invalid default display window\n");
     } else
         vui->default_display_window_flag = get_bits1(gb);
     // Backup context in case an alternate header is detected
@@ -591,9 +589,9 @@ static void decode_vui(HEVCContext *s, HEVCSPS *sps)
         vui->def_disp_win.top_offset    = get_ue_golomb_long(gb) * 2;
         vui->def_disp_win.bottom_offset = get_ue_golomb_long(gb) * 2;
 
-        if (s->apply_defdispwin &&
-            s->avctx->flags2 & CODEC_FLAG2_IGNORE_CROP) {
-            av_log(s->avctx, AV_LOG_DEBUG,
+        if (apply_defdispwin &&
+            avctx->flags2 & AV_CODEC_FLAG2_IGNORE_CROP) {
+            av_log(avctx, AV_LOG_DEBUG,
                    "discarding vui default display window, "
                    "original values are l:%u r:%u t:%u b:%u\n",
                    vui->def_disp_win.left_offset,
@@ -614,7 +612,7 @@ static void decode_vui(HEVCContext *s, HEVCSPS *sps)
         if( get_bits_left(gb) < 66) {
             // The alternate syntax seem to have timing info located
             // at where def_disp_win is normally located
-            av_log(s->avctx, AV_LOG_WARNING,
+            av_log(avctx, AV_LOG_WARNING,
                    "Strange VUI timing information, retrying...\n");
             vui->default_display_window_flag = 0;
             memset(&vui->def_disp_win, 0, sizeof(vui->def_disp_win));
@@ -624,7 +622,7 @@ static void decode_vui(HEVCContext *s, HEVCSPS *sps)
         vui->vui_num_units_in_tick               = get_bits_long(gb, 32);
         vui->vui_time_scale                      = get_bits_long(gb, 32);
         if (alt) {
-            av_log(s->avctx, AV_LOG_INFO, "Retry got %i/%i fps\n",
+            av_log(avctx, AV_LOG_INFO, "Retry got %i/%i fps\n",
                    vui->vui_time_scale, vui->vui_num_units_in_tick);
         }
         vui->vui_poc_proportional_to_timing_flag = get_bits1(gb);
@@ -632,7 +630,7 @@ static void decode_vui(HEVCContext *s, HEVCSPS *sps)
             vui->vui_num_ticks_poc_diff_one_minus1 = get_ue_golomb_long(gb);
         vui->vui_hrd_parameters_present_flag = get_bits1(gb);
         if (vui->vui_hrd_parameters_present_flag)
-            decode_hrd(s, 1, sps->max_sub_layers);
+            decode_hrd(gb, 1, sps->max_sub_layers);
     }
 
     vui->bitstream_restriction_flag = get_bits1(gb);
@@ -678,9 +676,8 @@ static void set_default_scaling_list_data(ScalingList *sl)
     memcpy(sl->sl[3][5], default_scaling_list_inter, 64);
 }
 
-static int scaling_list_data(HEVCContext *s, ScalingList *sl, HEVCSPS *sps)
+static int scaling_list_data(GetBitContext *gb, AVCodecContext *avctx, ScalingList *sl, HEVCSPS *sps)
 {
-    GetBitContext *gb = &s->HEVClc->gb;
     uint8_t scaling_list_pred_mode_flag;
     int32_t scaling_list_dc_coef[2][6];
     int size_id, matrix_id, pos;
@@ -696,7 +693,7 @@ static int scaling_list_data(HEVCContext *s, ScalingList *sl, HEVCSPS *sps)
                 if (delta) {
                     // Copy from previous array.
                     if (matrix_id < delta) {
-                        av_log(s->avctx, AV_LOG_ERROR,
+                        av_log(avctx, AV_LOG_ERROR,
                                "Invalid delta in scaling list data: %d.\n", delta);
                         return AVERROR_INVALIDDATA;
                     }
@@ -750,62 +747,101 @@ static int scaling_list_data(HEVCContext *s, ScalingList *sl, HEVCSPS *sps)
     return 0;
 }
 
-int ff_hevc_decode_nal_sps(HEVCContext *s)
+/* Set sps->pix_fmt from bit_depth/chroma_format_idc, then hshift/vshift/pixel_shift; returns 0 or a negative AVERROR. */
+static int map_pixel_format(AVCodecContext *avctx, HEVCSPS *sps)
 {
     const AVPixFmtDescriptor *desc;
-    GetBitContext *gb = &s->HEVClc->gb;
+    switch (sps->bit_depth) {
+    case 8:
+        if (sps->chroma_format_idc == 0) sps->pix_fmt = AV_PIX_FMT_GRAY8;
+        if (sps->chroma_format_idc == 1) sps->pix_fmt = AV_PIX_FMT_YUV420P;
+        if (sps->chroma_format_idc == 2) sps->pix_fmt = AV_PIX_FMT_YUV422P;
+        if (sps->chroma_format_idc == 3) sps->pix_fmt = AV_PIX_FMT_YUV444P;
+        break;
+    case 9:
+        if (sps->chroma_format_idc == 0) sps->pix_fmt = AV_PIX_FMT_GRAY16;
+        if (sps->chroma_format_idc == 1) sps->pix_fmt = AV_PIX_FMT_YUV420P9;
+        if (sps->chroma_format_idc == 2) sps->pix_fmt = AV_PIX_FMT_YUV422P9;
+        if (sps->chroma_format_idc == 3) sps->pix_fmt = AV_PIX_FMT_YUV444P9;
+        break;
+    case 10:
+        if (sps->chroma_format_idc == 0) sps->pix_fmt = AV_PIX_FMT_GRAY16;
+        if (sps->chroma_format_idc == 1) sps->pix_fmt = AV_PIX_FMT_YUV420P10;
+        if (sps->chroma_format_idc == 2) sps->pix_fmt = AV_PIX_FMT_YUV422P10;
+        if (sps->chroma_format_idc == 3) sps->pix_fmt = AV_PIX_FMT_YUV444P10;
+        break;
+    case 12:
+        if (sps->chroma_format_idc == 0) sps->pix_fmt = AV_PIX_FMT_GRAY16;
+        if (sps->chroma_format_idc == 1) sps->pix_fmt = AV_PIX_FMT_YUV420P12;
+        if (sps->chroma_format_idc == 2) sps->pix_fmt = AV_PIX_FMT_YUV422P12;
+        if (sps->chroma_format_idc == 3) sps->pix_fmt = AV_PIX_FMT_YUV444P12;
+        break;
+    default:
+        av_log(avctx, AV_LOG_ERROR,
+               "4:2:0, 4:2:2, 4:4:4 supports are currently specified for 8, 10 and 12 bits.\n");
+        av_log(avctx, AV_LOG_ERROR, "chroma_format_idc is %d, depth is %d\n",
+               sps->chroma_format_idc, sps->bit_depth);
+        return AVERROR_INVALIDDATA;
+    }
+
+    desc = av_pix_fmt_desc_get(sps->pix_fmt);
+    if (!desc)
+        return AVERROR(EINVAL);
+    /* Index 0 gets no shift; indices 1 and 2 take the descriptor's log2 chroma subsampling. */
+    sps->hshift[0] = sps->vshift[0] = 0;
+    sps->hshift[2] = sps->hshift[1] = desc->log2_chroma_w;
+    sps->vshift[2] = sps->vshift[1] = desc->log2_chroma_h;
+    /* pixel_shift is 1 for bit depths above 8, 0 otherwise. */
+    sps->pixel_shift = sps->bit_depth > 8;
+
+    return 0;
+}
+
+int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
+                      int apply_defdispwin, AVBufferRef **vps_list, AVCodecContext *avctx)
+{
     int ret = 0;
-    unsigned int sps_id = 0;
     int log2_diff_max_min_transform_block_size;
     int bit_depth_chroma, start, vui_present, sublayer_ordering_info;
     int i;
 
-    HEVCSPS *sps;
-    AVBufferRef *sps_buf = av_buffer_allocz(sizeof(*sps));
-
-    if (!sps_buf)
-        return AVERROR(ENOMEM);
-    sps = (HEVCSPS*)sps_buf->data;
-
-    av_log(s->avctx, AV_LOG_DEBUG, "Decoding SPS\n");
-
     // Coded parameters
 
     sps->vps_id = get_bits(gb, 4);
     if (sps->vps_id >= MAX_VPS_COUNT) {
-        av_log(s->avctx, AV_LOG_ERROR, "VPS id out of range: %d\n", sps->vps_id);
-        ret = AVERROR_INVALIDDATA;
-        goto err;
+        av_log(avctx, AV_LOG_ERROR, "VPS id out of range: %d\n", sps->vps_id);
+        return AVERROR_INVALIDDATA;
     }
 
-    if (!s->vps_list[sps->vps_id]) {
-        av_log(s->avctx, AV_LOG_ERROR, "VPS %d does not exist\n",
+    if (vps_list && !vps_list[sps->vps_id]) {
+        av_log(avctx, AV_LOG_ERROR, "VPS %d does not exist\n",
                sps->vps_id);
-        ret = AVERROR_INVALIDDATA;
-        goto err;
+        return AVERROR_INVALIDDATA;
     }
 
     sps->max_sub_layers = get_bits(gb, 3) + 1;
     if (sps->max_sub_layers > MAX_SUB_LAYERS) {
-        av_log(s->avctx, AV_LOG_ERROR, "sps_max_sub_layers out of range: %d\n",
+        av_log(avctx, AV_LOG_ERROR, "sps_max_sub_layers out of range: %d\n",
                sps->max_sub_layers);
-        ret = AVERROR_INVALIDDATA;
-        goto err;
+        return AVERROR_INVALIDDATA;
     }
 
     skip_bits1(gb); // temporal_id_nesting_flag
 
-    if (parse_ptl(s, &sps->ptl, sps->max_sub_layers) < 0)
-        goto err;
+    if ((ret = parse_ptl(gb, avctx, &sps->ptl, sps->max_sub_layers)) < 0)
+        return ret;
 
-    sps_id = get_ue_golomb_long(gb);
-    if (sps_id >= MAX_SPS_COUNT) {
-        av_log(s->avctx, AV_LOG_ERROR, "SPS id out of range: %d\n", sps_id);
-        ret = AVERROR_INVALIDDATA;
-        goto err;
+    *sps_id = get_ue_golomb_long(gb);
+    if (*sps_id >= MAX_SPS_COUNT) {
+        av_log(avctx, AV_LOG_ERROR, "SPS id out of range: %d\n", *sps_id);
+        return AVERROR_INVALIDDATA;
     }
 
     sps->chroma_format_idc = get_ue_golomb_long(gb);
+    if (sps->chroma_format_idc > 3U) {
+        av_log(avctx, AV_LOG_ERROR, "chroma_format_idc %d is invalid\n", sps->chroma_format_idc);
+        return AVERROR_INVALIDDATA;
+    }
 
     if (sps->chroma_format_idc == 3)
         sps->separate_colour_plane_flag = get_bits1(gb);
@@ -816,8 +852,8 @@ int ff_hevc_decode_nal_sps(HEVCContext *s)
     sps->width  = get_ue_golomb_long(gb);
     sps->height = get_ue_golomb_long(gb);
     if ((ret = av_image_check_size(sps->width,
-                                   sps->height, 0, s->avctx)) < 0)
-        goto err;
+                                   sps->height, 0, avctx)) < 0)
+        return ret;
 
     if (get_bits1(gb)) { // pic_conformance_flag
         //TODO: * 2 is only valid for 420
@@ -826,8 +862,8 @@ int ff_hevc_decode_nal_sps(HEVCContext *s)
         sps->pic_conf_win.top_offset    = get_ue_golomb_long(gb) * 2;
         sps->pic_conf_win.bottom_offset = get_ue_golomb_long(gb) * 2;
 
-        if (s->avctx->flags2 & CODEC_FLAG2_IGNORE_CROP) {
-            av_log(s->avctx, AV_LOG_DEBUG,
+        if (avctx->flags2 & AV_CODEC_FLAG2_IGNORE_CROP) {
+            av_log(avctx, AV_LOG_DEBUG,
                    "discarding sps conformance window, "
                    "original values are l:%u r:%u t:%u b:%u\n",
                    sps->pic_conf_win.left_offset,
@@ -846,64 +882,22 @@ int ff_hevc_decode_nal_sps(HEVCContext *s)
     sps->bit_depth   = get_ue_golomb_long(gb) + 8;
     bit_depth_chroma = get_ue_golomb_long(gb) + 8;
     if (sps->chroma_format_idc && bit_depth_chroma != sps->bit_depth) {
-        av_log(s->avctx, AV_LOG_ERROR,
+        av_log(avctx, AV_LOG_ERROR,
                "Luma bit depth (%d) is different from chroma bit depth (%d), "
                "this is unsupported.\n",
                sps->bit_depth, bit_depth_chroma);
-        ret = AVERROR_INVALIDDATA;
-        goto err;
+        return AVERROR_INVALIDDATA;
     }
 
-    switch (sps->bit_depth) {
-    case 8:
-        if (sps->chroma_format_idc == 0) sps->pix_fmt = AV_PIX_FMT_GRAY8;
-        if (sps->chroma_format_idc == 1) sps->pix_fmt = AV_PIX_FMT_YUV420P;
-        if (sps->chroma_format_idc == 2) sps->pix_fmt = AV_PIX_FMT_YUV422P;
-        if (sps->chroma_format_idc == 3) sps->pix_fmt = AV_PIX_FMT_YUV444P;
-       break;
-    case 9:
-        if (sps->chroma_format_idc == 0) sps->pix_fmt = AV_PIX_FMT_GRAY16;
-        if (sps->chroma_format_idc == 1) sps->pix_fmt = AV_PIX_FMT_YUV420P9;
-        if (sps->chroma_format_idc == 2) sps->pix_fmt = AV_PIX_FMT_YUV422P9;
-        if (sps->chroma_format_idc == 3) sps->pix_fmt = AV_PIX_FMT_YUV444P9;
-        break;
-    case 10:
-        if (sps->chroma_format_idc == 0) sps->pix_fmt = AV_PIX_FMT_GRAY16;
-        if (sps->chroma_format_idc == 1) sps->pix_fmt = AV_PIX_FMT_YUV420P10;
-        if (sps->chroma_format_idc == 2) sps->pix_fmt = AV_PIX_FMT_YUV422P10;
-        if (sps->chroma_format_idc == 3) sps->pix_fmt = AV_PIX_FMT_YUV444P10;
-        break;
-    case 12:
-        if (sps->chroma_format_idc == 0) sps->pix_fmt = AV_PIX_FMT_GRAY16;
-        if (sps->chroma_format_idc == 1) sps->pix_fmt = AV_PIX_FMT_YUV420P12;
-        if (sps->chroma_format_idc == 2) sps->pix_fmt = AV_PIX_FMT_YUV422P12;
-        if (sps->chroma_format_idc == 3) sps->pix_fmt = AV_PIX_FMT_YUV444P12;
-        break;
-    default:
-        av_log(s->avctx, AV_LOG_ERROR,
-               "4:2:0, 4:2:2, 4:4:4 supports are currently specified for 8, 10 and 12 bits.\n");
-        ret = AVERROR_PATCHWELCOME;
-        goto err;
-    }
-
-    desc = av_pix_fmt_desc_get(sps->pix_fmt);
-    if (!desc) {
-        ret = AVERROR(EINVAL);
-        goto err;
-    }
-
-    sps->hshift[0] = sps->vshift[0] = 0;
-    sps->hshift[2] = sps->hshift[1] = desc->log2_chroma_w;
-    sps->vshift[2] = sps->vshift[1] = desc->log2_chroma_h;
-
-    sps->pixel_shift = sps->bit_depth > 8;
+    ret = map_pixel_format(avctx, sps);
+    if (ret < 0)
+        return ret;
 
     sps->log2_max_poc_lsb = get_ue_golomb_long(gb) + 4;
     if (sps->log2_max_poc_lsb > 16) {
-        av_log(s->avctx, AV_LOG_ERROR, "log2_max_pic_order_cnt_lsb_minus4 out range: %d\n",
+        av_log(avctx, AV_LOG_ERROR, "log2_max_pic_order_cnt_lsb_minus4 out range: %d\n",
                sps->log2_max_poc_lsb - 4);
-        ret = AVERROR_INVALIDDATA;
-        goto err;
+        return AVERROR_INVALIDDATA;
     }
 
     sublayer_ordering_info = get_bits1(gb);
@@ -913,18 +907,16 @@ int ff_hevc_decode_nal_sps(HEVCContext *s)
         sps->temporal_layer[i].num_reorder_pics      = get_ue_golomb_long(gb);
         sps->temporal_layer[i].max_latency_increase  = get_ue_golomb_long(gb) - 1;
         if (sps->temporal_layer[i].max_dec_pic_buffering > MAX_DPB_SIZE) {
-            av_log(s->avctx, AV_LOG_ERROR, "sps_max_dec_pic_buffering_minus1 out of range: %d\n",
+            av_log(avctx, AV_LOG_ERROR, "sps_max_dec_pic_buffering_minus1 out of range: %d\n",
                    sps->temporal_layer[i].max_dec_pic_buffering - 1);
-            ret = AVERROR_INVALIDDATA;
-            goto err;
+            return AVERROR_INVALIDDATA;
         }
         if (sps->temporal_layer[i].num_reorder_pics > sps->temporal_layer[i].max_dec_pic_buffering - 1) {
-            av_log(s->avctx, AV_LOG_WARNING, "sps_max_num_reorder_pics out of range: %d\n",
+            av_log(avctx, AV_LOG_WARNING, "sps_max_num_reorder_pics out of range: %d\n",
                    sps->temporal_layer[i].num_reorder_pics);
-            if (s->avctx->err_recognition & AV_EF_EXPLODE ||
+            if (avctx->err_recognition & AV_EF_EXPLODE ||
                 sps->temporal_layer[i].num_reorder_pics > MAX_DPB_SIZE - 1) {
-                ret = AVERROR_INVALIDDATA;
-                goto err;
+                return AVERROR_INVALIDDATA;
             }
             sps->temporal_layer[i].max_dec_pic_buffering = sps->temporal_layer[i].num_reorder_pics + 1;
         }
@@ -946,27 +938,23 @@ int ff_hevc_decode_nal_sps(HEVCContext *s)
                                                sps->log2_min_tb_size;
 
     if (sps->log2_min_cb_size < 3 || sps->log2_min_cb_size > 30) {
-        av_log(s->avctx, AV_LOG_ERROR, "Invalid value %d for log2_min_cb_size", sps->log2_min_cb_size);
-        ret = AVERROR_INVALIDDATA;
-        goto err;
+        av_log(avctx, AV_LOG_ERROR, "Invalid value %d for log2_min_cb_size", sps->log2_min_cb_size);
+        return AVERROR_INVALIDDATA;
     }
 
     if (sps->log2_diff_max_min_coding_block_size > 30) {
-        av_log(s->avctx, AV_LOG_ERROR, "Invalid value %d for log2_diff_max_min_coding_block_size", sps->log2_diff_max_min_coding_block_size);
-        ret = AVERROR_INVALIDDATA;
-        goto err;
+        av_log(avctx, AV_LOG_ERROR, "Invalid value %d for log2_diff_max_min_coding_block_size", sps->log2_diff_max_min_coding_block_size);
+        return AVERROR_INVALIDDATA;
     }
 
     if (sps->log2_min_tb_size >= sps->log2_min_cb_size || sps->log2_min_tb_size < 2) {
-        av_log(s->avctx, AV_LOG_ERROR, "Invalid value for log2_min_tb_size");
-        ret = AVERROR_INVALIDDATA;
-        goto err;
+        av_log(avctx, AV_LOG_ERROR, "Invalid value for log2_min_tb_size");
+        return AVERROR_INVALIDDATA;
     }
 
     if (log2_diff_max_min_transform_block_size < 0 || log2_diff_max_min_transform_block_size > 30) {
-        av_log(s->avctx, AV_LOG_ERROR, "Invalid value %d for log2_diff_max_min_transform_block_size", log2_diff_max_min_transform_block_size);
-        ret = AVERROR_INVALIDDATA;
-        goto err;
+        av_log(avctx, AV_LOG_ERROR, "Invalid value %d for log2_diff_max_min_transform_block_size", log2_diff_max_min_transform_block_size);
+        return AVERROR_INVALIDDATA;
     }
 
     sps->max_transform_hierarchy_depth_inter = get_ue_golomb_long(gb);
@@ -977,9 +965,9 @@ int ff_hevc_decode_nal_sps(HEVCContext *s)
         set_default_scaling_list_data(&sps->scaling_list);
 
         if (get_bits1(gb)) {
-            ret = scaling_list_data(s, &sps->scaling_list, sps);
+            ret = scaling_list_data(gb, avctx, &sps->scaling_list, sps);
             if (ret < 0)
-                goto err;
+                return ret;
         }
     }
 
@@ -994,11 +982,10 @@ int ff_hevc_decode_nal_sps(HEVCContext *s)
         sps->pcm.log2_max_pcm_cb_size = sps->pcm.log2_min_pcm_cb_size +
                                         get_ue_golomb_long(gb);
         if (sps->pcm.bit_depth > sps->bit_depth) {
-            av_log(s->avctx, AV_LOG_ERROR,
+            av_log(avctx, AV_LOG_ERROR,
                    "PCM bit depth (%d) is greater than normal bit depth (%d)\n",
                    sps->pcm.bit_depth, sps->bit_depth);
-            ret = AVERROR_INVALIDDATA;
-            goto err;
+            return AVERROR_INVALIDDATA;
         }
 
         sps->pcm.loop_filter_disable_flag = get_bits1(gb);
@@ -1006,24 +993,23 @@ int ff_hevc_decode_nal_sps(HEVCContext *s)
 
     sps->nb_st_rps = get_ue_golomb_long(gb);
     if (sps->nb_st_rps > MAX_SHORT_TERM_RPS_COUNT) {
-        av_log(s->avctx, AV_LOG_ERROR, "Too many short term RPS: %d.\n",
+        av_log(avctx, AV_LOG_ERROR, "Too many short term RPS: %d.\n",
                sps->nb_st_rps);
-        ret = AVERROR_INVALIDDATA;
-        goto err;
+        return AVERROR_INVALIDDATA;
     }
     for (i = 0; i < sps->nb_st_rps; i++) {
-        if ((ret = ff_hevc_decode_short_term_rps(s, &sps->st_rps[i],
+        if ((ret = ff_hevc_decode_short_term_rps(gb, avctx, &sps->st_rps[i],
                                                  sps, 0)) < 0)
-            goto err;
+            return ret;
     }
 
     sps->long_term_ref_pics_present_flag = get_bits1(gb);
     if (sps->long_term_ref_pics_present_flag) {
         sps->num_long_term_ref_pics_sps = get_ue_golomb_long(gb);
         if (sps->num_long_term_ref_pics_sps > 31U) {
-            av_log(s->avctx, AV_LOG_ERROR, "num_long_term_ref_pics_sps %d is out of range.\n",
+            av_log(avctx, AV_LOG_ERROR, "num_long_term_ref_pics_sps %d is out of range.\n",
                    sps->num_long_term_ref_pics_sps);
-            goto err;
+            return AVERROR_INVALIDDATA;
         }
         for (i = 0; i < sps->num_long_term_ref_pics_sps; i++) {
             sps->lt_ref_pic_poc_lsb_sps[i]       = get_bits(gb, sps->log2_max_poc_lsb);
@@ -1036,7 +1022,7 @@ int ff_hevc_decode_nal_sps(HEVCContext *s)
     sps->vui.sar = (AVRational){0, 1};
     vui_present = get_bits1(gb);
     if (vui_present)
-        decode_vui(s, sps);
+        decode_vui(gb, avctx, apply_defdispwin, sps);
 
     if (get_bits1(gb)) { // sps_extension_flag
         int sps_extension_flag[1];
@@ -1056,33 +1042,33 @@ int ff_hevc_decode_nal_sps(HEVCContext *s)
 
             extended_precision_processing_flag = get_bits1(gb);
             if (extended_precision_processing_flag)
-                av_log(s->avctx, AV_LOG_WARNING,
+                av_log(avctx, AV_LOG_WARNING,
                    "extended_precision_processing_flag not yet implemented\n");
 
             sps->intra_smoothing_disabled_flag       = get_bits1(gb);
             high_precision_offsets_enabled_flag  = get_bits1(gb);
             if (high_precision_offsets_enabled_flag)
-                av_log(s->avctx, AV_LOG_WARNING,
+                av_log(avctx, AV_LOG_WARNING,
                    "high_precision_offsets_enabled_flag not yet implemented\n");
 
             sps->persistent_rice_adaptation_enabled_flag = get_bits1(gb);
 
             cabac_bypass_alignment_enabled_flag  = get_bits1(gb);
             if (cabac_bypass_alignment_enabled_flag)
-                av_log(s->avctx, AV_LOG_WARNING,
+                av_log(avctx, AV_LOG_WARNING,
                    "cabac_bypass_alignment_enabled_flag not yet implemented\n");
         }
     }
-    if (s->apply_defdispwin) {
+    if (apply_defdispwin) {
         sps->output_window.left_offset   += sps->vui.def_disp_win.left_offset;
         sps->output_window.right_offset  += sps->vui.def_disp_win.right_offset;
         sps->output_window.top_offset    += sps->vui.def_disp_win.top_offset;
         sps->output_window.bottom_offset += sps->vui.def_disp_win.bottom_offset;
     }
     if (sps->output_window.left_offset & (0x1F >> (sps->pixel_shift)) &&
-        !(s->avctx->flags & CODEC_FLAG_UNALIGNED)) {
+        !(avctx->flags & AV_CODEC_FLAG_UNALIGNED)) {
         sps->output_window.left_offset &= ~(0x1F >> (sps->pixel_shift));
-        av_log(s->avctx, AV_LOG_WARNING, "Reducing left output window to %d "
+        av_log(avctx, AV_LOG_WARNING, "Reducing left output window to %d "
                "chroma samples to preserve alignment.\n",
                sps->output_window.left_offset);
     }
@@ -1092,13 +1078,12 @@ int ff_hevc_decode_nal_sps(HEVCContext *s)
                          (sps->output_window.top_offset + sps->output_window.bottom_offset);
     if (sps->width  <= sps->output_window.left_offset + (int64_t)sps->output_window.right_offset  ||
         sps->height <= sps->output_window.top_offset  + (int64_t)sps->output_window.bottom_offset) {
-        av_log(s->avctx, AV_LOG_WARNING, "Invalid visible frame dimensions: %dx%d.\n",
+        av_log(avctx, AV_LOG_WARNING, "Invalid visible frame dimensions: %dx%d.\n",
                sps->output_width, sps->output_height);
-        if (s->avctx->err_recognition & AV_EF_EXPLODE) {
-            ret = AVERROR_INVALIDDATA;
-            goto err;
+        if (avctx->err_recognition & AV_EF_EXPLODE) {
+            return AVERROR_INVALIDDATA;
         }
-        av_log(s->avctx, AV_LOG_WARNING,
+        av_log(avctx, AV_LOG_WARNING,
                "Displaying the whole video surface.\n");
         memset(&sps->pic_conf_win, 0, sizeof(sps->pic_conf_win));
         memset(&sps->output_window, 0, sizeof(sps->output_window));
@@ -1112,16 +1097,16 @@ int ff_hevc_decode_nal_sps(HEVCContext *s)
     sps->log2_min_pu_size = sps->log2_min_cb_size - 1;
 
     if (sps->log2_ctb_size > MAX_LOG2_CTB_SIZE) {
-        av_log(s->avctx, AV_LOG_ERROR, "CTB size out of range: 2^%d\n", sps->log2_ctb_size);
-        goto err;
+        av_log(avctx, AV_LOG_ERROR, "CTB size out of range: 2^%d\n", sps->log2_ctb_size);
+        return AVERROR_INVALIDDATA;
     }
     if (sps->log2_ctb_size < 4) {
-        av_log(s->avctx,
+        av_log(avctx,
                AV_LOG_ERROR,
                "log2_ctb_size %d differs from the bounds of any known profile\n",
                sps->log2_ctb_size);
-        avpriv_request_sample(s->avctx, "log2_ctb_size %d", sps->log2_ctb_size);
-        goto err;
+        avpriv_request_sample(avctx, "log2_ctb_size %d", sps->log2_ctb_size);
+        return AVERROR_INVALIDDATA;
     }
 
     sps->ctb_width  = (sps->width  + (1 << sps->log2_ctb_size) - 1) >> sps->log2_ctb_size;
@@ -1140,35 +1125,60 @@ int ff_hevc_decode_nal_sps(HEVCContext *s)
 
     if (av_mod_uintp2(sps->width, sps->log2_min_cb_size) ||
         av_mod_uintp2(sps->height, sps->log2_min_cb_size)) {
-        av_log(s->avctx, AV_LOG_ERROR, "Invalid coded frame dimensions.\n");
-        goto err;
+        av_log(avctx, AV_LOG_ERROR, "Invalid coded frame dimensions.\n");
+        return AVERROR_INVALIDDATA;
     }
 
     if (sps->max_transform_hierarchy_depth_inter > sps->log2_ctb_size - sps->log2_min_tb_size) {
-        av_log(s->avctx, AV_LOG_ERROR, "max_transform_hierarchy_depth_inter out of range: %d\n",
+        av_log(avctx, AV_LOG_ERROR, "max_transform_hierarchy_depth_inter out of range: %d\n",
                sps->max_transform_hierarchy_depth_inter);
-        goto err;
+        return AVERROR_INVALIDDATA;
     }
     if (sps->max_transform_hierarchy_depth_intra > sps->log2_ctb_size - sps->log2_min_tb_size) {
-        av_log(s->avctx, AV_LOG_ERROR, "max_transform_hierarchy_depth_intra out of range: %d\n",
+        av_log(avctx, AV_LOG_ERROR, "max_transform_hierarchy_depth_intra out of range: %d\n",
                sps->max_transform_hierarchy_depth_intra);
-        goto err;
+        return AVERROR_INVALIDDATA;
     }
     if (sps->log2_max_trafo_size > FFMIN(sps->log2_ctb_size, 5)) {
-        av_log(s->avctx, AV_LOG_ERROR,
+        av_log(avctx, AV_LOG_ERROR,
                "max transform block size out of range: %d\n",
                sps->log2_max_trafo_size);
-        goto err;
+        return AVERROR_INVALIDDATA;
     }
 
     if (get_bits_left(gb) < 0) {
-        av_log(s->avctx, AV_LOG_ERROR,
+        av_log(avctx, AV_LOG_ERROR,
                "Overread SPS by %d bits\n", -get_bits_left(gb));
-        goto err;
+        return AVERROR_INVALIDDATA;
     }
 
-    if (s->avctx->debug & FF_DEBUG_BITSTREAM) {
-        av_log(s->avctx, AV_LOG_DEBUG,
+    return 0;
+}
+
+int ff_hevc_decode_nal_sps(GetBitContext *gb, AVCodecContext *avctx,
+                           HEVCParamSets *ps, int apply_defdispwin)
+{
+    HEVCSPS *sps;
+    AVBufferRef *sps_buf = av_buffer_allocz(sizeof(*sps));
+    unsigned int sps_id;
+    int ret;
+
+    if (!sps_buf)
+        return AVERROR(ENOMEM);
+    sps = (HEVCSPS*)sps_buf->data;
+
+    av_log(avctx, AV_LOG_DEBUG, "Decoding SPS\n");
+
+    ret = ff_hevc_parse_sps(sps, gb, &sps_id,
+                            apply_defdispwin,
+                            ps->vps_list, avctx);
+    if (ret < 0) {
+        av_buffer_unref(&sps_buf);
+        return ret;
+    }
+
+    if (avctx->debug & FF_DEBUG_BITSTREAM) {
+        av_log(avctx, AV_LOG_DEBUG,
                "Parsed SPS: id %d; coded wxh: %dx%d; "
                "cropped wxh: %dx%d; pix_fmt: %s.\n",
                sps_id, sps->width, sps->height,
@@ -1179,19 +1189,15 @@ int ff_hevc_decode_nal_sps(HEVCContext *s)
     /* check if this is a repeat of an already parsed SPS, then keep the
      * original one.
      * otherwise drop all PPSes that depend on it */
-    if (s->sps_list[sps_id] &&
-        !memcmp(s->sps_list[sps_id]->data, sps_buf->data, sps_buf->size)) {
+    if (ps->sps_list[sps_id] &&
+        !memcmp(ps->sps_list[sps_id]->data, sps_buf->data, sps_buf->size)) {
         av_buffer_unref(&sps_buf);
     } else {
-        remove_sps(s, sps_id);
-        s->sps_list[sps_id] = sps_buf;
+        remove_sps(ps, sps_id);
+        ps->sps_list[sps_id] = sps_buf;
     }
 
     return 0;
-
-err:
-    av_buffer_unref(&sps_buf);
-    return ret;
 }
 
 static void hevc_pps_free(void *opaque, uint8_t *data)
@@ -1212,8 +1218,8 @@ static void hevc_pps_free(void *opaque, uint8_t *data)
     av_freep(&pps);
 }
 
-static int pps_range_extensions(HEVCContext *s, HEVCPPS *pps, HEVCSPS *sps) {
-    GetBitContext *gb = &s->HEVClc->gb;
+static int pps_range_extensions(GetBitContext *gb, AVCodecContext *avctx,
+                                HEVCPPS *pps, HEVCSPS *sps) {
     int i;
 
     if (pps->transform_skip_enabled_flag) {
@@ -1225,19 +1231,19 @@ static int pps_range_extensions(HEVCContext *s, HEVCPPS *pps, HEVCSPS *sps) {
         pps->diff_cu_chroma_qp_offset_depth = get_ue_golomb_long(gb);
         pps->chroma_qp_offset_list_len_minus1 = get_ue_golomb_long(gb);
         if (pps->chroma_qp_offset_list_len_minus1 && pps->chroma_qp_offset_list_len_minus1 >= 5) {
-            av_log(s->avctx, AV_LOG_ERROR,
+            av_log(avctx, AV_LOG_ERROR,
                    "chroma_qp_offset_list_len_minus1 shall be in the range [0, 5].\n");
             return AVERROR_INVALIDDATA;
         }
         for (i = 0; i <= pps->chroma_qp_offset_list_len_minus1; i++) {
             pps->cb_qp_offset_list[i] = get_se_golomb_long(gb);
             if (pps->cb_qp_offset_list[i]) {
-                av_log(s->avctx, AV_LOG_WARNING,
+                av_log(avctx, AV_LOG_WARNING,
                        "cb_qp_offset_list not tested yet.\n");
             }
             pps->cr_qp_offset_list[i] = get_se_golomb_long(gb);
             if (pps->cr_qp_offset_list[i]) {
-                av_log(s->avctx, AV_LOG_WARNING,
+                av_log(avctx, AV_LOG_WARNING,
                        "cb_qp_offset_list not tested yet.\n");
             }
         }
@@ -1248,14 +1254,143 @@ static int pps_range_extensions(HEVCContext *s, HEVCPPS *pps, HEVCSPS *sps) {
     return(0);
 }
 
-int ff_hevc_decode_nal_pps(HEVCContext *s)
+static inline int setup_pps(AVCodecContext *avctx, GetBitContext *gb,
+                            HEVCPPS *pps, HEVCSPS *sps)
 {
-    GetBitContext *gb = &s->HEVClc->gb;
-    HEVCSPS      *sps = NULL;
+    int log2_diff;
     int pic_area_in_ctbs;
-    int log2_diff_ctb_min_tb_size;
     int i, j, x, y, ctb_addr_rs, tile_id;
-    int ret = 0;
+    /* Builds pps tile-boundary and address tables from sps; returns 0 or AVERROR(ENOMEM). avctx and gb are not referenced in this body. */
+    // Inferred parameters
+    pps->col_bd   = av_malloc_array(pps->num_tile_columns + 1, sizeof(*pps->col_bd));
+    pps->row_bd   = av_malloc_array(pps->num_tile_rows + 1,    sizeof(*pps->row_bd));
+    pps->col_idxX = av_malloc_array(sps->ctb_width,    sizeof(*pps->col_idxX));
+    if (!pps->col_bd || !pps->row_bd || !pps->col_idxX)
+        return AVERROR(ENOMEM); /* NOTE(review): nothing is freed here; presumably the caller releases pps - confirm */
+    /* Uniform spacing: tile sizes are derived by dividing the CTB grid evenly. */
+    if (pps->uniform_spacing_flag) {
+        if (!pps->column_width) { /* allocate both arrays only when column_width is not already set */
+            pps->column_width = av_malloc_array(pps->num_tile_columns, sizeof(*pps->column_width));
+            pps->row_height   = av_malloc_array(pps->num_tile_rows,    sizeof(*pps->row_height));
+        }
+        if (!pps->column_width || !pps->row_height)
+            return AVERROR(ENOMEM);
+
+        for (i = 0; i < pps->num_tile_columns; i++) {
+            pps->column_width[i] = ((i + 1) * sps->ctb_width) / pps->num_tile_columns -
+                                   (i * sps->ctb_width) / pps->num_tile_columns;
+        }
+
+        for (i = 0; i < pps->num_tile_rows; i++) {
+            pps->row_height[i] = ((i + 1) * sps->ctb_height) / pps->num_tile_rows -
+                                 (i * sps->ctb_height) / pps->num_tile_rows;
+        }
+    }
+    /* Tile boundaries are the running sums of the tile widths / heights. */
+    pps->col_bd[0] = 0;
+    for (i = 0; i < pps->num_tile_columns; i++)
+        pps->col_bd[i + 1] = pps->col_bd[i] + pps->column_width[i];
+
+    pps->row_bd[0] = 0;
+    for (i = 0; i < pps->num_tile_rows; i++)
+        pps->row_bd[i + 1] = pps->row_bd[i] + pps->row_height[i];
+    /* col_idxX[i] stores j for CTB column i; j advances each time i passes col_bd[j]. */
+    for (i = 0, j = 0; i < sps->ctb_width; i++) {
+        if (i > pps->col_bd[j])
+            j++;
+        pps->col_idxX[i] = j;
+    }
+
+    /**
+     * 6.5: allocate and fill the CTB raster-scan <-> tile-scan address tables
+     */
+    pic_area_in_ctbs     = sps->ctb_width    * sps->ctb_height;
+
+    pps->ctb_addr_rs_to_ts = av_malloc_array(pic_area_in_ctbs,    sizeof(*pps->ctb_addr_rs_to_ts));
+    pps->ctb_addr_ts_to_rs = av_malloc_array(pic_area_in_ctbs,    sizeof(*pps->ctb_addr_ts_to_rs));
+    pps->tile_id           = av_malloc_array(pic_area_in_ctbs,    sizeof(*pps->tile_id));
+    pps->min_tb_addr_zs_tab = av_malloc_array((sps->tb_mask+2) * (sps->tb_mask+2), sizeof(*pps->min_tb_addr_zs_tab));
+    if (!pps->ctb_addr_rs_to_ts || !pps->ctb_addr_ts_to_rs ||
+        !pps->tile_id || !pps->min_tb_addr_zs_tab) {
+        return AVERROR(ENOMEM);
+    }
+    /* For every CTB in raster order, compute its address in tile-scan order. */
+    for (ctb_addr_rs = 0; ctb_addr_rs < pic_area_in_ctbs; ctb_addr_rs++) {
+        int tb_x   = ctb_addr_rs % sps->ctb_width;
+        int tb_y   = ctb_addr_rs / sps->ctb_width;
+        int tile_x = 0;
+        int tile_y = 0;
+        int val    = 0;
+        /* Locate the tile column and tile row that contain this CTB. */
+        for (i = 0; i < pps->num_tile_columns; i++) {
+            if (tb_x < pps->col_bd[i + 1]) {
+                tile_x = i;
+                break;
+            }
+        }
+
+        for (i = 0; i < pps->num_tile_rows; i++) {
+            if (tb_y < pps->row_bd[i + 1]) {
+                tile_y = i;
+                break;
+            }
+        }
+        /* CTBs of the tiles to the left in this tile row, then all CTBs of the tile rows above. */
+        for (i = 0; i < tile_x; i++)
+            val += pps->row_height[tile_y] * pps->column_width[i];
+        for (i = 0; i < tile_y; i++)
+            val += sps->ctb_width * pps->row_height[i];
+        /* Plus the CTB's raster offset inside its own tile. */
+        val += (tb_y - pps->row_bd[tile_y]) * pps->column_width[tile_x] +
+               tb_x - pps->col_bd[tile_x];
+
+        pps->ctb_addr_rs_to_ts[ctb_addr_rs] = val;
+        pps->ctb_addr_ts_to_rs[val]         = ctb_addr_rs;
+    }
+    /* Record, per tile-scan address, the index of the tile the CTB belongs to. */
+    for (j = 0, tile_id = 0; j < pps->num_tile_rows; j++)
+        for (i = 0; i < pps->num_tile_columns; i++, tile_id++)
+            for (y = pps->row_bd[j]; y < pps->row_bd[j + 1]; y++)
+                for (x = pps->col_bd[i]; x < pps->col_bd[i + 1]; x++)
+                    pps->tile_id[pps->ctb_addr_rs_to_ts[y * sps->ctb_width + x]] = tile_id;
+    /* tile_id now equals num_tile_rows * num_tile_columns, i.e. the number of tiles. */
+    pps->tile_pos_rs = av_malloc_array(tile_id, sizeof(*pps->tile_pos_rs));
+    if (!pps->tile_pos_rs)
+        return AVERROR(ENOMEM);
+    /* tile_pos_rs: raster-scan address of the first (top-left) CTB of each tile. */
+    for (j = 0; j < pps->num_tile_rows; j++)
+        for (i = 0; i < pps->num_tile_columns; i++)
+            pps->tile_pos_rs[j * pps->num_tile_columns + i] =
+                pps->row_bd[j] * sps->ctb_width + pps->col_bd[i];
+    /* min_tb_addr_zs points one row and one column into the table; that first row/column border is set to -1. */
+    log2_diff = sps->log2_ctb_size - sps->log2_min_tb_size;
+    pps->min_tb_addr_zs = &pps->min_tb_addr_zs_tab[1*(sps->tb_mask+2)+1];
+    for (y = 0; y < sps->tb_mask+2; y++) {
+        pps->min_tb_addr_zs_tab[y*(sps->tb_mask+2)] = -1;
+        pps->min_tb_addr_zs_tab[y]    = -1;
+    }
+    for (y = 0; y < sps->tb_mask+1; y++) {
+        for (x = 0; x < sps->tb_mask+1; x++) {
+            int tb_x = x >> log2_diff;
+            int tb_y = y >> log2_diff;
+            int rs   = sps->ctb_width * tb_y + tb_x;
+            int val  = pps->ctb_addr_rs_to_ts[rs] << (log2_diff * 2);
+            for (i = 0; i < log2_diff; i++) { /* interleave low bits: bit i of x -> bit 2i, bit i of y -> bit 2i+1 */
+                int m = 1 << i;
+                val += (m & x ? m * m : 0) + (m & y ? 2 * m * m : 0);
+            }
+            pps->min_tb_addr_zs[y * (sps->tb_mask+2) + x] = val;
+        }
+    }
+
+    return 0;
+}
+
+int ff_hevc_decode_nal_pps(GetBitContext *gb, AVCodecContext *avctx,
+                           HEVCParamSets *ps)
+{
+    HEVCSPS      *sps = NULL;
+    int i, ret = 0;
     unsigned int pps_id = 0;
 
     AVBufferRef *pps_buf;
@@ -1271,7 +1406,7 @@ int ff_hevc_decode_nal_pps(HEVCContext *s)
         return AVERROR(ENOMEM);
     }
 
-    av_log(s->avctx, AV_LOG_DEBUG, "Decoding PPS\n");
+    av_log(avctx, AV_LOG_DEBUG, "Decoding PPS\n");
 
     // Default values
     pps->loop_filter_across_tiles_enabled_flag = 1;
@@ -1286,22 +1421,22 @@ int ff_hevc_decode_nal_pps(HEVCContext *s)
     // Coded parameters
     pps_id = get_ue_golomb_long(gb);
     if (pps_id >= MAX_PPS_COUNT) {
-        av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", pps_id);
+        av_log(avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", pps_id);
         ret = AVERROR_INVALIDDATA;
         goto err;
     }
     pps->sps_id = get_ue_golomb_long(gb);
     if (pps->sps_id >= MAX_SPS_COUNT) {
-        av_log(s->avctx, AV_LOG_ERROR, "SPS id out of range: %d\n", pps->sps_id);
+        av_log(avctx, AV_LOG_ERROR, "SPS id out of range: %d\n", pps->sps_id);
         ret = AVERROR_INVALIDDATA;
         goto err;
     }
-    if (!s->sps_list[pps->sps_id]) {
-        av_log(s->avctx, AV_LOG_ERROR, "SPS %u does not exist.\n", pps->sps_id);
+    if (!ps->sps_list[pps->sps_id]) {
+        av_log(avctx, AV_LOG_ERROR, "SPS %u does not exist.\n", pps->sps_id);
         ret = AVERROR_INVALIDDATA;
         goto err;
     }
-    sps = (HEVCSPS *)s->sps_list[pps->sps_id]->data;
+    sps = (HEVCSPS *)ps->sps_list[pps->sps_id]->data;
 
     pps->dependent_slice_segments_enabled_flag = get_bits1(gb);
     pps->output_flag_present_flag              = get_bits1(gb);
@@ -1326,7 +1461,7 @@ int ff_hevc_decode_nal_pps(HEVCContext *s)
 
     if (pps->diff_cu_qp_delta_depth < 0 ||
         pps->diff_cu_qp_delta_depth > sps->log2_diff_max_min_coding_block_size) {
-        av_log(s->avctx, AV_LOG_ERROR, "diff_cu_qp_delta_depth %d is invalid\n",
+        av_log(avctx, AV_LOG_ERROR, "diff_cu_qp_delta_depth %d is invalid\n",
                pps->diff_cu_qp_delta_depth);
         ret = AVERROR_INVALIDDATA;
         goto err;
@@ -1334,14 +1469,14 @@ int ff_hevc_decode_nal_pps(HEVCContext *s)
 
     pps->cb_qp_offset = get_se_golomb(gb);
     if (pps->cb_qp_offset < -12 || pps->cb_qp_offset > 12) {
-        av_log(s->avctx, AV_LOG_ERROR, "pps_cb_qp_offset out of range: %d\n",
+        av_log(avctx, AV_LOG_ERROR, "pps_cb_qp_offset out of range: %d\n",
                pps->cb_qp_offset);
         ret = AVERROR_INVALIDDATA;
         goto err;
     }
     pps->cr_qp_offset = get_se_golomb(gb);
     if (pps->cr_qp_offset < -12 || pps->cr_qp_offset > 12) {
-        av_log(s->avctx, AV_LOG_ERROR, "pps_cr_qp_offset out of range: %d\n",
+        av_log(avctx, AV_LOG_ERROR, "pps_cr_qp_offset out of range: %d\n",
                pps->cr_qp_offset);
         ret = AVERROR_INVALIDDATA;
         goto err;
@@ -1360,14 +1495,14 @@ int ff_hevc_decode_nal_pps(HEVCContext *s)
         pps->num_tile_rows    = get_ue_golomb_long(gb) + 1;
         if (pps->num_tile_columns <= 0 ||
             pps->num_tile_columns >= sps->width) {
-            av_log(s->avctx, AV_LOG_ERROR, "num_tile_columns_minus1 out of range: %d\n",
+            av_log(avctx, AV_LOG_ERROR, "num_tile_columns_minus1 out of range: %d\n",
                    pps->num_tile_columns - 1);
             ret = AVERROR_INVALIDDATA;
             goto err;
         }
         if (pps->num_tile_rows <= 0 ||
             pps->num_tile_rows >= sps->height) {
-            av_log(s->avctx, AV_LOG_ERROR, "num_tile_rows_minus1 out of range: %d\n",
+            av_log(avctx, AV_LOG_ERROR, "num_tile_rows_minus1 out of range: %d\n",
                    pps->num_tile_rows - 1);
             ret = AVERROR_INVALIDDATA;
             goto err;
@@ -1388,7 +1523,7 @@ int ff_hevc_decode_nal_pps(HEVCContext *s)
                 sum                 += pps->column_width[i];
             }
             if (sum >= sps->ctb_width) {
-                av_log(s->avctx, AV_LOG_ERROR, "Invalid tile widths.\n");
+                av_log(avctx, AV_LOG_ERROR, "Invalid tile widths.\n");
                 ret = AVERROR_INVALIDDATA;
                 goto err;
             }
@@ -1400,7 +1535,7 @@ int ff_hevc_decode_nal_pps(HEVCContext *s)
                 sum               += pps->row_height[i];
             }
             if (sum >= sps->ctb_height) {
-                av_log(s->avctx, AV_LOG_ERROR, "Invalid tile heights.\n");
+                av_log(avctx, AV_LOG_ERROR, "Invalid tile heights.\n");
                 ret = AVERROR_INVALIDDATA;
                 goto err;
             }
@@ -1419,13 +1554,13 @@ int ff_hevc_decode_nal_pps(HEVCContext *s)
             pps->beta_offset = get_se_golomb(gb) * 2;
             pps->tc_offset = get_se_golomb(gb) * 2;
             if (pps->beta_offset/2 < -6 || pps->beta_offset/2 > 6) {
-                av_log(s->avctx, AV_LOG_ERROR, "pps_beta_offset_div2 out of range: %d\n",
+                av_log(avctx, AV_LOG_ERROR, "pps_beta_offset_div2 out of range: %d\n",
                        pps->beta_offset/2);
                 ret = AVERROR_INVALIDDATA;
                 goto err;
             }
             if (pps->tc_offset/2 < -6 || pps->tc_offset/2 > 6) {
-                av_log(s->avctx, AV_LOG_ERROR, "pps_tc_offset_div2 out of range: %d\n",
+                av_log(avctx, AV_LOG_ERROR, "pps_tc_offset_div2 out of range: %d\n",
                        pps->tc_offset/2);
                 ret = AVERROR_INVALIDDATA;
                 goto err;
@@ -1436,14 +1571,14 @@ int ff_hevc_decode_nal_pps(HEVCContext *s)
     pps->scaling_list_data_present_flag = get_bits1(gb);
     if (pps->scaling_list_data_present_flag) {
         set_default_scaling_list_data(&pps->scaling_list);
-        ret = scaling_list_data(s, &pps->scaling_list, sps);
+        ret = scaling_list_data(gb, avctx, &pps->scaling_list, sps);
         if (ret < 0)
             goto err;
     }
     pps->lists_modification_present_flag = get_bits1(gb);
     pps->log2_parallel_merge_level       = get_ue_golomb_long(gb) + 2;
     if (pps->log2_parallel_merge_level > sps->log2_ctb_size) {
-        av_log(s->avctx, AV_LOG_ERROR, "log2_parallel_merge_level_minus2 out of range: %d\n",
+        av_log(avctx, AV_LOG_ERROR, "log2_parallel_merge_level_minus2 out of range: %d\n",
                pps->log2_parallel_merge_level - 2);
         ret = AVERROR_INVALIDDATA;
         goto err;
@@ -1455,148 +1590,23 @@ int ff_hevc_decode_nal_pps(HEVCContext *s)
         int pps_range_extensions_flag = get_bits1(gb);
         /* int pps_extension_7bits = */ get_bits(gb, 7);
         if (sps->ptl.general_ptl.profile_idc == FF_PROFILE_HEVC_REXT && pps_range_extensions_flag) {
-            if ((ret = pps_range_extensions(s, pps, sps)) < 0)
+            if ((ret = pps_range_extensions(gb, avctx, pps, sps)) < 0)
                 goto err;
         }
     }
 
-    // Inferred parameters
-    pps->col_bd   = av_malloc_array(pps->num_tile_columns + 1, sizeof(*pps->col_bd));
-    pps->row_bd   = av_malloc_array(pps->num_tile_rows + 1,    sizeof(*pps->row_bd));
-    pps->col_idxX = av_malloc_array(sps->ctb_width,    sizeof(*pps->col_idxX));
-    if (!pps->col_bd || !pps->row_bd || !pps->col_idxX) {
-        ret = AVERROR(ENOMEM);
-        goto err;
-    }
-
-    if (pps->uniform_spacing_flag) {
-        if (!pps->column_width) {
-            pps->column_width = av_malloc_array(pps->num_tile_columns, sizeof(*pps->column_width));
-            pps->row_height   = av_malloc_array(pps->num_tile_rows,    sizeof(*pps->row_height));
-        }
-        if (!pps->column_width || !pps->row_height) {
-            ret = AVERROR(ENOMEM);
-            goto err;
-        }
-
-        for (i = 0; i < pps->num_tile_columns; i++) {
-            pps->column_width[i] = ((i + 1) * sps->ctb_width) / pps->num_tile_columns -
-                                   (i * sps->ctb_width) / pps->num_tile_columns;
-        }
-
-        for (i = 0; i < pps->num_tile_rows; i++) {
-            pps->row_height[i] = ((i + 1) * sps->ctb_height) / pps->num_tile_rows -
-                                 (i * sps->ctb_height) / pps->num_tile_rows;
-        }
-    }
-
-    pps->col_bd[0] = 0;
-    for (i = 0; i < pps->num_tile_columns; i++)
-        pps->col_bd[i + 1] = pps->col_bd[i] + pps->column_width[i];
-
-    pps->row_bd[0] = 0;
-    for (i = 0; i < pps->num_tile_rows; i++)
-        pps->row_bd[i + 1] = pps->row_bd[i] + pps->row_height[i];
-
-    for (i = 0, j = 0; i < sps->ctb_width; i++) {
-        if (i > pps->col_bd[j])
-            j++;
-        pps->col_idxX[i] = j;
-    }
-
-    /**
-     * 6.5
-     */
-    pic_area_in_ctbs     = sps->ctb_width    * sps->ctb_height;
-
-    pps->ctb_addr_rs_to_ts = av_malloc_array(pic_area_in_ctbs,    sizeof(*pps->ctb_addr_rs_to_ts));
-    pps->ctb_addr_ts_to_rs = av_malloc_array(pic_area_in_ctbs,    sizeof(*pps->ctb_addr_ts_to_rs));
-    pps->tile_id           = av_malloc_array(pic_area_in_ctbs,    sizeof(*pps->tile_id));
-    pps->min_tb_addr_zs_tab = av_malloc_array((sps->tb_mask+2) * (sps->tb_mask+2), sizeof(*pps->min_tb_addr_zs_tab));
-    if (!pps->ctb_addr_rs_to_ts || !pps->ctb_addr_ts_to_rs ||
-        !pps->tile_id || !pps->min_tb_addr_zs_tab) {
-        ret = AVERROR(ENOMEM);
+    ret = setup_pps(avctx, gb, pps, sps);
+    if (ret < 0)
         goto err;
-    }
-
-    for (ctb_addr_rs = 0; ctb_addr_rs < pic_area_in_ctbs; ctb_addr_rs++) {
-        int tb_x   = ctb_addr_rs % sps->ctb_width;
-        int tb_y   = ctb_addr_rs / sps->ctb_width;
-        int tile_x = 0;
-        int tile_y = 0;
-        int val    = 0;
-
-        for (i = 0; i < pps->num_tile_columns; i++) {
-            if (tb_x < pps->col_bd[i + 1]) {
-                tile_x = i;
-                break;
-            }
-        }
-
-        for (i = 0; i < pps->num_tile_rows; i++) {
-            if (tb_y < pps->row_bd[i + 1]) {
-                tile_y = i;
-                break;
-            }
-        }
-
-        for (i = 0; i < tile_x; i++)
-            val += pps->row_height[tile_y] * pps->column_width[i];
-        for (i = 0; i < tile_y; i++)
-            val += sps->ctb_width * pps->row_height[i];
-
-        val += (tb_y - pps->row_bd[tile_y]) * pps->column_width[tile_x] +
-               tb_x - pps->col_bd[tile_x];
-
-        pps->ctb_addr_rs_to_ts[ctb_addr_rs] = val;
-        pps->ctb_addr_ts_to_rs[val]         = ctb_addr_rs;
-    }
-
-    for (j = 0, tile_id = 0; j < pps->num_tile_rows; j++)
-        for (i = 0; i < pps->num_tile_columns; i++, tile_id++)
-            for (y = pps->row_bd[j]; y < pps->row_bd[j + 1]; y++)
-                for (x = pps->col_bd[i]; x < pps->col_bd[i + 1]; x++)
-                    pps->tile_id[pps->ctb_addr_rs_to_ts[y * sps->ctb_width + x]] = tile_id;
-
-    pps->tile_pos_rs = av_malloc_array(tile_id, sizeof(*pps->tile_pos_rs));
-    if (!pps->tile_pos_rs) {
-        ret = AVERROR(ENOMEM);
-        goto err;
-    }
-
-    for (j = 0; j < pps->num_tile_rows; j++)
-        for (i = 0; i < pps->num_tile_columns; i++)
-            pps->tile_pos_rs[j * pps->num_tile_columns + i] = pps->row_bd[j] * sps->ctb_width + pps->col_bd[i];
-
-    log2_diff_ctb_min_tb_size = sps->log2_ctb_size - sps->log2_min_tb_size;
-    pps->min_tb_addr_zs = &pps->min_tb_addr_zs_tab[1*(sps->tb_mask+2)+1];
-    for (y = 0; y < sps->tb_mask+2; y++) {
-        pps->min_tb_addr_zs_tab[y*(sps->tb_mask+2)] = -1;
-        pps->min_tb_addr_zs_tab[y]    = -1;
-    }
-    for (y = 0; y < sps->tb_mask+1; y++) {
-        for (x = 0; x < sps->tb_mask+1; x++) {
-            int tb_x        = x >> log2_diff_ctb_min_tb_size;
-            int tb_y        = y >> log2_diff_ctb_min_tb_size;
-            int ctb_addr_rs = sps->ctb_width * tb_y + tb_x;
-            int val         = pps->ctb_addr_rs_to_ts[ctb_addr_rs] <<
-                              (log2_diff_ctb_min_tb_size * 2);
-            for (i = 0; i < log2_diff_ctb_min_tb_size; i++) {
-                int m = 1 << i;
-                val += (m & x ? m * m : 0) + (m & y ? 2 * m * m : 0);
-            }
-            pps->min_tb_addr_zs[y * (sps->tb_mask+2) + x] = val;
-        }
-    }
 
     if (get_bits_left(gb) < 0) {
-        av_log(s->avctx, AV_LOG_ERROR,
+        av_log(avctx, AV_LOG_ERROR,
                "Overread PPS by %d bits\n", -get_bits_left(gb));
         goto err;
     }
 
-    remove_pps(s, pps_id);
-    s->pps_list[pps_id] = pps_buf;
+    remove_pps(ps, pps_id);
+    ps->pps_list[pps_id] = pps_buf;
 
     return 0;
 
diff --git a/libavcodec/hevc_ps_enc.c b/libavcodec/hevc_ps_enc.c
new file mode 100644
index 00000000..c05bf63d
--- /dev/null
+++ b/libavcodec/hevc_ps_enc.c
@@ -0,0 +1,116 @@
+/*
+ * HEVC Parameter Set encoding
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "golomb.h"
+#include "hevc.h"
+#include "put_bits.h"
+
+static void write_ptl_layer(PutBitContext *pb, PTLCommon *ptl)
+{
+    int i;
+    /* Emit the fields of one PTLCommon in fixed order; level_idc is not written here. */
+    put_bits(pb, 2, ptl->profile_space);
+    put_bits(pb, 1, ptl->tier_flag);
+    put_bits(pb, 5, ptl->profile_idc);
+    for (i = 0; i < 32; i++) // one bit per compatibility flag, index 0 first
+        put_bits(pb, 1, ptl->profile_compatibility_flag[i]);
+    put_bits(pb, 1, ptl->progressive_source_flag);
+    put_bits(pb, 1, ptl->interlaced_source_flag);
+    put_bits(pb, 1, ptl->non_packed_constraint_flag);
+    put_bits(pb, 1, ptl->frame_only_constraint_flag);
+    put_bits32(pb, 0);   // reserved (first 32 of 44 zero bits)
+    put_bits(pb, 12, 0); // reserved (remaining 12 zero bits)
+}
+
+static void write_ptl(PutBitContext *pb, PTL *ptl, int max_num_sub_layers)
+{
+    int i;
+    /* General layer and its 8-bit level come first, then the per-sub-layer data. */
+    write_ptl_layer(pb, &ptl->general_ptl);
+    put_bits(pb, 8, ptl->general_ptl.level_idc);
+    /* max_num_sub_layers - 1 pairs of presence flags (profile, then level). */
+    for (i = 0; i < max_num_sub_layers - 1; i++) {
+        put_bits(pb, 1, ptl->sub_layer_profile_present_flag[i]);
+        put_bits(pb, 1, ptl->sub_layer_level_present_flag[i]);
+    }
+    /* With more than one sub-layer, pad the flag table up to 8 two-bit slots. */
+    if (max_num_sub_layers > 1)
+        for (i = max_num_sub_layers - 1; i < 8; i++)
+            put_bits(pb, 2, 0); // reserved
+    /* Only the parts announced by the presence flags above are written. */
+    for (i = 0; i < max_num_sub_layers - 1; i++) {
+        if (ptl->sub_layer_profile_present_flag[i])
+            write_ptl_layer(pb, &ptl->sub_layer_ptl[i]);
+        if (ptl->sub_layer_level_present_flag[i])
+            put_bits(pb, 8, ptl->sub_layer_ptl[i].level_idc);
+    }
+}
+
+int ff_hevc_encode_nal_vps(HEVCVPS *vps, unsigned int id,
+                           uint8_t *buf, int buf_size)
+{
+    PutBitContext pb;
+    int i;
+    /* Serialize vps (with the given id) into buf; returns the byte count or AVERROR_PATCHWELCOME. */
+    init_put_bits(&pb, buf, buf_size);
+    put_bits(&pb,  4, id);
+    put_bits(&pb,  2, 3);                               // reserved
+    put_bits(&pb,  6, vps->vps_max_layers - 1);
+    put_bits(&pb,  3, vps->vps_max_sub_layers - 1);
+    put_bits(&pb,  1, vps->vps_temporal_id_nesting_flag);
+    put_bits(&pb, 16, 0xffff);                          // reserved
+
+    write_ptl(&pb, &vps->ptl, vps->vps_max_sub_layers);
+    // With the flag unset only the highest sub-layer's entry is coded, so start at the last sub-layer index.
+    put_bits(&pb, 1, vps->vps_sub_layer_ordering_info_present_flag);
+    for (i = vps->vps_sub_layer_ordering_info_present_flag ? 0 : vps->vps_max_sub_layers - 1;
+         i < vps->vps_max_sub_layers; i++) {
+        set_ue_golomb(&pb, vps->vps_max_dec_pic_buffering[i] - 1);
+        set_ue_golomb(&pb, vps->vps_num_reorder_pics[i]);
+        set_ue_golomb(&pb, vps->vps_max_latency_increase[i] + 1);
+    }
+
+    put_bits(&pb, 6, vps->vps_max_layer_id);
+    set_ue_golomb(&pb, vps->vps_num_layer_sets - 1);
+
+    // writing layer_id_included_flag not supported
+    if (vps->vps_num_layer_sets > 1)
+        return AVERROR_PATCHWELCOME;
+
+    put_bits(&pb, 1, vps->vps_timing_info_present_flag);
+    if (vps->vps_timing_info_present_flag) {
+        put_bits32(&pb, vps->vps_num_units_in_tick);
+        put_bits32(&pb, vps->vps_time_scale);
+        put_bits(&pb, 1, vps->vps_poc_proportional_to_timing_flag);
+        if (vps->vps_poc_proportional_to_timing_flag)
+            set_ue_golomb(&pb, vps->vps_num_ticks_poc_diff_one - 1);
+
+        set_ue_golomb(&pb, vps->vps_num_hrd_parameters); // the count is always coded, even when 0
+        if (vps->vps_num_hrd_parameters)                 // writing HRD parameters not supported
+            return AVERROR_PATCHWELCOME;
+    }
+
+    put_bits(&pb, 1, 0);    // extension flag
+
+    put_bits(&pb, 1, 1);    // stop bit
+    flush_put_bits(&pb);    // zero-pad to a byte boundary and write out the bits still buffered
+
+    return put_bits_count(&pb) / 8;
+}
diff --git a/libavcodec/hevc_refs.c b/libavcodec/hevc_refs.c
index fea3d125..611ad458 100644
--- a/libavcodec/hevc_refs.c
+++ b/libavcodec/hevc_refs.c
@@ -55,10 +55,10 @@ void ff_hevc_unref_frame(HEVCContext *s, HEVCFrame *frame, int flags)
 
 RefPicList *ff_hevc_get_ref_list(HEVCContext *s, HEVCFrame *ref, int x0, int y0)
 {
-    int x_cb         = x0 >> s->sps->log2_ctb_size;
-    int y_cb         = y0 >> s->sps->log2_ctb_size;
-    int pic_width_cb = s->sps->ctb_width;
-    int ctb_addr_ts  = s->pps->ctb_addr_rs_to_ts[y_cb * pic_width_cb + x_cb];
+    int x_cb         = x0 >> s->ps.sps->log2_ctb_size;
+    int y_cb         = y0 >> s->ps.sps->log2_ctb_size;
+    int pic_width_cb = s->ps.sps->ctb_width;
+    int ctb_addr_ts  = s->ps.pps->ctb_addr_rs_to_ts[y_cb * pic_width_cb + x_cb];
     return (RefPicList *)ref->rpl_tab[ctb_addr_ts];
 }
 
@@ -91,7 +91,7 @@ static HEVCFrame *alloc_frame(HEVCContext *s)
         if (ret < 0)
             return NULL;
 
-        frame->rpl_buf = av_buffer_allocz(s->nb_nals * sizeof(RefPicListTab));
+        frame->rpl_buf = av_buffer_allocz(s->pkt.nb_nals * sizeof(RefPicListTab));
         if (!frame->rpl_buf)
             goto fail;
 
@@ -104,7 +104,7 @@ static HEVCFrame *alloc_frame(HEVCContext *s)
         if (!frame->rpl_tab_buf)
             goto fail;
         frame->rpl_tab   = (RefPicListTab **)frame->rpl_tab_buf->data;
-        frame->ctb_count = s->sps->ctb_width * s->sps->ctb_height;
+        frame->ctb_count = s->ps.sps->ctb_width * s->ps.sps->ctb_height;
         for (j = 0; j < frame->ctb_count; j++)
             frame->rpl_tab[j] = (RefPicListTab *)frame->rpl_buf->data;
 
@@ -162,7 +162,7 @@ int ff_hevc_set_new_ref(HEVCContext *s, AVFrame **frame, int poc)
 
     ref->poc      = poc;
     ref->sequence = s->seq_decode;
-    ref->window   = s->sps->output_window;
+    ref->window   = s->ps.sps->output_window;
 
     return 0;
 }
@@ -174,7 +174,7 @@ int ff_hevc_output_frame(HEVCContext *s, AVFrame *out, int flush)
         int min_poc   = INT_MAX;
         int i, min_idx, ret;
 
-        if (s->sh.no_output_of_prior_pics_flag == 1) {
+        if (s->sh.no_output_of_prior_pics_flag == 1 && s->no_rasl_output_flag == 1) {
             for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
                 HEVCFrame *frame = &s->DPB[i];
                 if (!(frame->flags & HEVC_FRAME_FLAG_BUMPING) && frame->poc != s->poc &&
@@ -197,8 +197,8 @@ int ff_hevc_output_frame(HEVCContext *s, AVFrame *out, int flush)
         }
 
         /* wait for more frames before output */
-        if (!flush && s->seq_output == s->seq_decode && s->sps &&
-            nb_output <= s->sps->temporal_layer[s->sps->max_sub_layers - 1].num_reorder_pics)
+        if (!flush && s->seq_output == s->seq_decode && s->ps.sps &&
+            nb_output <= s->ps.sps->temporal_layer[s->ps.sps->max_sub_layers - 1].num_reorder_pics)
             return 0;
 
         if (nb_output) {
@@ -206,7 +206,7 @@ int ff_hevc_output_frame(HEVCContext *s, AVFrame *out, int flush)
             AVFrame *dst = out;
             AVFrame *src = frame->frame;
             const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(src->format);
-            int pixel_shift = !!(desc->comp[0].depth_minus1 > 7);
+            int pixel_shift = !!(desc->comp[0].depth > 8);
 
             ret = av_frame_ref(out, src);
             if (frame->flags & HEVC_FRAME_FLAG_BUMPING)
@@ -252,7 +252,7 @@ void ff_hevc_bump_frame(HEVCContext *s)
         }
     }
 
-    if (s->sps && dpb >= s->sps->temporal_layer[s->sps->max_sub_layers - 1].max_dec_pic_buffering) {
+    if (s->ps.sps && dpb >= s->ps.sps->temporal_layer[s->ps.sps->max_sub_layers - 1].max_dec_pic_buffering) {
         for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
             HEVCFrame *frame = &s->DPB[i];
             if ((frame->flags) &&
@@ -281,7 +281,7 @@ static int init_slice_rpl(HEVCContext *s)
 {
     HEVCFrame *frame = s->ref;
     int ctb_count    = frame->ctb_count;
-    int ctb_addr_ts  = s->pps->ctb_addr_rs_to_ts[s->sh.slice_segment_addr];
+    int ctb_addr_ts  = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_segment_addr];
     int i;
 
     if (s->slice_idx >= frame->rpl_buf->size / sizeof(RefPicListTab))
@@ -368,7 +368,7 @@ int ff_hevc_slice_rpl(HEVCContext *s)
 static HEVCFrame *find_ref_idx(HEVCContext *s, int poc)
 {
     int i;
-    int LtMask = (1 << s->sps->log2_max_poc_lsb) - 1;
+    int LtMask = (1 << s->ps.sps->log2_max_poc_lsb) - 1;
 
     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
         HEVCFrame *ref = &s->DPB[i];
@@ -408,16 +408,16 @@ static HEVCFrame *generate_missing_ref(HEVCContext *s, int poc)
         return NULL;
 
     if (!s->avctx->hwaccel) {
-        if (!s->sps->pixel_shift) {
+        if (!s->ps.sps->pixel_shift) {
             for (i = 0; frame->frame->buf[i]; i++)
-                memset(frame->frame->buf[i]->data, 1 << (s->sps->bit_depth - 1),
+                memset(frame->frame->buf[i]->data, 1 << (s->ps.sps->bit_depth - 1),
                        frame->frame->buf[i]->size);
         } else {
             for (i = 0; frame->frame->data[i]; i++)
-                for (y = 0; y < (s->sps->height >> s->sps->vshift[i]); y++)
-                    for (x = 0; x < (s->sps->width >> s->sps->hshift[i]); x++) {
+                for (y = 0; y < (s->ps.sps->height >> s->ps.sps->vshift[i]); y++)
+                    for (x = 0; x < (s->ps.sps->width >> s->ps.sps->hshift[i]); x++) {
                         AV_WN16(frame->frame->data[i] + y * frame->frame->linesize[i] + 2 * x,
-                                1 << (s->sps->bit_depth - 1));
+                                1 << (s->ps.sps->bit_depth - 1));
                     }
         }
     }
@@ -517,7 +517,7 @@ int ff_hevc_frame_rps(HEVCContext *s)
 
 int ff_hevc_compute_poc(HEVCContext *s, int poc_lsb)
 {
-    int max_poc_lsb  = 1 << s->sps->log2_max_poc_lsb;
+    int max_poc_lsb  = 1 << s->ps.sps->log2_max_poc_lsb;
     int prev_poc_lsb = s->pocTid0 % max_poc_lsb;
     int prev_poc_msb = s->pocTid0 - prev_poc_lsb;
     int poc_msb;
diff --git a/libavcodec/hevc_sei.c b/libavcodec/hevc_sei.c
index 13ebcd3e..40685fe5 100644
--- a/libavcodec/hevc_sei.c
+++ b/libavcodec/hevc_sei.c
@@ -25,7 +25,35 @@
 #include "golomb.h"
 #include "hevc.h"
 
-static void decode_nal_sei_decoded_picture_hash(HEVCContext *s)
+enum HEVC_SEI_TYPE { // SEI payload type codes; the prefix/suffix dispatchers in this file switch on them
+    SEI_TYPE_BUFFERING_PERIOD                     = 0,
+    SEI_TYPE_PICTURE_TIMING                       = 1,
+    SEI_TYPE_PAN_SCAN_RECT                        = 2,
+    SEI_TYPE_FILLER_PAYLOAD                       = 3,
+    SEI_TYPE_USER_DATA_REGISTERED_ITU_T_T35       = 4,
+    SEI_TYPE_USER_DATA_UNREGISTERED               = 5,
+    SEI_TYPE_RECOVERY_POINT                       = 6,
+    SEI_TYPE_SCENE_INFO                           = 9,
+    SEI_TYPE_FULL_FRAME_SNAPSHOT                  = 15,
+    SEI_TYPE_PROGRESSIVE_REFINEMENT_SEGMENT_START = 16,
+    SEI_TYPE_PROGRESSIVE_REFINEMENT_SEGMENT_END   = 17,
+    SEI_TYPE_FILM_GRAIN_CHARACTERISTICS           = 19,
+    SEI_TYPE_POST_FILTER_HINT                     = 22,
+    SEI_TYPE_TONE_MAPPING_INFO                    = 23,
+    SEI_TYPE_FRAME_PACKING                        = 45,
+    SEI_TYPE_DISPLAY_ORIENTATION                  = 47,
+    SEI_TYPE_SOP_DESCRIPTION                      = 128,
+    SEI_TYPE_ACTIVE_PARAMETER_SETS                = 129,
+    SEI_TYPE_DECODING_UNIT_INFO                   = 130,
+    SEI_TYPE_TEMPORAL_LEVEL0_INDEX                = 131,
+    SEI_TYPE_DECODED_PICTURE_HASH                 = 132, // the prefix dispatcher also accepts 256 for this payload
+    SEI_TYPE_SCALABLE_NESTING                     = 133,
+    SEI_TYPE_REGION_REFRESH_INFO                  = 134,
+    SEI_TYPE_MASTERING_DISPLAY_INFO               = 137,
+    SEI_TYPE_CONTENT_LIGHT_LEVEL_INFO             = 144,
+};
+
+static int decode_nal_sei_decoded_picture_hash(HEVCContext *s)
 {
     int cIdx, i;
     uint8_t hash_type;
@@ -47,13 +75,38 @@ static void decode_nal_sei_decoded_picture_hash(HEVCContext *s)
             skip_bits(gb, 32);
         }
     }
+    return 0;
+}
+
+static int decode_nal_sei_mastering_display_info(HEVCContext *s)
+{
+    GetBitContext *gb = &s->HEVClc->gb;
+    int prim, coord;
+
+    /* Three display primaries, each a pair of 16-bit values read in order. */
+    for (prim = 0; prim < 3; prim++)
+        for (coord = 0; coord < 2; coord++)
+            s->display_primaries[prim][coord] = get_bits(gb, 16);
+
+    /* White point: one more pair of 16-bit values (x, then y). */
+    for (coord = 0; coord < 2; coord++)
+        s->white_point[coord] = get_bits(gb, 16);
+
+    /* Maximum mastering luminance first, then minimum, 32 bits apiece. */
+    s->max_mastering_luminance = get_bits_long(gb, 32);
+    s->min_mastering_luminance = get_bits_long(gb, 32);
+
+    /* Start at 2: the message precedes the first frame that uses it, and the
+     * flag is meant to be decremented on IRAP access units (not done here). */
+    s->sei_mastering_display_info_present = 2;
+    return 0;
 }
 
-static void decode_nal_sei_frame_packing_arrangement(HEVCContext *s)
+static int decode_nal_sei_frame_packing_arrangement(HEVCContext *s)
 {
     GetBitContext *gb = &s->HEVClc->gb;
 
-    get_ue_golomb(gb);                  // frame_packing_arrangement_id
+    get_ue_golomb_long(gb);             // frame_packing_arrangement_id
     s->sei_frame_packing_present = !get_bits1(gb);
 
     if (s->sei_frame_packing_present) {
@@ -72,9 +125,10 @@ static void decode_nal_sei_frame_packing_arrangement(HEVCContext *s)
         skip_bits1(gb);         // frame_packing_arrangement_persistance_flag
     }
     skip_bits1(gb);             // upsampled_aspect_ratio_flag
+    return 0;
 }
 
-static void decode_nal_sei_display_orientation(HEVCContext *s)
+static int decode_nal_sei_display_orientation(HEVCContext *s)
 {
     GetBitContext *gb = &s->HEVClc->gb;
 
@@ -87,6 +141,8 @@ static void decode_nal_sei_display_orientation(HEVCContext *s)
         s->sei_anticlockwise_rotation = get_bits(gb, 16);
         skip_bits1(gb);     // display_orientation_persistence_flag
     }
+
+    return 0;
 }
 
 static int decode_pic_timing(HEVCContext *s)
@@ -94,9 +150,9 @@ static int decode_pic_timing(HEVCContext *s)
     GetBitContext *gb = &s->HEVClc->gb;
     HEVCSPS *sps;
 
-    if (!s->sps_list[s->active_seq_parameter_set_id])
+    if (!s->ps.sps_list[s->active_seq_parameter_set_id])
         return(AVERROR(ENOMEM));
-    sps = (HEVCSPS*)s->sps_list[s->active_seq_parameter_set_id]->data;
+    sps = (HEVCSPS*)s->ps.sps_list[s->active_seq_parameter_set_id]->data;
 
     if (sps->vui.frame_field_info_present_flag) {
         int pic_struct = get_bits(gb, 4);
@@ -114,6 +170,90 @@ static int decode_pic_timing(HEVCContext *s)
     return 1;
 }
 
+static int decode_registered_user_data_closed_caption(HEVCContext *s, int size)
+{
+    int flag;
+    int user_data_type_code;
+    int cc_count;
+    /* Append the caption triplets of a type-0x3 payload to s->a53_caption; other types are skipped. */
+    GetBitContext *gb = &s->HEVClc->gb;
+
+    if (size < 3)
+       return AVERROR(EINVAL);
+
+    user_data_type_code = get_bits(gb, 8);
+    if (user_data_type_code == 0x3) {
+        skip_bits(gb, 1); // reserved
+
+        flag = get_bits(gb, 1); // process_cc_data_flag
+        if (flag) {
+            skip_bits(gb, 1);
+            cc_count = get_bits(gb, 5);
+            skip_bits(gb, 8); // reserved
+            size -= 2; // NOTE(review): three bytes were read by this point but only two are deducted - confirm
+
+            if (cc_count && size >= cc_count * 3) {
+                const uint64_t new_size = (s->a53_caption_size + cc_count
+                                           * UINT64_C(3));
+                int i, ret;
+
+                if (new_size > INT_MAX)
+                    return AVERROR(EINVAL);
+
+                /* Allow merging of the cc data from two fields. */
+                ret = av_reallocp(&s->a53_caption, new_size);
+                if (ret < 0) // NOTE(review): a53_caption_size keeps its old value on this path - confirm that is intended
+                    return ret;
+
+                for (i = 0; i < cc_count; i++) { // three bytes are appended per caption entry
+                    s->a53_caption[s->a53_caption_size++] = get_bits(gb, 8);
+                    s->a53_caption[s->a53_caption_size++] = get_bits(gb, 8);
+                    s->a53_caption[s->a53_caption_size++] = get_bits(gb, 8);
+                }
+                skip_bits(gb, 8); // marker_bits
+            }
+        }
+    } else {
+        int i;
+        for (i = 0; i < size - 1; i++) // the type byte was already read, so size - 1 bytes remain
+            skip_bits(gb, 8);
+    }
+
+    return 0;
+}
+
+static int decode_nal_sei_user_data_registered_itu_t_t35(HEVCContext *s, int size)
+{
+    uint32_t country_code;
+    uint32_t user_identifier;
+    /* Parse a registered user-data payload of size bytes; only the 'GA94' identifier is decoded. */
+    GetBitContext *gb = &s->HEVClc->gb;
+
+    if (size < 7)
+        return AVERROR(EINVAL);
+    size -= 7; // 1 (country code) + 2 (skipped) + 4 (identifier) bytes are read below
+
+    country_code = get_bits(gb, 8);
+    if (country_code == 0xFF) {
+        if (--size < 0) // the extra byte must fit, otherwise size would go negative below
+            return AVERROR(EINVAL);
+        skip_bits(gb, 8);
+    }
+
+    skip_bits(gb, 16);
+
+    user_identifier = get_bits_long(gb, 32);
+
+    switch (user_identifier) {
+        case MKBETAG('G', 'A', '9', '4'):
+            return decode_registered_user_data_closed_caption(s, size);
+        default:
+            skip_bits_long(gb, size * 8);
+            break;
+    }
+    return 0;
+}
+
 static int active_parameter_sets(HEVCContext *s)
 {
     GetBitContext *gb = &s->HEVClc->gb;
@@ -144,6 +284,53 @@ static int active_parameter_sets(HEVCContext *s)
     return 0;
 }
 
+static int decode_nal_sei_prefix(HEVCContext *s, int type, int size)
+{
+    GetBitContext *gb = &s->HEVClc->gb;
+    /* Dispatch one prefix SEI payload by type; size is the payload length in bytes. */
+    switch (type) {
+    case 256:  // Mismatched value from HM 8.1
+        return decode_nal_sei_decoded_picture_hash(s);
+    case SEI_TYPE_FRAME_PACKING:
+        return decode_nal_sei_frame_packing_arrangement(s);
+    case SEI_TYPE_DISPLAY_ORIENTATION:
+        return decode_nal_sei_display_orientation(s);
+    case SEI_TYPE_PICTURE_TIMING:
+        {   // skip only what decode_pic_timing() left unread, so the reader stops at the payload end
+            int pos = get_bits_count(gb), ret = decode_pic_timing(s);
+            av_log(s->avctx, AV_LOG_DEBUG, "Skipped PREFIX SEI %d\n", type);
+            skip_bits_long(gb, 8 * size - (get_bits_count(gb) - pos));
+            return ret;
+        }
+    case SEI_TYPE_MASTERING_DISPLAY_INFO:
+        return decode_nal_sei_mastering_display_info(s);
+    case SEI_TYPE_ACTIVE_PARAMETER_SETS:
+        active_parameter_sets(s); // its result is not propagated
+        av_log(s->avctx, AV_LOG_DEBUG, "Skipped PREFIX SEI %d\n", type);
+        return 0;
+    case SEI_TYPE_USER_DATA_REGISTERED_ITU_T_T35:
+        return decode_nal_sei_user_data_registered_itu_t_t35(s, size);
+    default:
+        av_log(s->avctx, AV_LOG_DEBUG, "Skipped PREFIX SEI %d\n", type);
+        skip_bits_long(gb, 8 * size);
+        return 0;
+    }
+}
+
+static int decode_nal_sei_suffix(HEVCContext *s, int type, int size)
+{
+    GetBitContext *gb = &s->HEVClc->gb;
+
+    /* The decoded picture hash is the only suffix payload parsed here. */
+    if (type == SEI_TYPE_DECODED_PICTURE_HASH)
+        return decode_nal_sei_decoded_picture_hash(s);
+
+    /* Any other payload is logged and stepped over, 8 bits per unit of size. */
+    av_log(s->avctx, AV_LOG_DEBUG, "Skipped SUFFIX SEI %d\n", type);
+    skip_bits_long(gb, 8 * size);
+    return 0;
+}
+
 static int decode_nal_sei_message(HEVCContext *s)
 {
     GetBitContext *gb = &s->HEVClc->gb;
@@ -163,31 +350,9 @@ static int decode_nal_sei_message(HEVCContext *s)
         payload_size += byte;
     }
     if (s->nal_unit_type == NAL_SEI_PREFIX) {
-        if (payload_type == 256 /*&& s->decode_checksum_sei*/) {
-            decode_nal_sei_decoded_picture_hash(s);
-        } else if (payload_type == 45) {
-            decode_nal_sei_frame_packing_arrangement(s);
-        } else if (payload_type == 47) {
-            decode_nal_sei_display_orientation(s);
-        } else if (payload_type == 1){
-            int ret = decode_pic_timing(s);
-            av_log(s->avctx, AV_LOG_DEBUG, "Skipped PREFIX SEI %d\n", payload_type);
-            skip_bits(gb, 8 * payload_size);
-            return ret;
-        } else if (payload_type == 129){
-            active_parameter_sets(s);
-            av_log(s->avctx, AV_LOG_DEBUG, "Skipped PREFIX SEI %d\n", payload_type);
-        } else {
-            av_log(s->avctx, AV_LOG_DEBUG, "Skipped PREFIX SEI %d\n", payload_type);
-            skip_bits(gb, 8*payload_size);
-        }
+        return decode_nal_sei_prefix(s, payload_type, payload_size);
     } else { /* nal_unit_type == NAL_SEI_SUFFIX */
-        if (payload_type == 132 /* && s->decode_checksum_sei */)
-            decode_nal_sei_decoded_picture_hash(s);
-        else {
-            av_log(s->avctx, AV_LOG_DEBUG, "Skipped SUFFIX SEI %d\n", payload_type);
-            skip_bits(gb, 8 * payload_size);
-        }
+        return decode_nal_sei_suffix(s, payload_type, payload_size);
     }
     return 1;
 }
diff --git a/libavcodec/hevcdsp.c b/libavcodec/hevcdsp.c
index be01e927..9d773d96 100644
--- a/libavcodec/hevcdsp.c
+++ b/libavcodec/hevcdsp.c
@@ -216,7 +216,7 @@ void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth)
     hevcdsp->sao_band_filter[1] =                                              \
     hevcdsp->sao_band_filter[2] =                                              \
     hevcdsp->sao_band_filter[3] =                                              \
-    hevcdsp->sao_band_filter[4] = FUNC(sao_band_filter_0, depth);              \
+    hevcdsp->sao_band_filter[4] = FUNC(sao_band_filter, depth);                \
     hevcdsp->sao_edge_filter[0] =                                              \
     hevcdsp->sao_edge_filter[1] =                                              \
     hevcdsp->sao_edge_filter[2] =                                              \
diff --git a/libavcodec/hevcdsp.h b/libavcodec/hevcdsp.h
index d2ea8672..9f1f6dd5 100644
--- a/libavcodec/hevcdsp.h
+++ b/libavcodec/hevcdsp.h
@@ -61,7 +61,7 @@ typedef struct HEVCDSPContext {
     void (*sao_band_filter[5])(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src,
                                int16_t *sao_offset_val, int sao_left_class, int width, int height);
 
-    /* implicit stride_src parameter has value of 2 * MAX_PB_SIZE + FF_INPUT_BUFFER_PADDING_SIZE */
+    /* implicit stride_src parameter has value of 2 * MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE */
     void (*sao_edge_filter[5])(uint8_t *_dst /* align 16 */, uint8_t *_src /* align 32 */, ptrdiff_t stride_dst,
                                int16_t *sao_offset_val, int sao_eo_class, int width, int height);
 
diff --git a/libavcodec/hevcdsp_template.c b/libavcodec/hevcdsp_template.c
index cec28e45..b840d179 100644
--- a/libavcodec/hevcdsp_template.c
+++ b/libavcodec/hevcdsp_template.c
@@ -301,10 +301,10 @@ IDCT_DC(32)
 #undef SCALE
 #undef ADD_AND_SCALE
 
-static void FUNC(sao_band_filter_0)(uint8_t *_dst, uint8_t *_src,
-                                    ptrdiff_t stride_dst, ptrdiff_t stride_src,
-                                    int16_t *sao_offset_val, int sao_left_class,
-                                    int width, int height)
+static void FUNC(sao_band_filter)(uint8_t *_dst, uint8_t *_src,
+                                  ptrdiff_t stride_dst, ptrdiff_t stride_src,
+                                  int16_t *sao_offset_val, int sao_left_class,
+                                  int width, int height)
 {
     pixel *dst = (pixel *)_dst;
     pixel *src = (pixel *)_src;
@@ -341,7 +341,7 @@ static void FUNC(sao_edge_filter)(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride
     pixel *src = (pixel *)_src;
     int a_stride, b_stride;
     int x, y;
-    ptrdiff_t stride_src = (2*MAX_PB_SIZE + FF_INPUT_BUFFER_PADDING_SIZE) / sizeof(pixel);
+    ptrdiff_t stride_src = (2*MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE) / sizeof(pixel);
     stride_dst /= sizeof(pixel);
 
     a_stride = pos[eo][0][0] + pos[eo][0][1] * stride_src;
diff --git a/libavcodec/hevcpred.c b/libavcodec/hevcpred.c
index 4598229b..02c17660 100644
--- a/libavcodec/hevcpred.c
+++ b/libavcodec/hevcpred.c
@@ -74,4 +74,7 @@ void ff_hevc_pred_init(HEVCPredContext *hpc, int bit_depth)
         HEVC_PRED(8);
         break;
     }
+
+    if (ARCH_MIPS)
+        ff_hevc_pred_init_mips(hpc, bit_depth);
 }
diff --git a/libavcodec/hevcpred.h b/libavcodec/hevcpred.h
index 7f14a76d..eb176636 100644
--- a/libavcodec/hevcpred.h
+++ b/libavcodec/hevcpred.h
@@ -41,5 +41,6 @@ typedef struct HEVCPredContext {
 } HEVCPredContext;
 
 void ff_hevc_pred_init(HEVCPredContext *hpc, int bit_depth);
+void ff_hevc_pred_init_mips(HEVCPredContext *hpc, int bit_depth);
 
 #endif /* AVCODEC_HEVCPRED_H */
diff --git a/libavcodec/hevcpred_template.c b/libavcodec/hevcpred_template.c
index 6b763b3a..6ae87cca 100644
--- a/libavcodec/hevcpred_template.c
+++ b/libavcodec/hevcpred_template.c
@@ -31,7 +31,7 @@ static av_always_inline void FUNC(intra_pred)(HEVCContext *s, int x0, int y0,
                                               int log2_size, int c_idx)
 {
 #define PU(x) \
-    ((x) >> s->sps->log2_min_pu_size)
+    ((x) >> s->ps.sps->log2_min_pu_size)
 #define MVF(x, y) \
     (s->ref->tab_mvf[(x) + (y) * min_pu_width])
 #define MVF_PU(x, y) \
@@ -39,7 +39,7 @@ static av_always_inline void FUNC(intra_pred)(HEVCContext *s, int x0, int y0,
 #define IS_INTRA(x, y) \
     (MVF_PU(x, y).pred_flag == PF_INTRA)
 #define MIN_TB_ADDR_ZS(x, y) \
-    s->pps->min_tb_addr_zs[(y) * (s->sps->tb_mask+2) + (x)]
+    s->ps.pps->min_tb_addr_zs[(y) * (s->ps.sps->tb_mask+2) + (x)]
 #define EXTEND(ptr, val, len)         \
 do {                                  \
     pixel4 pix = PIXEL_SPLAT_X4(val); \
@@ -72,24 +72,24 @@ do {                                  \
 
     HEVCLocalContext *lc = s->HEVClc;
     int i;
-    int hshift = s->sps->hshift[c_idx];
-    int vshift = s->sps->vshift[c_idx];
+    int hshift = s->ps.sps->hshift[c_idx];
+    int vshift = s->ps.sps->vshift[c_idx];
     int size = (1 << log2_size);
     int size_in_luma_h = size << hshift;
-    int size_in_tbs_h  = size_in_luma_h >> s->sps->log2_min_tb_size;
+    int size_in_tbs_h  = size_in_luma_h >> s->ps.sps->log2_min_tb_size;
     int size_in_luma_v = size << vshift;
-    int size_in_tbs_v  = size_in_luma_v >> s->sps->log2_min_tb_size;
+    int size_in_tbs_v  = size_in_luma_v >> s->ps.sps->log2_min_tb_size;
     int x = x0 >> hshift;
     int y = y0 >> vshift;
-    int x_tb = (x0 >> s->sps->log2_min_tb_size) & s->sps->tb_mask;
-    int y_tb = (y0 >> s->sps->log2_min_tb_size) & s->sps->tb_mask;
+    int x_tb = (x0 >> s->ps.sps->log2_min_tb_size) & s->ps.sps->tb_mask;
+    int y_tb = (y0 >> s->ps.sps->log2_min_tb_size) & s->ps.sps->tb_mask;
 
     int cur_tb_addr = MIN_TB_ADDR_ZS(x_tb, y_tb);
 
     ptrdiff_t stride = s->frame->linesize[c_idx] / sizeof(pixel);
     pixel *src = (pixel*)s->frame->data[c_idx] + x + y * stride;
 
-    int min_pu_width = s->sps->min_pu_width;
+    int min_pu_width = s->ps.sps->min_pu_width;
 
     enum IntraPredMode mode = c_idx ? lc->tu.intra_pred_mode_c :
                               lc->tu.intra_pred_mode;
@@ -103,28 +103,28 @@ do {                                  \
     pixel  *top           = top_array  + 1;
     pixel  *filtered_left = filtered_left_array + 1;
     pixel  *filtered_top  = filtered_top_array  + 1;
-    int cand_bottom_left = lc->na.cand_bottom_left && cur_tb_addr > MIN_TB_ADDR_ZS( x_tb - 1, (y_tb + size_in_tbs_v) & s->sps->tb_mask);
+    int cand_bottom_left = lc->na.cand_bottom_left && cur_tb_addr > MIN_TB_ADDR_ZS( x_tb - 1, (y_tb + size_in_tbs_v) & s->ps.sps->tb_mask);
     int cand_left        = lc->na.cand_left;
     int cand_up_left     = lc->na.cand_up_left;
     int cand_up          = lc->na.cand_up;
-    int cand_up_right    = lc->na.cand_up_right    && cur_tb_addr > MIN_TB_ADDR_ZS((x_tb + size_in_tbs_h) & s->sps->tb_mask, y_tb - 1);
+    int cand_up_right    = lc->na.cand_up_right    && cur_tb_addr > MIN_TB_ADDR_ZS((x_tb + size_in_tbs_h) & s->ps.sps->tb_mask, y_tb - 1);
 
-    int bottom_left_size = (FFMIN(y0 + 2 * size_in_luma_v, s->sps->height) -
+    int bottom_left_size = (FFMIN(y0 + 2 * size_in_luma_v, s->ps.sps->height) -
                            (y0 + size_in_luma_v)) >> vshift;
-    int top_right_size   = (FFMIN(x0 + 2 * size_in_luma_h, s->sps->width) -
+    int top_right_size   = (FFMIN(x0 + 2 * size_in_luma_h, s->ps.sps->width) -
                            (x0 + size_in_luma_h)) >> hshift;
 
-    if (s->pps->constrained_intra_pred_flag == 1) {
+    if (s->ps.pps->constrained_intra_pred_flag == 1) {
         int size_in_luma_pu_v = PU(size_in_luma_v);
         int size_in_luma_pu_h = PU(size_in_luma_h);
-        int on_pu_edge_x    = !av_mod_uintp2(x0, s->sps->log2_min_pu_size);
-        int on_pu_edge_y    = !av_mod_uintp2(y0, s->sps->log2_min_pu_size);
+        int on_pu_edge_x    = !av_mod_uintp2(x0, s->ps.sps->log2_min_pu_size);
+        int on_pu_edge_y    = !av_mod_uintp2(y0, s->ps.sps->log2_min_pu_size);
         if (!size_in_luma_pu_h)
             size_in_luma_pu_h++;
         if (cand_bottom_left == 1 && on_pu_edge_x) {
             int x_left_pu   = PU(x0 - 1);
             int y_bottom_pu = PU(y0 + size_in_luma_v);
-            int max = FFMIN(size_in_luma_pu_v, s->sps->min_pu_height - y_bottom_pu);
+            int max = FFMIN(size_in_luma_pu_v, s->ps.sps->min_pu_height - y_bottom_pu);
             cand_bottom_left = 0;
             for (i = 0; i < max; i += 2)
                 cand_bottom_left |= (MVF(x_left_pu, y_bottom_pu + i).pred_flag == PF_INTRA);
@@ -132,7 +132,7 @@ do {                                  \
         if (cand_left == 1 && on_pu_edge_x) {
             int x_left_pu   = PU(x0 - 1);
             int y_left_pu   = PU(y0);
-            int max = FFMIN(size_in_luma_pu_v, s->sps->min_pu_height - y_left_pu);
+            int max = FFMIN(size_in_luma_pu_v, s->ps.sps->min_pu_height - y_left_pu);
             cand_left = 0;
             for (i = 0; i < max; i += 2)
                 cand_left |= (MVF(x_left_pu, y_left_pu + i).pred_flag == PF_INTRA);
@@ -145,7 +145,7 @@ do {                                  \
         if (cand_up == 1 && on_pu_edge_y) {
             int x_top_pu    = PU(x0);
             int y_top_pu    = PU(y0 - 1);
-            int max = FFMIN(size_in_luma_pu_h, s->sps->min_pu_width - x_top_pu);
+            int max = FFMIN(size_in_luma_pu_h, s->ps.sps->min_pu_width - x_top_pu);
             cand_up = 0;
             for (i = 0; i < max; i += 2)
                 cand_up |= (MVF(x_top_pu + i, y_top_pu).pred_flag == PF_INTRA);
@@ -153,7 +153,7 @@ do {                                  \
         if (cand_up_right == 1 && on_pu_edge_y) {
             int y_top_pu    = PU(y0 - 1);
             int x_right_pu  = PU(x0 + size_in_luma_h);
-            int max = FFMIN(size_in_luma_pu_h, s->sps->min_pu_width - x_right_pu);
+            int max = FFMIN(size_in_luma_pu_h, s->ps.sps->min_pu_width - x_right_pu);
             cand_up_right = 0;
             for (i = 0; i < max; i += 2)
                 cand_up_right |= (MVF(x_right_pu + i, y_top_pu).pred_flag == PF_INTRA);
@@ -183,20 +183,20 @@ do {                                  \
                size - bottom_left_size);
     }
 
-    if (s->pps->constrained_intra_pred_flag == 1) {
+    if (s->ps.pps->constrained_intra_pred_flag == 1) {
         if (cand_bottom_left || cand_left || cand_up_left || cand_up || cand_up_right) {
-            int size_max_x = x0 + ((2 * size) << hshift) < s->sps->width ?
-                                    2 * size : (s->sps->width - x0) >> hshift;
-            int size_max_y = y0 + ((2 * size) << vshift) < s->sps->height ?
-                                    2 * size : (s->sps->height - y0) >> vshift;
+            int size_max_x = x0 + ((2 * size) << hshift) < s->ps.sps->width ?
+                                    2 * size : (s->ps.sps->width - x0) >> hshift;
+            int size_max_y = y0 + ((2 * size) << vshift) < s->ps.sps->height ?
+                                    2 * size : (s->ps.sps->height - y0) >> vshift;
             int j = size + (cand_bottom_left? bottom_left_size: 0) -1;
             if (!cand_up_right) {
-                size_max_x = x0 + ((size) << hshift) < s->sps->width ?
-                                                    size : (s->sps->width - x0) >> hshift;
+                size_max_x = x0 + ((size) << hshift) < s->ps.sps->width ?
+                                                    size : (s->ps.sps->width - x0) >> hshift;
             }
             if (!cand_bottom_left) {
-                size_max_y = y0 + (( size) << vshift) < s->sps->height ?
-                                                     size : (s->sps->height - y0) >> vshift;
+                size_max_y = y0 + (( size) << vshift) < s->ps.sps->height ?
+                                                     size : (s->ps.sps->height - y0) >> vshift;
             }
             if (cand_bottom_left || cand_left || cand_up_left) {
                 while (j > -1 && !IS_INTRA(-1, j))
@@ -287,14 +287,14 @@ do {                                  \
     top[-1] = left[-1];
 
     // Filtering process
-    if (!s->sps->intra_smoothing_disabled_flag && (c_idx == 0  || s->sps->chroma_format_idc == 3)) {
+    if (!s->ps.sps->intra_smoothing_disabled_flag && (c_idx == 0  || s->ps.sps->chroma_format_idc == 3)) {
         if (mode != INTRA_DC && size != 4){
             int intra_hor_ver_dist_thresh[] = { 7, 1, 0 };
             int min_dist_vert_hor = FFMIN(FFABS((int)(mode - 26U)),
                                           FFABS((int)(mode - 10U)));
             if (min_dist_vert_hor > intra_hor_ver_dist_thresh[log2_size - 3]) {
                 int threshold = 1 << (BIT_DEPTH - 5);
-                if (s->sps->sps_strong_intra_smoothing_enable_flag && c_idx == 0 &&
+                if (s->ps.sps->sps_strong_intra_smoothing_enable_flag && c_idx == 0 &&
                     log2_size == 5 &&
                     FFABS(top[-1]  + top[63]  - 2 * top[31])  < threshold &&
                     FFABS(left[-1] + left[63] - 2 * left[31]) < threshold) {
diff --git a/libavcodec/hnm4video.c b/libavcodec/hnm4video.c
index 31995bc7..a64dbb17 100644
--- a/libavcodec/hnm4video.c
+++ b/libavcodec/hnm4video.c
@@ -510,5 +510,5 @@ AVCodec ff_hnm4_video_decoder = {
     .init           = hnm_decode_init,
     .close          = hnm_decode_end,
     .decode         = hnm_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/hpeldsp.c b/libavcodec/hpeldsp.c
index 7763760e..8e2fd8fc 100644
--- a/libavcodec/hpeldsp.c
+++ b/libavcodec/hpeldsp.c
@@ -365,4 +365,6 @@ av_cold void ff_hpeldsp_init(HpelDSPContext *c, int flags)
         ff_hpeldsp_init_ppc(c, flags);
     if (ARCH_X86)
         ff_hpeldsp_init_x86(c, flags);
+    if (ARCH_MIPS)
+        ff_hpeldsp_init_mips(c, flags);
 }
diff --git a/libavcodec/hpeldsp.h b/libavcodec/hpeldsp.h
index 07c293ae..1a3cea54 100644
--- a/libavcodec/hpeldsp.h
+++ b/libavcodec/hpeldsp.h
@@ -99,5 +99,6 @@ void ff_hpeldsp_init_alpha(HpelDSPContext *c, int flags);
 void ff_hpeldsp_init_arm(HpelDSPContext *c, int flags);
 void ff_hpeldsp_init_ppc(HpelDSPContext *c, int flags);
 void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags);
+void ff_hpeldsp_init_mips(HpelDSPContext *c, int flags);
 
 #endif /* AVCODEC_HPELDSP_H */
diff --git a/libavcodec/hq_hqa.c b/libavcodec/hq_hqa.c
index 44092e7c..3ef83d4e 100644
--- a/libavcodec/hq_hqa.c
+++ b/libavcodec/hq_hqa.c
@@ -307,9 +307,11 @@ static int hq_hqa_decode_frame(AVCodecContext *avctx, void *data,
         return AVERROR_INVALIDDATA;
     }
 
-    info_tag = bytestream2_get_le32(&ctx->gbc);
+    info_tag = bytestream2_peek_le32(&ctx->gbc);
     if (info_tag == MKTAG('I', 'N', 'F', 'O')) {
-        int info_size = bytestream2_get_le32(&ctx->gbc);
+        int info_size;
+        bytestream2_skip(&ctx->gbc, 4);
+        info_size = bytestream2_get_le32(&ctx->gbc);
         if (bytestream2_get_bytes_left(&ctx->gbc) < info_size) {
             av_log(avctx, AV_LOG_ERROR, "Invalid INFO size (%d).\n", info_size);
             return AVERROR_INVALIDDATA;
@@ -379,7 +381,7 @@ AVCodec ff_hq_hqa_decoder = {
     .init           = hq_hqa_decode_init,
     .decode         = hq_hqa_decode_frame,
     .close          = hq_hqa_decode_close,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE |
                       FF_CODEC_CAP_INIT_CLEANUP,
 };
diff --git a/libavcodec/hqx.c b/libavcodec/hqx.c
index 44016ac3..138d9604 100644
--- a/libavcodec/hqx.c
+++ b/libavcodec/hqx.c
@@ -417,8 +417,8 @@ static int hqx_decode_frame(AVCodecContext *avctx, void *data,
 
     info_tag    = AV_RL32(src);
     if (info_tag == MKTAG('I', 'N', 'F', 'O')) {
-        int info_offset = AV_RL32(src + 4);
-        if (info_offset > UINT32_MAX - 8 || info_offset + 8 > avpkt->size) {
+        unsigned info_offset = AV_RL32(src + 4);
+        if (info_offset > INT_MAX || info_offset + 8 > avpkt->size) {
             av_log(avctx, AV_LOG_ERROR,
                    "Invalid INFO header offset: 0x%08"PRIX32" is too large.\n",
                    info_offset);
@@ -536,7 +536,7 @@ AVCodec ff_hqx_decoder = {
     .init           = hqx_decode_init,
     .decode         = hqx_decode_frame,
     .close          = hqx_decode_close,
-    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_SLICE_THREADS,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_SLICE_THREADS,
     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE |
                       FF_CODEC_CAP_INIT_CLEANUP,
 };
diff --git a/libavcodec/hqxdsp.h b/libavcodec/hqxdsp.h
index 16a42cc2..39ab3e2f 100644
--- a/libavcodec/hqxdsp.h
+++ b/libavcodec/hqxdsp.h
@@ -37,4 +37,3 @@ typedef struct HQXDSPContext {
 void ff_hqxdsp_init(HQXDSPContext *c);
 
 #endif /* AVCODEC_HQXDSP_H */
-
diff --git a/libavcodec/htmlsubtitles.c b/libavcodec/htmlsubtitles.c
new file mode 100644
index 00000000..a2cd40fa
--- /dev/null
+++ b/libavcodec/htmlsubtitles.c
@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 2010  Aurelien Jacobs <aurel@gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avstring.h"
+#include "libavutil/common.h"
+#include "libavutil/parseutils.h"
+#include "htmlsubtitles.h"
+
+/* Parse the colour token at str (ended by '"', ' ', '>' or NUL): byte0 | byte1 << 8 | byte2 << 16, or -1. */
+static int html_color_parse(void *log_ctx, const char *str)
+{
+    uint8_t color[4];
+    int err = av_parse_color(color, str, strcspn(str, "\" >"), log_ctx);
+    return err < 0 ? -1 : color[0] | color[1] << 8 | color[2] << 16;
+}
+
+enum {
+    PARAM_UNKNOWN = -1, /* not referenced anywhere in this file */
+    PARAM_SIZE,         /* SrtStack.param[] slot holding the font size override */
+    PARAM_COLOR,        /* SrtStack.param[] slot holding the colour override */
+    PARAM_FACE,         /* SrtStack.param[] slot holding the font face override */
+    PARAM_NUMBER        /* number of param[] slots; used as the array bound */
+};
+
+typedef struct SrtStack {          /* one entry of the open-tag stack kept by ff_htmlmarkup_to_ass() */
+    char tag[128];                 /* tag name; a closing tag must match the topmost entry's name */
+    char param[PARAM_NUMBER][128]; /* override strings set by <font>, indexed by PARAM_*; "" when unset */
+} SrtStack;
+
+static void rstrip_spaces_buf(AVBPrint *buf)
+{   /* Strip trailing spaces in place; a truncated buffer is left alone, its str[len - 1] is out of bounds. */
+    while (av_bprint_is_complete(buf) && buf->len > 0 && buf->str[buf->len - 1] == ' ')
+        buf->str[--buf->len] = 0;
+}
+
+/* Append to dst the ASS rendering of the SRT/HTML-style markup in "in".
+ * <font size/color/face> and <b>/<i>/<s>/<u> become override codes, '\n' becomes "\N", and
+ * parsing stops at the first blank line; trailing "\N" and spaces are removed from dst at the end.
+ * log_ctx is only passed on to colour parsing. Nothing is returned; check dst for truncation. */
+void ff_htmlmarkup_to_ass(void *log_ctx, AVBPrint *dst, const char *in)
+{
+    char *param, buffer[128], tmp[128];
+    int len, tag_close, sptr = 1, line_start = 1, an = 0, end = 0;
+    SrtStack stack[16];
+
+    stack[0].tag[0] = 0; /* stack[0]: base entry, its value-less overrides are emitted when an outermost <font> closes */
+    strcpy(stack[0].param[PARAM_SIZE],  "{\\fs}");
+    strcpy(stack[0].param[PARAM_COLOR], "{\\c}");
+    strcpy(stack[0].param[PARAM_FACE],  "{\\fn}");
+
+    for (; !end && *in; in++) {
+        switch (*in) {
+        case '\r':
+            break;
+        case '\n':
+            if (line_start) {
+                end = 1;
+                break;
+            }
+            rstrip_spaces_buf(dst);
+            av_bprintf(dst, "\\N");
+            line_start = 1;
+            break;
+        case ' ':
+            if (!line_start)
+                av_bprint_chars(dst, *in, 1);
+            break;
+        case '{':    /* skip all {\xxx} substrings except for {\an%d}
+                        and all microdvd like styles such as {Y:xxx} */
+            len = 0;
+            an += sscanf(in, "{\\an%*1u}%n", &len) >= 0 && len > 0;
+            if ((an != 1 && (len = 0, sscanf(in, "{\\%*[^}]}%n", &len) >= 0 && len > 0)) ||
+                (len = 0, sscanf(in, "{%*1[CcFfoPSsYy]:%*[^}]}%n", &len) >= 0 && len > 0)) {
+                in += len - 1;
+            } else
+                av_bprint_chars(dst, *in, 1);
+            break;
+        case '<':
+            tag_close = in[1] == '/';
+            len = 0;
+            if (sscanf(in+tag_close+1, "%127[^>]>%n", buffer, &len) >= 1 && len > 0) {
+                const char *tagname = buffer;
+                while (*tagname == ' ')
+                    tagname++;
+                if ((param = strchr(tagname, ' ')))
+                    *param++ = 0;
+                if ((!tag_close && sptr < FF_ARRAY_ELEMS(stack)) ||
+                    ( tag_close && sptr > 0 && !strcmp(stack[sptr-1].tag, tagname))) {
+                    int i, j, color, unknown = 0;
+                    in += len + tag_close;
+                    if (!tag_close)
+                        memset(stack+sptr, 0, sizeof(*stack));
+                    if (!strcmp(tagname, "font")) {
+                        if (tag_close) {
+                            for (i=PARAM_NUMBER-1; i>=0; i--)
+                                if (stack[sptr-1].param[i][0])
+                                    for (j=sptr-2; j>=0; j--)
+                                        if (stack[j].param[i][0]) {
+                                            av_bprintf(dst, "%s", stack[j].param[i]);
+                                            break;
+                                        }
+                        } else {
+                            while (param) {
+                                if (!strncmp(param, "size=", 5)) {
+                                    unsigned font_size;
+                                    param += 5 + (param[5] == '"');
+                                    if (sscanf(param, "%u", &font_size) == 1)
+                                        snprintf(stack[sptr].param[PARAM_SIZE],
+                                                 sizeof(stack[0].param[PARAM_SIZE]), "{\\fs%u}", font_size);
+                                } else if (!strncmp(param, "color=", 6)) {
+                                    param += 6 + (param[6] == '"');
+                                    color = html_color_parse(log_ctx, param);
+                                    if (color >= 0) /* unparsable colour: emit no override rather than the -1 error code */
+                                        snprintf(stack[sptr].param[PARAM_COLOR],
+                                                 sizeof(stack[0].param[PARAM_COLOR]), "{\\c&H%X&}", color);
+                                } else if (!strncmp(param, "face=", 5)) {
+                                    param += 5 + (param[5] == '"');
+                                    len = strcspn(param, param[-1] == '"' ? "\"" : " ");
+                                    av_strlcpy(tmp, param, FFMIN(sizeof(tmp), len+1));
+                                    param += len;
+                                    snprintf(stack[sptr].param[PARAM_FACE],
+                                             sizeof(stack[0].param[PARAM_FACE]), "{\\fn%s}", tmp);
+                                }
+                                if ((param = strchr(param, ' ')))
+                                    param++;
+                            }
+                            for (i=0; i<PARAM_NUMBER; i++)
+                                if (stack[sptr].param[i][0])
+                                    av_bprintf(dst, "%s", stack[sptr].param[i]);
+                        }
+                    } else if (strspn(tagname, "bisu") == 1 && !tagname[1]) { /* strspn first: tagname may be "" */
+                        av_bprintf(dst, "{\\%c%d}", tagname[0], !tag_close);
+                    } else {
+                        unknown = 1;
+                        snprintf(tmp, sizeof(tmp), "</%s>", tagname);
+                    }
+                    if (tag_close) {
+                        sptr--;
+                    } else if (unknown && !strstr(in, tmp)) {
+                        in -= len + tag_close; /* unknown tag that is never closed: rewind and copy it literally */
+                        av_bprint_chars(dst, *in, 1);
+                    } else
+                        av_strlcpy(stack[sptr++].tag, tagname, sizeof(stack[0].tag));
+                    break;
+                }
+            } /* fall through: no tag was consumed, copy '<' as-is */
+        default:
+            av_bprint_chars(dst, *in, 1);
+            break;
+        }
+        if (*in != ' ' && *in != '\r' && *in != '\n')
+            line_start = 0;
+    }
+
+    if (!av_bprint_is_complete(dst)) /* truncated: str[len] would be out of bounds */
+        return;
+    while (dst->len >= 2 && !strncmp(&dst->str[dst->len - 2], "\\N", 2))
+        dst->len -= 2;
+    dst->str[dst->len] = 0;
+    rstrip_spaces_buf(dst);
+}
diff --git a/libavcodec/htmlsubtitles.h b/libavcodec/htmlsubtitles.h
new file mode 100644
index 00000000..e10cdda2
--- /dev/null
+++ b/libavcodec/htmlsubtitles.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2010  Aurelien Jacobs <aurel@gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_HTMLSUBTITLES_H
+#define AVCODEC_HTMLSUBTITLES_H
+
+#include "libavutil/bprint.h"
+
+void ff_htmlmarkup_to_ass(void *log_ctx, AVBPrint *dst, const char *in); /* appends the ASS form of the SRT/HTML-style markup "in" to dst */
+
+#endif /* AVCODEC_HTMLSUBTITLES_H */
diff --git a/libavcodec/huffman.c b/libavcodec/huffman.c
index c771bcfe..d7403b88 100644
--- a/libavcodec/huffman.c
+++ b/libavcodec/huffman.c
@@ -26,6 +26,7 @@
 
 #include <stdint.h>
 
+#include "libavutil/qsort.h"
 #include "avcodec.h"
 #include "get_bits.h"
 #include "huffman.h"
@@ -170,7 +171,7 @@ int ff_huff_build_tree(AVCodecContext *avctx, VLC *vlc, int nb_codes, int nb_bit
                "Tree construction is not possible\n");
         return -1;
     }
-    qsort(nodes, nb_codes, sizeof(Node), cmp);
+    AV_QSORT(nodes, nb_codes, Node, cmp);
     cur_node = nb_codes;
     nodes[nb_codes*2-1].count = 0;
     for (i = 0; i < nb_codes * 2 - 1; i += 2) {
diff --git a/libavcodec/huffyuvdec.c b/libavcodec/huffyuvdec.c
index a99ac71a..7314519f 100644
--- a/libavcodec/huffyuvdec.c
+++ b/libavcodec/huffyuvdec.c
@@ -41,7 +41,7 @@
 #include "libavutil/pixdesc.h"
 
 #define classic_shift_luma_table_size 42
-static const unsigned char classic_shift_luma[classic_shift_luma_table_size + FF_INPUT_BUFFER_PADDING_SIZE] = {
+static const unsigned char classic_shift_luma[classic_shift_luma_table_size + AV_INPUT_BUFFER_PADDING_SIZE] = {
     34, 36, 35, 69, 135, 232,   9, 16, 10, 24,  11,  23,  12,  16, 13, 10,
     14,  8, 15,  8,  16,   8,  17, 20, 16, 10, 207, 206, 205, 236, 11,  8,
     10, 21,  9, 23,   8,   8, 199, 70, 69, 68,   0,
@@ -49,7 +49,7 @@ static const unsigned char classic_shift_luma[classic_shift_luma_table_size + FF
 };
 
 #define classic_shift_chroma_table_size 59
-static const unsigned char classic_shift_chroma[classic_shift_chroma_table_size + FF_INPUT_BUFFER_PADDING_SIZE] = {
+static const unsigned char classic_shift_chroma[classic_shift_chroma_table_size + AV_INPUT_BUFFER_PADDING_SIZE] = {
     66, 36,  37,  38, 39, 40,  41,  75,  76,  77, 110, 239, 144, 81, 82,  83,
     84, 85, 118, 183, 56, 57,  88,  89,  56,  89, 154,  57,  58, 57, 26, 141,
     57, 56,  58,  57, 58, 57, 184, 119, 214, 245, 116,  83,  82, 49, 80,  79,
@@ -571,6 +571,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
     return ret;
 }
 
+#if HAVE_THREADS
 static av_cold int decode_init_thread_copy(AVCodecContext *avctx)
 {
     HYuvContext *s = avctx->priv_data;
@@ -595,6 +596,7 @@ static av_cold int decode_init_thread_copy(AVCodecContext *avctx)
 
     return 0;
 }
+#endif
 
 /** Subset of GET_VLC for use in hand-roller VLC code */
 #define VLC_INTERN(dst, table, gb, name, bits, max_depth)   \
@@ -1038,7 +1040,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                 decode_422_bitstream(s, width - 2);
                 lefty = s->hdsp.add_hfyu_left_pred(p->data[0] + 2, s->temp[0],
                                                    width - 2, lefty);
-                if (!(s->flags & CODEC_FLAG_GRAY)) {
+                if (!(s->flags & AV_CODEC_FLAG_GRAY)) {
                     leftu = s->hdsp.add_hfyu_left_pred(p->data[1] + 1, s->temp[1], width2 - 1, leftu);
                     leftv = s->hdsp.add_hfyu_left_pred(p->data[2] + 1, s->temp[2], width2 - 1, leftv);
                 }
@@ -1071,14 +1073,14 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                     decode_422_bitstream(s, width);
                     lefty = s->hdsp.add_hfyu_left_pred(ydst, s->temp[0],
                                                        width, lefty);
-                    if (!(s->flags & CODEC_FLAG_GRAY)) {
+                    if (!(s->flags & AV_CODEC_FLAG_GRAY)) {
                         leftu = s->hdsp.add_hfyu_left_pred(udst, s->temp[1], width2, leftu);
                         leftv = s->hdsp.add_hfyu_left_pred(vdst, s->temp[2], width2, leftv);
                     }
                     if (s->predictor == PLANE) {
                         if (cy > s->interlaced) {
                             s->hdsp.add_bytes(ydst, ydst - fake_ystride, width);
-                            if (!(s->flags & CODEC_FLAG_GRAY)) {
+                            if (!(s->flags & AV_CODEC_FLAG_GRAY)) {
                                 s->hdsp.add_bytes(udst, udst - fake_ustride, width2);
                                 s->hdsp.add_bytes(vdst, vdst - fake_vstride, width2);
                             }
@@ -1093,7 +1095,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                 decode_422_bitstream(s, width - 2);
                 lefty = s->hdsp.add_hfyu_left_pred(p->data[0] + 2, s->temp[0],
                                                    width - 2, lefty);
-                if (!(s->flags & CODEC_FLAG_GRAY)) {
+                if (!(s->flags & AV_CODEC_FLAG_GRAY)) {
                     leftu = s->hdsp.add_hfyu_left_pred(p->data[1] + 1, s->temp[1], width2 - 1, leftu);
                     leftv = s->hdsp.add_hfyu_left_pred(p->data[2] + 1, s->temp[2], width2 - 1, leftv);
                 }
@@ -1105,7 +1107,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                     decode_422_bitstream(s, width);
                     lefty = s->hdsp.add_hfyu_left_pred(p->data[0] + p->linesize[0],
                                                        s->temp[0], width, lefty);
-                    if (!(s->flags & CODEC_FLAG_GRAY)) {
+                    if (!(s->flags & AV_CODEC_FLAG_GRAY)) {
                         leftu = s->hdsp.add_hfyu_left_pred(p->data[1] + p->linesize[2], s->temp[1], width2, leftu);
                         leftv = s->hdsp.add_hfyu_left_pred(p->data[2] + p->linesize[1], s->temp[2], width2, leftv);
                     }
@@ -1117,7 +1119,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                 decode_422_bitstream(s, 4);
                 lefty = s->hdsp.add_hfyu_left_pred(p->data[0] + fake_ystride,
                                                    s->temp[0], 4, lefty);
-                if (!(s->flags & CODEC_FLAG_GRAY)) {
+                if (!(s->flags & AV_CODEC_FLAG_GRAY)) {
                     leftu = s->hdsp.add_hfyu_left_pred(p->data[1] + fake_ustride, s->temp[1], 2, leftu);
                     leftv = s->hdsp.add_hfyu_left_pred(p->data[2] + fake_vstride, s->temp[2], 2, leftv);
                 }
@@ -1128,7 +1130,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                 s->hdsp.add_hfyu_median_pred(p->data[0] + fake_ystride + 4,
                                              p->data[0] + 4, s->temp[0],
                                              width - 4, &lefty, &lefttopy);
-                if (!(s->flags & CODEC_FLAG_GRAY)) {
+                if (!(s->flags & AV_CODEC_FLAG_GRAY)) {
                     lefttopu = p->data[1][1];
                     lefttopv = p->data[2][1];
                     s->hdsp.add_hfyu_median_pred(p->data[1] + fake_ustride + 2, p->data[1] + 2, s->temp[1], width2 - 2, &leftu, &lefttopu);
@@ -1163,7 +1165,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                     s->hdsp.add_hfyu_median_pred(ydst, ydst - fake_ystride,
                                                  s->temp[0], width,
                                                  &lefty, &lefttopy);
-                    if (!(s->flags & CODEC_FLAG_GRAY)) {
+                    if (!(s->flags & AV_CODEC_FLAG_GRAY)) {
                         s->hdsp.add_hfyu_median_pred(udst, udst - fake_ustride, s->temp[1], width2, &leftu, &lefttopu);
                         s->hdsp.add_hfyu_median_pred(vdst, vdst - fake_vstride, s->temp[2], width2, &leftv, &lefttopv);
                     }
@@ -1243,8 +1245,8 @@ AVCodec ff_huffyuv_decoder = {
     .init             = decode_init,
     .close            = decode_end,
     .decode           = decode_frame,
-    .capabilities     = CODEC_CAP_DR1 | CODEC_CAP_DRAW_HORIZ_BAND |
-                        CODEC_CAP_FRAME_THREADS,
+    .capabilities     = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DRAW_HORIZ_BAND |
+                        AV_CODEC_CAP_FRAME_THREADS,
     .init_thread_copy = ONLY_IF_THREADS_ENABLED(decode_init_thread_copy),
 };
 
@@ -1258,8 +1260,8 @@ AVCodec ff_ffvhuff_decoder = {
     .init             = decode_init,
     .close            = decode_end,
     .decode           = decode_frame,
-    .capabilities     = CODEC_CAP_DR1 | CODEC_CAP_DRAW_HORIZ_BAND |
-                        CODEC_CAP_FRAME_THREADS,
+    .capabilities     = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DRAW_HORIZ_BAND |
+                        AV_CODEC_CAP_FRAME_THREADS,
     .init_thread_copy = ONLY_IF_THREADS_ENABLED(decode_init_thread_copy),
 };
 #endif /* CONFIG_FFVHUFF_DECODER */
diff --git a/libavcodec/huffyuvenc.c b/libavcodec/huffyuvenc.c
index 22e2cb86..572de16a 100644
--- a/libavcodec/huffyuvenc.c
+++ b/libavcodec/huffyuvenc.c
@@ -60,12 +60,12 @@ static inline int sub_left_prediction(HYuvContext *s, uint8_t *dst,
             }
             return left;
         } else {
-            for (i = 0; i < 16; i++) {
+            for (i = 0; i < 32; i++) {
                 const int temp = src[i];
                 dst[i] = temp - left;
                 left   = temp;
             }
-            s->hencdsp.diff_bytes(dst + 16, src + 16, src + 15, w - 16);
+            s->hencdsp.diff_bytes(dst + 32, src + 32, src + 31, w - 32);
             return src[w-1];
         }
     } else {
@@ -220,7 +220,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
     ff_huffyuvencdsp_init(&s->hencdsp);
 
     avctx->extradata = av_mallocz(3*MAX_N + 4);
-    if (s->flags&CODEC_FLAG_PASS1) {
+    if (s->flags&AV_CODEC_FLAG_PASS1) {
 #define STATS_OUT_SIZE 21*MAX_N*3 + 4
         avctx->stats_out = av_mallocz(STATS_OUT_SIZE); // 21*256*3(%llu ) + 3(\n) + 1(0) = 16132
         if (!avctx->stats_out)
@@ -228,14 +228,23 @@ static av_cold int encode_init(AVCodecContext *avctx)
     }
     s->version = 2;
 
-    avctx->coded_frame = av_frame_alloc();
-    if (!avctx->extradata || !avctx->coded_frame)
+    if (!avctx->extradata)
         return AVERROR(ENOMEM);
 
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
     avctx->coded_frame->key_frame = 1;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+#if FF_API_PRIVATE_OPT
+FF_DISABLE_DEPRECATION_WARNINGS
+    if (avctx->context_model == 1)
+        s->context = avctx->context_model;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
-    s->bps = desc->comp[0].depth_minus1 + 1;
+    s->bps = desc->comp[0].depth;
     s->yuv = !(desc->flags & AV_PIX_FMT_FLAG_RGB) && desc->nb_components >= 2;
     s->chroma = desc->nb_components > 2;
     s->alpha = !!(desc->flags & AV_PIX_FMT_FLAG_ALPHA);
@@ -310,17 +319,21 @@ static av_cold int encode_init(AVCodecContext *avctx)
 
     avctx->bits_per_coded_sample = s->bitstream_bpp;
     s->decorrelate = s->bitstream_bpp >= 24 && !s->yuv && !(desc->flags & AV_PIX_FMT_FLAG_PLANAR);
-    s->predictor = avctx->prediction_method;
-    s->interlaced = avctx->flags&CODEC_FLAG_INTERLACED_ME ? 1 : 0;
-    if (avctx->context_model == 1) {
-        s->context = avctx->context_model;
-        if (s->flags & (CODEC_FLAG_PASS1|CODEC_FLAG_PASS2)) {
+#if FF_API_PRIVATE_OPT
+FF_DISABLE_DEPRECATION_WARNINGS
+    if (avctx->prediction_method)
+        s->predictor = avctx->prediction_method;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+    s->interlaced = avctx->flags & AV_CODEC_FLAG_INTERLACED_ME ? 1 : 0;
+    if (s->context) {
+        if (s->flags & (AV_CODEC_FLAG_PASS1 | AV_CODEC_FLAG_PASS2)) {
             av_log(avctx, AV_LOG_ERROR,
                    "context=1 is not compatible with "
                    "2 pass huffyuv encoding\n");
             return AVERROR(EINVAL);
         }
-    }else s->context= 0;
+    }
 
     if (avctx->codec->id == AV_CODEC_ID_HUFFYUV) {
         if (avctx->pix_fmt == AV_PIX_FMT_YUV420P) {
@@ -329,7 +342,8 @@ static av_cold int encode_init(AVCodecContext *avctx)
                    "vcodec=ffvhuff or format=422p\n");
             return AVERROR(EINVAL);
         }
-        if (avctx->context_model) {
+#if FF_API_PRIVATE_OPT
+        if (s->context) {
             av_log(avctx, AV_LOG_ERROR,
                    "Error: per-frame huffman tables are not supported "
                    "by huffyuv; use vcodec=ffvhuff\n");
@@ -341,6 +355,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
                    "by huffyuv; use vcodec=ffvhuff\n");
             return AVERROR(EINVAL);
         }
+#endif
         if (s->interlaced != ( s->height > 288 ))
             av_log(avctx, AV_LOG_INFO,
                    "using huffyuv 2.2.0 or newer interlacing flag\n");
@@ -451,7 +466,7 @@ static int encode_422_bitstream(HYuvContext *s, int offset, int count)
 
     count /= 2;
 
-    if (s->flags & CODEC_FLAG_PASS1) {
+    if (s->flags & AV_CODEC_FLAG_PASS1) {
         for(i = 0; i < count; i++) {
             LOAD4;
             s->stats[0][y0]++;
@@ -460,7 +475,7 @@ static int encode_422_bitstream(HYuvContext *s, int offset, int count)
             s->stats[2][v0]++;
         }
     }
-    if (s->avctx->flags2 & CODEC_FLAG2_NO_OUTPUT)
+    if (s->avctx->flags2 & AV_CODEC_FLAG2_NO_OUTPUT)
         return 0;
     if (s->context) {
         for (i = 0; i < count; i++) {
@@ -536,7 +551,7 @@ static int encode_plane_bitstream(HYuvContext *s, int width, int plane)
             put_bits(&s->pb, 2, y1&3);
 
     if (s->bps <= 8) {
-    if (s->flags & CODEC_FLAG_PASS1) {
+    if (s->flags & AV_CODEC_FLAG_PASS1) {
         for (i = 0; i < count; i++) {
             LOAD2;
             STAT2;
@@ -546,7 +561,7 @@ static int encode_plane_bitstream(HYuvContext *s, int width, int plane)
             STATEND;
         }
     }
-    if (s->avctx->flags2 & CODEC_FLAG2_NO_OUTPUT)
+    if (s->avctx->flags2 & AV_CODEC_FLAG2_NO_OUTPUT)
         return 0;
 
     if (s->context) {
@@ -572,7 +587,7 @@ static int encode_plane_bitstream(HYuvContext *s, int width, int plane)
     }
     } else if (s->bps <= 14) {
         int mask = s->n - 1;
-        if (s->flags & CODEC_FLAG_PASS1) {
+        if (s->flags & AV_CODEC_FLAG_PASS1) {
             for (i = 0; i < count; i++) {
                 LOAD2_14;
                 STAT2;
@@ -582,7 +597,7 @@ static int encode_plane_bitstream(HYuvContext *s, int width, int plane)
                 STATEND;
             }
         }
-        if (s->avctx->flags2 & CODEC_FLAG2_NO_OUTPUT)
+        if (s->avctx->flags2 & AV_CODEC_FLAG2_NO_OUTPUT)
             return 0;
 
         if (s->context) {
@@ -607,7 +622,7 @@ static int encode_plane_bitstream(HYuvContext *s, int width, int plane)
             }
         }
     } else {
-        if (s->flags & CODEC_FLAG_PASS1) {
+        if (s->flags & AV_CODEC_FLAG_PASS1) {
             for (i = 0; i < count; i++) {
                 LOAD2_16;
                 STAT2_16;
@@ -617,7 +632,7 @@ static int encode_plane_bitstream(HYuvContext *s, int width, int plane)
                 STATEND_16;
             }
         }
-        if (s->avctx->flags2 & CODEC_FLAG2_NO_OUTPUT)
+        if (s->avctx->flags2 & AV_CODEC_FLAG2_NO_OUTPUT)
             return 0;
 
         if (s->context) {
@@ -669,13 +684,13 @@ static int encode_gray_bitstream(HYuvContext *s, int count)
 
     count /= 2;
 
-    if (s->flags & CODEC_FLAG_PASS1) {
+    if (s->flags & AV_CODEC_FLAG_PASS1) {
         for (i = 0; i < count; i++) {
             LOAD2;
             STAT2;
         }
     }
-    if (s->avctx->flags2 & CODEC_FLAG2_NO_OUTPUT)
+    if (s->avctx->flags2 & AV_CODEC_FLAG2_NO_OUTPUT)
         return 0;
 
     if (s->context) {
@@ -723,13 +738,13 @@ static inline int encode_bgra_bitstream(HYuvContext *s, int count, int planes)
     if (planes == 4)                                                    \
         put_bits(&s->pb, s->len[2][a], s->bits[2][a]);
 
-    if ((s->flags & CODEC_FLAG_PASS1) &&
-        (s->avctx->flags2 & CODEC_FLAG2_NO_OUTPUT)) {
+    if ((s->flags & AV_CODEC_FLAG_PASS1) &&
+        (s->avctx->flags2 & AV_CODEC_FLAG2_NO_OUTPUT)) {
         for (i = 0; i < count; i++) {
             LOAD_GBRA;
             STAT_BGRA;
         }
-    } else if (s->context || (s->flags & CODEC_FLAG_PASS1)) {
+    } else if (s->context || (s->flags & AV_CODEC_FLAG_PASS1)) {
         for (i = 0; i < count; i++) {
             LOAD_GBRA;
             STAT_BGRA;
@@ -757,7 +772,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     const AVFrame * const p = pict;
     int i, j, size = 0, ret;
 
-    if ((ret = ff_alloc_packet2(avctx, pkt, width * height * 3 * 4 + FF_MIN_BUFFER_SIZE)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, pkt, width * height * 3 * 4 + AV_INPUT_BUFFER_MIN_SIZE, 0)) < 0)
         return ret;
 
     if (s->context) {
@@ -997,7 +1012,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     put_bits(&s->pb, 15, 0);
     size /= 4;
 
-    if ((s->flags&CODEC_FLAG_PASS1) && (s->picture_number & 31) == 0) {
+    if ((s->flags & AV_CODEC_FLAG_PASS1) && (s->picture_number & 31) == 0) {
         int j;
         char *p = avctx->stats_out;
         char *end = p + STATS_OUT_SIZE;
@@ -1014,7 +1029,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         }
     } else if (avctx->stats_out)
         avctx->stats_out[0] = '\0';
-    if (!(s->avctx->flags2 & CODEC_FLAG2_NO_OUTPUT)) {
+    if (!(s->avctx->flags2 & AV_CODEC_FLAG2_NO_OUTPUT)) {
         flush_put_bits(&s->pb);
         s->bdsp.bswap_buf((uint32_t *) pkt->data, (uint32_t *) pkt->data, size);
     }
@@ -1037,29 +1052,43 @@ static av_cold int encode_end(AVCodecContext *avctx)
     av_freep(&avctx->extradata);
     av_freep(&avctx->stats_out);
 
-    av_frame_free(&avctx->coded_frame);
-
     return 0;
 }
 
-static const AVOption options[] = {
-    { "non_deterministic", "Allow multithreading for e.g. context=1 at the expense of determinism",
-      offsetof(HYuvContext, non_determ), AV_OPT_TYPE_INT, { .i64 = 1 },
-      0, 1, AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM },
+#define OFFSET(x) offsetof(HYuvContext, x) /* offset of field x inside HYuvContext, used as the AVOption target */
+#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM /* flags: video + encoding parameter */
+
+#define COMMON_OPTIONS /* option entries shared by normal_options and ff_options */ \
+    { "non_deterministic", "Allow multithreading for e.g. context=1 at the expense of determinism", \
+      OFFSET(non_determ), AV_OPT_TYPE_BOOL, { .i64 = 1 }, \
+      0, 1, VE }, \
+    { "pred", "Prediction method", OFFSET(predictor), AV_OPT_TYPE_INT, { .i64 = LEFT }, LEFT, MEDIAN, VE, "pred" }, \
+        { "left",   NULL, 0, AV_OPT_TYPE_CONST, { .i64 = LEFT },   INT_MIN, INT_MAX, VE, "pred" }, \
+        { "plane",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PLANE },  INT_MIN, INT_MAX, VE, "pred" }, \
+        { "median", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MEDIAN }, INT_MIN, INT_MAX, VE, "pred" }, \
+
+static const AVOption normal_options[] = { /* option table referenced by normal_class ("huffyuv"): shared entries only */
+    COMMON_OPTIONS
+    { NULL }, /* end-of-table sentinel */
+};
+
+static const AVOption ff_options[] = { /* option table referenced by ff_class ("ffvhuff"): shared entries plus "context" */
+    COMMON_OPTIONS
+    { "context", "Set per-frame huffman tables", OFFSET(context), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
     { NULL },
 };
 
 static const AVClass normal_class = {
     .class_name = "huffyuv",
     .item_name  = av_default_item_name,
-    .option     = options,
+    .option     = normal_options,
     .version    = LIBAVUTIL_VERSION_INT,
 };
 
 static const AVClass ff_class = {
     .class_name = "ffvhuff",
     .item_name  = av_default_item_name,
-    .option     = options,
+    .option     = ff_options,
     .version    = LIBAVUTIL_VERSION_INT,
 };
 
@@ -1072,7 +1101,7 @@ AVCodec ff_huffyuv_encoder = {
     .init           = encode_init,
     .encode2        = encode_frame,
     .close          = encode_end,
-    .capabilities   = CODEC_CAP_FRAME_THREADS | CODEC_CAP_INTRA_ONLY,
+    .capabilities   = AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_INTRA_ONLY,
     .priv_class     = &normal_class,
     .pix_fmts       = (const enum AVPixelFormat[]){
         AV_PIX_FMT_YUV422P, AV_PIX_FMT_RGB24,
@@ -1092,7 +1121,7 @@ AVCodec ff_ffvhuff_encoder = {
     .init           = encode_init,
     .encode2        = encode_frame,
     .close          = encode_end,
-    .capabilities   = CODEC_CAP_FRAME_THREADS | CODEC_CAP_INTRA_ONLY,
+    .capabilities   = AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_INTRA_ONLY,
     .priv_class     = &ff_class,
     .pix_fmts       = (const enum AVPixelFormat[]){
         AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV411P,
diff --git a/libavcodec/huffyuvencdsp.c b/libavcodec/huffyuvencdsp.c
index 95fcc195..fdcd0b06 100644
--- a/libavcodec/huffyuvencdsp.c
+++ b/libavcodec/huffyuvencdsp.c
@@ -25,7 +25,7 @@
 #define pb_7f (~0UL / 255 * 0x7f)
 #define pb_80 (~0UL / 255 * 0x80)
 
-static void diff_bytes_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w)
+static void diff_bytes_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, intptr_t w)
 {
     long i;
 
@@ -54,7 +54,7 @@ static void diff_bytes_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
 }
 
 static void sub_hfyu_median_pred_c(uint8_t *dst, const uint8_t *src1,
-                                   const uint8_t *src2, int w,
+                                   const uint8_t *src2, intptr_t w,
                                    int *left, int *left_top)
 {
     int i;
diff --git a/libavcodec/huffyuvencdsp.h b/libavcodec/huffyuvencdsp.h
index 3a49b4a7..9d090953 100644
--- a/libavcodec/huffyuvencdsp.h
+++ b/libavcodec/huffyuvencdsp.h
@@ -25,13 +25,13 @@ typedef struct HuffYUVEncDSPContext {
     void (*diff_bytes)(uint8_t *dst /* align 16 */,
                        const uint8_t *src1 /* align 16 */,
                        const uint8_t *src2 /* align 1 */,
-                       int w);
+                       intptr_t w);
     /**
      * Subtract HuffYUV's variant of median prediction.
      * Note, this might read from src1[-1], src2[-1].
      */
     void (*sub_hfyu_median_pred)(uint8_t *dst, const uint8_t *src1,
-                                 const uint8_t *src2, int w,
+                                 const uint8_t *src2, intptr_t w,
                                  int *left, int *left_top);
 } HuffYUVEncDSPContext;
 
diff --git a/libavcodec/idcinvideo.c b/libavcodec/idcinvideo.c
index 55319e51..4a0a6fb6 100644
--- a/libavcodec/idcinvideo.c
+++ b/libavcodec/idcinvideo.c
@@ -248,5 +248,5 @@ AVCodec ff_idcin_decoder = {
     .priv_data_size = sizeof(IdcinContext),
     .init           = idcin_decode_init,
     .decode         = idcin_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/idctdsp.c b/libavcodec/idctdsp.c
index ae804d99..63e9b521 100644
--- a/libavcodec/idctdsp.c
+++ b/libavcodec/idctdsp.c
@@ -305,6 +305,8 @@ av_cold void ff_idctdsp_init(IDCTDSPContext *c, AVCodecContext *avctx)
         ff_idctdsp_init_ppc(c, avctx, high_bit_depth);
     if (ARCH_X86)
         ff_idctdsp_init_x86(c, avctx, high_bit_depth);
+    if (ARCH_MIPS)
+        ff_idctdsp_init_mips(c, avctx, high_bit_depth);
 
     ff_put_pixels_clamped = c->put_pixels_clamped;
     ff_add_pixels_clamped = c->add_pixels_clamped;
diff --git a/libavcodec/idctdsp.h b/libavcodec/idctdsp.h
index 538b7166..b180a676 100644
--- a/libavcodec/idctdsp.h
+++ b/libavcodec/idctdsp.h
@@ -108,5 +108,7 @@ void ff_idctdsp_init_ppc(IDCTDSPContext *c, AVCodecContext *avctx,
                          unsigned high_bit_depth);
 void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
                          unsigned high_bit_depth);
+void ff_idctdsp_init_mips(IDCTDSPContext *c, AVCodecContext *avctx,
+                          unsigned high_bit_depth);
 
 #endif /* AVCODEC_IDCTDSP_H */
diff --git a/libavcodec/iff.c b/libavcodec/iff.c
index 03bb3f5b..49df17cc 100644
--- a/libavcodec/iff.c
+++ b/libavcodec/iff.c
@@ -240,7 +240,7 @@ static int extract_header(AVCodecContext *const avctx,
                 avctx->pix_fmt = AV_PIX_FMT_RGB32;
                 av_freep(&s->mask_buf);
                 av_freep(&s->mask_palbuf);
-                s->mask_buf = av_malloc((s->planesize * 32) + FF_INPUT_BUFFER_PADDING_SIZE);
+                s->mask_buf = av_malloc((s->planesize * 32) + AV_INPUT_BUFFER_PADDING_SIZE);
                 if (!s->mask_buf)
                     return AVERROR(ENOMEM);
                 if (s->bpp > 16) {
@@ -248,7 +248,7 @@ static int extract_header(AVCodecContext *const avctx,
                     av_freep(&s->mask_buf);
                     return AVERROR(ENOMEM);
                 }
-                s->mask_palbuf = av_malloc((2 << s->bpp) * sizeof(uint32_t) + FF_INPUT_BUFFER_PADDING_SIZE);
+                s->mask_palbuf = av_malloc((2 << s->bpp) * sizeof(uint32_t) + AV_INPUT_BUFFER_PADDING_SIZE);
                 if (!s->mask_palbuf) {
                     av_freep(&s->mask_buf);
                     return AVERROR(ENOMEM);
@@ -275,12 +275,12 @@ static int extract_header(AVCodecContext *const avctx,
             int ham_count;
             const uint8_t *const palette = avctx->extradata + AV_RB16(avctx->extradata);
 
-            s->ham_buf = av_malloc((s->planesize * 8) + FF_INPUT_BUFFER_PADDING_SIZE);
+            s->ham_buf = av_malloc((s->planesize * 8) + AV_INPUT_BUFFER_PADDING_SIZE);
             if (!s->ham_buf)
                 return AVERROR(ENOMEM);
 
             ham_count = 8 * (1 << s->ham);
-            s->ham_palbuf = av_malloc((ham_count << !!(s->masking == MASK_HAS_MASK)) * sizeof (uint32_t) + FF_INPUT_BUFFER_PADDING_SIZE);
+            s->ham_palbuf = av_malloc((ham_count << !!(s->masking == MASK_HAS_MASK)) * sizeof (uint32_t) + AV_INPUT_BUFFER_PADDING_SIZE);
             if (!s->ham_palbuf) {
                 av_freep(&s->ham_buf);
                 return AVERROR(ENOMEM);
@@ -366,7 +366,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
     if ((err = av_image_check_size(avctx->width, avctx->height, 0, avctx)))
         return err;
     s->planesize = FFALIGN(avctx->width, 16) >> 3; // Align plane size in bits to word-boundary
-    s->planebuf  = av_malloc(s->planesize + FF_INPUT_BUFFER_PADDING_SIZE);
+    s->planebuf  = av_malloc(s->planesize + AV_INPUT_BUFFER_PADDING_SIZE);
     if (!s->planebuf)
         return AVERROR(ENOMEM);
 
@@ -887,19 +887,6 @@ AVCodec ff_iff_ilbm_decoder = {
     .init           = decode_init,
     .close          = decode_end,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
-};
-#endif
-#if CONFIG_IFF_BYTERUN1_DECODER
-AVCodec ff_iff_byterun1_decoder = {
-    .name           = "iff",
-    .long_name      = NULL_IF_CONFIG_SMALL("IFF"),
-    .type           = AVMEDIA_TYPE_VIDEO,
-    .id             = AV_CODEC_ID_IFF_BYTERUN1,
-    .priv_data_size = sizeof(IffContext),
-    .init           = decode_init,
-    .close          = decode_end,
-    .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
 #endif
diff --git a/libavcodec/imc.c b/libavcodec/imc.c
index e15ac9cf..f7eff63f 100644
--- a/libavcodec/imc.c
+++ b/libavcodec/imc.c
@@ -137,8 +137,8 @@ static av_cold void iac_generate_tabs(IMCContext *q, int sampling_rate)
 
         if (i > 0) {
             tb = bark - prev_bark;
-            q->weights1[i - 1] = pow(10.0, -1.0 * tb);
-            q->weights2[i - 1] = pow(10.0, -2.7 * tb);
+            q->weights1[i - 1] = ff_exp10(-1.0 * tb);
+            q->weights2[i - 1] = ff_exp10(-2.7 * tb);
         }
         prev_bark = bark;
 
@@ -256,7 +256,7 @@ static av_cold int imc_decode_init(AVCodecContext *avctx)
         return ret;
     }
     ff_bswapdsp_init(&q->bdsp);
-    q->fdsp = avpriv_float_dsp_alloc(avctx->flags & CODEC_FLAG_BITEXACT);
+    q->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
     if (!q->fdsp) {
         ff_fft_end(&q->fft);
 
@@ -1021,7 +1021,7 @@ static int imc_decode_frame(AVCodecContext *avctx, void *data,
 
     IMCContext *q = avctx->priv_data;
 
-    LOCAL_ALIGNED_16(uint16_t, buf16, [(IMC_BLOCK_SIZE + FF_INPUT_BUFFER_PADDING_SIZE) / 2]);
+    LOCAL_ALIGNED_16(uint16_t, buf16, [(IMC_BLOCK_SIZE + AV_INPUT_BUFFER_PADDING_SIZE) / 2]);
 
     if (buf_size < IMC_BLOCK_SIZE * avctx->channels) {
         av_log(avctx, AV_LOG_ERROR, "frame too small!\n");
@@ -1085,7 +1085,7 @@ AVCodec ff_imc_decoder = {
     .close          = imc_decode_close,
     .decode         = imc_decode_frame,
     .flush          = flush,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                       AV_SAMPLE_FMT_NONE },
 };
@@ -1101,7 +1101,7 @@ AVCodec ff_iac_decoder = {
     .close          = imc_decode_close,
     .decode         = imc_decode_frame,
     .flush          = flush,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                       AV_SAMPLE_FMT_NONE },
 };
diff --git a/libavcodec/imgconvert.c b/libavcodec/imgconvert.c
index 8cb60994..0035dc6e 100644
--- a/libavcodec/imgconvert.c
+++ b/libavcodec/imgconvert.c
@@ -24,22 +24,17 @@
  * misc image conversion routines
  */
 
-/* TODO:
- * - write 'ffimg' program to test all the image related stuff
- * - move all api to slice based system
- * - integrate deinterlacing, postprocessing and scaling in the conversion process
- */
-
 #include "avcodec.h"
-#include "imgconvert.h"
 #include "internal.h"
 #include "mathops.h"
 #include "libavutil/avassert.h"
 #include "libavutil/colorspace.h"
 #include "libavutil/common.h"
 #include "libavutil/pixdesc.h"
+#include "libavutil/internal.h"
 #include "libavutil/imgutils.h"
 
+#if FF_API_GETCHROMA
 void avcodec_get_chroma_sub_sample(enum AVPixelFormat pix_fmt, int *h_shift, int *v_shift)
 {
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
@@ -47,6 +42,7 @@ void avcodec_get_chroma_sub_sample(enum AVPixelFormat pix_fmt, int *h_shift, int
     *h_shift = desc->log2_chroma_w;
     *v_shift = desc->log2_chroma_h;
 }
+#endif
 
 int avcodec_get_pix_fmt_loss(enum AVPixelFormat dst_pix_fmt,
                              enum AVPixelFormat src_pix_fmt,
@@ -88,92 +84,8 @@ enum AVPixelFormat avcodec_find_best_pix_fmt_of_list(const enum AVPixelFormat *p
     return best;
 }
 
-/* 2x2 -> 1x1 */
-void ff_shrink22(uint8_t *dst, int dst_wrap,
-                     const uint8_t *src, int src_wrap,
-                     int width, int height)
-{
-    int w;
-    const uint8_t *s1, *s2;
-    uint8_t *d;
-
-    for(;height > 0; height--) {
-        s1 = src;
-        s2 = s1 + src_wrap;
-        d = dst;
-        for(w = width;w >= 4; w-=4) {
-            d[0] = (s1[0] + s1[1] + s2[0] + s2[1] + 2) >> 2;
-            d[1] = (s1[2] + s1[3] + s2[2] + s2[3] + 2) >> 2;
-            d[2] = (s1[4] + s1[5] + s2[4] + s2[5] + 2) >> 2;
-            d[3] = (s1[6] + s1[7] + s2[6] + s2[7] + 2) >> 2;
-            s1 += 8;
-            s2 += 8;
-            d += 4;
-        }
-        for(;w > 0; w--) {
-            d[0] = (s1[0] + s1[1] + s2[0] + s2[1] + 2) >> 2;
-            s1 += 2;
-            s2 += 2;
-            d++;
-        }
-        src += 2 * src_wrap;
-        dst += dst_wrap;
-    }
-}
-
-/* 4x4 -> 1x1 */
-void ff_shrink44(uint8_t *dst, int dst_wrap,
-                     const uint8_t *src, int src_wrap,
-                     int width, int height)
-{
-    int w;
-    const uint8_t *s1, *s2, *s3, *s4;
-    uint8_t *d;
-
-    for(;height > 0; height--) {
-        s1 = src;
-        s2 = s1 + src_wrap;
-        s3 = s2 + src_wrap;
-        s4 = s3 + src_wrap;
-        d = dst;
-        for(w = width;w > 0; w--) {
-            d[0] = (s1[0] + s1[1] + s1[2] + s1[3] +
-                    s2[0] + s2[1] + s2[2] + s2[3] +
-                    s3[0] + s3[1] + s3[2] + s3[3] +
-                    s4[0] + s4[1] + s4[2] + s4[3] + 8) >> 4;
-            s1 += 4;
-            s2 += 4;
-            s3 += 4;
-            s4 += 4;
-            d++;
-        }
-        src += 4 * src_wrap;
-        dst += dst_wrap;
-    }
-}
-
-/* 8x8 -> 1x1 */
-void ff_shrink88(uint8_t *dst, int dst_wrap,
-                     const uint8_t *src, int src_wrap,
-                     int width, int height)
-{
-    int w, i;
-
-    for(;height > 0; height--) {
-        for(w = width;w > 0; w--) {
-            int tmp=0;
-            for(i=0; i<8; i++){
-                tmp += src[0] + src[1] + src[2] + src[3] + src[4] + src[5] + src[6] + src[7];
-                src += src_wrap;
-            }
-            *(dst++) = (tmp + 32)>>6;
-            src += 8 - 8*src_wrap;
-        }
-        src += 8*src_wrap - 8*width;
-        dst += dst_wrap - width;
-    }
-}
-
+#if FF_API_AVPICTURE
+FF_DISABLE_DEPRECATION_WARNINGS
 /* return true if yuv planar */
 static inline int is_yuv_planar(const AVPixFmtDescriptor *desc)
 {
@@ -201,12 +113,14 @@ int av_picture_crop(AVPicture *dst, const AVPicture *src,
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
     int y_shift;
     int x_shift;
+    int max_step[4];
 
     if (pix_fmt < 0 || pix_fmt >= AV_PIX_FMT_NB)
         return -1;
 
     y_shift = desc->log2_chroma_h;
     x_shift = desc->log2_chroma_w;
+    av_image_fill_max_pixsteps(max_step, NULL, desc);
 
     if (is_yuv_planar(desc)) {
     dst->data[0] = src->data[0] + (top_band * src->linesize[0]) + left_band;
@@ -215,9 +129,7 @@ int av_picture_crop(AVPicture *dst, const AVPicture *src,
     } else{
         if(top_band % (1<<y_shift) || left_band % (1<<x_shift))
             return -1;
-        if(left_band) //FIXME add support for this too
-            return -1;
-        dst->data[0] = src->data[0] + (top_band * src->linesize[0]) + left_band;
+        dst->data[0] = src->data[0] + (top_band * src->linesize[0]) + (left_band * max_step[0]);
     }
 
     dst->linesize[0] = src->linesize[0];
@@ -236,9 +148,41 @@ int av_picture_pad(AVPicture *dst, const AVPicture *src, int height, int width,
     int x_shift;
     int yheight;
     int i, y;
+    int max_step[4];
 
-    if (pix_fmt < 0 || pix_fmt >= AV_PIX_FMT_NB ||
-        !is_yuv_planar(desc)) return -1;
+    if (pix_fmt < 0 || pix_fmt >= AV_PIX_FMT_NB)
+        return -1;
+
+    if (!is_yuv_planar(desc)) {
+        if (src)
+            return -1; //TODO: Not yet implemented
+
+        av_image_fill_max_pixsteps(max_step, NULL, desc);
+
+        if (padtop || padleft) {
+            memset(dst->data[0], color[0],
+                    dst->linesize[0] * padtop + (padleft * max_step[0]));
+        }
+
+        if (padleft || padright) {
+            optr = dst->data[0] + dst->linesize[0] * padtop +
+                    (dst->linesize[0] - (padright * max_step[0]));
+            yheight = height - 1 - (padtop + padbottom);
+            for (y = 0; y < yheight; y++) {
+                memset(optr, color[0], (padleft + padright) * max_step[0]);
+                optr += dst->linesize[0];
+            }
+        }
+
+        if (padbottom || padright) {
+            optr = dst->data[0] + dst->linesize[0] * (height - padbottom) -
+                    (padright * max_step[0]);
+            memset(optr, color[0], dst->linesize[0] * padbottom +
+                    (padright * max_step[0]));
+        }
+
+        return 0;
+    }
 
     for (i = 0; i < 3; i++) {
         x_shift = i ? desc->log2_chroma_w : 0;
@@ -284,184 +228,10 @@ int av_picture_pad(AVPicture *dst, const AVPicture *src, int height, int width,
                 (padbottom >> y_shift) + (padright >> x_shift));
         }
     }
-    return 0;
-}
-
-#if FF_API_DEINTERLACE
-
-#if HAVE_MMX_EXTERNAL
-#define deinterlace_line_inplace ff_deinterlace_line_inplace_mmx
-#define deinterlace_line         ff_deinterlace_line_mmx
-#else
-#define deinterlace_line_inplace deinterlace_line_inplace_c
-#define deinterlace_line         deinterlace_line_c
-
-/* filter parameters: [-1 4 2 4 -1] // 8 */
-static void deinterlace_line_c(uint8_t *dst,
-                             const uint8_t *lum_m4, const uint8_t *lum_m3,
-                             const uint8_t *lum_m2, const uint8_t *lum_m1,
-                             const uint8_t *lum,
-                             int size)
-{
-    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;
-    int sum;
-
-    for(;size > 0;size--) {
-        sum = -lum_m4[0];
-        sum += lum_m3[0] << 2;
-        sum += lum_m2[0] << 1;
-        sum += lum_m1[0] << 2;
-        sum += -lum[0];
-        dst[0] = cm[(sum + 4) >> 3];
-        lum_m4++;
-        lum_m3++;
-        lum_m2++;
-        lum_m1++;
-        lum++;
-        dst++;
-    }
-}
-
-static void deinterlace_line_inplace_c(uint8_t *lum_m4, uint8_t *lum_m3,
-                                       uint8_t *lum_m2, uint8_t *lum_m1,
-                                       uint8_t *lum, int size)
-{
-    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;
-    int sum;
-
-    for(;size > 0;size--) {
-        sum = -lum_m4[0];
-        sum += lum_m3[0] << 2;
-        sum += lum_m2[0] << 1;
-        lum_m4[0]=lum_m2[0];
-        sum += lum_m1[0] << 2;
-        sum += -lum[0];
-        lum_m2[0] = cm[(sum + 4) >> 3];
-        lum_m4++;
-        lum_m3++;
-        lum_m2++;
-        lum_m1++;
-        lum++;
-    }
-}
-#endif /* !HAVE_MMX_EXTERNAL */
-
-/* deinterlacing : 2 temporal taps, 3 spatial taps linear filter. The
-   top field is copied as is, but the bottom field is deinterlaced
-   against the top field. */
-static void deinterlace_bottom_field(uint8_t *dst, int dst_wrap,
-                                    const uint8_t *src1, int src_wrap,
-                                    int width, int height)
-{
-    const uint8_t *src_m2, *src_m1, *src_0, *src_p1, *src_p2;
-    int y;
-
-    src_m2 = src1;
-    src_m1 = src1;
-    src_0=&src_m1[src_wrap];
-    src_p1=&src_0[src_wrap];
-    src_p2=&src_p1[src_wrap];
-    for(y=0;y<(height-2);y+=2) {
-        memcpy(dst,src_m1,width);
-        dst += dst_wrap;
-        deinterlace_line(dst,src_m2,src_m1,src_0,src_p1,src_p2,width);
-        src_m2 = src_0;
-        src_m1 = src_p1;
-        src_0 = src_p2;
-        src_p1 += 2*src_wrap;
-        src_p2 += 2*src_wrap;
-        dst += dst_wrap;
-    }
-    memcpy(dst,src_m1,width);
-    dst += dst_wrap;
-    /* do last line */
-    deinterlace_line(dst,src_m2,src_m1,src_0,src_0,src_0,width);
-}
 
-static int deinterlace_bottom_field_inplace(uint8_t *src1, int src_wrap,
-                                            int width, int height)
-{
-    uint8_t *src_m1, *src_0, *src_p1, *src_p2;
-    int y;
-    uint8_t *buf;
-    buf = av_malloc(width);
-    if (!buf)
-        return AVERROR(ENOMEM);
-
-    src_m1 = src1;
-    memcpy(buf,src_m1,width);
-    src_0=&src_m1[src_wrap];
-    src_p1=&src_0[src_wrap];
-    src_p2=&src_p1[src_wrap];
-    for(y=0;y<(height-2);y+=2) {
-        deinterlace_line_inplace(buf,src_m1,src_0,src_p1,src_p2,width);
-        src_m1 = src_p1;
-        src_0 = src_p2;
-        src_p1 += 2*src_wrap;
-        src_p2 += 2*src_wrap;
-    }
-    /* do last line */
-    deinterlace_line_inplace(buf,src_m1,src_0,src_0,src_0,width);
-    av_free(buf);
     return 0;
 }
 
-int avpicture_deinterlace(AVPicture *dst, const AVPicture *src,
-                          enum AVPixelFormat pix_fmt, int width, int height)
-{
-    int i, ret;
-
-    if (pix_fmt != AV_PIX_FMT_YUV420P &&
-        pix_fmt != AV_PIX_FMT_YUVJ420P &&
-        pix_fmt != AV_PIX_FMT_YUV422P &&
-        pix_fmt != AV_PIX_FMT_YUVJ422P &&
-        pix_fmt != AV_PIX_FMT_YUV444P &&
-        pix_fmt != AV_PIX_FMT_YUV411P &&
-        pix_fmt != AV_PIX_FMT_GRAY8)
-        return -1;
-    if ((width & 3) != 0 || (height & 3) != 0)
-        return -1;
-
-    for(i=0;i<3;i++) {
-        if (i == 1) {
-            switch(pix_fmt) {
-            case AV_PIX_FMT_YUVJ420P:
-            case AV_PIX_FMT_YUV420P:
-                width >>= 1;
-                height >>= 1;
-                break;
-            case AV_PIX_FMT_YUV422P:
-            case AV_PIX_FMT_YUVJ422P:
-                width >>= 1;
-                break;
-            case AV_PIX_FMT_YUV411P:
-                width >>= 2;
-                break;
-            default:
-                break;
-            }
-            if (pix_fmt == AV_PIX_FMT_GRAY8) {
-                break;
-            }
-        }
-        if (src == dst) {
-            ret = deinterlace_bottom_field_inplace(dst->data[i],
-                                                   dst->linesize[i],
-                                                   width, height);
-            if (ret < 0)
-                return ret;
-        } else {
-            deinterlace_bottom_field(dst->data[i],dst->linesize[i],
-                                        src->data[i], src->linesize[i],
-                                        width, height);
-        }
-    }
-    emms_c();
-    return 0;
-}
-
-#endif /* FF_API_DEINTERLACE */
-
 #ifdef TEST
 
 int main(void){
@@ -489,3 +259,5 @@ int main(void){
 }
 
 #endif
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif /* FF_API_AVPICTURE */
diff --git a/libavcodec/imgconvert.h b/libavcodec/imgconvert.h
deleted file mode 100644
index 0ce626d7..00000000
--- a/libavcodec/imgconvert.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVCODEC_IMGCONVERT_H
-#define AVCODEC_IMGCONVERT_H
-
-#include <stdint.h>
-
-#include "version.h"
-
-#if FF_API_DEINTERLACE
-
-void ff_deinterlace_line_mmx(uint8_t *dst,
-                             const uint8_t *lum_m4, const uint8_t *lum_m3,
-                             const uint8_t *lum_m2, const uint8_t *lum_m1,
-                             const uint8_t *lum,
-                             int size);
-
-void ff_deinterlace_line_inplace_mmx(const uint8_t *lum_m4,
-                                     const uint8_t *lum_m3,
-                                     const uint8_t *lum_m2,
-                                     const uint8_t *lum_m1,
-                                     const uint8_t *lum, int size);
-
-#endif /* FF_API_DEINTERLACE */
-
-/* 1/2^n downscaling functions */
-void ff_shrink22(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
-void ff_shrink44(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
-void ff_shrink88(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
-
-#endif /* AVCODEC_IMGCONVERT_H */
diff --git a/libavcodec/imx_dump_header_bsf.c b/libavcodec/imx_dump_header_bsf.c
index d53f338a..3a69e98c 100644
--- a/libavcodec/imx_dump_header_bsf.c
+++ b/libavcodec/imx_dump_header_bsf.c
@@ -42,7 +42,7 @@ static int imx_dump_header(AVBitStreamFilterContext *bsfc, AVCodecContext *avctx
         return 0;
     }
 
-    *poutbuf = av_malloc(buf_size + 20 + FF_INPUT_BUFFER_PADDING_SIZE);
+    *poutbuf = av_malloc(buf_size + 20 + AV_INPUT_BUFFER_PADDING_SIZE);
     if (!*poutbuf)
         return AVERROR(ENOMEM);
     poutbufp = *poutbuf;
diff --git a/libavcodec/indeo2.c b/libavcodec/indeo2.c
index 39735c2e..17f23676 100644
--- a/libavcodec/indeo2.c
+++ b/libavcodec/indeo2.c
@@ -146,6 +146,7 @@ static int ir2_decode_frame(AVCodecContext *avctx,
     AVFrame *picture     = data;
     AVFrame * const p    = s->picture;
     int start, ret;
+    int ltab, ctab;
 
     if ((ret = ff_reget_buffer(avctx, p)) < 0)
         return ret;
@@ -165,36 +166,46 @@ static int ir2_decode_frame(AVCodecContext *avctx,
         buf[i] = ff_reverse[buf[i]];
 #endif
 
-    init_get_bits(&s->gb, buf + start, (buf_size - start) * 8);
+    if ((ret = init_get_bits8(&s->gb, buf + start, buf_size - start)) < 0)
+        return ret;
 
+    /* buf[0x22] selects the delta tables: the low two bits are used for
+     * the luma plane, the remaining bits for both chroma planes. */
+    ltab = buf[0x22] & 3;
+    ctab = buf[0x22] >> 2;
+    /* ir2_delta_table has only 4 rows; a larger ctab would index past it. */
+    if (ctab > 3) {
+        av_log(avctx, AV_LOG_ERROR, "ctab %d is invalid\n", ctab);
+        return AVERROR_INVALIDDATA;
+    }
     if (s->decode_delta) { /* intraframe */
         if ((ret = ir2_decode_plane(s, avctx->width, avctx->height,
                                     p->data[0], p->linesize[0],
-                                    ir2_luma_table)) < 0)
+                                    ir2_delta_table[ltab])) < 0)
             return ret;
 
         /* swapped U and V */
         if ((ret = ir2_decode_plane(s, avctx->width >> 2, avctx->height >> 2,
                                     p->data[2], p->linesize[2],
-                                    ir2_luma_table)) < 0)
+                                    ir2_delta_table[ctab])) < 0)
             return ret;
         if ((ret = ir2_decode_plane(s, avctx->width >> 2, avctx->height >> 2,
                                     p->data[1], p->linesize[1],
-                                    ir2_luma_table)) < 0)
+                                    ir2_delta_table[ctab])) < 0)
             return ret;
     } else { /* interframe */
         if ((ret = ir2_decode_plane_inter(s, avctx->width, avctx->height,
                                           p->data[0], p->linesize[0],
-                                          ir2_luma_table)) < 0)
+                                          ir2_delta_table[ltab])) < 0)
             return ret;
         /* swapped U and V */
         if ((ret = ir2_decode_plane_inter(s, avctx->width >> 2, avctx->height >> 2,
                                           p->data[2], p->linesize[2],
-                                          ir2_luma_table)) < 0)
+                                          ir2_delta_table[ctab])) < 0)
             return ret;
         if ((ret = ir2_decode_plane_inter(s, avctx->width >> 2, avctx->height >> 2,
                                           p->data[1], p->linesize[1],
-                                          ir2_luma_table)) < 0)
+                                          ir2_delta_table[ctab])) < 0)
             return ret;
     }
 
@@ -252,5 +263,5 @@ AVCodec ff_indeo2_decoder = {
     .init           = ir2_decode_init,
     .close          = ir2_decode_end,
     .decode         = ir2_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/indeo2data.h b/libavcodec/indeo2data.h
index 0d6d82f2..e05c91ff 100644
--- a/libavcodec/indeo2data.h
+++ b/libavcodec/indeo2data.h
@@ -27,115 +27,211 @@
 #define IR2_CODES 143
 static const uint16_t ir2_codes[IR2_CODES][2] = {
 #ifdef BITSTREAM_READER_LE
-{0x0000,  3}, {0x0004,  3}, {0x0006,  3}, {0x0001,  5},
-{0x0009,  5}, {0x0019,  5}, {0x000D,  5}, {0x001D,  5},
-{0x0023,  6}, {0x0013,  6}, {0x0033,  6}, {0x000B,  6},
-{0x002B,  6}, {0x001B,  6}, {0x0007,  8}, {0x0087,  8},
-{0x0027,  8}, {0x00A7,  8}, {0x0067,  8}, {0x00E7,  8},
-{0x0097,  8}, {0x0057,  8}, {0x0037,  8}, {0x00B7,  8},
-{0x00F7,  8}, {0x000F,  9}, {0x008F,  9}, {0x018F,  9},
-{0x014F,  9}, {0x00CF,  9}, {0x002F,  9}, {0x012F,  9},
-{0x01AF,  9}, {0x006F,  9}, {0x00EF,  9}, {0x01EF,  9},
-{0x001F, 10}, {0x021F, 10}, {0x011F, 10}, {0x031F, 10},
-{0x009F, 10}, {0x029F, 10}, {0x019F, 10}, {0x039F, 10},
-{0x005F, 10}, {0x025F, 10}, {0x015F, 10}, {0x035F, 10},
-{0x00DF, 10}, {0x02DF, 10}, {0x01DF, 10}, {0x03DF, 10},
-{0x003F, 13}, {0x103F, 13}, {0x083F, 13}, {0x183F, 13},
-{0x043F, 13}, {0x143F, 13}, {0x0C3F, 13}, {0x1C3F, 13},
-{0x023F, 13}, {0x123F, 13}, {0x0A3F, 13}, {0x1A3F, 13},
-{0x063F, 13}, {0x163F, 13}, {0x0E3F, 13}, {0x1E3F, 13},
-{0x013F, 13}, {0x113F, 13}, {0x093F, 13}, {0x193F, 13},
-{0x053F, 13}, {0x153F, 13}, {0x0D3F, 13}, {0x1D3F, 13},
-{0x033F, 13}, {0x133F, 13}, {0x0B3F, 13}, {0x1B3F, 13},
-{0x073F, 13}, {0x173F, 13}, {0x0F3F, 13}, {0x1F3F, 13},
-{0x00BF, 13}, {0x10BF, 13}, {0x08BF, 13}, {0x18BF, 13},
-{0x04BF, 13}, {0x14BF, 13}, {0x0CBF, 13}, {0x1CBF, 13},
-{0x02BF, 13}, {0x12BF, 13}, {0x0ABF, 13}, {0x1ABF, 13},
-{0x06BF, 13}, {0x16BF, 13}, {0x0EBF, 13}, {0x1EBF, 13},
-{0x01BF, 13}, {0x11BF, 13}, {0x09BF, 13}, {0x19BF, 13},
-{0x05BF, 13}, {0x15BF, 13}, {0x0DBF, 13}, {0x1DBF, 13},
-{0x03BF, 13}, {0x13BF, 13}, {0x0BBF, 13}, {0x1BBF, 13},
-{0x07BF, 13}, {0x17BF, 13}, {0x0FBF, 13}, {0x1FBF, 13},
-{0x007F, 14}, {0x207F, 14}, {0x107F, 14}, {0x307F, 14},
-{0x087F, 14}, {0x287F, 14}, {0x187F, 14}, {0x387F, 14},
-{0x047F, 14}, {0x247F, 14}, {0x147F, 14}, {0x0002,  3},
-{0x0011,  5}, {0x0005,  5}, {0x0015,  5}, {0x0003,  6},
-{0x003B,  6}, {0x0047,  8}, {0x00C7,  8}, {0x0017,  8},
-{0x00D7,  8}, {0x0077,  8}, {0x010F,  9}, {0x004F,  9},
-{0x01CF,  9}, {0x00AF,  9}, {0x016F,  9},
+    { 0x0000,  3 }, { 0x0004,  3 }, { 0x0006,  3 }, { 0x0001,  5 },
+    { 0x0009,  5 }, { 0x0019,  5 }, { 0x000D,  5 }, { 0x001D,  5 },
+    { 0x0023,  6 }, { 0x0013,  6 }, { 0x0033,  6 }, { 0x000B,  6 },
+    { 0x002B,  6 }, { 0x001B,  6 }, { 0x0007,  8 }, { 0x0087,  8 },
+    { 0x0027,  8 }, { 0x00A7,  8 }, { 0x0067,  8 }, { 0x00E7,  8 },
+    { 0x0097,  8 }, { 0x0057,  8 }, { 0x0037,  8 }, { 0x00B7,  8 },
+    { 0x00F7,  8 }, { 0x000F,  9 }, { 0x008F,  9 }, { 0x018F,  9 },
+    { 0x014F,  9 }, { 0x00CF,  9 }, { 0x002F,  9 }, { 0x012F,  9 },
+    { 0x01AF,  9 }, { 0x006F,  9 }, { 0x00EF,  9 }, { 0x01EF,  9 },
+    { 0x001F, 10 }, { 0x021F, 10 }, { 0x011F, 10 }, { 0x031F, 10 },
+    { 0x009F, 10 }, { 0x029F, 10 }, { 0x019F, 10 }, { 0x039F, 10 },
+    { 0x005F, 10 }, { 0x025F, 10 }, { 0x015F, 10 }, { 0x035F, 10 },
+    { 0x00DF, 10 }, { 0x02DF, 10 }, { 0x01DF, 10 }, { 0x03DF, 10 },
+    { 0x003F, 13 }, { 0x103F, 13 }, { 0x083F, 13 }, { 0x183F, 13 },
+    { 0x043F, 13 }, { 0x143F, 13 }, { 0x0C3F, 13 }, { 0x1C3F, 13 },
+    { 0x023F, 13 }, { 0x123F, 13 }, { 0x0A3F, 13 }, { 0x1A3F, 13 },
+    { 0x063F, 13 }, { 0x163F, 13 }, { 0x0E3F, 13 }, { 0x1E3F, 13 },
+    { 0x013F, 13 }, { 0x113F, 13 }, { 0x093F, 13 }, { 0x193F, 13 },
+    { 0x053F, 13 }, { 0x153F, 13 }, { 0x0D3F, 13 }, { 0x1D3F, 13 },
+    { 0x033F, 13 }, { 0x133F, 13 }, { 0x0B3F, 13 }, { 0x1B3F, 13 },
+    { 0x073F, 13 }, { 0x173F, 13 }, { 0x0F3F, 13 }, { 0x1F3F, 13 },
+    { 0x00BF, 13 }, { 0x10BF, 13 }, { 0x08BF, 13 }, { 0x18BF, 13 },
+    { 0x04BF, 13 }, { 0x14BF, 13 }, { 0x0CBF, 13 }, { 0x1CBF, 13 },
+    { 0x02BF, 13 }, { 0x12BF, 13 }, { 0x0ABF, 13 }, { 0x1ABF, 13 },
+    { 0x06BF, 13 }, { 0x16BF, 13 }, { 0x0EBF, 13 }, { 0x1EBF, 13 },
+    { 0x01BF, 13 }, { 0x11BF, 13 }, { 0x09BF, 13 }, { 0x19BF, 13 },
+    { 0x05BF, 13 }, { 0x15BF, 13 }, { 0x0DBF, 13 }, { 0x1DBF, 13 },
+    { 0x03BF, 13 }, { 0x13BF, 13 }, { 0x0BBF, 13 }, { 0x1BBF, 13 },
+    { 0x07BF, 13 }, { 0x17BF, 13 }, { 0x0FBF, 13 }, { 0x1FBF, 13 },
+    { 0x007F, 14 }, { 0x207F, 14 }, { 0x107F, 14 }, { 0x307F, 14 },
+    { 0x087F, 14 }, { 0x287F, 14 }, { 0x187F, 14 }, { 0x387F, 14 },
+    { 0x047F, 14 }, { 0x247F, 14 }, { 0x147F, 14 }, { 0x0002,  3 },
+    { 0x0011,  5 }, { 0x0005,  5 }, { 0x0015,  5 }, { 0x0003,  6 },
+    { 0x003B,  6 }, { 0x0047,  8 }, { 0x00C7,  8 }, { 0x0017,  8 },
+    { 0x00D7,  8 }, { 0x0077,  8 }, { 0x010F,  9 }, { 0x004F,  9 },
+    { 0x01CF,  9 }, { 0x00AF,  9 }, { 0x016F,  9 },
 #else
-    {0x0000,  3}, {0x0001,  3}, {0x0003,  3}, {0x0010,  5},
-    {0x0012,  5}, {0x0013,  5}, {0x0016,  5}, {0x0017,  5},
-    {0x0031,  6}, {0x0032,  6}, {0x0033,  6}, {0x0034,  6},
-    {0x0035,  6}, {0x0036,  6}, {0x00E0,  8}, {0x00E1,  8},
-    {0x00E4,  8}, {0x00E5,  8}, {0x00E6,  8}, {0x00E7,  8},
-    {0x00E9,  8}, {0x00EA,  8}, {0x00EC,  8}, {0x00ED,  8},
-    {0x00EF,  8}, {0x01E0,  9}, {0x01E2,  9}, {0x01E3,  9},
-    {0x01E5,  9}, {0x01E6,  9}, {0x01E8,  9}, {0x01E9,  9},
-    {0x01EB,  9}, {0x01EC,  9}, {0x01EE,  9}, {0x01EF,  9},
-    {0x03E0, 10}, {0x03E1, 10}, {0x03E2, 10}, {0x03E3, 10},
-    {0x03E4, 10}, {0x03E5, 10}, {0x03E6, 10}, {0x03E7, 10},
-    {0x03E8, 10}, {0x03E9, 10}, {0x03EA, 10}, {0x03EB, 10},
-    {0x03EC, 10}, {0x03ED, 10}, {0x03EE, 10}, {0x03EF, 10},
-    {0x1F80, 13}, {0x1F81, 13}, {0x1F82, 13}, {0x1F83, 13},
-    {0x1F84, 13}, {0x1F85, 13}, {0x1F86, 13}, {0x1F87, 13},
-    {0x1F88, 13}, {0x1F89, 13}, {0x1F8A, 13}, {0x1F8B, 13},
-    {0x1F8C, 13}, {0x1F8D, 13}, {0x1F8E, 13}, {0x1F8F, 13},
-    {0x1F90, 13}, {0x1F91, 13}, {0x1F92, 13}, {0x1F93, 13},
-    {0x1F94, 13}, {0x1F95, 13}, {0x1F96, 13}, {0x1F97, 13},
-    {0x1F98, 13}, {0x1F99, 13}, {0x1F9A, 13}, {0x1F9B, 13},
-    {0x1F9C, 13}, {0x1F9D, 13}, {0x1F9E, 13}, {0x1F9F, 13},
-    {0x1FA0, 13}, {0x1FA1, 13}, {0x1FA2, 13}, {0x1FA3, 13},
-    {0x1FA4, 13}, {0x1FA5, 13}, {0x1FA6, 13}, {0x1FA7, 13},
-    {0x1FA8, 13}, {0x1FA9, 13}, {0x1FAA, 13}, {0x1FAB, 13},
-    {0x1FAC, 13}, {0x1FAD, 13}, {0x1FAE, 13}, {0x1FAF, 13},
-    {0x1FB0, 13}, {0x1FB1, 13}, {0x1FB2, 13}, {0x1FB3, 13},
-    {0x1FB4, 13}, {0x1FB5, 13}, {0x1FB6, 13}, {0x1FB7, 13},
-    {0x1FB8, 13}, {0x1FB9, 13}, {0x1FBA, 13}, {0x1FBB, 13},
-    {0x1FBC, 13}, {0x1FBD, 13}, {0x1FBE, 13}, {0x1FBF, 13},
-    {0x3F80, 14}, {0x3F81, 14}, {0x3F82, 14}, {0x3F83, 14},
-    {0x3F84, 14}, {0x3F85, 14}, {0x3F86, 14}, {0x3F87, 14},
-    {0x3F88, 14}, {0x3F89, 14}, {0x3F8A, 14}, {0x0002,  3},
-    {0x0011,  5}, {0x0014,  5}, {0x0015,  5}, {0x0030,  6},
-    {0x0037,  6}, {0x00E2,  8}, {0x00E3,  8}, {0x00E8,  8},
-    {0x00EB,  8}, {0x00EE,  8}, {0x01E1,  9}, {0x01E4,  9},
-    {0x01E7,  9}, {0x01EA,  9}, {0x01ED,  9}
+    { 0x0000,  3 }, { 0x0001,  3 }, { 0x0003,  3 }, { 0x0010,  5 },
+    { 0x0012,  5 }, { 0x0013,  5 }, { 0x0016,  5 }, { 0x0017,  5 },
+    { 0x0031,  6 }, { 0x0032,  6 }, { 0x0033,  6 }, { 0x0034,  6 },
+    { 0x0035,  6 }, { 0x0036,  6 }, { 0x00E0,  8 }, { 0x00E1,  8 },
+    { 0x00E4,  8 }, { 0x00E5,  8 }, { 0x00E6,  8 }, { 0x00E7,  8 },
+    { 0x00E9,  8 }, { 0x00EA,  8 }, { 0x00EC,  8 }, { 0x00ED,  8 },
+    { 0x00EF,  8 }, { 0x01E0,  9 }, { 0x01E2,  9 }, { 0x01E3,  9 },
+    { 0x01E5,  9 }, { 0x01E6,  9 }, { 0x01E8,  9 }, { 0x01E9,  9 },
+    { 0x01EB,  9 }, { 0x01EC,  9 }, { 0x01EE,  9 }, { 0x01EF,  9 },
+    { 0x03E0, 10 }, { 0x03E1, 10 }, { 0x03E2, 10 }, { 0x03E3, 10 },
+    { 0x03E4, 10 }, { 0x03E5, 10 }, { 0x03E6, 10 }, { 0x03E7, 10 },
+    { 0x03E8, 10 }, { 0x03E9, 10 }, { 0x03EA, 10 }, { 0x03EB, 10 },
+    { 0x03EC, 10 }, { 0x03ED, 10 }, { 0x03EE, 10 }, { 0x03EF, 10 },
+    { 0x1F80, 13 }, { 0x1F81, 13 }, { 0x1F82, 13 }, { 0x1F83, 13 },
+    { 0x1F84, 13 }, { 0x1F85, 13 }, { 0x1F86, 13 }, { 0x1F87, 13 },
+    { 0x1F88, 13 }, { 0x1F89, 13 }, { 0x1F8A, 13 }, { 0x1F8B, 13 },
+    { 0x1F8C, 13 }, { 0x1F8D, 13 }, { 0x1F8E, 13 }, { 0x1F8F, 13 },
+    { 0x1F90, 13 }, { 0x1F91, 13 }, { 0x1F92, 13 }, { 0x1F93, 13 },
+    { 0x1F94, 13 }, { 0x1F95, 13 }, { 0x1F96, 13 }, { 0x1F97, 13 },
+    { 0x1F98, 13 }, { 0x1F99, 13 }, { 0x1F9A, 13 }, { 0x1F9B, 13 },
+    { 0x1F9C, 13 }, { 0x1F9D, 13 }, { 0x1F9E, 13 }, { 0x1F9F, 13 },
+    { 0x1FA0, 13 }, { 0x1FA1, 13 }, { 0x1FA2, 13 }, { 0x1FA3, 13 },
+    { 0x1FA4, 13 }, { 0x1FA5, 13 }, { 0x1FA6, 13 }, { 0x1FA7, 13 },
+    { 0x1FA8, 13 }, { 0x1FA9, 13 }, { 0x1FAA, 13 }, { 0x1FAB, 13 },
+    { 0x1FAC, 13 }, { 0x1FAD, 13 }, { 0x1FAE, 13 }, { 0x1FAF, 13 },
+    { 0x1FB0, 13 }, { 0x1FB1, 13 }, { 0x1FB2, 13 }, { 0x1FB3, 13 },
+    { 0x1FB4, 13 }, { 0x1FB5, 13 }, { 0x1FB6, 13 }, { 0x1FB7, 13 },
+    { 0x1FB8, 13 }, { 0x1FB9, 13 }, { 0x1FBA, 13 }, { 0x1FBB, 13 },
+    { 0x1FBC, 13 }, { 0x1FBD, 13 }, { 0x1FBE, 13 }, { 0x1FBF, 13 },
+    { 0x3F80, 14 }, { 0x3F81, 14 }, { 0x3F82, 14 }, { 0x3F83, 14 },
+    { 0x3F84, 14 }, { 0x3F85, 14 }, { 0x3F86, 14 }, { 0x3F87, 14 },
+    { 0x3F88, 14 }, { 0x3F89, 14 }, { 0x3F8A, 14 }, { 0x0002,  3 },
+    { 0x0011,  5 }, { 0x0014,  5 }, { 0x0015,  5 }, { 0x0030,  6 },
+    { 0x0037,  6 }, { 0x00E2,  8 }, { 0x00E3,  8 }, { 0x00E8,  8 },
+    { 0x00EB,  8 }, { 0x00EE,  8 }, { 0x01E1,  9 }, { 0x01E4,  9 },
+    { 0x01E7,  9 }, { 0x01EA,  9 }, { 0x01ED,  9 },
 #endif
 };
 
-static const uint8_t ir2_luma_table[256] = {
- 0x80, 0x80, 0x84, 0x84, 0x7C, 0x7C, 0x7F, 0x85,
- 0x81, 0x7B, 0x85, 0x7F, 0x7B, 0x81, 0x8C, 0x8C,
- 0x74, 0x74, 0x83, 0x8D, 0x7D, 0x73, 0x8D, 0x83,
- 0x73, 0x7D, 0x77, 0x89, 0x89, 0x77, 0x89, 0x77,
- 0x77, 0x89, 0x8C, 0x95, 0x74, 0x6B, 0x95, 0x8C,
- 0x6B, 0x74, 0x7C, 0x90, 0x84, 0x70, 0x90, 0x7C,
- 0x70, 0x84, 0x96, 0x96, 0x6A, 0x6A, 0x82, 0x98,
- 0x7E, 0x68, 0x98, 0x82, 0x68, 0x7E, 0x97, 0xA2,
- 0x69, 0x5E, 0xA2, 0x97, 0x5E, 0x69, 0xA2, 0xA2,
- 0x5E, 0x5E, 0x8B, 0xA3, 0x75, 0x5D, 0xA3, 0x8B,
- 0x5D, 0x75, 0x71, 0x95, 0x8F, 0x6B, 0x95, 0x71,
- 0x6B, 0x8F, 0x78, 0x9D, 0x88, 0x63, 0x9D, 0x78,
- 0x63, 0x88, 0x7F, 0xA7, 0x81, 0x59, 0xA7, 0x7F,
- 0x59, 0x81, 0xA4, 0xB1, 0x5C, 0x4F, 0xB1, 0xA4,
- 0x4F, 0x5C, 0x96, 0xB1, 0x6A, 0x4F, 0xB1, 0x96,
- 0x4F, 0x6A, 0xB2, 0xB2, 0x4E, 0x4E, 0x65, 0x9B,
- 0x9B, 0x65, 0x9B, 0x65, 0x65, 0x9B, 0x89, 0xB4,
- 0x77, 0x4C, 0xB4, 0x89, 0x4C, 0x77, 0x6A, 0xA3,
- 0x96, 0x5D, 0xA3, 0x6A, 0x5D, 0x96, 0x73, 0xAC,
- 0x8D, 0x54, 0xAC, 0x73, 0x54, 0x8D, 0xB4, 0xC3,
- 0x4C, 0x3D, 0xC3, 0xB4, 0x3D, 0x4C, 0xA4, 0xC3,
- 0x5C, 0x3D, 0xC3, 0xA4, 0x3D, 0x5C, 0xC4, 0xC4,
- 0x3C, 0x3C, 0x96, 0xC6, 0x6A, 0x3A, 0xC6, 0x96,
- 0x3A, 0x6A, 0x7C, 0xBA, 0x84, 0x46, 0xBA, 0x7C,
- 0x46, 0x84, 0x5B, 0xAB, 0xA5, 0x55, 0xAB, 0x5B,
- 0x55, 0xA5, 0x63, 0xB4, 0x9D, 0x4C, 0xB4, 0x63,
- 0x4C, 0x9D, 0x86, 0xCA, 0x7A, 0x36, 0xCA, 0x86,
- 0x36, 0x7A, 0xB6, 0xD7, 0x4A, 0x29, 0xD7, 0xB6,
- 0x29, 0x4A, 0xC8, 0xD7, 0x38, 0x29, 0xD7, 0xC8,
- 0x29, 0x38, 0xA4, 0xD8, 0x5C, 0x28, 0xD8, 0xA4,
- 0x28, 0x5C, 0x6C, 0xC1, 0x94, 0x3F, 0xC1, 0x6C,
- 0x3F, 0x94, 0xD9, 0xD9, 0x27, 0x27, 0x80, 0x80
+static const uint8_t ir2_delta_table[4][256] = {
+    { 0x80, 0x80, 0x84, 0x84, 0x7C, 0x7C, 0x7F, 0x85,
+      0x81, 0x7B, 0x85, 0x7F, 0x7B, 0x81, 0x8C, 0x8C,
+      0x74, 0x74, 0x83, 0x8D, 0x7D, 0x73, 0x8D, 0x83,
+      0x73, 0x7D, 0x77, 0x89, 0x89, 0x77, 0x89, 0x77,
+      0x77, 0x89, 0x8C, 0x95, 0x74, 0x6B, 0x95, 0x8C,
+      0x6B, 0x74, 0x7C, 0x90, 0x84, 0x70, 0x90, 0x7C,
+      0x70, 0x84, 0x96, 0x96, 0x6A, 0x6A, 0x82, 0x98,
+      0x7E, 0x68, 0x98, 0x82, 0x68, 0x7E, 0x97, 0xA2,
+      0x69, 0x5E, 0xA2, 0x97, 0x5E, 0x69, 0xA2, 0xA2,
+      0x5E, 0x5E, 0x8B, 0xA3, 0x75, 0x5D, 0xA3, 0x8B,
+      0x5D, 0x75, 0x71, 0x95, 0x8F, 0x6B, 0x95, 0x71,
+      0x6B, 0x8F, 0x78, 0x9D, 0x88, 0x63, 0x9D, 0x78,
+      0x63, 0x88, 0x7F, 0xA7, 0x81, 0x59, 0xA7, 0x7F,
+      0x59, 0x81, 0xA4, 0xB1, 0x5C, 0x4F, 0xB1, 0xA4,
+      0x4F, 0x5C, 0x96, 0xB1, 0x6A, 0x4F, 0xB1, 0x96,
+      0x4F, 0x6A, 0xB2, 0xB2, 0x4E, 0x4E, 0x65, 0x9B,
+      0x9B, 0x65, 0x9B, 0x65, 0x65, 0x9B, 0x89, 0xB4,
+      0x77, 0x4C, 0xB4, 0x89, 0x4C, 0x77, 0x6A, 0xA3,
+      0x96, 0x5D, 0xA3, 0x6A, 0x5D, 0x96, 0x73, 0xAC,
+      0x8D, 0x54, 0xAC, 0x73, 0x54, 0x8D, 0xB4, 0xC3,
+      0x4C, 0x3D, 0xC3, 0xB4, 0x3D, 0x4C, 0xA4, 0xC3,
+      0x5C, 0x3D, 0xC3, 0xA4, 0x3D, 0x5C, 0xC4, 0xC4,
+      0x3C, 0x3C, 0x96, 0xC6, 0x6A, 0x3A, 0xC6, 0x96,
+      0x3A, 0x6A, 0x7C, 0xBA, 0x84, 0x46, 0xBA, 0x7C,
+      0x46, 0x84, 0x5B, 0xAB, 0xA5, 0x55, 0xAB, 0x5B,
+      0x55, 0xA5, 0x63, 0xB4, 0x9D, 0x4C, 0xB4, 0x63,
+      0x4C, 0x9D, 0x86, 0xCA, 0x7A, 0x36, 0xCA, 0x86,
+      0x36, 0x7A, 0xB6, 0xD7, 0x4A, 0x29, 0xD7, 0xB6,
+      0x29, 0x4A, 0xC8, 0xD7, 0x38, 0x29, 0xD7, 0xC8,
+      0x29, 0x38, 0xA4, 0xD8, 0x5C, 0x28, 0xD8, 0xA4,
+      0x28, 0x5C, 0x6C, 0xC1, 0x94, 0x3F, 0xC1, 0x6C,
+      0x3F, 0x94, 0xD9, 0xD9, 0x27, 0x27, 0x80, 0x80, },
+    { 0x80, 0x80, 0x85, 0x85, 0x7B, 0x7B, 0x7E, 0x87,
+      0x82, 0x79, 0x87, 0x7E, 0x79, 0x82, 0x8F, 0x8F,
+      0x71, 0x71, 0x84, 0x8F, 0x7C, 0x71, 0x8F, 0x84,
+      0x71, 0x7C, 0x75, 0x8B, 0x8B, 0x75, 0x8B, 0x75,
+      0x75, 0x8B, 0x8E, 0x9A, 0x72, 0x66, 0x9A, 0x8E,
+      0x66, 0x72, 0x7B, 0x93, 0x85, 0x6D, 0x93, 0x7B,
+      0x6D, 0x85, 0x9B, 0x9B, 0x65, 0x65, 0x82, 0x9D,
+      0x7E, 0x63, 0x9D, 0x82, 0x63, 0x7E, 0x9B, 0xA8,
+      0x65, 0x58, 0xA8, 0x9B, 0x58, 0x65, 0xA9, 0xA9,
+      0x57, 0x57, 0x8D, 0xAA, 0x73, 0x56, 0xAA, 0x8D,
+      0x56, 0x73, 0x6E, 0x99, 0x92, 0x67, 0x99, 0x6E,
+      0x67, 0x92, 0x76, 0xA2, 0x8A, 0x5E, 0xA2, 0x76,
+      0x5E, 0x8A, 0x7F, 0xAF, 0x81, 0x51, 0xAF, 0x7F,
+      0x51, 0x81, 0xAB, 0xBA, 0x55, 0x46, 0xBA, 0xAB,
+      0x46, 0x55, 0x9A, 0xBB, 0x66, 0x45, 0xBB, 0x9A,
+      0x45, 0x66, 0xBB, 0xBB, 0x45, 0x45, 0x60, 0xA0,
+      0xA0, 0x60, 0xA0, 0x60, 0x60, 0xA0, 0x8B, 0xBE,
+      0x75, 0x42, 0xBE, 0x8B, 0x42, 0x75, 0x66, 0xAA,
+      0x9A, 0x56, 0xAA, 0x66, 0x56, 0x9A, 0x70, 0xB5,
+      0x90, 0x4B, 0xB5, 0x70, 0x4B, 0x90, 0xBE, 0xCF,
+      0x42, 0x31, 0xCF, 0xBE, 0x31, 0x42, 0xAB, 0xD0,
+      0x55, 0x30, 0xD0, 0xAB, 0x30, 0x55, 0xD1, 0xD1,
+      0x2F, 0x2F, 0x9A, 0xD3, 0x66, 0x2D, 0xD3, 0x9A,
+      0x2D, 0x66, 0x7B, 0xC5, 0x85, 0x3B, 0xC5, 0x7B,
+      0x3B, 0x85, 0x54, 0xB4, 0xAC, 0x4C, 0xB4, 0x54,
+      0x4C, 0xAC, 0x5E, 0xBE, 0xA2, 0x42, 0xBE, 0x5E,
+      0x42, 0xA2, 0x87, 0xD8, 0x79, 0x28, 0xD8, 0x87,
+      0x28, 0x79, 0xC0, 0xE8, 0x40, 0x18, 0xE8, 0xC0,
+      0x18, 0x40, 0xD5, 0xE8, 0x2B, 0x18, 0xE8, 0xD5,
+      0x18, 0x2B, 0xAB, 0xE9, 0x55, 0x17, 0xE9, 0xAB,
+      0x17, 0x55, 0x68, 0xCD, 0x98, 0x33, 0xCD, 0x68,
+      0x33, 0x98, 0xEA, 0xEA, 0x16, 0x16, 0x80, 0x80, },
+    { 0x80, 0x80, 0x86, 0x86, 0x7A, 0x7A, 0x7E, 0x88,
+      0x82, 0x78, 0x88, 0x7E, 0x78, 0x82, 0x92, 0x92,
+      0x6E, 0x6E, 0x85, 0x92, 0x7B, 0x6E, 0x92, 0x85,
+      0x6E, 0x7B, 0x73, 0x8D, 0x8D, 0x73, 0x8D, 0x73,
+      0x73, 0x8D, 0x91, 0x9E, 0x6F, 0x62, 0x9E, 0x91,
+      0x62, 0x6F, 0x79, 0x97, 0x87, 0x69, 0x97, 0x79,
+      0x69, 0x87, 0xA0, 0xA0, 0x60, 0x60, 0x83, 0xA2,
+      0x7D, 0x5E, 0xA2, 0x83, 0x5E, 0x7D, 0xA0, 0xB0,
+      0x60, 0x50, 0xB0, 0xA0, 0x50, 0x60, 0xB1, 0xB1,
+      0x4F, 0x4F, 0x8F, 0xB2, 0x71, 0x4E, 0xB2, 0x8F,
+      0x4E, 0x71, 0x6B, 0x9E, 0x95, 0x62, 0x9E, 0x6B,
+      0x62, 0x95, 0x74, 0xA9, 0x8C, 0x57, 0xA9, 0x74,
+      0x57, 0x8C, 0x7F, 0xB8, 0x81, 0x48, 0xB8, 0x7F,
+      0x48, 0x81, 0xB4, 0xC5, 0x4C, 0x3B, 0xC5, 0xB4,
+      0x3B, 0x4C, 0x9F, 0xC6, 0x61, 0x3A, 0xC6, 0x9F,
+      0x3A, 0x61, 0xC6, 0xC6, 0x3A, 0x3A, 0x59, 0xA7,
+      0xA7, 0x59, 0xA7, 0x59, 0x59, 0xA7, 0x8D, 0xCA,
+      0x73, 0x36, 0xCA, 0x8D, 0x36, 0x73, 0x61, 0xB2,
+      0x9F, 0x4E, 0xB2, 0x61, 0x4E, 0x9F, 0x6D, 0xBF,
+      0x93, 0x41, 0xBF, 0x6D, 0x41, 0x93, 0xCA, 0xDF,
+      0x36, 0x21, 0xDF, 0xCA, 0x21, 0x36, 0xB3, 0xDF,
+      0x4D, 0x21, 0xDF, 0xB3, 0x21, 0x4D, 0xE1, 0xE1,
+      0x1F, 0x1F, 0x9F, 0xE3, 0x61, 0x1D, 0xE3, 0x9F,
+      0x1D, 0x61, 0x7A, 0xD3, 0x86, 0x2D, 0xD3, 0x7A,
+      0x2D, 0x86, 0x4C, 0xBE, 0xB4, 0x42, 0xBE, 0x4C,
+      0x42, 0xB4, 0x57, 0xCA, 0xA9, 0x36, 0xCA, 0x57,
+      0x36, 0xA9, 0x88, 0xE9, 0x78, 0x17, 0xE9, 0x88,
+      0x17, 0x78, 0xCC, 0xFB, 0x34, 0x05, 0xFB, 0xCC,
+      0x05, 0x34, 0xE6, 0xFB, 0x1A, 0x05, 0xFB, 0xE6,
+      0x05, 0x1A, 0xB4, 0xFD, 0x4C, 0x03, 0xFD, 0xB4,
+      0x03, 0x4C, 0x63, 0xDC, 0x9D, 0x24, 0xDC, 0x63,
+      0x24, 0x9D, 0xFE, 0xFE, 0x02, 0x02, 0x80, 0x80, },
+    { 0x80, 0x80, 0x87, 0x87, 0x79, 0x79, 0x7E, 0x89,
+      0x82, 0x77, 0x89, 0x7E, 0x77, 0x82, 0x95, 0x95,
+      0x6B, 0x6B, 0x86, 0x96, 0x7A, 0x6A, 0x96, 0x86,
+      0x6A, 0x7A, 0x70, 0x90, 0x90, 0x70, 0x90, 0x70,
+      0x70, 0x90, 0x94, 0xA4, 0x6C, 0x5C, 0xA4, 0x94,
+      0x5C, 0x6C, 0x78, 0x9B, 0x88, 0x65, 0x9B, 0x78,
+      0x65, 0x88, 0xA6, 0xA6, 0x5A, 0x5A, 0x83, 0xA9,
+      0x7D, 0x57, 0xA9, 0x83, 0x57, 0x7D, 0xA6, 0xB9,
+      0x5A, 0x47, 0xB9, 0xA6, 0x47, 0x5A, 0xBA, 0xBA,
+      0x46, 0x46, 0x92, 0xBC, 0x6E, 0x44, 0xBC, 0x92,
+      0x44, 0x6E, 0x67, 0xA3, 0x99, 0x5D, 0xA3, 0x67,
+      0x5D, 0x99, 0x72, 0xB0, 0x8E, 0x50, 0xB0, 0x72,
+      0x50, 0x8E, 0x7F, 0xC3, 0x81, 0x3D, 0xC3, 0x7F,
+      0x3D, 0x81, 0xBE, 0xD2, 0x42, 0x2E, 0xD2, 0xBE,
+      0x2E, 0x42, 0xA5, 0xD4, 0x5B, 0x2C, 0xD4, 0xA5,
+      0x2C, 0x5B, 0xD4, 0xD4, 0x2C, 0x2C, 0x52, 0xAE,
+      0xAE, 0x52, 0xAE, 0x52, 0x52, 0xAE, 0x8F, 0xD8,
+      0x71, 0x28, 0xD8, 0x8F, 0x28, 0x71, 0x5B, 0xBB,
+      0xA5, 0x45, 0xBB, 0x5B, 0x45, 0xA5, 0x69, 0xCB,
+      0x97, 0x35, 0xCB, 0x69, 0x35, 0x97, 0xD8, 0xF0,
+      0x28, 0x10, 0xF0, 0xD8, 0x10, 0x28, 0xBD, 0xF1,
+      0x43, 0x0F, 0xF1, 0xBD, 0x0F, 0x43, 0xF3, 0xF3,
+      0x0D, 0x0D, 0xA5, 0xF6, 0x5B, 0x0A, 0xF6, 0xA5,
+      0x0A, 0x5B, 0x78, 0xE2, 0x88, 0x1E, 0xE2, 0x78,
+      0x1E, 0x88, 0x42, 0xC9, 0xBE, 0x37, 0xC9, 0x42,
+      0x37, 0xBE, 0x4F, 0xD8, 0xB1, 0x28, 0xD8, 0x4F,
+      0x28, 0xB1, 0x8A, 0xFD, 0x76, 0x03, 0xFD, 0x8A,
+      0x03, 0x76, 0xDB, 0xFF, 0x25, 0x01, 0xFF, 0xDB,
+      0x01, 0x25, 0xF9, 0xFF, 0x07, 0x01, 0xFF, 0xF9,
+      0x01, 0x07, 0xBE, 0xFF, 0x42, 0x01, 0xFF, 0xBE,
+      0x01, 0x42, 0x5E, 0xED, 0xA2, 0x13, 0xED, 0x5E,
+      0x13, 0xA2, 0xFF, 0xFF, 0x01, 0x01, 0x80, 0x80, },
 };
 
 #endif /* AVCODEC_INDEO2DATA_H */
diff --git a/libavcodec/indeo3.c b/libavcodec/indeo3.c
index 3d0f906e..3f31946d 100644
--- a/libavcodec/indeo3.c
+++ b/libavcodec/indeo3.c
@@ -1142,5 +1142,5 @@ AVCodec ff_indeo3_decoder = {
     .init           = decode_init,
     .close          = decode_close,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/indeo3data.h b/libavcodec/indeo3data.h
index e7e28a3b..fe8f0bae 100644
--- a/libavcodec/indeo3data.h
+++ b/libavcodec/indeo3data.h
@@ -238,9 +238,9 @@
  * according with endianness of the host machine.
  */
 #if HAVE_BIGENDIAN
-#define PD(a,b) (((a) << 8) + (b))
+#define PD(a,b) (((a) * (1 << 8)) + (b))
 #else
-#define PD(a,b) (((b) << 8) + (a))
+#define PD(a,b) (((b) * (1 << 8)) + (a))
 #endif
 
 /**
@@ -285,9 +285,9 @@ static const int16_t delta_tab_3_5[79]  = { TAB_3_5 };
  * according with endianness of the host machine.
  */
 #if HAVE_BIGENDIAN
-#define PD(a,b) (((a) << 24) + ((a) << 16) + ((b) << 8) + (b))
+#define PD(a,b) (((a) * (1 << 24)) + ((a) * (1 << 16)) + ((b) * (1 << 8)) + (b))
 #else
-#define PD(a,b) (((b) << 24) + ((b) << 16) + ((a) << 8) + (a))
+#define PD(a,b) (((b) * (1 << 24)) + ((b) * (1 << 16)) + ((a) * (1 << 8)) + (a))
 #endif
 
 /*
diff --git a/libavcodec/indeo4.c b/libavcodec/indeo4.c
index 1c2491d6..0065b52c 100644
--- a/libavcodec/indeo4.c
+++ b/libavcodec/indeo4.c
@@ -704,5 +704,5 @@ AVCodec ff_indeo4_decoder = {
     .init           = decode_init,
     .close          = ff_ivi_decode_close,
     .decode         = ff_ivi_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/indeo5.c b/libavcodec/indeo5.c
index 74748633..5f931c8b 100644
--- a/libavcodec/indeo5.c
+++ b/libavcodec/indeo5.c
@@ -688,5 +688,5 @@ AVCodec ff_indeo5_decoder = {
     .init           = decode_init,
     .close          = ff_ivi_decode_close,
     .decode         = ff_ivi_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/intelh263dec.c b/libavcodec/intelh263dec.c
index 71e0112c..fe8d185e 100644
--- a/libavcodec/intelh263dec.c
+++ b/libavcodec/intelh263dec.c
@@ -18,6 +18,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "mpegutils.h"
 #include "mpegvideo.h"
 #include "h263.h"
 #include "mpegvideodata.h"
@@ -136,7 +137,7 @@ AVCodec ff_h263i_decoder = {
     .init           = ff_h263_decode_init,
     .close          = ff_h263_decode_end,
     .decode         = ff_h263_decode_frame,
-    .capabilities   = CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DRAW_HORIZ_BAND | AV_CODEC_CAP_DR1,
     .pix_fmts       = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_YUV420P,
         AV_PIX_FMT_NONE
diff --git a/libavcodec/internal.h b/libavcodec/internal.h
index a49bf34a..24d320c1 100644
--- a/libavcodec/internal.h
+++ b/libavcodec/internal.h
@@ -46,22 +46,31 @@
  * all.
  */
 #define FF_CODEC_CAP_INIT_CLEANUP           (1 << 1)
-
-
-#ifdef DEBUG
-#   define ff_dlog(ctx, ...) av_log(ctx, AV_LOG_DEBUG, __VA_ARGS__)
-#else
-#   define ff_dlog(ctx, ...) do { if (0) av_log(ctx, AV_LOG_DEBUG, __VA_ARGS__); } while (0)
-#endif
+/**
+ * Decoders marked with FF_CODEC_CAP_SETS_PKT_DTS want to set
+ * AVFrame.pkt_dts manually. If the flag is set, utils.c won't overwrite
+ * this field. If it's unset, utils.c tries to guess the pkt_dts field
+ * from the input AVPacket.
+ */
+#define FF_CODEC_CAP_SETS_PKT_DTS           (1 << 2)
+/**
+ * The decoder extracts and fills its parameters even if the frame is
+ * skipped due to the skip_frame setting.
+ */
+#define FF_CODEC_CAP_SKIP_FRAME_FILL_PARAM  (1 << 3)
 
 #ifdef TRACE
 #   define ff_tlog(ctx, ...) av_log(ctx, AV_LOG_TRACE, __VA_ARGS__)
 #else
-#   define ff_tlog(ctx, ...) while(0)
+#   define ff_tlog(ctx, ...) do { } while(0)
 #endif
 
 
-#define FF_SANE_NB_CHANNELS 63U
+#if !FF_API_QUANT_BIAS
+#define FF_DEFAULT_QUANT_BIAS 999999
+#endif
+
+#define FF_SANE_NB_CHANNELS 64U
 
 #define FF_SIGNBIT(x) ((x) >> CHAR_BIT * sizeof(x) - 1)
 
@@ -116,14 +125,6 @@ typedef struct AVCodecInternal {
      */
     int allocate_progress;
 
-#if FF_API_OLD_ENCODE_AUDIO
-    /**
-     * Internal sample count used by avcodec_encode_audio() to fabricate pts.
-     * Can be removed along with avcodec_encode_audio().
-     */
-    int64_t sample_count;
-#endif
-
     /**
      * An audio frame with less than required samples has been submitted and
      * padded with silence. Reject all subsequent frames.
@@ -182,11 +183,11 @@ unsigned int avpriv_toupper4(unsigned int x);
 int ff_init_buffer_info(AVCodecContext *s, AVFrame *frame);
 
 
-void avpriv_color_frame(AVFrame *frame, const int color[4]);
+void ff_color_frame(AVFrame *frame, const int color[4]);
 
 extern volatile int ff_avcodec_locked;
 int ff_lock_avcodec(AVCodecContext *log_ctx, const AVCodec *codec);
-int ff_unlock_avcodec(void);
+int ff_unlock_avcodec(const AVCodec *codec);
 
 int avpriv_lock_avformat(void);
 int avpriv_unlock_avformat(void);
@@ -196,7 +197,7 @@ int avpriv_unlock_avformat(void);
  * This value was chosen such that every bit of the buffer is
  * addressable by a 32-bit signed integer as used by get_bits.
  */
-#define FF_MAX_EXTRADATA_SIZE ((1 << 28) - FF_INPUT_BUFFER_PADDING_SIZE)
+#define FF_MAX_EXTRADATA_SIZE ((1 << 28) - AV_INPUT_BUFFER_PADDING_SIZE)
 
 /**
  * Check AVPacket size and/or allocate data.
@@ -213,11 +214,20 @@ int avpriv_unlock_avformat(void);
  *                avpkt->size is set to the specified size.
  *                All other AVPacket fields will be reset with av_init_packet().
  * @param size    the minimum required packet size
+ * @param min_size This is a hint to the allocation algorithm, which indicates
+ *                to what minimal size the caller might later shrink the packet
+ *                to. Encoders often allocate packets which are larger than the
+ *                amount of data that is written into them as the exact amount is
+ *                not known at the time of allocation. min_size represents the
+ *                size a packet might be shrunk to by the caller. Can be set to
+ *                0. Setting this roughly correctly allows the allocation code
+ *                to choose between several allocation strategies to improve
+ *                speed slightly.
  * @return        non negative on success, negative error code on failure
  */
-int ff_alloc_packet2(AVCodecContext *avctx, AVPacket *avpkt, int64_t size);
+int ff_alloc_packet2(AVCodecContext *avctx, AVPacket *avpkt, int64_t size, int64_t min_size);
 
-int ff_alloc_packet(AVPacket *avpkt, int size);
+attribute_deprecated int ff_alloc_packet(AVPacket *avpkt, int size);
 
 /**
  * Rescale from sample rate to AVCodecContext.time_base.
@@ -231,6 +241,25 @@ static av_always_inline int64_t ff_samples_to_time_base(AVCodecContext *avctx,
                         avctx->time_base);
 }
 
+/**
+ * 2^(x) for integer x, assembled from exponent/mantissa bits.
+ * @param x exponent
+ * @return correctly rounded float
+ */
+static av_always_inline float ff_exp2fi(int x) {
+    /* Too large */
+    if (x > 128)
+        return INFINITY;
+    /* Normal range: biased exponent in bits 23 and up, zero mantissa */
+    if (x >= -126)
+        return av_int2float((x + 127) << 23);
+    /* Subnormal numbers: a single mantissa bit */
+    if (x > -150)
+        return av_int2float(1 << (x + 149));
+    /* Negligibly small */
+    return 0;
+}
+
 /**
  * Get a buffer for a frame. This is a wrapper around
  * AVCodecContext.get_buffer() and should be used instead calling get_buffer()
@@ -294,4 +323,11 @@ int ff_get_format(AVCodecContext *avctx, const enum AVPixelFormat *fmt);
  */
 int ff_decode_frame_props(AVCodecContext *avctx, AVFrame *frame);
 
+/**
+ * Add CPB properties side data to an encoding context.
+ */
+AVCPBProperties *ff_add_cpb_side_data(AVCodecContext *avctx);
+
+int ff_side_data_set_encoder_stats(AVPacket *pkt, int quality, int64_t *error, int error_count, int pict_type);
+
 #endif /* AVCODEC_INTERNAL_H */
diff --git a/libavcodec/interplayacm.c b/libavcodec/interplayacm.c
new file mode 100644
index 00000000..a676bcb9
--- /dev/null
+++ b/libavcodec/interplayacm.c
@@ -0,0 +1,615 @@
+/*
+ * Interplay ACM decoder
+ *
+ * Copyright (c) 2004-2008 Marko Kreen
+ * Copyright (c) 2008 Adam Gashlin
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include "libavutil/intreadwrite.h"
+#include "avcodec.h"
+#define BITSTREAM_READER_LE
+#include "get_bits.h"
+#include "internal.h"
+
+static const int8_t map_1bit[]      = { -1, +1 }; /* 1-bit code -> amplitude index */
+static const int8_t map_2bit_near[] = { -2, -1, +1, +2 }; /* 2-bit code -> amplitude index */
+static const int8_t map_2bit_far[]  = { -3, -2, +2, +3 }; /* 2-bit code -> amplitude index, +-1 left out */
+static const int8_t map_3bit[]      = { -4, -3, -2, -1, +1, +2, +3, +4 }; /* 3-bit code -> amplitude index */
+/* Written by decode_init(): entry i holds the base-3/5/11 digits of i, one digit per 4-bit field. */
+static int mul_3x3 [3 * 3 * 3];
+static int mul_3x5 [5 * 5 * 5];
+static int mul_2x11[11  *  11];
+
+typedef struct InterplayACMContext {
+    GetBitContext gb;        /* bit reader set up over the buffered input in decode_frame() */
+    uint8_t *bitstream;      /* input accumulation buffer, max_framesize bytes */
+    int max_framesize;       /* set to block_len in decode_init() */
+    int bitstream_size;      /* number of buffered bytes not yet consumed */
+    int bitstream_index;     /* offset of the first unconsumed byte in bitstream */
+    /* block geometry, taken from the 16-bit word at extradata offset 12 */
+    int level;               /* low 4 bits of that word; cols = 1 << level */
+    int rows;                /* that word shifted right by 4 */
+    int cols;                /* 1 << level */
+    int wrapbuf_len;         /* 2 * cols - 2 */
+    int block_len;           /* rows * cols */
+    int skip;                /* bits of the first buffered byte already consumed by the previous block */
+    /* work buffers, allocated in decode_init() and released in decode_close() */
+    int *block;              /* block_len values, row stride 1 << level (see set_pos) */
+    int *wrapbuf;            /* wrapbuf_len history values used by juggle() */
+    int *ampbuf;             /* 0x10000 entry amplitude table */
+    int *midbuf;             /* ampbuf + 0x8000, so that negative indices are valid */
+} InterplayACMContext;
+
+static av_cold int decode_init(AVCodecContext *avctx)
+{
+    InterplayACMContext *s = avctx->priv_data;
+    int x1, x2, x3;
+    /* Needs 14 bytes of extradata; a zero channel count would make decode_frame() divide by 0. */
+    if (avctx->extradata_size < 14 || avctx->channels <= 0)
+        return AVERROR_INVALIDDATA;
+    /* bits 0-3 of the LE word at extradata offset 12 give level, the remaining bits give rows */
+    s->level = AV_RL16(avctx->extradata + 12) & 0xf;
+    s->rows  = AV_RL16(avctx->extradata + 12) >>  4;
+    s->cols  = 1 << s->level;
+    s->wrapbuf_len = 2 * s->cols - 2;
+    s->block_len = s->rows * s->cols;
+    s->max_framesize = s->block_len;
+    /* NOTE(review): on allocation failure the buffers already obtained are not freed here */
+    s->block   = av_calloc(s->block_len, sizeof(int));
+    s->wrapbuf = av_calloc(s->wrapbuf_len, sizeof(int));
+    s->ampbuf  = av_calloc(0x10000, sizeof(int));
+    s->bitstream = av_calloc(s->max_framesize, sizeof(*s->bitstream));
+    if (!s->block || !s->wrapbuf || !s->ampbuf || !s->bitstream)
+        return AVERROR(ENOMEM);
+    /* midbuf is the centre of ampbuf, so indices -0x8000..0x7fff stay inside the table */
+    s->midbuf  = s->ampbuf + 0x8000;
+    avctx->sample_fmt = AV_SAMPLE_FMT_S16;
+    /* lookup tables: the base-3/5/11 digits of the index, one digit per 4-bit field */
+    for (x3 = 0; x3 < 3; x3++)
+        for (x2 = 0; x2 < 3; x2++)
+            for (x1 = 0; x1 < 3; x1++)
+                mul_3x3[x1 + x2 * 3 + x3* 3 * 3] = x1 + (x2 << 4) + (x3 << 8);
+    for (x3 = 0; x3 < 5; x3++)
+        for (x2 = 0; x2 < 5; x2++)
+            for (x1 = 0; x1 < 5; x1++)
+                mul_3x5[x1 + x2 * 5 + x3 * 5 * 5] = x1 + (x2 << 4) + (x3 << 8);
+    for (x2 = 0; x2 < 11; x2++)
+        for (x1 = 0; x1 < 11; x1++)
+            mul_2x11[x1 + x2 * 11] = x1 + (x2 << 4);
+
+    return 0;
+}
+
+#define set_pos(s, r, c, idx) do {               \
+        unsigned pos = ((r) << s->level) + (c);  \
+        s->block[pos] = s->midbuf[(idx)];        \
+    } while (0) /* block[row r, column c] = amplitude midbuf[idx]; rows are 1 << level values apart */
+
+static int zero(InterplayACMContext *s, unsigned ind, unsigned col)
+{
+    /* Store amplitude index 0 in every row of column col; always returns 0. */
+    unsigned row = 0;
+    while (row < s->rows)
+        set_pos(s, row++, col, 0);
+    return 0;
+}
+
+static int bad(InterplayACMContext *s, unsigned ind, unsigned col)
+{
+    return AVERROR_INVALIDDATA; /* used in filler_list for selector values that have no decoder */
+}
+
+static int linear(InterplayACMContext *s, unsigned ind, unsigned col)
+{
+    /* Each row is an ind-bit code; subtracting 1 << (ind - 1) centres it on zero. */
+    const int half = 1 << (ind - 1);
+    unsigned int row;
+
+    for (row = 0; row < s->rows; row++) {
+        int code = get_bits(&s->gb, ind);
+        set_pos(s, row, col, code - half);
+    }
+    return 0;
+}
+
+static int k13(InterplayACMContext *s, unsigned ind, unsigned col)
+{
+    GetBitContext *gb = &s->gb;
+    unsigned row;
+
+    /* Per-row prefix code (bits in reading order):
+     *   0     -> amplitude index 0 for this row and the next one
+     *   1 0   -> amplitude index 0
+     *   1 1 b -> amplitude index map_1bit[b]
+     */
+    for (row = 0; row < s->rows; row++) {
+        if (!get_bits1(gb)) {
+            set_pos(s, row, col, 0);
+            if (++row >= s->rows)
+                break;
+            set_pos(s, row, col, 0);
+        } else if (!get_bits1(gb)) {
+            set_pos(s, row, col, 0);
+        } else {
+            set_pos(s, row, col, map_1bit[get_bits1(gb)]);
+        }
+    }
+    return 0;
+}
+
+static int k12(InterplayACMContext *s, unsigned ind, unsigned col)
+{
+    GetBitContext *gb = &s->gb;
+    unsigned row;
+
+    /* Per-row prefix code (bits in reading order):
+     *   0   -> amplitude index 0
+     *   1 b -> amplitude index map_1bit[b]
+     */
+    for (row = 0; row < s->rows; row++) {
+        if (!get_bits1(gb))
+            set_pos(s, row, col, 0);
+        else
+            set_pos(s, row, col, map_1bit[get_bits1(gb)]);
+    }
+    return 0;
+}
+
+static int k24(InterplayACMContext *s, unsigned ind, unsigned col)
+{
+    GetBitContext *gb = &s->gb;
+    unsigned row;
+
+    /* Per-row prefix code (bits in reading order):
+     *   0                -> amplitude index 0 for this row and the next one
+     *   1 0              -> amplitude index 0
+     *   1 1 + 2-bit v    -> amplitude index map_2bit_near[v]
+     */
+    for (row = 0; row < s->rows; row++) {
+        if (!get_bits1(gb)) {
+            set_pos(s, row, col, 0);
+            if (++row >= s->rows)
+                break;
+            set_pos(s, row, col, 0);
+        } else if (!get_bits1(gb)) {
+            set_pos(s, row, col, 0);
+        } else {
+            set_pos(s, row, col, map_2bit_near[get_bits(gb, 2)]);
+        }
+    }
+
+    return 0;
+}
+
+static int k23(InterplayACMContext *s, unsigned ind, unsigned col)
+{
+    GetBitContext *gb = &s->gb;
+    unsigned row;
+
+    /* Per-row prefix code (bits in reading order):
+     *   0             -> amplitude index 0
+     *   1 + 2-bit v   -> amplitude index map_2bit_near[v]
+     */
+    for (row = 0; row < s->rows; row++) {
+        if (!get_bits1(gb))
+            set_pos(s, row, col, 0);
+        else
+            set_pos(s, row, col, map_2bit_near[get_bits(gb, 2)]);
+    }
+    return 0;
+}
+
+static int k35(InterplayACMContext *s, unsigned ind, unsigned col)
+{
+    GetBitContext *gb = &s->gb;
+    unsigned row;
+
+    /*
+     * Per-row prefix code (bits in reading order):
+     *   0                 -> amplitude index 0 for this row and the next one
+     *   1 0               -> amplitude index 0
+     *   1 1 0 b           -> amplitude index map_1bit[b]
+     *   1 1 1 + 2-bit v   -> amplitude index map_2bit_far[v]
+     */
+    for (row = 0; row < s->rows; row++) {
+        if (!get_bits1(gb)) {
+            /* double zero, cut short at the last row */
+            set_pos(s, row, col, 0);
+            if (++row >= s->rows)
+                break;
+            set_pos(s, row, col, 0);
+        } else if (!get_bits1(gb)) {
+            /* single zero */
+            set_pos(s, row, col, 0);
+        } else if (!get_bits1(gb)) {
+            /* -1 or +1 */
+            set_pos(s, row, col, map_1bit[get_bits1(gb)]);
+        } else {
+            /* -3, -2, +2 or +3 */
+            set_pos(s, row, col, map_2bit_far[get_bits(gb, 2)]);
+        }
+    }
+
+    return 0;
+}
+
+static int k34(InterplayACMContext *s, unsigned ind, unsigned col)
+{
+    GetBitContext *gb = &s->gb;
+    unsigned row;
+
+    /* Per-row prefix code (bits in reading order):
+     *   0               -> amplitude index 0
+     *   1 0 b           -> amplitude index map_1bit[b]
+     *   1 1 + 2-bit v   -> amplitude index map_2bit_far[v]
+     */
+    for (row = 0; row < s->rows; row++) {
+        if (!get_bits1(gb)) {
+            set_pos(s, row, col, 0);
+        } else if (!get_bits1(gb)) {
+            /* -1 or +1 */
+            set_pos(s, row, col, map_1bit[get_bits1(gb)]);
+        } else {
+            /* -3, -2, +2 or +3 */
+            set_pos(s, row, col, map_2bit_far[get_bits(gb, 2)]);
+        }
+    }
+
+    return 0;
+}
+
+static int k45(InterplayACMContext *s, unsigned ind, unsigned col)
+{
+    GetBitContext *gb = &s->gb;
+    unsigned row;
+
+    /*
+     * Per-row prefix code (bits in reading order):
+     *   0               -> amplitude index 0 for this row and the next one
+     *   1 0             -> amplitude index 0
+     *   1 1 + 3-bit v   -> amplitude index map_3bit[v]
+     */
+    for (row = 0; row < s->rows; row++) {
+        if (!get_bits1(gb)) {
+            set_pos(s, row, col, 0);
+            if (++row >= s->rows)
+                break;
+            set_pos(s, row, col, 0);
+        } else if (!get_bits1(gb)) {
+            set_pos(s, row, col, 0);
+        } else {
+            set_pos(s, row, col, map_3bit[get_bits(gb, 3)]);
+        }
+    }
+
+    return 0;
+}
+
+static int k44(InterplayACMContext *s, unsigned ind, unsigned col)
+{
+    GetBitContext *gb = &s->gb;
+    unsigned row;
+
+    /* Per-row prefix code (bits in reading order):
+     *   0             -> amplitude index 0
+     *   1 + 3-bit v   -> amplitude index map_3bit[v]
+     */
+    for (row = 0; row < s->rows; row++) {
+        if (!get_bits1(gb))
+            set_pos(s, row, col, 0);
+        else
+            set_pos(s, row, col, map_3bit[get_bits(gb, 3)]);
+    }
+    return 0;
+}
+
+static int t15(InterplayACMContext *s, unsigned ind, unsigned col)
+{
+    GetBitContext *gb = &s->gb;
+    unsigned i, b;
+    int n1, n2, n3;
+    /* Each 5-bit code fills up to three rows with indices in -1..1; returns 0 or AVERROR_INVALIDDATA. */
+    for (i = 0; i < s->rows; i++) {
+        /* b = (x1) + (x2 * 3) + (x3 * 9) */
+        b = get_bits(gb, 5);
+        if (b >= FF_ARRAY_ELEMS(mul_3x3)) /* 27..31 would read past the 27-entry table */
+            return AVERROR_INVALIDDATA;
+        n1 =  (mul_3x3[b] & 0x0F) - 1;
+        n2 = ((mul_3x3[b] >> 4) & 0x0F) - 1;
+        n3 = ((mul_3x3[b] >> 8) & 0x0F) - 1;
+        set_pos(s, i++, col, n1);
+        if (i >= s->rows)
+            break;
+        set_pos(s, i++, col, n2);
+        if (i >= s->rows)
+            break;
+        set_pos(s, i, col, n3);
+    }
+    return 0;
+}
+
+static int t27(InterplayACMContext *s, unsigned ind, unsigned col)
+{
+    GetBitContext *gb = &s->gb;
+    unsigned i, b;
+    int n1, n2, n3;
+    /* Each 7-bit code fills up to three rows with indices in -2..2; returns 0 or AVERROR_INVALIDDATA. */
+    for (i = 0; i < s->rows; i++) {
+        /* b = (x1) + (x2 * 5) + (x3 * 25) */
+        b = get_bits(gb, 7);
+        if (b >= FF_ARRAY_ELEMS(mul_3x5)) /* 125..127 would read past the 125-entry table */
+            return AVERROR_INVALIDDATA;
+        n1 =  (mul_3x5[b] & 0x0F) - 2;
+        n2 = ((mul_3x5[b] >> 4) & 0x0F) - 2;
+        n3 = ((mul_3x5[b] >> 8) & 0x0F) - 2;
+        set_pos(s, i++, col, n1);
+        if (i >= s->rows)
+            break;
+        set_pos(s, i++, col, n2);
+        if (i >= s->rows)
+            break;
+        set_pos(s, i, col, n3);
+    }
+    return 0;
+}
+
+static int t37(InterplayACMContext *s, unsigned ind, unsigned col)
+{
+    GetBitContext *gb = &s->gb;
+    unsigned i, b;
+    int n1, n2;
+    for (i = 0; i < s->rows; i++) {
+        /* b = (x1) + (x2 * 11): each 7-bit code fills up to two rows with indices in -5..5 */
+        b = get_bits(gb, 7);
+        if (b >= FF_ARRAY_ELEMS(mul_2x11)) /* 121..127 would read past the 121-entry table */
+            return AVERROR_INVALIDDATA;
+        n1 =  (mul_2x11[b] & 0x0F) - 5;
+        n2 = ((mul_2x11[b] >> 4) & 0x0F) - 5;
+        set_pos(s, i++, col, n1);
+        if (i >= s->rows)
+            break;
+        set_pos(s, i, col, n2);
+    }
+    return 0;
+}
+
+typedef int (*filler)(InterplayACMContext *s, unsigned ind, unsigned col); /* decodes column col; ind is the 5-bit selector */
+/* Indexed by the 5-bit selector read in fill_block(); for 3..16 the selector is also linear()'s bit width. */
+static const filler filler_list[] = {
+    zero,   bad,    bad,    linear,
+    linear, linear, linear, linear,
+    linear, linear, linear, linear,
+    linear, linear, linear, linear,
+    linear, k13,    k12,    t15,
+    k24,    k23,    t27,    k35,
+    k34,    bad,    k45,    k44,
+    bad,    t37,    bad,    bad,
+};
+
+static int fill_block(InterplayACMContext *s)
+{
+    unsigned col;
+
+    /* One 5-bit selector per column picks the routine that decodes it. */
+    for (col = 0; col < s->cols; col++) {
+        const unsigned sel = get_bits(&s->gb, 5);
+        const int err      = filler_list[sel](s, sel, col);
+
+        if (err < 0)
+            return err;
+    }
+    return 0;
+}
+
+static void juggle(int *wrap_p, int *block_p, unsigned sub_len, unsigned sub_count)
+{
+    unsigned i, j;
+    int *p, r0, r1, r2, r3;
+    /* Filter sub_len interleaved columns of block_p in place; wrap_p holds two history values per column. */
+    for (i = 0; i < sub_len; i++) {
+        p = block_p;
+        r0 = wrap_p[0]; /* the two history values for this column */
+        r1 = wrap_p[1];
+        for (j = 0; j < sub_count/2; j++) { /* two elements of the column per iteration */
+            r2 = *p;
+            *p = r1 * 2 + (r0 + r2);
+            p += sub_len; /* next element of the same column */
+            r3 = *p;
+            *p = r2 * 2 - (r1 + r3);
+            p += sub_len;
+            r0 = r2; /* r2 and r3 are the values read before they were overwritten */
+            r1 = r3;
+        }
+        /* store the last two input values (or the unchanged history) back and move to the next column */
+        *wrap_p++ = r0;
+        *wrap_p++ = r1;
+        block_p++;
+    }
+}
+
+static void juggle_block(InterplayACMContext *s)
+{
+    unsigned sub_count, sub_len, todo_count, step_subcount, i;
+    int *wrap_p, *block_p, *p;
+    /* Run the juggle() passes over s->block, at most step_subcount rows at a time. */
+    /* juggle only if subblock_len > 1 */
+    if (s->level == 0)
+        return;
+
+    /* (2048 / subblock_len) - 2, or 1 when level > 9 */
+    if (s->level > 9)
+        step_subcount = 1;
+    else
+        step_subcount = (2048 >> s->level) - 2;
+
+    /* Apply juggle()  (rows)x(cols)
+     * from (step_subcount * 2)            x (subblock_len/2)
+     * to   (step_subcount * subblock_len) x (1)
+     */
+    todo_count = s->rows;
+    block_p = s->block;
+    while (1) {
+        wrap_p = s->wrapbuf; /* every chunk starts again at the first history slot */
+        sub_count = step_subcount;
+        if (sub_count > todo_count)
+            sub_count = todo_count;
+
+        sub_len = s->cols / 2;
+        sub_count *= 2;
+
+        juggle(wrap_p, block_p, sub_len, sub_count);
+        wrap_p += sub_len * 2; /* juggle() used two history slots per column */
+        /* after the first pass only: add 1 to sub_count values spaced sub_len apart */
+        for (i = 0, p = block_p; i < sub_count; i++) {
+            p[0]++;
+            p += sub_len;
+        }
+        /* later passes: half as many columns and twice as many rows each time */
+        while (sub_len > 1) {
+            sub_len /= 2;
+            sub_count *= 2;
+            juggle(wrap_p, block_p, sub_len, sub_count);
+            wrap_p += sub_len * 2;
+        }
+
+        if (todo_count <= step_subcount)
+            break;
+
+        todo_count -= step_subcount;
+        block_p += step_subcount << s->level; /* advance by step_subcount rows of 1 << level values */
+    }
+}
+
+static int decode_block(InterplayACMContext *s)
+{
+    GetBitContext *gb = &s->gb;
+    int pwr, count, val, i, x, ret;
+    /* Decode one block into s->block; returns 0 or the negative error from fill_block(). */
+    pwr = get_bits(gb, 4);
+    val = get_bits(gb, 16);
+
+    count = 1 << pwr;
+    /* amplitude table: midbuf[k] = k * val for k in -count..count-1 */
+    for (i = 0, x = 0; i < count; i++) {
+        s->midbuf[i] = x;
+        x += (unsigned)val; /* unsigned add: the running value must not overflow a signed int */
+    }
+
+    for (i = 1, x = -val; i <= count; i++) {
+        s->midbuf[-i] = x;
+        x -= (unsigned)val; /* the last step can exceed INT_MIN for pwr 15, val 0xFFFF */
+    }
+
+    ret = fill_block(s);
+    if (ret < 0)
+        return ret;
+
+    juggle_block(s);
+
+    return 0;
+}
+
+static int decode_frame(AVCodecContext *avctx, void *data,
+                        int *got_frame_ptr, AVPacket *pkt)
+{
+    InterplayACMContext *s = avctx->priv_data;
+    GetBitContext *gb = &s->gb;
+    AVFrame *frame = data;
+    const uint8_t *buf;
+    int16_t *samples;
+    int ret, n, buf_size, input_buf_size;
+    /* Buffers packet data; decodes one block once max_framesize bytes are held or pkt->data is NULL. */
+    if (!pkt->size && !s->bitstream_size) {
+        *got_frame_ptr = 0;
+        return 0;
+    }
+    /* take as many packet bytes as still fit into the bitstream buffer */
+    buf_size = FFMIN(pkt->size, s->max_framesize - s->bitstream_size);
+    input_buf_size = buf_size;
+    if (s->bitstream_index + s->bitstream_size + buf_size > s->max_framesize) {
+        memmove(s->bitstream, &s->bitstream[s->bitstream_index], s->bitstream_size);
+        s->bitstream_index = 0; /* unread bytes now start at the front of the buffer */
+    }
+    if (pkt->data)
+        memcpy(&s->bitstream[s->bitstream_index + s->bitstream_size], pkt->data, buf_size);
+    buf                = &s->bitstream[s->bitstream_index];
+    buf_size          += s->bitstream_size;
+    s->bitstream_size  = buf_size;
+    if (buf_size < s->max_framesize && pkt->data) {
+        *got_frame_ptr = 0; /* buffer not full yet: report the bytes taken and wait for more */
+        return input_buf_size;
+    }
+
+    if ((ret = init_get_bits8(gb, buf, buf_size)) < 0)
+        return ret;
+    /* NOTE(review): divides by avctx->channels; assumes the channel count is non-zero */
+    frame->nb_samples = s->block_len / avctx->channels;
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
+        return ret;
+    /* skip the bits of the first byte that the previous block already consumed */
+    skip_bits(gb, s->skip);
+    ret = decode_block(s);
+    if (ret < 0)
+        return ret;
+    /* shift every block value right by level bits and store it as a 16-bit sample */
+    samples = (int16_t *)frame->data[0];
+    for (n = 0; n < frame->nb_samples * avctx->channels; n++) {
+        int val = s->block[n] >> s->level;
+        *samples++ = val;
+    }
+
+    *got_frame_ptr = 1;
+    s->skip = get_bits_count(gb) - 8 * (get_bits_count(gb) / 8); /* bit offset inside the last byte touched */
+    n = get_bits_count(gb) / 8; /* whole bytes consumed by this block */
+    /* more bytes consumed than were buffered: drop the buffered data and fail */
+    if (n > buf_size && pkt->data) {
+        s->bitstream_size = 0;
+        s->bitstream_index = 0;
+        return AVERROR_INVALIDDATA;
+    }
+    /* advance past the consumed bytes; the packet bytes copied above are what gets reported as used */
+    if (s->bitstream_size) {
+        s->bitstream_index += n;
+        s->bitstream_size  -= n;
+        return input_buf_size;
+    }
+    return n;
+}
+
+static av_cold int decode_close(AVCodecContext *avctx)
+{
+    InterplayACMContext *ctx = avctx->priv_data;
+
+    /* Drop any buffered input and release the buffers allocated by decode_init(). */
+    ctx->bitstream_size = 0;
+    av_freep(&ctx->bitstream);
+    av_freep(&ctx->ampbuf);
+    av_freep(&ctx->wrapbuf);
+    av_freep(&ctx->block);
+    return 0;
+}
+
+AVCodec ff_interplay_acm_decoder = {
+    .name           = "interplayacm",
+    .long_name      = NULL_IF_CONFIG_SMALL("Interplay ACM"),
+    .type           = AVMEDIA_TYPE_AUDIO,
+    .id             = AV_CODEC_ID_INTERPLAY_ACM,
+    .init           = decode_init,  /* reads the block geometry from extradata and allocates the buffers */
+    .close          = decode_close, /* frees those buffers */
+    .decode         = decode_frame, /* buffers input across calls and outputs S16 samples */
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_DR1,
+    .priv_data_size = sizeof(InterplayACMContext),
+};
diff --git a/libavcodec/interplayvideo.c b/libavcodec/interplayvideo.c
index 2106419d..1460741a 100644
--- a/libavcodec/interplayvideo.c
+++ b/libavcodec/interplayvideo.c
@@ -38,6 +38,7 @@
 #include <stdlib.h>
 #include <string.h>
 
+#include "libavutil/intreadwrite.h"
 #include "avcodec.h"
 #include "bytestream.h"
 #include "hpeldsp.h"
@@ -949,7 +950,7 @@ static void ipvideo_decode_opcodes(IpvideoContext *s, AVFrame *frame)
         }
     }
     if (bytestream2_get_bytes_left(&s->stream_ptr) > 1) {
-        av_log(s->avctx, AV_LOG_ERROR,
+        av_log(s->avctx, AV_LOG_DEBUG,
                "decode finished with %d bytes left over\n",
                bytestream2_get_bytes_left(&s->stream_ptr));
     }
@@ -987,12 +988,15 @@ static int ipvideo_decode_frame(AVCodecContext *avctx,
     AVFrame *frame = data;
     int ret;
 
+    if (buf_size < 2)
+        return AVERROR_INVALIDDATA;
+
     /* decoding map contains 4 bits of information per 8x8 block */
-    s->decoding_map_size = avctx->width * avctx->height / (8 * 8 * 2);
+    s->decoding_map_size = AV_RL16(avpkt->data);
 
     /* compressed buffer needs to be large enough to at least hold an entire
      * decoding map */
-    if (buf_size < s->decoding_map_size)
+    if (buf_size < s->decoding_map_size + 2)
         return buf_size;
 
     if (av_packet_get_side_data(avpkt, AV_PKT_DATA_PARAM_CHANGE, NULL)) {
@@ -1000,8 +1004,8 @@ static int ipvideo_decode_frame(AVCodecContext *avctx,
         av_frame_unref(s->second_last_frame);
     }
 
-    s->decoding_map = buf;
-    bytestream2_init(&s->stream_ptr, buf + s->decoding_map_size,
+    s->decoding_map = buf + 2;
+    bytestream2_init(&s->stream_ptr, buf + 2 + s->decoding_map_size,
                      buf_size - s->decoding_map_size);
 
     if ((ret = ff_get_buffer(avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0)
@@ -1048,5 +1052,5 @@ AVCodec ff_interplay_video_decoder = {
     .init           = ipvideo_decode_init,
     .close          = ipvideo_decode_end,
     .decode         = ipvideo_decode_frame,
-    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_PARAM_CHANGE,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_PARAM_CHANGE,
 };
diff --git a/libavcodec/ituh263dec.c b/libavcodec/ituh263dec.c
index bc699d34..2e449f8e 100644
--- a/libavcodec/ituh263dec.c
+++ b/libavcodec/ituh263dec.c
@@ -31,16 +31,19 @@
 #include <limits.h>
 
 #include "libavutil/attributes.h"
+#include "libavutil/imgutils.h"
 #include "libavutil/internal.h"
 #include "libavutil/mathematics.h"
 #include "avcodec.h"
 #include "mpegvideo.h"
 #include "h263.h"
+#include "h263data.h"
 #include "internal.h"
 #include "mathops.h"
 #include "mpegutils.h"
 #include "unary.h"
 #include "flv.h"
+#include "rv10.h"
 #include "mpeg4video.h"
 #include "mpegvideodata.h"
 
@@ -137,12 +140,12 @@ int ff_h263_decode_mba(MpegEncContext *s)
 {
     int i, mb_pos;
 
-    for(i=0; i<6; i++){
-        if(s->mb_num-1 <= ff_mba_max[i]) break;
-    }
-    mb_pos= get_bits(&s->gb, ff_mba_length[i]);
-    s->mb_x= mb_pos % s->mb_width;
-    s->mb_y= mb_pos / s->mb_width;
+    for (i = 0; i < 6; i++)
+        if (s->mb_num - 1 <= ff_mba_max[i])
+            break;
+    mb_pos  = get_bits(&s->gb, ff_mba_length[i]);
+    s->mb_x = mb_pos % s->mb_width;
+    s->mb_y = mb_pos / s->mb_width;
 
     return mb_pos;
 }
@@ -872,7 +875,7 @@ int ff_h263_decode_mb(MpegEncContext *s,
 /* most is hardcoded. should extend to handle all h263 streams */
 int ff_h263_decode_picture_header(MpegEncContext *s)
 {
-    int format, width, height, i;
+    int format, width, height, i, ret;
     uint32_t startcode;
 
     align_get_bits(&s->gb);
@@ -1082,10 +1085,9 @@ int ff_h263_decode_picture_header(MpegEncContext *s)
         s->qscale = get_bits(&s->gb, 5);
     }
 
-    if (s->width == 0 || s->height == 0) {
-        av_log(s->avctx, AV_LOG_ERROR, "dimensions 0\n");
-        return -1;
-    }
+    if ((ret = av_image_check_size(s->width, s->height, 0, s)) < 0)
+        return ret;
+
     s->mb_width = (s->width  + 15) / 16;
     s->mb_height = (s->height  + 15) / 16;
     s->mb_num = s->mb_width * s->mb_height;
diff --git a/libavcodec/ituh263enc.c b/libavcodec/ituh263enc.c
index 3ea2d194..d9596c9f 100644
--- a/libavcodec/ituh263enc.c
+++ b/libavcodec/ituh263enc.c
@@ -34,6 +34,7 @@
 #include "mpegvideo.h"
 #include "mpegvideodata.h"
 #include "h263.h"
+#include "h263data.h"
 #include "mathops.h"
 #include "mpegutils.h"
 #include "unary.h"
@@ -44,7 +45,7 @@
 /**
  * Table of number of bits a motion vector component needs.
  */
-static uint8_t mv_penalty[MAX_FCODE+1][MAX_MV*2+1];
+static uint8_t mv_penalty[MAX_FCODE+1][MAX_DMV*2+1];
 
 /**
  * Minimal fcode that a motion vector component would need.
@@ -452,7 +453,7 @@ void ff_h263_encode_mb(MpegEncContext * s,
     int16_t pred_dc;
     int16_t rec_intradc[6];
     int16_t *dc_ptr[6];
-    const int interleaved_stats = s->avctx->flags & CODEC_FLAG_PASS1;
+    const int interleaved_stats = s->avctx->flags & AV_CODEC_FLAG_PASS1;
 
     if (!s->mb_intra) {
         /* compute cbp */
@@ -677,7 +678,7 @@ static av_cold void init_mv_penalty_and_fcode(MpegEncContext *s)
     int mv;
 
     for(f_code=1; f_code<=MAX_FCODE; f_code++){
-        for(mv=-MAX_MV; mv<=MAX_MV; mv++){
+        for(mv=-MAX_DMV; mv<=MAX_DMV; mv++){
             int len;
 
             if(mv==0) len= ff_mvtab[0][1];
@@ -698,7 +699,7 @@ static av_cold void init_mv_penalty_and_fcode(MpegEncContext *s)
                 }
             }
 
-            mv_penalty[f_code][mv+MAX_MV]= len;
+            mv_penalty[f_code][mv+MAX_DMV]= len;
         }
     }
 
diff --git a/libavcodec/ivi.c b/libavcodec/ivi.c
index 4525ff49..e7799003 100644
--- a/libavcodec/ivi.c
+++ b/libavcodec/ivi.c
@@ -30,6 +30,7 @@
 
 #define BITSTREAM_READER_LE
 #include "libavutil/attributes.h"
+#include "libavutil/imgutils.h"
 #include "libavutil/timer.h"
 #include "avcodec.h"
 #include "get_bits.h"
@@ -310,7 +311,7 @@ av_cold int ff_ivi_init_planes(IVIPlaneDesc *planes, const IVIPicConfig *cfg,
 
     ivi_free_buffers(planes);
 
-    if (cfg->pic_width < 1 || cfg->pic_height < 1 ||
+    if (av_image_check_size(cfg->pic_width, cfg->pic_height, 0, NULL) < 0 ||
         cfg->luma_bands < 1 || cfg->chroma_bands < 1)
         return AVERROR_INVALIDDATA;
 
@@ -1083,7 +1084,7 @@ int ff_ivi_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     }
 
     if (ctx->gop_flags & IVI5_IS_PROTECTED) {
-        avpriv_report_missing_feature(avctx, "Password-protected clip!\n");
+        avpriv_report_missing_feature(avctx, "Password-protected clip");
         return AVERROR_PATCHWELCOME;
     }
 
diff --git a/libavcodec/j2kenc.c b/libavcodec/j2kenc.c
index 593ceb40..c8d38617 100644
--- a/libavcodec/j2kenc.c
+++ b/libavcodec/j2kenc.c
@@ -17,8 +17,46 @@
  * You should have received a copy of the GNU Lesser General Public
  * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * **********************************************************************************************************************
+ *
+ *
+ *
+ * This source code incorporates work covered by the following copyright and
+ * permission notice:
+ *
+ * Copyright (c) 2002-2007, Communications and Remote Sensing Laboratory, Universite catholique de Louvain (UCL), Belgium
+ * Copyright (c) 2002-2007, Professor Benoit Macq
+ * Copyright (c) 2001-2003, David Janssens
+ * Copyright (c) 2002-2003, Yannick Verschueren
+ * Copyright (c) 2003-2007, Francois-Olivier Devaux and Antonin Descampe
+ * Copyright (c) 2005, Herve Drolon, FreeImage Team
+ * Copyright (c) 2007, Callum Lerwick <seg@haxxed.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS'
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
  */
 
+
 /**
  * JPEG2000 image encoder
  * @file
@@ -31,12 +69,16 @@
 #include "bytestream.h"
 #include "jpeg2000.h"
 #include "libavutil/common.h"
+#include "libavutil/opt.h"
 
 #define NMSEDEC_BITS 7
 #define NMSEDEC_FRACBITS (NMSEDEC_BITS-1)
 #define WMSEDEC_SHIFT 13 ///< must be >= 13
 #define LAMBDA_SCALE (100000000LL << (WMSEDEC_SHIFT - 13))
 
+#define CODEC_JP2 1
+#define CODEC_J2K 0
+
 static int lut_nmsedec_ref [1<<NMSEDEC_BITS],
            lut_nmsedec_ref0[1<<NMSEDEC_BITS],
            lut_nmsedec_sig [1<<NMSEDEC_BITS],
@@ -59,6 +101,7 @@ typedef struct {
 } Jpeg2000Tile;
 
 typedef struct {
+    AVClass *class;
     AVCodecContext *avctx;
     const AVFrame *picture;
 
@@ -81,6 +124,9 @@ typedef struct {
     Jpeg2000QuantStyle  qntsty;
 
     Jpeg2000Tile *tile;
+
+    int format;
+    int pred;
 } Jpeg2000EncoderContext;
 
 
@@ -271,7 +317,7 @@ static int put_cod(Jpeg2000EncoderContext *s)
     bytestream_put_byte(&s->buf, 0); // progression level
     bytestream_put_be16(&s->buf, 1); // num of layers
     if(s->avctx->pix_fmt == AV_PIX_FMT_YUV444P){
-        bytestream_put_byte(&s->buf, 2); // ICT
+        bytestream_put_byte(&s->buf, 0); // unspecified
     }else{
         bytestream_put_byte(&s->buf, 0); // unspecified
     }
@@ -310,6 +356,25 @@ static int put_qcd(Jpeg2000EncoderContext *s, int compno)
     return 0;
 }
 
+static int put_com(Jpeg2000EncoderContext *s, int compno)
+{
+    const int ident_len = strlen(LIBAVCODEC_IDENT);
+    const int size      = 4 + ident_len; /* value of the size field written after the marker */
+
+    /* Bit-exact mode: no comment segment is written. */
+    if (s->avctx->flags & AV_CODEC_FLAG_BITEXACT)
+        return 0;
+    /* The 2-byte marker is written in addition to the size bytes; -1 if it does not fit. */
+    if (s->buf_end - s->buf < size + 2)
+        return -1;
+
+    bytestream_put_be16(&s->buf, JPEG2000_COM);
+    bytestream_put_be16(&s->buf, size);
+    bytestream_put_be16(&s->buf, 1); // General use (ISO/IEC 8859-15 (Latin) values)
+    bytestream_put_buffer(&s->buf, LIBAVCODEC_IDENT, ident_len);
+    return 0;
+}
+
 static uint8_t *put_sot(Jpeg2000EncoderContext *s, int tileno)
 {
     uint8_t *psotptr;
@@ -486,18 +551,18 @@ static void encode_sigpass(Jpeg2000T1Context *t1, int width, int height, int ban
     for (y0 = 0; y0 < height; y0 += 4)
         for (x = 0; x < width; x++)
             for (y = y0; y < height && y < y0+4; y++){
-                if (!(t1->flags[y+1][x+1] & JPEG2000_T1_SIG) && (t1->flags[y+1][x+1] & JPEG2000_T1_SIG_NB)){
-                    int ctxno = ff_jpeg2000_getsigctxno(t1->flags[y+1][x+1], bandno),
-                        bit = t1->data[y][x] & mask ? 1 : 0;
+                if (!(t1->flags[(y+1) * t1->stride + x+1] & JPEG2000_T1_SIG) && (t1->flags[(y+1) * t1->stride + x+1] & JPEG2000_T1_SIG_NB)){
+                    int ctxno = ff_jpeg2000_getsigctxno(t1->flags[(y+1) * t1->stride + x+1], bandno),
+                        bit = t1->data[(y) * t1->stride + x] & mask ? 1 : 0;
                     ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + ctxno, bit);
                     if (bit){
                         int xorbit;
-                        int ctxno = ff_jpeg2000_getsgnctxno(t1->flags[y+1][x+1], &xorbit);
-                        ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + ctxno, (t1->flags[y+1][x+1] >> 15) ^ xorbit);
-                        *nmsedec += getnmsedec_sig(t1->data[y][x], bpno + NMSEDEC_FRACBITS);
-                        ff_jpeg2000_set_significance(t1, x, y, t1->flags[y+1][x+1] >> 15);
+                        int ctxno = ff_jpeg2000_getsgnctxno(t1->flags[(y+1) * t1->stride + x+1], &xorbit);
+                        ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + ctxno, (t1->flags[(y+1) * t1->stride + x+1] >> 15) ^ xorbit);
+                        *nmsedec += getnmsedec_sig(t1->data[(y) * t1->stride + x], bpno + NMSEDEC_FRACBITS);
+                        ff_jpeg2000_set_significance(t1, x, y, t1->flags[(y+1) * t1->stride + x+1] >> 15);
                     }
-                    t1->flags[y+1][x+1] |= JPEG2000_T1_VIS;
+                    t1->flags[(y+1) * t1->stride + x+1] |= JPEG2000_T1_VIS;
                 }
             }
 }
@@ -508,11 +573,11 @@ static void encode_refpass(Jpeg2000T1Context *t1, int width, int height, int *nm
     for (y0 = 0; y0 < height; y0 += 4)
         for (x = 0; x < width; x++)
             for (y = y0; y < height && y < y0+4; y++)
-                if ((t1->flags[y+1][x+1] & (JPEG2000_T1_SIG | JPEG2000_T1_VIS)) == JPEG2000_T1_SIG){
-                    int ctxno = ff_jpeg2000_getrefctxno(t1->flags[y+1][x+1]);
-                    *nmsedec += getnmsedec_ref(t1->data[y][x], bpno + NMSEDEC_FRACBITS);
-                    ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + ctxno, t1->data[y][x] & mask ? 1:0);
-                    t1->flags[y+1][x+1] |= JPEG2000_T1_REF;
+                if ((t1->flags[(y+1) * t1->stride + x+1] & (JPEG2000_T1_SIG | JPEG2000_T1_VIS)) == JPEG2000_T1_SIG){
+                    int ctxno = ff_jpeg2000_getrefctxno(t1->flags[(y+1) * t1->stride + x+1]);
+                    *nmsedec += getnmsedec_ref(t1->data[(y) * t1->stride + x], bpno + NMSEDEC_FRACBITS);
+                    ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + ctxno, t1->data[(y) * t1->stride + x] & mask ? 1:0);
+                    t1->flags[(y+1) * t1->stride + x+1] |= JPEG2000_T1_REF;
                 }
 }
 
@@ -522,15 +587,15 @@ static void encode_clnpass(Jpeg2000T1Context *t1, int width, int height, int ban
     for (y0 = 0; y0 < height; y0 += 4)
         for (x = 0; x < width; x++){
             if (y0 + 3 < height && !(
-            (t1->flags[y0+1][x+1] & (JPEG2000_T1_SIG_NB | JPEG2000_T1_VIS | JPEG2000_T1_SIG)) ||
-            (t1->flags[y0+2][x+1] & (JPEG2000_T1_SIG_NB | JPEG2000_T1_VIS | JPEG2000_T1_SIG)) ||
-            (t1->flags[y0+3][x+1] & (JPEG2000_T1_SIG_NB | JPEG2000_T1_VIS | JPEG2000_T1_SIG)) ||
-            (t1->flags[y0+4][x+1] & (JPEG2000_T1_SIG_NB | JPEG2000_T1_VIS | JPEG2000_T1_SIG))))
+            (t1->flags[(y0+1) * t1->stride + x+1] & (JPEG2000_T1_SIG_NB | JPEG2000_T1_VIS | JPEG2000_T1_SIG)) ||
+            (t1->flags[(y0+2) * t1->stride + x+1] & (JPEG2000_T1_SIG_NB | JPEG2000_T1_VIS | JPEG2000_T1_SIG)) ||
+            (t1->flags[(y0+3) * t1->stride + x+1] & (JPEG2000_T1_SIG_NB | JPEG2000_T1_VIS | JPEG2000_T1_SIG)) ||
+            (t1->flags[(y0+4) * t1->stride + x+1] & (JPEG2000_T1_SIG_NB | JPEG2000_T1_VIS | JPEG2000_T1_SIG))))
             {
                 // aggregation mode
                 int rlen;
                 for (rlen = 0; rlen < 4; rlen++)
-                    if (t1->data[y0+rlen][x] & mask)
+                    if (t1->data[(y0+rlen) * t1->stride + x] & mask)
                         break;
                 ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + MQC_CX_RL, rlen != 4);
                 if (rlen == 4)
@@ -538,34 +603,34 @@ static void encode_clnpass(Jpeg2000T1Context *t1, int width, int height, int ban
                 ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + MQC_CX_UNI, rlen >> 1);
                 ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + MQC_CX_UNI, rlen & 1);
                 for (y = y0 + rlen; y < y0 + 4; y++){
-                    if (!(t1->flags[y+1][x+1] & (JPEG2000_T1_SIG | JPEG2000_T1_VIS))){
-                        int ctxno = ff_jpeg2000_getsigctxno(t1->flags[y+1][x+1], bandno);
+                    if (!(t1->flags[(y+1) * t1->stride + x+1] & (JPEG2000_T1_SIG | JPEG2000_T1_VIS))){
+                        int ctxno = ff_jpeg2000_getsigctxno(t1->flags[(y+1) * t1->stride + x+1], bandno);
                         if (y > y0 + rlen)
-                            ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + ctxno, t1->data[y][x] & mask ? 1:0);
-                        if (t1->data[y][x] & mask){ // newly significant
+                            ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + ctxno, t1->data[(y) * t1->stride + x] & mask ? 1:0);
+                        if (t1->data[(y) * t1->stride + x] & mask){ // newly significant
                             int xorbit;
-                            int ctxno = ff_jpeg2000_getsgnctxno(t1->flags[y+1][x+1], &xorbit);
-                            *nmsedec += getnmsedec_sig(t1->data[y][x], bpno + NMSEDEC_FRACBITS);
-                            ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + ctxno, (t1->flags[y+1][x+1] >> 15) ^ xorbit);
-                            ff_jpeg2000_set_significance(t1, x, y, t1->flags[y+1][x+1] >> 15);
+                            int ctxno = ff_jpeg2000_getsgnctxno(t1->flags[(y+1) * t1->stride + x+1], &xorbit);
+                            *nmsedec += getnmsedec_sig(t1->data[(y) * t1->stride + x], bpno + NMSEDEC_FRACBITS);
+                            ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + ctxno, (t1->flags[(y+1) * t1->stride + x+1] >> 15) ^ xorbit);
+                            ff_jpeg2000_set_significance(t1, x, y, t1->flags[(y+1) * t1->stride + x+1] >> 15);
                         }
                     }
-                    t1->flags[y+1][x+1] &= ~JPEG2000_T1_VIS;
+                    t1->flags[(y+1) * t1->stride + x+1] &= ~JPEG2000_T1_VIS;
                 }
             } else{
                 for (y = y0; y < y0 + 4 && y < height; y++){
-                    if (!(t1->flags[y+1][x+1] & (JPEG2000_T1_SIG | JPEG2000_T1_VIS))){
-                        int ctxno = ff_jpeg2000_getsigctxno(t1->flags[y+1][x+1], bandno);
-                        ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + ctxno, t1->data[y][x] & mask ? 1:0);
-                        if (t1->data[y][x] & mask){ // newly significant
+                    if (!(t1->flags[(y+1) * t1->stride + x+1] & (JPEG2000_T1_SIG | JPEG2000_T1_VIS))){
+                        int ctxno = ff_jpeg2000_getsigctxno(t1->flags[(y+1) * t1->stride + x+1], bandno);
+                        ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + ctxno, t1->data[(y) * t1->stride + x] & mask ? 1:0);
+                        if (t1->data[(y) * t1->stride + x] & mask){ // newly significant
                             int xorbit;
-                            int ctxno = ff_jpeg2000_getsgnctxno(t1->flags[y+1][x+1], &xorbit);
-                            *nmsedec += getnmsedec_sig(t1->data[y][x], bpno + NMSEDEC_FRACBITS);
-                            ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + ctxno, (t1->flags[y+1][x+1] >> 15) ^ xorbit);
-                            ff_jpeg2000_set_significance(t1, x, y, t1->flags[y+1][x+1] >> 15);
+                            int ctxno = ff_jpeg2000_getsgnctxno(t1->flags[(y+1) * t1->stride + x+1], &xorbit);
+                            *nmsedec += getnmsedec_sig(t1->data[(y) * t1->stride + x], bpno + NMSEDEC_FRACBITS);
+                            ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + ctxno, (t1->flags[(y+1) * t1->stride + x+1] >> 15) ^ xorbit);
+                            ff_jpeg2000_set_significance(t1, x, y, t1->flags[(y+1) * t1->stride + x+1] >> 15);
                         }
                     }
-                    t1->flags[y+1][x+1] &= ~JPEG2000_T1_VIS;
+                    t1->flags[(y+1) * t1->stride + x+1] &= ~JPEG2000_T1_VIS;
                 }
             }
         }
@@ -577,16 +642,15 @@ static void encode_cblk(Jpeg2000EncoderContext *s, Jpeg2000T1Context *t1, Jpeg20
     int pass_t = 2, passno, x, y, max=0, nmsedec, bpno;
     int64_t wmsedec = 0;
 
-    for (y = 0; y < height+2; y++)
-        memset(t1->flags[y], 0, (width+2)*sizeof(int));
+    memset(t1->flags, 0, t1->stride * (height + 2) * sizeof(*t1->flags));
 
     for (y = 0; y < height; y++){
         for (x = 0; x < width; x++){
-            if (t1->data[y][x] < 0){
-                t1->flags[y+1][x+1] |= JPEG2000_T1_SGN;
-                t1->data[y][x] = -t1->data[y][x];
+            if (t1->data[(y) * t1->stride + x] < 0){
+                t1->flags[(y+1) * t1->stride + x+1] |= JPEG2000_T1_SGN;
+                t1->data[(y) * t1->stride + x] = -t1->data[(y) * t1->stride + x];
             }
-            max = FFMAX(max, t1->data[y][x]);
+            max = FFMAX(max, t1->data[(y) * t1->stride + x]);
         }
     }
 
@@ -612,7 +676,7 @@ static void encode_cblk(Jpeg2000EncoderContext *s, Jpeg2000T1Context *t1, Jpeg20
                     break;
         }
 
-        cblk->passes[passno].rate = 3 + ff_mqc_length(&t1->mqc);
+        cblk->passes[passno].rate = ff_mqc_flush_to(&t1->mqc, cblk->passes[passno].flushed, &cblk->passes[passno].flushed_len);
         wmsedec += (int64_t)nmsedec << (2*bpno);
         cblk->passes[passno].disto = wmsedec;
 
@@ -624,8 +688,7 @@ static void encode_cblk(Jpeg2000EncoderContext *s, Jpeg2000T1Context *t1, Jpeg20
     cblk->npasses = passno;
     cblk->ninclpasses = passno;
 
-    // TODO: optional flush on each pass
-    cblk->passes[passno-1].rate = ff_mqc_flush(&t1->mqc);
+    cblk->passes[passno-1].rate = ff_mqc_flush_to(&t1->mqc, cblk->passes[passno-1].flushed, &cblk->passes[passno-1].flushed_len);
 }
 
 /* tier-2 routines: */
@@ -732,7 +795,10 @@ static int encode_packet(Jpeg2000EncoderContext *s, Jpeg2000ResLevel *rlevel, in
                 if (cblk->ninclpasses){
                     if (s->buf_end - s->buf < cblk->passes[cblk->ninclpasses-1].rate)
                         return -1;
-                    bytestream_put_buffer(&s->buf, cblk->data, cblk->passes[cblk->ninclpasses-1].rate);
+                    bytestream_put_buffer(&s->buf, cblk->data,   cblk->passes[cblk->ninclpasses-1].rate
+                                                               - cblk->passes[cblk->ninclpasses-1].flushed_len);
+                    bytestream_put_buffer(&s->buf, cblk->passes[cblk->ninclpasses-1].flushed,
+                                                   cblk->passes[cblk->ninclpasses-1].flushed_len);
                 }
             }
         }
@@ -818,6 +884,8 @@ static int encode_tile(Jpeg2000EncoderContext *s, Jpeg2000Tile *tile, int tileno
     for (compno = 0; compno < s->ncomponents; compno++){
         Jpeg2000Component *comp = s->tile[tileno].comp + compno;
 
+        t1.stride = (1<<codsty->log2_cblk_width) + 2;
+
         av_log(s->avctx, AV_LOG_DEBUG,"dwt\n");
         if ((ret = ff_dwt_encode(&comp->dwt, comp->i_data)) < 0)
             return ret;
@@ -853,14 +921,14 @@ static int encode_tile(Jpeg2000EncoderContext *s, Jpeg2000Tile *tile, int tileno
                         int y, x;
                         if (codsty->transform == FF_DWT53){
                             for (y = yy0; y < yy1; y++){
-                                int *ptr = t1.data[y-yy0];
+                                int *ptr = t1.data + (y-yy0)*t1.stride;
                                 for (x = xx0; x < xx1; x++){
                                     *ptr++ = comp->i_data[(comp->coord[0][1] - comp->coord[0][0]) * y + x] << NMSEDEC_FRACBITS;
                                 }
                             }
                         } else{
                             for (y = yy0; y < yy1; y++){
-                                int *ptr = t1.data[y-yy0];
+                                int *ptr = t1.data + (y-yy0)*t1.stride;
                                 for (x = xx0; x < xx1; x++){
                                     *ptr = (comp->i_data[(comp->coord[0][1] - comp->coord[0][0]) * y + x]);
                                     *ptr = (int64_t)*ptr * (int64_t)(16384 * 65536 / band->i_stepsize) >> 15 - NMSEDEC_FRACBITS;
@@ -914,13 +982,19 @@ static void reinit(Jpeg2000EncoderContext *s)
     }
 }
 
+static void update_size(uint8_t *size, const uint8_t *end) /* back-patch a length field written earlier as 0 */
+{
+    AV_WB32(size, end-size); /* stores (end - size) as a 32-bit value at size; the count includes the 4-byte field itself */
+}
+
 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
                         const AVFrame *pict, int *got_packet)
 {
     int tileno, ret;
     Jpeg2000EncoderContext *s = avctx->priv_data;
+    uint8_t *chunkstart, *jp2cstart, *jp2hstart;
 
-    if ((ret = ff_alloc_packet2(avctx, pkt, avctx->width*avctx->height*9 + FF_MIN_BUFFER_SIZE)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, pkt, avctx->width*avctx->height*9 + AV_INPUT_BUFFER_MIN_SIZE, 0)) < 0)
         return ret;
 
     // init:
@@ -934,6 +1008,58 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     copy_frame(s);
     reinit(s);
 
+    if (s->format == CODEC_JP2) {
+        av_assert0(s->buf == pkt->data);
+
+        bytestream_put_be32(&s->buf, 0x0000000C);
+        bytestream_put_be32(&s->buf, 0x6A502020);
+        bytestream_put_be32(&s->buf, 0x0D0A870A);
+
+        chunkstart = s->buf;
+        bytestream_put_be32(&s->buf, 0);
+        bytestream_put_buffer(&s->buf, "ftyp", 4);
+        bytestream_put_buffer(&s->buf, "jp2\040\040", 4);
+        bytestream_put_be32(&s->buf, 0);
+        bytestream_put_buffer(&s->buf, "jp2\040", 4);
+        update_size(chunkstart, s->buf);
+
+        jp2hstart = s->buf;
+        bytestream_put_be32(&s->buf, 0);
+        bytestream_put_buffer(&s->buf, "jp2h", 4);
+
+        chunkstart = s->buf;
+        bytestream_put_be32(&s->buf, 0);
+        bytestream_put_buffer(&s->buf, "ihdr", 4);
+        bytestream_put_be32(&s->buf, avctx->height);
+        bytestream_put_be32(&s->buf, avctx->width);
+        bytestream_put_be16(&s->buf, s->ncomponents);
+        bytestream_put_byte(&s->buf, s->cbps[0]);
+        bytestream_put_byte(&s->buf, 7);
+        bytestream_put_byte(&s->buf, 0);
+        bytestream_put_byte(&s->buf, 0);
+        update_size(chunkstart, s->buf);
+
+        chunkstart = s->buf;
+        bytestream_put_be32(&s->buf, 0);
+        bytestream_put_buffer(&s->buf, "colr", 4);
+        bytestream_put_byte(&s->buf, 1);
+        bytestream_put_byte(&s->buf, 0);
+        bytestream_put_byte(&s->buf, 0);
+        if (s->ncomponents == 1) {
+            bytestream_put_be32(&s->buf, 17);
+        } else if (avctx->pix_fmt == AV_PIX_FMT_RGB24) {
+            bytestream_put_be32(&s->buf, 16);
+        } else {
+            bytestream_put_be32(&s->buf, 18);
+        }
+        update_size(chunkstart, s->buf);
+        update_size(jp2hstart, s->buf);
+
+        jp2cstart = s->buf;
+        bytestream_put_be32(&s->buf, 0);
+        bytestream_put_buffer(&s->buf, "jp2c", 4);
+    }
+
     if (s->buf_end - s->buf < 2)
         return -1;
     bytestream_put_be16(&s->buf, JPEG2000_SOC);
@@ -943,6 +1069,8 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         return ret;
     if ((ret = put_qcd(s, 0)) < 0)
         return ret;
+    if ((ret = put_com(s, 0)) < 0)
+        return ret;
 
     for (tileno = 0; tileno < s->numXtiles * s->numYtiles; tileno++){
         uint8_t *psotptr;
@@ -959,6 +1087,9 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         return -1;
     bytestream_put_be16(&s->buf, JPEG2000_EOC);
 
+    if (s->format == CODEC_JP2)
+        update_size(jp2cstart, s->buf);
+
     av_log(s->avctx, AV_LOG_DEBUG, "end\n");
     pkt->size = s->buf - s->buf_start;
     pkt->flags |= AV_PKT_FLAG_KEY;
@@ -977,6 +1108,13 @@ static av_cold int j2kenc_init(AVCodecContext *avctx)
     s->avctx = avctx;
     av_log(s->avctx, AV_LOG_DEBUG, "init\n");
 
+#if FF_API_PRIVATE_OPT
+FF_DISABLE_DEPRECATION_WARNINGS
+    if (avctx->prediction_method)
+        s->pred = avctx->prediction_method;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
     // defaults:
     // TODO: implement setting non-standard precinct size
     memset(codsty->log2_prec_widths , 15, sizeof(codsty->log2_prec_widths ));
@@ -985,12 +1123,14 @@ static av_cold int j2kenc_init(AVCodecContext *avctx)
     codsty->nreslevels       = 7;
     codsty->log2_cblk_width  = 4;
     codsty->log2_cblk_height = 4;
-    codsty->transform        = avctx->prediction_method ? FF_DWT53 : FF_DWT97_INT;
+    codsty->transform        = s->pred ? FF_DWT53 : FF_DWT97_INT;
 
     qntsty->nguardbits       = 1;
 
-    s->tile_width            = 256;
-    s->tile_height           = 256;
+    if ((s->tile_width  & (s->tile_width -1)) ||
+        (s->tile_height & (s->tile_height-1))) {
+        av_log(avctx, AV_LOG_WARNING, "Tile dimension not a power of 2\n");
+    }
 
     if (codsty->transform == FF_DWT53)
         qntsty->quantsty = JPEG2000_QSTY_NONE;
@@ -1035,6 +1175,30 @@ static int j2kenc_destroy(AVCodecContext *avctx)
     return 0;
 }
 
+// Taken from the libopenjpeg wrapper so it matches. The "pred" constants must be
+// distinct: j2kenc_init selects FF_DWT53 when pred is nonzero, FF_DWT97_INT otherwise.
+#define OFFSET(x) offsetof(Jpeg2000EncoderContext, x)
+#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
+static const AVOption options[] = {
+    { "format",        "Codec Format",      OFFSET(format),        AV_OPT_TYPE_INT,   { .i64 = CODEC_JP2   }, CODEC_J2K, CODEC_JP2,   VE, "format"      },
+    { "j2k",           NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = CODEC_J2K   }, 0,         0,           VE, "format"      },
+    { "jp2",           NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = CODEC_JP2   }, 0,         0,           VE, "format"      },
+    { "tile_width",    "Tile Width",        OFFSET(tile_width),    AV_OPT_TYPE_INT,   { .i64 = 256         }, 1,     1<<30,           VE, },
+    { "tile_height",   "Tile Height",       OFFSET(tile_height),   AV_OPT_TYPE_INT,   { .i64 = 256         }, 1,     1<<30,           VE, },
+    { "pred",          "DWT Type",          OFFSET(pred),          AV_OPT_TYPE_INT,   { .i64 = 0           }, 0,         1,           VE, "pred"        },
+    { "dwt97int",      NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = 0           }, INT_MIN, INT_MAX,       VE, "pred"        },
+    { "dwt53",         NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = 1           }, INT_MIN, INT_MAX,       VE, "pred"        },
+
+    { NULL }
+};
+
+static const AVClass j2k_class = { /* referenced by ff_jpeg2000_encoder.priv_class */
+    .class_name = "jpeg 2000 encoder",
+    .item_name  = av_default_item_name,
+    .option     = options, /* the option table whose offsets point into Jpeg2000EncoderContext */
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
 AVCodec ff_jpeg2000_encoder = {
     .name           = "jpeg2000",
     .long_name      = NULL_IF_CONFIG_SMALL("JPEG 2000"),
@@ -1044,12 +1208,11 @@ AVCodec ff_jpeg2000_encoder = {
     .init           = j2kenc_init,
     .encode2        = encode_frame,
     .close          = j2kenc_destroy,
-    .capabilities   = CODEC_CAP_EXPERIMENTAL,
     .pix_fmts       = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_RGB24, AV_PIX_FMT_YUV444P, AV_PIX_FMT_GRAY8,
-/*      AV_PIX_FMT_YUV420P,
-        AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P,
-        AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,*/
+        AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
+        AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,
         AV_PIX_FMT_NONE
-    }
+    },
+    .priv_class     = &j2k_class,
 };
diff --git a/libavcodec/jpeg2000.c b/libavcodec/jpeg2000.c
index 38df58dd..b5b2dbf2 100644
--- a/libavcodec/jpeg2000.c
+++ b/libavcodec/jpeg2000.c
@@ -28,8 +28,10 @@
 #include "libavutil/attributes.h"
 #include "libavutil/avassert.h"
 #include "libavutil/common.h"
+#include "libavutil/imgutils.h"
 #include "libavutil/mem.h"
 #include "avcodec.h"
+#include "internal.h"
 #include "jpeg2000.h"
 
 #define SHL(a, n) ((n) >= 0 ? (a) << (n) : (a) >> -(n))
@@ -37,11 +39,11 @@
 /* tag tree routines */
 
 /* allocate the memory for tag tree */
-static int32_t tag_tree_size(uint16_t w, uint16_t h)
+static int32_t tag_tree_size(int w, int h)
 {
-    uint32_t res = 0;
+    int64_t res = 0;
     while (w > 1 || h > 1) {
-        res += w * h;
+        res += w * (int64_t)h;
         av_assert0(res + 1 < INT32_MAX);
         w = (w + 1) >> 1;
         h = (h + 1) >> 1;
@@ -171,25 +173,277 @@ void ff_jpeg2000_set_significance(Jpeg2000T1Context *t1, int x, int y,
 {
     x++;
     y++;
-    t1->flags[y][x] |= JPEG2000_T1_SIG;
+    t1->flags[(y) * t1->stride + x] |= JPEG2000_T1_SIG;
     if (negative) {
-        t1->flags[y][x + 1] |= JPEG2000_T1_SIG_W | JPEG2000_T1_SGN_W;
-        t1->flags[y][x - 1] |= JPEG2000_T1_SIG_E | JPEG2000_T1_SGN_E;
-        t1->flags[y + 1][x] |= JPEG2000_T1_SIG_N | JPEG2000_T1_SGN_N;
-        t1->flags[y - 1][x] |= JPEG2000_T1_SIG_S | JPEG2000_T1_SGN_S;
+        t1->flags[(y) * t1->stride + x + 1] |= JPEG2000_T1_SIG_W | JPEG2000_T1_SGN_W;
+        t1->flags[(y) * t1->stride + x - 1] |= JPEG2000_T1_SIG_E | JPEG2000_T1_SGN_E;
+        t1->flags[(y + 1) * t1->stride + x] |= JPEG2000_T1_SIG_N | JPEG2000_T1_SGN_N;
+        t1->flags[(y - 1) * t1->stride + x] |= JPEG2000_T1_SIG_S | JPEG2000_T1_SGN_S;
     } else {
-        t1->flags[y][x + 1] |= JPEG2000_T1_SIG_W;
-        t1->flags[y][x - 1] |= JPEG2000_T1_SIG_E;
-        t1->flags[y + 1][x] |= JPEG2000_T1_SIG_N;
-        t1->flags[y - 1][x] |= JPEG2000_T1_SIG_S;
+        t1->flags[(y) * t1->stride + x + 1] |= JPEG2000_T1_SIG_W;
+        t1->flags[(y) * t1->stride + x - 1] |= JPEG2000_T1_SIG_E;
+        t1->flags[(y + 1) * t1->stride + x] |= JPEG2000_T1_SIG_N;
+        t1->flags[(y - 1) * t1->stride + x] |= JPEG2000_T1_SIG_S;
     }
-    t1->flags[y + 1][x + 1] |= JPEG2000_T1_SIG_NW;
-    t1->flags[y + 1][x - 1] |= JPEG2000_T1_SIG_NE;
-    t1->flags[y - 1][x + 1] |= JPEG2000_T1_SIG_SW;
-    t1->flags[y - 1][x - 1] |= JPEG2000_T1_SIG_SE;
+    t1->flags[(y + 1) * t1->stride + x + 1] |= JPEG2000_T1_SIG_NW;
+    t1->flags[(y + 1) * t1->stride + x - 1] |= JPEG2000_T1_SIG_NE;
+    t1->flags[(y - 1) * t1->stride + x + 1] |= JPEG2000_T1_SIG_SW;
+    t1->flags[(y - 1) * t1->stride + x - 1] |= JPEG2000_T1_SIG_SE;
 }
 
-static const uint8_t lut_gain[2][4] = { { 0, 0, 0, 0 }, { 0, 1, 1, 2 } };
+// static const uint8_t lut_gain[2][4] = { { 0, 0, 0, 0 }, { 0, 1, 1, 2 } }; (unused)
+
+static void init_band_stepsize(AVCodecContext *avctx,
+                               Jpeg2000Band *band,
+                               Jpeg2000CodingStyle *codsty,
+                               Jpeg2000QuantStyle *qntsty,
+                               int bandno, int gbandno, int reslevelno,
+                               int cbps)
+{
+    /* Derive band->f_stepsize and band->i_stepsize from qntsty and the transform type.
+     * TODO: quantization step not finished, see ISO/IEC 15444-1:2002 E.1 and A.6.4. */
+    switch (qntsty->quantsty) {
+        uint8_t gain; /* declaration only; assigned in the SE case below */
+    case JPEG2000_QSTY_NONE:
+        /* TODO: to verify. No quantization in this case */
+        band->f_stepsize = 1;
+        break;
+    case JPEG2000_QSTY_SI:
+        /* TODO: Compute formula to implement. With the break below commented out, SI falls through to SE. */
+//         numbps = cbps +
+//                  lut_gain[codsty->transform == FF_DWT53][bandno + (reslevelno > 0)];
+//         band->f_stepsize = SHL(2048 + qntsty->mant[gbandno],
+//                                2 + numbps - qntsty->expn[gbandno]);
+//         break;
+    case JPEG2000_QSTY_SE:
+        /* Exponent quantization step.
+         * Formula:
+         * delta_b = 2 ^ (R_b - expn_b) * (1 + (mant_b / 2 ^ 11))
+         * R_b = R_I + log2 (gain_b )
+         * see ISO/IEC 15444-1:2002 E.1.1 eqn. E-3 and E-4 */
+        gain            = cbps; /* uses cbps alone; the log2(gain_b) term of the formula is not added */
+        band->f_stepsize  = ff_exp2fi(gain - qntsty->expn[gbandno]);
+        band->f_stepsize *= qntsty->mant[gbandno] / 2048.0 + 1.0;
+        break;
+    default:
+        band->f_stepsize = 0; /* logged but not reported to the caller: the function returns void */
+        av_log(avctx, AV_LOG_ERROR, "Unknown quantization format\n");
+        break;
+    }
+    if (codsty->transform != FF_DWT53) {
+        int lband = 0; /* becomes 1 for band index 1 or 2; feeds the FF_DWT97 exponent below */
+        switch (bandno + (reslevelno > 0)) {
+            case 1:
+            case 2:
+                band->f_stepsize *= F_LFTG_X * 2;
+                lband = 1;
+                break;
+            case 3:
+                band->f_stepsize *= F_LFTG_X * F_LFTG_X * 4;
+                break;
+        }
+        if (codsty->transform == FF_DWT97) {
+            band->f_stepsize *= pow(F_LFTG_K, 2*(codsty->nreslevels2decode - reslevelno) + lband - 2);
+        }
+    }
+
+    band->i_stepsize = band->f_stepsize * (1 << 15); /* taken before the decoder-only halving below */
+
+    /* FIXME: In OpenJPEG code stepsize = stepsize * 0.5. Why?
+     * If not set output of entropic decoder is not correct. */
+    if (!av_codec_is_encoder(avctx->codec))
+        band->f_stepsize *= 0.5;
+}
+
+static int init_prec(Jpeg2000Band *band,
+                     Jpeg2000ResLevel *reslevel,
+                     Jpeg2000Component *comp,
+                     int precno, int bandno, int reslevelno,
+                     int log2_band_prec_width,
+                     int log2_band_prec_height)
+{
+    Jpeg2000Prec *prec = band->prec + precno;
+    int nb_codeblocks, cblkno;
+    /* Set up band->prec[precno]: coordinates, tag trees and code-blocks. Returns 0 or AVERROR(ENOMEM). */
+    prec->decoded_layers = 0;
+
+    /* TODO: Explain formula for JPEG 2000 DCINEMA. */
+    /* TODO: Verify with previous count of codeblocks per band */
+
+    /* Compute P_x0 */
+    prec->coord[0][0] = ((band->coord[0][0] >> log2_band_prec_width) + precno % reslevel->num_precincts_x) *
+                        (1 << log2_band_prec_width);
+
+    /* Compute P_y0 */
+    prec->coord[1][0] = ((band->coord[1][0] >> log2_band_prec_height) + precno / reslevel->num_precincts_x) *
+                        (1 << log2_band_prec_height);
+
+    /* Compute P_x1 */
+    prec->coord[0][1] = prec->coord[0][0] +
+                        (1 << log2_band_prec_width);
+    prec->coord[0][0] = FFMAX(prec->coord[0][0], band->coord[0][0]); /* clip to the band, after P_x1 was derived */
+    prec->coord[0][1] = FFMIN(prec->coord[0][1], band->coord[0][1]);
+
+    /* Compute P_y1 */
+    prec->coord[1][1] = prec->coord[1][0] +
+                        (1 << log2_band_prec_height);
+    prec->coord[1][0] = FFMAX(prec->coord[1][0], band->coord[1][0]); /* clip to the band, after P_y1 was derived */
+    prec->coord[1][1] = FFMIN(prec->coord[1][1], band->coord[1][1]);
+
+    prec->nb_codeblocks_width =
+        ff_jpeg2000_ceildivpow2(prec->coord[0][1],
+                                band->log2_cblk_width)
+        - (prec->coord[0][0] >> band->log2_cblk_width);
+    prec->nb_codeblocks_height =
+        ff_jpeg2000_ceildivpow2(prec->coord[1][1],
+                                band->log2_cblk_height)
+        - (prec->coord[1][0] >> band->log2_cblk_height);
+
+
+    /* Tag trees initialization */
+    prec->cblkincl =
+        ff_jpeg2000_tag_tree_init(prec->nb_codeblocks_width,
+                                  prec->nb_codeblocks_height);
+    if (!prec->cblkincl)
+        return AVERROR(ENOMEM);
+
+    prec->zerobits =
+        ff_jpeg2000_tag_tree_init(prec->nb_codeblocks_width,
+                                  prec->nb_codeblocks_height);
+    if (!prec->zerobits)
+        return AVERROR(ENOMEM); /* NOTE(review): cblkincl is not freed here; presumably the caller cleans up -- confirm */
+
+    if (prec->nb_codeblocks_width * (uint64_t)prec->nb_codeblocks_height > INT_MAX) { /* product must fit in int */
+        prec->cblk = NULL;
+        return AVERROR(ENOMEM);
+    }
+    nb_codeblocks = prec->nb_codeblocks_width * prec->nb_codeblocks_height;
+    prec->cblk = av_mallocz_array(nb_codeblocks, sizeof(*prec->cblk));
+    if (!prec->cblk)
+        return AVERROR(ENOMEM);
+    for (cblkno = 0; cblkno < nb_codeblocks; cblkno++) {
+        Jpeg2000Cblk *cblk = prec->cblk + cblkno;
+        int Cx0, Cy0;
+
+        /* Compute coordinates of codeblocks */
+        /* Compute Cx0: precinct origin rounded down to the code-block grid, plus the column offset */
+        Cx0 = ((prec->coord[0][0]) >> band->log2_cblk_width) << band->log2_cblk_width;
+        Cx0 = Cx0 + ((cblkno % prec->nb_codeblocks_width)  << band->log2_cblk_width);
+        cblk->coord[0][0] = FFMAX(Cx0, prec->coord[0][0]);
+
+        /* Compute Cy0: precinct origin rounded down to the code-block grid, plus the row offset */
+        Cy0 = ((prec->coord[1][0]) >> band->log2_cblk_height) << band->log2_cblk_height;
+        Cy0 = Cy0 + ((cblkno / prec->nb_codeblocks_width)   << band->log2_cblk_height);
+        cblk->coord[1][0] = FFMAX(Cy0, prec->coord[1][0]);
+
+        /* Compute Cx1 */
+        cblk->coord[0][1] = FFMIN(Cx0 + (1 << band->log2_cblk_width),
+                                  prec->coord[0][1]);
+
+        /* Compute Cy1 */
+        cblk->coord[1][1] = FFMIN(Cy0 + (1 << band->log2_cblk_height),
+                                  prec->coord[1][1]);
+        /* Update code-block coordinates according to sub-band position */
+        if ((bandno + !!reslevelno) & 1) { /* NOTE(review): reslevel[reslevelno-1] assumes bandno == 0 when reslevelno == 0 */
+            cblk->coord[0][0] += comp->reslevel[reslevelno-1].coord[0][1] -
+                                 comp->reslevel[reslevelno-1].coord[0][0];
+            cblk->coord[0][1] += comp->reslevel[reslevelno-1].coord[0][1] -
+                                 comp->reslevel[reslevelno-1].coord[0][0];
+        }
+        if ((bandno + !!reslevelno) & 2) { /* same shift for the vertical coordinates */
+            cblk->coord[1][0] += comp->reslevel[reslevelno-1].coord[1][1] -
+                                 comp->reslevel[reslevelno-1].coord[1][0];
+            cblk->coord[1][1] += comp->reslevel[reslevelno-1].coord[1][1] -
+                                 comp->reslevel[reslevelno-1].coord[1][0];
+        }
+
+        cblk->zero      = 0;
+        cblk->lblock    = 3;
+        cblk->length    = 0;
+        memset(cblk->lengthinc, 0, sizeof(cblk->lengthinc));
+        cblk->npasses   = 0;
+    }
+
+    return 0;
+}
+
+static int init_band(AVCodecContext *avctx,
+                     Jpeg2000ResLevel *reslevel,
+                     Jpeg2000Component *comp,
+                     Jpeg2000CodingStyle *codsty,
+                     Jpeg2000QuantStyle *qntsty,
+                     int bandno, int gbandno, int reslevelno,
+                     int cbps, int dx, int dy) /* dx and dy are not referenced in the body */
+{
+    Jpeg2000Band *band = reslevel->band + bandno;
+    uint8_t log2_band_prec_width, log2_band_prec_height;
+    int declvl = codsty->nreslevels - reslevelno;    // N_L -r see  ISO/IEC 15444-1:2002 B.5
+    int precno;
+    int nb_precincts;
+    int i, j, ret;
+    /* Set up reslevel->band[bandno]: step size, coordinates, code-block size, precincts. Returns 0 or <0. */
+    init_band_stepsize(avctx, band, codsty, qntsty, bandno, gbandno, reslevelno, cbps);
+
+    /* computation of tbx_0, tbx_1, tby_0, tby_1
+     * see ISO/IEC 15444-1:2002 B.5 eq. B-15 and tbl B.1
+     * codeblock width and height are computed for
+     * DCI JPEG 2000 codeblock_width = codeblock_height = 32 = 2 ^ 5 */
+    if (reslevelno == 0) {
+        /* for reslevelno = 0, only one band, x0_b = y0_b = 0 */
+        for (i = 0; i < 2; i++)
+            for (j = 0; j < 2; j++)
+                band->coord[i][j] =
+                    ff_jpeg2000_ceildivpow2(comp->coord_o[i][j],
+                                            declvl - 1);
+        log2_band_prec_width  = reslevel->log2_prec_width;
+        log2_band_prec_height = reslevel->log2_prec_height;
+        /* see ISO/IEC 15444-1:2002 eq. B-17 and eq. B-15 */
+        band->log2_cblk_width  = FFMIN(codsty->log2_cblk_width,
+                                       reslevel->log2_prec_width);
+        band->log2_cblk_height = FFMIN(codsty->log2_cblk_height,
+                                       reslevel->log2_prec_height);
+    } else {
+        /* 3 bands x0_b = 1 y0_b = 0; x0_b = 0 y0_b = 1; x0_b = y0_b = 1 */
+        /* x0_b and y0_b are computed with ((bandno + 1 >> i) & 1) */
+        for (i = 0; i < 2; i++)
+            for (j = 0; j < 2; j++)
+                /* Formula example for tbx_0 = ceil((tcx_0 - 2 ^ (declvl - 1) * x0_b) / 2 ^ declvl) */
+                band->coord[i][j] =
+                    ff_jpeg2000_ceildivpow2(comp->coord_o[i][j] -
+                                            (((bandno + 1 >> i) & 1LL) << declvl - 1),
+                                            declvl);
+        /* TODO: Manage case of 3 band offsets here or
+         * in coding/decoding function? */
+
+        /* see ISO/IEC 15444-1:2002 eq. B-17 and eq. B-15 */
+        band->log2_cblk_width  = FFMIN(codsty->log2_cblk_width,
+                                       reslevel->log2_prec_width - 1);
+        band->log2_cblk_height = FFMIN(codsty->log2_cblk_height,
+                                       reslevel->log2_prec_height - 1);
+
+        log2_band_prec_width  = reslevel->log2_prec_width  - 1;
+        log2_band_prec_height = reslevel->log2_prec_height - 1;
+    }
+
+    if (reslevel->num_precincts_x * (uint64_t)reslevel->num_precincts_y > INT_MAX) { /* product must fit in int */
+        band->prec = NULL;
+        return AVERROR(ENOMEM);
+    }
+    nb_precincts = reslevel->num_precincts_x * reslevel->num_precincts_y;
+    band->prec = av_mallocz_array(nb_precincts, sizeof(*band->prec));
+    if (!band->prec)
+        return AVERROR(ENOMEM);
+
+    for (precno = 0; precno < nb_precincts; precno++) {
+        ret = init_prec(band, reslevel, comp,
+                        precno, bandno, reslevelno,
+                        log2_band_prec_width, log2_band_prec_height);
+        if (ret < 0)
+            return ret; /* precincts initialized so far are left in place, not freed here */
+    }
+
+    return 0;
+}
 
 int ff_jpeg2000_init_component(Jpeg2000Component *comp,
                                Jpeg2000CodingStyle *codsty,
@@ -197,7 +451,6 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp,
                                int cbps, int dx, int dy,
                                AVCodecContext *avctx)
 {
-    uint8_t log2_band_prec_width, log2_band_prec_height;
     int reslevelno, bandno, gbandno = 0, ret, i, j;
     uint32_t csize;
 
@@ -210,16 +463,26 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp,
                                    codsty->nreslevels2decode - 1,
                                    codsty->transform))
         return ret;
-    // component size comp->coord is uint16_t so ir cannot overflow
+
+    if (av_image_check_size(comp->coord[0][1] - comp->coord[0][0],
+                            comp->coord[1][1] - comp->coord[1][0], 0, avctx))
+        return AVERROR_INVALIDDATA;
     csize = (comp->coord[0][1] - comp->coord[0][0]) *
             (comp->coord[1][1] - comp->coord[1][0]);
+    if (comp->coord[0][1] - comp->coord[0][0] > 32768 ||
+        comp->coord[1][1] - comp->coord[1][0] > 32768) {
+        av_log(avctx, AV_LOG_ERROR, "component size too large\n");
+        return AVERROR_PATCHWELCOME;
+    }
 
     if (codsty->transform == FF_DWT97) {
+        csize += AV_INPUT_BUFFER_PADDING_SIZE / sizeof(*comp->f_data);
         comp->i_data = NULL;
         comp->f_data = av_mallocz_array(csize, sizeof(*comp->f_data));
         if (!comp->f_data)
             return AVERROR(ENOMEM);
     } else {
+        csize += AV_INPUT_BUFFER_PADDING_SIZE / sizeof(*comp->i_data);
         comp->f_data = NULL;
         comp->i_data = av_mallocz_array(csize, sizeof(*comp->i_data));
         if (!comp->i_data)
@@ -278,200 +541,12 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp,
             return AVERROR(ENOMEM);
 
         for (bandno = 0; bandno < reslevel->nbands; bandno++, gbandno++) {
-            Jpeg2000Band *band = reslevel->band + bandno;
-            int cblkno, precno;
-            int nb_precincts;
-
-            /* TODO: Implementation of quantization step not finished,
-             * see ISO/IEC 15444-1:2002 E.1 and A.6.4. */
-            switch (qntsty->quantsty) {
-                uint8_t gain;
-            case JPEG2000_QSTY_NONE:
-                /* TODO: to verify. No quantization in this case */
-                band->f_stepsize = 1;
-                break;
-            case JPEG2000_QSTY_SI:
-                /*TODO: Compute formula to implement. */
-//                 numbps = cbps +
-//                          lut_gain[codsty->transform == FF_DWT53][bandno + (reslevelno > 0)];
-//                 band->f_stepsize = SHL(2048 + qntsty->mant[gbandno],
-//                                        2 + numbps - qntsty->expn[gbandno]);
-//                 break;
-            case JPEG2000_QSTY_SE:
-                /* Exponent quantization step.
-                 * Formula:
-                 * delta_b = 2 ^ (R_b - expn_b) * (1 + (mant_b / 2 ^ 11))
-                 * R_b = R_I + log2 (gain_b )
-                 * see ISO/IEC 15444-1:2002 E.1.1 eqn. E-3 and E-4 */
-                /* TODO/WARN: value of log2 (gain_b ) not taken into account
-                 * but it works (compared to OpenJPEG). Why?
-                 * Further investigation needed. */
-                gain            = cbps;
-                band->f_stepsize  = pow(2.0, gain - qntsty->expn[gbandno]);
-                band->f_stepsize *= qntsty->mant[gbandno] / 2048.0 + 1.0;
-                break;
-            default:
-                band->f_stepsize = 0;
-                av_log(avctx, AV_LOG_ERROR, "Unknown quantization format\n");
-                break;
-            }
-            /* FIXME: In openjepg code stespize = stepsize * 0.5. Why?
-             * If not set output of entropic decoder is not correct. */
-            if (!av_codec_is_encoder(avctx->codec))
-                band->f_stepsize *= 0.5;
-
-            band->i_stepsize = band->f_stepsize * (1 << 15);
-
-            /* computation of tbx_0, tbx_1, tby_0, tby_1
-             * see ISO/IEC 15444-1:2002 B.5 eq. B-15 and tbl B.1
-             * codeblock width and height is computed for
-             * DCI JPEG 2000 codeblock_width = codeblock_width = 32 = 2 ^ 5 */
-            if (reslevelno == 0) {
-                /* for reslevelno = 0, only one band, x0_b = y0_b = 0 */
-                for (i = 0; i < 2; i++)
-                    for (j = 0; j < 2; j++)
-                        band->coord[i][j] =
-                            ff_jpeg2000_ceildivpow2(comp->coord_o[i][j] - comp->coord_o[i][0],
-                                                    declvl - 1);
-                log2_band_prec_width  = reslevel->log2_prec_width;
-                log2_band_prec_height = reslevel->log2_prec_height;
-                /* see ISO/IEC 15444-1:2002 eq. B-17 and eq. B-15 */
-                band->log2_cblk_width  = FFMIN(codsty->log2_cblk_width,
-                                               reslevel->log2_prec_width);
-                band->log2_cblk_height = FFMIN(codsty->log2_cblk_height,
-                                               reslevel->log2_prec_height);
-            } else {
-                /* 3 bands x0_b = 1 y0_b = 0; x0_b = 0 y0_b = 1; x0_b = y0_b = 1 */
-                /* x0_b and y0_b are computed with ((bandno + 1 >> i) & 1) */
-                for (i = 0; i < 2; i++)
-                    for (j = 0; j < 2; j++)
-                        /* Formula example for tbx_0 = ceildiv((tcx_0 - 2 ^ (declvl - 1) * x0_b) / declvl) */
-                        band->coord[i][j] =
-                            ff_jpeg2000_ceildivpow2(comp->coord_o[i][j] - comp->coord_o[i][0] -
-                                                    (((bandno + 1 >> i) & 1) << declvl - 1),
-                                                    declvl);
-                /* TODO: Manage case of 3 band offsets here or
-                 * in coding/decoding function? */
-
-                /* see ISO/IEC 15444-1:2002 eq. B-17 and eq. B-15 */
-                band->log2_cblk_width  = FFMIN(codsty->log2_cblk_width,
-                                               reslevel->log2_prec_width - 1);
-                band->log2_cblk_height = FFMIN(codsty->log2_cblk_height,
-                                               reslevel->log2_prec_height - 1);
-
-                log2_band_prec_width  = reslevel->log2_prec_width  - 1;
-                log2_band_prec_height = reslevel->log2_prec_height - 1;
-            }
-
-            if (reslevel->num_precincts_x * (uint64_t)reslevel->num_precincts_y > INT_MAX) {
-                band->prec = NULL;
-                return AVERROR(ENOMEM);
-            }
-            nb_precincts = reslevel->num_precincts_x * reslevel->num_precincts_y;
-            band->prec = av_mallocz_array(nb_precincts, sizeof(*band->prec));
-            if (!band->prec)
-                return AVERROR(ENOMEM);
-
-            for (precno = 0; precno < nb_precincts; precno++) {
-                Jpeg2000Prec *prec = band->prec + precno;
-                int nb_codeblocks;
-
-                /* TODO: Explain formula for JPEG200 DCINEMA. */
-                /* TODO: Verify with previous count of codeblocks per band */
-
-                /* Compute P_x0 */
-                prec->coord[0][0] = (precno % reslevel->num_precincts_x) *
-                                    (1 << log2_band_prec_width);
-                prec->coord[0][0] = FFMAX(prec->coord[0][0], band->coord[0][0]);
-
-                /* Compute P_y0 */
-                prec->coord[1][0] = (precno / reslevel->num_precincts_x) *
-                                    (1 << log2_band_prec_height);
-                prec->coord[1][0] = FFMAX(prec->coord[1][0], band->coord[1][0]);
-
-                /* Compute P_x1 */
-                prec->coord[0][1] = prec->coord[0][0] +
-                                    (1 << log2_band_prec_width);
-                prec->coord[0][1] = FFMIN(prec->coord[0][1], band->coord[0][1]);
-
-                /* Compute P_y1 */
-                prec->coord[1][1] = prec->coord[1][0] +
-                                    (1 << log2_band_prec_height);
-                prec->coord[1][1] = FFMIN(prec->coord[1][1], band->coord[1][1]);
-
-                prec->nb_codeblocks_width =
-                    ff_jpeg2000_ceildivpow2(prec->coord[0][1] -
-                                            prec->coord[0][0],
-                                            band->log2_cblk_width);
-                prec->nb_codeblocks_height =
-                    ff_jpeg2000_ceildivpow2(prec->coord[1][1] -
-                                            prec->coord[1][0],
-                                            band->log2_cblk_height);
-
-                /* Tag trees initialization */
-                prec->cblkincl =
-                    ff_jpeg2000_tag_tree_init(prec->nb_codeblocks_width,
-                                              prec->nb_codeblocks_height);
-                if (!prec->cblkincl)
-                    return AVERROR(ENOMEM);
-
-                prec->zerobits =
-                    ff_jpeg2000_tag_tree_init(prec->nb_codeblocks_width,
-                                              prec->nb_codeblocks_height);
-                if (!prec->zerobits)
-                    return AVERROR(ENOMEM);
-
-                if (prec->nb_codeblocks_width * (uint64_t)prec->nb_codeblocks_height > INT_MAX) {
-                    prec->cblk = NULL;
-                    return AVERROR(ENOMEM);
-                }
-                nb_codeblocks = prec->nb_codeblocks_width * prec->nb_codeblocks_height;
-                prec->cblk = av_mallocz_array(nb_codeblocks, sizeof(*prec->cblk));
-                if (!prec->cblk)
-                    return AVERROR(ENOMEM);
-                for (cblkno = 0; cblkno < nb_codeblocks; cblkno++) {
-                    Jpeg2000Cblk *cblk = prec->cblk + cblkno;
-                    uint16_t Cx0, Cy0;
-
-                    /* Compute coordinates of codeblocks */
-                    /* Compute Cx0*/
-                    Cx0 = (prec->coord[0][0] >> band->log2_cblk_width) << band->log2_cblk_width;
-                    Cx0 = Cx0 + ((cblkno % prec->nb_codeblocks_width)  << band->log2_cblk_width);
-                    cblk->coord[0][0] = FFMAX(Cx0, prec->coord[0][0]);
-
-                    /* Compute Cy0*/
-                    Cy0 = (prec->coord[1][0] >> band->log2_cblk_height) << band->log2_cblk_height;
-                    Cy0 = Cy0 + ((cblkno / prec->nb_codeblocks_width)   << band->log2_cblk_height);
-                    cblk->coord[1][0] = FFMAX(Cy0, prec->coord[1][0]);
-
-                    /* Compute Cx1 */
-                    cblk->coord[0][1] = FFMIN(Cx0 + (1 << band->log2_cblk_width),
-                                              prec->coord[0][1]);
-
-                    /* Compute Cy1 */
-                    cblk->coord[1][1] = FFMIN(Cy0 + (1 << band->log2_cblk_height),
-                                              prec->coord[1][1]);
-                    /* Update code-blocks coordinates according sub-band position */
-                    if ((bandno + !!reslevelno) & 1) {
-                        cblk->coord[0][0] += comp->reslevel[reslevelno-1].coord[0][1] -
-                                             comp->reslevel[reslevelno-1].coord[0][0];
-                        cblk->coord[0][1] += comp->reslevel[reslevelno-1].coord[0][1] -
-                                             comp->reslevel[reslevelno-1].coord[0][0];
-                    }
-                    if ((bandno + !!reslevelno) & 2) {
-                        cblk->coord[1][0] += comp->reslevel[reslevelno-1].coord[1][1] -
-                                             comp->reslevel[reslevelno-1].coord[1][0];
-                        cblk->coord[1][1] += comp->reslevel[reslevelno-1].coord[1][1] -
-                                             comp->reslevel[reslevelno-1].coord[1][0];
-                    }
-
-                    cblk->zero      = 0;
-                    cblk->lblock    = 3;
-                    cblk->length    = 0;
-                    cblk->lengthinc = 0;
-                    cblk->npasses   = 0;
-                }
-            }
+            ret = init_band(avctx, reslevel,
+                            comp, codsty, qntsty,
+                            bandno, gbandno, reslevelno,
+                            cbps, dx, dy);
+            if (ret < 0)
+                return ret;
         }
     }
     return 0;
diff --git a/libavcodec/jpeg2000.h b/libavcodec/jpeg2000.h
index acdba62a..ed3b421a 100644
--- a/libavcodec/jpeg2000.h
+++ b/libavcodec/jpeg2000.h
@@ -58,19 +58,20 @@ enum Jpeg2000Markers {
     JPEG2000_EOC = 0xffd9, // end of codestream
 };
 
+#define JPEG2000_SOP_FIXED_BYTES 0xFF910004
+#define JPEG2000_SOP_BYTE_LENGTH 6
+
 enum Jpeg2000Quantsty { // quantization style
     JPEG2000_QSTY_NONE, // no quantization
     JPEG2000_QSTY_SI,   // scalar derived
     JPEG2000_QSTY_SE    // scalar expounded
 };
 
-#define JPEG2000_MAX_CBLKW 64
-#define JPEG2000_MAX_CBLKH 64
-
-
-#define JPEG2000_MAX_DECLEVELS 32
+#define JPEG2000_MAX_DECLEVELS 33
 #define JPEG2000_MAX_RESLEVELS (JPEG2000_MAX_DECLEVELS + 1)
 
+#define JPEG2000_MAX_PASSES 100
+
 // T1 flags
 // flags determining significance of neighbor coefficients
 #define JPEG2000_T1_SIG_N  0x0001
@@ -118,9 +119,10 @@ enum Jpeg2000Quantsty { // quantization style
 #define JPEG2000_PGOD_CPRL      0x04  // Component-position-resolution level-layer progression
 
 typedef struct Jpeg2000T1Context {
-    int data[JPEG2000_MAX_CBLKW][JPEG2000_MAX_CBLKH];
-    int flags[JPEG2000_MAX_CBLKW + 2][JPEG2000_MAX_CBLKH + 2];
+    int data[6144];
+    uint16_t flags[6156];
     MqcState mqc;
+    int stride;
 } Jpeg2000T1Context;
 
 typedef struct Jpeg2000TgtNode {
@@ -154,6 +156,8 @@ typedef struct Jpeg2000QuantStyle {
 typedef struct Jpeg2000Pass {
     uint16_t rate;
     int64_t disto;
+    uint8_t flushed[4];
+    int flushed_len;
 } Jpeg2000Pass;
 
 typedef struct Jpeg2000Cblk {
@@ -161,25 +165,30 @@ typedef struct Jpeg2000Cblk {
     uint8_t ninclpasses; // number coding of passes included in codestream
     uint8_t nonzerobits;
     uint16_t length;
-    uint16_t lengthinc;
+    uint16_t lengthinc[JPEG2000_MAX_PASSES];
+    uint8_t nb_lengthinc;
     uint8_t lblock;
     uint8_t zero;
     uint8_t data[8192];
-    Jpeg2000Pass passes[100];
-    uint16_t coord[2][2]; // border coordinates {{x0, x1}, {y0, y1}}
+    int nb_terminations;
+    int nb_terminationsinc;
+    int data_start[JPEG2000_MAX_PASSES];
+    Jpeg2000Pass passes[JPEG2000_MAX_PASSES];
+    int coord[2][2]; // border coordinates {{x0, x1}, {y0, y1}}
 } Jpeg2000Cblk; // code block
 
 typedef struct Jpeg2000Prec {
-    uint16_t nb_codeblocks_width;
-    uint16_t nb_codeblocks_height;
+    int nb_codeblocks_width;
+    int nb_codeblocks_height;
     Jpeg2000TgtNode *zerobits;
     Jpeg2000TgtNode *cblkincl;
     Jpeg2000Cblk *cblk;
-    uint16_t coord[2][2]; // border coordinates {{x0, x1}, {y0, y1}}
+    int decoded_layers;
+    int coord[2][2]; // border coordinates {{x0, x1}, {y0, y1}}
 } Jpeg2000Prec; // precinct
 
 typedef struct Jpeg2000Band {
-    uint16_t coord[2][2]; // border coordinates {{x0, x1}, {y0, y1}}
+    int coord[2][2]; // border coordinates {{x0, x1}, {y0, y1}}
     uint16_t log2_cblk_width, log2_cblk_height;
     int i_stepsize; // quantization stepsize
     float f_stepsize; // quantization stepsize
@@ -188,8 +197,8 @@ typedef struct Jpeg2000Band {
 
 typedef struct Jpeg2000ResLevel {
     uint8_t nbands;
-    uint16_t coord[2][2]; // border coordinates {{x0, x1}, {y0, y1}}
-    uint16_t num_precincts_x, num_precincts_y; // number of precincts in x/y direction
+    int coord[2][2]; // border coordinates {{x0, x1}, {y0, y1}}
+    int num_precincts_x, num_precincts_y; // number of precincts in x/y direction
     uint8_t log2_prec_width, log2_prec_height; // exponent of precinct size
     Jpeg2000Band *band;
 } Jpeg2000ResLevel; // resolution level
@@ -199,14 +208,14 @@ typedef struct Jpeg2000Component {
     DWTContext dwt;
     float *f_data;
     int *i_data;
-    uint16_t coord[2][2];   // border coordinates {{x0, x1}, {y0, y1}} -- can be reduced with lowres option
-    uint16_t coord_o[2][2]; // border coordinates {{x0, x1}, {y0, y1}} -- original values from jpeg2000 headers
+    int coord[2][2];   // border coordinates {{x0, x1}, {y0, y1}} -- can be reduced with lowres option
+    int coord_o[2][2]; // border coordinates {{x0, x1}, {y0, y1}} -- original values from jpeg2000 headers
 } Jpeg2000Component;
 
 /* misc tools */
 static inline int ff_jpeg2000_ceildivpow2(int a, int b)
 {
-    return (a + (1 << b) - 1) >> b;
+    return -(((int64_t)(-a)) >> b);
 }
 
 static inline int ff_jpeg2000_ceildiv(int a, int b)
@@ -262,4 +271,21 @@ void ff_jpeg2000_reinit(Jpeg2000Component *comp, Jpeg2000CodingStyle *codsty);
 
 void ff_jpeg2000_cleanup(Jpeg2000Component *comp, Jpeg2000CodingStyle *codsty);
 
+static inline int needs_termination(int style, int passno) { // maps code-block style flags and a pass number to 0, 1 or 2
+    if (style & JPEG2000_CBLK_BYPASS) {
+        int type = passno % 3; // position of the pass inside its group of three consecutive passes
+        passno /= 3; // from here on passno is the index of that group, not of the pass
+        if (type == 0 && passno > 2) // first pass of a group, group index 3 or higher
+            return 2;
+        if (type == 2 && passno > 2) // last pass of a group, group index 3 or higher
+            return 1;
+        if (style & JPEG2000_CBLK_TERMALL) { // BYPASS and TERMALL together, for the passes not handled above
+            return passno > 2 ? 2 : 1;
+        }
+    }
+    if (style & JPEG2000_CBLK_TERMALL) // TERMALL without BYPASS: every pass yields 1
+        return 1;
+    return 0; // NOTE(review): the difference between results 1 and 2 is defined by the callers, not visible here
+}
+
 #endif /* AVCODEC_JPEG2000_H */
diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c
index 24cb8a17..c13670e6 100644
--- a/libavcodec/jpeg2000dec.c
+++ b/libavcodec/jpeg2000dec.c
@@ -30,6 +30,7 @@
 #include "libavutil/attributes.h"
 #include "libavutil/avassert.h"
 #include "libavutil/common.h"
+#include "libavutil/imgutils.h"
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
 #include "avcodec.h"
@@ -38,6 +39,7 @@
 #include "thread.h"
 #include "jpeg2000.h"
 #include "jpeg2000dsp.h"
+#include "profiles.h"
 
 #define JP2_SIG_TYPE    0x6A502020
 #define JP2_SIG_VALUE   0x0D0A870A
@@ -47,6 +49,23 @@
 #define HAD_COC 0x01
 #define HAD_QCC 0x02
 
+#define MAX_POCS 32
+
+typedef struct Jpeg2000POCEntry { // one entry of a POC segment as parsed by get_poc()
+    uint16_t LYEpoc; // read as 16-bit big-endian; get_poc() rejects 0 (presumably the layer end bound - confirm)
+    uint16_t CSpoc; // read from a single byte; must be below CEpoc (presumably the first component - confirm)
+    uint16_t CEpoc; // read from a single byte; 0 is stored as 256, then clamped to the component count
+    uint8_t RSpoc; // must be below REpoc (presumably the first resolution level - confirm)
+    uint8_t REpoc; // get_poc() rejects values above 33
+    uint8_t Ppoc; // stored without validation by get_poc() (presumably the progression order - confirm)
+} Jpeg2000POCEntry;
+
+typedef struct Jpeg2000POC { // list of POC entries, kept both in the decoder context and in each tile
+    Jpeg2000POCEntry poc[MAX_POCS]; // entry storage; get_poc() refuses to go past MAX_POCS
+    int nb_poc; // number of valid entries at the start of poc[]
+    int is_default; // set when a tile copies the decoder-wide list; while set, get_poc() replaces instead of appending
+} Jpeg2000POC;
+
 typedef struct Jpeg2000TilePart {
     uint8_t tile_index;                 // Tile index who refers the tile-part
     const uint8_t *tp_end;
@@ -60,8 +79,10 @@ typedef struct Jpeg2000Tile {
     uint8_t             properties[4];
     Jpeg2000CodingStyle codsty[4];
     Jpeg2000QuantStyle  qntsty[4];
-    Jpeg2000TilePart    tile_part[4];
+    Jpeg2000POC         poc;
+    Jpeg2000TilePart    tile_part[256];
     uint16_t tp_idx;                    // Tile-part index
+    int coord[2][2];                    // border coordinates {{x0, x1}, {y0, y1}}
 } Jpeg2000Tile;
 
 typedef struct Jpeg2000DecoderContext {
@@ -88,6 +109,7 @@ typedef struct Jpeg2000DecoderContext {
 
     Jpeg2000CodingStyle codsty[4];
     Jpeg2000QuantStyle  qntsty[4];
+    Jpeg2000POC         poc;
 
     int             bit_index;
 
@@ -133,8 +155,10 @@ static int tag_tree_decode(Jpeg2000DecoderContext *s, Jpeg2000TgtNode *node,
     Jpeg2000TgtNode *stack[30];
     int sp = -1, curval = 0;
 
-    if (!node)
+    if (!node) {
+        av_log(s->avctx, AV_LOG_ERROR, "missing node\n");
         return AVERROR_INVALIDDATA;
+    }
 
     while (node && !node->vis) {
         stack[++sp] = node;
@@ -171,26 +195,28 @@ static int pix_fmt_match(enum AVPixelFormat pix_fmt, int components,
     int match = 1;
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
 
+    av_assert2(desc);
+
     if (desc->nb_components != components) {
         return 0;
     }
 
     switch (components) {
     case 4:
-        match = match && desc->comp[3].depth_minus1 + 1 >= bpc &&
+        match = match && desc->comp[3].depth >= bpc &&
                          (log2_chroma_wh >> 14 & 3) == 0 &&
                          (log2_chroma_wh >> 12 & 3) == 0;
     case 3:
-        match = match && desc->comp[2].depth_minus1 + 1 >= bpc &&
+        match = match && desc->comp[2].depth >= bpc &&
                          (log2_chroma_wh >> 10 & 3) == desc->log2_chroma_w &&
                          (log2_chroma_wh >>  8 & 3) == desc->log2_chroma_h;
     case 2:
-        match = match && desc->comp[1].depth_minus1 + 1 >= bpc &&
+        match = match && desc->comp[1].depth >= bpc &&
                          (log2_chroma_wh >>  6 & 3) == desc->log2_chroma_w &&
                          (log2_chroma_wh >>  4 & 3) == desc->log2_chroma_h;
 
     case 1:
-        match = match && desc->comp[0].depth_minus1 + 1 >= bpc &&
+        match = match && desc->comp[0].depth >= bpc &&
                          (log2_chroma_wh >>  2 & 3) == 0 &&
                          (log2_chroma_wh       & 3) == 0 &&
                          (desc->flags & AV_PIX_FMT_FLAG_PAL) == pal8 * AV_PIX_FMT_FLAG_PAL;
@@ -218,7 +244,8 @@ static int pix_fmt_match(enum AVPixelFormat pix_fmt, int components,
 static const enum AVPixelFormat rgb_pix_fmts[]  = {RGB_PIXEL_FORMATS};
 static const enum AVPixelFormat gray_pix_fmts[] = {GRAY_PIXEL_FORMATS};
 static const enum AVPixelFormat yuv_pix_fmts[]  = {YUV_PIXEL_FORMATS};
-static const enum AVPixelFormat xyz_pix_fmts[]  = {XYZ_PIXEL_FORMATS};
+static const enum AVPixelFormat xyz_pix_fmts[]  = {XYZ_PIXEL_FORMATS,
+                                                   YUV_PIXEL_FORMATS};
 static const enum AVPixelFormat all_pix_fmts[]  = {RGB_PIXEL_FORMATS,
                                                    GRAY_PIXEL_FORMATS,
                                                    YUV_PIXEL_FORMATS,
@@ -234,8 +261,10 @@ static int get_siz(Jpeg2000DecoderContext *s)
     const enum AVPixelFormat *possible_fmts = NULL;
     int possible_fmts_nb = 0;
 
-    if (bytestream2_get_bytes_left(&s->g) < 36)
+    if (bytestream2_get_bytes_left(&s->g) < 36) {
+        av_log(s->avctx, AV_LOG_ERROR, "Insufficient space for SIZ\n");
         return AVERROR_INVALIDDATA;
+    }
 
     s->avctx->profile = bytestream2_get_be16u(&s->g); // Rsiz
     s->width          = bytestream2_get_be32u(&s->g); // Width
@@ -252,6 +281,10 @@ static int get_siz(Jpeg2000DecoderContext *s)
         avpriv_request_sample(s->avctx, "Support for image offsets");
         return AVERROR_PATCHWELCOME;
     }
+    if (av_image_check_size(s->width, s->height, 0, s->avctx)) {
+        avpriv_request_sample(s->avctx, "Large Dimensions");
+        return AVERROR_PATCHWELCOME;
+    }
 
     if (ncomponents <= 0) {
         av_log(s->avctx, AV_LOG_ERROR, "Invalid number of components: %d\n",
@@ -261,7 +294,7 @@ static int get_siz(Jpeg2000DecoderContext *s)
 
     if (ncomponents > 4) {
         avpriv_request_sample(s->avctx, "Support for %d components",
-                              s->ncomponents);
+                              ncomponents);
         return AVERROR_PATCHWELCOME;
     }
 
@@ -273,8 +306,10 @@ static int get_siz(Jpeg2000DecoderContext *s)
         return AVERROR_INVALIDDATA;
     }
 
-    if (bytestream2_get_bytes_left(&s->g) < 3 * s->ncomponents)
+    if (bytestream2_get_bytes_left(&s->g) < 3 * s->ncomponents) {
+        av_log(s->avctx, AV_LOG_ERROR, "Insufficient space for %d components in SIZ\n", s->ncomponents);
         return AVERROR_INVALIDDATA;
+    }
 
     for (i = 0; i < s->ncomponents; i++) { // Ssiz_i XRsiz_i, YRsiz_i
         uint8_t x    = bytestream2_get_byteu(&s->g);
@@ -349,16 +384,41 @@ static int get_siz(Jpeg2000DecoderContext *s)
             break;
         }
     }
+
+    if (i == possible_fmts_nb) {
+        if (ncomponents == 4 &&
+            s->cdy[0] == 1 && s->cdx[0] == 1 &&
+            s->cdy[1] == 1 && s->cdx[1] == 1 &&
+            s->cdy[2] == s->cdy[3] && s->cdx[2] == s->cdx[3]) {
+            if (s->precision == 8 && s->cdy[2] == 2 && s->cdx[2] == 2 && !s->pal8) {
+                s->avctx->pix_fmt = AV_PIX_FMT_YUVA420P;
+                s->cdef[0] = 0;
+                s->cdef[1] = 1;
+                s->cdef[2] = 2;
+                s->cdef[3] = 3;
+                i = 0;
+            }
+        }
+    }
+
+
     if (i == possible_fmts_nb) {
         av_log(s->avctx, AV_LOG_ERROR,
                "Unknown pix_fmt, profile: %d, colour_space: %d, "
-               "components: %d, precision: %d, "
-               "cdx[1]: %d, cdy[1]: %d, cdx[2]: %d, cdy[2]: %d\n",
+               "components: %d, precision: %d\n"
+               "cdx[0]: %d, cdy[0]: %d\n"
+               "cdx[1]: %d, cdy[1]: %d\n"
+               "cdx[2]: %d, cdy[2]: %d\n"
+               "cdx[3]: %d, cdy[3]: %d\n",
                s->avctx->profile, s->colour_space, ncomponents, s->precision,
-               ncomponents > 2 ? s->cdx[1] : 0,
-               ncomponents > 2 ? s->cdy[1] : 0,
+               s->cdx[0],
+               s->cdy[0],
+               ncomponents > 1 ? s->cdx[1] : 0,
+               ncomponents > 1 ? s->cdy[1] : 0,
                ncomponents > 2 ? s->cdx[2] : 0,
-               ncomponents > 2 ? s->cdy[2] : 0);
+               ncomponents > 2 ? s->cdy[2] : 0,
+               ncomponents > 3 ? s->cdx[3] : 0,
+               ncomponents > 3 ? s->cdy[3] : 0);
         return AVERROR_PATCHWELCOME;
     }
     s->avctx->bits_per_raw_sample = s->precision;
@@ -370,8 +430,10 @@ static int get_cox(Jpeg2000DecoderContext *s, Jpeg2000CodingStyle *c)
 {
     uint8_t byte;
 
-    if (bytestream2_get_bytes_left(&s->g) < 5)
+    if (bytestream2_get_bytes_left(&s->g) < 5) {
+        av_log(s->avctx, AV_LOG_ERROR, "Insufficient space for COX\n");
         return AVERROR_INVALIDDATA;
+    }
 
     /*  nreslevels = number of resolution levels
                    = number of decomposition level +1 */
@@ -403,19 +465,19 @@ static int get_cox(Jpeg2000DecoderContext *s, Jpeg2000CodingStyle *c)
         return AVERROR_INVALIDDATA;
     }
 
-    if (c->log2_cblk_width > 6 || c->log2_cblk_height > 6) {
-        avpriv_request_sample(s->avctx, "cblk size > 64");
-        return AVERROR_PATCHWELCOME;
-    }
-
     c->cblk_style = bytestream2_get_byteu(&s->g);
     if (c->cblk_style != 0) { // cblk style
         av_log(s->avctx, AV_LOG_WARNING, "extra cblk styles %X\n", c->cblk_style);
+        if (c->cblk_style & JPEG2000_CBLK_BYPASS)
+            av_log(s->avctx, AV_LOG_WARNING, "Selective arithmetic coding bypass\n");
     }
     c->transform = bytestream2_get_byteu(&s->g); // DWT transformation type
     /* set integer 9/7 DWT in case of BITEXACT flag */
-    if ((s->avctx->flags & CODEC_FLAG_BITEXACT) && (c->transform == FF_DWT97))
+    if ((s->avctx->flags & AV_CODEC_FLAG_BITEXACT) && (c->transform == FF_DWT97))
         c->transform = FF_DWT97_INT;
+    else if (c->transform == FF_DWT53) {
+        s->avctx->properties |= FF_CODEC_PROPERTY_LOSSLESS;
+    }
 
     if (c->csty & JPEG2000_CSTY_PREC) {
         int i;
@@ -423,6 +485,13 @@ static int get_cox(Jpeg2000DecoderContext *s, Jpeg2000CodingStyle *c)
             byte = bytestream2_get_byte(&s->g);
             c->log2_prec_widths[i]  =  byte       & 0x0F;    // precinct PPx
             c->log2_prec_heights[i] = (byte >> 4) & 0x0F;    // precinct PPy
+            if (i)
+                if (c->log2_prec_widths[i] == 0 || c->log2_prec_heights[i] == 0) {
+                    av_log(s->avctx, AV_LOG_ERROR, "PPx %d PPy %d invalid\n",
+                           c->log2_prec_widths[i], c->log2_prec_heights[i]);
+                    c->log2_prec_widths[i] = c->log2_prec_heights[i] = 1;
+                    return AVERROR_INVALIDDATA;
+                }
         }
     } else {
         memset(c->log2_prec_widths , 15, sizeof(c->log2_prec_widths ));
@@ -438,8 +507,10 @@ static int get_cod(Jpeg2000DecoderContext *s, Jpeg2000CodingStyle *c,
     Jpeg2000CodingStyle tmp;
     int compno, ret;
 
-    if (bytestream2_get_bytes_left(&s->g) < 5)
+    if (bytestream2_get_bytes_left(&s->g) < 5) {
+        av_log(s->avctx, AV_LOG_ERROR, "Insufficient space for COD\n");
         return AVERROR_INVALIDDATA;
+    }
 
     tmp.csty = bytestream2_get_byteu(&s->g);
 
@@ -472,8 +543,10 @@ static int get_coc(Jpeg2000DecoderContext *s, Jpeg2000CodingStyle *c,
 {
     int compno, ret;
 
-    if (bytestream2_get_bytes_left(&s->g) < 2)
+    if (bytestream2_get_bytes_left(&s->g) < 2) {
+        av_log(s->avctx, AV_LOG_ERROR, "Insufficient space for COC\n");
         return AVERROR_INVALIDDATA;
+    }
 
     compno = bytestream2_get_byteu(&s->g);
 
@@ -579,13 +652,74 @@ static int get_qcc(Jpeg2000DecoderContext *s, int n, Jpeg2000QuantStyle *q,
     return get_qcx(s, n - 1, q + compno);
 }
 
+static int get_poc(Jpeg2000DecoderContext *s, int size, Jpeg2000POC *p) // parse a POC segment of 'size' bytes from s->g into *p; 0 or a negative AVERROR
+{
+    int i;
+    int elem_size = s->ncomponents <= 257 ? 7 : 9; // bytes per entry; only the 7-byte layout is parsed below
+    Jpeg2000POC tmp = {{{0}}}; // entries are parsed into a scratch copy, so *p stays untouched on any error return
+
+    if (bytestream2_get_bytes_left(&s->g) < 5 || size < 2 + elem_size) { // 'size' must hold 2 extra bytes plus at least one entry
+        av_log(s->avctx, AV_LOG_ERROR, "Insufficient space for POC\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (elem_size > 7) { // the 9-byte entry layout is not implemented
+        avpriv_request_sample(s->avctx, "Fat POC not supported");
+        return AVERROR_PATCHWELCOME;
+    }
+
+    tmp.nb_poc = (size - 2) / elem_size; // presumably the 2 bytes are the segment length field itself - confirm against caller
+    if (tmp.nb_poc > MAX_POCS) {
+        avpriv_request_sample(s->avctx, "Too many POCs (%d)", tmp.nb_poc);
+        return AVERROR_PATCHWELCOME;
+    }
+
+    for (i = 0; i<tmp.nb_poc; i++) { // NOTE(review): unchecked readers below; only 5 bytes were verified above - assumes the caller checked 'size' against the input
+        Jpeg2000POCEntry *e = &tmp.poc[i];
+        e->RSpoc  = bytestream2_get_byteu(&s->g);
+        e->CSpoc  = bytestream2_get_byteu(&s->g);
+        e->LYEpoc = bytestream2_get_be16u(&s->g);
+        e->REpoc  = bytestream2_get_byteu(&s->g);
+        e->CEpoc  = bytestream2_get_byteu(&s->g);
+        e->Ppoc   = bytestream2_get_byteu(&s->g);
+        if (!e->CEpoc) // a stored 0 stands for 256
+            e->CEpoc = 256;
+        if (e->CEpoc > s->ncomponents) // clamp to the component count, so the CEpoc range test below cannot fire
+            e->CEpoc = s->ncomponents;
+        if (   e->RSpoc >= e->REpoc || e->REpoc > 33
+            || e->CSpoc >= e->CEpoc || e->CEpoc > s->ncomponents
+            || !e->LYEpoc) { // empty ranges, REpoc above 33 and a zero LYEpoc are all rejected
+            av_log(s->avctx, AV_LOG_ERROR, "POC Entry %d is invalid (%d, %d, %d, %d, %d, %d)\n", i,
+                e->RSpoc, e->CSpoc, e->LYEpoc, e->REpoc, e->CEpoc, e->Ppoc
+            );
+            return AVERROR_INVALIDDATA;
+        }
+    }
+
+    if (!p->nb_poc || p->is_default) { // nothing stored yet, or only an inherited default list: replace it
+        *p = tmp;
+    } else { // otherwise append the new entries after the existing ones
+        if (p->nb_poc + tmp.nb_poc > MAX_POCS) {
+            av_log(s->avctx, AV_LOG_ERROR, "Insufficient space for POC\n");
+            return AVERROR_INVALIDDATA;
+        }
+        memcpy(p->poc + p->nb_poc, tmp.poc, tmp.nb_poc * sizeof(tmp.poc[0]));
+        p->nb_poc += tmp.nb_poc;
+    }
+
+    p->is_default = 0; // *p now holds explicitly signalled entries, so later calls append
+
+    return 0;
+}
+
+
 /* Get start of tile segment. */
 static int get_sot(Jpeg2000DecoderContext *s, int n)
 {
     Jpeg2000TilePart *tp;
     uint16_t Isot;
     uint32_t Psot;
-    uint8_t TPsot;
+    unsigned TPsot;
 
     if (bytestream2_get_bytes_left(&s->g) < 8)
         return AVERROR_INVALIDDATA;
@@ -610,10 +744,7 @@ static int get_sot(Jpeg2000DecoderContext *s, int n)
         return AVERROR_INVALIDDATA;
     }
 
-    if (TPsot >= FF_ARRAY_ELEMS(s->tile[Isot].tile_part)) {
-        avpriv_request_sample(s->avctx, "Support for %"PRIu8" components", TPsot);
-        return AVERROR_PATCHWELCOME;
-    }
+    av_assert0(TPsot < FF_ARRAY_ELEMS(s->tile[Isot].tile_part));
 
     s->tile[Isot].tp_idx = TPsot;
     tp             = s->tile[Isot].tile_part + TPsot;
@@ -626,6 +757,8 @@ static int get_sot(Jpeg2000DecoderContext *s, int n)
         /* copy defaults */
         memcpy(tile->codsty, s->codsty, s->ncomponents * sizeof(Jpeg2000CodingStyle));
         memcpy(tile->qntsty, s->qntsty, s->ncomponents * sizeof(Jpeg2000QuantStyle));
+        memcpy(&tile->poc  , &s->poc  , sizeof(tile->poc));
+        tile->poc.is_default = 1;
     }
 
     return 0;
@@ -677,7 +810,7 @@ static uint8_t get_plt(Jpeg2000DecoderContext *s, int n)
 {
     int i;
 
-    av_log(s->avctx, AV_LOG_ERROR,
+    av_log(s->avctx, AV_LOG_DEBUG,
             "PLT marker at pos 0x%X\n", bytestream2_tell(&s->g) - 4);
 
     /*Zplt =*/ bytestream2_get_byte(&s->g);
@@ -699,16 +832,21 @@ static int init_tile(Jpeg2000DecoderContext *s, int tileno)
     if (!tile->comp)
         return AVERROR(ENOMEM);
 
+    tile->coord[0][0] = av_clip(tilex       * (int64_t)s->tile_width  + s->tile_offset_x, s->image_offset_x, s->width);
+    tile->coord[0][1] = av_clip((tilex + 1) * (int64_t)s->tile_width  + s->tile_offset_x, s->image_offset_x, s->width);
+    tile->coord[1][0] = av_clip(tiley       * (int64_t)s->tile_height + s->tile_offset_y, s->image_offset_y, s->height);
+    tile->coord[1][1] = av_clip((tiley + 1) * (int64_t)s->tile_height + s->tile_offset_y, s->image_offset_y, s->height);
+
     for (compno = 0; compno < s->ncomponents; compno++) {
         Jpeg2000Component *comp = tile->comp + compno;
         Jpeg2000CodingStyle *codsty = tile->codsty + compno;
         Jpeg2000QuantStyle  *qntsty = tile->qntsty + compno;
         int ret; // global bandno
 
-        comp->coord_o[0][0] = FFMAX(tilex       * s->tile_width  + s->tile_offset_x, s->image_offset_x);
-        comp->coord_o[0][1] = FFMIN((tilex + 1) * s->tile_width  + s->tile_offset_x, s->width);
-        comp->coord_o[1][0] = FFMAX(tiley       * s->tile_height + s->tile_offset_y, s->image_offset_y);
-        comp->coord_o[1][1] = FFMIN((tiley + 1) * s->tile_height + s->tile_offset_y, s->height);
+        comp->coord_o[0][0] = tile->coord[0][0];
+        comp->coord_o[0][1] = tile->coord[0][1];
+        comp->coord_o[1][0] = tile->coord[1][0];
+        comp->coord_o[1][1] = tile->coord[1][1];
         if (compno) {
             comp->coord_o[0][0] /= s->cdx[compno];
             comp->coord_o[0][1] /= s->cdx[compno];
@@ -756,12 +894,26 @@ static int getlblockinc(Jpeg2000DecoderContext *s)
     return res;
 }
 
-static int jpeg2000_decode_packet(Jpeg2000DecoderContext *s,
+static int jpeg2000_decode_packet(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile, int *tp_index,
                                   Jpeg2000CodingStyle *codsty,
                                   Jpeg2000ResLevel *rlevel, int precno,
                                   int layno, uint8_t *expn, int numgbits)
 {
     int bandno, cblkno, ret, nb_code_blocks;
+    int cwsno;
+
+    if (layno < rlevel->band[0].prec[precno].decoded_layers)
+        return 0;
+    rlevel->band[0].prec[precno].decoded_layers = layno + 1;
+
+    if (bytestream2_get_bytes_left(&s->g) == 0 && s->bit_index == 8) {
+        if (*tp_index < FF_ARRAY_ELEMS(tile->tile_part) - 1) {
+            s->g = tile->tile_part[++(*tp_index)].tpg;
+        }
+    }
+
+    if (bytestream2_peek_be32(&s->g) == JPEG2000_SOP_FIXED_BYTES)
+        bytestream2_skip(&s->g, JPEG2000_SOP_BYTE_LENGTH);
 
     if (!(ret = get_bits(s, 1))) {
         jpeg2000_flush(s);
@@ -803,19 +955,46 @@ static int jpeg2000_decode_packet(Jpeg2000DecoderContext *s,
             }
             if ((newpasses = getnpasses(s)) < 0)
                 return newpasses;
+            av_assert2(newpasses > 0);
+            if (cblk->npasses + newpasses >= JPEG2000_MAX_PASSES) {
+                avpriv_request_sample(s->avctx, "Too many passes");
+                return AVERROR_PATCHWELCOME;
+            }
             if ((llen = getlblockinc(s)) < 0)
                 return llen;
-            cblk->lblock += llen;
-            if ((ret = get_bits(s, av_log2(newpasses) + cblk->lblock)) < 0)
-                return ret;
-            if (ret > sizeof(cblk->data)) {
+            if (cblk->lblock + llen + av_log2(newpasses) > 16) {
                 avpriv_request_sample(s->avctx,
-                                      "Block with lengthinc greater than %"SIZE_SPECIFIER"",
-                                      sizeof(cblk->data));
+                                      "Block with length beyond 16 bits");
                 return AVERROR_PATCHWELCOME;
             }
-            cblk->lengthinc = ret;
-            cblk->npasses  += newpasses;
+
+            cblk->lblock += llen;
+
+            cblk->nb_lengthinc = 0;
+            cblk->nb_terminationsinc = 0;
+            do {
+                int newpasses1 = 0;
+
+                while (newpasses1 < newpasses) {
+                    newpasses1 ++;
+                    if (needs_termination(codsty->cblk_style, cblk->npasses + newpasses1 - 1)) {
+                        cblk->nb_terminationsinc ++;
+                        break;
+                    }
+                }
+
+                if ((ret = get_bits(s, av_log2(newpasses1) + cblk->lblock)) < 0)
+                    return ret;
+                if (ret > sizeof(cblk->data)) {
+                    avpriv_request_sample(s->avctx,
+                                        "Block with lengthinc greater than %"SIZE_SPECIFIER"",
+                                        sizeof(cblk->data));
+                    return AVERROR_PATCHWELCOME;
+                }
+                cblk->lengthinc[cblk->nb_lengthinc++] = ret;
+                cblk->npasses  += newpasses1;
+                newpasses -= newpasses1;
+            } while(newpasses);
         }
     }
     jpeg2000_flush(s);
@@ -824,7 +1003,7 @@ static int jpeg2000_decode_packet(Jpeg2000DecoderContext *s,
         if (bytestream2_peek_be16(&s->g) == JPEG2000_EPH)
             bytestream2_skip(&s->g, 2);
         else
-            av_log(s->avctx, AV_LOG_ERROR, "EPH marker not found.\n");
+            av_log(s->avctx, AV_LOG_ERROR, "EPH marker not found. instead %X\n", bytestream2_peek_be32(&s->g));
     }
 
     for (bandno = 0; bandno < rlevel->nbands; bandno++) {
@@ -834,40 +1013,76 @@ static int jpeg2000_decode_packet(Jpeg2000DecoderContext *s,
         nb_code_blocks = prec->nb_codeblocks_height * prec->nb_codeblocks_width;
         for (cblkno = 0; cblkno < nb_code_blocks; cblkno++) {
             Jpeg2000Cblk *cblk = prec->cblk + cblkno;
-            if (   bytestream2_get_bytes_left(&s->g) < cblk->lengthinc
-                || sizeof(cblk->data) < cblk->length + cblk->lengthinc + 2
-            ) {
-                av_log(s->avctx, AV_LOG_ERROR,
-                       "Block length %"PRIu16" or lengthinc %d is too large\n",
-                       cblk->length, cblk->lengthinc);
-                return AVERROR_INVALIDDATA;
-            }
+            for (cwsno = 0; cwsno < cblk->nb_lengthinc; cwsno ++) {
+                if (   bytestream2_get_bytes_left(&s->g) < cblk->lengthinc[cwsno]
+                    || sizeof(cblk->data) < cblk->length + cblk->lengthinc[cwsno] + 4
+                ) {
+                    av_log(s->avctx, AV_LOG_ERROR,
+                        "Block length %"PRIu16" or lengthinc %d is too large, left %d\n",
+                        cblk->length, cblk->lengthinc[cwsno], bytestream2_get_bytes_left(&s->g));
+                    return AVERROR_INVALIDDATA;
+                }
 
-            bytestream2_get_bufferu(&s->g, cblk->data + cblk->length, cblk->lengthinc);
-            cblk->length   += cblk->lengthinc;
-            cblk->lengthinc = 0;
+                bytestream2_get_bufferu(&s->g, cblk->data + cblk->length, cblk->lengthinc[cwsno]);
+                cblk->length   += cblk->lengthinc[cwsno];
+                cblk->lengthinc[cwsno] = 0;
+                if (cblk->nb_terminationsinc) {
+                    cblk->nb_terminationsinc--;
+                    cblk->nb_terminations++;
+                    cblk->data[cblk->length++] = 0xFF;
+                    cblk->data[cblk->length++] = 0xFF;
+                    cblk->data_start[cblk->nb_terminations] = cblk->length;
+                }
+            }
         }
     }
     return 0;
 }
 
-static int jpeg2000_decode_packets(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile)
+static int jpeg2000_decode_packets_po_iteration(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
+                                             int RSpoc, int CSpoc,
+                                             int LYEpoc, int REpoc, int CEpoc,
+                                             int Ppoc, int *tp_index)
 {
     int ret = 0;
     int layno, reslevelno, compno, precno, ok_reslevel;
     int x, y;
+    int step_x, step_y;
 
-    s->bit_index = 8;
-    switch (tile->codsty[0].prog_order) {
+    switch (Ppoc) {
     case JPEG2000_PGOD_RLCP:
-        avpriv_request_sample(s->avctx, "Progression order RLCP");
+        av_log(s->avctx, AV_LOG_DEBUG, "Progression order RLCP\n");
+        ok_reslevel = 1;
+        for (reslevelno = RSpoc; ok_reslevel && reslevelno < REpoc; reslevelno++) {
+            ok_reslevel = 0;
+            for (layno = 0; layno < LYEpoc; layno++) {
+                for (compno = CSpoc; compno < CEpoc; compno++) {
+                    Jpeg2000CodingStyle *codsty = tile->codsty + compno;
+                    Jpeg2000QuantStyle *qntsty  = tile->qntsty + compno;
+                    if (reslevelno < codsty->nreslevels) {
+                        Jpeg2000ResLevel *rlevel = tile->comp[compno].reslevel +
+                                                reslevelno;
+                        ok_reslevel = 1;
+                        for (precno = 0; precno < rlevel->num_precincts_x * rlevel->num_precincts_y; precno++)
+                            if ((ret = jpeg2000_decode_packet(s, tile, tp_index,
+                                                              codsty, rlevel,
+                                                              precno, layno,
+                                                              qntsty->expn + (reslevelno ? 3 * (reslevelno - 1) + 1 : 0),
+                                                              qntsty->nguardbits)) < 0)
+                                return ret;
+                    }
+                }
+            }
+        }
+        break;
 
     case JPEG2000_PGOD_LRCP:
-        for (layno = 0; layno < tile->codsty[0].nlayers; layno++) {
+        av_log(s->avctx, AV_LOG_DEBUG, "Progression order LRCP\n");
+        for (layno = 0; layno < LYEpoc; layno++) {
             ok_reslevel = 1;
-            for (reslevelno = 0; ok_reslevel; reslevelno++) {
+            for (reslevelno = RSpoc; ok_reslevel && reslevelno < REpoc; reslevelno++) {
                 ok_reslevel = 0;
-                for (compno = 0; compno < s->ncomponents; compno++) {
+                for (compno = CSpoc; compno < CEpoc; compno++) {
                     Jpeg2000CodingStyle *codsty = tile->codsty + compno;
                     Jpeg2000QuantStyle *qntsty  = tile->qntsty + compno;
                     if (reslevelno < codsty->nreslevels) {
@@ -875,7 +1090,7 @@ static int jpeg2000_decode_packets(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile
                                                 reslevelno;
                         ok_reslevel = 1;
                         for (precno = 0; precno < rlevel->num_precincts_x * rlevel->num_precincts_y; precno++)
-                            if ((ret = jpeg2000_decode_packet(s,
+                            if ((ret = jpeg2000_decode_packet(s, tile, tp_index,
                                                               codsty, rlevel,
                                                               precno, layno,
                                                               qntsty->expn + (reslevelno ? 3 * (reslevelno - 1) + 1 : 0),
@@ -888,46 +1103,55 @@ static int jpeg2000_decode_packets(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile
         break;
 
     case JPEG2000_PGOD_CPRL:
-        for (compno = 0; compno < s->ncomponents; compno++) {
+        av_log(s->avctx, AV_LOG_DEBUG, "Progression order CPRL\n");
+        for (compno = CSpoc; compno < CEpoc; compno++) {
+            Jpeg2000Component *comp     = tile->comp + compno;
             Jpeg2000CodingStyle *codsty = tile->codsty + compno;
             Jpeg2000QuantStyle *qntsty  = tile->qntsty + compno;
-
-            /* Set bit stream buffer address according to tile-part.
-             * For DCinema one tile-part per component, so can be
-             * indexed by component. */
-            s->g = tile->tile_part[compno].tpg;
-
-            /* Position loop (y axis)
-             * TODO: Automate computing of step 256.
-             * Fixed here, but to be computed before entering here. */
-            for (y = 0; y < s->height; y += 256) {
-                /* Position loop (y axis)
-                 * TODO: automate computing of step 256.
-                 * Fixed here, but to be computed before entering here. */
-                for (x = 0; x < s->width; x += 256) {
-                    for (reslevelno = 0; reslevelno < codsty->nreslevels; reslevelno++) {
-                        uint16_t prcx, prcy;
+            step_x = 32;
+            step_y = 32;
+
+            for (reslevelno = RSpoc; reslevelno < FFMIN(codsty->nreslevels, REpoc); reslevelno++) {
+                uint8_t reducedresno = codsty->nreslevels - 1 -reslevelno; //  ==> N_L - r
+                Jpeg2000ResLevel *rlevel = comp->reslevel + reslevelno;
+                step_x = FFMIN(step_x, rlevel->log2_prec_width  + reducedresno);
+                step_y = FFMIN(step_y, rlevel->log2_prec_height + reducedresno);
+            }
+            av_assert0(step_x < 32 && step_y < 32);
+            step_x = 1<<step_x;
+            step_y = 1<<step_y;
+
+            for (y = tile->coord[1][0]; y < tile->coord[1][1]; y = (y/step_y + 1)*step_y) {
+                for (x = tile->coord[0][0]; x < tile->coord[0][1]; x = (x/step_x + 1)*step_x) {
+                    for (reslevelno = RSpoc; reslevelno < FFMIN(codsty->nreslevels, REpoc); reslevelno++) {
+                        unsigned prcx, prcy;
                         uint8_t reducedresno = codsty->nreslevels - 1 -reslevelno; //  ==> N_L - r
-                        Jpeg2000ResLevel *rlevel = tile->comp[compno].reslevel + reslevelno;
+                        Jpeg2000ResLevel *rlevel = comp->reslevel + reslevelno;
+                        int xc = x / s->cdx[compno];
+                        int yc = y / s->cdy[compno];
 
-                        if (!((y % (1 << (rlevel->log2_prec_height + reducedresno)) == 0) ||
-                              (y == 0))) // TODO: 2nd condition simplified as try0 always =0 for dcinema
+                        if (yc % (1 << (rlevel->log2_prec_height + reducedresno)) && y != tile->coord[1][0]) //FIXME this is a subset of the check
                             continue;
 
-                        if (!((x % (1 << (rlevel->log2_prec_width + reducedresno)) == 0) ||
-                              (x == 0))) // TODO: 2nd condition simplified as try0 always =0 for dcinema
+                        if (xc % (1 << (rlevel->log2_prec_width + reducedresno)) && x != tile->coord[0][0]) //FIXME this is a subset of the check
                             continue;
 
                         // check if a precinct exists
-                        prcx   = ff_jpeg2000_ceildivpow2(x, reducedresno) >> rlevel->log2_prec_width;
-                        prcy   = ff_jpeg2000_ceildivpow2(y, reducedresno) >> rlevel->log2_prec_height;
+                        prcx   = ff_jpeg2000_ceildivpow2(xc, reducedresno) >> rlevel->log2_prec_width;
+                        prcy   = ff_jpeg2000_ceildivpow2(yc, reducedresno) >> rlevel->log2_prec_height;
+                        prcx  -= ff_jpeg2000_ceildivpow2(comp->coord_o[0][0], reducedresno) >> rlevel->log2_prec_width;
+                        prcy  -= ff_jpeg2000_ceildivpow2(comp->coord_o[1][0], reducedresno) >> rlevel->log2_prec_height;
+
                         precno = prcx + rlevel->num_precincts_x * prcy;
 
-                        if (prcx >= rlevel->num_precincts_x || prcy >= rlevel->num_precincts_y)
-                            return AVERROR_PATCHWELCOME;
+                        if (prcx >= rlevel->num_precincts_x || prcy >= rlevel->num_precincts_y) {
+                            av_log(s->avctx, AV_LOG_WARNING, "prc %d %d outside limits %d %d\n",
+                                   prcx, prcy, rlevel->num_precincts_x, rlevel->num_precincts_y);
+                            continue;
+                        }
 
-                        for (layno = 0; layno < tile->codsty[0].nlayers; layno++) {
-                            if ((ret = jpeg2000_decode_packet(s, codsty, rlevel,
+                        for (layno = 0; layno < LYEpoc; layno++) {
+                            if ((ret = jpeg2000_decode_packet(s, tile, tp_index, codsty, rlevel,
                                                               precno, layno,
                                                               qntsty->expn + (reslevelno ? 3 * (reslevelno - 1) + 1 : 0),
                                                               qntsty->nguardbits)) < 0)
@@ -940,19 +1164,183 @@ static int jpeg2000_decode_packets(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile
         break;
 
     case JPEG2000_PGOD_RPCL:
-        avpriv_request_sample(s->avctx, "Progression order RPCL");
-        ret = AVERROR_PATCHWELCOME;
+        av_log(s->avctx, AV_LOG_WARNING, "Progression order RPCL\n");
+        ok_reslevel = 1;
+        for (reslevelno = RSpoc; ok_reslevel && reslevelno < REpoc; reslevelno++) {
+            ok_reslevel = 0;
+            step_x = 30;
+            step_y = 30;
+            for (compno = CSpoc; compno < CEpoc; compno++) {
+                Jpeg2000Component *comp     = tile->comp + compno;
+                Jpeg2000CodingStyle *codsty = tile->codsty + compno;
+
+                if (reslevelno < codsty->nreslevels) {
+                    uint8_t reducedresno = codsty->nreslevels - 1 -reslevelno; //  ==> N_L - r
+                    Jpeg2000ResLevel *rlevel = comp->reslevel + reslevelno;
+                    step_x = FFMIN(step_x, rlevel->log2_prec_width  + reducedresno);
+                    step_y = FFMIN(step_y, rlevel->log2_prec_height + reducedresno);
+                }
+            }
+            step_x = 1<<step_x;
+            step_y = 1<<step_y;
+
+            for (y = tile->coord[1][0]; y < tile->coord[1][1]; y = (y/step_y + 1)*step_y) {
+                for (x = tile->coord[0][0]; x < tile->coord[0][1]; x = (x/step_x + 1)*step_x) {
+                    for (compno = CSpoc; compno < CEpoc; compno++) {
+                        Jpeg2000Component *comp     = tile->comp + compno;
+                        Jpeg2000CodingStyle *codsty = tile->codsty + compno;
+                        Jpeg2000QuantStyle *qntsty  = tile->qntsty + compno;
+                        uint8_t reducedresno = codsty->nreslevels - 1 -reslevelno; //  ==> N_L - r
+                        Jpeg2000ResLevel *rlevel = comp->reslevel + reslevelno;
+                        unsigned prcx, prcy;
+
+                        int xc = x / s->cdx[compno];
+                        int yc = y / s->cdy[compno];
+
+                        if (reslevelno >= codsty->nreslevels)
+                            continue;
+
+                        if (yc % (1 << (rlevel->log2_prec_height + reducedresno)) && y != tile->coord[1][0]) //FIXME this is a subset of the check
+                            continue;
+
+                        if (xc % (1 << (rlevel->log2_prec_width + reducedresno)) && x != tile->coord[0][0]) //FIXME this is a subset of the check
+                            continue;
+
+                        // check if a precinct exists
+                        prcx   = ff_jpeg2000_ceildivpow2(xc, reducedresno) >> rlevel->log2_prec_width;
+                        prcy   = ff_jpeg2000_ceildivpow2(yc, reducedresno) >> rlevel->log2_prec_height;
+                        prcx  -= ff_jpeg2000_ceildivpow2(comp->coord_o[0][0], reducedresno) >> rlevel->log2_prec_width;
+                        prcy  -= ff_jpeg2000_ceildivpow2(comp->coord_o[1][0], reducedresno) >> rlevel->log2_prec_height;
+
+                        precno = prcx + rlevel->num_precincts_x * prcy;
+
+                        ok_reslevel = 1;
+                        if (prcx >= rlevel->num_precincts_x || prcy >= rlevel->num_precincts_y) {
+                            av_log(s->avctx, AV_LOG_WARNING, "prc %d %d outside limits %d %d\n",
+                                   prcx, prcy, rlevel->num_precincts_x, rlevel->num_precincts_y);
+                            continue;
+                        }
+
+                            for (layno = 0; layno < LYEpoc; layno++) {
+                                if ((ret = jpeg2000_decode_packet(s, tile, tp_index,
+                                                                codsty, rlevel,
+                                                                precno, layno,
+                                                                qntsty->expn + (reslevelno ? 3 * (reslevelno - 1) + 1 : 0),
+                                                                qntsty->nguardbits)) < 0)
+                                    return ret;
+                            }
+                    }
+                }
+            }
+        }
         break;
 
     case JPEG2000_PGOD_PCRL:
-        avpriv_request_sample(s->avctx, "Progression order PCRL");
-        ret = AVERROR_PATCHWELCOME;
+        av_log(s->avctx, AV_LOG_WARNING, "Progression order PCRL\n");
+        step_x = 32;
+        step_y = 32;
+        for (compno = CSpoc; compno < CEpoc; compno++) {
+            Jpeg2000Component *comp     = tile->comp + compno;
+            Jpeg2000CodingStyle *codsty = tile->codsty + compno;
+
+            for (reslevelno = RSpoc; reslevelno < FFMIN(codsty->nreslevels, REpoc); reslevelno++) {
+                uint8_t reducedresno = codsty->nreslevels - 1 -reslevelno; //  ==> N_L - r
+                Jpeg2000ResLevel *rlevel = comp->reslevel + reslevelno;
+                step_x = FFMIN(step_x, rlevel->log2_prec_width  + reducedresno);
+                step_y = FFMIN(step_y, rlevel->log2_prec_height + reducedresno);
+            }
+        }
+        if (step_x >= 31 || step_y >= 31){
+            avpriv_request_sample(s->avctx, "PCRL with large step");
+            return AVERROR_PATCHWELCOME;
+        }
+        step_x = 1<<step_x;
+        step_y = 1<<step_y;
+
+        for (y = tile->coord[1][0]; y < tile->coord[1][1]; y = (y/step_y + 1)*step_y) {
+            for (x = tile->coord[0][0]; x < tile->coord[0][1]; x = (x/step_x + 1)*step_x) {
+                for (compno = CSpoc; compno < CEpoc; compno++) {
+                    Jpeg2000Component *comp     = tile->comp + compno;
+                    Jpeg2000CodingStyle *codsty = tile->codsty + compno;
+                    Jpeg2000QuantStyle *qntsty  = tile->qntsty + compno;
+                    int xc = x / s->cdx[compno];
+                    int yc = y / s->cdy[compno];
+
+                    for (reslevelno = RSpoc; reslevelno < FFMIN(codsty->nreslevels, REpoc); reslevelno++) {
+                        unsigned prcx, prcy;
+                        uint8_t reducedresno = codsty->nreslevels - 1 -reslevelno; //  ==> N_L - r
+                        Jpeg2000ResLevel *rlevel = comp->reslevel + reslevelno;
+
+                        if (yc % (1 << (rlevel->log2_prec_height + reducedresno)) && y != tile->coord[1][0]) //FIXME this is a subset of the check
+                            continue;
+
+                        if (xc % (1 << (rlevel->log2_prec_width + reducedresno)) && x != tile->coord[0][0]) //FIXME this is a subset of the check
+                            continue;
+
+                        // check if a precinct exists
+                        prcx   = ff_jpeg2000_ceildivpow2(xc, reducedresno) >> rlevel->log2_prec_width;
+                        prcy   = ff_jpeg2000_ceildivpow2(yc, reducedresno) >> rlevel->log2_prec_height;
+                        prcx  -= ff_jpeg2000_ceildivpow2(comp->coord_o[0][0], reducedresno) >> rlevel->log2_prec_width;
+                        prcy  -= ff_jpeg2000_ceildivpow2(comp->coord_o[1][0], reducedresno) >> rlevel->log2_prec_height;
+
+                        precno = prcx + rlevel->num_precincts_x * prcy;
+
+                        if (prcx >= rlevel->num_precincts_x || prcy >= rlevel->num_precincts_y) {
+                            av_log(s->avctx, AV_LOG_WARNING, "prc %d %d outside limits %d %d\n",
+                                   prcx, prcy, rlevel->num_precincts_x, rlevel->num_precincts_y);
+                            continue;
+                        }
+
+                        for (layno = 0; layno < LYEpoc; layno++) {
+                            if ((ret = jpeg2000_decode_packet(s, tile, tp_index, codsty, rlevel,
+                                                              precno, layno,
+                                                              qntsty->expn + (reslevelno ? 3 * (reslevelno - 1) + 1 : 0),
+                                                              qntsty->nguardbits)) < 0)
+                                return ret;
+                        }
+                    }
+                }
+            }
+        }
         break;
 
     default:
         break;
     }
 
+    return ret;
+}
+
+static int jpeg2000_decode_packets(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile)
+{
+    int ret = AVERROR_BUG;
+    int i;
+    int tp_index = 0;
+
+    s->bit_index = 8;
+    if (tile->poc.nb_poc) {
+        for (i=0; i<tile->poc.nb_poc; i++) {
+            Jpeg2000POCEntry *e = &tile->poc.poc[i];
+            ret = jpeg2000_decode_packets_po_iteration(s, tile,
+                e->RSpoc, e->CSpoc,
+                FFMIN(e->LYEpoc, tile->codsty[0].nlayers),
+                e->REpoc,
+                FFMIN(e->CEpoc, s->ncomponents),
+                e->Ppoc, &tp_index
+                );
+            if (ret < 0)
+                return ret;
+        }
+    } else {
+        ret = jpeg2000_decode_packets_po_iteration(s, tile,
+            0, 0,
+            tile->codsty[0].nlayers,
+            33,
+            s->ncomponents,
+            tile->codsty[0].prog_order,
+            &tp_index
+        );
+    }
     /* EOC marker reached */
     bytestream2_skip(&s->g, 2);
 
@@ -961,7 +1349,7 @@ static int jpeg2000_decode_packets(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile
 
 /* TIER-1 routines */
 static void decode_sigpass(Jpeg2000T1Context *t1, int width, int height,
-                           int bpno, int bandno, int bpass_csty_symbol,
+                           int bpno, int bandno,
                            int vert_causal_ctx_csty_symbol)
 {
     int mask = 3 << (bpno - 1), y0, x, y;
@@ -969,29 +1357,29 @@ static void decode_sigpass(Jpeg2000T1Context *t1, int width, int height,
     for (y0 = 0; y0 < height; y0 += 4)
         for (x = 0; x < width; x++)
             for (y = y0; y < height && y < y0 + 4; y++) {
-                if ((t1->flags[y+1][x+1] & JPEG2000_T1_SIG_NB)
-                && !(t1->flags[y+1][x+1] & (JPEG2000_T1_SIG | JPEG2000_T1_VIS))) {
-                    int flags_mask = -1;
-                    if (vert_causal_ctx_csty_symbol && y == y0 + 3)
-                        flags_mask &= ~(JPEG2000_T1_SIG_S | JPEG2000_T1_SIG_SW | JPEG2000_T1_SIG_SE);
-                    if (ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ff_jpeg2000_getsigctxno(t1->flags[y+1][x+1] & flags_mask, bandno))) {
-                        int xorbit, ctxno = ff_jpeg2000_getsgnctxno(t1->flags[y+1][x+1], &xorbit);
-                        if (bpass_csty_symbol)
-                             t1->data[y][x] = ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ctxno) ? -mask : mask;
+                int flags_mask = -1;
+                if (vert_causal_ctx_csty_symbol && y == y0 + 3)
+                    flags_mask &= ~(JPEG2000_T1_SIG_S | JPEG2000_T1_SIG_SW | JPEG2000_T1_SIG_SE | JPEG2000_T1_SGN_S);
+                if ((t1->flags[(y+1) * t1->stride + x+1] & JPEG2000_T1_SIG_NB & flags_mask)
+                && !(t1->flags[(y+1) * t1->stride + x+1] & (JPEG2000_T1_SIG | JPEG2000_T1_VIS))) {
+                    if (ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ff_jpeg2000_getsigctxno(t1->flags[(y+1) * t1->stride + x+1] & flags_mask, bandno))) {
+                        int xorbit, ctxno = ff_jpeg2000_getsgnctxno(t1->flags[(y+1) * t1->stride + x+1] & flags_mask, &xorbit);
+                        if (t1->mqc.raw)
+                             t1->data[(y) * t1->stride + x] = ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ctxno) ? -mask : mask;
                         else
-                             t1->data[y][x] = (ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ctxno) ^ xorbit) ?
+                             t1->data[(y) * t1->stride + x] = (ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ctxno) ^ xorbit) ?
                                                -mask : mask;
 
                         ff_jpeg2000_set_significance(t1, x, y,
-                                                     t1->data[y][x] < 0);
+                                                     t1->data[(y) * t1->stride + x] < 0);
                     }
-                    t1->flags[y + 1][x + 1] |= JPEG2000_T1_VIS;
+                    t1->flags[(y + 1) * t1->stride + x + 1] |= JPEG2000_T1_VIS;
                 }
             }
 }
 
 static void decode_refpass(Jpeg2000T1Context *t1, int width, int height,
-                           int bpno)
+                           int bpno, int vert_causal_ctx_csty_symbol)
 {
     int phalf, nhalf;
     int y0, x, y;
@@ -1002,13 +1390,15 @@ static void decode_refpass(Jpeg2000T1Context *t1, int width, int height,
     for (y0 = 0; y0 < height; y0 += 4)
         for (x = 0; x < width; x++)
             for (y = y0; y < height && y < y0 + 4; y++)
-                if ((t1->flags[y + 1][x + 1] & (JPEG2000_T1_SIG | JPEG2000_T1_VIS)) == JPEG2000_T1_SIG) {
-                    int ctxno = ff_jpeg2000_getrefctxno(t1->flags[y + 1][x + 1]);
+                if ((t1->flags[(y + 1) * t1->stride + x + 1] & (JPEG2000_T1_SIG | JPEG2000_T1_VIS)) == JPEG2000_T1_SIG) {
+                    int flags_mask = (vert_causal_ctx_csty_symbol && y == y0 + 3) ?
+                        ~(JPEG2000_T1_SIG_S | JPEG2000_T1_SIG_SW | JPEG2000_T1_SIG_SE | JPEG2000_T1_SGN_S) : -1;
+                    int ctxno = ff_jpeg2000_getrefctxno(t1->flags[(y + 1) * t1->stride + x + 1] & flags_mask);
                     int r     = ff_mqc_decode(&t1->mqc,
                                               t1->mqc.cx_states + ctxno)
                                 ? phalf : nhalf;
-                    t1->data[y][x]          += t1->data[y][x] < 0 ? -r : r;
-                    t1->flags[y + 1][x + 1] |= JPEG2000_T1_REF;
+                    t1->data[(y) * t1->stride + x]          += t1->data[(y) * t1->stride + x] < 0 ? -r : r;
+                    t1->flags[(y + 1) * t1->stride + x + 1] |= JPEG2000_T1_REF;
                 }
 }
 
@@ -1020,11 +1410,14 @@ static void decode_clnpass(Jpeg2000DecoderContext *s, Jpeg2000T1Context *t1,
 
     for (y0 = 0; y0 < height; y0 += 4) {
         for (x = 0; x < width; x++) {
+            int flags_mask = -1;
+            if (vert_causal_ctx_csty_symbol)
+                flags_mask &= ~(JPEG2000_T1_SIG_S | JPEG2000_T1_SIG_SW | JPEG2000_T1_SIG_SE | JPEG2000_T1_SGN_S);
             if (y0 + 3 < height &&
-                !((t1->flags[y0 + 1][x + 1] & (JPEG2000_T1_SIG_NB | JPEG2000_T1_VIS | JPEG2000_T1_SIG)) ||
-                  (t1->flags[y0 + 2][x + 1] & (JPEG2000_T1_SIG_NB | JPEG2000_T1_VIS | JPEG2000_T1_SIG)) ||
-                  (t1->flags[y0 + 3][x + 1] & (JPEG2000_T1_SIG_NB | JPEG2000_T1_VIS | JPEG2000_T1_SIG)) ||
-                  (t1->flags[y0 + 4][x + 1] & (JPEG2000_T1_SIG_NB | JPEG2000_T1_VIS | JPEG2000_T1_SIG)))) {
+                !((t1->flags[(y0 + 1) * t1->stride + x + 1] & (JPEG2000_T1_SIG_NB | JPEG2000_T1_VIS | JPEG2000_T1_SIG)) ||
+                  (t1->flags[(y0 + 2) * t1->stride + x + 1] & (JPEG2000_T1_SIG_NB | JPEG2000_T1_VIS | JPEG2000_T1_SIG)) ||
+                  (t1->flags[(y0 + 3) * t1->stride + x + 1] & (JPEG2000_T1_SIG_NB | JPEG2000_T1_VIS | JPEG2000_T1_SIG)) ||
+                  (t1->flags[(y0 + 4) * t1->stride + x + 1] & (JPEG2000_T1_SIG_NB | JPEG2000_T1_VIS | JPEG2000_T1_SIG) & flags_mask))) {
                 if (!ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + MQC_CX_RL))
                     continue;
                 runlen = ff_mqc_decode(&t1->mqc,
@@ -1039,27 +1432,27 @@ static void decode_clnpass(Jpeg2000DecoderContext *s, Jpeg2000T1Context *t1,
             }
 
             for (y = y0 + runlen; y < y0 + 4 && y < height; y++) {
+                int flags_mask = -1;
+                if (vert_causal_ctx_csty_symbol && y == y0 + 3)
+                    flags_mask &= ~(JPEG2000_T1_SIG_S | JPEG2000_T1_SIG_SW | JPEG2000_T1_SIG_SE | JPEG2000_T1_SGN_S);
                 if (!dec) {
-                    if (!(t1->flags[y+1][x+1] & (JPEG2000_T1_SIG | JPEG2000_T1_VIS))) {
-                        int flags_mask = -1;
-                        if (vert_causal_ctx_csty_symbol && y == y0 + 3)
-                            flags_mask &= ~(JPEG2000_T1_SIG_S | JPEG2000_T1_SIG_SW | JPEG2000_T1_SIG_SE);
-                        dec = ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ff_jpeg2000_getsigctxno(t1->flags[y+1][x+1] & flags_mask,
+                    if (!(t1->flags[(y+1) * t1->stride + x+1] & (JPEG2000_T1_SIG | JPEG2000_T1_VIS))) {
+                        dec = ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ff_jpeg2000_getsigctxno(t1->flags[(y+1) * t1->stride + x+1] & flags_mask,
                                                                                              bandno));
                     }
                 }
                 if (dec) {
                     int xorbit;
-                    int ctxno = ff_jpeg2000_getsgnctxno(t1->flags[y + 1][x + 1],
+                    int ctxno = ff_jpeg2000_getsgnctxno(t1->flags[(y + 1) * t1->stride + x + 1] & flags_mask,
                                                         &xorbit);
-                    t1->data[y][x] = (ff_mqc_decode(&t1->mqc,
+                    t1->data[(y) * t1->stride + x] = (ff_mqc_decode(&t1->mqc,
                                                     t1->mqc.cx_states + ctxno) ^
                                       xorbit)
                                      ? -mask : mask;
-                    ff_jpeg2000_set_significance(t1, x, y, t1->data[y][x] < 0);
+                    ff_jpeg2000_set_significance(t1, x, y, t1->data[(y) * t1->stride + x] < 0);
                 }
                 dec = 0;
-                t1->flags[y + 1][x + 1] &= ~JPEG2000_T1_VIS;
+                t1->flags[(y + 1) * t1->stride + x + 1] &= ~JPEG2000_T1_VIS;
             }
         }
     }
@@ -1079,56 +1472,77 @@ static int decode_cblk(Jpeg2000DecoderContext *s, Jpeg2000CodingStyle *codsty,
                        Jpeg2000T1Context *t1, Jpeg2000Cblk *cblk,
                        int width, int height, int bandpos)
 {
-    int passno = cblk->npasses, pass_t = 2, bpno = cblk->nonzerobits - 1, y;
-    int clnpass_cnt = 0;
-    int bpass_csty_symbol           = codsty->cblk_style & JPEG2000_CBLK_BYPASS;
+    int passno = cblk->npasses, pass_t = 2, bpno = cblk->nonzerobits - 1;
+    int pass_cnt = 0;
     int vert_causal_ctx_csty_symbol = codsty->cblk_style & JPEG2000_CBLK_VSC;
+    int term_cnt = 0;
+    int coder_type;
 
-    av_assert0(width  <= JPEG2000_MAX_CBLKW);
-    av_assert0(height <= JPEG2000_MAX_CBLKH);
+    av_assert0(width <= 1024U && height <= 1024U);
+    av_assert0(width*height <= 4096);
 
-    for (y = 0; y < height; y++)
-        memset(t1->data[y], 0, width * sizeof(**t1->data));
+    memset(t1->data, 0, t1->stride * height * sizeof(*t1->data));
 
     /* If code-block contains no compressed data: nothing to do. */
     if (!cblk->length)
         return 0;
 
-    for (y = 0; y < height + 2; y++)
-        memset(t1->flags[y], 0, (width + 2) * sizeof(**t1->flags));
+    memset(t1->flags, 0, t1->stride * (height + 2) * sizeof(*t1->flags));
 
     cblk->data[cblk->length] = 0xff;
     cblk->data[cblk->length+1] = 0xff;
-    ff_mqc_initdec(&t1->mqc, cblk->data);
+    ff_mqc_initdec(&t1->mqc, cblk->data, 0, 1);
 
     while (passno--) {
+        if (bpno < 0) {
+            av_log(s->avctx, AV_LOG_ERROR, "bpno became negative\n");
+            return AVERROR_INVALIDDATA;
+        }
         switch(pass_t) {
         case 0:
             decode_sigpass(t1, width, height, bpno + 1, bandpos,
-                           bpass_csty_symbol && (clnpass_cnt >= 4),
                            vert_causal_ctx_csty_symbol);
             break;
         case 1:
-            decode_refpass(t1, width, height, bpno + 1);
-            if (bpass_csty_symbol && clnpass_cnt >= 4)
-                ff_mqc_initdec(&t1->mqc, cblk->data);
+            decode_refpass(t1, width, height, bpno + 1, vert_causal_ctx_csty_symbol);
             break;
         case 2:
+            av_assert2(!t1->mqc.raw);
             decode_clnpass(s, t1, width, height, bpno + 1, bandpos,
                            codsty->cblk_style & JPEG2000_CBLK_SEGSYM,
                            vert_causal_ctx_csty_symbol);
-            clnpass_cnt = clnpass_cnt + 1;
-            if (bpass_csty_symbol && clnpass_cnt >= 4)
-                ff_mqc_initdec(&t1->mqc, cblk->data);
             break;
         }
+        if (codsty->cblk_style & JPEG2000_CBLK_RESET) // XXX no testcase for just this
+            ff_mqc_init_contexts(&t1->mqc);
+
+        if (passno && (coder_type = needs_termination(codsty->cblk_style, pass_cnt))) {
+            if (term_cnt >= cblk->nb_terminations) {
+                av_log(s->avctx, AV_LOG_ERROR, "Missing needed termination \n");
+                return AVERROR_INVALIDDATA;
+            }
+            if (FFABS(cblk->data + cblk->data_start[term_cnt + 1] - 2 - t1->mqc.bp) > 0) {
+                av_log(s->avctx, AV_LOG_WARNING, "Mid mismatch %"PTRDIFF_SPECIFIER" in pass %d of %d\n",
+                    cblk->data + cblk->data_start[term_cnt + 1] - 2 - t1->mqc.bp,
+                    pass_cnt, cblk->npasses);
+            }
+
+            ff_mqc_initdec(&t1->mqc, cblk->data + cblk->data_start[++term_cnt], coder_type == 2, 0);
+        }
 
         pass_t++;
         if (pass_t == 3) {
             bpno--;
             pass_t = 0;
         }
+        pass_cnt ++;
     }
+
+    if (cblk->data + cblk->length - 2*(term_cnt < cblk->nb_terminations) != t1->mqc.bp) {
+        av_log(s->avctx, AV_LOG_WARNING, "End mismatch %"PTRDIFF_SPECIFIER"\n",
+               cblk->data + cblk->length - 2*(term_cnt < cblk->nb_terminations) - t1->mqc.bp);
+    }
+
     return 0;
 }
 
@@ -1147,7 +1561,7 @@ static void dequantization_float(int x, int y, Jpeg2000Cblk *cblk,
     int w = cblk->coord[0][1] - cblk->coord[0][0];
     for (j = 0; j < (cblk->coord[1][1] - cblk->coord[1][0]); ++j) {
         float *datap = &comp->f_data[(comp->coord[0][1] - comp->coord[0][0]) * (y + j) + x];
-        int *src = t1->data[j];
+        int *src = t1->data + j*t1->stride;
         for (i = 0; i < w; ++i)
             datap[i] = src[i] * band->f_stepsize;
     }
@@ -1162,9 +1576,15 @@ static void dequantization_int(int x, int y, Jpeg2000Cblk *cblk,
     int w = cblk->coord[0][1] - cblk->coord[0][0];
     for (j = 0; j < (cblk->coord[1][1] - cblk->coord[1][0]); ++j) {
         int32_t *datap = &comp->i_data[(comp->coord[0][1] - comp->coord[0][0]) * (y + j) + x];
-        int *src = t1->data[j];
-        for (i = 0; i < w; ++i)
-            datap[i] = (src[i] * band->i_stepsize) / 32768;
+        int *src = t1->data + j*t1->stride;
+        if (band->i_stepsize == 32768) {
+            for (i = 0; i < w; ++i)
+                datap[i] = src[i] / 2;
+        } else {
+            // General case (i_stepsize != 32768); this should be VERY uncommon
+            for (i = 0; i < w; ++i)
+                datap[i] = (src[i] * (int64_t)band->i_stepsize) / 65536;
+        }
     }
 }
 
@@ -1176,9 +1596,9 @@ static void dequantization_int_97(int x, int y, Jpeg2000Cblk *cblk,
     int w = cblk->coord[0][1] - cblk->coord[0][0];
     for (j = 0; j < (cblk->coord[1][1] - cblk->coord[1][0]); ++j) {
         int32_t *datap = &comp->i_data[(comp->coord[0][1] - comp->coord[0][0]) * (y + j) + x];
-        int *src = t1->data[j];
+        int *src = t1->data + j*t1->stride;
         for (i = 0; i < w; ++i)
-            datap[i] = (src[i] * band->i_stepsize + (1<<14)) >> 15;
+            datap[i] = (src[i] * (int64_t)band->i_stepsize + (1<<15)) >> 16;
     }
 }
 
@@ -1210,23 +1630,19 @@ static inline void mct_decode(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile)
     s->dsp.mct_decode[tile->codsty[0].transform](src[0], src[1], src[2], csize);
 }
 
-static int jpeg2000_decode_tile(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
-                                AVFrame *picture)
+static inline void tile_codeblocks(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile)
 {
-    const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
-    int compno, reslevelno, bandno;
-    int x, y;
-    int planar    = !!(pixdesc->flags & AV_PIX_FMT_FLAG_PLANAR);
-    int pixelsize = planar ? 1 : pixdesc->nb_components;
-
-    uint8_t *line;
     Jpeg2000T1Context t1;
 
+    int compno, reslevelno, bandno;
+
     /* Loop on tile components */
     for (compno = 0; compno < s->ncomponents; compno++) {
         Jpeg2000Component *comp     = tile->comp + compno;
         Jpeg2000CodingStyle *codsty = tile->codsty + compno;
 
+        t1.stride = (1<<codsty->log2_cblk_width) + 2;
+
         /* Loop on resolution levels */
         for (reslevelno = 0; reslevelno < codsty->nreslevels2decode; reslevelno++) {
             Jpeg2000ResLevel *rlevel = comp->reslevel + reslevelno;
@@ -1248,7 +1664,9 @@ static int jpeg2000_decode_tile(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
                     Jpeg2000Prec *prec = band->prec + precno;
 
                     /* Loop on codeblocks */
-                    for (cblkno = 0; cblkno < prec->nb_codeblocks_width * prec->nb_codeblocks_height; cblkno++) {
+                    for (cblkno = 0;
+                         cblkno < prec->nb_codeblocks_width * prec->nb_codeblocks_height;
+                         cblkno++) {
                         int x, y;
                         Jpeg2000Cblk *cblk = prec->cblk + cblkno;
                         decode_cblk(s, codsty, &t1, cblk,
@@ -1256,8 +1674,8 @@ static int jpeg2000_decode_tile(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
                                     cblk->coord[1][1] - cblk->coord[1][0],
                                     bandpos);
 
-                        x = cblk->coord[0][0];
-                        y = cblk->coord[1][0];
+                        x = cblk->coord[0][0] - band->coord[0][0];
+                        y = cblk->coord[1][0] - band->coord[1][0];
 
                         if (codsty->transform == FF_DWT97)
                             dequantization_float(x, y, cblk, comp, &t1, band);
@@ -1273,110 +1691,101 @@ static int jpeg2000_decode_tile(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
         /* inverse DWT */
         ff_dwt_decode(&comp->dwt, codsty->transform == FF_DWT97 ? (void*)comp->f_data : (void*)comp->i_data);
     } /*end comp */
+}
+
+#define WRITE_FRAME(D, PIXEL)                                                                     \
+    static inline void write_frame_ ## D(Jpeg2000DecoderContext * s, Jpeg2000Tile * tile,         \
+                                         AVFrame * picture, int precision)                        \
+    {                                                                                             \
+        const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(s->avctx->pix_fmt);               \
+        int planar    = !!(pixdesc->flags & AV_PIX_FMT_FLAG_PLANAR);                              \
+        int pixelsize = planar ? 1 : pixdesc->nb_components;                                      \
+                                                                                                  \
+        int compno;                                                                               \
+        int x, y;                                                                                 \
+                                                                                                  \
+        for (compno = 0; compno < s->ncomponents; compno++) {                                     \
+            Jpeg2000Component *comp     = tile->comp + compno;                                    \
+            Jpeg2000CodingStyle *codsty = tile->codsty + compno;                                  \
+            PIXEL *line;                                                                          \
+            float *datap     = comp->f_data;                                                      \
+            int32_t *i_datap = comp->i_data;                                                      \
+            int cbps         = s->cbps[compno];                                                   \
+            int w            = tile->comp[compno].coord[0][1] - s->image_offset_x;                \
+            int plane        = 0;                                                                 \
+                                                                                                  \
+            if (planar)                                                                           \
+                plane = s->cdef[compno] ? s->cdef[compno]-1 : (s->ncomponents-1);                 \
+                                                                                                  \
+            y    = tile->comp[compno].coord[1][0] - s->image_offset_y / s->cdy[compno];           \
+            line = (PIXEL *)picture->data[plane] + y * (picture->linesize[plane] / sizeof(PIXEL));\
+            for (; y < tile->comp[compno].coord[1][1] - s->image_offset_y; y++) {                 \
+                PIXEL *dst;                                                                       \
+                                                                                                  \
+                x   = tile->comp[compno].coord[0][0] - s->image_offset_x / s->cdx[compno];        \
+                dst = line + x * pixelsize + compno*!planar;                                      \
+                                                                                                  \
+                if (codsty->transform == FF_DWT97) {                                              \
+                    for (; x < w; x++) {                                                          \
+                        int val = lrintf(*datap) + (1 << (cbps - 1));                             \
+                        /* DC level shift and clip; see ISO 15444-1:2002 G.1.2 */                 \
+                        val  = av_clip(val, 0, (1 << cbps) - 1);                                  \
+                        *dst = val << (precision - cbps);                                         \
+                        datap++;                                                                  \
+                        dst += pixelsize;                                                         \
+                    }                                                                             \
+                } else {                                                                          \
+                    for (; x < w; x++) {                                                          \
+                        int val = *i_datap + (1 << (cbps - 1));                                   \
+                        /* DC level shift and clip; see ISO 15444-1:2002 G.1.2 */                 \
+                        val  = av_clip(val, 0, (1 << cbps) - 1);                                  \
+                        *dst = val << (precision - cbps);                                         \
+                        i_datap++;                                                                \
+                        dst += pixelsize;                                                         \
+                    }                                                                             \
+                }                                                                                 \
+                line += picture->linesize[plane] / sizeof(PIXEL);                                 \
+            }                                                                                     \
+        }                                                                                         \
+                                                                                                  \
+    }
+
+WRITE_FRAME(8, uint8_t)
+WRITE_FRAME(16, uint16_t)
+
+#undef WRITE_FRAME
+
+static int jpeg2000_decode_tile(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
+                                AVFrame *picture)
+{
+    int x;
+
+    tile_codeblocks(s, tile);
 
     /* inverse MCT transformation */
     if (tile->codsty[0].mct)
         mct_decode(s, tile);
 
-    if (s->cdef[0] < 0) {
-        for (x = 0; x < s->ncomponents; x++)
-            s->cdef[x] = x + 1;
-        if ((s->ncomponents & 1) == 0)
-            s->cdef[s->ncomponents-1] = 0;
+    for (x = 0; x < s->ncomponents; x++) {
+        if (s->cdef[x] < 0) {
+            for (x = 0; x < s->ncomponents; x++) {
+                s->cdef[x] = x + 1;
+            }
+            if ((s->ncomponents & 1) == 0)
+                s->cdef[s->ncomponents-1] = 0;
+            break;
+        }
     }
 
     if (s->precision <= 8) {
-        for (compno = 0; compno < s->ncomponents; compno++) {
-            Jpeg2000Component *comp = tile->comp + compno;
-            Jpeg2000CodingStyle *codsty = tile->codsty + compno;
-            float *datap = comp->f_data;
-            int32_t *i_datap = comp->i_data;
-            int cbps = s->cbps[compno];
-            int w = tile->comp[compno].coord[0][1] - s->image_offset_x;
-            int plane = 0;
-
-            if (planar)
-                plane = s->cdef[compno] ? s->cdef[compno]-1 : (s->ncomponents-1);
-
-
-            y    = tile->comp[compno].coord[1][0] - s->image_offset_y;
-            line = picture->data[plane] + y / s->cdy[compno] * picture->linesize[plane];
-            for (; y < tile->comp[compno].coord[1][1] - s->image_offset_y; y ++) {
-                uint8_t *dst;
-
-                x   = tile->comp[compno].coord[0][0] - s->image_offset_x;
-                dst = line + x / s->cdx[compno] * pixelsize + compno*!planar;
-
-                if (codsty->transform == FF_DWT97) {
-                    for (; x < w; x ++) {
-                        int val = lrintf(*datap) + (1 << (cbps - 1));
-                        /* DC level shift and clip see ISO 15444-1:2002 G.1.2 */
-                        val = av_clip(val, 0, (1 << cbps) - 1);
-                        *dst = val << (8 - cbps);
-                        datap++;
-                        dst += pixelsize;
-                    }
-                } else {
-                    for (; x < w; x ++) {
-                        int val = *i_datap + (1 << (cbps - 1));
-                        /* DC level shift and clip see ISO 15444-1:2002 G.1.2 */
-                        val = av_clip(val, 0, (1 << cbps) - 1);
-                        *dst = val << (8 - cbps);
-                        i_datap++;
-                        dst += pixelsize;
-                    }
-                }
-                line += picture->linesize[plane];
-            }
-        }
+        write_frame_8(s, tile, picture, 8);
     } else {
         int precision = picture->format == AV_PIX_FMT_XYZ12 ||
+                        picture->format == AV_PIX_FMT_RGB48 ||
+                        picture->format == AV_PIX_FMT_RGBA64 ||
                         picture->format == AV_PIX_FMT_GRAY16 ? 16 : s->precision;
 
-        for (compno = 0; compno < s->ncomponents; compno++) {
-            Jpeg2000Component *comp = tile->comp + compno;
-            Jpeg2000CodingStyle *codsty = tile->codsty + compno;
-            float *datap = comp->f_data;
-            int32_t *i_datap = comp->i_data;
-            uint16_t *linel;
-            int cbps = s->cbps[compno];
-            int w = tile->comp[compno].coord[0][1] - s->image_offset_x;
-            int plane = 0;
-
-            if (planar)
-                plane = s->cdef[compno] ? s->cdef[compno]-1 : (s->ncomponents-1);
-
-            y     = tile->comp[compno].coord[1][0] - s->image_offset_y;
-            linel = (uint16_t *)picture->data[plane] + y / s->cdy[compno] * (picture->linesize[plane] >> 1);
-            for (; y < tile->comp[compno].coord[1][1] - s->image_offset_y; y ++) {
-                uint16_t *dst;
-
-                x   = tile->comp[compno].coord[0][0] - s->image_offset_x;
-                dst = linel + (x / s->cdx[compno] * pixelsize + compno*!planar);
-                if (codsty->transform == FF_DWT97) {
-                    for (; x < w; x ++) {
-                        int  val = lrintf(*datap) + (1 << (cbps - 1));
-                        /* DC level shift and clip see ISO 15444-1:2002 G.1.2 */
-                        val = av_clip(val, 0, (1 << cbps) - 1);
-                        /* align 12 bit values in little-endian mode */
-                        *dst = val << (precision - cbps);
-                        datap++;
-                        dst += pixelsize;
-                    }
-                } else {
-                    for (; x < w; x ++) {
-                        int val = *i_datap + (1 << (cbps - 1));
-                        /* DC level shift and clip see ISO 15444-1:2002 G.1.2 */
-                        val = av_clip(val, 0, (1 << cbps) - 1);
-                        /* align 12 bit values in little-endian mode */
-                        *dst = val << (precision - cbps);
-                        i_datap++;
-                        dst += pixelsize;
-                    }
-                }
-                linel += picture->linesize[plane] >> 1;
-            }
-        }
+        write_frame_16(s, tile, picture, precision);
     }
 
     return 0;
@@ -1399,13 +1808,17 @@ static void jpeg2000_dec_cleanup(Jpeg2000DecoderContext *s)
     av_freep(&s->tile);
     memset(s->codsty, 0, sizeof(s->codsty));
     memset(s->qntsty, 0, sizeof(s->qntsty));
+    memset(s->properties, 0, sizeof(s->properties));
+    memset(&s->poc  , 0, sizeof(s->poc));
     s->numXtiles = s->numYtiles = 0;
+    s->ncomponents = 0;
 }
 
 static int jpeg2000_read_main_headers(Jpeg2000DecoderContext *s)
 {
     Jpeg2000CodingStyle *codsty = s->codsty;
     Jpeg2000QuantStyle *qntsty  = s->qntsty;
+    Jpeg2000POC         *poc    = &s->poc;
     uint8_t *properties         = s->properties;
 
     for (;;) {
@@ -1449,11 +1862,17 @@ static int jpeg2000_read_main_headers(Jpeg2000DecoderContext *s)
             break;
 
         len = bytestream2_get_be16(&s->g);
-        if (len < 2 || bytestream2_get_bytes_left(&s->g) < len - 2)
+        if (len < 2 || bytestream2_get_bytes_left(&s->g) < len - 2) {
+            av_log(s->avctx, AV_LOG_ERROR, "Invalid len %d left=%d\n", len, bytestream2_get_bytes_left(&s->g));
             return AVERROR_INVALIDDATA;
+        }
 
         switch (marker) {
         case JPEG2000_SIZ:
+            if (s->ncomponents) {
+                av_log(s->avctx, AV_LOG_ERROR, "Duplicate SIZ\n");
+                return AVERROR_INVALIDDATA;
+            }
             ret = get_siz(s);
             if (!s->tile)
                 s->numXtiles = s->numYtiles = 0;
@@ -1470,14 +1889,20 @@ static int jpeg2000_read_main_headers(Jpeg2000DecoderContext *s)
         case JPEG2000_QCD:
             ret = get_qcd(s, len, qntsty, properties);
             break;
+        case JPEG2000_POC:
+            ret = get_poc(s, len, poc);
+            break;
         case JPEG2000_SOT:
             if (!(ret = get_sot(s, len))) {
                 av_assert1(s->curtileno >= 0);
                 codsty = s->tile[s->curtileno].codsty;
                 qntsty = s->tile[s->curtileno].qntsty;
+                poc    = &s->tile[s->curtileno].poc;
                 properties = s->tile[s->curtileno].properties;
             }
             break;
+        case JPEG2000_PLM:
+            // the PLM marker is ignored: fall through to COM, which skips the payload
         case JPEG2000_COM:
             // the comment is ignored
             bytestream2_skip(&s->g, len - 2);
@@ -1516,11 +1941,11 @@ static int jpeg2000_read_bitstream_packets(Jpeg2000DecoderContext *s)
     for (tileno = 0; tileno < s->numXtiles * s->numYtiles; tileno++) {
         Jpeg2000Tile *tile = s->tile + tileno;
 
-        if (ret = init_tile(s, tileno))
+        if ((ret = init_tile(s, tileno)) < 0)
             return ret;
 
         s->g = tile->tile_part[0].tpg;
-        if (ret = jpeg2000_decode_packets(s, tile))
+        if ((ret = jpeg2000_decode_packets(s, tile)) < 0)
             return ret;
     }
 
@@ -1722,15 +2147,6 @@ static const AVOption options[] = {
     { NULL },
 };
 
-static const AVProfile profiles[] = {
-    { FF_PROFILE_JPEG2000_CSTREAM_RESTRICTION_0,  "JPEG 2000 codestream restriction 0"   },
-    { FF_PROFILE_JPEG2000_CSTREAM_RESTRICTION_1,  "JPEG 2000 codestream restriction 1"   },
-    { FF_PROFILE_JPEG2000_CSTREAM_NO_RESTRICTION, "JPEG 2000 no codestream restrictions" },
-    { FF_PROFILE_JPEG2000_DCINEMA_2K,             "JPEG 2000 digital cinema 2K"          },
-    { FF_PROFILE_JPEG2000_DCINEMA_4K,             "JPEG 2000 digital cinema 4K"          },
-    { FF_PROFILE_UNKNOWN },
-};
-
 static const AVClass jpeg2000_class = {
     .class_name = "jpeg2000",
     .item_name  = av_default_item_name,
@@ -1743,12 +2159,12 @@ AVCodec ff_jpeg2000_decoder = {
     .long_name        = NULL_IF_CONFIG_SMALL("JPEG 2000"),
     .type             = AVMEDIA_TYPE_VIDEO,
     .id               = AV_CODEC_ID_JPEG2000,
-    .capabilities     = CODEC_CAP_FRAME_THREADS,
+    .capabilities     = AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_DR1,
     .priv_data_size   = sizeof(Jpeg2000DecoderContext),
     .init_static_data = jpeg2000_init_static_data,
     .init             = jpeg2000_decode_init,
     .decode           = jpeg2000_decode_frame,
     .priv_class       = &jpeg2000_class,
     .max_lowres       = 5,
-    .profiles         = NULL_IF_CONFIG_SMALL(profiles)
+    .profiles         = NULL_IF_CONFIG_SMALL(ff_jpeg2000_profiles)
 };
diff --git a/libavcodec/jpeg2000dsp.c b/libavcodec/jpeg2000dsp.c
index a7c7f53b..d183cbb8 100644
--- a/libavcodec/jpeg2000dsp.c
+++ b/libavcodec/jpeg2000dsp.c
@@ -95,4 +95,7 @@ av_cold void ff_jpeg2000dsp_init(Jpeg2000DSPContext *c)
     c->mct_decode[FF_DWT97]     = ict_float;
     c->mct_decode[FF_DWT53]     = rct_int;
     c->mct_decode[FF_DWT97_INT] = ict_int;
+
+    if (ARCH_X86)
+        ff_jpeg2000dsp_init_x86(c);
 }
diff --git a/libavcodec/jpeg2000dsp.h b/libavcodec/jpeg2000dsp.h
index de1ddb94..1ae5b95d 100644
--- a/libavcodec/jpeg2000dsp.h
+++ b/libavcodec/jpeg2000dsp.h
@@ -31,5 +31,6 @@ typedef struct Jpeg2000DSPContext {
 } Jpeg2000DSPContext;
 
 void ff_jpeg2000dsp_init(Jpeg2000DSPContext *c);
+void ff_jpeg2000dsp_init_x86(Jpeg2000DSPContext *c);
 
 #endif /* AVCODEC_JPEG2000DSP_H */
diff --git a/libavcodec/jpeg2000dwt.c b/libavcodec/jpeg2000dwt.c
index ceceda36..a46c93a9 100644
--- a/libavcodec/jpeg2000dwt.c
+++ b/libavcodec/jpeg2000dwt.c
@@ -25,6 +25,7 @@
  * Discrete wavelet transform
  */
 
+#include "libavutil/avassert.h"
 #include "libavutil/common.h"
 #include "libavutil/mem.h"
 #include "jpeg2000dwt.h"
@@ -36,20 +37,16 @@
 #define F_LFTG_BETA   0.052980118572961f
 #define F_LFTG_GAMMA  0.882911075530934f
 #define F_LFTG_DELTA  0.443506852043971f
-#define F_LFTG_K      1.230174104914001f
-#define F_LFTG_X      1.625732422f
-/* FIXME: Why use 1.625732422 instead of 1/F_LFTG_K?
- * Incorrect value in JPEG2000 norm.
- * see (ISO/IEC 15444:1 (version 2002) F.3.8.2 */
 
 /* Lifting parameters in integer format.
  * Computed as param = (float param) * (1 << 16) */
-#define I_LFTG_ALPHA  103949
-#define I_LFTG_BETA     3472
-#define I_LFTG_GAMMA   57862
-#define I_LFTG_DELTA   29066
-#define I_LFTG_K       80621
-#define I_LFTG_X      106544
+#define I_LFTG_ALPHA  103949ll
+#define I_LFTG_BETA     3472ll
+#define I_LFTG_GAMMA   57862ll
+#define I_LFTG_DELTA   29066ll
+#define I_LFTG_K       80621ll
+#define I_LFTG_X       53274ll
+#define I_PRESHIFT 8
 
 static inline void extend53(int *p, int i0, int i1)
 {
@@ -83,14 +80,17 @@ static void sd_1d53(int *p, int i0, int i1)
 {
     int i;
 
-    if (i1 == i0 + 1)
+    if (i1 <= i0 + 1) {
+        if (i0 == 1)
+            p[1] <<= 1;
         return;
+    }
 
     extend53(p, i0, i1);
 
-    for (i = (i0+1)/2 - 1; i < (i1+1)/2; i++)
+    for (i = ((i0+1)>>1) - 1; i < (i1+1)>>1; i++)
         p[2*i+1] -= (p[2*i] + p[2*i+2]) >> 1;
-    for (i = (i0+1)/2; i < (i1+1)/2; i++)
+    for (i = ((i0+1)>>1); i < (i1+1)>>1; i++)
         p[2*i] += (p[2*i-1] + p[2*i+1] + 2) >> 2;
 }
 
@@ -109,23 +109,6 @@ static void dwt_encode53(DWTContext *s, int *t)
             lp;
         int *l;
 
-        // HOR_SD
-        l = line + mh;
-        for (lp = 0; lp < lv; lp++){
-            int i, j = 0;
-
-            for (i = 0; i < lh; i++)
-                l[i] = t[w*lp + i];
-
-            sd_1d53(line, mh, mh + lh);
-
-            // copy back and deinterleave
-            for (i =   mh; i < lh; i+=2, j++)
-                t[w*lp + j] = l[i];
-            for (i = 1-mh; i < lh; i+=2, j++)
-                t[w*lp + j] = l[i];
-        }
-
         // VER_SD
         l = line + mv;
         for (lp = 0; lp < lh; lp++) {
@@ -142,25 +125,47 @@ static void dwt_encode53(DWTContext *s, int *t)
             for (i = 1-mv; i < lv; i+=2, j++)
                 t[w*j + lp] = l[i];
         }
+
+        // HOR_SD
+        l = line + mh;
+        for (lp = 0; lp < lv; lp++){
+            int i, j = 0;
+
+            for (i = 0; i < lh; i++)
+                l[i] = t[w*lp + i];
+
+            sd_1d53(line, mh, mh + lh);
+
+            // copy back and deinterleave
+            for (i =   mh; i < lh; i+=2, j++)
+                t[w*lp + j] = l[i];
+            for (i = 1-mh; i < lh; i+=2, j++)
+                t[w*lp + j] = l[i];
+        }
     }
 }
 static void sd_1d97_float(float *p, int i0, int i1)
 {
     int i;
 
-    if (i1 == i0 + 1)
+    if (i1 <= i0 + 1) {
+        if (i0 == 1)
+            p[1] *= F_LFTG_X * 2;
+        else
+            p[0] *= F_LFTG_K;
         return;
+    }
 
     extend97_float(p, i0, i1);
     i0++; i1++;
 
-    for (i = i0/2 - 2; i < i1/2 + 1; i++)
+    for (i = (i0>>1) - 2; i < (i1>>1) + 1; i++)
         p[2*i+1] -= 1.586134 * (p[2*i] + p[2*i+2]);
-    for (i = i0/2 - 1; i < i1/2 + 1; i++)
+    for (i = (i0>>1) - 1; i < (i1>>1) + 1; i++)
         p[2*i] -= 0.052980 * (p[2*i-1] + p[2*i+1]);
-    for (i = i0/2 - 1; i < i1/2; i++)
+    for (i = (i0>>1) - 1; i < (i1>>1); i++)
         p[2*i+1] += 0.882911 * (p[2*i] + p[2*i+2]);
-    for (i = i0/2; i < i1/2; i++)
+    for (i = (i0>>1); i < (i1>>1); i++)
         p[2*i] += 0.443506 * (p[2*i-1] + p[2*i+1]);
 }
 
@@ -191,9 +196,9 @@ static void dwt_encode97_float(DWTContext *s, float *t)
 
             // copy back and deinterleave
             for (i =   mh; i < lh; i+=2, j++)
-                t[w*lp + j] = F_LFTG_X * l[i] / 2;
+                t[w*lp + j] = l[i];
             for (i = 1-mh; i < lh; i+=2, j++)
-                t[w*lp + j] = F_LFTG_K * l[i] / 2;
+                t[w*lp + j] = l[i];
         }
 
         // VER_SD
@@ -208,9 +213,9 @@ static void dwt_encode97_float(DWTContext *s, float *t)
 
             // copy back and deinterleave
             for (i =   mv; i < lv; i+=2, j++)
-                t[w*j + lp] = F_LFTG_X * l[i] / 2;
+                t[w*j + lp] = l[i];
             for (i = 1-mv; i < lv; i+=2, j++)
-                t[w*j + lp] = F_LFTG_K * l[i] / 2;
+                t[w*j + lp] = l[i];
         }
     }
 }
@@ -219,29 +224,39 @@ static void sd_1d97_int(int *p, int i0, int i1)
 {
     int i;
 
-    if (i1 == i0 + 1)
+    if (i1 <= i0 + 1) {
+        if (i0 == 1)
+            p[1] = (p[1] * I_LFTG_X + (1<<14)) >> 15;
+        else
+            p[0] = (p[0] * I_LFTG_K + (1<<15)) >> 16;
         return;
+    }
 
     extend97_int(p, i0, i1);
     i0++; i1++;
 
-    for (i = i0/2 - 2; i < i1/2 + 1; i++)
+    for (i = (i0>>1) - 2; i < (i1>>1) + 1; i++)
         p[2 * i + 1] -= (I_LFTG_ALPHA * (p[2 * i]     + p[2 * i + 2]) + (1 << 15)) >> 16;
-    for (i = i0/2 - 1; i < i1/2 + 1; i++)
+    for (i = (i0>>1) - 1; i < (i1>>1) + 1; i++)
         p[2 * i]     -= (I_LFTG_BETA  * (p[2 * i - 1] + p[2 * i + 1]) + (1 << 15)) >> 16;
-    for (i = i0/2 - 1; i < i1/2; i++)
+    for (i = (i0>>1) - 1; i < (i1>>1); i++)
         p[2 * i + 1] += (I_LFTG_GAMMA * (p[2 * i]     + p[2 * i + 2]) + (1 << 15)) >> 16;
-    for (i = i0/2; i < i1/2; i++)
+    for (i = (i0>>1); i < (i1>>1); i++)
         p[2 * i]     += (I_LFTG_DELTA * (p[2 * i - 1] + p[2 * i + 1]) + (1 << 15)) >> 16;
 }
 
 static void dwt_encode97_int(DWTContext *s, int *t)
 {
-    int lev,
-        w = s->linelen[s->ndeclevels-1][0];
+    int lev;
+    int w = s->linelen[s->ndeclevels-1][0];
+    int h = s->linelen[s->ndeclevels-1][1];
+    int i;
     int *line = s->i_linebuf;
     line += 5;
 
+    for (i = 0; i < w * h; i++)
+        t[i] <<= I_PRESHIFT;
+
     for (lev = s->ndeclevels-1; lev >= 0; lev--){
         int lh = s->linelen[lev][0],
             lv = s->linelen[lev][1],
@@ -250,6 +265,23 @@ static void dwt_encode97_int(DWTContext *s, int *t)
             lp;
         int *l;
 
+        // VER_SD
+        l = line + mv;
+        for (lp = 0; lp < lh; lp++) {
+            int i, j = 0;
+
+            for (i = 0; i < lv; i++)
+                l[i] = t[w*i + lp];
+
+            sd_1d97_int(line, mv, mv + lv);
+
+            // copy back and deinterleave
+            for (i =   mv; i < lv; i+=2, j++)
+                t[w*j + lp] = ((l[i] * I_LFTG_X) + (1 << 15)) >> 16;
+            for (i = 1-mv; i < lv; i+=2, j++)
+                t[w*j + lp] = l[i];
+        }
+
         // HOR_SD
         l = line + mh;
         for (lp = 0; lp < lv; lp++){
@@ -262,42 +294,32 @@ static void dwt_encode97_int(DWTContext *s, int *t)
 
             // copy back and deinterleave
             for (i =   mh; i < lh; i+=2, j++)
-                t[w*lp + j] = ((l[i] * I_LFTG_X) + (1 << 16)) >> 17;
+                t[w*lp + j] = ((l[i] * I_LFTG_X) + (1 << 15)) >> 16;
             for (i = 1-mh; i < lh; i+=2, j++)
-                t[w*lp + j] = ((l[i] * I_LFTG_K) + (1 << 16)) >> 17;
+                t[w*lp + j] = l[i];
         }
 
-        // VER_SD
-        l = line + mv;
-        for (lp = 0; lp < lh; lp++) {
-            int i, j = 0;
-
-            for (i = 0; i < lv; i++)
-                l[i] = t[w*i + lp];
-
-            sd_1d97_int(line, mv, mv + lv);
-
-            // copy back and deinterleave
-            for (i =   mv; i < lv; i+=2, j++)
-                t[w*j + lp] = ((l[i] * I_LFTG_X) + (1 << 16)) >> 17;
-            for (i = 1-mv; i < lv; i+=2, j++)
-                t[w*j + lp] = ((l[i] * I_LFTG_K) + (1 << 16)) >> 17;
-        }
     }
+
+    for (i = 0; i < w * h; i++)
+        t[i] = (t[i] + ((1<<I_PRESHIFT)>>1)) >> I_PRESHIFT;
 }
 
 static void sr_1d53(int *p, int i0, int i1)
 {
     int i;
 
-    if (i1 == i0 + 1)
+    if (i1 <= i0 + 1) {
+        if (i0 == 1)
+            p[1] >>= 1;
         return;
+    }
 
     extend53(p, i0, i1);
 
-    for (i = i0 / 2; i < i1 / 2 + 1; i++)
+    for (i = (i0 >> 1); i < (i1 >> 1) + 1; i++)
         p[2 * i] -= (p[2 * i - 1] + p[2 * i + 1] + 2) >> 2;
-    for (i = i0 / 2; i < i1 / 2; i++)
+    for (i = (i0 >> 1); i < (i1 >> 1); i++)
         p[2 * i + 1] += (p[2 * i] + p[2 * i + 2]) >> 1;
 }
 
@@ -354,21 +376,26 @@ static void sr_1d97_float(float *p, int i0, int i1)
 {
     int i;
 
-    if (i1 == i0 + 1)
+    if (i1 <= i0 + 1) {
+        if (i0 == 1)
+            p[1] *= F_LFTG_K/2;
+        else
+            p[0] *= F_LFTG_X;
         return;
+    }
 
     extend97_float(p, i0, i1);
 
-    for (i = i0 / 2 - 1; i < i1 / 2 + 2; i++)
+    for (i = (i0 >> 1) - 1; i < (i1 >> 1) + 2; i++)
         p[2 * i]     -= F_LFTG_DELTA * (p[2 * i - 1] + p[2 * i + 1]);
     /* step 4 */
-    for (i = i0 / 2 - 1; i < i1 / 2 + 1; i++)
+    for (i = (i0 >> 1) - 1; i < (i1 >> 1) + 1; i++)
         p[2 * i + 1] -= F_LFTG_GAMMA * (p[2 * i]     + p[2 * i + 2]);
     /*step 5*/
-    for (i = i0 / 2; i < i1 / 2 + 1; i++)
+    for (i = (i0 >> 1); i < (i1 >> 1) + 1; i++)
         p[2 * i]     += F_LFTG_BETA  * (p[2 * i - 1] + p[2 * i + 1]);
     /* step 6 */
-    for (i = i0 / 2; i < i1 / 2; i++)
+    for (i = (i0 >> 1); i < (i1 >> 1); i++)
         p[2 * i + 1] += F_LFTG_ALPHA * (p[2 * i]     + p[2 * i + 2]);
 }
 
@@ -394,9 +421,9 @@ static void dwt_decode97_float(DWTContext *s, float *t)
             int i, j = 0;
             // copy with interleaving
             for (i = mh; i < lh; i += 2, j++)
-                l[i] = data[w * lp + j] * F_LFTG_K;
+                l[i] = data[w * lp + j];
             for (i = 1 - mh; i < lh; i += 2, j++)
-                l[i] = data[w * lp + j] * F_LFTG_X;
+                l[i] = data[w * lp + j];
 
             sr_1d97_float(line, mh, mh + lh);
 
@@ -410,9 +437,9 @@ static void dwt_decode97_float(DWTContext *s, float *t)
             int i, j = 0;
             // copy with interleaving
             for (i = mv; i < lv; i += 2, j++)
-                l[i] = data[w * j + lp] * F_LFTG_K;
+                l[i] = data[w * j + lp];
             for (i = 1 - mv; i < lv; i += 2, j++)
-                l[i] = data[w * j + lp] * F_LFTG_X;
+                l[i] = data[w * j + lp];
 
             sr_1d97_float(line, mv, mv + lv);
 
@@ -426,21 +453,26 @@ static void sr_1d97_int(int32_t *p, int i0, int i1)
 {
     int i;
 
-    if (i1 == i0 + 1)
+    if (i1 <= i0 + 1) {
+        if (i0 == 1)
+            p[1] = (p[1] * I_LFTG_K + (1<<16)) >> 17;
+        else
+            p[0] = (p[0] * I_LFTG_X + (1<<15)) >> 16;
         return;
+    }
 
     extend97_int(p, i0, i1);
 
-    for (i = i0 / 2 - 1; i < i1 / 2 + 2; i++)
+    for (i = (i0 >> 1) - 1; i < (i1 >> 1) + 2; i++)
         p[2 * i]     -= (I_LFTG_DELTA * (p[2 * i - 1] + p[2 * i + 1]) + (1 << 15)) >> 16;
     /* step 4 */
-    for (i = i0 / 2 - 1; i < i1 / 2 + 1; i++)
+    for (i = (i0 >> 1) - 1; i < (i1 >> 1) + 1; i++)
         p[2 * i + 1] -= (I_LFTG_GAMMA * (p[2 * i]     + p[2 * i + 2]) + (1 << 15)) >> 16;
     /*step 5*/
-    for (i = i0 / 2; i < i1 / 2 + 1; i++)
+    for (i = (i0 >> 1); i < (i1 >> 1) + 1; i++)
         p[2 * i]     += (I_LFTG_BETA  * (p[2 * i - 1] + p[2 * i + 1]) + (1 << 15)) >> 16;
     /* step 6 */
-    for (i = i0 / 2; i < i1 / 2; i++)
+    for (i = (i0 >> 1); i < (i1 >> 1); i++)
         p[2 * i + 1] += (I_LFTG_ALPHA * (p[2 * i]     + p[2 * i + 2]) + (1 << 15)) >> 16;
 }
 
@@ -448,11 +480,16 @@ static void dwt_decode97_int(DWTContext *s, int32_t *t)
 {
     int lev;
     int w       = s->linelen[s->ndeclevels - 1][0];
+    int h       = s->linelen[s->ndeclevels - 1][1];
+    int i;
     int32_t *line = s->i_linebuf;
     int32_t *data = t;
     /* position at index O of line range [0-5,w+5] cf. extend function */
     line += 5;
 
+    for (i = 0; i < w * h; i++)
+        data[i] <<= I_PRESHIFT;
+
     for (lev = 0; lev < s->ndeclevels; lev++) {
         int lh = s->linelen[lev][0],
             lv = s->linelen[lev][1],
@@ -468,7 +505,7 @@ static void dwt_decode97_int(DWTContext *s, int32_t *t)
             for (i = mh; i < lh; i += 2, j++)
                 l[i] = ((data[w * lp + j] * I_LFTG_K) + (1 << 15)) >> 16;
             for (i = 1 - mh; i < lh; i += 2, j++)
-                l[i] = ((data[w * lp + j] * I_LFTG_X) + (1 << 15)) >> 16;
+                l[i] = data[w * lp + j];
 
             sr_1d97_int(line, mh, mh + lh);
 
@@ -484,7 +521,7 @@ static void dwt_decode97_int(DWTContext *s, int32_t *t)
             for (i = mv; i < lv; i += 2, j++)
                 l[i] = ((data[w * j + lp] * I_LFTG_K) + (1 << 15)) >> 16;
             for (i = 1 - mv; i < lv; i += 2, j++)
-                l[i] = ((data[w * j + lp] * I_LFTG_X) + (1 << 15)) >> 16;
+                l[i] = data[w * j + lp];
 
             sr_1d97_int(line, mv, mv + lv);
 
@@ -492,9 +529,12 @@ static void dwt_decode97_int(DWTContext *s, int32_t *t)
                 data[w * i + lp] = l[i];
         }
     }
+
+    for (i = 0; i < w * h; i++)
+        data[i] = (data[i] + ((1<<I_PRESHIFT)>>1)) >> I_PRESHIFT;
 }
 
-int ff_jpeg2000_dwt_init(DWTContext *s, uint16_t border[2][2],
+int ff_jpeg2000_dwt_init(DWTContext *s, int border[2][2],
                          int decomp_levels, int type)
 {
     int i, j, lev = decomp_levels, maxlen,
@@ -540,6 +580,9 @@ int ff_jpeg2000_dwt_init(DWTContext *s, uint16_t border[2][2],
 
 int ff_dwt_encode(DWTContext *s, void *t)
 {
+    if (s->ndeclevels == 0)
+        return 0;
+
     switch(s->type){
         case FF_DWT97:
             dwt_encode97_float(s, t); break;
@@ -555,6 +598,9 @@ int ff_dwt_encode(DWTContext *s, void *t)
 
 int ff_dwt_decode(DWTContext *s, void *t)
 {
+    if (s->ndeclevels == 0)
+        return 0;
+
     switch (s->type) {
     case FF_DWT97:
         dwt_decode97_float(s, t);
@@ -576,3 +622,125 @@ void ff_dwt_destroy(DWTContext *s)
     av_freep(&s->f_linebuf);
     av_freep(&s->i_linebuf);
 }
+
+#ifdef TEST
+
+#include "libavutil/lfg.h"
+
+#define MAX_W 256
+
+static int test_dwt(int *array, int *ref, int border[2][2], int decomp_levels, int type, int max_diff) {
+    int ret, j;
+    DWTContext s1={{{0}}}, *s= &s1;
+    int64_t err2 = 0;
+
+    ret = ff_jpeg2000_dwt_init(s,  border, decomp_levels, type);
+    if (ret < 0) {
+        fprintf(stderr, "ff_jpeg2000_dwt_init failed\n");
+        return 1;
+    }
+    ret = ff_dwt_encode(s, array);
+    if (ret < 0) {
+        fprintf(stderr, "ff_dwt_encode failed\n");
+        return 1;
+    }
+    ret = ff_dwt_decode(s, array);
+    if (ret < 0) {
+        fprintf(stderr, "ff_dwt_decode failed\n");
+        return 1;
+    }
+    for (j = 0; j<MAX_W * MAX_W; j++) {
+        if (FFABS(array[j] - ref[j]) > max_diff) {
+            fprintf(stderr, "mismatch at %d (%d != %d) decomp:%d border %d %d %d %d\n",
+                    j, array[j], ref[j],decomp_levels, border[0][0], border[0][1], border[1][0], border[1][1]);
+            return 2;
+        }
+        err2 += (array[j] - ref[j]) * (array[j] - ref[j]);
+        array[j] = ref[j];
+    }
+    ff_dwt_destroy(s);
+
+    printf("%s, decomp:%2d border %3d %3d %3d %3d milli-err2:%9"PRId64"\n",
+           type == FF_DWT53 ? "5/3i" : "9/7i",
+           decomp_levels, border[0][0], border[0][1], border[1][0], border[1][1],
+           1000*err2 / ((border[0][1] - border[0][0])*(border[1][1] - border[1][0])));
+
+    return 0;
+}
+
+static int test_dwtf(float *array, float *ref, int border[2][2], int decomp_levels, float max_diff) {
+    int ret, j;
+    DWTContext s1={{{0}}}, *s= &s1;
+    double err2 = 0;
+
+    ret = ff_jpeg2000_dwt_init(s,  border, decomp_levels, FF_DWT97);
+    if (ret < 0) {
+        fprintf(stderr, "ff_jpeg2000_dwt_init failed\n");
+        return 1;
+    }
+    ret = ff_dwt_encode(s, array);
+    if (ret < 0) {
+        fprintf(stderr, "ff_dwt_encode failed\n");
+        return 1;
+    }
+    ret = ff_dwt_decode(s, array);
+    if (ret < 0) {
+        fprintf(stderr, "ff_dwt_decode failed\n");
+        return 1;
+    }
+    for (j = 0; j<MAX_W * MAX_W; j++) {
+        if (FFABS(array[j] - ref[j]) > max_diff) {
+            fprintf(stderr, "mismatch at %d (%f != %f) decomp:%d border %d %d %d %d\n",
+                    j, array[j], ref[j],decomp_levels, border[0][0], border[0][1], border[1][0], border[1][1]);
+            return 2;
+        }
+        err2 += (array[j] - ref[j]) * (array[j] - ref[j]);
+        array[j] = ref[j];
+    }
+    ff_dwt_destroy(s);
+
+    printf("9/7f, decomp:%2d border %3d %3d %3d %3d err2:%20.3f\n",
+           decomp_levels, border[0][0], border[0][1], border[1][0], border[1][1],
+           err2 / ((border[0][1] - border[0][0])*(border[1][1] - border[1][0])));
+
+    return 0;
+}
+
+static int array[MAX_W * MAX_W];
+static int ref  [MAX_W * MAX_W];
+static float arrayf[MAX_W * MAX_W];
+static float reff  [MAX_W * MAX_W];
+
+int main(void) {
+    AVLFG prng;
+    int i,j;
+    int border[2][2];
+    int ret, decomp_levels;
+
+    av_lfg_init(&prng, 1);
+
+    for (i = 0; i<MAX_W * MAX_W; i++)
+        arrayf[i] = reff[i] = array[i] = ref[i] =  av_lfg_get(&prng) % 2048;
+
+    for (i = 0; i < 100; i++) {
+        for (j=0; j<4; j++)
+            border[j>>1][j&1] = av_lfg_get(&prng) % MAX_W;
+        if (border[0][0] >= border[0][1] || border[1][0] >= border[1][1])
+            continue;
+        decomp_levels = av_lfg_get(&prng) % FF_DWT_MAX_DECLVLS;
+
+        ret = test_dwt(array, ref, border, decomp_levels, FF_DWT53, 0);
+        if (ret)
+            return ret;
+        ret = test_dwt(array, ref, border, decomp_levels, FF_DWT97_INT, FFMIN(7+5*decomp_levels, 15+3*decomp_levels));
+        if (ret)
+            return ret;
+        ret = test_dwtf(arrayf, reff, border, decomp_levels, 0.05);
+        if (ret)
+            return ret;
+    }
+
+    return 0;
+}
+
+#endif
diff --git a/libavcodec/jpeg2000dwt.h b/libavcodec/jpeg2000dwt.h
index b6d296d8..718d183a 100644
--- a/libavcodec/jpeg2000dwt.h
+++ b/libavcodec/jpeg2000dwt.h
@@ -30,6 +30,8 @@
 #include <stdint.h>
 
 #define FF_DWT_MAX_DECLVLS 32 ///< max number of decomposition levels
+#define F_LFTG_K      1.230174104914001f
+#define F_LFTG_X      0.812893066115961f
 
 enum DWTType {
     FF_DWT97,
@@ -40,7 +42,7 @@ enum DWTType {
 
 typedef struct DWTContext {
     /// line lengths { horizontal, vertical } in consecutive decomposition levels
-    uint16_t linelen[FF_DWT_MAX_DECLVLS][2];
+    int linelen[FF_DWT_MAX_DECLVLS][2];
     uint8_t mod[FF_DWT_MAX_DECLVLS][2];  ///< coordinates (x0, y0) of decomp. levels mod 2
     uint8_t ndeclevels;                  ///< number of decomposition levels
     uint8_t type;                        ///< 0 for 9/7; 1 for 5/3
@@ -55,7 +57,7 @@ typedef struct DWTContext {
  * @param decomp_levels     number of decomposition levels
  * @param type              0 for DWT 9/7; 1 for DWT 5/3
  */
-int ff_jpeg2000_dwt_init(DWTContext *s, uint16_t border[2][2],
+int ff_jpeg2000_dwt_init(DWTContext *s, int border[2][2],
                          int decomp_levels, int type);
 
 int ff_dwt_encode(DWTContext *s, void *t);
diff --git a/libavcodec/jpeglsdec.c b/libavcodec/jpeglsdec.c
index 2ea75f75..68151cbb 100644
--- a/libavcodec/jpeglsdec.c
+++ b/libavcodec/jpeglsdec.c
@@ -523,6 +523,6 @@ AVCodec ff_jpegls_decoder = {
     .init           = ff_mjpeg_decode_init,
     .close          = ff_mjpeg_decode_end,
     .decode         = ff_mjpeg_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE,
 };
diff --git a/libavcodec/jpeglsenc.c b/libavcodec/jpeglsenc.c
index d2749101..cd16184c 100644
--- a/libavcodec/jpeglsenc.c
+++ b/libavcodec/jpeglsenc.c
@@ -35,6 +35,12 @@
 #include "mjpegenc.h"
 #include "jpegls.h"
 
+typedef struct JPEGLSContext {
+    AVClass *class;
+
+    int pred;
+} JPEGLSContext;
+
 /**
  * Encode error from regular symbol
  */
@@ -250,8 +256,8 @@ static void ls_store_lse(JLSState *state, PutBitContext *pb)
 static int encode_picture_ls(AVCodecContext *avctx, AVPacket *pkt,
                              const AVFrame *pict, int *got_packet)
 {
+    JPEGLSContext *ctx = avctx->priv_data;
     const AVFrame *const p = pict;
-    const int near         = avctx->prediction_method;
     PutBitContext pb, pb2;
     GetBitContext gb;
     uint8_t *buf2 = NULL;
@@ -262,6 +268,13 @@ static int encode_picture_ls(AVCodecContext *avctx, AVPacket *pkt,
     int i, size, ret;
     int comps;
 
+#if FF_API_PRIVATE_OPT
+FF_DISABLE_DEPRECATION_WARNINGS
+    if (avctx->prediction_method)
+        ctx->pred = avctx->prediction_method;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
     if (avctx->pix_fmt == AV_PIX_FMT_GRAY8 ||
         avctx->pix_fmt == AV_PIX_FMT_GRAY16)
         comps = 1;
@@ -269,7 +282,7 @@ static int encode_picture_ls(AVCodecContext *avctx, AVPacket *pkt,
         comps = 3;
 
     if ((ret = ff_alloc_packet2(avctx, pkt, avctx->width  *avctx->height * comps * 4 +
-                                FF_MIN_BUFFER_SIZE)) < 0)
+                                AV_INPUT_BUFFER_MIN_SIZE, 0)) < 0)
         return ret;
 
     buf2 = av_malloc(pkt->size);
@@ -300,7 +313,7 @@ static int encode_picture_ls(AVCodecContext *avctx, AVPacket *pkt,
         put_bits(&pb, 8, i);   // component ID
         put_bits(&pb, 8, 0);   // mapping index: none
     }
-    put_bits(&pb, 8, near);
+    put_bits(&pb, 8, ctx->pred);
     put_bits(&pb, 8, (comps > 1) ? 1 : 0);  // interleaving: 0 - plane, 1 - line
     put_bits(&pb, 8, 0);  // point transform: none
 
@@ -309,7 +322,7 @@ static int encode_picture_ls(AVCodecContext *avctx, AVPacket *pkt,
         goto memfail;
 
     /* initialize JPEG-LS state from JPEG parameters */
-    state->near = near;
+    state->near = ctx->pred;
     state->bpp  = (avctx->pix_fmt == AV_PIX_FMT_GRAY16) ? 16 : 8;
     ff_jpegls_reset_coding_parameters(state, 0);
     ff_jpegls_init_state(state);
@@ -405,27 +418,21 @@ static int encode_picture_ls(AVCodecContext *avctx, AVPacket *pkt,
     return 0;
 
 memfail:
-    av_free_packet(pkt);
+    av_packet_unref(pkt);
     av_freep(&buf2);
     av_freep(&state);
     av_freep(&zero);
     return AVERROR(ENOMEM);
 }
 
-static av_cold int encode_close(AVCodecContext *avctx)
-{
-    av_frame_free(&avctx->coded_frame);
-    return 0;
-}
-
 static av_cold int encode_init_ls(AVCodecContext *ctx)
 {
-    ctx->coded_frame = av_frame_alloc();
-    if (!ctx->coded_frame)
-        return AVERROR(ENOMEM);
-
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
     ctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
     ctx->coded_frame->key_frame = 1;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
     if (ctx->pix_fmt != AV_PIX_FMT_GRAY8  &&
         ctx->pix_fmt != AV_PIX_FMT_GRAY16 &&
@@ -438,14 +445,33 @@ static av_cold int encode_init_ls(AVCodecContext *ctx)
     return 0;
 }
 
+#define OFFSET(x) offsetof(JPEGLSContext, x)
+#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
+static const AVOption options[] = {
+{ "pred", "Prediction method", OFFSET(pred), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 2, VE, "pred" },
+    { "left",   NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, INT_MIN, INT_MAX, VE, "pred" },
+    { "plane",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, INT_MIN, INT_MAX, VE, "pred" },
+    { "median", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 2 }, INT_MIN, INT_MAX, VE, "pred" },
+
+    { NULL},
+};
+
+static const AVClass jpegls_class = {
+    .class_name = "jpegls",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
 AVCodec ff_jpegls_encoder = {
     .name           = "jpegls",
     .long_name      = NULL_IF_CONFIG_SMALL("JPEG-LS"),
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_JPEGLS,
+    .priv_data_size = sizeof(JPEGLSContext),
+    .priv_class     = &jpegls_class,
     .init           = encode_init_ls,
-    .close          = encode_close,
-    .capabilities   = CODEC_CAP_FRAME_THREADS | CODEC_CAP_INTRA_ONLY,
+    .capabilities   = AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_INTRA_ONLY,
     .encode2        = encode_picture_ls,
     .pix_fmts       = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_BGR24, AV_PIX_FMT_RGB24,
diff --git a/libavcodec/jvdec.c b/libavcodec/jvdec.c
index 9c4a8d4c..cbe83d3c 100644
--- a/libavcodec/jvdec.c
+++ b/libavcodec/jvdec.c
@@ -231,5 +231,5 @@ AVCodec ff_jv_decoder = {
     .init           = decode_init,
     .close          = decode_close,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/kgv1dec.c b/libavcodec/kgv1dec.c
index 4f9329f9..5359411c 100644
--- a/libavcodec/kgv1dec.c
+++ b/libavcodec/kgv1dec.c
@@ -183,5 +183,5 @@ AVCodec ff_kgv1_decoder = {
     .close          = decode_end,
     .decode         = decode_frame,
     .flush          = decode_flush,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/kmvc.c b/libavcodec/kmvc.c
index f879c353..7acaba7d 100644
--- a/libavcodec/kmvc.c
+++ b/libavcodec/kmvc.c
@@ -421,5 +421,5 @@ AVCodec ff_kmvc_decoder = {
     .priv_data_size = sizeof(KmvcContext),
     .init           = decode_init,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/lagarith.c b/libavcodec/lagarith.c
index 2c6d70c0..94d723d3 100644
--- a/libavcodec/lagarith.c
+++ b/libavcodec/lagarith.c
@@ -748,5 +748,5 @@ AVCodec ff_lagarith_decoder = {
     .init           = lag_decode_init,
     .close          = lag_decode_end,
     .decode         = lag_decode_frame,
-    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
 };
diff --git a/libavcodec/lagarithrac.c b/libavcodec/lagarithrac.c
index 37ac2cf5..3d36d1b9 100644
--- a/libavcodec/lagarithrac.c
+++ b/libavcodec/lagarithrac.c
@@ -45,7 +45,7 @@ void ff_lag_rac_init(lag_rac *l, GetBitContext *gb, int length)
 
     l->range        = 0x80;
     l->low          = *l->bytestream >> 1;
-    l->hash_shift   = FFMAX((int)l->scale - 10, 0);
+    l->hash_shift   = FFMAX(l->scale, 10) - 10;
 
     for (i = j = 0; i < 1024; i++) {
         unsigned r = i << l->hash_shift;
diff --git a/libavcodec/lcldec.c b/libavcodec/lcldec.c
index 1d94041f..c04ed5e6 100644
--- a/libavcodec/lcldec.c
+++ b/libavcodec/lcldec.c
@@ -512,7 +512,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
         avctx->pix_fmt = AV_PIX_FMT_YUV422P;
         av_log(avctx, AV_LOG_DEBUG, "Image type is YUV 4:2:2.\n");
         if (avctx->width % 4) {
-            avpriv_request_sample(avctx, "Unsupported dimensions\n");
+            avpriv_request_sample(avctx, "Unsupported dimensions");
             return AVERROR_INVALIDDATA;
         }
         break;
@@ -547,7 +547,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
 
     av_pix_fmt_get_chroma_sub_sample(avctx->pix_fmt, &subsample_h, &subsample_v);
     if (avctx->width % (1<<subsample_h) || avctx->height % (1<<subsample_v)) {
-        avpriv_request_sample(avctx, "Unsupported dimensions\n");
+        avpriv_request_sample(avctx, "Unsupported dimensions");
         return AVERROR_INVALIDDATA;
     }
 
@@ -660,7 +660,7 @@ AVCodec ff_mszh_decoder = {
     .init           = decode_init,
     .close          = decode_end,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
 #endif
 
@@ -674,6 +674,6 @@ AVCodec ff_zlib_decoder = {
     .init           = decode_init,
     .close          = decode_end,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
 #endif
diff --git a/libavcodec/lclenc.c b/libavcodec/lclenc.c
index bce1d537..1b1e08bd 100644
--- a/libavcodec/lclenc.c
+++ b/libavcodec/lclenc.c
@@ -79,7 +79,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     int zret; // Zlib return code
     int max_size = deflateBound(&c->zstream, avctx->width * avctx->height * 3);
 
-    if ((ret = ff_alloc_packet2(avctx, pkt, max_size)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, pkt, max_size, 0)) < 0)
         return ret;
 
     if(avctx->pix_fmt != AV_PIX_FMT_BGR24){
@@ -131,16 +131,16 @@ static av_cold int encode_init(AVCodecContext *avctx)
 
     av_assert0(avctx->width && avctx->height);
 
-    avctx->extradata = av_mallocz(8 + FF_INPUT_BUFFER_PADDING_SIZE);
+    avctx->extradata = av_mallocz(8 + AV_INPUT_BUFFER_PADDING_SIZE);
     if (!avctx->extradata)
         return AVERROR(ENOMEM);
 
-    avctx->coded_frame = av_frame_alloc();
-    if (!avctx->coded_frame)
-        return AVERROR(ENOMEM);
-
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
     avctx->coded_frame->key_frame = 1;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
     c->compression = avctx->compression_level == FF_COMPRESSION_DEFAULT ?
                             COMP_ZLIB_NORMAL :
@@ -183,8 +183,6 @@ static av_cold int encode_end(AVCodecContext *avctx)
     av_freep(&avctx->extradata);
     deflateEnd(&c->zstream);
 
-    av_frame_free(&avctx->coded_frame);
-
     return 0;
 }
 
@@ -197,7 +195,7 @@ AVCodec ff_zlib_encoder = {
     .init           = encode_init,
     .encode2        = encode_frame,
     .close          = encode_end,
-    .capabilities   = CODEC_CAP_FRAME_THREADS | CODEC_CAP_INTRA_ONLY,
+    .capabilities   = AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_INTRA_ONLY,
     .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_BGR24, AV_PIX_FMT_NONE },
     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE |
                       FF_CODEC_CAP_INIT_CLEANUP,
diff --git a/libavcodec/libaacplus.c b/libavcodec/libaacplus.c
deleted file mode 100644
index 9087d006..00000000
--- a/libavcodec/libaacplus.c
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Interface to libaacplus for aac+ (sbr+ps) encoding
- * Copyright (c) 2010 tipok <piratfm@gmail.com>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/**
- * @file
- * Interface to libaacplus for aac+ (sbr+ps) encoding.
- */
-
-#include <aacplus.h>
-
-#include "avcodec.h"
-#include "internal.h"
-
-typedef struct aacPlusAudioContext {
-    aacplusEncHandle aacplus_handle;
-    unsigned long max_output_bytes;
-    unsigned long samples_input;
-} aacPlusAudioContext;
-
-static av_cold int aacPlus_encode_init(AVCodecContext *avctx)
-{
-    aacPlusAudioContext *s = avctx->priv_data;
-    aacplusEncConfiguration *aacplus_cfg;
-
-    /* number of channels */
-    if (avctx->channels < 1 || avctx->channels > 2) {
-        av_log(avctx, AV_LOG_ERROR, "encoding %d channel(s) is not allowed\n", avctx->channels);
-        return AVERROR(EINVAL);
-    }
-
-    if (avctx->profile != FF_PROFILE_AAC_LOW && avctx->profile != FF_PROFILE_UNKNOWN) {
-        av_log(avctx, AV_LOG_ERROR, "invalid AAC profile: %d, only LC supported\n", avctx->profile);
-        return AVERROR(EINVAL);
-    }
-
-    s->aacplus_handle = aacplusEncOpen(avctx->sample_rate, avctx->channels,
-                                       &s->samples_input, &s->max_output_bytes);
-    if (!s->aacplus_handle) {
-        av_log(avctx, AV_LOG_ERROR, "can't open encoder\n");
-        return AVERROR(EINVAL);
-    }
-
-    /* check aacplus version */
-    aacplus_cfg = aacplusEncGetCurrentConfiguration(s->aacplus_handle);
-
-    aacplus_cfg->bitRate = avctx->bit_rate;
-    aacplus_cfg->bandWidth = avctx->cutoff;
-    aacplus_cfg->outputFormat = !(avctx->flags & CODEC_FLAG_GLOBAL_HEADER);
-    aacplus_cfg->inputFormat = avctx->sample_fmt == AV_SAMPLE_FMT_FLT ? AACPLUS_INPUT_FLOAT : AACPLUS_INPUT_16BIT;
-    if (!aacplusEncSetConfiguration(s->aacplus_handle, aacplus_cfg)) {
-        av_log(avctx, AV_LOG_ERROR, "libaacplus doesn't support this output format!\n");
-        return AVERROR(EINVAL);
-    }
-
-    avctx->frame_size = s->samples_input / avctx->channels;
-
-    /* Set decoder specific info */
-    avctx->extradata_size = 0;
-    if (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) {
-
-        unsigned char *buffer = NULL;
-        unsigned long decoder_specific_info_size;
-
-        if (aacplusEncGetDecoderSpecificInfo(s->aacplus_handle, &buffer,
-                                           &decoder_specific_info_size) == 1) {
-            avctx->extradata = av_malloc(decoder_specific_info_size + FF_INPUT_BUFFER_PADDING_SIZE);
-            if (!avctx->extradata) {
-                free(buffer);
-                return AVERROR(ENOMEM);
-            }
-            avctx->extradata_size = decoder_specific_info_size;
-            memcpy(avctx->extradata, buffer, avctx->extradata_size);
-        }
-        free(buffer);
-    }
-    return 0;
-}
-
-static int aacPlus_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
-                                const AVFrame *frame, int *got_packet)
-{
-    aacPlusAudioContext *s = avctx->priv_data;
-    int32_t *input_buffer = (int32_t *)frame->data[0];
-    int ret;
-
-    if ((ret = ff_alloc_packet2(avctx, pkt, s->max_output_bytes)) < 0)
-        return ret;
-
-    pkt->size = aacplusEncEncode(s->aacplus_handle, input_buffer,
-                                 s->samples_input, pkt->data, pkt->size);
-    *got_packet   = 1;
-    pkt->pts      = frame->pts;
-    return 0;
-}
-
-static av_cold int aacPlus_encode_close(AVCodecContext *avctx)
-{
-    aacPlusAudioContext *s = avctx->priv_data;
-
-    av_freep(&avctx->extradata);
-    aacplusEncClose(s->aacplus_handle);
-
-    return 0;
-}
-
-static const AVProfile profiles[] = {
-    { FF_PROFILE_AAC_LOW, "LC" },
-    { FF_PROFILE_UNKNOWN },
-};
-
-AVCodec ff_libaacplus_encoder = {
-    .name           = "libaacplus",
-    .long_name      = NULL_IF_CONFIG_SMALL("libaacplus AAC+ (Advanced Audio Codec with SBR+PS)"),
-    .type           = AVMEDIA_TYPE_AUDIO,
-    .id             = AV_CODEC_ID_AAC,
-    .priv_data_size = sizeof(aacPlusAudioContext),
-    .init           = aacPlus_encode_init,
-    .encode2        = aacPlus_encode_frame,
-    .close          = aacPlus_encode_close,
-    .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
-                                                     AV_SAMPLE_FMT_FLT,
-                                                     AV_SAMPLE_FMT_NONE },
-    .profiles       = profiles,
-    .channel_layouts = (const uint64_t[]) { AV_CH_LAYOUT_MONO,
-                                            AV_CH_LAYOUT_STEREO,
-                                            0 },
-};
diff --git a/libavcodec/libcelt_dec.c b/libavcodec/libcelt_dec.c
index 4e62fe53..878e4cc6 100644
--- a/libavcodec/libcelt_dec.c
+++ b/libavcodec/libcelt_dec.c
@@ -136,5 +136,5 @@ AVCodec ff_libcelt_decoder = {
     .init           = libcelt_dec_init,
     .close          = libcelt_dec_close,
     .decode         = libcelt_dec_decode,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/libdcadec.c b/libavcodec/libdcadec.c
index 890d2709..7d1f9318 100644
--- a/libavcodec/libdcadec.c
+++ b/libavcodec/libdcadec.c
@@ -29,18 +29,50 @@
 #include "dca.h"
 #include "dca_syncwords.h"
 #include "internal.h"
+#include "profiles.h"
 
 typedef struct DCADecContext {
+    const AVClass *class;
     struct dcadec_context *ctx;
     uint8_t *buffer;
     int buffer_size;
+    int lfe_filter;
+    int core_only;
 } DCADecContext;
 
+static void my_log_cb(int level, const char *file, int line,
+                      const char *message, void *cbarg)
+{
+    int av_level;
+
+    switch (level) {
+    case DCADEC_LOG_ERROR:
+        av_level = AV_LOG_ERROR;
+        break;
+    case DCADEC_LOG_WARNING:
+        av_level = AV_LOG_WARNING;
+        break;
+    case DCADEC_LOG_INFO:
+        av_level = AV_LOG_INFO;
+        break;
+    case DCADEC_LOG_VERBOSE:
+        av_level = AV_LOG_VERBOSE;
+        break;
+    case DCADEC_LOG_DEBUG:
+    default:
+        av_level = AV_LOG_DEBUG;
+        break;
+    }
+
+    av_log(cbarg, av_level, "%s\n", message);
+}
+
 static int dcadec_decode_frame(AVCodecContext *avctx, void *data,
                                int *got_frame_ptr, AVPacket *avpkt)
 {
     DCADecContext *s = avctx->priv_data;
     AVFrame *frame = data;
+    struct dcadec_exss_info *exss;
     int ret, i, k;
     int **samples, nsamples, channel_mask, sample_rate, bits_per_sample, profile;
     uint32_t mrk;
@@ -54,7 +86,7 @@ static int dcadec_decode_frame(AVCodecContext *avctx, void *data,
     }
     mrk = AV_RB32(input);
     if (mrk != DCA_SYNCWORD_CORE_BE && mrk != DCA_SYNCWORD_SUBSTREAM) {
-        s->buffer = av_fast_realloc(s->buffer, &s->buffer_size, avpkt->size + FF_INPUT_BUFFER_PADDING_SIZE);
+        s->buffer = av_fast_realloc(s->buffer, &s->buffer_size, avpkt->size + AV_INPUT_BUFFER_PADDING_SIZE);
         if (!s->buffer)
             return AVERROR(ENOMEM);
 
@@ -127,6 +159,24 @@ static int dcadec_decode_frame(AVCodecContext *avctx, void *data,
     } else
         avctx->bit_rate = 0;
 
+    if (exss = dcadec_context_get_exss_info(s->ctx)) {
+        enum AVMatrixEncoding matrix_encoding = AV_MATRIX_ENCODING_NONE;
+
+        switch(exss->matrix_encoding) {
+        case DCADEC_MATRIX_ENCODING_SURROUND:
+            matrix_encoding = AV_MATRIX_ENCODING_DOLBY;
+            break;
+        case DCADEC_MATRIX_ENCODING_HEADPHONE:
+            matrix_encoding = AV_MATRIX_ENCODING_DOLBYHEADPHONE;
+            break;
+        }
+        dcadec_context_free_exss_info(exss);
+
+        if (matrix_encoding != AV_MATRIX_ENCODING_NONE &&
+            (ret = ff_side_data_update_matrix_encoding(frame, matrix_encoding)) < 0)
+            return ret;
+    }
+
     frame->nb_samples = nsamples;
     if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
@@ -173,27 +223,74 @@ static av_cold int dcadec_init(AVCodecContext *avctx)
     int flags = 0;
 
     /* Affects only lossy DTS profiles. DTS-HD MA is always bitexact */
-    if (avctx->flags & CODEC_FLAG_BITEXACT)
+    if (avctx->flags & AV_CODEC_FLAG_BITEXACT)
         flags |= DCADEC_FLAG_CORE_BIT_EXACT;
 
+    if (avctx->err_recognition & AV_EF_EXPLODE)
+        flags |= DCADEC_FLAG_STRICT;
+
+    if (avctx->request_channel_layout) {
+        switch (avctx->request_channel_layout) {
+        case AV_CH_LAYOUT_STEREO:
+        case AV_CH_LAYOUT_STEREO_DOWNMIX:
+            flags |= DCADEC_FLAG_KEEP_DMIX_2CH;
+            break;
+        case AV_CH_LAYOUT_5POINT1:
+            flags |= DCADEC_FLAG_KEEP_DMIX_6CH;
+            break;
+        case AV_CH_LAYOUT_NATIVE:
+            flags |= DCADEC_FLAG_NATIVE_LAYOUT;
+            break;
+        default:
+            av_log(avctx, AV_LOG_WARNING, "Invalid request_channel_layout\n");
+            break;
+        }
+    }
+
+    if (s->core_only)
+        flags |= DCADEC_FLAG_CORE_ONLY;
+
+    switch (s->lfe_filter) {
+#if DCADEC_API_VERSION >= DCADEC_VERSION_CODE(0, 1, 0)
+    case 1:
+        flags |= DCADEC_FLAG_CORE_LFE_IIR;
+        break;
+#endif
+    case 2:
+        flags |= DCADEC_FLAG_CORE_LFE_FIR;
+        break;
+    }
+
     s->ctx = dcadec_context_create(flags);
     if (!s->ctx)
         return AVERROR(ENOMEM);
 
+    dcadec_context_set_log_cb(s->ctx, my_log_cb, avctx);
+
     avctx->sample_fmt = AV_SAMPLE_FMT_S32P;
     avctx->bits_per_raw_sample = 24;
 
     return 0;
 }
 
-static const AVProfile profiles[] = {
-    { FF_PROFILE_DTS,         "DTS"         },
-    { FF_PROFILE_DTS_ES,      "DTS-ES"      },
-    { FF_PROFILE_DTS_96_24,   "DTS 96/24"   },
-    { FF_PROFILE_DTS_HD_HRA,  "DTS-HD HRA"  },
-    { FF_PROFILE_DTS_HD_MA,   "DTS-HD MA"   },
-    { FF_PROFILE_DTS_EXPRESS, "DTS Express" },
-    { FF_PROFILE_UNKNOWN },
+#define OFFSET(x) offsetof(DCADecContext, x) /* byte offset of option field x inside DCADecContext */
+#define PARAM AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_DECODING_PARAM /* flag set used by every row below */
+
+static const AVOption dcadec_options[] = { /* "iir" (1) and "fir" (2) are the lfe_filter values dcadec_init() switches on */
+    { "lfe_filter", "Lossy LFE channel interpolation filter", OFFSET(lfe_filter), AV_OPT_TYPE_INT,   { .i64 = 0 }, 0,       2,       PARAM, "lfe_filter" },
+    { "default",    "Library default",                        0,                  AV_OPT_TYPE_CONST, { .i64 = 0 }, INT_MIN, INT_MAX, PARAM, "lfe_filter" },
+    { "iir",        "IIR filter",                             0,                  AV_OPT_TYPE_CONST, { .i64 = 1 }, INT_MIN, INT_MAX, PARAM, "lfe_filter" },
+    { "fir",        "FIR filter",                             0,                  AV_OPT_TYPE_CONST, { .i64 = 2 }, INT_MIN, INT_MAX, PARAM, "lfe_filter" },
+    { "core_only",  "Decode core only without extensions",    OFFSET(core_only),  AV_OPT_TYPE_BOOL,  { .i64 = 0 }, 0,       1,       PARAM },
+    { NULL }
+};
+
+static const AVClass dcadec_class = { /* installed as ff_libdcadec_decoder.priv_class */
+    .class_name = "libdcadec decoder",
+    .item_name  = av_default_item_name,
+    .option     = dcadec_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+    .category   = AV_CLASS_CATEGORY_DECODER,
 };
 
 AVCodec ff_libdcadec_decoder = {
@@ -206,8 +303,9 @@ AVCodec ff_libdcadec_decoder = {
     .decode         = dcadec_decode_frame,
     .close          = dcadec_close,
     .flush          = dcadec_flush,
-    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_CHANNEL_CONF,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_CHANNEL_CONF,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S32P, AV_SAMPLE_FMT_S16P,
                                                       AV_SAMPLE_FMT_NONE },
-    .profiles       = NULL_IF_CONFIG_SMALL(profiles),
+    .priv_class     = &dcadec_class,
+    .profiles       = NULL_IF_CONFIG_SMALL(ff_dca_profiles),
 };
diff --git a/libavcodec/libfaac.c b/libavcodec/libfaac.c
index 69c186b1..98b3ba81 100644
--- a/libavcodec/libfaac.c
+++ b/libavcodec/libfaac.c
@@ -117,7 +117,7 @@ static av_cold int Faac_encode_init(AVCodecContext *avctx)
     faac_cfg->allowMidside = 1;
     faac_cfg->bitRate = avctx->bit_rate / avctx->channels;
     faac_cfg->bandWidth = avctx->cutoff;
-    if(avctx->flags & CODEC_FLAG_QSCALE) {
+    if(avctx->flags & AV_CODEC_FLAG_QSCALE) {
         faac_cfg->bitRate = 0;
         faac_cfg->quantqual = avctx->global_quality / FF_QP2LAMBDA;
     }
@@ -131,14 +131,14 @@ static av_cold int Faac_encode_init(AVCodecContext *avctx)
 
     /* Set decoder specific info */
     avctx->extradata_size = 0;
-    if (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) {
+    if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
 
         unsigned char *buffer = NULL;
         unsigned long decoder_specific_info_size;
 
         if (!faacEncGetDecoderSpecificInfo(s->faac_handle, &buffer,
                                            &decoder_specific_info_size)) {
-            avctx->extradata = av_malloc(decoder_specific_info_size + FF_INPUT_BUFFER_PADDING_SIZE);
+            avctx->extradata = av_malloc(decoder_specific_info_size + AV_INPUT_BUFFER_PADDING_SIZE);
             if (!avctx->extradata) {
                 ret = AVERROR(ENOMEM);
                 goto error;
@@ -184,7 +184,7 @@ static int Faac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     int num_samples  = frame ? frame->nb_samples : 0;
     void *samples    = frame ? frame->data[0]    : NULL;
 
-    if ((ret = ff_alloc_packet2(avctx, avpkt, (7 + 768) * avctx->channels)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, avpkt, (7 + 768) * avctx->channels, 0)) < 0)
         return ret;
 
     bytes_written = faacEncEncode(s->faac_handle, samples,
@@ -240,7 +240,7 @@ AVCodec ff_libfaac_encoder = {
     .init           = Faac_encode_init,
     .encode2        = Faac_encode_frame,
     .close          = Faac_encode_close,
-    .capabilities   = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_SMALL_LAST_FRAME | AV_CODEC_CAP_DELAY,
     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
                                                      AV_SAMPLE_FMT_NONE },
     .profiles       = NULL_IF_CONFIG_SMALL(profiles),
diff --git a/libavcodec/libfdk-aacdec.c b/libavcodec/libfdk-aacdec.c
index f7fc8119..e5f7c4eb 100644
--- a/libavcodec/libfdk-aacdec.c
+++ b/libavcodec/libfdk-aacdec.c
@@ -41,8 +41,8 @@ enum ConcealMethod {
 typedef struct FDKAACDecContext {
     const AVClass *class;
     HANDLE_AACDECODER handle;
-    int initialized;
     uint8_t *decoder_buffer;
+    int decoder_buffer_size;
     uint8_t *anc_buffer;
     int conceal_method;
     int drc_level;
@@ -54,7 +54,7 @@ typedef struct FDKAACDecContext {
 
 
 #define DMX_ANC_BUFFSIZE       128
-#define DECODER_MAX_CHANNELS     6
+#define DECODER_MAX_CHANNELS     8
 #define DECODER_BUFFSIZE      2048 * sizeof(INT_PCM)
 
 #define OFFSET(x) offsetof(FDKAACDecContext, x)
@@ -209,7 +209,6 @@ static av_cold int fdk_aac_decode_init(AVCodecContext *avctx)
 {
     FDKAACDecContext *s = avctx->priv_data;
     AAC_DECODER_ERROR err;
-    int ret;
 
     s->handle = aacDecoder_Open(avctx->extradata_size ? TT_MP4_RAW : TT_MP4_ADTS, 1);
     if (!s->handle) {
@@ -256,13 +255,11 @@ static av_cold int fdk_aac_decode_init(AVCodecContext *avctx)
                s->anc_buffer = av_malloc(DMX_ANC_BUFFSIZE);
                if (!s->anc_buffer) {
                    av_log(avctx, AV_LOG_ERROR, "Unable to allocate ancillary buffer for the decoder\n");
-                   ret = AVERROR(ENOMEM);
-                   goto fail;
+                   return AVERROR(ENOMEM);
                }
                if (aacDecoder_AncDataInit(s->handle, s->anc_buffer, DMX_ANC_BUFFSIZE)) {
                    av_log(avctx, AV_LOG_ERROR, "Unable to register downmix ancillary buffer in the decoder\n");
-                   ret = AVERROR_UNKNOWN;
-                   goto fail;
+                   return AVERROR_UNKNOWN;
                }
             }
         }
@@ -305,10 +302,12 @@ static av_cold int fdk_aac_decode_init(AVCodecContext *avctx)
 
     avctx->sample_fmt = AV_SAMPLE_FMT_S16;
 
+    s->decoder_buffer_size = DECODER_BUFFSIZE * DECODER_MAX_CHANNELS;
+    s->decoder_buffer = av_malloc(s->decoder_buffer_size);
+    if (!s->decoder_buffer)
+        return AVERROR(ENOMEM);
+
     return 0;
-fail:
-    fdk_aac_decode_close(avctx);
-    return ret;
 }
 
 static int fdk_aac_decode_frame(AVCodecContext *avctx, void *data,
@@ -319,8 +318,6 @@ static int fdk_aac_decode_frame(AVCodecContext *avctx, void *data,
     int ret;
     AAC_DECODER_ERROR err;
     UINT valid = avpkt->size;
-    uint8_t *buf, *tmpptr = NULL;
-    int buf_size;
 
     err = aacDecoder_Fill(s->handle, &avpkt->data, &avpkt->size, &valid);
     if (err != AAC_DEC_OK) {
@@ -328,31 +325,7 @@ static int fdk_aac_decode_frame(AVCodecContext *avctx, void *data,
         return AVERROR_INVALIDDATA;
     }
 
-    if (s->initialized) {
-        frame->nb_samples = avctx->frame_size;
-        if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
-            return ret;
-
-        if (s->anc_buffer) {
-            buf_size = DECODER_BUFFSIZE * DECODER_MAX_CHANNELS;
-            buf = s->decoder_buffer;
-        } else {
-            buf = frame->extended_data[0];
-            buf_size = avctx->channels * frame->nb_samples *
-                       av_get_bytes_per_sample(avctx->sample_fmt);
-        }
-    } else {
-        buf_size = DECODER_BUFFSIZE * DECODER_MAX_CHANNELS;
-
-        if (!s->decoder_buffer)
-            s->decoder_buffer = av_malloc(buf_size);
-        if (!s->decoder_buffer)
-            return AVERROR(ENOMEM);
-
-        buf = tmpptr = s->decoder_buffer;
-    }
-
-    err = aacDecoder_DecodeFrame(s->handle, (INT_PCM *) buf, buf_size, 0);
+    err = aacDecoder_DecodeFrame(s->handle, (INT_PCM *) s->decoder_buffer, s->decoder_buffer_size, 0);
     if (err == AAC_DEC_NOT_ENOUGH_BITS) {
         ret = avpkt->size - valid;
         goto end;
@@ -364,26 +337,16 @@ static int fdk_aac_decode_frame(AVCodecContext *avctx, void *data,
         goto end;
     }
 
-    if (!s->initialized) {
-        if ((ret = get_stream_info(avctx)) < 0)
-            goto end;
-        s->initialized = 1;
-        frame->nb_samples = avctx->frame_size;
-    }
+    if ((ret = get_stream_info(avctx)) < 0)
+        goto end;
+    frame->nb_samples = avctx->frame_size;
 
-    if (tmpptr) {
-        frame->nb_samples = avctx->frame_size;
-        if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
-            goto end;
-    }
-    if (s->decoder_buffer) {
-        memcpy(frame->extended_data[0], buf,
-               avctx->channels * avctx->frame_size *
-               av_get_bytes_per_sample(avctx->sample_fmt));
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
+        goto end;
 
-        if (!s->anc_buffer)
-            av_freep(&s->decoder_buffer);
-    }
+    memcpy(frame->extended_data[0], s->decoder_buffer,
+           avctx->channels * avctx->frame_size *
+           av_get_bytes_per_sample(avctx->sample_fmt));
 
     *got_frame_ptr = 1;
     ret = avpkt->size - valid;
@@ -415,6 +378,8 @@ AVCodec ff_libfdk_aac_decoder = {
     .decode         = fdk_aac_decode_frame,
     .close          = fdk_aac_decode_close,
     .flush          = fdk_aac_decode_flush,
-    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_CHANNEL_CONF,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_CHANNEL_CONF,
     .priv_class     = &fdk_aac_dec_class,
+    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE |
+                      FF_CODEC_CAP_INIT_CLEANUP,
 };
diff --git a/libavcodec/libfdk-aacenc.c b/libavcodec/libfdk-aacenc.c
index 3eadb36b..98a817b5 100644
--- a/libavcodec/libfdk-aacenc.c
+++ b/libavcodec/libfdk-aacenc.c
@@ -184,7 +184,7 @@ static av_cold int aac_encode_init(AVCodecContext *avctx)
         goto error;
     }
 
-    if (avctx->flags & CODEC_FLAG_QSCALE || s->vbr) {
+    if (avctx->flags & AV_CODEC_FLAG_QSCALE || s->vbr) {
         int mode = s->vbr ? s->vbr : avctx->global_quality;
         if (mode <  1 || mode > 5) {
             av_log(avctx, AV_LOG_WARNING,
@@ -215,8 +215,8 @@ static av_cold int aac_encode_init(AVCodecContext *avctx)
         }
         if ((err = aacEncoder_SetParam(s->handle, AACENC_BITRATE,
                                        avctx->bit_rate)) != AACENC_OK) {
-            av_log(avctx, AV_LOG_ERROR, "Unable to set the bitrate %d: %s\n",
-                   avctx->bit_rate, aac_get_error(err));
+            av_log(avctx, AV_LOG_ERROR, "Unable to set the bitrate %"PRId64": %s\n",
+                   (int64_t)avctx->bit_rate, aac_get_error(err));
             goto error;
         }
     }
@@ -224,7 +224,7 @@ static av_cold int aac_encode_init(AVCodecContext *avctx)
     /* Choose bitstream format - if global header is requested, use
      * raw access units, otherwise use ADTS. */
     if ((err = aacEncoder_SetParam(s->handle, AACENC_TRANSMUX,
-                                   avctx->flags & CODEC_FLAG_GLOBAL_HEADER ? 0 : s->latm ? 10 : 2)) != AACENC_OK) {
+                                   avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER ? 0 : s->latm ? 10 : 2)) != AACENC_OK) {
         av_log(avctx, AV_LOG_ERROR, "Unable to set the transmux format: %s\n",
                aac_get_error(err));
         goto error;
@@ -243,7 +243,7 @@ static av_cold int aac_encode_init(AVCodecContext *avctx)
      * if using mp4 mode (raw access units, with global header) and
      * implicit signaling if using ADTS. */
     if (s->signaling < 0)
-        s->signaling = avctx->flags & CODEC_FLAG_GLOBAL_HEADER ? 2 : 0;
+        s->signaling = avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER ? 2 : 0;
 
     if ((err = aacEncoder_SetParam(s->handle, AACENC_SIGNALING_MODE,
                                    s->signaling)) != AACENC_OK) {
@@ -289,10 +289,10 @@ static av_cold int aac_encode_init(AVCodecContext *avctx)
     avctx->initial_padding = info.encoderDelay;
     ff_af_queue_init(avctx, &s->afq);
 
-    if (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) {
+    if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
         avctx->extradata_size = info.confSize;
         avctx->extradata      = av_mallocz(avctx->extradata_size +
-                                           FF_INPUT_BUFFER_PADDING_SIZE);
+                                           AV_INPUT_BUFFER_PADDING_SIZE);
         if (!avctx->extradata) {
             ret = AVERROR(ENOMEM);
             goto error;
@@ -342,7 +342,7 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     }
 
     /* The maximum packet size is 6144 bits aka 768 bytes per channel. */
-    if ((ret = ff_alloc_packet2(avctx, avpkt, FFMAX(8192, 768 * avctx->channels))) < 0)
+    if ((ret = ff_alloc_packet2(avctx, avpkt, FFMAX(8192, 768 * avctx->channels), 0)) < 0)
         return ret;
 
     out_ptr                   = avpkt->data;
@@ -417,7 +417,7 @@ AVCodec ff_libfdk_aac_encoder = {
     .init                  = aac_encode_init,
     .encode2               = aac_encode_frame,
     .close                 = aac_encode_close,
-    .capabilities          = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
+    .capabilities          = AV_CODEC_CAP_SMALL_LAST_FRAME | AV_CODEC_CAP_DELAY,
     .sample_fmts           = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
                                                             AV_SAMPLE_FMT_NONE },
     .priv_class            = &aac_enc_class,
diff --git a/libavcodec/libgsmdec.c b/libavcodec/libgsmdec.c
index 8740108d..a503215f 100644
--- a/libavcodec/libgsmdec.c
+++ b/libavcodec/libgsmdec.c
@@ -134,7 +134,7 @@ AVCodec ff_libgsm_decoder = {
     .close          = libgsm_decode_close,
     .decode         = libgsm_decode_frame,
     .flush          = libgsm_flush,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
 #endif
 #if CONFIG_LIBGSM_MS_DECODER
@@ -148,6 +148,6 @@ AVCodec ff_libgsm_ms_decoder = {
     .close          = libgsm_decode_close,
     .decode         = libgsm_decode_frame,
     .flush          = libgsm_flush,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
 #endif
diff --git a/libavcodec/libgsmenc.c b/libavcodec/libgsmenc.c
index b06ec649..69ce439e 100644
--- a/libavcodec/libgsmenc.c
+++ b/libavcodec/libgsmenc.c
@@ -62,8 +62,8 @@ static av_cold int libgsm_encode_init(AVCodecContext *avctx) {
     if (avctx->bit_rate != 13000 /* Official */ &&
         avctx->bit_rate != 13200 /* Very common */ &&
         avctx->bit_rate != 0 /* Unknown; a.o. mov does not set bitrate when decoding */ ) {
-        av_log(avctx, AV_LOG_ERROR, "Bitrate 13000bps required for GSM, got %dbps\n",
-               avctx->bit_rate);
+        av_log(avctx, AV_LOG_ERROR, "Bitrate 13000bps required for GSM, got %"PRId64"bps\n",
+               (int64_t)avctx->bit_rate);
         if (avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)
             return -1;
     }
@@ -98,7 +98,7 @@ static int libgsm_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     gsm_signal *samples = (gsm_signal *)frame->data[0];
     struct gsm_state *state = avctx->priv_data;
 
-    if ((ret = ff_alloc_packet2(avctx, avpkt, avctx->block_align)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, avpkt, avctx->block_align, 0)) < 0)
         return ret;
 
     switch(avctx->codec_id) {
diff --git a/libavcodec/libilbc.c b/libavcodec/libilbc.c
index 9fdd3c83..c4c054fa 100644
--- a/libavcodec/libilbc.c
+++ b/libavcodec/libilbc.c
@@ -111,7 +111,7 @@ AVCodec ff_libilbc_decoder = {
     .priv_data_size = sizeof(ILBCDecContext),
     .init           = ilbc_decode_init,
     .decode         = ilbc_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .priv_class     = &ilbc_dec_class,
 };
 
@@ -166,7 +166,7 @@ static int ilbc_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     ILBCEncContext *s = avctx->priv_data;
     int ret;
 
-    if ((ret = ff_alloc_packet2(avctx, avpkt, 50)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, avpkt, 50, 0)) < 0)
         return ret;
 
     WebRtcIlbcfix_EncodeImpl((uint16_t *) avpkt->data, (const int16_t *) frame->data[0], &s->encoder);
diff --git a/libavcodec/libkvazaar.c b/libavcodec/libkvazaar.c
new file mode 100644
index 00000000..79fde41b
--- /dev/null
+++ b/libavcodec/libkvazaar.c
@@ -0,0 +1,304 @@
+/*
+ * libkvazaar encoder
+ *
+ * Copyright (c) 2015 Tampere University of Technology
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <kvazaar.h>
+#include <string.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/dict.h"
+#include "libavutil/error.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/internal.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/opt.h"
+
+#include "avcodec.h"
+#include "internal.h"
+
+typedef struct LibkvazaarContext {
+    const AVClass *class;
+
+    const kvz_api *api;
+    kvz_encoder *encoder;
+    kvz_config *config;
+
+    char *kvz_params;
+} LibkvazaarContext;
+
+/* Allocate and configure the kvazaar encoder for avctx. Returns 0 or a negative AVERROR
+ * code; the config and encoder are not freed here on failure. */
+static av_cold int libkvazaar_init(AVCodecContext *avctx)
+{
+    LibkvazaarContext *const ctx = avctx->priv_data;
+    const kvz_api *const api = ctx->api = kvz_api_get(8);
+    kvz_config *cfg = NULL;
+    kvz_encoder *enc = NULL;
+
+    if (!api) { /* every call below goes through this table */
+        av_log(avctx, AV_LOG_ERROR, "Could not get the kvazaar API.\n");
+        return AVERROR_EXTERNAL;
+    }
+
+    /* Kvazaar requires width and height to be multiples of eight. */
+    if (avctx->width % 8 || avctx->height % 8) {
+        av_log(avctx, AV_LOG_ERROR, "Video dimensions are not a multiple of 8 (%dx%d).\n",
+               avctx->width, avctx->height);
+        return AVERROR(ENOSYS);
+    }
+
+    ctx->config = cfg = api->config_alloc();
+    if (!cfg) {
+        av_log(avctx, AV_LOG_ERROR, "Could not allocate kvazaar config structure.\n");
+        return AVERROR(ENOMEM);
+    }
+
+    if (!api->config_init(cfg)) {
+        av_log(avctx, AV_LOG_ERROR, "Could not initialize kvazaar config structure.\n");
+        return AVERROR_BUG;
+    }
+
+    cfg->width  = avctx->width;
+    cfg->height = avctx->height;
+
+    if (avctx->ticks_per_frame > INT_MAX / avctx->time_base.num) {
+        av_log(avctx, AV_LOG_ERROR, "Could not set framerate for kvazaar: integer overflow\n");
+        return AVERROR(EINVAL);
+    }
+    cfg->framerate_num   = avctx->time_base.den;
+    cfg->framerate_denom = avctx->time_base.num * avctx->ticks_per_frame;
+    cfg->target_bitrate = avctx->bit_rate;
+    cfg->vui.sar_width  = avctx->sample_aspect_ratio.num;
+    cfg->vui.sar_height = avctx->sample_aspect_ratio.den;
+
+    if (ctx->kvz_params) {
+        AVDictionary *dict = NULL;
+        if (!av_dict_parse_string(&dict, ctx->kvz_params, "=", ",", 0)) {
+            AVDictionaryEntry *entry = NULL;
+            while ((entry = av_dict_get(dict, "", entry, AV_DICT_IGNORE_SUFFIX))) {
+                if (!api->config_parse(cfg, entry->key, entry->value)) {
+                    av_log(avctx, AV_LOG_WARNING, "Invalid option: %s=%s.\n",
+                           entry->key, entry->value);
+                }
+            }
+        }
+        av_dict_free(&dict); /* a failed parse can still leave entries behind */
+    }
+
+    ctx->encoder = enc = api->encoder_open(cfg);
+    if (!enc) {
+        av_log(avctx, AV_LOG_ERROR, "Could not open kvazaar encoder.\n");
+        return AVERROR_BUG;
+    }
+
+    if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
+        kvz_data_chunk *data_out = NULL, *chunk = NULL;
+        uint32_t len_out;
+        uint8_t *p;
+
+        if (!api->encoder_headers(enc, &data_out, &len_out))
+            return AVERROR(ENOMEM);
+
+        avctx->extradata = p = av_mallocz(len_out + AV_INPUT_BUFFER_PADDING_SIZE);
+        if (!p) {
+            api->chunk_free(data_out);
+            return AVERROR(ENOMEM);
+        }
+
+        avctx->extradata_size = len_out;
+        for (chunk = data_out; chunk != NULL; chunk = chunk->next) {
+            memcpy(p, chunk->data, chunk->len);
+            p += chunk->len;
+        }
+        api->chunk_free(data_out);
+    }
+
+    return 0;
+}
+
+/* Close the encoder, destroy its config and free the extradata. Always returns 0. */
+static av_cold int libkvazaar_close(AVCodecContext *avctx)
+{
+    LibkvazaarContext *ctx = avctx->priv_data;
+
+    if (ctx->api) {
+        ctx->api->encoder_close(ctx->encoder);
+        ctx->api->config_destroy(ctx->config);
+    }
+
+    av_freep(&avctx->extradata); /* no NULL guard needed; av_freep() also resets the pointer */
+
+    return 0;
+}
+
+/* Feed one frame (or NULL, which hands kvazaar no input picture) to the encoder and
+ * emit a packet when it returns data. Returns 0 or a negative AVERROR code. */
+static int libkvazaar_encode(AVCodecContext *avctx, AVPacket *avpkt,
+                             const AVFrame *frame, int *got_packet_ptr)
+{
+    LibkvazaarContext *ctx = avctx->priv_data;
+    kvz_picture *input_pic = NULL;
+    kvz_picture *recon_pic = NULL;
+    kvz_frame_info frame_info;
+    kvz_data_chunk *data_out = NULL;
+    uint32_t len_out = 0;
+    int retval = 0;
+
+    *got_packet_ptr = 0;
+
+    if (frame) {
+        if (frame->width != ctx->config->width ||
+                frame->height != ctx->config->height) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "Changing video dimensions during encoding is not supported. "
+                   "(changed from %dx%d to %dx%d)\n",
+                   ctx->config->width, ctx->config->height,
+                   frame->width, frame->height);
+            retval = AVERROR_INVALIDDATA;
+            goto done;
+        }
+
+        if (frame->format != avctx->pix_fmt) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "Changing pixel format during encoding is not supported. "
+                   "(changed from %s to %s)\n",
+                   av_get_pix_fmt_name(avctx->pix_fmt),
+                   av_get_pix_fmt_name(frame->format));
+            retval = AVERROR_INVALIDDATA;
+            goto done;
+        }
+
+        // Allocate input picture for kvazaar.
+        input_pic = ctx->api->picture_alloc(frame->width, frame->height);
+        if (!input_pic) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to allocate picture.\n");
+            retval = AVERROR(ENOMEM);
+            goto done;
+        }
+
+        // Copy pixels from frame to input_pic; chroma rows are half the luma width.
+        {
+            int dst_linesizes[4] = {
+              frame->width,
+              frame->width / 2,
+              frame->width / 2,
+              0
+            };
+            av_image_copy(input_pic->data, dst_linesizes,
+                          frame->data, frame->linesize,
+                          frame->format, frame->width, frame->height);
+        }
+
+        input_pic->pts = frame->pts;
+    }
+
+    retval = ctx->api->encoder_encode(ctx->encoder,
+                                      input_pic,
+                                      &data_out, &len_out,
+                                      &recon_pic, NULL,
+                                      &frame_info);
+    if (!retval) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to encode frame.\n");
+        retval = AVERROR_INVALIDDATA;
+        goto done;
+    }
+    /* kvazaar returns 1 on success; from here on retval is 0 or an AVERROR code. */
+    retval = 0;
+
+    if (data_out) {
+        kvz_data_chunk *chunk = NULL;
+        uint64_t written = 0;
+
+        retval = ff_alloc_packet2(avctx, avpkt, len_out, len_out);
+        if (retval < 0) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to allocate output packet.\n");
+            goto done;
+        }
+
+        for (chunk = data_out; chunk != NULL; chunk = chunk->next) {
+            av_assert0(written + chunk->len <= len_out);
+            memcpy(avpkt->data + written, chunk->data, chunk->len);
+            written += chunk->len;
+        }
+
+        avpkt->pts = recon_pic->pts;
+        avpkt->dts = recon_pic->dts;
+        avpkt->flags = 0;
+        // IRAP VCL NAL unit types span the range
+        // [BLA_W_LP (16), RSV_IRAP_VCL23 (23)].
+        if (frame_info.nal_unit_type >= KVZ_NAL_BLA_W_LP &&
+                frame_info.nal_unit_type <= KVZ_NAL_RSV_IRAP_VCL23) {
+            avpkt->flags |= AV_PKT_FLAG_KEY;
+        }
+
+        *got_packet_ptr = 1;
+    }
+
+done:
+    ctx->api->picture_free(input_pic);
+    ctx->api->picture_free(recon_pic);
+    ctx->api->chunk_free(data_out);
+    return retval;
+}
+
+static const enum AVPixelFormat pix_fmts[] = { /* input formats offered by the encoder */
+    AV_PIX_FMT_YUV420P, /* libkvazaar_encode() copies planes with width, width/2, width/2 line sizes */
+    AV_PIX_FMT_NONE
+};
+
+#define OFFSET(x) offsetof(LibkvazaarContext, x) /* byte offset of option field x in the private context */
+#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM /* flags for the option row below */
+static const AVOption options[] = {
+    { "kvazaar-params", "Set kvazaar parameters as a comma-separated list of key=value pairs.",
+        OFFSET(kvz_params), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, VE }, /* parsed in libkvazaar_init() with "=" and "," */
+    { NULL },
+};
+
+static const AVClass class = { /* installed below as .priv_class */
+    .class_name = "libkvazaar",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+static const AVCodecDefault defaults[] = {
+    { "b", "0" }, /* NOTE(review): presumably the bitrate option that feeds cfg->target_bitrate -- confirm */
+    { NULL },
+};
+
+AVCodec ff_libkvazaar_encoder = { /* codec descriptor wiring the three callbacks defined above */
+    .name             = "libkvazaar",
+    .long_name        = NULL_IF_CONFIG_SMALL("libkvazaar H.265 / HEVC"),
+    .type             = AVMEDIA_TYPE_VIDEO,
+    .id               = AV_CODEC_ID_HEVC,
+    .capabilities     = AV_CODEC_CAP_DELAY,
+    .pix_fmts         = pix_fmts,
+
+    .priv_class       = &class,
+    .priv_data_size   = sizeof(LibkvazaarContext),
+    .defaults         = defaults,
+
+    .init             = libkvazaar_init,
+    .encode2          = libkvazaar_encode,
+    .close            = libkvazaar_close,
+
+    .caps_internal    = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
+};
diff --git a/libavcodec/libmp3lame.c b/libavcodec/libmp3lame.c
index b5d50048..5642264a 100644
--- a/libavcodec/libmp3lame.c
+++ b/libavcodec/libmp3lame.c
@@ -111,7 +111,7 @@ static av_cold int mp3lame_encode_init(AVCodecContext *avctx)
         lame_set_quality(s->gfp, avctx->compression_level);
 
     /* rate control */
-    if (avctx->flags & CODEC_FLAG_QSCALE) { // VBR
+    if (avctx->flags & AV_CODEC_FLAG_QSCALE) { // VBR
         lame_set_VBR(s->gfp, vbr_default);
         lame_set_VBR_quality(s->gfp, avctx->global_quality / (float)FF_QP2LAMBDA);
     } else {
@@ -159,7 +159,7 @@ static av_cold int mp3lame_encode_init(AVCodecContext *avctx)
     if (ret < 0)
         goto error;
 
-    s->fdsp = avpriv_float_dsp_alloc(avctx->flags & CODEC_FLAG_BITEXACT);
+    s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
     if (!s->fdsp) {
         ret = AVERROR(ENOMEM);
         goto error;
@@ -246,11 +246,12 @@ static int mp3lame_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     if (s->buffer_index < 4)
         return 0;
     h = AV_RB32(s->buffer);
-    if (ff_mpa_check_header(h) < 0) {
+
+    ret = avpriv_mpegaudio_decode_header(&hdr, h);
+    if (ret < 0) {
         av_log(avctx, AV_LOG_ERROR, "Invalid mp3 header at start of buffer\n");
         return AVERROR_BUG;
-    }
-    if (avpriv_mpegaudio_decode_header(&hdr, h)) {
+    } else if (ret) {
         av_log(avctx, AV_LOG_ERROR, "free format output not supported\n");
         return -1;
     }
@@ -258,7 +259,7 @@ static int mp3lame_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     ff_dlog(avctx, "in:%d packet-len:%d index:%d\n", avctx->frame_size, len,
             s->buffer_index);
     if (len <= s->buffer_index) {
-        if ((ret = ff_alloc_packet2(avctx, avpkt, len)) < 0)
+        if ((ret = ff_alloc_packet2(avctx, avpkt, len, 0)) < 0)
             return ret;
         memcpy(avpkt->data, s->buffer, len);
         s->buffer_index -= len;
@@ -277,9 +278,9 @@ static int mp3lame_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
 #define OFFSET(x) offsetof(LAMEContext, x)
 #define AE AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 static const AVOption options[] = {
-    { "reservoir",    "use bit reservoir", OFFSET(reservoir),    AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, AE },
-    { "joint_stereo", "use joint stereo",  OFFSET(joint_stereo), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, AE },
-    { "abr",          "use ABR",           OFFSET(abr),          AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, AE },
+    { "reservoir",    "use bit reservoir", OFFSET(reservoir),    AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, AE },
+    { "joint_stereo", "use joint stereo",  OFFSET(joint_stereo), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, AE },
+    { "abr",          "use ABR",           OFFSET(abr),          AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AE },
     { NULL },
 };
 
@@ -308,7 +309,7 @@ AVCodec ff_libmp3lame_encoder = {
     .init                  = mp3lame_encode_init,
     .encode2               = mp3lame_encode_frame,
     .close                 = mp3lame_encode_close,
-    .capabilities          = CODEC_CAP_DELAY | CODEC_CAP_SMALL_LAST_FRAME,
+    .capabilities          = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_SMALL_LAST_FRAME,
     .sample_fmts           = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S32P,
                                                              AV_SAMPLE_FMT_FLTP,
                                                              AV_SAMPLE_FMT_S16P,
diff --git a/libavcodec/libopencore-amr.c b/libavcodec/libopencore-amr.c
index 556792ad..f0e34268 100644
--- a/libavcodec/libopencore-amr.c
+++ b/libavcodec/libopencore-amr.c
@@ -135,7 +135,7 @@ AVCodec ff_libopencore_amrnb_decoder = {
     .init           = amr_nb_decode_init,
     .close          = amr_nb_decode_close,
     .decode         = amr_nb_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
 #endif /* CONFIG_LIBOPENCORE_AMRNB_DECODER */
 
@@ -236,7 +236,7 @@ static int amr_nb_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
         s->enc_bitrate = avctx->bit_rate;
     }
 
-    if ((ret = ff_alloc_packet2(avctx, avpkt, 32)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, avpkt, 32, 0)) < 0)
         return ret;
 
     if (frame) {
@@ -287,7 +287,7 @@ AVCodec ff_libopencore_amrnb_encoder = {
     .init           = amr_nb_encode_init,
     .encode2        = amr_nb_encode_frame,
     .close          = amr_nb_encode_close,
-    .capabilities   = CODEC_CAP_DELAY | CODEC_CAP_SMALL_LAST_FRAME,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_SMALL_LAST_FRAME,
     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
                                                      AV_SAMPLE_FMT_NONE },
     .priv_class     = &amrnb_class,
@@ -372,7 +372,7 @@ AVCodec ff_libopencore_amrwb_decoder = {
     .init           = amr_wb_decode_init,
     .close          = amr_wb_decode_close,
     .decode         = amr_wb_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
 
 #endif /* CONFIG_LIBOPENCORE_AMRWB_DECODER */
diff --git a/libavcodec/libopenh264enc.c b/libavcodec/libopenh264enc.c
index 2b7bad3a..6850568e 100644
--- a/libavcodec/libopenh264enc.c
+++ b/libavcodec/libopenh264enc.c
@@ -25,6 +25,7 @@
 #include "libavutil/attributes.h"
 #include "libavutil/common.h"
 #include "libavutil/opt.h"
+#include "libavutil/internal.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/mathematics.h"
 
@@ -37,6 +38,10 @@ typedef struct SVCContext {
     int slice_mode;
     int loopfilter;
     char *profile;
+    int max_nal_size;
+    int skip_frames;
+    int skipped;
+    int cabac;
 } SVCContext;
 
 #define OPENH264_VER_AT_LEAST(maj, min) \
@@ -46,12 +51,16 @@ typedef struct SVCContext {
 #define OFFSET(x) offsetof(SVCContext, x)
 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 static const AVOption options[] = {
-    { "slice_mode", "Slice mode", OFFSET(slice_mode), AV_OPT_TYPE_INT, { .i64 = SM_AUTO_SLICE }, SM_SINGLE_SLICE, SM_RESERVED, VE, "slice_mode" },
-    { "fixed", "A fixed number of slices", 0, AV_OPT_TYPE_CONST, { .i64 = SM_FIXEDSLCNUM_SLICE }, 0, 0, VE, "slice_mode" },
-    { "rowmb", "One slice per row of macroblocks", 0, AV_OPT_TYPE_CONST, { .i64 = SM_ROWMB_SLICE }, 0, 0, VE, "slice_mode" },
-    { "auto", "Automatic number of slices according to number of threads", 0, AV_OPT_TYPE_CONST, { .i64 = SM_AUTO_SLICE }, 0, 0, VE, "slice_mode" },
-    { "loopfilter", "Enable loop filter", OFFSET(loopfilter), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, VE },
-    { "profile", "Set profile restrictions", OFFSET(profile), AV_OPT_TYPE_STRING, { 0 }, 0, 0, VE },
+    { "slice_mode", "set slice mode", OFFSET(slice_mode), AV_OPT_TYPE_INT, { .i64 = SM_AUTO_SLICE }, SM_SINGLE_SLICE, SM_RESERVED, VE, "slice_mode" },
+        { "fixed", "a fixed number of slices", 0, AV_OPT_TYPE_CONST, { .i64 = SM_FIXEDSLCNUM_SLICE }, 0, 0, VE, "slice_mode" },
+        { "rowmb", "one slice per row of macroblocks", 0, AV_OPT_TYPE_CONST, { .i64 = SM_ROWMB_SLICE }, 0, 0, VE, "slice_mode" },
+        { "auto", "automatic number of slices according to number of threads", 0, AV_OPT_TYPE_CONST, { .i64 = SM_AUTO_SLICE }, 0, 0, VE, "slice_mode" },
+        { "dyn", "Dynamic slicing", 0, AV_OPT_TYPE_CONST, { .i64 = SM_DYN_SLICE }, 0, 0, VE, "slice_mode" },
+    { "loopfilter", "enable loop filter", OFFSET(loopfilter), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, VE },
+    { "profile", "set profile restrictions", OFFSET(profile), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, VE },
+    { "max_nal_size", "set maximum NAL size in bytes", OFFSET(max_nal_size), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
+    { "allow_skip_frames", "allow skipping frames to hit the target bitrate", OFFSET(skip_frames), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
+    { "cabac", "Enable cabac", OFFSET(cabac), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
     { NULL }
 };
 
@@ -59,12 +68,39 @@ static const AVClass class = {
     "libopenh264enc", av_default_item_name, options, LIBAVUTIL_VERSION_INT
 };
 
+// Convert a libopenh264 log level (WELS_LOG_*) to the equivalent ffmpeg log level (AV_LOG_*), checking the most verbose level first.
+static int libopenh264_to_ffmpeg_log_level(int libopenh264_log_level)
+{
+    if      (libopenh264_log_level >= WELS_LOG_DETAIL)  return AV_LOG_TRACE;
+    else if (libopenh264_log_level >= WELS_LOG_DEBUG)   return AV_LOG_DEBUG;
+    else if (libopenh264_log_level >= WELS_LOG_INFO)    return AV_LOG_VERBOSE;
+    else if (libopenh264_log_level >= WELS_LOG_WARNING) return AV_LOG_WARNING;
+    else if (libopenh264_log_level >= WELS_LOG_ERROR)   return AV_LOG_ERROR;
+    else                                                return AV_LOG_QUIET; // anything below WELS_LOG_ERROR
+}
+
+// Trace callback handed to the libopenh264 library, which calls it whenever it wants to
+// log a message (error, warning, info, etc.).  ctx is forwarded unchanged to av_log() and
+// level is a libopenh264 log level.  The signature (defined in .../codec/api/svc/codec_api.h) is:
+//
+//        typedef void (*WelsTraceCallback) (void* ctx, int level, const char* string);
+
+static void libopenh264_trace_callback(void *ctx, int level, char const *msg)
+{
+    // No filtering is done here: the message will be logged only if the requested
+    // EQUIVALENT ffmpeg log level is less than or equal to the current ffmpeg log level.
+    int equiv_ffmpeg_log_level = libopenh264_to_ffmpeg_log_level(level);
+    av_log(ctx, equiv_ffmpeg_log_level, "%s\n", msg);
+}
+
 static av_cold int svc_encode_close(AVCodecContext *avctx)
 {
     SVCContext *s = avctx->priv_data;
 
     if (s->encoder)
         WelsDestroySVCEncoder(s->encoder);
+    if (s->skipped > 0)
+        av_log(avctx, AV_LOG_WARNING, "%d frames skipped\n", s->skipped);
     return 0;
 }
 
@@ -73,6 +109,9 @@ static av_cold int svc_encode_init(AVCodecContext *avctx)
     SVCContext *s = avctx->priv_data;
     SEncParamExt param = { 0 };
     int err = AVERROR_UNKNOWN;
+    int log_level;
+    WelsTraceCallback callback_function;
+    AVCPBProperties *props;
 
     // Mingw GCC < 4.7 on x86_32 uses an incorrect/buggy ABI for the WelsGetCodecVersion
     // function (for functions returning larger structs), thus skip the check in those
@@ -90,9 +129,27 @@ static av_cold int svc_encode_init(AVCodecContext *avctx)
         return AVERROR_UNKNOWN;
     }
 
+    // Pass all libopenh264 messages to our callback, to allow ourselves to filter them.
+    log_level = WELS_LOG_DETAIL;
+    (*s->encoder)->SetOption(s->encoder, ENCODER_OPTION_TRACE_LEVEL, &log_level);
+
+    // Set the logging callback function to one that uses av_log() (see implementation above).
+    callback_function = (WelsTraceCallback) libopenh264_trace_callback;
+    (*s->encoder)->SetOption(s->encoder, ENCODER_OPTION_TRACE_CALLBACK, (void *)&callback_function);
+
+    // Set the AVCodecContext as the libopenh264 callback context so that it can be passed to av_log().
+    (*s->encoder)->SetOption(s->encoder, ENCODER_OPTION_TRACE_CALLBACK_CONTEXT, (void *)&avctx);
+
     (*s->encoder)->GetDefaultParams(s->encoder, &param);
 
-    param.fMaxFrameRate              = avctx->time_base.den / avctx->time_base.num;
+#if FF_API_CODER_TYPE
+FF_DISABLE_DEPRECATION_WARNINGS
+    if (!s->cabac)
+        s->cabac = avctx->coder_type == FF_CODER_TYPE_AC;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+    param.fMaxFrameRate              = 1/av_q2d(avctx->time_base);
     param.iPicWidth                  = avctx->width;
     param.iPicHeight                 = avctx->height;
     param.iTargetBitrate             = avctx->bit_rate;
@@ -103,7 +160,7 @@ static av_cold int svc_encode_init(AVCodecContext *avctx)
     param.bEnableDenoise             = 0;
     param.bEnableBackgroundDetection = 1;
     param.bEnableAdaptiveQuant       = 1;
-    param.bEnableFrameSkip           = 0;
+    param.bEnableFrameSkip           = s->skip_frames;
     param.bEnableLongTermReference   = 0;
     param.iLtrMarkPeriod             = 30;
     param.uiIntraPeriod              = avctx->gop_size;
@@ -118,7 +175,7 @@ static av_cold int svc_encode_init(AVCodecContext *avctx)
     param.iMultipleThreadIdc         = avctx->thread_count;
     if (s->profile && !strcmp(s->profile, "main"))
         param.iEntropyCodingModeFlag = 1;
-    else if (!s->profile && avctx->coder_type == FF_CODER_TYPE_AC)
+    else if (!s->profile && s->cabac)
         param.iEntropyCodingModeFlag = 1;
 
     param.sSpatialLayers[0].iVideoWidth         = param.iPicWidth;
@@ -127,23 +184,43 @@ static av_cold int svc_encode_init(AVCodecContext *avctx)
     param.sSpatialLayers[0].iSpatialBitrate     = param.iTargetBitrate;
     param.sSpatialLayers[0].iMaxSpatialBitrate  = param.iMaxBitrate;
 
+    if ((avctx->slices > 1) && (s->max_nal_size)){
+        av_log(avctx,AV_LOG_ERROR,"Invalid combination -slices %d and -max_nal_size %d.\n",avctx->slices,s->max_nal_size);
+        goto fail;
+    }
+
     if (avctx->slices > 1)
         s->slice_mode = SM_FIXEDSLCNUM_SLICE;
+
+    if (s->max_nal_size)
+        s->slice_mode = SM_DYN_SLICE;
+
     param.sSpatialLayers[0].sSliceCfg.uiSliceMode               = s->slice_mode;
     param.sSpatialLayers[0].sSliceCfg.sSliceArgument.uiSliceNum = avctx->slices;
 
+    if (s->slice_mode == SM_DYN_SLICE) {
+        if (s->max_nal_size){
+            param.uiMaxNalSize = s->max_nal_size;
+            param.sSpatialLayers[0].sSliceCfg.sSliceArgument.uiSliceSizeConstraint = s->max_nal_size;
+        } else {
+            av_log(avctx, AV_LOG_ERROR, "Invalid -max_nal_size, "
+                   "specify a valid max_nal_size to use -slice_mode dyn\n");
+            goto fail;
+        }
+    }
+
     if ((*s->encoder)->InitializeExt(s->encoder, &param) != cmResultSuccess) {
         av_log(avctx, AV_LOG_ERROR, "Initialize failed\n");
         goto fail;
     }
 
-    if (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) {
+    if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
         SFrameBSInfo fbi = { 0 };
         int i, size = 0;
         (*s->encoder)->EncodeParameterSets(s->encoder, &fbi);
         for (i = 0; i < fbi.sLayerInfo[0].iNalCount; i++)
             size += fbi.sLayerInfo[0].pNalLengthInByte[i];
-        avctx->extradata = av_mallocz(size + FF_INPUT_BUFFER_PADDING_SIZE);
+        avctx->extradata = av_mallocz(size + AV_INPUT_BUFFER_PADDING_SIZE);
         if (!avctx->extradata) {
             err = AVERROR(ENOMEM);
             goto fail;
@@ -152,6 +229,14 @@ static av_cold int svc_encode_init(AVCodecContext *avctx)
         memcpy(avctx->extradata, fbi.sLayerInfo[0].pBsBuf, size);
     }
 
+    props = ff_add_cpb_side_data(avctx);
+    if (!props) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+    props->max_bitrate = param.iMaxBitrate;
+    props->avg_bitrate = param.iTargetBitrate;
+
     return 0;
 
 fail:
@@ -184,6 +269,7 @@ static int svc_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
         return AVERROR_UNKNOWN;
     }
     if (fbi.eFrameType == videoFrameTypeSkip) {
+        s->skipped++;
         av_log(avctx, AV_LOG_DEBUG, "frame skipped\n");
         return 0;
     }
@@ -192,7 +278,7 @@ static int svc_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     // frames have two layers, where the first layer contains the SPS/PPS.
     // If using global headers, don't include the SPS/PPS in the returned
     // packet - thus, only return one layer.
-    if (avctx->flags & CODEC_FLAG_GLOBAL_HEADER)
+    if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER)
         first_layer = fbi.iLayerNum - 1;
 
     for (layer = first_layer; layer < fbi.iLayerNum; layer++) {
@@ -202,7 +288,7 @@ static int svc_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     }
     av_log(avctx, AV_LOG_DEBUG, "%d slices\n", fbi.sLayerInfo[fbi.iLayerNum - 1].iNalCount);
 
-    if ((ret = ff_alloc_packet(avpkt, size))) {
+    if ((ret = ff_alloc_packet2(avctx, avpkt, size, size))) {
         av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
         return ret;
     }
@@ -227,7 +313,7 @@ AVCodec ff_libopenh264_encoder = {
     .init           = svc_encode_init,
     .encode2        = svc_encode_frame,
     .close          = svc_encode_close,
-    .capabilities   = CODEC_CAP_AUTO_THREADS,
+    .capabilities   = AV_CODEC_CAP_AUTO_THREADS,
     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P,
                                                     AV_PIX_FMT_NONE },
     .priv_class     = &class,
diff --git a/libavcodec/libopenjpegdec.c b/libavcodec/libopenjpegdec.c
index 7f28e87d..cae3d209 100644
--- a/libavcodec/libopenjpegdec.c
+++ b/libavcodec/libopenjpegdec.c
@@ -36,10 +36,22 @@
 #include "internal.h"
 #include "thread.h"
 
-#if HAVE_OPENJPEG_1_5_OPENJPEG_H
-# include <openjpeg-1.5/openjpeg.h>
+#if HAVE_OPENJPEG_2_1_OPENJPEG_H
+#  include <openjpeg-2.1/openjpeg.h>
+#elif HAVE_OPENJPEG_2_0_OPENJPEG_H
+#  include <openjpeg-2.0/openjpeg.h>
+#elif HAVE_OPENJPEG_1_5_OPENJPEG_H
+#  include <openjpeg-1.5/openjpeg.h>
 #else
-# include <openjpeg.h>
+#  include <openjpeg.h>
+#endif
+
+#if HAVE_OPENJPEG_2_1_OPENJPEG_H || HAVE_OPENJPEG_2_0_OPENJPEG_H
+#  define OPENJPEG_MAJOR_VERSION 2
+#  define OPJ(x) OPJ_##x
+#else
+#  define OPENJPEG_MAJOR_VERSION 1
+#  define OPJ(x) x
 #endif
 
 #define JP2_SIG_TYPE    0x6A502020
@@ -83,7 +95,9 @@ static const enum AVPixelFormat libopenjpeg_all_pix_fmts[]  = {
 typedef struct LibOpenJPEGContext {
     AVClass *class;
     opj_dparameters_t dec_params;
+#if OPENJPEG_MAJOR_VERSION == 1
     opj_event_mgr_t event_mgr;
+#endif // OPENJPEG_MAJOR_VERSION == 1
     int lowqual;
 } LibOpenJPEGContext;
 
@@ -102,6 +116,62 @@ static void info_callback(const char *msg, void *data)
     av_log(data, AV_LOG_DEBUG, "%s", msg);
 }
 
+#if OPENJPEG_MAJOR_VERSION == 2
+typedef struct BufferReader {
+    int pos;                /* current read offset into buffer, in bytes */
+    int size;               /* total number of bytes in buffer */
+    const uint8_t *buffer;  /* data the stream callbacks read from */
+} BufferReader;
+
+static OPJ_SIZE_T stream_read(void *out_buffer, OPJ_SIZE_T nb_bytes, void *user_data)
+{   /* stream read callback: copy up to nb_bytes from the BufferReader in user_data into out_buffer */
+    BufferReader *reader = user_data;
+    if (reader->pos == reader->size) {
+        return (OPJ_SIZE_T)-1; /* nothing left to read */
+    }
+    int remaining = reader->size - reader->pos;
+    if (nb_bytes > remaining) {
+        nb_bytes = remaining; /* short read: clamp to the bytes that are left */
+    }
+    memcpy(out_buffer, reader->buffer + reader->pos, nb_bytes);
+    reader->pos += (int)nb_bytes;
+    return nb_bytes; /* number of bytes actually copied */
+}
+
+static OPJ_OFF_T stream_skip(OPJ_OFF_T nb_bytes, void *user_data)
+{   /* stream skip callback: move the read position by nb_bytes (may be negative), clamped to the buffer */
+    BufferReader *reader = user_data;
+    if (nb_bytes < 0) {
+        if (reader->pos == 0) {
+            return (OPJ_OFF_T)-1; /* already at the start; the sentinel must have the return type, not OPJ_SIZE_T */
+        }
+        if (nb_bytes + reader->pos < 0) {
+            nb_bytes = -reader->pos; /* clamp a backward skip to the start of the buffer */
+        }
+    } else {
+        if (reader->pos == reader->size) {
+            return (OPJ_OFF_T)-1; /* already at the end; (OPJ_SIZE_T)-1 would not equal -1 where size_t is 32 bits */
+        }
+        int remaining = reader->size - reader->pos;
+        if (nb_bytes > remaining) {
+            nb_bytes = remaining; /* clamp a forward skip to the end of the buffer */
+        }
+    }
+    reader->pos += (int)nb_bytes;
+    return nb_bytes; /* distance actually moved (negative for a backward skip) */
+}
+
+static OPJ_BOOL stream_seek(OPJ_OFF_T nb_bytes, void *user_data)
+{   /* stream seek callback: set the absolute read position of the BufferReader in user_data */
+    BufferReader *reader = user_data;
+    if (nb_bytes < 0 || nb_bytes > reader->size) {
+        return OPJ_FALSE; /* target lies outside the buffer; position is left unchanged */
+    }
+    reader->pos = (int)nb_bytes;
+    return OPJ_TRUE;
+}
+#endif // OPENJPEG_MAJOR_VERSION == 2
+
 static inline int libopenjpeg_matches_pix_fmt(const opj_image_t *image, enum AVPixelFormat pix_fmt)
 {
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
@@ -114,22 +184,22 @@ static inline int libopenjpeg_matches_pix_fmt(const opj_image_t *image, enum AVP
     switch (desc->nb_components) {
     case 4:
         match = match &&
-                desc->comp[3].depth_minus1 + 1 >= image->comps[3].prec &&
+                desc->comp[3].depth >= image->comps[3].prec &&
                 1 == image->comps[3].dx &&
                 1 == image->comps[3].dy;
     case 3:
         match = match &&
-                desc->comp[2].depth_minus1 + 1 >= image->comps[2].prec &&
+                desc->comp[2].depth >= image->comps[2].prec &&
                 1 << desc->log2_chroma_w == image->comps[2].dx &&
                 1 << desc->log2_chroma_h == image->comps[2].dy;
     case 2:
         match = match &&
-                desc->comp[1].depth_minus1 + 1 >= image->comps[1].prec &&
+                desc->comp[1].depth >= image->comps[1].prec &&
                 1 << desc->log2_chroma_w == image->comps[1].dx &&
                 1 << desc->log2_chroma_h == image->comps[1].dy;
     case 1:
         match = match &&
-                desc->comp[0].depth_minus1 + 1 >= image->comps[0].prec &&
+                desc->comp[0].depth >= image->comps[0].prec &&
                 1 == image->comps[0].dx &&
                 1 == image->comps[0].dy;
     default:
@@ -145,15 +215,15 @@ static inline enum AVPixelFormat libopenjpeg_guess_pix_fmt(const opj_image_t *im
     int possible_fmts_nb = 0;
 
     switch (image->color_space) {
-    case CLRSPC_SRGB:
+    case OPJ(CLRSPC_SRGB):
         possible_fmts    = libopenjpeg_rgb_pix_fmts;
         possible_fmts_nb = FF_ARRAY_ELEMS(libopenjpeg_rgb_pix_fmts);
         break;
-    case CLRSPC_GRAY:
+    case OPJ(CLRSPC_GRAY):
         possible_fmts    = libopenjpeg_gray_pix_fmts;
         possible_fmts_nb = FF_ARRAY_ELEMS(libopenjpeg_gray_pix_fmts);
         break;
-    case CLRSPC_SYCC:
+    case OPJ(CLRSPC_SYCC):
         possible_fmts    = libopenjpeg_yuv_pix_fmts;
         possible_fmts_nb = FF_ARRAY_ELEMS(libopenjpeg_yuv_pix_fmts);
         break;
@@ -204,7 +274,7 @@ static inline void libopenjpeg_copy_to_packed16(AVFrame *picture, opj_image_t *i
     int index, x, y, c;
     int adjust[4];
     for (x = 0; x < image->numcomps; x++)
-        adjust[x] = FFMAX(FFMIN(desc->comp[x].depth_minus1 + 1 - image->comps[x].prec, 8), 0) + desc->comp[x].shift;
+        adjust[x] = FFMAX(FFMIN(desc->comp[x].depth - image->comps[x].prec, 8), 0) + desc->comp[x].shift;
 
     for (y = 0; y < picture->height; y++) {
         index   = y * picture->width;
@@ -241,7 +311,7 @@ static inline void libopenjpeg_copyto16(AVFrame *picture, opj_image_t *image) {
     int index, x, y;
     int adjust[4];
     for (x = 0; x < image->numcomps; x++)
-        adjust[x] = FFMAX(FFMIN(desc->comp[x].depth_minus1 + 1 - image->comps[x].prec, 8), 0) + desc->comp[x].shift;
+        adjust[x] = FFMAX(FFMIN(desc->comp[x].depth - image->comps[x].prec, 8), 0) + desc->comp[x].shift;
 
     for (index = 0; index < image->numcomps; index++) {
         comp_data = image->comps[index].data;
@@ -275,13 +345,19 @@ static int libopenjpeg_decode_frame(AVCodecContext *avctx,
     ThreadFrame frame       = { .f = data };
     AVFrame *picture        = data;
     const AVPixFmtDescriptor *desc;
-    opj_dinfo_t *dec;
-    opj_cio_t *stream;
-    opj_image_t *image;
     int width, height, ret;
     int pixel_size = 0;
     int ispacked   = 0;
     int i;
+    opj_image_t *image = NULL;
+#if OPENJPEG_MAJOR_VERSION == 1
+    opj_dinfo_t *dec = NULL;
+    opj_cio_t *stream = NULL;
+#else // OPENJPEG_MAJOR_VERSION == 2
+    BufferReader reader = {0, avpkt->size, avpkt->data};
+    opj_codec_t *dec = NULL;
+    opj_stream_t *stream = NULL;
+#endif // OPENJPEG_MAJOR_VERSION == 1
 
     *got_frame = 0;
 
@@ -289,19 +365,22 @@ static int libopenjpeg_decode_frame(AVCodecContext *avctx,
     if ((AV_RB32(buf) == 12) &&
         (AV_RB32(buf + 4) == JP2_SIG_TYPE) &&
         (AV_RB32(buf + 8) == JP2_SIG_VALUE)) {
-        dec = opj_create_decompress(CODEC_JP2);
+        dec = opj_create_decompress(OPJ(CODEC_JP2));
     } else {
         /* If the AVPacket contains a jp2c box, then skip to
          * the starting byte of the codestream. */
         if (AV_RB32(buf + 4) == AV_RB32("jp2c"))
             buf += 8;
-        dec = opj_create_decompress(CODEC_J2K);
+        dec = opj_create_decompress(OPJ(CODEC_J2K));
     }
 
     if (!dec) {
         av_log(avctx, AV_LOG_ERROR, "Error initializing decoder.\n");
-        return AVERROR_UNKNOWN;
+        ret = AVERROR_EXTERNAL;
+        goto done;
     }
+
+#if OPENJPEG_MAJOR_VERSION == 1
     memset(&ctx->event_mgr, 0, sizeof(ctx->event_mgr));
     ctx->event_mgr.info_handler    = info_callback;
     ctx->event_mgr.error_handler   = error_callback;
@@ -309,25 +388,61 @@ static int libopenjpeg_decode_frame(AVCodecContext *avctx,
     opj_set_event_mgr((opj_common_ptr) dec, &ctx->event_mgr, avctx);
     ctx->dec_params.cp_limit_decoding = LIMIT_TO_MAIN_HEADER;
     ctx->dec_params.cp_layer          = ctx->lowqual;
+#else // OPENJPEG_MAJOR_VERSION == 2
+    if (!opj_set_error_handler(dec, error_callback, avctx) ||
+        !opj_set_warning_handler(dec, warning_callback, avctx) ||
+        !opj_set_info_handler(dec, info_callback, avctx)) {
+        av_log(avctx, AV_LOG_ERROR, "Error setting decoder handlers.\n");
+        ret = AVERROR_EXTERNAL;
+        goto done;
+    }
+
+    ctx->dec_params.cp_layer = ctx->lowqual;
+    ctx->dec_params.cp_reduce = avctx->lowres;
+#endif // OPENJPEG_MAJOR_VERSION == 1
+
     // Tie decoder with decoding parameters
     opj_setup_decoder(dec, &ctx->dec_params);
+
+#if OPENJPEG_MAJOR_VERSION == 1
     stream = opj_cio_open((opj_common_ptr) dec, buf, buf_size);
+#else // OPENJPEG_MAJOR_VERSION == 2
+    stream = opj_stream_default_create(OPJ_STREAM_READ);
+#endif // OPENJPEG_MAJOR_VERSION == 1
 
     if (!stream) {
         av_log(avctx, AV_LOG_ERROR,
                "Codestream could not be opened for reading.\n");
-        opj_destroy_decompress(dec);
-        return AVERROR_UNKNOWN;
+        ret = AVERROR_EXTERNAL;
+        goto done;
     }
 
+#if OPENJPEG_MAJOR_VERSION == 1
     // Decode the header only.
     image = opj_decode_with_info(dec, stream, NULL);
     opj_cio_close(stream);
+    stream = NULL;
+    ret = !image;
+#else // OPENJPEG_MAJOR_VERSION == 2
+    opj_stream_set_read_function(stream, stream_read);
+    opj_stream_set_skip_function(stream, stream_skip);
+    opj_stream_set_seek_function(stream, stream_seek);
+#if HAVE_OPENJPEG_2_1_OPENJPEG_H
+    opj_stream_set_user_data(stream, &reader, NULL);
+#elif HAVE_OPENJPEG_2_0_OPENJPEG_H
+    opj_stream_set_user_data(stream, &reader);
+#else
+#error Missing call to opj_stream_set_user_data
+#endif
+    opj_stream_set_user_data_length(stream, avpkt->size);
+    // Decode the header only.
+    ret = !opj_read_header(stream, dec, &image);
+#endif // OPENJPEG_MAJOR_VERSION == 1
 
-    if (!image) {
-        av_log(avctx, AV_LOG_ERROR, "Error decoding codestream.\n");
-        opj_destroy_decompress(dec);
-        return AVERROR_UNKNOWN;
+    if (ret) {
+        av_log(avctx, AV_LOG_ERROR, "Error decoding codestream header.\n");
+        ret = AVERROR_EXTERNAL;
+        goto done;
     }
 
     width  = image->x1 - image->x0;
@@ -345,7 +460,8 @@ static int libopenjpeg_decode_frame(AVCodecContext *avctx,
         avctx->pix_fmt = libopenjpeg_guess_pix_fmt(image);
 
     if (avctx->pix_fmt == AV_PIX_FMT_NONE) {
-        av_log(avctx, AV_LOG_ERROR, "Unable to determine pixel format\n");
+        av_log(avctx, AV_LOG_ERROR, "Unable to determine pixel format.\n");
+        ret = AVERROR_UNKNOWN;
         goto done;
     }
     for (i = 0; i < image->numcomps; i++)
@@ -355,6 +471,7 @@ static int libopenjpeg_decode_frame(AVCodecContext *avctx,
     if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0)
         goto done;
 
+#if OPENJPEG_MAJOR_VERSION == 1
     ctx->dec_params.cp_limit_decoding = NO_LIMITATION;
     ctx->dec_params.cp_reduce = avctx->lowres;
     // Tie decoder with decoding parameters.
@@ -363,18 +480,20 @@ static int libopenjpeg_decode_frame(AVCodecContext *avctx,
     if (!stream) {
         av_log(avctx, AV_LOG_ERROR,
                "Codestream could not be opened for reading.\n");
-        ret = AVERROR_UNKNOWN;
+        ret = AVERROR_EXTERNAL;
         goto done;
     }
-
     opj_image_destroy(image);
     // Decode the codestream
     image = opj_decode_with_info(dec, stream, NULL);
-    opj_cio_close(stream);
+    ret = !image;
+#else // OPENJPEG_MAJOR_VERSION == 2
+    ret = !opj_decode(dec, stream, image);
+#endif // OPENJPEG_MAJOR_VERSION == 1
 
-    if (!image) {
+    if (ret) {
         av_log(avctx, AV_LOG_ERROR, "Error decoding codestream.\n");
-        ret = AVERROR_UNKNOWN;
+        ret = AVERROR_EXTERNAL;
         goto done;
     }
 
@@ -388,7 +507,7 @@ static int libopenjpeg_decode_frame(AVCodecContext *avctx,
     }
 
     desc       = av_pix_fmt_desc_get(avctx->pix_fmt);
-    pixel_size = desc->comp[0].step_minus1 + 1;
+    pixel_size = desc->comp[0].step;
     ispacked   = libopenjpeg_ispacked(avctx->pix_fmt);
 
     switch (pixel_size) {
@@ -429,7 +548,13 @@ static int libopenjpeg_decode_frame(AVCodecContext *avctx,
 
 done:
     opj_image_destroy(image);
+#if OPENJPEG_MAJOR_VERSION == 2
+    opj_stream_destroy(stream);
+    opj_destroy_codec(dec);
+#else
+    opj_cio_close(stream);
     opj_destroy_decompress(dec);
+#endif
     return ret;
 }
 
@@ -439,7 +564,7 @@ static av_cold void libopenjpeg_static_init(AVCodec *codec)
     int major, minor;
 
     if (sscanf(version, "%d.%d", &major, &minor) == 2 && 1000*major + minor <= 1003)
-        codec->capabilities |= CODEC_CAP_EXPERIMENTAL;
+        codec->capabilities |= AV_CODEC_CAP_EXPERIMENTAL;
 }
 
 #define OFFSET(x) offsetof(LibOpenJPEGContext, x)
@@ -466,7 +591,7 @@ AVCodec ff_libopenjpeg_decoder = {
     .priv_data_size = sizeof(LibOpenJPEGContext),
     .init           = libopenjpeg_decode_init,
     .decode         = libopenjpeg_decode_frame,
-    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
     .max_lowres     = 31,
     .priv_class     = &openjpeg_class,
     .init_static_data = libopenjpeg_static_init,
diff --git a/libavcodec/libopenjpegenc.c b/libavcodec/libopenjpegenc.c
index 014c2f9d..b67c320b 100644
--- a/libavcodec/libopenjpegenc.c
+++ b/libavcodec/libopenjpegenc.c
@@ -34,17 +34,31 @@
 #include "avcodec.h"
 #include "internal.h"
 
-#if HAVE_OPENJPEG_1_5_OPENJPEG_H
-# include <openjpeg-1.5/openjpeg.h>
+#if HAVE_OPENJPEG_2_1_OPENJPEG_H
+#  include <openjpeg-2.1/openjpeg.h>
+#elif HAVE_OPENJPEG_2_0_OPENJPEG_H
+#  include <openjpeg-2.0/openjpeg.h>
+#elif HAVE_OPENJPEG_1_5_OPENJPEG_H
+#  include <openjpeg-1.5/openjpeg.h>
 #else
-# include <openjpeg.h>
+#  include <openjpeg.h>
+#endif
+
+#if HAVE_OPENJPEG_2_1_OPENJPEG_H || HAVE_OPENJPEG_2_0_OPENJPEG_H
+#  define OPENJPEG_MAJOR_VERSION 2
+#  define OPJ(x) OPJ_##x
+#else
+#  define OPENJPEG_MAJOR_VERSION 1
+#  define OPJ(x) x
 #endif
 
 typedef struct LibOpenJPEGContext {
     AVClass *avclass;
     opj_image_t *image;
     opj_cparameters_t enc_params;
+#if OPENJPEG_MAJOR_VERSION == 1
     opj_event_mgr_t event_mgr;
+#endif // OPENJPEG_MAJOR_VERSION == 1
     int format;
     int profile;
     int prog_order;
@@ -71,6 +85,78 @@ static void info_callback(const char *msg, void *data)
     av_log(data, AV_LOG_DEBUG, "%s\n", msg);
 }
 
+#if OPENJPEG_MAJOR_VERSION == 2
+typedef struct PacketWriter {
+    int pos;          /* current write offset into packet->data, in bytes */
+    AVPacket *packet; /* destination packet; the callbacks enlarge it with av_grow_packet() */
+} PacketWriter;
+
+static OPJ_SIZE_T stream_write(void *out_buffer, OPJ_SIZE_T nb_bytes, void *user_data)
+{   /* copy nb_bytes from out_buffer (the source, despite its name) into the packet at writer->pos */
+    PacketWriter *writer = user_data;
+    AVPacket *packet = writer->packet;
+    int remaining = packet->size - writer->pos;
+    if (nb_bytes > remaining) {
+        OPJ_SIZE_T needed = nb_bytes - remaining; /* bytes by which the packet must grow */
+        int max_growth = INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE - packet->size; /* keeps size + padding within int */
+        if (needed > max_growth) {
+            return (OPJ_SIZE_T)-1; /* request would exceed the largest allowed packet */
+        }
+        if (av_grow_packet(packet, (int)needed)) {
+            return (OPJ_SIZE_T)-1; /* growing the packet failed */
+        }
+    }
+    memcpy(packet->data + writer->pos, out_buffer, nb_bytes);
+    writer->pos += (int)nb_bytes;
+    return nb_bytes; /* all requested bytes were written */
+}
+
+static OPJ_OFF_T stream_skip(OPJ_OFF_T nb_bytes, void *user_data)
+{   /* move the write position by nb_bytes (may be negative); a forward skip past the end grows the packet */
+    PacketWriter *writer = user_data;
+    AVPacket *packet = writer->packet;
+    if (nb_bytes < 0) {
+        if (writer->pos == 0) {
+            return (OPJ_OFF_T)-1; /* already at the start; the sentinel must have the return type, not OPJ_SIZE_T */
+        }
+        if (nb_bytes + writer->pos < 0) {
+            nb_bytes = -writer->pos; /* clamp a backward skip to the start of the packet */
+        }
+    } else {
+        int remaining = packet->size - writer->pos;
+        if (nb_bytes > remaining) {
+            OPJ_SIZE_T needed = nb_bytes - remaining; /* bytes by which the packet must grow */
+            int max_growth = INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE - packet->size; /* keeps size + padding within int */
+            if (needed > max_growth) {
+                return (OPJ_OFF_T)-1; /* request would exceed the largest allowed packet */
+            }
+            if (av_grow_packet(packet, (int)needed)) {
+                return (OPJ_OFF_T)-1; /* growing the packet failed */
+            }
+        }
+    }
+    writer->pos += (int)nb_bytes;
+    return nb_bytes; /* distance actually moved (negative for a backward skip) */
+}
+
+static OPJ_BOOL stream_seek(OPJ_OFF_T nb_bytes, void *user_data)
+{   /* set the absolute write position; seeking past the end grows the packet up to that position */
+    PacketWriter *writer = user_data;
+    AVPacket *packet = writer->packet;
+    if (nb_bytes < 0) {
+        return OPJ_FALSE; /* negative positions are rejected */
+    }
+    if (nb_bytes > packet->size) {
+        if (nb_bytes > INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE || /* target must leave room for the padding within int */
+            av_grow_packet(packet, (int)nb_bytes - packet->size)) {
+            return OPJ_FALSE; /* position left unchanged on failure */
+        }
+    }
+    writer->pos = (int)nb_bytes;
+    return OPJ_TRUE;
+}
+#endif // OPENJPEG_MAJOR_VERSION == 2
+
 static void cinema_parameters(opj_cparameters_t *p)
 {
     p->tile_size_on = 0;
@@ -93,7 +179,7 @@ static void cinema_parameters(opj_cparameters_t *p)
     p->csty |= 0x01;
 
     /* The progression order shall be CPRL */
-    p->prog_order = CPRL;
+    p->prog_order = OPJ(CPRL);
 
     /* No ROI */
     p->roi_compno = -1;
@@ -117,7 +203,7 @@ static opj_image_t *mj2_create_image(AVCodecContext *avctx, opj_cparameters_t *p
     int sub_dx[4];
     int sub_dy[4];
     int numcomps;
-    OPJ_COLOR_SPACE color_space = CLRSPC_UNKNOWN;
+    OPJ_COLOR_SPACE color_space = OPJ(CLRSPC_UNKNOWN);
 
     sub_dx[0] = sub_dx[3] = 1;
     sub_dy[0] = sub_dy[3] = 1;
@@ -131,7 +217,7 @@ static opj_image_t *mj2_create_image(AVCodecContext *avctx, opj_cparameters_t *p
     case AV_PIX_FMT_YA8:
     case AV_PIX_FMT_GRAY16:
     case AV_PIX_FMT_YA16:
-        color_space = CLRSPC_GRAY;
+        color_space = OPJ(CLRSPC_GRAY);
         break;
     case AV_PIX_FMT_RGB24:
     case AV_PIX_FMT_RGBA:
@@ -144,7 +230,7 @@ static opj_image_t *mj2_create_image(AVCodecContext *avctx, opj_cparameters_t *p
     case AV_PIX_FMT_GBRP14:
     case AV_PIX_FMT_GBRP16:
     case AV_PIX_FMT_XYZ12:
-        color_space = CLRSPC_SRGB;
+        color_space = OPJ(CLRSPC_SRGB);
         break;
     case AV_PIX_FMT_YUV410P:
     case AV_PIX_FMT_YUV411P:
@@ -179,7 +265,7 @@ static opj_image_t *mj2_create_image(AVCodecContext *avctx, opj_cparameters_t *p
     case AV_PIX_FMT_YUVA420P16:
     case AV_PIX_FMT_YUVA422P16:
     case AV_PIX_FMT_YUVA444P16:
-        color_space = CLRSPC_SYCC;
+        color_space = OPJ(CLRSPC_SYCC);
         break;
     default:
         av_log(avctx, AV_LOG_ERROR,
@@ -189,8 +275,8 @@ static opj_image_t *mj2_create_image(AVCodecContext *avctx, opj_cparameters_t *p
     }
 
     for (i = 0; i < numcomps; i++) {
-        cmptparm[i].prec = desc->comp[i].depth_minus1 + 1;
-        cmptparm[i].bpp  = desc->comp[i].depth_minus1 + 1;
+        cmptparm[i].prec = desc->comp[i].depth;
+        cmptparm[i].bpp  = desc->comp[i].depth;
         cmptparm[i].sgnd = 0;
         cmptparm[i].dx = sub_dx[i];
         cmptparm[i].dy = sub_dy[i];
@@ -216,13 +302,57 @@ static opj_image_t *mj2_create_image(AVCodecContext *avctx, opj_cparameters_t *p
 static av_cold int libopenjpeg_encode_init(AVCodecContext *avctx)
 {
     LibOpenJPEGContext *ctx = avctx->priv_data;
-    int err = AVERROR(ENOMEM);
+    int err = 0;
 
     opj_set_default_encoder_parameters(&ctx->enc_params);
 
+#if HAVE_OPENJPEG_2_1_OPENJPEG_H
+    switch (ctx->cinema_mode) {
+    case OPJ_CINEMA2K_24:
+        ctx->enc_params.rsiz = OPJ_PROFILE_CINEMA_2K;
+        ctx->enc_params.max_cs_size = OPJ_CINEMA_24_CS;
+        ctx->enc_params.max_comp_size = OPJ_CINEMA_24_COMP;
+        break;
+    case OPJ_CINEMA2K_48:
+        ctx->enc_params.rsiz = OPJ_PROFILE_CINEMA_2K;
+        ctx->enc_params.max_cs_size = OPJ_CINEMA_48_CS;
+        ctx->enc_params.max_comp_size = OPJ_CINEMA_48_COMP;
+        break;
+    case OPJ_CINEMA4K_24:
+        ctx->enc_params.rsiz = OPJ_PROFILE_CINEMA_4K;
+        ctx->enc_params.max_cs_size = OPJ_CINEMA_24_CS;
+        ctx->enc_params.max_comp_size = OPJ_CINEMA_24_COMP;
+        break;
+    }
+
+    switch (ctx->profile) {
+    case OPJ_CINEMA2K:
+        if (ctx->enc_params.rsiz == OPJ_PROFILE_CINEMA_4K) {
+            err = AVERROR(EINVAL);
+            break;
+        }
+        ctx->enc_params.rsiz = OPJ_PROFILE_CINEMA_2K;
+        break;
+    case OPJ_CINEMA4K:
+        if (ctx->enc_params.rsiz == OPJ_PROFILE_CINEMA_2K) {
+            err = AVERROR(EINVAL);
+            break;
+        }
+        ctx->enc_params.rsiz = OPJ_PROFILE_CINEMA_4K;
+        break;
+    }
+
+    if (err) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Invalid parameter pairing: cinema_mode and profile conflict.\n");
+        goto fail;
+    }
+#else
     ctx->enc_params.cp_rsiz = ctx->profile;
-    ctx->enc_params.mode = !!avctx->global_quality;
     ctx->enc_params.cp_cinema = ctx->cinema_mode;
+#endif
+
+    ctx->enc_params.mode = !!avctx->global_quality;
     ctx->enc_params.prog_order = ctx->prog_order;
     ctx->enc_params.numresolution = ctx->numresolution;
     ctx->enc_params.cp_disto_alloc = ctx->disto_alloc;
@@ -242,18 +372,11 @@ static av_cold int libopenjpeg_encode_init(AVCodecContext *avctx)
         goto fail;
     }
 
-    avctx->coded_frame = av_frame_alloc();
-    if (!avctx->coded_frame) {
-        av_log(avctx, AV_LOG_ERROR, "Error allocating coded frame\n");
-        goto fail;
-    }
-
     return 0;
 
 fail:
     opj_image_destroy(ctx->image);
     ctx->image = NULL;
-    av_frame_free(&avctx->coded_frame);
     return err;
 }
 
@@ -465,10 +588,16 @@ static int libopenjpeg_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 {
     LibOpenJPEGContext *ctx = avctx->priv_data;
     opj_image_t *image      = ctx->image;
+#if OPENJPEG_MAJOR_VERSION == 1
     opj_cinfo_t *compress   = NULL;
     opj_cio_t *stream       = NULL;
+    int len;
+#else // OPENJPEG_MAJOR_VERSION == 2
+    opj_codec_t *compress   = NULL;
+    opj_stream_t *stream    = NULL;
+#endif // OPENJPEG_MAJOR_VERSION == 1
     int cpyresult = 0;
-    int ret, len;
+    int ret;
     AVFrame *gbrframe;
 
     switch (avctx->pix_fmt) {
@@ -560,46 +689,99 @@ static int libopenjpeg_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         return -1;
     }
 
+#if OPENJPEG_MAJOR_VERSION == 2
+    if ((ret = ff_alloc_packet2(avctx, pkt, 1024, 0)) < 0) {
+        return ret;
+    }
+#endif // OPENJPEG_MAJOR_VERSION == 2
+
     compress = opj_create_compress(ctx->format);
     if (!compress) {
         av_log(avctx, AV_LOG_ERROR, "Error creating the compressor\n");
-        return AVERROR(ENOMEM);
+        ret = AVERROR(ENOMEM);
+        goto done;
     }
 
+#if OPENJPEG_MAJOR_VERSION == 1
     opj_setup_encoder(compress, &ctx->enc_params, image);
-
     stream = opj_cio_open((opj_common_ptr) compress, NULL, 0);
+#else // OPENJPEG_MAJOR_VERSION == 2
+    if (!opj_set_error_handler(compress, error_callback, avctx) ||
+        !opj_set_warning_handler(compress, warning_callback, avctx) ||
+        !opj_set_info_handler(compress, info_callback, avctx)) {
+        av_log(avctx, AV_LOG_ERROR, "Error setting the compressor handlers\n");
+        ret = AVERROR_EXTERNAL;
+        goto done;
+    }
+
+    if (!opj_setup_encoder(compress, &ctx->enc_params, image)) {
+        av_log(avctx, AV_LOG_ERROR, "Error setting up the compressor\n");
+        ret = AVERROR_EXTERNAL;
+        goto done;
+    }
+    stream = opj_stream_default_create(OPJ_STREAM_WRITE);
+#endif // OPENJPEG_MAJOR_VERSION == 1
+
     if (!stream) {
         av_log(avctx, AV_LOG_ERROR, "Error creating the cio stream\n");
-        return AVERROR(ENOMEM);
+        ret = AVERROR(ENOMEM);
+        goto done;
     }
-
+#if OPENJPEG_MAJOR_VERSION == 1
     memset(&ctx->event_mgr, 0, sizeof(ctx->event_mgr));
     ctx->event_mgr.info_handler    = info_callback;
     ctx->event_mgr.error_handler   = error_callback;
     ctx->event_mgr.warning_handler = warning_callback;
     opj_set_event_mgr((opj_common_ptr) compress, &ctx->event_mgr, avctx);
-
     if (!opj_encode(compress, stream, image, NULL)) {
         av_log(avctx, AV_LOG_ERROR, "Error during the opj encode\n");
-        return -1;
+        ret = AVERROR_EXTERNAL;
+        goto done;
     }
 
     len = cio_tell(stream);
-    if ((ret = ff_alloc_packet2(avctx, pkt, len)) < 0) {
-        return ret;
+    if ((ret = ff_alloc_packet2(avctx, pkt, len, 0)) < 0) {
+        goto done;
     }
 
     memcpy(pkt->data, stream->buffer, len);
+#else // OPENJPEG_MAJOR_VERSION == 2
+    PacketWriter writer = {0, pkt};
+    opj_stream_set_write_function(stream, stream_write);
+    opj_stream_set_skip_function(stream, stream_skip);
+    opj_stream_set_seek_function(stream, stream_seek);
+#if HAVE_OPENJPEG_2_1_OPENJPEG_H
+    opj_stream_set_user_data(stream, &writer, NULL);
+#elif HAVE_OPENJPEG_2_0_OPENJPEG_H
+    opj_stream_set_user_data(stream, &writer);
+#else
+#error Missing call to opj_stream_set_user_data
+#endif
+
+    if (!opj_start_compress(compress, ctx->image, stream) ||
+        !opj_encode(compress, stream) ||
+        !opj_end_compress(compress, stream)) {
+        av_log(avctx, AV_LOG_ERROR, "Error during the opj encode\n");
+        ret = AVERROR_EXTERNAL;
+        goto done;
+    }
+
+    av_shrink_packet(pkt, writer.pos);
+#endif // OPENJPEG_MAJOR_VERSION == 1
+
     pkt->flags |= AV_PKT_FLAG_KEY;
     *got_packet = 1;
+    ret = 0;
 
+done:
+#if OPENJPEG_MAJOR_VERSION == 2
+    opj_stream_destroy(stream);
+    opj_destroy_codec(compress);
+#else
     opj_cio_close(stream);
-    stream = NULL;
     opj_destroy_compress(compress);
-    compress = NULL;
-
-    return 0;
+#endif
+    return ret;
 }
 
 static av_cold int libopenjpeg_encode_close(AVCodecContext *avctx)
@@ -608,31 +790,30 @@ static av_cold int libopenjpeg_encode_close(AVCodecContext *avctx)
 
     opj_image_destroy(ctx->image);
     ctx->image = NULL;
-    av_frame_free(&avctx->coded_frame);
     return 0;
 }
 
 #define OFFSET(x) offsetof(LibOpenJPEGContext, x)
 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 static const AVOption options[] = {
-    { "format",        "Codec Format",      OFFSET(format),        AV_OPT_TYPE_INT,   { .i64 = CODEC_JP2   }, CODEC_J2K, CODEC_JP2,   VE, "format"      },
-    { "j2k",           NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = CODEC_J2K   }, 0,         0,           VE, "format"      },
-    { "jp2",           NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = CODEC_JP2   }, 0,         0,           VE, "format"      },
-    { "profile",       NULL,                OFFSET(profile),       AV_OPT_TYPE_INT,   { .i64 = STD_RSIZ    }, STD_RSIZ,  CINEMA4K,    VE, "profile"     },
-    { "jpeg2000",      NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = STD_RSIZ    }, 0,         0,           VE, "profile"     },
-    { "cinema2k",      NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = CINEMA2K    }, 0,         0,           VE, "profile"     },
-    { "cinema4k",      NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = CINEMA4K    }, 0,         0,           VE, "profile"     },
-    { "cinema_mode",   "Digital Cinema",    OFFSET(cinema_mode),   AV_OPT_TYPE_INT,   { .i64 = OFF         }, OFF,       CINEMA4K_24, VE, "cinema_mode" },
-    { "off",           NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OFF         }, 0,         0,           VE, "cinema_mode" },
-    { "2k_24",         NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = CINEMA2K_24 }, 0,         0,           VE, "cinema_mode" },
-    { "2k_48",         NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = CINEMA2K_48 }, 0,         0,           VE, "cinema_mode" },
-    { "4k_24",         NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = CINEMA4K_24 }, 0,         0,           VE, "cinema_mode" },
-    { "prog_order",    "Progression Order", OFFSET(prog_order),    AV_OPT_TYPE_INT,   { .i64 = LRCP        }, LRCP,      CPRL,        VE, "prog_order"  },
-    { "lrcp",          NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = LRCP        }, 0,         0,           VE, "prog_order"  },
-    { "rlcp",          NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = RLCP        }, 0,         0,           VE, "prog_order"  },
-    { "rpcl",          NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = RPCL        }, 0,         0,           VE, "prog_order"  },
-    { "pcrl",          NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = PCRL        }, 0,         0,           VE, "prog_order"  },
-    { "cprl",          NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = CPRL        }, 0,         0,           VE, "prog_order"  },
+    { "format",        "Codec Format",      OFFSET(format),        AV_OPT_TYPE_INT,   { .i64 = OPJ(CODEC_JP2)   }, OPJ(CODEC_J2K), OPJ(CODEC_JP2),   VE, "format"      },
+    { "j2k",           NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OPJ(CODEC_J2K)   }, 0,         0,           VE, "format"      },
+    { "jp2",           NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OPJ(CODEC_JP2)   }, 0,         0,           VE, "format"      },
+    { "profile",       NULL,                OFFSET(profile),       AV_OPT_TYPE_INT,   { .i64 = OPJ(STD_RSIZ)    }, OPJ(STD_RSIZ),  OPJ(CINEMA4K),    VE, "profile"     },
+    { "jpeg2000",      NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OPJ(STD_RSIZ)    }, 0,         0,           VE, "profile"     },
+    { "cinema2k",      NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OPJ(CINEMA2K)    }, 0,         0,           VE, "profile"     },
+    { "cinema4k",      NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OPJ(CINEMA4K)    }, 0,         0,           VE, "profile"     },
+    { "cinema_mode",   "Digital Cinema",    OFFSET(cinema_mode),   AV_OPT_TYPE_INT,   { .i64 = OPJ(OFF)         }, OPJ(OFF),       OPJ(CINEMA4K_24), VE, "cinema_mode" },
+    { "off",           NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OPJ(OFF)         }, 0,         0,           VE, "cinema_mode" },
+    { "2k_24",         NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OPJ(CINEMA2K_24) }, 0,         0,           VE, "cinema_mode" },
+    { "2k_48",         NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OPJ(CINEMA2K_48) }, 0,         0,           VE, "cinema_mode" },
+    { "4k_24",         NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OPJ(CINEMA4K_24) }, 0,         0,           VE, "cinema_mode" },
+    { "prog_order",    "Progression Order", OFFSET(prog_order),    AV_OPT_TYPE_INT,   { .i64 = OPJ(LRCP)    }, OPJ(LRCP),  OPJ(CPRL),    VE, "prog_order"  },
+    { "lrcp",          NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OPJ(LRCP)    }, 0,         0,           VE, "prog_order"  },
+    { "rlcp",          NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OPJ(RLCP)    }, 0,         0,           VE, "prog_order"  },
+    { "rpcl",          NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OPJ(RPCL)    }, 0,         0,           VE, "prog_order"  },
+    { "pcrl",          NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OPJ(PCRL)    }, 0,         0,           VE, "prog_order"  },
+    { "cprl",          NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = OPJ(CPRL)    }, 0,         0,           VE, "prog_order"  },
     { "numresolution", NULL,                OFFSET(numresolution), AV_OPT_TYPE_INT,   { .i64 = 6           }, 1,         INT_MAX,     VE                },
     { "numlayers",     NULL,                OFFSET(numlayers),     AV_OPT_TYPE_INT,   { .i64 = 1           }, 1,         10,          VE                },
     { "disto_alloc",   NULL,                OFFSET(disto_alloc),   AV_OPT_TYPE_INT,   { .i64 = 1           }, 0,         1,           VE                },
@@ -657,7 +838,7 @@ AVCodec ff_libopenjpeg_encoder = {
     .init           = libopenjpeg_encode_init,
     .encode2        = libopenjpeg_encode_frame,
     .close          = libopenjpeg_encode_close,
-    .capabilities   = CODEC_CAP_FRAME_THREADS | CODEC_CAP_INTRA_ONLY,
+    .capabilities   = AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_INTRA_ONLY,
     .pix_fmts       = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_RGB24, AV_PIX_FMT_RGBA, AV_PIX_FMT_RGB48,
         AV_PIX_FMT_RGBA64, AV_PIX_FMT_GBR24P,
diff --git a/libavcodec/libopusdec.c b/libavcodec/libopusdec.c
index 8436302a..1e976041 100644
--- a/libavcodec/libopusdec.c
+++ b/libavcodec/libopusdec.c
@@ -23,6 +23,7 @@
 #include <opus_multistream.h>
 
 #include "libavutil/avassert.h"
+#include "libavutil/internal.h"
 #include "libavutil/intreadwrite.h"
 #include "avcodec.h"
 #include "internal.h"
@@ -100,7 +101,7 @@ static av_cold int libopus_decode_init(AVCodecContext *avc)
                opus_strerror(ret));
 #else
     {
-        double gain_lin = pow(10, gain_db / (20.0 * 256));
+        double gain_lin = ff_exp10(gain_db / (20.0 * 256));
         if (avc->sample_fmt == AV_SAMPLE_FMT_FLT)
             opus->gain.d = gain_lin;
         else
@@ -191,7 +192,7 @@ AVCodec ff_libopus_decoder = {
     .close          = libopus_decode_close,
     .decode         = libopus_decode,
     .flush          = libopus_flush,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLT,
                                                      AV_SAMPLE_FMT_S16,
                                                      AV_SAMPLE_FMT_NONE },
diff --git a/libavcodec/libopusenc.c b/libavcodec/libopusenc.c
index 78546097..3f3e80d4 100644
--- a/libavcodec/libopusenc.c
+++ b/libavcodec/libopusenc.c
@@ -180,12 +180,12 @@ static av_cold int libopus_encode_init(AVCodecContext *avctx)
         avctx->bit_rate = 64000 * opus->stream_count +
                           32000 * coupled_stream_count;
         av_log(avctx, AV_LOG_WARNING,
-               "No bit rate set. Defaulting to %d bps.\n", avctx->bit_rate);
+               "No bit rate set. Defaulting to %"PRId64" bps.\n", (int64_t)avctx->bit_rate);
     }
 
     if (avctx->bit_rate < 500 || avctx->bit_rate > 256000 * avctx->channels) {
-        av_log(avctx, AV_LOG_ERROR, "The bit rate %d bps is unsupported. "
-               "Please choose a value between 500 and %d.\n", avctx->bit_rate,
+        av_log(avctx, AV_LOG_ERROR, "The bit rate %"PRId64" bps is unsupported. "
+               "Please choose a value between 500 and %d.\n", (int64_t)avctx->bit_rate,
                256000 * avctx->channels);
         return AVERROR(EINVAL);
     }
@@ -269,7 +269,7 @@ static av_cold int libopus_encode_init(AVCodecContext *avctx)
     }
 
     header_size = 19 + (avctx->channels > 2 ? 2 + avctx->channels : 0);
-    avctx->extradata = av_malloc(header_size + FF_INPUT_BUFFER_PADDING_SIZE);
+    avctx->extradata = av_malloc(header_size + AV_INPUT_BUFFER_PADDING_SIZE);
     if (!avctx->extradata) {
         av_log(avctx, AV_LOG_ERROR, "Failed to allocate extradata.\n");
         ret = AVERROR(ENOMEM);
@@ -326,7 +326,7 @@ static int libopus_encode(AVCodecContext *avctx, AVPacket *avpkt,
         } else
             audio = frame->data[0];
     } else {
-        if (!opus->afq.remaining_samples)
+        if (!opus->afq.remaining_samples || (!opus->afq.frame_alloc && !opus->afq.frame_count))
             return 0;
         audio = opus->samples;
         memset(audio, 0, opus->opts.packet_size * sample_size);
@@ -335,7 +335,7 @@ static int libopus_encode(AVCodecContext *avctx, AVPacket *avpkt,
     /* Maximum packet size taken from opusenc in opus-tools. 60ms packets
      * consist of 3 frames in one packet. The maximum frame size is 1275
      * bytes along with the largest possible packet header of 7 bytes. */
-    if ((ret = ff_alloc_packet2(avctx, avpkt, (1275 * 3 + 7) * opus->stream_count)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, avpkt, (1275 * 3 + 7) * opus->stream_count, 0)) < 0)
         return ret;
 
     if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT)
@@ -361,7 +361,7 @@ static int libopus_encode(AVCodecContext *avctx, AVPacket *avpkt,
     discard_padding = opus->opts.packet_size - avpkt->duration;
     // Check if subtraction resulted in an overflow
     if ((discard_padding < opus->opts.packet_size) != (avpkt->duration > 0)) {
-        av_free_packet(avpkt);
+        av_packet_unref(avpkt);
         av_free(avpkt);
         return AVERROR(EINVAL);
     }
@@ -370,7 +370,7 @@ static int libopus_encode(AVCodecContext *avctx, AVPacket *avpkt,
                                                      AV_PKT_DATA_SKIP_SAMPLES,
                                                      10);
         if(!side_data) {
-            av_free_packet(avpkt);
+            av_packet_unref(avpkt);
             av_free(avpkt);
             return AVERROR(ENOMEM);
         }
@@ -438,7 +438,7 @@ AVCodec ff_libopus_encoder = {
     .init            = libopus_encode_init,
     .encode2         = libopus_encode,
     .close           = libopus_encode_close,
-    .capabilities    = CODEC_CAP_DELAY | CODEC_CAP_SMALL_LAST_FRAME,
+    .capabilities    = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_SMALL_LAST_FRAME,
     .sample_fmts     = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
                                                       AV_SAMPLE_FMT_FLT,
                                                       AV_SAMPLE_FMT_NONE },
diff --git a/libavcodec/libschroedinger.c b/libavcodec/libschroedinger.c
index 9f0b25c0..0b02b2c2 100644
--- a/libavcodec/libschroedinger.c
+++ b/libavcodec/libschroedinger.c
@@ -26,6 +26,7 @@
 #include "libavutil/attributes.h"
 #include "libavutil/mem.h"
 #include "libschroedinger.h"
+#include "internal.h"
 
 static const SchroVideoFormatInfo ff_schro_video_format_info[] = {
     { 640,  480,  24000, 1001},
@@ -167,19 +168,14 @@ int ff_get_schro_frame_format (SchroChromaFormat schro_pix_fmt,
 
 static void free_schro_frame(SchroFrame *frame, void *priv)
 {
-    AVPicture *p_pic = priv;
-
-    if (!p_pic)
-        return;
-
-    avpicture_free(p_pic);
-    av_freep(&p_pic);
+    AVFrame *p_pic = priv;
+    av_frame_free(&p_pic);
 }
 
 SchroFrame *ff_create_schro_frame(AVCodecContext *avctx,
                                   SchroFrameFormat schro_frame_fmt)
 {
-    AVPicture *p_pic;
+    AVFrame *p_pic;
     SchroFrame *p_frame;
     int y_width, uv_width;
     int y_height, uv_height;
@@ -190,9 +186,12 @@ SchroFrame *ff_create_schro_frame(AVCodecContext *avctx,
     uv_width  = y_width  >> (SCHRO_FRAME_FORMAT_H_SHIFT(schro_frame_fmt));
     uv_height = y_height >> (SCHRO_FRAME_FORMAT_V_SHIFT(schro_frame_fmt));
 
-    p_pic = av_mallocz(sizeof(AVPicture));
-    if (!p_pic || avpicture_alloc(p_pic, avctx->pix_fmt, y_width, y_height) < 0) {
-        av_free(p_pic);
+    p_pic = av_frame_alloc();
+    if (!p_pic)
+        return NULL;
+
+    if (ff_get_buffer(avctx, p_pic, AV_GET_BUFFER_FLAG_REF) < 0) {
+        av_frame_free(&p_pic);
         return NULL;
     }
 
diff --git a/libavcodec/libschroedingerdec.c b/libavcodec/libschroedingerdec.c
index 8778869a..152cbe7d 100644
--- a/libavcodec/libschroedingerdec.c
+++ b/libavcodec/libschroedingerdec.c
@@ -383,6 +383,6 @@ AVCodec ff_libschroedinger_decoder = {
     .init           = libschroedinger_decode_init,
     .close          = libschroedinger_decode_close,
     .decode         = libschroedinger_decode_frame,
-    .capabilities   = CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_DR1,
     .flush          = libschroedinger_flush,
 };
diff --git a/libavcodec/libschroedingerenc.c b/libavcodec/libschroedingerenc.c
index f7a32353..d5da6fc6 100644
--- a/libavcodec/libschroedingerenc.c
+++ b/libavcodec/libschroedingerenc.c
@@ -33,6 +33,9 @@
 
 #include "libavutil/attributes.h"
 #include "libavutil/avassert.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/opt.h"
+
 #include "avcodec.h"
 #include "internal.h"
 #include "libschroedinger.h"
@@ -41,6 +44,8 @@
 
 /** libschroedinger encoder private data */
 typedef struct SchroEncoderParams {
+    AVClass        *class;
+
     /** Schroedinger video format */
     SchroVideoFormat *format;
 
@@ -70,6 +75,9 @@ typedef struct SchroEncoderParams {
 
     /* counter for frames submitted to encoder, used as dts */
     int64_t dts;
+
+    /** enable noarith */
+    int noarith;
 } SchroEncoderParams;
 
 /**
@@ -155,22 +163,24 @@ static av_cold int libschroedinger_encode_init(AVCodecContext *avctx)
     p_schro_params->format->frame_rate_numerator   = avctx->time_base.den;
     p_schro_params->format->frame_rate_denominator = avctx->time_base.num;
 
-    p_schro_params->frame_size = avpicture_get_size(avctx->pix_fmt,
-                                                    avctx->width,
-                                                    avctx->height);
-
-    avctx->coded_frame = av_frame_alloc();
-    if (!avctx->coded_frame)
-        return AVERROR(ENOMEM);
+    p_schro_params->frame_size = av_image_get_buffer_size(avctx->pix_fmt,
+                                                          avctx->width,
+                                                          avctx->height, 1);
 
     if (!avctx->gop_size) {
         schro_encoder_setting_set_double(p_schro_params->encoder,
                                          "gop_structure",
                                          SCHRO_ENCODER_GOP_INTRA_ONLY);
 
-        if (avctx->coder_type == FF_CODER_TYPE_VLC)
-            schro_encoder_setting_set_double(p_schro_params->encoder,
-                                             "enable_noarith", 1);
+#if FF_API_CODER_TYPE
+FF_DISABLE_DEPRECATION_WARNINGS
+        if (avctx->coder_type != FF_CODER_TYPE_VLC)
+            p_schro_params->noarith = 0;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+        schro_encoder_setting_set_double(p_schro_params->encoder,
+                                         "enable_noarith",
+                                         p_schro_params->noarith);
     } else {
         schro_encoder_setting_set_double(p_schro_params->encoder,
                                          "au_distance", avctx->gop_size);
@@ -179,7 +189,7 @@ static av_cold int libschroedinger_encode_init(AVCodecContext *avctx)
     }
 
     /* FIXME - Need to handle SCHRO_ENCODER_RATE_CONTROL_LOW_DELAY. */
-    if (avctx->flags & CODEC_FLAG_QSCALE) {
+    if (avctx->flags & AV_CODEC_FLAG_QSCALE) {
         if (!avctx->global_quality) {
             /* lossless coding */
             schro_encoder_setting_set_double(p_schro_params->encoder,
@@ -206,14 +216,14 @@ static av_cold int libschroedinger_encode_init(AVCodecContext *avctx)
                                          "bitrate", avctx->bit_rate);
     }
 
-    if (avctx->flags & CODEC_FLAG_INTERLACED_ME)
+    if (avctx->flags & AV_CODEC_FLAG_INTERLACED_ME)
         /* All material can be coded as interlaced or progressive
            irrespective of the type of source material. */
         schro_encoder_setting_set_double(p_schro_params->encoder,
                                          "interlaced_coding", 1);
 
     schro_encoder_setting_set_double(p_schro_params->encoder, "open_gop",
-                                     !(avctx->flags & CODEC_FLAG_CLOSED_GOP));
+                                     !(avctx->flags & AV_CODEC_FLAG_CLOSED_GOP));
 
     /* FIXME: Signal range hardcoded to 8-bit data until both libschroedinger
      * and libdirac support other bit-depth data. */
@@ -238,17 +248,17 @@ static SchroFrame *libschroedinger_frame_from_data(AVCodecContext *avctx,
                                                    const AVFrame *frame)
 {
     SchroEncoderParams *p_schro_params = avctx->priv_data;
-    SchroFrame *in_frame;
-    /* Input line size may differ from what the codec supports. Especially
-     * when transcoding from one format to another. So use avpicture_layout
-     * to copy the frame. */
-    in_frame = ff_create_schro_frame(avctx, p_schro_params->frame_format);
-
-    if (in_frame)
-        avpicture_layout((const AVPicture *)frame, avctx->pix_fmt,
-                          avctx->width, avctx->height,
-                          in_frame->components[0].data,
-                          p_schro_params->frame_size);
+    SchroFrame *in_frame = ff_create_schro_frame(avctx,
+                                                 p_schro_params->frame_format);
+
+    /* Copy input data to SchroFrame buffers (they match the ones
+     * referenced by the AVFrame stored in priv) */
+    if (in_frame && av_frame_copy(in_frame->priv, frame) < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to copy input data\n");
+        /* Drop our reference so the SchroFrame is not leaked on failure. */
+        schro_frame_unref(in_frame);
+        return NULL;
+    }
 
     return in_frame;
 }
@@ -383,16 +393,20 @@ static int libschroedinger_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     pkt_size = p_frame_output->size;
     if (last_frame_in_sequence && p_schro_params->enc_buf_size > 0)
         pkt_size += p_schro_params->enc_buf_size;
-    if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size, 0)) < 0)
         goto error;
 
     memcpy(pkt->data, p_frame_output->p_encbuf, p_frame_output->size);
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
     avctx->coded_frame->key_frame = p_frame_output->key_frame;
+    avctx->coded_frame->pts = p_frame_output->frame_num;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
     /* Use the frame number of the encoded frame as the pts. It is OK to
      * do so since Dirac is a constant frame rate codec. It expects input
      * to be of constant frame rate. */
-    pkt->pts =
-    avctx->coded_frame->pts = p_frame_output->frame_num;
+    pkt->pts = p_frame_output->frame_num;
     pkt->dts = p_schro_params->dts++;
     enc_size = p_frame_output->size;
 
@@ -436,11 +450,23 @@ static int libschroedinger_encode_close(AVCodecContext *avctx)
     /* Free the video format structure. */
     av_freep(&p_schro_params->format);
 
-    av_frame_free(&avctx->coded_frame);
-
     return 0;
 }
 
+#define OFFSET(x) offsetof(SchroEncoderParams, x)
+#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
+static const AVOption options[] = {
+    { "noarith", "Enable noarith", OFFSET(noarith), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, VE },
+
+    { NULL },
+};
+
+static const AVClass libschroedinger_class = {
+    .class_name = "libschroedinger",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
 
 AVCodec ff_libschroedinger_encoder = {
     .name           = "libschroedinger",
@@ -448,10 +474,11 @@ AVCodec ff_libschroedinger_encoder = {
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_DIRAC,
     .priv_data_size = sizeof(SchroEncoderParams),
+    .priv_class     = &libschroedinger_class,
     .init           = libschroedinger_encode_init,
     .encode2        = libschroedinger_encode_frame,
     .close          = libschroedinger_encode_close,
-    .capabilities   = CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_DELAY,
     .pix_fmts       = (const enum AVPixelFormat[]){
         AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_NONE
     },
diff --git a/libavcodec/libshine.c b/libavcodec/libshine.c
index 27c1a5f4..f4cf5981 100644
--- a/libavcodec/libshine.c
+++ b/libavcodec/libshine.c
@@ -102,7 +102,7 @@ static int libshine_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
 
     len = hdr.frame_size;
     if (len <= s->buffer_index) {
-        if ((ret = ff_alloc_packet2(avctx, avpkt, len)))
+        if ((ret = ff_alloc_packet2(avctx, avpkt, len, 0)))
             return ret;
         memcpy(avpkt->data, s->buffer, len);
         s->buffer_index -= len;
@@ -139,7 +139,7 @@ AVCodec ff_libshine_encoder = {
     .init                  = libshine_encode_init,
     .encode2               = libshine_encode_frame,
     .close                 = libshine_encode_close,
-    .capabilities          = CODEC_CAP_DELAY,
+    .capabilities          = AV_CODEC_CAP_DELAY,
     .sample_fmts           = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16P,
                                                             AV_SAMPLE_FMT_NONE },
     .supported_samplerates = libshine_sample_rates,
diff --git a/libavcodec/libspeexdec.c b/libavcodec/libspeexdec.c
index 6ca592ae..044883af 100644
--- a/libavcodec/libspeexdec.c
+++ b/libavcodec/libspeexdec.c
@@ -199,5 +199,5 @@ AVCodec ff_libspeex_decoder = {
     .close          = libspeex_decode_close,
     .decode         = libspeex_decode_frame,
     .flush          = libspeex_decode_flush,
-    .capabilities   = CODEC_CAP_SUBFRAMES | CODEC_CAP_DELAY | CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DELAY | AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/libspeexenc.c b/libavcodec/libspeexenc.c
index fac8e725..65a84dc5 100644
--- a/libavcodec/libspeexenc.c
+++ b/libavcodec/libspeexenc.c
@@ -40,7 +40,7 @@
  *     used to set the encoding mode.
  *
  * Rate Control
- *     VBR mode is turned on by setting CODEC_FLAG_QSCALE in avctx->flags.
+ *     VBR mode is turned on by setting AV_CODEC_FLAG_QSCALE in avctx->flags.
  *     avctx->global_quality is used to set the encoding quality.
  *     For CBR mode, avctx->bit_rate can be used to set the constant bitrate.
  *     Alternatively, the 'cbr_quality' option can be set from 0 to 10 to set
@@ -125,10 +125,10 @@ static av_cold void print_enc_params(AVCodecContext *avctx,
         av_log(avctx, AV_LOG_DEBUG, "  quality: %f\n", s->vbr_quality);
     } else if (s->abr) {
         av_log(avctx, AV_LOG_DEBUG, "rate control: ABR\n");
-        av_log(avctx, AV_LOG_DEBUG, "  bitrate: %d bps\n", avctx->bit_rate);
+        av_log(avctx, AV_LOG_DEBUG, "  bitrate: %"PRId64" bps\n", (int64_t)avctx->bit_rate);
     } else {
         av_log(avctx, AV_LOG_DEBUG, "rate control: CBR\n");
-        av_log(avctx, AV_LOG_DEBUG, "  bitrate: %d bps\n", avctx->bit_rate);
+        av_log(avctx, AV_LOG_DEBUG, "  bitrate: %"PRId64" bps\n", (int64_t)avctx->bit_rate);
     }
     av_log(avctx, AV_LOG_DEBUG, "complexity: %d\n",
            avctx->compression_level);
@@ -177,7 +177,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
     speex_init_header(&s->header, avctx->sample_rate, avctx->channels, mode);
 
     /* rate control method and parameters */
-    if (avctx->flags & CODEC_FLAG_QSCALE) {
+    if (avctx->flags & AV_CODEC_FLAG_QSCALE) {
         /* VBR */
         s->header.vbr = 1;
         s->vad = 1; /* VAD is always implicitly activated for VBR */
@@ -244,8 +244,8 @@ static av_cold int encode_init(AVCodecContext *avctx)
              below with speex_header_free() */
     header_data = speex_header_to_packet(&s->header, &header_size);
 
-    /* allocate extradata and coded_frame */
-    avctx->extradata   = av_malloc(header_size + FF_INPUT_BUFFER_PADDING_SIZE);
+    /* allocate extradata */
+    avctx->extradata = av_malloc(header_size + AV_INPUT_BUFFER_PADDING_SIZE);
     if (!avctx->extradata) {
         speex_header_free(header_data);
         speex_encoder_destroy(s->enc_state);
@@ -294,7 +294,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     /* write output if all frames for the packet have been encoded */
     if (s->pkt_frame_count == s->frames_per_packet) {
         s->pkt_frame_count = 0;
-        if ((ret = ff_alloc_packet2(avctx, avpkt, speex_bits_nbytes(&s->bits))) < 0)
+        if ((ret = ff_alloc_packet2(avctx, avpkt, speex_bits_nbytes(&s->bits), 0)) < 0)
             return ret;
         ret = speex_bits_write(&s->bits, avpkt->data, avpkt->size);
         speex_bits_reset(&s->bits);
@@ -356,7 +356,7 @@ AVCodec ff_libspeex_encoder = {
     .init           = encode_init,
     .encode2        = encode_frame,
     .close          = encode_close,
-    .capabilities   = CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_DELAY,
     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
                                                      AV_SAMPLE_FMT_NONE },
     .channel_layouts = (const uint64_t[]){ AV_CH_LAYOUT_MONO,
diff --git a/libavcodec/libstagefright.cpp b/libavcodec/libstagefright.cpp
deleted file mode 100644
index 11d60387..00000000
--- a/libavcodec/libstagefright.cpp
+++ /dev/null
@@ -1,591 +0,0 @@
-/*
- * Interface to the Android Stagefright library for
- * H/W accelerated H.264 decoding
- *
- * Copyright (C) 2011 Mohamed Naufal
- * Copyright (C) 2011 Martin Storsjö
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <binder/ProcessState.h>
-#include <media/stagefright/MetaData.h>
-#include <media/stagefright/MediaBufferGroup.h>
-#include <media/stagefright/MediaDebug.h>
-#include <media/stagefright/MediaDefs.h>
-#include <media/stagefright/OMXClient.h>
-#include <media/stagefright/OMXCodec.h>
-#include <utils/List.h>
-#include <new>
-#include <map>
-
-extern "C" {
-#include "avcodec.h"
-#include "libavutil/imgutils.h"
-#include "internal.h"
-}
-
-#define OMX_QCOM_COLOR_FormatYVU420SemiPlanar 0x7FA30C00
-
-using namespace android;
-
-struct Frame {
-    status_t status;
-    size_t size;
-    int64_t time;
-    int key;
-    uint8_t *buffer;
-    AVFrame *vframe;
-};
-
-struct TimeStamp {
-    int64_t pts;
-    int64_t reordered_opaque;
-};
-
-class CustomSource;
-
-struct StagefrightContext {
-    AVCodecContext *avctx;
-    AVBitStreamFilterContext *bsfc;
-    uint8_t* orig_extradata;
-    int orig_extradata_size;
-    sp<MediaSource> *source;
-    List<Frame*> *in_queue, *out_queue;
-    pthread_mutex_t in_mutex, out_mutex;
-    pthread_cond_t condition;
-    pthread_t decode_thread_id;
-
-    Frame *end_frame;
-    bool source_done;
-    volatile sig_atomic_t thread_started, thread_exited, stop_decode;
-
-    AVFrame *prev_frame;
-    std::map<int64_t, TimeStamp> *ts_map;
-    int64_t frame_index;
-
-    uint8_t *dummy_buf;
-    int dummy_bufsize;
-
-    OMXClient *client;
-    sp<MediaSource> *decoder;
-    const char *decoder_component;
-};
-
-class CustomSource : public MediaSource {
-public:
-    CustomSource(AVCodecContext *avctx, sp<MetaData> meta) {
-        s = (StagefrightContext*)avctx->priv_data;
-        source_meta = meta;
-        frame_size  = (avctx->width * avctx->height * 3) / 2;
-        buf_group.add_buffer(new MediaBuffer(frame_size));
-    }
-
-    virtual sp<MetaData> getFormat() {
-        return source_meta;
-    }
-
-    virtual status_t start(MetaData *params) {
-        return OK;
-    }
-
-    virtual status_t stop() {
-        return OK;
-    }
-
-    virtual status_t read(MediaBuffer **buffer,
-                          const MediaSource::ReadOptions *options) {
-        Frame *frame;
-        status_t ret;
-
-        if (s->thread_exited)
-            return ERROR_END_OF_STREAM;
-        pthread_mutex_lock(&s->in_mutex);
-
-        while (s->in_queue->empty())
-            pthread_cond_wait(&s->condition, &s->in_mutex);
-
-        frame = *s->in_queue->begin();
-        ret = frame->status;
-
-        if (ret == OK) {
-            ret = buf_group.acquire_buffer(buffer);
-            if (ret == OK) {
-                memcpy((*buffer)->data(), frame->buffer, frame->size);
-                (*buffer)->set_range(0, frame->size);
-                (*buffer)->meta_data()->clear();
-                (*buffer)->meta_data()->setInt32(kKeyIsSyncFrame,frame->key);
-                (*buffer)->meta_data()->setInt64(kKeyTime, frame->time);
-            } else {
-                av_log(s->avctx, AV_LOG_ERROR, "Failed to acquire MediaBuffer\n");
-            }
-            av_freep(&frame->buffer);
-        }
-
-        s->in_queue->erase(s->in_queue->begin());
-        pthread_mutex_unlock(&s->in_mutex);
-
-        av_freep(&frame);
-        return ret;
-    }
-
-private:
-    MediaBufferGroup buf_group;
-    sp<MetaData> source_meta;
-    StagefrightContext *s;
-    int frame_size;
-};
-
-void* decode_thread(void *arg)
-{
-    AVCodecContext *avctx = (AVCodecContext*)arg;
-    StagefrightContext *s = (StagefrightContext*)avctx->priv_data;
-    const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(avctx->pix_fmt);
-    Frame* frame;
-    MediaBuffer *buffer;
-    int32_t w, h;
-    int decode_done = 0;
-    int ret;
-    int src_linesize[3];
-    const uint8_t *src_data[3];
-    int64_t out_frame_index = 0;
-
-    do {
-        buffer = NULL;
-        frame = (Frame*)av_mallocz(sizeof(Frame));
-        if (!frame) {
-            frame         = s->end_frame;
-            frame->status = AVERROR(ENOMEM);
-            decode_done   = 1;
-            s->end_frame  = NULL;
-            goto push_frame;
-        }
-        frame->status = (*s->decoder)->read(&buffer);
-        if (frame->status == OK) {
-            sp<MetaData> outFormat = (*s->decoder)->getFormat();
-            outFormat->findInt32(kKeyWidth , &w);
-            outFormat->findInt32(kKeyHeight, &h);
-            frame->vframe = av_frame_alloc();
-            if (!frame->vframe) {
-                frame->status = AVERROR(ENOMEM);
-                decode_done   = 1;
-                buffer->release();
-                goto push_frame;
-            }
-            ret = ff_get_buffer(avctx, frame->vframe, AV_GET_BUFFER_FLAG_REF);
-            if (ret < 0) {
-                frame->status = ret;
-                decode_done   = 1;
-                buffer->release();
-                goto push_frame;
-            }
-
-            // The OMX.SEC decoder doesn't signal the modified width/height
-            if (s->decoder_component && !strncmp(s->decoder_component, "OMX.SEC", 7) &&
-                (w & 15 || h & 15)) {
-                if (((w + 15)&~15) * ((h + 15)&~15) * 3/2 == buffer->range_length()) {
-                    w = (w + 15)&~15;
-                    h = (h + 15)&~15;
-                }
-            }
-
-            if (!avctx->width || !avctx->height || avctx->width > w || avctx->height > h) {
-                avctx->width  = w;
-                avctx->height = h;
-            }
-
-            src_linesize[0] = av_image_get_linesize(avctx->pix_fmt, w, 0);
-            src_linesize[1] = av_image_get_linesize(avctx->pix_fmt, w, 1);
-            src_linesize[2] = av_image_get_linesize(avctx->pix_fmt, w, 2);
-
-            src_data[0] = (uint8_t*)buffer->data();
-            src_data[1] = src_data[0] + src_linesize[0] * h;
-            src_data[2] = src_data[1] + src_linesize[1] * -(-h>>pix_desc->log2_chroma_h);
-            av_image_copy(frame->vframe->data, frame->vframe->linesize,
-                          src_data, src_linesize,
-                          avctx->pix_fmt, avctx->width, avctx->height);
-
-            buffer->meta_data()->findInt64(kKeyTime, &out_frame_index);
-            if (out_frame_index && s->ts_map->count(out_frame_index) > 0) {
-                frame->vframe->pts = (*s->ts_map)[out_frame_index].pts;
-                frame->vframe->reordered_opaque = (*s->ts_map)[out_frame_index].reordered_opaque;
-                s->ts_map->erase(out_frame_index);
-            }
-            buffer->release();
-            } else if (frame->status == INFO_FORMAT_CHANGED) {
-                if (buffer)
-                    buffer->release();
-                av_free(frame);
-                continue;
-            } else {
-                decode_done = 1;
-            }
-push_frame:
-        while (true) {
-            pthread_mutex_lock(&s->out_mutex);
-            if (s->out_queue->size() >= 10) {
-                pthread_mutex_unlock(&s->out_mutex);
-                usleep(10000);
-                continue;
-            }
-            break;
-        }
-        s->out_queue->push_back(frame);
-        pthread_mutex_unlock(&s->out_mutex);
-    } while (!decode_done && !s->stop_decode);
-
-    s->thread_exited = true;
-
-    return 0;
-}
-
-static av_cold int Stagefright_init(AVCodecContext *avctx)
-{
-    StagefrightContext *s = (StagefrightContext*)avctx->priv_data;
-    sp<MetaData> meta, outFormat;
-    int32_t colorFormat = 0;
-    int ret;
-
-    if (!avctx->extradata || !avctx->extradata_size || avctx->extradata[0] != 1)
-        return -1;
-
-    s->avctx = avctx;
-    s->bsfc  = av_bitstream_filter_init("h264_mp4toannexb");
-    if (!s->bsfc) {
-        av_log(avctx, AV_LOG_ERROR, "Cannot open the h264_mp4toannexb BSF!\n");
-        return -1;
-    }
-
-    s->orig_extradata_size = avctx->extradata_size;
-    s->orig_extradata = (uint8_t*) av_mallocz(avctx->extradata_size +
-                                              FF_INPUT_BUFFER_PADDING_SIZE);
-    if (!s->orig_extradata) {
-        ret = AVERROR(ENOMEM);
-        goto fail;
-    }
-    memcpy(s->orig_extradata, avctx->extradata, avctx->extradata_size);
-
-    meta = new MetaData;
-    if (!meta) {
-        ret = AVERROR(ENOMEM);
-        goto fail;
-    }
-    meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_AVC);
-    meta->setInt32(kKeyWidth, avctx->width);
-    meta->setInt32(kKeyHeight, avctx->height);
-    meta->setData(kKeyAVCC, kTypeAVCC, avctx->extradata, avctx->extradata_size);
-
-    android::ProcessState::self()->startThreadPool();
-
-    s->source    = new sp<MediaSource>();
-    *s->source   = new CustomSource(avctx, meta);
-    s->in_queue  = new List<Frame*>;
-    s->out_queue = new List<Frame*>;
-    s->ts_map    = new std::map<int64_t, TimeStamp>;
-    s->client    = new OMXClient;
-    s->end_frame = (Frame*)av_mallocz(sizeof(Frame));
-    if (s->source == NULL || !s->in_queue || !s->out_queue || !s->client ||
-        !s->ts_map || !s->end_frame) {
-        ret = AVERROR(ENOMEM);
-        goto fail;
-    }
-
-    if (s->client->connect() !=  OK) {
-        av_log(avctx, AV_LOG_ERROR, "Cannot connect OMX client\n");
-        ret = -1;
-        goto fail;
-    }
-
-    s->decoder  = new sp<MediaSource>();
-    *s->decoder = OMXCodec::Create(s->client->interface(), meta,
-                                  false, *s->source, NULL,
-                                  OMXCodec::kClientNeedsFramebuffer);
-    if ((*s->decoder)->start() !=  OK) {
-        av_log(avctx, AV_LOG_ERROR, "Cannot start decoder\n");
-        ret = -1;
-        s->client->disconnect();
-        goto fail;
-    }
-
-    outFormat = (*s->decoder)->getFormat();
-    outFormat->findInt32(kKeyColorFormat, &colorFormat);
-    if (colorFormat == OMX_QCOM_COLOR_FormatYVU420SemiPlanar ||
-        colorFormat == OMX_COLOR_FormatYUV420SemiPlanar)
-        avctx->pix_fmt = AV_PIX_FMT_NV21;
-    else if (colorFormat == OMX_COLOR_FormatYCbYCr)
-        avctx->pix_fmt = AV_PIX_FMT_YUYV422;
-    else if (colorFormat == OMX_COLOR_FormatCbYCrY)
-        avctx->pix_fmt = AV_PIX_FMT_UYVY422;
-    else
-        avctx->pix_fmt = AV_PIX_FMT_YUV420P;
-
-    outFormat->findCString(kKeyDecoderComponent, &s->decoder_component);
-    if (s->decoder_component)
-        s->decoder_component = av_strdup(s->decoder_component);
-
-    pthread_mutex_init(&s->in_mutex, NULL);
-    pthread_mutex_init(&s->out_mutex, NULL);
-    pthread_cond_init(&s->condition, NULL);
-    return 0;
-
-fail:
-    av_bitstream_filter_close(s->bsfc);
-    av_freep(&s->orig_extradata);
-    av_freep(&s->end_frame);
-    delete s->in_queue;
-    delete s->out_queue;
-    delete s->ts_map;
-    delete s->client;
-    return ret;
-}
-
-static int Stagefright_decode_frame(AVCodecContext *avctx, void *data,
-                                    int *got_frame, AVPacket *avpkt)
-{
-    StagefrightContext *s = (StagefrightContext*)avctx->priv_data;
-    Frame *frame;
-    status_t status;
-    int orig_size = avpkt->size;
-    AVPacket pkt = *avpkt;
-    AVFrame *ret_frame;
-
-    if (!s->thread_started) {
-        if(pthread_create(&s->decode_thread_id, NULL, &decode_thread, avctx))
-            return AVERROR(ENOMEM);
-        s->thread_started = true;
-    }
-
-    if (avpkt && avpkt->data) {
-        av_bitstream_filter_filter(s->bsfc, avctx, NULL, &pkt.data, &pkt.size,
-                                   avpkt->data, avpkt->size, avpkt->flags & AV_PKT_FLAG_KEY);
-        avpkt = &pkt;
-    }
-
-    if (!s->source_done) {
-        if(!s->dummy_buf) {
-            s->dummy_buf = (uint8_t*)av_malloc(avpkt->size);
-            if (!s->dummy_buf)
-                return AVERROR(ENOMEM);
-            s->dummy_bufsize = avpkt->size;
-            memcpy(s->dummy_buf, avpkt->data, avpkt->size);
-        }
-
-        frame = (Frame*)av_mallocz(sizeof(Frame));
-        if (avpkt->data) {
-            frame->status  = OK;
-            frame->size    = avpkt->size;
-            frame->key     = avpkt->flags & AV_PKT_FLAG_KEY ? 1 : 0;
-            frame->buffer  = (uint8_t*)av_malloc(avpkt->size);
-            if (!frame->buffer) {
-                av_freep(&frame);
-                return AVERROR(ENOMEM);
-            }
-            uint8_t *ptr = avpkt->data;
-            // The OMX.SEC decoder fails without this.
-            if (avpkt->size == orig_size + avctx->extradata_size) {
-                ptr += avctx->extradata_size;
-                frame->size = orig_size;
-            }
-            memcpy(frame->buffer, ptr, orig_size);
-            if (avpkt == &pkt)
-                av_free(avpkt->data);
-
-            frame->time = ++s->frame_index;
-            (*s->ts_map)[s->frame_index].pts = avpkt->pts;
-            (*s->ts_map)[s->frame_index].reordered_opaque = avctx->reordered_opaque;
-        } else {
-            frame->status  = ERROR_END_OF_STREAM;
-            s->source_done = true;
-        }
-
-        while (true) {
-            if (s->thread_exited) {
-                s->source_done = true;
-                break;
-            }
-            pthread_mutex_lock(&s->in_mutex);
-            if (s->in_queue->size() >= 10) {
-                pthread_mutex_unlock(&s->in_mutex);
-                usleep(10000);
-                continue;
-            }
-            s->in_queue->push_back(frame);
-            pthread_cond_signal(&s->condition);
-            pthread_mutex_unlock(&s->in_mutex);
-            break;
-        }
-    }
-    while (true) {
-        pthread_mutex_lock(&s->out_mutex);
-        if (!s->out_queue->empty()) break;
-        pthread_mutex_unlock(&s->out_mutex);
-        if (s->source_done) {
-            usleep(10000);
-            continue;
-        } else {
-            return orig_size;
-        }
-    }
-
-    frame = *s->out_queue->begin();
-    s->out_queue->erase(s->out_queue->begin());
-    pthread_mutex_unlock(&s->out_mutex);
-
-    ret_frame = frame->vframe;
-    status  = frame->status;
-    av_freep(&frame);
-
-    if (status == ERROR_END_OF_STREAM)
-        return 0;
-    if (status != OK) {
-        if (status == AVERROR(ENOMEM))
-            return status;
-        av_log(avctx, AV_LOG_ERROR, "Decode failed: %x\n", status);
-        return -1;
-    }
-
-    if (s->prev_frame)
-        av_frame_free(&s->prev_frame);
-    s->prev_frame = ret_frame;
-
-    *got_frame = 1;
-    *(AVFrame*)data = *ret_frame;
-    return orig_size;
-}
-
-static av_cold int Stagefright_close(AVCodecContext *avctx)
-{
-    StagefrightContext *s = (StagefrightContext*)avctx->priv_data;
-    Frame *frame;
-
-    if (s->thread_started) {
-        if (!s->thread_exited) {
-            s->stop_decode = 1;
-
-            // Make sure decode_thread() doesn't get stuck
-            pthread_mutex_lock(&s->out_mutex);
-            while (!s->out_queue->empty()) {
-                frame = *s->out_queue->begin();
-                s->out_queue->erase(s->out_queue->begin());
-                if (frame->vframe)
-                    av_frame_free(&frame->vframe);
-                av_freep(&frame);
-            }
-            pthread_mutex_unlock(&s->out_mutex);
-
-            // Feed a dummy frame prior to signalling EOF.
-            // This is required to terminate the decoder(OMX.SEC)
-            // when only one frame is read during stream info detection.
-            if (s->dummy_buf && (frame = (Frame*)av_mallocz(sizeof(Frame)))) {
-                frame->status = OK;
-                frame->size   = s->dummy_bufsize;
-                frame->key    = 1;
-                frame->buffer = s->dummy_buf;
-                pthread_mutex_lock(&s->in_mutex);
-                s->in_queue->push_back(frame);
-                pthread_cond_signal(&s->condition);
-                pthread_mutex_unlock(&s->in_mutex);
-                s->dummy_buf = NULL;
-            }
-
-            pthread_mutex_lock(&s->in_mutex);
-            s->end_frame->status = ERROR_END_OF_STREAM;
-            s->in_queue->push_back(s->end_frame);
-            pthread_cond_signal(&s->condition);
-            pthread_mutex_unlock(&s->in_mutex);
-            s->end_frame = NULL;
-        }
-
-        pthread_join(s->decode_thread_id, NULL);
-
-        if (s->prev_frame)
-            av_frame_free(&s->prev_frame);
-
-        s->thread_started = false;
-    }
-
-    while (!s->in_queue->empty()) {
-        frame = *s->in_queue->begin();
-        s->in_queue->erase(s->in_queue->begin());
-        if (frame->size)
-            av_freep(&frame->buffer);
-        av_freep(&frame);
-    }
-
-    while (!s->out_queue->empty()) {
-        frame = *s->out_queue->begin();
-        s->out_queue->erase(s->out_queue->begin());
-        if (frame->vframe)
-            av_frame_free(&frame->vframe);
-        av_freep(&frame);
-    }
-
-    (*s->decoder)->stop();
-    s->client->disconnect();
-
-    if (s->decoder_component)
-        av_freep(&s->decoder_component);
-    av_freep(&s->dummy_buf);
-    av_freep(&s->end_frame);
-
-    // Reset the extradata back to the original mp4 format, so that
-    // the next invocation (both when decoding and when called from
-    // av_find_stream_info) get the original mp4 format extradata.
-    av_freep(&avctx->extradata);
-    avctx->extradata = s->orig_extradata;
-    avctx->extradata_size = s->orig_extradata_size;
-
-    delete s->in_queue;
-    delete s->out_queue;
-    delete s->ts_map;
-    delete s->client;
-    delete s->decoder;
-    delete s->source;
-
-    pthread_mutex_destroy(&s->in_mutex);
-    pthread_mutex_destroy(&s->out_mutex);
-    pthread_cond_destroy(&s->condition);
-    av_bitstream_filter_close(s->bsfc);
-    return 0;
-}
-
-AVCodec ff_libstagefright_h264_decoder = {
-    "libstagefright_h264",
-    NULL_IF_CONFIG_SMALL("libstagefright H.264"),
-    AVMEDIA_TYPE_VIDEO,
-    AV_CODEC_ID_H264,
-    CODEC_CAP_DELAY,
-    NULL, //supported_framerates
-    NULL, //pix_fmts
-    NULL, //supported_samplerates
-    NULL, //sample_fmts
-    NULL, //channel_layouts
-    0,    //max_lowres
-    NULL, //priv_class
-    NULL, //profiles
-    sizeof(StagefrightContext),
-    NULL, //next
-    NULL, //init_thread_copy
-    NULL, //update_thread_context
-    NULL, //defaults
-    NULL, //init_static_data
-    Stagefright_init,
-    NULL, //encode
-    NULL, //encode2
-    Stagefright_decode_frame,
-    Stagefright_close,
-};
diff --git a/libavcodec/libtheoraenc.c b/libavcodec/libtheoraenc.c
index e6b1cbd2..c581b34e 100644
--- a/libavcodec/libtheoraenc.c
+++ b/libavcodec/libtheoraenc.c
@@ -210,7 +210,7 @@ static av_cold int encode_init(AVCodecContext* avc_context)
     }
     avcodec_get_chroma_sub_sample(avc_context->pix_fmt, &h->uv_hshift, &h->uv_vshift);
 
-    if (avc_context->flags & CODEC_FLAG_QSCALE) {
+    if (avc_context->flags & AV_CODEC_FLAG_QSCALE) {
         /* Clip global_quality in QP units to the [0 - 10] range
            to be consistent with the libvorbis implementation.
            Theora accepts a quality parameter which is an int value in
@@ -241,10 +241,10 @@ static av_cold int encode_init(AVCodecContext* avc_context)
     }
 
     // need to enable 2 pass (via TH_ENCCTL_2PASS_) before encoding headers
-    if (avc_context->flags & CODEC_FLAG_PASS1) {
+    if (avc_context->flags & AV_CODEC_FLAG_PASS1) {
         if ((ret = get_stats(avc_context, 0)) < 0)
             return ret;
-    } else if (avc_context->flags & CODEC_FLAG_PASS2) {
+    } else if (avc_context->flags & AV_CODEC_FLAG_PASS2) {
         if ((ret = submit_stats(avc_context)) < 0)
             return ret;
     }
@@ -267,11 +267,6 @@ static av_cold int encode_init(AVCodecContext* avc_context)
 
     th_comment_clear(&t_comment);
 
-    /* Set up the output AVFrame */
-    avc_context->coded_frame = av_frame_alloc();
-    if (!avc_context->coded_frame)
-        return AVERROR(ENOMEM);
-
     return 0;
 }
 
@@ -286,7 +281,7 @@ static int encode_frame(AVCodecContext* avc_context, AVPacket *pkt,
     // EOS, finish and get 1st pass stats if applicable
     if (!frame) {
         th_encode_packetout(h->t_state, 1, &o_packet);
-        if (avc_context->flags & CODEC_FLAG_PASS1)
+        if (avc_context->flags & AV_CODEC_FLAG_PASS1)
             if ((ret = get_stats(avc_context, 1)) < 0)
                 return ret;
         return 0;
@@ -300,7 +295,7 @@ static int encode_frame(AVCodecContext* avc_context, AVPacket *pkt,
         t_yuv_buffer[i].data   = frame->data[i];
     }
 
-    if (avc_context->flags & CODEC_FLAG_PASS2)
+    if (avc_context->flags & AV_CODEC_FLAG_PASS2)
         if ((ret = submit_stats(avc_context)) < 0)
             return ret;
 
@@ -323,7 +318,7 @@ static int encode_frame(AVCodecContext* avc_context, AVPacket *pkt,
         return AVERROR_EXTERNAL;
     }
 
-    if (avc_context->flags & CODEC_FLAG_PASS1)
+    if (avc_context->flags & AV_CODEC_FLAG_PASS1)
         if ((ret = get_stats(avc_context, 0)) < 0)
             return ret;
 
@@ -342,15 +337,19 @@ static int encode_frame(AVCodecContext* avc_context, AVPacket *pkt,
     }
 
     /* Copy ogg_packet content out to buffer */
-    if ((ret = ff_alloc_packet2(avc_context, pkt, o_packet.bytes)) < 0)
+    if ((ret = ff_alloc_packet2(avc_context, pkt, o_packet.bytes, 0)) < 0)
         return ret;
     memcpy(pkt->data, o_packet.packet, o_packet.bytes);
 
     // HACK: assumes no encoder delay, this is true until libtheora becomes
     // multithreaded (which will be disabled unless explicitly requested)
     pkt->pts = pkt->dts = frame->pts;
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
     avc_context->coded_frame->key_frame = !(o_packet.granulepos & h->keyframe_mask);
-    if (avc_context->coded_frame->key_frame)
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+    if (!(o_packet.granulepos & h->keyframe_mask))
         pkt->flags |= AV_PKT_FLAG_KEY;
     *got_packet = 1;
 
@@ -363,7 +362,6 @@ static av_cold int encode_close(AVCodecContext* avc_context)
 
     th_encode_free(h->t_state);
     av_freep(&h->stats);
-    av_frame_free(&avc_context->coded_frame);
     av_freep(&avc_context->stats_out);
     av_freep(&avc_context->extradata);
     avc_context->extradata_size = 0;
@@ -381,7 +379,7 @@ AVCodec ff_libtheora_encoder = {
     .init           = encode_init,
     .close          = encode_close,
     .encode2        = encode_frame,
-    .capabilities   = CODEC_CAP_DELAY, // needed to get the statsfile summary
+    .capabilities   = AV_CODEC_CAP_DELAY, // needed to get the statsfile summary
     .pix_fmts       = (const enum AVPixelFormat[]){
         AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_NONE
     },
diff --git a/libavcodec/libtwolame.c b/libavcodec/libtwolame.c
index dc188575..12d71e7a 100644
--- a/libavcodec/libtwolame.c
+++ b/libavcodec/libtwolame.c
@@ -81,7 +81,7 @@ static av_cold int twolame_encode_init(AVCodecContext *avctx)
     if (!avctx->bit_rate)
         avctx->bit_rate = avctx->sample_rate < 28000 ? 160000 : 384000;
 
-    if (avctx->flags & CODEC_FLAG_QSCALE || !avctx->bit_rate) {
+    if (avctx->flags & AV_CODEC_FLAG_QSCALE || !avctx->bit_rate) {
         twolame_set_VBR(s->glopts, TRUE);
         twolame_set_VBR_level(s->glopts,
                               avctx->global_quality / (float) FF_QP2LAMBDA);
@@ -106,7 +106,7 @@ static int twolame_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     TWOLAMEContext *s = avctx->priv_data;
     int ret;
 
-    if ((ret = ff_alloc_packet2(avctx, avpkt, MPA_MAX_CODED_FRAME_SIZE)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, avpkt, MPA_MAX_CODED_FRAME_SIZE, 0)) < 0)
         return ret;
 
     if (frame) {
@@ -211,7 +211,7 @@ AVCodec ff_libtwolame_encoder = {
     .init           = twolame_encode_init,
     .encode2        = twolame_encode_frame,
     .close          = twolame_encode_close,
-    .capabilities   = CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_DELAY,
     .defaults       = twolame_defaults,
     .priv_class     = &twolame_class,
     .sample_fmts    = (const enum AVSampleFormat[]) {
diff --git a/libavcodec/libutvideo.h b/libavcodec/libutvideo.h
index 5fb1174c..0c03097b 100644
--- a/libavcodec/libutvideo.h
+++ b/libavcodec/libutvideo.h
@@ -62,9 +62,11 @@ typedef struct {
 } UtVideoExtra;
 
 typedef struct {
+    const AVClass *c;
     CCodec *codec;
     unsigned int buf_size;
     uint8_t *buffer;
+    int pred;
 } UtVideoContext;
 
 #endif /* AVCODEC_LIBUTVIDEO_H */
diff --git a/libavcodec/libutvideodec.cpp b/libavcodec/libutvideodec.cpp
index e4b87a8b..a456735d 100644
--- a/libavcodec/libutvideodec.cpp
+++ b/libavcodec/libutvideodec.cpp
@@ -27,6 +27,7 @@
 
 extern "C" {
 #include "avcodec.h"
+#include "libavutil/imgutils.h"
 }
 
 #include "libutvideo.h"
@@ -93,7 +94,7 @@ static av_cold int utvideo_decode_init(AVCodecContext *avctx)
     }
 
     /* Only allocate the buffer once */
-    utv->buf_size = avpicture_get_size(avctx->pix_fmt, avctx->width, avctx->height);
+    utv->buf_size = av_image_get_buffer_size(avctx->pix_fmt, avctx->width, avctx->height, 1);
 #ifdef UTVF_UQY2
     if (format == UTVF_v210)
         utv->buf_size += avctx->height * ((avctx->width + 47) / 48) * 128; // the linesize used by the decoder, this does not seem to be exported
@@ -145,7 +146,6 @@ static int utvideo_decode_frame(AVCodecContext *avctx, void *data,
     int w = avctx->width, h = avctx->height;
 
     /* Set flags */
-    pic->reference = 0;
     pic->pict_type = AV_PICTURE_TYPE_I;
     pic->key_frame = 1;
 
@@ -222,9 +222,19 @@ static int utvideo_decode_frame(AVCodecContext *avctx, void *data,
         pic->data[0] = utv->buffer + utv->buf_size + pic->linesize[0];
         break;
     }
+    pic->width  = w;
+    pic->height = h;
+    pic->format = avctx->pix_fmt;
+
+    if (avctx->refcounted_frames) {
+        int ret = av_frame_ref((AVFrame*)data, pic);
+        if (ret < 0)
+             return ret;
+    } else {
+        av_frame_move_ref((AVFrame*)data, pic);
+    }
 
     *got_frame = 1;
-    av_frame_move_ref((AVFrame*)data, pic);
 
     return avpkt->size;
 }
diff --git a/libavcodec/libutvideoenc.cpp b/libavcodec/libutvideoenc.cpp
index cf669d28..d5dfef93 100644
--- a/libavcodec/libutvideoenc.cpp
+++ b/libavcodec/libutvideoenc.cpp
@@ -26,7 +26,9 @@
  */
 
 extern "C" {
+#include "libavutil/opt.h"
 #include "libavutil/avassert.h"
+#include "libavutil/imgutils.h"
 #include "avcodec.h"
 #include "internal.h"
 }
@@ -72,16 +74,22 @@ static av_cold int utvideo_encode_init(AVCodecContext *avctx)
         return AVERROR(EINVAL);
     }
 
+#if FF_API_PRIVATE_OPT
+FF_DISABLE_DEPRECATION_WARNINGS
+    if (avctx->prediction_method)
+        utv->pred = avctx->prediction_method;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
     /* Check before we alloc anything */
-    if (avctx->prediction_method != 0 && avctx->prediction_method != 2) {
+    if (utv->pred != 0 && utv->pred != 2) {
         av_log(avctx, AV_LOG_ERROR, "Invalid prediction method.\n");
         return AVERROR(EINVAL);
     }
 
-    flags = ((avctx->prediction_method + 1) << 8) | (avctx->thread_count - 1);
+    flags = ((utv->pred + 1) << 8) | (avctx->thread_count - 1);
 
     avctx->priv_data = utv;
-    avctx->coded_frame = av_frame_alloc();
 
     /* Alloc extradata buffer */
     info = (UtVideoExtra *)av_malloc(sizeof(*info));
@@ -95,7 +103,7 @@ static av_cold int utvideo_encode_init(AVCodecContext *avctx)
      * We use this buffer to hold the data that Ut Video returns,
      * since we cannot decode planes separately with it.
      */
-    ret = avpicture_get_size(avctx->pix_fmt, avctx->width, avctx->height);
+    ret = av_image_get_buffer_size(avctx->pix_fmt, avctx->width, avctx->height, 1);
     if (ret < 0) {
         av_free(info);
         return ret;
@@ -144,7 +152,7 @@ static int utvideo_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     uint8_t *dst;
 
     /* Alloc buffer */
-    if ((ret = ff_alloc_packet2(avctx, pkt, utv->buf_size)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, pkt, utv->buf_size, 0)) < 0)
         return ret;
 
     dst = pkt->data;
@@ -199,8 +207,6 @@ static int utvideo_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
      * assert that this is true.
      */
     av_assert2(keyframe == true);
-    avctx->coded_frame->key_frame = 1;
-    avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
 
     pkt->flags |= AV_PKT_FLAG_KEY;
     *got_packet = 1;
@@ -211,7 +217,6 @@ static av_cold int utvideo_encode_close(AVCodecContext *avctx)
 {
     UtVideoContext *utv = (UtVideoContext *)avctx->priv_data;
 
-    av_frame_free(&avctx->coded_frame);
     av_freep(&avctx->extradata);
     av_freep(&utv->buffer);
 
@@ -221,12 +226,35 @@ static av_cold int utvideo_encode_close(AVCodecContext *avctx)
     return 0;
 }
 
+#define OFFSET(x) offsetof(UtVideoContext, x)
+#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
+static const AVOption options[] = {
+    { "pred", "Prediction method", OFFSET(pred), AV_OPT_TYPE_INT, 0, 0, 2, VE, "pred" },
+    { "left",   NULL, 0, AV_OPT_TYPE_CONST, 0, INT_MIN, INT_MAX, VE, "pred" },
+    { "median",   NULL, 0, AV_OPT_TYPE_CONST, 2, INT_MIN, INT_MAX, VE, "pred" },
+    { NULL },
+};
+
+static const AVClass utvideo_class = {
+    "libutvideo",
+    av_default_item_name,
+    options,
+    LIBAVUTIL_VERSION_INT,
+    0,
+    0,
+    NULL,
+    NULL,
+    AV_CLASS_CATEGORY_NA,
+    NULL,
+    NULL,
+};
+
 AVCodec ff_libutvideo_encoder = {
     "libutvideo",
     NULL_IF_CONFIG_SMALL("Ut Video"),
     AVMEDIA_TYPE_VIDEO,
     AV_CODEC_ID_UTVIDEO,
-    CODEC_CAP_AUTO_THREADS | CODEC_CAP_LOSSLESS,
+    AV_CODEC_CAP_AUTO_THREADS | AV_CODEC_CAP_LOSSLESS | AV_CODEC_CAP_INTRA_ONLY,
     NULL, /* supported_framerates */
     (const enum AVPixelFormat[]) {
         AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUYV422, AV_PIX_FMT_BGR24,
@@ -236,7 +264,7 @@ AVCodec ff_libutvideo_encoder = {
     NULL, /* sample_fmts */
     NULL, /* channel_layouts */
     0,    /* max_lowres */
-    NULL, /* priv_class */
+    &utvideo_class, /* priv_class */
     NULL, /* profiles */
     sizeof(UtVideoContext),
     NULL, /* next */
diff --git a/libavcodec/libvo-aacenc.c b/libavcodec/libvo-aacenc.c
deleted file mode 100644
index 2c4a4242..00000000
--- a/libavcodec/libvo-aacenc.c
+++ /dev/null
@@ -1,200 +0,0 @@
-/*
- * AAC encoder wrapper
- * Copyright (c) 2010 Martin Storsjo
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <vo-aacenc/voAAC.h>
-#include <vo-aacenc/cmnMemory.h>
-
-#include "avcodec.h"
-#include "audio_frame_queue.h"
-#include "internal.h"
-#include "mpeg4audio.h"
-
-#define FRAME_SIZE 1024
-#define ENC_DELAY  1600
-
-typedef struct AACContext {
-    VO_AUDIO_CODECAPI codec_api;
-    VO_HANDLE handle;
-    VO_MEM_OPERATOR mem_operator;
-    VO_CODEC_INIT_USERDATA user_data;
-    VO_PBYTE end_buffer;
-    AudioFrameQueue afq;
-    int last_frame;
-    int last_samples;
-} AACContext;
-
-
-static int aac_encode_close(AVCodecContext *avctx)
-{
-    AACContext *s = avctx->priv_data;
-
-    s->codec_api.Uninit(s->handle);
-    av_freep(&avctx->extradata);
-    ff_af_queue_close(&s->afq);
-    av_freep(&s->end_buffer);
-
-    return 0;
-}
-
-static av_cold int aac_encode_init(AVCodecContext *avctx)
-{
-    AACContext *s = avctx->priv_data;
-    AACENC_PARAM params = { 0 };
-    int index, ret;
-
-    avctx->frame_size = FRAME_SIZE;
-    avctx->initial_padding = ENC_DELAY;
-    s->last_frame     = 2;
-    ff_af_queue_init(avctx, &s->afq);
-
-    s->end_buffer = av_mallocz_array(avctx->channels, avctx->frame_size * 2);
-    if (!s->end_buffer) {
-        ret = AVERROR(ENOMEM);
-        goto error;
-    }
-
-    voGetAACEncAPI(&s->codec_api);
-
-    s->mem_operator.Alloc = cmnMemAlloc;
-    s->mem_operator.Copy = cmnMemCopy;
-    s->mem_operator.Free = cmnMemFree;
-    s->mem_operator.Set = cmnMemSet;
-    s->mem_operator.Check = cmnMemCheck;
-    s->user_data.memflag = VO_IMF_USERMEMOPERATOR;
-    s->user_data.memData = &s->mem_operator;
-    s->codec_api.Init(&s->handle, VO_AUDIO_CodingAAC, &s->user_data);
-
-    params.sampleRate = avctx->sample_rate;
-    params.bitRate    = avctx->bit_rate;
-    params.nChannels  = avctx->channels;
-    params.adtsUsed   = !(avctx->flags & CODEC_FLAG_GLOBAL_HEADER);
-    if (s->codec_api.SetParam(s->handle, VO_PID_AAC_ENCPARAM, &params)
-        != VO_ERR_NONE) {
-        av_log(avctx, AV_LOG_ERROR, "Unable to set encoding parameters\n");
-        ret = AVERROR(EINVAL);
-        goto error;
-    }
-
-    for (index = 0; index < 16; index++)
-        if (avctx->sample_rate == avpriv_mpeg4audio_sample_rates[index])
-            break;
-    if (index == 16) {
-        av_log(avctx, AV_LOG_ERROR, "Unsupported sample rate %d\n",
-                                    avctx->sample_rate);
-        ret = AVERROR(ENOSYS);
-        goto error;
-    }
-    if (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) {
-        avctx->extradata_size = 2;
-        avctx->extradata      = av_mallocz(avctx->extradata_size +
-                                           FF_INPUT_BUFFER_PADDING_SIZE);
-        if (!avctx->extradata) {
-            ret = AVERROR(ENOMEM);
-            goto error;
-        }
-
-        avctx->extradata[0] = 0x02 << 3 | index >> 1;
-        avctx->extradata[1] = (index & 0x01) << 7 | avctx->channels << 3;
-    }
-    return 0;
-error:
-    aac_encode_close(avctx);
-    return ret;
-}
-
-static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
-                            const AVFrame *frame, int *got_packet_ptr)
-{
-    AACContext *s = avctx->priv_data;
-    VO_CODECBUFFER input = { 0 }, output = { 0 };
-    VO_AUDIO_OUTPUTINFO output_info = { { 0 } };
-    VO_PBYTE samples;
-    int ret;
-
-    /* handle end-of-stream small frame and flushing */
-    if (!frame) {
-        if (s->last_frame <= 0)
-            return 0;
-        if (s->last_samples > 0 && s->last_samples < ENC_DELAY - FRAME_SIZE) {
-            s->last_samples = 0;
-            s->last_frame--;
-        }
-        s->last_frame--;
-        memset(s->end_buffer, 0, 2 * avctx->channels * avctx->frame_size);
-        samples = s->end_buffer;
-    } else {
-        if (frame->nb_samples < avctx->frame_size) {
-            s->last_samples = frame->nb_samples;
-            memcpy(s->end_buffer, frame->data[0], 2 * avctx->channels * frame->nb_samples);
-            samples = s->end_buffer;
-        } else {
-            samples = (VO_PBYTE)frame->data[0];
-        }
-        /* add current frame to the queue */
-        if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
-            return ret;
-    }
-
-    if ((ret = ff_alloc_packet2(avctx, avpkt, FFMAX(8192, 768 * avctx->channels))) < 0)
-        return ret;
-
-    input.Buffer  = samples;
-    input.Length  = 2 * avctx->channels * avctx->frame_size;
-    output.Buffer = avpkt->data;
-    output.Length = avpkt->size;
-
-    s->codec_api.SetInputData(s->handle, &input);
-    if (s->codec_api.GetOutputData(s->handle, &output, &output_info)
-        != VO_ERR_NONE) {
-        av_log(avctx, AV_LOG_ERROR, "Unable to encode frame\n");
-        return AVERROR(EINVAL);
-    }
-
-    /* Get the next frame pts/duration */
-    ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
-                       &avpkt->duration);
-
-    avpkt->size = output.Length;
-    *got_packet_ptr = 1;
-    return 0;
-}
-
-/* duplicated from avpriv_mpeg4audio_sample_rates to avoid shared build
- * failures */
-static const int mpeg4audio_sample_rates[16] = {
-    96000, 88200, 64000, 48000, 44100, 32000,
-    24000, 22050, 16000, 12000, 11025, 8000, 7350
-};
-
-AVCodec ff_libvo_aacenc_encoder = {
-    .name           = "libvo_aacenc",
-    .long_name      = NULL_IF_CONFIG_SMALL("Android VisualOn AAC (Advanced Audio Coding)"),
-    .type           = AVMEDIA_TYPE_AUDIO,
-    .id             = AV_CODEC_ID_AAC,
-    .priv_data_size = sizeof(AACContext),
-    .init           = aac_encode_init,
-    .encode2        = aac_encode_frame,
-    .close          = aac_encode_close,
-    .supported_samplerates = mpeg4audio_sample_rates,
-    .capabilities   = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
-    .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
-                                                     AV_SAMPLE_FMT_NONE },
-};
diff --git a/libavcodec/libvo-amrwbenc.c b/libavcodec/libvo-amrwbenc.c
index fe19e711..92fa1850 100644
--- a/libavcodec/libvo-amrwbenc.c
+++ b/libavcodec/libvo-amrwbenc.c
@@ -115,7 +115,7 @@ static int amr_wb_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     const int16_t *samples = (const int16_t *)frame->data[0];
     int size, ret;
 
-    if ((ret = ff_alloc_packet2(avctx, avpkt, MAX_PACKET_SIZE)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, avpkt, MAX_PACKET_SIZE, 0)) < 0)
         return ret;
 
     if (s->last_bitrate != avctx->bit_rate) {
diff --git a/libavcodec/libvorbisdec.c b/libavcodec/libvorbisdec.c
index db005725..ecf690a5 100644
--- a/libavcodec/libvorbisdec.c
+++ b/libavcodec/libvorbisdec.c
@@ -32,6 +32,8 @@ typedef struct OggVorbisDecContext {
     ogg_packet op;                      /**< ogg packet                     */
 } OggVorbisDecContext;
 
+static int oggvorbis_decode_close(AVCodecContext *avccontext);
+
 static int oggvorbis_decode_init(AVCodecContext *avccontext) {
     OggVorbisDecContext *context = avccontext->priv_data ;
     uint8_t *p= avccontext->extradata;
@@ -110,8 +112,7 @@ static int oggvorbis_decode_init(AVCodecContext *avccontext) {
     return 0 ;
 
   error:
-    vorbis_info_clear(&context->vi);
-    vorbis_comment_clear(&context->vc) ;
+    oggvorbis_decode_close(avccontext);
     return ret;
 }
 
@@ -187,6 +188,8 @@ static int oggvorbis_decode_frame(AVCodecContext *avccontext, void *data,
 static int oggvorbis_decode_close(AVCodecContext *avccontext) {
     OggVorbisDecContext *context = avccontext->priv_data ;
 
+    vorbis_block_clear(&context->vb);
+    vorbis_dsp_clear(&context->vd);
     vorbis_info_clear(&context->vi) ;
     vorbis_comment_clear(&context->vc) ;
 
@@ -203,5 +206,5 @@ AVCodec ff_libvorbis_decoder = {
     .init           = oggvorbis_decode_init,
     .decode         = oggvorbis_decode_frame,
     .close          = oggvorbis_decode_close,
-    .capabilities   = CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_DELAY,
 };
diff --git a/libavcodec/libvorbisenc.c b/libavcodec/libvorbisenc.c
index 231d1be2..3ca5b55e 100644
--- a/libavcodec/libvorbisenc.c
+++ b/libavcodec/libvorbisenc.c
@@ -86,14 +86,14 @@ static av_cold int libvorbis_setup(vorbis_info *vi, AVCodecContext *avctx)
     double cfreq;
     int ret;
 
-    if (avctx->flags & CODEC_FLAG_QSCALE || !avctx->bit_rate) {
+    if (avctx->flags & AV_CODEC_FLAG_QSCALE || !avctx->bit_rate) {
         /* variable bitrate
          * NOTE: we use the oggenc range of -1 to 10 for global_quality for
          *       user convenience, but libvorbis uses -0.1 to 1.0.
          */
         float q = avctx->global_quality / (float)FF_QP2LAMBDA;
         /* default to 3 if the user did not set quality or bitrate */
-        if (!(avctx->flags & CODEC_FLAG_QSCALE))
+        if (!(avctx->flags & AV_CODEC_FLAG_QSCALE))
             q = 3.0;
         if ((ret = vorbis_encode_setup_vbr(vi, avctx->channels,
                                            avctx->sample_rate,
@@ -218,7 +218,7 @@ static av_cold int libvorbis_encode_init(AVCodecContext *avctx)
     }
 
     vorbis_comment_init(&s->vc);
-    if (!(avctx->flags & CODEC_FLAG_BITEXACT))
+    if (!(avctx->flags & AV_CODEC_FLAG_BITEXACT))
         vorbis_comment_add_tag(&s->vc, "encoder", LIBAVCODEC_IDENT);
 
     if ((ret = vorbis_analysis_headerout(&s->vd, &s->vc, &header, &header_comm,
@@ -231,7 +231,7 @@ static av_cold int libvorbis_encode_init(AVCodecContext *avctx)
                                 xiph_len(header_comm.bytes) +
                                 header_code.bytes;
     p = avctx->extradata = av_malloc(avctx->extradata_size +
-                                     FF_INPUT_BUFFER_PADDING_SIZE);
+                                     AV_INPUT_BUFFER_PADDING_SIZE);
     if (!p) {
         ret = AVERROR(ENOMEM);
         goto error;
@@ -338,7 +338,7 @@ static int libvorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
 
     av_fifo_generic_read(s->pkt_fifo, &op, sizeof(ogg_packet), NULL);
 
-    if ((ret = ff_alloc_packet2(avctx, avpkt, op.bytes)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, avpkt, op.bytes, 0)) < 0)
         return ret;
     av_fifo_generic_read(s->pkt_fifo, avpkt->data, op.bytes, NULL);
 
@@ -372,7 +372,7 @@ AVCodec ff_libvorbis_encoder = {
     .init           = libvorbis_encode_init,
     .encode2        = libvorbis_encode_frame,
     .close          = libvorbis_encode_close,
-    .capabilities   = CODEC_CAP_DELAY | CODEC_CAP_SMALL_LAST_FRAME,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_SMALL_LAST_FRAME,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                       AV_SAMPLE_FMT_NONE },
     .priv_class     = &vorbis_class,
diff --git a/libavcodec/libvpx.c b/libavcodec/libvpx.c
index e0f9df3c..a60d186f 100644
--- a/libavcodec/libvpx.c
+++ b/libavcodec/libvpx.c
@@ -62,7 +62,7 @@ av_cold void ff_vp9_init_static(AVCodec *codec)
 {
     if (    vpx_codec_version_major() < 1
         || (vpx_codec_version_major() == 1 && vpx_codec_version_minor() < 3))
-        codec->capabilities |= CODEC_CAP_EXPERIMENTAL;
+        codec->capabilities |= AV_CODEC_CAP_EXPERIMENTAL;
     codec->pix_fmts = vp9_pix_fmts_def;
 #if CONFIG_LIBVPX_VP9_ENCODER
     if (    vpx_codec_version_major() > 1
@@ -77,3 +77,60 @@ av_cold void ff_vp9_init_static(AVCodec *codec)
     }
 #endif
 }
+#if 0
+enum AVPixelFormat ff_vpx_imgfmt_to_pixfmt(vpx_img_fmt_t img)
+{
+    switch (img) {
+    case VPX_IMG_FMT_RGB24:     return AV_PIX_FMT_RGB24;
+    case VPX_IMG_FMT_RGB565:    return AV_PIX_FMT_RGB565BE;
+    case VPX_IMG_FMT_RGB555:    return AV_PIX_FMT_RGB555BE;
+    case VPX_IMG_FMT_UYVY:      return AV_PIX_FMT_UYVY422;
+    case VPX_IMG_FMT_YUY2:      return AV_PIX_FMT_YUYV422;
+    case VPX_IMG_FMT_YVYU:      return AV_PIX_FMT_YVYU422;
+    case VPX_IMG_FMT_BGR24:     return AV_PIX_FMT_BGR24;
+    case VPX_IMG_FMT_ARGB:      return AV_PIX_FMT_ARGB;
+    case VPX_IMG_FMT_ARGB_LE:   return AV_PIX_FMT_BGRA;
+    case VPX_IMG_FMT_RGB565_LE: return AV_PIX_FMT_RGB565LE;
+    case VPX_IMG_FMT_RGB555_LE: return AV_PIX_FMT_RGB555LE;
+    case VPX_IMG_FMT_I420:      return AV_PIX_FMT_YUV420P;
+    case VPX_IMG_FMT_I422:      return AV_PIX_FMT_YUV422P;
+    case VPX_IMG_FMT_I444:      return AV_PIX_FMT_YUV444P;
+    case VPX_IMG_FMT_444A:      return AV_PIX_FMT_YUVA444P;
+#if VPX_IMAGE_ABI_VERSION >= 3
+    case VPX_IMG_FMT_I440:      return AV_PIX_FMT_YUV440P;
+    case VPX_IMG_FMT_I42016:    return AV_PIX_FMT_YUV420P16BE;
+    case VPX_IMG_FMT_I42216:    return AV_PIX_FMT_YUV422P16BE;
+    case VPX_IMG_FMT_I44416:    return AV_PIX_FMT_YUV444P16BE;
+#endif
+    default:                    return AV_PIX_FMT_NONE;
+    }
+}
+
+vpx_img_fmt_t ff_vpx_pixfmt_to_imgfmt(enum AVPixelFormat pix)
+{
+    switch (pix) {
+    case AV_PIX_FMT_RGB24:        return VPX_IMG_FMT_RGB24;
+    case AV_PIX_FMT_RGB565BE:     return VPX_IMG_FMT_RGB565;
+    case AV_PIX_FMT_RGB555BE:     return VPX_IMG_FMT_RGB555;
+    case AV_PIX_FMT_UYVY422:      return VPX_IMG_FMT_UYVY;
+    case AV_PIX_FMT_YUYV422:      return VPX_IMG_FMT_YUY2;
+    case AV_PIX_FMT_YVYU422:      return VPX_IMG_FMT_YVYU;
+    case AV_PIX_FMT_BGR24:        return VPX_IMG_FMT_BGR24;
+    case AV_PIX_FMT_ARGB:         return VPX_IMG_FMT_ARGB;
+    case AV_PIX_FMT_BGRA:         return VPX_IMG_FMT_ARGB_LE;
+    case AV_PIX_FMT_RGB565LE:     return VPX_IMG_FMT_RGB565_LE;
+    case AV_PIX_FMT_RGB555LE:     return VPX_IMG_FMT_RGB555_LE;
+    case AV_PIX_FMT_YUV420P:      return VPX_IMG_FMT_I420;
+    case AV_PIX_FMT_YUV422P:      return VPX_IMG_FMT_I422;
+    case AV_PIX_FMT_YUV444P:      return VPX_IMG_FMT_I444;
+    case AV_PIX_FMT_YUVA444P:     return VPX_IMG_FMT_444A;
+#if VPX_IMAGE_ABI_VERSION >= 3
+    case AV_PIX_FMT_YUV440P:      return VPX_IMG_FMT_I440;
+    case AV_PIX_FMT_YUV420P16BE:  return VPX_IMG_FMT_I42016;
+    case AV_PIX_FMT_YUV422P16BE:  return VPX_IMG_FMT_I42216;
+    case AV_PIX_FMT_YUV444P16BE:  return VPX_IMG_FMT_I44416;
+#endif
+    default:                      return VPX_IMG_FMT_NONE;
+    }
+}
+#endif
diff --git a/libavcodec/libvpx.h b/libavcodec/libvpx.h
index 36a275c5..22b697fa 100644
--- a/libavcodec/libvpx.h
+++ b/libavcodec/libvpx.h
@@ -21,8 +21,14 @@
 #ifndef AVCODEC_LIBVPX_H
 #define AVCODEC_LIBVPX_H
 
+#include <vpx/vpx_codec.h>
+
 #include "avcodec.h"
 
 void ff_vp9_init_static(AVCodec *codec);
+#if 0
+enum AVPixelFormat ff_vpx_imgfmt_to_pixfmt(vpx_img_fmt_t img);
+vpx_img_fmt_t ff_vpx_pixfmt_to_imgfmt(enum AVPixelFormat pix);
+#endif
 
 #endif /* AVCODEC_LIBVPX_H */
diff --git a/libavcodec/libvpxdec.c b/libavcodec/libvpxdec.c
index c69e8889..b51bfa2f 100644
--- a/libavcodec/libvpxdec.c
+++ b/libavcodec/libvpxdec.c
@@ -32,6 +32,7 @@
 #include "avcodec.h"
 #include "internal.h"
 #include "libvpx.h"
+#include "profiles.h"
 
 typedef struct VP8DecoderContext {
     struct vpx_codec_ctx decoder;
@@ -62,26 +63,50 @@ static av_cold int vpx_init(AVCodecContext *avctx,
 // returns 0 on success, AVERROR_INVALIDDATA otherwise
 static int set_pix_fmt(AVCodecContext *avctx, struct vpx_image *img)
 {
+#if VPX_IMAGE_ABI_VERSION >= 3
+    static const enum AVColorSpace colorspaces[8] = {
+        AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
+        AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
+    };
+#if VPX_IMAGE_ABI_VERSION >= 4
+    static const enum AVColorRange color_ranges[] = {
+        AVCOL_RANGE_MPEG, AVCOL_RANGE_JPEG
+    };
+    avctx->color_range = color_ranges[img->range];
+#endif
+    avctx->colorspace = colorspaces[img->cs];
+#endif
     if (avctx->codec_id == AV_CODEC_ID_VP8 && img->fmt != VPX_IMG_FMT_I420)
         return AVERROR_INVALIDDATA;
     switch (img->fmt) {
     case VPX_IMG_FMT_I420:
+        if (avctx->codec_id == AV_CODEC_ID_VP9)
+            avctx->profile = FF_PROFILE_VP9_0;
         avctx->pix_fmt = AV_PIX_FMT_YUV420P;
         return 0;
 #if CONFIG_LIBVPX_VP9_DECODER
     case VPX_IMG_FMT_I422:
+        avctx->profile = FF_PROFILE_VP9_1;
         avctx->pix_fmt = AV_PIX_FMT_YUV422P;
         return 0;
 #if VPX_IMAGE_ABI_VERSION >= 3
     case VPX_IMG_FMT_I440:
+        avctx->profile = FF_PROFILE_VP9_1;
         avctx->pix_fmt = AV_PIX_FMT_YUV440P;
         return 0;
 #endif
     case VPX_IMG_FMT_I444:
+        avctx->profile = FF_PROFILE_VP9_1;
+#if VPX_IMAGE_ABI_VERSION >= 3
+        avctx->pix_fmt = avctx->colorspace == AVCOL_SPC_RGB ?
+                         AV_PIX_FMT_GBRP : AV_PIX_FMT_YUV444P;
+#else
         avctx->pix_fmt = AV_PIX_FMT_YUV444P;
+#endif
         return 0;
 #ifdef VPX_IMG_FMT_HIGHBITDEPTH
     case VPX_IMG_FMT_I42016:
+        avctx->profile = FF_PROFILE_VP9_2;
         if (img->bit_depth == 10) {
             avctx->pix_fmt = AV_PIX_FMT_YUV420P10LE;
             return 0;
@@ -92,6 +117,7 @@ static int set_pix_fmt(AVCodecContext *avctx, struct vpx_image *img)
             return AVERROR_INVALIDDATA;
         }
     case VPX_IMG_FMT_I42216:
+        avctx->profile = FF_PROFILE_VP9_3;
         if (img->bit_depth == 10) {
             avctx->pix_fmt = AV_PIX_FMT_YUV422P10LE;
             return 0;
@@ -103,6 +129,7 @@ static int set_pix_fmt(AVCodecContext *avctx, struct vpx_image *img)
         }
 #if VPX_IMAGE_ABI_VERSION >= 3
     case VPX_IMG_FMT_I44016:
+        avctx->profile = FF_PROFILE_VP9_3;
         if (img->bit_depth == 10) {
             avctx->pix_fmt = AV_PIX_FMT_YUV440P10LE;
             return 0;
@@ -114,11 +141,22 @@ static int set_pix_fmt(AVCodecContext *avctx, struct vpx_image *img)
         }
 #endif
     case VPX_IMG_FMT_I44416:
+        avctx->profile = FF_PROFILE_VP9_3;
         if (img->bit_depth == 10) {
+#if VPX_IMAGE_ABI_VERSION >= 3
+            avctx->pix_fmt = avctx->colorspace == AVCOL_SPC_RGB ?
+                             AV_PIX_FMT_GBRP10LE : AV_PIX_FMT_YUV444P10LE;
+#else
             avctx->pix_fmt = AV_PIX_FMT_YUV444P10LE;
+#endif
             return 0;
         } else if (img->bit_depth == 12) {
+#if VPX_IMAGE_ABI_VERSION >= 3
+            avctx->pix_fmt = avctx->colorspace == AVCOL_SPC_RGB ?
+                             AV_PIX_FMT_GBRP12LE : AV_PIX_FMT_YUV444P12LE;
+#else
             avctx->pix_fmt = AV_PIX_FMT_YUV444P12LE;
+#endif
             return 0;
         } else {
             return AVERROR_INVALIDDATA;
@@ -201,7 +239,7 @@ AVCodec ff_libvpx_vp8_decoder = {
     .init           = vp8_init,
     .close          = vp8_free,
     .decode         = vp8_decode,
-    .capabilities   = CODEC_CAP_AUTO_THREADS | CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_AUTO_THREADS | AV_CODEC_CAP_DR1,
 };
 #endif /* CONFIG_LIBVPX_VP8_DECODER */
 
@@ -220,7 +258,8 @@ AVCodec ff_libvpx_vp9_decoder = {
     .init           = vp9_init,
     .close          = vp8_free,
     .decode         = vp8_decode,
-    .capabilities   = CODEC_CAP_AUTO_THREADS | CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_AUTO_THREADS | AV_CODEC_CAP_DR1,
     .init_static_data = ff_vp9_init_static,
+    .profiles       = NULL_IF_CONFIG_SMALL(ff_vp9_profiles),
 };
 #endif /* CONFIG_LIBVPX_VP9_DECODER */
diff --git a/libavcodec/libvpxenc.c b/libavcodec/libvpxenc.c
index adf4b2e2..8992497c 100644
--- a/libavcodec/libvpxenc.c
+++ b/libavcodec/libvpxenc.c
@@ -32,8 +32,10 @@
 #include "internal.h"
 #include "libavutil/avassert.h"
 #include "libvpx.h"
+#include "profiles.h"
 #include "libavutil/base64.h"
 #include "libavutil/common.h"
+#include "libavutil/internal.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/mathematics.h"
 #include "libavutil/opt.h"
@@ -86,11 +88,15 @@ typedef struct VP8EncoderContext {
     int arnr_strength;
     int arnr_type;
 
+    int tune;
+
     int lag_in_frames;
     int error_resilient;
     int crf;
     int static_thresh;
     int max_intra_rate;
+    int rc_undershoot_pct;
+    int rc_overshoot_pct;
 
     // VP9-only
     int lossless;
@@ -98,26 +104,21 @@ typedef struct VP8EncoderContext {
     int tile_rows;
     int frame_parallel;
     int aq_mode;
+    int drop_threshold;
+    int noise_sensitivity;
 } VP8Context;
 
 /** String mappings for enum vp8e_enc_control_id */
 static const char *const ctlidstr[] = {
-    [VP8E_UPD_ENTROPY]           = "VP8E_UPD_ENTROPY",
-    [VP8E_UPD_REFERENCE]         = "VP8E_UPD_REFERENCE",
-    [VP8E_USE_REFERENCE]         = "VP8E_USE_REFERENCE",
-    [VP8E_SET_ROI_MAP]           = "VP8E_SET_ROI_MAP",
-    [VP8E_SET_ACTIVEMAP]         = "VP8E_SET_ACTIVEMAP",
-    [VP8E_SET_SCALEMODE]         = "VP8E_SET_SCALEMODE",
     [VP8E_SET_CPUUSED]           = "VP8E_SET_CPUUSED",
     [VP8E_SET_ENABLEAUTOALTREF]  = "VP8E_SET_ENABLEAUTOALTREF",
     [VP8E_SET_NOISE_SENSITIVITY] = "VP8E_SET_NOISE_SENSITIVITY",
-    [VP8E_SET_SHARPNESS]         = "VP8E_SET_SHARPNESS",
     [VP8E_SET_STATIC_THRESHOLD]  = "VP8E_SET_STATIC_THRESHOLD",
     [VP8E_SET_TOKEN_PARTITIONS]  = "VP8E_SET_TOKEN_PARTITIONS",
-    [VP8E_GET_LAST_QUANTIZER]    = "VP8E_GET_LAST_QUANTIZER",
     [VP8E_SET_ARNR_MAXFRAMES]    = "VP8E_SET_ARNR_MAXFRAMES",
     [VP8E_SET_ARNR_STRENGTH]     = "VP8E_SET_ARNR_STRENGTH",
     [VP8E_SET_ARNR_TYPE]         = "VP8E_SET_ARNR_TYPE",
+    [VP8E_SET_TUNING]            = "VP8E_SET_TUNING",
     [VP8E_SET_CQ_LEVEL]          = "VP8E_SET_CQ_LEVEL",
     [VP8E_SET_MAX_INTRA_BITRATE_PCT] = "VP8E_SET_MAX_INTRA_BITRATE_PCT",
 #if CONFIG_LIBVPX_VP9_ENCODER
@@ -126,6 +127,12 @@ static const char *const ctlidstr[] = {
     [VP9E_SET_TILE_ROWS]               = "VP9E_SET_TILE_ROWS",
     [VP9E_SET_FRAME_PARALLEL_DECODING] = "VP9E_SET_FRAME_PARALLEL_DECODING",
     [VP9E_SET_AQ_MODE]                 = "VP9E_SET_AQ_MODE",
+#if VPX_ENCODER_ABI_VERSION > 8
+    [VP9E_SET_COLOR_SPACE]             = "VP9E_SET_COLOR_SPACE",
+#endif
+#if VPX_ENCODER_ABI_VERSION >= 11
+    [VP9E_SET_COLOR_RANGE]             = "VP9E_SET_COLOR_RANGE",
+#endif
 #endif
 };
 
@@ -260,7 +267,6 @@ static av_cold int vp8_free(AVCodecContext *avctx)
     if (ctx->is_alpha)
         vpx_codec_destroy(&ctx->encoder_alpha);
     av_freep(&ctx->twopass_stats.buf);
-    av_frame_free(&avctx->coded_frame);
     av_freep(&avctx->stats_out);
     free_frame_list(ctx->coded_frame_list);
     return 0;
@@ -347,15 +353,57 @@ static int set_pix_fmt(AVCodecContext *avctx, vpx_codec_caps_t codec_caps,
     av_log(avctx, AV_LOG_ERROR, "Unsupported pixel format.\n");
     return AVERROR_INVALIDDATA;
 }
+
+#if VPX_ENCODER_ABI_VERSION > 8
+static void set_colorspace(AVCodecContext *avctx)
+{
+    enum vpx_color_space vpx_cs;
+
+    switch (avctx->colorspace) {
+    case AVCOL_SPC_RGB:         vpx_cs = VPX_CS_SRGB;      break;
+    case AVCOL_SPC_BT709:       vpx_cs = VPX_CS_BT_709;    break;
+    case AVCOL_SPC_UNSPECIFIED: vpx_cs = VPX_CS_UNKNOWN;   break;
+    case AVCOL_SPC_RESERVED:    vpx_cs = VPX_CS_RESERVED;  break;
+    case AVCOL_SPC_BT470BG:     vpx_cs = VPX_CS_BT_601;    break;
+    case AVCOL_SPC_SMPTE170M:   vpx_cs = VPX_CS_SMPTE_170; break;
+    case AVCOL_SPC_SMPTE240M:   vpx_cs = VPX_CS_SMPTE_240; break;
+    case AVCOL_SPC_BT2020_NCL:  vpx_cs = VPX_CS_BT_2020;   break;
+    default:
+        av_log(avctx, AV_LOG_WARNING, "Unsupported colorspace (%d)\n",
+               avctx->colorspace);
+        return;
+    }
+    codecctl_int(avctx, VP9E_SET_COLOR_SPACE, vpx_cs);
+}
+#endif
+
+#if VPX_ENCODER_ABI_VERSION >= 11
+static void set_color_range(AVCodecContext *avctx)
+{
+    enum vpx_color_range vpx_cr;
+    switch (avctx->color_range) {
+    case AVCOL_RANGE_UNSPECIFIED:
+    case AVCOL_RANGE_MPEG:       vpx_cr = VPX_CR_STUDIO_RANGE; break;
+    case AVCOL_RANGE_JPEG:       vpx_cr = VPX_CR_FULL_RANGE;   break;
+    default:
+        av_log(avctx, AV_LOG_WARNING, "Unsupported color range (%d)\n",
+               avctx->color_range);
+        return;
+    }
+
+    codecctl_int(avctx, VP9E_SET_COLOR_RANGE, vpx_cr);
+}
+#endif
 #endif
 
 static av_cold int vpx_init(AVCodecContext *avctx,
                             const struct vpx_codec_iface *iface)
 {
     VP8Context *ctx = avctx->priv_data;
-    struct vpx_codec_enc_cfg enccfg;
+    struct vpx_codec_enc_cfg enccfg = { 0 };
     struct vpx_codec_enc_cfg enccfg_alpha;
-    vpx_codec_flags_t flags = (avctx->flags & CODEC_FLAG_PSNR) ? VPX_CODEC_USE_PSNR : 0;
+    vpx_codec_flags_t flags = (avctx->flags & AV_CODEC_FLAG_PSNR) ? VPX_CODEC_USE_PSNR : 0;
+    AVCPBProperties *cpb_props;
     int res;
     vpx_img_fmt_t img_fmt = VPX_IMG_FMT_I420;
 #if CONFIG_LIBVPX_VP9_ENCODER
@@ -396,9 +444,9 @@ static av_cold int vpx_init(AVCodecContext *avctx,
     enccfg.g_threads      = avctx->thread_count;
     enccfg.g_lag_in_frames= ctx->lag_in_frames;
 
-    if (avctx->flags & CODEC_FLAG_PASS1)
+    if (avctx->flags & AV_CODEC_FLAG_PASS1)
         enccfg.g_pass = VPX_RC_FIRST_PASS;
-    else if (avctx->flags & CODEC_FLAG_PASS2)
+    else if (avctx->flags & AV_CODEC_FLAG_PASS2)
         enccfg.g_pass = VPX_RC_LAST_PASS;
     else
         enccfg.g_pass = VPX_RC_ONE_PASS;
@@ -454,10 +502,16 @@ static av_cold int vpx_init(AVCodecContext *avctx,
         }
     }
 
-    enccfg.rc_dropframe_thresh = avctx->frame_skip_threshold;
+#if FF_API_PRIVATE_OPT
+FF_DISABLE_DEPRECATION_WARNINGS
+    if (avctx->frame_skip_threshold)
+        ctx->drop_threshold = avctx->frame_skip_threshold;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+    enccfg.rc_dropframe_thresh = ctx->drop_threshold;
 
     //0-100 (0 => CBR, 100 => VBR)
-    enccfg.rc_2pass_vbr_bias_pct           = round(avctx->qcompress * 100);
+    enccfg.rc_2pass_vbr_bias_pct           = lrint(avctx->qcompress * 100);
     if (avctx->bit_rate)
         enccfg.rc_2pass_vbr_minsection_pct =
             avctx->rc_min_rate * 100LL / avctx->bit_rate;
@@ -472,7 +526,19 @@ static av_cold int vpx_init(AVCodecContext *avctx,
         enccfg.rc_buf_initial_sz =
             avctx->rc_initial_buffer_occupancy * 1000LL / avctx->bit_rate;
     enccfg.rc_buf_optimal_sz     = enccfg.rc_buf_sz * 5 / 6;
-    enccfg.rc_undershoot_pct     = round(avctx->rc_buffer_aggressivity * 100);
+#if FF_API_MPV_OPT
+    FF_DISABLE_DEPRECATION_WARNINGS
+    if (avctx->rc_buffer_aggressivity != 1.0) {
+        av_log(avctx, AV_LOG_WARNING, "The rc_buffer_aggressivity option is "
+               "deprecated, use the undershoot-pct private option instead.\n");
+        enccfg.rc_undershoot_pct = lrint(avctx->rc_buffer_aggressivity * 100);
+    }
+    FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+    if (ctx->rc_undershoot_pct >= 0)
+        enccfg.rc_undershoot_pct = ctx->rc_undershoot_pct;
+    if (ctx->rc_overshoot_pct >= 0)
+        enccfg.rc_overshoot_pct = ctx->rc_overshoot_pct;
 
     //_enc_init() will balk if kf_min_dist differs from max w/VPX_KF_AUTO
     if (avctx->keyint_min >= 0 && avctx->keyint_min == avctx->gop_size)
@@ -548,8 +614,17 @@ static av_cold int vpx_init(AVCodecContext *avctx,
         codecctl_int(avctx, VP8E_SET_ARNR_STRENGTH,    ctx->arnr_strength);
     if (ctx->arnr_type >= 0)
         codecctl_int(avctx, VP8E_SET_ARNR_TYPE,        ctx->arnr_type);
-    if (avctx->codec_id == AV_CODEC_ID_VP8) {
-        codecctl_int(avctx, VP8E_SET_NOISE_SENSITIVITY, avctx->noise_reduction);
+    if (ctx->tune >= 0)
+        codecctl_int(avctx, VP8E_SET_TUNING,           ctx->tune);
+
+    if (CONFIG_LIBVPX_VP8_ENCODER && avctx->codec_id == AV_CODEC_ID_VP8) {
+#if FF_API_PRIVATE_OPT
+FF_DISABLE_DEPRECATION_WARNINGS
+        if (avctx->noise_reduction)
+            ctx->noise_sensitivity = avctx->noise_reduction;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+        codecctl_int(avctx, VP8E_SET_NOISE_SENSITIVITY, ctx->noise_sensitivity);
         codecctl_int(avctx, VP8E_SET_TOKEN_PARTITIONS,  av_log2(avctx->slices));
     }
 #if FF_API_MPV_OPT
@@ -579,6 +654,12 @@ static av_cold int vpx_init(AVCodecContext *avctx,
             codecctl_int(avctx, VP9E_SET_FRAME_PARALLEL_DECODING, ctx->frame_parallel);
         if (ctx->aq_mode >= 0)
             codecctl_int(avctx, VP9E_SET_AQ_MODE, ctx->aq_mode);
+#if VPX_ENCODER_ABI_VERSION > 8
+        set_colorspace(avctx);
+#endif
+#if VPX_ENCODER_ABI_VERSION >= 11
+        set_color_range(avctx);
+#endif
     }
 #endif
 
@@ -596,12 +677,18 @@ static av_cold int vpx_init(AVCodecContext *avctx,
         vpx_img_wrap(&ctx->rawimg_alpha, VPX_IMG_FMT_I420, avctx->width, avctx->height, 1,
                      (unsigned char*)1);
 
-    avctx->coded_frame = av_frame_alloc();
-    if (!avctx->coded_frame) {
-        av_log(avctx, AV_LOG_ERROR, "Error allocating coded frame\n");
-        vp8_free(avctx);
+    cpb_props = ff_add_cpb_side_data(avctx);
+    if (!cpb_props)
         return AVERROR(ENOMEM);
+
+    if (enccfg.rc_end_usage == VPX_CBR ||
+        enccfg.g_pass != VPX_RC_ONE_PASS) {
+        cpb_props->max_bitrate = avctx->rc_max_rate;
+        cpb_props->min_bitrate = avctx->rc_min_rate;
+        cpb_props->avg_bitrate = avctx->bit_rate;
     }
+    cpb_props->buffer_size = avctx->rc_buffer_size;
+
     return 0;
 }
 
@@ -648,31 +735,54 @@ static inline void cx_pktcpy(struct FrameListData *dst,
  * @return a negative AVERROR on error
  */
 static int storeframe(AVCodecContext *avctx, struct FrameListData *cx_frame,
-                      AVPacket *pkt, AVFrame *coded_frame)
+                      AVPacket *pkt)
 {
-    int ret = ff_alloc_packet2(avctx, pkt, cx_frame->sz);
+    int ret = ff_alloc_packet2(avctx, pkt, cx_frame->sz, 0);
     uint8_t *side_data;
     if (ret >= 0) {
+        int pict_type;
         memcpy(pkt->data, cx_frame->buf, pkt->size);
-        pkt->pts = pkt->dts    = cx_frame->pts;
-        coded_frame->pts       = cx_frame->pts;
-        coded_frame->key_frame = !!(cx_frame->flags & VPX_FRAME_IS_KEY);
+        pkt->pts = pkt->dts = cx_frame->pts;
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
+        avctx->coded_frame->pts       = cx_frame->pts;
+        avctx->coded_frame->key_frame = !!(cx_frame->flags & VPX_FRAME_IS_KEY);
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+        if (!!(cx_frame->flags & VPX_FRAME_IS_KEY)) {
+            pict_type = AV_PICTURE_TYPE_I;
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
+            avctx->coded_frame->pict_type = pict_type;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+            pkt->flags |= AV_PKT_FLAG_KEY;
+        } else {
+            pict_type = AV_PICTURE_TYPE_P;
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
+            avctx->coded_frame->pict_type = pict_type;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+        }
 
-        if (coded_frame->key_frame) {
-            coded_frame->pict_type = AV_PICTURE_TYPE_I;
-            pkt->flags            |= AV_PKT_FLAG_KEY;
-        } else
-            coded_frame->pict_type = AV_PICTURE_TYPE_P;
+        ff_side_data_set_encoder_stats(pkt, 0, cx_frame->sse + 1,
+                                       cx_frame->have_sse ? 3 : 0, pict_type);
 
         if (cx_frame->have_sse) {
             int i;
             /* Beware of the Y/U/V/all order! */
-            coded_frame->error[0] = cx_frame->sse[1];
-            coded_frame->error[1] = cx_frame->sse[2];
-            coded_frame->error[2] = cx_frame->sse[3];
-            coded_frame->error[3] = 0;    // alpha
-            for (i = 0; i < 4; ++i) {
-                avctx->error[i] += coded_frame->error[i];
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
+            avctx->coded_frame->error[0] = cx_frame->sse[1];
+            avctx->coded_frame->error[1] = cx_frame->sse[2];
+            avctx->coded_frame->error[2] = cx_frame->sse[3];
+            avctx->coded_frame->error[3] = 0;    // alpha
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+            for (i = 0; i < 3; ++i) {
+                avctx->error[i] += cx_frame->sse[i + 1];
             }
             cx_frame->have_sse = 0;
         }
@@ -681,7 +791,7 @@ static int storeframe(AVCodecContext *avctx, struct FrameListData *cx_frame,
                                                 AV_PKT_DATA_MATROSKA_BLOCKADDITIONAL,
                                                 cx_frame->sz_alpha + 8);
             if(!side_data) {
-                av_free_packet(pkt);
+                av_packet_unref(pkt);
                 av_free(pkt);
                 return AVERROR(ENOMEM);
             }
@@ -702,8 +812,7 @@ static int storeframe(AVCodecContext *avctx, struct FrameListData *cx_frame,
  * @return AVERROR(EINVAL) on output size error
  * @return AVERROR(ENOMEM) on coded frame queue data allocation error
  */
-static int queue_frames(AVCodecContext *avctx, AVPacket *pkt_out,
-                        AVFrame *coded_frame)
+static int queue_frames(AVCodecContext *avctx, AVPacket *pkt_out)
 {
     VP8Context *ctx = avctx->priv_data;
     const struct vpx_codec_cx_pkt *pkt;
@@ -715,7 +824,7 @@ static int queue_frames(AVCodecContext *avctx, AVPacket *pkt_out,
     if (ctx->coded_frame_list) {
         struct FrameListData *cx_frame = ctx->coded_frame_list;
         /* return the leading frame if we've already begun queueing */
-        size = storeframe(avctx, cx_frame, pkt_out, coded_frame);
+        size = storeframe(avctx, cx_frame, pkt_out);
         if (size < 0)
             return size;
         ctx->coded_frame_list = cx_frame->next;
@@ -736,7 +845,7 @@ static int queue_frames(AVCodecContext *avctx, AVPacket *pkt_out,
                    provided a frame for output */
                 av_assert0(!ctx->coded_frame_list);
                 cx_pktcpy(&cx_frame, pkt, pkt_alpha, ctx);
-                size = storeframe(avctx, &cx_frame, pkt_out, coded_frame);
+                size = storeframe(avctx, &cx_frame, pkt_out);
                 if (size < 0)
                     return size;
             } else {
@@ -863,9 +972,9 @@ static int vp8_encode(AVCodecContext *avctx, AVPacket *pkt,
         }
     }
 
-    coded_size = queue_frames(avctx, pkt, avctx->coded_frame);
+    coded_size = queue_frames(avctx, pkt);
 
-    if (!frame && avctx->flags & CODEC_FLAG_PASS1) {
+    if (!frame && avctx->flags & AV_CODEC_FLAG_PASS1) {
         unsigned int b64_size = AV_BASE64_SIZE(ctx->twopass_stats.sz);
 
         avctx->stats_out = av_malloc(b64_size);
@@ -896,9 +1005,8 @@ static int vp8_encode(AVCodecContext *avctx, AVPacket *pkt,
 #endif
 
 #define COMMON_OPTIONS \
-    { "cpu-used",        "Quality/Speed ratio modifier",           OFFSET(cpu_used),        AV_OPT_TYPE_INT, {.i64 = 1},       -16,     16,      VE}, \
     { "auto-alt-ref",    "Enable use of alternate reference " \
-                         "frames (2-pass only)",                   OFFSET(auto_alt_ref),    AV_OPT_TYPE_INT, {.i64 = -1},      -1,      1,       VE}, \
+                         "frames (2-pass only)",                   OFFSET(auto_alt_ref),    AV_OPT_TYPE_BOOL, {.i64 = -1},     -1,      1,       VE}, \
     { "lag-in-frames",   "Number of frames to look ahead for " \
                          "alternate reference frame selection",    OFFSET(lag_in_frames),   AV_OPT_TYPE_INT, {.i64 = -1},      -1,      INT_MAX, VE}, \
     { "arnr-maxframes",  "altref noise reduction max frame count", OFFSET(arnr_max_frames), AV_OPT_TYPE_INT, {.i64 = -1},      -1,      INT_MAX, VE}, \
@@ -907,6 +1015,9 @@ static int vp8_encode(AVCodecContext *avctx, AVPacket *pkt,
     { "backward",        NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 1}, 0, 0, VE, "arnr_type" }, \
     { "forward",         NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 2}, 0, 0, VE, "arnr_type" }, \
     { "centered",        NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 3}, 0, 0, VE, "arnr_type" }, \
+    { "tune",            "Tune the encoding to a specific scenario", OFFSET(tune),          AV_OPT_TYPE_INT, {.i64 = -1},      -1,      INT_MAX, VE, "tune"}, \
+    { "psnr",            NULL, 0, AV_OPT_TYPE_CONST, {.i64 = VP8_TUNE_PSNR}, 0, 0, VE, "tune"}, \
+    { "ssim",            NULL, 0, AV_OPT_TYPE_CONST, {.i64 = VP8_TUNE_SSIM}, 0, 0, VE, "tune"}, \
     { "deadline",        "Time to spend encoding, in microseconds.", OFFSET(deadline),      AV_OPT_TYPE_INT, {.i64 = VPX_DL_GOOD_QUALITY}, INT_MIN, INT_MAX, VE, "quality"}, \
     { "best",            NULL, 0, AV_OPT_TYPE_CONST, {.i64 = VPX_DL_BEST_QUALITY}, 0, 0, VE, "quality"}, \
     { "good",            NULL, 0, AV_OPT_TYPE_CONST, {.i64 = VPX_DL_GOOD_QUALITY}, 0, 0, VE, "quality"}, \
@@ -920,13 +1031,17 @@ static int vp8_encode(AVCodecContext *avctx, AVPacket *pkt,
                          " is still done over the partition boundary.",       0, AV_OPT_TYPE_CONST, {.i64 = VPX_ERROR_RESILIENT_PARTITIONS}, 0, 0, VE, "er"}, \
     { "crf",              "Select the quality for constant quality mode", offsetof(VP8Context, crf), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 63, VE }, \
     { "static-thresh",    "A change threshold on blocks below which they will be skipped by the encoder", OFFSET(static_thresh), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE }, \
+    { "drop-threshold",   "Frame drop threshold", offsetof(VP8Context, drop_threshold), AV_OPT_TYPE_INT, {.i64 = 0 }, INT_MIN, INT_MAX, VE }, \
+    { "noise-sensitivity", "Noise sensitivity", OFFSET(noise_sensitivity), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 4, VE}, \
+    { "undershoot-pct",  "Datarate undershoot (min) target (%)", OFFSET(rc_undershoot_pct), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 100, VE }, \
+    { "overshoot-pct",   "Datarate overshoot (max) target (%)", OFFSET(rc_overshoot_pct), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1000, VE }, \
 
 #define LEGACY_OPTIONS \
     {"speed", "", offsetof(VP8Context, cpu_used), AV_OPT_TYPE_INT, {.i64 = 1}, -16, 16, VE}, \
     {"quality", "", offsetof(VP8Context, deadline), AV_OPT_TYPE_INT, {.i64 = VPX_DL_GOOD_QUALITY}, INT_MIN, INT_MAX, VE, "quality"}, \
-    {"vp8flags", "", offsetof(VP8Context, flags), FF_OPT_TYPE_FLAGS, {.i64 = 0}, 0, UINT_MAX, VE, "flags"}, \
-    {"error_resilient", "enable error resilience", 0, FF_OPT_TYPE_CONST, {.dbl = VP8F_ERROR_RESILIENT}, INT_MIN, INT_MAX, VE, "flags"}, \
-    {"altref", "enable use of alternate reference frames (VP8/2-pass only)", 0, FF_OPT_TYPE_CONST, {.dbl = VP8F_AUTO_ALT_REF}, INT_MIN, INT_MAX, VE, "flags"}, \
+    {"vp8flags", "", offsetof(VP8Context, flags), AV_OPT_TYPE_FLAGS, {.i64 = 0}, 0, UINT_MAX, VE, "flags"}, \
+    {"error_resilient", "enable error resilience", 0, AV_OPT_TYPE_CONST, {.i64 = VP8F_ERROR_RESILIENT}, INT_MIN, INT_MAX, VE, "flags"}, \
+    {"altref", "enable use of alternate reference frames (VP8/2-pass only)", 0, AV_OPT_TYPE_CONST, {.i64 = VP8F_AUTO_ALT_REF}, INT_MIN, INT_MAX, VE, "flags"}, \
     {"arnr_max_frames", "altref noise reduction max frame count", offsetof(VP8Context, arnr_max_frames), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 15, VE}, \
     {"arnr_strength", "altref noise reduction filter strength", offsetof(VP8Context, arnr_strength), AV_OPT_TYPE_INT, {.i64 = 3}, 0, 6, VE}, \
     {"arnr_type", "altref noise reduction filter type", offsetof(VP8Context, arnr_type), AV_OPT_TYPE_INT, {.i64 = 3}, 1, 3, VE}, \
@@ -935,6 +1050,7 @@ static int vp8_encode(AVCodecContext *avctx, AVPacket *pkt,
 #if CONFIG_LIBVPX_VP8_ENCODER
 static const AVOption vp8_options[] = {
     COMMON_OPTIONS
+    { "cpu-used",        "Quality/Speed ratio modifier",                OFFSET(cpu_used),        AV_OPT_TYPE_INT, {.i64 = 1}, -16, 16, VE},
     LEGACY_OPTIONS
     { NULL }
 };
@@ -943,15 +1059,16 @@ static const AVOption vp8_options[] = {
 #if CONFIG_LIBVPX_VP9_ENCODER
 static const AVOption vp9_options[] = {
     COMMON_OPTIONS
+    { "cpu-used",        "Quality/Speed ratio modifier",                OFFSET(cpu_used),        AV_OPT_TYPE_INT, {.i64 = 1},  -8, 8, VE},
     { "lossless",        "Lossless mode",                               OFFSET(lossless),        AV_OPT_TYPE_INT, {.i64 = -1}, -1, 1, VE},
     { "tile-columns",    "Number of tile columns to use, log2",         OFFSET(tile_columns),    AV_OPT_TYPE_INT, {.i64 = -1}, -1, 6, VE},
     { "tile-rows",       "Number of tile rows to use, log2",            OFFSET(tile_rows),       AV_OPT_TYPE_INT, {.i64 = -1}, -1, 2, VE},
-    { "frame-parallel",  "Enable frame parallel decodability features", OFFSET(frame_parallel),  AV_OPT_TYPE_INT, {.i64 = -1}, -1, 1, VE},
+    { "frame-parallel",  "Enable frame parallel decodability features", OFFSET(frame_parallel),  AV_OPT_TYPE_BOOL,{.i64 = -1}, -1, 1, VE},
     { "aq-mode",         "adaptive quantization mode",                  OFFSET(aq_mode),         AV_OPT_TYPE_INT, {.i64 = -1}, -1, 3, VE, "aq_mode"},
-    { "none",            "Aq not used",         0, AV_OPT_TYPE_CONST, {.i64 = 0}, 0, 0, VE, "aq_mode" }, \
-    { "variance",        "Variance based Aq",   0, AV_OPT_TYPE_CONST, {.i64 = 1}, 0, 0, VE, "aq_mode" }, \
-    { "complexity",      "Complexity based Aq", 0, AV_OPT_TYPE_CONST, {.i64 = 2}, 0, 0, VE, "aq_mode" }, \
-    { "cyclic",          "Cyclic Refresh Aq",   0, AV_OPT_TYPE_CONST, {.i64 = 3}, 0, 0, VE, "aq_mode" }, \
+    { "none",            "Aq not used",         0, AV_OPT_TYPE_CONST, {.i64 = 0}, 0, 0, VE, "aq_mode" },
+    { "variance",        "Variance based Aq",   0, AV_OPT_TYPE_CONST, {.i64 = 1}, 0, 0, VE, "aq_mode" },
+    { "complexity",      "Complexity based Aq", 0, AV_OPT_TYPE_CONST, {.i64 = 2}, 0, 0, VE, "aq_mode" },
+    { "cyclic",          "Cyclic Refresh Aq",   0, AV_OPT_TYPE_CONST, {.i64 = 3}, 0, 0, VE, "aq_mode" },
     LEGACY_OPTIONS
     { NULL }
 };
@@ -990,7 +1107,7 @@ AVCodec ff_libvpx_vp8_encoder = {
     .init           = vp8_init,
     .encode2        = vp8_encode,
     .close          = vp8_free,
-    .capabilities   = CODEC_CAP_DELAY | CODEC_CAP_AUTO_THREADS,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AUTO_THREADS,
     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUVA420P, AV_PIX_FMT_NONE },
     .priv_class     = &class_vp8,
     .defaults       = defaults,
@@ -1019,7 +1136,8 @@ AVCodec ff_libvpx_vp9_encoder = {
     .init           = vp9_init,
     .encode2        = vp8_encode,
     .close          = vp8_free,
-    .capabilities   = CODEC_CAP_DELAY | CODEC_CAP_AUTO_THREADS,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AUTO_THREADS,
+    .profiles       = NULL_IF_CONFIG_SMALL(ff_vp9_profiles),
     .priv_class     = &class_vp9,
     .defaults       = defaults,
     .init_static_data = ff_vp9_init_static,
diff --git a/libavcodec/libwavpackenc.c b/libavcodec/libwavpackenc.c
index 77d98a21..6d570898 100644
--- a/libavcodec/libwavpackenc.c
+++ b/libavcodec/libwavpackenc.c
@@ -188,7 +188,7 @@ AVCodec ff_libwavpack_encoder = {
     .init           = wavpack_encode_init,
     .encode2        = wavpack_encode_frame,
     .close          = wavpack_encode_close,
-    .capabilities   = CODEC_CAP_DELAY | CODEC_CAP_SMALL_LAST_FRAME,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_SMALL_LAST_FRAME,
     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S32,
                                                      AV_SAMPLE_FMT_NONE },
 };
diff --git a/libavcodec/libwebpenc.c b/libavcodec/libwebpenc.c
index db96e163..0bcf628e 100644
--- a/libavcodec/libwebpenc.c
+++ b/libavcodec/libwebpenc.c
@@ -57,7 +57,7 @@ static int libwebp_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         goto end;
     }
 
-    ret = ff_alloc_packet(pkt, mw.size);
+    ret = ff_alloc_packet2(avctx, pkt, mw.size, mw.size);
     if (ret < 0)
         goto end;
     memcpy(pkt->data, mw.mem, mw.size);
diff --git a/libavcodec/libwebpenc_animencoder.c b/libavcodec/libwebpenc_animencoder.c
index e9582016..61ecae8a 100644
--- a/libavcodec/libwebpenc_animencoder.c
+++ b/libavcodec/libwebpenc_animencoder.c
@@ -41,8 +41,9 @@ static av_cold int libwebp_anim_encode_init(AVCodecContext *avctx)
     int ret = ff_libwebp_encode_init_common(avctx);
     if (!ret) {
         LibWebPAnimContext *s = avctx->priv_data;
-        WebPAnimEncoderOptions enc_options;
+        WebPAnimEncoderOptions enc_options = { 0 };
         WebPAnimEncoderOptionsInit(&enc_options);
+        enc_options.verbose = av_log_get_level() >= AV_LOG_VERBOSE;
         // TODO(urvang): Expose some options on command-line perhaps.
         s->enc = WebPAnimEncoderNew(avctx->width, avctx->height, &enc_options);
         if (!s->enc)
@@ -66,7 +67,7 @@ static int libwebp_anim_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
             WebPData assembled_data = { 0 };
             ret = WebPAnimEncoderAssemble(s->enc, &assembled_data);
             if (ret) {
-                ret = ff_alloc_packet(pkt, assembled_data.size);
+                ret = ff_alloc_packet2(avctx, pkt, assembled_data.size, assembled_data.size);
                 if (ret < 0)
                     return ret;
                 memcpy(pkt->data, assembled_data.bytes, assembled_data.size);
@@ -139,7 +140,7 @@ AVCodec ff_libwebp_anim_encoder = {
     .init           = libwebp_anim_encode_init,
     .encode2        = libwebp_anim_encode_frame,
     .close          = libwebp_anim_encode_close,
-    .capabilities   = CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_DELAY,
     .pix_fmts       = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_RGB32,
         AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUVA420P,
diff --git a/libavcodec/libwebpenc_common.c b/libavcodec/libwebpenc_common.c
index a76b6da5..21d7adaf 100644
--- a/libavcodec/libwebpenc_common.c
+++ b/libavcodec/libwebpenc_common.c
@@ -167,8 +167,8 @@ int ff_libwebp_get_frame(AVCodecContext *avctx, LibWebPContextCommon *s,
                         int sse = 0;
                         for (p = 0; p < 3; p++) {
                             int bs2 = bs >> !!p;
-                            int w = FF_CEIL_RSHIFT(frame->width , !!p);
-                            int h = FF_CEIL_RSHIFT(frame->height, !!p);
+                            int w = AV_CEIL_RSHIFT(frame->width , !!p);
+                            int h = AV_CEIL_RSHIFT(frame->height, !!p);
                             int xs = x >> !!p;
                             int ys = y >> !!p;
                             for (y2 = ys; y2 < FFMIN(ys + bs2, h); y2++) {
@@ -183,8 +183,8 @@ int ff_libwebp_get_frame(AVCodecContext *avctx, LibWebPContextCommon *s,
                         if (!skip)
                             for (p = 0; p < 3; p++) {
                                 int bs2 = bs >> !!p;
-                                int w = FF_CEIL_RSHIFT(frame->width , !!p);
-                                int h = FF_CEIL_RSHIFT(frame->height, !!p);
+                                int w = AV_CEIL_RSHIFT(frame->width , !!p);
+                                int h = AV_CEIL_RSHIFT(frame->height, !!p);
                                 int xs = x >> !!p;
                                 int ys = y >> !!p;
                                 for (y2 = ys; y2 < FFMIN(ys + bs2, h); y2++) {
diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
index d25f69af..5030d65f 100644
--- a/libavcodec/libx264.c
+++ b/libavcodec/libx264.c
@@ -25,6 +25,7 @@
 #include "libavutil/mem.h"
 #include "libavutil/pixdesc.h"
 #include "libavutil/stereo3d.h"
+#include "libavutil/intreadwrite.h"
 #include "avcodec.h"
 #include "internal.h"
 
@@ -81,6 +82,15 @@ typedef struct X264Context {
     char *stats;
     int nal_hrd;
     int avcintra_class;
+    int motion_est;
+    int forced_idr;
+    int coder;
+    int a53_cc;
+    int b_frame_strategy;
+    int chroma_offset;
+    int scenechange_threshold;
+    int noise_reduction;
+
     char *x264_params;
 } X264Context;
 
@@ -113,7 +123,7 @@ static int encode_nals(AVCodecContext *ctx, AVPacket *pkt,
     for (i = 0; i < nnal; i++)
         size += nals[i].i_payload;
 
-    if ((ret = ff_alloc_packet2(ctx, pkt, size)) < 0)
+    if ((ret = ff_alloc_packet2(ctx, pkt, size, 0)) < 0)
         return ret;
 
     p = pkt->data;
@@ -158,6 +168,94 @@ static int avfmt2_num_planes(int avfmt)
     }
 }
 
+static void reconfig_encoder(AVCodecContext *ctx, const AVFrame *frame)
+{
+    X264Context *x4 = ctx->priv_data;
+    AVFrameSideData *side_data;
+    /* Sync the live encoder with per-frame state: each value that differs is
+     * written to x4->params and applied via x264_encoder_reconfig() (result unchecked). */
+  if (x4->avcintra_class < 0) {
+    if (x4->params.b_interlaced && x4->params.b_tff != frame->top_field_first) {
+        /* interlaced encode whose field order differs from the incoming frame */
+        x4->params.b_tff = frame->top_field_first;
+        x264_encoder_reconfig(x4->enc, &x4->params);
+    }
+    if (x4->params.vui.i_sar_height*ctx->sample_aspect_ratio.num != ctx->sample_aspect_ratio.den * x4->params.vui.i_sar_width) {
+        x4->params.vui.i_sar_height = ctx->sample_aspect_ratio.den;
+        x4->params.vui.i_sar_width  = ctx->sample_aspect_ratio.num;
+        x264_encoder_reconfig(x4->enc, &x4->params);
+    }
+    /* VBV buffer size / max rate: the x264 fields hold the avctx values divided by 1000 */
+    if (x4->params.rc.i_vbv_buffer_size != ctx->rc_buffer_size / 1000 ||
+        x4->params.rc.i_vbv_max_bitrate != ctx->rc_max_rate    / 1000) {
+        x4->params.rc.i_vbv_buffer_size = ctx->rc_buffer_size / 1000;
+        x4->params.rc.i_vbv_max_bitrate = ctx->rc_max_rate    / 1000;
+        x264_encoder_reconfig(x4->enc, &x4->params);
+    }
+    /* target bitrate (avctx value / 1000), only compared when the rc method is X264_RC_ABR */
+    if (x4->params.rc.i_rc_method == X264_RC_ABR &&
+        x4->params.rc.i_bitrate != ctx->bit_rate / 1000) {
+        x4->params.rc.i_bitrate = ctx->bit_rate / 1000;
+        x264_encoder_reconfig(x4->enc, &x4->params);
+    }
+    /* rate factor, only when x4->crf is non-negative and the rc method is X264_RC_CRF */
+    if (x4->crf >= 0 &&
+        x4->params.rc.i_rc_method == X264_RC_CRF &&
+        x4->params.rc.f_rf_constant != x4->crf) {
+        x4->params.rc.f_rf_constant = x4->crf;
+        x264_encoder_reconfig(x4->enc, &x4->params);
+    }
+    /* constant QP, only when x4->cqp is non-negative and the rc method is X264_RC_CQP */
+    if (x4->params.rc.i_rc_method == X264_RC_CQP &&
+        x4->cqp >= 0 &&
+        x4->params.rc.i_qp_constant != x4->cqp) {
+        x4->params.rc.i_qp_constant = x4->cqp;
+        x264_encoder_reconfig(x4->enc, &x4->params);
+    }
+    /* maximum rate factor, whenever x4->crf_max is non-negative (no rc-method check here) */
+    if (x4->crf_max >= 0 &&
+        x4->params.rc.f_rf_constant_max != x4->crf_max) {
+        x4->params.rc.f_rf_constant_max = x4->crf_max;
+        x264_encoder_reconfig(x4->enc, &x4->params);
+    }
+  }
+    /* Stereo 3D side data is examined regardless of avcintra_class. */
+    side_data = av_frame_get_side_data(frame, AV_FRAME_DATA_STEREO3D);
+    if (side_data) {
+        AVStereo3D *stereo = (AVStereo3D *)side_data->data;
+        int fpa_type;
+
+        switch (stereo->type) {
+        case AV_STEREO3D_CHECKERBOARD:
+            fpa_type = 0;
+            break;
+        case AV_STEREO3D_COLUMNS:
+            fpa_type = 1;
+            break;
+        case AV_STEREO3D_LINES:
+            fpa_type = 2;
+            break;
+        case AV_STEREO3D_SIDEBYSIDE:
+            fpa_type = 3;
+            break;
+        case AV_STEREO3D_TOPBOTTOM:
+            fpa_type = 4;
+            break;
+        case AV_STEREO3D_FRAMESEQUENCE:
+            fpa_type = 5;
+            break;
+        default:
+            fpa_type = -1;
+            break;
+        }
+        /* fpa_type is 0..5 for the mapped stereo types above, -1 for any other type */
+        if (fpa_type != x4->params.i_frame_packing) {
+            x4->params.i_frame_packing = fpa_type;
+            x264_encoder_reconfig(x4->enc, &x4->params);
+        }
+    }
+}
+
 static int X264_frame(AVCodecContext *ctx, AVPacket *pkt, const AVFrame *frame,
                       int *got_packet)
 {
@@ -165,6 +263,7 @@ static int X264_frame(AVCodecContext *ctx, AVPacket *pkt, const AVFrame *frame,
     x264_nal_t *nal;
     int nnal, i, ret;
     x264_picture_t pic_out = {0};
+    int pict_type;
     AVFrameSideData *side_data;
 
     x264_picture_init( &x4->pic );
@@ -180,92 +279,65 @@ static int X264_frame(AVCodecContext *ctx, AVPacket *pkt, const AVFrame *frame,
         }
 
         x4->pic.i_pts  = frame->pts;
-        x4->pic.i_type =
-            frame->pict_type == AV_PICTURE_TYPE_I ? X264_TYPE_KEYFRAME :
-            frame->pict_type == AV_PICTURE_TYPE_P ? X264_TYPE_P :
-            frame->pict_type == AV_PICTURE_TYPE_B ? X264_TYPE_B :
-                                            X264_TYPE_AUTO;
-
-        if (x4->avcintra_class < 0) {
-        if (x4->params.b_interlaced && x4->params.b_tff != frame->top_field_first) {
-            x4->params.b_tff = frame->top_field_first;
-            x264_encoder_reconfig(x4->enc, &x4->params);
-        }
-        if (x4->params.vui.i_sar_height*ctx->sample_aspect_ratio.num != ctx->sample_aspect_ratio.den * x4->params.vui.i_sar_width) {
-            x4->params.vui.i_sar_height = ctx->sample_aspect_ratio.den;
-            x4->params.vui.i_sar_width  = ctx->sample_aspect_ratio.num;
-            x264_encoder_reconfig(x4->enc, &x4->params);
-        }
-
-        if (x4->params.rc.i_vbv_buffer_size != ctx->rc_buffer_size / 1000 ||
-            x4->params.rc.i_vbv_max_bitrate != ctx->rc_max_rate    / 1000) {
-            x4->params.rc.i_vbv_buffer_size = ctx->rc_buffer_size / 1000;
-            x4->params.rc.i_vbv_max_bitrate = ctx->rc_max_rate    / 1000;
-            x264_encoder_reconfig(x4->enc, &x4->params);
-        }
-
-        if (x4->params.rc.i_rc_method == X264_RC_ABR &&
-            x4->params.rc.i_bitrate != ctx->bit_rate / 1000) {
-            x4->params.rc.i_bitrate = ctx->bit_rate / 1000;
-            x264_encoder_reconfig(x4->enc, &x4->params);
-        }
 
-        if (x4->crf >= 0 &&
-            x4->params.rc.i_rc_method == X264_RC_CRF &&
-            x4->params.rc.f_rf_constant != x4->crf) {
-            x4->params.rc.f_rf_constant = x4->crf;
-            x264_encoder_reconfig(x4->enc, &x4->params);
-        }
-
-        if (x4->params.rc.i_rc_method == X264_RC_CQP &&
-            x4->cqp >= 0 &&
-            x4->params.rc.i_qp_constant != x4->cqp) {
-            x4->params.rc.i_qp_constant = x4->cqp;
-            x264_encoder_reconfig(x4->enc, &x4->params);
-        }
-
-        if (x4->crf_max >= 0 &&
-            x4->params.rc.f_rf_constant_max != x4->crf_max) {
-            x4->params.rc.f_rf_constant_max = x4->crf_max;
-            x264_encoder_reconfig(x4->enc, &x4->params);
-        }
+        switch (frame->pict_type) {
+        case AV_PICTURE_TYPE_I:
+            x4->pic.i_type = x4->forced_idr >= 0 ? X264_TYPE_IDR
+                                                 : X264_TYPE_KEYFRAME;
+            break;
+        case AV_PICTURE_TYPE_P:
+            x4->pic.i_type = X264_TYPE_P;
+            break;
+        case AV_PICTURE_TYPE_B:
+            x4->pic.i_type = X264_TYPE_B;
+            break;
+        default:
+            x4->pic.i_type = X264_TYPE_AUTO;
+            break;
         }
-
-        side_data = av_frame_get_side_data(frame, AV_FRAME_DATA_STEREO3D);
-        if (side_data) {
-            AVStereo3D *stereo = (AVStereo3D *)side_data->data;
-            int fpa_type;
-
-            switch (stereo->type) {
-            case AV_STEREO3D_CHECKERBOARD:
-                fpa_type = 0;
-                break;
-            case AV_STEREO3D_COLUMNS:
-                fpa_type = 1;
-                break;
-            case AV_STEREO3D_LINES:
-                fpa_type = 2;
-                break;
-            case AV_STEREO3D_SIDEBYSIDE:
-                fpa_type = 3;
-                break;
-            case AV_STEREO3D_TOPBOTTOM:
-                fpa_type = 4;
-                break;
-            case AV_STEREO3D_FRAMESEQUENCE:
-                fpa_type = 5;
-                break;
-            default:
-                fpa_type = -1;
-                break;
-            }
-
-            if (fpa_type != x4->params.i_frame_packing) {
-                x4->params.i_frame_packing = fpa_type;
-                x264_encoder_reconfig(x4->enc, &x4->params);
+        reconfig_encoder(ctx, frame);
+
+        if (x4->a53_cc) {
+            side_data = av_frame_get_side_data(frame, AV_FRAME_DATA_A53_CC);
+            if (side_data) {
+                x4->pic.extra_sei.payloads = av_mallocz(sizeof(x4->pic.extra_sei.payloads[0]));
+                if (x4->pic.extra_sei.payloads == NULL) {
+                    av_log(ctx, AV_LOG_ERROR, "Not enough memory for closed captions, skipping\n");
+                    goto skip_a53cc;
+                }
+                x4->pic.extra_sei.sei_free = av_free;
+
+                x4->pic.extra_sei.payloads[0].payload_size = side_data->size + 11;
+                x4->pic.extra_sei.payloads[0].payload = av_mallocz(x4->pic.extra_sei.payloads[0].payload_size);
+                if (x4->pic.extra_sei.payloads[0].payload == NULL) {
+                    av_log(ctx, AV_LOG_ERROR, "Not enough memory for closed captions, skipping\n");
+                    av_freep(&x4->pic.extra_sei.payloads);
+                    goto skip_a53cc;
+                }
+                x4->pic.extra_sei.num_payloads = 1;
+                x4->pic.extra_sei.payloads[0].payload_type = 4;
+                memcpy(x4->pic.extra_sei.payloads[0].payload + 10, side_data->data, side_data->size);
+                x4->pic.extra_sei.payloads[0].payload[0] = 181;
+                x4->pic.extra_sei.payloads[0].payload[1] = 0;
+                x4->pic.extra_sei.payloads[0].payload[2] = 49;
+
+                /**
+                 * 'GA94' is standard in North America for ATSC, but hard coding
+                 * this style may not be the right thing to do -- other formats
+                 * do exist. This information is not available in the side_data
+                 * so we are going with this right now.
+                 */
+                AV_WL32(x4->pic.extra_sei.payloads[0].payload + 3,
+                    MKTAG('G', 'A', '9', '4'));
+                x4->pic.extra_sei.payloads[0].payload[7] = 3;
+                x4->pic.extra_sei.payloads[0].payload[8] =
+                    ((side_data->size/3) & 0x1f) | 0x40;
+                x4->pic.extra_sei.payloads[0].payload[9] = 0;
+                x4->pic.extra_sei.payloads[0].payload[side_data->size+10] = 255;
             }
         }
     }
+skip_a53cc:
     do {
         if (x264_encoder_encode(x4->enc, &nal, &nnal, frame? &x4->pic: NULL, &pic_out) < 0)
             return AVERROR_EXTERNAL;
@@ -278,23 +350,38 @@ static int X264_frame(AVCodecContext *ctx, AVPacket *pkt, const AVFrame *frame,
     pkt->pts = pic_out.i_pts;
     pkt->dts = pic_out.i_dts;
 
+
     switch (pic_out.i_type) {
     case X264_TYPE_IDR:
     case X264_TYPE_I:
-        ctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
+        pict_type = AV_PICTURE_TYPE_I;
         break;
     case X264_TYPE_P:
-        ctx->coded_frame->pict_type = AV_PICTURE_TYPE_P;
+        pict_type = AV_PICTURE_TYPE_P;
         break;
     case X264_TYPE_B:
     case X264_TYPE_BREF:
-        ctx->coded_frame->pict_type = AV_PICTURE_TYPE_B;
+        pict_type = AV_PICTURE_TYPE_B;
         break;
+    default:
+        pict_type = AV_PICTURE_TYPE_NONE;
     }
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
+    ctx->coded_frame->pict_type = pict_type;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
     pkt->flags |= AV_PKT_FLAG_KEY*pic_out.b_keyframe;
-    if (ret)
+    if (ret) {
+        ff_side_data_set_encoder_stats(pkt, (pic_out.i_qpplus1 - 1) * FF_QP2LAMBDA, NULL, 0, pict_type);
+
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
         ctx->coded_frame->quality = (pic_out.i_qpplus1 - 1) * FF_QP2LAMBDA;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+    }
 
     *got_packet = ret;
     return 0;
@@ -312,15 +399,13 @@ static av_cold int X264_close(AVCodecContext *avctx)
         x4->enc = NULL;
     }
 
-    av_frame_free(&avctx->coded_frame);
-
     return 0;
 }
 
 #define OPT_STR(opt, param)                                                   \
     do {                                                                      \
         int ret;                                                              \
-        if (param && (ret = x264_param_parse(&x4->params, opt, param)) < 0) { \
+        if ((ret = x264_param_parse(&x4->params, opt, param)) < 0) { \
             if(ret == X264_PARAM_BAD_NAME)                                    \
                 av_log(avctx, AV_LOG_ERROR,                                   \
                         "bad option '%s': '%s'\n", opt, param);               \
@@ -357,6 +442,9 @@ static int convert_pix_fmt(enum AVPixelFormat pix_fmt)
     case AV_PIX_FMT_NV12:      return X264_CSP_NV12;
     case AV_PIX_FMT_NV16:
     case AV_PIX_FMT_NV20:      return X264_CSP_NV16;
+#ifdef X264_CSP_NV21
+    case AV_PIX_FMT_NV21:      return X264_CSP_NV21;
+#endif
     };
     return 0;
 }
@@ -370,14 +458,21 @@ static int convert_pix_fmt(enum AVPixelFormat pix_fmt)
 static av_cold int X264_init(AVCodecContext *avctx)
 {
     X264Context *x4 = avctx->priv_data;
+    AVCPBProperties *cpb_props;
     int sw,sh;
 
     if (avctx->global_quality > 0)
         av_log(avctx, AV_LOG_WARNING, "-qscale is ignored, -crf is recommended.\n");
 
+#if CONFIG_LIBX262_ENCODER
+    if (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
+        x4->params.b_mpeg2 = 1;
+        x264_param_default_mpeg2(&x4->params);
+    } else
+#endif
     x264_param_default(&x4->params);
 
-    x4->params.b_deblocking_filter         = avctx->flags & CODEC_FLAG_LOOP_FILTER;
+    x4->params.b_deblocking_filter         = avctx->flags & AV_CODEC_FLAG_LOOP_FILTER;
 
     if (x4->preset || x4->tune)
         if (x264_param_default_preset(&x4->params, x4->preset, x4->tune) < 0) {
@@ -402,7 +497,7 @@ static av_cold int X264_init(AVCodecContext *avctx)
     x4->params.i_log_level          = X264_LOG_DEBUG;
     x4->params.i_csp                = convert_pix_fmt(avctx->pix_fmt);
 
-    OPT_STR("weightp", x4->wpredp);
+    PARSE_X264_OPT("weightp", wpredp);
 
     if (avctx->bit_rate) {
         x4->params.rc.i_bitrate   = avctx->bit_rate / 1000;
@@ -410,8 +505,8 @@ static av_cold int X264_init(AVCodecContext *avctx)
     }
     x4->params.rc.i_vbv_buffer_size = avctx->rc_buffer_size / 1000;
     x4->params.rc.i_vbv_max_bitrate = avctx->rc_max_rate    / 1000;
-    x4->params.rc.b_stat_write      = avctx->flags & CODEC_FLAG_PASS1;
-    if (avctx->flags & CODEC_FLAG_PASS2) {
+    x4->params.rc.b_stat_write      = avctx->flags & AV_CODEC_FLAG_PASS1;
+    if (avctx->flags & AV_CODEC_FLAG_PASS2) {
         x4->params.rc.b_stat_read = 1;
     } else {
         if (x4->crf >= 0) {
@@ -432,32 +527,36 @@ static av_cold int X264_init(AVCodecContext *avctx)
             (float)avctx->rc_initial_buffer_occupancy / avctx->rc_buffer_size;
     }
 
-    OPT_STR("level", x4->level);
+    PARSE_X264_OPT("level", level);
 
     if (avctx->i_quant_factor > 0)
         x4->params.rc.f_ip_factor         = 1 / fabs(avctx->i_quant_factor);
     if (avctx->b_quant_factor > 0)
         x4->params.rc.f_pb_factor         = avctx->b_quant_factor;
-    if (avctx->chromaoffset)
-        x4->params.analyse.i_chroma_qp_offset = avctx->chromaoffset;
-
-    if (avctx->me_method == ME_EPZS)
-        x4->params.analyse.i_me_method = X264_ME_DIA;
-    else if (avctx->me_method == ME_HEX)
-        x4->params.analyse.i_me_method = X264_ME_HEX;
-    else if (avctx->me_method == ME_UMH)
-        x4->params.analyse.i_me_method = X264_ME_UMH;
-    else if (avctx->me_method == ME_FULL)
-        x4->params.analyse.i_me_method = X264_ME_ESA;
-    else if (avctx->me_method == ME_TESA)
-        x4->params.analyse.i_me_method = X264_ME_TESA;
+
+#if FF_API_PRIVATE_OPT
+FF_DISABLE_DEPRECATION_WARNINGS
+    if (avctx->chromaoffset >= 0)
+        x4->chroma_offset = avctx->chromaoffset;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+    if (x4->chroma_offset >= 0)
+        x4->params.analyse.i_chroma_qp_offset = x4->chroma_offset;
 
     if (avctx->gop_size >= 0)
         x4->params.i_keyint_max         = avctx->gop_size;
     if (avctx->max_b_frames >= 0)
         x4->params.i_bframe             = avctx->max_b_frames;
+
+#if FF_API_PRIVATE_OPT
+FF_DISABLE_DEPRECATION_WARNINGS
     if (avctx->scenechange_threshold >= 0)
-        x4->params.i_scenecut_threshold = avctx->scenechange_threshold;
+        x4->scenechange_threshold = avctx->scenechange_threshold;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+    if (x4->scenechange_threshold >= 0)
+        x4->params.i_scenecut_threshold = x4->scenechange_threshold;
+
     if (avctx->qmin >= 0)
         x4->params.rc.i_qp_min          = avctx->qmin;
     if (avctx->qmax >= 0)
@@ -472,7 +571,7 @@ static av_cold int X264_init(AVCodecContext *avctx)
         x4->params.i_frame_reference    = avctx->refs;
     else if (x4->level) {
         int i;
-        int mbn = FF_CEIL_RSHIFT(avctx->width, 4) * FF_CEIL_RSHIFT(avctx->height, 4);
+        int mbn = AV_CEIL_RSHIFT(avctx->width, 4) * AV_CEIL_RSHIFT(avctx->height, 4);
         int level_id = -1;
         char *tail;
         int scale = X264_BUILD < 129 ? 384 : 1;
@@ -496,16 +595,30 @@ static av_cold int X264_init(AVCodecContext *avctx)
         x4->params.analyse.i_trellis    = avctx->trellis;
     if (avctx->me_range >= 0)
         x4->params.analyse.i_me_range   = avctx->me_range;
+#if FF_API_PRIVATE_OPT
+    FF_DISABLE_DEPRECATION_WARNINGS
     if (avctx->noise_reduction >= 0)
-        x4->params.analyse.i_noise_reduction = avctx->noise_reduction;
+        x4->noise_reduction = avctx->noise_reduction;
+    FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+    if (x4->noise_reduction >= 0)
+        x4->params.analyse.i_noise_reduction = x4->noise_reduction;
     if (avctx->me_subpel_quality >= 0)
         x4->params.analyse.i_subpel_refine   = avctx->me_subpel_quality;
+#if FF_API_PRIVATE_OPT
+FF_DISABLE_DEPRECATION_WARNINGS
     if (avctx->b_frame_strategy >= 0)
-        x4->params.i_bframe_adaptive = avctx->b_frame_strategy;
+        x4->b_frame_strategy = avctx->b_frame_strategy;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
     if (avctx->keyint_min >= 0)
         x4->params.i_keyint_min = avctx->keyint_min;
+#if FF_API_CODER_TYPE
+FF_DISABLE_DEPRECATION_WARNINGS
     if (avctx->coder_type >= 0)
-        x4->params.b_cabac = avctx->coder_type == FF_CODER_TYPE_AC;
+        x4->coder = avctx->coder_type == FF_CODER_TYPE_AC;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
     if (avctx->me_cmp >= 0)
         x4->params.analyse.b_chroma_me = avctx->me_cmp & FF_CMP_CHROMA;
 
@@ -562,16 +675,6 @@ static av_cold int X264_init(AVCodecContext *avctx)
 
     if (x4->slice_max_size >= 0)
         x4->params.i_slice_max_size =  x4->slice_max_size;
-    else {
-        /*
-         * Allow x264 to be instructed through AVCodecContext about the maximum
-         * size of the RTP payload. For example, this enables the production of
-         * payload suitable for the H.264 RTP packetization-mode 0 i.e. single
-         * NAL unit per RTP packet.
-         */
-        if (avctx->rtp_payload_size)
-            x4->params.i_slice_max_size = avctx->rtp_payload_size;
-    }
 
     if (x4->fastfirstpass)
         x264_param_apply_fastfirstpass(&x4->params);
@@ -604,6 +707,31 @@ static av_cold int X264_init(AVCodecContext *avctx)
     if (x4->nal_hrd >= 0)
         x4->params.i_nal_hrd = x4->nal_hrd;
 
+    if (x4->motion_est >= 0) {
+        x4->params.analyse.i_me_method = x4->motion_est;
+#if FF_API_MOTION_EST
+FF_DISABLE_DEPRECATION_WARNINGS
+    } else {
+        if (avctx->me_method == ME_EPZS)
+            x4->params.analyse.i_me_method = X264_ME_DIA;
+        else if (avctx->me_method == ME_HEX)
+            x4->params.analyse.i_me_method = X264_ME_HEX;
+        else if (avctx->me_method == ME_UMH)
+            x4->params.analyse.i_me_method = X264_ME_UMH;
+        else if (avctx->me_method == ME_FULL)
+            x4->params.analyse.i_me_method = X264_ME_ESA;
+        else if (avctx->me_method == ME_TESA)
+            x4->params.analyse.i_me_method = X264_ME_TESA;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+    }
+
+    if (x4->coder >= 0)
+        x4->params.b_cabac = x4->coder;
+
+    if (x4->b_frame_strategy >= 0)
+        x4->params.i_bframe_adaptive = x4->b_frame_strategy;
+
     if (x4->profile)
         if (x264_param_apply_profile(&x4->params, x4->profile) < 0) {
             int i;
@@ -625,15 +753,15 @@ static av_cold int X264_init(AVCodecContext *avctx)
     x4->params.i_fps_num = avctx->time_base.den;
     x4->params.i_fps_den = avctx->time_base.num * avctx->ticks_per_frame;
 
-    x4->params.analyse.b_psnr = avctx->flags & CODEC_FLAG_PSNR;
+    x4->params.analyse.b_psnr = avctx->flags & AV_CODEC_FLAG_PSNR;
 
     x4->params.i_threads      = avctx->thread_count;
     if (avctx->thread_type)
         x4->params.b_sliced_threads = avctx->thread_type == FF_THREAD_SLICE;
 
-    x4->params.b_interlaced   = avctx->flags & CODEC_FLAG_INTERLACED_DCT;
+    x4->params.b_interlaced   = avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT;
 
-    x4->params.b_open_gop     = !(avctx->flags & CODEC_FLAG_CLOSED_GOP);
+    x4->params.b_open_gop     = !(avctx->flags & AV_CODEC_FLAG_CLOSED_GOP);
 
     x4->params.i_slice_count  = avctx->slices;
 
@@ -649,7 +777,7 @@ static av_cold int X264_init(AVCodecContext *avctx)
     if (avctx->color_trc != AVCOL_TRC_UNSPECIFIED)
         x4->params.vui.i_transfer  = avctx->color_trc;
 
-    if (avctx->flags & CODEC_FLAG_GLOBAL_HEADER)
+    if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER)
         x4->params.b_repeat_headers = 0;
 
     if(x4->x264opts){
@@ -693,17 +821,13 @@ static av_cold int X264_init(AVCodecContext *avctx)
     if (!x4->enc)
         return AVERROR_EXTERNAL;
 
-    avctx->coded_frame = av_frame_alloc();
-    if (!avctx->coded_frame)
-        return AVERROR(ENOMEM);
-
-    if (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) {
+    if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
         x264_nal_t *nal;
         uint8_t *p;
         int nnal, s, i;
 
         s = x264_encoder_headers(x4->enc, &nal, &nnal);
-        avctx->extradata = p = av_malloc(s);
+        avctx->extradata = p = av_mallocz(s + AV_INPUT_BUFFER_PADDING_SIZE);
         if (!p)
             return AVERROR(ENOMEM);
 
@@ -724,6 +848,13 @@ static av_cold int X264_init(AVCodecContext *avctx)
         avctx->extradata_size = p - avctx->extradata;
     }
 
+    cpb_props = ff_add_cpb_side_data(avctx);
+    if (!cpb_props)
+        return AVERROR(ENOMEM);
+    cpb_props->buffer_size = x4->params.rc.i_vbv_buffer_size * 1000;
+    cpb_props->max_bitrate = x4->params.rc.i_vbv_max_bitrate * 1000;
+    cpb_props->avg_bitrate = x4->params.rc.i_bitrate         * 1000;
+
     return 0;
 }
 
@@ -736,6 +867,9 @@ static const enum AVPixelFormat pix_fmts_8bit[] = {
     AV_PIX_FMT_YUVJ444P,
     AV_PIX_FMT_NV12,
     AV_PIX_FMT_NV16,
+#ifdef X264_CSP_NV21
+    AV_PIX_FMT_NV21,
+#endif
     AV_PIX_FMT_NONE
 };
 static const enum AVPixelFormat pix_fmts_9bit[] = {
@@ -775,10 +909,11 @@ static const AVOption options[] = {
     { "preset",        "Set the encoding preset (cf. x264 --fullhelp)",   OFFSET(preset),        AV_OPT_TYPE_STRING, { .str = "medium" }, 0, 0, VE},
     { "tune",          "Tune the encoding params (cf. x264 --fullhelp)",  OFFSET(tune),          AV_OPT_TYPE_STRING, { 0 }, 0, 0, VE},
     { "profile",       "Set profile restrictions (cf. x264 --fullhelp) ", OFFSET(profile),       AV_OPT_TYPE_STRING, { 0 }, 0, 0, VE},
-    { "fastfirstpass", "Use fast settings when encoding first pass",      OFFSET(fastfirstpass), AV_OPT_TYPE_INT,    { .i64 = 1 }, 0, 1, VE},
+    { "fastfirstpass", "Use fast settings when encoding first pass",      OFFSET(fastfirstpass), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, VE},
     {"level", "Specify level (as defined by Annex A)", OFFSET(level), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, VE},
     {"passlogfile", "Filename for 2 pass stats", OFFSET(stats), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, VE},
     {"wpredp", "Weighted prediction for P-frames", OFFSET(wpredp), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, VE},
+    {"a53cc",          "Use A53 Closed Captions (if available)",          OFFSET(a53_cc),        AV_OPT_TYPE_BOOL,   {.i64 = 0}, 0, 1, VE},
     {"x264opts", "x264 options", OFFSET(x264opts), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, VE},
     { "crf",           "Select the quality for constant quality mode",    OFFSET(crf),           AV_OPT_TYPE_FLOAT,  {.dbl = -1 }, -1, FLT_MAX, VE },
     { "crf_max",       "In CRF mode, prevents VBV from lowering quality beyond this point.",OFFSET(crf_max), AV_OPT_TYPE_FLOAT, {.dbl = -1 }, -1, FLT_MAX, VE },
@@ -791,27 +926,27 @@ static const AVOption options[] = {
     { "autovariance-biased", "Auto-variance AQ with bias to dark scenes", 0, AV_OPT_TYPE_CONST, {.i64 = X264_AQ_AUTOVARIANCE_BIASED}, INT_MIN, INT_MAX, VE, "aq_mode" },
 #endif
     { "aq-strength",   "AQ strength. Reduces blocking and blurring in flat and textured areas.", OFFSET(aq_strength), AV_OPT_TYPE_FLOAT, {.dbl = -1}, -1, FLT_MAX, VE},
-    { "psy",           "Use psychovisual optimizations.",                 OFFSET(psy),           AV_OPT_TYPE_INT,    { .i64 = -1 }, -1, 1, VE },
+    { "psy",           "Use psychovisual optimizations.",                 OFFSET(psy),           AV_OPT_TYPE_BOOL,   { .i64 = -1 }, -1, 1, VE },
     { "psy-rd",        "Strength of psychovisual optimization, in <psy-rd>:<psy-trellis> format.", OFFSET(psy_rd), AV_OPT_TYPE_STRING,  {0 }, 0, 0, VE},
     { "rc-lookahead",  "Number of frames to look ahead for frametype and ratecontrol", OFFSET(rc_lookahead), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, VE },
-    { "weightb",       "Weighted prediction for B-frames.",               OFFSET(weightb),       AV_OPT_TYPE_INT,    { .i64 = -1 }, -1, 1, VE },
+    { "weightb",       "Weighted prediction for B-frames.",               OFFSET(weightb),       AV_OPT_TYPE_BOOL,   { .i64 = -1 }, -1, 1, VE },
     { "weightp",       "Weighted prediction analysis method.",            OFFSET(weightp),       AV_OPT_TYPE_INT,    { .i64 = -1 }, -1, INT_MAX, VE, "weightp" },
     { "none",          NULL, 0, AV_OPT_TYPE_CONST, {.i64 = X264_WEIGHTP_NONE},   INT_MIN, INT_MAX, VE, "weightp" },
     { "simple",        NULL, 0, AV_OPT_TYPE_CONST, {.i64 = X264_WEIGHTP_SIMPLE}, INT_MIN, INT_MAX, VE, "weightp" },
     { "smart",         NULL, 0, AV_OPT_TYPE_CONST, {.i64 = X264_WEIGHTP_SMART},  INT_MIN, INT_MAX, VE, "weightp" },
-    { "ssim",          "Calculate and print SSIM stats.",                 OFFSET(ssim),          AV_OPT_TYPE_INT,    { .i64 = -1 }, -1, 1, VE },
-    { "intra-refresh", "Use Periodic Intra Refresh instead of IDR frames.",OFFSET(intra_refresh),AV_OPT_TYPE_INT,    { .i64 = -1 }, -1, 1, VE },
-    { "bluray-compat", "Bluray compatibility workarounds.",               OFFSET(bluray_compat) ,AV_OPT_TYPE_INT,    { .i64 = -1 }, -1, 1, VE },
+    { "ssim",          "Calculate and print SSIM stats.",                 OFFSET(ssim),          AV_OPT_TYPE_BOOL,   { .i64 = -1 }, -1, 1, VE },
+    { "intra-refresh", "Use Periodic Intra Refresh instead of IDR frames.",OFFSET(intra_refresh),AV_OPT_TYPE_BOOL,   { .i64 = -1 }, -1, 1, VE },
+    { "bluray-compat", "Bluray compatibility workarounds.",               OFFSET(bluray_compat) ,AV_OPT_TYPE_BOOL,   { .i64 = -1 }, -1, 1, VE },
     { "b-bias",        "Influences how often B-frames are used",          OFFSET(b_bias),        AV_OPT_TYPE_INT,    { .i64 = INT_MIN}, INT_MIN, INT_MAX, VE },
     { "b-pyramid",     "Keep some B-frames as references.",               OFFSET(b_pyramid),     AV_OPT_TYPE_INT,    { .i64 = -1 }, -1, INT_MAX, VE, "b_pyramid" },
     { "none",          NULL,                                  0, AV_OPT_TYPE_CONST, {.i64 = X264_B_PYRAMID_NONE},   INT_MIN, INT_MAX, VE, "b_pyramid" },
     { "strict",        "Strictly hierarchical pyramid",       0, AV_OPT_TYPE_CONST, {.i64 = X264_B_PYRAMID_STRICT}, INT_MIN, INT_MAX, VE, "b_pyramid" },
     { "normal",        "Non-strict (not Blu-ray compatible)", 0, AV_OPT_TYPE_CONST, {.i64 = X264_B_PYRAMID_NORMAL}, INT_MIN, INT_MAX, VE, "b_pyramid" },
-    { "mixed-refs",    "One reference per partition, as opposed to one reference per macroblock", OFFSET(mixed_refs), AV_OPT_TYPE_INT, { .i64 = -1}, -1, 1, VE },
-    { "8x8dct",        "High profile 8x8 transform.",                     OFFSET(dct8x8),        AV_OPT_TYPE_INT,    { .i64 = -1 }, -1, 1, VE},
-    { "fast-pskip",    NULL,                                              OFFSET(fast_pskip),    AV_OPT_TYPE_INT,    { .i64 = -1 }, -1, 1, VE},
-    { "aud",           "Use access unit delimiters.",                     OFFSET(aud),           AV_OPT_TYPE_INT,    { .i64 = -1 }, -1, 1, VE},
-    { "mbtree",        "Use macroblock tree ratecontrol.",                OFFSET(mbtree),        AV_OPT_TYPE_INT,    { .i64 = -1 }, -1, 1, VE},
+    { "mixed-refs",    "One reference per partition, as opposed to one reference per macroblock", OFFSET(mixed_refs), AV_OPT_TYPE_BOOL, { .i64 = -1}, -1, 1, VE },
+    { "8x8dct",        "High profile 8x8 transform.",                     OFFSET(dct8x8),        AV_OPT_TYPE_BOOL,   { .i64 = -1 }, -1, 1, VE},
+    { "fast-pskip",    NULL,                                              OFFSET(fast_pskip),    AV_OPT_TYPE_BOOL,   { .i64 = -1 }, -1, 1, VE},
+    { "aud",           "Use access unit delimiters.",                     OFFSET(aud),           AV_OPT_TYPE_BOOL,   { .i64 = -1 }, -1, 1, VE},
+    { "mbtree",        "Use macroblock tree ratecontrol.",                OFFSET(mbtree),        AV_OPT_TYPE_BOOL,   { .i64 = -1 }, -1, 1, VE},
     { "deblock",       "Loop filter parameters, in <alpha:beta> form.",   OFFSET(deblock),       AV_OPT_TYPE_STRING, { 0 },  0, 0, VE},
     { "cplxblur",      "Reduce fluctuations in QP (before curve compression)", OFFSET(cplxblur), AV_OPT_TYPE_FLOAT,  {.dbl = -1 }, -1, FLT_MAX, VE},
     { "partitions",    "A comma-separated list of partitions to consider. "
@@ -829,24 +964,28 @@ static const AVOption options[] = {
     { "vbr",           NULL, 0, AV_OPT_TYPE_CONST, {.i64 = X264_NAL_HRD_VBR},  INT_MIN, INT_MAX, VE, "nal-hrd" },
     { "cbr",           NULL, 0, AV_OPT_TYPE_CONST, {.i64 = X264_NAL_HRD_CBR},  INT_MIN, INT_MAX, VE, "nal-hrd" },
     { "avcintra-class","AVC-Intra class 50/100/200",                      OFFSET(avcintra_class),AV_OPT_TYPE_INT,     { .i64 = -1 }, -1, 200   , VE},
+    { "motion-est",   "Set motion estimation method",                     OFFSET(motion_est),    AV_OPT_TYPE_INT,    { .i64 = -1 }, -1, X264_ME_TESA, VE, "motion-est"},
+    { "dia",           NULL, 0, AV_OPT_TYPE_CONST, { .i64 = X264_ME_DIA },  INT_MIN, INT_MAX, VE, "motion-est" },
+    { "hex",           NULL, 0, AV_OPT_TYPE_CONST, { .i64 = X264_ME_HEX },  INT_MIN, INT_MAX, VE, "motion-est" },
+    { "umh",           NULL, 0, AV_OPT_TYPE_CONST, { .i64 = X264_ME_UMH },  INT_MIN, INT_MAX, VE, "motion-est" },
+    { "esa",           NULL, 0, AV_OPT_TYPE_CONST, { .i64 = X264_ME_ESA },  INT_MIN, INT_MAX, VE, "motion-est" },
+    { "tesa",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = X264_ME_TESA }, INT_MIN, INT_MAX, VE, "motion-est" },
+    { "forced-idr",   "If forcing keyframes, force them as IDR frames.",                                  OFFSET(forced_idr),  AV_OPT_TYPE_BOOL,   { .i64 = -1 }, -1, 1, VE },
+    { "coder",    "Coder type",                                           OFFSET(coder), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1, VE, "coder" },
+    { "default",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 }, INT_MIN, INT_MAX, VE, "coder" },
+    { "cavlc",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 0 },  INT_MIN, INT_MAX, VE, "coder" },
+    { "cabac",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 1 },  INT_MIN, INT_MAX, VE, "coder" },
+    { "vlc",              NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 0 },  INT_MIN, INT_MAX, VE, "coder" },
+    { "ac",               NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 1 },  INT_MIN, INT_MAX, VE, "coder" },
+    { "b_strategy",   "Strategy to choose between I/P/B-frames",          OFFSET(b_frame_strategy), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 2, VE },
+    { "chromaoffset", "QP difference between chroma and luma",            OFFSET(chroma_offset), AV_OPT_TYPE_INT, { .i64 = -1 }, INT_MIN, INT_MAX, VE },
+    { "sc_threshold", "Scene change threshold",                           OFFSET(scenechange_threshold), AV_OPT_TYPE_INT, { .i64 = -1 }, INT_MIN, INT_MAX, VE },
+    { "noise_reduction", "Noise reduction",                               OFFSET(noise_reduction), AV_OPT_TYPE_INT, { .i64 = -1 }, INT_MIN, INT_MAX, VE },
+
     { "x264-params",  "Override the x264 configuration using a :-separated list of key=value parameters", OFFSET(x264_params), AV_OPT_TYPE_STRING, { 0 }, 0, 0, VE },
     { NULL },
 };
 
-static const AVClass x264_class = {
-    .class_name = "libx264",
-    .item_name  = av_default_item_name,
-    .option     = options,
-    .version    = LIBAVUTIL_VERSION_INT,
-};
-
-static const AVClass rgbclass = {
-    .class_name = "libx264rgb",
-    .item_name  = av_default_item_name,
-    .option     = options,
-    .version    = LIBAVUTIL_VERSION_INT,
-};
-
 static const AVCodecDefault x264_defaults[] = {
     { "b",                "0" },
     { "bf",               "-1" },
@@ -861,15 +1000,25 @@ static const AVCodecDefault x264_defaults[] = {
     { "qcomp",            "-1" },
 //     { "rc_lookahead",     "-1" },
     { "refs",             "-1" },
+#if FF_API_PRIVATE_OPT
     { "sc_threshold",     "-1" },
+#endif
     { "trellis",          "-1" },
+#if FF_API_PRIVATE_OPT
     { "nr",               "-1" },
+#endif
     { "me_range",         "-1" },
+#if FF_API_MOTION_EST
     { "me_method",        "-1" },
+#endif
     { "subq",             "-1" },
+#if FF_API_PRIVATE_OPT
     { "b_strategy",       "-1" },
+#endif
     { "keyint_min",       "-1" },
+#if FF_API_CODER_TYPE
     { "coder",            "-1" },
+#endif
     { "cmp",              "-1" },
     { "threads",          AV_STRINGIFY(X264_THREADS_AUTO) },
     { "thread_type",      "0" },
@@ -878,6 +1027,21 @@ static const AVCodecDefault x264_defaults[] = {
     { NULL },
 };
 
+#if CONFIG_LIBX264_ENCODER
+static const AVClass x264_class = {
+    .class_name = "libx264",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+static const AVClass rgbclass = {
+    .class_name = "libx264rgb",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
 AVCodec ff_libx264_encoder = {
     .name             = "libx264",
     .long_name        = NULL_IF_CONFIG_SMALL("libx264 H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
@@ -887,7 +1051,7 @@ AVCodec ff_libx264_encoder = {
     .init             = X264_init,
     .encode2          = X264_frame,
     .close            = X264_close,
-    .capabilities     = CODEC_CAP_DELAY | CODEC_CAP_AUTO_THREADS,
+    .capabilities     = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AUTO_THREADS,
     .priv_class       = &x264_class,
     .defaults         = x264_defaults,
     .init_static_data = X264_init_static,
@@ -904,8 +1068,35 @@ AVCodec ff_libx264rgb_encoder = {
     .init           = X264_init,
     .encode2        = X264_frame,
     .close          = X264_close,
-    .capabilities   = CODEC_CAP_DELAY | CODEC_CAP_AUTO_THREADS,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AUTO_THREADS,
     .priv_class     = &rgbclass,
     .defaults       = x264_defaults,
     .pix_fmts       = pix_fmts_8bit_rgb,
 };
+#endif
+
+#if CONFIG_LIBX262_ENCODER
+static const AVClass X262_class = {
+    .class_name = "libx262",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+AVCodec ff_libx262_encoder = {
+    .name             = "libx262",
+    .long_name        = NULL_IF_CONFIG_SMALL("libx262 MPEG2VIDEO"),
+    .type             = AVMEDIA_TYPE_VIDEO,
+    .id               = AV_CODEC_ID_MPEG2VIDEO,
+    .priv_data_size   = sizeof(X264Context),
+    .init             = X264_init,
+    .encode2          = X264_frame,
+    .close            = X264_close,
+    .capabilities     = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AUTO_THREADS,
+    .priv_class       = &X262_class,
+    .defaults         = x264_defaults,
+    .pix_fmts         = pix_fmts_8bit,
+    .caps_internal    = FF_CODEC_CAP_INIT_THREADSAFE |
+                        FF_CODEC_CAP_INIT_CLEANUP,
+};
+#endif
diff --git a/libavcodec/libx265.c b/libavcodec/libx265.c
index e9240f94..68c7fba3 100644
--- a/libavcodec/libx265.c
+++ b/libavcodec/libx265.c
@@ -66,8 +66,6 @@ static av_cold int libx265_encode_close(AVCodecContext *avctx)
 {
     libx265Context *ctx = avctx->priv_data;
 
-    av_frame_free(&avctx->coded_frame);
-
     ctx->api->param_free(ctx->params);
 
     if (ctx->encoder)
@@ -80,24 +78,10 @@ static av_cold int libx265_encode_init(AVCodecContext *avctx)
 {
     libx265Context *ctx = avctx->priv_data;
 
-    ctx->api = x265_api_get(av_pix_fmt_desc_get(avctx->pix_fmt)->comp[0].depth_minus1 + 1);
+    ctx->api = x265_api_get(av_pix_fmt_desc_get(avctx->pix_fmt)->comp[0].depth);
     if (!ctx->api)
         ctx->api = x265_api_get(0);
 
-    if (avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL &&
-        !av_pix_fmt_desc_get(avctx->pix_fmt)->log2_chroma_w) {
-        av_log(avctx, AV_LOG_ERROR,
-               "4:2:2 and 4:4:4 support is not fully defined for HEVC yet. "
-               "Set -strict experimental to encode anyway.\n");
-        return AVERROR(ENOSYS);
-    }
-
-    avctx->coded_frame = av_frame_alloc();
-    if (!avctx->coded_frame) {
-        av_log(avctx, AV_LOG_ERROR, "Could not allocate frame.\n");
-        return AVERROR(ENOMEM);
-    }
-
     ctx->params = ctx->api->param_alloc();
     if (!ctx->params) {
         av_log(avctx, AV_LOG_ERROR, "Could not allocate x265 param structure.\n");
@@ -127,7 +111,7 @@ static av_cold int libx265_encode_init(AVCodecContext *avctx)
     ctx->params->fpsDenom        = avctx->time_base.num * avctx->ticks_per_frame;
     ctx->params->sourceWidth     = avctx->width;
     ctx->params->sourceHeight    = avctx->height;
-    ctx->params->bEnablePsnr     = !!(avctx->flags & CODEC_FLAG_PSNR);
+    ctx->params->bEnablePsnr     = !!(avctx->flags & AV_CODEC_FLAG_PSNR);
 
     if ((avctx->color_primaries <= AVCOL_PRI_BT2020 &&
          avctx->color_primaries != AVCOL_PRI_UNSPECIFIED) ||
@@ -162,14 +146,17 @@ static av_cold int libx265_encode_init(AVCodecContext *avctx)
     switch (avctx->pix_fmt) {
     case AV_PIX_FMT_YUV420P:
     case AV_PIX_FMT_YUV420P10:
+    case AV_PIX_FMT_YUV420P12:
         ctx->params->internalCsp = X265_CSP_I420;
         break;
     case AV_PIX_FMT_YUV422P:
     case AV_PIX_FMT_YUV422P10:
+    case AV_PIX_FMT_YUV422P12:
         ctx->params->internalCsp = X265_CSP_I422;
         break;
     case AV_PIX_FMT_YUV444P:
     case AV_PIX_FMT_YUV444P10:
+    case AV_PIX_FMT_YUV444P12:
         ctx->params->internalCsp = X265_CSP_I444;
         break;
     }
@@ -187,7 +174,7 @@ static av_cold int libx265_encode_init(AVCodecContext *avctx)
         ctx->params->rc.rateControlMode = X265_RC_ABR;
     }
 
-    if (!(avctx->flags & CODEC_FLAG_GLOBAL_HEADER))
+    if (!(avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER))
         ctx->params->bRepeatHeaders = 1;
 
     if (ctx->x265_opts) {
@@ -222,7 +209,7 @@ static av_cold int libx265_encode_init(AVCodecContext *avctx)
         return AVERROR_INVALIDDATA;
     }
 
-    if (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) {
+    if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
         x265_nal *nal;
         int nnal;
 
@@ -233,7 +220,7 @@ static av_cold int libx265_encode_init(AVCodecContext *avctx)
             return AVERROR_INVALIDDATA;
         }
 
-        avctx->extradata = av_malloc(avctx->extradata_size + FF_INPUT_BUFFER_PADDING_SIZE);
+        avctx->extradata = av_malloc(avctx->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
         if (!avctx->extradata) {
             av_log(avctx, AV_LOG_ERROR,
                    "Cannot allocate HEVC header of size %d.\n", avctx->extradata_size);
@@ -269,7 +256,7 @@ static int libx265_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         }
 
         x265pic.pts      = pic->pts;
-        x265pic.bitDepth = av_pix_fmt_desc_get(avctx->pix_fmt)->comp[0].depth_minus1 + 1;
+        x265pic.bitDepth = av_pix_fmt_desc_get(avctx->pix_fmt)->comp[0].depth;
 
         x265pic.sliceType = pic->pict_type == AV_PICTURE_TYPE_I ? X265_TYPE_I :
                             pic->pict_type == AV_PICTURE_TYPE_P ? X265_TYPE_P :
@@ -306,6 +293,8 @@ static int libx265_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     pkt->pts = x265pic_out.pts;
     pkt->dts = x265pic_out.dts;
 
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
     switch (x265pic_out.sliceType) {
     case X265_TYPE_IDR:
     case X265_TYPE_I:
@@ -318,6 +307,8 @@ static int libx265_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         avctx->coded_frame->pict_type = AV_PICTURE_TYPE_B;
         break;
     }
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
     *got_packet = 1;
     return 0;
@@ -330,6 +321,16 @@ static const enum AVPixelFormat x265_csp_eight[] = {
     AV_PIX_FMT_NONE
 };
 
+static const enum AVPixelFormat x265_csp_ten[] = {
+    AV_PIX_FMT_YUV420P,
+    AV_PIX_FMT_YUV422P,
+    AV_PIX_FMT_YUV444P,
+    AV_PIX_FMT_YUV420P10,
+    AV_PIX_FMT_YUV422P10,
+    AV_PIX_FMT_YUV444P10,
+    AV_PIX_FMT_NONE
+};
+
 static const enum AVPixelFormat x265_csp_twelve[] = {
     AV_PIX_FMT_YUV420P,
     AV_PIX_FMT_YUV422P,
@@ -337,13 +338,18 @@ static const enum AVPixelFormat x265_csp_twelve[] = {
     AV_PIX_FMT_YUV420P10,
     AV_PIX_FMT_YUV422P10,
     AV_PIX_FMT_YUV444P10,
+    AV_PIX_FMT_YUV420P12,
+    AV_PIX_FMT_YUV422P12,
+    AV_PIX_FMT_YUV444P12,
     AV_PIX_FMT_NONE
 };
 
 static av_cold void libx265_encode_init_csp(AVCodec *codec)
 {
-    if (x265_api_get(10))
+    if (x265_api_get(12))
         codec->pix_fmts = x265_csp_twelve;
+    else if (x265_api_get(10))
+        codec->pix_fmts = x265_csp_ten;
     else if (x265_api_get(8))
         codec->pix_fmts = x265_csp_eight;
 }
@@ -382,5 +388,5 @@ AVCodec ff_libx265_encoder = {
     .priv_data_size   = sizeof(libx265Context),
     .priv_class       = &class,
     .defaults         = x265_defaults,
-    .capabilities     = CODEC_CAP_DELAY | CODEC_CAP_AUTO_THREADS,
+    .capabilities     = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AUTO_THREADS,
 };
diff --git a/libavcodec/libxavs.c b/libavcodec/libxavs.c
index 30d14798..f257e558 100644
--- a/libavcodec/libxavs.c
+++ b/libavcodec/libxavs.c
@@ -53,8 +53,13 @@ typedef struct XavsContext {
     int direct_pred;
     int aud;
     int fast_pskip;
+    int motion_est;
     int mbtree;
     int mixed_refs;
+    int b_frame_strategy;
+    int chroma_offset;
+    int scenechange_threshold;
+    int noise_reduction;
 
     int64_t *pts_buffer;
     int out_frame_count;
@@ -80,7 +85,7 @@ static int encode_nals(AVCodecContext *ctx, AVPacket *pkt,
 {
     XavsContext *x4 = ctx->priv_data;
     uint8_t *p;
-    int i, s, ret, size = x4->sei_size + FF_MIN_BUFFER_SIZE;
+    int i, s, ret, size = x4->sei_size + AV_INPUT_BUFFER_MIN_SIZE;
 
     if (!nnal)
         return 0;
@@ -88,7 +93,7 @@ static int encode_nals(AVCodecContext *ctx, AVPacket *pkt,
     for (i = 0; i < nnal; i++)
         size += nals[i].i_payload;
 
-    if ((ret = ff_alloc_packet2(ctx, pkt, size)) < 0)
+    if ((ret = ff_alloc_packet2(ctx, pkt, size, 0)) < 0)
         return ret;
     p = pkt->data;
 
@@ -117,6 +122,7 @@ static int XAVS_frame(AVCodecContext *avctx, AVPacket *pkt,
     xavs_nal_t *nal;
     int nnal, i, ret;
     xavs_picture_t pic_out;
+    int pict_type;
 
     x4->pic.img.i_csp   = XAVS_CSP_I420;
     x4->pic.img.i_plane = 3;
@@ -143,7 +149,7 @@ static int XAVS_frame(AVCodecContext *avctx, AVPacket *pkt,
 
     if (!ret) {
         if (!frame && !(x4->end_of_stream)) {
-            if ((ret = ff_alloc_packet2(avctx, pkt, 4)) < 0)
+            if ((ret = ff_alloc_packet2(avctx, pkt, 4, 0)) < 0)
                 return ret;
 
             pkt->data[0] = 0x0;
@@ -158,7 +164,11 @@ static int XAVS_frame(AVCodecContext *avctx, AVPacket *pkt,
         return 0;
     }
 
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
     avctx->coded_frame->pts = pic_out.i_pts;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
     pkt->pts = pic_out.i_pts;
     if (avctx->has_b_frames) {
         if (!x4->out_frame_count)
@@ -171,25 +181,42 @@ static int XAVS_frame(AVCodecContext *avctx, AVPacket *pkt,
     switch (pic_out.i_type) {
     case XAVS_TYPE_IDR:
     case XAVS_TYPE_I:
-        avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
+        pict_type = AV_PICTURE_TYPE_I;
         break;
     case XAVS_TYPE_P:
-        avctx->coded_frame->pict_type = AV_PICTURE_TYPE_P;
+        pict_type = AV_PICTURE_TYPE_P;
         break;
     case XAVS_TYPE_B:
     case XAVS_TYPE_BREF:
-        avctx->coded_frame->pict_type = AV_PICTURE_TYPE_B;
+        pict_type = AV_PICTURE_TYPE_B;
         break;
+    default:
+        pict_type = AV_PICTURE_TYPE_NONE;
     }
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
+    avctx->coded_frame->pict_type = pict_type;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
     /* There is no IDR frame in AVS JiZhun */
     /* Sequence header is used as a flag */
     if (pic_out.i_type == XAVS_TYPE_I) {
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
         avctx->coded_frame->key_frame = 1;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
         pkt->flags |= AV_PKT_FLAG_KEY;
     }
 
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
     avctx->coded_frame->quality = (pic_out.i_qpplus1 - 1) * FF_QP2LAMBDA;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+    ff_side_data_set_encoder_stats(pkt, (pic_out.i_qpplus1 - 1) * FF_QP2LAMBDA, NULL, 0, pict_type);
 
     x4->out_frame_count++;
     *got_packet = ret;
@@ -207,8 +234,6 @@ static av_cold int XAVS_close(AVCodecContext *avctx)
     if (x4->enc)
         xavs_encoder_close(x4->enc);
 
-    av_frame_free(&avctx->coded_frame);
-
     return 0;
 }
 
@@ -228,8 +253,8 @@ static av_cold int XAVS_init(AVCodecContext *avctx)
     }
     x4->params.rc.i_vbv_buffer_size = avctx->rc_buffer_size / 1000;
     x4->params.rc.i_vbv_max_bitrate = avctx->rc_max_rate    / 1000;
-    x4->params.rc.b_stat_write      = avctx->flags & CODEC_FLAG_PASS1;
-    if (avctx->flags & CODEC_FLAG_PASS2) {
+    x4->params.rc.b_stat_write      = avctx->flags & AV_CODEC_FLAG_PASS1;
+    if (avctx->flags & AV_CODEC_FLAG_PASS2) {
         x4->params.rc.b_stat_read = 1;
     } else {
         if (x4->crf >= 0) {
@@ -249,6 +274,8 @@ static av_cold int XAVS_init(AVCodecContext *avctx)
         x4->params.analyse.i_direct_mv_pred   = x4->direct_pred;
     if (x4->fast_pskip >= 0)
         x4->params.analyse.b_fast_pskip       = x4->fast_pskip;
+    if (x4->motion_est >= 0)
+        x4->params.analyse.i_me_method        = x4->motion_est;
     if (x4->mixed_refs >= 0)
         x4->params.analyse.b_mixed_references = x4->mixed_refs;
     if (x4->b_bias != INT_MIN)
@@ -256,11 +283,44 @@ static av_cold int XAVS_init(AVCodecContext *avctx)
     if (x4->cplxblur >= 0)
         x4->params.rc.f_complexity_blur = x4->cplxblur;
 
+#if FF_API_MOTION_EST
+FF_DISABLE_DEPRECATION_WARNINGS
+    if (x4->motion_est < 0) {
+        switch (avctx->me_method) {
+        case  ME_EPZS:
+            x4->params.analyse.i_me_method = XAVS_ME_DIA;
+            break;
+        case  ME_HEX:
+            x4->params.analyse.i_me_method = XAVS_ME_HEX;
+            break;
+        case  ME_UMH:
+            x4->params.analyse.i_me_method = XAVS_ME_UMH;
+            break;
+        case  ME_FULL:
+            x4->params.analyse.i_me_method = XAVS_ME_ESA;
+            break;
+        case  ME_TESA:
+            x4->params.analyse.i_me_method = XAVS_ME_TESA;
+            break;
+        default:
+            x4->params.analyse.i_me_method = XAVS_ME_HEX;
+        }
+    }
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
     x4->params.i_bframe          = avctx->max_b_frames;
     /* cabac is not included in AVS JiZhun Profile */
     x4->params.b_cabac           = 0;
 
-    x4->params.i_bframe_adaptive = avctx->b_frame_strategy;
+#if FF_API_PRIVATE_OPT
+FF_DISABLE_DEPRECATION_WARNINGS
+    if (avctx->b_frame_strategy)
+        x4->b_frame_strategy = avctx->b_frame_strategy;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+    x4->params.i_bframe_adaptive = x4->b_frame_strategy;
 
     avctx->has_b_frames          = !!avctx->max_b_frames;
 
@@ -270,9 +330,16 @@ static av_cold int XAVS_init(AVCodecContext *avctx)
     if (x4->params.i_keyint_min > x4->params.i_keyint_max)
         x4->params.i_keyint_min = x4->params.i_keyint_max;
 
-    x4->params.i_scenecut_threshold        = avctx->scenechange_threshold;
+#if FF_API_PRIVATE_OPT
+FF_DISABLE_DEPRECATION_WARNINGS
+    if (avctx->scenechange_threshold)
+        x4->scenechange_threshold = avctx->scenechange_threshold;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
-   // x4->params.b_deblocking_filter       = avctx->flags & CODEC_FLAG_LOOP_FILTER;
+    x4->params.i_scenecut_threshold = x4->scenechange_threshold;
+
+   // x4->params.b_deblocking_filter       = avctx->flags & AV_CODEC_FLAG_LOOP_FILTER;
 
     x4->params.rc.i_qp_min                 = avctx->qmin;
     x4->params.rc.i_qp_max                 = avctx->qmax;
@@ -292,42 +359,30 @@ static av_cold int XAVS_init(AVCodecContext *avctx)
     x4->params.i_fps_den            = avctx->time_base.num;
     x4->params.analyse.inter        = XAVS_ANALYSE_I8x8 |XAVS_ANALYSE_PSUB16x16| XAVS_ANALYSE_BSUB16x16;
 
-    switch (avctx->me_method) {
-         case  ME_EPZS:
-               x4->params.analyse.i_me_method = XAVS_ME_DIA;
-               break;
-         case  ME_HEX:
-               x4->params.analyse.i_me_method = XAVS_ME_HEX;
-               break;
-         case  ME_UMH:
-               x4->params.analyse.i_me_method = XAVS_ME_UMH;
-               break;
-         case  ME_FULL:
-               x4->params.analyse.i_me_method = XAVS_ME_ESA;
-               break;
-         case  ME_TESA:
-               x4->params.analyse.i_me_method = XAVS_ME_TESA;
-               break;
-         default:
-               x4->params.analyse.i_me_method = XAVS_ME_HEX;
-    }
-
     x4->params.analyse.i_me_range = avctx->me_range;
     x4->params.analyse.i_subpel_refine    = avctx->me_subpel_quality;
 
     x4->params.analyse.b_chroma_me        = avctx->me_cmp & FF_CMP_CHROMA;
     /* AVS P2 only enables 8x8 transform */
-    x4->params.analyse.b_transform_8x8    = 1; //avctx->flags2 & CODEC_FLAG2_8X8DCT;
+    x4->params.analyse.b_transform_8x8    = 1; //avctx->flags2 & AV_CODEC_FLAG2_8X8DCT;
 
     x4->params.analyse.i_trellis          = avctx->trellis;
-    x4->params.analyse.i_noise_reduction  = avctx->noise_reduction;
+
+#if FF_API_PRIVATE_OPT
+    FF_DISABLE_DEPRECATION_WARNINGS
+    if (avctx->noise_reduction >= 0)
+        x4->noise_reduction = avctx->noise_reduction;
+    FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+    x4->params.analyse.i_noise_reduction  = x4->noise_reduction;
 
     if (avctx->level > 0)
         x4->params.i_level_idc = avctx->level;
 
     if (avctx->bit_rate > 0)
         x4->params.rc.f_rate_tolerance =
-            (float)avctx->bit_rate_tolerance/avctx->bit_rate;
+            (float)avctx->bit_rate_tolerance / avctx->bit_rate;
 
     if ((avctx->rc_buffer_size) &&
         (avctx->rc_initial_buffer_occupancy <= avctx->rc_buffer_size)) {
@@ -340,14 +395,22 @@ static av_cold int XAVS_init(AVCodecContext *avctx)
     /* what is the RC method we are now using? Default NO */
     x4->params.rc.f_ip_factor             = 1 / fabs(avctx->i_quant_factor);
     x4->params.rc.f_pb_factor             = avctx->b_quant_factor;
-    x4->params.analyse.i_chroma_qp_offset = avctx->chromaoffset;
 
-    x4->params.analyse.b_psnr = avctx->flags & CODEC_FLAG_PSNR;
+#if FF_API_PRIVATE_OPT
+FF_DISABLE_DEPRECATION_WARNINGS
+    if (avctx->chromaoffset)
+        x4->chroma_offset = avctx->chromaoffset;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+    x4->params.analyse.i_chroma_qp_offset = x4->chroma_offset;
+
+    x4->params.analyse.b_psnr = avctx->flags & AV_CODEC_FLAG_PSNR;
     x4->params.i_log_level    = XAVS_LOG_DEBUG;
     x4->params.i_threads      = avctx->thread_count;
-    x4->params.b_interlaced   = avctx->flags & CODEC_FLAG_INTERLACED_DCT;
+    x4->params.b_interlaced   = avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT;
 
-    if (avctx->flags & CODEC_FLAG_GLOBAL_HEADER)
+    if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER)
         x4->params.b_repeat_headers = 0;
 
     x4->enc = xavs_encoder_open(&x4->params);
@@ -357,13 +420,9 @@ static av_cold int XAVS_init(AVCodecContext *avctx)
     if (!(x4->pts_buffer = av_mallocz_array((avctx->max_b_frames+1), sizeof(*x4->pts_buffer))))
         return AVERROR(ENOMEM);
 
-    avctx->coded_frame = av_frame_alloc();
-    if (!avctx->coded_frame)
-        return AVERROR(ENOMEM);
-
     /* TAG: Do we have GLOBAL HEADER in AVS */
     /* We Have PPS and SPS in AVS */
-    if (avctx->flags & CODEC_FLAG_GLOBAL_HEADER && 0) {
+    if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER && 0) {
         xavs_nal_t *nal;
         int nnal, s, i, size;
         uint8_t *p;
@@ -402,10 +461,21 @@ static const AVOption options[] = {
     { "spatial",       NULL,      0,    AV_OPT_TYPE_CONST, { .i64 = XAVS_DIRECT_PRED_SPATIAL },  0, 0, VE, "direct-pred" },
     { "temporal",      NULL,      0,    AV_OPT_TYPE_CONST, { .i64 = XAVS_DIRECT_PRED_TEMPORAL }, 0, 0, VE, "direct-pred" },
     { "auto",          NULL,      0,    AV_OPT_TYPE_CONST, { .i64 = XAVS_DIRECT_PRED_AUTO },     0, 0, VE, "direct-pred" },
-    { "aud",           "Use access unit delimiters.",                     OFFSET(aud),           AV_OPT_TYPE_INT,    {.i64 = -1 }, -1, 1, VE},
-    { "mbtree",        "Use macroblock tree ratecontrol.",                OFFSET(mbtree),        AV_OPT_TYPE_INT,    {.i64 = -1 }, -1, 1, VE},
-    { "mixed-refs",    "One reference per partition, as opposed to one reference per macroblock", OFFSET(mixed_refs), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 1, VE },
-    { "fast-pskip",    NULL,                                              OFFSET(fast_pskip),    AV_OPT_TYPE_INT,    {.i64 = -1 }, -1, 1, VE},
+    { "aud",           "Use access unit delimiters.",                     OFFSET(aud),           AV_OPT_TYPE_BOOL,    {.i64 = -1 }, -1, 1, VE},
+    { "mbtree",        "Use macroblock tree ratecontrol.",                OFFSET(mbtree),        AV_OPT_TYPE_BOOL,    {.i64 = -1 }, -1, 1, VE},
+    { "mixed-refs",    "One reference per partition, as opposed to one reference per macroblock", OFFSET(mixed_refs), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, VE },
+    { "fast-pskip",    NULL,                                              OFFSET(fast_pskip),    AV_OPT_TYPE_BOOL,    {.i64 = -1 }, -1, 1, VE},
+    { "motion-est",   "Set motion estimation method",                     OFFSET(motion_est),    AV_OPT_TYPE_INT,    { .i64 = -1 }, -1, XAVS_ME_TESA, VE, "motion-est"},
+    { "dia",           NULL,      0,    AV_OPT_TYPE_CONST, { .i64 = XAVS_ME_DIA },               INT_MIN, INT_MAX, VE, "motion-est" },
+    { "hex",           NULL,      0,    AV_OPT_TYPE_CONST, { .i64 = XAVS_ME_HEX },               INT_MIN, INT_MAX, VE, "motion-est" },
+    { "umh",           NULL,      0,    AV_OPT_TYPE_CONST, { .i64 = XAVS_ME_UMH },               INT_MIN, INT_MAX, VE, "motion-est" },
+    { "esa",           NULL,      0,    AV_OPT_TYPE_CONST, { .i64 = XAVS_ME_ESA },               INT_MIN, INT_MAX, VE, "motion-est" },
+    { "tesa",          NULL,      0,    AV_OPT_TYPE_CONST, { .i64 = XAVS_ME_TESA },              INT_MIN, INT_MAX, VE, "motion-est" },
+    { "b_strategy",    "Strategy to choose between I/P/B-frames",         OFFSET(b_frame_strategy), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 2, VE},
+    { "chromaoffset", "QP difference between chroma and luma",           OFFSET(chroma_offset), AV_OPT_TYPE_INT, {.i64 = 0 }, INT_MIN, INT_MAX, VE},
+    { "sc_threshold", "Scene change threshold",                           OFFSET(scenechange_threshold), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, VE},
+    { "noise_reduction", "Noise reduction",                               OFFSET(noise_reduction), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, VE},
+
     { NULL },
 };
 
@@ -430,7 +500,7 @@ AVCodec ff_libxavs_encoder = {
     .init           = XAVS_init,
     .encode2        = XAVS_frame,
     .close          = XAVS_close,
-    .capabilities   = CODEC_CAP_DELAY | CODEC_CAP_AUTO_THREADS,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AUTO_THREADS,
     .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
     .priv_class     = &xavs_class,
     .defaults       = xavs_defaults,
diff --git a/libavcodec/libxvid.c b/libavcodec/libxvid.c
index bd88326c..6fd4e162 100644
--- a/libavcodec/libxvid.c
+++ b/libavcodec/libxvid.c
@@ -25,17 +25,23 @@
  * @author Adam Thayer (krevnik@comcast.net)
  */
 
+#include <stdio.h>
+#include <string.h>
 #include <xvid.h>
 
+#include "libavutil/avassert.h"
 #include "libavutil/cpu.h"
 #include "libavutil/file.h"
+#include "libavutil/internal.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/mathematics.h"
+#include "libavutil/mem.h"
+#include "libavutil/opt.h"
 
 #include "avcodec.h"
 #include "internal.h"
 #include "libxvid.h"
-#include "mpegvideo.h"
+#include "mpegutils.h"
 
 #if HAVE_UNISTD_H
 #include <unistd.h>
@@ -77,6 +83,8 @@ struct xvid_context {
     int ssim;                      /**< SSIM information display mode */
     int ssim_acc;                  /**< SSIM accuracy. 0: accurate. 4: fast. */
     int gmc;
+    int me_quality;                /**< Motion estimation quality. 0: fast 6: best. */
+    int mpeg_quant;                /**< Quantization type. 0: H263, 1: MPEG */
 };
 
 /**
@@ -381,37 +389,56 @@ static av_cold int xvid_encode_init(AVCodecContext *avctx)
 
     /* Bring in VOP flags from ffmpeg command-line */
     x->vop_flags = XVID_VOP_HALFPEL;              /* Bare minimum quality */
-    if (xvid_flags & CODEC_FLAG_4MV)
+    if (xvid_flags & AV_CODEC_FLAG_4MV)
         x->vop_flags    |= XVID_VOP_INTER4V;      /* Level 3 */
     if (avctx->trellis)
         x->vop_flags    |= XVID_VOP_TRELLISQUANT; /* Level 5 */
-    if (xvid_flags & CODEC_FLAG_AC_PRED)
+    if (xvid_flags & AV_CODEC_FLAG_AC_PRED)
         x->vop_flags    |= XVID_VOP_HQACPRED;     /* Level 6 */
-    if (xvid_flags & CODEC_FLAG_GRAY)
+    if (xvid_flags & AV_CODEC_FLAG_GRAY)
         x->vop_flags    |= XVID_VOP_GREYSCALE;
 
     /* Decide which ME quality setting to use */
     x->me_flags = 0;
-    switch (avctx->me_method) {
-    case ME_FULL:   /* Quality 6 */
+    switch (x->me_quality) {
+    case 6:
+    case 5:
         x->me_flags |= XVID_ME_EXTSEARCH16 |
                        XVID_ME_EXTSEARCH8;
-
-    case ME_EPZS:   /* Quality 4 */
+    case 4:
+    case 3:
         x->me_flags |= XVID_ME_ADVANCEDDIAMOND8 |
                        XVID_ME_HALFPELREFINE8   |
                        XVID_ME_CHROMA_PVOP      |
                        XVID_ME_CHROMA_BVOP;
-
-    case ME_LOG:    /* Quality 2 */
-    case ME_PHODS:
-    case ME_X1:
+    case 2:
+    case 1:
         x->me_flags |= XVID_ME_ADVANCEDDIAMOND16 |
                        XVID_ME_HALFPELREFINE16;
-
-    case ME_ZERO:   /* Quality 0 */
-    default:
+#if FF_API_MOTION_EST
+FF_DISABLE_DEPRECATION_WARNINGS
         break;
+    default:
+        switch (avctx->me_method) {
+        case ME_FULL:   /* Quality 6 */
+             x->me_flags |= XVID_ME_EXTSEARCH16 |
+                            XVID_ME_EXTSEARCH8;
+        case ME_EPZS:   /* Quality 4 */
+             x->me_flags |= XVID_ME_ADVANCEDDIAMOND8 |
+                            XVID_ME_HALFPELREFINE8   |
+                            XVID_ME_CHROMA_PVOP      |
+                            XVID_ME_CHROMA_BVOP;
+        case ME_LOG:    /* Quality 2 */
+        case ME_PHODS:
+        case ME_X1:
+             x->me_flags |= XVID_ME_ADVANCEDDIAMOND16 |
+                            XVID_ME_HALFPELREFINE16;
+        case ME_ZERO:   /* Quality 0 */
+        default:
+            break;
+        }
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
     }
 
     /* Decide how we should decide blocks */
@@ -442,7 +469,7 @@ static av_cold int xvid_encode_init(AVCodecContext *avctx)
         x->vol_flags |= XVID_VOL_GMC;
         x->me_flags  |= XVID_ME_GME_REFINE;
     }
-    if (xvid_flags & CODEC_FLAG_QPEL) {
+    if (xvid_flags & AV_CODEC_FLAG_QPEL) {
         x->vol_flags |= XVID_VOL_QUARTERPEL;
         x->me_flags  |= XVID_ME_QUARTERPELREFINE16;
         if (x->vop_flags & XVID_VOP_INTER4V)
@@ -494,7 +521,7 @@ static av_cold int xvid_encode_init(AVCodecContext *avctx)
     x->old_twopassbuffer = NULL;
     x->twopassfile       = NULL;
 
-    if (xvid_flags & CODEC_FLAG_PASS1) {
+    if (xvid_flags & AV_CODEC_FLAG_PASS1) {
         rc2pass1.version     = XVID_VERSION;
         rc2pass1.context     = x;
         x->twopassbuffer     = av_malloc(BUFFER_SIZE);
@@ -510,7 +537,7 @@ static av_cold int xvid_encode_init(AVCodecContext *avctx)
         plugins[xvid_enc_create.num_plugins].func  = xvid_ff_2pass;
         plugins[xvid_enc_create.num_plugins].param = &rc2pass1;
         xvid_enc_create.num_plugins++;
-    } else if (xvid_flags & CODEC_FLAG_PASS2) {
+    } else if (xvid_flags & AV_CODEC_FLAG_PASS2) {
         rc2pass2.version = XVID_VERSION;
         rc2pass2.bitrate = avctx->bit_rate;
 
@@ -541,7 +568,7 @@ static av_cold int xvid_encode_init(AVCodecContext *avctx)
         plugins[xvid_enc_create.num_plugins].func  = xvid_plugin_2pass2;
         plugins[xvid_enc_create.num_plugins].param = &rc2pass2;
         xvid_enc_create.num_plugins++;
-    } else if (!(xvid_flags & CODEC_FLAG_QSCALE)) {
+    } else if (!(xvid_flags & AV_CODEC_FLAG_QSCALE)) {
         /* Single Pass Bitrate Control! */
         single.version = XVID_VERSION;
         single.bitrate = avctx->bit_rate;
@@ -600,7 +627,7 @@ static av_cold int xvid_encode_init(AVCodecContext *avctx)
         xvid_enc_create.max_key_interval = 240; /* Xvid's best default */
 
     /* Quants */
-    if (xvid_flags & CODEC_FLAG_QSCALE)
+    if (xvid_flags & AV_CODEC_FLAG_QSCALE)
         x->qscale = 1;
     else
         x->qscale = 0;
@@ -615,7 +642,15 @@ static av_cold int xvid_encode_init(AVCodecContext *avctx)
     /* Quant Matrices */
     x->intra_matrix =
     x->inter_matrix = NULL;
+
+#if FF_API_PRIVATE_OPT
+FF_DISABLE_DEPRECATION_WARNINGS
     if (avctx->mpeg_quant)
+        x->mpeg_quant = avctx->mpeg_quant;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+    if (x->mpeg_quant)
         x->vol_flags |= XVID_VOL_MPEGQUANT;
     if ((avctx->intra_matrix || avctx->inter_matrix)) {
         x->vol_flags |= XVID_VOL_MPEGQUANT;
@@ -646,13 +681,13 @@ static av_cold int xvid_encode_init(AVCodecContext *avctx)
     /* Misc Settings */
     xvid_enc_create.frame_drop_ratio = 0;
     xvid_enc_create.global           = 0;
-    if (xvid_flags & CODEC_FLAG_CLOSED_GOP)
+    if (xvid_flags & AV_CODEC_FLAG_CLOSED_GOP)
         xvid_enc_create.global |= XVID_GLOBAL_CLOSED_GOP;
 
     /* Determines which codec mode we are operating in */
     avctx->extradata      = NULL;
     avctx->extradata_size = 0;
-    if (xvid_flags & CODEC_FLAG_GLOBAL_HEADER) {
+    if (xvid_flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
         /* In this case, we are claiming to be MPEG4 */
         x->quicktime_format = 1;
         avctx->codec_id     = AV_CODEC_ID_MPEG4;
@@ -680,9 +715,6 @@ static av_cold int xvid_encode_init(AVCodecContext *avctx)
     }
 
     x->encoder_handle  = xvid_enc_create.handle;
-    avctx->coded_frame = av_frame_alloc();
-    if (!avctx->coded_frame)
-        return AVERROR(ENOMEM);
 
     return 0;
 }
@@ -692,7 +724,6 @@ static int xvid_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 {
     int xerr, i, ret, user_packet = !!pkt->data;
     struct xvid_context *x = avctx->priv_data;
-    AVFrame *p             = avctx->coded_frame;
     int mb_width  = (avctx->width  + 15) / 16;
     int mb_height = (avctx->height + 15) / 16;
     char *tmp;
@@ -700,7 +731,7 @@ static int xvid_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     xvid_enc_frame_t xvid_enc_frame = { 0 };
     xvid_enc_stats_t xvid_enc_stats = { 0 };
 
-    if ((ret = ff_alloc_packet2(avctx, pkt, mb_width*(int64_t)mb_height*MAX_MB_BYTES + FF_MIN_BUFFER_SIZE)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, pkt, mb_width*(int64_t)mb_height*MAX_MB_BYTES + AV_INPUT_BUFFER_MIN_SIZE, 0)) < 0)
         return ret;
 
     /* Start setting up the frame */
@@ -775,32 +806,52 @@ static int xvid_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     }
 
     if (xerr > 0) {
+        int pict_type;
+
         *got_packet = 1;
 
-        p->quality = xvid_enc_stats.quant * FF_QP2LAMBDA;
         if (xvid_enc_stats.type == XVID_TYPE_PVOP)
-            p->pict_type = AV_PICTURE_TYPE_P;
+            pict_type = AV_PICTURE_TYPE_P;
         else if (xvid_enc_stats.type == XVID_TYPE_BVOP)
-            p->pict_type = AV_PICTURE_TYPE_B;
+            pict_type = AV_PICTURE_TYPE_B;
         else if (xvid_enc_stats.type == XVID_TYPE_SVOP)
-            p->pict_type = AV_PICTURE_TYPE_S;
+            pict_type = AV_PICTURE_TYPE_S;
         else
-            p->pict_type = AV_PICTURE_TYPE_I;
+            pict_type = AV_PICTURE_TYPE_I;
+
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
+        avctx->coded_frame->pict_type = pict_type;
+        avctx->coded_frame->quality = xvid_enc_stats.quant * FF_QP2LAMBDA;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+        ff_side_data_set_encoder_stats(pkt, xvid_enc_stats.quant * FF_QP2LAMBDA, NULL, 0, pict_type);
+
         if (xvid_enc_frame.out_flags & XVID_KEYFRAME) {
-            p->key_frame = 1;
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
+            avctx->coded_frame->key_frame = 1;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
             pkt->flags  |= AV_PKT_FLAG_KEY;
             if (x->quicktime_format)
                 return xvid_strip_vol_header(avctx, pkt,
                                              xvid_enc_stats.hlength, xerr);
-        } else
-            p->key_frame = 0;
+        } else {
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
+            avctx->coded_frame->key_frame = 0;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+        }
 
         pkt->size = xerr;
 
         return 0;
     } else {
         if (!user_packet)
-            av_free_packet(pkt);
+            av_packet_unref(pkt);
         if (!xerr)
             return 0;
         av_log(avctx, AV_LOG_ERROR,
@@ -818,7 +869,6 @@ static av_cold int xvid_encode_close(AVCodecContext *avctx)
         x->encoder_handle = NULL;
     }
 
-    av_frame_free(&avctx->coded_frame);
     av_freep(&avctx->extradata);
     if (x->twopassbuffer) {
         av_freep(&x->twopassbuffer);
@@ -848,6 +898,8 @@ static const AVOption options[] = {
     { "frame",       NULL,                                                0, AV_OPT_TYPE_CONST, { .i64 = 2 }, INT_MIN, INT_MAX, VE, "ssim" },
     { "ssim_acc",    "SSIM accuracy",                   OFFSET(ssim_acc),    AV_OPT_TYPE_INT,   { .i64 = 2 },       0,       4, VE         },
     { "gmc",         "use GMC",                         OFFSET(gmc),         AV_OPT_TYPE_INT,   { .i64 = 0 },       0,       1, VE         },
+    { "me_quality",  "Motion estimation quality",       OFFSET(me_quality),  AV_OPT_TYPE_INT,   { .i64 = 0 },       0,       6, VE         },
+    { "mpeg_quant",  "Use MPEG quantizers instead of H.263", OFFSET(mpeg_quant), AV_OPT_TYPE_INT, { .i64 = 0 },     0,       1, VE         },
     { NULL },
 };
 
diff --git a/libavcodec/libzvbi-teletextdec.c b/libavcodec/libzvbi-teletextdec.c
index 15c1a5de..308f735b 100644
--- a/libavcodec/libzvbi-teletextdec.c
+++ b/libavcodec/libzvbi-teletextdec.c
@@ -22,6 +22,7 @@
 #include "libavcodec/ass.h"
 #include "libavutil/opt.h"
 #include "libavutil/bprint.h"
+#include "libavutil/internal.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/log.h"
 
@@ -29,6 +30,7 @@
 
 #define TEXT_MAXSZ    (25 * (56 + 1) * 4 + 2)
 #define VBI_NB_COLORS 40
+#define VBI_TRANSPARENT_BLACK 8
 #define RGBA(r,g,b,a) (((a) << 24) | ((r) << 16) | ((g) << 8) | (b))
 #define VBI_R(rgba)   (((rgba) >> 0) & 0xFF)
 #define VBI_G(rgba)   (((rgba) >> 8) & 0xFF)
@@ -57,6 +59,7 @@ typedef struct TeletextContext
     int             chop_top;
     int             sub_duration; /* in msec */
     int             transparent_bg;
+    int             opacity;
     int             chop_spaces;
 
     int             lines_processed;
@@ -85,8 +88,8 @@ static int chop_spaces_utf8(const unsigned char* t, int len)
 
 static void subtitle_rect_free(AVSubtitleRect **sub_rect)
 {
-    av_freep(&(*sub_rect)->pict.data[0]);
-    av_freep(&(*sub_rect)->pict.data[1]);
+    av_freep(&(*sub_rect)->data[0]); /* bitmap pixels; gen_sub_bitmap() allocates them with av_mallocz() */
+    av_freep(&(*sub_rect)->data[1]); /* palette; gen_sub_bitmap() allocates AVPALETTE_SIZE bytes for it */
     av_freep(&(*sub_rect)->ass);
     av_freep(sub_rect);
 }
@@ -192,29 +195,36 @@ static int gen_sub_text(TeletextContext *ctx, AVSubtitleRect *sub_rect, vbi_page
 }
 
 static void fix_transparency(TeletextContext *ctx, AVSubtitleRect *sub_rect, vbi_page *page,
-                             int chop_top, uint8_t transparent_color, int resx, int resy)
+                             int chop_top, int resx, int resy)
 {
     int iy;
 
     // Hack for transparency, inspired by VLC code...
     for (iy = 0; iy < resy; iy++) {
-        uint8_t *pixel = sub_rect->pict.data[0] + iy * sub_rect->pict.linesize[0];
+        uint8_t *pixel = sub_rect->data[0] + iy * sub_rect->linesize[0];
         vbi_char *vc = page->text + (iy / BITMAP_CHAR_HEIGHT + chop_top) * page->columns;
         vbi_char *vcnext = vc + page->columns;
         for (; vc < vcnext; vc++) {
             uint8_t *pixelnext = pixel + BITMAP_CHAR_WIDTH;
             switch (vc->opacity) {
                 case VBI_TRANSPARENT_SPACE:
-                    memset(pixel, transparent_color, BITMAP_CHAR_WIDTH);
+                    memset(pixel, VBI_TRANSPARENT_BLACK, BITMAP_CHAR_WIDTH);
                     break;
                 case VBI_OPAQUE:
-                case VBI_SEMI_TRANSPARENT:
                     if (!ctx->transparent_bg)
                         break;
+                case VBI_SEMI_TRANSPARENT:
+                    if (ctx->opacity > 0) {
+                        if (ctx->opacity < 255)
+                            for(; pixel < pixelnext; pixel++)
+                                if (*pixel == vc->background)
+                                    *pixel += VBI_NB_COLORS;
+                        break;
+                    }
                 case VBI_TRANSPARENT_FULL:
                     for(; pixel < pixelnext; pixel++)
                         if (*pixel == vc->background)
-                            *pixel = transparent_color;
+                            *pixel = VBI_TRANSPARENT_BLACK;
                     break;
             }
             pixel = pixelnext;
@@ -227,56 +237,55 @@ static int gen_sub_bitmap(TeletextContext *ctx, AVSubtitleRect *sub_rect, vbi_pa
 {
     int resx = page->columns * BITMAP_CHAR_WIDTH;
     int resy = (page->rows - chop_top) * BITMAP_CHAR_HEIGHT;
-    uint8_t ci, cmax = 0;
-    int ret;
+    uint8_t ci;
     vbi_char *vc = page->text + (chop_top * page->columns);
     vbi_char *vcend = page->text + (page->rows * page->columns);
 
     for (; vc < vcend; vc++) {
-        if (vc->opacity != VBI_TRANSPARENT_SPACE) {
-            cmax = VBI_NB_COLORS;
+        if (vc->opacity != VBI_TRANSPARENT_SPACE)
             break;
-        }
     }
 
-    if (cmax == 0) {
+    if (vc >= vcend) {
         av_log(ctx, AV_LOG_DEBUG, "dropping empty page %3x\n", page->pgno);
         sub_rect->type = SUBTITLE_NONE;
         return 0;
     }
 
-    if ((ret = avpicture_alloc(&sub_rect->pict, AV_PIX_FMT_PAL8, resx, resy)) < 0)
-        return ret;
-    // Yes, we want to allocate the palette on our own because AVSubtitle works this way
-    sub_rect->pict.data[1] = NULL;
+    sub_rect->data[0] = av_mallocz(resx * resy);
+    sub_rect->linesize[0] = resx;
+    if (!sub_rect->data[0])
+        return AVERROR(ENOMEM);
 
     vbi_draw_vt_page_region(page, VBI_PIXFMT_PAL8,
-                            sub_rect->pict.data[0], sub_rect->pict.linesize[0],
+                            sub_rect->data[0], sub_rect->linesize[0],
                             0, chop_top, page->columns, page->rows - chop_top,
                             /*reveal*/ 1, /*flash*/ 1);
 
-    fix_transparency(ctx, sub_rect, page, chop_top, cmax, resx, resy);
+    fix_transparency(ctx, sub_rect, page, chop_top, resx, resy);
     sub_rect->x = ctx->x_offset;
     sub_rect->y = ctx->y_offset + chop_top * BITMAP_CHAR_HEIGHT;
     sub_rect->w = resx;
     sub_rect->h = resy;
-    sub_rect->nb_colors = (int)cmax + 1;
-    sub_rect->pict.data[1] = av_mallocz(AVPALETTE_SIZE);
-    if (!sub_rect->pict.data[1]) {
-        av_freep(&sub_rect->pict.data[0]);
+    sub_rect->nb_colors = ctx->opacity > 0 && ctx->opacity < 255 ? 2 * VBI_NB_COLORS : VBI_NB_COLORS;
+    sub_rect->data[1] = av_mallocz(AVPALETTE_SIZE);
+    if (!sub_rect->data[1]) {
+        av_freep(&sub_rect->data[0]);
         return AVERROR(ENOMEM);
     }
-    for (ci = 0; ci < cmax; ci++) {
+    for (ci = 0; ci < VBI_NB_COLORS; ci++) {
         int r, g, b, a;
 
         r = VBI_R(page->color_map[ci]);
         g = VBI_G(page->color_map[ci]);
         b = VBI_B(page->color_map[ci]);
         a = VBI_A(page->color_map[ci]);
-        ((uint32_t *)sub_rect->pict.data[1])[ci] = RGBA(r, g, b, a);
-        av_dlog(ctx, "palette %0x\n", ((uint32_t *)sub_rect->pict.data[1])[ci]);
+        ((uint32_t *)sub_rect->data[1])[ci] = RGBA(r, g, b, a);
+        ((uint32_t *)sub_rect->data[1])[ci + VBI_NB_COLORS] = RGBA(r, g, b, ctx->opacity);
+        ff_dlog(ctx, "palette %0x\n", ((uint32_t *)sub_rect->data[1])[ci]);
     }
-    ((uint32_t *)sub_rect->pict.data[1])[cmax] = RGBA(0, 0, 0, 0);
+    ((uint32_t *)sub_rect->data[1])[VBI_TRANSPARENT_BLACK] = RGBA(0, 0, 0, 0);
+    ((uint32_t *)sub_rect->data[1])[VBI_TRANSPARENT_BLACK + VBI_NB_COLORS] = RGBA(0, 0, 0, 0);
     sub_rect->type = SUBTITLE_BITMAP;
     return 0;
 }
@@ -400,6 +409,7 @@ static int teletext_decode_frame(AVCodecContext *avctx, void *data, int *data_si
     TeletextContext *ctx = avctx->priv_data;
     AVSubtitle      *sub = data;
     int             ret = 0;
+    int j;
 
     if (!ctx->vbi) {
         if (!(ctx->vbi = vbi_decoder_new()))
@@ -411,7 +421,7 @@ static int teletext_decode_frame(AVCodecContext *avctx, void *data, int *data_si
         }
     }
 
-    if (avctx->pkt_timebase.den && pkt->pts != AV_NOPTS_VALUE)
+    if (avctx->pkt_timebase.num && pkt->pts != AV_NOPTS_VALUE)
         ctx->pts = av_rescale_q(pkt->pts, avctx->pkt_timebase, AV_TIME_BASE_Q);
 
     if (pkt->size) {
@@ -427,7 +437,7 @@ static int teletext_decode_frame(AVCodecContext *avctx, void *data, int *data_si
         if (data_identifier_is_teletext(*pkt->data)) {
             if ((lines = slice_to_vbi_lines(ctx, pkt->data + 1, pkt->size - 1)) < 0)
                 return lines;
-            av_dlog(avctx, "ctx=%p buf_size=%d lines=%u pkt_pts=%7.3f\n",
+            ff_dlog(avctx, "ctx=%p buf_size=%d lines=%u pkt_pts=%7.3f\n",
                     ctx, pkt->size, lines, (double)pkt->pts/90000.0);
             if (lines > 0) {
 #ifdef DEBUG
@@ -462,6 +472,14 @@ static int teletext_decode_frame(AVCodecContext *avctx, void *data, int *data_si
             if (sub->rects) {
                 sub->num_rects = 1;
                 sub->rects[0] = ctx->pages->sub_rect;
+#if FF_API_AVPICTURE
+FF_DISABLE_DEPRECATION_WARNINGS
+                for (j = 0; j < 4; j++) {
+                    sub->rects[0]->pict.data[j] = sub->rects[0]->data[j];
+                    sub->rects[0]->pict.linesize[j] = sub->rects[0]->linesize[j];
+                }
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
             } else {
                 ret = AVERROR(ENOMEM);
             }
@@ -503,6 +521,9 @@ static int teletext_init_decoder(AVCodecContext *avctx)
     ctx->vbi = NULL;
     ctx->pts = AV_NOPTS_VALUE;
 
+    if (ctx->opacity == -1)
+        ctx->opacity = ctx->transparent_bg ? 0 : 255;
+
 #ifdef DEBUG
     {
         char *t;
@@ -517,7 +538,7 @@ static int teletext_close_decoder(AVCodecContext *avctx)
 {
     TeletextContext *ctx = avctx->priv_data;
 
-    av_dlog(avctx, "lines_total=%u\n", ctx->lines_processed);
+    ff_dlog(avctx, "lines_total=%u\n", ctx->lines_processed);
     while (ctx->nb_pages)
         subtitle_rect_free(&ctx->pages[--ctx->nb_pages].sub_rect);
     av_freep(&ctx->pages);
@@ -546,6 +567,7 @@ static const AVOption options[] = {
     {"txt_chop_spaces", "chops leading and trailing spaces from text",       OFFSET(chop_spaces),    AV_OPT_TYPE_INT,    {.i64 = 1},        0, 1,        SD},
     {"txt_duration",    "display duration of teletext pages in msecs",       OFFSET(sub_duration),   AV_OPT_TYPE_INT,    {.i64 = 30000},    0, 86400000, SD},
     {"txt_transparent", "force transparent background of the teletext",      OFFSET(transparent_bg), AV_OPT_TYPE_INT,    {.i64 = 0},        0, 1,        SD},
+    {"txt_opacity",     "set opacity of the transparent background",         OFFSET(opacity),        AV_OPT_TYPE_INT,    {.i64 = -1},      -1, 255,      SD},
     { NULL },
 };
 
@@ -565,7 +587,7 @@ AVCodec ff_libzvbi_teletext_decoder = {
     .init      = teletext_init_decoder,
     .close     = teletext_close_decoder,
     .decode    = teletext_decode_frame,
-    .capabilities = CODEC_CAP_DELAY,
+    .capabilities = AV_CODEC_CAP_DELAY,
     .flush     = teletext_flush,
     .priv_class= &teletext_class,
 };
diff --git a/libavcodec/ljpegenc.c b/libavcodec/ljpegenc.c
index 8c0ebf5e..afaab055 100644
--- a/libavcodec/ljpegenc.c
+++ b/libavcodec/ljpegenc.c
@@ -39,17 +39,17 @@
 #include "internal.h"
 #include "jpegtables.h"
 #include "mjpegenc_common.h"
-#include "mpegvideo.h"
 #include "mjpeg.h"
 #include "mjpegenc.h"
 
 typedef struct LJpegEncContext {
+    AVClass *class;
     IDCTDSPContext idsp;
     ScanTable scantable;
     uint16_t matrix[64];
 
-    int vsample[3];
-    int hsample[3];
+    int vsample[4];
+    int hsample[4];
 
     uint16_t huff_code_dc_luminance[12];
     uint16_t huff_code_dc_chrominance[12];
@@ -57,6 +57,7 @@ typedef struct LJpegEncContext {
     uint8_t  huff_size_dc_chrominance[12];
 
     uint16_t (*scratch)[4];
+    int pred;
 } LJpegEncContext;
 
 static int ljpeg_encode_bgr(AVCodecContext *avctx, PutBitContext *pb,
@@ -67,23 +68,29 @@ static int ljpeg_encode_bgr(AVCodecContext *avctx, PutBitContext *pb,
     const int height      = frame->height;
     const int linesize    = frame->linesize[0];
     uint16_t (*buffer)[4] = s->scratch;
-    const int predictor   = avctx->prediction_method+1;
-    int left[3], top[3], topleft[3];
+    int left[4], top[4], topleft[4];
     int x, y, i;
 
-    for (i = 0; i < 3; i++)
+#if FF_API_PRIVATE_OPT
+FF_DISABLE_DEPRECATION_WARNINGS
+    if (avctx->prediction_method)
+        s->pred = avctx->prediction_method + 1;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+    for (i = 0; i < 4; i++)
         buffer[0][i] = 1 << (9 - 1);
 
     for (y = 0; y < height; y++) {
-        const int modified_predictor = y ? predictor : 1;
+        const int modified_predictor = y ? s->pred : 1;
         uint8_t *ptr = frame->data[0] + (linesize * y);
 
-        if (pb->buf_end - pb->buf - (put_bits_count(pb) >> 3) < width * 3 * 4) {
+        if (pb->buf_end - pb->buf - (put_bits_count(pb) >> 3) < width * 4 * 4) {
             av_log(avctx, AV_LOG_ERROR, "encoded frame too large\n");
             return -1;
         }
 
-        for (i = 0; i < 3; i++)
+        for (i = 0; i < 4; i++)
             top[i]= left[i]= topleft[i]= buffer[0][i];
 
         for (x = 0; x < width; x++) {
@@ -95,9 +102,11 @@ static int ljpeg_encode_bgr(AVCodecContext *avctx, PutBitContext *pb,
                 buffer[x][1] =  ptr[4 * x + 0] -     ptr[4 * x + 1] + 0x100;
                 buffer[x][2] =  ptr[4 * x + 2] -     ptr[4 * x + 1] + 0x100;
                 buffer[x][0] = (ptr[4 * x + 0] + 2 * ptr[4 * x + 1] + ptr[4 * x + 2]) >> 2;
+                if (avctx->pix_fmt == AV_PIX_FMT_BGRA)
+                    buffer[x][3] =  ptr[4 * x + 3];
             }
 
-            for (i = 0; i < 3; i++) {
+            for (i = 0; i < 3 + (avctx->pix_fmt == AV_PIX_FMT_BGRA); i++) {
                 int pred, diff;
 
                 PREDICT(pred, topleft[i], top[i], left[i], modified_predictor);
@@ -109,7 +118,7 @@ static int ljpeg_encode_bgr(AVCodecContext *avctx, PutBitContext *pb,
 
                 diff       = ((left[i] - pred + 0x100) & 0x1FF) - 0x100;
 
-                if (i == 0)
+                if (i == 0 || i == 3)
                     ff_mjpeg_encode_dc(pb, diff, s->huff_size_dc_luminance, s->huff_code_dc_luminance); //FIXME ugly
                 else
                     ff_mjpeg_encode_dc(pb, diff, s->huff_size_dc_chrominance, s->huff_code_dc_chrominance);
@@ -188,12 +197,18 @@ static inline void ljpeg_encode_yuv_mb(LJpegEncContext *s, PutBitContext *pb,
 static int ljpeg_encode_yuv(AVCodecContext *avctx, PutBitContext *pb,
                             const AVFrame *frame)
 {
-    const int predictor = avctx->prediction_method + 1;
     LJpegEncContext *s  = avctx->priv_data;
     const int mb_width  = (avctx->width  + s->hsample[0] - 1) / s->hsample[0];
     const int mb_height = (avctx->height + s->vsample[0] - 1) / s->vsample[0];
     int mb_x, mb_y;
 
+#if FF_API_PRIVATE_OPT
+FF_DISABLE_DEPRECATION_WARNINGS
+    if (avctx->prediction_method)
+        s->pred = avctx->prediction_method + 1;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
     for (mb_y = 0; mb_y < mb_height; mb_y++) {
         if (pb->buf_end - pb->buf - (put_bits_count(pb) >> 3) <
             mb_width * 4 * 3 * s->hsample[0] * s->vsample[0]) {
@@ -202,7 +217,7 @@ static int ljpeg_encode_yuv(AVCodecContext *avctx, PutBitContext *pb,
         }
 
         for (mb_x = 0; mb_x < mb_width; mb_x++)
-            ljpeg_encode_yuv_mb(s, pb, frame, predictor, mb_x, mb_y);
+            ljpeg_encode_yuv_mb(s, pb, frame, s->pred, mb_x, mb_y);
     }
 
     return 0;
@@ -217,25 +232,26 @@ static int ljpeg_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     const int height = avctx->height;
     const int mb_width  = (width  + s->hsample[0] - 1) / s->hsample[0];
     const int mb_height = (height + s->vsample[0] - 1) / s->vsample[0];
-    int max_pkt_size = FF_MIN_BUFFER_SIZE;
+    int max_pkt_size = AV_INPUT_BUFFER_MIN_SIZE;
     int ret, header_bits;
 
     if(    avctx->pix_fmt == AV_PIX_FMT_BGR0
-        || avctx->pix_fmt == AV_PIX_FMT_BGRA
         || avctx->pix_fmt == AV_PIX_FMT_BGR24)
         max_pkt_size += width * height * 3 * 4;
+    else if(avctx->pix_fmt == AV_PIX_FMT_BGRA)
+        max_pkt_size += width * height * 4 * 4;
     else {
         max_pkt_size += mb_width * mb_height * 3 * 4
                         * s->hsample[0] * s->vsample[0];
     }
 
-    if ((ret = ff_alloc_packet2(avctx, pkt, max_pkt_size)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, pkt, max_pkt_size, 0)) < 0)
         return ret;
 
     init_put_bits(&pb, pkt->data, pkt->size);
 
     ff_mjpeg_encode_picture_header(avctx, &pb, &s->scantable,
-                                   s->matrix, s->matrix);
+                                   s->pred, s->matrix, s->matrix);
 
     header_bits = put_bits_count(&pb);
 
@@ -265,7 +281,6 @@ static av_cold int ljpeg_encode_close(AVCodecContext *avctx)
 {
     LJpegEncContext *s = avctx->priv_data;
 
-    av_frame_free(&avctx->coded_frame);
     av_freep(&s->scratch);
 
     return 0;
@@ -286,12 +301,12 @@ static av_cold int ljpeg_encode_init(AVCodecContext *avctx)
         return AVERROR(EINVAL);
     }
 
-    avctx->coded_frame = av_frame_alloc();
-    if (!avctx->coded_frame)
-        return AVERROR(ENOMEM);
-
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
     avctx->coded_frame->key_frame = 1;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
     s->scratch = av_malloc_array(avctx->width + 1, sizeof(*s->scratch));
     if (!s->scratch)
@@ -318,16 +333,35 @@ static av_cold int ljpeg_encode_init(AVCodecContext *avctx)
     return AVERROR(ENOMEM);
 }
 
+#define OFFSET(x) offsetof(LJpegEncContext, x)
+#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
+/* "pred" picks the predictor kept in LJpegEncContext.pred: 1..3, default 1. */
+static const AVOption options[] = {
+{ "pred", "Prediction method", OFFSET(pred), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 3, VE, "pred" },
+    { "left",   NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, INT_MIN, INT_MAX, VE, "pred" },
+    { "plane",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 2 }, INT_MIN, INT_MAX, VE, "pred" },
+    { "median", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 3 }, INT_MIN, INT_MAX, VE, "pred" },
+    { NULL},
+};
+/* Class referenced by ff_ljpeg_encoder.priv_class to expose the options. */
+static const AVClass ljpeg_class = {
+    .class_name = "ljpeg",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
 AVCodec ff_ljpeg_encoder = {
     .name           = "ljpeg",
     .long_name      = NULL_IF_CONFIG_SMALL("Lossless JPEG"),
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_LJPEG,
     .priv_data_size = sizeof(LJpegEncContext),
+    .priv_class     = &ljpeg_class,
     .init           = ljpeg_encode_init,
     .encode2        = ljpeg_encode_frame,
     .close          = ljpeg_encode_close,
-    .capabilities   = CODEC_CAP_FRAME_THREADS | CODEC_CAP_INTRA_ONLY,
+    .capabilities   = AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_INTRA_ONLY,
     .pix_fmts       = (const enum AVPixelFormat[]){
         AV_PIX_FMT_BGR24   , AV_PIX_FMT_BGRA    , AV_PIX_FMT_BGR0,
         AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ422P,
diff --git a/libavcodec/loco.c b/libavcodec/loco.c
index aea478d5..9d0f1444 100644
--- a/libavcodec/loco.c
+++ b/libavcodec/loco.c
@@ -330,5 +330,5 @@ AVCodec ff_loco_decoder = {
     .priv_data_size = sizeof(LOCOContext),
     .init           = decode_init,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/lossless_audiodsp.h b/libavcodec/lossless_audiodsp.h
index 9ce2e63d..79ca30d4 100644
--- a/libavcodec/lossless_audiodsp.h
+++ b/libavcodec/lossless_audiodsp.h
@@ -20,8 +20,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef AVCODEC_LLAUDDSP_H
-#define AVCODEC_LLAUDDSP_H
+#ifndef AVCODEC_LOSSLESS_AUDIODSP_H
+#define AVCODEC_LOSSLESS_AUDIODSP_H
 
 #include <stdint.h>
 
@@ -43,4 +43,4 @@ void ff_llauddsp_init_arm(LLAudDSPContext *c);
 void ff_llauddsp_init_ppc(LLAudDSPContext *c);
 void ff_llauddsp_init_x86(LLAudDSPContext *c);
 
-#endif /* AVCODEC_LLAUDDSP_H */
+#endif /* AVCODEC_LOSSLESS_AUDIODSP_H */
diff --git a/libavcodec/lpc.c b/libavcodec/lpc.c
index deb02e7f..3839119c 100644
--- a/libavcodec/lpc.c
+++ b/libavcodec/lpc.c
@@ -37,13 +37,19 @@ static void lpc_apply_welch_window_c(const int32_t *data, int len,
     double w;
     double c;
 
-    /* The optimization in commit fa4ed8c does not support odd len.
-     * If someone wants odd len extend that change. */
-    av_assert2(!(len & 1));
-
     n2 = (len >> 1);
     c = 2.0 / (len - 1.0);
 
+    if (len & 1) {
+        for(i=0; i<n2; i++) {
+            w = c - i - 1.0;
+            w = 1.0 - (w * w);
+            w_data[i] = data[i] * w;
+            w_data[len-1-i] = data[len-1-i] * w;
+        }
+        return;
+    }
+
     w_data+=n2;
       data+=n2;
     for(i=0; i<n2; i++) {
@@ -161,6 +167,28 @@ int ff_lpc_calc_ref_coefs(LPCContext *s,
     return order;
 }
 
+/* Window samples[0..len-1] with 0.5 - 0.5*cos(2*pi*i/(len-1)), autocorrelate,
+ * let compute_ref_coefs() fill ref; returns autoc[0] / averaged error[]. */
+double ff_lpc_calc_ref_coefs_f(LPCContext *s, const float *samples, int len,
+                               int order, double *ref)
+{
+    int i;
+    double signal = 0.0f, avg_err = 0.0f;
+    double autoc[MAX_LPC_ORDER+1] = {0}, error[MAX_LPC_ORDER+1] = {0};
+    const double a = 0.5f, b = 1.0f - a;
+    /* Apply windowing; NOTE(review): len == 1 makes the cos() argument 0/0 */
+    for (i = 0; i < len; i++) {
+        double weight = a - b*cos((2*M_PI*i)/(len - 1));
+        s->windowed_samples[i] = weight*samples[i];
+    }
+    s->lpc_compute_autocorr(s->windowed_samples, len, order, autoc);
+    signal = autoc[0];
+    compute_ref_coefs(autoc, order, ref, error);
+    for (i = 0; i < order; i++)
+        avg_err = (avg_err + error[i])/2.0f; /* each step halves older terms */
+    return signal/avg_err; /* NOTE(review): avg_err stays 0 when order == 0 */
+}
+
 /**
  * Calculate LPC coefficients for multiple orders
  *
diff --git a/libavcodec/lpc.h b/libavcodec/lpc.h
index 96acb371..edb1a6bc 100644
--- a/libavcodec/lpc.h
+++ b/libavcodec/lpc.h
@@ -25,6 +25,7 @@
 #include <stdint.h>
 #include "libavutil/avassert.h"
 #include "libavutil/lls.h"
+#include "aac_defines.h"
 
 #define ORDER_METHOD_EST     0
 #define ORDER_METHOD_2LEVEL  1
@@ -99,6 +100,9 @@ int ff_lpc_calc_coefs(LPCContext *s,
 int ff_lpc_calc_ref_coefs(LPCContext *s,
                           const int32_t *samples, int order, double *ref);
 
+double ff_lpc_calc_ref_coefs_f(LPCContext *s, const float *samples, int len,
+                               int order, double *ref);
+
 /**
  * Initialize LPCContext.
  */
@@ -111,11 +115,15 @@ void ff_lpc_init_x86(LPCContext *s);
  */
 void ff_lpc_end(LPCContext *s);
 
+#if USE_FIXED
+typedef int LPC_TYPE;
+#else
 #ifdef LPC_USE_DOUBLE
-#define LPC_TYPE double
+typedef double LPC_TYPE;
 #else
-#define LPC_TYPE float
+typedef float LPC_TYPE;
 #endif
+#endif // USE_FIXED
 
 /**
  * Schur recursion.
@@ -152,7 +160,7 @@ static inline void compute_ref_coefs(const LPC_TYPE *autoc, int max_order,
  * Levinson-Durbin recursion.
  * Produce LPC coefficients from autocorrelation data.
  */
-static inline int compute_lpc_coefs(const LPC_TYPE *autoc, int max_order,
+static inline int AAC_RENAME(compute_lpc_coefs)(const LPC_TYPE *autoc, int max_order,
                                     LPC_TYPE *lpc, int lpc_stride, int fail,
                                     int normalize)
 {
@@ -169,14 +177,14 @@ static inline int compute_lpc_coefs(const LPC_TYPE *autoc, int max_order,
         return -1;
 
     for(i=0; i<max_order; i++) {
-        LPC_TYPE r = -autoc[i];
+        LPC_TYPE r = AAC_SRA_R(-autoc[i], 5);
 
         if (normalize) {
             for(j=0; j<i; j++)
                 r -= lpc_last[j] * autoc[i-j-1];
 
             r /= err;
-            err *= 1.0 - (r * r);
+            err *= FIXR(1.0) - (r * r);
         }
 
         lpc[i] = r;
@@ -184,8 +192,8 @@ static inline int compute_lpc_coefs(const LPC_TYPE *autoc, int max_order,
         for(j=0; j < (i+1)>>1; j++) {
             LPC_TYPE f = lpc_last[    j];
             LPC_TYPE b = lpc_last[i-1-j];
-            lpc[    j] = f + r * b;
-            lpc[i-1-j] = b + r * f;
+            lpc[    j] = f + AAC_MUL26(r, b);
+            lpc[i-1-j] = b + AAC_MUL26(r, f);
         }
 
         if (fail && err < 0)
diff --git a/libavcodec/lzf.c b/libavcodec/lzf.c
new file mode 100644
index 00000000..409a7ffd
--- /dev/null
+++ b/libavcodec/lzf.c
@@ -0,0 +1,90 @@
+/*
+ * lzf decompression algorithm
+ * Copyright (c) 2015 Luca Barbato
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * lzf decompression
+ *
+ * LZF is a fast compression/decompression algorithm that takes very little
+ * code space and working memory, ideal for real-time and block compression.
+ *
+ * https://en.wikibooks.org/wiki/Data_Compression/Dictionary_compression#LZF
+ */
+
+#include "libavutil/mem.h"
+
+#include "bytestream.h"
+#include "lzf.h"
+
+#define LZF_LITERAL_MAX (1 << 5)
+#define LZF_LONG_BACKREF (7 + 2)
+
+/** Decompress the LZF stream in gb into *buf, growing it with av_reallocp().
+ *  *size is the allocated size of *buf on entry and the decoded length on
+ *  return. Returns 0 on success or a negative AVERROR code. */
+int ff_lzf_uncompress(GetByteContext *gb, uint8_t **buf, int64_t *size)
+{
+    int ret     = 0;
+    uint8_t *p  = *buf;
+    int64_t len = 0;
+
+    while (bytestream2_get_bytes_left(gb) > 2) {
+        uint8_t s = bytestream2_get_byte(gb);
+
+        if (s < LZF_LITERAL_MAX) {
+            /* literal run: the control byte holds the run length minus one */
+            s++;
+            if (s > *size - len) {
+                *size += s + *size / 2; /* always gains room for the run */
+                ret = av_reallocp(buf, *size);
+                if (ret < 0)
+                    return ret;
+                p = *buf + len; /* the buffer may have moved */
+            }
+            bytestream2_get_buffer(gb, p, s);
+            p   += s;
+            len += s;
+        } else {
+            /* back-reference: 3 length bits and the 5 high offset bits */
+            int l   = 2 + (s >> 5);
+            int off = ((s & 0x1f) << 8) + 1;
+
+            if (l == LZF_LONG_BACKREF)
+                l += bytestream2_get_byte(gb);
+            off += bytestream2_get_byte(gb);
+
+            if (off > len) /* would reach before the start of the output */
+                return AVERROR_INVALIDDATA;
+            if (l > *size - len) {
+                *size += l + *size / 2; /* always gains room for the copy */
+                ret = av_reallocp(buf, *size);
+                if (ret < 0)
+                    return ret;
+                p = *buf + len; /* the buffer may have moved */
+            }
+            av_memcpy_backptr(p, off, l);
+            p   += l;
+            len += l;
+        }
+    }
+    *size = len;
+    return 0;
+}
diff --git a/libavcodec/lzf.h b/libavcodec/lzf.h
new file mode 100644
index 00000000..0ad73d9f
--- /dev/null
+++ b/libavcodec/lzf.h
@@ -0,0 +1,29 @@
+/*
+ * lzf decompression algorithm
+ * Copyright (c) 2015 Luca Barbato
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_LZF_H
+#define AVCODEC_LZF_H
+
+#include "bytestream.h"
+
+/** Decode LZF data from gb into *buf (grown as needed); *size: capacity in, length out. */
+int ff_lzf_uncompress(GetByteContext *gb, uint8_t **buf, int64_t *size);
+#endif /* AVCODEC_LZF_H */
diff --git a/libavcodec/lzw.c b/libavcodec/lzw.c
index 6832c122..b0b9a343 100644
--- a/libavcodec/lzw.c
+++ b/libavcodec/lzw.c
@@ -93,7 +93,7 @@ static int lzw_get_code(struct LZWState * s)
     return c & s->curmask;
 }
 
-void ff_lzw_decode_tail(LZWState *p)
+int ff_lzw_decode_tail(LZWState *p)
 {
     struct LZWState *s = (struct LZWState *)p;
 
@@ -104,6 +104,7 @@ void ff_lzw_decode_tail(LZWState *p)
         }
     }else
         bytestream2_skip(&s->gb, bytestream2_get_bytes_left(&s->gb));
+    return bytestream2_tell(&s->gb);
 }
 
 av_cold void ff_lzw_decode_open(LZWState **p)
diff --git a/libavcodec/lzw.h b/libavcodec/lzw.h
index 4653c1c7..6af8a6b8 100644
--- a/libavcodec/lzw.h
+++ b/libavcodec/lzw.h
@@ -47,7 +47,7 @@ void ff_lzw_decode_open(LZWState **p);
 void ff_lzw_decode_close(LZWState **p);
 int ff_lzw_decode_init(LZWState *s, int csize, const uint8_t *buf, int buf_size, int mode);
 int ff_lzw_decode(LZWState *s, uint8_t *buf, int len);
-void ff_lzw_decode_tail(LZWState *lzw);
+int ff_lzw_decode_tail(LZWState *lzw);
 
 /** LZW encode state */
 struct LZWEncodeState;
diff --git a/libavcodec/mace.c b/libavcodec/mace.c
index 6eaa2966..e332a72d 100644
--- a/libavcodec/mace.c
+++ b/libavcodec/mace.c
@@ -292,7 +292,7 @@ AVCodec ff_mace3_decoder = {
     .priv_data_size = sizeof(MACEContext),
     .init           = mace_decode_init,
     .decode         = mace_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P,
                                                       AV_SAMPLE_FMT_NONE },
 };
@@ -305,7 +305,7 @@ AVCodec ff_mace6_decoder = {
     .priv_data_size = sizeof(MACEContext),
     .init           = mace_decode_init,
     .decode         = mace_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P,
                                                       AV_SAMPLE_FMT_NONE },
 };
diff --git a/libavcodec/mathops.h b/libavcodec/mathops.h
index 46283ca4..4988f1d3 100644
--- a/libavcodec/mathops.h
+++ b/libavcodec/mathops.h
@@ -30,7 +30,6 @@
 #define MAX_NEG_CROP 1024
 
 extern const uint32_t ff_inverse[257];
-extern const uint8_t  ff_reverse[256];
 extern const uint8_t ff_sqrt_tab[256];
 extern const uint8_t ff_crop_tab[256 + 2 * MAX_NEG_CROP];
 extern const uint8_t ff_zigzag_direct[64];
@@ -234,6 +233,11 @@ static inline av_const unsigned int ff_sqrt(unsigned int a)
 }
 #endif
 
+static inline av_const float ff_sqrf(float a)
+{
+    return a*a; /* square of a, returned as float */
+}
+
 static inline int8_t ff_u8_to_s8(uint8_t a)
 {
     union {
diff --git a/libavcodec/mathtables.c b/libavcodec/mathtables.c
index a07ac50c..7b5efb88 100644
--- a/libavcodec/mathtables.c
+++ b/libavcodec/mathtables.c
@@ -71,25 +71,6 @@ const uint8_t ff_sqrt_tab[256]={
 240,240,241,242,242,243,243,244,244,245,245,246,246,247,247,248,248,249,249,250,250,251,251,252,252,253,253,254,254,255,255,255
 };
 
-const uint8_t ff_reverse[256] = {
-0x00,0x80,0x40,0xC0,0x20,0xA0,0x60,0xE0,0x10,0x90,0x50,0xD0,0x30,0xB0,0x70,0xF0,
-0x08,0x88,0x48,0xC8,0x28,0xA8,0x68,0xE8,0x18,0x98,0x58,0xD8,0x38,0xB8,0x78,0xF8,
-0x04,0x84,0x44,0xC4,0x24,0xA4,0x64,0xE4,0x14,0x94,0x54,0xD4,0x34,0xB4,0x74,0xF4,
-0x0C,0x8C,0x4C,0xCC,0x2C,0xAC,0x6C,0xEC,0x1C,0x9C,0x5C,0xDC,0x3C,0xBC,0x7C,0xFC,
-0x02,0x82,0x42,0xC2,0x22,0xA2,0x62,0xE2,0x12,0x92,0x52,0xD2,0x32,0xB2,0x72,0xF2,
-0x0A,0x8A,0x4A,0xCA,0x2A,0xAA,0x6A,0xEA,0x1A,0x9A,0x5A,0xDA,0x3A,0xBA,0x7A,0xFA,
-0x06,0x86,0x46,0xC6,0x26,0xA6,0x66,0xE6,0x16,0x96,0x56,0xD6,0x36,0xB6,0x76,0xF6,
-0x0E,0x8E,0x4E,0xCE,0x2E,0xAE,0x6E,0xEE,0x1E,0x9E,0x5E,0xDE,0x3E,0xBE,0x7E,0xFE,
-0x01,0x81,0x41,0xC1,0x21,0xA1,0x61,0xE1,0x11,0x91,0x51,0xD1,0x31,0xB1,0x71,0xF1,
-0x09,0x89,0x49,0xC9,0x29,0xA9,0x69,0xE9,0x19,0x99,0x59,0xD9,0x39,0xB9,0x79,0xF9,
-0x05,0x85,0x45,0xC5,0x25,0xA5,0x65,0xE5,0x15,0x95,0x55,0xD5,0x35,0xB5,0x75,0xF5,
-0x0D,0x8D,0x4D,0xCD,0x2D,0xAD,0x6D,0xED,0x1D,0x9D,0x5D,0xDD,0x3D,0xBD,0x7D,0xFD,
-0x03,0x83,0x43,0xC3,0x23,0xA3,0x63,0xE3,0x13,0x93,0x53,0xD3,0x33,0xB3,0x73,0xF3,
-0x0B,0x8B,0x4B,0xCB,0x2B,0xAB,0x6B,0xEB,0x1B,0x9B,0x5B,0xDB,0x3B,0xBB,0x7B,0xFB,
-0x07,0x87,0x47,0xC7,0x27,0xA7,0x67,0xE7,0x17,0x97,0x57,0xD7,0x37,0xB7,0x77,0xF7,
-0x0F,0x8F,0x4F,0xCF,0x2F,0xAF,0x6F,0xEF,0x1F,0x9F,0x5F,0xDF,0x3F,0xBF,0x7F,0xFF,
-};
-
 #define times4(x) x, x, x, x
 #define times256(x) times4(times4(times4(times4(times4(x)))))
 
diff --git a/libavcodec/mdct_template.c b/libavcodec/mdct_template.c
index 7fa8bcce..04396b46 100644
--- a/libavcodec/mdct_template.c
+++ b/libavcodec/mdct_template.c
@@ -22,6 +22,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include "libavutil/common.h"
+#include "libavutil/libm.h"
 #include "libavutil/mathematics.h"
 #include "fft.h"
 #include "fft-internal.h"
@@ -81,8 +82,13 @@ av_cold int ff_mdct_init(FFTContext *s, int nbits, int inverse, double scale)
     scale = sqrt(fabs(scale));
     for(i=0;i<n4;i++) {
         alpha = 2 * M_PI * (i + theta) / n;
+#if FFT_FIXED_32
+        s->tcos[i*tstep] = lrint(-cos(alpha) * 2147483648.0);
+        s->tsin[i*tstep] = lrint(-sin(alpha) * 2147483648.0);
+#else
         s->tcos[i*tstep] = FIX15(-cos(alpha) * scale);
         s->tsin[i*tstep] = FIX15(-sin(alpha) * scale);
+#endif
     }
     return 0;
  fail:
diff --git a/libavcodec/mdec.c b/libavcodec/mdec.c
index 5fd06f4a..1cc4ca47 100644
--- a/libavcodec/mdec.c
+++ b/libavcodec/mdec.c
@@ -31,7 +31,6 @@
 #include "blockdsp.h"
 #include "bswapdsp.h"
 #include "idctdsp.h"
-#include "mpegvideo.h"
 #include "mpeg12.h"
 #include "thread.h"
 
@@ -160,7 +159,7 @@ static inline void idct_put(MDECContext *a, AVFrame *frame, int mb_x, int mb_y)
     a->idsp.idct_put(dest_y + 8 * linesize,     linesize, block[2]);
     a->idsp.idct_put(dest_y + 8 * linesize + 8, linesize, block[3]);
 
-    if (!(a->avctx->flags & CODEC_FLAG_GRAY)) {
+    if (!(a->avctx->flags & AV_CODEC_FLAG_GRAY)) {
         a->idsp.idct_put(dest_cb, frame->linesize[1], block[4]);
         a->idsp.idct_put(dest_cr, frame->linesize[2], block[5]);
     }
@@ -234,6 +233,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
     return 0;
 }
 
+#if HAVE_THREADS
 static av_cold int decode_init_thread_copy(AVCodecContext *avctx)
 {
     MDECContext * const a = avctx->priv_data;
@@ -242,6 +242,7 @@ static av_cold int decode_init_thread_copy(AVCodecContext *avctx)
 
     return 0;
 }
+#endif
 
 static av_cold int decode_end(AVCodecContext *avctx)
 {
@@ -262,6 +263,6 @@ AVCodec ff_mdec_decoder = {
     .init             = decode_init,
     .close            = decode_end,
     .decode           = decode_frame,
-    .capabilities     = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
+    .capabilities     = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
     .init_thread_copy = ONLY_IF_THREADS_ENABLED(decode_init_thread_copy)
 };
diff --git a/libavcodec/me_cmp.c b/libavcodec/me_cmp.c
index d4213d27..dc76b07b 100644
--- a/libavcodec/me_cmp.c
+++ b/libavcodec/me_cmp.c
@@ -991,4 +991,6 @@ av_cold void ff_me_cmp_init(MECmpContext *c, AVCodecContext *avctx)
         ff_me_cmp_init_ppc(c, avctx);
     if (ARCH_X86)
         ff_me_cmp_init_x86(c, avctx);
+    if (ARCH_MIPS)
+        ff_me_cmp_init_mips(c, avctx);
 }
diff --git a/libavcodec/me_cmp.h b/libavcodec/me_cmp.h
index 98ee53ce..a3603ec2 100644
--- a/libavcodec/me_cmp.h
+++ b/libavcodec/me_cmp.h
@@ -87,6 +87,7 @@ void ff_me_cmp_init_alpha(MECmpContext *c, AVCodecContext *avctx);
 void ff_me_cmp_init_arm(MECmpContext *c, AVCodecContext *avctx);
 void ff_me_cmp_init_ppc(MECmpContext *c, AVCodecContext *avctx);
 void ff_me_cmp_init_x86(MECmpContext *c, AVCodecContext *avctx);
+void ff_me_cmp_init_mips(MECmpContext *c, AVCodecContext *avctx);
 
 void ff_set_cmp(MECmpContext *c, me_cmp_func *cmp, int type);
 
diff --git a/libavcodec/metasound.c b/libavcodec/metasound.c
index 2dab135b..5a7f4c3f 100644
--- a/libavcodec/metasound.c
+++ b/libavcodec/metasound.c
@@ -383,7 +383,7 @@ AVCodec ff_metasound_decoder = {
     .init           = metasound_decode_init,
     .close          = ff_twinvq_decode_close,
     .decode         = ff_twinvq_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                       AV_SAMPLE_FMT_NONE },
 };
diff --git a/libavcodec/metasound_data.c b/libavcodec/metasound_data.c
index e439b3d8..6d871174 100644
--- a/libavcodec/metasound_data.c
+++ b/libavcodec/metasound_data.c
@@ -14946,9 +14946,10 @@ static const uint16_t bark_tab_s16_128[] = {
     2, 2, 2, 3, 3, 5, 7, 12, 25, 67
 };
 
+/* unused
 static const uint16_t bark_tab_s16_64[] = {
     1, 1, 2, 2, 3, 6, 11, 38
-};
+}; */
 
 static const uint16_t bark_tab_l16s_1024[] = {
       9,   9,   8,   9,  10,   9,  10,  10,
diff --git a/libavcodec/microdvddec.c b/libavcodec/microdvddec.c
index 96034a04..46d6d147 100644
--- a/libavcodec/microdvddec.c
+++ b/libavcodec/microdvddec.c
@@ -164,6 +164,8 @@ static char *microdvd_load_tags(struct microdvd_tag *tags, char *s)
 
         /* Position */
         case 'P':
+            if (!*s)
+                break;
             tag.persistent = MICRODVD_PERSISTENT_ON;
             tag.data1 = (*s++ == '1');
             if (*s != '}')
@@ -368,7 +370,8 @@ static int microdvd_init(AVCodecContext *avctx)
     }
     return ff_ass_subtitle_header(avctx, font_buf.str, font_size, color,
                                   ASS_DEFAULT_BACK_COLOR, bold, italic,
-                                  underline, alignment);
+                                  underline, ASS_DEFAULT_BORDERSTYLE,
+                                  alignment);
 }
 
 AVCodec ff_microdvd_decoder = {
diff --git a/libavcodec/mimic.c b/libavcodec/mimic.c
index 73f2a13d..06fb393b 100644
--- a/libavcodec/mimic.c
+++ b/libavcodec/mimic.c
@@ -48,7 +48,6 @@ typedef struct MimicContext {
     int             prev_index;
 
     ThreadFrame     frames     [16];
-    AVPicture       flipped_ptrs[16];
 
     DECLARE_ALIGNED(16, int16_t, dct_block)[64];
 
@@ -167,6 +166,7 @@ static av_cold int mimic_decode_init(AVCodecContext *avctx)
     return 0;
 }
 
+#if HAVE_THREADS
 static int mimic_decode_update_thread_context(AVCodecContext *avctx, const AVCodecContext *avctx_from)
 {
     MimicContext *dst = avctx->priv_data, *src = avctx_from->priv_data;
@@ -178,8 +178,6 @@ static int mimic_decode_update_thread_context(AVCodecContext *avctx, const AVCod
     dst->cur_index  = src->next_cur_index;
     dst->prev_index = src->next_prev_index;
 
-    memcpy(dst->flipped_ptrs, src->flipped_ptrs, sizeof(src->flipped_ptrs));
-
     for (i = 0; i < FF_ARRAY_ELEMS(dst->frames); i++) {
         ff_thread_release_buffer(avctx, &dst->frames[i]);
         if (i != src->next_cur_index && src->frames[i].f->data[0]) {
@@ -191,6 +189,7 @@ static int mimic_decode_update_thread_context(AVCodecContext *avctx, const AVCod
 
     return 0;
 }
+#endif
 
 static const int8_t vlcdec_lookup[9][64] = {
     {    0, },
@@ -282,9 +281,9 @@ static int decode(MimicContext *ctx, int quality, int num_coeffs,
         const int is_chroma = !!plane;
         const int qscale    = av_clip(10000 - quality, is_chroma ? 1000 : 2000,
                                       10000) << 2;
-        const int stride    = ctx->flipped_ptrs[ctx->cur_index ].linesize[plane];
-        const uint8_t *src  = ctx->flipped_ptrs[ctx->prev_index].data[plane];
-        uint8_t       *dst  = ctx->flipped_ptrs[ctx->cur_index ].data[plane];
+        const int stride    = ctx->frames[ctx->cur_index ].f->linesize[plane];
+        const uint8_t *src  = ctx->frames[ctx->prev_index].f->data[plane];
+        uint8_t       *dst  = ctx->frames[ctx->cur_index ].f->data[plane];
 
         for (y = 0; y < ctx->num_vblocks[plane]; y++) {
             for (x = 0; x < ctx->num_hblocks[plane]; x++) {
@@ -307,13 +306,13 @@ static int decode(MimicContext *ctx, int quality, int num_coeffs,
                     } else {
                         unsigned int backref = get_bits(&ctx->gb, 4);
                         int index            = (ctx->cur_index + backref) & 15;
-                        uint8_t *p           = ctx->flipped_ptrs[index].data[0];
+                        uint8_t *p           = ctx->frames[index].f->data[0];
 
                         if (index != ctx->cur_index && p) {
                             ff_thread_await_progress(&ctx->frames[index],
                                                      cur_row, 0);
                             p += src -
-                                 ctx->flipped_ptrs[ctx->prev_index].data[plane];
+                                 ctx->frames[ctx->prev_index].f->data[plane];
                             ctx->hdsp.put_pixels_tab[1][0](dst, p, stride, 8);
                         } else {
                             av_log(ctx->avctx, AV_LOG_ERROR,
@@ -340,17 +339,18 @@ static int decode(MimicContext *ctx, int quality, int num_coeffs,
 }
 
 /**
- * Flip the buffer upside-down and put it in the YVU order to match the
+ * Flip the buffer upside-down and put it in the YVU order to revert the
  * way Mimic encodes frames.
  */
-static void prepare_avpic(MimicContext *ctx, AVPicture *dst, AVFrame *src)
+static void flip_swap_frame(AVFrame *f)
 {
     int i;
-    dst->data[0] = src->data[0] + ( ctx->avctx->height       - 1) * src->linesize[0];
-    dst->data[1] = src->data[2] + ((ctx->avctx->height >> 1) - 1) * src->linesize[2];
-    dst->data[2] = src->data[1] + ((ctx->avctx->height >> 1) - 1) * src->linesize[1];
+    uint8_t *data_1 = f->data[1];
+    f->data[0] = f->data[0] + ( f->height       - 1) * f->linesize[0];
+    f->data[1] = f->data[2] + ((f->height >> 1) - 1) * f->linesize[2];
+    f->data[2] = data_1     + ((f->height >> 1) - 1) * f->linesize[1];
     for (i = 0; i < 3; i++)
-        dst->linesize[i] = -src->linesize[i];
+        f->linesize[i] *= -1;
 }
 
 static int mimic_decode_frame(AVCodecContext *avctx, void *data,
@@ -395,7 +395,7 @@ static int mimic_decode_frame(AVCodecContext *avctx, void *data,
         avctx->height  = height;
         avctx->pix_fmt = AV_PIX_FMT_YUV420P;
         for (i = 0; i < 3; i++) {
-            ctx->num_vblocks[i] = FF_CEIL_RSHIFT(height,   3 + !!i);
+            ctx->num_vblocks[i] = AV_CEIL_RSHIFT(height,   3 + !!i);
             ctx->num_hblocks[i] =                width >> (3 + !!i);
         }
     } else if (width != ctx->avctx->width || height != ctx->avctx->height) {
@@ -418,9 +418,6 @@ static int mimic_decode_frame(AVCodecContext *avctx, void *data,
     ctx->next_prev_index = ctx->cur_index;
     ctx->next_cur_index  = (ctx->cur_index - 1) & 15;
 
-    prepare_avpic(ctx, &ctx->flipped_ptrs[ctx->cur_index],
-                  ctx->frames[ctx->cur_index].f);
-
     ff_thread_finish_setup(avctx);
 
     av_fast_padded_malloc(&ctx->swap_buf, &ctx->swap_buf_size, swap_buf_size);
@@ -435,16 +432,17 @@ static int mimic_decode_frame(AVCodecContext *avctx, void *data,
     res = decode(ctx, quality, num_coeffs, !is_pframe);
     ff_thread_report_progress(&ctx->frames[ctx->cur_index], INT_MAX, 0);
     if (res < 0) {
-        if (!(avctx->active_thread_type & FF_THREAD_FRAME)) {
+        if (!(avctx->active_thread_type & FF_THREAD_FRAME))
             ff_thread_release_buffer(avctx, &ctx->frames[ctx->cur_index]);
-            return res;
-        }
+        return res;
     }
 
     if ((res = av_frame_ref(data, ctx->frames[ctx->cur_index].f)) < 0)
         return res;
     *got_frame      = 1;
 
+    flip_swap_frame(data);
+
     ctx->prev_index = ctx->next_prev_index;
     ctx->cur_index  = ctx->next_cur_index;
 
@@ -454,6 +452,7 @@ static int mimic_decode_frame(AVCodecContext *avctx, void *data,
     return buf_size;
 }
 
+#if HAVE_THREADS
 static av_cold int mimic_init_thread_copy(AVCodecContext *avctx)
 {
     MimicContext *ctx = avctx->priv_data;
@@ -469,6 +468,7 @@ static av_cold int mimic_init_thread_copy(AVCodecContext *avctx)
 
     return 0;
 }
+#endif
 
 AVCodec ff_mimic_decoder = {
     .name                  = "mimic",
@@ -479,7 +479,7 @@ AVCodec ff_mimic_decoder = {
     .init                  = mimic_decode_init,
     .close                 = mimic_decode_end,
     .decode                = mimic_decode_frame,
-    .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
+    .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
     .update_thread_context = ONLY_IF_THREADS_ENABLED(mimic_decode_update_thread_context),
     .init_thread_copy      = ONLY_IF_THREADS_ENABLED(mimic_init_thread_copy),
 };
diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
index 463072a7..f66017ab 100644
--- a/libavcodec/mips/Makefile
+++ b/libavcodec/mips/Makefile
@@ -8,7 +8,7 @@ MIPSFPU-OBJS-$(CONFIG_AMRWB_DECODER)      += mips/acelp_filters_mips.o     \
                                              mips/celp_math_mips.o         \
                                              mips/acelp_vectors_mips.o
 MIPSFPU-OBJS-$(CONFIG_MPEGAUDIODSP)       += mips/mpegaudiodsp_mips_float.o
-MIPSDSPR1-OBJS-$(CONFIG_MPEGAUDIODSP)     += mips/mpegaudiodsp_mips_fixed.o
+MIPSDSP-OBJS-$(CONFIG_MPEGAUDIODSP)       += mips/mpegaudiodsp_mips_fixed.o
 MIPSFPU-OBJS-$(CONFIG_FFT)                += mips/fft_mips.o
 MIPSFPU-OBJS-$(CONFIG_FMTCONVERT)         += mips/fmtconvert_mips.o
 OBJS-$(CONFIG_AC3DSP)                     += mips/ac3dsp_mips.o
@@ -16,17 +16,64 @@ OBJS-$(CONFIG_AAC_DECODER)                += mips/aacdec_mips.o            \
                                              mips/aacsbr_mips.o            \
                                              mips/sbrdsp_mips.o            \
                                              mips/aacpsdsp_mips.o
-MIPSDSPR1-OBJS-$(CONFIG_AAC_ENCODER)      += mips/aaccoder_mips.o
+MIPSDSP-OBJS-$(CONFIG_AAC_ENCODER)        += mips/aaccoder_mips.o
 MIPSFPU-OBJS-$(CONFIG_AAC_ENCODER)        += mips/iirfilter_mips.o
-OBJS-$(CONFIG_HEVC_DECODER)               += mips/hevcdsp_init_mips.o
+OBJS-$(CONFIG_HEVC_DECODER)               += mips/hevcdsp_init_mips.o      \
+                                             mips/hevcpred_init_mips.o
+OBJS-$(CONFIG_VP9_DECODER)                += mips/vp9dsp_init_mips.o
+OBJS-$(CONFIG_VP8_DECODER)                += mips/vp8dsp_init_mips.o
 OBJS-$(CONFIG_H264DSP)                    += mips/h264dsp_init_mips.o
+OBJS-$(CONFIG_H264QPEL)                   += mips/h264qpel_init_mips.o
 OBJS-$(CONFIG_H264CHROMA)                 += mips/h264chroma_init_mips.o
+OBJS-$(CONFIG_H264PRED)                   += mips/h264pred_init_mips.o
+OBJS-$(CONFIG_H263DSP)                    += mips/h263dsp_init_mips.o
+OBJS-$(CONFIG_QPELDSP)                    += mips/qpeldsp_init_mips.o
+OBJS-$(CONFIG_HPELDSP)                    += mips/hpeldsp_init_mips.o
+OBJS-$(CONFIG_BLOCKDSP)                   += mips/blockdsp_init_mips.o
+OBJS-$(CONFIG_PIXBLOCKDSP)                += mips/pixblockdsp_init_mips.o
+OBJS-$(CONFIG_IDCTDSP)                    += mips/idctdsp_init_mips.o
+OBJS-$(CONFIG_MPEGVIDEO)                  += mips/mpegvideo_init_mips.o
+OBJS-$(CONFIG_MPEGVIDEOENC)               += mips/mpegvideoencdsp_init_mips.o
+OBJS-$(CONFIG_ME_CMP)                     += mips/me_cmp_init_mips.o
+OBJS-$(CONFIG_MPEG4_DECODER)              += mips/xvididct_init_mips.o
 MSA-OBJS-$(CONFIG_HEVC_DECODER)           += mips/hevcdsp_msa.o            \
                                              mips/hevc_mc_uni_msa.o        \
                                              mips/hevc_mc_uniw_msa.o       \
                                              mips/hevc_mc_bi_msa.o         \
                                              mips/hevc_mc_biw_msa.o        \
-                                             mips/hevc_idct_msa.o
-MSA-OBJS-$(CONFIG_H264DSP)                += mips/h264dsp_msa.o
-LOONGSON3-OBJS-$(CONFIG_H264DSP)          += mips/h264dsp_mmi.o
-LOONGSON3-OBJS-$(CONFIG_H264CHROMA)       += mips/h264chroma_mmi.o
+                                             mips/hevc_idct_msa.o          \
+                                             mips/hevc_lpf_sao_msa.o       \
+                                             mips/hevcpred_msa.o
+MSA-OBJS-$(CONFIG_VP9_DECODER)            += mips/vp9_mc_msa.o             \
+                                             mips/vp9_lpf_msa.o            \
+                                             mips/vp9_idct_msa.o           \
+                                             mips/vp9_intra_msa.o
+MSA-OBJS-$(CONFIG_VP8_DECODER)            += mips/vp8_mc_msa.o             \
+                                             mips/vp8_idct_msa.o           \
+                                             mips/vp8_lpf_msa.o
+MSA-OBJS-$(CONFIG_H264DSP)                += mips/h264dsp_msa.o            \
+                                             mips/h264idct_msa.o
+MSA-OBJS-$(CONFIG_H264QPEL)               += mips/h264qpel_msa.o
+MSA-OBJS-$(CONFIG_H264CHROMA)             += mips/h264chroma_msa.o
+MSA-OBJS-$(CONFIG_H264PRED)               += mips/h264pred_msa.o
+MSA-OBJS-$(CONFIG_H263DSP)                += mips/h263dsp_msa.o
+MSA-OBJS-$(CONFIG_QPELDSP)                += mips/qpeldsp_msa.o
+MSA-OBJS-$(CONFIG_HPELDSP)                += mips/hpeldsp_msa.o
+MSA-OBJS-$(CONFIG_BLOCKDSP)               += mips/blockdsp_msa.o
+MSA-OBJS-$(CONFIG_PIXBLOCKDSP)            += mips/pixblockdsp_msa.o
+MSA-OBJS-$(CONFIG_IDCTDSP)                += mips/idctdsp_msa.o           \
+                                             mips/simple_idct_msa.o
+MSA-OBJS-$(CONFIG_MPEGVIDEO)              += mips/mpegvideo_msa.o
+MSA-OBJS-$(CONFIG_MPEGVIDEOENC)           += mips/mpegvideoencdsp_msa.o
+MSA-OBJS-$(CONFIG_ME_CMP)                 += mips/me_cmp_msa.o
+MMI-OBJS                                  += mips/constants.o
+MMI-OBJS-$(CONFIG_H264DSP)                += mips/h264dsp_mmi.o
+MMI-OBJS-$(CONFIG_H264CHROMA)             += mips/h264chroma_mmi.o
+MMI-OBJS-$(CONFIG_H264PRED)               += mips/h264pred_mmi.o
+MMI-OBJS-$(CONFIG_MPEGVIDEO)              += mips/mpegvideo_mmi.o
+MMI-OBJS-$(CONFIG_IDCTDSP)                += mips/idctdsp_mmi.o           \
+                                             mips/simple_idct_mmi.o
+MMI-OBJS-$(CONFIG_MPEG4_DECODER)          += mips/xvid_idct_mmi.o
+MMI-OBJS-$(CONFIG_BLOCKDSP)               += mips/blockdsp_mmi.o
+MMI-OBJS-$(CONFIG_PIXBLOCKDSP)            += mips/pixblockdsp_mmi.o
+MMI-OBJS-$(CONFIG_H264QPEL)               += mips/h264qpel_mmi.o
diff --git a/libavcodec/mips/aaccoder_mips.c b/libavcodec/mips/aaccoder_mips.c
index ea0bf315..d690c8c2 100644
--- a/libavcodec/mips/aaccoder_mips.c
+++ b/libavcodec/mips/aaccoder_mips.c
@@ -62,29 +62,17 @@
 #include "libavcodec/aac.h"
 #include "libavcodec/aacenc.h"
 #include "libavcodec/aactab.h"
+#include "libavcodec/aacenctab.h"
+#include "libavcodec/aacenc_utils.h"
 
 #if HAVE_INLINE_ASM
+#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
 typedef struct BandCodingPath {
     int prev_idx;
     float cost;
     int run;
 } BandCodingPath;
 
-static const uint8_t run_value_bits_long[64] = {
-     5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
-     5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5, 10,
-    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
-    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 15
-};
-
-static const uint8_t run_value_bits_short[16] = {
-    3, 3, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 9
-};
-
-static const uint8_t * const run_value_bits[2] = {
-    run_value_bits_long, run_value_bits_short
-};
-
 static const uint8_t uquad_sign_bits[81] = {
     0, 1, 1, 1, 2, 2, 1, 2, 2,
     1, 2, 2, 2, 3, 3, 2, 3, 3,
@@ -144,77 +132,24 @@ static const uint8_t esc_sign_bits[289] = {
     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
 };
 
-static void abs_pow34_v(float *out, const float *in, const int size) {
-#ifndef USE_REALLY_FULL_SEARCH
-    int i;
-    float a, b, c, d;
-    float ax, bx, cx, dx;
-
-    for (i = 0; i < size; i += 4) {
-        a = fabsf(in[i  ]);
-        b = fabsf(in[i+1]);
-        c = fabsf(in[i+2]);
-        d = fabsf(in[i+3]);
-
-        ax = sqrtf(a);
-        bx = sqrtf(b);
-        cx = sqrtf(c);
-        dx = sqrtf(d);
-
-        a = a * ax;
-        b = b * bx;
-        c = c * cx;
-        d = d * dx;
-
-        out[i  ] = sqrtf(a);
-        out[i+1] = sqrtf(b);
-        out[i+2] = sqrtf(c);
-        out[i+3] = sqrtf(d);
-    }
-#endif /* USE_REALLY_FULL_SEARCH */
-}
-
-static float find_max_val(int group_len, int swb_size, const float *scaled) {
-    float maxval = 0.0f;
-    int w2, i;
-    for (w2 = 0; w2 < group_len; w2++) {
-        for (i = 0; i < swb_size; i++) {
-            maxval = FFMAX(maxval, scaled[w2*128+i]);
-        }
-    }
-    return maxval;
-}
-
-static int find_min_book(float maxval, int sf) {
-    float Q = ff_aac_pow2sf_tab[POW_SF2_ZERO - sf + SCALE_ONE_POS - SCALE_DIV_512];
-    float Q34 = sqrtf(Q * sqrtf(Q));
-    int qmaxval, cb;
-    qmaxval = maxval * Q34 + 0.4054f;
-    if      (qmaxval ==  0) cb = 0;
-    else if (qmaxval ==  1) cb = 1;
-    else if (qmaxval ==  2) cb = 3;
-    else if (qmaxval <=  4) cb = 5;
-    else if (qmaxval <=  7) cb = 7;
-    else if (qmaxval <= 12) cb = 9;
-    else                    cb = 11;
-    return cb;
-}
-
 /**
  * Functions developed from template function and optimized for quantizing and encoding band
  */
 static void quantize_and_encode_band_cost_SQUAD_mips(struct AACEncContext *s,
-                                                     PutBitContext *pb, const float *in,
+                                                     PutBitContext *pb, const float *in, float *out,
                                                      const float *scaled, int size, int scale_idx,
                                                      int cb, const float lambda, const float uplim,
-                                                     int *bits)
+                                                     int *bits, float *energy, const float ROUNDING)
 {
     const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
+    const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
     int i;
     int qc1, qc2, qc3, qc4;
+    float qenergy = 0.0f;
 
     uint8_t  *p_bits  = (uint8_t  *)ff_aac_spectral_bits[cb-1];
     uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
+    float    *p_vec   = (float    *)ff_aac_codebook_vectors[cb-1];
 
     abs_pow34_v(s->scoefs, in, size);
     scaled = s->scoefs;
@@ -222,11 +157,12 @@ static void quantize_and_encode_band_cost_SQUAD_mips(struct AACEncContext *s,
         int curidx;
         int *in_int = (int *)&in[i];
         int t0, t1, t2, t3, t4, t5, t6, t7;
+        const float *vec;
 
-        qc1 = scaled[i  ] * Q34 + 0.4054f;
-        qc2 = scaled[i+1] * Q34 + 0.4054f;
-        qc3 = scaled[i+2] * Q34 + 0.4054f;
-        qc4 = scaled[i+3] * Q34 + 0.4054f;
+        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
+        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
+        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
+        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
 
         __asm__ volatile (
             ".set push                      \n\t"
@@ -273,21 +209,43 @@ static void quantize_and_encode_band_cost_SQUAD_mips(struct AACEncContext *s,
         curidx += 40;
 
         put_bits(pb, p_bits[curidx], p_codes[curidx]);
+
+        if (out || energy) {
+            float e1,e2,e3,e4;
+            vec = &p_vec[curidx*4];
+            e1 = vec[0] * IQ;
+            e2 = vec[1] * IQ;
+            e3 = vec[2] * IQ;
+            e4 = vec[3] * IQ;
+            if (out) {
+                out[i+0] = e1;
+                out[i+1] = e2;
+                out[i+2] = e3;
+                out[i+3] = e4;
+            }
+            if (energy)
+                qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
+        }
     }
+    if (energy)
+        *energy = qenergy;
 }
 
 static void quantize_and_encode_band_cost_UQUAD_mips(struct AACEncContext *s,
-                                                     PutBitContext *pb, const float *in,
+                                                     PutBitContext *pb, const float *in, float *out,
                                                      const float *scaled, int size, int scale_idx,
                                                      int cb, const float lambda, const float uplim,
-                                                     int *bits)
+                                                     int *bits, float *energy, const float ROUNDING)
 {
     const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
+    const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
     int i;
     int qc1, qc2, qc3, qc4;
+    float qenergy = 0.0f;
 
     uint8_t  *p_bits  = (uint8_t  *)ff_aac_spectral_bits[cb-1];
     uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
+    float    *p_vec   = (float    *)ff_aac_codebook_vectors[cb-1];
 
     abs_pow34_v(s->scoefs, in, size);
     scaled = s->scoefs;
@@ -297,11 +255,12 @@ static void quantize_and_encode_band_cost_UQUAD_mips(struct AACEncContext *s,
         uint8_t v_bits;
         unsigned int v_codes;
         int t0, t1, t2, t3, t4;
+        const float *vec;
 
-        qc1 = scaled[i  ] * Q34 + 0.4054f;
-        qc2 = scaled[i+1] * Q34 + 0.4054f;
-        qc3 = scaled[i+2] * Q34 + 0.4054f;
-        qc4 = scaled[i+3] * Q34 + 0.4054f;
+        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
+        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
+        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
+        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
 
         __asm__ volatile (
             ".set push                              \n\t"
@@ -365,21 +324,43 @@ static void quantize_and_encode_band_cost_UQUAD_mips(struct AACEncContext *s,
         v_codes = (p_codes[curidx] << count) | (sign & ((1 << count) - 1));
         v_bits  = p_bits[curidx] + count;
         put_bits(pb, v_bits, v_codes);
+
+        if (out || energy) {
+            float e1,e2,e3,e4;
+            vec = &p_vec[curidx*4];
+            e1 = copysignf(vec[0] * IQ, in[i+0]);
+            e2 = copysignf(vec[1] * IQ, in[i+1]);
+            e3 = copysignf(vec[2] * IQ, in[i+2]);
+            e4 = copysignf(vec[3] * IQ, in[i+3]);
+            if (out) {
+                out[i+0] = e1;
+                out[i+1] = e2;
+                out[i+2] = e3;
+                out[i+3] = e4;
+            }
+            if (energy)
+                qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
+        }
     }
+    if (energy)
+        *energy = qenergy;
 }
 
 static void quantize_and_encode_band_cost_SPAIR_mips(struct AACEncContext *s,
-                                                     PutBitContext *pb, const float *in,
+                                                     PutBitContext *pb, const float *in, float *out,
                                                      const float *scaled, int size, int scale_idx,
                                                      int cb, const float lambda, const float uplim,
-                                                     int *bits)
+                                                     int *bits, float *energy, const float ROUNDING)
 {
     const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
+    const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
     int i;
     int qc1, qc2, qc3, qc4;
+    float qenergy = 0.0f;
 
     uint8_t  *p_bits  = (uint8_t  *)ff_aac_spectral_bits[cb-1];
     uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
+    float    *p_vec   = (float    *)ff_aac_codebook_vectors[cb-1];
 
     abs_pow34_v(s->scoefs, in, size);
     scaled = s->scoefs;
@@ -389,11 +370,12 @@ static void quantize_and_encode_band_cost_SPAIR_mips(struct AACEncContext *s,
         uint8_t v_bits;
         unsigned int v_codes;
         int t0, t1, t2, t3, t4, t5, t6, t7;
+        const float *vec1, *vec2;
 
-        qc1 = scaled[i  ] * Q34 + 0.4054f;
-        qc2 = scaled[i+1] * Q34 + 0.4054f;
-        qc3 = scaled[i+2] * Q34 + 0.4054f;
-        qc4 = scaled[i+3] * Q34 + 0.4054f;
+        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
+        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
+        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
+        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
 
         __asm__ volatile (
             ".set push                      \n\t"
@@ -444,35 +426,59 @@ static void quantize_and_encode_band_cost_SPAIR_mips(struct AACEncContext *s,
         v_codes = (p_codes[curidx] << p_bits[curidx2]) | (p_codes[curidx2]);
         v_bits  = p_bits[curidx] + p_bits[curidx2];
         put_bits(pb, v_bits, v_codes);
+
+        if (out || energy) {
+            float e1,e2,e3,e4;
+            vec1 = &p_vec[curidx*2 ];
+            vec2 = &p_vec[curidx2*2];
+            e1 = vec1[0] * IQ;
+            e2 = vec1[1] * IQ;
+            e3 = vec2[0] * IQ;
+            e4 = vec2[1] * IQ;
+            if (out) {
+                out[i+0] = e1;
+                out[i+1] = e2;
+                out[i+2] = e3;
+                out[i+3] = e4;
+            }
+            if (energy)
+                qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
+        }
     }
+    if (energy)
+        *energy = qenergy;
 }
 
 static void quantize_and_encode_band_cost_UPAIR7_mips(struct AACEncContext *s,
-                                                      PutBitContext *pb, const float *in,
+                                                      PutBitContext *pb, const float *in, float *out,
                                                       const float *scaled, int size, int scale_idx,
                                                       int cb, const float lambda, const float uplim,
-                                                      int *bits)
+                                                      int *bits, float *energy, const float ROUNDING)
 {
     const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
+    const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
     int i;
     int qc1, qc2, qc3, qc4;
+    float qenergy = 0.0f;
 
     uint8_t  *p_bits  = (uint8_t*) ff_aac_spectral_bits[cb-1];
     uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
+    float    *p_vec   = (float    *)ff_aac_codebook_vectors[cb-1];
 
     abs_pow34_v(s->scoefs, in, size);
     scaled = s->scoefs;
     for (i = 0; i < size; i += 4) {
-        int curidx, sign1, count1, sign2, count2;
+        int curidx1, curidx2, sign1, count1, sign2, count2;
         int *in_int = (int *)&in[i];
         uint8_t v_bits;
         unsigned int v_codes;
         int t0, t1, t2, t3, t4;
+        const float *vec1, *vec2;
 
-        qc1 = scaled[i  ] * Q34 + 0.4054f;
-        qc2 = scaled[i+1] * Q34 + 0.4054f;
-        qc3 = scaled[i+2] * Q34 + 0.4054f;
-        qc4 = scaled[i+3] * Q34 + 0.4054f;
+        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
+        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
+        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
+        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
 
         __asm__ volatile (
             ".set push                              \n\t"
@@ -525,48 +531,72 @@ static void quantize_and_encode_band_cost_UPAIR7_mips(struct AACEncContext *s,
               "memory"
         );
 
-        curidx  = 8 * qc1;
-        curidx += qc2;
+        curidx1  = 8 * qc1;
+        curidx1 += qc2;
 
-        v_codes = (p_codes[curidx] << count1) | sign1;
-        v_bits  = p_bits[curidx] + count1;
+        v_codes = (p_codes[curidx1] << count1) | sign1;
+        v_bits  = p_bits[curidx1] + count1;
         put_bits(pb, v_bits, v_codes);
 
-        curidx  = 8 * qc3;
-        curidx += qc4;
+        curidx2  = 8 * qc3;
+        curidx2 += qc4;
 
-        v_codes = (p_codes[curidx] << count2) | sign2;
-        v_bits  = p_bits[curidx] + count2;
+        v_codes = (p_codes[curidx2] << count2) | sign2;
+        v_bits  = p_bits[curidx2] + count2;
         put_bits(pb, v_bits, v_codes);
+
+        if (out || energy) {
+            float e1,e2,e3,e4;
+            vec1 = &p_vec[curidx1*2];
+            vec2 = &p_vec[curidx2*2];
+            e1 = copysignf(vec1[0] * IQ, in[i+0]);
+            e2 = copysignf(vec1[1] * IQ, in[i+1]);
+            e3 = copysignf(vec2[0] * IQ, in[i+2]);
+            e4 = copysignf(vec2[1] * IQ, in[i+3]);
+            if (out) {
+                out[i+0] = e1;
+                out[i+1] = e2;
+                out[i+2] = e3;
+                out[i+3] = e4;
+            }
+            if (energy)
+                qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
+        }
     }
+    if (energy)
+        *energy = qenergy;
 }
 
 static void quantize_and_encode_band_cost_UPAIR12_mips(struct AACEncContext *s,
-                                                       PutBitContext *pb, const float *in,
+                                                       PutBitContext *pb, const float *in, float *out,
                                                        const float *scaled, int size, int scale_idx,
                                                        int cb, const float lambda, const float uplim,
-                                                       int *bits)
+                                                       int *bits, float *energy, const float ROUNDING)
 {
     const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
+    const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
     int i;
     int qc1, qc2, qc3, qc4;
+    float qenergy = 0.0f;
 
     uint8_t  *p_bits  = (uint8_t*) ff_aac_spectral_bits[cb-1];
     uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
+    float    *p_vec   = (float   *)ff_aac_codebook_vectors[cb-1];
 
     abs_pow34_v(s->scoefs, in, size);
     scaled = s->scoefs;
     for (i = 0; i < size; i += 4) {
-        int curidx, sign1, count1, sign2, count2;
+        int curidx1, curidx2, sign1, count1, sign2, count2;
         int *in_int = (int *)&in[i];
         uint8_t v_bits;
         unsigned int v_codes;
         int t0, t1, t2, t3, t4;
+        const float *vec1, *vec2;
 
-        qc1 = scaled[i  ] * Q34 + 0.4054f;
-        qc2 = scaled[i+1] * Q34 + 0.4054f;
-        qc3 = scaled[i+2] * Q34 + 0.4054f;
-        qc4 = scaled[i+3] * Q34 + 0.4054f;
+        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
+        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
+        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
+        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
 
         __asm__ volatile (
             ".set push                              \n\t"
@@ -618,31 +648,53 @@ static void quantize_and_encode_band_cost_UPAIR12_mips(struct AACEncContext *s,
             : "memory"
         );
 
-        curidx  = 13 * qc1;
-        curidx += qc2;
+        curidx1  = 13 * qc1;
+        curidx1 += qc2;
 
-        v_codes = (p_codes[curidx] << count1) | sign1;
-        v_bits  = p_bits[curidx] + count1;
+        v_codes = (p_codes[curidx1] << count1) | sign1;
+        v_bits  = p_bits[curidx1] + count1;
         put_bits(pb, v_bits, v_codes);
 
-        curidx  = 13 * qc3;
-        curidx += qc4;
+        curidx2  = 13 * qc3;
+        curidx2 += qc4;
 
-        v_codes = (p_codes[curidx] << count2) | sign2;
-        v_bits  = p_bits[curidx] + count2;
+        v_codes = (p_codes[curidx2] << count2) | sign2;
+        v_bits  = p_bits[curidx2] + count2;
         put_bits(pb, v_bits, v_codes);
+
+        if (out || energy) {
+            float e1,e2,e3,e4;
+            vec1 = &p_vec[curidx1*2];
+            vec2 = &p_vec[curidx2*2];
+            e1 = copysignf(vec1[0] * IQ, in[i+0]);
+            e2 = copysignf(vec1[1] * IQ, in[i+1]);
+            e3 = copysignf(vec2[0] * IQ, in[i+2]);
+            e4 = copysignf(vec2[1] * IQ, in[i+3]);
+            if (out) {
+                out[i+0] = e1;
+                out[i+1] = e2;
+                out[i+2] = e3;
+                out[i+3] = e4;
+            }
+            if (energy)
+                qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
+        }
     }
+    if (energy)
+        *energy = qenergy;
 }
 
 static void quantize_and_encode_band_cost_ESC_mips(struct AACEncContext *s,
-                                                   PutBitContext *pb, const float *in,
+                                                   PutBitContext *pb, const float *in, float *out,
                                                    const float *scaled, int size, int scale_idx,
                                                    int cb, const float lambda, const float uplim,
-                                                   int *bits)
+                                                   int *bits, float *energy, const float ROUNDING)
 {
     const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
+    const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
     int i;
     int qc1, qc2, qc3, qc4;
+    float qenergy = 0.0f;
 
     uint8_t  *p_bits    = (uint8_t* )ff_aac_spectral_bits[cb-1];
     uint16_t *p_codes   = (uint16_t*)ff_aac_spectral_codes[cb-1];
@@ -658,11 +710,12 @@ static void quantize_and_encode_band_cost_ESC_mips(struct AACEncContext *s,
             uint8_t v_bits;
             unsigned int v_codes;
             int t0, t1, t2, t3, t4;
+            const float *vec1, *vec2;
 
-            qc1 = scaled[i  ] * Q34 + 0.4054f;
-            qc2 = scaled[i+1] * Q34 + 0.4054f;
-            qc3 = scaled[i+2] * Q34 + 0.4054f;
-            qc4 = scaled[i+3] * Q34 + 0.4054f;
+            qc1 = scaled[i  ] * Q34 + ROUNDING;
+            qc2 = scaled[i+1] * Q34 + ROUNDING;
+            qc3 = scaled[i+2] * Q34 + ROUNDING;
+            qc4 = scaled[i+3] * Q34 + ROUNDING;
 
             __asm__ volatile (
                 ".set push                                  \n\t"
@@ -726,6 +779,24 @@ static void quantize_and_encode_band_cost_ESC_mips(struct AACEncContext *s,
             v_codes = (p_codes[curidx2] << count2) | sign2;
             v_bits  = p_bits[curidx2] + count2;
             put_bits(pb, v_bits, v_codes);
+
+            if (out || energy) {
+                float e1,e2,e3,e4;
+                vec1 = &p_vectors[curidx*2 ];
+                vec2 = &p_vectors[curidx2*2];
+                e1 = copysignf(vec1[0] * IQ, in[i+0]);
+                e2 = copysignf(vec1[1] * IQ, in[i+1]);
+                e3 = copysignf(vec2[0] * IQ, in[i+2]);
+                e4 = copysignf(vec2[1] * IQ, in[i+3]);
+                if (out) {
+                    out[i+0] = e1;
+                    out[i+1] = e2;
+                    out[i+2] = e3;
+                    out[i+3] = e4;
+                }
+                if (energy)
+                    qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
+            }
         }
     } else {
         for (i = 0; i < size; i += 4) {
@@ -736,10 +807,10 @@ static void quantize_and_encode_band_cost_ESC_mips(struct AACEncContext *s,
             int c1, c2, c3, c4;
             int t0, t1, t2, t3, t4;
 
-            qc1 = scaled[i  ] * Q34 + 0.4054f;
-            qc2 = scaled[i+1] * Q34 + 0.4054f;
-            qc3 = scaled[i+2] * Q34 + 0.4054f;
-            qc4 = scaled[i+3] * Q34 + 0.4054f;
+            qc1 = scaled[i  ] * Q34 + ROUNDING;
+            qc2 = scaled[i+1] * Q34 + ROUNDING;
+            qc3 = scaled[i+2] * Q34 + ROUNDING;
+            qc4 = scaled[i+3] * Q34 + ROUNDING;
 
             __asm__ volatile (
                 ".set push                                  \n\t"
@@ -836,16 +907,62 @@ static void quantize_and_encode_band_cost_ESC_mips(struct AACEncContext *s,
                 v_codes = (((1 << (len - 3)) - 2) << len) | (c4 & ((1 << len) - 1));
                 put_bits(pb, len * 2 - 3, v_codes);
             }
+
+            if (out || energy) {
+                float e1, e2, e3, e4;
+                e1 = copysignf(c1 * cbrtf(c1) * IQ, in[i+0]);
+                e2 = copysignf(c2 * cbrtf(c2) * IQ, in[i+1]);
+                e3 = copysignf(c3 * cbrtf(c3) * IQ, in[i+2]);
+                e4 = copysignf(c4 * cbrtf(c4) * IQ, in[i+3]);
+                if (out) {
+                    out[i+0] = e1;
+                    out[i+1] = e2;
+                    out[i+2] = e3;
+                    out[i+3] = e4;
+                }
+                if (energy)
+                    qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
+            }
+        }
+    }
+    if (energy)
+        *energy = qenergy;
+}
+
+static void quantize_and_encode_band_cost_NONE_mips(struct AACEncContext *s,
+                                                         PutBitContext *pb, const float *in, float *out,
+                                                         const float *scaled, int size, int scale_idx,
+                                                         int cb, const float lambda, const float uplim,
+                                                         int *bits, float *energy, const float ROUNDING) {
+    av_assert0(0); /* table filler for the nonexistent codebook 12: asserts on a constant 0, so any call fails */
+}
+
+static void quantize_and_encode_band_cost_ZERO_mips(struct AACEncContext *s,
+                                                         PutBitContext *pb, const float *in, float *out,
+                                                         const float *scaled, int size, int scale_idx,
+                                                         int cb, const float lambda, const float uplim,
+                                                         int *bits, float *energy, const float ROUNDING) {
+    int i;
+    if (bits)
+        *bits = 0; /* this variant never calls put_bits(), so it reports zero bits */
+    if (out) {
+        for (i = 0; i < size; i += 4) { /* NOTE(review): stores 4 floats per step; presumably size is a multiple of 4 - confirm */
+           out[i  ] = 0.0f;
+           out[i+1] = 0.0f;
+           out[i+2] = 0.0f;
+           out[i+3] = 0.0f;
         }
     }
+    if (energy)
+        *energy = 0.0f; /* reported energy is always zero, whether or not out was filled */
 }
 
 static void (*const quantize_and_encode_band_cost_arr[])(struct AACEncContext *s,
-                                                         PutBitContext *pb, const float *in,
+                                                         PutBitContext *pb, const float *in, float *out,
                                                          const float *scaled, int size, int scale_idx,
                                                          int cb, const float lambda, const float uplim,
-                                                         int *bits) = {
-    NULL,
+                                                         int *bits, float *energy, const float ROUNDING) = {
+    quantize_and_encode_band_cost_ZERO_mips,
     quantize_and_encode_band_cost_SQUAD_mips,
     quantize_and_encode_band_cost_SQUAD_mips,
     quantize_and_encode_band_cost_UQUAD_mips,
@@ -857,21 +974,25 @@ static void (*const quantize_and_encode_band_cost_arr[])(struct AACEncContext *s
     quantize_and_encode_band_cost_UPAIR12_mips,
     quantize_and_encode_band_cost_UPAIR12_mips,
     quantize_and_encode_band_cost_ESC_mips,
+    quantize_and_encode_band_cost_NONE_mips, /* cb 12 doesn't exist */
+    quantize_and_encode_band_cost_ZERO_mips,
+    quantize_and_encode_band_cost_ZERO_mips,
+    quantize_and_encode_band_cost_ZERO_mips,
 };
 
-#define quantize_and_encode_band_cost(                                  \
-                                s, pb, in, scaled, size, scale_idx, cb, \
-                                lambda, uplim, bits)                    \
-    quantize_and_encode_band_cost_arr[cb](                              \
-                                s, pb, in, scaled, size, scale_idx, cb, \
-                                lambda, uplim, bits)
+#define quantize_and_encode_band_cost(                                       \
+                                s, pb, in, out, scaled, size, scale_idx, cb, \
+                                lambda, uplim, bits, energy, ROUNDING)       \
+    quantize_and_encode_band_cost_arr[cb](                                   \
+                                s, pb, in, out, scaled, size, scale_idx, cb, \
+                                lambda, uplim, bits, energy, ROUNDING)
 
 static void quantize_and_encode_band_mips(struct AACEncContext *s, PutBitContext *pb,
-                                          const float *in, int size, int scale_idx,
-                                          int cb, const float lambda)
+                                          const float *in, float *out, int size, int scale_idx,
+                                          int cb, const float lambda, int rtz)
 {
-    quantize_and_encode_band_cost(s, pb, in, NULL, size, scale_idx, cb, lambda,
-                                  INFINITY, NULL);
+    quantize_and_encode_band_cost(s, pb, in, out, NULL, size, scale_idx, cb, lambda,
+                                  INFINITY, NULL, NULL, (rtz) ? ROUND_TO_ZERO : ROUND_STANDARD); /* uplim = INFINITY; bits and energy are not requested; rtz picks the rounding constant */
 }
 
 /**
@@ -886,6 +1007,16 @@ static float get_band_numbits_ZERO_mips(struct AACEncContext *s,
     return 0;
 }
 
+static float get_band_numbits_NONE_mips(struct AACEncContext *s,
+                                        PutBitContext *pb, const float *in,
+                                        const float *scaled, int size, int scale_idx,
+                                        int cb, const float lambda, const float uplim,
+                                        int *bits)
+{
+    av_assert0(0); /* table placeholder for the nonexistent codebook 12; must never be dispatched */
+    return 0;      /* value only satisfies the float return type */
+}
+
 static float get_band_numbits_SQUAD_mips(struct AACEncContext *s,
                                          PutBitContext *pb, const float *in,
                                          const float *scaled, int size, int scale_idx,
@@ -904,10 +1035,10 @@ static float get_band_numbits_SQUAD_mips(struct AACEncContext *s,
         int *in_int = (int *)&in[i];
         int t0, t1, t2, t3, t4, t5, t6, t7;
 
-        qc1 = scaled[i  ] * Q34 + 0.4054f;
-        qc2 = scaled[i+1] * Q34 + 0.4054f;
-        qc3 = scaled[i+2] * Q34 + 0.4054f;
-        qc4 = scaled[i+3] * Q34 + 0.4054f;
+        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
+        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
+        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
+        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
 
         __asm__ volatile (
             ".set push                      \n\t"
@@ -975,10 +1106,10 @@ static float get_band_numbits_UQUAD_mips(struct AACEncContext *s,
         int curidx;
         int t0, t1, t2, t3, t4;
 
-        qc1 = scaled[i  ] * Q34 + 0.4054f;
-        qc2 = scaled[i+1] * Q34 + 0.4054f;
-        qc3 = scaled[i+2] * Q34 + 0.4054f;
-        qc4 = scaled[i+3] * Q34 + 0.4054f;
+        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
+        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
+        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
+        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
 
         __asm__ volatile (
             ".set push                      \n\t"
@@ -1034,10 +1165,10 @@ static float get_band_numbits_SPAIR_mips(struct AACEncContext *s,
         int *in_int = (int *)&in[i];
         int t0, t1, t2, t3, t4, t5, t6, t7;
 
-        qc1 = scaled[i  ] * Q34 + 0.4054f;
-        qc2 = scaled[i+1] * Q34 + 0.4054f;
-        qc3 = scaled[i+2] * Q34 + 0.4054f;
-        qc4 = scaled[i+3] * Q34 + 0.4054f;
+        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
+        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
+        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
+        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
 
         __asm__ volatile (
             ".set push                      \n\t"
@@ -1107,10 +1238,10 @@ static float get_band_numbits_UPAIR7_mips(struct AACEncContext *s,
         int curidx, curidx2;
         int t0, t1, t2, t3, t4;
 
-        qc1 = scaled[i  ] * Q34 + 0.4054f;
-        qc2 = scaled[i+1] * Q34 + 0.4054f;
-        qc3 = scaled[i+2] * Q34 + 0.4054f;
-        qc4 = scaled[i+3] * Q34 + 0.4054f;
+        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
+        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
+        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
+        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
 
         __asm__ volatile (
             ".set push                      \n\t"
@@ -1165,10 +1296,10 @@ static float get_band_numbits_UPAIR12_mips(struct AACEncContext *s,
         int curidx, curidx2;
         int t0, t1, t2, t3, t4;
 
-        qc1 = scaled[i  ] * Q34 + 0.4054f;
-        qc2 = scaled[i+1] * Q34 + 0.4054f;
-        qc3 = scaled[i+2] * Q34 + 0.4054f;
-        qc4 = scaled[i+3] * Q34 + 0.4054f;
+        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
+        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
+        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
+        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
 
         __asm__ volatile (
             ".set push                      \n\t"
@@ -1225,10 +1356,10 @@ static float get_band_numbits_ESC_mips(struct AACEncContext *s,
         int c1, c2, c3, c4;
         int t4, t5;
 
-        qc1 = scaled[i  ] * Q34 + 0.4054f;
-        qc2 = scaled[i+1] * Q34 + 0.4054f;
-        qc3 = scaled[i+2] * Q34 + 0.4054f;
-        qc4 = scaled[i+3] * Q34 + 0.4054f;
+        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
+        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
+        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
+        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
 
         __asm__ volatile (
             ".set push                                  \n\t"
@@ -1325,6 +1456,10 @@ static float (*const get_band_numbits_arr[])(struct AACEncContext *s,
     get_band_numbits_UPAIR12_mips,
     get_band_numbits_UPAIR12_mips,
     get_band_numbits_ESC_mips,
+    get_band_numbits_NONE_mips, /* cb 12 doesn't exist */
+    get_band_numbits_ZERO_mips,
+    get_band_numbits_ZERO_mips,
+    get_band_numbits_ZERO_mips,
 };
 
 #define get_band_numbits(                                  \
@@ -1337,7 +1472,7 @@ static float (*const get_band_numbits_arr[])(struct AACEncContext *s,
 static float quantize_band_cost_bits(struct AACEncContext *s, const float *in,
                                      const float *scaled, int size, int scale_idx,
                                      int cb, const float lambda, const float uplim,
-                                     int *bits)
+                                     int *bits, float *energy, int rtz) /* NOTE(review): energy and rtz are not forwarded; *energy is never written here */
 {
     return get_band_numbits(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits);
 }
@@ -1350,7 +1485,7 @@ static float get_band_cost_ZERO_mips(struct AACEncContext *s,
                                      PutBitContext *pb, const float *in,
                                      const float *scaled, int size, int scale_idx,
                                      int cb, const float lambda, const float uplim,
-                                     int *bits)
+                                     int *bits, float *energy)
 {
     int i;
     float cost = 0;
@@ -1363,19 +1498,32 @@ static float get_band_cost_ZERO_mips(struct AACEncContext *s,
     }
     if (bits)
         *bits = 0;
+    if (energy)
+        *energy = 0.0f;
     return cost * lambda;
 }
 
+static float get_band_cost_NONE_mips(struct AACEncContext *s,
+                                     PutBitContext *pb, const float *in,
+                                     const float *scaled, int size, int scale_idx,
+                                     int cb, const float lambda, const float uplim,
+                                     int *bits, float *energy)
+{
+    av_assert0(0); /* table placeholder for the nonexistent codebook 12; must never be dispatched */
+    return 0;      /* value only satisfies the float return type */
+}
+
 static float get_band_cost_SQUAD_mips(struct AACEncContext *s,
                                       PutBitContext *pb, const float *in,
                                       const float *scaled, int size, int scale_idx,
                                       int cb, const float lambda, const float uplim,
-                                      int *bits)
+                                      int *bits, float *energy)
 {
     const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
     const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
     int i;
     float cost = 0;
+    float qenergy = 0.0f;
     int qc1, qc2, qc3, qc4;
     int curbits = 0;
 
@@ -1390,10 +1538,10 @@ static float get_band_cost_SQUAD_mips(struct AACEncContext *s,
         float di0, di1, di2, di3;
         int t0, t1, t2, t3, t4, t5, t6, t7;
 
-        qc1 = scaled[i  ] * Q34 + 0.4054f;
-        qc2 = scaled[i+1] * Q34 + 0.4054f;
-        qc3 = scaled[i+2] * Q34 + 0.4054f;
-        qc4 = scaled[i+3] * Q34 + 0.4054f;
+        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
+        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
+        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
+        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
 
         __asm__ volatile (
             ".set push                                  \n\t"
@@ -1442,6 +1590,9 @@ static float get_band_cost_SQUAD_mips(struct AACEncContext *s,
         curbits += p_bits[curidx];
         vec     = &p_codes[curidx*4];
 
+        qenergy += vec[0]*vec[0] + vec[1]*vec[1]
+                +  vec[2]*vec[2] + vec[3]*vec[3];
+
         __asm__ volatile (
             ".set push                                  \n\t"
             ".set noreorder                             \n\t"
@@ -1476,6 +1627,8 @@ static float get_band_cost_SQUAD_mips(struct AACEncContext *s,
 
     if (bits)
         *bits = curbits;
+    if (energy)
+        *energy = qenergy * (IQ*IQ);
     return cost * lambda + curbits;
 }
 
@@ -1483,12 +1636,13 @@ static float get_band_cost_UQUAD_mips(struct AACEncContext *s,
                                       PutBitContext *pb, const float *in,
                                       const float *scaled, int size, int scale_idx,
                                       int cb, const float lambda, const float uplim,
-                                      int *bits)
+                                      int *bits, float *energy)
 {
     const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
     const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
     int i;
     float cost = 0;
+    float qenergy = 0.0f;
     int curbits = 0;
     int qc1, qc2, qc3, qc4;
 
@@ -1502,10 +1656,10 @@ static float get_band_cost_UQUAD_mips(struct AACEncContext *s,
         float di0, di1, di2, di3;
         int t0, t1, t2, t3, t4;
 
-        qc1 = scaled[i  ] * Q34 + 0.4054f;
-        qc2 = scaled[i+1] * Q34 + 0.4054f;
-        qc3 = scaled[i+2] * Q34 + 0.4054f;
-        qc4 = scaled[i+3] * Q34 + 0.4054f;
+        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
+        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
+        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
+        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
 
         __asm__ volatile (
             ".set push                                  \n\t"
@@ -1541,6 +1695,9 @@ static float get_band_cost_UQUAD_mips(struct AACEncContext *s,
         curbits += uquad_sign_bits[curidx];
         vec     = &p_codes[curidx*4];
 
+        qenergy += vec[0]*vec[0] + vec[1]*vec[1]
+                +  vec[2]*vec[2] + vec[3]*vec[3];
+
         __asm__ volatile (
             ".set push                                  \n\t"
             ".set noreorder                             \n\t"
@@ -1578,6 +1735,8 @@ static float get_band_cost_UQUAD_mips(struct AACEncContext *s,
 
     if (bits)
         *bits = curbits;
+    if (energy)
+        *energy = qenergy * (IQ*IQ);
     return cost * lambda + curbits;
 }
 
@@ -1585,12 +1744,13 @@ static float get_band_cost_SPAIR_mips(struct AACEncContext *s,
                                       PutBitContext *pb, const float *in,
                                       const float *scaled, int size, int scale_idx,
                                       int cb, const float lambda, const float uplim,
-                                      int *bits)
+                                      int *bits, float *energy)
 {
     const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
     const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
     int i;
     float cost = 0;
+    float qenergy = 0.0f;
     int qc1, qc2, qc3, qc4;
     int curbits = 0;
 
@@ -1605,10 +1765,10 @@ static float get_band_cost_SPAIR_mips(struct AACEncContext *s,
         float di0, di1, di2, di3;
         int t0, t1, t2, t3, t4, t5, t6, t7;
 
-        qc1 = scaled[i  ] * Q34 + 0.4054f;
-        qc2 = scaled[i+1] * Q34 + 0.4054f;
-        qc3 = scaled[i+2] * Q34 + 0.4054f;
-        qc4 = scaled[i+3] * Q34 + 0.4054f;
+        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
+        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
+        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
+        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
 
         __asm__ volatile (
             ".set push                                  \n\t"
@@ -1662,6 +1822,9 @@ static float get_band_cost_SPAIR_mips(struct AACEncContext *s,
         vec     = &p_codes[curidx*2];
         vec2    = &p_codes[curidx2*2];
 
+        qenergy += vec[0]*vec[0] + vec[1]*vec[1]
+                +  vec2[0]*vec2[0] + vec2[1]*vec2[1];
+
         __asm__ volatile (
             ".set push                                  \n\t"
             ".set noreorder                             \n\t"
@@ -1696,6 +1859,8 @@ static float get_band_cost_SPAIR_mips(struct AACEncContext *s,
 
     if (bits)
         *bits = curbits;
+    if (energy)
+        *energy = qenergy * (IQ*IQ);
     return cost * lambda + curbits;
 }
 
@@ -1703,12 +1868,13 @@ static float get_band_cost_UPAIR7_mips(struct AACEncContext *s,
                                        PutBitContext *pb, const float *in,
                                        const float *scaled, int size, int scale_idx,
                                        int cb, const float lambda, const float uplim,
-                                       int *bits)
+                                       int *bits, float *energy)
 {
     const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
     const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
     int i;
     float cost = 0;
+    float qenergy = 0.0f;
     int qc1, qc2, qc3, qc4;
     int curbits = 0;
 
@@ -1723,10 +1889,10 @@ static float get_band_cost_UPAIR7_mips(struct AACEncContext *s,
         float di0, di1, di2, di3;
         int t0, t1, t2, t3, t4;
 
-        qc1 = scaled[i  ] * Q34 + 0.4054f;
-        qc2 = scaled[i+1] * Q34 + 0.4054f;
-        qc3 = scaled[i+2] * Q34 + 0.4054f;
-        qc4 = scaled[i+3] * Q34 + 0.4054f;
+        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
+        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
+        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
+        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
 
         __asm__ volatile (
             ".set push                                          \n\t"
@@ -1792,6 +1958,9 @@ static float get_band_cost_UPAIR7_mips(struct AACEncContext *s,
         curbits += upair7_sign_bits[curidx2];
         vec2    = &p_codes[curidx2*2];
 
+        qenergy += vec[0]*vec[0] + vec[1]*vec[1]
+                +  vec2[0]*vec2[0] + vec2[1]*vec2[1];
+
         __asm__ volatile (
             ".set push                                          \n\t"
             ".set noreorder                                     \n\t"
@@ -1829,6 +1998,8 @@ static float get_band_cost_UPAIR7_mips(struct AACEncContext *s,
 
     if (bits)
         *bits = curbits;
+    if (energy)
+        *energy = qenergy * (IQ*IQ);
     return cost * lambda + curbits;
 }
 
@@ -1836,12 +2007,13 @@ static float get_band_cost_UPAIR12_mips(struct AACEncContext *s,
                                         PutBitContext *pb, const float *in,
                                         const float *scaled, int size, int scale_idx,
                                         int cb, const float lambda, const float uplim,
-                                        int *bits)
+                                        int *bits, float *energy)
 {
     const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
     const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
     int i;
     float cost = 0;
+    float qenergy = 0.0f;
     int qc1, qc2, qc3, qc4;
     int curbits = 0;
 
@@ -1857,10 +2029,10 @@ static float get_band_cost_UPAIR12_mips(struct AACEncContext *s,
         float di0, di1, di2, di3;
         int t0, t1, t2, t3, t4;
 
-        qc1 = scaled[i  ] * Q34 + 0.4054f;
-        qc2 = scaled[i+1] * Q34 + 0.4054f;
-        qc3 = scaled[i+2] * Q34 + 0.4054f;
-        qc4 = scaled[i+3] * Q34 + 0.4054f;
+        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
+        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
+        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
+        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
 
         __asm__ volatile (
             ".set push                                          \n\t"
@@ -1925,6 +2097,9 @@ static float get_band_cost_UPAIR12_mips(struct AACEncContext *s,
         vec     = &p_codes[curidx*2];
         vec2    = &p_codes[curidx2*2];
 
+        qenergy += vec[0]*vec[0] + vec[1]*vec[1]
+                +  vec2[0]*vec2[0] + vec2[1]*vec2[1];
+
         __asm__ volatile (
             ".set push                                          \n\t"
             ".set noreorder                                     \n\t"
@@ -1962,6 +2137,8 @@ static float get_band_cost_UPAIR12_mips(struct AACEncContext *s,
 
     if (bits)
         *bits = curbits;
+    if (energy)
+        *energy = qenergy * (IQ*IQ);
     return cost * lambda + curbits;
 }
 
@@ -1969,13 +2146,14 @@ static float get_band_cost_ESC_mips(struct AACEncContext *s,
                                     PutBitContext *pb, const float *in,
                                     const float *scaled, int size, int scale_idx,
                                     int cb, const float lambda, const float uplim,
-                                    int *bits)
+                                    int *bits, float *energy)
 {
     const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
     const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
     const float CLIPPED_ESCAPE = 165140.0f * IQ;
     int i;
     float cost = 0;
+    float qenergy = 0.0f;
     int qc1, qc2, qc3, qc4;
     int curbits = 0;
 
@@ -1985,16 +2163,16 @@ static float get_band_cost_ESC_mips(struct AACEncContext *s,
     for (i = 0; i < size; i += 4) {
         const float *vec, *vec2;
         int curidx, curidx2;
-        float t1, t2, t3, t4;
+        float t1, t2, t3, t4, V;
         float di1, di2, di3, di4;
         int cond0, cond1, cond2, cond3;
         int c1, c2, c3, c4;
         int t6, t7;
 
-        qc1 = scaled[i  ] * Q34 + 0.4054f;
-        qc2 = scaled[i+1] * Q34 + 0.4054f;
-        qc3 = scaled[i+2] * Q34 + 0.4054f;
-        qc4 = scaled[i+3] * Q34 + 0.4054f;
+        qc1 = scaled[i  ] * Q34 + ROUND_STANDARD;
+        qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
+        qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
+        qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
 
         __asm__ volatile (
             ".set push                                  \n\t"
@@ -2057,38 +2235,54 @@ static float get_band_cost_ESC_mips(struct AACEncContext *s,
         if (cond0) {
             if (t1 >= CLIPPED_ESCAPE) {
                 di1 = t1 - CLIPPED_ESCAPE;
+                qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
             } else {
-                di1 = t1 - c1 * cbrtf(c1) * IQ;
+                di1 = t1 - (V = c1 * cbrtf(c1) * IQ);
+                qenergy += V*V;
             }
-        } else
-            di1 = t1 - vec[0] * IQ;
+        } else {
+            di1 = t1 - (V = vec[0] * IQ);
+            qenergy += V*V;
+        }
 
         if (cond1) {
             if (t2 >= CLIPPED_ESCAPE) {
                 di2 = t2 - CLIPPED_ESCAPE;
+                qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
             } else {
-                di2 = t2 - c2 * cbrtf(c2) * IQ;
+                di2 = t2 - (V = c2 * cbrtf(c2) * IQ);
+                qenergy += V*V;
             }
-        } else
-            di2 = t2 - vec[1] * IQ;
+        } else {
+            di2 = t2 - (V = vec[1] * IQ);
+            qenergy += V*V;
+        }
 
         if (cond2) {
             if (t3 >= CLIPPED_ESCAPE) {
                 di3 = t3 - CLIPPED_ESCAPE;
+                qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
             } else {
-                di3 = t3 - c3 * cbrtf(c3) * IQ;
+                di3 = t3 - (V = c3 * cbrtf(c3) * IQ);
+                qenergy += V*V;
             }
-        } else
-            di3 = t3 - vec2[0] * IQ;
+        } else {
+            di3 = t3 - (V = vec2[0] * IQ);
+            qenergy += V*V;
+        }
 
         if (cond3) {
             if (t4 >= CLIPPED_ESCAPE) {
                 di4 = t4 - CLIPPED_ESCAPE;
+                qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
             } else {
-                di4 = t4 - c4 * cbrtf(c4) * IQ;
+                di4 = t4 - (V = c4 * cbrtf(c4) * IQ);
+                qenergy += V*V;
             }
-        } else
-            di4 = t4 - vec2[1]*IQ;
+        } else {
+            di4 = t4 - (V = vec2[1]*IQ);
+            qenergy += V*V;
+        }
 
         cost += di1 * di1 + di2 * di2
                 + di3 * di3 + di4 * di4;
@@ -2103,7 +2297,7 @@ static float (*const get_band_cost_arr[])(struct AACEncContext *s,
                                           PutBitContext *pb, const float *in,
                                           const float *scaled, int size, int scale_idx,
                                           int cb, const float lambda, const float uplim,
-                                          int *bits) = {
+                                          int *bits, float *energy) = {
     get_band_cost_ZERO_mips,
     get_band_cost_SQUAD_mips,
     get_band_cost_SQUAD_mips,
@@ -2116,408 +2310,193 @@ static float (*const get_band_cost_arr[])(struct AACEncContext *s,
     get_band_cost_UPAIR12_mips,
     get_band_cost_UPAIR12_mips,
     get_band_cost_ESC_mips,
+    get_band_cost_NONE_mips, /* cb 12 doesn't exist */
+    get_band_cost_ZERO_mips,
+    get_band_cost_ZERO_mips,
+    get_band_cost_ZERO_mips,
 };
 
 #define get_band_cost(                                  \
                                 s, pb, in, scaled, size, scale_idx, cb, \
-                                lambda, uplim, bits)                    \
+                                lambda, uplim, bits, energy)            \
     get_band_cost_arr[cb](                              \
                                 s, pb, in, scaled, size, scale_idx, cb, \
-                                lambda, uplim, bits)
+                                lambda, uplim, bits, energy)
 
 static float quantize_band_cost(struct AACEncContext *s, const float *in,
                                 const float *scaled, int size, int scale_idx,
                                 int cb, const float lambda, const float uplim,
-                                int *bits)
+                                int *bits, float *energy, int rtz) /* NOTE(review): rtz is accepted but not forwarded to get_band_cost */
 {
-    return get_band_cost(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits);
+    return get_band_cost(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits, energy);
 }
 
-static void search_for_quantizers_twoloop_mips(AVCodecContext *avctx,
-                                               AACEncContext *s,
-                                               SingleChannelElement *sce,
-                                               const float lambda)
-{
-    int start = 0, i, w, w2, g;
-    int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / avctx->channels;
-    float dists[128] = { 0 }, uplims[128];
-    float maxvals[128];
-    int fflag, minscaler;
-    int its  = 0;
-    int allz = 0;
-    float minthr = INFINITY;
-
-    destbits = FFMIN(destbits, 5800);
-    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
-        for (g = 0;  g < sce->ics.num_swb; g++) {
-            int nz = 0;
-            float uplim = 0.0f;
-            for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
-                FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
-                uplim += band->threshold;
-                if (band->energy <= band->threshold || band->threshold == 0.0f) {
-                    sce->zeroes[(w+w2)*16+g] = 1;
-                    continue;
-                }
-                nz = 1;
-            }
-            uplims[w*16+g] = uplim *512;
-            sce->zeroes[w*16+g] = !nz;
-            if (nz)
-                minthr = FFMIN(minthr, uplim);
-            allz |= nz;
-        }
-    }
-    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
-        for (g = 0;  g < sce->ics.num_swb; g++) {
-            if (sce->zeroes[w*16+g]) {
-                sce->sf_idx[w*16+g] = SCALE_ONE_POS;
-                continue;
-            }
-            sce->sf_idx[w*16+g] = SCALE_ONE_POS + FFMIN(log2f(uplims[w*16+g]/minthr)*4,59);
-        }
-    }
+#include "libavcodec/aacenc_quantization_misc.h"
 
-    if (!allz)
-        return;
-    abs_pow34_v(s->scoefs, sce->coeffs, 1024);
-
-    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
-        start = w*128;
-        for (g = 0;  g < sce->ics.num_swb; g++) {
-            const float *scaled = s->scoefs + start;
-            maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled);
-            start += sce->ics.swb_sizes[g];
-        }
-    }
+#include "libavcodec/aaccoder_twoloop.h"
 
-    do {
-        int tbits, qstep;
-        minscaler = sce->sf_idx[0];
-        qstep = its ? 1 : 32;
-        do {
-            int prev = -1;
-            tbits = 0;
-            fflag = 0;
-
-            if (qstep > 1) {
-                for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
-                    start = w*128;
-                    for (g = 0;  g < sce->ics.num_swb; g++) {
-                        const float *coefs = sce->coeffs + start;
-                        const float *scaled = s->scoefs + start;
-                        int bits = 0;
-                        int cb;
-
-                        if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
-                            start += sce->ics.swb_sizes[g];
-                            continue;
-                        }
-                        minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
-                        cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
-                        for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
-                            int b;
-                            bits += quantize_band_cost_bits(s, coefs + w2*128,
-                                                            scaled + w2*128,
-                                                            sce->ics.swb_sizes[g],
-                                                            sce->sf_idx[w*16+g],
-                                                            cb,
-                                                            1.0f,
-                                                            INFINITY,
-                                                            &b);
-                        }
-                        if (prev != -1) {
-                            bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
-                        }
-                        tbits += bits;
-                        start += sce->ics.swb_sizes[g];
-                        prev = sce->sf_idx[w*16+g];
-                    }
-                }
-            }
-            else {
-                for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
-                    start = w*128;
-                    for (g = 0;  g < sce->ics.num_swb; g++) {
-                        const float *coefs = sce->coeffs + start;
-                        const float *scaled = s->scoefs + start;
-                        int bits = 0;
-                        int cb;
-                        float dist = 0.0f;
-
-                        if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
-                            start += sce->ics.swb_sizes[g];
-                            continue;
-                        }
-                        minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
-                        cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
-                        for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
-                            int b;
-                            dist += quantize_band_cost(s, coefs + w2*128,
-                                                       scaled + w2*128,
-                                                       sce->ics.swb_sizes[g],
-                                                       sce->sf_idx[w*16+g],
-                                                       cb,
-                                                       1.0f,
-                                                       INFINITY,
-                                                       &b);
-                            bits += b;
-                        }
-                        dists[w*16+g] = dist - bits;
-                        if (prev != -1) {
-                            bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
-                        }
-                        tbits += bits;
-                        start += sce->ics.swb_sizes[g];
-                        prev = sce->sf_idx[w*16+g];
-                    }
-                }
-            }
-            if (tbits > destbits) {
-                for (i = 0; i < 128; i++)
-                    if (sce->sf_idx[i] < 218 - qstep)
-                        sce->sf_idx[i] += qstep;
-            } else {
-                for (i = 0; i < 128; i++)
-                    if (sce->sf_idx[i] > 60 - qstep)
-                        sce->sf_idx[i] -= qstep;
-            }
-            qstep >>= 1;
-            if (!qstep && tbits > destbits*1.02 && sce->sf_idx[0] < 217)
-                qstep = 1;
-        } while (qstep);
-
-        fflag = 0;
-        minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF);
-        for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
-            for (g = 0; g < sce->ics.num_swb; g++) {
-                int prevsc = sce->sf_idx[w*16+g];
-                if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g] > 60) {
-                    if (find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1))
-                        sce->sf_idx[w*16+g]--;
-                    else
-                        sce->sf_idx[w*16+g]-=2;
-                }
-                sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF);
-                sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], 219);
-                if (sce->sf_idx[w*16+g] != prevsc)
-                    fflag = 1;
-                sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
-            }
-        }
-        its++;
-    } while (fflag && its < 10);
-}
-
-static void search_for_ms_mips(AACEncContext *s, ChannelElement *cpe,
-                               const float lambda)
+static void search_for_ms_mips(AACEncContext *s, ChannelElement *cpe)
 {
-    int start = 0, i, w, w2, g;
+    int start = 0, i, w, w2, g, sid_sf_boost, prev_mid, prev_side;
+    uint8_t nextband0[128], nextband1[128];
     float M[128], S[128];
     float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3;
+    const float lambda = s->lambda;
+    const float mslambda = FFMIN(1.0f, lambda / 120.f); /* lambda / 120 capped at 1.0; only used for the side-channel cost below */
     SingleChannelElement *sce0 = &cpe->ch[0];
     SingleChannelElement *sce1 = &cpe->ch[1];
     if (!cpe->common_window)
         return;
+
+    /** Scout out next nonzero bands */
+    ff_init_nextband_map(sce0, nextband0);
+    ff_init_nextband_map(sce1, nextband1);
+
+    prev_mid = sce0->sf_idx[0];
+    prev_side = sce1->sf_idx[0];
     for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
+        start = 0; /* coefficient offset restarts at band 0 for every window group */
         for (g = 0;  g < sce0->ics.num_swb; g++) {
-            if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) {
-                float dist1 = 0.0f, dist2 = 0.0f;
+            float bmax = bval2bmax(g * 17.0f / sce0->ics.num_swb) / 0.0045f;
+            if (!cpe->is_mask[w*16+g])
+                cpe->ms_mask[w*16+g] = 0;
+            if (!sce0->zeroes[w*16+g] && !sce1->zeroes[w*16+g] && !cpe->is_mask[w*16+g]) {
+                float Mmax = 0.0f, Smax = 0.0f;
+
+                /* Must compute mid/side SF and book for the whole window group */
                 for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
-                    FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
-                    FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
-                    float minthr = FFMIN(band0->threshold, band1->threshold);
-                    float maxthr = FFMAX(band0->threshold, band1->threshold);
-                    for (i = 0; i < sce0->ics.swb_sizes[g]; i+=4) {
-                        M[i  ] = (sce0->coeffs[start+w2*128+i  ]
-                                + sce1->coeffs[start+w2*128+i  ]) * 0.5;
-                        M[i+1] = (sce0->coeffs[start+w2*128+i+1]
-                                + sce1->coeffs[start+w2*128+i+1]) * 0.5;
-                        M[i+2] = (sce0->coeffs[start+w2*128+i+2]
-                                + sce1->coeffs[start+w2*128+i+2]) * 0.5;
-                        M[i+3] = (sce0->coeffs[start+w2*128+i+3]
-                                + sce1->coeffs[start+w2*128+i+3]) * 0.5;
-
-                        S[i  ] =  M[i  ]
-                                - sce1->coeffs[start+w2*128+i  ];
-                        S[i+1] =  M[i+1]
-                                - sce1->coeffs[start+w2*128+i+1];
-                        S[i+2] =  M[i+2]
-                                - sce1->coeffs[start+w2*128+i+2];
-                        S[i+3] =  M[i+3]
-                                - sce1->coeffs[start+w2*128+i+3];
-                   }
-                    abs_pow34_v(L34, sce0->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
-                    abs_pow34_v(R34, sce1->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
-                    abs_pow34_v(M34, M,                         sce0->ics.swb_sizes[g]);
-                    abs_pow34_v(S34, S,                         sce0->ics.swb_sizes[g]);
-                    dist1 += quantize_band_cost(s, sce0->coeffs + start + w2*128,
-                                                L34,
-                                                sce0->ics.swb_sizes[g],
-                                                sce0->sf_idx[(w+w2)*16+g],
-                                                sce0->band_type[(w+w2)*16+g],
-                                                lambda / band0->threshold, INFINITY, NULL);
-                    dist1 += quantize_band_cost(s, sce1->coeffs + start + w2*128,
-                                                R34,
-                                                sce1->ics.swb_sizes[g],
-                                                sce1->sf_idx[(w+w2)*16+g],
-                                                sce1->band_type[(w+w2)*16+g],
-                                                lambda / band1->threshold, INFINITY, NULL);
-                    dist2 += quantize_band_cost(s, M,
-                                                M34,
-                                                sce0->ics.swb_sizes[g],
-                                                sce0->sf_idx[(w+w2)*16+g],
-                                                sce0->band_type[(w+w2)*16+g],
-                                                lambda / maxthr, INFINITY, NULL);
-                    dist2 += quantize_band_cost(s, S,
-                                                S34,
-                                                sce1->ics.swb_sizes[g],
-                                                sce1->sf_idx[(w+w2)*16+g],
-                                                sce1->band_type[(w+w2)*16+g],
-                                                lambda / minthr, INFINITY, NULL);
+                    for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
+                        M[i] = (sce0->coeffs[start+(w+w2)*128+i]
+                              + sce1->coeffs[start+(w+w2)*128+i]) * 0.5;
+                        S[i] =  M[i]
+                              - sce1->coeffs[start+(w+w2)*128+i];
+                    }
+                    abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
+                    abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
+                    for (i = 0; i < sce0->ics.swb_sizes[g]; i++ ) {
+                        Mmax = FFMAX(Mmax, M34[i]);
+                        Smax = FFMAX(Smax, S34[i]);
+                    }
+                }
+
+                for (sid_sf_boost = 0; sid_sf_boost < 4; sid_sf_boost++) { /* each step lowers the candidate side sf_idx by 3 more (see sididx) */
+                    float dist1 = 0.0f, dist2 = 0.0f;
+                    int B0 = 0, B1 = 0;
+                    int minidx;
+                    int mididx, sididx;
+                    int midcb, sidcb;
+
+                    minidx = FFMIN(sce0->sf_idx[w*16+g], sce1->sf_idx[w*16+g]);
+                    mididx = av_clip(minidx, 0, SCALE_MAX_POS - SCALE_DIV_512);
+                    sididx = av_clip(minidx - sid_sf_boost * 3, 0, SCALE_MAX_POS - SCALE_DIV_512);
+                    if (sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT
+                        && (   !ff_sfdelta_can_replace(sce0, nextband0, prev_mid, mididx, w*16+g)
+                            || !ff_sfdelta_can_replace(sce1, nextband1, prev_side, sididx, w*16+g))) {
+                        /* scalefactor range violation, bad stuff, will decrease quality unacceptably */
+                        continue;
+                    }
+
+                    midcb = find_min_book(Mmax, mididx);
+                    sidcb = find_min_book(Smax, sididx);
+
+                    /* No CB can be zero */
+                    midcb = FFMAX(1,midcb);
+                    sidcb = FFMAX(1,sidcb);
+
+                    for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
+                        FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
+                        FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
+                        float minthr = FFMIN(band0->threshold, band1->threshold);
+                        int b1,b2,b3,b4;
+                        for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
+                            M[i] = (sce0->coeffs[start+(w+w2)*128+i]
+                                  + sce1->coeffs[start+(w+w2)*128+i]) * 0.5;
+                            S[i] =  M[i]
+                                  - sce1->coeffs[start+(w+w2)*128+i];
+                        }
+
+                        abs_pow34_v(L34, sce0->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
+                        abs_pow34_v(R34, sce1->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
+                        abs_pow34_v(M34, M,                         sce0->ics.swb_sizes[g]);
+                        abs_pow34_v(S34, S,                         sce0->ics.swb_sizes[g]);
+                        dist1 += quantize_band_cost(s, &sce0->coeffs[start + (w+w2)*128],
+                                                    L34,
+                                                    sce0->ics.swb_sizes[g],
+                                                    sce0->sf_idx[w*16+g],
+                                                    sce0->band_type[w*16+g],
+                                                    lambda / band0->threshold, INFINITY, &b1, NULL, 0);
+                        dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128],
+                                                    R34,
+                                                    sce1->ics.swb_sizes[g],
+                                                    sce1->sf_idx[w*16+g],
+                                                    sce1->band_type[w*16+g],
+                                                    lambda / band1->threshold, INFINITY, &b2, NULL, 0);
+                        dist2 += quantize_band_cost(s, M,
+                                                    M34,
+                                                    sce0->ics.swb_sizes[g],
+                                                    mididx,
+                                                    midcb,
+                                                    lambda / minthr, INFINITY, &b3, NULL, 0);
+                        dist2 += quantize_band_cost(s, S,
+                                                    S34,
+                                                    sce1->ics.swb_sizes[g],
+                                                    sididx,
+                                                    sidcb,
+                                                    mslambda / (minthr * bmax), INFINITY, &b4, NULL, 0);
+                        B0 += b1+b2;
+                        B1 += b3+b4;
+                        dist1 -= b1+b2;
+                        dist2 -= b3+b4;
+                    }
+                    cpe->ms_mask[w*16+g] = dist2 <= dist1 && B1 < B0; /* pick M/S only when dist2 <= dist1 and B1 < B0 (B* sum the b1..b4 outputs of quantize_band_cost, presumably bit counts - confirm) */
+                    if (cpe->ms_mask[w*16+g]) {
+                        if (sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT) {
+                            sce0->sf_idx[w*16+g] = mididx;
+                            sce1->sf_idx[w*16+g] = sididx;
+                            sce0->band_type[w*16+g] = midcb;
+                            sce1->band_type[w*16+g] = sidcb;
+                        } else if ((sce0->band_type[w*16+g] != NOISE_BT) ^ (sce1->band_type[w*16+g] != NOISE_BT)) {
+                            /* ms_mask unneeded, and it confuses some decoders */
+                            cpe->ms_mask[w*16+g] = 0;
+                        }
+                        break;
+                    } else if (B1 > B0) {
+                        /* More boost won't fix this */
+                        break;
+                    }
                 }
-                cpe->ms_mask[w*16+g] = dist2 < dist1;
             }
+            if (!sce0->zeroes[w*16+g] && sce0->band_type[w*16+g] < RESERVED_BT)
+                prev_mid = sce0->sf_idx[w*16+g];
+            if (!sce1->zeroes[w*16+g] && !cpe->is_mask[w*16+g] && sce1->band_type[w*16+g] < RESERVED_BT)
+                prev_side = sce1->sf_idx[w*16+g];
             start += sce0->ics.swb_sizes[g];
         }
     }
 }
 #endif /*HAVE_MIPSFPU */
 
-static void codebook_trellis_rate_mips(AACEncContext *s, SingleChannelElement *sce,
-                                       int win, int group_len, const float lambda)
-{
-    BandCodingPath path[120][12];
-    int w, swb, cb, start, size;
-    int i, j;
-    const int max_sfb  = sce->ics.max_sfb;
-    const int run_bits = sce->ics.num_windows == 1 ? 5 : 3;
-    const int run_esc  = (1 << run_bits) - 1;
-    int idx, ppos, count;
-    int stackrun[120], stackcb[120], stack_len;
-    float next_minbits = INFINITY;
-    int next_mincb = 0;
-
-    abs_pow34_v(s->scoefs, sce->coeffs, 1024);
-    start = win*128;
-    for (cb = 0; cb < 12; cb++) {
-        path[0][cb].cost     = run_bits+4;
-        path[0][cb].prev_idx = -1;
-        path[0][cb].run      = 0;
-    }
-    for (swb = 0; swb < max_sfb; swb++) {
-        size = sce->ics.swb_sizes[swb];
-        if (sce->zeroes[win*16 + swb]) {
-            float cost_stay_here = path[swb][0].cost;
-            float cost_get_here  = next_minbits + run_bits + 4;
-            if (   run_value_bits[sce->ics.num_windows == 8][path[swb][0].run]
-                != run_value_bits[sce->ics.num_windows == 8][path[swb][0].run+1])
-                cost_stay_here += run_bits;
-            if (cost_get_here < cost_stay_here) {
-                path[swb+1][0].prev_idx = next_mincb;
-                path[swb+1][0].cost     = cost_get_here;
-                path[swb+1][0].run      = 1;
-            } else {
-                path[swb+1][0].prev_idx = 0;
-                path[swb+1][0].cost     = cost_stay_here;
-                path[swb+1][0].run      = path[swb][0].run + 1;
-            }
-            next_minbits = path[swb+1][0].cost;
-            next_mincb = 0;
-            for (cb = 1; cb < 12; cb++) {
-                path[swb+1][cb].cost = 61450;
-                path[swb+1][cb].prev_idx = -1;
-                path[swb+1][cb].run = 0;
-            }
-        } else {
-            float minbits = next_minbits;
-            int mincb = next_mincb;
-            int startcb = sce->band_type[win*16+swb];
-            next_minbits = INFINITY;
-            next_mincb = 0;
-            for (cb = 0; cb < startcb; cb++) {
-                path[swb+1][cb].cost = 61450;
-                path[swb+1][cb].prev_idx = -1;
-                path[swb+1][cb].run = 0;
-            }
-            for (cb = startcb; cb < 12; cb++) {
-                float cost_stay_here, cost_get_here;
-                float bits = 0.0f;
-                for (w = 0; w < group_len; w++) {
-                    bits += quantize_band_cost_bits(s, sce->coeffs + start + w*128,
-                                                    s->scoefs + start + w*128, size,
-                                                    sce->sf_idx[(win+w)*16+swb], cb,
-                                                    0, INFINITY, NULL);
-                }
-                cost_stay_here = path[swb][cb].cost + bits;
-                cost_get_here  = minbits            + bits + run_bits + 4;
-                if (   run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run]
-                    != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1])
-                    cost_stay_here += run_bits;
-                if (cost_get_here < cost_stay_here) {
-                    path[swb+1][cb].prev_idx = mincb;
-                    path[swb+1][cb].cost     = cost_get_here;
-                    path[swb+1][cb].run      = 1;
-                } else {
-                    path[swb+1][cb].prev_idx = cb;
-                    path[swb+1][cb].cost     = cost_stay_here;
-                    path[swb+1][cb].run      = path[swb][cb].run + 1;
-                }
-                if (path[swb+1][cb].cost < next_minbits) {
-                    next_minbits = path[swb+1][cb].cost;
-                    next_mincb = cb;
-                }
-            }
-        }
-        start += sce->ics.swb_sizes[swb];
-    }
-
-    stack_len = 0;
-    idx       = 0;
-    for (cb = 1; cb < 12; cb++)
-        if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
-            idx = cb;
-    ppos = max_sfb;
-    while (ppos > 0) {
-        av_assert1(idx >= 0);
-        cb = idx;
-        stackrun[stack_len] = path[ppos][cb].run;
-        stackcb [stack_len] = cb;
-        idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx;
-        ppos -= path[ppos][cb].run;
-        stack_len++;
-    }
+#include "libavcodec/aaccoder_trellis.h"
 
-    start = 0;
-    for (i = stack_len - 1; i >= 0; i--) {
-        put_bits(&s->pb, 4, stackcb[i]);
-        count = stackrun[i];
-        memset(sce->zeroes + win*16 + start, !stackcb[i], count);
-        for (j = 0; j < count; j++) {
-            sce->band_type[win*16 + start] =  stackcb[i];
-            start++;
-        }
-        while (count >= run_esc) {
-            put_bits(&s->pb, run_bits, run_esc);
-            count -= run_esc;
-        }
-        put_bits(&s->pb, run_bits, count);
-    }
-}
+#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
 #endif /* HAVE_INLINE_ASM */
 
 void ff_aac_coder_init_mips(AACEncContext *c) {
 #if HAVE_INLINE_ASM
+#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
     AACCoefficientsEncoder *e = c->coder;
-    int option = c->options.aac_coder;
+    int option = c->options.coder;
 
     if (option == 2) {
         e->quantize_and_encode_band = quantize_and_encode_band_mips;
-        e->encode_window_bands_info = codebook_trellis_rate_mips;
+        e->encode_window_bands_info = codebook_trellis_rate;
 #if HAVE_MIPSFPU
-        e->search_for_quantizers    = search_for_quantizers_twoloop_mips;
-        e->search_for_ms            = search_for_ms_mips;
+        e->search_for_quantizers    = search_for_quantizers_twoloop;
 #endif /* HAVE_MIPSFPU */
     }
+#if HAVE_MIPSFPU
+    e->search_for_ms            = search_for_ms_mips; /* set outside the option == 2 branch, so it applies to every coder option */
+#endif /* HAVE_MIPSFPU */
+#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
 #endif /* HAVE_INLINE_ASM */
 }
diff --git a/libavcodec/mips/aacdec_mips.h b/libavcodec/mips/aacdec_mips.h
index 054a9fba..758266fc 100644
--- a/libavcodec/mips/aacdec_mips.h
+++ b/libavcodec/mips/aacdec_mips.h
@@ -54,13 +54,14 @@
  * Reference: libavcodec/aacdec.c
  */
 
-#ifndef AVCODEC_MIPS_AACDEC_FLOAT_H
-#define AVCODEC_MIPS_AACDEC_FLOAT_H
+#ifndef AVCODEC_MIPS_AACDEC_MIPS_H
+#define AVCODEC_MIPS_AACDEC_MIPS_H
 
 #include "libavcodec/aac.h"
 #include "libavutil/mips/asmdefs.h"
 
 #if HAVE_INLINE_ASM && HAVE_MIPSFPU
+#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
 static inline float *VMUL2_mips(float *dst, const float *v, unsigned idx,
                            const float *scale)
 {
@@ -246,6 +247,7 @@ static inline float *VMUL4S_mips(float *dst, const float *v, unsigned idx,
 #define VMUL4 VMUL4_mips
 #define VMUL2S VMUL2S_mips
 #define VMUL4S VMUL4S_mips
+#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
 #endif /* HAVE_INLINE_ASM && HAVE_MIPSFPU */
 
-#endif /* AVCODEC_MIPS_AACDEC_FLOAT_H */
+#endif /* AVCODEC_MIPS_AACDEC_MIPS_H */
diff --git a/libavcodec/mips/aacpsdsp_mips.c b/libavcodec/mips/aacpsdsp_mips.c
index 695f9ef3..83fdc2f9 100644
--- a/libavcodec/mips/aacpsdsp_mips.c
+++ b/libavcodec/mips/aacpsdsp_mips.c
@@ -188,6 +188,7 @@ static void ps_hybrid_synthesis_deint_mips(float out[2][38][64],
 }
 
 #if HAVE_MIPSFPU
+#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
 static void ps_add_squares_mips(float *dst, const float (*src)[2], int n)
 {
     int i;
@@ -442,6 +443,7 @@ static void ps_stereo_interpolate_mips(float (*l)[2], float (*r)[2],
         : "memory"
     );
 }
+#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
 #endif /* HAVE_MIPSFPU */
 #endif /* HAVE_INLINE_ASM */
 
@@ -451,10 +453,12 @@ void ff_psdsp_init_mips(PSDSPContext *s)
     s->hybrid_analysis_ileave = ps_hybrid_analysis_ileave_mips;
     s->hybrid_synthesis_deint = ps_hybrid_synthesis_deint_mips;
 #if HAVE_MIPSFPU
+#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
     s->add_squares            = ps_add_squares_mips;
     s->mul_pair_single        = ps_mul_pair_single_mips;
     s->decorrelate            = ps_decorrelate_mips;
     s->stereo_interpolate[0]  = ps_stereo_interpolate_mips;
+#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
 #endif /* HAVE_MIPSFPU */
 #endif /* HAVE_INLINE_ASM */
 }
diff --git a/libavcodec/mips/aacpsy_mips.h b/libavcodec/mips/aacpsy_mips.h
index 596dcadd..a1fe5cce 100644
--- a/libavcodec/mips/aacpsy_mips.h
+++ b/libavcodec/mips/aacpsy_mips.h
@@ -59,60 +59,65 @@
 #include "libavutil/mips/asmdefs.h"
 
 #if HAVE_INLINE_ASM && HAVE_MIPSFPU && ( PSY_LAME_FIR_LEN == 21 )
+#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
 static void calc_thr_3gpp_mips(const FFPsyWindowInfo *wi, const int num_bands,
                                AacPsyChannel *pch, const uint8_t *band_sizes,
-                               const float *coefs)
+                               const float *coefs, const int cutoff)
 {
     int i, w, g;
-    int start = 0;
+    int start = 0, wstart = 0; /* start: running offset into coefs; wstart: offset within the current window, compared against cutoff */
     for (w = 0; w < wi->num_windows*16; w += 16) {
+        wstart = 0;
         for (g = 0; g < num_bands; g++) {
             AacPsyBand *band = &pch->band[w+g];
 
             float form_factor = 0.0f;
             float Temp;
             band->energy = 0.0f;
-            for (i = 0; i < band_sizes[g]; i+=4) {
-                float a, b, c, d;
-                float ax, bx, cx, dx;
-                float *cf = (float *)&coefs[start+i];
-
-                __asm__ volatile (
-                    "lwc1   %[a],   0(%[cf])                \n\t"
-                    "lwc1   %[b],   4(%[cf])                \n\t"
-                    "lwc1   %[c],   8(%[cf])                \n\t"
-                    "lwc1   %[d],   12(%[cf])               \n\t"
-                    "abs.s  %[a],   %[a]                    \n\t"
-                    "abs.s  %[b],   %[b]                    \n\t"
-                    "abs.s  %[c],   %[c]                    \n\t"
-                    "abs.s  %[d],   %[d]                    \n\t"
-                    "sqrt.s %[ax],  %[a]                    \n\t"
-                    "sqrt.s %[bx],  %[b]                    \n\t"
-                    "sqrt.s %[cx],  %[c]                    \n\t"
-                    "sqrt.s %[dx],  %[d]                    \n\t"
-                    "madd.s %[e],   %[e],   %[a],   %[a]    \n\t"
-                    "madd.s %[e],   %[e],   %[b],   %[b]    \n\t"
-                    "madd.s %[e],   %[e],   %[c],   %[c]    \n\t"
-                    "madd.s %[e],   %[e],   %[d],   %[d]    \n\t"
-                    "add.s  %[f],   %[f],   %[ax]           \n\t"
-                    "add.s  %[f],   %[f],   %[bx]           \n\t"
-                    "add.s  %[f],   %[f],   %[cx]           \n\t"
-                    "add.s  %[f],   %[f],   %[dx]           \n\t"
-
-                    : [a]"=&f"(a), [b]"=&f"(b),
-                      [c]"=&f"(c), [d]"=&f"(d),
-                      [e]"+f"(band->energy), [f]"+f"(form_factor),
-                      [ax]"=&f"(ax), [bx]"=&f"(bx),
-                      [cx]"=&f"(cx), [dx]"=&f"(dx)
-                    : [cf]"r"(cf)
-                    : "memory"
-                );
+            if (wstart < cutoff) { /* bands starting at or past cutoff are skipped, leaving energy and form_factor at 0 */
+                for (i = 0; i < band_sizes[g]; i+=4) {
+                    float a, b, c, d;
+                    float ax, bx, cx, dx;
+                    float *cf = (float *)&coefs[start+i];
+
+                    __asm__ volatile (
+                        "lwc1   %[a],   0(%[cf])                \n\t"
+                        "lwc1   %[b],   4(%[cf])                \n\t"
+                        "lwc1   %[c],   8(%[cf])                \n\t"
+                        "lwc1   %[d],   12(%[cf])               \n\t"
+                        "abs.s  %[a],   %[a]                    \n\t"
+                        "abs.s  %[b],   %[b]                    \n\t"
+                        "abs.s  %[c],   %[c]                    \n\t"
+                        "abs.s  %[d],   %[d]                    \n\t"
+                        "sqrt.s %[ax],  %[a]                    \n\t"
+                        "sqrt.s %[bx],  %[b]                    \n\t"
+                        "sqrt.s %[cx],  %[c]                    \n\t"
+                        "sqrt.s %[dx],  %[d]                    \n\t"
+                        "madd.s %[e],   %[e],   %[a],   %[a]    \n\t"
+                        "madd.s %[e],   %[e],   %[b],   %[b]    \n\t"
+                        "madd.s %[e],   %[e],   %[c],   %[c]    \n\t"
+                        "madd.s %[e],   %[e],   %[d],   %[d]    \n\t"
+                        "add.s  %[f],   %[f],   %[ax]           \n\t"
+                        "add.s  %[f],   %[f],   %[bx]           \n\t"
+                        "add.s  %[f],   %[f],   %[cx]           \n\t"
+                        "add.s  %[f],   %[f],   %[dx]           \n\t"
+
+                        : [a]"=&f"(a), [b]"=&f"(b),
+                          [c]"=&f"(c), [d]"=&f"(d),
+                          [e]"+f"(band->energy), [f]"+f"(form_factor),
+                          [ax]"=&f"(ax), [bx]"=&f"(bx),
+                          [cx]"=&f"(cx), [dx]"=&f"(dx)
+                        : [cf]"r"(cf)
+                        : "memory"
+                    );
+                }
             }
 
-            Temp = sqrtf((float)band_sizes[g] / band->energy);
+            Temp = band->energy > 0 ? sqrtf((float)band_sizes[g] / band->energy) : 0; /* zero energy (silent or skipped band) would give size / 0 = inf and nz_lines = 0 * inf = NaN */
             band->thr      = band->energy * 0.001258925f;
             band->nz_lines = form_factor * sqrtf(Temp);
             start += band_sizes[g];
+            wstart += band_sizes[g];
         }
     }
 }
@@ -228,5 +233,6 @@ static void psy_hp_filter_mips(const float *firbuf, float *hpfsmpl, const float
 #define calc_thr_3gpp calc_thr_3gpp_mips
 #define psy_hp_filter psy_hp_filter_mips
 
+#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
 #endif /* HAVE_INLINE_ASM && HAVE_MIPSFPU */
 #endif /* AVCODEC_MIPS_AACPSY_MIPS_H */
diff --git a/libavcodec/mips/aacsbr_mips.c b/libavcodec/mips/aacsbr_mips.c
index e478290e..56aa4e86 100644
--- a/libavcodec/mips/aacsbr_mips.c
+++ b/libavcodec/mips/aacsbr_mips.c
@@ -311,6 +311,7 @@ static int sbr_x_gen_mips(SpectralBandReplication *sbr, float X[2][38][64],
 }
 
 #if HAVE_MIPSFPU
+#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
 static void sbr_hf_assemble_mips(float Y1[38][64][2],
                             const float X_high[64][40][2],
                             SpectralBandReplication *sbr, SBRData *ch_data,
@@ -603,6 +604,7 @@ static void sbr_hf_inverse_filter_mips(SBRDSPContext *dsp,
         }
     }
 }
+#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
 #endif /* HAVE_MIPSFPU */
 #endif /* HAVE_INLINE_ASM */
 
@@ -612,8 +614,10 @@ void ff_aacsbr_func_ptr_init_mips(AACSBRContext *c)
     c->sbr_lf_gen            = sbr_lf_gen_mips;
     c->sbr_x_gen             = sbr_x_gen_mips;
 #if HAVE_MIPSFPU
+#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
     c->sbr_hf_inverse_filter = sbr_hf_inverse_filter_mips;
     c->sbr_hf_assemble       = sbr_hf_assemble_mips;
+#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
 #endif /* HAVE_MIPSFPU */
 #endif /* HAVE_INLINE_ASM */
 }
diff --git a/libavcodec/mips/aacsbr_mips.h b/libavcodec/mips/aacsbr_mips.h
index e525197a..4461e763 100644
--- a/libavcodec/mips/aacsbr_mips.h
+++ b/libavcodec/mips/aacsbr_mips.h
@@ -51,8 +51,8 @@
  * Reference: libavcodec/aacsbr.c
  */
 
-#ifndef AVCODEC_MIPS_AACSBR_FLOAT_H
-#define AVCODEC_MIPS_AACSBR_FLOAT_H
+#ifndef AVCODEC_MIPS_AACSBR_MIPS_H
+#define AVCODEC_MIPS_AACSBR_MIPS_H
 
 #include "libavcodec/aac.h"
 #include "libavcodec/sbr.h"
@@ -149,7 +149,8 @@ static void sbr_qmf_analysis_mips(AVFloatDSPContext *fdsp, FFTContext *mdct,
     }
 }
 
-#if (HAVE_MIPSFPU && !HAVE_LOONGSON3)
+#if HAVE_MIPSFPU
+#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
 static void sbr_qmf_synthesis_mips(FFTContext *mdct,
                               SBRDSPContext *sbrdsp, AVFloatDSPContext *fdsp,
                               float *out, float X[2][38][64],
@@ -488,7 +489,8 @@ static void sbr_qmf_synthesis_mips(FFTContext *mdct,
 #define sbr_qmf_analysis sbr_qmf_analysis_mips
 #define sbr_qmf_synthesis sbr_qmf_synthesis_mips
 
-#endif /* (HAVE_MIPSFPU && !HAVE_LOONGSON3) */
+#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
+#endif /* HAVE_MIPSFPU */
 #endif /* HAVE_INLINE_ASM */
 
-#endif /* AVCODEC_MIPS_AACSBR_FLOAT_H */
+#endif /* AVCODEC_MIPS_AACSBR_MIPS_H */
diff --git a/libavcodec/mips/ac3dsp_mips.c b/libavcodec/mips/ac3dsp_mips.c
index 01c7de57..f9aaf156 100644
--- a/libavcodec/mips/ac3dsp_mips.c
+++ b/libavcodec/mips/ac3dsp_mips.c
@@ -59,7 +59,7 @@
 #include "libavutil/mips/asmdefs.h"
 
 #if HAVE_INLINE_ASM
-#if HAVE_MIPSDSPR1
+#if HAVE_MIPSDSP
 static void ac3_bit_alloc_calc_bap_mips(int16_t *mask, int16_t *psd,
                                         int start, int end,
                                         int snr_offset, int floor,
@@ -201,6 +201,7 @@ static void ac3_update_bap_counts_mips(uint16_t mant_cnt[16], uint8_t *bap,
 #endif
 
 #if HAVE_MIPSFPU
+#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
 static void float_to_fixed24_mips(int32_t *dst, const float *src, unsigned int len)
 {
     const float scale = 1 << 24;
@@ -395,19 +396,22 @@ static void ac3_downmix_mips(float **samples, float (*matrix)[2],
         :"memory"
     );
 }
-#endif
+#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
+#endif /* HAVE_MIPSFPU */
 #endif /* HAVE_INLINE_ASM */
 
 void ff_ac3dsp_init_mips(AC3DSPContext *c, int bit_exact) {
 #if HAVE_INLINE_ASM
-#if HAVE_MIPSDSPR1
+#if HAVE_MIPSDSP
     c->bit_alloc_calc_bap = ac3_bit_alloc_calc_bap_mips;
     c->update_bap_counts  = ac3_update_bap_counts_mips;
 #endif
 #if HAVE_MIPSFPU
+#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
     c->float_to_fixed24 = float_to_fixed24_mips;
     c->downmix          = ac3_downmix_mips;
 #endif
 #endif
 
+#endif /* HAVE_INLINE_ASM */
 }
diff --git a/libavcodec/mips/acelp_filters_mips.c b/libavcodec/mips/acelp_filters_mips.c
index ba789abe..478db855 100644
--- a/libavcodec/mips/acelp_filters_mips.c
+++ b/libavcodec/mips/acelp_filters_mips.c
@@ -57,6 +57,7 @@
 #include "libavutil/mips/asmdefs.h"
 
 #if HAVE_INLINE_ASM
+#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
 static void ff_acelp_interpolatef_mips(float *out, const float *in,
                            const float *filter_coeffs, int precision,
                            int frac_pos, int filter_length, int length)
@@ -206,12 +207,15 @@ static void ff_acelp_apply_order_2_transfer_function_mips(float *out, const floa
            "$f12", "$f13", "$f14", "$f15", "$f16", "memory"
     );
 }
+#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
 #endif /* HAVE_INLINE_ASM */
 
 void ff_acelp_filter_init_mips(ACELPFContext *c)
 {
 #if HAVE_INLINE_ASM
+#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
     c->acelp_interpolatef                      = ff_acelp_interpolatef_mips;
     c->acelp_apply_order_2_transfer_function   = ff_acelp_apply_order_2_transfer_function_mips;
 #endif
+#endif /* HAVE_INLINE_ASM */
 }
diff --git a/libavcodec/mips/acelp_vectors_mips.c b/libavcodec/mips/acelp_vectors_mips.c
index ad943486..0ab2b6a8 100644
--- a/libavcodec/mips/acelp_vectors_mips.c
+++ b/libavcodec/mips/acelp_vectors_mips.c
@@ -57,6 +57,7 @@
 #include "libavutil/mips/asmdefs.h"
 
 #if HAVE_INLINE_ASM
+#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
 static void ff_weighted_vector_sumf_mips(
                   float *out, const float *in_a, const float *in_b,
                   float weight_coeff_a, float weight_coeff_b, int length)
@@ -92,11 +93,14 @@ static void ff_weighted_vector_sumf_mips(
         : "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "memory"
     );
 }
+#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
 #endif /* HAVE_INLINE_ASM */
 
 void ff_acelp_vectors_init_mips(ACELPVContext *c)
 {
 #if HAVE_INLINE_ASM
+#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
     c->weighted_vector_sumf = ff_weighted_vector_sumf_mips;
 #endif
+#endif /* HAVE_INLINE_ASM */
 }
diff --git a/libavcodec/mips/amrwbdec_mips.c b/libavcodec/mips/amrwbdec_mips.c
index 1d6ed2df..5dc05436 100644
--- a/libavcodec/mips/amrwbdec_mips.c
+++ b/libavcodec/mips/amrwbdec_mips.c
@@ -54,7 +54,8 @@
 #include "amrwbdec_mips.h"
 
 #if HAVE_INLINE_ASM
-void hb_fir_filter_mips(float *out, const float fir_coef[HB_FIR_SIZE + 1],
+#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
+void ff_hb_fir_filter_mips(float *out, const float fir_coef[HB_FIR_SIZE + 1],
                           float mem[HB_FIR_SIZE], const float *in)
 {
     int i;
@@ -184,4 +185,5 @@ void hb_fir_filter_mips(float *out, const float fir_coef[HB_FIR_SIZE + 1],
     }
     memcpy(mem, data + AMRWB_SFR_SIZE_16k, HB_FIR_SIZE * sizeof(float));
 }
+#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
 #endif /* HAVE_INLINE_ASM */
diff --git a/libavcodec/mips/amrwbdec_mips.h b/libavcodec/mips/amrwbdec_mips.h
index a469918d..a9f66fef 100644
--- a/libavcodec/mips/amrwbdec_mips.h
+++ b/libavcodec/mips/amrwbdec_mips.h
@@ -49,14 +49,16 @@
  * @file
  * Reference: libavcodec/amrwbdec.c
  */
-#ifndef AVCODEC_AMRWBDEC_MIPS_H
-#define AVCODEC_AMRWBDEC_MIPS_H
+#ifndef AVCODEC_MIPS_AMRWBDEC_MIPS_H
+#define AVCODEC_MIPS_AMRWBDEC_MIPS_H
 #include "config.h"
 
 #if HAVE_MIPSFPU && HAVE_INLINE_ASM
-void hb_fir_filter_mips(float *out, const float fir_coef[],
+#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
+void ff_hb_fir_filter_mips(float *out, const float fir_coef[],
                           float mem[], const float *in);
-#define hb_fir_filter hb_fir_filter_mips
+#define hb_fir_filter ff_hb_fir_filter_mips
+#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
 #endif
 
-#endif /* AVCODEC_AMRWBDEC_MIPS_H  */
+#endif /* AVCODEC_MIPS_AMRWBDEC_MIPS_H  */
diff --git a/libavcodec/mips/blockdsp_init_mips.c b/libavcodec/mips/blockdsp_init_mips.c
new file mode 100644
index 00000000..30ae95fa
--- /dev/null
+++ b/libavcodec/mips/blockdsp_init_mips.c
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2015 Parag Salasakar (parag.salasakar@imgtec.com)
+ *                    Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "blockdsp_mips.h"
+
+#if HAVE_MSA
+/* Point the clear/fill entries of c at the MSA implementations. */
+static av_cold void blockdsp_init_msa(BlockDSPContext *c)
+{
+    c->clear_block = ff_clear_block_msa;
+    c->clear_blocks = ff_clear_blocks_msa;
+
+    c->fill_block_tab[0] = ff_fill_block16_msa;
+    c->fill_block_tab[1] = ff_fill_block8_msa;
+}
+#endif  // #if HAVE_MSA
+
+#if HAVE_MMI
+/* Point the clear/fill entries of c at the Loongson MMI implementations. */
+static av_cold void blockdsp_init_mmi(BlockDSPContext *c)
+{
+    c->clear_block = ff_clear_block_mmi;
+    c->clear_blocks = ff_clear_blocks_mmi;
+
+    c->fill_block_tab[0] = ff_fill_block16_mmi;
+    c->fill_block_tab[1] = ff_fill_block8_mmi;
+}
+#endif /* HAVE_MMI */
+
+/**
+ * Install the MIPS-specific BlockDSP routines into c.
+ *
+ * The MSA set is installed first and the MMI set second, so when both
+ * HAVE_MSA and HAVE_MMI are enabled the MMI pointers are the ones left in c.
+ * With neither enabled, c is left untouched.
+ */
+av_cold void ff_blockdsp_init_mips(BlockDSPContext *c)
+{
+#if HAVE_MSA
+    blockdsp_init_msa(c);
+#endif  // #if HAVE_MSA
+#if HAVE_MMI
+    blockdsp_init_mmi(c);
+#endif /* HAVE_MMI */
+}
diff --git a/libavcodec/mips/blockdsp_mips.h b/libavcodec/mips/blockdsp_mips.h
new file mode 100644
index 00000000..9559d40e
--- /dev/null
+++ b/libavcodec/mips/blockdsp_mips.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2015 Parag Salasakar (parag.salasakar@imgtec.com)
+ *                    Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MIPS_BLOCKDSP_MIPS_H
+#define AVCODEC_MIPS_BLOCKDSP_MIPS_H
+
+#include "../mpegvideo.h"
+
+/*
+ * MSA versions, implemented in blockdsp_msa.c. The fill routines write val
+ * into height rows of 16 or 8 bytes, stride bytes apart; the clear routines
+ * zero 128 bytes (one block) or 768 bytes (six blocks) starting at block.
+ */
+void ff_fill_block16_msa(uint8_t *src, uint8_t val, int stride, int height);
+void ff_fill_block8_msa(uint8_t *src, uint8_t val, int stride, int height);
+void ff_clear_block_msa(int16_t *block);
+void ff_clear_blocks_msa(int16_t *block);
+
+/*
+ * Loongson MMI versions, implemented in blockdsp_mmi.c, taking the same
+ * arguments in the same order as the MSA routines above.
+ */
+void ff_fill_block16_mmi(uint8_t *block, uint8_t value, int line_size, int h);
+void ff_fill_block8_mmi(uint8_t *block, uint8_t value, int line_size, int h);
+void ff_clear_block_mmi(int16_t *block);
+void ff_clear_blocks_mmi(int16_t *block);
+
+#endif  // #ifndef AVCODEC_MIPS_BLOCKDSP_MIPS_H
diff --git a/libavcodec/mips/blockdsp_mmi.c b/libavcodec/mips/blockdsp_mmi.c
new file mode 100644
index 00000000..63eaf69a
--- /dev/null
+++ b/libavcodec/mips/blockdsp_mmi.c
@@ -0,0 +1,174 @@
+/*
+ * Loongson SIMD optimized blockdsp
+ *
+ * Copyright (c) 2015 Loongson Technology Corporation Limited
+ * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "blockdsp_mips.h"
+
+/*
+ * Fill h rows of 16 bytes with value. The first row starts at block and each
+ * following row starts line_size bytes after the previous one. The row loop
+ * stores before it tests the counter, so h must be at least 1.
+ *
+ * The asm uses $8, $9 and $f2 as scratch and writes through block, so those
+ * registers and "memory" are all listed as clobbers.
+ */
+void ff_fill_block16_mmi(uint8_t *block, uint8_t value, int line_size, int h)
+{
+    __asm__ volatile (
+        "move $8, %3                \r\n"
+        "move $9, %0                \r\n"
+        "dmtc1 %1, $f2              \r\n"
+        "punpcklbh $f2, $f2, $f2    \r\n"
+        "punpcklbh $f2, $f2, $f2    \r\n"
+        "punpcklbh $f2, $f2, $f2    \r\n"
+        "1:                         \r\n"
+        "gssdlc1 $f2, 7($9)         \r\n"
+        "gssdrc1 $f2, 0($9)         \r\n"
+        "gssdlc1 $f2, 15($9)        \r\n"
+        "gssdrc1 $f2, 8($9)         \r\n"
+        "daddi $8, $8, -1           \r\n"
+        "daddu $9, $9, %2           \r\n"
+        "bnez $8, 1b                \r\n"
+        ::"r"(block),"r"(value),"r"(line_size),"r"(h)
+        : "$8","$9","$f2","memory"
+    );
+}
+
+/*
+ * Fill h rows of 8 bytes with value. The first row starts at block and each
+ * following row starts line_size bytes after the previous one. The row loop
+ * stores before it tests the counter, so h must be at least 1.
+ *
+ * The asm uses $8, $9 and $f2 as scratch and writes through block, so those
+ * registers and "memory" are all listed as clobbers.
+ */
+void ff_fill_block8_mmi(uint8_t *block, uint8_t value, int line_size, int h)
+{
+    __asm__ volatile (
+        "move $8, %3                \r\n"
+        "move $9, %0                \r\n"
+        "dmtc1 %1, $f2              \r\n"
+        "punpcklbh $f2, $f2, $f2    \r\n"
+        "punpcklbh $f2, $f2, $f2    \r\n"
+        "punpcklbh $f2, $f2, $f2    \r\n"
+        "1:                         \r\n"
+        "gssdlc1 $f2, 7($9)         \r\n"
+        "gssdrc1 $f2, 0($9)         \r\n"
+        "daddi $8, $8, -1           \r\n"
+        "daddu $9, $9, %2           \r\n"
+        "bnez $8, 1b                \r\n"
+        ::"r"(block),"r"(value),"r"(line_size),"r"(h)
+        : "$8","$9","$f2","memory"
+    );
+}
+
+/*
+ * Zero 128 bytes starting at block (eight 16-byte stores at offsets 0..112).
+ * $f0/$f2 hold the zero pattern and are declared as clobbers.
+ * NOTE(review): gssqc1 is presumably an aligned 16-byte store, which would
+ * require block to be 16-byte aligned -- confirm against the callers.
+ */
+void ff_clear_block_mmi(int16_t *block)
+{
+    __asm__ volatile (
+        "xor $f0, $f0, $f0              \r\n"
+        "xor $f2, $f2, $f2              \r\n"
+        "gssqc1 $f0, $f2,   0(%0)       \r\n"
+        "gssqc1 $f0, $f2,  16(%0)       \r\n"
+        "gssqc1 $f0, $f2,  32(%0)       \r\n"
+        "gssqc1 $f0, $f2,  48(%0)       \r\n"
+        "gssqc1 $f0, $f2,  64(%0)       \r\n"
+        "gssqc1 $f0, $f2,  80(%0)       \r\n"
+        "gssqc1 $f0, $f2,  96(%0)       \r\n"
+        "gssqc1 $f0, $f2, 112(%0)       \r\n"
+        ::"r"(block)
+        : "$f0","$f2","memory"
+    );
+}
+
+/*
+ * Zero 768 bytes starting at block (48 16-byte stores at offsets 0..752),
+ * i.e. six times the area cleared by ff_clear_block_mmi().
+ * $f0/$f2 hold the zero pattern and are declared as clobbers.
+ */
+void ff_clear_blocks_mmi(int16_t *block)
+{
+    __asm__ volatile (
+        "xor $f0, $f0, $f0              \r\n"
+        "xor $f2, $f2, $f2              \r\n"
+        "gssqc1 $f0, $f2,   0(%0)       \r\n"
+        "gssqc1 $f0, $f2,  16(%0)       \r\n"
+        "gssqc1 $f0, $f2,  32(%0)       \r\n"
+        "gssqc1 $f0, $f2,  48(%0)       \r\n"
+        "gssqc1 $f0, $f2,  64(%0)       \r\n"
+        "gssqc1 $f0, $f2,  80(%0)       \r\n"
+        "gssqc1 $f0, $f2,  96(%0)       \r\n"
+        "gssqc1 $f0, $f2, 112(%0)       \r\n"
+
+        "gssqc1 $f0, $f2, 128(%0)       \r\n"
+        "gssqc1 $f0, $f2, 144(%0)       \r\n"
+        "gssqc1 $f0, $f2, 160(%0)       \r\n"
+        "gssqc1 $f0, $f2, 176(%0)       \r\n"
+        "gssqc1 $f0, $f2, 192(%0)       \r\n"
+        "gssqc1 $f0, $f2, 208(%0)       \r\n"
+        "gssqc1 $f0, $f2, 224(%0)       \r\n"
+        "gssqc1 $f0, $f2, 240(%0)       \r\n"
+
+        "gssqc1 $f0, $f2, 256(%0)       \r\n"
+        "gssqc1 $f0, $f2, 272(%0)       \r\n"
+        "gssqc1 $f0, $f2, 288(%0)       \r\n"
+        "gssqc1 $f0, $f2, 304(%0)       \r\n"
+        "gssqc1 $f0, $f2, 320(%0)       \r\n"
+        "gssqc1 $f0, $f2, 336(%0)       \r\n"
+        "gssqc1 $f0, $f2, 352(%0)       \r\n"
+        "gssqc1 $f0, $f2, 368(%0)       \r\n"
+
+        "gssqc1 $f0, $f2, 384(%0)       \r\n"
+        "gssqc1 $f0, $f2, 400(%0)       \r\n"
+        "gssqc1 $f0, $f2, 416(%0)       \r\n"
+        "gssqc1 $f0, $f2, 432(%0)       \r\n"
+        "gssqc1 $f0, $f2, 448(%0)       \r\n"
+        "gssqc1 $f0, $f2, 464(%0)       \r\n"
+        "gssqc1 $f0, $f2, 480(%0)       \r\n"
+        "gssqc1 $f0, $f2, 496(%0)       \r\n"
+
+        "gssqc1 $f0, $f2, 512(%0)       \r\n"
+        "gssqc1 $f0, $f2, 528(%0)       \r\n"
+        "gssqc1 $f0, $f2, 544(%0)       \r\n"
+        "gssqc1 $f0, $f2, 560(%0)       \r\n"
+        "gssqc1 $f0, $f2, 576(%0)       \r\n"
+        "gssqc1 $f0, $f2, 592(%0)       \r\n"
+        "gssqc1 $f0, $f2, 608(%0)       \r\n"
+        "gssqc1 $f0, $f2, 624(%0)       \r\n"
+
+        "gssqc1 $f0, $f2, 640(%0)       \r\n"
+        "gssqc1 $f0, $f2, 656(%0)       \r\n"
+        "gssqc1 $f0, $f2, 672(%0)       \r\n"
+        "gssqc1 $f0, $f2, 688(%0)       \r\n"
+        "gssqc1 $f0, $f2, 704(%0)       \r\n"
+        "gssqc1 $f0, $f2, 720(%0)       \r\n"
+        "gssqc1 $f0, $f2, 736(%0)       \r\n"
+        "gssqc1 $f0, $f2, 752(%0)       \r\n"
+        ::"r"(block)
+        : "$f0","$f2","memory"
+    );
+}
diff --git a/libavcodec/mips/blockdsp_msa.c b/libavcodec/mips/blockdsp_msa.c
new file mode 100644
index 00000000..32ac858e
--- /dev/null
+++ b/libavcodec/mips/blockdsp_msa.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2015 Parag Salasakar (parag.salasakar@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/mips/generic_macros_msa.h"
+#include "blockdsp_mips.h"
+
+/*
+ * Fill 8 bytes per row with val, four rows per loop iteration (SD4), rows
+ * being src_stride bytes apart. height >> 2 iterations run, so any rows
+ * beyond the last multiple of four are not written.
+ */
+static void copy_8bit_value_width8_msa(uint8_t *src, uint8_t val,
+                                       int32_t src_stride, int32_t height)
+{
+    int32_t cnt;
+    uint64_t dst0;
+    v16u8 val0;
+
+    val0 = (v16u8) __msa_fill_b(val);
+    dst0 = __msa_copy_u_d((v2i64) val0, 0);
+
+    for (cnt = (height >> 2); cnt--;) {
+        SD4(dst0, dst0, dst0, dst0, src, src_stride);
+        src += (4 * src_stride);
+    }
+}
+
+/*
+ * Fill 16 bytes per row with val, eight rows per loop iteration (ST_UB8),
+ * rows being src_stride bytes apart. height >> 3 iterations run, so any rows
+ * beyond the last multiple of eight are not written.
+ */
+static void copy_8bit_value_width16_msa(uint8_t *src, uint8_t val,
+                                        int32_t src_stride, int32_t height)
+{
+    int32_t cnt;
+    v16u8 val0;
+
+    val0 = (v16u8) __msa_fill_b(val);
+
+    for (cnt = (height >> 3); cnt--;) {
+        ST_UB8(val0, val0, val0, val0, val0, val0, val0, val0, src, src_stride);
+        src += (8 * src_stride);
+    }
+}
+
+/*
+ * Store a 16-byte zero vector at the start of each row, two rows per loop
+ * iteration, rows being stride bytes apart; height / 2 iterations run.
+ * The counter is int32_t, like the other helpers in this file, so that it
+ * can hold height / 2 for every int32_t height instead of truncating it.
+ */
+static void memset_zero_16width_msa(uint8_t *src, int32_t stride,
+                                    int32_t height)
+{
+    int32_t cnt;
+    v16u8 zero = { 0 };
+
+    for (cnt = (height / 2); cnt--;) {
+        ST_UB(zero, src);
+        src += stride;
+        ST_UB(zero, src);
+        src += stride;
+    }
+}
+
+/* Fill height rows of 16 bytes with val; see copy_8bit_value_width16_msa. */
+void ff_fill_block16_msa(uint8_t *src, uint8_t val, int stride, int height)
+{
+    copy_8bit_value_width16_msa(src, val, stride, height);
+}
+
+/* Fill height rows of 8 bytes with val; see copy_8bit_value_width8_msa. */
+void ff_fill_block8_msa(uint8_t *src, uint8_t val, int stride, int height)
+{
+    copy_8bit_value_width8_msa(src, val, stride, height);
+}
+
+/* Zero one block: 8 rows of 16 bytes = 128 bytes starting at block. */
+void ff_clear_block_msa(int16_t *block)
+{
+    memset_zero_16width_msa((uint8_t *) block, 16, 8);
+}
+
+/* Zero six consecutive blocks: 48 rows of 16 bytes = 768 bytes. */
+void ff_clear_blocks_msa(int16_t *block)
+{
+    memset_zero_16width_msa((uint8_t *) block, 16, 8 * 6);
+}
diff --git a/libavcodec/mips/celp_filters_mips.c b/libavcodec/mips/celp_filters_mips.c
index 88ac4584..926f1cb3 100644
--- a/libavcodec/mips/celp_filters_mips.c
+++ b/libavcodec/mips/celp_filters_mips.c
@@ -58,6 +58,7 @@
 #include "libavutil/mips/asmdefs.h"
 
 #if HAVE_INLINE_ASM
+#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
 static void ff_celp_lp_synthesis_filterf_mips(float *out,
                                   const float *filter_coeffs,
                                   const float* in, int buffer_length,
@@ -278,12 +279,15 @@ static void ff_celp_lp_zero_synthesis_filterf_mips(float *out,
         out[n] = sum_out1;
     }
 }
+#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
 #endif /* HAVE_INLINE_ASM */
 
 void ff_celp_filter_init_mips(CELPFContext *c)
 {
 #if HAVE_INLINE_ASM
+#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
     c->celp_lp_synthesis_filterf        = ff_celp_lp_synthesis_filterf_mips;
     c->celp_lp_zero_synthesis_filterf   = ff_celp_lp_zero_synthesis_filterf_mips;
 #endif
+#endif /* HAVE_INLINE_ASM */
 }
diff --git a/libavcodec/mips/celp_math_mips.c b/libavcodec/mips/celp_math_mips.c
index 008dd803..ce711bd6 100644
--- a/libavcodec/mips/celp_math_mips.c
+++ b/libavcodec/mips/celp_math_mips.c
@@ -56,6 +56,7 @@
 #include "libavutil/mips/asmdefs.h"
 
 #if HAVE_INLINE_ASM
+#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
 static float ff_dot_productf_mips(const float* a, const float* b,
                                               int length)
 {
@@ -80,11 +81,14 @@ static float ff_dot_productf_mips(const float* a, const float* b,
     );
     return sum;
 }
+#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
 #endif /* HAVE_INLINE_ASM */
 
 void ff_celp_math_init_mips(CELPMContext *c)
 {
 #if HAVE_INLINE_ASM
+#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
     c->dot_productf = ff_dot_productf_mips;
 #endif
+#endif /* HAVE_INLINE_ASM */
 }
diff --git a/libavcodec/mips/compute_antialias_float.h b/libavcodec/mips/compute_antialias_float.h
index f6cf4650..e2b4f29f 100644
--- a/libavcodec/mips/compute_antialias_float.h
+++ b/libavcodec/mips/compute_antialias_float.h
@@ -58,6 +58,7 @@
 #include "libavutil/mips/asmdefs.h"
 
 #if HAVE_INLINE_ASM
+#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
 static void compute_antialias_mips_float(MPADecodeContext *s,
                                         GranuleDef *g)
 {
@@ -179,6 +180,7 @@ static void compute_antialias_mips_float(MPADecodeContext *s,
     );
 }
 #define compute_antialias compute_antialias_mips_float
+#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
 #endif /* HAVE_INLINE_ASM */
 
 #endif /* AVCODEC_MIPS_COMPUTE_ANTIALIAS_FLOAT_H */
diff --git a/libavcodec/mips/constants.c b/libavcodec/mips/constants.c
new file mode 100644
index 00000000..f8130d9e
--- /dev/null
+++ b/libavcodec/mips/constants.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2015 Loongson Technology Corporation Limited
+ * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/mem.h"
+#include "constants.h"
+
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_1) =       {0x0001000100010001ULL}; /* pw_N: N in each of the four 16-bit lanes */
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_3) =       {0x0003000300030003ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_4) =       {0x0004000400040004ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_5) =       {0x0005000500050005ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_8) =       {0x0008000800080008ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_9) =       {0x0009000900090009ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_10) =      {0x000A000A000A000AULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_16) =      {0x0010001000100010ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_18) =      {0x0012001200120012ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_20) =      {0x0014001400140014ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_28) =      {0x001C001C001C001CULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_32) =      {0x0020002000200020ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_53) =      {0x0035003500350035ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_64) =      {0x0040004000400040ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_128) =     {0x0080008000800080ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_512) =     {0x0200020002000200ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_m8tom5) =  {0xFFFBFFFAFFF9FFF8ULL}; /* 16-bit lanes, lowest first: -8, -7, -6, -5 */
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_m4tom1) =  {0xFFFFFFFEFFFDFFFCULL}; /* 16-bit lanes, lowest first: -4, -3, -2, -1 */
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_1to4) =    {0x0004000300020001ULL}; /* ascending sequences below also start in the lowest lane */
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_5to8) =    {0x0008000700060005ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_0to3) =    {0x0003000200010000ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_4to7) =    {0x0007000600050004ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_8tob) =    {0x000b000a00090008ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_ctof) =    {0x000f000e000d000cULL};
+
+DECLARE_ALIGNED(8, const uint64_t, ff_pb_1) =       {0x0101010101010101ULL}; /* pb_XX: hex byte XX in each of the eight bytes */
+DECLARE_ALIGNED(8, const uint64_t, ff_pb_3) =       {0x0303030303030303ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pb_80) =      {0x8080808080808080ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pb_A1) =      {0xA1A1A1A1A1A1A1A1ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pb_FE) =      {0xFEFEFEFEFEFEFEFEULL};
+
+DECLARE_ALIGNED(8, const uint64_t, ff_rnd) =        {0x0004000400040004ULL}; /* same bit pattern as ff_pw_4 */
+DECLARE_ALIGNED(8, const uint64_t, ff_rnd2) =       {0x0040004000400040ULL}; /* same bit pattern as ff_pw_64 */
+DECLARE_ALIGNED(8, const uint64_t, ff_rnd3) =       {0x0020002000200020ULL}; /* same bit pattern as ff_pw_32 */
+
+DECLARE_ALIGNED(8, const uint64_t, ff_wm1010) =     {0xFFFF0000FFFF0000ULL}; /* 16-bit lanes, lowest first: 0, 0xFFFF, 0, 0xFFFF */
+DECLARE_ALIGNED(8, const uint64_t, ff_d40000) =     {0x0000000000040000ULL}; /* 0x40000 (1 << 18) in the low 32 bits */
diff --git a/libavcodec/mips/constants.h b/libavcodec/mips/constants.h
new file mode 100644
index 00000000..0a4effda
--- /dev/null
+++ b/libavcodec/mips/constants.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2015 Loongson Technology Corporation Limited
+ * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MIPS_CONSTANTS_H
+#define AVCODEC_MIPS_CONSTANTS_H
+
+#include <stdint.h>
+
+extern const uint64_t ff_pw_1;
+extern const uint64_t ff_pw_3;
+extern const uint64_t ff_pw_4;
+extern const uint64_t ff_pw_5;
+extern const uint64_t ff_pw_8;
+extern const uint64_t ff_pw_9;
+extern const uint64_t ff_pw_10;
+extern const uint64_t ff_pw_16;
+extern const uint64_t ff_pw_18;
+extern const uint64_t ff_pw_20;
+extern const uint64_t ff_pw_28;
+extern const uint64_t ff_pw_32;
+extern const uint64_t ff_pw_53;
+extern const uint64_t ff_pw_64;
+extern const uint64_t ff_pw_128;
+extern const uint64_t ff_pw_512;
+extern const uint64_t ff_pw_m8tom5;
+extern const uint64_t ff_pw_m4tom1;
+extern const uint64_t ff_pw_1to4;
+extern const uint64_t ff_pw_5to8;
+extern const uint64_t ff_pw_0to3;
+extern const uint64_t ff_pw_4to7;
+extern const uint64_t ff_pw_8tob;
+extern const uint64_t ff_pw_ctof;
+
+extern const uint64_t ff_pb_1;
+extern const uint64_t ff_pb_3;
+extern const uint64_t ff_pb_80;
+extern const uint64_t ff_pb_A1;
+extern const uint64_t ff_pb_FE;
+
+extern const uint64_t ff_rnd;
+extern const uint64_t ff_rnd2;
+extern const uint64_t ff_rnd3;
+
+extern const uint64_t ff_wm1010;
+extern const uint64_t ff_d40000;
+
+#endif /* AVCODEC_MIPS_CONSTANTS_H */
diff --git a/libavcodec/mips/fft_mips.c b/libavcodec/mips/fft_mips.c
index cf008c65..3cb1a4c1 100644
--- a/libavcodec/mips/fft_mips.c
+++ b/libavcodec/mips/fft_mips.c
@@ -57,6 +57,7 @@
  */
 
 #if HAVE_INLINE_ASM
+#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
 static void ff_fft_calc_mips(FFTContext *s, FFTComplex *z)
 {
     int nbits, i, n, num_transforms, offset, step;
@@ -494,6 +495,7 @@ static void ff_imdct_calc_mips(FFTContext *s, FFTSample *output, const FFTSample
         output[n-k-4] = output[n2+k+3];
     }
 }
+#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
 #endif /* HAVE_INLINE_ASM */
 
 av_cold void ff_fft_init_mips(FFTContext *s)
@@ -504,10 +506,12 @@ av_cold void ff_fft_init_mips(FFTContext *s)
     ff_init_ff_cos_tabs(16);
 
 #if HAVE_INLINE_ASM
+#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
     s->fft_calc     = ff_fft_calc_mips;
 #if CONFIG_MDCT
     s->imdct_calc   = ff_imdct_calc_mips;
     s->imdct_half   = ff_imdct_half_mips;
 #endif
 #endif
+#endif
 }
diff --git a/libavcodec/mips/h263dsp_init_mips.c b/libavcodec/mips/h263dsp_init_mips.c
new file mode 100644
index 00000000..09bd9370
--- /dev/null
+++ b/libavcodec/mips/h263dsp_init_mips.c
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2015 Manojkumar Bhosale (Manojkumar.Bhosale@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "h263dsp_mips.h"
+
+#if HAVE_MSA
+/* Point both loop-filter entries of c at the MSA implementations. */
+static av_cold void h263dsp_init_msa(H263DSPContext *c)
+{
+    c->h263_h_loop_filter = ff_h263_h_loop_filter_msa;
+    c->h263_v_loop_filter = ff_h263_v_loop_filter_msa;
+}
+#endif  // #if HAVE_MSA
+
+/*
+ * Install the MIPS-specific H.263 DSP routines into c. Only an MSA set is
+ * provided here; when HAVE_MSA is not enabled, c is left untouched.
+ */
+av_cold void ff_h263dsp_init_mips(H263DSPContext *c)
+{
+#if HAVE_MSA
+    h263dsp_init_msa(c);
+#endif  // #if HAVE_MSA
+}
diff --git a/libavcodec/mips/h263dsp_mips.h b/libavcodec/mips/h263dsp_mips.h
new file mode 100644
index 00000000..99a43cd4
--- /dev/null
+++ b/libavcodec/mips/h263dsp_mips.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2015 Manojkumar Bhosale (Manojkumar.Bhosale@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MIPS_H263DSP_MIPS_H
+#define AVCODEC_MIPS_H263DSP_MIPS_H
+
+#include "libavcodec/mpegvideo.h"
+
+/* H.263 loop filters, implemented in h263dsp_msa.c. */
+void ff_h263_h_loop_filter_msa(uint8_t *src, int stride, int q_scale);
+void ff_h263_v_loop_filter_msa(uint8_t *src, int stride, int q_scale);
+/*
+ * NOTE(review): the prototypes below are not defined in h263dsp_msa.c;
+ * presumably they are implemented by other MSA sources (the first three take
+ * an MpegEncContext) -- confirm where they are defined before relying on
+ * this header for them.
+ */
+void ff_dct_unquantize_mpeg2_inter_msa(MpegEncContext *s, int16_t *block,
+                                       int32_t index, int32_t q_scale);
+void ff_dct_unquantize_h263_inter_msa(MpegEncContext *s, int16_t *block,
+                                      int32_t index, int32_t q_scale);
+void ff_dct_unquantize_h263_intra_msa(MpegEncContext *s, int16_t *block,
+                                      int32_t index, int32_t q_scale);
+int ff_pix_sum_msa(uint8_t *pix, int line_size);
+
+#endif  // #ifndef AVCODEC_MIPS_H263DSP_MIPS_H
diff --git a/libavcodec/mips/h263dsp_msa.c b/libavcodec/mips/h263dsp_msa.c
new file mode 100644
index 00000000..472bcbd7
--- /dev/null
+++ b/libavcodec/mips/h263dsp_msa.c
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2015 Manojkumar Bhosale (Manojkumar.Bhosale@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/mips/generic_macros_msa.h"
+#include "h263dsp_mips.h"
+
+/*
+ * Filter strength per quantiser scale. qscale is used as the index without a
+ * range check, so callers must pass a value in 0..31.
+ */
+static const uint8_t h263_loop_filter_strength_msa[32] = {
+    0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 5, 5, 6, 6, 7, 7,
+    7, 8, 8, 8, 9, 9, 9, 10, 10, 10, 11, 11, 11, 12, 12, 12
+};
+
+/*
+ * Horizontal loop filter: eight rows (stride bytes apart) are loaded
+ * starting two bytes to the left of src, transposed, filtered, and written
+ * back four bytes per row through the two ST4x4_UB calls.
+ * NOTE(review): the arithmetic appears to be a vectorised form of the scalar
+ * H.263 loop filter -- confirm against the C reference before changing it.
+ */
+static void h263_h_loop_filter_msa(uint8_t *src, int32_t stride, int32_t qscale)
+{
+    int32_t strength = h263_loop_filter_strength_msa[qscale];
+    v16u8 in0, in1, in2, in3, in4, in5, in6, in7;
+    v8i16 temp0, temp1, temp2;
+    v8i16 diff0, diff2, diff4, diff6, diff8;
+    v8i16 d0, a_d0, str_x2, str;
+
+    src -= 2;
+    LD_UB8(src, stride, in0, in1, in2, in3, in4, in5, in6, in7);
+    TRANSPOSE8x4_UB_UB(in0, in1, in2, in3, in4, in5, in6, in7,
+                       in0, in3, in2, in1);
+
+    temp0 = (v8i16) __msa_ilvr_b((v16i8) in0, (v16i8) in1);
+    a_d0 = __msa_hsub_u_h((v16u8) temp0, (v16u8) temp0);
+    temp2 = (v8i16) __msa_ilvr_b((v16i8) in2, (v16i8) in3);
+    temp2 = __msa_hsub_u_h((v16u8) temp2, (v16u8) temp2);
+    temp2 <<= 2;
+    diff0 = a_d0 + temp2;
+    diff2 = -(-diff0 >> 3);
+    str_x2 = __msa_fill_h(-(strength << 1));
+    temp0 = (str_x2 <= diff2);
+    diff2 = (v8i16) __msa_bmz_v((v16u8) diff2, (v16u8) temp0, (v16u8) temp0);
+    temp2 = str_x2 - diff2;
+    str = __msa_fill_h(-strength);
+    temp0 = (diff2 < str);
+    diff2 = (v8i16) __msa_bmnz_v((v16u8) diff2, (v16u8) temp2, (v16u8) temp0);
+    diff4 = diff0 >> 3;
+    str_x2 = __msa_fill_h(strength << 1);
+    temp0 = (diff4 <= str_x2);
+    diff4 = (v8i16) __msa_bmz_v((v16u8) diff4, (v16u8) temp0, (v16u8) temp0);
+    temp2 = str_x2 - diff4;
+    str = __msa_fill_h(strength);
+    temp0 = (str < diff4);
+    diff4 = (v8i16) __msa_bmnz_v((v16u8) diff4, (v16u8) temp2, (v16u8) temp0);
+    temp0 = __msa_clti_s_h(diff0, 0);
+    d0 = (v8i16) __msa_bmnz_v((v16u8) diff4, (v16u8) diff2, (v16u8) temp0);
+    diff2 = -diff2 >> 1;
+    diff4 >>= 1;
+    diff8 = (v8i16) __msa_bmnz_v((v16u8) diff4, (v16u8) diff2, (v16u8) temp0);
+    diff6 = (-a_d0) >> 2;
+    diff6 = -(diff6);
+    temp2 = -diff8;
+    temp0 = (diff6 < temp2);
+    diff6 = (v8i16) __msa_bmnz_v((v16u8) diff6, (v16u8) temp2, (v16u8) temp0);
+    diff2 = a_d0 >> 2;
+    temp0 = (diff2 <= diff8);
+    diff2 = (v8i16) __msa_bmz_v((v16u8) diff2, (v16u8) diff8, (v16u8) temp0);
+    temp0 = __msa_clti_s_h(a_d0, 0);
+    diff6 = (v8i16) __msa_bmz_v((v16u8) diff6, (v16u8) diff2, (v16u8) temp0);
+    PCKEV_B2_SH(a_d0, diff6, a_d0, d0, diff6, d0);
+    in0 = (v16u8) ((v16i8) in0 - (v16i8) diff6);
+    in1 = (v16u8) ((v16i8) in1 + (v16i8) diff6);
+    in3 = __msa_xori_b(in3, 128);
+    in3 = (v16u8) __msa_adds_s_b((v16i8) in3, (v16i8) d0);
+    in3 = __msa_xori_b(in3, 128);
+    in2 = __msa_subsus_u_b(in2, (v16i8) d0);
+    ILVR_B2_SH(in3, in0, in1, in2, temp0, temp1);
+    in0 = (v16u8) __msa_ilvr_h(temp1, temp0);
+    in3 = (v16u8) __msa_ilvl_h(temp1, temp0);
+    ST4x4_UB(in0, in0, 0, 1, 2, 3, src, stride);
+    src += 4 * stride;
+    ST4x4_UB(in3, in3, 0, 1, 2, 3, src, stride);
+    src += 4 * stride;
+}
+
+/*
+ * Vertical loop filter: four rows are loaded starting two rows above src
+ * (into in0, in3, in2, in1, in that order), filtered with the same sequence
+ * as the horizontal version, and 8 bytes of each of those four rows are
+ * written back with SD4.
+ */
+static void h263_v_loop_filter_msa(uint8_t *src, int32_t stride, int32_t qscale)
+{
+    int32_t strength = h263_loop_filter_strength_msa[qscale];
+    uint64_t res0, res1, res2, res3;
+    v16u8 in0, in1, in2, in3;
+    v8i16 temp0, temp2, diff0, diff2, diff4, diff6, diff8;
+    v8i16 d0, a_d0, str_x2, str;
+
+    src -= 2 * stride;
+    LD_UB4(src, stride, in0, in3, in2, in1);
+    temp0 = (v8i16) __msa_ilvr_b((v16i8) in0, (v16i8) in1);
+    a_d0 = __msa_hsub_u_h((v16u8) temp0, (v16u8) temp0);
+    temp2 = (v8i16) __msa_ilvr_b((v16i8) in2, (v16i8) in3);
+    temp2 = __msa_hsub_u_h((v16u8) temp2, (v16u8) temp2);
+    temp2 <<= 2;
+    diff0 = a_d0 + temp2;
+    diff2 = -(-diff0 >> 3);
+    str_x2 = __msa_fill_h(-(strength << 1));
+    temp0 = (str_x2 <= diff2);
+    diff2 = (v8i16) __msa_bmz_v((v16u8) diff2, (v16u8) temp0, (v16u8) temp0);
+    temp2 = str_x2 - diff2;
+    str = __msa_fill_h(-strength);
+    temp0 = (diff2 < str);
+    diff2 = (v8i16) __msa_bmnz_v((v16u8) diff2, (v16u8) temp2, (v16u8) temp0);
+    diff4 = diff0 >> 3;
+    str_x2 = __msa_fill_h(strength << 1);
+    temp0 = (diff4 <= str_x2);
+    diff4 = (v8i16) __msa_bmz_v((v16u8) diff4, (v16u8) temp0, (v16u8) temp0);
+    temp2 = str_x2 - diff4;
+    str = __msa_fill_h(strength);
+    temp0 = (str < diff4);
+    diff4 = (v8i16) __msa_bmnz_v((v16u8) diff4, (v16u8) temp2, (v16u8) temp0);
+    temp0 = __msa_clti_s_h(diff0, 0);
+    d0 = (v8i16) __msa_bmnz_v((v16u8) diff4, (v16u8) diff2, (v16u8) temp0);
+    diff2 = -diff2 >> 1;
+    diff4 >>= 1;
+    diff8 = (v8i16) __msa_bmnz_v((v16u8) diff4, (v16u8) diff2, (v16u8) temp0);
+    diff6 = (-a_d0) >> 2;
+    diff6 = -(diff6);
+    temp2 = -diff8;
+    temp0 = (diff6 < temp2);
+    diff6 = (v8i16) __msa_bmnz_v((v16u8) diff6, (v16u8) temp2, (v16u8) temp0);
+    diff2 = a_d0 >> 2;
+    temp0 = (diff2 <= diff8);
+    diff2 = (v8i16) __msa_bmz_v((v16u8) diff2, (v16u8) diff8, (v16u8) temp0);
+    temp0 = __msa_clti_s_h(a_d0, 0);
+    diff6 = (v8i16) __msa_bmz_v((v16u8) diff6, (v16u8) diff2, (v16u8) temp0);
+    PCKEV_B2_SH(a_d0, diff6, a_d0, d0, diff6, d0);
+    in0 = (v16u8) ((v16i8) in0 - (v16i8) diff6);
+    in1 = (v16u8) ((v16i8) in1 + (v16i8) diff6);
+    in3 = __msa_xori_b(in3, 128);
+    in3 = (v16u8) __msa_adds_s_b((v16i8) in3, (v16i8) d0);
+    in3 = __msa_xori_b(in3, 128);
+    in2 = __msa_subsus_u_b(in2, (v16i8) d0);
+    res0 = __msa_copy_u_d((v2i64) in0, 0);
+    res1 = __msa_copy_u_d((v2i64) in3, 0);
+    res2 = __msa_copy_u_d((v2i64) in2, 0);
+    res3 = __msa_copy_u_d((v2i64) in1, 0);
+    SD4(res0, res1, res2, res3, src, stride);
+}
+
+/* Exported entry point; forwards unchanged to h263_h_loop_filter_msa(). */
+void ff_h263_h_loop_filter_msa(uint8_t *src, int32_t stride, int32_t q_scale)
+{
+    h263_h_loop_filter_msa(src, stride, q_scale);
+}
+
+/* Exported entry point; forwards unchanged to h263_v_loop_filter_msa(). */
+void ff_h263_v_loop_filter_msa(uint8_t *src, int32_t stride, int32_t q_scale)
+{
+    h263_v_loop_filter_msa(src, stride, q_scale);
+}
diff --git a/libavcodec/mips/h264chroma_init_mips.c b/libavcodec/mips/h264chroma_init_mips.c
index 4c10da74..122148dc 100644
--- a/libavcodec/mips/h264chroma_init_mips.c
+++ b/libavcodec/mips/h264chroma_init_mips.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
+ * Copyright (c) 2015 Shivraj Patil (Shivraj.Patil@imgtec.com)
  *
  * This file is part of FFmpeg.
  *
@@ -20,7 +21,24 @@
 
 #include "h264chroma_mips.h"
 
-#if HAVE_LOONGSON3
+#if HAVE_MSA
+static av_cold void h264chroma_init_msa(H264ChromaContext *c, int bit_depth)
+{
+    /* The MSA routines are installed only for bit depths of 8 or less. */
+    if (bit_depth > 8)
+        return;
+
+    c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_msa;
+    c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_msa;
+    c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_msa;
+
+    c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_msa;
+    c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_msa;
+    c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_msa;
+}
+#endif  // #if HAVE_MSA
+
+#if HAVE_MMI
 static av_cold void h264chroma_init_mmi(H264ChromaContext *c, int bit_depth)
 {
     int high_bit_depth = bit_depth > 8;
@@ -32,11 +50,14 @@ static av_cold void h264chroma_init_mmi(H264ChromaContext *c, int bit_depth)
         c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_mmi;
     }
 }
-#endif /* HAVE_LOONGSON3 */
+#endif /* HAVE_MMI */
 
 av_cold void ff_h264chroma_init_mips(H264ChromaContext *c, int bit_depth)
 {
-#if HAVE_LOONGSON3
+#if HAVE_MSA
+    h264chroma_init_msa(c, bit_depth); /* the MMI init below runs after this */
+#endif  // #if HAVE_MSA
+#if HAVE_MMI
     h264chroma_init_mmi(c, bit_depth);
-#endif /* HAVE_LOONGSON3 */
+#endif /* HAVE_MMI */
 }
diff --git a/libavcodec/mips/h264chroma_mips.h b/libavcodec/mips/h264chroma_mips.h
index 314e8a38..0ef6c746 100644
--- a/libavcodec/mips/h264chroma_mips.h
+++ b/libavcodec/mips/h264chroma_mips.h
@@ -18,10 +18,22 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef H264_CHROMA_MIPS_H
-#define H264_CHROMA_MIPS_H
+#ifndef AVCODEC_MIPS_H264CHROMA_MIPS_H
+#define AVCODEC_MIPS_H264CHROMA_MIPS_H
 
 #include "libavcodec/h264.h"
+void ff_put_h264_chroma_mc8_msa(uint8_t *dst, uint8_t *src, int stride,
+                                int height, int x, int y);
+void ff_put_h264_chroma_mc4_msa(uint8_t *dst, uint8_t *src, int stride,
+                                int height, int x, int y);
+void ff_put_h264_chroma_mc2_msa(uint8_t *dst, uint8_t *src, int stride,
+                                int height, int x, int y);
+void ff_avg_h264_chroma_mc8_msa(uint8_t *dst, uint8_t *src, int stride,
+                                int height, int x, int y);
+void ff_avg_h264_chroma_mc4_msa(uint8_t *dst, uint8_t *src, int stride,
+                                int height, int x, int y);
+void ff_avg_h264_chroma_mc2_msa(uint8_t *dst, uint8_t *src, int stride,
+                                int height, int x, int y);
 
 void ff_put_h264_chroma_mc8_mmi(uint8_t *dst, uint8_t *src, int stride,
         int h, int x, int y);
@@ -32,4 +44,4 @@ void ff_put_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride,
 void ff_avg_h264_chroma_mc4_mmi(uint8_t *dst, uint8_t *src, int stride,
         int h, int x, int y);
 
-#endif /* H264_CHROMA_MIPS_H */
+#endif /* AVCODEC_MIPS_H264CHROMA_MIPS_H */
diff --git a/libavcodec/mips/h264chroma_msa.c b/libavcodec/mips/h264chroma_msa.c
new file mode 100644
index 00000000..67d0bc12
--- /dev/null
+++ b/libavcodec/mips/h264chroma_msa.c
@@ -0,0 +1,2003 @@
+/*
+ * Copyright (c) 2015 Shivraj Patil (Shivraj.Patil@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/mips/generic_macros_msa.h"
+#include "h264chroma_mips.h"
+
+static const uint8_t chroma_mask_arr[16 * 5] = { /* 16-byte vshf control masks */
+    0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20, /* +0: 4+4 adjacent pairs */
+    0, 2, 2, 4, 4, 6, 6, 8, 16, 18, 18, 20, 20, 22, 22, 24, /* +16: 4+4 pairs, step 2 */
+    0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, /* +32: 8 adjacent pairs */
+    0, 1, 1, 2, 16, 17, 17, 18, 4, 5, 5, 6, 6, 7, 7, 8, /* +48: 2+2 pairs in low half */
+    0, 1, 1, 2, 16, 17, 17, 18, 16, 17, 17, 18, 18, 19, 19, 20 /* +64: 2+2 pairs in low half */
+}; /* indices 0-15 address one shuffle source vector, 16-31 the other */
+
+static void avc_chroma_hz_2x2_msa(uint8_t *src, int32_t src_stride,
+                                  uint8_t *dst, int32_t dst_stride,
+                                  uint32_t coeff0, uint32_t coeff1)
+{ /* Horizontal 2-tap filter for a 2-wide, 2-row block. */
+    uint16_t out0, out1;
+    v16i8 src0, src1;
+    v8u16 res_r;
+    v8i16 res;
+    v16i8 mask;
+    v16i8 coeff_vec0 = __msa_fill_b(coeff0);
+    v16i8 coeff_vec1 = __msa_fill_b(coeff1);
+    v16u8 coeff_vec = (v16u8) __msa_ilvr_b(coeff_vec0, coeff_vec1);
+    /* coeff_vec bytes alternate coeff1, coeff0: one tap pair per halfword */
+    mask = LD_SB(&chroma_mask_arr[0]);
+
+    LD_SB2(src, src_stride, src0, src1);
+    /* gather adjacent byte pairs: src0 into bytes 0-7, src1 into bytes 8-15 */
+    src0 = __msa_vshf_b(mask, src1, src0);
+    res_r = __msa_dotp_u_h((v16u8) src0, coeff_vec); /* weighted pair sums */
+    res_r <<= 3; /* scale by 8 */
+    res_r = (v8u16) __msa_srari_h((v8i16) res_r, 6); /* rounded >> 6 */
+    res_r = __msa_sat_u_h(res_r, 7); /* clamp to 8 bits */
+    res = (v8i16) __msa_pckev_b((v16i8) res_r, (v16i8) res_r);
+    /* after packing, halfword 0 comes from src0 and halfword 2 from src1 */
+    out0 = __msa_copy_u_h(res, 0);
+    out1 = __msa_copy_u_h(res, 2);
+
+    SH(out0, dst);
+    dst += dst_stride;
+    SH(out1, dst);
+}
+
+static void avc_chroma_hz_2x4_msa(uint8_t *src, int32_t src_stride,
+                                  uint8_t *dst, int32_t dst_stride,
+                                  uint32_t coeff0, uint32_t coeff1)
+{ /* Horizontal 2-tap filter for a 2-wide, 4-row block. */
+    v16u8 src0, src1, src2, src3;
+    v8u16 res_r;
+    v8i16 res;
+    v16i8 mask;
+    v16i8 coeff_vec0 = __msa_fill_b(coeff0);
+    v16i8 coeff_vec1 = __msa_fill_b(coeff1);
+    v16u8 coeff_vec = (v16u8) __msa_ilvr_b(coeff_vec0, coeff_vec1);
+    /* coeff_vec bytes alternate coeff1, coeff0: one tap pair per halfword */
+    mask = LD_SB(&chroma_mask_arr[64]);
+
+    LD_UB4(src, src_stride, src0, src1, src2, src3);
+    /* NOTE(review): presumably packs pixel pairs of two rows per shuffle */
+    VSHF_B2_UB(src0, src1, src2, src3, mask, mask, src0, src2);
+    /* low 8 bytes of src0 followed by low 8 bytes of src2 */
+    src0 = (v16u8) __msa_ilvr_d((v2i64) src2, (v2i64) src0);
+
+    res_r = __msa_dotp_u_h(src0, coeff_vec); /* weighted pair sums */
+    res_r <<= 3; /* scale by 8 */
+    res_r = (v8u16) __msa_srari_h((v8i16) res_r, 6); /* rounded >> 6 */
+    res_r = __msa_sat_u_h(res_r, 7); /* clamp to 8 bits */
+    res = (v8i16) __msa_pckev_b((v16i8) res_r, (v16i8) res_r);
+
+    ST2x4_UB(res, 0, dst, dst_stride);
+}
+
+static void avc_chroma_hz_2x8_msa(uint8_t *src, int32_t src_stride,
+                                  uint8_t *dst, int32_t dst_stride,
+                                  uint32_t coeff0, uint32_t coeff1)
+{ /* Horizontal 2-tap filter for a 2-wide, 8-row block, done as two halves. */
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
+    v8u16 res_r;
+    v8i16 res;
+    v16i8 mask;
+    v16i8 coeff_vec0 = __msa_fill_b(coeff0);
+    v16i8 coeff_vec1 = __msa_fill_b(coeff1);
+    v16u8 coeff_vec = (v16u8) __msa_ilvr_b(coeff_vec0, coeff_vec1);
+    /* coeff_vec bytes alternate coeff1, coeff0: one tap pair per halfword */
+    mask = LD_SB(&chroma_mask_arr[64]);
+
+    LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
+    /* NOTE(review): presumably packs pixel pairs of two rows per shuffle */
+    VSHF_B2_UB(src0, src1, src2, src3, mask, mask, src0, src2);
+    VSHF_B2_UB(src4, src5, src6, src7, mask, mask, src4, src6);
+    /* NOTE(review): presumably joins the low 8 bytes of each result pair */
+    ILVR_D2_UB(src2, src0, src6, src4, src0, src4);
+
+    res_r = __msa_dotp_u_h(src0, coeff_vec); /* first half: weighted sums */
+    res_r <<= 3; /* scale by 8 */
+    res_r = (v8u16) __msa_srari_h((v8i16) res_r, 6); /* rounded >> 6 */
+    res_r = __msa_sat_u_h(res_r, 7); /* clamp to 8 bits */
+    res = (v8i16) __msa_pckev_b((v16i8) res_r, (v16i8) res_r);
+
+    ST2x4_UB(res, 0, dst, dst_stride);
+    dst += (4 * dst_stride);
+    /* second half: same arithmetic on src4, stored four rows further down */
+    res_r = __msa_dotp_u_h(src4, coeff_vec);
+    res_r <<= 3;
+    res_r = (v8u16) __msa_srari_h((v8i16) res_r, 6);
+    res_r = __msa_sat_u_h(res_r, 7);
+    res = (v8i16) __msa_pckev_b((v16i8) res_r, (v16i8) res_r);
+
+    ST2x4_UB(res, 0, dst, dst_stride);
+}
+
+static void avc_chroma_hz_2w_msa(uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 uint32_t coeff0, uint32_t coeff1,
+                                 int32_t height)
+{
+    /* Pick the 2-wide horizontal kernel by height; other heights do nothing. */
+    if (height == 2)
+        avc_chroma_hz_2x2_msa(src, src_stride, dst, dst_stride, coeff0, coeff1);
+    else if (height == 4)
+        avc_chroma_hz_2x4_msa(src, src_stride, dst, dst_stride, coeff0, coeff1);
+    else if (height == 8)
+        avc_chroma_hz_2x8_msa(src, src_stride, dst, dst_stride, coeff0, coeff1);
+}
+
+static void avc_chroma_hz_4x2_msa(uint8_t *src, int32_t src_stride,
+                                  uint8_t *dst, int32_t dst_stride,
+                                  uint32_t coeff0, uint32_t coeff1)
+{ /* Horizontal 2-tap filter for a 4-wide, 2-row block. */
+    v16i8 src0, src1;
+    v8u16 res_r;
+    v4i32 res;
+    v16i8 mask;
+    v16i8 coeff_vec0 = __msa_fill_b(coeff0);
+    v16i8 coeff_vec1 = __msa_fill_b(coeff1);
+    v16u8 coeff_vec = (v16u8) __msa_ilvr_b(coeff_vec0, coeff_vec1);
+    /* coeff_vec bytes alternate coeff1, coeff0: one tap pair per halfword */
+    mask = LD_SB(&chroma_mask_arr[0]);
+
+    LD_SB2(src, src_stride, src0, src1);
+    /* gather adjacent byte pairs: src0 into bytes 0-7, src1 into bytes 8-15 */
+    src0 = __msa_vshf_b(mask, src1, src0);
+    res_r = __msa_dotp_u_h((v16u8) src0, coeff_vec); /* weighted pair sums */
+    res_r <<= 3; /* scale by 8 */
+    res_r = (v8u16) __msa_srari_h((v8i16) res_r, 6); /* rounded >> 6 */
+    res_r = __msa_sat_u_h(res_r, 7); /* clamp to 8 bits */
+    res = (v4i32) __msa_pckev_b((v16i8) res_r, (v16i8) res_r);
+    /* after packing, word 0 comes from src0 and word 1 from src1 */
+    ST4x2_UB(res, dst, dst_stride);
+}
+
+static void avc_chroma_hz_4x4multiple_msa(uint8_t *src, int32_t src_stride,
+                                          uint8_t *dst, int32_t dst_stride,
+                                          uint32_t coeff0, uint32_t coeff1,
+                                          int32_t height)
+{ /* Horizontal 2-tap filter, 4 wide, processed in groups of four rows. */
+    uint32_t row;
+    v16u8 src0, src1, src2, src3;
+    v8u16 res0_r, res1_r;
+    v4i32 res0, res1;
+    v16i8 mask;
+    v16i8 coeff_vec0 = __msa_fill_b(coeff0);
+    v16i8 coeff_vec1 = __msa_fill_b(coeff1);
+    v16u8 coeff_vec = (v16u8) __msa_ilvr_b(coeff_vec0, coeff_vec1);
+    /* coeff_vec bytes alternate coeff1, coeff0: one tap pair per halfword */
+    mask = LD_SB(&chroma_mask_arr[0]);
+
+    for (row = (height >> 2); row--;) { /* height % 4 leftover rows: skipped */
+        LD_UB4(src, src_stride, src0, src1, src2, src3);
+        src += (4 * src_stride);
+        /* NOTE(review): presumably packs pixel pairs of two rows per shuffle */
+        VSHF_B2_UB(src0, src1, src2, src3, mask, mask, src0, src2);
+        DOTP_UB2_UH(src0, src2, coeff_vec, coeff_vec, res0_r, res1_r);
+
+        res0_r <<= 3; /* scale by 8 */
+        res1_r <<= 3;
+
+        SRARI_H2_UH(res0_r, res1_r, 6);
+        SAT_UH2_UH(res0_r, res1_r, 7);
+        PCKEV_B2_SW(res0_r, res0_r, res1_r, res1_r, res0, res1);
+
+        ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
+        dst += (4 * dst_stride);
+    }
+}
+
+static void avc_chroma_hz_4w_msa(uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 uint32_t coeff0, uint32_t coeff1,
+                                 int32_t height)
+{
+    /* Height 2 has its own kernel; every other height uses the 4-row loop. */
+    if (height != 2)
+        avc_chroma_hz_4x4multiple_msa(src, src_stride, dst, dst_stride, coeff0,
+                                      coeff1, height);
+    else
+        avc_chroma_hz_4x2_msa(src, src_stride, dst, dst_stride, coeff0, coeff1);
+}
+
+/* Horizontal 2-tap chroma filter for 8-pixel-wide blocks. Rows are processed
+ * four at a time; the height % 4 leftover rows are then filtered one by one.
+ * Each output byte is a scaled, rounded, 8-bit-saturated weighted pair sum. */
+static void avc_chroma_hz_8w_msa(uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 uint32_t coeff0, uint32_t coeff1,
+                                 int32_t height)
+{
+    uint32_t cnt;
+    v16u8 row0, row1, row2, row3, pk0, pk1;
+    v8u16 sum0, sum1, sum2, sum3;
+    v16i8 tap0 = __msa_fill_b(coeff0);
+    v16i8 tap1 = __msa_fill_b(coeff1);
+    v16u8 taps = (v16u8) __msa_ilvr_b(tap0, tap1);
+    v16i8 shuf = LD_SB(&chroma_mask_arr[32]);
+
+    /* main pass: height / 4 groups of four rows */
+    for (cnt = height >> 2; cnt--;) {
+        LD_UB4(src, src_stride, row0, row1, row2, row3);
+        src += (4 * src_stride);
+
+        VSHF_B2_UB(row0, row0, row1, row1, shuf, shuf, row0, row1);
+        VSHF_B2_UB(row2, row2, row3, row3, shuf, shuf, row2, row3);
+        DOTP_UB4_UH(row0, row1, row2, row3, taps, taps, taps, taps,
+                    sum0, sum1, sum2, sum3);
+        SLLI_4V(sum0, sum1, sum2, sum3, 3);
+        SRARI_H4_UH(sum0, sum1, sum2, sum3, 6);
+        SAT_UH4_UH(sum0, sum1, sum2, sum3, 7);
+        PCKEV_B2_UB(sum1, sum0, sum3, sum2, pk0, pk1);
+        ST8x4_UB(pk0, pk1, dst, dst_stride);
+        dst += (4 * dst_stride);
+    }
+
+    /* tail: the height % 4 remaining rows, one at a time (possibly none) */
+    for (cnt = (height % 4); cnt--;) {
+        row0 = LD_UB(src);
+        src += src_stride;
+
+        row0 = (v16u8) __msa_vshf_b(shuf, (v16i8) row0, (v16i8) row0);
+        sum0 = __msa_dotp_u_h(row0, taps);
+        sum0 <<= 3;
+        sum0 = (v8u16) __msa_srari_h((v8i16) sum0, 6);
+        sum0 = __msa_sat_u_h(sum0, 7);
+        sum0 = (v8u16) __msa_pckev_b((v16i8) sum0, (v16i8) sum0);
+
+        ST8x1_UB(sum0, dst);
+        dst += dst_stride;
+    }
+}
+
+static void avc_chroma_vt_2x2_msa(uint8_t *src, int32_t src_stride,
+                                  uint8_t *dst, int32_t dst_stride,
+                                  uint32_t coeff0, uint32_t coeff1)
+{ /* Vertical 2-tap filter for a 2-wide, 2-row block (three input vectors). */
+    uint16_t out0, out1;
+    v16i8 src0, src1, src2;
+    v16u8 tmp0, tmp1;
+    v8i16 res;
+    v8u16 res_r;
+    v16i8 coeff_vec0 = __msa_fill_b(coeff0);
+    v16i8 coeff_vec1 = __msa_fill_b(coeff1);
+    v16u8 coeff_vec = (v16u8) __msa_ilvr_b(coeff_vec0, coeff_vec1);
+    /* coeff_vec bytes alternate coeff1, coeff0: one tap pair per halfword */
+    LD_SB3(src, src_stride, src0, src1, src2);
+    /* NOTE(review): presumably byte-interleaves (src0,src1) and (src1,src2) */
+    ILVR_B2_UB(src1, src0, src2, src1, tmp0, tmp1);
+    /* low 8 bytes of tmp0 followed by low 8 bytes of tmp1 */
+    tmp0 = (v16u8) __msa_ilvr_d((v2i64) tmp1, (v2i64) tmp0);
+
+    res_r = __msa_dotp_u_h(tmp0, coeff_vec); /* weighted pair sums */
+    res_r <<= 3; /* scale by 8 */
+    res_r = (v8u16) __msa_srari_h((v8i16) res_r, 6); /* rounded >> 6 */
+    res_r = __msa_sat_u_h(res_r, 7); /* clamp to 8 bits */
+    res = (v8i16) __msa_pckev_b((v16i8) res_r, (v16i8) res_r);
+    /* after packing, halfword 0 comes from tmp0 and halfword 2 from tmp1 */
+    out0 = __msa_copy_u_h(res, 0);
+    out1 = __msa_copy_u_h(res, 2);
+
+    SH(out0, dst);
+    dst += dst_stride;
+    SH(out1, dst);
+}
+
+static void avc_chroma_vt_2x4_msa(uint8_t *src, int32_t src_stride,
+                                  uint8_t *dst, int32_t dst_stride,
+                                  uint32_t coeff0, uint32_t coeff1)
+{ /* Vertical 2-tap filter for a 2-wide, 4-row block (five input vectors). */
+    v16u8 src0, src1, src2, src3, src4;
+    v16u8 tmp0, tmp1, tmp2, tmp3;
+    v8i16 res;
+    v8u16 res_r;
+    v16i8 coeff_vec0 = __msa_fill_b(coeff0);
+    v16i8 coeff_vec1 = __msa_fill_b(coeff1);
+    v16u8 coeff_vec = (v16u8) __msa_ilvr_b(coeff_vec0, coeff_vec1);
+    /* coeff_vec bytes alternate coeff1, coeff0: one tap pair per halfword */
+    LD_UB5(src, src_stride, src0, src1, src2, src3, src4);
+    ILVR_B4_UB(src1, src0, src2, src1, src3, src2, src4, src3,
+               tmp0, tmp1, tmp2, tmp3);
+    ILVR_W2_UB(tmp1, tmp0, tmp3, tmp2, tmp0, tmp2);
+    /* NOTE(review): presumably the four row pairs now sit in tmp0 and tmp2 */
+    tmp0 = (v16u8) __msa_ilvr_d((v2i64) tmp2, (v2i64) tmp0);
+
+    res_r = __msa_dotp_u_h(tmp0, coeff_vec); /* weighted pair sums */
+    res_r <<= 3; /* scale by 8 */
+    res_r = (v8u16) __msa_srari_h((v8i16) res_r, 6); /* rounded >> 6 */
+    res_r = __msa_sat_u_h(res_r, 7); /* clamp to 8 bits */
+
+    res = (v8i16) __msa_pckev_b((v16i8) res_r, (v16i8) res_r);
+
+    ST2x4_UB(res, 0, dst, dst_stride);
+}
+
+static void avc_chroma_vt_2x8_msa(uint8_t *src, int32_t src_stride,
+                                  uint8_t *dst, int32_t dst_stride,
+                                  uint32_t coeff0, uint32_t coeff1)
+{ /* Vertical 2-tap filter for a 2-wide, 8-row block (nine input vectors). */
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16u8 tmp0, tmp1, tmp2, tmp3;
+    v8i16 res;
+    v8u16 res_r;
+    v16i8 coeff_vec0 = __msa_fill_b(coeff0);
+    v16i8 coeff_vec1 = __msa_fill_b(coeff1);
+    v16u8 coeff_vec = (v16u8) __msa_ilvr_b(coeff_vec0, coeff_vec1);
+    /* coeff_vec bytes alternate coeff1, coeff0: one tap pair per halfword */
+    LD_UB5(src, src_stride, src0, src1, src2, src3, src4);
+    src += (5 * src_stride);
+    LD_UB4(src, src_stride, src5, src6, src7, src8);
+
+    ILVR_B4_UB(src1, src0, src2, src1, src3, src2, src4, src3,
+               tmp0, tmp1, tmp2, tmp3);
+    ILVR_W2_UB(tmp1, tmp0, tmp3, tmp2, tmp0, tmp2);
+
+    tmp0 = (v16u8) __msa_ilvr_d((v2i64) tmp2, (v2i64) tmp0);
+
+    res_r = __msa_dotp_u_h(tmp0, coeff_vec); /* weighted pair sums */
+    res_r <<= 3; /* scale by 8 */
+    res_r = (v8u16) __msa_srari_h((v8i16) res_r, 6); /* rounded >> 6 */
+    res_r = __msa_sat_u_h(res_r, 7); /* clamp to 8 bits */
+
+    res = (v8i16) __msa_pckev_b((v16i8) res_r, (v16i8) res_r);
+
+    ST2x4_UB(res, 0, dst, dst_stride);
+    dst += (4 * dst_stride);
+
+    /* second half: src4 is reused as the first input of this group */
+    ILVR_B4_UB(src5, src4, src6, src5, src7, src6, src8, src7,
+               tmp0, tmp1, tmp2, tmp3);
+    ILVR_W2_UB(tmp1, tmp0, tmp3, tmp2, tmp0, tmp2);
+
+    tmp0 = (v16u8) __msa_ilvr_d((v2i64) tmp2, (v2i64) tmp0);
+
+    res_r = __msa_dotp_u_h(tmp0, coeff_vec);
+    res_r <<= 3;
+    res_r = (v8u16) __msa_srari_h((v8i16) res_r, 6);
+    res_r = __msa_sat_u_h(res_r, 7);
+
+    res = (v8i16) __msa_pckev_b((v16i8) res_r, (v16i8) res_r);
+
+    ST2x4_UB(res, 0, dst, dst_stride);
+}
+
+static void avc_chroma_vt_2w_msa(uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 uint32_t coeff0, uint32_t coeff1,
+                                 int32_t height)
+{
+    /* Pick the 2-wide vertical kernel by height; other heights do nothing. */
+    if (height == 2)
+        avc_chroma_vt_2x2_msa(src, src_stride, dst, dst_stride, coeff0, coeff1);
+    else if (height == 4)
+        avc_chroma_vt_2x4_msa(src, src_stride, dst, dst_stride, coeff0, coeff1);
+    else if (height == 8)
+        avc_chroma_vt_2x8_msa(src, src_stride, dst, dst_stride, coeff0, coeff1);
+}
+
+static void avc_chroma_vt_4x2_msa(uint8_t *src, int32_t src_stride,
+                                  uint8_t *dst, int32_t dst_stride,
+                                  uint32_t coeff0, uint32_t coeff1)
+{ /* Vertical 2-tap filter for a 4-wide, 2-row block (three input vectors). */
+    v16u8 src0, src1, src2;
+    v16u8 tmp0, tmp1;
+    v4i32 res;
+    v8u16 res_r;
+    v16i8 coeff_vec0 = __msa_fill_b(coeff0);
+    v16i8 coeff_vec1 = __msa_fill_b(coeff1);
+    v16u8 coeff_vec = (v16u8) __msa_ilvr_b(coeff_vec0, coeff_vec1);
+    /* coeff_vec bytes alternate coeff1, coeff0: one tap pair per halfword */
+    LD_UB3(src, src_stride, src0, src1, src2);
+    ILVR_B2_UB(src1, src0, src2, src1, tmp0, tmp1);
+    /* low 8 bytes of tmp0 followed by low 8 bytes of tmp1 */
+    tmp0 = (v16u8) __msa_ilvr_d((v2i64) tmp1, (v2i64) tmp0);
+    res_r = __msa_dotp_u_h(tmp0, coeff_vec); /* weighted pair sums */
+    res_r <<= 3; /* scale by 8 */
+    res_r = (v8u16) __msa_srari_h((v8i16) res_r, 6); /* rounded >> 6 */
+    res_r = __msa_sat_u_h(res_r, 7); /* clamp to 8 bits */
+    res = (v4i32) __msa_pckev_b((v16i8) res_r, (v16i8) res_r);
+
+    ST4x2_UB(res, dst, dst_stride);
+}
+
+static void avc_chroma_vt_4x4multiple_msa(uint8_t *src, int32_t src_stride,
+                                          uint8_t *dst, int32_t dst_stride,
+                                          uint32_t coeff0, uint32_t coeff1,
+                                          int32_t height)
+{ /* Vertical 2-tap filter, 4 wide, processed in groups of four rows. */
+    uint32_t row;
+    v16u8 src0, src1, src2, src3, src4;
+    v16u8 tmp0, tmp1, tmp2, tmp3;
+    v8u16 res0_r, res1_r;
+    v4i32 res0, res1;
+    v16i8 coeff_vec0 = __msa_fill_b(coeff0);
+    v16i8 coeff_vec1 = __msa_fill_b(coeff1);
+    v16u8 coeff_vec = (v16u8) __msa_ilvr_b(coeff_vec0, coeff_vec1);
+    /* coeff_vec bytes alternate coeff1, coeff0: one tap pair per halfword */
+    src0 = LD_UB(src); /* first input row, loaded once before the loop */
+    src += src_stride;
+
+    for (row = (height >> 2); row--;) { /* height % 4 leftover rows: skipped */
+        LD_UB4(src, src_stride, src1, src2, src3, src4);
+        src += (4 * src_stride);
+        /* NOTE(review): presumably byte-interleaves each row with the next */
+        ILVR_B4_UB(src1, src0, src2, src1, src3, src2, src4, src3,
+                   tmp0, tmp1, tmp2, tmp3);
+        ILVR_D2_UB(tmp1, tmp0, tmp3, tmp2, tmp0, tmp2);
+        DOTP_UB2_UH(tmp0, tmp2, coeff_vec, coeff_vec, res0_r, res1_r);
+
+        res0_r <<= 3; /* scale by 8 */
+        res1_r <<= 3;
+
+        SRARI_H2_UH(res0_r, res1_r, 6);
+        SAT_UH2_UH(res0_r, res1_r, 7);
+        PCKEV_B2_SW(res0_r, res0_r, res1_r, res1_r, res0, res1);
+
+        ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
+        dst += (4 * dst_stride);
+        src0 = src4; /* last row read is the first row of the next group */
+    }
+}
+
+static void avc_chroma_vt_4w_msa(uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 uint32_t coeff0, uint32_t coeff1,
+                                 int32_t height)
+{
+    /* Height 2 has its own kernel; every other height uses the 4-row loop. */
+    if (height != 2)
+        avc_chroma_vt_4x4multiple_msa(src, src_stride, dst, dst_stride, coeff0,
+                                      coeff1, height);
+    else
+        avc_chroma_vt_4x2_msa(src, src_stride, dst, dst_stride, coeff0, coeff1);
+}
+
+static void avc_chroma_vt_8w_msa(uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 uint32_t coeff0, uint32_t coeff1,
+                                 int32_t height)
+{ /* Vertical 2-tap filter, 8 wide, processed in groups of four rows. */
+    uint32_t row;
+    v16u8 src0, src1, src2, src3, src4, out0, out1;
+    v8u16 res0, res1, res2, res3;
+    v16i8 coeff_vec0 = __msa_fill_b(coeff0);
+    v16i8 coeff_vec1 = __msa_fill_b(coeff1);
+    v16u8 coeff_vec = (v16u8) __msa_ilvr_b(coeff_vec0, coeff_vec1);
+    /* coeff_vec bytes alternate coeff1, coeff0: one tap pair per halfword */
+    src0 = LD_UB(src); /* first input row, loaded once before the loop */
+    src += src_stride;
+    /* no tail loop here: height % 4 leftover rows are not processed */
+    for (row = height >> 2; row--;) {
+        LD_UB4(src, src_stride, src1, src2, src3, src4);
+        src += (4 * src_stride);
+        /* results overwrite src0..src3; src4 is kept for the next group */
+        ILVR_B4_UB(src1, src0, src2, src1, src3, src2, src4, src3,
+                   src0, src1, src2, src3);
+        DOTP_UB4_UH(src0, src1, src2, src3, coeff_vec, coeff_vec, coeff_vec,
+                    coeff_vec, res0, res1, res2, res3);
+        SLLI_4V(res0, res1, res2, res3, 3);
+        SRARI_H4_UH(res0, res1, res2, res3, 6);
+        SAT_UH4_UH(res0, res1, res2, res3, 7);
+        PCKEV_B2_UB(res1, res0, res3, res2, out0, out1);
+
+        ST8x4_UB(out0, out1, dst, dst_stride);
+
+        dst += (4 * dst_stride);
+        src0 = src4; /* last row read is the first row of the next group */
+    }
+}
+
+static void avc_chroma_hv_2x2_msa(uint8_t *src, int32_t src_stride,
+                                  uint8_t *dst, int32_t dst_stride,
+                                  uint32_t coef_hor0, uint32_t coef_hor1,
+                                  uint32_t coef_ver0, uint32_t coef_ver1)
+{ /* Horizontal-then-vertical 2-tap filter for a 2-wide, 2-row block. */
+    uint16_t out0, out1;
+    v16u8 src0, src1, src2;
+    v8u16 res_hz0, res_hz1, res_vt0, res_vt1;
+    v8i16 res_vert;
+    v16i8 mask;
+    v16i8 coeff_hz_vec0 = __msa_fill_b(coef_hor0);
+    v16i8 coeff_hz_vec1 = __msa_fill_b(coef_hor1);
+    v16u8 coeff_hz_vec = (v16u8) __msa_ilvr_b(coeff_hz_vec0, coeff_hz_vec1);
+    v8u16 coeff_vt_vec0 = (v8u16) __msa_fill_h(coef_ver0);
+    v8u16 coeff_vt_vec1 = (v8u16) __msa_fill_h(coef_ver1);
+    /* horizontal taps: interleaved bytes; vertical taps: halfword splats */
+    mask = LD_SB(&chroma_mask_arr[48]);
+
+    LD_UB3(src, src_stride, src0, src1, src2);
+    VSHF_B2_UB(src0, src1, src1, src2, mask, mask, src0, src1);
+    DOTP_UB2_UH(src0, src1, coeff_hz_vec, coeff_hz_vec, res_hz0, res_hz1);
+    MUL2(res_hz0, coeff_vt_vec1, res_hz1, coeff_vt_vec0, res_vt0, res_vt1);
+    /* NOTE(review): presumably res_hz1 is res_hz0 shifted down by one row */
+    res_vt0 += res_vt1;
+    res_vt0 = (v8u16) __msa_srari_h((v8i16) res_vt0, 6); /* rounded >> 6 */
+    res_vt0 = __msa_sat_u_h(res_vt0, 7); /* clamp to 8 bits */
+    res_vert = (v8i16) __msa_pckev_b((v16i8) res_vt0, (v16i8) res_vt0);
+    /* the two stored halfwords are lanes 0 and 1 of the packed result */
+    out0 = __msa_copy_u_h(res_vert, 0);
+    out1 = __msa_copy_u_h(res_vert, 1);
+
+    SH(out0, dst);
+    dst += dst_stride;
+    SH(out1, dst);
+}
+
+static void avc_chroma_hv_2x4_msa(uint8_t *src, int32_t src_stride,
+                                  uint8_t *dst, int32_t dst_stride,
+                                  uint32_t coef_hor0, uint32_t coef_hor1,
+                                  uint32_t coef_ver0, uint32_t coef_ver1)
+{ /* Horizontal-then-vertical 2-tap filter for a 2-wide, 4-row block. */
+    v16u8 src0, src1, src2, src3, src4;
+    v16u8 tmp0, tmp1, tmp2, tmp3;
+    v8u16 res_hz0, res_hz1, res_vt0, res_vt1;
+    v8i16 res;
+    v16i8 mask;
+    v16i8 coeff_hz_vec0 = __msa_fill_b(coef_hor0);
+    v16i8 coeff_hz_vec1 = __msa_fill_b(coef_hor1);
+    v16u8 coeff_hz_vec = (v16u8) __msa_ilvr_b(coeff_hz_vec0, coeff_hz_vec1);
+    v8u16 coeff_vt_vec0 = (v8u16) __msa_fill_h(coef_ver0);
+    v8u16 coeff_vt_vec1 = (v8u16) __msa_fill_h(coef_ver1);
+    /* horizontal taps: interleaved bytes; vertical taps: halfword splats */
+    mask = LD_SB(&chroma_mask_arr[48]);
+
+    LD_UB5(src, src_stride, src0, src1, src2, src3, src4);
+    /* tmp0/tmp1 start at src0; tmp2/tmp3 use the same rows shifted by one */
+    VSHF_B2_UB(src0, src1, src2, src3, mask, mask, tmp0, tmp1);
+    VSHF_B2_UB(src1, src2, src3, src4, mask, mask, tmp2, tmp3);
+    ILVR_D2_UB(tmp1, tmp0, tmp3, tmp2, src0, src1);
+    DOTP_UB2_UH(src0, src1, coeff_hz_vec, coeff_hz_vec, res_hz0, res_hz1);
+    MUL2(res_hz0, coeff_vt_vec1, res_hz1, coeff_vt_vec0, res_vt0, res_vt1);
+
+    res_vt0 += res_vt1;
+    res_vt0 = (v8u16) __msa_srari_h((v8i16) res_vt0, 6); /* rounded >> 6 */
+    res_vt0 = __msa_sat_u_h(res_vt0, 7); /* clamp to 8 bits */
+    res = (v8i16) __msa_pckev_b((v16i8) res_vt0, (v16i8) res_vt0);
+
+    ST2x4_UB(res, 0, dst, dst_stride);
+}
+
+static void avc_chroma_hv_2x8_msa(uint8_t *src, int32_t src_stride,
+                                  uint8_t *dst, int32_t dst_stride,
+                                  uint32_t coef_hor0, uint32_t coef_hor1,
+                                  uint32_t coef_ver0, uint32_t coef_ver1)
+{ /* Horizontal-then-vertical 2-tap filter, 2 wide, 8 rows in two halves. */
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16u8 tmp0, tmp1, tmp2, tmp3;
+    v8u16 res_hz0, res_hz1, res_vt0, res_vt1;
+    v8i16 res;
+    v16i8 mask;
+    v16i8 coeff_hz_vec0 = __msa_fill_b(coef_hor0);
+    v16i8 coeff_hz_vec1 = __msa_fill_b(coef_hor1);
+    v16u8 coeff_hz_vec = (v16u8) __msa_ilvr_b(coeff_hz_vec0, coeff_hz_vec1);
+    v8u16 coeff_vt_vec0 = (v8u16) __msa_fill_h(coef_ver0);
+    v8u16 coeff_vt_vec1 = (v8u16) __msa_fill_h(coef_ver1);
+    /* horizontal taps: interleaved bytes; vertical taps: halfword splats */
+    mask = LD_SB(&chroma_mask_arr[48]);
+
+    LD_UB5(src, src_stride, src0, src1, src2, src3, src4);
+    src += (5 * src_stride);
+    LD_UB4(src, src_stride, src5, src6, src7, src8);
+    /* first half is gathered into src0/src1, second half into src4/src5 */
+    VSHF_B2_UB(src0, src1, src2, src3, mask, mask, tmp0, tmp1);
+    VSHF_B2_UB(src1, src2, src3, src4, mask, mask, tmp2, tmp3);
+    ILVR_D2_UB(tmp1, tmp0, tmp3, tmp2, src0, src1);
+    VSHF_B2_UB(src4, src5, src6, src7, mask, mask, tmp0, tmp1);
+    VSHF_B2_UB(src5, src6, src7, src8, mask, mask, tmp2, tmp3);
+    ILVR_D2_UB(tmp1, tmp0, tmp3, tmp2, src4, src5);
+    DOTP_UB2_UH(src0, src1, coeff_hz_vec, coeff_hz_vec, res_hz0, res_hz1);
+    MUL2(res_hz0, coeff_vt_vec1, res_hz1, coeff_vt_vec0, res_vt0, res_vt1);
+
+    res_vt0 += res_vt1;
+    res_vt0 = (v8u16) __msa_srari_h((v8i16) res_vt0, 6); /* rounded >> 6 */
+    res_vt0 = __msa_sat_u_h(res_vt0, 7); /* clamp to 8 bits */
+
+    res = (v8i16) __msa_pckev_b((v16i8) res_vt0, (v16i8) res_vt0);
+
+    ST2x4_UB(res, 0, dst, dst_stride);
+    dst += (4 * dst_stride);
+    /* second half: same arithmetic on src4/src5, stored four rows down */
+    DOTP_UB2_UH(src4, src5, coeff_hz_vec, coeff_hz_vec, res_hz0, res_hz1);
+    MUL2(res_hz0, coeff_vt_vec1, res_hz1, coeff_vt_vec0, res_vt0, res_vt1);
+
+    res_vt0 += res_vt1;
+    res_vt0 = (v8u16) __msa_srari_h((v8i16) res_vt0, 6);
+    res_vt0 = __msa_sat_u_h(res_vt0, 7);
+
+    res = (v8i16) __msa_pckev_b((v16i8) res_vt0, (v16i8) res_vt0);
+
+    ST2x4_UB(res, 0, dst, dst_stride);
+}
+
+static void avc_chroma_hv_2w_msa(uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 uint32_t coef_hor0, uint32_t coef_hor1,
+                                 uint32_t coef_ver0, uint32_t coef_ver1,
+                                 int32_t height)
+{
+    /* Pick the 2-wide 2-D kernel by height; other heights do nothing. */
+    if (height == 2)
+        avc_chroma_hv_2x2_msa(src, src_stride, dst, dst_stride, coef_hor0,
+                              coef_hor1, coef_ver0, coef_ver1);
+    else if (height == 4)
+        avc_chroma_hv_2x4_msa(src, src_stride, dst, dst_stride, coef_hor0,
+                              coef_hor1, coef_ver0, coef_ver1);
+    else if (height == 8)
+        avc_chroma_hv_2x8_msa(src, src_stride, dst, dst_stride, coef_hor0,
+                              coef_hor1, coef_ver0, coef_ver1);
+}
+
+static void avc_chroma_hv_4x2_msa(uint8_t *src, int32_t src_stride,
+                                  uint8_t *dst, int32_t dst_stride,
+                                  uint32_t coef_hor0, uint32_t coef_hor1,
+                                  uint32_t coef_ver0, uint32_t coef_ver1)
+{ /* Horizontal-then-vertical 2-tap filter for a 4-wide, 2-row block. */
+    v16u8 src0, src1, src2;
+    v8u16 res_hz0, res_hz1, res_vt0, res_vt1;
+    v16i8 mask;
+    v4i32 res;
+    v16i8 coeff_hz_vec0 = __msa_fill_b(coef_hor0);
+    v16i8 coeff_hz_vec1 = __msa_fill_b(coef_hor1);
+    v16u8 coeff_hz_vec = (v16u8) __msa_ilvr_b(coeff_hz_vec0, coeff_hz_vec1);
+    v8u16 coeff_vt_vec0 = (v8u16) __msa_fill_h(coef_ver0);
+    v8u16 coeff_vt_vec1 = (v8u16) __msa_fill_h(coef_ver1);
+    /* horizontal taps: interleaved bytes; vertical taps: halfword splats */
+    mask = LD_SB(&chroma_mask_arr[0]);
+    LD_UB3(src, src_stride, src0, src1, src2);
+    VSHF_B2_UB(src0, src1, src1, src2, mask, mask, src0, src1);
+    DOTP_UB2_UH(src0, src1, coeff_hz_vec, coeff_hz_vec, res_hz0, res_hz1);
+    MUL2(res_hz0, coeff_vt_vec1, res_hz1, coeff_vt_vec0, res_vt0, res_vt1);
+    /* NOTE(review): presumably res_hz1 is res_hz0 shifted down by one row */
+    res_vt0 += res_vt1;
+    res_vt0 = (v8u16) __msa_srari_h((v8i16) res_vt0, 6); /* rounded >> 6 */
+    res_vt0 = __msa_sat_u_h(res_vt0, 7); /* clamp to 8 bits */
+    res = (v4i32) __msa_pckev_b((v16i8) res_vt0, (v16i8) res_vt0);
+
+    ST4x2_UB(res, dst, dst_stride);
+}
+
+static void avc_chroma_hv_4x4multiple_msa(uint8_t *src, int32_t src_stride,
+                                          uint8_t *dst, int32_t dst_stride,
+                                          uint32_t coef_hor0,
+                                          uint32_t coef_hor1,
+                                          uint32_t coef_ver0,
+                                          uint32_t coef_ver1,
+                                          int32_t height)
+{ /* Horizontal-then-vertical 2-tap filter, 4 wide, in groups of four rows. */
+    uint32_t row;
+    v16u8 src0, src1, src2, src3, src4;
+    v8u16 res_hz0, res_hz1, res_hz2, res_hz3;
+    v8u16 res_vt0, res_vt1, res_vt2, res_vt3;
+    v16i8 mask;
+    v16i8 coeff_hz_vec0 = __msa_fill_b(coef_hor0);
+    v16i8 coeff_hz_vec1 = __msa_fill_b(coef_hor1);
+    v16u8 coeff_hz_vec = (v16u8) __msa_ilvr_b(coeff_hz_vec0, coeff_hz_vec1);
+    v8u16 coeff_vt_vec0 = (v8u16) __msa_fill_h(coef_ver0);
+    v8u16 coeff_vt_vec1 = (v8u16) __msa_fill_h(coef_ver1);
+    v4i32 res0, res1;
+    /* horizontal taps: interleaved bytes; vertical taps: halfword splats */
+    mask = LD_SB(&chroma_mask_arr[0]);
+
+    src0 = LD_UB(src); /* first input row, loaded once before the loop */
+    src += src_stride;
+
+    for (row = (height >> 2); row--;) { /* height % 4 leftover rows: skipped */
+        LD_UB4(src, src_stride, src1, src2, src3, src4);
+        src += (4 * src_stride);
+        /* results overwrite src0..src3; src4 is kept for the next group */
+        VSHF_B2_UB(src0, src1, src1, src2, mask, mask, src0, src1);
+        VSHF_B2_UB(src2, src3, src3, src4, mask, mask, src2, src3);
+        DOTP_UB4_UH(src0, src1, src2, src3, coeff_hz_vec, coeff_hz_vec,
+                    coeff_hz_vec, coeff_hz_vec, res_hz0, res_hz1, res_hz2,
+                    res_hz3);
+        MUL4(res_hz0, coeff_vt_vec1, res_hz1, coeff_vt_vec0, res_hz2,
+             coeff_vt_vec1, res_hz3, coeff_vt_vec0, res_vt0, res_vt1, res_vt2,
+             res_vt3);
+        ADD2(res_vt0, res_vt1, res_vt2, res_vt3, res_vt0, res_vt1);
+        SRARI_H2_UH(res_vt0, res_vt1, 6);
+        SAT_UH2_UH(res_vt0, res_vt1, 7);
+        PCKEV_B2_SW(res_vt0, res_vt0, res_vt1, res_vt1, res0, res1);
+
+        ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
+        dst += (4 * dst_stride);
+        src0 = src4; /* last row read is the first row of the next group */
+    }
+}
+
+static void avc_chroma_hv_4w_msa(uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 uint32_t coef_hor0, uint32_t coef_hor1,
+                                 uint32_t coef_ver0, uint32_t coef_ver1,
+                                 int32_t height)
+{
+    /* Height 2 has its own kernel; every other height uses the 4-row loop. */
+    if (height != 2)
+        avc_chroma_hv_4x4multiple_msa(src, src_stride, dst, dst_stride,
+                                      coef_hor0, coef_hor1, coef_ver0,
+                                      coef_ver1, height);
+    else
+        avc_chroma_hv_4x2_msa(src, src_stride, dst, dst_stride, coef_hor0,
+                              coef_hor1, coef_ver0, coef_ver1);
+}
+
+static void avc_chroma_hv_8w_msa(uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 uint32_t coef_hor0, uint32_t coef_hor1,
+                                 uint32_t coef_ver0, uint32_t coef_ver1,
+                                 int32_t height)
+{ /* Horizontal-then-vertical 2-tap filter, 8 wide, in groups of four rows. */
+    uint32_t row;
+    v16u8 src0, src1, src2, src3, src4, out0, out1;
+    v8u16 res_hz0, res_hz1, res_hz2, res_hz3, res_hz4;
+    v8u16 res_vt0, res_vt1, res_vt2, res_vt3;
+    v16i8 mask;
+    v16i8 coeff_hz_vec0 = __msa_fill_b(coef_hor0);
+    v16i8 coeff_hz_vec1 = __msa_fill_b(coef_hor1);
+    v16u8 coeff_hz_vec = (v16u8) __msa_ilvr_b(coeff_hz_vec0, coeff_hz_vec1);
+    v8u16 coeff_vt_vec0 = (v8u16) __msa_fill_h(coef_ver0);
+    v8u16 coeff_vt_vec1 = (v8u16) __msa_fill_h(coef_ver1);
+    /* horizontal taps: interleaved bytes; vertical taps: halfword splats */
+    mask = LD_SB(&chroma_mask_arr[32]);
+
+    src0 = LD_UB(src);
+    src += src_stride;
+    /* horizontal sums of the first row, computed once before the loop */
+    src0 = (v16u8) __msa_vshf_b(mask, (v16i8) src0, (v16i8) src0);
+    res_hz0 = __msa_dotp_u_h(src0, coeff_hz_vec);
+    /* no tail loop here: height % 4 leftover rows are not processed */
+    for (row = (height >> 2); row--;) {
+        LD_UB4(src, src_stride, src1, src2, src3, src4);
+        src += (4 * src_stride);
+
+        VSHF_B2_UB(src1, src1, src2, src2, mask, mask, src1, src2);
+        VSHF_B2_UB(src3, src3, src4, src4, mask, mask, src3, src4);
+        DOTP_UB4_UH(src1, src2, src3, src4, coeff_hz_vec, coeff_hz_vec,
+                    coeff_hz_vec, coeff_hz_vec, res_hz1, res_hz2, res_hz3,
+                    res_hz4);
+        MUL4(res_hz1, coeff_vt_vec0, res_hz2, coeff_vt_vec0, res_hz3,
+             coeff_vt_vec0, res_hz4, coeff_vt_vec0, res_vt0, res_vt1, res_vt2,
+             res_vt3);
+        /* add the previous row's horizontal sums weighted by coef_ver1 */
+        res_vt0 += (res_hz0 * coeff_vt_vec1);
+        res_vt1 += (res_hz1 * coeff_vt_vec1);
+        res_vt2 += (res_hz2 * coeff_vt_vec1);
+        res_vt3 += (res_hz3 * coeff_vt_vec1);
+
+        SRARI_H4_UH(res_vt0, res_vt1, res_vt2, res_vt3, 6);
+        SAT_UH4_UH(res_vt0, res_vt1, res_vt2, res_vt3, 7);
+        PCKEV_B2_UB(res_vt1, res_vt0, res_vt3, res_vt2, out0, out1);
+        ST8x4_UB(out0, out1, dst, dst_stride);
+
+        dst += (4 * dst_stride);
+        /* the last row's sums become the "previous row" of the next group */
+        res_hz0 = res_hz4;
+    }
+}
+
+static void avc_chroma_hz_and_aver_dst_2x2_msa(uint8_t *src, int32_t src_stride,
+                                               uint8_t *dst, int32_t dst_stride,
+                                               uint32_t coeff0, uint32_t coeff1)
+{ /* 2x2 block: horizontal two-tap filter, result averaged with dst. */
+    uint16_t out0, out1;
+    uint32_t load0, load1;
+    v16i8 src0, src1;
+    v16u8 dst_data = { 0 };
+    v8u16 res_r;
+    v16u8 res;
+    v16i8 mask;
+    v16i8 coeff_vec0 = __msa_fill_b(coeff0);
+    v16i8 coeff_vec1 = __msa_fill_b(coeff1);
+    v16u8 coeff_vec = (v16u8) __msa_ilvr_b(coeff_vec0, coeff_vec1);
+    /* Shuffle control from chroma_mask_arr (table defined outside this view). */
+    mask = LD_SB(&chroma_mask_arr[0]);
+
+    LD_SB2(src, src_stride, src0, src1);
+    /* A 32-bit value is held per dst row, though only 16 bits are stored back. */
+    load0 = LW(dst);
+    load1 = LW(dst + dst_stride);
+
+    INSERT_W2_UB(load0, load1, dst_data);
+    /* Merge bytes of both source rows into one vector as the mask selects. */
+    src0 = __msa_vshf_b(mask, src1, src0);
+
+    res_r = __msa_dotp_u_h((v16u8) src0, coeff_vec);
+    res_r <<= 3; /* x8 ahead of the rounding shift by 6 below */
+    res_r = (v8u16) __msa_srari_h((v8i16) res_r, 6);
+    res_r = __msa_sat_u_h(res_r, 7);
+
+    res = (v16u8) __msa_pckev_b((v16i8) res_r, (v16i8) res_r);
+    dst_data = __msa_aver_u_b(res, dst_data);
+    /* Halfwords 0 and 2 of the averaged vector go to dst rows 0 and 1. */
+    out0 = __msa_copy_u_h((v8i16) dst_data, 0);
+    out1 = __msa_copy_u_h((v8i16) dst_data, 2);
+
+    SH(out0, dst);
+    dst += dst_stride;
+    SH(out1, dst);
+}
+
+static void avc_chroma_hz_and_aver_dst_2x4_msa(uint8_t *src, int32_t src_stride,
+                                               uint8_t *dst, int32_t dst_stride,
+                                               uint32_t coeff0, uint32_t coeff1)
+{ /* 2x4 block: horizontal two-tap filter, result averaged with dst. */
+    v16u8 src0, src1, src2, src3;
+    v16u8 dst0, dst1, dst2, dst3;
+    v8u16 res_r;
+    v16i8 res, mask;
+    v16i8 coeff_vec0 = __msa_fill_b(coeff0);
+    v16i8 coeff_vec1 = __msa_fill_b(coeff1);
+    v16u8 coeff_vec = (v16u8) __msa_ilvr_b(coeff_vec0, coeff_vec1);
+    /* Shuffle control from chroma_mask_arr (table defined outside this view). */
+    mask = LD_SB(&chroma_mask_arr[64]);
+
+    LD_UB4(src, src_stride, src0, src1, src2, src3);
+    LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+    /* Gather halfword 0 of dst1..dst3 into halfwords 1..3 of dst0. */
+    dst0 = (v16u8) __msa_insve_h((v8i16) dst0, 1, (v8i16) dst1);
+    dst0 = (v16u8) __msa_insve_h((v8i16) dst0, 2, (v8i16) dst2);
+    dst0 = (v16u8) __msa_insve_h((v8i16) dst0, 3, (v8i16) dst3);
+
+    VSHF_B2_UB(src0, src1, src2, src3, mask, mask, src0, src2);
+    /* Low 64 bits of both shuffled vectors share a single dot product. */
+    src0 = (v16u8) __msa_ilvr_d((v2i64) src2, (v2i64) src0);
+
+    res_r = __msa_dotp_u_h(src0, coeff_vec);
+    res_r <<= 3; /* x8 ahead of the rounding shift by 6 below */
+    res_r = (v8u16) __msa_srari_h((v8i16) res_r, 6);
+    res_r = __msa_sat_u_h(res_r, 7);
+
+    res = __msa_pckev_b((v16i8) res_r, (v16i8) res_r);
+    dst0 = __msa_aver_u_b((v16u8) res, dst0);
+
+    ST2x4_UB(dst0, 0, dst, dst_stride);
+}
+
+static void avc_chroma_hz_and_aver_dst_2x8_msa(uint8_t *src, int32_t src_stride,
+                                               uint8_t *dst, int32_t dst_stride,
+                                               uint32_t coeff0, uint32_t coeff1)
+{ /* 2x8 block: horizontal two-tap filter, result averaged with dst. */
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
+    v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+    v8u16 res0_r, res1_r;
+    v16u8 res0, res1, mask;
+    v16i8 coeff_vec0 = __msa_fill_b(coeff0);
+    v16i8 coeff_vec1 = __msa_fill_b(coeff1);
+    v16u8 coeff_vec = (v16u8) __msa_ilvr_b(coeff_vec0, coeff_vec1);
+    /* Shuffle control from chroma_mask_arr (table defined outside this view). */
+    mask = LD_UB(&chroma_mask_arr[64]);
+
+    LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
+    LD_UB8(dst, dst_stride, dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7);
+    /* dst rows 0..3 are gathered into dst0, dst rows 4..7 into dst4. */
+    dst0 = (v16u8) __msa_insve_h((v8i16) dst0, 1, (v8i16) dst1);
+    dst0 = (v16u8) __msa_insve_h((v8i16) dst0, 2, (v8i16) dst2);
+    dst0 = (v16u8) __msa_insve_h((v8i16) dst0, 3, (v8i16) dst3);
+
+    dst4 = (v16u8) __msa_insve_h((v8i16) dst4, 1, (v8i16) dst5);
+    dst4 = (v16u8) __msa_insve_h((v8i16) dst4, 2, (v8i16) dst6);
+    dst4 = (v16u8) __msa_insve_h((v8i16) dst4, 3, (v8i16) dst7);
+    /* Source rows 0..3 end up in src0 and rows 4..7 in src4 before filtering. */
+    VSHF_B2_UB(src0, src1, src2, src3, mask, mask, src0, src2);
+    VSHF_B2_UB(src4, src5, src6, src7, mask, mask, src4, src6);
+    ILVR_D2_UB(src2, src0, src6, src4, src0, src4);
+    DOTP_UB2_UH(src0, src4, coeff_vec, coeff_vec, res0_r, res1_r);
+
+    res0_r <<= 3; /* x8 ahead of the shift by 6 below */
+    res1_r <<= 3;
+
+    SRARI_H2_UH(res0_r, res1_r, 6);
+    SAT_UH2_UH(res0_r, res1_r, 7);
+    PCKEV_B2_UB(res0_r, res0_r, res1_r, res1_r, res0, res1);
+    AVER_UB2_UB(res0, dst0, res1, dst4, dst0, dst4);
+    /* Two stores of four rows each cover the eight output rows. */
+    ST2x4_UB(dst0, 0, dst, dst_stride);
+    dst += (4 * dst_stride);
+    ST2x4_UB(dst4, 0, dst, dst_stride);
+}
+
+static void avc_chroma_hz_and_aver_dst_2w_msa(uint8_t *src, int32_t src_stride,
+                                              uint8_t *dst, int32_t dst_stride,
+                                              uint32_t coeff0, uint32_t coeff1,
+                                              int32_t height)
+{
+    /* Kernels exist for 2, 4 and 8 rows; any other height writes nothing. */
+    if (height == 2)
+        avc_chroma_hz_and_aver_dst_2x2_msa(src, src_stride, dst, dst_stride,
+                                           coeff0, coeff1);
+    else if (height == 4)
+        avc_chroma_hz_and_aver_dst_2x4_msa(src, src_stride, dst, dst_stride,
+                                           coeff0, coeff1);
+    else if (height == 8)
+        avc_chroma_hz_and_aver_dst_2x8_msa(src, src_stride, dst, dst_stride,
+                                           coeff0, coeff1);
+}
+
+static void avc_chroma_hz_and_aver_dst_4x2_msa(uint8_t *src, int32_t src_stride,
+                                               uint8_t *dst, int32_t dst_stride,
+                                               uint32_t coeff0, uint32_t coeff1)
+{ /* 4x2 block: horizontal two-tap filter, result averaged with dst. */
+    uint32_t load0, load1;
+    v16i8 src0, src1;
+    v16u8 dst_data = { 0 };
+    v8u16 res_r;
+    v16i8 res, mask;
+    v16i8 coeff_vec0 = __msa_fill_b(coeff0);
+    v16i8 coeff_vec1 = __msa_fill_b(coeff1);
+    v16u8 coeff_vec = (v16u8) __msa_ilvr_b(coeff_vec0, coeff_vec1);
+    /* Shuffle control from chroma_mask_arr (table defined outside this view). */
+    mask = LD_SB(&chroma_mask_arr[0]);
+
+    LD_SB2(src, src_stride, src0, src1);
+    /* One 32-bit value per dst row is placed in dst_data for the averaging. */
+    load0 = LW(dst);
+    load1 = LW(dst + dst_stride);
+
+    INSERT_W2_UB(load0, load1, dst_data);
+    /* Merge bytes of both source rows into one vector as the mask selects. */
+    src0 = __msa_vshf_b(mask, src1, src0);
+
+    res_r = __msa_dotp_u_h((v16u8) src0, coeff_vec);
+    res_r <<= 3; /* x8 ahead of the rounding shift by 6 below */
+    res_r = (v8u16) __msa_srari_h((v8i16) res_r, 6);
+    res_r = __msa_sat_u_h(res_r, 7);
+    res = __msa_pckev_b((v16i8) res_r, (v16i8) res_r);
+    dst_data = __msa_aver_u_b((v16u8) res, dst_data);
+
+    ST4x2_UB(dst_data, dst, dst_stride);
+}
+
+static void avc_chroma_hz_and_aver_dst_4x4multiple_msa(uint8_t *src,
+                                                       int32_t src_stride,
+                                                       uint8_t *dst,
+                                                       int32_t dst_stride,
+                                                       uint32_t coeff0,
+                                                       uint32_t coeff1,
+                                                       int32_t height)
+{ /* 4-wide horizontal two-tap filter averaged with dst, four rows per pass. */
+    uint32_t load0, load1;
+    uint32_t row;
+    v16u8 src0, src1, src2, src3;
+    v16u8 dst0 = { 0 };
+    v16u8 dst1 = { 0 };
+    v8u16 res0_r, res1_r;
+    v16u8 res0, res1, mask;
+    v16i8 coeff_vec0 = __msa_fill_b(coeff0);
+    v16i8 coeff_vec1 = __msa_fill_b(coeff1);
+    v16u8 coeff_vec = (v16u8) __msa_ilvr_b(coeff_vec0, coeff_vec1);
+    /* Shuffle control from chroma_mask_arr (table defined outside this view). */
+    mask = LD_UB(&chroma_mask_arr[0]);
+    /* height >> 2 passes; a height % 4 remainder is never processed. */
+    for (row = (height >> 2); row--;) {
+        LD_UB4(src, src_stride, src0, src1, src2, src3);
+        src += (4 * src_stride);
+        /* dst rows 0 and 1 go into dst0, rows 2 and 3 into dst1. */
+        load0 = LW(dst);
+        load1 = LW(dst + dst_stride);
+
+        INSERT_W2_UB(load0, load1, dst0);
+
+        load0 = LW(dst + 2 * dst_stride);
+        load1 = LW(dst + 3 * dst_stride);
+
+        INSERT_W2_UB(load0, load1, dst1);
+        /* Source rows are shuffled in pairs: (0,1) into src0, (2,3) into src2. */
+        VSHF_B2_UB(src0, src1, src2, src3, mask, mask, src0, src2);
+        DOTP_UB2_UH(src0, src2, coeff_vec, coeff_vec, res0_r, res1_r);
+
+        res0_r <<= 3; /* x8 ahead of the shift by 6 below */
+        res1_r <<= 3;
+
+        SRARI_H2_UH(res0_r, res1_r, 6);
+        SAT_UH2_UH(res0_r, res1_r, 7);
+        PCKEV_B2_UB(res0_r, res0_r, res1_r, res1_r, res0, res1);
+        AVER_UB2_UB(res0, dst0, res1, dst1, dst0, dst1);
+
+        ST4x4_UB(dst0, dst1, 0, 1, 0, 1, dst, dst_stride);
+        dst += (4 * dst_stride);
+    }
+}
+
+static void avc_chroma_hz_and_aver_dst_4w_msa(uint8_t *src, int32_t src_stride,
+                                              uint8_t *dst, int32_t dst_stride,
+                                              uint32_t coeff0, uint32_t coeff1,
+                                              int32_t height)
+{ /* 4-wide dispatcher: a dedicated kernel for 2 rows, a looping one otherwise. */
+    if (height != 2) {
+        avc_chroma_hz_and_aver_dst_4x4multiple_msa(src, src_stride,
+                                                   dst, dst_stride,
+                                                   coeff0, coeff1, height);
+        return;
+    }
+    avc_chroma_hz_and_aver_dst_4x2_msa(src, src_stride, dst, dst_stride,
+                                       coeff0, coeff1);
+}
+
+static void avc_chroma_hz_and_aver_dst_8w_msa(uint8_t *src, int32_t src_stride,
+                                              uint8_t *dst, int32_t dst_stride,
+                                              uint32_t coeff0, uint32_t coeff1,
+                                              int32_t height)
+{ /* 8-wide horizontal two-tap filter averaged with dst, four rows per pass. */
+    uint32_t row;
+    v16u8 src0, src1, src2, src3, out0, out1;
+    v8u16 res0, res1, res2, res3;
+    v16u8 dst0, dst1, dst2, dst3;
+    v16i8 mask;
+    v16i8 coeff_vec0 = __msa_fill_b(coeff0);
+    v16i8 coeff_vec1 = __msa_fill_b(coeff1);
+    v16u8 coeff_vec = (v16u8) __msa_ilvr_b(coeff_vec0, coeff_vec1);
+    /* Shuffle control from chroma_mask_arr (table defined outside this view). */
+    mask = LD_SB(&chroma_mask_arr[32]);
+    /* height >> 2 passes; a height % 4 remainder is never processed. */
+    for (row = height >> 2; row--;) {
+        LD_UB4(src, src_stride, src0, src1, src2, src3);
+        src += (4 * src_stride);
+        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+        VSHF_B2_UB(src0, src0, src1, src1, mask, mask, src0, src1);
+        VSHF_B2_UB(src2, src2, src3, src3, mask, mask, src2, src3);
+        DOTP_UB4_UH(src0, src1, src2, src3, coeff_vec, coeff_vec, coeff_vec,
+                    coeff_vec, res0, res1, res2, res3);
+        SLLI_4V(res0, res1, res2, res3, 3); /* presumably << 3 -- confirm */
+        SRARI_H4_UH(res0, res1, res2, res3, 6);
+        SAT_UH4_UH(res0, res1, res2, res3, 7);
+        PCKEV_B2_UB(res1, res0, res3, res2, out0, out1);
+        PCKEV_D2_UB(dst1, dst0, dst3, dst2, dst0, dst1); /* dst rows paired up */
+        AVER_UB2_UB(out0, dst0, out1, dst1, out0, out1);
+        ST8x4_UB(out0, out1, dst, dst_stride);
+        dst += (4 * dst_stride);
+    }
+}
+
+static void avc_chroma_vt_and_aver_dst_2x2_msa(uint8_t *src, int32_t src_stride,
+                                               uint8_t *dst, int32_t dst_stride,
+                                               uint32_t coeff0, uint32_t coeff1)
+{ /* 2x2 block: vertical two-tap filter, result averaged with dst. */
+    uint16_t out0, out1;
+    uint32_t load0, load1;
+    v16i8 src0, src1, src2, tmp0, tmp1, res;
+    v16u8 dst_data = { 0 };
+    v8u16 res_r;
+    v16i8 coeff_vec0 = __msa_fill_b(coeff0);
+    v16i8 coeff_vec1 = __msa_fill_b(coeff1);
+    v16u8 coeff_vec = (v16u8) __msa_ilvr_b(coeff_vec0, coeff_vec1);
+    /* Three source rows are read for the two output rows. */
+    LD_SB3(src, src_stride, src0, src1, src2);
+    load0 = LW(dst);
+    load1 = LW(dst + dst_stride);
+
+    INSERT_W2_UB(load0, load1, dst_data);
+    /* NOTE(review): ILVR_B2_SB presumably pairs each row with the next one. */
+    ILVR_B2_SB(src1, src0, src2, src1, tmp0, tmp1);
+
+    tmp0 = (v16i8) __msa_ilvr_d((v2i64) tmp1, (v2i64) tmp0);
+    res_r = __msa_dotp_u_h((v16u8) tmp0, coeff_vec);
+    res_r <<= 3; /* x8 ahead of the rounding shift by 6 below */
+    res_r = (v8u16) __msa_srari_h((v8i16) res_r, 6);
+    res_r = __msa_sat_u_h(res_r, 7);
+    res = __msa_pckev_b((v16i8) res_r, (v16i8) res_r);
+    dst_data = __msa_aver_u_b((v16u8) res, dst_data);
+    out0 = __msa_copy_u_h((v8i16) dst_data, 0); /* halfword 0 -> dst row 0 */
+    out1 = __msa_copy_u_h((v8i16) dst_data, 2); /* halfword 2 -> dst row 1 */
+
+    SH(out0, dst);
+    dst += dst_stride;
+    SH(out1, dst);
+}
+
+static void avc_chroma_vt_and_aver_dst_2x4_msa(uint8_t *src, int32_t src_stride,
+                                               uint8_t *dst, int32_t dst_stride,
+                                               uint32_t coeff0, uint32_t coeff1)
+{
+    /* 2x4 block: vertical two-tap filter, result averaged with dst. */
+    uint32_t load0, load1;
+    v16i8 src0, src1, src2, src3, src4;
+    v16u8 tmp0, tmp1, tmp2, tmp3;
+    v8u16 res_r;
+    v8i16 res;
+    v16i8 coeff_vec0 = __msa_fill_b(coeff0);
+    v16i8 coeff_vec1 = __msa_fill_b(coeff1);
+    v16u8 coeff_vec = (v16u8) __msa_ilvr_b(coeff_vec0, coeff_vec1);
+    v16u8 dst_data = { 0 };
+    /* Five source rows are read for the four output rows. */
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+
+    load0 = LW(dst);
+    load1 = LW(dst + dst_stride);
+    /* Only the low halfword of each value read from dst is kept. */
+    dst_data = (v16u8) __msa_insert_h((v8i16) dst_data, 0, load0);
+    dst_data = (v16u8) __msa_insert_h((v8i16) dst_data, 1, load1);
+
+    load0 = LW(dst + 2 * dst_stride);
+    load1 = LW(dst + 3 * dst_stride);
+
+    dst_data = (v16u8) __msa_insert_h((v8i16) dst_data, 2, load0);
+    dst_data = (v16u8) __msa_insert_h((v8i16) dst_data, 3, load1);
+    /* NOTE(review): ILVR_B4_UB presumably pairs each row with the next one. */
+    ILVR_B4_UB(src1, src0, src2, src1, src3, src2, src4, src3,
+               tmp0, tmp1, tmp2, tmp3);
+    ILVR_W2_UB(tmp1, tmp0, tmp3, tmp2, tmp0, tmp2);
+
+    tmp0 = (v16u8) __msa_ilvr_d((v2i64) tmp2, (v2i64) tmp0);
+
+    res_r = __msa_dotp_u_h(tmp0, coeff_vec);
+    res_r <<= 3; /* x8 ahead of the rounding shift by 6 below */
+    res_r = (v8u16) __msa_srari_h((v8i16) res_r, 6);
+    res_r = __msa_sat_u_h(res_r, 7);
+
+    res = (v8i16) __msa_pckev_b((v16i8) res_r, (v16i8) res_r);
+    res = (v8i16) __msa_aver_u_b((v16u8) res, dst_data);
+
+    ST2x4_UB(res, 0, dst, dst_stride);
+}
+
+static void avc_chroma_vt_and_aver_dst_2x8_msa(uint8_t *src, int32_t src_stride,
+                                               uint8_t *dst, int32_t dst_stride,
+                                               uint32_t coeff0, uint32_t coeff1)
+{ /* 2x8 block: vertical two-tap filter, result averaged with dst. */
+    uint32_t load0, load1, load2, load3;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16u8 tmp0, tmp1, tmp2, tmp3;
+    v8i16 res;
+    v8u16 res_r;
+    v16i8 coeff_vec0 = __msa_fill_b(coeff0);
+    v16i8 coeff_vec1 = __msa_fill_b(coeff1);
+    v16u8 coeff_vec = (v16u8) __msa_ilvr_b(coeff_vec0, coeff_vec1);
+    v16u8 dst_data0 = { 0 };
+    v16u8 dst_data1 = { 0 };
+    /* Nine source rows (5 + 4) are read for the eight output rows. */
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    src += (5 * src_stride);
+    LD_SB4(src, src_stride, src5, src6, src7, src8);
+    /* dst rows 0..3: the low halfword of each value goes into dst_data0. */
+    LW4(dst, dst_stride, load0, load1, load2, load3);
+
+    dst_data0 = (v16u8) __msa_insert_h((v8i16) dst_data0, 0, load0);
+    dst_data0 = (v16u8) __msa_insert_h((v8i16) dst_data0, 1, load1);
+    dst_data0 = (v16u8) __msa_insert_h((v8i16) dst_data0, 2, load2);
+    dst_data0 = (v16u8) __msa_insert_h((v8i16) dst_data0, 3, load3);
+    /* dst rows 4..7: the low halfword of each value goes into dst_data1. */
+    LW4(dst + 4 * dst_stride, dst_stride, load0, load1, load2, load3);
+
+    dst_data1 = (v16u8) __msa_insert_h((v8i16) dst_data1, 0, load0);
+    dst_data1 = (v16u8) __msa_insert_h((v8i16) dst_data1, 1, load1);
+    dst_data1 = (v16u8) __msa_insert_h((v8i16) dst_data1, 2, load2);
+    dst_data1 = (v16u8) __msa_insert_h((v8i16) dst_data1, 3, load3);
+    /* First half: source rows 0..4 produce output rows 0..3. */
+    ILVR_B4_UB(src1, src0, src2, src1, src3, src2, src4, src3,
+               tmp0, tmp1, tmp2, tmp3);
+
+    ILVR_W2_UB(tmp1, tmp0, tmp3, tmp2, tmp0, tmp2);
+
+    tmp0 = (v16u8) __msa_ilvr_d((v2i64) tmp2, (v2i64) tmp0);
+
+    res_r = __msa_dotp_u_h(tmp0, coeff_vec);
+    res_r <<= 3; /* x8 ahead of the rounding shift by 6 below */
+    res_r = (v8u16) __msa_srari_h((v8i16) res_r, 6);
+    res_r = __msa_sat_u_h(res_r, 7);
+
+    res = (v8i16) __msa_pckev_b((v16i8) res_r, (v16i8) res_r);
+    res = (v8i16) __msa_aver_u_b((v16u8) res, dst_data0);
+
+    ST2x4_UB(res, 0, dst, dst_stride);
+    dst += (4 * dst_stride);
+    /* Second half: source rows 4..8 produce output rows 4..7. */
+    ILVR_B4_UB(src5, src4, src6, src5, src7, src6, src8, src7,
+               tmp0, tmp1, tmp2, tmp3);
+
+    ILVR_W2_UB(tmp1, tmp0, tmp3, tmp2, tmp0, tmp2);
+
+    tmp0 = (v16u8) __msa_ilvr_d((v2i64) tmp2, (v2i64) tmp0);
+
+    res_r = __msa_dotp_u_h(tmp0, coeff_vec);
+    res_r <<= 3; /* x8 ahead of the rounding shift by 6 below */
+    res_r = (v8u16) __msa_srari_h((v8i16) res_r, 6);
+    res_r = __msa_sat_u_h(res_r, 7);
+
+    res = (v8i16) __msa_pckev_b((v16i8) res_r, (v16i8) res_r);
+    res = (v8i16) __msa_aver_u_b((v16u8) res, dst_data1);
+
+    ST2x4_UB(res, 0, dst, dst_stride);
+}
+
+static void avc_chroma_vt_and_aver_dst_2w_msa(uint8_t *src, int32_t src_stride,
+                                              uint8_t *dst, int32_t dst_stride,
+                                              uint32_t coeff0, uint32_t coeff1,
+                                              int32_t height)
+{
+    /* Kernels exist for 2, 4 and 8 rows; any other height writes nothing. */
+    if (height == 2)
+        avc_chroma_vt_and_aver_dst_2x2_msa(src, src_stride, dst, dst_stride,
+                                           coeff0, coeff1);
+    else if (height == 4)
+        avc_chroma_vt_and_aver_dst_2x4_msa(src, src_stride, dst, dst_stride,
+                                           coeff0, coeff1);
+    else if (height == 8)
+        avc_chroma_vt_and_aver_dst_2x8_msa(src, src_stride, dst, dst_stride,
+                                           coeff0, coeff1);
+}
+
+static void avc_chroma_vt_and_aver_dst_4x2_msa(uint8_t *src, int32_t src_stride,
+                                               uint8_t *dst, int32_t dst_stride,
+                                               uint32_t coeff0, uint32_t coeff1)
+{ /* 4x2 block: vertical two-tap filter, result averaged with dst. */
+    uint32_t load0, load1;
+    v16i8 src0, src1, src2, tmp0, tmp1;
+    v16u8 dst_data = { 0 };
+    v8u16 res_r;
+    v16u8 res;
+    v16i8 coeff_vec0 = __msa_fill_b(coeff0);
+    v16i8 coeff_vec1 = __msa_fill_b(coeff1);
+    v16u8 coeff_vec = (v16u8) __msa_ilvr_b(coeff_vec0, coeff_vec1);
+    /* Three source rows are read for the two output rows. */
+    LD_SB3(src, src_stride, src0, src1, src2);
+
+    load0 = LW(dst);
+    load1 = LW(dst + dst_stride);
+    /* NOTE(review): ILVR_B2_SB presumably pairs each row with the next one. */
+    INSERT_W2_UB(load0, load1, dst_data);
+    ILVR_B2_SB(src1, src0, src2, src1, tmp0, tmp1);
+
+    tmp0 = (v16i8) __msa_ilvr_d((v2i64) tmp1, (v2i64) tmp0);
+
+    res_r = __msa_dotp_u_h((v16u8) tmp0, coeff_vec);
+    res_r <<= 3; /* x8 ahead of the rounding shift by 6 below */
+    res_r = (v8u16) __msa_srari_h((v8i16) res_r, 6);
+    res_r = __msa_sat_u_h(res_r, 7);
+    res = (v16u8) __msa_pckev_b((v16i8) res_r, (v16i8) res_r);
+    res = __msa_aver_u_b(res, dst_data);
+
+    ST4x2_UB(res, dst, dst_stride);
+}
+
+static void avc_chroma_vt_and_aver_dst_4x4mul_msa(uint8_t *src,
+                                                  int32_t src_stride,
+                                                  uint8_t *dst,
+                                                  int32_t dst_stride,
+                                                  uint32_t coeff0,
+                                                  uint32_t coeff1,
+                                                  int32_t height)
+{ /* 4-wide vertical two-tap filter averaged with dst, four rows per pass. */
+    uint32_t load0, load1, row;
+    v16i8 src0, src1, src2, src3, src4;
+    v16u8 tmp0, tmp1, tmp2, tmp3;
+    v16u8 dst0 = { 0 };
+    v16u8 dst1 = { 0 };
+    v8u16 res0_r, res1_r;
+    v16u8 res0, res1;
+    v16i8 coeff_vec0 = __msa_fill_b(coeff0);
+    v16i8 coeff_vec1 = __msa_fill_b(coeff1);
+    v16u8 coeff_vec = (v16u8) __msa_ilvr_b(coeff_vec0, coeff_vec1);
+    /* The first source row is read once, ahead of the loop. */
+    src0 = LD_SB(src);
+    src += src_stride;
+    /* height >> 2 passes; a height % 4 remainder is never processed. */
+    for (row = (height >> 2); row--;) {
+        LD_SB4(src, src_stride, src1, src2, src3, src4);
+        src += (4 * src_stride);
+        /* dst rows 0 and 1 go into dst0, rows 2 and 3 into dst1. */
+        load0 = LW(dst);
+        load1 = LW(dst + dst_stride);
+
+        INSERT_W2_UB(load0, load1, dst0);
+        load0 = LW(dst + 2 * dst_stride);
+        load1 = LW(dst + 3 * dst_stride);
+        INSERT_W2_UB(load0, load1, dst1);
+        /* NOTE(review): ILVR_B4_UB presumably pairs each row with the next. */
+        ILVR_B4_UB(src1, src0, src2, src1, src3, src2, src4, src3,
+                   tmp0, tmp1, tmp2, tmp3);
+        ILVR_D2_UB(tmp1, tmp0, tmp3, tmp2, tmp0, tmp2);
+        DOTP_UB2_UH(tmp0, tmp2, coeff_vec, coeff_vec, res0_r, res1_r);
+
+        res0_r <<= 3; /* x8 ahead of the shift by 6 below */
+        res1_r <<= 3;
+
+        SRARI_H2_UH(res0_r, res1_r, 6);
+        SAT_UH2_UH(res0_r, res1_r, 7);
+        PCKEV_B2_UB(res0_r, res0_r, res1_r, res1_r, res0, res1);
+        AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
+
+        ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
+        dst += (4 * dst_stride);
+        src0 = src4; /* last row read becomes the first row of the next pass */
+    }
+}
+
+static void avc_chroma_vt_and_aver_dst_4w_msa(uint8_t *src, int32_t src_stride,
+                                              uint8_t *dst, int32_t dst_stride,
+                                              uint32_t coeff0, uint32_t coeff1,
+                                              int32_t height)
+{ /* 4-wide dispatcher: a dedicated kernel for 2 rows, a looping one otherwise. */
+    if (height != 2) {
+        avc_chroma_vt_and_aver_dst_4x4mul_msa(src, src_stride, dst, dst_stride,
+                                              coeff0, coeff1, height);
+        return;
+    }
+    avc_chroma_vt_and_aver_dst_4x2_msa(src, src_stride, dst, dst_stride,
+                                       coeff0, coeff1);
+}
+
+static void avc_chroma_vt_and_aver_dst_8w_msa(uint8_t *src, int32_t src_stride,
+                                              uint8_t *dst, int32_t dst_stride,
+                                              uint32_t coeff0, uint32_t coeff1,
+                                              int32_t height)
+{ /* 8-wide vertical two-tap filter averaged with dst, four rows per pass. */
+    uint32_t row;
+    v16u8 src0, src1, src2, src3, src4;
+    v16u8 out0, out1;
+    v8u16 res0, res1, res2, res3;
+    v16u8 dst0, dst1, dst2, dst3;
+    v16i8 coeff_vec0 = __msa_fill_b(coeff0);
+    v16i8 coeff_vec1 = __msa_fill_b(coeff1);
+    v16u8 coeff_vec = (v16u8) __msa_ilvr_b(coeff_vec0, coeff_vec1);
+    /* The first source row is read once, ahead of the loop. */
+    src0 = LD_UB(src);
+    src += src_stride;
+    /* height >> 2 passes; a height % 4 remainder is never processed. */
+    for (row = height >> 2; row--;) {
+        LD_UB4(src, src_stride, src1, src2, src3, src4);
+        src += (4 * src_stride);
+        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+        ILVR_B4_UB(src1, src0, src2, src1, src3, src2, src4, src3,
+                   src0, src1, src2, src3); /* outputs reuse src0..src3 */
+        DOTP_UB4_UH(src0, src1, src2, src3, coeff_vec, coeff_vec, coeff_vec,
+                    coeff_vec, res0, res1, res2, res3);
+        SLLI_4V(res0, res1, res2, res3, 3); /* presumably << 3 -- confirm */
+        SRARI_H4_UH(res0, res1, res2, res3, 6);
+        SAT_UH4_UH(res0, res1, res2, res3, 7);
+        PCKEV_B2_UB(res1, res0, res3, res2, out0, out1);
+        PCKEV_D2_UB(dst1, dst0, dst3, dst2, dst0, dst1); /* dst rows paired up */
+        AVER_UB2_UB(out0, dst0, out1, dst1, out0, out1);
+        ST8x4_UB(out0, out1, dst, dst_stride);
+
+        dst += (4 * dst_stride);
+        src0 = src4; /* last row read becomes the first row of the next pass */
+    }
+}
+
+static void avc_chroma_hv_and_aver_dst_2x2_msa(uint8_t *src, int32_t src_stride,
+                                               uint8_t *dst, int32_t dst_stride,
+                                               uint32_t coef_hor0,
+                                               uint32_t coef_hor1,
+                                               uint32_t coef_ver0,
+                                               uint32_t coef_ver1)
+{ /* 2x2 block: horizontal then vertical filter, result averaged with dst. */
+    uint16_t out0, out1;
+    v16u8 dst0, dst1;
+    v16u8 src0, src1, src2;
+    v8u16 res_hz0, res_hz1, res_vt0, res_vt1;
+    v16i8 res, mask;
+    v16i8 coeff_hz_vec0 = __msa_fill_b(coef_hor0);
+    v16i8 coeff_hz_vec1 = __msa_fill_b(coef_hor1);
+    v16u8 coeff_hz_vec = (v16u8) __msa_ilvr_b(coeff_hz_vec0, coeff_hz_vec1);
+    v8u16 coeff_vt_vec0 = (v8u16) __msa_fill_h(coef_ver0);
+    v8u16 coeff_vt_vec1 = (v8u16) __msa_fill_h(coef_ver1);
+    /* Shuffle control from chroma_mask_arr (table defined outside this view). */
+    mask = LD_SB(&chroma_mask_arr[48]);
+    /* Row pairs (0,1) and (1,2) are shuffled, so the second set sits one row lower. */
+    LD_UB3(src, src_stride, src0, src1, src2);
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    VSHF_B2_UB(src0, src1, src1, src2, mask, mask, src0, src1);
+    DOTP_UB2_UH(src0, src1, coeff_hz_vec, coeff_hz_vec, res_hz0, res_hz1);
+    MUL2(res_hz0, coeff_vt_vec1, res_hz1, coeff_vt_vec0, res_vt0, res_vt1);
+
+    res_vt0 += res_vt1;
+    res_vt0 = (v8u16) __msa_srari_h((v8i16) res_vt0, 6);
+    res_vt0 = __msa_sat_u_h(res_vt0, 7);
+    res = __msa_pckev_b((v16i8) res_vt0, (v16i8) res_vt0);
+    dst0 = (v16u8) __msa_insve_h((v8i16) dst0, 1, (v8i16) dst1);
+    dst0 = __msa_aver_u_b((v16u8) res, dst0);
+    out0 = __msa_copy_u_h((v8i16) dst0, 0); /* halfword 0 -> dst row 0 */
+    out1 = __msa_copy_u_h((v8i16) dst0, 1); /* halfword 1 -> dst row 1 */
+
+    SH(out0, dst);
+    dst += dst_stride;
+    SH(out1, dst);
+}
+
+static void avc_chroma_hv_and_aver_dst_2x4_msa(uint8_t *src, int32_t src_stride,
+                                               uint8_t *dst, int32_t dst_stride,
+                                               uint32_t coef_hor0,
+                                               uint32_t coef_hor1,
+                                               uint32_t coef_ver0,
+                                               uint32_t coef_ver1)
+{ /* 2x4 block: horizontal then vertical filter, result averaged with dst. */
+    v16u8 src0, src1, src2, src3, src4;
+    v16u8 tmp0, tmp1, tmp2, tmp3;
+    v16u8 dst0, dst1, dst2, dst3;
+    v8u16 res_hz0, res_hz1, res_vt0, res_vt1;
+    v16i8 res, mask;
+    v16i8 coeff_hz_vec0 = __msa_fill_b(coef_hor0);
+    v16i8 coeff_hz_vec1 = __msa_fill_b(coef_hor1);
+    v16u8 coeff_hz_vec = (v16u8) __msa_ilvr_b(coeff_hz_vec0, coeff_hz_vec1);
+    v8u16 coeff_vt_vec0 = (v8u16) __msa_fill_h(coef_ver0);
+    v8u16 coeff_vt_vec1 = (v8u16) __msa_fill_h(coef_ver1);
+    /* Shuffle control from chroma_mask_arr (table defined outside this view). */
+    mask = LD_SB(&chroma_mask_arr[48]);
+    /* tmp0/tmp1 shuffle rows 0..3; tmp2/tmp3 shuffle rows 1..4 (one row lower). */
+    LD_UB5(src, src_stride, src0, src1, src2, src3, src4);
+    LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+    VSHF_B2_UB(src0, src1, src2, src3, mask, mask, tmp0, tmp1);
+    VSHF_B2_UB(src1, src2, src3, src4, mask, mask, tmp2, tmp3);
+    ILVR_D2_UB(tmp1, tmp0, tmp3, tmp2, src0, src1);
+    DOTP_UB2_UH(src0, src1, coeff_hz_vec, coeff_hz_vec, res_hz0, res_hz1);
+    MUL2(res_hz0, coeff_vt_vec1, res_hz1, coeff_vt_vec0, res_vt0, res_vt1);
+
+    res_vt0 += res_vt1;
+    res_vt0 = (v8u16) __msa_srari_h((v8i16) res_vt0, 6);
+    res_vt0 = __msa_sat_u_h(res_vt0, 7);
+    res = __msa_pckev_b((v16i8) res_vt0, (v16i8) res_vt0);
+    /* Gather halfword 0 of dst1..dst3 into halfwords 1..3 of dst0. */
+    dst0 = (v16u8) __msa_insve_h((v8i16) dst0, 1, (v8i16) dst1);
+    dst0 = (v16u8) __msa_insve_h((v8i16) dst0, 2, (v8i16) dst2);
+    dst0 = (v16u8) __msa_insve_h((v8i16) dst0, 3, (v8i16) dst3);
+    dst0 = __msa_aver_u_b((v16u8) res, dst0);
+
+    ST2x4_UB(dst0, 0, dst, dst_stride);
+}
+
+static void avc_chroma_hv_and_aver_dst_2x8_msa(uint8_t *src, int32_t src_stride,
+                                               uint8_t *dst, int32_t dst_stride,
+                                               uint32_t coef_hor0,
+                                               uint32_t coef_hor1,
+                                               uint32_t coef_ver0,
+                                               uint32_t coef_ver1)
+{ /* 2x8 block: horizontal then vertical filter, result averaged with dst. */
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+    v16u8 tmp0, tmp1, tmp2, tmp3;
+    v8u16 res_hz0, res_hz1, res_vt0, res_vt1;
+    v16i8 res, mask;
+    v16i8 coeff_hz_vec0 = __msa_fill_b(coef_hor0);
+    v16i8 coeff_hz_vec1 = __msa_fill_b(coef_hor1);
+    v16u8 coeff_hz_vec = (v16u8) __msa_ilvr_b(coeff_hz_vec0, coeff_hz_vec1);
+    v8u16 coeff_vt_vec0 = (v8u16) __msa_fill_h(coef_ver0);
+    v8u16 coeff_vt_vec1 = (v8u16) __msa_fill_h(coef_ver1);
+    /* Shuffle control from chroma_mask_arr (table defined outside this view). */
+    mask = LD_SB(&chroma_mask_arr[48]);
+    /* Nine source rows (5 + 4) are read for the eight output rows. */
+    LD_UB5(src, src_stride, src0, src1, src2, src3, src4);
+    src += (5 * src_stride);
+    LD_UB4(src, src_stride, src5, src6, src7, src8);
+
+    LD_UB8(dst, dst_stride, dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7);
+    /* dst rows 0..3 are gathered into dst0, dst rows 4..7 into dst4. */
+    dst0 = (v16u8) __msa_insve_h((v8i16) dst0, 1, (v8i16) dst1);
+    dst0 = (v16u8) __msa_insve_h((v8i16) dst0, 2, (v8i16) dst2);
+    dst0 = (v16u8) __msa_insve_h((v8i16) dst0, 3, (v8i16) dst3);
+
+    dst4 = (v16u8) __msa_insve_h((v8i16) dst4, 1, (v8i16) dst5);
+    dst4 = (v16u8) __msa_insve_h((v8i16) dst4, 2, (v8i16) dst6);
+    dst4 = (v16u8) __msa_insve_h((v8i16) dst4, 3, (v8i16) dst7);
+    /* src0/src1 cover rows 0..4 (first half); src4/src5 cover rows 4..8. */
+    VSHF_B2_UB(src0, src1, src2, src3, mask, mask, tmp0, tmp1);
+    VSHF_B2_UB(src1, src2, src3, src4, mask, mask, tmp2, tmp3);
+    ILVR_D2_UB(tmp1, tmp0, tmp3, tmp2, src0, src1);
+    VSHF_B2_UB(src4, src5, src6, src7, mask, mask, tmp0, tmp1);
+    VSHF_B2_UB(src5, src6, src7, src8, mask, mask, tmp2, tmp3);
+    ILVR_D2_UB(tmp1, tmp0, tmp3, tmp2, src4, src5);
+    DOTP_UB2_UH(src0, src1, coeff_hz_vec, coeff_hz_vec, res_hz0, res_hz1);
+    MUL2(res_hz0, coeff_vt_vec1, res_hz1, coeff_vt_vec0, res_vt0, res_vt1);
+
+    res_vt0 += res_vt1;
+    res_vt0 = (v8u16) __msa_srari_h((v8i16) res_vt0, 6);
+    res_vt0 = __msa_sat_u_h(res_vt0, 7);
+    res = __msa_pckev_b((v16i8) res_vt0, (v16i8) res_vt0);
+    dst0 = __msa_aver_u_b((v16u8) res, dst0);
+
+    ST2x4_UB(dst0, 0, dst, dst_stride);
+    dst += (4 * dst_stride);
+    /* Second half: same arithmetic on src4/src5, averaged with dst4. */
+    DOTP_UB2_UH(src4, src5, coeff_hz_vec, coeff_hz_vec, res_hz0, res_hz1);
+    MUL2(res_hz0, coeff_vt_vec1, res_hz1, coeff_vt_vec0, res_vt0, res_vt1);
+
+    res_vt0 += res_vt1;
+    res_vt0 = (v8u16) __msa_srari_h((v8i16) res_vt0, 6);
+    res_vt0 = __msa_sat_u_h(res_vt0, 7);
+    res = __msa_pckev_b((v16i8) res_vt0, (v16i8) res_vt0);
+    dst4 = __msa_aver_u_b((v16u8) res, dst4);
+
+    ST2x4_UB(dst4, 0, dst, dst_stride);
+}
+
+static void avc_chroma_hv_and_aver_dst_2w_msa(uint8_t *src, int32_t src_stride,
+                                              uint8_t *dst, int32_t dst_stride,
+                                              uint32_t coef_hor0,
+                                              uint32_t coef_hor1,
+                                              uint32_t coef_ver0,
+                                              uint32_t coef_ver1,
+                                              int32_t height)
+{
+    /* Kernels exist for 2, 4 and 8 rows; any other height writes nothing. */
+    if (height == 2)
+        avc_chroma_hv_and_aver_dst_2x2_msa(src, src_stride, dst, dst_stride,
+                                           coef_hor0, coef_hor1,
+                                           coef_ver0, coef_ver1);
+    else if (height == 4)
+        avc_chroma_hv_and_aver_dst_2x4_msa(src, src_stride, dst, dst_stride,
+                                           coef_hor0, coef_hor1,
+                                           coef_ver0, coef_ver1);
+    else if (height == 8)
+        avc_chroma_hv_and_aver_dst_2x8_msa(src, src_stride, dst, dst_stride,
+                                           coef_hor0, coef_hor1,
+                                           coef_ver0, coef_ver1);
+}
+
+static void avc_chroma_hv_and_aver_dst_4x2_msa(uint8_t *src, int32_t src_stride,
+                                               uint8_t *dst, int32_t dst_stride,
+                                               uint32_t coef_hor0,
+                                               uint32_t coef_hor1,
+                                               uint32_t coef_ver0,
+                                               uint32_t coef_ver1)
+{ /* 4x2 block: horizontal then vertical filter, result averaged with dst. */
+    v16u8 src0, src1, src2;
+    v16u8 dst0, dst1;
+    v8u16 res_hz0, res_hz1, res_vt0, res_vt1;
+    v16i8 res, mask;
+    v16i8 coeff_hz_vec0 = __msa_fill_b(coef_hor0);
+    v16i8 coeff_hz_vec1 = __msa_fill_b(coef_hor1);
+    v16u8 coeff_hz_vec = (v16u8) __msa_ilvr_b(coeff_hz_vec0, coeff_hz_vec1);
+    v8u16 coeff_vt_vec0 = (v8u16) __msa_fill_h(coef_ver0);
+    v8u16 coeff_vt_vec1 = (v8u16) __msa_fill_h(coef_ver1);
+    /* Shuffle control from chroma_mask_arr (table defined outside this view). */
+    mask = LD_SB(&chroma_mask_arr[0]);
+    /* Row pairs (0,1) and (1,2) are shuffled, so the second set sits one row lower. */
+    LD_UB3(src, src_stride, src0, src1, src2);
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    VSHF_B2_UB(src0, src1, src1, src2, mask, mask, src0, src1);
+    DOTP_UB2_UH(src0, src1, coeff_hz_vec, coeff_hz_vec, res_hz0, res_hz1);
+    MUL2(res_hz0, coeff_vt_vec1, res_hz1, coeff_vt_vec0, res_vt0, res_vt1);
+
+    res_vt0 += res_vt1;
+    res_vt0 = (v8u16) __msa_srari_h((v8i16) res_vt0, 6);
+    res_vt0 = __msa_sat_u_h(res_vt0, 7);
+    res = __msa_pckev_b((v16i8) res_vt0, (v16i8) res_vt0);
+    dst0 = (v16u8) __msa_insve_w((v4i32) dst0, 1, (v4i32) dst1); /* row 1 -> word 1 */
+    dst0 = __msa_aver_u_b((v16u8) res, dst0);
+
+    ST4x2_UB(dst0, dst, dst_stride);
+}
+
+static void avc_chroma_hv_and_aver_dst_4x4mul_msa(uint8_t *src,
+                                                  int32_t src_stride,
+                                                  uint8_t *dst,
+                                                  int32_t dst_stride,
+                                                  uint32_t coef_hor0,
+                                                  uint32_t coef_hor1,
+                                                  uint32_t coef_ver0,
+                                                  uint32_t coef_ver1,
+                                                  int32_t height)
+{
+    uint32_t row;
+    v16u8 src0, src1, src2, src3, src4;
+    v16u8 dst0, dst1, dst2, dst3;
+    v8u16 res_hz0, res_hz1, res_hz2, res_hz3;
+    v8u16 res_vt0, res_vt1, res_vt2, res_vt3;
+    v16i8 mask;
+    v16i8 coeff_hz_vec0 = __msa_fill_b(coef_hor0);
+    v16i8 coeff_hz_vec1 = __msa_fill_b(coef_hor1);
+    v16u8 coeff_hz_vec = (v16u8) __msa_ilvr_b(coeff_hz_vec0, coeff_hz_vec1);
+    v8u16 coeff_vt_vec0 = (v8u16) __msa_fill_h(coef_ver0);
+    v8u16 coeff_vt_vec1 = (v8u16) __msa_fill_h(coef_ver1);
+    v16u8 res0, res1;
+    /* 4-wide HV filter averaged with dst, four output rows per loop pass. */
+    mask = LD_SB(&chroma_mask_arr[0]);
+
+    src0 = LD_UB(src);
+    src += src_stride;
+    /* runs height >> 2 passes; rows beyond a multiple of 4 are not handled */
+    for (row = (height >> 2); row--;) {
+        LD_UB4(src, src_stride, src1, src2, src3, src4);
+        src += (4 * src_stride);
+
+        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+
+        VSHF_B2_UB(src0, src1, src1, src2, mask, mask, src0, src1);
+        VSHF_B2_UB(src2, src3, src3, src4, mask, mask, src2, src3);
+        DOTP_UB4_UH(src0, src1, src2, src3, coeff_hz_vec, coeff_hz_vec,
+                    coeff_hz_vec, coeff_hz_vec, res_hz0, res_hz1, res_hz2,
+                    res_hz3);
+        MUL4(res_hz0, coeff_vt_vec1, res_hz1, coeff_vt_vec0, res_hz2,
+             coeff_vt_vec1, res_hz3, coeff_vt_vec0, res_vt0, res_vt1, res_vt2,
+             res_vt3);
+        ADD2(res_vt0, res_vt1, res_vt2, res_vt3, res_vt0, res_vt1);
+        SRARI_H2_UH(res_vt0, res_vt1, 6);
+        SAT_UH2_UH(res_vt0, res_vt1, 7);
+        PCKEV_B2_UB(res_vt0, res_vt0, res_vt1, res_vt1, res0, res1);
+        /* word 0 of dst1 / dst3 goes into word 1 of dst0 / dst2 for the avg */
+        dst0 = (v16u8) __msa_insve_w((v4i32) dst0, 1, (v4i32) dst1);
+        dst1 = (v16u8) __msa_insve_w((v4i32) dst2, 1, (v4i32) dst3);
+
+        AVER_UB2_UB(res0, dst0, res1, dst1, dst0, dst1);
+
+        ST4x4_UB(dst0, dst1, 0, 1, 0, 1, dst, dst_stride);
+        dst += (4 * dst_stride);
+        src0 = src4;    /* last loaded row opens the next pass */
+    }
+}
+
+static void avc_chroma_hv_and_aver_dst_4w_msa(uint8_t *src, int32_t src_stride,
+                                              uint8_t *dst, int32_t dst_stride,
+                                              uint32_t coef_hor0,
+                                              uint32_t coef_hor1,
+                                              uint32_t coef_ver0,
+                                              uint32_t coef_ver1,
+                                              int32_t height)
+{
+    if (height != 2) {  /* every height but 2: four-rows-per-pass kernel */
+        avc_chroma_hv_and_aver_dst_4x4mul_msa(src, src_stride, dst, dst_stride,
+                                              coef_hor0, coef_hor1,
+                                              coef_ver0, coef_ver1, height);
+    } else {            /* exactly two rows: dedicated 4x2 kernel */
+        avc_chroma_hv_and_aver_dst_4x2_msa(src, src_stride, dst, dst_stride,
+                                           coef_hor0, coef_hor1,
+                                           coef_ver0, coef_ver1);
+    }
+}
+
+static void avc_chroma_hv_and_aver_dst_8w_msa(uint8_t *src, int32_t src_stride,
+                                              uint8_t *dst, int32_t dst_stride,
+                                              uint32_t coef_hor0,
+                                              uint32_t coef_hor1,
+                                              uint32_t coef_ver0,
+                                              uint32_t coef_ver1,
+                                              int32_t height)
+{
+    uint32_t row;
+    v16u8 src0, src1, src2, src3, src4, out0, out1;
+    v8u16 res_hz0, res_hz1, res_hz2;
+    v8u16 res_hz3, res_hz4;
+    v8u16 res_vt0, res_vt1, res_vt2, res_vt3;
+    v16u8 dst0, dst1, dst2, dst3;
+    v16i8 mask;
+    v16i8 coeff_hz_vec0 = __msa_fill_b(coef_hor0);
+    v16i8 coeff_hz_vec1 = __msa_fill_b(coef_hor1);
+    v16u8 coeff_hz_vec = (v16u8) __msa_ilvr_b(coeff_hz_vec0, coeff_hz_vec1);
+    v8u16 coeff_vt_vec0 = (v8u16) __msa_fill_h(coef_ver0);
+    v8u16 coeff_vt_vec1 = (v8u16) __msa_fill_h(coef_ver1);
+    /* 8-wide HV filter averaged with dst, four output rows per loop pass. */
+    mask = LD_SB(&chroma_mask_arr[32]);
+
+    src0 = LD_UB(src);
+    src += src_stride;
+    /* horizontal sums of the first row are formed once, ahead of the loop */
+    src0 = (v16u8) __msa_vshf_b(mask, (v16i8) src0, (v16i8) src0);
+    res_hz0 = __msa_dotp_u_h(src0, coeff_hz_vec);
+
+    for (row = (height >> 2); row--;) {  /* leftover rows (height & 3) skipped */
+        LD_UB4(src, src_stride, src1, src2, src3, src4);
+        src += (4 * src_stride);
+
+        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+        VSHF_B2_UB(src1, src1, src2, src2, mask, mask, src1, src2);
+        VSHF_B2_UB(src3, src3, src4, src4, mask, mask, src3, src4);
+        DOTP_UB4_UH(src1, src2, src3, src4, coeff_hz_vec, coeff_hz_vec,
+                    coeff_hz_vec, coeff_hz_vec, res_hz1, res_hz2, res_hz3,
+                    res_hz4);
+        MUL4(res_hz1, coeff_vt_vec0, res_hz2, coeff_vt_vec0, res_hz3,
+             coeff_vt_vec0, res_hz4, coeff_vt_vec0, res_vt0, res_vt1, res_vt2,
+             res_vt3);
+        /* each row: next-row sums * coef_ver0 + previous-row sums * coef_ver1 */
+        res_vt0 += (res_hz0 * coeff_vt_vec1);
+        res_vt1 += (res_hz1 * coeff_vt_vec1);
+        res_vt2 += (res_hz2 * coeff_vt_vec1);
+        res_vt3 += (res_hz3 * coeff_vt_vec1);
+
+        SRARI_H4_UH(res_vt0, res_vt1, res_vt2, res_vt3, 6);
+        SAT_UH4_UH(res_vt0, res_vt1, res_vt2, res_vt3, 7);
+
+        PCKEV_B2_UB(res_vt1, res_vt0, res_vt3, res_vt2, out0, out1);
+        PCKEV_D2_UB(dst1, dst0, dst3, dst2, dst0, dst1);
+        AVER_UB2_UB(out0, dst0, out1, dst1, out0, out1);
+        ST8x4_UB(out0, out1, dst, dst_stride);
+        dst += (4 * dst_stride);
+        /* the last row's horizontal sums seed the next pass */
+        res_hz0 = res_hz4;
+    }
+}
+
+static void copy_width8_msa(uint8_t *src, int32_t src_stride,
+                            uint8_t *dst, int32_t dst_stride,
+                            int32_t height)
+{
+    int32_t cnt;
+    uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
+    /* Row copy, 8 bytes wide; unroll picked from height % 12, 8, 4, 2. */
+    if (0 == height % 12) {
+        for (cnt = (height / 12); cnt--;) {
+            LD_UB8(src, src_stride,
+                   src0, src1, src2, src3, src4, src5, src6, src7);
+            src += (8 * src_stride);
+
+            out0 = __msa_copy_u_d((v2i64) src0, 0);  /* doubleword element 0 */
+            out1 = __msa_copy_u_d((v2i64) src1, 0);
+            out2 = __msa_copy_u_d((v2i64) src2, 0);
+            out3 = __msa_copy_u_d((v2i64) src3, 0);
+            out4 = __msa_copy_u_d((v2i64) src4, 0);
+            out5 = __msa_copy_u_d((v2i64) src5, 0);
+            out6 = __msa_copy_u_d((v2i64) src6, 0);
+            out7 = __msa_copy_u_d((v2i64) src7, 0);
+
+            SD4(out0, out1, out2, out3, dst, dst_stride);
+            dst += (4 * dst_stride);
+            SD4(out4, out5, out6, out7, dst, dst_stride);
+            dst += (4 * dst_stride);
+            /* remaining four rows of this 12-row pass */
+            LD_UB4(src, src_stride, src0, src1, src2, src3);
+            src += (4 * src_stride);
+
+            out0 = __msa_copy_u_d((v2i64) src0, 0);
+            out1 = __msa_copy_u_d((v2i64) src1, 0);
+            out2 = __msa_copy_u_d((v2i64) src2, 0);
+            out3 = __msa_copy_u_d((v2i64) src3, 0);
+
+            SD4(out0, out1, out2, out3, dst, dst_stride);
+            dst += (4 * dst_stride);
+        }
+    } else if (0 == height % 8) {
+        for (cnt = height >> 3; cnt--;) {
+            LD_UB8(src, src_stride,
+                   src0, src1, src2, src3, src4, src5, src6, src7);
+            src += (8 * src_stride);
+
+            out0 = __msa_copy_u_d((v2i64) src0, 0);
+            out1 = __msa_copy_u_d((v2i64) src1, 0);
+            out2 = __msa_copy_u_d((v2i64) src2, 0);
+            out3 = __msa_copy_u_d((v2i64) src3, 0);
+            out4 = __msa_copy_u_d((v2i64) src4, 0);
+            out5 = __msa_copy_u_d((v2i64) src5, 0);
+            out6 = __msa_copy_u_d((v2i64) src6, 0);
+            out7 = __msa_copy_u_d((v2i64) src7, 0);
+
+            SD4(out0, out1, out2, out3, dst, dst_stride);
+            dst += (4 * dst_stride);
+            SD4(out4, out5, out6, out7, dst, dst_stride);
+            dst += (4 * dst_stride);
+        }
+    } else if (0 == height % 4) {
+        for (cnt = (height / 4); cnt--;) {
+            LD_UB4(src, src_stride, src0, src1, src2, src3);
+            src += (4 * src_stride);
+            out0 = __msa_copy_u_d((v2i64) src0, 0);
+            out1 = __msa_copy_u_d((v2i64) src1, 0);
+            out2 = __msa_copy_u_d((v2i64) src2, 0);
+            out3 = __msa_copy_u_d((v2i64) src3, 0);
+
+            SD4(out0, out1, out2, out3, dst, dst_stride);
+            dst += (4 * dst_stride);
+        }
+    } else if (0 == height % 2) {
+        for (cnt = (height / 2); cnt--;) {
+            LD_UB2(src, src_stride, src0, src1);
+            src += (2 * src_stride);
+            out0 = __msa_copy_u_d((v2i64) src0, 0);
+            out1 = __msa_copy_u_d((v2i64) src1, 0);
+
+            SD(out0, dst);
+            dst += dst_stride;
+            SD(out1, dst);
+            dst += dst_stride;
+        }
+    }   /* no final else: an odd height copies nothing */
+}
+
+static void avg_width4_msa(uint8_t *src, int32_t src_stride,
+                           uint8_t *dst, int32_t dst_stride,
+                           int32_t height)
+{
+    int32_t cnt;
+    uint32_t out0, out1, out2, out3;
+    v16u8 src0, src1, src2, src3;
+    v16u8 dst0, dst1, dst2, dst3;
+    /* Average src rows into dst, storing word 0 (4 bytes) of each result. */
+    if (0 == (height % 4)) {
+        for (cnt = (height / 4); cnt--;) {
+            LD_UB4(src, src_stride, src0, src1, src2, src3);
+            src += (4 * src_stride);
+
+            LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+
+            AVER_UB4_UB(src0, dst0, src1, dst1, src2, dst2, src3, dst3,
+                        dst0, dst1, dst2, dst3);
+            /* full vectors are averaged; only word element 0 is written */
+            out0 = __msa_copy_u_w((v4i32) dst0, 0);
+            out1 = __msa_copy_u_w((v4i32) dst1, 0);
+            out2 = __msa_copy_u_w((v4i32) dst2, 0);
+            out3 = __msa_copy_u_w((v4i32) dst3, 0);
+            SW4(out0, out1, out2, out3, dst, dst_stride);
+            dst += (4 * dst_stride);
+        }
+    } else if (0 == (height % 2)) {
+        for (cnt = (height / 2); cnt--;) {
+            LD_UB2(src, src_stride, src0, src1);
+            src += (2 * src_stride);
+
+            LD_UB2(dst, dst_stride, dst0, dst1);
+
+            AVER_UB2_UB(src0, dst0, src1, dst1, dst0, dst1);
+
+            out0 = __msa_copy_u_w((v4i32) dst0, 0);
+            out1 = __msa_copy_u_w((v4i32) dst1, 0);
+            SW(out0, dst);
+            dst += dst_stride;
+            SW(out1, dst);
+            dst += dst_stride;
+        }
+    }   /* no final else: an odd height leaves dst untouched */
+}
+
+static void avg_width8_msa(uint8_t *src, int32_t src_stride,
+                           uint8_t *dst, int32_t dst_stride,
+                           int32_t height)
+{
+    int32_t cnt;
+    uint64_t out0, out1, out2, out3;
+    v16u8 src0, src1, src2, src3;
+    v16u8 dst0, dst1, dst2, dst3;
+    /* Average src rows into dst, 8 bytes wide, four rows per pass. */
+    for (cnt = (height / 4); cnt--;) {  /* rows past a multiple of 4 skipped */
+        LD_UB4(src, src_stride, src0, src1, src2, src3);
+        src += (4 * src_stride);
+        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+
+        AVER_UB4_UB(src0, dst0, src1, dst1, src2, dst2, src3, dst3,
+                    dst0, dst1, dst2, dst3);
+        /* only doubleword element 0 of each averaged vector is stored */
+        out0 = __msa_copy_u_d((v2i64) dst0, 0);
+        out1 = __msa_copy_u_d((v2i64) dst1, 0);
+        out2 = __msa_copy_u_d((v2i64) dst2, 0);
+        out3 = __msa_copy_u_d((v2i64) dst3, 0);
+        SD4(out0, out1, out2, out3, dst, dst_stride);
+        dst += (4 * dst_stride);
+    }
+}
+
+void ff_put_h264_chroma_mc8_msa(uint8_t *dst, uint8_t *src,
+                                int stride, int height, int x, int y)
+{
+    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
+    /* x, y weight the taps as (x, 8 - x) and (y, 8 - y); zero skips an axis */
+    if (!x && !y) {
+        copy_width8_msa(src, stride, dst, stride, height);
+    } else if (!y) {
+        avc_chroma_hz_8w_msa(src, stride, dst, stride, x, (8 - x), height);
+    } else if (!x) {
+        avc_chroma_vt_8w_msa(src, stride, dst, stride, y, (8 - y), height);
+    } else {
+        avc_chroma_hv_8w_msa(src, stride, dst,
+                             stride, x, (8 - x), y, (8 - y), height);
+    }
+}
+
+void ff_put_h264_chroma_mc4_msa(uint8_t *dst, uint8_t *src,
+                                int stride, int height, int x, int y)
+{
+    int32_t cnt;
+    /* 4-wide put: x, y (asserted 0..7) select the HV, H, V or copy path. */
+    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
+
+    if (x && y) {
+        avc_chroma_hv_4w_msa(src, stride, dst,
+                             stride, x, (8 - x), y, (8 - y), height);
+    } else if (x) {
+        avc_chroma_hz_4w_msa(src, stride, dst, stride, x, (8 - x), height);
+    } else if (y) {
+        avc_chroma_vt_4w_msa(src, stride, dst, stride, y, (8 - y), height);
+    } else {
+        for (cnt = height; cnt--;) {
+            /* load/extract/SW as avg_width4_msa does: no uint32_t deref */
+            SW(__msa_copy_u_w((v4i32) LD_UB(src), 0), dst);
+            src += stride;
+            dst += stride;
+        }
+    }
+}
+
+void ff_put_h264_chroma_mc2_msa(uint8_t *dst, uint8_t *src,
+                                int stride, int height, int x, int y)
+{
+    int32_t cnt;
+    /* 2-wide put: x, y (asserted 0..7) select the HV, H, V or copy path. */
+    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
+
+    if (x && y) {
+        avc_chroma_hv_2w_msa(src, stride, dst,
+                             stride, x, (8 - x), y, (8 - y), height);
+    } else if (x) {
+        avc_chroma_hz_2w_msa(src, stride, dst, stride, x, (8 - x), height);
+    } else if (y) {
+        avc_chroma_vt_2w_msa(src, stride, dst, stride, y, (8 - y), height);
+    } else {
+        for (cnt = height; cnt--;) {
+            dst[0] = src[0];    /* bytewise: no uint16_t alignment assumed */
+            dst[1] = src[1];
+            src += stride;
+            dst += stride;
+        }
+    }
+}
+
+void ff_avg_h264_chroma_mc8_msa(uint8_t *dst, uint8_t *src,
+                                int stride, int height, int x, int y)
+{
+    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
+
+    /* a zero x or y drops that axis; both zero averages src straight in */
+    if (!x && !y) {
+        avg_width8_msa(src, stride, dst, stride, height);
+    } else if (!y) {
+        avc_chroma_hz_and_aver_dst_8w_msa(src, stride, dst,
+                                          stride, x, (8 - x), height);
+    } else if (!x) {
+        avc_chroma_vt_and_aver_dst_8w_msa(src, stride, dst,
+                                          stride, y, (8 - y), height);
+    } else {
+        avc_chroma_hv_and_aver_dst_8w_msa(src, stride, dst,
+                                          stride, x, (8 - x), y,
+                                          (8 - y), height);
+    }
+}
+
+void ff_avg_h264_chroma_mc4_msa(uint8_t *dst, uint8_t *src,
+                                int stride, int height, int x, int y)
+{
+    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
+    /* a zero x or y drops that axis; both zero averages src straight in */
+    if (!x && !y) {
+        avg_width4_msa(src, stride, dst, stride, height);
+    } else if (!y) {
+        avc_chroma_hz_and_aver_dst_4w_msa(src, stride, dst,
+                                          stride, x, (8 - x), height);
+    } else if (!x) {
+        avc_chroma_vt_and_aver_dst_4w_msa(src, stride, dst,
+                                          stride, y, (8 - y), height);
+    } else {
+        avc_chroma_hv_and_aver_dst_4w_msa(src, stride, dst,
+                                          stride, x, (8 - x), y,
+                                          (8 - y), height);
+    }
+}
+
+void ff_avg_h264_chroma_mc2_msa(uint8_t *dst, uint8_t *src,
+                                int stride, int height, int x, int y)
+{
+    int32_t rows_left = height;
+
+    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
+    /* zero x and y: rounded per-byte mean of src and dst, two bytes a row */
+    if (!x && !y) {
+        while (rows_left--) {
+            dst[0] = (dst[0] + src[0] + 1) >> 1;
+            dst[1] = (dst[1] + src[1] + 1) >> 1;
+
+            src += stride;
+            dst += stride;
+        }
+    } else if (!y) {
+        avc_chroma_hz_and_aver_dst_2w_msa(src, stride, dst,
+                                          stride, x, (8 - x), height);
+    } else if (!x) {
+        avc_chroma_vt_and_aver_dst_2w_msa(src, stride, dst,
+                                          stride, y, (8 - y), height);
+    } else {
+        avc_chroma_hv_and_aver_dst_2w_msa(src, stride, dst,
+                                          stride, x, (8 - x), y,
+                                          (8 - y), height);
+    }
+}
diff --git a/libavcodec/mips/h264dsp_init_mips.c b/libavcodec/mips/h264dsp_init_mips.c
index d9182f28..1fe7f846 100644
--- a/libavcodec/mips/h264dsp_init_mips.c
+++ b/libavcodec/mips/h264dsp_init_mips.c
@@ -62,16 +62,53 @@ static av_cold void h264dsp_init_msa(H264DSPContext *c,
         c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels16_8_msa;
         c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels8_8_msa;
         c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels4_8_msa;
+
+        c->h264_idct_add = ff_h264_idct_add_msa;
+        c->h264_idct8_add = ff_h264_idct8_addblk_msa;
+        c->h264_idct_dc_add = ff_h264_idct4x4_addblk_dc_msa;
+        c->h264_idct8_dc_add = ff_h264_idct8_dc_addblk_msa;
+        c->h264_idct_add16 = ff_h264_idct_add16_msa;
+        c->h264_idct8_add4 = ff_h264_idct8_add4_msa;
+
+        if (chroma_format_idc <= 1)
+            c->h264_idct_add8 = ff_h264_idct_add8_msa;
+        else
+            c->h264_idct_add8 = ff_h264_idct_add8_422_msa;
+
+        c->h264_idct_add16intra = ff_h264_idct_add16_intra_msa;
+        c->h264_luma_dc_dequant_idct = ff_h264_deq_idct_luma_dc_msa;
     }  // if (8 == bit_depth)
 }
 #endif  // #if HAVE_MSA
 
-#if HAVE_LOONGSON3
-static av_cold void h264dsp_init_mmi(H264DSPContext * c,
-                                     const int bit_depth,
-                                     const int chroma_format_idc)
+#if HAVE_MMI
+static av_cold void h264dsp_init_mmi(H264DSPContext * c, const int bit_depth,
+        const int chroma_format_idc)
 {
     if (bit_depth == 8) {
+        c->h264_add_pixels4_clear = ff_h264_add_pixels4_8_mmi;
+        c->h264_idct_add = ff_h264_idct_add_8_mmi;
+        c->h264_idct8_add = ff_h264_idct8_add_8_mmi;
+        c->h264_idct_dc_add = ff_h264_idct_dc_add_8_mmi;
+        c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_mmi;
+        c->h264_idct_add16 = ff_h264_idct_add16_8_mmi;
+        c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmi;
+        c->h264_idct8_add4 = ff_h264_idct8_add4_8_mmi;
+
+        if (chroma_format_idc <= 1)
+            c->h264_idct_add8 = ff_h264_idct_add8_8_mmi;
+        else
+            c->h264_idct_add8 = ff_h264_idct_add8_422_8_mmi;
+
+        c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_8_mmi;
+
+        if (chroma_format_idc <= 1)
+            c->h264_chroma_dc_dequant_idct =
+                ff_h264_chroma_dc_dequant_idct_8_mmi;
+        else
+            c->h264_chroma_dc_dequant_idct =
+                ff_h264_chroma422_dc_dequant_idct_8_mmi;
+
         c->weight_h264_pixels_tab[0] = ff_h264_weight_pixels16_8_mmi;
         c->weight_h264_pixels_tab[1] = ff_h264_weight_pixels8_8_mmi;
         c->weight_h264_pixels_tab[2] = ff_h264_weight_pixels4_8_mmi;
@@ -79,9 +116,24 @@ static av_cold void h264dsp_init_mmi(H264DSPContext * c,
         c->biweight_h264_pixels_tab[0] = ff_h264_biweight_pixels16_8_mmi;
         c->biweight_h264_pixels_tab[1] = ff_h264_biweight_pixels8_8_mmi;
         c->biweight_h264_pixels_tab[2] = ff_h264_biweight_pixels4_8_mmi;
+
+        c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_8_mmi;
+        c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_8_mmi;
+
+        if (chroma_format_idc <= 1) {
+            c->h264_h_loop_filter_chroma =
+                ff_deblock_h_chroma_8_mmi;
+            c->h264_h_loop_filter_chroma_intra =
+                ff_deblock_h_chroma_intra_8_mmi;
+        }
+
+        c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_mmi;
+        c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmi;
+        c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_mmi;
+        c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmi;
     }
 }
-#endif /* HAVE_LOONGSON3 */
+#endif /* HAVE_MMI */
 
 av_cold void ff_h264dsp_init_mips(H264DSPContext *c, const int bit_depth,
                                   const int chroma_format_idc)
@@ -89,7 +141,7 @@ av_cold void ff_h264dsp_init_mips(H264DSPContext *c, const int bit_depth,
 #if HAVE_MSA
     h264dsp_init_msa(c, bit_depth, chroma_format_idc);
 #endif  // #if HAVE_MSA
-#if HAVE_LOONGSON3
+#if HAVE_MMI
     h264dsp_init_mmi(c, bit_depth, chroma_format_idc);
-#endif /* HAVE_LOONGSON3 */
+#endif /* HAVE_MMI */
 }
diff --git a/libavcodec/mips/h264dsp_mips.h b/libavcodec/mips/h264dsp_mips.h
index 319f6d3b..2fdfd11d 100644
--- a/libavcodec/mips/h264dsp_mips.h
+++ b/libavcodec/mips/h264dsp_mips.h
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2015 Parag Salasakar (Parag.Salasakar@imgtec.com)
+                      Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
  *
  * This file is part of FFmpeg.
  *
@@ -18,10 +19,11 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef H264_DSP_MIPS_H
-#define H264_DSP_MIPS_H
+#ifndef AVCODEC_MIPS_H264DSP_MIPS_H
+#define AVCODEC_MIPS_H264DSP_MIPS_H
 
 #include "libavcodec/h264.h"
+#include "constants.h"
 
 void ff_h264_h_lpf_luma_inter_msa(uint8_t *src, int stride,
                                   int alpha, int beta, int8_t *tc0);
@@ -41,6 +43,30 @@ void ff_h264_h_loop_filter_luma_mbaff_msa(uint8_t *src, int32_t stride,
                                           int32_t alpha, int32_t beta,
                                           int8_t *tc0);
 
+void ff_h264_idct_add_msa(uint8_t *dst, int16_t *src, int32_t dst_stride);
+void ff_h264_idct4x4_addblk_dc_msa(uint8_t *dst, int16_t *src,
+                                   int32_t dst_stride);
+void ff_h264_deq_idct_luma_dc_msa(int16_t *dst, int16_t *src,
+                                  int32_t de_q_val);
+void ff_h264_idct_add16_msa(uint8_t *dst, const int32_t *blk_offset,
+                            int16_t *block, int32_t stride,
+                            const uint8_t nnzc[15 * 8]);
+void ff_h264_idct_add16_intra_msa(uint8_t *dst, const int32_t *blk_offset,
+                                  int16_t *block, int32_t dst_stride,
+                                  const uint8_t nnzc[15 * 8]);
+void ff_h264_idct_add8_msa(uint8_t **dst, const int32_t *blk_offset,
+                           int16_t *block, int32_t dst_stride,
+                           const uint8_t nnzc[15 * 8]);
+void ff_h264_idct_add8_422_msa(uint8_t **dst, const int32_t *blk_offset,
+                               int16_t *block, int32_t dst_stride,
+                               const uint8_t nnzc[15 * 8]);
+void ff_h264_idct8_addblk_msa(uint8_t *dst, int16_t *src, int32_t dst_stride);
+void ff_h264_idct8_dc_addblk_msa(uint8_t *dst, int16_t *src,
+                                 int32_t dst_stride);
+void ff_h264_idct8_add4_msa(uint8_t *dst, const int *blk_offset,
+                            int16_t *blk, int dst_stride,
+                            const uint8_t nnzc[15 * 8]);
+
 void ff_h264_h_lpf_luma_intra_msa(uint8_t *src, int stride,
                                   int alpha, int beta);
 void ff_h264_v_lpf_luma_intra_msa(uint8_t *src, int stride,
@@ -68,6 +94,251 @@ void ff_weight_h264_pixels8_8_msa(uint8_t *src, int stride, int height,
 void ff_weight_h264_pixels4_8_msa(uint8_t *src, int stride, int height,
                                   int log2_denom, int weight, int offset);
 
+void ff_put_h264_qpel16_mc00_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t dst_stride);
+void ff_put_h264_qpel16_mc10_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t dst_stride);
+void ff_put_h264_qpel16_mc20_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t dst_stride);
+void ff_put_h264_qpel16_mc30_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t dst_stride);
+void ff_put_h264_qpel16_mc01_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t dst_stride);
+void ff_put_h264_qpel16_mc11_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t dst_stride);
+void ff_put_h264_qpel16_mc21_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t dst_stride);
+void ff_put_h264_qpel16_mc31_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t dst_stride);
+void ff_put_h264_qpel16_mc02_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t dst_stride);
+void ff_put_h264_qpel16_mc12_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t dst_stride);
+void ff_put_h264_qpel16_mc22_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t dst_stride);
+void ff_put_h264_qpel16_mc32_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t dst_stride);
+void ff_put_h264_qpel16_mc03_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t dst_stride);
+void ff_put_h264_qpel16_mc13_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t dst_stride);
+void ff_put_h264_qpel16_mc23_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t dst_stride);
+void ff_put_h264_qpel16_mc33_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t dst_stride);
+
+void ff_put_h264_qpel8_mc00_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_put_h264_qpel8_mc10_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_put_h264_qpel8_mc20_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_put_h264_qpel8_mc30_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_put_h264_qpel8_mc01_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_put_h264_qpel8_mc11_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_put_h264_qpel8_mc21_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_put_h264_qpel8_mc31_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_put_h264_qpel8_mc02_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_put_h264_qpel8_mc12_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_put_h264_qpel8_mc22_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_put_h264_qpel8_mc32_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_put_h264_qpel8_mc03_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_put_h264_qpel8_mc13_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_put_h264_qpel8_mc23_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_put_h264_qpel8_mc33_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+
+void ff_put_h264_qpel4_mc00_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_put_h264_qpel4_mc10_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_put_h264_qpel4_mc20_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_put_h264_qpel4_mc30_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_put_h264_qpel4_mc01_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_put_h264_qpel4_mc11_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_put_h264_qpel4_mc21_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_put_h264_qpel4_mc31_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_put_h264_qpel4_mc02_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_put_h264_qpel4_mc12_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_put_h264_qpel4_mc22_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_put_h264_qpel4_mc32_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_put_h264_qpel4_mc03_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_put_h264_qpel4_mc13_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_put_h264_qpel4_mc23_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_put_h264_qpel4_mc33_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+
+void ff_avg_h264_qpel16_mc00_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t dst_stride);
+void ff_avg_h264_qpel16_mc10_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t dst_stride);
+void ff_avg_h264_qpel16_mc20_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t dst_stride);
+void ff_avg_h264_qpel16_mc30_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t dst_stride);
+void ff_avg_h264_qpel16_mc01_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t dst_stride);
+void ff_avg_h264_qpel16_mc11_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t dst_stride);
+void ff_avg_h264_qpel16_mc21_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t dst_stride);
+void ff_avg_h264_qpel16_mc31_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t dst_stride);
+void ff_avg_h264_qpel16_mc02_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t dst_stride);
+void ff_avg_h264_qpel16_mc12_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t dst_stride);
+void ff_avg_h264_qpel16_mc22_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t dst_stride);
+void ff_avg_h264_qpel16_mc32_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t dst_stride);
+void ff_avg_h264_qpel16_mc03_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t dst_stride);
+void ff_avg_h264_qpel16_mc13_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t dst_stride);
+void ff_avg_h264_qpel16_mc23_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t dst_stride);
+void ff_avg_h264_qpel16_mc33_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t dst_stride);
+
+void ff_avg_h264_qpel8_mc00_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_avg_h264_qpel8_mc10_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_avg_h264_qpel8_mc20_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_avg_h264_qpel8_mc30_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_avg_h264_qpel8_mc01_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_avg_h264_qpel8_mc11_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_avg_h264_qpel8_mc21_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_avg_h264_qpel8_mc31_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_avg_h264_qpel8_mc02_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_avg_h264_qpel8_mc12_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_avg_h264_qpel8_mc22_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_avg_h264_qpel8_mc32_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_avg_h264_qpel8_mc03_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_avg_h264_qpel8_mc13_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_avg_h264_qpel8_mc23_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_avg_h264_qpel8_mc33_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+
+void ff_avg_h264_qpel4_mc00_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_avg_h264_qpel4_mc10_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_avg_h264_qpel4_mc20_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_avg_h264_qpel4_mc30_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_avg_h264_qpel4_mc01_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_avg_h264_qpel4_mc11_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_avg_h264_qpel4_mc21_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_avg_h264_qpel4_mc31_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_avg_h264_qpel4_mc02_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_avg_h264_qpel4_mc12_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_avg_h264_qpel4_mc22_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_avg_h264_qpel4_mc32_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_avg_h264_qpel4_mc03_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_avg_h264_qpel4_mc13_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_avg_h264_qpel4_mc23_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+void ff_avg_h264_qpel4_mc33_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t dst_stride);
+
+void ff_h264_intra_predict_plane_8x8_msa(uint8_t *src, ptrdiff_t stride);
+void ff_h264_intra_predict_dc_4blk_8x8_msa(uint8_t *src, ptrdiff_t stride);
+void ff_h264_intra_predict_hor_dc_8x8_msa(uint8_t *src, ptrdiff_t stride);
+void ff_h264_intra_predict_vert_dc_8x8_msa(uint8_t *src, ptrdiff_t stride);
+void ff_h264_intra_predict_mad_cow_dc_l0t_8x8_msa(uint8_t *src,
+                                                  ptrdiff_t stride);
+void ff_h264_intra_predict_mad_cow_dc_0lt_8x8_msa(uint8_t *src,
+                                                  ptrdiff_t stride);
+void ff_h264_intra_predict_mad_cow_dc_l00_8x8_msa(uint8_t *src,
+                                                  ptrdiff_t stride);
+void ff_h264_intra_predict_mad_cow_dc_0l0_8x8_msa(uint8_t *src,
+                                                  ptrdiff_t stride);
+void ff_h264_intra_predict_plane_16x16_msa(uint8_t *src, ptrdiff_t stride);
+void ff_h264_intra_pred_vert_8x8_msa(uint8_t *src, ptrdiff_t stride);
+void ff_h264_intra_pred_horiz_8x8_msa(uint8_t *src, ptrdiff_t stride);
+void ff_h264_intra_pred_dc_16x16_msa(uint8_t *src, ptrdiff_t stride);
+void ff_h264_intra_pred_vert_16x16_msa(uint8_t *src, ptrdiff_t stride);
+void ff_h264_intra_pred_horiz_16x16_msa(uint8_t *src, ptrdiff_t stride);
+void ff_h264_intra_pred_dc_left_16x16_msa(uint8_t *src, ptrdiff_t stride);
+void ff_h264_intra_pred_dc_top_16x16_msa(uint8_t *src, ptrdiff_t stride);
+void ff_h264_intra_pred_dc_128_8x8_msa(uint8_t *src, ptrdiff_t stride);
+void ff_h264_intra_pred_dc_128_16x16_msa(uint8_t *src, ptrdiff_t stride);
+void ff_vp8_pred8x8_127_dc_8_msa(uint8_t *src, ptrdiff_t stride);
+void ff_vp8_pred8x8_129_dc_8_msa(uint8_t *src, ptrdiff_t stride);
+void ff_vp8_pred16x16_127_dc_8_msa(uint8_t *src, ptrdiff_t stride);
+void ff_vp8_pred16x16_129_dc_8_msa(uint8_t *src, ptrdiff_t stride);
+
+void ff_h264_add_pixels4_8_mmi(uint8_t *_dst, int16_t *_src, int stride);
+void ff_h264_idct_add_8_mmi(uint8_t *dst, int16_t *block, int stride);
+void ff_h264_idct8_add_8_mmi(uint8_t *dst, int16_t *block, int stride);
+void ff_h264_idct_dc_add_8_mmi(uint8_t *dst, int16_t *block, int stride);
+void ff_h264_idct8_dc_add_8_mmi(uint8_t *dst, int16_t *block, int stride);
+void ff_h264_idct_add16_8_mmi(uint8_t *dst, const int *block_offset,
+        int16_t *block, int stride, const uint8_t nnzc[15*8]);
+void ff_h264_idct_add16intra_8_mmi(uint8_t *dst, const int *block_offset,
+        int16_t *block, int stride, const uint8_t nnzc[15*8]);
+void ff_h264_idct8_add4_8_mmi(uint8_t *dst, const int *block_offset,
+        int16_t *block, int stride, const uint8_t nnzc[15*8]);
+void ff_h264_idct_add8_8_mmi(uint8_t **dest, const int *block_offset,
+        int16_t *block, int stride, const uint8_t nnzc[15*8]);
+void ff_h264_idct_add8_422_8_mmi(uint8_t **dest, const int *block_offset,
+        int16_t *block, int stride, const uint8_t nnzc[15*8]);
+void ff_h264_luma_dc_dequant_idct_8_mmi(int16_t *output, int16_t *input,
+        int qmul);
+void ff_h264_chroma_dc_dequant_idct_8_mmi(int16_t *block, int qmul);
+void ff_h264_chroma422_dc_dequant_idct_8_mmi(int16_t *block, int qmul);
+
 void ff_h264_weight_pixels16_8_mmi(uint8_t *block, int stride, int height,
         int log2_denom, int weight, int offset);
 void ff_h264_biweight_pixels16_8_mmi(uint8_t *dst, uint8_t *src,
@@ -84,4 +355,223 @@ void ff_h264_biweight_pixels4_8_mmi(uint8_t *dst, uint8_t *src,
         int stride, int height, int log2_denom, int weightd, int weights,
         int offset);
 
-#endif  // #ifndef H264_DSP_MIPS_H
+void ff_deblock_v_chroma_8_mmi(uint8_t *pix, int stride, int alpha, int beta,
+        int8_t *tc0);
+void ff_deblock_v_chroma_intra_8_mmi(uint8_t *pix, int stride, int alpha,
+        int beta);
+void ff_deblock_h_chroma_8_mmi(uint8_t *pix, int stride, int alpha, int beta,
+        int8_t *tc0);
+void ff_deblock_h_chroma_intra_8_mmi(uint8_t *pix, int stride, int alpha,
+        int beta);
+void ff_deblock_v_luma_8_mmi(uint8_t *pix, int stride, int alpha, int beta,
+        int8_t *tc0);
+void ff_deblock_v_luma_intra_8_mmi(uint8_t *pix, int stride, int alpha,
+        int beta);
+void ff_deblock_h_luma_8_mmi(uint8_t *pix, int stride, int alpha, int beta,
+        int8_t *tc0);
+void ff_deblock_h_luma_intra_8_mmi(uint8_t *pix, int stride, int alpha,
+        int beta);
+void ff_deblock_v8_luma_8_mmi(uint8_t *pix, int stride, int alpha, int beta,
+        int8_t *tc0);
+void ff_deblock_v8_luma_intra_8_mmi(uint8_t *pix, int stride, int alpha,
+        int beta);
+
+void ff_put_h264_qpel16_mc00_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel16_mc10_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel16_mc20_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel16_mc30_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel16_mc01_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel16_mc11_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel16_mc21_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel16_mc31_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel16_mc02_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel16_mc12_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel16_mc22_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel16_mc32_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel16_mc03_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel16_mc13_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel16_mc23_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel16_mc33_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+
+void ff_put_h264_qpel8_mc00_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel8_mc10_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel8_mc20_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel8_mc30_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel8_mc01_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel8_mc11_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel8_mc21_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel8_mc31_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel8_mc02_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel8_mc12_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel8_mc22_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel8_mc32_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel8_mc03_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel8_mc13_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel8_mc23_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel8_mc33_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+
+void ff_put_h264_qpel4_mc00_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel4_mc10_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel4_mc20_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel4_mc30_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel4_mc01_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel4_mc11_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel4_mc21_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel4_mc31_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel4_mc02_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel4_mc12_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel4_mc22_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel4_mc32_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel4_mc03_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel4_mc13_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel4_mc23_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_put_h264_qpel4_mc33_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+
+void ff_avg_h264_qpel16_mc00_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel16_mc10_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel16_mc20_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel16_mc30_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel16_mc01_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel16_mc11_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel16_mc21_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel16_mc31_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel16_mc02_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel16_mc12_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel16_mc22_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel16_mc32_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel16_mc03_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel16_mc13_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel16_mc23_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel16_mc33_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+
+void ff_avg_h264_qpel8_mc00_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel8_mc10_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel8_mc20_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel8_mc30_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel8_mc01_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel8_mc11_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel8_mc21_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel8_mc31_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel8_mc02_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel8_mc12_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel8_mc22_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel8_mc32_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel8_mc03_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel8_mc13_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel8_mc23_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel8_mc33_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+
+void ff_avg_h264_qpel4_mc00_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel4_mc10_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel4_mc20_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel4_mc30_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel4_mc01_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel4_mc11_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel4_mc21_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel4_mc31_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel4_mc02_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel4_mc12_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel4_mc22_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel4_mc32_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel4_mc03_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel4_mc13_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel4_mc23_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+void ff_avg_h264_qpel4_mc33_mmi(uint8_t *dst, const uint8_t *src,
+        ptrdiff_t dst_stride);
+
+#endif  // #ifndef AVCODEC_MIPS_H264DSP_MIPS_H
diff --git a/libavcodec/mips/h264dsp_mmi.c b/libavcodec/mips/h264dsp_mmi.c
index 641cd2f4..14c4a432 100644
--- a/libavcodec/mips/h264dsp_mmi.c
+++ b/libavcodec/mips/h264dsp_mmi.c
@@ -4,6 +4,7 @@
  * Copyright (c) 2015 Loongson Technology Corporation Limited
  * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
  *                    Zhang Shuangshuang <zhangshuangshuang@ict.ac.cn>
+ *                    Heiher <r@hev.cc>
  *
  * This file is part of FFmpeg.
  *
@@ -25,6 +26,1011 @@
 #include "libavcodec/bit_depth_template.c"
 #include "h264dsp_mips.h"
 
+/* Add the 4x4 int16 block src to 4 rows of dst (uint8 saturation); zero src. */
+void ff_h264_add_pixels4_8_mmi(uint8_t *dst, int16_t *src, int stride)
+{
+    __asm__ volatile (
+        "xor $f0, $f0, $f0              \r\n" /* $f0 = 0 (unpack/pack filler) */
+        "ldc1 $f2, 0(%[src])            \r\n" /* src[0..3], 8 bytes per load  */
+        "ldc1 $f4, 8(%[src])            \r\n" /* src[4..7]                    */
+        "ldc1 $f6, 16(%[src])           \r\n" /* src[8..11]                   */
+        "ldc1 $f8, 24(%[src])           \r\n" /* src[12..15]                  */
+        "lwc1 $f10, 0(%[dst0])          \r\n" /* 4 pixels of dst row 0        */
+        "lwc1 $f12, 0(%[dst1])          \r\n" /* ... row 1                    */
+        "lwc1 $f14, 0(%[dst2])          \r\n" /* ... row 2                    */
+        "lwc1 $f16, 0(%[dst3])          \r\n" /* ... row 3                    */
+        "punpcklbh $f10, $f10, $f0      \r\n" /* widen pixels to 16 bit       */
+        "punpcklbh $f12, $f12, $f0      \r\n"
+        "punpcklbh $f14, $f14, $f0      \r\n"
+        "punpcklbh $f16, $f16, $f0      \r\n"
+        "paddh $f2, $f2, $f10           \r\n" /* src value + pixel, per lane  */
+        "paddh $f4, $f4, $f12           \r\n"
+        "paddh $f6, $f6, $f14           \r\n"
+        "paddh $f8, $f8, $f16           \r\n"
+        "packushb $f2, $f2, $f0         \r\n" /* saturate back to 8 bit       */
+        "packushb $f4, $f4, $f0         \r\n"
+        "packushb $f6, $f6, $f0         \r\n"
+        "packushb $f8, $f8, $f0         \r\n"
+        "swc1 $f2, 0(%[dst0])           \r\n" /* store 4 pixels to row 0      */
+        "swc1 $f4, 0(%[dst1])           \r\n" /* ... row 1                    */
+        "swc1 $f6, 0(%[dst2])           \r\n" /* ... row 2                    */
+        "swc1 $f8, 0(%[dst3])           \r\n" /* ... row 3                    */
+        ::[dst0]"r"(dst),[dst1]"r"(dst+stride),[dst2]"r"(dst+2*stride),
+          [dst3]"r"(dst+3*stride),[src]"r"(src)
+        : "$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14","$f16","memory"
+    ); /* "memory": asm reads *src and writes *dst; keeps the memset ordered */
+    memset(src, 0, 32);
+}
+
+/* 4x4 inverse transform of block added to dst with clipping; block is zeroed. */
+void ff_h264_idct_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
+{
+    __asm__ volatile (
+        "dli $8, 1                              \r\n"
+        "ldc1 $f0, 0(%[block])                  \r\n" /* block[0..3]   */
+        "dmtc1 $8, $f16                         \r\n" /* $f16 = 1      */
+        "ldc1 $f2, 8(%[block])                  \r\n" /* block[4..7]   */
+        "dli $8, 6                              \r\n"
+        "ldc1 $f4, 16(%[block])                 \r\n" /* block[8..11]  */
+        "dmtc1 $8, $f18                         \r\n" /* $f18 = 6      */
+        "psrah $f8, $f2, $f16                   \r\n" /* 1st butterfly */
+        "ldc1 $f6, 24(%[block])                 \r\n" /* block[12..15] */
+        "psrah $f10, $f6, $f16                  \r\n"
+        "psubh $f8, $f8, $f6                    \r\n"
+        "paddh $f10, $f10, $f2                  \r\n"
+        "paddh $f20, $f4, $f0                   \r\n"
+        "psubh $f0, $f0, $f4                    \r\n"
+        "paddh $f22, $f10, $f20                 \r\n"
+        "psubh $f4, $f20, $f10                  \r\n"
+        "paddh $f20, $f8, $f0                   \r\n"
+        "psubh $f0, $f0, $f8                    \r\n"
+        "punpckhhw $f2, $f22, $f20              \r\n" /* 4x4 transpose */
+        "punpcklhw $f10, $f22, $f20             \r\n"
+        "punpckhhw $f8, $f0, $f4                \r\n"
+        "punpcklhw $f0, $f0, $f4                \r\n"
+        "punpckhwd $f4, $f10, $f0               \r\n"
+        "punpcklwd $f10, $f10, $f0              \r\n"
+        "punpcklwd $f20, $f2, $f8               \r\n"
+        "punpckhwd $f0, $f2, $f8                \r\n"
+        "paddh $f10, $f10, %[ff_pw_32]          \r\n" /* bias for >> 6 */
+        "psrah $f8, $f4, $f16                   \r\n" /* 2nd butterfly */
+        "psrah $f6, $f0, $f16                   \r\n"
+        "psubh $f8, $f8, $f0                    \r\n"
+        "paddh $f6, $f6, $f4                    \r\n"
+        "paddh $f2, $f20, $f10                  \r\n"
+        "psubh $f10, $f10, $f20                 \r\n"
+        "paddh $f20, $f6, $f2                   \r\n"
+        "psubh $f2, $f2, $f6                    \r\n"
+        "paddh $f22, $f8, $f10                  \r\n"
+        "xor $f14, $f14, $f14                   \r\n" /* $f14 = 0      */
+        "psubh $f10, $f10, $f8                  \r\n"
+        "sdc1 $f14, 0(%[block])                 \r\n" /* zero 32 bytes */
+        "sdc1 $f14, 8(%[block])                 \r\n"
+        "sdc1 $f14, 16(%[block])                \r\n"
+        "sdc1 $f14, 24(%[block])                \r\n"
+        "lwc1 $f4, 0(%[dst])                    \r\n" /* dst row 0     */
+        "psrah $f6, $f20, $f18                  \r\n" /* result >> 6   */
+        "gslwxc1 $f0, 0(%[dst], %[stride])      \r\n" /* dst row 1     */
+        "psrah $f8, $f22, $f18                  \r\n"
+        "punpcklbh $f4, $f4, $f14               \r\n" /* widen pixels  */
+        "punpcklbh $f0, $f0, $f14               \r\n"
+        "paddh $f4, $f4, $f6                    \r\n"
+        "paddh $f0, $f0, $f8                    \r\n"
+        "packushb $f4, $f4, $f14                \r\n" /* clip to uint8 */
+        "packushb $f0, $f0, $f14                \r\n"
+        "swc1 $f4, 0(%[dst])                    \r\n"
+        "gsswxc1 $f0, 0(%[dst], %[stride])      \r\n"
+        "daddu %[dst], %[dst], %[stride]        \r\n" /* dst += 2 rows */
+        "daddu %[dst], %[dst], %[stride]        \r\n"
+        "lwc1 $f4, 0(%[dst])                    \r\n" /* dst row 2     */
+        "psrah $f10, $f10, $f18                 \r\n"
+        "gslwxc1 $f0, 0(%[dst], %[stride])      \r\n" /* dst row 3     */
+        "psrah $f2, $f2, $f18                   \r\n"
+        "punpcklbh $f4, $f4, $f14               \r\n"
+        "punpcklbh $f0, $f0, $f14               \r\n"
+        "paddh $f4, $f4, $f10                   \r\n"
+        "paddh $f0, $f0, $f2                    \r\n"
+        "packushb $f4, $f4, $f14                \r\n"
+        "swc1 $f4, 0(%[dst])                    \r\n"
+        "packushb $f0, $f0, $f14                \r\n"
+        "gsswxc1 $f0, 0(%[dst], %[stride])      \r\n"
+        :[dst]"+&r"(dst) /* in/out: the daddu pair above advances it */
+        :[block]"r"(block),[stride]"r"((uint64_t)stride),[ff_pw_32]"f"(ff_pw_32)
+        : "$8","$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14","$f16",
+          "$f18","$f20","$f22","memory" /* asm reads/writes block and dst */
+    );
+    memset(block, 0, 32);
+}
+
+void ff_h264_idct8_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
+{
+    __asm__ volatile (
+        "lhu $10, 0x0(%[block])                     \r\n"
+        "daddiu $29, $29, -0x20                     \r\n"
+        "daddiu $10, $10, 0x20                      \r\n"
+        "ldc1 $f2, 0x10(%[block])                   \r\n"
+        "sh $10, 0x0(%[block])                      \r\n"
+        "ldc1 $f4, 0x20(%[block])                   \r\n"
+        "dli $10, 0x1                               \r\n"
+        "ldc1 $f6, 0x30(%[block])                   \r\n"
+        "dmtc1 $10, $f16                            \r\n"
+        "ldc1 $f10, 0x50(%[block])                  \r\n"
+        "ldc1 $f12, 0x60(%[block])                  \r\n"
+        "ldc1 $f14, 0x70(%[block])                  \r\n"
+        "mov.d $f0, $f2                             \r\n"
+        "psrah $f2, $f2, $f16                       \r\n"
+        "psrah $f8, $f10, $f16                      \r\n"
+        "paddh $f2, $f2, $f0                        \r\n"
+        "paddh $f8, $f8, $f10                       \r\n"
+        "paddh $f2, $f2, $f10                       \r\n"
+        "paddh $f8, $f8, $f14                       \r\n"
+        "paddh $f2, $f2, $f6                        \r\n"
+        "psubh $f8, $f8, $f0                        \r\n"
+        "psubh $f0, $f0, $f6                        \r\n"
+        "psubh $f10, $f10, $f6                      \r\n"
+        "psrah $f6, $f6, $f16                       \r\n"
+        "paddh $f0, $f0, $f14                       \r\n"
+        "psubh $f10, $f10, $f14                     \r\n"
+        "psrah $f14, $f14, $f16                     \r\n"
+        "psubh $f0, $f0, $f6                        \r\n"
+        "dli $10, 0x2                               \r\n"
+        "psubh $f10, $f10, $f14                     \r\n"
+        "dmtc1 $10, $f18                            \r\n"
+        "mov.d $f14, $f2                            \r\n"
+        "psrah $f2, $f2, $f18                       \r\n"
+        "psrah $f6, $f8, $f18                       \r\n"
+        "paddh $f6, $f6, $f0                        \r\n"
+        "psrah $f0, $f0, $f18                       \r\n"
+        "paddh $f2, $f2, $f10                       \r\n"
+        "psrah $f10, $f10, $f18                     \r\n"
+        "psubh $f0, $f0, $f8                        \r\n"
+        "psubh $f14, $f14, $f10                     \r\n"
+        "mov.d $f10, $f12                           \r\n"
+        "psrah $f12, $f12, $f16                     \r\n"
+        "psrah $f8, $f4, $f16                       \r\n"
+        "paddh $f12, $f12, $f4                      \r\n"
+        "psubh $f8, $f8, $f10                       \r\n"
+        "ldc1 $f4, 0x0(%[block])                    \r\n"
+        "ldc1 $f10, 0x40(%[block])                  \r\n"
+        "paddh $f10, $f10, $f4                      \r\n"
+        "paddh $f4, $f4, $f4                        \r\n"
+        "paddh $f12, $f12, $f10                     \r\n"
+        "psubh $f4, $f4, $f10                       \r\n"
+        "paddh $f10, $f10, $f10                     \r\n"
+        "paddh $f8, $f8, $f4                        \r\n"
+        "psubh $f10, $f10, $f12                     \r\n"
+        "paddh $f4, $f4, $f4                        \r\n"
+        "paddh $f14, $f14, $f12                     \r\n"
+        "psubh $f4, $f4, $f8                        \r\n"
+        "paddh $f12, $f12, $f12                     \r\n"
+        "paddh $f0, $f0, $f8                        \r\n"
+        "psubh $f12, $f12, $f14                     \r\n"
+        "paddh $f8, $f8, $f8                        \r\n"
+        "paddh $f6, $f6, $f4                        \r\n"
+        "psubh $f8, $f8, $f0                        \r\n"
+        "paddh $f4, $f4, $f4                        \r\n"
+        "paddh $f2, $f2, $f10                       \r\n"
+        "psubh $f4, $f4, $f6                        \r\n"
+        "paddh $f10, $f10, $f10                     \r\n"
+        "sdc1 $f12, 0x0(%[block])                   \r\n"
+        "psubh $f10, $f10, $f2                      \r\n"
+        "punpckhhw $f12, $f14, $f0                  \r\n"
+        "punpcklhw $f14, $f14, $f0                  \r\n"
+        "punpckhhw $f0, $f6, $f2                    \r\n"
+        "punpcklhw $f6, $f6, $f2                    \r\n"
+        "punpckhwd $f2, $f14, $f6                   \r\n"
+        "punpcklwd $f14, $f14, $f6                  \r\n"
+        "punpckhwd $f6, $f12, $f0                   \r\n"
+        "punpcklwd $f12, $f12, $f0                  \r\n"
+        "ldc1 $f0, 0x0(%[block])                    \r\n"
+        "sdc1 $f14, 0x0($29)                        \r\n"
+        "sdc1 $f2, 0x10($29)                        \r\n"
+        "dmfc1 $8, $f12                             \r\n"
+        "dmfc1 $11, $f6                             \r\n"
+        "punpckhhw $f6, $f10, $f4                   \r\n"
+        "punpcklhw $f10, $f10, $f4                  \r\n"
+        "punpckhhw $f4, $f8, $f0                    \r\n"
+        "punpcklhw $f8, $f8, $f0                    \r\n"
+        "punpckhwd $f0, $f10, $f8                   \r\n"
+        "punpcklwd $f10, $f10, $f8                  \r\n"
+        "punpckhwd $f8, $f6, $f4                    \r\n"
+        "punpcklwd $f6, $f6, $f4                    \r\n"
+        "sdc1 $f10, 0x8($29)                        \r\n"
+        "sdc1 $f0, 0x18($29)                        \r\n"
+        "dmfc1 $9, $f6                              \r\n"
+        "dmfc1 $12, $f8                             \r\n"
+        "ldc1 $f2, 0x18(%[block])                   \r\n"
+        "ldc1 $f12, 0x28(%[block])                  \r\n"
+        "ldc1 $f4, 0x38(%[block])                   \r\n"
+        "ldc1 $f0, 0x58(%[block])                   \r\n"
+        "ldc1 $f6, 0x68(%[block])                   \r\n"
+        "ldc1 $f8, 0x78(%[block])                   \r\n"
+        "mov.d $f14, $f2                            \r\n"
+        "psrah $f10, $f0, $f16                      \r\n"
+        "psrah $f2, $f2, $f16                       \r\n"
+        "paddh $f10, $f10, $f0                      \r\n"
+        "paddh $f2, $f2, $f14                       \r\n"
+        "paddh $f10, $f10, $f8                      \r\n"
+        "paddh $f2, $f2, $f0                        \r\n"
+        "psubh $f10, $f10, $f14                     \r\n"
+        "paddh $f2, $f2, $f4                        \r\n"
+        "psubh $f14, $f14, $f4                      \r\n"
+        "psubh $f0, $f0, $f4                        \r\n"
+        "psrah $f4, $f4, $f16                       \r\n"
+        "paddh $f14, $f14, $f8                      \r\n"
+        "psubh $f0, $f0, $f8                        \r\n"
+        "psrah $f8, $f8, $f16                       \r\n"
+        "psubh $f14, $f14, $f4                      \r\n"
+        "psubh $f0, $f0, $f8                        \r\n"
+        "mov.d $f8, $f2                             \r\n"
+        "psrah $f4, $f10, $f18                      \r\n"
+        "psrah $f2, $f2, $f18                       \r\n"
+        "paddh $f4, $f4, $f14                       \r\n"
+        "psrah $f14, $f14, $f18                     \r\n"
+        "paddh $f2, $f2, $f0                        \r\n"
+        "psrah $f0, $f0, $f18                       \r\n"
+        "psubh $f14, $f14, $f10                     \r\n"
+        "psubh $f8, $f8, $f0                        \r\n"
+        "mov.d $f0, $f6                             \r\n"
+        "psrah $f6, $f6, $f16                       \r\n"
+        "psrah $f10, $f12, $f16                     \r\n"
+        "paddh $f6, $f6, $f12                       \r\n"
+        "psubh $f10, $f10, $f0                      \r\n"
+        "ldc1 $f12, 0x8(%[block])                   \r\n"
+        "ldc1 $f0, 0x48(%[block])                   \r\n"
+        "paddh $f0, $f0, $f12                       \r\n"
+        "paddh $f12, $f12, $f12                     \r\n"
+        "paddh $f6, $f6, $f0                        \r\n"
+        "psubh $f12, $f12, $f0                      \r\n"
+        "paddh $f0, $f0, $f0                        \r\n"
+        "paddh $f10, $f10, $f12                     \r\n"
+        "psubh $f0, $f0, $f6                        \r\n"
+        "paddh $f12, $f12, $f12                     \r\n"
+        "paddh $f8, $f8, $f6                        \r\n"
+        "psubh $f12, $f12, $f10                     \r\n"
+        "paddh $f6, $f6, $f6                        \r\n"
+        "paddh $f14, $f14, $f10                     \r\n"
+        "psubh $f6, $f6, $f8                        \r\n"
+        "paddh $f10, $f10, $f10                     \r\n"
+        "paddh $f4, $f4, $f12                       \r\n"
+        "psubh $f10, $f10, $f14                     \r\n"
+        "paddh $f12, $f12, $f12                     \r\n"
+        "paddh $f2, $f2, $f0                        \r\n"
+        "psubh $f12, $f12, $f4                      \r\n"
+        "paddh $f0, $f0, $f0                        \r\n"
+        "sdc1 $f6, 0x8(%[block])                    \r\n"
+        "psubh $f0, $f0, $f2                        \r\n"
+        "punpckhhw $f6, $f8, $f14                   \r\n"
+        "punpcklhw $f8, $f8, $f14                   \r\n"
+        "punpckhhw $f14, $f4, $f2                   \r\n"
+        "punpcklhw $f4, $f4, $f2                    \r\n"
+        "punpckhwd $f2, $f8, $f4                    \r\n"
+        "punpcklwd $f8, $f8, $f4                    \r\n"
+        "punpckhwd $f4, $f6, $f14                   \r\n"
+        "punpcklwd $f6, $f6, $f14                   \r\n"
+        "ldc1 $f14, 0x8(%[block])                   \r\n"
+        "dmfc1 $13, $f8                             \r\n"
+        "dmfc1 $15, $f2                             \r\n"
+        "mov.d $f24, $f6                            \r\n"
+        "mov.d $f28, $f4                            \r\n"
+        "punpckhhw $f4, $f0, $f12                   \r\n"
+        "punpcklhw $f0, $f0, $f12                   \r\n"
+        "punpckhhw $f12, $f10, $f14                 \r\n"
+        "punpcklhw $f10, $f10, $f14                 \r\n"
+        "punpckhwd $f14, $f0, $f10                  \r\n"
+        "punpcklwd $f0, $f0, $f10                   \r\n"
+        "punpckhwd $f10, $f4, $f12                  \r\n"
+        "punpcklwd $f4, $f4, $f12                   \r\n"
+        "dmfc1 $14, $f0                             \r\n"
+        "mov.d $f22, $f14                           \r\n"
+        "mov.d $f26, $f4                            \r\n"
+        "mov.d $f30, $f10                           \r\n"
+        "daddiu $10, %[dst], 0x4                    \r\n"
+        "dmtc1 $15, $f14                            \r\n"
+        "dmtc1 $11, $f12                            \r\n"
+        "ldc1 $f2, 0x10($29)                        \r\n"
+        "dmtc1 $8, $f6                              \r\n"
+        "mov.d $f8, $f2                             \r\n"
+        "psrah $f2, $f2, $f16                       \r\n"
+        "psrah $f0, $f14, $f16                      \r\n"
+        "paddh $f2, $f2, $f8                        \r\n"
+        "paddh $f0, $f0, $f14                       \r\n"
+        "paddh $f2, $f2, $f14                       \r\n"
+        "paddh $f0, $f0, $f28                       \r\n"
+        "paddh $f2, $f2, $f12                       \r\n"
+        "psubh $f0, $f0, $f8                        \r\n"
+        "psubh $f8, $f8, $f12                       \r\n"
+        "psubh $f14, $f14, $f12                     \r\n"
+        "psrah $f12, $f12, $f16                     \r\n"
+        "paddh $f8, $f8, $f28                       \r\n"
+        "psubh $f14, $f14, $f28                     \r\n"
+        "psrah $f10, $f28, $f16                     \r\n"
+        "psubh $f8, $f8, $f12                       \r\n"
+        "psubh $f14, $f14, $f10                     \r\n"
+        "mov.d $f10, $f2                            \r\n"
+        "psrah $f2, $f2, $f18                       \r\n"
+        "psrah $f12, $f0, $f18                      \r\n"
+        "paddh $f2, $f2, $f14                       \r\n"
+        "paddh $f12, $f12, $f8                      \r\n"
+        "psrah $f8, $f8, $f18                       \r\n"
+        "psrah $f14, $f14, $f18                     \r\n"
+        "psubh $f8, $f8, $f0                        \r\n"
+        "psubh $f10, $f10, $f14                     \r\n"
+        "mov.d $f14, $f24                           \r\n"
+        "psrah $f4, $f24, $f16                      \r\n"
+        "psrah $f0, $f6, $f16                       \r\n"
+        "paddh $f4, $f4, $f6                        \r\n"
+        "psubh $f0, $f0, $f14                       \r\n"
+        "ldc1 $f6, 0x0($29)                         \r\n"
+        "dmtc1 $13, $f14                            \r\n"
+        "paddh $f14, $f14, $f6                      \r\n"
+        "paddh $f6, $f6, $f6                        \r\n"
+        "paddh $f4, $f4, $f14                       \r\n"
+        "psubh $f6, $f6, $f14                       \r\n"
+        "paddh $f14, $f14, $f14                     \r\n"
+        "paddh $f0, $f0, $f6                        \r\n"
+        "psubh $f14, $f14, $f4                      \r\n"
+        "paddh $f6, $f6, $f6                        \r\n"
+        "paddh $f10, $f10, $f4                      \r\n"
+        "psubh $f6, $f6, $f0                        \r\n"
+        "paddh $f4, $f4, $f4                        \r\n"
+        "paddh $f8, $f8, $f0                        \r\n"
+        "psubh $f4, $f4, $f10                       \r\n"
+        "paddh $f0, $f0, $f0                        \r\n"
+        "paddh $f12, $f12, $f6                      \r\n"
+        "psubh $f0, $f0, $f8                        \r\n"
+        "paddh $f6, $f6, $f6                        \r\n"
+        "paddh $f2, $f2, $f14                       \r\n"
+        "psubh $f6, $f6, $f12                       \r\n"
+        "paddh $f14, $f14, $f14                     \r\n"
+        "sdc1 $f6, 0x0($29)                         \r\n"
+        "psubh $f14, $f14, $f2                      \r\n"
+        "sdc1 $f0, 0x10($29)                        \r\n"
+        "dmfc1 $8, $f4                              \r\n"
+        "xor $f4, $f4, $f4                          \r\n"
+        "sdc1 $f4, 0x0(%[block])                    \r\n"
+        "sdc1 $f4, 0x8(%[block])                    \r\n"
+        "sdc1 $f4, 0x10(%[block])                   \r\n"
+        "sdc1 $f4, 0x18(%[block])                   \r\n"
+        "sdc1 $f4, 0x20(%[block])                   \r\n"
+        "sdc1 $f4, 0x28(%[block])                   \r\n"
+        "sdc1 $f4, 0x30(%[block])                   \r\n"
+        "sdc1 $f4, 0x38(%[block])                   \r\n"
+        "sdc1 $f4, 0x40(%[block])                   \r\n"
+        "sdc1 $f4, 0x48(%[block])                   \r\n"
+        "sdc1 $f4, 0x50(%[block])                   \r\n"
+        "sdc1 $f4, 0x58(%[block])                   \r\n"
+        "sdc1 $f4, 0x60(%[block])                   \r\n"
+        "sdc1 $f4, 0x68(%[block])                   \r\n"
+        "sdc1 $f4, 0x70(%[block])                   \r\n"
+        "sdc1 $f4, 0x78(%[block])                   \r\n"
+        "dli $11, 0x6                               \r\n"
+        "lwc1 $f6, 0x0(%[dst])                      \r\n"
+        "dmtc1 $11, $f20                            \r\n"
+        "gslwxc1 $f0, 0x0(%[dst], %[stride])        \r\n"
+        "psrah $f10, $f10, $f20                     \r\n"
+        "psrah $f8, $f8, $f20                       \r\n"
+        "punpcklbh $f6, $f6, $f4                    \r\n"
+        "punpcklbh $f0, $f0, $f4                    \r\n"
+        "paddh $f6, $f6, $f10                       \r\n"
+        "paddh $f0, $f0, $f8                        \r\n"
+        "packushb $f6, $f6, $f4                     \r\n"
+        "packushb $f0, $f0, $f4                     \r\n"
+        "swc1 $f6, 0x0(%[dst])                      \r\n"
+        "gsswxc1 $f0, 0x0(%[dst], %[stride])        \r\n"
+        "daddu %[dst], %[dst], %[stride]            \r\n"
+        "daddu %[dst], %[dst], %[stride]            \r\n"
+        "lwc1 $f6, 0x0(%[dst])                      \r\n"
+        "gslwxc1 $f0, 0x0(%[dst], %[stride])        \r\n"
+        "psrah $f12, $f12, $f20                     \r\n"
+        "psrah $f2, $f2, $f20                       \r\n"
+        "punpcklbh $f6, $f6, $f4                    \r\n"
+        "punpcklbh $f0, $f0, $f4                    \r\n"
+        "paddh $f6, $f6, $f12                       \r\n"
+        "paddh $f0, $f0, $f2                        \r\n"
+        "packushb $f6, $f6, $f4                     \r\n"
+        "packushb $f0, $f0, $f4                     \r\n"
+        "swc1 $f6, 0x0(%[dst])                      \r\n"
+        "gsswxc1 $f0, 0x0(%[dst], %[stride])        \r\n"
+        "ldc1 $f10, 0x0($29)                        \r\n"
+        "ldc1 $f8, 0x10($29)                        \r\n"
+        "dmtc1 $8, $f12                             \r\n"
+        "daddu %[dst], %[dst], %[stride]            \r\n"
+        "daddu %[dst], %[dst], %[stride]            \r\n"
+        "lwc1 $f6, 0x0(%[dst])                      \r\n"
+        "gslwxc1 $f0, 0x0(%[dst], %[stride])        \r\n"
+        "psrah $f14, $f14, $f20                     \r\n"
+        "psrah $f10, $f10, $f20                     \r\n"
+        "punpcklbh $f6, $f6, $f4                    \r\n"
+        "punpcklbh $f0, $f0, $f4                    \r\n"
+        "paddh $f6, $f6, $f14                       \r\n"
+        "paddh $f0, $f0, $f10                       \r\n"
+        "packushb $f6, $f6, $f4                     \r\n"
+        "packushb $f0, $f0, $f4                     \r\n"
+        "swc1 $f6, 0x0(%[dst])                      \r\n"
+        "gsswxc1 $f0, 0x0(%[dst], %[stride])        \r\n"
+        "daddu %[dst], %[dst], %[stride]            \r\n"
+        "daddu %[dst], %[dst], %[stride]            \r\n"
+        "lwc1 $f6, 0x0(%[dst])                      \r\n"
+        "gslwxc1 $f0, 0x0(%[dst], %[stride])        \r\n"
+        "psrah $f8, $f8, $f20                       \r\n"
+        "psrah $f12, $f12, $f20                     \r\n"
+        "punpcklbh $f6, $f6, $f4                    \r\n"
+        "punpcklbh $f0, $f0, $f4                    \r\n"
+        "paddh $f6, $f6, $f8                        \r\n"
+        "paddh $f0, $f0, $f12                       \r\n"
+        "packushb $f6, $f6, $f4                     \r\n"
+        "packushb $f0, $f0, $f4                     \r\n"
+        "swc1 $f6, 0x0(%[dst])                      \r\n"
+        "gsswxc1 $f0, 0x0(%[dst], %[stride])        \r\n"
+        "dmtc1 $12, $f2                             \r\n"
+        "dmtc1 $9, $f12                             \r\n"
+        "ldc1 $f8, 0x18($29)                        \r\n"
+        "mov.d $f10, $f8                            \r\n"
+        "psrah $f8, $f8, $f16                       \r\n"
+        "psrah $f14, $f22, $f16                     \r\n"
+        "paddh $f14, $f14, $f22                     \r\n"
+        "paddh $f8, $f8, $f10                       \r\n"
+        "paddh $f14, $f14, $f30                     \r\n"
+        "paddh $f8, $f8, $f22                       \r\n"
+        "psubh $f14, $f14, $f10                     \r\n"
+        "paddh $f8, $f8, $f2                        \r\n"
+        "psubh $f10, $f10, $f2                      \r\n"
+        "psubh $f6, $f22, $f2                       \r\n"
+        "psrah $f2, $f2, $f16                       \r\n"
+        "paddh $f10, $f10, $f30                     \r\n"
+        "psubh $f6, $f6, $f30                       \r\n"
+        "psrah $f4, $f30, $f16                      \r\n"
+        "psubh $f10, $f10, $f2                      \r\n"
+        "psubh $f6, $f6, $f4                        \r\n"
+        "mov.d $f4, $f8                             \r\n"
+        "psrah $f8, $f8, $f18                       \r\n"
+        "psrah $f2, $f14, $f18                      \r\n"
+        "paddh $f8, $f8, $f6                        \r\n"
+        "paddh $f2, $f2, $f10                       \r\n"
+        "psrah $f10, $f10, $f18                     \r\n"
+        "psrah $f6, $f6, $f18                       \r\n"
+        "psubh $f10, $f10, $f14                     \r\n"
+        "psubh $f4, $f4, $f6                        \r\n"
+        "mov.d $f6, $f26                            \r\n"
+        "psrah $f0, $f26, $f16                      \r\n"
+        "psrah $f14, $f12, $f16                     \r\n"
+        "paddh $f0, $f0, $f12                       \r\n"
+        "psubh $f14, $f14, $f6                      \r\n"
+        "ldc1 $f12, 0x8($29)                        \r\n"
+        "dmtc1 $14, $f6                             \r\n"
+        "paddh $f6, $f6, $f12                       \r\n"
+        "paddh $f12, $f12, $f12                     \r\n"
+        "paddh $f0, $f0, $f6                        \r\n"
+        "psubh $f12, $f12, $f6                      \r\n"
+        "paddh $f6, $f6, $f6                        \r\n"
+        "paddh $f14, $f14, $f12                     \r\n"
+        "psubh $f6, $f6, $f0                        \r\n"
+        "paddh $f12, $f12, $f12                     \r\n"
+        "paddh $f4, $f4, $f0                        \r\n"
+        "psubh $f12, $f12, $f14                     \r\n"
+        "paddh $f0, $f0, $f0                        \r\n"
+        "paddh $f10, $f10, $f14                     \r\n"
+        "psubh $f0, $f0, $f4                        \r\n"
+        "paddh $f14, $f14, $f14                     \r\n"
+        "paddh $f2, $f2, $f12                       \r\n"
+        "psubh $f14, $f14, $f10                     \r\n"
+        "paddh $f12, $f12, $f12                     \r\n"
+        "paddh $f8, $f8, $f6                        \r\n"
+        "psubh $f12, $f12, $f2                      \r\n"
+        "paddh $f6, $f6, $f6                        \r\n"
+        "sdc1 $f12, 0x8($29)                        \r\n"
+        "psubh $f6, $f6, $f8                        \r\n"
+        "sdc1 $f14, 0x18($29)                       \r\n"
+        "dmfc1 $9, $f0                              \r\n"
+        "xor $f0, $f0, $f0                          \r\n"
+        "lwc1 $f12, 0x0($10)                        \r\n"
+        "gslwxc1 $f14, 0x0($10, %[stride])          \r\n"
+        "psrah $f4, $f4, $f20                       \r\n"
+        "psrah $f10, $f10, $f20                     \r\n"
+        "punpcklbh $f12, $f12, $f0                  \r\n"
+        "punpcklbh $f14, $f14, $f0                  \r\n"
+        "paddh $f12, $f12, $f4                      \r\n"
+        "paddh $f14, $f14, $f10                     \r\n"
+        "packushb $f12, $f12, $f0                   \r\n"
+        "packushb $f14, $f14, $f0                   \r\n"
+        "swc1 $f12, 0x0($10)                        \r\n"
+        "gsswxc1 $f14, 0x0($10, %[stride])          \r\n"
+        "daddu $10, $10, %[stride]                  \r\n"
+        "daddu $10, $10, %[stride]                  \r\n"
+        "lwc1 $f12, 0x0($10)                        \r\n"
+        "gslwxc1 $f14, 0x0($10, %[stride])          \r\n"
+        "psrah $f2, $f2, $f20                       \r\n"
+        "psrah $f8, $f8, $f20                       \r\n"
+        "punpcklbh $f12, $f12, $f0                  \r\n"
+        "punpcklbh $f14, $f14, $f0                  \r\n"
+        "paddh $f12, $f12, $f2                      \r\n"
+        "paddh $f14, $f14, $f8                      \r\n"
+        "packushb $f12, $f12, $f0                   \r\n"
+        "packushb $f14, $f14, $f0                   \r\n"
+        "swc1 $f12, 0x0($10)                        \r\n"
+        "gsswxc1 $f14, 0x0($10, %[stride])          \r\n"
+        "ldc1 $f4, 0x8($29)                         \r\n"
+        "ldc1 $f10, 0x18($29)                       \r\n"
+        "daddu $10, $10, %[stride]                  \r\n"
+        "dmtc1 $9, $f2                              \r\n"
+        "daddu $10, $10, %[stride]                  \r\n"
+        "lwc1 $f12, 0x0($10)                        \r\n"
+        "gslwxc1 $f14, 0x0($10, %[stride])          \r\n"
+        "psrah $f6, $f6, $f20                       \r\n"
+        "psrah $f4, $f4, $f20                       \r\n"
+        "punpcklbh $f12, $f12, $f0                  \r\n"
+        "punpcklbh $f14, $f14, $f0                  \r\n"
+        "paddh $f12, $f12, $f6                      \r\n"
+        "paddh $f14, $f14, $f4                      \r\n"
+        "packushb $f12, $f12, $f0                   \r\n"
+        "packushb $f14, $f14, $f0                   \r\n"
+        "swc1 $f12, 0x0($10)                        \r\n"
+        "gsswxc1 $f14, 0x0($10, %[stride])          \r\n"
+        "daddu $10, $10, %[stride]                  \r\n"
+        "daddu $10, $10, %[stride]                  \r\n"
+        "lwc1 $f12, 0x0($10)                        \r\n"
+        "gslwxc1 $f14, 0x0($10, %[stride])          \r\n"
+        "psrah $f10, $f10, $f20                     \r\n"
+        "psrah $f2, $f2, $f20                       \r\n"
+        "punpcklbh $f12, $f12, $f0                  \r\n"
+        "punpcklbh $f14, $f14, $f0                  \r\n"
+        "paddh $f12, $f12, $f10                     \r\n"
+        "paddh $f14, $f14, $f2                      \r\n"
+        "packushb $f12, $f12, $f0                   \r\n"
+        "packushb $f14, $f14, $f0                   \r\n"
+        "swc1 $f12, 0x0($10)                        \r\n"
+        "gsswxc1 $f14, 0x0($10, %[stride])          \r\n"
+        "daddiu $29, $29, 0x20                      \r\n"
+        ::[dst]"r"(dst),[block]"r"(block),[stride]"r"((uint64_t)stride)
+        :"$8","$9","$10","$11","$12","$13","$14","$15","$29","$f0","$f2","$f4",
+         "$f8","$f10","$f12","$f14","$f16","$f18","$f20","$f22","$f24","$f26",
+         "$f28","$f30"
+    );
+
+    memset(block, 0, 128);
+}
+
+void ff_h264_idct_dc_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
+{   /* Adds dc = (block[0] + 32) >> 6 to four rows of four pixels at dst. */
+    __asm__ volatile (
+        "lh $8, 0x0(%[block])                       \r\n" /* $8 = block[0] */
+        "sd $0, 0x0(%[block])                       \r\n" /* zero block[0..3] */
+        "daddiu $8, $8, 0x20                        \r\n" /* $8 += 32 */
+        "daddu $10, %[stride], %[stride]            \r\n" /* $10 = 2 * stride */
+        "dsra $8, $8, 0x6                           \r\n" /* dc = $8 >> 6 */
+        "xor $f2, $f2, $f2                          \r\n" /* $f2 = 0 */
+        "mtc1 $8, $f0                               \r\n"
+        "pshufh $f0, $f0, $f2                       \r\n" /* selector 0: dc in every halfword */
+        "daddu $8, $10, %[stride]                   \r\n" /* $8 = 3 * stride */
+        "psubh $f2, $f2, $f0                        \r\n" /* $f2 = -dc */
+        "packushb $f0, $f0, $f0                     \r\n" /* bytes = dc clamped to 0..255 */
+        "packushb $f2, $f2, $f2                     \r\n" /* bytes = -dc clamped to 0..255 */
+        "lwc1 $f4, 0x0(%[dst])                      \r\n" /* load rows 0..3, 4 bytes each */
+        "gslwxc1 $f6, 0x0(%[dst], %[stride])        \r\n"
+        "gslwxc1 $f8, 0x0(%[dst], $10)              \r\n"
+        "gslwxc1 $f10, 0x0(%[dst], $8)              \r\n"
+        "paddusb $f4, $f4, $f0                      \r\n" /* saturating add of the positive part */
+        "paddusb $f6, $f6, $f0                      \r\n"
+        "paddusb $f8, $f8, $f0                      \r\n"
+        "paddusb $f10, $f10, $f0                    \r\n"
+        "psubusb $f4, $f4, $f2                      \r\n" /* saturating subtract of the negative part */
+        "psubusb $f6, $f6, $f2                      \r\n"
+        "psubusb $f8, $f8, $f2                      \r\n"
+        "psubusb $f10, $f10, $f2                    \r\n"
+        "swc1 $f4, 0x0(%[dst])                      \r\n" /* store rows 0..3 */
+        "gsswxc1 $f6, 0x0(%[dst], %[stride])        \r\n"
+        "gsswxc1 $f8, 0x0(%[dst], $10)              \r\n"
+        "gsswxc1 $f10, 0x0(%[dst], $8)              \r\n"
+        ::[dst]"r"(dst),[block]"r"(block),[stride]"r"((uint64_t)stride)
+        : "$8","$10","$f0","$f2","$f4","$f6","$f8","$f10","memory"
+    ); /* "memory": the asm stores to block[] and dst[], which are not outputs */
+}
+
+void ff_h264_idct8_dc_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
+{   /* Adds dc = (block[0] + 32) >> 6 to eight rows of eight pixels at dst. */
+    __asm__ volatile (
+        "lh $8, 0x0(%[block])                       \r\n" /* $8 = block[0] */
+        "sd $0, 0x0(%[block])                       \r\n" /* zero block[0..3] */
+        "daddiu $8, $8, 0x20                        \r\n" /* $8 += 32 */
+        "daddu $10, %[stride], %[stride]            \r\n" /* $10 = 2 * stride */
+        "dsra $8, $8, 0x6                           \r\n" /* dc = $8 >> 6 */
+        "xor $f2, $f2, $f2                          \r\n" /* $f2 = 0 */
+        "mtc1 $8, $f0                               \r\n"
+        "pshufh $f0, $f0, $f2                       \r\n" /* selector 0: dc in every halfword */
+        "daddu $8, $10, %[stride]                   \r\n" /* $8 = 3 * stride */
+        "psubh $f2, $f2, $f0                        \r\n" /* $f2 = -dc */
+        "packushb $f0, $f0, $f0                     \r\n" /* bytes = dc clamped to 0..255 */
+        "packushb $f2, $f2, $f2                     \r\n" /* bytes = -dc clamped to 0..255 */
+        "ldc1 $f4, 0x0(%[dst])                      \r\n" /* load rows 0..3, 8 bytes each */
+        "gsldxc1 $f6, 0x0(%[dst], %[stride])        \r\n"
+        "gsldxc1 $f8, 0x0(%[dst], $10)              \r\n"
+        "gsldxc1 $f10, 0x0(%[dst], $8)              \r\n"
+        "paddusb $f4, $f4, $f0                      \r\n" /* saturating add of the positive part */
+        "paddusb $f6, $f6, $f0                      \r\n"
+        "paddusb $f8, $f8, $f0                      \r\n"
+        "paddusb $f10, $f10, $f0                    \r\n"
+        "psubusb $f4, $f4, $f2                      \r\n" /* saturating subtract of the negative part */
+        "psubusb $f6, $f6, $f2                      \r\n"
+        "psubusb $f8, $f8, $f2                      \r\n"
+        "psubusb $f10, $f10, $f2                    \r\n"
+        "sdc1 $f4, 0x0(%[dst])                      \r\n" /* store rows 0..3 */
+        "gssdxc1 $f6, 0x0(%[dst], %[stride])        \r\n"
+        "gssdxc1 $f8, 0x0(%[dst], $10)              \r\n"
+        "daddu $9, $10, $10                         \r\n" /* $9 = 4 * stride */
+        "gssdxc1 $f10, 0x0(%[dst], $8)              \r\n"
+        "daddu %[dst], %[dst], $9                   \r\n" /* dst now points at row 4 */
+        "ldc1 $f4, 0x0(%[dst])                      \r\n" /* load rows 4..7 */
+        "gsldxc1 $f6, 0x0(%[dst], %[stride])        \r\n"
+        "gsldxc1 $f8, 0x0(%[dst], $10)              \r\n"
+        "gsldxc1 $f10, 0x0(%[dst], $8)              \r\n"
+        "paddusb $f4, $f4, $f0                      \r\n"
+        "paddusb $f6, $f6, $f0                      \r\n"
+        "paddusb $f8, $f8, $f0                      \r\n"
+        "paddusb $f10, $f10, $f0                    \r\n"
+        "psubusb $f4, $f4, $f2                      \r\n"
+        "psubusb $f6, $f6, $f2                      \r\n"
+        "psubusb $f8, $f8, $f2                      \r\n"
+        "psubusb $f10, $f10, $f2                    \r\n"
+        "sdc1 $f4, 0x0(%[dst])                      \r\n" /* store rows 4..7 */
+        "gssdxc1 $f6, 0x0(%[dst], %[stride])        \r\n"
+        "gssdxc1 $f8, 0x0(%[dst], $10)              \r\n"
+        "gssdxc1 $f10, 0x0(%[dst], $8)              \r\n"
+        : [dst]"+&r"(dst) : [block]"r"(block), [stride]"r"((uint64_t)stride)
+        : "$8","$9","$10","$f0","$f2","$f4","$f6","$f8","$f10","memory"
+    ); /* dst is "+&r" because the asm advances it; "memory" covers the stores */
+}
+
+void ff_h264_idct_add16_8_mmi(uint8_t *dst, const int *block_offset,
+        int16_t *block, int stride, const uint8_t nnzc[15*8])
+{
+    /* Per 4x4 block: skip if nnz == 0, DC path if nnz == 1 and DC != 0. */
+    int blk;
+    for (blk = 0; blk < 16; blk++) {
+        int16_t *coef = block + blk * 16;
+        const int nnz = nnzc[scan8[blk]];
+        if (!nnz)
+            continue;
+        if (nnz == 1 && coef[0])
+            ff_h264_idct_dc_add_8_mmi(dst + block_offset[blk], coef, stride);
+        else
+            ff_h264_idct_add_8_mmi(dst + block_offset[blk], coef, stride);
+    }
+}
+
+void ff_h264_idct_add16intra_8_mmi(uint8_t *dst, const int *block_offset,
+        int16_t *block, int stride, const uint8_t nnzc[15*8])
+{
+    int n; /* 4x4 block index; full IDCT if nnzc set, else DC path if DC != 0 */
+    for (n = 0; n < 16; n++) {
+        int16_t *coef = block + n * 16;
+        if (nnzc[scan8[n]])
+            ff_h264_idct_add_8_mmi(dst + block_offset[n], coef, stride);
+        else if (coef[0])
+            ff_h264_idct_dc_add_8_mmi(dst + block_offset[n], coef, stride);
+    }
+}
+
+void ff_h264_idct8_add4_8_mmi(uint8_t *dst, const int *block_offset,
+        int16_t *block, int stride, const uint8_t nnzc[15*8])
+{
+    /* Indices 0,4,8,12: skip if nnz == 0, DC path if nnz == 1 and DC != 0. */
+    int blk;
+    for (blk = 0; blk < 16; blk += 4) {
+        int16_t *coef = block + blk * 16;
+        const int nnz = nnzc[scan8[blk]];
+        if (!nnz)
+            continue;
+        if (nnz == 1 && coef[0])
+            ff_h264_idct8_dc_add_8_mmi(dst + block_offset[blk], coef, stride);
+        else
+            ff_h264_idct8_add_8_mmi(dst + block_offset[blk], coef, stride);
+    }
+}
+
+void ff_h264_idct_add8_8_mmi(uint8_t **dest, const int *block_offset,
+        int16_t *block, int stride, const uint8_t nnzc[15*8])
+{
+    int j, n; /* j = 1, 2: plane dest[j-1], block indices 16*j .. 16*j+3 */
+    for (j = 1; j < 3; j++) {
+        uint8_t *out = dest[j - 1];
+        for (n = 16 * j; n < 16 * j + 4; n++) {
+            int16_t *coef = block + n * 16;
+            if (nnzc[scan8[n]])
+                ff_h264_idct_add_8_mmi(out + block_offset[n], coef, stride);
+            else if (coef[0])
+                ff_h264_idct_dc_add_8_mmi(out + block_offset[n], coef, stride);
+        }
+    }
+}
+
+void ff_h264_idct_add8_422_8_mmi(uint8_t **dest, const int *block_offset,
+        int16_t *block, int stride, const uint8_t nnzc[15*8])
+{
+    int j, n;
+    /* Pass 1: indices 16*j .. 16*j+3 select nnzc, offset and coefficients. */
+    for (j = 1; j < 3; j++) {
+        uint8_t *out = dest[j - 1];
+        for (n = 16 * j; n < 16 * j + 4; n++) {
+            int16_t *coef = block + n * 16;
+            if (nnzc[scan8[n]])
+                ff_h264_idct_add_8_mmi(out + block_offset[n], coef, stride);
+            else if (coef[0])
+                ff_h264_idct_dc_add_8_mmi(out + block_offset[n], coef, stride);
+        }
+    }
+    /* Pass 2: n = 16*j+8 .. 16*j+11 selects nnzc/offset, coefficients at n-4. */
+    for (j = 1; j < 3; j++) {
+        uint8_t *out = dest[j - 1];
+        for (n = 16 * j + 8; n < 16 * j + 12; n++) {
+            int16_t *coef = block + (n - 4) * 16;
+            if (nnzc[scan8[n]])
+                ff_h264_idct_add_8_mmi(out + block_offset[n], coef, stride);
+            else if (coef[0])
+                ff_h264_idct_dc_add_8_mmi(out + block_offset[n], coef, stride);
+        }
+    }
+}
+
+void ff_h264_luma_dc_dequant_idct_8_mmi(int16_t *output, int16_t *input,
+        int qmul)
+{
+    __asm__ volatile (
+        ".set noreorder                                 \r\n"
+        "dli $10, 0x8                                   \r\n"
+        "ldc1 $f6, 0x18(%[input])                       \r\n"
+        "dmtc1 $10, $f16                                \r\n"
+        "ldc1 $f4, 0x10(%[input])                       \r\n"
+        "dli $10, 0x20                                  \r\n"
+        "ldc1 $f2, 0x8(%[input])                        \r\n"
+        "dmtc1 $10, $f18                                \r\n"
+        "ldc1 $f0, 0x0(%[input])                        \r\n"
+        "mov.d $f8, $f6                                 \r\n"
+        "paddh $f6, $f6, $f4                            \r\n"
+        "psubh $f4, $f4, $f8                            \r\n"
+        "mov.d $f8, $f2                                 \r\n"
+        "paddh $f2, $f2, $f0                            \r\n"
+        "psubh $f0, $f0, $f8                            \r\n"
+        "mov.d $f8, $f6                                 \r\n"
+        "paddh $f6, $f6, $f2                            \r\n"
+        "psubh $f2, $f2, $f8                            \r\n"
+        "mov.d $f8, $f4                                 \r\n"
+        "paddh $f4, $f4, $f0                            \r\n"
+        "psubh $f0, $f0, $f8                            \r\n"
+        "mov.d $f8, $f6                                 \r\n"
+        "punpcklhw $f6, $f6, $f2                        \r\n"
+        "punpckhhw $f8, $f8, $f2                        \r\n"
+        "punpckhhw $f2, $f0, $f4                        \r\n"
+        "punpcklhw $f0, $f0, $f4                        \r\n"
+        "punpckhwd $f4, $f6, $f0                        \r\n"
+        "punpcklwd $f6, $f6, $f0                        \r\n"
+        "mov.d $f0, $f8                                 \r\n"
+        "punpcklwd $f8, $f8, $f2                        \r\n"
+        "punpckhwd $f0, $f0, $f2                        \r\n"
+        "mov.d $f2, $f0                                 \r\n"
+        "paddh $f0, $f0, $f8                            \r\n"
+        "psubh $f8, $f8, $f2                            \r\n"
+        "mov.d $f2, $f4                                 \r\n"
+        "paddh $f4, $f4, $f6                            \r\n"
+        "psubh $f6, $f6, $f2                            \r\n"
+        "mov.d $f2, $f0                                 \r\n"
+        "paddh $f0, $f0, $f4                            \r\n"
+        "psubh $f4, $f4, $f2                            \r\n"
+        "mov.d $f2, $f8                                 \r\n"
+        "daddiu $10, %[qmul], -0x7fff                   \r\n"
+        "paddh $f8, $f8, $f6                            \r\n"
+        "bgtz $10, 1f                                   \r\n"
+        "psubh $f6, $f6, $f2                            \r\n"
+        "ori $10, $0, 0x80                              \r\n"
+        "dsll $10, $10, 0x10                            \r\n"
+        "punpckhhw $f2, $f0, %[ff_pw_1]                 \r\n"
+        "daddu %[qmul], %[qmul], $10                    \r\n"
+        "punpcklhw $f0, $f0, %[ff_pw_1]                 \r\n"
+        "punpckhhw $f10, $f4, %[ff_pw_1]                \r\n"
+        "punpcklhw $f4, $f4, %[ff_pw_1]                 \r\n"
+        "mtc1 %[qmul], $f14                             \r\n"
+        "punpcklwd $f14, $f14, $f14                     \r\n"
+        "pmaddhw $f0, $f0, $f14                         \r\n"
+        "pmaddhw $f4, $f4, $f14                         \r\n"
+        "pmaddhw $f2, $f2, $f14                         \r\n"
+        "pmaddhw $f10, $f10, $f14                       \r\n"
+        "psraw $f0, $f0, $f16                           \r\n"
+        "psraw $f4, $f4, $f16                           \r\n"
+        "psraw $f2, $f2, $f16                           \r\n"
+        "psraw $f10, $f10, $f16                         \r\n"
+        "packsswh $f0, $f0, $f2                         \r\n"
+        "packsswh $f4, $f4, $f10                        \r\n"
+        "mfc1 $9, $f0                                   \r\n"
+        "dsrl $f0, $f0, $f18                            \r\n"
+        "mfc1 %[input], $f0                             \r\n"
+        "sh $9, 0x0(%[output])                          \r\n"
+        "sh %[input], 0x80(%[output])                   \r\n"
+        "dsrl $9, $9, 0x10                              \r\n"
+        "dsrl %[input], %[input], 0x10                  \r\n"
+        "sh $9, 0x20(%[output])                         \r\n"
+        "sh %[input], 0xa0(%[output])                   \r\n"
+        "mfc1 $9, $f4                                   \r\n"
+        "dsrl $f4, $f4, $f18                            \r\n"
+        "mfc1 %[input], $f4                             \r\n"
+        "sh $9, 0x40(%[output])                         \r\n"
+        "sh %[input], 0xc0(%[output])                   \r\n"
+        "dsrl $9, $9, 0x10                              \r\n"
+        "dsrl %[input], %[input], 0x10                  \r\n"
+        "sh $9, 0x60(%[output])                         \r\n"
+        "sh %[input], 0xe0(%[output])                   \r\n"
+        "punpckhhw $f2, $f6, %[ff_pw_1]                 \r\n"
+        "punpcklhw $f6, $f6, %[ff_pw_1]                 \r\n"
+        "punpckhhw $f10, $f8, %[ff_pw_1]                \r\n"
+        "punpcklhw $f8, $f8, %[ff_pw_1]                 \r\n"
+        "mtc1 %[qmul], $f14                             \r\n"
+        "punpcklwd $f14, $f14, $f14                     \r\n"
+        "pmaddhw $f6, $f6, $f14                         \r\n"
+        "pmaddhw $f8, $f8, $f14                         \r\n"
+        "pmaddhw $f2, $f2, $f14                         \r\n"
+        "pmaddhw $f10, $f10, $f14                       \r\n"
+        "psraw $f6, $f6, $f16                           \r\n"
+        "psraw $f8, $f8, $f16                           \r\n"
+        "psraw $f2, $f2, $f16                           \r\n"
+        "psraw $f10, $f10, $f16                         \r\n"
+        "packsswh $f6, $f6, $f2                         \r\n"
+        "packsswh $f8, $f8, $f10                        \r\n"
+        "mfc1 $9, $f6                                   \r\n"
+        "dsrl $f6, $f6, $f18                            \r\n"
+        "mfc1 %[input], $f6                             \r\n"
+        "sh $9, 0x100(%[output])                        \r\n"
+        "sh %[input], 0x180(%[output])                  \r\n"
+        "dsrl $9, $9, 0x10                              \r\n"
+        "dsrl %[input], %[input], 0x10                  \r\n"
+        "sh $9, 0x120(%[output])                        \r\n"
+        "sh %[input], 0x1a0(%[output])                  \r\n"
+        "mfc1 $9, $f8                                   \r\n"
+        "dsrl $f8, $f8, $f18                            \r\n"
+        "mfc1 %[input], $f8                             \r\n"
+        "sh $9, 0x140(%[output])                        \r\n"
+        "sh %[input], 0x1c0(%[output])                  \r\n"
+        "dsrl $9, $9, 0x10                              \r\n"
+        "dsrl %[input], %[input], 0x10                  \r\n"
+        "sh $9, 0x160(%[output])                        \r\n"
+        "jr $31                                         \r\n"
+        "sh %[input], 0x1e0(%[output])                  \r\n"
+        "1:                                             \r\n"
+        "ori $10, $0, 0x1f                              \r\n"
+        "clz $9, %[qmul]                                \r\n"
+        "ori %[input], $0, 0x7                          \r\n"
+        "dsubu $9, $10, $9                              \r\n"
+        "ori $10, $0, 0x80                              \r\n"
+        "dsll $10, $10, 0x10                            \r\n"
+        "daddu %[qmul], %[qmul], $10                    \r\n"
+        "dsubu $10, $9, %[input]                        \r\n"
+        "movn $9, %[input], $10                         \r\n"
+        "daddiu %[input], %[input], 0x1                 \r\n"
+        "andi $10, $9, 0xff                             \r\n"
+        "dsrlv %[qmul], %[qmul], $10                    \r\n"
+        "dsubu %[input], %[input], $9                   \r\n"
+        "mtc1 %[input], $f12                            \r\n"
+        "punpckhhw $f2, $f0, %[ff_pw_1]                 \r\n"
+        "punpcklhw $f0, $f0, %[ff_pw_1]                 \r\n"
+        "punpckhhw $f10, $f4, %[ff_pw_1]                \r\n"
+        "punpcklhw $f4, $f4, %[ff_pw_1]                 \r\n"
+        "mtc1 %[qmul], $f14                             \r\n"
+        "punpcklwd $f14, $f14, $f14                     \r\n"
+        "pmaddhw $f0, $f0, $f14                         \r\n"
+        "pmaddhw $f4, $f4, $f14                         \r\n"
+        "pmaddhw $f2, $f2, $f14                         \r\n"
+        "pmaddhw $f10, $f10, $f14                       \r\n"
+        "psraw $f0, $f0, $f12                           \r\n"
+        "psraw $f4, $f4, $f12                           \r\n"
+        "psraw $f2, $f2, $f12                           \r\n"
+        "psraw $f10, $f10, $f12                         \r\n"
+        "packsswh $f0, $f0, $f2                         \r\n"
+        "packsswh $f4, $f4, $f10                        \r\n"
+        "mfc1 $9, $f0                                   \r\n"
+        "dsrl $f0, $f0, $f18                            \r\n"
+        "sh $9, 0x0(%[output])                          \r\n"
+        "mfc1 %[input], $f0                             \r\n"
+        "dsrl $9, $9, 0x10                              \r\n"
+        "sh %[input], 0x80(%[output])                   \r\n"
+        "sh $9, 0x20(%[output])                         \r\n"
+        "dsrl %[input], %[input], 0x10                  \r\n"
+        "mfc1 $9, $f4                                   \r\n"
+        "sh %[input], 0xa0(%[output])                   \r\n"
+        "dsrl $f4, $f4, $f18                            \r\n"
+        "sh $9, 0x40(%[output])                         \r\n"
+        "mfc1 %[input], $f4                             \r\n"
+        "dsrl $9, $9, 0x10                              \r\n"
+        "sh %[input], 0xc0(%[output])                   \r\n"
+        "sh $9, 0x60(%[output])                         \r\n"
+        "dsrl %[input], %[input], 0x10                  \r\n"
+        "sh %[input], 0xe0(%[output])                   \r\n"
+        "punpckhhw $f2, $f6, %[ff_pw_1]                 \r\n"
+        "punpcklhw $f6, $f6, %[ff_pw_1]                 \r\n"
+        "punpckhhw $f10, $f8, %[ff_pw_1]                \r\n"
+        "punpcklhw $f8, $f8, %[ff_pw_1]                 \r\n"
+        "mtc1 %[qmul], $f14                             \r\n"
+        "punpcklwd $f14, $f14, $f14                     \r\n"
+        "pmaddhw $f6, $f6, $f14                         \r\n"
+        "pmaddhw $f8, $f8, $f14                         \r\n"
+        "pmaddhw $f2, $f2, $f14                         \r\n"
+        "pmaddhw $f10, $f10, $f14                       \r\n"
+        "psraw $f6, $f6, $f12                           \r\n"
+        "psraw $f8, $f8, $f12                           \r\n"
+        "psraw $f2, $f2, $f12                           \r\n"
+        "psraw $f10, $f10, $f12                         \r\n"
+        "packsswh $f6, $f6, $f2                         \r\n"
+        "packsswh $f8, $f8, $f10                        \r\n"
+        "mfc1 $9, $f6                                   \r\n"
+        "dsrl $f6, $f6, $f18                            \r\n"
+        "mfc1 %[input], $f6                             \r\n"
+        "sh $9, 0x100(%[output])                        \r\n"
+        "sh %[input], 0x180(%[output])                  \r\n"
+        "dsrl $9, $9, 0x10                              \r\n"
+        "dsrl %[input], %[input], 0x10                  \r\n"
+        "sh $9, 0x120(%[output])                        \r\n"
+        "sh %[input], 0x1a0(%[output])                  \r\n"
+        "mfc1 $9, $f8                                   \r\n"
+        "dsrl $f8, $f8, $f18                            \r\n"
+        "mfc1 %[input], $f8                             \r\n"
+        "sh $9, 0x140(%[output])                        \r\n"
+        "sh %[input], 0x1c0(%[output])                  \r\n"
+        "dsrl $9, $9, 0x10                              \r\n"
+        "dsrl %[input], %[input], 0x10                  \r\n"
+        "sh $9, 0x160(%[output])                        \r\n"
+        "sh %[input], 0x1e0(%[output])                  \r\n"
+        ".set reorder                                   \r\n"
+        ::[output]"r"(output),[input]"r"(input),[qmul]"r"((uint64_t)qmul),
+          [ff_pw_1]"f"(ff_pw_1)
+        : "$9","$10","$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14","$f16",
+          "$f18"
+    );
+}
+
+void ff_h264_chroma422_dc_dequant_idct_8_mmi(int16_t *block, int qmul)
+{
+    /* In-place transform and dequantisation of eight DC terms.
+     * They sit at block[16 * k], k = 0..7.  All of them are read into
+     * row[] before any coefficient is written back. */
+    int row[8];
+    int k, col;
+
+    /* Sum and difference of each pair block[32 * k], block[32 * k + 16]. */
+    for (k = 0; k < 4; k++) {
+        const int lhs = block[32 * k];
+        const int rhs = block[32 * k + 16];
+
+        row[2 * k]     = lhs + rhs;
+        row[2 * k + 1] = lhs - rhs;
+    }
+
+    /* col 0 combines the sums, col 1 the differences; each result is
+     * scaled by qmul and rounded: (v * qmul + 128) >> 8. */
+    for (col = 0; col < 2; col++) {
+        const int e0 = row[col]     + row[4 + col];
+        const int e1 = row[col]     - row[4 + col];
+        const int o0 = row[2 + col] + row[6 + col];
+        const int o1 = row[2 + col] - row[6 + col];
+        int16_t *dst = block + 16 * col;
+
+        dst[ 0] = ((e0 + o0) * qmul + 128) >> 8;
+        dst[32] = ((e1 + o1) * qmul + 128) >> 8;
+        dst[64] = ((e1 - o1) * qmul + 128) >> 8;
+        dst[96] = ((e0 - o0) * qmul + 128) >> 8;
+    }
+}
+
+void ff_h264_chroma_dc_dequant_idct_8_mmi(int16_t *block, int qmul)
+{
+    /* 2x2 butterfly on the DC terms at block[0], [16], [32], [48]; all four
+     * are read into locals before any is overwritten.  Scale: (v*qmul) >> 7. */
+    const int top_sum  = block[0]  + block[16];
+    const int top_diff = block[0]  - block[16];
+    const int bot_sum  = block[32] + block[48];
+    const int bot_diff = block[32] - block[48];
+    block[0]  = ((top_sum  + bot_sum)  * qmul) >> 7;
+    block[16] = ((top_diff + bot_diff) * qmul) >> 7;
+    block[32] = ((top_sum  - bot_sum)  * qmul) >> 7;
+    block[48] = ((top_diff - bot_diff) * qmul) >> 7;
+}
+
 void ff_h264_weight_pixels16_8_mmi(uint8_t *block, int stride,
         int height, int log2_denom, int weight, int offset)
 {
@@ -276,3 +1282,1219 @@ void ff_h264_biweight_pixels4_8_mmi(uint8_t *dst, uint8_t *src,
         );
     }
 }
+
+static void inline chroma_inter_body_mmi(uint8_t *pix, int stride, /* pix/stride: bound as operands, never referenced by the asm */
+        int alpha, int beta, int8_t *tc0) /* tc0: one 32-bit word is loaded from it */
+{ /* Register-only filter step: consumes $f0,$f2,$f4,$f6, leaves results in $f2 and $f4; it stores nothing. */
+    __asm__ volatile ( /* NOTE(review): presumably $f0/$f2/$f4/$f6 hold p1/p0/q0/q1 loaded by the caller's asm - confirm at call sites */
+        "xor $f16, $f16, $f16                           \r\n" /* $f16 = 0 (shuffle selector) */
+        "mtc1 %[alpha], $f8                             \r\n" /* alpha is used as passed (no -1 here) */
+        "mtc1 %[beta], $f10                             \r\n"
+        "pshufh $f8, $f8, $f16                          \r\n" /* selector 0: copy halfword 0 into every halfword */
+        "pshufh $f10, $f10, $f16                        \r\n"
+        "packushb $f8, $f8, $f8                         \r\n" /* narrow to bytes: alpha in every byte */
+        "packushb $f10, $f10, $f10                      \r\n" /* beta in every byte */
+        "psubusb $f12, $f4, $f2                         \r\n" /* two saturating subtractions OR'ed give |$f4 - $f2| */
+        "psubusb $f14, $f2, $f4                         \r\n"
+        "or $f14, $f14, $f12                            \r\n"
+        "psubusb $f14, $f14, $f8                        \r\n" /* 0 where |$f4 - $f2| <= alpha */
+        "psubusb $f12, $f2, $f0                         \r\n" /* same pattern for |$f2 - $f0| against beta */
+        "psubusb $f8, $f0, $f2                          \r\n"
+        "or $f8, $f8, $f12                              \r\n"
+        "psubusb $f8, $f8, $f10                         \r\n"
+        "or $f14, $f14, $f8                             \r\n"
+        "psubusb $f12, $f4, $f6                         \r\n" /* and for |$f4 - $f6| against beta */
+        "psubusb $f8, $f6, $f4                          \r\n"
+        "or $f8, $f8, $f12                              \r\n"
+        "psubusb $f8, $f8, $f10                         \r\n"
+        "or $f14, $f14, $f8                             \r\n"
+        "xor $f12, $f12, $f12                           \r\n"
+        "pcmpeqb $f14, $f14, $f12                       \r\n" /* $f14 = 0xFF in bytes where all three tests passed */
+        "lwc1 $f12, 0x0(%[tc0])                         \r\n" /* load 4 tc0 bytes */
+        "punpcklbh $f12, $f12, $f12                     \r\n" /* interleave with itself: each tc0 byte covers two lanes */
+        "and $f14, $f14, $f12                           \r\n" /* $f14 = per-byte clip limit, 0 where filtering is off */
+        "pcmpeqb $f8, $f8, $f8                          \r\n" /* $f8 = all ones */
+        "xor $f10, $f2, $f4                             \r\n"
+        "xor $f6, $f6, $f8                              \r\n" /* $f6 = ~$f6 */
+        "and $f10, $f10, %[ff_pb_1]                     \r\n" /* low bit of ($f2 ^ $f4) */
+        "pavgb $f6, $f6, $f0                            \r\n"
+        "xor $f8, $f8, $f2                              \r\n" /* $f8 = ~$f2 */
+        "pavgb $f6, $f6, %[ff_pb_3]                     \r\n"
+        "pavgb $f8, $f8, $f4                            \r\n"
+        "pavgb $f6, $f6, $f10                           \r\n"
+        "paddusb $f6, $f6, $f8                          \r\n" /* biased delta; ff_pb_A1 acts as its zero point below */
+        "psubusb $f12, %[ff_pb_A1], $f6                 \r\n" /* $f12 = part of the delta below the bias */
+        "psubusb $f6, $f6, %[ff_pb_A1]                  \r\n" /* $f6  = part of the delta above the bias */
+        "pminub $f12, $f12, $f14                        \r\n" /* clip both parts to the limit in $f14 */
+        "pminub $f6, $f6, $f14                          \r\n"
+        "psubusb $f2, $f2, $f12                         \r\n" /* $f2 = $f2 - $f12 + $f6 (saturating) */
+        "psubusb $f4, $f4, $f6                          \r\n" /* $f4 = $f4 - $f6 + $f12 (saturating) */
+        "paddusb $f2, $f2, $f6                          \r\n"
+        "paddusb $f4, $f4, $f12                         \r\n"
+        ::[pix]"r"(pix),[stride]"r"((int64_t)stride),
+          [alpha]"r"((int64_t)alpha),[beta]"r"((int64_t)beta),[tc0]"r"(tc0),
+          [ff_pb_1]"f"(ff_pb_1),[ff_pb_3]"f"(ff_pb_3),[ff_pb_A1]"f"(ff_pb_A1)
+        : "$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14","$f16" /* NOTE(review): results are returned only via clobbered $f2/$f4 - verify callers */
+    );
+}
+
+static void inline chroma_intra_body_mmi(uint8_t *pix, int stride, /* pix/stride: bound as operands, never referenced by the asm */
+        int alpha, int beta)
+{ /* Register-only filter step: consumes $f0,$f2,$f4,$f6, leaves results in $f2 and $f4; it loads and stores nothing. */
+    __asm__ volatile ( /* NOTE(review): presumably $f0/$f2/$f4/$f6 hold p1/p0/q0/q1 loaded by the caller's asm - confirm at call sites */
+        "xor $f16, $f16, $f16                           \r\n" /* $f16 = 0 (shuffle selector) */
+        "mtc1 %[alpha], $f8                             \r\n" /* alpha is used as passed (no -1 here) */
+        "mtc1 %[beta], $f10                             \r\n"
+        "pshufh $f8, $f8, $f16                          \r\n" /* selector 0: copy halfword 0 into every halfword */
+        "pshufh $f10, $f10, $f16                        \r\n"
+        "packushb $f8, $f8, $f8                         \r\n" /* narrow to bytes: alpha in every byte */
+        "packushb $f10, $f10, $f10                      \r\n" /* beta in every byte */
+        "psubusb $f12, $f4, $f2                         \r\n" /* two saturating subtractions OR'ed give |$f4 - $f2| */
+        "psubusb $f14, $f2, $f4                         \r\n"
+        "or $f14, $f14, $f12                            \r\n"
+        "psubusb $f14, $f14, $f8                        \r\n" /* 0 where |$f4 - $f2| <= alpha */
+        "psubusb $f12, $f2, $f0                         \r\n" /* same pattern for |$f2 - $f0| against beta */
+        "psubusb $f8, $f0, $f2                          \r\n"
+        "or $f8, $f8, $f12                              \r\n"
+        "psubusb $f8, $f8, $f10                         \r\n"
+        "or $f14, $f14, $f8                             \r\n"
+        "psubusb $f12, $f4, $f6                         \r\n" /* and for |$f4 - $f6| against beta */
+        "psubusb $f8, $f6, $f4                          \r\n"
+        "or $f8, $f8, $f12                              \r\n"
+        "psubusb $f8, $f8, $f10                         \r\n"
+        "or $f14, $f14, $f8                             \r\n"
+        "xor $f12, $f12, $f12                           \r\n"
+        "pcmpeqb $f14, $f14, $f12                       \r\n" /* $f14 = 0xFF in bytes where all three tests passed */
+        "mov.d $f10, $f2                                \r\n" /* keep the unfiltered $f2 and $f4 */
+        "mov.d $f12, $f4                                \r\n"
+        "xor $f8, $f2, $f6                              \r\n"
+        "and $f8, $f8, %[ff_pb_1]                       \r\n" /* low bit of ($f2 ^ $f6): undoes pavgb's round-up */
+        "pavgb $f2, $f2, $f6                            \r\n"
+        "psubusb $f2, $f2, $f8                          \r\n"
+        "pavgb $f2, $f2, $f0                            \r\n" /* $f2 = avg(avg($f2,$f6) - lowbit, $f0) */
+        "xor $f8, $f4, $f0                              \r\n"
+        "and $f8, $f8, %[ff_pb_1]                       \r\n"
+        "pavgb $f4, $f4, $f0                            \r\n"
+        "psubusb $f4, $f4, $f8                          \r\n"
+        "pavgb $f4, $f4, $f6                            \r\n" /* $f4 = avg(avg($f4,$f0) - lowbit, $f6) */
+        "psubb $f2, $f2, $f10                           \r\n" /* difference to the saved value ... */
+        "psubb $f4, $f4, $f12                           \r\n"
+        "and $f2, $f2, $f14                             \r\n" /* ... kept only where the mask is set ... */
+        "and $f4, $f4, $f14                             \r\n"
+        "paddb $f2, $f2, $f10                           \r\n" /* ... and added back: filtered byte where enabled, original elsewhere */
+        "paddb $f4, $f4, $f12                           \r\n"
+        ::[pix]"r"(pix),[stride]"r"((int64_t)stride),
+          [alpha]"r"((int64_t)alpha),[beta]"r"((int64_t)beta),
+          [ff_pb_1]"f"(ff_pb_1)
+        : "$f0","$f2","$f4","$f8","$f10","$f12","$f14","$f16" /* $f6 is only read here, hence not listed */
+    );
+}
+
+void ff_deblock_v8_luma_8_mmi(uint8_t *pix, int stride, int alpha, int beta,
+        int8_t *tc0) /* tc0: 4 bytes are loaded (unaligned access allowed) */
+{ /* Filters 8 bytes per row around pix: reads rows pix-3*stride .. pix+2*stride, rewrites rows pix-2*stride .. pix+stride. */
+    __asm__ volatile ( /* alpha/beta are input-only operands, so alpha-1 and beta-1 are formed in scratch $10/$11 */
+        "daddu $8, %[stride], %[stride]                 \r\n" /* $8 = 2*stride */
+        "xor $f16, $f16, $f16                           \r\n" /* $f16 = 0 */
+        "daddu $9, %[stride], $8                        \r\n" /* $9 = 3*stride */
+        "daddiu $10, %[alpha], -0x1                     \r\n" /* $10 = alpha - 1 */
+        "dsubu $9, $0, $9                               \r\n"
+        "daddiu $11, %[beta], -0x1                      \r\n" /* $11 = beta - 1 */
+        "daddu $9, $9, %[pix]                           \r\n" /* $9 = pix - 3*stride */
+        "ldc1 $f4, 0x0(%[pix])                          \r\n" /* q0 = row at pix */
+        "gsldxc1 $f0, 0x0($9, %[stride])                \r\n" /* p1 = row at pix - 2*stride */
+        "gsldxc1 $f2, 0x0($9, $8)                       \r\n" /* p0 = row at pix - stride */
+        "gsldxc1 $f6, 0x0(%[pix], %[stride])            \r\n" /* q1 = row at pix + stride */
+        "mtc1 $10, $f8                                  \r\n"
+        "mtc1 $11, $f10                                 \r\n"
+        "pshufh $f8, $f8, $f16                          \r\n" /* selector 0: copy halfword 0 into every halfword */
+        "pshufh $f10, $f10, $f16                        \r\n"
+        "packushb $f8, $f8, $f8                         \r\n" /* alpha-1 in every byte */
+        "packushb $f10, $f10, $f10                      \r\n" /* beta-1 in every byte */
+        "psubusb $f12, $f4, $f2                         \r\n" /* two saturating subtractions OR'ed give |p0 - q0| */
+        "psubusb $f14, $f2, $f4                         \r\n"
+        "or $f14, $f14, $f12                            \r\n"
+        "psubusb $f12, $f2, $f0                         \r\n"
+        "psubusb $f14, $f14, $f8                        \r\n" /* 0 where |p0 - q0| < alpha */
+        "psubusb $f8, $f0, $f2                          \r\n"
+        "or $f8, $f8, $f12                              \r\n" /* |p1 - p0| */
+        "psubusb $f12, $f4, $f6                         \r\n"
+        "psubusb $f8, $f8, $f10                         \r\n" /* 0 where |p1 - p0| < beta */
+        "or $f14, $f14, $f8                             \r\n"
+        "psubusb $f8, $f6, $f4                          \r\n"
+        "or $f8, $f8, $f12                              \r\n" /* |q1 - q0| */
+        "psubusb $f8, $f8, $f10                         \r\n" /* 0 where |q1 - q0| < beta */
+        "or $f14, $f14, $f8                             \r\n"
+        "pcmpeqb $f14, $f14, $f16                       \r\n" /* 0xFF in bytes where all three tests passed */
+        "pcmpeqb $f6, $f6, $f6                          \r\n" /* $f6 = all ones (-1 per byte) */
+        "gslwlc1 $f8, 0x3(%[tc0])                       \r\n" /* unaligned 32-bit load of tc0[0..3] */
+        "gslwrc1 $f8, 0x0(%[tc0])                       \r\n"
+        "punpcklbh $f8, $f8, $f8                        \r\n" /* interleave low bytes with themselves, twice: */
+        "punpcklbh $f18, $f8, $f8                       \r\n" /* $f18 = each of the low tc0 bytes spread over four lanes */
+        "pcmpgtb $f8, $f18, $f6                         \r\n" /* lanes with tc0 > -1 */
+        "ldc1 $f6, 0x0($9)                              \r\n" /* p2 = row at pix - 3*stride */
+        "and $f20, $f8, $f14                            \r\n" /* $f20 = lanes that get filtered */
+        "psubusb $f14, $f6, $f2                         \r\n" /* |p2 - p0| < beta test: both saturated differences, */
+        "psubusb $f12, $f2, $f6                         \r\n"
+        "psubusb $f14, $f14, $f10                       \r\n" /* minus (beta-1), */
+        "psubusb $f12, $f12, $f10                       \r\n"
+        "pcmpeqb $f12, $f12, $f14                       \r\n" /* are equal only when both are 0 */
+        "and $f12, $f12, $f20                           \r\n" /* p-side mask */
+        "and $f8, $f20, $f18                            \r\n" /* tc0 in filtered lanes, 0 elsewhere */
+        "psubb $f14, $f8, $f12                          \r\n" /* clip limit = tc0 + 1 where the p-side mask (-1) is set */
+        "and $f12, $f12, $f8                            \r\n" /* tc0 where the p-side mask is set */
+        "pavgb $f8, $f2, $f4                            \r\n" /* avg(p0, q0) */
+        "ldc1 $f22, 0x0($9)                             \r\n" /* p2 again */
+        "pavgb $f6, $f6, $f8                            \r\n" /* avg(p2, avg(p0, q0)) */
+        "xor $f8, $f8, $f22                             \r\n"
+        "and $f8, $f8, %[ff_pb_1]                       \r\n" /* low bit of the xor: undoes pavgb's round-up */
+        "psubusb $f6, $f6, $f8                          \r\n"
+        "psubusb $f8, $f0, $f12                         \r\n" /* lower bound p1 - tc0 */
+        "paddusb $f12, $f12, $f0                        \r\n" /* upper bound p1 + tc0 */
+        "pmaxub $f6, $f6, $f8                           \r\n" /* clamp into [p1 - tc0, p1 + tc0] */
+        "pminub $f6, $f6, $f12                          \r\n"
+        "gssdxc1 $f6, 0x0($9, %[stride])                \r\n" /* store new p1 row (pix - 2*stride) */
+        "gsldxc1 $f8, 0x0(%[pix], $8)                   \r\n" /* q2 = row at pix + 2*stride */
+        "psubusb $f6, $f8, $f4                          \r\n" /* |q2 - q0| < beta test, same pattern as for p2 */
+        "psubusb $f12, $f4, $f8                         \r\n"
+        "psubusb $f6, $f6, $f10                         \r\n"
+        "psubusb $f12, $f12, $f10                       \r\n"
+        "pcmpeqb $f12, $f12, $f6                        \r\n"
+        "and $f12, $f12, $f20                           \r\n" /* q-side mask */
+        "psubb $f14, $f14, $f12                         \r\n" /* clip limit += 1 where the q-side mask is set */
+        "and $f10, $f18, $f12                           \r\n" /* tc0 where the q-side mask is set */
+        "gsldxc1 $f6, 0x0(%[pix], %[stride])            \r\n" /* q1 again */
+        "pavgb $f12, $f2, $f4                           \r\n" /* avg(p0, q0) */
+        "gsldxc1 $f22, 0x0(%[pix], $8)                  \r\n" /* q2 again */
+        "pavgb $f8, $f8, $f12                           \r\n" /* avg(q2, avg(p0, q0)) */
+        "xor $f12, $f12, $f22                           \r\n"
+        "and $f12, $f12, %[ff_pb_1]                     \r\n"
+        "psubusb $f8, $f8, $f12                         \r\n"
+        "psubusb $f12, $f6, $f10                        \r\n" /* lower bound q1 - tc0 */
+        "paddusb $f10, $f10, $f6                        \r\n" /* upper bound q1 + tc0 */
+        "pmaxub $f8, $f8, $f12                          \r\n" /* clamp into [q1 - tc0, q1 + tc0] */
+        "pminub $f8, $f8, $f10                          \r\n"
+        "gssdxc1 $f8, 0x0(%[pix], %[stride])            \r\n" /* store new q1 row (pix + stride) */
+        "xor $f10, $f2, $f4                             \r\n"
+        "pcmpeqb $f8, $f8, $f8                          \r\n" /* $f8 = all ones */
+        "and $f10, $f10, %[ff_pb_1]                     \r\n" /* low bit of (p0 ^ q0) */
+        "xor $f6, $f6, $f8                              \r\n" /* ~q1 */
+        "xor $f8, $f8, $f2                              \r\n" /* ~p0 */
+        "pavgb $f6, $f6, $f0                            \r\n"
+        "pavgb $f6, $f6, %[ff_pb_3]                     \r\n"
+        "pavgb $f8, $f8, $f4                            \r\n"
+        "pavgb $f6, $f6, $f10                           \r\n"
+        "paddusb $f6, $f6, $f8                          \r\n" /* biased delta; ff_pb_A1 acts as its zero point below */
+        "psubusb $f12, %[ff_pb_A1], $f6                 \r\n" /* part of the delta below the bias */
+        "psubusb $f6, $f6, %[ff_pb_A1]                  \r\n" /* part of the delta above the bias */
+        "pminub $f12, $f12, $f14                        \r\n" /* clip both parts to the limit in $f14 */
+        "pminub $f6, $f6, $f14                          \r\n"
+        "psubusb $f2, $f2, $f12                         \r\n" /* p0 = p0 - $f12 + $f6 (saturating) */
+        "psubusb $f4, $f4, $f6                          \r\n" /* q0 = q0 - $f6 + $f12 (saturating) */
+        "paddusb $f2, $f2, $f6                          \r\n"
+        "paddusb $f4, $f4, $f12                         \r\n"
+        "gssdxc1 $f2, 0x0($9, $8)                       \r\n" /* store new p0 row (pix - stride) */
+        "sdc1 $f4, 0x0(%[pix])                          \r\n" /* store new q0 row (pix) */
+        ::[pix]"r"(pix),[stride]"r"((int64_t)stride),
+          [alpha]"r"((int64_t)alpha),[beta]"r"((int64_t)beta),[tc0]"r"(tc0),
+          [ff_pb_1]"f"(ff_pb_1),[ff_pb_3]"f"(ff_pb_3),[ff_pb_A1]"f"(ff_pb_A1)
+        : "$8","$9","$10","$11","$f0","$f2","$f4","$f6","$f8","$f10","$f12", /* "memory": the asm loads and stores via pix/tc0 */
+          "$f14","$f16","$f18","$f20","$f22","memory"
+    );
+}
+
+void ff_deblock_v8_luma_intra_8_mmi(uint8_t *pix, int stride, int alpha,
+        int beta)
+{
+    uint64_t stack[0xa];
+
+    __asm__ volatile (
+        "ori $8, $0, 0x1                                \r\n"
+        "xor $f30, $f30, $f30                           \r\n"
+        "dmtc1 $8, $f16                                 \r\n"
+        "dsll $8, %[stride], 2                          \r\n"
+        "daddu $10, %[stride], %[stride]                \r\n"
+        "daddiu %[alpha], %[alpha], -0x1                \r\n"
+        "dsll $f20, $f16, $f16                          \r\n"
+        "bltz %[alpha], 1f                              \r\n"
+        "daddu $9, $10, %[stride]                       \r\n"
+        "daddiu %[beta], %[beta], -0x1                  \r\n"
+        "bltz %[beta], 1f                               \r\n"
+        "dsubu $8, $0, $8                               \r\n"
+        "daddu $8, $8, %[pix]                           \r\n"
+        "ldc1 $f4, 0x0(%[pix])                          \r\n"
+        "gsldxc1 $f0, 0x0($8, $10)                      \r\n"
+        "gsldxc1 $f2, 0x0($8, $9)                       \r\n"
+        "gsldxc1 $f6, 0x0(%[pix], %[stride])            \r\n"
+        "mtc1 %[alpha], $f8                             \r\n"
+        "mtc1 %[beta], $f10                             \r\n"
+        "pshufh $f8, $f8, $f30                          \r\n"
+        "pshufh $f10, $f10, $f30                        \r\n"
+        "packushb $f8, $f8, $f8                         \r\n"
+        "psubusb $f12, $f4, $f2                         \r\n"
+        "psubusb $f14, $f2, $f4                         \r\n"
+        "packushb $f10, $f10, $f10                      \r\n"
+        "or $f14, $f14, $f12                            \r\n"
+        "sdc1 $f8, 0x10+%[stack]                        \r\n"
+        "psubusb $f14, $f14, $f8                        \r\n"
+        "psubusb $f12, $f2, $f0                         \r\n"
+        "psubusb $f8, $f0, $f2                          \r\n"
+        "or $f8, $f8, $f12                              \r\n"
+        "psubusb $f8, $f8, $f10                         \r\n"
+        "or $f14, $f14, $f8                             \r\n"
+        "psubusb $f12, $f4, $f6                         \r\n"
+        "psubusb $f8, $f6, $f4                          \r\n"
+        "or $f8, $f8, $f12                              \r\n"
+        "psubusb $f8, $f8, $f10                         \r\n"
+        "or $f14, $f14, $f8                             \r\n"
+        "xor $f12, $f12, $f12                           \r\n"
+        "ldc1 $f8, 0x10+%[stack]                        \r\n"
+        "pcmpeqb $f14, $f14, $f12                       \r\n"
+        "sdc1 $f14, 0x20+%[stack]                       \r\n"
+        "pavgb $f8, $f8, $f30                           \r\n"
+        "psubusb $f14, $f4, $f2                         \r\n"
+        "pavgb $f8, $f8, %[ff_pb_1]                     \r\n"
+        "psubusb $f12, $f2, $f4                         \r\n"
+        "psubusb $f14, $f14, $f8                        \r\n"
+        "psubusb $f12, $f12, $f8                        \r\n"
+        "ldc1 $f28, 0x20+%[stack]                       \r\n"
+        "pcmpeqb $f12, $f12, $f14                       \r\n"
+        "and $f12, $f12, $f28                           \r\n"
+        "gsldxc1 $f28, 0x0($8, %[stride])               \r\n"
+        "psubusb $f14, $f28, $f2                        \r\n"
+        "psubusb $f8, $f2, $f28                         \r\n"
+        "psubusb $f14, $f14, $f10                       \r\n"
+        "psubusb $f8, $f8, $f10                         \r\n"
+        "pcmpeqb $f8, $f8, $f14                         \r\n"
+        "and $f8, $f8, $f12                             \r\n"
+        "gsldxc1 $f26, 0x0(%[pix], $10)                 \r\n"
+        "sdc1 $f8, 0x30+%[stack]                        \r\n"
+        "psubusb $f14, $f26, $f4                        \r\n"
+        "psubusb $f8, $f4, $f26                         \r\n"
+        "psubusb $f14, $f14, $f10                       \r\n"
+        "psubusb $f8, $f8, $f10                         \r\n"
+        "pcmpeqb $f8, $f8, $f14                         \r\n"
+        "and $f8, $f8, $f12                             \r\n"
+        "sdc1 $f8, 0x40+%[stack]                        \r\n"
+        "pavgb $f8, $f28, $f0                           \r\n"
+        "pavgb $f10, $f2, $f4                           \r\n"
+        "pavgb $f8, $f8, $f10                           \r\n"
+        "sdc1 $f10, 0x10+%[stack]                       \r\n"
+        "paddb $f12, $f28, $f0                          \r\n"
+        "paddb $f14, $f2, $f4                           \r\n"
+        "paddb $f12, $f12, $f14                         \r\n"
+        "mov.d $f14, $f12                               \r\n"
+        "sdc1 $f12, 0x0+%[stack]                        \r\n"
+        "psrlh $f12, $f12, $f16                         \r\n"
+        "pavgb $f12, $f12, $f30                         \r\n"
+        "xor $f12, $f12, $f8                            \r\n"
+        "and $f12, $f12, %[ff_pb_1]                     \r\n"
+        "psubb $f8, $f8, $f12                           \r\n"
+        "pavgb $f10, $f28, $f6                          \r\n"
+        "psubb $f12, $f28, $f6                          \r\n"
+        "paddb $f14, $f14, $f14                         \r\n"
+        "psubb $f14, $f14, $f12                         \r\n"
+        "and $f12, $f12, %[ff_pb_1]                     \r\n"
+        "psubb $f10, $f10, $f12                         \r\n"
+        "ldc1 $f24, 0x10+%[stack]                       \r\n"
+        "pavgb $f10, $f10, $f0                          \r\n"
+        "psrlh $f14, $f14, $f20                         \r\n"
+        "pavgb $f10, $f10, $f24                         \r\n"
+        "pavgb $f14, $f14, $f30                         \r\n"
+        "xor $f14, $f14, $f10                           \r\n"
+        "and $f14, $f14, %[ff_pb_1]                     \r\n"
+        "psubb $f10, $f10, $f14                         \r\n"
+        "xor $f14, $f2, $f6                             \r\n"
+        "pavgb $f12, $f2, $f6                           \r\n"
+        "and $f14, $f14, %[ff_pb_1]                     \r\n"
+        "psubb $f12, $f12, $f14                         \r\n"
+        "ldc1 $f24, 0x30+%[stack]                       \r\n"
+        "pavgb $f12, $f12, $f0                          \r\n"
+        "ldc1 $f22, 0x20+%[stack]                       \r\n"
+        "xor $f10, $f10, $f12                           \r\n"
+        "xor $f12, $f12, $f2                            \r\n"
+        "and $f10, $f10, $f24                           \r\n"
+        "and $f12, $f12, $f22                           \r\n"
+        "xor $f10, $f10, $f12                           \r\n"
+        "xor $f10, $f10, $f2                            \r\n"
+        "gssdxc1 $f10, 0x0($8, $9)                      \r\n"
+        "ldc1 $f10, 0x0($8)                             \r\n"
+        "paddb $f12, $f28, $f10                         \r\n"
+        "pavgb $f10, $f10, $f28                         \r\n"
+        "ldc1 $f22, 0x0+%[stack]                        \r\n"
+        "pavgb $f10, $f10, $f8                          \r\n"
+        "paddb $f12, $f12, $f12                         \r\n"
+        "paddb $f12, $f12, $f22                         \r\n"
+        "psrlh $f12, $f12, $f20                         \r\n"
+        "pavgb $f12, $f12, $f30                         \r\n"
+        "xor $f12, $f12, $f10                           \r\n"
+        "and $f12, $f12, %[ff_pb_1]                     \r\n"
+        "ldc1 $f22, 0x30+%[stack]                       \r\n"
+        "psubb $f10, $f10, $f12                         \r\n"
+        "xor $f8, $f8, $f0                              \r\n"
+        "xor $f10, $f10, $f28                           \r\n"
+        "and $f8, $f8, $f22                             \r\n"
+        "and $f10, $f10, $f22                           \r\n"
+        "xor $f8, $f8, $f0                              \r\n"
+        "xor $f10, $f10, $f28                           \r\n"
+        "gssdxc1 $f8, 0x0($8, $10)                      \r\n"
+        "gssdxc1 $f10, 0x0($8, %[stride])               \r\n"
+        "pavgb $f8, $f26, $f6                           \r\n"
+        "pavgb $f10, $f4, $f2                           \r\n"
+        "pavgb $f8, $f8, $f10                           \r\n"
+        "sdc1 $f10, 0x10+%[stack]                       \r\n"
+        "paddb $f12, $f26, $f6                          \r\n"
+        "paddb $f14, $f4, $f2                           \r\n"
+        "paddb $f12, $f12, $f14                         \r\n"
+        "mov.d $f14, $f12                               \r\n"
+        "sdc1 $f12, 0x0+%[stack]                        \r\n"
+        "psrlh $f12, $f12, $f16                         \r\n"
+        "pavgb $f12, $f12, $f30                         \r\n"
+        "xor $f12, $f12, $f8                            \r\n"
+        "and $f12, $f12, %[ff_pb_1]                     \r\n"
+        "psubb $f8, $f8, $f12                           \r\n"
+        "pavgb $f10, $f26, $f0                          \r\n"
+        "paddb $f14, $f14, $f14                         \r\n"
+        "psubb $f12, $f26, $f0                          \r\n"
+        "psubb $f14, $f14, $f12                         \r\n"
+        "and $f12, $f12, %[ff_pb_1]                     \r\n"
+        "psubb $f10, $f10, $f12                         \r\n"
+        "ldc1 $f22, 0x10+%[stack]                       \r\n"
+        "pavgb $f10, $f10, $f6                          \r\n"
+        "pavgb $f10, $f10, $f22                         \r\n"
+        "psrlh $f14, $f14, $f20                         \r\n"
+        "pavgb $f14, $f14, $f30                         \r\n"
+        "xor $f14, $f14, $f10                           \r\n"
+        "and $f14, $f14, %[ff_pb_1]                     \r\n"
+        "psubb $f10, $f10, $f14                         \r\n"
+        "xor $f14, $f4, $f0                             \r\n"
+        "pavgb $f12, $f4, $f0                           \r\n"
+        "and $f14, $f14, %[ff_pb_1]                     \r\n"
+        "ldc1 $f22, 0x40+%[stack]                       \r\n"
+        "psubb $f12, $f12, $f14                         \r\n"
+        "ldc1 $f24, 0x20+%[stack]                       \r\n"
+        "pavgb $f12, $f12, $f6                          \r\n"
+        "xor $f10, $f10, $f12                           \r\n"
+        "xor $f12, $f12, $f4                            \r\n"
+        "and $f10, $f10, $f22                           \r\n"
+        "and $f12, $f12, $f24                           \r\n"
+        "xor $f10, $f10, $f12                           \r\n"
+        "xor $f10, $f10, $f4                            \r\n"
+        "sdc1 $f10, 0x0(%[pix])                         \r\n"
+        "gsldxc1 $f10, 0x0(%[pix], $9)                  \r\n"
+        "paddb $f12, $f26, $f10                         \r\n"
+        "pavgb $f10, $f10, $f26                         \r\n"
+        "ldc1 $f22, 0x0+%[stack]                        \r\n"
+        "pavgb $f10, $f10, $f8                          \r\n"
+        "paddb $f12, $f12, $f12                         \r\n"
+        "paddb $f12, $f12, $f22                         \r\n"
+        "psrlh $f12, $f12, $f20                         \r\n"
+        "pavgb $f12, $f12, $f30                         \r\n"
+        "xor $f12, $f12, $f10                           \r\n"
+        "and $f12, $f12, %[ff_pb_1]                     \r\n"
+        "ldc1 $f22, 0x40+%[stack]                       \r\n"
+        "psubb $f10, $f10, $f12                         \r\n"
+        "xor $f8, $f8, $f6                              \r\n"
+        "xor $f10, $f10, $f26                           \r\n"
+        "and $f8, $f8, $f22                             \r\n"
+        "and $f10, $f10, $f22                           \r\n"
+        "xor $f8, $f8, $f6                              \r\n"
+        "xor $f10, $f10, $f26                           \r\n"
+        "gssdxc1 $f8, 0x0(%[pix], %[stride])            \r\n"
+        "gssdxc1 $f10, 0x0(%[pix], $10)                 \r\n"
+        "1:                                             \r\n"
+        ::[pix]"r"(pix),[stride]"r"((int64_t)stride),
+          [alpha]"r"((int64_t)alpha),[beta]"r"((int64_t)beta),
+          [stack]"m"(stack[0]),[ff_pb_1]"f"(ff_pb_1)
+        : "$8","$9","$10","$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14",
+          "$f16","$f18","$f20","$f22","$f24","$f26","$f28","$f30"
+    );
+}
+
+void ff_deblock_v_chroma_8_mmi(uint8_t *pix, int stride, int alpha, int beta,
+        int8_t *tc0)
+{   /* Loads four 8-byte rows around pix, runs the inter chroma body, stores the two middle rows. */
+    __asm__ volatile (   /* stage 1: bias the thresholds and load the rows */
+        "daddiu %[alpha], %[alpha], -0x1                \r\n" /* alpha -= 1 */
+        "daddiu %[beta], %[beta], -0x1                  \r\n" /* beta -= 1 */
+        "or $16, $0, %[pix]                             \r\n" /* $16 = pix */
+        "dsubu $16, $16, %[stride]                      \r\n"
+        "dsubu $16, $16, %[stride]                      \r\n" /* $16 = pix - 2*stride */
+        "ldc1 $f0, 0x0($16)                             \r\n" /* $f0 = row at pix - 2*stride */
+        "gsldxc1 $f2, 0x0($16, %[stride])               \r\n" /* $f2 = row at pix - stride */
+        "ldc1 $f4, 0x0(%[pix])                          \r\n" /* $f4 = row at pix */
+        "gsldxc1 $f6, 0x0(%[pix], %[stride])            \r\n" /* $f6 = row at pix + stride */
+        : [pix]"+r"(pix),[stride]"+r"(stride),[alpha]"+r"(alpha),
+          [beta]"+r"(beta)
+        : [tc0]"r"(tc0)
+        : "$16","$f0","$f2","$f4","$f6","memory" /* all registers written above; "memory" covers the pixel loads */
+    );
+    /* NOTE(review): stage 2 reads $16/$f2/$f4 without setting them, so it relies on them surviving the call below. */
+    chroma_inter_body_mmi(pix, stride, alpha, beta, tc0);
+    /* Stage 2: write $f2 back to pix - stride and $f4 back to pix ($16 is still pix - 2*stride). */
+    __asm__ volatile (
+        "gssdxc1 $f2, 0x0($16, %[stride])               \r\n" /* row at pix - stride */
+        "sdc1 $f4, 0x0(%[pix])                          \r\n" /* row at pix */
+        ::[pix]"r"(pix),[stride]"r"((int64_t)stride)
+        : "$16","$f2","$f4","memory" /* "memory": the stores are not visible as operands */
+    );
+}
+
+void ff_deblock_v_chroma_intra_8_mmi(uint8_t *pix, int stride, int alpha,
+        int beta)
+{   /* Loads four 8-byte rows around pix, runs the intra chroma body, stores the two middle rows. */
+    __asm__ volatile (   /* stage 1: bias the thresholds and load the rows */
+        "daddiu %[alpha], %[alpha], -0x1                \r\n" /* alpha -= 1 */
+        "daddiu %[beta], %[beta], -0x1                  \r\n" /* beta -= 1 */
+        "or $16, $0, %[pix]                             \r\n" /* $16 = pix */
+        "dsubu $16, $16, %[stride]                      \r\n"
+        "dsubu $16, $16, %[stride]                      \r\n" /* $16 = pix - 2*stride */
+        "ldc1 $f0, 0x0($16)                             \r\n" /* $f0 = row at pix - 2*stride */
+        "gsldxc1 $f2, 0x0($16, %[stride])               \r\n" /* $f2 = row at pix - stride */
+        "ldc1 $f4, 0x0(%[pix])                          \r\n" /* $f4 = row at pix */
+        "gsldxc1 $f6, 0x0(%[pix], %[stride])            \r\n" /* $f6 = row at pix + stride */
+        : [pix]"+r"(pix),[stride]"+r"(stride),[alpha]"+r"(alpha),
+          [beta]"+r"(beta)
+        ::"$16","$f0","$f2","$f4","$f6","memory" /* "memory" covers the pixel loads, which are not operands */
+    );
+    /* NOTE(review): stage 2 reads $16/$f2/$f4 without setting them, so it relies on them surviving the call below. */
+    chroma_intra_body_mmi(pix, stride, alpha, beta);
+    /* Stage 2: write $f2 back to pix - stride and $f4 back to pix ($16 is still pix - 2*stride). */
+    __asm__ volatile (
+        "gssdxc1 $f2, 0x0($16, %[stride])               \r\n" /* row at pix - stride */
+        "sdc1 $f4, 0x0(%[pix])                          \r\n" /* row at pix */
+        ::[pix]"r"(pix),[stride]"r"((int64_t)stride)
+        : "$16","$f2","$f4","memory" /* "memory": the stores are not visible as operands */
+    );
+}
+
+void ff_deblock_h_chroma_8_mmi(uint8_t *pix, int stride, int alpha, int beta,
+        int8_t *tc0)
+{   /* Transposes the 8 rows x 4 bytes starting at pix - 2, runs the inter chroma body, transposes back. */
+    __asm__ volatile (   /* stage 1: load 8 rows of 4 bytes and transpose them into $f0/$f2/$f4/$f6 */
+        "daddiu %[alpha], %[alpha], -0x1                \r\n" /* alpha -= 1 */
+        "daddiu %[beta], %[beta], -0x1                  \r\n" /* beta -= 1 */
+        "daddu $16, %[stride], %[stride]                \r\n" /* $16 = 2*stride */
+        "daddiu %[pix], %[pix], -0x2                    \r\n" /* pix -= 2 (leftmost of the 4 columns) */
+        "daddu $17, $16, %[stride]                      \r\n" /* $17 = 3*stride */
+        "daddu $19, $16, $16                            \r\n" /* $19 = 4*stride */
+        "or $18, $0, %[pix]                             \r\n" /* $18 = address of row 0 */
+        "daddu %[pix], %[pix], $17                      \r\n" /* pix = address of row 3 (kept by the "+r" operand) */
+        "gslwlc1 $f0, 0x3($18)                          \r\n" /* $f0 = row 0 (unaligned left/right pair) */
+        "daddu $12, $18, %[stride]                      \r\n"
+        "gslwrc1 $f0, 0x0($18)                          \r\n"
+        "gslwlc1 $f4, 0x3($12)                          \r\n" /* $f4 = row 1 */
+        "daddu $13, $18, $16                            \r\n"
+        "gslwrc1 $f4, 0x0($12)                          \r\n"
+        "gslwlc1 $f2, 0x3($13)                          \r\n" /* $f2 = row 2 */
+        "gslwrc1 $f2, 0x0($13)                          \r\n"
+        "gslwlc1 $f6, 0x3(%[pix])                       \r\n" /* $f6 = row 3 */
+        "gslwrc1 $f6, 0x0(%[pix])                       \r\n"
+        "punpcklbh $f0, $f0, $f4                        \r\n" /* interleave bytes of rows 0 and 1 */
+        "punpcklbh $f2, $f2, $f6                        \r\n" /* interleave bytes of rows 2 and 3 */
+        "daddu $12, %[pix], %[stride]                   \r\n"
+        "punpckhhw $f4, $f0, $f2                        \r\n" /* columns 2,3 of rows 0-3 */
+        "punpcklhw $f0, $f0, $f2                        \r\n" /* columns 0,1 of rows 0-3 */
+        "gslwlc1 $f8, 0x3($12)                          \r\n" /* $f8 = row 4 */
+        "daddu $13, %[pix], $16                         \r\n"
+        "gslwrc1 $f8, 0x0($12)                          \r\n"
+        "gslwlc1 $f12, 0x3($13)                         \r\n" /* $f12 = row 5 */
+        "daddu $12, %[pix], $17                         \r\n"
+        "gslwrc1 $f12, 0x0($13)                         \r\n"
+        "gslwlc1 $f10, 0x3($12)                         \r\n" /* $f10 = row 6 */
+        "daddu $13, %[pix], $19                         \r\n"
+        "gslwrc1 $f10, 0x0($12)                         \r\n"
+        "gslwlc1 $f14, 0x3($13)                         \r\n" /* $f14 = row 7 */
+        "gslwrc1 $f14, 0x0($13)                         \r\n"
+        "punpcklbh $f8, $f8, $f12                       \r\n" /* interleave bytes of rows 4 and 5 */
+        "punpcklbh $f10, $f10, $f14                     \r\n" /* interleave bytes of rows 6 and 7 */
+        "mov.d $f12, $f8                                \r\n"
+        "punpcklhw $f8, $f8, $f10                       \r\n" /* columns 0,1 of rows 4-7 */
+        "punpckhhw $f12, $f12, $f10                     \r\n" /* columns 2,3 of rows 4-7 */
+        "punpckhwd $f2, $f0, $f8                        \r\n" /* $f2 = column 1, rows 0-7 */
+        "punpckhwd $f6, $f4, $f12                       \r\n" /* $f6 = column 3, rows 0-7 */
+        "punpcklwd $f0, $f0, $f8                        \r\n" /* $f0 = column 0, rows 0-7 */
+        "punpcklwd $f4, $f4, $f12                       \r\n" /* $f4 = column 2, rows 0-7 */
+        "mov.d $f20, $f0                                \r\n" /* keep a copy of column 0 for stage 2 */
+        "mov.d $f22, $f6                                \r\n" /* keep a copy of column 3 for stage 2 */
+        : [pix]"+r"(pix),[stride]"+r"(stride),[alpha]"+r"(alpha),
+          [beta]"+r"(beta)
+        ::"$12","$13","$16","$17","$18","$19","$f0","$f2","$f4","$f6","$f8",
+          "$f10","$f12","$f14","$f20","$f22","memory" /* "memory" covers the pixel loads, which are not operands */
+    );
+    /* NOTE(review): stage 2 relies on $16-$19, $f2, $f4, $f20, $f22 surviving the call below; pix is now row 3. */
+    chroma_inter_body_mmi(pix, stride, alpha, beta, tc0);
+    /* Stage 2: transpose columns $f20,$f2,$f4,$f22 back into 8 rows of 4 bytes and store them. */
+    __asm__ volatile (
+        "punpckhwd $f8, $f20, $f20                      \r\n" /* rows 4-7 of column 0 */
+        "punpckhwd $f10, $f2, $f2                       \r\n" /* rows 4-7 of column 1 */
+        "punpckhwd $f12, $f4, $f4                       \r\n" /* rows 4-7 of column 2 */
+        "punpcklbh $f0, $f20, $f2                       \r\n" /* columns 0,1 of rows 0-3 */
+        "punpcklbh $f4, $f4, $f22                       \r\n" /* columns 2,3 of rows 0-3 */
+        "punpcklhw $f2, $f0, $f4                        \r\n" /* $f2 = rows 0 and 1 */
+        "punpckhhw $f0, $f0, $f4                        \r\n" /* $f0 = rows 2 and 3 */
+        "gsswlc1 $f2, 0x3($18)                          \r\n" /* store row 0 */
+        "gsswrc1 $f2, 0x0($18)                          \r\n"
+        "daddu $12, $18, %[stride]                      \r\n"
+        "punpckhwd $f2, $f2, $f2                        \r\n"
+        "gsswlc1 $f2, 0x3($12)                          \r\n" /* store row 1 */
+        "daddu $13, $18, $16                            \r\n"
+        "gsswrc1 $f2, 0x0($12)                          \r\n"
+        "gsswlc1 $f0, 0x3($13)                          \r\n" /* store row 2 */
+        "gsswrc1 $f0, 0x0($13)                          \r\n"
+        "punpckhwd $f0, $f0, $f0                        \r\n"
+        "punpckhwd $f6, $f22, $f22                      \r\n" /* rows 4-7 of column 3 */
+        "gsswlc1 $f0, 0x3(%[pix])                       \r\n" /* store row 3 */
+        "gsswrc1 $f0, 0x0(%[pix])                       \r\n"
+        "punpcklbh $f8, $f8, $f10                       \r\n" /* columns 0,1 of rows 4-7 */
+        "punpcklbh $f12, $f12, $f6                      \r\n" /* columns 2,3 of rows 4-7 */
+        "daddu $12, %[pix], %[stride]                   \r\n"
+        "punpcklhw $f10, $f8, $f12                      \r\n" /* $f10 = rows 4 and 5 */
+        "punpckhhw $f8, $f8, $f12                       \r\n" /* $f8 = rows 6 and 7 */
+        "gsswlc1 $f10, 0x3($12)                         \r\n" /* store row 4 */
+        "gsswrc1 $f10, 0x0($12)                         \r\n"
+        "punpckhwd $f10, $f10, $f10                     \r\n"
+        "daddu $12, %[pix], $16                         \r\n"
+        "daddu $13, %[pix], $17                         \r\n"
+        "gsswlc1 $f10, 0x3($12)                         \r\n" /* store row 5 */
+        "gsswrc1 $f10, 0x0($12)                         \r\n"
+        "gsswlc1 $f8, 0x3($13)                          \r\n" /* store row 6 */
+        "daddu $12, %[pix], $19                         \r\n"
+        "punpckhwd $f20, $f8, $f8                       \r\n"
+        "gsswrc1 $f8, 0x0($13)                          \r\n"
+        "gsswlc1 $f20, 0x3($12)                         \r\n" /* store row 7 */
+        "gsswrc1 $f20, 0x0($12)                         \r\n"
+        ::[pix]"r"(pix),[stride]"r"((int64_t)stride)
+        : "$12","$13","$16","$17","$18","$19","$f0","$f2","$f4","$f6","$f8",
+          "$f10","$f12","$f20","memory" /* "memory": the stores are not visible as operands */
+    );
+}
+
+void ff_deblock_h_chroma_intra_8_mmi(uint8_t *pix, int stride, int alpha,
+        int beta)
+{   /* Transposes the 8 rows x 4 bytes starting at pix - 2, runs the intra chroma body, transposes back. */
+    __asm__ volatile (   /* stage 1: load 8 rows of 4 bytes and transpose them into $f0/$f2/$f4/$f6 */
+        "daddiu %[alpha], %[alpha], -0x1                \r\n" /* alpha -= 1 */
+        "daddiu %[beta], %[beta], -0x1                  \r\n" /* beta -= 1 */
+        "daddu $16, %[stride], %[stride]                \r\n" /* $16 = 2*stride */
+        "daddiu %[pix], %[pix], -0x2                    \r\n" /* pix -= 2 (leftmost of the 4 columns) */
+        "daddu $17, $16, %[stride]                      \r\n" /* $17 = 3*stride */
+        "daddu $19, $16, $16                            \r\n" /* $19 = 4*stride */
+        "or $18, $0, %[pix]                             \r\n" /* $18 = address of row 0 */
+        "daddu %[pix], %[pix], $17                      \r\n" /* pix = address of row 3 (kept by the "+r" operand) */
+        "gslwlc1 $f0, 0x3($18)                          \r\n" /* $f0 = row 0 (unaligned left/right pair) */
+        "daddu $12, $18, %[stride]                      \r\n"
+        "gslwrc1 $f0, 0x0($18)                          \r\n"
+        "gslwlc1 $f4, 0x3($12)                          \r\n" /* $f4 = row 1 */
+        "daddu $13, $18, $16                            \r\n"
+        "gslwrc1 $f4, 0x0($12)                          \r\n"
+        "gslwlc1 $f2, 0x3($13)                          \r\n" /* $f2 = row 2 */
+        "gslwrc1 $f2, 0x0($13)                          \r\n"
+        "gslwlc1 $f6, 0x3(%[pix])                       \r\n" /* $f6 = row 3 */
+        "gslwrc1 $f6, 0x0(%[pix])                       \r\n"
+        "punpcklbh $f0, $f0, $f4                        \r\n" /* interleave bytes of rows 0 and 1 */
+        "punpcklbh $f2, $f2, $f6                        \r\n" /* interleave bytes of rows 2 and 3 */
+        "daddu $12, %[pix], %[stride]                   \r\n"
+        "punpckhhw $f4, $f0, $f2                        \r\n" /* columns 2,3 of rows 0-3 */
+        "punpcklhw $f0, $f0, $f2                        \r\n" /* columns 0,1 of rows 0-3 */
+        "gslwlc1 $f8, 0x3($12)                          \r\n" /* $f8 = row 4 */
+        "daddu $13, %[pix], $16                         \r\n"
+        "gslwrc1 $f8, 0x0($12)                          \r\n"
+        "gslwlc1 $f12, 0x3($13)                         \r\n" /* $f12 = row 5 */
+        "daddu $12, %[pix], $17                         \r\n"
+        "gslwrc1 $f12, 0x0($13)                         \r\n"
+        "gslwlc1 $f10, 0x3($12)                         \r\n" /* $f10 = row 6 */
+        "daddu $13, %[pix], $19                         \r\n"
+        "gslwrc1 $f10, 0x0($12)                         \r\n"
+        "gslwlc1 $f14, 0x3($13)                         \r\n" /* $f14 = row 7 */
+        "gslwrc1 $f14, 0x0($13)                         \r\n"
+        "punpcklbh $f8, $f8, $f12                       \r\n" /* interleave bytes of rows 4 and 5 */
+        "punpcklbh $f10, $f10, $f14                     \r\n" /* interleave bytes of rows 6 and 7 */
+        "mov.d $f12, $f8                                \r\n"
+        "punpcklhw $f8, $f8, $f10                       \r\n" /* columns 0,1 of rows 4-7 */
+        "punpckhhw $f12, $f12, $f10                     \r\n" /* columns 2,3 of rows 4-7 */
+        "punpckhwd $f2, $f0, $f8                        \r\n" /* $f2 = column 1, rows 0-7 */
+        "punpckhwd $f6, $f4, $f12                       \r\n" /* $f6 = column 3, rows 0-7 */
+        "punpcklwd $f0, $f0, $f8                        \r\n" /* $f0 = column 0, rows 0-7 */
+        "punpcklwd $f4, $f4, $f12                       \r\n" /* $f4 = column 2, rows 0-7 */
+        : [pix]"+r"(pix),[stride]"+r"(stride),[alpha]"+r"(alpha),
+          [beta]"+r"(beta)
+        ::"$12","$13","$16","$17","$18","$19","$f0","$f2","$f4","$f6","$f8",
+          "$f10","$f12","$f14","$f20","$f22","memory" /* "memory" covers the pixel loads, which are not operands */
+    );
+    /* NOTE(review): stage 2 relies on $16-$19 and $f0-$f6 surviving the call below; pix is now row 3. */
+    chroma_intra_body_mmi(pix, stride, alpha, beta);
+    /* Stage 2: transpose columns $f0,$f2,$f4,$f6 back into 8 rows of 4 bytes and store them. */
+    __asm__ volatile (
+        "punpckhwd $f8, $f0, $f0                        \r\n" /* rows 4-7 of column 0 */
+        "punpckhwd $f10, $f2, $f2                       \r\n" /* rows 4-7 of column 1 */
+        "punpckhwd $f12, $f4, $f4                       \r\n" /* rows 4-7 of column 2 */
+        "punpcklbh $f0, $f0, $f2                        \r\n" /* columns 0,1 of rows 0-3 */
+        "punpcklbh $f4, $f4, $f6                        \r\n" /* columns 2,3 of rows 0-3 */
+        "punpcklhw $f2, $f0, $f4                        \r\n" /* $f2 = rows 0 and 1 */
+        "punpckhhw $f0, $f0, $f4                        \r\n" /* $f0 = rows 2 and 3 */
+        "gsswlc1 $f2, 0x3($18)                          \r\n" /* store row 0 */
+        "gsswrc1 $f2, 0x0($18)                          \r\n"
+        "daddu $12, $18, %[stride]                      \r\n"
+        "punpckhwd $f2, $f2, $f2                        \r\n"
+        "gsswlc1 $f2, 0x3($12)                          \r\n" /* store row 1 */
+        "daddu $13, $18, $16                            \r\n"
+        "gsswrc1 $f2, 0x0($12)                          \r\n"
+        "gsswlc1 $f0, 0x3($13)                          \r\n" /* store row 2 */
+        "gsswrc1 $f0, 0x0($13)                          \r\n"
+        "punpckhwd $f0, $f0, $f0                        \r\n"
+        "punpckhwd $f6, $f6, $f6                        \r\n" /* rows 4-7 of column 3 */
+        "gsswlc1 $f0, 0x3(%[pix])                       \r\n" /* store row 3 */
+        "gsswrc1 $f0, 0x0(%[pix])                       \r\n"
+        "punpcklbh $f8, $f8, $f10                       \r\n" /* columns 0,1 of rows 4-7 */
+        "punpcklbh $f12, $f12, $f6                      \r\n" /* columns 2,3 of rows 4-7 */
+        "daddu $12, %[pix], %[stride]                   \r\n"
+        "punpcklhw $f10, $f8, $f12                      \r\n" /* $f10 = rows 4 and 5 */
+        "punpckhhw $f8, $f8, $f12                       \r\n" /* $f8 = rows 6 and 7 */
+        "gsswlc1 $f10, 0x3($12)                         \r\n" /* store row 4 */
+        "gsswrc1 $f10, 0x0($12)                         \r\n"
+        "punpckhwd $f10, $f10, $f10                     \r\n"
+        "daddu $12, %[pix], $16                         \r\n"
+        "daddu $13, %[pix], $17                         \r\n"
+        "gsswlc1 $f10, 0x3($12)                         \r\n" /* store row 5 */
+        "gsswrc1 $f10, 0x0($12)                         \r\n"
+        "gsswlc1 $f8, 0x3($13)                          \r\n" /* store row 6 */
+        "daddu $12, %[pix], $19                         \r\n"
+        "punpckhwd $f20, $f8, $f8                       \r\n"
+        "gsswrc1 $f8, 0x0($13)                          \r\n"
+        "gsswlc1 $f20, 0x3($12)                         \r\n" /* store row 7 */
+        "gsswrc1 $f20, 0x0($12)                         \r\n"
+        ::[pix]"r"(pix),[stride]"r"((int64_t)stride)
+        : "$12","$13","$16","$17","$18","$19","$f0","$f2","$f4","$f6","$f8",
+          "$f10","$f12","$f20","memory" /* "memory": the stores are not visible as operands */
+    );
+}
+
+void ff_deblock_v_luma_8_mmi(uint8_t *pix, int stride, int alpha, int beta,
+        int8_t *tc0)
+{   /* Two halves (pix, pix + 8), two tc0 entries each; a half is skipped when both entries are negative. */
+    int half;
+    for (half = 0; half < 2; half++, pix += 8, tc0 += 2)
+        if (tc0[0] >= 0 || tc0[1] >= 0)
+            ff_deblock_v8_luma_8_mmi(pix, stride, alpha, beta, tc0);
+}
+
+void ff_deblock_v_luma_intra_8_mmi(uint8_t *pix, int stride, int alpha, int beta)
+{   /* Runs the v8 intra luma filter at pix and again at pix + 8. */
+    int off;
+    for (off = 0; off <= 8; off += 8)
+        ff_deblock_v8_luma_intra_8_mmi(pix + off, stride, alpha, beta);
+}
+
+void ff_deblock_h_luma_8_mmi(uint8_t *pix, int stride, int alpha, int beta,
+        int8_t *tc0)
+{   /* Transposes 16 rows into stack[], filters the tile with ff_deblock_v_luma_8_mmi, transposes back. */
+    uint64_t stack[0xd]; /* six 16-byte tile rows at byte offsets 0x00..0x50 */
+    /* Stage 1: tile row k (offset 0x10*k) receives the bytes at pix + (k - 3) of 16 picture rows. */
+    __asm__ volatile (
+        "daddu $15, %[stride], %[stride]                \r\n" /* $15 = 2*stride */
+        "daddiu $8, %[pix], -0x4                        \r\n" /* $8 = pix - 4 (row 0) */
+        "daddu $9, %[stride], $15                       \r\n" /* $9 = 3*stride */
+        "gsldlc1 $f0, 0x7($8)                           \r\n" /* $f0 = 8 unaligned bytes of row 0 */
+        "gsldrc1 $f0, 0x0($8)                           \r\n"
+        "daddu $12, $8, %[stride]                       \r\n"
+        "daddu $10, $8, $9                              \r\n" /* $10 = address of row 3 */
+        "gsldlc1 $f2, 0x7($12)                          \r\n" /* $f2 = row 1 */
+        "daddu $11, $8, $15                             \r\n"
+        "gsldrc1 $f2, 0x0($12)                          \r\n"
+        "gsldlc1 $f4, 0x7($11)                          \r\n" /* $f4 = row 2 */
+        "gsldrc1 $f4, 0x0($11)                          \r\n"
+        "gsldlc1 $f6, 0x7($10)                          \r\n" /* $f6 = row 3 */
+        "daddu $12, $10, %[stride]                      \r\n"
+        "gsldrc1 $f6, 0x0($10)                          \r\n"
+        "gsldlc1 $f8, 0x7($12)                          \r\n" /* $f8 = row 4 */
+        "daddu $11, $10, $15                            \r\n"
+        "gsldrc1 $f8, 0x0($12)                          \r\n"
+        "gsldlc1 $f10, 0x7($11)                         \r\n" /* $f10 = row 5 */
+        "daddu $12, $10, $9                             \r\n"
+        "gsldrc1 $f10, 0x0($11)                         \r\n"
+        "gsldlc1 $f12, 0x7($12)                         \r\n" /* $f12 = row 6 */
+        "gsldrc1 $f12, 0x0($12)                         \r\n"
+        "daddu $14, $15, $15                            \r\n" /* $14 = 4*stride */
+        "punpckhbh $f14, $f0, $f2                       \r\n"
+        "punpcklbh $f0, $f0, $f2                        \r\n"
+        "punpckhbh $f2, $f4, $f6                        \r\n"
+        "punpcklbh $f4, $f4, $f6                        \r\n"
+        "punpckhbh $f6, $f8, $f10                       \r\n"
+        "punpcklbh $f8, $f8, $f10                       \r\n"
+        "daddu $12, $10, $14                            \r\n"
+        "sdc1 $f2, 0x10+%[stack]                        \r\n" /* spill: this slot is scratch until the final stores */
+        "gsldlc1 $f16, 0x7($12)                         \r\n" /* $f16 = row 7 */
+        "gsldrc1 $f16, 0x0($12)                         \r\n"
+        "daddu $13, $14, $14                            \r\n" /* $13 = 8*stride */
+        "punpckhbh $f10, $f12, $f16                     \r\n"
+        "punpcklbh $f12, $f12, $f16                     \r\n"
+        "punpckhhw $f2, $f0, $f4                        \r\n"
+        "punpcklhw $f0, $f0, $f4                        \r\n"
+        "punpckhhw $f4, $f8, $f12                       \r\n"
+        "punpcklhw $f8, $f8, $f12                       \r\n"
+        "ldc1 $f16, 0x10+%[stack]                       \r\n" /* reload the spilled vector */
+        "punpckhwd $f0, $f0, $f8                        \r\n" /* column at pix - 3, rows 0-7 */
+        "sdc1 $f0, 0x0+%[stack]                         \r\n" /* tile row 0, low half */
+        "punpckhhw $f12, $f14, $f16                     \r\n"
+        "punpcklhw $f14, $f14, $f16                     \r\n"
+        "punpckhhw $f0, $f6, $f10                       \r\n"
+        "punpcklhw $f6, $f6, $f10                       \r\n"
+        "punpcklwd $f12, $f12, $f0                      \r\n" /* column at pix + 2 */
+        "punpckhwd $f10, $f14, $f6                      \r\n" /* column at pix + 1 */
+        "punpcklwd $f14, $f14, $f6                      \r\n" /* column at pix */
+        "punpckhwd $f6, $f2, $f4                        \r\n" /* column at pix - 1 */
+        "punpcklwd $f2, $f2, $f4                        \r\n" /* column at pix - 2 */
+        "sdc1 $f2, 0x10+%[stack]                        \r\n" /* tile rows 1..5, low halves (picture rows 0-7) */
+        "sdc1 $f6, 0x20+%[stack]                        \r\n"
+        "sdc1 $f14, 0x30+%[stack]                       \r\n"
+        "sdc1 $f10, 0x40+%[stack]                       \r\n"
+        "sdc1 $f12, 0x50+%[stack]                       \r\n"
+        "daddu $8, $8, $13                              \r\n" /* advance to picture row 8 */
+        "daddu $10, $10, $13                            \r\n" /* advance to picture row 11 */
+        "gsldlc1 $f0, 0x7($8)                           \r\n" /* same load + transpose for rows 8-15 */
+        "daddu $12, $8, %[stride]                       \r\n"
+        "gsldrc1 $f0, 0x0($8)                           \r\n"
+        "gsldlc1 $f2, 0x7($12)                          \r\n"
+        "daddu $11, $8, $15                             \r\n"
+        "gsldrc1 $f2, 0x0($12)                          \r\n"
+        "gsldlc1 $f4, 0x7($11)                          \r\n"
+        "gsldrc1 $f4, 0x0($11)                          \r\n"
+        "gsldlc1 $f6, 0x7($10)                          \r\n"
+        "daddu $12, $10, %[stride]                      \r\n"
+        "gsldrc1 $f6, 0x0($10)                          \r\n"
+        "gsldlc1 $f8, 0x7($12)                          \r\n"
+        "daddu $11, $10, $15                            \r\n"
+        "gsldrc1 $f8, 0x0($12)                          \r\n"
+        "gsldlc1 $f10, 0x7($11)                         \r\n"
+        "daddu $12, $10, $9                             \r\n"
+        "gsldrc1 $f10, 0x0($11)                         \r\n"
+        "gsldlc1 $f12, 0x7($12)                         \r\n"
+        "gsldrc1 $f12, 0x0($12)                         \r\n"
+        "punpckhbh $f14, $f0, $f2                       \r\n"
+        "punpcklbh $f0, $f0, $f2                        \r\n"
+        "punpckhbh $f2, $f4, $f6                        \r\n"
+        "punpcklbh $f4, $f4, $f6                        \r\n"
+        "punpckhbh $f6, $f8, $f10                       \r\n"
+        "punpcklbh $f8, $f8, $f10                       \r\n"
+        "daddu $12, $10, $14                            \r\n"
+        "sdc1 $f2, 0x18+%[stack]                        \r\n" /* spill: this slot is scratch until the final stores */
+        "gsldlc1 $f16, 0x7($12)                         \r\n"
+        "gsldrc1 $f16, 0x0($12)                         \r\n"
+        "punpckhhw $f2, $f0, $f4                        \r\n"
+        "punpckhbh $f10, $f12, $f16                     \r\n"
+        "punpcklbh $f12, $f12, $f16                     \r\n"
+        "punpcklhw $f0, $f0, $f4                        \r\n"
+        "punpckhhw $f4, $f8, $f12                       \r\n"
+        "punpcklhw $f8, $f8, $f12                       \r\n"
+        "punpckhwd $f0, $f0, $f8                        \r\n"
+        "ldc1 $f16, 0x18+%[stack]                       \r\n" /* reload the spilled vector */
+        "sdc1 $f0, 0x8+%[stack]                         \r\n" /* tile row 0, high half */
+        "punpckhhw $f12, $f14, $f16                     \r\n"
+        "punpcklhw $f14, $f14, $f16                     \r\n"
+        "punpckhhw $f0, $f6, $f10                       \r\n"
+        "punpcklhw $f6, $f6, $f10                       \r\n"
+        "punpckhwd $f10, $f14, $f6                      \r\n"
+        "punpcklwd $f14, $f14, $f6                      \r\n"
+        "punpckhwd $f6, $f2, $f4                        \r\n"
+        "punpcklwd $f2, $f2, $f4                        \r\n"
+        "punpcklwd $f12, $f12, $f0                      \r\n"
+        "sdc1 $f2, 0x18+%[stack]                        \r\n" /* tile rows 1..5, high halves (picture rows 8-15) */
+        "sdc1 $f6, 0x28+%[stack]                        \r\n"
+        "sdc1 $f14, 0x38+%[stack]                       \r\n"
+        "sdc1 $f10, 0x48+%[stack]                       \r\n"
+        "sdc1 $f12, 0x58+%[stack]                       \r\n"
+        ::[pix]"r"(pix),[stride]"r"((int64_t)stride),[stack]"m"(stack[0])
+        : "$8","$9","$10","$11","$12","$13","$14","$15","$f0","$f2","$f4",
+          "$f6","$f8","$f10","$f12","$f14","$f16","memory" /* "memory": stack[] is written but only declared as input */
+    );
+    /* &stack[6] is byte offset 0x30, the tile row built from the bytes at pix; the tile stride is 0x10. */
+    ff_deblock_v_luma_8_mmi((uint8_t *) &stack[6], 0x10, alpha, beta, tc0);
+    /* Stage 2: transpose tile rows 1..4 (offsets 0x10..0x40) back to the 4 bytes at pix - 2 of each row. */
+    __asm__ volatile (
+        "daddu $15, %[stride], %[stride]                \r\n" /* $15 = 2*stride */
+        "daddiu $8, %[pix], -0x2                        \r\n" /* $8 = pix - 2 (row 0) */
+        "daddu $14, $15, $15                            \r\n" /* $14 = 4*stride */
+        "daddu $9, $15, %[stride]                       \r\n" /* $9 = 3*stride */
+        "daddu $13, $14, $14                            \r\n" /* $13 = 8*stride */
+        "daddu $10, $8, $9                              \r\n" /* $10 = address of row 3 */
+        "ldc1 $f0, 0x10+%[stack]                        \r\n" /* low halves of tile rows 1..4 (picture rows 0-7) */
+        "ldc1 $f2, 0x20+%[stack]                        \r\n"
+        "ldc1 $f4, 0x30+%[stack]                        \r\n"
+        "ldc1 $f6, 0x40+%[stack]                        \r\n"
+        "punpckhwd $f8, $f0, $f0                        \r\n"
+        "punpckhwd $f10, $f2, $f2                       \r\n"
+        "punpckhwd $f12, $f4, $f4                       \r\n"
+        "punpcklbh $f0, $f0, $f2                        \r\n"
+        "punpcklbh $f4, $f4, $f6                        \r\n"
+        "punpcklhw $f2, $f0, $f4                        \r\n" /* $f2 = rows 0 and 1 */
+        "punpckhhw $f0, $f0, $f4                        \r\n" /* $f0 = rows 2 and 3 */
+        "gsswlc1 $f2, 0x3($8)                           \r\n" /* store row 0 */
+        "gsswrc1 $f2, 0x0($8)                           \r\n"
+        "daddu $12, $8, %[stride]                       \r\n"
+        "punpckhwd $f2, $f2, $f2                        \r\n"
+        "daddu $11, $8, $15                             \r\n"
+        "gsswlc1 $f2, 0x3($12)                          \r\n" /* store row 1 */
+        "gsswrc1 $f2, 0x0($12)                          \r\n"
+        "gsswlc1 $f0, 0x3($11)                          \r\n" /* store row 2 */
+        "gsswrc1 $f0, 0x0($11)                          \r\n"
+        "punpckhwd $f0, $f0, $f0                        \r\n"
+        "punpckhwd $f6, $f6, $f6                        \r\n"
+        "gsswlc1 $f0, 0x3($10)                          \r\n" /* store row 3 */
+        "gsswrc1 $f0, 0x0($10)                          \r\n"
+        "punpcklbh $f8, $f8, $f10                       \r\n"
+        "punpcklbh $f12, $f12, $f6                      \r\n"
+        "punpcklhw $f10, $f8, $f12                      \r\n" /* $f10 = rows 4 and 5 */
+        "daddu $12, $10, %[stride]                      \r\n"
+        "punpckhhw $f8, $f8, $f12                       \r\n" /* $f8 = rows 6 and 7 */
+        "gsswlc1 $f10, 0x3($12)                         \r\n" /* store row 4 */
+        "gsswrc1 $f10, 0x0($12)                         \r\n"
+        "daddu $12, $10, $15                            \r\n"
+        "punpckhwd $f10, $f10, $f10                     \r\n"
+        "daddu $11, $10, $9                             \r\n"
+        "gsswlc1 $f10, 0x3($12)                         \r\n" /* store row 5 */
+        "gsswrc1 $f10, 0x0($12)                         \r\n"
+        "gsswlc1 $f8, 0x3($11)                          \r\n" /* store row 6 */
+        "gsswrc1 $f8, 0x0($11)                          \r\n"
+        "daddu $12, $10, $14                            \r\n"
+        "punpckhwd $f8, $f8, $f8                        \r\n"
+        "daddu $8, $8, $13                              \r\n" /* advance to picture row 8 */
+        "gsswlc1 $f8, 0x3($12)                          \r\n" /* store row 7 */
+        "gsswrc1 $f8, 0x0($12)                          \r\n"
+        "daddu $10, $10, $13                            \r\n" /* advance to picture row 11 */
+        "ldc1 $f0, 0x18+%[stack]                        \r\n" /* high halves of tile rows 1..4 (picture rows 8-15) */
+        "ldc1 $f2, 0x28+%[stack]                        \r\n"
+        "ldc1 $f4, 0x38+%[stack]                        \r\n"
+        "ldc1 $f6, 0x48+%[stack]                        \r\n"
+        "daddu $15, %[stride], %[stride]                \r\n" /* recomputes 2*stride (value unchanged) */
+        "punpckhwd $f8, $f0, $f0                        \r\n"
+        "daddu $14, $15, $15                            \r\n" /* recomputes 4*stride (value unchanged) */
+        "punpckhwd $f10, $f2, $f2                       \r\n"
+        "punpckhwd $f12, $f4, $f4                       \r\n"
+        "punpcklbh $f0, $f0, $f2                        \r\n"
+        "punpcklbh $f4, $f4, $f6                        \r\n"
+        "daddu $12, $8, %[stride]                       \r\n"
+        "punpcklhw $f2, $f0, $f4                        \r\n" /* $f2 = rows 8 and 9 */
+        "punpckhhw $f0, $f0, $f4                        \r\n" /* $f0 = rows 10 and 11 */
+        "gsswlc1 $f2, 0x3($8)                           \r\n" /* store row 8 */
+        "gsswrc1 $f2, 0x0($8)                           \r\n"
+        "punpckhwd $f2, $f2, $f2                        \r\n"
+        "daddu $11, $8, $15                             \r\n"
+        "gsswlc1 $f2, 0x3($12)                          \r\n" /* store row 9 */
+        "gsswrc1 $f2, 0x0($12)                          \r\n"
+        "gsswlc1 $f0, 0x3($11)                          \r\n" /* store row 10 */
+        "gsswrc1 $f0, 0x0($11)                          \r\n"
+        "punpckhwd $f0, $f0, $f0                        \r\n"
+        "punpckhwd $f6, $f6, $f6                        \r\n"
+        "gsswlc1 $f0, 0x3($10)                          \r\n" /* store row 11 */
+        "gsswrc1 $f0, 0x0($10)                          \r\n"
+        "punpcklbh $f8, $f8, $f10                       \r\n"
+        "punpcklbh $f12, $f12, $f6                      \r\n"
+        "daddu $12, $10, %[stride]                      \r\n"
+        "punpcklhw $f10, $f8, $f12                      \r\n" /* $f10 = rows 12 and 13 */
+        "punpckhhw $f8, $f8, $f12                       \r\n" /* $f8 = rows 14 and 15 */
+        "gsswlc1 $f10, 0x3($12)                         \r\n" /* store row 12 */
+        "gsswrc1 $f10, 0x0($12)                         \r\n"
+        "daddu $12, $10, $15                            \r\n"
+        "punpckhwd $f10, $f10, $f10                     \r\n"
+        "daddu $11, $10, $9                             \r\n"
+        "gsswlc1 $f10, 0x3($12)                         \r\n" /* store row 13 */
+        "gsswrc1 $f10, 0x0($12)                         \r\n"
+        "gsswlc1 $f8, 0x3($11)                          \r\n" /* store row 14 */
+        "gsswrc1 $f8, 0x0($11)                          \r\n"
+        "daddu $12, $10, $14                            \r\n"
+        "punpckhwd $f8, $f8, $f8                        \r\n"
+        "gsswlc1 $f8, 0x3($12)                          \r\n" /* store row 15 */
+        "gsswrc1 $f8, 0x0($12)                          \r\n"
+        ::[pix]"r"(pix),[stride]"r"((int64_t)stride),[stack]"m"(stack[0])
+        : "$8","$9","$10","$11","$12","$13","$14","$15","$f0","$f2","$f4",
+          "$f6","$f8","$f10","$f12","$f14","$f16","memory" /* "memory": reads all of stack[], stores to the picture */
+    );
+}
+
+/*
+ * Intra luma deblocking wrapper built around ff_deblock_v_luma_intra_8_mmi().
+ *
+ * The routine works in three steps:
+ *   1. read 16 rows of 8 bytes starting at pix - 4 and interleave them
+ *      (byte/halfword/word unpack steps, which look like two 8x8 byte
+ *      transposes) into the scratch buffer ptmp, laid out with a 0x10-byte
+ *      row pitch;
+ *   2. run ff_deblock_v_luma_intra_8_mmi() on that buffer, with the edge at
+ *      byte offset 0x40 (&ptmp[8]) and a stride of 0x10;
+ *   3. interleave the filtered buffer back and write the 16 rows of 8 bytes
+ *      to the picture.
+ *
+ * pix    - pointer into the picture; rows are read/written from pix - 4
+ * stride - distance in bytes between two picture rows
+ * alpha, beta - passed unchanged to ff_deblock_v_luma_intra_8_mmi()
+ *
+ * Both asm statements store to memory (ptmp/pdat in the first one, the
+ * picture in the second one) that is not described by an output operand,
+ * so they carry a "memory" clobber to keep the compiler from caching or
+ * reordering accesses to that memory around them.
+ */
+void ff_deblock_h_luma_intra_8_mmi(uint8_t *pix, int stride, int alpha,
+        int beta)
+{
+    uint64_t ptmp[0x11];
+    uint64_t pdat[4];
+
+    __asm__ volatile (
+        /* $12 = 2*stride, $11 = 3*stride, $13 = 4*stride,
+         * $10 = pix - 4 (row 0), $9 = row 3 */
+        "daddu $12, %[stride], %[stride]                \r\n"
+        "daddiu $10, %[pix], -0x4                       \r\n"
+        "daddu $11, $12, %[stride]                      \r\n"
+        "daddu $13, $12, $12                            \r\n"
+        "daddu $9, $10, $11                             \r\n"
+        "daddu $8, $10, %[stride]                       \r\n"
+        /* load rows 0..7 (gsldlc1/gsldrc1 pairs: 8-byte loads that do not
+         * require alignment) */
+        "gsldlc1 $f0, 0x7($10)                          \r\n"
+        "gsldrc1 $f0, 0x0($10)                          \r\n"
+        "daddu $14, $10, $12                            \r\n"
+        "gsldlc1 $f2, 0x7($8)                           \r\n"
+        "gsldrc1 $f2, 0x0($8)                           \r\n"
+        "gsldlc1 $f4, 0x7($14)                          \r\n"
+        "gsldrc1 $f4, 0x0($14)                          \r\n"
+        "daddu $8, $9, %[stride]                        \r\n"
+        "gsldlc1 $f6, 0x7($9)                           \r\n"
+        "gsldrc1 $f6, 0x0($9)                           \r\n"
+        "daddu $14, $9, $12                             \r\n"
+        "gsldlc1 $f8, 0x7($8)                           \r\n"
+        "gsldrc1 $f8, 0x0($8)                           \r\n"
+        "daddu $8, $9, $11                              \r\n"
+        "gsldlc1 $f10, 0x7($14)                         \r\n"
+        "gsldrc1 $f10, 0x0($14)                         \r\n"
+        "gsldlc1 $f12, 0x7($8)                          \r\n"
+        "gsldrc1 $f12, 0x0($8)                          \r\n"
+        "daddu $8, $9, $13                              \r\n"
+        /* interleave bytes, then halfwords, then words; ptmp slots are
+         * used as spill space while registers are short */
+        "punpckhbh $f14, $f0, $f2                       \r\n"
+        "punpcklbh $f0, $f0, $f2                        \r\n"
+        "punpckhbh $f2, $f4, $f6                        \r\n"
+        "punpcklbh $f4, $f4, $f6                        \r\n"
+        "punpckhbh $f6, $f8, $f10                       \r\n"
+        "punpcklbh $f8, $f8, $f10                       \r\n"
+        "gsldlc1 $f16, 0x7($8)                          \r\n"
+        "gsldrc1 $f16, 0x0($8)                          \r\n"
+        "punpckhbh $f10, $f12, $f16                     \r\n"
+        "punpcklbh $f12, $f12, $f16                     \r\n"
+        "sdc1 $f6, 0x0+%[ptmp]                          \r\n"
+        "punpckhhw $f6, $f0, $f4                        \r\n"
+        "punpcklhw $f0, $f0, $f4                        \r\n"
+        "punpckhhw $f4, $f8, $f12                       \r\n"
+        "punpcklhw $f8, $f8, $f12                       \r\n"
+        "punpckhhw $f12, $f14, $f2                      \r\n"
+        "punpcklhw $f14, $f14, $f2                      \r\n"
+        "sdc1 $f4, 0x20+%[ptmp]                         \r\n"
+        "ldc1 $f4, 0x0+%[ptmp]                          \r\n"
+        "punpckhhw $f2, $f4, $f10                       \r\n"
+        "punpcklhw $f4, $f4, $f10                       \r\n"
+        "punpckhwd $f10, $f0, $f8                       \r\n"
+        "punpcklwd $f0, $f0, $f8                        \r\n"
+        "punpckhwd $f8, $f14, $f4                       \r\n"
+        "punpcklwd $f14, $f14, $f4                      \r\n"
+        /* first half of the result goes to ptmp + 0x00, 0x10, ... 0x70 */
+        "sdc1 $f0, 0x0+%[ptmp]                          \r\n"
+        "sdc1 $f10, 0x10+%[ptmp]                        \r\n"
+        "sdc1 $f14, 0x40+%[ptmp]                        \r\n"
+        "sdc1 $f8, 0x50+%[ptmp]                         \r\n"
+        "ldc1 $f16, 0x20+%[ptmp]                        \r\n"
+        "punpckhwd $f0, $f6, $f16                       \r\n"
+        "punpcklwd $f6, $f6, $f16                       \r\n"
+        "punpckhwd $f10, $f12, $f2                      \r\n"
+        "punpcklwd $f12, $f12, $f2                      \r\n"
+        "daddu $8, $13, $13                             \r\n"
+        "sdc1 $f6, 0x20+%[ptmp]                         \r\n"
+        "sdc1 $f0, 0x30+%[ptmp]                         \r\n"
+        "sdc1 $f12, 0x60+%[ptmp]                        \r\n"
+        "sdc1 $f10, 0x70+%[ptmp]                        \r\n"
+        /* advance both row pointers by 8*stride and repeat for rows 8..15 */
+        "daddu $10, $10, $8                             \r\n"
+        "daddu $9, $9, $8                               \r\n"
+        "daddu $8, $10, %[stride]                       \r\n"
+        "gsldlc1 $f0, 0x7($10)                          \r\n"
+        "gsldrc1 $f0, 0x0($10)                          \r\n"
+        "daddu $14, $10, $12                            \r\n"
+        "gsldlc1 $f2, 0x7($8)                           \r\n"
+        "gsldrc1 $f2, 0x0($8)                           \r\n"
+        "gsldlc1 $f4, 0x7($14)                          \r\n"
+        "gsldrc1 $f4, 0x0($14)                          \r\n"
+        "daddu $8, $9, %[stride]                        \r\n"
+        "gsldlc1 $f6, 0x7($9)                           \r\n"
+        "gsldrc1 $f6, 0x0($9)                           \r\n"
+        "daddu $14, $9, $12                             \r\n"
+        "gsldlc1 $f8, 0x7($8)                           \r\n"
+        "gsldrc1 $f8, 0x0($8)                           \r\n"
+        "daddu $8, $9, $11                              \r\n"
+        "gsldlc1 $f10, 0x7($14)                         \r\n"
+        "gsldrc1 $f10, 0x0($14)                         \r\n"
+        "gsldlc1 $f12, 0x7($8)                          \r\n"
+        "gsldrc1 $f12, 0x0($8)                          \r\n"
+        "daddu $8, $9, $13                              \r\n"
+        "punpckhbh $f14, $f0, $f2                       \r\n"
+        "punpcklbh $f0, $f0, $f2                        \r\n"
+        "punpckhbh $f2, $f4, $f6                        \r\n"
+        "punpcklbh $f4, $f4, $f6                        \r\n"
+        "punpckhbh $f6, $f8, $f10                       \r\n"
+        "punpcklbh $f8, $f8, $f10                       \r\n"
+        "gsldlc1 $f16, 0x7($8)                          \r\n"
+        "gsldrc1 $f16, 0x0($8)                          \r\n"
+        "punpckhbh $f10, $f12, $f16                     \r\n"
+        "punpcklbh $f12, $f12, $f16                     \r\n"
+        "sdc1 $f6, 0x8+%[ptmp]                          \r\n"
+        "punpckhhw $f6, $f0, $f4                        \r\n"
+        "punpcklhw $f0, $f0, $f4                        \r\n"
+        "punpckhhw $f4, $f8, $f12                       \r\n"
+        "punpcklhw $f8, $f8, $f12                       \r\n"
+        "punpckhhw $f12, $f14, $f2                      \r\n"
+        "punpcklhw $f14, $f14, $f2                      \r\n"
+        "sdc1 $f4, 0x28+%[ptmp]                         \r\n"
+        "ldc1 $f4, 0x8+%[ptmp]                          \r\n"
+        "punpckhhw $f2, $f4, $f10                       \r\n"
+        "punpcklhw $f4, $f4, $f10                       \r\n"
+        "punpckhwd $f10, $f0, $f8                       \r\n"
+        "punpcklwd $f0, $f0, $f8                        \r\n"
+        "punpckhwd $f8, $f14, $f4                       \r\n"
+        "punpcklwd $f14, $f14, $f4                      \r\n"
+        /* second half of the result goes to ptmp + 0x08, 0x18, ... 0x78 */
+        "sdc1 $f0, 0x8+%[ptmp]                          \r\n"
+        "sdc1 $f10, 0x18+%[ptmp]                        \r\n"
+        "sdc1 $f14, 0x48+%[ptmp]                        \r\n"
+        "sdc1 $f8, 0x58+%[ptmp]                         \r\n"
+        "ldc1 $f16, 0x28+%[ptmp]                        \r\n"
+        "punpckhwd $f0, $f6, $f16                       \r\n"
+        "punpcklwd $f6, $f6, $f16                       \r\n"
+        "punpckhwd $f10, $f12, $f2                      \r\n"
+        "punpcklwd $f12, $f12, $f2                      \r\n"
+        "sdc1 $f6, 0x28+%[ptmp]                         \r\n"
+        "sdc1 $f0, 0x38+%[ptmp]                         \r\n"
+        "sdc1 $f12, 0x68+%[ptmp]                        \r\n"
+        "sdc1 $f10, 0x78+%[ptmp]                        \r\n"
+        /* keep the row pointer (pix - 4 + 8*stride) and the stride
+         * multiples (3x, 2x, 4x) for the write-back asm below */
+        "sd $10, 0x00+%[pdat]                           \r\n"
+        "sd $11, 0x08+%[pdat]                           \r\n"
+        "sd $12, 0x10+%[pdat]                           \r\n"
+        "sd $13, 0x18+%[pdat]                           \r\n"
+        ::[pix]"r"(pix),[stride]"r"((uint64_t)stride),[ptmp]"m"(ptmp[0]),
+          [pdat]"m"(pdat[0])
+        : "$8","$9","$10","$11","$12","$13","$14","$f0","$f2","$f4","$f6",
+          "$f8","$f10","$f12","$f14","$f16","memory"
+    );
+
+    /* filter the interleaved copy: edge at byte offset 0x40, row pitch 0x10 */
+    ff_deblock_v_luma_intra_8_mmi((uint8_t *) &ptmp[8], 0x10, alpha, beta);
+
+    __asm__ volatile (
+        /* restore row pointer and stride multiples saved by the first asm */
+        "ld $10, 0x00+%[pdat]                           \r\n"
+        "ld $11, 0x08+%[pdat]                           \r\n"
+        "ld $12, 0x10+%[pdat]                           \r\n"
+        "ld $13, 0x18+%[pdat]                           \r\n"
+        "daddu $9, $10, $11                             \r\n"
+        /* second half of ptmp (offsets 0x08 .. 0x78) -> picture rows 8..15 */
+        "ldc1 $f0, 0x8+%[ptmp]                          \r\n"
+        "ldc1 $f2, 0x18+%[ptmp]                         \r\n"
+        "ldc1 $f4, 0x28+%[ptmp]                         \r\n"
+        "ldc1 $f6, 0x38+%[ptmp]                         \r\n"
+        "ldc1 $f8, 0x48+%[ptmp]                         \r\n"
+        "ldc1 $f10, 0x58+%[ptmp]                        \r\n"
+        "ldc1 $f12, 0x68+%[ptmp]                        \r\n"
+        "punpckhbh $f14, $f0, $f2                       \r\n"
+        "punpcklbh $f0, $f0, $f2                        \r\n"
+        "punpckhbh $f2, $f4, $f6                        \r\n"
+        "punpcklbh $f4, $f4, $f6                        \r\n"
+        "punpckhbh $f6, $f8, $f10                       \r\n"
+        "punpcklbh $f8, $f8, $f10                       \r\n"
+        "ldc1 $f16, 0x78+%[ptmp]                        \r\n"
+        "punpckhbh $f10, $f12, $f16                     \r\n"
+        "punpcklbh $f12, $f12, $f16                     \r\n"
+        /* picture rows at $10 and $10 + 2*stride serve as spill slots here;
+         * they are overwritten with final data further down */
+        "gssdlc1 $f6, 0x7($10)                          \r\n"
+        "gssdrc1 $f6, 0x0($10)                          \r\n"
+        "daddu $8, $10, $12                             \r\n"
+        "punpckhhw $f6, $f0, $f4                        \r\n"
+        "punpcklhw $f0, $f0, $f4                        \r\n"
+        "punpckhhw $f4, $f8, $f12                       \r\n"
+        "punpcklhw $f8, $f8, $f12                       \r\n"
+        "punpckhhw $f12, $f14, $f2                      \r\n"
+        "punpcklhw $f14, $f14, $f2                      \r\n"
+        "gssdlc1 $f4, 0x7($8)                           \r\n"
+        "gssdrc1 $f4, 0x0($8)                           \r\n"
+        "gsldlc1 $f4, 0x7($10)                          \r\n"
+        "gsldrc1 $f4, 0x0($10)                          \r\n"
+        "punpckhhw $f2, $f4, $f10                       \r\n"
+        "punpcklhw $f4, $f4, $f10                       \r\n"
+        "punpckhwd $f10, $f0, $f8                       \r\n"
+        "punpcklwd $f0, $f0, $f8                        \r\n"
+        "punpckhwd $f8, $f14, $f4                       \r\n"
+        "punpcklwd $f14, $f14, $f4                      \r\n"
+        "daddu $8, $10, %[stride]                       \r\n"
+        "gssdlc1 $f0, 0x7($10)                          \r\n"
+        "gssdrc1 $f0, 0x0($10)                          \r\n"
+        "daddu $14, $9, %[stride]                       \r\n"
+        "gssdlc1 $f10, 0x7($8)                          \r\n"
+        "gssdrc1 $f10, 0x0($8)                          \r\n"
+        "daddu $8, $9, $12                              \r\n"
+        "gssdlc1 $f14, 0x7($14)                         \r\n"
+        "gssdrc1 $f14, 0x0($14)                         \r\n"
+        "daddu $14, $10, $12                            \r\n"
+        "gssdlc1 $f8, 0x7($8)                           \r\n"
+        "gssdrc1 $f8, 0x0($8)                           \r\n"
+        "gsldlc1 $f16, 0x7($14)                         \r\n"
+        "gsldrc1 $f16, 0x0($14)                         \r\n"
+        "daddu $8, $10, $12                             \r\n"
+        "punpckhwd $f0, $f6, $f16                       \r\n"
+        "punpcklwd $f6, $f6, $f16                       \r\n"
+        "punpckhwd $f10, $f12, $f2                      \r\n"
+        "punpcklwd $f12, $f12, $f2                      \r\n"
+        "gssdlc1 $f6, 0x7($8)                           \r\n"
+        "gssdrc1 $f6, 0x0($8)                           \r\n"
+        "daddu $8, $9, $11                              \r\n"
+        "gssdlc1 $f0, 0x7($9)                           \r\n"
+        "gssdrc1 $f0, 0x0($9)                           \r\n"
+        "daddu $14, $9, $13                             \r\n"
+        "gssdlc1 $f12, 0x7($8)                          \r\n"
+        "gssdrc1 $f12, 0x0($8)                          \r\n"
+        "daddu $8, $13, $13                             \r\n"
+        "gssdlc1 $f10, 0x7($14)                         \r\n"
+        "gssdrc1 $f10, 0x0($14)                         \r\n"
+        /* step back by 8*stride: first half of ptmp (offsets 0x00 .. 0x70)
+         * -> picture rows 0..7 */
+        "dsubu $10, $10, $8                             \r\n"
+        "dsubu $9, $9, $8                               \r\n"
+        "ldc1 $f0, 0x0+%[ptmp]                          \r\n"
+        "ldc1 $f2, 0x10+%[ptmp]                         \r\n"
+        "ldc1 $f4, 0x20+%[ptmp]                         \r\n"
+        "ldc1 $f6, 0x30+%[ptmp]                         \r\n"
+        "ldc1 $f8, 0x40+%[ptmp]                         \r\n"
+        "ldc1 $f10, 0x50+%[ptmp]                        \r\n"
+        "ldc1 $f12, 0x60+%[ptmp]                        \r\n"
+        "punpckhbh $f14, $f0, $f2                       \r\n"
+        "punpcklbh $f0, $f0, $f2                        \r\n"
+        "punpckhbh $f2, $f4, $f6                        \r\n"
+        "punpcklbh $f4, $f4, $f6                        \r\n"
+        "punpckhbh $f6, $f8, $f10                       \r\n"
+        "punpcklbh $f8, $f8, $f10                       \r\n"
+        "ldc1 $f16, 0x70+%[ptmp]                        \r\n"
+        "punpckhbh $f10, $f12, $f16                     \r\n"
+        "punpcklbh $f12, $f12, $f16                     \r\n"
+        "gssdlc1 $f6, 0x7($10)                          \r\n"
+        "gssdrc1 $f6, 0x0($10)                          \r\n"
+        "daddu $8, $10, $12                             \r\n"
+        "punpckhhw $f6, $f0, $f4                        \r\n"
+        "punpcklhw $f0, $f0, $f4                        \r\n"
+        "punpckhhw $f4, $f8, $f12                       \r\n"
+        "punpcklhw $f8, $f8, $f12                       \r\n"
+        "punpckhhw $f12, $f14, $f2                      \r\n"
+        "punpcklhw $f14, $f14, $f2                      \r\n"
+        "gssdlc1 $f4, 0x7($8)                           \r\n"
+        "gssdrc1 $f4, 0x0($8)                           \r\n"
+        "gsldlc1 $f4, 0x7($10)                          \r\n"
+        "gsldrc1 $f4, 0x0($10)                          \r\n"
+        "punpckhhw $f2, $f4, $f10                       \r\n"
+        "punpcklhw $f4, $f4, $f10                       \r\n"
+        "punpckhwd $f10, $f0, $f8                       \r\n"
+        "punpcklwd $f0, $f0, $f8                        \r\n"
+        "punpckhwd $f8, $f14, $f4                       \r\n"
+        "punpcklwd $f14, $f14, $f4                      \r\n"
+        "daddu $8, $10, %[stride]                       \r\n"
+        "gssdlc1 $f0, 0x7($10)                          \r\n"
+        "gssdrc1 $f0, 0x0($10)                          \r\n"
+        "daddu $14, $9, %[stride]                       \r\n"
+        "gssdlc1 $f10, 0x7($8)                          \r\n"
+        "gssdrc1 $f10, 0x0($8)                          \r\n"
+        "daddu $8, $9, $12                              \r\n"
+        "gssdlc1 $f14, 0x7($14)                         \r\n"
+        "gssdrc1 $f14, 0x0($14)                         \r\n"
+        "daddu $14, $10, $12                            \r\n"
+        "gssdlc1 $f8, 0x7($8)                           \r\n"
+        "gssdrc1 $f8, 0x0($8)                           \r\n"
+        "gsldlc1 $f16, 0x7($14)                         \r\n"
+        "gsldrc1 $f16, 0x0($14)                         \r\n"
+        "daddu $8, $10, $12                             \r\n"
+        "punpckhwd $f0, $f6, $f16                       \r\n"
+        "punpcklwd $f6, $f6, $f16                       \r\n"
+        "punpckhwd $f10, $f12, $f2                      \r\n"
+        "punpcklwd $f12, $f12, $f2                      \r\n"
+        "gssdlc1 $f6, 0x7($8)                           \r\n"
+        "gssdrc1 $f6, 0x0($8)                           \r\n"
+        "daddu $8, $9, $11                              \r\n"
+        "gssdlc1 $f0, 0x7($9)                           \r\n"
+        "gssdrc1 $f0, 0x0($9)                           \r\n"
+        "daddu $14, $9, $13                             \r\n"
+        "gssdlc1 $f12, 0x7($8)                          \r\n"
+        "gssdrc1 $f12, 0x0($8)                          \r\n"
+        "gssdlc1 $f10, 0x7($14)                         \r\n"
+        "gssdrc1 $f10, 0x0($14)                         \r\n"
+        ::[pix]"r"(pix),[stride]"r"((uint64_t)stride),[ptmp]"m"(ptmp[0]),
+          [pdat]"m"(pdat[0])
+        : "$8","$9","$10","$11","$12","$13","$14","$f0","$f2","$f4","$f6",
+          "$f8","$f10","$f12","$f14","$f16","memory"
+    );
+}
diff --git a/libavcodec/mips/h264idct_msa.c b/libavcodec/mips/h264idct_msa.c
new file mode 100644
index 00000000..fac1e7ad
--- /dev/null
+++ b/libavcodec/mips/h264idct_msa.c
@@ -0,0 +1,469 @@
+/*
+ * Copyright (c) 2015 Manojkumar Bhosale (Manojkumar.Bhosale@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/mips/generic_macros_msa.h"
+#include "h264dsp_mips.h"
+#include "libavcodec/bit_depth_template.c"
+
+/*
+ * One 4-point pass of the inverse transform on four v8i16 vectors.
+ *
+ * Forms in0 + in2, in0 - in2, (in1 >> 1) - in3 and in1 + (in3 >> 1),
+ * then hands these four terms to BUTTERFLY_4, which writes out0..out3.
+ */
+#define AVC_ITRANS_H(in0, in1, in2, in3, out0, out1, out2, out3)          \
+{                                                                         \
+    v8i16 sum02_m, diff02_m, half1_m, half3_m;                            \
+                                                                          \
+    sum02_m  = in0 + in2;                                                 \
+    diff02_m = in0 - in2;                                                 \
+    half1_m  = (in1 >> 1) - in3;                                          \
+    half3_m  = in1 + (in3 >> 1);                                          \
+                                                                          \
+    BUTTERFLY_4(sum02_m, diff02_m, half1_m, half3_m,                      \
+                out0, out1, out2, out3);                                  \
+}
+
+/*
+ * 4x4 inverse transform of the coefficients in src, added to the pixels
+ * at dst.
+ *
+ * Two AVC_ITRANS_H passes are applied with a 4x4 transpose in between, the
+ * result goes through SRARI_H4_SH with a shift of 6 and is then added to
+ * the destination block by ADDBLK_ST4x4_UB.  Finally two zero vectors are
+ * stored back over src.
+ *
+ * dst        - destination pixels
+ * src        - coefficient block; overwritten with zeros
+ * dst_stride - distance in bytes between destination rows
+ */
+static void avc_idct4x4_addblk_msa(uint8_t *dst, int16_t *src,
+                                   int32_t dst_stride)
+{
+    v8i16 in0, in1, in2, in3;
+    v8i16 pass1_0, pass1_1, pass1_2, pass1_3;
+    v8i16 pass2_0, pass2_1, pass2_2, pass2_3;
+    v8i16 zero_vec = { 0 };
+
+    LD4x4_SH(src, in0, in1, in2, in3);
+
+    /* first pass, then transpose so the second pass runs on the other axis */
+    AVC_ITRANS_H(in0, in1, in2, in3, pass1_0, pass1_1, pass1_2, pass1_3);
+    TRANSPOSE4x4_SH_SH(pass1_0, pass1_1, pass1_2, pass1_3,
+                       pass1_0, pass1_1, pass1_2, pass1_3);
+    AVC_ITRANS_H(pass1_0, pass1_1, pass1_2, pass1_3,
+                 pass2_0, pass2_1, pass2_2, pass2_3);
+
+    SRARI_H4_SH(pass2_0, pass2_1, pass2_2, pass2_3, 6);
+    ADDBLK_ST4x4_UB(pass2_0, pass2_1, pass2_2, pass2_3, dst, dst_stride);
+
+    /* clear the consumed coefficients */
+    ST_SH2(zero_vec, zero_vec, src, 8);
+}
+
+/*
+ * DC-only 4x4 add: computes (src[0] + 32) >> 6, adds that value to each
+ * pixel of the 4x4 destination block, clips to 0..255 and stores it back.
+ *
+ * dst        - destination pixels
+ * src        - coefficient block; only src[0] is read, and it is set to 0
+ * dst_stride - distance in bytes between destination rows
+ */
+static void avc_idct4x4_addblk_dc_msa(uint8_t *dst, int16_t *src,
+                                      int32_t dst_stride)
+{
+    const int16_t dc_val = (src[0] + 32) >> 6;
+    v8i16 dc_vec = __msa_fill_h(dc_val);
+    uint32_t row0, row1, row2, row3;
+    v16u8 pixels = { 0 };
+    v8i16 pix_r, pix_l;
+    v16i8 packed;
+
+    src[0] = 0;
+
+    /* gather the four 4-byte destination rows into one vector */
+    LW4(dst, dst_stride, row0, row1, row2, row3);
+    INSERT_W4_UB(row0, row1, row2, row3, pixels);
+    UNPCK_UB_SH(pixels, pix_r, pix_l);
+
+    pix_r = pix_r + dc_vec;
+    pix_l = pix_l + dc_vec;
+    CLIP_SH2_0_255(pix_r, pix_l);
+
+    packed = __msa_pckev_b((v16i8) pix_l, (v16i8) pix_r);
+    ST4x4_UB(packed, packed, 0, 1, 2, 3, dst, dst_stride);
+}
+
+/*
+ * Transform and dequantise a 4x4 block of luma DC coefficients.
+ *
+ * The 4x4 input is run through transpose + butterfly stages in both
+ * directions, widened to 32 bit, multiplied by de_q_val, passed through
+ * SRARI_W4_SW with a shift of 8 (presumably a rounding arithmetic shift)
+ * and packed back to 16 bit.  The 16 results are then scattered into dst,
+ * one value every DC_DEST_STRIDE (16) coefficients, covering offsets
+ * 0, 16, ... 240 in a permuted order.
+ *
+ * dst      - output coefficient buffer (written at multiples of 16)
+ * src      - 4x4 input DC coefficients
+ * de_q_val - dequantisation multiplier
+ */
+static void avc_deq_idct_luma_dc_msa(int16_t *dst, int16_t *src,
+                                     int32_t de_q_val)
+{
+#define DC_DEST_STRIDE 16
+    int16_t out0, out1, out2, out3;
+    v8i16 src0, src1, src2, src3;
+    v8i16 vec0, vec1, vec2, vec3;
+    v8i16 hres0, hres1, hres2, hres3;
+    v8i16 vres0, vres1, vres2, vres3;
+    v4i32 vres0_r, vres1_r, vres2_r, vres3_r;
+    v4i32 de_q_vec = __msa_fill_w(de_q_val);
+
+    LD4x4_SH(src, src0, src1, src2, src3);
+    /* first direction: transpose, then two butterfly stages */
+    TRANSPOSE4x4_SH_SH(src0, src1, src2, src3, src0, src1, src2, src3);
+    BUTTERFLY_4(src0, src2, src3, src1, vec0, vec3, vec2, vec1);
+    BUTTERFLY_4(vec0, vec1, vec2, vec3, hres0, hres3, hres2, hres1);
+    /* second direction: transpose back, then two more butterfly stages */
+    TRANSPOSE4x4_SH_SH(hres0, hres1, hres2, hres3, hres0, hres1, hres2, hres3);
+    BUTTERFLY_4(hres0, hres1, hres3, hres2, vec0, vec3, vec2, vec1);
+    BUTTERFLY_4(vec0, vec1, vec2, vec3, vres0, vres1, vres2, vres3);
+    /* widen to 32 bit so the multiplication by de_q_val is done in 32 bit */
+    UNPCK_R_SH_SW(vres0, vres0_r);
+    UNPCK_R_SH_SW(vres1, vres1_r);
+    UNPCK_R_SH_SW(vres2, vres2_r);
+    UNPCK_R_SH_SW(vres3, vres3_r);
+
+    vres0_r *= de_q_vec;
+    vres1_r *= de_q_vec;
+    vres2_r *= de_q_vec;
+    vres3_r *= de_q_vec;
+
+    SRARI_W4_SW(vres0_r, vres1_r, vres2_r, vres3_r, 8);
+    PCKEV_H2_SH(vres1_r, vres0_r, vres3_r, vres2_r, vec0, vec1);
+
+    /* vec0 elements 0..3 -> dst offsets 0, 32, 128, 160 */
+    out0 = __msa_copy_s_h(vec0, 0);
+    out1 = __msa_copy_s_h(vec0, 1);
+    out2 = __msa_copy_s_h(vec0, 2);
+    out3 = __msa_copy_s_h(vec0, 3);
+    SH(out0, dst);
+    SH(out1, (dst + 2 * DC_DEST_STRIDE));
+    SH(out2, (dst + 8 * DC_DEST_STRIDE));
+    SH(out3, (dst + 10 * DC_DEST_STRIDE));
+    dst += DC_DEST_STRIDE;
+
+    /* vec0 elements 4..7 -> dst offsets 16, 48, 144, 176 */
+    out0 = __msa_copy_s_h(vec0, 4);
+    out1 = __msa_copy_s_h(vec0, 5);
+    out2 = __msa_copy_s_h(vec0, 6);
+    out3 = __msa_copy_s_h(vec0, 7);
+    SH(out0, dst);
+    SH(out1, (dst + 2 * DC_DEST_STRIDE));
+    SH(out2, (dst + 8 * DC_DEST_STRIDE));
+    SH(out3, (dst + 10 * DC_DEST_STRIDE));
+    dst += (3 * DC_DEST_STRIDE);
+
+    /* vec1 elements 0..3 -> dst offsets 64, 96, 192, 224 */
+    out0 = __msa_copy_s_h(vec1, 0);
+    out1 = __msa_copy_s_h(vec1, 1);
+    out2 = __msa_copy_s_h(vec1, 2);
+    out3 = __msa_copy_s_h(vec1, 3);
+    SH(out0, dst);
+    SH(out1, (dst + 2 * DC_DEST_STRIDE));
+    SH(out2, (dst + 8 * DC_DEST_STRIDE));
+    SH(out3, (dst + 10 * DC_DEST_STRIDE));
+    dst += DC_DEST_STRIDE;
+
+    /* vec1 elements 4..7 -> dst offsets 80, 112, 208, 240 */
+    out0 = __msa_copy_s_h(vec1, 4);
+    out1 = __msa_copy_s_h(vec1, 5);
+    out2 = __msa_copy_s_h(vec1, 6);
+    out3 = __msa_copy_s_h(vec1, 7);
+    SH(out0, dst);
+    SH(out1, (dst + 2 * DC_DEST_STRIDE));
+    SH(out2, (dst + 8 * DC_DEST_STRIDE));
+    SH(out3, (dst + 10 * DC_DEST_STRIDE));
+
+#undef DC_DEST_STRIDE
+}
+
+/*
+ * 8x8 inverse transform of the coefficients in src, added to the pixels
+ * at dst.
+ *
+ * The first pass works on 16-bit vectors; after an 8x8 transpose the
+ * values are widened to 32 bit for the second pass.  The results are
+ * shifted right by 6 (SRA_4V), packed back to 16 bit, added to the eight
+ * destination rows, clipped to 0..255 and stored.
+ *
+ * dst        - destination pixels
+ * src        - 8x8 coefficient block; src[0] is modified (+= 32), the rest
+ *              is only read here
+ * dst_stride - distance in bytes between destination rows
+ */
+static void avc_idct8_addblk_msa(uint8_t *dst, int16_t *src, int32_t dst_stride)
+{
+    v8i16 src0, src1, src2, src3, src4, src5, src6, src7;
+    v8i16 vec0, vec1, vec2, vec3;
+    v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+    v8i16 res0, res1, res2, res3, res4, res5, res6, res7;
+    v4i32 tmp0_r, tmp1_r, tmp2_r, tmp3_r, tmp4_r, tmp5_r, tmp6_r, tmp7_r;
+    v4i32 tmp0_l, tmp1_l, tmp2_l, tmp3_l, tmp4_l, tmp5_l, tmp6_l, tmp7_l;
+    v4i32 vec0_r, vec1_r, vec2_r, vec3_r, vec0_l, vec1_l, vec2_l, vec3_l;
+    v4i32 res0_r, res1_r, res2_r, res3_r, res4_r, res5_r, res6_r, res7_r;
+    v4i32 res0_l, res1_l, res2_l, res3_l, res4_l, res5_l, res6_l, res7_l;
+    v16i8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+    v16i8 zeros = { 0 };
+
+    /* bias the DC term by 32 ahead of the final >> 6 */
+    src[0] += 32;
+
+    LD_SH8(src, 8, src0, src1, src2, src3, src4, src5, src6, src7);
+
+    /* first pass, even part (inputs 0, 2, 4, 6) */
+    vec0 = src0 + src4;
+    vec1 = src0 - src4;
+    vec2 = src2 >> 1;
+    vec2 = vec2 - src6;
+    vec3 = src6 >> 1;
+    vec3 = src2 + vec3;
+
+    BUTTERFLY_4(vec0, vec1, vec2, vec3, tmp0, tmp1, tmp2, tmp3);
+
+    /* first pass, odd part (inputs 1, 3, 5, 7) */
+    vec0 = src7 >> 1;
+    vec0 = src5 - vec0 - src3 - src7;
+    vec1 = src3 >> 1;
+    vec1 = src1 - vec1 + src7 - src3;
+    vec2 = src5 >> 1;
+    vec2 = vec2 - src1 + src7 + src5;
+    vec3 = src1 >> 1;
+    vec3 = vec3 + src3 + src5 + src1;
+    tmp4 = vec3 >> 2;
+    tmp4 += vec0;
+    tmp5 = vec2 >> 2;
+    tmp5 += vec1;
+    tmp6 = vec1 >> 2;
+    tmp6 -= vec2;
+    tmp7 = vec0 >> 2;
+    tmp7 = vec3 - tmp7;
+
+    BUTTERFLY_8(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7,
+                res0, res1, res2, res3, res4, res5, res6, res7);
+    TRANSPOSE8x8_SH_SH(res0, res1, res2, res3, res4, res5, res6, res7,
+                       res0, res1, res2, res3, res4, res5, res6, res7);
+    /* widen each row to two 32-bit vectors (_r / _l) for the second pass */
+    UNPCK_SH_SW(res0, tmp0_r, tmp0_l);
+    UNPCK_SH_SW(res1, tmp1_r, tmp1_l);
+    UNPCK_SH_SW(res2, tmp2_r, tmp2_l);
+    UNPCK_SH_SW(res3, tmp3_r, tmp3_l);
+    UNPCK_SH_SW(res4, tmp4_r, tmp4_l);
+    UNPCK_SH_SW(res5, tmp5_r, tmp5_l);
+    UNPCK_SH_SW(res6, tmp6_r, tmp6_l);
+    UNPCK_SH_SW(res7, tmp7_r, tmp7_l);
+    /* second pass, even part */
+    BUTTERFLY_4(tmp0_r, tmp0_l, tmp4_l, tmp4_r, vec0_r, vec0_l, vec1_l, vec1_r);
+
+    vec2_r = tmp2_r >> 1;
+    vec2_l = tmp2_l >> 1;
+    vec2_r -= tmp6_r;
+    vec2_l -= tmp6_l;
+    vec3_r = tmp6_r >> 1;
+    vec3_l = tmp6_l >> 1;
+    vec3_r += tmp2_r;
+    vec3_l += tmp2_l;
+
+    BUTTERFLY_4(vec0_r, vec1_r, vec2_r, vec3_r, tmp0_r, tmp2_r, tmp4_r, tmp6_r);
+    BUTTERFLY_4(vec0_l, vec1_l, vec2_l, vec3_l, tmp0_l, tmp2_l, tmp4_l, tmp6_l);
+
+    /* second pass, odd part */
+    vec0_r = tmp7_r >> 1;
+    vec0_l = tmp7_l >> 1;
+    vec0_r = tmp5_r - vec0_r - tmp3_r - tmp7_r;
+    vec0_l = tmp5_l - vec0_l - tmp3_l - tmp7_l;
+    vec1_r = tmp3_r >> 1;
+    vec1_l = tmp3_l >> 1;
+    vec1_r = tmp1_r - vec1_r + tmp7_r - tmp3_r;
+    vec1_l = tmp1_l - vec1_l + tmp7_l - tmp3_l;
+    vec2_r = tmp5_r >> 1;
+    vec2_l = tmp5_l >> 1;
+    vec2_r = vec2_r - tmp1_r + tmp7_r + tmp5_r;
+    vec2_l = vec2_l - tmp1_l + tmp7_l + tmp5_l;
+    vec3_r = tmp1_r >> 1;
+    vec3_l = tmp1_l >> 1;
+    vec3_r = vec3_r + tmp3_r + tmp5_r + tmp1_r;
+    vec3_l = vec3_l + tmp3_l + tmp5_l + tmp1_l;
+    tmp1_r = vec3_r >> 2;
+    tmp1_l = vec3_l >> 2;
+    tmp1_r += vec0_r;
+    tmp1_l += vec0_l;
+    tmp3_r = vec2_r >> 2;
+    tmp3_l = vec2_l >> 2;
+    tmp3_r += vec1_r;
+    tmp3_l += vec1_l;
+    tmp5_r = vec1_r >> 2;
+    tmp5_l = vec1_l >> 2;
+    tmp5_r -= vec2_r;
+    tmp5_l -= vec2_l;
+    tmp7_r = vec0_r >> 2;
+    tmp7_l = vec0_l >> 2;
+    tmp7_r = vec3_r - tmp7_r;
+    tmp7_l = vec3_l - tmp7_l;
+
+    /* combine even and odd parts into the eight output rows */
+    BUTTERFLY_4(tmp0_r, tmp0_l, tmp7_l, tmp7_r, res0_r, res0_l, res7_l, res7_r);
+    BUTTERFLY_4(tmp2_r, tmp2_l, tmp5_l, tmp5_r, res1_r, res1_l, res6_l, res6_r);
+    BUTTERFLY_4(tmp4_r, tmp4_l, tmp3_l, tmp3_r, res2_r, res2_l, res5_l, res5_r);
+    BUTTERFLY_4(tmp6_r, tmp6_l, tmp1_l, tmp1_r, res3_r, res3_l, res4_l, res4_r);
+    /* scale by >> 6 and narrow back to 16 bit */
+    SRA_4V(res0_r, res0_l, res1_r, res1_l, 6);
+    SRA_4V(res2_r, res2_l, res3_r, res3_l, 6);
+    SRA_4V(res4_r, res4_l, res5_r, res5_l, 6);
+    SRA_4V(res6_r, res6_l, res7_r, res7_l, 6);
+    PCKEV_H4_SH(res0_l, res0_r, res1_l, res1_r, res2_l, res2_r, res3_l, res3_r,
+                res0, res1, res2, res3);
+    PCKEV_H4_SH(res4_l, res4_r, res5_l, res5_r, res6_l, res6_r, res7_l, res7_r,
+                res4, res5, res6, res7);
+    /* load the eight destination rows and interleave them with zero bytes
+     * to obtain 16-bit pixel values */
+    LD_SB8(dst, dst_stride, dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7);
+    ILVR_B4_SH(zeros, dst0, zeros, dst1, zeros, dst2, zeros, dst3,
+               tmp0, tmp1, tmp2, tmp3);
+    ILVR_B4_SH(zeros, dst4, zeros, dst5, zeros, dst6, zeros, dst7,
+               tmp4, tmp5, tmp6, tmp7);
+    ADD4(res0, tmp0, res1, tmp1, res2, tmp2, res3, tmp3,
+         res0, res1, res2, res3);
+    ADD4(res4, tmp4, res5, tmp5, res6, tmp6, res7, tmp7,
+         res4, res5, res6, res7);
+    CLIP_SH4_0_255(res0, res1, res2, res3);
+    CLIP_SH4_0_255(res4, res5, res6, res7);
+    PCKEV_B4_SB(res1, res0, res3, res2, res5, res4, res7, res6,
+                dst0, dst1, dst2, dst3);
+    /* store rows 0..3, then rows 4..7 */
+    ST8x4_UB(dst0, dst1, dst, dst_stride);
+    dst += (4 * dst_stride);
+    ST8x4_UB(dst2, dst3, dst, dst_stride);
+}
+
+/*
+ * DC-only 8x8 add: computes (src[0] + 32) >> 6, adds that value to each
+ * pixel of the 8x8 destination block, clips to 0..255 and stores it back.
+ *
+ * dst        - destination pixels
+ * src        - coefficient block; only src[0] is read, and it is set to 0
+ * dst_stride - distance in bytes between destination rows
+ */
+static void avc_idct8_dc_addblk_msa(uint8_t *dst, int16_t *src,
+                                    int32_t dst_stride)
+{
+    const int32_t dc_val = (src[0] + 32) >> 6;
+    v8i16 dc_vec = __msa_fill_h(dc_val);
+    v16i8 zero_vec = { 0 };
+    v16i8 row0, row1, row2, row3, row4, row5, row6, row7;
+    v8i16 px0, px1, px2, px3, px4, px5, px6, px7;
+
+    src[0] = 0;
+
+    /* load eight rows and interleave with zero bytes to get 16-bit pixels */
+    LD_SB8(dst, dst_stride, row0, row1, row2, row3, row4, row5, row6, row7);
+    ILVR_B4_SH(zero_vec, row0, zero_vec, row1, zero_vec, row2, zero_vec, row3,
+               px0, px1, px2, px3);
+    ILVR_B4_SH(zero_vec, row4, zero_vec, row5, zero_vec, row6, zero_vec, row7,
+               px4, px5, px6, px7);
+
+    ADD4(px0, dc_vec, px1, dc_vec, px2, dc_vec, px3, dc_vec,
+         px0, px1, px2, px3);
+    ADD4(px4, dc_vec, px5, dc_vec, px6, dc_vec, px7, dc_vec,
+         px4, px5, px6, px7);
+    CLIP_SH4_0_255(px0, px1, px2, px3);
+    CLIP_SH4_0_255(px4, px5, px6, px7);
+
+    /* pack pairs of rows back to bytes and store rows 0..3, then 4..7 */
+    PCKEV_B4_SB(px1, px0, px3, px2, px5, px4, px7, px6,
+                row0, row1, row2, row3);
+    ST8x4_UB(row0, row1, dst, dst_stride);
+    dst += (4 * dst_stride);
+    ST8x4_UB(row2, row3, dst, dst_stride);
+}
+
+/*
+ * Public 4x4 IDCT-and-add entry point: runs avc_idct4x4_addblk_msa() and
+ * then clears the 16 coefficients of src.
+ */
+void ff_h264_idct_add_msa(uint8_t *dst, int16_t *src,
+                          int32_t dst_stride)
+{
+    const size_t coeff_bytes = 16 * sizeof(dctcoef);
+
+    avc_idct4x4_addblk_msa(dst, src, dst_stride);
+    memset(src, 0, coeff_bytes);
+}
+
+/*
+ * Public 8x8 IDCT-and-add entry point: runs avc_idct8_addblk_msa() and
+ * then clears the 64 coefficients of src.
+ */
+void ff_h264_idct8_addblk_msa(uint8_t *dst, int16_t *src,
+                              int32_t dst_stride)
+{
+    const size_t coeff_bytes = 64 * sizeof(dctcoef);
+
+    avc_idct8_addblk_msa(dst, src, dst_stride);
+    memset(src, 0, coeff_bytes);
+}
+
+/*
+ * Public DC-only 4x4 add entry point; forwards unchanged to
+ * avc_idct4x4_addblk_dc_msa().
+ */
+void ff_h264_idct4x4_addblk_dc_msa(uint8_t *dst, int16_t *src,
+                                   int32_t dst_stride)
+{
+    avc_idct4x4_addblk_dc_msa(dst, src, dst_stride);
+}
+
+/*
+ * Public DC-only 8x8 add entry point; forwards unchanged to
+ * avc_idct8_dc_addblk_msa().
+ */
+void ff_h264_idct8_dc_addblk_msa(uint8_t *dst, int16_t *src,
+                                 int32_t dst_stride)
+{
+    avc_idct8_dc_addblk_msa(dst, src, dst_stride);
+}
+
+/*
+ * Process the 16 4x4 blocks indexed 0..15.
+ *
+ * For every block whose nzc[scan8[i]] entry is non-zero, either the DC-only
+ * add (entry equals 1 and the block's first coefficient is non-zero) or the
+ * full 4x4 IDCT-and-add is applied.
+ *
+ * dst        - base of the destination plane
+ * blk_offset - per-block offset added to dst
+ * block      - coefficients, 16 per block
+ * dst_stride - distance in bytes between destination rows
+ * nzc        - per-block counts, looked up through scan8[]
+ */
+void ff_h264_idct_add16_msa(uint8_t *dst,
+                            const int32_t *blk_offset,
+                            int16_t *block, int32_t dst_stride,
+                            const uint8_t nzc[15 * 8])
+{
+    int32_t blk;
+
+    for (blk = 0; blk < 16; blk++) {
+        const int32_t coded = nzc[scan8[blk]];
+        uint8_t *target;
+        int16_t *coeffs;
+
+        if (!coded)
+            continue;
+
+        target = dst + blk_offset[blk];
+        coeffs = block + blk * 16 * sizeof(pixel);
+
+        if (coded == 1 && ((dctcoef *) block)[blk * 16])
+            ff_h264_idct4x4_addblk_dc_msa(target, coeffs, dst_stride);
+        else
+            ff_h264_idct_add_msa(target, coeffs, dst_stride);
+    }
+}
+
+/*
+ * Process the four 8x8 blocks that start at 4x4 block indices 0, 4, 8, 12.
+ *
+ * For every block whose nzc[scan8[idx]] entry is non-zero, either the
+ * DC-only 8x8 add (entry equals 1 and the block's first coefficient is
+ * non-zero) or the full 8x8 IDCT-and-add is applied.
+ *
+ * dst        - base of the destination plane
+ * blk_offset - per-block offset added to dst
+ * block      - coefficients, 16 per 4x4 block index
+ * dst_stride - distance in bytes between destination rows
+ * nzc        - per-block counts, looked up through scan8[]
+ */
+void ff_h264_idct8_add4_msa(uint8_t *dst, const int32_t *blk_offset,
+                            int16_t *block, int32_t dst_stride,
+                            const uint8_t nzc[15 * 8])
+{
+    int32_t idx;
+
+    for (idx = 0; idx < 16; idx += 4) {
+        const int32_t coded = nzc[scan8[idx]];
+        uint8_t *target;
+        int16_t *coeffs;
+
+        if (!coded)
+            continue;
+
+        target = dst + blk_offset[idx];
+        coeffs = block + idx * 16 * sizeof(pixel);
+
+        if (coded == 1 && ((dctcoef *) block)[idx * 16])
+            ff_h264_idct8_dc_addblk_msa(target, coeffs, dst_stride);
+        else
+            ff_h264_idct8_addblk_msa(target, coeffs, dst_stride);
+    }
+}
+
+/*
+ * Process four 4x4 blocks for each of two destination planes.
+ *
+ * Plane 0 (dst[0]) uses block indices 16..19, plane 1 (dst[1]) uses 32..35.
+ * A block with a non-zero nzc[scan8[i]] entry gets the full 4x4
+ * IDCT-and-add; otherwise, if its first coefficient is non-zero, the
+ * DC-only add is used; otherwise the block is skipped.
+ *
+ * dst        - array of two destination plane pointers
+ * blk_offset - per-block offset added to the plane pointer
+ * block      - coefficients, 16 per block
+ * dst_stride - distance in bytes between destination rows
+ * nzc        - per-block counts, looked up through scan8[]
+ */
+void ff_h264_idct_add8_msa(uint8_t **dst,
+                           const int32_t *blk_offset,
+                           int16_t *block, int32_t dst_stride,
+                           const uint8_t nzc[15 * 8])
+{
+    int32_t plane, blk;
+
+    for (plane = 0; plane < 2; plane++) {
+        const int32_t first = (plane + 1) * 16;
+
+        for (blk = first; blk < first + 4; blk++) {
+            int16_t *coeffs = block + blk * 16 * sizeof(pixel);
+
+            if (nzc[scan8[blk]]) {
+                ff_h264_idct_add_msa(dst[plane] + blk_offset[blk],
+                                     coeffs, dst_stride);
+            } else if (((dctcoef *) block)[blk * 16]) {
+                ff_h264_idct4x4_addblk_dc_msa(dst[plane] + blk_offset[blk],
+                                              coeffs, dst_stride);
+            }
+        }
+    }
+}
+
+/*
+ * Process eight 4x4 blocks for each of two destination planes, in two
+ * passes over both planes.
+ *
+ * Pass 1 handles block indices 16..19 (plane 0) and 32..35 (plane 1).
+ * Pass 2 handles coefficient indices 20..23 and 36..39; in this pass the
+ * nzc[scan8[]] and blk_offset[] lookups use index + 4 while the
+ * coefficients are still addressed with the plain index.
+ *
+ * In both passes a block with a non-zero nzc entry gets the full 4x4
+ * IDCT-and-add; otherwise, if its first coefficient is non-zero, the
+ * DC-only add is used; otherwise the block is skipped.
+ *
+ * dst        - array of two destination plane pointers
+ * blk_offset - per-block offset added to the plane pointer
+ * block      - coefficients, 16 per block
+ * dst_stride - distance in bytes between destination rows
+ * nzc        - per-block counts, looked up through scan8[]
+ */
+void ff_h264_idct_add8_422_msa(uint8_t **dst,
+                               const int32_t *blk_offset,
+                               int16_t *block, int32_t dst_stride,
+                               const uint8_t nzc[15 * 8])
+{
+    int32_t plane, blk;
+
+    /* pass 1: first four blocks of each plane */
+    for (plane = 0; plane < 2; plane++) {
+        const int32_t first = (plane + 1) * 16;
+
+        for (blk = first; blk < first + 4; blk++) {
+            int16_t *coeffs = block + blk * 16 * sizeof(pixel);
+
+            if (nzc[scan8[blk]]) {
+                ff_h264_idct_add_msa(dst[plane] + blk_offset[blk],
+                                     coeffs, dst_stride);
+            } else if (((dctcoef *) block)[blk * 16]) {
+                ff_h264_idct4x4_addblk_dc_msa(dst[plane] + blk_offset[blk],
+                                              coeffs, dst_stride);
+            }
+        }
+    }
+
+    /* pass 2: next four blocks; table lookups are shifted by 4 */
+    for (plane = 0; plane < 2; plane++) {
+        const int32_t first = (plane + 1) * 16 + 4;
+
+        for (blk = first; blk < first + 4; blk++) {
+            const int32_t pos = blk + 4;
+            int16_t *coeffs = block + blk * 16 * sizeof(pixel);
+
+            if (nzc[scan8[pos]]) {
+                ff_h264_idct_add_msa(dst[plane] + blk_offset[pos],
+                                     coeffs, dst_stride);
+            } else if (((dctcoef *) block)[blk * 16]) {
+                ff_h264_idct4x4_addblk_dc_msa(dst[plane] + blk_offset[pos],
+                                              coeffs, dst_stride);
+            }
+        }
+    }
+}
+
+/*
+ * Process the 16 4x4 blocks indexed 0..15.
+ *
+ * A block with a non-zero nzc[scan8[i]] entry gets the full 4x4
+ * IDCT-and-add; otherwise, if its first coefficient is non-zero, the
+ * DC-only add is used; otherwise the block is skipped.
+ *
+ * dst        - base of the destination plane
+ * blk_offset - per-block offset added to dst
+ * block      - coefficients, 16 per block
+ * dst_stride - distance in bytes between destination rows
+ * nzc        - per-block counts, looked up through scan8[]
+ */
+void ff_h264_idct_add16_intra_msa(uint8_t *dst,
+                                  const int32_t *blk_offset,
+                                  int16_t *block,
+                                  int32_t dst_stride,
+                                  const uint8_t nzc[15 * 8])
+{
+    int32_t blk;
+
+    for (blk = 0; blk < 16; blk++) {
+        int16_t *coeffs = block + blk * 16 * sizeof(pixel);
+
+        if (nzc[scan8[blk]]) {
+            ff_h264_idct_add_msa(dst + blk_offset[blk], coeffs, dst_stride);
+        } else if (((dctcoef *) block)[blk * 16]) {
+            ff_h264_idct4x4_addblk_dc_msa(dst + blk_offset[blk], coeffs,
+                                          dst_stride);
+        }
+    }
+}
+
+/*
+ * Public luma DC dequantise-and-transform entry point; forwards unchanged
+ * to avc_deq_idct_luma_dc_msa().
+ */
+void ff_h264_deq_idct_luma_dc_msa(int16_t *dst, int16_t *src,
+                                  int32_t de_qval)
+{
+    avc_deq_idct_luma_dc_msa(dst, src, de_qval);
+}
diff --git a/libavcodec/mips/h264pred_init_mips.c b/libavcodec/mips/h264pred_init_mips.c
new file mode 100644
index 00000000..93a2409a
--- /dev/null
+++ b/libavcodec/mips/h264pred_init_mips.c
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2015 Shivraj Patil (Shivraj.Patil@imgtec.com)
+ *                    Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "h264dsp_mips.h"
+#include "h264pred_mips.h"
+
+#if HAVE_MSA
+static av_cold void h264_pred_init_msa(H264PredContext *h, int codec_id,
+                                       const int bit_depth,
+                                       const int chroma_format_idc)
+{
+    /* Name the argument tests once; every assignment below hangs off them. */
+    const int is_vp7_or_vp8 = codec_id == AV_CODEC_ID_VP7 ||
+                              codec_id == AV_CODEC_ID_VP8;
+    const int is_rv40       = codec_id == AV_CODEC_ID_RV40;
+    const int is_svq3       = codec_id == AV_CODEC_ID_SVQ3;
+    const int chroma_idc_1  = chroma_format_idc == 1;
+
+    /* No MSA routine is installed unless the bit depth is 8. */
+    if (bit_depth != 8)
+        return;
+
+    /*
+     * pred8x8 table.  Apart from the VP7/VP8 slots 7 and 8, entries are
+     * written only when chroma_format_idc is 1.
+     */
+    if (chroma_idc_1) {
+        h->pred8x8[VERT_PRED8x8] = ff_h264_intra_pred_vert_8x8_msa;
+        h->pred8x8[HOR_PRED8x8]  = ff_h264_intra_pred_horiz_8x8_msa;
+
+        if (!is_vp7_or_vp8)
+            h->pred8x8[PLANE_PRED8x8] = ff_h264_intra_predict_plane_8x8_msa;
+    }
+
+    if (is_vp7_or_vp8) {
+        h->pred8x8[7] = ff_vp8_pred8x8_127_dc_8_msa;
+        h->pred8x8[8] = ff_vp8_pred8x8_129_dc_8_msa;
+    } else if (!is_rv40 && chroma_idc_1) {
+        /* DC family: skipped for RV40 as well as for VP7/VP8. */
+        h->pred8x8[DC_PRED8x8]      = ff_h264_intra_predict_dc_4blk_8x8_msa;
+        h->pred8x8[LEFT_DC_PRED8x8] = ff_h264_intra_predict_hor_dc_8x8_msa;
+        h->pred8x8[TOP_DC_PRED8x8]  = ff_h264_intra_predict_vert_dc_8x8_msa;
+        h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8] =
+            ff_h264_intra_predict_mad_cow_dc_l0t_8x8_msa;
+        h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8] =
+            ff_h264_intra_predict_mad_cow_dc_0lt_8x8_msa;
+        h->pred8x8[ALZHEIMER_DC_L00_PRED8x8] =
+            ff_h264_intra_predict_mad_cow_dc_l00_8x8_msa;
+        h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8] =
+            ff_h264_intra_predict_mad_cow_dc_0l0_8x8_msa;
+    }
+
+    if (chroma_idc_1)
+        h->pred8x8[DC_128_PRED8x8] = ff_h264_intra_pred_dc_128_8x8_msa;
+
+    /* pred16x16 table: these three are set for every codec. */
+    h->pred16x16[DC_PRED8x8]   = ff_h264_intra_pred_dc_16x16_msa;
+    h->pred16x16[VERT_PRED8x8] = ff_h264_intra_pred_vert_16x16_msa;
+    h->pred16x16[HOR_PRED8x8]  = ff_h264_intra_pred_horiz_16x16_msa;
+
+    /*
+     * Codec-dependent pred16x16 entries: VP7/VP8 fill slots 7 and 8, SVQ3
+     * and RV40 get nothing here, every other codec gets the plane routine.
+     */
+    if (is_vp7_or_vp8) {
+        h->pred16x16[7] = ff_vp8_pred16x16_127_dc_8_msa;
+        h->pred16x16[8] = ff_vp8_pred16x16_129_dc_8_msa;
+    } else if (!is_svq3 && !is_rv40) {
+        h->pred16x16[PLANE_PRED8x8] = ff_h264_intra_predict_plane_16x16_msa;
+    }
+
+    /* Written after the codec-dependent slots, exactly as before, so the
+     * final table contents do not depend on the numeric slot values. */
+    h->pred16x16[LEFT_DC_PRED8x8] = ff_h264_intra_pred_dc_left_16x16_msa;
+    h->pred16x16[TOP_DC_PRED8x8]  = ff_h264_intra_pred_dc_top_16x16_msa;
+    h->pred16x16[DC_128_PRED8x8]  = ff_h264_intra_pred_dc_128_16x16_msa;
+}
+#endif  // #if HAVE_MSA
+
+#if HAVE_MMI
+static av_cold void h264_pred_init_mmi(H264PredContext *h, int codec_id,
+        const int bit_depth, const int chroma_format_idc)
+{
+    const int chroma_idc_1 = chroma_format_idc == 1;
+
+    /* Only bit depth 8 is handled; h is left untouched otherwise. */
+    if (bit_depth != 8)
+        return;
+
+    /*
+     * pred8x8 vertical/horizontal: the 8x8 routines when chroma_format_idc
+     * is 1, the 8x16 routines for any other value.
+     */
+    h->pred8x8[VERT_PRED8x8] = chroma_idc_1 ? ff_pred8x8_vertical_8_mmi
+                                            : ff_pred8x16_vertical_8_mmi;
+    h->pred8x8[HOR_PRED8x8]  = chroma_idc_1 ? ff_pred8x8_horizontal_8_mmi
+                                            : ff_pred8x16_horizontal_8_mmi;
+
+    /* Entries that do not depend on codec or chroma format. */
+    h->pred16x16[DC_PRED8x8]   = ff_pred16x16_dc_8_mmi;
+    h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_8_mmi;
+    h->pred16x16[HOR_PRED8x8]  = ff_pred16x16_horizontal_8_mmi;
+    h->pred8x8l[TOP_DC_PRED]   = ff_pred8x8l_top_dc_8_mmi;
+    h->pred8x8l[DC_PRED]       = ff_pred8x8l_dc_8_mmi;
+
+    /*
+     * 16x16 plane routine: SVQ3 and RV40 have their own variants, VP7/VP8
+     * get none from here, and every remaining codec uses the h264 variant.
+     */
+    if (codec_id == AV_CODEC_ID_SVQ3) {
+        h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_8_mmi;
+    } else if (codec_id == AV_CODEC_ID_RV40) {
+        h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_8_mmi;
+    } else if (codec_id != AV_CODEC_ID_VP7 && codec_id != AV_CODEC_ID_VP8) {
+        h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_8_mmi;
+    }
+
+    /* 8x8 DC routines: SVQ3/H264 only, and only for chroma_format_idc 1. */
+    if (chroma_idc_1 &&
+        (codec_id == AV_CODEC_ID_SVQ3 || codec_id == AV_CODEC_ID_H264)) {
+        h->pred8x8[TOP_DC_PRED8x8] = ff_pred8x8_top_dc_8_mmi;
+        h->pred8x8[DC_PRED8x8]     = ff_pred8x8_dc_8_mmi;
+    }
+}
+#endif /* HAVE_MMI */
+
+av_cold void ff_h264_pred_init_mips(H264PredContext *h, int codec_id,
+                                    int bit_depth,
+                                    const int chroma_format_idc)
+{   /* Entry point: runs whichever of the MSA / MMI initialisers were compiled in, forwarding all arguments. */
+#if HAVE_MSA
+    h264_pred_init_msa(h, codec_id, bit_depth, chroma_format_idc);
+#endif  // #if HAVE_MSA
+#if HAVE_MMI
+    h264_pred_init_mmi(h, codec_id, bit_depth, chroma_format_idc); /* called after the MSA init, so for any slot both write, the MMI pointer is the one left in h */
+#endif /* HAVE_MMI */
+}
diff --git a/libavcodec/mips/h264pred_mips.h b/libavcodec/mips/h264pred_mips.h
new file mode 100644
index 00000000..136e2912
--- /dev/null
+++ b/libavcodec/mips/h264pred_mips.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MIPS_H264PRED_MIPS_H
+#define AVCODEC_MIPS_H264PRED_MIPS_H
+
+#include "constants.h"
+#include "libavcodec/h264pred.h"
+
+void ff_pred16x16_vertical_8_mmi(uint8_t *src, ptrdiff_t stride); /* installed in pred16x16[VERT_PRED8x8] by h264_pred_init_mmi() */
+void ff_pred16x16_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride); /* installed in pred16x16[HOR_PRED8x8] */
+void ff_pred16x16_dc_8_mmi(uint8_t *src, ptrdiff_t stride); /* installed in pred16x16[DC_PRED8x8] */
+void ff_pred8x8l_top_dc_8_mmi(uint8_t *src, int has_topleft, int has_topright,
+        ptrdiff_t stride); /* installed in pred8x8l[TOP_DC_PRED] */
+void ff_pred8x8l_dc_8_mmi(uint8_t *src, int has_topleft, int has_topright,
+        ptrdiff_t stride); /* installed in pred8x8l[DC_PRED] */
+void ff_pred8x8l_vertical_8_mmi(uint8_t *src, int has_topleft,
+        int has_topright, ptrdiff_t stride); /* NOTE(review): not installed by h264_pred_init_mmi() */
+void ff_pred4x4_dc_8_mmi(uint8_t *src, const uint8_t *topright,
+        ptrdiff_t stride); /* NOTE(review): not installed by h264_pred_init_mmi() */
+void ff_pred8x8_vertical_8_mmi(uint8_t *src, ptrdiff_t stride); /* pred8x8[VERT_PRED8x8] when chroma_format_idc == 1 */
+void ff_pred8x8_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride); /* pred8x8[HOR_PRED8x8] when chroma_format_idc == 1 */
+void ff_pred16x16_plane_svq3_8_mmi(uint8_t *src, ptrdiff_t stride); /* pred16x16[PLANE_PRED8x8] for AV_CODEC_ID_SVQ3 */
+void ff_pred16x16_plane_rv40_8_mmi(uint8_t *src, ptrdiff_t stride); /* pred16x16[PLANE_PRED8x8] for AV_CODEC_ID_RV40 */
+void ff_pred16x16_plane_h264_8_mmi(uint8_t *src, ptrdiff_t stride); /* pred16x16[PLANE_PRED8x8] for codecs other than SVQ3/RV40/VP7/VP8 */
+void ff_pred8x8_top_dc_8_mmi(uint8_t *src, ptrdiff_t stride); /* pred8x8[TOP_DC_PRED8x8] for SVQ3/H264 with chroma_format_idc == 1 */
+void ff_pred8x8_dc_8_mmi(uint8_t *src, ptrdiff_t stride); /* pred8x8[DC_PRED8x8] for SVQ3/H264 with chroma_format_idc == 1 */
+void ff_pred8x16_vertical_8_mmi(uint8_t *src, ptrdiff_t stride); /* pred8x8[VERT_PRED8x8] when chroma_format_idc != 1 */
+void ff_pred8x16_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride); /* pred8x8[HOR_PRED8x8] when chroma_format_idc != 1 */
+
+#endif  /* AVCODEC_MIPS_H264PRED_MIPS_H */
diff --git a/libavcodec/mips/h264pred_mmi.c b/libavcodec/mips/h264pred_mmi.c
new file mode 100644
index 00000000..e949d111
--- /dev/null
+++ b/libavcodec/mips/h264pred_mmi.c
@@ -0,0 +1,780 @@
+/*
+ * Loongson SIMD optimized h264pred
+ *
+ * Copyright (c) 2015 Loongson Technology Corporation Limited
+ * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
+ *                    Zhang Shuangshuang <zhangshuangshuang@ict.ac.cn>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "h264pred_mips.h"
+
+void ff_pred16x16_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
+{   /* Copies the 16 bytes at src - stride into each of 16 rows starting at src. */
+    __asm__ volatile (
+        "dli $8, 16                         \r\n" /* $8 = rows left to write */
+        "gsldlc1 $f2, 7(%[srcA])            \r\n" /* $f2 = srcA[0..7] (unaligned load pair) */
+        "gsldrc1 $f2, 0(%[srcA])            \r\n"
+        "gsldlc1 $f4, 15(%[srcA])           \r\n" /* $f4 = srcA[8..15] */
+        "gsldrc1 $f4, 8(%[srcA])            \r\n"
+        "1:                                 \r\n"
+        "gssdlc1 $f2, 7(%[src])             \r\n" /* row[0..7]  = $f2 */
+        "gssdrc1 $f2, 0(%[src])             \r\n"
+        "gssdlc1 $f4, 15(%[src])            \r\n" /* row[8..15] = $f4 */
+        "gssdrc1 $f4, 8(%[src])             \r\n"
+        "daddu %[src], %[src], %[stride]    \r\n" /* advance to the next row */
+        "daddi $8, $8, -1                   \r\n"
+        "bnez $8, 1b                        \r\n"
+        : [src]"+&r"(src)
+        : [stride]"r"(stride),[srcA]"r"(src-stride)
+        : "$8","$f2","$f4","memory" /* "memory": the asm reads and stores pixels the operand list does not describe */
+    );
+}
+
+void ff_pred16x16_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
+{   /* For each of 16 rows: reads the byte at row[-1] and stores a value derived from it to row[0..15]. */
+    __asm__ volatile (
+        "daddiu $2, %[src], -1              \r\n" /* $2 -> src[-1] of the current row */
+        "daddu $3, %[src], $0               \r\n" /* $3 -> current output row */
+        "dli $6, 0x10                       \r\n" /* $6 = 16 rows left */
+        "1:                                 \r\n"
+        "lbu $4, 0($2)                      \r\n" /* left-neighbour byte */
+        "dmul $5, $4, %[ff_pb_1]            \r\n" /* presumably replicates the byte into all 8 byte lanes (assumes ff_pb_1 == 0x0101010101010101; confirm) */
+        "sdl $5, 7($3)                      \r\n" /* row[0..7]  = $5 */
+        "sdr $5, 0($3)                      \r\n"
+        "sdl $5, 15($3)                     \r\n" /* row[8..15] = $5 */
+        "sdr $5, 8($3)                      \r\n"
+        "daddu $2, %[stride]                \r\n"
+        "daddu $3, %[stride]                \r\n"
+        "daddiu $6, -1                      \r\n"
+        "bnez $6, 1b                        \r\n"
+        ::[src]"r"(src),[stride]"r"(stride),[ff_pb_1]"r"(ff_pb_1)
+        : "$2","$3","$4","$5","$6","memory" /* "memory": the asm reads and stores pixels the operand list does not describe */
+    );
+}
+
+void ff_pred16x16_dc_8_mmi(uint8_t *src, ptrdiff_t stride)
+{   /* Computes (16 left bytes + 16 top bytes + 16) >> 5 and stores a value derived from it to 16 rows of 16 bytes. */
+    __asm__ volatile (
+        "daddiu $2, %[src], -1              \r\n" /* $2 -> src[-1], top of the left column */
+        "dli $6, 0x10                       \r\n"
+        "xor $8, $8, $8                     \r\n" /* $8 = running sum */
+        "1:                                 \r\n" /* loop 1: add src[-1 + k*stride], k = 0..15 */
+        "lbu $4, 0($2)                      \r\n"
+        "daddu $8, $8, $4                   \r\n"
+        "daddu $2, $2, %[stride]            \r\n"
+        "daddiu $6, $6, -1                  \r\n"
+        "bnez $6, 1b                        \r\n"
+        "dli $6, 0x10                       \r\n"
+        "negu $3, %[stride]                 \r\n"
+        "daddu $2, %[src], $3               \r\n" /* $2 -> src[-stride], the row above */
+        "2:                                 \r\n" /* loop 2: add src[-stride + k], k = 0..15 */
+        "lbu $4, 0($2)                      \r\n"
+        "daddu $8, $8, $4                   \r\n"
+        "daddiu $2, $2, 1                   \r\n"
+        "daddiu $6, $6, -1                  \r\n"
+        "bnez $6, 2b                        \r\n"
+        "daddiu $8, $8, 0x10                \r\n" /* rounding term: + 16 ... */
+        "dsra $8, 5                         \r\n" /* ... then >> 5 */
+        "dmul $5, $8, %[ff_pb_1]            \r\n" /* presumably replicates the value into all 8 byte lanes (assumes ff_pb_1 == 0x0101010101010101; confirm) */
+        "daddu $2, %[src], $0               \r\n"
+        "dli $6, 0x10                       \r\n"
+        "3:                                 \r\n" /* loop 3: store $5 to bytes 0..7 and 8..15 of each of 16 rows */
+        "sdl $5, 7($2)                      \r\n"
+        "sdr $5, 0($2)                      \r\n"
+        "sdl $5, 15($2)                     \r\n"
+        "sdr $5, 8($2)                      \r\n"
+        "daddu $2, $2, %[stride]            \r\n"
+        "daddiu $6, $6, -1                  \r\n"
+        "bnez $6, 3b                        \r\n"
+        ::[src]"r"(src),[stride]"r"(stride),[ff_pb_1]"r"(ff_pb_1)
+        : "$2","$3","$4","$5","$6","$8","memory" /* "memory": the asm reads and stores pixels the operand list does not describe */
+    );
+}
+
+void ff_pred8x8l_top_dc_8_mmi(uint8_t *src, int has_topleft,
+        int has_topright, ptrdiff_t stride)
+{   /* Low-pass filters the 8 bytes above src, takes (their sum + 4) >> 3, and stores the result across 8 rows of 8 bytes. */
+    uint32_t dc;
+    __asm__ volatile (
+        "ldl $8, 7(%[srcA])                 \r\n" /* $8  = A: 8 bytes at src - stride - 1 (left neighbours) */
+        "ldr $8, 0(%[srcA])                 \r\n"
+        "ldl $9, 7(%[src0])                 \r\n" /* $9  = B: 8 bytes at src - stride (centre) */
+        "ldr $9, 0(%[src0])                 \r\n"
+        "ldl $10, 7(%[src1])                \r\n" /* $10 = C: 8 bytes at src - stride + 1 (right neighbours) */
+        "ldr $10, 0(%[src1])                \r\n"
+        "dmtc1 $8, $f2                      \r\n"
+        "dmtc1 $9, $f4                      \r\n"
+        "dmtc1 $10, $f6                     \r\n"
+        "dmtc1 $0, $f0                      \r\n" /* $f0 = 0, used to widen bytes to 16-bit lanes */
+        "punpcklbh $f8, $f2, $f0            \r\n" /* $f8 /$f10 = A[0..3] / A[4..7] */
+        "punpckhbh $f10, $f2, $f0           \r\n"
+        "punpcklbh $f12, $f4, $f0           \r\n" /* $f12/$f14 = B[0..3] / B[4..7] */
+        "punpckhbh $f14, $f4, $f0           \r\n"
+        "punpcklbh $f16, $f6, $f0           \r\n" /* $f16/$f18 = C[0..3] / C[4..7] */
+        "punpckhbh $f18, $f6, $f0           \r\n"
+        "bnez %[has_topleft], 1f            \r\n"
+        "pinsrh_0 $f8, $f8, $f12            \r\n" /* no top-left: B[0] stands in for A[0] */
+        "1:                                 \r\n"
+        "bnez %[has_topright], 2f           \r\n"
+        "dli $8, 0xa4                       \r\n" /* shuffle control: lanes 0,1,2 kept, lane 3 <- lane 2 */
+        "dmtc1 $8, $f20                     \r\n" /* ($8/$f20 are reloaded right after label 2) */
+        "pshufh $f18, $f18, $f20            \r\n" /* no top-right: C[7] <- C[6] (== B[7]); the former pinsrh_3 from $f14 inserted lane 0, i.e. B[4] */
+        "2:                                 \r\n"
+        "daddiu $8, $0, 2                   \r\n"
+        "dmtc1 $8, $f20                     \r\n" /* $f20 = 2: shift count */
+        "pshufh $f22, $f20, $f0             \r\n" /* $f22 = 2 in every lane: multiplier and rounding term */
+        "pmullh $f12, $f12, $f22            \r\n" /* 2*B */
+        "pmullh $f14, $f14, $f22            \r\n"
+        "paddh $f8, $f8, $f12               \r\n" /* A + 2*B */
+        "paddh $f10, $f10, $f14             \r\n"
+        "paddh $f8, $f8, $f16               \r\n" /* + C */
+        "paddh $f10, $f10, $f18             \r\n"
+        "paddh $f8, $f8, $f22               \r\n" /* + 2 */
+        "paddh $f10, $f10, $f22             \r\n"
+        "psrah $f8, $f8, $f20               \r\n" /* >> 2 */
+        "psrah $f10, $f10, $f20             \r\n"
+        "packushb $f4, $f8, $f10            \r\n" /* $f4 = the 8 filtered bytes */
+        "biadd $f2, $f4                     \r\n" /* $f2 = sum of those 8 bytes */
+        "mfc1 $9, $f2                       \r\n"
+        "addiu $9, $9, 4                    \r\n" /* (sum + 4) ... */
+        "dsrl $9, $9, 3                     \r\n" /* ... >> 3 */
+        "mul %[dc], $9, %[ff_pb_1]          \r\n" /* presumably replicates the value into 4 bytes (assumes the low word of ff_pb_1 is 0x01010101; confirm) */
+        : [dc]"=r"(dc)
+        : [srcA]"r"(src-stride-1),[src0]"r"(src-stride),
+          [src1]"r"(src-stride+1),[has_topleft]"r"(has_topleft),
+          [has_topright]"r"(has_topright),[ff_pb_1]"r"(ff_pb_1)
+        : "$8","$9","$10","$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14",
+          "$f16","$f18","$f20","$f22","memory" /* $f0 is written above; "memory": pixels are read through raw pointers */
+    );
+    __asm__ volatile (
+        "dli $8, 8                          \r\n" /* 8 rows */
+        "1:                                 \r\n"
+        "punpcklwd $f2, %[dc], %[dc]        \r\n" /* $f2 = dc word twice = 8 bytes */
+        "gssdlc1 $f2, 7(%[src])             \r\n" /* row[0..7] = $f2 (unaligned store pair) */
+        "gssdrc1 $f2, 0(%[src])             \r\n"
+        "daddu %[src], %[src], %[stride]    \r\n"
+        "daddi $8, $8, -1                   \r\n"
+        "bnez $8, 1b                        \r\n"
+        : [src]"+&r"(src)
+        : [dc]"f"(dc),[stride]"r"(stride)
+        : "$8","$f2","memory" /* "memory": rows are stored through src */
+    );
+}
+
+void ff_pred8x8l_dc_8_mmi(uint8_t *src, int has_topleft,
+        int has_topright, ptrdiff_t stride)
+{   /* Stores ((filtered left sum + filtered top sum + 8) >> 4), replicated to every byte, across 8 rows of 8 bytes. */
+    uint32_t dc, dc1, dc2;
+    /* l0..l7: (above + 2*cur + below + 2) >> 2 down the column src[-1]; l0 uses row 0 as "above" without top-left, l7 reuses row 7 as "below". */
+    const int l0 = ((has_topleft ? src[-1+-1*stride] : src[-1+0*stride]) + 2*src[-1+0*stride] + src[-1+1*stride] + 2) >> 2;
+    const int l1 = (src[-1+0*stride] + 2*src[-1+1*stride] + src[-1+2*stride] + 2) >> 2;
+    const int l2 = (src[-1+1*stride] + 2*src[-1+2*stride] + src[-1+3*stride] + 2) >> 2;
+    const int l3 = (src[-1+2*stride] + 2*src[-1+3*stride] + src[-1+4*stride] + 2) >> 2;
+    const int l4 = (src[-1+3*stride] + 2*src[-1+4*stride] + src[-1+5*stride] + 2) >> 2;
+    const int l5 = (src[-1+4*stride] + 2*src[-1+5*stride] + src[-1+6*stride] + 2) >> 2;
+    const int l6 = (src[-1+5*stride] + 2*src[-1+6*stride] + src[-1+7*stride] + 2) >> 2;
+    const int l7 = (src[-1+6*stride] + 2*src[-1+7*stride] + src[-1+7*stride] + 2) >> 2;
+    /* First asm: dc2 = sum of the 8 filtered bytes of the row above src. */
+    __asm__ volatile (
+        "ldl $8, 7(%[srcA])                 \r\n" /* $8  = A: 8 bytes at src - stride - 1 (left neighbours) */
+        "ldr $8, 0(%[srcA])                 \r\n"
+        "ldl $9, 7(%[src0])                 \r\n" /* $9  = B: 8 bytes at src - stride (centre) */
+        "ldr $9, 0(%[src0])                 \r\n"
+        "ldl $10, 7(%[src1])                \r\n" /* $10 = C: 8 bytes at src - stride + 1 (right neighbours) */
+        "ldr $10, 0(%[src1])                \r\n"
+        "dmtc1 $8, $f2                      \r\n"
+        "dmtc1 $9, $f4                      \r\n"
+        "dmtc1 $10, $f6                     \r\n"
+        "dmtc1 $0, $f0                      \r\n" /* $f0 = 0, used to widen bytes to 16-bit lanes */
+        "punpcklbh $f8, $f2, $f0            \r\n" /* $f8 /$f10 = A[0..3] / A[4..7] */
+        "punpckhbh $f10, $f2, $f0           \r\n"
+        "punpcklbh $f12, $f4, $f0           \r\n" /* $f12/$f14 = B[0..3] / B[4..7] */
+        "punpckhbh $f14, $f4, $f0           \r\n"
+        "punpcklbh $f16, $f6, $f0           \r\n" /* $f16/$f18 = C[0..3] / C[4..7] */
+        "punpckhbh $f18, $f6, $f0           \r\n"
+        "daddiu $8, $0, 3                   \r\n" /* shuffle control 3: lane 0 <- source lane 3 */
+        "dmtc1 $8, $f20                     \r\n"
+        "pshufh $f28, $f10, $f20            \r\n" /* $f28 lane 0 = A[7] */
+        "pshufh $f30, $f18, $f20            \r\n" /* $f30 lane 0 = C[7], the top-right byte */
+        "pinsrh_3 $f10, $f10, $f30          \r\n" /* exchange lane 3 of $f10 and $f18; the later sum is unaffected */
+        "pinsrh_3 $f18, $f18, $f28          \r\n"
+        "bnez %[has_topleft], 1f            \r\n"
+        "pinsrh_0 $f8, $f8, $f12            \r\n" /* no top-left: B[0] stands in for A[0] */
+        "1:                                 \r\n"
+        "bnez %[has_topright], 2f           \r\n"
+        "pshufh $f30, $f14, $f20            \r\n" /* $f30 lane 0 = B[7] */
+        "pinsrh_3 $f10, $f10, $f30          \r\n" /* no top-right: B[7] replaces the top-right byte now held in $f10 lane 3 */
+        "2:                                 \r\n"
+        "daddiu $8, $0, 2                   \r\n"
+        "dmtc1 $8, $f20                     \r\n" /* $f20 = 2: shift count */
+        "pshufh $f22, $f20, $f0             \r\n" /* $f22 = 2 in every lane: multiplier and rounding term */
+        "pmullh $f12, $f12, $f22            \r\n" /* 2*B */
+        "pmullh $f14, $f14, $f22            \r\n"
+        "paddh $f8, $f8, $f12               \r\n" /* A + 2*B */
+        "paddh $f10, $f10, $f14             \r\n"
+        "paddh $f8, $f8, $f16               \r\n" /* + C */
+        "paddh $f10, $f10, $f18             \r\n"
+        "paddh $f8, $f8, $f22               \r\n" /* + 2 */
+        "paddh $f10, $f10, $f22             \r\n"
+        "psrah $f8, $f8, $f20               \r\n" /* >> 2 */
+        "psrah $f10, $f10, $f20             \r\n"
+        "packushb $f4, $f8, $f10            \r\n" /* $f4 = the 8 filtered bytes */
+        "biadd $f2, $f4                     \r\n" /* $f2 = sum of those 8 bytes */
+        "mfc1 %[dc2], $f2                   \r\n"
+        : [dc2]"=r"(dc2)
+        : [srcA]"r"(src-stride-1),[src0]"r"(src-stride),
+          [src1]"r"(src-stride+1),[has_topleft]"r"(has_topleft),
+          [has_topright]"r"(has_topright)
+        : "$8","$9","$10","$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14",
+          "$f16","$f18","$f20","$f22","$f28","$f30","memory" /* $f0/$f28/$f30 are written above and must be declared */
+    );
+    /* Combine the left-column and top-row sums, round, and replicate the byte into a 32-bit word. */
+    dc1 = l0+l1+l2+l3+l4+l5+l6+l7;
+    dc = ((dc1+dc2+8)>>4)*0x01010101U;
+
+    __asm__ volatile (
+        "dli $8, 8                          \r\n" /* 8 rows */
+        "1:                                 \r\n"
+        "punpcklwd $f2, %[dc], %[dc]        \r\n" /* $f2 = dc word twice = 8 bytes */
+        "gssdlc1 $f2, 7(%[src])             \r\n" /* row[0..7] = $f2 (unaligned store pair) */
+        "gssdrc1 $f2, 0(%[src])             \r\n"
+        "daddu %[src], %[src], %[stride]    \r\n"
+        "daddi $8, $8, -1                   \r\n"
+        "bnez $8, 1b                        \r\n"
+        : [src]"+&r"(src)
+        : [dc]"f"(dc),[stride]"r"(stride)
+        : "$8","$f2","memory" /* "memory": rows are stored through src */
+    );
+}
+
+void ff_pred8x8l_vertical_8_mmi(uint8_t *src, int has_topleft,
+        int has_topright, ptrdiff_t stride)
+{   /* Low-pass filters the 8 bytes above src, writes them to row 0, then copies row 0 into rows 1..7. */
+    __asm__ volatile (
+        "ldl $8, 7(%[srcA])                 \r\n" /* $8  = A: 8 bytes at src - stride - 1 (left neighbours) */
+        "ldr $8, 0(%[srcA])                 \r\n"
+        "ldl $9, 7(%[src0])                 \r\n" /* $9  = B: 8 bytes at src - stride (centre) */
+        "ldr $9, 0(%[src0])                 \r\n"
+        "ldl $10, 7(%[src1])                \r\n" /* $10 = C: 8 bytes at src - stride + 1 (right neighbours) */
+        "ldr $10, 0(%[src1])                \r\n"
+        "dmtc1 $8, $f2                      \r\n"
+        "dmtc1 $9, $f4                      \r\n"
+        "dmtc1 $10, $f6                     \r\n"
+        "dmtc1 $0, $f0                      \r\n" /* $f0 = 0, used to widen bytes to 16-bit lanes */
+        "punpcklbh $f8, $f2, $f0            \r\n" /* $f8 /$f10 = A[0..3] / A[4..7] */
+        "punpckhbh $f10, $f2, $f0           \r\n"
+        "punpcklbh $f12, $f4, $f0           \r\n" /* $f12/$f14 = B[0..3] / B[4..7] */
+        "punpckhbh $f14, $f4, $f0           \r\n"
+        "punpcklbh $f16, $f6, $f0           \r\n" /* $f16/$f18 = C[0..3] / C[4..7] */
+        "punpckhbh $f18, $f6, $f0           \r\n"
+        "bnez %[has_topleft], 1f            \r\n"
+        "pinsrh_0 $f8, $f8, $f12            \r\n" /* no top-left: B[0] stands in for A[0] */
+        "1:                                 \r\n"
+        "bnez %[has_topright], 2f           \r\n"
+        "dli $8, 0xa4                       \r\n" /* shuffle control: lanes 0,1,2 kept, lane 3 <- lane 2 */
+        "dmtc1 $8, $f20                     \r\n" /* ($8/$f20 are reloaded right after label 2) */
+        "pshufh $f18, $f18, $f20            \r\n" /* no top-right: C[7] <- C[6] (== B[7]); the former pinsrh_3 from $f14 inserted lane 0, i.e. B[4] */
+        "2:                                 \r\n"
+        "daddiu $8, $0, 2                   \r\n"
+        "dmtc1 $8, $f20                     \r\n" /* $f20 = 2: shift count */
+        "pshufh $f22, $f20, $f0             \r\n" /* $f22 = 2 in every lane: multiplier and rounding term */
+        "pmullh $f12, $f12, $f22            \r\n" /* 2*B */
+        "pmullh $f14, $f14, $f22            \r\n"
+        "paddh $f8, $f8, $f12               \r\n" /* A + 2*B */
+        "paddh $f10, $f10, $f14             \r\n"
+        "paddh $f8, $f8, $f16               \r\n" /* + C */
+        "paddh $f10, $f10, $f18             \r\n"
+        "paddh $f8, $f8, $f22               \r\n" /* + 2 */
+        "paddh $f10, $f10, $f22             \r\n"
+        "psrah $f8, $f8, $f20               \r\n" /* >> 2 */
+        "psrah $f10, $f10, $f20             \r\n"
+        "packushb $f4, $f8, $f10            \r\n" /* $f4 = the 8 filtered bytes */
+        "sdc1 $f4, 0(%[src])                \r\n" /* row 0 = $f4; NOTE(review): sdc1 is an aligned store, assumes src is 8-byte aligned -- confirm */
+        : [src]"+r"(src) /* was "=r": src is the store address, so it must enter the asm initialised */
+        : [srcA]"r"(src-stride-1),[src0]"r"(src-stride),[src1]"r"(src-stride+1),
+          [has_topleft]"r"(has_topleft),[has_topright]"r"(has_topright)
+        : "$8","$9","$10","$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14",
+          "$f16","$f18","$f20","$f22","memory" /* $f0 is written above; "memory": row 0 is stored through src */
+    );
+    __asm__ volatile (
+        "dli $8, 7                          \r\n" /* 7 rows left after row 0 */
+        "gsldlc1 $f2, 7(%[src])             \r\n" /* $f2 = row 0 as just written */
+        "gsldrc1 $f2, 0(%[src])             \r\n"
+        "dadd %[src], %[src], %[stride]     \r\n" /* start at row 1 */
+        "1:                                 \r\n"
+        "gssdlc1 $f2, 7(%[src])             \r\n" /* row[0..7] = $f2 (unaligned store pair) */
+        "gssdrc1 $f2, 0(%[src])             \r\n"
+        "daddu %[src], %[src], %[stride]    \r\n"
+        "daddi $8, $8, -1                   \r\n"
+        "bnez $8, 1b                        \r\n"
+        : [src]"+&r"(src)
+        : [stride]"r"(stride)
+        : "$8","$f2","memory" /* "memory": row 0 is read and rows 1..7 are stored through src */
+    );
+}
+
+void ff_pred4x4_dc_8_mmi(uint8_t *src, const uint8_t *topright,
+        ptrdiff_t stride)
+{   /* Stores a word derived from (4 top bytes + 4 left bytes + 4) >> 3 to 4 rows; topright is not used. */
+    const int dc = (src[-stride] + src[1-stride] + src[2-stride]
+                 + src[3-stride] + src[-1+0*stride] + src[-1+1*stride]
+                 + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
+
+    __asm__ volatile (
+        "daddu $2, %[dc], $0                \r\n"
+        "dmul $3, $2, %[ff_pb_1]            \r\n" /* presumably replicates dc into every byte (assumes ff_pb_1 == 0x0101010101010101; confirm) */
+        "xor $4, $4, $4                     \r\n" /* $4 = byte offset of the current row, starts at 0 */
+        "gsswx $3, 0(%[src],$4)             \r\n" /* 32-bit store at src + $4 */
+        "daddu $4, %[stride]                \r\n"
+        "gsswx $3, 0(%[src],$4)             \r\n"
+        "daddu $4, %[stride]                \r\n"
+        "gsswx $3, 0(%[src],$4)             \r\n"
+        "daddu $4, %[stride]                \r\n"
+        "gsswx $3, 0(%[src],$4)             \r\n"
+        ::[src]"r"(src),[stride]"r"(stride),[dc]"r"(dc),[ff_pb_1]"r"(ff_pb_1)
+        : "$2","$3","$4","memory" /* "memory": the four rows are stored through src */
+    );
+}
+
+void ff_pred8x8_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
+{   /* Copies the 8 bytes at src - stride into each of 8 rows starting at src. */
+    __asm__ volatile (
+        "dsubu $2, %[src], %[stride]        \r\n" /* $2 -> row above */
+        "daddu $3, %[src], $0               \r\n" /* $3 -> current output row */
+        "ldl $4, 7($2)                      \r\n" /* $4 = 8 bytes of the row above (unaligned load pair) */
+        "ldr $4, 0($2)                      \r\n"
+        "dli $5, 0x8                        \r\n" /* 8 rows */
+        "1:                                 \r\n"
+        "sdl $4, 7($3)                      \r\n" /* row[0..7] = $4 */
+        "sdr $4, 0($3)                      \r\n"
+        "daddu $3, %[stride]                \r\n"
+        "daddiu $5, -1                      \r\n"
+        "bnez $5, 1b                        \r\n"
+        ::[src]"r"(src),[stride]"r"(stride)
+        : "$2","$3","$4","$5","memory" /* "memory": the asm reads and stores pixels the operand list does not describe */
+    );
+}
+
+void ff_pred8x8_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
+{   /* For each of 8 rows: reads the byte at row[-1] and stores a value derived from it to row[0..7]. */
+    __asm__ volatile (
+        "daddiu $2, %[src], -1              \r\n" /* $2 -> src[-1] of the current row */
+        "daddu $3, %[src], $0               \r\n" /* $3 -> current output row */
+        "dli $6, 0x8                        \r\n" /* 8 rows */
+        "1:                                 \r\n"
+        "lbu $4, 0($2)                      \r\n" /* left-neighbour byte */
+        "dmul $5, $4, %[ff_pb_1]            \r\n" /* presumably replicates the byte into all 8 byte lanes (assumes ff_pb_1 == 0x0101010101010101; confirm) */
+        "sdl $5, 7($3)                      \r\n" /* row[0..7] = $5 */
+        "sdr $5, 0($3)                      \r\n"
+        "daddu $2, %[stride]                \r\n"
+        "daddu $3, %[stride]                \r\n"
+        "daddiu $6, -1                      \r\n"
+        "bnez $6, 1b                        \r\n"
+        ::[src]"r"(src),[stride]"r"(stride),[ff_pb_1]"r"(ff_pb_1)
+        : "$2","$3","$4","$5","$6","memory" /* "memory": the asm reads and stores pixels the operand list does not describe */
+    );
+}
+
+static void ff_pred16x16_plane_compat_8_mmi(uint8_t *src, ptrdiff_t stride,
+        const int svq3, const int rv40)
+{
+    __asm__ volatile (
+        "negu $2, %[stride]                 \r\n"
+        "daddu $3, %[src], $2               \r\n"
+        "xor $f8, $f8, $f8                  \r\n"
+        "gslwlc1 $f0, 2($3)                 \r\n"
+        "gslwrc1 $f0, -1($3)                \r\n"
+        "gslwlc1 $f2, 6($3)                 \r\n"
+        "gslwrc1 $f2, 3($3)                 \r\n"
+        "gslwlc1 $f4, 11($3)                \r\n"
+        "gslwrc1 $f4, 8($3)                 \r\n"
+        "gslwlc1 $f6, 15($3)                \r\n"
+        "gslwrc1 $f6, 12($3)                \r\n"
+        "punpcklbh $f0, $f0, $f8            \r\n"
+        "punpcklbh $f2, $f2, $f8            \r\n"
+        "punpcklbh $f4, $f4, $f8            \r\n"
+        "punpcklbh $f6, $f6, $f8            \r\n"
+        "dmtc1 %[ff_pw_m8tom5], $f20        \r\n"
+        "dmtc1 %[ff_pw_m4tom1], $f22        \r\n"
+        "dmtc1 %[ff_pw_1to4], $f24          \r\n"
+        "dmtc1 %[ff_pw_5to8], $f26          \r\n"
+        "pmullh $f0, $f0, $f20              \r\n"
+        "pmullh $f2, $f2, $f22              \r\n"
+        "pmullh $f4, $f4, $f24              \r\n"
+        "pmullh $f6, $f6, $f26              \r\n"
+        "paddsh $f0, $f0, $f4               \r\n"
+        "paddsh $f2, $f2, $f6               \r\n"
+        "paddsh $f0, $f0, $f2               \r\n"
+        "dli $4, 0xE                        \r\n"
+        "dmtc1 $4, $f28                     \r\n"
+        "pshufh $f2, $f0, $f28              \r\n"
+        "paddsh $f0, $f0, $f2               \r\n"
+        "dli $4, 0x1                        \r\n"
+        "dmtc1 $4, $f30                     \r\n"
+        "pshufh $f2, $f0, $f30              \r\n"
+        "paddsh $f10, $f0, $f2              \r\n"
+        "daddiu $3, %[src], -1              \r\n"
+        "daddu $3, $2                       \r\n"
+        "lbu $4, 0($3)                      \r\n"
+        "lbu $8, 16($3)                     \r\n"
+        "daddu $3, %[stride]                \r\n"
+        "lbu $5, 0($3)                      \r\n"
+        "daddu $3, %[stride]                \r\n"
+        "lbu $6, 0($3)                      \r\n"
+        "daddu $3, %[stride]                \r\n"
+        "lbu $7, 0($3)                      \r\n"
+        "dsll $5, 16                        \r\n"
+        "dsll $6, 32                        \r\n"
+        "dsll $7, 48                        \r\n"
+        "or $6, $7                          \r\n"
+        "or $4, $5                          \r\n"
+        "or $4, $6                          \r\n"
+        "dmtc1 $4, $f0                      \r\n"
+        "daddu $3, %[stride]                \r\n"
+        "lbu $4, 0($3)                      \r\n"
+        "daddu $3, %[stride]                \r\n"
+        "lbu $5, 0($3)                      \r\n"
+        "daddu $3, %[stride]                \r\n"
+        "lbu $6, 0($3)                      \r\n"
+        "daddu $3, %[stride]                \r\n"
+        "lbu $7, 0($3)                      \r\n"
+        "dsll $5, 16                        \r\n"
+        "dsll $6, 32                        \r\n"
+        "dsll $7, 48                        \r\n"
+        "or $6, $7                          \r\n"
+        "or $4, $5                          \r\n"
+        "or $4, $6                          \r\n"
+        "dmtc1 $4, $f2                      \r\n"
+        "daddu $3, %[stride]                \r\n"
+        "daddu $3, %[stride]                \r\n"
+        "lbu $4, 0($3)                      \r\n"
+        "daddu $3, %[stride]                \r\n"
+        "lbu $5, 0($3)                      \r\n"
+        "daddu $3, %[stride]                \r\n"
+        "lbu $6, 0($3)                      \r\n"
+        "daddu $3, %[stride]                \r\n"
+        "lbu $7, 0($3)                      \r\n"
+        "dsll $5, 16                        \r\n"
+        "dsll $6, 32                        \r\n"
+        "dsll $7, 48                        \r\n"
+        "or $6, $7                          \r\n"
+        "or $4, $5                          \r\n"
+        "or $4, $6                          \r\n"
+        "dmtc1 $4, $f4                      \r\n"
+        "daddu $3, %[stride]                \r\n"
+        "lbu $4, 0($3)                      \r\n"
+        "daddu $3, %[stride]                \r\n"
+        "lbu $5, 0($3)                      \r\n"
+        "daddu $3, %[stride]                \r\n"
+        "lbu $6, 0($3)                      \r\n"
+        "daddu $3, %[stride]                \r\n"
+        "lbu $7, 0($3)                      \r\n"
+        "daddu $8, $7                       \r\n"
+        "daddiu $8, 1                       \r\n"
+        "dsll $8, 4                         \r\n"
+        "dsll $5, 16                        \r\n"
+        "dsll $6, 32                        \r\n"
+        "dsll $7, 48                        \r\n"
+        "or $6, $7                          \r\n"
+        "or $4, $5                          \r\n"
+        "or $4, $6                          \r\n"
+        "dmtc1 $4, $f6                      \r\n"
+        "pmullh $f0, $f0, $f20              \r\n"
+        "pmullh $f2, $f2, $f22              \r\n"
+        "pmullh $f4, $f4, $f24              \r\n"
+        "pmullh $f6, $f6, $f26              \r\n"
+        "paddsh $f0, $f0, $f4               \r\n"
+        "paddsh $f2, $f2, $f6               \r\n"
+        "paddsh $f0, $f0, $f2               \r\n"
+        "pshufh $f2, $f0, $f28              \r\n"
+        "paddsh $f0, $f0, $f2               \r\n"
+        "pshufh $f2, $f0, $f30              \r\n"
+        "paddsh $f12, $f0, $f2              \r\n"
+        "dmfc1 $2, $f10                     \r\n"
+        "dsll $2, 48                        \r\n"
+        "dsra $2, 48                        \r\n"
+        "dmfc1 $3, $f12                     \r\n"
+        "dsll $3, 48                        \r\n"
+        "dsra $3, 48                        \r\n"
+        "beqz %[svq3], 1f                   \r\n"
+        "dli $4, 4                          \r\n"
+        "ddiv $2, $4                        \r\n"
+        "ddiv $3, $4                        \r\n"
+        "dli $4, 5                          \r\n"
+        "dmul $2, $4                        \r\n"
+        "dmul $3, $4                        \r\n"
+        "dli $4, 16                         \r\n"
+        "ddiv $2, $4                        \r\n"
+        "ddiv $3, $4                        \r\n"
+        "daddu $4, $2, $0                   \r\n"
+        "daddu $2, $3, $0                   \r\n"
+        "daddu $3, $4, $0                   \r\n"
+        "b 2f                               \r\n"
+        "1:                                 \r\n"
+        "beqz %[rv40], 1f                   \r\n"
+        "dsra $4, $2, 2                     \r\n"
+        "daddu $2, $4                       \r\n"
+        "dsra $4, $3, 2                     \r\n"
+        "daddu $3, $4                       \r\n"
+        "dsra $2, 4                         \r\n"
+        "dsra $3, 4                         \r\n"
+        "b 2f                               \r\n"
+        "1:                                 \r\n"
+        "dli $4, 5                          \r\n"
+        "dmul $2, $4                        \r\n"
+        "dmul $3, $4                        \r\n"
+        "daddiu $2, 32                      \r\n"
+        "daddiu $3, 32                      \r\n"
+        "dsra $2, 6                         \r\n"
+        "dsra $3, 6                         \r\n"
+        "2:                                 \r\n"
+        "daddu $5, $2, $3                   \r\n"
+        "dli $4, 7                          \r\n"
+        "dmul $5, $4                        \r\n"
+        "dsubu $8, $5                       \r\n"
+        "dmtc1 $0, $f8                      \r\n"
+        "dmtc1 $2, $f0                      \r\n"
+        "pshufh $f0, $f0, $f8               \r\n"
+        "dmtc1 $3, $f10                     \r\n"
+        "pshufh $f10, $f10, $f8             \r\n"
+        "dmtc1 $8, $f12                     \r\n"
+        "pshufh $f12, $f12, $f8             \r\n"
+        "dli $4, 5                          \r\n"
+        "dmtc1 $4, $f14                     \r\n"
+        "pmullh $f2, %[ff_pw_0to3], $f0     \r\n"
+        "pmullh $f4, %[ff_pw_4to7], $f0     \r\n"
+        "pmullh $f6, %[ff_pw_8tob], $f0     \r\n"
+        "pmullh $f8, %[ff_pw_ctof], $f0     \r\n"
+        "daddu $3, %[src], $0               \r\n"
+        "dli $2, 16                         \r\n"
+        "1:                                 \r\n"
+        "paddsh $f16, $f2, $f12             \r\n"
+        "psrah $f16, $f16, $f14             \r\n"
+        "paddsh $f18, $f4, $f12             \r\n"
+        "psrah $f18, $f18, $f14             \r\n"
+        "packushb $f20, $f16, $f18          \r\n"
+        "gssdlc1 $f20, 7($3)                \r\n"
+        "gssdrc1 $f20, 0($3)                \r\n"
+        "paddsh $f16, $f6, $f12             \r\n"
+        "psrah $f16, $f16, $f14             \r\n"
+        "paddsh $f18, $f8, $f12             \r\n"
+        "psrah $f18, $f18, $f14             \r\n"
+        "packushb $f20, $f16, $f18          \r\n"
+        "gssdlc1 $f20, 15($3)               \r\n"
+        "gssdrc1 $f20, 8($3)                \r\n"
+        "paddsh $f12, $f12, $f10            \r\n"
+        "daddu $3, %[stride]                \r\n"
+        "daddiu $2, -1                      \r\n"
+        "bnez $2, 1b                        \r\n"
+        ::[src]"r"(src),[stride]"r"(stride),[svq3]"r"(svq3),[rv40]"r"(rv40),
+          [ff_pw_m8tom5]"r"(ff_pw_m8tom5),[ff_pw_m4tom1]"r"(ff_pw_m4tom1),
+          [ff_pw_1to4]"r"(ff_pw_1to4),[ff_pw_5to8]"r"(ff_pw_5to8),
+          [ff_pw_0to3]"f"(ff_pw_0to3),[ff_pw_4to7]"f"(ff_pw_4to7),
+          [ff_pw_8tob]"f"(ff_pw_8tob),[ff_pw_ctof]"f"(ff_pw_ctof)
+        : "$2","$3","$4","$5","$6","$7","$8","$f0","$f2","$f4","$f6","$f8",
+          "$f10","$f12","$f14","$f16","$f18","$f20","$f22","$f24","$f26",
+          "$f28","$f30"
+    );
+}
+
+void ff_pred16x16_plane_svq3_8_mmi(uint8_t *src, ptrdiff_t stride)
+{   /* Thin entry point: forwards to the shared 16x16 plane routine with flags (1, 0). */
+    ff_pred16x16_plane_compat_8_mmi(src, stride, 1, 0); /* presumably svq3 = 1, rv40 = 0 -- confirm against the callee's signature */
+}
+
+void ff_pred16x16_plane_rv40_8_mmi(uint8_t *src, ptrdiff_t stride)
+{   /* Thin entry point: forwards to the shared 16x16 plane routine with flags (0, 1). */
+    ff_pred16x16_plane_compat_8_mmi(src, stride, 0, 1); /* presumably svq3 = 0, rv40 = 1 -- confirm against the callee's signature */
+}
+
+void ff_pred16x16_plane_h264_8_mmi(uint8_t *src, ptrdiff_t stride)
+{   /* Thin entry point: forwards to the shared 16x16 plane routine with both flags cleared. */
+    ff_pred16x16_plane_compat_8_mmi(src, stride, 0, 0); /* presumably svq3 = 0, rv40 = 0 -- confirm against the callee's signature */
+}
+
+void ff_pred8x8_top_dc_8_mmi(uint8_t *src, ptrdiff_t stride)
+{   /* Fill 8 rows of 8 bytes at src; each 4-byte half is (sum of the 4 bytes above it + 2) >> 2, as read from the MMI mnemonics. */
+    __asm__ volatile (
+        "dli $2, 2                          \r\n" /* $2 = 2: used as rounding term and as shift count */
+        "xor $f0, $f0, $f0                  \r\n"
+        "xor $f2, $f2, $f2                  \r\n"
+        "xor $f30, $f30, $f30               \r\n" /* $f30 = 0 */
+        "negu $3, %[stride]                 \r\n"
+        "daddu $3, $3, %[src]               \r\n" /* $3 = src - stride (row above) */
+        "gsldlc1 $f4, 7($3)                 \r\n"
+        "gsldrc1 $f4, 0($3)                 \r\n" /* $f4 = 8 bytes of the row above */
+        "punpcklbh $f0, $f4, $f30           \r\n"
+        "punpckhbh $f2, $f4, $f30           \r\n"
+        "biadd $f0, $f0                     \r\n"
+        "biadd $f2, $f2                     \r\n"
+        "pshufh $f0, $f0, $f30              \r\n"
+        "pshufh $f2, $f2, $f30              \r\n"
+        "dmtc1 $2, $f4                      \r\n"
+        "pshufh $f4, $f4, $f30              \r\n"
+        "paddush $f0, $f0, $f4              \r\n"
+        "paddush $f2, $f2, $f4              \r\n"
+        "dmtc1 $2, $f4                      \r\n"
+        "psrlh $f0, $f0, $f4                \r\n"
+        "psrlh $f2, $f2, $f4                \r\n"
+        "packushb $f4, $f0, $f2             \r\n"
+        "dli $2, 8                          \r\n" /* row counter */
+        "1:                                 \r\n"
+        "gssdlc1 $f4, 7(%[src])             \r\n"
+        "gssdrc1 $f4, 0(%[src])             \r\n"
+        "daddu %[src], %[src], %[stride]    \r\n" /* src is advanced here, so it must be an in/out operand */
+        "daddiu $2, $2, -1                  \r\n"
+        "bnez $2, 1b                        \r\n"
+        : [src]"+&r"(src) : [stride]"r"(stride)
+        : "$2","$3","$f0","$f2","$f4","$f30","memory" /* "memory": the asm loads and stores through src */
+    );
+}
+
+void ff_pred8x8_dc_8_mmi(uint8_t *src, ptrdiff_t stride)
+{   /* 8x8 DC prediction in place: one fill byte per 4x4 quadrant, from sums of the row above and the column to the left. */
+    __asm__ volatile (
+        "negu $2, %[stride]                 \r\n"
+        "daddu $2, $2, %[src]               \r\n" /* $2 = src - stride: walks bytes 0..3 of the row above */
+        "daddiu $5, $2, 4                   \r\n" /* $5 = $2 + 4: walks bytes 4..7 of the row above */
+        "lbu $6, 0($2)                      \r\n"
+        "daddu $3, $0, $6                   \r\n" /* $3 accumulates the top-left sum */
+        "daddiu $2, 1                       \r\n"
+        "lbu $6, 0($5)                      \r\n"
+        "daddu $4, $0, $6                   \r\n" /* $4 accumulates the top-right sum */
+        "daddiu $5, 1                       \r\n"
+        "lbu $6, 0($2)                      \r\n"
+        "daddu $3, $3, $6                   \r\n"
+        "daddiu $2, 1                       \r\n"
+        "lbu $6, 0($5)                      \r\n"
+        "daddu $4, $4, $6                   \r\n"
+        "daddiu $5, 1                       \r\n"
+        "lbu $6, 0($2)                      \r\n"
+        "daddu $3, $3, $6                   \r\n"
+        "daddiu $2, 1                       \r\n"
+        "lbu $6, 0($5)                      \r\n"
+        "daddu $4, $4, $6                   \r\n"
+        "daddiu $5, 1                       \r\n"
+        "lbu $6, 0($2)                      \r\n"
+        "daddu $3, $3, $6                   \r\n"
+        "daddiu $2, 1                       \r\n"
+        "lbu $6, 0($5)                      \r\n"
+        "daddu $4, $4, $6                   \r\n"
+        "daddiu $5, 1                       \r\n"
+        "dli $6, -1                         \r\n"
+        "daddu $6, $6, %[src]               \r\n" /* $6 = src - 1: walks the left column */
+        "lbu $5, 0($6)                      \r\n"
+        "daddu $7, $0, $5                   \r\n" /* $7 accumulates left-column rows 0..3 */
+        "daddu $6, $6, %[stride]            \r\n"
+        "lbu $5, 0($6)                      \r\n"
+        "daddu $7, $7, $5                   \r\n"
+        "daddu $6, $6, %[stride]            \r\n"
+        "lbu $5, 0($6)                      \r\n"
+        "daddu $7, $7, $5                   \r\n"
+        "daddu $6, $6, %[stride]            \r\n"
+        "lbu $5, 0($6)                      \r\n"
+        "daddu $7, $7, $5                   \r\n"
+        "daddu $6, $6, %[stride]            \r\n"
+        "lbu $5, 0($6)                      \r\n"
+        "daddu $8, $0, $5                   \r\n" /* $8 accumulates left-column rows 4..7 */
+        "daddu $6, $6, %[stride]            \r\n"
+        "lbu $5, 0($6)                      \r\n"
+        "daddu $8, $8, $5                   \r\n"
+        "daddu $6, $6, %[stride]            \r\n"
+        "lbu $5, 0($6)                      \r\n"
+        "daddu $8, $8, $5                   \r\n"
+        "daddu $6, $6, %[stride]            \r\n"
+        "lbu $5, 0($6)                      \r\n"
+        "daddu $8, $8, $5                   \r\n"
+        "daddu $3, $3, $7                   \r\n" /* top-left sum + upper-left sum */
+        "daddiu $3, $3, 4                   \r\n"
+        "daddiu $4, $4, 2                   \r\n"
+        "daddiu $5, $8, 2                   \r\n"
+        "daddu $6, $4, $5                   \r\n" /* top-right + lower-left sums, rounding terms 2 + 2 already included */
+        "dsrl $3, 3                         \r\n" /* $3: top-left quadrant value */
+        "dsrl $4, 2                         \r\n" /* $4: top-right quadrant value */
+        "dsrl $5, 2                         \r\n" /* $5: bottom-left quadrant value */
+        "dsrl $6, 3                         \r\n" /* $6: bottom-right quadrant value */
+        "xor $f30, $f30, $f30               \r\n"
+        "dmtc1 $3, $f0                      \r\n"
+        "pshufh $f0, $f0, $f30              \r\n"
+        "dmtc1 $4, $f2                      \r\n"
+        "pshufh $f2, $f2, $f30              \r\n"
+        "dmtc1 $5, $f4                      \r\n"
+        "pshufh $f4, $f4, $f30              \r\n"
+        "dmtc1 $6, $f6                      \r\n"
+        "pshufh $f6, $f6, $f30              \r\n"
+        "packushb $f0, $f0, $f2             \r\n" /* $f0: row pattern stored to rows 0..3 */
+        "packushb $f2, $f4, $f6             \r\n" /* $f2: row pattern stored to rows 4..7 */
+        "daddu $2, $0, %[src]               \r\n"
+        "sdc1 $f0, 0($2)                    \r\n"
+        "daddu $2, $2, %[stride]            \r\n"
+        "sdc1 $f0, 0($2)                    \r\n"
+        "daddu $2, $2, %[stride]            \r\n"
+        "sdc1 $f0, 0($2)                    \r\n"
+        "daddu $2, $2, %[stride]            \r\n"
+        "sdc1 $f0, 0($2)                    \r\n"
+        "daddu $2, $2, %[stride]            \r\n"
+        "sdc1 $f2, 0($2)                    \r\n"
+        "daddu $2, $2, %[stride]            \r\n"
+        "sdc1 $f2, 0($2)                    \r\n"
+        "daddu $2, $2, %[stride]            \r\n"
+        "sdc1 $f2, 0($2)                    \r\n"
+        "daddu $2, $2, %[stride]            \r\n"
+        "sdc1 $f2, 0($2)                    \r\n"
+        ::[src]"r"(src),[stride]"r"(stride)
+        : "$2","$3","$4","$5","$6","$7","$8","$f0","$f2","$f4","$f6","$f30","memory" /* "memory": the asm loads and stores through src */
+    );
+}
+
+void ff_pred8x16_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
+{   /* Copy the 8 bytes at src - stride into each of the 16 rows starting at src. */
+    __asm__ volatile (
+        "gsldlc1 $f2, 7(%[srcA])            \r\n"
+        "gsldrc1 $f2, 0(%[srcA])            \r\n" /* $f2 = 8 bytes of the row above */
+        "dli $8, 16                         \r\n" /* row counter */
+        "1:                                 \r\n"
+        "gssdlc1 $f2, 7(%[src])             \r\n"
+        "gssdrc1 $f2, 0(%[src])             \r\n"
+        "daddu %[src], %[src], %[stride]    \r\n"
+        "daddiu $8, $8, -1                  \r\n" /* non-trapping decrement, as in the sibling loops */
+        "bnez $8, 1b                        \r\n"
+        : [src]"+&r"(src)
+        : [stride]"r"(stride),[srcA]"r"(src-stride)
+        : "$8","$f2","memory" /* "memory": the asm loads and stores through pointers */
+    );
+}
+
+void ff_pred8x16_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
+{   /* For each of 16 rows, store 8 bytes at the row start, computed as (byte left of the row) * ff_pb_1. */
+    __asm__ volatile (
+        "daddiu $2, %[src], -1              \r\n" /* $2 walks the left-neighbour column */
+        "daddu $3, %[src], $0               \r\n" /* $3 walks the destination rows */
+        "dli $6, 0x10                       \r\n" /* 16 rows */
+        "1:                                 \r\n"
+        "lbu $4, 0($2)                      \r\n"
+        "dmul $5, $4, %[ff_pb_1]            \r\n" /* presumably ff_pb_1 holds 0x01 in every byte, making this a byte splat -- confirm */
+        "sdl $5, 7($3)                      \r\n"
+        "sdr $5, 0($3)                      \r\n"
+        "daddu $2, %[stride]                \r\n"
+        "daddu $3, %[stride]                \r\n"
+        "daddiu $6, -1                      \r\n"
+        "bnez $6, 1b                        \r\n"
+        ::[src]"r"(src),[stride]"r"(stride),[ff_pb_1]"r"(ff_pb_1)
+        : "$2","$3","$4","$5","$6","memory" /* "memory": the asm loads and stores through src */
+    );
+}
diff --git a/libavcodec/mips/h264pred_msa.c b/libavcodec/mips/h264pred_msa.c
new file mode 100644
index 00000000..cddcd2e8
--- /dev/null
+++ b/libavcodec/mips/h264pred_msa.c
@@ -0,0 +1,723 @@
+/*
+ * Copyright (c) 2015 Shivraj Patil (Shivraj.Patil@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/mips/generic_macros_msa.h"
+#include "h264dsp_mips.h"
+
+static void intra_predict_vert_8x8_msa(uint8_t *src, uint8_t *dst,
+                                       int32_t dst_stride)
+{
+    /* Write the 8 bytes read at src to 8 rows of dst, dst_stride apart. */
+    uint32_t rows_left = 8;
+    uint32_t left_word, right_word;
+
+    left_word  = LW(src);
+    right_word = LW(src + 4);
+    while (rows_left--) {
+        SW(left_word, dst);
+        SW(right_word, (dst + 4));
+        dst += dst_stride;
+    }
+}
+
+static void intra_predict_vert_16x16_msa(uint8_t *src, uint8_t *dst,
+                                         int32_t dst_stride)
+{
+    /* Store the vector loaded from src to 16 rows of dst, dst_stride
+     * bytes apart. */
+    uint32_t rows_left = 16;
+    v16u8 top_row = LD_UB(src);
+
+    while (rows_left--) {
+        ST_UB(top_row, dst);
+        dst += dst_stride;
+    }
+}
+
+static void intra_predict_horiz_8x8_msa(uint8_t *src, int32_t src_stride,
+                                        uint8_t *dst, int32_t dst_stride)
+{   /* Horizontal 8x8 prediction: dst row r is 8 copies of the byte src[r * src_stride]. */
+    uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+    /* ULL keeps the splat multiply unsigned; with a signed 64-bit constant it overflows for bytes >= 0x80. */
+    out0 = src[0 * src_stride] * 0x0101010101010101ULL;
+    out1 = src[1 * src_stride] * 0x0101010101010101ULL;
+    out2 = src[2 * src_stride] * 0x0101010101010101ULL;
+    out3 = src[3 * src_stride] * 0x0101010101010101ULL;
+    out4 = src[4 * src_stride] * 0x0101010101010101ULL;
+    out5 = src[5 * src_stride] * 0x0101010101010101ULL;
+    out6 = src[6 * src_stride] * 0x0101010101010101ULL;
+    out7 = src[7 * src_stride] * 0x0101010101010101ULL;
+
+    SD4(out0, out1, out2, out3, dst, dst_stride);
+    dst += (4 * dst_stride);
+    SD4(out4, out5, out6, out7, dst, dst_stride);
+}
+
+static void intra_predict_horiz_16x16_msa(uint8_t *src, int32_t src_stride,
+                                          uint8_t *dst, int32_t dst_stride)
+{
+    /* Horizontal 16x16 prediction: each destination row is filled with the
+     * byte read at the start of the matching source row.  The 16 rows are
+     * handled in four passes of four rows. */
+    uint32_t pass;
+    uint8_t left0, left1, left2, left3;
+    v16u8 fill0, fill1, fill2, fill3;
+
+    for (pass = 0; pass < 4; pass++) {
+        left0 = src[0 * src_stride];
+        left1 = src[1 * src_stride];
+        left2 = src[2 * src_stride];
+        left3 = src[3 * src_stride];
+        src += 4 * src_stride;
+
+        fill0 = (v16u8) __msa_fill_b(left0);
+        fill1 = (v16u8) __msa_fill_b(left1);
+        fill2 = (v16u8) __msa_fill_b(left2);
+        fill3 = (v16u8) __msa_fill_b(left3);
+
+        ST_UB4(fill0, fill1, fill2, fill3, dst, dst_stride);
+        dst += 4 * dst_stride;
+    }
+}
+
+static void intra_predict_dc_8x8_msa(uint8_t *src_top, uint8_t *src_left,
+                                     int32_t src_stride_left,
+                                     uint8_t *dst, int32_t dst_stride,
+                                     uint8_t is_above, uint8_t is_left)
+{
+    /* DC-fill an 8x8 block at dst.  The fill byte is a rounded average built
+     * from the neighbours flagged as usable: the row loaded from src_top
+     * (is_above) and/or 8 bytes of the src_left column, src_stride_left
+     * apart (is_left).  With neither flag set the block is filled with 128. */
+    uint32_t i, fill_word;
+    uint32_t dc_sum = 0;
+    v16u8 top, fill;
+    v8u16 sums16;
+    v4u32 sums32;
+    v2u64 sums64;
+
+    if (!is_above && !is_left) {
+        fill = (v16u8) __msa_ldi_b(128);
+    } else if (!is_above) {
+        /* Left column only: (sum + 4) >> 3. */
+        for (i = 0; i < 8; i++) {
+            dc_sum += src_left[i * src_stride_left];
+        }
+        fill = (v16u8) __msa_fill_b((dc_sum + 4) >> 3);
+    } else {
+        /* Reduce the top row with successive horizontal adds. */
+        top = LD_UB(src_top);
+        sums16 = __msa_hadd_u_h(top, top);
+        sums32 = __msa_hadd_u_w(sums16, sums16);
+        sums64 = __msa_hadd_u_d(sums32, sums32);
+
+        if (!is_left) {
+            /* Top row only: rounding shift by 3, then splat byte 0. */
+            sums64 = (v2u64) __msa_srari_d((v2i64) sums64, 3);
+            fill = (v16u8) __msa_splati_b((v16i8) sums64, 0);
+        } else {
+            /* Both neighbours: (top sum + left sum + 8) >> 4. */
+            dc_sum = __msa_copy_u_w((v4i32) sums64, 0);
+            for (i = 0; i < 8; i++) {
+                dc_sum += src_left[i * src_stride_left];
+            }
+
+            fill = (v16u8) __msa_fill_b((dc_sum + 8) >> 4);
+        }
+    }
+
+    fill_word = __msa_copy_u_w((v4i32) fill, 0);
+    for (i = 0; i < 8; i++) {
+        SW(fill_word, dst);
+        SW(fill_word, (dst + 4));
+        dst += dst_stride;
+    }
+}
+
+static void intra_predict_dc_16x16_msa(uint8_t *src_top, uint8_t *src_left,
+                                       int32_t src_stride_left,
+                                       uint8_t *dst, int32_t dst_stride,
+                                       uint8_t is_above, uint8_t is_left)
+{
+    /* DC-fill a 16x16 block at dst.  The fill byte is a rounded average built
+     * from the neighbours flagged as usable: the row loaded from src_top
+     * (is_above) and/or 16 bytes of the src_left column, src_stride_left
+     * apart (is_left).  With neither flag set the block is filled with 128. */
+    uint32_t i;
+    uint32_t dc_sum = 0;
+    v16u8 top, fill;
+    v8u16 sums16;
+    v4u32 sums32;
+    v2u64 sums64;
+
+    if (!is_above && !is_left) {
+        fill = (v16u8) __msa_ldi_b(128);
+    } else if (!is_above) {
+        /* Left column only: (sum + 8) >> 4. */
+        for (i = 0; i < 16; i++) {
+            dc_sum += src_left[i * src_stride_left];
+        }
+        fill = (v16u8) __msa_fill_b((dc_sum + 8) >> 4);
+    } else {
+        /* Reduce the top row with successive horizontal adds. */
+        top = LD_UB(src_top);
+        sums16 = __msa_hadd_u_h(top, top);
+        sums32 = __msa_hadd_u_w(sums16, sums16);
+        sums64 = __msa_hadd_u_d(sums32, sums32);
+        sums32 = (v4u32) __msa_pckev_w((v4i32) sums64, (v4i32) sums64);
+        sums64 = __msa_hadd_u_d(sums32, sums32);
+
+        if (!is_left) {
+            /* Top row only: rounding shift by 4, then splat byte 0. */
+            sums64 = (v2u64) __msa_srari_d((v2i64) sums64, 4);
+            fill = (v16u8) __msa_splati_b((v16i8) sums64, 0);
+        } else {
+            /* Both neighbours: (top sum + left sum + 16) >> 5. */
+            dc_sum = __msa_copy_u_w((v4i32) sums64, 0);
+            for (i = 0; i < 16; i++) {
+                dc_sum += src_left[i * src_stride_left];
+            }
+
+            fill = (v16u8) __msa_fill_b((dc_sum + 16) >> 5);
+        }
+    }
+
+    /* Store the fill vector to all 16 rows. */
+    for (i = 0; i < 16; i++) {
+        ST_UB(fill, dst);
+        dst += dst_stride;
+    }
+}
+
+#define INTRA_PREDICT_VALDC_8X8_MSA(val)                         \
+static void intra_predict_##val##dc_8x8_msa(uint8_t *dst,        \
+                                            int32_t dst_stride)  \
+{                                                                \
+    /* Fill an 8x8 block at dst with the constant byte val. */   \
+    uint32_t row, out;                                           \
+    v16i8 store;                                                 \
+                                                                 \
+    store = __msa_ldi_b(val);                                    \
+    out = __msa_copy_u_w((v4i32) store, 0);                      \
+    for (row = 8; row--;) {                                      \
+        SW(out, dst);                                            \
+        SW(out, (dst + 4));                                      \
+        dst += dst_stride;                                       \
+    }                                                            \
+}
+
+INTRA_PREDICT_VALDC_8X8_MSA(127)
+INTRA_PREDICT_VALDC_8X8_MSA(129)
+
+#define INTRA_PREDICT_VALDC_16X16_MSA(val)                         \
+static void intra_predict_##val##dc_16x16_msa(uint8_t *dst,        \
+                                              int32_t dst_stride)  \
+{                                                                  \
+    /* Fill a 16x16 block at dst with the constant byte val. */    \
+    uint32_t row;                                                  \
+    v16u8 store;                                                   \
+                                                                   \
+    store = (v16u8) __msa_ldi_b(val);                              \
+    for (row = 16; row--;) {                                       \
+        ST_UB(store, dst);                                         \
+        dst += dst_stride;                                         \
+    }                                                              \
+}
+
+INTRA_PREDICT_VALDC_16X16_MSA(127)
+INTRA_PREDICT_VALDC_16X16_MSA(129)
+
+static void intra_predict_plane_8x8_msa(uint8_t *src, int32_t stride)
+{   /* 8x8 plane prediction written in place at src, derived from the row above and the column to the left. */
+    uint8_t lpcnt;
+    int32_t res, res0, res1, res2, res3;
+    uint64_t out0, out1;
+    v16i8 shf_mask = { 3, 5, 2, 6, 1, 7, 0, 8, 3, 5, 2, 6, 1, 7, 0, 8 }; /* pairs top-row bytes (3,5) (2,6) (1,7) (0,8) */
+    v8i16 short_multiplier = { 1, 2, 3, 4, 1, 2, 3, 4 }; /* weight applied to each pair difference */
+    v4i32 int_multiplier = { 0, 1, 2, 3 }; /* column index within a group of four */
+    v16u8 src_top;
+    v8i16 vec9, vec10, vec11;
+    v4i32 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8;
+    v2i64 sum;
+    /* Load the row above, starting one byte to the left of the block. */
+    src_top = LD_UB(src - (stride + 1));
+    src_top = (v16u8) __msa_vshf_b(shf_mask, (v16i8) src_top, (v16i8) src_top);
+    /* res0: weighted sum of the paired top-row differences (presumably the horizontal gradient). */
+    vec9 = __msa_hsub_u_h(src_top, src_top);
+    vec9 *= short_multiplier;
+    vec8 = __msa_hadd_s_w(vec9, vec9);
+    sum = __msa_hadd_s_d(vec8, vec8);
+
+    res0 = __msa_copy_s_w((v4i32) sum, 0);
+    /* res1: the same weighted differences taken down the left column. */
+    res1 = (src[4 * stride - 1] - src[2 * stride - 1]) +
+        2 * (src[5 * stride - 1] - src[stride - 1]) +
+        3 * (src[6 * stride - 1] - src[-1]) +
+        4 * (src[7 * stride - 1] - src[-stride - 1]);
+    /* Scale both gradients: (17 * g + 16) >> 5. */
+    res0 *= 17;
+    res1 *= 17;
+    res0 = (res0 + 16) >> 5;
+    res1 = (res1 + 16) >> 5;
+    /* res: value for row 0, column 0 (before the final >> 5). */
+    res3 = 3 * (res0 + res1);
+    res2 = 16 * (src[7 * stride - 1] + src[-stride + 7] + 1);
+    res = res2 - res3;
+
+    vec8 = __msa_fill_w(res0);
+    vec4 = __msa_fill_w(res); /* running row base, advanced by res1 per row */
+    vec2 = __msa_fill_w(res1);
+    vec5 = vec8 * int_multiplier; /* res0 * {0, 1, 2, 3}: offsets for columns 0..3 */
+    vec3 = vec8 * 4; /* additional offset for columns 4..7 */
+    /* Each iteration produces two rows: vec0/vec1 for the first, vec6/vec7 for the second. */
+    for (lpcnt = 4; lpcnt--;) {
+        vec0 = vec5;
+        vec0 += vec4;
+        vec1 = vec0 + vec3;
+        vec6 = vec5;
+        vec4 += vec2; /* advance to the second row of the pair */
+        vec6 += vec4;
+        vec7 = vec6 + vec3;
+
+        SRA_4V(vec0, vec1, vec6, vec7, 5);
+        PCKEV_H2_SH(vec1, vec0, vec7, vec6, vec10, vec11);
+        CLIP_SH2_0_255(vec10, vec11);
+        PCKEV_B2_SH(vec10, vec10, vec11, vec11, vec10, vec11);
+
+        out0 = __msa_copy_s_d((v2i64) vec10, 0);
+        out1 = __msa_copy_s_d((v2i64) vec11, 0);
+        SD(out0, src);
+        src += stride;
+        SD(out1, src);
+        src += stride;
+
+        vec4 += vec2; /* advance to the first row of the next pair */
+    }
+}
+
+static void intra_predict_plane_16x16_msa(uint8_t *src, int32_t stride)
+{   /* 16x16 plane prediction written in place at src, derived from the row above and the column to the left. */
+    uint8_t lpcnt;
+    int32_t res0, res1, res2, res3;
+    uint64_t load0, load1;
+    v16i8 shf_mask = { 7, 8, 6, 9, 5, 10, 4, 11, 3, 12, 2, 13, 1, 14, 0, 15 }; /* pairs loaded bytes (7,8) (6,9) ... (0,15) */
+    v8i16 short_multiplier = { 1, 2, 3, 4, 5, 6, 7, 8 }; /* weight applied to each pair difference */
+    v4i32 int_multiplier = { 0, 1, 2, 3 }; /* column index within a group of four */
+    v16u8 src_top = { 0 };
+    v8i16 vec9, vec10;
+    v4i32 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, res_add;
+    /* Two loads from the row above: at src - stride - 1 and at src - stride + 8 (the byte at offset +7 is skipped). */
+    load0 = LD(src - (stride + 1));
+    load1 = LD(src - (stride + 1) + 9);
+
+    INSERT_D2_UB(load0, load1, src_top);
+
+    src_top = (v16u8) __msa_vshf_b(shf_mask, (v16i8) src_top, (v16i8) src_top);
+    /* res0: weighted sum of the paired top-row differences (presumably the horizontal gradient). */
+    vec9 = __msa_hsub_u_h(src_top, src_top);
+    vec9 *= short_multiplier;
+    vec8 = __msa_hadd_s_w(vec9, vec9);
+    res_add = (v4i32) __msa_hadd_s_d(vec8, vec8);
+
+    res0 = __msa_copy_s_w(res_add, 0) + __msa_copy_s_w(res_add, 2);
+    /* res1: the same weighted differences taken down the left column. */
+    res1 = (src[8 * stride - 1] - src[6 * stride - 1]) +
+        2 * (src[9 * stride - 1] - src[5 * stride - 1]) +
+        3 * (src[10 * stride - 1] - src[4 * stride - 1]) +
+        4 * (src[11 * stride - 1] - src[3 * stride - 1]) +
+        5 * (src[12 * stride - 1] - src[2 * stride - 1]) +
+        6 * (src[13 * stride - 1] - src[stride - 1]) +
+        7 * (src[14 * stride - 1] - src[-1]) +
+        8 * (src[15 * stride - 1] - src[-1 * stride - 1]);
+    /* Scale both gradients: (5 * g + 32) >> 6. */
+    res0 *= 5;
+    res1 *= 5;
+    res0 = (res0 + 32) >> 6;
+    res1 = (res1 + 32) >> 6;
+    /* res2 ends up as the value for row 0, column 0 (before the final >> 5). */
+    res3 = 7 * (res0 + res1);
+    res2 = 16 * (src[15 * stride - 1] + src[-stride + 15] + 1);
+    res2 -= res3;
+
+    vec8 = __msa_fill_w(res0);
+    vec4 = __msa_fill_w(res2); /* running row base, advanced by res1 per row */
+    vec5 = __msa_fill_w(res1);
+    vec6 = vec8 * 4; /* step between successive groups of four columns */
+    vec7 = vec8 * int_multiplier; /* res0 * {0, 1, 2, 3}: offsets for columns 0..3 */
+    /* One row per iteration; vec0..vec3 hold columns 0..3, 4..7, 8..11 and 12..15. */
+    for (lpcnt = 16; lpcnt--;) {
+        vec0 = vec7;
+        vec0 += vec4;
+        vec1 = vec0 + vec6;
+        vec2 = vec1 + vec6;
+        vec3 = vec2 + vec6;
+
+        SRA_4V(vec0, vec1, vec2, vec3, 5);
+        PCKEV_H2_SH(vec1, vec0, vec3, vec2, vec9, vec10);
+        CLIP_SH2_0_255(vec9, vec10);
+        PCKEV_ST_SB(vec9, vec10, src);
+        src += stride;
+
+        vec4 += vec5; /* advance the row base to the next row */
+    }
+}
+
+static void intra_predict_dc_4blk_8x8_msa(uint8_t *src, int32_t stride)
+{
+    /* 8x8 DC prediction with one fill value per 4x4 quadrant, written in
+     * place.  Inputs are words 0 and 1 of the reduced row above (presumably
+     * the sums of its first and second 4 bytes) and the left-column sums. */
+    uint8_t row;
+    uint32_t top_l, top_r, left_hi = 0, left_lo = 0;
+    uint32_t fill_tl, fill_tr, fill_bl, fill_br;
+    v16u8 above;
+    v8u16 sums16;
+    v4u32 sums32;
+
+    above = LD_UB(src - stride);
+    sums16 = __msa_hadd_u_h(above, above);
+    sums32 = __msa_hadd_u_w(sums16, sums16);
+    top_l = __msa_copy_u_w((v4i32) sums32, 0);
+    top_r = __msa_copy_u_w((v4i32) sums32, 1);
+
+    for (row = 0; row < 4; row++) {
+        left_hi += src[row * stride - 1];
+        left_lo += src[(4 + row) * stride - 1];
+    }
+
+    /* Round, shift, then replicate each value into all four bytes. */
+    fill_tl = ((top_l + left_hi + 4) >> 3) * 0x01010101;
+    fill_tr = ((top_r + 2) >> 2) * 0x01010101;
+    fill_bl = ((left_lo + 2) >> 2) * 0x01010101;
+    fill_br = ((top_r + left_lo + 4) >> 3) * 0x01010101;
+
+    for (row = 0; row < 4; row++) {
+        SW(fill_tl, src);
+        SW(fill_tr, (src + 4));
+        SW(fill_bl, (src + 4 * stride));
+        SW(fill_br, (src + 4 * stride + 4));
+        src += stride;
+    }
+}
+
+static void intra_predict_hor_dc_8x8_msa(uint8_t *src, int32_t stride)
+{   /* 8x8 DC prediction from the left column only, in place: rows 0..3 and rows 4..7 each get their own fill byte. */
+    uint8_t lp_cnt;
+    uint32_t src0 = 0, src1 = 0;
+    uint64_t out0, out1;
+    /* src0 / src1: sums of the left neighbours of rows 0..3 / rows 4..7. */
+    for (lp_cnt = 0; lp_cnt < 4; lp_cnt++) {
+        src0 += src[lp_cnt * stride - 1];
+        src1 += src[(4 + lp_cnt) * stride - 1];
+    }
+    /* ULL keeps the splat multiply unsigned; the signed 64-bit form overflows for values >= 0x80. */
+    src0 = (src0 + 2) >> 2;
+    src1 = (src1 + 2) >> 2;
+    out0 = src0 * 0x0101010101010101ULL;
+    out1 = src1 * 0x0101010101010101ULL;
+
+    for (lp_cnt = 4; lp_cnt--;) {
+        SD(out0, src);
+        SD(out1, (src + 4 * stride));
+        src += stride;
+    }
+}
+
+static void intra_predict_vert_dc_8x8_msa(uint8_t *src, int32_t stride)
+{
+    /* 8x8 DC prediction from the row above only, written in place: the left
+     * and right 4-byte halves of every row each get their own fill value. */
+    uint8_t row;
+    uint32_t fill_l, fill_r;
+    v16u8 above;
+    v8u16 sums16;
+    v4u32 sums32;
+
+    above = LD_UB(src - stride);
+    sums16 = __msa_hadd_u_h(above, above);
+    sums32 = __msa_hadd_u_w(sums16, sums16);
+    sums32 = (v4u32) __msa_srari_w((v4i32) sums32, 2);
+    /* Splat byte 0 resp. byte 4 of the rounded sums and take one word. */
+    fill_l = __msa_copy_u_w((v4i32) __msa_splati_b((v16i8) sums32, 0), 0);
+    fill_r = __msa_copy_u_w((v4i32) __msa_splati_b((v16i8) sums32, 4), 0);
+
+    for (row = 0; row < 8; row++) {
+        SW(fill_l, src);
+        SW(fill_r, src + 4);
+        src += stride;
+    }
+}
+
+static void intra_predict_mad_cow_dc_l0t_8x8_msa(uint8_t *src, int32_t stride)
+{
+    /* 8x8 DC variant written in place.  Only the top-left quadrant mixes in
+     * the left column (rows 0..3); the other three quadrants are filled from
+     * the reduced row above alone. */
+    uint8_t row;
+    uint32_t top_l, top_r, left_hi = 0;
+    uint32_t fill_tl, fill_l, fill_r;
+    v16u8 above;
+    v8u16 sums16;
+    v4u32 sums32;
+
+    above = LD_UB(src - stride);
+    sums16 = __msa_hadd_u_h(above, above);
+    sums32 = __msa_hadd_u_w(sums16, sums16);
+    top_l = __msa_copy_u_w((v4i32) sums32, 0);
+    top_r = __msa_copy_u_w((v4i32) sums32, 1);
+
+    for (row = 0; row < 4; row++) {
+        left_hi += src[row * stride - 1];
+    }
+    fill_tl = ((top_l + left_hi + 4) >> 3) * 0x01010101;
+    fill_l = ((top_l + 2) >> 2) * 0x01010101;
+    fill_r = ((top_r + 2) >> 2) * 0x01010101;
+
+    for (row = 0; row < 4; row++) {
+        SW(fill_tl, src);
+        SW(fill_r, src + 4);
+        SW(fill_l, src + stride * 4);
+        SW(fill_r, src + stride * 4 + 4);
+        src += stride;
+    }
+}
+
+static void intra_predict_mad_cow_dc_0lt_8x8_msa(uint8_t *src, int32_t stride)
+{
+    /* 8x8 DC variant written in place.  The left column is read for rows
+     * 4..7 only: the top quadrants are filled from the reduced row above,
+     * the bottom-left one from the left column, and the bottom-right one
+     * from both. */
+    uint8_t row;
+    uint32_t top_l, top_r, left_lo = 0;
+    uint32_t fill_tl, fill_tr, fill_bl, fill_br;
+    v16u8 above;
+    v8u16 sums16;
+    v4u32 sums32;
+
+    above = LD_UB(src - stride);
+    sums16 = __msa_hadd_u_h(above, above);
+    sums32 = __msa_hadd_u_w(sums16, sums16);
+    top_l = __msa_copy_u_w((v4i32) sums32, 0);
+    top_r = __msa_copy_u_w((v4i32) sums32, 1);
+
+    for (row = 0; row < 4; row++) {
+        left_lo += src[(4 + row) * stride - 1];
+    }
+
+    /* Round, shift, then replicate each value into all four bytes. */
+    fill_tl = ((top_l + 2) >> 2) * 0x01010101;
+    fill_tr = ((top_r + 2) >> 2) * 0x01010101;
+    fill_bl = ((left_lo + 2) >> 2) * 0x01010101;
+    fill_br = ((top_r + left_lo + 4) >> 3) * 0x01010101;
+
+    for (row = 0; row < 4; row++) {
+        SW(fill_tl, src);
+        SW(fill_tr, src + 4);
+        SW(fill_bl, src + stride * 4);
+        SW(fill_br, src + stride * 4 + 4);
+        src += stride;
+    }
+}
+
+static void intra_predict_mad_cow_dc_l00_8x8_msa(uint8_t *src, int32_t stride)
+{   /* 8x8 DC variant in place: rows 0..3 get the rounded mean of their four left neighbours, rows 4..7 get 0x80. */
+    uint8_t lp_cnt;
+    uint32_t src0 = 0;
+    uint64_t out0, out1;
+    /* Sum the left neighbours of rows 0..3. */
+    for (lp_cnt = 0; lp_cnt < 4; lp_cnt++) {
+        src0 += src[lp_cnt * stride - 1];
+    }
+    /* ULL keeps the splat multiply unsigned; the signed 64-bit form overflows for values >= 0x80. */
+    src0 = (src0 + 2) >> 2;
+    out0 = src0 * 0x0101010101010101ULL;
+    out1 = 0x8080808080808080ULL;
+
+    for (lp_cnt = 4; lp_cnt--;) {
+        SD(out0, src);
+        SD(out1, src + stride * 4);
+        src += stride;
+    }
+}
+
+static void intra_predict_mad_cow_dc_0l0_8x8_msa(uint8_t *src, int32_t stride)
+{   /* 8x8 DC variant in place: rows 0..3 get 0x80, rows 4..7 get the rounded mean of their four left neighbours. */
+    uint8_t lp_cnt;
+    uint32_t src0 = 0;
+    uint64_t out0, out1;
+    /* Sum the left neighbours of rows 4..7. */
+    for (lp_cnt = 0; lp_cnt < 4; lp_cnt++) {
+        src0 += src[(4 + lp_cnt) * stride - 1];
+    }
+
+    src0 = (src0 + 2) >> 2;
+    /* ULL keeps the splat multiply unsigned; the signed 64-bit form overflows for values >= 0x80. */
+    out0 = 0x8080808080808080ULL;
+    out1 = src0 * 0x0101010101010101ULL;
+
+    for (lp_cnt = 4; lp_cnt--;) {
+        SD(out0, src);
+        SD(out1, src + stride * 4);
+        src += stride;
+    }
+}
+
+void ff_h264_intra_predict_plane_8x8_msa(uint8_t *src, ptrdiff_t stride)
+{   /* Public entry point for the 8x8 plane predictor; the block at src is predicted in place. */
+    intra_predict_plane_8x8_msa(src, (int32_t) stride); /* the helper takes a 32-bit stride */
+}
+
+void ff_h264_intra_predict_dc_4blk_8x8_msa(uint8_t *src, ptrdiff_t stride)
+{   /* Public entry point for the four-quadrant 8x8 DC predictor; predicts in place. */
+    intra_predict_dc_4blk_8x8_msa(src, (int32_t) stride); /* the helper takes a 32-bit stride */
+}
+
+void ff_h264_intra_predict_hor_dc_8x8_msa(uint8_t *src, ptrdiff_t stride)
+{   /* Public entry point for the left-column-only 8x8 DC predictor; predicts in place. */
+    intra_predict_hor_dc_8x8_msa(src, (int32_t) stride); /* the helper takes a 32-bit stride */
+}
+
+void ff_h264_intra_predict_vert_dc_8x8_msa(uint8_t *src, ptrdiff_t stride)
+{   /* Public entry point for the top-row-only 8x8 DC predictor; predicts in place. */
+    intra_predict_vert_dc_8x8_msa(src, (int32_t) stride); /* the helper takes a 32-bit stride */
+}
+
+void ff_h264_intra_predict_mad_cow_dc_l0t_8x8_msa(uint8_t *src,
+                                                  ptrdiff_t stride)
+{   /* Public entry point for the "l0t" 8x8 DC variant; predicts in place. */
+    intra_predict_mad_cow_dc_l0t_8x8_msa(src, (int32_t) stride); /* the helper takes a 32-bit stride */
+}
+
+void ff_h264_intra_predict_mad_cow_dc_0lt_8x8_msa(uint8_t *src,
+                                                  ptrdiff_t stride)
+{   /* Public entry point for the "0lt" 8x8 DC variant; predicts in place. */
+    intra_predict_mad_cow_dc_0lt_8x8_msa(src, (int32_t) stride); /* the helper takes a 32-bit stride */
+}
+
+void ff_h264_intra_predict_mad_cow_dc_l00_8x8_msa(uint8_t *src,
+                                                  ptrdiff_t stride)
+{   /* Public entry point for the "l00" 8x8 DC variant; predicts in place. */
+    intra_predict_mad_cow_dc_l00_8x8_msa(src, (int32_t) stride); /* the helper takes a 32-bit stride */
+}
+
+void ff_h264_intra_predict_mad_cow_dc_0l0_8x8_msa(uint8_t *src,
+                                                  ptrdiff_t stride)
+{   /* Public entry point for the "0l0" 8x8 DC variant; predicts in place. */
+    intra_predict_mad_cow_dc_0l0_8x8_msa(src, (int32_t) stride); /* the helper takes a 32-bit stride */
+}
+
+void ff_h264_intra_predict_plane_16x16_msa(uint8_t *src, ptrdiff_t stride)
+{   /* Public entry point for the 16x16 plane predictor; the block at src is predicted in place. */
+    intra_predict_plane_16x16_msa(src, (int32_t) stride); /* the helper takes a 32-bit stride */
+}
+
+void ff_h264_intra_pred_vert_8x8_msa(uint8_t *src, ptrdiff_t stride)
+{
+    /* Vertical 8x8 prediction in place: the row above src is the source. */
+    uint8_t *above = src - stride;
+    intra_predict_vert_8x8_msa(above, src, stride);
+}
+
+void ff_h264_intra_pred_horiz_8x8_msa(uint8_t *src, ptrdiff_t stride)
+{
+    /* Horizontal 8x8 prediction in place: the column left of src is the source. */
+    uint8_t *left = src - 1;
+    intra_predict_horiz_8x8_msa(left, stride, src, stride);
+}
+
+void ff_h264_intra_pred_dc_16x16_msa(uint8_t *src, ptrdiff_t stride)
+{
+    /* 16x16 DC prediction in place with both neighbours flagged usable. */
+    uint8_t *above = src - stride;
+    uint8_t *left = src - 1;
+
+    intra_predict_dc_16x16_msa(above, left, stride, src, stride, 1, 1);
+}
+
+void ff_h264_intra_pred_vert_16x16_msa(uint8_t *src, ptrdiff_t stride)
+{
+    /* Vertical 16x16 prediction in place: the row above src is the source. */
+    uint8_t *above = src - stride;
+    intra_predict_vert_16x16_msa(above, src, stride);
+}
+
+void ff_h264_intra_pred_horiz_16x16_msa(uint8_t *src, ptrdiff_t stride)
+{
+    /* Horizontal 16x16 prediction in place: the column left of src is the source. */
+    uint8_t *left = src - 1;
+    intra_predict_horiz_16x16_msa(left, stride, src, stride);
+}
+
+void ff_h264_intra_pred_dc_left_16x16_msa(uint8_t *src, ptrdiff_t stride)
+{
+    /* 16x16 DC prediction in place with only the left column flagged usable. */
+    uint8_t *above = src - stride;
+    uint8_t *left = src - 1;
+
+    intra_predict_dc_16x16_msa(above, left, stride, src, stride, 0, 1);
+}
+
+void ff_h264_intra_pred_dc_top_16x16_msa(uint8_t *src, ptrdiff_t stride)
+{
+    /* 16x16 DC prediction in place with only the row above flagged usable. */
+    uint8_t *above = src - stride;
+    uint8_t *left = src - 1;
+
+    intra_predict_dc_16x16_msa(above, left, stride, src, stride, 1, 0);
+}
+
+void ff_h264_intra_pred_dc_128_8x8_msa(uint8_t *src, ptrdiff_t stride)
+{
+    /* 8x8 DC prediction in place with no neighbour flagged usable (128 fill). */
+    uint8_t *above = src - stride;
+    uint8_t *left = src - 1;
+
+    intra_predict_dc_8x8_msa(above, left, stride, src, stride, 0, 0);
+}
+
+void ff_h264_intra_pred_dc_128_16x16_msa(uint8_t *src, ptrdiff_t stride)
+{
+    /* 16x16 DC prediction in place with no neighbour flagged usable (128 fill). */
+    uint8_t *above = src - stride;
+    uint8_t *left = src - 1;
+
+    intra_predict_dc_16x16_msa(above, left, stride, src, stride, 0, 0);
+}
+
+void ff_vp8_pred8x8_127_dc_8_msa(uint8_t *src, ptrdiff_t stride)
+{   /* Public entry point: forwards to the macro-generated 8x8 fill helper for the value 127. */
+    intra_predict_127dc_8x8_msa(src, (int32_t) stride); /* the helper takes a 32-bit stride */
+}
+
+void ff_vp8_pred8x8_129_dc_8_msa(uint8_t *src, ptrdiff_t stride)
+{   /* Public entry point: forwards to the macro-generated 8x8 fill helper for the value 129. */
+    intra_predict_129dc_8x8_msa(src, (int32_t) stride); /* the helper takes a 32-bit stride */
+}
+
+void ff_vp8_pred16x16_127_dc_8_msa(uint8_t *src, ptrdiff_t stride)
+{   /* Public entry point: forwards to the macro-generated 16x16 fill helper for the value 127. */
+    intra_predict_127dc_16x16_msa(src, (int32_t) stride); /* the helper takes a 32-bit stride */
+}
+
+void ff_vp8_pred16x16_129_dc_8_msa(uint8_t *src, ptrdiff_t stride)
+{   /* Public entry point: forwards to the macro-generated 16x16 fill helper for the value 129. */
+    intra_predict_129dc_16x16_msa(src, (int32_t) stride); /* the helper takes a 32-bit stride */
+}
diff --git a/libavcodec/mips/h264qpel_init_mips.c b/libavcodec/mips/h264qpel_init_mips.c
new file mode 100644
index 00000000..92219f88
--- /dev/null
+++ b/libavcodec/mips/h264qpel_init_mips.c
@@ -0,0 +1,249 @@
+/*
+ * Copyright (c) 2015 Parag Salasakar (Parag.Salasakar@imgtec.com)
+ *                    Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "h264dsp_mips.h"
+
+#if HAVE_MSA
+/* 8-bit only. Slot [n][x + 4 * y] gets mcXY; n = 0, 1, 2 is qpel16, 8, 4. */
+static av_cold void h264qpel_init_msa(H264QpelContext *c, int bit_depth)
+{
+    if (bit_depth != 8) return;
+    c->put_h264_qpel_pixels_tab[0][0] = ff_put_h264_qpel16_mc00_msa;
+    c->put_h264_qpel_pixels_tab[0][1] = ff_put_h264_qpel16_mc10_msa;
+    c->put_h264_qpel_pixels_tab[0][2] = ff_put_h264_qpel16_mc20_msa;
+    c->put_h264_qpel_pixels_tab[0][3] = ff_put_h264_qpel16_mc30_msa;
+    c->put_h264_qpel_pixels_tab[0][4] = ff_put_h264_qpel16_mc01_msa;
+    c->put_h264_qpel_pixels_tab[0][5] = ff_put_h264_qpel16_mc11_msa;
+    c->put_h264_qpel_pixels_tab[0][6] = ff_put_h264_qpel16_mc21_msa;
+    c->put_h264_qpel_pixels_tab[0][7] = ff_put_h264_qpel16_mc31_msa;
+    c->put_h264_qpel_pixels_tab[0][8] = ff_put_h264_qpel16_mc02_msa;
+    c->put_h264_qpel_pixels_tab[0][9] = ff_put_h264_qpel16_mc12_msa;
+    c->put_h264_qpel_pixels_tab[0][10] = ff_put_h264_qpel16_mc22_msa;
+    c->put_h264_qpel_pixels_tab[0][11] = ff_put_h264_qpel16_mc32_msa;
+    c->put_h264_qpel_pixels_tab[0][12] = ff_put_h264_qpel16_mc03_msa;
+    c->put_h264_qpel_pixels_tab[0][13] = ff_put_h264_qpel16_mc13_msa;
+    c->put_h264_qpel_pixels_tab[0][14] = ff_put_h264_qpel16_mc23_msa;
+    c->put_h264_qpel_pixels_tab[0][15] = ff_put_h264_qpel16_mc33_msa;
+
+    c->put_h264_qpel_pixels_tab[1][0] = ff_put_h264_qpel8_mc00_msa;
+    c->put_h264_qpel_pixels_tab[1][1] = ff_put_h264_qpel8_mc10_msa;
+    c->put_h264_qpel_pixels_tab[1][2] = ff_put_h264_qpel8_mc20_msa;
+    c->put_h264_qpel_pixels_tab[1][3] = ff_put_h264_qpel8_mc30_msa;
+    c->put_h264_qpel_pixels_tab[1][4] = ff_put_h264_qpel8_mc01_msa;
+    c->put_h264_qpel_pixels_tab[1][5] = ff_put_h264_qpel8_mc11_msa;
+    c->put_h264_qpel_pixels_tab[1][6] = ff_put_h264_qpel8_mc21_msa;
+    c->put_h264_qpel_pixels_tab[1][7] = ff_put_h264_qpel8_mc31_msa;
+    c->put_h264_qpel_pixels_tab[1][8] = ff_put_h264_qpel8_mc02_msa;
+    c->put_h264_qpel_pixels_tab[1][9] = ff_put_h264_qpel8_mc12_msa;
+    c->put_h264_qpel_pixels_tab[1][10] = ff_put_h264_qpel8_mc22_msa;
+    c->put_h264_qpel_pixels_tab[1][11] = ff_put_h264_qpel8_mc32_msa;
+    c->put_h264_qpel_pixels_tab[1][12] = ff_put_h264_qpel8_mc03_msa;
+    c->put_h264_qpel_pixels_tab[1][13] = ff_put_h264_qpel8_mc13_msa;
+    c->put_h264_qpel_pixels_tab[1][14] = ff_put_h264_qpel8_mc23_msa;
+    c->put_h264_qpel_pixels_tab[1][15] = ff_put_h264_qpel8_mc33_msa;
+    /* Note: no MSA entry is installed for put [2][0] (qpel4 mc00). */
+    c->put_h264_qpel_pixels_tab[2][1] = ff_put_h264_qpel4_mc10_msa;
+    c->put_h264_qpel_pixels_tab[2][2] = ff_put_h264_qpel4_mc20_msa;
+    c->put_h264_qpel_pixels_tab[2][3] = ff_put_h264_qpel4_mc30_msa;
+    c->put_h264_qpel_pixels_tab[2][4] = ff_put_h264_qpel4_mc01_msa;
+    c->put_h264_qpel_pixels_tab[2][5] = ff_put_h264_qpel4_mc11_msa;
+    c->put_h264_qpel_pixels_tab[2][6] = ff_put_h264_qpel4_mc21_msa;
+    c->put_h264_qpel_pixels_tab[2][7] = ff_put_h264_qpel4_mc31_msa;
+    c->put_h264_qpel_pixels_tab[2][8] = ff_put_h264_qpel4_mc02_msa;
+    c->put_h264_qpel_pixels_tab[2][9] = ff_put_h264_qpel4_mc12_msa;
+    c->put_h264_qpel_pixels_tab[2][10] = ff_put_h264_qpel4_mc22_msa;
+    c->put_h264_qpel_pixels_tab[2][11] = ff_put_h264_qpel4_mc32_msa;
+    c->put_h264_qpel_pixels_tab[2][12] = ff_put_h264_qpel4_mc03_msa;
+    c->put_h264_qpel_pixels_tab[2][13] = ff_put_h264_qpel4_mc13_msa;
+    c->put_h264_qpel_pixels_tab[2][14] = ff_put_h264_qpel4_mc23_msa;
+    c->put_h264_qpel_pixels_tab[2][15] = ff_put_h264_qpel4_mc33_msa;
+
+    c->avg_h264_qpel_pixels_tab[0][0] = ff_avg_h264_qpel16_mc00_msa;
+    c->avg_h264_qpel_pixels_tab[0][1] = ff_avg_h264_qpel16_mc10_msa;
+    c->avg_h264_qpel_pixels_tab[0][2] = ff_avg_h264_qpel16_mc20_msa;
+    c->avg_h264_qpel_pixels_tab[0][3] = ff_avg_h264_qpel16_mc30_msa;
+    c->avg_h264_qpel_pixels_tab[0][4] = ff_avg_h264_qpel16_mc01_msa;
+    c->avg_h264_qpel_pixels_tab[0][5] = ff_avg_h264_qpel16_mc11_msa;
+    c->avg_h264_qpel_pixels_tab[0][6] = ff_avg_h264_qpel16_mc21_msa;
+    c->avg_h264_qpel_pixels_tab[0][7] = ff_avg_h264_qpel16_mc31_msa;
+    c->avg_h264_qpel_pixels_tab[0][8] = ff_avg_h264_qpel16_mc02_msa;
+    c->avg_h264_qpel_pixels_tab[0][9] = ff_avg_h264_qpel16_mc12_msa;
+    c->avg_h264_qpel_pixels_tab[0][10] = ff_avg_h264_qpel16_mc22_msa;
+    c->avg_h264_qpel_pixels_tab[0][11] = ff_avg_h264_qpel16_mc32_msa;
+    c->avg_h264_qpel_pixels_tab[0][12] = ff_avg_h264_qpel16_mc03_msa;
+    c->avg_h264_qpel_pixels_tab[0][13] = ff_avg_h264_qpel16_mc13_msa;
+    c->avg_h264_qpel_pixels_tab[0][14] = ff_avg_h264_qpel16_mc23_msa;
+    c->avg_h264_qpel_pixels_tab[0][15] = ff_avg_h264_qpel16_mc33_msa;
+
+    c->avg_h264_qpel_pixels_tab[1][0] = ff_avg_h264_qpel8_mc00_msa;
+    c->avg_h264_qpel_pixels_tab[1][1] = ff_avg_h264_qpel8_mc10_msa;
+    c->avg_h264_qpel_pixels_tab[1][2] = ff_avg_h264_qpel8_mc20_msa;
+    c->avg_h264_qpel_pixels_tab[1][3] = ff_avg_h264_qpel8_mc30_msa;
+    c->avg_h264_qpel_pixels_tab[1][4] = ff_avg_h264_qpel8_mc01_msa;
+    c->avg_h264_qpel_pixels_tab[1][5] = ff_avg_h264_qpel8_mc11_msa;
+    c->avg_h264_qpel_pixels_tab[1][6] = ff_avg_h264_qpel8_mc21_msa;
+    c->avg_h264_qpel_pixels_tab[1][7] = ff_avg_h264_qpel8_mc31_msa;
+    c->avg_h264_qpel_pixels_tab[1][8] = ff_avg_h264_qpel8_mc02_msa;
+    c->avg_h264_qpel_pixels_tab[1][9] = ff_avg_h264_qpel8_mc12_msa;
+    c->avg_h264_qpel_pixels_tab[1][10] = ff_avg_h264_qpel8_mc22_msa;
+    c->avg_h264_qpel_pixels_tab[1][11] = ff_avg_h264_qpel8_mc32_msa;
+    c->avg_h264_qpel_pixels_tab[1][12] = ff_avg_h264_qpel8_mc03_msa;
+    c->avg_h264_qpel_pixels_tab[1][13] = ff_avg_h264_qpel8_mc13_msa;
+    c->avg_h264_qpel_pixels_tab[1][14] = ff_avg_h264_qpel8_mc23_msa;
+    c->avg_h264_qpel_pixels_tab[1][15] = ff_avg_h264_qpel8_mc33_msa;
+
+    c->avg_h264_qpel_pixels_tab[2][0] = ff_avg_h264_qpel4_mc00_msa;
+    c->avg_h264_qpel_pixels_tab[2][1] = ff_avg_h264_qpel4_mc10_msa;
+    c->avg_h264_qpel_pixels_tab[2][2] = ff_avg_h264_qpel4_mc20_msa;
+    c->avg_h264_qpel_pixels_tab[2][3] = ff_avg_h264_qpel4_mc30_msa;
+    c->avg_h264_qpel_pixels_tab[2][4] = ff_avg_h264_qpel4_mc01_msa;
+    c->avg_h264_qpel_pixels_tab[2][5] = ff_avg_h264_qpel4_mc11_msa;
+    c->avg_h264_qpel_pixels_tab[2][6] = ff_avg_h264_qpel4_mc21_msa;
+    c->avg_h264_qpel_pixels_tab[2][7] = ff_avg_h264_qpel4_mc31_msa;
+    c->avg_h264_qpel_pixels_tab[2][8] = ff_avg_h264_qpel4_mc02_msa;
+    c->avg_h264_qpel_pixels_tab[2][9] = ff_avg_h264_qpel4_mc12_msa;
+    c->avg_h264_qpel_pixels_tab[2][10] = ff_avg_h264_qpel4_mc22_msa;
+    c->avg_h264_qpel_pixels_tab[2][11] = ff_avg_h264_qpel4_mc32_msa;
+    c->avg_h264_qpel_pixels_tab[2][12] = ff_avg_h264_qpel4_mc03_msa;
+    c->avg_h264_qpel_pixels_tab[2][13] = ff_avg_h264_qpel4_mc13_msa;
+    c->avg_h264_qpel_pixels_tab[2][14] = ff_avg_h264_qpel4_mc23_msa;
+    c->avg_h264_qpel_pixels_tab[2][15] = ff_avg_h264_qpel4_mc33_msa;
+}
+#endif  // #if HAVE_MSA
+
+#if HAVE_MMI
+/* 8-bit only. Fills every put/avg slot [0..2][x + 4 * y] with MMI mcXY. */
+static av_cold void h264qpel_init_mmi(H264QpelContext *c, int bit_depth)
+{
+    if (bit_depth != 8) return;
+    c->put_h264_qpel_pixels_tab[0][0] = ff_put_h264_qpel16_mc00_mmi;
+    c->put_h264_qpel_pixels_tab[0][1] = ff_put_h264_qpel16_mc10_mmi;
+    c->put_h264_qpel_pixels_tab[0][2] = ff_put_h264_qpel16_mc20_mmi;
+    c->put_h264_qpel_pixels_tab[0][3] = ff_put_h264_qpel16_mc30_mmi;
+    c->put_h264_qpel_pixels_tab[0][4] = ff_put_h264_qpel16_mc01_mmi;
+    c->put_h264_qpel_pixels_tab[0][5] = ff_put_h264_qpel16_mc11_mmi;
+    c->put_h264_qpel_pixels_tab[0][6] = ff_put_h264_qpel16_mc21_mmi;
+    c->put_h264_qpel_pixels_tab[0][7] = ff_put_h264_qpel16_mc31_mmi;
+    c->put_h264_qpel_pixels_tab[0][8] = ff_put_h264_qpel16_mc02_mmi;
+    c->put_h264_qpel_pixels_tab[0][9] = ff_put_h264_qpel16_mc12_mmi;
+    c->put_h264_qpel_pixels_tab[0][10] = ff_put_h264_qpel16_mc22_mmi;
+    c->put_h264_qpel_pixels_tab[0][11] = ff_put_h264_qpel16_mc32_mmi;
+    c->put_h264_qpel_pixels_tab[0][12] = ff_put_h264_qpel16_mc03_mmi;
+    c->put_h264_qpel_pixels_tab[0][13] = ff_put_h264_qpel16_mc13_mmi;
+    c->put_h264_qpel_pixels_tab[0][14] = ff_put_h264_qpel16_mc23_mmi;
+    c->put_h264_qpel_pixels_tab[0][15] = ff_put_h264_qpel16_mc33_mmi;
+
+    c->put_h264_qpel_pixels_tab[1][0] = ff_put_h264_qpel8_mc00_mmi;
+    c->put_h264_qpel_pixels_tab[1][1] = ff_put_h264_qpel8_mc10_mmi;
+    c->put_h264_qpel_pixels_tab[1][2] = ff_put_h264_qpel8_mc20_mmi;
+    c->put_h264_qpel_pixels_tab[1][3] = ff_put_h264_qpel8_mc30_mmi;
+    c->put_h264_qpel_pixels_tab[1][4] = ff_put_h264_qpel8_mc01_mmi;
+    c->put_h264_qpel_pixels_tab[1][5] = ff_put_h264_qpel8_mc11_mmi;
+    c->put_h264_qpel_pixels_tab[1][6] = ff_put_h264_qpel8_mc21_mmi;
+    c->put_h264_qpel_pixels_tab[1][7] = ff_put_h264_qpel8_mc31_mmi;
+    c->put_h264_qpel_pixels_tab[1][8] = ff_put_h264_qpel8_mc02_mmi;
+    c->put_h264_qpel_pixels_tab[1][9] = ff_put_h264_qpel8_mc12_mmi;
+    c->put_h264_qpel_pixels_tab[1][10] = ff_put_h264_qpel8_mc22_mmi;
+    c->put_h264_qpel_pixels_tab[1][11] = ff_put_h264_qpel8_mc32_mmi;
+    c->put_h264_qpel_pixels_tab[1][12] = ff_put_h264_qpel8_mc03_mmi;
+    c->put_h264_qpel_pixels_tab[1][13] = ff_put_h264_qpel8_mc13_mmi;
+    c->put_h264_qpel_pixels_tab[1][14] = ff_put_h264_qpel8_mc23_mmi;
+    c->put_h264_qpel_pixels_tab[1][15] = ff_put_h264_qpel8_mc33_mmi;
+
+    c->put_h264_qpel_pixels_tab[2][0] = ff_put_h264_qpel4_mc00_mmi;
+    c->put_h264_qpel_pixels_tab[2][1] = ff_put_h264_qpel4_mc10_mmi;
+    c->put_h264_qpel_pixels_tab[2][2] = ff_put_h264_qpel4_mc20_mmi;
+    c->put_h264_qpel_pixels_tab[2][3] = ff_put_h264_qpel4_mc30_mmi;
+    c->put_h264_qpel_pixels_tab[2][4] = ff_put_h264_qpel4_mc01_mmi;
+    c->put_h264_qpel_pixels_tab[2][5] = ff_put_h264_qpel4_mc11_mmi;
+    c->put_h264_qpel_pixels_tab[2][6] = ff_put_h264_qpel4_mc21_mmi;
+    c->put_h264_qpel_pixels_tab[2][7] = ff_put_h264_qpel4_mc31_mmi;
+    c->put_h264_qpel_pixels_tab[2][8] = ff_put_h264_qpel4_mc02_mmi;
+    c->put_h264_qpel_pixels_tab[2][9] = ff_put_h264_qpel4_mc12_mmi;
+    c->put_h264_qpel_pixels_tab[2][10] = ff_put_h264_qpel4_mc22_mmi;
+    c->put_h264_qpel_pixels_tab[2][11] = ff_put_h264_qpel4_mc32_mmi;
+    c->put_h264_qpel_pixels_tab[2][12] = ff_put_h264_qpel4_mc03_mmi;
+    c->put_h264_qpel_pixels_tab[2][13] = ff_put_h264_qpel4_mc13_mmi;
+    c->put_h264_qpel_pixels_tab[2][14] = ff_put_h264_qpel4_mc23_mmi;
+    c->put_h264_qpel_pixels_tab[2][15] = ff_put_h264_qpel4_mc33_mmi;
+
+    c->avg_h264_qpel_pixels_tab[0][0] = ff_avg_h264_qpel16_mc00_mmi;
+    c->avg_h264_qpel_pixels_tab[0][1] = ff_avg_h264_qpel16_mc10_mmi;
+    c->avg_h264_qpel_pixels_tab[0][2] = ff_avg_h264_qpel16_mc20_mmi;
+    c->avg_h264_qpel_pixels_tab[0][3] = ff_avg_h264_qpel16_mc30_mmi;
+    c->avg_h264_qpel_pixels_tab[0][4] = ff_avg_h264_qpel16_mc01_mmi;
+    c->avg_h264_qpel_pixels_tab[0][5] = ff_avg_h264_qpel16_mc11_mmi;
+    c->avg_h264_qpel_pixels_tab[0][6] = ff_avg_h264_qpel16_mc21_mmi;
+    c->avg_h264_qpel_pixels_tab[0][7] = ff_avg_h264_qpel16_mc31_mmi;
+    c->avg_h264_qpel_pixels_tab[0][8] = ff_avg_h264_qpel16_mc02_mmi;
+    c->avg_h264_qpel_pixels_tab[0][9] = ff_avg_h264_qpel16_mc12_mmi;
+    c->avg_h264_qpel_pixels_tab[0][10] = ff_avg_h264_qpel16_mc22_mmi;
+    c->avg_h264_qpel_pixels_tab[0][11] = ff_avg_h264_qpel16_mc32_mmi;
+    c->avg_h264_qpel_pixels_tab[0][12] = ff_avg_h264_qpel16_mc03_mmi;
+    c->avg_h264_qpel_pixels_tab[0][13] = ff_avg_h264_qpel16_mc13_mmi;
+    c->avg_h264_qpel_pixels_tab[0][14] = ff_avg_h264_qpel16_mc23_mmi;
+    c->avg_h264_qpel_pixels_tab[0][15] = ff_avg_h264_qpel16_mc33_mmi;
+
+    c->avg_h264_qpel_pixels_tab[1][0] = ff_avg_h264_qpel8_mc00_mmi;
+    c->avg_h264_qpel_pixels_tab[1][1] = ff_avg_h264_qpel8_mc10_mmi;
+    c->avg_h264_qpel_pixels_tab[1][2] = ff_avg_h264_qpel8_mc20_mmi;
+    c->avg_h264_qpel_pixels_tab[1][3] = ff_avg_h264_qpel8_mc30_mmi;
+    c->avg_h264_qpel_pixels_tab[1][4] = ff_avg_h264_qpel8_mc01_mmi;
+    c->avg_h264_qpel_pixels_tab[1][5] = ff_avg_h264_qpel8_mc11_mmi;
+    c->avg_h264_qpel_pixels_tab[1][6] = ff_avg_h264_qpel8_mc21_mmi;
+    c->avg_h264_qpel_pixels_tab[1][7] = ff_avg_h264_qpel8_mc31_mmi;
+    c->avg_h264_qpel_pixels_tab[1][8] = ff_avg_h264_qpel8_mc02_mmi;
+    c->avg_h264_qpel_pixels_tab[1][9] = ff_avg_h264_qpel8_mc12_mmi;
+    c->avg_h264_qpel_pixels_tab[1][10] = ff_avg_h264_qpel8_mc22_mmi;
+    c->avg_h264_qpel_pixels_tab[1][11] = ff_avg_h264_qpel8_mc32_mmi;
+    c->avg_h264_qpel_pixels_tab[1][12] = ff_avg_h264_qpel8_mc03_mmi;
+    c->avg_h264_qpel_pixels_tab[1][13] = ff_avg_h264_qpel8_mc13_mmi;
+    c->avg_h264_qpel_pixels_tab[1][14] = ff_avg_h264_qpel8_mc23_mmi;
+    c->avg_h264_qpel_pixels_tab[1][15] = ff_avg_h264_qpel8_mc33_mmi;
+
+    c->avg_h264_qpel_pixels_tab[2][0] = ff_avg_h264_qpel4_mc00_mmi;
+    c->avg_h264_qpel_pixels_tab[2][1] = ff_avg_h264_qpel4_mc10_mmi;
+    c->avg_h264_qpel_pixels_tab[2][2] = ff_avg_h264_qpel4_mc20_mmi;
+    c->avg_h264_qpel_pixels_tab[2][3] = ff_avg_h264_qpel4_mc30_mmi;
+    c->avg_h264_qpel_pixels_tab[2][4] = ff_avg_h264_qpel4_mc01_mmi;
+    c->avg_h264_qpel_pixels_tab[2][5] = ff_avg_h264_qpel4_mc11_mmi;
+    c->avg_h264_qpel_pixels_tab[2][6] = ff_avg_h264_qpel4_mc21_mmi;
+    c->avg_h264_qpel_pixels_tab[2][7] = ff_avg_h264_qpel4_mc31_mmi;
+    c->avg_h264_qpel_pixels_tab[2][8] = ff_avg_h264_qpel4_mc02_mmi;
+    c->avg_h264_qpel_pixels_tab[2][9] = ff_avg_h264_qpel4_mc12_mmi;
+    c->avg_h264_qpel_pixels_tab[2][10] = ff_avg_h264_qpel4_mc22_mmi;
+    c->avg_h264_qpel_pixels_tab[2][11] = ff_avg_h264_qpel4_mc32_mmi;
+    c->avg_h264_qpel_pixels_tab[2][12] = ff_avg_h264_qpel4_mc03_mmi;
+    c->avg_h264_qpel_pixels_tab[2][13] = ff_avg_h264_qpel4_mc13_mmi;
+    c->avg_h264_qpel_pixels_tab[2][14] = ff_avg_h264_qpel4_mc23_mmi;
+    c->avg_h264_qpel_pixels_tab[2][15] = ff_avg_h264_qpel4_mc33_mmi;
+}
+#endif /* HAVE_MMI */
+
+av_cold void ff_h264qpel_init_mips(H264QpelContext *c, int bit_depth)
+{   /* Compile-time selection only: each enabled backend fills c in turn. */
+#if HAVE_MSA
+    h264qpel_init_msa(c, bit_depth);
+#endif  // #if HAVE_MSA
+#if HAVE_MMI
+    h264qpel_init_mmi(c, bit_depth); /* runs last: overwrites MSA entries */
+#endif /* HAVE_MMI */
+}
diff --git a/libavcodec/mips/h264qpel_mmi.c b/libavcodec/mips/h264qpel_mmi.c
new file mode 100644
index 00000000..e04a2d59
--- /dev/null
+++ b/libavcodec/mips/h264qpel_mmi.c
@@ -0,0 +1,2637 @@
+/*
+ * Loongson SIMD optimized h264qpel
+ *
+ * Copyright (c) 2015 Loongson Technology Corporation Limited
+ * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "h264dsp_mips.h"
+#include "libavcodec/bit_depth_template.c"
+
+/* Copy h rows of 4 bytes from src to dst; h must be at least 1. */
+static inline void copy_block4_mmi(uint8_t *dst, const uint8_t *src,
+        int dstStride, int srcStride, int h)
+{
+    __asm__ volatile (
+        "1:                                     \r\n"
+        "gslwlc1 $f2, 3(%[src])                 \r\n" /* $f2 = 4 bytes at src */
+        "gslwrc1 $f2, 0(%[src])                 \r\n"
+        "gsswlc1 $f2, 3(%[dst])                 \r\n" /* 4 bytes at dst = $f2 */
+        "gsswrc1 $f2, 0(%[dst])                 \r\n"
+        "dadd %[src], %[src], %[srcStride]      \r\n"
+        "dadd %[dst], %[dst], %[dstStride]      \r\n"
+        "daddi %[h], %[h], -1                   \r\n"
+        "bnez %[h], 1b                          \r\n" /* tested after decrement */
+        : [dst]"+&r"(dst),[src]"+&r"(src),[h]"+&r"(h) /* h is modified */
+        : [dstStride]"r"(dstStride),[srcStride]"r"(srcStride)
+        : "$f2","memory"); /* "memory": the asm stores through dst */
+}
+
+/* Copy h rows of 8 bytes from src to dst; h must be at least 1. */
+static inline void copy_block8_mmi(uint8_t *dst, const uint8_t *src,
+        int dstStride, int srcStride, int h)
+{
+    __asm__ volatile (
+        "1:                                     \r\n"
+        "gsldlc1 $f2, 7(%[src])                 \r\n" /* $f2 = 8 bytes at src */
+        "gsldrc1 $f2, 0(%[src])                 \r\n"
+        "gssdlc1 $f2, 7(%[dst])                 \r\n" /* 8 bytes at dst = $f2 */
+        "gssdrc1 $f2, 0(%[dst])                 \r\n"
+        "dadd %[src], %[src], %[srcStride]      \r\n"
+        "dadd %[dst], %[dst], %[dstStride]      \r\n"
+        "daddi %[h], %[h], -1                   \r\n"
+        "bnez %[h], 1b                          \r\n" /* tested after decrement */
+        : [dst]"+&r"(dst),[src]"+&r"(src),[h]"+&r"(h) /* h is modified */
+        : [dstStride]"r"(dstStride),[srcStride]"r"(srcStride)
+        : "$f2","memory"); /* "memory": the asm stores through dst */
+}
+
+/* Copy h rows of 16 bytes from src to dst; h must be at least 1. */
+static inline void copy_block16_mmi(uint8_t *dst, const uint8_t *src,
+        int dstStride, int srcStride, int h)
+{
+    __asm__ volatile (
+        "1:                                     \r\n"
+        "gsldlc1 $f2, 7(%[src])                 \r\n" /* $f2 = src[0..7] */
+        "gsldrc1 $f2, 0(%[src])                 \r\n"
+        "gsldlc1 $f4, 15(%[src])                \r\n" /* $f4 = src[8..15] */
+        "gsldrc1 $f4, 8(%[src])                 \r\n"
+        "gssdlc1 $f2, 7(%[dst])                 \r\n" /* dst[0..7] = $f2 */
+        "gssdrc1 $f2, 0(%[dst])                 \r\n"
+        "gssdlc1 $f4, 15(%[dst])                \r\n" /* dst[8..15] = $f4 */
+        "gssdrc1 $f4, 8(%[dst])                 \r\n"
+        "dadd %[src], %[src], %[srcStride]      \r\n"
+        "dadd %[dst], %[dst], %[dstStride]      \r\n"
+        "daddi %[h], %[h], -1                   \r\n"
+        "bnez %[h], 1b                          \r\n" /* tested after decrement */
+        : [dst]"+&r"(dst),[src]"+&r"(src),[h]"+&r"(h) /* h is modified */
+        : [dstStride]"r"(dstStride),[srcStride]"r"(srcStride)
+        : "$f2","$f4","memory"); /* "memory": the asm stores through dst */
+}
+
+#define op_put(a, b) a = b /* plain store of b into a */
+#define op_avg(a, b) a = rnd_avg_pixel4(a, b) /* merge b into the old a */
+/* Copy h rows of 4 bytes, one shared line_size for both; h must be >= 1. */
+static inline void put_pixels4_mmi(uint8_t *block, const uint8_t *pixels,
+        ptrdiff_t line_size, int h)
+{
+    __asm__ volatile (
+        "1:                                     \r\n"
+        "gslwlc1 $f2, 3(%[pixels])              \r\n" /* $f2 = pixels[0..3] */
+        "gslwrc1 $f2, 0(%[pixels])              \r\n"
+        "gsswlc1 $f2, 3(%[block])               \r\n" /* block[0..3] = $f2 */
+        "gsswrc1 $f2, 0(%[block])               \r\n"
+        "dadd %[pixels], %[pixels], %[line_size]\r\n"
+        "dadd %[block], %[block], %[line_size]  \r\n"
+        "daddi %[h], %[h], -1                   \r\n"
+        "bnez %[h], 1b                          \r\n" /* tested after decrement */
+        : [block]"+&r"(block),[pixels]"+&r"(pixels),[h]"+&r"(h)
+        : [line_size]"r"(line_size) /* h moved to outputs: daddi writes it */
+        : "$f2","memory"); /* "memory": the asm stores through block */
+}
+
+/* Copy h rows of 8 bytes, one shared line_size for both; h must be >= 1. */
+static inline void put_pixels8_mmi(uint8_t *block, const uint8_t *pixels,
+        ptrdiff_t line_size, int h)
+{
+    __asm__ volatile (
+        "1:                                     \r\n"
+        "gsldlc1 $f2, 7(%[pixels])              \r\n" /* $f2 = pixels[0..7] */
+        "gsldrc1 $f2, 0(%[pixels])              \r\n"
+        "gssdlc1 $f2, 7(%[block])               \r\n" /* block[0..7] = $f2 */
+        "gssdrc1 $f2, 0(%[block])               \r\n"
+        "dadd %[pixels], %[pixels], %[line_size]\r\n"
+        "dadd %[block], %[block], %[line_size]  \r\n"
+        "daddi %[h], %[h], -1                   \r\n"
+        "bnez %[h], 1b                          \r\n" /* tested after decrement */
+        : [block]"+&r"(block),[pixels]"+&r"(pixels),[h]"+&r"(h)
+        : [line_size]"r"(line_size) /* h moved to outputs: daddi writes it */
+        : "$f2","memory"); /* "memory": the asm stores through block */
+}
+
+/* Copy h rows of 16 bytes, one shared line_size for both; h must be >= 1. */
+static inline void put_pixels16_mmi(uint8_t *block, const uint8_t *pixels,
+        ptrdiff_t line_size, int h)
+{
+    __asm__ volatile (
+        "1:                                     \r\n"
+        "gsldlc1 $f2, 7(%[pixels])              \r\n" /* $f2 = pixels[0..7] */
+        "gsldrc1 $f2, 0(%[pixels])              \r\n"
+        "gsldlc1 $f4, 15(%[pixels])             \r\n" /* $f4 = pixels[8..15] */
+        "gsldrc1 $f4, 8(%[pixels])              \r\n"
+        "gssdlc1 $f2, 7(%[block])               \r\n" /* block[0..7] = $f2 */
+        "gssdrc1 $f2, 0(%[block])               \r\n"
+        "gssdlc1 $f4, 15(%[block])              \r\n" /* block[8..15] = $f4 */
+        "gssdrc1 $f4, 8(%[block])               \r\n"
+        "dadd %[pixels], %[pixels], %[line_size]\r\n"
+        "dadd %[block], %[block], %[line_size]  \r\n"
+        "daddi %[h], %[h], -1                   \r\n"
+        "bnez %[h], 1b                          \r\n" /* tested after decrement */
+        : [block]"+&r"(block),[pixels]"+&r"(pixels),[h]"+&r"(h)
+        : [line_size]"r"(line_size) /* h moved to outputs: daddi writes it */
+        : "$f2","$f4","memory"); /* "memory": the asm stores through block */
+}
+
+/* block = pavgb(pixels, block) over h rows of 4 bytes; h must be >= 1. */
+static inline void avg_pixels4_mmi(uint8_t *block, const uint8_t *pixels,
+        ptrdiff_t line_size, int h)
+{
+    __asm__ volatile (
+        "1:                                     \r\n"
+        "gslwlc1 $f2, 3(%[pixels])              \r\n" /* $f2 = pixels[0..3] */
+        "gslwrc1 $f2, 0(%[pixels])              \r\n"
+        "gslwlc1 $f4, 3(%[block])               \r\n" /* $f4 = block[0..3] */
+        "gslwrc1 $f4, 0(%[block])               \r\n"
+        "pavgb $f2, $f2, $f4                    \r\n" /* per-byte average */
+        "gsswlc1 $f2, 3(%[block])               \r\n" /* block[0..3] = $f2 */
+        "gsswrc1 $f2, 0(%[block])               \r\n"
+        "dadd %[pixels], %[pixels], %[line_size]\r\n"
+        "dadd %[block], %[block], %[line_size]  \r\n"
+        "daddi %[h], %[h], -1                   \r\n"
+        "bnez %[h], 1b                          \r\n" /* tested after decrement */
+        : [block]"+&r"(block),[pixels]"+&r"(pixels),[h]"+&r"(h)
+        : [line_size]"r"(line_size) /* h moved to outputs: daddi writes it */
+        : "$f2","$f4","memory"); /* "memory": block is read and written */
+}
+
+/* block = pavgb(block, pixels) over h rows of 8 bytes; h must be >= 1. */
+static inline void avg_pixels8_mmi(uint8_t *block, const uint8_t *pixels,
+        ptrdiff_t line_size, int h)
+{
+    __asm__ volatile (
+        "1:                                     \r\n"
+        "gsldlc1 $f2, 7(%[block])               \r\n" /* $f2 = block[0..7] */
+        "gsldrc1 $f2, 0(%[block])               \r\n"
+        "gsldlc1 $f4, 7(%[pixels])              \r\n" /* $f4 = pixels[0..7] */
+        "gsldrc1 $f4, 0(%[pixels])              \r\n"
+        "pavgb $f2, $f2, $f4                    \r\n" /* per-byte average */
+        "gssdlc1 $f2, 7(%[block])               \r\n" /* block[0..7] = $f2 */
+        "gssdrc1 $f2, 0(%[block])               \r\n"
+        "dadd %[pixels], %[pixels], %[line_size]\r\n"
+        "dadd %[block], %[block], %[line_size]  \r\n"
+        "daddi %[h], %[h], -1                   \r\n"
+        "bnez %[h], 1b                          \r\n" /* tested after decrement */
+        : [block]"+&r"(block),[pixels]"+&r"(pixels),[h]"+&r"(h)
+        : [line_size]"r"(line_size) /* h moved to outputs: daddi writes it */
+        : "$f2","$f4","memory"); /* "memory": block is read and written */
+}
+
+/* block = pavgb(block, pixels) over h rows of 16 bytes; h must be >= 1. */
+static inline void avg_pixels16_mmi(uint8_t *block, const uint8_t *pixels,
+        ptrdiff_t line_size, int h)
+{
+    __asm__ volatile (
+        "1:                                     \r\n"
+        "gsldlc1 $f2, 7(%[block])               \r\n" /* $f2 = block[0..7] */
+        "gsldrc1 $f2, 0(%[block])               \r\n"
+        "gsldlc1 $f4, 15(%[block])              \r\n" /* $f4 = block[8..15] */
+        "gsldrc1 $f4, 8(%[block])               \r\n"
+        "gsldlc1 $f6, 7(%[pixels])              \r\n" /* $f6 = pixels[0..7] */
+        "gsldrc1 $f6, 0(%[pixels])              \r\n"
+        "gsldlc1 $f8, 15(%[pixels])             \r\n" /* $f8 = pixels[8..15] */
+        "gsldrc1 $f8, 8(%[pixels])              \r\n"
+        "pavgb $f2, $f2, $f6                    \r\n" /* per-byte average */
+        "pavgb $f4, $f4, $f8                    \r\n"
+        "gssdlc1 $f2, 7(%[block])               \r\n" /* block[0..7] = $f2 */
+        "gssdrc1 $f2, 0(%[block])               \r\n"
+        "gssdlc1 $f4, 15(%[block])              \r\n" /* block[8..15] = $f4 */
+        "gssdrc1 $f4, 8(%[block])               \r\n"
+        "dadd %[pixels], %[pixels], %[line_size]\r\n"
+        "dadd %[block], %[block], %[line_size]  \r\n"
+        "daddi %[h], %[h], -1                   \r\n"
+        "bnez %[h], 1b                          \r\n" /* tested after decrement */
+        : [block]"+&r"(block),[pixels]"+&r"(pixels),[h]"+&r"(h)
+        : [line_size]"r"(line_size) /* h moved to outputs: daddi writes it */
+        : "$f2","$f4","$f6","$f8","memory"); /* block is read and written */
+}
+
+/* dst = rnd_avg_pixel4(src1, src2) over h rows of 4 bytes. */
+static inline void put_pixels4_l2_mmi(uint8_t *dst, const uint8_t *src1,
+        const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
+        int h)
+{
+    int row;
+    for (row = 0; row < h; row++) {
+        const pixel4 p = AV_RN4P(src1 + row * src_stride1);
+        const pixel4 q = AV_RN4P(src2 + row * src_stride2);
+        op_put(*((pixel4 *) (dst + row * dst_stride)), rnd_avg_pixel4(p, q));
+    }
+}
+
+/* dst = rnd_avg_pixel4(src1, src2) over h rows of 8 bytes, 4 at a time. */
+static inline void put_pixels8_l2_mmi(uint8_t *dst, const uint8_t *src1,
+        const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
+        int h)
+{
+    int row, x;
+    for (row = 0; row < h; row++) {
+        uint8_t *out = dst + row * dst_stride;
+        const uint8_t *in1 = src1 + row * src_stride1;
+        const uint8_t *in2 = src2 + row * src_stride2;
+        for (x = 0; x < 8; x += 4)
+            op_put(*((pixel4 *) (out + x)),
+                   rnd_avg_pixel4(AV_RN4P(in1 + x), AV_RN4P(in2 + x)));
+    }
+}
+
+/*
+ * dst = rnd_avg_pixel4(src1, src2) over h rows of 16 bytes.
+ *
+ * Each row is handled as four 4-byte groups: both sources are read with
+ * AV_RN4P before the group is stored through a pixel4 pointer, so the
+ * per-group read/store order matches a fully unrolled version.
+ */
+static inline void put_pixels16_l2_mmi(uint8_t *dst, const uint8_t *src1,
+        const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
+        int h)
+{
+    int row, x;
+    for (row = 0; row < h; row++) {
+        uint8_t *out = dst + row * dst_stride;
+        const uint8_t *in1 = src1 + row * src_stride1;
+        const uint8_t *in2 = src2 + row * src_stride2;
+        for (x = 0; x < 16; x += 4)
+            op_put(*((pixel4 *) (out + x)),
+                   rnd_avg_pixel4(AV_RN4P(in1 + x), AV_RN4P(in2 + x)));
+    }
+}
+
+/* dst = op_avg(dst, rnd_avg_pixel4(src1, src2)) over h rows of 4 bytes. */
+static inline void avg_pixels4_l2_mmi(uint8_t *dst, const uint8_t *src1,
+        const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
+        int h)
+{
+    int row;
+    for (row = 0; row < h; row++) {
+        const pixel4 p = AV_RN4P(src1 + row * src_stride1);
+        const pixel4 q = AV_RN4P(src2 + row * src_stride2);
+        op_avg(*((pixel4 *) (dst + row * dst_stride)), rnd_avg_pixel4(p, q));
+    }
+}
+
+/* dst = op_avg(dst, rnd_avg_pixel4(src1, src2)), h rows of 8 bytes. */
+static inline void avg_pixels8_l2_mmi(uint8_t *dst, const uint8_t *src1,
+        const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
+        int h)
+{
+    int row, x;
+    for (row = 0; row < h; row++) {
+        uint8_t *out = dst + row * dst_stride;
+        const uint8_t *in1 = src1 + row * src_stride1;
+        const uint8_t *in2 = src2 + row * src_stride2;
+        for (x = 0; x < 8; x += 4)
+            op_avg(*((pixel4 *) (out + x)),
+                   rnd_avg_pixel4(AV_RN4P(in1 + x), AV_RN4P(in2 + x)));
+    }
+}
+
+/*
+ * dst = op_avg(dst, rnd_avg_pixel4(src1, src2)) over h rows of 16 bytes.
+ *
+ * Each row is handled as four 4-byte groups: both sources are read with
+ * AV_RN4P and combined with rnd_avg_pixel4, then op_avg merges that value
+ * into the pixel4 already stored at dst. The per-group read/store order
+ * matches a fully unrolled version.
+ */
+static inline void avg_pixels16_l2_mmi(uint8_t *dst, const uint8_t *src1,
+        const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2,
+        int h)
+{
+    int row, x;
+    for (row = 0; row < h; row++) {
+        uint8_t *out = dst + row * dst_stride;
+        const uint8_t *in1 = src1 + row * src_stride1;
+        const uint8_t *in2 = src2 + row * src_stride2;
+        for (x = 0; x < 16; x += 4)
+            op_avg(*((pixel4 *) (out + x)),
+                   rnd_avg_pixel4(AV_RN4P(in1 + x), AV_RN4P(in2 + x)));
+    }
+}
+#undef op_put
+#undef op_avg
+
+#define op2_avg(a, b)  a = (((a)+CLIP(((b) + 512)>>10)+1)>>1) /* CLIP((b+512)>>10), then rounded mean with old a */
+#define op2_put(a, b)  a = CLIP(((b) + 512)>>10) /* store CLIP((b+512)>>10) */
+/* 6-tap horizontal filter over 4 rows of 4 pixels, reading src[-2..+6]. */
+static void put_h264_qpel4_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
+        int dstStride, int srcStride)
+{
+    __asm__ volatile (
+        "xor $f0, $f0, $f0                      \r\n" /* $f0 = 0 for unpack */
+        "dli $8, 4                              \r\n" /* $8 = row counter */
+        "1:                                     \r\n"
+        "gslwlc1 $f2, 1(%[src])                 \r\n" /* $f2  = src[-2..1] */
+        "gslwrc1 $f2, -2(%[src])                \r\n"
+        "gslwlc1 $f4, 2(%[src])                 \r\n" /* $f4  = src[-1..2] */
+        "gslwrc1 $f4, -1(%[src])                \r\n"
+        "gslwlc1 $f6, 3(%[src])                 \r\n" /* $f6  = src[0..3] */
+        "gslwrc1 $f6, 0(%[src])                 \r\n"
+        "gslwlc1 $f8, 4(%[src])                 \r\n" /* $f8  = src[1..4] */
+        "gslwrc1 $f8, 1(%[src])                 \r\n"
+        "gslwlc1 $f10, 5(%[src])                \r\n" /* $f10 = src[2..5] */
+        "gslwrc1 $f10, 2(%[src])                \r\n"
+        "gslwlc1 $f12, 6(%[src])                \r\n" /* $f12 = src[3..6] */
+        "gslwrc1 $f12, 3(%[src])                \r\n"
+        "punpcklbh $f2, $f2, $f0                \r\n" /* widen bytes to 16 bit */
+        "punpcklbh $f4, $f4, $f0                \r\n"
+        "punpcklbh $f6, $f6, $f0                \r\n"
+        "punpcklbh $f8, $f8, $f0                \r\n"
+        "punpcklbh $f10, $f10, $f0              \r\n"
+        "punpcklbh $f12, $f12, $f0              \r\n"
+        "paddsh $f14, $f6, $f8                  \r\n" /* inner tap pair */
+        "paddsh $f16, $f4, $f10                 \r\n" /* middle tap pair */
+        "paddsh $f18, $f2, $f12                 \r\n" /* outer tap pair */
+        "pmullh $f14, $f14, %[ff_pw_20]         \r\n" /* inner * ff_pw_20 */
+        "pmullh $f16, $f16, %[ff_pw_5]          \r\n" /* middle * ff_pw_5 */
+        "psubsh $f14, $f14, $f16                \r\n"
+        "paddsh $f18, $f14, $f18                \r\n"
+        "paddsh $f18, $f18, %[ff_pw_16]         \r\n" /* add ff_pw_16 */
+        "psrah $f18, $f18, %[ff_pw_5]           \r\n" /* count from ff_pw_5 */
+        "packushb $f18, $f18, $f0               \r\n" /* back to bytes */
+        "gsswlc1 $f18, 3(%[dst])                \r\n" /* dst[0..3] = result */
+        "gsswrc1 $f18, 0(%[dst])                \r\n"
+        "dadd %[dst], %[dst], %[dstStride]      \r\n"
+        "dadd %[src], %[src], %[srcStride]      \r\n"
+        "daddi $8, $8, -1                       \r\n"
+        "bnez $8, 1b                            \r\n"
+        : [dst]"+&r"(dst),[src]"+&r"(src)
+        : [dstStride]"r"(dstStride),[srcStride]"r"(srcStride),
+          [ff_pw_20]"f"(ff_pw_20),[ff_pw_5]"f"(ff_pw_5),[ff_pw_16]"f"(ff_pw_16)
+        : "$8","$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14","$f16",
+          "$f18","memory"); /* "memory": the asm stores through dst */
+}
+
+static void put_h264_qpel8_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
+        int dstStride, int srcStride)
+{   // Horizontal 6-tap lowpass ("put"): filters 8 rows of 8 pixels from src and stores them to dst.
+    __asm__ volatile (  // per pixel: ((s[0]+s[1])*pw_20 - (s[-1]+s[2])*pw_5 + s[-2]+s[3] + pw_16) >> pw_5, packed to bytes
+        "xor $f0, $f0, $f0                      \r\n" // $f0 = 0, partner for widening bytes to halfwords
+        "dli $8, 8                              \r\n" // $8 = rows left to process
+        "1:                                     \r\n"
+        "gsldlc1 $f2, 5(%[src])                 \r\n" // $f2  = src[-2..5]  (unaligned 64-bit load, left+right pair)
+        "gsldrc1 $f2, -2(%[src])                \r\n"
+        "gsldlc1 $f4, 6(%[src])                 \r\n" // $f4  = src[-1..6]
+        "gsldrc1 $f4, -1(%[src])                \r\n"
+        "gsldlc1 $f6, 7(%[src])                 \r\n" // $f6  = src[0..7]
+        "gsldrc1 $f6, 0(%[src])                 \r\n"
+        "gsldlc1 $f8, 8(%[src])                 \r\n" // $f8  = src[1..8]
+        "gsldrc1 $f8, 1(%[src])                 \r\n"
+        "gsldlc1 $f10, 9(%[src])                \r\n" // $f10 = src[2..9]
+        "gsldrc1 $f10, 2(%[src])                \r\n"
+        "gsldlc1 $f12, 10(%[src])               \r\n" // $f12 = src[3..10]
+        "gsldrc1 $f12, 3(%[src])                \r\n"
+        "punpcklbh $f14, $f6, $f0               \r\n" // centre taps: widen low/high 4 bytes of s[0] and s[1]
+        "punpckhbh $f16, $f6, $f0               \r\n"
+        "punpcklbh $f18, $f8, $f0               \r\n"
+        "punpckhbh $f20, $f8, $f0               \r\n"
+        "paddsh $f6, $f14, $f18                 \r\n" // $f6/$f8 = (s[0] + s[1]) low/high halves
+        "paddsh $f8, $f16, $f20                 \r\n"
+        "pmullh $f6, $f6, %[ff_pw_20]           \r\n" // ... * ff_pw_20
+        "pmullh $f8, $f8, %[ff_pw_20]           \r\n"
+        "punpcklbh $f14, $f4, $f0               \r\n" // inner taps: widen s[-1] and s[2]
+        "punpckhbh $f16, $f4, $f0               \r\n"
+        "punpcklbh $f18, $f10, $f0              \r\n"
+        "punpckhbh $f20, $f10, $f0              \r\n"
+        "paddsh $f4, $f14, $f18                 \r\n" // $f4/$f10 = (s[-1] + s[2]) low/high halves
+        "paddsh $f10, $f16, $f20                \r\n"
+        "pmullh $f4, $f4, %[ff_pw_5]            \r\n" // ... * ff_pw_5
+        "pmullh $f10, $f10, %[ff_pw_5]          \r\n"
+        "punpcklbh $f14, $f2, $f0               \r\n" // outer taps: widen s[-2] and s[3]
+        "punpckhbh $f16, $f2, $f0               \r\n"
+        "punpcklbh $f18, $f12, $f0              \r\n"
+        "punpckhbh $f20, $f12, $f0              \r\n"
+        "paddsh $f2, $f14, $f18                 \r\n" // $f2/$f12 = (s[-2] + s[3]) low/high halves
+        "paddsh $f12, $f16, $f20                \r\n"
+        "psubsh $f6, $f6, $f4                   \r\n" // centre term - inner term
+        "psubsh $f8, $f8, $f10                  \r\n"
+        "paddsh $f6, $f6, $f2                   \r\n" // + outer term
+        "paddsh $f8, $f8, $f12                  \r\n"
+        "paddsh $f6, $f6, %[ff_pw_16]           \r\n" // + ff_pw_16 (rounding bias)
+        "paddsh $f8, $f8, %[ff_pw_16]           \r\n"
+        "psrah $f6, $f6, %[ff_pw_5]             \r\n" // arithmetic shift right; count register is ff_pw_5 (NOTE(review): presumably shifts by 5 - confirm)
+        "psrah $f8, $f8, %[ff_pw_5]             \r\n"
+        "packushb $f18, $f6, $f8                \r\n" // pack both halves back to 8 unsigned bytes
+        "sdc1 $f18, 0(%[dst])                   \r\n" // aligned 64-bit store (NOTE(review): assumes dst is 8-byte aligned - confirm with callers)
+        "dadd %[dst], %[dst], %[dstStride]      \r\n" // advance both pointers to the next row
+        "dadd %[src], %[src], %[srcStride]      \r\n"
+        "daddi $8, $8, -1                       \r\n"
+        "bnez $8, 1b                            \r\n" // loop until all 8 rows are done
+        : [dst]"+&r"(dst),[src]"+&r"(src)
+        : [dstStride]"r"(dstStride),[srcStride]"r"(srcStride),
+          [ff_pw_20]"f"(ff_pw_20),[ff_pw_5]"f"(ff_pw_5),[ff_pw_16]"f"(ff_pw_16)
+        : "$8","$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14","$f16",
+          "$f18","$f20","memory"
+    );  // "memory" clobber: the asm reads src and writes dst through pointers the compiler cannot track
+}
+
+static void put_h264_qpel16_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
+        int dstStride, int srcStride)
+{   /* 16x16 horizontal lowpass ("put") assembled from four 8x8 quadrant calls. */
+    const uint8_t *src_lo = src + 8 * srcStride;   /* source rows 8..15      */
+    uint8_t       *dst_lo = dst + 8 * dstStride;   /* destination rows 8..15 */
+    put_h264_qpel8_h_lowpass_mmi(dst,        src,        dstStride, srcStride);
+    put_h264_qpel8_h_lowpass_mmi(dst + 8,    src + 8,    dstStride, srcStride);
+    put_h264_qpel8_h_lowpass_mmi(dst_lo,     src_lo,     dstStride, srcStride);
+    put_h264_qpel8_h_lowpass_mmi(dst_lo + 8, src_lo + 8, dstStride, srcStride);
+}
+
+static void avg_h264_qpel4_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
+        int dstStride, int srcStride)
+{   // Horizontal 6-tap lowpass ("avg"): filters 4 rows of 4 pixels from src and averages the result into dst.
+    __asm__ volatile (  // per pixel: ((s[0]+s[1])*pw_20 - (s[-1]+s[2])*pw_5 + s[-2]+s[3] + pw_16) >> pw_5, packed to bytes
+        "xor $f0, $f0, $f0                      \r\n" // $f0 = 0, partner for widening bytes to halfwords
+        "dli $8, 4                              \r\n" // $8 = rows left to process
+        "1:                                     \r\n"
+        "gslwlc1 $f2, 1(%[src])                 \r\n" // $f2  = src[-2..1]  (unaligned 32-bit load, left+right pair)
+        "gslwrc1 $f2, -2(%[src])                \r\n"
+        "gslwlc1 $f4, 2(%[src])                 \r\n" // $f4  = src[-1..2]
+        "gslwrc1 $f4, -1(%[src])                \r\n"
+        "gslwlc1 $f6, 3(%[src])                 \r\n" // $f6  = src[0..3]
+        "gslwrc1 $f6, 0(%[src])                 \r\n"
+        "gslwlc1 $f8, 4(%[src])                 \r\n" // $f8  = src[1..4]
+        "gslwrc1 $f8, 1(%[src])                 \r\n"
+        "gslwlc1 $f10, 5(%[src])                \r\n" // $f10 = src[2..5]
+        "gslwrc1 $f10, 2(%[src])                \r\n"
+        "gslwlc1 $f12, 6(%[src])                \r\n" // $f12 = src[3..6]
+        "gslwrc1 $f12, 3(%[src])                \r\n"
+        "punpcklbh $f2, $f2, $f0                \r\n" // widen the 4 bytes of every tap to halfwords
+        "punpcklbh $f4, $f4, $f0                \r\n"
+        "punpcklbh $f6, $f6, $f0                \r\n"
+        "punpcklbh $f8, $f8, $f0                \r\n"
+        "punpcklbh $f10, $f10, $f0              \r\n"
+        "punpcklbh $f12, $f12, $f0              \r\n"
+        "paddsh $f14, $f6, $f8                  \r\n" // centre pair  s[0]  + s[1]
+        "paddsh $f16, $f4, $f10                 \r\n" // inner pair   s[-1] + s[2]
+        "paddsh $f18, $f2, $f12                 \r\n" // outer pair   s[-2] + s[3]
+        "pmullh $f14, $f14, %[ff_pw_20]         \r\n" // centre * ff_pw_20
+        "pmullh $f16, $f16, %[ff_pw_5]          \r\n" // inner  * ff_pw_5
+        "psubsh $f14, $f14, $f16                \r\n" // centre term - inner term
+        "paddsh $f18, $f14, $f18                \r\n" // + outer term
+        "paddsh $f18, $f18, %[ff_pw_16]         \r\n" // + ff_pw_16 (rounding bias)
+        "psrah $f18, $f18, %[ff_pw_5]           \r\n" // arithmetic shift right; count register is ff_pw_5 (NOTE(review): presumably shifts by 5 - confirm)
+        "packushb $f18, $f18, $f0               \r\n" // pack to unsigned bytes (upper half comes from zero)
+        "lwc1 $f20, 0(%[dst])                   \r\n" // load the 4 bytes currently in dst (aligned word load)
+        "pavgb $f18, $f18, $f20                 \r\n" // byte-wise average of filter output and existing dst
+        "gsswlc1 $f18, 3(%[dst])                \r\n" // unaligned 32-bit store to dst[0..3]
+        "gsswrc1 $f18, 0(%[dst])                \r\n"
+        "dadd %[dst], %[dst], %[dstStride]      \r\n" // advance both pointers to the next row
+        "dadd %[src], %[src], %[srcStride]      \r\n"
+        "daddi $8, $8, -1                       \r\n"
+        "bnez $8, 1b                            \r\n" // loop until all 4 rows are done
+        : [dst]"+&r"(dst),[src]"+&r"(src)
+        : [dstStride]"r"(dstStride),[srcStride]"r"(srcStride),
+          [ff_pw_20]"f"(ff_pw_20),[ff_pw_5]"f"(ff_pw_5),[ff_pw_16]"f"(ff_pw_16)
+        : "$8","$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14","$f16",
+          "$f18","$f20","memory"
+    );  // "memory" clobber: the asm reads src/dst and writes dst through pointers the compiler cannot track
+}
+
+static void avg_h264_qpel8_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
+        int dstStride, int srcStride)
+{   // Horizontal 6-tap lowpass ("avg"): filters 8 rows of 8 pixels from src and averages the result into dst.
+    __asm__ volatile (  // per pixel: ((s[0]+s[1])*pw_20 - (s[-1]+s[2])*pw_5 + s[-2]+s[3] + pw_16) >> pw_5, packed to bytes
+        "xor $f0, $f0, $f0                      \r\n" // $f0 = 0, partner for widening bytes to halfwords
+        "dli $8, 8                              \r\n" // $8 = rows left to process
+        "1:                                     \r\n"
+        "gsldlc1 $f2, 5(%[src])                 \r\n" // $f2  = src[-2..5]  (unaligned 64-bit load, left+right pair)
+        "gsldrc1 $f2, -2(%[src])                \r\n"
+        "gsldlc1 $f4, 6(%[src])                 \r\n" // $f4  = src[-1..6]
+        "gsldrc1 $f4, -1(%[src])                \r\n"
+        "gsldlc1 $f6, 7(%[src])                 \r\n" // $f6  = src[0..7]
+        "gsldrc1 $f6, 0(%[src])                 \r\n"
+        "gsldlc1 $f8, 8(%[src])                 \r\n" // $f8  = src[1..8]
+        "gsldrc1 $f8, 1(%[src])                 \r\n"
+        "gsldlc1 $f10, 9(%[src])                \r\n" // $f10 = src[2..9]
+        "gsldrc1 $f10, 2(%[src])                \r\n"
+        "gsldlc1 $f12, 10(%[src])               \r\n" // $f12 = src[3..10]
+        "gsldrc1 $f12, 3(%[src])                \r\n"
+        "punpcklbh $f14, $f6, $f0               \r\n" // centre taps: widen low/high 4 bytes of s[0] and s[1]
+        "punpckhbh $f16, $f6, $f0               \r\n"
+        "punpcklbh $f18, $f8, $f0               \r\n"
+        "punpckhbh $f20, $f8, $f0               \r\n"
+        "paddsh $f6, $f14, $f18                 \r\n" // $f6/$f8 = (s[0] + s[1]) low/high halves
+        "paddsh $f8, $f16, $f20                 \r\n"
+        "pmullh $f6, $f6, %[ff_pw_20]           \r\n" // ... * ff_pw_20
+        "pmullh $f8, $f8, %[ff_pw_20]           \r\n"
+        "punpcklbh $f14, $f4, $f0               \r\n" // inner taps: widen s[-1] and s[2]
+        "punpckhbh $f16, $f4, $f0               \r\n"
+        "punpcklbh $f18, $f10, $f0              \r\n"
+        "punpckhbh $f20, $f10, $f0              \r\n"
+        "paddsh $f4, $f14, $f18                 \r\n" // $f4/$f10 = (s[-1] + s[2]) low/high halves
+        "paddsh $f10, $f16, $f20                \r\n"
+        "pmullh $f4, $f4, %[ff_pw_5]            \r\n" // ... * ff_pw_5
+        "pmullh $f10, $f10, %[ff_pw_5]          \r\n"
+        "punpcklbh $f14, $f2, $f0               \r\n" // outer taps: widen s[-2] and s[3]
+        "punpckhbh $f16, $f2, $f0               \r\n"
+        "punpcklbh $f18, $f12, $f0              \r\n"
+        "punpckhbh $f20, $f12, $f0              \r\n"
+        "paddsh $f2, $f14, $f18                 \r\n" // $f2/$f12 = (s[-2] + s[3]) low/high halves
+        "paddsh $f12, $f16, $f20                \r\n"
+        "psubsh $f6, $f6, $f4                   \r\n" // centre term - inner term
+        "psubsh $f8, $f8, $f10                  \r\n"
+        "paddsh $f6, $f6, $f2                   \r\n" // + outer term
+        "paddsh $f8, $f8, $f12                  \r\n"
+        "paddsh $f6, $f6, %[ff_pw_16]           \r\n" // + ff_pw_16 (rounding bias)
+        "paddsh $f8, $f8, %[ff_pw_16]           \r\n"
+        "psrah $f6, $f6, %[ff_pw_5]             \r\n" // arithmetic shift right; count register is ff_pw_5 (NOTE(review): presumably shifts by 5 - confirm)
+        "psrah $f8, $f8, %[ff_pw_5]             \r\n"
+        "packushb $f18, $f6, $f8                \r\n" // pack both halves back to 8 unsigned bytes
+        "ldc1 $f20, 0(%[dst])                   \r\n" // load the 8 bytes currently in dst (aligned 64-bit load)
+        "pavgb $f18, $f18, $f20                 \r\n" // byte-wise average of filter output and existing dst
+        "sdc1 $f18, 0(%[dst])                   \r\n" // aligned 64-bit store (NOTE(review): assumes dst is 8-byte aligned - confirm with callers)
+        "dadd %[dst], %[dst], %[dstStride]      \r\n" // advance both pointers to the next row
+        "dadd %[src], %[src], %[srcStride]      \r\n"
+        "daddi $8, $8, -1                       \r\n"
+        "bnez $8, 1b                            \r\n" // loop until all 8 rows are done
+        : [dst]"+&r"(dst),[src]"+&r"(src)
+        : [dstStride]"r"(dstStride),[srcStride]"r"(srcStride),
+          [ff_pw_20]"f"(ff_pw_20),[ff_pw_5]"f"(ff_pw_5),[ff_pw_16]"f"(ff_pw_16)
+        : "$8","$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14","$f16",
+          "$f18","$f20","memory"
+    );  // "memory" clobber: the asm reads src/dst and writes dst through pointers the compiler cannot track
+}
+
+static void avg_h264_qpel16_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
+        int dstStride, int srcStride)
+{   /* 16x16 horizontal lowpass ("avg") assembled from four 8x8 quadrant calls. */
+    const uint8_t *src_lo = src + 8 * srcStride;   /* source rows 8..15      */
+    uint8_t       *dst_lo = dst + 8 * dstStride;   /* destination rows 8..15 */
+    avg_h264_qpel8_h_lowpass_mmi(dst,        src,        dstStride, srcStride);
+    avg_h264_qpel8_h_lowpass_mmi(dst + 8,    src + 8,    dstStride, srcStride);
+    avg_h264_qpel8_h_lowpass_mmi(dst_lo,     src_lo,     dstStride, srcStride);
+    avg_h264_qpel8_h_lowpass_mmi(dst_lo + 8, src_lo + 8, dstStride, srcStride);
+}
+
+static void put_h264_qpel4_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
+        int dstStride, int srcStride)
+{   // Vertical 6-tap lowpass ("put"): reads 9 source rows (src-2*stride .. src+6*stride), writes 4 rows of 4 pixels.
+    __asm__ volatile (  // per row r: ((s[r]+s[r+1])*pw_20 - (s[r-1]+s[r+2])*pw_5 + s[r-2]+s[r+3] + pw_16) >> pw_5, packed to bytes
+        "xor $f0, $f0, $f0                      \r\n" // $f0 = 0, partner for widening bytes to halfwords
+        "gslwlc1 $f2, 3(%[srcB])                \r\n" // $f2  = row -2 (unaligned 32-bit load, left+right pair)
+        "gslwrc1 $f2, 0(%[srcB])                \r\n"
+        "gslwlc1 $f4, 3(%[srcA])                \r\n" // $f4  = row -1
+        "gslwrc1 $f4, 0(%[srcA])                \r\n"
+        "gslwlc1 $f6, 3(%[src0])                \r\n" // $f6  = row 0
+        "gslwrc1 $f6, 0(%[src0])                \r\n"
+        "gslwlc1 $f8, 3(%[src1])                \r\n" // $f8  = row 1
+        "gslwrc1 $f8, 0(%[src1])                \r\n"
+        "gslwlc1 $f10, 3(%[src2])               \r\n" // $f10 = row 2
+        "gslwrc1 $f10, 0(%[src2])               \r\n"
+        "gslwlc1 $f12, 3(%[src3])               \r\n" // $f12 = row 3
+        "gslwrc1 $f12, 0(%[src3])               \r\n"
+        "gslwlc1 $f14, 3(%[src4])               \r\n" // $f14 = row 4
+        "gslwrc1 $f14, 0(%[src4])               \r\n"
+        "gslwlc1 $f16, 3(%[src5])               \r\n" // $f16 = row 5
+        "gslwrc1 $f16, 0(%[src5])               \r\n"
+        "gslwlc1 $f18, 3(%[src6])               \r\n" // $f18 = row 6
+        "gslwrc1 $f18, 0(%[src6])               \r\n"
+        "punpcklbh $f2, $f2, $f0                \r\n" // widen the 4 bytes of every row to halfwords
+        "punpcklbh $f4, $f4, $f0                \r\n"
+        "punpcklbh $f6, $f6, $f0                \r\n"
+        "punpcklbh $f8, $f8, $f0                \r\n"
+        "punpcklbh $f10, $f10, $f0              \r\n"
+        "punpcklbh $f12, $f12, $f0              \r\n"
+        "punpcklbh $f14, $f14, $f0              \r\n"
+        "punpcklbh $f16, $f16, $f0              \r\n"
+        "punpcklbh $f18, $f18, $f0              \r\n"
+        "paddsh $f20, $f6, $f8                  \r\n" // output row 0 -> $f24: centre = row0 + row1
+        "pmullh $f20, $f20, %[ff_pw_20]         \r\n"
+        "paddsh $f22, $f4, $f10                 \r\n" //   inner = row-1 + row2
+        "pmullh $f22, $f22, %[ff_pw_5]          \r\n"
+        "psubsh $f24, $f20, $f22                \r\n"
+        "paddsh $f24, $f24, $f2                 \r\n" //   + row-2
+        "paddsh $f24, $f24, $f12                \r\n" //   + row3
+        "paddsh $f20, $f8, $f10                 \r\n" // output row 1 -> $f26: centre = row1 + row2
+        "pmullh $f20, $f20, %[ff_pw_20]         \r\n"
+        "paddsh $f22, $f6, $f12                 \r\n" //   inner = row0 + row3
+        "pmullh $f22, $f22, %[ff_pw_5]          \r\n"
+        "psubsh $f26, $f20, $f22                \r\n"
+        "paddsh $f26, $f26, $f4                 \r\n" //   + row-1
+        "paddsh $f26, $f26, $f14                \r\n" //   + row4
+        "paddsh $f20, $f10, $f12                \r\n" // output row 2 -> $f28: centre = row2 + row3
+        "pmullh $f20, $f20, %[ff_pw_20]         \r\n"
+        "paddsh $f22, $f8, $f14                 \r\n" //   inner = row1 + row4
+        "pmullh $f22, $f22, %[ff_pw_5]          \r\n"
+        "psubsh $f28, $f20, $f22                \r\n"
+        "paddsh $f28, $f28, $f6                 \r\n" //   + row0
+        "paddsh $f28, $f28, $f16                \r\n" //   + row5
+        "paddsh $f20, $f12, $f14                \r\n" // output row 3 -> $f30: centre = row3 + row4
+        "pmullh $f20, $f20, %[ff_pw_20]         \r\n"
+        "paddsh $f22, $f10, $f16                \r\n" //   inner = row2 + row5
+        "pmullh $f22, $f22, %[ff_pw_5]          \r\n"
+        "psubsh $f30, $f20, $f22                \r\n"
+        "paddsh $f30, $f30, $f8                 \r\n" //   + row1
+        "paddsh $f30, $f30, $f18                \r\n" //   + row6
+        "paddsh $f24, $f24, %[ff_pw_16]         \r\n" // add ff_pw_16 (rounding bias) to all four rows
+        "paddsh $f26, $f26, %[ff_pw_16]         \r\n"
+        "paddsh $f28, $f28, %[ff_pw_16]         \r\n"
+        "paddsh $f30, $f30, %[ff_pw_16]         \r\n"
+        "psrah $f24, $f24, %[ff_pw_5]           \r\n" // arithmetic shift right; count register is ff_pw_5 (NOTE(review): presumably shifts by 5 - confirm)
+        "psrah $f26, $f26, %[ff_pw_5]           \r\n"
+        "psrah $f28, $f28, %[ff_pw_5]           \r\n"
+        "psrah $f30, $f30, %[ff_pw_5]           \r\n"
+        "packushb $f24, $f24, $f0               \r\n" // pack each row to unsigned bytes (upper half comes from zero)
+        "packushb $f26, $f26, $f0               \r\n"
+        "packushb $f28, $f28, $f0               \r\n"
+        "packushb $f30, $f30, $f0               \r\n"
+        "swc1 $f24, 0(%[dst0])                  \r\n" // aligned 32-bit stores, one per output row
+        "swc1 $f26, 0(%[dst1])                  \r\n"
+        "swc1 $f28, 0(%[dst2])                  \r\n"
+        "swc1 $f30, 0(%[dst3])                  \r\n"
+        ::[dst0]"r"(dst),               [dst1]"r"(dst+dstStride),
+          [dst2]"r"(dst+2*dstStride),   [dst3]"r"(dst+3*dstStride),
+          [srcB]"r"(src-2*srcStride),   [srcA]"r"(src-srcStride),
+          [src0]"r"(src),               [src1]"r"(src+srcStride),
+          [src2]"r"(src+2*srcStride),   [src3]"r"(src+3*srcStride),
+          [src4]"r"(src+4*srcStride),   [src5]"r"(src+5*srcStride),
+          [src6]"r"(src+6*srcStride),   [ff_pw_20]"f"(ff_pw_20),
+          [ff_pw_5]"f"(ff_pw_5),        [ff_pw_16]"f"(ff_pw_16)
+        : "$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14","$f16","$f18",
+          "$f20","$f22","$f24","$f26","$f28","$f30","memory"
+    );  // "memory" clobber: there are no output operands; all loads/stores go through input pointers the compiler cannot track
+}
+
+static void put_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
+        int dstStride, int srcStride)
+{   // Vertical 6-tap lowpass ("put"): reads 13 source rows (src-2*stride .. src+10*stride), writes 8 rows of 8 pixels.
+    __asm__ volatile (  // per row r: (((s[r]+s[r+1])*pw_4 - s[r-1] - s[r+2])*pw_5 + s[r-2]+s[r+3] + pw_16) >> pw_5, packed to bytes
+        "xor $f0, $f0, $f0                      \r\n" // $f0 = 0, partner for widening bytes to halfwords
+        "gsldlc1 $f2, 7(%[srcB])                \r\n" // $f2  = row -2 (unaligned 64-bit load, left+right pair)
+        "gsldrc1 $f2, 0(%[srcB])                \r\n"
+        "gsldlc1 $f4, 7(%[srcA])                \r\n" // $f4  = row -1
+        "gsldrc1 $f4, 0(%[srcA])                \r\n"
+        "gsldlc1 $f6, 7(%[src0])                \r\n" // $f6  = row 0
+        "gsldrc1 $f6, 0(%[src0])                \r\n"
+        "gsldlc1 $f8, 7(%[src1])                \r\n" // $f8  = row 1
+        "gsldrc1 $f8, 0(%[src1])                \r\n"
+        "gsldlc1 $f10, 7(%[src2])               \r\n" // $f10 = row 2
+        "gsldrc1 $f10, 0(%[src2])               \r\n"
+        "gsldlc1 $f12, 7(%[src3])               \r\n" // $f12 = row 3
+        "gsldrc1 $f12, 0(%[src3])               \r\n"
+        "gsldlc1 $f14, 7(%[src4])               \r\n" // $f14 = row 4
+        "gsldrc1 $f14, 0(%[src4])               \r\n"
+        "gsldlc1 $f16, 7(%[src5])               \r\n" // $f16 = row 5
+        "gsldrc1 $f16, 0(%[src5])               \r\n"
+        "gsldlc1 $f18, 7(%[src6])               \r\n" // $f18 = row 6
+        "gsldrc1 $f18, 0(%[src6])               \r\n"
+        "gsldlc1 $f20, 7(%[src7])               \r\n" // $f20 = row 7
+        "gsldrc1 $f20, 0(%[src7])               \r\n"
+        "gsldlc1 $f22, 7(%[src8])               \r\n" // $f22 = row 8
+        "gsldrc1 $f22, 0(%[src8])               \r\n"
+        "gsldlc1 $f24, 7(%[src9])               \r\n" // $f24 = row 9
+        "gsldrc1 $f24, 0(%[src9])               \r\n"
+        "gsldlc1 $f26, 7(%[src10])              \r\n" // $f26 = row 10
+        "gsldrc1 $f26, 0(%[src10])              \r\n"
+        "punpcklbh $f1, $f2, $f0                \r\n" // widen each row: odd register = low 4 pixels, even register = high 4 pixels
+        "punpckhbh $f2, $f2, $f0                \r\n" // row -2 -> $f1/$f2
+        "punpcklbh $f3, $f4, $f0                \r\n" // row -1 -> $f3/$f4
+        "punpckhbh $f4, $f4, $f0                \r\n"
+        "punpcklbh $f5, $f6, $f0                \r\n" // row 0  -> $f5/$f6
+        "punpckhbh $f6, $f6, $f0                \r\n"
+        "punpcklbh $f7, $f8, $f0                \r\n" // row 1  -> $f7/$f8
+        "punpckhbh $f8, $f8, $f0                \r\n"
+        "punpcklbh $f9, $f10, $f0               \r\n" // row 2  -> $f9/$f10
+        "punpckhbh $f10, $f10, $f0              \r\n"
+        "punpcklbh $f11, $f12, $f0              \r\n" // row 3  -> $f11/$f12
+        "punpckhbh $f12, $f12, $f0              \r\n"
+        "punpcklbh $f13, $f14, $f0              \r\n" // row 4  -> $f13/$f14
+        "punpckhbh $f14, $f14, $f0              \r\n"
+        "punpcklbh $f15, $f16, $f0              \r\n" // row 5  -> $f15/$f16
+        "punpckhbh $f16, $f16, $f0              \r\n"
+        "punpcklbh $f17, $f18, $f0              \r\n" // row 6  -> $f17/$f18
+        "punpckhbh $f18, $f18, $f0              \r\n"
+        "punpcklbh $f19, $f20, $f0              \r\n" // row 7  -> $f19/$f20
+        "punpckhbh $f20, $f20, $f0              \r\n"
+        "punpcklbh $f21, $f22, $f0              \r\n" // row 8  -> $f21/$f22
+        "punpckhbh $f22, $f22, $f0              \r\n"
+        "punpcklbh $f23, $f24, $f0              \r\n" // row 9  -> $f23/$f24
+        "punpckhbh $f24, $f24, $f0              \r\n"
+        "punpcklbh $f25, $f26, $f0              \r\n" // row 10 -> $f25/$f26
+        "punpckhbh $f26, $f26, $f0              \r\n"
+        "paddsh $f27, $f5, $f7                  \r\n" // ---- output row 0: $f27 = low half, $f28 = high half
+        "pmullh $f27, $f27, %[ff_pw_4]          \r\n"
+        "paddsh $f28, $f6, $f8                  \r\n"//src0+src1
+        "pmullh $f28, $f28, %[ff_pw_4]          \r\n"
+        "psubsh $f27, $f27, $f3                 \r\n" // - row -1
+        "psubsh $f28, $f28, $f4                 \r\n"
+        "psubsh $f27, $f27, $f9                 \r\n" // - row 2
+        "psubsh $f28, $f28, $f10                \r\n"
+        "pmullh $f27, $f27, %[ff_pw_5]          \r\n"
+        "pmullh $f28, $f28, %[ff_pw_5]          \r\n"
+        "paddsh $f27, $f27, $f1                 \r\n" // + row -2
+        "paddsh $f28, $f28, $f2                 \r\n"
+        "paddsh $f27, $f27, $f11                \r\n" // + row 3
+        "paddsh $f28, $f28, $f12                \r\n"
+        "paddsh $f27, $f27, %[ff_pw_16]         \r\n" // + ff_pw_16 (rounding bias)
+        "paddsh $f28, $f28, %[ff_pw_16]         \r\n"
+        "psrah $f27, $f27, %[ff_pw_5]           \r\n" // arithmetic shift right; count register is ff_pw_5 (NOTE(review): presumably shifts by 5 - confirm)
+        "psrah $f28, $f28, %[ff_pw_5]           \r\n"
+        "packushb $f27, $f27, $f0               \r\n" // pack each half to 4 unsigned bytes
+        "packushb $f28, $f28, $f0               \r\n"
+        "punpcklwd $f2, $f27, $f28              \r\n" // join low words of both halves into 8 output bytes
+        "sdc1 $f2, 0(%[dst0])                   \r\n" // aligned 64-bit store (NOTE(review): assumes dst rows are 8-byte aligned - confirm with callers)
+        "paddsh $f27, $f7, $f9                  \r\n" // ---- output row 1 (same scheme, window shifted down one row)
+        "pmullh $f27, $f27, %[ff_pw_4]          \r\n"
+        "paddsh $f28, $f8, $f10                 \r\n"//src1+src2
+        "pmullh $f28, $f28, %[ff_pw_4]          \r\n"
+        "psubsh $f27, $f27, $f5                 \r\n" // - row 0
+        "psubsh $f28, $f28, $f6                 \r\n"
+        "psubsh $f27, $f27, $f11                \r\n" // - row 3
+        "psubsh $f28, $f28, $f12                \r\n"
+        "pmullh $f27, $f27, %[ff_pw_5]          \r\n"
+        "pmullh $f28, $f28, %[ff_pw_5]          \r\n"
+        "paddsh $f27, $f27, $f3                 \r\n" // + row -1
+        "paddsh $f28, $f28, $f4                 \r\n"
+        "paddsh $f27, $f27, $f13                \r\n" // + row 4
+        "paddsh $f28, $f28, $f14                \r\n"
+        "paddsh $f27, $f27, %[ff_pw_16]         \r\n"
+        "paddsh $f28, $f28, %[ff_pw_16]         \r\n"
+        "psrah $f27, $f27, %[ff_pw_5]           \r\n"
+        "psrah $f28, $f28, %[ff_pw_5]           \r\n"
+        "packushb $f27, $f27, $f0               \r\n"
+        "packushb $f28, $f28, $f0               \r\n"
+        "punpcklwd $f4, $f27, $f28              \r\n"
+        "sdc1 $f4, 0(%[dst1])                   \r\n"
+        "paddsh $f27, $f9, $f11                 \r\n" // ---- output row 2
+        "pmullh $f27, $f27, %[ff_pw_4]          \r\n"
+        "paddsh $f28, $f10, $f12                \r\n"//src2+src3
+        "pmullh $f28, $f28, %[ff_pw_4]          \r\n"
+        "psubsh $f27, $f27, $f7                 \r\n" // - row 1
+        "psubsh $f28, $f28, $f8                 \r\n"
+        "psubsh $f27, $f27, $f13                \r\n" // - row 4
+        "psubsh $f28, $f28, $f14                \r\n"
+        "pmullh $f27, $f27, %[ff_pw_5]          \r\n"
+        "pmullh $f28, $f28, %[ff_pw_5]          \r\n"
+        "paddsh $f27, $f27, $f5                 \r\n" // + row 0
+        "paddsh $f28, $f28, $f6                 \r\n"
+        "paddsh $f27, $f27, $f15                \r\n" // + row 5
+        "paddsh $f28, $f28, $f16                \r\n"
+        "paddsh $f27, $f27, %[ff_pw_16]         \r\n"
+        "paddsh $f28, $f28, %[ff_pw_16]         \r\n"
+        "psrah $f27, $f27, %[ff_pw_5]           \r\n"
+        "psrah $f28, $f28, %[ff_pw_5]           \r\n"
+        "packushb $f27, $f27, $f0               \r\n"
+        "packushb $f28, $f28, $f0               \r\n"
+        "punpcklwd $f6, $f27, $f28              \r\n"
+        "sdc1 $f6, 0(%[dst2])                   \r\n"
+        "paddsh $f27, $f11, $f13                \r\n" // ---- output row 3
+        "pmullh $f27, $f27, %[ff_pw_4]          \r\n"
+        "paddsh $f28, $f12, $f14                \r\n"//src3+src4
+        "pmullh $f28, $f28, %[ff_pw_4]          \r\n"
+        "psubsh $f27, $f27, $f9                 \r\n" // - row 2
+        "psubsh $f28, $f28, $f10                \r\n"
+        "psubsh $f27, $f27, $f15                \r\n" // - row 5
+        "psubsh $f28, $f28, $f16                \r\n"
+        "pmullh $f27, $f27, %[ff_pw_5]          \r\n"
+        "pmullh $f28, $f28, %[ff_pw_5]          \r\n"
+        "paddsh $f27, $f27, $f7                 \r\n" // + row 1
+        "paddsh $f28, $f28, $f8                 \r\n"
+        "paddsh $f27, $f27, $f17                \r\n" // + row 6
+        "paddsh $f28, $f28, $f18                \r\n"
+        "paddsh $f27, $f27, %[ff_pw_16]         \r\n"
+        "paddsh $f28, $f28, %[ff_pw_16]         \r\n"
+        "psrah $f27, $f27, %[ff_pw_5]           \r\n"
+        "psrah $f28, $f28, %[ff_pw_5]           \r\n"
+        "packushb $f27, $f27, $f0               \r\n"
+        "packushb $f28, $f28, $f0               \r\n"
+        "punpcklwd $f8, $f27, $f28              \r\n"
+        "sdc1 $f8, 0(%[dst3])                   \r\n"
+        "paddsh $f27, $f13, $f15                \r\n" // ---- output row 4
+        "pmullh $f27, $f27, %[ff_pw_4]          \r\n"
+        "paddsh $f28, $f14, $f16                \r\n"//src4+src5
+        "pmullh $f28, $f28, %[ff_pw_4]          \r\n"
+        "psubsh $f27, $f27, $f11                \r\n" // - row 3
+        "psubsh $f28, $f28, $f12                \r\n"
+        "psubsh $f27, $f27, $f17                \r\n" // - row 6
+        "psubsh $f28, $f28, $f18                \r\n"
+        "pmullh $f27, $f27, %[ff_pw_5]          \r\n"
+        "pmullh $f28, $f28, %[ff_pw_5]          \r\n"
+        "paddsh $f27, $f27, $f9                 \r\n" // + row 2
+        "paddsh $f28, $f28, $f10                \r\n"
+        "paddsh $f27, $f27, $f19                \r\n" // + row 7
+        "paddsh $f28, $f28, $f20                \r\n"
+        "paddsh $f27, $f27, %[ff_pw_16]         \r\n"
+        "paddsh $f28, $f28, %[ff_pw_16]         \r\n"
+        "psrah $f27, $f27, %[ff_pw_5]           \r\n"
+        "psrah $f28, $f28, %[ff_pw_5]           \r\n"
+        "packushb $f27, $f27, $f0               \r\n"
+        "packushb $f28, $f28, $f0               \r\n"
+        "punpcklwd $f10, $f27, $f28             \r\n"
+        "sdc1 $f10, 0(%[dst4])                  \r\n"
+
+        "paddsh $f27, $f15, $f17                \r\n" // ---- output row 5
+        "pmullh $f27, $f27, %[ff_pw_4]          \r\n"
+        "paddsh $f28, $f16, $f18                \r\n"//src5+src6
+        "pmullh $f28, $f28, %[ff_pw_4]          \r\n"
+        "psubsh $f27, $f27, $f13                \r\n" // - row 4
+        "psubsh $f28, $f28, $f14                \r\n"
+        "psubsh $f27, $f27, $f19                \r\n" // - row 7
+        "psubsh $f28, $f28, $f20                \r\n"
+        "pmullh $f27, $f27, %[ff_pw_5]          \r\n"
+        "pmullh $f28, $f28, %[ff_pw_5]          \r\n"
+        "paddsh $f27, $f27, $f11                \r\n" // + row 3
+        "paddsh $f28, $f28, $f12                \r\n"
+        "paddsh $f27, $f27, $f21                \r\n" // + row 8
+        "paddsh $f28, $f28, $f22                \r\n"
+        "paddsh $f27, $f27, %[ff_pw_16]         \r\n"
+        "paddsh $f28, $f28, %[ff_pw_16]         \r\n"
+        "psrah $f27, $f27, %[ff_pw_5]           \r\n"
+        "psrah $f28, $f28, %[ff_pw_5]           \r\n"
+        "packushb $f27, $f27, $f0               \r\n"
+        "packushb $f28, $f28, $f0               \r\n"
+        "punpcklwd $f12, $f27, $f28             \r\n"
+        "sdc1 $f12, 0(%[dst5])                  \r\n"
+        "paddsh $f27, $f17, $f19                \r\n" // ---- output row 6
+        "pmullh $f27, $f27, %[ff_pw_4]          \r\n"
+        "paddsh $f28, $f18, $f20                \r\n"//src6+src7
+        "pmullh $f28, $f28, %[ff_pw_4]          \r\n"
+        "psubsh $f27, $f27, $f15                \r\n" // - row 5
+        "psubsh $f28, $f28, $f16                \r\n"
+        "psubsh $f27, $f27, $f21                \r\n" // - row 8
+        "psubsh $f28, $f28, $f22                \r\n"
+        "pmullh $f27, $f27, %[ff_pw_5]          \r\n"
+        "pmullh $f28, $f28, %[ff_pw_5]          \r\n"
+        "paddsh $f27, $f27, $f13                \r\n" // + row 4
+        "paddsh $f28, $f28, $f14                \r\n"
+        "paddsh $f27, $f27, $f23                \r\n" // + row 9
+        "paddsh $f28, $f28, $f24                \r\n"
+        "paddsh $f27, $f27, %[ff_pw_16]         \r\n"
+        "paddsh $f28, $f28, %[ff_pw_16]         \r\n"
+        "psrah $f27, $f27, %[ff_pw_5]           \r\n"
+        "psrah $f28, $f28, %[ff_pw_5]           \r\n"
+        "packushb $f27, $f27, $f0               \r\n"
+        "packushb $f28, $f28, $f0               \r\n"
+        "punpcklwd $f14, $f27, $f28             \r\n"
+        "sdc1 $f14, 0(%[dst6])                  \r\n"
+        "paddsh $f27, $f19, $f21                \r\n" // ---- output row 7
+        "pmullh $f27, $f27, %[ff_pw_4]          \r\n"
+        "paddsh $f28, $f20, $f22                \r\n"//src7+src8
+        "pmullh $f28, $f28, %[ff_pw_4]          \r\n"
+        "psubsh $f27, $f27, $f17                \r\n" // - row 6
+        "psubsh $f28, $f28, $f18                \r\n"
+        "psubsh $f27, $f27, $f23                \r\n" // - row 9
+        "psubsh $f28, $f28, $f24                \r\n"
+        "pmullh $f27, $f27, %[ff_pw_5]          \r\n"
+        "pmullh $f28, $f28, %[ff_pw_5]          \r\n"
+        "paddsh $f27, $f27, $f15                \r\n" // + row 5
+        "paddsh $f28, $f28, $f16                \r\n"
+        "paddsh $f27, $f27, $f25                \r\n" // + row 10
+        "paddsh $f28, $f28, $f26                \r\n"
+        "paddsh $f27, $f27, %[ff_pw_16]         \r\n"
+        "paddsh $f28, $f28, %[ff_pw_16]         \r\n"
+        "psrah $f27, $f27, %[ff_pw_5]           \r\n"
+        "psrah $f28, $f28, %[ff_pw_5]           \r\n"
+        "packushb $f27, $f27, $f0               \r\n"
+        "packushb $f28, $f28, $f0               \r\n"
+        "punpcklwd $f16, $f27, $f28             \r\n"
+        "sdc1 $f16, 0(%[dst7])                  \r\n"
+        ::[dst0]"r"(dst),               [dst1]"r"(dst+dstStride),
+          [dst2]"r"(dst+2*dstStride),   [dst3]"r"(dst+3*dstStride),
+          [dst4]"r"(dst+4*dstStride),   [dst5]"r"(dst+5*dstStride),
+          [dst6]"r"(dst+6*dstStride),   [dst7]"r"(dst+7*dstStride),
+          [srcB]"r"(src-2*srcStride),   [srcA]"r"(src-srcStride),
+          [src0]"r"(src),               [src1]"r"(src+srcStride),
+          [src2]"r"(src+2*srcStride),   [src3]"r"(src+3*srcStride),
+          [src4]"r"(src+4*srcStride),   [src5]"r"(src+5*srcStride),
+          [src6]"r"(src+6*srcStride),   [src7]"r"(src+7*srcStride),
+          [src8]"r"(src+8*srcStride),   [src9]"r"(src+9*srcStride),
+          [src10]"r"(src+10*srcStride), [ff_pw_4]"f"(ff_pw_4),
+          [ff_pw_5]"f"(ff_pw_5),        [ff_pw_16]"f"(ff_pw_16)
+        : "$f0","$f1","$f2","$f3","$f4","$f5","$f6","$f7","$f8","$f9","$f10",
+          "$f11","$f12","$f13","$f14","$f15","$f16","$f17","$f18","$f19",
+          "$f20","$f21","$f22","$f23","$f24","$f25","$f26","$f27","$f28","memory"
+    );  // "memory" clobber: there are no output operands; all loads/stores go through input pointers the compiler cannot track
+}
+
+static void put_h264_qpel16_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
+        int dstStride, int srcStride)
+{   /* 16x16 vertical lowpass ("put") assembled from four 8x8 quadrant calls. */
+    const uint8_t *src_lo = src + 8 * srcStride;   /* source rows 8..15      */
+    uint8_t       *dst_lo = dst + 8 * dstStride;   /* destination rows 8..15 */
+    put_h264_qpel8_v_lowpass_mmi(dst,        src,        dstStride, srcStride);
+    put_h264_qpel8_v_lowpass_mmi(dst + 8,    src + 8,    dstStride, srcStride);
+    put_h264_qpel8_v_lowpass_mmi(dst_lo,     src_lo,     dstStride, srcStride);
+    put_h264_qpel8_v_lowpass_mmi(dst_lo + 8, src_lo + 8, dstStride, srcStride);
+}
+
+static void avg_h264_qpel4_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
+        int dstStride, int srcStride)
+{
+    __asm__ volatile (
+        "xor $f0, $f0, $f0                      \r\n"
+        "gslwlc1 $f2, 3(%[srcB])                \r\n"
+        "gslwrc1 $f2, 0(%[srcB])                \r\n"
+        "gslwlc1 $f4, 3(%[srcA])                \r\n"
+        "gslwrc1 $f4, 0(%[srcA])                \r\n"
+        "gslwlc1 $f6, 3(%[src0])                \r\n"
+        "gslwrc1 $f6, 0(%[src0])                \r\n"
+        "gslwlc1 $f8, 3(%[src1])                \r\n"
+        "gslwrc1 $f8, 0(%[src1])                \r\n"
+        "gslwlc1 $f10, 3(%[src2])               \r\n"
+        "gslwrc1 $f10, 0(%[src2])               \r\n"
+        "gslwlc1 $f12, 3(%[src3])               \r\n"
+        "gslwrc1 $f12, 0(%[src3])               \r\n"
+        "gslwlc1 $f14, 3(%[src4])               \r\n"
+        "gslwrc1 $f14, 0(%[src4])               \r\n"
+        "gslwlc1 $f16, 3(%[src5])               \r\n"
+        "gslwrc1 $f16, 0(%[src5])               \r\n"
+        "gslwlc1 $f18, 3(%[src6])               \r\n"
+        "gslwrc1 $f18, 0(%[src6])               \r\n"
+        "punpcklbh $f2, $f2, $f0                \r\n"
+        "punpcklbh $f4, $f4, $f0                \r\n"
+        "punpcklbh $f6, $f6, $f0                \r\n"
+        "punpcklbh $f8, $f8, $f0                \r\n"
+        "punpcklbh $f10, $f10, $f0              \r\n"
+        "punpcklbh $f12, $f12, $f0              \r\n"
+        "punpcklbh $f14, $f14, $f0              \r\n"
+        "punpcklbh $f16, $f16, $f0              \r\n"
+        "punpcklbh $f18, $f18, $f0              \r\n"
+        "paddsh $f20, $f6, $f8                  \r\n"
+        "pmullh $f20, $f20, %[ff_pw_20]         \r\n"
+        "paddsh $f22, $f4, $f10                 \r\n"
+        "pmullh $f22, $f22, %[ff_pw_5]          \r\n"
+        "psubsh $f24, $f20, $f22                \r\n"
+        "paddsh $f24, $f24, $f2                 \r\n"
+        "paddsh $f24, $f24, $f12                \r\n"
+        "paddsh $f20, $f8, $f10                 \r\n"
+        "pmullh $f20, $f20, %[ff_pw_20]         \r\n"
+        "paddsh $f22, $f6, $f12                 \r\n"
+        "pmullh $f22, $f22, %[ff_pw_5]          \r\n"
+        "psubsh $f26, $f20, $f22                \r\n"
+        "paddsh $f26, $f26, $f4                 \r\n"
+        "paddsh $f26, $f26, $f14                \r\n"
+        "paddsh $f20, $f10, $f12                \r\n"
+        "pmullh $f20, $f20, %[ff_pw_20]         \r\n"
+        "paddsh $f22, $f8, $f14                 \r\n"
+        "pmullh $f22, $f22, %[ff_pw_5]          \r\n"
+        "psubsh $f28, $f20, $f22                \r\n"
+        "paddsh $f28, $f28, $f6                 \r\n"
+        "paddsh $f28, $f28, $f16                \r\n"
+        "paddsh $f20, $f12, $f14                \r\n"
+        "pmullh $f20, $f20, %[ff_pw_20]         \r\n"
+        "paddsh $f22, $f10, $f16                \r\n"
+        "pmullh $f22, $f22, %[ff_pw_5]          \r\n"
+        "psubsh $f30, $f20, $f22                \r\n"
+        "paddsh $f30, $f30, $f8                 \r\n"
+        "paddsh $f30, $f30, $f18                \r\n"
+        "paddsh $f24, $f24, %[ff_pw_16]         \r\n"
+        "paddsh $f26, $f26, %[ff_pw_16]         \r\n"
+        "paddsh $f28, $f28, %[ff_pw_16]         \r\n"
+        "paddsh $f30, $f30, %[ff_pw_16]         \r\n"
+        "psrah $f24, $f24, %[ff_pw_5]           \r\n"
+        "psrah $f26, $f26, %[ff_pw_5]           \r\n"
+        "psrah $f28, $f28, %[ff_pw_5]           \r\n"
+        "psrah $f30, $f30, %[ff_pw_5]           \r\n"
+        "packushb $f24, $f24, $f0               \r\n"
+        "packushb $f26, $f26, $f0               \r\n"
+        "packushb $f28, $f28, $f0               \r\n"
+        "packushb $f30, $f30, $f0               \r\n"
+        "lwc1 $f2, 0(%[dst0])                   \r\n"
+        "lwc1 $f4, 0(%[dst1])                   \r\n"
+        "lwc1 $f6, 0(%[dst2])                   \r\n"
+        "lwc1 $f8, 0(%[dst3])                   \r\n"
+        "pavgb $f24, $f2, $f24                  \r\n"
+        "pavgb $f26, $f4, $f26                  \r\n"
+        "pavgb $f28, $f6, $f28                  \r\n"
+        "pavgb $f30, $f8, $f30                  \r\n"
+        "swc1 $f24, 0(%[dst0])                  \r\n"
+        "swc1 $f26, 0(%[dst1])                  \r\n"
+        "swc1 $f28, 0(%[dst2])                  \r\n"
+        "swc1 $f30, 0(%[dst3])                  \r\n"
+        ::[dst0]"r"(dst),               [dst1]"r"(dst+dstStride),
+          [dst2]"r"(dst+2*dstStride),   [dst3]"r"(dst+3*dstStride),
+          [srcB]"r"(src-2*srcStride),   [srcA]"r"(src-srcStride),
+          [src0]"r"(src),               [src1]"r"(src+srcStride),
+          [src2]"r"(src+2*srcStride),   [src3]"r"(src+3*srcStride),
+          [src4]"r"(src+4*srcStride),   [src5]"r"(src+5*srcStride),
+          [src6]"r"(src+6*srcStride),   [ff_pw_20]"f"(ff_pw_20),
+          [ff_pw_5]"f"(ff_pw_5),        [ff_pw_16]"f"(ff_pw_16)
+        : "$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14","$f16","$f18",
+          "$f20","$f22","$f24","$f26","$f28","$f30"
+    );
+}
+
+static void avg_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
+        int dstStride, int srcStride)
+{   /* 8x8 vertical 6-tap lowpass of src, averaged into the existing dst. */
+    __asm__ volatile (
+        "xor $f0, $f0, $f0                      \r\n" /* $f0 = 0 for unpack/pack */
+        "gsldlc1 $f2, 7(%[srcB])                \r\n" /* 13 unaligned 8-byte row loads: */
+        "gsldrc1 $f2, 0(%[srcB])                \r\n" /* srcB = src - 2*srcStride        */
+        "gsldlc1 $f4, 7(%[srcA])                \r\n" /* srcA = src - srcStride          */
+        "gsldrc1 $f4, 0(%[srcA])                \r\n"
+        "gsldlc1 $f6, 7(%[src0])                \r\n" /* src0 .. src10 = src + k*srcStride */
+        "gsldrc1 $f6, 0(%[src0])                \r\n"
+        "gsldlc1 $f8, 7(%[src1])                \r\n"
+        "gsldrc1 $f8, 0(%[src1])                \r\n"
+        "gsldlc1 $f10, 7(%[src2])               \r\n"
+        "gsldrc1 $f10, 0(%[src2])               \r\n"
+        "gsldlc1 $f12, 7(%[src3])               \r\n"
+        "gsldrc1 $f12, 0(%[src3])               \r\n"
+        "gsldlc1 $f14, 7(%[src4])               \r\n"
+        "gsldrc1 $f14, 0(%[src4])               \r\n"
+        "gsldlc1 $f16, 7(%[src5])               \r\n"
+        "gsldrc1 $f16, 0(%[src5])               \r\n"
+        "gsldlc1 $f18, 7(%[src6])               \r\n"
+        "gsldrc1 $f18, 0(%[src6])               \r\n"
+        "gsldlc1 $f20, 7(%[src7])               \r\n"
+        "gsldrc1 $f20, 0(%[src7])               \r\n"
+        "gsldlc1 $f22, 7(%[src8])               \r\n"
+        "gsldrc1 $f22, 0(%[src8])               \r\n"
+        "gsldlc1 $f24, 7(%[src9])               \r\n"
+        "gsldrc1 $f24, 0(%[src9])               \r\n"
+        "gsldlc1 $f26, 7(%[src10])              \r\n"
+        "gsldrc1 $f26, 0(%[src10])              \r\n"
+        "punpcklbh $f1, $f2, $f0                \r\n" /* widen bytes to 16-bit lanes:     */
+        "punpckhbh $f2, $f2, $f0                \r\n" /* odd reg = low half, even = high  */
+        "punpcklbh $f3, $f4, $f0                \r\n" /* srcA -> $f3/$f4                  */
+        "punpckhbh $f4, $f4, $f0                \r\n"
+        "punpcklbh $f5, $f6, $f0                \r\n" /* src0 -> $f5/$f6                  */
+        "punpckhbh $f6, $f6, $f0                \r\n"
+        "punpcklbh $f7, $f8, $f0                \r\n" /* src1 -> $f7/$f8                  */
+        "punpckhbh $f8, $f8, $f0                \r\n"
+        "punpcklbh $f9, $f10, $f0               \r\n" /* src2 -> $f9/$f10                 */
+        "punpckhbh $f10, $f10, $f0              \r\n"
+        "punpcklbh $f11, $f12, $f0              \r\n" /* src3 -> $f11/$f12                */
+        "punpckhbh $f12, $f12, $f0              \r\n"
+        "punpcklbh $f13, $f14, $f0              \r\n" /* src4 -> $f13/$f14                */
+        "punpckhbh $f14, $f14, $f0              \r\n"
+        "punpcklbh $f15, $f16, $f0              \r\n" /* src5 -> $f15/$f16                */
+        "punpckhbh $f16, $f16, $f0              \r\n"
+        "punpcklbh $f17, $f18, $f0              \r\n" /* src6 -> $f17/$f18                */
+        "punpckhbh $f18, $f18, $f0              \r\n"
+        "punpcklbh $f19, $f20, $f0              \r\n" /* src7 -> $f19/$f20                */
+        "punpckhbh $f20, $f20, $f0              \r\n"
+        "punpcklbh $f21, $f22, $f0              \r\n" /* src8 -> $f21/$f22                */
+        "punpckhbh $f22, $f22, $f0              \r\n"
+        "punpcklbh $f23, $f24, $f0              \r\n" /* src9 -> $f23/$f24                */
+        "punpckhbh $f24, $f24, $f0              \r\n"
+        "punpcklbh $f25, $f26, $f0              \r\n" /* src10 -> $f25/$f26               */
+        "punpckhbh $f26, $f26, $f0              \r\n"
+        "paddsh $f27, $f5, $f7                  \r\n" /* output row 0, low half: src0+src1 */
+        "pmullh $f27, $f27, %[ff_pw_4]          \r\n" /* times ff_pw_4 */
+        "paddsh $f28, $f6, $f8                  \r\n" /* high half: src0+src1 */
+        "pmullh $f28, $f28, %[ff_pw_4]          \r\n"
+        "psubsh $f27, $f27, $f3                 \r\n" /* - srcA */
+        "psubsh $f28, $f28, $f4                 \r\n"
+        "psubsh $f27, $f27, $f9                 \r\n" /* - src2 */
+        "psubsh $f28, $f28, $f10                \r\n"
+        "pmullh $f27, $f27, %[ff_pw_5]          \r\n" /* times ff_pw_5 */
+        "pmullh $f28, $f28, %[ff_pw_5]          \r\n"
+        "paddsh $f27, $f27, $f1                 \r\n" /* + srcB */
+        "paddsh $f28, $f28, $f2                 \r\n"
+        "paddsh $f27, $f27, $f11                \r\n" /* + src3 */
+        "paddsh $f28, $f28, $f12                \r\n"
+        "paddsh $f27, $f27, %[ff_pw_16]         \r\n" /* + ff_pw_16 (rounding term) */
+        "paddsh $f28, $f28, %[ff_pw_16]         \r\n"
+        "psrah $f27, $f27, %[ff_pw_5]           \r\n" /* arithmetic shift, count from ff_pw_5 */
+        "psrah $f28, $f28, %[ff_pw_5]           \r\n"
+        "packushb $f27, $f27, $f0               \r\n" /* saturate to unsigned bytes */
+        "packushb $f28, $f28, $f0               \r\n"
+        "punpcklwd $f2, $f27, $f28              \r\n" /* join low and high halves: 8 bytes */
+        "ldc1 $f28, 0(%[dst0])                  \r\n" /* current contents of dst row 0 */
+        "pavgb $f2, $f2, $f28                   \r\n" /* byte-wise average with dst */
+        "sdc1 $f2, 0(%[dst0])                   \r\n"
+        "paddsh $f27, $f7, $f9                  \r\n" /* output row 1: same steps, one row down */
+        "pmullh $f27, $f27, %[ff_pw_4]          \r\n"
+        "paddsh $f28, $f8, $f10                 \r\n" /* high half: src1+src2 */
+        "pmullh $f28, $f28, %[ff_pw_4]          \r\n"
+        "psubsh $f27, $f27, $f5                 \r\n"
+        "psubsh $f28, $f28, $f6                 \r\n"
+        "psubsh $f27, $f27, $f11                \r\n"
+        "psubsh $f28, $f28, $f12                \r\n"
+        "pmullh $f27, $f27, %[ff_pw_5]          \r\n"
+        "pmullh $f28, $f28, %[ff_pw_5]          \r\n"
+        "paddsh $f27, $f27, $f3                 \r\n"
+        "paddsh $f28, $f28, $f4                 \r\n"
+        "paddsh $f27, $f27, $f13                \r\n"
+        "paddsh $f28, $f28, $f14                \r\n"
+        "paddsh $f27, $f27, %[ff_pw_16]         \r\n"
+        "paddsh $f28, $f28, %[ff_pw_16]         \r\n"
+        "psrah $f27, $f27, %[ff_pw_5]           \r\n"
+        "psrah $f28, $f28, %[ff_pw_5]           \r\n"
+        "packushb $f27, $f27, $f0               \r\n"
+        "packushb $f28, $f28, $f0               \r\n"
+        "punpcklwd $f4, $f27, $f28              \r\n"
+        "ldc1 $f28, 0(%[dst1])                  \r\n"
+        "pavgb $f4, $f4, $f28                   \r\n"
+        "sdc1 $f4, 0(%[dst1])                   \r\n"
+        "paddsh $f27, $f9, $f11                 \r\n" /* output row 2 */
+        "pmullh $f27, $f27, %[ff_pw_4]          \r\n"
+        "paddsh $f28, $f10, $f12                \r\n" /* high half: src2+src3 */
+        "pmullh $f28, $f28, %[ff_pw_4]          \r\n"
+        "psubsh $f27, $f27, $f7                 \r\n"
+        "psubsh $f28, $f28, $f8                 \r\n"
+        "psubsh $f27, $f27, $f13                \r\n"
+        "psubsh $f28, $f28, $f14                \r\n"
+        "pmullh $f27, $f27, %[ff_pw_5]          \r\n"
+        "pmullh $f28, $f28, %[ff_pw_5]          \r\n"
+        "paddsh $f27, $f27, $f5                 \r\n"
+        "paddsh $f28, $f28, $f6                 \r\n"
+        "paddsh $f27, $f27, $f15                \r\n"
+        "paddsh $f28, $f28, $f16                \r\n"
+        "paddsh $f27, $f27, %[ff_pw_16]         \r\n"
+        "paddsh $f28, $f28, %[ff_pw_16]         \r\n"
+        "psrah $f27, $f27, %[ff_pw_5]           \r\n"
+        "psrah $f28, $f28, %[ff_pw_5]           \r\n"
+        "packushb $f27, $f27, $f0               \r\n"
+        "packushb $f28, $f28, $f0               \r\n"
+        "punpcklwd $f6, $f27, $f28              \r\n"
+        "ldc1 $f28, 0(%[dst2])                  \r\n"
+        "pavgb $f6, $f6, $f28                   \r\n"
+        "sdc1 $f6, 0(%[dst2])                   \r\n"
+        "paddsh $f27, $f11, $f13                \r\n" /* output row 3 */
+        "pmullh $f27, $f27, %[ff_pw_4]          \r\n"
+        "paddsh $f28, $f12, $f14                \r\n" /* high half: src3+src4 */
+        "pmullh $f28, $f28, %[ff_pw_4]          \r\n"
+        "psubsh $f27, $f27, $f9                 \r\n"
+        "psubsh $f28, $f28, $f10                \r\n"
+        "psubsh $f27, $f27, $f15                \r\n"
+        "psubsh $f28, $f28, $f16                \r\n"
+        "pmullh $f27, $f27, %[ff_pw_5]          \r\n"
+        "pmullh $f28, $f28, %[ff_pw_5]          \r\n"
+        "paddsh $f27, $f27, $f7                 \r\n"
+        "paddsh $f28, $f28, $f8                 \r\n"
+        "paddsh $f27, $f27, $f17                \r\n"
+        "paddsh $f28, $f28, $f18                \r\n"
+        "paddsh $f27, $f27, %[ff_pw_16]         \r\n"
+        "paddsh $f28, $f28, %[ff_pw_16]         \r\n"
+        "psrah $f27, $f27, %[ff_pw_5]           \r\n"
+        "psrah $f28, $f28, %[ff_pw_5]           \r\n"
+        "packushb $f27, $f27, $f0               \r\n"
+        "packushb $f28, $f28, $f0               \r\n"
+        "punpcklwd $f8, $f27, $f28              \r\n"
+        "ldc1 $f28, 0(%[dst3])                  \r\n"
+        "pavgb $f8, $f8, $f28                   \r\n"
+        "sdc1 $f8, 0(%[dst3])                   \r\n"
+        "paddsh $f27, $f13, $f15                \r\n" /* output row 4 */
+        "pmullh $f27, $f27, %[ff_pw_4]          \r\n"
+        "paddsh $f28, $f14, $f16                \r\n" /* high half: src4+src5 */
+        "pmullh $f28, $f28, %[ff_pw_4]          \r\n"
+        "psubsh $f27, $f27, $f11                \r\n"
+        "psubsh $f28, $f28, $f12                \r\n"
+        "psubsh $f27, $f27, $f17                \r\n"
+        "psubsh $f28, $f28, $f18                \r\n"
+        "pmullh $f27, $f27, %[ff_pw_5]          \r\n"
+        "pmullh $f28, $f28, %[ff_pw_5]          \r\n"
+        "paddsh $f27, $f27, $f9                 \r\n"
+        "paddsh $f28, $f28, $f10                \r\n"
+        "paddsh $f27, $f27, $f19                \r\n"
+        "paddsh $f28, $f28, $f20                \r\n"
+        "paddsh $f27, $f27, %[ff_pw_16]         \r\n"
+        "paddsh $f28, $f28, %[ff_pw_16]         \r\n"
+        "psrah $f27, $f27, %[ff_pw_5]           \r\n"
+        "psrah $f28, $f28, %[ff_pw_5]           \r\n"
+        "packushb $f27, $f27, $f0               \r\n"
+        "packushb $f28, $f28, $f0               \r\n"
+        "punpcklwd $f10, $f27, $f28             \r\n"
+        "ldc1 $f28, 0(%[dst4])                  \r\n"
+        "pavgb $f10, $f10, $f28                 \r\n"
+        "sdc1 $f10, 0(%[dst4])                  \r\n"
+        "paddsh $f27, $f15, $f17                \r\n" /* output row 5 */
+        "pmullh $f27, $f27, %[ff_pw_4]          \r\n"
+        "paddsh $f28, $f16, $f18                \r\n" /* high half: src5+src6 */
+        "pmullh $f28, $f28, %[ff_pw_4]          \r\n"
+        "psubsh $f27, $f27, $f13                \r\n"
+        "psubsh $f28, $f28, $f14                \r\n"
+        "psubsh $f27, $f27, $f19                \r\n"
+        "psubsh $f28, $f28, $f20                \r\n"
+        "pmullh $f27, $f27, %[ff_pw_5]          \r\n"
+        "pmullh $f28, $f28, %[ff_pw_5]          \r\n"
+        "paddsh $f27, $f27, $f11                \r\n"
+        "paddsh $f28, $f28, $f12                \r\n"
+        "paddsh $f27, $f27, $f21                \r\n"
+        "paddsh $f28, $f28, $f22                \r\n"
+        "paddsh $f27, $f27, %[ff_pw_16]         \r\n"
+        "paddsh $f28, $f28, %[ff_pw_16]         \r\n"
+        "psrah $f27, $f27, %[ff_pw_5]           \r\n"
+        "psrah $f28, $f28, %[ff_pw_5]           \r\n"
+        "packushb $f27, $f27, $f0               \r\n"
+        "packushb $f28, $f28, $f0               \r\n"
+        "punpcklwd $f12, $f27, $f28             \r\n"
+        "ldc1 $f28, 0(%[dst5])                  \r\n"
+        "pavgb $f12, $f12, $f28                 \r\n"
+        "sdc1 $f12, 0(%[dst5])                  \r\n"
+        "paddsh $f27, $f17, $f19                \r\n" /* output row 6 */
+        "pmullh $f27, $f27, %[ff_pw_4]          \r\n"
+        "paddsh $f28, $f18, $f20                \r\n" /* high half: src6+src7 */
+        "pmullh $f28, $f28, %[ff_pw_4]          \r\n"
+        "psubsh $f27, $f27, $f15                \r\n"
+        "psubsh $f28, $f28, $f16                \r\n"
+        "psubsh $f27, $f27, $f21                \r\n"
+        "psubsh $f28, $f28, $f22                \r\n"
+        "pmullh $f27, $f27, %[ff_pw_5]          \r\n"
+        "pmullh $f28, $f28, %[ff_pw_5]          \r\n"
+        "paddsh $f27, $f27, $f13                \r\n"
+        "paddsh $f28, $f28, $f14                \r\n"
+        "paddsh $f27, $f27, $f23                \r\n"
+        "paddsh $f28, $f28, $f24                \r\n"
+        "paddsh $f27, $f27, %[ff_pw_16]         \r\n"
+        "paddsh $f28, $f28, %[ff_pw_16]         \r\n"
+        "psrah $f27, $f27, %[ff_pw_5]           \r\n"
+        "psrah $f28, $f28, %[ff_pw_5]           \r\n"
+        "packushb $f27, $f27, $f0               \r\n"
+        "packushb $f28, $f28, $f0               \r\n"
+        "punpcklwd $f14, $f27, $f28             \r\n"
+        "ldc1 $f28, 0(%[dst6])                  \r\n"
+        "pavgb $f14, $f14, $f28                 \r\n"
+        "sdc1 $f14, 0(%[dst6])                  \r\n"
+        "paddsh $f27, $f19, $f21                \r\n" /* output row 7 */
+        "pmullh $f27, $f27, %[ff_pw_4]          \r\n"
+        "paddsh $f28, $f20, $f22                \r\n" /* high half: src7+src8 */
+        "pmullh $f28, $f28, %[ff_pw_4]          \r\n"
+        "psubsh $f27, $f27, $f17                \r\n"
+        "psubsh $f28, $f28, $f18                \r\n"
+        "psubsh $f27, $f27, $f23                \r\n"
+        "psubsh $f28, $f28, $f24                \r\n"
+        "pmullh $f27, $f27, %[ff_pw_5]          \r\n"
+        "pmullh $f28, $f28, %[ff_pw_5]          \r\n"
+        "paddsh $f27, $f27, $f15                \r\n"
+        "paddsh $f28, $f28, $f16                \r\n"
+        "paddsh $f27, $f27, $f25                \r\n"
+        "paddsh $f28, $f28, $f26                \r\n"
+        "paddsh $f27, $f27, %[ff_pw_16]         \r\n"
+        "paddsh $f28, $f28, %[ff_pw_16]         \r\n"
+        "psrah $f27, $f27, %[ff_pw_5]           \r\n"
+        "psrah $f28, $f28, %[ff_pw_5]           \r\n"
+        "packushb $f27, $f27, $f0               \r\n"
+        "packushb $f28, $f28, $f0               \r\n"
+        "punpcklwd $f16, $f27, $f28             \r\n"
+        "ldc1 $f28, 0(%[dst7])                  \r\n"
+        "pavgb $f16, $f16, $f28                 \r\n"
+        "sdc1 $f16, 0(%[dst7])                  \r\n"
+        ::[dst0]"r"(dst),               [dst1]"r"(dst+dstStride),
+          [dst2]"r"(dst+2*dstStride),   [dst3]"r"(dst+3*dstStride),
+          [dst4]"r"(dst+4*dstStride),   [dst5]"r"(dst+5*dstStride),
+          [dst6]"r"(dst+6*dstStride),   [dst7]"r"(dst+7*dstStride),
+          [srcB]"r"(src-2*srcStride),   [srcA]"r"(src-srcStride),
+          [src0]"r"(src),               [src1]"r"(src+srcStride),
+          [src2]"r"(src+2*srcStride),   [src3]"r"(src+3*srcStride),
+          [src4]"r"(src+4*srcStride),   [src5]"r"(src+5*srcStride),
+          [src6]"r"(src+6*srcStride),   [src7]"r"(src+7*srcStride),
+          [src8]"r"(src+8*srcStride),   [src9]"r"(src+9*srcStride),
+          [src10]"r"(src+10*srcStride), [ff_pw_4]"f"(ff_pw_4), /* ff_pw_N: defined elsewhere, */
+          [ff_pw_5]"f"(ff_pw_5),        [ff_pw_16]"f"(ff_pw_16) /* presumably N per 16-bit lane */
+        : "$f0","$f1","$f2","$f3","$f4","$f5","$f6","$f7","$f8","$f9","$f10",
+          "$f11","$f12","$f13","$f14","$f15","$f16","$f17","$f18","$f19", /* "memory": dst is */
+          "$f20","$f21","$f22","$f23","$f24","$f25","$f26","$f27","$f28","memory" /* written via "r" ptrs */
+    );
+}
+
+static void avg_h264_qpel16_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
+        int dstStride, int srcStride)
+{
+    /* Process the 16x16 block as four 8x8 quadrants: TL, TR, BL, BR. */
+    int y, x;
+    for (y = 0; y < 16; y += 8)
+        for (x = 0; x < 16; x += 8)
+            avg_h264_qpel8_v_lowpass_mmi(dst + y*dstStride + x,
+                    src + y*srcStride + x, dstStride, srcStride);
+}
+
+static void put_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
+        int dstStride, int srcStride)
+{   /* 4x4 2-D lowpass: horizontal pass in MMI asm into _tmp, vertical pass in C. */
+    int i;
+    int16_t _tmp[36]; /* 9 rows x 4 samples; NOTE(review): sdc1 needs 8-byte alignment - confirm */
+    int16_t *tmp = _tmp;
+    src -= 2*srcStride; /* start two rows above the block */
+    __asm__ volatile (
+        "xor $f0, $f0, $f0                      \r\n" /* $f0 = 0 for byte unpacking */
+        "dli $8, 9                              \r\n" /* row counter: 9 source rows */
+        "1:                                     \r\n"
+        "gslwlc1 $f2, 1(%[src])                 \r\n" /* six unaligned 4-byte loads at */
+        "gslwrc1 $f2, -2(%[src])                \r\n" /* src-2, src-1, ..., src+3      */
+        "gslwlc1 $f4, 2(%[src])                 \r\n"
+        "gslwrc1 $f4, -1(%[src])                \r\n"
+        "gslwlc1 $f6, 3(%[src])                 \r\n"
+        "gslwrc1 $f6, 0(%[src])                 \r\n"
+        "gslwlc1 $f8, 4(%[src])                 \r\n"
+        "gslwrc1 $f8, 1(%[src])                 \r\n"
+        "gslwlc1 $f10, 5(%[src])                \r\n"
+        "gslwrc1 $f10, 2(%[src])                \r\n"
+        "gslwlc1 $f12, 6(%[src])                \r\n"
+        "gslwrc1 $f12, 3(%[src])                \r\n"
+        "punpcklbh $f2, $f2, $f0                \r\n" /* widen 4 bytes to 4 16-bit lanes */
+        "punpcklbh $f4, $f4, $f0                \r\n"
+        "punpcklbh $f6, $f6, $f0                \r\n"
+        "punpcklbh $f8, $f8, $f0                \r\n"
+        "punpcklbh $f10, $f10, $f0              \r\n"
+        "punpcklbh $f12, $f12, $f0              \r\n"
+        "paddsh $f14, $f6, $f8                  \r\n" /* centre pair  (offsets 0, +1)  */
+        "paddsh $f16, $f4, $f10                 \r\n" /* inner pair   (offsets -1, +2) */
+        "paddsh $f18, $f2, $f12                 \r\n" /* outer pair   (offsets -2, +3) */
+        "pmullh $f14, $f14, %[ff_pw_20]         \r\n"
+        "pmullh $f16, $f16, %[ff_pw_5]          \r\n"
+        "psubsh $f14, $f14, $f16                \r\n"
+        "paddsh $f18, $f14, $f18                \r\n" /* centre*pw_20 - inner*pw_5 + outer */
+        "sdc1 $f18, 0(%[tmp])                   \r\n" /* store 4 unrounded 16-bit results */
+        "dadd %[tmp], %[tmp], %[tmpStride]      \r\n"
+        "dadd %[src], %[src], %[srcStride]      \r\n"
+        "daddi $8, $8, -1                       \r\n"
+        "bnez $8, 1b                            \r\n"
+        : [tmp]"+&r"(tmp),[src]"+&r"(src)
+        : [tmpStride]"r"(8),[srcStride]"r"(srcStride), /* tmp row pitch is 8 bytes */
+          [ff_pw_20]"f"(ff_pw_20),[ff_pw_5]"f"(ff_pw_5)
+        : "$8","$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14","$f16","$f18","memory"
+    ); /* "memory": the asm fills _tmp through a pointer and the C loop below reads it */
+
+    tmp -= 28; /* asm advanced tmp by 9*4 elements; rewind to row 2 so tmp[-8] is row 0 */
+
+    for(i=0; i<4; i++) { /* one column per iteration, rows are 4 elements apart */
+        const int16_t tmpB= tmp[-8];
+        const int16_t tmpA= tmp[-4];
+        const int16_t tmp0= tmp[ 0];
+        const int16_t tmp1= tmp[ 4];
+        const int16_t tmp2= tmp[ 8];
+        const int16_t tmp3= tmp[12];
+        const int16_t tmp4= tmp[16];
+        const int16_t tmp5= tmp[20];
+        const int16_t tmp6= tmp[24];
+        op2_put(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3)); /* op2_put is defined */
+        op2_put(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4)); /* elsewhere; presumably */
+        op2_put(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5)); /* rounds, scales and   */
+        op2_put(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6)); /* clips - confirm      */
+        dst++;
+        tmp++;
+    }
+}
+
+static void put_h264_qpel8_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
+        int dstStride, int srcStride)
+{   /* 8x8 2-D lowpass: horizontal pass in MMI asm into _tmp, vertical pass in C. */
+    int16_t _tmp[104]; /* 13 rows x 8 samples; NOTE(review): sdc1 needs 8-byte alignment - confirm */
+    int16_t *tmp = _tmp;
+    int i;
+    src -= 2*srcStride; /* start two rows above the block */
+
+    __asm__ volatile (
+        "xor $f0, $f0, $f0                      \r\n" /* $f0 = 0 for byte unpacking */
+        "dli $8, 13                             \r\n" /* row counter: 13 source rows */
+        "1:                                     \r\n"
+        "gsldlc1 $f2, 5(%[src])                 \r\n" /* six unaligned 8-byte loads at */
+        "gsldrc1 $f2, -2(%[src])                \r\n" /* src-2, src-1, ..., src+3      */
+        "gsldlc1 $f4, 6(%[src])                 \r\n"
+        "gsldrc1 $f4, -1(%[src])                \r\n"
+        "gsldlc1 $f6, 7(%[src])                 \r\n"
+        "gsldrc1 $f6, 0(%[src])                 \r\n"
+        "gsldlc1 $f8, 8(%[src])                 \r\n"
+        "gsldrc1 $f8, 1(%[src])                 \r\n"
+        "gsldlc1 $f10, 9(%[src])                \r\n"
+        "gsldrc1 $f10, 2(%[src])                \r\n"
+        "gsldlc1 $f12, 10(%[src])               \r\n"
+        "gsldrc1 $f12, 3(%[src])                \r\n"
+        "punpcklbh $f1, $f2, $f0                \r\n" /* low halves widened into odd regs */
+        "punpcklbh $f3, $f4, $f0                \r\n"
+        "punpcklbh $f5, $f6, $f0                \r\n"
+        "punpcklbh $f7, $f8, $f0                \r\n"
+        "punpcklbh $f9, $f10, $f0               \r\n"
+        "punpcklbh $f11, $f12, $f0              \r\n"
+        "punpckhbh $f2, $f2, $f0                \r\n" /* high halves widened in place */
+        "punpckhbh $f4, $f4, $f0                \r\n"
+        "punpckhbh $f6, $f6, $f0                \r\n"
+        "punpckhbh $f8, $f8, $f0                \r\n"
+        "punpckhbh $f10, $f10, $f0              \r\n"
+        "punpckhbh $f12, $f12, $f0              \r\n"
+        "paddsh $f13, $f5, $f7                  \r\n" /* low half: centre pair */
+        "paddsh $f15, $f3, $f9                 \r\n" /* inner pair */
+        "paddsh $f17, $f1, $f11                 \r\n" /* outer pair */
+        "pmullh $f13, $f13, %[ff_pw_20]         \r\n"
+        "pmullh $f15, $f15, %[ff_pw_5]          \r\n"
+        "psubsh $f13, $f13, $f15                \r\n"
+        "paddsh $f17, $f13, $f17                \r\n" /* centre*pw_20 - inner*pw_5 + outer */
+        "paddsh $f14, $f6, $f8                  \r\n" /* high half: same computation */
+        "paddsh $f16, $f4, $f10                 \r\n"
+        "paddsh $f18, $f2, $f12                 \r\n"
+        "pmullh $f14, $f14, %[ff_pw_20]         \r\n"
+        "pmullh $f16, $f16, %[ff_pw_5]          \r\n"
+        "psubsh $f14, $f14, $f16                \r\n"
+        "paddsh $f18, $f14, $f18                \r\n"
+        "sdc1 $f17, 0(%[tmp])                   \r\n" /* store 8 unrounded 16-bit results */
+        "sdc1 $f18, 8(%[tmp])                   \r\n"
+        "dadd %[tmp], %[tmp], %[tmpStride]      \r\n"
+        "dadd %[src], %[src], %[srcStride]      \r\n"
+        "daddi $8, $8, -1                       \r\n"
+        "bnez $8, 1b                            \r\n"
+        : [tmp]"+&r"(tmp),[src]"+&r"(src)
+        : [tmpStride]"r"(16),[srcStride]"r"(srcStride), /* tmp row pitch is 16 bytes */
+          [ff_pw_20]"f"(ff_pw_20),[ff_pw_5]"f"(ff_pw_5)
+        : "$8","$f0","$f1","$f2","$f3","$f4","$f5","$f6","$f7","$f8","$f9",
+          "$f10","$f11","$f12","$f13","$f14","$f15","$f16","$f17","$f18","memory"
+    ); /* "memory": the asm fills _tmp through a pointer and the C loop below reads it */
+
+    tmp -= 88; /* asm advanced tmp by 13*8 elements; rewind to row 2 so tmp[-16] is row 0 */
+
+    for(i=0; i<8; i++) { /* one column per iteration, rows are 8 elements apart */
+        const int tmpB= tmp[-16];
+        const int tmpA= tmp[ -8];
+        const int tmp0= tmp[  0];
+        const int tmp1= tmp[  8];
+        const int tmp2= tmp[ 16];
+        const int tmp3= tmp[ 24];
+        const int tmp4= tmp[ 32];
+        const int tmp5= tmp[ 40];
+        const int tmp6= tmp[ 48];
+        const int tmp7= tmp[ 56];
+        const int tmp8= tmp[ 64];
+        const int tmp9= tmp[ 72];
+        const int tmp10=tmp[ 80];
+        op2_put(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3)); /* op2_put is defined */
+        op2_put(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4)); /* elsewhere; presumably */
+        op2_put(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5)); /* rounds, scales and   */
+        op2_put(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6)); /* clips - confirm      */
+        op2_put(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));
+        op2_put(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));
+        op2_put(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));
+        op2_put(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));
+        dst++;
+        tmp++;
+    }
+}
+
+static void put_h264_qpel16_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
+        int dstStride, int srcStride)
+{
+    /* Process the 16x16 block as four 8x8 quadrants: TL, TR, BL, BR. */
+    int y, x;
+    for (y = 0; y < 16; y += 8)
+        for (x = 0; x < 16; x += 8)
+            put_h264_qpel8_hv_lowpass_mmi(dst + y*dstStride + x,
+                    src + y*srcStride + x, dstStride, srcStride);
+}
+
+static void avg_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
+        int dstStride, int srcStride)
+{   /* 4x4 2-D lowpass (avg variant): horizontal pass in asm, vertical pass in C. */
+    int i;
+    int16_t _tmp[36]; /* 9 rows x 4 samples; NOTE(review): sdc1 needs 8-byte alignment - confirm */
+    int16_t *tmp = _tmp;
+    src -= 2*srcStride; /* start two rows above the block */
+
+    __asm__ volatile (
+        "xor $f0, $f0, $f0                      \r\n" /* $f0 = 0 for byte unpacking */
+        "dli $8, 9                              \r\n" /* row counter: 9 source rows */
+        "1:                                     \r\n"
+        "gslwlc1 $f2, 1(%[src])                 \r\n" /* six unaligned 4-byte loads at */
+        "gslwrc1 $f2, -2(%[src])                \r\n" /* src-2, src-1, ..., src+3      */
+        "gslwlc1 $f4, 2(%[src])                 \r\n"
+        "gslwrc1 $f4, -1(%[src])                \r\n"
+        "gslwlc1 $f6, 3(%[src])                 \r\n"
+        "gslwrc1 $f6, 0(%[src])                 \r\n"
+        "gslwlc1 $f8, 4(%[src])                 \r\n"
+        "gslwrc1 $f8, 1(%[src])                 \r\n"
+        "gslwlc1 $f10, 5(%[src])                \r\n"
+        "gslwrc1 $f10, 2(%[src])                \r\n"
+        "gslwlc1 $f12, 6(%[src])                \r\n"
+        "gslwrc1 $f12, 3(%[src])                \r\n"
+        "punpcklbh $f2, $f2, $f0                \r\n" /* widen 4 bytes to 4 16-bit lanes */
+        "punpcklbh $f4, $f4, $f0                \r\n"
+        "punpcklbh $f6, $f6, $f0                \r\n"
+        "punpcklbh $f8, $f8, $f0                \r\n"
+        "punpcklbh $f10, $f10, $f0              \r\n"
+        "punpcklbh $f12, $f12, $f0              \r\n"
+        "paddsh $f14, $f6, $f8                  \r\n" /* centre pair  (offsets 0, +1)  */
+        "paddsh $f16, $f4, $f10                 \r\n" /* inner pair   (offsets -1, +2) */
+        "paddsh $f18, $f2, $f12                 \r\n" /* outer pair   (offsets -2, +3) */
+        "pmullh $f14, $f14, %[ff_pw_20]         \r\n"
+        "pmullh $f16, $f16, %[ff_pw_5]          \r\n"
+        "psubsh $f14, $f14, $f16                \r\n"
+        "paddsh $f18, $f14, $f18                \r\n" /* centre*pw_20 - inner*pw_5 + outer */
+        "sdc1 $f18, 0(%[tmp])                   \r\n" /* store 4 unrounded 16-bit results */
+        "dadd %[tmp], %[tmp], %[tmpStride]      \r\n"
+        "dadd %[src], %[src], %[srcStride]      \r\n"
+        "daddi $8, $8, -1                       \r\n"
+        "bnez $8, 1b                            \r\n"
+        : [tmp]"+&r"(tmp),[src]"+&r"(src)
+        : [tmpStride]"r"(8),[srcStride]"r"(srcStride), /* tmp row pitch is 8 bytes */
+          [ff_pw_20]"f"(ff_pw_20),[ff_pw_5]"f"(ff_pw_5)
+        : "$8","$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14","$f16","$f18","memory"
+    ); /* "memory": the asm fills _tmp through a pointer and the C loop below reads it */
+
+    tmp -= 28; /* asm advanced tmp by 9*4 elements; rewind to row 2 so tmp[-8] is row 0 */
+
+    for(i=0; i<4; i++) /* one column per iteration, rows are 4 elements apart */
+    {
+        const int16_t tmpB= tmp[-8];
+        const int16_t tmpA= tmp[-4];
+        const int16_t tmp0= tmp[ 0];
+        const int16_t tmp1= tmp[ 4];
+        const int16_t tmp2= tmp[ 8];
+        const int16_t tmp3= tmp[12];
+        const int16_t tmp4= tmp[16];
+        const int16_t tmp5= tmp[20];
+        const int16_t tmp6= tmp[24];
+        op2_avg(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3)); /* op2_avg is defined */
+        op2_avg(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4)); /* elsewhere; presumably */
+        op2_avg(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5)); /* clips and averages   */
+        op2_avg(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6)); /* with dst - confirm   */
+        dst++;
+        tmp++;
+    }
+}
+
+static void avg_h264_qpel8_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
+        int dstStride, int srcStride)
+{   /* 8x8 2-D lowpass (avg variant): horizontal pass in asm, vertical pass in C. */
+    int16_t _tmp[104]; /* 13 rows x 8 samples; NOTE(review): sdc1 needs 8-byte alignment - confirm */
+    int16_t *tmp = _tmp;
+    int i;
+    src -= 2*srcStride; /* start two rows above the block */
+
+    __asm__ volatile (
+        "xor $f0, $f0, $f0                      \r\n" /* $f0 = 0 for byte unpacking */
+        "dli $8, 13                             \r\n" /* row counter: 13 source rows */
+        "1:                                     \r\n"
+        "gsldlc1 $f2, 5(%[src])                 \r\n" /* six unaligned 8-byte loads at */
+        "gsldrc1 $f2, -2(%[src])                \r\n" /* src-2, src-1, ..., src+3      */
+        "gsldlc1 $f4, 6(%[src])                 \r\n"
+        "gsldrc1 $f4, -1(%[src])                \r\n"
+        "gsldlc1 $f6, 7(%[src])                 \r\n"
+        "gsldrc1 $f6, 0(%[src])                 \r\n"
+        "gsldlc1 $f8, 8(%[src])                 \r\n"
+        "gsldrc1 $f8, 1(%[src])                 \r\n"
+        "gsldlc1 $f10, 9(%[src])                \r\n"
+        "gsldrc1 $f10, 2(%[src])                \r\n"
+        "gsldlc1 $f12, 10(%[src])               \r\n"
+        "gsldrc1 $f12, 3(%[src])                \r\n"
+        "punpcklbh $f1, $f2, $f0                \r\n" /* low halves widened into odd regs */
+        "punpcklbh $f3, $f4, $f0                \r\n"
+        "punpcklbh $f5, $f6, $f0                \r\n"
+        "punpcklbh $f7, $f8, $f0                \r\n"
+        "punpcklbh $f9, $f10, $f0               \r\n"
+        "punpcklbh $f11, $f12, $f0              \r\n"
+        "punpckhbh $f2, $f2, $f0                \r\n" /* high halves widened in place */
+        "punpckhbh $f4, $f4, $f0                \r\n"
+        "punpckhbh $f6, $f6, $f0                \r\n"
+        "punpckhbh $f8, $f8, $f0                \r\n"
+        "punpckhbh $f10, $f10, $f0              \r\n"
+        "punpckhbh $f12, $f12, $f0              \r\n"
+        "paddsh $f13, $f5, $f7                  \r\n" /* low half: centre pair */
+        "paddsh $f15, $f3, $f9                 \r\n" /* inner pair */
+        "paddsh $f17, $f1, $f11                 \r\n" /* outer pair */
+        "pmullh $f13, $f13, %[ff_pw_20]         \r\n"
+        "pmullh $f15, $f15, %[ff_pw_5]          \r\n"
+        "psubsh $f13, $f13, $f15                \r\n"
+        "paddsh $f17, $f13, $f17                \r\n" /* centre*pw_20 - inner*pw_5 + outer */
+        "paddsh $f14, $f6, $f8                  \r\n" /* high half: same computation */
+        "paddsh $f16, $f4, $f10                 \r\n"
+        "paddsh $f18, $f2, $f12                 \r\n"
+        "pmullh $f14, $f14, %[ff_pw_20]         \r\n"
+        "pmullh $f16, $f16, %[ff_pw_5]          \r\n"
+        "psubsh $f14, $f14, $f16                \r\n"
+        "paddsh $f18, $f14, $f18                \r\n"
+
+        "sdc1 $f17, 0(%[tmp])                   \r\n" /* store 8 unrounded 16-bit results */
+        "sdc1 $f18, 8(%[tmp])                   \r\n"
+        "dadd %[tmp], %[tmp], %[tmpStride]      \r\n"
+        "dadd %[src], %[src], %[srcStride]      \r\n"
+        "daddi $8, $8, -1                       \r\n"
+        "bnez $8, 1b                            \r\n"
+        : [tmp]"+&r"(tmp),[src]"+&r"(src)
+        : [tmpStride]"r"(16),[srcStride]"r"(srcStride), /* tmp row pitch is 16 bytes */
+          [ff_pw_20]"f"(ff_pw_20),[ff_pw_5]"f"(ff_pw_5)
+        : "$8","$f0","$f1","$f2","$f3","$f4","$f5","$f6","$f7","$f8","$f9",
+          "$f10","$f11","$f12","$f13","$f14","$f15","$f16","$f17","$f18","memory"
+    ); /* "memory": the asm fills _tmp through a pointer and the C loop below reads it */
+
+    tmp -= 88; /* asm advanced tmp by 13*8 elements; rewind to row 2 so tmp[-16] is row 0 */
+
+    for(i=0; i<8; i++) { /* one column per iteration, rows are 8 elements apart */
+        const int tmpB= tmp[-16];
+        const int tmpA= tmp[ -8];
+        const int tmp0= tmp[  0];
+        const int tmp1= tmp[  8];
+        const int tmp2= tmp[ 16];
+        const int tmp3= tmp[ 24];
+        const int tmp4= tmp[ 32];
+        const int tmp5= tmp[ 40];
+        const int tmp6= tmp[ 48];
+        const int tmp7= tmp[ 56];
+        const int tmp8= tmp[ 64];
+        const int tmp9= tmp[ 72];
+        const int tmp10=tmp[ 80];
+        op2_avg(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3)); /* op2_avg is defined */
+        op2_avg(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4)); /* elsewhere; presumably */
+        op2_avg(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5)); /* clips and averages   */
+        op2_avg(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6)); /* with dst - confirm   */
+        op2_avg(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));
+        op2_avg(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));
+        op2_avg(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));
+        op2_avg(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));
+        dst++;
+        tmp++;
+    }
+}
+
+static void avg_h264_qpel16_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
+                                           int dstStride, int srcStride){
+    uint8_t *dst_lo = dst + 8*dstStride;       /* 8 strides below dst */
+    const uint8_t *src_lo = src + 8*srcStride; /* 8 strides below src */
+    avg_h264_qpel8_hv_lowpass_mmi(dst, src, dstStride, srcStride);
+    avg_h264_qpel8_hv_lowpass_mmi(dst + 8, src + 8, dstStride, srcStride);
+    avg_h264_qpel8_hv_lowpass_mmi(dst_lo, src_lo, dstStride, srcStride);
+    avg_h264_qpel8_hv_lowpass_mmi(dst_lo + 8, src_lo + 8, dstStride, srcStride);
+}
+
+//DEF_H264_MC_MMI(put_, 4)
+void ff_put_h264_qpel4_mc00_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    put_pixels4_mmi(dst, src, stride, 4); /* no lowpass stage */
+}
+
+void ff_put_h264_qpel4_mc10_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t hfilt[4 * 4]; /* h_lowpass scratch, paired with src in l2 */
+    put_h264_qpel4_h_lowpass_mmi(hfilt, src, 4, stride);
+    put_pixels4_l2_mmi(dst, src, hfilt, stride, stride, 4, 4);
+}
+
+void ff_put_h264_qpel4_mc20_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    put_h264_qpel4_h_lowpass_mmi(dst, src, stride, stride); /* no temp buf */
+}
+
+void ff_put_h264_qpel4_mc30_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t hfilt[4 * 4]; /* h_lowpass scratch, paired with src + 1 in l2 */
+    put_h264_qpel4_h_lowpass_mmi(hfilt, src, 4, stride);
+    put_pixels4_l2_mmi(dst, src + 1, hfilt, stride, stride, 4, 4);
+}
+
+/* l2 inputs: copied rows at src, and their v_lowpass. */
+void ff_put_h264_qpel4_mc01_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t rows[4 * 9], vfilt[4 * 4];
+    uint8_t *const center = rows + 2 * 4;
+    copy_block4_mmi(rows, src - 2 * stride, 4, stride, 9);
+    put_h264_qpel4_v_lowpass_mmi(vfilt, center, 4, 4);
+    put_pixels4_l2_mmi(dst, center, vfilt, stride, 4, 4, 4);
+}
+
+void ff_put_h264_qpel4_mc02_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t rows[4 * 9]; /* passed to copy_block4_mmi below */
+    uint8_t *const center = rows + 2 * 4;
+    copy_block4_mmi(rows, src - 2 * stride, 4, stride, 9);
+    put_h264_qpel4_v_lowpass_mmi(dst, center, stride, 4);
+}
+
+/* l2 inputs: copied rows one row below src, and the v_lowpass at src. */
+void ff_put_h264_qpel4_mc03_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t rows[4 * 9], vfilt[4 * 4];
+    uint8_t *const center = rows + 2 * 4;
+    copy_block4_mmi(rows, src - 2 * stride, 4, stride, 9);
+    put_h264_qpel4_v_lowpass_mmi(vfilt, center, 4, 4);
+    put_pixels4_l2_mmi(dst, center + 4, vfilt, stride, 4, 4, 4);
+}
+
+/* l2 inputs: h_lowpass(src), v_lowpass(rows around src). */
+void ff_put_h264_qpel4_mc11_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t rows[4 * 9];
+    uint8_t *const center = rows + 2 * 4;
+    uint8_t hfilt[4 * 4], vfilt[4 * 4];
+    put_h264_qpel4_h_lowpass_mmi(hfilt, src, 4, stride);
+    copy_block4_mmi(rows, src - 2 * stride, 4, stride, 9);
+    put_h264_qpel4_v_lowpass_mmi(vfilt, center, 4, 4);
+    put_pixels4_l2_mmi(dst, hfilt, vfilt, stride, 4, 4, 4);
+}
+
+/* l2 inputs: h_lowpass(src), v_lowpass(rows around src + 1). */
+void ff_put_h264_qpel4_mc31_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t rows[4 * 9];
+    uint8_t *const center = rows + 2 * 4;
+    uint8_t hfilt[4 * 4], vfilt[4 * 4];
+    put_h264_qpel4_h_lowpass_mmi(hfilt, src, 4, stride);
+    copy_block4_mmi(rows, src - 2 * stride + 1, 4, stride, 9);
+    put_h264_qpel4_v_lowpass_mmi(vfilt, center, 4, 4);
+    put_pixels4_l2_mmi(dst, hfilt, vfilt, stride, 4, 4, 4);
+}
+
+/* l2 inputs: h_lowpass(src + stride), v_lowpass(rows around src). */
+void ff_put_h264_qpel4_mc13_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t rows[4 * 9];
+    uint8_t *const center = rows + 2 * 4;
+    uint8_t hfilt[4 * 4], vfilt[4 * 4];
+    put_h264_qpel4_h_lowpass_mmi(hfilt, src + stride, 4, stride);
+    copy_block4_mmi(rows, src - 2 * stride, 4, stride, 9);
+    put_h264_qpel4_v_lowpass_mmi(vfilt, center, 4, 4);
+    put_pixels4_l2_mmi(dst, hfilt, vfilt, stride, 4, 4, 4);
+}
+
+/* l2 inputs: h_lowpass(src + stride), v_lowpass(rows around src + 1). */
+void ff_put_h264_qpel4_mc33_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t rows[4 * 9];
+    uint8_t *const center = rows + 2 * 4;
+    uint8_t hfilt[4 * 4], vfilt[4 * 4];
+    put_h264_qpel4_h_lowpass_mmi(hfilt, src + stride, 4, stride);
+    copy_block4_mmi(rows, src - 2 * stride + 1, 4, stride, 9);
+    put_h264_qpel4_v_lowpass_mmi(vfilt, center, 4, 4);
+    put_pixels4_l2_mmi(dst, hfilt, vfilt, stride, 4, 4, 4);
+}
+
+void ff_put_h264_qpel4_mc22_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    put_h264_qpel4_hv_lowpass_mmi(dst, src, stride, stride); /* no temp buf */
+}
+
+/* l2 inputs: h_lowpass(src), hv_lowpass(src). */
+void ff_put_h264_qpel4_mc21_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t hfilt[4 * 4], hvfilt[4 * 4];
+    put_h264_qpel4_h_lowpass_mmi(hfilt, src, 4, stride);
+    put_h264_qpel4_hv_lowpass_mmi(hvfilt, src, 4, stride);
+    put_pixels4_l2_mmi(dst, hfilt, hvfilt, stride, 4, 4, 4);
+}
+
+/* l2 inputs: h_lowpass(src + stride), hv_lowpass(src). */
+void ff_put_h264_qpel4_mc23_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t hfilt[4 * 4], hvfilt[4 * 4];
+    put_h264_qpel4_h_lowpass_mmi(hfilt, src + stride, 4, stride);
+    put_h264_qpel4_hv_lowpass_mmi(hvfilt, src, 4, stride);
+    put_pixels4_l2_mmi(dst, hfilt, hvfilt, stride, 4, 4, 4);
+}
+
+/* l2 inputs: v_lowpass(rows around src), hv_lowpass(src). */
+void ff_put_h264_qpel4_mc12_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t rows[4 * 9];
+    uint8_t *const center = rows + 2 * 4;
+    uint8_t vfilt[4 * 4], hvfilt[4 * 4];
+    copy_block4_mmi(rows, src - 2 * stride, 4, stride, 9);
+    put_h264_qpel4_v_lowpass_mmi(vfilt, center, 4, 4);
+    put_h264_qpel4_hv_lowpass_mmi(hvfilt, src, 4, stride);
+    put_pixels4_l2_mmi(dst, vfilt, hvfilt, stride, 4, 4, 4);
+}
+
+/* l2 inputs: v_lowpass(rows around src + 1), hv_lowpass(src). */
+void ff_put_h264_qpel4_mc32_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t rows[4 * 9];
+    uint8_t *const center = rows + 2 * 4;
+    uint8_t vfilt[4 * 4], hvfilt[4 * 4];
+    copy_block4_mmi(rows, src - 2 * stride + 1, 4, stride, 9);
+    put_h264_qpel4_v_lowpass_mmi(vfilt, center, 4, 4);
+    put_h264_qpel4_hv_lowpass_mmi(hvfilt, src, 4, stride);
+    put_pixels4_l2_mmi(dst, vfilt, hvfilt, stride, 4, 4, 4);
+}
+
+//DEF_H264_MC_MMI(avg_, 4)
+void ff_avg_h264_qpel4_mc00_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    avg_pixels4_mmi(dst, src, stride, 4); /* no lowpass stage */
+}
+
+void ff_avg_h264_qpel4_mc10_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t hfilt[4 * 4]; /* h_lowpass scratch, paired with src in l2 */
+    put_h264_qpel4_h_lowpass_mmi(hfilt, src, 4, stride);
+    avg_pixels4_l2_mmi(dst, src, hfilt, stride, stride, 4, 4);
+}
+
+void ff_avg_h264_qpel4_mc20_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    avg_h264_qpel4_h_lowpass_mmi(dst, src, stride, stride); /* no temp buf */
+}
+
+void ff_avg_h264_qpel4_mc30_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t hfilt[4 * 4]; /* h_lowpass scratch, paired with src + 1 in l2 */
+    put_h264_qpel4_h_lowpass_mmi(hfilt, src, 4, stride);
+    avg_pixels4_l2_mmi(dst, src + 1, hfilt, stride, stride, 4, 4);
+}
+
+/* l2 inputs: copied rows at src, and their v_lowpass. */
+void ff_avg_h264_qpel4_mc01_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t rows[4 * 9], vfilt[4 * 4];
+    uint8_t *const center = rows + 2 * 4;
+    copy_block4_mmi(rows, src - 2 * stride, 4, stride, 9);
+    put_h264_qpel4_v_lowpass_mmi(vfilt, center, 4, 4);
+    avg_pixels4_l2_mmi(dst, center, vfilt, stride, 4, 4, 4);
+}
+
+void ff_avg_h264_qpel4_mc02_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t rows[4 * 9]; /* passed to copy_block4_mmi below */
+    uint8_t *const center = rows + 2 * 4;
+    copy_block4_mmi(rows, src - 2 * stride, 4, stride, 9);
+    avg_h264_qpel4_v_lowpass_mmi(dst, center, stride, 4);
+}
+
+/* l2 inputs: copied rows one row below src, and the v_lowpass at src. */
+void ff_avg_h264_qpel4_mc03_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t rows[4 * 9], vfilt[4 * 4];
+    uint8_t *const center = rows + 2 * 4;
+    copy_block4_mmi(rows, src - 2 * stride, 4, stride, 9);
+    put_h264_qpel4_v_lowpass_mmi(vfilt, center, 4, 4);
+    avg_pixels4_l2_mmi(dst, center + 4, vfilt, stride, 4, 4, 4);
+}
+
+/* l2 inputs: h_lowpass(src), v_lowpass(rows around src). */
+void ff_avg_h264_qpel4_mc11_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t rows[4 * 9];
+    uint8_t *const center = rows + 2 * 4;
+    uint8_t hfilt[4 * 4], vfilt[4 * 4];
+    put_h264_qpel4_h_lowpass_mmi(hfilt, src, 4, stride);
+    copy_block4_mmi(rows, src - 2 * stride, 4, stride, 9);
+    put_h264_qpel4_v_lowpass_mmi(vfilt, center, 4, 4);
+    avg_pixels4_l2_mmi(dst, hfilt, vfilt, stride, 4, 4, 4);
+}
+
+/* l2 inputs: h_lowpass(src), v_lowpass(rows around src + 1). */
+void ff_avg_h264_qpel4_mc31_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t rows[4 * 9];
+    uint8_t *const center = rows + 2 * 4;
+    uint8_t hfilt[4 * 4], vfilt[4 * 4];
+    put_h264_qpel4_h_lowpass_mmi(hfilt, src, 4, stride);
+    copy_block4_mmi(rows, src - 2 * stride + 1, 4, stride, 9);
+    put_h264_qpel4_v_lowpass_mmi(vfilt, center, 4, 4);
+    avg_pixels4_l2_mmi(dst, hfilt, vfilt, stride, 4, 4, 4);
+}
+
+/* l2 inputs: h_lowpass(src + stride), v_lowpass(rows around src). */
+void ff_avg_h264_qpel4_mc13_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t rows[4 * 9];
+    uint8_t *const center = rows + 2 * 4;
+    uint8_t hfilt[4 * 4], vfilt[4 * 4];
+    put_h264_qpel4_h_lowpass_mmi(hfilt, src + stride, 4, stride);
+    copy_block4_mmi(rows, src - 2 * stride, 4, stride, 9);
+    put_h264_qpel4_v_lowpass_mmi(vfilt, center, 4, 4);
+    avg_pixels4_l2_mmi(dst, hfilt, vfilt, stride, 4, 4, 4);
+}
+
+/* l2 inputs: h_lowpass(src + stride), v_lowpass(rows around src + 1). */
+void ff_avg_h264_qpel4_mc33_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t rows[4 * 9];
+    uint8_t *const center = rows + 2 * 4;
+    uint8_t hfilt[4 * 4], vfilt[4 * 4];
+    put_h264_qpel4_h_lowpass_mmi(hfilt, src + stride, 4, stride);
+    copy_block4_mmi(rows, src - 2 * stride + 1, 4, stride, 9);
+    put_h264_qpel4_v_lowpass_mmi(vfilt, center, 4, 4);
+    avg_pixels4_l2_mmi(dst, hfilt, vfilt, stride, 4, 4, 4);
+}
+
+void ff_avg_h264_qpel4_mc22_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    avg_h264_qpel4_hv_lowpass_mmi(dst, src, stride, stride); /* no temp buf */
+}
+
+/* l2 inputs: h_lowpass(src), hv_lowpass(src). */
+void ff_avg_h264_qpel4_mc21_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t hfilt[4 * 4], hvfilt[4 * 4];
+    put_h264_qpel4_h_lowpass_mmi(hfilt, src, 4, stride);
+    put_h264_qpel4_hv_lowpass_mmi(hvfilt, src, 4, stride);
+    avg_pixels4_l2_mmi(dst, hfilt, hvfilt, stride, 4, 4, 4);
+}
+
+/* l2 inputs: h_lowpass(src + stride), hv_lowpass(src). */
+void ff_avg_h264_qpel4_mc23_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t hfilt[4 * 4], hvfilt[4 * 4];
+    put_h264_qpel4_h_lowpass_mmi(hfilt, src + stride, 4, stride);
+    put_h264_qpel4_hv_lowpass_mmi(hvfilt, src, 4, stride);
+    avg_pixels4_l2_mmi(dst, hfilt, hvfilt, stride, 4, 4, 4);
+}
+
+/* l2 inputs: v_lowpass(rows around src), hv_lowpass(src). */
+void ff_avg_h264_qpel4_mc12_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t rows[4 * 9];
+    uint8_t *const center = rows + 2 * 4;
+    uint8_t vfilt[4 * 4], hvfilt[4 * 4];
+    copy_block4_mmi(rows, src - 2 * stride, 4, stride, 9);
+    put_h264_qpel4_v_lowpass_mmi(vfilt, center, 4, 4);
+    put_h264_qpel4_hv_lowpass_mmi(hvfilt, src, 4, stride);
+    avg_pixels4_l2_mmi(dst, vfilt, hvfilt, stride, 4, 4, 4);
+}
+
+/* l2 inputs: v_lowpass(rows around src + 1), hv_lowpass(src). */
+void ff_avg_h264_qpel4_mc32_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t rows[4 * 9];
+    uint8_t *const center = rows + 2 * 4;
+    uint8_t vfilt[4 * 4], hvfilt[4 * 4];
+    copy_block4_mmi(rows, src - 2 * stride + 1, 4, stride, 9);
+    put_h264_qpel4_v_lowpass_mmi(vfilt, center, 4, 4);
+    put_h264_qpel4_hv_lowpass_mmi(hvfilt, src, 4, stride);
+    avg_pixels4_l2_mmi(dst, vfilt, hvfilt, stride, 4, 4, 4);
+}
+
+//DEF_H264_MC_MMI(put_, 8)
+void ff_put_h264_qpel8_mc00_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    put_pixels8_mmi(dst, src, stride, 8); /* no lowpass stage */
+}
+
+void ff_put_h264_qpel8_mc10_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t hfilt[8 * 8]; /* h_lowpass scratch, paired with src in l2 */
+    put_h264_qpel8_h_lowpass_mmi(hfilt, src, 8, stride);
+    put_pixels8_l2_mmi(dst, src, hfilt, stride, stride, 8, 8);
+}
+
+void ff_put_h264_qpel8_mc20_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    put_h264_qpel8_h_lowpass_mmi(dst, src, stride, stride); /* no temp buf */
+}
+
+void ff_put_h264_qpel8_mc30_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t hfilt[8 * 8]; /* h_lowpass scratch, paired with src + 1 in l2 */
+    put_h264_qpel8_h_lowpass_mmi(hfilt, src, 8, stride);
+    put_pixels8_l2_mmi(dst, src + 1, hfilt, stride, stride, 8, 8);
+}
+
+/* l2 inputs: copied rows at src, and their v_lowpass. */
+void ff_put_h264_qpel8_mc01_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t rows[8 * 13], vfilt[8 * 8];
+    uint8_t *const center = rows + 2 * 8;
+    copy_block8_mmi(rows, src - 2 * stride, 8, stride, 13);
+    put_h264_qpel8_v_lowpass_mmi(vfilt, center, 8, 8);
+    put_pixels8_l2_mmi(dst, center, vfilt, stride, 8, 8, 8);
+}
+
+void ff_put_h264_qpel8_mc02_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t rows[8 * 13]; /* passed to copy_block8_mmi below */
+    uint8_t *const center = rows + 2 * 8;
+    copy_block8_mmi(rows, src - 2 * stride, 8, stride, 13);
+    put_h264_qpel8_v_lowpass_mmi(dst, center, stride, 8);
+}
+
+/* l2 inputs: copied rows one row below src, and the v_lowpass at src. */
+void ff_put_h264_qpel8_mc03_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t rows[8 * 13], vfilt[8 * 8];
+    uint8_t *const center = rows + 2 * 8;
+    copy_block8_mmi(rows, src - 2 * stride, 8, stride, 13);
+    put_h264_qpel8_v_lowpass_mmi(vfilt, center, 8, 8);
+    put_pixels8_l2_mmi(dst, center + 8, vfilt, stride, 8, 8, 8);
+}
+
+/* l2 inputs: h_lowpass(src), v_lowpass(rows around src). */
+void ff_put_h264_qpel8_mc11_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t rows[8 * 13];
+    uint8_t *const center = rows + 2 * 8;
+    uint8_t hfilt[8 * 8], vfilt[8 * 8];
+    put_h264_qpel8_h_lowpass_mmi(hfilt, src, 8, stride);
+    copy_block8_mmi(rows, src - 2 * stride, 8, stride, 13);
+    put_h264_qpel8_v_lowpass_mmi(vfilt, center, 8, 8);
+    put_pixels8_l2_mmi(dst, hfilt, vfilt, stride, 8, 8, 8);
+}
+
+/* l2 inputs: h_lowpass(src), v_lowpass(rows around src + 1). */
+void ff_put_h264_qpel8_mc31_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t rows[8 * 13];
+    uint8_t *const center = rows + 2 * 8;
+    uint8_t hfilt[8 * 8], vfilt[8 * 8];
+    put_h264_qpel8_h_lowpass_mmi(hfilt, src, 8, stride);
+    copy_block8_mmi(rows, src - 2 * stride + 1, 8, stride, 13);
+    put_h264_qpel8_v_lowpass_mmi(vfilt, center, 8, 8);
+    put_pixels8_l2_mmi(dst, hfilt, vfilt, stride, 8, 8, 8);
+}
+
+/* l2 inputs: h_lowpass(src + stride), v_lowpass(rows around src). */
+void ff_put_h264_qpel8_mc13_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t rows[8 * 13];
+    uint8_t *const center = rows + 2 * 8;
+    uint8_t hfilt[8 * 8], vfilt[8 * 8];
+    put_h264_qpel8_h_lowpass_mmi(hfilt, src + stride, 8, stride);
+    copy_block8_mmi(rows, src - 2 * stride, 8, stride, 13);
+    put_h264_qpel8_v_lowpass_mmi(vfilt, center, 8, 8);
+    put_pixels8_l2_mmi(dst, hfilt, vfilt, stride, 8, 8, 8);
+}
+
+/* l2 inputs: h_lowpass(src + stride), v_lowpass(rows around src + 1). */
+void ff_put_h264_qpel8_mc33_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t rows[8 * 13];
+    uint8_t *const center = rows + 2 * 8;
+    uint8_t hfilt[8 * 8], vfilt[8 * 8];
+    put_h264_qpel8_h_lowpass_mmi(hfilt, src + stride, 8, stride);
+    copy_block8_mmi(rows, src - 2 * stride + 1, 8, stride, 13);
+    put_h264_qpel8_v_lowpass_mmi(vfilt, center, 8, 8);
+    put_pixels8_l2_mmi(dst, hfilt, vfilt, stride, 8, 8, 8);
+}
+
+void ff_put_h264_qpel8_mc22_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    put_h264_qpel8_hv_lowpass_mmi(dst, src, stride, stride); /* no temp buf */
+}
+
+/* l2 inputs: h_lowpass(src), hv_lowpass(src). */
+void ff_put_h264_qpel8_mc21_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t hfilt[8 * 8], hvfilt[8 * 8];
+    put_h264_qpel8_h_lowpass_mmi(hfilt, src, 8, stride);
+    put_h264_qpel8_hv_lowpass_mmi(hvfilt, src, 8, stride);
+    put_pixels8_l2_mmi(dst, hfilt, hvfilt, stride, 8, 8, 8);
+}
+
+/* l2 inputs: h_lowpass(src + stride), hv_lowpass(src). */
+void ff_put_h264_qpel8_mc23_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t hfilt[8 * 8], hvfilt[8 * 8];
+    put_h264_qpel8_h_lowpass_mmi(hfilt, src + stride, 8, stride);
+    put_h264_qpel8_hv_lowpass_mmi(hvfilt, src, 8, stride);
+    put_pixels8_l2_mmi(dst, hfilt, hvfilt, stride, 8, 8, 8);
+}
+
+/* l2 inputs: v_lowpass(rows around src), hv_lowpass(src). */
+void ff_put_h264_qpel8_mc12_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t rows[8 * 13];
+    uint8_t *const center = rows + 2 * 8;
+    uint8_t vfilt[8 * 8], hvfilt[8 * 8];
+    copy_block8_mmi(rows, src - 2 * stride, 8, stride, 13);
+    put_h264_qpel8_v_lowpass_mmi(vfilt, center, 8, 8);
+    put_h264_qpel8_hv_lowpass_mmi(hvfilt, src, 8, stride);
+    put_pixels8_l2_mmi(dst, vfilt, hvfilt, stride, 8, 8, 8);
+}
+
+/* l2 inputs: v_lowpass(rows around src + 1), hv_lowpass(src). */
+void ff_put_h264_qpel8_mc32_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t rows[8 * 13];
+    uint8_t *const center = rows + 2 * 8;
+    uint8_t vfilt[8 * 8], hvfilt[8 * 8];
+    copy_block8_mmi(rows, src - 2 * stride + 1, 8, stride, 13);
+    put_h264_qpel8_v_lowpass_mmi(vfilt, center, 8, 8);
+    put_h264_qpel8_hv_lowpass_mmi(hvfilt, src, 8, stride);
+    put_pixels8_l2_mmi(dst, vfilt, hvfilt, stride, 8, 8, 8);
+}
+
+//DEF_H264_MC_MMI(avg_, 8)
+void ff_avg_h264_qpel8_mc00_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    avg_pixels8_mmi(dst, src, stride, 8); /* no lowpass stage */
+}
+
+void ff_avg_h264_qpel8_mc10_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t hfilt[8 * 8]; /* h_lowpass scratch, paired with src in l2 */
+    put_h264_qpel8_h_lowpass_mmi(hfilt, src, 8, stride);
+    avg_pixels8_l2_mmi(dst, src, hfilt, stride, stride, 8, 8);
+}
+
+void ff_avg_h264_qpel8_mc20_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    avg_h264_qpel8_h_lowpass_mmi(dst, src, stride, stride); /* no temp buf */
+}
+
+void ff_avg_h264_qpel8_mc30_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t hfilt[8 * 8]; /* h_lowpass scratch, paired with src + 1 in l2 */
+    put_h264_qpel8_h_lowpass_mmi(hfilt, src, 8, stride);
+    avg_pixels8_l2_mmi(dst, src + 1, hfilt, stride, stride, 8, 8);
+}
+
+/* l2 inputs: copied rows at src, and their v_lowpass. */
+void ff_avg_h264_qpel8_mc01_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t rows[8 * 13], vfilt[8 * 8];
+    uint8_t *const center = rows + 2 * 8;
+    copy_block8_mmi(rows, src - 2 * stride, 8, stride, 13);
+    put_h264_qpel8_v_lowpass_mmi(vfilt, center, 8, 8);
+    avg_pixels8_l2_mmi(dst, center, vfilt, stride, 8, 8, 8);
+}
+
+void ff_avg_h264_qpel8_mc02_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t rows[8 * 13]; /* passed to copy_block8_mmi below */
+    uint8_t *const center = rows + 2 * 8;
+    copy_block8_mmi(rows, src - 2 * stride, 8, stride, 13);
+    avg_h264_qpel8_v_lowpass_mmi(dst, center, stride, 8);
+}
+
+/* l2 inputs: copied rows one row below src, and the v_lowpass at src. */
+void ff_avg_h264_qpel8_mc03_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t rows[8 * 13], vfilt[8 * 8];
+    uint8_t *const center = rows + 2 * 8;
+    copy_block8_mmi(rows, src - 2 * stride, 8, stride, 13);
+    put_h264_qpel8_v_lowpass_mmi(vfilt, center, 8, 8);
+    avg_pixels8_l2_mmi(dst, center + 8, vfilt, stride, 8, 8, 8);
+}
+
+/* l2 inputs: h_lowpass(src), v_lowpass(rows around src). */
+void ff_avg_h264_qpel8_mc11_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t rows[8 * 13];
+    uint8_t *const center = rows + 2 * 8;
+    uint8_t hfilt[8 * 8], vfilt[8 * 8];
+    put_h264_qpel8_h_lowpass_mmi(hfilt, src, 8, stride);
+    copy_block8_mmi(rows, src - 2 * stride, 8, stride, 13);
+    put_h264_qpel8_v_lowpass_mmi(vfilt, center, 8, 8);
+    avg_pixels8_l2_mmi(dst, hfilt, vfilt, stride, 8, 8, 8);
+}
+
+/* l2 inputs: h_lowpass(src), v_lowpass(rows around src + 1). */
+void ff_avg_h264_qpel8_mc31_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t rows[8 * 13];
+    uint8_t *const center = rows + 2 * 8;
+    uint8_t hfilt[8 * 8], vfilt[8 * 8];
+    put_h264_qpel8_h_lowpass_mmi(hfilt, src, 8, stride);
+    copy_block8_mmi(rows, src - 2 * stride + 1, 8, stride, 13);
+    put_h264_qpel8_v_lowpass_mmi(vfilt, center, 8, 8);
+    avg_pixels8_l2_mmi(dst, hfilt, vfilt, stride, 8, 8, 8);
+}
+
+/* l2 inputs: h_lowpass(src + stride), v_lowpass(rows around src). */
+void ff_avg_h264_qpel8_mc13_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t rows[8 * 13];
+    uint8_t *const center = rows + 2 * 8;
+    uint8_t hfilt[8 * 8], vfilt[8 * 8];
+    put_h264_qpel8_h_lowpass_mmi(hfilt, src + stride, 8, stride);
+    copy_block8_mmi(rows, src - 2 * stride, 8, stride, 13);
+    put_h264_qpel8_v_lowpass_mmi(vfilt, center, 8, 8);
+    avg_pixels8_l2_mmi(dst, hfilt, vfilt, stride, 8, 8, 8);
+}
+
+/* l2 inputs: h_lowpass(src + stride), v_lowpass(rows around src + 1). */
+void ff_avg_h264_qpel8_mc33_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t rows[8 * 13];
+    uint8_t *const center = rows + 2 * 8;
+    uint8_t hfilt[8 * 8], vfilt[8 * 8];
+    put_h264_qpel8_h_lowpass_mmi(hfilt, src + stride, 8, stride);
+    copy_block8_mmi(rows, src - 2 * stride + 1, 8, stride, 13);
+    put_h264_qpel8_v_lowpass_mmi(vfilt, center, 8, 8);
+    avg_pixels8_l2_mmi(dst, hfilt, vfilt, stride, 8, 8, 8);
+}
+
+void ff_avg_h264_qpel8_mc22_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    avg_h264_qpel8_hv_lowpass_mmi(dst, src, stride, stride); /* no temp buf */
+}
+
+/* l2 inputs: h_lowpass(src), hv_lowpass(src). */
+void ff_avg_h264_qpel8_mc21_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t hfilt[8 * 8], hvfilt[8 * 8];
+    put_h264_qpel8_h_lowpass_mmi(hfilt, src, 8, stride);
+    put_h264_qpel8_hv_lowpass_mmi(hvfilt, src, 8, stride);
+    avg_pixels8_l2_mmi(dst, hfilt, hvfilt, stride, 8, 8, 8);
+}
+
+/* l2 inputs: h_lowpass(src + stride), hv_lowpass(src). */
+void ff_avg_h264_qpel8_mc23_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t hfilt[8 * 8], hvfilt[8 * 8];
+    put_h264_qpel8_h_lowpass_mmi(hfilt, src + stride, 8, stride);
+    put_h264_qpel8_hv_lowpass_mmi(hvfilt, src, 8, stride);
+    avg_pixels8_l2_mmi(dst, hfilt, hvfilt, stride, 8, 8, 8);
+}
+
+/* l2 inputs: v_lowpass(rows around src), hv_lowpass(src). */
+void ff_avg_h264_qpel8_mc12_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t rows[8 * 13];
+    uint8_t *const center = rows + 2 * 8;
+    uint8_t vfilt[8 * 8], hvfilt[8 * 8];
+    copy_block8_mmi(rows, src - 2 * stride, 8, stride, 13);
+    put_h264_qpel8_v_lowpass_mmi(vfilt, center, 8, 8);
+    put_h264_qpel8_hv_lowpass_mmi(hvfilt, src, 8, stride);
+    avg_pixels8_l2_mmi(dst, vfilt, hvfilt, stride, 8, 8, 8);
+}
+
+/* l2 inputs: v_lowpass(rows around src + 1), hv_lowpass(src). */
+void ff_avg_h264_qpel8_mc32_mmi(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    uint8_t rows[8 * 13];
+    uint8_t *const center = rows + 2 * 8;
+    uint8_t vfilt[8 * 8], hvfilt[8 * 8];
+    copy_block8_mmi(rows, src - 2 * stride + 1, 8, stride, 13);
+    put_h264_qpel8_v_lowpass_mmi(vfilt, center, 8, 8);
+    put_h264_qpel8_hv_lowpass_mmi(hvfilt, src, 8, stride);
+    avg_pixels8_l2_mmi(dst, vfilt, hvfilt, stride, 8, 8, 8);
+}
+
+//DEF_H264_MC_MMI(put_, 16)
+void ff_put_h264_qpel16_mc00_mmi(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    put_pixels16_mmi(dst, src, stride, 16); /* no lowpass stage */
+}
+
+void ff_put_h264_qpel16_mc10_mmi(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    uint8_t hfilt[16 * 16]; /* h_lowpass scratch, paired with src in l2 */
+    put_h264_qpel16_h_lowpass_mmi(hfilt, src, 16, stride);
+    put_pixels16_l2_mmi(dst, src, hfilt, stride, stride, 16, 16);
+}
+
+void ff_put_h264_qpel16_mc20_mmi(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    put_h264_qpel16_h_lowpass_mmi(dst, src, stride, stride); /* no temp buf */
+}
+
+void ff_put_h264_qpel16_mc30_mmi(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    uint8_t hfilt[16 * 16]; /* h_lowpass scratch, paired with src + 1 in l2 */
+    put_h264_qpel16_h_lowpass_mmi(hfilt, src, 16, stride);
+    put_pixels16_l2_mmi(dst, src + 1, hfilt, stride, stride, 16, 16);
+}
+
+/* l2 inputs: copied rows at src, and their v_lowpass. */
+void ff_put_h264_qpel16_mc01_mmi(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    uint8_t rows[16 * 21], vfilt[16 * 16];
+    uint8_t *const center = rows + 2 * 16;
+    copy_block16_mmi(rows, src - 2 * stride, 16, stride, 21);
+    put_h264_qpel16_v_lowpass_mmi(vfilt, center, 16, 16);
+    put_pixels16_l2_mmi(dst, center, vfilt, stride, 16, 16, 16);
+}
+
+void ff_put_h264_qpel16_mc02_mmi(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    uint8_t rows[16 * 21]; /* passed to copy_block16_mmi below */
+    uint8_t *const center = rows + 2 * 16;
+    copy_block16_mmi(rows, src - 2 * stride, 16, stride, 21);
+    put_h264_qpel16_v_lowpass_mmi(dst, center, stride, 16);
+}
+
+/* l2 inputs: copied rows one row below src, and the v_lowpass at src. */
+void ff_put_h264_qpel16_mc03_mmi(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    uint8_t rows[16 * 21], vfilt[16 * 16];
+    uint8_t *const center = rows + 2 * 16;
+    copy_block16_mmi(rows, src - 2 * stride, 16, stride, 21);
+    put_h264_qpel16_v_lowpass_mmi(vfilt, center, 16, 16);
+    put_pixels16_l2_mmi(dst, center + 16, vfilt, stride, 16, 16, 16);
+}
+
+/* l2 inputs: h_lowpass(src), v_lowpass(rows around src). */
+void ff_put_h264_qpel16_mc11_mmi(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    uint8_t rows[16 * 21];
+    uint8_t *const center = rows + 2 * 16;
+    uint8_t hfilt[16 * 16], vfilt[16 * 16];
+    put_h264_qpel16_h_lowpass_mmi(hfilt, src, 16, stride);
+    copy_block16_mmi(rows, src - 2 * stride, 16, stride, 21);
+    put_h264_qpel16_v_lowpass_mmi(vfilt, center, 16, 16);
+    put_pixels16_l2_mmi(dst, hfilt, vfilt, stride, 16, 16, 16);
+}
+
+/* l2 inputs: h_lowpass(src), v_lowpass(rows around src + 1). */
+void ff_put_h264_qpel16_mc31_mmi(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    uint8_t rows[16 * 21];
+    uint8_t *const center = rows + 2 * 16;
+    uint8_t hfilt[16 * 16], vfilt[16 * 16];
+    put_h264_qpel16_h_lowpass_mmi(hfilt, src, 16, stride);
+    copy_block16_mmi(rows, src - 2 * stride + 1, 16, stride, 21);
+    put_h264_qpel16_v_lowpass_mmi(vfilt, center, 16, 16);
+    put_pixels16_l2_mmi(dst, hfilt, vfilt, stride, 16, 16, 16);
+}
+
+/* l2 inputs: h_lowpass(src + stride), v_lowpass(rows around src). */
+void ff_put_h264_qpel16_mc13_mmi(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    uint8_t rows[16 * 21];
+    uint8_t *const center = rows + 2 * 16;
+    uint8_t hfilt[16 * 16], vfilt[16 * 16];
+    put_h264_qpel16_h_lowpass_mmi(hfilt, src + stride, 16, stride);
+    copy_block16_mmi(rows, src - 2 * stride, 16, stride, 21);
+    put_h264_qpel16_v_lowpass_mmi(vfilt, center, 16, 16);
+    put_pixels16_l2_mmi(dst, hfilt, vfilt, stride, 16, 16, 16);
+}
+
+/* l2 inputs: h_lowpass(src + stride), v_lowpass(rows around src + 1). */
+void ff_put_h264_qpel16_mc33_mmi(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    uint8_t rows[16 * 21];
+    uint8_t *const center = rows + 2 * 16;
+    uint8_t hfilt[16 * 16], vfilt[16 * 16];
+    put_h264_qpel16_h_lowpass_mmi(hfilt, src + stride, 16, stride);
+    copy_block16_mmi(rows, src - 2 * stride + 1, 16, stride, 21);
+    put_h264_qpel16_v_lowpass_mmi(vfilt, center, 16, 16);
+    put_pixels16_l2_mmi(dst, hfilt, vfilt, stride, 16, 16, 16);
+}
+
+void ff_put_h264_qpel16_mc22_mmi(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    put_h264_qpel16_hv_lowpass_mmi(dst, src, stride, stride); /* no temp buf */
+}
+
+/* l2 inputs: h_lowpass(src), hv_lowpass(src). */
+void ff_put_h264_qpel16_mc21_mmi(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    uint8_t hfilt[16 * 16], hvfilt[16 * 16];
+    put_h264_qpel16_h_lowpass_mmi(hfilt, src, 16, stride);
+    put_h264_qpel16_hv_lowpass_mmi(hvfilt, src, 16, stride);
+    put_pixels16_l2_mmi(dst, hfilt, hvfilt, stride, 16, 16, 16);
+}
+
+/* l2 inputs: h_lowpass(src + stride), hv_lowpass(src). */
+void ff_put_h264_qpel16_mc23_mmi(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    uint8_t hfilt[16 * 16], hvfilt[16 * 16];
+    put_h264_qpel16_h_lowpass_mmi(hfilt, src + stride, 16, stride);
+    put_h264_qpel16_hv_lowpass_mmi(hvfilt, src, 16, stride);
+    put_pixels16_l2_mmi(dst, hfilt, hvfilt, stride, 16, 16, 16);
+}
+
+/* l2 inputs: v_lowpass(rows around src), hv_lowpass(src). */
+void ff_put_h264_qpel16_mc12_mmi(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    uint8_t rows[16 * 21];
+    uint8_t *const center = rows + 2 * 16;
+    uint8_t vfilt[16 * 16], hvfilt[16 * 16];
+    copy_block16_mmi(rows, src - 2 * stride, 16, stride, 21);
+    put_h264_qpel16_v_lowpass_mmi(vfilt, center, 16, 16);
+    put_h264_qpel16_hv_lowpass_mmi(hvfilt, src, 16, stride);
+    put_pixels16_l2_mmi(dst, vfilt, hvfilt, stride, 16, 16, 16);
+}
+
+/* l2 inputs: v_lowpass(rows around src + 1), hv_lowpass(src). */
+void ff_put_h264_qpel16_mc32_mmi(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    uint8_t rows[16 * 21];
+    uint8_t *const center = rows + 2 * 16;
+    uint8_t vfilt[16 * 16], hvfilt[16 * 16];
+    copy_block16_mmi(rows, src - 2 * stride + 1, 16, stride, 21);
+    put_h264_qpel16_v_lowpass_mmi(vfilt, center, 16, 16);
+    put_h264_qpel16_hv_lowpass_mmi(hvfilt, src, 16, stride);
+    put_pixels16_l2_mmi(dst, vfilt, hvfilt, stride, 16, 16, 16);
+}
+
+//DEF_H264_MC_MMI(avg_, 16)
+void ff_avg_h264_qpel16_mc00_mmi(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    avg_pixels16_mmi(dst, src, stride, 16); /* no lowpass stage */
+}
+
+void ff_avg_h264_qpel16_mc10_mmi(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    uint8_t hfilt[16 * 16]; /* h_lowpass scratch, paired with src in l2 */
+    put_h264_qpel16_h_lowpass_mmi(hfilt, src, 16, stride);
+    avg_pixels16_l2_mmi(dst, src, hfilt, stride, stride, 16, 16);
+}
+
+void ff_avg_h264_qpel16_mc20_mmi(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    avg_h264_qpel16_h_lowpass_mmi(dst, src, stride, stride); /* no temp buf */
+}
+
+void ff_avg_h264_qpel16_mc30_mmi(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    uint8_t hfilt[16 * 16]; /* h_lowpass scratch, paired with src + 1 in l2 */
+    put_h264_qpel16_h_lowpass_mmi(hfilt, src, 16, stride);
+    avg_pixels16_l2_mmi(dst, src + 1, hfilt, stride, stride, 16, 16);
+}
+
+/* l2 inputs: copied rows at src, and their v_lowpass. */
+void ff_avg_h264_qpel16_mc01_mmi(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    uint8_t rows[16 * 21], vfilt[16 * 16];
+    uint8_t *const center = rows + 2 * 16;
+    copy_block16_mmi(rows, src - 2 * stride, 16, stride, 21);
+    put_h264_qpel16_v_lowpass_mmi(vfilt, center, 16, 16);
+    avg_pixels16_l2_mmi(dst, center, vfilt, stride, 16, 16, 16);
+}
+
+void ff_avg_h264_qpel16_mc02_mmi(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    uint8_t rows[16 * 21]; /* passed to copy_block16_mmi below */
+    uint8_t *const center = rows + 2 * 16;
+    copy_block16_mmi(rows, src - 2 * stride, 16, stride, 21);
+    avg_h264_qpel16_v_lowpass_mmi(dst, center, stride, 16);
+}
+
+/* l2 inputs: copied rows one row below src, and the v_lowpass at src. */
+void ff_avg_h264_qpel16_mc03_mmi(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    uint8_t rows[16 * 21], vfilt[16 * 16];
+    uint8_t *const center = rows + 2 * 16;
+    copy_block16_mmi(rows, src - 2 * stride, 16, stride, 21);
+    put_h264_qpel16_v_lowpass_mmi(vfilt, center, 16, 16);
+    avg_pixels16_l2_mmi(dst, center + 16, vfilt, stride, 16, 16, 16);
+}
+
+/* l2 inputs: h_lowpass(src), v_lowpass(rows around src). */
+void ff_avg_h264_qpel16_mc11_mmi(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    uint8_t rows[16 * 21];
+    uint8_t *const center = rows + 2 * 16;
+    uint8_t hfilt[16 * 16], vfilt[16 * 16];
+    put_h264_qpel16_h_lowpass_mmi(hfilt, src, 16, stride);
+    copy_block16_mmi(rows, src - 2 * stride, 16, stride, 21);
+    put_h264_qpel16_v_lowpass_mmi(vfilt, center, 16, 16);
+    avg_pixels16_l2_mmi(dst, hfilt, vfilt, stride, 16, 16, 16);
+}
+
+/* l2 inputs: h_lowpass(src), v_lowpass(rows around src + 1). */
+void ff_avg_h264_qpel16_mc31_mmi(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    uint8_t rows[16 * 21];
+    uint8_t *const center = rows + 2 * 16;
+    uint8_t hfilt[16 * 16], vfilt[16 * 16];
+    put_h264_qpel16_h_lowpass_mmi(hfilt, src, 16, stride);
+    copy_block16_mmi(rows, src - 2 * stride + 1, 16, stride, 21);
+    put_h264_qpel16_v_lowpass_mmi(vfilt, center, 16, 16);
+    avg_pixels16_l2_mmi(dst, hfilt, vfilt, stride, 16, 16, 16);
+}
+
+/* l2 inputs: h_lowpass(src + stride), v_lowpass(rows around src). */
+void ff_avg_h264_qpel16_mc13_mmi(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    uint8_t rows[16 * 21];
+    uint8_t *const center = rows + 2 * 16;
+    uint8_t hfilt[16 * 16], vfilt[16 * 16];
+    put_h264_qpel16_h_lowpass_mmi(hfilt, src + stride, 16, stride);
+    copy_block16_mmi(rows, src - 2 * stride, 16, stride, 21);
+    put_h264_qpel16_v_lowpass_mmi(vfilt, center, 16, 16);
+    avg_pixels16_l2_mmi(dst, hfilt, vfilt, stride, 16, 16, 16);
+}
+
+/* l2 inputs: h_lowpass(src + stride), v_lowpass(rows around src + 1). */
+void ff_avg_h264_qpel16_mc33_mmi(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    uint8_t rows[16 * 21];
+    uint8_t *const center = rows + 2 * 16;
+    uint8_t hfilt[16 * 16], vfilt[16 * 16];
+    put_h264_qpel16_h_lowpass_mmi(hfilt, src + stride, 16, stride);
+    copy_block16_mmi(rows, src - 2 * stride + 1, 16, stride, 21);
+    put_h264_qpel16_v_lowpass_mmi(vfilt, center, 16, 16);
+    avg_pixels16_l2_mmi(dst, hfilt, vfilt, stride, 16, 16, 16);
+}
+
+void ff_avg_h264_qpel16_mc22_mmi(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    avg_h264_qpel16_hv_lowpass_mmi(dst, src, stride, stride); /* no temp buf */
+}
+
+/* l2 inputs: h_lowpass(src), hv_lowpass(src). */
+void ff_avg_h264_qpel16_mc21_mmi(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    uint8_t hfilt[16 * 16], hvfilt[16 * 16];
+    put_h264_qpel16_h_lowpass_mmi(hfilt, src, 16, stride);
+    put_h264_qpel16_hv_lowpass_mmi(hvfilt, src, 16, stride);
+    avg_pixels16_l2_mmi(dst, hfilt, hvfilt, stride, 16, 16, 16);
+}
+
+/* l2 inputs: h_lowpass(src + stride), hv_lowpass(src). */
+void ff_avg_h264_qpel16_mc23_mmi(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    uint8_t hfilt[16 * 16], hvfilt[16 * 16];
+    put_h264_qpel16_h_lowpass_mmi(hfilt, src + stride, 16, stride);
+    put_h264_qpel16_hv_lowpass_mmi(hvfilt, src, 16, stride);
+    avg_pixels16_l2_mmi(dst, hfilt, hvfilt, stride, 16, 16, 16);
+}
+
+/* l2 inputs: v_lowpass(rows around src), hv_lowpass(src). */
+void ff_avg_h264_qpel16_mc12_mmi(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    uint8_t rows[16 * 21];
+    uint8_t *const center = rows + 2 * 16;
+    uint8_t vfilt[16 * 16], hvfilt[16 * 16];
+    copy_block16_mmi(rows, src - 2 * stride, 16, stride, 21);
+    put_h264_qpel16_v_lowpass_mmi(vfilt, center, 16, 16);
+    put_h264_qpel16_hv_lowpass_mmi(hvfilt, src, 16, stride);
+    avg_pixels16_l2_mmi(dst, vfilt, hvfilt, stride, 16, 16, 16);
+}
+
+/* l2 inputs: v_lowpass(rows around src + 1), hv_lowpass(src). */
+void ff_avg_h264_qpel16_mc32_mmi(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    uint8_t rows[16 * 21];
+    uint8_t *const center = rows + 2 * 16;
+    uint8_t vfilt[16 * 16], hvfilt[16 * 16];
+    copy_block16_mmi(rows, src - 2 * stride + 1, 16, stride, 21);
+    put_h264_qpel16_v_lowpass_mmi(vfilt, center, 16, 16);
+    put_h264_qpel16_hv_lowpass_mmi(hvfilt, src, 16, stride);
+    avg_pixels16_l2_mmi(dst, vfilt, hvfilt, stride, 16, 16, 16);
+}
+
+#undef op2_avg
+#undef op2_put
diff --git a/libavcodec/mips/h264qpel_msa.c b/libavcodec/mips/h264qpel_msa.c
new file mode 100644
index 00000000..c38f1f7a
--- /dev/null
+++ b/libavcodec/mips/h264qpel_msa.c
@@ -0,0 +1,3600 @@
+/*
+ * Copyright (c) 2015 Parag Salasakar (Parag.Salasakar@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/mips/generic_macros_msa.h"
+#include "h264dsp_mips.h"
+
+#define AVC_CALC_DPADD_H_6PIX_2COEFF_SH(in0, in1, in2, in3, in4, in5)    \
+( { /* (in0+in5) - 5*(in1+in4) + 20*(in2+in3), 32-bit accumulate */      \
+    v4i32 tmp0_m, tmp1_m;                                                \
+    v8i16 out0_m, out1_m, out2_m, out3_m;                                \
+    v8i16 minus5h_m = __msa_ldi_h(-5);                                   \
+    v8i16 plus20h_m = __msa_ldi_h(20);                                   \
+                                                                         \
+    ILVRL_H2_SW(in5, in0, tmp0_m, tmp1_m);                               \
+                                                                         \
+    tmp0_m = __msa_hadd_s_w((v8i16) tmp0_m, (v8i16) tmp0_m);             \
+    tmp1_m = __msa_hadd_s_w((v8i16) tmp1_m, (v8i16) tmp1_m);             \
+                                                                         \
+    ILVRL_H2_SH(in1, in4, out0_m, out1_m);                               \
+    DPADD_SH2_SW(out0_m, out1_m, minus5h_m, minus5h_m, tmp0_m, tmp1_m);  \
+    ILVRL_H2_SH(in2, in3, out2_m, out3_m);                               \
+    DPADD_SH2_SW(out2_m, out3_m, plus20h_m, plus20h_m, tmp0_m, tmp1_m);  \
+                                                                         \
+    SRARI_W2_SW(tmp0_m, tmp1_m, 10); /* shift right by 10 */             \
+    SAT_SW2_SW(tmp0_m, tmp1_m, 7);                                       \
+    out0_m = __msa_pckev_h((v8i16) tmp1_m, (v8i16) tmp0_m);              \
+                                                                         \
+    out0_m; /* value of the statement expression: 8 halfwords */         \
+} )
+
+#define AVC_HORZ_FILTER_SH(in, mask0, mask1, mask2)     \
+( { /* 6-tap horizontal sum of one vector, unscaled */  \
+    v8i16 out0_m, out1_m;                               \
+    v16i8 tmp0_m, tmp1_m;                               \
+    v16i8 minus5b = __msa_ldi_b(-5);                    \
+    v16i8 plus20b = __msa_ldi_b(20);                    \
+                                                        \
+    tmp0_m = __msa_vshf_b((v16i8) mask0, in, in);       \
+    out0_m = __msa_hadd_s_h(tmp0_m, tmp0_m);            \
+                                                        \
+    tmp0_m = __msa_vshf_b((v16i8) mask1, in, in);       \
+    out0_m = __msa_dpadd_s_h(out0_m, minus5b, tmp0_m);  \
+                                                        \
+    tmp1_m = __msa_vshf_b((v16i8) (mask2), in, in);     \
+    out1_m = __msa_dpadd_s_h(out0_m, plus20b, tmp1_m);  \
+                                                        \
+    out1_m; /* no shift or saturation applied here */   \
+} )
+
+static const uint8_t luma_mask_arr[16 * 8] = { /* 8 rows of 16 byte-shuffle indices; users load rows at offsets 0, 48, 96, 112 */
+    /* 8 width cases */
+    0, 5, 1, 6, 2, 7, 3, 8, 4, 9, 5, 10, 6, 11, 7, 12, /* pairs (i, i+5): summed with weight 1 by the users in this file */
+    1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 9, 7, 10, 8, 11,  /* pairs (i+1, i+4): used with the -5 coefficient */
+    2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10,   /* pairs (i+2, i+3): used with the +20 coefficient */
+
+    /* 4 width cases */
+    0, 5, 1, 6, 2, 7, 3, 8, 16, 21, 17, 22, 18, 23, 19, 24,  /* same pairing; indices >= 16 presumably address the second shuffle source */
+    1, 4, 2, 5, 3, 6, 4, 7, 17, 20, 18, 21, 19, 22, 20, 23,
+    2, 3, 3, 4, 4, 5, 5, 6, 18, 19, 19, 20, 20, 21, 21, 22,
+    /* offsets 96 and 112: selected by avc_luma_hz_qrt_16w_msa depending on hor_offset */
+    2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 24, 25,
+    3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26
+};
+
+#define AVC_CALC_DPADD_B_6PIX_2COEFF_SH(vec0, vec1, vec2, vec3, vec4, vec5,  \
+                                        out1, out2)                          \
+{ /* out1, out2 receive the two ILVRL halves of the unscaled sum */          \
+    v16i8 tmp0_m, tmp1_m;                                                    \
+    v16i8 minus5b_m = __msa_ldi_b(-5);                                       \
+    v16i8 plus20b_m = __msa_ldi_b(20);                                       \
+                                                                             \
+    ILVRL_B2_SB(vec5, vec0, tmp0_m, tmp1_m); /* weight-1 pair */             \
+    HADD_SB2_SH(tmp0_m, tmp1_m, out1, out2);                                 \
+    ILVRL_B2_SB(vec4, vec1, tmp0_m, tmp1_m); /* -5 pair */                   \
+    DPADD_SB2_SH(tmp0_m, tmp1_m, minus5b_m, minus5b_m, out1, out2);          \
+    ILVRL_B2_SB(vec3, vec2, tmp0_m, tmp1_m); /* +20 pair */                  \
+    DPADD_SB2_SH(tmp0_m, tmp1_m, plus20b_m, plus20b_m, out1, out2);          \
+}
+
+#define AVC_CALC_DPADD_B_6PIX_2COEFF_R_SH(vec0, vec1, vec2, vec3, vec4, vec5)  \
+( { /* vec0+vec5 - 5*(vec1+vec4) + 20*(vec2+vec3), right half only */          \
+    v8i16 tmp1_m;                                                              \
+    v16i8 tmp0_m, tmp2_m;                                                      \
+    v16i8 minus5b_m = __msa_ldi_b(-5);                                         \
+    v16i8 plus20b_m = __msa_ldi_b(20);                                         \
+                                                                               \
+    tmp1_m = (v8i16) __msa_ilvr_b((v16i8) vec5, (v16i8) vec0);                 \
+    tmp1_m = __msa_hadd_s_h((v16i8) tmp1_m, (v16i8) tmp1_m);                   \
+                                                                               \
+    ILVR_B2_SB(vec4, vec1, vec3, vec2, tmp0_m, tmp2_m);                        \
+    DPADD_SB2_SH(tmp0_m, tmp2_m, minus5b_m, plus20b_m, tmp1_m, tmp1_m);        \
+                                                                               \
+    tmp1_m; /* unscaled halfword sums; no shift or saturation here */          \
+} )
+
+#define AVC_CALC_DPADD_H_6PIX_2COEFF_R_SH(vec0, vec1, vec2, vec3, vec4, vec5)  \
+( { /* halfword form of the 6-tap sum; >>10, saturated, then packed */         \
+    v4i32 tmp1_m;                                                              \
+    v8i16 tmp2_m, tmp3_m;                                                      \
+    v8i16 minus5h_m = __msa_ldi_h(-5);                                         \
+    v8i16 plus20h_m = __msa_ldi_h(20);                                         \
+                                                                               \
+    tmp1_m = (v4i32) __msa_ilvr_h((v8i16) vec5, (v8i16) vec0);                 \
+    tmp1_m = __msa_hadd_s_w((v8i16) tmp1_m, (v8i16) tmp1_m);                   \
+                                                                               \
+    ILVR_H2_SH(vec1, vec4, vec2, vec3, tmp2_m, tmp3_m);                        \
+    DPADD_SH2_SW(tmp2_m, tmp3_m, minus5h_m, plus20h_m, tmp1_m, tmp1_m);        \
+                                                                               \
+    tmp1_m = __msa_srari_w(tmp1_m, 10);                                        \
+    tmp1_m = __msa_sat_s_w(tmp1_m, 7);                                         \
+                                                                               \
+    tmp2_m = __msa_pckev_h((v8i16) tmp1_m, (v8i16) tmp1_m);                    \
+                                                                               \
+    tmp2_m; /* same vector packed with itself, so both halves are equal */     \
+} )
+
+#define AVC_XOR_VSHF_B_AND_APPLY_6TAP_HORIZ_FILT_SH(src0, src1,              \
+                                                    mask0, mask1, mask2)     \
+( { /* NOTE(review): no XOR happens in this body despite the name */         \
+    v8i16 hz_out_m;                                                          \
+    v16i8 vec0_m, vec1_m, vec2_m;                                            \
+    v16i8 minus5b_m = __msa_ldi_b(-5);                                       \
+    v16i8 plus20b_m = __msa_ldi_b(20);                                       \
+                                                                             \
+    vec0_m = __msa_vshf_b((v16i8) mask0, (v16i8) src1, (v16i8) src0);        \
+    hz_out_m = __msa_hadd_s_h(vec0_m, vec0_m);                               \
+                                                                             \
+    VSHF_B2_SB(src0, src1, src0, src1, mask1, mask2, vec1_m, vec2_m);        \
+    DPADD_SB2_SH(vec1_m, vec2_m, minus5b_m, plus20b_m, hz_out_m, hz_out_m);  \
+                                                                             \
+    hz_out_m; /* unscaled 6-tap sum over shuffled src0/src1 bytes */         \
+} )
+
+static void avc_luma_hz_4w_msa(const uint8_t *src, int32_t src_stride,
+                               uint8_t *dst, int32_t dst_stride,
+                               int32_t height)
+{   /* Horizontal 6-tap (1, -5, 20, 20, -5, 1) filter, 4 bytes wide, four rows per loop pass. */
+    uint32_t loop_cnt;
+    v16i8 src0, src1, src2, src3;
+    v8i16 res0, res1;
+    v16u8 out;
+    v16i8 mask0, mask1, mask2;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
+    v16i8 minus5b = __msa_ldi_b(-5);
+    v16i8 plus20b = __msa_ldi_b(20);
+
+    LD_SB3(&luma_mask_arr[48], 16, mask0, mask1, mask2); /* the three "4 width" mask rows */
+    for (loop_cnt = (height >> 2); loop_cnt--;) { /* height / 4 passes; a remainder of height % 4 rows is not processed */
+        LD_SB4(src, src_stride, src0, src1, src2, src3);
+        src += (4 * src_stride);
+
+        XORI_B4_128_SB(src0, src1, src2, src3); /* per macro name: XOR each byte with 128 so bytes can be used as signed */
+        VSHF_B2_SB(src0, src1, src2, src3, mask0, mask0, vec0, vec1); /* two rows are shuffled into each vector */
+        HADD_SB2_SH(vec0, vec1, res0, res1);
+        VSHF_B2_SB(src0, src1, src2, src3, mask1, mask1, vec2, vec3);
+        DPADD_SB2_SH(vec2, vec3, minus5b, minus5b, res0, res1);
+        VSHF_B2_SB(src0, src1, src2, src3, mask2, mask2, vec4, vec5);
+        DPADD_SB2_SH(vec4, vec5, plus20b, plus20b, res0, res1);
+        SRARI_H2_SH(res0, res1, 5); /* shift by 5: the tap weights sum to 32 */
+        SAT_SH2_SH(res0, res1, 7);
+        out = PCKEV_XORI128_UB(res0, res1); /* per macro name: pack to bytes and XOR with 128 again */
+        ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
+        dst += (4 * dst_stride);
+    }
+}
+
+static void avc_luma_hz_8w_msa(const uint8_t *src, int32_t src_stride,
+                               uint8_t *dst, int32_t dst_stride,
+                               int32_t height)
+{   /* Horizontal 6-tap (1, -5, 20, 20, -5, 1) filter, 8 bytes wide, four rows per loop pass. */
+    uint32_t loop_cnt;
+    v16i8 src0, src1, src2, src3;
+    v8i16 res0, res1, res2, res3;
+    v16i8 mask0, mask1, mask2;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
+    v16i8 vec6, vec7, vec8, vec9, vec10, vec11;
+    v16i8 minus5b = __msa_ldi_b(-5);
+    v16i8 plus20b = __msa_ldi_b(20);
+    v16u8 out0, out1;
+
+    LD_SB3(&luma_mask_arr[0], 16, mask0, mask1, mask2); /* the three "8 width" mask rows */
+
+    for (loop_cnt = (height >> 2); loop_cnt--;) { /* height / 4 passes; a remainder of height % 4 rows is not processed */
+        LD_SB4(src, src_stride, src0, src1, src2, src3);
+        src += (4 * src_stride);
+
+        XORI_B4_128_SB(src0, src1, src2, src3); /* per macro name: XOR each byte with 128 so bytes can be used as signed */
+        VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0, vec1); /* one row per vector: each source is shuffled with itself */
+        VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2, vec3);
+        HADD_SB4_SH(vec0, vec1, vec2, vec3, res0, res1, res2, res3);
+        VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec4, vec5);
+        VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec6, vec7);
+        DPADD_SB4_SH(vec4, vec5, vec6, vec7, minus5b, minus5b, minus5b, minus5b,
+                     res0, res1, res2, res3);
+        VSHF_B2_SB(src0, src0, src1, src1, mask2, mask2, vec8, vec9);
+        VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, vec10, vec11);
+        DPADD_SB4_SH(vec8, vec9, vec10, vec11, plus20b, plus20b, plus20b,
+                     plus20b, res0, res1, res2, res3);
+        SRARI_H4_SH(res0, res1, res2, res3, 5); /* shift by 5: the tap weights sum to 32 */
+        SAT_SH4_SH(res0, res1, res2, res3, 7);
+        out0 = PCKEV_XORI128_UB(res0, res1); /* rows 0 and 1 of this group */
+        out1 = PCKEV_XORI128_UB(res2, res3); /* rows 2 and 3 of this group */
+        ST8x4_UB(out0, out1, dst, dst_stride);
+        dst += (4 * dst_stride);
+    }
+}
+
+static void avc_luma_hz_16w_msa(const uint8_t *src, int32_t src_stride,
+                                uint8_t *dst, int32_t dst_stride,
+                                int32_t height)
+{   /* Horizontal 6-tap (1, -5, 20, 20, -5, 1) filter, 16 bytes wide, four rows per loop pass. */
+    uint32_t loop_cnt;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
+    v8i16 res0, res1, res2, res3, res4, res5, res6, res7;
+    v16i8 mask0, mask1, mask2;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
+    v16i8 vec6, vec7, vec8, vec9, vec10, vec11;
+    v16i8 minus5b = __msa_ldi_b(-5);
+    v16i8 plus20b = __msa_ldi_b(20);
+
+    LD_SB3(&luma_mask_arr[0], 16, mask0, mask1, mask2); /* the three "8 width" mask rows, applied to each 8-column half */
+
+    for (loop_cnt = (height >> 2); loop_cnt--;) { /* height / 4 passes; a remainder of height % 4 rows is not processed */
+        LD_SB2(src, 8, src0, src1); /* one row as two vectors, 8 bytes apart */
+        src += src_stride;
+        LD_SB2(src, 8, src2, src3);
+        src += src_stride;
+
+        XORI_B4_128_SB(src0, src1, src2, src3); /* per macro name: XOR each byte with 128 so bytes can be used as signed */
+        VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0, vec3);
+        VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec6, vec9);
+        VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec1, vec4);
+        VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec7, vec10);
+        VSHF_B2_SB(src0, src0, src1, src1, mask2, mask2, vec2, vec5);
+        VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, vec8, vec11);
+        HADD_SB4_SH(vec0, vec3, vec6, vec9, res0, res1, res2, res3);
+        DPADD_SB4_SH(vec1, vec4, vec7, vec10, minus5b, minus5b, minus5b,
+                     minus5b, res0, res1, res2, res3);
+        DPADD_SB4_SH(vec2, vec5, vec8, vec11, plus20b, plus20b, plus20b,
+                     plus20b, res0, res1, res2, res3);
+
+        LD_SB2(src, 8, src4, src5); /* rows 3 and 4 of the group; vec0..vec11 are reused below */
+        src += src_stride;
+        LD_SB2(src, 8, src6, src7);
+        src += src_stride;
+
+        XORI_B4_128_SB(src4, src5, src6, src7);
+        VSHF_B2_SB(src4, src4, src5, src5, mask0, mask0, vec0, vec3);
+        VSHF_B2_SB(src6, src6, src7, src7, mask0, mask0, vec6, vec9);
+        VSHF_B2_SB(src4, src4, src5, src5, mask1, mask1, vec1, vec4);
+        VSHF_B2_SB(src6, src6, src7, src7, mask1, mask1, vec7, vec10);
+        VSHF_B2_SB(src4, src4, src5, src5, mask2, mask2, vec2, vec5);
+        VSHF_B2_SB(src6, src6, src7, src7, mask2, mask2, vec8, vec11);
+        HADD_SB4_SH(vec0, vec3, vec6, vec9, res4, res5, res6, res7);
+        DPADD_SB4_SH(vec1, vec4, vec7, vec10, minus5b, minus5b, minus5b,
+                     minus5b, res4, res5, res6, res7);
+        DPADD_SB4_SH(vec2, vec5, vec8, vec11, plus20b, plus20b, plus20b,
+                     plus20b, res4, res5, res6, res7);
+        SRARI_H4_SH(res0, res1, res2, res3, 5); /* shift by 5: the tap weights sum to 32 */
+        SRARI_H4_SH(res4, res5, res6, res7, 5);
+        SAT_SH4_SH(res0, res1, res2, res3, 7);
+        SAT_SH4_SH(res4, res5, res6, res7, 7);
+        PCKEV_B4_SB(res1, res0, res3, res2, res5, res4, res7, res6,
+                    vec0, vec1, vec2, vec3); /* each output vector joins the two 8-column halves of one row */
+        XORI_B4_128_SB(vec0, vec1, vec2, vec3); /* XOR with 128 again before storing */
+
+        ST_SB4(vec0, vec1, vec2, vec3, dst, dst_stride);
+        dst += (4 * dst_stride);
+    }
+}
+
+static void avc_luma_hz_qrt_4w_msa(const uint8_t *src, int32_t src_stride,
+                                   uint8_t *dst, int32_t dst_stride,
+                                   int32_t height, uint8_t hor_offset)
+{   /* 4-wide horizontal 6-tap filter whose result is averaged with source bytes at offset 2 + hor_offset. */
+    uint8_t slide;
+    uint32_t loop_cnt;
+    v16i8 src0, src1, src2, src3;
+    v8i16 res0, res1;
+    v16i8 res, mask0, mask1, mask2;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
+    v16i8 minus5b = __msa_ldi_b(-5);
+    v16i8 plus20b = __msa_ldi_b(20);
+
+    LD_SB3(&luma_mask_arr[48], 16, mask0, mask1, mask2); /* the three "4 width" mask rows */
+    slide = 2 + hor_offset; /* byte offset, within each loaded row, of the pixels to average with */
+
+    for (loop_cnt = (height >> 2); loop_cnt--;) { /* height / 4 passes; a remainder of height % 4 rows is not processed */
+        LD_SB4(src, src_stride, src0, src1, src2, src3);
+        src += (4 * src_stride);
+
+        XORI_B4_128_SB(src0, src1, src2, src3); /* per macro name: XOR each byte with 128 so bytes can be used as signed */
+        VSHF_B2_SB(src0, src1, src2, src3, mask0, mask0, vec0, vec1);
+        HADD_SB2_SH(vec0, vec1, res0, res1);
+        VSHF_B2_SB(src0, src1, src2, src3, mask1, mask1, vec2, vec3);
+        DPADD_SB2_SH(vec2, vec3, minus5b, minus5b, res0, res1);
+        VSHF_B2_SB(src0, src1, src2, src3, mask2, mask2, vec4, vec5);
+        DPADD_SB2_SH(vec4, vec5, plus20b, plus20b, res0, res1);
+        SRARI_H2_SH(res0, res1, 5); /* shift by 5: the tap weights sum to 32 */
+        SAT_SH2_SH(res0, res1, 7);
+
+        res = __msa_pckev_b((v16i8) res1, (v16i8) res0);
+        src0 = __msa_sld_b(src0, src0, slide); /* bring byte `slide` of each row down to byte 0 */
+        src1 = __msa_sld_b(src1, src1, slide);
+        src2 = __msa_sld_b(src2, src2, slide);
+        src3 = __msa_sld_b(src3, src3, slide);
+        src0 = (v16i8) __msa_insve_w((v4i32) src0, 1, (v4i32) src1); /* gather word 0 of rows 0 and 1 */
+        src1 = (v16i8) __msa_insve_w((v4i32) src2, 1, (v4i32) src3); /* gather word 0 of rows 2 and 3 */
+        src0 = (v16i8) __msa_insve_d((v2i64) src0, 1, (v2i64) src1); /* src0 now holds 4 bytes from each of the four rows */
+        res = __msa_aver_s_b(res, src0); /* signed average: both operands still carry the XOR-128 bias */
+        res = (v16i8) __msa_xori_b((v16u8) res, 128); /* XOR with 128 again before storing */
+
+        ST4x4_UB(res, res, 0, 1, 2, 3, dst, dst_stride);
+        dst += (4 * dst_stride);
+    }
+}
+
+static void avc_luma_hz_qrt_8w_msa(const uint8_t *src, int32_t src_stride,
+                                   uint8_t *dst, int32_t dst_stride,
+                                   int32_t height, uint8_t hor_offset)
+{   /* 8-wide horizontal 6-tap filter whose result is averaged with source bytes at offset 2 + hor_offset. */
+    uint8_t slide;
+    uint32_t loop_cnt;
+    v16i8 src0, src1, src2, src3;
+    v16i8 tmp0, tmp1;
+    v8i16 res0, res1, res2, res3;
+    v16i8 mask0, mask1, mask2;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
+    v16i8 vec6, vec7, vec8, vec9, vec10, vec11;
+    v16i8 minus5b = __msa_ldi_b(-5);
+    v16i8 plus20b = __msa_ldi_b(20);
+
+    LD_SB3(&luma_mask_arr[0], 16, mask0, mask1, mask2); /* the three "8 width" mask rows */
+    slide = 2 + hor_offset; /* byte offset, within each loaded row, of the pixels to average with */
+
+    for (loop_cnt = height >> 2; loop_cnt--;) { /* height / 4 passes; a remainder of height % 4 rows is not processed */
+        LD_SB4(src, src_stride, src0, src1, src2, src3);
+        src += (4 * src_stride);
+
+        XORI_B4_128_SB(src0, src1, src2, src3); /* per macro name: XOR each byte with 128 so bytes can be used as signed */
+        VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0, vec1);
+        VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2, vec3);
+        HADD_SB4_SH(vec0, vec1, vec2, vec3, res0, res1, res2, res3);
+        VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec4, vec5);
+        VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec6, vec7);
+        DPADD_SB4_SH(vec4, vec5, vec6, vec7, minus5b, minus5b, minus5b, minus5b,
+                     res0, res1, res2, res3);
+        VSHF_B2_SB(src0, src0, src1, src1, mask2, mask2, vec8, vec9);
+        VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, vec10, vec11);
+        DPADD_SB4_SH(vec8, vec9, vec10, vec11, plus20b, plus20b, plus20b,
+                     plus20b, res0, res1, res2, res3);
+
+        src0 = __msa_sld_b(src0, src0, slide); /* bring byte `slide` of each row down to byte 0 */
+        src1 = __msa_sld_b(src1, src1, slide);
+        src2 = __msa_sld_b(src2, src2, slide);
+        src3 = __msa_sld_b(src3, src3, slide);
+
+        SRARI_H4_SH(res0, res1, res2, res3, 5); /* shift by 5: the tap weights sum to 32 */
+        SAT_SH4_SH(res0, res1, res2, res3, 7);
+        PCKEV_B2_SB(res1, res0, res3, res2, tmp0, tmp1); /* tmp0: rows 0-1, tmp1: rows 2-3 */
+        PCKEV_D2_SB(src1, src0, src3, src2, src0, src1); /* low 8 bytes of two rows per vector, matching tmp0/tmp1 */
+
+        tmp0 = __msa_aver_s_b(tmp0, src0); /* signed average: both operands still carry the XOR-128 bias */
+        tmp1 = __msa_aver_s_b(tmp1, src1);
+
+        XORI_B2_128_SB(tmp0, tmp1); /* XOR with 128 again before storing */
+        ST8x4_UB(tmp0, tmp1, dst, dst_stride);
+
+        dst += (4 * dst_stride);
+    }
+}
+
+static void avc_luma_hz_qrt_16w_msa(const uint8_t *src, int32_t src_stride,
+                                    uint8_t *dst, int32_t dst_stride,
+                                    int32_t height, uint8_t hor_offset)
+{   /* 16-wide horizontal 6-tap filter averaged with a 16-byte source window; two rows per loop pass. */
+    uint32_t loop_cnt;
+    v16i8 dst0, dst1;
+    v16i8 src0, src1, src2, src3;
+    v16i8 mask0, mask1, mask2, vshf;
+    v8i16 res0, res1, res2, res3;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
+    v16i8 vec6, vec7, vec8, vec9, vec10, vec11;
+    v16i8 minus5b = __msa_ldi_b(-5);
+    v16i8 plus20b = __msa_ldi_b(20);
+
+    LD_SB3(&luma_mask_arr[0], 16, mask0, mask1, mask2); /* the three "8 width" mask rows, applied to each 8-column half */
+
+    if (hor_offset) {
+        vshf = LD_SB(&luma_mask_arr[16 + 96]); /* table row whose indices start at 3 */
+    } else {
+        vshf = LD_SB(&luma_mask_arr[96]); /* table row whose indices start at 2 */
+    }
+
+    for (loop_cnt = height >> 1; loop_cnt--;) { /* height / 2 passes; an odd trailing row is not processed */
+        LD_SB2(src, 8, src0, src1); /* one row as two vectors, 8 bytes apart */
+        src += src_stride;
+        LD_SB2(src, 8, src2, src3);
+        src += src_stride;
+
+        XORI_B4_128_SB(src0, src1, src2, src3); /* per macro name: XOR each byte with 128 so bytes can be used as signed */
+        VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0, vec3);
+        VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec6, vec9);
+        VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec1, vec4);
+        VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec7, vec10);
+        VSHF_B2_SB(src0, src0, src1, src1, mask2, mask2, vec2, vec5);
+        VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, vec8, vec11);
+        HADD_SB4_SH(vec0, vec3, vec6, vec9, res0, res1, res2, res3);
+        DPADD_SB4_SH(vec1, vec4, vec7, vec10, minus5b, minus5b, minus5b,
+                     minus5b, res0, res1, res2, res3);
+        DPADD_SB4_SH(vec2, vec5, vec8, vec11, plus20b, plus20b, plus20b,
+                     plus20b, res0, res1, res2, res3);
+        VSHF_B2_SB(src0, src1, src2, src3, vshf, vshf, src0, src2); /* overwrites src0/src2 with the window to average against */
+        SRARI_H4_SH(res0, res1, res2, res3, 5); /* shift by 5: the tap weights sum to 32 */
+        SAT_SH4_SH(res0, res1, res2, res3, 7);
+        PCKEV_B2_SB(res1, res0, res3, res2, dst0, dst1); /* one full 16-byte row per vector */
+
+        dst0 = __msa_aver_s_b(dst0, src0); /* signed average: both operands still carry the XOR-128 bias */
+        dst1 = __msa_aver_s_b(dst1, src2);
+
+        XORI_B2_128_SB(dst0, dst1); /* XOR with 128 again before storing */
+
+        ST_SB2(dst0, dst1, dst, dst_stride);
+        dst += (2 * dst_stride);
+    }
+}
+
+static void avc_luma_vt_4w_msa(const uint8_t *src, int32_t src_stride,
+                               uint8_t *dst, int32_t dst_stride,
+                               int32_t height)
+{   /* Vertical 6-tap filter, 4 bytes wide: 5 rows are pre-loaded, then 4 new rows are consumed per loop pass. */
+    int32_t loop_cnt;
+    int16_t filt_const0 = 0xfb01; /* low byte 1, high byte 0xfb (-5); NOTE(review): exceeds INT16_MAX, relies on wrapping conversion */
+    int16_t filt_const1 = 0x1414; /* both bytes 20 */
+    int16_t filt_const2 = 0x1fb;  /* low byte 0xfb (-5), high byte 1 */
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16i8 src10_r, src32_r, src54_r, src76_r, src21_r, src43_r, src65_r;
+    v16i8 src87_r, src2110, src4332, src6554, src8776;
+    v16i8 filt0, filt1, filt2;
+    v8i16 out10, out32;
+    v16u8 out;
+
+    filt0 = (v16i8) __msa_fill_h(filt_const0); /* replicate each coefficient pair across the vector */
+    filt1 = (v16i8) __msa_fill_h(filt_const1);
+    filt2 = (v16i8) __msa_fill_h(filt_const2);
+
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    src += (5 * src_stride);
+
+    ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3,
+               src10_r, src21_r, src32_r, src43_r); /* interleave vertically adjacent rows */
+    ILVR_D2_SB(src21_r, src10_r, src43_r, src32_r, src2110, src4332); /* two interleaved row pairs per vector */
+    XORI_B2_128_SB(src2110, src4332); /* bias applied to the combined vectors; src0..src8 themselves stay unbiased */
+
+    for (loop_cnt = (height >> 2); loop_cnt--;) { /* height / 4 passes; a remainder of height % 4 rows is not processed */
+        LD_SB4(src, src_stride, src5, src6, src7, src8);
+        src += (4 * src_stride);
+
+        ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7,
+                   src54_r, src65_r, src76_r, src87_r);
+        ILVR_D2_SB(src65_r, src54_r, src87_r, src76_r, src6554, src8776);
+        XORI_B2_128_SB(src6554, src8776);
+        out10 = DPADD_SH3_SH(src2110, src4332, src6554, filt0, filt1, filt2); /* presumably output rows 0 and 1 -- confirm helper */
+        out32 = DPADD_SH3_SH(src4332, src6554, src8776, filt0, filt1, filt2); /* presumably output rows 2 and 3 */
+        SRARI_H2_SH(out10, out32, 5); /* shift by 5: the tap weights sum to 32 */
+        SAT_SH2_SH(out10, out32, 7);
+        out = PCKEV_XORI128_UB(out10, out32); /* per macro name: pack to bytes and XOR with 128 again */
+        ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
+
+        dst += (4 * dst_stride);
+        src2110 = src6554; /* slide the row window down by four rows */
+        src4332 = src8776;
+        src4 = src8;
+    }
+}
+
+static void avc_luma_vt_8w_msa(const uint8_t *src, int32_t src_stride,
+                               uint8_t *dst, int32_t dst_stride,
+                               int32_t height)
+{   /* Vertical 6-tap filter, 8 bytes wide: 5 rows are pre-loaded, then 4 new rows are consumed per loop pass. */
+    int32_t loop_cnt;
+    int16_t filt_const0 = 0xfb01; /* low byte 1, high byte 0xfb (-5); NOTE(review): exceeds INT16_MAX, relies on wrapping conversion */
+    int16_t filt_const1 = 0x1414; /* both bytes 20 */
+    int16_t filt_const2 = 0x1fb;  /* low byte 0xfb (-5), high byte 1 */
+    v16i8 src0, src1, src2, src3, src4, src7, src8, src9, src10; /* no src5/src6: the four new rows are named src7..src10 */
+    v16i8 src10_r, src32_r, src76_r, src98_r;
+    v16i8 src21_r, src43_r, src87_r, src109_r;
+    v8i16 out0_r, out1_r, out2_r, out3_r;
+    v16i8 filt0, filt1, filt2;
+    v16u8 out0, out1;
+
+    filt0 = (v16i8) __msa_fill_h(filt_const0); /* replicate each coefficient pair across the vector */
+    filt1 = (v16i8) __msa_fill_h(filt_const1);
+    filt2 = (v16i8) __msa_fill_h(filt_const2);
+
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    src += (5 * src_stride);
+
+    XORI_B5_128_SB(src0, src1, src2, src3, src4); /* per macro name: XOR each byte with 128 so bytes can be used as signed */
+    ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3,
+               src10_r, src21_r, src32_r, src43_r); /* interleave vertically adjacent rows */
+
+    for (loop_cnt = (height >> 2); loop_cnt--;) { /* height / 4 passes; a remainder of height % 4 rows is not processed */
+        LD_SB4(src, src_stride, src7, src8, src9, src10);
+        src += (4 * src_stride);
+
+        XORI_B4_128_SB(src7, src8, src9, src10);
+        ILVR_B4_SB(src7, src4, src8, src7, src9, src8, src10, src9,
+                   src76_r, src87_r, src98_r, src109_r); /* src76_r pairs src4 (last old row) with src7 (first new row) */
+        out0_r = DPADD_SH3_SH(src10_r, src32_r, src76_r, filt0, filt1, filt2);
+        out1_r = DPADD_SH3_SH(src21_r, src43_r, src87_r, filt0, filt1, filt2);
+        out2_r = DPADD_SH3_SH(src32_r, src76_r, src98_r, filt0, filt1, filt2);
+        out3_r = DPADD_SH3_SH(src43_r, src87_r, src109_r, filt0, filt1, filt2);
+        SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 5); /* shift by 5: the tap weights sum to 32 */
+        SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
+        out0 = PCKEV_XORI128_UB(out0_r, out1_r); /* rows 0 and 1 of this group */
+        out1 = PCKEV_XORI128_UB(out2_r, out3_r); /* rows 2 and 3 of this group */
+        ST8x4_UB(out0, out1, dst, dst_stride);
+        dst += (4 * dst_stride);
+
+        src10_r = src76_r; /* slide the row window down by four rows */
+        src32_r = src98_r;
+        src21_r = src87_r;
+        src43_r = src109_r;
+        src4 = src10;
+    }
+}
+
+static void avc_luma_vt_16w_msa(const uint8_t *src, int32_t src_stride,
+                                uint8_t *dst, int32_t dst_stride,
+                                int32_t height)
+{   /* Vertical 6-tap filter, 16 bytes wide: 5 rows are pre-loaded, then 4 new rows are consumed per loop pass. */
+    int32_t loop_cnt;
+    int16_t filt_const0 = 0xfb01; /* low byte 1, high byte 0xfb (-5); NOTE(review): exceeds INT16_MAX, relies on wrapping conversion */
+    int16_t filt_const1 = 0x1414; /* both bytes 20 */
+    int16_t filt_const2 = 0x1fb;  /* low byte 0xfb (-5), high byte 1 */
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16i8 src10_r, src32_r, src54_r, src76_r, src21_r, src43_r, src65_r;
+    v16i8 src87_r, src10_l, src32_l, src54_l, src76_l, src21_l, src43_l;
+    v16i8 src65_l, src87_l;
+    v8i16 out0_r, out1_r, out2_r, out3_r, out0_l, out1_l, out2_l, out3_l;
+    v16u8 res0, res1, res2, res3;
+    v16i8 filt0, filt1, filt2;
+
+    filt0 = (v16i8) __msa_fill_h(filt_const0); /* replicate each coefficient pair across the vector */
+    filt1 = (v16i8) __msa_fill_h(filt_const1);
+    filt2 = (v16i8) __msa_fill_h(filt_const2);
+
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    src += (5 * src_stride);
+
+    XORI_B5_128_SB(src0, src1, src2, src3, src4); /* per macro name: XOR each byte with 128 so bytes can be used as signed */
+    ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3,
+               src10_r, src21_r, src32_r, src43_r); /* "_r" half of the interleaved adjacent rows */
+    ILVL_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3,
+               src10_l, src21_l, src32_l, src43_l); /* "_l" half of the same row pairs */
+
+    for (loop_cnt = (height >> 2); loop_cnt--;) { /* height / 4 passes; a remainder of height % 4 rows is not processed */
+        LD_SB4(src, src_stride, src5, src6, src7, src8);
+        src += (4 * src_stride);
+
+        XORI_B4_128_SB(src5, src6, src7, src8);
+        ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7,
+                   src54_r, src65_r, src76_r, src87_r);
+        ILVL_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7,
+                   src54_l, src65_l, src76_l, src87_l);
+        out0_r = DPADD_SH3_SH(src10_r, src32_r, src54_r, filt0, filt1, filt2);
+        out1_r = DPADD_SH3_SH(src21_r, src43_r, src65_r, filt0, filt1, filt2);
+        out2_r = DPADD_SH3_SH(src32_r, src54_r, src76_r, filt0, filt1, filt2);
+        out3_r = DPADD_SH3_SH(src43_r, src65_r, src87_r, filt0, filt1, filt2);
+        out0_l = DPADD_SH3_SH(src10_l, src32_l, src54_l, filt0, filt1, filt2);
+        out1_l = DPADD_SH3_SH(src21_l, src43_l, src65_l, filt0, filt1, filt2);
+        out2_l = DPADD_SH3_SH(src32_l, src54_l, src76_l, filt0, filt1, filt2);
+        out3_l = DPADD_SH3_SH(src43_l, src65_l, src87_l, filt0, filt1, filt2);
+        SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 5); /* shift by 5: the tap weights sum to 32 */
+        SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
+        SRARI_H4_SH(out0_l, out1_l, out2_l, out3_l, 5);
+        SAT_SH4_SH(out0_l, out1_l, out2_l, out3_l, 7);
+        PCKEV_B4_UB(out0_l, out0_r, out1_l, out1_r, out2_l, out2_r, out3_l,
+                    out3_r, res0, res1, res2, res3); /* rejoin the "_r" and "_l" halves into one 16-byte row each */
+        XORI_B4_128_UB(res0, res1, res2, res3); /* XOR with 128 again before storing */
+
+        ST_UB4(res0, res1, res2, res3, dst, dst_stride);
+        dst += (4 * dst_stride);
+
+        src10_r = src54_r; /* slide the row window down by four rows */
+        src32_r = src76_r;
+        src21_r = src65_r;
+        src43_r = src87_r;
+        src10_l = src54_l;
+        src32_l = src76_l;
+        src21_l = src65_l;
+        src43_l = src87_l;
+        src4 = src8;
+    }
+}
+
+static void avc_luma_vt_qrt_4w_msa(const uint8_t *src, int32_t src_stride,
+                                   uint8_t *dst, int32_t dst_stride,
+                                   int32_t height, uint8_t ver_offset)
+{   /* 4-wide vertical 6-tap filter whose result is averaged with unfiltered source rows picked by ver_offset. */
+    int32_t loop_cnt;
+    int16_t filt_const0 = 0xfb01; /* low byte 1, high byte 0xfb (-5); NOTE(review): exceeds INT16_MAX, relies on wrapping conversion */
+    int16_t filt_const1 = 0x1414; /* both bytes 20 */
+    int16_t filt_const2 = 0x1fb;  /* low byte 0xfb (-5), high byte 1 */
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16i8 src10_r, src32_r, src54_r, src76_r, src21_r, src43_r, src65_r;
+    v16i8 src87_r, src2110, src4332, src6554, src8776;
+    v8i16 out10, out32;
+    v16i8 filt0, filt1, filt2;
+    v16u8 out;
+
+    filt0 = (v16i8) __msa_fill_h(filt_const0); /* replicate each coefficient pair across the vector */
+    filt1 = (v16i8) __msa_fill_h(filt_const1);
+    filt2 = (v16i8) __msa_fill_h(filt_const2);
+
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    src += (5 * src_stride);
+
+    ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3,
+               src10_r, src21_r, src32_r, src43_r); /* interleave vertically adjacent rows */
+    ILVR_D2_SB(src21_r, src10_r, src43_r, src32_r, src2110, src4332); /* two interleaved row pairs per vector */
+    XORI_B2_128_SB(src2110, src4332); /* bias applied to the combined vectors; src0..src8 themselves stay unbiased */
+
+    for (loop_cnt = (height >> 2); loop_cnt--;) { /* height / 4 passes; a remainder of height % 4 rows is not processed */
+        LD_SB4(src, src_stride, src5, src6, src7, src8);
+        src += (4 * src_stride);
+
+        ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7,
+                   src54_r, src65_r, src76_r, src87_r);
+        ILVR_D2_SB(src65_r, src54_r, src87_r, src76_r, src6554, src8776);
+        XORI_B2_128_SB(src6554, src8776);
+        out10 = DPADD_SH3_SH(src2110, src4332, src6554, filt0, filt1, filt2);
+        out32 = DPADD_SH3_SH(src4332, src6554, src8776, filt0, filt1, filt2);
+        SRARI_H2_SH(out10, out32, 5); /* shift by 5: the tap weights sum to 32 */
+        SAT_SH2_SH(out10, out32, 7);
+
+        out = PCKEV_XORI128_UB(out10, out32); /* per macro name: pack to bytes and XOR with 128 again */
+
+        if (ver_offset) { /* src32_r/src54_r are reused here as scratch for the rows to average against */
+            src32_r = (v16i8) __msa_insve_w((v4i32) src3, 1, (v4i32) src4); /* rows src3, src4 */
+            src54_r = (v16i8) __msa_insve_w((v4i32) src5, 1, (v4i32) src6); /* rows src5, src6 */
+        } else {
+            src32_r = (v16i8) __msa_insve_w((v4i32) src2, 1, (v4i32) src3); /* rows src2, src3 */
+            src54_r = (v16i8) __msa_insve_w((v4i32) src4, 1, (v4i32) src5); /* rows src4, src5 */
+        }
+
+        src32_r = (v16i8) __msa_insve_d((v2i64) src32_r, 1, (v2i64) src54_r); /* 4 bytes from each of the four chosen rows */
+        out = __msa_aver_u_b(out, (v16u8) src32_r); /* unsigned average: neither operand carries the 128 bias here */
+
+        ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
+        dst += (4 * dst_stride);
+        src2110 = src6554; /* slide the row window down by four rows */
+        src4332 = src8776;
+        src2 = src6; /* src2..src4 are kept up to date because the averaging step above reads them */
+        src3 = src7;
+        src4 = src8;
+    }
+}
+
+static void avc_luma_vt_qrt_8w_msa(const uint8_t *src, int32_t src_stride,
+                                   uint8_t *dst, int32_t dst_stride,
+                                   int32_t height, uint8_t ver_offset)
+{   /* 8-wide vertical 6-tap filter whose result is averaged with unfiltered source rows picked by ver_offset. */
+    int32_t loop_cnt;
+    int16_t filt_const0 = 0xfb01; /* low byte 1, high byte 0xfb (-5); NOTE(review): exceeds INT16_MAX, relies on wrapping conversion */
+    int16_t filt_const1 = 0x1414; /* both bytes 20 */
+    int16_t filt_const2 = 0x1fb;  /* low byte 0xfb (-5), high byte 1 */
+    v16i8 src0, src1, src2, src3, src4, src7, src8, src9, src10; /* no src5/src6: the four new rows are named src7..src10 */
+    v16i8 src10_r, src32_r, src76_r, src98_r;
+    v16i8 src21_r, src43_r, src87_r, src109_r;
+    v8i16 out0_r, out1_r, out2_r, out3_r;
+    v16i8 res0, res1;
+    v16i8 filt0, filt1, filt2;
+
+    filt0 = (v16i8) __msa_fill_h(filt_const0); /* replicate each coefficient pair across the vector */
+    filt1 = (v16i8) __msa_fill_h(filt_const1);
+    filt2 = (v16i8) __msa_fill_h(filt_const2);
+
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    src += (5 * src_stride);
+
+    XORI_B5_128_SB(src0, src1, src2, src3, src4); /* per macro name: XOR each byte with 128 so bytes can be used as signed */
+    ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3,
+               src10_r, src21_r, src32_r, src43_r); /* interleave vertically adjacent rows */
+
+    for (loop_cnt = (height >> 2); loop_cnt--;) { /* height / 4 passes; a remainder of height % 4 rows is not processed */
+        LD_SB4(src, src_stride, src7, src8, src9, src10);
+        src += (4 * src_stride);
+
+        XORI_B4_128_SB(src7, src8, src9, src10);
+        ILVR_B4_SB(src7, src4, src8, src7, src9, src8, src10, src9,
+                   src76_r, src87_r, src98_r, src109_r); /* src76_r pairs src4 (last old row) with src7 (first new row) */
+        out0_r = DPADD_SH3_SH(src10_r, src32_r, src76_r, filt0, filt1, filt2);
+        out1_r = DPADD_SH3_SH(src21_r, src43_r, src87_r, filt0, filt1, filt2);
+        out2_r = DPADD_SH3_SH(src32_r, src76_r, src98_r, filt0, filt1, filt2);
+        out3_r = DPADD_SH3_SH(src43_r, src87_r, src109_r, filt0, filt1, filt2);
+        SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 5); /* shift by 5: the tap weights sum to 32 */
+        SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
+        PCKEV_B2_SB(out1_r, out0_r, out3_r, out2_r, res0, res1); /* res0: rows 0-1, res1: rows 2-3 */
+
+        if (ver_offset) { /* src10_r/src32_r are reused here as scratch; they are reassigned after the store */
+            PCKEV_D2_SB(src4, src3, src8, src7, src10_r, src32_r); /* rows src3, src4, src7, src8 */
+        } else {
+            PCKEV_D2_SB(src3, src2, src7, src4, src10_r, src32_r); /* rows src2, src3, src4, src7 */
+        }
+
+        res0 = __msa_aver_s_b(res0, (v16i8) src10_r); /* signed average: both operands still carry the XOR-128 bias */
+        res1 = __msa_aver_s_b(res1, (v16i8) src32_r);
+
+        XORI_B2_128_SB(res0, res1); /* XOR with 128 again before storing */
+        ST8x4_UB(res0, res1, dst, dst_stride);
+
+        dst += (4 * dst_stride);
+        src10_r = src76_r; /* slide the row window down by four rows */
+        src32_r = src98_r;
+        src21_r = src87_r;
+        src43_r = src109_r;
+        src2 = src8; /* src2..src4 are kept up to date because the averaging step above reads them */
+        src3 = src9;
+        src4 = src10;
+    }
+}
+
+static void avc_luma_vt_qrt_16w_msa(const uint8_t *src, int32_t src_stride,
+                                    uint8_t *dst, int32_t dst_stride,
+                                    int32_t height, uint8_t ver_offset)
+{   /* 16-wide vertical 6-tap filter; result is averaged with a source row */
+    int32_t loop_cnt;               /* one iteration per four output rows */
+    int16_t filt_const0 = 0xfb01;   /* low byte 1, high byte 0xfb (-5) */
+    int16_t filt_const1 = 0x1414;   /* low byte 20, high byte 20 */
+    int16_t filt_const2 = 0x1fb;    /* low byte 0xfb (-5), high byte 1 */
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16i8 src10_r, src32_r, src54_r, src76_r, src21_r, src43_r, src65_r;
+    v16i8 src87_r, src10_l, src32_l, src54_l, src76_l, src21_l, src43_l;
+    v16i8 src65_l, src87_l;
+    v8i16 out0_r, out1_r, out2_r, out3_r, out0_l, out1_l, out2_l, out3_l;
+    v16u8 res0, res1, res2, res3;
+    v16i8 filt0, filt1, filt2;
+    /* Replicate each tap pair into every halfword lane. */
+    filt0 = (v16i8) __msa_fill_h(filt_const0);
+    filt1 = (v16i8) __msa_fill_h(filt_const1);
+    filt2 = (v16i8) __msa_fill_h(filt_const2);
+    /* Prime the window with the first five source rows. */
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    src += (5 * src_stride);
+    /* XORI_*_128 presumably flips the sign bit so pixels become signed. */
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3,
+               src10_r, src21_r, src32_r, src43_r);
+    ILVL_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3,
+               src10_l, src21_l, src32_l, src43_l);
+    /* Four rows per pass; a height % 4 remainder is never processed. */
+    for (loop_cnt = (height >> 2); loop_cnt--;) {
+        LD_SB4(src, src_stride, src5, src6, src7, src8);
+        src += (4 * src_stride);
+        /* _r vectors are built by ILVR_B4_SB, _l vectors by ILVL_B4_SB. */
+        XORI_B4_128_SB(src5, src6, src7, src8);
+        ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7,
+                   src54_r, src65_r, src76_r, src87_r);
+        ILVL_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7,
+                   src54_l, src65_l, src76_l, src87_l);
+        out0_r = DPADD_SH3_SH(src10_r, src32_r, src54_r, filt0, filt1, filt2);
+        out1_r = DPADD_SH3_SH(src21_r, src43_r, src65_r, filt0, filt1, filt2);
+        out2_r = DPADD_SH3_SH(src32_r, src54_r, src76_r, filt0, filt1, filt2);
+        out3_r = DPADD_SH3_SH(src43_r, src65_r, src87_r, filt0, filt1, filt2);
+        out0_l = DPADD_SH3_SH(src10_l, src32_l, src54_l, filt0, filt1, filt2);
+        out1_l = DPADD_SH3_SH(src21_l, src43_l, src65_l, filt0, filt1, filt2);
+        out2_l = DPADD_SH3_SH(src32_l, src54_l, src76_l, filt0, filt1, filt2);
+        out3_l = DPADD_SH3_SH(src43_l, src65_l, src87_l, filt0, filt1, filt2);
+        SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 5);
+        SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
+        SRARI_H4_SH(out0_l, out1_l, out2_l, out3_l, 5);
+        SAT_SH4_SH(out0_l, out1_l, out2_l, out3_l, 7);
+        PCKEV_B4_UB(out0_l, out0_r, out1_l, out1_r, out2_l, out2_r, out3_l,
+                    out3_r, res0, res1, res2, res3);
+        /* Average with window rows 3..6 if ver_offset is set, else 2..5. */
+        if (ver_offset) {
+            res0 = (v16u8) __msa_aver_s_b((v16i8) res0, src3);
+            res1 = (v16u8) __msa_aver_s_b((v16i8) res1, src4);
+            res2 = (v16u8) __msa_aver_s_b((v16i8) res2, src5);
+            res3 = (v16u8) __msa_aver_s_b((v16i8) res3, src6);
+        } else {
+            res0 = (v16u8) __msa_aver_s_b((v16i8) res0, src2);
+            res1 = (v16u8) __msa_aver_s_b((v16i8) res1, src3);
+            res2 = (v16u8) __msa_aver_s_b((v16i8) res2, src4);
+            res3 = (v16u8) __msa_aver_s_b((v16i8) res3, src5);
+        }
+        /* Second XOR with 128, presumably restoring unsigned pixel values. */
+        XORI_B4_128_UB(res0, res1, res2, res3);
+        ST_UB4(res0, res1, res2, res3, dst, dst_stride);
+
+        dst += (4 * dst_stride);
+        /* Slide the window down four rows for the next pass. */
+        src10_r = src54_r;
+        src32_r = src76_r;
+        src21_r = src65_r;
+        src43_r = src87_r;
+        src10_l = src54_l;
+        src32_l = src76_l;
+        src21_l = src65_l;
+        src43_l = src87_l;
+        src2 = src6;
+        src3 = src7;
+        src4 = src8;
+    }
+}
+
+static void avc_luma_mid_4w_msa(const uint8_t *src, int32_t src_stride,
+                                uint8_t *dst, int32_t dst_stride,
+                                int32_t height)
+{   /* 4-wide filter: horizontal pass first, then vertical over its output */
+    uint32_t loop_cnt;              /* one iteration per four output rows */
+    v16i8 src0, src1, src2, src3, src4;
+    v16i8 mask0, mask1, mask2;
+    v8i16 hz_out0, hz_out1, hz_out2, hz_out3;
+    v8i16 hz_out4, hz_out5, hz_out6, hz_out7, hz_out8;
+    v8i16 dst0, dst1, dst2, dst3;
+    /* Three shuffle masks, read from luma_mask_arr starting at index 48. */
+    LD_SB3(&luma_mask_arr[48], 16, mask0, mask1, mask2);
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    src += (5 * src_stride);
+    /* XORI_*_128 presumably flips the sign bit so pixels become signed. */
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    /* Each of these calls filters two rows into a single result vector. */
+    hz_out0 = AVC_XOR_VSHF_B_AND_APPLY_6TAP_HORIZ_FILT_SH(src0, src1,
+                                                          mask0, mask1, mask2);
+    hz_out2 = AVC_XOR_VSHF_B_AND_APPLY_6TAP_HORIZ_FILT_SH(src2, src3,
+                                                          mask0, mask1, mask2);
+    /* PCKOD_D2_SH presumably extracts the second row of each pair. */
+    PCKOD_D2_SH(hz_out0, hz_out0, hz_out2, hz_out2, hz_out1, hz_out3);
+
+    hz_out4 = AVC_HORZ_FILTER_SH(src4, mask0, mask1, mask2);
+    /* Four rows per pass; a height % 4 remainder is never processed. */
+    for (loop_cnt = (height >> 2); loop_cnt--;) {
+        LD_SB4(src, src_stride, src0, src1, src2, src3);
+        src += (4 * src_stride);
+
+        XORI_B4_128_SB(src0, src1, src2, src3);
+
+        hz_out5 = AVC_XOR_VSHF_B_AND_APPLY_6TAP_HORIZ_FILT_SH(src0, src1,
+                                                              mask0, mask1,
+                                                              mask2);
+        hz_out7 = AVC_XOR_VSHF_B_AND_APPLY_6TAP_HORIZ_FILT_SH(src2, src3,
+                                                              mask0, mask1,
+                                                              mask2);
+
+        PCKOD_D2_SH(hz_out5, hz_out5, hz_out7, hz_out7, hz_out6, hz_out8);
+        /* Vertical pass: each output row uses six consecutive hz rows. */
+        dst0 = AVC_CALC_DPADD_H_6PIX_2COEFF_R_SH(hz_out0, hz_out1, hz_out2,
+                                                 hz_out3, hz_out4, hz_out5);
+        dst1 = AVC_CALC_DPADD_H_6PIX_2COEFF_R_SH(hz_out1, hz_out2, hz_out3,
+                                                 hz_out4, hz_out5, hz_out6);
+        dst2 = AVC_CALC_DPADD_H_6PIX_2COEFF_R_SH(hz_out2, hz_out3, hz_out4,
+                                                 hz_out5, hz_out6, hz_out7);
+        dst3 = AVC_CALC_DPADD_H_6PIX_2COEFF_R_SH(hz_out3, hz_out4, hz_out5,
+                                                 hz_out6, hz_out7, hz_out8);
+        /* src0/src1 are reused to hold the packed byte results. */
+        PCKEV_B2_SB(dst1, dst0, dst3, dst2, src0, src1);
+        XORI_B2_128_SB(src0, src1);
+
+        ST4x4_UB(src0, src1, 0, 2, 0, 2, dst, dst_stride);
+
+        dst += (4 * dst_stride);
+        /* Carry the last five filtered rows over to the next pass. */
+        hz_out0 = hz_out4;
+        hz_out1 = hz_out5;
+        hz_out2 = hz_out6;
+        hz_out3 = hz_out7;
+        hz_out4 = hz_out8;
+    }
+}
+
+static void avc_luma_mid_8w_msa(const uint8_t *src, int32_t src_stride,
+                                uint8_t *dst, int32_t dst_stride,
+                                int32_t height)
+{   /* 8-wide filter: horizontal pass first, then vertical over its output */
+    uint32_t loop_cnt;              /* one iteration per four output rows */
+    v16i8 src0, src1, src2, src3, src4;
+    v16i8 mask0, mask1, mask2;
+    v8i16 hz_out0, hz_out1, hz_out2, hz_out3;
+    v8i16 hz_out4, hz_out5, hz_out6, hz_out7, hz_out8;
+    v8i16 dst0, dst1, dst2, dst3;
+    v16u8 out0, out1;
+    /* Three shuffle masks, read from the start of luma_mask_arr. */
+    LD_SB3(&luma_mask_arr[0], 16, mask0, mask1, mask2);
+
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    src += (5 * src_stride);
+    /* Horizontal pass over the five rows that prime the vertical window. */
+    hz_out0 = AVC_HORZ_FILTER_SH(src0, mask0, mask1, mask2);
+    hz_out1 = AVC_HORZ_FILTER_SH(src1, mask0, mask1, mask2);
+    hz_out2 = AVC_HORZ_FILTER_SH(src2, mask0, mask1, mask2);
+    hz_out3 = AVC_HORZ_FILTER_SH(src3, mask0, mask1, mask2);
+    hz_out4 = AVC_HORZ_FILTER_SH(src4, mask0, mask1, mask2);
+    /* Four rows per pass; a height % 4 remainder is never processed. */
+    for (loop_cnt = (height >> 2); loop_cnt--;) {
+        LD_SB4(src, src_stride, src0, src1, src2, src3);
+        XORI_B4_128_SB(src0, src1, src2, src3);
+        src += (4 * src_stride);
+
+        hz_out5 = AVC_HORZ_FILTER_SH(src0, mask0, mask1, mask2);
+        hz_out6 = AVC_HORZ_FILTER_SH(src1, mask0, mask1, mask2);
+        hz_out7 = AVC_HORZ_FILTER_SH(src2, mask0, mask1, mask2);
+        hz_out8 = AVC_HORZ_FILTER_SH(src3, mask0, mask1, mask2);
+        dst0 = AVC_CALC_DPADD_H_6PIX_2COEFF_SH(hz_out0, hz_out1, hz_out2,
+                                               hz_out3, hz_out4, hz_out5);
+        dst1 = AVC_CALC_DPADD_H_6PIX_2COEFF_SH(hz_out1, hz_out2, hz_out3,
+                                               hz_out4, hz_out5, hz_out6);
+        dst2 = AVC_CALC_DPADD_H_6PIX_2COEFF_SH(hz_out2, hz_out3, hz_out4,
+                                               hz_out5, hz_out6, hz_out7);
+        dst3 = AVC_CALC_DPADD_H_6PIX_2COEFF_SH(hz_out3, hz_out4, hz_out5,
+                                               hz_out6, hz_out7, hz_out8);
+        out0 = PCKEV_XORI128_UB(dst0, dst1);
+        out1 = PCKEV_XORI128_UB(dst2, dst3);
+        ST8x4_UB(out0, out1, dst, dst_stride);
+        /* Shift hz_out4..8 down to hz_out0..4; hz_out5 doubles as a temp. */
+        dst += (4 * dst_stride);
+        hz_out3 = hz_out7;
+        hz_out1 = hz_out5;
+        hz_out5 = hz_out4;          /* park the old hz_out4 */
+        hz_out4 = hz_out8;
+        hz_out2 = hz_out6;
+        hz_out0 = hz_out5;          /* receives the parked old hz_out4 */
+    }
+}
+
+static void avc_luma_mid_16w_msa(const uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 int32_t height)
+{
+    /* A 16-wide block is two adjacent 8-wide columns, filtered in turn. */
+    uint32_t col;
+
+    for (col = 0; col < 16; col += 8) {
+        avc_luma_mid_8w_msa(src + col, src_stride,
+                            dst + col, dst_stride, height);
+    }
+}
+
+static void avc_luma_midh_qrt_4w_msa(const uint8_t *src, int32_t src_stride,
+                                     uint8_t *dst, int32_t dst_stride,
+                                     int32_t height, uint8_t horiz_offset)
+{   /* Vertical then horizontal 6-tap, averaged with a vertical-only value */
+    uint32_t row;                   /* one iteration per two output rows */
+    v16i8 src0, src1, src2, src3, src4, src5, src6;
+    v8i16 vt_res0, vt_res1, vt_res2, vt_res3;
+    v4i32 hz_res0, hz_res1;
+    v8i16 dst0, dst1;
+    v8i16 shf_vec0, shf_vec1, shf_vec2, shf_vec3, shf_vec4, shf_vec5;
+    v8i16 mask0 = { 0, 5, 1, 6, 2, 7, 3, 8 };   /* lane pairs (i, i + 5) */
+    v8i16 mask1 = { 1, 4, 2, 5, 3, 6, 4, 7 };   /* lane pairs (i + 1, i + 4) */
+    v8i16 mask2 = { 2, 3, 3, 4, 4, 5, 5, 6 };   /* lane pairs (i + 2, i + 3) */
+    v8i16 minus5h = __msa_ldi_h(-5);
+    v8i16 plus20h = __msa_ldi_h(20);
+    v8i16 zeros = { 0 };
+    v16u8 out;
+    /* Prime the vertical window with the first five source rows. */
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    src += (5 * src_stride);
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    /* Two rows per pass; an odd trailing row is never processed. */
+    for (row = (height >> 1); row--;) {
+        LD_SB2(src, src_stride, src5, src6);
+        src += (2 * src_stride);
+
+        XORI_B2_128_SB(src5, src6);
+        AVC_CALC_DPADD_B_6PIX_2COEFF_SH(src0, src1, src2, src3, src4, src5,
+                                        vt_res0, vt_res1);
+        AVC_CALC_DPADD_B_6PIX_2COEFF_SH(src1, src2, src3, src4, src5, src6,
+                                        vt_res2, vt_res3);
+        VSHF_H3_SH(vt_res0, vt_res1, vt_res0, vt_res1, vt_res0, vt_res1,
+                   mask0, mask1, mask2, shf_vec0, shf_vec1, shf_vec2);
+        VSHF_H3_SH(vt_res2, vt_res3, vt_res2, vt_res3, vt_res2, vt_res3,
+                   mask0, mask1, mask2, shf_vec3, shf_vec4, shf_vec5);
+        hz_res0 = __msa_hadd_s_w(shf_vec0, shf_vec0);
+        DPADD_SH2_SW(shf_vec1, shf_vec2, minus5h, plus20h, hz_res0, hz_res0);
+        hz_res1 = __msa_hadd_s_w(shf_vec3, shf_vec3);
+        DPADD_SH2_SW(shf_vec4, shf_vec5, minus5h, plus20h, hz_res1, hz_res1);
+        /* Rounding shift by 10: presumably 5 bits for each of the two passes. */
+        SRARI_W2_SW(hz_res0, hz_res1, 10);
+        SAT_SW2_SW(hz_res0, hz_res1, 7);
+        /* shf_vec2/shf_vec5 (mask2) hold the vertical-only lanes i+2, i+3. */
+        dst0 = __msa_srari_h(shf_vec2, 5);
+        dst1 = __msa_srari_h(shf_vec5, 5);
+
+        SAT_SH2_SH(dst0, dst1, 7);
+        /* horiz_offset selects the odd (i+3) lanes, else the even (i+2). */
+        if (horiz_offset) {
+            dst0 = __msa_ilvod_h(zeros, dst0);
+            dst1 = __msa_ilvod_h(zeros, dst1);
+        } else {
+            ILVEV_H2_SH(dst0, zeros, dst1, zeros, dst0, dst1);
+        }
+
+        hz_res0 = __msa_aver_s_w(hz_res0, (v4i32) dst0);
+        hz_res1 = __msa_aver_s_w(hz_res1, (v4i32) dst1);
+        dst0 = __msa_pckev_h((v8i16) hz_res1, (v8i16) hz_res0);
+
+        out = PCKEV_XORI128_UB(dst0, dst0);
+        ST4x2_UB(out, dst, dst_stride);
+
+        dst += (2 * dst_stride);
+        /* Advance the vertical window by two rows. */
+        src0 = src2;
+        src1 = src3;
+        src2 = src4;
+        src3 = src5;
+        src4 = src6;
+    }
+}
+
+static void avc_luma_midh_qrt_8w_msa(const uint8_t *src, int32_t src_stride,
+                                     uint8_t *dst, int32_t dst_stride,
+                                     int32_t height, uint8_t horiz_offset)
+{
+    /* The 8-pixel-wide block is handled as two adjacent 4-pixel-wide
+     * columns, left one first, each covering all 'height' rows. */
+    uint32_t col;
+
+    for (col = 0; col < 8; col += 4) {
+        avc_luma_midh_qrt_4w_msa(src + col, src_stride,
+                                 dst + col, dst_stride,
+                                 height, horiz_offset);
+    }
+}
+
+static void avc_luma_midh_qrt_16w_msa(const uint8_t *src, int32_t src_stride,
+                                      uint8_t *dst, int32_t dst_stride,
+                                      int32_t height, uint8_t horiz_offset)
+{
+    /* The 16-pixel-wide block is handled as four adjacent 4-pixel-wide
+     * columns, left to right, each covering all 'height' rows. */
+    uint32_t col;
+
+    for (col = 0; col < 16; col += 4) {
+        avc_luma_midh_qrt_4w_msa(src + col, src_stride,
+                                 dst + col, dst_stride,
+                                 height, horiz_offset);
+    }
+}
+
+static void avc_luma_midv_qrt_4w_msa(const uint8_t *src, int32_t src_stride,
+                                     uint8_t *dst, int32_t dst_stride,
+                                     int32_t height, uint8_t ver_offset)
+{   /* Two-pass (hz, then vt) value averaged with a horizontal-only row */
+    uint32_t loop_cnt;              /* one iteration per four output rows */
+    v16i8 src0, src1, src2, src3, src4;
+    v16i8 mask0, mask1, mask2;
+    v8i16 hz_out0, hz_out1, hz_out2, hz_out3;
+    v8i16 hz_out4, hz_out5, hz_out6, hz_out7, hz_out8;
+    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+    /* Three shuffle masks, read from luma_mask_arr starting at index 48. */
+    LD_SB3(&luma_mask_arr[48], 16, mask0, mask1, mask2);
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    src += (5 * src_stride);
+    /* XORI_*_128 presumably flips the sign bit so pixels become signed. */
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    /* Each of these calls filters two rows into a single result vector. */
+    hz_out0 = AVC_XOR_VSHF_B_AND_APPLY_6TAP_HORIZ_FILT_SH(src0, src1,
+                                                          mask0, mask1, mask2);
+    hz_out2 = AVC_XOR_VSHF_B_AND_APPLY_6TAP_HORIZ_FILT_SH(src2, src3,
+                                                          mask0, mask1, mask2);
+    /* PCKOD_D2_SH presumably extracts the second row of each pair. */
+    PCKOD_D2_SH(hz_out0, hz_out0, hz_out2, hz_out2, hz_out1, hz_out3);
+
+    hz_out4 = AVC_HORZ_FILTER_SH(src4, mask0, mask1, mask2);
+    /* Four rows per pass; a height % 4 remainder is never processed. */
+    for (loop_cnt = (height >> 2); loop_cnt--;) {
+        LD_SB4(src, src_stride, src0, src1, src2, src3);
+        src += (4 * src_stride);
+        XORI_B4_128_SB(src0, src1, src2, src3);
+
+        hz_out5 = AVC_XOR_VSHF_B_AND_APPLY_6TAP_HORIZ_FILT_SH(src0, src1,
+                                                              mask0, mask1,
+                                                              mask2);
+        hz_out7 = AVC_XOR_VSHF_B_AND_APPLY_6TAP_HORIZ_FILT_SH(src2, src3,
+                                                              mask0, mask1,
+                                                              mask2);
+
+        PCKOD_D2_SH(hz_out5, hz_out5, hz_out7, hz_out7, hz_out6, hz_out8);
+        /* Vertical pass: each output row uses six consecutive hz rows. */
+        dst0 = AVC_CALC_DPADD_H_6PIX_2COEFF_SH(hz_out0, hz_out1, hz_out2,
+                                               hz_out3, hz_out4, hz_out5);
+        dst2 = AVC_CALC_DPADD_H_6PIX_2COEFF_SH(hz_out1, hz_out2, hz_out3,
+                                               hz_out4, hz_out5, hz_out6);
+        dst4 = AVC_CALC_DPADD_H_6PIX_2COEFF_SH(hz_out2, hz_out3, hz_out4,
+                                               hz_out5, hz_out6, hz_out7);
+        dst6 = AVC_CALC_DPADD_H_6PIX_2COEFF_SH(hz_out3, hz_out4, hz_out5,
+                                               hz_out6, hz_out7, hz_out8);
+        /* Horizontal-only rows: 3..6 if ver_offset is set, else 2..5. */
+        if (ver_offset) {
+            dst1 = __msa_srari_h(hz_out3, 5);
+            dst3 = __msa_srari_h(hz_out4, 5);
+            dst5 = __msa_srari_h(hz_out5, 5);
+            dst7 = __msa_srari_h(hz_out6, 5);
+        } else {
+            dst1 = __msa_srari_h(hz_out2, 5);
+            dst3 = __msa_srari_h(hz_out3, 5);
+            dst5 = __msa_srari_h(hz_out4, 5);
+            dst7 = __msa_srari_h(hz_out5, 5);
+        }
+
+        SAT_SH4_SH(dst1, dst3, dst5, dst7, 7);
+        /* Average the two-pass value with the selected horizontal-only row. */
+        dst0 = __msa_aver_s_h(dst0, dst1);
+        dst1 = __msa_aver_s_h(dst2, dst3);
+        dst2 = __msa_aver_s_h(dst4, dst5);
+        dst3 = __msa_aver_s_h(dst6, dst7);
+        /* src0/src1 are reused to hold the packed byte results. */
+        PCKEV_B2_SB(dst1, dst0, dst3, dst2, src0, src1);
+        XORI_B2_128_SB(src0, src1);
+
+        ST4x4_UB(src0, src1, 0, 2, 0, 2, dst, dst_stride);
+        /* Advance dst and keep the last five hz rows for the next pass. */
+        dst += (4 * dst_stride);
+        hz_out0 = hz_out4;
+        hz_out1 = hz_out5;
+        hz_out2 = hz_out6;
+        hz_out3 = hz_out7;
+        hz_out4 = hz_out8;
+    }
+}
+
+static void avc_luma_midv_qrt_8w_msa(const uint8_t *src, int32_t src_stride,
+                                     uint8_t *dst, int32_t dst_stride,
+                                     int32_t height, uint8_t ver_offset)
+{   /* Two-pass (hz, then vt) value averaged with a horizontal-only row */
+    uint32_t loop_cnt;              /* one iteration per four output rows */
+    v16i8 src0, src1, src2, src3, src4;
+    v16i8 mask0, mask1, mask2;
+    v8i16 hz_out0, hz_out1, hz_out2, hz_out3;
+    v8i16 hz_out4, hz_out5, hz_out6, hz_out7, hz_out8;
+    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+    v16u8 out;
+    /* Three shuffle masks, read from the start of luma_mask_arr. */
+    LD_SB3(&luma_mask_arr[0], 16, mask0, mask1, mask2);
+
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    src += (5 * src_stride);
+    /* Horizontal pass over the five rows that prime the vertical window. */
+    hz_out0 = AVC_HORZ_FILTER_SH(src0, mask0, mask1, mask2);
+    hz_out1 = AVC_HORZ_FILTER_SH(src1, mask0, mask1, mask2);
+    hz_out2 = AVC_HORZ_FILTER_SH(src2, mask0, mask1, mask2);
+    hz_out3 = AVC_HORZ_FILTER_SH(src3, mask0, mask1, mask2);
+    hz_out4 = AVC_HORZ_FILTER_SH(src4, mask0, mask1, mask2);
+    /* Four rows per pass; a height % 4 remainder is never processed. */
+    for (loop_cnt = (height >> 2); loop_cnt--;) {
+        LD_SB4(src, src_stride, src0, src1, src2, src3);
+        XORI_B4_128_SB(src0, src1, src2, src3);
+        src += (4 * src_stride);
+
+        hz_out5 = AVC_HORZ_FILTER_SH(src0, mask0, mask1, mask2);
+        hz_out6 = AVC_HORZ_FILTER_SH(src1, mask0, mask1, mask2);
+        hz_out7 = AVC_HORZ_FILTER_SH(src2, mask0, mask1, mask2);
+        hz_out8 = AVC_HORZ_FILTER_SH(src3, mask0, mask1, mask2);
+        /* Vertical pass: each output row uses six consecutive hz rows. */
+        dst0 = AVC_CALC_DPADD_H_6PIX_2COEFF_SH(hz_out0, hz_out1, hz_out2,
+                                               hz_out3, hz_out4, hz_out5);
+        dst2 = AVC_CALC_DPADD_H_6PIX_2COEFF_SH(hz_out1, hz_out2, hz_out3,
+                                               hz_out4, hz_out5, hz_out6);
+        dst4 = AVC_CALC_DPADD_H_6PIX_2COEFF_SH(hz_out2, hz_out3, hz_out4,
+                                               hz_out5, hz_out6, hz_out7);
+        dst6 = AVC_CALC_DPADD_H_6PIX_2COEFF_SH(hz_out3, hz_out4, hz_out5,
+                                               hz_out6, hz_out7, hz_out8);
+        /* Horizontal-only rows: 3..6 if ver_offset is set, else 2..5. */
+        if (ver_offset) {
+            dst1 = __msa_srari_h(hz_out3, 5);
+            dst3 = __msa_srari_h(hz_out4, 5);
+            dst5 = __msa_srari_h(hz_out5, 5);
+            dst7 = __msa_srari_h(hz_out6, 5);
+        } else {
+            dst1 = __msa_srari_h(hz_out2, 5);
+            dst3 = __msa_srari_h(hz_out3, 5);
+            dst5 = __msa_srari_h(hz_out4, 5);
+            dst7 = __msa_srari_h(hz_out5, 5);
+        }
+
+        SAT_SH4_SH(dst1, dst3, dst5, dst7, 7);
+        /* Average the two-pass value with the selected horizontal-only row. */
+        dst0 = __msa_aver_s_h(dst0, dst1);
+        dst1 = __msa_aver_s_h(dst2, dst3);
+        dst2 = __msa_aver_s_h(dst4, dst5);
+        dst3 = __msa_aver_s_h(dst6, dst7);
+        /* The four output rows are packed and stored one at a time. */
+        out = PCKEV_XORI128_UB(dst0, dst0);
+        ST8x1_UB(out, dst);
+        dst += dst_stride;
+        out = PCKEV_XORI128_UB(dst1, dst1);
+        ST8x1_UB(out, dst);
+        dst += dst_stride;
+        out = PCKEV_XORI128_UB(dst2, dst2);
+        ST8x1_UB(out, dst);
+        dst += dst_stride;
+        out = PCKEV_XORI128_UB(dst3, dst3);
+        ST8x1_UB(out, dst);
+        dst += dst_stride;
+        /* Carry the last five filtered rows over to the next pass. */
+        hz_out0 = hz_out4;
+        hz_out1 = hz_out5;
+        hz_out2 = hz_out6;
+        hz_out3 = hz_out7;
+        hz_out4 = hz_out8;
+    }
+}
+
+static void avc_luma_midv_qrt_16w_msa(const uint8_t *src, int32_t src_stride,
+                                      uint8_t *dst, int32_t dst_stride,
+                                      int32_t height, uint8_t vert_offset)
+{
+    /* The 16-pixel-wide block is handled as two adjacent 8-pixel-wide
+     * columns, left one first, each covering all 'height' rows. */
+    uint32_t col;
+
+    for (col = 0; col < 16; col += 8) {
+        avc_luma_midv_qrt_8w_msa(src + col, src_stride,
+                                 dst + col, dst_stride,
+                                 height, vert_offset);
+    }
+}
+
+static void avc_luma_hv_qrt_4w_msa(const uint8_t *src_x, const uint8_t *src_y,
+                                   int32_t src_stride, uint8_t *dst,
+                                   int32_t dst_stride, int32_t height)
+{   /* 4-wide rounded average of a horizontal and a vertical filter result */
+    uint32_t loop_cnt;              /* one iteration per four output rows */
+    v16i8 src_hz0, src_hz1, src_hz2, src_hz3;
+    v16i8 src_vt0, src_vt1, src_vt2, src_vt3, src_vt4;
+    v16i8 src_vt5, src_vt6, src_vt7, src_vt8;
+    v16i8 mask0, mask1, mask2;
+    v8i16 hz_out0, hz_out1, vert_out0, vert_out1;
+    v8i16 out0, out1;
+    v16u8 out;
+    /* Three shuffle masks, read from luma_mask_arr starting at index 48. */
+    LD_SB3(&luma_mask_arr[48], 16, mask0, mask1, mask2);
+    /* src_y feeds the vertical filter; five rows prime its window. */
+    LD_SB5(src_y, src_stride, src_vt0, src_vt1, src_vt2, src_vt3, src_vt4);
+    src_y += (5 * src_stride);
+    /* Word 0 of the next row goes into word 1: rows i, i+1 share a vector. */
+    src_vt0 = (v16i8) __msa_insve_w((v4i32) src_vt0, 1, (v4i32) src_vt1);
+    src_vt1 = (v16i8) __msa_insve_w((v4i32) src_vt1, 1, (v4i32) src_vt2);
+    src_vt2 = (v16i8) __msa_insve_w((v4i32) src_vt2, 1, (v4i32) src_vt3);
+    src_vt3 = (v16i8) __msa_insve_w((v4i32) src_vt3, 1, (v4i32) src_vt4);
+    /* XORI_*_128 presumably flips the sign bit so pixels become signed. */
+    XORI_B4_128_SB(src_vt0, src_vt1, src_vt2, src_vt3);
+    /* Four rows per pass; a height % 4 remainder is never processed. */
+    for (loop_cnt = (height >> 2); loop_cnt--;) {
+        LD_SB4(src_x, src_stride, src_hz0, src_hz1, src_hz2, src_hz3);
+        src_x += (4 * src_stride);
+
+        XORI_B4_128_SB(src_hz0, src_hz1, src_hz2, src_hz3);
+        /* Horizontal filter on the src_x rows, two rows per call. */
+        hz_out0 = AVC_XOR_VSHF_B_AND_APPLY_6TAP_HORIZ_FILT_SH(src_hz0,
+                                                              src_hz1, mask0,
+                                                              mask1, mask2);
+        hz_out1 = AVC_XOR_VSHF_B_AND_APPLY_6TAP_HORIZ_FILT_SH(src_hz2,
+                                                              src_hz3, mask0,
+                                                              mask1, mask2);
+
+        SRARI_H2_SH(hz_out0, hz_out1, 5);
+        SAT_SH2_SH(hz_out0, hz_out1, 7);
+
+        LD_SB4(src_y, src_stride, src_vt5, src_vt6, src_vt7, src_vt8);
+        src_y += (4 * src_stride);
+        /* Pair up the new rows the same way as the priming rows above. */
+        src_vt4 = (v16i8) __msa_insve_w((v4i32) src_vt4, 1, (v4i32) src_vt5);
+        src_vt5 = (v16i8) __msa_insve_w((v4i32) src_vt5, 1, (v4i32) src_vt6);
+        src_vt6 = (v16i8) __msa_insve_w((v4i32) src_vt6, 1, (v4i32) src_vt7);
+        src_vt7 = (v16i8) __msa_insve_w((v4i32) src_vt7, 1, (v4i32) src_vt8);
+
+        XORI_B4_128_SB(src_vt4, src_vt5, src_vt6, src_vt7);
+
+        /* filter calc */
+        vert_out0 = AVC_CALC_DPADD_B_6PIX_2COEFF_R_SH(src_vt0, src_vt1,
+                                                      src_vt2, src_vt3,
+                                                      src_vt4, src_vt5);
+        vert_out1 = AVC_CALC_DPADD_B_6PIX_2COEFF_R_SH(src_vt2, src_vt3,
+                                                      src_vt4, src_vt5,
+                                                      src_vt6, src_vt7);
+
+        SRARI_H2_SH(vert_out0, vert_out1, 5);
+        SAT_SH2_SH(vert_out0, vert_out1, 7);
+        /* Rounded average of both results: (hz + vert + 1) >> 1. */
+        out0 = __msa_srari_h((hz_out0 + vert_out0), 1);
+        out1 = __msa_srari_h((hz_out1 + vert_out1), 1);
+
+        SAT_SH2_SH(out0, out1, 7);
+        out = PCKEV_XORI128_UB(out0, out1);
+        ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
+        dst += (4 * dst_stride);
+        /* src_vt8 is still the raw load; it is paired and XORed next pass. */
+        src_vt3 = src_vt7;
+        src_vt1 = src_vt5;
+        src_vt0 = src_vt4;
+        src_vt4 = src_vt8;
+        src_vt2 = src_vt6;
+    }
+}
+
+static void avc_luma_hv_qrt_8w_msa(const uint8_t *src_x, const uint8_t *src_y,
+                                   int32_t src_stride, uint8_t *dst,
+                                   int32_t dst_stride, int32_t height)
+{   /* 8-wide rounded average of a horizontal and a vertical filter result */
+    uint32_t loop_cnt;              /* one iteration per four output rows */
+    v16i8 src_hz0, src_hz1, src_hz2, src_hz3;
+    v16i8 src_vt0, src_vt1, src_vt2, src_vt3, src_vt4;
+    v16i8 src_vt5, src_vt6, src_vt7, src_vt8;
+    v16i8 mask0, mask1, mask2;
+    v8i16 hz_out0, hz_out1, hz_out2, hz_out3;
+    v8i16 vert_out0, vert_out1, vert_out2, vert_out3;
+    v8i16 out0, out1, out2, out3;
+    v16u8 tmp0, tmp1;
+    /* Masks from the start of luma_mask_arr; src_y primes the vt window. */
+    LD_SB3(&luma_mask_arr[0], 16, mask0, mask1, mask2);
+    LD_SB5(src_y, src_stride, src_vt0, src_vt1, src_vt2, src_vt3, src_vt4);
+    src_y += (5 * src_stride);
+    /* Low half of the next row goes into the high half of this vector. */
+    src_vt0 = (v16i8) __msa_insve_d((v2i64) src_vt0, 1, (v2i64) src_vt1);
+    src_vt1 = (v16i8) __msa_insve_d((v2i64) src_vt1, 1, (v2i64) src_vt2);
+    src_vt2 = (v16i8) __msa_insve_d((v2i64) src_vt2, 1, (v2i64) src_vt3);
+    src_vt3 = (v16i8) __msa_insve_d((v2i64) src_vt3, 1, (v2i64) src_vt4);
+    /* XORI_*_128 presumably flips the sign bit so pixels become signed. */
+    XORI_B4_128_SB(src_vt0, src_vt1, src_vt2, src_vt3);
+    /* Four rows per pass; a height % 4 remainder is never processed. */
+    for (loop_cnt = (height >> 2); loop_cnt--;) {
+        LD_SB4(src_x, src_stride, src_hz0, src_hz1, src_hz2, src_hz3);
+        XORI_B4_128_SB(src_hz0, src_hz1, src_hz2, src_hz3);
+        src_x += (4 * src_stride);
+        /* Horizontal filter on the src_x rows, one row per call. */
+        hz_out0 = AVC_HORZ_FILTER_SH(src_hz0, mask0, mask1, mask2);
+        hz_out1 = AVC_HORZ_FILTER_SH(src_hz1, mask0, mask1, mask2);
+        hz_out2 = AVC_HORZ_FILTER_SH(src_hz2, mask0, mask1, mask2);
+        hz_out3 = AVC_HORZ_FILTER_SH(src_hz3, mask0, mask1, mask2);
+
+        SRARI_H4_SH(hz_out0, hz_out1, hz_out2, hz_out3, 5);
+        SAT_SH4_SH(hz_out0, hz_out1, hz_out2, hz_out3, 7);
+
+        LD_SB4(src_y, src_stride, src_vt5, src_vt6, src_vt7, src_vt8);
+        src_y += (4 * src_stride);
+        /* Pair up the new rows the same way as the priming rows above. */
+        src_vt4 = (v16i8) __msa_insve_d((v2i64) src_vt4, 1, (v2i64) src_vt5);
+        src_vt5 = (v16i8) __msa_insve_d((v2i64) src_vt5, 1, (v2i64) src_vt6);
+        src_vt6 = (v16i8) __msa_insve_d((v2i64) src_vt6, 1, (v2i64) src_vt7);
+        src_vt7 = (v16i8) __msa_insve_d((v2i64) src_vt7, 1, (v2i64) src_vt8);
+
+        XORI_B4_128_SB(src_vt4, src_vt5, src_vt6, src_vt7);
+
+        /* filter calc */
+        AVC_CALC_DPADD_B_6PIX_2COEFF_SH(src_vt0, src_vt1, src_vt2, src_vt3,
+                                        src_vt4, src_vt5, vert_out0, vert_out1);
+        AVC_CALC_DPADD_B_6PIX_2COEFF_SH(src_vt2, src_vt3, src_vt4, src_vt5,
+                                        src_vt6, src_vt7, vert_out2, vert_out3);
+
+        SRARI_H4_SH(vert_out0, vert_out1, vert_out2, vert_out3, 5);
+        SAT_SH4_SH(vert_out0, vert_out1, vert_out2, vert_out3, 7);
+        /* Rounded average of both results: (hz + vert + 1) >> 1. */
+        out0 = __msa_srari_h((hz_out0 + vert_out0), 1);
+        out1 = __msa_srari_h((hz_out1 + vert_out1), 1);
+        out2 = __msa_srari_h((hz_out2 + vert_out2), 1);
+        out3 = __msa_srari_h((hz_out3 + vert_out3), 1);
+
+        SAT_SH4_SH(out0, out1, out2, out3, 7);
+        tmp0 = PCKEV_XORI128_UB(out0, out1);
+        tmp1 = PCKEV_XORI128_UB(out2, out3);
+        ST8x4_UB(tmp0, tmp1, dst, dst_stride);
+        /* Shift src_vt4..8 down to src_vt0..4; src_vt5 doubles as a temp. */
+        dst += (4 * dst_stride);
+        src_vt3 = src_vt7;
+        src_vt1 = src_vt5;
+        src_vt5 = src_vt4;          /* park the old src_vt4 */
+        src_vt4 = src_vt8;          /* raw load; paired and XORed next pass */
+        src_vt2 = src_vt6;
+        src_vt0 = src_vt5;          /* receives the parked old src_vt4 */
+    }
+}
+
+static void avc_luma_hv_qrt_16w_msa(const uint8_t *src_x, const uint8_t *src_y,
+                                    int32_t src_stride, uint8_t *dst,
+                                    int32_t dst_stride, int32_t height)
+{
+    /* The 16-pixel-wide block is handled as two adjacent 8-pixel-wide
+     * columns, left one first.  The horizontal source, the vertical
+     * source and the destination all advance by the same column
+     * offset. */
+    uint32_t col;
+
+    for (col = 0; col < 16; col += 8) {
+        avc_luma_hv_qrt_8w_msa(src_x + col, src_y + col, src_stride,
+                               dst + col, dst_stride, height);
+    }
+}
+
+static void avc_luma_hz_and_aver_dst_4x4_msa(const uint8_t *src,
+                                             int32_t src_stride,
+                                             uint8_t *dst, int32_t dst_stride)
+{   /* 4x4 horizontal filter whose output is averaged into existing dst */
+    v16i8 src0, src1, src2, src3;
+    v16u8 dst0, dst1, dst2, dst3, res;
+    v8i16 res0, res1;
+    v16i8 mask0, mask1, mask2;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
+    v16i8 minus5b = __msa_ldi_b(-5);    /* weight for the mask1 shuffles */
+    v16i8 plus20b = __msa_ldi_b(20);    /* weight for the mask2 shuffles */
+    /* Three shuffle masks, read from luma_mask_arr starting at index 48. */
+    LD_SB3(&luma_mask_arr[48], 16, mask0, mask1, mask2);
+    LD_SB4(src, src_stride, src0, src1, src2, src3);
+    /* Read the current destination rows; they are averaged in below. */
+    LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+    XORI_B4_128_SB(src0, src1, src2, src3);
+    VSHF_B2_SB(src0, src1, src2, src3, mask0, mask0, vec0, vec1);
+    HADD_SB2_SH(vec0, vec1, res0, res1);
+    VSHF_B2_SB(src0, src1, src2, src3, mask1, mask1, vec2, vec3);
+    DPADD_SB2_SH(vec2, vec3, minus5b, minus5b, res0, res1);
+    VSHF_B2_SB(src0, src1, src2, src3, mask2, mask2, vec4, vec5);
+    DPADD_SB2_SH(vec4, vec5, plus20b, plus20b, res0, res1);
+    SRARI_H2_SH(res0, res1, 5);
+    SAT_SH2_SH(res0, res1, 7);
+    res = PCKEV_XORI128_UB(res0, res1);
+    ILVR_W2_UB(dst1, dst0, dst3, dst2, dst0, dst1);
+    /* Gather the dst rows into one vector, then take the unsigned average. */
+    dst0 = (v16u8) __msa_pckev_d((v2i64) dst1, (v2i64) dst0);
+    res = __msa_aver_u_b(res, dst0);
+    /* The averaged block is written back over dst. */
+    ST4x4_UB(res, res, 0, 1, 2, 3, dst, dst_stride);
+}
+
+static void avc_luma_hz_and_aver_dst_8x8_msa(const uint8_t *src,
+                                             int32_t src_stride,
+                                             uint8_t *dst, int32_t dst_stride)
+{   /* 8x8 horizontal filter whose output is combined with existing dst */
+    uint32_t loop_cnt;              /* two iterations of four rows each */
+    v16i8 src0, src1, src2, src3;
+    v16u8 dst0, dst1, dst2, dst3;
+    v8i16 res0, res1, res2, res3;
+    v16i8 mask0, mask1, mask2;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
+    v16i8 vec6, vec7, vec8, vec9, vec10, vec11;
+    v16i8 minus5b = __msa_ldi_b(-5);    /* weight for the mask1 shuffles */
+    v16i8 plus20b = __msa_ldi_b(20);    /* weight for the mask2 shuffles */
+    /* Three shuffle masks, read from the start of luma_mask_arr. */
+    LD_SB3(&luma_mask_arr[0], 16, mask0, mask1, mask2);
+
+    for (loop_cnt = 2; loop_cnt--;) {
+        LD_SB4(src, src_stride, src0, src1, src2, src3);
+        src += (4 * src_stride);
+        /* Read the current destination rows; they are passed on below. */
+        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+        /* mask0 shuffles are summed by HADD; mask1/mask2 ones are weighted. */
+        XORI_B4_128_SB(src0, src1, src2, src3);
+        VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0, vec1);
+        VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2, vec3);
+        HADD_SB4_SH(vec0, vec1, vec2, vec3, res0, res1, res2, res3);
+        VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec4, vec5);
+        VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec6, vec7);
+        DPADD_SB4_SH(vec4, vec5, vec6, vec7, minus5b, minus5b, minus5b, minus5b,
+                     res0, res1, res2, res3);
+        VSHF_B2_SB(src0, src0, src1, src1, mask2, mask2, vec8, vec9);
+        VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, vec10, vec11);
+        DPADD_SB4_SH(vec8, vec9, vec10, vec11, plus20b, plus20b, plus20b,
+                     plus20b, res0, res1, res2, res3);
+        SRARI_H4_SH(res0, res1, res2, res3, 5);
+        SAT_SH4_SH(res0, res1, res2, res3, 7);
+        CONVERT_UB_AVG_ST8x4_UB(res0, res1, res2, res3, dst0, dst1, dst2, dst3,
+                                dst, dst_stride);
+        /* The macro above presumably packs, averages with dst and stores. */
+        dst += (4 * dst_stride);
+    }
+}
+
+static void avc_luma_hz_and_aver_dst_16x16_msa(const uint8_t *src,
+                                               int32_t src_stride,
+                                               uint8_t *dst, int32_t dst_stride)
+{ /* Horizontal filter over 16 rows; result is passed to AVER_UB4_UB with dst. */
+    uint32_t loop_cnt;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
+    v16u8 dst0, dst1, dst2, dst3;
+    v16i8 mask0, mask1, mask2;
+    v8i16 res0, res1, res2, res3, res4, res5, res6, res7;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
+    v16i8 vec6, vec7, vec8, vec9, vec10, vec11;
+    v16i8 minus5b = __msa_ldi_b(-5); /* weight for the mask1 shuffles */
+    v16i8 plus20b = __msa_ldi_b(20); /* weight for the mask2 shuffles */
+    /* Three shuffle masks, 16 bytes apart, starting at luma_mask_arr[0]. */
+    LD_SB3(&luma_mask_arr[0], 16, mask0, mask1, mask2);
+    /* Four iterations of four rows; each row is read as two loads 8 apart. */
+    for (loop_cnt = 4; loop_cnt--;) {
+        LD_SB2(src, 8, src0, src1);
+        src += src_stride;
+        LD_SB2(src, 8, src2, src3);
+        src += src_stride;
+        /* Existing dst rows for the final averaging step. */
+        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+        /* First two rows of the batch -> res0..res3 (macro roles per names). */
+        XORI_B4_128_SB(src0, src1, src2, src3);
+        VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0, vec3);
+        VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec6, vec9);
+        VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec1, vec4);
+        VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec7, vec10);
+        VSHF_B2_SB(src0, src0, src1, src1, mask2, mask2, vec2, vec5);
+        VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, vec8, vec11);
+        HADD_SB4_SH(vec0, vec3, vec6, vec9, res0, res1, res2, res3);
+        DPADD_SB4_SH(vec1, vec4, vec7, vec10, minus5b, minus5b, minus5b,
+                     minus5b, res0, res1, res2, res3);
+        DPADD_SB4_SH(vec2, vec5, vec8, vec11, plus20b, plus20b, plus20b,
+                     plus20b, res0, res1, res2, res3);
+        LD_SB2(src, 8, src4, src5); /* last two rows of the batch -> res4..7 */
+        src += src_stride;
+        LD_SB2(src, 8, src6, src7);
+        src += src_stride;
+        XORI_B4_128_SB(src4, src5, src6, src7);
+        VSHF_B2_SB(src4, src4, src5, src5, mask0, mask0, vec0, vec3);
+        VSHF_B2_SB(src6, src6, src7, src7, mask0, mask0, vec6, vec9);
+        VSHF_B2_SB(src4, src4, src5, src5, mask1, mask1, vec1, vec4);
+        VSHF_B2_SB(src6, src6, src7, src7, mask1, mask1, vec7, vec10);
+        VSHF_B2_SB(src4, src4, src5, src5, mask2, mask2, vec2, vec5);
+        VSHF_B2_SB(src6, src6, src7, src7, mask2, mask2, vec8, vec11);
+        HADD_SB4_SH(vec0, vec3, vec6, vec9, res4, res5, res6, res7);
+        DPADD_SB4_SH(vec1, vec4, vec7, vec10, minus5b, minus5b, minus5b,
+                     minus5b, res4, res5, res6, res7);
+        DPADD_SB4_SH(vec2, vec5, vec8, vec11, plus20b, plus20b, plus20b,
+                     plus20b, res4, res5, res6, res7);
+        SRARI_H4_SH(res0, res1, res2, res3, 5); /* presumably rounded >> 5 */
+        SRARI_H4_SH(res4, res5, res6, res7, 5);
+        SAT_SH4_SH(res0, res1, res2, res3, 7);
+        SAT_SH4_SH(res4, res5, res6, res7, 7);
+        PCKEV_B4_SB(res1, res0, res3, res2, res5, res4, res7, res6,
+                    vec0, vec1, vec2, vec3); /* vec0..vec3 reused as row bytes */
+        XORI_B4_128_SB(vec0, vec1, vec2, vec3);
+        AVER_UB4_UB(vec0, dst0, vec1, dst1, vec2, dst2, vec3, dst3,
+                    dst0, dst1, dst2, dst3);
+        ST_UB4(dst0, dst1, dst2, dst3, dst, dst_stride);
+        dst += (4 * dst_stride);
+    }
+}
+
+static void avc_luma_hz_qrt_and_aver_dst_4x4_msa(const uint8_t *src,
+                                                 int32_t src_stride,
+                                                 uint8_t *dst,
+                                                 int32_t dst_stride,
+                                                 uint8_t hor_offset)
+{ /* 4 rows: filter output averaged with shifted source bytes, then with dst. */
+    uint8_t slide;
+    v16i8 src0, src1, src2, src3;
+    v16u8 dst0, dst1, dst2, dst3;
+    v16i8 mask0, mask1, mask2;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
+    v8i16 out0, out1;
+    v16i8 minus5b = __msa_ldi_b(-5); /* weight for the mask1 shuffles */
+    v16i8 plus20b = __msa_ldi_b(20); /* weight for the mask2 shuffles */
+    v16u8 res0, res1;
+    /* This variant reads its three masks from byte offset 48 of the table. */
+    LD_SB3(&luma_mask_arr[48], 16, mask0, mask1, mask2);
+    /* Byte shift applied to the source rows below: 3 if hor_offset, else 2. */
+    if (hor_offset) {
+        slide = 3;
+    } else {
+        slide = 2;
+    }
+
+    LD_SB4(src, src_stride, src0, src1, src2, src3);
+    LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+    /* Rows go into the shuffles in pairs (src0/src1 and src2/src3). */
+    XORI_B4_128_SB(src0, src1, src2, src3);
+    VSHF_B2_SB(src0, src1, src2, src3, mask0, mask0, vec0, vec1);
+    HADD_SB2_SH(vec0, vec1, out0, out1);
+    VSHF_B2_SB(src0, src1, src2, src3, mask1, mask1, vec2, vec3);
+    DPADD_SB2_SH(vec2, vec3, minus5b, minus5b, out0, out1);
+    VSHF_B2_SB(src0, src1, src2, src3, mask2, mask2, vec4, vec5);
+    DPADD_SB2_SH(vec4, vec5, plus20b, plus20b, out0, out1);
+    SRARI_H2_SH(out0, out1, 5); /* presumably rounded >> 5 */
+    SAT_SH2_SH(out0, out1, 7);
+
+    PCKEV_B2_UB(out0, out0, out1, out1, res0, res1);
+    /* Same vector as both sld operands: each row is shifted by `slide`. */
+    src0 = __msa_sld_b(src0, src0, slide);
+    src1 = __msa_sld_b(src1, src1, slide);
+    src2 = __msa_sld_b(src2, src2, slide);
+    src3 = __msa_sld_b(src3, src3, slide);
+    src0 = (v16i8) __msa_insve_w((v4i32) src0, 1, (v4i32) src1);
+    src1 = (v16i8) __msa_insve_w((v4i32) src2, 1, (v4i32) src3);
+    res0 = (v16u8) __msa_aver_s_b((v16i8) res0, src0);
+    res1 = (v16u8) __msa_aver_s_b((v16i8) res1, src1);
+    /* Second XOR with 128, after the signed average on XORed source rows. */
+    XORI_B2_128_UB(res0, res1);
+    /* Pair up dst rows the same way the source rows were paired above. */
+    dst0 = (v16u8) __msa_insve_w((v4i32) dst0, 1, (v4i32) dst1);
+    dst1 = (v16u8) __msa_insve_w((v4i32) dst2, 1, (v4i32) dst3);
+
+    AVER_UB2_UB(res0, dst0, res1, dst1, dst0, dst1);
+
+    ST4x4_UB(dst0, dst1, 0, 1, 0, 1, dst, dst_stride);
+}
+
+static void avc_luma_hz_qrt_and_aver_dst_8x8_msa(const uint8_t *src,
+                                                 int32_t src_stride,
+                                                 uint8_t *dst,
+                                                 int32_t dst_stride,
+                                                 uint8_t hor_offset)
+{ /* 8 rows: filter output averaged with shifted source bytes, then with dst. */
+    uint8_t slide;
+    uint32_t loop_cnt;
+    v16i8 src0, src1, src2, src3;
+    v16i8 mask0, mask1, mask2;
+    v16u8 dst0, dst1, dst2, dst3;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
+    v16i8 vec6, vec7, vec8, vec9, vec10, vec11;
+    v8i16 out0, out1, out2, out3;
+    v16i8 minus5b = __msa_ldi_b(-5); /* weight for the mask1 shuffles */
+    v16i8 plus20b = __msa_ldi_b(20); /* weight for the mask2 shuffles */
+    v16i8 res0, res1, res2, res3;
+    /* Three shuffle masks, 16 bytes apart, starting at luma_mask_arr[0]. */
+    LD_SB3(&luma_mask_arr[0], 16, mask0, mask1, mask2);
+    /* Byte shift applied to the source rows below: 3 if hor_offset, else 2. */
+    if (hor_offset) {
+        slide = 3;
+    } else {
+        slide = 2;
+    }
+    /* Two iterations, four rows of src and dst per iteration. */
+    for (loop_cnt = 2; loop_cnt--;) {
+        LD_SB4(src, src_stride, src0, src1, src2, src3);
+        src += (4 * src_stride);
+
+        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+        /* NOTE(review): macros are defined elsewhere; roles read from names. */
+        XORI_B4_128_SB(src0, src1, src2, src3);
+        VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0, vec1);
+        VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2, vec3);
+        HADD_SB4_SH(vec0, vec1, vec2, vec3, out0, out1, out2, out3);
+        VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec4, vec5);
+        VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec6, vec7);
+        DPADD_SB4_SH(vec4, vec5, vec6, vec7, minus5b, minus5b, minus5b, minus5b,
+                     out0, out1, out2, out3);
+        VSHF_B2_SB(src0, src0, src1, src1, mask2, mask2, vec8, vec9);
+        VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, vec10, vec11);
+        DPADD_SB4_SH(vec8, vec9, vec10, vec11, plus20b, plus20b, plus20b,
+                     plus20b, out0, out1, out2, out3);
+        /* Rows are no longer needed unshifted: shift each by `slide` bytes. */
+        src0 = __msa_sld_b(src0, src0, slide);
+        src1 = __msa_sld_b(src1, src1, slide);
+        src2 = __msa_sld_b(src2, src2, slide);
+        src3 = __msa_sld_b(src3, src3, slide);
+
+        SRARI_H4_SH(out0, out1, out2, out3, 5); /* presumably rounded >> 5 */
+        SAT_SH4_SH(out0, out1, out2, out3, 7);
+
+        PCKEV_B4_SB(out0, out0, out1, out1, out2, out2, out3, out3,
+                    res0, res1, res2, res3);
+        /* Signed average: the source rows are still XORed with 128 here. */
+        res0 = __msa_aver_s_b(res0, src0);
+        res1 = __msa_aver_s_b(res1, src1);
+        res2 = __msa_aver_s_b(res2, src2);
+        res3 = __msa_aver_s_b(res3, src3);
+        /* Second XOR with 128 before the values are combined with dst. */
+        XORI_B4_128_SB(res0, res1, res2, res3);
+        AVER_ST8x4_UB(res0, dst0, res1, dst1, res2, dst2, res3, dst3,
+                      dst, dst_stride);
+
+        dst += (4 * dst_stride);
+    }
+}
+
+static void avc_luma_hz_qrt_and_aver_dst_16x16_msa(const uint8_t *src,
+                                                   int32_t src_stride,
+                                                   uint8_t *dst,
+                                                   int32_t dst_stride,
+                                                   uint8_t hor_offset)
+{ /* 16 rows: filter output averaged with shuffled source bytes, then dst. */
+    uint32_t loop_cnt;
+    v16i8 out0, out1;
+    v16i8 src0, src1, src2, src3;
+    v16i8 mask0, mask1, mask2, vshf;
+    v16u8 dst0, dst1;
+    v8i16 res0, res1, res2, res3;
+    v16i8 vec0, vec1, vec2, vec3, vec4, vec5;
+    v16i8 vec6, vec7, vec8, vec9, vec10, vec11;
+    v16i8 minus5b = __msa_ldi_b(-5); /* weight for the mask1 shuffles */
+    v16i8 plus20b = __msa_ldi_b(20); /* weight for the mask2 shuffles */
+    /* Three shuffle masks, 16 bytes apart, starting at luma_mask_arr[0]. */
+    LD_SB3(&luma_mask_arr[0], 16, mask0, mask1, mask2);
+    /* vshf picks the source bytes averaged in: table offset 112 or 96. */
+    if (hor_offset) {
+        vshf = LD_SB(&luma_mask_arr[16 + 96]);
+    } else {
+        vshf = LD_SB(&luma_mask_arr[96]);
+    }
+    /* Eight iterations of two rows; each row is read as two loads 8 apart. */
+    for (loop_cnt = 8; loop_cnt--;) {
+        LD_SB2(src, 8, src0, src1);
+        src += src_stride;
+        LD_SB2(src, 8, src2, src3);
+        src += src_stride;
+
+        LD_UB2(dst, dst_stride, dst0, dst1);
+        /* NOTE(review): macros are defined elsewhere; roles read from names. */
+        XORI_B4_128_SB(src0, src1, src2, src3);
+        VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0, vec3);
+        VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec6, vec9);
+        VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec1, vec4);
+        VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec7, vec10);
+        VSHF_B2_SB(src0, src0, src1, src1, mask2, mask2, vec2, vec5);
+        VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, vec8, vec11);
+        HADD_SB4_SH(vec0, vec3, vec6, vec9, res0, res1, res2, res3);
+        DPADD_SB4_SH(vec1, vec4, vec7, vec10, minus5b, minus5b, minus5b,
+                     minus5b, res0, res1, res2, res3);
+        DPADD_SB4_SH(vec2, vec5, vec8, vec11, plus20b, plus20b, plus20b,
+                     plus20b, res0, res1, res2, res3);
+        VSHF_B2_SB(src0, src1, src2, src3, vshf, vshf, src0, src2);
+        SRARI_H4_SH(res0, res1, res2, res3, 5); /* presumably rounded >> 5 */
+        SAT_SH4_SH(res0, res1, res2, res3, 7);
+        PCKEV_B2_SB(res1, res0, res3, res2, out0, out1);
+        /* src0/src2 were overwritten by the vshf shuffle above, one per row. */
+        out0 = __msa_aver_s_b(out0, src0);
+        out1 = __msa_aver_s_b(out1, src2);
+        /* Second XOR with 128 before the values are combined with dst. */
+        XORI_B2_128_SB(out0, out1);
+        AVER_UB2_UB(out0, dst0, out1, dst1, dst0, dst1);
+        ST_UB2(dst0, dst1, dst, dst_stride);
+        dst += (2 * dst_stride);
+    }
+}
+
+static void avc_luma_vt_and_aver_dst_4x4_msa(const uint8_t *src,
+                                             int32_t src_stride,
+                                             uint8_t *dst, int32_t dst_stride)
+{ /* Filters 9 source rows vertically into 4 rows, averaged with dst. */
+    int16_t filt_const0 = 0xfb01; /* low/high bytes 0x01, 0xfb: 1, -5 */
+    int16_t filt_const1 = 0x1414; /* low/high bytes 0x14, 0x14: 20, 20 */
+    int16_t filt_const2 = 0x1fb;  /* low/high bytes 0xfb, 0x01: -5, 1 */
+    v16u8 dst0, dst1, dst2, dst3;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16i8 src10_r, src32_r, src54_r, src76_r, src21_r, src43_r, src65_r;
+    v16i8 src87_r, src2110, src4332, src6554, src8776;
+    v8i16 out10, out32;
+    v16i8 filt0, filt1, filt2;
+    v16u8 res;
+    /* Replicate each 16-bit coefficient pair across a whole vector. */
+    filt0 = (v16i8) __msa_fill_h(filt_const0);
+    filt1 = (v16i8) __msa_fill_h(filt_const1);
+    filt2 = (v16i8) __msa_fill_h(filt_const2);
+    /* Rows 0..4 first; rows 5..8 are loaded after src has been advanced. */
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    src += (5 * src_stride);
+    /* srcBA_r pairs row B with row A; srcDCBA joins two such pairs. */
+    ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3,
+               src10_r, src21_r, src32_r, src43_r);
+    ILVR_D2_SB(src21_r, src10_r, src43_r, src32_r, src2110, src4332);
+    XORI_B2_128_SB(src2110, src4332);
+    LD_SB4(src, src_stride, src5, src6, src7, src8);
+    ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7,
+               src54_r, src65_r, src76_r, src87_r);
+    ILVR_D2_SB(src65_r, src54_r, src87_r, src76_r, src6554, src8776);
+    XORI_B2_128_SB(src6554, src8776);
+    out10 = DPADD_SH3_SH(src2110, src4332, src6554, filt0, filt1, filt2);
+    out32 = DPADD_SH3_SH(src4332, src6554, src8776, filt0, filt1, filt2);
+    SRARI_H2_SH(out10, out32, 5); /* presumably rounded >> 5 */
+    SAT_SH2_SH(out10, out32, 7);
+    LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+    res = PCKEV_XORI128_UB(out10, out32);
+    /* Collect the four dst rows into one vector (dst0) to match res. */
+    ILVR_W2_UB(dst1, dst0, dst3, dst2, dst0, dst1);
+
+    dst0 = (v16u8) __msa_pckev_d((v2i64) dst1, (v2i64) dst0);
+    dst0 = __msa_aver_u_b(res, dst0); /* unsigned byte average with dst */
+
+    ST4x4_UB(dst0, dst0, 0, 1, 2, 3, dst, dst_stride);
+}
+
+static void avc_luma_vt_and_aver_dst_8x8_msa(const uint8_t *src,
+                                             int32_t src_stride,
+                                             uint8_t *dst, int32_t dst_stride)
+{ /* Vertical filter producing 8 rows, stored via the AVG-with-dst macro. */
+    int32_t loop_cnt;
+    int16_t filt_const0 = 0xfb01; /* low/high bytes 0x01, 0xfb: 1, -5 */
+    int16_t filt_const1 = 0x1414; /* low/high bytes 0x14, 0x14: 20, 20 */
+    int16_t filt_const2 = 0x1fb;  /* low/high bytes 0xfb, 0x01: -5, 1 */
+    v16u8 dst0, dst1, dst2, dst3;
+    v16i8 src0, src1, src2, src3, src4, src7, src8, src9, src10;
+    v16i8 src10_r, src32_r, src76_r, src98_r;
+    v16i8 src21_r, src43_r, src87_r, src109_r;
+    v8i16 out0, out1, out2, out3;
+    v16i8 filt0, filt1, filt2;
+    /* Replicate each 16-bit coefficient pair across a whole vector. */
+    filt0 = (v16i8) __msa_fill_h(filt_const0);
+    filt1 = (v16i8) __msa_fill_h(filt_const1);
+    filt2 = (v16i8) __msa_fill_h(filt_const2);
+    /* Five rows are loaded up front; the loop adds four more per pass. */
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    src += (5 * src_stride);
+
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3,
+               src10_r, src21_r, src32_r, src43_r);
+    /* Two passes, four output rows each; src7..src10 are the new rows. */
+    for (loop_cnt = 2; loop_cnt--;) {
+        LD_SB4(src, src_stride, src7, src8, src9, src10);
+        src += (4 * src_stride);
+
+        XORI_B4_128_SB(src7, src8, src9, src10);
+        ILVR_B4_SB(src7, src4, src8, src7, src9, src8, src10, src9,
+                   src76_r, src87_r, src98_r, src109_r);
+        out0 = DPADD_SH3_SH(src10_r, src32_r, src76_r, filt0, filt1, filt2);
+        out1 = DPADD_SH3_SH(src21_r, src43_r, src87_r, filt0, filt1, filt2);
+        out2 = DPADD_SH3_SH(src32_r, src76_r, src98_r, filt0, filt1, filt2);
+        out3 = DPADD_SH3_SH(src43_r, src87_r, src109_r, filt0, filt1, filt2);
+        SRARI_H4_SH(out0, out1, out2, out3, 5); /* presumably rounded >> 5 */
+        SAT_SH4_SH(out0, out1, out2, out3, 7);
+        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+
+        CONVERT_UB_AVG_ST8x4_UB(out0, out1, out2, out3, dst0, dst1, dst2, dst3,
+                                dst, dst_stride);
+        dst += (4 * dst_stride);
+        /* Carry the newest row pairs and the last row into the next pass. */
+        src10_r = src76_r;
+        src32_r = src98_r;
+        src21_r = src87_r;
+        src43_r = src109_r;
+        src4 = src10;
+    }
+}
+
+static void avc_luma_vt_and_aver_dst_16x16_msa(const uint8_t *src,
+                                               int32_t src_stride,
+                                               uint8_t *dst, int32_t dst_stride)
+{ /* Vertical filter producing 16 rows; result goes to AVER_UB4_UB with dst. */
+    int32_t loop_cnt;
+    int16_t filt_const0 = 0xfb01; /* low/high bytes 0x01, 0xfb: 1, -5 */
+    int16_t filt_const1 = 0x1414; /* low/high bytes 0x14, 0x14: 20, 20 */
+    int16_t filt_const2 = 0x1fb;  /* low/high bytes 0xfb, 0x01: -5, 1 */
+    v16u8 dst0, dst1, dst2, dst3;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16i8 src10_r, src32_r, src54_r, src76_r, src21_r, src43_r, src65_r;
+    v16i8 src87_r, src10_l, src32_l, src54_l, src76_l, src21_l, src43_l;
+    v16i8 src65_l, src87_l;
+    v8i16 out0_r, out1_r, out2_r, out3_r, out0_l, out1_l, out2_l, out3_l;
+    v16i8 filt0, filt1, filt2;
+    v16u8 res0, res1, res2, res3;
+    /* Replicate each 16-bit coefficient pair across a whole vector. */
+    filt0 = (v16i8) __msa_fill_h(filt_const0);
+    filt1 = (v16i8) __msa_fill_h(filt_const1);
+    filt2 = (v16i8) __msa_fill_h(filt_const2);
+    /* Five rows are loaded up front; the loop adds four more per pass. */
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    src += (5 * src_stride);
+    /* _r and _l: the ILVR and ILVL interleaves of the same row pair. */
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3,
+               src10_r, src21_r, src32_r, src43_r);
+    ILVL_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3,
+               src10_l, src21_l, src32_l, src43_l);
+    /* Four passes, four output rows each. */
+    for (loop_cnt = 4; loop_cnt--;) {
+        LD_SB4(src, src_stride, src5, src6, src7, src8);
+        src += (4 * src_stride);
+
+        XORI_B4_128_SB(src5, src6, src7, src8);
+        ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7,
+                   src54_r, src65_r, src76_r, src87_r);
+        ILVL_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7,
+                   src54_l, src65_l, src76_l, src87_l);
+        out0_r = DPADD_SH3_SH(src10_r, src32_r, src54_r, filt0, filt1, filt2);
+        out1_r = DPADD_SH3_SH(src21_r, src43_r, src65_r, filt0, filt1, filt2);
+        out2_r = DPADD_SH3_SH(src32_r, src54_r, src76_r, filt0, filt1, filt2);
+        out3_r = DPADD_SH3_SH(src43_r, src65_r, src87_r, filt0, filt1, filt2);
+        out0_l = DPADD_SH3_SH(src10_l, src32_l, src54_l, filt0, filt1, filt2);
+        out1_l = DPADD_SH3_SH(src21_l, src43_l, src65_l, filt0, filt1, filt2);
+        out2_l = DPADD_SH3_SH(src32_l, src54_l, src76_l, filt0, filt1, filt2);
+        out3_l = DPADD_SH3_SH(src43_l, src65_l, src87_l, filt0, filt1, filt2);
+        SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 5); /* presumably >> 5 */
+        SRARI_H4_SH(out0_l, out1_l, out2_l, out3_l, 5);
+        SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
+        SAT_SH4_SH(out0_l, out1_l, out2_l, out3_l, 7);
+        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+        PCKEV_B4_UB(out0_l, out0_r, out1_l, out1_r, out2_l, out2_r, out3_l,
+                    out3_r, res0, res1, res2, res3);
+        XORI_B4_128_UB(res0, res1, res2, res3);
+        AVER_UB4_UB(res0, dst0, res1, dst1, res2, dst2, res3, dst3,
+                    res0, res1, res2, res3);
+        ST_UB4(res0, res1, res2, res3, dst, dst_stride);
+        dst += (4 * dst_stride);
+        /* Carry the newest row pairs and the last row into the next pass. */
+        src10_r = src54_r;
+        src32_r = src76_r;
+        src21_r = src65_r;
+        src43_r = src87_r;
+        src10_l = src54_l;
+        src32_l = src76_l;
+        src21_l = src65_l;
+        src43_l = src87_l;
+        src4 = src8;
+    }
+}
+
+static void avc_luma_vt_qrt_and_aver_dst_4x4_msa(const uint8_t *src,
+                                                 int32_t src_stride,
+                                                 uint8_t *dst,
+                                                 int32_t dst_stride,
+                                                 uint8_t ver_offset)
+{ /* 4 rows: vertical filter output averaged with source rows, then dst. */
+    int16_t filt_const0 = 0xfb01; /* low/high bytes 0x01, 0xfb: 1, -5 */
+    int16_t filt_const1 = 0x1414; /* low/high bytes 0x14, 0x14: 20, 20 */
+    int16_t filt_const2 = 0x1fb;  /* low/high bytes 0xfb, 0x01: -5, 1 */
+    v16u8 dst0, dst1, dst2, dst3;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16i8 src10_r, src32_r, src54_r, src76_r, src21_r, src43_r, src65_r;
+    v16i8 src87_r, src2110, src4332, src6554, src8776;
+    v8i16 out10, out32;
+    v16i8 filt0, filt1, filt2;
+    v16u8 res;
+    /* Replicate each 16-bit coefficient pair across a whole vector. */
+    filt0 = (v16i8) __msa_fill_h(filt_const0);
+    filt1 = (v16i8) __msa_fill_h(filt_const1);
+    filt2 = (v16i8) __msa_fill_h(filt_const2);
+    /* Rows 0..4 first; rows 5..8 are loaded after src has been advanced. */
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    src += (5 * src_stride);
+    /* Only the interleaved copies get XORed; src0..src8 stay as loaded. */
+    ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3,
+               src10_r, src21_r, src32_r, src43_r);
+    ILVR_D2_SB(src21_r, src10_r, src43_r, src32_r, src2110, src4332);
+    XORI_B2_128_SB(src2110, src4332);
+    LD_SB4(src, src_stride, src5, src6, src7, src8);
+    ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7,
+               src54_r, src65_r, src76_r, src87_r);
+    ILVR_D2_SB(src65_r, src54_r, src87_r, src76_r, src6554, src8776);
+    XORI_B2_128_SB(src6554, src8776);
+    out10 = DPADD_SH3_SH(src2110, src4332, src6554, filt0, filt1, filt2);
+    out32 = DPADD_SH3_SH(src4332, src6554, src8776, filt0, filt1, filt2);
+    SRARI_H2_SH(out10, out32, 5); /* presumably rounded >> 5 */
+    SAT_SH2_SH(out10, out32, 7);
+    LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+    res = PCKEV_XORI128_UB(out10, out32);
+    /* Rows averaged in: src3..src6 if ver_offset, else src2..src5. */
+    if (ver_offset) {
+        src32_r = (v16i8) __msa_insve_w((v4i32) src3, 1, (v4i32) src4);
+        src54_r = (v16i8) __msa_insve_w((v4i32) src5, 1, (v4i32) src6);
+    } else {
+        src32_r = (v16i8) __msa_insve_w((v4i32) src2, 1, (v4i32) src3);
+        src54_r = (v16i8) __msa_insve_w((v4i32) src4, 1, (v4i32) src5);
+    }
+    /* src32_r/src54_r are reused here to hold the four selected rows. */
+    src32_r = (v16i8) __msa_insve_d((v2i64) src32_r, 1, (v2i64) src54_r);
+    res = __msa_aver_u_b(res, (v16u8) src32_r);
+    /* Collect the four dst rows into one vector (dst0) to match res. */
+    ILVR_W2_UB(dst1, dst0, dst3, dst2, dst0, dst1);
+
+    dst0 = (v16u8) __msa_pckev_d((v2i64) dst1, (v2i64) dst0);
+    dst0 = __msa_aver_u_b(res, dst0); /* unsigned byte average with dst */
+
+    ST4x4_UB(dst0, dst0, 0, 1, 2, 3, dst, dst_stride);
+}
+
+static void avc_luma_vt_qrt_and_aver_dst_8x8_msa(const uint8_t *src,
+                                                 int32_t src_stride,
+                                                 uint8_t *dst,
+                                                 int32_t dst_stride,
+                                                 uint8_t ver_offset)
+{ /* 8 rows: vertical filter output averaged with source rows, then dst. */
+    int32_t loop_cnt;
+    int16_t filt_const0 = 0xfb01; /* low/high bytes 0x01, 0xfb: 1, -5 */
+    int16_t filt_const1 = 0x1414; /* low/high bytes 0x14, 0x14: 20, 20 */
+    int16_t filt_const2 = 0x1fb;  /* low/high bytes 0xfb, 0x01: -5, 1 */
+    v16u8 dst0, dst1, dst2, dst3;
+    v16i8 src0, src1, src2, src3, src4, src7, src8, src9, src10;
+    v16i8 src10_r, src32_r, src76_r, src98_r;
+    v16i8 src21_r, src43_r, src87_r, src109_r;
+    v8i16 out0_r, out1_r, out2_r, out3_r;
+    v16i8 res0, res1;
+    v16u8 vec0, vec1;
+    v16i8 filt0, filt1, filt2;
+    /* Replicate each 16-bit coefficient pair across a whole vector. */
+    filt0 = (v16i8) __msa_fill_h(filt_const0);
+    filt1 = (v16i8) __msa_fill_h(filt_const1);
+    filt2 = (v16i8) __msa_fill_h(filt_const2);
+    /* Five rows are loaded up front; the loop adds four more per pass. */
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    src += (5 * src_stride);
+
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3,
+               src10_r, src21_r, src32_r, src43_r);
+    /* Two passes, four output rows each; src7..src10 are the new rows. */
+    for (loop_cnt = 2; loop_cnt--;) {
+        LD_SB4(src, src_stride, src7, src8, src9, src10);
+        src += (4 * src_stride);
+
+        XORI_B4_128_SB(src7, src8, src9, src10);
+        ILVR_B4_SB(src7, src4, src8, src7, src9, src8, src10, src9,
+                   src76_r, src87_r, src98_r, src109_r);
+        out0_r = DPADD_SH3_SH(src10_r, src32_r, src76_r, filt0, filt1, filt2);
+        out1_r = DPADD_SH3_SH(src21_r, src43_r, src87_r, filt0, filt1, filt2);
+        out2_r = DPADD_SH3_SH(src32_r, src76_r, src98_r, filt0, filt1, filt2);
+        out3_r = DPADD_SH3_SH(src43_r, src87_r, src109_r, filt0, filt1, filt2);
+        SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 5); /* presumably >> 5 */
+        SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
+        PCKEV_B2_SB(out1_r, out0_r, out3_r, out2_r, res0, res1);
+        /* src10_r/src32_r double as scratch for the rows averaged in below. */
+        if (ver_offset) {
+            PCKEV_D2_SB(src4, src3, src8, src7, src10_r, src32_r);
+        } else {
+            PCKEV_D2_SB(src3, src2, src7, src4, src10_r, src32_r);
+        }
+
+        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+        ILVR_D2_UB(dst1, dst0, dst3, dst2, dst0, dst1);
+        /* Signed average: the source rows are still XORed with 128 here. */
+        vec0 = (v16u8) __msa_aver_s_b(res0, src10_r);
+        vec1 = (v16u8) __msa_aver_s_b(res1, src32_r);
+        /* Second XOR with 128 before the values are averaged with dst. */
+        XORI_B2_128_UB(vec0, vec1);
+        AVER_UB2_UB(vec0, dst0, vec1, dst1, vec0, vec1);
+        ST8x4_UB(vec0, vec1, dst, dst_stride);
+        dst += (4 * dst_stride);
+        /* Reload the scratch-clobbered pairs and carry rows to the next pass. */
+        src10_r = src76_r;
+        src32_r = src98_r;
+        src21_r = src87_r;
+        src43_r = src109_r;
+        src2 = src8;
+        src3 = src9;
+        src4 = src10;
+    }
+}
+
+static void avc_luma_vt_qrt_and_aver_dst_16x16_msa(const uint8_t *src,
+                                                   int32_t src_stride,
+                                                   uint8_t *dst,
+                                                   int32_t dst_stride,
+                                                   uint8_t ver_offset)
+{ /* 16 rows: vertical filter output averaged with source rows, then dst. */
+    int32_t loop_cnt;
+    int16_t filt_const0 = 0xfb01; /* low/high bytes 0x01, 0xfb: 1, -5 */
+    int16_t filt_const1 = 0x1414; /* low/high bytes 0x14, 0x14: 20, 20 */
+    int16_t filt_const2 = 0x1fb;  /* low/high bytes 0xfb, 0x01: -5, 1 */
+    v16u8 dst0, dst1, dst2, dst3;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16i8 src10_r, src32_r, src54_r, src76_r, src21_r, src43_r, src65_r;
+    v16i8 src87_r, src10_l, src32_l, src54_l, src76_l, src21_l, src43_l;
+    v16i8 src65_l, src87_l;
+    v8i16 out0_r, out1_r, out2_r, out3_r, out0_l, out1_l, out2_l, out3_l;
+    v16i8 out0, out1, out2, out3;
+    v16i8 filt0, filt1, filt2;
+    v16u8 res0, res1, res2, res3;
+    /* Replicate each 16-bit coefficient pair across a whole vector. */
+    filt0 = (v16i8) __msa_fill_h(filt_const0);
+    filt1 = (v16i8) __msa_fill_h(filt_const1);
+    filt2 = (v16i8) __msa_fill_h(filt_const2);
+    /* Five rows are loaded up front; the loop adds four more per pass. */
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    src += (5 * src_stride);
+    /* _r and _l: the ILVR and ILVL interleaves of the same row pair. */
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3,
+               src10_r, src21_r, src32_r, src43_r);
+    ILVL_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3,
+               src10_l, src21_l, src32_l, src43_l);
+    /* Four passes, four output rows each. */
+    for (loop_cnt = 4; loop_cnt--;) {
+        LD_SB4(src, src_stride, src5, src6, src7, src8);
+        src += (4 * src_stride);
+
+        XORI_B4_128_SB(src5, src6, src7, src8);
+        ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7,
+                   src54_r, src65_r, src76_r, src87_r);
+        ILVL_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7,
+                   src54_l, src65_l, src76_l, src87_l);
+        out0_r = DPADD_SH3_SH(src10_r, src32_r, src54_r, filt0, filt1, filt2);
+        out1_r = DPADD_SH3_SH(src21_r, src43_r, src65_r, filt0, filt1, filt2);
+        out2_r = DPADD_SH3_SH(src32_r, src54_r, src76_r, filt0, filt1, filt2);
+        out3_r = DPADD_SH3_SH(src43_r, src65_r, src87_r, filt0, filt1, filt2);
+        out0_l = DPADD_SH3_SH(src10_l, src32_l, src54_l, filt0, filt1, filt2);
+        out1_l = DPADD_SH3_SH(src21_l, src43_l, src65_l, filt0, filt1, filt2);
+        out2_l = DPADD_SH3_SH(src32_l, src54_l, src76_l, filt0, filt1, filt2);
+        out3_l = DPADD_SH3_SH(src43_l, src65_l, src87_l, filt0, filt1, filt2);
+        SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 5); /* presumably >> 5 */
+        SRARI_H4_SH(out0_l, out1_l, out2_l, out3_l, 5);
+        SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
+        SAT_SH4_SH(out0_l, out1_l, out2_l, out3_l, 7);
+        PCKEV_B4_SB(out0_l, out0_r, out1_l, out1_r, out2_l, out2_r, out3_l,
+                    out3_r, out0, out1, out2, out3);
+        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+        /* Rows averaged in: src3..src6 if ver_offset, else src2..src5. */
+        if (ver_offset) {
+            res0 = (v16u8) __msa_aver_s_b(out0, src3);
+            res1 = (v16u8) __msa_aver_s_b(out1, src4);
+            res2 = (v16u8) __msa_aver_s_b(out2, src5);
+            res3 = (v16u8) __msa_aver_s_b(out3, src6);
+        } else {
+            res0 = (v16u8) __msa_aver_s_b(out0, src2);
+            res1 = (v16u8) __msa_aver_s_b(out1, src3);
+            res2 = (v16u8) __msa_aver_s_b(out2, src4);
+            res3 = (v16u8) __msa_aver_s_b(out3, src5);
+        }
+        /* Second XOR with 128 before the values are averaged with dst. */
+        XORI_B4_128_UB(res0, res1, res2, res3);
+        AVER_UB4_UB(res0, dst0, res1, dst1, res2, dst2, res3, dst3,
+                    dst0, dst1, dst2, dst3);
+        ST_UB4(dst0, dst1, dst2, dst3, dst, dst_stride);
+        dst += (4 * dst_stride);
+        /* Carry the newest row pairs and rows src6..src8 into the next pass. */
+        src10_r = src54_r;
+        src32_r = src76_r;
+        src21_r = src65_r;
+        src43_r = src87_r;
+        src10_l = src54_l;
+        src32_l = src76_l;
+        src21_l = src65_l;
+        src43_l = src87_l;
+        src2 = src6;
+        src3 = src7;
+        src4 = src8;
+    }
+}
+
+static void avc_luma_mid_and_aver_dst_4x4_msa(const uint8_t *src,
+                                              int32_t src_stride,
+                                              uint8_t *dst, int32_t dst_stride)
+{ /* 4 rows: horizontal pass on 9 rows, 6-row combine, then average with dst. */
+    v16i8 src0, src1, src2, src3, src4;
+    v16i8 mask0, mask1, mask2;
+    v8i16 hz_out0, hz_out1, hz_out2, hz_out3;
+    v8i16 hz_out4, hz_out5, hz_out6, hz_out7, hz_out8;
+    v8i16 res0, res1, res2, res3;
+    v16u8 dst0, dst1, dst2, dst3;
+    v16u8 tmp0, tmp1, tmp2, tmp3;
+    /* This variant reads its three masks from byte offset 48 of the table. */
+    LD_SB3(&luma_mask_arr[48], 16, mask0, mask1, mask2);
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    src += (5 * src_stride);
+
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    /* The two-row macro fills hz_out0/hz_out2 from a pair of rows each. */
+    hz_out0 = AVC_XOR_VSHF_B_AND_APPLY_6TAP_HORIZ_FILT_SH(src0, src1,
+                                                          mask0, mask1, mask2);
+    hz_out2 = AVC_XOR_VSHF_B_AND_APPLY_6TAP_HORIZ_FILT_SH(src2, src3,
+                                                          mask0, mask1, mask2);
+    /* hz_out1/hz_out3 are derived from hz_out0/hz_out2 (presumably row 2). */
+    PCKOD_D2_SH(hz_out0, hz_out0, hz_out2, hz_out2, hz_out1, hz_out3);
+
+    hz_out4 = AVC_HORZ_FILTER_SH(src4, mask0, mask1, mask2);
+    /* src0..src3 are reused for the next four rows. */
+    LD_SB4(src, src_stride, src0, src1, src2, src3);
+    XORI_B4_128_SB(src0, src1, src2, src3);
+
+    hz_out5 = AVC_XOR_VSHF_B_AND_APPLY_6TAP_HORIZ_FILT_SH(src0, src1,
+                                                          mask0, mask1, mask2);
+    hz_out7 = AVC_XOR_VSHF_B_AND_APPLY_6TAP_HORIZ_FILT_SH(src2, src3,
+                                                          mask0, mask1, mask2);
+
+    PCKOD_D2_SH(hz_out5, hz_out5, hz_out7, hz_out7, hz_out6, hz_out8);
+    /* Each result combines six consecutive hz_out vectors. */
+    res0 = AVC_CALC_DPADD_H_6PIX_2COEFF_R_SH(hz_out0, hz_out1, hz_out2,
+                                             hz_out3, hz_out4, hz_out5);
+    res1 = AVC_CALC_DPADD_H_6PIX_2COEFF_R_SH(hz_out1, hz_out2, hz_out3,
+                                             hz_out4, hz_out5, hz_out6);
+    res2 = AVC_CALC_DPADD_H_6PIX_2COEFF_R_SH(hz_out2, hz_out3, hz_out4,
+                                             hz_out5, hz_out6, hz_out7);
+    res3 = AVC_CALC_DPADD_H_6PIX_2COEFF_R_SH(hz_out3, hz_out4, hz_out5,
+                                             hz_out6, hz_out7, hz_out8);
+    LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+    tmp0 = PCKEV_XORI128_UB(res0, res1);
+    tmp1 = PCKEV_XORI128_UB(res2, res3);
+    PCKEV_D2_UB(dst1, dst0, dst3, dst2, tmp2, tmp3);
+    AVER_UB2_UB(tmp0, tmp2, tmp1, tmp3, tmp0, tmp1);
+
+    ST4x4_UB(tmp0, tmp1, 0, 2, 0, 2, dst, dst_stride);
+}
+
+static void avc_luma_mid_and_aver_dst_8w_msa(const uint8_t *src,
+                                             int32_t src_stride,
+                                             uint8_t *dst, int32_t dst_stride,
+                                             int32_t height)
+{ /* `height` rows: horizontal pass per row, 6-row combine, AVG store to dst. */
+    uint32_t loop_cnt;
+    v16i8 src0, src1, src2, src3, src4;
+    v16i8 mask0, mask1, mask2;
+    v8i16 hz_out0, hz_out1, hz_out2, hz_out3;
+    v8i16 hz_out4, hz_out5, hz_out6, hz_out7, hz_out8;
+    v16u8 dst0, dst1, dst2, dst3;
+    v8i16 res0, res1, res2, res3;
+    /* Three shuffle masks, 16 bytes apart, starting at luma_mask_arr[0]. */
+    LD_SB3(&luma_mask_arr[0], 16, mask0, mask1, mask2);
+    /* Five rows are filtered up front; the loop adds four more per pass. */
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    src += (5 * src_stride);
+
+    hz_out0 = AVC_HORZ_FILTER_SH(src0, mask0, mask1, mask2);
+    hz_out1 = AVC_HORZ_FILTER_SH(src1, mask0, mask1, mask2);
+    hz_out2 = AVC_HORZ_FILTER_SH(src2, mask0, mask1, mask2);
+    hz_out3 = AVC_HORZ_FILTER_SH(src3, mask0, mask1, mask2);
+    hz_out4 = AVC_HORZ_FILTER_SH(src4, mask0, mask1, mask2);
+    /* height / 4 passes; a height that is not a multiple of 4 is truncated. */
+    for (loop_cnt = (height >> 2); loop_cnt--;) {
+        LD_SB4(src, src_stride, src0, src1, src2, src3);
+        XORI_B4_128_SB(src0, src1, src2, src3);
+        src += (4 * src_stride);
+
+        hz_out5 = AVC_HORZ_FILTER_SH(src0, mask0, mask1, mask2);
+        hz_out6 = AVC_HORZ_FILTER_SH(src1, mask0, mask1, mask2);
+        hz_out7 = AVC_HORZ_FILTER_SH(src2, mask0, mask1, mask2);
+        hz_out8 = AVC_HORZ_FILTER_SH(src3, mask0, mask1, mask2);
+        /* Each result combines six consecutive hz_out vectors. */
+        res0 = AVC_CALC_DPADD_H_6PIX_2COEFF_SH(hz_out0, hz_out1, hz_out2,
+                                               hz_out3, hz_out4, hz_out5);
+        res1 = AVC_CALC_DPADD_H_6PIX_2COEFF_SH(hz_out1, hz_out2, hz_out3,
+                                               hz_out4, hz_out5, hz_out6);
+        res2 = AVC_CALC_DPADD_H_6PIX_2COEFF_SH(hz_out2, hz_out3, hz_out4,
+                                               hz_out5, hz_out6, hz_out7);
+        res3 = AVC_CALC_DPADD_H_6PIX_2COEFF_SH(hz_out3, hz_out4, hz_out5,
+                                               hz_out6, hz_out7, hz_out8);
+        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+        CONVERT_UB_AVG_ST8x4_UB(res0, res1, res2, res3, dst0, dst1, dst2, dst3,
+                                dst, dst_stride);
+
+        dst += (4 * dst_stride);
+        hz_out3 = hz_out7; /* window moves 4 rows: new 0..4 = old 4..8 */
+        hz_out1 = hz_out5;
+        hz_out5 = hz_out4; /* hz_out5 already copied: park old hz_out4 here */
+        hz_out4 = hz_out8;
+        hz_out2 = hz_out6;
+        hz_out0 = hz_out5; /* i.e. hz_out0 = previous hz_out4 */
+    }
+}
+
+/*
+ * 16x16 variant assembled from the 8-column kernel: the left and the right
+ * 8-column halves are each handed to avc_luma_mid_and_aver_dst_8w_msa with
+ * a height of 16.
+ */
+static void avc_luma_mid_and_aver_dst_16x16_msa(const uint8_t *src,
+                                                int32_t src_stride,
+                                                uint8_t *dst,
+                                                int32_t dst_stride)
+{
+    int32_t col;
+
+    for (col = 0; col < 16; col += 8) {
+        avc_luma_mid_and_aver_dst_8w_msa(src + col, src_stride,
+                                         dst + col, dst_stride, 16);
+    }
+}
+
+/*
+ * 4-column kernel that filters across six consecutive source rows, combines
+ * the row-filtered values horizontally with the weights 1/-5/20, averages
+ * that with a rounded row-filtered sample chosen by horiz_offset, and
+ * finally averages the outcome into the pixels already in dst.
+ *
+ * src          first source row read; five rows are preloaded and two more
+ *              rows are read per loop iteration
+ * src_stride   byte distance between source rows
+ * dst          destination; read for the final average and written back
+ * dst_stride   byte distance between destination rows
+ * height       row count, consumed as (height >> 1) iterations of two rows
+ * horiz_offset non-zero picks the odd halfword lanes of the rounded
+ *              row-filtered samples, zero picks the even lanes
+ */
+static void avc_luma_midh_qrt_and_aver_dst_4w_msa(const uint8_t *src,
+                                                  int32_t src_stride,
+                                                  uint8_t *dst,
+                                                  int32_t dst_stride,
+                                                  int32_t height,
+                                                  uint8_t horiz_offset)
+{
+    uint32_t row;
+    v16i8 src0, src1, src2, src3, src4, src5, src6;
+    v16u8 dst0, dst1, res;
+    v8i16 vt_res0, vt_res1, vt_res2, vt_res3;
+    v4i32 hz_res0, hz_res1;
+    v8i16 res0, res1;
+    v8i16 shf_vec0, shf_vec1, shf_vec2, shf_vec3, shf_vec4, shf_vec5;
+    /* Halfword shuffle patterns: mask0 pairs positions (n, n+5), mask1 pairs
+     * (n+1, n+4) and mask2 pairs (n+2, n+3) of a sliding six-sample window. */
+    v8i16 mask0 = { 0, 5, 1, 6, 2, 7, 3, 8 };
+    v8i16 mask1 = { 1, 4, 2, 5, 3, 6, 4, 7 };
+    v8i16 mask2 = { 2, 3, 3, 4, 4, 5, 5, 6 };
+    /* Weights applied to the mask1 pairs (-5) and the mask2 pairs (20). */
+    v8i16 minus5h = __msa_ldi_h(-5);
+    v8i16 plus20h = __msa_ldi_h(20);
+    v8i16 zeros = { 0 };
+
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    src += (5 * src_stride);
+
+    /* NOTE(review): XORI_*_128 presumably flips the top bit of every byte so
+     * the unsigned pixels can be processed as signed values - confirm against
+     * the macro definition. */
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+
+    for (row = (height >> 1); row--;) {
+        LD_SB2(src, src_stride, src5, src6);
+        src += (2 * src_stride);
+
+        XORI_B2_128_SB(src5, src6);
+        LD_UB2(dst, dst_stride, dst0, dst1);
+
+        /* Gather the two destination rows into one vector for the final
+         * average. */
+        dst0 = (v16u8) __msa_ilvr_w((v4i32) dst1, (v4i32) dst0);
+
+        /* Row-direction pass: rows 0..5 feed the first output row and rows
+         * 1..6 feed the second one. */
+        AVC_CALC_DPADD_B_6PIX_2COEFF_SH(src0, src1, src2, src3, src4, src5,
+                                        vt_res0, vt_res1);
+        AVC_CALC_DPADD_B_6PIX_2COEFF_SH(src1, src2, src3, src4, src5, src6,
+                                        vt_res2, vt_res3);
+        /* Arrange the row-filtered halfwords into the three tap pairings. */
+        VSHF_H3_SH(vt_res0, vt_res1, vt_res0, vt_res1, vt_res0, vt_res1,
+                   mask0, mask1, mask2, shf_vec0, shf_vec1, shf_vec2);
+        VSHF_H3_SH(vt_res2, vt_res3, vt_res2, vt_res3, vt_res2, vt_res3,
+                   mask0, mask1, mask2, shf_vec3, shf_vec4, shf_vec5);
+
+        /* Column-direction pass in 32-bit lanes: the mask0 pairs are summed
+         * as-is, then the mask1 and mask2 pairs are accumulated with the
+         * -5 and 20 weights. */
+        hz_res0 = __msa_hadd_s_w(shf_vec0, shf_vec0);
+        DPADD_SH2_SW(shf_vec1, shf_vec2, minus5h, plus20h, hz_res0, hz_res0);
+
+        hz_res1 = __msa_hadd_s_w(shf_vec3, shf_vec3);
+        DPADD_SH2_SW(shf_vec4, shf_vec5, minus5h, plus20h, hz_res1, hz_res1);
+
+        /* Rounding shift by 10 followed by saturation of the two-pass
+         * result. */
+        SRARI_W2_SW(hz_res0, hz_res1, 10);
+        SAT_SW2_SW(hz_res0, hz_res1, 7);
+
+        /* Row-filtered-only samples (the mask2 pairs), rounded by 5 bits. */
+        res0 = __msa_srari_h(shf_vec2, 5);
+        res1 = __msa_srari_h(shf_vec5, 5);
+
+        SAT_SH2_SH(res0, res1, 7);
+
+        /* Keep one halfword of each pair, interleaved with zeros so it lines
+         * up with the 32-bit lanes of hz_res*. */
+        if (horiz_offset) {
+            res0 = __msa_ilvod_h(zeros, res0);
+            res1 = __msa_ilvod_h(zeros, res1);
+        } else {
+            ILVEV_H2_SH(res0, zeros, res1, zeros, res0, res1);
+        }
+        hz_res0 = __msa_aver_s_w(hz_res0, (v4i32) res0);
+        hz_res1 = __msa_aver_s_w(hz_res1, (v4i32) res1);
+        res0 = __msa_pckev_h((v8i16) hz_res1, (v8i16) hz_res0);
+
+        /* NOTE(review): PCKEV_XORI128_UB presumably packs to bytes and
+         * undoes the earlier 128 flip - confirm. */
+        res = PCKEV_XORI128_UB(res0, res0);
+
+        /* Average with the existing destination pixels. */
+        dst0 = __msa_aver_u_b(res, dst0);
+
+        ST4x2_UB(dst0, dst, dst_stride);
+        dst += (2 * dst_stride);
+
+        /* Slide the row window down by the two rows just consumed. */
+        src0 = src2;
+        src1 = src3;
+        src2 = src4;
+        src3 = src5;
+        src4 = src6;
+    }
+}
+
+/*
+ * 8-column variant: runs the 4-column kernel on the two adjacent 4-column
+ * strips, forwarding height and horiz_offset unchanged.
+ */
+static void avc_luma_midh_qrt_and_aver_dst_8w_msa(const uint8_t *src,
+                                                  int32_t src_stride,
+                                                  uint8_t *dst,
+                                                  int32_t dst_stride,
+                                                  int32_t height,
+                                                  uint8_t horiz_offset)
+{
+    int32_t col;
+
+    for (col = 0; col < 8; col += 4) {
+        avc_luma_midh_qrt_and_aver_dst_4w_msa(src + col, src_stride,
+                                              dst + col, dst_stride,
+                                              height, horiz_offset);
+    }
+}
+
+/*
+ * 16-column variant: runs the 4-column kernel on four adjacent 4-column
+ * strips, forwarding height and horiz_offset unchanged.
+ */
+static void avc_luma_midh_qrt_and_aver_dst_16w_msa(const uint8_t *src,
+                                                   int32_t src_stride,
+                                                   uint8_t *dst,
+                                                   int32_t dst_stride,
+                                                   int32_t height,
+                                                   uint8_t horiz_offset)
+{
+    int32_t col;
+
+    for (col = 0; col < 16; col += 4) {
+        avc_luma_midh_qrt_and_aver_dst_4w_msa(src + col, src_stride,
+                                              dst + col, dst_stride,
+                                              height, horiz_offset);
+    }
+}
+
+/*
+ * 4-column kernel: each source row is filtered horizontally first, six
+ * consecutive filtered rows are then combined vertically, and that result
+ * is averaged with a rounded horizontally-filtered row chosen by
+ * ver_offset.  The outcome is averaged into the pixels already in dst.
+ *
+ * src         first source row read; five rows are preloaded and two more
+ *             are read per loop iteration
+ * src_stride  byte distance between source rows
+ * dst         destination; read for the final average and written back
+ * dst_stride  byte distance between destination rows
+ * height      row count, consumed as (height >> 1) iterations of two rows
+ * ver_offset  non-zero averages against hz_out3/hz_out4, zero against
+ *             hz_out2/hz_out3 (i.e. one row later vs. earlier)
+ */
+static void avc_luma_midv_qrt_and_aver_dst_4w_msa(const uint8_t *src,
+                                                  int32_t src_stride,
+                                                  uint8_t *dst,
+                                                  int32_t dst_stride,
+                                                  int32_t height,
+                                                  uint8_t ver_offset)
+{
+    int32_t loop_cnt;
+    int32_t out0, out1;
+    v16i8 src0, src1, src2, src3, src4;
+    v16u8 dst0, dst1;
+    v16i8 mask0, mask1, mask2;
+    v8i16 hz_out0, hz_out1, hz_out2, hz_out3;
+    v8i16 hz_out4, hz_out5, hz_out6;
+    v8i16 res0, res1, res2, res3;
+    v16u8 vec0, vec1;
+
+    /* Three shuffle masks taken from offset 48 of the shared mask table. */
+    LD_SB3(&luma_mask_arr[48], 16, mask0, mask1, mask2);
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    src += (5 * src_stride);
+
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+
+    /* NOTE(review): each two-row macro call presumably returns one row in
+     * the low doubleword and the next row in the high doubleword; the
+     * PCKOD_D2 below then moves the high halves into hz_out1/hz_out3. */
+    hz_out0 = AVC_XOR_VSHF_B_AND_APPLY_6TAP_HORIZ_FILT_SH(src0, src1,
+                                                          mask0, mask1, mask2);
+    hz_out2 = AVC_XOR_VSHF_B_AND_APPLY_6TAP_HORIZ_FILT_SH(src2, src3,
+                                                          mask0, mask1, mask2);
+
+    PCKOD_D2_SH(hz_out0, hz_out0, hz_out2, hz_out2, hz_out1, hz_out3);
+
+    hz_out4 = AVC_HORZ_FILTER_SH(src4, mask0, mask1, mask2);
+
+    for (loop_cnt = (height >> 1); loop_cnt--;) {
+        LD_SB2(src, src_stride, src0, src1);
+        src += (2 * src_stride);
+
+        XORI_B2_128_SB(src0, src1);
+        LD_UB2(dst, dst_stride, dst0, dst1);
+        hz_out5 = AVC_XOR_VSHF_B_AND_APPLY_6TAP_HORIZ_FILT_SH(src0, src1,
+                                                              mask0, mask1,
+                                                              mask2);
+        hz_out6 = (v8i16) __msa_pckod_d((v2i64) hz_out5, (v2i64) hz_out5);
+        /* Vertical combination over six filtered rows: rows 0..5 for the
+         * first output row, rows 1..6 for the second. */
+        res0 = AVC_CALC_DPADD_H_6PIX_2COEFF_R_SH(hz_out0, hz_out1, hz_out2,
+                                                 hz_out3, hz_out4, hz_out5);
+        res2 = AVC_CALC_DPADD_H_6PIX_2COEFF_R_SH(hz_out1, hz_out2, hz_out3,
+                                                 hz_out4, hz_out5, hz_out6);
+
+        /* Horizontal-only rows to average against, rounded by 5 bits. */
+        if (ver_offset) {
+            res1 = __msa_srari_h(hz_out3, 5);
+            res3 = __msa_srari_h(hz_out4, 5);
+        } else {
+            res1 = __msa_srari_h(hz_out2, 5);
+            res3 = __msa_srari_h(hz_out3, 5);
+        }
+
+        SAT_SH2_SH(res1, res3, 7);
+
+        res0 = __msa_aver_s_h(res0, res1);
+        res1 = __msa_aver_s_h(res2, res3);
+
+        vec0 = PCKEV_XORI128_UB(res0, res0);
+        vec1 = PCKEV_XORI128_UB(res1, res1);
+
+        /* Average with the existing destination pixels. */
+        AVER_UB2_UB(vec0, dst0, vec1, dst1, dst0, dst1);
+
+        /* Only the first 32-bit word (four pixels) of each row is stored. */
+        out0 = __msa_copy_u_w((v4i32) dst0, 0);
+        out1 = __msa_copy_u_w((v4i32) dst1, 0);
+        SW(out0, dst);
+        dst += dst_stride;
+        SW(out1, dst);
+        dst += dst_stride;
+
+        /* Slide the filtered-row window down by two rows. */
+        hz_out0 = hz_out2;
+        hz_out1 = hz_out3;
+        hz_out2 = hz_out4;
+        hz_out3 = hz_out5;
+        hz_out4 = hz_out6;
+    }
+}
+
+/*
+ * 8-column kernel: each source row is filtered horizontally, six
+ * consecutive filtered rows are combined vertically, and that result is
+ * averaged with a rounded horizontally-filtered row chosen by vert_offset.
+ * The outcome is averaged into the pixels already in dst.
+ *
+ * src          first source row read; five rows are preloaded and four more
+ *              are read per loop iteration
+ * src_stride   byte distance between source rows
+ * dst          destination; read for the final average and written back
+ * dst_stride   byte distance between destination rows
+ * height       row count, consumed as (height >> 2) iterations of four rows
+ * vert_offset  non-zero averages against hz_out3..hz_out6, zero against
+ *              hz_out2..hz_out5
+ */
+static void avc_luma_midv_qrt_and_aver_dst_8w_msa(const uint8_t *src,
+                                                  int32_t src_stride,
+                                                  uint8_t *dst,
+                                                  int32_t dst_stride,
+                                                  int32_t height,
+                                                  uint8_t vert_offset)
+{
+    int32_t loop_cnt;
+    v16i8 src0, src1, src2, src3, src4;
+    v16u8 dst0, dst1, dst2, dst3;
+    v16i8 mask0, mask1, mask2;
+    v8i16 hz_out0, hz_out1, hz_out2, hz_out3;
+    v8i16 hz_out4, hz_out5, hz_out6, hz_out7, hz_out8;
+    v8i16 res0, res1, res2, res3;
+    v8i16 res4, res5, res6, res7;
+
+    /* Three shuffle masks taken from the start of the shared mask table. */
+    LD_SB3(&luma_mask_arr[0], 16, mask0, mask1, mask2);
+
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    src += (5 * src_stride);
+
+    /* Horizontal pass over the five preloaded rows. */
+    hz_out0 = AVC_HORZ_FILTER_SH(src0, mask0, mask1, mask2);
+    hz_out1 = AVC_HORZ_FILTER_SH(src1, mask0, mask1, mask2);
+    hz_out2 = AVC_HORZ_FILTER_SH(src2, mask0, mask1, mask2);
+    hz_out3 = AVC_HORZ_FILTER_SH(src3, mask0, mask1, mask2);
+    hz_out4 = AVC_HORZ_FILTER_SH(src4, mask0, mask1, mask2);
+
+    for (loop_cnt = (height >> 2); loop_cnt--;) {
+        LD_SB4(src, src_stride, src0, src1, src2, src3);
+        XORI_B4_128_SB(src0, src1, src2, src3);
+        src += (4 * src_stride);
+
+        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+
+        hz_out5 = AVC_HORZ_FILTER_SH(src0, mask0, mask1, mask2);
+        hz_out6 = AVC_HORZ_FILTER_SH(src1, mask0, mask1, mask2);
+        hz_out7 = AVC_HORZ_FILTER_SH(src2, mask0, mask1, mask2);
+        hz_out8 = AVC_HORZ_FILTER_SH(src3, mask0, mask1, mask2);
+
+        /* Vertical combination: a six-row window per output row, sliding
+         * down by one row for each of the four outputs. */
+        res0 = AVC_CALC_DPADD_H_6PIX_2COEFF_SH(hz_out0, hz_out1, hz_out2,
+                                               hz_out3, hz_out4, hz_out5);
+        res2 = AVC_CALC_DPADD_H_6PIX_2COEFF_SH(hz_out1, hz_out2, hz_out3,
+                                               hz_out4, hz_out5, hz_out6);
+        res4 = AVC_CALC_DPADD_H_6PIX_2COEFF_SH(hz_out2, hz_out3, hz_out4,
+                                               hz_out5, hz_out6, hz_out7);
+        res6 = AVC_CALC_DPADD_H_6PIX_2COEFF_SH(hz_out3, hz_out4, hz_out5,
+                                               hz_out6, hz_out7, hz_out8);
+
+        /* Horizontal-only rows to average against, rounded by 5 bits. */
+        if (vert_offset) {
+            res1 = __msa_srari_h(hz_out3, 5);
+            res3 = __msa_srari_h(hz_out4, 5);
+            res5 = __msa_srari_h(hz_out5, 5);
+            res7 = __msa_srari_h(hz_out6, 5);
+        } else {
+            res1 = __msa_srari_h(hz_out2, 5);
+            res3 = __msa_srari_h(hz_out3, 5);
+            res5 = __msa_srari_h(hz_out4, 5);
+            res7 = __msa_srari_h(hz_out5, 5);
+        }
+
+        SAT_SH4_SH(res1, res3, res5, res7, 7);
+
+        res0 = __msa_aver_s_h(res0, res1);
+        res1 = __msa_aver_s_h(res2, res3);
+        res2 = __msa_aver_s_h(res4, res5);
+        res3 = __msa_aver_s_h(res6, res7);
+
+        /* NOTE(review): the macro presumably converts to unsigned bytes,
+         * averages with dst0..dst3 and stores four 8-byte rows - confirm. */
+        CONVERT_UB_AVG_ST8x4_UB(res0, res1, res2, res3, dst0, dst1, dst2, dst3,
+                                dst, dst_stride);
+        dst += (4 * dst_stride);
+
+        /* Slide the filtered-row window down by four rows. */
+        hz_out0 = hz_out4;
+        hz_out1 = hz_out5;
+        hz_out2 = hz_out6;
+        hz_out3 = hz_out7;
+        hz_out4 = hz_out8;
+    }
+}
+
+/*
+ * 16-column variant: runs the 8-column kernel on the left and right
+ * 8-column halves, forwarding height and vert_offset unchanged.
+ */
+static void avc_luma_midv_qrt_and_aver_dst_16w_msa(const uint8_t *src,
+                                                   int32_t src_stride,
+                                                   uint8_t *dst,
+                                                   int32_t dst_stride,
+                                                   int32_t height,
+                                                   uint8_t vert_offset)
+{
+    int32_t col;
+
+    for (col = 0; col < 16; col += 8) {
+        avc_luma_midv_qrt_and_aver_dst_8w_msa(src + col, src_stride,
+                                              dst + col, dst_stride,
+                                              height, vert_offset);
+    }
+}
+
+/*
+ * 4x4 kernel: a horizontally filtered block (rows read from src_x) and a
+ * vertically filtered block (rows read from src_y) are each rounded by
+ * 5 bits and saturated, summed and halved with rounding, and the result is
+ * averaged into the 4x4 block already in dst.
+ *
+ * src_x       first of the four rows fed to the horizontal filter
+ * src_y       first of the nine rows fed to the vertical filter
+ * src_stride  byte distance between source rows (shared by src_x and src_y)
+ * dst         destination; read for the final average and written back
+ * dst_stride  byte distance between destination rows
+ */
+static void avc_luma_hv_qrt_and_aver_dst_4x4_msa(const uint8_t *src_x,
+                                                 const uint8_t *src_y,
+                                                 int32_t src_stride,
+                                                 uint8_t *dst,
+                                                 int32_t dst_stride)
+{
+    v16i8 src_hz0, src_hz1, src_hz2, src_hz3;
+    v16u8 dst0, dst1, dst2, dst3;
+    v16i8 src_vt0, src_vt1, src_vt2, src_vt3, src_vt4;
+    v16i8 src_vt5, src_vt6, src_vt7, src_vt8;
+    v16i8 mask0, mask1, mask2;
+    v8i16 hz_out0, hz_out1, vert_out0, vert_out1;
+    v8i16 res0, res1;
+    v16u8 res;
+
+    /* Three shuffle masks taken from offset 48 of the shared mask table. */
+    LD_SB3(&luma_mask_arr[48], 16, mask0, mask1, mask2);
+    LD_SB5(src_y, src_stride, src_vt0, src_vt1, src_vt2, src_vt3, src_vt4);
+    src_y += (5 * src_stride);
+
+    /* Pair each row with the following one: word lane 1 of row n receives
+     * word lane 0 of row n + 1, so one vector carries two adjacent rows. */
+    src_vt0 = (v16i8) __msa_insve_w((v4i32) src_vt0, 1, (v4i32) src_vt1);
+    src_vt1 = (v16i8) __msa_insve_w((v4i32) src_vt1, 1, (v4i32) src_vt2);
+    src_vt2 = (v16i8) __msa_insve_w((v4i32) src_vt2, 1, (v4i32) src_vt3);
+    src_vt3 = (v16i8) __msa_insve_w((v4i32) src_vt3, 1, (v4i32) src_vt4);
+
+    XORI_B4_128_SB(src_vt0, src_vt1, src_vt2, src_vt3);
+    LD_SB4(src_x, src_stride, src_hz0, src_hz1, src_hz2, src_hz3);
+    LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+    XORI_B4_128_SB(src_hz0, src_hz1, src_hz2, src_hz3);
+    /* Horizontal pass, two rows per macro call. */
+    hz_out0 = AVC_XOR_VSHF_B_AND_APPLY_6TAP_HORIZ_FILT_SH(src_hz0, src_hz1,
+                                                          mask0, mask1, mask2);
+    hz_out1 = AVC_XOR_VSHF_B_AND_APPLY_6TAP_HORIZ_FILT_SH(src_hz2, src_hz3,
+                                                          mask0, mask1, mask2);
+    SRARI_H2_SH(hz_out0, hz_out1, 5);
+    SAT_SH2_SH(hz_out0, hz_out1, 7);
+    /* Remaining four rows for the vertical filter (src_y was advanced by
+     * five rows above). */
+    LD_SB4(src_y, src_stride, src_vt5, src_vt6, src_vt7, src_vt8);
+
+    src_vt4 = (v16i8) __msa_insve_w((v4i32) src_vt4, 1, (v4i32) src_vt5);
+    src_vt5 = (v16i8) __msa_insve_w((v4i32) src_vt5, 1, (v4i32) src_vt6);
+    src_vt6 = (v16i8) __msa_insve_w((v4i32) src_vt6, 1, (v4i32) src_vt7);
+    src_vt7 = (v16i8) __msa_insve_w((v4i32) src_vt7, 1, (v4i32) src_vt8);
+
+    XORI_B4_128_SB(src_vt4, src_vt5, src_vt6, src_vt7);
+
+    /* filter calc */
+    /* Vertical pass: the first call starts at row pair 0, the second at row
+     * pair 2, so each call covers two output rows. */
+    vert_out0 = AVC_CALC_DPADD_B_6PIX_2COEFF_R_SH(src_vt0, src_vt1, src_vt2,
+                                                  src_vt3, src_vt4, src_vt5);
+    vert_out1 = AVC_CALC_DPADD_B_6PIX_2COEFF_R_SH(src_vt2, src_vt3, src_vt4,
+                                                  src_vt5, src_vt6, src_vt7);
+    SRARI_H2_SH(vert_out0, vert_out1, 5);
+    SAT_SH2_SH(vert_out0, vert_out1, 7);
+
+    /* Rounded mean of the horizontal and vertical results. */
+    res1 = __msa_srari_h((hz_out1 + vert_out1), 1);
+    res0 = __msa_srari_h((hz_out0 + vert_out0), 1);
+
+    SAT_SH2_SH(res0, res1, 7);
+    res = PCKEV_XORI128_UB(res0, res1);
+
+    /* Collect the four destination rows into one vector (rows 0/1 in the
+     * low doubleword, rows 2/3 in the high one) and average with them. */
+    dst0 = (v16u8) __msa_insve_w((v4i32) dst0, 1, (v4i32) dst1);
+    dst1 = (v16u8) __msa_insve_w((v4i32) dst2, 1, (v4i32) dst3);
+    dst0 = (v16u8) __msa_insve_d((v2i64) dst0, 1, (v2i64) dst1);
+    dst0 = __msa_aver_u_b(res, dst0);
+
+    ST4x4_UB(dst0, dst0, 0, 1, 2, 3, dst, dst_stride);
+}
+
+/*
+ * 8x8 kernel: a horizontally filtered block (rows read from src_x) and a
+ * vertically filtered block (rows read from src_y) are each rounded by
+ * 5 bits and saturated, summed and halved with rounding, and the result is
+ * averaged into the block already in dst.  The work is done in two passes
+ * of four rows.
+ *
+ * src_x       first row fed to the horizontal filter (8 rows are read)
+ * src_y       first row fed to the vertical filter (5 rows are preloaded,
+ *             4 more per pass)
+ * src_stride  byte distance between source rows (shared by src_x and src_y)
+ * dst         destination; read for the final average and written back
+ * dst_stride  byte distance between destination rows
+ */
+static void avc_luma_hv_qrt_and_aver_dst_8x8_msa(const uint8_t *src_x,
+                                                 const uint8_t *src_y,
+                                                 int32_t src_stride,
+                                                 uint8_t *dst,
+                                                 int32_t dst_stride)
+{
+    uint32_t loop_cnt;
+    v16i8 src_hz0, src_hz1, src_hz2, src_hz3;
+    v16u8 dst0, dst1, dst2, dst3;
+    v16i8 src_vt0, src_vt1, src_vt2, src_vt3;
+    v16i8 src_vt4, src_vt5, src_vt6, src_vt7, src_vt8;
+    v16i8 mask0, mask1, mask2;
+    v8i16 hz_out0, hz_out1, hz_out2, hz_out3;
+    v8i16 vert_out0, vert_out1, vert_out2, vert_out3;
+    v8i16 out0, out1, out2, out3;
+
+    /* Three shuffle masks taken from the start of the shared mask table. */
+    LD_SB3(&luma_mask_arr[0], 16, mask0, mask1, mask2);
+
+    LD_SB5(src_y, src_stride, src_vt0, src_vt1, src_vt2, src_vt3, src_vt4);
+    src_y += (5 * src_stride);
+
+    /* Pair each row with the following one: doubleword lane 1 of row n
+     * receives doubleword lane 0 of row n + 1. */
+    src_vt0 = (v16i8) __msa_insve_d((v2i64) src_vt0, 1, (v2i64) src_vt1);
+    src_vt1 = (v16i8) __msa_insve_d((v2i64) src_vt1, 1, (v2i64) src_vt2);
+    src_vt2 = (v16i8) __msa_insve_d((v2i64) src_vt2, 1, (v2i64) src_vt3);
+    src_vt3 = (v16i8) __msa_insve_d((v2i64) src_vt3, 1, (v2i64) src_vt4);
+
+    XORI_B4_128_SB(src_vt0, src_vt1, src_vt2, src_vt3);
+
+    for (loop_cnt = 2; loop_cnt--;) {
+        LD_SB4(src_x, src_stride, src_hz0, src_hz1, src_hz2, src_hz3);
+        XORI_B4_128_SB(src_hz0, src_hz1, src_hz2, src_hz3);
+        src_x += (4 * src_stride);
+
+        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+        /* Horizontal pass on four rows, rounded by 5 bits and saturated. */
+        hz_out0 = AVC_HORZ_FILTER_SH(src_hz0, mask0, mask1, mask2);
+        hz_out1 = AVC_HORZ_FILTER_SH(src_hz1, mask0, mask1, mask2);
+        hz_out2 = AVC_HORZ_FILTER_SH(src_hz2, mask0, mask1, mask2);
+        hz_out3 = AVC_HORZ_FILTER_SH(src_hz3, mask0, mask1, mask2);
+        SRARI_H4_SH(hz_out0, hz_out1, hz_out2, hz_out3, 5);
+        SAT_SH4_SH(hz_out0, hz_out1, hz_out2, hz_out3, 7);
+        LD_SB4(src_y, src_stride, src_vt5, src_vt6, src_vt7, src_vt8);
+        src_y += (4 * src_stride);
+
+        src_vt4 = (v16i8) __msa_insve_d((v2i64) src_vt4, 1, (v2i64) src_vt5);
+        src_vt5 = (v16i8) __msa_insve_d((v2i64) src_vt5, 1, (v2i64) src_vt6);
+        src_vt6 = (v16i8) __msa_insve_d((v2i64) src_vt6, 1, (v2i64) src_vt7);
+        src_vt7 = (v16i8) __msa_insve_d((v2i64) src_vt7, 1, (v2i64) src_vt8);
+
+        XORI_B4_128_SB(src_vt4, src_vt5, src_vt6, src_vt7);
+        /* Vertical pass: each macro call produces two output vectors; the
+         * second call starts two row pairs further down. */
+        AVC_CALC_DPADD_B_6PIX_2COEFF_SH(src_vt0, src_vt1, src_vt2, src_vt3,
+                                        src_vt4, src_vt5, vert_out0, vert_out1);
+        AVC_CALC_DPADD_B_6PIX_2COEFF_SH(src_vt2, src_vt3, src_vt4, src_vt5,
+                                        src_vt6, src_vt7, vert_out2, vert_out3);
+        SRARI_H4_SH(vert_out0, vert_out1, vert_out2, vert_out3, 5);
+        SAT_SH4_SH(vert_out0, vert_out1, vert_out2, vert_out3, 7);
+
+        /* Rounded mean of the horizontal and vertical results. */
+        out0 = __msa_srari_h((hz_out0 + vert_out0), 1);
+        out1 = __msa_srari_h((hz_out1 + vert_out1), 1);
+        out2 = __msa_srari_h((hz_out2 + vert_out2), 1);
+        out3 = __msa_srari_h((hz_out3 + vert_out3), 1);
+
+        SAT_SH4_SH(out0, out1, out2, out3, 7);
+        /* NOTE(review): the macro presumably converts to unsigned bytes,
+         * averages with dst0..dst3 and stores four 8-byte rows - confirm. */
+        CONVERT_UB_AVG_ST8x4_UB(out0, out1, out2, out3, dst0, dst1, dst2, dst3,
+                                dst, dst_stride);
+        dst += (4 * dst_stride);
+
+        /* Carry the last row pairs over as the head of the next pass. */
+        src_vt0 = src_vt4;
+        src_vt1 = src_vt5;
+        src_vt2 = src_vt6;
+        src_vt3 = src_vt7;
+        src_vt4 = src_vt8;
+    }
+}
+
+/*
+ * 16x16 variant: tiles the block into four 8x8 quadrants and runs the 8x8
+ * kernel on each, in the order top-left, top-right, bottom-left,
+ * bottom-right.  src_x and src_y are offset identically for every quadrant.
+ */
+static void avc_luma_hv_qrt_and_aver_dst_16x16_msa(const uint8_t *src_x,
+                                                   const uint8_t *src_y,
+                                                   int32_t src_stride,
+                                                   uint8_t *dst,
+                                                   int32_t dst_stride)
+{
+    int32_t row, col;
+
+    for (row = 0; row < 16; row += 8) {
+        const uint8_t *row_x = src_x + (row * src_stride);
+        const uint8_t *row_y = src_y + (row * src_stride);
+        uint8_t *row_dst = dst + (row * dst_stride);
+
+        for (col = 0; col < 16; col += 8) {
+            avc_luma_hv_qrt_and_aver_dst_8x8_msa(row_x + col, row_y + col,
+                                                 src_stride,
+                                                 row_dst + col, dst_stride);
+        }
+    }
+}
+
+/*
+ * Copy `height` rows of 8 bytes from src to dst.  The row count selects the
+ * unrolling: multiples of 12 go in groups of 8 + 4 rows, otherwise
+ * multiples of 8, 4 or 2 rows are used.  Odd heights copy nothing.
+ */
+static void copy_width8_msa(const uint8_t *src, int32_t src_stride,
+                            uint8_t *dst, int32_t dst_stride,
+                            int32_t height)
+{
+    int32_t iter;
+    uint64_t d0, d1, d2, d3, d4, d5, d6, d7;
+    v16u8 row0, row1, row2, row3, row4, row5, row6, row7;
+
+    if (!(height % 12)) {
+        iter = height / 12;
+        while (iter--) {
+            /* first eight rows of the group */
+            LD_UB8(src, src_stride,
+                   row0, row1, row2, row3, row4, row5, row6, row7);
+            src += 8 * src_stride;
+
+            d0 = __msa_copy_u_d((v2i64) row0, 0);
+            d1 = __msa_copy_u_d((v2i64) row1, 0);
+            d2 = __msa_copy_u_d((v2i64) row2, 0);
+            d3 = __msa_copy_u_d((v2i64) row3, 0);
+            d4 = __msa_copy_u_d((v2i64) row4, 0);
+            d5 = __msa_copy_u_d((v2i64) row5, 0);
+            d6 = __msa_copy_u_d((v2i64) row6, 0);
+            d7 = __msa_copy_u_d((v2i64) row7, 0);
+
+            SD4(d0, d1, d2, d3, dst, dst_stride);
+            dst += 4 * dst_stride;
+            SD4(d4, d5, d6, d7, dst, dst_stride);
+            dst += 4 * dst_stride;
+
+            /* remaining four rows of the group */
+            LD_UB4(src, src_stride, row0, row1, row2, row3);
+            src += 4 * src_stride;
+
+            d0 = __msa_copy_u_d((v2i64) row0, 0);
+            d1 = __msa_copy_u_d((v2i64) row1, 0);
+            d2 = __msa_copy_u_d((v2i64) row2, 0);
+            d3 = __msa_copy_u_d((v2i64) row3, 0);
+
+            SD4(d0, d1, d2, d3, dst, dst_stride);
+            dst += 4 * dst_stride;
+        }
+    } else if (!(height % 8)) {
+        iter = height >> 3;
+        while (iter--) {
+            LD_UB8(src, src_stride,
+                   row0, row1, row2, row3, row4, row5, row6, row7);
+            src += 8 * src_stride;
+
+            d0 = __msa_copy_u_d((v2i64) row0, 0);
+            d1 = __msa_copy_u_d((v2i64) row1, 0);
+            d2 = __msa_copy_u_d((v2i64) row2, 0);
+            d3 = __msa_copy_u_d((v2i64) row3, 0);
+            d4 = __msa_copy_u_d((v2i64) row4, 0);
+            d5 = __msa_copy_u_d((v2i64) row5, 0);
+            d6 = __msa_copy_u_d((v2i64) row6, 0);
+            d7 = __msa_copy_u_d((v2i64) row7, 0);
+
+            SD4(d0, d1, d2, d3, dst, dst_stride);
+            dst += 4 * dst_stride;
+            SD4(d4, d5, d6, d7, dst, dst_stride);
+            dst += 4 * dst_stride;
+        }
+    } else if (!(height % 4)) {
+        iter = height / 4;
+        while (iter--) {
+            LD_UB4(src, src_stride, row0, row1, row2, row3);
+            src += 4 * src_stride;
+
+            d0 = __msa_copy_u_d((v2i64) row0, 0);
+            d1 = __msa_copy_u_d((v2i64) row1, 0);
+            d2 = __msa_copy_u_d((v2i64) row2, 0);
+            d3 = __msa_copy_u_d((v2i64) row3, 0);
+
+            SD4(d0, d1, d2, d3, dst, dst_stride);
+            dst += 4 * dst_stride;
+        }
+    } else if (!(height % 2)) {
+        iter = height / 2;
+        while (iter--) {
+            LD_UB2(src, src_stride, row0, row1);
+            src += 2 * src_stride;
+
+            d0 = __msa_copy_u_d((v2i64) row0, 0);
+            d1 = __msa_copy_u_d((v2i64) row1, 0);
+
+            SD(d0, dst);
+            dst += dst_stride;
+            SD(d1, dst);
+            dst += dst_stride;
+        }
+    }
+}
+
+/*
+ * Copy a region made of (width >> 4) column strips of 16 bytes, each strip
+ * copied in (height >> 3) groups of eight rows.  Width and height
+ * remainders below 16 and 8 are not copied.
+ */
+static void copy_16multx8mult_msa(const uint8_t *src, int32_t src_stride,
+                                  uint8_t *dst, int32_t dst_stride,
+                                  int32_t height, int32_t width)
+{
+    int32_t strip, group;
+    v16u8 row0, row1, row2, row3, row4, row5, row6, row7;
+
+    for (strip = (width >> 4); strip--; src += 16, dst += 16) {
+        const uint8_t *in = src;
+        uint8_t *out = dst;
+
+        group = height >> 3;
+        while (group--) {
+            LD_UB8(in, src_stride,
+                   row0, row1, row2, row3, row4, row5, row6, row7);
+            in += 8 * src_stride;
+
+            ST_UB8(row0, row1, row2, row3, row4, row5, row6, row7,
+                   out, dst_stride);
+            out += 8 * dst_stride;
+        }
+    }
+}
+
+/*
+ * Copy `height` rows of 16 bytes from src to dst.  Multiples of 12 rows go
+ * in groups of 8 + 4, multiples of 8 are delegated to
+ * copy_16multx8mult_msa, and multiples of 4 go four rows at a time.  Any
+ * other height copies nothing.
+ */
+static void copy_width16_msa(const uint8_t *src, int32_t src_stride,
+                             uint8_t *dst, int32_t dst_stride,
+                             int32_t height)
+{
+    int32_t iter;
+    v16u8 row0, row1, row2, row3, row4, row5, row6, row7;
+
+    if (!(height % 12)) {
+        iter = height / 12;
+        while (iter--) {
+            /* eight rows ... */
+            LD_UB8(src, src_stride,
+                   row0, row1, row2, row3, row4, row5, row6, row7);
+            src += 8 * src_stride;
+            ST_UB8(row0, row1, row2, row3, row4, row5, row6, row7,
+                   dst, dst_stride);
+            dst += 8 * dst_stride;
+
+            /* ... followed by four more */
+            LD_UB4(src, src_stride, row0, row1, row2, row3);
+            src += 4 * src_stride;
+            ST_UB4(row0, row1, row2, row3, dst, dst_stride);
+            dst += 4 * dst_stride;
+        }
+    } else if (!(height % 8)) {
+        copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 16);
+    } else if (!(height % 4)) {
+        iter = height >> 2;
+        while (iter--) {
+            LD_UB4(src, src_stride, row0, row1, row2, row3);
+            src += 4 * src_stride;
+
+            ST_UB4(row0, row1, row2, row3, dst, dst_stride);
+            dst += 4 * dst_stride;
+        }
+    }
+}
+
+/*
+ * Average `height` rows of 4 bytes from src into dst (dst is both read and
+ * written).  Heights that are multiples of 4 are handled four rows at a
+ * time, other even heights two rows at a time; odd heights do nothing.
+ */
+static void avg_width4_msa(const uint8_t *src, int32_t src_stride,
+                           uint8_t *dst, int32_t dst_stride,
+                           int32_t height)
+{
+    int32_t iter;
+    uint32_t w0, w1, w2, w3;
+    v16u8 in0, in1, in2, in3;
+    v16u8 cur0, cur1, cur2, cur3;
+
+    if (!(height % 4)) {
+        iter = height / 4;
+        while (iter--) {
+            LD_UB4(src, src_stride, in0, in1, in2, in3);
+            LD_UB4(dst, dst_stride, cur0, cur1, cur2, cur3);
+            src += 4 * src_stride;
+
+            AVER_UB4_UB(in0, cur0, in1, cur1, in2, cur2, in3, cur3,
+                        cur0, cur1, cur2, cur3);
+
+            /* only the first 32-bit word of each averaged row is kept */
+            w0 = __msa_copy_u_w((v4i32) cur0, 0);
+            w1 = __msa_copy_u_w((v4i32) cur1, 0);
+            w2 = __msa_copy_u_w((v4i32) cur2, 0);
+            w3 = __msa_copy_u_w((v4i32) cur3, 0);
+            SW4(w0, w1, w2, w3, dst, dst_stride);
+            dst += 4 * dst_stride;
+        }
+    } else if (!(height % 2)) {
+        iter = height / 2;
+        while (iter--) {
+            LD_UB2(src, src_stride, in0, in1);
+            LD_UB2(dst, dst_stride, cur0, cur1);
+            src += 2 * src_stride;
+
+            AVER_UB2_UB(in0, cur0, in1, cur1, cur0, cur1);
+
+            w0 = __msa_copy_u_w((v4i32) cur0, 0);
+            w1 = __msa_copy_u_w((v4i32) cur1, 0);
+            SW(w0, dst);
+            dst += dst_stride;
+            SW(w1, dst);
+            dst += dst_stride;
+        }
+    }
+}
+
+/*
+ * Average rows of 8 bytes from src into dst (dst is both read and written),
+ * four rows per iteration for height / 4 iterations.  Rows beyond the last
+ * full group of four are left untouched.
+ */
+static void avg_width8_msa(const uint8_t *src, int32_t src_stride,
+                           uint8_t *dst, int32_t dst_stride,
+                           int32_t height)
+{
+    int32_t iter = height / 4;
+    uint64_t d0, d1, d2, d3;
+    v16u8 in0, in1, in2, in3;
+    v16u8 cur0, cur1, cur2, cur3;
+
+    while (iter--) {
+        LD_UB4(src, src_stride, in0, in1, in2, in3);
+        LD_UB4(dst, dst_stride, cur0, cur1, cur2, cur3);
+        src += 4 * src_stride;
+
+        AVER_UB4_UB(in0, cur0, in1, cur1, in2, cur2, in3, cur3,
+                    cur0, cur1, cur2, cur3);
+
+        /* only the first 64-bit lane of each averaged row is kept */
+        d0 = __msa_copy_u_d((v2i64) cur0, 0);
+        d1 = __msa_copy_u_d((v2i64) cur1, 0);
+        d2 = __msa_copy_u_d((v2i64) cur2, 0);
+        d3 = __msa_copy_u_d((v2i64) cur3, 0);
+        SD4(d0, d1, d2, d3, dst, dst_stride);
+        dst += 4 * dst_stride;
+    }
+}
+
+/*
+ * Average rows of 16 bytes from src into dst (dst is both read and
+ * written), eight rows per iteration for height / 8 iterations.  Rows
+ * beyond the last full group of eight are left untouched.
+ */
+static void avg_width16_msa(const uint8_t *src, int32_t src_stride,
+                            uint8_t *dst, int32_t dst_stride,
+                            int32_t height)
+{
+    int32_t iter = height / 8;
+    v16u8 in0, in1, in2, in3, in4, in5, in6, in7;
+    v16u8 cur0, cur1, cur2, cur3, cur4, cur5, cur6, cur7;
+
+    while (iter--) {
+        LD_UB8(src, src_stride, in0, in1, in2, in3, in4, in5, in6, in7);
+        LD_UB8(dst, dst_stride,
+               cur0, cur1, cur2, cur3, cur4, cur5, cur6, cur7);
+        src += 8 * src_stride;
+
+        AVER_UB4_UB(in0, cur0, in1, cur1, in2, cur2, in3, cur3,
+                    cur0, cur1, cur2, cur3);
+        AVER_UB4_UB(in4, cur4, in5, cur5, in6, cur6, in7, cur7,
+                    cur4, cur5, cur6, cur7);
+
+        ST_UB8(cur0, cur1, cur2, cur3, cur4, cur5, cur6, cur7,
+               dst, dst_stride);
+        dst += 8 * dst_stride;
+    }
+}
+
+/* Put, 16x16, mc00: plain copy of 16 rows of 16 bytes; src and dst share
+ * the same stride. */
+void ff_put_h264_qpel16_mc00_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    const int32_t rows = 16;
+
+    copy_width16_msa(src, stride, dst, stride, rows);
+}
+
+/* Put, 8x8, mc00: plain copy of 8 rows of 8 bytes; src and dst share the
+ * same stride. */
+void ff_put_h264_qpel8_mc00_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const int32_t rows = 8;
+
+    copy_width8_msa(src, stride, dst, stride, rows);
+}
+
+/* Avg, 16x16, mc00: averages 16 rows of 16 source bytes into dst; src and
+ * dst share the same stride. */
+void ff_avg_h264_qpel16_mc00_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    const int32_t rows = 16;
+
+    avg_width16_msa(src, stride, dst, stride, rows);
+}
+
+/* Avg, 8x8, mc00: averages 8 rows of 8 source bytes into dst; src and dst
+ * share the same stride. */
+void ff_avg_h264_qpel8_mc00_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const int32_t rows = 8;
+
+    avg_width8_msa(src, stride, dst, stride, rows);
+}
+
+/* Avg, 4x4, mc00: averages 4 rows of 4 source bytes into dst; src and dst
+ * share the same stride. */
+void ff_avg_h264_qpel4_mc00_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const int32_t rows = 4;
+
+    avg_width4_msa(src, stride, dst, stride, rows);
+}
+
+/* Put, 16x16, mc10: runs avc_luma_hz_qrt_16w_msa from two bytes left of
+ * src, with its final selector argument set to 0. */
+void ff_put_h264_qpel16_mc10_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    const uint8_t *filt_src = src - 2;
+
+    avc_luma_hz_qrt_16w_msa(filt_src, stride, dst, stride, 16, 0);
+}
+
+/* Put, 16x16, mc30: runs avc_luma_hz_qrt_16w_msa from two bytes left of
+ * src, with its final selector argument set to 1. */
+void ff_put_h264_qpel16_mc30_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    const uint8_t *filt_src = src - 2;
+
+    avc_luma_hz_qrt_16w_msa(filt_src, stride, dst, stride, 16, 1);
+}
+
+/* Put, 8x8, mc10: runs avc_luma_hz_qrt_8w_msa from two bytes left of src,
+ * with its final selector argument set to 0. */
+void ff_put_h264_qpel8_mc10_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *filt_src = src - 2;
+
+    avc_luma_hz_qrt_8w_msa(filt_src, stride, dst, stride, 8, 0);
+}
+
+/* Put, 8x8, mc30: runs avc_luma_hz_qrt_8w_msa from two bytes left of src,
+ * with its final selector argument set to 1. */
+void ff_put_h264_qpel8_mc30_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *filt_src = src - 2;
+
+    avc_luma_hz_qrt_8w_msa(filt_src, stride, dst, stride, 8, 1);
+}
+
+/* Put, 4x4, mc10: runs avc_luma_hz_qrt_4w_msa from two bytes left of src,
+ * with its final selector argument set to 0. */
+void ff_put_h264_qpel4_mc10_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *filt_src = src - 2;
+
+    avc_luma_hz_qrt_4w_msa(filt_src, stride, dst, stride, 4, 0);
+}
+
+/* Put, 4x4, mc30: runs avc_luma_hz_qrt_4w_msa from two bytes left of src,
+ * with its final selector argument set to 1. */
+void ff_put_h264_qpel4_mc30_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *filt_src = src - 2;
+
+    avc_luma_hz_qrt_4w_msa(filt_src, stride, dst, stride, 4, 1);
+}
+
+/* Put, 16x16, mc20: runs avc_luma_hz_16w_msa over 16 rows, starting two
+ * bytes left of src. */
+void ff_put_h264_qpel16_mc20_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    const uint8_t *filt_src = src - 2;
+
+    avc_luma_hz_16w_msa(filt_src, stride, dst, stride, 16);
+}
+
+/* Put, 8x8, mc20: runs avc_luma_hz_8w_msa over 8 rows, starting two bytes
+ * left of src. */
+void ff_put_h264_qpel8_mc20_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *filt_src = src - 2;
+
+    avc_luma_hz_8w_msa(filt_src, stride, dst, stride, 8);
+}
+
+/* Put, 4x4, mc20: runs avc_luma_hz_4w_msa over 4 rows, starting two bytes
+ * left of src. */
+void ff_put_h264_qpel4_mc20_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *filt_src = src - 2;
+
+    avc_luma_hz_4w_msa(filt_src, stride, dst, stride, 4);
+}
+
+/* Put, 16x16, mc01: runs avc_luma_vt_qrt_16w_msa from two rows above src,
+ * with its final selector argument set to 0. */
+void ff_put_h264_qpel16_mc01_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    const uint8_t *filt_src = src - (stride * 2);
+
+    avc_luma_vt_qrt_16w_msa(filt_src, stride, dst, stride, 16, 0);
+}
+
+/* Put, 16x16, mc03: runs avc_luma_vt_qrt_16w_msa from two rows above src,
+ * with its final selector argument set to 1. */
+void ff_put_h264_qpel16_mc03_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    const uint8_t *filt_src = src - (stride * 2);
+
+    avc_luma_vt_qrt_16w_msa(filt_src, stride, dst, stride, 16, 1);
+}
+
+/* Put, 8x8, mc01: runs avc_luma_vt_qrt_8w_msa from two rows above src,
+ * with its final selector argument set to 0. */
+void ff_put_h264_qpel8_mc01_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *filt_src = src - (stride * 2);
+
+    avc_luma_vt_qrt_8w_msa(filt_src, stride, dst, stride, 8, 0);
+}
+
+/* Wrapper: avc_luma_vt_qrt_8w_msa() is run from src - 2 * stride (two
+ * strides back), then 8 and 1 (presumably row count and offset selector). */
+void ff_put_h264_qpel8_mc03_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_up = src - 2 * stride;
+
+    avc_luma_vt_qrt_8w_msa(src_up, stride, dst, stride, 8, 1);
+}
+
+/* Wrapper: avc_luma_vt_qrt_4w_msa() is run from src - 2 * stride (two
+ * strides back), then 4 and 0 (presumably row count and offset selector). */
+void ff_put_h264_qpel4_mc01_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_up = src - 2 * stride;
+
+    avc_luma_vt_qrt_4w_msa(src_up, stride, dst, stride, 4, 0);
+}
+
+/* Wrapper: avc_luma_vt_qrt_4w_msa() is run from src - 2 * stride (two
+ * strides back), then 4 and 1 (presumably row count and offset selector). */
+void ff_put_h264_qpel4_mc03_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_up = src - 2 * stride;
+
+    avc_luma_vt_qrt_4w_msa(src_up, stride, dst, stride, 4, 1);
+}
+
+/* Wrapper: avc_luma_hv_qrt_16w_msa() gets two source pointers, src - 2 and
+ * src - 2 * stride, and a trailing 16 (presumably the row count). */
+void ff_put_h264_qpel16_mc11_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    const uint8_t *src_hz = src - 2;
+    const uint8_t *src_vt = src - 2 * stride;
+
+    avc_luma_hv_qrt_16w_msa(src_hz, src_vt, stride, dst, stride, 16);
+}
+
+/* Wrapper: avc_luma_hv_qrt_16w_msa() gets two source pointers, src - 2 and
+ * src - 2 * stride + 1, and a trailing 16 (presumably the row count). */
+void ff_put_h264_qpel16_mc31_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    const uint8_t *src_hz = src - 2;
+    const uint8_t *src_vt = src - 2 * stride + 1;
+
+    avc_luma_hv_qrt_16w_msa(src_hz, src_vt, stride, dst, stride, 16);
+}
+
+/* Wrapper: avc_luma_hv_qrt_16w_msa() gets two source pointers,
+ * src + stride - 2 and src - 2 * stride, and a trailing 16 (presumably the
+ * row count). */
+void ff_put_h264_qpel16_mc13_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    const uint8_t *src_hz = src + stride - 2;
+    const uint8_t *src_vt = src - 2 * stride;
+
+    avc_luma_hv_qrt_16w_msa(src_hz, src_vt, stride, dst, stride, 16);
+}
+
+/* Wrapper: avc_luma_hv_qrt_16w_msa() gets two source pointers,
+ * src + stride - 2 and src - 2 * stride + 1, and a trailing 16 (presumably
+ * the row count). */
+void ff_put_h264_qpel16_mc33_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    const uint8_t *src_hz = src + stride - 2;
+    const uint8_t *src_vt = src - 2 * stride + 1;
+
+    avc_luma_hv_qrt_16w_msa(src_hz, src_vt, stride, dst, stride, 16);
+}
+
+/* Wrapper: avc_luma_hv_qrt_8w_msa() gets two source pointers, src - 2 and
+ * src - 2 * stride, and a trailing 8 (presumably the row count). */
+void ff_put_h264_qpel8_mc11_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_hz = src - 2;
+    const uint8_t *src_vt = src - 2 * stride;
+
+    avc_luma_hv_qrt_8w_msa(src_hz, src_vt, stride, dst, stride, 8);
+}
+
+/* Wrapper: avc_luma_hv_qrt_8w_msa() gets two source pointers, src - 2 and
+ * src - 2 * stride + 1, and a trailing 8 (presumably the row count). */
+void ff_put_h264_qpel8_mc31_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_hz = src - 2;
+    const uint8_t *src_vt = src - 2 * stride + 1;
+
+    avc_luma_hv_qrt_8w_msa(src_hz, src_vt, stride, dst, stride, 8);
+}
+
+/* Wrapper: avc_luma_hv_qrt_8w_msa() gets two source pointers,
+ * src + stride - 2 and src - 2 * stride, and a trailing 8 (presumably the
+ * row count). */
+void ff_put_h264_qpel8_mc13_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_hz = src + stride - 2;
+    const uint8_t *src_vt = src - 2 * stride;
+
+    avc_luma_hv_qrt_8w_msa(src_hz, src_vt, stride, dst, stride, 8);
+}
+
+/* Wrapper: avc_luma_hv_qrt_8w_msa() gets two source pointers,
+ * src + stride - 2 and src - 2 * stride + 1, and a trailing 8 (presumably
+ * the row count). */
+void ff_put_h264_qpel8_mc33_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_hz = src + stride - 2;
+    const uint8_t *src_vt = src - 2 * stride + 1;
+
+    avc_luma_hv_qrt_8w_msa(src_hz, src_vt, stride, dst, stride, 8);
+}
+
+
+/* Wrapper: avc_luma_hv_qrt_4w_msa() gets two source pointers, src - 2 and
+ * src - 2 * stride, and a trailing 4 (presumably the row count). */
+void ff_put_h264_qpel4_mc11_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_hz = src - 2;
+    const uint8_t *src_vt = src - 2 * stride;
+
+    avc_luma_hv_qrt_4w_msa(src_hz, src_vt, stride, dst, stride, 4);
+}
+
+/* Wrapper: avc_luma_hv_qrt_4w_msa() gets two source pointers, src - 2 and
+ * src - 2 * stride + 1, and a trailing 4 (presumably the row count). */
+void ff_put_h264_qpel4_mc31_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_hz = src - 2;
+    const uint8_t *src_vt = src - 2 * stride + 1;
+
+    avc_luma_hv_qrt_4w_msa(src_hz, src_vt, stride, dst, stride, 4);
+}
+
+/* Wrapper: avc_luma_hv_qrt_4w_msa() gets two source pointers,
+ * src + stride - 2 and src - 2 * stride, and a trailing 4 (presumably the
+ * row count). */
+void ff_put_h264_qpel4_mc13_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_hz = src + stride - 2;
+    const uint8_t *src_vt = src - 2 * stride;
+
+    avc_luma_hv_qrt_4w_msa(src_hz, src_vt, stride, dst, stride, 4);
+}
+
+/* Wrapper: avc_luma_hv_qrt_4w_msa() gets two source pointers,
+ * src + stride - 2 and src - 2 * stride + 1, and a trailing 4 (presumably
+ * the row count). */
+void ff_put_h264_qpel4_mc33_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_hz = src + stride - 2;
+    const uint8_t *src_vt = src - 2 * stride + 1;
+
+    avc_luma_hv_qrt_4w_msa(src_hz, src_vt, stride, dst, stride, 4);
+}
+
+/* Wrapper: avc_luma_midv_qrt_16w_msa() is run from src - 2 * stride - 2,
+ * followed by 16 and 0 (presumably row count and offset selector). */
+void ff_put_h264_qpel16_mc21_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    const uint8_t *src_tl = src - 2 * stride - 2;
+
+    avc_luma_midv_qrt_16w_msa(src_tl, stride, dst, stride, 16, 0);
+}
+
+/* Wrapper: avc_luma_midv_qrt_16w_msa() is run from src - 2 * stride - 2,
+ * followed by 16 and 1 (presumably row count and offset selector). */
+void ff_put_h264_qpel16_mc23_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    const uint8_t *src_tl = src - 2 * stride - 2;
+
+    avc_luma_midv_qrt_16w_msa(src_tl, stride, dst, stride, 16, 1);
+}
+
+/* Wrapper: avc_luma_midv_qrt_8w_msa() is run from src - 2 * stride - 2,
+ * followed by 8 and 0 (presumably row count and offset selector). */
+void ff_put_h264_qpel8_mc21_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_tl = src - 2 * stride - 2;
+
+    avc_luma_midv_qrt_8w_msa(src_tl, stride, dst, stride, 8, 0);
+}
+
+/* Wrapper: avc_luma_midv_qrt_8w_msa() is run from src - 2 * stride - 2,
+ * followed by 8 and 1 (presumably row count and offset selector). */
+void ff_put_h264_qpel8_mc23_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_tl = src - 2 * stride - 2;
+
+    avc_luma_midv_qrt_8w_msa(src_tl, stride, dst, stride, 8, 1);
+}
+
+/* Wrapper: avc_luma_midv_qrt_4w_msa() is run from src - 2 * stride - 2,
+ * followed by 4 and 0 (presumably row count and offset selector). */
+void ff_put_h264_qpel4_mc21_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_tl = src - 2 * stride - 2;
+
+    avc_luma_midv_qrt_4w_msa(src_tl, stride, dst, stride, 4, 0);
+}
+
+/* Wrapper: avc_luma_midv_qrt_4w_msa() is run from src - 2 * stride - 2,
+ * followed by 4 and 1 (presumably row count and offset selector). */
+void ff_put_h264_qpel4_mc23_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_tl = src - 2 * stride - 2;
+
+    avc_luma_midv_qrt_4w_msa(src_tl, stride, dst, stride, 4, 1);
+}
+
+/* Wrapper: avc_luma_vt_16w_msa() is run from src - 2 * stride (two strides
+ * back), followed by 16 (presumably the row count). */
+void ff_put_h264_qpel16_mc02_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    const uint8_t *src_up = src - 2 * stride;
+
+    avc_luma_vt_16w_msa(src_up, stride, dst, stride, 16);
+}
+
+/* Wrapper: avc_luma_vt_8w_msa() is run from src - 2 * stride (two strides
+ * back), followed by 8 (presumably the row count). */
+void ff_put_h264_qpel8_mc02_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_up = src - 2 * stride;
+
+    avc_luma_vt_8w_msa(src_up, stride, dst, stride, 8);
+}
+
+/* Wrapper: avc_luma_vt_4w_msa() is run from src - 2 * stride (two strides
+ * back), followed by 4 (presumably the row count). */
+void ff_put_h264_qpel4_mc02_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_up = src - 2 * stride;
+
+    avc_luma_vt_4w_msa(src_up, stride, dst, stride, 4);
+}
+
+/* Wrapper: avc_luma_midh_qrt_16w_msa() is run from src - 2 * stride - 2,
+ * followed by 16 and 0 (presumably row count and offset selector). */
+void ff_put_h264_qpel16_mc12_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    const uint8_t *src_tl = src - 2 * stride - 2;
+
+    avc_luma_midh_qrt_16w_msa(src_tl, stride, dst, stride, 16, 0);
+}
+
+/* Wrapper: avc_luma_midh_qrt_16w_msa() is run from src - 2 * stride - 2,
+ * followed by 16 and 1 (presumably row count and offset selector). */
+void ff_put_h264_qpel16_mc32_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    const uint8_t *src_tl = src - 2 * stride - 2;
+
+    avc_luma_midh_qrt_16w_msa(src_tl, stride, dst, stride, 16, 1);
+}
+
+/* Wrapper: avc_luma_midh_qrt_8w_msa() is run from src - 2 * stride - 2,
+ * followed by 8 and 0 (presumably row count and offset selector). */
+void ff_put_h264_qpel8_mc12_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_tl = src - 2 * stride - 2;
+
+    avc_luma_midh_qrt_8w_msa(src_tl, stride, dst, stride, 8, 0);
+}
+
+/* Wrapper: avc_luma_midh_qrt_8w_msa() is run from src - 2 * stride - 2,
+ * followed by 8 and 1 (presumably row count and offset selector). */
+void ff_put_h264_qpel8_mc32_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_tl = src - 2 * stride - 2;
+
+    avc_luma_midh_qrt_8w_msa(src_tl, stride, dst, stride, 8, 1);
+}
+
+/* Wrapper: avc_luma_midh_qrt_4w_msa() is run from src - 2 * stride - 2,
+ * followed by 4 and 0 (presumably row count and offset selector). */
+void ff_put_h264_qpel4_mc12_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_tl = src - 2 * stride - 2;
+
+    avc_luma_midh_qrt_4w_msa(src_tl, stride, dst, stride, 4, 0);
+}
+
+/* Wrapper: avc_luma_midh_qrt_4w_msa() is run from src - 2 * stride - 2,
+ * followed by 4 and 1 (presumably row count and offset selector). */
+void ff_put_h264_qpel4_mc32_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_tl = src - 2 * stride - 2;
+
+    avc_luma_midh_qrt_4w_msa(src_tl, stride, dst, stride, 4, 1);
+}
+
+/* Wrapper: avc_luma_mid_16w_msa() is run from src - 2 * stride - 2,
+ * followed by 16 (presumably the row count). */
+void ff_put_h264_qpel16_mc22_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    const uint8_t *src_tl = src - 2 * stride - 2;
+
+    avc_luma_mid_16w_msa(src_tl, stride, dst, stride, 16);
+}
+
+/* Wrapper: avc_luma_mid_8w_msa() is run from src - 2 * stride - 2,
+ * followed by 8 (presumably the row count). */
+void ff_put_h264_qpel8_mc22_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_tl = src - 2 * stride - 2;
+
+    avc_luma_mid_8w_msa(src_tl, stride, dst, stride, 8);
+}
+
+/* Wrapper: avc_luma_mid_4w_msa() is run from src - 2 * stride - 2,
+ * followed by 4 (presumably the row count). */
+void ff_put_h264_qpel4_mc22_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_tl = src - 2 * stride - 2;
+
+    avc_luma_mid_4w_msa(src_tl, stride, dst, stride, 4);
+}
+
+/* Wrapper: avc_luma_hz_qrt_and_aver_dst_16x16_msa() is run from src - 2
+ * (two bytes to the left); the trailing 0 is presumably an offset selector. */
+void ff_avg_h264_qpel16_mc10_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    const uint8_t *src_left = src - 2;
+
+    avc_luma_hz_qrt_and_aver_dst_16x16_msa(src_left, stride, dst, stride, 0);
+}
+
+/* Wrapper: avc_luma_hz_qrt_and_aver_dst_16x16_msa() is run from src - 2
+ * (two bytes to the left); the trailing 1 is presumably an offset selector. */
+void ff_avg_h264_qpel16_mc30_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    const uint8_t *src_left = src - 2;
+
+    avc_luma_hz_qrt_and_aver_dst_16x16_msa(src_left, stride, dst, stride, 1);
+}
+
+/* Wrapper: avc_luma_hz_qrt_and_aver_dst_8x8_msa() is run from src - 2
+ * (two bytes to the left); the trailing 0 is presumably an offset selector. */
+void ff_avg_h264_qpel8_mc10_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_left = src - 2;
+
+    avc_luma_hz_qrt_and_aver_dst_8x8_msa(src_left, stride, dst, stride, 0);
+}
+
+/* Wrapper: avc_luma_hz_qrt_and_aver_dst_8x8_msa() is run from src - 2
+ * (two bytes to the left); the trailing 1 is presumably an offset selector. */
+void ff_avg_h264_qpel8_mc30_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_left = src - 2;
+
+    avc_luma_hz_qrt_and_aver_dst_8x8_msa(src_left, stride, dst, stride, 1);
+}
+
+/* Wrapper: avc_luma_hz_qrt_and_aver_dst_4x4_msa() is run from src - 2
+ * (two bytes to the left); the trailing 0 is presumably an offset selector. */
+void ff_avg_h264_qpel4_mc10_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_left = src - 2;
+
+    avc_luma_hz_qrt_and_aver_dst_4x4_msa(src_left, stride, dst, stride, 0);
+}
+
+/* Wrapper: avc_luma_hz_qrt_and_aver_dst_4x4_msa() is run from src - 2
+ * (two bytes to the left); the trailing 1 is presumably an offset selector. */
+void ff_avg_h264_qpel4_mc30_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_left = src - 2;
+
+    avc_luma_hz_qrt_and_aver_dst_4x4_msa(src_left, stride, dst, stride, 1);
+}
+
+/* Wrapper: avc_luma_hz_and_aver_dst_16x16_msa() is run from src - 2 (two
+ * bytes to the left) with no further arguments after the strides. */
+void ff_avg_h264_qpel16_mc20_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    const uint8_t *src_left = src - 2;
+
+    avc_luma_hz_and_aver_dst_16x16_msa(src_left, stride, dst, stride);
+}
+
+/* Wrapper: avc_luma_hz_and_aver_dst_8x8_msa() is run from src - 2 (two
+ * bytes to the left) with no further arguments after the strides. */
+void ff_avg_h264_qpel8_mc20_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_left = src - 2;
+
+    avc_luma_hz_and_aver_dst_8x8_msa(src_left, stride, dst, stride);
+}
+
+/* Wrapper: avc_luma_hz_and_aver_dst_4x4_msa() is run from src - 2 (two
+ * bytes to the left) with no further arguments after the strides. */
+void ff_avg_h264_qpel4_mc20_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_left = src - 2;
+
+    avc_luma_hz_and_aver_dst_4x4_msa(src_left, stride, dst, stride);
+}
+
+/* Wrapper: avc_luma_vt_qrt_and_aver_dst_16x16_msa() is run from
+ * src - 2 * stride; the trailing 0 is presumably an offset selector. */
+void ff_avg_h264_qpel16_mc01_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    const uint8_t *src_up = src - 2 * stride;
+
+    avc_luma_vt_qrt_and_aver_dst_16x16_msa(src_up, stride, dst, stride, 0);
+}
+
+/* Wrapper: avc_luma_vt_qrt_and_aver_dst_16x16_msa() is run from
+ * src - 2 * stride; the trailing 1 is presumably an offset selector. */
+void ff_avg_h264_qpel16_mc03_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    const uint8_t *src_up = src - 2 * stride;
+
+    avc_luma_vt_qrt_and_aver_dst_16x16_msa(src_up, stride, dst, stride, 1);
+}
+
+/* Wrapper: avc_luma_vt_qrt_and_aver_dst_8x8_msa() is run from
+ * src - 2 * stride; the trailing 0 is presumably an offset selector. */
+void ff_avg_h264_qpel8_mc01_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_up = src - 2 * stride;
+
+    avc_luma_vt_qrt_and_aver_dst_8x8_msa(src_up, stride, dst, stride, 0);
+}
+
+/* Wrapper: avc_luma_vt_qrt_and_aver_dst_8x8_msa() is run from
+ * src - 2 * stride; the trailing 1 is presumably an offset selector. */
+void ff_avg_h264_qpel8_mc03_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_up = src - 2 * stride;
+
+    avc_luma_vt_qrt_and_aver_dst_8x8_msa(src_up, stride, dst, stride, 1);
+}
+
+/* Wrapper: avc_luma_vt_qrt_and_aver_dst_4x4_msa() is run from
+ * src - 2 * stride; the trailing 0 is presumably an offset selector. */
+void ff_avg_h264_qpel4_mc01_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_up = src - 2 * stride;
+
+    avc_luma_vt_qrt_and_aver_dst_4x4_msa(src_up, stride, dst, stride, 0);
+}
+
+/* Wrapper: avc_luma_vt_qrt_and_aver_dst_4x4_msa() is run from
+ * src - 2 * stride; the trailing 1 is presumably an offset selector. */
+void ff_avg_h264_qpel4_mc03_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_up = src - 2 * stride;
+
+    avc_luma_vt_qrt_and_aver_dst_4x4_msa(src_up, stride, dst, stride, 1);
+}
+
+/* Wrapper: avc_luma_hv_qrt_and_aver_dst_16x16_msa() gets two source
+ * pointers, src - 2 and src - 2 * stride. */
+void ff_avg_h264_qpel16_mc11_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    const uint8_t *src_hz = src - 2;
+    const uint8_t *src_vt = src - 2 * stride;
+
+    avc_luma_hv_qrt_and_aver_dst_16x16_msa(src_hz, src_vt, stride, dst, stride);
+}
+
+/* Wrapper: avc_luma_hv_qrt_and_aver_dst_16x16_msa() gets two source
+ * pointers, src - 2 and src - 2 * stride + 1. */
+void ff_avg_h264_qpel16_mc31_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    const uint8_t *src_hz = src - 2;
+    const uint8_t *src_vt = src - 2 * stride + 1;
+
+    avc_luma_hv_qrt_and_aver_dst_16x16_msa(src_hz, src_vt, stride, dst, stride);
+}
+
+/* Wrapper: avc_luma_hv_qrt_and_aver_dst_16x16_msa() gets two source
+ * pointers, src + stride - 2 and src - 2 * stride. */
+void ff_avg_h264_qpel16_mc13_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    const uint8_t *src_hz = src + stride - 2;
+    const uint8_t *src_vt = src - 2 * stride;
+
+    avc_luma_hv_qrt_and_aver_dst_16x16_msa(src_hz, src_vt, stride, dst, stride);
+}
+
+/* Wrapper: avc_luma_hv_qrt_and_aver_dst_16x16_msa() gets two source
+ * pointers, src + stride - 2 and src - 2 * stride + 1. */
+void ff_avg_h264_qpel16_mc33_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    const uint8_t *src_hz = src + stride - 2;
+    const uint8_t *src_vt = src - 2 * stride + 1;
+
+    avc_luma_hv_qrt_and_aver_dst_16x16_msa(src_hz, src_vt, stride, dst, stride);
+}
+
+/* Wrapper: avc_luma_hv_qrt_and_aver_dst_8x8_msa() gets two source
+ * pointers, src - 2 and src - 2 * stride. */
+void ff_avg_h264_qpel8_mc11_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_hz = src - 2;
+    const uint8_t *src_vt = src - 2 * stride;
+
+    avc_luma_hv_qrt_and_aver_dst_8x8_msa(src_hz, src_vt, stride, dst, stride);
+}
+
+/* Wrapper: avc_luma_hv_qrt_and_aver_dst_8x8_msa() gets two source
+ * pointers, src - 2 and src - 2 * stride + 1. */
+void ff_avg_h264_qpel8_mc31_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_hz = src - 2;
+    const uint8_t *src_vt = src - 2 * stride + 1;
+
+    avc_luma_hv_qrt_and_aver_dst_8x8_msa(src_hz, src_vt, stride, dst, stride);
+}
+
+/* Wrapper: avc_luma_hv_qrt_and_aver_dst_8x8_msa() gets two source
+ * pointers, src + stride - 2 and src - 2 * stride. */
+void ff_avg_h264_qpel8_mc13_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_hz = src + stride - 2;
+    const uint8_t *src_vt = src - 2 * stride;
+
+    avc_luma_hv_qrt_and_aver_dst_8x8_msa(src_hz, src_vt, stride, dst, stride);
+}
+
+/* Wrapper: avc_luma_hv_qrt_and_aver_dst_8x8_msa() gets two source
+ * pointers, src + stride - 2 and src - 2 * stride + 1. */
+void ff_avg_h264_qpel8_mc33_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_hz = src + stride - 2;
+    const uint8_t *src_vt = src - 2 * stride + 1;
+
+    avc_luma_hv_qrt_and_aver_dst_8x8_msa(src_hz, src_vt, stride, dst, stride);
+}
+
+
+/* Wrapper: avc_luma_hv_qrt_and_aver_dst_4x4_msa() gets two source
+ * pointers, src - 2 and src - 2 * stride. */
+void ff_avg_h264_qpel4_mc11_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_hz = src - 2;
+    const uint8_t *src_vt = src - 2 * stride;
+
+    avc_luma_hv_qrt_and_aver_dst_4x4_msa(src_hz, src_vt, stride, dst, stride);
+}
+
+/* Wrapper: avc_luma_hv_qrt_and_aver_dst_4x4_msa() gets two source
+ * pointers, src - 2 and src - 2 * stride + 1. */
+void ff_avg_h264_qpel4_mc31_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_hz = src - 2;
+    const uint8_t *src_vt = src - 2 * stride + 1;
+
+    avc_luma_hv_qrt_and_aver_dst_4x4_msa(src_hz, src_vt, stride, dst, stride);
+}
+
+/* Wrapper: avc_luma_hv_qrt_and_aver_dst_4x4_msa() gets two source
+ * pointers, src + stride - 2 and src - 2 * stride. */
+void ff_avg_h264_qpel4_mc13_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_hz = src + stride - 2;
+    const uint8_t *src_vt = src - 2 * stride;
+
+    avc_luma_hv_qrt_and_aver_dst_4x4_msa(src_hz, src_vt, stride, dst, stride);
+}
+
+/* Wrapper: avc_luma_hv_qrt_and_aver_dst_4x4_msa() gets two source
+ * pointers, src + stride - 2 and src - 2 * stride + 1. */
+void ff_avg_h264_qpel4_mc33_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_hz = src + stride - 2;
+    const uint8_t *src_vt = src - 2 * stride + 1;
+
+    avc_luma_hv_qrt_and_aver_dst_4x4_msa(src_hz, src_vt, stride, dst, stride);
+}
+
+/* Wrapper: avc_luma_midv_qrt_and_aver_dst_16w_msa() is run from
+ * src - 2 * stride - 2, followed by 16 and 0 (presumably row count and
+ * offset selector). */
+void ff_avg_h264_qpel16_mc21_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    const uint8_t *src_tl = src - 2 * stride - 2;
+
+    avc_luma_midv_qrt_and_aver_dst_16w_msa(src_tl, stride, dst, stride, 16, 0);
+}
+
+/* Wrapper: avc_luma_midv_qrt_and_aver_dst_16w_msa() is run from
+ * src - 2 * stride - 2, followed by 16 and 1 (presumably row count and
+ * offset selector). */
+void ff_avg_h264_qpel16_mc23_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    const uint8_t *src_tl = src - 2 * stride - 2;
+
+    avc_luma_midv_qrt_and_aver_dst_16w_msa(src_tl, stride, dst, stride, 16, 1);
+}
+
+/* Wrapper: avc_luma_midv_qrt_and_aver_dst_8w_msa() is run from
+ * src - 2 * stride - 2, followed by 8 and 0 (presumably row count and
+ * offset selector). */
+void ff_avg_h264_qpel8_mc21_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_tl = src - 2 * stride - 2;
+
+    avc_luma_midv_qrt_and_aver_dst_8w_msa(src_tl, stride, dst, stride, 8, 0);
+}
+
+/* Wrapper: avc_luma_midv_qrt_and_aver_dst_8w_msa() is run from
+ * src - 2 * stride - 2, followed by 8 and 1 (presumably row count and
+ * offset selector). */
+void ff_avg_h264_qpel8_mc23_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_tl = src - 2 * stride - 2;
+
+    avc_luma_midv_qrt_and_aver_dst_8w_msa(src_tl, stride, dst, stride, 8, 1);
+}
+
+/* Wrapper: avc_luma_midv_qrt_and_aver_dst_4w_msa() is run from
+ * src - 2 * stride - 2, followed by 4 and 0 (presumably row count and
+ * offset selector). */
+void ff_avg_h264_qpel4_mc21_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_tl = src - 2 * stride - 2;
+
+    avc_luma_midv_qrt_and_aver_dst_4w_msa(src_tl, stride, dst, stride, 4, 0);
+}
+
+/* Wrapper: avc_luma_midv_qrt_and_aver_dst_4w_msa() is run from
+ * src - 2 * stride - 2, followed by 4 and 1 (presumably row count and
+ * offset selector). */
+void ff_avg_h264_qpel4_mc23_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_tl = src - 2 * stride - 2;
+
+    avc_luma_midv_qrt_and_aver_dst_4w_msa(src_tl, stride, dst, stride, 4, 1);
+}
+
+/* Wrapper: avc_luma_vt_and_aver_dst_16x16_msa() is run from
+ * src - 2 * stride with no further arguments after the strides. */
+void ff_avg_h264_qpel16_mc02_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    const uint8_t *src_up = src - 2 * stride;
+
+    avc_luma_vt_and_aver_dst_16x16_msa(src_up, stride, dst, stride);
+}
+
+/* Wrapper: avc_luma_vt_and_aver_dst_8x8_msa() is run from
+ * src - 2 * stride with no further arguments after the strides. */
+void ff_avg_h264_qpel8_mc02_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_up = src - 2 * stride;
+
+    avc_luma_vt_and_aver_dst_8x8_msa(src_up, stride, dst, stride);
+}
+
+/* Wrapper: avc_luma_vt_and_aver_dst_4x4_msa() is run from
+ * src - 2 * stride with no further arguments after the strides. */
+void ff_avg_h264_qpel4_mc02_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_up = src - 2 * stride;
+
+    avc_luma_vt_and_aver_dst_4x4_msa(src_up, stride, dst, stride);
+}
+
+/* Wrapper: avc_luma_midh_qrt_and_aver_dst_16w_msa() is run from
+ * src - 2 * stride - 2, followed by 16 and 0 (presumably row count and
+ * offset selector). */
+void ff_avg_h264_qpel16_mc12_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    const uint8_t *src_tl = src - 2 * stride - 2;
+
+    avc_luma_midh_qrt_and_aver_dst_16w_msa(src_tl, stride, dst, stride, 16, 0);
+}
+
+/* Wrapper: avc_luma_midh_qrt_and_aver_dst_16w_msa() is run from
+ * src - 2 * stride - 2, followed by 16 and 1 (presumably row count and
+ * offset selector). */
+void ff_avg_h264_qpel16_mc32_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    const uint8_t *src_tl = src - 2 * stride - 2;
+
+    avc_luma_midh_qrt_and_aver_dst_16w_msa(src_tl, stride, dst, stride, 16, 1);
+}
+
+/* Wrapper: avc_luma_midh_qrt_and_aver_dst_8w_msa() is run from
+ * src - 2 * stride - 2, followed by 8 and 0 (presumably row count and
+ * offset selector). */
+void ff_avg_h264_qpel8_mc12_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_tl = src - 2 * stride - 2;
+
+    avc_luma_midh_qrt_and_aver_dst_8w_msa(src_tl, stride, dst, stride, 8, 0);
+}
+
+/* Wrapper: avc_luma_midh_qrt_and_aver_dst_8w_msa() is run from
+ * src - 2 * stride - 2, followed by 8 and 1 (presumably row count and
+ * offset selector). */
+void ff_avg_h264_qpel8_mc32_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_tl = src - 2 * stride - 2;
+
+    avc_luma_midh_qrt_and_aver_dst_8w_msa(src_tl, stride, dst, stride, 8, 1);
+}
+
+/* Wrapper: avc_luma_midh_qrt_and_aver_dst_4w_msa() is run from
+ * src - 2 * stride - 2, followed by 4 and 0 (presumably row count and
+ * offset selector). */
+void ff_avg_h264_qpel4_mc12_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_tl = src - 2 * stride - 2;
+
+    avc_luma_midh_qrt_and_aver_dst_4w_msa(src_tl, stride, dst, stride, 4, 0);
+}
+
+/* Wrapper: avc_luma_midh_qrt_and_aver_dst_4w_msa() is run from
+ * src - 2 * stride - 2, followed by 4 and 1 (presumably row count and
+ * offset selector). */
+void ff_avg_h264_qpel4_mc32_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_tl = src - 2 * stride - 2;
+
+    avc_luma_midh_qrt_and_aver_dst_4w_msa(src_tl, stride, dst, stride, 4, 1);
+}
+
+/* Wrapper: avc_luma_mid_and_aver_dst_16x16_msa() is run from
+ * src - 2 * stride - 2 with no further arguments after the strides. */
+void ff_avg_h264_qpel16_mc22_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride)
+{
+    const uint8_t *src_tl = src - 2 * stride - 2;
+
+    avc_luma_mid_and_aver_dst_16x16_msa(src_tl, stride, dst, stride);
+}
+
+/* Wrapper: avc_luma_mid_and_aver_dst_8w_msa() is run from
+ * src - 2 * stride - 2, followed by 8 (presumably the row count). */
+void ff_avg_h264_qpel8_mc22_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_tl = src - 2 * stride - 2;
+
+    avc_luma_mid_and_aver_dst_8w_msa(src_tl, stride, dst, stride, 8);
+}
+
+/* Wrapper: avc_luma_mid_and_aver_dst_4x4_msa() is run from
+ * src - 2 * stride - 2 with no further arguments after the strides. */
+void ff_avg_h264_qpel4_mc22_msa(uint8_t *dst, const uint8_t *src,
+                                ptrdiff_t stride)
+{
+    const uint8_t *src_tl = src - 2 * stride - 2;
+
+    avc_luma_mid_and_aver_dst_4x4_msa(src_tl, stride, dst, stride);
+}
diff --git a/libavcodec/mips/hevc_idct_msa.c b/libavcodec/mips/hevc_idct_msa.c
index b5a4c5ad..975d91f8 100644
--- a/libavcodec/mips/hevc_idct_msa.c
+++ b/libavcodec/mips/hevc_idct_msa.c
@@ -21,18 +21,18 @@
 #include "libavutil/mips/generic_macros_msa.h"
 #include "libavcodec/mips/hevcdsp_mips.h"
 
-static int16_t gt8x8_cnst[16] = {
+static const int16_t gt8x8_cnst[16] = {
     64, 64, 83, 36, 89, 50, 18, 75, 64, -64, 36, -83, 75, -89, -50, -18
 };
 
-static int16_t gt16x16_cnst[64] = {
+static const int16_t gt16x16_cnst[64] = {
     64, 83, 64, 36, 89, 75, 50, 18, 90, 80, 57, 25, 70, 87, 9, 43,
     64, 36, -64, -83, 75, -18, -89, -50, 87, 9, -80, -70, -43, 57, -25, -90,
     64, -36, -64, 83, 50, -89, 18, 75, 80, -70, -25, 90, -87, 9, 43, 57,
     64, -83, 64, -36, 18, -50, 75, -89, 70, -87, 90, -80, 9, -43, -57, 25
 };
 
-static int16_t gt32x32_cnst0[256] = {
+static const int16_t gt32x32_cnst0[256] = {
     90, 90, 88, 85, 82, 78, 73, 67, 61, 54, 46, 38, 31, 22, 13, 4,
     90, 82, 67, 46, 22, -4, -31, -54, -73, -85, -90, -88, -78, -61, -38, -13,
     88, 67, 31, -13, -54, -82, -90, -78, -46, -4, 38, 73, 90, 85, 61, 22,
@@ -51,18 +51,18 @@ static int16_t gt32x32_cnst0[256] = {
     4, -13, 22, -31, 38, -46, 54, -61, 67, -73, 78, -82, 85, -88, 90, -90
 };
 
-static int16_t gt32x32_cnst1[64] = {
+static const int16_t gt32x32_cnst1[64] = {
     90, 87, 80, 70, 57, 43, 25, 9, 87, 57, 9, -43, -80, -90, -70, -25,
     80, 9, -70, -87, -25, 57, 90, 43, 70, -43, -87, 9, 90, 25, -80, -57,
     57, -80, -25, 90, -9, -87, 43, 70, 43, -90, 57, 25, -87, 70, 9, -80,
     25, -70, 90, -80, 43, 9, -57, 87, 9, -25, 43, -57, 70, -80, 87, -90
 };
 
-static int16_t gt32x32_cnst2[16] = {
+static const int16_t gt32x32_cnst2[16] = {
     89, 75, 50, 18, 75, -18, -89, -50, 50, -89, 18, 75, 18, -50, 75, -89
 };
 
-static int16_t gt32x32_cnst3[16] = {
+static const int16_t gt32x32_cnst3[16] = {
     64, 64, 64, 64, 83, 36, -36, -83, 64, -64, -64, 64, 36, -83, 83, -36
 };
 
diff --git a/libavcodec/mips/hevc_lpf_sao_msa.c b/libavcodec/mips/hevc_lpf_sao_msa.c
new file mode 100644
index 00000000..da1db51e
--- /dev/null
+++ b/libavcodec/mips/hevc_lpf_sao_msa.c
@@ -0,0 +1,2088 @@
+/*
+ * Copyright (c) 2015 Manojkumar Bhosale (Manojkumar.Bhosale@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/mips/generic_macros_msa.h"
+#include "libavcodec/mips/hevcdsp_mips.h"
+
+/*
+ * Luma loop filter across a horizontal edge, 8 columns wide.
+ *
+ * src points at the first row on the q side of the edge (q0). Rows p0..p3
+ * are the four rows before it and q1..q3 the three rows after it, each one
+ * stride apart. The 8 columns are handled as two 4-column halves: the first
+ * half takes its decisions from columns 0 and 3 and uses tc[0], p_is_pcm[0]
+ * and q_is_pcm[0]; the second half takes them from columns 4 and 7 and uses
+ * tc[1], p_is_pcm[1] and q_is_pcm[1].
+ *
+ * Nothing is written when all four pcm flags are set, or when both halves
+ * have d >= beta. Otherwise rows p2, p1, p0, q0, q1 and q2 are stored back;
+ * samples that must stay untouched are restored from the loaded source
+ * through bit-select masks before the store.
+ *
+ * LD_UH, ILVR_B8_UH, CLIP_SH, CLIP_SH_0_255, PCKEV_B2_UB, ST8x4_UB and SD
+ * are macros from generic_macros_msa.h; remarks about them below are
+ * assumptions based on their names and use here - TODO confirm.
+ */
+static void hevc_loopfilter_luma_hor_msa(uint8_t *src, int32_t stride,
+                                         int32_t beta, int32_t *tc,
+                                         uint8_t *p_is_pcm, uint8_t *q_is_pcm)
+{
+    uint8_t *p3 = src - (stride << 2);
+    uint8_t *p2 = src - ((stride << 1) + stride);
+    uint8_t *p1 = src - (stride << 1);
+    uint8_t *p0 = src - stride;
+    uint8_t *q0 = src;
+    uint8_t *q1 = src + stride;
+    uint8_t *q2 = src + (stride << 1);
+    uint8_t *q3 = src + (stride << 1) + stride;
+    uint8_t flag0, flag1;
+    int32_t dp00, dq00, dp30, dq30, d00, d30;
+    int32_t dp04, dq04, dp34, dq34, d04, d34;
+    int32_t tc0, p_is_pcm0, q_is_pcm0, beta30, beta20, tc250;
+    int32_t tc4, p_is_pcm4, q_is_pcm4, tc254, tmp;
+    uint64_t dst_val0, dst_val1;
+    v16u8 dst0, dst1, dst2, dst3, dst4, dst5;
+    v2i64 cmp0, cmp1, cmp2, p_is_pcm_vec, q_is_pcm_vec;
+    v8u16 temp0, temp1;
+    v8i16 temp2;
+    v8i16 tc_pos, tc_neg;
+    v8i16 diff0, diff1, delta0, delta1, delta2, abs_delta0;
+    v16i8 zero = { 0 };
+    v8u16 p3_src, p2_src, p1_src, p0_src, q0_src, q1_src, q2_src, q3_src;
+
+    /* second-difference activity |x2 - 2*x1 + x0| on each side of the edge,
+     * sampled at columns 0 and 3 (first half) */
+    dp00 = abs(p2[0] - (p1[0] << 1) + p0[0]);
+    dq00 = abs(q2[0] - (q1[0] << 1) + q0[0]);
+    dp30 = abs(p2[3] - (p1[3] << 1) + p0[3]);
+    dq30 = abs(q2[3] - (q1[3] << 1) + q0[3]);
+    d00 = dp00 + dq00;
+    d30 = dp30 + dq30;
+    p_is_pcm0 = p_is_pcm[0];
+    q_is_pcm0 = q_is_pcm[0];
+    /* same measures at columns 4 and 7 (second half) */
+    dp04 = abs(p2[4] - (p1[4] << 1) + p0[4]);
+    dq04 = abs(q2[4] - (q1[4] << 1) + q0[4]);
+    dp34 = abs(p2[7] - (p1[7] << 1) + p0[7]);
+    dq34 = abs(q2[7] - (q1[7] << 1) + q0[7]);
+    d04 = dp04 + dq04;
+    d34 = dp34 + dq34;
+    p_is_pcm4 = p_is_pcm[1];
+    q_is_pcm4 = q_is_pcm[1];
+
+    /* proceed only if at least one pcm flag is clear ... */
+    if (!p_is_pcm0 || !p_is_pcm4 || !q_is_pcm0 || !q_is_pcm4) {
+        /* ... and at least one half has d < beta */
+        if (!(d00 + d30 >= beta) || !(d04 + d34 >= beta)) {
+            p3_src = LD_UH(p3);
+            p2_src = LD_UH(p2);
+            p1_src = LD_UH(p1);
+            p0_src = LD_UH(p0);
+            q0_src = LD_UH(q0);
+            q1_src = LD_UH(q1);
+            q2_src = LD_UH(q2);
+            q3_src = LD_UH(q3);
+
+            tc0 = tc[0];
+            beta30 = beta >> 3;
+            beta20 = beta >> 2;
+            tc250 = ((tc0 * 5 + 1) >> 1);
+            tc4 = tc[1];
+            tc254 = ((tc4 * 5 + 1) >> 1);
+
+            /* flag0/flag1: all strong-filter conditions hold for the
+             * first/second half */
+            flag0 = (abs(p3[0] - p0[0]) + abs(q3[0] - q0[0]) < beta30 &&
+                     abs(p0[0] - q0[0]) < tc250 &&
+                     abs(p3[3] - p0[3]) + abs(q3[3] - q0[3]) < beta30 &&
+                     abs(p0[3] - q0[3]) < tc250 &&
+                     (d00 << 1) < beta20 && (d30 << 1) < beta20);
+            cmp0 = __msa_fill_d(flag0);
+
+            flag1 = (abs(p3[4] - p0[4]) + abs(q3[4] - q0[4]) < beta30 &&
+                     abs(p0[4] - q0[4]) < tc254 &&
+                     abs(p3[7] - p0[7]) + abs(q3[7] - q0[7]) < beta30 &&
+                     abs(p0[7] - q0[7]) < tc254 &&
+                     (d04 << 1) < beta20 && (d34 << 1) < beta20);
+            cmp1 = __msa_fill_d(flag1);
+            /* one 64-bit lane per half; after the compare-with-zero, cmp2 is
+             * all ones in the halves where the strong flag is NOT set */
+            cmp2 = __msa_ilvev_d(cmp1, cmp0);
+            cmp2 = __msa_ceqi_d(cmp2, 0);
+
+            /* presumably widens the low 8 bytes of every row to 16-bit
+             * lanes by interleaving with zero */
+            ILVR_B8_UH(zero, p3_src, zero, p2_src, zero, p1_src, zero, p0_src,
+                       zero, q0_src, zero, q1_src, zero, q2_src, zero, q3_src,
+                       p3_src, p2_src, p1_src, p0_src, q0_src, q1_src, q2_src,
+                       q3_src);
+
+            /* per-column tc (tc0 in the low half, tc4 in the high half);
+             * doubled here as the clip range of the strong filter */
+            cmp0 = (v2i64) __msa_fill_h(tc0);
+            cmp1 = (v2i64) __msa_fill_h(tc4);
+            tc_pos = (v8i16) __msa_ilvev_d(cmp1, cmp0);
+            tc_pos <<= 1;
+            tc_neg = -tc_pos;
+
+            /* strong filter, p side:
+             * p2' from (2*p3 + 3*p2 + p1 + p0 + q0) with rounding shift 3 */
+            temp0 = (p1_src + p0_src + q0_src);
+            temp1 = ((p3_src + p2_src) << 1) + p2_src + temp0;
+            temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 3);
+            temp2 = (v8i16) (temp1 - p2_src);
+            temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+            dst0 = (v16u8) (temp2 + (v8i16) p2_src);
+
+            /* p1' from (p2 + p1 + p0 + q0) with rounding shift 2 */
+            temp1 = temp0 + p2_src;
+            temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 2);
+            temp2 = (v8i16) (temp1 - p1_src);
+            temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+            dst1 = (v16u8) (temp2 + (v8i16) p1_src);
+
+            /* p0' from (p2 + 2*p1 + 2*p0 + 2*q0 + q1) with rounding shift 3 */
+            temp1 = (temp0 << 1) + p2_src + q1_src;
+            temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 3);
+            temp2 = (v8i16) (temp1 - p0_src);
+            temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+            dst2 = (v16u8) (temp2 + (v8i16) p0_src);
+
+            /* mask is all ones where the p side is not pcm; where it is
+             * zero (pcm) the original p samples are put back */
+            cmp0 = __msa_fill_d(p_is_pcm0);
+            cmp1 = __msa_fill_d(p_is_pcm4);
+            p_is_pcm_vec = __msa_ilvev_d(cmp1, cmp0);
+            p_is_pcm_vec = __msa_ceqi_d(p_is_pcm_vec, 0);
+
+            dst0 = __msa_bmz_v(dst0, (v16u8) p2_src, (v16u8) p_is_pcm_vec);
+            dst1 = __msa_bmz_v(dst1, (v16u8) p1_src, (v16u8) p_is_pcm_vec);
+            dst2 = __msa_bmz_v(dst2, (v16u8) p0_src, (v16u8) p_is_pcm_vec);
+
+            /* strong filter, q side: mirror of the p side */
+            temp0 = (q1_src + p0_src + q0_src);
+
+            temp1 = ((q3_src + q2_src) << 1) + q2_src + temp0;
+            temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 3);
+            temp2 = (v8i16) (temp1 - q2_src);
+            temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+            dst5 = (v16u8) (temp2 + (v8i16) q2_src);
+
+            temp1 = temp0 + q2_src;
+            temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 2);
+            temp2 = (v8i16) (temp1 - q1_src);
+            temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+            dst4 = (v16u8) (temp2 + (v8i16) q1_src);
+
+            temp1 = (temp0 << 1) + p1_src + q2_src;
+            temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 3);
+            temp2 = (v8i16) (temp1 - q0_src);
+            temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+            dst3 = (v16u8) (temp2 + (v8i16) q0_src);
+
+            cmp0 = __msa_fill_d(q_is_pcm0);
+            cmp1 = __msa_fill_d(q_is_pcm4);
+            q_is_pcm_vec = __msa_ilvev_d(cmp1, cmp0);
+            q_is_pcm_vec = __msa_ceqi_d(q_is_pcm_vec, 0);
+
+            dst3 = __msa_bmz_v(dst3, (v16u8) q0_src, (v16u8) q_is_pcm_vec);
+            dst4 = __msa_bmz_v(dst4, (v16u8) q1_src, (v16u8) q_is_pcm_vec);
+            dst5 = __msa_bmz_v(dst5, (v16u8) q2_src, (v16u8) q_is_pcm_vec);
+
+            /* weak filter: back from 2*tc to tc */
+            tc_pos >>= 1;
+            tc_neg = -tc_pos;
+
+            /* delta0 = (9 * (q0 - p0) - 3 * (q1 - p1)) with rounding shift 4 */
+            diff0 = (v8i16) (q0_src - p0_src);
+            diff1 = (v8i16) (q1_src - p1_src);
+            diff0 = (diff0 << 3) + diff0;
+            diff1 = (diff1 << 1) + diff1;
+            delta0 = diff0 - diff1;
+            delta0 = __msa_srari_h(delta0, 4);
+
+            /* abs_delta0 becomes a per-column mask: |delta0| < 10 * tc */
+            temp1 = (v8u16) ((tc_pos << 3) + (tc_pos << 1));
+            abs_delta0 = __msa_add_a_h(delta0, (v8i16) zero);
+            abs_delta0 = (v8u16) abs_delta0 < temp1;
+
+            delta0 = CLIP_SH(delta0, tc_neg, tc_pos);
+
+            /* p0' = p0 + delta0, q0' = q0 - delta0; pcm sides keep the
+             * original sample (masks still hold the plain pcm tests) */
+            temp0 = (v8u16) (delta0 + p0_src);
+            temp0 = (v8u16) CLIP_SH_0_255(temp0);
+            temp0 = (v8u16) __msa_bmz_v((v16u8) temp0, (v16u8) p0_src,
+                                        (v16u8) p_is_pcm_vec);
+
+            temp2 = (v8i16) (q0_src - delta0);
+            temp2 = CLIP_SH_0_255(temp2);
+            temp2 = (v8i16) __msa_bmz_v((v16u8) temp2, (v16u8) q0_src,
+                                        (v16u8) q_is_pcm_vec);
+
+            /* the pcm masks are rebuilt with a different meaning: all ones
+             * where p1/q1 must NOT be modified (side is pcm, or its activity
+             * is not below tmp) */
+            tmp = (beta + (beta >> 1)) >> 3;
+            cmp0 = __msa_fill_d(!p_is_pcm0 && ((dp00 + dp30) < tmp));
+            cmp1 = __msa_fill_d(!p_is_pcm4 && ((dp04 + dp34) < tmp));
+            p_is_pcm_vec = __msa_ilvev_d(cmp1, cmp0);
+            p_is_pcm_vec = __msa_ceqi_d(p_is_pcm_vec, 0);
+
+            /* NOTE(review): the q side fills with __msa_fill_h where the p
+             * side uses __msa_fill_d; the flag is 0 or 1 and only tested
+             * against zero per 64-bit lane, so the mask is the same */
+            cmp0 = (v2i64) __msa_fill_h((!q_is_pcm0) && (dq00 + dq30 < tmp));
+            cmp1 = (v2i64) __msa_fill_h((!q_is_pcm4) && (dq04 + dq34 < tmp));
+            q_is_pcm_vec = __msa_ilvev_d(cmp1, cmp0);
+            q_is_pcm_vec = __msa_ceqi_d(q_is_pcm_vec, 0);
+
+            /* p1/q1 corrections are clipped to tc >> 1 */
+            tc_pos >>= 1;
+            tc_neg = -tc_pos;
+
+            /* p1' = p1 + clip((avg(p2, p0) - p1 + delta0) >> 1) */
+            delta1 = (v8i16) __msa_aver_u_h(p2_src, p0_src);
+            delta1 -= (v8i16) p1_src;
+            delta1 += delta0;
+            delta1 >>= 1;
+            delta1 = CLIP_SH(delta1, tc_neg, tc_pos);
+            delta1 = (v8i16) p1_src + (v8i16) delta1;
+            delta1 = CLIP_SH_0_255(delta1);
+            delta1 = (v8i16) __msa_bmnz_v((v16u8) delta1, (v16u8) p1_src,
+                                          (v16u8) p_is_pcm_vec);
+
+            /* q1' = q1 + clip((avg(q0, q2) - q1 - delta0) >> 1) */
+            delta2 = (v8i16) __msa_aver_u_h(q0_src, q2_src);
+            delta2 = delta2 - (v8i16) q1_src;
+            delta2 = delta2 - delta0;
+            delta2 = delta2 >> 1;
+            delta2 = CLIP_SH(delta2, tc_neg, tc_pos);
+            delta2 = (v8i16) q1_src + (v8i16) delta2;
+            delta2 = CLIP_SH_0_255(delta2);
+            delta2 = (v8i16) __msa_bmnz_v((v16u8) delta2, (v16u8) q1_src,
+                                          (v16u8) q_is_pcm_vec);
+
+            /* columns failing |delta0| < 10 * tc keep their original
+             * samples in all four weak-filter outputs */
+            delta1 = (v8i16) __msa_bmz_v((v16u8) delta1, (v16u8) p1_src,
+                                         (v16u8) abs_delta0);
+            temp0 = (v8u16) __msa_bmz_v((v16u8) temp0, (v16u8) p0_src,
+                                        (v16u8) abs_delta0);
+            temp2 = (v8i16) __msa_bmz_v((v16u8) temp2, (v16u8) q0_src,
+                                        (v16u8) abs_delta0);
+            delta2 = (v8i16) __msa_bmz_v((v16u8) delta2, (v16u8) q1_src,
+                                         (v16u8) abs_delta0);
+
+            /* in halves where the strong flag is clear (cmp2 set), replace
+             * the strong results with the weak ones; p2/q2 revert to source */
+            dst2 = __msa_bmnz_v(dst2, (v16u8) temp0, (v16u8) cmp2);
+            dst3 = __msa_bmnz_v(dst3, (v16u8) temp2, (v16u8) cmp2);
+            dst1 = __msa_bmnz_v(dst1, (v16u8) delta1, (v16u8) cmp2);
+            dst4 = __msa_bmnz_v(dst4, (v16u8) delta2, (v16u8) cmp2);
+            dst0 = __msa_bmnz_v(dst0, (v16u8) p2_src, (v16u8) cmp2);
+            dst5 = __msa_bmnz_v(dst5, (v16u8) q2_src, (v16u8) cmp2);
+
+            /* cmp0 is all ones in halves with d < beta; the other halves
+             * are not filtered at all and get the source samples back */
+            cmp0 = __msa_fill_d(d00 + d30 >= beta);
+            cmp1 = __msa_fill_d(d04 + d34 >= beta);
+            cmp0 = __msa_ilvev_d(cmp1, cmp0);
+            cmp0 = __msa_ceqi_d(cmp0, 0);
+
+            dst0 = __msa_bmz_v(dst0, (v16u8) p2_src, (v16u8) cmp0);
+            dst1 = __msa_bmz_v(dst1, (v16u8) p1_src, (v16u8) cmp0);
+            dst2 = __msa_bmz_v(dst2, (v16u8) p0_src, (v16u8) cmp0);
+            dst3 = __msa_bmz_v(dst3, (v16u8) q0_src, (v16u8) cmp0);
+            dst4 = __msa_bmz_v(dst4, (v16u8) q1_src, (v16u8) cmp0);
+            dst5 = __msa_bmz_v(dst5, (v16u8) q2_src, (v16u8) cmp0);
+
+            /* presumably narrows back to bytes, two rows per vector:
+             * dst0 = p2|p1, dst1 = p0|q0, dst2 = q1|q2 */
+            PCKEV_B2_UB(dst1, dst0, dst3, dst2, dst0, dst1);
+            dst2 = (v16u8) __msa_pckev_b((v16i8) dst5, (v16i8) dst4);
+
+            dst_val0 = __msa_copy_u_d((v2i64) dst2, 0);
+            dst_val1 = __msa_copy_u_d((v2i64) dst2, 1);
+
+            /* rows p2, p1, p0, q0 in one go, then q1 and q2 one by one
+             * (p2 + 4 * stride is the q1 row) */
+            ST8x4_UB(dst0, dst1, p2, stride);
+            p2 += (4 * stride);
+            SD(dst_val0, p2);
+            p2 += stride;
+            SD(dst_val1, p2);
+        }
+    }
+}
+/* HEVC luma deblock of a vertical edge at src: 8 rows, as two 4-row halves. */
+static void hevc_loopfilter_luma_ver_msa(uint8_t *src, int32_t stride,
+                                         int32_t beta, int32_t *tc,
+                                         uint8_t *p_is_pcm, uint8_t *q_is_pcm)
+{
+    uint8_t *p3 = src;                  /* row 0 (a row pointer, not a pixel) */
+    uint8_t *p2 = src + 3 * stride;     /* row 3 */
+    uint8_t *p1 = src + (stride << 2);  /* row 4 */
+    uint8_t *p0 = src + 7 * stride;     /* row 7 */
+    uint8_t flag0, flag1;
+    uint16_t tmp0, tmp1;
+    uint32_t tmp2, tmp3;
+    int32_t dp00, dq00, dp30, dq30, d00, d30;
+    int32_t dp04, dq04, dp34, dq34, d04, d34;
+    int32_t tc0, p_is_pcm0, q_is_pcm0, beta30, beta20, tc250;
+    int32_t tc4, p_is_pcm4, q_is_pcm4, tc254, tmp;
+    v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+    v2i64 cmp0, cmp1, cmp2, p_is_pcm_vec, q_is_pcm_vec;
+    v8u16 temp0, temp1;
+    v8i16 temp2;
+    v8i16 tc_pos, tc_neg;
+    v8i16 diff0, diff1, delta0, delta1, delta2, abs_delta0;
+    v16i8 zero = { 0 };
+    v8u16 p3_src, p2_src, p1_src, p0_src, q0_src, q1_src, q2_src, q3_src;
+    /* First half: second differences left (dp) and right (dq) of the edge. */
+    dp00 = abs(p3[-3] - (p3[-2] << 1) + p3[-1]);
+    dq00 = abs(p3[2] - (p3[1] << 1) + p3[0]);
+    dp30 = abs(p2[-3] - (p2[-2] << 1) + p2[-1]);
+    dq30 = abs(p2[2] - (p2[1] << 1) + p2[0]);
+    d00 = dp00 + dq00;
+    d30 = dp30 + dq30;
+    p_is_pcm0 = p_is_pcm[0];
+    q_is_pcm0 = q_is_pcm[0];
+    /* Second half: the same measurements on rows 4 and 7. */
+    dp04 = abs(p1[-3] - (p1[-2] << 1) + p1[-1]);
+    dq04 = abs(p1[2] - (p1[1] << 1) + p1[0]);
+    dp34 = abs(p0[-3] - (p0[-2] << 1) + p0[-1]);
+    dq34 = abs(p0[2] - (p0[1] << 1) + p0[0]);
+    d04 = dp04 + dq04;
+    d34 = dp34 + dq34;
+    p_is_pcm4 = p_is_pcm[1];
+    q_is_pcm4 = q_is_pcm[1];
+    /* Skip if all four PCM flags are set, or if both halves have d >= beta. */
+    if (!p_is_pcm0 || !p_is_pcm4 || !q_is_pcm0 || !q_is_pcm4) {
+        if (!(d00 + d30 >= beta) || !(d04 + d34 >= beta)) {
+            src -= 4;
+            LD_UH8(src, stride,
+                   p3_src, p2_src, p1_src, p0_src, q0_src, q1_src, q2_src,
+                   q3_src);
+            /* Per-half thresholds derived from beta and tc[0] / tc[1]. */
+            tc0 = tc[0];
+            beta30 = beta >> 3;
+            beta20 = beta >> 2;
+            tc250 = ((tc0 * 5 + 1) >> 1);
+
+            tc4 = tc[1];
+            tc254 = ((tc4 * 5 + 1) >> 1);
+            /* NOTE(review): presumably turns the 8 loaded rows into columns. */
+            TRANSPOSE8x8_UB_UH(p3_src, p2_src, p1_src, p0_src, q0_src, q1_src,
+                               q2_src, q3_src, p3_src, p2_src, p1_src, p0_src,
+                               q0_src, q1_src, q2_src, q3_src);
+            /* flag0/flag1: per-half scalar decision, read from the edge rows. */
+            flag0 = (abs(p3[-4] - p3[-1]) + abs(p3[3] - p3[0]) < beta30 &&
+                     abs(p3[-1] - p3[0]) < tc250 &&
+                     abs(p2[-4] - p2[-1]) + abs(p2[3] - p2[0]) < beta30 &&
+                     abs(p2[-1] - p2[0]) < tc250 &&
+                     (d00 << 1) < beta20 && (d30 << 1) < beta20);
+            cmp0 = __msa_fill_d(flag0);
+
+            flag1 = (abs(p1[-4] - p1[-1]) + abs(p1[3] - p1[0]) < beta30 &&
+                     abs(p1[-1] - p1[0]) < tc254 &&
+                     abs(p0[-4] - p0[-1]) + abs(p0[3] - p0[0]) < beta30 &&
+                     abs(p0[-1] - p0[0]) < tc254 &&
+                     (d04 << 1) < beta20 && (d34 << 1) < beta20);
+            cmp1 = __msa_fill_d(flag1);
+            cmp2 = __msa_ilvev_d(cmp1, cmp0);
+            cmp2 = __msa_ceqi_d(cmp2, 0);
+            /* Interleave with zero - presumably widens bytes to 16-bit lanes. */
+            ILVR_B8_UH(zero, p3_src, zero, p2_src, zero, p1_src, zero, p0_src,
+                       zero, q0_src, zero, q1_src, zero, q2_src, zero, q3_src,
+                       p3_src, p2_src, p1_src, p0_src, q0_src, q1_src, q2_src,
+                       q3_src);
+            /* First filter variant clips its deltas to +/- 2 * tc per half. */
+            cmp0 = (v2i64) __msa_fill_h(tc0 << 1);
+            cmp1 = (v2i64) __msa_fill_h(tc4 << 1);
+            tc_pos = (v8i16) __msa_ilvev_d(cmp1, cmp0);
+            tc_neg = -tc_pos;
+            /* p side: dst0/dst1/dst2 = new p2/p1/p0 (source + clipped delta). */
+            temp0 = (p1_src + p0_src + q0_src);
+
+            temp1 = ((p3_src + p2_src) << 1) + p2_src + temp0;
+            temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 3);
+            temp2 = (v8i16) (temp1 - p2_src);
+            temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+            dst0 = (v16u8) (temp2 + (v8i16) p2_src);
+
+            temp1 = temp0 + p2_src;
+            temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 2);
+            temp2 = (v8i16) (temp1 - p1_src);
+            temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+            dst1 = (v16u8) (temp2 + (v8i16) p1_src);
+
+            temp1 = (temp0 << 1) + p2_src + q1_src;
+            temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 3);
+            temp2 = (v8i16) (temp1 - p0_src);
+            temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+            dst2 = (v16u8) (temp2 + (v8i16) p0_src);
+            /* Mask from p_is_pcm[]; presumably restores source p where PCM. */
+            cmp0 = __msa_fill_d(p_is_pcm0);
+            cmp1 = __msa_fill_d(p_is_pcm4);
+            p_is_pcm_vec = __msa_ilvev_d(cmp1, cmp0);
+            p_is_pcm_vec = __msa_ceqi_d(p_is_pcm_vec, 0);
+
+            dst0 = __msa_bmz_v(dst0, (v16u8) p2_src, (v16u8) p_is_pcm_vec);
+            dst1 = __msa_bmz_v(dst1, (v16u8) p1_src, (v16u8) p_is_pcm_vec);
+            dst2 = __msa_bmz_v(dst2, (v16u8) p0_src, (v16u8) p_is_pcm_vec);
+            /* q side: dst5/dst4/dst3 = new q2/q1/q0, mirroring the p side. */
+            temp0 = (q1_src + p0_src + q0_src);
+            temp1 = ((q3_src + q2_src) << 1) + q2_src + temp0;
+            temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 3);
+            temp2 = (v8i16) (temp1 - q2_src);
+            temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+            dst5 = (v16u8) (temp2 + (v8i16) q2_src);
+
+            temp1 = temp0 + q2_src;
+            temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 2);
+            temp2 = (v8i16) (temp1 - q1_src);
+            temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+            dst4 = (v16u8) (temp2 + (v8i16) q1_src);
+
+            temp1 = (temp0 << 1) + p1_src + q2_src;
+            temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 3);
+            temp2 = (v8i16) (temp1 - q0_src);
+            temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+            dst3 = (v16u8) (temp2 + (v8i16) q0_src);
+            /* Same PCM masking for the q side. */
+            cmp0 = __msa_fill_d(q_is_pcm0);
+            cmp1 = __msa_fill_d(q_is_pcm4);
+            q_is_pcm_vec = __msa_ilvev_d(cmp1, cmp0);
+            q_is_pcm_vec = __msa_ceqi_d(q_is_pcm_vec, 0);
+
+            dst3 = __msa_bmz_v(dst3, (v16u8) q0_src, (v16u8) q_is_pcm_vec);
+            dst4 = __msa_bmz_v(dst4, (v16u8) q1_src, (v16u8) q_is_pcm_vec);
+            dst5 = __msa_bmz_v(dst5, (v16u8) q2_src, (v16u8) q_is_pcm_vec);
+            /* Second filter variant: clip range is halved back to +/- tc. */
+            tc_pos >>= 1;
+            tc_neg = -tc_pos;
+            /* delta0 = srari(9 * (q0 - p0) - 3 * (q1 - p1), 4) */
+            diff0 = (v8i16) (q0_src - p0_src);
+            diff1 = (v8i16) (q1_src - p1_src);
+            diff0 = (v8i16) (diff0 << 3) + diff0;
+            diff1 = (v8i16) (diff1 << 1) + diff1;
+            delta0 = diff0 - diff1;
+            delta0 = __msa_srari_h(delta0, 4);
+            /* abs_delta0: presumably |delta0|, then made a "< 10 * tc" mask. */
+            temp1 = (v8u16) ((tc_pos << 3) + (tc_pos << 1));
+            abs_delta0 = __msa_add_a_h(delta0, (v8i16) zero);
+            abs_delta0 = (v8u16) abs_delta0 < temp1;
+            /* Candidate p0/q0: source +/- clipped delta0, then PCM masking. */
+            delta0 = CLIP_SH(delta0, tc_neg, tc_pos);
+            temp0 = (v8u16) delta0 + p0_src;
+            temp0 = (v8u16) CLIP_SH_0_255(temp0);
+            temp0 = (v8u16) __msa_bmz_v((v16u8) temp0, (v16u8) p0_src,
+                                        (v16u8) p_is_pcm_vec);
+
+            temp2 = (v8i16) q0_src - delta0;
+            temp2 = CLIP_SH_0_255(temp2);
+            temp2 = (v8i16) __msa_bmz_v((v16u8) temp2, (v16u8) q0_src,
+                                        (v16u8) q_is_pcm_vec);
+            /* Masks rebuilt with new meaning: set where p1/q1 stay untouched. */
+            tmp = ((beta + (beta >> 1)) >> 3);
+            cmp0 = __msa_fill_d(!p_is_pcm0 && (dp00 + dp30 < tmp));
+            cmp1 = __msa_fill_d(!p_is_pcm4 && (dp04 + dp34 < tmp));
+            p_is_pcm_vec = __msa_ilvev_d(cmp1, cmp0);
+            p_is_pcm_vec = __msa_ceqi_d(p_is_pcm_vec, 0);
+            /* NOTE(review): fill_h here vs fill_d for the p side above. */
+            cmp0 = (v2i64) __msa_fill_h((!q_is_pcm0) && (dq00 + dq30 < tmp));
+            cmp1 = (v2i64) __msa_fill_h((!q_is_pcm4) && (dq04 + dq34 < tmp));
+            q_is_pcm_vec = __msa_ilvev_d(cmp1, cmp0);
+            q_is_pcm_vec = __msa_ceqi_d(q_is_pcm_vec, 0);
+            /* p1/q1 corrections are clipped to +/- (tc >> 1). */
+            tc_pos >>= 1;
+            tc_neg = -tc_pos;
+
+            delta1 = (v8i16) __msa_aver_u_h(p2_src, p0_src);
+            delta1 -= (v8i16) p1_src;
+            delta1 += delta0;
+            delta1 >>= 1;
+            delta1 = CLIP_SH(delta1, tc_neg, tc_pos);
+            delta1 = (v8i16) p1_src + (v8i16) delta1;
+            delta1 = CLIP_SH_0_255(delta1);
+            delta1 = (v8i16) __msa_bmnz_v((v16u8) delta1, (v16u8) p1_src,
+                                          (v16u8) p_is_pcm_vec);
+
+            delta2 = (v8i16) __msa_aver_u_h(q0_src, q2_src);
+            delta2 = delta2 - (v8i16) q1_src;
+            delta2 = delta2 - delta0;
+            delta2 = delta2 >> 1;
+            delta2 = CLIP_SH(delta2, tc_neg, tc_pos);
+            delta2 = (v8i16) q1_src + (v8i16) delta2;
+            delta2 = CLIP_SH_0_255(delta2);
+            delta2 = (v8i16) __msa_bmnz_v((v16u8) delta2, (v16u8) q1_src,
+                                          (v16u8) q_is_pcm_vec);
+            delta1 = (v8i16) __msa_bmz_v((v16u8) delta1, (v16u8) p1_src,
+                                         (v16u8) abs_delta0);
+            temp0 = (v8u16) __msa_bmz_v((v16u8) temp0, (v16u8) p0_src,
+                                        (v16u8) abs_delta0);
+            temp2 = (v8i16) __msa_bmz_v((v16u8) temp2, (v16u8) q0_src,
+                                        (v16u8) abs_delta0);
+            delta2 = (v8i16) __msa_bmz_v((v16u8) delta2, (v16u8) q1_src,
+                                         (v16u8) abs_delta0);
+            /* Where cmp2 is set (flag == 0) use the second variant's values. */
+            dst2 = __msa_bmnz_v(dst2, (v16u8) temp0, (v16u8) cmp2);
+            dst3 = __msa_bmnz_v(dst3, (v16u8) temp2, (v16u8) cmp2);
+            dst1 = __msa_bmnz_v(dst1, (v16u8) delta1, (v16u8) cmp2);
+            dst4 = __msa_bmnz_v(dst4, (v16u8) delta2, (v16u8) cmp2);
+            dst0 = __msa_bmnz_v(dst0, (v16u8) p2_src, (v16u8) cmp2);
+            dst5 = __msa_bmnz_v(dst5, (v16u8) q2_src, (v16u8) cmp2);
+            /* dst6/dst7 are scratch here: mask for halves with d < beta. */
+            cmp0 = __msa_fill_d(d00 + d30 >= beta);
+            dst7 = (v16u8) __msa_fill_d(d04 + d34 >= beta);
+            cmp0 = __msa_ilvev_d((v2i64) dst7, cmp0);
+            dst6 = (v16u8) __msa_ceqi_d(cmp0, 0);
+            /* bmz with that mask: presumably d >= beta halves keep the source. */
+            dst0 = __msa_bmz_v(dst0, (v16u8) p2_src, dst6);
+            dst1 = __msa_bmz_v(dst1, (v16u8) p1_src, dst6);
+            dst2 = __msa_bmz_v(dst2, (v16u8) p0_src, dst6);
+            dst3 = __msa_bmz_v(dst3, (v16u8) q0_src, dst6);
+            dst4 = __msa_bmz_v(dst4, (v16u8) q1_src, dst6);
+            dst5 = __msa_bmz_v(dst5, (v16u8) q2_src, dst6);
+            /* Pack to bytes, then transpose - presumably back to row order. */
+            PCKEV_B4_UB(dst0, dst0, dst1, dst1, dst2, dst2, dst3, dst3,
+                        dst0, dst1, dst2, dst3);
+            PCKEV_B2_UB(dst4, dst4, dst5, dst5, dst4, dst5);
+            /* NOTE(review): dst6/dst7 enter the transpose holding mask scratch. */
+            TRANSPOSE8x8_UB_UB(dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7,
+                               dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7);
+            /* src is at column -4 (see src -= 4 above); step to column -3. */
+            src += 1;
+            /* Every row is written as one SW at src plus one SH at src + 4. */
+            tmp2 = __msa_copy_u_w((v4i32) dst0, 0);
+            tmp0 = __msa_copy_u_h((v8i16) dst0, 2);
+            tmp3 = __msa_copy_u_w((v4i32) dst1, 0);
+            tmp1 = __msa_copy_u_h((v8i16) dst1, 2);
+            SW(tmp2, src);
+            SH(tmp0, src + 4);
+            src += stride;
+            SW(tmp3, src);
+            SH(tmp1, src + 4);
+            src += stride;
+
+            tmp2 = __msa_copy_u_w((v4i32) dst2, 0);
+            tmp0 = __msa_copy_u_h((v8i16) dst2, 2);
+            tmp3 = __msa_copy_u_w((v4i32) dst3, 0);
+            tmp1 = __msa_copy_u_h((v8i16) dst3, 2);
+            SW(tmp2, src);
+            SH(tmp0, src + 4);
+            src += stride;
+            SW(tmp3, src);
+            SH(tmp1, src + 4);
+            src += stride;
+
+            tmp2 = __msa_copy_u_w((v4i32) dst4, 0);
+            tmp0 = __msa_copy_u_h((v8i16) dst4, 2);
+            tmp3 = __msa_copy_u_w((v4i32) dst5, 0);
+            tmp1 = __msa_copy_u_h((v8i16) dst5, 2);
+            SW(tmp2, src);
+            SH(tmp0, src + 4);
+            src += stride;
+            SW(tmp3, src);
+            SH(tmp1, src + 4);
+            src += stride;
+
+            tmp2 = __msa_copy_u_w((v4i32) dst6, 0);
+            tmp0 = __msa_copy_u_h((v8i16) dst6, 2);
+            tmp3 = __msa_copy_u_w((v4i32) dst7, 0);
+            tmp1 = __msa_copy_u_h((v8i16) dst7, 2);
+            SW(tmp2, src);
+            SH(tmp0, src + 4);
+            src += stride;
+            SW(tmp3, src);
+            SH(tmp1, src + 4);
+        }
+    }
+}
+/* HEVC chroma deblock of a horizontal edge: updates the p0 and q0 rows. */
+static void hevc_loopfilter_chroma_hor_msa(uint8_t *src, int32_t stride,
+                                           int32_t *tc, uint8_t *p_is_pcm,
+                                           uint8_t *q_is_pcm)
+{
+    uint8_t *p1_ptr = src - (stride << 1);  /* two rows above the edge */
+    uint8_t *p0_ptr = src - stride;         /* one row above the edge */
+    uint8_t *q0_ptr = src;                  /* first row below the edge */
+    uint8_t *q1_ptr = src + stride;         /* second row below the edge */
+    v2i64 cmp0, cmp1, p_is_pcm_vec, q_is_pcm_vec;
+    v8u16 p1, p0, q0, q1;
+    v8i16 tc_pos, tc_neg;
+    v16i8 zero = { 0 };
+    v8i16 temp0, temp1, delta;
+    /* Nothing to do unless at least one of tc[0], tc[1] is positive. */
+    if (!(tc[0] <= 0) || !(tc[1] <= 0)) {
+        cmp0 = (v2i64) __msa_fill_h(tc[0]);
+        cmp1 = (v2i64) __msa_fill_h(tc[1]);
+        tc_pos = (v8i16) __msa_ilvev_d(cmp1, cmp0);
+        tc_neg = -tc_pos;
+        /* Masks built from p_is_pcm[] / q_is_pcm[] by comparing with 0. */
+        cmp0 = __msa_fill_d(p_is_pcm[0]);
+        cmp1 = __msa_fill_d(p_is_pcm[1]);
+        p_is_pcm_vec = __msa_ilvev_d(cmp1, cmp0);
+        p_is_pcm_vec = __msa_ceqi_d(p_is_pcm_vec, 0);
+
+        cmp0 = __msa_fill_d(q_is_pcm[0]);
+        cmp1 = __msa_fill_d(q_is_pcm[1]);
+        q_is_pcm_vec = __msa_ilvev_d(cmp1, cmp0);
+        q_is_pcm_vec = __msa_ceqi_d(q_is_pcm_vec, 0);
+        /* Load the four rows that straddle the edge. */
+        p1 = LD_UH(p1_ptr);
+        p0 = LD_UH(p0_ptr);
+        q0 = LD_UH(q0_ptr);
+        q1 = LD_UH(q1_ptr);
+        /* Interleave with zero - presumably widens bytes to 16-bit lanes. */
+        ILVR_B4_UH(zero, p1, zero, p0, zero, q0, zero, q1, p1, p0, q0, q1);
+        /* delta = CLIP_SH(srari(4 * (q0 - p0) + (p1 - q1), 3), -tc, tc) */
+        temp0 = (v8i16) (q0 - p0);
+        temp1 = (v8i16) (p1 - q1);
+        temp0 <<= 2;
+        temp0 += temp1;
+        delta = __msa_srari_h((v8i16) temp0, 3);
+        delta = CLIP_SH(delta, tc_neg, tc_pos);
+        /* New p0 = p0 + delta, new q0 = q0 - delta; clipped, then PCM-masked. */
+        temp0 = (v8i16) ((v8i16) p0 + delta);
+        temp0 = CLIP_SH_0_255(temp0);
+        temp0 = (v8i16) __msa_bmz_v((v16u8) temp0, (v16u8) p0,
+                                    (v16u8) p_is_pcm_vec);
+
+        temp1 = (v8i16) ((v8i16) q0 - delta);
+        temp1 = CLIP_SH_0_255(temp1);
+        temp1 = (v8i16) __msa_bmz_v((v16u8) temp1, (v16u8) q0,
+                                    (v16u8) q_is_pcm_vec);
+        /* Halves whose tc is <= 0 presumably get the source p0/q0 back. */
+        tc_pos = (v8i16) __msa_clei_s_d((v2i64) tc_pos, 0);
+        temp0 = (v8i16) __msa_bmnz_v((v16u8) temp0, (v16u8) p0, (v16u8) tc_pos);
+        temp1 = (v8i16) __msa_bmnz_v((v16u8) temp1, (v16u8) q0, (v16u8) tc_pos);
+        /* Pack the p0 and q0 results into one vector; store from p0_ptr. */
+        temp0 = (v8i16) __msa_pckev_b((v16i8) temp1, (v16i8) temp0);
+        ST8x2_UB(temp0, p0_ptr, stride);
+    }
+}
+/* HEVC chroma deblock of a vertical edge at src, working on 8 rows. */
+static void hevc_loopfilter_chroma_ver_msa(uint8_t *src, int32_t stride,
+                                           int32_t *tc, uint8_t *p_is_pcm,
+                                           uint8_t *q_is_pcm)
+{
+    v2i64 cmp0, cmp1, p_is_pcm_vec, q_is_pcm_vec;
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
+    v8u16 p1, p0, q0, q1;
+    v8i16 tc_pos, tc_neg;
+    v16i8 zero = { 0 };
+    v8i16 temp0, temp1, delta;
+    /* Nothing to do unless at least one of tc[0], tc[1] is positive. */
+    if (!(tc[0] <= 0) || !(tc[1] <= 0)) {
+        cmp0 = (v2i64) __msa_fill_h(tc[0]);
+        cmp1 = (v2i64) __msa_fill_h(tc[1]);
+        tc_pos = (v8i16) __msa_ilvev_d(cmp1, cmp0);
+        tc_neg = -tc_pos;
+        /* Masks built from p_is_pcm[] / q_is_pcm[] by comparing with 0. */
+        cmp0 = __msa_fill_d(p_is_pcm[0]);
+        cmp1 = __msa_fill_d(p_is_pcm[1]);
+        p_is_pcm_vec = __msa_ilvev_d(cmp1, cmp0);
+        p_is_pcm_vec = __msa_ceqi_d(p_is_pcm_vec, 0);
+
+        cmp0 = __msa_fill_d(q_is_pcm[0]);
+        cmp1 = __msa_fill_d(q_is_pcm[1]);
+        q_is_pcm_vec = __msa_ilvev_d(cmp1, cmp0);
+        q_is_pcm_vec = __msa_ceqi_d(q_is_pcm_vec, 0);
+        /* Start two columns left of the edge; transpose yields p1, p0, q0, q1. */
+        src -= 2;
+        LD_UB8(src, stride, src0, src1, src2, src3, src4, src5, src6, src7);
+        TRANSPOSE8x4_UB_UH(src0, src1, src2, src3, src4, src5, src6, src7,
+                           p1, p0, q0, q1);
+        ILVR_B4_UH(zero, p1, zero, p0, zero, q0, zero, q1, p1, p0, q0, q1);
+        /* delta = CLIP_SH(srari(4 * (q0 - p0) + (p1 - q1), 3), -tc, tc) */
+        temp0 = (v8i16) (q0 - p0);
+        temp1 = (v8i16) (p1 - q1);
+        temp0 <<= 2;
+        temp0 += temp1;
+        delta = __msa_srari_h((v8i16) temp0, 3);
+        delta = CLIP_SH(delta, tc_neg, tc_pos);
+        /* New p0 = p0 + delta, new q0 = q0 - delta; clipped, then PCM-masked. */
+        temp0 = (v8i16) ((v8i16) p0 + delta);
+        temp0 = CLIP_SH_0_255(temp0);
+        temp0 = (v8i16) __msa_bmz_v((v16u8) temp0, (v16u8) p0,
+                                    (v16u8) p_is_pcm_vec);
+
+        temp1 = (v8i16) ((v8i16) q0 - delta);
+        temp1 = CLIP_SH_0_255(temp1);
+        temp1 = (v8i16) __msa_bmz_v((v16u8) temp1, (v16u8) q0,
+                                    (v16u8) q_is_pcm_vec);
+        /* Halves whose tc is <= 0 presumably get the source p0/q0 back. */
+        tc_pos = (v8i16) __msa_clei_s_d((v2i64) tc_pos, 0);
+        temp0 = (v8i16) __msa_bmnz_v((v16u8) temp0, (v16u8) p0, (v16u8) tc_pos);
+        temp1 = (v8i16) __msa_bmnz_v((v16u8) temp1, (v16u8) q0, (v16u8) tc_pos);
+        /* Interleave p0/q0 bytes - presumably one (p0, q0) pair per row. */
+        temp0 = (v8i16) __msa_ilvev_b((v16i8) temp1, (v16i8) temp0);
+        /* src was moved back by 2 above, so src + 1 is the p0 column. */
+        src += 1;
+        ST2x4_UB(temp0, 0, src, stride);
+        src += (4 * stride);
+        ST2x4_UB(temp0, 4, src, stride);
+    }
+}
+/* SAO band filter for a 4-pixel-wide block, four rows per loop iteration. */
+static void hevc_sao_band_filter_4width_msa(uint8_t *dst, int32_t dst_stride,
+                                            uint8_t *src, int32_t src_stride,
+                                            int32_t sao_left_class,
+                                            int16_t *sao_offset_val,
+                                            int32_t height)
+{
+    int32_t h_cnt;
+    v16u8 src0, src1, src2, src3;
+    v16i8 src0_r, src1_r;
+    v16i8 offset, offset_val, mask;
+    v16i8 offset0 = { 0 };
+    v16i8 offset1 = { 0 };
+    v16i8 zero = { 0 };
+    v8i16 temp0, temp1, dst0, dst1;
+    /* NOTE(review): vector load from sao_offset_val + 1 - confirm array size. */
+    offset_val = LD_SB(sao_offset_val + 1);
+    offset_val = (v16i8) __msa_pckev_d((v2i64) offset_val, (v2i64) offset_val);
+    /* Pack to bytes and slide into a 32-entry table placed by sao_left_class. */
+    offset_val = __msa_pckev_b(offset_val, offset_val);
+    offset1 = (v16i8) __msa_insve_w((v4i32) offset1, 3, (v4i32) offset_val);
+    offset0 = __msa_sld_b(offset1, offset0, 28 - ((sao_left_class) & 31));
+    offset1 = __msa_sld_b(zero, offset1, 28 - ((sao_left_class) & 31));
+    /* Exchange the two table halves unless 12 < sao_left_class < 29. */
+    if (!((sao_left_class > 12) & (sao_left_class < 29))) {
+        SWAP(offset0, offset1);
+    }
+    /* height >> 2 iterations: rows past a multiple of four are not touched. */
+    for (h_cnt = height >> 2; h_cnt--;) {
+        LD_UB4(src, src_stride, src0, src1, src2, src3);
+        src += (4 * src_stride);
+        /* Gather the four rows into one vector (presumably 4 bytes of each). */
+        ILVEV_D2_SB(src0, src1, src2, src3, src0_r, src1_r);
+
+        src0_r = (v16i8) __msa_pckev_w((v4i32) src1_r, (v4i32) src0_r);
+        mask = __msa_srli_b(src0_r, 3);  /* pixel >> 3 selects the table entry */
+        offset = __msa_vshf_b(mask, offset1, offset0);
+        /* Add the offsets in 16-bit lanes, clip, pack to bytes, store 4x4. */
+        UNPCK_SB_SH(offset, temp0, temp1);
+        ILVRL_B2_SH(zero, src0_r, dst0, dst1);
+        ADD2(dst0, temp0, dst1, temp1, dst0, dst1);
+        CLIP_SH2_0_255(dst0, dst1);
+        dst0 = (v8i16) __msa_pckev_b((v16i8) dst1, (v16i8) dst0);
+        ST4x4_UB(dst0, dst0, 0, 1, 2, 3, dst, dst_stride);
+        dst += (4 * dst_stride);
+    }
+}
+/* SAO band filter for an 8-pixel-wide block, four rows per loop iteration. */
+static void hevc_sao_band_filter_8width_msa(uint8_t *dst, int32_t dst_stride,
+                                            uint8_t *src, int32_t src_stride,
+                                            int32_t sao_left_class,
+                                            int16_t *sao_offset_val,
+                                            int32_t height)
+{
+    int32_t h_cnt;
+    v16u8 src0, src1, src2, src3;
+    v16i8 src0_r, src1_r, mask0, mask1;
+    v16i8 offset, offset_val;
+    v16i8 offset0 = { 0 };
+    v16i8 offset1 = { 0 };
+    v16i8 zero = { 0 };
+    v8i16 dst0, dst1, dst2, dst3;
+    v8i16 temp0, temp1, temp2, temp3;
+    /* NOTE(review): vector load from sao_offset_val + 1 - confirm array size. */
+    offset_val = LD_SB(sao_offset_val + 1);
+    offset_val = (v16i8) __msa_pckev_d((v2i64) offset_val, (v2i64) offset_val);
+    offset_val = __msa_pckev_b(offset_val, offset_val);
+    offset1 = (v16i8) __msa_insve_w((v4i32) offset1, 3, (v4i32) offset_val);
+    offset0 = __msa_sld_b(offset1, offset0, 28 - ((sao_left_class) & 31));
+    offset1 = __msa_sld_b(zero, offset1, 28 - ((sao_left_class) & 31));
+    /* Exchange the two table halves unless 12 < sao_left_class < 29. */
+    if (!((sao_left_class > 12) & (sao_left_class < 29))) {
+        SWAP(offset0, offset1);
+    }
+    /* height >> 2 iterations: rows past a multiple of four are not touched. */
+    for (h_cnt = height >> 2; h_cnt--;) {
+        LD_UB4(src, src_stride, src0, src1, src2, src3);
+        src += (4 * src_stride);
+        /* Pair up rows: presumably rows 0+1 in src0_r and rows 2+3 in src1_r. */
+        ILVR_D2_SB(src1, src0, src3, src2, src0_r, src1_r);
+        /* pixel >> 3 is the selector used to shuffle offsets out of the table. */
+        mask0 = __msa_srli_b(src0_r, 3);
+        mask1 = __msa_srli_b(src1_r, 3);
+
+        offset = __msa_vshf_b(mask0, offset1, offset0);
+        UNPCK_SB_SH(offset, temp0, temp1);
+
+        offset = __msa_vshf_b(mask1, offset1, offset0);
+        UNPCK_SB_SH(offset, temp2, temp3);
+        /* Add the offsets in 16-bit lanes, clip, pack to bytes, store 8x4. */
+        UNPCK_UB_SH(src0_r, dst0, dst1);
+        UNPCK_UB_SH(src1_r, dst2, dst3);
+        ADD4(dst0, temp0, dst1, temp1, dst2, temp2, dst3, temp3,
+             dst0, dst1, dst2, dst3);
+        CLIP_SH4_0_255(dst0, dst1, dst2, dst3);
+        PCKEV_B2_SH(dst1, dst0, dst3, dst2, dst0, dst2);
+        ST8x4_UB(dst0, dst2, dst, dst_stride);
+        dst += (4 * dst_stride);
+    }
+}
+/* SAO band filter for widths handled in 16-pixel columns, four rows a pass. */
+static void hevc_sao_band_filter_16multiple_msa(uint8_t *dst,
+                                                int32_t dst_stride,
+                                                uint8_t *src,
+                                                int32_t src_stride,
+                                                int32_t sao_left_class,
+                                                int16_t *sao_offset_val,
+                                                int32_t width, int32_t height)
+{
+    int32_t h_cnt, w_cnt;
+    v16u8 src0, src1, src2, src3;
+    v8i16 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+    v16i8 out0, out1, out2, out3;
+    v16i8 mask0, mask1, mask2, mask3;
+    v16i8 tmp0, tmp1, tmp2, tmp3, offset_val;
+    v16i8 offset0 = { 0 };
+    v16i8 offset1 = { 0 };
+    v16i8 zero = { 0 };
+    v8i16 temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+    /* NOTE(review): vector load from sao_offset_val + 1 - confirm array size. */
+    offset_val = LD_SB(sao_offset_val + 1);
+    offset_val = (v16i8) __msa_pckev_d((v2i64) offset_val, (v2i64) offset_val);
+    offset_val = __msa_pckev_b(offset_val, offset_val);
+    offset1 = (v16i8) __msa_insve_w((v4i32) offset1, 3, (v4i32) offset_val);
+    offset0 = __msa_sld_b(offset1, offset0, 28 - ((sao_left_class) & 31));
+    offset1 = __msa_sld_b(zero, offset1, 28 - ((sao_left_class) & 31));
+    /* Exchange the two table halves unless 12 < sao_left_class < 29. */
+    if (!((sao_left_class > 12) & (sao_left_class < 29))) {
+        SWAP(offset0, offset1);
+    }
+    /* height >> 2 row groups by width >> 4 columns; remainders are skipped. */
+    for (h_cnt = height >> 2; h_cnt--;) {
+        for (w_cnt = 0; w_cnt < (width >> 4); w_cnt++) {
+            LD_UB4(src + w_cnt * 16, src_stride, src0, src1, src2, src3);
+            /* pixel >> 3 is the selector used to shuffle out the offsets. */
+            mask0 = __msa_srli_b((v16i8) src0, 3);
+            mask1 = __msa_srli_b((v16i8) src1, 3);
+            mask2 = __msa_srli_b((v16i8) src2, 3);
+            mask3 = __msa_srli_b((v16i8) src3, 3);
+            /* Offsets and pixels go to 16-bit lanes, are added, clipped, packed. */
+            VSHF_B2_SB(offset0, offset1, offset0, offset1, mask0, mask1,
+                       tmp0, tmp1);
+            VSHF_B2_SB(offset0, offset1, offset0, offset1, mask2, mask3,
+                       tmp2, tmp3);
+            UNPCK_SB_SH(tmp0, temp0, temp1);
+            UNPCK_SB_SH(tmp1, temp2, temp3);
+            UNPCK_SB_SH(tmp2, temp4, temp5);
+            UNPCK_SB_SH(tmp3, temp6, temp7);
+            ILVRL_B2_SH(zero, src0, dst0, dst1);
+            ILVRL_B2_SH(zero, src1, dst2, dst3);
+            ILVRL_B2_SH(zero, src2, dst4, dst5);
+            ILVRL_B2_SH(zero, src3, dst6, dst7);
+            ADD4(dst0, temp0, dst1, temp1, dst2, temp2, dst3, temp3,
+                 dst0, dst1, dst2, dst3);
+            ADD4(dst4, temp4, dst5, temp5, dst6, temp6, dst7, temp7,
+                 dst4, dst5, dst6, dst7);
+            CLIP_SH4_0_255(dst0, dst1, dst2, dst3);
+            CLIP_SH4_0_255(dst4, dst5, dst6, dst7);
+            PCKEV_B4_SB(dst1, dst0, dst3, dst2, dst5, dst4, dst7, dst6,
+                        out0, out1, out2, out3);
+            ST_SB4(out0, out1, out2, out3, dst + w_cnt * 16, dst_stride);
+        }
+        /* Advance both pointers by the four rows just processed. */
+        src += src_stride << 2;
+        dst += dst_stride << 2;
+    }
+}
+/* SAO edge filter, left/right neighbours, 4-pixel-wide block, 2 rows a pass. */
+static void hevc_sao_edge_filter_0degree_4width_msa(uint8_t *dst,
+                                                    int32_t dst_stride,
+                                                    uint8_t *src,
+                                                    int32_t src_stride,
+                                                    int16_t *sao_offset_val,
+                                                    int32_t height)
+{
+    int32_t h_cnt;
+    uint32_t dst_val0, dst_val1;
+    v8i16 edge_idx = { 1, 2, 0, 3, 4, 0, 0, 0 };
+    v16u8 const1 = (v16u8) __msa_ldi_b(1);
+    v16u8 cmp_minus10, diff_minus10, cmp_minus11, diff_minus11;
+    v16u8 src_minus10, src_minus11;
+    v16i8 zero = { 0 };
+    v16i8 src_zero0, src_zero1, src_plus10, src_plus11, dst0;
+    v8i16 offset_mask0, offset_mask1;
+    v8i16 sao_offset, src00, src01;
+    /* Load the offsets; back src up one column to reach the left neighbour. */
+    sao_offset = LD_SH(sao_offset_val);
+    src -= 1;
+    /* height >> 1 iterations: an odd last row is not processed. */
+    for (h_cnt = (height >> 1); h_cnt--;) {
+        LD_UB2(src, src_stride, src_minus10, src_minus11);
+        src += (2 * src_stride);
+        /* Shifted copies: presumably src_zero = pixel, src_plus = right one. */
+        SLDI_B2_0_SB(src_minus10, src_minus11, src_zero0, src_zero1, 1);
+        SLDI_B2_0_SB(src_minus10, src_minus11, src_plus10, src_plus11, 2);
+        ILVR_B2_UB(src_plus10, src_minus10, src_plus11, src_minus11,
+                   src_minus10, src_minus11);
+        ILVR_B2_SB(src_zero0, src_zero0, src_zero1, src_zero1, src_zero0,
+                   src_zero1);
+        /* diff byte: 0 if equal, 1 if neighbour < pixel, else all-ones (nor). */
+        cmp_minus10 = ((v16u8) src_zero0 == src_minus10);
+        diff_minus10 = __msa_nor_v(cmp_minus10, cmp_minus10);
+        cmp_minus10 = (src_minus10 < (v16u8) src_zero0);
+        diff_minus10 = __msa_bmnz_v(diff_minus10, const1, cmp_minus10);
+
+        cmp_minus11 = ((v16u8) src_zero1 == src_minus11);
+        diff_minus11 = __msa_nor_v(cmp_minus11, cmp_minus11);
+        cmp_minus11 = (src_minus11 < (v16u8) src_zero1);
+        diff_minus11 = __msa_bmnz_v(diff_minus11, const1, cmp_minus11);
+        /* hadd presumably sums each left/right diff pair; + 2 forms the index. */
+        offset_mask0 = (v8i16) (__msa_hadd_u_h(diff_minus10, diff_minus10) + 2);
+        offset_mask1 = (v8i16) (__msa_hadd_u_h(diff_minus11, diff_minus11) + 2);
+        /* Index -> edge_idx -> sao_offset, then add to the pixels and clip. */
+        VSHF_H2_SH(edge_idx, edge_idx, sao_offset, sao_offset, offset_mask0,
+                   offset_mask0, offset_mask0, offset_mask0);
+        VSHF_H2_SH(edge_idx, edge_idx, sao_offset, sao_offset, offset_mask1,
+                   offset_mask1, offset_mask1, offset_mask1);
+        ILVEV_B2_SH(src_zero0, zero, src_zero1, zero, src00, src01);
+        ADD2(offset_mask0, src00, offset_mask1, src01, offset_mask0,
+             offset_mask1);
+        CLIP_SH2_0_255(offset_mask0, offset_mask1);
+        /* Pack both rows; word lanes 0 and 2 are stored, one per output row. */
+        dst0 = __msa_pckev_b((v16i8) offset_mask1, (v16i8) offset_mask0);
+        dst_val0 = __msa_copy_u_w((v4i32) dst0, 0);
+        dst_val1 = __msa_copy_u_w((v4i32) dst0, 2);
+        SW(dst_val0, dst);
+        dst += dst_stride;
+        SW(dst_val1, dst);
+        dst += dst_stride;
+    }
+}
+/* SAO edge filter, left/right neighbours, 8-pixel-wide block, 2 rows a pass. */
+static void hevc_sao_edge_filter_0degree_8width_msa(uint8_t *dst,
+                                                    int32_t dst_stride,
+                                                    uint8_t *src,
+                                                    int32_t src_stride,
+                                                    int16_t *sao_offset_val,
+                                                    int32_t height)
+{
+    uint8_t *src_minus1;
+    int32_t h_cnt;
+    uint64_t dst_val0, dst_val1;
+    v8i16 edge_idx = { 1, 2, 0, 3, 4, 0, 0, 0 };
+    v16u8 const1 = (v16u8) __msa_ldi_b(1);
+    v16i8 dst0, dst1;
+    v16i8 zero = { 0 };
+    v16u8 cmp_minus10, diff_minus10, cmp_minus11, diff_minus11;
+    v16u8 src_minus10, src_minus11;
+    v16i8 src_zero0, src_plus10, src_zero1, src_plus11;
+    v8i16 sao_offset, src00, offset_mask0, src01, offset_mask1;
+
+    sao_offset = LD_SH(sao_offset_val);
+    /* height >> 1 iterations: an odd last row is not processed. */
+    for (h_cnt = (height >> 1); h_cnt--;) {
+        src_minus1 = src - 1;  /* start at the left neighbour's column */
+        LD_UB2(src_minus1, src_stride, src_minus10, src_minus11);
+        /* Shifted copies: presumably src_zero = pixel, src_plus = right one. */
+        SLDI_B2_0_SB(src_minus10, src_minus11, src_zero0, src_zero1, 1);
+        SLDI_B2_0_SB(src_minus10, src_minus11, src_plus10, src_plus11, 2);
+        ILVR_B2_UB(src_plus10, src_minus10, src_plus11, src_minus11,
+                   src_minus10, src_minus11);
+        ILVR_B2_SB(src_zero0, src_zero0, src_zero1, src_zero1,
+                   src_zero0, src_zero1);
+        /* diff byte: 0 if equal, 1 if neighbour < pixel, else all-ones (nor). */
+        cmp_minus10 = ((v16u8) src_zero0 == src_minus10);
+        diff_minus10 = __msa_nor_v(cmp_minus10, cmp_minus10);
+        cmp_minus10 = (src_minus10 < (v16u8) src_zero0);
+        diff_minus10 = __msa_bmnz_v(diff_minus10, const1, cmp_minus10);
+
+        cmp_minus11 = ((v16u8) src_zero1 == src_minus11);
+        diff_minus11 = __msa_nor_v(cmp_minus11, cmp_minus11);
+        cmp_minus11 = (src_minus11 < (v16u8) src_zero1);
+        diff_minus11 = __msa_bmnz_v(diff_minus11, const1, cmp_minus11);
+        /* hadd presumably sums each left/right diff pair; + 2 forms the index. */
+        offset_mask0 = (v8i16) (__msa_hadd_u_h(diff_minus10, diff_minus10) + 2);
+        offset_mask1 = (v8i16) (__msa_hadd_u_h(diff_minus11, diff_minus11) + 2);
+        /* Index -> edge_idx -> sao_offset, then add to the pixels and clip. */
+        VSHF_H2_SH(edge_idx, edge_idx, sao_offset, sao_offset, offset_mask0,
+                   offset_mask0, offset_mask0, offset_mask0);
+        VSHF_H2_SH(edge_idx, edge_idx, sao_offset, sao_offset, offset_mask1,
+                   offset_mask1, offset_mask1, offset_mask1);
+        ILVEV_B2_SH(src_zero0, zero, src_zero1, zero, src00, src01);
+
+        ADD2(offset_mask0, src00, offset_mask1, src01, offset_mask0,
+             offset_mask1);
+        CLIP_SH2_0_255(offset_mask0, offset_mask1);
+        PCKEV_B2_SB(offset_mask0, offset_mask0, offset_mask1, offset_mask1,
+                    dst0, dst1);
+        /* Doubleword lane 0 of each packed vector is stored as one output row. */
+        dst_val0 = __msa_copy_u_d((v2i64) dst0, 0);
+        dst_val1 = __msa_copy_u_d((v2i64) dst1, 0);
+        SD(dst_val0, dst);
+        dst += dst_stride;
+        SD(dst_val1, dst);
+        dst += dst_stride;
+        src += (src_stride << 1);
+    }
+}
+
+static void hevc_sao_edge_filter_0degree_16multiple_msa(uint8_t *dst,
+                                                        int32_t dst_stride,
+                                                        uint8_t *src,
+                                                        int32_t src_stride,
+                                                        int16_t *sao_offset_val,
+                                                        int32_t width,
+                                                        int32_t height)
+{
+    uint8_t *dst_ptr, *src_minus1;
+    int32_t h_cnt, v_cnt;
+    v16i8 edge_idx = { 1, 2, 0, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+    v16u8 const1 = (v16u8) __msa_ldi_b(1);
+    v16i8 sao_offset;
+    v16u8 cmp_minus10, cmp_plus10, diff_minus10, diff_plus10, cmp_minus11;
+    v16u8 cmp_plus11, diff_minus11, diff_plus11, cmp_minus12, cmp_plus12;
+    v16u8 diff_minus12, diff_plus12, cmp_minus13, cmp_plus13, diff_minus13;
+    v16u8 diff_plus13;
+    v16u8 src10, src11, src12, src13, dst0, dst1, dst2, dst3;
+    v16u8 src_minus10, src_minus11, src_minus12, src_minus13;
+    v16i8 offset_mask0, offset_mask1, offset_mask2, offset_mask3;
+    v16i8 src_zero0, src_zero1, src_zero2, src_zero3;
+    v16i8 src_plus10, src_plus11, src_plus12, src_plus13;
+    v8i16 src0, src1, src2, src3, src4, src5, src6, src7;
+    v8i16 temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+    /* SAO edge filter, left/right neighbours; 16 columns x 4 rows per step. */
+    sao_offset = LD_SB(sao_offset_val);
+    sao_offset = __msa_pckev_b(sao_offset, sao_offset); /* even bytes only */
+    /* height >> 2 passes; rows past a multiple of 4 are not processed. */
+    for (h_cnt = (height >> 2); h_cnt--;) {
+        src_minus1 = src - 1;
+        LD_UB4(src_minus1, src_stride,
+               src_minus10, src_minus11, src_minus12, src_minus13);
+        /* width >> 4 blocks; columns past a multiple of 16 are not processed. */
+        for (v_cnt = 0; v_cnt < (width >> 4); v_cnt++) {
+            src_minus1 += 16;
+            dst_ptr = dst + (v_cnt << 4);
+            LD_UB4(src_minus1, src_stride, src10, src11, src12, src13);
+            /* NOTE(review): SLDI presumably byte-shifts: 1 = centre, 2 = right. */
+            SLDI_B2_SB(src10, src11, src_minus10, src_minus11, src_zero0,
+                       src_zero1, 1);
+            SLDI_B2_SB(src12, src13, src_minus12, src_minus13, src_zero2,
+                       src_zero3, 1);
+            SLDI_B2_SB(src10, src11, src_minus10, src_minus11, src_plus10,
+                       src_plus11, 2);
+            SLDI_B2_SB(src12, src13, src_minus12, src_minus13, src_plus12,
+                       src_plus13, 2);
+            /* Equality masks: centre vs left and centre vs right. */
+            cmp_minus10 = ((v16u8) src_zero0 == src_minus10);
+            cmp_plus10 = ((v16u8) src_zero0 == (v16u8) src_plus10);
+            cmp_minus11 = ((v16u8) src_zero1 == src_minus11);
+            cmp_plus11 = ((v16u8) src_zero1 == (v16u8) src_plus11);
+            cmp_minus12 = ((v16u8) src_zero2 == src_minus12);
+            cmp_plus12 = ((v16u8) src_zero2 == (v16u8) src_plus12);
+            cmp_minus13 = ((v16u8) src_zero3 == src_minus13);
+            cmp_plus13 = ((v16u8) src_zero3 == (v16u8) src_plus13);
+            /* NOR of a mask with itself inverts it: 0xFF where they differ. */
+            diff_minus10 = __msa_nor_v(cmp_minus10, cmp_minus10);
+            diff_plus10 = __msa_nor_v(cmp_plus10, cmp_plus10);
+            diff_minus11 = __msa_nor_v(cmp_minus11, cmp_minus11);
+            diff_plus11 = __msa_nor_v(cmp_plus11, cmp_plus11);
+            diff_minus12 = __msa_nor_v(cmp_minus12, cmp_minus12);
+            diff_plus12 = __msa_nor_v(cmp_plus12, cmp_plus12);
+            diff_minus13 = __msa_nor_v(cmp_minus13, cmp_minus13);
+            diff_plus13 = __msa_nor_v(cmp_plus13, cmp_plus13);
+            /* Masks of lanes where the neighbour is smaller than the centre. */
+            cmp_minus10 = (src_minus10 < (v16u8) src_zero0);
+            cmp_plus10 = ((v16u8) src_plus10 < (v16u8) src_zero0);
+            cmp_minus11 = (src_minus11 < (v16u8) src_zero1);
+            cmp_plus11 = ((v16u8) src_plus11 < (v16u8) src_zero1);
+            cmp_minus12 = (src_minus12 < (v16u8) src_zero2);
+            cmp_plus12 = ((v16u8) src_plus12 < (v16u8) src_zero2);
+            cmp_minus13 = (src_minus13 < (v16u8) src_zero3);
+            cmp_plus13 = ((v16u8) src_plus13 < (v16u8) src_zero3);
+            /* In those lanes replace 0xFF with 1: diff is now -1, 0 or +1. */
+            diff_minus10 = __msa_bmnz_v(diff_minus10, const1, cmp_minus10);
+            diff_plus10 = __msa_bmnz_v(diff_plus10, const1, cmp_plus10);
+            diff_minus11 = __msa_bmnz_v(diff_minus11, const1, cmp_minus11);
+            diff_plus11 = __msa_bmnz_v(diff_plus11, const1, cmp_plus11);
+            diff_minus12 = __msa_bmnz_v(diff_minus12, const1, cmp_minus12);
+            diff_plus12 = __msa_bmnz_v(diff_plus12, const1, cmp_plus12);
+            diff_minus13 = __msa_bmnz_v(diff_minus13, const1, cmp_minus13);
+            diff_plus13 = __msa_bmnz_v(diff_plus13, const1, cmp_plus13);
+            /* 2 + both signs = 0..4; presumably looked up in edge_idx, then sao_offset. */
+            offset_mask0 = 2 + (v16i8) diff_minus10 + (v16i8) diff_plus10;
+            VSHF_B2_SB(edge_idx, edge_idx, sao_offset, sao_offset, offset_mask0,
+                       offset_mask0, offset_mask0, offset_mask0);
+            offset_mask1 = 2 + (v16i8) diff_minus11 + (v16i8) diff_plus11;
+            VSHF_B2_SB(edge_idx, edge_idx, sao_offset, sao_offset, offset_mask1,
+                       offset_mask1, offset_mask1, offset_mask1);
+            offset_mask2 = 2 + (v16i8) diff_minus12 + (v16i8) diff_plus12;
+            VSHF_B2_SB(edge_idx, edge_idx, sao_offset, sao_offset, offset_mask2,
+                       offset_mask2, offset_mask2, offset_mask2);
+            offset_mask3 = 2 + (v16i8) diff_minus13 + (v16i8) diff_plus13;
+            VSHF_B2_SB(edge_idx, edge_idx, sao_offset, sao_offset, offset_mask3,
+                       offset_mask3, offset_mask3, offset_mask3);
+            /* Presumably: widen pixels and offsets to 16 bit, add, clip, repack. */
+            UNPCK_UB_SH(src_zero0, src0, src1);
+            UNPCK_SB_SH(offset_mask0, temp0, temp1);
+            UNPCK_UB_SH(src_zero1, src2, src3);
+            UNPCK_SB_SH(offset_mask1, temp2, temp3);
+            UNPCK_UB_SH(src_zero2, src4, src5);
+            UNPCK_SB_SH(offset_mask2, temp4, temp5);
+            UNPCK_UB_SH(src_zero3, src6, src7);
+            UNPCK_SB_SH(offset_mask3, temp6, temp7);
+            ADD4(temp0, src0, temp1, src1, temp2, src2, temp3, src3, temp0,
+                 temp1, temp2, temp3);
+            ADD4(temp4, src4, temp5, src5, temp6, src6, temp7, src7, temp4,
+                 temp5, temp6, temp7);
+            CLIP_SH4_0_255(temp0, temp1, temp2, temp3);
+            CLIP_SH4_0_255(temp4, temp5, temp6, temp7);
+            PCKEV_B4_UB(temp1, temp0, temp3, temp2, temp5, temp4, temp7, temp6,
+                        dst0, dst1, dst2, dst3);
+            /* Current loads become the left context of the next block; store rows. */
+            src_minus10 = src10;
+            ST_UB(dst0, dst_ptr);
+            src_minus11 = src11;
+            ST_UB(dst1, dst_ptr + dst_stride);
+            src_minus12 = src12;
+            ST_UB(dst2, dst_ptr + (dst_stride << 1));
+            src_minus13 = src13;
+            ST_UB(dst3, dst_ptr + (dst_stride * 3));
+        }
+        /* Advance both pointers by four rows. */
+        src += (src_stride << 2);
+        dst += (dst_stride << 2);
+    }
+}
+
+static void hevc_sao_edge_filter_90degree_4width_msa(uint8_t *dst,
+                                                     int32_t dst_stride,
+                                                     uint8_t *src,
+                                                     int32_t src_stride,
+                                                     int16_t *sao_offset_val,
+                                                     int32_t height)
+{
+    int32_t h_cnt;
+    uint32_t dst_val0, dst_val1;
+    v8i16 edge_idx = { 1, 2, 0, 3, 4, 0, 0, 0 };
+    v16u8 const1 = (v16u8) __msa_ldi_b(1);
+    v16i8 dst0;
+    v16i8 zero = { 0 };
+    v16u8 cmp_minus10, diff_minus10, cmp_minus11, diff_minus11;
+    v16u8 src_minus10, src_minus11, src10, src11;
+    v16i8 src_zero0, src_zero1;
+    v8i16 sao_offset, src00, src01, offset_mask0, offset_mask1;
+    /* SAO edge filter, above/below neighbours; 4 bytes stored per row. */
+    sao_offset = LD_SH(sao_offset_val);
+    /* Prime the window with the row above src and the row at src. */
+    LD_UB2(src - src_stride, src_stride, src_minus10, src_minus11);
+    /* height >> 1 passes of two rows; an odd trailing row is not processed. */
+    for (h_cnt = (height >> 1); h_cnt--;) {
+        LD_UB2(src + src_stride, src_stride, src10, src11);
+        /* Interleave above/below neighbour bytes; duplicate each centre byte. */
+        src_minus10 = (v16u8) __msa_ilvr_b((v16i8) src10, (v16i8) src_minus10);
+        src_zero0 = __msa_ilvr_b((v16i8) src_minus11, (v16i8) src_minus11);
+        src_minus11 = (v16u8) __msa_ilvr_b((v16i8) src11, (v16i8) src_minus11);
+        src_zero1 = __msa_ilvr_b((v16i8) src10, (v16i8) src10);
+        /* First row: 0 if equal, 1 if neighbour smaller, 0xFF if larger. */
+        cmp_minus10 = ((v16u8) src_zero0 == src_minus10);
+        diff_minus10 = __msa_nor_v(cmp_minus10, cmp_minus10);
+        cmp_minus10 = (src_minus10 < (v16u8) src_zero0);
+        diff_minus10 = __msa_bmnz_v(diff_minus10, const1, cmp_minus10);
+        /* Second row: same computation. */
+        cmp_minus11 = ((v16u8) src_zero1 == src_minus11);
+        diff_minus11 = __msa_nor_v(cmp_minus11, cmp_minus11);
+        cmp_minus11 = (src_minus11 < (v16u8) src_zero1);
+        diff_minus11 = __msa_bmnz_v(diff_minus11, const1, cmp_minus11);
+        /* NOTE(review): unsigned pair sum + 2; 0xFF terms exceed 4 - confirm VSHF masks. */
+        offset_mask0 = (v8i16) (__msa_hadd_u_h(diff_minus10, diff_minus10) + 2);
+        offset_mask1 = (v8i16) (__msa_hadd_u_h(diff_minus11, diff_minus11) + 2);
+        /* Presumably looks up edge_idx, then sao_offset; then add to pixels and clip. */
+        VSHF_H2_SH(edge_idx, edge_idx, sao_offset, sao_offset, offset_mask0,
+                   offset_mask0, offset_mask0, offset_mask0);
+        VSHF_H2_SH(edge_idx, edge_idx, sao_offset, sao_offset, offset_mask1,
+                   offset_mask1, offset_mask1, offset_mask1);
+        ILVEV_B2_SH(src_zero0, zero, src_zero1, zero, src00, src01);
+        ADD2(offset_mask0, src00, offset_mask1, src01, offset_mask0,
+             offset_mask1);
+        CLIP_SH2_0_255(offset_mask0, offset_mask1);
+        dst0 = __msa_pckev_b((v16i8) offset_mask1, (v16i8) offset_mask0);
+        /* The two rows just loaded become above/centre for the next pass. */
+        src_minus10 = src10;
+        src_minus11 = src11;
+        /* Words 0 and 2 of dst0 are written as the two output rows. */
+        dst_val0 = __msa_copy_u_w((v4i32) dst0, 0);
+        dst_val1 = __msa_copy_u_w((v4i32) dst0, 2);
+        SW(dst_val0, dst);
+        dst += dst_stride;
+        SW(dst_val1, dst);
+        /* Step to the next pair of rows. */
+        dst += dst_stride;
+        src += (src_stride << 1);
+    }
+}
+
+static void hevc_sao_edge_filter_90degree_8width_msa(uint8_t *dst,
+                                                     int32_t dst_stride,
+                                                     uint8_t *src,
+                                                     int32_t src_stride,
+                                                     int16_t *sao_offset_val,
+                                                     int32_t height)
+{
+    int32_t h_cnt;
+    uint64_t dst_val0, dst_val1;
+    v8i16 edge_idx = { 1, 2, 0, 3, 4, 0, 0, 0 };
+    v16u8 const1 = (v16u8) __msa_ldi_b(1);
+    v16i8 zero = { 0 };
+    v16i8 src_zero0, src_zero1, dst0, dst1;
+    v16u8 cmp_minus10, diff_minus10, cmp_minus11, diff_minus11;
+    v16u8 src_minus10, src_minus11, src10, src11;
+    v8i16 sao_offset, src00, offset_mask0, src01, offset_mask1;
+    /* SAO edge filter, above/below neighbours; 8 bytes stored per row. */
+    sao_offset = LD_SH(sao_offset_val);
+    /* Prime the window with the row above src and the row at src. */
+    LD_UB2(src - src_stride, src_stride, src_minus10, src_minus11);
+    /* height >> 1 passes of two rows; an odd trailing row is not processed. */
+    for (h_cnt = (height >> 1); h_cnt--;) {
+        LD_UB2(src + src_stride, src_stride, src10, src11);
+        /* Interleave above/below neighbour bytes; duplicate each centre byte. */
+        src_minus10 = (v16u8) __msa_ilvr_b((v16i8) src10, (v16i8) src_minus10);
+        src_zero0 = __msa_ilvr_b((v16i8) src_minus11, (v16i8) src_minus11);
+        src_minus11 = (v16u8) __msa_ilvr_b((v16i8) src11, (v16i8) src_minus11);
+        src_zero1 = __msa_ilvr_b((v16i8) src10, (v16i8) src10);
+        /* First row: 0 if equal, 1 if neighbour smaller, 0xFF if larger. */
+        cmp_minus10 = ((v16u8) src_zero0 == src_minus10);
+        diff_minus10 = __msa_nor_v(cmp_minus10, cmp_minus10);
+        cmp_minus10 = (src_minus10 < (v16u8) src_zero0);
+        diff_minus10 = __msa_bmnz_v(diff_minus10, const1, cmp_minus10);
+        /* Second row: same computation. */
+        cmp_minus11 = ((v16u8) src_zero1 == src_minus11);
+        diff_minus11 = __msa_nor_v(cmp_minus11, cmp_minus11);
+        cmp_minus11 = (src_minus11 < (v16u8) src_zero1);
+        diff_minus11 = __msa_bmnz_v(diff_minus11, const1, cmp_minus11);
+        /* NOTE(review): unsigned pair sum + 2; 0xFF terms exceed 4 - confirm VSHF masks. */
+        offset_mask0 = (v8i16) (__msa_hadd_u_h(diff_minus10, diff_minus10) + 2);
+        offset_mask1 = (v8i16) (__msa_hadd_u_h(diff_minus11, diff_minus11) + 2);
+        /* Presumably looks up edge_idx, then sao_offset; then add to pixels and clip. */
+        VSHF_H2_SH(edge_idx, edge_idx, sao_offset, sao_offset, offset_mask0,
+                   offset_mask0, offset_mask0, offset_mask0);
+        VSHF_H2_SH(edge_idx, edge_idx, sao_offset, sao_offset, offset_mask1,
+                   offset_mask1, offset_mask1, offset_mask1);
+        ILVEV_B2_SH(src_zero0, zero, src_zero1, zero, src00, src01);
+        ADD2(offset_mask0, src00, offset_mask1, src01, offset_mask0,
+             offset_mask1);
+        CLIP_SH2_0_255(offset_mask0, offset_mask1);
+        PCKEV_B2_SB(offset_mask0, offset_mask0, offset_mask1, offset_mask1,
+                    dst0, dst1);
+        /* The two rows just loaded become above/centre for the next pass. */
+        src_minus10 = src10;
+        src_minus11 = src11;
+        /* Doubleword 0 of dst0 and of dst1 are written as the two output rows. */
+        dst_val0 = __msa_copy_u_d((v2i64) dst0, 0);
+        dst_val1 = __msa_copy_u_d((v2i64) dst1, 0);
+        SD(dst_val0, dst);
+        dst += dst_stride;
+        SD(dst_val1, dst);
+        dst += dst_stride;
+        src += (src_stride << 1);
+    }
+}
+
+static void hevc_sao_edge_filter_90degree_16multiple_msa(uint8_t *dst,
+                                                         int32_t dst_stride,
+                                                         uint8_t *src,
+                                                         int32_t src_stride,
+                                                         int16_t *
+                                                         sao_offset_val,
+                                                         int32_t width,
+                                                         int32_t height)
+{
+    uint8_t *src_orig = src;
+    uint8_t *dst_orig = dst;
+    int32_t h_cnt, v_cnt;
+    v16i8 edge_idx = { 1, 2, 0, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+    v16u8 const1 = (v16u8) __msa_ldi_b(1);
+    v16u8 cmp_minus10, cmp_plus10, diff_minus10, diff_plus10, cmp_minus11;
+    v16u8 cmp_plus11, diff_minus11, diff_plus11, cmp_minus12, cmp_plus12;
+    v16u8 diff_minus12, diff_plus12, cmp_minus13, cmp_plus13, diff_minus13;
+    v16u8 diff_plus13;
+    v16u8 src10, src_minus10, dst0, src11, src_minus11, dst1;
+    v16u8 src12, dst2, src13, dst3;
+    v16i8 offset_mask0, offset_mask1, offset_mask2, offset_mask3, sao_offset;
+    v8i16 src0, src1, src2, src3, src4, src5, src6, src7;
+    v8i16 temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+    /* SAO edge filter, above/below neighbours; 16-column strips, 4 rows/pass. */
+    sao_offset = LD_SB(sao_offset_val);
+    sao_offset = __msa_pckev_b(sao_offset, sao_offset); /* even bytes only */
+    /* width >> 4 strips; columns past a multiple of 16 are not processed. */
+    for (v_cnt = 0; v_cnt < (width >> 4); v_cnt++) {
+        src = src_orig + (v_cnt << 4);
+        dst = dst_orig + (v_cnt << 4);
+        /* Prime the window with the row above src and the row at src. */
+        LD_UB2(src - src_stride, src_stride, src_minus10, src_minus11);
+        /* height >> 2 passes; rows past a multiple of 4 are not processed. */
+        for (h_cnt = (height >> 2); h_cnt--;) {
+            LD_UB4(src + src_stride, src_stride, src10, src11, src12, src13);
+            /* Equality of each centre row with the row above and the row below. */
+            cmp_minus10 = (src_minus11 == src_minus10);
+            cmp_plus10 = (src_minus11 == src10);
+            cmp_minus11 = (src10 == src_minus11);
+            cmp_plus11 = (src10 == src11);
+            cmp_minus12 = (src11 == src10);
+            cmp_plus12 = (src11 == src12);
+            cmp_minus13 = (src12 == src11);
+            cmp_plus13 = (src12 == src13);
+            /* NOR of a mask with itself inverts it: 0xFF where they differ. */
+            diff_minus10 = __msa_nor_v(cmp_minus10, cmp_minus10);
+            diff_plus10 = __msa_nor_v(cmp_plus10, cmp_plus10);
+            diff_minus11 = __msa_nor_v(cmp_minus11, cmp_minus11);
+            diff_plus11 = __msa_nor_v(cmp_plus11, cmp_plus11);
+            diff_minus12 = __msa_nor_v(cmp_minus12, cmp_minus12);
+            diff_plus12 = __msa_nor_v(cmp_plus12, cmp_plus12);
+            diff_minus13 = __msa_nor_v(cmp_minus13, cmp_minus13);
+            diff_plus13 = __msa_nor_v(cmp_plus13, cmp_plus13);
+            /* Masks of lanes where the neighbour row is smaller than the centre. */
+            cmp_minus10 = (src_minus10 < src_minus11);
+            cmp_plus10 = (src10 < src_minus11);
+            cmp_minus11 = (src_minus11 < src10);
+            cmp_plus11 = (src11 < src10);
+            cmp_minus12 = (src10 < src11);
+            cmp_plus12 = (src12 < src11);
+            cmp_minus13 = (src11 < src12);
+            cmp_plus13 = (src13 < src12);
+            /* In those lanes replace 0xFF with 1: diff is now -1, 0 or +1. */
+            diff_minus10 = __msa_bmnz_v(diff_minus10, const1, cmp_minus10);
+            diff_plus10 = __msa_bmnz_v(diff_plus10, const1, cmp_plus10);
+            diff_minus11 = __msa_bmnz_v(diff_minus11, const1, cmp_minus11);
+            diff_plus11 = __msa_bmnz_v(diff_plus11, const1, cmp_plus11);
+            diff_minus12 = __msa_bmnz_v(diff_minus12, const1, cmp_minus12);
+            diff_plus12 = __msa_bmnz_v(diff_plus12, const1, cmp_plus12);
+            diff_minus13 = __msa_bmnz_v(diff_minus13, const1, cmp_minus13);
+            diff_plus13 = __msa_bmnz_v(diff_plus13, const1, cmp_plus13);
+            /* 2 + both signs = 0..4; presumably looked up in edge_idx, then sao_offset. */
+            offset_mask0 = 2 + (v16i8) diff_minus10 + (v16i8) diff_plus10;
+            VSHF_B2_SB(edge_idx, edge_idx, sao_offset, sao_offset,
+                       offset_mask0, offset_mask0, offset_mask0, offset_mask0);
+            offset_mask1 = 2 + (v16i8) diff_minus11 + (v16i8) diff_plus11;
+            VSHF_B2_SB(edge_idx, edge_idx, sao_offset, sao_offset,
+                       offset_mask1, offset_mask1, offset_mask1, offset_mask1);
+            offset_mask2 = 2 + (v16i8) diff_minus12 + (v16i8) diff_plus12;
+            VSHF_B2_SB(edge_idx, edge_idx, sao_offset, sao_offset,
+                       offset_mask2, offset_mask2, offset_mask2, offset_mask2);
+            offset_mask3 = 2 + (v16i8) diff_minus13 + (v16i8) diff_plus13;
+            VSHF_B2_SB(edge_idx, edge_idx, sao_offset, sao_offset,
+                       offset_mask3, offset_mask3, offset_mask3, offset_mask3);
+            /* Presumably: widen pixels and offsets to 16 bit, add, clip, repack. */
+            UNPCK_UB_SH(src_minus11, src0, src1);
+            UNPCK_SB_SH(offset_mask0, temp0, temp1);
+            UNPCK_UB_SH(src10, src2, src3);
+            UNPCK_SB_SH(offset_mask1, temp2, temp3);
+            UNPCK_UB_SH(src11, src4, src5);
+            UNPCK_SB_SH(offset_mask2, temp4, temp5);
+            UNPCK_UB_SH(src12, src6, src7);
+            UNPCK_SB_SH(offset_mask3, temp6, temp7);
+            ADD4(temp0, src0, temp1, src1, temp2, src2, temp3, src3, temp0,
+                 temp1, temp2, temp3);
+            ADD4(temp4, src4, temp5, src5, temp6, src6, temp7, src7, temp4,
+                 temp5, temp6, temp7);
+            CLIP_SH4_0_255(temp0, temp1, temp2, temp3);
+            CLIP_SH4_0_255(temp4, temp5, temp6, temp7);
+            PCKEV_B4_UB(temp1, temp0, temp3, temp2, temp5, temp4, temp7, temp6,
+                        dst0, dst1, dst2, dst3);
+            /* The last two loaded rows become above/centre for the next pass. */
+            src_minus10 = src12;
+            src_minus11 = src13;
+            /* Store the four filtered rows of this strip. */
+            ST_UB4(dst0, dst1, dst2, dst3, dst, dst_stride);
+            /* Move down four rows within the strip. */
+            src += (src_stride << 2);
+            dst += (dst_stride << 2);
+        }
+    }
+}
+
+static void hevc_sao_edge_filter_45degree_4width_msa(uint8_t *dst,
+                                                     int32_t dst_stride,
+                                                     uint8_t *src,
+                                                     int32_t src_stride,
+                                                     int16_t *sao_offset_val,
+                                                     int32_t height)
+{
+    uint8_t *src_orig;
+    int32_t h_cnt;
+    uint32_t dst_val0, dst_val1;
+    v8i16 edge_idx = { 1, 2, 0, 3, 4, 0, 0, 0 };
+    v16u8 const1 = (v16u8) __msa_ldi_b(1);
+    v16i8 zero = { 0 };
+    v16u8 cmp_minus10, diff_minus10, src_minus10, cmp_minus11, diff_minus11;
+    v16u8 src_minus11, src10, src11;
+    v16i8 src_plus0, src_zero0, src_plus1, src_zero1, dst0;
+    v8i16 sao_offset, src00, src01, offset_mask0, offset_mask1;
+    /* SAO edge filter, upper-left/lower-right neighbours; 4 bytes stored per row. */
+    sao_offset = LD_SH(sao_offset_val);
+    /* All loads start one column left of src; prime with the rows above and at src. */
+    src_orig = src - 1;
+    LD_UB2(src_orig - src_stride, src_stride, src_minus10, src_minus11);
+    /* height >> 1 passes of two rows; an odd trailing row is not processed. */
+    for (h_cnt = (height >> 1); h_cnt--;) {
+        LD_UB2(src_orig + src_stride, src_stride, src10, src11);
+        /* NOTE(review): SLDI presumably byte-shifts: 1 = centre, 2 = lower-right. */
+        SLDI_B2_0_SB(src_minus11, src10, src_zero0, src_zero1, 1);
+        SLDI_B2_0_SB(src10, src11, src_plus0, src_plus1, 2);
+        /* Presumably interleaves both diagonal neighbours; duplicates centre bytes. */
+        ILVR_B2_UB(src_plus0, src_minus10, src_plus1, src_minus11, src_minus10,
+                   src_minus11);
+        ILVR_B2_SB(src_zero0, src_zero0, src_zero1, src_zero1, src_zero0,
+                   src_zero1);
+        /* First row: 0 if equal, 1 if neighbour smaller, 0xFF if larger. */
+        cmp_minus10 = ((v16u8) src_zero0 == src_minus10);
+        diff_minus10 = __msa_nor_v(cmp_minus10, cmp_minus10);
+        cmp_minus10 = (src_minus10 < (v16u8) src_zero0);
+        diff_minus10 = __msa_bmnz_v(diff_minus10, const1, cmp_minus10);
+        /* Second row: same computation. */
+        cmp_minus11 = ((v16u8) src_zero1 == src_minus11);
+        diff_minus11 = __msa_nor_v(cmp_minus11, cmp_minus11);
+        cmp_minus11 = (src_minus11 < (v16u8) src_zero1);
+        diff_minus11 = __msa_bmnz_v(diff_minus11, const1, cmp_minus11);
+        /* NOTE(review): unsigned pair sum + 2; 0xFF terms exceed 4 - confirm VSHF masks. */
+        offset_mask0 = (v8i16) (__msa_hadd_u_h(diff_minus10, diff_minus10) + 2);
+        offset_mask1 = (v8i16) (__msa_hadd_u_h(diff_minus11, diff_minus11) + 2);
+        /* Presumably looks up edge_idx, then sao_offset; then add to pixels and clip. */
+        VSHF_H2_SH(edge_idx, edge_idx, sao_offset, sao_offset, offset_mask0,
+                   offset_mask0, offset_mask0, offset_mask0);
+        VSHF_H2_SH(edge_idx, edge_idx, sao_offset, sao_offset, offset_mask1,
+                   offset_mask1, offset_mask1, offset_mask1);
+        ILVEV_B2_SH(src_zero0, zero, src_zero1, zero, src00, src01);
+        ADD2(offset_mask0, src00, offset_mask1, src01, offset_mask0,
+             offset_mask1);
+        CLIP_SH2_0_255(offset_mask0, offset_mask1);
+        /* Pack the even bytes of both rows' results into one vector. */
+        dst0 = __msa_pckev_b((v16i8) offset_mask1, (v16i8) offset_mask0);
+        /* The two rows just loaded become above/centre for the next pass. */
+        src_minus10 = src10;
+        src_minus11 = src11;
+        /* Words 0 and 2 of dst0 are written as the two output rows. */
+        dst_val0 = __msa_copy_u_w((v4i32) dst0, 0);
+        dst_val1 = __msa_copy_u_w((v4i32) dst0, 2);
+        SW(dst_val0, dst);
+        dst += dst_stride;
+        SW(dst_val1, dst);
+        /* Step to the next pair of rows. */
+        dst += dst_stride;
+        src_orig += (src_stride << 1);
+    }
+}
+
+static void hevc_sao_edge_filter_45degree_8width_msa(uint8_t *dst,
+                                                     int32_t dst_stride,
+                                                     uint8_t *src,
+                                                     int32_t src_stride,
+                                                     int16_t *sao_offset_val,
+                                                     int32_t height)
+{
+    uint8_t *src_orig;
+    int32_t h_cnt;
+    uint64_t dst_val0, dst_val1;
+    v8i16 edge_idx = { 1, 2, 0, 3, 4, 0, 0, 0 };
+    v16u8 const1 = (v16u8) __msa_ldi_b(1);
+    v16i8 zero = { 0 };
+    v16u8 cmp_minus10, diff_minus10, cmp_minus11, diff_minus11;
+    v16u8 src_minus10, src10, src_minus11, src11;
+    v16i8 src_zero0, src_plus10, src_zero1, src_plus11, dst0, dst1;
+    v8i16 sao_offset, src00, offset_mask0, src01, offset_mask1;
+    /* SAO edge filter, upper-left/lower-right neighbours; 8 bytes stored per row. */
+    sao_offset = LD_SH(sao_offset_val);
+    src_orig = src - 1;
+    /* Loads start one column left of src; prime with the rows above and at src. */
+    LD_UB2(src_orig - src_stride, src_stride, src_minus10, src_minus11);
+    /* height >> 1 passes of two rows; an odd trailing row is not processed. */
+    for (h_cnt = (height >> 1); h_cnt--;) {
+        LD_UB2(src_orig + src_stride, src_stride, src10, src11);
+        /* NOTE(review): SLDI presumably byte-shifts: 1 = centre, 2 = lower-right. */
+        SLDI_B2_0_SB(src_minus11, src10, src_zero0, src_zero1, 1);
+        SLDI_B2_0_SB(src10, src11, src_plus10, src_plus11, 2);
+        /* Presumably interleaves both diagonal neighbours; duplicates centre bytes. */
+        ILVR_B2_UB(src_plus10, src_minus10, src_plus11, src_minus11,
+                   src_minus10, src_minus11);
+        ILVR_B2_SB(src_zero0, src_zero0, src_zero1, src_zero1,
+                   src_zero0, src_zero1);
+        /* First row: 0 if equal, 1 if neighbour smaller, 0xFF if larger. */
+        cmp_minus10 = ((v16u8) src_zero0 == src_minus10);
+        diff_minus10 = __msa_nor_v(cmp_minus10, cmp_minus10);
+        cmp_minus10 = (src_minus10 < (v16u8) src_zero0);
+        diff_minus10 = __msa_bmnz_v(diff_minus10, const1, cmp_minus10);
+        /* Second row: same computation. */
+        cmp_minus11 = ((v16u8) src_zero1 == src_minus11);
+        diff_minus11 = __msa_nor_v(cmp_minus11, cmp_minus11);
+        cmp_minus11 = (src_minus11 < (v16u8) src_zero1);
+        diff_minus11 = __msa_bmnz_v(diff_minus11, const1, cmp_minus11);
+        /* NOTE(review): unsigned pair sum + 2; 0xFF terms exceed 4 - confirm VSHF masks. */
+        offset_mask0 = (v8i16) (__msa_hadd_u_h(diff_minus10, diff_minus10) + 2);
+        offset_mask1 = (v8i16) (__msa_hadd_u_h(diff_minus11, diff_minus11) + 2);
+        /* Presumably looks up edge_idx, then sao_offset; then add to pixels and clip. */
+        VSHF_H2_SH(edge_idx, edge_idx, sao_offset, sao_offset, offset_mask0,
+                   offset_mask0, offset_mask0, offset_mask0);
+        VSHF_H2_SH(edge_idx, edge_idx, sao_offset, sao_offset, offset_mask1,
+                   offset_mask1, offset_mask1, offset_mask1);
+        ILVEV_B2_SH(src_zero0, zero, src_zero1, zero, src00, src01);
+        ADD2(offset_mask0, src00, offset_mask1, src01, offset_mask0,
+             offset_mask1);
+        CLIP_SH2_0_255(offset_mask0, offset_mask1);
+        PCKEV_B2_SB(offset_mask0, offset_mask0, offset_mask1, offset_mask1,
+                    dst0, dst1);
+        /* The two rows just loaded become above/centre for the next pass. */
+        src_minus10 = src10;
+        src_minus11 = src11;
+        /* Doubleword 0 of dst0 and of dst1 are written as the two output rows. */
+        dst_val0 = __msa_copy_u_d((v2i64) dst0, 0);
+        dst_val1 = __msa_copy_u_d((v2i64) dst1, 0);
+        SD(dst_val0, dst);
+        dst += dst_stride;
+        SD(dst_val1, dst);
+        /* Step to the next pair of rows. */
+        dst += dst_stride;
+        src_orig += (src_stride << 1);
+    }
+}
+
+static void hevc_sao_edge_filter_45degree_16multiple_msa(uint8_t *dst,
+                                                         int32_t dst_stride,
+                                                         uint8_t *src,
+                                                         int32_t src_stride,
+                                                         int16_t *
+                                                         sao_offset_val,
+                                                         int32_t width,
+                                                         int32_t height)
+{
+    uint8_t *src_orig = src;
+    uint8_t *dst_orig = dst;
+    int32_t h_cnt, v_cnt;
+    v16i8 edge_idx = { 1, 2, 0, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+    v16u8 const1 = (v16u8) __msa_ldi_b(1);
+    v16u8 cmp_minus10, cmp_plus10, diff_minus10, diff_plus10, cmp_minus11;
+    v16u8 cmp_plus11, diff_minus11, diff_plus11, cmp_minus12, cmp_plus12;
+    v16u8 diff_minus12, diff_plus12, cmp_minus13, cmp_plus13, diff_minus13;
+    v16u8 diff_plus13, src_minus14, src_plus13;
+    v16i8 offset_mask0, offset_mask1, offset_mask2, offset_mask3;
+    v16u8 src10, src_minus10, dst0, src11, src_minus11, dst1;
+    v16u8 src12, src_minus12, dst2, src13, src_minus13, dst3;
+    v16i8 src_zero0, src_plus10, src_zero1, src_plus11, src_zero2, src_plus12;
+    v16i8 src_zero3, sao_offset;
+    v8i16 src0, src1, src2, src3, src4, src5, src6, src7;
+    v8i16 temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+    /* SAO edge filter, upper-left/lower-right neighbours; 16 cols x 4 rows per step. */
+    sao_offset = LD_SB(sao_offset_val);
+    sao_offset = __msa_pckev_b(sao_offset, sao_offset); /* even bytes only */
+    /* height >> 2 passes; rows past a multiple of 4 are not processed. */
+    for (h_cnt = (height >> 2); h_cnt--;) {
+        src_orig = src - 1;
+        dst_orig = dst;
+        LD_UB4(src_orig, src_stride,
+               src_minus11, src_minus12, src_minus13, src_minus14);
+        /* width >> 4 blocks; columns past a multiple of 16 are not processed. */
+        for (v_cnt = 0; v_cnt < (width >> 4); v_cnt++) {
+            src_minus10 = LD_UB(src_orig - src_stride);
+            LD_UB4(src_orig + 16, src_stride, src10, src11, src12, src13);
+            src_plus13 = LD_UB(src + 1 + (v_cnt << 4) + (src_stride << 2));
+            src_orig += 16;
+            /* NOTE(review): SLDI presumably byte-shifts: 1 = centre, 2 = next row right. */
+            SLDI_B2_SB(src10, src11, src_minus11, src_minus12, src_zero0,
+                       src_zero1, 1);
+            SLDI_B2_SB(src12, src13, src_minus13, src_minus14, src_zero2,
+                       src_zero3, 1);
+            SLDI_B2_SB(src11, src12, src_minus12, src_minus13, src_plus10,
+                       src_plus11, 2);
+            /* Third row's lower-right neighbour comes from the fourth row's data. */
+            src_plus12 = __msa_sldi_b((v16i8) src13, (v16i8) src_minus14, 2);
+            /* Equality of each centre with its upper-left and lower-right neighbour. */
+            cmp_minus10 = ((v16u8) src_zero0 == src_minus10);
+            cmp_plus10 = ((v16u8) src_zero0 == (v16u8) src_plus10);
+            cmp_minus11 = ((v16u8) src_zero1 == src_minus11);
+            cmp_plus11 = ((v16u8) src_zero1 == (v16u8) src_plus11);
+            cmp_minus12 = ((v16u8) src_zero2 == src_minus12);
+            cmp_plus12 = ((v16u8) src_zero2 == (v16u8) src_plus12);
+            cmp_minus13 = ((v16u8) src_zero3 == src_minus13);
+            cmp_plus13 = ((v16u8) src_zero3 == src_plus13);
+            /* NOR of a mask with itself inverts it: 0xFF where they differ. */
+            diff_minus10 = __msa_nor_v(cmp_minus10, cmp_minus10);
+            diff_plus10 = __msa_nor_v(cmp_plus10, cmp_plus10);
+            diff_minus11 = __msa_nor_v(cmp_minus11, cmp_minus11);
+            diff_plus11 = __msa_nor_v(cmp_plus11, cmp_plus11);
+            diff_minus12 = __msa_nor_v(cmp_minus12, cmp_minus12);
+            diff_plus12 = __msa_nor_v(cmp_plus12, cmp_plus12);
+            diff_minus13 = __msa_nor_v(cmp_minus13, cmp_minus13);
+            diff_plus13 = __msa_nor_v(cmp_plus13, cmp_plus13);
+            /* Masks of lanes where the neighbour is smaller than the centre. */
+            cmp_minus10 = (src_minus10 < (v16u8) src_zero0);
+            cmp_plus10 = ((v16u8) src_plus10 < (v16u8) src_zero0);
+            cmp_minus11 = (src_minus11 < (v16u8) src_zero1);
+            cmp_plus11 = ((v16u8) src_plus11 < (v16u8) src_zero1);
+            cmp_minus12 = (src_minus12 < (v16u8) src_zero2);
+            cmp_plus12 = ((v16u8) src_plus12 < (v16u8) src_zero2);
+            cmp_minus13 = (src_minus13 < (v16u8) src_zero3);
+            cmp_plus13 = (src_plus13 < (v16u8) src_zero3);
+            /* In those lanes replace 0xFF with 1: diff is now -1, 0 or +1. */
+            diff_minus10 = __msa_bmnz_v(diff_minus10, const1, cmp_minus10);
+            diff_plus10 = __msa_bmnz_v(diff_plus10, const1, cmp_plus10);
+            diff_minus11 = __msa_bmnz_v(diff_minus11, const1, cmp_minus11);
+            diff_plus11 = __msa_bmnz_v(diff_plus11, const1, cmp_plus11);
+            diff_minus12 = __msa_bmnz_v(diff_minus12, const1, cmp_minus12);
+            diff_plus12 = __msa_bmnz_v(diff_plus12, const1, cmp_plus12);
+            diff_minus13 = __msa_bmnz_v(diff_minus13, const1, cmp_minus13);
+            diff_plus13 = __msa_bmnz_v(diff_plus13, const1, cmp_plus13);
+            /* 2 + both signs gives a value 0..4 per pixel. */
+            offset_mask0 = 2 + (v16i8) diff_minus10 + (v16i8) diff_plus10;
+            offset_mask1 = 2 + (v16i8) diff_minus11 + (v16i8) diff_plus11;
+            offset_mask2 = 2 + (v16i8) diff_minus12 + (v16i8) diff_plus12;
+            offset_mask3 = 2 + (v16i8) diff_minus13 + (v16i8) diff_plus13;
+            /* NOTE(review): VSHF_B2_SB presumably looks up edge_idx, then sao_offset. */
+            VSHF_B2_SB(edge_idx, edge_idx, sao_offset, sao_offset,
+                       offset_mask0, offset_mask0, offset_mask0, offset_mask0);
+            VSHF_B2_SB(edge_idx, edge_idx, sao_offset, sao_offset,
+                       offset_mask1, offset_mask1, offset_mask1, offset_mask1);
+            VSHF_B2_SB(edge_idx, edge_idx, sao_offset, sao_offset,
+                       offset_mask2, offset_mask2, offset_mask2, offset_mask2);
+            VSHF_B2_SB(edge_idx, edge_idx, sao_offset, sao_offset,
+                       offset_mask3, offset_mask3, offset_mask3, offset_mask3);
+            /* Presumably: widen pixels and offsets to 16 bit, add, clip, repack. */
+            UNPCK_UB_SH(src_zero0, src0, src1);
+            UNPCK_SB_SH(offset_mask0, temp0, temp1);
+            UNPCK_UB_SH(src_zero1, src2, src3);
+            UNPCK_SB_SH(offset_mask1, temp2, temp3);
+            UNPCK_UB_SH(src_zero2, src4, src5);
+            UNPCK_SB_SH(offset_mask2, temp4, temp5);
+            UNPCK_UB_SH(src_zero3, src6, src7);
+            UNPCK_SB_SH(offset_mask3, temp6, temp7);
+            ADD4(temp0, src0, temp1, src1, temp2, src2, temp3, src3, temp0,
+                 temp1, temp2, temp3);
+            ADD4(temp4, src4, temp5, src5, temp6, src6, temp7, src7, temp4,
+                 temp5, temp6, temp7);
+            CLIP_SH4_0_255(temp0, temp1, temp2, temp3);
+            CLIP_SH4_0_255(temp4, temp5, temp6, temp7);
+            PCKEV_B4_UB(temp1, temp0, temp3, temp2, temp5, temp4,
+                        temp7, temp6, dst0, dst1, dst2, dst3);
+            /* Current loads become the left context of the next 16-column block. */
+            src_minus11 = src10;
+            src_minus12 = src11;
+            src_minus13 = src12;
+            src_minus14 = src13;
+            /* Store the four filtered rows and move 16 columns right. */
+            ST_UB4(dst0, dst1, dst2, dst3, dst_orig, dst_stride);
+            dst_orig += 16;
+        }
+        /* Advance both pointers by four rows. */
+        src += (src_stride << 2);
+        dst += (dst_stride << 2);
+    }
+}
+
+static void hevc_sao_edge_filter_135degree_4width_msa(uint8_t *dst,
+                                                      int32_t dst_stride,
+                                                      uint8_t *src,
+                                                      int32_t src_stride,
+                                                      int16_t *sao_offset_val,
+                                                      int32_t height)
+{
+    uint8_t *src_orig;
+    int32_t h_cnt;
+    uint32_t dst_val0, dst_val1;
+    v8i16 edge_idx = { 1, 2, 0, 3, 4, 0, 0, 0 };
+    v16u8 const1 = (v16u8) __msa_ldi_b(1);
+    v16i8 zero = { 0 };
+    v16i8 src_zero0, src_zero1, dst0;
+    v16u8 cmp_minus10, diff_minus10, cmp_minus11, diff_minus11;
+    v16u8 src_minus10, src10, src_minus11, src11;
+    v8i16 offset_mask0, offset_mask1, sao_offset, src00, src01;
+    /* SAO edge filter, upper-right/lower-left neighbours; 4 bytes stored per row. */
+    sao_offset = LD_SH(sao_offset_val);
+    src_orig = src - 1;
+    /* Loads start one column left of src; prime with the rows above and at src. */
+    LD_UB2(src_orig - src_stride, src_stride, src_minus10, src_minus11);
+    /* height >> 1 passes of two rows; an odd trailing row is not processed. */
+    for (h_cnt = (height >> 1); h_cnt--;) {
+        LD_UB2(src_orig + src_stride, src_stride, src10, src11);
+        /* NOTE(review): SLDI presumably byte-shifts: 1 = centre, 2 = upper-right. */
+        SLDI_B2_0_SB(src_minus11, src10, src_zero0, src_zero1, 1);
+        SLDI_B2_0_UB(src_minus10, src_minus11, src_minus10, src_minus11, 2);
+        /* Presumably interleaves both diagonal neighbours; duplicates centre bytes. */
+        ILVR_B2_UB(src10, src_minus10, src11, src_minus11, src_minus10,
+                   src_minus11);
+        ILVR_B2_SB(src_zero0, src_zero0, src_zero1, src_zero1, src_zero0,
+                   src_zero1);
+        /* First row: 0 if equal, 1 if neighbour smaller, 0xFF if larger. */
+        cmp_minus10 = ((v16u8) src_zero0 == src_minus10);
+        diff_minus10 = __msa_nor_v(cmp_minus10, cmp_minus10);
+        cmp_minus10 = (src_minus10 < (v16u8) src_zero0);
+        diff_minus10 = __msa_bmnz_v(diff_minus10, const1, cmp_minus10);
+        /* Second row: same computation. */
+        cmp_minus11 = ((v16u8) src_zero1 == src_minus11);
+        diff_minus11 = __msa_nor_v(cmp_minus11, cmp_minus11);
+        cmp_minus11 = (src_minus11 < (v16u8) src_zero1);
+        diff_minus11 = __msa_bmnz_v(diff_minus11, const1, cmp_minus11);
+        /* NOTE(review): unsigned pair sum + 2; 0xFF terms exceed 4 - confirm VSHF masks. */
+        offset_mask0 = (v8i16) (__msa_hadd_u_h(diff_minus10, diff_minus10) + 2);
+        offset_mask1 = (v8i16) (__msa_hadd_u_h(diff_minus11, diff_minus11) + 2);
+        /* Presumably looks up edge_idx, then sao_offset; then add to pixels and clip. */
+        VSHF_H2_SH(edge_idx, edge_idx, sao_offset, sao_offset, offset_mask0,
+                   offset_mask0, offset_mask0, offset_mask0);
+        VSHF_H2_SH(edge_idx, edge_idx, sao_offset, sao_offset, offset_mask1,
+                   offset_mask1, offset_mask1, offset_mask1);
+        ILVEV_B2_SH(src_zero0, zero, src_zero1, zero, src00, src01);
+        ADD2(offset_mask0, src00, offset_mask1, src01, offset_mask0,
+             offset_mask1);
+        CLIP_SH2_0_255(offset_mask0, offset_mask1);
+        dst0 = __msa_pckev_b((v16i8) offset_mask1, (v16i8) offset_mask0);
+        /* The two rows just loaded become above/centre for the next pass. */
+        src_minus10 = src10;
+        src_minus11 = src11;
+        /* Words 0 and 2 of dst0 are written as the two output rows. */
+        dst_val0 = __msa_copy_u_w((v4i32) dst0, 0);
+        dst_val1 = __msa_copy_u_w((v4i32) dst0, 2);
+        /* Write one word to each of the two destination rows. */
+        SW(dst_val0, dst);
+        dst += dst_stride;
+        SW(dst_val1, dst);
+        /* Step to the next pair of rows. */
+        dst += dst_stride;
+        src_orig += (src_stride << 1);
+    }
+}
+
+static void hevc_sao_edge_filter_135degree_8width_msa(uint8_t *dst,
+                                                      int32_t dst_stride,
+                                                      uint8_t *src,
+                                                      int32_t src_stride,
+                                                      int16_t *sao_offset_val,
+                                                      int32_t height)
+{ /* SAO edge filter, 135-degree variant: one 64-bit store per dst row. */
+    uint8_t *src_orig;
+    int32_t h_cnt;
+    uint64_t dst_val0, dst_val1;
+    v8i16 edge_idx = { 1, 2, 0, 3, 4, 0, 0, 0 };
+    v16u8 const1 = (v16u8) __msa_ldi_b(1);
+    v16i8 zero = { 0 };
+    v16i8 src_zero0, src_zero1, dst0, dst1;
+    v16u8 cmp_minus10, diff_minus10, cmp_minus11, diff_minus11;
+    v16u8 src_minus10, src10, src_minus11, src11;
+    v8i16 sao_offset, src00, offset_mask0, src01, offset_mask1;
+    /* Source reads begin one byte to the left of src. */
+    sao_offset = LD_SH(sao_offset_val);
+    src_orig = src - 1;
+
+    LD_UB2(src_orig - src_stride, src_stride, src_minus10, src_minus11);
+    /* Two dst rows are written per iteration, hence height >> 1 passes. */
+    for (h_cnt = (height >> 1); h_cnt--;) {
+        LD_UB2(src_orig + src_stride, src_stride, src10, src11);
+        /* NOTE(review): shifts presumably align centre (1), upper (2) rows. */
+        SLDI_B2_0_SB(src_minus11, src10, src_zero0, src_zero1, 1);
+        SLDI_B2_0_UB(src_minus10, src_minus11, src_minus10, src_minus11, 2);
+        ILVR_B2_UB(src10, src_minus10, src11, src_minus11, src_minus10,
+                   src_minus11);
+        ILVR_B2_SB(src_zero0, src_zero0, src_zero1, src_zero1, src_zero0,
+                   src_zero1);
+        /* diff: 0 if equal, 1 if neighbour < centre, otherwise 0xFF (-1). */
+        cmp_minus10 = ((v16u8) src_zero0 == src_minus10);
+        diff_minus10 = __msa_nor_v(cmp_minus10, cmp_minus10);
+        cmp_minus10 = (src_minus10 < (v16u8) src_zero0);
+        diff_minus10 = __msa_bmnz_v(diff_minus10, const1, cmp_minus10);
+        /* Same classification for the second output row. */
+        cmp_minus11 = ((v16u8) src_zero1 == src_minus11);
+        diff_minus11 = __msa_nor_v(cmp_minus11, cmp_minus11);
+        cmp_minus11 = (src_minus11 < (v16u8) src_zero1);
+        diff_minus11 = __msa_bmnz_v(diff_minus11, const1, cmp_minus11);
+        /* Sum adjacent diff bytes and add 2 to form the shuffle index. */
+        offset_mask0 = (v8i16) (__msa_hadd_u_h(diff_minus10, diff_minus10) + 2);
+        offset_mask1 = (v8i16) (__msa_hadd_u_h(diff_minus11, diff_minus11) + 2);
+        /* Presumably a two-step lookup: index -> edge_idx -> sao_offset. */
+        VSHF_H2_SH(edge_idx, edge_idx, sao_offset, sao_offset, offset_mask0,
+                   offset_mask0, offset_mask0, offset_mask0);
+        VSHF_H2_SH(edge_idx, edge_idx, sao_offset, sao_offset, offset_mask1,
+                   offset_mask1, offset_mask1, offset_mask1);
+        ILVEV_B2_SH(src_zero0, zero, src_zero1, zero, src00, src01);
+        ADD2(offset_mask0, src00, offset_mask1, src01, offset_mask0,
+             offset_mask1);
+        CLIP_SH2_0_255(offset_mask0, offset_mask1);
+        PCKEV_B2_SB(offset_mask0, offset_mask0, offset_mask1, offset_mask1,
+                    dst0, dst1);
+        /* The last two loaded rows become the "previous" rows next pass. */
+        src_minus10 = src10;
+        src_minus11 = src11;
+        /* Doubleword 0 of dst0 / dst1 is the first / second output row. */
+        dst_val0 = __msa_copy_u_d((v2i64) dst0, 0);
+        dst_val1 = __msa_copy_u_d((v2i64) dst1, 0);
+
+        SD(dst_val0, dst);
+        dst += dst_stride;
+        SD(dst_val1, dst);
+        dst += dst_stride;
+
+        src_orig += (src_stride << 1);
+    }
+}
+
+static void hevc_sao_edge_filter_135degree_16multiple_msa(uint8_t *dst,
+                                                          int32_t dst_stride,
+                                                          uint8_t *src,
+                                                          int32_t src_stride,
+                                                          int16_t *
+                                                          sao_offset_val,
+                                                          int32_t width,
+                                                          int32_t height)
+{ /* SAO edge filter, 135-degree variant, in 16-column by 4-row tiles. */
+    uint8_t *src_orig, *dst_orig;
+    int32_t h_cnt, v_cnt;
+    v16i8 edge_idx = { 1, 2, 0, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+    v16u8 const1 = (v16u8) __msa_ldi_b(1);
+    v16u8 dst0, dst1, dst2, dst3;
+    v16u8 cmp_minus10, cmp_minus11, cmp_minus12, cmp_minus13, cmp_plus10;
+    v16u8 cmp_plus11, cmp_plus12, cmp_plus13, diff_minus10, diff_minus11;
+    v16u8 diff_minus12, diff_minus13, diff_plus10, diff_plus11, diff_plus12;
+    v16u8 diff_plus13, src10, src11, src12, src13, src_minus10, src_minus11;
+    v16u8 src_plus10, src_plus11, src_plus12, src_plus13;
+    v16i8 src_minus12, src_minus13, src_zero0, src_zero1, src_zero2, src_zero3;
+    v16i8 offset_mask0, offset_mask1, offset_mask2, offset_mask3, sao_offset;
+    v8i16 src0, src1, src2, src3, src4, src5, src6, src7;
+    v8i16 temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+    /* pckev_b keeps even bytes: presumably narrows int16 offsets to int8. */
+    sao_offset = LD_SB(sao_offset_val);
+    sao_offset = __msa_pckev_b(sao_offset, sao_offset);
+    /* Each outer iteration filters a band of four rows. */
+    for (h_cnt = (height >> 2); h_cnt--;) {
+        src_orig = src - 1;
+        dst_orig = dst;
+        /* Prime the left-hand vectors, starting one byte left of src. */
+        LD_UB4(src_orig, src_stride,
+               src_minus11, src_plus10, src_plus11, src_plus12);
+        /* Walk the band in 16-byte steps: (width >> 4) steps in total. */
+        for (v_cnt = 0; v_cnt < (width >> 4); v_cnt++) {
+            src_minus10 = LD_UB(src_orig + 2 - src_stride);
+            LD_UB4(src_orig + 16, src_stride, src10, src11, src12, src13);
+            src_plus13 = LD_UB(src_orig + (src_stride << 2));
+            src_orig += 16;
+            /* Row 0: build centre bytes, test equality with both neighbours. */
+            src_zero0 = __msa_sldi_b((v16i8) src10, (v16i8) src_minus11, 1);
+            cmp_minus10 = ((v16u8) src_zero0 == src_minus10);
+            cmp_plus10 = ((v16u8) src_zero0 == src_plus10);
+            /* Row 1: its upper neighbour is row 0 slid by two bytes. */
+            src_zero1 = __msa_sldi_b((v16i8) src11, (v16i8) src_plus10, 1);
+            src_minus11 = (v16u8) __msa_sldi_b((v16i8) src10,
+                                               (v16i8) src_minus11, 2);
+            cmp_minus11 = ((v16u8) src_zero1 == src_minus11);
+            cmp_plus11 = ((v16u8) src_zero1 == src_plus11);
+            /* Row 2: same construction, one row further down. */
+            src_zero2 = __msa_sldi_b((v16i8) src12, (v16i8) src_plus11, 1);
+            src_minus12 = __msa_sldi_b((v16i8) src11, (v16i8) src_plus10, 2);
+            cmp_minus12 = ((v16u8) src_zero2 == (v16u8) src_minus12);
+            cmp_plus12 = ((v16u8) src_zero2 == src_plus12);
+            /* Row 3: same construction, one row further down. */
+            src_zero3 = __msa_sldi_b((v16i8) src13, (v16i8) src_plus12, 1);
+            src_minus13 = __msa_sldi_b((v16i8) src12, (v16i8) src_plus11, 2);
+            cmp_minus13 = ((v16u8) src_zero3 == (v16u8) src_minus13);
+            cmp_plus13 = ((v16u8) src_zero3 == src_plus13);
+            /* nor(x, x) inverts: 0x00 where equal, 0xFF where different. */
+            diff_minus10 = __msa_nor_v(cmp_minus10, cmp_minus10);
+            diff_plus10 = __msa_nor_v(cmp_plus10, cmp_plus10);
+            diff_minus11 = __msa_nor_v(cmp_minus11, cmp_minus11);
+            diff_plus11 = __msa_nor_v(cmp_plus11, cmp_plus11);
+            diff_minus12 = __msa_nor_v(cmp_minus12, cmp_minus12);
+            diff_plus12 = __msa_nor_v(cmp_plus12, cmp_plus12);
+            diff_minus13 = __msa_nor_v(cmp_minus13, cmp_minus13);
+            diff_plus13 = __msa_nor_v(cmp_plus13, cmp_plus13);
+            /* Mask of lanes whose neighbour is smaller than the centre. */
+            cmp_minus10 = (src_minus10 < (v16u8) src_zero0);
+            cmp_plus10 = (src_plus10 < (v16u8) src_zero0);
+            cmp_minus11 = (src_minus11 < (v16u8) src_zero1);
+            cmp_plus11 = (src_plus11 < (v16u8) src_zero1);
+            cmp_minus12 = ((v16u8) src_minus12 < (v16u8) src_zero2);
+            cmp_plus12 = (src_plus12 < (v16u8) src_zero2);
+            cmp_minus13 = ((v16u8) src_minus13 < (v16u8) src_zero3);
+            cmp_plus13 = (src_plus13 < (v16u8) src_zero3);
+            /* Set those lanes to 1: each diff is now 0, 1 or 0xFF (-1). */
+            diff_minus10 = __msa_bmnz_v(diff_minus10, const1, cmp_minus10);
+            diff_plus10 = __msa_bmnz_v(diff_plus10, const1, cmp_plus10);
+            diff_minus11 = __msa_bmnz_v(diff_minus11, const1, cmp_minus11);
+            diff_plus11 = __msa_bmnz_v(diff_plus11, const1, cmp_plus11);
+            diff_minus12 = __msa_bmnz_v(diff_minus12, const1, cmp_minus12);
+            diff_plus12 = __msa_bmnz_v(diff_plus12, const1, cmp_plus12);
+            diff_minus13 = __msa_bmnz_v(diff_minus13, const1, cmp_minus13);
+            diff_plus13 = __msa_bmnz_v(diff_plus13, const1, cmp_plus13);
+            /* Per-byte index = 2 + both signed diffs, i.e. a value in 0..4. */
+            offset_mask0 = 2 + (v16i8) diff_minus10 + (v16i8) diff_plus10;
+            offset_mask1 = 2 + (v16i8) diff_minus11 + (v16i8) diff_plus11;
+            offset_mask2 = 2 + (v16i8) diff_minus12 + (v16i8) diff_plus12;
+            offset_mask3 = 2 + (v16i8) diff_minus13 + (v16i8) diff_plus13;
+            /* Presumably a two-step lookup: index -> edge_idx -> sao_offset. */
+            VSHF_B2_SB(edge_idx, edge_idx, sao_offset, sao_offset,
+                       offset_mask0, offset_mask0, offset_mask0, offset_mask0);
+            VSHF_B2_SB(edge_idx, edge_idx, sao_offset, sao_offset,
+                       offset_mask1, offset_mask1, offset_mask1, offset_mask1);
+            VSHF_B2_SB(edge_idx, edge_idx, sao_offset, sao_offset,
+                       offset_mask2, offset_mask2, offset_mask2, offset_mask2);
+            VSHF_B2_SB(edge_idx, edge_idx, sao_offset, sao_offset,
+                       offset_mask3, offset_mask3, offset_mask3, offset_mask3);
+            /* Widen pixels (UB -> SH) and offsets (SB -> SH) to 16 bits. */
+            UNPCK_UB_SH(src_zero0, src0, src1);
+            UNPCK_SB_SH(offset_mask0, temp0, temp1);
+            UNPCK_UB_SH(src_zero1, src2, src3);
+            UNPCK_SB_SH(offset_mask1, temp2, temp3);
+            UNPCK_UB_SH(src_zero2, src4, src5);
+            UNPCK_SB_SH(offset_mask2, temp4, temp5);
+            UNPCK_UB_SH(src_zero3, src6, src7);
+            UNPCK_SB_SH(offset_mask3, temp6, temp7);
+            /* Add offsets, clamp to 0..255, then pack back down to bytes. */
+            ADD4(temp0, src0, temp1, src1, temp2, src2, temp3, src3, temp0,
+                 temp1, temp2, temp3);
+            ADD4(temp4, src4, temp5, src5, temp6, src6, temp7, src7, temp4,
+                 temp5, temp6, temp7);
+            CLIP_SH4_0_255(temp0, temp1, temp2, temp3);
+            CLIP_SH4_0_255(temp4, temp5, temp6, temp7);
+            PCKEV_B4_UB(temp1, temp0, temp3, temp2, temp5, temp4, temp7, temp6,
+                        dst0, dst1, dst2, dst3);
+            /* This step's right-hand loads are the next step's left-hand. */
+            src_minus11 = src10;
+            src_plus10 = src11;
+            src_plus11 = src12;
+            src_plus12 = src13;
+            /* Store four 16-byte rows for this column step. */
+            ST_UB4(dst0, dst1, dst2, dst3, dst_orig, dst_stride);
+            dst_orig += 16;
+        }
+
+        src += (src_stride << 2);
+        dst += (dst_stride << 2);
+    }
+}
+
+void ff_hevc_loop_filter_luma_h_8_msa(uint8_t *src,
+                                      ptrdiff_t src_stride,
+                                      int32_t beta, int32_t *tc,
+                                      uint8_t *no_p, uint8_t *no_q)
+{ /* Forwards every argument to hevc_loopfilter_luma_hor_msa(). */
+    hevc_loopfilter_luma_hor_msa(src, src_stride, beta, tc, no_p, no_q);
+}
+
+void ff_hevc_loop_filter_luma_v_8_msa(uint8_t *src,
+                                      ptrdiff_t src_stride,
+                                      int32_t beta, int32_t *tc,
+                                      uint8_t *no_p, uint8_t *no_q)
+{ /* Forwards every argument to hevc_loopfilter_luma_ver_msa(). */
+    hevc_loopfilter_luma_ver_msa(src, src_stride, beta, tc, no_p, no_q);
+}
+
+void ff_hevc_loop_filter_chroma_h_8_msa(uint8_t *src,
+                                        ptrdiff_t src_stride,
+                                        int32_t *tc, uint8_t *no_p,
+                                        uint8_t *no_q)
+{ /* Forwards every argument to hevc_loopfilter_chroma_hor_msa(). */
+    hevc_loopfilter_chroma_hor_msa(src, src_stride, tc, no_p, no_q);
+}
+
+void ff_hevc_loop_filter_chroma_v_8_msa(uint8_t *src,
+                                        ptrdiff_t src_stride,
+                                        int32_t *tc, uint8_t *no_p,
+                                        uint8_t *no_q)
+{ /* Forwards every argument to hevc_loopfilter_chroma_ver_msa(). */
+    hevc_loopfilter_chroma_ver_msa(src, src_stride, tc, no_p, no_q);
+}
+
+void ff_hevc_sao_band_filter_0_8_msa(uint8_t *dst, uint8_t *src,
+                                     ptrdiff_t stride_dst, ptrdiff_t stride_src,
+                                     int16_t *sao_offset_val, int sao_left_class,
+                                     int width, int height)
+{ /* SAO band filter entry point: tiles width as 16n + 8 + remainder. */
+    if (width >> 4) { /* largest multiple of 16 columns first */
+        hevc_sao_band_filter_16multiple_msa(dst, stride_dst, src, stride_src,
+                                            sao_left_class, sao_offset_val,
+                                            width - (width % 16), height);
+        dst += width - (width % 16);
+        src += width - (width % 16);
+        width %= 16;
+    }
+    /* One 8-wide column if 8 or more columns remain. */
+    if (width >> 3) {
+        hevc_sao_band_filter_8width_msa(dst, stride_dst, src, stride_src,
+                                        sao_left_class, sao_offset_val, height);
+        dst += 8;
+        src += 8;
+        width %= 8;
+    }
+    /* Any non-zero remainder is handed to the 4-wide helper. */
+    if (width) {
+        hevc_sao_band_filter_4width_msa(dst, stride_dst, src, stride_src,
+                                        sao_left_class, sao_offset_val, height);
+    }
+}
+
+void ff_hevc_sao_edge_filter_8_msa(uint8_t *dst, uint8_t *src,
+                                   ptrdiff_t stride_dst,
+                                   int16_t *sao_offset_val,
+                                   int eo, int width, int height)
+{ /* Picks the helper family from eo, then tiles width as 16n + 8 + rest. */
+    ptrdiff_t stride_src = (2 * 64 + 32) / sizeof(uint8_t);
+    /* stride_src is a fixed 160 bytes. NOTE(review): confirm with caller. */
+    switch (eo) {
+    case 0: /* 0-degree helpers */
+        if (width >> 4) {
+            hevc_sao_edge_filter_0degree_16multiple_msa(dst, stride_dst,
+                                                        src, stride_src,
+                                                        sao_offset_val,
+                                                        width - (width % 16),
+                                                        height);
+            dst += width - (width % 16);
+            src += width - (width % 16);
+            width %= 16;
+        }
+        /* One 8-wide column if 8 or more columns remain. */
+        if (width >> 3) {
+            hevc_sao_edge_filter_0degree_8width_msa(dst, stride_dst,
+                                                    src, stride_src,
+                                                    sao_offset_val, height);
+            dst += 8;
+            src += 8;
+            width %= 8;
+        }
+        /* Any non-zero remainder is handed to the 4-wide helper. */
+        if (width) {
+            hevc_sao_edge_filter_0degree_4width_msa(dst, stride_dst,
+                                                    src, stride_src,
+                                                    sao_offset_val, height);
+        }
+        break;
+
+    case 1: /* 90-degree helpers, same width tiling as case 0 */
+        if (width >> 4) {
+            hevc_sao_edge_filter_90degree_16multiple_msa(dst, stride_dst,
+                                                         src, stride_src,
+                                                         sao_offset_val,
+                                                         width - (width % 16),
+                                                         height);
+            dst += width - (width % 16);
+            src += width - (width % 16);
+            width %= 16;
+        }
+
+        if (width >> 3) {
+            hevc_sao_edge_filter_90degree_8width_msa(dst, stride_dst,
+                                                     src, stride_src,
+                                                     sao_offset_val, height);
+            dst += 8;
+            src += 8;
+            width %= 8;
+        }
+
+        if (width) {
+            hevc_sao_edge_filter_90degree_4width_msa(dst, stride_dst,
+                                                     src, stride_src,
+                                                     sao_offset_val, height);
+        }
+        break;
+
+    case 2: /* 45-degree helpers, same width tiling as case 0 */
+        if (width >> 4) {
+            hevc_sao_edge_filter_45degree_16multiple_msa(dst, stride_dst,
+                                                         src, stride_src,
+                                                         sao_offset_val,
+                                                         width - (width % 16),
+                                                         height);
+            dst += width - (width % 16);
+            src += width - (width % 16);
+            width %= 16;
+        }
+
+        if (width >> 3) {
+            hevc_sao_edge_filter_45degree_8width_msa(dst, stride_dst,
+                                                     src, stride_src,
+                                                     sao_offset_val, height);
+            dst += 8;
+            src += 8;
+            width %= 8;
+        }
+
+        if (width) {
+            hevc_sao_edge_filter_45degree_4width_msa(dst, stride_dst,
+                                                     src, stride_src,
+                                                     sao_offset_val, height);
+        }
+        break;
+
+    case 3: /* 135-degree helpers, same width tiling as case 0 */
+        if (width >> 4) {
+            hevc_sao_edge_filter_135degree_16multiple_msa(dst, stride_dst,
+                                                          src, stride_src,
+                                                          sao_offset_val,
+                                                          width - (width % 16),
+                                                          height);
+            dst += width - (width % 16);
+            src += width - (width % 16);
+            width %= 16;
+        }
+
+        if (width >> 3) {
+            hevc_sao_edge_filter_135degree_8width_msa(dst, stride_dst,
+                                                      src, stride_src,
+                                                      sao_offset_val, height);
+            dst += 8;
+            src += 8;
+            width %= 8;
+        }
+
+        if (width) {
+            hevc_sao_edge_filter_135degree_4width_msa(dst, stride_dst,
+                                                      src, stride_src,
+                                                      sao_offset_val, height);
+        }
+        break;
+    } /* no default: any other eo value leaves dst untouched */
+}
diff --git a/libavcodec/mips/hevc_mc_bi_msa.c b/libavcodec/mips/hevc_mc_bi_msa.c
index 0709b40c..8208be32 100644
--- a/libavcodec/mips/hevc_mc_bi_msa.c
+++ b/libavcodec/mips/hevc_mc_bi_msa.c
@@ -4369,7 +4369,7 @@ BI_MC_COPY(64);
 #undef BI_MC_COPY
 
 #define BI_MC(PEL, DIR, WIDTH, TAP, DIR1, FILT_DIR)                            \
-void ff_hevc_put_hevc_bi_##PEL##_##DIR####WIDTH##_8_msa(uint8_t *dst,          \
+void ff_hevc_put_hevc_bi_##PEL##_##DIR##WIDTH##_8_msa(uint8_t *dst,            \
                                                         ptrdiff_t dst_stride,  \
                                                         uint8_t *src,          \
                                                         ptrdiff_t src_stride,  \
@@ -4423,7 +4423,7 @@ BI_MC(epel, v, 32, 4, vt, my);
 #undef BI_MC
 
 #define BI_MC_HV(PEL, DIR, WIDTH, TAP, DIR1)                                   \
-void ff_hevc_put_hevc_bi_##PEL##_##DIR####WIDTH##_8_msa(uint8_t *dst,          \
+void ff_hevc_put_hevc_bi_##PEL##_##DIR##WIDTH##_8_msa(uint8_t *dst,            \
                                                         ptrdiff_t dst_stride,  \
                                                         uint8_t *src,          \
                                                         ptrdiff_t src_stride,  \
diff --git a/libavcodec/mips/hevc_mc_biw_msa.c b/libavcodec/mips/hevc_mc_biw_msa.c
index a1deb0ec..05a28ece 100644
--- a/libavcodec/mips/hevc_mc_biw_msa.c
+++ b/libavcodec/mips/hevc_mc_biw_msa.c
@@ -5454,7 +5454,7 @@ BI_W_MC_COPY(64);
 #undef BI_W_MC_COPY
 
 #define BI_W_MC(PEL, DIR, WIDTH, TAP, DIR1, FILT_DIR)                          \
-void ff_hevc_put_hevc_bi_w_##PEL##_##DIR####WIDTH##_8_msa(uint8_t *dst,        \
+void ff_hevc_put_hevc_bi_w_##PEL##_##DIR##WIDTH##_8_msa(uint8_t *dst,          \
                                                           ptrdiff_t            \
                                                           dst_stride,          \
                                                           uint8_t *src,        \
@@ -5521,7 +5521,7 @@ BI_W_MC(epel, v, 32, 4, vt, my);
 #undef BI_W_MC
 
 #define BI_W_MC_HV(PEL, DIR, WIDTH, TAP, DIR1)                                 \
-void ff_hevc_put_hevc_bi_w_##PEL##_##DIR####WIDTH##_8_msa(uint8_t *dst,        \
+void ff_hevc_put_hevc_bi_w_##PEL##_##DIR##WIDTH##_8_msa(uint8_t *dst,          \
                                                           ptrdiff_t            \
                                                           dst_stride,          \
                                                           uint8_t *src,        \
diff --git a/libavcodec/mips/hevc_mc_uni_msa.c b/libavcodec/mips/hevc_mc_uni_msa.c
index 7d02ce82..754fbdbb 100644
--- a/libavcodec/mips/hevc_mc_uni_msa.c
+++ b/libavcodec/mips/hevc_mc_uni_msa.c
@@ -249,7 +249,7 @@ static void copy_width64_msa(uint8_t *src, int32_t src_stride,
     copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 64);
 }
 
-uint8_t mc_filt_mask_arr[16 * 3] = {
+static const uint8_t mc_filt_mask_arr[16 * 3] = {
     /* 8 width cases */
     0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
     /* 4 width cases */
@@ -3871,7 +3871,7 @@ UNI_MC_COPY(64);
 #undef UNI_MC_COPY
 
 #define UNI_MC(PEL, DIR, WIDTH, TAP, DIR1, FILT_DIR)                           \
-void ff_hevc_put_hevc_uni_##PEL##_##DIR####WIDTH##_8_msa(uint8_t *dst,         \
+void ff_hevc_put_hevc_uni_##PEL##_##DIR##WIDTH##_8_msa(uint8_t *dst,           \
                                                          ptrdiff_t             \
                                                          dst_stride,           \
                                                          uint8_t *src,         \
@@ -3925,7 +3925,7 @@ UNI_MC(epel, v, 32, 4, vt, my);
 #undef UNI_MC
 
 #define UNI_MC_HV(PEL, DIR, WIDTH, TAP, DIR1)                           \
-void ff_hevc_put_hevc_uni_##PEL##_##DIR####WIDTH##_8_msa(uint8_t *dst,  \
+void ff_hevc_put_hevc_uni_##PEL##_##DIR##WIDTH##_8_msa(uint8_t *dst,    \
                                                          ptrdiff_t      \
                                                          dst_stride,    \
                                                          uint8_t *src,  \
diff --git a/libavcodec/mips/hevc_mc_uniw_msa.c b/libavcodec/mips/hevc_mc_uniw_msa.c
index 90796300..ce10f413 100644
--- a/libavcodec/mips/hevc_mc_uniw_msa.c
+++ b/libavcodec/mips/hevc_mc_uniw_msa.c
@@ -4687,7 +4687,7 @@ UNIWGT_MC_COPY(64);
 #undef UNIWGT_MC_COPY
 
 #define UNI_W_MC(PEL, DIR, WIDTH, TAP, DIR1, FILT_DIR)                        \
-void ff_hevc_put_hevc_uni_w_##PEL##_##DIR####WIDTH##_8_msa(uint8_t *dst,      \
+void ff_hevc_put_hevc_uni_w_##PEL##_##DIR##WIDTH##_8_msa(uint8_t *dst,        \
                                                            ptrdiff_t          \
                                                            dst_stride,        \
                                                            uint8_t *src,      \
@@ -4746,7 +4746,7 @@ UNI_W_MC(epel, v, 32, 4, vt, my);
 #undef UNI_W_MC
 
 #define UNI_W_MC_HV(PEL, DIR, WIDTH, TAP, DIR1)                              \
-void ff_hevc_put_hevc_uni_w_##PEL##_##DIR####WIDTH##_8_msa(uint8_t *dst,     \
+void ff_hevc_put_hevc_uni_w_##PEL##_##DIR##WIDTH##_8_msa(uint8_t *dst,       \
                                                            ptrdiff_t         \
                                                            dst_stride,       \
                                                            uint8_t *src,     \
diff --git a/libavcodec/mips/hevcdsp_init_mips.c b/libavcodec/mips/hevcdsp_init_mips.c
index e3d98b1c..3675b931 100644
--- a/libavcodec/mips/hevcdsp_init_mips.c
+++ b/libavcodec/mips/hevcdsp_init_mips.c
@@ -403,6 +403,32 @@ static av_cold void hevc_dsp_init_msa(HEVCDSPContext *c,
         c->put_hevc_epel_bi_w[6][1][1] = ff_hevc_put_hevc_bi_w_epel_hv24_8_msa;
         c->put_hevc_epel_bi_w[7][1][1] = ff_hevc_put_hevc_bi_w_epel_hv32_8_msa;
 
+        c->sao_band_filter[0] =
+        c->sao_band_filter[1] =
+        c->sao_band_filter[2] =
+        c->sao_band_filter[3] =
+        c->sao_band_filter[4] = ff_hevc_sao_band_filter_0_8_msa;
+
+        c->sao_edge_filter[0] =
+        c->sao_edge_filter[1] =
+        c->sao_edge_filter[2] =
+        c->sao_edge_filter[3] =
+        c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_8_msa;
+
+        c->hevc_h_loop_filter_luma = ff_hevc_loop_filter_luma_h_8_msa;
+        c->hevc_v_loop_filter_luma = ff_hevc_loop_filter_luma_v_8_msa;
+
+        c->hevc_h_loop_filter_chroma = ff_hevc_loop_filter_chroma_h_8_msa;
+        c->hevc_v_loop_filter_chroma = ff_hevc_loop_filter_chroma_v_8_msa;
+
+        c->hevc_h_loop_filter_luma_c = ff_hevc_loop_filter_luma_h_8_msa;
+        c->hevc_v_loop_filter_luma_c = ff_hevc_loop_filter_luma_v_8_msa;
+
+        c->hevc_h_loop_filter_chroma_c =
+            ff_hevc_loop_filter_chroma_h_8_msa;
+        c->hevc_v_loop_filter_chroma_c =
+            ff_hevc_loop_filter_chroma_v_8_msa;
+
         c->idct[0] = ff_hevc_idct_4x4_msa;
         c->idct[1] = ff_hevc_idct_8x8_msa;
         c->idct[2] = ff_hevc_idct_16x16_msa;
diff --git a/libavcodec/mips/hevcdsp_mips.h b/libavcodec/mips/hevcdsp_mips.h
index b7c903e8..1573d1cc 100644
--- a/libavcodec/mips/hevcdsp_mips.h
+++ b/libavcodec/mips/hevcdsp_mips.h
@@ -24,7 +24,7 @@
 #include "libavcodec/hevcdsp.h"
 
 #define MC(PEL, DIR, WIDTH)                                                 \
-void ff_hevc_put_hevc_##PEL##_##DIR####WIDTH##_8_msa(int16_t *dst,          \
+void ff_hevc_put_hevc_##PEL##_##DIR##WIDTH##_8_msa(int16_t *dst,            \
                                                      uint8_t *src,          \
                                                      ptrdiff_t src_stride,  \
                                                      int height,            \
@@ -102,7 +102,7 @@ MC(epel, hv, 64);
 #undef MC
 
 #define UNI_MC(PEL, DIR, WIDTH)                                                \
-void ff_hevc_put_hevc_uni_##PEL##_##DIR####WIDTH##_8_msa(uint8_t *dst,         \
+void ff_hevc_put_hevc_uni_##PEL##_##DIR##WIDTH##_8_msa(uint8_t *dst,           \
                                                          ptrdiff_t dst_stride, \
                                                          uint8_t *src,         \
                                                          ptrdiff_t src_stride, \
@@ -181,7 +181,7 @@ UNI_MC(epel, hv, 64);
 #undef UNI_MC
 
 #define UNI_W_MC(PEL, DIR, WIDTH)                                         \
-void ff_hevc_put_hevc_uni_w_##PEL##_##DIR####WIDTH##_8_msa(uint8_t *dst,  \
+void ff_hevc_put_hevc_uni_w_##PEL##_##DIR##WIDTH##_8_msa(uint8_t *dst,    \
                                                            ptrdiff_t      \
                                                            dst_stride,    \
                                                            uint8_t *src,  \
@@ -265,7 +265,7 @@ UNI_W_MC(epel, hv, 64);
 #undef UNI_W_MC
 
 #define BI_MC(PEL, DIR, WIDTH)                                                 \
-void ff_hevc_put_hevc_bi_##PEL##_##DIR####WIDTH##_8_msa(uint8_t *dst,          \
+void ff_hevc_put_hevc_bi_##PEL##_##DIR##WIDTH##_8_msa(uint8_t *dst,            \
                                                         ptrdiff_t dst_stride,  \
                                                         uint8_t *src,          \
                                                         ptrdiff_t src_stride,  \
@@ -345,7 +345,7 @@ BI_MC(epel, hv, 64);
 #undef BI_MC
 
 #define BI_W_MC(PEL, DIR, WIDTH)                                               \
-void ff_hevc_put_hevc_bi_w_##PEL##_##DIR####WIDTH##_8_msa(uint8_t *dst,        \
+void ff_hevc_put_hevc_bi_w_##PEL##_##DIR##WIDTH##_8_msa(uint8_t *dst,          \
                                                           ptrdiff_t            \
                                                           dst_stride,          \
                                                           uint8_t *src,        \
@@ -431,6 +431,36 @@ BI_W_MC(epel, hv, 64);
 
 #undef BI_W_MC
 
+void ff_hevc_loop_filter_luma_h_8_msa(uint8_t *src,
+                                      ptrdiff_t src_stride,
+                                      int32_t beta, int32_t *tc,
+                                      uint8_t *no_p, uint8_t *no_q);
+
+void ff_hevc_loop_filter_luma_v_8_msa(uint8_t *src,
+                                      ptrdiff_t src_stride,
+                                      int32_t beta, int32_t *tc,
+                                      uint8_t *no_p, uint8_t *no_q);
+
+void ff_hevc_loop_filter_chroma_h_8_msa(uint8_t *src,
+                                        ptrdiff_t src_stride,
+                                        int32_t *tc, uint8_t *no_p,
+                                        uint8_t *no_q);
+
+void ff_hevc_loop_filter_chroma_v_8_msa(uint8_t *src,
+                                        ptrdiff_t src_stride,
+                                        int32_t *tc, uint8_t *no_p,
+                                        uint8_t *no_q);
+
+void ff_hevc_sao_band_filter_0_8_msa(uint8_t *dst, uint8_t *src,
+                                     ptrdiff_t stride_dst, ptrdiff_t stride_src,
+                                     int16_t *sao_offset_val, int sao_left_class,
+                                     int width, int height);
+
+void ff_hevc_sao_edge_filter_8_msa(uint8_t *dst, uint8_t *src,
+                                   ptrdiff_t stride_dst,
+                                   int16_t *sao_offset_val,
+                                   int eo, int width, int height);
+
 void ff_hevc_idct_4x4_msa(int16_t *coeffs, int col_limit);
 void ff_hevc_idct_8x8_msa(int16_t *coeffs, int col_limit);
 void ff_hevc_idct_16x16_msa(int16_t *coeffs, int col_limit);
diff --git a/libavcodec/mips/hevcdsp_msa.c b/libavcodec/mips/hevcdsp_msa.c
index ed3acbb8..f2bc748e 100644
--- a/libavcodec/mips/hevcdsp_msa.c
+++ b/libavcodec/mips/hevcdsp_msa.c
@@ -3792,7 +3792,7 @@ MC_COPY(64);
 #undef MC_COPY
 
 #define MC(PEL, DIR, WIDTH, TAP, DIR1, FILT_DIR)                            \
-void ff_hevc_put_hevc_##PEL##_##DIR####WIDTH##_8_msa(int16_t *dst,          \
+void ff_hevc_put_hevc_##PEL##_##DIR##WIDTH##_8_msa(int16_t *dst,            \
                                                      uint8_t *src,          \
                                                      ptrdiff_t src_stride,  \
                                                      int height,            \
@@ -3843,7 +3843,7 @@ MC(epel, v, 32, 4, vt, my);
 #undef MC
 
 #define MC_HV(PEL, DIR, WIDTH, TAP, DIR1)                                     \
-void ff_hevc_put_hevc_##PEL##_##DIR####WIDTH##_8_msa(int16_t *dst,            \
+void ff_hevc_put_hevc_##PEL##_##DIR##WIDTH##_8_msa(int16_t *dst,              \
                                                      uint8_t *src,            \
                                                      ptrdiff_t src_stride,    \
                                                      int height,              \
diff --git a/libavcodec/mips/hevcpred_init_mips.c b/libavcodec/mips/hevcpred_init_mips.c
new file mode 100644
index 00000000..331cfac1
--- /dev/null
+++ b/libavcodec/mips/hevcpred_init_mips.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2015 Shivraj Patil (Shivraj.Patil@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/hevc.h"
+#include "libavcodec/mips/hevcpred_mips.h"
+
+#if HAVE_MSA
+static av_cold void hevc_pred_init_msa(HEVCPredContext *c, const int bit_depth)
+{
+    if (bit_depth != 8)
+        return;  /* MSA routines are installed for 8-bit only; c stays as passed in */
+    c->pred_dc         = ff_hevc_intra_pred_dc_msa;
+    c->pred_planar[0]  = ff_hevc_intra_pred_planar_0_msa;
+    c->pred_planar[1]  = ff_hevc_intra_pred_planar_1_msa;
+    c->pred_planar[2]  = ff_hevc_intra_pred_planar_2_msa;
+    c->pred_planar[3]  = ff_hevc_intra_pred_planar_3_msa;
+    c->pred_angular[0] = ff_pred_intra_pred_angular_0_msa;
+    c->pred_angular[1] = ff_pred_intra_pred_angular_1_msa;
+    c->pred_angular[2] = ff_pred_intra_pred_angular_2_msa;
+    c->pred_angular[3] = ff_pred_intra_pred_angular_3_msa;
+    c->intra_pred[2]   = ff_intra_pred_8_16x16_msa;  /* slots 0 and 1 are not touched */
+    c->intra_pred[3]   = ff_intra_pred_8_32x32_msa;
+}
+#endif  // #if HAVE_MSA
+
+void ff_hevc_pred_init_mips(HEVCPredContext *c, const int bit_depth)
+{   /* MIPS entry point: forwards to the MSA setup when HAVE_MSA is set, otherwise leaves c untouched. */
+#if HAVE_MSA
+    hevc_pred_init_msa(c, bit_depth);
+#endif  // #if HAVE_MSA
+}
diff --git a/libavcodec/mips/hevcpred_mips.h b/libavcodec/mips/hevcpred_mips.h
new file mode 100644
index 00000000..12f57a2a
--- /dev/null
+++ b/libavcodec/mips/hevcpred_mips.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2015 Shivraj Patil (Shivraj.Patil@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MIPS_HEVCPRED_MIPS_H
+#define AVCODEC_MIPS_HEVCPRED_MIPS_H
+
+#include "libavcodec/hevcdsp.h"
+
+void ff_hevc_intra_pred_planar_0_msa(uint8_t *dst,
+                                     const uint8_t *src_top,
+                                     const uint8_t *src_left,
+                                     ptrdiff_t stride);  /* -> pred_planar[0] */
+
+void ff_hevc_intra_pred_planar_1_msa(uint8_t *dst,
+                                     const uint8_t *src_top,
+                                     const uint8_t *src_left,
+                                     ptrdiff_t stride);  /* -> pred_planar[1] */
+
+void ff_hevc_intra_pred_planar_2_msa(uint8_t *dst,
+                                     const uint8_t *src_top,
+                                     const uint8_t *src_left,
+                                     ptrdiff_t stride);  /* -> pred_planar[2] */
+
+void ff_hevc_intra_pred_planar_3_msa(uint8_t *dst,
+                                     const uint8_t *src_top,
+                                     const uint8_t *src_left,
+                                     ptrdiff_t stride);  /* -> pred_planar[3] */
+
+void ff_hevc_intra_pred_dc_msa(uint8_t *dst, const uint8_t *src_top,
+                               const uint8_t *src_left,
+                               ptrdiff_t stride, int log2, int c_idx);  /* -> pred_dc */
+
+void ff_pred_intra_pred_angular_0_msa(uint8_t *dst,
+                                      const uint8_t *src_top,
+                                      const uint8_t *src_left,
+                                      ptrdiff_t stride, int c_idx, int mode);  /* -> pred_angular[0] */
+
+void ff_pred_intra_pred_angular_1_msa(uint8_t *dst,
+                                      const uint8_t *src_top,
+                                      const uint8_t *src_left,
+                                      ptrdiff_t stride, int c_idx, int mode);  /* -> pred_angular[1] */
+
+void ff_pred_intra_pred_angular_2_msa(uint8_t *dst,
+                                      const uint8_t *src_top,
+                                      const uint8_t *src_left,
+                                      ptrdiff_t stride, int c_idx, int mode);  /* -> pred_angular[2] */
+
+void ff_pred_intra_pred_angular_3_msa(uint8_t *dst,
+                                      const uint8_t *src_top,
+                                      const uint8_t *src_left,
+                                      ptrdiff_t stride, int c_idx, int mode);  /* -> pred_angular[3] */
+
+void ff_intra_pred_8_16x16_msa(HEVCContext *s, int x0, int y0, int c_idx);  /* -> intra_pred[2] */
+void ff_intra_pred_8_32x32_msa(HEVCContext *s, int x0, int y0, int c_idx);  /* -> intra_pred[3] */
+
+#endif  // #ifndef AVCODEC_MIPS_HEVCPRED_MIPS_H
diff --git a/libavcodec/mips/hevcpred_msa.c b/libavcodec/mips/hevcpred_msa.c
new file mode 100644
index 00000000..6a3b2815
--- /dev/null
+++ b/libavcodec/mips/hevcpred_msa.c
@@ -0,0 +1,3084 @@
+/*
+ * Copyright (c) 2015 Shivraj Patil (Shivraj.Patil@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/hevc.h"
+#include "libavutil/mips/generic_macros_msa.h"
+#include "hevcpred_mips.h"
+
+static const int8_t intra_pred_angle_up[17] = {
+    -32, -26, -21, -17, -13, -9, -5, -2, 0, 2, 5, 9, 13, 17, 21, 26, 32
+};  /* 17 signed steps, ascending from -32 to +32 */
+
+static const int8_t intra_pred_angle_low[16] = {
+    32, 26, 21, 17, 13, 9, 5, 2, 0, -2, -5, -9, -13, -17, -21, -26
+};  /* entry i equals -intra_pred_angle_up[i] for i = 0..15 */
+
+#define HEVC_PRED_PLANAR_16x2(src0_r, src0_l, tmp0, tmp1, vec0, vec1,          \
+                              mul_val_h0, mul_val_h1, mul_val_h2, mul_val_h3,  \
+                              res0, res1, mul_val_b0, mul_val_b1, round)       \
+{                                                                              \
+    v8i16 res0_m, res1_m, res2_m, res3_m;                                      \
+    /* res0_m/res1_m end up in res0, res2_m/res3_m in res1 (PCKEV below). */   \
+    MUL4(mul_val_h0, vec0, mul_val_h2, vec0, mul_val_h0, vec1,                 \
+         mul_val_h2, vec1, res0_m, res1_m, res2_m, res3_m);                    \
+    /* tmp0 is weighted by mul_val_h1 (even accumulators) or mul_val_h3. */    \
+    res0_m += mul_val_h1 * tmp0;                                               \
+    res1_m += mul_val_h3 * tmp0;                                               \
+    res2_m += mul_val_h1 * tmp0;                                               \
+    res3_m += mul_val_h3 * tmp0;                                               \
+    /* src0_r/src0_l weight: mul_val_b0, or one less for res2_m/res3_m. */     \
+    res0_m += mul_val_b0 * src0_r;                                             \
+    res1_m += mul_val_b0 * src0_l;                                             \
+    res2_m += (mul_val_b0 - 1) * src0_r;                                       \
+    res3_m += (mul_val_b0 - 1) * src0_l;                                       \
+    /* tmp1 weight: mul_val_b1, or one more for res2_m/res3_m. */              \
+    res0_m += mul_val_b1 * tmp1;                                               \
+    res1_m += mul_val_b1 * tmp1;                                               \
+    res2_m += (mul_val_b1 + 1) * tmp1;                                         \
+    res3_m += (mul_val_b1 + 1) * tmp1;                                         \
+    /* Rounding right shift by round, then pack even bytes. */                 \
+    SRARI_H4_SH(res0_m, res1_m, res2_m, res3_m, round);                        \
+    PCKEV_B2_SH(res1_m, res0_m, res3_m, res2_m, res0, res1);                   \
+}
+
+static void hevc_intra_pred_vert_4x4_msa(const uint8_t *src_top,
+                                         const uint8_t *src_left,
+                                         uint8_t *dst, int32_t stride,
+                                         int32_t flag)
+{
+    uint32_t col;
+    uint32_t src_data;
+    v8i16 vec0, vec1, vec2;
+    v16i8 zero = { 0 };
+    /* Vertical prediction: all 4 rows receive the 32-bit word at src_top. */
+    src_data = LW(src_top);
+    SW4(src_data, src_data, src_data, src_data, dst, stride);
+    /* flag == 0: column 0 is additionally rebuilt from the left neighbours. */
+    if (0 == flag) {
+        src_data = LW(src_left);
+        /* Insert into the zero vector: vec2 has no defined value yet. */
+        vec2 = (v8i16) __msa_insert_w((v4i32) zero, 0, src_data);
+
+        vec0 = __msa_fill_h(src_left[-1]);
+        vec1 = __msa_fill_h(src_top[0]);
+        /* col0[y] = CLIP_SH_0_255(src_top[0] + ((src_left[y] - src_left[-1]) >> 1)) */
+        vec2 = (v8i16) __msa_ilvr_b(zero, (v16i8) vec2);
+        vec2 -= vec0;
+        vec2 >>= 1;
+        vec2 += vec1;
+        vec2 = CLIP_SH_0_255(vec2);
+        /* Only lanes 0..3 are consumed, one per row. */
+        for (col = 0; col < 4; col++) {
+            dst[stride * col] = (uint8_t) vec2[col];
+        }
+    }
+}
+
+static void hevc_intra_pred_vert_8x8_msa(const uint8_t *src_top,
+                                         const uint8_t *src_left,
+                                         uint8_t *dst, int32_t stride,
+                                         int32_t flag)
+{
+    uint8_t *tmp_dst = dst;
+    uint32_t row;
+    uint16_t val0, val1, val2, val3;
+    uint64_t src_data1;
+    v8i16 vec0, vec1, vec2;
+    v16i8 zero = { 0 };
+    /* Vertical prediction: each of the 8 rows receives the 64-bit value at src_top. */
+    src_data1 = LD(src_top);
+
+    for (row = 8; row--;) {
+        SD(src_data1, tmp_dst);
+        tmp_dst += stride;
+    }
+    /* flag == 0: column 0 is additionally rebuilt from the left neighbours. */
+    if (0 == flag) {
+        src_data1 = LD(src_left);
+
+        vec2 = (v8i16) __msa_insert_d((v2i64) zero, 0, src_data1);
+
+        vec0 = __msa_fill_h(src_left[-1]);
+        vec1 = __msa_fill_h(src_top[0]);
+        /* col0[y] = CLIP_SH_0_255(src_top[0] + ((src_left[y] - src_left[-1]) >> 1)) */
+        vec2 = (v8i16) __msa_ilvr_b(zero, (v16i8) vec2);
+        vec2 -= vec0;
+        vec2 >>= 1;
+        vec2 += vec1;
+        vec2 = CLIP_SH_0_255(vec2);
+        /* Lanes 0..3 go to column 0 of rows 0..3 (dst still points at row 0). */
+        val0 = vec2[0];
+        val1 = vec2[1];
+        val2 = vec2[2];
+        val3 = vec2[3];
+
+        dst[0] = val0;
+        dst[stride] = val1;
+        dst[2 * stride] = val2;
+        dst[3 * stride] = val3;
+        /* Lanes 4..7 go to column 0 of rows 4..7. */
+        val0 = vec2[4];
+        val1 = vec2[5];
+        val2 = vec2[6];
+        val3 = vec2[7];
+
+        dst[4 * stride] = val0;
+        dst[5 * stride] = val1;
+        dst[6 * stride] = val2;
+        dst[7 * stride] = val3;
+    }
+}
+
+static void hevc_intra_pred_vert_16x16_msa(const uint8_t *src_top,
+                                           const uint8_t *src_left,
+                                           uint8_t *dst, int32_t stride,
+                                           int32_t flag)
+{
+    int32_t col;
+    uint8_t *tmp_dst = dst;
+    uint32_t row;
+    v16u8 src;
+    v8i16 vec0, vec1, vec2, vec3;
+    /* Vertical prediction: each of the 16 rows receives the vector loaded from src_top. */
+    src = LD_UB(src_top);
+
+    for (row = 16; row--;) {
+        ST_UB(src, tmp_dst);
+        tmp_dst += stride;
+    }
+    /* flag == 0: column 0 is additionally rebuilt from the left neighbours. */
+    if (0 == flag) {
+        src = LD_UB(src_left);
+
+        vec0 = __msa_fill_h(src_left[-1]);
+        vec1 = __msa_fill_h(src_top[0]);
+        /* Per left sample: src_top[0] + ((src_left[y] - src_left[-1]) >> 1), clipped below. */
+        UNPCK_UB_SH(src, vec2, vec3);
+        SUB2(vec2, vec0, vec3, vec0, vec2, vec3);
+
+        vec2 >>= 1;
+        vec3 >>= 1;
+
+        ADD2(vec2, vec1, vec3, vec1, vec2, vec3);
+        CLIP_SH2_0_255(vec2, vec3);
+        /* Repack the two halfword vectors into one byte vector. */
+        src = (v16u8) __msa_pckev_b((v16i8) vec3, (v16i8) vec2);
+        /* Byte `col` of the result lands in column 0 of row `col`. */
+        for (col = 0; col < 16; col++) {
+            dst[stride * col] = src[col];
+        }
+    }
+}
+
+static void hevc_intra_pred_horiz_4x4_msa(const uint8_t *src_top,
+                                          const uint8_t *src_left,
+                                          uint8_t *dst, int32_t stride,
+                                          int32_t flag)
+{
+    uint32_t val0, val1, val2, val3;
+    v16i8 src0;
+    v8i16 src0_r, src_top_val, src_left_val;
+    v16i8 zero = { 0 };
+    /* Row y = src_left[y] in all 4 bytes; U suffix keeps the multiply unsigned. */
+    val0 = src_left[0] * 0x01010101U;
+    val1 = src_left[1] * 0x01010101U;
+    val2 = src_left[2] * 0x01010101U;
+    val3 = src_left[3] * 0x01010101U;
+    SW4(val0, val1, val2, val3, dst, stride);
+    /* flag == 0: rebuild row 0 from src_top, inserted into the zero vector. */
+    if (0 == flag) {
+        val0 = LW(src_top);
+        src0 = (v16i8) __msa_insert_w((v4i32) zero, 0, val0);
+        src_top_val = __msa_fill_h(src_top[-1]);
+        src_left_val = __msa_fill_h(src_left[0]);
+        /* row0[x] = CLIP_SH_0_255(src_left[0] + ((src_top[x] - src_top[-1]) >> 1)) */
+        src0_r = (v8i16) __msa_ilvr_b(zero, src0);
+
+        src0_r -= src_top_val;
+        src0_r >>= 1;
+        src0_r += src_left_val;
+        src0_r = CLIP_SH_0_255(src0_r);
+        src0 = __msa_pckev_b((v16i8) src0_r, (v16i8) src0_r);
+        val0 = __msa_copy_s_w((v4i32) src0, 0);
+        SW(val0, dst);
+    }
+}
+
+static void hevc_intra_pred_horiz_8x8_msa(const uint8_t *src_top,
+                                          const uint8_t *src_left,
+                                          uint8_t *dst, int32_t stride,
+                                          int32_t flag)
+{
+    uint64_t val0, val1, val2, val3;
+    v16i8 src0;
+    v8i16 src0_r, src_top_val, src_left_val;
+    v16i8 zero = { 0 };
+    /* Row y = src_left[y] in all 8 bytes; ULL keeps the multiply unsigned. */
+    val0 = src_left[0] * 0x0101010101010101ULL;
+    val1 = src_left[1] * 0x0101010101010101ULL;
+    val2 = src_left[2] * 0x0101010101010101ULL;
+    val3 = src_left[3] * 0x0101010101010101ULL;
+    SD4(val0, val1, val2, val3, dst, stride);
+
+    val0 = src_left[4] * 0x0101010101010101ULL;
+    val1 = src_left[5] * 0x0101010101010101ULL;
+    val2 = src_left[6] * 0x0101010101010101ULL;
+    val3 = src_left[7] * 0x0101010101010101ULL;
+    SD4(val0, val1, val2, val3, dst + 4 * stride, stride);
+    /* flag == 0: rebuild row 0 from src_top, inserted into the zero vector. */
+    if (0 == flag) {
+        val0 = LD(src_top);
+        src0 = (v16i8) __msa_insert_d((v2i64) zero, 0, val0);
+        src_top_val = __msa_fill_h(src_top[-1]);
+        src_left_val = __msa_fill_h(src_left[0]);
+        /* row0[x] = CLIP_SH_0_255(src_left[0] + ((src_top[x] - src_top[-1]) >> 1)) */
+        src0_r = (v8i16) __msa_ilvr_b(zero, src0);
+
+        src0_r -= src_top_val;
+        src0_r >>= 1;
+        src0_r += src_left_val;
+        src0_r = CLIP_SH_0_255(src0_r);
+        src0 = __msa_pckev_b((v16i8) src0_r, (v16i8) src0_r);
+        val0 = __msa_copy_s_d((v2i64) src0, 0);
+        SD(val0, dst);
+    }
+}
+
+static void hevc_intra_pred_horiz_16x16_msa(const uint8_t *src_top,
+                                            const uint8_t *src_left,
+                                            uint8_t *dst, int32_t stride,
+                                            int32_t flag)
+{
+    uint8_t *tmp_dst = dst;
+    uint32_t row;
+    uint8_t inp0, inp1, inp2, inp3;
+    v16i8 src0, src1, src2, src3;
+    v8i16 src0_r, src0_l, src_left_val, src_top_val;
+    /* Captured now: the loop below advances src_left past the first sample. */
+    src_left_val = __msa_fill_h(src_left[0]);
+    /* Horizontal prediction: row y is src_left[y] in every byte, 4 rows per pass. */
+    for (row = 4; row--;) {
+        inp0 = src_left[0];
+        inp1 = src_left[1];
+        inp2 = src_left[2];
+        inp3 = src_left[3];
+        src_left += 4;
+
+        src0 = __msa_fill_b(inp0);
+        src1 = __msa_fill_b(inp1);
+        src2 = __msa_fill_b(inp2);
+        src3 = __msa_fill_b(inp3);
+
+        ST_SB4(src0, src1, src2, src3, tmp_dst, stride);
+        tmp_dst += (4 * stride);
+    }
+    /* flag == 0: row 0 is additionally rebuilt from the top neighbours. */
+    if (0 == flag) {
+        src0 = LD_SB(src_top);
+        src_top_val = __msa_fill_h(src_top[-1]);
+        /* row0[x] = first left sample + ((src_top[x] - src_top[-1]) >> 1), clipped below. */
+        UNPCK_UB_SH(src0, src0_r, src0_l);
+        SUB2(src0_r, src_top_val, src0_l, src_top_val, src0_r, src0_l);
+
+        src0_r >>= 1;
+        src0_l >>= 1;
+
+        ADD2(src0_r, src_left_val, src0_l, src_left_val, src0_r, src0_l);
+        CLIP_SH2_0_255(src0_r, src0_l);
+        src0 = __msa_pckev_b((v16i8) src0_l, (v16i8) src0_r);
+        ST_SB(src0, dst);
+    }
+}
+
+static void hevc_intra_pred_horiz_32x32_msa(const uint8_t *src_top,
+                                            const uint8_t *src_left,
+                                            uint8_t *dst, int32_t stride)
+{
+    /*
+     * Horizontal intra prediction for a 32x32 block: output row y is
+     * src_left[y] repeated across 32 bytes.
+     * src_top is not read by this function.
+     */
+    uint32_t grp, k;
+    v16i8 fill[4];
+
+    /* 8 passes of 4 rows cover the 32 left samples. */
+    for (grp = 0; grp < 8; grp++) {
+        const uint8_t *left4 = src_left + grp * 4;
+
+        /* Splat the next four left samples, one vector per row. */
+        for (k = 0; k < 4; k++) {
+            fill[k] = __msa_fill_b(left4[k]);
+        }
+
+        /* Each row takes the same vector twice, at dst and dst + 16. */
+        for (k = 0; k < 4; k++) {
+            ST_SB2(fill[k], fill[k], dst, 16);
+            dst += stride;
+        }
+    }
+}
+
+
+static void hevc_intra_pred_dc_4x4_msa(const uint8_t *src_top,
+                                       const uint8_t *src_left,
+                                       uint8_t *dst, int32_t stride,
+                                       int32_t flag)
+{
+    uint8_t *tmp_dst = dst;
+    uint32_t addition = 0;
+    uint32_t val0, val1, val2;
+    v16i8 src = { 0 };
+    v16u8 store;
+    v16i8 zero = { 0 };
+    v8u16 sum, vec0, vec1;
+    /* DC value: rounded (>> 3) sum of the 4 top and 4 left bytes, stored in `addition`. */
+    val0 = LW(src_top);
+    val1 = LW(src_left);
+    INSERT_W2_SB(val0, val1, src);
+    sum = __msa_hadd_u_h((v16u8) src, (v16u8) src);
+    sum = (v8u16) __msa_hadd_u_w(sum, sum);
+    sum = (v8u16) __msa_hadd_u_d((v4u32) sum, (v4u32) sum);
+    sum = (v8u16) __msa_srari_w((v4i32) sum, 3);
+    addition = __msa_copy_u_w((v4i32) sum, 0);
+    store = (v16u8) __msa_fill_b(addition);
+    val0 = __msa_copy_u_w((v4i32) store, 0);
+    SW4(val0, val0, val0, val0, dst, stride);
+
+    if (0 == flag) {
+        ILVR_B2_UH(zero, store, zero, src, vec0, vec1);
+        /* vec1 becomes neighbour + 3 * DC per lane (DC added once, then twice). */
+        vec1 += vec0;
+        vec0 += vec0;
+        vec1 += vec0;
+
+        vec1 = (v8u16) __msa_srari_h((v8i16) vec1, 2);
+        store = (v16u8) __msa_pckev_b((v16i8) vec1, (v16i8) vec1);
+        val1 = (src_left[0] + 2 * addition + src_top[0] + 2) >> 2;
+        store = (v16u8) __msa_insert_b((v16i8) store, 0, val1);
+        val0 = __msa_copy_u_w((v4i32) store, 0);
+        SW(val0, tmp_dst);
+        /* Column 0, rows 1..3: (src_left[y] + 3 * DC + 2) >> 2. */
+        val0 = src_left[1];
+        val1 = src_left[2];
+        val2 = src_left[3];
+
+        addition *= 3;
+
+        ADD2(val0, addition, val1, addition, val0, val1);
+        val2 += addition;
+
+        val0 += 2;
+        val1 += 2;
+        val2 += 2;
+        val0 >>= 2;
+        val1 >>= 2;
+        val2 >>= 2;
+
+        tmp_dst[stride * 1] = val0;
+        tmp_dst[stride * 2] = val1;
+        tmp_dst[stride * 3] = val2;
+    }
+}
+
+static void hevc_intra_pred_dc_8x8_msa(const uint8_t *src_top,
+                                       const uint8_t *src_left,
+                                       uint8_t *dst, int32_t stride,
+                                       int32_t flag)
+{
+    uint8_t *tmp_dst = dst;
+    uint32_t row, col, val;
+    uint32_t addition = 0;
+    uint64_t val0, val1;
+    v16u8 src = { 0 };
+    v16u8 store;
+    v8u16 sum, vec0, vec1;
+    v16i8 zero = { 0 };
+    /* DC value: rounded (>> 4) sum of the 8 top and 8 left bytes, stored in `addition`. */
+    val0 = LD(src_top);
+    val1 = LD(src_left);
+    INSERT_D2_UB(val0, val1, src);
+    sum = __msa_hadd_u_h((v16u8) src, (v16u8) src);
+    sum = (v8u16) __msa_hadd_u_w(sum, sum);
+    sum = (v8u16) __msa_hadd_u_d((v4u32) sum, (v4u32) sum);
+    sum = (v8u16) __msa_pckev_w((v4i32) sum, (v4i32) sum);
+    sum = (v8u16) __msa_hadd_u_d((v4u32) sum, (v4u32) sum);
+    sum = (v8u16) __msa_srari_w((v4i32) sum, 4);
+    addition = __msa_copy_u_w((v4i32) sum, 0);
+    store = (v16u8) __msa_fill_b(addition);
+    val0 = __msa_copy_u_d((v2i64) store, 0);
+
+    for (row = 8; row--;) {
+        SD(val0, dst);
+        dst += stride;
+    }
+    /* flag == 0: rewrite row 0 and column 0 through tmp_dst (dst has moved past the block). */
+    if (0 == flag) {
+        ILVR_B2_UH(zero, store, zero, src, vec0, vec1);
+        /* Row 0: top sample + 3 * DC, rounded >> 2; byte 0 is then replaced by the corner value. */
+        vec1 += vec0;
+        vec0 += vec0;
+        vec1 += vec0;
+        vec1 = (v8u16) __msa_srari_h((v8i16) vec1, 2);
+        store = (v16u8) __msa_pckev_b((v16i8) vec1, (v16i8) vec1);
+        val = (src_left[0] + 2 * addition + src_top[0] + 2) >> 2;
+        store = (v16u8) __msa_insert_b((v16i8) store, 0, val);
+        val0 = __msa_copy_u_d((v2i64) store, 0);
+        SD(val0, tmp_dst);
+        /* Column 0: left sample + 3 * DC, rounded >> 2. */
+        val0 = LD(src_left);
+        src = (v16u8) __msa_insert_d((v2i64) src, 0, val0);
+        vec1 = (v8u16) __msa_ilvr_b(zero, (v16i8) src);
+        vec0 = (v8u16) __msa_fill_h(addition);
+        vec0 *= 3;
+        vec1 += vec0;
+        vec1 = (v8u16) __msa_srari_h((v8i16) vec1, 2);
+
+        for (col = 1; col < 8; col++) {
+            tmp_dst[stride * col] = vec1[col];
+        }
+    }
+}
+
+static void hevc_intra_pred_dc_16x16_msa(const uint8_t *src_top,
+                                         const uint8_t *src_left,
+                                         uint8_t *dst, int32_t stride,
+                                         int32_t flag)
+{
+    uint8_t *tmp_dst = dst;
+    uint32_t row, col, val;
+    uint32_t addition = 0;
+    v16u8 src_above1, store, src_left1;
+    v8u16 sum, sum_above, sum_left;
+    v8u16 vec0, vec1, vec2;
+    v16i8 zero = { 0 };
+    /* DC value: rounded (>> 5) sum of the 16 top and 16 left bytes, stored in `addition`. */
+    src_above1 = LD_UB(src_top);
+    src_left1 = LD_UB(src_left);
+
+    HADD_UB2_UH(src_above1, src_left1, sum_above, sum_left);
+    sum = sum_above + sum_left;
+    sum = (v8u16) __msa_hadd_u_w(sum, sum);
+    sum = (v8u16) __msa_hadd_u_d((v4u32) sum, (v4u32) sum);
+    sum = (v8u16) __msa_pckev_w((v4i32) sum, (v4i32) sum);
+    sum = (v8u16) __msa_hadd_u_d((v4u32) sum, (v4u32) sum);
+    sum = (v8u16) __msa_srari_w((v4i32) sum, 5);
+    addition = __msa_copy_u_w((v4i32) sum, 0);
+    store = (v16u8) __msa_fill_b(addition);
+
+    for (row = 16; row--;) {
+        ST_UB(store, dst);
+        dst += stride;
+    }
+    /* flag == 0: rewrite row 0 (top + 3 * DC, rounded >> 2) and column 0 through tmp_dst. */
+    if (0 == flag) {
+        vec0 = (v8u16) __msa_ilvr_b(zero, (v16i8) store);
+        ILVRL_B2_UH(zero, src_above1, vec1, vec2);
+        ADD2(vec1, vec0, vec2, vec0, vec1, vec2);
+        vec0 += vec0;
+        ADD2(vec1, vec0, vec2, vec0, vec1, vec2);
+        SRARI_H2_UH(vec1, vec2, 2);
+        store = (v16u8) __msa_pckev_b((v16i8) vec2, (v16i8) vec1);
+        val = (src_left[0] + 2 * addition + src_top[0] + 2) >> 2;
+        store = (v16u8) __msa_insert_b((v16i8) store, 0, val);
+        ST_UB(store, tmp_dst);
+        /* Column 0: left sample + 3 * DC, rounded >> 2; row 0 is skipped by the loop below. */
+        ILVRL_B2_UH(zero, src_left1, vec1, vec2);
+        vec0 = (v8u16) __msa_fill_h(addition);
+        vec0 *= 3;
+        ADD2(vec1, vec0, vec2, vec0, vec1, vec2);
+        SRARI_H2_UH(vec1, vec2, 2);
+        store = (v16u8) __msa_pckev_b((v16i8) vec2, (v16i8) vec1);
+
+        for (col = 1; col < 16; col++) {
+            tmp_dst[stride * col] = store[col];
+        }
+    }
+}
+
+static void hevc_intra_pred_dc_32x32_msa(const uint8_t *src_top,
+                                         const uint8_t *src_left,
+                                         uint8_t *dst, int32_t stride)
+{
+    uint32_t row;
+    v16u8 src_above1, src_above2, store, src_left1, src_left2;
+    v8u16 sum_above1, sum_above2;
+    v8u16 sum_left1, sum_left2;
+    v8u16 sum, sum_above, sum_left;
+    /* DC value: rounded (>> 6) sum of the 32 top and 32 left bytes; no flag/edge pass here. */
+    LD_UB2(src_top, 16, src_above1, src_above2);
+    LD_UB2(src_left, 16, src_left1, src_left2);
+    HADD_UB2_UH(src_above1, src_above2, sum_above1, sum_above2);
+    HADD_UB2_UH(src_left1, src_left2, sum_left1, sum_left2);
+    sum_above = sum_above1 + sum_above2;
+    sum_left = sum_left1 + sum_left2;
+    sum = sum_above + sum_left;
+    sum = (v8u16) __msa_hadd_u_w(sum, sum);
+    sum = (v8u16) __msa_hadd_u_d((v4u32) sum, (v4u32) sum);
+    sum = (v8u16) __msa_pckev_w((v4i32) sum, (v4i32) sum);
+    sum = (v8u16) __msa_hadd_u_d((v4u32) sum, (v4u32) sum);
+    sum = (v8u16) __msa_srari_w((v4i32) sum, 6);
+    store = (v16u8) __msa_splati_b((v16i8) sum, 0);
+    /* 16 iterations x 2 rows; each row is written as two vectors at dst and dst + 16. */
+    for (row = 16; row--;) {
+        ST_UB2(store, store, dst, 16);
+        dst += stride;
+        ST_UB2(store, store, dst, 16);
+        dst += stride;
+    }
+}
+
+static void hevc_intra_pred_plane_4x4_msa(const uint8_t *src_top,
+                                          const uint8_t *src_left,
+                                          uint8_t *dst, int32_t stride)
+{
+    uint32_t src0, src1;
+    v16i8 src_vec0, src_vec1;
+    v8i16 src_vec0_r, src1_r, tmp0, tmp1, mul_val1;
+    v8i16 vec0, vec1, vec2, vec3, res0, res1, res2, res3;
+    v8i16 mul_val0 = { 3, 2, 1, 0, 1, 2, 3, 4 };
+    v16i8 zero = { 0 };
+    /* Planar 4x4: reads src_top[0..4] and src_left[0..4]; lanes 0..3 of res<y> form row y. */
+    src0 = LW(src_top);
+    src1 = LW(src_left);
+    /* Upper half of mul_val0 (1, 2, 3, 4) duplicated: the (x + 1) weights. */
+    mul_val1 = (v8i16) __msa_pckod_d((v2i64) mul_val0, (v2i64) mul_val0);
+
+    src_vec0 = (v16i8) __msa_insert_w((v4i32) zero, 0, src0);
+    src_vec1 = (v16i8) __msa_insert_w((v4i32) zero, 0, src1);
+
+    ILVR_B2_SH(zero, src_vec0, zero, src_vec1, src_vec0_r, src1_r);
+    SPLATI_H4_SH(src1_r, 0, 1, 2, 3, vec0, vec1, vec2, vec3);
+
+    tmp0 = __msa_fill_h(src_top[4]);
+    tmp1 = __msa_fill_h(src_left[4]);
+    /* res<y>[x] = (3 - x) * src_left[y] ... */
+    MUL4(mul_val0, vec0, mul_val0, vec1, mul_val0, vec2, mul_val0, vec3,
+         res0, res1, res2, res3);
+    /* ... + (x + 1) * src_top[4] ... */
+    res0 += mul_val1 * tmp0;
+    res1 += mul_val1 * tmp0;
+    res2 += mul_val1 * tmp0;
+    res3 += mul_val1 * tmp0;
+    /* ... + (3 - y) * src_top[x] + (y + 1) * src_left[4]. */
+    res0 += 3 * src_vec0_r;
+    res1 += 2 * src_vec0_r;
+    res2 += src_vec0_r;
+    res0 += tmp1;
+    res1 += 2 * tmp1;
+    res2 += 3 * tmp1;
+    res3 += 4 * tmp1;
+    /* Keep the low halves only, apply the rounding shift by 3, pack to bytes and store. */
+    PCKEV_D2_SH(res1, res0, res3, res2, res0, res1);
+    SRARI_H2_SH(res0, res1, 3);
+    src_vec0 = __msa_pckev_b((v16i8) res1, (v16i8) res0);
+    ST4x4_UB(src_vec0, src_vec0, 0, 1, 2, 3, dst, stride);
+}
+
+static void hevc_intra_pred_plane_8x8_msa(const uint8_t *src_top,
+                                          const uint8_t *src_left,
+                                          uint8_t *dst, int32_t stride)
+{
+    uint64_t src0, src1;
+    v16i8 src_vec0, src_vec1, src_vec2, src_vec3;
+    v8i16 src_vec0_r, src_vec1_r;
+    v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v8i16 res0, res1, res2, res3, res4, res5, res6, res7;
+    v8i16 tmp0, tmp1, tmp2;
+    v8i16 mul_val1 = { 1, 2, 3, 4, 5, 6, 7, 8 };
+    v8i16 mul_val0 = { 7, 6, 5, 4, 3, 2, 1, 0 };
+    v16i8 zero = { 0 };
+    /* Planar 8x8: reads src_top[0..8] and src_left[0..8]; res<y> accumulates row y. */
+    src0 = LD(src_top);
+    src1 = LD(src_left);
+
+    src_vec0 = (v16i8) __msa_insert_d((v2i64) zero, 0, src0);
+    src_vec1 = (v16i8) __msa_insert_d((v2i64) zero, 0, src1);
+
+    ILVR_B2_SH(zero, src_vec0, zero, src_vec1, src_vec0_r, src_vec1_r);
+    SPLATI_H4_SH(src_vec1_r, 0, 1, 2, 3, vec0, vec1, vec2, vec3);
+    SPLATI_H4_SH(src_vec1_r, 4, 5, 6, 7, vec4, vec5, vec6, vec7);
+
+    tmp0 = __msa_fill_h(src_top[8]);
+    tmp1 = __msa_fill_h(src_left[8]);
+    /* res<y>[x] = (7 - x) * src_left[y] ... */
+    MUL4(mul_val0, vec0, mul_val0, vec1, mul_val0, vec2, mul_val0, vec3,
+         res0, res1, res2, res3);
+    MUL4(mul_val0, vec4, mul_val0, vec5, mul_val0, vec6, mul_val0, vec7,
+         res4, res5, res6, res7);
+    /* ... + (x + 1) * src_top[8], identical for every row ... */
+    tmp2 = mul_val1 * tmp0;
+    res0 += tmp2;
+    res1 += tmp2;
+    res2 += tmp2;
+    res3 += tmp2;
+    res4 += tmp2;
+    res5 += tmp2;
+    res6 += tmp2;
+    res7 += tmp2;
+    /* ... + (7 - y) * src_top[x]; the weight is 0 for row 7, so res7 is skipped ... */
+    res0 += 7 * src_vec0_r;
+    res1 += 6 * src_vec0_r;
+    res2 += 5 * src_vec0_r;
+    res3 += 4 * src_vec0_r;
+    res4 += 3 * src_vec0_r;
+    res5 += 2 * src_vec0_r;
+    res6 += src_vec0_r;
+    /* ... + (y + 1) * src_left[8]. */
+    res0 += tmp1;
+    res1 += 2 * tmp1;
+    res2 += 3 * tmp1;
+    res3 += 4 * tmp1;
+    res4 += 5 * tmp1;
+    res5 += 6 * tmp1;
+    res6 += 7 * tmp1;
+    res7 += 8 * tmp1;
+    /* Rounding shift by 4, then pack two rows per byte vector for the 8x8 store. */
+    SRARI_H4_SH(res0, res1, res2, res3, 4);
+    SRARI_H4_SH(res4, res5, res6, res7, 4);
+    PCKEV_B4_SB(res1, res0, res3, res2, res5, res4, res7, res6,
+                src_vec0, src_vec1, src_vec2, src_vec3);
+
+    ST8x8_UB(src_vec0, src_vec1, src_vec2, src_vec3, dst, stride);
+}
+
+static void hevc_intra_pred_plane_16x16_msa(const uint8_t *src_top,
+                                            const uint8_t *src_left,
+                                            uint8_t *dst, int32_t stride)
+{
+    v16u8 src0, src1;
+    v8i16 src0_r, src1_r, src0_l, src1_l;
+    v8i16 vec0, vec1;
+    v8i16 res0, res1, tmp0, tmp1;
+    v8i16 mul_val2, mul_val3;
+    v8i16 mul_val1 = { 1, 2, 3, 4, 5, 6, 7, 8 };
+    v8i16 mul_val0 = { 15, 14, 13, 12, 11, 10, 9, 8 };
+    /* Planar 16x16: reads src_top[0..16] and src_left[0..16]; two rows per macro call. */
+    src0 = LD_UB(src_top);
+    src1 = LD_UB(src_left);
+
+    UNPCK_UB_SH(src0, src0_r, src0_l);
+    UNPCK_UB_SH(src1, src1_r, src1_l);
+    /* Weights for the second 8 columns: 7..0 and 9..16. */
+    mul_val2 = mul_val0 - 8;
+    mul_val3 = mul_val1 + 8;
+
+    tmp0 = __msa_fill_h(src_top[16]);
+    tmp1 = __msa_fill_h(src_left[16]);
+    /* Each call: src_top weights (b0, b0 - 1), tmp1 weights (b1, b1 + 1), round = 5. */
+    SPLATI_H2_SH(src1_r, 0, 1, vec0, vec1);
+    HEVC_PRED_PLANAR_16x2(src0_r, src0_l, tmp0, tmp1, vec0, vec1,
+                          mul_val0, mul_val1, mul_val2, mul_val3,
+                          res0, res1, 15, 1, 5);
+    ST_SH2(res0, res1, dst, stride);
+    dst += (2 * stride);
+
+    SPLATI_H2_SH(src1_r, 2, 3, vec0, vec1);
+    HEVC_PRED_PLANAR_16x2(src0_r, src0_l, tmp0, tmp1, vec0, vec1,
+                          mul_val0, mul_val1, mul_val2, mul_val3,
+                          res0, res1, 13, 3, 5);
+    ST_SH2(res0, res1, dst, stride);
+    dst += (2 * stride);
+
+    SPLATI_H2_SH(src1_r, 4, 5, vec0, vec1);
+    HEVC_PRED_PLANAR_16x2(src0_r, src0_l, tmp0, tmp1, vec0, vec1,
+                          mul_val0, mul_val1, mul_val2, mul_val3,
+                          res0, res1, 11, 5, 5);
+    ST_SH2(res0, res1, dst, stride);
+    dst += (2 * stride);
+
+    SPLATI_H2_SH(src1_r, 6, 7, vec0, vec1);
+    HEVC_PRED_PLANAR_16x2(src0_r, src0_l, tmp0, tmp1, vec0, vec1,
+                          mul_val0, mul_val1, mul_val2, mul_val3,
+                          res0, res1, 9, 7, 5);
+    ST_SH2(res0, res1, dst, stride);
+    dst += (2 * stride);
+    /* The remaining 8 rows splat their left samples from src1_l instead of src1_r. */
+    SPLATI_H2_SH(src1_l, 0, 1, vec0, vec1);
+    HEVC_PRED_PLANAR_16x2(src0_r, src0_l, tmp0, tmp1, vec0, vec1,
+                          mul_val0, mul_val1, mul_val2, mul_val3,
+                          res0, res1, 7, 9, 5);
+    ST_SH2(res0, res1, dst, stride);
+    dst += (2 * stride);
+
+    SPLATI_H2_SH(src1_l, 2, 3, vec0, vec1);
+    HEVC_PRED_PLANAR_16x2(src0_r, src0_l, tmp0, tmp1, vec0, vec1,
+                          mul_val0, mul_val1, mul_val2, mul_val3,
+                          res0, res1, 5, 11, 5);
+    ST_SH2(res0, res1, dst, stride);
+    dst += (2 * stride);
+
+    SPLATI_H2_SH(src1_l, 4, 5, vec0, vec1);
+    HEVC_PRED_PLANAR_16x2(src0_r, src0_l, tmp0, tmp1, vec0, vec1,
+                          mul_val0, mul_val1, mul_val2, mul_val3,
+                          res0, res1, 3, 13, 5);
+    ST_SH2(res0, res1, dst, stride);
+    dst += (2 * stride);
+
+    SPLATI_H2_SH(src1_l, 6, 7, vec0, vec1);
+    HEVC_PRED_PLANAR_16x2(src0_r, src0_l, tmp0, tmp1, vec0, vec1,
+                          mul_val0, mul_val1, mul_val2, mul_val3,
+                          res0, res1, 1, 15, 5);
+    ST_SH2(res0, res1, dst, stride);
+}
+
+static void process_intra_upper_16x16_msa(const uint8_t *src_top,
+                                          const uint8_t *src_left,
+                                          uint8_t *dst, int32_t stride,
+                                          uint8_t offset)
+{   /* Planar-prediction helper: eight HEVC_PRED_PLANAR_16x2 steps, each followed by a two-vector store. */
+    v16i8 src0, src1;
+    v8i16 src0_r, src1_r, src0_l, src1_l;
+    v8i16 vec0, vec1, res0, res1;
+    v8i16 tmp0, tmp1;
+    v8i16 mul_val2, mul_val3;
+    v8i16 mul_val1 = { 1, 2, 3, 4, 5, 6, 7, 8 };         /* rising per-lane weights */
+    v8i16 mul_val0 = { 31, 30, 29, 28, 27, 26, 25, 24 }; /* falling per-lane weights */
+    /* Broadcast two fixed neighbours; the caller in this file passes offset equal to its shift of src_top. */
+    tmp0 = __msa_fill_h(src_top[32 - offset]);
+    tmp1 = __msa_fill_h(src_left[32]);
+    /* One vector each from the top row and from the left column. */
+    src0 = LD_SB(src_top);
+    src1 = LD_SB(src_left);
+    /* NOTE(review): presumably widens bytes to halfwords, split into _r/_l halves - confirm in the macro. */
+    UNPCK_UB_SH(src0, src0_r, src0_l);
+    UNPCK_UB_SH(src1, src1_r, src1_l);
+    /* Shift the lane weights by offset; mul_val2/mul_val3 are the same ramps moved by 8. */
+    mul_val1 += offset;
+    mul_val0 -= offset;
+    mul_val2 = mul_val0 - 8;
+    mul_val3 = mul_val1 + 8;
+    /* Per step: splat two left samples; the literals go to the macro as-is (presumably row weights and shift). */
+    SPLATI_H2_SH(src1_r, 0, 1, vec0, vec1);
+    HEVC_PRED_PLANAR_16x2(src0_r, src0_l, tmp0, tmp1, vec0, vec1,
+                          mul_val0, mul_val1, mul_val2, mul_val3,
+                          res0, res1, 31, 1, 6);
+    ST_SH2(res0, res1, dst, stride);
+    dst += (2 * stride);
+
+    SPLATI_H2_SH(src1_r, 2, 3, vec0, vec1);
+    HEVC_PRED_PLANAR_16x2(src0_r, src0_l, tmp0, tmp1, vec0, vec1,
+                          mul_val0, mul_val1, mul_val2, mul_val3,
+                          res0, res1, 29, 3, 6);
+    ST_SH2(res0, res1, dst, stride);
+    dst += (2 * stride);
+
+    SPLATI_H2_SH(src1_r, 4, 5, vec0, vec1);
+    HEVC_PRED_PLANAR_16x2(src0_r, src0_l, tmp0, tmp1, vec0, vec1,
+                          mul_val0, mul_val1, mul_val2, mul_val3,
+                          res0, res1, 27, 5, 6);
+    ST_SH2(res0, res1, dst, stride);
+    dst += (2 * stride);
+
+    SPLATI_H2_SH(src1_r, 6, 7, vec0, vec1);
+    HEVC_PRED_PLANAR_16x2(src0_r, src0_l, tmp0, tmp1, vec0, vec1,
+                          mul_val0, mul_val1, mul_val2, mul_val3,
+                          res0, res1, 25, 7, 6);
+    ST_SH2(res0, res1, dst, stride);
+    dst += (2 * stride);
+    /* The remaining four steps take their left samples from the second half (src1_l). */
+    SPLATI_H2_SH(src1_l, 0, 1, vec0, vec1);
+    HEVC_PRED_PLANAR_16x2(src0_r, src0_l, tmp0, tmp1, vec0, vec1,
+                          mul_val0, mul_val1, mul_val2, mul_val3,
+                          res0, res1, 23, 9, 6);
+    ST_SH2(res0, res1, dst, stride);
+    dst += (2 * stride);
+
+    SPLATI_H2_SH(src1_l, 2, 3, vec0, vec1);
+    HEVC_PRED_PLANAR_16x2(src0_r, src0_l, tmp0, tmp1, vec0, vec1,
+                          mul_val0, mul_val1, mul_val2, mul_val3,
+                          res0, res1, 21, 11, 6);
+    ST_SH2(res0, res1, dst, stride);
+    dst += (2 * stride);
+
+    SPLATI_H2_SH(src1_l, 4, 5, vec0, vec1);
+    HEVC_PRED_PLANAR_16x2(src0_r, src0_l, tmp0, tmp1, vec0, vec1,
+                          mul_val0, mul_val1, mul_val2, mul_val3,
+                          res0, res1, 19, 13, 6);
+    ST_SH2(res0, res1, dst, stride);
+    dst += (2 * stride);
+
+    SPLATI_H2_SH(src1_l, 6, 7, vec0, vec1);
+    HEVC_PRED_PLANAR_16x2(src0_r, src0_l, tmp0, tmp1, vec0, vec1,
+                          mul_val0, mul_val1, mul_val2, mul_val3,
+                          res0, res1, 17, 15, 6);
+    ST_SH2(res0, res1, dst, stride);
+}
+
+static void process_intra_lower_16x16_msa(const uint8_t *src_top,
+                                          const uint8_t *src_left,
+                                          uint8_t *dst, int32_t stride,
+                                          uint8_t offset)
+{   /* Planar-prediction helper: same eight-step layout as the "upper" variant, with literals 15/17 .. 1/31. */
+    v16i8 src0, src1;
+    v8i16 src0_r, src1_r, src0_l, src1_l;
+    v8i16 vec0, vec1, res0, res1, tmp0, tmp1;
+    v8i16 mul_val2, mul_val3;
+    v8i16 mul_val1 = { 1, 2, 3, 4, 5, 6, 7, 8 };         /* rising per-lane weights */
+    v8i16 mul_val0 = { 31, 30, 29, 28, 27, 26, 25, 24 }; /* falling per-lane weights */
+    /* Broadcast two fixed neighbours; the caller in this file has already advanced src_left by 16. */
+    tmp0 = __msa_fill_h(src_top[32 - offset]);
+    tmp1 = __msa_fill_h(src_left[16]);
+    /* One vector each from the top row and from the left column. */
+    src0 = LD_SB(src_top);
+    src1 = LD_SB(src_left);
+    /* NOTE(review): presumably widens bytes to halfwords, split into _r/_l halves - confirm in the macro. */
+    UNPCK_UB_SH(src0, src0_r, src0_l);
+    UNPCK_UB_SH(src1, src1_r, src1_l);
+    /* Shift the lane weights by offset; mul_val2/mul_val3 are the same ramps moved by 8. */
+    mul_val1 += offset;
+    mul_val0 -= offset;
+    mul_val2 = mul_val0 - 8;
+    mul_val3 = mul_val1 + 8;
+    /* Per step: splat two left samples; the literals go to the macro as-is (presumably row weights and shift). */
+    SPLATI_H2_SH(src1_r, 0, 1, vec0, vec1);
+    HEVC_PRED_PLANAR_16x2(src0_r, src0_l, tmp0, tmp1, vec0, vec1,
+                          mul_val0, mul_val1, mul_val2, mul_val3,
+                          res0, res1, 15, 17, 6);
+    ST_SH2(res0, res1, dst, stride);
+    dst += (2 * stride);
+
+    SPLATI_H2_SH(src1_r, 2, 3, vec0, vec1);
+    HEVC_PRED_PLANAR_16x2(src0_r, src0_l, tmp0, tmp1, vec0, vec1,
+                          mul_val0, mul_val1, mul_val2, mul_val3,
+                          res0, res1, 13, 19, 6);
+    ST_SH2(res0, res1, dst, stride);
+    dst += (2 * stride);
+
+    SPLATI_H2_SH(src1_r, 4, 5, vec0, vec1);
+    HEVC_PRED_PLANAR_16x2(src0_r, src0_l, tmp0, tmp1, vec0, vec1,
+                          mul_val0, mul_val1, mul_val2, mul_val3,
+                          res0, res1, 11, 21, 6);
+    ST_SH2(res0, res1, dst, stride);
+    dst += (2 * stride);
+
+    SPLATI_H2_SH(src1_r, 6, 7, vec0, vec1);
+    HEVC_PRED_PLANAR_16x2(src0_r, src0_l, tmp0, tmp1, vec0, vec1,
+                          mul_val0, mul_val1, mul_val2, mul_val3,
+                          res0, res1, 9, 23, 6);
+    ST_SH2(res0, res1, dst, stride);
+    dst += (2 * stride);
+    /* The remaining four steps take their left samples from the second half (src1_l). */
+    SPLATI_H2_SH(src1_l, 0, 1, vec0, vec1);
+    HEVC_PRED_PLANAR_16x2(src0_r, src0_l, tmp0, tmp1, vec0, vec1,
+                          mul_val0, mul_val1, mul_val2, mul_val3,
+                          res0, res1, 7, 25, 6);
+    ST_SH2(res0, res1, dst, stride);
+    dst += (2 * stride);
+
+    SPLATI_H2_SH(src1_l, 2, 3, vec0, vec1);
+    HEVC_PRED_PLANAR_16x2(src0_r, src0_l, tmp0, tmp1, vec0, vec1,
+                          mul_val0, mul_val1, mul_val2, mul_val3,
+                          res0, res1, 5, 27, 6);
+    ST_SH2(res0, res1, dst, stride);
+    dst += (2 * stride);
+
+    SPLATI_H2_SH(src1_l, 4, 5, vec0, vec1);
+    HEVC_PRED_PLANAR_16x2(src0_r, src0_l, tmp0, tmp1, vec0, vec1,
+                          mul_val0, mul_val1, mul_val2, mul_val3,
+                          res0, res1, 3, 29, 6);
+    ST_SH2(res0, res1, dst, stride);
+    dst += (2 * stride);
+
+    SPLATI_H2_SH(src1_l, 6, 7, vec0, vec1);
+    HEVC_PRED_PLANAR_16x2(src0_r, src0_l, tmp0, tmp1, vec0, vec1,
+                          mul_val0, mul_val1, mul_val2, mul_val3,
+                          res0, res1, 1, 31, 6);
+    ST_SH2(res0, res1, dst, stride);
+}
+
+static void hevc_intra_pred_plane_32x32_msa(const uint8_t *src_top,
+                                            const uint8_t *src_left,
+                                            uint8_t *dst, int32_t stride)
+{
+    /* 32x32 planar prediction assembled from four 16x16 helper calls. */
+    const uint8_t *top_r = src_top + 16;    /* top reference, right half  */
+    const uint8_t *left_lo = src_left + 16; /* left reference, lower half */
+    uint8_t *dst_lo = dst + 16 * stride;    /* first row of the lower half */
+
+    process_intra_upper_16x16_msa(src_top, src_left, dst, stride, 0);
+    process_intra_upper_16x16_msa(top_r, src_left, dst + 16, stride, 16);
+    process_intra_lower_16x16_msa(src_top, left_lo, dst_lo, stride, 0);
+    process_intra_lower_16x16_msa(top_r, left_lo, dst_lo + 16, stride, 16);
+}
+
+static void hevc_intra_pred_angular_upper_4width_msa(const uint8_t *src_top,
+                                                     const uint8_t *src_left,
+                                                     uint8_t *dst,
+                                                     int32_t stride,
+                                                     int32_t mode)
+{   /* 4x4 angular intra prediction driven by the top reference; mode indexes the tables as mode - 18. */
+    int16_t inv_angle[] = { -256, -315, -390, -482, -630, -910, -1638, -4096 };
+    uint8_t ref_array[3 * 32 + 4];
+    uint8_t *ref_tmp = ref_array + 4;
+    const uint8_t *ref;
+    int32_t last;
+    int32_t h_cnt, idx0, fact_val0, idx1, fact_val1;
+    int32_t idx2, fact_val2, idx3, fact_val3;
+    int32_t angle, angle_loop;
+    int32_t inv_angle_val, offset;
+    uint64_t tmp0;
+    v16i8 top0, top1, top2, top3;
+    v16i8 dst_val0;
+    v16i8 zero = { 0 };
+    v8i16 diff0, diff1, diff2, diff3, diff4, diff5, diff6, diff7;
+    v8i16 fact0, fact1, fact2, fact3, fact4, fact5, fact6, fact7;
+
+    angle = intra_pred_angle_up[mode - 18];
+    last = (angle) >> 3;
+    angle_loop = angle;
+
+    ref = src_top - 1;
+    if (angle < 0 && last < -1) {
+        /* inv_angle[] has only 8 entries, so it is indexed solely on this negative-angle path. */
+        inv_angle_val = inv_angle[mode - 18];
+        /* Copy 8 bytes starting at src_top[-1] into the scratch reference. */
+        tmp0 = LD(ref);
+        SD(tmp0, ref_tmp);
+        /* Extend the reference to the left: ref_tmp[last..-1] come from src_left. */
+        for (h_cnt = last; h_cnt <= -1; h_cnt++) {
+            offset = -1 + ((h_cnt * inv_angle_val + 128) >> 8);
+            ref_tmp[h_cnt] = src_left[offset];
+        }
+
+        ref = ref_tmp;
+    }
+    /* Per row N: idxN is the whole-sample step, fact_valN the 1/32 fraction of the accumulated angle. */
+    idx0 = angle_loop >> 5;
+    fact_val0 = angle_loop & 31;
+    angle_loop += angle;
+
+    idx1 = angle_loop >> 5;
+    fact_val1 = angle_loop & 31;
+    angle_loop += angle;
+
+    idx2 = angle_loop >> 5;
+    fact_val2 = angle_loop & 31;
+    angle_loop += angle;
+
+    idx3 = angle_loop >> 5;
+    fact_val3 = angle_loop & 31;
+
+    top0 = LD_SB(ref + idx0 + 1);
+    top1 = LD_SB(ref + idx1 + 1);
+    top2 = LD_SB(ref + idx2 + 1);
+    top3 = LD_SB(ref + idx3 + 1);
+    /* Even factN hold the fraction, odd factN its complement to 32. */
+    fact0 = __msa_fill_h(fact_val0);
+    fact1 = __msa_fill_h(32 - fact_val0);
+
+    fact2 = __msa_fill_h(fact_val1);
+    fact3 = __msa_fill_h(32 - fact_val1);
+
+    fact4 = __msa_fill_h(fact_val2);
+    fact5 = __msa_fill_h(32 - fact_val2);
+
+    fact6 = __msa_fill_h(fact_val3);
+    fact7 = __msa_fill_h(32 - fact_val3);
+    /* NOTE(review): the macros below presumably pair two rows per vector and build a one-sample-advanced copy. */
+    ILVR_D2_SH(fact2, fact0, fact6, fact4, fact0, fact2);
+    ILVR_D2_SH(fact3, fact1, fact7, fact5, fact1, fact3);
+    ILVR_B4_SH(zero, top0, zero, top1, zero, top2, zero, top3,
+               diff0, diff2, diff4, diff6);
+    SLDI_B4_0_SH(diff0, diff2, diff4, diff6, diff1, diff3, diff5, diff7, 2);
+    ILVR_D2_SH(diff2, diff0, diff6, diff4, diff0, diff2);
+    ILVR_D2_SH(diff3, diff1, diff7, diff5, diff1, diff3);
+    MUL2(diff1, fact0, diff3, fact2, diff1, diff3);
+    /* Weighted sum: advanced * fraction + current * (32 - fraction). */
+    diff1 += diff0 * fact1;
+    diff3 += diff2 * fact3;
+    /* Shift by 5 (presumably rounding), pack back to bytes, store four 4-byte rows. */
+    SRARI_H2_SH(diff1, diff3, 5);
+    dst_val0 = __msa_pckev_b((v16i8) diff3, (v16i8) diff1);
+    ST4x4_UB(dst_val0, dst_val0, 0, 1, 2, 3, dst, stride);
+}
+
+static void hevc_intra_pred_angular_upper_8width_msa(const uint8_t *src_top,
+                                                     const uint8_t *src_left,
+                                                     uint8_t *dst,
+                                                     int32_t stride,
+                                                     int32_t mode)
+{   /* 8-wide angular intra prediction driven by the top reference; two passes of four rows each. */
+    int16_t inv_angle[] = { -256, -315, -390, -482, -630, -910, -1638, -4096 };
+    uint8_t ref_array[3 * 32 + 4];
+    uint8_t *ref_tmp = ref_array + 8;
+    const uint8_t *ref;
+    const uint8_t *src_left_tmp = src_left - 1;
+    int32_t last, offset;
+    int32_t h_cnt, v_cnt, idx0, fact_val0, idx1, fact_val1;
+    int32_t idx2, fact_val2, idx3, fact_val3;
+    int32_t angle, angle_loop;
+    int32_t inv_angle_val, inv_angle_val_loop;
+    int32_t tmp0, tmp1, tmp2;
+    v16i8 top0, top1, top2, top3;
+    v16u8 dst_val0, dst_val1;
+    v8i16 fact0, fact1, fact2, fact3, fact4, fact5, fact6, fact7;
+    v8i16 diff0, diff1, diff2, diff3, diff4, diff5, diff6, diff7;
+
+    angle = intra_pred_angle_up[mode - 18];
+    last = (angle) >> 2;
+    angle_loop = angle;
+
+    ref = src_top - 1;
+    if (last < -1) {
+        /* inv_angle[] has only 8 entries, so it is indexed solely on this negative-angle path. */
+        inv_angle_val = inv_angle[mode - 18];
+        inv_angle_val_loop = inv_angle_val * last;
+        tmp0 = LW(ref);             /* copy 12 bytes starting at src_top[-1] */
+        tmp1 = LW(ref + 4);
+        tmp2 = LW(ref + 8);
+        SW(tmp0, ref_tmp);
+        SW(tmp1, ref_tmp + 4);
+        SW(tmp2, ref_tmp + 8);
+        /* Extend the reference to the left: ref_tmp[last..-1] come from src_left. */
+        for (h_cnt = last; h_cnt <= -1; h_cnt++) {
+            offset = (inv_angle_val_loop + 128) >> 8;
+            ref_tmp[h_cnt] = src_left_tmp[offset];
+            inv_angle_val_loop += inv_angle_val;
+        }
+        ref = ref_tmp;
+    }
+    /* Per row N: idxN is the whole-sample step, fact_valN the 1/32 fraction of the accumulated angle. */
+    for (v_cnt = 0; v_cnt < 2; v_cnt++) {
+        idx0 = (angle_loop) >> 5;
+        fact_val0 = (angle_loop) & 31;
+        angle_loop += angle;
+
+        idx1 = (angle_loop) >> 5;
+        fact_val1 = (angle_loop) & 31;
+        angle_loop += angle;
+
+        idx2 = (angle_loop) >> 5;
+        fact_val2 = (angle_loop) & 31;
+        angle_loop += angle;
+
+        idx3 = (angle_loop) >> 5;
+        fact_val3 = (angle_loop) & 31;
+        angle_loop += angle;
+
+        top0 = LD_SB(ref + idx0 + 1);
+        top1 = LD_SB(ref + idx1 + 1);
+        top2 = LD_SB(ref + idx2 + 1);
+        top3 = LD_SB(ref + idx3 + 1);
+        /* Even factN hold the fraction, odd factN its complement to 32. */
+        fact0 = __msa_fill_h(fact_val0);
+        fact1 = __msa_fill_h(32 - fact_val0);
+        fact2 = __msa_fill_h(fact_val1);
+        fact3 = __msa_fill_h(32 - fact_val1);
+        fact4 = __msa_fill_h(fact_val2);
+        fact5 = __msa_fill_h(32 - fact_val2);
+        fact6 = __msa_fill_h(fact_val3);
+        fact7 = __msa_fill_h(32 - fact_val3);
+        /* NOTE(review): presumably widens to halfwords, then builds a one-sample-advanced copy in the odd diffs. */
+        UNPCK_UB_SH(top0, diff0, diff1);
+        UNPCK_UB_SH(top1, diff2, diff3);
+        UNPCK_UB_SH(top2, diff4, diff5);
+        UNPCK_UB_SH(top3, diff6, diff7);
+
+        SLDI_B2_SH(diff1, diff3, diff0, diff2, diff1, diff3, 2);
+        SLDI_B2_SH(diff5, diff7, diff4, diff6, diff5, diff7, 2);
+        MUL4(diff1, fact0, diff3, fact2, diff5, fact4, diff7, fact6,
+             diff1, diff3, diff5, diff7);
+        /* Weighted sum: advanced * fraction + current * (32 - fraction). */
+        diff1 += diff0 * fact1;
+        diff3 += diff2 * fact3;
+        diff5 += diff4 * fact5;
+        diff7 += diff6 * fact7;
+        /* Shift by 5 (presumably rounding), pack back to bytes, store four rows. */
+        SRARI_H4_SH(diff1, diff3, diff5, diff7, 5);
+        PCKEV_B2_UB(diff3, diff1, diff7, diff5, dst_val0, dst_val1);
+        ST8x4_UB(dst_val0, dst_val1, dst, stride);
+        dst += (4 * stride);
+    }
+}
+
+static void hevc_intra_pred_angular_upper_16width_msa(const uint8_t *src_top,
+                                                      const uint8_t *src_left,
+                                                      uint8_t *dst,
+                                                      int32_t stride,
+                                                      int32_t mode)
+{   /* 16-wide angular intra prediction driven by the top reference; four passes of four rows each. */
+    int16_t inv_angle[] = { -256, -315, -390, -482, -630, -910, -1638, -4096 };
+    int32_t h_cnt, v_cnt, idx0, fact_val0, idx1, fact_val1;
+    int32_t idx2, fact_val2, idx3, fact_val3;
+    int32_t tmp0;
+    int32_t angle, angle_loop, offset;
+    int32_t inv_angle_val, inv_angle_val_loop;
+    uint8_t ref_array[3 * 32 + 4];
+    uint8_t *ref_tmp = ref_array + 16;
+    const uint8_t *ref;
+    const uint8_t *src_left_tmp = src_left - 1;
+    int32_t last;
+    v16u8 top0, top1, top2, top3, top4, top5, top6, top7;
+    v16i8 dst0, dst1, dst2, dst3;
+    v8i16 fact0, fact1, fact2, fact3, fact4, fact5, fact6, fact7;
+    v8i16 diff0, diff1, diff2, diff3, diff4, diff5, diff6, diff7;
+    v8i16 diff8, diff9, diff10, diff11, diff12, diff13, diff14, diff15;
+
+    angle = intra_pred_angle_up[mode - 18];
+    last = angle >> 1;
+    angle_loop = angle;
+
+    ref = src_top - 1;
+    if (last < -1) {
+        /* inv_angle[] has only 8 entries, so it is indexed solely on this negative-angle path. */
+        inv_angle_val = inv_angle[mode - 18];
+        inv_angle_val_loop = inv_angle_val * last;
+        top0 = LD_UB(ref);          /* copy one vector plus 4 bytes starting at src_top[-1] */
+        tmp0 = LW(ref + 16);
+        ST_UB(top0, ref_tmp);
+        SW(tmp0, ref_tmp + 16);
+        /* Extend the reference to the left: ref_tmp[last..-1] come from src_left. */
+        for (h_cnt = last; h_cnt <= -1; h_cnt++) {
+            offset = (inv_angle_val_loop + 128) >> 8;
+            ref_tmp[h_cnt] = src_left_tmp[offset];
+            inv_angle_val_loop += inv_angle_val;
+        }
+        ref = ref_tmp;
+    }
+    /* Per row N: idxN is the whole-sample step, fact_valN the 1/32 fraction of the accumulated angle. */
+    for (v_cnt = 4; v_cnt--;) {
+        idx0 = (angle_loop) >> 5;
+        fact_val0 = (angle_loop) & 31;
+        angle_loop += angle;
+
+        idx1 = (angle_loop) >> 5;
+        fact_val1 = (angle_loop) & 31;
+        angle_loop += angle;
+
+        idx2 = (angle_loop) >> 5;
+        fact_val2 = (angle_loop) & 31;
+        angle_loop += angle;
+
+        idx3 = (angle_loop) >> 5;
+        fact_val3 = (angle_loop) & 31;
+        angle_loop += angle;
+        /* Two consecutive vectors per row so a one-sample-advanced copy can be formed. */
+        LD_UB2(ref + idx0 + 1, 16, top0, top1);
+        LD_UB2(ref + idx1 + 1, 16, top2, top3);
+        LD_UB2(ref + idx2 + 1, 16, top4, top5);
+        LD_UB2(ref + idx3 + 1, 16, top6, top7);
+        /* Even factN hold the fraction, odd factN its complement to 32. */
+        fact0 = __msa_fill_h(fact_val0);
+        fact1 = __msa_fill_h(32 - fact_val0);
+        fact2 = __msa_fill_h(fact_val1);
+        fact3 = __msa_fill_h(32 - fact_val1);
+        fact4 = __msa_fill_h(fact_val2);
+        fact5 = __msa_fill_h(32 - fact_val2);
+        fact6 = __msa_fill_h(fact_val3);
+        fact7 = __msa_fill_h(32 - fact_val3);
+        /* NOTE(review): presumably leaves the advanced samples in the odd topN, then widens all to halfwords. */
+        SLDI_B2_UB(top1, top3, top0, top2, top1, top3, 1);
+        SLDI_B2_UB(top5, top7, top4, top6, top5, top7, 1);
+        UNPCK_UB_SH(top0, diff0, diff1);
+        UNPCK_UB_SH(top1, diff2, diff3);
+        UNPCK_UB_SH(top2, diff4, diff5);
+        UNPCK_UB_SH(top3, diff6, diff7);
+        UNPCK_UB_SH(top4, diff8, diff9);
+        UNPCK_UB_SH(top5, diff10, diff11);
+        UNPCK_UB_SH(top6, diff12, diff13);
+        UNPCK_UB_SH(top7, diff14, diff15);
+
+        MUL4(diff2, fact0, diff3, fact0, diff6, fact2, diff7, fact2,
+             diff2, diff3, diff6, diff7);
+        MUL4(diff10, fact4, diff11, fact4, diff14, fact6, diff15, fact6,
+             diff10, diff11, diff14, diff15);
+        /* Weighted sum: advanced * fraction + current * (32 - fraction). */
+        diff2 += diff0 * fact1;
+        diff3 += diff1 * fact1;
+        diff6 += diff4 * fact3;
+        diff7 += diff5 * fact3;
+        diff10 += diff8 * fact5;
+        diff11 += diff9 * fact5;
+        diff14 += diff12 * fact7;
+        diff15 += diff13 * fact7;
+        /* Shift by 5 (presumably rounding), pack back to bytes, store four rows. */
+        SRARI_H4_SH(diff2, diff3, diff6, diff7, 5);
+        SRARI_H4_SH(diff10, diff11, diff14, diff15, 5);
+        PCKEV_B4_SB(diff3, diff2, diff7, diff6, diff11, diff10, diff15, diff14,
+                    dst0, dst1, dst2, dst3);
+        ST_SB4(dst0, dst1, dst2, dst3, dst, stride);
+        dst += (4 * stride);
+    }
+}
+
+static void hevc_intra_pred_angular_upper_32width_msa(const uint8_t *src_top,
+                                                      const uint8_t *src_left,
+                                                      uint8_t *dst,
+                                                      int32_t stride,
+                                                      int32_t mode)
+{   /* 32-wide angular intra prediction driven by the top reference; sixteen passes of two rows each. */
+    int16_t inv_angle[] = { -256, -315, -390, -482, -630, -910, -1638, -4096 };
+    uint8_t ref_array[3 * 32 + 4];
+    uint8_t *ref_tmp;
+    const uint8_t *ref;
+    const uint8_t *src_left_tmp = src_left - 1;
+    int32_t h_cnt, v_cnt, idx0, fact_val0, idx1, fact_val1;
+    int32_t tmp0, tmp1, tmp2, tmp3;
+    int32_t angle, angle_loop;
+    int32_t inv_angle_val, inv_angle_val_loop;
+    int32_t last, offset;
+    v16u8 top0, top1, top2, top3, top4, top5, top6, top7;
+    v16i8 dst0, dst1, dst2, dst3;
+    v8i16 fact0, fact1, fact2, fact3;
+    v8i16 diff0, diff1, diff2, diff3, diff4, diff5, diff6, diff7;
+    v8i16 diff8, diff9, diff10, diff11, diff12, diff13, diff14, diff15;
+
+    ref_tmp = ref_array + 32;
+
+    angle = intra_pred_angle_up[mode - 18];
+    last = angle;
+    angle_loop = angle;
+    /* inv_angle[] has only 8 entries, so it is indexed solely on the negative-angle path below. */
+    ref = src_top - 1;
+    if (last < -1) {
+        inv_angle_val = inv_angle[mode - 18];
+        inv_angle_val_loop = inv_angle_val * last;
+        LD_UB2(ref, 16, top0, top1);
+        tmp0 = ref[32];
+        tmp1 = ref[33];
+        tmp2 = ref[34];
+        tmp3 = ref[35];
+        /* Copy two vectors plus 4 bytes starting at src_top[-1] into the scratch reference. */
+        ST_UB2(top0, top1, ref_tmp, 16);
+        ref_tmp[32] = tmp0;
+        ref_tmp[33] = tmp1;
+        ref_tmp[34] = tmp2;
+        ref_tmp[35] = tmp3;
+        /* Extend the reference to the left: ref_tmp[last..-1] come from src_left. */
+        for (h_cnt = last; h_cnt <= -1; h_cnt++) {
+            offset = (inv_angle_val_loop + 128) >> 8;
+            ref_tmp[h_cnt] = src_left_tmp[offset];
+            inv_angle_val_loop += inv_angle_val;
+        }
+
+        ref = ref_tmp;
+    }
+    /* Per row N: idxN is the whole-sample step, fact_valN the 1/32 fraction of the accumulated angle. */
+    for (v_cnt = 16; v_cnt--;) {
+        idx0 = (angle_loop) >> 5;
+        fact_val0 = (angle_loop) & 31;
+        angle_loop += angle;
+
+        idx1 = (angle_loop) >> 5;
+        fact_val1 = (angle_loop) & 31;
+        angle_loop += angle;
+        /* Three consecutive vectors per row (offsets +1, +17, +33) for the two rows of this pass. */
+        top0 = LD_UB(ref + idx0 + 1);
+        top4 = LD_UB(ref + idx1 + 1);
+        top1 = LD_UB(ref + idx0 + 17);
+        top5 = LD_UB(ref + idx1 + 17);
+        top3 = LD_UB(ref + idx0 + 33);
+        top7 = LD_UB(ref + idx1 + 33);
+        /* fact0/fact2 hold the fractions, fact1/fact3 their complements to 32. */
+        fact0 = __msa_fill_h(fact_val0);
+        fact1 = __msa_fill_h(32 - fact_val0);
+        fact2 = __msa_fill_h(fact_val1);
+        fact3 = __msa_fill_h(32 - fact_val1);
+        /* Keep the unshifted second vector before the slide below overwrites top1/top5. */
+        top2 = top1;
+        top6 = top5;
+        /* NOTE(review): presumably leaves the one-sample-advanced data in the odd topN - confirm in the macro. */
+        SLDI_B2_UB(top1, top3, top0, top2, top1, top3, 1);
+        SLDI_B2_UB(top5, top7, top4, top6, top5, top7, 1);
+        UNPCK_UB_SH(top0, diff0, diff1);
+        UNPCK_UB_SH(top1, diff2, diff3);
+        UNPCK_UB_SH(top2, diff4, diff5);
+        UNPCK_UB_SH(top3, diff6, diff7);
+        UNPCK_UB_SH(top4, diff8, diff9);
+        UNPCK_UB_SH(top5, diff10, diff11);
+        UNPCK_UB_SH(top6, diff12, diff13);
+        UNPCK_UB_SH(top7, diff14, diff15);
+
+        MUL4(diff2, fact0, diff3, fact0, diff6, fact0, diff7, fact0,
+             diff2, diff3, diff6, diff7);
+        MUL4(diff10, fact2, diff11, fact2, diff14, fact2, diff15, fact2,
+             diff10, diff11, diff14, diff15);
+        /* Weighted sum: advanced * fraction + current * (32 - fraction). */
+        diff2 += diff0 * fact1;
+        diff3 += diff1 * fact1;
+        diff6 += diff4 * fact1;
+        diff7 += diff5 * fact1;
+        diff10 += diff8 * fact3;
+        diff11 += diff9 * fact3;
+        diff14 += diff12 * fact3;
+        diff15 += diff13 * fact3;
+        /* Shift by 5 (presumably rounding) and pack back to bytes. */
+        SRARI_H4_SH(diff2, diff3, diff6, diff7, 5);
+        SRARI_H4_SH(diff10, diff11, diff14, diff15, 5);
+        PCKEV_B4_SB(diff3, diff2, diff7, diff6, diff11, diff10, diff15, diff14,
+                    dst0, dst1, dst2, dst3);
+        /* Each row is written as two vectors, 16 bytes apart. */
+        ST_SB2(dst0, dst1, dst, 16);
+        dst += stride;
+        ST_SB2(dst2, dst3, dst, 16);
+        dst += stride;
+    }
+}
+
+static void hevc_intra_pred_angular_lower_4width_msa(const uint8_t *src_top,
+                                                     const uint8_t *src_left,
+                                                     uint8_t *dst,
+                                                     int32_t stride,
+                                                     int32_t mode)
+{   /* 4x4 angular intra prediction driven by the left reference; angle table indexed as mode - 2. */
+    int16_t inv_angle[] = { -4096, -1638, -910, -630, -482, -390, -315 };
+    uint8_t ref_array[3 * 32 + 4];
+    uint8_t *ref_tmp = ref_array + 4;
+    const uint8_t *ref;
+    int32_t last, offset;
+    int32_t h_cnt, idx0, fact_val0, idx1, fact_val1;
+    int32_t idx2, fact_val2, idx3, fact_val3;
+    int32_t angle, angle_loop, inv_angle_val;
+    uint64_t tmp0;
+    v16i8 dst_val0, dst_val1;
+    v16u8 top0, top1, top2, top3;
+    v16u8 zero = { 0 };
+    v8i16 diff0, diff1, diff2, diff3, diff4, diff5, diff6, diff7;
+    v8i16 fact0, fact1, fact2, fact3, fact4, fact5, fact6, fact7;
+
+    angle = intra_pred_angle_low[mode - 2];
+    last = angle >> 3;
+    angle_loop = angle;
+    /* A scratch reference is built only when indices below -1 will be read. */
+    ref = src_left - 1;
+    if (last < -1) {
+        inv_angle_val = inv_angle[mode - 11];
+        /* Copy 8 bytes starting at src_left[-1] into the scratch reference. */
+        tmp0 = LD(ref);
+        SD(tmp0, ref_tmp);
+        /* Extend the reference backwards: ref_tmp[last..-1] come from src_top. */
+        for (h_cnt = last; h_cnt <= -1; h_cnt++) {
+            offset = -1 + ((h_cnt * inv_angle_val + 128) >> 8);
+            ref_tmp[h_cnt] = src_top[offset];
+        }
+
+        ref = ref_tmp;
+    }
+    /* Per line N: idxN is the whole-sample step, fact_valN the 1/32 fraction of the accumulated angle. */
+    idx0 = angle_loop >> 5;
+    fact_val0 = angle_loop & 31;
+    angle_loop += angle;
+
+    idx1 = angle_loop >> 5;
+    fact_val1 = angle_loop & 31;
+    angle_loop += angle;
+
+    idx2 = angle_loop >> 5;
+    fact_val2 = angle_loop & 31;
+    angle_loop += angle;
+
+    idx3 = angle_loop >> 5;
+    fact_val3 = angle_loop & 31;
+
+    top0 = LD_UB(ref + idx0 + 1);
+    top1 = LD_UB(ref + idx1 + 1);
+    top2 = LD_UB(ref + idx2 + 1);
+    top3 = LD_UB(ref + idx3 + 1);
+    /* Even factN hold the fraction, odd factN its complement to 32. */
+    fact0 = __msa_fill_h(fact_val0);
+    fact1 = __msa_fill_h(32 - fact_val0);
+    fact2 = __msa_fill_h(fact_val1);
+    fact3 = __msa_fill_h(32 - fact_val1);
+    fact4 = __msa_fill_h(fact_val2);
+    fact5 = __msa_fill_h(32 - fact_val2);
+    fact6 = __msa_fill_h(fact_val3);
+    fact7 = __msa_fill_h(32 - fact_val3);
+    /* NOTE(review): the macros below presumably pair two lines per vector and build a one-sample-advanced copy. */
+    ILVR_D2_SH(fact2, fact0, fact6, fact4, fact0, fact2);
+    ILVR_D2_SH(fact3, fact1, fact7, fact5, fact1, fact3);
+    ILVR_B4_SH(zero, top0, zero, top1, zero, top2, zero, top3,
+               diff0, diff2, diff4, diff6);
+    SLDI_B4_0_SH(diff0, diff2, diff4, diff6, diff1, diff3, diff5, diff7, 2);
+    ILVR_D2_SH(diff2, diff0, diff6, diff4, diff0, diff2);
+    ILVR_D2_SH(diff3, diff1, diff7, diff5, diff1, diff3);
+    MUL2(diff1, fact0, diff3, fact2, diff1, diff3);
+    /* Weighted sum: advanced * fraction + current * (32 - fraction). */
+    diff1 += diff0 * fact1;
+    diff3 += diff2 * fact3;
+    /* Shift by 5 (presumably rounding) and pack back to bytes. */
+    SRARI_H2_SH(diff1, diff3, 5);
+    PCKEV_B2_SB(diff1, diff1, diff3, diff3, dst_val0, dst_val1);
+    /* NOTE(review): the even/odd packs below look like a 4x4 transpose before the store - confirm. */
+    diff0 = (v8i16) __msa_pckev_b(dst_val1, dst_val0);
+    diff1 = (v8i16) __msa_pckod_b(dst_val1, dst_val0);
+
+    diff2 = (v8i16) __msa_pckev_w((v4i32) diff1, (v4i32) diff0);
+
+    dst_val0 = __msa_pckev_b((v16i8) diff2, (v16i8) diff2);
+    dst_val1 = __msa_pckod_b((v16i8) diff2, (v16i8) diff2);
+    /* Output goes out as two stores of two 4-byte rows each. */
+    ST4x2_UB(dst_val0, dst, stride);
+    dst += (2 * stride);
+    ST4x2_UB(dst_val1, dst, stride);
+}
+
+static void hevc_intra_pred_angular_lower_8width_msa(const uint8_t *src_top,
+                                                     const uint8_t *src_left,
+                                                     uint8_t *dst,
+                                                     int32_t stride,
+                                                     int32_t mode)
+{   /* 8-wide angular intra prediction driven by the left reference; two passes, dst advancing 4 bytes each. */
+    int16_t inv_angle[] = { -4096, -1638, -910, -630, -482, -390, -315 };
+    uint8_t ref_array[3 * 32 + 4];
+    uint8_t *ref_tmp = ref_array + 8;
+    const uint8_t *ref;
+    const uint8_t *src_top_tmp = src_top - 1;
+    uint8_t *dst_org;
+    int32_t last, offset, tmp0, tmp1, tmp2;
+    int32_t h_cnt, v_cnt, idx0, fact_val0, idx1, fact_val1;
+    int32_t idx2, fact_val2, idx3, fact_val3;
+    int32_t angle, angle_loop, inv_angle_val;
+    v16i8 top0, top1, top2, top3;
+    v16i8 dst_val0, dst_val1, dst_val2, dst_val3;
+    v8i16 diff0, diff1, diff2, diff3, diff4, diff5, diff6, diff7;
+    v8i16 fact0, fact1, fact2, fact3, fact4, fact5, fact6, fact7;
+
+    angle = intra_pred_angle_low[mode - 2];
+    last = (angle) >> 2;
+    angle_loop = angle;
+    /* A scratch reference is built only when indices below -1 will be read. */
+    ref = src_left - 1;
+    if (last < -1) {
+        inv_angle_val = inv_angle[mode - 11];
+        /* Copy 12 bytes starting at src_left[-1] into the scratch reference. */
+        tmp0 = LW(ref);
+        tmp1 = LW(ref + 4);
+        tmp2 = LW(ref + 8);
+        SW(tmp0, ref_tmp);
+        SW(tmp1, ref_tmp + 4);
+        SW(tmp2, ref_tmp + 8);
+        /* Extend the reference backwards: ref_tmp[last..-1] come from src_top. */
+        for (h_cnt = last; h_cnt <= -1; h_cnt++) {
+            offset = (h_cnt * inv_angle_val + 128) >> 8;
+            ref_tmp[h_cnt] = src_top_tmp[offset];
+        }
+
+        ref = ref_tmp;
+    }
+    /* Per line N: idxN is the whole-sample step, fact_valN the 1/32 fraction of the accumulated angle. */
+    for (v_cnt = 0; v_cnt < 2; v_cnt++) {
+        dst_org = dst;
+
+        idx0 = angle_loop >> 5;
+        fact_val0 = angle_loop & 31;
+        angle_loop += angle;
+
+        idx1 = angle_loop >> 5;
+        fact_val1 = angle_loop & 31;
+        angle_loop += angle;
+
+        idx2 = angle_loop >> 5;
+        fact_val2 = angle_loop & 31;
+        angle_loop += angle;
+
+        idx3 = angle_loop >> 5;
+        fact_val3 = angle_loop & 31;
+        angle_loop += angle;
+
+        top0 = LD_SB(ref + idx0 + 1);
+        top1 = LD_SB(ref + idx1 + 1);
+        top2 = LD_SB(ref + idx2 + 1);
+        top3 = LD_SB(ref + idx3 + 1);
+        /* Even factN hold the fraction, odd factN its complement to 32. */
+        fact0 = __msa_fill_h(fact_val0);
+        fact1 = __msa_fill_h(32 - fact_val0);
+        fact2 = __msa_fill_h(fact_val1);
+        fact3 = __msa_fill_h(32 - fact_val1);
+        fact4 = __msa_fill_h(fact_val2);
+        fact5 = __msa_fill_h(32 - fact_val2);
+        fact6 = __msa_fill_h(fact_val3);
+        fact7 = __msa_fill_h(32 - fact_val3);
+        /* NOTE(review): presumably widens to halfwords, then builds a one-sample-advanced copy in the odd diffs. */
+        UNPCK_UB_SH(top0, diff0, diff1);
+        UNPCK_UB_SH(top1, diff2, diff3);
+        UNPCK_UB_SH(top2, diff4, diff5);
+        UNPCK_UB_SH(top3, diff6, diff7);
+        SLDI_B2_SH(diff1, diff3, diff0, diff2, diff1, diff3, 2);
+        SLDI_B2_SH(diff5, diff7, diff4, diff6, diff5, diff7, 2);
+        MUL4(diff1, fact0, diff3, fact2, diff5, fact4, diff7, fact6,
+             diff1, diff3, diff5, diff7);
+        /* Weighted sum: advanced * fraction + current * (32 - fraction). */
+        diff1 += diff0 * fact1;
+        diff3 += diff2 * fact3;
+        diff5 += diff4 * fact5;
+        diff7 += diff6 * fact7;
+        /* Shift by 5, pack to bytes; the interleaves presumably transpose lines into columns before the store. */
+        SRARI_H4_SH(diff1, diff3, diff5, diff7, 5);
+        PCKEV_B4_SB(diff1, diff1, diff3, diff3, diff5, diff5, diff7, diff7,
+                    dst_val0, dst_val1, dst_val2, dst_val3);
+        ILVR_B2_SH(dst_val1, dst_val0, dst_val3, dst_val2, diff0, diff1);
+        ILVRL_H2_SH(diff1, diff0, diff3, diff4);
+        ST4x8_UB(diff3, diff4, dst_org, stride);
+        dst += 4;
+    }
+}
+
+static void hevc_intra_pred_angular_lower_16width_msa(const uint8_t *src_top,
+                                                      const uint8_t *src_left,
+                                                      uint8_t *dst,
+                                                      int32_t stride,
+                                                      int32_t mode)
+{   /* 16-wide angular intra prediction driven by the left reference; four passes, dst advancing 4 bytes each. */
+    int16_t inv_angle[] = { -4096, -1638, -910, -630, -482, -390, -315 };
+    int32_t h_cnt, v_cnt, idx0, fact_val0, idx1, fact_val1;
+    int32_t idx2, fact_val2, idx3, fact_val3, tmp0;
+    v16i8 top0, top1, dst_val0, top2, top3, dst_val1;
+    v16i8 top4, top5, dst_val2, top6, top7, dst_val3;
+    v8i16 fact0, fact1, fact2, fact3, fact4, fact5, fact6, fact7;
+    v8i16 diff0, diff1, diff2, diff3, diff4, diff5, diff6, diff7;
+    v8i16 diff8, diff9, diff10, diff11, diff12, diff13, diff14, diff15;
+    int32_t angle, angle_loop, inv_angle_val, offset;
+    uint8_t ref_array[3 * 32 + 4];
+    uint8_t *ref_tmp = ref_array + 16;
+    const uint8_t *ref, *src_top_tmp = src_top - 1;
+    uint8_t *dst_org;
+    int32_t last;
+
+    angle = intra_pred_angle_low[mode - 2];
+    last = (angle) >> 1;
+    angle_loop = angle;
+    /* A scratch reference is built only when indices below -1 will be read. */
+    ref = src_left - 1;
+    if (last < -1) {
+        inv_angle_val = inv_angle[mode - 11];
+        /* Copy one vector plus 4 bytes starting at src_left[-1] into the scratch reference. */
+        top0 = LD_SB(ref);
+        tmp0 = LW(ref + 16);
+        ST_SB(top0, ref_tmp);
+        SW(tmp0, ref_tmp + 16);
+        /* Extend the reference backwards: ref_tmp[last..-1] come from src_top. */
+        for (h_cnt = last; h_cnt <= -1; h_cnt++) {
+            offset = (h_cnt * inv_angle_val + 128) >> 8;
+            ref_tmp[h_cnt] = src_top_tmp[offset];
+        }
+
+        ref = ref_tmp;
+    }
+    /* Per line N: idxN is the whole-sample step, fact_valN the 1/32 fraction of the accumulated angle. */
+    for (v_cnt = 0; v_cnt < 4; v_cnt++) {
+        dst_org = dst;
+
+        idx0 = angle_loop >> 5;
+        fact_val0 = angle_loop & 31;
+        angle_loop += angle;
+
+        idx1 = angle_loop >> 5;
+        fact_val1 = angle_loop & 31;
+        angle_loop += angle;
+
+        idx2 = angle_loop >> 5;
+        fact_val2 = angle_loop & 31;
+        angle_loop += angle;
+
+        idx3 = angle_loop >> 5;
+        fact_val3 = angle_loop & 31;
+        angle_loop += angle;
+        /* Two consecutive vectors per line so a one-sample-advanced copy can be formed. */
+        LD_SB2(ref + idx0 + 1, 16, top0, top1);
+        LD_SB2(ref + idx1 + 1, 16, top2, top3);
+        LD_SB2(ref + idx2 + 1, 16, top4, top5);
+        LD_SB2(ref + idx3 + 1, 16, top6, top7);
+        /* Even factN hold the fraction, odd factN its complement to 32. */
+        fact0 = __msa_fill_h(fact_val0);
+        fact1 = __msa_fill_h(32 - fact_val0);
+        fact2 = __msa_fill_h(fact_val1);
+        fact3 = __msa_fill_h(32 - fact_val1);
+        fact4 = __msa_fill_h(fact_val2);
+        fact5 = __msa_fill_h(32 - fact_val2);
+        fact6 = __msa_fill_h(fact_val3);
+        fact7 = __msa_fill_h(32 - fact_val3);
+        /* NOTE(review): presumably leaves the one-sample-advanced data in the odd topN - confirm in the macro. */
+        SLDI_B2_SB(top1, top3, top0, top2, top1, top3, 1);
+        SLDI_B2_SB(top5, top7, top4, top6, top5, top7, 1);
+
+        UNPCK_UB_SH(top0, diff0, diff1);
+        UNPCK_UB_SH(top1, diff2, diff3);
+        UNPCK_UB_SH(top2, diff4, diff5);
+        UNPCK_UB_SH(top3, diff6, diff7);
+        UNPCK_UB_SH(top4, diff8, diff9);
+        UNPCK_UB_SH(top5, diff10, diff11);
+        UNPCK_UB_SH(top6, diff12, diff13);
+        UNPCK_UB_SH(top7, diff14, diff15);
+
+        MUL4(diff2, fact0, diff3, fact0, diff6, fact2, diff7, fact2,
+             diff2, diff3, diff6, diff7);
+        MUL4(diff10, fact4, diff11, fact4, diff14, fact6, diff15, fact6,
+             diff10, diff11, diff14, diff15);
+        /* Weighted sum: advanced * fraction + current * (32 - fraction). */
+        diff2 += diff0 * fact1;
+        diff3 += diff1 * fact1;
+        diff6 += diff4 * fact3;
+        diff7 += diff5 * fact3;
+        diff10 += diff8 * fact5;
+        diff11 += diff9 * fact5;
+        diff14 += diff12 * fact7;
+        diff15 += diff13 * fact7;
+        /* Shift by 5, pack to bytes; the interleaves presumably transpose lines into columns before the stores. */
+        SRARI_H4_SH(diff2, diff3, diff6, diff7, 5);
+        SRARI_H4_SH(diff10, diff11, diff14, diff15, 5);
+        PCKEV_B4_SB(diff3, diff2, diff7, diff6, diff11, diff10, diff15, diff14,
+                    dst_val0, dst_val1, dst_val2, dst_val3);
+        ILVR_B2_SH(dst_val1, dst_val0, dst_val3, dst_val2, diff0, diff1);
+        ILVL_B2_SH(dst_val1, dst_val0, dst_val3, dst_val2, diff2, diff3);
+        ILVRL_H2_SH(diff1, diff0, diff4, diff5);
+        ILVRL_H2_SH(diff3, diff2, diff6, diff7);
+        ST4x8_UB(diff4, diff5, dst_org, stride);
+        dst_org += (8 * stride);
+        ST4x8_UB(diff6, diff7, dst_org, stride);
+        dst += 4;
+    }
+}
+
+static void hevc_intra_pred_angular_lower_32width_msa(const uint8_t *src_top,
+                                                      const uint8_t *src_left,
+                                                      uint8_t *dst,
+                                                      int32_t stride,
+                                                      int32_t mode)
+{   /* Angular intra prediction, 32 columns wide, two columns per pass. */
+    int16_t inv_angle[] = { -4096, -1638, -910, -630, -482, -390, -315 };
+    int32_t h_cnt, v_cnt, idx0, fact_val0, idx1, fact_val1, tmp0;
+    v16i8 top0, top1, dst_val0, top2, top3, dst_val1;
+    v16i8 top4, top5, dst_val2, top6, top7, dst_val3;
+    v8i16 fact0, fact1, fact2, fact3;
+    v8i16 diff0, diff1, diff2, diff3, diff4, diff5, diff6, diff7;
+    v8i16 diff8, diff9, diff10, diff11, diff12, diff13, diff14, diff15;
+    int32_t angle, angle_loop, inv_angle_val, offset;
+    uint8_t ref_array[3 * 32 + 4];      /* scratch for an extended reference */
+    uint8_t *ref_tmp = ref_array + 32;  /* headroom for negative indices */
+    const uint8_t *ref, *src_top_tmp = src_top - 1;
+    uint8_t *dst_org;
+    int32_t last;
+    /* Reached from ff_pred_intra_pred_angular_3_msa for modes < 18, except 10. */
+    angle = intra_pred_angle_low[mode - 2];  /* table defined elsewhere */
+    last = angle;
+    angle_loop = angle;
+    /* Default reference line: the left neighbours, starting at src_left[-1]. */
+    ref = src_left - 1;
+    if (last < -1) {    /* angle below -1: extend the reference backwards */
+        inv_angle_val = inv_angle[mode - 11];   /* table starts at mode 11 */
+        /* Copy the left reference (2 vectors 16 bytes apart + 1 word at +32). */
+        LD_SB2(ref, 16, top0, top1);
+        tmp0 = LW(ref + 32);
+        ST_SB2(top0, top1, ref_tmp, 16);
+        SW(tmp0, ref_tmp + 32);
+        /* Fill ref_tmp[last..-1] from the top row at the projected offset. */
+        for (h_cnt = last; h_cnt <= -1; h_cnt++) {
+            offset = (h_cnt * inv_angle_val + 128) >> 8;    /* rounded >> 8 */
+            ref_tmp[h_cnt] = src_top_tmp[offset];
+        }
+
+        ref = ref_tmp;
+    }
+    /* 16 passes; each pass handles two adjacent output columns (dst += 2). */
+    for (v_cnt = 0; v_cnt < 16; v_cnt++) {
+        dst_org = dst;
+        idx0 = angle_loop >> 5;         /* whole-sample offset, column 0 */
+        fact_val0 = angle_loop & 31;    /* 5-bit fraction, column 0 */
+        angle_loop += angle;
+
+        idx1 = angle_loop >> 5;         /* same pair for column 1 */
+        fact_val1 = angle_loop & 31;
+        angle_loop += angle;
+        /* Reference data from ref[idx + 1] on: vectors at +1, +17 and +33. */
+        top0 = LD_SB(ref + idx0 + 1);
+        top4 = LD_SB(ref + idx1 + 1);
+        top1 = LD_SB(ref + idx0 + 17);
+        top5 = LD_SB(ref + idx1 + 17);
+        top3 = LD_SB(ref + idx0 + 33);
+        top7 = LD_SB(ref + idx1 + 33);
+        /* Replicate each fraction and its complement (32 - fraction). */
+        fact0 = __msa_fill_h(fact_val0);
+        fact1 = __msa_fill_h(32 - fact_val0);
+        fact2 = __msa_fill_h(fact_val1);
+        fact3 = __msa_fill_h(32 - fact_val1);
+
+        top2 = top1;
+        top6 = top5;
+        /* NOTE(review): SLDI by 1 presumably yields the data one byte on. */
+        SLDI_B2_SB(top1, top3, top0, top2, top1, top3, 1);
+        SLDI_B2_SB(top5, top7, top4, top6, top5, top7, 1);
+        /* Widen bytes to 16-bit lanes (v8i16) for the weighted sums below. */
+        UNPCK_UB_SH(top0, diff0, diff1);
+        UNPCK_UB_SH(top1, diff2, diff3);
+        UNPCK_UB_SH(top2, diff4, diff5);
+        UNPCK_UB_SH(top3, diff6, diff7);
+        UNPCK_UB_SH(top4, diff8, diff9);
+        UNPCK_UB_SH(top5, diff10, diff11);
+        UNPCK_UB_SH(top6, diff12, diff13);
+        UNPCK_UB_SH(top7, diff14, diff15);
+        /* Weighted sum: shifted * fact + unshifted * (32 - fact). */
+        MUL4(diff2, fact0, diff3, fact0, diff6, fact0, diff7, fact0,
+             diff2, diff3, diff6, diff7);
+        MUL4(diff10, fact2, diff11, fact2, diff14, fact2, diff15, fact2,
+             diff10, diff11, diff14, diff15);
+
+        diff2 += diff0 * fact1;
+        diff3 += diff1 * fact1;
+        diff6 += diff4 * fact1;
+        diff7 += diff5 * fact1;
+        diff10 += diff8 * fact3;
+        diff11 += diff9 * fact3;
+        diff14 += diff12 * fact3;
+        diff15 += diff13 * fact3;
+        /* Rounding shift right by 5, then pack the 16-bit results to bytes. */
+        SRARI_H4_SH(diff2, diff3, diff6, diff7, 5);
+        SRARI_H4_SH(diff10, diff11, diff14, diff15, 5);
+        PCKEV_B4_SB(diff3, diff2, diff7, diff6, diff11, diff10, diff15, diff14,
+                    dst_val0, dst_val1, dst_val2, dst_val3);
+        ILVRL_B2_SH(dst_val2, dst_val0, diff0, diff1);  /* pair col 0 / col 1 */
+        ILVRL_B2_SH(dst_val3, dst_val1, diff2, diff3);
+        /* NOTE(review): ST2x4_UB presumably stores 2 bytes on each of 4 rows. */
+        ST2x4_UB(diff0, 0, dst_org, stride);
+        dst_org += (4 * stride);
+        ST2x4_UB(diff0, 4, dst_org, stride);
+        dst_org += (4 * stride);
+        ST2x4_UB(diff1, 0, dst_org, stride);
+        dst_org += (4 * stride);
+        ST2x4_UB(diff1, 4, dst_org, stride);
+        dst_org += (4 * stride);
+
+        ST2x4_UB(diff2, 0, dst_org, stride);
+        dst_org += (4 * stride);
+        ST2x4_UB(diff2, 4, dst_org, stride);
+        dst_org += (4 * stride);
+        ST2x4_UB(diff3, 0, dst_org, stride);
+        dst_org += (4 * stride);
+        ST2x4_UB(diff3, 4, dst_org, stride);
+        dst_org += (4 * stride);
+
+        dst += 2;
+    }
+}
+
+static void intra_predict_vert_32x32_msa(const uint8_t *src, uint8_t *dst,
+                                         int32_t dst_stride)
+{
+    /* Vertical prediction: repeat the 32 bytes at src on 32 output rows. */
+    v16u8 row_lo = LD_UB(src);          /* source bytes 0..15 */
+    v16u8 row_hi = LD_UB(src + 16);     /* source bytes 16..31 */
+    uint32_t rows_left = 32;
+
+    while (rows_left--) {
+        /* write both halves 16 bytes apart, then step to the next row */
+        ST_UB2(row_lo, row_hi, dst, 16);
+        dst += dst_stride;
+    }
+}
+
+/* Planar intra prediction entry point; forwards to the 4x4 plane helper.
+ * Only the argument order changes: (dst, top, left) -> (top, left, dst). */
+void ff_hevc_intra_pred_planar_0_msa(uint8_t *dst, const uint8_t *src_top,
+                                     const uint8_t *src_left, ptrdiff_t stride)
+{
+    hevc_intra_pred_plane_4x4_msa(src_top, src_left, dst, stride);
+}
+
+/* Planar intra prediction entry point; forwards to the 8x8 plane helper.
+ * Only the argument order changes: (dst, top, left) -> (top, left, dst). */
+void ff_hevc_intra_pred_planar_1_msa(uint8_t *dst, const uint8_t *src_top,
+                                     const uint8_t *src_left, ptrdiff_t stride)
+{
+    hevc_intra_pred_plane_8x8_msa(src_top, src_left, dst, stride);
+}
+
+/* Planar intra prediction entry point; forwards to the 16x16 plane helper.
+ * Only the argument order changes: (dst, top, left) -> (top, left, dst). */
+void ff_hevc_intra_pred_planar_2_msa(uint8_t *dst, const uint8_t *src_top,
+                                     const uint8_t *src_left, ptrdiff_t stride)
+{
+    hevc_intra_pred_plane_16x16_msa(src_top, src_left, dst, stride);
+}
+
+/* Planar intra prediction entry point; forwards to the 32x32 plane helper.
+ * Only the argument order changes: (dst, top, left) -> (top, left, dst). */
+void ff_hevc_intra_pred_planar_3_msa(uint8_t *dst, const uint8_t *src_top,
+                                     const uint8_t *src_left, ptrdiff_t stride)
+{
+    hevc_intra_pred_plane_32x32_msa(src_top, src_left, dst, stride);
+}
+
+/*
+ * DC intra prediction entry point.
+ *
+ * Selects the block-size specific routine from log2:
+ * 2 -> 4x4, 3 -> 8x8, 4 -> 16x16, 5 -> 32x32.
+ * For any other log2 value no routine is called.
+ */
+void ff_hevc_intra_pred_dc_msa(uint8_t *dst, const uint8_t *src_top,
+                               const uint8_t *src_left,
+                               ptrdiff_t stride, int log2, int c_idx)
+{
+    if (log2 == 2) {
+        hevc_intra_pred_dc_4x4_msa(src_top, src_left, dst, stride, c_idx);
+    } else if (log2 == 3) {
+        hevc_intra_pred_dc_8x8_msa(src_top, src_left, dst, stride, c_idx);
+    } else if (log2 == 4) {
+        hevc_intra_pred_dc_16x16_msa(src_top, src_left, dst, stride, c_idx);
+    } else if (log2 == 5) {
+        /* the 32x32 routine takes no c_idx argument */
+        hevc_intra_pred_dc_32x32_msa(src_top, src_left, dst, stride);
+    }
+}
+
+/* Angular prediction, 4-wide helpers: 10 -> horiz, 26 -> vert, else by angle. */
+void ff_pred_intra_pred_angular_0_msa(uint8_t *dst, const uint8_t *src_top,
+                                      const uint8_t *src_left,
+                                      ptrdiff_t stride, int c_idx, int mode)
+{
+    if (mode == 10) {
+        hevc_intra_pred_horiz_4x4_msa(src_top, src_left, dst, stride, c_idx);
+    } else if (mode == 26) {
+        hevc_intra_pred_vert_4x4_msa(src_top, src_left, dst, stride, c_idx);
+    } else if (mode < 18) {
+        hevc_intra_pred_angular_lower_4width_msa(src_top, src_left,
+                                                 dst, stride, mode);
+    } else {
+        hevc_intra_pred_angular_upper_4width_msa(src_top, src_left,
+                                                 dst, stride, mode);
+    }
+}
+
+/* Angular prediction, 8-wide helpers: 10 -> horiz, 26 -> vert, else by angle. */
+void ff_pred_intra_pred_angular_1_msa(uint8_t *dst, const uint8_t *src_top,
+                                      const uint8_t *src_left,
+                                      ptrdiff_t stride, int c_idx, int mode)
+{
+    if (mode == 10) {
+        hevc_intra_pred_horiz_8x8_msa(src_top, src_left, dst, stride, c_idx);
+    } else if (mode == 26) {
+        hevc_intra_pred_vert_8x8_msa(src_top, src_left, dst, stride, c_idx);
+    } else if (mode < 18) {
+        hevc_intra_pred_angular_lower_8width_msa(src_top, src_left,
+                                                 dst, stride, mode);
+    } else {
+        hevc_intra_pred_angular_upper_8width_msa(src_top, src_left,
+                                                 dst, stride, mode);
+    }
+}
+
+/* Angular prediction, 16-wide helpers: 10 -> horiz, 26 -> vert, else by angle. */
+void ff_pred_intra_pred_angular_2_msa(uint8_t *dst, const uint8_t *src_top,
+                                      const uint8_t *src_left,
+                                      ptrdiff_t stride, int c_idx, int mode)
+{
+    if (mode == 10) {
+        hevc_intra_pred_horiz_16x16_msa(src_top, src_left, dst, stride, c_idx);
+    } else if (mode == 26) {
+        hevc_intra_pred_vert_16x16_msa(src_top, src_left, dst, stride, c_idx);
+    } else if (mode < 18) {
+        hevc_intra_pred_angular_lower_16width_msa(src_top, src_left,
+                                                  dst, stride, mode);
+    } else {
+        hevc_intra_pred_angular_upper_16width_msa(src_top, src_left,
+                                                  dst, stride, mode);
+    }
+}
+
+/* Angular prediction, 32-wide helpers; c_idx is not used by any branch here. */
+void ff_pred_intra_pred_angular_3_msa(uint8_t *dst, const uint8_t *src_top,
+                                      const uint8_t *src_left,
+                                      ptrdiff_t stride, int c_idx, int mode)
+{
+    if (mode == 10) {
+        hevc_intra_pred_horiz_32x32_msa(src_top, src_left, dst, stride);
+    } else if (mode == 26) {
+        intra_predict_vert_32x32_msa(src_top, dst, stride);
+    } else if (mode < 18) {
+        hevc_intra_pred_angular_lower_32width_msa(src_top, src_left,
+                                                  dst, stride, mode);
+    } else {
+        hevc_intra_pred_angular_upper_32width_msa(src_top, src_left,
+                                                  dst, stride, mode);
+    }
+}
+
+void ff_intra_pred_8_16x16_msa(HEVCContext *s, int x0, int y0, int c_idx)
+{
+    v16u8 vec0;
+    HEVCLocalContext *lc = s->HEVClc;
+    int i;
+    int hshift = s->ps.sps->hshift[c_idx];
+    int vshift = s->ps.sps->vshift[c_idx];
+    int size_in_luma_h = 16 << hshift;
+    int size_in_tbs_h = size_in_luma_h >> s->ps.sps->log2_min_tb_size;
+    int size_in_luma_v = 16 << vshift;
+    int size_in_tbs_v = size_in_luma_v >> s->ps.sps->log2_min_tb_size;
+    int x = x0 >> hshift;
+    int y = y0 >> vshift;
+    int x_tb = (x0 >> s->ps.sps->log2_min_tb_size) & s->ps.sps->tb_mask;
+    int y_tb = (y0 >> s->ps.sps->log2_min_tb_size) & s->ps.sps->tb_mask;
+
+    int cur_tb_addr =
+        s->ps.pps->min_tb_addr_zs[(y_tb) * (s->ps.sps->tb_mask + 2) + (x_tb)];
+
+    ptrdiff_t stride = s->frame->linesize[c_idx] / sizeof(uint8_t);
+    uint8_t *src = (uint8_t *) s->frame->data[c_idx] + x + y * stride;
+
+    int min_pu_width = s->ps.sps->min_pu_width;
+
+    enum IntraPredMode mode = c_idx ? lc->tu.intra_pred_mode_c :
+        lc->tu.intra_pred_mode;
+    uint32_t a;
+    uint8_t left_array[2 * 32 + 1];
+    uint8_t filtered_left_array[2 * 32 + 1];
+    uint8_t top_array[2 * 32 + 1];
+    uint8_t filtered_top_array[2 * 32 + 1];
+
+    uint8_t *left = left_array + 1;
+    uint8_t *top = top_array + 1;
+    uint8_t *filtered_left = filtered_left_array + 1;
+    uint8_t *filtered_top = filtered_top_array + 1;
+    int cand_bottom_left = lc->na.cand_bottom_left
+        && cur_tb_addr >
+        s->ps.pps->min_tb_addr_zs[((y_tb + size_in_tbs_v) & s->ps.sps->tb_mask) *
+                               (s->ps.sps->tb_mask + 2) + (x_tb - 1)];
+    int cand_left = lc->na.cand_left;
+    int cand_up_left = lc->na.cand_up_left;
+    int cand_up = lc->na.cand_up;
+    int cand_up_right = lc->na.cand_up_right
+        && cur_tb_addr >
+        s->ps.pps->min_tb_addr_zs[(y_tb - 1) * (s->ps.sps->tb_mask + 2) +
+                               ((x_tb + size_in_tbs_h) & s->ps.sps->tb_mask)];
+
+    int bottom_left_size =
+        (((y0 + 2 * size_in_luma_v) >
+          (s->ps.sps->height) ? (s->ps.sps->height) : (y0 +
+                                                 2 * size_in_luma_v)) -
+         (y0 + size_in_luma_v)) >> vshift;
+    int top_right_size =
+        (((x0 + 2 * size_in_luma_h) >
+          (s->ps.sps->width) ? (s->ps.sps->width) : (x0 + 2 * size_in_luma_h)) -
+         (x0 + size_in_luma_h)) >> hshift;
+
+    if (s->ps.pps->constrained_intra_pred_flag == 1) {
+        int size_in_luma_pu_v = ((size_in_luma_v) >> s->ps.sps->log2_min_pu_size);
+        int size_in_luma_pu_h = ((size_in_luma_h) >> s->ps.sps->log2_min_pu_size);
+        int on_pu_edge_x = !(x0 & ((1 << s->ps.sps->log2_min_pu_size) - 1));
+        int on_pu_edge_y = !(y0 & ((1 << s->ps.sps->log2_min_pu_size) - 1));
+        if (!size_in_luma_pu_h)
+            size_in_luma_pu_h++;
+        if (cand_bottom_left == 1 && on_pu_edge_x) {
+            int x_left_pu = ((x0 - 1) >> s->ps.sps->log2_min_pu_size);
+            int y_bottom_pu =
+                ((y0 + size_in_luma_v) >> s->ps.sps->log2_min_pu_size);
+            int max =
+                ((size_in_luma_pu_v) >
+                 (s->ps.sps->min_pu_height -
+                  y_bottom_pu) ? (s->ps.sps->min_pu_height -
+                                  y_bottom_pu) : (size_in_luma_pu_v));
+            cand_bottom_left = 0;
+            for (i = 0; i < max; i += 2)
+                cand_bottom_left |=
+                    ((s->ref->tab_mvf[(x_left_pu) +
+                                      (y_bottom_pu +
+                                       i) * min_pu_width]).pred_flag ==
+                     PF_INTRA);
+        }
+        if (cand_left == 1 && on_pu_edge_x) {
+            int x_left_pu = ((x0 - 1) >> s->ps.sps->log2_min_pu_size);
+            int y_left_pu = ((y0) >> s->ps.sps->log2_min_pu_size);
+            int max =
+                ((size_in_luma_pu_v) >
+                 (s->ps.sps->min_pu_height -
+                  y_left_pu) ? (s->ps.sps->min_pu_height -
+                                y_left_pu) : (size_in_luma_pu_v));
+            cand_left = 0;
+            for (i = 0; i < max; i += 2)
+                cand_left |=
+                    ((s->ref->tab_mvf[(x_left_pu) +
+                                      (y_left_pu +
+                                       i) * min_pu_width]).pred_flag ==
+                     PF_INTRA);
+        }
+        if (cand_up_left == 1) {
+            int x_left_pu = ((x0 - 1) >> s->ps.sps->log2_min_pu_size);
+            int y_top_pu = ((y0 - 1) >> s->ps.sps->log2_min_pu_size);
+            cand_up_left =
+                (s->ref->tab_mvf[(x_left_pu) +
+                                 (y_top_pu) * min_pu_width]).pred_flag ==
+                PF_INTRA;
+        }
+        if (cand_up == 1 && on_pu_edge_y) {
+            int x_top_pu = ((x0) >> s->ps.sps->log2_min_pu_size);
+            int y_top_pu = ((y0 - 1) >> s->ps.sps->log2_min_pu_size);
+            int max =
+                ((size_in_luma_pu_h) >
+                 (s->ps.sps->min_pu_width -
+                  x_top_pu) ? (s->ps.sps->min_pu_width -
+                               x_top_pu) : (size_in_luma_pu_h));
+            cand_up = 0;
+            for (i = 0; i < max; i += 2)
+                cand_up |=
+                    ((s->ref->tab_mvf[(x_top_pu + i) +
+                                      (y_top_pu) *
+                                      min_pu_width]).pred_flag == PF_INTRA);
+        }
+        if (cand_up_right == 1 && on_pu_edge_y) {
+            int y_top_pu = ((y0 - 1) >> s->ps.sps->log2_min_pu_size);
+            int x_right_pu =
+                ((x0 + size_in_luma_h) >> s->ps.sps->log2_min_pu_size);
+            int max =
+                ((size_in_luma_pu_h) >
+                 (s->ps.sps->min_pu_width -
+                  x_right_pu) ? (s->ps.sps->min_pu_width -
+                                 x_right_pu) : (size_in_luma_pu_h));
+            cand_up_right = 0;
+            for (i = 0; i < max; i += 2)
+                cand_up_right |=
+                    ((s->ref->tab_mvf[(x_right_pu + i) +
+                                      (y_top_pu) *
+                                      min_pu_width]).pred_flag == PF_INTRA);
+        }
+
+        vec0 = (v16u8) __msa_ldi_b(128);
+
+        ST_UB4(vec0, vec0, vec0, vec0, left, 16);
+
+        ST_UB4(vec0, vec0, vec0, vec0, top, 16);
+
+        top[-1] = 128;
+    }
+    if (cand_up_left) {
+        left[-1] = src[(-1) + stride * (-1)];
+        top[-1] = left[-1];
+    }
+    if (cand_up) {
+        vec0 = LD_UB(src - stride);
+        ST_UB(vec0, top);
+    }
+    if (cand_up_right) {
+        vec0 = LD_UB(src - stride + 16);
+        ST_UB(vec0, (top + 16));
+
+        do {
+            uint32_t pix =
+                ((src[(16 + top_right_size - 1) + stride * (-1)]) *
+                 0x01010101U);
+            for (i = 0; i < (16 - top_right_size); i += 4)
+                ((((union unaligned_32 *) (top + 16 + top_right_size +
+                                           i))->l) = (pix));
+        } while (0);
+    }
+    if (cand_left)
+        for (i = 0; i < 16; i++)
+            left[i] = src[(-1) + stride * (i)];
+    if (cand_bottom_left) {
+        for (i = 16; i < 16 + bottom_left_size; i++)
+            left[i] = src[(-1) + stride * (i)];
+        do {
+            uint32_t pix =
+                ((src[(-1) + stride * (16 + bottom_left_size - 1)]) *
+                 0x01010101U);
+            for (i = 0; i < (16 - bottom_left_size); i += 4)
+                ((((union unaligned_32 *) (left + 16 + bottom_left_size +
+                                           i))->l) = (pix));
+        } while (0);
+    }
+
+    if (s->ps.pps->constrained_intra_pred_flag == 1) {
+        if (cand_bottom_left || cand_left || cand_up_left || cand_up
+            || cand_up_right) {
+            int size_max_x =
+                x0 + ((2 * 16) << hshift) <
+                s->ps.sps->width ? 2 * 16 : (s->ps.sps->width - x0) >> hshift;
+            int size_max_y =
+                y0 + ((2 * 16) << vshift) <
+                s->ps.sps->height ? 2 * 16 : (s->ps.sps->height - y0) >> vshift;
+            int j = 16 + (cand_bottom_left ? bottom_left_size : 0) - 1;
+            if (!cand_up_right) {
+                size_max_x = x0 + ((16) << hshift) < s->ps.sps->width ?
+                    16 : (s->ps.sps->width - x0) >> hshift;
+            }
+            if (!cand_bottom_left) {
+                size_max_y = y0 + ((16) << vshift) < s->ps.sps->height ?
+                    16 : (s->ps.sps->height - y0) >> vshift;
+            }
+            if (cand_bottom_left || cand_left || cand_up_left) {
+                while (j > -1
+                       &&
+                       !((s->ref->tab_mvf[(((x0 +
+                                             ((-1) << hshift)) >> s->ps.sps->
+                                            log2_min_pu_size)) + (((y0 +
+                                                                    ((j) <<
+                                                                     vshift))
+                                                                   >> s->ps.sps->
+                                                                   log2_min_pu_size))
+                                          * min_pu_width]).pred_flag ==
+                         PF_INTRA))
+                    j--;
+                if (!
+                    ((s->ref->tab_mvf[(((x0 +
+                                         ((-1) << hshift)) >> s->ps.sps->
+                                        log2_min_pu_size)) + (((y0 + ((j)
+                                                                      <<
+                                                                      vshift))
+                                                               >> s->ps.sps->
+                                                               log2_min_pu_size))
+                                      * min_pu_width]).pred_flag == PF_INTRA)) {
+                    j = 0;
+                    while (j < size_max_x
+                           &&
+                           !((s->ref->tab_mvf[(((x0 +
+                                                 ((j) << hshift)) >> s->ps.sps->
+                                                log2_min_pu_size)) + (((y0 +
+                                                                        ((-1) <<
+                                                                         vshift))
+                                                                       >> s->
+                                                                       ps.sps->
+                                                                       log2_min_pu_size))
+                                              * min_pu_width]).pred_flag ==
+                             PF_INTRA))
+                        j++;
+                    for (i = j; i > (j) - (j + 1); i--)
+                        if (!
+                            ((s->ref->tab_mvf[(((x0 +
+                                                 ((i -
+                                                   1) << hshift)) >> s->ps.sps->
+                                                log2_min_pu_size)) + (((y0 +
+                                                                        ((-1) <<
+                                                                         vshift))
+                                                                       >> s->
+                                                                       ps.sps->
+                                                                       log2_min_pu_size))
+                                              * min_pu_width]).pred_flag ==
+                             PF_INTRA))
+                            top[i - 1] = top[i];
+                    left[-1] = top[-1];
+                }
+            } else {
+                j = 0;
+                while (j < size_max_x
+                       &&
+                       !((s->ref->tab_mvf[(((x0 +
+                                             ((j) << hshift)) >> s->ps.sps->
+                                            log2_min_pu_size)) + (((y0 + ((-1)
+                                                                          <<
+                                                                          vshift))
+                                                                   >> s->ps.sps->
+                                                                   log2_min_pu_size))
+                                          * min_pu_width]).pred_flag ==
+                         PF_INTRA))
+                    j++;
+                if (j > 0)
+                    if (x0 > 0) {
+                        for (i = j; i > (j) - (j + 1); i--)
+                            if (!
+                                ((s->ref->tab_mvf[(((x0 +
+                                                     ((i -
+                                                       1) << hshift)) >>
+                                                    s->ps.sps->log2_min_pu_size))
+                                                  + (((y0 + ((-1)
+                                                             << vshift))
+                                                      >>
+                                                      s->ps.sps->log2_min_pu_size))
+                                                  *
+                                                  min_pu_width]).pred_flag ==
+                                 PF_INTRA))
+                                top[i - 1] = top[i];
+                    } else {
+                        for (i = j; i > (j) - (j); i--)
+                            if (!
+                                ((s->ref->tab_mvf[(((x0 +
+                                                     ((i -
+                                                       1) << hshift)) >>
+                                                    s->ps.sps->log2_min_pu_size))
+                                                  + (((y0 + ((-1)
+                                                             << vshift))
+                                                      >>
+                                                      s->ps.sps->log2_min_pu_size))
+                                                  *
+                                                  min_pu_width]).pred_flag ==
+                                 PF_INTRA))
+                                top[i - 1] = top[i];
+                        top[-1] = top[0];
+                    }
+                left[-1] = top[-1];
+            }
+            left[-1] = top[-1];
+            if (cand_bottom_left || cand_left) {
+                a = ((left[-1]) * 0x01010101U);
+                for (i = 0; i < (0) + (size_max_y); i += 4)
+                    if (!
+                        ((s->ref->tab_mvf[(((x0 +
+                                             ((-1) << hshift)) >> s->ps.sps->
+                                            log2_min_pu_size)) + (((y0 +
+                                                                    ((i) <<
+                                                                     vshift))
+                                                                   >> s->ps.sps->
+                                                                   log2_min_pu_size))
+                                          * min_pu_width]).pred_flag ==
+                         PF_INTRA))
+                        ((((union unaligned_32 *) (&left[i]))->l) = (a));
+                    else
+                        a = ((left[i + 3]) * 0x01010101U);
+            }
+            if (!cand_left) {
+                vec0 = (v16u8) __msa_fill_b(left[-1]);
+
+                ST_UB(vec0, left);
+            }
+            if (!cand_bottom_left) {
+
+                vec0 = (v16u8) __msa_fill_b(left[15]);
+
+                ST_UB(vec0, (left + 16));
+            }
+            if (x0 != 0 && y0 != 0) {
+                a = ((left[size_max_y - 1]) * 0x01010101U);
+                for (i = (size_max_y - 1);
+                     i > (size_max_y - 1) - (size_max_y); i -= 4)
+                    if (!
+                        ((s->ref->tab_mvf[(((x0 +
+                                             ((-1) << hshift)) >> s->ps.sps->
+                                            log2_min_pu_size)) + (((y0 +
+                                                                    ((i -
+                                                                      3) <<
+                                                                     vshift))
+                                                                   >> s->ps.sps->
+                                                                   log2_min_pu_size))
+                                          * min_pu_width]).pred_flag ==
+                         PF_INTRA))
+                        ((((union unaligned_32 *) (&left[i - 3]))->l) = (a));
+                    else
+                        a = ((left[i - 3]) * 0x01010101U);
+                if (!
+                    ((s->ref->tab_mvf[(((x0 +
+                                         ((-1) << hshift)) >> s->ps.sps->
+                                        log2_min_pu_size)) + (((y0 + ((-1)
+                                                                      <<
+                                                                      vshift))
+                                                               >> s->ps.sps->
+                                                               log2_min_pu_size))
+                                      * min_pu_width]).pred_flag == PF_INTRA))
+                    left[-1] = left[0];
+            } else if (x0 == 0) {
+                do {
+                    uint32_t pix = ((0) * 0x01010101U);
+                    for (i = 0; i < (size_max_y); i += 4)
+                        ((((union unaligned_32 *) (left + i))->l) = (pix));
+                } while (0);
+            } else {
+                a = ((left[size_max_y - 1]) * 0x01010101U);
+                for (i = (size_max_y - 1);
+                     i > (size_max_y - 1) - (size_max_y); i -= 4)
+                    if (!
+                        ((s->ref->tab_mvf[(((x0 +
+                                             ((-1) << hshift)) >> s->ps.sps->
+                                            log2_min_pu_size)) + (((y0 +
+                                                                    ((i -
+                                                                      3) <<
+                                                                     vshift))
+                                                                   >> s->ps.sps->
+                                                                   log2_min_pu_size))
+                                          * min_pu_width]).pred_flag ==
+                         PF_INTRA))
+                        ((((union unaligned_32 *) (&left[i - 3]))->l) = (a));
+                    else
+                        a = ((left[i - 3]) * 0x01010101U);
+            }
+            top[-1] = left[-1];
+            if (y0 != 0) {
+                a = ((left[-1]) * 0x01010101U);
+                for (i = 0; i < (0) + (size_max_x); i += 4)
+                    if (!
+                        ((s->ref->tab_mvf[(((x0 +
+                                             ((i) << hshift)) >> s->ps.sps->
+                                            log2_min_pu_size)) + (((y0 + ((-1)
+                                                                          <<
+                                                                          vshift))
+                                                                   >> s->ps.sps->
+                                                                   log2_min_pu_size))
+                                          * min_pu_width]).pred_flag ==
+                         PF_INTRA))
+                        ((((union unaligned_32 *) (&top[i]))->l) = (a));
+                    else
+                        a = ((top[i + 3]) * 0x01010101U);
+            }
+        }
+    }
+
+    if (!cand_bottom_left) {
+        if (cand_left) {
+            vec0 = (v16u8) __msa_fill_b(left[15]);
+
+            ST_UB(vec0, (left + 16));
+
+        } else if (cand_up_left) {
+            vec0 = (v16u8) __msa_fill_b(left[-1]);
+
+            ST_UB2(vec0, vec0, left, 16);
+
+            cand_left = 1;
+        } else if (cand_up) {
+            left[-1] = top[0];
+
+            vec0 = (v16u8) __msa_fill_b(left[-1]);
+
+            ST_UB2(vec0, vec0, left, 16);
+
+            cand_up_left = 1;
+            cand_left = 1;
+        } else if (cand_up_right) {
+            vec0 = (v16u8) __msa_fill_b(top[16]);
+
+            ST_UB(vec0, top);
+
+            left[-1] = top[16];
+
+            ST_UB2(vec0, vec0, left, 16);
+
+            cand_up = 1;
+            cand_up_left = 1;
+            cand_left = 1;
+        } else {
+            left[-1] = 128;
+            vec0 = (v16u8) __msa_ldi_b(128);
+
+            ST_UB2(vec0, vec0, top, 16);
+            ST_UB2(vec0, vec0, left, 16);
+        }
+    }
+
+    if (!cand_left) {
+        vec0 = (v16u8) __msa_fill_b(left[16]);
+        ST_UB(vec0, left);
+    }
+    if (!cand_up_left) {
+        left[-1] = left[0];
+    }
+    if (!cand_up) {
+        vec0 = (v16u8) __msa_fill_b(left[-1]);
+        ST_UB(vec0, top);
+    }
+    if (!cand_up_right) {
+        vec0 = (v16u8) __msa_fill_b(top[15]);
+        ST_UB(vec0, (top + 16));
+    }
+
+    top[-1] = left[-1];
+
+
+    if (!s->ps.sps->intra_smoothing_disabled_flag
+        && (c_idx == 0 || s->ps.sps->chroma_format_idc == 3)) {
+        if (mode != INTRA_DC && 16 != 4) {
+            int intra_hor_ver_dist_thresh[] = { 7, 1, 0 };
+            int min_dist_vert_hor =
+                (((((int) (mode - 26U)) >=
+                   0 ? ((int) (mode - 26U)) : (-((int) (mode - 26U))))) >
+                 ((((int) (mode - 10U)) >=
+                   0 ? ((int) (mode - 10U)) : (-((int) (mode - 10U)))))
+                 ? ((((int) (mode - 10U)) >=
+                     0 ? ((int) (mode - 10U)) : (-((int) (mode - 10U)))))
+                 : ((((int) (mode - 26U)) >=
+                     0 ? ((int) (mode - 26U)) : (-((int) (mode - 26U))))));
+            if (min_dist_vert_hor > intra_hor_ver_dist_thresh[4 - 3]) {
+                filtered_left[2 * 16 - 1] = left[2 * 16 - 1];
+                filtered_top[2 * 16 - 1] = top[2 * 16 - 1];
+                for (i = 2 * 16 - 2; i >= 0; i--)
+                    filtered_left[i] = (left[i + 1] + 2 * left[i] +
+                                        left[i - 1] + 2) >> 2;
+                filtered_top[-1] =
+                    filtered_left[-1] =
+                    (left[0] + 2 * left[-1] + top[0] + 2) >> 2;
+                for (i = 2 * 16 - 2; i >= 0; i--)
+                    filtered_top[i] = (top[i + 1] + 2 * top[i] +
+                                       top[i - 1] + 2) >> 2;
+                left = filtered_left;
+                top = filtered_top;
+            }
+        }
+    }
+
+    switch (mode) {
+    case INTRA_PLANAR:
+        s->hpc.pred_planar[4 - 2] ((uint8_t *) src, (uint8_t *) top,
+                                   (uint8_t *) left, stride);
+        break;
+    case INTRA_DC:
+        s->hpc.pred_dc((uint8_t *) src, (uint8_t *) top,
+                       (uint8_t *) left, stride, 4, c_idx);
+        break;
+    default:
+        s->hpc.pred_angular[4 - 2] ((uint8_t *) src, (uint8_t *) top,
+                                    (uint8_t *) left, stride, c_idx, mode);
+        break;
+    }
+}
+
+void ff_intra_pred_8_32x32_msa(HEVCContext *s, int x0, int y0, int c_idx)
+{  /* Builds the top/left reference sample arrays for the 32x32 uint8_t block at (x0, y0) of plane c_idx, then calls the planar/DC/angular predictor. */
+    v16u8 vec0, vec1;
+    v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+    v8i16 res0, res1, res2, res3;
+    v8i16 mul_val0 = { 63, 62, 61, 60, 59, 58, 57, 56 };  /* weights (63 - k), k = 0..7 */
+    v8i16 mul_val1 = { 1, 2, 3, 4, 5, 6, 7, 8 };  /* weights (k + 1), k = 0..7 */
+    HEVCLocalContext *lc = s->HEVClc;
+    int i;
+    int hshift = s->ps.sps->hshift[c_idx];
+    int vshift = s->ps.sps->vshift[c_idx];
+    int size_in_luma_h = 32 << hshift;
+    int size_in_tbs_h = size_in_luma_h >> s->ps.sps->log2_min_tb_size;
+    int size_in_luma_v = 32 << vshift;
+    int size_in_tbs_v = size_in_luma_v >> s->ps.sps->log2_min_tb_size;
+    int x = x0 >> hshift;  /* x0/y0 shifted down by the plane's hshift/vshift */
+    int y = y0 >> vshift;
+    int x_tb = (x0 >> s->ps.sps->log2_min_tb_size) & s->ps.sps->tb_mask;
+    int y_tb = (y0 >> s->ps.sps->log2_min_tb_size) & s->ps.sps->tb_mask;
+
+    int cur_tb_addr =
+        s->ps.pps->min_tb_addr_zs[(y_tb) * (s->ps.sps->tb_mask + 2) + (x_tb)];
+
+    ptrdiff_t stride = s->frame->linesize[c_idx] / sizeof(uint8_t);
+    uint8_t *src = (uint8_t *) s->frame->data[c_idx] + x + y * stride;  /* top-left sample of the block in plane c_idx */
+
+    int min_pu_width = s->ps.sps->min_pu_width;
+
+    enum IntraPredMode mode = c_idx ? lc->tu.intra_pred_mode_c :
+        lc->tu.intra_pred_mode;
+    uint32_t a;
+    uint8_t left_array[2 * 32 + 1];
+    uint8_t filtered_left_array[2 * 32 + 1];
+    uint8_t top_array[2 * 32 + 1];
+    uint8_t filtered_top_array[2 * 32 + 1];
+
+    uint8_t *left = left_array + 1;  /* the four pointers are offset by one so that index -1 is addressable */
+    uint8_t *top = top_array + 1;
+    uint8_t *filtered_left = filtered_left_array + 1;
+    uint8_t *filtered_top = filtered_top_array + 1;
+    int cand_bottom_left = lc->na.cand_bottom_left
+        && cur_tb_addr >
+        s->ps.pps->min_tb_addr_zs[((y_tb + size_in_tbs_v) & s->ps.sps->tb_mask) *
+                               (s->ps.sps->tb_mask + 2) + (x_tb - 1)];
+    int cand_left = lc->na.cand_left;
+    int cand_up_left = lc->na.cand_up_left;
+    int cand_up = lc->na.cand_up;
+    int cand_up_right = lc->na.cand_up_right
+        && cur_tb_addr >
+        s->ps.pps->min_tb_addr_zs[(y_tb - 1) * (s->ps.sps->tb_mask + 2) +
+                               ((x_tb + size_in_tbs_h) & s->ps.sps->tb_mask)];
+
+    int bottom_left_size =  /* (min(y0 + 2 * size_in_luma_v, height) - (y0 + size_in_luma_v)) >> vshift */
+        (((y0 + 2 * size_in_luma_v) >
+          (s->ps.sps->height) ? (s->ps.sps->height) : (y0 +
+                                                 2 * size_in_luma_v)) -
+         (y0 + size_in_luma_v)) >> vshift;
+    int top_right_size =  /* (min(x0 + 2 * size_in_luma_h, width) - (x0 + size_in_luma_h)) >> hshift */
+        (((x0 + 2 * size_in_luma_h) >
+          (s->ps.sps->width) ? (s->ps.sps->width) : (x0 + 2 * size_in_luma_h)) -
+         (x0 + size_in_luma_h)) >> hshift;
+
+    if (s->ps.pps->constrained_intra_pred_flag == 1) {  /* re-derive each cand_* flag from the pred_flag == PF_INTRA state of the neighbouring PUs */
+        int size_in_luma_pu_v = ((size_in_luma_v) >> s->ps.sps->log2_min_pu_size);
+        int size_in_luma_pu_h = ((size_in_luma_h) >> s->ps.sps->log2_min_pu_size);
+        int on_pu_edge_x = !(x0 & ((1 << s->ps.sps->log2_min_pu_size) - 1));
+        int on_pu_edge_y = !(y0 & ((1 << s->ps.sps->log2_min_pu_size) - 1));
+        if (!size_in_luma_pu_h)
+            size_in_luma_pu_h++;
+        if (cand_bottom_left == 1 && on_pu_edge_x) {
+            int x_left_pu = ((x0 - 1) >> s->ps.sps->log2_min_pu_size);
+            int y_bottom_pu =
+                ((y0 + size_in_luma_v) >> s->ps.sps->log2_min_pu_size);
+            int max =
+                ((size_in_luma_pu_v) >
+                 (s->ps.sps->min_pu_height -
+                  y_bottom_pu) ? (s->ps.sps->min_pu_height -
+                                  y_bottom_pu) : (size_in_luma_pu_v));
+            cand_bottom_left = 0;
+            for (i = 0; i < max; i += 2)  /* visits every second tab_mvf entry; the flag is the OR of the PF_INTRA tests */
+                cand_bottom_left |=
+                    ((s->ref->tab_mvf[(x_left_pu) +
+                                      (y_bottom_pu +
+                                       i) * min_pu_width]).pred_flag ==
+                     PF_INTRA);
+        }
+        if (cand_left == 1 && on_pu_edge_x) {
+            int x_left_pu = ((x0 - 1) >> s->ps.sps->log2_min_pu_size);
+            int y_left_pu = ((y0) >> s->ps.sps->log2_min_pu_size);
+            int max =
+                ((size_in_luma_pu_v) >
+                 (s->ps.sps->min_pu_height -
+                  y_left_pu) ? (s->ps.sps->min_pu_height -
+                                y_left_pu) : (size_in_luma_pu_v));
+            cand_left = 0;
+            for (i = 0; i < max; i += 2)
+                cand_left |=
+                    ((s->ref->tab_mvf[(x_left_pu) +
+                                      (y_left_pu +
+                                       i) * min_pu_width]).pred_flag ==
+                     PF_INTRA);
+        }
+        if (cand_up_left == 1) {  /* single PU test, no edge condition */
+            int x_left_pu = ((x0 - 1) >> s->ps.sps->log2_min_pu_size);
+            int y_top_pu = ((y0 - 1) >> s->ps.sps->log2_min_pu_size);
+            cand_up_left =
+                (s->ref->tab_mvf[(x_left_pu) +
+                                 (y_top_pu) * min_pu_width]).pred_flag ==
+                PF_INTRA;
+        }
+        if (cand_up == 1 && on_pu_edge_y) {
+            int x_top_pu = ((x0) >> s->ps.sps->log2_min_pu_size);
+            int y_top_pu = ((y0 - 1) >> s->ps.sps->log2_min_pu_size);
+            int max =
+                ((size_in_luma_pu_h) >
+                 (s->ps.sps->min_pu_width -
+                  x_top_pu) ? (s->ps.sps->min_pu_width -
+                               x_top_pu) : (size_in_luma_pu_h));
+            cand_up = 0;
+            for (i = 0; i < max; i += 2)
+                cand_up |=
+                    ((s->ref->tab_mvf[(x_top_pu + i) +
+                                      (y_top_pu) *
+                                      min_pu_width]).pred_flag == PF_INTRA);
+        }
+        if (cand_up_right == 1 && on_pu_edge_y) {
+            int y_top_pu = ((y0 - 1) >> s->ps.sps->log2_min_pu_size);
+            int x_right_pu =
+                ((x0 + size_in_luma_h) >> s->ps.sps->log2_min_pu_size);
+            int max =
+                ((size_in_luma_pu_h) >
+                 (s->ps.sps->min_pu_width -
+                  x_right_pu) ? (s->ps.sps->min_pu_width -
+                                 x_right_pu) : (size_in_luma_pu_h));
+            cand_up_right = 0;
+            for (i = 0; i < max; i += 2)
+                cand_up_right |=
+                    ((s->ref->tab_mvf[(x_right_pu + i) +
+                                      (y_top_pu) *
+                                      min_pu_width]).pred_flag == PF_INTRA);
+        }
+        vec0 = (v16u8) __msa_ldi_b(128);  /* pre-fill value 128; ST_UB4 is defined elsewhere and presumably stores 4 x 16 bytes - confirm */
+
+        ST_UB4(vec0, vec0, vec0, vec0, left, 16);
+        ST_UB4(vec0, vec0, vec0, vec0, top, 16);
+
+        top[-1] = 128;
+    }
+    if (cand_up_left) {  /* corner sample one row above and one column left of the block */
+        left[-1] = src[(-1) + stride * (-1)];
+        top[-1] = left[-1];
+    }
+    if (cand_up) {  /* row directly above the block goes to top[0..] */
+        LD_UB2(src - stride, 16, vec0, vec1);
+        ST_UB2(vec0, vec1, top, 16);
+    }
+
+    if (cand_up_right) {  /* row above, starting 32 samples to the right, goes to top[32..] */
+        LD_UB2(src - stride + 32, 16, vec0, vec1);
+        ST_UB2(vec0, vec1, (top + 32), 16);
+        do {  /* overwrite top[32 + top_right_size ..] with the sample at offset 32 + top_right_size - 1, four bytes per store */
+            uint32_t pix =
+                ((src[(32 + top_right_size - 1) + stride * (-1)]) *
+                 0x01010101U);
+            for (i = 0; i < (32 - top_right_size); i += 4)
+                ((((union unaligned_32 *) (top + 32 + top_right_size +
+                                           i))->l) = (pix));
+        } while (0);
+    }
+    if (cand_left)  /* column immediately left of the block goes to left[0..31] */
+        for (i = 0; i < 32; i++)
+            left[i] = src[(-1) + stride * (i)];
+    if (cand_bottom_left) {
+        for (i = 32; i < 32 + bottom_left_size; i++)
+            left[i] = src[(-1) + stride * (i)];
+        do {  /* overwrite left[32 + bottom_left_size ..] with the sample at row 32 + bottom_left_size - 1 */
+            uint32_t pix =
+                ((src[(-1) + stride * (32 + bottom_left_size - 1)]) *
+                 0x01010101U);
+            for (i = 0; i < (32 - bottom_left_size); i += 4)
+                ((((union unaligned_32 *) (left + 32 + bottom_left_size +
+                                           i))->l) = (pix));
+        } while (0);
+    }
+
+    if (s->ps.pps->constrained_intra_pred_flag == 1) {  /* replace samples whose PU is not PF_INTRA with values carried over from PF_INTRA neighbours */
+        if (cand_bottom_left || cand_left || cand_up_left || cand_up
+            || cand_up_right) {
+            int size_max_x =
+                x0 + ((2 * 32) << hshift) <
+                s->ps.sps->width ? 2 * 32 : (s->ps.sps->width - x0) >> hshift;
+            int size_max_y =
+                y0 + ((2 * 32) << vshift) <
+                s->ps.sps->height ? 2 * 32 : (s->ps.sps->height - y0) >> vshift;
+            int j = 32 + (cand_bottom_left ? bottom_left_size : 0) - 1;  /* highest left[] index that was loaded from src */
+            if (!cand_up_right) {
+                size_max_x = x0 + ((32) << hshift) < s->ps.sps->width ?
+                    32 : (s->ps.sps->width - x0) >> hshift;
+            }
+            if (!cand_bottom_left) {
+                size_max_y = y0 + ((32) << vshift) < s->ps.sps->height ?
+                    32 : (s->ps.sps->height - y0) >> vshift;
+            }
+            if (cand_bottom_left || cand_left || cand_up_left) {
+                while (j > -1  /* walk j downwards until the left-column PU at row j is PF_INTRA */
+                       &&
+                       !((s->ref->tab_mvf[(((x0 +
+                                             ((-1) << hshift)) >> s->ps.sps->
+                                            log2_min_pu_size)) + (((y0 +
+                                                                    ((j) <<
+                                                                     vshift))
+                                                                   >> s->ps.sps->
+                                                                   log2_min_pu_size))
+                                          * min_pu_width]).pred_flag ==
+                         PF_INTRA))
+                    j--;
+                if (!
+                    ((s->ref->tab_mvf[(((x0 +
+                                         ((-1) << hshift)) >> s->ps.sps->
+                                        log2_min_pu_size)) + (((y0 + ((j)
+                                                                      <<
+                                                                      vshift))
+                                                               >> s->ps.sps->
+                                                               log2_min_pu_size))
+                                      * min_pu_width]).pred_flag == PF_INTRA)) {
+                    j = 0;
+                    while (j < size_max_x  /* advance j along the top row until its PU is PF_INTRA */
+                           &&
+                           !((s->ref->tab_mvf[(((x0 +
+                                                 ((j) << hshift)) >> s->ps.sps->
+                                                log2_min_pu_size)) + (((y0 +
+                                                                        ((-1) <<
+                                                                         vshift))
+                                                                       >> s->
+                                                                       ps.sps->
+                                                                       log2_min_pu_size))
+                                              * min_pu_width]).pred_flag ==
+                             PF_INTRA))
+                        j++;
+                    for (i = j; i > (j) - (j + 1); i--)  /* bound is -1, so i runs j..0 and top[-1] can be written */
+                        if (!
+                            ((s->ref->tab_mvf[(((x0 +
+                                                 ((i -
+                                                   1) << hshift)) >> s->ps.sps->
+                                                log2_min_pu_size)) + (((y0 +
+                                                                        ((-1) <<
+                                                                         vshift))
+                                                                       >> s->
+                                                                       ps.sps->
+                                                                       log2_min_pu_size))
+                                              * min_pu_width]).pred_flag ==
+                             PF_INTRA))
+                            top[i - 1] = top[i];
+                    left[-1] = top[-1];
+                }
+            } else {
+                j = 0;
+                while (j < size_max_x
+                       &&
+                       !((s->ref->tab_mvf[(((x0 +
+                                             ((j) << hshift)) >> s->ps.sps->
+                                            log2_min_pu_size)) + (((y0 + ((-1)
+                                                                          <<
+                                                                          vshift))
+                                                                   >> s->ps.sps->
+                                                                   log2_min_pu_size))
+                                          * min_pu_width]).pred_flag ==
+                         PF_INTRA))
+                    j++;
+                if (j > 0)
+                    if (x0 > 0) {
+                        for (i = j; i > (j) - (j + 1); i--)  /* i runs j..0 */
+                            if (!
+                                ((s->ref->tab_mvf[(((x0 +
+                                                     ((i -
+                                                       1) << hshift)) >>
+                                                    s->ps.sps->log2_min_pu_size))
+                                                  + (((y0 + ((-1)
+                                                             << vshift))
+                                                      >>
+                                                      s->ps.sps->log2_min_pu_size))
+                                                  *
+                                                  min_pu_width]).pred_flag ==
+                                 PF_INTRA))
+                                top[i - 1] = top[i];
+                    } else {
+                        for (i = j; i > (j) - (j); i--)  /* bound is 0, so i runs j..1; top[-1] is set separately below */
+                            if (!
+                                ((s->ref->tab_mvf[(((x0 +
+                                                     ((i -
+                                                       1) << hshift)) >>
+                                                    s->ps.sps->log2_min_pu_size))
+                                                  + (((y0 + ((-1)
+                                                             << vshift))
+                                                      >>
+                                                      s->ps.sps->log2_min_pu_size))
+                                                  *
+                                                  min_pu_width]).pred_flag ==
+                                 PF_INTRA))
+                                top[i - 1] = top[i];
+                        top[-1] = top[0];
+                    }
+                left[-1] = top[-1];
+            }
+            left[-1] = top[-1];
+            if (cand_bottom_left || cand_left) {
+                a = ((left[-1]) * 0x01010101U);  /* a holds the replacement byte replicated into 4 bytes */
+                for (i = 0; i < (0) + (size_max_y); i += 4)  /* forward pass in groups of 4: non-intra groups take a, intra groups refresh a */
+                    if (!
+                        ((s->ref->tab_mvf[(((x0 +
+                                             ((-1) << hshift)) >> s->ps.sps->
+                                            log2_min_pu_size)) + (((y0 +
+                                                                    ((i) <<
+                                                                     vshift))
+                                                                   >> s->ps.sps->
+                                                                   log2_min_pu_size))
+                                          * min_pu_width]).pred_flag ==
+                         PF_INTRA))
+                        ((((union unaligned_32 *) (&left[i]))->l) = (a));
+                    else
+                        a = ((left[i + 3]) * 0x01010101U);
+            }
+            if (!cand_left) {
+                vec0 = (v16u8) __msa_fill_b(left[-1]);
+
+                ST_UB2(vec0, vec0, left, 16);
+            }
+            if (!cand_bottom_left) {
+                vec0 = (v16u8) __msa_fill_b(left[31]);
+
+                ST_UB2(vec0, vec0, (left + 32), 16);
+            }
+            if (x0 != 0 && y0 != 0) {
+                a = ((left[size_max_y - 1]) * 0x01010101U);
+                for (i = (size_max_y - 1);  /* backward pass over left[], again in groups of 4 */
+                     i > (size_max_y - 1) - (size_max_y); i -= 4)
+                    if (!
+                        ((s->ref->tab_mvf[(((x0 +
+                                             ((-1) << hshift)) >> s->ps.sps->
+                                            log2_min_pu_size)) + (((y0 +
+                                                                    ((i -
+                                                                      3) <<
+                                                                     vshift))
+                                                                   >> s->ps.sps->
+                                                                   log2_min_pu_size))
+                                          * min_pu_width]).pred_flag ==
+                         PF_INTRA))
+                        ((((union unaligned_32 *) (&left[i - 3]))->l) = (a));
+                    else
+                        a = ((left[i - 3]) * 0x01010101U);
+                if (!
+                    ((s->ref->tab_mvf[(((x0 +
+                                         ((-1) << hshift)) >> s->ps.sps->
+                                        log2_min_pu_size)) + (((y0 + ((-1)
+                                                                      <<
+                                                                      vshift))
+                                                               >> s->ps.sps->
+                                                               log2_min_pu_size))
+                                      * min_pu_width]).pred_flag == PF_INTRA))
+                    left[-1] = left[0];
+            } else if (x0 == 0) {
+                do {
+                    uint32_t pix = ((0) * 0x01010101U);  /* pix is 0: left[0 .. size_max_y) is zero-filled when x0 == 0 */
+                    for (i = 0; i < (size_max_y); i += 4)
+                        ((((union unaligned_32 *) (left + i))->l) = (pix));
+                } while (0);
+            } else {
+                a = ((left[size_max_y - 1]) * 0x01010101U);
+                for (i = (size_max_y - 1);
+                     i > (size_max_y - 1) - (size_max_y); i -= 4)
+                    if (!
+                        ((s->ref->tab_mvf[(((x0 +
+                                             ((-1) << hshift)) >> s->ps.sps->
+                                            log2_min_pu_size)) + (((y0 +
+                                                                    ((i -
+                                                                      3) <<
+                                                                     vshift))
+                                                                   >> s->ps.sps->
+                                                                   log2_min_pu_size))
+                                          * min_pu_width]).pred_flag ==
+                         PF_INTRA))
+                        ((((union unaligned_32 *) (&left[i - 3]))->l) = (a));
+                    else
+                        a = ((left[i - 3]) * 0x01010101U);
+            }
+            top[-1] = left[-1];
+            if (y0 != 0) {
+                a = ((left[-1]) * 0x01010101U);
+                for (i = 0; i < (0) + (size_max_x); i += 4)  /* forward pass over top[] in groups of 4 */
+                    if (!
+                        ((s->ref->tab_mvf[(((x0 +
+                                             ((i) << hshift)) >> s->ps.sps->
+                                            log2_min_pu_size)) + (((y0 + ((-1)
+                                                                          <<
+                                                                          vshift))
+                                                                   >> s->ps.sps->
+                                                                   log2_min_pu_size))
+                                          * min_pu_width]).pred_flag ==
+                         PF_INTRA))
+                        ((((union unaligned_32 *) (&top[i]))->l) = (a));
+                    else
+                        a = ((top[i + 3]) * 0x01010101U);
+            }
+        }
+    }
+
+    if (!cand_bottom_left) {  /* fill the missing bottom-left part from the first available neighbour: left, up-left, up, up-right */
+        if (cand_left) {
+            vec0 = (v16u8) __msa_fill_b(left[31]);
+
+            ST_UB2(vec0, vec0, (left + 32), 16);
+        } else if (cand_up_left) {
+            vec0 = (v16u8) __msa_fill_b(left[-1]);
+
+            ST_UB4(vec0, vec0, vec0, vec0, left, 16);
+
+            cand_left = 1;
+        } else if (cand_up) {
+            left[-1] = top[0];
+
+            vec0 = (v16u8) __msa_fill_b(left[-1]);
+
+            ST_UB4(vec0, vec0, vec0, vec0, left, 16);
+
+            cand_up_left = 1;
+            cand_left = 1;
+        } else if (cand_up_right) {
+            vec0 = (v16u8) __msa_fill_b(top[32]);
+
+            ST_UB2(vec0, vec0, top, 16);
+
+            left[-1] = top[32];
+
+            ST_UB4(vec0, vec0, vec0, vec0, left, 16);
+
+            cand_up = 1;
+            cand_up_left = 1;
+            cand_left = 1;
+        } else {  /* no neighbour flag is set: top, left and the corner all become 128 */
+            left[-1] = 128;
+
+            vec0 = (v16u8) __msa_ldi_b(128);
+
+            ST_UB4(vec0, vec0, vec0, vec0, top, 16);
+            ST_UB4(vec0, vec0, vec0, vec0, left, 16);
+        }
+    }
+
+    if (!cand_left) {  /* each remaining gap is filled from the adjacent, already populated sample */
+        vec0 = (v16u8) __msa_fill_b(left[32]);
+
+        ST_UB2(vec0, vec0, left, 16);
+    }
+    if (!cand_up_left) {
+        left[-1] = left[0];
+    }
+    if (!cand_up) {
+        vec0 = (v16u8) __msa_fill_b(left[-1]);
+
+        ST_UB2(vec0, vec0, top, 16);
+    }
+    if (!cand_up_right) {
+        vec0 = (v16u8) __msa_fill_b(top[31]);
+
+        ST_UB2(vec0, vec0, (top + 32), 16);
+    }
+
+    top[-1] = left[-1];
+
+
+    if (!s->ps.sps->intra_smoothing_disabled_flag  /* reference smoothing: only when c_idx == 0 or chroma_format_idc == 3 */
+        && (c_idx == 0 || s->ps.sps->chroma_format_idc == 3)) {
+        if (mode != INTRA_DC && 32 != 4) {  /* "32 != 4" is a constant-true term */
+            int intra_hor_ver_dist_thresh[] = { 7, 1, 0 };
+            int min_dist_vert_hor =  /* min(|mode - 26|, |mode - 10|) */
+                (((((int) (mode - 26U)) >=
+                   0 ? ((int) (mode - 26U)) : (-((int) (mode - 26U))))) >
+                 ((((int) (mode - 10U)) >=
+                   0 ? ((int) (mode - 10U)) : (-((int) (mode - 10U)))))
+                 ? ((((int) (mode - 10U)) >=
+                     0 ? ((int) (mode - 10U)) : (-((int) (mode - 10U)))))
+                 : ((((int) (mode - 26U)) >=
+                     0 ? ((int) (mode - 26U)) : (-((int) (mode - 26U))))));
+            if (min_dist_vert_hor > intra_hor_ver_dist_thresh[5 - 3]) {  /* index 2, i.e. threshold 0 */
+                int threshold = 1 << (8 - 5);  /* evaluates to 8 */
+                if (s->ps.sps->sps_strong_intra_smoothing_enable_flag  /* strong path: flag set, c_idx == 0, and |first + last - 2 * middle| < threshold on both edges */
+                    && c_idx == 0
+                    && ((top[-1] + top[63] - 2 * top[31]) >=
+                        0 ? (top[-1] + top[63] -
+                             2 * top[31]) : (-(top[-1] + top[63] -
+                                               2 * top[31]))) < threshold
+                    && ((left[-1] + left[63] - 2 * left[31]) >=
+                        0 ? (left[-1] + left[63] -
+                             2 * left[31]) : (-(left[-1] + left[63] -
+                                                2 * left[31]))) < threshold) {
+
+
+                    filtered_top[-1] = top[-1];
+                    filtered_top[63] = top[63];
+
+
+                    for (i = 0; i < 63; i++) {  /* NOTE(review): the MSA code below appears to recompute and store these same values - confirm whether this scalar loop is still needed */
+                        filtered_top[i] =
+                            ((63 - i) * top[-1] + (i + 1) * top[63] + 32) >> 6;
+                    }
+
+                    tmp0 = __msa_fill_h(top[-1]);  /* tmp0/tmp1: the two end samples broadcast to every 16-bit lane */
+                    tmp1 = __msa_fill_h(top[63]);
+
+                    tmp2 = mul_val0 - 8;  /* weight vectors for positions 8..31, derived from mul_val0/mul_val1 */
+                    tmp3 = mul_val0 - 16;
+                    tmp4 = mul_val0 - 24;
+                    tmp5 = mul_val1 + 8;
+                    tmp6 = mul_val1 + 16;
+                    tmp7 = mul_val1 + 24;
+
+                    res0 = mul_val0 * tmp0;
+                    res1 = tmp2 * tmp0;
+                    res2 = tmp3 * tmp0;
+                    res3 = tmp4 * tmp0;
+                    res0 += mul_val1 * tmp1;
+                    res1 += tmp5 * tmp1;
+                    res2 += tmp6 * tmp1;
+                    res3 += tmp7 * tmp1;
+
+                    res0 = __msa_srari_h(res0, 6);  /* presumably a rounding right shift by 6, matching the "+ 32) >> 6" above - confirm against the MSA reference */
+                    res1 = __msa_srari_h(res1, 6);
+                    res2 = __msa_srari_h(res2, 6);
+                    res3 = __msa_srari_h(res3, 6);
+
+                    vec0 = (v16u8) __msa_pckev_b((v16i8) res1, (v16i8) res0);
+                    vec1 = (v16u8) __msa_pckev_b((v16i8) res3, (v16i8) res2);
+
+                    ST_UB2(vec0, vec1, filtered_top, 16);
+
+                    res0 = mul_val0 - 32;  /* same computation with weights for positions 32..63 */
+                    tmp2 = mul_val0 - 40;
+                    tmp3 = mul_val0 - 48;
+                    tmp4 = mul_val0 - 56;
+                    res3 = mul_val1 + 32;
+                    tmp5 = mul_val1 + 40;
+                    tmp6 = mul_val1 + 48;
+                    tmp7 = mul_val1 + 56;
+
+                    res0 = res0 * tmp0;
+                    res1 = tmp2 * tmp0;
+                    res2 = tmp3 * tmp0;
+                    res0 += res3 * tmp1;
+                    res3 = tmp4 * tmp0;
+                    res1 += tmp5 * tmp1;
+                    res2 += tmp6 * tmp1;
+                    res3 += tmp7 * tmp1;
+
+                    res0 = __msa_srari_h(res0, 6);
+                    res1 = __msa_srari_h(res1, 6);
+                    res2 = __msa_srari_h(res2, 6);
+                    res3 = __msa_srari_h(res3, 6);
+
+                    vec0 = (v16u8) __msa_pckev_b((v16i8) res1, (v16i8) res0);
+                    vec1 = (v16u8) __msa_pckev_b((v16i8) res3, (v16i8) res2);
+
+                    ST_UB2(vec0, vec1, (filtered_top + 32), 16);
+
+                    filtered_top[63] = top[63];
+
+                    tmp0 = __msa_fill_h(left[-1]);  /* repeat the interpolation for the left edge */
+                    tmp1 = __msa_fill_h(left[63]);
+
+                    tmp2 = mul_val0 - 8;
+                    tmp3 = mul_val0 - 16;
+                    tmp4 = mul_val0 - 24;
+                    tmp5 = mul_val1 + 8;
+                    tmp6 = mul_val1 + 16;
+                    tmp7 = mul_val1 + 24;
+
+                    res0 = mul_val0 * tmp0;
+                    res1 = tmp2 * tmp0;
+                    res2 = tmp3 * tmp0;
+                    res3 = tmp4 * tmp0;
+                    res0 += mul_val1 * tmp1;
+                    res1 += tmp5 * tmp1;
+                    res2 += tmp6 * tmp1;
+                    res3 += tmp7 * tmp1;
+
+                    res0 = __msa_srari_h(res0, 6);
+                    res1 = __msa_srari_h(res1, 6);
+                    res2 = __msa_srari_h(res2, 6);
+                    res3 = __msa_srari_h(res3, 6);
+
+                    vec0 = (v16u8) __msa_pckev_b((v16i8) res1, (v16i8) res0);
+                    vec1 = (v16u8) __msa_pckev_b((v16i8) res3, (v16i8) res2);
+
+                    ST_UB2(vec0, vec1, left, 16);  /* stored back into left itself; only top is redirected to a filtered buffer in this branch */
+
+                    res0 = mul_val0 - 32;
+                    tmp2 = mul_val0 - 40;
+                    tmp3 = mul_val0 - 48;
+                    tmp4 = mul_val0 - 56;
+                    res3 = mul_val1 + 32;
+                    tmp5 = mul_val1 + 40;
+                    tmp6 = mul_val1 + 48;
+                    tmp7 = mul_val1 + 56;
+
+                    res0 = res0 * tmp0;
+                    res1 = tmp2 * tmp0;
+                    res2 = tmp3 * tmp0;
+                    res0 += res3 * tmp1;
+                    res3 = tmp4 * tmp0;
+                    res1 += tmp5 * tmp1;
+                    res2 += tmp6 * tmp1;
+                    res3 += tmp7 * tmp1;
+
+                    res0 = __msa_srari_h(res0, 6);
+                    res1 = __msa_srari_h(res1, 6);
+                    res2 = __msa_srari_h(res2, 6);
+                    res3 = __msa_srari_h(res3, 6);
+
+                    vec0 = (v16u8) __msa_pckev_b((v16i8) res1, (v16i8) res0);
+                    vec1 = (v16u8) __msa_pckev_b((v16i8) res3, (v16i8) res2);
+
+                    ST_UB2(vec0, vec1, (left + 32), 16);
+
+                    left[63] = tmp1[0];  /* put back the pre-filter left[63], which tmp1 was filled from */
+
+                    top = filtered_top;
+                } else {  /* otherwise: 3-tap (1, 2, 1) / 4 filter with rounding into the filtered_* buffers */
+                    filtered_left[2 * 32 - 1] = left[2 * 32 - 1];
+                    filtered_top[2 * 32 - 1] = top[2 * 32 - 1];
+                    for (i = 2 * 32 - 2; i >= 0; i--)
+                        filtered_left[i] = (left[i + 1] + 2 * left[i] +
+                                            left[i - 1] + 2) >> 2;
+                    filtered_top[-1] =
+                        filtered_left[-1] =
+                        (left[0] + 2 * left[-1] + top[0] + 2) >> 2;
+                    for (i = 2 * 32 - 2; i >= 0; i--)
+                        filtered_top[i] = (top[i + 1] + 2 * top[i] +
+                                           top[i - 1] + 2) >> 2;
+                    left = filtered_left;
+                    top = filtered_top;
+                }
+            }
+        }
+    }
+
+    switch (mode) {  /* dispatch on the prediction mode through the function pointers in s->hpc */
+    case INTRA_PLANAR:
+        s->hpc.pred_planar[3] ((uint8_t *) src, (uint8_t *) top,
+                               (uint8_t *) left, stride);
+        break;
+    case INTRA_DC:
+        s->hpc.pred_dc((uint8_t *) src, (uint8_t *) top,
+                       (uint8_t *) left, stride, 5, c_idx);  /* 5 is presumably log2 of the block size (32) - confirm against pred_dc's prototype */
+        break;
+    default:
+        s->hpc.pred_angular[3] ((uint8_t *) src, (uint8_t *) top,
+                                (uint8_t *) left, stride, c_idx, mode);
+        break;
+    }
+}
diff --git a/libavcodec/mips/hpeldsp_init_mips.c b/libavcodec/mips/hpeldsp_init_mips.c
new file mode 100644
index 00000000..82f23109
--- /dev/null
+++ b/libavcodec/mips/hpeldsp_init_mips.c
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2015 Parag Salasakar (Parag.Salasakar@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "../hpeldsp.h"
+#include "libavcodec/mips/hpeldsp_mips.h"
+
+#if HAVE_MSA
+static void ff_hpeldsp_init_msa(HpelDSPContext *c, int flags)
+{   /* Installs the MSA routines into c's tables; flags is not consulted. */
+    c->put_pixels_tab[0][0] = ff_put_pixels16_msa;
+    c->put_pixels_tab[0][1] = ff_put_pixels16_x2_msa;
+    c->put_pixels_tab[0][2] = ff_put_pixels16_y2_msa;
+    c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_msa;
+    /* Row [1]: the pixels8 functions, same column order (plain, x2, y2, xy2). */
+    c->put_pixels_tab[1][0] = ff_put_pixels8_msa;
+    c->put_pixels_tab[1][1] = ff_put_pixels8_x2_msa;
+    c->put_pixels_tab[1][2] = ff_put_pixels8_y2_msa;
+    c->put_pixels_tab[1][3] = ff_put_pixels8_xy2_msa;
+    /* Row [2]: the pixels4 functions; entry [2][0] is not assigned here. */
+    c->put_pixels_tab[2][1] = ff_put_pixels4_x2_msa;
+    c->put_pixels_tab[2][2] = ff_put_pixels4_y2_msa;
+    c->put_pixels_tab[2][3] = ff_put_pixels4_xy2_msa;
+    /* no_rnd table: column 0 reuses the ordinary put functions. */
+    c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_msa;
+    c->put_no_rnd_pixels_tab[0][1] = ff_put_no_rnd_pixels16_x2_msa;
+    c->put_no_rnd_pixels_tab[0][2] = ff_put_no_rnd_pixels16_y2_msa;
+    c->put_no_rnd_pixels_tab[0][3] = ff_put_no_rnd_pixels16_xy2_msa;
+    /* no_rnd row [1]; no row [2] entries are installed for this table. */
+    c->put_no_rnd_pixels_tab[1][0] = ff_put_pixels8_msa;
+    c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_msa;
+    c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_msa;
+    c->put_no_rnd_pixels_tab[1][3] = ff_put_no_rnd_pixels8_xy2_msa;
+    /* avg table, row [0]: the pixels16 functions. */
+    c->avg_pixels_tab[0][0] = ff_avg_pixels16_msa;
+    c->avg_pixels_tab[0][1] = ff_avg_pixels16_x2_msa;
+    c->avg_pixels_tab[0][2] = ff_avg_pixels16_y2_msa;
+    c->avg_pixels_tab[0][3] = ff_avg_pixels16_xy2_msa;
+    /* avg row [1]: the pixels8 functions. */
+    c->avg_pixels_tab[1][0] = ff_avg_pixels8_msa;
+    c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_msa;
+    c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_msa;
+    c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_msa;
+    /* avg row [2]: the pixels4 functions, all four columns. */
+    c->avg_pixels_tab[2][0] = ff_avg_pixels4_msa;
+    c->avg_pixels_tab[2][1] = ff_avg_pixels4_x2_msa;
+    c->avg_pixels_tab[2][2] = ff_avg_pixels4_y2_msa;
+    c->avg_pixels_tab[2][3] = ff_avg_pixels4_xy2_msa;
+}
+#endif  // #if HAVE_MSA
+
+void ff_hpeldsp_init_mips(HpelDSPContext *c, int flags)
+{   /* Hands c to the MSA setup when HAVE_MSA is set; otherwise a no-op. */
+#if HAVE_MSA
+    ff_hpeldsp_init_msa(c, flags);  /* NOTE(review): compile-time gate only */
+#endif  // #if HAVE_MSA
+}
diff --git a/libavcodec/mips/hpeldsp_mips.h b/libavcodec/mips/hpeldsp_mips.h
new file mode 100644
index 00000000..f4ab53eb
--- /dev/null
+++ b/libavcodec/mips/hpeldsp_mips.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2015 Parag Salasakar (Parag.Salasakar@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MIPS_HPELDSP_MIPS_H
+#define AVCODEC_MIPS_HPELDSP_MIPS_H
+/* NOTE(review): a .c template is included here, presumably for the types. */
+#include "libavcodec/bit_depth_template.c"
+/* MSA half-pel routines; every prototype is (block, pixels, line_size, h). */
+void ff_put_pixels16_msa(uint8_t *block, const uint8_t *pixels,
+                         ptrdiff_t line_size, int32_t h);
+void ff_put_pixels16_x2_msa(uint8_t *block, const uint8_t *pixels,
+                            ptrdiff_t line_size, int32_t h);
+void ff_put_pixels16_y2_msa(uint8_t *block, const uint8_t *pixels,
+                            ptrdiff_t line_size, int32_t h);
+void ff_put_pixels16_xy2_msa(uint8_t *block, const uint8_t *pixels,
+                             ptrdiff_t line_size, int32_t h);
+void ff_put_pixels8_msa(uint8_t *block, const uint8_t *pixels,
+                        ptrdiff_t line_size, int32_t h);
+void ff_put_pixels8_x2_msa(uint8_t *block, const uint8_t *pixels,
+                           ptrdiff_t line_size, int32_t h);
+void ff_put_pixels8_y2_msa(uint8_t *block, const uint8_t *pixels,
+                           ptrdiff_t line_size, int32_t h);
+void ff_put_pixels8_xy2_msa(uint8_t *block, const uint8_t *pixels,
+                            ptrdiff_t line_size, int32_t h);
+void ff_put_pixels4_msa(uint8_t *block, const uint8_t *pixels,
+                        ptrdiff_t line_size, int32_t h);
+void ff_put_pixels4_x2_msa(uint8_t *block, const uint8_t *pixels,
+                           ptrdiff_t line_size, int32_t h);
+void ff_put_pixels4_y2_msa(uint8_t *block, const uint8_t *pixels,
+                           ptrdiff_t line_size, int32_t h);
+void ff_put_pixels4_xy2_msa(uint8_t *block, const uint8_t *pixels,
+                            ptrdiff_t line_size, int32_t h);
+void ff_put_no_rnd_pixels16_x2_msa(uint8_t *block, const uint8_t *pixels,
+                                   ptrdiff_t line_size, int32_t h);
+void ff_put_no_rnd_pixels16_y2_msa(uint8_t *block, const uint8_t *pixels,
+                                   ptrdiff_t line_size, int32_t h);
+void ff_put_no_rnd_pixels16_xy2_msa(uint8_t *block, const uint8_t *pixels,
+                                    ptrdiff_t line_size, int32_t h);
+void ff_put_no_rnd_pixels8_x2_msa(uint8_t *block, const uint8_t *pixels,
+                                  ptrdiff_t line_size, int32_t h);
+void ff_put_no_rnd_pixels8_y2_msa(uint8_t *block, const uint8_t *pixels,
+                                  ptrdiff_t line_size, int32_t h);
+void ff_put_no_rnd_pixels8_xy2_msa(uint8_t *block, const uint8_t *pixels,
+                                   ptrdiff_t line_size, int32_t h);
+void ff_avg_pixels16_msa(uint8_t *block, const uint8_t *pixels,
+                         ptrdiff_t line_size, int32_t h);
+void ff_avg_pixels16_x2_msa(uint8_t *block, const uint8_t *pixels,
+                            ptrdiff_t line_size, int32_t h);
+void ff_avg_pixels16_y2_msa(uint8_t *block, const uint8_t *pixels,
+                            ptrdiff_t line_size, int32_t h);
+void ff_avg_pixels16_xy2_msa(uint8_t *block, const uint8_t *pixels,
+                             ptrdiff_t line_size, int32_t h);
+void ff_avg_pixels8_msa(uint8_t *block, const uint8_t *pixels,
+                        ptrdiff_t line_size, int32_t h);
+void ff_avg_pixels8_x2_msa(uint8_t *block, const uint8_t *pixels,
+                           ptrdiff_t line_size, int32_t h);
+void ff_avg_pixels8_y2_msa(uint8_t *block, const uint8_t *pixels,
+                           ptrdiff_t line_size, int32_t h);
+void ff_avg_pixels8_xy2_msa(uint8_t *block, const uint8_t *pixels,
+                            ptrdiff_t line_size, int32_t h);
+void ff_avg_pixels4_msa(uint8_t *block, const uint8_t *pixels,
+                        ptrdiff_t line_size, int32_t h);
+void ff_avg_pixels4_x2_msa(uint8_t *block, const uint8_t *pixels,
+                           ptrdiff_t line_size, int32_t h);
+void ff_avg_pixels4_y2_msa(uint8_t *block, const uint8_t *pixels,
+                           ptrdiff_t line_size, int32_t h);
+void ff_avg_pixels4_xy2_msa(uint8_t *block, const uint8_t *pixels,
+                            ptrdiff_t line_size, int32_t h);
+
+#endif  // #ifndef AVCODEC_MIPS_HPELDSP_MIPS_H
diff --git a/libavcodec/mips/hpeldsp_msa.c b/libavcodec/mips/hpeldsp_msa.c
new file mode 100644
index 00000000..40a0dca0
--- /dev/null
+++ b/libavcodec/mips/hpeldsp_msa.c
@@ -0,0 +1,1498 @@
+/*
+ * Copyright (c) 2015 Parag Salasakar (Parag.Salasakar@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/mips/generic_macros_msa.h"
+#include "libavcodec/mips/hpeldsp_mips.h"
+
+#define PCKEV_AVG_ST_UB(in0, in1, dst, pdst)                  \
+{                                                             \
+    v16u8 tmp_m;                                              \
+    /* even bytes of in0|in1, rounded avg with dst -> pdst */ \
+    tmp_m = (v16u8) __msa_pckev_b((v16i8) in0, (v16i8) in1);  \
+    tmp_m = __msa_aver_u_b(tmp_m, (v16u8) dst);               \
+    ST_UB(tmp_m, (pdst));                                     \
+}
+
+#define PCKEV_ST_SB4(in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride)  \
+{                                                                           \
+    v16i8 tmp0_m, tmp1_m, tmp2_m, tmp3_m;                                   \
+    uint8_t *pdst_m = (uint8_t *) (pdst);                                   \
+    /* pack (in0,in1)..(in6,in7) to 4 byte vectors, store at pdst */        \
+    PCKEV_B4_SB(in0, in1, in2, in3, in4, in5, in6, in7,                     \
+                tmp0_m, tmp1_m, tmp2_m, tmp3_m);                            \
+    ST_SB4(tmp0_m, tmp1_m, tmp2_m, tmp3_m, pdst_m, stride);                 \
+}
+
+#define PCKEV_AVG_ST8x4_UB(in1, dst0, in2, dst1, in3, dst2, in4, dst3,  \
+                           pdst, stride)                                \
+{                                                                       \
+    v16u8 tmp0_m, tmp1_m, tmp2_m, tmp3_m;                               \
+    uint8_t *pdst_m = (uint8_t *) (pdst);                               \
+    /* in1..in4 -> bytes, AVER_UB2_UB with dst0..dst3, store 8x4 */     \
+    PCKEV_B2_UB(in2, in1, in4, in3, tmp0_m, tmp1_m);                    \
+    PCKEV_D2_UB(dst1, dst0, dst3, dst2, tmp2_m, tmp3_m);                \
+    AVER_UB2_UB(tmp0_m, tmp2_m, tmp1_m, tmp3_m, tmp0_m, tmp1_m);        \
+    ST8x4_UB(tmp0_m, tmp1_m, pdst_m, stride);                           \
+}
+
+static void common_hz_bil_4w_msa(const uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 uint8_t height)
+{
+    /* Two rows per pass: AVER_UB2_UB of each row with its slid-by-one copy. */
+    uint8_t pairs_left = height >> 1;
+    uint32_t word_a, word_b;
+    v16u8 row_a, row_b, slid_a, slid_b, avg_a, avg_b;
+
+    while (pairs_left--) {
+        LD_UB2(src, src_stride, row_a, row_b);
+        src += (2 * src_stride);
+        SLDI_B2_0_UB(row_a, row_b, slid_a, slid_b, 1);
+        AVER_UB2_UB(slid_a, row_a, slid_b, row_b, avg_a, avg_b);
+        /* only word 0 of each result, i.e. four bytes, reaches dst */
+        word_a = __msa_copy_u_w((v4i32) avg_a, 0);
+        word_b = __msa_copy_u_w((v4i32) avg_b, 0);
+        SW(word_a, dst);
+        dst += dst_stride;
+        SW(word_b, dst);
+        dst += dst_stride;
+    }
+}
+
+static void common_hz_bil_8w_msa(const uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 uint8_t height)
+{
+    uint8_t loop_cnt;
+    v16i8 src0, src1, src2, src3, src0_sld1, src1_sld1, src2_sld1, src3_sld1;
+    /* Four rows per pass (height >> 2 passes); leftover rows are not touched. */
+    for (loop_cnt = (height >> 2); loop_cnt--;) {
+        LD_SB4(src, src_stride, src0, src1, src2, src3);
+        src += (4 * src_stride);
+        /* srcN_sld1: srcN slid by 1 via SLDI_B4_0_SB, paired with srcN below */
+        SLDI_B4_0_SB(src0, src1, src2, src3,
+                     src0_sld1, src1_sld1, src2_sld1, src3_sld1, 1);
+        AVER_ST8x4_UB(src0, src0_sld1, src1, src1_sld1,
+                      src2, src2_sld1, src3, src3_sld1, dst, dst_stride);
+        dst += (4 * dst_stride);
+    }
+}
+
+static void common_hz_bil_16w_msa(const uint8_t *src, int32_t src_stride,
+                                  uint8_t *dst, int32_t dst_stride,
+                                  uint8_t height)
+{
+    uint8_t loop_cnt;
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
+    v16u8 src8, src9, src10, src11, src12, src13, src14, src15;
+    /* Eight rows per pass: src0..7 load at src, src8..15 one byte further on. */
+    for (loop_cnt = (height >> 3); loop_cnt--;) {
+        LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
+        LD_UB8((src + 1), src_stride,
+               src8, src9, src10, src11, src12, src13, src14, src15);
+        src += (8 * src_stride);
+        /* rows 0..3: each row paired with its src + 1 counterpart */
+        AVER_ST16x4_UB(src0, src8, src1, src9, src2, src10, src3, src11,
+                       dst, dst_stride);
+        dst += (4 * dst_stride);
+        /* rows 4..7 */
+        AVER_ST16x4_UB(src4, src12, src5, src13, src6, src14, src7, src15,
+                       dst, dst_stride);
+        dst += (4 * dst_stride);
+    }
+}
+
+static void common_hz_bil_no_rnd_8x8_msa(const uint8_t *src, int32_t src_stride,
+                                         uint8_t *dst, int32_t dst_stride)
+{
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
+    v16i8 src0_sld1, src1_sld1, src2_sld1, src3_sld1;
+    v16i8 src4_sld1, src5_sld1, src6_sld1, src7_sld1;
+
+    /* Eight rows, loaded once; src is not read again so it is not advanced. */
+    LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
+    /* srcN_sld1: srcN slid by 1 via SLDI_B4_0_SB */
+    SLDI_B4_0_SB(src0, src1, src2, src3,
+                 src0_sld1, src1_sld1, src2_sld1, src3_sld1, 1);
+    SLDI_B4_0_SB(src4, src5, src6, src7,
+                 src4_sld1, src5_sld1, src6_sld1, src7_sld1, 1);
+    /* AVE_* rather than AVER_*: the no_rnd flavour; rows 0..3, then 4..7 */
+    AVE_ST8x4_UB(src0, src0_sld1, src1, src1_sld1,
+                 src2, src2_sld1, src3, src3_sld1, dst, dst_stride);
+    dst += (4 * dst_stride);
+    AVE_ST8x4_UB(src4, src4_sld1, src5, src5_sld1,
+                 src6, src6_sld1, src7, src7_sld1, dst, dst_stride);
+}
+
+static void common_hz_bil_no_rnd_4x8_msa(const uint8_t *src, int32_t src_stride,
+                                         uint8_t *dst, int32_t dst_stride)
+{
+    v16i8 src0, src1, src2, src3, src0_sld1, src1_sld1, src2_sld1, src3_sld1;
+    /* Four rows through AVE_ST8x4_UB: "4x8" apparently reads rows x width. */
+    LD_SB4(src, src_stride, src0, src1, src2, src3);
+    SLDI_B4_0_SB(src0, src1, src2, src3,
+                 src0_sld1, src1_sld1, src2_sld1, src3_sld1, 1);
+    AVE_ST8x4_UB(src0, src0_sld1, src1, src1_sld1,
+                 src2, src2_sld1, src3, src3_sld1, dst, dst_stride);
+}
+
+static void common_hz_bil_no_rnd_16x16_msa(const uint8_t *src,
+                                           int32_t src_stride,
+                                           uint8_t *dst, int32_t dst_stride)
+{
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16u8 src9, src10, src11, src12, src13, src14, src15;
+
+    /* Rows 0..7: src0..7 load at src, src8..15 one byte further on. */
+    LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
+    LD_UB8((src + 1), src_stride,
+           src8, src9, src10, src11, src12, src13, src14, src15);
+    src += (8 * src_stride);
+
+    AVE_ST16x4_UB(src0, src8, src1, src9, src2, src10, src3, src11,
+                  dst, dst_stride);
+    dst += (4 * dst_stride);
+    /* Rows 8..11 are reloaded into the variables just consumed above. */
+    LD_UB4(src, src_stride, src0, src1, src2, src3);
+    LD_UB4((src + 1), src_stride, src8, src9, src10, src11);
+    src += (4 * src_stride);
+
+    AVE_ST16x4_UB(src4, src12, src5, src13, src6, src14, src7, src15,
+                  dst, dst_stride);
+    dst += (4 * dst_stride);
+    /* Rows 12..15; src is not read after this, so it is not advanced. */
+    LD_UB4(src, src_stride, src4, src5, src6, src7);
+    LD_UB4((src + 1), src_stride, src12, src13, src14, src15);
+
+    AVE_ST16x4_UB(src0, src8, src1, src9, src2, src10, src3, src11,
+                  dst, dst_stride);
+    dst += (4 * dst_stride);
+    AVE_ST16x4_UB(src4, src12, src5, src13, src6, src14, src7, src15,
+                  dst, dst_stride);
+}
+
+static void common_hz_bil_no_rnd_8x16_msa(const uint8_t *src,
+                                          int32_t src_stride,
+                                          uint8_t *dst, int32_t dst_stride)
+{
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16u8 src9, src10, src11, src12, src13, src14, src15;
+    /* Eight rows only: "8x16" apparently reads rows x width. */
+    LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
+    LD_UB8((src + 1), src_stride,
+           src8, src9, src10, src11, src12, src13, src14, src15);
+    /* AVE_* rather than AVER_*: the no_rnd flavour; rows 0..3, then 4..7 */
+    AVE_ST16x4_UB(src0, src8, src1, src9, src2, src10, src3, src11,
+                  dst, dst_stride);
+    dst += (4 * dst_stride);
+    AVE_ST16x4_UB(src4, src12, src5, src13, src6, src14, src7, src15,
+                  dst, dst_stride);
+}
+
+static void common_hz_bil_and_aver_dst_4w_msa(const uint8_t *src,
+                                              int32_t src_stride,
+                                              uint8_t *dst, int32_t dst_stride,
+                                              uint8_t height)
+{
+    /* Two rows per pass: each row is combined with its slid-by-one copy and
+     * the outcome is then combined with the word already stored in dst. */
+    uint8_t pairs_left = height >> 1;
+    uint32_t old_a, old_b, new_a, new_b;
+    v16u8 row_a, row_b, slid_a, slid_b, avg_a, avg_b;
+    v16u8 prev_a = { 0 };
+    v16u8 prev_b = { 0 };
+
+    while (pairs_left--) {
+        LD_UB2(src, src_stride, row_a, row_b);
+        src += (2 * src_stride);
+        SLDI_B2_0_UB(row_a, row_b, slid_a, slid_b, 1);
+        /* current dst contents go into word 0 of prev_a / prev_b */
+        old_a = LW(dst);
+        old_b = LW(dst + dst_stride);
+        prev_a = (v16u8) __msa_insert_w((v4i32) prev_a, 0, old_a);
+        prev_b = (v16u8) __msa_insert_w((v4i32) prev_b, 0, old_b);
+
+        AVER_UB2_UB(slid_a, row_a, slid_b, row_b, avg_a, avg_b);
+        AVER_UB2_UB(avg_a, prev_a, avg_b, prev_b, avg_a, avg_b);
+        new_a = __msa_copy_u_w((v4i32) avg_a, 0);
+        new_b = __msa_copy_u_w((v4i32) avg_b, 0);
+        SW(new_a, dst);
+        dst += dst_stride;
+        SW(new_b, dst);
+        dst += dst_stride;
+    }
+}
+
+static void common_hz_bil_and_aver_dst_8w_msa(const uint8_t *src,
+                                              int32_t src_stride,
+                                              uint8_t *dst, int32_t dst_stride,
+                                              uint8_t height)
+{
+    uint8_t loop_cnt;
+    v16i8 src0, src1, src2, src3, src0_sld1, src1_sld1, src2_sld1, src3_sld1;
+    /* Four rows per pass (height >> 2 passes). */
+    for (loop_cnt = (height >> 2); loop_cnt--;) {
+        LD_SB4(src, src_stride, src0, src1, src2, src3);
+        src += (4 * src_stride);
+        /* srcN_sld1: srcN slid by 1 via SLDI_B4_0_SB */
+        SLDI_B4_0_SB(src0, src1, src2, src3,
+                     src0_sld1, src1_sld1, src2_sld1, src3_sld1, 1);
+        /* AVER_DST_* also receives dst; presumably it mixes in dst's old bytes */
+        AVER_DST_ST8x4_UB(src0, src0_sld1, src1, src1_sld1, src2, src2_sld1,
+                          src3, src3_sld1, dst, dst_stride);
+        dst += (4 * dst_stride);
+    }
+}
+
+static void common_hz_bil_and_aver_dst_16w_msa(const uint8_t *src,
+                                               int32_t src_stride,
+                                               uint8_t *dst, int32_t dst_stride,
+                                               uint8_t height)
+{
+    uint8_t loop_cnt;
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16u8 src9, src10, src11, src12, src13, src14, src15;
+    /* Eight rows per pass: src0..7 load at src, src8..15 one byte further on. */
+    for (loop_cnt = (height >> 3); loop_cnt--;) {
+        LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
+        LD_UB8((src + 1), src_stride,
+               src8, src9, src10, src11, src12, src13, src14, src15);
+        src += (8 * src_stride);
+        /* AVER_DST_* also receives dst; presumably it mixes in dst's old bytes */
+        AVER_DST_ST16x4_UB(src0, src8, src1, src9, src2, src10, src3, src11,
+                           dst, dst_stride);
+        dst += (4 * dst_stride);
+        AVER_DST_ST16x4_UB(src4, src12, src5, src13, src6, src14, src7, src15,
+                           dst, dst_stride);
+        dst += (4 * dst_stride);
+    }
+}
+
+static void common_vt_bil_4w_msa(const uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 uint8_t height)
+{
+    /* Two rows per pass: every output word is the AVER_UB2_UB combination
+     * of a source row and the row loaded directly after it. */
+    uint8_t pairs_left = height >> 1;
+    uint32_t word_a, word_b;
+    v16u8 above, mid, below, avg_a, avg_b;
+
+    above = LD_UB(src);
+    src += src_stride;
+
+    while (pairs_left--) {
+        LD_UB2(src, src_stride, mid, below);
+        src += (2 * src_stride);
+        AVER_UB2_UB(above, mid, mid, below, avg_a, avg_b);
+
+        word_a = __msa_copy_u_w((v4i32) avg_a, 0);
+        word_b = __msa_copy_u_w((v4i32) avg_b, 0);
+        SW(word_a, dst);
+        dst += dst_stride;
+        SW(word_b, dst);
+        dst += dst_stride;
+        above = below;  /* last row doubles as the top row of the next pass */
+    }
+}
+
+static void common_vt_bil_8w_msa(const uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 uint8_t height)
+{
+    uint8_t loop_cnt;
+    v16u8 src0, src1, src2, src3, src4;
+    /* Four rows per pass; row N is paired with row N + 1, src0 carries over. */
+    src0 = LD_UB(src);
+    src += src_stride;
+
+    for (loop_cnt = (height >> 2); loop_cnt--;) {
+        LD_UB4(src, src_stride, src1, src2, src3, src4);
+        src += (4 * src_stride);
+        /* pairs: (src0,src1) (src1,src2) (src2,src3) (src3,src4) */
+        AVER_ST8x4_UB(src0, src1, src1, src2, src2, src3, src3, src4,
+                      dst, dst_stride);
+        dst += (4 * dst_stride);
+
+        src0 = src4;
+    }
+}
+
+static void common_vt_bil_16w_msa(const uint8_t *src, int32_t src_stride,
+                                  uint8_t *dst, int32_t dst_stride,
+                                  uint8_t height)
+{
+    uint8_t loop_cnt;
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    /* Eight rows per pass, each paired with the row loaded after it. */
+    src0 = LD_UB(src);
+    src += src_stride;
+
+    for (loop_cnt = (height >> 3); loop_cnt--;) {
+        LD_UB8(src, src_stride, src1, src2, src3, src4, src5, src6, src7, src8);
+        src += (8 * src_stride);
+        /* pairs for rows 0..3, then rows 4..7 of this pass */
+        AVER_ST16x4_UB(src0, src1, src1, src2, src2, src3, src3, src4,
+                       dst, dst_stride);
+        dst += (4 * dst_stride);
+        AVER_ST16x4_UB(src4, src5, src5, src6, src6, src7, src7, src8,
+                       dst, dst_stride);
+        dst += (4 * dst_stride);
+
+        src0 = src8;  /* bottom row seeds the next pass */
+    }
+}
+
+static void common_vt_bil_no_rnd_8x8_msa(const uint8_t *src, int32_t src_stride,
+                                         uint8_t *dst, int32_t dst_stride)
+{
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    /* Nine rows are read (src0..src8); row N is paired with row N + 1. */
+    LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
+    src += (8 * src_stride);
+    src8 = LD_UB(src);
+    /* AVE_* rather than AVER_*: the no_rnd flavour; first four pairs */
+    AVE_ST8x4_UB(src0, src1, src1, src2, src2, src3, src3, src4,
+                 dst, dst_stride);
+    dst += (4 * dst_stride);
+    /* remaining four pairs */
+    AVE_ST8x4_UB(src4, src5, src5, src6, src6, src7, src7, src8,
+                 dst, dst_stride);
+}
+
+static void common_vt_bil_no_rnd_4x8_msa(const uint8_t *src, int32_t src_stride,
+                                         uint8_t *dst, int32_t dst_stride)
+{
+    v16u8 src0, src1, src2, src3, src4;
+    /* Five rows in, four AVE_ST8x4_UB pairs out (row N with row N + 1). */
+    LD_UB5(src, src_stride, src0, src1, src2, src3, src4);
+    AVE_ST8x4_UB(src0, src1, src1, src2, src2, src3, src3, src4,
+                 dst, dst_stride);
+}
+
+static void common_vt_bil_no_rnd_16x16_msa(const uint8_t *src,
+                                           int32_t src_stride,
+                                           uint8_t *dst, int32_t dst_stride)
+{
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16u8 src9, src10, src11, src12, src13, src14, src15, src16;
+    /* Seventeen rows are read (src0..src16) before anything is stored. */
+    LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
+    src += (8 * src_stride);
+    LD_UB8(src, src_stride,
+           src8, src9, src10, src11, src12, src13, src14, src15);
+    src += (8 * src_stride);
+    src16 = LD_UB(src);
+    /* each AVE_ST16x4_UB call pairs four rows with the rows loaded after them */
+    AVE_ST16x4_UB(src0, src1, src1, src2, src2, src3, src3, src4,
+                  dst, dst_stride);
+    dst += (4 * dst_stride);
+    AVE_ST16x4_UB(src4, src5, src5, src6, src6, src7, src7, src8,
+                  dst, dst_stride);
+    dst += (4 * dst_stride);
+    AVE_ST16x4_UB(src8, src9, src9, src10, src10, src11, src11, src12,
+                  dst, dst_stride);
+    dst += (4 * dst_stride);
+    AVE_ST16x4_UB(src12, src13, src13, src14,
+                  src14, src15, src15, src16, dst, dst_stride);
+}
+
+static void common_vt_bil_no_rnd_8x16_msa(const uint8_t *src,
+                                          int32_t src_stride,
+                                          uint8_t *dst, int32_t dst_stride)
+{
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    /* Nine rows in (src0..src8); "8x16" apparently reads rows x width. */
+    LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
+    src += (8 * src_stride);
+    src8 = LD_UB(src);
+    /* AVE_* is the no_rnd flavour: row N is paired with row N + 1 */
+    AVE_ST16x4_UB(src0, src1, src1, src2, src2, src3, src3, src4,
+                  dst, dst_stride);
+    dst += (4 * dst_stride);
+    AVE_ST16x4_UB(src4, src5, src5, src6, src6, src7, src7, src8,
+                  dst, dst_stride);
+}
+
+static void common_vt_bil_and_aver_dst_4w_msa(const uint8_t *src,
+                                              int32_t src_stride,
+                                              uint8_t *dst, int32_t dst_stride,
+                                              uint8_t height)
+{
+    /* Two rows per pass: vertical row pairs, then combined with dst's word. */
+    uint8_t pairs_left = height >> 1;
+    uint32_t old_a, old_b, new_a, new_b;
+    v16u8 above, mid, below, avg_a, avg_b;
+    v16u8 prev_a = { 0 };
+    v16u8 prev_b = { 0 };
+
+    above = LD_UB(src);
+    src += src_stride;
+
+    while (pairs_left--) {
+        LD_UB2(src, src_stride, mid, below);
+        src += (2 * src_stride);
+        old_a = LW(dst);
+        old_b = LW(dst + dst_stride);
+        prev_a = (v16u8) __msa_insert_w((v4i32) prev_a, 0, old_a);
+        prev_b = (v16u8) __msa_insert_w((v4i32) prev_b, 0, old_b);
+        AVER_UB2_UB(above, mid, mid, below, avg_a, avg_b);
+        AVER_UB2_UB(avg_a, prev_a, avg_b, prev_b, avg_a, avg_b);
+        new_a = __msa_copy_u_w((v4i32) avg_a, 0);
+        new_b = __msa_copy_u_w((v4i32) avg_b, 0);
+        SW(new_a, dst);
+        dst += dst_stride;
+        SW(new_b, dst);
+        dst += dst_stride;
+        above = below;
+    }
+}
+
+static void common_vt_bil_and_aver_dst_8w_msa(const uint8_t *src,
+                                              int32_t src_stride,
+                                              uint8_t *dst, int32_t dst_stride,
+                                              uint8_t height)
+{
+    uint8_t loop_cnt;
+    v16u8 src0, src1, src2, src3, src4;
+    /* Four rows per pass; row N is paired with row N + 1. */
+    src0 = LD_UB(src);
+    src += src_stride;
+
+    for (loop_cnt = (height >> 2); loop_cnt--;) {
+        LD_UB4(src, src_stride, src1, src2, src3, src4);
+        src += (4 * src_stride);
+        /* AVER_DST_* also receives dst; presumably it mixes in dst's old bytes */
+        AVER_DST_ST8x4_UB(src0, src1, src1, src2, src2, src3, src3, src4,
+                          dst, dst_stride);
+        dst += (4 * dst_stride);
+        src0 = src4;
+    }
+}
+
+static void common_vt_bil_and_aver_dst_16w_msa(const uint8_t *src,
+                                               int32_t src_stride,
+                                               uint8_t *dst, int32_t dst_stride,
+                                               uint8_t height)
+{
+    uint8_t loop_cnt;
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16u8 res0, res1, res2, res3, res4, res5, res6, res7;
+    v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+    /* Eight rows per pass: vertical pairs first, then combined with dst rows. */
+    src0 = LD_UB(src);
+    src += src_stride;
+
+    for (loop_cnt = (height >> 3); loop_cnt--;) {
+        LD_UB8(src, src_stride, src1, src2, src3, src4, src5, src6, src7, src8);
+        src += (8 * src_stride);
+        AVER_UB4_UB(src0, src1, src1, src2, src2, src3, src3, src4,
+                    res0, res1, res2, res3);
+        AVER_UB4_UB(src4, src5, src5, src6, src6, src7, src7, src8,
+                    res4, res5, res6, res7);
+        /* fold in the eight rows currently stored at dst */
+        LD_UB8(dst, dst_stride, dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7);
+        AVER_UB4_UB(dst0, res0, dst1, res1, dst2, res2, dst3, res3,
+                    res0, res1, res2, res3);
+        AVER_UB4_UB(dst4, res4, dst5, res5, dst6, res6, dst7, res7,
+                    res4, res5, res6, res7);
+        ST_UB8(res0, res1, res2, res3, res4, res5, res6, res7, dst, dst_stride);
+        dst += (8 * dst_stride);
+
+        src0 = src8;
+    }
+}
+
+static void common_hv_bil_4w_msa(const uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 uint8_t height)
+{
+    uint8_t loop_cnt;
+    uint32_t res0, res1;
+    v16i8 src0, src1, src2, src0_sld1, src1_sld1, src2_sld1;
+    v16u8 src0_r, src1_r, src2_r, res;
+    v8u16 add0, add1, add2, sum0, sum1;
+    /* Per the macro names: neighbour sums per row, rows N and N + 1 added, >> 2. */
+    src0 = LD_SB(src);
+    src += src_stride;
+
+    for (loop_cnt = (height >> 1); loop_cnt--;) {
+        LD_SB2(src, src_stride, src1, src2);
+        src += (2 * src_stride);
+        /* each row goes to ILVR/HADD together with its slid-by-one copy */
+        SLDI_B3_0_SB(src0, src1, src2, src0_sld1, src1_sld1, src2_sld1, 1);
+        ILVR_B3_UB(src0_sld1, src0, src1_sld1, src1, src2_sld1, src2,
+                   src0_r, src1_r, src2_r);
+        HADD_UB3_UH(src0_r, src1_r, src2_r, add0, add1, add2);
+        ADD2(add0, add1, add1, add2, sum0, sum1);
+        SRARI_H2_UH(sum0, sum1, 2);
+        res = (v16u8) __msa_pckev_b((v16i8) sum1, (v16i8) sum0);
+        res0 = __msa_copy_u_w((v4i32) res, 0);
+        res1 = __msa_copy_u_w((v4i32) res, 2);  /* word 2: bytes packed from sum1 */
+        SW(res0, dst);
+        dst += dst_stride;
+        SW(res1, dst);
+        dst += dst_stride;
+
+        src0 = src2;
+    }
+}
+
+static void common_hv_bil_8w_msa(const uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 uint8_t height)
+{
+    uint8_t loop_cnt;
+    v16i8 src0, src1, src2, src3, src4;
+    v16i8 src0_sld1, src1_sld1, src2_sld1, src3_sld1, src4_sld1;
+    v16u8 src0_r, src1_r, src2_r, src3_r, src4_r;
+    v8u16 add0, add1, add2, add3, add4;
+    v8u16 sum0, sum1, sum2, sum3;
+    /* Per the macro names: neighbour sums per row, rows N and N + 1 added, >> 2. */
+    src0 = LD_SB(src);
+    src += src_stride;
+
+    for (loop_cnt = (height >> 2); loop_cnt--;) {
+        LD_SB4(src, src_stride, src1, src2, src3, src4);
+        src += (4 * src_stride);
+        /* four output rows per pass from five input rows (src0..src4) */
+        SLDI_B3_0_SB(src0, src1, src2, src0_sld1, src1_sld1, src2_sld1, 1);
+        SLDI_B2_0_SB(src3, src4, src3_sld1, src4_sld1, 1);
+        ILVR_B3_UB(src0_sld1, src0, src1_sld1, src1, src2_sld1, src2, src0_r,
+                   src1_r, src2_r);
+        ILVR_B2_UB(src3_sld1, src3, src4_sld1, src4, src3_r, src4_r);
+        HADD_UB3_UH(src0_r, src1_r, src2_r, add0, add1, add2);
+        HADD_UB2_UH(src3_r, src4_r, add3, add4);
+        ADD4(add0, add1, add1, add2, add2, add3, add3, add4,
+             sum0, sum1, sum2, sum3);
+        SRARI_H4_UH(sum0, sum1, sum2, sum3, 2);
+        PCKEV_B2_SB(sum1, sum0, sum3, sum2, src0, src1);  /* src0/src1 reused */
+        ST8x4_UB(src0, src1, dst, dst_stride);
+        dst += (4 * dst_stride);
+        src0 = src4;
+    }
+}
+
+static void common_hv_bil_16w_msa(const uint8_t *src, int32_t src_stride,
+                                  uint8_t *dst, int32_t dst_stride,
+                                  uint8_t height)
+{   /* 16-wide horizontal+vertical bilinear filter; dst is written but never read. */
+    uint8_t loop_cnt;
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9;
+    v16u8 src10, src11, src12, src13, src14, src15, src16, src17;
+    v8u16 src0_r, src1_r, src2_r, src3_r, src4_r, src5_r, src6_r, src7_r;
+    v8u16 src8_r, src0_l, src1_l, src2_l, src3_l, src4_l, src5_l, src6_l;
+    v8u16 src7_l, src8_l;
+    v8u16 sum0_r, sum1_r, sum2_r, sum3_r, sum4_r, sum5_r, sum6_r, sum7_r;
+    v8u16 sum0_l, sum1_l, sum2_l, sum3_l, sum4_l, sum5_l, sum6_l, sum7_l;
+
+    for (loop_cnt = (height >> 3); loop_cnt--;) { /* 8 output rows per pass; height % 8 leftover rows are not processed */
+        LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); /* presumably 8 rows, src_stride apart */
+        LD_UB8((src + 1), src_stride, /* the same 8 rows starting one byte further right */
+               src9, src10, src11, src12, src13, src14, src15, src16);
+        src += (8 * src_stride);
+
+        src8 = LD_UB(src); /* 9th row; src stays here, so the next pass loads it again as its first row */
+        src17 = LD_UB(src + 1);
+
+        ILVRL_B2_UH(src9, src0, src0_r, src0_l); /* presumably interleaves each byte with its right-hand neighbour into _r/_l halves — TODO confirm */
+        ILVRL_B2_UH(src10, src1, src1_r, src1_l);
+        ILVRL_B2_UH(src11, src2, src2_r, src2_l);
+        ILVRL_B2_UH(src12, src3, src3_r, src3_l);
+        ILVRL_B2_UH(src13, src4, src4_r, src4_l);
+        ILVRL_B2_UH(src14, src5, src5_r, src5_l);
+        ILVRL_B2_UH(src15, src6, src6_r, src6_l);
+        ILVRL_B2_UH(src16, src7, src7_r, src7_l);
+        ILVRL_B2_UH(src17, src8, src8_r, src8_l);
+        HADD_UB3_UH(src0_r, src1_r, src2_r, src0_r, src1_r, src2_r); /* in place; presumably adds each byte pair into a 16-bit lane */
+        HADD_UB3_UH(src3_r, src4_r, src5_r, src3_r, src4_r, src5_r);
+        HADD_UB3_UH(src6_r, src7_r, src8_r, src6_r, src7_r, src8_r);
+        HADD_UB3_UH(src0_l, src1_l, src2_l, src0_l, src1_l, src2_l);
+        HADD_UB3_UH(src3_l, src4_l, src5_l, src3_l, src4_l, src5_l);
+        HADD_UB3_UH(src6_l, src7_l, src8_l, src6_l, src7_l, src8_l);
+        ADD4(src0_r, src1_r, src1_r, src2_r, src2_r, src3_r, src3_r, src4_r, /* operands are vertically adjacent rows: row i with row i + 1 */
+             sum0_r, sum1_r, sum2_r, sum3_r);
+        ADD4(src4_r, src5_r, src5_r, src6_r, src6_r, src7_r, src7_r, src8_r,
+             sum4_r, sum5_r, sum6_r, sum7_r);
+        ADD4(src0_l, src1_l, src1_l, src2_l, src2_l, src3_l, src3_l, src4_l,
+             sum0_l, sum1_l, sum2_l, sum3_l);
+        ADD4(src4_l, src5_l, src5_l, src6_l, src6_l, src7_l, src7_l, src8_l,
+             sum4_l, sum5_l, sum6_l, sum7_l);
+        SRARI_H4_UH(sum0_r, sum1_r, sum2_r, sum3_r, 2); /* presumably a rounding shift right by 2 (divide the 4-sample total by 4) */
+        SRARI_H4_UH(sum4_r, sum5_r, sum6_r, sum7_r, 2);
+        SRARI_H4_UH(sum0_l, sum1_l, sum2_l, sum3_l, 2);
+        SRARI_H4_UH(sum4_l, sum5_l, sum6_l, sum7_l, 2);
+        PCKEV_ST_SB4(sum0_l, sum0_r, sum1_l, sum1_r, sum2_l, sum2_r, /* presumably packs _l/_r back to bytes and stores 4 rows */
+                     sum3_l, sum3_r, dst, dst_stride);
+        dst += (4 * dst_stride);
+        PCKEV_ST_SB4(sum4_l, sum4_r, sum5_l, sum5_r, sum6_l, sum6_r,
+                     sum7_l, sum7_r, dst, dst_stride);
+        dst += (4 * dst_stride);
+    }
+}
+
+static void common_hv_bil_no_rnd_8x8_msa(const uint8_t *src, int32_t src_stride,
+                                         uint8_t *dst, int32_t dst_stride)
+{   /* Fixed-size horizontal+vertical bilinear filter: 9 source rows in, 8 rows out; bias is +1 before the shift by 2. */
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16u8 src0_sld1, src1_sld1, src2_sld1, src3_sld1;
+    v16u8 src4_sld1, src5_sld1, src6_sld1, src7_sld1, src8_sld1;
+    v8u16 src0_r, src1_r, src2_r, src3_r;
+    v8u16 src4_r, src5_r, src6_r, src7_r, src8_r;
+    v8u16 add0, add1, add2, add3, add4, add5, add6, add7, add8;
+    v8u16 sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7;
+    v16i8 out0, out1;
+
+    LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
+    src += (8 * src_stride);
+    src8 = LD_UB(src); /* extra 9th row needed as the lower neighbour of row 7 */
+
+    SLDI_B4_0_UB(src0, src1, src2, src3, src0_sld1, src1_sld1, src2_sld1, /* *_sld1: presumably each row slid by one byte, i.e. the right-hand neighbours — TODO confirm */
+                 src3_sld1, 1);
+    SLDI_B3_0_UB(src4, src5, src6, src4_sld1, src5_sld1, src6_sld1, 1);
+    SLDI_B2_0_UB(src7, src8, src7_sld1, src8_sld1, 1);
+    ILVR_B4_UH(src0_sld1, src0, src1_sld1, src1, src2_sld1, src2, src3_sld1, /* presumably interleaves each byte with its neighbour (right half only) */
+               src3, src0_r, src1_r, src2_r, src3_r);
+    ILVR_B3_UH(src4_sld1, src4, src5_sld1, src5, src6_sld1, src6, src4_r,
+               src5_r, src6_r);
+    ILVR_B2_UH(src7_sld1, src7, src8_sld1, src8, src7_r, src8_r);
+    HADD_UB3_UH(src0_r, src1_r, src2_r, add0, add1, add2); /* addN: presumably per-row sum of each horizontal byte pair */
+    HADD_UB3_UH(src3_r, src4_r, src5_r, add3, add4, add5);
+    HADD_UB3_UH(src6_r, src7_r, src8_r, add6, add7, add8);
+
+    sum0 = add0 + add1 + 1; /* row i + row i+1, plus a bias of 1 */
+    sum1 = add1 + add2 + 1;
+    sum2 = add2 + add3 + 1;
+    sum3 = add3 + add4 + 1;
+    sum4 = add4 + add5 + 1;
+    sum5 = add5 + add6 + 1;
+    sum6 = add6 + add7 + 1;
+    sum7 = add7 + add8 + 1;
+
+    SRA_4V(sum0, sum1, sum2, sum3, 2); /* presumably a plain shift right by 2 on each vector */
+    SRA_4V(sum4, sum5, sum6, sum7, 2);
+    PCKEV_B2_SB(sum1, sum0, sum3, sum2, out0, out1); /* presumably packs two result rows into each output vector */
+    ST8x4_UB(out0, out1, dst, dst_stride); /* output rows 0..3 */
+    PCKEV_B2_SB(sum5, sum4, sum7, sum6, out0, out1);
+    ST8x4_UB(out0, out1, dst + 4 * dst_stride, dst_stride); /* output rows 4..7 */
+}
+
+static void common_hv_bil_no_rnd_4x8_msa(const uint8_t *src, int32_t src_stride,
+                                         uint8_t *dst, int32_t dst_stride)
+{   /* Fixed-size horizontal+vertical bilinear filter: 5 source rows in, 4 rows out; bias is +1 before the shift by 2. */
+    v16i8 src0, src1, src2, src3, src4;
+    v16i8 src0_sld1, src1_sld1, src2_sld1, src3_sld1, src4_sld1;
+    v8u16 src0_r, src1_r, src2_r, src3_r, src4_r;
+    v8u16 add0, add1, add2, add3, add4;
+    v8u16 sum0, sum1, sum2, sum3;
+    v16i8 out0, out1;
+
+    LD_SB4(src, src_stride, src0, src1, src2, src3);
+    src += (4 * src_stride);
+    src4 = LD_SB(src); /* extra 5th row needed as the lower neighbour of row 3 */
+
+    SLDI_B3_0_SB(src0, src1, src2, src0_sld1, src1_sld1, src2_sld1, 1); /* *_sld1: presumably each row slid by one byte (right-hand neighbours) — TODO confirm */
+    SLDI_B2_0_SB(src3, src4, src3_sld1, src4_sld1, 1);
+    ILVR_B3_UH(src0_sld1, src0, src1_sld1, src1, src2_sld1, src2, src0_r, /* presumably interleaves each byte with its neighbour (right half only) */
+               src1_r, src2_r);
+    ILVR_B2_UH(src3_sld1, src3, src4_sld1, src4, src3_r, src4_r);
+    HADD_UB3_UH(src0_r, src1_r, src2_r, add0, add1, add2); /* addN: presumably per-row sum of each horizontal byte pair */
+    HADD_UB2_UH(src3_r, src4_r, add3, add4);
+
+    sum0 = add0 + add1 + 1; /* row i + row i+1, plus a bias of 1 */
+    sum1 = add1 + add2 + 1;
+    sum2 = add2 + add3 + 1;
+    sum3 = add3 + add4 + 1;
+
+    SRA_4V(sum0, sum1, sum2, sum3, 2); /* presumably a plain shift right by 2 on each vector */
+    PCKEV_B2_SB(sum1, sum0, sum3, sum2, out0, out1);
+    ST8x4_UB(out0, out1, dst, dst_stride); /* the only store: 4 output rows */
+}
+
+static void common_hv_bil_no_rnd_16x16_msa(const uint8_t *src,
+                                           int32_t src_stride,
+                                           uint8_t *dst, int32_t dst_stride)
+{   /* Fixed-size horizontal+vertical bilinear filter, +1 bias before the shift by 2; done as two unrolled 8-row halves (16 output rows). */
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9;
+    v16u8 src10, src11, src12, src13, src14, src15, src16, src17;
+    v8u16 src0_r, src1_r, src2_r, src3_r, src4_r, src5_r, src6_r, src7_r;
+    v8u16 src8_r, src0_l, src1_l, src2_l, src3_l, src4_l, src5_l, src6_l;
+    v8u16 src7_l, src8_l;
+    v8u16 sum0_r, sum1_r, sum2_r, sum3_r, sum4_r, sum5_r, sum6_r, sum7_r;
+    v8u16 sum0_l, sum1_l, sum2_l, sum3_l, sum4_l, sum5_l, sum6_l, sum7_l;
+
+    LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); /* first half: rows 0..7 */
+    LD_UB8((src + 1), src_stride, /* the same rows starting one byte further right */
+           src9, src10, src11, src12, src13, src14, src15, src16);
+    src += (8 * src_stride);
+    src8 = LD_UB(src); /* row 8: lower neighbour of row 7; src is left pointing at it */
+    src17 = LD_UB(src + 1);
+
+    ILVRL_B2_UH(src9, src0, src0_r, src0_l); /* presumably interleaves each byte with its right-hand neighbour into _r/_l halves — TODO confirm */
+    ILVRL_B2_UH(src10, src1, src1_r, src1_l);
+    ILVRL_B2_UH(src11, src2, src2_r, src2_l);
+    ILVRL_B2_UH(src12, src3, src3_r, src3_l);
+    ILVRL_B2_UH(src13, src4, src4_r, src4_l);
+    ILVRL_B2_UH(src14, src5, src5_r, src5_l);
+    ILVRL_B2_UH(src15, src6, src6_r, src6_l);
+    ILVRL_B2_UH(src16, src7, src7_r, src7_l);
+    ILVRL_B2_UH(src17, src8, src8_r, src8_l);
+
+    HADD_UB3_UH(src0_r, src1_r, src2_r, src0_r, src1_r, src2_r); /* in place; presumably adds each byte pair into a 16-bit lane */
+    HADD_UB3_UH(src3_r, src4_r, src5_r, src3_r, src4_r, src5_r);
+    HADD_UB3_UH(src6_r, src7_r, src8_r, src6_r, src7_r, src8_r);
+    HADD_UB3_UH(src0_l, src1_l, src2_l, src0_l, src1_l, src2_l);
+    HADD_UB3_UH(src3_l, src4_l, src5_l, src3_l, src4_l, src5_l);
+    HADD_UB3_UH(src6_l, src7_l, src8_l, src6_l, src7_l, src8_l);
+
+    sum0_r = src0_r + src1_r + 1; /* row i + row i+1, plus a bias of 1 */
+    sum1_r = src1_r + src2_r + 1;
+    sum2_r = src2_r + src3_r + 1;
+    sum3_r = src3_r + src4_r + 1;
+    sum4_r = src4_r + src5_r + 1;
+    sum5_r = src5_r + src6_r + 1;
+    sum6_r = src6_r + src7_r + 1;
+    sum7_r = src7_r + src8_r + 1;
+    sum0_l = src0_l + src1_l + 1;
+    sum1_l = src1_l + src2_l + 1;
+    sum2_l = src2_l + src3_l + 1;
+    sum3_l = src3_l + src4_l + 1;
+    sum4_l = src4_l + src5_l + 1;
+    sum5_l = src5_l + src6_l + 1;
+    sum6_l = src6_l + src7_l + 1;
+    sum7_l = src7_l + src8_l + 1;
+
+    SRA_4V(sum0_r, sum1_r, sum2_r, sum3_r, 2); /* presumably a plain shift right by 2 on each vector */
+    SRA_4V(sum4_r, sum5_r, sum6_r, sum7_r, 2);
+    SRA_4V(sum0_l, sum1_l, sum2_l, sum3_l, 2);
+    SRA_4V(sum4_l, sum5_l, sum6_l, sum7_l, 2);
+    PCKEV_ST_SB4(sum0_l, sum0_r, sum1_l, sum1_r, /* presumably packs to bytes and stores output rows 0..3 */
+                 sum2_l, sum2_r, sum3_l, sum3_r, dst, dst_stride);
+    dst += (4 * dst_stride);
+
+    LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); /* second half: rows 8..15, issued before the pending store of rows 4..7 */
+    LD_UB8((src + 1), src_stride,
+           src9, src10, src11, src12, src13, src14, src15, src16);
+    src += (8 * src_stride);
+    src8 = LD_UB(src); /* row 16: lower neighbour of row 15 */
+    src17 = LD_UB(src + 1);
+
+    PCKEV_ST_SB4(sum4_l, sum4_r, sum5_l, sum5_r, /* output rows 4..7 of the first half */
+                 sum6_l, sum6_r, sum7_l, sum7_r, dst, dst_stride);
+    dst += (4 * dst_stride);
+
+    ILVRL_B2_UH(src9, src0, src0_r, src0_l); /* same sequence as the first half, now on rows 8..16 */
+    ILVRL_B2_UH(src10, src1, src1_r, src1_l);
+    ILVRL_B2_UH(src11, src2, src2_r, src2_l);
+    ILVRL_B2_UH(src12, src3, src3_r, src3_l);
+    ILVRL_B2_UH(src13, src4, src4_r, src4_l);
+    ILVRL_B2_UH(src14, src5, src5_r, src5_l);
+    ILVRL_B2_UH(src15, src6, src6_r, src6_l);
+    ILVRL_B2_UH(src16, src7, src7_r, src7_l);
+    ILVRL_B2_UH(src17, src8, src8_r, src8_l);
+
+    HADD_UB3_UH(src0_r, src1_r, src2_r, src0_r, src1_r, src2_r);
+    HADD_UB3_UH(src3_r, src4_r, src5_r, src3_r, src4_r, src5_r);
+    HADD_UB3_UH(src6_r, src7_r, src8_r, src6_r, src7_r, src8_r);
+    HADD_UB3_UH(src0_l, src1_l, src2_l, src0_l, src1_l, src2_l);
+    HADD_UB3_UH(src3_l, src4_l, src5_l, src3_l, src4_l, src5_l);
+    HADD_UB3_UH(src6_l, src7_l, src8_l, src6_l, src7_l, src8_l);
+
+    sum0_r = src0_r + src1_r + 1;
+    sum1_r = src1_r + src2_r + 1;
+    sum2_r = src2_r + src3_r + 1;
+    sum3_r = src3_r + src4_r + 1;
+    sum4_r = src4_r + src5_r + 1;
+    sum5_r = src5_r + src6_r + 1;
+    sum6_r = src6_r + src7_r + 1;
+    sum7_r = src7_r + src8_r + 1;
+    sum0_l = src0_l + src1_l + 1;
+    sum1_l = src1_l + src2_l + 1;
+    sum2_l = src2_l + src3_l + 1;
+    sum3_l = src3_l + src4_l + 1;
+    sum4_l = src4_l + src5_l + 1;
+    sum5_l = src5_l + src6_l + 1;
+    sum6_l = src6_l + src7_l + 1;
+    sum7_l = src7_l + src8_l + 1;
+
+    SRA_4V(sum0_r, sum1_r, sum2_r, sum3_r, 2);
+    SRA_4V(sum4_r, sum5_r, sum6_r, sum7_r, 2);
+    SRA_4V(sum0_l, sum1_l, sum2_l, sum3_l, 2);
+    SRA_4V(sum4_l, sum5_l, sum6_l, sum7_l, 2);
+    PCKEV_ST_SB4(sum0_l, sum0_r, sum1_l, sum1_r, /* output rows 8..11 */
+                 sum2_l, sum2_r, sum3_l, sum3_r, dst, dst_stride);
+    dst += (4 * dst_stride);
+    PCKEV_ST_SB4(sum4_l, sum4_r, sum5_l, sum5_r, /* output rows 12..15 */
+                 sum6_l, sum6_r, sum7_l, sum7_r, dst, dst_stride);
+}
+
+static void common_hv_bil_no_rnd_8x16_msa(const uint8_t *src,
+                                          int32_t src_stride,
+                                          uint8_t *dst, int32_t dst_stride)
+{   /* Fixed-size horizontal+vertical bilinear filter: 9 full-vector source rows in, 8 rows out; bias is +1 before the shift by 2. */
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9;
+    v16u8 src10, src11, src12, src13, src14, src15, src16, src17;
+    v8u16 src0_r, src1_r, src2_r, src3_r, src4_r, src5_r, src6_r, src7_r;
+    v8u16 src8_r, src0_l, src1_l, src2_l, src3_l, src4_l, src5_l, src6_l;
+    v8u16 src7_l, src8_l;
+    v8u16 sum0_r, sum1_r, sum2_r, sum3_r, sum4_r, sum5_r, sum6_r, sum7_r;
+    v8u16 sum0_l, sum1_l, sum2_l, sum3_l, sum4_l, sum5_l, sum6_l, sum7_l;
+
+    LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); /* rows 0..7 */
+    LD_UB8((src + 1), src_stride, /* the same rows starting one byte further right */
+           src9, src10, src11, src12, src13, src14, src15, src16);
+    src += (8 * src_stride);
+    src8 = LD_UB(src); /* row 8: lower neighbour of row 7 */
+    src17 = LD_UB(src + 1);
+
+    ILVRL_B2_UH(src9, src0, src0_r, src0_l); /* presumably interleaves each byte with its right-hand neighbour into _r/_l halves — TODO confirm */
+    ILVRL_B2_UH(src10, src1, src1_r, src1_l);
+    ILVRL_B2_UH(src11, src2, src2_r, src2_l);
+    ILVRL_B2_UH(src12, src3, src3_r, src3_l);
+    ILVRL_B2_UH(src13, src4, src4_r, src4_l);
+    ILVRL_B2_UH(src14, src5, src5_r, src5_l);
+    ILVRL_B2_UH(src15, src6, src6_r, src6_l);
+    ILVRL_B2_UH(src16, src7, src7_r, src7_l);
+    ILVRL_B2_UH(src17, src8, src8_r, src8_l);
+
+    HADD_UB3_UH(src0_r, src1_r, src2_r, src0_r, src1_r, src2_r); /* in place; presumably adds each byte pair into a 16-bit lane */
+    HADD_UB3_UH(src3_r, src4_r, src5_r, src3_r, src4_r, src5_r);
+    HADD_UB3_UH(src6_r, src7_r, src8_r, src6_r, src7_r, src8_r);
+    HADD_UB3_UH(src0_l, src1_l, src2_l, src0_l, src1_l, src2_l);
+    HADD_UB3_UH(src3_l, src4_l, src5_l, src3_l, src4_l, src5_l);
+    HADD_UB3_UH(src6_l, src7_l, src8_l, src6_l, src7_l, src8_l);
+
+    sum0_r = src0_r + src1_r + 1; /* row i + row i+1, plus a bias of 1 */
+    sum1_r = src1_r + src2_r + 1;
+    sum2_r = src2_r + src3_r + 1;
+    sum3_r = src3_r + src4_r + 1;
+    sum4_r = src4_r + src5_r + 1;
+    sum5_r = src5_r + src6_r + 1;
+    sum6_r = src6_r + src7_r + 1;
+    sum7_r = src7_r + src8_r + 1;
+    sum0_l = src0_l + src1_l + 1;
+    sum1_l = src1_l + src2_l + 1;
+    sum2_l = src2_l + src3_l + 1;
+    sum3_l = src3_l + src4_l + 1;
+    sum4_l = src4_l + src5_l + 1;
+    sum5_l = src5_l + src6_l + 1;
+    sum6_l = src6_l + src7_l + 1;
+    sum7_l = src7_l + src8_l + 1;
+
+    SRA_4V(sum0_r, sum1_r, sum2_r, sum3_r, 2); /* presumably a plain shift right by 2 on each vector */
+    SRA_4V(sum4_r, sum5_r, sum6_r, sum7_r, 2);
+    SRA_4V(sum0_l, sum1_l, sum2_l, sum3_l, 2);
+    SRA_4V(sum4_l, sum5_l, sum6_l, sum7_l, 2);
+    PCKEV_ST_SB4(sum0_l, sum0_r, sum1_l, sum1_r, /* presumably packs to bytes and stores output rows 0..3 */
+                 sum2_l, sum2_r, sum3_l, sum3_r, dst, dst_stride);
+    dst += (4 * dst_stride);
+    PCKEV_ST_SB4(sum4_l, sum4_r, sum5_l, sum5_r, /* output rows 4..7 */
+                 sum6_l, sum6_r, sum7_l, sum7_r, dst, dst_stride);
+}
+
+static void common_hv_bil_and_aver_dst_4w_msa(const uint8_t *src,
+                                              int32_t src_stride,
+                                              uint8_t *dst, int32_t dst_stride,
+                                              uint8_t height)
+{   /* Horizontal+vertical bilinear filter whose result is combined with the existing dst rows; one 32-bit word is stored per row. */
+    uint8_t loop_cnt;
+    uint32_t out0, out1;
+    v16i8 src0, src1, src2, src0_sld1, src1_sld1, src2_sld1;
+    v16u8 src0_r, src1_r, src2_r;
+    v8u16 add0, add1, add2, sum0, sum1;
+    v16u8 dst0, dst1, res0, res1;
+
+    src0 = LD_SB(src); /* first row is loaded once; later passes reuse the previous pass's last row */
+    src += src_stride;
+
+    for (loop_cnt = (height >> 1); loop_cnt--;) { /* 2 output rows per pass; an odd trailing row is not processed */
+        LD_SB2(src, src_stride, src1, src2);
+        src += (2 * src_stride);
+
+        LD_UB2(dst, dst_stride, dst0, dst1); /* current destination rows, needed for the averaging step */
+        SLDI_B3_0_SB(src0, src1, src2, src0_sld1, src1_sld1, src2_sld1, 1); /* *_sld1: presumably each row slid by one byte (right-hand neighbours) — TODO confirm */
+        ILVR_B3_UB(src0_sld1, src0, src1_sld1, src1, src2_sld1, src2, src0_r,
+                   src1_r, src2_r);
+        HADD_UB3_UH(src0_r, src1_r, src2_r, add0, add1, add2); /* presumably per-row sum of each horizontal byte pair */
+        ADD2(add0, add1, add1, add2, sum0, sum1); /* operands are vertically adjacent rows */
+        SRARI_H2_UH(sum0, sum1, 2); /* presumably a rounding shift right by 2 */
+        PCKEV_B2_UB(sum0, sum0, sum1, sum1, res0, res1); /* presumably narrows the 16-bit results back to bytes */
+        AVER_UB2_UB(dst0, res0, dst1, res1, res0, res1); /* presumably a rounded average of filter result and dst — TODO confirm */
+
+        out0 = __msa_copy_u_w((v4i32) res0, 0); /* 32-bit element 0 of each result vector */
+        out1 = __msa_copy_u_w((v4i32) res1, 0);
+        SW(out0, dst);
+        dst += dst_stride;
+        SW(out1, dst);
+        dst += dst_stride;
+
+        src0 = src2; /* last source row becomes the top row of the next pass */
+    }
+}
+
+static void common_hv_bil_and_aver_dst_8w_msa(const uint8_t *src,
+                                              int32_t src_stride,
+                                              uint8_t *dst, int32_t dst_stride,
+                                              uint8_t height)
+{   /* Horizontal+vertical bilinear filter whose result is combined with the existing dst rows; 4 rows per pass. */
+    uint8_t loop_cnt;
+    v16i8 src0, src1, src2, src3, src4;
+    v16i8 src0_sld1, src1_sld1, src2_sld1, src3_sld1, src4_sld1;
+    v16u8 dst0, dst1, dst2, dst3;
+    v16u8 src0_r, src1_r, src2_r, src3_r, src4_r;
+    v8u16 add0, add1, add2, add3, add4;
+    v8u16 sum0, sum1, sum2, sum3;
+
+    src0 = LD_SB(src); /* first row is loaded once; later passes reuse the previous pass's last row */
+    src += src_stride;
+
+    for (loop_cnt = (height >> 2); loop_cnt--;) { /* 4 output rows per pass; height % 4 leftover rows are not processed */
+        LD_SB4(src, src_stride, src1, src2, src3, src4);
+        src += (4 * src_stride);
+
+        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); /* current destination rows, needed for the averaging step */
+        SLDI_B3_0_SB(src0, src1, src2, src0_sld1, src1_sld1, src2_sld1, 1); /* *_sld1: presumably each row slid by one byte (right-hand neighbours) — TODO confirm */
+        SLDI_B2_0_SB(src3, src4, src3_sld1, src4_sld1, 1);
+        ILVR_B3_UB(src0_sld1, src0, src1_sld1, src1, src2_sld1, src2, src0_r,
+                   src1_r, src2_r);
+        ILVR_B2_UB(src3_sld1, src3, src4_sld1, src4, src3_r, src4_r);
+        HADD_UB3_UH(src0_r, src1_r, src2_r, add0, add1, add2); /* presumably per-row sum of each horizontal byte pair */
+        HADD_UB2_UH(src3_r, src4_r, add3, add4);
+        ADD4(add0, add1, add1, add2, add2, add3, add3, add4, /* operands are vertically adjacent rows */
+             sum0, sum1, sum2, sum3);
+        SRARI_H4_UH(sum0, sum1, sum2, sum3, 2); /* presumably a rounding shift right by 2 */
+        PCKEV_AVG_ST8x4_UB(sum0, dst0, sum1, dst1, /* presumably packs, averages with dstN and stores 4 rows — TODO confirm */
+                           sum2, dst2, sum3, dst3, dst, dst_stride);
+        dst += (4 * dst_stride);
+        src0 = src4; /* last source row becomes the top row of the next pass */
+    }
+}
+
+static void common_hv_bil_and_aver_dst_16w_msa(const uint8_t *src,
+                                               int32_t src_stride,
+                                               uint8_t *dst, int32_t dst_stride,
+                                               uint8_t height)
+{   /* Horizontal+vertical bilinear filter whose result is combined with the existing dst rows; 8 rows per pass. */
+    uint8_t loop_cnt;
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+    v16u8 src11, src12, src13, src14, src15, src16, src17;
+    v16u8 src0_r, src1_r, src2_r, src3_r, src4_r, src5_r, src6_r, src7_r;
+    v16u8 src8_r, src0_l, src1_l, src2_l, src3_l, src4_l, src5_l, src6_l;
+    v16u8 src7_l, src8_l;
+    v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+    v8u16 sum0_r, sum1_r, sum2_r, sum3_r, sum4_r, sum5_r, sum6_r, sum7_r;
+    v8u16 sum0_l, sum1_l, sum2_l, sum3_l, sum4_l, sum5_l, sum6_l, sum7_l;
+    v8u16 add0, add1, add2, add3, add4, add5, add6, add7, add8;
+
+    for (loop_cnt = (height >> 3); loop_cnt--;) { /* 8 output rows per pass; height % 8 leftover rows are not processed */
+        LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
+        LD_UB8((src + 1), src_stride, /* the same 8 rows starting one byte further right */
+               src9, src10, src11, src12, src13, src14, src15, src16);
+        src += (8 * src_stride);
+
+        src8 = LD_UB(src); /* 9th row; src stays here, so the next pass loads it again as its first row */
+        src17 = LD_UB(src + 1);
+
+        ILVRL_B2_UB(src9, src0, src0_r, src0_l); /* presumably interleaves each byte with its right-hand neighbour into _r/_l halves — TODO confirm */
+        ILVRL_B2_UB(src10, src1, src1_r, src1_l);
+        ILVRL_B2_UB(src11, src2, src2_r, src2_l);
+        ILVRL_B2_UB(src12, src3, src3_r, src3_l);
+        ILVRL_B2_UB(src13, src4, src4_r, src4_l);
+        ILVRL_B2_UB(src14, src5, src5_r, src5_l);
+        ILVRL_B2_UB(src15, src6, src6_r, src6_l);
+        ILVRL_B2_UB(src16, src7, src7_r, src7_l);
+        ILVRL_B2_UB(src17, src8, src8_r, src8_l);
+        HADD_UB3_UH(src0_r, src1_r, src2_r, add0, add1, add2); /* _r halves first; add0..add8 are reused for the _l halves below */
+        HADD_UB3_UH(src3_r, src4_r, src5_r, add3, add4, add5);
+        HADD_UB3_UH(src6_r, src7_r, src8_r, add6, add7, add8);
+        ADD4(add0, add1, add1, add2, add2, add3, add3, add4, sum0_r, sum1_r, /* operands are vertically adjacent rows */
+             sum2_r, sum3_r);
+        ADD4(add4, add5, add5, add6, add6, add7, add7, add8, sum4_r, sum5_r,
+             sum6_r, sum7_r);
+        HADD_UB3_UH(src0_l, src1_l, src2_l, add0, add1, add2); /* now the _l halves, overwriting add0..add8 */
+        HADD_UB3_UH(src3_l, src4_l, src5_l, add3, add4, add5);
+        HADD_UB3_UH(src6_l, src7_l, src8_l, add6, add7, add8);
+        ADD4(add0, add1, add1, add2, add2, add3, add3, add4, sum0_l, sum1_l,
+             sum2_l, sum3_l);
+        ADD4(add4, add5, add5, add6, add6, add7, add7, add8, sum4_l, sum5_l,
+             sum6_l, sum7_l);
+        SRARI_H4_UH(sum0_r, sum1_r, sum2_r, sum3_r, 2); /* presumably a rounding shift right by 2 */
+        SRARI_H4_UH(sum4_r, sum5_r, sum6_r, sum7_r, 2);
+        SRARI_H4_UH(sum0_l, sum1_l, sum2_l, sum3_l, 2);
+        SRARI_H4_UH(sum4_l, sum5_l, sum6_l, sum7_l, 2);
+        LD_UB8(dst, dst_stride, dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7); /* current destination rows, needed for the averaging step */
+        PCKEV_AVG_ST_UB(sum0_l, sum0_r, dst0, dst); /* presumably packs, averages with dstN and stores one row — TODO confirm */
+        dst += dst_stride;
+        PCKEV_AVG_ST_UB(sum1_l, sum1_r, dst1, dst);
+        dst += dst_stride;
+        PCKEV_AVG_ST_UB(sum2_l, sum2_r, dst2, dst);
+        dst += dst_stride;
+        PCKEV_AVG_ST_UB(sum3_l, sum3_r, dst3, dst);
+        dst += dst_stride;
+        PCKEV_AVG_ST_UB(sum4_l, sum4_r, dst4, dst);
+        dst += dst_stride;
+        PCKEV_AVG_ST_UB(sum5_l, sum5_r, dst5, dst);
+        dst += dst_stride;
+        PCKEV_AVG_ST_UB(sum6_l, sum6_r, dst6, dst);
+        dst += dst_stride;
+        PCKEV_AVG_ST_UB(sum7_l, sum7_r, dst7, dst);
+        dst += dst_stride;
+    }
+}
+
+static void copy_width8_msa(const uint8_t *src, int32_t src_stride,
+                            uint8_t *dst, int32_t dst_stride,
+                            int32_t height)
+{   /* Row copy storing one 64-bit element per row; the unroll factor is chosen from height's divisibility. */
+    int32_t cnt;
+    uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7; /* NOTE(review): v16u8 loads look like full 16-byte reads although only element 0 is kept — confirm over-read is safe */
+
+    if (0 == height % 12) { /* 12 rows per pass, done as 8 + 4 */
+        for (cnt = (height / 12); cnt--;) {
+            LD_UB8(src, src_stride,
+                   src0, src1, src2, src3, src4, src5, src6, src7);
+            src += (8 * src_stride);
+
+            out0 = __msa_copy_u_d((v2i64) src0, 0); /* 64-bit element 0 of each loaded row */
+            out1 = __msa_copy_u_d((v2i64) src1, 0);
+            out2 = __msa_copy_u_d((v2i64) src2, 0);
+            out3 = __msa_copy_u_d((v2i64) src3, 0);
+            out4 = __msa_copy_u_d((v2i64) src4, 0);
+            out5 = __msa_copy_u_d((v2i64) src5, 0);
+            out6 = __msa_copy_u_d((v2i64) src6, 0);
+            out7 = __msa_copy_u_d((v2i64) src7, 0);
+
+            SD4(out0, out1, out2, out3, dst, dst_stride);
+            dst += (4 * dst_stride);
+            SD4(out4, out5, out6, out7, dst, dst_stride);
+            dst += (4 * dst_stride);
+
+            LD_UB4(src, src_stride, src0, src1, src2, src3); /* remaining 4 rows of the group of 12 */
+            src += (4 * src_stride);
+
+            out0 = __msa_copy_u_d((v2i64) src0, 0);
+            out1 = __msa_copy_u_d((v2i64) src1, 0);
+            out2 = __msa_copy_u_d((v2i64) src2, 0);
+            out3 = __msa_copy_u_d((v2i64) src3, 0);
+
+            SD4(out0, out1, out2, out3, dst, dst_stride);
+            dst += (4 * dst_stride);
+        }
+    } else if (0 == height % 8) { /* 8 rows per pass */
+        for (cnt = height >> 3; cnt--;) {
+            LD_UB8(src, src_stride,
+                   src0, src1, src2, src3, src4, src5, src6, src7);
+            src += (8 * src_stride);
+
+            out0 = __msa_copy_u_d((v2i64) src0, 0);
+            out1 = __msa_copy_u_d((v2i64) src1, 0);
+            out2 = __msa_copy_u_d((v2i64) src2, 0);
+            out3 = __msa_copy_u_d((v2i64) src3, 0);
+            out4 = __msa_copy_u_d((v2i64) src4, 0);
+            out5 = __msa_copy_u_d((v2i64) src5, 0);
+            out6 = __msa_copy_u_d((v2i64) src6, 0);
+            out7 = __msa_copy_u_d((v2i64) src7, 0);
+
+            SD4(out0, out1, out2, out3, dst, dst_stride);
+            dst += (4 * dst_stride);
+            SD4(out4, out5, out6, out7, dst, dst_stride);
+            dst += (4 * dst_stride);
+        }
+    } else if (0 == height % 4) { /* 4 rows per pass */
+        for (cnt = (height / 4); cnt--;) {
+            LD_UB4(src, src_stride, src0, src1, src2, src3);
+            src += (4 * src_stride);
+            out0 = __msa_copy_u_d((v2i64) src0, 0);
+            out1 = __msa_copy_u_d((v2i64) src1, 0);
+            out2 = __msa_copy_u_d((v2i64) src2, 0);
+            out3 = __msa_copy_u_d((v2i64) src3, 0);
+
+            SD4(out0, out1, out2, out3, dst, dst_stride);
+            dst += (4 * dst_stride);
+        }
+    } else if (0 == height % 2) { /* 2 rows per pass */
+        for (cnt = (height / 2); cnt--;) {
+            LD_UB2(src, src_stride, src0, src1);
+            src += (2 * src_stride);
+            out0 = __msa_copy_u_d((v2i64) src0, 0);
+            out1 = __msa_copy_u_d((v2i64) src1, 0);
+
+            SD(out0, dst);
+            dst += dst_stride;
+            SD(out1, dst);
+            dst += dst_stride;
+        }
+    } /* no final else: an odd height copies nothing */
+}
+
+static void copy_16multx8mult_msa(const uint8_t *src, int32_t src_stride,
+                                  uint8_t *dst, int32_t dst_stride,
+                                  int32_t height, int32_t width)
+{   /* Copies the area as 16-byte-wide column strips, 8 rows at a time; width % 16 and height % 8 remainders are not copied. */
+    int32_t cnt, loop_cnt;
+    const uint8_t *src_tmp;
+    uint8_t *dst_tmp;
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
+
+    for (cnt = (width >> 4); cnt--;) { /* one pass per 16-byte column strip */
+        src_tmp = src; /* restart from the top row of the current strip */
+        dst_tmp = dst;
+
+        for (loop_cnt = (height >> 3); loop_cnt--;) { /* 8 rows per pass */
+            LD_UB8(src_tmp, src_stride,
+                   src0, src1, src2, src3, src4, src5, src6, src7);
+            src_tmp += (8 * src_stride);
+
+            ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7,
+                   dst_tmp, dst_stride);
+            dst_tmp += (8 * dst_stride);
+        }
+
+        src += 16; /* step to the next column strip */
+        dst += 16;
+    }
+}
+
+static void copy_width16_msa(const uint8_t *src, int32_t src_stride,
+                             uint8_t *dst, int32_t dst_stride,
+                             int32_t height)
+{   /* Row copy using one v16u8 vector per row; the unroll factor is chosen from height's divisibility. */
+    int32_t cnt;
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
+
+    if (0 == height % 12) { /* 12 rows per pass, done as 8 + 4 */
+        for (cnt = (height / 12); cnt--;) {
+            LD_UB8(src, src_stride,
+                   src0, src1, src2, src3, src4, src5, src6, src7);
+            src += (8 * src_stride);
+            ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7,
+                   dst, dst_stride);
+            dst += (8 * dst_stride);
+
+            LD_UB4(src, src_stride, src0, src1, src2, src3); /* remaining 4 rows of the group of 12 */
+            src += (4 * src_stride);
+            ST_UB4(src0, src1, src2, src3, dst, dst_stride);
+            dst += (4 * dst_stride);
+        }
+    } else if (0 == height % 8) {
+        copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 16); /* width 16 gives a single column strip */
+    } else if (0 == height % 4) { /* 4 rows per pass */
+        for (cnt = (height >> 2); cnt--;) {
+            LD_UB4(src, src_stride, src0, src1, src2, src3);
+            src += (4 * src_stride);
+
+            ST_UB4(src0, src1, src2, src3, dst, dst_stride);
+            dst += (4 * dst_stride);
+        }
+    } /* no final else: a height that is not a multiple of 4 copies nothing */
+}
+
+static void avg_width4_msa(const uint8_t *src, int32_t src_stride,
+                           uint8_t *dst, int32_t dst_stride,
+                           int32_t height)
+{   /* Combines src rows with the existing dst rows and stores one 32-bit word per row back to dst. */
+    int32_t cnt;
+    uint32_t out0, out1, out2, out3;
+    v16u8 src0, src1, src2, src3;
+    v16u8 dst0, dst1, dst2, dst3;
+
+    if (0 == (height % 4)) { /* 4 rows per pass */
+        for (cnt = (height / 4); cnt--;) {
+            LD_UB4(src, src_stride, src0, src1, src2, src3);
+            src += (4 * src_stride);
+
+            LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+
+            AVER_UB4_UB(src0, dst0, src1, dst1, src2, dst2, src3, dst3, /* presumably a rounded per-byte average; results overwrite dst0..dst3 — TODO confirm */
+                        dst0, dst1, dst2, dst3);
+
+            out0 = __msa_copy_u_w((v4i32) dst0, 0); /* 32-bit element 0 of each averaged row */
+            out1 = __msa_copy_u_w((v4i32) dst1, 0);
+            out2 = __msa_copy_u_w((v4i32) dst2, 0);
+            out3 = __msa_copy_u_w((v4i32) dst3, 0);
+            SW4(out0, out1, out2, out3, dst, dst_stride);
+            dst += (4 * dst_stride);
+        }
+    } else if (0 == (height % 2)) { /* 2 rows per pass */
+        for (cnt = (height / 2); cnt--;) {
+            LD_UB2(src, src_stride, src0, src1);
+            src += (2 * src_stride);
+
+            LD_UB2(dst, dst_stride, dst0, dst1);
+
+            AVER_UB2_UB(src0, dst0, src1, dst1, dst0, dst1);
+
+            out0 = __msa_copy_u_w((v4i32) dst0, 0);
+            out1 = __msa_copy_u_w((v4i32) dst1, 0);
+            SW(out0, dst);
+            dst += dst_stride;
+            SW(out1, dst);
+            dst += dst_stride;
+        }
+    } /* no final else: an odd height leaves dst untouched */
+}
+
+static void avg_width8_msa(const uint8_t *src, int32_t src_stride,
+                           uint8_t *dst, int32_t dst_stride,
+                           int32_t height)
+{   /* Combines src rows with the existing dst rows and stores one 64-bit element per row back to dst. */
+    int32_t cnt;
+    uint64_t out0, out1, out2, out3;
+    v16u8 src0, src1, src2, src3;
+    v16u8 dst0, dst1, dst2, dst3;
+
+    for (cnt = (height / 4); cnt--;) { /* 4 rows per pass; height % 4 leftover rows are not processed */
+        LD_UB4(src, src_stride, src0, src1, src2, src3);
+        src += (4 * src_stride);
+        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+
+        AVER_UB4_UB(src0, dst0, src1, dst1, src2, dst2, src3, dst3, /* presumably a rounded per-byte average; results overwrite dst0..dst3 — TODO confirm */
+                    dst0, dst1, dst2, dst3);
+
+        out0 = __msa_copy_u_d((v2i64) dst0, 0); /* 64-bit element 0 of each averaged row */
+        out1 = __msa_copy_u_d((v2i64) dst1, 0);
+        out2 = __msa_copy_u_d((v2i64) dst2, 0);
+        out3 = __msa_copy_u_d((v2i64) dst3, 0);
+        SD4(out0, out1, out2, out3, dst, dst_stride);
+        dst += (4 * dst_stride);
+    }
+}
+
+static void avg_width16_msa(const uint8_t *src, int32_t src_stride,
+                            uint8_t *dst, int32_t dst_stride,
+                            int32_t height)
+{   /* Combines src rows with the existing dst rows and stores whole v16u8 vectors back to dst. */
+    int32_t cnt;
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
+    v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+
+    for (cnt = (height / 8); cnt--;) { /* 8 rows per pass; height % 8 leftover rows are not processed */
+        LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
+        src += (8 * src_stride);
+        LD_UB8(dst, dst_stride, dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7);
+
+        AVER_UB4_UB(src0, dst0, src1, dst1, src2, dst2, src3, dst3, /* presumably a rounded per-byte average; results overwrite dstN — TODO confirm */
+                    dst0, dst1, dst2, dst3);
+        AVER_UB4_UB(src4, dst4, src5, dst5, src6, dst6, src7, dst7,
+                    dst4, dst5, dst6, dst7);
+        ST_UB8(dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst, dst_stride);
+        dst += (8 * dst_stride);
+    }
+}
+
+void ff_put_pixels16_msa(uint8_t *block, const uint8_t *pixels,
+                         ptrdiff_t line_size, int h)
+{   /* Plain copy: pixels is the source, block the destination; line_size is the stride of both. */
+    copy_width16_msa(pixels, line_size, block, line_size, h); /* copies nothing unless h is a multiple of 4 */
+}
+
+void ff_put_pixels16_x2_msa(uint8_t *block, const uint8_t *pixels,
+                            ptrdiff_t line_size, int h)
+{   /* Forwards to the 16-wide "hz_bil" kernel: pixels is the source, block the destination, line_size the stride of both. */
+    common_hz_bil_16w_msa(pixels, line_size, block, line_size, h);
+}
+
+void ff_put_pixels16_y2_msa(uint8_t *block, const uint8_t *pixels,
+                            ptrdiff_t line_size, int h)
+{   /* Forwards to the 16-wide "vt_bil" kernel: pixels is the source, block the destination, line_size the stride of both. */
+    common_vt_bil_16w_msa(pixels, line_size, block, line_size, h);
+}
+
+void ff_put_pixels16_xy2_msa(uint8_t *block, const uint8_t *pixels,
+                             ptrdiff_t line_size, int h)
+{   /* Forwards to the 16-wide "hv_bil" kernel: pixels is the source, block the destination, line_size the stride of both. */
+    common_hv_bil_16w_msa(pixels, line_size, block, line_size, h); /* the kernel works in groups of 8 rows (h >> 3 passes) */
+}
+
+void ff_put_pixels8_msa(uint8_t *block, const uint8_t *pixels,
+                        ptrdiff_t line_size, int h)
+{   /* Plain copy: pixels is the source, block the destination; line_size is the stride of both. */
+    copy_width8_msa(pixels, line_size, block, line_size, h); /* copies nothing when h is odd */
+}
+
+void ff_put_pixels8_x2_msa(uint8_t *block, const uint8_t *pixels,
+                           ptrdiff_t line_size, int h)
+{   /* Forwards to the 8-wide "hz_bil" kernel: pixels is the source, block the destination, line_size the stride of both. */
+    common_hz_bil_8w_msa(pixels, line_size, block, line_size, h);
+}
+
+void ff_put_pixels8_y2_msa(uint8_t *block, const uint8_t *pixels,
+                           ptrdiff_t line_size, int h)
+{   /* Forwards to the 8-wide "vt_bil" kernel: pixels is the source, block the destination, line_size the stride of both. */
+    common_vt_bil_8w_msa(pixels, line_size, block, line_size, h);
+}
+
+void ff_put_pixels8_xy2_msa(uint8_t *block, const uint8_t *pixels,
+                            ptrdiff_t line_size, int h)
+{   /* Forwards to the 8-wide "hv_bil" kernel: pixels is the source, block the destination, line_size the stride of both. */
+    common_hv_bil_8w_msa(pixels, line_size, block, line_size, h);
+}
+
+void ff_put_pixels4_x2_msa(uint8_t *block, const uint8_t *pixels,
+                           ptrdiff_t line_size, int h)
+{   /* Forwards to the 4-wide "hz_bil" kernel: pixels is the source, block the destination, line_size the stride of both. */
+    common_hz_bil_4w_msa(pixels, line_size, block, line_size, h);
+}
+
+void ff_put_pixels4_y2_msa(uint8_t *block, const uint8_t *pixels,
+                           ptrdiff_t line_size, int h)
+{   /* Forwards to the 4-wide "vt_bil" kernel: pixels is the source, block the destination, line_size the stride of both. */
+    common_vt_bil_4w_msa(pixels, line_size, block, line_size, h);
+}
+
+void ff_put_pixels4_xy2_msa(uint8_t *block, const uint8_t *pixels,
+                            ptrdiff_t line_size, int h)
+{   /* Forwards to the 4-wide "hv_bil" kernel: pixels is the source, block the destination, line_size the stride of both. */
+    common_hv_bil_4w_msa(pixels, line_size, block, line_size, h);
+}
+
+void ff_put_no_rnd_pixels16_x2_msa(uint8_t *block, const uint8_t *pixels,
+                                   ptrdiff_t line_size, int h)
+{   /* Picks a fixed-size "hz_bil_no_rnd" kernel by h; pixels is the source, block the destination. */
+    if (h == 16) {
+        common_hz_bil_no_rnd_16x16_msa(pixels, line_size, block, line_size);
+    } else if (h == 8) { /* used for h == 8; the name presumably reads rows x columns */
+        common_hz_bil_no_rnd_8x16_msa(pixels, line_size, block, line_size);
+    } /* any other h: nothing is written */
+}
+
+void ff_put_no_rnd_pixels16_y2_msa(uint8_t *block, const uint8_t *pixels,
+                                   ptrdiff_t line_size, int h)
+{   /* Picks a fixed-size "vt_bil_no_rnd" kernel by h; pixels is the source, block the destination. */
+    if (h == 16) {
+        common_vt_bil_no_rnd_16x16_msa(pixels, line_size, block, line_size);
+    } else if (h == 8) { /* used for h == 8; the name presumably reads rows x columns */
+        common_vt_bil_no_rnd_8x16_msa(pixels, line_size, block, line_size);
+    } /* any other h: nothing is written */
+}
+
+void ff_put_no_rnd_pixels16_xy2_msa(uint8_t *block,
+                                    const uint8_t *pixels,
+                                    ptrdiff_t line_size, int h)
+{   /* Picks a fixed-size "hv_bil_no_rnd" kernel by h; pixels is the source, block the destination. */
+    if (h == 16) {
+        common_hv_bil_no_rnd_16x16_msa(pixels, line_size, block, line_size);
+    } else if (h == 8) { /* the 8x16 kernel writes 8 rows */
+        common_hv_bil_no_rnd_8x16_msa(pixels, line_size, block, line_size);
+    } /* any other h: nothing is written */
+}
+
+void ff_put_no_rnd_pixels8_x2_msa(uint8_t *block, const uint8_t *pixels,
+                                  ptrdiff_t line_size, int h)
+{   /* Picks a fixed-size "hz_bil_no_rnd" kernel by h; pixels is the source, block the destination. */
+    if (h == 8) {
+        common_hz_bil_no_rnd_8x8_msa(pixels, line_size, block, line_size);
+    } else if (h == 4) { /* used for h == 4; the name presumably reads rows x columns */
+        common_hz_bil_no_rnd_4x8_msa(pixels, line_size, block, line_size);
+    } /* any other h: nothing is written */
+}
+
+void ff_put_no_rnd_pixels8_y2_msa(uint8_t *block, const uint8_t *pixels,
+                                  ptrdiff_t line_size, int h)
+{   /* Picks a fixed-size "vt_bil_no_rnd" kernel by h; pixels is the source, block the destination. */
+    if (h == 8) {
+        common_vt_bil_no_rnd_8x8_msa(pixels, line_size, block, line_size);
+    } else if (h == 4) { /* used for h == 4; the name presumably reads rows x columns */
+        common_vt_bil_no_rnd_4x8_msa(pixels, line_size, block, line_size);
+    } /* any other h: nothing is written */
+}
+
+void ff_put_no_rnd_pixels8_xy2_msa(uint8_t *block, const uint8_t *pixels,
+                                   ptrdiff_t line_size, int h)
+{   /* Picks a fixed-size "hv_bil_no_rnd" kernel by h; pixels is the source, block the destination. */
+    if (h == 8) {
+        common_hv_bil_no_rnd_8x8_msa(pixels, line_size, block, line_size);
+    } else if (h == 4) { /* the 4x8 kernel writes 4 rows */
+        common_hv_bil_no_rnd_4x8_msa(pixels, line_size, block, line_size);
+    } /* any other h: nothing is written */
+}
+
+void ff_avg_pixels16_msa(uint8_t *block, const uint8_t *pixels,
+                         ptrdiff_t line_size, int h)
+{   /* Combines pixels (source) into block (destination, read and written); line_size is the stride of both. */
+    avg_width16_msa(pixels, line_size, block, line_size, h); /* works in groups of 8 rows (h / 8 passes) */
+}
+
+void ff_avg_pixels16_x2_msa(uint8_t *block, const uint8_t *pixels,
+                            ptrdiff_t line_size, int h)
+{   /* Forwards to the 16-wide "hz_bil_and_aver_dst" kernel: pixels is the source, block the destination, line_size the stride of both. */
+    common_hz_bil_and_aver_dst_16w_msa(pixels, line_size, block, line_size, h);
+}
+
+void ff_avg_pixels16_y2_msa(uint8_t *block, const uint8_t *pixels,
+                            ptrdiff_t line_size, int h)
+{   /* Forwards to the 16-wide "vt_bil_and_aver_dst" kernel: pixels is the source, block the destination, line_size the stride of both. */
+    common_vt_bil_and_aver_dst_16w_msa(pixels, line_size, block, line_size, h);
+}
+
+void ff_avg_pixels16_xy2_msa(uint8_t *block, const uint8_t *pixels,
+                             ptrdiff_t line_size, int h)
+{   /* Forwards to the 16-wide "hv_bil_and_aver_dst" kernel: pixels is the source, block the destination, line_size the stride of both. */
+    common_hv_bil_and_aver_dst_16w_msa(pixels, line_size, block, line_size, h); /* the kernel works in groups of 8 rows (h >> 3 passes) */
+}
+
+void ff_avg_pixels8_msa(uint8_t *block, const uint8_t *pixels,
+                        ptrdiff_t line_size, int h)
+{   /* Combines pixels (source) into block (destination, read and written); line_size is the stride of both. */
+    avg_width8_msa(pixels, line_size, block, line_size, h); /* works in groups of 4 rows (h / 4 passes) */
+}
+
+void ff_avg_pixels8_x2_msa(uint8_t *block, const uint8_t *pixels,
+                           ptrdiff_t line_size, int h)
+{   /* Forwards to the 8-wide "hz_bil_and_aver_dst" kernel: pixels is the source, block the destination, line_size the stride of both. */
+    common_hz_bil_and_aver_dst_8w_msa(pixels, line_size, block, line_size, h);
+}
+
+void ff_avg_pixels8_y2_msa(uint8_t *block, const uint8_t *pixels,
+                           ptrdiff_t line_size, int h)
+{   /* Forwards to the 8-wide "vt_bil_and_aver_dst" kernel: pixels is the source, block the destination, line_size the stride of both. */
+    common_vt_bil_and_aver_dst_8w_msa(pixels, line_size, block, line_size, h);
+}
+
+void ff_avg_pixels8_xy2_msa(uint8_t *block, const uint8_t *pixels,
+                            ptrdiff_t line_size, int h)
+{   /* Forwards to the 8-wide "hv_bil_and_aver_dst" kernel: pixels is the source, block the destination, line_size the stride of both. */
+    common_hv_bil_and_aver_dst_8w_msa(pixels, line_size, block, line_size, h); /* the kernel works in groups of 4 rows (h >> 2 passes) */
+}
+
+void ff_avg_pixels4_msa(uint8_t *block, const uint8_t *pixels,
+                        ptrdiff_t line_size, int h)
+{   /* Combines pixels (source) into block (destination, read and written); line_size is the stride of both. */
+    avg_width4_msa(pixels, line_size, block, line_size, h); /* leaves block untouched when h is odd */
+}
+
+void ff_avg_pixels4_x2_msa(uint8_t *block, const uint8_t *pixels,
+                           ptrdiff_t line_size, int h)
+{   /* Forwards to the 4-wide "hz_bil_and_aver_dst" kernel: pixels is the source, block the destination, line_size the stride of both. */
+    common_hz_bil_and_aver_dst_4w_msa(pixels, line_size, block, line_size, h);
+}
+
+void ff_avg_pixels4_y2_msa(uint8_t *block, const uint8_t *pixels,
+                           ptrdiff_t line_size, int h)
+{   /* Forwards to the 4-wide "vt_bil_and_aver_dst" kernel: pixels is the source, block the destination, line_size the stride of both. */
+    common_vt_bil_and_aver_dst_4w_msa(pixels, line_size, block, line_size, h);
+}
+
+void ff_avg_pixels4_xy2_msa(uint8_t *block, const uint8_t *pixels,
+                            ptrdiff_t line_size, int h)
+{   /* Forwards to the 4-wide "hv_bil_and_aver_dst" kernel: pixels is the source, block the destination, line_size the stride of both. */
+    common_hv_bil_and_aver_dst_4w_msa(pixels, line_size, block, line_size, h); /* the kernel works in groups of 2 rows (h >> 1 passes) */
+}
diff --git a/libavcodec/mips/idctdsp_init_mips.c b/libavcodec/mips/idctdsp_init_mips.c
new file mode 100644
index 00000000..8c26bca5
--- /dev/null
+++ b/libavcodec/mips/idctdsp_init_mips.c
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2015 Manojkumar Bhosale (Manojkumar.Bhosale@imgtec.com)
+ *                    Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "idctdsp_mips.h"
+
+#if HAVE_MSA
+/* Hook the MSA idctdsp routines into c (high_bit_depth is not consulted). */
+static av_cold void idctdsp_init_msa(IDCTDSPContext *c, AVCodecContext *avctx,
+                                     unsigned high_bit_depth)
+{
+    const int lowres = avctx->lowres, depth = avctx->bits_per_raw_sample;
+    /* simple IDCT: auto algorithm only, no lowres 1..3, no 10/12-bit depth */
+    if (avctx->idct_algo == FF_IDCT_AUTO && depth != 10 && depth != 12 &&
+        lowres != 1 && lowres != 2 && lowres != 3) {
+        c->idct      = ff_simple_idct_msa;
+        c->idct_put  = ff_simple_idct_put_msa;
+        c->idct_add  = ff_simple_idct_add_msa;
+        c->perm_type = FF_IDCT_PERM_NONE;
+    }
+    c->put_pixels_clamped        = ff_put_pixels_clamped_msa;
+    c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_msa;
+    c->add_pixels_clamped        = ff_add_pixels_clamped_msa;
+}
+#endif  // #if HAVE_MSA
+
+#if HAVE_MMI
+/* Hook the MMI idctdsp routines into c (high_bit_depth is not consulted). */
+static av_cold void idctdsp_init_mmi(IDCTDSPContext *c, AVCodecContext *avctx,
+        unsigned high_bit_depth)
+{
+    const int lowres = avctx->lowres, depth = avctx->bits_per_raw_sample;
+    /* simple IDCT: auto algorithm only, no lowres 1..3, no 10/12-bit depth */
+    if (avctx->idct_algo == FF_IDCT_AUTO && depth != 10 && depth != 12 &&
+        lowres != 1 && lowres != 2 && lowres != 3) {
+        c->idct      = ff_simple_idct_mmi;
+        c->perm_type = FF_IDCT_PERM_NONE;
+    }
+    c->put_pixels_clamped        = ff_put_pixels_clamped_mmi;
+    c->add_pixels_clamped        = ff_add_pixels_clamped_mmi;
+    c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmi;
+}
+#endif /* HAVE_MMI */
+
+av_cold void ff_idctdsp_init_mips(IDCTDSPContext *c, AVCodecContext *avctx,
+                          unsigned high_bit_depth)
+{
+#if HAVE_MSA
+    idctdsp_init_msa(c, avctx, high_bit_depth);
+#endif  /* HAVE_MSA: runs first, so the MMI init below may overwrite its pointers */
+#if HAVE_MMI
+    idctdsp_init_mmi(c, avctx, high_bit_depth);
+#endif /* HAVE_MMI: runs last; its assignments win when both are enabled */
+}
diff --git a/libavcodec/mips/idctdsp_mips.h b/libavcodec/mips/idctdsp_mips.h
new file mode 100644
index 00000000..19267e67
--- /dev/null
+++ b/libavcodec/mips/idctdsp_mips.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2015 Manojkumar Bhosale (Manojkumar.Bhosale@imgtec.com)
+ *                    Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MIPS_IDCTDSP_MIPS_H
+#define AVCODEC_MIPS_IDCTDSP_MIPS_H
+
+#include "../mpegvideo.h"
+
+void ff_put_pixels_clamped_msa(const int16_t *block,
+                               uint8_t *av_restrict pixels,
+                               ptrdiff_t line_size);  /* defined in idctdsp_msa.c */
+void ff_put_signed_pixels_clamped_msa(const int16_t *block,
+                                      uint8_t *av_restrict pixels,
+                                      ptrdiff_t line_size);  /* defined in idctdsp_msa.c */
+void ff_add_pixels_clamped_msa(const int16_t *block,
+                               uint8_t *av_restrict pixels,
+                               ptrdiff_t line_size);  /* defined in idctdsp_msa.c */
+void ff_j_rev_dct_msa(int16_t *data);  /* the three jref/j_rev entries are not installed by idctdsp_init_mips.c */
+void ff_jref_idct_put_msa(uint8_t *dest, int32_t stride, int16_t *block);
+void ff_jref_idct_add_msa(uint8_t *dest, int32_t stride, int16_t *block);
+void ff_simple_idct_msa(int16_t *block);  /* installed as c->idct by idctdsp_init_msa() */
+void ff_simple_idct_put_msa(uint8_t *dest, int32_t stride_dst, int16_t *block);  /* c->idct_put */
+void ff_simple_idct_add_msa(uint8_t *dest, int32_t stride_dst, int16_t *block);  /* c->idct_add */
+
+void ff_put_pixels_clamped_mmi(const int16_t *block,
+        uint8_t *av_restrict pixels, ptrdiff_t line_size);  /* defined in idctdsp_mmi.c */
+void ff_put_signed_pixels_clamped_mmi(const int16_t *block,
+        uint8_t *av_restrict pixels, ptrdiff_t line_size);  /* defined in idctdsp_mmi.c */
+void ff_add_pixels_clamped_mmi(const int16_t *block,
+        uint8_t *av_restrict pixels, ptrdiff_t line_size);  /* defined in idctdsp_mmi.c */
+void ff_simple_idct_mmi(int16_t *block);  /* installed as c->idct by idctdsp_init_mmi() */
+void ff_simple_idct_put_mmi(uint8_t *dest, int32_t line_size, int16_t *block);  /* not installed by idctdsp_init_mmi() */
+void ff_simple_idct_add_mmi(uint8_t *dest, int32_t line_size, int16_t *block);  /* not installed by idctdsp_init_mmi() */
+
+#endif  // #ifndef AVCODEC_MIPS_IDCTDSP_MIPS_H
diff --git a/libavcodec/mips/idctdsp_mmi.c b/libavcodec/mips/idctdsp_mmi.c
new file mode 100644
index 00000000..25476f3c
--- /dev/null
+++ b/libavcodec/mips/idctdsp_mmi.c
@@ -0,0 +1,190 @@
+/*
+ * Loongson SIMD optimized idctdsp
+ *
+ * Copyright (c) 2015 Loongson Technology Corporation Limited
+ * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "idctdsp_mips.h"
+#include "constants.h"
+
+/* Store an 8x8 int16_t block to pixels as bytes narrowed by packushb, four rows
+ * per asm statement. Every FPR and $10 written by the asm is listed as a
+ * clobber, and the address add is daddu so it cannot trap on overflow. */
+void ff_put_pixels_clamped_mmi(const int16_t *block,
+        uint8_t *av_restrict pixels, ptrdiff_t line_size)
+{
+    const int16_t *p = block;
+    uint8_t *pix = pixels;
+
+    __asm__ volatile (
+        "ldc1 $f0, 0+%3                 \r\n" /* row 0, values 0-3 */
+        "ldc1 $f2, 8+%3                 \r\n" /* row 0, values 4-7 */
+        "ldc1 $f4, 16+%3                \r\n" /* row 1 */
+        "ldc1 $f6, 24+%3                \r\n"
+        "ldc1 $f8, 32+%3                \r\n" /* row 2 */
+        "ldc1 $f10, 40+%3               \r\n"
+        "ldc1 $f12, 48+%3               \r\n" /* row 3 */
+        "ldc1 $f14, 56+%3               \r\n"
+        "daddu $10, %0, %1              \r\n" /* $10 = pix + line_size */
+        "packushb $f0, $f0, $f2         \r\n"
+        "packushb $f4, $f4, $f6         \r\n"
+        "packushb $f8, $f8, $f10        \r\n"
+        "packushb $f12, $f12, $f14      \r\n"
+        "sdc1 $f0, 0(%0)                \r\n" /* pix */
+        "sdc1 $f4, 0($10)               \r\n" /* pix + line_size */
+        "gssdxc1 $f8, 0($10, %1)        \r\n" /* pix + 2 * line_size */
+        "gssdxc1 $f12, 0(%0, %2)        \r\n" /* pix + 3 * line_size */
+        ::"r"(pix),"r"((int)line_size),
+          "r"((int)line_size*3),"m"(*p)
+        : "$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14","$10","memory"
+    );
+
+    pix += line_size*4;
+    p += 32;
+
+    __asm__ volatile (
+        "ldc1 $f0, 0+%3                 \r\n" /* row 4, values 0-3 */
+        "ldc1 $f2, 8+%3                 \r\n" /* row 4, values 4-7 */
+        "ldc1 $f4, 16+%3                \r\n" /* row 5 */
+        "ldc1 $f6, 24+%3                \r\n"
+        "ldc1 $f8, 32+%3                \r\n" /* row 6 */
+        "ldc1 $f10, 40+%3               \r\n"
+        "ldc1 $f12, 48+%3               \r\n" /* row 7 */
+        "ldc1 $f14, 56+%3               \r\n"
+        "daddu $10, %0, %1              \r\n" /* $10 = pix + line_size */
+        "packushb $f0, $f0, $f2         \r\n"
+        "packushb $f4, $f4, $f6         \r\n"
+        "packushb $f8, $f8, $f10        \r\n"
+        "packushb $f12, $f12, $f14      \r\n"
+        "sdc1 $f0, 0(%0)                \r\n" /* pix */
+        "sdc1 $f4, 0($10)               \r\n" /* pix + line_size */
+        "gssdxc1 $f8, 0($10, %1)        \r\n" /* pix + 2 * line_size */
+        "gssdxc1 $f12, 0(%0, %2)        \r\n" /* pix + 3 * line_size */
+        ::"r"(pix),"r"((int)line_size),
+          "r"((int)line_size*3),"m"(*p)
+        : "$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14","$10","memory"
+    );
+}
+
+/* Pack an 8x8 int16_t block with packsshb, add ff_pb_80 bytewise, store 8 rows. */
+void ff_put_signed_pixels_clamped_mmi(const int16_t *block,
+    uint8_t *av_restrict pixels, ptrdiff_t line_size)
+{
+    int64_t line_skip = line_size, line_skip3; /* line_skip3: asm scratch */
+
+    __asm__ volatile (
+        "dmtc1 %4, $f0                  \n\t" /* $f0 = ff_pb_80 */
+        "daddu %1, %3, %3               \n\t" /* %1 = 2 * line_skip */
+        "ldc1 $f2, 0(%2)                \n\t" /* row 0 */
+        "ldc1 $f10, 8(%2)               \n\t"
+        "packsshb $f2, $f2, $f10        \n\t"
+        "ldc1 $f4, 16(%2)               \n\t" /* row 1 */
+        "ldc1 $f10, 24(%2)              \n\t"
+        "packsshb $f4, $f4, $f10        \n\t"
+        "ldc1 $f6, 32(%2)               \n\t" /* row 2 */
+        "ldc1 $f10, 40(%2)              \n\t"
+        "packsshb $f6, $f6, $f10        \n\t"
+        "ldc1 $f8, 48(%2)               \n\t" /* row 3 */
+        "ldc1 $f10, 56(%2)              \n\t"
+        "packsshb $f8, $f8, $f10        \n\t"
+        "paddb $f2, $f2, $f0            \n\t" /* add the ff_pb_80 bytes */
+        "paddb $f4, $f4, $f0            \n\t"
+        "paddb $f6, $f6, $f0            \n\t"
+        "paddb $f8, $f8, $f0            \n\t"
+        "sdc1 $f2, 0(%0)                \n\t" /* pixels */
+        "gssdxc1 $f4, 0(%0, %3)         \n\t" /* pixels + line_skip */
+        "gssdxc1 $f6, 0(%0, %1)         \n\t" /* pixels + 2 * line_skip */
+        "daddu %1, %1, %3               \n\t" /* %1 = 3 * line_skip */
+        "gssdxc1 $f8, 0(%0, %1)         \n\t" /* pixels + 3 * line_skip */
+        "daddu $10, %1, %3              \n\t" /* $10 = 4 * line_skip */
+        "daddu %0, %0, $10              \n\t" /* advance pixels by four rows */
+        "ldc1 $f2, 64(%2)               \n\t" /* row 4 */
+        "ldc1 $f10, 8+64(%2)            \n\t"
+        "packsshb  $f2, $f2, $f10       \n\t"
+        "ldc1 $f4, 16+64(%2)            \n\t" /* row 5 */
+        "ldc1 $f10, 24+64(%2)           \n\t"
+        "packsshb $f4, $f4, $f10        \n\t"
+        "ldc1 $f6, 32+64(%2)            \n\t" /* row 6 */
+        "ldc1 $f10, 40+64(%2)           \n\t"
+        "packsshb $f6, $f6, $f10        \n\t"
+        "ldc1 $f8, 48+64(%2)            \n\t" /* row 7 */
+        "ldc1 $f10, 56+64(%2)           \n\t"
+        "packsshb $f8, $f8, $f10        \n\t"
+        "paddb $f2, $f2, $f0            \n\t"
+        "paddb $f4, $f4, $f0            \n\t"
+        "paddb $f6, $f6, $f0            \n\t"
+        "paddb $f8, $f8, $f0            \n\t"
+        "sdc1 $f2, 0(%0)                \n\t" /* pixels */
+        "gssdxc1 $f4, 0(%0, %3)         \n\t" /* pixels + line_skip */
+        "daddu $10, %3, %3              \n\t" /* $10 = 2 * line_skip */
+        "gssdxc1 $f6, 0(%0, $10)        \n\t" /* pixels + 2 * line_skip */
+        "gssdxc1 $f8, 0(%0, %1)         \n\t" /* pixels + 3 * line_skip */
+        : "+&r"(pixels),"=&r"(line_skip3)
+        : "r"(block),"r"(line_skip),"r"(ff_pb_80)
+        : "$f0","$f2","$f4","$f6","$f8","$f10","$10","memory" /* every register written above */
+    );
+}
+
+/* Add an 8x8 int16_t block to the bytes at pixels, two rows per loop pass:
+ * the pixel bytes are interleaved with the zeroed $f14, summed with the
+ * coefficients by paddsh and narrowed again by packushb (saturating, judging
+ * by the mnemonics; confirm against the Loongson MMI manual). */
+void ff_add_pixels_clamped_mmi(const int16_t *block,
+        uint8_t *av_restrict pixels, ptrdiff_t line_size)
+{
+    const int16_t *p = block;
+    uint8_t *pix = pixels;
+    int i = 4;
+
+    do {
+        /* $f14 is cleared inside this statement: register contents are not
+         * guaranteed to survive from one asm statement to the next, and all
+         * FPRs written here are declared as clobbers. */
+        __asm__ volatile (
+            "xor $f14, $f14, $f14       \r\n"
+            "ldc1 $f0, 0+%2             \r\n"
+            "ldc1 $f2, 8+%2             \r\n"
+            "ldc1 $f4, 16+%2            \r\n"
+            "ldc1 $f6, 24+%2            \r\n"
+            "ldc1 $f8, %0               \r\n"
+            "ldc1 $f12, %1              \r\n"
+            "mov.d $f10, $f8            \r\n"
+            "punpcklbh $f8, $f8, $f14   \r\n"
+            "punpckhbh $f10, $f10, $f14 \r\n"
+            "paddsh $f0, $f0, $f8       \r\n"
+            "paddsh $f2, $f2, $f10      \r\n"
+            "mov.d $f10, $f12           \r\n"
+            "punpcklbh $f12, $f12, $f14 \r\n"
+            "punpckhbh $f10, $f10, $f14 \r\n"
+            "paddsh $f4, $f4, $f12      \r\n"
+            "paddsh $f6, $f6, $f10      \r\n"
+            "packushb $f0, $f0, $f2     \r\n"
+            "packushb $f4, $f4, $f6     \r\n"
+            "sdc1 $f0, %0               \r\n"
+            "sdc1 $f4, %1               \r\n"
+            : "+m"(*pix),"+m"(*(pix+line_size))
+            : "m"(*p)
+            : "$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14","memory"
+        );
+
+        pix += line_size*2;
+        p += 16;
+    } while (--i);
+}
diff --git a/libavcodec/mips/idctdsp_msa.c b/libavcodec/mips/idctdsp_msa.c
new file mode 100644
index 00000000..b29e4205
--- /dev/null
+++ b/libavcodec/mips/idctdsp_msa.c
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2015 Manojkumar Bhosale (Manojkumar.Bhosale@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/mips/generic_macros_msa.h"
+#include "idctdsp_mips.h"
+
+/* Run an 8x8 int16_t block through CLIP_SH4_0_255 and store 8 rows of 8 bytes. */
+static void put_pixels_clamped_msa(const int16_t *block, uint8_t *pixels,
+                                   int32_t stride)
+{
+    v8i16 r0, r1, r2, r3, r4, r5, r6, r7;
+    uint64_t d0, d1, d2, d3, d4, d5, d6, d7;
+
+    LD_SH8(block, 8, r0, r1, r2, r3, r4, r5, r6, r7);
+    CLIP_SH4_0_255(r0, r1, r2, r3);
+    PCKEV_B4_SH(r0, r0, r1, r1, r2, r2, r3, r3, r0, r1, r2, r3);
+    d0 = __msa_copy_u_d((v2i64) r0, 0);
+    d1 = __msa_copy_u_d((v2i64) r1, 0);
+    d2 = __msa_copy_u_d((v2i64) r2, 0);
+    d3 = __msa_copy_u_d((v2i64) r3, 0);
+    CLIP_SH4_0_255(r4, r5, r6, r7);
+    PCKEV_B4_SH(r4, r4, r5, r5, r6, r6, r7, r7, r4, r5, r6, r7);
+    d4 = __msa_copy_u_d((v2i64) r4, 0);
+    d5 = __msa_copy_u_d((v2i64) r5, 0);
+    d6 = __msa_copy_u_d((v2i64) r6, 0);
+    d7 = __msa_copy_u_d((v2i64) r7, 0);
+    SD4(d0, d1, d2, d3, pixels, stride);
+    pixels += 4 * stride;
+    SD4(d4, d5, d6, d7, pixels, stride);
+}
+
+/* Bias an 8x8 int16_t block by 128, apply CLIP_SH4_0_255, store 8 rows of 8 bytes. */
+static void put_signed_pixels_clamped_msa(const int16_t *block, uint8_t *pixels,
+                                          int32_t stride)
+{
+    v8i16 r0, r1, r2, r3, r4, r5, r6, r7;
+    uint64_t d0, d1, d2, d3, d4, d5, d6, d7;
+
+    LD_SH8(block, 8, r0, r1, r2, r3, r4, r5, r6, r7);
+    /* first four vectors: bias, clip, narrow, extract doubleword 0 */
+    r0 += 128;
+    r1 += 128;
+    r2 += 128;
+    r3 += 128;
+    CLIP_SH4_0_255(r0, r1, r2, r3);
+    PCKEV_B4_SH(r0, r0, r1, r1, r2, r2, r3, r3, r0, r1, r2, r3);
+    d0 = __msa_copy_u_d((v2i64) r0, 0);
+    d1 = __msa_copy_u_d((v2i64) r1, 0);
+    d2 = __msa_copy_u_d((v2i64) r2, 0);
+    d3 = __msa_copy_u_d((v2i64) r3, 0);
+    /* last four vectors: same steps */
+    r4 += 128;
+    r5 += 128;
+    r6 += 128;
+    r7 += 128;
+    CLIP_SH4_0_255(r4, r5, r6, r7);
+    PCKEV_B4_SH(r4, r4, r5, r5, r6, r6, r7, r7, r4, r5, r6, r7);
+    d4 = __msa_copy_u_d((v2i64) r4, 0);
+    d5 = __msa_copy_u_d((v2i64) r5, 0);
+    d6 = __msa_copy_u_d((v2i64) r6, 0);
+    d7 = __msa_copy_u_d((v2i64) r7, 0);
+    SD4(d0, d1, d2, d3, pixels, stride);
+    pixels += 4 * stride;
+    SD4(d4, d5, d6, d7, pixels, stride);
+}
+
+/* Add an 8x8 int16_t block to the 8x8 bytes at pixels and write the sums back
+ * in place after CLIP_SH4_0_255. All eight pixel vectors are loaded before the
+ * first store. */
+static void add_pixels_clamped_msa(const int16_t *block, uint8_t *pixels,
+                                   int32_t stride)
+{
+    v8i16 zero = { 0 };
+    v8i16 r0, r1, r2, r3, r4, r5, r6, r7;
+    v16u8 p0, p1, p2, p3, p4, p5, p6, p7;
+    v8u16 w0, w1, w2, w3, w4, w5, w6, w7;
+    uint64_t d0, d1, d2, d3, d4, d5, d6, d7;
+
+    LD_SH8(block, 8, r0, r1, r2, r3, r4, r5, r6, r7);
+    LD_UB8(pixels, stride, p0, p1, p2, p3, p4, p5, p6, p7);
+
+    /* first four vectors: interleave pixel bytes with zero, add, clip, narrow */
+    ILVR_B4_UH(zero, p0, zero, p1, zero, p2, zero, p3, w0, w1, w2, w3);
+    r0 += (v8i16) w0;
+    r1 += (v8i16) w1;
+    r2 += (v8i16) w2;
+    r3 += (v8i16) w3;
+    CLIP_SH4_0_255(r0, r1, r2, r3);
+    PCKEV_B4_SH(r0, r0, r1, r1, r2, r2, r3, r3, r0, r1, r2, r3);
+    d0 = __msa_copy_u_d((v2i64) r0, 0);
+    d1 = __msa_copy_u_d((v2i64) r1, 0);
+    d2 = __msa_copy_u_d((v2i64) r2, 0);
+    d3 = __msa_copy_u_d((v2i64) r3, 0);
+
+    /* last four vectors: same steps */
+    ILVR_B4_UH(zero, p4, zero, p5, zero, p6, zero, p7, w4, w5, w6, w7);
+    r4 += (v8i16) w4;
+    r5 += (v8i16) w5;
+    r6 += (v8i16) w6;
+    r7 += (v8i16) w7;
+    CLIP_SH4_0_255(r4, r5, r6, r7);
+    PCKEV_B4_SH(r4, r4, r5, r5, r6, r6, r7, r7, r4, r5, r6, r7);
+    d4 = __msa_copy_u_d((v2i64) r4, 0);
+    d5 = __msa_copy_u_d((v2i64) r5, 0);
+    d6 = __msa_copy_u_d((v2i64) r6, 0);
+    d7 = __msa_copy_u_d((v2i64) r7, 0);
+
+    SD4(d0, d1, d2, d3, pixels, stride);
+    pixels += 4 * stride;
+    SD4(d4, d5, d6, d7, pixels, stride);
+}
+
+/* Exported wrapper; line_size is converted to the static helper's int32_t stride. */
+void ff_put_pixels_clamped_msa(const int16_t *block, uint8_t *av_restrict pixels,
+                               ptrdiff_t line_size)
+{
+    put_pixels_clamped_msa(block, pixels, line_size);
+}
+
+/* Exported wrapper; line_size is converted to the static helper's int32_t stride. */
+void ff_put_signed_pixels_clamped_msa(const int16_t *block, uint8_t *av_restrict pixels,
+                                      ptrdiff_t line_size)
+{
+    put_signed_pixels_clamped_msa(block, pixels, line_size);
+}
+
+/* Exported wrapper; line_size is converted to the static helper's int32_t stride. */
+void ff_add_pixels_clamped_msa(const int16_t *block, uint8_t *av_restrict pixels,
+                               ptrdiff_t line_size)
+{
+    add_pixels_clamped_msa(block, pixels, line_size);
+}
diff --git a/libavcodec/mips/iirfilter_mips.c b/libavcodec/mips/iirfilter_mips.c
index 5a145cff..87db9ffe 100644
--- a/libavcodec/mips/iirfilter_mips.c
+++ b/libavcodec/mips/iirfilter_mips.c
@@ -55,7 +55,7 @@
 #include "libavcodec/iirfilter.h"
 
 #if HAVE_INLINE_ASM
-#if !HAVE_LOONGSON3
+#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
 typedef struct FFIIRFilterCoeffs {
     int   order;
     float gain;
@@ -196,13 +196,13 @@ static void ff_iir_filter_flt_mips(const struct FFIIRFilterCoeffs *c,
         }
     }
 }
-#endif /* !HAVE_LOONGSON3 */
+#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
 #endif /* HAVE_INLINE_ASM */
 
 void ff_iir_filter_init_mips(FFIIRFilterContext *f) {
 #if HAVE_INLINE_ASM
-#if !HAVE_LOONGSON3
+#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
     f->filter_flt = ff_iir_filter_flt_mips;
-#endif /* !HAVE_LOONGSON3 */
+#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
 #endif /* HAVE_INLINE_ASM */
 }
diff --git a/libavcodec/mips/lsp_mips.h b/libavcodec/mips/lsp_mips.h
index 9d752186..6219c5aa 100644
--- a/libavcodec/mips/lsp_mips.h
+++ b/libavcodec/mips/lsp_mips.h
@@ -51,10 +51,11 @@
  * @file
  * Reference: libavcodec/lsp.c
  */
-#ifndef AVCODEC_LSP_MIPS_H
-#define AVCODEC_LSP_MIPS_H
+#ifndef AVCODEC_MIPS_LSP_MIPS_H
+#define AVCODEC_MIPS_LSP_MIPS_H
 
 #if HAVE_MIPSFPU && HAVE_INLINE_ASM
+#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
 #include "libavutil/mips/asmdefs.h"
 
 static av_always_inline void ff_lsp2polyf_mips(const double *lsp, double *f, int lp_half_order)
@@ -107,5 +108,6 @@ static av_always_inline void ff_lsp2polyf_mips(const double *lsp, double *f, int
     }
 }
 #define ff_lsp2polyf ff_lsp2polyf_mips
+#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
 #endif /* HAVE_MIPSFPU && HAVE_INLINE_ASM */
-#endif /* AVCODEC_LSP_MIPS_H */
+#endif /* AVCODEC_MIPS_LSP_MIPS_H */
diff --git a/libavcodec/mips/me_cmp_init_mips.c b/libavcodec/mips/me_cmp_init_mips.c
new file mode 100644
index 00000000..219a0dc0
--- /dev/null
+++ b/libavcodec/mips/me_cmp_init_mips.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2015 Parag Salasakar (Parag.Salasakar@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "me_cmp_mips.h"
+
+#if HAVE_MSA
+/* Install the MSA comparison routines into c when BIT_DEPTH is 8; avctx is unused. */
+static av_cold void me_cmp_msa(MECmpContext *c, AVCodecContext *avctx)
+{
+#if BIT_DEPTH == 8
+    c->sad[0]        = ff_pix_abs16_msa;
+    c->sad[1]        = ff_pix_abs8_msa;
+    c->pix_abs[0][0] = ff_pix_abs16_msa;
+    c->pix_abs[0][1] = ff_pix_abs16_x2_msa;
+    c->pix_abs[0][2] = ff_pix_abs16_y2_msa;
+    c->pix_abs[0][3] = ff_pix_abs16_xy2_msa;
+    c->pix_abs[1][0] = ff_pix_abs8_msa;
+    c->pix_abs[1][1] = ff_pix_abs8_x2_msa;
+    c->pix_abs[1][2] = ff_pix_abs8_y2_msa;
+    c->pix_abs[1][3] = ff_pix_abs8_xy2_msa;
+
+    c->sse[0] = ff_sse16_msa;
+    c->sse[1] = ff_sse8_msa;
+    c->sse[2] = ff_sse4_msa;
+
+    c->hadamard8_diff[0] = ff_hadamard8_diff16_msa;
+    c->hadamard8_diff[1] = ff_hadamard8_diff8x8_msa;
+    c->hadamard8_diff[4] = ff_hadamard8_intra16_msa;
+    c->hadamard8_diff[5] = ff_hadamard8_intra8x8_msa;
+#endif
+}
+#endif  // #if HAVE_MSA
+
+av_cold void ff_me_cmp_init_mips(MECmpContext *c, AVCodecContext *avctx)
+{
+#if HAVE_MSA
+    me_cmp_msa(c, avctx);
+#endif  /* HAVE_MSA: without MSA this function leaves c untouched */
+}
diff --git a/libavcodec/mips/me_cmp_mips.h b/libavcodec/mips/me_cmp_mips.h
new file mode 100644
index 00000000..e0d0f51a
--- /dev/null
+++ b/libavcodec/mips/me_cmp_mips.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2015 Parag Salasakar (Parag.Salasakar@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MIPS_ME_CMP_MIPS_H
+#define AVCODEC_MIPS_ME_CMP_MIPS_H
+
+#include "../mpegvideo.h"
+#include "libavcodec/bit_depth_template.c"
+
+int ff_hadamard8_diff8x8_msa(MpegEncContext *s, uint8_t *dst, uint8_t *src,
+                             ptrdiff_t stride, int h);  /* c->hadamard8_diff[1] */
+int ff_hadamard8_intra8x8_msa(MpegEncContext *s, uint8_t *dst, uint8_t *src,
+                              ptrdiff_t stride, int h);  /* c->hadamard8_diff[5] */
+int ff_hadamard8_diff16_msa(MpegEncContext *s, uint8_t *dst, uint8_t *src,
+                            ptrdiff_t stride, int h);  /* c->hadamard8_diff[0] */
+int ff_hadamard8_intra16_msa(MpegEncContext *s, uint8_t *dst, uint8_t *src,
+                             ptrdiff_t stride, int h);  /* c->hadamard8_diff[4] */
+int ff_pix_abs16_msa(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
+                     ptrdiff_t stride, int h);  /* c->pix_abs[0][0] and c->sad[0] */
+int ff_pix_abs16_x2_msa(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
+                        ptrdiff_t stride, int h);  /* c->pix_abs[0][1] */
+int ff_pix_abs16_y2_msa(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
+                        ptrdiff_t stride, int h);  /* c->pix_abs[0][2] */
+int ff_pix_abs16_xy2_msa(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
+                         ptrdiff_t stride, int h);  /* c->pix_abs[0][3] */
+int ff_pix_abs8_msa(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
+                    ptrdiff_t stride, int h);  /* c->pix_abs[1][0] and c->sad[1] */
+int ff_pix_abs8_x2_msa(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
+                       ptrdiff_t stride, int h);  /* c->pix_abs[1][1] */
+int ff_pix_abs8_y2_msa(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
+                       ptrdiff_t stride, int h);  /* c->pix_abs[1][2] */
+int ff_pix_abs8_xy2_msa(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
+                        ptrdiff_t stride, int h);  /* c->pix_abs[1][3] */
+int ff_sse16_msa(MpegEncContext *v, uint8_t *pu8Src, uint8_t *pu8Ref,
+                 ptrdiff_t stride, int i32Height);  /* c->sse[0] */
+int ff_sse8_msa(MpegEncContext *v, uint8_t *pu8Src, uint8_t *pu8Ref,
+                ptrdiff_t stride, int i32Height);  /* c->sse[1] */
+int ff_sse4_msa(MpegEncContext *v, uint8_t *pu8Src, uint8_t *pu8Ref,
+                ptrdiff_t stride, int i32Height);  /* c->sse[2] */
+void ff_add_pixels8_msa(uint8_t *av_restrict pixels, int16_t *block,
+                        ptrdiff_t stride);  /* not installed by me_cmp_init_mips.c */
+
+#endif  // #ifndef AVCODEC_MIPS_ME_CMP_MIPS_H
diff --git a/libavcodec/mips/me_cmp_msa.c b/libavcodec/mips/me_cmp_msa.c
new file mode 100644
index 00000000..0e3165cd
--- /dev/null
+++ b/libavcodec/mips/me_cmp_msa.c
@@ -0,0 +1,686 @@
+/*
+ * Copyright (c) 2015 Parag Salasakar (Parag.Salasakar@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/mips/generic_macros_msa.h"
+#include "me_cmp_mips.h"
+
+static uint32_t sad_8width_msa(uint8_t *src, int32_t src_stride,
+                               uint8_t *ref, int32_t ref_stride,
+                               int32_t height)
+{   /* Sums SAD_UB2_UH(src, ref) over height >> 2 groups of 4 strides. */
+    int32_t ht_cnt;
+    v16u8 src0, src1, src2, src3, ref0, ref1, ref2, ref3;
+    v8u16 sad = { 0 }; /* vector accumulator, reduced by HADD_UH_U32 below */
+
+    for (ht_cnt = (height >> 2); ht_cnt--;) { /* height % 4 is not visited */
+        LD_UB4(src, src_stride, src0, src1, src2, src3);
+        src += (4 * src_stride);
+        LD_UB4(ref, ref_stride, ref0, ref1, ref2, ref3);
+        ref += (4 * ref_stride);
+        /* NOTE(review): presumably packs two 8-byte rows per vector; confirm */
+        PCKEV_D4_UB(src1, src0, src3, src2, ref1, ref0, ref3, ref2,
+                    src0, src1, ref0, ref1);
+        sad += SAD_UB2_UH(src0, src1, ref0, ref1);
+    }
+
+    return (HADD_UH_U32(sad));
+}
+
+static uint32_t sad_16width_msa(uint8_t *src, int32_t src_stride,
+                                uint8_t *ref, int32_t ref_stride,
+                                int32_t height)
+{   /* Sums SAD_UB2_UH(src, ref); each pass steps 2 + 2 strides. */
+    int32_t ht_cnt;
+    v16u8 src0, src1, ref0, ref1;
+    v8u16 sad = { 0 }; /* vector accumulator, reduced by HADD_UH_U32 below */
+
+    for (ht_cnt = (height >> 2); ht_cnt--;) { /* height % 4 is not visited */
+        LD_UB2(src, src_stride, src0, src1);
+        src += (2 * src_stride);
+        LD_UB2(ref, ref_stride, ref0, ref1);
+        ref += (2 * ref_stride);
+        sad += SAD_UB2_UH(src0, src1, ref0, ref1);
+        /* second half of the pass: same steps on the next two strides */
+        LD_UB2(src, src_stride, src0, src1);
+        src += (2 * src_stride);
+        LD_UB2(ref, ref_stride, ref0, ref1);
+        ref += (2 * ref_stride);
+        sad += SAD_UB2_UH(src0, src1, ref0, ref1);
+    }
+
+    return (HADD_UH_U32(sad));
+}
+
+static uint32_t sad_horiz_bilinear_filter_8width_msa(uint8_t *src,
+                                                     int32_t src_stride,
+                                                     uint8_t *ref,
+                                                     int32_t ref_stride,
+                                                     int32_t height)
+{   /* SAD of src vs. AVER_UB2_UB(ref, ref passed through SLDI_B2_UB by 1). */
+    int32_t ht_cnt;
+    v16u8 src0, src1, src2, src3, comp0, comp1;
+    v16u8 ref0, ref1, ref2, ref3, ref4, ref5;
+    v8u16 sad = { 0 };
+
+    for (ht_cnt = (height >> 3); ht_cnt--;) { /* two 4-stride halves a pass */
+        LD_UB4(src, src_stride, src0, src1, src2, src3);
+        src += (4 * src_stride);
+        LD_UB4(ref, ref_stride, ref0, ref1, ref2, ref3);
+        ref += (4 * ref_stride);
+        /* ref4/ref5 keep the packed, unslid ref; ref0..ref3 are slid next */
+        PCKEV_D2_UB(src1, src0, src3, src2, src0, src1);
+        PCKEV_D2_UB(ref1, ref0, ref3, ref2, ref4, ref5);
+        SLDI_B2_UB(ref0, ref1, ref0, ref1, ref0, ref1, 1); /* presumably 1 byte */
+        SLDI_B2_UB(ref2, ref3, ref2, ref3, ref2, ref3, 1);
+        PCKEV_D2_UB(ref1, ref0, ref3, ref2, ref0, ref1);
+        AVER_UB2_UB(ref4, ref0, ref5, ref1, comp0, comp1);
+        sad += SAD_UB2_UH(src0, src1, comp0, comp1);
+        /* second half: identical sequence on the next 4 strides */
+        LD_UB4(src, src_stride, src0, src1, src2, src3);
+        src += (4 * src_stride);
+        LD_UB4(ref, ref_stride, ref0, ref1, ref2, ref3);
+        ref += (4 * ref_stride);
+
+        PCKEV_D2_UB(src1, src0, src3, src2, src0, src1);
+        PCKEV_D2_UB(ref1, ref0, ref3, ref2, ref4, ref5);
+        SLDI_B2_UB(ref0, ref1, ref0, ref1, ref0, ref1, 1);
+        SLDI_B2_UB(ref2, ref3, ref2, ref3, ref2, ref3, 1);
+        PCKEV_D2_UB(ref1, ref0, ref3, ref2, ref0, ref1);
+        AVER_UB2_UB(ref4, ref0, ref5, ref1, comp0, comp1);
+        sad += SAD_UB2_UH(src0, src1, comp0, comp1);
+    }
+
+    return (HADD_UH_U32(sad));
+}
+
+static uint32_t sad_horiz_bilinear_filter_16width_msa(uint8_t *src,
+                                                      int32_t src_stride,
+                                                      uint8_t *ref,
+                                                      int32_t ref_stride,
+                                                      int32_t height)
+{   /* SAD of src vs. AVER_UB2_UB of the loads at ref and at ref + 1. */
+    int32_t ht_cnt;
+    v16u8 src0, src1, src2, src3, comp0, comp1;
+    v16u8 ref00, ref10, ref20, ref30, ref01, ref11, ref21, ref31;
+    v8u16 sad = { 0 };
+
+    for (ht_cnt = (height >> 3); ht_cnt--;) { /* two 4-stride halves a pass */
+        LD_UB4(src, src_stride, src0, src1, src2, src3);
+        src += (4 * src_stride);
+        LD_UB4(ref, ref_stride, ref00, ref10, ref20, ref30);
+        LD_UB4(ref + 1, ref_stride, ref01, ref11, ref21, ref31); /* +1 byte */
+        ref += (4 * ref_stride);
+
+        AVER_UB2_UB(ref01, ref00, ref11, ref10, comp0, comp1);
+        sad += SAD_UB2_UH(src0, src1, comp0, comp1);
+        AVER_UB2_UB(ref21, ref20, ref31, ref30, comp0, comp1);
+        sad += SAD_UB2_UH(src2, src3, comp0, comp1);
+        /* second half: identical sequence on the next 4 strides */
+        LD_UB4(src, src_stride, src0, src1, src2, src3);
+        src += (4 * src_stride);
+        LD_UB4(ref, ref_stride, ref00, ref10, ref20, ref30);
+        LD_UB4(ref + 1, ref_stride, ref01, ref11, ref21, ref31);
+        ref += (4 * ref_stride);
+
+        AVER_UB2_UB(ref01, ref00, ref11, ref10, comp0, comp1);
+        sad += SAD_UB2_UH(src0, src1, comp0, comp1);
+        AVER_UB2_UB(ref21, ref20, ref31, ref30, comp0, comp1);
+        sad += SAD_UB2_UH(src2, src3, comp0, comp1);
+    }
+
+    return (HADD_UH_U32(sad));
+}
+
+static uint32_t sad_vert_bilinear_filter_8width_msa(uint8_t *src,
+                                                    int32_t src_stride,
+                                                    uint8_t *ref,
+                                                    int32_t ref_stride,
+                                                    int32_t height)
+{   /* SAD of src vs. AVER_UB2_UB of each ref vector and the one after it. */
+    int32_t ht_cnt;
+    v16u8 src0, src1, src2, src3, comp0, comp1;
+    v16u8 ref0, ref1, ref2, ref3, ref4;
+    v8u16 sad = { 0 };
+
+    for (ht_cnt = (height >> 3); ht_cnt--;) { /* two 4-stride halves a pass */
+        LD_UB4(src, src_stride, src0, src1, src2, src3);
+        src += (4 * src_stride);
+        LD_UB5(ref, ref_stride, ref0, ref1, ref2, ref3, ref4);
+        ref += (4 * ref_stride); /* 5 vectors loaded, only 4 strides stepped */
+
+        PCKEV_D2_UB(src1, src0, src3, src2, src0, src1);
+        PCKEV_D2_UB(ref1, ref0, ref2, ref1, ref0, ref1); /* (0,1) and (1,2) */
+        PCKEV_D2_UB(ref3, ref2, ref4, ref3, ref2, ref3); /* (2,3) and (3,4) */
+        AVER_UB2_UB(ref1, ref0, ref3, ref2, comp0, comp1);
+        sad += SAD_UB2_UH(src0, src1, comp0, comp1);
+        /* second half: identical sequence on the next 4 strides */
+        LD_UB4(src, src_stride, src0, src1, src2, src3);
+        src += (4 * src_stride);
+        LD_UB5(ref, ref_stride, ref0, ref1, ref2, ref3, ref4);
+        ref += (4 * ref_stride);
+
+        PCKEV_D2_UB(src1, src0, src3, src2, src0, src1);
+        PCKEV_D2_UB(ref1, ref0, ref2, ref1, ref0, ref1);
+        PCKEV_D2_UB(ref3, ref2, ref4, ref3, ref2, ref3);
+        AVER_UB2_UB(ref1, ref0, ref3, ref2, comp0, comp1);
+        sad += SAD_UB2_UH(src0, src1, comp0, comp1);
+    }
+
+    return (HADD_UH_U32(sad));
+}
+
+static uint32_t sad_vert_bilinear_filter_16width_msa(uint8_t *src,
+                                                     int32_t src_stride,
+                                                     uint8_t *ref,
+                                                     int32_t ref_stride,
+                                                     int32_t height)
+{   /* SAD of src vs. AVER_UB2_UB of each ref vector and the one after it. */
+    int32_t ht_cnt;
+    v16u8 src0, src1, src2, src3, comp0, comp1;
+    v16u8 ref0, ref1, ref2, ref3, ref4;
+    v8u16 sad = { 0 };
+
+    for (ht_cnt = (height >> 3); ht_cnt--;) { /* ref steps 5 + 3 = 8 strides */
+        LD_UB5(ref, ref_stride, ref4, ref0, ref1, ref2, ref3); /* ref4 first */
+        ref += (5 * ref_stride);
+        LD_UB4(src, src_stride, src0, src1, src2, src3);
+        src += (4 * src_stride);
+
+        AVER_UB2_UB(ref0, ref4, ref1, ref0, comp0, comp1);
+        sad += SAD_UB2_UH(src0, src1, comp0, comp1);
+        AVER_UB2_UB(ref2, ref1, ref3, ref2, comp0, comp1);
+        sad += SAD_UB2_UH(src2, src3, comp0, comp1);
+
+        ref4 = ref3; /* carry the last loaded vector into the second half */
+
+        LD_UB4(ref, ref_stride, ref0, ref1, ref2, ref3);
+        ref += (3 * ref_stride); /* 4 loaded, 3 stepped; next pass reloads it */
+        LD_UB4(src, src_stride, src0, src1, src2, src3);
+        src += (4 * src_stride);
+
+        AVER_UB2_UB(ref0, ref4, ref1, ref0, comp0, comp1);
+        sad += SAD_UB2_UH(src0, src1, comp0, comp1);
+        AVER_UB2_UB(ref2, ref1, ref3, ref2, comp0, comp1);
+        sad += SAD_UB2_UH(src2, src3, comp0, comp1);
+    }
+
+    return (HADD_UH_U32(sad));
+}
+
+static uint32_t sad_hv_bilinear_filter_8width_msa(uint8_t *src,
+                                                  int32_t src_stride,
+                                                  uint8_t *ref,
+                                                  int32_t ref_stride,
+                                                  int32_t height)
+{   /* SAD of src vs. rounded (sum >> 2) of byte pairs from 2 ref vectors. */
+    int32_t ht_cnt;
+    v16u8 src0, src1, src2, src3, temp0, temp1, diff;
+    v16u8 ref0, ref1, ref2, ref3, ref4;
+    v16i8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 }; /* i, i+1 */
+    v8u16 comp0, comp1, comp2, comp3;
+    v8u16 sad = { 0 };
+
+    for (ht_cnt = (height >> 2); ht_cnt--;) { /* height % 4 is not visited */
+        LD_UB5(ref, ref_stride, ref4, ref0, ref1, ref2, ref3); /* ref4 first */
+        ref += (4 * ref_stride); /* 5 vectors loaded, only 4 strides stepped */
+        LD_UB4(src, src_stride, src0, src1, src2, src3);
+        src += (4 * src_stride);
+
+        PCKEV_D2_UB(src1, src0, src3, src2, src0, src1);
+
+        VSHF_B2_UB(ref4, ref4, ref0, ref0, mask, mask, temp0, temp1);
+        comp0 = __msa_hadd_u_h(temp0, temp0); /* pair sums from ref4 */
+        comp1 = __msa_hadd_u_h(temp1, temp1); /* pair sums from ref0 */
+        comp0 += comp1;
+        comp0 = (v8u16) __msa_srari_h((v8i16) comp0, 2); /* rounded >> 2 */
+        comp0 = (v8u16) __msa_pckev_b((v16i8) comp0, (v16i8) comp0);
+
+        temp0 = (v16u8) __msa_vshf_b(mask, (v16i8) ref1, (v16i8) ref1);
+        comp2 = __msa_hadd_u_h(temp0, temp0); /* pair sums from ref1 */
+        comp1 += comp2; /* comp1 is still the unshifted ref0 sums here */
+        comp1 = (v8u16) __msa_srari_h((v8i16) comp1, 2);
+        comp1 = (v8u16) __msa_pckev_b((v16i8) comp1, (v16i8) comp1);
+        comp1 = (v8u16) __msa_pckev_d((v2i64) comp1, (v2i64) comp0);
+        diff = (v16u8) __msa_asub_u_b(src0, (v16u8) comp1);
+        sad += __msa_hadd_u_h(diff, diff);
+
+        temp1 = (v16u8) __msa_vshf_b(mask, (v16i8) ref2, (v16i8) ref2);
+        comp3 = __msa_hadd_u_h(temp1, temp1); /* pair sums from ref2 */
+        comp2 += comp3;
+        comp2 = (v8u16) __msa_srari_h((v8i16) comp2, 2);
+        comp2 = (v8u16) __msa_pckev_b((v16i8) comp2, (v16i8) comp2);
+
+        temp0 = (v16u8) __msa_vshf_b(mask, (v16i8) ref3, (v16i8) ref3);
+        comp0 = __msa_hadd_u_h(temp0, temp0); /* pair sums from ref3 */
+        comp3 += comp0;
+        comp3 = (v8u16) __msa_srari_h((v8i16) comp3, 2);
+        comp3 = (v8u16) __msa_pckev_b((v16i8) comp3, (v16i8) comp3);
+        comp3 = (v8u16) __msa_pckev_d((v2i64) comp3, (v2i64) comp2);
+        diff = (v16u8) __msa_asub_u_b(src1, (v16u8) comp3);
+        sad += __msa_hadd_u_h(diff, diff);
+    }
+
+    return (HADD_UH_U32(sad));
+}
+
+static uint32_t sad_hv_bilinear_filter_16width_msa(uint8_t *src,
+                                                   int32_t src_stride,
+                                                   uint8_t *ref,
+                                                   int32_t ref_stride,
+                                                   int32_t height)
+{   /* SAD of src vs. rounded (sum >> 2) built from loads at ref and ref + 1. */
+    int32_t ht_cnt;
+    v16u8 src0, src1, src2, src3, comp, diff;
+    v16u8 temp0, temp1, temp2, temp3;
+    v16u8 ref00, ref01, ref02, ref03, ref04, ref10, ref11, ref12, ref13, ref14;
+    v8u16 comp0, comp1, comp2, comp3;
+    v8u16 sad = { 0 };
+
+    for (ht_cnt = (height >> 3); ht_cnt--;) { /* ref steps 5 + 3 = 8 strides */
+        LD_UB4(src, src_stride, src0, src1, src2, src3);
+        src += (4 * src_stride);
+        LD_UB5(ref, ref_stride, ref04, ref00, ref01, ref02, ref03);
+        LD_UB5(ref + 1, ref_stride, ref14, ref10, ref11, ref12, ref13);
+        ref += (5 * ref_stride);
+        /* comp0/1 and comp2/3 alternate as "previous" and "current" sums */
+        ILVRL_B2_UB(ref14, ref04, temp0, temp1);
+        comp0 = __msa_hadd_u_h(temp0, temp0);
+        comp1 = __msa_hadd_u_h(temp1, temp1);
+        ILVRL_B2_UB(ref10, ref00, temp2, temp3);
+        comp2 = __msa_hadd_u_h(temp2, temp2);
+        comp3 = __msa_hadd_u_h(temp3, temp3);
+        comp0 += comp2;
+        comp1 += comp3;
+        SRARI_H2_UH(comp0, comp1, 2); /* presumably in place; comp2/3 kept raw */
+        comp = (v16u8) __msa_pckev_b((v16i8) comp1, (v16i8) comp0);
+        diff = __msa_asub_u_b(src0, comp);
+        sad += __msa_hadd_u_h(diff, diff);
+
+        ILVRL_B2_UB(ref11, ref01, temp0, temp1);
+        comp0 = __msa_hadd_u_h(temp0, temp0);
+        comp1 = __msa_hadd_u_h(temp1, temp1);
+        comp2 += comp0;
+        comp3 += comp1;
+        SRARI_H2_UH(comp2, comp3, 2);
+        comp = (v16u8) __msa_pckev_b((v16i8) comp3, (v16i8) comp2);
+        diff = __msa_asub_u_b(src1, comp);
+        sad += __msa_hadd_u_h(diff, diff);
+
+        ILVRL_B2_UB(ref12, ref02, temp2, temp3);
+        comp2 = __msa_hadd_u_h(temp2, temp2);
+        comp3 = __msa_hadd_u_h(temp3, temp3);
+        comp0 += comp2;
+        comp1 += comp3;
+        SRARI_H2_UH(comp0, comp1, 2);
+        comp = (v16u8) __msa_pckev_b((v16i8) comp1, (v16i8) comp0);
+        diff = __msa_asub_u_b(src2, comp);
+        sad += __msa_hadd_u_h(diff, diff);
+
+        ILVRL_B2_UB(ref13, ref03, temp0, temp1);
+        comp0 = __msa_hadd_u_h(temp0, temp0); /* comp0/1 reused after reload */
+        comp1 = __msa_hadd_u_h(temp1, temp1);
+        comp2 += comp0;
+        comp3 += comp1;
+        SRARI_H2_UH(comp2, comp3, 2);
+        comp = (v16u8) __msa_pckev_b((v16i8) comp3, (v16i8) comp2);
+        diff = __msa_asub_u_b(src3, comp);
+        sad += __msa_hadd_u_h(diff, diff);
+        /* second half: comp0/comp1 carry over from the last vector above */
+        LD_UB4(src, src_stride, src0, src1, src2, src3);
+        src += (4 * src_stride);
+        LD_UB4(ref, ref_stride, ref00, ref01, ref02, ref03);
+        LD_UB4(ref + 1, ref_stride, ref10, ref11, ref12, ref13);
+        ref += (3 * ref_stride); /* 4 loaded, 3 stepped; next pass reloads it */
+
+        ILVRL_B2_UB(ref10, ref00, temp2, temp3);
+        comp2 = __msa_hadd_u_h(temp2, temp2);
+        comp3 = __msa_hadd_u_h(temp3, temp3);
+        comp0 += comp2;
+        comp1 += comp3;
+        SRARI_H2_UH(comp0, comp1, 2);
+        comp = (v16u8) __msa_pckev_b((v16i8) comp1, (v16i8) comp0);
+        diff = __msa_asub_u_b(src0, comp);
+        sad += __msa_hadd_u_h(diff, diff);
+
+        ILVRL_B2_UB(ref11, ref01, temp0, temp1);
+        comp0 = __msa_hadd_u_h(temp0, temp0);
+        comp1 = __msa_hadd_u_h(temp1, temp1);
+        comp2 += comp0;
+        comp3 += comp1;
+        SRARI_H2_UH(comp2, comp3, 2);
+        comp = (v16u8) __msa_pckev_b((v16i8) comp3, (v16i8) comp2);
+        diff = __msa_asub_u_b(src1, comp);
+        sad += __msa_hadd_u_h(diff, diff);
+
+        ILVRL_B2_UB(ref12, ref02, temp2, temp3);
+        comp2 = __msa_hadd_u_h(temp2, temp2);
+        comp3 = __msa_hadd_u_h(temp3, temp3);
+        comp0 += comp2;
+        comp1 += comp3;
+        SRARI_H2_UH(comp0, comp1, 2);
+        comp = (v16u8) __msa_pckev_b((v16i8) comp1, (v16i8) comp0);
+        diff = __msa_asub_u_b(src2, comp);
+        sad += __msa_hadd_u_h(diff, diff);
+
+        ILVRL_B2_UB(ref13, ref03, temp0, temp1);
+        comp0 = __msa_hadd_u_h(temp0, temp0);
+        comp1 = __msa_hadd_u_h(temp1, temp1);
+        comp2 += comp0;
+        comp3 += comp1;
+        SRARI_H2_UH(comp2, comp3, 2);
+        comp = (v16u8) __msa_pckev_b((v16i8) comp3, (v16i8) comp2);
+        diff = __msa_asub_u_b(src3, comp);
+        sad += __msa_hadd_u_h(diff, diff);
+    }
+
+    return (HADD_UH_U32(sad));
+}
+
+#define CALC_MSE_B(src, ref, var)                                    \
+{   /* NOTE(review): presumably var += squared src/ref diffs */      \
+    v16u8 src_l0_m, src_l1_m;                                        \
+    v8i16 res_l0_m, res_l1_m;                                        \
+    /* var is passed to DPADD_SH2_SW twice, so it must be an lvalue */ \
+    ILVRL_B2_UB(src, ref, src_l0_m, src_l1_m);                       \
+    HSUB_UB2_SH(src_l0_m, src_l1_m, res_l0_m, res_l1_m);             \
+    DPADD_SH2_SW(res_l0_m, res_l1_m, res_l0_m, res_l1_m, var, var);  \
+}
+
+static uint32_t sse_4width_msa(uint8_t *src_ptr, int32_t src_stride,
+                               uint8_t *ref_ptr, int32_t ref_stride,
+                               int32_t height)
+{   /* Feeds CALC_MSE_B with four 32-bit words per side, height >> 2 times. */
+    int32_t ht_cnt;
+    uint32_t sse;
+    uint32_t src0, src1, src2, src3;
+    uint32_t ref0, ref1, ref2, ref3;
+    v16u8 src = { 0 };
+    v16u8 ref = { 0 };
+    v4i32 var = { 0 }; /* accumulator updated by CALC_MSE_B */
+
+    for (ht_cnt = (height >> 2); ht_cnt--;) { /* height % 4 is not visited */
+        LW4(src_ptr, src_stride, src0, src1, src2, src3);
+        src_ptr += (4 * src_stride);
+        LW4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
+        ref_ptr += (4 * ref_stride);
+        /* gather the four words of each side into one vector */
+        INSERT_W4_UB(src0, src1, src2, src3, src);
+        INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
+        CALC_MSE_B(src, ref, var);
+    }
+
+    sse = HADD_SW_S32(var); /* vector accumulator reduced to one value */
+
+    return sse;
+}
+
+static uint32_t sse_8width_msa(uint8_t *src_ptr, int32_t src_stride,
+                               uint8_t *ref_ptr, int32_t ref_stride,
+                               int32_t height)
+{   /* Feeds CALC_MSE_B with two packed vectors per side, height >> 2 times. */
+    int32_t ht_cnt;
+    uint32_t sse;
+    v16u8 src0, src1, src2, src3;
+    v16u8 ref0, ref1, ref2, ref3;
+    v4i32 var = { 0 }; /* accumulator updated by CALC_MSE_B */
+
+    for (ht_cnt = (height >> 2); ht_cnt--;) { /* height % 4 is not visited */
+        LD_UB4(src_ptr, src_stride, src0, src1, src2, src3);
+        src_ptr += (4 * src_stride);
+        LD_UB4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
+        ref_ptr += (4 * ref_stride);
+        /* NOTE(review): presumably packs two 8-byte rows per vector; confirm */
+        PCKEV_D4_UB(src1, src0, src3, src2, ref1, ref0, ref3, ref2,
+                    src0, src1, ref0, ref1);
+        CALC_MSE_B(src0, ref0, var);
+        CALC_MSE_B(src1, ref1, var);
+    }
+
+    sse = HADD_SW_S32(var); /* vector accumulator reduced to one value */
+
+    return sse;
+}
+
+static uint32_t sse_16width_msa(uint8_t *src_ptr, int32_t src_stride,
+                                uint8_t *ref_ptr, int32_t ref_stride,
+                                int32_t height)
+{   /* Feeds CALC_MSE_B one vector per side, four times per loop pass. */
+    int32_t ht_cnt;
+    uint32_t sse;
+    v16u8 src, ref;
+    v4i32 var = { 0 }; /* accumulator updated by CALC_MSE_B */
+
+    for (ht_cnt = (height >> 2); ht_cnt--;) { /* height % 4 is not visited */
+        src = LD_UB(src_ptr);
+        src_ptr += src_stride;
+        ref = LD_UB(ref_ptr);
+        ref_ptr += ref_stride;
+        CALC_MSE_B(src, ref, var);
+        /* the same load / step / accumulate triple repeats three more times */
+        src = LD_UB(src_ptr);
+        src_ptr += src_stride;
+        ref = LD_UB(ref_ptr);
+        ref_ptr += ref_stride;
+        CALC_MSE_B(src, ref, var);
+
+        src = LD_UB(src_ptr);
+        src_ptr += src_stride;
+        ref = LD_UB(ref_ptr);
+        ref_ptr += ref_stride;
+        CALC_MSE_B(src, ref, var);
+
+        src = LD_UB(src_ptr);
+        src_ptr += src_stride;
+        ref = LD_UB(ref_ptr);
+        ref_ptr += ref_stride;
+        CALC_MSE_B(src, ref, var);
+    }
+
+    sse = HADD_SW_S32(var); /* vector accumulator reduced to one value */
+
+    return sse;
+}
+
+static int32_t hadamard_diff_8x8_msa(uint8_t *src, int32_t src_stride,
+                                     uint8_t *ref, int32_t ref_stride)
+{   /* Butterfly/transpose passes over src/ref, then a sum of magnitudes. */
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
+    v16u8 ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7;
+    v8u16 diff0, diff1, diff2, diff3, diff4, diff5, diff6, diff7;
+    v8u16 temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+    v8i16 sum = { 0 };
+    v8i16 zero = { 0 }; /* second operand for __msa_add_a_h below */
+
+    LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
+    LD_UB8(ref, ref_stride, ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7);
+    ILVR_B8_UH(src0, ref0, src1, ref1, src2, ref2, src3, ref3,
+               src4, ref4, src5, ref5, src6, ref6, src7, ref7,
+               diff0, diff1, diff2, diff3, diff4, diff5, diff6, diff7);
+    HSUB_UB4_UH(diff0, diff1, diff2, diff3, diff0, diff1, diff2, diff3);
+    HSUB_UB4_UH(diff4, diff5, diff6, diff7, diff4, diff5, diff6, diff7);
+    TRANSPOSE8x8_UH_UH(diff0, diff1, diff2, diff3, diff4, diff5, diff6, diff7,
+                       diff0, diff1, diff2, diff3, diff4, diff5, diff6, diff7);
+    BUTTERFLY_8(diff0, diff2, diff4, diff6, diff7, diff5, diff3, diff1,
+                temp0, temp2, temp4, temp6, temp7, temp5, temp3, temp1);
+    BUTTERFLY_8(temp0, temp1, temp4, temp5, temp7, temp6, temp3, temp2,
+                diff0, diff1, diff4, diff5, diff7, diff6, diff3, diff2);
+    BUTTERFLY_8(diff0, diff1, diff2, diff3, diff7, diff6, diff5, diff4,
+                temp0, temp1, temp2, temp3, temp7, temp6, temp5, temp4);
+    TRANSPOSE8x8_UH_UH(temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7,
+                       temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7);
+    BUTTERFLY_8(temp0, temp2, temp4, temp6, temp7, temp5, temp3, temp1,
+                diff0, diff2, diff4, diff6, diff7, diff5, diff3, diff1);
+    BUTTERFLY_8(diff0, diff1, diff4, diff5, diff7, diff6, diff3, diff2,
+                temp0, temp1, temp4, temp5, temp7, temp6, temp3, temp2);
+    ADD4(temp0, temp4, temp1, temp5, temp2, temp6, temp3, temp7,
+         diff0, diff1, diff2, diff3); /* presumably diffN = tempN + temp(N+4) */
+    sum = __msa_asub_s_h((v8i16) temp3, (v8i16) temp7); /* |temp3 - temp7| */
+    sum += __msa_asub_s_h((v8i16) temp2, (v8i16) temp6);
+    sum += __msa_asub_s_h((v8i16) temp1, (v8i16) temp5);
+    sum += __msa_asub_s_h((v8i16) temp0, (v8i16) temp4);
+    sum += __msa_add_a_h((v8i16) diff0, zero); /* |diff0| + |0| */
+    sum += __msa_add_a_h((v8i16) diff1, zero);
+    sum += __msa_add_a_h((v8i16) diff2, zero);
+    sum += __msa_add_a_h((v8i16) diff3, zero);
+
+    return (HADD_UH_U32(sum));
+}
+
+static int32_t hadamard_intra_8x8_msa(uint8_t *src, int32_t src_stride,
+                                      uint8_t *ref, int32_t ref_stride)
+{   /* Scores src alone: ref and ref_stride are never read in this body. */
+    int32_t sum_res = 0;
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
+    v8u16 diff0, diff1, diff2, diff3, diff4, diff5, diff6, diff7;
+    v8u16 temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+    v8i16 sum = { 0 };
+    v16i8 zero = { 0 }; /* interleave partner and __msa_add_a_h operand */
+
+    LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
+    TRANSPOSE8x8_UB_UB(src0, src1, src2, src3, src4, src5, src6, src7,
+                       src0, src1, src2, src3, src4, src5, src6, src7);
+    ILVR_B8_UH(zero, src0, zero, src1, zero, src2, zero, src3,
+               zero, src4, zero, src5, zero, src6, zero, src7,
+               diff0, diff1, diff2, diff3, diff4, diff5, diff6, diff7);
+    BUTTERFLY_8(diff0, diff2, diff4, diff6, diff7, diff5, diff3, diff1,
+                temp0, temp2, temp4, temp6, temp7, temp5, temp3, temp1);
+    BUTTERFLY_8(temp0, temp1, temp4, temp5, temp7, temp6, temp3, temp2,
+                diff0, diff1, diff4, diff5, diff7, diff6, diff3, diff2);
+    BUTTERFLY_8(diff0, diff1, diff2, diff3, diff7, diff6, diff5, diff4,
+                temp0, temp1, temp2, temp3, temp7, temp6, temp5, temp4);
+    TRANSPOSE8x8_UH_UH(temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7,
+                       temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7);
+    BUTTERFLY_8(temp0, temp2, temp4, temp6, temp7, temp5, temp3, temp1,
+                diff0, diff2, diff4, diff6, diff7, diff5, diff3, diff1);
+    BUTTERFLY_8(diff0, diff1, diff4, diff5, diff7, diff6, diff3, diff2,
+                temp0, temp1, temp4, temp5, temp7, temp6, temp3, temp2);
+    ADD4(temp0, temp4, temp1, temp5, temp2, temp6, temp3, temp7,
+         diff0, diff1, diff2, diff3); /* presumably diffN = tempN + temp(N+4) */
+    sum = __msa_asub_s_h((v8i16) temp3, (v8i16) temp7); /* |temp3 - temp7| */
+    sum += __msa_asub_s_h((v8i16) temp2, (v8i16) temp6);
+    sum += __msa_asub_s_h((v8i16) temp1, (v8i16) temp5);
+    sum += __msa_asub_s_h((v8i16) temp0, (v8i16) temp4);
+    sum += __msa_add_a_h((v8i16) diff0, (v8i16) zero); /* |diff0| + |0| */
+    sum += __msa_add_a_h((v8i16) diff1, (v8i16) zero);
+    sum += __msa_add_a_h((v8i16) diff2, (v8i16) zero);
+    sum += __msa_add_a_h((v8i16) diff3, (v8i16) zero);
+    sum_res = (HADD_UH_U32(sum));
+    sum_res -= abs(temp0[0] + temp4[0]); /* drop the lane-0 sum term again */
+
+    return sum_res;
+}
+
+int ff_pix_abs16_msa(MpegEncContext *v, uint8_t *src, uint8_t *ref,
+                     ptrdiff_t stride, int height)
+{   /* v is unused; the one stride is applied to both src and ref. */
+    return sad_16width_msa(src, stride, ref, stride, height);
+}
+
+int ff_pix_abs8_msa(MpegEncContext *v, uint8_t *src, uint8_t *ref,
+                    ptrdiff_t stride, int height)
+{   /* v is unused; the one stride is applied to both src and ref. */
+    return sad_8width_msa(src, stride, ref, stride, height);
+}
+
+int ff_pix_abs16_x2_msa(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
+                        ptrdiff_t stride, int h)
+{   /* v is unused; pix1 is the kernel's src, pix2 its filtered ref. */
+    return sad_horiz_bilinear_filter_16width_msa(pix1, stride, pix2, stride, h);
+}
+
+int ff_pix_abs16_y2_msa(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
+                        ptrdiff_t stride, int h)
+{   /* v is unused; pix1 is the kernel's src, pix2 its filtered ref. */
+    return sad_vert_bilinear_filter_16width_msa(pix1, stride, pix2, stride, h);
+}
+
+int ff_pix_abs16_xy2_msa(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
+                         ptrdiff_t stride, int h)
+{   /* v is unused; pix1 is the kernel's src, pix2 its filtered ref. */
+    return sad_hv_bilinear_filter_16width_msa(pix1, stride, pix2, stride, h);
+}
+
+int ff_pix_abs8_x2_msa(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
+                       ptrdiff_t stride, int h)
+{   /* v is unused; pix1 is the kernel's src, pix2 its filtered ref. */
+    return sad_horiz_bilinear_filter_8width_msa(pix1, stride, pix2, stride, h);
+}
+
+int ff_pix_abs8_y2_msa(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
+                       ptrdiff_t stride, int h)
+{   /* v is unused; pix1 is the kernel's src, pix2 its filtered ref. */
+    return sad_vert_bilinear_filter_8width_msa(pix1, stride, pix2, stride, h);
+}
+
+int ff_pix_abs8_xy2_msa(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
+                        ptrdiff_t stride, int h)
+{   /* v is unused; pix1 is the kernel's src, pix2 its filtered ref. */
+    return sad_hv_bilinear_filter_8width_msa(pix1, stride, pix2, stride, h);
+}
+
+int ff_sse16_msa(MpegEncContext *v, uint8_t *src, uint8_t *ref,
+                 ptrdiff_t stride, int height)
+{   /* v is unused; the kernel's uint32_t result is returned as int. */
+    return sse_16width_msa(src, stride, ref, stride, height);
+}
+
+int ff_sse8_msa(MpegEncContext *v, uint8_t *src, uint8_t *ref,
+                ptrdiff_t stride, int height)
+{   /* v is unused; the kernel's uint32_t result is returned as int. */
+    return sse_8width_msa(src, stride, ref, stride, height);
+}
+
+int ff_sse4_msa(MpegEncContext *v, uint8_t *src, uint8_t *ref,
+                ptrdiff_t stride, int height)
+{   /* v is unused; the kernel's uint32_t result is returned as int. */
+    return sse_4width_msa(src, stride, ref, stride, height);
+}
+
+int ff_hadamard8_diff8x8_msa(MpegEncContext *s, uint8_t *dst, uint8_t *src,
+                             ptrdiff_t stride, int h)
+{   /* s and h are unused; src goes in as the kernel's src, dst as its ref. */
+    return hadamard_diff_8x8_msa(src, stride, dst, stride);
+}
+
+int ff_hadamard8_intra8x8_msa(MpegEncContext *s, uint8_t *dst, uint8_t *src,
+                              ptrdiff_t stride, int h)
+{   /* Kernel reads only its first pointer: pass the first block, not src. */
+    return hadamard_intra_8x8_msa(dst, stride, src, stride);
+}
+
+/* Hadamard Transform functions */
+#define WRAPPER8_16_SQ(name8, name16)                      \
+int name16(MpegEncContext *s, uint8_t *dst, uint8_t *src,  \
+           ptrdiff_t stride, int h)                        \
+{   /* sums name8 over the 8x8 tiles of a 16-wide block */ \
+    int score = 0;                                         \
+    score += name8(s, dst, src, stride, 8);                \
+    score += name8(s, dst + 8, src + 8, stride, 8);        \
+    if(h == 16) { /* lower tile row only for h == 16 */    \
+        dst += 8 * stride;                                 \
+        src += 8 * stride;                                 \
+        score +=name8(s, dst, src, stride, 8);             \
+        score +=name8(s, dst + 8, src + 8, stride, 8);     \
+    }                                                      \
+    return score;                                          \
+}
+
+WRAPPER8_16_SQ(ff_hadamard8_diff8x8_msa, ff_hadamard8_diff16_msa)   /* no ';': expands to a function body */
+WRAPPER8_16_SQ(ff_hadamard8_intra8x8_msa, ff_hadamard8_intra16_msa) /* no ';': expands to a function body */
diff --git a/libavcodec/mips/mpegaudiodsp_mips_fixed.c b/libavcodec/mips/mpegaudiodsp_mips_fixed.c
index 86ea13d8..ed8c8908 100644
--- a/libavcodec/mips/mpegaudiodsp_mips_fixed.c
+++ b/libavcodec/mips/mpegaudiodsp_mips_fixed.c
@@ -57,6 +57,9 @@
 #include "libavutil/mips/asmdefs.h"
 #include "libavcodec/mpegaudiodsp.h"
 
+#if HAVE_INLINE_ASM
+#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
+
 static void ff_mpadsp_apply_window_mips_fixed(int32_t *synth_buf, int32_t *window,
                                int *dither_state, int16_t *samples, int incr)
 {
@@ -901,8 +904,15 @@ static void ff_imdct36_blocks_mips_fixed(int *out, int *buf, int *in,
     }
 }
 
-void ff_mpadsp_init_mipsdspr1(MPADSPContext *s)
+#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
+#endif /* HAVE_INLINE_ASM */
+
+void ff_mpadsp_init_mipsdsp(MPADSPContext *s)
 {
+#if HAVE_INLINE_ASM
+#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
     s->apply_window_fixed   = ff_mpadsp_apply_window_mips_fixed;
     s->imdct36_blocks_fixed = ff_imdct36_blocks_mips_fixed;
+#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
+#endif /* HAVE_INLINE_ASM */
 }
diff --git a/libavcodec/mips/mpegaudiodsp_mips_float.c b/libavcodec/mips/mpegaudiodsp_mips_float.c
index beebace5..270838eb 100644
--- a/libavcodec/mips/mpegaudiodsp_mips_float.c
+++ b/libavcodec/mips/mpegaudiodsp_mips_float.c
@@ -58,6 +58,9 @@
 #include "libavutil/mips/asmdefs.h"
 #include "libavcodec/mpegaudiodsp.h"
 
+#if HAVE_INLINE_ASM && HAVE_MIPSFPU
+#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
+
 static void ff_mpadsp_apply_window_mips_float(float *synth_buf, float *window,
                                int *dither_state, float *samples, int incr)
 {
@@ -278,7 +281,6 @@ static void ff_mpadsp_apply_window_mips_float(float *synth_buf, float *window,
     );
 }
 
-#if !HAVE_LOONGSON3
 static void ff_dct32_mips_float(float *out, const float *tab)
 {
     float val0 , val1 , val2 , val3 , val4 , val5 , val6 , val7,
@@ -787,7 +789,6 @@ static void ff_dct32_mips_float(float *out, const float *tab)
     out[15] = val30 + val17;
     out[31] = val31;
 }
-#endif /* !HAVE_LOONGSON3 */
 
 static void imdct36_mips_float(float *out, float *buf, float *in, float *win)
 {
@@ -1226,7 +1227,6 @@ static void imdct36_mips_float(float *out, float *buf, float *in, float *win)
     );
 }
 
-#if !HAVE_LOONGSON3
 static void ff_imdct36_blocks_mips_float(float *out, float *buf, float *in,
                                int count, int switch_point, int block_type)
 {
@@ -1245,13 +1245,17 @@ static void ff_imdct36_blocks_mips_float(float *out, float *buf, float *in,
         out++;
     }
 }
-#endif /* !HAVE_LOONGSON3 */
+
+#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
+#endif /* HAVE_INLINE_ASM && HAVE_MIPSFPU */
 
 void ff_mpadsp_init_mipsfpu(MPADSPContext *s)
 {
+#if HAVE_INLINE_ASM && HAVE_MIPSFPU
+#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
     s->apply_window_float   = ff_mpadsp_apply_window_mips_float;
-#if !HAVE_LOONGSON3
     s->imdct36_blocks_float = ff_imdct36_blocks_mips_float;
     s->dct32_float          = ff_dct32_mips_float;
-#endif /* !HAVE_LOONGSON3 */
+#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
+#endif /* HAVE_INLINE_ASM && HAVE_MIPSFPU */
 }
diff --git a/libavcodec/mips/mpegvideo_init_mips.c b/libavcodec/mips/mpegvideo_init_mips.c
new file mode 100644
index 00000000..1918da5f
--- /dev/null
+++ b/libavcodec/mips/mpegvideo_init_mips.c
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2015 Manojkumar Bhosale (Manojkumar.Bhosale@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "h263dsp_mips.h"
+#include "mpegvideo_mips.h"
+
+#if HAVE_MSA
+static av_cold void dct_unquantize_init_msa(MpegEncContext *s)
+{
+    if (!s->q_scale_type) /* the MPEG-2 inter hook is replaced only then */
+        s->dct_unquantize_mpeg2_inter = ff_dct_unquantize_mpeg2_inter_msa;
+    s->dct_unquantize_h263_inter = ff_dct_unquantize_h263_inter_msa;
+    s->dct_unquantize_h263_intra = ff_dct_unquantize_h263_intra_msa;
+}
+#endif /* HAVE_MSA */
+
+#if HAVE_MMI
+static av_cold void dct_unquantize_init_mmi(MpegEncContext *s)
+{
+    /* Route the H.263/MPEG-1 unquantizers and the denoiser to MMI code. */
+    s->denoise_dct                = ff_denoise_dct_mmi;
+    s->dct_unquantize_mpeg1_inter = ff_dct_unquantize_mpeg1_inter_mmi;
+    s->dct_unquantize_mpeg1_intra = ff_dct_unquantize_mpeg1_intra_mmi;
+    s->dct_unquantize_h263_inter  = ff_dct_unquantize_h263_inter_mmi;
+    s->dct_unquantize_h263_intra  = ff_dct_unquantize_h263_intra_mmi;
+    /* The MPEG-2 intra hook is replaced only when bit-exact output is not
+     * requested and q_scale_type is zero. */
+    if (!(s->avctx->flags & AV_CODEC_FLAG_BITEXACT) && !s->q_scale_type)
+        s->dct_unquantize_mpeg2_intra = ff_dct_unquantize_mpeg2_intra_mmi;
+}
+#endif /* HAVE_MMI */
+
+av_cold void ff_mpv_common_init_mips(MpegEncContext *s)
+{
+#if HAVE_MSA
+    dct_unquantize_init_msa(s);
+#endif /* HAVE_MSA */
+#if HAVE_MMI
+    dct_unquantize_init_mmi(s); /* runs last: overrides hooks MSA also set */
+#endif /* HAVE_MMI */
+}
diff --git a/libavcodec/mips/mpegvideo_mips.h b/libavcodec/mips/mpegvideo_mips.h
new file mode 100644
index 00000000..760d7b32
--- /dev/null
+++ b/libavcodec/mips/mpegvideo_mips.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MIPS_MPEGVIDEO_MIPS_H
+#define AVCODEC_MIPS_MPEGVIDEO_MIPS_H
+
+#include "libavcodec/mpegvideo.h"
+
+void ff_dct_unquantize_h263_intra_mmi(MpegEncContext *s, int16_t *block,
+        int n, int qscale); /* MMI hook for s->dct_unquantize_h263_intra */
+void ff_dct_unquantize_h263_inter_mmi(MpegEncContext *s, int16_t *block,
+        int n, int qscale); /* MMI hook for s->dct_unquantize_h263_inter */
+void ff_dct_unquantize_mpeg1_intra_mmi(MpegEncContext *s, int16_t *block,
+        int n, int qscale); /* MMI hook for s->dct_unquantize_mpeg1_intra */
+void ff_dct_unquantize_mpeg1_inter_mmi(MpegEncContext *s, int16_t *block,
+        int n, int qscale); /* MMI hook for s->dct_unquantize_mpeg1_inter */
+void ff_dct_unquantize_mpeg2_intra_mmi(MpegEncContext *s, int16_t *block,
+        int n, int qscale); /* MMI hook for s->dct_unquantize_mpeg2_intra */
+void ff_denoise_dct_mmi(MpegEncContext *s, int16_t *block); /* s->denoise_dct */
+
+#endif /* AVCODEC_MIPS_MPEGVIDEO_MIPS_H */
diff --git a/libavcodec/mips/mpegvideo_mmi.c b/libavcodec/mips/mpegvideo_mmi.c
new file mode 100644
index 00000000..94781e6e
--- /dev/null
+++ b/libavcodec/mips/mpegvideo_mmi.c
@@ -0,0 +1,443 @@
+/*
+ * Loongson SIMD optimized mpegvideo
+ *
+ * Copyright (c) 2015 Loongson Technology Corporation Limited
+ * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
+ *                    Zhang Shuangshuang <zhangshuangshuang@ict.ac.cn>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "mpegvideo_mips.h"
+
+void ff_dct_unquantize_h263_intra_mmi(MpegEncContext *s, int16_t *block,
+        int n, int qscale)
+{
+    int64_t level, qmul, qadd, nCoeffs, offset;
+    /* H.263 intra dequantizer: AC terms in MMI asm, DC term handled in C. */
+    qmul = qscale << 1;
+    av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
+
+    if (!s->h263_aic) {
+        if (n<4)
+            level = block[0] * s->y_dc_scale;
+        else
+            level = block[0] * s->c_dc_scale;
+        qadd = (qscale-1) | 1;
+    } else {
+        qadd = 0;
+        level = block[0];
+    }
+
+    if(s->ac_pred)
+        nCoeffs = 63;
+    else
+        nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]];
+    offset = -2 * nCoeffs; /* byte offset, counted up to 0 by the asm loop */
+    __asm__ volatile (
+        "xor $f12, $f12, $f12           \r\n"
+        "lwc1 $f12, %[qmul]             \r\n"
+        "xor $f10, $f10, $f10           \r\n"
+        "lwc1 $f10, %[qadd]             \r\n"
+        "xor $f14, $f14, $f14           \r\n"
+        "packsswh $f12, $f12, $f12      \r\n"
+        "packsswh $f12, $f12, $f12      \r\n"
+        "packsswh $f10, $f10, $f10      \r\n"
+        "packsswh $f10, $f10, $f10      \r\n"
+        "psubh $f14, $f14, $f10         \r\n"
+        "xor $f8, $f8, $f8              \r\n"
+        ".p2align 4                     \r\n"
+        "1:                             \r\n"
+        "daddu $8, %[blk], %[off]       \r\n"
+        "gsldlc1 $f0, 7($8)             \r\n"
+        "gsldrc1 $f0, 0($8)             \r\n"
+        "gsldlc1 $f2, 15($8)            \r\n"
+        "gsldrc1 $f2, 8($8)             \r\n"
+        "mov.d $f4, $f0                 \r\n"
+        "mov.d $f6, $f2                 \r\n"
+        "pmullh $f0, $f0, $f12          \r\n"
+        "pmullh $f2, $f2, $f12          \r\n"
+        "pcmpgth $f4, $f4, $f8          \r\n"
+        "pcmpgth $f6, $f6, $f8          \r\n"
+        "xor $f0, $f0, $f4              \r\n"
+        "xor $f2, $f2, $f6              \r\n"
+        "paddh $f0, $f0, $f14           \r\n"
+        "paddh $f2, $f2, $f14           \r\n"
+        "xor $f4, $f4, $f0              \r\n"
+        "xor $f6, $f6, $f2              \r\n"
+        "pcmpeqh $f0, $f0, $f14         \r\n"
+        "pcmpeqh $f2, $f2, $f14         \r\n"
+        "pandn $f0, $f0, $f4            \r\n"
+        "pandn $f2, $f2, $f6            \r\n"
+        "gssdlc1 $f0, 7($8)             \r\n"
+        "gssdrc1 $f0, 0($8)             \r\n"
+        "gssdlc1 $f2, 15($8)            \r\n"
+        "gssdrc1 $f2, 8($8)             \r\n"
+        "addi %[off], %[off], 16        \r\n"
+        "blez %[off], 1b                \r\n"
+        : [off]"+r"(offset)
+        : [blk]"r"(block+nCoeffs),[qmul]"m"(qmul),[qadd]"m"(qadd)
+        : "$8","$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14","memory"
+    );
+    block[0] = level; /* the loop also rewrote block[0]; restore the DC term */
+}
+
+void ff_dct_unquantize_h263_inter_mmi(MpegEncContext *s, int16_t *block,
+        int n, int qscale)
+{
+    int64_t qmul = qscale << 1, qadd = (qscale - 1) | 1, nCoeffs, offset;
+    /* H.263 inter dequantizer: every pass of the asm loop rewrites 16 bytes
+     * of block in place (two 8-byte loads and two 8-byte stores). */
+    av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
+    nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]];
+    offset = -2 * nCoeffs; /* byte offset, counted up to 0 by the asm loop */
+    __asm__ volatile (
+        "xor $f12, $f12, $f12           \r\n"
+        "lwc1 $f12, %[qmul]             \r\n"
+        "xor $f10, $f10, $f10           \r\n"
+        "lwc1 $f10, %[qadd]             \r\n"
+        "packsswh $f12, $f12, $f12      \r\n"
+        "packsswh $f12, $f12, $f12      \r\n"
+        "xor $f14, $f14, $f14           \r\n"
+        "packsswh $f10, $f10, $f10      \r\n"
+        "packsswh $f10, $f10, $f10      \r\n"
+        "psubh $f14, $f14, $f10         \r\n"
+        "xor $f8, $f8, $f8              \r\n"
+        ".p2align 4                     \r\n"
+        "1:                             \r\n"
+        "daddu $8, %[blk], %[off]       \r\n"
+        "gsldlc1 $f0, 7($8)             \r\n"
+        "gsldrc1 $f0, 0($8)             \r\n"
+        "gsldlc1 $f2, 15($8)            \r\n"
+        "gsldrc1 $f2, 8($8)             \r\n"
+        "mov.d $f4, $f0                 \r\n"
+        "mov.d $f6, $f2                 \r\n"
+        "pmullh $f0, $f0, $f12          \r\n"
+        "pmullh $f2, $f2, $f12          \r\n"
+        "pcmpgth $f4, $f4, $f8          \r\n"
+        "pcmpgth $f6, $f6, $f8          \r\n"
+        "xor $f0, $f0, $f4              \r\n"
+        "xor $f2, $f2, $f6              \r\n"
+        "paddh $f0, $f0, $f14           \r\n"
+        "paddh $f2, $f2, $f14           \r\n"
+        "xor $f4, $f4, $f0              \r\n"
+        "xor $f6, $f6, $f2              \r\n"
+        "pcmpeqh $f0, $f0, $f14         \r\n"
+        "pcmpeqh $f2, $f2, $f14         \r\n"
+        "pandn $f0, $f0, $f4            \r\n"
+        "pandn $f2, $f2, $f6            \r\n"
+        "gssdlc1 $f0, 7($8)             \r\n"
+        "gssdrc1 $f0, 0($8)             \r\n"
+        "gssdlc1 $f2, 15($8)            \r\n"
+        "gssdrc1 $f2, 8($8)             \r\n"
+        "addi %[off], %[off], 16        \r\n"
+        "blez %[off], 1b                \r\n"
+        : [off]"+r"(offset)
+        : [blk]"r"(block+nCoeffs),[qmul]"m"(qmul),[qadd]"m"(qadd)
+        : "$8","$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14","memory"
+    );
+}
+
+void ff_dct_unquantize_mpeg1_intra_mmi(MpegEncContext *s, int16_t *block,
+        int n, int qscale)
+{
+    int64_t nCoeffs;
+    const uint16_t *quant_matrix;
+    int block0;
+    /* MPEG-1 intra dequantizer: AC terms in MMI asm, DC term scaled in C. */
+    av_assert2(s->block_last_index[n]>=0);
+    nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]] + 1;
+
+    if (n<4)
+        block0 = block[0] * s->y_dc_scale;
+    else
+        block0 = block[0] * s->c_dc_scale;
+
+    /* XXX: only mpeg1 */
+    quant_matrix = s->intra_matrix;
+
+    __asm__ volatile (
+        "pcmpeqh $f14, $f14, $f14       \r\n"
+        "dli $10, 15                    \r\n"
+        "dmtc1 $10, $f16                \r\n"
+        "xor $f12, $f12, $f12           \r\n"
+        "lwc1 $f12, %2                  \r\n"
+        "psrlh $f14, $f14, $f16         \r\n"
+        "packsswh $f12, $f12, $f12      \r\n"
+        "packsswh $f12, $f12, $f12      \r\n"
+        "or $8, %3, $0                  \r\n"
+        ".p2align 4                     \r\n"
+        "1:                             \r\n"
+        "gsldxc1 $f0, 0($8, %0)         \r\n"
+        "gsldxc1 $f2, 8($8, %0)         \r\n"
+        "mov.d $f16, $f0                \r\n"
+        "mov.d $f18, $f2                \r\n"
+        "gsldxc1 $f8, 0($8, %1)         \r\n"
+        "gsldxc1 $f10, 8($8, %1)        \r\n"
+        "pmullh $f8, $f8, $f12          \r\n"
+        "pmullh $f10, $f10, $f12        \r\n"
+        "xor $f4, $f4, $f4              \r\n"
+        "xor $f6, $f6, $f6              \r\n"
+        "pcmpgth $f4, $f4, $f0          \r\n"
+        "pcmpgth $f6, $f6, $f2          \r\n"
+        "xor $f0, $f0, $f4              \r\n"
+        "xor $f2, $f2, $f6              \r\n"
+        "psubh $f0, $f0, $f4            \r\n"
+        "psubh $f2, $f2, $f6            \r\n"
+        "pmullh $f0, $f0, $f8           \r\n"
+        "pmullh $f2, $f2, $f10          \r\n"
+        "xor $f8, $f8, $f8              \r\n"
+        "xor $f10, $f10, $f10           \r\n"
+        "pcmpeqh $f8, $f8, $f16         \r\n"
+        "pcmpeqh $f10, $f10, $f18       \r\n"
+        "dli $10, 3                     \r\n"
+        "dmtc1 $10, $f16                \r\n"
+        "psrah $f0, $f0, $f16           \r\n"
+        "psrah $f2, $f2, $f16           \r\n"
+        "psubh $f0, $f0, $f14           \r\n"
+        "psubh $f2, $f2, $f14           \r\n"
+        "or $f0, $f0, $f14              \r\n"
+        "or $f2, $f2, $f14              \r\n"
+        "xor $f0, $f0, $f4              \r\n"
+        "xor $f2, $f2, $f6              \r\n"
+        "psubh $f0, $f0, $f4            \r\n"
+        "psubh $f2, $f2, $f6            \r\n"
+        "pandn $f8, $f8, $f0            \r\n"
+        "pandn $f10, $f10, $f2          \r\n"
+        "gssdxc1 $f8, 0($8, %0)         \r\n"
+        "gssdxc1 $f10, 8($8, %0)        \r\n"
+        "addi $8, $8, 16                \r\n"
+        "bltz $8, 1b                    \r\n"
+        ::"r"(block+nCoeffs),"r"(quant_matrix+nCoeffs),"m"(qscale),
+          "r"(-2*nCoeffs) /* register only: %3 is the source of "or" above */
+        : "$8","$10","$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14","$f16",
+          "$f18","memory"
+    );
+    block[0] = block0; /* the loop also rewrote block[0]; restore the DC term */
+}
+
+void ff_dct_unquantize_mpeg1_inter_mmi(MpegEncContext *s, int16_t *block,
+        int n, int qscale)
+{
+    int64_t nCoeffs;
+    const uint16_t *quant_matrix;
+    /* MPEG-1 inter dequantizer; the asm loop rewrites block in place. */
+    av_assert2(s->block_last_index[n] >= 0);
+    nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]] + 1;
+    quant_matrix = s->inter_matrix;
+    __asm__ volatile (
+        "pcmpeqh $f14, $f14, $f14       \r\n"
+        "dli $10, 15                    \r\n"
+        "dmtc1 $10, $f16                \r\n"
+        "xor $f12, $f12, $f12           \r\n"
+        "lwc1 $f12, %2                  \r\n"
+        "psrlh $f14, $f14, $f16         \r\n"
+        "packsswh $f12, $f12, $f12      \r\n"
+        "packsswh $f12, $f12, $f12      \r\n"
+        "or $8, %3, $0                  \r\n"
+        ".p2align 4                     \r\n"
+        "1:                             \r\n"
+        "gsldxc1 $f0, 0($8, %0)         \r\n"
+        "gsldxc1 $f2, 8($8, %0)         \r\n"
+        "mov.d $f16, $f0                \r\n"
+        "mov.d $f18, $f2                \r\n"
+        "gsldxc1 $f8, 0($8, %1)         \r\n"
+        "gsldxc1 $f10, 8($8, %1)        \r\n"
+        "pmullh $f8, $f8, $f12          \r\n"
+        "pmullh $f10, $f10, $f12        \r\n"
+        "xor $f4, $f4, $f4              \r\n"
+        "xor $f6, $f6, $f6              \r\n"
+        "pcmpgth $f4, $f4, $f0          \r\n"
+        "pcmpgth $f6, $f6, $f2          \r\n"
+        "xor $f0, $f0, $f4              \r\n"
+        "xor $f2, $f2, $f6              \r\n"
+        "psubh $f0, $f0, $f4            \r\n"
+        "psubh $f2, $f2, $f6            \r\n"
+        "paddh $f0, $f0, $f0            \r\n"
+        "paddh $f2, $f2, $f2            \r\n"
+        "paddh $f0, $f0, $f14           \r\n"
+        "paddh $f2, $f2, $f14           \r\n"
+        "pmullh $f0, $f0, $f8           \r\n"
+        "pmullh $f2, $f2, $f10          \r\n"
+        "xor $f8, $f8, $f8              \r\n"
+        "xor $f10, $f10, $f10           \r\n"
+        "pcmpeqh $f8, $f8, $f16         \r\n"
+        "pcmpeqh $f10, $f10, $f18       \r\n"
+        "dli $10, 4                     \r\n"
+        "dmtc1 $10, $f16                \r\n"
+        "psrah $f0, $f0, $f16           \r\n"
+        "psrah $f2, $f2, $f16           \r\n"
+        "psubh $f0, $f0, $f14           \r\n"
+        "psubh $f2, $f2, $f14           \r\n"
+        "or $f0, $f0, $f14              \r\n"
+        "or $f2, $f2, $f14              \r\n"
+        "xor $f0, $f0, $f4              \r\n"
+        "xor $f2, $f2, $f6              \r\n"
+        "psubh $f0, $f0, $f4            \r\n"
+        "psubh $f2, $f2, $f6            \r\n"
+        "pandn $f8, $f8, $f0            \r\n"
+        "pandn $f10, $f10, $f2          \r\n"
+        "gssdxc1 $f8, 0($8, %0)         \r\n"
+        "gssdxc1 $f10, 8($8, %0)        \r\n"
+        "addi $8, $8, 16                \r\n"
+        "bltz $8, 1b                    \r\n"
+        ::"r"(block+nCoeffs),"r"(quant_matrix+nCoeffs),"m"(qscale),
+          "r"(-2*nCoeffs) /* register only: %3 is the source of "or" above */
+        : "$8","$10","$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14","$f16",
+          "$f18","memory"
+    );
+}
+
+void ff_denoise_dct_mmi(MpegEncContext *s, int16_t *block)
+{
+    const int intra = s->mb_intra;
+    int *sum = s->dct_error_sum[intra];
+    uint16_t *offset = s->dct_offset[intra];
+
+    s->dct_count[intra]++;
+    /* One pass handles 16 bytes of block/offset and 32 bytes of sum. */
+    __asm__ volatile(
+        "xor $f14, $f14, $f14               \r\n"
+        "1:                                 \r\n"
+        "ldc1 $f4, 0(%[block])              \r\n"
+        "xor $f0, $f0, $f0                  \r\n"
+        "ldc1 $f6, 8(%[block])              \r\n"
+        "xor $f2, $f2, $f2                  \r\n"
+        "pcmpgth $f0, $f0, $f4              \r\n"
+        "pcmpgth $f2, $f2, $f6              \r\n"
+        "xor $f4, $f4, $f0                  \r\n"
+        "xor $f6, $f6, $f2                  \r\n"
+        "psubh $f4, $f4, $f0                \r\n"
+        "psubh $f6, $f6, $f2                \r\n"
+        "ldc1 $f12, 0(%[offset])            \r\n"
+        "mov.d $f8, $f4                     \r\n"
+        "psubush $f4, $f4, $f12             \r\n"
+        "ldc1 $f12, 8(%[offset])            \r\n"
+        "mov.d $f10, $f6                    \r\n"
+        "psubush $f6, $f6, $f12             \r\n"
+        "xor $f4, $f4, $f0                  \r\n"
+        "xor $f6, $f6, $f2                  \r\n"
+        "psubh $f4, $f4, $f0                \r\n"
+        "psubh $f6, $f6, $f2                \r\n"
+        "sdc1 $f4, 0(%[block])              \r\n"
+        "sdc1 $f6, 8(%[block])              \r\n"
+        "mov.d $f4, $f8                     \r\n"
+        "mov.d $f6, $f10                    \r\n"
+        "punpcklhw $f8, $f8, $f14           \r\n"
+        "punpckhhw $f4, $f4, $f14           \r\n"
+        "punpcklhw $f10, $f10, $f14         \r\n"
+        "punpckhhw $f6, $f6, $f14           \r\n"
+        "ldc1 $f0, 0(%[sum])                \r\n"
+        "paddw $f8, $f8, $f0                \r\n"
+        "ldc1 $f0, 8(%[sum])                \r\n"
+        "paddw $f4, $f4, $f0                \r\n"
+        "ldc1 $f0, 16(%[sum])               \r\n"
+        "paddw $f10, $f10, $f0              \r\n"
+        "ldc1 $f0, 24(%[sum])               \r\n"
+        "paddw $f6, $f6, $f0                \r\n"
+        "sdc1 $f8, 0(%[sum])                \r\n"
+        "sdc1 $f4, 8(%[sum])                \r\n"
+        "sdc1 $f10, 16(%[sum])              \r\n"
+        "sdc1 $f6, 24(%[sum])               \r\n"
+        "daddiu %[block], %[block], 16      \r\n"
+        "daddiu %[sum], %[sum], 32          \r\n"
+        "daddiu %[offset], %[offset], 16    \r\n"
+        "dsubu $8, %[block1], %[block]      \r\n"
+        "bgtz $8, 1b                        \r\n"
+        : [block]"+r"(block),[sum]"+r"(sum),[offset]"+r"(offset)
+        : [block1]"r"(block+64)
+        : "$8","$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14","memory"
+    );
+}
+
+void ff_dct_unquantize_mpeg2_intra_mmi(MpegEncContext *s, int16_t *block,
+        int n, int qscale)
+{
+    int64_t nCoeffs;
+    const uint16_t *quant_matrix;
+    int block0;
+    /* MPEG-2 intra dequantizer: AC terms in MMI asm, DC term scaled in C. */
+    av_assert2(s->block_last_index[n]>=0);
+
+    if (s->alternate_scan)
+        nCoeffs = 63;
+    else
+        nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]];
+
+    if (n < 4)
+        block0 = block[0] * s->y_dc_scale;
+    else
+        block0 = block[0] * s->c_dc_scale;
+
+    quant_matrix = s->intra_matrix;
+
+    __asm__ volatile (
+        "pcmpeqh $f14, $f14, $f14           \r\n"
+        "dli $10, 15                        \r\n"
+        "dmtc1 $10, $f16                    \r\n"
+        "xor $f12, $f12, $f12               \r\n"
+        "lwc1 $f12, %[qscale]               \r\n"
+        "psrlh $f14, $f14, $f16             \r\n"
+        "packsswh $f12, $f12, $f12          \r\n"
+        "packsswh $f12, $f12, $f12          \r\n"
+        "or $8, %[ncoeffs], $0              \r\n"
+        ".p2align 4                         \r\n"
+        "1:                                 \r\n"
+        "gsldxc1 $f0, 0($8, %[block])       \r\n"
+        "gsldxc1 $f2, 8($8, %[block])       \r\n"
+        "mov.d $f16, $f0                    \r\n"
+        "mov.d $f18, $f2                    \r\n"
+        "gsldxc1 $f8, 0($8, %[quant])       \r\n"
+        "gsldxc1 $f10, 8($8, %[quant])      \r\n"
+        "pmullh $f8, $f8, $f12              \r\n"
+        "pmullh $f10, $f10, $f12            \r\n"
+        "xor $f4, $f4, $f4                  \r\n"
+        "xor $f6, $f6, $f6                  \r\n"
+        "pcmpgth $f4, $f4, $f0              \r\n"
+        "pcmpgth $f6, $f6, $f2              \r\n"
+        "xor $f0, $f0, $f4                  \r\n"
+        "xor $f2, $f2, $f6                  \r\n"
+        "psubh $f0, $f0, $f4                \r\n"
+        "psubh $f2, $f2, $f6                \r\n"
+        "pmullh $f0, $f0, $f8               \r\n"
+        "pmullh $f2, $f2, $f10              \r\n"
+        "xor $f8, $f8, $f8                  \r\n"
+        "xor $f10, $f10, $f10               \r\n"
+        "pcmpeqh $f8, $f8, $f16             \r\n"
+        "pcmpeqh $f10 ,$f10, $f18           \r\n"
+        "dli $10, 3                         \r\n"
+        "dmtc1 $10, $f16                    \r\n"
+        "psrah $f0, $f0, $f16               \r\n"
+        "psrah $f2, $f2, $f16               \r\n"
+        "xor $f0, $f0, $f4                  \r\n"
+        "xor $f2, $f2, $f6                  \r\n"
+        "psubh $f0, $f0, $f4                \r\n"
+        "psubh $f2, $f2, $f6                \r\n"
+        "pandn $f8, $f8, $f0                \r\n"
+        "pandn $f10, $f10, $f2              \r\n"
+        "gssdxc1 $f8, 0($8, %[block])       \r\n"
+        "gssdxc1 $f10, 8($8, %[block])      \r\n"
+        "daddiu $8, $8, 16                  \r\n"
+        "blez $8, 1b                        \r\n"
+        ::[block]"r"(block+nCoeffs),[quant]"r"(quant_matrix+nCoeffs),
+          [qscale]"m"(qscale),[ncoeffs]"r"(-2*nCoeffs)
+        : "$8","$10","$f0","$f2","$f4","$f6","$f8","$f10","$f12","$f14","$f16",
+          "$f18","memory"
+    );
+
+    block[0]= block0; /* the loop also rewrote block[0]; restore the DC term */
+}
diff --git a/libavcodec/mips/mpegvideo_msa.c b/libavcodec/mips/mpegvideo_msa.c
new file mode 100644
index 00000000..aa9ef770
--- /dev/null
+++ b/libavcodec/mips/mpegvideo_msa.c
@@ -0,0 +1,250 @@
+/*
+ * Copyright (c) 2015 Manojkumar Bhosale (Manojkumar.Bhosale@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/mips/generic_macros_msa.h"
+#include "h263dsp_mips.h"
+
+static void h263_dct_unquantize_msa(int16_t *block, int16_t qmul,
+                                    int16_t qadd, int8_t n_coeffs,
+                                    uint8_t loop_start)
+{
+    /* Rescale block[loop_start..n_coeffs]: in the scalar tail a non-zero
+     * value v becomes v * qmul + qadd (v > 0) or v * qmul - qadd (v < 0)
+     * and zeros are left alone.  Whole groups of eight are handled first
+     * by the vector loop, which selects between the same two results. */
+    const int32_t groups = n_coeffs >> 3;
+    const v8i16 vqmul = __msa_fill_h(qmul);
+    const v8i16 vqadd = __msa_fill_h(qadd);
+    int16_t *vptr = block + loop_start;
+    int32_t idx;
+
+    for (idx = 0; idx < groups; idx++, vptr += 8) {
+        v8i16 coefs = LD_SH(vptr);
+        v8i16 is_neg = __msa_clti_s_h(coefs, 0);
+        v8i16 is_zero = __msa_ceqi_h(coefs, 0);
+        v8i16 scaled = coefs * vqmul;
+        v8i16 res = scaled + vqadd;
+
+        res = (v8i16) __msa_bmnz_v((v16u8) res, (v16u8) (scaled - vqadd),
+                                   (v16u8) is_neg);
+        res = (v8i16) __msa_bmnz_v((v16u8) res, (v16u8) coefs,
+                                   (v16u8) is_zero);
+        ST_SH(res, vptr);
+    }
+
+    for (idx = groups * 8 + loop_start; idx <= n_coeffs; idx++) {
+        int32_t level = block[idx];
+        if (!level)
+            continue;
+        if (level < 0)
+            level = level * qmul - qadd;
+        else
+            level = level * qmul + qadd;
+        block[idx] = level;
+    }
+}
+
+static int32_t mpeg2_dct_unquantize_inter_msa(int16_t *block,
+                                              int32_t qscale,
+                                              const int16_t *quant_matrix)
+{
+    int32_t cnt, sum_res = -1; /* running total starts at -1 */
+    v8i16 block_vec, block_neg, qscale_vec, mask;
+    v8i16 block_org0, block_org1, block_org2, block_org3;
+    v8i16 quant_m0, quant_m1, quant_m2, quant_m3;
+    v8i16 sum, mul, zero_mask;
+    v4i32 mul_vec, qscale_l, qscale_r, quant_m_r, quant_m_l;
+    v4i32 block_l, block_r, sad;
+
+    qscale_vec = __msa_fill_h(qscale);
+    for (cnt = 0; cnt < 2; cnt++) { /* 2 passes, block advances 4 * 8 each */
+        LD_SH4(block, 8, block_org0, block_org1, block_org2, block_org3);
+        LD_SH4(quant_matrix, 8, quant_m0, quant_m1, quant_m2, quant_m3);
+        mask = __msa_clti_s_h(block_org0, 0); /* first vector of the pass */
+        zero_mask = __msa_ceqi_h(block_org0, 0);
+        block_neg = -block_org0;
+        block_vec = (v8i16) __msa_bmnz_v((v16u8) block_org0, (v16u8) block_neg,
+                                         (v16u8) mask);
+        block_vec <<= 1;
+        block_vec += 1;
+        UNPCK_SH_SW(block_vec, block_r, block_l);
+        UNPCK_SH_SW(qscale_vec, qscale_r, qscale_l);
+        UNPCK_SH_SW(quant_m0, quant_m_r, quant_m_l);
+        mul_vec = block_l * qscale_l;
+        mul_vec *= quant_m_l;
+        block_l = mul_vec >> 4;
+        mul_vec = block_r * qscale_r;
+        mul_vec *= quant_m_r;
+        block_r = mul_vec >> 4;
+        mul = (v8i16) __msa_pckev_h((v8i16) block_l, (v8i16) block_r);
+        block_neg = - mul;
+        sum = (v8i16) __msa_bmnz_v((v16u8) mul, (v16u8) block_neg,
+                                   (v16u8) mask);
+        sum = (v8i16) __msa_bmnz_v((v16u8) sum, (v16u8) block_org0,
+                                   (v16u8) zero_mask);
+        ST_SH(sum, block);
+        block += 8;
+        quant_matrix += 8;
+        sad = __msa_hadd_s_w(sum, sum);
+        sum_res += HADD_SW_S32(sad);
+        mask = __msa_clti_s_h(block_org1, 0); /* second vector of the pass */
+        zero_mask = __msa_ceqi_h(block_org1, 0);
+        block_neg = - block_org1;
+        block_vec = (v8i16) __msa_bmnz_v((v16u8) block_org1, (v16u8) block_neg,
+                                         (v16u8) mask);
+        block_vec <<= 1;
+        block_vec += 1;
+        UNPCK_SH_SW(block_vec, block_r, block_l);
+        UNPCK_SH_SW(qscale_vec, qscale_r, qscale_l);
+        UNPCK_SH_SW(quant_m1, quant_m_r, quant_m_l);
+        mul_vec = block_l * qscale_l;
+        mul_vec *= quant_m_l;
+        block_l = mul_vec >> 4;
+        mul_vec = block_r * qscale_r;
+        mul_vec *= quant_m_r;
+        block_r = mul_vec >> 4;
+        mul = __msa_pckev_h((v8i16) block_l, (v8i16) block_r);
+        block_neg = - mul;
+        sum = (v8i16) __msa_bmnz_v((v16u8) mul, (v16u8) block_neg,
+                                   (v16u8) mask);
+        sum = (v8i16) __msa_bmnz_v((v16u8) sum, (v16u8) block_org1,
+                                   (v16u8) zero_mask);
+        ST_SH(sum, block);
+        /* Step to the next 8 coefficients; fold this vector into sum_res. */
+        block += 8;
+        quant_matrix += 8;
+        sad = __msa_hadd_s_w(sum, sum);
+        sum_res += HADD_SW_S32(sad);
+        mask = __msa_clti_s_h(block_org2, 0); /* third vector of the pass */
+        zero_mask = __msa_ceqi_h(block_org2, 0);
+        block_neg = - block_org2;
+        block_vec = (v8i16) __msa_bmnz_v((v16u8) block_org2, (v16u8) block_neg,
+                                         (v16u8) mask);
+        block_vec <<= 1;
+        block_vec += 1;
+        UNPCK_SH_SW(block_vec, block_r, block_l);
+        UNPCK_SH_SW(qscale_vec, qscale_r, qscale_l);
+        UNPCK_SH_SW(quant_m2, quant_m_r, quant_m_l);
+        mul_vec = block_l * qscale_l;
+        mul_vec *= quant_m_l;
+        block_l = mul_vec >> 4;
+        mul_vec = block_r * qscale_r;
+        mul_vec *= quant_m_r;
+        block_r = mul_vec >> 4;
+        mul = __msa_pckev_h((v8i16) block_l, (v8i16) block_r);
+        block_neg = - mul;
+        sum = (v8i16) __msa_bmnz_v((v16u8) mul, (v16u8) block_neg,
+                                   (v16u8) mask);
+        sum = (v8i16) __msa_bmnz_v((v16u8) sum, (v16u8) block_org2,
+                                   (v16u8) zero_mask);
+        ST_SH(sum, block);
+        /* Step to the next 8 coefficients; fold this vector into sum_res. */
+        block += 8;
+        quant_matrix += 8;
+        sad = __msa_hadd_s_w(sum, sum);
+        sum_res += HADD_SW_S32(sad);
+        mask = __msa_clti_s_h(block_org3, 0); /* fourth vector of the pass */
+        zero_mask = __msa_ceqi_h(block_org3, 0);
+        block_neg = - block_org3;
+        block_vec = (v8i16) __msa_bmnz_v((v16u8) block_org3, (v16u8) block_neg,
+                                         (v16u8) mask);
+        block_vec <<= 1;
+        block_vec += 1;
+        UNPCK_SH_SW(block_vec, block_r, block_l);
+        UNPCK_SH_SW(qscale_vec, qscale_r, qscale_l);
+        UNPCK_SH_SW(quant_m3, quant_m_r, quant_m_l);
+        mul_vec = block_l * qscale_l;
+        mul_vec *= quant_m_l;
+        block_l = mul_vec >> 4;
+        mul_vec = block_r * qscale_r;
+        mul_vec *= quant_m_r;
+        block_r = mul_vec >> 4;
+        mul = __msa_pckev_h((v8i16) block_l, (v8i16) block_r);
+        block_neg = - mul;
+        sum = (v8i16) __msa_bmnz_v((v16u8) mul, (v16u8) block_neg,
+                                   (v16u8) mask);
+        sum = (v8i16) __msa_bmnz_v((v16u8) sum, (v16u8) block_org3,
+                                   (v16u8) zero_mask);
+        ST_SH(sum, block);
+        /* Step to the next 8 coefficients; fold this vector into sum_res. */
+        block += 8;
+        quant_matrix += 8;
+        sad = __msa_hadd_s_w(sum, sum);
+        sum_res += HADD_SW_S32(sad);
+    }
+
+    return sum_res; /* -1 plus the eight per-vector contributions */
+}
+
+void ff_dct_unquantize_h263_intra_msa(MpegEncContext *s,
+                                      int16_t *block, int32_t index,
+                                      int32_t qscale)
+{
+    /* H.263 intra unquantizer: the DC term is scaled here (unless h263_aic
+     * is set), the remaining coefficients by h263_dct_unquantize_msa(). */
+    int32_t last_coeff = 63;
+    int32_t round_add = 0;
+
+    av_assert2(s->block_last_index[index] >= 0 || s->h263_aic);
+
+    if (!s->h263_aic) {
+        if (index < 4)
+            block[0] *= s->y_dc_scale;
+        else
+            block[0] *= s->c_dc_scale;
+        round_add = (qscale - 1) | 1;
+    }
+    if (!s->ac_pred)
+        last_coeff =
+            s->inter_scantable.raster_end[s->block_last_index[index]];
+
+    h263_dct_unquantize_msa(block, qscale << 1, round_add, last_coeff, 1);
+}
+
+void ff_dct_unquantize_h263_inter_msa(MpegEncContext *s,
+                                      int16_t *block, int32_t index,
+                                      int32_t qscale)
+{
+    /* H.263 inter unquantizer: every coefficient up to the last one, the
+     * first included, goes through h263_dct_unquantize_msa(). */
+    const int32_t round_add = (qscale - 1) | 1;
+    const int32_t scale = qscale << 1;
+    int32_t last_coeff;
+
+    av_assert2(s->block_last_index[index] >= 0);
+
+    last_coeff = s->inter_scantable.raster_end[s->block_last_index[index]];
+
+    h263_dct_unquantize_msa(block, scale, round_add, last_coeff, 0);
+}
+
+void ff_dct_unquantize_mpeg2_inter_msa(MpegEncContext *s,
+                                       int16_t *block, int32_t index,
+                                       int32_t qscale)
+{
+    /* Run the MSA helper over block with the inter matrix of s, then flip
+     * the lowest bit of block[63] when the value it returns is odd.
+     * The index argument is not used. */
+    const int32_t sum = mpeg2_dct_unquantize_inter_msa(block, qscale,
+                                                       s->inter_matrix);
+    const int32_t odd = sum & 1;
+
+    block[63] ^= odd;
+}
diff --git a/libavcodec/mips/mpegvideoencdsp_init_mips.c b/libavcodec/mips/mpegvideoencdsp_init_mips.c
new file mode 100644
index 00000000..9bfe94e4
--- /dev/null
+++ b/libavcodec/mips/mpegvideoencdsp_init_mips.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2015 Manojkumar Bhosale (Manojkumar.Bhosale@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/bit_depth_template.c"
+#include "h263dsp_mips.h"
+/* MSA setup: installs ff_pix_sum_msa when BIT_DEPTH is 8; avctx is unused. */
+#if HAVE_MSA
+static av_cold void mpegvideoencdsp_init_msa(MpegvideoEncDSPContext *c,
+                                             AVCodecContext *avctx)
+{
+#if BIT_DEPTH == 8
+    c->pix_sum = ff_pix_sum_msa;
+#endif
+}
+#endif  /* HAVE_MSA */
+/* MIPS entry point: applies the MSA setup when it was compiled in. */
+av_cold void ff_mpegvideoencdsp_init_mips(MpegvideoEncDSPContext *c,
+                                          AVCodecContext *avctx)
+{
+#if HAVE_MSA
+    mpegvideoencdsp_init_msa(c, avctx);
+#endif  /* HAVE_MSA */
+}
diff --git a/libavcodec/mips/mpegvideoencdsp_msa.c b/libavcodec/mips/mpegvideoencdsp_msa.c
new file mode 100644
index 00000000..46473daf
--- /dev/null
+++ b/libavcodec/mips/mpegvideoencdsp_msa.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2015 Manojkumar Bhosale (Manojkumar.Bhosale@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "h263dsp_mips.h"
+#include "libavutil/mips/generic_macros_msa.h"
+
+/* Sums every byte of a block that is 16 rows tall (rows `stride` bytes
+ * apart) and one v16u8 vector wide. The rows are first horizontally added
+ * in place and then reduced to scalars with the MSA helper macros. */
+static int32_t sum_u8src_16width_msa(uint8_t *src, int32_t stride)
+{
+    uint32_t total;
+    v16u8 r0, r1, r2, r3, r4, r5, r6, r7;
+    v16u8 r8, r9, r10, r11, r12, r13, r14, r15;
+
+    /* first eight rows, then the following eight */
+    LD_UB8(src, stride, r0, r1, r2, r3, r4, r5, r6, r7);
+    src += (8 * stride);
+    LD_UB8(src, stride, r8, r9, r10, r11, r12, r13, r14, r15);
+
+    /* in-place horizontal add of every row vector, four rows per macro */
+    HADD_UB4_UB(r0, r1, r2, r3, r0, r1, r2, r3);
+    HADD_UB4_UB(r4, r5, r6, r7, r4, r5, r6, r7);
+    HADD_UB4_UB(r8, r9, r10, r11, r8, r9, r10, r11);
+    HADD_UB4_UB(r12, r13, r14, r15, r12, r13, r14, r15);
+
+    /* reduce each row to a scalar and accumulate; the partial sums are
+     * small, so grouping the additions in pairs cannot change the result */
+    total  = HADD_UH_U32(r0)  + HADD_UH_U32(r1);
+    total += HADD_UH_U32(r2)  + HADD_UH_U32(r3);
+    total += HADD_UH_U32(r4)  + HADD_UH_U32(r5);
+    total += HADD_UH_U32(r6)  + HADD_UH_U32(r7);
+    total += HADD_UH_U32(r8)  + HADD_UH_U32(r9);
+    total += HADD_UH_U32(r10) + HADD_UH_U32(r11);
+    total += HADD_UH_U32(r12) + HADD_UH_U32(r13);
+    total += HADD_UH_U32(r14) + HADD_UH_U32(r15);
+
+    return total;
+}
+
+/* Public pix_sum entry point; simply forwards to the 16-wide MSA helper. */
+int ff_pix_sum_msa(uint8_t *pix, int line_size)
+{
+    return sum_u8src_16width_msa(pix, line_size);
+}
diff --git a/libavcodec/mips/pixblockdsp_init_mips.c b/libavcodec/mips/pixblockdsp_init_mips.c
new file mode 100644
index 00000000..1b3741ea
--- /dev/null
+++ b/libavcodec/mips/pixblockdsp_init_mips.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2015 Shivraj Patil (Shivraj.Patil@imgtec.com)
+ *                    Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "pixblockdsp_mips.h"
+
+#if HAVE_MSA
+/* Install the MSA pixblock routines. diff_pixels is always replaced;
+ * get_pixels is chosen from avctx->bits_per_raw_sample and is left
+ * untouched for video depths above 8 other than 9, 10, 12 and 14.
+ * high_bit_depth is accepted but not consulted here. */
+static av_cold void pixblockdsp_init_msa(PixblockDSPContext *c,
+                                         AVCodecContext *avctx,
+                                         unsigned high_bit_depth)
+{
+    const int depth = avctx->bits_per_raw_sample;
+    const int is_video = avctx->codec_type == AVMEDIA_TYPE_VIDEO;
+
+    c->diff_pixels = ff_diff_pixels_msa;
+
+    if (depth == 9 || depth == 10 || depth == 12 || depth == 14) {
+        /* sample depths served by the 16-bit fetch routine */
+        c->get_pixels = ff_get_pixels_16_msa;
+    } else if (depth <= 8 || !is_video) {
+        /* depths up to 8, or any non-video codec */
+        c->get_pixels = ff_get_pixels_8_msa;
+    }
+}
+#endif  // #if HAVE_MSA
+
+#if HAVE_MMI
+/* Install the MMI routines; get_pixels is skipped for high-depth video. */
+static av_cold void pixblockdsp_init_mmi(PixblockDSPContext *c,
+        AVCodecContext *avctx, unsigned high_bit_depth)
+{
+    c->diff_pixels = ff_diff_pixels_mmi;
+    if (high_bit_depth && avctx->codec_type == AVMEDIA_TYPE_VIDEO)
+        return;
+    c->get_pixels = ff_get_pixels_8_mmi;
+}
+#endif /* HAVE_MMI */
+/* MIPS entry point; MMI runs last, so it wins where both set a pointer. */
+av_cold void ff_pixblockdsp_init_mips(PixblockDSPContext *c,
+        AVCodecContext *avctx, unsigned high_bit_depth)
+{
+#if HAVE_MSA
+    pixblockdsp_init_msa(c, avctx, high_bit_depth);
+#endif  // #if HAVE_MSA
+#if HAVE_MMI
+    pixblockdsp_init_mmi(c, avctx, high_bit_depth);
+#endif /* HAVE_MMI */
+}
diff --git a/libavcodec/mips/pixblockdsp_mips.h b/libavcodec/mips/pixblockdsp_mips.h
new file mode 100644
index 00000000..7f8cc966
--- /dev/null
+++ b/libavcodec/mips/pixblockdsp_mips.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2015 Shivraj Patil (Shivraj.Patil@imgtec.com)
+ *                    Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MIPS_PIXBLOCKDSP_MIPS_H
+#define AVCODEC_MIPS_PIXBLOCKDSP_MIPS_H
+
+#include "../mpegvideo.h"
+/* MSA implementations (libavcodec/mips/pixblockdsp_msa.c) */
+void ff_diff_pixels_msa(int16_t *av_restrict block, const uint8_t *src1,
+                        const uint8_t *src2, int stride);
+void ff_get_pixels_16_msa(int16_t *av_restrict dst, const uint8_t *src,
+                          ptrdiff_t stride);
+void ff_get_pixels_8_msa(int16_t *av_restrict dst, const uint8_t *src,
+                         ptrdiff_t stride);
+/* Loongson MMI implementations (libavcodec/mips/pixblockdsp_mmi.c) */
+void ff_get_pixels_8_mmi(int16_t *av_restrict block, const uint8_t *pixels,
+        ptrdiff_t line_size);
+void ff_diff_pixels_mmi(int16_t *av_restrict block, const uint8_t *src1,
+        const uint8_t *src2, int stride);
+
+#endif  // #ifndef AVCODEC_MIPS_PIXBLOCKDSP_MIPS_H
diff --git a/libavcodec/mips/pixblockdsp_mmi.c b/libavcodec/mips/pixblockdsp_mmi.c
new file mode 100644
index 00000000..30631d80
--- /dev/null
+++ b/libavcodec/mips/pixblockdsp_mmi.c
@@ -0,0 +1,79 @@
+/*
+ * Loongson SIMD optimized pixblockdsp
+ *
+ * Copyright (c) 2015 Loongson Technology Corporation Limited
+ * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "pixblockdsp_mips.h"
+/* Widen an 8x8 block of bytes (rows line_size apart) to 64 int16_t values. */
+void ff_get_pixels_8_mmi(int16_t *av_restrict block, const uint8_t *pixels,
+        ptrdiff_t line_size)
+{
+    long rows = 8;  /* the asm counts this down, so it is a read-write operand */
+    __asm__ volatile (
+        "move $8, $0                    \n\t"
+        "xor $f0, $f0, $f0              \n\t"
+        "1: gsldlc1 $f2, 7(%[p])        \n\t"
+        "gsldrc1 $f2, 0(%[p])           \n\t"
+        "punpcklbh $f4, $f2, $f0        \n\t"
+        "punpckhbh $f6, $f2, $f0        \n\t"
+        "gssdxc1 $f4, 0(%[d], $8)       \n\t"
+        "gssdxc1 $f6, 8(%[d], $8)       \n\t"
+        "daddiu $8, $8, 16              \n\t"
+        "daddu %[p], %[p], %[s]         \n\t"
+        "daddi %[n], %[n], -1           \n\t"
+        "bnez %[n], 1b                  \n\t"
+        : [p]"+&r"(pixels), [n]"+&r"(rows) : [d]"r"(block), [s]"r"(line_size)
+        : "$8", "$f0", "$f2", "$f4", "$f6", "memory"
+    );
+}
+/* block[0..63] = src1 - src2 over 8 rows of 8 bytes (rows stride apart). */
+void ff_diff_pixels_mmi(int16_t *av_restrict block, const uint8_t *src1,
+        const uint8_t *src2, int stride)
+{
+    __asm__ volatile (
+        "dli $2, 8                     \n\t"
+        "xor $f14, $f14, $f14          \n\t"
+        "1: gsldlc1 $f0, 7(%1)         \n\t"
+        "gsldrc1 $f0, 0(%1)            \n\t"
+        "or $f2, $f0, $f0              \n\t"
+        "gsldlc1 $f4, 7(%2)            \n\t"
+        "gsldrc1 $f4, 0(%2)            \n\t"
+        "or $f6, $f4, $f4              \n\t"
+        "punpcklbh $f0, $f0, $f14      \n\t"
+        "punpckhbh $f2, $f2, $f14      \n\t"
+        "punpcklbh $f4, $f4, $f14      \n\t"
+        "punpckhbh $f6, $f6, $f14      \n\t"
+        "psubh $f0, $f0, $f4           \n\t"
+        "psubh $f2, $f2, $f6           \n\t"
+        "gssdlc1 $f0, 7(%0)            \n\t"
+        "gssdrc1 $f0, 0(%0)            \n\t"
+        "gssdlc1 $f2, 15(%0)           \n\t"
+        "gssdrc1 $f2, 8(%0)            \n\t"
+        "daddi %0, %0, 16              \n\t"
+        "daddu %1, %1, %3              \n\t"
+        "daddu %2, %2, %3              \n\t"
+        "daddi $2, $2, -1              \n\t"
+        "bgtz $2, 1b                   \n\t"
+        /* %0-%2 are advanced by the loop, so they must be read-write operands */
+        : "+&r"(block), "+&r"(src1), "+&r"(src2) : "r"(stride)
+        : "$2", "$f0", "$f2", "$f4", "$f6", "$f14", "memory"
+    );
+}
diff --git a/libavcodec/mips/pixblockdsp_msa.c b/libavcodec/mips/pixblockdsp_msa.c
new file mode 100644
index 00000000..966e11a7
--- /dev/null
+++ b/libavcodec/mips/pixblockdsp_msa.c
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2015 Shivraj Patil (Shivraj.Patil@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/mips/generic_macros_msa.h"
+#include "pixblockdsp_mips.h"
+/* Writes 64 int16_t results to block from 8 rows of src1 and src2 (rows
+ * `stride` bytes apart); per the name, presumably block = src1 - src2. */
+static void diff_pixels_msa(int16_t *block, const uint8_t *src1,
+                            const uint8_t *src2, int32_t stride)
+{
+    v16u8 a0, a1, a2, a3, a4, a5, a6, a7;
+    v16u8 b0, b1, b2, b3, b4, b5, b6, b7;
+    v8i16 d0, d1, d2, d3, d4, d5, d6, d7;
+
+    LD_UB8(src1, stride, a0, a1, a2, a3, a4, a5, a6, a7);
+    LD_UB8(src2, stride, b0, b1, b2, b3, b4, b5, b6, b7);
+    /* interleave row n of src1 with row n of src2, then HSUB each result */
+    ILVR_B4_SH(a0, b0, a1, b1, a2, b2, a3, b3, d0, d1, d2, d3);
+    ILVR_B4_SH(a4, b4, a5, b5, a6, b6, a7, b7, d4, d5, d6, d7);
+    HSUB_UB4_SH(d0, d1, d2, d3, d0, d1, d2, d3);
+    HSUB_UB4_SH(d4, d5, d6, d7, d4, d5, d6, d7);
+    ST_SH8(d0, d1, d2, d3, d4, d5, d6, d7, block, 8);
+}
+
+/* Expand rows of bytes into 16-bit samples, four rows per iteration.
+ * src_stride counts bytes and dst_stride counts int16_t elements; the
+ * height % 4 remainder rows, if any, are not processed. */
+static void copy_8bit_to_16bit_width8_msa(const uint8_t *src, int32_t src_stride,
+                                          int16_t *dst, int32_t dst_stride,
+                                          int32_t height)
+{
+    uint8_t *out = (uint8_t *) dst;   /* byte view of the int16_t output */
+    int32_t groups = height >> 2;
+    v16u8 row0, row1, row2, row3;
+    v16i8 zero = { 0 };
+
+    while (groups--) {
+        LD_UB4(src, src_stride, row0, row1, row2, row3);
+        src += (4 * src_stride);
+        /* pair every source byte with a zero byte */
+        ILVR_B4_UB(zero, row0, zero, row1, zero, row2, zero, row3,
+                   row0, row1, row2, row3);
+        ST_UB4(row0, row1, row2, row3, out, (dst_stride * 2));
+        out += (4 * 2 * dst_stride);
+    }
+}
+
+/* Copy a width x height byte rectangle in tiles of 16 columns by 8 rows.
+ * Columns beyond a multiple of 16 and rows beyond a multiple of 8 are
+ * not copied. */
+static void copy_16multx8mult_msa(const uint8_t *src, int32_t src_stride,
+                                  uint8_t *dst, int32_t dst_stride,
+                                  int32_t height, int32_t width)
+{
+    int32_t col_tiles = width >> 4;
+    v16u8 r0, r1, r2, r3, r4, r5, r6, r7;
+
+    while (col_tiles--) {
+        const uint8_t *in = src;
+        uint8_t *out = dst;
+        int32_t row_tiles = height >> 3;
+
+        while (row_tiles--) {
+            LD_UB8(in, src_stride, r0, r1, r2, r3, r4, r5, r6, r7);
+            in += (8 * src_stride);
+            ST_UB8(r0, r1, r2, r3, r4, r5, r6, r7, out, dst_stride);
+            out += (8 * dst_stride);
+        }
+
+        /* move on to the next 16-byte column strip */
+        src += 16;
+        dst += 16;
+    }
+}
+
+/* Copy `height` rows of 16 bytes. Heights divisible by 12 (8 + 4 rows per
+ * pass), by 8 or by 4 are handled, tested in that order; other heights
+ * copy nothing. */
+static void copy_width16_msa(const uint8_t *src, int32_t src_stride,
+                             uint8_t *dst, int32_t dst_stride,
+                             int32_t height)
+{
+    int32_t iters;
+    v16u8 r0, r1, r2, r3, r4, r5, r6, r7;
+
+    if (height % 12 == 0) {
+        iters = height / 12;
+        while (iters--) {
+            LD_UB8(src, src_stride, r0, r1, r2, r3, r4, r5, r6, r7);
+            src += (8 * src_stride);
+            ST_UB8(r0, r1, r2, r3, r4, r5, r6, r7, dst, dst_stride);
+            dst += (8 * dst_stride);
+            LD_UB4(src, src_stride, r0, r1, r2, r3);
+            src += (4 * src_stride);
+            ST_UB4(r0, r1, r2, r3, dst, dst_stride);
+            dst += (4 * dst_stride);
+        }
+    } else if (height % 8 == 0) {
+        copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 16);
+    } else if (height % 4 == 0) {
+        iters = height >> 2;
+        while (iters--) {
+            LD_UB4(src, src_stride, r0, r1, r2, r3);
+            src += (4 * src_stride);
+            ST_UB4(r0, r1, r2, r3, dst, dst_stride);
+            dst += (4 * dst_stride);
+        }
+    }
+}
+/* Copies 8 rows of 16 bytes from src (rows stride apart) into dest. */
+void ff_get_pixels_16_msa(int16_t *av_restrict dest, const uint8_t *src,
+                          ptrdiff_t stride)
+{
+    copy_width16_msa(src, stride, (uint8_t *) dest, 16, 8);
+}
+/* Expands 8 rows of 8 bytes from src into 16-bit values stored in dest. */
+void ff_get_pixels_8_msa(int16_t *av_restrict dest, const uint8_t *src,
+                         ptrdiff_t stride)
+{
+    copy_8bit_to_16bit_width8_msa(src, stride, dest, 8, 8);
+}
+/* Exported name for diff_pixels_msa(); the arguments pass straight through. */
+void ff_diff_pixels_msa(int16_t *av_restrict block, const uint8_t *src1,
+                        const uint8_t *src2, int stride)
+{
+    diff_pixels_msa(block, src1, src2, stride);
+}
diff --git a/libavcodec/mips/qpeldsp_init_mips.c b/libavcodec/mips/qpeldsp_init_mips.c
new file mode 100644
index 00000000..140e8f89
--- /dev/null
+++ b/libavcodec/mips/qpeldsp_init_mips.c
@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) 2015 Parag Salasakar (Parag.Salasakar@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "qpeldsp_mips.h"
+/* Points every qpel table slot of c at its MSA implementation. */
+#if HAVE_MSA
+static av_cold void qpeldsp_init_msa(QpelDSPContext *c)
+{
+    c->put_qpel_pixels_tab[0][0] = ff_copy_16x16_msa; /* [0]: 16x16 set */
+    c->put_qpel_pixels_tab[0][1] = ff_horiz_mc_qpel_aver_src0_16width_msa;
+    c->put_qpel_pixels_tab[0][2] = ff_horiz_mc_qpel_16width_msa;
+    c->put_qpel_pixels_tab[0][3] = ff_horiz_mc_qpel_aver_src1_16width_msa;
+    c->put_qpel_pixels_tab[0][4] = ff_vert_mc_qpel_aver_src0_16x16_msa;
+    c->put_qpel_pixels_tab[0][5] = ff_hv_mc_qpel_aver_hv_src00_16x16_msa;
+    c->put_qpel_pixels_tab[0][6] = ff_hv_mc_qpel_aver_v_src0_16x16_msa;
+    c->put_qpel_pixels_tab[0][7] = ff_hv_mc_qpel_aver_hv_src10_16x16_msa;
+    c->put_qpel_pixels_tab[0][8] = ff_vert_mc_qpel_16x16_msa;
+    c->put_qpel_pixels_tab[0][9] = ff_hv_mc_qpel_aver_h_src0_16x16_msa;
+    c->put_qpel_pixels_tab[0][10] = ff_hv_mc_qpel_16x16_msa;
+    c->put_qpel_pixels_tab[0][11] = ff_hv_mc_qpel_aver_h_src1_16x16_msa;
+    c->put_qpel_pixels_tab[0][12] = ff_vert_mc_qpel_aver_src1_16x16_msa;
+    c->put_qpel_pixels_tab[0][13] = ff_hv_mc_qpel_aver_hv_src01_16x16_msa;
+    c->put_qpel_pixels_tab[0][14] = ff_hv_mc_qpel_aver_v_src1_16x16_msa;
+    c->put_qpel_pixels_tab[0][15] = ff_hv_mc_qpel_aver_hv_src11_16x16_msa;
+    /* put_qpel_pixels_tab[1]: the 8x8 / 8-wide functions */
+    c->put_qpel_pixels_tab[1][0] = ff_copy_8x8_msa;
+    c->put_qpel_pixels_tab[1][1] = ff_horiz_mc_qpel_aver_src0_8width_msa;
+    c->put_qpel_pixels_tab[1][2] = ff_horiz_mc_qpel_8width_msa;
+    c->put_qpel_pixels_tab[1][3] = ff_horiz_mc_qpel_aver_src1_8width_msa;
+    c->put_qpel_pixels_tab[1][4] = ff_vert_mc_qpel_aver_src0_8x8_msa;
+    c->put_qpel_pixels_tab[1][5] = ff_hv_mc_qpel_aver_hv_src00_8x8_msa;
+    c->put_qpel_pixels_tab[1][6] = ff_hv_mc_qpel_aver_v_src0_8x8_msa;
+    c->put_qpel_pixels_tab[1][7] = ff_hv_mc_qpel_aver_hv_src10_8x8_msa;
+    c->put_qpel_pixels_tab[1][8] = ff_vert_mc_qpel_8x8_msa;
+    c->put_qpel_pixels_tab[1][9] = ff_hv_mc_qpel_aver_h_src0_8x8_msa;
+    c->put_qpel_pixels_tab[1][10] = ff_hv_mc_qpel_8x8_msa;
+    c->put_qpel_pixels_tab[1][11] = ff_hv_mc_qpel_aver_h_src1_8x8_msa;
+    c->put_qpel_pixels_tab[1][12] = ff_vert_mc_qpel_aver_src1_8x8_msa;
+    c->put_qpel_pixels_tab[1][13] = ff_hv_mc_qpel_aver_hv_src01_8x8_msa;
+    c->put_qpel_pixels_tab[1][14] = ff_hv_mc_qpel_aver_v_src1_8x8_msa;
+    c->put_qpel_pixels_tab[1][15] = ff_hv_mc_qpel_aver_hv_src11_8x8_msa;
+    /* put_no_rnd_qpel_pixels_tab[0]: 16x16 / 16-wide "no_rnd" functions */
+    c->put_no_rnd_qpel_pixels_tab[0][0] = ff_copy_16x16_msa;
+    c->put_no_rnd_qpel_pixels_tab[0][1] =
+        ff_horiz_mc_qpel_no_rnd_aver_src0_16width_msa;
+    c->put_no_rnd_qpel_pixels_tab[0][2] = ff_horiz_mc_qpel_no_rnd_16width_msa;
+    c->put_no_rnd_qpel_pixels_tab[0][3] =
+        ff_horiz_mc_qpel_no_rnd_aver_src1_16width_msa;
+    c->put_no_rnd_qpel_pixels_tab[0][4] =
+        ff_vert_mc_qpel_no_rnd_aver_src0_16x16_msa;
+    c->put_no_rnd_qpel_pixels_tab[0][5] =
+        ff_hv_mc_qpel_no_rnd_aver_hv_src00_16x16_msa;
+    c->put_no_rnd_qpel_pixels_tab[0][6] =
+        ff_hv_mc_qpel_no_rnd_aver_v_src0_16x16_msa;
+    c->put_no_rnd_qpel_pixels_tab[0][7] =
+        ff_hv_mc_qpel_no_rnd_aver_hv_src10_16x16_msa;
+    c->put_no_rnd_qpel_pixels_tab[0][8] = ff_vert_mc_qpel_no_rnd_16x16_msa;
+    c->put_no_rnd_qpel_pixels_tab[0][9] =
+        ff_hv_mc_qpel_no_rnd_aver_h_src0_16x16_msa;
+    c->put_no_rnd_qpel_pixels_tab[0][10] = ff_hv_mc_qpel_no_rnd_16x16_msa;
+    c->put_no_rnd_qpel_pixels_tab[0][11] =
+        ff_hv_mc_qpel_no_rnd_aver_h_src1_16x16_msa;
+    c->put_no_rnd_qpel_pixels_tab[0][12] =
+        ff_vert_mc_qpel_no_rnd_aver_src1_16x16_msa;
+    c->put_no_rnd_qpel_pixels_tab[0][13] =
+        ff_hv_mc_qpel_no_rnd_aver_hv_src01_16x16_msa;
+    c->put_no_rnd_qpel_pixels_tab[0][14] =
+        ff_hv_mc_qpel_no_rnd_aver_v_src1_16x16_msa;
+    c->put_no_rnd_qpel_pixels_tab[0][15] =
+        ff_hv_mc_qpel_no_rnd_aver_hv_src11_16x16_msa;
+    /* put_no_rnd_qpel_pixels_tab[1]: 8x8 / 8-wide "no_rnd" functions */
+    c->put_no_rnd_qpel_pixels_tab[1][0] = ff_copy_8x8_msa;
+    c->put_no_rnd_qpel_pixels_tab[1][1] =
+        ff_horiz_mc_qpel_no_rnd_aver_src0_8width_msa;
+    c->put_no_rnd_qpel_pixels_tab[1][2] = ff_horiz_mc_qpel_no_rnd_8width_msa;
+    c->put_no_rnd_qpel_pixels_tab[1][3] =
+        ff_horiz_mc_qpel_no_rnd_aver_src1_8width_msa;
+    c->put_no_rnd_qpel_pixels_tab[1][4] =
+        ff_vert_mc_qpel_no_rnd_aver_src0_8x8_msa;
+    c->put_no_rnd_qpel_pixels_tab[1][5] =
+        ff_hv_mc_qpel_no_rnd_aver_hv_src00_8x8_msa;
+    c->put_no_rnd_qpel_pixels_tab[1][6] =
+        ff_hv_mc_qpel_no_rnd_aver_v_src0_8x8_msa;
+    c->put_no_rnd_qpel_pixels_tab[1][7] =
+        ff_hv_mc_qpel_no_rnd_aver_hv_src10_8x8_msa;
+    c->put_no_rnd_qpel_pixels_tab[1][8] = ff_vert_mc_qpel_no_rnd_8x8_msa;
+    c->put_no_rnd_qpel_pixels_tab[1][9] =
+        ff_hv_mc_qpel_no_rnd_aver_h_src0_8x8_msa;
+    c->put_no_rnd_qpel_pixels_tab[1][10] = ff_hv_mc_qpel_no_rnd_8x8_msa;
+    c->put_no_rnd_qpel_pixels_tab[1][11] =
+        ff_hv_mc_qpel_no_rnd_aver_h_src1_8x8_msa;
+    c->put_no_rnd_qpel_pixels_tab[1][12] =
+        ff_vert_mc_qpel_no_rnd_aver_src1_8x8_msa;
+    c->put_no_rnd_qpel_pixels_tab[1][13] =
+        ff_hv_mc_qpel_no_rnd_aver_hv_src01_8x8_msa;
+    c->put_no_rnd_qpel_pixels_tab[1][14] =
+        ff_hv_mc_qpel_no_rnd_aver_v_src1_8x8_msa;
+    c->put_no_rnd_qpel_pixels_tab[1][15] =
+        ff_hv_mc_qpel_no_rnd_aver_hv_src11_8x8_msa;
+    /* avg_qpel_pixels_tab[0]: 16x16 / 16-wide "avg_dst" functions */
+    c->avg_qpel_pixels_tab[0][0] = ff_avg_width16_msa;
+    c->avg_qpel_pixels_tab[0][1] =
+        ff_horiz_mc_qpel_avg_dst_aver_src0_16width_msa;
+    c->avg_qpel_pixels_tab[0][2] = ff_horiz_mc_qpel_avg_dst_16width_msa;
+    c->avg_qpel_pixels_tab[0][3] =
+        ff_horiz_mc_qpel_avg_dst_aver_src1_16width_msa;
+    c->avg_qpel_pixels_tab[0][4] = ff_vert_mc_qpel_avg_dst_aver_src0_16x16_msa;
+    c->avg_qpel_pixels_tab[0][5] =
+        ff_hv_mc_qpel_avg_dst_aver_hv_src00_16x16_msa;
+    c->avg_qpel_pixels_tab[0][6] = ff_hv_mc_qpel_avg_dst_aver_v_src0_16x16_msa;
+    c->avg_qpel_pixels_tab[0][7] =
+        ff_hv_mc_qpel_avg_dst_aver_hv_src10_16x16_msa;
+    c->avg_qpel_pixels_tab[0][8] = ff_vert_mc_qpel_avg_dst_16x16_msa;
+    c->avg_qpel_pixels_tab[0][9] = ff_hv_mc_qpel_avg_dst_aver_h_src0_16x16_msa;
+    c->avg_qpel_pixels_tab[0][10] = ff_hv_mc_qpel_avg_dst_16x16_msa;
+    c->avg_qpel_pixels_tab[0][11] = ff_hv_mc_qpel_avg_dst_aver_h_src1_16x16_msa;
+    c->avg_qpel_pixels_tab[0][12] = ff_vert_mc_qpel_avg_dst_aver_src1_16x16_msa;
+    c->avg_qpel_pixels_tab[0][13] =
+        ff_hv_mc_qpel_avg_dst_aver_hv_src01_16x16_msa;
+    c->avg_qpel_pixels_tab[0][14] = ff_hv_mc_qpel_avg_dst_aver_v_src1_16x16_msa;
+    c->avg_qpel_pixels_tab[0][15] =
+        ff_hv_mc_qpel_avg_dst_aver_hv_src11_16x16_msa;
+    /* avg_qpel_pixels_tab[1]: 8x8 / 8-wide "avg_dst" functions */
+    c->avg_qpel_pixels_tab[1][0] = ff_avg_width8_msa;
+    c->avg_qpel_pixels_tab[1][1] =
+        ff_horiz_mc_qpel_avg_dst_aver_src0_8width_msa;
+    c->avg_qpel_pixels_tab[1][2] = ff_horiz_mc_qpel_avg_dst_8width_msa;
+    c->avg_qpel_pixels_tab[1][3] =
+        ff_horiz_mc_qpel_avg_dst_aver_src1_8width_msa;
+    c->avg_qpel_pixels_tab[1][4] = ff_vert_mc_qpel_avg_dst_aver_src0_8x8_msa;
+    c->avg_qpel_pixels_tab[1][5] = ff_hv_mc_qpel_avg_dst_aver_hv_src00_8x8_msa;
+    c->avg_qpel_pixels_tab[1][6] = ff_hv_mc_qpel_avg_dst_aver_v_src0_8x8_msa;
+    c->avg_qpel_pixels_tab[1][7] = ff_hv_mc_qpel_avg_dst_aver_hv_src10_8x8_msa;
+    c->avg_qpel_pixels_tab[1][8] = ff_vert_mc_qpel_avg_dst_8x8_msa;
+    c->avg_qpel_pixels_tab[1][9] = ff_hv_mc_qpel_avg_dst_aver_h_src0_8x8_msa;
+    c->avg_qpel_pixels_tab[1][10] = ff_hv_mc_qpel_avg_dst_8x8_msa;
+    c->avg_qpel_pixels_tab[1][11] = ff_hv_mc_qpel_avg_dst_aver_h_src1_8x8_msa;
+    c->avg_qpel_pixels_tab[1][12] = ff_vert_mc_qpel_avg_dst_aver_src1_8x8_msa;
+    c->avg_qpel_pixels_tab[1][13] = ff_hv_mc_qpel_avg_dst_aver_hv_src01_8x8_msa;
+    c->avg_qpel_pixels_tab[1][14] = ff_hv_mc_qpel_avg_dst_aver_v_src1_8x8_msa;
+    c->avg_qpel_pixels_tab[1][15] = ff_hv_mc_qpel_avg_dst_aver_hv_src11_8x8_msa;
+}
+#endif  // #if HAVE_MSA
+/* MIPS entry point: fills c with the MSA qpel functions when built in. */
+av_cold void ff_qpeldsp_init_mips(QpelDSPContext *c)
+{
+#if HAVE_MSA
+    qpeldsp_init_msa(c);
+#endif  // #if HAVE_MSA
+}
diff --git a/libavcodec/mips/qpeldsp_mips.h b/libavcodec/mips/qpeldsp_mips.h
new file mode 100644
index 00000000..704d2213
--- /dev/null
+++ b/libavcodec/mips/qpeldsp_mips.h
@@ -0,0 +1,261 @@
+/*
+ * Copyright (c) 2015 Parag Salasakar (Parag.Salasakar@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MIPS_QPELDSP_MIPS_H
+#define AVCODEC_MIPS_QPELDSP_MIPS_H
+
+#include "../mpegvideo.h"
+/* Copy/average helpers; ff_avg_width8_msa fills avg_qpel_pixels_tab[1][0]. */
+void ff_copy_8x8_msa(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_copy_16x16_msa(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_avg_width8_msa(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_avg_width16_msa(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+/* horiz_mc_*: avg_dst 8width variants fill avg_qpel_pixels_tab[1][1..3]. */
+void ff_horiz_mc_qpel_aver_src0_8width_msa(uint8_t *dst, const uint8_t *src,
+                                           ptrdiff_t stride);
+void ff_horiz_mc_qpel_aver_src0_16width_msa(uint8_t *dst, const uint8_t *src,
+                                            ptrdiff_t stride);
+void ff_horiz_mc_qpel_8width_msa(uint8_t *dst, const uint8_t *src,
+                                 ptrdiff_t stride);
+void ff_horiz_mc_qpel_16width_msa(uint8_t *dst, const uint8_t *src,
+                                  ptrdiff_t stride);
+void ff_horiz_mc_qpel_aver_src1_8width_msa(uint8_t *dst, const uint8_t *src,
+                                           ptrdiff_t stride);
+void ff_horiz_mc_qpel_aver_src1_16width_msa(uint8_t *dst, const uint8_t *src,
+                                            ptrdiff_t stride);
+void ff_horiz_mc_qpel_no_rnd_aver_src0_8width_msa(uint8_t *dst,
+                                                  const uint8_t *src,
+                                                  ptrdiff_t stride);
+void ff_horiz_mc_qpel_no_rnd_aver_src0_16width_msa(uint8_t *dst,
+                                                   const uint8_t *src,
+                                                   ptrdiff_t stride);
+void ff_horiz_mc_qpel_no_rnd_8width_msa(uint8_t *dst, const uint8_t *src,
+                                        ptrdiff_t stride);
+void ff_horiz_mc_qpel_no_rnd_16width_msa(uint8_t *dst, const uint8_t *src,
+                                         ptrdiff_t stride);
+void ff_horiz_mc_qpel_no_rnd_aver_src1_8width_msa(uint8_t *dst,
+                                                  const uint8_t *src,
+                                                  ptrdiff_t stride);
+void ff_horiz_mc_qpel_no_rnd_aver_src1_16width_msa(uint8_t *dst,
+                                                   const uint8_t *src,
+                                                   ptrdiff_t stride);
+void ff_horiz_mc_qpel_avg_dst_aver_src0_8width_msa(uint8_t *dst,
+                                                   const uint8_t *src,
+                                                   ptrdiff_t stride);
+void ff_horiz_mc_qpel_avg_dst_aver_src0_16width_msa(uint8_t *dst,
+                                                    const uint8_t *src,
+                                                    ptrdiff_t stride);
+void ff_horiz_mc_qpel_avg_dst_8width_msa(uint8_t *dst, const uint8_t *src,
+                                         ptrdiff_t stride);
+void ff_horiz_mc_qpel_avg_dst_16width_msa(uint8_t *dst, const uint8_t *src,
+                                          ptrdiff_t stride);
+void ff_horiz_mc_qpel_avg_dst_aver_src1_8width_msa(uint8_t *dst,
+                                                   const uint8_t *src,
+                                                   ptrdiff_t stride);
+void ff_horiz_mc_qpel_avg_dst_aver_src1_16width_msa(uint8_t *dst,
+                                                    const uint8_t *src,
+                                                    ptrdiff_t stride);
+/* vert_mc_*: avg_dst 8x8 ones fill avg_qpel_pixels_tab[1][4], [8], [12]. */
+void ff_vert_mc_qpel_aver_src0_8x8_msa(uint8_t *dst, const uint8_t *src,
+                                       ptrdiff_t stride);
+void ff_vert_mc_qpel_aver_src0_16x16_msa(uint8_t *dst, const uint8_t *src,
+                                         ptrdiff_t stride);
+void ff_vert_mc_qpel_8x8_msa(uint8_t *dst, const uint8_t *src,
+                             ptrdiff_t stride);
+void ff_vert_mc_qpel_16x16_msa(uint8_t *dst, const uint8_t *src,
+                               ptrdiff_t stride);
+void ff_vert_mc_qpel_aver_src1_8x8_msa(uint8_t *dst, const uint8_t *src,
+                                       ptrdiff_t stride);
+void ff_vert_mc_qpel_aver_src1_16x16_msa(uint8_t *dst, const uint8_t *src,
+                                         ptrdiff_t stride);
+void ff_vert_mc_qpel_no_rnd_aver_src0_8x8_msa(uint8_t *dst,
+                                              const uint8_t *src,
+                                              ptrdiff_t stride);
+void ff_vert_mc_qpel_no_rnd_aver_src0_16x16_msa(uint8_t *dst,
+                                                const uint8_t *src,
+                                                ptrdiff_t stride);
+void ff_vert_mc_qpel_no_rnd_8x8_msa(uint8_t *dst, const uint8_t *src,
+                                    ptrdiff_t stride);
+void ff_vert_mc_qpel_no_rnd_16x16_msa(uint8_t *dst, const uint8_t *src,
+                                      ptrdiff_t stride);
+void ff_vert_mc_qpel_no_rnd_aver_src1_8x8_msa(uint8_t *dst,
+                                              const uint8_t *src,
+                                              ptrdiff_t stride);
+void ff_vert_mc_qpel_no_rnd_aver_src1_16x16_msa(uint8_t *dst,
+                                                const uint8_t *src,
+                                                ptrdiff_t stride);
+void ff_vert_mc_qpel_avg_dst_aver_src0_8x8_msa(uint8_t *dst,
+                                               const uint8_t *src,
+                                               ptrdiff_t stride);
+void ff_vert_mc_qpel_avg_dst_aver_src0_16x16_msa(uint8_t *dst,
+                                                 const uint8_t *src,
+                                                 ptrdiff_t stride);
+void ff_vert_mc_qpel_avg_dst_8x8_msa(uint8_t *dst, const uint8_t *src,
+                                     ptrdiff_t stride);
+void ff_vert_mc_qpel_avg_dst_16x16_msa(uint8_t *dst, const uint8_t *src,
+                                       ptrdiff_t stride);
+void ff_vert_mc_qpel_avg_dst_aver_src1_8x8_msa(uint8_t *dst,
+                                               const uint8_t *src,
+                                               ptrdiff_t stride);
+void ff_vert_mc_qpel_avg_dst_aver_src1_16x16_msa(uint8_t *dst,
+                                                 const uint8_t *src,
+                                                 ptrdiff_t stride);
+/* hv_mc_*: avg_dst 8x8 ones fill avg_qpel_pixels_tab[1][5-7,9-11,13-15]. */
+void ff_hv_mc_qpel_aver_hv_src00_16x16_msa(uint8_t *dst, const uint8_t *src,
+                                           ptrdiff_t stride);
+void ff_hv_mc_qpel_aver_hv_src00_8x8_msa(uint8_t *dst, const uint8_t *src,
+                                         ptrdiff_t stride);
+void ff_hv_mc_qpel_aver_v_src0_16x16_msa(uint8_t *dst, const uint8_t *src,
+                                         ptrdiff_t stride);
+void ff_hv_mc_qpel_aver_v_src0_8x8_msa(uint8_t *dst, const uint8_t *src,
+                                       ptrdiff_t stride);
+void ff_hv_mc_qpel_aver_hv_src10_16x16_msa(uint8_t *dst, const uint8_t *src,
+                                           ptrdiff_t stride);
+void ff_hv_mc_qpel_aver_hv_src10_8x8_msa(uint8_t *dst, const uint8_t *src,
+                                         ptrdiff_t stride);
+void ff_hv_mc_qpel_aver_h_src0_16x16_msa(uint8_t *dst, const uint8_t *src,
+                                         ptrdiff_t stride);
+void ff_hv_mc_qpel_aver_h_src0_8x8_msa(uint8_t *dst, const uint8_t *src,
+                                       ptrdiff_t stride);
+void ff_hv_mc_qpel_16x16_msa(uint8_t *dst, const uint8_t *src,
+                             ptrdiff_t stride);
+void ff_hv_mc_qpel_8x8_msa(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_hv_mc_qpel_aver_h_src1_16x16_msa(uint8_t *dst, const uint8_t *src,
+                                         ptrdiff_t stride);
+void ff_hv_mc_qpel_aver_h_src1_8x8_msa(uint8_t *dst, const uint8_t *src,
+                                       ptrdiff_t stride);
+void ff_hv_mc_qpel_aver_hv_src01_16x16_msa(uint8_t *dst, const uint8_t *src,
+                                           ptrdiff_t stride);
+void ff_hv_mc_qpel_aver_hv_src01_8x8_msa(uint8_t *dst, const uint8_t *src,
+                                         ptrdiff_t stride);
+void ff_hv_mc_qpel_aver_v_src1_16x16_msa(uint8_t *dst, const uint8_t *src,
+                                         ptrdiff_t stride);
+void ff_hv_mc_qpel_aver_v_src1_8x8_msa(uint8_t *dst, const uint8_t *src,
+                                       ptrdiff_t stride);
+void ff_hv_mc_qpel_aver_hv_src11_16x16_msa(uint8_t *dst, const uint8_t *src,
+                                           ptrdiff_t stride);
+void ff_hv_mc_qpel_aver_hv_src11_8x8_msa(uint8_t *dst, const uint8_t *src,
+                                         ptrdiff_t stride);
+void ff_hv_mc_qpel_avg_dst_aver_hv_src00_16x16_msa(uint8_t *dst,
+                                                   const uint8_t *src,
+                                                   ptrdiff_t stride);
+void ff_hv_mc_qpel_avg_dst_aver_hv_src00_8x8_msa(uint8_t *dst,
+                                                 const uint8_t *src,
+                                                 ptrdiff_t stride);
+void ff_hv_mc_qpel_avg_dst_aver_v_src0_16x16_msa(uint8_t *dst,
+                                                 const uint8_t *src,
+                                                 ptrdiff_t stride);
+void ff_hv_mc_qpel_avg_dst_aver_v_src0_8x8_msa(uint8_t *dst,
+                                               const uint8_t *src,
+                                               ptrdiff_t stride);
+void ff_hv_mc_qpel_avg_dst_aver_hv_src10_16x16_msa(uint8_t *dst,
+                                                   const uint8_t *src,
+                                                   ptrdiff_t stride);
+void ff_hv_mc_qpel_avg_dst_aver_hv_src10_8x8_msa(uint8_t *dst,
+                                                 const uint8_t *src,
+                                                 ptrdiff_t stride);
+void ff_hv_mc_qpel_avg_dst_aver_h_src0_16x16_msa(uint8_t *dst,
+                                                 const uint8_t *src,
+                                                 ptrdiff_t stride);
+void ff_hv_mc_qpel_avg_dst_aver_h_src0_8x8_msa(uint8_t *dst,
+                                               const uint8_t *src,
+                                               ptrdiff_t stride);
+void ff_hv_mc_qpel_avg_dst_16x16_msa(uint8_t *dst, const uint8_t *src,
+                                     ptrdiff_t stride);
+void ff_hv_mc_qpel_avg_dst_8x8_msa(uint8_t *dst, const uint8_t *src,
+                                   ptrdiff_t stride);
+void ff_hv_mc_qpel_avg_dst_aver_h_src1_16x16_msa(uint8_t *dst,
+                                                 const uint8_t *src,
+                                                 ptrdiff_t stride);
+void ff_hv_mc_qpel_avg_dst_aver_h_src1_8x8_msa(uint8_t *dst,
+                                               const uint8_t *src,
+                                               ptrdiff_t stride);
+void ff_hv_mc_qpel_avg_dst_aver_hv_src01_16x16_msa(uint8_t *dst,
+                                                   const uint8_t *src,
+                                                   ptrdiff_t stride);
+void ff_hv_mc_qpel_avg_dst_aver_hv_src01_8x8_msa(uint8_t *dst,
+                                                 const uint8_t *src,
+                                                 ptrdiff_t stride);
+void ff_hv_mc_qpel_avg_dst_aver_v_src1_16x16_msa(uint8_t *dst,
+                                                 const uint8_t *src,
+                                                 ptrdiff_t stride);
+void ff_hv_mc_qpel_avg_dst_aver_v_src1_8x8_msa(uint8_t *dst,
+                                               const uint8_t *src,
+                                               ptrdiff_t stride);
+void ff_hv_mc_qpel_avg_dst_aver_hv_src11_16x16_msa(uint8_t *dst,
+                                                   const uint8_t *src,
+                                                   ptrdiff_t stride);
+void ff_hv_mc_qpel_avg_dst_aver_hv_src11_8x8_msa(uint8_t *dst,
+                                                 const uint8_t *src,
+                                                 ptrdiff_t stride);
+void ff_hv_mc_qpel_no_rnd_aver_hv_src00_16x16_msa(uint8_t *dst,
+                                                  const uint8_t *src,
+                                                  ptrdiff_t stride);
+void ff_hv_mc_qpel_no_rnd_aver_hv_src00_8x8_msa(uint8_t *dst,
+                                                const uint8_t *src,
+                                                ptrdiff_t stride);
+void ff_hv_mc_qpel_no_rnd_aver_v_src0_16x16_msa(uint8_t *dst,
+                                                const uint8_t *src,
+                                                ptrdiff_t stride);
+void ff_hv_mc_qpel_no_rnd_aver_v_src0_8x8_msa(uint8_t *dst,
+                                              const uint8_t *src,
+                                              ptrdiff_t stride);
+void ff_hv_mc_qpel_no_rnd_aver_hv_src10_16x16_msa(uint8_t *dst,
+                                                  const uint8_t *src,
+                                                  ptrdiff_t stride);
+void ff_hv_mc_qpel_no_rnd_aver_hv_src10_8x8_msa(uint8_t *dst,
+                                                const uint8_t *src,
+                                                ptrdiff_t stride);
+void ff_hv_mc_qpel_no_rnd_aver_h_src0_16x16_msa(uint8_t *dst,
+                                                const uint8_t *src,
+                                                ptrdiff_t stride);
+void ff_hv_mc_qpel_no_rnd_aver_h_src0_8x8_msa(uint8_t *dst,
+                                              const uint8_t *src,
+                                              ptrdiff_t stride);
+void ff_hv_mc_qpel_no_rnd_16x16_msa(uint8_t *dst, const uint8_t *src,
+                                    ptrdiff_t stride);
+void ff_hv_mc_qpel_no_rnd_8x8_msa(uint8_t *dst, const uint8_t *src,
+                                  ptrdiff_t stride);
+void ff_hv_mc_qpel_no_rnd_aver_h_src1_16x16_msa(uint8_t *dst,
+                                                const uint8_t *src,
+                                                ptrdiff_t stride);
+void ff_hv_mc_qpel_no_rnd_aver_h_src1_8x8_msa(uint8_t *dst,
+                                              const uint8_t *src,
+                                              ptrdiff_t stride);
+void ff_hv_mc_qpel_no_rnd_aver_hv_src01_16x16_msa(uint8_t *dst,
+                                                  const uint8_t *src,
+                                                  ptrdiff_t stride);
+void ff_hv_mc_qpel_no_rnd_aver_hv_src01_8x8_msa(uint8_t *dst,
+                                                const uint8_t *src,
+                                                ptrdiff_t stride);
+void ff_hv_mc_qpel_no_rnd_aver_v_src1_16x16_msa(uint8_t *dst,
+                                                const uint8_t *src,
+                                                ptrdiff_t stride);
+void ff_hv_mc_qpel_no_rnd_aver_v_src1_8x8_msa(uint8_t *dst,
+                                              const uint8_t *src,
+                                              ptrdiff_t stride);
+void ff_hv_mc_qpel_no_rnd_aver_hv_src11_16x16_msa(uint8_t *dst,
+                                                  const uint8_t *src,
+                                                  ptrdiff_t stride);
+void ff_hv_mc_qpel_no_rnd_aver_hv_src11_8x8_msa(uint8_t *dst,
+                                                const uint8_t *src,
+                                                ptrdiff_t stride);
+
+#endif  // #ifndef AVCODEC_MIPS_QPELDSP_MIPS_H
diff --git a/libavcodec/mips/qpeldsp_msa.c b/libavcodec/mips/qpeldsp_msa.c
new file mode 100644
index 00000000..4710b3f7
--- /dev/null
+++ b/libavcodec/mips/qpeldsp_msa.c
@@ -0,0 +1,6518 @@
+/*
+ * Copyright (c) 2015 Parag Salasakar (Parag.Salasakar@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/mips/generic_macros_msa.h"
+#include "qpeldsp_mips.h"
+
+#define APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask, coef0, coef1, coef2)  \
+( {  /* evaluates to a v16u8 of 16 filtered, rounded, clipped bytes */  \
+    v16u8 out, tmp0, tmp1;                                              \
+    v16u8 data0, data1, data2, data3, data4, data5;                     \
+    v8i16 res_r, res_l;                                                 \
+    v8u16 sum0_r, sum1_r, sum2_r, sum3_r;                               \
+    v8u16 sum0_l, sum1_l, sum2_l, sum3_l;                               \
+    /* pairs: (inp0,inp1) (data0,data3) (data1,data4) (data2,data5) */  \
+    VSHF_B2_UB(inp0, inp0, inp1, inp1, mask, mask, tmp0, tmp1);         \
+    ILVRL_B2_UH(inp1, inp0, sum0_r, sum0_l);                            \
+    data0 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) tmp0, 15);       \
+    data3 = (v16u8) __msa_sldi_b((v16i8) tmp1, (v16i8) inp1, 1);        \
+    HADD_UB2_UH(sum0_r, sum0_l, sum0_r, sum0_l);                        \
+    ILVRL_B2_UH(data3, data0, sum1_r, sum1_l);                          \
+    data1 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) tmp0, 14);       \
+    data4 = (v16u8) __msa_sldi_b((v16i8) tmp1, (v16i8) inp1, 2);        \
+    sum0_r *= (v8u16) (coef0);  /* coef0 is a halfword multiplier */    \
+    sum0_l *= (v8u16) (coef0);                                          \
+    ILVRL_B2_UH(data4, data1, sum2_r, sum2_l);                          \
+    data2 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) tmp0, 13);       \
+    data5 = (v16u8) __msa_sldi_b((v16i8) tmp1, (v16i8) inp1, 3);        \
+    DPADD_UB2_UH(sum2_r, sum2_l, coef2, coef2, sum0_r, sum0_l);         \
+    ILVRL_B2_UH(data5, data2, sum3_r, sum3_l);                          \
+    HADD_UB2_UH(sum3_r, sum3_l, sum3_r, sum3_l);                        \
+    DPADD_UB2_UH(sum1_r, sum1_l, coef1, coef1, sum3_r, sum3_l);         \
+    res_r = (v8i16) (sum0_r - sum3_r);  /* sum0: coef0+coef2 part */    \
+    res_l = (v8i16) (sum0_l - sum3_l);  /* sum3: plain+coef1 part */    \
+    SRARI_H2_SH(res_r, res_l, 5);  /* >> 5 with rounding */             \
+    CLIP_SH2_0_255(res_r, res_l);  /* clamp to 0..255 */                \
+    out = (v16u8) __msa_pckev_b((v16i8) res_l, (v16i8) res_r);          \
+                                                                        \
+    out;                                                                \
+} )
+
+#define APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,                       \
+                                      mask0, mask1, mask2, mask3,       \
+                                      coef0, coef1, coef2)              \
+( {  /* filters inp0 and inp1 together, packing both into one v16u8 */  \
+    v16u8 out;                                                          \
+    v8u16 sum0_r, sum1_r, sum2_r, sum3_r;                               \
+    v8u16 sum4_r, sum5_r, sum6_r, sum7_r;                               \
+    v8i16 res0_r, res1_r;                                               \
+    /* sumN / sum(N+4): inp0 / inp1 shuffled by maskN, N = 0..3 */      \
+    VSHF_B2_UH(inp0, inp0, inp1, inp1, mask0, mask0, sum0_r, sum4_r);   \
+    VSHF_B2_UH(inp0, inp0, inp1, inp1, mask3, mask3, sum3_r, sum7_r);   \
+    HADD_UB2_UH(sum3_r, sum7_r, sum3_r, sum7_r);                        \
+    DOTP_UB2_UH(sum0_r, sum4_r, coef0, coef0, sum0_r, sum4_r);          \
+    VSHF_B2_UH(inp0, inp0, inp1, inp1, mask2, mask2, sum2_r, sum6_r);   \
+    VSHF_B2_UH(inp0, inp0, inp1, inp1, mask1, mask1, sum1_r, sum5_r);   \
+    DPADD_UB2_UH(sum2_r, sum6_r, coef2, coef2, sum0_r, sum4_r);         \
+    DPADD_UB2_UH(sum1_r, sum5_r, coef1, coef1, sum3_r, sum7_r);         \
+    res0_r = (v8i16) (sum0_r - sum3_r);  /* sum0/4: coef0+coef2 part */ \
+    res1_r = (v8i16) (sum4_r - sum7_r);  /* sum3/7: plain+coef1 part */ \
+    SRARI_H2_SH(res0_r, res1_r, 5);  /* >> 5 with rounding */           \
+    CLIP_SH2_0_255(res0_r, res1_r);  /* clamp to 0..255 */              \
+    out = (v16u8) __msa_pckev_b((v16i8) res1_r, (v16i8) res0_r);        \
+                                                                        \
+    out;                                                                \
+} )
+
+#define APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0,                        \
+                                           mask0, mask1, mask2, mask3,  \
+                                           coef0, coef1, coef2)         \
+( {  /* single input; the 8 results fill both halves of the v16u8 */    \
+    v16u8 out;                                                          \
+    v8i16 res0_r;                                                       \
+    v8u16 sum0_r, sum1_r, sum2_r, sum3_r;                               \
+    /* sumN = inp0 shuffled by maskN, N = 0..3 */                       \
+    VSHF_B2_UH(inp0, inp0, inp0, inp0, mask0, mask3, sum0_r, sum3_r);   \
+    sum3_r = __msa_hadd_u_h((v16u8) sum3_r, (v16u8) sum3_r);            \
+    sum0_r = __msa_dotp_u_h((v16u8) sum0_r, (v16u8) coef0);             \
+    VSHF_B2_UH(inp0, inp0, inp0, inp0, mask2, mask1, sum2_r, sum1_r);   \
+    DPADD_UB2_UH(sum2_r, sum1_r, coef2, coef1, sum0_r, sum3_r);         \
+    res0_r = (v8i16) (sum0_r - sum3_r);  /* sum3: plain+coef1 part */   \
+    res0_r = __msa_srari_h(res0_r, 5);  /* >> 5 with rounding */        \
+    res0_r = CLIP_SH_0_255(res0_r);  /* clamp to 0..255 */              \
+    out = (v16u8) __msa_pckev_b((v16i8) res0_r, (v16i8) res0_r);        \
+                                                                        \
+    out;                                                                \
+} )
+
+#define APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE_1ROW(inp0, mask0, mask1,   \
+                                                    mask2, mask3, coef0,  \
+                                                    coef1, coef2)         \
+( {  /* single input, no-round: result is (x + 15) >> 5, then clipped */  \
+    v16u8 out;                                                            \
+    v8i16 res0_r;                                                         \
+    v8u16 sum0_r, sum1_r, sum2_r, sum3_r;                                 \
+    /* sumN = inp0 shuffled by maskN, N = 0..3 */                         \
+    VSHF_B2_UH(inp0, inp0, inp0, inp0, mask0, mask3, sum0_r, sum3_r);     \
+    sum3_r = __msa_hadd_u_h((v16u8) sum3_r, (v16u8) sum3_r);              \
+    sum0_r = __msa_dotp_u_h((v16u8) sum0_r, (v16u8) coef0);               \
+    VSHF_B2_UH(inp0, inp0, inp0, inp0, mask2, mask1, sum2_r, sum1_r);     \
+    DPADD_UB2_UH(sum2_r, sum1_r, coef2, coef1, sum0_r, sum3_r);           \
+    res0_r = (v8i16) (sum0_r - sum3_r);  /* sum3: plain+coef1 part */     \
+    res0_r += 15;  /* bias 15: exact halves round down */                 \
+    res0_r >>= 5;                                                         \
+    res0_r = CLIP_SH_0_255(res0_r);  /* clamp to 0..255 */                \
+    out = (v16u8) __msa_pckev_b((v16i8) res0_r, (v16i8) res0_r);          \
+                                                                          \
+    out;                                                                  \
+} )
+
+#define APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp0, inp1, mask,              \
+                                         coef0, coef1, coef2)           \
+( {  /* no-round 16-byte form: (x + 15) >> 5 per lane, then clipped */  \
+    v16u8 out, tmp0, tmp1;                                              \
+    v16u8 data0, data1, data2, data3, data4, data5;                     \
+    v8i16 res_r, res_l;                                                 \
+    v8u16 sum0_r, sum1_r, sum2_r, sum3_r;                               \
+    v8u16 sum0_l, sum1_l, sum2_l, sum3_l;                               \
+    /* pairs: (inp0,inp1) (data0,data3) (data1,data4) (data2,data5) */  \
+    VSHF_B2_UB(inp0, inp0, inp1, inp1, mask, mask, tmp0, tmp1);         \
+    ILVRL_B2_UH(inp1, inp0, sum0_r, sum0_l);                            \
+    data0 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) tmp0, 15);       \
+    data3 = (v16u8) __msa_sldi_b((v16i8) tmp1, (v16i8) inp1, 1);        \
+    HADD_UB2_UH(sum0_r, sum0_l, sum0_r, sum0_l);                        \
+    ILVRL_B2_UH(data3, data0, sum1_r, sum1_l);                          \
+    data1 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) tmp0, 14);       \
+    data4 = (v16u8) __msa_sldi_b((v16i8) tmp1, (v16i8) inp1, 2);        \
+    sum0_r *= (v8u16) (coef0);  /* coef0 is a halfword multiplier */    \
+    sum0_l *= (v8u16) (coef0);                                          \
+    ILVRL_B2_UH(data4, data1, sum2_r, sum2_l);                          \
+    data2 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) tmp0, 13);       \
+    data5 = (v16u8) __msa_sldi_b((v16i8) tmp1, (v16i8) inp1, 3);        \
+    DPADD_UB2_UH(sum2_r, sum2_l, coef2, coef2, sum0_r, sum0_l);         \
+    ILVRL_B2_UH(data5, data2, sum3_r, sum3_l);                          \
+    HADD_UB2_UH(sum3_r, sum3_l, sum3_r, sum3_l);                        \
+    DPADD_UB2_UH(sum1_r, sum1_l, coef1, coef1, sum3_r, sum3_l);         \
+    res_r = (v8i16) (sum0_r - sum3_r);  /* sum0: coef0+coef2 part */    \
+    res_l = (v8i16) (sum0_l - sum3_l);  /* sum3: plain+coef1 part */    \
+    res_r += 15;  /* bias 15: exact halves round down */                \
+    res_l += 15;                                                        \
+    res_r >>= 5;                                                        \
+    res_l >>= 5;                                                        \
+    CLIP_SH2_0_255(res_r, res_l);  /* clamp to 0..255 */                \
+    out = (v16u8) __msa_pckev_b((v16i8) res_l, (v16i8) res_r);          \
+                                                                        \
+    out;                                                                \
+} )
+
+#define APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1,                  \
+                                               mask0, mask1, mask2, mask3,  \
+                                               coef0, coef1, coef2)         \
+( {  /* no-round, two inputs packed into one v16u8: (x + 15) >> 5 */        \
+    v16u8 out;                                                              \
+    v8i16 res0_r, res1_r;                                                   \
+    v8u16 sum0_r, sum1_r, sum2_r, sum3_r;                                   \
+    v8u16 sum4_r, sum5_r, sum6_r, sum7_r;                                   \
+    /* sumN / sum(N+4): inp0 / inp1 shuffled by maskN, N = 0..3 */          \
+    VSHF_B2_UH(inp0, inp0, inp1, inp1, mask0, mask0, sum0_r, sum4_r);       \
+    VSHF_B2_UH(inp0, inp0, inp1, inp1, mask3, mask3, sum3_r, sum7_r);       \
+    HADD_UB2_UH(sum3_r, sum7_r, sum3_r, sum7_r);                            \
+    DOTP_UB2_UH(sum0_r, sum4_r, coef0, coef0, sum0_r, sum4_r);              \
+    VSHF_B2_UH(inp0, inp0, inp1, inp1, mask2, mask2, sum2_r, sum6_r);       \
+    VSHF_B2_UH(inp0, inp0, inp1, inp1, mask1, mask1, sum1_r, sum5_r);       \
+    DPADD_UB2_UH(sum2_r, sum6_r, coef2, coef2, sum0_r, sum4_r);             \
+    DPADD_UB2_UH(sum1_r, sum5_r, coef1, coef1, sum3_r, sum7_r);             \
+    res0_r = (v8i16) (sum0_r - sum3_r);  /* sum0/4: coef0+coef2 part */     \
+    res1_r = (v8i16) (sum4_r - sum7_r);  /* sum3/7: plain+coef1 part */     \
+    res0_r += 15;  /* bias 15: exact halves round down */                   \
+    res1_r += 15;                                                           \
+    res0_r >>= 5;                                                           \
+    res1_r >>= 5;                                                           \
+    CLIP_SH2_0_255(res0_r, res1_r);  /* clamp to 0..255 */                  \
+    out = (v16u8) __msa_pckev_b((v16i8) res1_r, (v16i8) res0_r);            \
+                                                                            \
+    out;                                                                    \
+} )
+
+#define APPLY_VERT_QPEL_FILTER(inp0, inp1, inp2, inp3,                  \
+                               inp4, inp5, inp6, inp7,                  \
+                               coef0, coef1, coef2)                     \
+( {  /* one v16u8 of filtered, rounded (>> 5) and clipped bytes */      \
+    v16u8 res;                                                          \
+    v8i16 res_r, res_l;                                                 \
+    v8u16 sum0_r, sum1_r, sum2_r, sum3_r;                               \
+    v8u16 sum0_l, sum1_l, sum2_l, sum3_l;                               \
+    /* pairs: (inp0,inp4) (inp1,inp5) (inp2,inp6) (inp3,inp7) */        \
+    ILVRL_B2_UH(inp4, inp0, sum0_r, sum0_l);                            \
+    ILVRL_B2_UH(inp7, inp3, sum3_r, sum3_l);                            \
+    DOTP_UB2_UH(sum0_r, sum0_l, coef0, coef0, sum0_r, sum0_l);          \
+    HADD_UB2_UH(sum3_r, sum3_l, sum3_r, sum3_l);                        \
+    ILVRL_B2_UH(inp6, inp2, sum2_r, sum2_l);                            \
+    ILVRL_B2_UH(inp5, inp1, sum1_r, sum1_l);                            \
+    DPADD_UB2_UH(sum2_r, sum2_l, coef2, coef2, sum0_r, sum0_l);         \
+    DPADD_UB2_UH(sum1_r, sum1_l, coef1, coef1, sum3_r, sum3_l);         \
+    res_r = (v8i16) (sum0_r - sum3_r);  /* sum0: coef0+coef2 part */    \
+    res_l = (v8i16) (sum0_l - sum3_l);  /* sum3: plain+coef1 part */    \
+    SRARI_H2_SH(res_r, res_l, 5);  /* >> 5 with rounding */             \
+    CLIP_SH2_0_255(res_r, res_l);  /* clamp to 0..255 */                \
+    res = (v16u8) __msa_pckev_b((v16i8) res_l, (v16i8) res_r);          \
+                                                                        \
+    res;                                                                \
+} )
+
+#define APPLY_VERT_QPEL_FILTER_8BYTE(inp00, inp01, inp02, inp03,        \
+                                     inp04, inp05, inp06, inp07,        \
+                                     inp10, inp11, inp12, inp13,        \
+                                     inp14, inp15, inp16, inp17,        \
+                                     coef0, coef1, coef2)               \
+( {  /* two input sets (inp0x, inp1x) packed into one v16u8 */          \
+    v16u8 res;                                                          \
+    v8i16 val0, val1;                                                   \
+    v8u16 sum00, sum01, sum02, sum03;                                   \
+    v8u16 sum10, sum11, sum12, sum13;                                   \
+    /* for N in {0,1}: pairs inpN0/4, inpN1/5, inpN2/6, inpN3/7 */      \
+    ILVR_B4_UH(inp04, inp00, inp14, inp10, inp07, inp03, inp17, inp13,  \
+               sum00, sum10, sum03, sum13);                             \
+    DOTP_UB2_UH(sum00, sum10, coef0, coef0, sum00, sum10);              \
+    HADD_UB2_UH(sum03, sum13, sum03, sum13);                            \
+    ILVR_B4_UH(inp06, inp02, inp16, inp12, inp05, inp01, inp15, inp11,  \
+               sum02, sum12, sum01, sum11);                             \
+    DPADD_UB2_UH(sum02, sum12, coef2, coef2, sum00, sum10);             \
+    DPADD_UB2_UH(sum01, sum11, coef1, coef1, sum03, sum13);             \
+    val0 = (v8i16) (sum00 - sum03);  /* sumN0: coef0+coef2 part */      \
+    val1 = (v8i16) (sum10 - sum13);  /* sumN3: plain+coef1 part */      \
+    SRARI_H2_SH(val0, val1, 5);  /* >> 5 with rounding */               \
+    CLIP_SH2_0_255(val0, val1);  /* clamp to 0..255 */                  \
+    res = (v16u8) __msa_pckev_b((v16i8) val1, (v16i8) val0);            \
+                                                                        \
+    res;                                                                \
+} )
+
+#define APPLY_VERT_QPEL_NO_ROUND_FILTER(inp0, inp1, inp2, inp3,         \
+                                        inp4, inp5, inp6, inp7,         \
+                                        coef0, coef1, coef2)            \
+( {                                                                     \
+    v16u8 res;                                                          \
+    v8i16 res_r, res_l;                                                 \
+    v8u16 sum0_r, sum1_r, sum2_r, sum3_r;                               \
+    v8u16 sum0_l, sum1_l, sum2_l, sum3_l;                               \
+    /* sum0_r/l: coef0/coef2 terms; sum3_r/l: HADD and coef1 terms. */  \
+    ILVRL_B2_UH(inp4, inp0, sum0_r, sum0_l);                            \
+    ILVRL_B2_UH(inp7, inp3, sum3_r, sum3_l);                            \
+    DOTP_UB2_UH(sum0_r, sum0_l, coef0, coef0, sum0_r, sum0_l);          \
+    HADD_UB2_UH(sum3_r, sum3_l, sum3_r, sum3_l);                        \
+    ILVRL_B2_UH(inp6, inp2, sum2_r, sum2_l);                            \
+    ILVRL_B2_UH(inp5, inp1, sum1_r, sum1_l);                            \
+    DPADD_UB2_UH(sum2_r, sum2_l, coef2, coef2, sum0_r, sum0_l);         \
+    DPADD_UB2_UH(sum1_r, sum1_l, coef1, coef1, sum3_r, sum3_l);         \
+    res_r = (v8i16) (sum0_r - sum3_r);                                  \
+    res_l = (v8i16) (sum0_l - sum3_l);                                  \
+    res_r += 15;                                                        \
+    res_l += 15;                                                        \
+    res_r >>= 5;                                                        \
+    res_l >>= 5;                                                        \
+    CLIP_SH2_0_255(res_r, res_l);                                       \
+    res = (v16u8) __msa_pckev_b((v16i8) res_l, (v16i8) res_r);          \
+    /* No-round variant: adds 15 then >> 5 (rounding would add 16). */  \
+    res;                                                                \
+} )
+
+#define APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp00, inp01, inp02, inp03,  \
+                                              inp04, inp05, inp06, inp07,  \
+                                              inp10, inp11, inp12, inp13,  \
+                                              inp14, inp15, inp16, inp17,  \
+                                              coef0, coef1, coef2)         \
+( {                                                                        \
+    v16u8 res;                                                             \
+    v8i16 val0, val1;                                                      \
+    v8u16 sum00, sum01, sum02, sum03;                                      \
+    v8u16 sum10, sum11, sum12, sum13;                                      \
+    /* One 8-input filter applied to two sets: inp0x and inp1x. */         \
+    ILVR_B4_UH(inp04, inp00, inp14, inp10, inp07, inp03, inp17, inp13,     \
+               sum00, sum10, sum03, sum13);                                \
+    DOTP_UB2_UH(sum00, sum10, coef0, coef0, sum00, sum10);                 \
+    HADD_UB2_UH(sum03, sum13, sum03, sum13);                               \
+    ILVR_B4_UH(inp06, inp02, inp16, inp12, inp05, inp01, inp15, inp11,     \
+               sum02, sum12, sum01, sum11);                                \
+    DPADD_UB2_UH(sum02, sum12, coef2, coef2, sum00, sum10);                \
+    DPADD_UB2_UH(sum01, sum11, coef1, coef1, sum03, sum13);                \
+    val0 = (v8i16) (sum00 - sum03);                                        \
+    val1 = (v8i16) (sum10 - sum13);                                        \
+    val0 += 15;                                                            \
+    val1 += 15;                                                            \
+    val0 >>= 5;                                                            \
+    val1 >>= 5;                                                            \
+    CLIP_SH2_0_255(val0, val1);                                            \
+    res = (v16u8) __msa_pckev_b((v16i8) val1, (v16i8) val0);               \
+    /* No-round variant: adds 15 then >> 5 (rounding would add 16). */     \
+    res;                                                                   \
+} )
+
+/* Horizontal qpel filter for 8-pixel-wide blocks, four rows per pass.
+ * Each filtered row is blended with the source pixels at the same position
+ * via AVER_UB2_UB (presumably the rounding average aver_u_b), then stored. */
+static void horiz_mc_qpel_aver_src0_8width_msa(const uint8_t *src,
+        int32_t src_stride, uint8_t *dst, int32_t dst_stride, int32_t height)
+{
+    v16u8 const20 = (v16u8) __msa_ldi_b(20);
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
+    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
+    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
+    v16u8 inp0, inp1, inp2, inp3;
+    v16u8 filt01, filt23;
+    uint8_t quads = height >> 2;    /* number of four-row passes */
+
+    while (quads--) {
+        LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
+        src += 4 * src_stride;
+        filt01 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
+            mask0, mask1, mask2, mask3, const20, const6, const3);
+        filt23 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
+            mask0, mask1, mask2, mask3, const20, const6, const3);
+        /* Doubleword 0 of the odd row becomes doubleword 1 of the even row,
+         * so each vector now carries two 8-byte source rows. */
+        inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
+        inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
+        AVER_UB2_UB(inp0, filt01, inp2, filt23, filt01, filt23);
+        ST8x4_UB(filt01, filt23, dst, dst_stride);
+        dst += 4 * dst_stride;
+    }
+}
+
+/* Horizontal qpel filter for 16-pixel-wide blocks, four rows per pass.
+ * Each filtered row is blended with the row loaded from src (inp0/2/4/6)
+ * using the rounding byte average aver_u_b before it is stored to dst. */
+static void horiz_mc_qpel_aver_src0_16width_msa(const uint8_t *src,
+        int32_t src_stride, uint8_t *dst, int32_t dst_stride, int32_t height)
+{
+    v8u16 const20 = (v8u16) __msa_ldi_h(20);
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+    v16u8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
+    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
+    v16u8 filt;
+    uint8_t quads = height >> 2;    /* number of four-row passes */
+
+    while (quads--) {
+        LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
+        LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
+        src += 4 * src_stride;
+        filt = APPLY_HORIZ_QPEL_FILTER(inp0, inp1,
+            mask, const20, const6, const3);
+        filt = __msa_aver_u_b(inp0, filt);
+        ST_UB(filt, dst);
+        dst += dst_stride;
+
+        filt = APPLY_HORIZ_QPEL_FILTER(inp2, inp3,
+            mask, const20, const6, const3);
+        filt = __msa_aver_u_b(inp2, filt);
+        ST_UB(filt, dst);
+        dst += dst_stride;
+
+        filt = APPLY_HORIZ_QPEL_FILTER(inp4, inp5,
+            mask, const20, const6, const3);
+        filt = __msa_aver_u_b(inp4, filt);
+        ST_UB(filt, dst);
+        dst += dst_stride;
+
+        filt = APPLY_HORIZ_QPEL_FILTER(inp6, inp7,
+            mask, const20, const6, const3);
+        filt = __msa_aver_u_b(inp6, filt);
+        ST_UB(filt, dst);
+        dst += dst_stride;
+    }
+}
+
+/* Horizontal qpel filter for 8-pixel-wide blocks, four rows per pass; the
+ * filter output goes to dst unchanged.  Rows past the last multiple of four
+ * in height are not processed (height >> 2 passes). */
+static void horiz_mc_qpel_8width_msa(const uint8_t *src,
+        int32_t src_stride, uint8_t *dst, int32_t dst_stride, int32_t height)
+{
+    v16u8 const20 = (v16u8) __msa_ldi_b(20);
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
+    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
+    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
+    v16u8 inp0, inp1, inp2, inp3;
+    v16u8 filt01, filt23;
+    uint8_t quads = height >> 2;    /* number of four-row passes */
+
+    while (quads--) {
+        LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
+        src += 4 * src_stride;
+        /* One macro call handles two rows; its result vector presumably
+         * holds them as two 8-byte halves, which ST8x4_UB writes out. */
+        filt01 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
+            mask0, mask1, mask2, mask3, const20, const6, const3);
+        filt23 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
+            mask0, mask1, mask2, mask3, const20, const6, const3);
+        ST8x4_UB(filt01, filt23, dst, dst_stride);
+        dst += 4 * dst_stride;
+    }
+}
+
+/* Horizontal qpel filter for 16-pixel-wide blocks, four rows per pass; the
+ * filter output goes to dst unchanged.  inp1/3/5/7 are the same rows as
+ * inp0/2/4/6 read one byte further right (src + 1). */
+static void horiz_mc_qpel_16width_msa(const uint8_t *src,
+        int32_t src_stride, uint8_t *dst, int32_t dst_stride, int32_t height)
+{
+    v8u16 const20 = (v8u16) __msa_ldi_h(20);
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+    v16u8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
+    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
+    v16u8 filt;
+    uint8_t quads = height >> 2;    /* number of four-row passes */
+
+    while (quads--) {
+        LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
+        LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
+        src += 4 * src_stride;
+        filt = APPLY_HORIZ_QPEL_FILTER(inp0, inp1,
+            mask, const20, const6, const3);
+        ST_UB(filt, dst);
+        dst += dst_stride;
+
+        filt = APPLY_HORIZ_QPEL_FILTER(inp2, inp3,
+            mask, const20, const6, const3);
+        ST_UB(filt, dst);
+        dst += dst_stride;
+
+        filt = APPLY_HORIZ_QPEL_FILTER(inp4, inp5,
+            mask, const20, const6, const3);
+        ST_UB(filt, dst);
+        dst += dst_stride;
+
+        filt = APPLY_HORIZ_QPEL_FILTER(inp6, inp7,
+            mask, const20, const6, const3);
+        ST_UB(filt, dst);
+        dst += dst_stride;
+    }
+}
+
+/* Horizontal qpel filter for 8-pixel-wide blocks, four rows per pass.
+ * Each filtered row is blended, via AVER_UB2_UB (presumably the rounding
+ * average aver_u_b), with the source row slid by one byte, then stored. */
+static void horiz_mc_qpel_aver_src1_8width_msa(const uint8_t *src,
+        int32_t src_stride, uint8_t *dst, int32_t dst_stride, int32_t height)
+{
+    v16u8 const20 = (v16u8) __msa_ldi_b(20);
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
+    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
+    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
+    v16u8 inp0, inp1, inp2, inp3;
+    v16u8 filt01, filt23;
+    uint8_t quads = height >> 2;    /* number of four-row passes */
+
+    while (quads--) {
+        LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
+        src += 4 * src_stride;
+        filt01 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
+            mask0, mask1, mask2, mask3, const20, const6, const3);
+        filt23 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
+            mask0, mask1, mask2, mask3, const20, const6, const3);
+        /* Slide each source row by one byte (presumably so pixel x + 1 sits
+         * at position x), then pack two 8-byte rows into each vector. */
+        SLDI_B2_UB(inp0, inp1, inp0, inp1, inp0, inp1, 1);
+        SLDI_B2_UB(inp2, inp3, inp2, inp3, inp2, inp3, 1);
+        inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
+        inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
+        AVER_UB2_UB(inp0, filt01, inp2, filt23, filt01, filt23);
+        ST8x4_UB(filt01, filt23, dst, dst_stride);
+        dst += 4 * dst_stride;
+    }
+}
+
+/* Horizontal qpel filter for 16-pixel-wide blocks, four rows per pass.
+ * Each filtered row is blended with the row loaded from src + 1
+ * (inp1/3/5/7) using the rounding byte average aver_u_b, then stored. */
+static void horiz_mc_qpel_aver_src1_16width_msa(const uint8_t *src,
+        int32_t src_stride, uint8_t *dst, int32_t dst_stride, int32_t height)
+{
+    v8u16 const20 = (v8u16) __msa_ldi_h(20);
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+    v16u8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
+    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
+    v16u8 filt;
+    uint8_t quads = height >> 2;    /* number of four-row passes */
+
+    while (quads--) {
+        LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
+        LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
+        src += 4 * src_stride;
+        filt = APPLY_HORIZ_QPEL_FILTER(inp0, inp1,
+            mask, const20, const6, const3);
+        filt = __msa_aver_u_b(filt, inp1);
+        ST_UB(filt, dst);
+        dst += dst_stride;
+
+        filt = APPLY_HORIZ_QPEL_FILTER(inp2, inp3,
+            mask, const20, const6, const3);
+        filt = __msa_aver_u_b(filt, inp3);
+        ST_UB(filt, dst);
+        dst += dst_stride;
+
+        filt = APPLY_HORIZ_QPEL_FILTER(inp4, inp5,
+            mask, const20, const6, const3);
+        filt = __msa_aver_u_b(filt, inp5);
+        ST_UB(filt, dst);
+        dst += dst_stride;
+
+        filt = APPLY_HORIZ_QPEL_FILTER(inp6, inp7,
+            mask, const20, const6, const3);
+        filt = __msa_aver_u_b(filt, inp7);
+        ST_UB(filt, dst);
+        dst += dst_stride;
+    }
+}
+
+/* "No round" horizontal qpel filter for 8-pixel-wide blocks, four rows per
+ * pass.  Each filtered row is blended with the source pixels at the same
+ * position using the truncating byte average ave_u_b, then stored. */
+static void horiz_mc_qpel_no_rnd_aver_src0_8width_msa(const uint8_t *src,
+        int32_t src_stride, uint8_t *dst, int32_t dst_stride, int32_t height)
+{
+    v16u8 const20 = (v16u8) __msa_ldi_b(20);
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
+    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
+    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
+    v16u8 inp0, inp1, inp2, inp3;
+    v16u8 filt01, filt23;
+    uint8_t quads = height >> 2;    /* number of four-row passes */
+
+    while (quads--) {
+        LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
+        src += 4 * src_stride;
+        filt01 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1,
+            mask0, mask1, mask2, mask3, const20, const6, const3);
+        filt23 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3,
+            mask0, mask1, mask2, mask3, const20, const6, const3);
+        /* Doubleword 0 of the odd row becomes doubleword 1 of the even row,
+         * so each vector now carries two 8-byte source rows. */
+        inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
+        inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
+        filt01 = __msa_ave_u_b(inp0, filt01);
+        filt23 = __msa_ave_u_b(inp2, filt23);
+        ST8x4_UB(filt01, filt23, dst, dst_stride);
+        dst += 4 * dst_stride;
+    }
+}
+
+/* "No round" horizontal qpel filter for 16-pixel-wide blocks, four rows per
+ * pass.  Each filtered row is blended with the row loaded from src
+ * (inp0/2/4/6) using the truncating byte average ave_u_b, then stored. */
+static void horiz_mc_qpel_no_rnd_aver_src0_16width_msa(const uint8_t *src,
+        int32_t src_stride, uint8_t *dst, int32_t dst_stride, int32_t height)
+{
+    v8u16 const20 = (v8u16) __msa_ldi_h(20);
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+    v16u8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
+    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
+    v16u8 filt;
+    uint8_t quads = height >> 2;    /* number of four-row passes */
+
+    while (quads--) {
+        LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
+        LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
+        src += 4 * src_stride;
+        filt = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp0, inp1,
+            mask, const20, const6, const3);
+        filt = __msa_ave_u_b(inp0, filt);
+        ST_UB(filt, dst);
+        dst += dst_stride;
+
+        filt = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp2, inp3,
+            mask, const20, const6, const3);
+        filt = __msa_ave_u_b(inp2, filt);
+        ST_UB(filt, dst);
+        dst += dst_stride;
+
+        filt = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp4, inp5,
+            mask, const20, const6, const3);
+        filt = __msa_ave_u_b(inp4, filt);
+        ST_UB(filt, dst);
+        dst += dst_stride;
+
+        filt = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp6, inp7,
+            mask, const20, const6, const3);
+        filt = __msa_ave_u_b(inp6, filt);
+        ST_UB(filt, dst);
+        dst += dst_stride;
+    }
+}
+
+/* "No round" horizontal qpel filter for 8-pixel-wide blocks, four rows per
+ * pass; the filter output goes to dst unchanged.  Rows past the last
+ * multiple of four in height are not processed (height >> 2 passes). */
+static void horiz_mc_qpel_no_rnd_8width_msa(const uint8_t *src,
+        int32_t src_stride, uint8_t *dst, int32_t dst_stride, int32_t height)
+{
+    v16u8 const20 = (v16u8) __msa_ldi_b(20);
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
+    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
+    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
+    v16u8 inp0, inp1, inp2, inp3;
+    v16u8 filt01, filt23;
+    uint8_t quads = height >> 2;    /* number of four-row passes */
+
+    while (quads--) {
+        LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
+        src += 4 * src_stride;
+        /* One macro call handles two rows; its result vector presumably
+         * holds them as two 8-byte halves, which ST8x4_UB writes out. */
+        filt01 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1,
+            mask0, mask1, mask2, mask3, const20, const6, const3);
+        filt23 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3,
+            mask0, mask1, mask2, mask3, const20, const6, const3);
+        ST8x4_UB(filt01, filt23, dst, dst_stride);
+        dst += 4 * dst_stride;
+    }
+}
+
+/* "No round" horizontal qpel filter for 16-pixel-wide blocks, four rows per
+ * pass; the filter output goes to dst unchanged.  inp1/3/5/7 are the same
+ * rows as inp0/2/4/6 read one byte further right (src + 1). */
+static void horiz_mc_qpel_no_rnd_16width_msa(const uint8_t *src,
+        int32_t src_stride, uint8_t *dst, int32_t dst_stride, int32_t height)
+{
+    v8u16 const20 = (v8u16) __msa_ldi_h(20);
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+    v16u8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
+    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
+    v16u8 filt;
+    uint8_t quads = height >> 2;    /* number of four-row passes */
+
+    while (quads--) {
+        LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
+        LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
+        src += 4 * src_stride;
+        filt = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp0, inp1,
+            mask, const20, const6, const3);
+        ST_UB(filt, dst);
+        dst += dst_stride;
+
+        filt = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp2, inp3,
+            mask, const20, const6, const3);
+        ST_UB(filt, dst);
+        dst += dst_stride;
+
+        filt = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp4, inp5,
+            mask, const20, const6, const3);
+        ST_UB(filt, dst);
+        dst += dst_stride;
+
+        filt = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp6, inp7,
+            mask, const20, const6, const3);
+        ST_UB(filt, dst);
+        dst += dst_stride;
+    }
+}
+
+/* "No round" horizontal qpel filter for 8-pixel-wide blocks, four rows per
+ * pass.  Each filtered row is blended with the source row slid by one byte
+ * using the truncating byte average ave_u_b, then stored. */
+static void horiz_mc_qpel_no_rnd_aver_src1_8width_msa(const uint8_t *src,
+        int32_t src_stride, uint8_t *dst, int32_t dst_stride, int32_t height)
+{
+    v16u8 const20 = (v16u8) __msa_ldi_b(20);
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
+    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
+    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
+    v16u8 inp0, inp1, inp2, inp3;
+    v16u8 filt01, filt23;
+    uint8_t quads = height >> 2;    /* number of four-row passes */
+
+    while (quads--) {
+        LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
+        src += 4 * src_stride;
+        filt01 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1,
+            mask0, mask1, mask2, mask3, const20, const6, const3);
+        filt23 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3,
+            mask0, mask1, mask2, mask3, const20, const6, const3);
+        /* Slide each source row by one byte (presumably so pixel x + 1 sits
+         * at position x), then pack two 8-byte rows into each vector. */
+        SLDI_B2_UB(inp0, inp1, inp0, inp1, inp0, inp1, 1);
+        SLDI_B2_UB(inp2, inp3, inp2, inp3, inp2, inp3, 1);
+        inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
+        inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
+        filt01 = __msa_ave_u_b(inp0, filt01);
+        filt23 = __msa_ave_u_b(inp2, filt23);
+        ST8x4_UB(filt01, filt23, dst, dst_stride);
+        dst += 4 * dst_stride;
+    }
+}
+
+/* "No round" horizontal qpel filter for 16-pixel-wide blocks, four rows per
+ * pass.  Each filtered row is blended with the row loaded from src + 1
+ * (inp1/3/5/7) using the truncating byte average ave_u_b, then stored. */
+static void horiz_mc_qpel_no_rnd_aver_src1_16width_msa(const uint8_t *src,
+        int32_t src_stride, uint8_t *dst, int32_t dst_stride, int32_t height)
+{
+    v8u16 const20 = (v8u16) __msa_ldi_h(20);
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+    v16u8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
+    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
+    v16u8 filt;
+    uint8_t quads = height >> 2;    /* number of four-row passes */
+
+    while (quads--) {
+        LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
+        LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
+        src += 4 * src_stride;
+        filt = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp0, inp1,
+            mask, const20, const6, const3);
+        filt = __msa_ave_u_b(filt, inp1);
+        ST_UB(filt, dst);
+        dst += dst_stride;
+
+        filt = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp2, inp3,
+            mask, const20, const6, const3);
+        filt = __msa_ave_u_b(filt, inp3);
+        ST_UB(filt, dst);
+        dst += dst_stride;
+
+        filt = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp4, inp5,
+            mask, const20, const6, const3);
+        filt = __msa_ave_u_b(filt, inp5);
+        ST_UB(filt, dst);
+        dst += dst_stride;
+
+        filt = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp6, inp7,
+            mask, const20, const6, const3);
+        filt = __msa_ave_u_b(filt, inp7);
+        ST_UB(filt, dst);
+        dst += dst_stride;
+    }
+}
+
+/* Horizontal qpel filter for 8-pixel-wide blocks, four rows per pass.
+ * The filter output is blended first with the source pixels at the same
+ * position and then with the current dst contents (both via AVER_UB2_UB). */
+static void horiz_mc_qpel_avg_dst_aver_src0_8width_msa(const uint8_t *src,
+        int32_t src_stride, uint8_t *dst, int32_t dst_stride, int32_t height)
+{
+    v16u8 const20 = (v16u8) __msa_ldi_b(20);
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
+    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
+    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
+    v16u8 inp0, inp1, inp2, inp3;
+    v16u8 dst0, dst1, dst2, dst3;
+    v16u8 filt01, filt23;
+    uint8_t quads = height >> 2;    /* number of four-row passes */
+
+    while (quads--) {
+        LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
+        src += 4 * src_stride;
+        filt01 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
+            mask0, mask1, mask2, mask3, const20, const6, const3);
+        filt23 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
+            mask0, mask1, mask2, mask3, const20, const6, const3);
+        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+        /* Pack rows pairwise (doubleword 0 of the odd row into doubleword 1
+         * of the even row) for both the source and the existing dst rows. */
+        inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
+        inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
+        dst0 = (v16u8) __msa_insve_d((v2i64) dst0, 1, (v2i64) dst1);
+        dst2 = (v16u8) __msa_insve_d((v2i64) dst2, 1, (v2i64) dst3);
+        AVER_UB2_UB(inp0, filt01, inp2, filt23, filt01, filt23);
+        AVER_UB2_UB(dst0, filt01, dst2, filt23, filt01, filt23);
+        ST8x4_UB(filt01, filt23, dst, dst_stride);
+        dst += 4 * dst_stride;
+    }
+}
+
+/* Horizontal qpel filter for 16-pixel-wide blocks, four rows per pass.
+ * The filter output is blended first with the row loaded from src
+ * (inp0/2/4/6) and then with the current dst row (both via AVER_UB2_UB). */
+static void horiz_mc_qpel_avg_dst_aver_src0_16width_msa(const uint8_t *src,
+        int32_t src_stride, uint8_t *dst, int32_t dst_stride, int32_t height)
+{
+    v8u16 const20 = (v8u16) __msa_ldi_h(20);
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+    v16u8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
+    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
+    v16u8 filt0, filt1, dst0, dst1;
+    uint8_t quads = height >> 2;    /* number of four-row passes */
+
+    while (quads--) {
+        LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
+        LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
+        src += 4 * src_stride;
+        filt0 = APPLY_HORIZ_QPEL_FILTER(inp0, inp1,
+            mask, const20, const6, const3);
+        filt1 = APPLY_HORIZ_QPEL_FILTER(inp2, inp3,
+            mask, const20, const6, const3);
+        LD_UB2(dst, dst_stride, dst0, dst1);
+        /* blend with the source row first, then with the existing dst */
+        AVER_UB2_UB(inp0, filt0, inp2, filt1, filt0, filt1);
+        AVER_UB2_UB(dst0, filt0, dst1, filt1, filt0, filt1);
+        ST_UB2(filt0, filt1, dst, dst_stride);
+        dst += 2 * dst_stride;
+
+        filt0 = APPLY_HORIZ_QPEL_FILTER(inp4, inp5,
+            mask, const20, const6, const3);
+        filt1 = APPLY_HORIZ_QPEL_FILTER(inp6, inp7,
+            mask, const20, const6, const3);
+        LD_UB2(dst, dst_stride, dst0, dst1);
+        AVER_UB2_UB(inp4, filt0, inp6, filt1, filt0, filt1);
+        AVER_UB2_UB(dst0, filt0, dst1, filt1, filt0, filt1);
+        ST_UB2(filt0, filt1, dst, dst_stride);
+        dst += 2 * dst_stride;
+    }
+}
+
+/* Horizontal qpel filter for 8-pixel-wide blocks, four rows per pass.
+ * The filter output is blended with the pixels already in dst through
+ * AVER_UB2_UB (presumably the rounding average aver_u_b) and written back. */
+static void horiz_mc_qpel_avg_dst_8width_msa(const uint8_t *src,
+        int32_t src_stride, uint8_t *dst, int32_t dst_stride, int32_t height)
+{
+    v16u8 const20 = (v16u8) __msa_ldi_b(20);
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
+    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
+    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
+    v16u8 inp0, inp1, inp2, inp3;
+    v16u8 dst0, dst1, dst2, dst3;
+    v16u8 filt01, filt23;
+    uint8_t quads = height >> 2;    /* number of four-row passes */
+
+    while (quads--) {
+        LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
+        src += 4 * src_stride;
+        filt01 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
+            mask0, mask1, mask2, mask3, const20, const6, const3);
+        filt23 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
+            mask0, mask1, mask2, mask3, const20, const6, const3);
+        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+        /* Pack the existing dst rows pairwise: doubleword 0 of the odd row
+         * goes into doubleword 1 of the even row. */
+        dst0 = (v16u8) __msa_insve_d((v2i64) dst0, 1, (v2i64) dst1);
+        dst2 = (v16u8) __msa_insve_d((v2i64) dst2, 1, (v2i64) dst3);
+        AVER_UB2_UB(dst0, filt01, dst2, filt23, filt01, filt23);
+        ST8x4_UB(filt01, filt23, dst, dst_stride);
+        dst += 4 * dst_stride;
+    }
+}
+
+static void horiz_mc_qpel_avg_dst_16width_msa(const uint8_t *src,
+                                              int32_t src_stride,
+                                              uint8_t *dst,
+                                              int32_t dst_stride,
+                                              int32_t height)
+{ /* Runs the 16-byte horizontal qpel filter macro over src rows and averages the result with the rows already stored in dst. */
+    uint8_t loop_count; /* number of 4-row groups left; NOTE(review): 8-bit, so height >> 2 is truncated to 0..255 */
+    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
+    v16u8 res0, res1;
+    v16u8 dst0, dst1;
+    v16u8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; /* byte indices in descending order, passed to the filter macro; exact use is defined by the macro */
+    v16u8 const6 = (v16u8) __msa_ldi_b(6); /* ldi.b replicates the immediate into every byte lane */
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+    v8u16 const20 = (v8u16) __msa_ldi_h(20); /* unlike const6/const3 this one is replicated per halfword lane */
+
+    for (loop_count = (height >> 2); loop_count--;) { /* 4 rows per pass; any height % 4 remainder is not processed */
+        LD_UB4(src, src_stride, inp0, inp2, inp4, inp6); /* even-numbered inputs: rows read starting at src */
+        LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7); /* odd-numbered inputs: the same rows read one byte further right */
+        src += (4 * src_stride);
+        res0 = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask,
+                                       const20, const6, const3); /* row 0 of this group */
+        res1 = APPLY_HORIZ_QPEL_FILTER(inp2, inp3, mask,
+                                       const20, const6, const3); /* row 1 */
+        LD_UB2(dst, dst_stride, dst0, dst1); /* current contents of destination rows 0 and 1 */
+        AVER_UB2_UB(dst0, res0, dst1, res1, res0, res1); /* presumably a rounding byte average of dst and filter output - confirm macro */
+        ST_UB2(res0, res1, dst, dst_stride);
+        dst += (2 * dst_stride);
+
+        res0 = APPLY_HORIZ_QPEL_FILTER(inp4, inp5, mask,
+                                       const20, const6, const3); /* row 2 */
+        res1 = APPLY_HORIZ_QPEL_FILTER(inp6, inp7, mask,
+                                       const20, const6, const3); /* row 3 */
+        LD_UB2(dst, dst_stride, dst0, dst1); /* destination rows 2 and 3 (dst was advanced above) */
+        AVER_UB2_UB(dst0, res0, dst1, res1, res0, res1);
+        ST_UB2(res0, res1, dst, dst_stride);
+        dst += (2 * dst_stride);
+    }
+}
+
+static void horiz_mc_qpel_avg_dst_aver_src1_8width_msa(const uint8_t *src,
+                                                       int32_t src_stride,
+                                                       uint8_t *dst,
+                                                       int32_t dst_stride,
+                                                       int32_t height)
+{ /* Horizontal qpel filter output is averaged first with the one-byte-shifted source rows and then with the rows already in dst. */
+    uint8_t loop_count; /* number of 4-row groups left; NOTE(review): 8-bit, so height >> 2 is truncated to 0..255 */
+    v16u8 inp0, inp1, inp2, inp3;
+    v16u8 dst0, dst1, dst2, dst3;
+    v16u8 res0, res1;
+    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 }; /* mask0..mask3 are byte-index tables handed to the filter macro; presumably shuffle patterns, confirm against the macro */
+    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
+    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
+    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
+    v16u8 const20 = (v16u8) __msa_ldi_b(20); /* ldi.b replicates the immediate into every byte lane */
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+
+    for (loop_count = (height >> 2); loop_count--;) { /* 4 rows per pass; any height % 4 remainder is not processed */
+        LD_UB4(src, src_stride, inp0, inp1, inp2, inp3); /* presumably one vector load per source row, 4 rows */
+        src += (4 * src_stride);
+        res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
+                                             mask0, mask1, mask2, mask3,
+                                             const20, const6, const3); /* filtered rows 0 and 1 */
+        res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
+                                             mask0, mask1, mask2, mask3,
+                                             const20, const6, const3); /* filtered rows 2 and 3 */
+        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); /* current contents of the 4 destination rows */
+        SLDI_B2_UB(inp0, inp1, inp0, inp1, inp0, inp1, 1); /* done after filtering; presumably slides each row by one byte so it starts at src + 1 - confirm macro */
+        SLDI_B2_UB(inp2, inp3, inp2, inp3, inp2, inp3, 1);
+        inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1); /* doubleword 0 of inp1 goes into doubleword 1 of inp0: two 8-byte rows in one vector */
+        inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
+        dst0 = (v16u8) __msa_insve_d((v2i64) dst0, 1, (v2i64) dst1); /* same packing for the destination rows */
+        dst2 = (v16u8) __msa_insve_d((v2i64) dst2, 1, (v2i64) dst3);
+        AVER_UB2_UB(inp0, res0, inp2, res1, res0, res1); /* first average: filter output with the shifted source rows */
+        AVER_UB2_UB(dst0, res0, dst2, res1, res0, res1); /* second average: that result with the existing dst rows */
+        ST8x4_UB(res0, res1, dst, dst_stride); /* presumably writes 8 bytes to each of 4 rows */
+        dst += (4 * dst_stride);
+    }
+}
+
+static void horiz_mc_qpel_avg_dst_aver_src1_16width_msa(const uint8_t *src,
+                                                        int32_t src_stride,
+                                                        uint8_t *dst,
+                                                        int32_t dst_stride,
+                                                        int32_t height)
+{ /* Horizontal qpel filter output is averaged first with the rows read at src + 1 and then with the rows already in dst. */
+    uint8_t loop_count; /* number of 4-row groups left; NOTE(review): 8-bit, so height >> 2 is truncated to 0..255 */
+    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
+    v16u8 res0, res1, dst0, dst1;
+    v16u8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; /* byte indices in descending order, passed to the filter macro; exact use is defined by the macro */
+    v16u8 const6 = (v16u8) __msa_ldi_b(6); /* ldi.b replicates the immediate into every byte lane */
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+    v8u16 const20 = (v8u16) __msa_ldi_h(20); /* replicated per halfword lane, unlike const6/const3 */
+
+    for (loop_count = (height >> 2); loop_count--;) { /* 4 rows per pass; any height % 4 remainder is not processed */
+        LD_UB4(src, src_stride, inp0, inp2, inp4, inp6); /* even-numbered inputs: rows read starting at src */
+        LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7); /* odd-numbered inputs: the same rows read one byte further right */
+        src += (4 * src_stride);
+        res0 = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask,
+                                       const20, const6, const3); /* row 0 of this group */
+        res1 = APPLY_HORIZ_QPEL_FILTER(inp2, inp3, mask,
+                                       const20, const6, const3); /* row 1 */
+        LD_UB2(dst, dst_stride, dst0, dst1); /* current contents of destination rows 0 and 1 */
+        AVER_UB2_UB(res0, inp1, res1, inp3, res0, res1); /* first average: filter output with the src + 1 rows */
+        AVER_UB2_UB(dst0, res0, dst1, res1, res0, res1); /* second average: that result with the existing dst rows */
+        ST_UB2(res0, res1, dst, dst_stride);
+        dst += (2 * dst_stride);
+        res0 = APPLY_HORIZ_QPEL_FILTER(inp4, inp5, mask,
+                                       const20, const6, const3); /* row 2 */
+        res1 = APPLY_HORIZ_QPEL_FILTER(inp6, inp7, mask,
+                                       const20, const6, const3); /* row 3 */
+        LD_UB2(dst, dst_stride, dst0, dst1); /* destination rows 2 and 3 (dst was advanced above) */
+        AVER_UB2_UB(res0, inp5, res1, inp7, res0, res1);
+        AVER_UB2_UB(dst0, res0, dst1, res1, res0, res1);
+        ST_UB2(res0, res1, dst, dst_stride);
+        dst += (2 * dst_stride);
+    }
+}
+
+
+static void vert_mc_qpel_aver_src0_8x8_msa(const uint8_t *src,
+                                           int32_t src_stride,
+                                           uint8_t *dst,
+                                           int32_t dst_stride)
+{ /* Vertical qpel filter for 8 output rows; each output row is averaged with the source row of the same index (inp0..inp7). */
+    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8; /* nine source rows are read in total */
+    v16u8 tmp0, tmp1, res0, res1;
+    v16u8 const20 = (v16u8) __msa_ldi_b(20); /* ldi.b replicates the immediate into every byte lane */
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+
+    LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
+    src += (4 * src_stride);
+    LD_UB2(src, src_stride, inp4, inp5);
+    src += (2 * src_stride);
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp0, inp0, inp1, inp2,
+                                        inp1, inp2, inp3, inp4,
+                                        inp1, inp0, inp0, inp1,
+                                        inp2, inp3, inp4, inp5,
+                                        const20, const6, const3); /* presumably first 8 row args give output row 0, last 8 give row 1; inp0/inp1 repeat at the top edge */
+    LD_UB2(src, src_stride, inp6, inp7);
+    src += (2 * src_stride);
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp2, inp1, inp0, inp0,
+                                        inp3, inp4, inp5, inp6,
+                                        inp3, inp2, inp1, inp0,
+                                        inp4, inp5, inp6, inp7,
+                                        const20, const6, const3); /* output rows 2 and 3 */
+    tmp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1); /* low 8 bytes of inp0 and inp1 packed into one vector */
+    tmp1 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
+    AVER_UB2_UB(res0, tmp0, res1, tmp1, res0, res1); /* presumably a rounding byte average with source rows 0..3 - confirm macro */
+    ST8x4_UB(res0, res1, dst, dst_stride);
+    dst += (4 * dst_stride);
+
+    inp8 = LD_UB(src);
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp4, inp3, inp2, inp1,
+                                        inp5, inp6, inp7, inp8,
+                                        inp5, inp4, inp3, inp2,
+                                        inp6, inp7, inp8, inp8,
+                                        const20, const6, const3); /* output rows 4 and 5; inp8 starts repeating at the bottom edge */
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp6, inp5, inp4, inp3,
+                                        inp7, inp8, inp8, inp7,
+                                        inp7, inp6, inp5, inp4,
+                                        inp8, inp8, inp7, inp6,
+                                        const20, const6, const3); /* output rows 6 and 7 */
+    tmp0 = (v16u8) __msa_insve_d((v2i64) inp4, 1, (v2i64) inp5);
+    tmp1 = (v16u8) __msa_insve_d((v2i64) inp6, 1, (v2i64) inp7);
+    AVER_UB2_UB(res0, tmp0, res1, tmp1, res0, res1); /* average with source rows 4..7 */
+    ST8x4_UB(res0, res1, dst, dst_stride);
+    dst += (4 * dst_stride); /* NOTE(review): dst is not used again before the function returns */
+}
+
+static void vert_mc_qpel_aver_src0_16x16_msa(const uint8_t *src,
+                                             int32_t src_stride,
+                                             uint8_t *dst,
+                                             int32_t dst_stride)
+{ /* Vertical qpel filter for 16 output rows; output row N is averaged (aver_u.b, rounding) with source row inpN before being stored. */
+    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
+    v16u8 inp9, inp10, inp11, inp12, inp13, inp14, inp15, inp16; /* 17 source rows are read in total */
+    v16u8 res0;
+    v16u8 const20 = (v16u8) __msa_ldi_b(20); /* ldi.b replicates the immediate into every byte lane */
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+
+    LD_UB5(src, src_stride, inp0, inp1, inp2, inp3, inp4);
+    src += (5 * src_stride);
+    res0 = APPLY_VERT_QPEL_FILTER(inp0, inp0, inp1, inp2,
+                                  inp1, inp2, inp3, inp4,
+                                  const20, const6, const3); /* output row 0; inp0 appears twice at the top edge */
+    res0 = __msa_aver_u_b(res0, inp0); /* rounding unsigned byte average with source row 0 */
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp5 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_FILTER(inp1, inp0, inp0, inp1,
+                                  inp2, inp3, inp4, inp5,
+                                  const20, const6, const3); /* output row 1; inp0 and inp1 each appear twice */
+    res0 = __msa_aver_u_b(res0, inp1);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp6 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_FILTER(inp2, inp1, inp0, inp0,
+                                  inp3, inp4, inp5, inp6,
+                                  const20, const6, const3); /* output row 2; last row where inp0 is repeated */
+    res0 = __msa_aver_u_b(res0, inp2);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp7 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_FILTER(inp3, inp2, inp1, inp0,
+                                  inp4, inp5, inp6, inp7,
+                                  const20, const6, const3); /* output row 3; from here on eight distinct rows are used */
+    res0 = __msa_aver_u_b(res0, inp3);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    LD_UB2(src, src_stride, inp8, inp9); /* from here source rows are fetched two at a time */
+    src += (2 * src_stride);
+    res0 = APPLY_VERT_QPEL_FILTER(inp4, inp3, inp2, inp1,
+                                  inp5, inp6, inp7, inp8,
+                                  const20, const6, const3); /* output row 4 */
+    res0 = __msa_aver_u_b(res0, inp4);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    res0 = APPLY_VERT_QPEL_FILTER(inp5, inp4, inp3, inp2,
+                                  inp6, inp7, inp8, inp9,
+                                  const20, const6, const3); /* output row 5 */
+    res0 = __msa_aver_u_b(res0, inp5);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    LD_UB2(src, src_stride, inp10, inp11);
+    src += (2 * src_stride);
+    res0 = APPLY_VERT_QPEL_FILTER(inp6, inp5, inp4, inp3,
+                                  inp7, inp8, inp9, inp10,
+                                  const20, const6, const3); /* output row 6 */
+    res0 = __msa_aver_u_b(res0, inp6);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    res0 = APPLY_VERT_QPEL_FILTER(inp7, inp6, inp5, inp4,
+                                  inp8, inp9, inp10, inp11,
+                                  const20, const6, const3); /* output row 7 */
+    res0 = __msa_aver_u_b(res0, inp7);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    LD_UB2(src, src_stride, inp12, inp13);
+    src += (2 * src_stride);
+    res0 = APPLY_VERT_QPEL_FILTER(inp8, inp7, inp6, inp5,
+                                  inp9, inp10, inp11, inp12,
+                                  const20, const6, const3); /* output row 8 */
+    res0 = __msa_aver_u_b(res0, inp8);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    res0 = APPLY_VERT_QPEL_FILTER(inp9, inp8, inp7, inp6,
+                                  inp10, inp11, inp12, inp13,
+                                  const20, const6, const3); /* output row 9 */
+    res0 = __msa_aver_u_b(res0, inp9);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    LD_UB2(src, src_stride, inp14, inp15);
+    src += (2 * src_stride);
+    res0 = APPLY_VERT_QPEL_FILTER(inp10, inp9, inp8, inp7,
+                                  inp11, inp12, inp13, inp14,
+                                  const20, const6, const3); /* output row 10 */
+    res0 = __msa_aver_u_b(res0, inp10);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    res0 = APPLY_VERT_QPEL_FILTER(inp11, inp10, inp9, inp8,
+                                  inp12, inp13, inp14, inp15,
+                                  const20, const6, const3); /* output row 11 */
+    res0 = __msa_aver_u_b(res0, inp11);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp16 = LD_UB(src); /* last source row read; src is not advanced afterwards */
+    res0 = APPLY_VERT_QPEL_FILTER(inp12, inp11, inp10, inp9,
+                                  inp13, inp14, inp15, inp16,
+                                  const20, const6, const3); /* output row 12 */
+    res0 = __msa_aver_u_b(res0, inp12);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    res0 = APPLY_VERT_QPEL_FILTER(inp13, inp12, inp11, inp10,
+                                  inp14, inp15, inp16, inp16,
+                                  const20, const6, const3); /* output row 13; inp16 appears twice at the bottom edge */
+    res0 = __msa_aver_u_b(res0, inp13);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    res0 = APPLY_VERT_QPEL_FILTER(inp14, inp13, inp12, inp11,
+                                  inp15, inp16, inp16, inp15,
+                                  const20, const6, const3); /* output row 14; inp15 and inp16 each appear twice */
+    res0 = __msa_aver_u_b(res0, inp14);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    res0 = APPLY_VERT_QPEL_FILTER(inp15, inp14, inp13, inp12,
+                                  inp16, inp16, inp15, inp14,
+                                  const20, const6, const3); /* output row 15 */
+    res0 = __msa_aver_u_b(res0, inp15);
+    ST_UB(res0, dst);
+}
+
+static void vert_mc_qpel_8x8_msa(const uint8_t *src,
+                                 int32_t src_stride,
+                                 uint8_t *dst,
+                                 int32_t dst_stride)
+{ /* Vertical qpel filter for 8 output rows; the filter output is stored directly, with no averaging step. */
+    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8; /* nine source rows are read in total */
+    v16u8 res0, res1;
+    v16u8 const20 = (v16u8) __msa_ldi_b(20); /* ldi.b replicates the immediate into every byte lane */
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+
+    LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
+    src += (4 * src_stride);
+    LD_UB2(src, src_stride, inp4, inp5);
+    src += (2 * src_stride);
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp0, inp0, inp1, inp2,
+                                        inp1, inp2, inp3, inp4,
+                                        inp1, inp0, inp0, inp1,
+                                        inp2, inp3, inp4, inp5,
+                                        const20, const6, const3); /* presumably first 8 row args give output row 0, last 8 give row 1; inp0/inp1 repeat at the top edge */
+    LD_UB2(src, src_stride, inp6, inp7);
+    src += (2 * src_stride);
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp2, inp1, inp0, inp0,
+                                        inp3, inp4, inp5, inp6,
+                                        inp3, inp2, inp1, inp0,
+                                        inp4, inp5, inp6, inp7,
+                                        const20, const6, const3); /* output rows 2 and 3 */
+    ST8x4_UB(res0, res1, dst, dst_stride); /* presumably writes 8 bytes to each of 4 rows */
+    dst += (4 * dst_stride);
+
+    inp8 = LD_UB(src);
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp4, inp3, inp2, inp1,
+                                        inp5, inp6, inp7, inp8,
+                                        inp5, inp4, inp3, inp2,
+                                        inp6, inp7, inp8, inp8,
+                                        const20, const6, const3); /* output rows 4 and 5; inp8 starts repeating at the bottom edge */
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp6, inp5, inp4, inp3,
+                                        inp7, inp8, inp8, inp7,
+                                        inp7, inp6, inp5, inp4,
+                                        inp8, inp8, inp7, inp6,
+                                        const20, const6, const3); /* output rows 6 and 7 */
+    ST8x4_UB(res0, res1, dst, dst_stride);
+    dst += (4 * dst_stride); /* NOTE(review): dst is not used again before the function returns */
+}
+
+static void vert_mc_qpel_16x16_msa(const uint8_t *src,
+                                   int32_t src_stride,
+                                   uint8_t *dst,
+                                   int32_t dst_stride)
+{ /* Vertical qpel filter for 16 output rows; the filter output is stored directly, with no averaging step. */
+    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
+    v16u8 inp9, inp10, inp11, inp12, inp13, inp14, inp15, inp16; /* 17 source rows are read in total */
+    v16u8 res0;
+    v16u8 const20 = (v16u8) __msa_ldi_b(20); /* ldi.b replicates the immediate into every byte lane */
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+
+    LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
+    src += (4 * src_stride);
+    inp4 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_FILTER(inp0, inp0, inp1, inp2,
+                                  inp1, inp2, inp3, inp4,
+                                  const20, const6, const3); /* output row 0; inp0 appears twice at the top edge */
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp5 = LD_UB(src); /* from here one new source row is fetched per output row */
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_FILTER(inp1, inp0, inp0, inp1,
+                                  inp2, inp3, inp4, inp5,
+                                  const20, const6, const3); /* output row 1; inp0 and inp1 each appear twice */
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp6 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_FILTER(inp2, inp1, inp0, inp0,
+                                  inp3, inp4, inp5, inp6,
+                                  const20, const6, const3); /* output row 2; last row where inp0 is repeated */
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp7 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_FILTER(inp3, inp2, inp1, inp0,
+                                  inp4, inp5, inp6, inp7,
+                                  const20, const6, const3); /* output row 3; from here on eight distinct rows are used */
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp8 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_FILTER(inp4, inp3, inp2, inp1,
+                                  inp5, inp6, inp7, inp8,
+                                  const20, const6, const3); /* output row 4 */
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp9 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_FILTER(inp5, inp4, inp3, inp2,
+                                  inp6, inp7, inp8, inp9,
+                                  const20, const6, const3); /* output row 5 */
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp10 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_FILTER(inp6, inp5, inp4, inp3,
+                                  inp7, inp8, inp9, inp10,
+                                  const20, const6, const3); /* output row 6 */
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp11 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_FILTER(inp7, inp6, inp5, inp4,
+                                  inp8, inp9, inp10, inp11,
+                                  const20, const6, const3); /* output row 7 */
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp12 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_FILTER(inp8, inp7, inp6, inp5,
+                                  inp9, inp10, inp11, inp12,
+                                  const20, const6, const3); /* output row 8 */
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp13 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_FILTER(inp9, inp8, inp7, inp6,
+                                  inp10, inp11, inp12, inp13,
+                                  const20, const6, const3); /* output row 9 */
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp14 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_FILTER(inp10, inp9, inp8, inp7,
+                                  inp11, inp12, inp13, inp14,
+                                  const20, const6, const3); /* output row 10 */
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp15 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_FILTER(inp11, inp10, inp9, inp8,
+                                  inp12, inp13, inp14, inp15,
+                                  const20, const6, const3); /* output row 11 */
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp16 = LD_UB(src); /* last source row read; src is not advanced afterwards */
+    res0 = APPLY_VERT_QPEL_FILTER(inp12, inp11, inp10, inp9,
+                                  inp13, inp14, inp15, inp16,
+                                  const20, const6, const3); /* output row 12 */
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    res0 = APPLY_VERT_QPEL_FILTER(inp13, inp12, inp11, inp10,
+                                  inp14, inp15, inp16, inp16,
+                                  const20, const6, const3); /* output row 13; inp16 appears twice at the bottom edge */
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    res0 = APPLY_VERT_QPEL_FILTER(inp14, inp13, inp12, inp11,
+                                  inp15, inp16, inp16, inp15,
+                                  const20, const6, const3); /* output row 14; inp15 and inp16 each appear twice */
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    res0 = APPLY_VERT_QPEL_FILTER(inp15, inp14, inp13, inp12,
+                                  inp16, inp16, inp15, inp14,
+                                  const20, const6, const3); /* output row 15 */
+    ST_UB(res0, dst);
+    dst += dst_stride; /* NOTE(review): dst is not used again before the function returns */
+}
+
+static void vert_mc_qpel_aver_src1_8x8_msa(const uint8_t *src,
+                                           int32_t src_stride,
+                                           uint8_t *dst,
+                                           int32_t dst_stride)
+{ /* Vertical qpel filter for 8 output rows; output row N is averaged with the source row one below it (inp1..inp8). */
+    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8; /* nine source rows are read in total */
+    v16u8 tmp0, tmp1, res0, res1;
+    v16u8 const20 = (v16u8) __msa_ldi_b(20); /* ldi.b replicates the immediate into every byte lane */
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+
+    LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
+    src += (4 * src_stride);
+    LD_UB2(src, src_stride, inp4, inp5);
+    src += (2 * src_stride);
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp0, inp0, inp1, inp2,
+                                        inp1, inp2, inp3, inp4,
+                                        inp1, inp0, inp0, inp1,
+                                        inp2, inp3, inp4, inp5,
+                                        const20, const6, const3); /* presumably first 8 row args give output row 0, last 8 give row 1; inp0/inp1 repeat at the top edge */
+
+    LD_UB2(src, src_stride, inp6, inp7);
+    src += (2 * src_stride);
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp2, inp1, inp0, inp0,
+                                        inp3, inp4, inp5, inp6,
+                                        inp3, inp2, inp1, inp0,
+                                        inp4, inp5, inp6, inp7,
+                                        const20, const6, const3); /* output rows 2 and 3 */
+    tmp0 = (v16u8) __msa_insve_d((v2i64) inp1, 1, (v2i64) inp2); /* low 8 bytes of inp1 and inp2 packed into one vector */
+    tmp1 = (v16u8) __msa_insve_d((v2i64) inp3, 1, (v2i64) inp4);
+    AVER_UB2_UB(res0, tmp0, res1, tmp1, res0, res1); /* presumably a rounding byte average with source rows 1..4 - confirm macro */
+    ST8x4_UB(res0, res1, dst, dst_stride);
+    dst += (4 * dst_stride);
+
+    inp8 = LD_UB(src);
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp4, inp3, inp2, inp1,
+                                        inp5, inp6, inp7, inp8,
+                                        inp5, inp4, inp3, inp2,
+                                        inp6, inp7, inp8, inp8,
+                                        const20, const6, const3); /* output rows 4 and 5; inp8 starts repeating at the bottom edge */
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp6, inp5, inp4, inp3,
+                                        inp7, inp8, inp8, inp7,
+                                        inp7, inp6, inp5, inp4,
+                                        inp8, inp8, inp7, inp6,
+                                        const20, const6, const3); /* output rows 6 and 7 */
+    tmp0 = (v16u8) __msa_insve_d((v2i64) inp5, 1, (v2i64) inp6);
+    tmp1 = (v16u8) __msa_insve_d((v2i64) inp7, 1, (v2i64) inp8);
+    AVER_UB2_UB(res0, tmp0, res1, tmp1, res0, res1); /* average with source rows 5..8 */
+    ST8x4_UB(res0, res1, dst, dst_stride);
+}
+
+static void vert_mc_qpel_aver_src1_16x16_msa(const uint8_t *src,
+                                             int32_t src_stride,
+                                             uint8_t *dst,
+                                             int32_t dst_stride)
+{ /* Vertical qpel filter for 16 output rows; output row N is averaged (aver_u.b, rounding) with source row inp(N+1) before being stored. */
+    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
+    v16u8 inp9, inp10, inp11, inp12, inp13, inp14, inp15, inp16; /* 17 source rows are read in total */
+    v16u8 res0;
+    v16u8 const20 = (v16u8) __msa_ldi_b(20); /* ldi.b replicates the immediate into every byte lane */
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+
+    LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
+    src += (4 * src_stride);
+    inp4 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_FILTER(inp0, inp0, inp1, inp2,
+                                  inp1, inp2, inp3, inp4,
+                                  const20, const6, const3); /* output row 0; inp0 appears twice at the top edge */
+    res0 = __msa_aver_u_b(res0, inp1); /* rounding unsigned byte average with the next source row down */
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp5 = LD_UB(src); /* from here one new source row is fetched per output row */
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_FILTER(inp1, inp0, inp0, inp1,
+                                  inp2, inp3, inp4, inp5,
+                                  const20, const6, const3); /* output row 1; inp0 and inp1 each appear twice */
+    res0 = __msa_aver_u_b(res0, inp2);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp6 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_FILTER(inp2, inp1, inp0, inp0,
+                                  inp3, inp4, inp5, inp6,
+                                  const20, const6, const3); /* output row 2; last row where inp0 is repeated */
+    res0 = __msa_aver_u_b(res0, inp3);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp7 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_FILTER(inp3, inp2, inp1, inp0,
+                                  inp4, inp5, inp6, inp7,
+                                  const20, const6, const3); /* output row 3; from here on eight distinct rows are used */
+    res0 = __msa_aver_u_b(res0, inp4);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp8 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_FILTER(inp4, inp3, inp2, inp1,
+                                  inp5, inp6, inp7, inp8,
+                                  const20, const6, const3); /* output row 4 */
+    res0 = __msa_aver_u_b(res0, inp5);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp9 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_FILTER(inp5, inp4, inp3, inp2,
+                                  inp6, inp7, inp8, inp9,
+                                  const20, const6, const3); /* output row 5 */
+    res0 = __msa_aver_u_b(res0, inp6);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp10 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_FILTER(inp6, inp5, inp4, inp3,
+                                  inp7, inp8, inp9, inp10,
+                                  const20, const6, const3); /* output row 6 */
+    res0 = __msa_aver_u_b(res0, inp7);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp11 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_FILTER(inp7, inp6, inp5, inp4,
+                                  inp8, inp9, inp10, inp11,
+                                  const20, const6, const3); /* output row 7 */
+    res0 = __msa_aver_u_b(res0, inp8);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp12 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_FILTER(inp8, inp7, inp6, inp5,
+                                  inp9, inp10, inp11, inp12,
+                                  const20, const6, const3); /* output row 8 */
+    res0 = __msa_aver_u_b(res0, inp9);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp13 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_FILTER(inp9, inp8, inp7, inp6,
+                                  inp10, inp11, inp12, inp13,
+                                  const20, const6, const3); /* output row 9 */
+    res0 = __msa_aver_u_b(res0, inp10);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp14 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_FILTER(inp10, inp9, inp8, inp7,
+                                  inp11, inp12, inp13, inp14,
+                                  const20, const6, const3); /* output row 10 */
+    res0 = __msa_aver_u_b(res0, inp11);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp15 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_FILTER(inp11, inp10, inp9, inp8,
+                                  inp12, inp13, inp14, inp15,
+                                  const20, const6, const3); /* output row 11 */
+    res0 = __msa_aver_u_b(res0, inp12);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp16 = LD_UB(src); /* last source row read; src is not advanced afterwards */
+    res0 = APPLY_VERT_QPEL_FILTER(inp12, inp11, inp10, inp9,
+                                  inp13, inp14, inp15, inp16,
+                                  const20, const6, const3); /* output row 12 */
+    res0 = __msa_aver_u_b(res0, inp13);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    res0 = APPLY_VERT_QPEL_FILTER(inp13, inp12, inp11, inp10,
+                                  inp14, inp15, inp16, inp16,
+                                  const20, const6, const3); /* output row 13; inp16 appears twice at the bottom edge */
+    res0 = __msa_aver_u_b(res0, inp14);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    res0 = APPLY_VERT_QPEL_FILTER(inp14, inp13, inp12, inp11,
+                                  inp15, inp16, inp16, inp15,
+                                  const20, const6, const3); /* output row 14; inp15 and inp16 each appear twice */
+    res0 = __msa_aver_u_b(res0, inp15);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    res0 = APPLY_VERT_QPEL_FILTER(inp15, inp14, inp13, inp12,
+                                  inp16, inp16, inp15, inp14,
+                                  const20, const6, const3); /* output row 15 */
+    res0 = __msa_aver_u_b(res0, inp16); /* final row is averaged with the 17th source row */
+    ST_UB(res0, dst);
+}
+
+static void vert_mc_qpel_no_rnd_aver_src0_8x8_msa(const uint8_t *src,
+                                                  int32_t src_stride,
+                                                  uint8_t *dst,
+                                                  int32_t dst_stride)
+{ /* Vertical qpel filter (NO_ROUND macro variant) for 8 output rows; each output row is averaged with the same-index source row using ave_u.b. */
+    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8; /* nine source rows are read in total */
+    v16u8 tmp0, tmp1, res0, res1;
+    v16u8 const20 = (v16u8) __msa_ldi_b(20); /* ldi.b replicates the immediate into every byte lane */
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+
+    LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
+    src += (4 * src_stride);
+    LD_UB2(src, src_stride, inp4, inp5);
+    src += (2 * src_stride);
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp0, inp1, inp2,
+                                                 inp1, inp2, inp3, inp4,
+                                                 inp1, inp0, inp0, inp1,
+                                                 inp2, inp3, inp4, inp5,
+                                                 const20, const6, const3); /* presumably first 8 row args give output row 0, last 8 give row 1; inp0/inp1 repeat at the top edge */
+    LD_UB2(src, src_stride, inp6, inp7);
+    src += (2 * src_stride);
+    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp1, inp0, inp0,
+                                                 inp3, inp4, inp5, inp6,
+                                                 inp3, inp2, inp1, inp0,
+                                                 inp4, inp5, inp6, inp7,
+                                                 const20, const6, const3); /* output rows 2 and 3 */
+    tmp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1); /* low 8 bytes of inp0 and inp1 packed into one vector */
+    tmp1 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
+    res0 = __msa_ave_u_b(res0, tmp0); /* ave_u.b is the truncating average, not the rounding aver_u.b */
+    res1 = __msa_ave_u_b(res1, tmp1);
+    ST8x4_UB(res0, res1, dst, dst_stride);
+    dst += (4 * dst_stride);
+
+    inp8 = LD_UB(src);
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp4, inp3, inp2, inp1,
+                                                 inp5, inp6, inp7, inp8,
+                                                 inp5, inp4, inp3, inp2,
+                                                 inp6, inp7, inp8, inp8,
+                                                 const20, const6, const3); /* output rows 4 and 5; inp8 starts repeating at the bottom edge */
+    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp6, inp5, inp4, inp3,
+                                                 inp7, inp8, inp8, inp7,
+                                                 inp7, inp6, inp5, inp4,
+                                                 inp8, inp8, inp7, inp6,
+                                                 const20, const6, const3); /* output rows 6 and 7 */
+    tmp0 = (v16u8) __msa_insve_d((v2i64) inp4, 1, (v2i64) inp5);
+    tmp1 = (v16u8) __msa_insve_d((v2i64) inp6, 1, (v2i64) inp7);
+    res0 = __msa_ave_u_b(res0, tmp0); /* truncating average with source rows 4..5 */
+    res1 = __msa_ave_u_b(res1, tmp1); /* truncating average with source rows 6..7 */
+    ST8x4_UB(res0, res1, dst, dst_stride);
+    dst += (4 * dst_stride); /* NOTE(review): dst is not used again before the function returns */
+}
+
+static void vert_mc_qpel_no_rnd_aver_src0_16x16_msa(const uint8_t *src,
+                                                    int32_t src_stride,
+                                                    uint8_t *dst,
+                                                    int32_t dst_stride)
+{ /* 16 rows out; row i = ave_u_b(filter result i, inp<i>), i = 0..15. */
+    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
+    v16u8 inp9, inp10, inp11, inp12, inp13, inp14, inp15, inp16;
+    v16u8 res0;
+    v16u8 const20 = (v16u8) __msa_ldi_b(20);
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+    /* Rows 0..2 are at the top edge: their argument lists repeat inp0. */
+    LD_UB5(src, src_stride, inp0, inp1, inp2, inp3, inp4);
+    src += (5 * src_stride);
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp0, inp0, inp1, inp2,
+                                           inp1, inp2, inp3, inp4,
+                                           const20, const6, const3);
+    res0 = __msa_ave_u_b(res0, inp0); /* non-rounding byte average */
+    ST_UB(res0, dst); /* presumably stores one vector (macro not shown) */
+    dst += dst_stride;
+
+    inp5 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp1, inp0, inp0, inp1,
+                                           inp2, inp3, inp4, inp5,
+                                           const20, const6, const3);
+    res0 = __msa_ave_u_b(res0, inp1);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp6 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp2, inp1, inp0, inp0,
+                                           inp3, inp4, inp5, inp6,
+                                           const20, const6, const3);
+    res0 = __msa_ave_u_b(res0, inp2);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+    /* Rows 3..12 use eight consecutive source rows, inp<i-3>..inp<i+4>. */
+    inp7 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp3, inp2, inp1, inp0,
+                                           inp4, inp5, inp6, inp7,
+                                           const20, const6, const3);
+    res0 = __msa_ave_u_b(res0, inp3);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp8 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp4, inp3, inp2, inp1,
+                                           inp5, inp6, inp7, inp8,
+                                           const20, const6, const3);
+    res0 = __msa_ave_u_b(res0, inp4);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp9 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp5, inp4, inp3, inp2,
+                                           inp6, inp7, inp8, inp9,
+                                           const20, const6, const3);
+    res0 = __msa_ave_u_b(res0, inp5);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp10 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp6, inp5, inp4, inp3,
+                                           inp7, inp8, inp9, inp10,
+                                           const20, const6, const3);
+    res0 = __msa_ave_u_b(res0, inp6);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp11 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp7, inp6, inp5, inp4,
+                                           inp8, inp9, inp10, inp11,
+                                           const20, const6, const3);
+    res0 = __msa_ave_u_b(res0, inp7);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp12 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp8, inp7, inp6, inp5,
+                                           inp9, inp10, inp11, inp12,
+                                           const20, const6, const3);
+    res0 = __msa_ave_u_b(res0, inp8);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp13 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp9, inp8, inp7, inp6,
+                                           inp10, inp11, inp12, inp13,
+                                           const20, const6, const3);
+    res0 = __msa_ave_u_b(res0, inp9);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp14 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp10, inp9, inp8, inp7,
+                                           inp11, inp12, inp13, inp14,
+                                           const20, const6, const3);
+    res0 = __msa_ave_u_b(res0, inp10);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp15 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp11, inp10, inp9, inp8,
+                                           inp12, inp13, inp14, inp15,
+                                           const20, const6, const3);
+    res0 = __msa_ave_u_b(res0, inp11);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+    /* inp16 is the final load; src is not advanced any further. */
+    inp16 = LD_UB(src);
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp12, inp11, inp10, inp9,
+                                           inp13, inp14, inp15, inp16,
+                                           const20, const6, const3);
+    res0 = __msa_ave_u_b(res0, inp12);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+    /* Rows 13..15 are at the bottom edge: argument lists repeat inp16. */
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp13, inp12, inp11, inp10,
+                                           inp14, inp15, inp16, inp16,
+                                           const20, const6, const3);
+    res0 = __msa_ave_u_b(res0, inp13);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp14, inp13, inp12, inp11,
+                                           inp15, inp16, inp16, inp15,
+                                           const20, const6, const3);
+    res0 = __msa_ave_u_b(res0, inp14);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp15, inp14, inp13, inp12,
+                                           inp16, inp16, inp15, inp14,
+                                           const20, const6, const3);
+    res0 = __msa_ave_u_b(res0, inp15);
+    ST_UB(res0, dst);
+    dst += dst_stride; /* NOTE(review): dst is not read after this */
+}
+
+static void vert_mc_qpel_no_rnd_8x8_msa(const uint8_t *src,
+                                        int32_t src_stride,
+                                        uint8_t *dst,
+                                        int32_t dst_stride)
+{ /* 8 rows out; each 8BYTE macro call is given the arguments of two rows. */
+    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
+    v16u8 res0, res1;
+    v16u8 const20 = (v16u8) __msa_ldi_b(20);
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+    /* Nine source rows are read in total (inp0..inp8). */
+    LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
+    src += (4 * src_stride);
+    LD_UB2(src, src_stride, inp4, inp5);
+    src += (2 * src_stride); /* rows 0..2 below repeat inp0 (top edge) */
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp0, inp1, inp2,
+                                                 inp1, inp2, inp3, inp4,
+                                                 inp1, inp0, inp0, inp1,
+                                                 inp2, inp3, inp4, inp5,
+                                                 const20, const6, const3);
+    LD_UB2(src, src_stride, inp6, inp7);
+    src += (2 * src_stride);
+    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp1, inp0, inp0,
+                                                 inp3, inp4, inp5, inp6,
+                                                 inp3, inp2, inp1, inp0,
+                                                 inp4, inp5, inp6, inp7,
+                                                 const20, const6, const3);
+    ST8x4_UB(res0, res1, dst, dst_stride); /* presumably 4 rows of 8 bytes */
+    dst += (4 * dst_stride);
+    /* Rows 4..7; the last three argument lists repeat inp8 (bottom edge). */
+    inp8 = LD_UB(src);
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp4, inp3, inp2, inp1,
+                                                 inp5, inp6, inp7, inp8,
+                                                 inp5, inp4, inp3, inp2,
+                                                 inp6, inp7, inp8, inp8,
+                                                 const20, const6, const3);
+    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp6, inp5, inp4, inp3,
+                                                 inp7, inp8, inp8, inp7,
+                                                 inp7, inp6, inp5, inp4,
+                                                 inp8, inp8, inp7, inp6,
+                                                 const20, const6, const3);
+    ST8x4_UB(res0, res1, dst, dst_stride);
+    dst += (4 * dst_stride); /* NOTE(review): dst is not read after this */
+}
+
+static void vert_mc_qpel_no_rnd_16x16_msa(const uint8_t *src,
+                                          int32_t src_stride,
+                                          uint8_t *dst,
+                                          int32_t dst_stride)
+{ /* 16 rows out; each is the no-round filter macro's result, unblended. */
+    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
+    v16u8 inp9, inp10, inp11, inp12, inp13, inp14, inp15, inp16;
+    v16u8 res0;
+    v16u8 const20 = (v16u8) __msa_ldi_b(20);
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+    /* Rows 0..2 are at the top edge: their argument lists repeat inp0. */
+    LD_UB5(src, src_stride, inp0, inp1, inp2, inp3, inp4);
+    src += (5 * src_stride);
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp0, inp0, inp1, inp2,
+                                           inp1, inp2, inp3, inp4,
+                                           const20, const6, const3);
+    ST_UB(res0, dst); /* presumably stores one vector (macro not shown) */
+    dst += dst_stride;
+
+    inp5 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp1, inp0, inp0, inp1,
+                                           inp2, inp3, inp4, inp5,
+                                           const20, const6, const3);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp6 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp2, inp1, inp0, inp0,
+                                           inp3, inp4, inp5, inp6,
+                                           const20, const6, const3);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+    /* Rows 3..12 use eight consecutive source rows, inp<i-3>..inp<i+4>. */
+    inp7 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp3, inp2, inp1, inp0,
+                                           inp4, inp5, inp6, inp7,
+                                           const20, const6, const3);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp8 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp4, inp3, inp2, inp1,
+                                           inp5, inp6, inp7, inp8,
+                                           const20, const6, const3);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp9 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp5, inp4, inp3, inp2,
+                                           inp6, inp7, inp8, inp9,
+                                           const20, const6, const3);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp10 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp6, inp5, inp4, inp3,
+                                           inp7, inp8, inp9, inp10,
+                                           const20, const6, const3);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp11 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp7, inp6, inp5, inp4,
+                                           inp8, inp9, inp10, inp11,
+                                           const20, const6, const3);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp12 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp8, inp7, inp6, inp5,
+                                           inp9, inp10, inp11, inp12,
+                                           const20, const6, const3);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp13 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp9, inp8, inp7, inp6,
+                                           inp10, inp11, inp12, inp13,
+                                           const20, const6, const3);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp14 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp10, inp9, inp8, inp7,
+                                           inp11, inp12, inp13, inp14,
+                                           const20, const6, const3);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp15 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp11, inp10, inp9, inp8,
+                                           inp12, inp13, inp14, inp15,
+                                           const20, const6, const3);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+    /* inp16 is the final load; src is not advanced any further. */
+    inp16 = LD_UB(src);
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp12, inp11, inp10, inp9,
+                                           inp13, inp14, inp15, inp16,
+                                           const20, const6, const3);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+    /* Rows 13..15 are at the bottom edge: argument lists repeat inp16. */
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp13, inp12, inp11, inp10,
+                                           inp14, inp15, inp16, inp16,
+                                           const20, const6, const3);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp14, inp13, inp12, inp11,
+                                           inp15, inp16, inp16, inp15,
+                                           const20, const6, const3);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp15, inp14, inp13, inp12,
+                                           inp16, inp16, inp15, inp14,
+                                           const20, const6, const3);
+    ST_UB(res0, dst);
+}
+
+static void vert_mc_qpel_no_rnd_aver_src1_8x8_msa(const uint8_t *src,
+                                                  int32_t src_stride,
+                                                  uint8_t *dst,
+                                                  int32_t dst_stride)
+{ /* 8 rows out; output row i is blended (ave_u_b) with source row i+1. */
+    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
+    v16u8 tmp0, tmp1, res0, res1;
+    v16u8 const20 = (v16u8) __msa_ldi_b(20);
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+    /* Nine source rows are read in total (inp0..inp8). */
+    LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
+    src += (4 * src_stride);
+    LD_UB2(src, src_stride, inp4, inp5);
+    src += (2 * src_stride); /* rows 0..2 below repeat inp0 (top edge) */
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp0, inp1, inp2,
+                                                 inp1, inp2, inp3, inp4,
+                                                 inp1, inp0, inp0, inp1,
+                                                 inp2, inp3, inp4, inp5,
+                                                 const20, const6, const3);
+    LD_UB2(src, src_stride, inp6, inp7);
+    src += (2 * src_stride);
+    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp1, inp0, inp0,
+                                                 inp3, inp4, inp5, inp6,
+                                                 inp3, inp2, inp1, inp0,
+                                                 inp4, inp5, inp6, inp7,
+                                                 const20, const6, const3);
+    tmp0 = (v16u8) __msa_insve_d((v2i64) inp1, 1, (v2i64) inp2);
+    tmp1 = (v16u8) __msa_insve_d((v2i64) inp3, 1, (v2i64) inp4);
+    res0 = __msa_ave_u_b(res0, tmp0); /* tmp0: doubleword 0 of inp1, inp2 */
+    res1 = __msa_ave_u_b(res1, tmp1); /* tmp1: doubleword 0 of inp3, inp4 */
+    ST8x4_UB(res0, res1, dst, dst_stride); /* presumably 4 rows of 8 bytes */
+    dst += (4 * dst_stride);
+    /* Rows 4..7; the last three argument lists repeat inp8 (bottom edge). */
+    inp8 = LD_UB(src);
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp4, inp3, inp2, inp1,
+                                                 inp5, inp6, inp7, inp8,
+                                                 inp5, inp4, inp3, inp2,
+                                                 inp6, inp7, inp8, inp8,
+                                                 const20, const6, const3);
+    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp6, inp5, inp4, inp3,
+                                                 inp7, inp8, inp8, inp7,
+                                                 inp7, inp6, inp5, inp4,
+                                                 inp8, inp8, inp7, inp6,
+                                                 const20, const6, const3);
+    tmp0 = (v16u8) __msa_insve_d((v2i64) inp5, 1, (v2i64) inp6);
+    tmp1 = (v16u8) __msa_insve_d((v2i64) inp7, 1, (v2i64) inp8);
+    res0 = __msa_ave_u_b(res0, tmp0);
+    res1 = __msa_ave_u_b(res1, tmp1);
+    ST8x4_UB(res0, res1, dst, dst_stride);
+}
+
+static void vert_mc_qpel_no_rnd_aver_src1_16x16_msa(const uint8_t *src,
+                                                    int32_t src_stride,
+                                                    uint8_t *dst,
+                                                    int32_t dst_stride)
+{ /* 16 rows out; row i = ave_u_b(filter result i, inp<i+1>), i = 0..15. */
+    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
+    v16u8 inp9, inp10, inp11, inp12, inp13, inp14, inp15, inp16;
+    v16u8 res0;
+    v16u8 const20 = (v16u8) __msa_ldi_b(20);
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+    /* Rows 0..2 are at the top edge: their argument lists repeat inp0. */
+    LD_UB5(src, src_stride, inp0, inp1, inp2, inp3, inp4);
+    src += (5 * src_stride);
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp0, inp0, inp1, inp2,
+                                           inp1, inp2, inp3, inp4,
+                                           const20, const6, const3);
+    res0 = __msa_ave_u_b(res0, inp1); /* row 0 blends with inp1 */
+    ST_UB(res0, dst); /* presumably stores one vector (macro not shown) */
+    dst += dst_stride;
+
+    inp5 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp1, inp0, inp0, inp1,
+                                           inp2, inp3, inp4, inp5,
+                                           const20, const6, const3);
+    res0 = __msa_ave_u_b(res0, inp2);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp6 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp2, inp1, inp0, inp0,
+                                           inp3, inp4, inp5, inp6,
+                                           const20, const6, const3);
+    res0 = __msa_ave_u_b(res0, inp3);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+    /* Rows 3..12 use eight consecutive source rows, inp<i-3>..inp<i+4>. */
+    inp7 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp3, inp2, inp1, inp0,
+                                           inp4, inp5, inp6, inp7,
+                                           const20, const6, const3);
+    res0 = __msa_ave_u_b(res0, inp4);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp8 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp4, inp3, inp2, inp1,
+                                           inp5, inp6, inp7, inp8,
+                                           const20, const6, const3);
+    res0 = __msa_ave_u_b(res0, inp5);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp9 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp5, inp4, inp3, inp2,
+                                           inp6, inp7, inp8, inp9,
+                                           const20, const6, const3);
+    res0 = __msa_ave_u_b(res0, inp6);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp10 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp6, inp5, inp4, inp3,
+                                           inp7, inp8, inp9, inp10,
+                                           const20, const6, const3);
+    res0 = __msa_ave_u_b(res0, inp7);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp11 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp7, inp6, inp5, inp4,
+                                           inp8, inp9, inp10, inp11,
+                                           const20, const6, const3);
+    res0 = __msa_ave_u_b(res0, inp8);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp12 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp8, inp7, inp6, inp5,
+                                           inp9, inp10, inp11, inp12,
+                                           const20, const6, const3);
+    res0 = __msa_ave_u_b(res0, inp9);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp13 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp9, inp8, inp7, inp6,
+                                           inp10, inp11, inp12, inp13,
+                                           const20, const6, const3);
+    res0 = __msa_ave_u_b(res0, inp10);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp14 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp10, inp9, inp8, inp7,
+                                           inp11, inp12, inp13, inp14,
+                                           const20, const6, const3);
+    res0 = __msa_ave_u_b(res0, inp11);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    inp15 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp11, inp10, inp9, inp8,
+                                           inp12, inp13, inp14, inp15,
+                                           const20, const6, const3);
+    res0 = __msa_ave_u_b(res0, inp12);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+    /* inp16 is the final load; src is not advanced any further. */
+    inp16 = LD_UB(src);
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp12, inp11, inp10, inp9,
+                                           inp13, inp14, inp15, inp16,
+                                           const20, const6, const3);
+    res0 = __msa_ave_u_b(res0, inp13);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+    /* Rows 13..15 are at the bottom edge: argument lists repeat inp16. */
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp13, inp12, inp11, inp10,
+                                           inp14, inp15, inp16, inp16,
+                                           const20, const6, const3);
+    res0 = __msa_ave_u_b(res0, inp14);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp14, inp13, inp12, inp11,
+                                           inp15, inp16, inp16, inp15,
+                                           const20, const6, const3);
+    res0 = __msa_ave_u_b(res0, inp15);
+    ST_UB(res0, dst);
+    dst += dst_stride;
+
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp15, inp14, inp13, inp12,
+                                           inp16, inp16, inp15, inp14,
+                                           const20, const6, const3);
+    res0 = __msa_ave_u_b(res0, inp16); /* last row blends with inp16 */
+    ST_UB(res0, dst);
+}
+
+static void vert_mc_qpel_avg_dst_aver_src0_8x8_msa(const uint8_t *src,
+                                                   int32_t src_stride,
+                                                   uint8_t *dst,
+                                                   int32_t dst_stride)
+{ /* 8 rows out: filter, blend with source row i, then with existing dst. */
+    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
+    v16u8 dst0, dst1, dst2, dst3;
+    v16u8 tmp0, tmp1, res0, res1;
+    v16u8 const20 = (v16u8) __msa_ldi_b(20);
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+    /* NOTE(review): AVER_UB2_UB, LD_UB* and ST8x4_UB are macros not shown. */
+    LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
+    src += (4 * src_stride);
+    LD_UB2(src, src_stride, inp4, inp5);
+    src += (2 * src_stride); /* rows 0..2 below repeat inp0 (top edge) */
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp0, inp0, inp1, inp2,
+                                        inp1, inp2, inp3, inp4,
+                                        inp1, inp0, inp0, inp1,
+                                        inp2, inp3, inp4, inp5,
+                                        const20, const6, const3);
+
+    LD_UB2(src, src_stride, inp6, inp7);
+    src += (2 * src_stride);
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp2, inp1, inp0, inp0,
+                                        inp3, inp4, inp5, inp6,
+                                        inp3, inp2, inp1, inp0,
+                                        inp4, inp5, inp6, inp7,
+                                        const20, const6, const3);
+    /* Blend with source rows 0..3, then with the rows already in dst. */
+    LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+    tmp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
+    tmp1 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
+    dst0 = (v16u8) __msa_insve_d((v2i64) dst0, 1, (v2i64) dst1);
+    dst2 = (v16u8) __msa_insve_d((v2i64) dst2, 1, (v2i64) dst3);
+    AVER_UB2_UB(res0, tmp0, res1, tmp1, res0, res1);
+    AVER_UB2_UB(dst0, res0, dst2, res1, res0, res1);
+    ST8x4_UB(res0, res1, dst, dst_stride);
+    dst += (4 * dst_stride);
+    /* Rows 4..7; the last three argument lists repeat inp8 (bottom edge). */
+    inp8 = LD_UB(src);
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp4, inp3, inp2, inp1,
+                                        inp5, inp6, inp7, inp8,
+                                        inp5, inp4, inp3, inp2,
+                                        inp6, inp7, inp8, inp8,
+                                        const20, const6, const3);
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp6, inp5, inp4, inp3,
+                                        inp7, inp8, inp8, inp7,
+                                        inp7, inp6, inp5, inp4,
+                                        inp8, inp8, inp7, inp6,
+                                        const20, const6, const3);
+    /* Same two-step blend for rows 4..7, against source rows 4..7. */
+    LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+    tmp0 = (v16u8) __msa_insve_d((v2i64) inp4, 1, (v2i64) inp5);
+    tmp1 = (v16u8) __msa_insve_d((v2i64) inp6, 1, (v2i64) inp7);
+    dst0 = (v16u8) __msa_insve_d((v2i64) dst0, 1, (v2i64) dst1);
+    dst2 = (v16u8) __msa_insve_d((v2i64) dst2, 1, (v2i64) dst3);
+    AVER_UB2_UB(res0, tmp0, res1, tmp1, res0, res1);
+    AVER_UB2_UB(dst0, res0, dst2, res1, res0, res1);
+    ST8x4_UB(res0, res1, dst, dst_stride);
+}
+
+static void vert_mc_qpel_avg_dst_aver_src0_16x16_msa(const uint8_t *src,
+                                                     int32_t src_stride,
+                                                     uint8_t *dst,
+                                                     int32_t dst_stride)
+{ /* 16 rows out: filter, blend with source row i, then with existing dst. */
+    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
+    v16u8 inp9, inp10, inp11, inp12, inp13, inp14, inp15, inp16;
+    v16u8 res0, res1, dst0, dst1;
+    v16u8 const20 = (v16u8) __msa_ldi_b(20);
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+    /* Rows 0..2 are at the top edge: their argument lists repeat inp0. */
+    LD_UB5(src, src_stride, inp0, inp1, inp2, inp3, inp4);
+    src += (5 * src_stride);
+    res0 = APPLY_VERT_QPEL_FILTER(inp0, inp0, inp1, inp2,
+                                  inp1, inp2, inp3, inp4,
+                                  const20, const6, const3);
+
+    inp5 = LD_UB(src);
+    src += src_stride;
+    res1 = APPLY_VERT_QPEL_FILTER(inp1, inp0, inp0, inp1,
+                                  inp2, inp3, inp4, inp5,
+                                  const20, const6, const3);
+    /* Blend (presumably a rounding average) with src rows 0/1, then dst. */
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    AVER_UB2_UB(res0, inp0, res1, inp1, res0, res1);
+    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
+    ST_UB2(res0, res1, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    inp6 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_FILTER(inp2, inp1, inp0, inp0,
+                                  inp3, inp4, inp5, inp6,
+                                  const20, const6, const3);
+
+    inp7 = LD_UB(src);
+    src += src_stride;
+    res1 = APPLY_VERT_QPEL_FILTER(inp3, inp2, inp1, inp0,
+                                  inp4, inp5, inp6, inp7,
+                                  const20, const6, const3);
+
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    AVER_UB2_UB(res0, inp2, res1, inp3, res0, res1);
+    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
+    ST_UB2(res0, res1, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    LD_UB2(src, src_stride, inp8, inp9);
+    src += (2 * src_stride);
+    res0 = APPLY_VERT_QPEL_FILTER(inp4, inp3, inp2, inp1,
+                                  inp5, inp6, inp7, inp8,
+                                  const20, const6, const3);
+    res1 = APPLY_VERT_QPEL_FILTER(inp5, inp4, inp3, inp2,
+                                  inp6, inp7, inp8, inp9,
+                                  const20, const6, const3);
+
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    AVER_UB2_UB(res0, inp4, res1, inp5, res0, res1);
+    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
+    ST_UB2(res0, res1, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    LD_UB2(src, src_stride, inp10, inp11);
+    src += (2 * src_stride);
+    res0 = APPLY_VERT_QPEL_FILTER(inp6, inp5, inp4, inp3,
+                                  inp7, inp8, inp9, inp10,
+                                  const20, const6, const3);
+    res1 = APPLY_VERT_QPEL_FILTER(inp7, inp6, inp5, inp4,
+                                  inp8, inp9, inp10, inp11,
+                                  const20, const6, const3);
+
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    AVER_UB2_UB(res0, inp6, res1, inp7, res0, res1);
+    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
+    ST_UB2(res0, res1, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    LD_UB2(src, src_stride, inp12, inp13);
+    src += (2 * src_stride);
+    res0 = APPLY_VERT_QPEL_FILTER(inp8, inp7, inp6, inp5,
+                                  inp9, inp10, inp11, inp12,
+                                  const20, const6, const3);
+    res1 = APPLY_VERT_QPEL_FILTER(inp9, inp8, inp7, inp6,
+                                  inp10, inp11, inp12, inp13,
+                                  const20, const6, const3);
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    AVER_UB2_UB(res0, inp8, res1, inp9, res0, res1);
+    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
+    ST_UB2(res0, res1, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    LD_UB2(src, src_stride, inp14, inp15);
+    src += (2 * src_stride);
+    res0 = APPLY_VERT_QPEL_FILTER(inp10, inp9, inp8, inp7,
+                                  inp11, inp12, inp13, inp14,
+                                  const20, const6, const3);
+    res1 = APPLY_VERT_QPEL_FILTER(inp11, inp10, inp9, inp8,
+                                  inp12, inp13, inp14, inp15,
+                                  const20, const6, const3);
+
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    AVER_UB2_UB(res0, inp10, res1, inp11, res0, res1);
+    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
+    ST_UB2(res0, res1, dst, dst_stride);
+    dst += (2 * dst_stride);
+    /* inp16 is the final load; rows 13..15 repeat it (bottom edge). */
+    inp16 = LD_UB(src);
+    res0 = APPLY_VERT_QPEL_FILTER(inp12, inp11, inp10, inp9,
+                                  inp13, inp14, inp15, inp16,
+                                  const20, const6, const3);
+    res1 = APPLY_VERT_QPEL_FILTER(inp13, inp12, inp11, inp10,
+                                  inp14, inp15, inp16, inp16,
+                                  const20, const6, const3);
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    AVER_UB2_UB(res0, inp12, res1, inp13, res0, res1);
+    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
+    ST_UB2(res0, res1, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    res0 = APPLY_VERT_QPEL_FILTER(inp14, inp13, inp12, inp11,
+                                  inp15, inp16, inp16, inp15,
+                                  const20, const6, const3);
+    res1 = APPLY_VERT_QPEL_FILTER(inp15, inp14, inp13, inp12,
+                                  inp16, inp16, inp15, inp14,
+                                  const20, const6, const3);
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    AVER_UB2_UB(res0, inp14, res1, inp15, res0, res1);
+    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
+    ST_UB2(res0, res1, dst, dst_stride);
+}
+
+static void vert_mc_qpel_avg_dst_8x8_msa(const uint8_t *src,
+                                         int32_t src_stride,
+                                         uint8_t *dst,
+                                         int32_t dst_stride)
+{ /* 8 rows out: filter result blended with the rows already in dst. */
+    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
+    v16u8 dst0, dst1, dst2, dst3;
+    v16u8 res0, res1;
+    v16u8 const20 = (v16u8) __msa_ldi_b(20);
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+    /* NOTE(review): AVER_UB2_UB, LD_UB* and ST8x4_UB are macros not shown. */
+    LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
+    src += (4 * src_stride);
+    LD_UB2(src, src_stride, inp4, inp5);
+    src += (2 * src_stride); /* rows 0..2 below repeat inp0 (top edge) */
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp0, inp0, inp1, inp2,
+                                        inp1, inp2, inp3, inp4,
+                                        inp1, inp0, inp0, inp1,
+                                        inp2, inp3, inp4, inp5,
+                                        const20, const6, const3);
+    LD_UB2(src, src_stride, inp6, inp7);
+    src += (2 * src_stride);
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp2, inp1, inp0, inp0,
+                                        inp3, inp4, inp5, inp6,
+                                        inp3, inp2, inp1, inp0,
+                                        inp4, inp5, inp6, inp7,
+                                        const20, const6, const3);
+    LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); /* current dst rows */
+    dst0 = (v16u8) __msa_insve_d((v2i64) dst0, 1, (v2i64) dst1);
+    dst2 = (v16u8) __msa_insve_d((v2i64) dst2, 1, (v2i64) dst3);
+    AVER_UB2_UB(dst0, res0, dst2, res1, res0, res1);
+    ST8x4_UB(res0, res1, dst, dst_stride);
+    dst += (4 * dst_stride);
+    /* Rows 4..7; the last three argument lists repeat inp8 (bottom edge). */
+    inp8 = LD_UB(src);
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp4, inp3, inp2, inp1,
+                                        inp5, inp6, inp7, inp8,
+                                        inp5, inp4, inp3, inp2,
+                                        inp6, inp7, inp8, inp8,
+                                        const20, const6, const3);
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp6, inp5, inp4, inp3,
+                                        inp7, inp8, inp8, inp7,
+                                        inp7, inp6, inp5, inp4,
+                                        inp8, inp8, inp7, inp6,
+                                        const20, const6, const3);
+    LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+    dst0 = (v16u8) __msa_insve_d((v2i64) dst0, 1, (v2i64) dst1);
+    dst2 = (v16u8) __msa_insve_d((v2i64) dst2, 1, (v2i64) dst3);
+    AVER_UB2_UB(dst0, res0, dst2, res1, res0, res1);
+    ST8x4_UB(res0, res1, dst, dst_stride);
+    dst += (4 * dst_stride); /* NOTE(review): dst is not read after this */
+}
+
+static void vert_mc_qpel_avg_dst_16x16_msa(const uint8_t *src,
+                                           int32_t src_stride,
+                                           uint8_t *dst,
+                                           int32_t dst_stride)
+{ /* 16x16 vertical qpel filter of src, combined with the pixels already in dst */
+    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
+    v16u8 inp9, inp10, inp11, inp12, inp13, inp14, inp15, inp16; /* 17 src rows in total */
+    v16u8 res0, res1, dst0, dst1;
+    v16u8 const20 = (v16u8) __msa_ldi_b(20); /* 20, 6, 3 replicated into every */
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);   /* byte lane; used only as filter  */
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);   /* macro arguments                 */
+
+    LD_UB5(src, src_stride, inp0, inp1, inp2, inp3, inp4);
+    src += (5 * src_stride);
+    res0 = APPLY_VERT_QPEL_FILTER(inp0, inp0, inp1, inp2, /* out row 0; no row above inp0 */
+                                  inp1, inp2, inp3, inp4, /* is loaded, inp0..inp2 reused  */
+                                  const20, const6, const3);
+    inp5 = LD_UB(src); /* further rows are loaded one at a time, just before first use */
+    src += src_stride;
+    res1 = APPLY_VERT_QPEL_FILTER(inp1, inp0, inp0, inp1, /* out row 1 */
+                                  inp2, inp3, inp4, inp5,
+                                  const20, const6, const3);
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1); /* presumably per-byte average with dst; macro not in view */
+    ST_UB2(res0, res1, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    inp6 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_FILTER(inp2, inp1, inp0, inp0, /* out row 2 */
+                                  inp3, inp4, inp5, inp6,
+                                  const20, const6, const3);
+    inp7 = LD_UB(src);
+    src += src_stride;
+    res1 = APPLY_VERT_QPEL_FILTER(inp3, inp2, inp1, inp0, /* out row 3 */
+                                  inp4, inp5, inp6, inp7,
+                                  const20, const6, const3);
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
+    ST_UB2(res0, res1, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    inp8 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_FILTER(inp4, inp3, inp2, inp1, /* out row 4 */
+                                  inp5, inp6, inp7, inp8,
+                                  const20, const6, const3);
+    inp9 = LD_UB(src);
+    src += src_stride;
+    res1 = APPLY_VERT_QPEL_FILTER(inp5, inp4, inp3, inp2, /* out row 5 */
+                                  inp6, inp7, inp8, inp9,
+                                  const20, const6, const3);
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
+    ST_UB2(res0, res1, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    inp10 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_FILTER(inp6, inp5, inp4, inp3, /* out row 6 */
+                                  inp7, inp8, inp9, inp10,
+                                  const20, const6, const3);
+    inp11 = LD_UB(src);
+    src += src_stride;
+    res1 = APPLY_VERT_QPEL_FILTER(inp7, inp6, inp5, inp4, /* out row 7 */
+                                  inp8, inp9, inp10, inp11,
+                                  const20, const6, const3);
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
+    ST_UB2(res0, res1, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    inp12 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_FILTER(inp8, inp7, inp6, inp5, /* out row 8 */
+                                  inp9, inp10, inp11, inp12,
+                                  const20, const6, const3);
+    inp13 = LD_UB(src);
+    src += src_stride;
+    res1 = APPLY_VERT_QPEL_FILTER(inp9, inp8, inp7, inp6, /* out row 9 */
+                                  inp10, inp11, inp12, inp13,
+                                  const20, const6, const3);
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
+    ST_UB2(res0, res1, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    inp14 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_FILTER(inp10, inp9, inp8, inp7, /* out row 10 */
+                                  inp11, inp12, inp13, inp14,
+                                  const20, const6, const3);
+    inp15 = LD_UB(src);
+    src += src_stride;
+    res1 = APPLY_VERT_QPEL_FILTER(inp11, inp10, inp9, inp8, /* out row 11 */
+                                  inp12, inp13, inp14, inp15,
+                                  const20, const6, const3);
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
+    ST_UB2(res0, res1, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    inp16 = LD_UB(src); /* 17th and last source row; src is not advanced again */
+    res0 = APPLY_VERT_QPEL_FILTER(inp12, inp11, inp10, inp9, /* out row 12 */
+                                  inp13, inp14, inp15, inp16,
+                                  const20, const6, const3);
+    res1 = APPLY_VERT_QPEL_FILTER(inp13, inp12, inp11, inp10, /* out row 13 */
+                                  inp14, inp15, inp16, inp16, /* inp16 reused from here on */
+                                  const20, const6, const3);
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
+    ST_UB2(res0, res1, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    res0 = APPLY_VERT_QPEL_FILTER(inp14, inp13, inp12, inp11, /* out row 14 */
+                                  inp15, inp16, inp16, inp15,
+                                  const20, const6, const3);
+    res1 = APPLY_VERT_QPEL_FILTER(inp15, inp14, inp13, inp12, /* out row 15 */
+                                  inp16, inp16, inp15, inp14,
+                                  const20, const6, const3);
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
+    ST_UB2(res0, res1, dst, dst_stride);
+}
+
+static void vert_mc_qpel_avg_dst_aver_src1_8x8_msa(const uint8_t *src,
+                                                   int32_t src_stride,
+                                                   uint8_t *dst,
+                                                   int32_t dst_stride)
+{ /* 8x8 vertical qpel filter, combined first with the next source row, then with dst */
+    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8; /* 9 src rows */
+    v16u8 dst0, dst1, dst2, dst3;
+    v16u8 tmp0, tmp1, res0, res1;
+    v16u8 const20 = (v16u8) __msa_ldi_b(20); /* 20, 6, 3 replicated into every */
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);   /* byte lane; used only as filter  */
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);   /* macro arguments                 */
+
+    LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
+    src += (4 * src_stride);
+    LD_UB2(src, src_stride, inp4, inp5);
+    src += (2 * src_stride);
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp0, inp0, inp1, inp2, /* out rows 0-1 */
+                                        inp1, inp2, inp3, inp4,
+                                        inp1, inp0, inp0, inp1, /* no row above inp0 is */
+                                        inp2, inp3, inp4, inp5, /* loaded; inp0..inp2 reused */
+                                        const20, const6, const3);
+    LD_UB2(src, src_stride, inp6, inp7);
+    src += (2 * src_stride);
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp2, inp1, inp0, inp0, /* out rows 2-3 */
+                                        inp3, inp4, inp5, inp6,
+                                        inp3, inp2, inp1, inp0,
+                                        inp4, inp5, inp6, inp7,
+                                        const20, const6, const3);
+    LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+    tmp0 = (v16u8) __msa_insve_d((v2i64) inp1, 1, (v2i64) inp2); /* src rows 1|2 */
+    tmp1 = (v16u8) __msa_insve_d((v2i64) inp3, 1, (v2i64) inp4); /* src rows 3|4 */
+    dst0 = (v16u8) __msa_insve_d((v2i64) dst0, 1, (v2i64) dst1); /* dst rows 0|1 */
+    dst2 = (v16u8) __msa_insve_d((v2i64) dst2, 1, (v2i64) dst3); /* dst rows 2|3 */
+    AVER_UB2_UB(res0, tmp0, res1, tmp1, res0, res1); /* filter row k with src row k + 1 */
+    AVER_UB2_UB(dst0, res0, dst2, res1, res0, res1); /* then with dst; presumably per-byte average, macro not in view */
+    ST8x4_UB(res0, res1, dst, dst_stride);
+    dst += (4 * dst_stride);
+
+    inp8 = LD_UB(src);
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp4, inp3, inp2, inp1, /* out rows 4-5 */
+                                        inp5, inp6, inp7, inp8,
+                                        inp5, inp4, inp3, inp2,
+                                        inp6, inp7, inp8, inp8, /* inp8 is the last row loaded */
+                                        const20, const6, const3);
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp6, inp5, inp4, inp3, /* out rows 6-7 */
+                                        inp7, inp8, inp8, inp7,
+                                        inp7, inp6, inp5, inp4,
+                                        inp8, inp8, inp7, inp6,
+                                        const20, const6, const3);
+    LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+    tmp0 = (v16u8) __msa_insve_d((v2i64) inp5, 1, (v2i64) inp6); /* src rows 5|6 */
+    tmp1 = (v16u8) __msa_insve_d((v2i64) inp7, 1, (v2i64) inp8); /* src rows 7|8 */
+    dst0 = (v16u8) __msa_insve_d((v2i64) dst0, 1, (v2i64) dst1); /* dst rows 4|5 */
+    dst2 = (v16u8) __msa_insve_d((v2i64) dst2, 1, (v2i64) dst3); /* dst rows 6|7 */
+    AVER_UB2_UB(res0, tmp0, res1, tmp1, res0, res1);
+    AVER_UB2_UB(dst0, res0, dst2, res1, res0, res1);
+    ST8x4_UB(res0, res1, dst, dst_stride);
+}
+
+static void vert_mc_qpel_avg_dst_aver_src1_16x16_msa(const uint8_t *src,
+                                                     int32_t src_stride,
+                                                     uint8_t *dst,
+                                                     int32_t dst_stride)
+{ /* 16x16 vertical qpel filter, combined first with the next source row, then with dst */
+    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
+    v16u8 inp9, inp10, inp11, inp12, inp13, inp14, inp15, inp16; /* 17 src rows in total */
+    v16u8 res0, res1, dst0, dst1;
+    v16u8 const20 = (v16u8) __msa_ldi_b(20); /* 20, 6, 3 replicated into every */
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);   /* byte lane; used only as filter  */
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);   /* macro arguments                 */
+
+    LD_UB5(src, src_stride, inp0, inp1, inp2, inp3, inp4);
+    src += (5 * src_stride);
+    res0 = APPLY_VERT_QPEL_FILTER(inp0, inp0, inp1, inp2, /* out row 0; no row above inp0 */
+                                  inp1, inp2, inp3, inp4, /* is loaded, inp0..inp2 reused  */
+                                  const20, const6, const3);
+    inp5 = LD_UB(src); /* further rows are loaded one at a time, just before first use */
+    src += src_stride;
+    res1 = APPLY_VERT_QPEL_FILTER(inp1, inp0, inp0, inp1, /* out row 1 */
+                                  inp2, inp3, inp4, inp5,
+                                  const20, const6, const3);
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    AVER_UB2_UB(res0, inp1, res1, inp2, res0, res1); /* filter row k with src row k + 1 */
+    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1); /* then with dst; presumably per-byte average, macro not in view */
+    ST_UB2(res0, res1, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    inp6 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_FILTER(inp2, inp1, inp0, inp0, /* out row 2 */
+                                  inp3, inp4, inp5, inp6,
+                                  const20, const6, const3);
+    inp7 = LD_UB(src);
+    src += src_stride;
+    res1 = APPLY_VERT_QPEL_FILTER(inp3, inp2, inp1, inp0, /* out row 3 */
+                                  inp4, inp5, inp6, inp7,
+                                  const20, const6, const3);
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    AVER_UB2_UB(res0, inp3, res1, inp4, res0, res1);
+    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
+    ST_UB2(res0, res1, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    inp8 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_FILTER(inp4, inp3, inp2, inp1, /* out row 4 */
+                                  inp5, inp6, inp7, inp8,
+                                  const20, const6, const3);
+    inp9 = LD_UB(src);
+    src += src_stride;
+    res1 = APPLY_VERT_QPEL_FILTER(inp5, inp4, inp3, inp2, /* out row 5 */
+                                  inp6, inp7, inp8, inp9,
+                                  const20, const6, const3);
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    AVER_UB2_UB(res0, inp5, res1, inp6, res0, res1);
+    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
+    ST_UB2(res0, res1, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    inp10 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_FILTER(inp6, inp5, inp4, inp3, /* out row 6 */
+                                  inp7, inp8, inp9, inp10,
+                                  const20, const6, const3);
+    inp11 = LD_UB(src);
+    src += src_stride;
+    res1 = APPLY_VERT_QPEL_FILTER(inp7, inp6, inp5, inp4, /* out row 7 */
+                                  inp8, inp9, inp10, inp11,
+                                  const20, const6, const3);
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    AVER_UB2_UB(res0, inp7, res1, inp8, res0, res1);
+    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
+    ST_UB2(res0, res1, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    inp12 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_FILTER(inp8, inp7, inp6, inp5, /* out row 8 */
+                                  inp9, inp10, inp11, inp12,
+                                  const20, const6, const3);
+    inp13 = LD_UB(src);
+    src += src_stride;
+    res1 = APPLY_VERT_QPEL_FILTER(inp9, inp8, inp7, inp6, /* out row 9 */
+                                  inp10, inp11, inp12, inp13,
+                                  const20, const6, const3);
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    AVER_UB2_UB(res0, inp9, res1, inp10, res0, res1);
+    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
+    ST_UB2(res0, res1, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    inp14 = LD_UB(src);
+    src += src_stride;
+    res0 = APPLY_VERT_QPEL_FILTER(inp10, inp9, inp8, inp7, /* out row 10 */
+                                  inp11, inp12, inp13, inp14,
+                                  const20, const6, const3);
+    inp15 = LD_UB(src);
+    src += src_stride;
+    res1 = APPLY_VERT_QPEL_FILTER(inp11, inp10, inp9, inp8, /* out row 11 */
+                                  inp12, inp13, inp14, inp15,
+                                  const20, const6, const3);
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    AVER_UB2_UB(res0, inp11, res1, inp12, res0, res1);
+    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
+    ST_UB2(res0, res1, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    inp16 = LD_UB(src); /* 17th and last source row; src is not advanced again */
+    res0 = APPLY_VERT_QPEL_FILTER(inp12, inp11, inp10, inp9, /* out row 12 */
+                                  inp13, inp14, inp15, inp16,
+                                  const20, const6, const3);
+    res1 = APPLY_VERT_QPEL_FILTER(inp13, inp12, inp11, inp10, /* out row 13 */
+                                  inp14, inp15, inp16, inp16, /* inp16 reused from here on */
+                                  const20, const6, const3);
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    AVER_UB2_UB(res0, inp13, res1, inp14, res0, res1);
+    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
+    ST_UB2(res0, res1, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    res0 = APPLY_VERT_QPEL_FILTER(inp14, inp13, inp12, inp11, /* out row 14 */
+                                  inp15, inp16, inp16, inp15,
+                                  const20, const6, const3);
+    res1 = APPLY_VERT_QPEL_FILTER(inp15, inp14, inp13, inp12, /* out row 15 */
+                                  inp16, inp16, inp15, inp14,
+                                  const20, const6, const3);
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    AVER_UB2_UB(res0, inp15, res1, inp16, res0, res1); /* row 15 pairs with the extra row inp16 */
+    AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
+    ST_UB2(res0, res1, dst, dst_stride);
+}
+
+static void hv_mc_qpel_no_rnd_horiz_src0_16x16_msa(const uint8_t *src,
+                                                   int32_t src_stride,
+                                                   uint8_t *dst,
+                                                   int32_t dst_stride,
+                                                   int32_t height)
+{ /* horizontal no-round qpel pass averaged with src; writes 4 * (height >> 2) + 1 rows of 16 bytes */
+    uint8_t loop_count; /* 8-bit counter: holds height >> 2 */
+    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
+    v16u8 res;
+    v16u8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; /* byte indices in reverse order; consumed by the filter macro */
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+    v8u16 const20 = (v8u16) __msa_ldi_h(20); /* unlike 6 and 3, 20 is replicated per 16-bit lane */
+
+    for (loop_count = (height >> 2); loop_count--;) { /* four rows per iteration */
+        LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);       /* rows at src     */
+        LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7); /* rows at src + 1 */
+        src += (4 * src_stride);
+        res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp0, inp1, mask,
+                                               const20, const6, const3);
+        res = __msa_ave_u_b(inp0, res); /* truncating average with the pixels at src */
+        ST_UB(res, dst);
+        dst += dst_stride;
+
+        res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp2, inp3, mask,
+                                               const20, const6, const3);
+        res = __msa_ave_u_b(inp2, res);
+        ST_UB(res, dst);
+        dst += dst_stride;
+
+        res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp4, inp5, mask,
+                                               const20, const6, const3);
+        res = __msa_ave_u_b(inp4, res);
+        ST_UB(res, dst);
+        dst += dst_stride;
+
+        res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp6, inp7, mask,
+                                               const20, const6, const3);
+        res = __msa_ave_u_b(inp6, res);
+        ST_UB(res, dst);
+        dst += dst_stride;
+    }
+
+    LD_UB2(src, 1, inp0, inp1); /* one extra row; stride 1 presumably yields src and src + 1 as in the loop */
+    res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp0, inp1, mask,
+                                           const20, const6, const3);
+    res = __msa_ave_u_b(inp0, res);
+    ST_UB(res, dst);
+}
+
+static void hv_mc_qpel_no_rnd_aver_hv_src00_16x16_msa(const uint8_t *src,
+                                                      int32_t src_stride,
+                                                      uint8_t *dst,
+                                                      int32_t dst_stride)
+{
+    /* The horizontal pass emits 16 + 1 rows of 16 bytes each at stride 16. */
+    uint8_t tmp[17 * 16];
+    hv_mc_qpel_no_rnd_horiz_src0_16x16_msa(src, src_stride, tmp, 16, 16);
+    vert_mc_qpel_no_rnd_aver_src0_16x16_msa(tmp, 16, dst, dst_stride);
+}
+
+static void hv_mc_qpel_no_rnd_aver_hv_src00_8x8_msa(const uint8_t *src,
+                                                    int32_t src_stride,
+                                                    uint8_t *dst,
+                                                    int32_t dst_stride)
+{ /* 8x8 no-round qpel: horizontal pass averaged with src, then vertical pass averaged with the horizontal rows */
+    v16u8 inp0, inp1, inp2, inp3;
+    v16u8 res0, res1, avg0, avg1;
+    v16u8 horiz0, horiz1, horiz2, horiz3;
+    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8; /* 9 horizontally filtered rows */
+    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 }; /* mask0..mask3: byte-index tables */
+    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 }; /* (values 0..8) consumed only by   */
+    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 }; /* the horizontal filter macros     */
+    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
+    v16u8 const20 = (v16u8) __msa_ldi_b(20);
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+
+    LD_UB2(src, src_stride, inp0, inp1);
+    src += (2 * src_stride);
+    res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
+                                                  mask2, mask3, const20,
+                                                  const6, const3);
+    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0); /* low 8 bytes of src rows 0|1 */
+    horiz0 = __msa_ave_u_b(inp0, res0); /* truncating average of src with its horizontal filter output */
+    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1); /* upper half (second row) copied to both halves */
+    LD_UB2(src, src_stride, inp2, inp3);
+    src += (2 * src_stride);
+    res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
+                                                  mask2, mask3, const20,
+                                                  const6, const3);
+    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2); /* src rows 2|3 */
+    horiz2 = __msa_ave_u_b(inp2, res1);
+    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
+    LD_UB2(src, src_stride, inp0, inp1);
+    src += (2 * src_stride);
+    res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
+                                                  mask2, mask3, const20,
+                                                  const6, const3);
+    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0); /* src rows 4|5 */
+    horiz4 = __msa_ave_u_b(inp0, res0);
+    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2, /* out rows 0-1 */
+                                                 horiz1, horiz2, horiz3, horiz4,
+                                                 horiz1, horiz0, horiz0, horiz1, /* no row above horiz0 exists; */
+                                                 horiz2, horiz3, horiz4, horiz5, /* horiz0..horiz2 are reused   */
+                                                 const20, const6, const3);
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz1, (v2i64) horiz0); /* horiz rows 0|1 */
+    res0 = __msa_ave_u_b(avg0, res0); /* vertical output averaged with the horizontal row at the same position */
+    ST8x2_UB(res0, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    LD_UB2(src, src_stride, inp2, inp3);
+    src += (2 * src_stride);
+    res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
+                                                  mask2, mask3, const20,
+                                                  const6, const3);
+    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2); /* src rows 6|7 */
+    horiz6 = __msa_ave_u_b(inp2, res1);
+    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
+    inp0 = LD_UB(src); /* ninth source row, filtered on its own */
+    res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE_1ROW(inp0, mask0, mask1,
+                                                       mask2, mask3, const20,
+                                                       const6, const3);
+    horiz8 = __msa_ave_u_b(inp0, res0);
+    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0, /* out rows 2-3 */
+                                                 horiz3, horiz4, horiz5, horiz6,
+                                                 horiz3, horiz2, horiz1, horiz0,
+                                                 horiz4, horiz5, horiz6, horiz7,
+                                                 const20, const6, const3);
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz3, (v2i64) horiz2); /* horiz rows 2|3 */
+    res1 = __msa_ave_u_b(avg1, res1);
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1, /* out rows 4-5, computed before rows 2-3 are stored */
+                                                 horiz5, horiz6, horiz7, horiz8,
+                                                 horiz5, horiz4, horiz3, horiz2,
+                                                 horiz6, horiz7, horiz8, horiz8, /* horiz8 is the last row available */
+                                                 const20, const6, const3);
+    ST8x2_UB(res1, dst, dst_stride);
+    dst += 2 * dst_stride;
+
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz5, (v2i64) horiz4); /* horiz rows 4|5 */
+    res0 = __msa_ave_u_b(avg0, res0);
+    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3, /* out rows 6-7 */
+                                                 horiz7, horiz8, horiz8, horiz7,
+                                                 horiz7, horiz6, horiz5, horiz4,
+                                                 horiz8, horiz8, horiz7, horiz6,
+                                                 const20, const6, const3);
+    ST8x2_UB(res0, dst, dst_stride);
+    dst += 2 * dst_stride;
+
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz7, (v2i64) horiz6); /* horiz rows 6|7 */
+    res1 = __msa_ave_u_b(avg1, res1);
+    ST8x2_UB(res1, dst, dst_stride);
+}
+
+static void hv_mc_qpel_no_rnd_horiz_16x16_msa(const uint8_t *src,
+                                              int32_t src_stride,
+                                              uint8_t *dst,
+                                              int32_t dst_stride,
+                                              int32_t height)
+{ /* horizontal no-round qpel pass, stored as is; writes 4 * (height >> 2) + 1 rows of 16 bytes */
+    uint8_t loop_count; /* 8-bit counter: holds height >> 2 */
+    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
+    v16u8 res;
+    v16u8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; /* byte indices in reverse order; consumed by the filter macro */
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+    v8u16 const20 = (v8u16) __msa_ldi_h(20); /* unlike 6 and 3, 20 is replicated per 16-bit lane */
+
+    for (loop_count = (height >> 2); loop_count--;) { /* four rows per iteration */
+        LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);       /* rows at src     */
+        LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7); /* rows at src + 1 */
+        src += (4 * src_stride);
+        res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp0, inp1, mask,
+                                               const20, const6, const3);
+        ST_UB(res, dst); /* filter output is stored without any averaging step */
+        dst += dst_stride;
+
+        res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp2, inp3, mask,
+                                               const20, const6, const3);
+        ST_UB(res, dst);
+        dst += dst_stride;
+
+        res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp4, inp5, mask,
+                                               const20, const6, const3);
+        ST_UB(res, dst);
+        dst += dst_stride;
+
+        res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp6, inp7, mask,
+                                               const20, const6, const3);
+        ST_UB(res, dst);
+        dst += dst_stride;
+    }
+
+    LD_UB2(src, 1, inp0, inp1); /* one extra row; stride 1 presumably yields src and src + 1 as in the loop */
+    res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp0, inp1, mask,
+                                           const20, const6, const3);
+    ST_UB(res, dst);
+}
+
+static void hv_mc_qpel_no_rnd_aver_v_src0_16x16_msa(const uint8_t *src,
+                                                    int32_t src_stride,
+                                                    uint8_t *dst,
+                                                    int32_t dst_stride)
+{
+    /* The horizontal pass emits 16 + 1 rows of 16 bytes each at stride 16. */
+    uint8_t tmp[17 * 16];
+    hv_mc_qpel_no_rnd_horiz_16x16_msa(src, src_stride, tmp, 16, 16);
+    vert_mc_qpel_no_rnd_aver_src0_16x16_msa(tmp, 16, dst, dst_stride);
+}
+
+/* 8x8 no-round qpel: horizontal filter into horiz0..horiz8, then vertical filter
+ * whose output is truncate-averaged with the horizontal row at the same position. */
+static void hv_mc_qpel_no_rnd_aver_v_src0_8x8_msa(const uint8_t *src,
+                                                  int32_t src_stride,
+                                                  uint8_t *dst,
+                                                  int32_t dst_stride)
+{
+    v16u8 inp0, inp1, inp2, inp3;
+    v16u8 res0, res1, avg0, avg1;
+    v16u8 horiz0, horiz1, horiz2, horiz3;
+    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8; /* 9 horizontally filtered rows */
+    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 }; /* mask0..mask3: byte-index tables */
+    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 }; /* (values 0..8) consumed only by   */
+    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 }; /* the horizontal filter macros     */
+    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
+    v16u8 const20 = (v16u8) __msa_ldi_b(20);
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+
+    LD_UB2(src, src_stride, inp0, inp1);
+    src += (2 * src_stride);
+    horiz0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
+                                                    mask2, mask3, const20,
+                                                    const6, const3);
+    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1); /* upper half (second row) copied to both halves */
+
+    LD_UB2(src, src_stride, inp2, inp3);
+    src += (2 * src_stride);
+    horiz2 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
+                                                    mask2, mask3, const20,
+                                                    const6, const3);
+    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
+    LD_UB2(src, src_stride, inp0, inp1);
+    src += (2 * src_stride);
+    horiz4 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
+                                                    mask2, mask3, const20,
+                                                    const6, const3);
+    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2, /* out rows 0-1 */
+                                                 horiz1, horiz2, horiz3, horiz4,
+                                                 horiz1, horiz0, horiz0, horiz1, /* no row above horiz0 exists; */
+                                                 horiz2, horiz3, horiz4, horiz5, /* horiz0..horiz2 are reused   */
+                                                 const20, const6, const3);
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz1, (v2i64) horiz0); /* horiz rows 0|1 */
+    res0 = __msa_ave_u_b(avg0, res0);
+    ST8x2_UB(res0, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    LD_UB2(src, src_stride, inp2, inp3);
+    src += (2 * src_stride);
+    horiz6 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
+                                                    mask2, mask3, const20,
+                                                    const6, const3);
+    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
+    inp0 = LD_UB(src); /* ninth source row, filtered on its own */
+    horiz8 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE_1ROW(inp0, mask0, mask1,
+                                                         mask2, mask3, const20,
+                                                         const6, const3);
+    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0, /* out rows 2-3 */
+                                                 horiz3, horiz4, horiz5, horiz6,
+                                                 horiz3, horiz2, horiz1, horiz0,
+                                                 horiz4, horiz5, horiz6, horiz7,
+                                                 const20, const6, const3);
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz3, (v2i64) horiz2); /* horiz rows 2|3 */
+    res1 = __msa_ave_u_b(avg1, res1);
+    ST8x2_UB(res1, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1, /* out rows 4-5 */
+                                                 horiz5, horiz6, horiz7, horiz8,
+                                                 horiz5, horiz4, horiz3, horiz2,
+                                                 horiz6, horiz7, horiz8, horiz8, /* horiz8 is the last row available */
+                                                 const20, const6, const3);
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz5, (v2i64) horiz4); /* horiz rows 4|5 */
+    res0 = __msa_ave_u_b(avg0, res0);
+    ST8x2_UB(res0, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3, /* out rows 6-7 */
+                                                 horiz7, horiz8, horiz8, horiz7,
+                                                 horiz7, horiz6, horiz5, horiz4,
+                                                 horiz8, horiz8, horiz7, horiz6,
+                                                 const20, const6, const3);
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz7, (v2i64) horiz6); /* horiz rows 6|7 */
+    res1 = __msa_ave_u_b(avg1, res1);
+    ST8x2_UB(res1, dst, dst_stride);
+}
+
+static void hv_mc_qpel_no_rnd_horiz_src1_16x16_msa(const uint8_t *src,
+                                                   int32_t src_stride,
+                                                   uint8_t *dst,
+                                                   int32_t dst_stride,
+                                                   int32_t height)
+{ /* horizontal no-round qpel pass averaged with src + 1; writes 4 * (height >> 2) + 1 rows of 16 bytes */
+    uint8_t loop_count; /* 8-bit counter: holds height >> 2 */
+    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
+    v16u8 res;
+    v16u8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; /* byte indices in reverse order; consumed by the filter macro */
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+    v8u16 const20 = (v8u16) __msa_ldi_h(20); /* unlike 6 and 3, 20 is replicated per 16-bit lane */
+
+    for (loop_count = (height >> 2); loop_count--;) { /* four rows per iteration */
+        LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);       /* rows at src     */
+        LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7); /* rows at src + 1 */
+        src += (4 * src_stride);
+        res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp0, inp1, mask,
+                                               const20, const6, const3);
+        res = __msa_ave_u_b(res, inp1); /* truncating average with the pixels at src + 1 */
+        ST_UB(res, dst);
+        dst += dst_stride;
+
+        res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp2, inp3, mask,
+                                               const20, const6, const3);
+        res = __msa_ave_u_b(res, inp3);
+        ST_UB(res, dst);
+        dst += dst_stride;
+
+        res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp4, inp5, mask,
+                                               const20, const6, const3);
+        res = __msa_ave_u_b(res, inp5);
+        ST_UB(res, dst);
+        dst += dst_stride;
+
+        res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp6, inp7, mask,
+                                               const20, const6, const3);
+        res = __msa_ave_u_b(res, inp7);
+        ST_UB(res, dst);
+        dst += dst_stride;
+    }
+
+    LD_UB2(src, 1, inp0, inp1); /* one extra row; stride 1 presumably yields src and src + 1 as in the loop */
+    res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp0, inp1, mask,
+                                           const20, const6, const3);
+    res = __msa_ave_u_b(inp1, res);
+    ST_UB(res, dst);
+}
+
+static void hv_mc_qpel_no_rnd_aver_hv_src10_16x16_msa(const uint8_t *src,
+                                                      int32_t src_stride,
+                                                      uint8_t *dst,
+                                                      int32_t dst_stride)
+{   /* 16x16 no-round qpel in two passes through a stack temporary. */
+    enum { pitch = 16, nrows = 17 }; /* horizontal pass stores 16 + 1 rows */
+    uint8_t tmp[nrows * pitch]; /* 272 bytes: output of the horizontal pass */
+    hv_mc_qpel_no_rnd_horiz_src1_16x16_msa(src, src_stride, tmp, pitch, 16);
+    vert_mc_qpel_no_rnd_aver_src0_16x16_msa(tmp, pitch, dst, dst_stride);
+}
+
+static void hv_mc_qpel_no_rnd_aver_hv_src10_8x8_msa(const uint8_t *src,
+                                                    int32_t src_stride,
+                                                    uint8_t *dst,
+                                                    int32_t dst_stride)
+{   /* 8x8 no-round qpel, H then V, with a truncating average in each pass. */
+    v16u8 inp0, inp1, inp2, inp3;
+    v16u8 res0, res1, avg0, avg1;
+    v16u8 horiz0, horiz1, horiz2, horiz3; /* H-pass results, nine in all; */
+    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8; /* inputs of the V filter */
+    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
+    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
+    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
+    v16u8 const20 = (v16u8) __msa_ldi_b(20);
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+    /* H pass: two source rows are loaded and filtered together per step. */
+    LD_UB2(src, src_stride, inp0, inp1);
+    src += (2 * src_stride);
+    res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
+                                                  mask2, mask3, const20,
+                                                  const6, const3);
+    SLDI_B2_UB(inp0, inp1, inp0, inp1, inp0, inp1, 1);
+    /* NOTE(review): the trailing 1 is presumably a one-byte slide (src + 1). */
+    inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
+    horiz0 = __msa_ave_u_b(inp0, res0); /* inp0 now packs both rows' low 8 bytes */
+    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1); /* upper half of horiz0 */
+    LD_UB2(src, src_stride, inp2, inp3);
+    src += (2 * src_stride);
+    res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
+                                                  mask2, mask3, const20,
+                                                  const6, const3);
+    SLDI_B2_UB(inp2, inp3, inp2, inp3, inp2, inp3, 1);
+
+    inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
+    horiz2 = __msa_ave_u_b(inp2, res1);
+    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
+    LD_UB2(src, src_stride, inp0, inp1);
+    src += (2 * src_stride);
+    res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
+                                                  mask2, mask3, const20,
+                                                  const6, const3);
+    SLDI_B2_UB(inp0, inp1, inp0, inp1, inp0, inp1, 1);
+
+    inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
+    horiz4 = __msa_ave_u_b(inp0, res0);
+    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
+                                                 horiz1, horiz2, horiz3, horiz4,
+                                                 horiz1, horiz0, horiz0, horiz1,
+                                                 horiz2, horiz3, horiz4, horiz5,
+                                                 const20, const6, const3);
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz1, (v2i64) horiz0);
+    res0 = __msa_ave_u_b(avg0, res0); /* V average with horiz0 / horiz1 */
+    ST8x2_UB(res0, dst, dst_stride); /* presumably output rows 0-1 */
+    dst += (2 * dst_stride);
+    /* Remaining H-pass rows are produced only now, after the first store. */
+    LD_UB2(src, src_stride, inp2, inp3);
+    src += (2 * src_stride);
+    res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
+                                                  mask2, mask3, const20,
+                                                  const6, const3);
+    SLDI_B2_UB(inp2, inp3, inp2, inp3, inp2, inp3, 1);
+
+    inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
+    horiz6 = __msa_ave_u_b(inp2, res1);
+    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
+    inp0 = LD_UB(src); /* ninth source row, filtered on its own */
+    res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE_1ROW(inp0, mask0, mask1,
+                                                       mask2, mask3, const20,
+                                                       const6, const3);
+    inp0 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) inp0, 1);
+    horiz8 = __msa_ave_u_b(inp0, res0); /* average with the row slid by 1 byte */
+    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
+                                                 horiz3, horiz4, horiz5, horiz6,
+                                                 horiz3, horiz2, horiz1, horiz0,
+                                                 horiz4, horiz5, horiz6, horiz7,
+                                                 const20, const6, const3);
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz3, (v2i64) horiz2);
+    res1 = __msa_ave_u_b(avg1, res1); /* V average with horiz2 / horiz3 */
+    ST8x2_UB(res1, dst, dst_stride); /* presumably output rows 2-3 */
+    dst += (2 * dst_stride);
+    /* NOTE(review): horiz0 / horiz8 repeat in the V taps - presumably edge mirroring. */
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
+                                                 horiz5, horiz6, horiz7, horiz8,
+                                                 horiz5, horiz4, horiz3, horiz2,
+                                                 horiz6, horiz7, horiz8, horiz8,
+                                                 const20, const6, const3);
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz5, (v2i64) horiz4);
+    res0 = __msa_ave_u_b(avg0, res0); /* V average with horiz4 / horiz5 */
+    ST8x2_UB(res0, dst, dst_stride); /* presumably output rows 4-5 */
+    dst += (2 * dst_stride);
+
+    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
+                                                 horiz7, horiz8, horiz8, horiz7,
+                                                 horiz7, horiz6, horiz5, horiz4,
+                                                 horiz8, horiz8, horiz7, horiz6,
+                                                 const20, const6, const3);
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz7, (v2i64) horiz6);
+    res1 = __msa_ave_u_b(avg1, res1); /* V average with horiz6 / horiz7 */
+    ST8x2_UB(res1, dst, dst_stride); /* presumably output rows 6-7 */
+}
+
+static void hv_mc_qpel_no_rnd_aver_h_src0_16x16_msa(const uint8_t *src,
+                                                    int32_t src_stride,
+                                                    uint8_t *dst,
+                                                    int32_t dst_stride)
+{   /* 16x16 no-round qpel in two passes through a stack temporary. */
+    enum { pitch = 16, nrows = 17 }; /* presumably 16 + 1 rows - confirm */
+    uint8_t tmp[nrows * pitch]; /* 272 bytes: output of the horizontal pass */
+    hv_mc_qpel_no_rnd_horiz_src0_16x16_msa(src, src_stride, tmp, pitch, 16);
+    vert_mc_qpel_no_rnd_16x16_msa(tmp, pitch, dst, dst_stride);
+}
+
+static void hv_mc_qpel_no_rnd_aver_h_src0_8x8_msa(const uint8_t *src,
+                                                  int32_t src_stride,
+                                                  uint8_t *dst,
+                                                  int32_t dst_stride)
+{   /* 8x8 no-round qpel, H then V; only the H pass averages (with src). */
+    v16u8 inp0, inp1, inp2, inp3;
+    v16u8 res0, res1;
+    v16u8 horiz0, horiz1, horiz2, horiz3; /* H-pass results, nine in all; */
+    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8; /* inputs of the V filter */
+    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
+    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
+    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
+    v16u8 const20 = (v16u8) __msa_ldi_b(20);
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+    /* H pass: two source rows are loaded and filtered together per step. */
+    LD_UB2(src, src_stride, inp0, inp1);
+    src += (2 * src_stride);
+    res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
+                                                  mask2, mask3, const20,
+                                                  const6, const3);
+    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0); /* pack rows */
+    horiz0 = __msa_ave_u_b(inp0, res0); /* truncating average with unshifted src */
+    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1); /* upper half of horiz0 */
+    LD_UB2(src, src_stride, inp2, inp3);
+    src += (2 * src_stride);
+    res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
+                                                  mask2, mask3, const20,
+                                                  const6, const3);
+    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
+    horiz2 = __msa_ave_u_b(inp2, res1);
+    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
+    LD_UB2(src, src_stride, inp0, inp1);
+    src += (2 * src_stride);
+    res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
+                                                  mask2, mask3, const20,
+                                                  const6, const3);
+    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
+    horiz4 = __msa_ave_u_b(inp0, res0);
+    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
+                                                 horiz1, horiz2, horiz3, horiz4,
+                                                 horiz1, horiz0, horiz0, horiz1,
+                                                 horiz2, horiz3, horiz4, horiz5,
+                                                 const20, const6, const3);
+    /* The next source pair is loaded before the first V result is stored. */
+    LD_UB2(src, src_stride, inp2, inp3);
+    src += (2 * src_stride);
+    ST8x2_UB(res0, dst, dst_stride); /* presumably output rows 0-1 */
+    dst += 2 * dst_stride;
+
+    res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
+                                                  mask2, mask3, const20,
+                                                  const6, const3);
+    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
+    horiz6 = __msa_ave_u_b(inp2, res1);
+    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
+    inp0 = LD_UB(src); /* ninth source row, filtered on its own */
+    res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE_1ROW(inp0, mask0, mask1,
+                                                       mask2, mask3, const20,
+                                                       const6, const3);
+    horiz8 = __msa_ave_u_b(inp0, res0);
+    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
+                                                 horiz3, horiz4, horiz5, horiz6,
+                                                 horiz3, horiz2, horiz1, horiz0,
+                                                 horiz4, horiz5, horiz6, horiz7,
+                                                 const20, const6, const3);
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
+                                                 horiz5, horiz6, horiz7, horiz8,
+                                                 horiz5, horiz4, horiz3, horiz2,
+                                                 horiz6, horiz7, horiz8, horiz8,
+                                                 const20, const6, const3);
+    ST8x2_UB(res1, dst, dst_stride); /* presumably output rows 2-3 */
+    dst += 2 * dst_stride;
+
+    ST8x2_UB(res0, dst, dst_stride); /* presumably output rows 4-5 */
+    dst += (2 * dst_stride);
+    /* NOTE(review): horiz0 / horiz8 repeat in the V taps - presumably edge mirroring. */
+    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
+                                                 horiz7, horiz8, horiz8, horiz7,
+                                                 horiz7, horiz6, horiz5, horiz4,
+                                                 horiz8, horiz8, horiz7, horiz6,
+                                                 const20, const6, const3);
+    ST8x2_UB(res1, dst, dst_stride); /* presumably output rows 6-7 */
+}
+
+static void hv_mc_qpel_no_rnd_16x16_msa(const uint8_t *src,
+                                        int32_t src_stride,
+                                        uint8_t *dst,
+                                        int32_t dst_stride)
+{   /* 16x16 no-round qpel in two passes through a stack temporary. */
+    enum { pitch = 16, nrows = 17 }; /* presumably 16 + 1 rows - confirm */
+    uint8_t tmp[nrows * pitch]; /* 272 bytes: output of the horizontal pass */
+    hv_mc_qpel_no_rnd_horiz_16x16_msa(src, src_stride, tmp, pitch, 16);
+    vert_mc_qpel_no_rnd_16x16_msa(tmp, pitch, dst, dst_stride);
+}
+
+static void hv_mc_qpel_no_rnd_8x8_msa(const uint8_t *src,
+                                      int32_t src_stride,
+                                      uint8_t *dst,
+                                      int32_t dst_stride)
+{   /* 8x8 no-round qpel, H then V, with no averaging in either pass. */
+    v16u8 inp0, inp1, inp2, inp3;
+    v16u8 res0, res1;
+    v16u8 horiz0, horiz1, horiz2, horiz3; /* H-pass results, nine in all; */
+    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8; /* inputs of the V filter */
+    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
+    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
+    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
+    v16u8 const20 = (v16u8) __msa_ldi_b(20);
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+    /* H pass: two source rows are loaded and filtered together per step. */
+    LD_UB2(src, src_stride, inp0, inp1);
+    src += (2 * src_stride);
+    horiz0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
+                                                    mask2, mask3, const20,
+                                                    const6, const3);
+    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1); /* upper half of horiz0 */
+    LD_UB2(src, src_stride, inp2, inp3);
+    src += (2 * src_stride);
+    horiz2 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
+                                                    mask2, mask3, const20,
+                                                    const6, const3);
+    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
+    LD_UB2(src, src_stride, inp0, inp1);
+    src += (2 * src_stride);
+    horiz4 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
+                                                    mask2, mask3, const20,
+                                                    const6, const3);
+    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
+                                                 horiz1, horiz2, horiz3, horiz4,
+                                                 horiz1, horiz0, horiz0, horiz1,
+                                                 horiz2, horiz3, horiz4, horiz5,
+                                                 const20, const6, const3);
+    LD_UB2(src, src_stride, inp2, inp3); /* loaded before the first store */
+    src += (2 * src_stride);
+    ST8x2_UB(res0, dst, dst_stride); /* presumably output rows 0-1 */
+    dst += 2 * dst_stride;
+
+    horiz6 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
+                                                    mask2, mask3, const20,
+                                                    const6, const3);
+    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
+    inp0 = LD_UB(src); /* ninth source row, filtered on its own */
+    horiz8 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE_1ROW(inp0, mask0, mask1,
+                                                         mask2, mask3, const20,
+                                                         const6, const3);
+    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
+                                                 horiz3, horiz4, horiz5, horiz6,
+                                                 horiz3, horiz2, horiz1, horiz0,
+                                                 horiz4, horiz5, horiz6, horiz7,
+                                                 const20, const6, const3);
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
+                                                 horiz5, horiz6, horiz7, horiz8,
+                                                 horiz5, horiz4, horiz3, horiz2,
+                                                 horiz6, horiz7, horiz8, horiz8,
+                                                 const20, const6, const3);
+    ST8x2_UB(res1, dst, dst_stride); /* presumably output rows 2-3 */
+    dst += 2 * dst_stride;
+
+    /* NOTE(review): horiz0 / horiz8 repeat in the V taps - presumably edge mirroring. */
+    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
+                                                 horiz7, horiz8, horiz8, horiz7,
+                                                 horiz7, horiz6, horiz5, horiz4,
+                                                 horiz8, horiz8, horiz7, horiz6,
+                                                 const20, const6, const3);
+    ST8x2_UB(res0, dst, dst_stride); /* presumably output rows 4-5 */
+    dst += 2 * dst_stride;
+    ST8x2_UB(res1, dst, dst_stride); /* presumably output rows 6-7 */
+}
+
+static void hv_mc_qpel_no_rnd_aver_h_src1_16x16_msa(const uint8_t *src,
+                                                    int32_t src_stride,
+                                                    uint8_t *dst,
+                                                    int32_t dst_stride)
+{   /* 16x16 no-round qpel in two passes through a stack temporary. */
+    enum { pitch = 16, nrows = 17 }; /* horizontal pass stores 16 + 1 rows */
+    uint8_t tmp[nrows * pitch]; /* 272 bytes: output of the horizontal pass */
+    hv_mc_qpel_no_rnd_horiz_src1_16x16_msa(src, src_stride, tmp, pitch, 16);
+    vert_mc_qpel_no_rnd_16x16_msa(tmp, pitch, dst, dst_stride);
+}
+
+static void hv_mc_qpel_no_rnd_aver_h_src1_8x8_msa(const uint8_t *src,
+                                                  int32_t src_stride,
+                                                  uint8_t *dst,
+                                                  int32_t dst_stride)
+{   /* 8x8 no-round qpel, H then V; only the H pass averages (slid input). */
+    v16u8 inp0, inp1, inp2, inp3;
+    v16u8 res0, res1;
+    v16u8 horiz0, horiz1, horiz2, horiz3; /* H-pass results, nine in all; */
+    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8; /* inputs of the V filter */
+    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
+    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
+    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
+    v16u8 const20 = (v16u8) __msa_ldi_b(20);
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+    /* H pass: two source rows are loaded and filtered together per step. */
+    LD_UB2(src, src_stride, inp0, inp1);
+    src += (2 * src_stride);
+    res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
+                                                  mask2, mask3, const20,
+                                                  const6, const3);
+    SLDI_B2_UB(inp0, inp1, inp0, inp1, inp0, inp1, 1);
+    /* NOTE(review): the trailing 1 is presumably a one-byte slide (src + 1). */
+    inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
+    horiz0 = __msa_ave_u_b(inp0, res0); /* inp0 now packs both rows' low 8 bytes */
+    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1); /* upper half of horiz0 */
+    LD_UB2(src, src_stride, inp2, inp3);
+    src += (2 * src_stride);
+    res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
+                                                  mask2, mask3, const20,
+                                                  const6, const3);
+    SLDI_B2_UB(inp2, inp3, inp2, inp3, inp2, inp3, 1);
+
+    inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
+    horiz2 = __msa_ave_u_b(inp2, res1);
+    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
+    LD_UB2(src, src_stride, inp0, inp1);
+    src += (2 * src_stride);
+    res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
+                                                  mask2, mask3, const20,
+                                                  const6, const3);
+    SLDI_B2_UB(inp0, inp1, inp0, inp1, inp0, inp1, 1);
+
+    inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
+    horiz4 = __msa_ave_u_b(inp0, res0);
+    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
+                                                 horiz1, horiz2, horiz3, horiz4,
+                                                 horiz1, horiz0, horiz0, horiz1,
+                                                 horiz2, horiz3, horiz4, horiz5,
+                                                 const20, const6, const3);
+    LD_UB2(src, src_stride, inp2, inp3); /* loaded before the first store */
+    src += (2 * src_stride);
+    ST8x2_UB(res0, dst, dst_stride); /* presumably output rows 0-1 */
+    dst += 2 * dst_stride;
+
+    res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
+                                                  mask2, mask3, const20,
+                                                  const6, const3);
+    SLDI_B2_UB(inp2, inp3, inp2, inp3, inp2, inp3, 1);
+
+    inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
+    horiz6 = __msa_ave_u_b(inp2, res1);
+    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
+    inp0 = LD_UB(src); /* ninth source row, filtered on its own */
+    res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE_1ROW(inp0, mask0, mask1,
+                                                       mask2, mask3, const20,
+                                                       const6, const3);
+    inp0 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) inp0, 1);
+    horiz8 = __msa_ave_u_b(inp0, res0); /* average with the row slid by 1 byte */
+    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
+                                                 horiz3, horiz4, horiz5, horiz6,
+                                                 horiz3, horiz2, horiz1, horiz0,
+                                                 horiz4, horiz5, horiz6, horiz7,
+                                                 const20, const6, const3);
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
+                                                 horiz5, horiz6, horiz7, horiz8,
+                                                 horiz5, horiz4, horiz3, horiz2,
+                                                 horiz6, horiz7, horiz8, horiz8,
+                                                 const20, const6, const3);
+    ST8x2_UB(res1, dst, dst_stride); /* presumably output rows 2-3 */
+    dst += 2 * dst_stride;
+    /* NOTE(review): horiz0 / horiz8 repeat in the V taps - presumably edge mirroring. */
+    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
+                                                 horiz7, horiz8, horiz8, horiz7,
+                                                 horiz7, horiz6, horiz5, horiz4,
+                                                 horiz8, horiz8, horiz7, horiz6,
+                                                 const20, const6, const3);
+    ST8x2_UB(res0, dst, dst_stride); /* presumably output rows 4-5 */
+    dst += 2 * dst_stride;
+    ST8x2_UB(res1, dst, dst_stride); /* presumably output rows 6-7 */
+}
+
+static void hv_mc_qpel_no_rnd_aver_hv_src01_16x16_msa(const uint8_t *src,
+                                                      int32_t src_stride,
+                                                      uint8_t *dst,
+                                                      int32_t dst_stride)
+{   /* 16x16 no-round qpel in two passes through a stack temporary. */
+    enum { pitch = 16, nrows = 17 }; /* presumably 16 + 1 rows - confirm */
+    uint8_t tmp[nrows * pitch]; /* 272 bytes: output of the horizontal pass */
+    hv_mc_qpel_no_rnd_horiz_src0_16x16_msa(src, src_stride, tmp, pitch, 16);
+    vert_mc_qpel_no_rnd_aver_src1_16x16_msa(tmp, pitch, dst, dst_stride);
+}
+
+static void hv_mc_qpel_no_rnd_aver_hv_src01_8x8_msa(const uint8_t *src,
+                                                    int32_t src_stride,
+                                                    uint8_t *dst,
+                                                    int32_t dst_stride)
+{   /* 8x8 no-round qpel, H then V; H averages with src, V with the next row. */
+    v16u8 inp0, inp1, inp2, inp3;
+    v16u8 res0, res1, avg0, avg1;
+    v16u8 horiz0, horiz1, horiz2, horiz3; /* H-pass results, nine in all; */
+    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8; /* inputs of the V filter */
+    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
+    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
+    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
+    v16u8 const20 = (v16u8) __msa_ldi_b(20);
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+    /* H pass: two source rows are loaded and filtered together per step. */
+    LD_UB2(src, src_stride, inp0, inp1);
+    src += (2 * src_stride);
+    res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
+                                                  mask2, mask3, const20,
+                                                  const6, const3);
+    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0); /* pack rows */
+    horiz0 = __msa_ave_u_b(inp0, res0); /* truncating average with unshifted src */
+    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1); /* upper half of horiz0 */
+    LD_UB2(src, src_stride, inp2, inp3);
+    src += (2 * src_stride);
+    res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
+                                                  mask2, mask3, const20,
+                                                  const6, const3);
+    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
+    horiz2 = __msa_ave_u_b(inp2, res1);
+    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
+    LD_UB2(src, src_stride, inp0, inp1);
+    src += (2 * src_stride);
+    res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
+                                                  mask2, mask3, const20,
+                                                  const6, const3);
+    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
+    horiz4 = __msa_ave_u_b(inp0, res0);
+    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
+                                                 horiz1, horiz2, horiz3, horiz4,
+                                                 horiz1, horiz0, horiz0, horiz1,
+                                                 horiz2, horiz3, horiz4, horiz5,
+                                                 const20, const6, const3);
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz2, (v2i64) horiz1);
+    res0 = __msa_ave_u_b(avg0, res0); /* V average with horiz1 / horiz2 */
+    ST8x2_UB(res0, dst, dst_stride); /* presumably output rows 0-1 */
+    dst += (2 * dst_stride);
+    /* Remaining H-pass rows are produced only now, after the first store. */
+    LD_UB2(src, src_stride, inp2, inp3);
+    src += (2 * src_stride);
+    res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
+                                                  mask2, mask3, const20,
+                                                  const6, const3);
+    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
+    horiz6 = __msa_ave_u_b(inp2, res1);
+    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
+    inp0 = LD_UB(src); /* ninth source row, filtered on its own */
+    res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE_1ROW(inp0, mask0, mask1,
+                                                       mask2, mask3, const20,
+                                                       const6, const3);
+    horiz8 = __msa_ave_u_b(inp0, res0);
+    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
+                                                 horiz3, horiz4, horiz5, horiz6,
+                                                 horiz3, horiz2, horiz1, horiz0,
+                                                 horiz4, horiz5, horiz6, horiz7,
+                                                 const20, const6, const3);
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz4, (v2i64) horiz3);
+    res1 = __msa_ave_u_b(avg1, res1); /* V average with horiz3 / horiz4 */
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
+                                                 horiz5, horiz6, horiz7, horiz8,
+                                                 horiz5, horiz4, horiz3, horiz2,
+                                                 horiz6, horiz7, horiz8, horiz8,
+                                                 const20, const6, const3);
+    ST8x2_UB(res1, dst, dst_stride); /* presumably output rows 2-3 */
+    dst += 2 * dst_stride;
+
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz6, (v2i64) horiz5);
+    res0 = __msa_ave_u_b(avg0, res0); /* V average with horiz5 / horiz6 */
+    /* NOTE(review): horiz0 / horiz8 repeat in the V taps - presumably edge mirroring. */
+    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
+                                                 horiz7, horiz8, horiz8, horiz7,
+                                                 horiz7, horiz6, horiz5, horiz4,
+                                                 horiz8, horiz8, horiz7, horiz6,
+                                                 const20, const6, const3);
+    ST8x2_UB(res0, dst, dst_stride); /* presumably output rows 4-5 */
+    dst += 2 * dst_stride;
+
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz8, (v2i64) horiz7);
+    res1 = __msa_ave_u_b(avg1, res1); /* V average with horiz7 / horiz8 */
+    ST8x2_UB(res1, dst, dst_stride); /* presumably output rows 6-7 */
+}
+
+static void hv_mc_qpel_no_rnd_aver_v_src1_16x16_msa(const uint8_t *src,
+                                                    int32_t src_stride,
+                                                    uint8_t *dst,
+                                                    int32_t dst_stride)
+{   /* 16x16 no-round qpel in two passes through a stack temporary. */
+    enum { pitch = 16, nrows = 17 }; /* presumably 16 + 1 rows - confirm */
+    uint8_t tmp[nrows * pitch]; /* 272 bytes: output of the horizontal pass */
+    hv_mc_qpel_no_rnd_horiz_16x16_msa(src, src_stride, tmp, pitch, 16);
+    vert_mc_qpel_no_rnd_aver_src1_16x16_msa(tmp, pitch, dst, dst_stride);
+}
+
+static void hv_mc_qpel_no_rnd_aver_v_src1_8x8_msa(const uint8_t *src,
+                                                  int32_t src_stride,
+                                                  uint8_t *dst,
+                                                  int32_t dst_stride)
+{   /* 8x8 no-round qpel, H then V; only the V pass averages (next row). */
+    v16u8 inp0, inp1, inp2, inp3;
+    v16u8 res0, res1, avg0, avg1;
+    v16u8 horiz0, horiz1, horiz2, horiz3; /* H-pass results, nine in all; */
+    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8; /* inputs of the V filter */
+    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
+    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
+    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
+    v16u8 const20 = (v16u8) __msa_ldi_b(20);
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+    /* H pass: two source rows are loaded and filtered together per step. */
+    LD_UB2(src, src_stride, inp0, inp1);
+    src += (2 * src_stride);
+    horiz0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
+                                                    mask2, mask3, const20,
+                                                    const6, const3);
+    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1); /* upper half of horiz0 */
+    LD_UB2(src, src_stride, inp2, inp3);
+    src += (2 * src_stride);
+    horiz2 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
+                                                    mask2, mask3, const20,
+                                                    const6, const3);
+    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
+    LD_UB2(src, src_stride, inp0, inp1);
+    src += (2 * src_stride);
+    horiz4 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
+                                                    mask2, mask3, const20,
+                                                    const6, const3);
+    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
+                                                 horiz1, horiz2, horiz3, horiz4,
+                                                 horiz1, horiz0, horiz0, horiz1,
+                                                 horiz2, horiz3, horiz4, horiz5,
+                                                 const20, const6, const3);
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz2, (v2i64) horiz1);
+    res0 = __msa_ave_u_b(avg0, res0); /* V average with horiz1 / horiz2 */
+    LD_UB2(src, src_stride, inp2, inp3); /* loaded before the first store */
+    src += (2 * src_stride);
+    ST8x2_UB(res0, dst, dst_stride); /* presumably output rows 0-1 */
+    dst += 2 * dst_stride;
+
+    horiz6 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
+                                                    mask2, mask3, const20,
+                                                    const6, const3);
+    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
+    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
+                                                 horiz3, horiz4, horiz5, horiz6,
+                                                 horiz3, horiz2, horiz1, horiz0,
+                                                 horiz4, horiz5, horiz6, horiz7,
+                                                 const20, const6, const3);
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz4, (v2i64) horiz3);
+    res1 = __msa_ave_u_b(avg1, res1); /* V average with horiz3 / horiz4 */
+    inp0 = LD_UB(src); /* ninth source row, filtered on its own */
+    horiz8 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE_1ROW(inp0, mask0, mask1,
+                                                         mask2, mask3, const20,
+                                                         const6, const3);
+    ST8x2_UB(res1, dst, dst_stride); /* presumably output rows 2-3 */
+    dst += 2 * dst_stride;
+    /* NOTE(review): horiz0 / horiz8 repeat in the V taps - presumably edge mirroring. */
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
+                                                 horiz5, horiz6, horiz7, horiz8,
+                                                 horiz5, horiz4, horiz3, horiz2,
+                                                 horiz6, horiz7, horiz8, horiz8,
+                                                 const20, const6, const3);
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz6, (v2i64) horiz5);
+    res0 = __msa_ave_u_b(avg0, res0); /* V average with horiz5 / horiz6 */
+    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
+                                                 horiz7, horiz8, horiz8, horiz7,
+                                                 horiz7, horiz6, horiz5, horiz4,
+                                                 horiz8, horiz8, horiz7, horiz6,
+                                                 const20, const6, const3);
+    ST8x2_UB(res0, dst, dst_stride); /* presumably output rows 4-5 */
+    dst += 2 * dst_stride;
+
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz8, (v2i64) horiz7);
+    res1 = __msa_ave_u_b(avg1, res1); /* V average with horiz7 / horiz8 */
+    ST8x2_UB(res1, dst, dst_stride); /* presumably output rows 6-7 */
+}
+
+static void hv_mc_qpel_no_rnd_aver_hv_src11_16x16_msa(const uint8_t *src,
+                                                      int32_t src_stride,
+                                                      uint8_t *dst,
+                                                      int32_t dst_stride)
+{ /* Two-pass 16x16 path: a horizontal helper fills buff, then a vertical helper reads buff and writes dst. */
+    uint8_t buff[272]; /* 272 = 17 * 16; presumably 17 intermediate rows at stride 16 (helper body not shown here) - TODO confirm */
+
+    hv_mc_qpel_no_rnd_horiz_src1_16x16_msa(src, src_stride, buff, 16, 16); /* trailing 16, 16: presumably dst_stride and height, as in the sibling horiz helpers - TODO confirm */
+    vert_mc_qpel_no_rnd_aver_src1_16x16_msa(buff, 16, dst, dst_stride); /* buff is handed over as the source with stride 16 */
+}
+
+static void hv_mc_qpel_no_rnd_aver_hv_src11_8x8_msa(const uint8_t *src,
+                                                    int32_t src_stride,
+                                                    uint8_t *dst,
+                                                    int32_t dst_stride)
+{ /* 8x8 no-round H+V path: builds horiz0..horiz8 (filter output averaged with the 1-byte-shifted source), then vertical filter averaged with rows 1..8. */
+    v16u8 inp0, inp1, inp2, inp3;
+    v16u8 res0, res1, avg0, avg1;
+    v16u8 horiz0, horiz1, horiz2, horiz3;
+    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
+    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 }; /* mask0..mask3: index tables handed to the APPLY_HORIZ_* macros (macro bodies not shown here) */
+    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
+    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
+    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
+    v16u8 const20 = (v16u8) __msa_ldi_b(20); /* immediates 20, 6 and 3 replicated into every byte lane */
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+
+    LD_UB2(src, src_stride, inp0, inp1); /* presumably source rows 0 and 1 - TODO confirm macro */
+    src += (2 * src_stride);
+    res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
+                                                  mask2, mask3, const20,
+                                                  const6, const3);
+    SLDI_B2_UB(inp0, inp1, inp0, inp1, inp0, inp1, 1); /* presumably slides both rows by one byte so lane i holds src[i + 1] - TODO confirm macro */
+
+    inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1); /* pack: low 8 bytes of inp1 into the upper half of inp0 */
+    horiz0 = __msa_ave_u_b(inp0, res0); /* ave_u: truncating average (no +1), the no_rnd flavour */
+    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1); /* upper doubleword of horiz0 copied to both halves */
+    LD_UB2(src, src_stride, inp2, inp3);
+    src += (2 * src_stride);
+    res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
+                                                  mask2, mask3, const20,
+                                                  const6, const3);
+    SLDI_B2_UB(inp2, inp3, inp2, inp3, inp2, inp3, 1);
+
+    inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
+    horiz2 = __msa_ave_u_b(inp2, res1);
+    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
+    LD_UB2(src, src_stride, inp0, inp1);
+    src += (2 * src_stride);
+    res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
+                                                  mask2, mask3, const20,
+                                                  const6, const3);
+
+    SLDI_B2_UB(inp0, inp1, inp0, inp1, inp0, inp1, 1);
+    inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
+    horiz4 = __msa_ave_u_b(inp0, res0);
+    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2, /* argument order is significant; low-index rows are repeated here */
+                                                 horiz1, horiz2, horiz3, horiz4,
+                                                 horiz1, horiz0, horiz0, horiz1,
+                                                 horiz2, horiz3, horiz4, horiz5,
+                                                 const20, const6, const3);
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz2, (v2i64) horiz1); /* low halves of horiz1 (lower) and horiz2 (upper) */
+    res0 = __msa_ave_u_b(avg0, res0);
+    ST8x2_UB(res0, dst, dst_stride); /* presumably writes the two 8-byte halves as two dst rows - TODO confirm macro */
+    dst += (2 * dst_stride);
+
+    LD_UB2(src, src_stride, inp2, inp3);
+    src += (2 * src_stride);
+    res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
+                                                  mask2, mask3, const20,
+                                                  const6, const3);
+    SLDI_B2_UB(inp2, inp3, inp2, inp3, inp2, inp3, 1);
+
+    inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
+    horiz6 = __msa_ave_u_b(inp2, res1);
+    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
+    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
+                                                 horiz3, horiz4, horiz5, horiz6,
+                                                 horiz3, horiz2, horiz1, horiz0,
+                                                 horiz4, horiz5, horiz6, horiz7,
+                                                 const20, const6, const3);
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz4, (v2i64) horiz3);
+    res1 = __msa_ave_u_b(avg1, res1);
+    ST8x2_UB(res1, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    inp0 = LD_UB(src); /* single extra source row, used to build horiz8 */
+    res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE_1ROW(inp0, mask0, mask1,
+                                                       mask2, mask3, const20,
+                                                       const6, const3);
+    inp0 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) inp0, 1); /* slide inp0 against itself by one byte */
+    horiz8 = __msa_ave_u_b(inp0, res0);
+    res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
+                                                 horiz5, horiz6, horiz7, horiz8,
+                                                 horiz5, horiz4, horiz3, horiz2,
+                                                 horiz6, horiz7, horiz8, horiz8,
+                                                 const20, const6, const3);
+    res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3, /* high-index rows are repeated here */
+                                                 horiz7, horiz8, horiz8, horiz7,
+                                                 horiz7, horiz6, horiz5, horiz4,
+                                                 horiz8, horiz8, horiz7, horiz6,
+                                                 const20, const6, const3);
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz6, (v2i64) horiz5);
+    res0 = __msa_ave_u_b(avg0, res0);
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz8, (v2i64) horiz7);
+    res1 = __msa_ave_u_b(avg1, res1);
+    ST8x4_UB(res0, res1, dst, dst_stride); /* presumably the last four dst rows in one store macro - TODO confirm */
+}
+
+static void hv_mc_qpel_aver_horiz_src0_16x16_msa(const uint8_t *src,
+                                                 int32_t src_stride,
+                                                 uint8_t *dst,
+                                                 int32_t dst_stride,
+                                                 int32_t height)
+{ /* Horizontal pass: per row, filter (row, row + 1 byte) and round-average the result with the unshifted row; stores 4 * (height >> 2) + 1 rows. */
+    uint8_t loop_count; /* 8-bit counter: height >> 2 is narrowed to uint8_t on assignment */
+    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
+    v16u8 res;
+    v16u8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; /* descending byte indices, handed to APPLY_HORIZ_QPEL_FILTER (macro body not shown here) */
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+    v8u16 const20 = (v8u16) __msa_ldi_h(20); /* 20 is replicated per halfword, unlike the byte-wide 6 and 3 */
+
+    for (loop_count = (height >> 2); loop_count--;) { /* four rows per iteration */
+        LD_UB4(src, src_stride, inp0, inp2, inp4, inp6); /* presumably four consecutive rows starting at src - TODO confirm macro */
+        LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7); /* the same rows, starting one byte to the right */
+        src += (4 * src_stride);
+        res = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask,
+                                      const20, const6, const3);
+        res = __msa_aver_u_b(inp0, res); /* aver_u: rounded average with the row loaded at src (src0 variant) */
+        ST_UB(res, dst);
+        dst += dst_stride;
+
+        res = APPLY_HORIZ_QPEL_FILTER(inp2, inp3, mask,
+                                      const20, const6, const3);
+        res = __msa_aver_u_b(inp2, res);
+        ST_UB(res, dst);
+        dst += dst_stride;
+
+        res = APPLY_HORIZ_QPEL_FILTER(inp4, inp5, mask,
+                                      const20, const6, const3);
+        res = __msa_aver_u_b(inp4, res);
+        ST_UB(res, dst);
+        dst += dst_stride;
+
+        res = APPLY_HORIZ_QPEL_FILTER(inp6, inp7, mask,
+                                      const20, const6, const3);
+        res = __msa_aver_u_b(inp6, res);
+        ST_UB(res, dst);
+        dst += dst_stride;
+    }
+
+    LD_UB2(src, 1, inp0, inp1); /* stride 1: presumably loads from src and src + 1 for the one extra row - TODO confirm macro */
+    res = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask, const20, const6, const3);
+    res = __msa_aver_u_b(inp0, res);
+    ST_UB(res, dst); /* final row; dst is not advanced afterwards */
+}
+
+static void hv_mc_qpel_aver_hv_src00_16x16_msa(const uint8_t *src,
+                                               int32_t src_stride,
+                                               uint8_t *dst,
+                                               int32_t dst_stride)
+{ /* Two-pass 16x16 path: hv_mc_qpel_aver_horiz_src0_16x16_msa fills buff, then vert_mc_qpel_aver_src0_16x16_msa reads buff and writes dst. */
+    uint8_t buff[272]; /* 272 = 17 * 16: with height 16 the horizontal helper stores 4 * 4 + 1 rows at stride 16 */
+
+    hv_mc_qpel_aver_horiz_src0_16x16_msa(src, src_stride, buff, 16, 16); /* dst_stride = 16, height = 16 */
+    vert_mc_qpel_aver_src0_16x16_msa(buff, 16, dst, dst_stride); /* buff is handed over as the source with stride 16 */
+}
+
+static void hv_mc_qpel_aver_hv_src00_8x8_msa(const uint8_t *src,
+                                             int32_t src_stride,
+                                             uint8_t *dst,
+                                             int32_t dst_stride)
+{ /* 8x8 rounding H+V path: builds horiz0..horiz8 (filter output round-averaged with the unshifted source), then vertical filter round-averaged with rows 0..7. */
+    v16u8 inp0, inp1, inp2, inp3;
+    v16u8 res0, res1, avg0, avg1;
+    v16u8 horiz0, horiz1, horiz2, horiz3;
+    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
+    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 }; /* mask0..mask3: index tables handed to the APPLY_HORIZ_* macros (macro bodies not shown here) */
+    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
+    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
+    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
+    v16u8 const20 = (v16u8) __msa_ldi_b(20); /* immediates 20, 6 and 3 replicated into every byte lane */
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+
+    LD_UB4(src, src_stride, inp0, inp1, inp2, inp3); /* presumably source rows 0..3 - TODO confirm macro */
+    src += (4 * src_stride);
+    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0); /* pack: low 8 bytes of inp0 (lower half) and of inp1 (upper half) */
+    horiz0 = __msa_aver_u_b(inp0, res0); /* aver_u: rounded average with the unshifted source (src0 variant) */
+    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1); /* upper doubleword of horiz0 copied to both halves */
+    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
+    horiz2 = __msa_aver_u_b(inp2, res1);
+    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
+    LD_UB2(src, src_stride, inp0, inp1);
+    src += (2 * src_stride);
+    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
+    horiz4 = __msa_aver_u_b(inp0, res0);
+    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2, /* argument order is significant; low-index rows are repeated here */
+                                        horiz1, horiz2, horiz3, horiz4,
+                                        horiz1, horiz0, horiz0, horiz1,
+                                        horiz2, horiz3, horiz4, horiz5,
+                                        const20, const6, const3);
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz1, (v2i64) horiz0); /* low halves of horiz0 (lower) and horiz1 (upper) */
+    res0 = __msa_aver_u_b(avg0, res0);
+    ST8x2_UB(res0, dst, dst_stride); /* presumably writes the two 8-byte halves as two dst rows - TODO confirm macro */
+    dst += (2 * dst_stride);
+
+    LD_UB2(src, src_stride, inp2, inp3);
+    src += (2 * src_stride);
+    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
+    horiz6 = __msa_aver_u_b(inp2, res1);
+    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
+                                        horiz3, horiz4, horiz5, horiz6,
+                                        horiz3, horiz2, horiz1, horiz0,
+                                        horiz4, horiz5, horiz6, horiz7,
+                                        const20, const6, const3);
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz3, (v2i64) horiz2);
+    res1 = __msa_aver_u_b(avg1, res1);
+
+    inp0 = LD_UB(src); /* single extra source row, used to build horiz8 */
+    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
+                                              const20, const6, const3);
+    horiz8 = __msa_aver_u_b(inp0, res0);
+    ST8x2_UB(res1, dst, dst_stride);
+    dst += 2 * dst_stride;
+
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
+                                        horiz5, horiz6, horiz7, horiz8,
+                                        horiz5, horiz4, horiz3, horiz2,
+                                        horiz6, horiz7, horiz8, horiz8,
+                                        const20, const6, const3);
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz5, (v2i64) horiz4);
+    res0 = __msa_aver_u_b(avg0, res0);
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3, /* high-index rows are repeated here */
+                                        horiz7, horiz8, horiz8, horiz7,
+                                        horiz7, horiz6, horiz5, horiz4,
+                                        horiz8, horiz8, horiz7, horiz6,
+                                        const20, const6, const3);
+    ST8x2_UB(res0, dst, dst_stride);
+    dst += 2 * dst_stride;
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz7, (v2i64) horiz6);
+    res1 = __msa_aver_u_b(avg1, res1);
+    ST8x2_UB(res1, dst, dst_stride);
+}
+
+static void hv_mc_qpel_aver_horiz_16x16_msa(const uint8_t *src,
+                                            int32_t src_stride,
+                                            uint8_t *dst,
+                                            int32_t dst_stride,
+                                            int32_t height)
+{ /* Horizontal pass without source averaging: stores the raw APPLY_HORIZ_QPEL_FILTER result for 4 * (height >> 2) + 1 rows. */
+    uint8_t loop_count; /* 8-bit counter: height >> 2 is narrowed to uint8_t on assignment */
+    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
+    v16u8 res;
+    v16u8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; /* descending byte indices, handed to APPLY_HORIZ_QPEL_FILTER (macro body not shown here) */
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+    v8u16 const20 = (v8u16) __msa_ldi_h(20); /* 20 is replicated per halfword, unlike the byte-wide 6 and 3 */
+
+    for (loop_count = (height >> 2); loop_count--;) { /* four rows per iteration */
+        LD_UB4(src, src_stride, inp0, inp2, inp4, inp6); /* presumably four consecutive rows starting at src - TODO confirm macro */
+        LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7); /* the same rows, starting one byte to the right */
+        src += (4 * src_stride);
+        res = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask,
+                                      const20, const6, const3);
+        ST_UB(res, dst); /* filter output stored directly, no averaging step */
+        dst += dst_stride;
+
+        res = APPLY_HORIZ_QPEL_FILTER(inp2, inp3, mask,
+                                      const20, const6, const3);
+        ST_UB(res, dst);
+        dst += dst_stride;
+
+        res = APPLY_HORIZ_QPEL_FILTER(inp4, inp5, mask,
+                                      const20, const6, const3);
+        ST_UB(res, dst);
+        dst += dst_stride;
+
+        res = APPLY_HORIZ_QPEL_FILTER(inp6, inp7, mask,
+                                      const20, const6, const3);
+        ST_UB(res, dst);
+        dst += dst_stride;
+    }
+
+    LD_UB2(src, 1, inp0, inp1); /* stride 1: presumably loads from src and src + 1 for the one extra row - TODO confirm macro */
+    res = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask, const20, const6, const3);
+    ST_UB(res, dst); /* final row; dst is not advanced afterwards */
+}
+
+static void hv_mc_qpel_aver_v_src0_16x16_msa(const uint8_t *src,
+                                             int32_t src_stride,
+                                             uint8_t *dst,
+                                             int32_t dst_stride)
+{ /* Two-pass 16x16 path: hv_mc_qpel_aver_horiz_16x16_msa fills buff, then vert_mc_qpel_aver_src0_16x16_msa reads buff and writes dst. */
+    uint8_t buff[272]; /* 272 = 17 * 16: with height 16 the horizontal helper stores 4 * 4 + 1 rows at stride 16 */
+
+    hv_mc_qpel_aver_horiz_16x16_msa(src, src_stride, buff, 16, 16); /* dst_stride = 16, height = 16 */
+    vert_mc_qpel_aver_src0_16x16_msa(buff, 16, dst, dst_stride); /* buff is handed over as the source with stride 16 */
+}
+
+static void hv_mc_qpel_aver_v_src0_8x8_msa(const uint8_t *src,
+                                           int32_t src_stride,
+                                           uint8_t *dst,
+                                           int32_t dst_stride)
+{ /* 8x8 rounding H+V path: horiz0..horiz8 are the plain horizontal filter outputs; the vertical filter result is round-averaged with rows 0..7. */
+    v16u8 inp0, inp1, inp2, inp3;
+    v16u8 res0, res1, avg0, avg1;
+    v16u8 horiz0, horiz1, horiz2, horiz3;
+    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
+    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 }; /* mask0..mask3: index tables handed to the APPLY_HORIZ_* macros (macro bodies not shown here) */
+    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
+    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
+    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
+    v16u8 const20 = (v16u8) __msa_ldi_b(20); /* immediates 20, 6 and 3 replicated into every byte lane */
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+
+    LD_UB2(src, src_stride, inp0, inp1); /* presumably source rows 0 and 1 - TODO confirm macro */
+    src += (2 * src_stride);
+    horiz0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, /* filter output used as-is: no averaging with the source in this variant */
+                                           mask0, mask1, mask2, mask3,
+                                           const20, const6, const3);
+    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1); /* upper doubleword of horiz0 copied to both halves */
+    LD_UB2(src, src_stride, inp2, inp3);
+    src += (2 * src_stride);
+    horiz2 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
+                                           mask0, mask1, mask2, mask3,
+                                           const20, const6, const3);
+    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
+    LD_UB2(src, src_stride, inp0, inp1);
+    src += (2 * src_stride);
+    horiz4 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
+                                           mask0, mask1, mask2, mask3,
+                                           const20, const6, const3);
+    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2, /* argument order is significant; low-index rows are repeated here */
+                                        horiz1, horiz2, horiz3, horiz4,
+                                        horiz1, horiz0, horiz0, horiz1,
+                                        horiz2, horiz3, horiz4, horiz5,
+                                        const20, const6, const3);
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz1, (v2i64) horiz0); /* low halves of horiz0 (lower) and horiz1 (upper) */
+    res0 = __msa_aver_u_b(avg0, res0); /* aver_u: rounded average */
+    ST8x2_UB(res0, dst, dst_stride); /* presumably writes the two 8-byte halves as two dst rows - TODO confirm macro */
+    dst += (2 * dst_stride);
+
+    LD_UB2(src, src_stride, inp2, inp3);
+    src += (2 * src_stride);
+    horiz6 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
+                                           mask0, mask1, mask2, mask3,
+                                           const20, const6, const3);
+    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
+                                        horiz3, horiz4, horiz5, horiz6,
+                                        horiz3, horiz2, horiz1, horiz0,
+                                        horiz4, horiz5, horiz6, horiz7,
+                                        const20, const6, const3);
+    inp0 = LD_UB(src); /* single extra source row, used to build horiz8 */
+    horiz8 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0,
+                                                mask0, mask1, mask2, mask3,
+                                                const20, const6, const3);
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz3, (v2i64) horiz2);
+    res1 = __msa_aver_u_b(avg1, res1);
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
+                                        horiz5, horiz6, horiz7, horiz8,
+                                        horiz5, horiz4, horiz3, horiz2,
+                                        horiz6, horiz7, horiz8, horiz8,
+                                        const20, const6, const3);
+    ST8x2_UB(res1, dst, dst_stride);
+    dst += 2 * dst_stride;
+
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz5, (v2i64) horiz4);
+    res0 = __msa_aver_u_b(avg0, res0);
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3, /* high-index rows are repeated here */
+                                        horiz7, horiz8, horiz8, horiz7,
+                                        horiz7, horiz6, horiz5, horiz4,
+                                        horiz8, horiz8, horiz7, horiz6,
+                                        const20, const6, const3);
+    ST8x2_UB(res0, dst, dst_stride);
+    dst += 2 * dst_stride;
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz7, (v2i64) horiz6);
+    res1 = __msa_aver_u_b(avg1, res1);
+    ST8x2_UB(res1, dst, dst_stride);
+}
+
+static void hv_mc_qpel_aver_horiz_src1_16x16_msa(const uint8_t *src,
+                                                 int32_t src_stride,
+                                                 uint8_t *dst,
+                                                 int32_t dst_stride,
+                                                 int32_t height)
+{ /* Horizontal pass: per row, filter (row, row + 1 byte) and round-average the result with the row loaded at src + 1; stores 4 * (height >> 2) + 1 rows. */
+    uint8_t loop_count; /* 8-bit counter: height >> 2 is narrowed to uint8_t on assignment */
+    v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
+    v16u8 res;
+    v16u8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; /* descending byte indices, handed to APPLY_HORIZ_QPEL_FILTER (macro body not shown here) */
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+    v8u16 const20 = (v8u16) __msa_ldi_h(20); /* 20 is replicated per halfword, unlike the byte-wide 6 and 3 */
+
+    for (loop_count = (height >> 2); loop_count--;) { /* four rows per iteration */
+        LD_UB4(src, src_stride, inp0, inp2, inp4, inp6); /* presumably four consecutive rows starting at src - TODO confirm macro */
+        LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7); /* the same rows, starting one byte to the right */
+        src += (4 * src_stride);
+        res = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask,
+                                      const20, const6, const3);
+        res = __msa_aver_u_b(res, inp1); /* aver_u: rounded average with the src + 1 row (src1 variant) */
+        ST_UB(res, dst);
+        dst += dst_stride;
+
+        res = APPLY_HORIZ_QPEL_FILTER(inp2, inp3, mask,
+                                      const20, const6, const3);
+        res = __msa_aver_u_b(res, inp3);
+        ST_UB(res, dst);
+        dst += dst_stride;
+
+        res = APPLY_HORIZ_QPEL_FILTER(inp4, inp5, mask,
+                                      const20, const6, const3);
+        res = __msa_aver_u_b(res, inp5);
+        ST_UB(res, dst);
+        dst += dst_stride;
+
+        res = APPLY_HORIZ_QPEL_FILTER(inp6, inp7, mask,
+                                      const20, const6, const3);
+        res = __msa_aver_u_b(res, inp7);
+        ST_UB(res, dst);
+        dst += dst_stride;
+    }
+
+    LD_UB2(src, 1, inp0, inp1); /* stride 1: presumably loads from src and src + 1 for the one extra row - TODO confirm macro */
+    res = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask, const20, const6, const3);
+    res = __msa_aver_u_b(inp1, res); /* same operands as in the loop, written in the opposite order */
+    ST_UB(res, dst); /* final row; dst is not advanced afterwards */
+}
+
+static void hv_mc_qpel_aver_hv_src10_16x16_msa(const uint8_t *src,
+                                               int32_t src_stride,
+                                               uint8_t *dst,
+                                               int32_t dst_stride)
+{ /* Two-pass 16x16 path: hv_mc_qpel_aver_horiz_src1_16x16_msa fills buff, then vert_mc_qpel_aver_src0_16x16_msa reads buff and writes dst. */
+    uint8_t buff[272]; /* 272 = 17 * 16: with height 16 the horizontal helper stores 4 * 4 + 1 rows at stride 16 */
+
+    hv_mc_qpel_aver_horiz_src1_16x16_msa(src, src_stride, buff, 16, 16); /* dst_stride = 16, height = 16 */
+    vert_mc_qpel_aver_src0_16x16_msa(buff, 16, dst, dst_stride); /* buff is handed over as the source with stride 16 */
+}
+
+static void hv_mc_qpel_aver_hv_src10_8x8_msa(const uint8_t *src,
+                                             int32_t src_stride,
+                                             uint8_t *dst,
+                                             int32_t dst_stride)
+{ /* 8x8 rounding H+V path: builds horiz0..horiz8 (filter output round-averaged with the 1-byte-shifted source), then vertical filter round-averaged with rows 0..7. */
+    v16u8 inp0, inp1, inp2, inp3;
+    v16u8 res0, res1, avg0, avg1;
+    v16u8 horiz0, horiz1, horiz2, horiz3;
+    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
+    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 }; /* mask0..mask3: index tables handed to the APPLY_HORIZ_* macros (macro bodies not shown here) */
+    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
+    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
+    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
+    v16u8 const20 = (v16u8) __msa_ldi_b(20); /* immediates 20, 6 and 3 replicated into every byte lane */
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+
+    LD_UB4(src, src_stride, inp0, inp1, inp2, inp3); /* presumably source rows 0..3 - TODO confirm macro */
+    src += (4 * src_stride);
+    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    SLDI_B2_UB(inp0, inp1, inp0, inp1, inp0, inp1, 1); /* presumably slides both rows by one byte so lane i holds src[i + 1] - TODO confirm macro */
+
+    inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1); /* pack: low 8 bytes of inp1 into the upper half of inp0 */
+    horiz0 = __msa_aver_u_b(inp0, res0); /* aver_u: rounded average with the shifted source (src1 variant) */
+    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1); /* upper doubleword of horiz0 copied to both halves */
+    SLDI_B2_UB(inp2, inp3, inp2, inp3, inp2, inp3, 1);
+
+    inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
+    horiz2 = __msa_aver_u_b(inp2, res1);
+    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
+    LD_UB4(src, src_stride, inp0, inp1, inp2, inp3); /* presumably source rows 4..7 */
+    src += (4 * src_stride);
+    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    SLDI_B2_UB(inp0, inp1, inp0, inp1, inp0, inp1, 1);
+
+    inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
+    horiz4 = __msa_aver_u_b(inp0, res0);
+    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
+    SLDI_B2_UB(inp2, inp3, inp2, inp3, inp2, inp3, 1);
+
+    inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
+    horiz6 = __msa_aver_u_b(inp2, res1);
+    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2, /* argument order is significant; low-index rows are repeated here */
+                                        horiz1, horiz2, horiz3, horiz4,
+                                        horiz1, horiz0, horiz0, horiz1,
+                                        horiz2, horiz3, horiz4, horiz5,
+                                        const20, const6, const3);
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz1, (v2i64) horiz0); /* low halves of horiz0 (lower) and horiz1 (upper) */
+    res0 = __msa_aver_u_b(avg0, res0);
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
+                                        horiz3, horiz4, horiz5, horiz6,
+                                        horiz3, horiz2, horiz1, horiz0,
+                                        horiz4, horiz5, horiz6, horiz7,
+                                        const20, const6, const3);
+    ST8x2_UB(res0, dst, dst_stride); /* presumably writes the two 8-byte halves as two dst rows - TODO confirm macro */
+    dst += 2 * dst_stride;
+
+    inp0 = LD_UB(src); /* single extra source row, used to build horiz8 */
+    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
+                                              const20, const6, const3);
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz3, (v2i64) horiz2);
+    res1 = __msa_aver_u_b(avg1, res1);
+    inp0 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) inp0, 1); /* slide inp0 against itself by one byte */
+    horiz8 = __msa_aver_u_b(inp0, res0);
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
+                                        horiz5, horiz6, horiz7, horiz8,
+                                        horiz5, horiz4, horiz3, horiz2,
+                                        horiz6, horiz7, horiz8, horiz8,
+                                        const20, const6, const3);
+    ST8x2_UB(res1, dst, dst_stride);
+    dst += 2 * dst_stride;
+
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz5, (v2i64) horiz4);
+    res0 = __msa_aver_u_b(avg0, res0);
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3, /* high-index rows are repeated here */
+                                        horiz7, horiz8, horiz8, horiz7,
+                                        horiz7, horiz6, horiz5, horiz4,
+                                        horiz8, horiz8, horiz7, horiz6,
+                                        const20, const6, const3);
+    ST8x2_UB(res0, dst, dst_stride);
+    dst += 2 * dst_stride;
+
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz7, (v2i64) horiz6);
+    res1 = __msa_aver_u_b(avg1, res1);
+    ST8x2_UB(res1, dst, dst_stride);
+}
+
+static void hv_mc_qpel_aver_h_src0_16x16_msa(const uint8_t *src,
+                                             int32_t src_stride,
+                                             uint8_t *dst,
+                                             int32_t dst_stride)
+{ /* Two-pass 16x16 path: hv_mc_qpel_aver_horiz_src0_16x16_msa fills buff, then vert_mc_qpel_16x16_msa reads buff and writes dst. */
+    uint8_t buff[272]; /* 272 = 17 * 16: with height 16 the horizontal helper stores 4 * 4 + 1 rows at stride 16 */
+
+    hv_mc_qpel_aver_horiz_src0_16x16_msa(src, src_stride, buff, 16, 16); /* dst_stride = 16, height = 16 */
+    vert_mc_qpel_16x16_msa(buff, 16, dst, dst_stride); /* buff is handed over as the source with stride 16 */
+}
+
+static void hv_mc_qpel_aver_h_src0_8x8_msa(const uint8_t *src,
+                                           int32_t src_stride,
+                                           uint8_t *dst,
+                                           int32_t dst_stride)
+{ /* 8x8 rounding H+V path: builds horiz0..horiz8 (filter output round-averaged with the unshifted source); the vertical filter result is stored without a further average. */
+    v16u8 inp0, inp1, inp2, inp3;
+    v16u8 res0, res1;
+    v16u8 horiz0, horiz1, horiz2, horiz3;
+    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
+    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 }; /* mask0..mask3: index tables handed to the APPLY_HORIZ_* macros (macro bodies not shown here) */
+    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
+    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
+    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
+    v16u8 const20 = (v16u8) __msa_ldi_b(20); /* immediates 20, 6 and 3 replicated into every byte lane */
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+
+    LD_UB2(src, src_stride, inp0, inp1); /* presumably source rows 0 and 1 - TODO confirm macro */
+    src += (2 * src_stride);
+    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0); /* pack: low 8 bytes of inp0 (lower half) and of inp1 (upper half) */
+    horiz0 = __msa_aver_u_b(inp0, res0); /* aver_u: rounded average with the unshifted source (src0 variant) */
+    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1); /* upper doubleword of horiz0 copied to both halves */
+
+    LD_UB2(src, src_stride, inp2, inp3);
+    src += (2 * src_stride);
+    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
+    horiz2 = __msa_aver_u_b(inp2, res1);
+    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
+    LD_UB2(src, src_stride, inp0, inp1);
+    src += (2 * src_stride);
+    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
+    horiz4 = __msa_aver_u_b(inp0, res0);
+    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2, /* argument order is significant; low-index rows are repeated here */
+                                        horiz1, horiz2, horiz3, horiz4,
+                                        horiz1, horiz0, horiz0, horiz1,
+                                        horiz2, horiz3, horiz4, horiz5,
+                                        const20, const6, const3);
+    ST8x2_UB(res0, dst, dst_stride); /* vertical result stored directly; presumably two 8-byte dst rows - TODO confirm macro */
+    dst += (2 * dst_stride);
+
+    LD_UB2(src, src_stride, inp2, inp3);
+    src += (2 * src_stride);
+    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
+    horiz6 = __msa_aver_u_b(inp2, res1);
+    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
+                                        horiz3, horiz4, horiz5, horiz6,
+                                        horiz3, horiz2, horiz1, horiz0,
+                                        horiz4, horiz5, horiz6, horiz7,
+                                        const20, const6, const3);
+    inp0 = LD_UB(src); /* single extra source row, used to build horiz8 */
+    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
+                                              const20, const6, const3);
+    horiz8 = __msa_aver_u_b(inp0, res0);
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
+                                        horiz5, horiz6, horiz7, horiz8,
+                                        horiz5, horiz4, horiz3, horiz2,
+                                        horiz6, horiz7, horiz8, horiz8,
+                                        const20, const6, const3);
+    ST8x2_UB(res1, dst, dst_stride);
+    dst += 2 * dst_stride;
+
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3, /* high-index rows are repeated here */
+                                        horiz7, horiz8, horiz8, horiz7,
+                                        horiz7, horiz6, horiz5, horiz4,
+                                        horiz8, horiz8, horiz7, horiz6,
+                                        const20, const6, const3);
+    ST8x2_UB(res0, dst, dst_stride);
+    dst += 2 * dst_stride;
+    ST8x2_UB(res1, dst, dst_stride);
+}
+
+/* 16x16 H+V qpel: horizontal pass into a scratch block, then vertical pass. */
+static void hv_mc_qpel_16x16_msa(const uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride)
+{
+    /* 272 bytes = 17 rows of 16; both passes address it with stride 16 */
+    uint8_t scratch[17 * 16];
+
+    hv_mc_qpel_aver_horiz_16x16_msa(src, src_stride, scratch, 16, 16);
+    vert_mc_qpel_16x16_msa(scratch, 16, dst, dst_stride);
+}
+
+static void hv_mc_qpel_8x8_msa(const uint8_t *src, int32_t src_stride,
+                               uint8_t *dst, int32_t dst_stride)
+{
+    v16u8 inp0, inp1, inp2, inp3;  /* raw source rows */
+    v16u8 res0, res1;  /* vertical filter results, two rows per vector */
+    v16u8 horiz0, horiz1, horiz2, horiz3;  /* horizontally filtered rows */
+    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
+    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
+    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
+    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
+    v16u8 const20 = (v16u8) __msa_ldi_b(20);  /* 20, 6 and 3 in every byte; */
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);  /* presumably the tap weights */
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+    /* 8x8 H+V qpel: filter nine source rows horizontally, then vertically */
+    LD_UB2(src, src_stride, inp0, inp1);
+    src += (2 * src_stride);
+    horiz0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
+                                           mask0, mask1, mask2, mask3,
+                                           const20, const6, const3);
+    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1);  /* upper 64 bits */
+    LD_UB2(src, src_stride, inp2, inp3);
+    src += (2 * src_stride);
+    horiz2 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
+                                           mask0, mask1, mask2, mask3,
+                                           const20, const6, const3);
+    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
+    LD_UB2(src, src_stride, inp0, inp1);
+    src += (2 * src_stride);
+    horiz4 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
+                                           mask0, mask1, mask2, mask3,
+                                           const20, const6, const3);
+    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
+                                        horiz1, horiz2, horiz3, horiz4,
+                                        horiz1, horiz0, horiz0, horiz1,
+                                        horiz2, horiz3, horiz4, horiz5,
+                                        const20, const6, const3);
+    ST8x2_UB(res0, dst, dst_stride);  /* dst rows 0-1 */
+    dst += (2 * dst_stride);
+    /* filtered rows 6 and 7 are the last inputs needed for dst rows 2-3 */
+    LD_UB2(src, src_stride, inp2, inp3);
+    src += (2 * src_stride);
+    horiz6 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
+                                           mask0, mask1, mask2, mask3,
+                                           const20, const6, const3);
+    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
+                                        horiz3, horiz4, horiz5, horiz6,
+                                        horiz3, horiz2, horiz1, horiz0,
+                                        horiz4, horiz5, horiz6, horiz7,
+                                        const20, const6, const3);
+    inp0 = LD_UB(src);  /* ninth and last source row */
+    horiz8 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0,
+                                                mask0, mask1, mask2, mask3,
+                                                const20, const6, const3);
+    ST8x2_UB(res1, dst, dst_stride);  /* dst rows 2-3 */
+    dst += 2 * dst_stride;
+    /* dst rows 4-7; the argument lists repeat horiz8/7/6 past the last row */
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
+                                        horiz5, horiz6, horiz7, horiz8,
+                                        horiz5, horiz4, horiz3, horiz2,
+                                        horiz6, horiz7, horiz8, horiz8,
+                                        const20, const6, const3);
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
+                                        horiz7, horiz8, horiz8, horiz7,
+                                        horiz7, horiz6, horiz5, horiz4,
+                                        horiz8, horiz8, horiz7, horiz6,
+                                        const20, const6, const3);
+    ST8x2_UB(res0, dst, dst_stride);  /* dst rows 4-5 */
+    dst += 2 * dst_stride;
+    ST8x2_UB(res1, dst, dst_stride);  /* dst rows 6-7 */
+}
+
+/* 16x16 H+V qpel: "src1" horizontal pass, then the plain vertical pass. */
+static void hv_mc_qpel_aver_h_src1_16x16_msa(const uint8_t *src,
+                                             int32_t src_stride,
+                                             uint8_t *dst, int32_t dst_stride)
+{
+    uint8_t scratch[17 * 16];   /* 272-byte intermediate, used with stride 16 */
+
+    hv_mc_qpel_aver_horiz_src1_16x16_msa(src, src_stride, scratch, 16, 16);
+    vert_mc_qpel_16x16_msa(scratch, 16, dst, dst_stride);
+}
+
+static void hv_mc_qpel_aver_h_src1_8x8_msa(const uint8_t *src,
+                                           int32_t src_stride,
+                                           uint8_t *dst,
+                                           int32_t dst_stride)
+{
+    v16u8 inp0, inp1, inp2, inp3;  /* raw source rows */
+    v16u8 res0, res1;  /* filter results, two rows per vector */
+    v16u8 horiz0, horiz1, horiz2, horiz3;  /* horizontal stage output rows */
+    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
+    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
+    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
+    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
+    v16u8 const20 = (v16u8) __msa_ldi_b(20);  /* 20, 6 and 3 in every byte; */
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);  /* presumably the tap weights */
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+    /* 8x8 H+V qpel; horizontal results are averaged with a slid source copy */
+    LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
+    src += (4 * src_stride);
+    /* rows 0-3: the filters read the rows before SLDI_B2_UB rewrites them */
+    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    SLDI_B2_UB(inp0, inp1, inp0, inp1, inp0, inp1, 1);
+    /* low half of inp0 + low half of inp1 in one vector, then average */
+    inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
+    horiz0 = __msa_aver_u_b(inp0, res0);
+    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1);  /* upper 64 bits */
+    SLDI_B2_UB(inp2, inp3, inp2, inp3, inp2, inp3, 1);
+
+    inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
+    horiz2 = __msa_aver_u_b(inp2, res1);
+    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
+    LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
+    src += (4 * src_stride);
+    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    SLDI_B2_UB(inp0, inp1, inp0, inp1, inp0, inp1, 1);
+    /* rows 4-7 get the same pack-and-average treatment as rows 0-3 */
+    inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
+    horiz4 = __msa_aver_u_b(inp0, res0);
+    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
+    SLDI_B2_UB(inp2, inp3, inp2, inp3, inp2, inp3, 1);
+
+    inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
+    horiz6 = __msa_aver_u_b(inp2, res1);
+    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
+    inp0 = LD_UB(src);  /* ninth and last source row */
+    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
+                                              const20, const6, const3);
+    inp0 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) inp0, 1);
+    horiz8 = __msa_aver_u_b(inp0, res0);
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
+                                        horiz1, horiz2, horiz3, horiz4,
+                                        horiz1, horiz0, horiz0, horiz1,
+                                        horiz2, horiz3, horiz4, horiz5,
+                                        const20, const6, const3);
+    ST8x2_UB(res0, dst, dst_stride);  /* dst rows 0-1 */
+    dst += (2 * dst_stride);
+
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
+                                        horiz3, horiz4, horiz5, horiz6,
+                                        horiz3, horiz2, horiz1, horiz0,
+                                        horiz4, horiz5, horiz6, horiz7,
+                                        const20, const6, const3);
+    ST8x2_UB(res1, dst, dst_stride);  /* dst rows 2-3 */
+    dst += (2 * dst_stride);
+    /* from here the argument lists repeat horiz8/7/6 past the last row */
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
+                                        horiz5, horiz6, horiz7, horiz8,
+                                        horiz5, horiz4, horiz3, horiz2,
+                                        horiz6, horiz7, horiz8, horiz8,
+                                        const20, const6, const3);
+    ST8x2_UB(res0, dst, dst_stride);  /* dst rows 4-5 */
+    dst += (2 * dst_stride);
+
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
+                                        horiz7, horiz8, horiz8, horiz7,
+                                        horiz7, horiz6, horiz5, horiz4,
+                                        horiz8, horiz8, horiz7, horiz6,
+                                        const20, const6, const3);
+    ST8x2_UB(res1, dst, dst_stride);  /* dst rows 6-7 */
+}
+
+/* 16x16 H+V qpel: "src0" horizontal pass, then "aver_src1" vertical pass. */
+static void hv_mc_qpel_aver_hv_src01_16x16_msa(const uint8_t *src,
+                                               int32_t src_stride,
+                                               uint8_t *dst, int32_t dst_stride)
+{
+    uint8_t scratch[17 * 16];   /* 272-byte intermediate, used with stride 16 */
+
+    hv_mc_qpel_aver_horiz_src0_16x16_msa(src, src_stride, scratch, 16, 16);
+    vert_mc_qpel_aver_src1_16x16_msa(scratch, 16, dst, dst_stride);
+}
+
+/* 8x8 H+V qpel: horizontal results are averaged with the source rows, vertical
+ * results with the horizontally filtered row one below each output row. */
+static void hv_mc_qpel_aver_hv_src01_8x8_msa(const uint8_t *src,
+                                             int32_t src_stride,
+                                             uint8_t *dst, int32_t dst_stride)
+{
+    v16u8 inp0, inp1, inp2, inp3;  /* raw source rows */
+    v16u8 res0, res1, avg0, avg1;  /* filter results and averaging operands */
+    v16u8 horiz0, horiz1, horiz2, horiz3;  /* horizontal stage output rows */
+    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
+    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
+    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
+    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
+    v16u8 const20 = (v16u8) __msa_ldi_b(20);  /* 20, 6 and 3 in every byte; */
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);  /* presumably the tap weights */
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+
+    LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
+    src += (4 * src_stride);
+    /* rows 0-3: filter, then average with the low halves of the source rows */
+    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
+    horiz0 = __msa_aver_u_b(inp0, res0);
+    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1);  /* upper 64 bits */
+    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
+    horiz2 = __msa_aver_u_b(inp2, res1);
+    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
+    LD_UB2(src, src_stride, inp0, inp1);
+    src += (2 * src_stride);
+    /* rows 4-5 */
+    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
+    horiz4 = __msa_aver_u_b(inp0, res0);
+    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
+                                        horiz1, horiz2, horiz3, horiz4,
+                                        horiz1, horiz0, horiz0, horiz1,
+                                        horiz2, horiz3, horiz4, horiz5,
+                                        const20, const6, const3);
+    avg0 = (v16u8) __msa_insve_d((v2i64) horiz1, 1, (v2i64) horiz2);
+    res0 = __msa_aver_u_b(avg0, res0);  /* with filtered rows 1 and 2 */
+    ST8x2_UB(res0, dst, dst_stride);  /* dst rows 0-1 */
+    dst += (2 * dst_stride);
+    /* rows 6-7, then the ninth and last source row */
+    LD_UB2(src, src_stride, inp2, inp3);
+    src += (2 * src_stride);
+    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
+    horiz6 = __msa_aver_u_b(inp2, res1);
+    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
+    inp0 = LD_UB(src);
+    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
+                                              const20, const6, const3);
+    horiz8 = __msa_aver_u_b(inp0, res0);
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
+                                        horiz3, horiz4, horiz5, horiz6,
+                                        horiz3, horiz2, horiz1, horiz0,
+                                        horiz4, horiz5, horiz6, horiz7,
+                                        const20, const6, const3);
+    avg1 = (v16u8) __msa_insve_d((v2i64) horiz3, 1, (v2i64) horiz4);
+    res1 = __msa_aver_u_b(avg1, res1);  /* with filtered rows 3 and 4 */
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
+                                        horiz5, horiz6, horiz7, horiz8,
+                                        horiz5, horiz4, horiz3, horiz2,
+                                        horiz6, horiz7, horiz8, horiz8,
+                                        const20, const6, const3);
+    ST8x2_UB(res1, dst, dst_stride);  /* dst rows 2-3 */
+    dst += 2 * dst_stride;
+
+    avg0 = (v16u8) __msa_insve_d((v2i64) horiz5, 1, (v2i64) horiz6);
+    res0 = __msa_aver_u_b(avg0, res0);  /* with filtered rows 5 and 6 */
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
+                                        horiz7, horiz8, horiz8, horiz7,
+                                        horiz7, horiz6, horiz5, horiz4,
+                                        horiz8, horiz8, horiz7, horiz6,
+                                        const20, const6, const3);
+    ST8x2_UB(res0, dst, dst_stride);  /* dst rows 4-5 */
+    dst += 2 * dst_stride;
+
+    avg1 = (v16u8) __msa_insve_d((v2i64) horiz7, 1, (v2i64) horiz8);
+    res1 = __msa_aver_u_b(avg1, res1);  /* with filtered rows 7 and 8 */
+    ST8x2_UB(res1, dst, dst_stride);  /* dst rows 6-7; nothing follows */
+}
+
+/* 16x16 H+V qpel: plain horizontal pass, then "aver_src1" vertical pass. */
+static void hv_mc_qpel_aver_v_src1_16x16_msa(const uint8_t *src,
+                                             int32_t src_stride,
+                                             uint8_t *dst, int32_t dst_stride)
+{
+    uint8_t scratch[17 * 16];   /* 272-byte intermediate, used with stride 16 */
+
+    hv_mc_qpel_aver_horiz_16x16_msa(src, src_stride, scratch, 16, 16);
+    vert_mc_qpel_aver_src1_16x16_msa(scratch, 16, dst, dst_stride);
+}
+
+/* 8x8 H+V qpel: plain horizontal filtering; each vertical result is averaged
+ * with the horizontally filtered row one below the output row. */
+static void hv_mc_qpel_aver_v_src1_8x8_msa(const uint8_t *src,
+                                           int32_t src_stride,
+                                           uint8_t *dst, int32_t dst_stride)
+{
+    v16u8 inp0, inp1, inp2, inp3;  /* raw source rows */
+    v16u8 res0, res1, avg0, avg1;  /* filter results and averaging operands */
+    v16u8 horiz0, horiz1, horiz2, horiz3;  /* horizontally filtered rows */
+    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
+    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
+    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
+    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
+    v16u8 const20 = (v16u8) __msa_ldi_b(20);  /* 20, 6 and 3 in every byte; */
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);  /* presumably the tap weights */
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+
+    LD_UB2(src, src_stride, inp0, inp1);
+    src += (2 * src_stride);
+    horiz0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
+                                           mask0, mask1, mask2, mask3,
+                                           const20, const6, const3);
+    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1);  /* upper 64 bits */
+    LD_UB2(src, src_stride, inp2, inp3);
+    src += (2 * src_stride);
+    horiz2 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
+                                           mask0, mask1, mask2, mask3,
+                                           const20, const6, const3);
+    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
+    LD_UB2(src, src_stride, inp0, inp1);
+    src += (2 * src_stride);
+    horiz4 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
+                                           mask0, mask1, mask2, mask3,
+                                           const20, const6, const3);
+    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
+                                        horiz1, horiz2, horiz3, horiz4,
+                                        horiz1, horiz0, horiz0, horiz1,
+                                        horiz2, horiz3, horiz4, horiz5,
+                                        const20, const6, const3);
+    avg0 = (v16u8) __msa_insve_d((v2i64) horiz1, 1, (v2i64) horiz2);
+    res0 = __msa_aver_u_b(avg0, res0);  /* with filtered rows 1 and 2 */
+    ST8x2_UB(res0, dst, dst_stride);  /* dst rows 0-1 */
+    dst += (2 * dst_stride);
+
+    LD_UB2(src, src_stride, inp2, inp3);
+    src += (2 * src_stride);
+    horiz6 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
+                                           mask0, mask1, mask2, mask3,
+                                           const20, const6, const3);
+    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
+                                        horiz3, horiz4, horiz5, horiz6,
+                                        horiz3, horiz2, horiz1, horiz0,
+                                        horiz4, horiz5, horiz6, horiz7,
+                                        const20, const6, const3);
+    inp0 = LD_UB(src);  /* ninth and last source row */
+    horiz8 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0,
+                                                mask0, mask1, mask2, mask3,
+                                                const20, const6, const3);
+    avg1 = (v16u8) __msa_insve_d((v2i64) horiz3, 1, (v2i64) horiz4);
+    res1 = __msa_aver_u_b(avg1, res1);  /* with filtered rows 3 and 4 */
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
+                                        horiz5, horiz6, horiz7, horiz8,
+                                        horiz5, horiz4, horiz3, horiz2,
+                                        horiz6, horiz7, horiz8, horiz8,
+                                        const20, const6, const3);
+    ST8x2_UB(res1, dst, dst_stride);  /* dst rows 2-3 */
+    dst += 2 * dst_stride;
+    avg0 = (v16u8) __msa_insve_d((v2i64) horiz5, 1, (v2i64) horiz6);
+    res0 = __msa_aver_u_b(avg0, res0);  /* with filtered rows 5 and 6 */
+
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
+                                        horiz7, horiz8, horiz8, horiz7,
+                                        horiz7, horiz6, horiz5, horiz4,
+                                        horiz8, horiz8, horiz7, horiz6,
+                                        const20, const6, const3);
+    ST8x2_UB(res0, dst, dst_stride);  /* dst rows 4-5 */
+    dst += 2 * dst_stride;
+    avg1 = (v16u8) __msa_insve_d((v2i64) horiz7, 1, (v2i64) horiz8);
+    res1 = __msa_aver_u_b(avg1, res1);  /* with filtered rows 7 and 8 */
+    ST8x2_UB(res1, dst, dst_stride);  /* dst rows 6-7 */
+}
+
+/* 16x16 H+V qpel: "src1" horizontal pass, then "aver_src1" vertical pass. */
+static void hv_mc_qpel_aver_hv_src11_16x16_msa(const uint8_t *src,
+                                               int32_t src_stride,
+                                               uint8_t *dst, int32_t dst_stride)
+{
+    uint8_t scratch[17 * 16];   /* 272-byte intermediate, used with stride 16 */
+
+    hv_mc_qpel_aver_horiz_src1_16x16_msa(src, src_stride, scratch, 16, 16);
+    vert_mc_qpel_aver_src1_16x16_msa(scratch, 16, dst, dst_stride);
+}
+
+static void hv_mc_qpel_aver_hv_src11_8x8_msa(const uint8_t *src,
+                                             int32_t src_stride,
+                                             uint8_t *dst, int32_t dst_stride)
+{
+    v16u8 inp0, inp1, inp2, inp3;  /* raw source rows */
+    v16u8 res0, res1, avg0, avg1;  /* filter results and averaging operands */
+    v16u8 horiz0, horiz1, horiz2, horiz3;  /* horizontal stage output rows */
+    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
+    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
+    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
+    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
+    v16u8 const20 = (v16u8) __msa_ldi_b(20);  /* 20, 6 and 3 in every byte; */
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);  /* presumably the tap weights */
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+    /* 8x8 H+V qpel; averages with the slid source (H) and the next row (V) */
+    LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
+    src += (4 * src_stride);
+    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
+                                         mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    SLDI_B2_UB(inp0, inp1, inp0, inp1, inp0, inp1, 1);
+    /* low half of inp0 + low half of inp1 in one vector, then average */
+    inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
+    horiz0 = __msa_aver_u_b(inp0, res0);
+    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1);  /* upper 64 bits */
+    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    SLDI_B2_UB(inp2, inp3, inp2, inp3, inp2, inp3, 1);
+
+    inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
+    horiz2 = __msa_aver_u_b(inp2, res1);
+    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
+    LD_UB2(src, src_stride, inp0, inp1);
+    src += (2 * src_stride);
+    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    SLDI_B2_UB(inp0, inp1, inp0, inp1, inp0, inp1, 1);
+
+    inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
+    horiz4 = __msa_aver_u_b(inp0, res0);
+    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
+                                        horiz1, horiz2, horiz3, horiz4,
+                                        horiz1, horiz0, horiz0, horiz1,
+                                        horiz2, horiz3, horiz4, horiz5,
+                                        const20, const6, const3);
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz2, (v2i64) horiz1);
+    res0 = __msa_aver_u_b(avg0, res0);  /* with filtered rows 1 and 2 */
+    LD_UB2(src, src_stride, inp2, inp3);
+    src += (2 * src_stride);
+    ST8x2_UB(res0, dst, dst_stride);  /* dst rows 0-1 */
+    dst += 2 * dst_stride;
+    /* source rows 6-7 were loaded just above the store */
+    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    SLDI_B2_UB(inp2, inp3, inp2, inp3, inp2, inp3, 1);
+
+    inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
+    horiz6 = __msa_aver_u_b(inp2, res1);
+    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
+                                        horiz3, horiz4, horiz5, horiz6,
+                                        horiz3, horiz2, horiz1, horiz0,
+                                        horiz4, horiz5, horiz6, horiz7,
+                                        const20, const6, const3);
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz4, (v2i64) horiz3);
+    res1 = __msa_aver_u_b(avg1, res1);  /* with filtered rows 3 and 4 */
+    inp0 = LD_UB(src);  /* ninth and last source row */
+    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
+                                              const20, const6, const3);
+    inp0 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) inp0, 1);
+    horiz8 = __msa_aver_u_b(inp0, res0);
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
+                                        horiz5, horiz6, horiz7, horiz8,
+                                        horiz5, horiz4, horiz3, horiz2,
+                                        horiz6, horiz7, horiz8, horiz8,
+                                        const20, const6, const3);
+    ST8x2_UB(res1, dst, dst_stride);  /* dst rows 2-3 */
+    dst += 2 * dst_stride;
+
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz6, (v2i64) horiz5);
+    res0 = __msa_aver_u_b(avg0, res0);  /* with filtered rows 5 and 6 */
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
+                                        horiz7, horiz8, horiz8, horiz7,
+                                        horiz7, horiz6, horiz5, horiz4,
+                                        horiz8, horiz8, horiz7, horiz6,
+                                        const20, const6, const3);
+    ST8x2_UB(res0, dst, dst_stride);  /* dst rows 4-5 */
+    dst += 2 * dst_stride;
+
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz8, (v2i64) horiz7);
+    res1 = __msa_aver_u_b(avg1, res1);  /* with filtered rows 7 and 8 */
+    ST8x2_UB(res1, dst, dst_stride);  /* dst rows 6-7 */
+}
+
+static void hv_mc_qpel_avg_dst_aver_hv_src00_16x16_msa(const uint8_t *src,
+                                                       int32_t src_stride,
+                                                       uint8_t *dst,
+                                                       int32_t dst_stride)
+{
+    uint8_t scratch[17 * 16];   /* 272-byte intermediate, used with stride 16 */
+    /* "src0" horizontal pass, then the "avg_dst_aver_src0" vertical one */
+    hv_mc_qpel_aver_horiz_src0_16x16_msa(src, src_stride, scratch, 16, 16);
+    vert_mc_qpel_avg_dst_aver_src0_16x16_msa(scratch, 16, dst, dst_stride);
+}
+
+static void hv_mc_qpel_avg_dst_aver_hv_src00_8x8_msa(const uint8_t *src,
+                                                     int32_t src_stride,
+                                                     uint8_t *dst,
+                                                     int32_t dst_stride)
+{
+    v16u8 inp0, inp1, inp2, inp3;  /* raw source rows */
+    v16u8 res0, res1, avg0, avg1;  /* filter results and averaging operands */
+    v16u8 horiz0, horiz1, horiz2, horiz3;  /* horizontal stage output rows */
+    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
+    v16u8 dst0, dst1;  /* dst rows as loaded before they are overwritten */
+    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
+    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
+    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
+    v16u8 const20 = (v16u8) __msa_ldi_b(20);  /* 20, 6 and 3 in every byte; */
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);  /* presumably the tap weights */
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+    /* 8x8 H+V qpel; both stages average with their input, then with dst */
+    LD_UB2(src, src_stride, inp0, inp1);
+    src += (2 * src_stride);
+    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    LD_UB2(src, src_stride, inp2, inp3);
+    src += (2 * src_stride);
+    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
+    horiz0 = __msa_aver_u_b(inp0, res0);  /* average with the source rows */
+    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1);  /* upper 64 bits */
+    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    LD_UB2(src, src_stride, inp0, inp1);
+    src += (2 * src_stride);
+    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
+    horiz2 = __msa_aver_u_b(inp2, res1);
+    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
+    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
+    horiz4 = __msa_aver_u_b(inp0, res0);
+    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
+    LD_UB2(dst, dst_stride, dst0, dst1);  /* current dst rows 0-1 */
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz1, (v2i64) horiz0);
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
+                                        horiz1, horiz2, horiz3, horiz4,
+                                        horiz1, horiz0, horiz0, horiz1,
+                                        horiz2, horiz3, horiz4, horiz5,
+                                        const20, const6, const3);
+    res0 = __msa_aver_u_b(avg0, res0);  /* with filtered rows 0 and 1 */
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
+    res0 = __msa_aver_u_b(avg0, res0);  /* then with the loaded dst rows */
+    ST8x2_UB(res0, dst, dst_stride);
+    dst += (2 * dst_stride);
+    /* dst rows 2-3 */
+    LD_UB2(src, src_stride, inp2, inp3);
+    src += (2 * src_stride);
+    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
+    horiz6 = __msa_aver_u_b(inp2, res1);
+    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz3, (v2i64) horiz2);
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
+                                        horiz3, horiz4, horiz5, horiz6,
+                                        horiz3, horiz2, horiz1, horiz0,
+                                        horiz4, horiz5, horiz6, horiz7,
+                                        const20, const6, const3);
+    res1 = __msa_aver_u_b(avg1, res1);  /* with filtered rows 2 and 3 */
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
+    res1 = __msa_aver_u_b(avg1, res1);  /* then with the loaded dst rows */
+    ST8x2_UB(res1, dst, dst_stride);
+    dst += (2 * dst_stride);
+    /* ninth and last source row, then dst rows 4-5 */
+    inp0 = LD_UB(src);
+    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
+                                              const20, const6, const3);
+    horiz8 = __msa_aver_u_b(inp0, res0);
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz5, (v2i64) horiz4);
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
+                                        horiz5, horiz6, horiz7, horiz8,
+                                        horiz5, horiz4, horiz3, horiz2,
+                                        horiz6, horiz7, horiz8, horiz8,
+                                        const20, const6, const3);
+    res0 = __msa_aver_u_b(avg0, res0);  /* with filtered rows 4 and 5 */
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
+    res0 = __msa_aver_u_b(avg0, res0);  /* then with the loaded dst rows */
+    ST8x2_UB(res0, dst, dst_stride);
+    dst += (2 * dst_stride);
+    /* dst rows 6-7 */
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz7, (v2i64) horiz6);
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
+                                        horiz7, horiz8, horiz8, horiz7,
+                                        horiz7, horiz6, horiz5, horiz4,
+                                        horiz8, horiz8, horiz7, horiz6,
+                                        const20, const6, const3);
+    res1 = __msa_aver_u_b(avg1, res1);  /* with filtered rows 6 and 7 */
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
+    res1 = __msa_aver_u_b(avg1, res1);  /* then with the loaded dst rows */
+    ST8x2_UB(res1, dst, dst_stride);
+}
+
+static void hv_mc_qpel_avg_dst_aver_v_src0_16x16_msa(const uint8_t *src,
+                                                     int32_t src_stride,
+                                                     uint8_t *dst,
+                                                     int32_t dst_stride)
+{
+    uint8_t scratch[17 * 16];   /* 272-byte intermediate, used with stride 16 */
+    /* plain horizontal pass, then the "avg_dst_aver_src0" vertical one */
+    hv_mc_qpel_aver_horiz_16x16_msa(src, src_stride, scratch, 16, 16);
+    vert_mc_qpel_avg_dst_aver_src0_16x16_msa(scratch, 16, dst, dst_stride);
+}
+
+/**
+ * 8x8 qpel HV motion compensation, vertically averaged with the co-located
+ * filtered row and then averaged into dst.
+ *
+ * Nine source rows are passed through the horizontal filter macros
+ * (horiz0..horiz8). For every pair of output rows the vertical filter macro
+ * result is averaged with the horizontally filtered rows at the same row
+ * index, then with the pixels already present in dst, and stored back.
+ *
+ * @param src        first source row; rows up to src + 8 * src_stride are read
+ * @param src_stride distance between consecutive source rows
+ * @param dst        destination block, read and rewritten two rows at a time
+ * @param dst_stride distance between consecutive destination rows
+ */
+static void hv_mc_qpel_avg_dst_aver_v_src0_8x8_msa(const uint8_t *src,
+                                                   int32_t src_stride,
+                                                   uint8_t *dst,
+                                                   int32_t dst_stride)
+{
+    v16u8 inp0, inp1, inp2, inp3;
+    v16u8 res0, res1, avg0, avg1;
+    v16u8 horiz0, horiz1, horiz2, horiz3;
+    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
+    v16u8 dst0, dst1;
+    /* byte-index vectors handed to the horizontal filter macros */
+    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
+    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
+    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
+    /* 20, 6 and 3 replicated in every byte; presumably the filter weights */
+    v16u8 const20 = (v16u8) __msa_ldi_b(20);
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+
+    /* source rows 0-1 -> horiz0 (the macro appears to pack two rows per
+     * vector: see the splati_d below that extracts the upper half) */
+    LD_UB2(src, src_stride, inp0, inp1);
+    src += (2 * src_stride);
+    horiz0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
+                                           mask0, mask1, mask2, mask3,
+                                           const20, const6, const3);
+    /* source rows 2-3 */
+    LD_UB2(src, src_stride, inp2, inp3);
+    src += (2 * src_stride);
+    /* upper doubleword of horiz0 replicated: filtered row 1 */
+    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1);
+    horiz2 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
+                                           mask0, mask1, mask2, mask3,
+                                           const20, const6, const3);
+    /* source rows 4-5 */
+    LD_UB2(src, src_stride, inp0, inp1);
+    src += (2 * src_stride);
+    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
+    horiz4 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
+                                           mask0, mask1, mask2, mask3,
+                                           const20, const6, const3);
+    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
+    /* output rows 0-1 */
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    /* low doublewords of horiz0 and horiz1: filtered rows 0 and 1 */
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz1, (v2i64) horiz0);
+    /* vertical filter; horiz0 is repeated where rows above the block would
+     * be needed */
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
+                                        horiz1, horiz2, horiz3, horiz4,
+                                        horiz1, horiz0, horiz0, horiz1,
+                                        horiz2, horiz3, horiz4, horiz5,
+                                        const20, const6, const3);
+    /* average with the co-located filtered rows ... */
+    res0 = __msa_aver_u_b(avg0, res0);
+    /* ... then with the existing dst rows (low doublewords of dst0/dst1) */
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
+    res0 = __msa_aver_u_b(avg0, res0);
+    /* ST8x2_UB presumably writes the two 8-byte halves as two dst rows */
+    ST8x2_UB(res0, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    /* source rows 6-7, then output rows 2-3 */
+    LD_UB2(src, src_stride, inp2, inp3);
+    src += (2 * src_stride);
+    horiz6 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
+                                           mask0, mask1, mask2, mask3,
+                                           const20, const6, const3);
+    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz3, (v2i64) horiz2);
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
+                                        horiz3, horiz4, horiz5, horiz6,
+                                        horiz3, horiz2, horiz1, horiz0,
+                                        horiz4, horiz5, horiz6, horiz7,
+                                        const20, const6, const3);
+    res1 = __msa_aver_u_b(avg1, res1);
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
+    res1 = __msa_aver_u_b(avg1, res1);
+    ST8x2_UB(res1, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    /* ninth source row (row 8), then output rows 4-5 */
+    inp0 = LD_UB(src);
+    horiz8 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0,
+                                                mask0, mask1, mask2, mask3,
+                                                const20, const6, const3);
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz5, (v2i64) horiz4);
+    /* horiz8 is repeated where a row below row 8 would be needed */
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
+                                        horiz5, horiz6, horiz7, horiz8,
+                                        horiz5, horiz4, horiz3, horiz2,
+                                        horiz6, horiz7, horiz8, horiz8,
+                                        const20, const6, const3);
+    res0 = __msa_aver_u_b(avg0, res0);
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
+    res0 = __msa_aver_u_b(avg0, res0);
+    ST8x2_UB(res0, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    /* output rows 6-7; the argument list folds back onto horiz8/horiz7/horiz6
+     * at the bottom edge */
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz7, (v2i64) horiz6);
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
+                                        horiz7, horiz8, horiz8, horiz7,
+                                        horiz7, horiz6, horiz5, horiz4,
+                                        horiz8, horiz8, horiz7, horiz6,
+                                        const20, const6, const3);
+    res1 = __msa_aver_u_b(avg1, res1);
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
+    res1 = __msa_aver_u_b(avg1, res1);
+    ST8x2_UB(res1, dst, dst_stride);
+}
+
+/**
+ * 16x16 qpel HV motion compensation that averages into dst.
+ *
+ * Two-stage wrapper: the "horiz_src1" helper fills a scratch buffer, then
+ * the vertical "avg_dst_aver_src0" helper consumes it and updates dst.
+ * Both helpers are defined elsewhere in this file; what each stage computes
+ * is inferred from the helper names only.
+ */
+static void hv_mc_qpel_avg_dst_aver_hv_src10_16x16_msa(const uint8_t *src,
+                                                       int32_t src_stride,
+                                                       uint8_t *dst,
+                                                       int32_t dst_stride)
+{
+    /* scratch area: room for 17 rows at a 16-byte stride (272 bytes) */
+    uint8_t tmp[16 * 17];
+
+    /* stage 1: horizontal helper, output to tmp with stride 16 */
+    hv_mc_qpel_aver_horiz_src1_16x16_msa(src, src_stride, tmp, 16, 16);
+    /* stage 2: vertical helper reads tmp (stride 16) and writes dst */
+    vert_mc_qpel_avg_dst_aver_src0_16x16_msa(tmp, 16, dst, dst_stride);
+}
+
+/**
+ * 8x8 qpel HV motion compensation with horizontal averaging against the
+ * source shifted by one byte, vertical averaging against the co-located
+ * filtered row, and a final average into dst.
+ *
+ * Nine source rows are horizontally filtered; each filtered row is averaged
+ * with the same source row slid by one byte (horiz0..horiz8). For every pair
+ * of output rows the vertical filter macro result is averaged with the
+ * horiz rows at the same row index, then with the pixels already present in
+ * dst, and stored back.
+ *
+ * @param src        first source row; rows up to src + 8 * src_stride are read
+ * @param src_stride distance between consecutive source rows
+ * @param dst        destination block, read and rewritten two rows at a time
+ * @param dst_stride distance between consecutive destination rows
+ */
+static void hv_mc_qpel_avg_dst_aver_hv_src10_8x8_msa(const uint8_t *src,
+                                                     int32_t src_stride,
+                                                     uint8_t *dst,
+                                                     int32_t dst_stride)
+{
+    v16u8 inp0, inp1, inp2, inp3;
+    v16u8 res0, res1, avg0, avg1;
+    v16u8 horiz0, horiz1, horiz2, horiz3;
+    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
+    v16u8 dst0, dst1;
+    /* byte-index vectors handed to the horizontal filter macros */
+    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
+    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
+    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
+    /* 20, 6 and 3 replicated in every byte; presumably the filter weights */
+    v16u8 const20 = (v16u8) __msa_ldi_b(20);
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+
+    /* source rows 0-1: horizontal filter (two rows packed per vector) */
+    LD_UB2(src, src_stride, inp0, inp1);
+    src += (2 * src_stride);
+    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+
+    LD_UB2(src, src_stride, inp2, inp3);
+    src += (2 * src_stride);
+    /* SLDI_B2_UB presumably slides each row by one byte so that lane i holds
+     * source pixel i + 1 (macro defined elsewhere - confirm) */
+    SLDI_B2_UB(inp0, inp1, inp0, inp1, inp0, inp1, 1);
+
+    /* pack the low 8 bytes of both shifted rows and average with the filter
+     * output */
+    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
+    horiz0 = __msa_aver_u_b(inp0, res0);
+    /* upper doubleword of horiz0 replicated: row 1 */
+    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1);
+    /* source rows 2-3 */
+    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    LD_UB2(src, src_stride, inp0, inp1);
+    src += (2 * src_stride);
+    SLDI_B2_UB(inp2, inp3, inp2, inp3, inp2, inp3, 1);
+
+    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
+    horiz2 = __msa_aver_u_b(inp2, res1);
+    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
+    /* source rows 4-5 */
+    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+
+    SLDI_B2_UB(inp0, inp1, inp0, inp1, inp0, inp1, 1);
+
+    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
+    horiz4 = __msa_aver_u_b(inp0, res0);
+    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
+    /* output rows 0-1 */
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    /* low doublewords of horiz0 and horiz1: rows 0 and 1 */
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz1, (v2i64) horiz0);
+    /* vertical filter; horiz0 is repeated where rows above the block would
+     * be needed */
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
+                                        horiz1, horiz2, horiz3, horiz4,
+                                        horiz1, horiz0, horiz0, horiz1,
+                                        horiz2, horiz3, horiz4, horiz5,
+                                        const20, const6, const3);
+    /* average with the co-located horiz rows ... */
+    res0 = __msa_aver_u_b(avg0, res0);
+    /* ... then with the existing dst rows */
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
+    res0 = __msa_aver_u_b(avg0, res0);
+    /* ST8x2_UB presumably writes the two 8-byte halves as two dst rows */
+    ST8x2_UB(res0, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    /* source rows 6-7, then output rows 2-3 */
+    LD_UB2(src, src_stride, inp2, inp3);
+    src += (2 * src_stride);
+    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+
+    SLDI_B2_UB(inp2, inp3, inp2, inp3, inp2, inp3, 1);
+
+    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
+    horiz6 = __msa_aver_u_b(inp2, res1);
+    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz3, (v2i64) horiz2);
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
+                                        horiz3, horiz4, horiz5, horiz6,
+                                        horiz3, horiz2, horiz1, horiz0,
+                                        horiz4, horiz5, horiz6, horiz7,
+                                        const20, const6, const3);
+    res1 = __msa_aver_u_b(avg1, res1);
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
+    res1 = __msa_aver_u_b(avg1, res1);
+    ST8x2_UB(res1, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    /* ninth source row (row 8), then output rows 4-5 */
+    inp0 = LD_UB(src);
+    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
+                                              const20, const6, const3);
+    /* slide the row by one byte before averaging, as for the row pairs */
+    inp0 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) inp0, 1);
+    horiz8 = __msa_aver_u_b(inp0, res0);
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz5, (v2i64) horiz4);
+    /* horiz8 is repeated where a row below row 8 would be needed */
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
+                                        horiz5, horiz6, horiz7, horiz8,
+                                        horiz5, horiz4, horiz3, horiz2,
+                                        horiz6, horiz7, horiz8, horiz8,
+                                        const20, const6, const3);
+    res0 = __msa_aver_u_b(avg0, res0);
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
+    res0 = __msa_aver_u_b(avg0, res0);
+    ST8x2_UB(res0, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    /* output rows 6-7; the argument list folds back onto horiz8/horiz7/horiz6
+     * at the bottom edge */
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz7, (v2i64) horiz6);
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
+                                        horiz7, horiz8, horiz8, horiz7,
+                                        horiz7, horiz6, horiz5, horiz4,
+                                        horiz8, horiz8, horiz7, horiz6,
+                                        const20, const6, const3);
+    res1 = __msa_aver_u_b(avg1, res1);
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
+    res1 = __msa_aver_u_b(avg1, res1);
+    ST8x2_UB(res1, dst, dst_stride);
+}
+
+/**
+ * 16x16 qpel HV motion compensation that averages into dst.
+ *
+ * Two-stage wrapper: the "horiz_src0" helper fills a scratch buffer, then
+ * the vertical "avg_dst" helper consumes it and updates dst. Both helpers
+ * are defined elsewhere in this file; what each stage computes is inferred
+ * from the helper names only.
+ */
+static void hv_mc_qpel_avg_dst_aver_h_src0_16x16_msa(const uint8_t *src,
+                                                     int32_t src_stride,
+                                                     uint8_t *dst,
+                                                     int32_t dst_stride)
+{
+    /* scratch area: room for 17 rows at a 16-byte stride (272 bytes) */
+    uint8_t tmp[16 * 17];
+
+    /* stage 1: horizontal helper, output to tmp with stride 16 */
+    hv_mc_qpel_aver_horiz_src0_16x16_msa(src, src_stride, tmp, 16, 16);
+    /* stage 2: vertical helper reads tmp (stride 16) and writes dst */
+    vert_mc_qpel_avg_dst_16x16_msa(tmp, 16, dst, dst_stride);
+}
+
+/**
+ * 8x8 qpel HV motion compensation with horizontal averaging against the
+ * unshifted source and a final average into dst.
+ *
+ * Nine source rows are horizontally filtered and each filtered row is
+ * averaged with the source row it came from (horiz0..horiz8). For every pair
+ * of output rows the vertical filter macro result is averaged with the
+ * pixels already present in dst and stored back.
+ *
+ * @param src        first source row; rows up to src + 8 * src_stride are read
+ * @param src_stride distance between consecutive source rows
+ * @param dst        destination block, read and rewritten two rows at a time
+ * @param dst_stride distance between consecutive destination rows
+ */
+static void hv_mc_qpel_avg_dst_aver_h_src0_8x8_msa(const uint8_t *src,
+                                                   int32_t src_stride,
+                                                   uint8_t *dst,
+                                                   int32_t dst_stride)
+{
+    v16u8 inp0, inp1, inp2, inp3;
+    v16u8 res0, res1, avg0, avg1;
+    v16u8 horiz0, horiz1, horiz2, horiz3;
+    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
+    v16u8 dst0, dst1;
+    /* byte-index vectors handed to the horizontal filter macros */
+    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
+    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
+    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
+    /* 20, 6 and 3 replicated in every byte; presumably the filter weights */
+    v16u8 const20 = (v16u8) __msa_ldi_b(20);
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+
+    /* source rows 0-1: horizontal filter (two rows packed per vector) */
+    LD_UB2(src, src_stride, inp0, inp1);
+    src += (2 * src_stride);
+    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    LD_UB2(src, src_stride, inp2, inp3);
+    src += (2 * src_stride);
+    /* pack the low 8 bytes of both source rows and average with the filter
+     * output */
+    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
+    horiz0 = __msa_aver_u_b(inp0, res0);
+    /* upper doubleword of horiz0 replicated: row 1 */
+    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1);
+    /* source rows 2-3 */
+    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    LD_UB2(src, src_stride, inp0, inp1);
+    src += (2 * src_stride);
+    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
+    horiz2 = __msa_aver_u_b(inp2, res1);
+    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
+    /* source rows 4-5 */
+    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
+    horiz4 = __msa_aver_u_b(inp0, res0);
+    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
+    /* output rows 0-1 */
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    /* vertical filter; horiz0 is repeated where rows above the block would
+     * be needed */
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
+                                        horiz1, horiz2, horiz3, horiz4,
+                                        horiz1, horiz0, horiz0, horiz1,
+                                        horiz2, horiz3, horiz4, horiz5,
+                                        const20, const6, const3);
+    /* average with the existing dst rows (low doublewords of dst0/dst1) */
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
+    res0 = __msa_aver_u_b(avg0, res0);
+    /* ST8x2_UB presumably writes the two 8-byte halves as two dst rows */
+    ST8x2_UB(res0, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    /* source rows 6-7, then output rows 2-3 */
+    LD_UB2(src, src_stride, inp2, inp3);
+    src += (2 * src_stride);
+    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
+    horiz6 = __msa_aver_u_b(inp2, res1);
+    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
+                                        horiz3, horiz4, horiz5, horiz6,
+                                        horiz3, horiz2, horiz1, horiz0,
+                                        horiz4, horiz5, horiz6, horiz7,
+                                        const20, const6, const3);
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
+    res1 = __msa_aver_u_b(avg1, res1);
+    ST8x2_UB(res1, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    /* ninth source row (row 8), then output rows 4-5 */
+    inp0 = LD_UB(src);
+    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
+                                              const20, const6, const3);
+    horiz8 = __msa_aver_u_b(inp0, res0);
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    /* horiz8 is repeated where a row below row 8 would be needed */
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
+                                        horiz5, horiz6, horiz7, horiz8,
+                                        horiz5, horiz4, horiz3, horiz2,
+                                        horiz6, horiz7, horiz8, horiz8,
+                                        const20, const6, const3);
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
+    res0 = __msa_aver_u_b(avg0, res0);
+    ST8x2_UB(res0, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    /* output rows 6-7; the argument list folds back onto horiz8/horiz7/horiz6
+     * at the bottom edge */
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
+                                        horiz7, horiz8, horiz8, horiz7,
+                                        horiz7, horiz6, horiz5, horiz4,
+                                        horiz8, horiz8, horiz7, horiz6,
+                                        const20, const6, const3);
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
+    res1 = __msa_aver_u_b(avg1, res1);
+    /* last store: dst is not advanced afterwards because it is never read
+     * again */
+    ST8x2_UB(res1, dst, dst_stride);
+}
+
+/**
+ * 16x16 qpel HV motion compensation that averages into dst.
+ *
+ * Two-stage wrapper: a horizontal helper fills a scratch buffer, then the
+ * vertical "avg_dst" helper consumes it and updates dst. Both helpers are
+ * defined elsewhere in this file; what each stage computes is inferred from
+ * the helper names only.
+ */
+static void hv_mc_qpel_avg_dst_16x16_msa(const uint8_t *src, int32_t src_stride,
+                                         uint8_t *dst, int32_t dst_stride)
+{
+    /* scratch area: room for 17 rows at a 16-byte stride (272 bytes) */
+    uint8_t tmp[16 * 17];
+
+    /* stage 1: horizontal helper, output to tmp with stride 16 */
+    hv_mc_qpel_aver_horiz_16x16_msa(src, src_stride, tmp, 16, 16);
+    /* stage 2: vertical helper reads tmp (stride 16) and writes dst */
+    vert_mc_qpel_avg_dst_16x16_msa(tmp, 16, dst, dst_stride);
+}
+
+/**
+ * 8x8 qpel HV motion compensation averaged into dst.
+ *
+ * All nine source rows are passed through the horizontal filter macros
+ * first (horiz0..horiz8). For every pair of output rows the vertical filter
+ * macro result is then averaged with the pixels already present in dst and
+ * stored back.
+ *
+ * @param src        first source row; rows up to src + 8 * src_stride are read
+ * @param src_stride distance between consecutive source rows
+ * @param dst        destination block, read and rewritten two rows at a time
+ * @param dst_stride distance between consecutive destination rows
+ */
+static void hv_mc_qpel_avg_dst_8x8_msa(const uint8_t *src, int32_t src_stride,
+                                       uint8_t *dst, int32_t dst_stride)
+{
+    v16u8 inp0, inp1, inp2, inp3;
+    v16u8 res0, res1, avg0, avg1;
+    v16u8 horiz0, horiz1, horiz2, horiz3;
+    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
+    v16u8 dst0, dst1;
+    /* byte-index vectors handed to the horizontal filter macros */
+    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
+    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
+    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
+    /* 20, 6 and 3 replicated in every byte; presumably the filter weights */
+    v16u8 const20 = (v16u8) __msa_ldi_b(20);
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+
+    /* source rows 0-1 -> horiz0 (the macro appears to pack two rows per
+     * vector: see the splati_d below that extracts the upper half) */
+    LD_UB2(src, src_stride, inp0, inp1);
+    src += (2 * src_stride);
+    horiz0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
+                                           mask0, mask1, mask2, mask3,
+                                           const20, const6, const3);
+    /* source rows 2-3 */
+    LD_UB2(src, src_stride, inp2, inp3);
+    src += (2 * src_stride);
+    /* upper doubleword of horiz0 replicated: filtered row 1 */
+    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1);
+    horiz2 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
+                                           mask0, mask1, mask2, mask3,
+                                           const20, const6, const3);
+    /* source rows 4-5 */
+    LD_UB2(src, src_stride, inp0, inp1);
+    src += (2 * src_stride);
+    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
+    horiz4 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
+                                           mask0, mask1, mask2, mask3,
+                                           const20, const6, const3);
+    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
+    /* source rows 6-7 */
+    LD_UB2(src, src_stride, inp2, inp3);
+    src += (2 * src_stride);
+    horiz6 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
+                                           mask0, mask1, mask2, mask3,
+                                           const20, const6, const3);
+    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
+    /* ninth source row (row 8) */
+    inp0 = LD_UB(src);
+    horiz8 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0,
+                                                mask0, mask1, mask2, mask3,
+                                                const20, const6, const3);
+    /* output rows 0-1; horiz0 is repeated where rows above the block would
+     * be needed */
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
+                                        horiz1, horiz2, horiz3, horiz4,
+                                        horiz1, horiz0, horiz0, horiz1,
+                                        horiz2, horiz3, horiz4, horiz5,
+                                        const20, const6, const3);
+    /* average with the existing dst rows (low doublewords of dst0/dst1) */
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
+    res0 = __msa_aver_u_b(avg0, res0);
+    /* ST8x2_UB presumably writes the two 8-byte halves as two dst rows */
+    ST8x2_UB(res0, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    /* output rows 2-3 */
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
+                                        horiz3, horiz4, horiz5, horiz6,
+                                        horiz3, horiz2, horiz1, horiz0,
+                                        horiz4, horiz5, horiz6, horiz7,
+                                        const20, const6, const3);
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
+    res1 = __msa_aver_u_b(avg1, res1);
+    ST8x2_UB(res1, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    /* output rows 4-5; horiz8 is repeated where a row below row 8 would be
+     * needed */
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
+                                        horiz5, horiz6, horiz7, horiz8,
+                                        horiz5, horiz4, horiz3, horiz2,
+                                        horiz6, horiz7, horiz8, horiz8,
+                                        const20, const6, const3);
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
+    res0 = __msa_aver_u_b(avg0, res0);
+    ST8x2_UB(res0, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    /* output rows 6-7; the argument list folds back onto horiz8/horiz7/horiz6
+     * at the bottom edge */
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
+                                        horiz7, horiz8, horiz8, horiz7,
+                                        horiz7, horiz6, horiz5, horiz4,
+                                        horiz8, horiz8, horiz7, horiz6,
+                                        const20, const6, const3);
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
+    res1 = __msa_aver_u_b(avg1, res1);
+    ST8x2_UB(res1, dst, dst_stride);
+}
+
+/**
+ * 16x16 qpel HV motion compensation that averages into dst.
+ *
+ * Two-stage wrapper: the "horiz_src1" helper fills a scratch buffer, then
+ * the vertical "avg_dst" helper consumes it and updates dst. Both helpers
+ * are defined elsewhere in this file; what each stage computes is inferred
+ * from the helper names only.
+ */
+static void hv_mc_qpel_avg_dst_aver_h_src1_16x16_msa(const uint8_t *src,
+                                                     int32_t src_stride,
+                                                     uint8_t *dst,
+                                                     int32_t dst_stride)
+{
+    /* scratch area: room for 17 rows at a 16-byte stride (272 bytes) */
+    uint8_t tmp[16 * 17];
+
+    /* stage 1: horizontal helper, output to tmp with stride 16 */
+    hv_mc_qpel_aver_horiz_src1_16x16_msa(src, src_stride, tmp, 16, 16);
+    /* stage 2: vertical helper reads tmp (stride 16) and writes dst */
+    vert_mc_qpel_avg_dst_16x16_msa(tmp, 16, dst, dst_stride);
+}
+
+/**
+ * 8x8 qpel HV motion compensation with horizontal averaging against the
+ * source shifted by one byte and a final average into dst.
+ *
+ * Nine source rows are horizontally filtered; each filtered row is averaged
+ * with the same source row slid by one byte (horiz0..horiz8). For every pair
+ * of output rows the vertical filter macro result is averaged with the
+ * pixels already present in dst and stored back.
+ *
+ * @param src        first source row; rows up to src + 8 * src_stride are read
+ * @param src_stride distance between consecutive source rows
+ * @param dst        destination block, read and rewritten two rows at a time
+ * @param dst_stride distance between consecutive destination rows
+ */
+static void hv_mc_qpel_avg_dst_aver_h_src1_8x8_msa(const uint8_t *src,
+                                                   int32_t src_stride,
+                                                   uint8_t *dst,
+                                                   int32_t dst_stride)
+{
+    v16u8 inp0, inp1, inp2, inp3;
+    v16u8 res0, res1, avg0, avg1;
+    v16u8 horiz0, horiz1, horiz2, horiz3;
+    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
+    v16u8 dst0, dst1;
+    /* byte-index vectors handed to the horizontal filter macros */
+    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
+    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
+    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
+    /* 20, 6 and 3 replicated in every byte; presumably the filter weights */
+    v16u8 const20 = (v16u8) __msa_ldi_b(20);
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+
+    /* source rows 0-1: horizontal filter (two rows packed per vector) */
+    LD_UB2(src, src_stride, inp0, inp1);
+    src += (2 * src_stride);
+    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    LD_UB2(src, src_stride, inp2, inp3);
+    src += (2 * src_stride);
+    /* SLDI_B2_UB presumably slides each row by one byte so that lane i holds
+     * source pixel i + 1 (macro defined elsewhere - confirm) */
+    SLDI_B2_UB(inp0, inp1, inp0, inp1, inp0, inp1, 1);
+
+    /* pack the low 8 bytes of both shifted rows and average with the filter
+     * output */
+    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
+    horiz0 = __msa_aver_u_b(inp0, res0);
+    /* upper doubleword of horiz0 replicated: row 1 */
+    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1);
+    /* source rows 2-3 */
+    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    LD_UB2(src, src_stride, inp0, inp1);
+    src += (2 * src_stride);
+    SLDI_B2_UB(inp2, inp3, inp2, inp3, inp2, inp3, 1);
+
+    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
+    horiz2 = __msa_aver_u_b(inp2, res1);
+    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
+    /* source rows 4-5 */
+    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+
+    SLDI_B2_UB(inp0, inp1, inp0, inp1, inp0, inp1, 1);
+
+    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
+    horiz4 = __msa_aver_u_b(inp0, res0);
+    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
+    /* output rows 0-1; horiz0 is repeated where rows above the block would
+     * be needed */
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
+                                        horiz1, horiz2, horiz3, horiz4,
+                                        horiz1, horiz0, horiz0, horiz1,
+                                        horiz2, horiz3, horiz4, horiz5,
+                                        const20, const6, const3);
+    /* average with the existing dst rows (low doublewords of dst0/dst1) */
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
+    res0 = __msa_aver_u_b(avg0, res0);
+    /* ST8x2_UB presumably writes the two 8-byte halves as two dst rows */
+    ST8x2_UB(res0, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    /* source rows 6-7, then output rows 2-3 */
+    LD_UB2(src, src_stride, inp2, inp3);
+    src += (2 * src_stride);
+    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+
+    SLDI_B2_UB(inp2, inp3, inp2, inp3, inp2, inp3, 1);
+
+    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
+    horiz6 = __msa_aver_u_b(inp2, res1);
+    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
+                                        horiz3, horiz4, horiz5, horiz6,
+                                        horiz3, horiz2, horiz1, horiz0,
+                                        horiz4, horiz5, horiz6, horiz7,
+                                        const20, const6, const3);
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
+    res1 = __msa_aver_u_b(avg1, res1);
+    ST8x2_UB(res1, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    /* ninth source row (row 8), then output rows 4-5 */
+    inp0 = LD_UB(src);
+    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
+                                              const20, const6, const3);
+    /* slide the row by one byte before averaging, as for the row pairs */
+    inp0 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) inp0, 1);
+    horiz8 = __msa_aver_u_b(inp0, res0);
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    /* horiz8 is repeated where a row below row 8 would be needed */
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
+                                        horiz5, horiz6, horiz7, horiz8,
+                                        horiz5, horiz4, horiz3, horiz2,
+                                        horiz6, horiz7, horiz8, horiz8,
+                                        const20, const6, const3);
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
+    res0 = __msa_aver_u_b(avg0, res0);
+    ST8x2_UB(res0, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    /* output rows 6-7; the argument list folds back onto horiz8/horiz7/horiz6
+     * at the bottom edge */
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
+                                        horiz7, horiz8, horiz8, horiz7,
+                                        horiz7, horiz6, horiz5, horiz4,
+                                        horiz8, horiz8, horiz7, horiz6,
+                                        const20, const6, const3);
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
+    res1 = __msa_aver_u_b(avg1, res1);
+    ST8x2_UB(res1, dst, dst_stride);
+}
+
+/**
+ * 16x16 qpel HV motion compensation that averages into dst.
+ *
+ * Two-stage wrapper: the "horiz_src0" helper fills a scratch buffer, then
+ * the vertical "avg_dst_aver_src1" helper consumes it and updates dst.
+ * Both helpers are defined elsewhere in this file; what each stage computes
+ * is inferred from the helper names only.
+ */
+static void hv_mc_qpel_avg_dst_aver_hv_src01_16x16_msa(const uint8_t *src,
+                                                       int32_t src_stride,
+                                                       uint8_t *dst,
+                                                       int32_t dst_stride)
+{
+    /* scratch area: room for 17 rows at a 16-byte stride (272 bytes) */
+    uint8_t tmp[16 * 17];
+
+    /* stage 1: horizontal helper, output to tmp with stride 16 */
+    hv_mc_qpel_aver_horiz_src0_16x16_msa(src, src_stride, tmp, 16, 16);
+    /* stage 2: vertical helper reads tmp (stride 16) and writes dst */
+    vert_mc_qpel_avg_dst_aver_src1_16x16_msa(tmp, 16, dst, dst_stride);
+}
+
+/**
+ * 8x8 qpel HV motion compensation with horizontal averaging against the
+ * unshifted source, vertical averaging against the next filtered row, and a
+ * final average into dst.
+ *
+ * Nine source rows are horizontally filtered and each filtered row is
+ * averaged with the source row it came from (horiz0..horiz8). For every pair
+ * of output rows the vertical filter macro result is averaged with the
+ * horiz rows one row further down (rows 1-2, 3-4, 5-6, 7-8), then with the
+ * pixels already present in dst, and stored back.
+ *
+ * @param src        first source row; rows up to src + 8 * src_stride are read
+ * @param src_stride distance between consecutive source rows
+ * @param dst        destination block, read and rewritten two rows at a time
+ * @param dst_stride distance between consecutive destination rows
+ */
+static void hv_mc_qpel_avg_dst_aver_hv_src01_8x8_msa(const uint8_t *src,
+                                                     int32_t src_stride,
+                                                     uint8_t *dst,
+                                                     int32_t dst_stride)
+{
+    v16u8 inp0, inp1, inp2, inp3;
+    v16u8 res0, res1, avg0, avg1;
+    v16u8 horiz0, horiz1, horiz2, horiz3;
+    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
+    v16u8 dst0, dst1;
+    /* byte-index vectors handed to the horizontal filter macros */
+    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
+    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
+    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
+    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
+    /* 20, 6 and 3 replicated in every byte; presumably the filter weights */
+    v16u8 const20 = (v16u8) __msa_ldi_b(20);
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+
+    /* source rows 0-1 */
+    LD_UB2(src, src_stride, inp0, inp1);
+    src += (2 * src_stride);
+
+    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    /* pack the low 8 bytes of both source rows and average with the filter
+     * output */
+    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
+    horiz0 = __msa_aver_u_b(inp0, res0);
+    /* upper doubleword of horiz0 replicated: row 1 */
+    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1);
+    /* source rows 2-3 */
+    LD_UB2(src, src_stride, inp2, inp3);
+    src += (2 * src_stride);
+    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
+    horiz2 = __msa_aver_u_b(inp2, res1);
+    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
+    /* dst rows 0-1 are fetched early; they are consumed after the vertical
+     * filter below */
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    /* source rows 4-5 */
+    LD_UB2(src, src_stride, inp0, inp1);
+    src += (2 * src_stride);
+    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
+    horiz4 = __msa_aver_u_b(inp0, res0);
+    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
+    /* output rows 0-1; horiz0 is repeated where rows above the block would
+     * be needed */
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
+                                        horiz1, horiz2, horiz3, horiz4,
+                                        horiz1, horiz0, horiz0, horiz1,
+                                        horiz2, horiz3, horiz4, horiz5,
+                                        const20, const6, const3);
+    /* average with the NEXT rows: low doublewords of horiz1 and horiz2 */
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz2, (v2i64) horiz1);
+    res0 = __msa_aver_u_b(avg0, res0);
+    /* then with the existing dst rows */
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
+    res0 = __msa_aver_u_b(avg0, res0);
+    /* ST8x2_UB presumably writes the two 8-byte halves as two dst rows */
+    ST8x2_UB(res0, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    /* source rows 6-7, then output rows 2-3 (averaged with rows 3 and 4) */
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    LD_UB2(src, src_stride, inp2, inp3);
+    src += (2 * src_stride);
+    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
+    horiz6 = __msa_aver_u_b(inp2, res1);
+    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
+                                        horiz3, horiz4, horiz5, horiz6,
+                                        horiz3, horiz2, horiz1, horiz0,
+                                        horiz4, horiz5, horiz6, horiz7,
+                                        const20, const6, const3);
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz4, (v2i64) horiz3);
+    res1 = __msa_aver_u_b(avg1, res1);
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
+    res1 = __msa_aver_u_b(avg1, res1);
+    ST8x2_UB(res1, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    /* ninth source row (row 8) */
+    inp0 = LD_UB(src);
+    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
+                                              const20, const6, const3);
+    horiz8 = __msa_aver_u_b(inp0, res0);
+    /* vertical filter for output rows 4-5 (res0) and 6-7 (res1); horiz8 is
+     * repeated / the argument list folds back at the bottom edge */
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
+                                        horiz5, horiz6, horiz7, horiz8,
+                                        horiz5, horiz4, horiz3, horiz2,
+                                        horiz6, horiz7, horiz8, horiz8,
+                                        const20, const6, const3);
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
+                                        horiz7, horiz8, horiz8, horiz7,
+                                        horiz7, horiz6, horiz5, horiz4,
+                                        horiz8, horiz8, horiz7, horiz6,
+                                        const20, const6, const3);
+    /* output rows 4-5: average with rows 5 and 6, then with dst */
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz6, (v2i64) horiz5);
+    res0 = __msa_aver_u_b(avg0, res0);
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
+    res0 = __msa_aver_u_b(avg0, res0);
+    ST8x2_UB(res0, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    /* output rows 6-7: average with rows 7 and 8, then with dst */
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz8, (v2i64) horiz7);
+    res1 = __msa_aver_u_b(avg1, res1);
+    LD_UB2(dst, dst_stride, dst0, dst1);
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
+    res1 = __msa_aver_u_b(avg1, res1);
+    ST8x2_UB(res1, dst, dst_stride);
+}
+
+static void hv_mc_qpel_avg_dst_aver_v_src1_16x16_msa(const uint8_t *src,
+                                                     int32_t src_stride,
+                                                     uint8_t *dst,
+                                                     int32_t dst_stride)
+{ /* two-pass 16x16 case: a horizontal helper fills buff, then a vertical helper reads buff and writes dst */
+    uint8_t buff[272]; /* 272 = 17 * 16 bytes; presumably 17 intermediate rows of 16 pixels -- confirm against the horizontal helper */
+
+    hv_mc_qpel_aver_horiz_16x16_msa(src, src_stride, buff, 16, 16); /* buff is addressed with a stride of 16; the final 16 is presumably a row count -- confirm */
+    vert_mc_qpel_avg_dst_aver_src1_16x16_msa(buff, 16, dst, dst_stride); /* vertical pass over buff (stride 16); the helper name suggests it blends with the existing dst */
+}
+
+static void hv_mc_qpel_avg_dst_aver_v_src1_8x8_msa(const uint8_t *src,
+                                                   int32_t src_stride,
+                                                   uint8_t *dst,
+                                                   int32_t dst_stride)
+{ /* 8x8 case kept in vector registers: nine source rows are filtered horizontally, then vertically, then averaged into dst */
+    v16u8 inp0, inp1, inp2, inp3;
+    v16u8 res0, res1, avg0, avg1;
+    v16u8 horiz0, horiz1, horiz2, horiz3;
+    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
+    v16u8 dst0, dst1;
+    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 }; /* mask0..mask3: index tables handed to the horizontal filter macros (presumably byte-shuffle patterns -- confirm) */
+    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
+    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
+    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
+    v16u8 const20 = (v16u8) __msa_ldi_b(20); /* const20/const6/const3: the values 20, 6 and 3 passed to both filter macros */
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+
+    LD_UB2(src, src_stride, inp0, inp1); /* source rows 0 and 1 */
+    src += (2 * src_stride);
+    horiz0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
+                                           mask0, mask1, mask2, mask3,
+                                           const20, const6, const3);
+    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1); /* doubleword 1 of horiz0; presumably the filtered second row of the pair */
+    LD_UB2(src, src_stride, inp2, inp3); /* source rows 2 and 3 */
+    src += (2 * src_stride);
+    horiz2 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
+                                           mask0, mask1, mask2, mask3,
+                                           const20, const6, const3);
+    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
+    LD_UB2(dst, dst_stride, dst0, dst1); /* current contents of the first two destination rows */
+    LD_UB2(src, src_stride, inp0, inp1); /* source rows 4 and 5 */
+    src += (2 * src_stride);
+    horiz4 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
+                                           mask0, mask1, mask2, mask3,
+                                           const20, const6, const3);
+    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2, /* vertical filter for the first output pair; horiz0/horiz1 are repeated where taps would precede the first row */
+                                        horiz1, horiz2, horiz3, horiz4,
+                                        horiz1, horiz0, horiz0, horiz1,
+                                        horiz2, horiz3, horiz4, horiz5,
+                                        const20, const6, const3);
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz2, (v2i64) horiz1); /* pack the low doublewords of horiz1 and horiz2 into one vector */
+    res0 = __msa_aver_u_b(avg0, res0); /* average the vertical result with those two horizontal rows */
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0); /* pack the two loaded destination rows the same way */
+    res0 = __msa_aver_u_b(avg0, res0); /* second average: blend with what dst already held */
+    ST8x2_UB(res0, dst, dst_stride); /* write the pair back; dst then advances two rows */
+    dst += (2 * dst_stride);
+
+    LD_UB2(dst, dst_stride, dst0, dst1); /* destination rows 2 and 3 */
+    LD_UB2(src, src_stride, inp2, inp3); /* source rows 6 and 7 */
+    src += (2 * src_stride);
+    horiz6 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
+                                           mask0, mask1, mask2, mask3,
+                                           const20, const6, const3);
+    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0, /* vertical filter for the second output pair */
+                                        horiz3, horiz4, horiz5, horiz6,
+                                        horiz3, horiz2, horiz1, horiz0,
+                                        horiz4, horiz5, horiz6, horiz7,
+                                        const20, const6, const3);
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz4, (v2i64) horiz3); /* low doublewords of horiz3 and horiz4 */
+    res1 = __msa_aver_u_b(avg1, res1);
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
+    res1 = __msa_aver_u_b(avg1, res1); /* blend with the existing destination rows */
+    ST8x2_UB(res1, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    inp0 = LD_UB(src); /* ninth and last source row; src is not advanced afterwards */
+    horiz8 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0,
+                                                mask0, mask1, mask2, mask3,
+                                                const20, const6, const3);
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1, horiz5, /* third output pair; horiz8 is repeated where taps would pass the last row */
+                                        horiz6, horiz7, horiz8, horiz5, horiz4,
+                                        horiz3, horiz2, horiz6, horiz7, horiz8,
+                                        horiz8, const20, const6, const3);
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3, horiz7, /* fourth output pair; horiz8, horiz7 and horiz6 are reused in reverse at the bottom edge */
+                                        horiz8, horiz8, horiz7, horiz7, horiz6,
+                                        horiz5, horiz4, horiz8, horiz8, horiz7,
+                                        horiz6, const20, const6, const3);
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz6, (v2i64) horiz5); /* low doublewords of horiz5 and horiz6 */
+    res0 = __msa_aver_u_b(avg0, res0);
+    LD_UB2(dst, dst_stride, dst0, dst1); /* destination rows 4 and 5 */
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
+    res0 = __msa_aver_u_b(avg0, res0);
+    ST8x2_UB(res0, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz8, (v2i64) horiz7); /* low doublewords of horiz7 and horiz8 */
+    res1 = __msa_aver_u_b(avg1, res1);
+    LD_UB2(dst, dst_stride, dst0, dst1); /* destination rows 6 and 7 */
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
+    res1 = __msa_aver_u_b(avg1, res1);
+    ST8x2_UB(res1, dst, dst_stride); /* final pair; dst is not advanced again */
+}
+
+static void hv_mc_qpel_avg_dst_aver_hv_src11_16x16_msa(const uint8_t *src,
+                                                       int32_t src_stride,
+                                                       uint8_t *dst,
+                                                       int32_t dst_stride)
+{ /* two-pass 16x16 case: the "src1" horizontal helper fills buff, then the "src1" vertical helper reads buff and writes dst */
+    uint8_t buff[272]; /* 272 = 17 * 16 bytes; presumably 17 intermediate rows of 16 pixels -- confirm against the horizontal helper */
+
+    hv_mc_qpel_aver_horiz_src1_16x16_msa(src, src_stride, buff, 16, 16); /* buff is addressed with a stride of 16; the final 16 is presumably a row count -- confirm */
+    vert_mc_qpel_avg_dst_aver_src1_16x16_msa(buff, 16, dst, dst_stride); /* vertical pass over buff (stride 16); the helper name suggests it blends with the existing dst */
+}
+
+static void hv_mc_qpel_avg_dst_aver_hv_src11_8x8_msa(const uint8_t *src,
+                                                     int32_t src_stride,
+                                                     uint8_t *dst,
+                                                     int32_t dst_stride)
+{ /* 8x8 case kept in vector registers: each horizontal result is averaged with the source slid by one byte, then filtered vertically, averaged with a neighbouring row and finally with dst */
+    v16u8 inp0, inp1, inp2, inp3;
+    v16u8 res0, res1, avg0, avg1;
+    v16u8 horiz0, horiz1, horiz2, horiz3;
+    v16u8 horiz4, horiz5, horiz6, horiz7, horiz8;
+    v16u8 dst0, dst1;
+    v16u8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 }; /* mask0..mask3: index tables handed to the horizontal filter macros (presumably byte-shuffle patterns -- confirm) */
+    v16u8 mask1 = { 0, 2, 0, 3, 1, 4, 2, 5, 3, 6, 4, 7, 5, 8, 6, 8 };
+    v16u8 mask2 = { 1, 3, 0, 4, 0, 5, 1, 6, 2, 7, 3, 8, 4, 8, 5, 7 };
+    v16u8 mask3 = { 2, 4, 1, 5, 0, 6, 0, 7, 1, 8, 2, 8, 3, 7, 4, 6 };
+    v16u8 const20 = (v16u8) __msa_ldi_b(20); /* const20/const6/const3: the values 20, 6 and 3 passed to both filter macros */
+    v16u8 const6 = (v16u8) __msa_ldi_b(6);
+    v16u8 const3 = (v16u8) __msa_ldi_b(3);
+
+    LD_UB2(src, src_stride, inp0, inp1); /* source rows 0 and 1 */
+    src += (2 * src_stride);
+    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    LD_UB2(src, src_stride, inp2, inp3); /* source rows 2 and 3, loaded early */
+    src += (2 * src_stride);
+    SLDI_B2_UB(inp0, inp1, inp0, inp1, inp0, inp1, 1); /* slide rows 0/1 by one byte in place; presumably selects the pixel one position to the right -- confirm */
+
+    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0); /* pack the low doublewords of both slid rows into one vector */
+    horiz0 = __msa_aver_u_b(inp0, res0); /* average the filtered result with the slid source */
+    horiz1 = (v16u8) __msa_splati_d((v2i64) horiz0, 1); /* doubleword 1 of horiz0; presumably the second row of the pair */
+    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    LD_UB2(src, src_stride, inp0, inp1); /* source rows 4 and 5 */
+    src += (2 * src_stride);
+    SLDI_B2_UB(inp2, inp3, inp2, inp3, inp2, inp3, 1);
+
+    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
+    horiz2 = __msa_aver_u_b(inp2, res1);
+    horiz3 = (v16u8) __msa_splati_d((v2i64) horiz2, 1);
+    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    SLDI_B2_UB(inp0, inp1, inp0, inp1, inp0, inp1, 1);
+
+    inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
+    horiz4 = __msa_aver_u_b(inp0, res0);
+    horiz5 = (v16u8) __msa_splati_d((v2i64) horiz4, 1);
+    LD_UB2(dst, dst_stride, dst0, dst1); /* current contents of the first two destination rows */
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz2, (v2i64) horiz1); /* low doublewords of horiz1 and horiz2 */
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2, horiz1, /* vertical filter for the first output pair; horiz0/horiz1 are repeated where taps would precede the first row */
+                                        horiz2, horiz3, horiz4, horiz1, horiz0,
+                                        horiz0, horiz1, horiz2, horiz3, horiz4,
+                                        horiz5, const20, const6, const3);
+    res0 = __msa_aver_u_b(avg0, res0); /* average the vertical result with the packed horizontal rows */
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
+    res0 = __msa_aver_u_b(avg0, res0); /* second average: blend with what dst already held */
+    ST8x2_UB(res0, dst, dst_stride); /* write the pair back; dst then advances two rows */
+    dst += (2 * dst_stride);
+
+    LD_UB2(src, src_stride, inp2, inp3); /* source rows 6 and 7 */
+    src += (2 * src_stride);
+    res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
+                                         const20, const6, const3);
+    SLDI_B2_UB(inp2, inp3, inp2, inp3, inp2, inp3, 1);
+
+    inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
+    horiz6 = __msa_aver_u_b(inp2, res1);
+    horiz7 = (v16u8) __msa_splati_d((v2i64) horiz6, 1);
+    LD_UB2(dst, dst_stride, dst0, dst1); /* destination rows 2 and 3 */
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz4, (v2i64) horiz3); /* low doublewords of horiz3 and horiz4 */
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0, horiz3, /* vertical filter for the second output pair */
+                                        horiz4, horiz5, horiz6, horiz3, horiz2,
+                                        horiz1, horiz0, horiz4, horiz5, horiz6,
+                                        horiz7, const20, const6, const3);
+    res1 = __msa_aver_u_b(avg1, res1);
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
+    res1 = __msa_aver_u_b(avg1, res1);
+    ST8x2_UB(res1, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    inp0 = LD_UB(src); /* ninth and last source row; src is not advanced afterwards */
+    res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
+                                              const20, const6, const3);
+    inp0 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) inp0, 1); /* single-row form of the one-byte slide used above */
+    horiz8 = __msa_aver_u_b(inp0, res0);
+    LD_UB2(dst, dst_stride, dst0, dst1); /* destination rows 4 and 5 */
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) horiz6, (v2i64) horiz5); /* low doublewords of horiz5 and horiz6 */
+    res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1, horiz5, /* third output pair; horiz8 is repeated where taps would pass the last row */
+                                        horiz6, horiz7, horiz8, horiz5, horiz4,
+                                        horiz3, horiz2, horiz6, horiz7, horiz8,
+                                        horiz8, const20, const6, const3);
+    res0 = __msa_aver_u_b(avg0, res0);
+    avg0 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
+    res0 = __msa_aver_u_b(avg0, res0);
+    ST8x2_UB(res0, dst, dst_stride);
+    dst += (2 * dst_stride);
+
+    LD_UB2(dst, dst_stride, dst0, dst1); /* destination rows 6 and 7 */
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) horiz8, (v2i64) horiz7); /* low doublewords of horiz7 and horiz8 */
+    res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3, horiz7, /* fourth output pair; horiz8, horiz7 and horiz6 are reused in reverse at the bottom edge */
+                                        horiz8, horiz8, horiz7, horiz7, horiz6,
+                                        horiz5, horiz4, horiz8, horiz8, horiz7,
+                                        horiz6, const20, const6, const3);
+    res1 = __msa_aver_u_b(avg1, res1);
+    avg1 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0);
+    res1 = __msa_aver_u_b(avg1, res1);
+    ST8x2_UB(res1, dst, dst_stride); /* final pair; dst is not advanced again */
+}
+
+static void copy_8x8_msa(const uint8_t *src, int32_t src_stride,
+                         uint8_t *dst, int32_t dst_stride)
+{ /* copies eight rows from src to dst, one 64-bit value per row, with independent strides */
+    uint64_t src0, src1;
+    int32_t loop_cnt;
+
+    for (loop_cnt = 4; loop_cnt--;) { /* four passes, two rows per pass */
+        src0 = LD(src); /* LD/SD are defined elsewhere; presumably an 8-byte load/store -- confirm */
+        src += src_stride;
+        src1 = LD(src);
+        src += src_stride;
+
+        SD(src0, dst); /* both rows are read before either is written */
+        dst += dst_stride;
+        SD(src1, dst);
+        dst += dst_stride;
+    }
+}
+
+static void copy_16x16_msa(const uint8_t *src, int32_t src_stride,
+                           uint8_t *dst, int32_t dst_stride)
+{ /* copies sixteen rows from src to dst, one v16u8 vector per row, with independent strides */
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
+    v16u8 src8, src9, src10, src11, src12, src13, src14, src15;
+
+    LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); /* rows 0-7 */
+    src += (8 * src_stride);
+    LD_UB8(src, src_stride, /* rows 8-15; all sixteen rows are loaded before any store */
+           src8, src9, src10, src11, src12, src13, src14, src15);
+
+    ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, dst, dst_stride);
+    dst += (8 * dst_stride);
+    ST_UB8(src8, src9, src10, src11, src12, src13, src14, src15,
+           dst, dst_stride);
+}
+
+static void avg_width8_msa(const uint8_t *src, int32_t src_stride,
+                           uint8_t *dst, int32_t dst_stride,
+                           int32_t height)
+{ /* averages src rows into dst in place, storing 8 bytes per row */
+    int32_t cnt;
+    uint64_t out0, out1, out2, out3;
+    v16u8 src0, src1, src2, src3;
+    v16u8 dst0, dst1, dst2, dst3;
+
+    for (cnt = (height / 4); cnt--;) { /* four rows per pass; integer division means rows past a multiple of 4 are not processed */
+        LD_UB4(src, src_stride, src0, src1, src2, src3);
+        src += (4 * src_stride);
+        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); /* existing destination rows */
+
+        AVER_UB4_UB(src0, dst0, src1, dst1, src2, dst2, src3, dst3, /* pairs each src row with its dst row; results land back in dst0..dst3 */
+                    dst0, dst1, dst2, dst3);
+
+        out0 = __msa_copy_u_d((v2i64) dst0, 0); /* keep only doubleword 0 of each vector, i.e. 8 bytes per row */
+        out1 = __msa_copy_u_d((v2i64) dst1, 0);
+        out2 = __msa_copy_u_d((v2i64) dst2, 0);
+        out3 = __msa_copy_u_d((v2i64) dst3, 0);
+        SD4(out0, out1, out2, out3, dst, dst_stride);
+        dst += (4 * dst_stride);
+    }
+}
+
+static void avg_width16_msa(const uint8_t *src, int32_t src_stride,
+                            uint8_t *dst, int32_t dst_stride,
+                            int32_t height)
+{ /* averages src rows into dst in place, one full v16u8 vector per row */
+    int32_t cnt;
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
+    v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+
+    for (cnt = (height / 8); cnt--;) { /* eight rows per pass; integer division means rows past a multiple of 8 are not processed */
+        LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
+        src += (8 * src_stride);
+        LD_UB8(dst, dst_stride, dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7); /* existing destination rows */
+
+        AVER_UB4_UB(src0, dst0, src1, dst1, src2, dst2, src3, dst3, /* rows 0-3: each src row paired with its dst row, result back in dst0..dst3 */
+                    dst0, dst1, dst2, dst3);
+        AVER_UB4_UB(src4, dst4, src5, dst5, src6, dst6, src7, dst7, /* rows 4-7, same pairing */
+                    dst4, dst5, dst6, dst7);
+        ST_UB8(dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst, dst_stride);
+        dst += (8 * dst_stride);
+    }
+}
+
+void ff_copy_16x16_msa(uint8_t *dest, const uint8_t *src, ptrdiff_t stride)
+{ /* exported wrapper around the static copy_16x16_msa() */
+    copy_16x16_msa(src, stride, dest, stride); /* helper takes src before dst; the single stride is used for both buffers */
+}
+
+void ff_copy_8x8_msa(uint8_t *dest, const uint8_t *src, ptrdiff_t stride)
+{ /* exported wrapper around the static copy_8x8_msa() */
+    copy_8x8_msa(src, stride, dest, stride); /* helper takes src before dst; the single stride is used for both buffers */
+}
+
+void ff_horiz_mc_qpel_aver_src0_8width_msa(uint8_t *dest,
+                                           const uint8_t *src,
+                                           ptrdiff_t stride)
+{ /* exported wrapper around horiz_mc_qpel_aver_src0_8width_msa() */
+    horiz_mc_qpel_aver_src0_8width_msa(src, stride, dest, stride, 8); /* src passed before dest, one stride for both; trailing 8 is presumably the row count -- confirm in the helper */
+}
+
+void ff_horiz_mc_qpel_aver_src0_16width_msa(uint8_t *dest,
+                                            const uint8_t *src,
+                                            ptrdiff_t stride)
+{ /* exported wrapper around horiz_mc_qpel_aver_src0_16width_msa() */
+    horiz_mc_qpel_aver_src0_16width_msa(src, stride, dest, stride, 16); /* src passed before dest, one stride for both; trailing 16 is presumably the row count -- confirm in the helper */
+}
+
+void ff_horiz_mc_qpel_8width_msa(uint8_t *dest, const uint8_t *src,
+                                 ptrdiff_t stride)
+{ /* exported wrapper around horiz_mc_qpel_8width_msa() */
+    horiz_mc_qpel_8width_msa(src, stride, dest, stride, 8); /* src passed before dest, one stride for both; trailing 8 is presumably the row count -- confirm in the helper */
+}
+
+void ff_horiz_mc_qpel_16width_msa(uint8_t *dest,
+                                  const uint8_t *src, ptrdiff_t stride)
+{ /* exported wrapper around horiz_mc_qpel_16width_msa() */
+    horiz_mc_qpel_16width_msa(src, stride, dest, stride, 16); /* src passed before dest, one stride for both; trailing 16 is presumably the row count -- confirm in the helper */
+}
+
+void ff_horiz_mc_qpel_aver_src1_8width_msa(uint8_t *dest,
+                                           const uint8_t *src,
+                                           ptrdiff_t stride)
+{ /* exported wrapper around horiz_mc_qpel_aver_src1_8width_msa() */
+    horiz_mc_qpel_aver_src1_8width_msa(src, stride, dest, stride, 8); /* src passed before dest, one stride for both; trailing 8 is presumably the row count -- confirm in the helper */
+}
+
+void ff_horiz_mc_qpel_aver_src1_16width_msa(uint8_t *dest,
+                                            const uint8_t *src,
+                                            ptrdiff_t stride)
+{ /* exported wrapper around horiz_mc_qpel_aver_src1_16width_msa() */
+    horiz_mc_qpel_aver_src1_16width_msa(src, stride, dest, stride, 16); /* src passed before dest, one stride for both; trailing 16 is presumably the row count -- confirm in the helper */
+}
+
+void ff_horiz_mc_qpel_no_rnd_aver_src0_8width_msa(uint8_t *dest,
+                                                  const uint8_t *src,
+                                                  ptrdiff_t stride)
+{ /* exported wrapper around horiz_mc_qpel_no_rnd_aver_src0_8width_msa() */
+    horiz_mc_qpel_no_rnd_aver_src0_8width_msa(src, stride, dest, stride, 8); /* src passed before dest, one stride for both; trailing 8 is presumably the row count -- confirm in the helper */
+}
+
+void ff_horiz_mc_qpel_no_rnd_aver_src0_16width_msa(uint8_t *dest,
+                                                   const uint8_t *src,
+                                                   ptrdiff_t stride)
+{ /* exported wrapper around horiz_mc_qpel_no_rnd_aver_src0_16width_msa() */
+    horiz_mc_qpel_no_rnd_aver_src0_16width_msa(src, stride, dest, stride, 16); /* src passed before dest, one stride for both; trailing 16 is presumably the row count -- confirm in the helper */
+}
+
+void ff_horiz_mc_qpel_no_rnd_8width_msa(uint8_t *dest,
+                                        const uint8_t *src, ptrdiff_t stride)
+{ /* exported wrapper around horiz_mc_qpel_no_rnd_8width_msa() */
+    horiz_mc_qpel_no_rnd_8width_msa(src, stride, dest, stride, 8); /* src passed before dest, one stride for both; trailing 8 is presumably the row count -- confirm in the helper */
+}
+
+void ff_horiz_mc_qpel_no_rnd_16width_msa(uint8_t *dest,
+                                         const uint8_t *src, ptrdiff_t stride)
+{ /* exported wrapper around horiz_mc_qpel_no_rnd_16width_msa() */
+    horiz_mc_qpel_no_rnd_16width_msa(src, stride, dest, stride, 16); /* src passed before dest, one stride for both; trailing 16 is presumably the row count -- confirm in the helper */
+}
+
+void ff_horiz_mc_qpel_no_rnd_aver_src1_8width_msa(uint8_t *dest,
+                                                  const uint8_t *src,
+                                                  ptrdiff_t stride)
+{ /* exported wrapper around horiz_mc_qpel_no_rnd_aver_src1_8width_msa() */
+    horiz_mc_qpel_no_rnd_aver_src1_8width_msa(src, stride, dest, stride, 8); /* src passed before dest, one stride for both; trailing 8 is presumably the row count -- confirm in the helper */
+}
+
+void ff_horiz_mc_qpel_no_rnd_aver_src1_16width_msa(uint8_t *dest,
+                                                   const uint8_t *src,
+                                                   ptrdiff_t stride)
+{ /* exported wrapper around horiz_mc_qpel_no_rnd_aver_src1_16width_msa() */
+    horiz_mc_qpel_no_rnd_aver_src1_16width_msa(src, stride, dest, stride, 16); /* src passed before dest, one stride for both; trailing 16 is presumably the row count -- confirm in the helper */
+}
+
+void ff_avg_width8_msa(uint8_t *dest, const uint8_t *src, ptrdiff_t stride)
+{ /* exported wrapper around the static avg_width8_msa() */
+    avg_width8_msa(src, stride, dest, stride, 8); /* helper takes src before dst; one stride for both buffers; height = 8 rows */
+}
+
+void ff_avg_width16_msa(uint8_t *dest, const uint8_t *src, ptrdiff_t stride)
+{ /* exported wrapper around the static avg_width16_msa() */
+    avg_width16_msa(src, stride, dest, stride, 16); /* helper takes src before dst; one stride for both buffers; height = 16 rows */
+}
+
+void ff_horiz_mc_qpel_avg_dst_aver_src0_8width_msa(uint8_t *dest,
+                                                   const uint8_t *src,
+                                                   ptrdiff_t stride)
+{ /* exported wrapper around horiz_mc_qpel_avg_dst_aver_src0_8width_msa() */
+    horiz_mc_qpel_avg_dst_aver_src0_8width_msa(src, stride, dest, stride, 8); /* src passed before dest, one stride for both; trailing 8 is presumably the row count -- confirm in the helper */
+}
+
+void ff_horiz_mc_qpel_avg_dst_aver_src0_16width_msa(uint8_t *dest,
+                                                    const uint8_t *src,
+                                                    ptrdiff_t stride)
+{ /* exported wrapper around horiz_mc_qpel_avg_dst_aver_src0_16width_msa() */
+    horiz_mc_qpel_avg_dst_aver_src0_16width_msa(src, stride, dest, stride, 16); /* src passed before dest, one stride for both; trailing 16 is presumably the row count -- confirm in the helper */
+}
+
+void ff_horiz_mc_qpel_avg_dst_8width_msa(uint8_t *dest,
+                                         const uint8_t *src, ptrdiff_t stride)
+{ /* exported wrapper around horiz_mc_qpel_avg_dst_8width_msa() */
+    horiz_mc_qpel_avg_dst_8width_msa(src, stride, dest, stride, 8); /* src passed before dest, one stride for both; trailing 8 is presumably the row count -- confirm in the helper */
+}
+
+void ff_horiz_mc_qpel_avg_dst_16width_msa(uint8_t *dest,
+                                          const uint8_t *src, ptrdiff_t stride)
+{ /* exported wrapper around horiz_mc_qpel_avg_dst_16width_msa() */
+    horiz_mc_qpel_avg_dst_16width_msa(src, stride, dest, stride, 16); /* src passed before dest, one stride for both; trailing 16 is presumably the row count -- confirm in the helper */
+}
+
+void ff_horiz_mc_qpel_avg_dst_aver_src1_8width_msa(uint8_t *dest,
+                                                   const uint8_t *src,
+                                                   ptrdiff_t stride)
+{ /* exported wrapper around horiz_mc_qpel_avg_dst_aver_src1_8width_msa() */
+    horiz_mc_qpel_avg_dst_aver_src1_8width_msa(src, stride, dest, stride, 8); /* src passed before dest, one stride for both; trailing 8 is presumably the row count -- confirm in the helper */
+}
+
+void ff_horiz_mc_qpel_avg_dst_aver_src1_16width_msa(uint8_t *dest,
+                                                    const uint8_t *src,
+                                                    ptrdiff_t stride)
+{ /* exported wrapper around horiz_mc_qpel_avg_dst_aver_src1_16width_msa() */
+    horiz_mc_qpel_avg_dst_aver_src1_16width_msa(src, stride, dest, stride, 16); /* src passed before dest, one stride for both; trailing 16 is presumably the row count -- confirm in the helper */
+}
+
+
+void ff_vert_mc_qpel_aver_src0_8x8_msa(uint8_t *dest,
+                                       const uint8_t *src, ptrdiff_t stride)
+{ /* exported wrapper around vert_mc_qpel_aver_src0_8x8_msa() */
+    vert_mc_qpel_aver_src0_8x8_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_vert_mc_qpel_aver_src0_16x16_msa(uint8_t *dest,
+                                         const uint8_t *src, ptrdiff_t stride)
+{ /* exported wrapper around vert_mc_qpel_aver_src0_16x16_msa() */
+    vert_mc_qpel_aver_src0_16x16_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_vert_mc_qpel_8x8_msa(uint8_t *dest, const uint8_t *src,
+                             ptrdiff_t stride)
+{ /* exported wrapper around vert_mc_qpel_8x8_msa() */
+    vert_mc_qpel_8x8_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_vert_mc_qpel_16x16_msa(uint8_t *dest, const uint8_t *src,
+                               ptrdiff_t stride)
+{ /* exported wrapper around vert_mc_qpel_16x16_msa() */
+    vert_mc_qpel_16x16_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_vert_mc_qpel_aver_src1_8x8_msa(uint8_t *dest,
+                                       const uint8_t *src, ptrdiff_t stride)
+{ /* exported wrapper around vert_mc_qpel_aver_src1_8x8_msa() */
+    vert_mc_qpel_aver_src1_8x8_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_vert_mc_qpel_aver_src1_16x16_msa(uint8_t *dest,
+                                         const uint8_t *src, ptrdiff_t stride)
+{ /* exported wrapper around vert_mc_qpel_aver_src1_16x16_msa() */
+    vert_mc_qpel_aver_src1_16x16_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_vert_mc_qpel_no_rnd_aver_src0_8x8_msa(uint8_t *dest,
+                                              const uint8_t *src,
+                                              ptrdiff_t stride)
+{ /* exported wrapper around vert_mc_qpel_no_rnd_aver_src0_8x8_msa() */
+    vert_mc_qpel_no_rnd_aver_src0_8x8_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_vert_mc_qpel_no_rnd_aver_src0_16x16_msa(uint8_t *dest,
+                                                const uint8_t *src,
+                                                ptrdiff_t stride)
+{ /* exported wrapper around vert_mc_qpel_no_rnd_aver_src0_16x16_msa() */
+    vert_mc_qpel_no_rnd_aver_src0_16x16_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_vert_mc_qpel_no_rnd_8x8_msa(uint8_t *dest,
+                                    const uint8_t *src, ptrdiff_t stride)
+{ /* exported wrapper around vert_mc_qpel_no_rnd_8x8_msa() */
+    vert_mc_qpel_no_rnd_8x8_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_vert_mc_qpel_no_rnd_16x16_msa(uint8_t *dest,
+                                      const uint8_t *src, ptrdiff_t stride)
+{ /* exported wrapper around vert_mc_qpel_no_rnd_16x16_msa() */
+    vert_mc_qpel_no_rnd_16x16_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_vert_mc_qpel_no_rnd_aver_src1_8x8_msa(uint8_t *dest,
+                                              const uint8_t *src,
+                                              ptrdiff_t stride)
+{ /* exported wrapper around vert_mc_qpel_no_rnd_aver_src1_8x8_msa() */
+    vert_mc_qpel_no_rnd_aver_src1_8x8_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_vert_mc_qpel_no_rnd_aver_src1_16x16_msa(uint8_t *dest,
+                                                const uint8_t *src,
+                                                ptrdiff_t stride)
+{ /* exported wrapper around vert_mc_qpel_no_rnd_aver_src1_16x16_msa() */
+    vert_mc_qpel_no_rnd_aver_src1_16x16_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_vert_mc_qpel_avg_dst_aver_src0_8x8_msa(uint8_t *dest,
+                                               const uint8_t *src,
+                                               ptrdiff_t stride)
+{ /* exported wrapper around vert_mc_qpel_avg_dst_aver_src0_8x8_msa() */
+    vert_mc_qpel_avg_dst_aver_src0_8x8_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_vert_mc_qpel_avg_dst_aver_src0_16x16_msa(uint8_t *dest,
+                                                 const uint8_t *src,
+                                                 ptrdiff_t stride)
+{ /* exported wrapper around vert_mc_qpel_avg_dst_aver_src0_16x16_msa() */
+    vert_mc_qpel_avg_dst_aver_src0_16x16_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_vert_mc_qpel_avg_dst_8x8_msa(uint8_t *dest,
+                                     const uint8_t *src, ptrdiff_t stride)
+{ /* exported wrapper around vert_mc_qpel_avg_dst_8x8_msa() */
+    vert_mc_qpel_avg_dst_8x8_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_vert_mc_qpel_avg_dst_16x16_msa(uint8_t *dest,
+                                       const uint8_t *src, ptrdiff_t stride)
+{ /* exported wrapper around vert_mc_qpel_avg_dst_16x16_msa() */
+    vert_mc_qpel_avg_dst_16x16_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_vert_mc_qpel_avg_dst_aver_src1_8x8_msa(uint8_t *dest,
+                                               const uint8_t *src,
+                                               ptrdiff_t stride)
+{ /* exported wrapper around vert_mc_qpel_avg_dst_aver_src1_8x8_msa() */
+    vert_mc_qpel_avg_dst_aver_src1_8x8_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_vert_mc_qpel_avg_dst_aver_src1_16x16_msa(uint8_t *dest,
+                                                 const uint8_t *src,
+                                                 ptrdiff_t stride)
+{ /* exported wrapper around vert_mc_qpel_avg_dst_aver_src1_16x16_msa() */
+    vert_mc_qpel_avg_dst_aver_src1_16x16_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+/* HV cases */
+void ff_hv_mc_qpel_aver_hv_src00_16x16_msa(uint8_t *dest,
+                                           const uint8_t *src,
+                                           ptrdiff_t stride)
+{ /* exported wrapper around hv_mc_qpel_aver_hv_src00_16x16_msa() */
+    hv_mc_qpel_aver_hv_src00_16x16_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_hv_mc_qpel_aver_hv_src00_8x8_msa(uint8_t *dest,
+                                         const uint8_t *src, ptrdiff_t stride)
+{ /* exported wrapper around hv_mc_qpel_aver_hv_src00_8x8_msa() */
+    hv_mc_qpel_aver_hv_src00_8x8_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_hv_mc_qpel_aver_v_src0_16x16_msa(uint8_t *dest,
+                                         const uint8_t *src, ptrdiff_t stride)
+{ /* exported wrapper around hv_mc_qpel_aver_v_src0_16x16_msa() */
+    hv_mc_qpel_aver_v_src0_16x16_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_hv_mc_qpel_aver_v_src0_8x8_msa(uint8_t *dest,
+                                       const uint8_t *src, ptrdiff_t stride)
+{ /* exported wrapper around hv_mc_qpel_aver_v_src0_8x8_msa() */
+    hv_mc_qpel_aver_v_src0_8x8_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_hv_mc_qpel_aver_hv_src10_16x16_msa(uint8_t *dest,
+                                           const uint8_t *src,
+                                           ptrdiff_t stride)
+{ /* exported wrapper around hv_mc_qpel_aver_hv_src10_16x16_msa() */
+    hv_mc_qpel_aver_hv_src10_16x16_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_hv_mc_qpel_aver_hv_src10_8x8_msa(uint8_t *dest,
+                                         const uint8_t *src, ptrdiff_t stride)
+{ /* exported wrapper around hv_mc_qpel_aver_hv_src10_8x8_msa() */
+    hv_mc_qpel_aver_hv_src10_8x8_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_hv_mc_qpel_aver_h_src0_16x16_msa(uint8_t *dest,
+                                         const uint8_t *src, ptrdiff_t stride)
+{ /* exported wrapper around hv_mc_qpel_aver_h_src0_16x16_msa() */
+    hv_mc_qpel_aver_h_src0_16x16_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_hv_mc_qpel_aver_h_src0_8x8_msa(uint8_t *dest,
+                                       const uint8_t *src, ptrdiff_t stride)
+{ /* exported wrapper around hv_mc_qpel_aver_h_src0_8x8_msa() */
+    hv_mc_qpel_aver_h_src0_8x8_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_hv_mc_qpel_16x16_msa(uint8_t *dest, const uint8_t *src,
+                             ptrdiff_t stride)
+{ /* exported wrapper around hv_mc_qpel_16x16_msa() */
+    hv_mc_qpel_16x16_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_hv_mc_qpel_8x8_msa(uint8_t *dest, const uint8_t *src,
+                           ptrdiff_t stride)
+{ /* exported wrapper around hv_mc_qpel_8x8_msa() */
+    hv_mc_qpel_8x8_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_hv_mc_qpel_aver_h_src1_16x16_msa(uint8_t *dest,
+                                         const uint8_t *src, ptrdiff_t stride)
+{ /* exported wrapper around hv_mc_qpel_aver_h_src1_16x16_msa() */
+    hv_mc_qpel_aver_h_src1_16x16_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_hv_mc_qpel_aver_h_src1_8x8_msa(uint8_t *dest,
+                                       const uint8_t *src, ptrdiff_t stride)
+{ /* exported wrapper around hv_mc_qpel_aver_h_src1_8x8_msa() */
+    hv_mc_qpel_aver_h_src1_8x8_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_hv_mc_qpel_aver_hv_src01_16x16_msa(uint8_t *dest,
+                                           const uint8_t *src,
+                                           ptrdiff_t stride)
+{ /* exported wrapper around hv_mc_qpel_aver_hv_src01_16x16_msa() */
+    hv_mc_qpel_aver_hv_src01_16x16_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_hv_mc_qpel_aver_hv_src01_8x8_msa(uint8_t *dest,
+                                         const uint8_t *src, ptrdiff_t stride)
+{ /* exported wrapper around hv_mc_qpel_aver_hv_src01_8x8_msa() */
+    hv_mc_qpel_aver_hv_src01_8x8_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_hv_mc_qpel_aver_v_src1_16x16_msa(uint8_t *dest,
+                                         const uint8_t *src, ptrdiff_t stride)
+{ /* exported wrapper around hv_mc_qpel_aver_v_src1_16x16_msa() */
+    hv_mc_qpel_aver_v_src1_16x16_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_hv_mc_qpel_aver_v_src1_8x8_msa(uint8_t *dest,
+                                       const uint8_t *src, ptrdiff_t stride)
+{ /* exported wrapper around hv_mc_qpel_aver_v_src1_8x8_msa() */
+    hv_mc_qpel_aver_v_src1_8x8_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_hv_mc_qpel_aver_hv_src11_16x16_msa(uint8_t *dest,
+                                           const uint8_t *src,
+                                           ptrdiff_t stride)
+{ /* exported wrapper around hv_mc_qpel_aver_hv_src11_16x16_msa() */
+    hv_mc_qpel_aver_hv_src11_16x16_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_hv_mc_qpel_aver_hv_src11_8x8_msa(uint8_t *dest,
+                                         const uint8_t *src, ptrdiff_t stride)
+{ /* exported wrapper around hv_mc_qpel_aver_hv_src11_8x8_msa() */
+    hv_mc_qpel_aver_hv_src11_8x8_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_hv_mc_qpel_avg_dst_aver_hv_src00_16x16_msa(uint8_t *dest,
+                                                   const uint8_t *src,
+                                                   ptrdiff_t stride)
+{ /* exported wrapper around hv_mc_qpel_avg_dst_aver_hv_src00_16x16_msa() */
+    hv_mc_qpel_avg_dst_aver_hv_src00_16x16_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_hv_mc_qpel_avg_dst_aver_hv_src00_8x8_msa(uint8_t *dest,
+                                                 const uint8_t *src,
+                                                 ptrdiff_t stride)
+{ /* exported wrapper around hv_mc_qpel_avg_dst_aver_hv_src00_8x8_msa() */
+    hv_mc_qpel_avg_dst_aver_hv_src00_8x8_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_hv_mc_qpel_avg_dst_aver_v_src0_16x16_msa(uint8_t *dest,
+                                                 const uint8_t *src,
+                                                 ptrdiff_t stride)
+{ /* exported wrapper around hv_mc_qpel_avg_dst_aver_v_src0_16x16_msa() */
+    hv_mc_qpel_avg_dst_aver_v_src0_16x16_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_hv_mc_qpel_avg_dst_aver_v_src0_8x8_msa(uint8_t *dest,
+                                               const uint8_t *src,
+                                               ptrdiff_t stride)
+{ /* exported wrapper around hv_mc_qpel_avg_dst_aver_v_src0_8x8_msa() */
+    hv_mc_qpel_avg_dst_aver_v_src0_8x8_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_hv_mc_qpel_avg_dst_aver_hv_src10_16x16_msa(uint8_t *dest,
+                                                   const uint8_t *src,
+                                                   ptrdiff_t stride)
+{ /* exported wrapper around hv_mc_qpel_avg_dst_aver_hv_src10_16x16_msa() */
+    hv_mc_qpel_avg_dst_aver_hv_src10_16x16_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_hv_mc_qpel_avg_dst_aver_hv_src10_8x8_msa(uint8_t *dest,
+                                                 const uint8_t *src,
+                                                 ptrdiff_t stride)
+{ /* exported wrapper around hv_mc_qpel_avg_dst_aver_hv_src10_8x8_msa() */
+    hv_mc_qpel_avg_dst_aver_hv_src10_8x8_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_hv_mc_qpel_avg_dst_aver_h_src0_16x16_msa(uint8_t *dest,
+                                                 const uint8_t *src,
+                                                 ptrdiff_t stride)
+{ /* exported wrapper around hv_mc_qpel_avg_dst_aver_h_src0_16x16_msa() */
+    hv_mc_qpel_avg_dst_aver_h_src0_16x16_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_hv_mc_qpel_avg_dst_aver_h_src0_8x8_msa(uint8_t *dest,
+                                               const uint8_t *src,
+                                               ptrdiff_t stride)
+{ /* exported wrapper around hv_mc_qpel_avg_dst_aver_h_src0_8x8_msa() */
+    hv_mc_qpel_avg_dst_aver_h_src0_8x8_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_hv_mc_qpel_avg_dst_16x16_msa(uint8_t *dest,
+                                     const uint8_t *src, ptrdiff_t stride)
+{ /* exported wrapper around hv_mc_qpel_avg_dst_16x16_msa() */
+    hv_mc_qpel_avg_dst_16x16_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_hv_mc_qpel_avg_dst_8x8_msa(uint8_t *dest,
+                                   const uint8_t *src, ptrdiff_t stride)
+{ /* exported wrapper around hv_mc_qpel_avg_dst_8x8_msa() */
+    hv_mc_qpel_avg_dst_8x8_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_hv_mc_qpel_avg_dst_aver_h_src1_16x16_msa(uint8_t *dest,
+                                                 const uint8_t *src,
+                                                 ptrdiff_t stride)
+{ /* exported wrapper around hv_mc_qpel_avg_dst_aver_h_src1_16x16_msa() */
+    hv_mc_qpel_avg_dst_aver_h_src1_16x16_msa(src, stride, dest, stride); /* src passed before dest; the single stride is used for both */
+}
+
+void ff_hv_mc_qpel_avg_dst_aver_h_src1_8x8_msa(uint8_t *dest,
+                                               const uint8_t *src,
+                                               ptrdiff_t stride)
+{ /* wrapper: forwards to the ff_-less helper, src first, same stride passed twice */
+    hv_mc_qpel_avg_dst_aver_h_src1_8x8_msa(src, stride, dest, stride);
+}
+
+void ff_hv_mc_qpel_avg_dst_aver_hv_src01_16x16_msa(uint8_t *dest,
+                                                   const uint8_t *src,
+                                                   ptrdiff_t stride)
+{ /* wrapper: forwards to the ff_-less helper, src first, same stride passed twice */
+    hv_mc_qpel_avg_dst_aver_hv_src01_16x16_msa(src, stride, dest, stride);
+}
+
+void ff_hv_mc_qpel_avg_dst_aver_hv_src01_8x8_msa(uint8_t *dest,
+                                                 const uint8_t *src,
+                                                 ptrdiff_t stride)
+{ /* wrapper: forwards to the ff_-less helper, src first, same stride passed twice */
+    hv_mc_qpel_avg_dst_aver_hv_src01_8x8_msa(src, stride, dest, stride);
+}
+
+void ff_hv_mc_qpel_avg_dst_aver_v_src1_16x16_msa(uint8_t *dest,
+                                                 const uint8_t *src,
+                                                 ptrdiff_t stride)
+{ /* wrapper: forwards to the ff_-less helper, src first, same stride passed twice */
+    hv_mc_qpel_avg_dst_aver_v_src1_16x16_msa(src, stride, dest, stride);
+}
+
+void ff_hv_mc_qpel_avg_dst_aver_v_src1_8x8_msa(uint8_t *dest,
+                                               const uint8_t *src,
+                                               ptrdiff_t stride)
+{ /* wrapper: forwards to the ff_-less helper, src first, same stride passed twice */
+    hv_mc_qpel_avg_dst_aver_v_src1_8x8_msa(src, stride, dest, stride);
+}
+
+void ff_hv_mc_qpel_avg_dst_aver_hv_src11_16x16_msa(uint8_t *dest,
+                                                   const uint8_t *src,
+                                                   ptrdiff_t stride)
+{ /* wrapper: forwards to the ff_-less helper, src first, same stride passed twice */
+    hv_mc_qpel_avg_dst_aver_hv_src11_16x16_msa(src, stride, dest, stride);
+}
+
+void ff_hv_mc_qpel_avg_dst_aver_hv_src11_8x8_msa(uint8_t *dest,
+                                                 const uint8_t *src,
+                                                 ptrdiff_t stride)
+{ /* wrapper: forwards to the ff_-less helper, src first, same stride passed twice */
+    hv_mc_qpel_avg_dst_aver_hv_src11_8x8_msa(src, stride, dest, stride);
+}
+
+void ff_hv_mc_qpel_no_rnd_aver_hv_src00_16x16_msa(uint8_t *dest,
+                                                  const uint8_t *src,
+                                                  ptrdiff_t stride)
+{ /* wrapper: forwards to the ff_-less helper, src first, same stride passed twice */
+    hv_mc_qpel_no_rnd_aver_hv_src00_16x16_msa(src, stride, dest, stride);
+}
+
+void ff_hv_mc_qpel_no_rnd_aver_hv_src00_8x8_msa(uint8_t *dest,
+                                                const uint8_t *src,
+                                                ptrdiff_t stride)
+{ /* wrapper: forwards to the ff_-less helper, src first, same stride passed twice */
+    hv_mc_qpel_no_rnd_aver_hv_src00_8x8_msa(src, stride, dest, stride);
+}
+
+void ff_hv_mc_qpel_no_rnd_aver_v_src0_16x16_msa(uint8_t *dest,
+                                                const uint8_t *src,
+                                                ptrdiff_t stride)
+{ /* wrapper: forwards to the ff_-less helper, src first, same stride passed twice */
+    hv_mc_qpel_no_rnd_aver_v_src0_16x16_msa(src, stride, dest, stride);
+}
+
+void ff_hv_mc_qpel_no_rnd_aver_v_src0_8x8_msa(uint8_t *dest,
+                                              const uint8_t *src,
+                                              ptrdiff_t stride)
+{ /* wrapper: forwards to the ff_-less helper, src first, same stride passed twice */
+    hv_mc_qpel_no_rnd_aver_v_src0_8x8_msa(src, stride, dest, stride);
+}
+
+void ff_hv_mc_qpel_no_rnd_aver_hv_src10_16x16_msa(uint8_t *dest,
+                                                  const uint8_t *src,
+                                                  ptrdiff_t stride)
+{ /* wrapper: forwards to the ff_-less helper, src first, same stride passed twice */
+    hv_mc_qpel_no_rnd_aver_hv_src10_16x16_msa(src, stride, dest, stride);
+}
+
+void ff_hv_mc_qpel_no_rnd_aver_hv_src10_8x8_msa(uint8_t *dest,
+                                                const uint8_t *src,
+                                                ptrdiff_t stride)
+{ /* wrapper: forwards to the ff_-less helper, src first, same stride passed twice */
+    hv_mc_qpel_no_rnd_aver_hv_src10_8x8_msa(src, stride, dest, stride);
+}
+
+void ff_hv_mc_qpel_no_rnd_aver_h_src0_16x16_msa(uint8_t *dest,
+                                                const uint8_t *src,
+                                                ptrdiff_t stride)
+{ /* wrapper: forwards to the ff_-less helper, src first, same stride passed twice */
+    hv_mc_qpel_no_rnd_aver_h_src0_16x16_msa(src, stride, dest, stride);
+}
+
+void ff_hv_mc_qpel_no_rnd_aver_h_src0_8x8_msa(uint8_t *dest,
+                                              const uint8_t *src,
+                                              ptrdiff_t stride)
+{ /* wrapper: forwards to the ff_-less helper, src first, same stride passed twice */
+    hv_mc_qpel_no_rnd_aver_h_src0_8x8_msa(src, stride, dest, stride);
+}
+
+void ff_hv_mc_qpel_no_rnd_16x16_msa(uint8_t *dest,
+                                    const uint8_t *src, ptrdiff_t stride)
+{ /* wrapper: forwards to the ff_-less helper, src first, same stride passed twice */
+    hv_mc_qpel_no_rnd_16x16_msa(src, stride, dest, stride);
+}
+
+void ff_hv_mc_qpel_no_rnd_8x8_msa(uint8_t *dest,
+                                  const uint8_t *src, ptrdiff_t stride)
+{ /* wrapper: forwards to the ff_-less helper, src first, same stride passed twice */
+    hv_mc_qpel_no_rnd_8x8_msa(src, stride, dest, stride);
+}
+
+void ff_hv_mc_qpel_no_rnd_aver_h_src1_16x16_msa(uint8_t *dest,
+                                                const uint8_t *src,
+                                                ptrdiff_t stride)
+{ /* wrapper: forwards to the ff_-less helper, src first, same stride passed twice */
+    hv_mc_qpel_no_rnd_aver_h_src1_16x16_msa(src, stride, dest, stride);
+}
+
+void ff_hv_mc_qpel_no_rnd_aver_h_src1_8x8_msa(uint8_t *dest,
+                                              const uint8_t *src,
+                                              ptrdiff_t stride)
+{ /* wrapper: forwards to the ff_-less helper, src first, same stride passed twice */
+    hv_mc_qpel_no_rnd_aver_h_src1_8x8_msa(src, stride, dest, stride);
+}
+
+void ff_hv_mc_qpel_no_rnd_aver_hv_src01_16x16_msa(uint8_t *dest,
+                                                  const uint8_t *src,
+                                                  ptrdiff_t stride)
+{ /* wrapper: forwards to the ff_-less helper, src first, same stride passed twice */
+    hv_mc_qpel_no_rnd_aver_hv_src01_16x16_msa(src, stride, dest, stride);
+}
+
+void ff_hv_mc_qpel_no_rnd_aver_hv_src01_8x8_msa(uint8_t *dest,
+                                                const uint8_t *src,
+                                                ptrdiff_t stride)
+{ /* wrapper: forwards to the ff_-less helper, src first, same stride passed twice */
+    hv_mc_qpel_no_rnd_aver_hv_src01_8x8_msa(src, stride, dest, stride);
+}
+
+void ff_hv_mc_qpel_no_rnd_aver_v_src1_16x16_msa(uint8_t *dest,
+                                                const uint8_t *src,
+                                                ptrdiff_t stride)
+{ /* wrapper: forwards to the ff_-less helper, src first, same stride passed twice */
+    hv_mc_qpel_no_rnd_aver_v_src1_16x16_msa(src, stride, dest, stride);
+}
+
+void ff_hv_mc_qpel_no_rnd_aver_v_src1_8x8_msa(uint8_t *dest,
+                                              const uint8_t *src,
+                                              ptrdiff_t stride)
+{ /* wrapper: forwards to the ff_-less helper, src first, same stride passed twice */
+    hv_mc_qpel_no_rnd_aver_v_src1_8x8_msa(src, stride, dest, stride);
+}
+
+void ff_hv_mc_qpel_no_rnd_aver_hv_src11_16x16_msa(uint8_t *dest,
+                                                  const uint8_t *src,
+                                                  ptrdiff_t stride)
+{ /* wrapper: forwards to the ff_-less helper, src first, same stride passed twice */
+    hv_mc_qpel_no_rnd_aver_hv_src11_16x16_msa(src, stride, dest, stride);
+}
+
+void ff_hv_mc_qpel_no_rnd_aver_hv_src11_8x8_msa(uint8_t *dest,
+                                                const uint8_t *src,
+                                                ptrdiff_t stride)
+{ /* wrapper: forwards to the ff_-less helper, src first, same stride passed twice */
+    hv_mc_qpel_no_rnd_aver_hv_src11_8x8_msa(src, stride, dest, stride);
+}
diff --git a/libavcodec/mips/sbrdsp_mips.c b/libavcodec/mips/sbrdsp_mips.c
index 63361e4e..1b0a1060 100644
--- a/libavcodec/mips/sbrdsp_mips.c
+++ b/libavcodec/mips/sbrdsp_mips.c
@@ -166,6 +166,7 @@ static void sbr_qmf_post_shuffle_mips(float W[32][2], const float *z)
 }
 
 #if HAVE_MIPSFPU
+#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
 static void sbr_sum64x5_mips(float *z)
 {
     int k;
@@ -438,7 +439,6 @@ static void sbr_qmf_deint_bfly_mips(float *v, const float *src0, const float *sr
     }
 }
 
-#if !HAVE_LOONGSON3
 static void sbr_autocorrelate_mips(const float x[40][2], float phi[3][2][2])
 {
     int i;
@@ -607,7 +607,6 @@ static void sbr_autocorrelate_mips(const float x[40][2], float phi[3][2][2])
         : "memory"
     );
 }
-#endif /* !HAVE_LOONGSON3 */
 
 static void sbr_hf_gen_mips(float (*X_high)[2], const float (*X_low)[2],
                          const float alpha0[2], const float alpha1[2],
@@ -884,6 +883,7 @@ static void sbr_hf_apply_noise_3_mips(float (*Y)[2], const float *s_m,
        phi_sign = -phi_sign;
     }
 }
+#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
 #endif /* HAVE_MIPSFPU */
 #endif /* HAVE_INLINE_ASM */
 
@@ -893,12 +893,11 @@ void ff_sbrdsp_init_mips(SBRDSPContext *s)
     s->qmf_pre_shuffle = sbr_qmf_pre_shuffle_mips;
     s->qmf_post_shuffle = sbr_qmf_post_shuffle_mips;
 #if HAVE_MIPSFPU
+#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
     s->sum64x5 = sbr_sum64x5_mips;
     s->sum_square = sbr_sum_square_mips;
     s->qmf_deint_bfly = sbr_qmf_deint_bfly_mips;
-#if !HAVE_LOONGSON3
     s->autocorrelate = sbr_autocorrelate_mips;
-#endif /* !HAVE_LOONGSON3 */
     s->hf_gen = sbr_hf_gen_mips;
     s->hf_g_filt = sbr_hf_g_filt_mips;
 
@@ -906,6 +905,7 @@ void ff_sbrdsp_init_mips(SBRDSPContext *s)
     s->hf_apply_noise[1] = sbr_hf_apply_noise_1_mips;
     s->hf_apply_noise[2] = sbr_hf_apply_noise_2_mips;
     s->hf_apply_noise[3] = sbr_hf_apply_noise_3_mips;
+#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
 #endif /* HAVE_MIPSFPU */
 #endif /* HAVE_INLINE_ASM */
 }
diff --git a/libavcodec/mips/simple_idct_mmi.c b/libavcodec/mips/simple_idct_mmi.c
new file mode 100644
index 00000000..628e13f7
--- /dev/null
+++ b/libavcodec/mips/simple_idct_mmi.c
@@ -0,0 +1,816 @@
+/*
+ * Loongson SIMD optimized simple idct
+ *
+ * Copyright (c) 2015 Loongson Technology Corporation Limited
+ * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
+ *                    Zhang Shuangshuang <zhangshuangshuang@ict.ac.cn>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "idctdsp_mips.h"
+#include "constants.h"
+
+#define C0 23170 //cos(0*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define C1 22725 //cos(1*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define C2 21407 //cos(2*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define C3 19266 //cos(3*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define C4 16383 //cos(4*M_PI/16)*sqrt(2)*(1<<14) - 0.5 (note: minus, unlike the others)
+#define C5 12873 //cos(5*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define C6 8867  //cos(6*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define C7 4520  //cos(7*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+
+#define ROW_SHIFT 11 // sizes the 1<<(ROW_SHIFT-1) entries in coeffs[]; the *_COND_IDCT calls below pass a literal 11
+#define COL_SHIFT 20 // not referenced by name in the code visible here; the IDCT() calls below pass a literal 20
+
+DECLARE_ALIGNED(8, static const int16_t, coeffs)[]= { /* 4 x int16_t = 8 bytes per row */
+    1<<(ROW_SHIFT-1),   0, 1<<(ROW_SHIFT-1),   0, /* byte   0 */
+    1<<(ROW_SHIFT-1),   1, 1<<(ROW_SHIFT-1),   0, /* byte   8: 2nd element is 1, not 0 -- presumably deliberate, TODO confirm */
+                  C4,  C4,               C4,  C4, /* byte  16 */
+                  C4, -C4,               C4, -C4, /* byte  24 */
+                  C2,  C6,               C2,  C6, /* byte  32 */
+                  C6, -C2,               C6, -C2, /* byte  40 */
+                  C1,  C3,               C1,  C3, /* byte  48 */
+                  C5,  C7,               C5,  C7, /* byte  56 */
+                  C3, -C7,               C3, -C7, /* byte  64 */
+                 -C1, -C5,              -C1, -C5, /* byte  72 */
+                  C5, -C1,               C5, -C1, /* byte  80 */
+                  C7,  C3,               C7,  C3, /* byte  88 */
+                  C7, -C5,               C7, -C5, /* byte  96 */
+                  C3, -C1,               C3, -C1  /* byte 104 */
+}; /* NOTE(review): byte offsets presumably correspond to the N(%2) loads in the asm below -- confirm %2 is coeffs */
+
+void ff_simple_idct_mmi(int16_t *block)
+{
+        DECLARE_ALIGNED(8, int64_t, align_tmp)[16];
+        int16_t * const temp= (int16_t*)align_tmp;
+
+        __asm__ volatile (
+#undef  DC_COND_IDCT
+#define DC_COND_IDCT(src0, src4, src1, src5, dst, rounder, rarg, shift)      \
+        "ldc1 $f0, " #src0 "            \n\t" /* R4     R0      r4      r0 */\
+        "ldc1 $f2, " #src4 "            \n\t" /* R6     R2      r6      r2 */\
+        "ldc1 $f4, " #src1 "            \n\t" /* R3     R1      r3      r1 */\
+        "ldc1 $f6, " #src5 "            \n\t" /* R7     R5      r7      r5 */\
+        "ldc1 $f8, %3                   \n\t"                                \
+        "and  $f8, $f8, $f0             \n\t"                                \
+        "or $f8, $f8, $f2               \n\t"                                \
+        "or $f8, $f8, $f4               \n\t"                                \
+        "or $f8, $f8, $f6               \n\t"                                \
+        "packsswh $f8, $f8, $f8         \n\t"                                \
+        "li $11, " #shift "             \n\t"                                \
+        "mfc1 $10, $f8                  \n\t"                                \
+        "mtc1 $11, $f18                 \n\t"                                \
+        "beqz $10, 1f                   \n\t"                                \
+        "ldc1 $f8, 16(%2)               \n\t" /* C4     C4      C4      C4 */\
+        "pmaddhw $f8, $f8, $f0          \n\t" /* C4R4+C4R0      C4r4+C4r0  */\
+        "ldc1 $f10, 24(%2)              \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddhw $f0, $f0, $f10         \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "ldc1 $f10, 32(%2)              \n\t" /* C6     C2      C6      C2 */\
+        "pmaddhw $f10, $f10, $f2        \n\t" /* C6R6+C2R2      C6r6+C2r2  */\
+        "ldc1 $f12, 40(%2)              \n\t" /* -C2    C6      -C2     C6 */\
+        "pmaddhw $f2, $f2, $f12         \n\t" /* -C2R6+C6R2     -C2r6+C6r2 */\
+        "ldc1 $f14, 48(%2)              \n\t" /* C3     C1      C3      C1 */\
+        "ldc1 $f16, " #rarg "           \n\t"                                \
+        "pmaddhw $f14, $f14, $f4        \n\t" /* C3R3+C1R1      C3r3+C1r1  */\
+        #rounder " $f8, $f8, $f16       \n\t"                                \
+        "mov.d $f12, $f8                \n\t" /* C4R4+C4R0      C4r4+C4r0  */\
+        "paddw $f8, $f8, $f10           \n\t" /* A0             a0         */\
+        "psubw $f12, $f12, $f10         \n\t" /* A3             a3         */\
+        "ldc1 $f10, 56(%2)              \n\t" /* C7     C5      C7      C5 */\
+        "ldc1 $f16, " #rarg "           \n\t"                                \
+        "pmaddhw $f10, $f10, $f6        \n\t" /* C7R7+C5R5      C7r7+C5r5  */\
+        #rounder " $f0, $f0, $f16       \n\t"                                \
+        "paddw $f2, $f2, $f0            \n\t" /* A1             a1         */\
+        "ldc1 $f16, 64(%2)              \n\t"                                \
+        "paddw $f0, $f0, $f0            \n\t"                                \
+        "psubw $f0, $f0, $f2            \n\t" /* A2             a2         */\
+        "pmaddhw $f4, $f4, $f16         \n\t" /* -C7R3+C3R1     -C7r3+C3r1 */\
+        "paddw $f14, $f14, $f10         \n\t" /* B0             b0         */\
+        "ldc1 $f10, 72(%2)              \n\t" /* -C5    -C1     -C5    -C1 */\
+        "pmaddhw $f10, $f10, $f6        \n\t" /* -C5R7-C1R5     -C5r7-C1r5 */\
+        "paddw $f14, $f14, $f8          \n\t" /* A0+B0          a0+b0      */\
+        "paddw $f8, $f8, $f8            \n\t" /* 2A0            2a0        */\
+        "psubw $f8, $f8, $f14           \n\t" /* A0-B0          a0-b0      */\
+        "paddw $f10, $f10, $f4          \n\t" /* B1             b1         */\
+        "psraw $f14, $f14, $f18         \n\t"                                \
+        "psraw $f8, $f8, $f18           \n\t"                                \
+        "mov.d $f4, $f2                 \n\t" /* A1             a1         */\
+        "paddw $f2, $f2, $f10           \n\t" /* A1+B1          a1+b1      */\
+        "psubw $f4, $f4, $f10           \n\t" /* A1-B1          a1-b1      */\
+        "psraw $f2, $f2, $f18           \n\t"                                \
+        "psraw $f4, $f4, $f18           \n\t"                                \
+        "packsswh $f14, $f14, $f2       \n\t" /* A1+B1 a1+b1 A0+B0 a0+b0   */\
+        "packsswh $f4, $f4, $f8         \n\t" /* A0-B0 a0-b0 A1-B1 a1-b1   */\
+        "sdc1 $f14, " #dst "            \n\t"                                \
+        "ldc1 $f2, " #src1 "            \n\t" /* R3     R1      r3      r1 */\
+        "ldc1 $f8, 80(%2)               \n\t" /* -C1    C5      -C1     C5 */\
+        "sdc1 $f4, 24+" #dst "          \n\t"                                \
+        "pmaddhw $f8, $f8, $f2          \n\t" /* -C1R3+C5R1     -C1r3+C5r1 */\
+        "ldc1 $f16, 96(%2)              \n\t"                                \
+        "ldc1 $f14, 88(%2)              \n\t" /* C3     C7      C3      C7 */\
+        "pmaddhw $f2, $f2, $f16         \n\t" /* -C5R3+C7R1     -C5r3+C7r1 */\
+        "pmaddhw $f14, $f14, $f6        \n\t" /* C3R7+C7R5      C3r7+C7r5  */\
+        "ldc1 $f16, 104(%2)             \n\t"                                \
+        "mov.d $f4, $f0                 \n\t" /* A2             a2         */\
+        "pmaddhw $f6, $f6, $f16         \n\t" /* -C1R7+C3R5     -C1r7+C3r5 */\
+        "paddw $f8, $f8, $f14           \n\t" /* B2             b2         */\
+        "paddw $f4, $f4, $f8            \n\t" /* A2+B2          a2+b2      */\
+        "psubw $f0, $f0, $f8            \n\t" /* a2-B2          a2-b2      */\
+        "psraw $f4, $f4, $f18           \n\t"                                \
+        "psraw $f0, $f0, $f18           \n\t"                                \
+        "mov.d $f8, $f12                \n\t" /* A3             a3         */\
+        "paddw $f6, $f6, $f2            \n\t" /* B3             b3         */\
+        "paddw $f12, $f12, $f6          \n\t" /* A3+B3          a3+b3      */\
+        "psubw $f8, $f8, $f6            \n\t" /* a3-B3          a3-b3      */\
+        "psraw $f12, $f12, $f18         \n\t"                                \
+        "packsswh $f4, $f4, $f12        \n\t" /* A3+B3 a3+b3 A2+B2 a2+b2   */\
+        "sdc1 $f4, 8+" #dst "           \n\t"                                \
+        "psraw $f8, $f8, $f18           \n\t"                                \
+        "packsswh $f8, $f8, $f0         \n\t" /* A2-B2 a2-b2 A3-B3 a3-b3   */\
+        "sdc1 $f8, 16+" #dst "          \n\t"                                \
+        "b 2f                           \n\t"                                \
+        "1:                             \n\t"                                \
+        "li $10, 16                     \n\t"                                \
+        "mtc1 $10, $f16                 \n\t"                                \
+        "psllw $f0, $f0, $f16           \n\t"                                \
+        "ldc1 $f16, %4                  \n\t"                                \
+        "paddw $f0, $f0, $f16           \n\t"                                \
+        "li $10, 13                     \n\t"                                \
+        "mtc1 $10, $f16                 \n\t"                                \
+        "psraw $f0, $f0, $f16           \n\t"                                \
+        "packsswh $f0, $f0, $f0         \n\t"                                \
+        "sdc1 $f0, " #dst "             \n\t"                                \
+        "sdc1 $f0, 8+" #dst "           \n\t"                                \
+        "sdc1 $f0, 16+" #dst "          \n\t"                                \
+        "sdc1 $f0, 24+" #dst "          \n\t"                                \
+        "2:                             \n\t"
+
+#undef  Z_COND_IDCT
+#define Z_COND_IDCT(src0, src4, src1, src5, dst, rounder, rarg, shift, bt)   \
+        "ldc1 $f0, " #src0 "            \n\t" /* R4     R0      r4      r0 */\
+        "ldc1 $f2, " #src4 "            \n\t" /* R6     R2      r6      r2 */\
+        "ldc1 $f4, " #src1 "            \n\t" /* R3     R1      r3      r1 */\
+        "ldc1 $f6, " #src5 "            \n\t" /* R7     R5      r7      r5 */\
+        "mov.d $f8, $f0                 \n\t"                                \
+        "or $f8, $f8, $f2               \n\t"                                \
+        "or $f8, $f8, $f4               \n\t"                                \
+        "or $f8, $f8, $f6               \n\t"                                \
+        "packsswh $f8, $f8, $f8         \n\t"                                \
+        "mfc1 $10, $f8                  \n\t"                                \
+        "beqz $10, " #bt "              \n\t"                                \
+        "ldc1 $f8, 16(%2)               \n\t" /* C4     C4      C4      C4 */\
+        "pmaddhw $f8, $f8, $f0          \n\t" /* C4R4+C4R0      C4r4+C4r0  */\
+        "ldc1 $f10, 24(%2)              \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddhw $f0, $f0, $f10         \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "ldc1 $f10, 32(%2)              \n\t" /* C6     C2      C6      C2 */\
+        "pmaddhw $f10, $f10, $f2        \n\t" /* C6R6+C2R2      C6r6+C2r2  */\
+        "ldc1 $f12, 40(%2)              \n\t" /* -C2    C6      -C2     C6 */\
+        "pmaddhw $f2, $f2, $f12         \n\t" /* -C2R6+C6R2     -C2r6+C6r2 */\
+        "ldc1 $f14, 48(%2)              \n\t" /* C3     C1      C3      C1 */\
+        "ldc1 $f16, " #rarg "           \n\t"                                \
+        "pmaddhw $f14, $f14, $f4        \n\t" /* C3R3+C1R1      C3r3+C1r1  */\
+        #rounder " $f8, $f8, $f16       \n\t"                                \
+        "mov.d $f12, $f8                \n\t" /* C4R4+C4R0      C4r4+C4r0  */\
+        "paddw $f8, $f8, $f10           \n\t" /* A0             a0         */\
+        "psubw $f12, $f12, $f10         \n\t" /* A3             a3         */\
+        "ldc1 $f10, 56(%2)              \n\t" /* C7     C5      C7      C5 */\
+        "ldc1 $f16, " #rarg "           \n\t"                                \
+        "pmaddhw $f10, $f10, $f6        \n\t" /* C7R7+C5R5      C7r7+C5r5  */\
+        #rounder " $f0, $f0, $f16       \n\t"                                \
+        "paddw $f2, $f2, $f0            \n\t" /* A1             a1         */\
+        "paddw $f0, $f0, $f0            \n\t"                                \
+        "ldc1 $f16, 64(%2)              \n\t"                                \
+        "psubw $f0, $f0, $f2            \n\t" /* A2             a2         */\
+        "pmaddhw $f4, $f4, $f16         \n\t" /* -C7R3+C3R1     -C7r3+C3r1 */\
+        "paddw $f14, $f14, $f10         \n\t" /* B0             b0         */\
+        "ldc1 $f10, 72(%2)              \n\t" /* -C5    -C1     -C5    -C1 */\
+        "pmaddhw $f10, $f10, $f6        \n\t" /* -C5R7-C1R5     -C5r7-C1r5 */\
+        "paddw $f14, $f14, $f8          \n\t" /* A0+B0          a0+b0      */\
+        "paddw $f8, $f8, $f8            \n\t" /* 2A0            2a0        */\
+        "li $10, " #shift "             \n\t"                                \
+        "psubw $f8, $f8, $f14           \n\t" /* A0-B0          a0-b0      */\
+        "mtc1 $10, $f18                 \n\t"                                \
+        "paddw $f10, $f10, $f4          \n\t" /* B1             b1         */\
+        "psraw $f14, $f14, $f18         \n\t"                                \
+        "psraw $f8, $f8, $f18           \n\t"                                \
+        "mov.d $f4, $f2                 \n\t" /* A1             a1         */\
+        "paddw $f2, $f2, $f10           \n\t" /* A1+B1          a1+b1      */\
+        "psubw $f4, $f4, $f10           \n\t" /* A1-B1          a1-b1      */\
+        "psraw $f2, $f2, $f18           \n\t"                                \
+        "psraw $f4, $f4, $f18           \n\t"                                \
+        "packsswh $f14, $f14, $f2       \n\t" /* A1+B1 a1+b1 A0+B0 a0+b0   */\
+        "packsswh $f4, $f4, $f8         \n\t" /* A0-B0 a0-b0 A1-B1 a1-b1   */\
+        "sdc1 $f14, " #dst "            \n\t"                                \
+        "ldc1 $f2, " #src1 "            \n\t" /* R3     R1      r3      r1 */\
+        "ldc1 $f8, 80(%2)               \n\t" /* -C1    C5      -C1     C5 */\
+        "sdc1 $f4, 24+" #dst "          \n\t"                                \
+        "pmaddhw $f8, $f8, $f2          \n\t" /* -C1R3+C5R1     -C1r3+C5r1 */\
+        "ldc1 $f16, 96(%2)              \n\t"                                \
+        "ldc1 $f14, 88(%2)              \n\t" /* C3     C7      C3      C7 */\
+        "pmaddhw $f2, $f2, $f16         \n\t" /* -C5R3+C7R1     -C5r3+C7r1 */\
+        "pmaddhw $f14, $f14, $f6        \n\t" /* C3R7+C7R5      C3r7+C7r5  */\
+        "ldc1 $f16, 104(%2)             \n\t"                                \
+        "mov.d $f4, $f0                 \n\t" /* A2             a2         */\
+        "pmaddhw $f6, $f6, $f16         \n\t" /* -C1R7+C3R5     -C1r7+C3r5 */\
+        "paddw $f8, $f8, $f14           \n\t" /* B2             b2         */\
+        "paddw $f4, $f4, $f8            \n\t" /* A2+B2          a2+b2      */\
+        "psubw $f0, $f0, $f8            \n\t" /* a2-B2          a2-b2      */\
+        "psraw $f4, $f4, $f18           \n\t"                                \
+        "psraw $f0, $f0, $f18           \n\t"                                \
+        "mov.d $f8, $f12                \n\t" /* A3             a3         */\
+        "paddw $f6, $f6, $f2            \n\t" /* B3             b3         */\
+        "paddw $f12, $f12, $f6          \n\t" /* A3+B3          a3+b3      */\
+        "psubw $f8, $f8, $f6            \n\t" /* a3-B3          a3-b3      */\
+        "psraw $f12, $f12, $f18         \n\t"                                \
+        "packsswh $f4, $f4, $f12        \n\t" /* A3+B3 a3+b3 A2+B2 a2+b2   */\
+        "sdc1 $f4, 8+" #dst "           \n\t"                                \
+        "psraw $f8, $f8, $f18           \n\t"                                \
+        "packsswh $f8, $f8, $f0         \n\t" /* A2-B2 a2-b2 A3-B3 a3-b3   */\
+        "sdc1 $f8, 16+" #dst "          \n\t"                                \
+
+        //IDCT(       src0,   src4,   src1,   src5,    dst,     rounder, shift)
+        DC_COND_IDCT(0(%0),  8(%0), 16(%0), 24(%0),  0(%1), paddw,8(%2), 11)
+        Z_COND_IDCT(32(%0), 40(%0), 48(%0), 56(%0), 32(%1), paddw,(%2), 11, 4f)
+        Z_COND_IDCT(64(%0), 72(%0), 80(%0), 88(%0), 64(%1), paddw,(%2), 11, 2f)
+        Z_COND_IDCT(96(%0),104(%0),112(%0),120(%0), 96(%1), paddw,(%2), 11, 1f)
+
+#undef  IDCT
+#define IDCT(src0, src4, src1, src5, dst, shift)                             \
+        "ldc1 $f0, " #src0 "            \n\t" /* R4     R0      r4      r0 */\
+        "ldc1 $f2, " #src4 "            \n\t" /* R6     R2      r6      r2 */\
+        "ldc1 $f4, " #src1 "            \n\t" /* R3     R1      r3      r1 */\
+        "ldc1 $f6, " #src5 "            \n\t" /* R7     R5      r7      r5 */\
+        "ldc1 $f8, 16(%2)               \n\t" /* C4     C4      C4      C4 */\
+        "pmaddhw $f8, $f8, $f0          \n\t" /* C4R4+C4R0      C4r4+C4r0  */\
+        "ldc1 $f10, 24(%2)              \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddhw $f0, $f0, $f10         \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "ldc1 $f10, 32(%2)              \n\t" /* C6     C2      C6      C2 */\
+        "pmaddhw $f10, $f10, $f2        \n\t" /* C6R6+C2R2      C6r6+C2r2  */\
+        "ldc1 $f12, 40(%2)              \n\t" /* -C2    C6      -C2     C6 */\
+        "pmaddhw $f2, $f2, $f12         \n\t" /* -C2R6+C6R2     -C2r6+C6r2 */\
+        "mov.d $f12, $f8                \n\t" /* C4R4+C4R0      C4r4+C4r0  */\
+        "ldc1 $f14, 48(%2)              \n\t" /* C3     C1      C3      C1 */\
+        "pmaddhw $f14, $f14, $f4        \n\t" /* C3R3+C1R1      C3r3+C1r1  */\
+        "paddw $f8, $f8, $f10           \n\t" /* A0             a0         */\
+        "psubw $f12, $f12, $f10         \n\t" /* A3             a3         */\
+        "mov.d $f10, $f0                \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "paddw $f0, $f0, $f2            \n\t" /* A1             a1         */\
+        "psubw $f10, $f10, $f2          \n\t" /* A2             a2         */\
+        "ldc1 $f2, 56(%2)               \n\t" /* C7     C5      C7      C5 */\
+        "ldc1 $f16, 64(%2)              \n\t"                                \
+        "pmaddhw $f2, $f2, $f6          \n\t" /* C7R7+C5R5      C7r7+C5r5  */\
+        "pmaddhw $f4, $f4, $f16         \n\t" /* -C7R3+C3R1     -C7r3+C3r1 */\
+        "li $10, " #shift "             \n\t"                                \
+        "paddw $f14, $f14, $f2          \n\t" /* B0             b0         */\
+        "ldc1 $f2, 72(%2)               \n\t" /* -C5    -C1     -C5    -C1 */\
+        "mtc1 $10, $f18                 \n\t"                                \
+        "pmaddhw $f2, $f2, $f6          \n\t" /* -C5R7-C1R5     -C5r7-C1r5 */\
+        "paddw $f14, $f14, $f8          \n\t" /* A0+B0          a0+b0      */\
+        "paddw $f8, $f8, $f8            \n\t" /* 2A0            2a0        */\
+        "psubw $f8, $f8, $f14           \n\t" /* A0-B0          a0-b0      */\
+        "paddw $f2, $f2, $f4            \n\t" /* B1             b1         */\
+        "psraw $f14, $f14, $f18         \n\t"                                \
+        "psraw $f8, $f8, $f18           \n\t"                                \
+        "mov.d $f4, $f0                 \n\t" /* A1             a1         */\
+        "paddw $f0, $f0, $f2            \n\t" /* A1+B1          a1+b1      */\
+        "psubw $f4, $f4, $f2            \n\t" /* A1-B1          a1-b1      */\
+        "psraw $f0, $f0, $f18           \n\t"                                \
+        "psraw $f4, $f4, $f18           \n\t"                                \
+        "packsswh $f14, $f14, $f14      \n\t" /* A0+B0          a0+b0      */\
+        "swc1 $f14, " #dst "            \n\t"                                \
+        "packsswh $f0, $f0, $f0         \n\t" /* A1+B1          a1+b1      */\
+        "swc1 $f0, 16+" #dst "          \n\t"                                \
+        "packsswh $f4, $f4, $f4         \n\t" /* A1-B1          a1-b1      */\
+        "swc1 $f4, 96+" #dst "          \n\t"                                \
+        "packsswh $f8, $f8, $f8         \n\t" /* A0-B0          a0-b0      */\
+        "swc1 $f8, 112+" #dst "         \n\t"                                \
+        "ldc1 $f0, " #src1 "            \n\t" /* R3     R1      r3      r1 */\
+        "ldc1 $f8, 80(%2)               \n\t" /* -C1    C5      -C1     C5 */\
+        "pmaddhw $f8, $f8, $f0          \n\t" /* -C1R3+C5R1     -C1r3+C5r1 */\
+        "ldc1 $f16, 96(%2)              \n\t"                                \
+        "ldc1 $f14, 88(%2)              \n\t" /* C3     C7      C3      C7 */\
+        "pmaddhw $f0, $f0, $f16         \n\t" /* -C5R3+C7R1     -C5r3+C7r1 */\
+        "pmaddhw $f14, $f14, $f6        \n\t" /* C3R7+C7R5      C3r7+C7r5  */\
+        "ldc1 $f16, 104(%2)             \n\t"                                \
+        "mov.d $f4, $f10                \n\t" /* A2             a2         */\
+        "pmaddhw $f6, $f6, $f16         \n\t" /* -C1R7+C3R5     -C1r7+C3r5 */\
+        "paddw $f8, $f8, $f14           \n\t" /* B2             b2         */\
+        "paddw $f4, $f4, $f8            \n\t" /* A2+B2          a2+b2      */\
+        "psubw $f10, $f10, $f8          \n\t" /* a2-B2          a2-b2      */\
+        "psraw $f4, $f4, $f18           \n\t"                                \
+        "psraw $f10, $f10, $f18         \n\t"                                \
+        "mov.d $f8, $f12                \n\t" /* A3             a3         */\
+        "paddw $f6, $f6, $f0            \n\t" /* B3             b3         */\
+        "paddw $f12, $f12, $f6          \n\t" /* A3+B3          a3+b3      */\
+        "psubw $f8, $f8, $f6            \n\t" /* a3-B3          a3-b3      */\
+        "psraw $f12, $f12, $f18         \n\t"                                \
+        "psraw $f8, $f8, $f18           \n\t"                                \
+        "packsswh $f4, $f4, $f4         \n\t" /* A2+B2          a2+b2      */\
+        "packsswh $f12, $f12, $f12      \n\t" /* A3+B3          a3+b3      */\
+        "swc1 $f4, 32+" #dst "          \n\t"                                \
+        "packsswh $f8, $f8, $f8         \n\t" /* A3-B3          a3-b3      */\
+        "packsswh $f10, $f10, $f10      \n\t" /* A2-B2          a2-b2      */\
+        "swc1 $f12, 48+" #dst "         \n\t"                                \
+        "swc1 $f8, 64+" #dst "          \n\t"                                \
+        "swc1 $f10, 80+" #dst "         \n\t"
+
+        //IDCT(  src0,   src4,   src1,    src5,    dst, shift)
+        IDCT(    (%1), 64(%1), 32(%1),  96(%1),  0(%0),    20)
+        IDCT(   8(%1), 72(%1), 40(%1), 104(%1),  4(%0),    20)
+        IDCT(  16(%1), 80(%1), 48(%1), 112(%1),  8(%0),    20)
+        IDCT(  24(%1), 88(%1), 56(%1), 120(%1), 12(%0),    20)
+        "b 9f                           \n\t"
+
+        "# .p2align 4                   \n\t"
+        "4:                             \n\t"
+        Z_COND_IDCT(64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddw,(%2), 11, 6f)
+        Z_COND_IDCT(96(%0),104(%0),112(%0),120(%0), 96(%1),paddw,(%2), 11, 5f)
+
+#undef  IDCT
+#define IDCT(src0, src4, src1, src5, dst, shift) /* src1 is not used */      \
+        "ldc1 $f0, " #src0 "            \n\t" /* R4     R0      r4      r0 */\
+        "ldc1 $f2, " #src4 "            \n\t" /* R6     R2      r6      r2 */\
+        "ldc1 $f6, " #src5 "            \n\t" /* R7     R5      r7      r5 */\
+        "ldc1 $f8, 16(%2)               \n\t" /* C4     C4      C4      C4 */\
+        "pmaddhw $f8, $f8, $f0          \n\t" /* C4R4+C4R0      C4r4+C4r0  */\
+        "ldc1 $f10, 24(%2)              \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddhw $f0, $f0, $f10         \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "ldc1 $f10, 32(%2)              \n\t" /* C6     C2      C6      C2 */\
+        "pmaddhw $f10, $f10, $f2        \n\t" /* C6R6+C2R2      C6r6+C2r2  */\
+        "ldc1 $f12, 40(%2)              \n\t" /* -C2    C6      -C2     C6 */\
+        "pmaddhw $f2, $f2, $f12         \n\t" /* -C2R6+C6R2     -C2r6+C6r2 */\
+        "mov.d $f12, $f8                \n\t" /* C4R4+C4R0      C4r4+C4r0  */\
+        "paddw $f8, $f8, $f10           \n\t" /* A0             a0         */\
+        "psubw $f12, $f12, $f10         \n\t" /* A3             a3         */\
+        "mov.d $f10, $f0                \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "paddw $f0, $f0, $f2            \n\t" /* A1             a1         */\
+        "psubw $f10, $f10, $f2          \n\t" /* A2             a2         */\
+        "ldc1 $f2, 56(%2)               \n\t" /* C7     C5      C7      C5 */\
+        "li $10, " #shift "             \n\t" /* shift count -> $10        */\
+        "pmaddhw $f2, $f2, $f6          \n\t" /* C7R7+C5R5      C7r7+C5r5  */\
+        "ldc1 $f14, 72(%2)              \n\t" /* -C5    -C1     -C5    -C1 */\
+        "mtc1 $10, $f18                 \n\t" /* $10 -> $f18 (shift count) */\
+        "pmaddhw $f14, $f14, $f6        \n\t" /* -C5R7-C1R5     -C5r7-C1r5 */\
+        "paddw $f2, $f2, $f8            \n\t" /* A0+B0          a0+b0      */\
+        "paddw $f8, $f8, $f8            \n\t" /* 2A0            2a0        */\
+        "psubw $f8, $f8, $f2            \n\t" /* A0-B0          a0-b0      */\
+        "psraw $f2, $f2, $f18           \n\t"                                \
+        "psraw $f8, $f8, $f18           \n\t"                                \
+        "mov.d $f4, $f0                 \n\t" /* A1             a1         */\
+        "paddw $f0, $f0, $f14           \n\t" /* A1+B1          a1+b1      */\
+        "psubw $f4, $f4, $f14           \n\t" /* A1-B1          a1-b1      */\
+        "psraw $f0, $f0, $f18           \n\t"                                \
+        "psraw $f4, $f4, $f18           \n\t"                                \
+        "packsswh $f2, $f2, $f2         \n\t" /* A0+B0          a0+b0      */\
+        "swc1 $f2, " #dst "             \n\t"                                \
+        "packsswh $f0, $f0, $f0         \n\t" /* A1+B1          a1+b1      */\
+        "swc1 $f0, 16+" #dst "          \n\t"                                \
+        "packsswh $f4, $f4, $f4         \n\t" /* A1-B1          a1-b1      */\
+        "swc1 $f4, 96+" #dst "          \n\t"                                \
+        "packsswh $f8, $f8, $f8         \n\t" /* A0-B0          a0-b0      */\
+        "swc1 $f8, 112+" #dst "         \n\t"                                \
+        "ldc1 $f2, 88(%2)               \n\t" /* C3     C7      C3      C7 */\
+        "ldc1 $f16, 104(%2)             \n\t"                                \
+        "pmaddhw $f2, $f2, $f6          \n\t" /* C3R7+C7R5      C3r7+C7r5  */\
+        "mov.d $f4, $f10                \n\t" /* A2             a2         */\
+        "pmaddhw $f6, $f6, $f16         \n\t" /* -C1R7+C3R5     -C1r7+C3r5 */\
+        "paddw $f4, $f4, $f2            \n\t" /* A2+B2          a2+b2      */\
+        "psubw $f10, $f10, $f2          \n\t" /* A2-B2          a2-b2      */\
+        "psraw $f4, $f4, $f18           \n\t"                                \
+        "psraw $f10, $f10, $f18         \n\t"                                \
+        "mov.d $f2, $f12                \n\t" /* A3             a3         */\
+        "paddw $f12, $f12, $f6          \n\t" /* A3+B3          a3+b3      */\
+        "psubw $f2, $f2, $f6            \n\t" /* A3-B3          a3-b3      */\
+        "psraw $f12, $f12, $f18         \n\t"                                \
+        "psraw $f2, $f2, $f18           \n\t"                                \
+        "packsswh $f4, $f4, $f4         \n\t" /* A2+B2          a2+b2      */\
+        "packsswh $f12, $f12, $f12      \n\t" /* A3+B3          a3+b3      */\
+        "swc1 $f4, 32+" #dst "          \n\t"                                \
+        "packsswh $f2, $f2, $f2         \n\t" /* A3-B3          a3-b3      */\
+        "packsswh $f10, $f10, $f10      \n\t" /* A2-B2          a2-b2      */\
+        "swc1 $f12, 48+" #dst "         \n\t"                                \
+        "swc1 $f2, 64+" #dst "          \n\t"                                \
+        "swc1 $f10, 80+" #dst "         \n\t"
+
+        //IDCT(  src0,   src4,   src1,    src5,    dst, shift)
+        IDCT(    (%1), 64(%1), 32(%1),  96(%1),  0(%0),    20)
+        IDCT(   8(%1), 72(%1), 40(%1), 104(%1),  4(%0),    20)
+        IDCT(  16(%1), 80(%1), 48(%1), 112(%1),  8(%0),    20)
+        IDCT(  24(%1), 88(%1), 56(%1), 120(%1), 12(%0),    20)
+        "b 9f                           \n\t"
+
+        "# .p2align 4                   \n\t"
+        "6:                             \n\t"
+        Z_COND_IDCT(96(%0),104(%0),112(%0),120(%0), 96(%1),paddw,(%2), 11, 7f)
+
+#undef  IDCT
+#define IDCT(src0, src4, src1, src5, dst, shift) /* src4, src1 not used */   \
+        "ldc1 $f0, " #src0 "            \n\t" /* R4     R0      r4      r0 */\
+        "ldc1 $f6, " #src5 "            \n\t" /* R7     R5      r7      r5 */\
+        "ldc1 $f8, 16(%2)               \n\t" /* C4     C4      C4      C4 */\
+        "pmaddhw $f8, $f8, $f0          \n\t" /* C4R4+C4R0      C4r4+C4r0  */\
+        "ldc1 $f10, 24(%2)              \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddhw $f0, $f0, $f10         \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "mov.d $f12, $f8                \n\t" /* C4R4+C4R0      C4r4+C4r0  */\
+        "mov.d $f10, $f0                \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "ldc1 $f2, 56(%2)               \n\t" /* C7     C5      C7      C5 */\
+        "pmaddhw $f2, $f2, $f6          \n\t" /* C7R7+C5R5      C7r7+C5r5  */\
+        "ldc1 $f14, 72(%2)              \n\t" /* -C5    -C1     -C5    -C1 */\
+        "li $10, " #shift "             \n\t" /* shift count -> $10        */\
+        "pmaddhw $f14, $f14, $f6        \n\t" /* -C5R7-C1R5     -C5r7-C1r5 */\
+        "paddw $f2, $f2, $f8            \n\t" /* A0+B0          a0+b0      */\
+        "mtc1 $10, $f18                 \n\t" /* $10 -> $f18 (shift count) */\
+        "paddw $f8, $f8, $f8            \n\t" /* 2A0            2a0        */\
+        "psubw $f8, $f8, $f2            \n\t" /* A0-B0          a0-b0      */\
+        "psraw $f2, $f2, $f18           \n\t"                                \
+        "psraw $f8, $f8, $f18           \n\t"                                \
+        "mov.d $f4, $f0                 \n\t" /* A1             a1         */\
+        "paddw $f0, $f0, $f14           \n\t" /* A1+B1          a1+b1      */\
+        "psubw $f4, $f4, $f14           \n\t" /* A1-B1          a1-b1      */\
+        "psraw $f0, $f0, $f18           \n\t"                                \
+        "psraw $f4, $f4, $f18           \n\t"                                \
+        "packsswh $f2, $f2, $f2         \n\t" /* A0+B0          a0+b0      */\
+        "swc1 $f2, " #dst "             \n\t"                                \
+        "packsswh $f0, $f0, $f0         \n\t" /* A1+B1          a1+b1      */\
+        "swc1 $f0, 16+" #dst "          \n\t"                                \
+        "packsswh $f4, $f4, $f4         \n\t" /* A1-B1          a1-b1      */\
+        "swc1 $f4, 96+" #dst "          \n\t"                                \
+        "packsswh $f8, $f8, $f8         \n\t" /* A0-B0          a0-b0      */\
+        "swc1 $f8, 112+" #dst "         \n\t"                                \
+        "ldc1 $f2, 88(%2)               \n\t" /* C3     C7      C3      C7 */\
+        "ldc1 $f16, 104(%2)             \n\t"                                \
+        "pmaddhw $f2, $f2, $f6          \n\t" /* C3R7+C7R5      C3r7+C7r5  */\
+        "mov.d $f4, $f10                \n\t" /* A2             a2         */\
+        "pmaddhw $f6, $f6, $f16         \n\t" /* -C1R7+C3R5     -C1r7+C3r5 */\
+        "paddw $f4, $f4, $f2            \n\t" /* A2+B2          a2+b2      */\
+        "psubw $f10, $f10, $f2          \n\t" /* A2-B2          a2-b2      */\
+        "psraw $f4, $f4, $f18           \n\t"                                \
+        "psraw $f10, $f10, $f18         \n\t"                                \
+        "mov.d $f2, $f12                \n\t" /* A3             a3         */\
+        "paddw $f12, $f12, $f6          \n\t" /* A3+B3          a3+b3      */\
+        "psubw $f2, $f2, $f6            \n\t" /* A3-B3          a3-b3      */\
+        "psraw $f12, $f12, $f18         \n\t"                                \
+        "psraw $f2, $f2, $f18           \n\t"                                \
+        "packsswh $f4, $f4, $f4         \n\t" /* A2+B2          a2+b2      */\
+        "packsswh $f12, $f12, $f12      \n\t" /* A3+B3          a3+b3      */\
+        "swc1 $f4, 32+" #dst "          \n\t"                                \
+        "packsswh $f2, $f2, $f2         \n\t" /* A3-B3          a3-b3      */\
+        "packsswh $f10, $f10, $f10      \n\t" /* A2-B2          a2-b2      */\
+        "swc1 $f12, 48+" #dst "         \n\t"                                \
+        "swc1 $f2, 64+" #dst "          \n\t"                                \
+        "swc1 $f10, 80+" #dst "         \n\t"
+
+        //IDCT(  src0,   src4,   src1,    src5,    dst, shift)
+        IDCT(    (%1), 64(%1), 32(%1),  96(%1),  0(%0),    20)
+        IDCT(   8(%1), 72(%1), 40(%1), 104(%1),  4(%0),    20)
+        IDCT(  16(%1), 80(%1), 48(%1), 112(%1),  8(%0),    20)
+        IDCT(  24(%1), 88(%1), 56(%1), 120(%1), 12(%0),    20)
+        "b 9f                           \n\t"
+
+        "# .p2align 4                   \n\t"
+        "2:                             \n\t"
+        Z_COND_IDCT(96(%0),104(%0),112(%0),120(%0), 96(%1),paddw,(%2), 11, 3f)
+
+#undef  IDCT
+#define IDCT(src0, src4, src1, src5, dst, shift) /* src4 is not used */      \
+        "ldc1 $f0, " #src0 "            \n\t" /* R4     R0      r4      r0 */\
+        "ldc1 $f4, " #src1 "            \n\t" /* R3     R1      r3      r1 */\
+        "ldc1 $f6, " #src5 "            \n\t" /* R7     R5      r7      r5 */\
+        "ldc1 $f8, 16(%2)               \n\t" /* C4     C4      C4      C4 */\
+        "pmaddhw $f8, $f8, $f0          \n\t" /* C4R4+C4R0      C4r4+C4r0  */\
+        "ldc1 $f10, 24(%2)              \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddhw $f0, $f0, $f10         \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "mov.d $f12, $f8                \n\t" /* C4R4+C4R0      C4r4+C4r0  */\
+        "ldc1 $f14, 48(%2)              \n\t" /* C3     C1      C3      C1 */\
+        "pmaddhw $f14, $f14, $f4        \n\t" /* C3R3+C1R1      C3r3+C1r1  */\
+        "mov.d $f10, $f0                \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "ldc1 $f2, 56(%2)               \n\t" /* C7     C5      C7      C5 */\
+        "pmaddhw $f2, $f2, $f6          \n\t" /* C7R7+C5R5      C7r7+C5r5  */\
+        "ldc1 $f16, 64(%2)              \n\t"                                \
+        "pmaddhw $f4, $f4, $f16         \n\t" /* -C7R3+C3R1     -C7r3+C3r1 */\
+        "paddw $f14, $f14, $f2          \n\t" /* B0             b0         */\
+        "ldc1 $f2, 72(%2)               \n\t" /* -C5    -C1     -C5    -C1 */\
+        "li $10, " #shift "             \n\t" /* shift count -> $10        */\
+        "pmaddhw $f2, $f2, $f6          \n\t" /* -C5R7-C1R5     -C5r7-C1r5 */\
+        "paddw $f14, $f14, $f8          \n\t" /* A0+B0          a0+b0      */\
+        "mtc1 $10, $f18                 \n\t" /* $10 -> $f18 (shift count) */\
+        "paddw $f8, $f8, $f8            \n\t" /* 2A0            2a0        */\
+        "psubw $f8, $f8, $f14           \n\t" /* A0-B0          a0-b0      */\
+        "paddw $f2, $f2, $f4            \n\t" /* B1             b1         */\
+        "psraw $f14, $f14, $f18         \n\t"                                \
+        "psraw $f8, $f8, $f18           \n\t"                                \
+        "mov.d $f4, $f0                 \n\t" /* A1             a1         */\
+        "paddw $f0, $f0, $f2            \n\t" /* A1+B1          a1+b1      */\
+        "psubw $f4, $f4, $f2            \n\t" /* A1-B1          a1-b1      */\
+        "psraw $f0, $f0, $f18           \n\t"                                \
+        "psraw $f4, $f4, $f18           \n\t"                                \
+        "packsswh $f14, $f14, $f14      \n\t" /* A0+B0          a0+b0      */\
+        "swc1 $f14, " #dst "            \n\t"                                \
+        "packsswh $f0, $f0, $f0         \n\t" /* A1+B1          a1+b1      */\
+        "swc1 $f0, 16+" #dst "          \n\t"                                \
+        "packsswh $f4, $f4, $f4         \n\t" /* A1-B1          a1-b1      */\
+        "swc1 $f4, 96+" #dst "          \n\t"                                \
+        "packsswh $f8, $f8, $f8         \n\t" /* A0-B0          a0-b0      */\
+        "swc1 $f8, 112+" #dst "         \n\t"                                \
+        "ldc1 $f0, " #src1 "            \n\t" /* R3     R1      r3      r1 */\
+        "ldc1 $f8, 80(%2)               \n\t" /* -C1    C5      -C1     C5 */\
+        "pmaddhw $f8, $f8, $f0          \n\t" /* -C1R3+C5R1     -C1r3+C5r1 */\
+        "ldc1 $f14, 88(%2)              \n\t" /* C3     C7      C3      C7 */\
+        "ldc1 $f16, 96(%2)              \n\t"                                \
+        "pmaddhw $f0, $f0, $f16         \n\t" /* -C5R3+C7R1     -C5r3+C7r1 */\
+        "pmaddhw $f14, $f14, $f6        \n\t" /* C3R7+C7R5      C3r7+C7r5  */\
+        "mov.d $f4, $f10                \n\t" /* A2             a2         */\
+        "ldc1 $f16, 104(%2)             \n\t"                                \
+        "pmaddhw $f6, $f6, $f16         \n\t" /* -C1R7+C3R5     -C1r7+C3r5 */\
+        "paddw $f8, $f8, $f14           \n\t" /* B2             b2         */\
+        "paddw $f4, $f4, $f8            \n\t" /* A2+B2          a2+b2      */\
+        "psubw $f10, $f10, $f8          \n\t" /* A2-B2          a2-b2      */\
+        "psraw $f4, $f4, $f18           \n\t"                                \
+        "psraw $f10, $f10, $f18         \n\t"                                \
+        "mov.d $f8, $f12                \n\t" /* A3             a3         */\
+        "paddw $f6, $f6, $f0            \n\t" /* B3             b3         */\
+        "paddw $f12, $f12, $f6          \n\t" /* A3+B3          a3+b3      */\
+        "psubw $f8, $f8, $f6            \n\t" /* A3-B3          a3-b3      */\
+        "psraw $f12, $f12, $f18         \n\t"                                \
+        "psraw $f8, $f8, $f18           \n\t"                                \
+        "packsswh $f4, $f4, $f4         \n\t" /* A2+B2          a2+b2      */\
+        "packsswh $f12, $f12, $f12      \n\t" /* A3+B3          a3+b3      */\
+        "swc1 $f4, 32+" #dst "          \n\t"                                \
+        "packsswh $f8, $f8, $f8         \n\t" /* A3-B3          a3-b3      */\
+        "packsswh $f10, $f10, $f10      \n\t" /* A2-B2          a2-b2      */\
+        "swc1 $f12, 48+" #dst "         \n\t"                                \
+        "swc1 $f8, 64+" #dst "          \n\t"                                \
+        "swc1 $f10, 80+" #dst "         \n\t"
+
+        //IDCT(  src0,   src4,   src1,    src5,    dst, shift)
+        IDCT(    (%1), 64(%1), 32(%1),  96(%1),  0(%0),    20)
+        IDCT(   8(%1), 72(%1), 40(%1), 104(%1),  4(%0),    20)
+        IDCT(  16(%1), 80(%1), 48(%1), 112(%1),  8(%0),    20)
+        IDCT(  24(%1), 88(%1), 56(%1), 120(%1), 12(%0),    20)
+        "b 9f                           \n\t"
+
+        "# .p2align 4                   \n\t"
+        "3:                             \n\t"
+
+#undef  IDCT
+#define IDCT(src0, src4, src1, src5, dst, shift) /* src4, src5 not used */   \
+        "ldc1 $f0, " #src0 "            \n\t" /* R4     R0      r4      r0 */\
+        "ldc1 $f4, " #src1 "            \n\t" /* R3     R1      r3      r1 */\
+        "ldc1 $f8, 16(%2)               \n\t" /* C4     C4      C4      C4 */\
+        "pmaddhw $f8, $f8, $f0          \n\t" /* C4R4+C4R0      C4r4+C4r0  */\
+        "ldc1 $f10, 24(%2)              \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddhw $f0, $f0, $f10         \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "mov.d $f12, $f8                \n\t" /* C4R4+C4R0      C4r4+C4r0  */\
+        "ldc1 $f14, 48(%2)              \n\t" /* C3     C1      C3      C1 */\
+        "pmaddhw $f14, $f14, $f4        \n\t" /* C3R3+C1R1      C3r3+C1r1  */\
+        "mov.d $f10, $f0                \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "ldc1 $f6, 64(%2)               \n\t"                                \
+        "pmaddhw $f6, $f6, $f4          \n\t" /* -C7R3+C3R1     -C7r3+C3r1 */\
+        "li $10, " #shift "             \n\t" /* shift count -> $10        */\
+        "paddw $f14, $f14, $f8          \n\t" /* A0+B0          a0+b0      */\
+        "mtc1 $10, $f18                 \n\t" /* $10 -> $f18 (shift count) */\
+        "paddw $f8, $f8, $f8            \n\t" /* 2A0            2a0        */\
+        "psubw $f8, $f8, $f14           \n\t" /* A0-B0          a0-b0      */\
+        "psraw $f14, $f14, $f18         \n\t"                                \
+        "psraw $f8, $f8, $f18           \n\t"                                \
+        "mov.d $f2, $f0                 \n\t" /* A1             a1         */\
+        "paddw $f0, $f0, $f6            \n\t" /* A1+B1          a1+b1      */\
+        "psubw $f2, $f2, $f6            \n\t" /* A1-B1          a1-b1      */\
+        "psraw $f0, $f0, $f18           \n\t"                                \
+        "psraw $f2, $f2, $f18           \n\t"                                \
+        "packsswh $f14, $f14, $f14      \n\t" /* A0+B0  a0+b0              */\
+        "swc1 $f14, " #dst "            \n\t"                                \
+        "packsswh $f0, $f0, $f0         \n\t" /* A1+B1  a1+b1              */\
+        "swc1 $f0, 16+" #dst "          \n\t"                                \
+        "packsswh $f2, $f2, $f2         \n\t" /* A1-B1  a1-b1              */\
+        "swc1 $f2, 96+" #dst "          \n\t"                                \
+        "packsswh $f8, $f8, $f8         \n\t" /* A0-B0  a0-b0              */\
+        "swc1 $f8, 112+" #dst "         \n\t"                                \
+        "ldc1 $f8, 80(%2)               \n\t" /* -C1    C5      -C1     C5 */\
+        "ldc1 $f16, 96(%2)              \n\t"                                \
+        "pmaddhw $f8, $f8, $f4          \n\t" /* -C1R3+C5R1     -C1r3+C5r1 */\
+        "pmaddhw $f4, $f4, $f16         \n\t" /* -C5R3+C7R1     -C5r3+C7r1 */\
+        "mov.d $f2, $f10                \n\t" /* A2             a2         */\
+        "paddw $f2, $f2, $f8            \n\t" /* A2+B2          a2+b2      */\
+        "psubw $f10, $f10, $f8          \n\t" /* A2-B2          a2-b2      */\
+        "psraw $f2, $f2, $f18           \n\t"                                \
+        "psraw $f10, $f10, $f18         \n\t"                                \
+        "mov.d $f8, $f12                \n\t" /* A3             a3         */\
+        "paddw $f12, $f12, $f4          \n\t" /* A3+B3          a3+b3      */\
+        "psubw $f8, $f8, $f4            \n\t" /* A3-B3          a3-b3      */\
+        "psraw $f12, $f12, $f18         \n\t"                                \
+        "psraw $f8, $f8, $f18           \n\t"                                \
+        "packsswh $f2, $f2, $f2         \n\t" /* A2+B2  a2+b2              */\
+        "packsswh $f12, $f12, $f12      \n\t" /* A3+B3  a3+b3              */\
+        "swc1 $f2, 32+" #dst "          \n\t"                                \
+        "packsswh $f8, $f8, $f8         \n\t" /* A3-B3  a3-b3              */\
+        "packsswh $f10, $f10, $f10      \n\t" /* A2-B2  a2-b2              */\
+        "swc1 $f12, 48+" #dst "         \n\t"                                \
+        "swc1 $f8, 64+" #dst "          \n\t"                                \
+        "swc1 $f10, 80+" #dst "         \n\t"
+
+        //IDCT(  src0,   src4,   src1,    src5,    dst, shift)
+        IDCT(    (%1), 64(%1), 32(%1),  96(%1),  0(%0),    20)
+        IDCT(   8(%1), 72(%1), 40(%1), 104(%1),  4(%0),    20)
+        IDCT(  16(%1), 80(%1), 48(%1), 112(%1),  8(%0),    20)
+        IDCT(  24(%1), 88(%1), 56(%1), 120(%1), 12(%0),    20)
+        "b 9f                           \n\t"
+
+        "# .p2align 4                   \n\t"
+        "5:                             \n\t"
+
+#undef  IDCT
+#define IDCT(src0, src4, src1, src5, dst, shift) /* src1, src5 not used */   \
+        "ldc1 $f0, " #src0 "            \n\t" /* R4     R0      r4      r0 */\
+        "ldc1 $f2, " #src4 "            \n\t" /* R6     R2      r6      r2 */\
+        "ldc1 $f8, 16(%2)               \n\t" /* C4     C4      C4      C4 */\
+        "pmaddhw $f8, $f8, $f0          \n\t" /* C4R4+C4R0      C4r4+C4r0  */\
+        "ldc1 $f10, 24(%2)              \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddhw $f0, $f0, $f10         \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "ldc1 $f10, 32(%2)              \n\t" /* C6     C2      C6      C2 */\
+        "pmaddhw $f10, $f10, $f2        \n\t" /* C6R6+C2R2      C6r6+C2r2  */\
+        "ldc1 $f12, 40(%2)              \n\t" /* -C2    C6      -C2     C6 */\
+        "pmaddhw $f2, $f2, $f12         \n\t" /* -C2R6+C6R2     -C2r6+C6r2 */\
+        "mov.d $f12, $f8                \n\t" /* C4R4+C4R0      C4r4+C4r0  */\
+        "paddw $f8, $f8, $f10           \n\t" /* A0             a0         */\
+        "psubw $f12, $f12, $f10         \n\t" /* A3             a3         */\
+        "mov.d $f10, $f0                \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "paddw $f0, $f0, $f2            \n\t" /* A1             a1         */\
+        "psubw $f10, $f10, $f2          \n\t" /* A2             a2         */\
+        "ldc1 $f4, 8+" #src0 "          \n\t" /* R4     R0      r4      r0 */\
+        "ldc1 $f6, 8+" #src4 "          \n\t" /* R6     R2      r6      r2 */\
+        "ldc1 $f2, 16(%2)               \n\t" /* C4     C4      C4      C4 */\
+        "pmaddhw $f2, $f2, $f4          \n\t" /* C4R4+C4R0      C4r4+C4r0  */\
+        "ldc1 $f14, 24(%2)              \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddhw $f4, $f4, $f14         \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "ldc1 $f14, 32(%2)              \n\t" /* C6     C2      C6      C2 */\
+        "ldc1 $f16, 40(%2)              \n\t"                                \
+        "pmaddhw $f14, $f14, $f6        \n\t" /* C6R6+C2R2      C6r6+C2r2  */\
+        "pmaddhw $f6, $f6, $f16         \n\t" /* -C2R6+C6R2     -C2r6+C6r2 */\
+        "paddw $f14, $f14, $f2          \n\t" /* A0             a0         */\
+        "paddw $f2, $f2, $f2            \n\t" /* 2C0            2c0        */\
+        "psubw $f2, $f2, $f14           \n\t" /* A3             a3         */\
+        "li $10, " #shift "             \n\t" /* shift count -> $10        */\
+        "paddw $f6, $f6, $f4            \n\t" /* A1             a1         */\
+        "mtc1 $10, $f18                 \n\t" /* $10 -> $f18 (shift count) */\
+        "paddw $f4, $f4, $f4            \n\t" /* 2C1            2c1        */\
+        "psubw $f4, $f4, $f6            \n\t" /* A2             a2         */\
+        "psraw $f8, $f8, $f18           \n\t"                                \
+        "psraw $f14, $f14, $f18         \n\t"                                \
+        "psraw $f6, $f6, $f18           \n\t"                                \
+        "packsswh $f8, $f8, $f14        \n\t" /* A0             a0         */\
+        "sdc1 $f8, " #dst "             \n\t"                                \
+        "psraw $f0, $f0, $f18           \n\t"                                \
+        "packsswh $f0, $f0, $f6         \n\t" /* A1             a1         */\
+        "sdc1 $f0, 16+" #dst "          \n\t"                                \
+        "sdc1 $f0, 96+" #dst "          \n\t"                                \
+        "sdc1 $f8, 112+" #dst "         \n\t"                                \
+        "psraw $f10, $f10, $f18         \n\t"                                \
+        "psraw $f12, $f12, $f18         \n\t"                                \
+        "psraw $f4, $f4, $f18           \n\t"                                \
+        "packsswh $f10, $f10, $f4       \n\t" /* A2             a2         */\
+        "sdc1 $f10, 32+" #dst "         \n\t"                                \
+        "psraw $f2, $f2, $f18           \n\t"                                \
+        "packsswh $f12, $f12, $f2       \n\t" /* A3             a3         */\
+        "sdc1 $f12, 48+" #dst "         \n\t"                                \
+        "sdc1 $f12, 64+" #dst "         \n\t"                                \
+        "sdc1 $f10, 80+" #dst "         \n\t"
+
+        //IDCT(  src0,   src4,   src1,    src5,    dst, shift)
+        IDCT(   0(%1), 64(%1), 32(%1),  96(%1),  0(%0),    20)
+        IDCT(  16(%1), 80(%1), 48(%1), 112(%1),  8(%0),    20)
+        "b 9f                           \n\t"
+
+        "# .p2align 4                   \n\t"
+        "1:                             \n\t"
+
+#undef  IDCT
+#define IDCT(src0, src4, src1, src5, dst, shift) /* src5 is not used */      \
+        "ldc1 $f0, " #src0 "            \n\t" /* R4     R0      r4      r0 */\
+        "ldc1 $f2, " #src4 "            \n\t" /* R6     R2      r6      r2 */\
+        "ldc1 $f4, " #src1 "            \n\t" /* R3     R1      r3      r1 */\
+        "ldc1 $f8, 16(%2)               \n\t" /* C4     C4      C4      C4 */\
+        "li $10, " #shift "             \n\t" /* shift count -> $10        */\
+        "pmaddhw $f8, $f8, $f0          \n\t" /* C4R4+C4R0      C4r4+C4r0  */\
+        "ldc1 $f10, 24(%2)              \n\t" /* -C4    C4      -C4     C4 */\
+        "mtc1 $10, $f18                 \n\t" /* $10 -> $f18 (shift count) */\
+        "pmaddhw $f0, $f0, $f10         \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "ldc1 $f10, 32(%2)              \n\t" /* C6     C2      C6      C2 */\
+        "pmaddhw $f10, $f10, $f2        \n\t" /* C6R6+C2R2      C6r6+C2r2  */\
+        "ldc1 $f12, 40(%2)              \n\t" /* -C2    C6      -C2     C6 */\
+        "pmaddhw $f2, $f2, $f12         \n\t" /* -C2R6+C6R2     -C2r6+C6r2 */\
+        "mov.d $f12, $f8                \n\t" /* C4R4+C4R0      C4r4+C4r0  */\
+        "ldc1 $f14, 48(%2)              \n\t" /* C3     C1      C3      C1 */\
+        "pmaddhw $f14, $f14, $f4        \n\t" /* C3R3+C1R1      C3r3+C1r1  */\
+        "paddw $f8, $f8, $f10           \n\t" /* A0             a0         */\
+        "psubw $f12, $f12, $f10         \n\t" /* A3             a3         */\
+        "mov.d $f10, $f0                \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "paddw $f0, $f0, $f2            \n\t" /* A1             a1         */\
+        "psubw $f10, $f10, $f2          \n\t" /* A2             a2         */\
+        "ldc1 $f2, 64(%2)               \n\t"                                \
+        "pmaddhw $f2, $f2, $f4          \n\t" /* -C7R3+C3R1     -C7r3+C3r1 */\
+        "paddw $f14, $f14, $f8          \n\t" /* A0+B0          a0+b0      */\
+        "paddw $f8, $f8, $f8            \n\t" /* 2A0            2a0        */\
+        "psubw $f8, $f8, $f14           \n\t" /* A0-B0          a0-b0      */\
+        "psraw $f14, $f14, $f18         \n\t"                                \
+        "psraw $f8, $f8, $f18           \n\t"                                \
+        "mov.d $f6, $f0                 \n\t" /* A1             a1         */\
+        "paddw $f0, $f0, $f2            \n\t" /* A1+B1          a1+b1      */\
+        "psubw $f6, $f6, $f2            \n\t" /* A1-B1          a1-b1      */\
+        "psraw $f0, $f0, $f18           \n\t"                                \
+        "psraw $f6, $f6, $f18           \n\t"                                \
+        "packsswh $f14, $f14, $f14      \n\t" /* A0+B0  a0+b0              */\
+        "swc1 $f14, " #dst "            \n\t"                                \
+        "packsswh $f0, $f0, $f0         \n\t" /* A1+B1  a1+b1              */\
+        "swc1 $f0, 16+" #dst "          \n\t"                                \
+        "packsswh $f6, $f6, $f6         \n\t" /* A1-B1  a1-b1              */\
+        "swc1 $f6, 96+" #dst "          \n\t"                                \
+        "packsswh $f8, $f8, $f8         \n\t" /* A0-B0  a0-b0              */\
+        "swc1 $f8, 112+" #dst "         \n\t"                                \
+        "ldc1 $f8, 80(%2)               \n\t" /* -C1    C5      -C1     C5 */\
+        "ldc1 $f16, 96(%2)              \n\t"                                \
+        "pmaddhw $f8, $f8, $f4          \n\t" /* -C1R3+C5R1     -C1r3+C5r1 */\
+        "pmaddhw $f4, $f4, $f16         \n\t" /* -C5R3+C7R1     -C5r3+C7r1 */\
+        "mov.d $f6, $f10                \n\t" /* A2             a2         */\
+        "paddw $f6, $f6, $f8            \n\t" /* A2+B2          a2+b2      */\
+        "psubw $f10, $f10, $f8          \n\t" /* A2-B2          a2-b2      */\
+        "psraw $f6, $f6, $f18           \n\t"                                \
+        "psraw $f10, $f10, $f18         \n\t"                                \
+        "mov.d $f8, $f12                \n\t" /* A3             a3         */\
+        "paddw $f12, $f12, $f4          \n\t" /* A3+B3          a3+b3      */\
+        "psubw $f8, $f8, $f4            \n\t" /* A3-B3          a3-b3      */\
+        "psraw $f12, $f12, $f18         \n\t"                                \
+        "packsswh $f6, $f6, $f6         \n\t" /* A2+B2          a2+b2      */\
+        "swc1 $f6, 32+" #dst "          \n\t"                                \
+        "psraw $f8, $f8, $f18           \n\t"                                \
+        "packsswh $f12, $f12, $f12      \n\t" /* A3+B3          a3+b3      */\
+        "swc1 $f12, 48+" #dst "         \n\t"                                \
+        "packsswh $f8, $f8, $f8         \n\t" /* A3-B3          a3-b3      */\
+        "packsswh $f10, $f10, $f10      \n\t" /* A2-B2          a2-b2      */\
+        "swc1 $f8, 64+" #dst "          \n\t"                                \
+        "swc1 $f10, 80+" #dst "         \n\t"
+
+        //IDCT(  src0,   src4,   src1,    src5,    dst, shift)
+        IDCT(    (%1), 64(%1), 32(%1),  96(%1),  0(%0),    20)
+        IDCT(   8(%1), 72(%1), 40(%1), 104(%1),  4(%0),    20)
+        IDCT(  16(%1), 80(%1), 48(%1), 112(%1),  8(%0),    20)
+        IDCT(  24(%1), 88(%1), 56(%1), 120(%1), 12(%0),    20)
+        "b 9f                           \n\t"
+
+        "# .p2align 4                   \n\t"
+        "7:                             \n\t"
+
+#undef  IDCT
+#define IDCT(src0, src4, src1, src5, dst, shift) /* src4,src1,src5 unused */ \
+        "ldc1 $f0, " #src0 "            \n\t" /* R4     R0      r4      r0 */\
+        "ldc1 $f8, 16(%2)               \n\t" /* C4     C4      C4      C4 */\
+        "li $10, " #shift "             \n\t" /* shift count -> $10        */\
+        "pmaddhw $f8, $f8, $f0          \n\t" /* C4R4+C4R0      C4r4+C4r0  */\
+        "mtc1 $10, $f18                 \n\t" /* $10 -> $f18 (shift count) */\
+        "ldc1 $f10, 24(%2)              \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddhw $f0, $f0, $f10         \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "psraw $f8, $f8, $f18           \n\t"                                \
+        "psraw $f0, $f0, $f18           \n\t"                                \
+        "ldc1 $f4, 8+" #src0 "          \n\t" /* R4     R0      r4      r0 */\
+        "ldc1 $f2, 16(%2)               \n\t" /* C4     C4      C4      C4 */\
+        "pmaddhw $f2, $f2, $f4          \n\t" /* C4R4+C4R0      C4r4+C4r0  */\
+        "ldc1 $f14, 24(%2)              \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddhw $f4, $f4, $f14         \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "ldc1 $f14, 32(%2)              \n\t" /* C6 C2 C6 C2, unused below */\
+        "psraw $f2, $f2, $f18           \n\t"                                \
+        "packsswh $f8, $f8, $f2         \n\t" /* A0             a0         */\
+        "sdc1 $f8, " #dst "             \n\t"                                \
+        "psraw $f4, $f4, $f18           \n\t"                                \
+        "packsswh $f0, $f0, $f4         \n\t" /* A1             a1         */\
+        "sdc1 $f0, 16+" #dst "          \n\t"                                \
+        "sdc1 $f0, 96+" #dst "          \n\t"                                \
+        "sdc1 $f8, 112+" #dst "         \n\t"                                \
+        "sdc1 $f0, 32+" #dst "          \n\t"                                \
+        "sdc1 $f8, 48+" #dst "          \n\t"                                \
+        "sdc1 $f8, 64+" #dst "          \n\t"                                \
+        "sdc1 $f0, 80+" #dst "          \n\t"
+
+        //IDCT(  src0,   src4,   src1,    src5,    dst, shift)
+        IDCT(   0(%1), 64(%1), 32(%1),  96(%1),  0(%0),    20)
+        IDCT(  16(%1), 80(%1), 48(%1), 112(%1),  8(%0),    20)
+
+        "9:                             \n\t"
+        ::"r"(block),"r"(temp),"r"(coeffs),"m"(ff_wm1010),"m"(ff_d40000)
+        : "$10","$11"
+    );
+}
diff --git a/libavcodec/mips/simple_idct_msa.c b/libavcodec/mips/simple_idct_msa.c
new file mode 100644
index 00000000..bd8b3101
--- /dev/null
+++ b/libavcodec/mips/simple_idct_msa.c
@@ -0,0 +1,573 @@
+/*
+ * Copyright (c) 2015 Parag Salasakar (Parag.Salasakar@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/mips/generic_macros_msa.h"
+#include "idctdsp_mips.h"
+
+static void simple_idct_msa(int16_t *block) /* in-place 8x8 IDCT: two 1-D passes, result stored back into block */
+{
+    int32_t const_val;
+    v8i16 weights = { 0, 22725, 21407, 19266, 16383, 12873, 8867, 4520 }; /* lane k = weight wk (k = 1..7); lane 0 is never read */
+    v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
+    v8i16 w1, w3, w5, w7;
+    v8i16 const0, const1, const2, const3, const4, const5, const6, const7;
+    v4i32 temp0_r, temp1_r, temp2_r, temp3_r;
+    v4i32 temp0_l, temp1_l, temp2_l, temp3_l;
+    v4i32 a0_r, a1_r, a2_r, a3_r, a0_l, a1_l, a2_l, a3_l;
+    v4i32 b0_r, b1_r, b2_r, b3_r, b0_l, b1_l, b2_l, b3_l;
+    v4i32 w2, w4, w6;
+    v8i16 select_vec, temp;
+    v8i16 zero = { 0 };
+    v4i32 const_val0 = __msa_ldi_w(1); /* turned into 1 << 10 below: bias for the first pass */
+    v4i32 const_val1 = __msa_ldi_w(1); /* this initial value is overwritten before it is used */
+    /* ---- first 1-D pass: bias const_val0, results shifted right by 11 ---- */
+    LD_SH8(block, 8, in0, in1, in2, in3, in4, in5, in6, in7); /* fills in0..in7 from block */
+    const_val0 <<= 10; /* 1 << 10, i.e. half of the >> 11 applied at the end of this pass */
+    const_val = 16383 * ((1 << 19) / 16383); /* 1 << 19 rounded down to a multiple of 16383 (= weights[4]) */
+    const_val1 = __msa_insert_w(const_val0, 0, const_val);
+    const_val1 = __msa_splati_w(const_val1, 0); /* const_val in every lane: bias for the second pass */
+    TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
+                       in0, in1, in2, in3, in4, in5, in6, in7);
+    select_vec = in1 | in2 | in3 | in4 | in5 | in6 | in7; /* OR of every input except in0 */
+    select_vec = __msa_clti_u_h((v8u16) select_vec, 1); /* lanes whose OR is 0 become the select mask */
+    UNPCK_SH_SW(in0, a0_r, a0_l);
+    UNPCK_SH_SW(in2, temp3_r, temp3_l);
+    temp = in0 << 3; /* shortcut result, used below in the lanes flagged by select_vec */
+    w2 = (v4i32) __msa_splati_h(weights, 2);
+    w2 = (v4i32) __msa_ilvr_h(zero, (v8i16) w2); /* weights[2] interleaved with zero halfwords to fill 32-bit lanes */
+    w4 = (v4i32) __msa_splati_h(weights, 4);
+    w4 = (v4i32) __msa_ilvr_h(zero, (v8i16) w4); /* same widening for weights[4] */
+    w6 = (v4i32) __msa_splati_h(weights, 6);
+    w6 = (v4i32) __msa_ilvr_h(zero, (v8i16) w6); /* same widening for weights[6] */
+    MUL2(a0_r, w4, a0_l, w4, a0_r, a0_l);
+    ADD2(a0_r, const_val0, a0_l, const_val0, temp0_r, temp0_l); /* in0 * w4 + first-pass bias */
+    MUL4(w2, temp3_r, w2, temp3_l, w6, temp3_r, w6, temp3_l,
+         temp1_r, temp1_l, temp2_r, temp2_l); /* temp1 = w2 * in2, temp2 = w6 * in2 */
+    BUTTERFLY_8(temp0_r, temp0_l, temp0_r, temp0_l,
+                temp2_l, temp2_r, temp1_l, temp1_r,
+                a0_r, a0_l, a1_r, a1_l, a2_l, a2_r, a3_l, a3_r);
+    UNPCK_SH_SW(in4, temp0_r, temp0_l);
+    UNPCK_SH_SW(in6, temp3_r, temp3_l);
+    MUL2(temp0_r, w4, temp0_l, w4, temp0_r, temp0_l); /* temp0 = w4 * in4 */
+    MUL4(w2, temp3_r, w2, temp3_l, w6, temp3_r, w6, temp3_l,
+         temp2_r, temp2_l, temp1_r, temp1_l); /* temp2 = w2 * in6, temp1 = w6 * in6 */
+    ADD2(a0_r, temp0_r, a0_l, temp0_l, a0_r, a0_l);
+    SUB4(a1_r, temp0_r, a1_l, temp0_l, a2_r, temp0_r, a2_l, temp0_l,
+         a1_r, a1_l, a2_r, a2_l);
+    ADD4(a3_r, temp0_r, a3_l, temp0_l, a0_r, temp1_r, a0_l, temp1_l,
+         a3_r, a3_l, a0_r, a0_l);
+    SUB2(a1_r, temp2_r, a1_l, temp2_l, a1_r, a1_l);
+    ADD2(a2_r, temp2_r, a2_l, temp2_l, a2_r, a2_l);
+    SUB2(a3_r, temp1_r, a3_l, temp1_l, a3_r, a3_l);
+    ILVRL_H2_SW(in1, in3, b3_r, b3_l); /* in1/in3 paired up for the dot products below */
+    SPLATI_H4_SH(weights, 1, 3, 5, 7, w1, w3, w5, w7);
+    ILVRL_H2_SW(in5, in7, temp0_r, temp0_l); /* in5/in7 paired up likewise */
+    ILVR_H4_SH(w1, w3, w3, -w7, w5, -w1, w7, -w5,
+               const0, const1, const2, const3);
+    ILVR_H2_SH(w5, w7, w7, w3, const4, const6);
+    const5 = __msa_ilvod_h(-w1, -w5);
+    const7 = __msa_ilvod_h(w3, -w1);
+    DOTP_SH4_SW(b3_r, b3_r, b3_r, b3_r, const0, const1, const2, const3,
+                b0_r, b1_r, b2_r, b3_r);
+    DPADD_SH4_SW(temp0_r, temp0_r, temp0_r, temp0_r,
+                 const4, const5, const6, const7, b0_r, b1_r, b2_r, b3_r);
+    DOTP_SH4_SW(b3_l, b3_l, b3_l, b3_l, const0, const1, const2, const3,
+                b0_l, b1_l, b2_l, b3_l);
+    DPADD_SH4_SW(temp0_l, temp0_l, temp0_l, temp0_l,
+                 const4, const5, const6, const7, b0_l, b1_l, b2_l, b3_l);
+    BUTTERFLY_16(a0_r, a0_l, a1_r, a1_l, a2_r, a2_l, a3_r, a3_l,
+                 b3_l, b3_r, b2_l, b2_r, b1_l, b1_r, b0_l, b0_r,
+                 temp0_r, temp0_l, temp1_r, temp1_l,
+                 temp2_r, temp2_l, temp3_r, temp3_l,
+                 a3_l, a3_r, a2_l, a2_r, a1_l, a1_r, a0_l, a0_r); /* presumably tempN = aN + bN and aN = aN - bN; confirm in generic_macros_msa.h */
+    SRA_4V(temp0_r, temp0_l, temp1_r, temp1_l, 11);
+    SRA_4V(temp2_r, temp2_l, temp3_r, temp3_l, 11); /* first-pass scaling: >> 11 */
+    PCKEV_H4_SW(temp0_l, temp0_r, temp1_l, temp1_r,
+                temp2_l, temp2_r, temp3_l, temp3_r,
+                temp0_r, temp1_r, temp2_r, temp3_r);
+    in0 = (v8i16) __msa_bmnz_v((v16u8) temp0_r, (v16u8) temp,
+                               (v16u8) select_vec); /* temp where select_vec is set, computed value elsewhere */
+    in1 = (v8i16) __msa_bmnz_v((v16u8) temp1_r, (v16u8) temp,
+                               (v16u8) select_vec);
+    in2 = (v8i16) __msa_bmnz_v((v16u8) temp2_r, (v16u8) temp,
+                               (v16u8) select_vec);
+    in3 = (v8i16) __msa_bmnz_v((v16u8) temp3_r, (v16u8) temp,
+                               (v16u8) select_vec);
+    SRA_4V(a3_r, a3_l, a2_r, a2_l, 11);
+    SRA_4V(a1_r, a1_l, a0_r, a0_l, 11);
+    PCKEV_H4_SW(a0_l, a0_r, a1_l, a1_r, a2_l, a2_r, a3_l, a3_r,
+                a0_r, a1_r, a2_r, a3_r);
+    in4 = (v8i16) __msa_bmnz_v((v16u8) a3_r, (v16u8) temp, (v16u8) select_vec); /* note the reversed order: in4..in7 come from a3..a0 */
+    in5 = (v8i16) __msa_bmnz_v((v16u8) a2_r, (v16u8) temp, (v16u8) select_vec);
+    in6 = (v8i16) __msa_bmnz_v((v16u8) a1_r, (v16u8) temp, (v16u8) select_vec);
+    in7 = (v8i16) __msa_bmnz_v((v16u8) a0_r, (v16u8) temp, (v16u8) select_vec);
+    TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
+                       in0, in1, in2, in3, in4, in5, in6, in7); /* transpose again so the next pass runs along the other dimension */
+    /* ---- second 1-D pass: bias const_val1, results shifted right by 20, no shortcut ---- */
+    UNPCK_SH_SW(in0, a0_r, a0_l);
+    UNPCK_SH_SW(in2, temp3_r, temp3_l);
+    w2 = (v4i32) __msa_splati_h(weights, 2); /* w2/w4/w6 are rebuilt with the same values as in the first pass */
+    w2 = (v4i32) __msa_ilvr_h(zero, (v8i16) w2);
+    w4 = (v4i32) __msa_splati_h(weights, 4);
+    w4 = (v4i32) __msa_ilvr_h(zero, (v8i16) w4);
+    w6 = (v4i32) __msa_splati_h(weights, 6);
+    w6 = (v4i32) __msa_ilvr_h(zero, (v8i16) w6);
+    MUL2(a0_r, w4, a0_l, w4, a0_r, a0_l);
+    ADD2(a0_r, const_val1, a0_l, const_val1, temp0_r, temp0_l); /* in0 * w4 + second-pass bias */
+    MUL4(w2, temp3_r, w2, temp3_l, w6, temp3_r, w6, temp3_l,
+         temp1_r, temp1_l, temp2_r, temp2_l);
+    BUTTERFLY_8(temp0_r, temp0_l, temp0_r, temp0_l,
+                temp2_l, temp2_r, temp1_l, temp1_r,
+                a0_r, a0_l, a1_r, a1_l, a2_l, a2_r, a3_l, a3_r);
+    UNPCK_SH_SW(in4, temp0_r, temp0_l);
+    UNPCK_SH_SW(in6, temp3_r, temp3_l);
+    MUL2(temp0_r, w4, temp0_l, w4, temp0_r, temp0_l);
+    MUL4(w2, temp3_r, w2, temp3_l, w6, temp3_r, w6, temp3_l,
+         temp2_r, temp2_l, temp1_r, temp1_l);
+    ADD2(a0_r, temp0_r, a0_l, temp0_l, a0_r, a0_l);
+    SUB4(a1_r, temp0_r, a1_l, temp0_l, a2_r, temp0_r, a2_l, temp0_l,
+         a1_r, a1_l, a2_r, a2_l);
+    ADD4(a3_r, temp0_r, a3_l, temp0_l, a0_r, temp1_r, a0_l, temp1_l,
+         a3_r, a3_l, a0_r, a0_l);
+    SUB2(a1_r, temp2_r, a1_l, temp2_l, a1_r, a1_l);
+    ADD2(a2_r, temp2_r, a2_l, temp2_l, a2_r, a2_l);
+    SUB2(a3_r, temp1_r, a3_l, temp1_l, a3_r, a3_l);
+    ILVRL_H2_SW(in1, in3, b3_r, b3_l);
+    SPLATI_H4_SH(weights, 1, 3, 5, 7, w1, w3, w5, w7);
+    ILVR_H4_SH(w1, w3, w3, -w7, w5, -w1, w7, -w5,
+               const0, const1, const2, const3);
+    DOTP_SH4_SW(b3_r, b3_r, b3_r, b3_r, const0, const1, const2, const3,
+                b0_r, b1_r, b2_r, b3_r);
+    DOTP_SH4_SW(b3_l, b3_l, b3_l, b3_l, const0, const1, const2, const3,
+                b0_l, b1_l, b2_l, b3_l);
+    ILVRL_H2_SW(in5, in7, temp0_r, temp0_l);
+    ILVR_H2_SH(w5, w7, w7, w3, const4, const6);
+    const5 = __msa_ilvod_h(-w1, -w5);
+    const7 = __msa_ilvod_h(w3, -w1);
+    DPADD_SH4_SW(temp0_r, temp0_r, temp0_r, temp0_r,
+                 const4, const5, const6, const7, b0_r, b1_r, b2_r, b3_r);
+    DPADD_SH4_SW(temp0_l, temp0_l, temp0_l, temp0_l,
+                 const4, const5, const6, const7, b0_l, b1_l, b2_l, b3_l);
+    BUTTERFLY_16(a0_r, a0_l, a1_r, a1_l, a2_r, a2_l, a3_r, a3_l,
+                 b3_l, b3_r, b2_l, b2_r, b1_l, b1_r, b0_l, b0_r,
+                 temp0_r, temp0_l, temp1_r, temp1_l,
+                 temp2_r, temp2_l, temp3_r, temp3_l,
+                 a3_l, a3_r, a2_l, a2_r, a1_l, a1_r, a0_l, a0_r);
+    SRA_4V(temp0_r, temp0_l, temp1_r, temp1_l, 20);
+    SRA_4V(temp2_r, temp2_l, temp3_r, temp3_l, 20); /* second-pass scaling: >> 20 */
+    PCKEV_H4_SW(temp0_l, temp0_r, temp1_l, temp1_r, temp2_l, temp2_r,
+                temp3_l, temp3_r, temp0_r, temp1_r, temp2_r, temp3_r);
+    SRA_4V(a3_r, a3_l, a2_r, a2_l, 20);
+    SRA_4V(a1_r, a1_l, a0_r, a0_l, 20);
+    PCKEV_H4_SW(a0_l, a0_r, a1_l, a1_r, a2_l, a2_r, a3_l, a3_r,
+                a0_r, a1_r, a2_r, a3_r);
+    ST_SW8(temp0_r, temp1_r, temp2_r, temp3_r, a3_r, a2_r, a1_r, a0_r,
+           block, 8); /* results go back into block, second half in a3..a0 order */
+}
+
+static void simple_idct_put_msa(uint8_t *dst, int32_t dst_stride,
+                                int16_t *block) /* 8x8 IDCT of block; the clipped result is stored to dst as eight 8-byte rows */
+{
+    int32_t const_val;
+    uint64_t tmp0, tmp1, tmp2, tmp3;
+    v8i16 weights = { 0, 22725, 21407, 19266, 16383, 12873, 8867, 4520 }; /* lane k = weight wk (k = 1..7); lane 0 is never read */
+    v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
+    v8i16 w1, w3, w5, w7;
+    v8i16 const0, const1, const2, const3, const4, const5, const6, const7;
+    v4i32 temp0_r, temp1_r, temp2_r, temp3_r;
+    v4i32 temp0_l, temp1_l, temp2_l, temp3_l;
+    v4i32 a0_r, a1_r, a2_r, a3_r, a0_l, a1_l, a2_l, a3_l;
+    v4i32 b0_r, b1_r, b2_r, b3_r, b0_l, b1_l, b2_l, b3_l;
+    v4i32 w2, w4, w6;
+    v8i16 select_vec, temp;
+    v8i16 zero = { 0 };
+    v4i32 const_val0 = __msa_ldi_w(1); /* turned into 1 << 10 below: bias for the first pass */
+    v4i32 const_val1 = __msa_ldi_w(1); /* this initial value is overwritten before it is used */
+    /* ---- first 1-D pass: bias const_val0, results shifted right by 11 ---- */
+    LD_SH8(block, 8, in0, in1, in2, in3, in4, in5, in6, in7); /* fills in0..in7 from block */
+    const_val0 <<= 10; /* 1 << 10, i.e. half of the >> 11 applied at the end of this pass */
+    const_val = 16383 * ((1 << 19) / 16383); /* 1 << 19 rounded down to a multiple of 16383 (= weights[4]) */
+    const_val1 = __msa_insert_w(const_val0, 0, const_val);
+    const_val1 = __msa_splati_w(const_val1, 0); /* const_val in every lane: bias for the second pass */
+    TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
+                       in0, in1, in2, in3, in4, in5, in6, in7);
+    select_vec = in1 | in2 | in3 | in4 | in5 | in6 | in7; /* OR of every input except in0 */
+    select_vec = __msa_clti_u_h((v8u16) select_vec, 1); /* lanes whose OR is 0 become the select mask */
+    UNPCK_SH_SW(in0, a0_r, a0_l);
+    UNPCK_SH_SW(in2, temp3_r, temp3_l);
+    temp = in0 << 3; /* shortcut result, used below in the lanes flagged by select_vec */
+    w2 = (v4i32) __msa_splati_h(weights, 2);
+    w2 = (v4i32) __msa_ilvr_h(zero, (v8i16) w2); /* weights[2] interleaved with zero halfwords to fill 32-bit lanes */
+    w4 = (v4i32) __msa_splati_h(weights, 4);
+    w4 = (v4i32) __msa_ilvr_h(zero, (v8i16) w4); /* same widening for weights[4] */
+    w6 = (v4i32) __msa_splati_h(weights, 6);
+    w6 = (v4i32) __msa_ilvr_h(zero, (v8i16) w6); /* same widening for weights[6] */
+    MUL2(a0_r, w4, a0_l, w4, a0_r, a0_l);
+    ADD2(a0_r, const_val0, a0_l, const_val0, temp0_r, temp0_l); /* in0 * w4 + first-pass bias */
+    MUL2(w2, temp3_r, w2, temp3_l, temp1_r, temp1_l); /* temp1 = w2 * in2 */
+    MUL2(w6, temp3_r, w6, temp3_l, temp2_r, temp2_l); /* temp2 = w6 * in2 */
+    BUTTERFLY_8(temp0_r, temp0_l, temp0_r, temp0_l,
+                temp2_l, temp2_r, temp1_l, temp1_r,
+                a0_r, a0_l, a1_r, a1_l, a2_l, a2_r, a3_l, a3_r);
+    UNPCK_SH_SW(in4, temp0_r, temp0_l);
+    UNPCK_SH_SW(in6, temp3_r, temp3_l);
+    MUL2(temp0_r, w4, temp0_l, w4, temp0_r, temp0_l); /* temp0 = w4 * in4 */
+    MUL2(w2, temp3_r, w2, temp3_l, temp2_r, temp2_l); /* temp2 = w2 * in6 */
+    MUL2(w6, temp3_r, w6, temp3_l, temp1_r, temp1_l); /* temp1 = w6 * in6 */
+    ADD2(a0_r, temp0_r, a0_l, temp0_l, a0_r, a0_l);
+    SUB2(a1_r, temp0_r, a1_l, temp0_l, a1_r, a1_l);
+    SUB2(a2_r, temp0_r, a2_l, temp0_l, a2_r, a2_l);
+    ADD2(a3_r, temp0_r, a3_l, temp0_l, a3_r, a3_l);
+    ADD2(a0_r, temp1_r, a0_l, temp1_l, a0_r, a0_l);
+    SUB2(a1_r, temp2_r, a1_l, temp2_l, a1_r, a1_l);
+    ADD2(a2_r, temp2_r, a2_l, temp2_l, a2_r, a2_l);
+    SUB2(a3_r, temp1_r, a3_l, temp1_l, a3_r, a3_l);
+    ILVRL_H2_SW(in1, in3, b3_r, b3_l); /* in1/in3 paired up for the dot products below */
+    SPLATI_H4_SH(weights, 1, 3, 5, 7, w1, w3, w5, w7);
+    ILVRL_H2_SW(in5, in7, temp0_r, temp0_l); /* in5/in7 paired up likewise */
+    ILVR_H4_SH(w1, w3, w3, -w7, w5, -w1, w7, -w5,
+               const0, const1, const2, const3);
+    ILVR_H2_SH(w5, w7, w7, w3, const4, const6);
+    const5 = __msa_ilvod_h(-w1, -w5);
+    const7 = __msa_ilvod_h(w3, -w1);
+    DOTP_SH4_SW(b3_r, b3_r, b3_r, b3_r, const0, const1, const2, const3,
+                b0_r, b1_r, b2_r, b3_r);
+    DPADD_SH4_SW(temp0_r, temp0_r, temp0_r, temp0_r,
+                 const4, const5, const6, const7, b0_r, b1_r, b2_r, b3_r);
+    DOTP_SH4_SW(b3_l, b3_l, b3_l, b3_l, const0, const1, const2, const3,
+                b0_l, b1_l, b2_l, b3_l);
+    DPADD_SH4_SW(temp0_l, temp0_l, temp0_l, temp0_l,
+                 const4, const5, const6, const7, b0_l, b1_l, b2_l, b3_l);
+    BUTTERFLY_16(a0_r, a0_l, a1_r, a1_l, a2_r, a2_l, a3_r, a3_l,
+                 b3_l, b3_r, b2_l, b2_r, b1_l, b1_r, b0_l, b0_r,
+                 temp0_r, temp0_l, temp1_r, temp1_l,
+                 temp2_r, temp2_l, temp3_r, temp3_l,
+                 a3_l, a3_r, a2_l, a2_r, a1_l, a1_r, a0_l, a0_r); /* presumably tempN = aN + bN and aN = aN - bN; confirm in generic_macros_msa.h */
+    SRA_4V(temp0_r, temp0_l, temp1_r, temp1_l, 11);
+    SRA_4V(temp2_r, temp2_l, temp3_r, temp3_l, 11); /* first-pass scaling: >> 11 */
+    PCKEV_H4_SW(temp0_l, temp0_r, temp1_l, temp1_r,
+                temp2_l, temp2_r, temp3_l, temp3_r,
+                temp0_r, temp1_r, temp2_r, temp3_r);
+    in0 = (v8i16) __msa_bmnz_v((v16u8) temp0_r, (v16u8) temp,
+                               (v16u8) select_vec); /* temp where select_vec is set, computed value elsewhere */
+    in1 = (v8i16) __msa_bmnz_v((v16u8) temp1_r, (v16u8) temp,
+                               (v16u8) select_vec);
+    in2 = (v8i16) __msa_bmnz_v((v16u8) temp2_r, (v16u8) temp,
+                               (v16u8) select_vec);
+    in3 = (v8i16) __msa_bmnz_v((v16u8) temp3_r, (v16u8) temp,
+                               (v16u8) select_vec);
+    SRA_4V(a3_r, a3_l, a2_r, a2_l, 11);
+    SRA_4V(a1_r, a1_l, a0_r, a0_l, 11);
+    PCKEV_H4_SW(a0_l, a0_r, a1_l, a1_r, a2_l, a2_r, a3_l, a3_r,
+                a0_r, a1_r, a2_r, a3_r);
+    in4 = (v8i16) __msa_bmnz_v((v16u8) a3_r, (v16u8) temp, (v16u8) select_vec); /* note the reversed order: in4..in7 come from a3..a0 */
+    in5 = (v8i16) __msa_bmnz_v((v16u8) a2_r, (v16u8) temp, (v16u8) select_vec);
+    in6 = (v8i16) __msa_bmnz_v((v16u8) a1_r, (v16u8) temp, (v16u8) select_vec);
+    in7 = (v8i16) __msa_bmnz_v((v16u8) a0_r, (v16u8) temp, (v16u8) select_vec);
+    TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
+                       in0, in1, in2, in3, in4, in5, in6, in7); /* transpose again; second pass (bias const_val1, >> 20) follows */
+    UNPCK_SH_SW(in0, a0_r, a0_l);
+    UNPCK_SH_SW(in2, temp3_r, temp3_l);
+    w2 = (v4i32) __msa_splati_h(weights, 2); /* w2/w4/w6 are rebuilt with the same values as in the first pass */
+    w2 = (v4i32) __msa_ilvr_h(zero, (v8i16) w2);
+    w4 = (v4i32) __msa_splati_h(weights, 4);
+    w4 = (v4i32) __msa_ilvr_h(zero, (v8i16) w4);
+    w6 = (v4i32) __msa_splati_h(weights, 6);
+    w6 = (v4i32) __msa_ilvr_h(zero, (v8i16) w6);
+    MUL2(a0_r, w4, a0_l, w4, a0_r, a0_l);
+    ADD2(a0_r, const_val1, a0_l, const_val1, temp0_r, temp0_l); /* in0 * w4 + second-pass bias */
+    MUL2(w2, temp3_r, w2, temp3_l, temp1_r, temp1_l);
+    MUL2(w6, temp3_r, w6, temp3_l, temp2_r, temp2_l);
+    BUTTERFLY_8(temp0_r, temp0_l, temp0_r, temp0_l,
+                temp2_l, temp2_r, temp1_l, temp1_r,
+                a0_r, a0_l, a1_r, a1_l, a2_l, a2_r, a3_l, a3_r);
+    UNPCK_SH_SW(in4, temp0_r, temp0_l);
+    UNPCK_SH_SW(in6, temp3_r, temp3_l);
+    MUL2(temp0_r, w4, temp0_l, w4, temp0_r, temp0_l);
+    MUL2(w2, temp3_r, w2, temp3_l, temp2_r, temp2_l);
+    MUL2(w6, temp3_r, w6, temp3_l, temp1_r, temp1_l);
+    ADD2(a0_r, temp0_r, a0_l, temp0_l, a0_r, a0_l);
+    SUB2(a1_r, temp0_r, a1_l, temp0_l, a1_r, a1_l);
+    SUB2(a2_r, temp0_r, a2_l, temp0_l, a2_r, a2_l);
+    ADD2(a3_r, temp0_r, a3_l, temp0_l, a3_r, a3_l);
+    ADD2(a0_r, temp1_r, a0_l, temp1_l, a0_r, a0_l);
+    SUB2(a1_r, temp2_r, a1_l, temp2_l, a1_r, a1_l);
+    ADD2(a2_r, temp2_r, a2_l, temp2_l, a2_r, a2_l);
+    SUB2(a3_r, temp1_r, a3_l, temp1_l, a3_r, a3_l);
+    ILVRL_H2_SW(in1, in3, b3_r, b3_l);
+    SPLATI_H4_SH(weights, 1, 3, 5, 7, w1, w3, w5, w7);
+    ILVR_H4_SH(w1, w3, w3, -w7, w5, -w1, w7, -w5,
+               const0, const1, const2, const3);
+    DOTP_SH4_SW(b3_r, b3_r, b3_r, b3_r, const0, const1, const2, const3,
+                b0_r, b1_r, b2_r, b3_r);
+    DOTP_SH4_SW(b3_l, b3_l, b3_l, b3_l, const0, const1, const2, const3,
+                b0_l, b1_l, b2_l, b3_l);
+    ILVRL_H2_SW(in5, in7, temp0_r, temp0_l);
+    ILVR_H2_SH(w5, w7, w7, w3, const4, const6);
+    const5 = __msa_ilvod_h(-w1, -w5);
+    const7 = __msa_ilvod_h(w3, -w1);
+    DPADD_SH4_SW(temp0_r, temp0_r, temp0_r, temp0_r,
+                 const4, const5, const6, const7, b0_r, b1_r, b2_r, b3_r);
+    DPADD_SH4_SW(temp0_l, temp0_l, temp0_l, temp0_l,
+                 const4, const5, const6, const7, b0_l, b1_l, b2_l, b3_l);
+    BUTTERFLY_16(a0_r, a0_l, a1_r, a1_l, a2_r, a2_l, a3_r, a3_l,
+                 b3_l, b3_r, b2_l, b2_r, b1_l, b1_r, b0_l, b0_r,
+                 temp0_r, temp0_l, temp1_r, temp1_l,
+                 temp2_r, temp2_l, temp3_r, temp3_l,
+                 a3_l, a3_r, a2_l, a2_r, a1_l, a1_r, a0_l, a0_r);
+    SRA_4V(temp0_r, temp0_l, temp1_r, temp1_l, 20);
+    SRA_4V(temp2_r, temp2_l, temp3_r, temp3_l, 20);
+    SRA_4V(a3_r, a3_l, a2_r, a2_l, 20);
+    SRA_4V(a1_r, a1_l, a0_r, a0_l, 20); /* second-pass scaling: >> 20 */
+    PCKEV_H4_SW(temp0_l, temp0_r, temp1_l, temp1_r, temp2_l, temp2_r,
+                temp3_l, temp3_r, temp0_r, temp1_r, temp2_r, temp3_r);
+    PCKEV_H4_SW(a0_l, a0_r, a1_l, a1_r, a2_l, a2_r, a3_l, a3_r,
+                a0_r, a1_r, a2_r, a3_r);
+    temp0_r = (v4i32) CLIP_SH_0_255(temp0_r); /* presumably clamps each halfword to 0..255 (macro from generic_macros_msa.h) */
+    temp1_r = (v4i32) CLIP_SH_0_255(temp1_r);
+    temp2_r = (v4i32) CLIP_SH_0_255(temp2_r);
+    temp3_r = (v4i32) CLIP_SH_0_255(temp3_r);
+    PCKEV_B4_SW(temp0_r, temp0_r, temp1_r, temp1_r,
+                temp2_r, temp2_r, temp3_r, temp3_r,
+                temp0_r, temp1_r, temp2_r, temp3_r); /* both operands are the same vector, so each 64-bit half should hold the same 8 bytes */
+    tmp0 = __msa_copy_u_d((v2i64) temp0_r, 1);
+    tmp1 = __msa_copy_u_d((v2i64) temp1_r, 1);
+    tmp2 = __msa_copy_u_d((v2i64) temp2_r, 1);
+    tmp3 = __msa_copy_u_d((v2i64) temp3_r, 1);
+    SD4(tmp0, tmp1, tmp2, tmp3, dst, dst_stride); /* output rows 0..3 */
+    dst += 4 * dst_stride; /* advance to row 4 */
+    a0_r = (v4i32) CLIP_SH_0_255(a0_r);
+    a1_r = (v4i32) CLIP_SH_0_255(a1_r);
+    a2_r = (v4i32) CLIP_SH_0_255(a2_r);
+    a3_r = (v4i32) CLIP_SH_0_255(a3_r);
+    PCKEV_B4_SW(a0_r, a0_r, a1_r, a1_r,
+                a2_r, a2_r, a3_r, a3_r, a0_r, a1_r, a2_r, a3_r);
+    tmp3 = __msa_copy_u_d((v2i64) a0_r, 1); /* reversed assignment: rows 4..7 come from a3, a2, a1, a0 */
+    tmp2 = __msa_copy_u_d((v2i64) a1_r, 1);
+    tmp1 = __msa_copy_u_d((v2i64) a2_r, 1);
+    tmp0 = __msa_copy_u_d((v2i64) a3_r, 1);
+    SD4(tmp0, tmp1, tmp2, tmp3, dst, dst_stride); /* output rows 4..7 */
+    dst += 4 * dst_stride; /* no effect: dst is not read again before returning */
+}
+
+static void simple_idct_add_msa(uint8_t *dst, int32_t dst_stride,
+                                int16_t *block) /* 8x8 IDCT of block; the result is added to the bytes already in dst, clipped and stored back */
+{
+    int32_t const_val;
+    uint64_t tmp0, tmp1, tmp2, tmp3;
+    v8i16 weights = { 0, 22725, 21407, 19266, 16383, 12873, 8867, 4520 }; /* lane k = weight wk (k = 1..7); lane 0 is never read */
+    v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
+    v8i16 w1, w3, w5, w7;
+    v8i16 const0, const1, const2, const3, const4, const5, const6, const7;
+    v4i32 temp0_r, temp1_r, temp2_r, temp3_r;
+    v4i32 temp4_r, temp5_r, temp6_r, temp7_r, temp8_r;
+    v4i32 temp0_l, temp1_l, temp2_l, temp3_l;
+    v4i32 temp4_l, temp5_l, temp6_l, temp7_l, temp8_l;
+    v4i32 a0_r, a1_r, a2_r, a3_r, a0_l, a1_l, a2_l, a3_l;
+    v4i32 b0_r, b1_r, b2_r, b3_r, b0_l, b1_l, b2_l, b3_l;
+    v4i32 w2, w4, w6;
+    v8i16 select_vec, temp;
+    v8i16 zero = { 0 };
+    v4i32 const_val0 = __msa_ldi_w(1); /* turned into 1 << 10 below: bias for the first pass */
+    v4i32 const_val1 = __msa_ldi_w(1); /* this initial value is overwritten before it is used */
+    /* ---- first 1-D pass: bias const_val0, results shifted right by 11 ---- */
+    const_val0 <<= 10; /* 1 << 10, i.e. half of the >> 11 applied at the end of this pass */
+    const_val = 16383 * ((1 << 19) / 16383); /* 1 << 19 rounded down to a multiple of 16383 (= weights[4]) */
+    const_val1 = __msa_insert_w(const_val0, 0, const_val);
+    const_val1 = __msa_splati_w(const_val1, 0); /* const_val in every lane: bias for the second pass */
+    LD_SH8(block, 8, in0, in1, in2, in3, in4, in5, in6, in7); /* fills in0..in7 from block */
+    TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
+                       in0, in1, in2, in3, in4, in5, in6, in7);
+
+    select_vec = in1 | in2 | in3 | in4 | in5 | in6 | in7; /* OR of every input except in0 */
+    select_vec = __msa_clti_u_h((v8u16) select_vec, 1); /* lanes whose OR is 0 become the select mask */
+    UNPCK_SH_SW(in0, a0_r, a0_l);
+    UNPCK_SH_SW(in2, temp3_r, temp3_l);
+    ILVRL_H2_SW(in1, in3, b3_r, b3_l); /* in1/in3 paired up for the dot products below */
+    UNPCK_SH_SW(in4, temp4_r, temp4_l);
+    UNPCK_SH_SW(in6, temp7_r, temp7_l);
+    ILVRL_H2_SW(in5, in7, temp8_r, temp8_l); /* in5/in7 paired up likewise */
+    temp = in0 << 3; /* shortcut result, used below in the lanes flagged by select_vec */
+    SPLATI_H4_SH(weights, 1, 3, 5, 7, w1, w3, w5, w7);
+    ILVR_H4_SH(w1, w3, w3, -w7, w5, -w1, w7, -w5,
+               const0, const1, const2, const3); /* const0..const7 are built once here and reused by the second pass */
+    ILVR_H2_SH(w5, w7, w7, w3, const4, const6);
+    const5 = __msa_ilvod_h(-w1, -w5);
+    const7 = __msa_ilvod_h(w3, -w1);
+    DOTP_SH4_SW(b3_r, b3_r, b3_r, b3_r, const0, const1, const2, const3,
+                b0_r, b1_r, b2_r, b3_r);
+    DPADD_SH4_SW(temp8_r, temp8_r, temp8_r, temp8_r,
+                 const4, const5, const6, const7, b0_r, b1_r, b2_r, b3_r);
+    DOTP_SH4_SW(b3_l, b3_l, b3_l, b3_l, const0, const1, const2, const3,
+                b0_l, b1_l, b2_l, b3_l);
+    DPADD_SH4_SW(temp8_l, temp8_l, temp8_l, temp8_l,
+                 const4, const5, const6, const7, b0_l, b1_l, b2_l, b3_l);
+    w2 = (v4i32) __msa_splati_h(weights, 2);
+    w2 = (v4i32) __msa_ilvr_h(zero, (v8i16) w2); /* weights[2] interleaved with zero halfwords to fill 32-bit lanes */
+    w4 = (v4i32) __msa_splati_h(weights, 4);
+    w4 = (v4i32) __msa_ilvr_h(zero, (v8i16) w4); /* same widening for weights[4] */
+    w6 = (v4i32) __msa_splati_h(weights, 6);
+    w6 = (v4i32) __msa_ilvr_h(zero, (v8i16) w6); /* same widening for weights[6] */
+    MUL2(a0_r, w4, a0_l, w4, a0_r, a0_l);
+    ADD2(a0_r, const_val0, a0_l, const_val0, temp0_r, temp0_l); /* in0 * w4 + first-pass bias */
+    MUL2(w2, temp3_r, w2, temp3_l, temp1_r, temp1_l); /* temp1 = w2 * in2 */
+    MUL2(w6, temp3_r, w6, temp3_l, temp2_r, temp2_l); /* temp2 = w6 * in2 */
+    BUTTERFLY_8(temp0_r, temp0_l, temp0_r, temp0_l,
+                temp2_l, temp2_r, temp1_l, temp1_r,
+                a0_r, a0_l, a1_r, a1_l, a2_l, a2_r, a3_l, a3_r);
+    MUL2(temp4_r, w4, temp4_l, w4, temp4_r, temp4_l); /* temp4 = w4 * in4 */
+    MUL2(temp7_r, w2, temp7_l, w2, temp6_r, temp6_l); /* temp6 = w2 * in6 */
+    MUL2(temp7_r, w6, temp7_l, w6, temp5_r, temp5_l); /* temp5 = w6 * in6 */
+    ADD2(a0_r, temp4_r, a0_l, temp4_l, a0_r, a0_l);
+    SUB2(a1_r, temp4_r, a1_l, temp4_l, a1_r, a1_l);
+    SUB2(a2_r, temp4_r, a2_l, temp4_l, a2_r, a2_l);
+    ADD2(a3_r, temp4_r, a3_l, temp4_l, a3_r, a3_l);
+    ADD2(a0_r, temp5_r, a0_l, temp5_l, a0_r, a0_l);
+    SUB2(a1_r, temp6_r, a1_l, temp6_l, a1_r, a1_l);
+    ADD2(a2_r, temp6_r, a2_l, temp6_l, a2_r, a2_l);
+    SUB2(a3_r, temp5_r, a3_l, temp5_l, a3_r, a3_l);
+    BUTTERFLY_16(a0_r, a0_l, a1_r, a1_l, a2_r, a2_l, a3_r, a3_l,
+                 b3_l, b3_r, b2_l, b2_r, b1_l, b1_r, b0_l, b0_r,
+                 temp0_r, temp0_l, temp1_r, temp1_l,
+                 temp2_r, temp2_l, temp3_r, temp3_l,
+                 a3_l, a3_r, a2_l, a2_r, a1_l, a1_r, a0_l, a0_r); /* presumably tempN = aN + bN and aN = aN - bN; confirm in generic_macros_msa.h */
+    SRA_4V(temp0_r, temp0_l, temp1_r, temp1_l, 11);
+    SRA_4V(temp2_r, temp2_l, temp3_r, temp3_l, 11); /* first-pass scaling: >> 11 */
+    PCKEV_H4_SW(temp0_l, temp0_r, temp1_l, temp1_r,
+                temp2_l, temp2_r, temp3_l, temp3_r,
+                temp0_r, temp1_r, temp2_r, temp3_r);
+    in0 = (v8i16) __msa_bmnz_v((v16u8) temp0_r, (v16u8) temp,
+                               (v16u8) select_vec); /* temp where select_vec is set, computed value elsewhere */
+    in1 = (v8i16) __msa_bmnz_v((v16u8) temp1_r, (v16u8) temp,
+                               (v16u8) select_vec);
+    in2 = (v8i16) __msa_bmnz_v((v16u8) temp2_r, (v16u8) temp,
+                               (v16u8) select_vec);
+    in3 = (v8i16) __msa_bmnz_v((v16u8) temp3_r, (v16u8) temp,
+                               (v16u8) select_vec);
+    SRA_4V(a3_r, a3_l, a2_r, a2_l, 11);
+    SRA_4V(a1_r, a1_l, a0_r, a0_l, 11);
+    PCKEV_H4_SW(a0_l, a0_r, a1_l, a1_r, a2_l, a2_r, a3_l, a3_r,
+                a0_r, a1_r, a2_r, a3_r);
+    in4 = (v8i16) __msa_bmnz_v((v16u8) a3_r, (v16u8) temp, (v16u8) select_vec); /* note the reversed order: in4..in7 come from a3..a0 */
+    in5 = (v8i16) __msa_bmnz_v((v16u8) a2_r, (v16u8) temp, (v16u8) select_vec);
+    in6 = (v8i16) __msa_bmnz_v((v16u8) a1_r, (v16u8) temp, (v16u8) select_vec);
+    in7 = (v8i16) __msa_bmnz_v((v16u8) a0_r, (v16u8) temp, (v16u8) select_vec);
+    TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
+                       in0, in1, in2, in3, in4, in5, in6, in7);
+    /* ---- second 1-D pass: reuses w2/w4/w6 and const0..const7, bias const_val1, >> 20 ---- */
+    UNPCK_SH_SW(in0, a0_r, a0_l);
+    UNPCK_SH_SW(in2, temp3_r, temp3_l);
+    MUL2(a0_r, w4, a0_l, w4, a0_r, a0_l);
+    ADD2(a0_r, const_val1, a0_l, const_val1, temp0_r, temp0_l); /* in0 * w4 + second-pass bias */
+    MUL2(w2, temp3_r, w2, temp3_l, temp1_r, temp1_l);
+    MUL2(w6, temp3_r, w6, temp3_l, temp2_r, temp2_l);
+    BUTTERFLY_8(temp0_r, temp0_l, temp0_r, temp0_l,
+                temp2_l, temp2_r, temp1_l, temp1_r,
+                a0_r, a0_l, a1_r, a1_l, a2_l, a2_r, a3_l, a3_r);
+    UNPCK_SH_SW(in4, temp0_r, temp0_l);
+    UNPCK_SH_SW(in6, temp3_r, temp3_l);
+    MUL2(temp0_r, w4, temp0_l, w4, temp0_r, temp0_l);
+    MUL2(w2, temp3_r, w2, temp3_l, temp2_r, temp2_l);
+    MUL2(w6, temp3_r, w6, temp3_l, temp1_r, temp1_l);
+    ADD2(a0_r, temp0_r, a0_l, temp0_l, a0_r, a0_l);
+    SUB2(a1_r, temp0_r, a1_l, temp0_l, a1_r, a1_l);
+    SUB2(a2_r, temp0_r, a2_l, temp0_l, a2_r, a2_l);
+    ADD2(a3_r, temp0_r, a3_l, temp0_l, a3_r, a3_l);
+    ADD2(a0_r, temp1_r, a0_l, temp1_l, a0_r, a0_l);
+    SUB2(a1_r, temp2_r, a1_l, temp2_l, a1_r, a1_l);
+    ADD2(a2_r, temp2_r, a2_l, temp2_l, a2_r, a2_l);
+    SUB2(a3_r, temp1_r, a3_l, temp1_l, a3_r, a3_l);
+    ILVRL_H2_SW(in1, in3, b3_r, b3_l);
+    ILVRL_H2_SW(in5, in7, temp0_r, temp0_l);
+    DOTP_SH4_SW(b3_r, b3_r, b3_r, b3_r, const0, const1, const2, const3,
+                b0_r, b1_r, b2_r, b3_r);
+    DOTP_SH4_SW(b3_l, b3_l, b3_l, b3_l, const0, const1, const2, const3,
+                b0_l, b1_l, b2_l, b3_l);
+    DPADD_SH4_SW(temp0_r, temp0_r, temp0_r, temp0_r,
+                 const4, const5, const6, const7, b0_r, b1_r, b2_r, b3_r);
+    DPADD_SH4_SW(temp0_l, temp0_l, temp0_l, temp0_l,
+                 const4, const5, const6, const7, b0_l, b1_l, b2_l, b3_l);
+    BUTTERFLY_16(a0_r, a0_l, a1_r, a1_l, a2_r, a2_l, a3_r, a3_l,
+                 b3_l, b3_r, b2_l, b2_r, b1_l, b1_r, b0_l, b0_r,
+                 temp0_r, temp0_l, temp1_r, temp1_l,
+                 temp2_r, temp2_l, temp3_r, temp3_l,
+                 a3_l, a3_r, a2_l, a2_r, a1_l, a1_r, a0_l, a0_r);
+    SRA_4V(temp0_r, temp0_l, temp1_r, temp1_l, 20);
+    SRA_4V(temp2_r, temp2_l, temp3_r, temp3_l, 20); /* second-pass scaling: >> 20 */
+    LD_SH4(dst, dst_stride, in0, in1, in2, in3); /* NOTE(review): loads whole vectors from dst rows 0..3 although only the right half is used below; confirm the extra bytes are readable */
+    PCKEV_H4_SW(temp0_l, temp0_r, temp1_l, temp1_r, temp2_l, temp2_r,
+                temp3_l, temp3_r, temp0_r, temp1_r, temp2_r, temp3_r);
+    ILVR_B4_SW(zero, in0, zero, in1, zero, in2, zero, in3,
+               temp0_l, temp1_l, temp2_l, temp3_l); /* dst bytes interleaved with zero bytes to form halfwords */
+    temp0_r = (v4i32) ((v8i16) (temp0_r) + (v8i16) (temp0_l)); /* IDCT result + existing dst value, as 16-bit lanes */
+    temp1_r = (v4i32) ((v8i16) (temp1_r) + (v8i16) (temp1_l));
+    temp2_r = (v4i32) ((v8i16) (temp2_r) + (v8i16) (temp2_l));
+    temp3_r = (v4i32) ((v8i16) (temp3_r) + (v8i16) (temp3_l));
+    temp0_r = (v4i32) CLIP_SH_0_255(temp0_r); /* presumably clamps each halfword to 0..255 (macro from generic_macros_msa.h) */
+    temp1_r = (v4i32) CLIP_SH_0_255(temp1_r);
+    temp2_r = (v4i32) CLIP_SH_0_255(temp2_r);
+    temp3_r = (v4i32) CLIP_SH_0_255(temp3_r);
+    PCKEV_B4_SW(temp0_r, temp0_r, temp1_r, temp1_r,
+                temp2_r, temp2_r, temp3_r, temp3_r,
+                temp0_r, temp1_r, temp2_r, temp3_r); /* both operands are the same vector, so each 64-bit half should hold the same 8 bytes */
+    tmp0 = __msa_copy_u_d((v2i64) temp0_r, 1);
+    tmp1 = __msa_copy_u_d((v2i64) temp1_r, 1);
+    tmp2 = __msa_copy_u_d((v2i64) temp2_r, 1);
+    tmp3 = __msa_copy_u_d((v2i64) temp3_r, 1);
+    SD4(tmp0, tmp1, tmp2, tmp3, dst, dst_stride); /* output rows 0..3 */
+
+    SRA_4V(a3_r, a3_l, a2_r, a2_l, 20);
+    SRA_4V(a1_r, a1_l, a0_r, a0_l, 20);
+    LD_SH4(dst + 4 * dst_stride, dst_stride, in4, in5, in6, in7); /* existing dst rows 4..7 (same full-vector load as above) */
+    PCKEV_H4_SW(a0_l, a0_r, a1_l, a1_r, a2_l, a2_r, a3_l, a3_r,
+                a0_r, a1_r, a2_r, a3_r);
+    ILVR_B4_SW(zero, in4, zero, in5, zero, in6, zero, in7,
+               a3_l, a2_l, a1_l, a0_l); /* reversed pairing: row 4 goes with a3, row 7 with a0 */
+    a3_r = (v4i32) ((v8i16) (a3_r) + (v8i16) (a3_l));
+    a2_r = (v4i32) ((v8i16) (a2_r) + (v8i16) (a2_l));
+    a1_r = (v4i32) ((v8i16) (a1_r) + (v8i16) (a1_l));
+    a0_r = (v4i32) ((v8i16) (a0_r) + (v8i16) (a0_l));
+    a3_r = (v4i32) CLIP_SH_0_255(a3_r);
+    a2_r = (v4i32) CLIP_SH_0_255(a2_r);
+    a1_r = (v4i32) CLIP_SH_0_255(a1_r);
+    a0_r = (v4i32) CLIP_SH_0_255(a0_r);
+    PCKEV_B4_SW(a0_r, a0_r, a1_r, a1_r,
+                a2_r, a2_r, a3_r, a3_r, a0_r, a1_r, a2_r, a3_r);
+    tmp0 = __msa_copy_u_d((v2i64) a3_r, 1);
+    tmp1 = __msa_copy_u_d((v2i64) a2_r, 1);
+    tmp2 = __msa_copy_u_d((v2i64) a1_r, 1);
+    tmp3 = __msa_copy_u_d((v2i64) a0_r, 1);
+    SD4(tmp0, tmp1, tmp2, tmp3, dst + 4 * dst_stride, dst_stride); /* output rows 4..7 */
+}
+
+void ff_simple_idct_msa(int16_t *block) /* exported entry point; forwards to the file-local simple_idct_msa() */
+{
+    simple_idct_msa(block);
+}
+
+void ff_simple_idct_put_msa(uint8_t *dst, int32_t dst_stride, int16_t *block) /* exported entry point; forwards to the file-local simple_idct_put_msa() */
+{
+    simple_idct_put_msa(dst, dst_stride, block);
+}
+
+void ff_simple_idct_add_msa(uint8_t *dst, int32_t dst_stride, int16_t *block) /* exported entry point; forwards to the file-local simple_idct_add_msa() */
+{
+    simple_idct_add_msa(dst, dst_stride, block);
+}
diff --git a/libavcodec/mips/vp8_idct_msa.c b/libavcodec/mips/vp8_idct_msa.c
new file mode 100644
index 00000000..11ac9ff8
--- /dev/null
+++ b/libavcodec/mips/vp8_idct_msa.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2015 Manojkumar Bhosale (Manojkumar.Bhosale@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <string.h>
+#include "libavcodec/vp8dsp.h"
+#include "libavutil/mips/generic_macros_msa.h"
+#include "vp8dsp_mips.h"
+
+static const int cospi8sqrt2minus1 = 20091; /* (sqrt(2)*cos(pi/8)-1) * 2^16 */
+static const int sinpi8sqrt2 = 35468;       /* sqrt(2)*sin(pi/8) * 2^16 */
+/* One 1-D pass of the 4-point VP8 inverse DCT, applied lane-wise to v4i32. */
+#define VP8_IDCT_1D_W(in0, in1, in2, in3, out0, out1, out2, out3)    \
+{                                                                    \
+    v4i32 a1_m, b1_m, c1_m, d1_m;                                    \
+    v4i32 c_tmp1_m, c_tmp2_m, d_tmp1_m, d_tmp2_m;                    \
+    v4i32 const_cospi8sqrt2minus1_m, sinpi8_sqrt2_m;                 \
+    /* broadcast the Q16 multipliers to all four lanes */            \
+    const_cospi8sqrt2minus1_m = __msa_fill_w(cospi8sqrt2minus1);     \
+    sinpi8_sqrt2_m = __msa_fill_w(sinpi8sqrt2);                      \
+    a1_m = (in0) + (in2);                                            \
+    b1_m = (in0) - (in2);                                            \
+    c_tmp1_m = ((in1) * sinpi8_sqrt2_m) >> 16;                       \
+    c_tmp2_m = (in3) + (((in3) * const_cospi8sqrt2minus1_m) >> 16);  \
+    c1_m = c_tmp1_m - c_tmp2_m;                                      \
+    d_tmp1_m = (in1) + (((in1) * const_cospi8sqrt2minus1_m) >> 16);  \
+    d_tmp2_m = ((in3) * sinpi8_sqrt2_m) >> 16;                       \
+    d1_m = d_tmp1_m + d_tmp2_m;                                      \
+    BUTTERFLY_4(a1_m, b1_m, c1_m, d1_m, out0, out1, out2, out3);     \
+}
+
+void ff_vp8_idct_add_msa(uint8_t *dst, int16_t input[16], ptrdiff_t stride)
+{
+    v8i16 input0, input1;
+    v4i32 in0, in1, in2, in3, hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
+    v4i32 res0, res1, res2, res3;
+    v16i8 zero = { 0 };
+    v16i8 pred0, pred1, pred2, pred3, dest0, dest1;
+    v16i8 mask = { 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0 };
+    /* add the 4x4 inverse transform of input[] to dst, then zero input[] */
+    /* load the 16 coefficients and widen each from 16 to 32 bits */
+    LD_SH2(input, 8, input0, input1);
+    UNPCK_SH_SW(input0, in0, in1);
+    UNPCK_SH_SW(input1, in2, in3);
+    VP8_IDCT_1D_W(in0, in1, in2, in3, hz0, hz1, hz2, hz3);
+    /* transpose so the second pass runs along the other dimension */
+    TRANSPOSE4x4_SW_SW(hz0, hz1, hz2, hz3, hz0, hz1, hz2, hz3);
+    VP8_IDCT_1D_W(hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3);
+    SRARI_W4_SW(vt0, vt1, vt2, vt3, 3); /* rounding shift right by 3 */
+    /* transpose back to the original orientation */
+    TRANSPOSE4x4_SW_SW(vt0, vt1, vt2, vt3, vt0, vt1, vt2, vt3);
+    LD_SB4(dst, stride, pred0, pred1, pred2, pred3); /* 4 rows of dst */
+    ILVR_B4_SW(zero, pred0, zero, pred1, zero, pred2, zero, pred3,
+               res0, res1, res2, res3); /* bytes -> halfwords (zero fill) */
+    ILVR_H4_SW(zero, res0, zero, res1, zero, res2, zero, res3,
+               res0, res1, res2, res3); /* halfwords -> words (zero fill) */
+    ADD4(res0, vt0, res1, vt1, res2, vt2, res3, vt3, res0, res1, res2, res3);
+    res0 = CLIP_SW_0_255(res0);   /* clamp every 32-bit sum to 0..255;    */
+    res1 = CLIP_SW_0_255(res1);   /* the shuffle mask below then keeps    */
+    res2 = CLIP_SW_0_255(res2);   /* byte 0 of each word (indices 0, 4,   */
+    res3 = CLIP_SW_0_255(res3);   /* ... 28 across a pair of vectors)     */
+    VSHF_B2_SB(res0, res1, res2, res3, mask, mask, dest0, dest1);
+    ST4x4_UB(dest0, dest1, 0, 1, 0, 1, dst, stride);
+    /* the coefficients have been consumed: leave the block cleared */
+    memset(input, 0, 4 * 4 * sizeof(*input));
+}
+
+void ff_vp8_idct_dc_add_msa(uint8_t *dst, int16_t in_dc[16], ptrdiff_t stride)
+{
+    v8i16 vec;
+    v8i16 res0, res1, res2, res3;
+    v16i8 zero = { 0 };
+    v16i8 pred0, pred1, pred2, pred3, dest0, dest1;
+    v16i8 mask = { 0, 2, 4, 6, 16, 18, 20, 22, 0, 0, 0, 0, 0, 0, 0, 0 };
+    /* DC-only case: one rounded value is added to four rows of dst */
+    vec = __msa_fill_h(in_dc[0]);
+    vec = __msa_srari_h(vec, 3); /* (in_dc[0] + 4) >> 3 in every lane */
+    LD_SB4(dst, stride, pred0, pred1, pred2, pred3);
+    ILVR_B4_SH(zero, pred0, zero, pred1, zero, pred2, zero, pred3,
+               res0, res1, res2, res3); /* bytes -> halfwords (zero fill) */
+    ADD4(res0, vec, res1, vec, res2, vec, res3, vec, res0, res1, res2, res3);
+    CLIP_SH4_0_255(res0, res1, res2, res3);
+    VSHF_B2_SB(res0, res1, res2, res3, mask, mask, dest0, dest1);
+    ST4x4_UB(dest0, dest1, 0, 1, 0, 1, dst, stride);
+    /* only coefficient 0 was read, so only coefficient 0 is cleared */
+    in_dc[0] = 0;
+}
+
+void ff_vp8_luma_dc_wht_msa(int16_t block[4][4][16], int16_t input[16])
+{
+    int16_t *mb_dq_coeff = &block[0][0][0];
+    v8i16 input0, input1;
+    v4i32 in0, in1, in2, in3, a1, b1, c1, d1;
+    v4i32 hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
+    /* inverse WHT of input[]; each result lands in coefficient 0 of a block */
+    /* load the 16 coefficients and widen each from 16 to 32 bits */
+    LD_SH2(input, 8, input0, input1);
+    UNPCK_SH_SW(input0, in0, in1);
+    UNPCK_SH_SW(input1, in2, in3);
+    BUTTERFLY_4(in0, in1, in2, in3, a1, b1, c1, d1);
+    BUTTERFLY_4(a1, d1, c1, b1, hz0, hz1, hz3, hz2);
+    /* transpose so the second pass runs along the other dimension */
+    TRANSPOSE4x4_SW_SW(hz0, hz1, hz2, hz3, hz0, hz1, hz2, hz3);
+    BUTTERFLY_4(hz0, hz1, hz2, hz3, a1, b1, c1, d1);
+    BUTTERFLY_4(a1, d1, c1, b1, vt0, vt1, vt3, vt2);
+    ADD4(vt0, 3, vt1, 3, vt2, 3, vt3, 3, vt0, vt1, vt2, vt3);
+    SRA_4V(vt0, vt1, vt2, vt3, 3); /* with the add above: (x + 3) >> 3 */
+    mb_dq_coeff[0] = __msa_copy_s_h((v8i16) vt0, 0);   /* halfword 0 */
+    mb_dq_coeff[16] = __msa_copy_s_h((v8i16) vt1, 0);
+    mb_dq_coeff[32] = __msa_copy_s_h((v8i16) vt2, 0);
+    mb_dq_coeff[48] = __msa_copy_s_h((v8i16) vt3, 0);
+    mb_dq_coeff[64] = __msa_copy_s_h((v8i16) vt0, 2);  /* halfword 2 */
+    mb_dq_coeff[80] = __msa_copy_s_h((v8i16) vt1, 2);
+    mb_dq_coeff[96] = __msa_copy_s_h((v8i16) vt2, 2);
+    mb_dq_coeff[112] = __msa_copy_s_h((v8i16) vt3, 2);
+    mb_dq_coeff[128] = __msa_copy_s_h((v8i16) vt0, 4); /* halfword 4 */
+    mb_dq_coeff[144] = __msa_copy_s_h((v8i16) vt1, 4);
+    mb_dq_coeff[160] = __msa_copy_s_h((v8i16) vt2, 4);
+    mb_dq_coeff[176] = __msa_copy_s_h((v8i16) vt3, 4);
+    mb_dq_coeff[192] = __msa_copy_s_h((v8i16) vt0, 6); /* halfword 6 */
+    mb_dq_coeff[208] = __msa_copy_s_h((v8i16) vt1, 6);
+    mb_dq_coeff[224] = __msa_copy_s_h((v8i16) vt2, 6);
+    mb_dq_coeff[240] = __msa_copy_s_h((v8i16) vt3, 6);
+    /* all 16 values are stored: clear the source, sized by its own type */
+    memset(input, 0, 4 * 4 * sizeof(*input));
+}
+
+void ff_vp8_idct_dc_add4y_msa(uint8_t *dst, int16_t block[4][16],
+                              ptrdiff_t stride)
+{
+    int i;
+    /* four 4x4 blocks side by side; block i starts at column 4 * i */
+    for (i = 0; i < 4; i++)
+        ff_vp8_idct_dc_add_msa(dst + 4 * i, block[i], stride);
+}
+
+void ff_vp8_idct_dc_add4uv_msa(uint8_t *dst, int16_t block[4][16],
+                               ptrdiff_t stride)
+{
+    int i; /* 2x2 grid of 4x4 blocks: i & 1 picks the column, i >> 1 the row */
+    for (i = 0; i < 4; i++)
+        ff_vp8_idct_dc_add_msa(dst + (i >> 1) * 4 * stride + (i & 1) * 4,
+                               block[i], stride);
+}
diff --git a/libavcodec/mips/vp8_lpf_msa.c b/libavcodec/mips/vp8_lpf_msa.c
new file mode 100644
index 00000000..35909617
--- /dev/null
+++ b/libavcodec/mips/vp8_lpf_msa.c
@@ -0,0 +1,690 @@
+/*
+ * Copyright (c) 2015 Manojkumar Bhosale (Manojkumar.Bhosale@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/vp8dsp.h"
+#include "libavutil/mips/generic_macros_msa.h"
+#include "vp8dsp_mips.h"
+
+#define VP8_SIMPLE_MASK(p1, p0, q0, q1, b_limit, mask)           \
+{                                                                \
+    v16u8 p1_a_sub_q1, p0_a_sub_q0;                              \
+    /* mask = (2 * |p0 - q0| + |p1 - q1| / 2 <= b_limit) */      \
+    p0_a_sub_q0 = __msa_asub_u_b(p0, q0);                        \
+    p1_a_sub_q1 = __msa_asub_u_b(p1, q1);                        \
+    p1_a_sub_q1 = (v16u8) __msa_srli_b((v16i8) p1_a_sub_q1, 1);  \
+    p0_a_sub_q0 = __msa_adds_u_b(p0_a_sub_q0, p0_a_sub_q0);      \
+    mask = __msa_adds_u_b(p0_a_sub_q0, p1_a_sub_q1);             \
+    mask = ((v16u8) mask <= b_limit); /* 0xff where true */      \
+}
+
+#define VP8_LPF_FILTER4_4W(p1_in_out, p0_in_out, q0_in_out, q1_in_out,  \
+                           mask_in, hev_in)                             \
+{                                                                       \
+    v16i8 p1_m, p0_m, q0_m, q1_m, q0_sub_p0, filt_sign;                 \
+    v16i8 filt, filt1, filt2, cnst4b, cnst3b;                           \
+    v8i16 q0_sub_p0_r, q0_sub_p0_l, filt_l, filt_r, cnst3h;             \
+    /* flip the sign bit: unsigned pixels -> signed range -128..127 */  \
+    p1_m = (v16i8) __msa_xori_b(p1_in_out, 0x80);                       \
+    p0_m = (v16i8) __msa_xori_b(p0_in_out, 0x80);                       \
+    q0_m = (v16i8) __msa_xori_b(q0_in_out, 0x80);                       \
+    q1_m = (v16i8) __msa_xori_b(q1_in_out, 0x80);                       \
+    /* filt = clamp(p1 - q1), kept only where hev is set */             \
+    filt = __msa_subs_s_b(p1_m, q1_m);                                  \
+                                                                        \
+    filt = filt & (v16i8) hev_in;                                       \
+    /* add 3 * (q0 - p0) in 16 bits, then saturate to 8 bits */         \
+    q0_sub_p0 = q0_m - p0_m;                                            \
+    filt_sign = __msa_clti_s_b(filt, 0);                                \
+                                                                        \
+    cnst3h = __msa_ldi_h(3);                                            \
+    q0_sub_p0_r = (v8i16) __msa_ilvr_b(q0_sub_p0, q0_sub_p0);           \
+    q0_sub_p0_r = __msa_dotp_s_h((v16i8) q0_sub_p0_r, (v16i8) cnst3h);  \
+    filt_r = (v8i16) __msa_ilvr_b(filt_sign, filt);                     \
+    filt_r += q0_sub_p0_r;                                              \
+    filt_r = __msa_sat_s_h(filt_r, 7);                                  \
+                                                                        \
+    q0_sub_p0_l = (v8i16) __msa_ilvl_b(q0_sub_p0, q0_sub_p0);           \
+    q0_sub_p0_l = __msa_dotp_s_h((v16i8) q0_sub_p0_l, (v16i8) cnst3h);  \
+    filt_l = (v8i16) __msa_ilvl_b(filt_sign, filt);                     \
+    filt_l += q0_sub_p0_l;                                              \
+    filt_l = __msa_sat_s_h(filt_l, 7);                                  \
+                                                                        \
+    filt = __msa_pckev_b((v16i8) filt_l, (v16i8) filt_r);               \
+    filt = filt & (v16i8) mask_in;                                      \
+    /* filt1 = (filt + 4) >> 3 and filt2 = (filt + 3) >> 3 */           \
+    cnst4b = __msa_ldi_b(4);                                            \
+    filt1 = __msa_adds_s_b(filt, cnst4b);                               \
+    filt1 >>= 3;                                                        \
+                                                                        \
+    cnst3b = __msa_ldi_b(3);                                            \
+    filt2 = __msa_adds_s_b(filt, cnst3b);                               \
+    filt2 >>= 3;                                                        \
+    /* q0 -= filt1, p0 += filt2, then undo the sign-bit flip */         \
+    q0_m = __msa_subs_s_b(q0_m, filt1);                                 \
+    q0_in_out = __msa_xori_b((v16u8) q0_m, 0x80);                       \
+    p0_m = __msa_adds_s_b(p0_m, filt2);                                 \
+    p0_in_out = __msa_xori_b((v16u8) p0_m, 0x80);                       \
+    /* outer taps: (filt1 + 1) >> 1, applied only where hev is clear */ \
+    filt = __msa_srari_b(filt1, 1);                                     \
+    hev_in = __msa_xori_b((v16u8) hev_in, 0xff);                        \
+    filt = filt & (v16i8) hev_in;                                       \
+                                                                        \
+    q1_m = __msa_subs_s_b(q1_m, filt);                                  \
+    q1_in_out = __msa_xori_b((v16u8) q1_m, 0x80);                       \
+    p1_m = __msa_adds_s_b(p1_m, filt);                                  \
+    p1_in_out = __msa_xori_b((v16u8) p1_m, 0x80);                       \
+}
+
+#define VP8_SIMPLE_FILT(p1_in, p0_in, q0_in, q1_in, mask)           \
+{                                                                   \
+    v16i8 p1_m, p0_m, q0_m, q1_m, q0_sub_p0, q0_sub_p0_sign;        \
+    v16i8 filt, filt1, filt2, cnst4b, cnst3b, filt_sign;            \
+    v8i16 q0_sub_p0_r, q0_sub_p0_l, filt_l, filt_r, cnst3h;         \
+    /* flip the sign bit: unsigned pixels -> signed -128..127 */    \
+    p1_m = (v16i8) __msa_xori_b(p1_in, 0x80);                       \
+    p0_m = (v16i8) __msa_xori_b(p0_in, 0x80);                       \
+    q0_m = (v16i8) __msa_xori_b(q0_in, 0x80);                       \
+    q1_m = (v16i8) __msa_xori_b(q1_in, 0x80);                       \
+    /* filt = clamp(p1 - q1) + 3 * (q0 - p0), built in 16 bits */   \
+    filt = __msa_subs_s_b(p1_m, q1_m);                              \
+                                                                    \
+    q0_sub_p0 = q0_m - p0_m;                                        \
+    filt_sign = __msa_clti_s_b(filt, 0);                            \
+                                                                    \
+    cnst3h = __msa_ldi_h(3);                                        \
+    q0_sub_p0_sign = __msa_clti_s_b(q0_sub_p0, 0);                  \
+    q0_sub_p0_r = (v8i16) __msa_ilvr_b(q0_sub_p0_sign, q0_sub_p0);  \
+    q0_sub_p0_r *= cnst3h;                                          \
+    filt_r = (v8i16) __msa_ilvr_b(filt_sign, filt);                 \
+    filt_r += q0_sub_p0_r;                                          \
+    filt_r = __msa_sat_s_h(filt_r, 7);                              \
+                                                                    \
+    q0_sub_p0_l = (v8i16) __msa_ilvl_b(q0_sub_p0_sign, q0_sub_p0);  \
+    q0_sub_p0_l *= cnst3h;                                          \
+    filt_l = (v8i16) __msa_ilvl_b(filt_sign, filt);                 \
+    filt_l += q0_sub_p0_l;                                          \
+    filt_l = __msa_sat_s_h(filt_l, 7);                              \
+    /* repack to bytes and zero the lanes the mask rejects */       \
+    filt = __msa_pckev_b((v16i8) filt_l, (v16i8) filt_r);           \
+    filt = filt & (v16i8) (mask);                                   \
+    /* filt1 = (filt + 4) >> 3 and filt2 = (filt + 3) >> 3 */       \
+    cnst4b = __msa_ldi_b(4);                                        \
+    filt1 = __msa_adds_s_b(filt, cnst4b);                           \
+    filt1 >>= 3;                                                    \
+                                                                    \
+    cnst3b = __msa_ldi_b(3);                                        \
+    filt2 = __msa_adds_s_b(filt, cnst3b);                           \
+    filt2 >>= 3;                                                    \
+    /* q0 -= filt1, p0 += filt2, then undo the sign-bit flip */     \
+    q0_m = __msa_subs_s_b(q0_m, filt1);                             \
+    p0_m = __msa_adds_s_b(p0_m, filt2);                             \
+    q0_in = __msa_xori_b((v16u8) q0_m, 0x80);                       \
+    p0_in = __msa_xori_b((v16u8) p0_m, 0x80);                       \
+}
+
+#define VP8_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev)             \
+{                                                                   \
+    v16i8 p2_m, p1_m, p0_m, q2_m, q1_m, q0_m;                       \
+    v16i8 filt, q0_sub_p0, cnst4b, cnst3b;                          \
+    v16i8 u, filt1, filt2, filt_sign, q0_sub_p0_sign;               \
+    v8i16 q0_sub_p0_r, q0_sub_p0_l, filt_r, u_r, u_l, filt_l;       \
+    v8i16 cnst3h, cnst27h, cnst18h, cnst63h;                        \
+    /* w = clamp(clamp(p1 - q1) + 3 * (q0 - p0)) & mask */          \
+    cnst3h = __msa_ldi_h(3);                                        \
+    /* flip the sign bit: unsigned pixels -> signed -128..127 */    \
+    p2_m = (v16i8) __msa_xori_b(p2, 0x80);                          \
+    p1_m = (v16i8) __msa_xori_b(p1, 0x80);                          \
+    p0_m = (v16i8) __msa_xori_b(p0, 0x80);                          \
+    q0_m = (v16i8) __msa_xori_b(q0, 0x80);                          \
+    q1_m = (v16i8) __msa_xori_b(q1, 0x80);                          \
+    q2_m = (v16i8) __msa_xori_b(q2, 0x80);                          \
+                                                                    \
+    filt = __msa_subs_s_b(p1_m, q1_m);                              \
+    q0_sub_p0 = q0_m - p0_m;                                        \
+    q0_sub_p0_sign = __msa_clti_s_b(q0_sub_p0, 0);                  \
+    filt_sign = __msa_clti_s_b(filt, 0);                            \
+                                                                    \
+    /* right part */                                                \
+    q0_sub_p0_r = (v8i16) __msa_ilvr_b(q0_sub_p0_sign, q0_sub_p0);  \
+    q0_sub_p0_r *= cnst3h;                                          \
+    filt_r = (v8i16) __msa_ilvr_b(filt_sign, filt);                 \
+    filt_r = filt_r + q0_sub_p0_r;                                  \
+    filt_r = __msa_sat_s_h(filt_r, 7);                              \
+                                                                    \
+    /* left part */                                                 \
+    q0_sub_p0_l = (v8i16) __msa_ilvl_b(q0_sub_p0_sign, q0_sub_p0);  \
+    q0_sub_p0_l *= cnst3h;                                          \
+    filt_l = (v8i16) __msa_ilvl_b(filt_sign, filt);                 \
+    filt_l = filt_l + q0_sub_p0_l;                                  \
+    filt_l = __msa_sat_s_h(filt_l, 7);                              \
+                                                                    \
+    /* combine left and right part */                               \
+    filt = __msa_pckev_b((v16i8) filt_l, (v16i8) filt_r);           \
+    filt = filt & (v16i8) mask;                                     \
+    filt2 = filt & (v16i8) hev;                                     \
+    /* where hev is set, only p0/q0 get the (+4, +3) >> 3 step */   \
+    /* filt &= ~hev; note that hev itself is inverted here */       \
+    hev = __msa_xori_b(hev, 0xff);                                  \
+    filt = filt & (v16i8) hev;                                      \
+    cnst4b = __msa_ldi_b(4);                                        \
+    filt1 = __msa_adds_s_b(filt2, cnst4b);                          \
+    filt1 >>= 3;                                                    \
+    cnst3b = __msa_ldi_b(3);                                        \
+    filt2 = __msa_adds_s_b(filt2, cnst3b);                          \
+    filt2 >>= 3;                                                    \
+    q0_m = __msa_subs_s_b(q0_m, filt1);                             \
+    p0_m = __msa_adds_s_b(p0_m, filt2);                             \
+                                                                    \
+    filt_sign = __msa_clti_s_b(filt, 0);                            \
+    ILVRL_B2_SH(filt_sign, filt, filt_r, filt_l);                   \
+    /* elsewhere: (27 * w + 63) >> 7 is applied to p0 and q0 */     \
+    cnst27h = __msa_ldi_h(27);                                      \
+    cnst63h = __msa_ldi_h(63);                                      \
+                                                                    \
+    /* right part */                                                \
+    u_r = filt_r * cnst27h;                                         \
+    u_r += cnst63h;                                                 \
+    u_r >>= 7;                                                      \
+    u_r = __msa_sat_s_h(u_r, 7);                                    \
+    /* left part */                                                 \
+    u_l = filt_l * cnst27h;                                         \
+    u_l += cnst63h;                                                 \
+    u_l >>= 7;                                                      \
+    u_l = __msa_sat_s_h(u_l, 7);                                    \
+    /* combine left and right part */                               \
+    u = __msa_pckev_b((v16i8) u_l, (v16i8) u_r);                    \
+    q0_m = __msa_subs_s_b(q0_m, u);                                 \
+    q0 = __msa_xori_b((v16u8) q0_m, 0x80);                          \
+    p0_m = __msa_adds_s_b(p0_m, u);                                 \
+    p0 = __msa_xori_b((v16u8) p0_m, 0x80);                          \
+    cnst18h = __msa_ldi_h(18); /* (18 * w + 63) >> 7 -> p1, q1 */   \
+    u_r = filt_r * cnst18h;                                         \
+    u_r += cnst63h;                                                 \
+    u_r >>= 7;                                                      \
+    u_r = __msa_sat_s_h(u_r, 7);                                    \
+                                                                    \
+    /* left part */                                                 \
+    u_l = filt_l * cnst18h;                                         \
+    u_l += cnst63h;                                                 \
+    u_l >>= 7;                                                      \
+    u_l = __msa_sat_s_h(u_l, 7);                                    \
+    /* combine left and right part */                               \
+    u = __msa_pckev_b((v16i8) u_l, (v16i8) u_r);                    \
+    q1_m = __msa_subs_s_b(q1_m, u);                                 \
+    q1 = __msa_xori_b((v16u8) q1_m, 0x80);                          \
+    p1_m = __msa_adds_s_b(p1_m, u);                                 \
+    p1 = __msa_xori_b((v16u8) p1_m, 0x80);                          \
+    u_r = filt_r << 3; /* (9 * w + 63) >> 7 -> p2, q2 */            \
+    u_r += filt_r + cnst63h;                                        \
+    u_r >>= 7;                                                      \
+    u_r = __msa_sat_s_h(u_r, 7);                                    \
+                                                                    \
+    /* left part */                                                 \
+    u_l = filt_l << 3;                                              \
+    u_l += filt_l + cnst63h;                                        \
+    u_l >>= 7;                                                      \
+    u_l = __msa_sat_s_h(u_l, 7);                                    \
+    /* combine left and right part */                               \
+    u = __msa_pckev_b((v16i8) u_l, (v16i8) u_r);                    \
+    q2_m = __msa_subs_s_b(q2_m, u);                                 \
+    q2 = __msa_xori_b((v16u8) q2_m, 0x80);                          \
+    p2_m = __msa_adds_s_b(p2_m, u);                                 \
+    p2 = __msa_xori_b((v16u8) p2_m, 0x80);                          \
+}
+
+#define LPF_MASK_HEV(p3_in, p2_in, p1_in, p0_in,                   \
+                     q0_in, q1_in, q2_in, q3_in,                   \
+                     limit_in, b_limit_in, thresh_in,              \
+                     hev_out, mask_out, flat_out)                  \
+{                                                                  \
+    v16u8 p3_asub_p2_m, p2_asub_p1_m, p1_asub_p0_m, q1_asub_q0_m;  \
+    v16u8 p1_asub_q1_m, p0_asub_q0_m, q3_asub_q2_m, q2_asub_q1_m;  \
+    /* flat_out ends up holding max(|p1-p0|, |q1-q0|) */           \
+    /* absolute differences between neighbouring pixels */         \
+    p3_asub_p2_m = __msa_asub_u_b((p3_in), (p2_in));               \
+    p2_asub_p1_m = __msa_asub_u_b((p2_in), (p1_in));               \
+    p1_asub_p0_m = __msa_asub_u_b((p1_in), (p0_in));               \
+    q1_asub_q0_m = __msa_asub_u_b((q1_in), (q0_in));               \
+    q2_asub_q1_m = __msa_asub_u_b((q2_in), (q1_in));               \
+    q3_asub_q2_m = __msa_asub_u_b((q3_in), (q2_in));               \
+    p0_asub_q0_m = __msa_asub_u_b((p0_in), (q0_in));               \
+    p1_asub_q1_m = __msa_asub_u_b((p1_in), (q1_in));               \
+    /* hev: the largest inner step exceeds thresh_in */            \
+    flat_out = __msa_max_u_b(p1_asub_p0_m, q1_asub_q0_m);          \
+    hev_out = (thresh_in) < (v16u8) flat_out;                      \
+    /* mask: 0xff where the b_limit/limit tests all pass */        \
+    p0_asub_q0_m = __msa_adds_u_b(p0_asub_q0_m, p0_asub_q0_m);     \
+    p1_asub_q1_m >>= 1;                                            \
+    p0_asub_q0_m = __msa_adds_u_b(p0_asub_q0_m, p1_asub_q1_m);     \
+    mask_out = (b_limit_in) < p0_asub_q0_m;                        \
+    mask_out = __msa_max_u_b(flat_out, mask_out);                  \
+    p3_asub_p2_m = __msa_max_u_b(p3_asub_p2_m, p2_asub_p1_m);      \
+    mask_out = __msa_max_u_b(p3_asub_p2_m, mask_out);              \
+    q2_asub_q1_m = __msa_max_u_b(q2_asub_q1_m, q3_asub_q2_m);      \
+    mask_out = __msa_max_u_b(q2_asub_q1_m, mask_out);              \
+    mask_out = (limit_in) < (v16u8) mask_out;                      \
+    mask_out = __msa_xori_b(mask_out, 0xff);                       \
+}
+
+#define VP8_ST6x1_UB(in0, in0_idx, in1, in1_idx, pdst, stride)  \
+{                                                               \
+    uint16_t tmp0_h;                                            \
+    uint32_t tmp0_w;                                            \
+    /* 4 bytes of in0, then 2 bytes of in1 at +stride */        \
+    tmp0_w = __msa_copy_u_w((v4i32) (in0), in0_idx);            \
+    tmp0_h = __msa_copy_u_h((v8i16) (in1), in1_idx);            \
+    SW(tmp0_w, pdst);                                           \
+    SH(tmp0_h, (pdst) + (stride));                              \
+}
+
+void ff_vp8_v_loop_filter16_msa(uint8_t *src, ptrdiff_t pitch, int b_limit_in,
+                                int limit_in, int thresh_in)
+{
+    uint8_t *temp_src;
+    v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
+    v16u8 mask, hev, flat, thresh, limit, b_limit;
+    /* filter one horizontal edge, 16 columns wide, located at row src */
+    b_limit = (v16u8) __msa_fill_b(b_limit_in);
+    limit = (v16u8) __msa_fill_b(limit_in);
+    thresh = (v16u8) __msa_fill_b(thresh_in);
+    /* four rows above and four below; multiply so a negative pitch is fine */
+    temp_src = src - 4 * pitch;
+    LD_UB8(temp_src, pitch, p3, p2, p1, p0, q0, q1, q2, q3);
+    LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+                 hev, mask, flat);
+    VP8_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev);
+    /* store the six rows p2..q2; p3 and q3 are never written back */
+    temp_src = src - 3 * pitch;
+    ST_UB4(p2, p1, p0, q0, temp_src, pitch);
+    temp_src += (4 * pitch);
+    ST_UB2(q1, q2, temp_src, pitch);
+}
+
+void ff_vp8_v_loop_filter8uv_msa(uint8_t *src_u, uint8_t *src_v,
+                                 ptrdiff_t pitch, int b_limit_in, int limit_in,
+                                 int thresh_in)
+{
+    uint8_t *temp_src;
+    uint64_t p2_d, p1_d, p0_d, q0_d, q1_d, q2_d;
+    v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
+    v16u8 mask, hev, flat, thresh, limit, b_limit;
+    v16u8 p3_u, p2_u, p1_u, p0_u, q3_u, q2_u, q1_u, q0_u;
+    v16u8 p3_v, p2_v, p1_v, p0_v, q3_v, q2_v, q1_v, q0_v;
+    /* filter one horizontal edge in both chroma planes, 8 columns each */
+    b_limit = (v16u8) __msa_fill_b(b_limit_in);
+    limit = (v16u8) __msa_fill_b(limit_in);
+    thresh = (v16u8) __msa_fill_b(thresh_in);
+    /* four rows above and four below; multiply so a negative pitch is fine */
+    temp_src = src_u - 4 * pitch;
+    LD_UB8(temp_src, pitch, p3_u, p2_u, p1_u, p0_u, q0_u, q1_u, q2_u, q3_u);
+    temp_src = src_v - 4 * pitch;
+    LD_UB8(temp_src, pitch, p3_v, p2_v, p1_v, p0_v, q0_v, q1_v, q2_v, q3_v);
+
+    /* low 8 bytes of each vector hold U pixels, high 8 bytes hold V pixels */
+    ILVR_D4_UB(p3_v, p3_u, p2_v, p2_u, p1_v, p1_u, p0_v, p0_u, p3, p2, p1, p0);
+    ILVR_D4_UB(q0_v, q0_u, q1_v, q1_u, q2_v, q2_u, q3_v, q3_u, q0, q1, q2, q3);
+    LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+                 hev, mask, flat);
+    VP8_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev);
+    /* doubleword 0 of each result goes back to the six U rows p2..q2 */
+    p2_d = __msa_copy_u_d((v2i64) p2, 0);
+    p1_d = __msa_copy_u_d((v2i64) p1, 0);
+    p0_d = __msa_copy_u_d((v2i64) p0, 0);
+    q0_d = __msa_copy_u_d((v2i64) q0, 0);
+    q1_d = __msa_copy_u_d((v2i64) q1, 0);
+    q2_d = __msa_copy_u_d((v2i64) q2, 0);
+    src_u -= (pitch * 3);
+    SD4(p2_d, p1_d, p0_d, q0_d, src_u, pitch);
+    src_u += 4 * pitch;
+    SD(q1_d, src_u);
+    src_u += pitch;
+    SD(q2_d, src_u);
+    /* doubleword 1 of each result goes back to the six V rows p2..q2 */
+    p2_d = __msa_copy_u_d((v2i64) p2, 1);
+    p1_d = __msa_copy_u_d((v2i64) p1, 1);
+    p0_d = __msa_copy_u_d((v2i64) p0, 1);
+    q0_d = __msa_copy_u_d((v2i64) q0, 1);
+    q1_d = __msa_copy_u_d((v2i64) q1, 1);
+    q2_d = __msa_copy_u_d((v2i64) q2, 1);
+    src_v -= (pitch * 3);
+    SD4(p2_d, p1_d, p0_d, q0_d, src_v, pitch);
+    src_v += 4 * pitch;
+    SD(q1_d, src_v);
+    src_v += pitch;
+    SD(q2_d, src_v);
+}
+
+void ff_vp8_h_loop_filter16_msa(uint8_t *src, ptrdiff_t pitch, int b_limit_in,
+                                int limit_in, int thresh_in)
+{
+    uint8_t *temp_src;
+    v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
+    v16u8 mask, hev, flat, thresh, limit, b_limit;
+    v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8;
+    v16u8 row9, row10, row11, row12, row13, row14, row15;
+    v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+    /* filter one vertical edge over 16 rows; loads start at src - 4 */
+    b_limit = (v16u8) __msa_fill_b(b_limit_in);
+    limit = (v16u8) __msa_fill_b(limit_in);
+    thresh = (v16u8) __msa_fill_b(thresh_in);
+    temp_src = src - 4;
+    LD_UB8(temp_src, pitch, row0, row1, row2, row3, row4, row5, row6, row7);
+    temp_src += (8 * pitch);
+    LD_UB8(temp_src, pitch,
+           row8, row9, row10, row11, row12, row13, row14, row15);
+    TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7,
+                        row8, row9, row10, row11, row12, row13, row14, row15,
+                        p3, p2, p1, p0, q0, q1, q2, q3);
+    /* after the transpose each vector holds one pixel column, 16 rows */
+    LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+                 hev, mask, flat);
+    VP8_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev);
+    ILVR_B2_SH(p1, p2, q0, p0, tmp0, tmp1);
+    ILVRL_H2_SH(tmp1, tmp0, tmp3, tmp4); /* stored below as rows 0-7 */
+    ILVL_B2_SH(p1, p2, q0, p0, tmp0, tmp1);
+    ILVRL_H2_SH(tmp1, tmp0, tmp6, tmp7); /* stored below as rows 8-15 */
+    ILVRL_B2_SH(q2, q1, tmp2, tmp5);     /* 2-byte tail of every row */
+    /* write back 6 bytes per row (4 + 2), starting at src - 3 */
+    temp_src = src - 3;
+    VP8_ST6x1_UB(tmp3, 0, tmp2, 0, temp_src, 4);
+    temp_src += pitch;
+    VP8_ST6x1_UB(tmp3, 1, tmp2, 1, temp_src, 4);
+    temp_src += pitch;
+    VP8_ST6x1_UB(tmp3, 2, tmp2, 2, temp_src, 4);
+    temp_src += pitch;
+    VP8_ST6x1_UB(tmp3, 3, tmp2, 3, temp_src, 4);
+    temp_src += pitch;
+    VP8_ST6x1_UB(tmp4, 0, tmp2, 4, temp_src, 4);
+    temp_src += pitch;
+    VP8_ST6x1_UB(tmp4, 1, tmp2, 5, temp_src, 4);
+    temp_src += pitch;
+    VP8_ST6x1_UB(tmp4, 2, tmp2, 6, temp_src, 4);
+    temp_src += pitch;
+    VP8_ST6x1_UB(tmp4, 3, tmp2, 7, temp_src, 4);
+    temp_src += pitch;
+    VP8_ST6x1_UB(tmp6, 0, tmp5, 0, temp_src, 4);
+    temp_src += pitch;
+    VP8_ST6x1_UB(tmp6, 1, tmp5, 1, temp_src, 4);
+    temp_src += pitch;
+    VP8_ST6x1_UB(tmp6, 2, tmp5, 2, temp_src, 4);
+    temp_src += pitch;
+    VP8_ST6x1_UB(tmp6, 3, tmp5, 3, temp_src, 4);
+    temp_src += pitch;
+    VP8_ST6x1_UB(tmp7, 0, tmp5, 4, temp_src, 4);
+    temp_src += pitch;
+    VP8_ST6x1_UB(tmp7, 1, tmp5, 5, temp_src, 4);
+    temp_src += pitch;
+    VP8_ST6x1_UB(tmp7, 2, tmp5, 6, temp_src, 4);
+    temp_src += pitch;
+    VP8_ST6x1_UB(tmp7, 3, tmp5, 7, temp_src, 4);
+}
+
+void ff_vp8_h_loop_filter8uv_msa(uint8_t *src_u, uint8_t *src_v,
+                                 ptrdiff_t pitch, int b_limit_in, int limit_in,
+                                 int thresh_in)
+{
+    v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
+    v16u8 mask, hev, flat, thresh, limit, b_limit;
+    v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8;
+    v16u8 row9, row10, row11, row12, row13, row14, row15;
+    v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+    /* Loads rows from src_u - 4 and src_v - 4 and filters both in one pass. */
+    b_limit = (v16u8) __msa_fill_b(b_limit_in);
+    limit = (v16u8) __msa_fill_b(limit_in);
+    thresh = (v16u8) __msa_fill_b(thresh_in);
+
+    LD_UB8(src_u - 4, pitch, row0, row1, row2, row3, row4, row5, row6, row7);
+    LD_UB8(src_v - 4, pitch,
+           row8, row9, row10, row11, row12, row13, row14, row15);
+    TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7,
+                        row8, row9, row10, row11, row12, row13, row14, row15,
+                        p3, p2, p1, p0, q0, q1, q2, q3);
+
+    LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+                 hev, mask, flat);
+    VP8_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev);
+
+    ILVR_B2_SH(p1, p2, q0, p0, tmp0, tmp1);
+    ILVRL_H2_SH(tmp1, tmp0, tmp3, tmp4);
+    ILVL_B2_SH(p1, p2, q0, p0, tmp0, tmp1);
+    ILVRL_H2_SH(tmp1, tmp0, tmp6, tmp7);
+    ILVRL_B2_SH(q2, q1, tmp2, tmp5);
+    /* Eight stores to the U rows from src_u - 3 (tmp3, tmp4 with tmp2). */
+    src_u -= 3;
+    VP8_ST6x1_UB(tmp3, 0, tmp2, 0, src_u, 4);
+    src_u += pitch;
+    VP8_ST6x1_UB(tmp3, 1, tmp2, 1, src_u, 4);
+    src_u += pitch;
+    VP8_ST6x1_UB(tmp3, 2, tmp2, 2, src_u, 4);
+    src_u += pitch;
+    VP8_ST6x1_UB(tmp3, 3, tmp2, 3, src_u, 4);
+    src_u += pitch;
+    VP8_ST6x1_UB(tmp4, 0, tmp2, 4, src_u, 4);
+    src_u += pitch;
+    VP8_ST6x1_UB(tmp4, 1, tmp2, 5, src_u, 4);
+    src_u += pitch;
+    VP8_ST6x1_UB(tmp4, 2, tmp2, 6, src_u, 4);
+    src_u += pitch;
+    VP8_ST6x1_UB(tmp4, 3, tmp2, 7, src_u, 4);
+    /* Eight stores to the V rows from src_v - 3 (tmp6, tmp7 with tmp5). */
+    src_v -= 3;
+    VP8_ST6x1_UB(tmp6, 0, tmp5, 0, src_v, 4);
+    src_v += pitch;
+    VP8_ST6x1_UB(tmp6, 1, tmp5, 1, src_v, 4);
+    src_v += pitch;
+    VP8_ST6x1_UB(tmp6, 2, tmp5, 2, src_v, 4);
+    src_v += pitch;
+    VP8_ST6x1_UB(tmp6, 3, tmp5, 3, src_v, 4);
+    src_v += pitch;
+    VP8_ST6x1_UB(tmp7, 0, tmp5, 4, src_v, 4);
+    src_v += pitch;
+    VP8_ST6x1_UB(tmp7, 1, tmp5, 5, src_v, 4);
+    src_v += pitch;
+    VP8_ST6x1_UB(tmp7, 2, tmp5, 6, src_v, 4);
+    src_v += pitch;
+    VP8_ST6x1_UB(tmp7, 3, tmp5, 7, src_v, 4);
+}
+
+void ff_vp8_v_loop_filter_simple_msa(uint8_t *src, ptrdiff_t pitch,
+                                     int b_limit_ptr)
+{
+    v16u8 p1, p0, q1, q0;
+    v16u8 mask, b_limit;
+    /* Filters the rows around src; b_limit_ptr is an int despite its name. */
+    b_limit = (v16u8) __msa_fill_b(b_limit_ptr);
+    /* Load from two rows above src; 2 * pitch stays defined for pitch < 0. */
+    LD_UB4((src - 2 * pitch), pitch, p1, p0, q0, q1);
+    VP8_SIMPLE_MASK(p1, p0, q0, q1, b_limit, mask);
+    VP8_SIMPLE_FILT(p1, p0, q0, q1, mask);
+    ST_UB2(p0, q0, (src - pitch), pitch); /* only p0 and q0 are written back */
+}
+
+void ff_vp8_h_loop_filter_simple_msa(uint8_t *src, ptrdiff_t pitch,
+                                     int b_limit_ptr)
+{
+    uint8_t *temp_src;
+    v16u8 p1, p0, q1, q0;
+    v16u8 mask, b_limit;
+    v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8;
+    v16u8 row9, row10, row11, row12, row13, row14, row15;
+    v8i16 tmp0, tmp1;
+    /* Loads 16 rows from src - 2, filters, and stores from src - 1. */
+    b_limit = (v16u8) __msa_fill_b(b_limit_ptr);
+    temp_src = src - 2;
+    LD_UB8(temp_src, pitch, row0, row1, row2, row3, row4, row5, row6, row7);
+    temp_src += (8 * pitch);
+    LD_UB8(temp_src, pitch,
+           row8, row9, row10, row11, row12, row13, row14, row15);
+    TRANSPOSE16x4_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7,
+                        row8, row9, row10, row11, row12, row13, row14, row15,
+                        p1, p0, q0, q1);
+    VP8_SIMPLE_MASK(p1, p0, q0, q1, b_limit, mask);
+    VP8_SIMPLE_FILT(p1, p0, q0, q1, mask);
+    ILVRL_B2_SH(q0, p0, tmp1, tmp0);
+
+    /* Four ST2x4_UB calls, each starting four rows below the previous one. */
+    src -= 1;
+    ST2x4_UB(tmp1, 0, src, pitch);
+    src += 4 * pitch;
+    ST2x4_UB(tmp1, 4, src, pitch);
+    src += 4 * pitch;
+    ST2x4_UB(tmp0, 0, src, pitch);
+    src += 4 * pitch;
+    ST2x4_UB(tmp0, 4, src, pitch);
+}
+
+void ff_vp8_v_loop_filter8uv_inner_msa(uint8_t *src_u, uint8_t *src_v,
+                                       ptrdiff_t pitch, int b_limit_in,
+                                       int limit_in, int thresh_in)
+{
+    uint64_t p1_d, p0_d, q0_d, q1_d;
+    v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
+    v16u8 mask, hev, flat, thresh, limit, b_limit;
+    v16u8 p3_u, p2_u, p1_u, p0_u, q3_u, q2_u, q1_u, q0_u;
+    v16u8 p3_v, p2_v, p1_v, p0_v, q3_v, q2_v, q1_v, q0_v;
+    /* Filters the U and V rows around src_u / src_v in a single pass. */
+    thresh = (v16u8) __msa_fill_b(thresh_in);
+    limit = (v16u8) __msa_fill_b(limit_in);
+    b_limit = (v16u8) __msa_fill_b(b_limit_in);
+
+    src_u = src_u - 4 * pitch; /* multiply: defined for negative pitch too */
+    LD_UB8(src_u, pitch, p3_u, p2_u, p1_u, p0_u, q0_u, q1_u, q2_u, q3_u);
+    src_u += (5 * pitch); /* now one row below the original src_u */
+    src_v = src_v - 4 * pitch;
+    LD_UB8(src_v, pitch, p3_v, p2_v, p1_v, p0_v, q0_v, q1_v, q2_v, q3_v);
+    src_v += (5 * pitch); /* now one row below the original src_v */
+
+    /* the right 8 elements of each vector hold U pixels and
+       the left 8 elements hold V pixels */
+    ILVR_D4_UB(p3_v, p3_u, p2_v, p2_u, p1_v, p1_u, p0_v, p0_u, p3, p2, p1, p0);
+    ILVR_D4_UB(q0_v, q0_u, q1_v, q1_u, q2_v, q2_u, q3_v, q3_u, q0, q1, q2, q3);
+    LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+                 hev, mask, flat);
+    VP8_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev);
+    /* Doubleword 0 goes to the U plane; SD4 steps by -pitch from src_u. */
+    p1_d = __msa_copy_u_d((v2i64) p1, 0);
+    p0_d = __msa_copy_u_d((v2i64) p0, 0);
+    q0_d = __msa_copy_u_d((v2i64) q0, 0);
+    q1_d = __msa_copy_u_d((v2i64) q1, 0);
+    SD4(q1_d, q0_d, p0_d, p1_d, src_u, (- pitch));
+    /* Doubleword 1 goes to the V plane in the same way. */
+    p1_d = __msa_copy_u_d((v2i64) p1, 1);
+    p0_d = __msa_copy_u_d((v2i64) p0, 1);
+    q0_d = __msa_copy_u_d((v2i64) q0, 1);
+    q1_d = __msa_copy_u_d((v2i64) q1, 1);
+    SD4(q1_d, q0_d, p0_d, p1_d, src_v, (- pitch));
+}
+
+void ff_vp8_h_loop_filter8uv_inner_msa(uint8_t *src_u, uint8_t *src_v,
+                                       ptrdiff_t pitch, int b_limit_in,
+                                       int limit_in, int thresh_in)
+{
+    uint8_t *temp_src_u, *temp_src_v;
+    v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
+    v16u8 mask, hev, flat, thresh, limit, b_limit;
+    v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8;
+    v16u8 row9, row10, row11, row12, row13, row14, row15;
+    v4i32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+    /* Loads rows from src_u - 4 and src_v - 4; stores start at offset - 2. */
+    thresh = (v16u8) __msa_fill_b(thresh_in);
+    limit = (v16u8) __msa_fill_b(limit_in);
+    b_limit = (v16u8) __msa_fill_b(b_limit_in);
+
+    LD_UB8(src_u - 4, pitch, row0, row1, row2, row3, row4, row5, row6, row7);
+    LD_UB8(src_v - 4, pitch,
+           row8, row9, row10, row11, row12, row13, row14, row15);
+    TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7,
+                        row8, row9, row10, row11, row12, row13, row14, row15,
+                        p3, p2, p1, p0, q0, q1, q2, q3);
+
+    LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+                 hev, mask, flat);
+    VP8_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev);
+    ILVR_B2_SW(p0, p1, q1, q0, tmp0, tmp1);
+    ILVRL_H2_SW(tmp1, tmp0, tmp2, tmp3);
+    tmp0 = (v4i32) __msa_ilvl_b((v16i8) p0, (v16i8) p1);
+    tmp1 = (v4i32) __msa_ilvl_b((v16i8) q1, (v16i8) q0);
+    ILVRL_H2_SW(tmp1, tmp0, tmp4, tmp5);
+    /* tmp2 then tmp3 go to the U rows; the second call is 4 rows further. */
+    temp_src_u = src_u - 2;
+    ST4x4_UB(tmp2, tmp2, 0, 1, 2, 3, temp_src_u, pitch);
+    temp_src_u += 4 * pitch;
+    ST4x4_UB(tmp3, tmp3, 0, 1, 2, 3, temp_src_u, pitch);
+    /* tmp4 then tmp5 go to the V rows in the same way. */
+    temp_src_v = src_v - 2;
+    ST4x4_UB(tmp4, tmp4, 0, 1, 2, 3, temp_src_v, pitch);
+    temp_src_v += 4 * pitch;
+    ST4x4_UB(tmp5, tmp5, 0, 1, 2, 3, temp_src_v, pitch);
+}
+
+void ff_vp8_v_loop_filter16_inner_msa(uint8_t *src, ptrdiff_t pitch,
+                                      int32_t e, int32_t i, int32_t h)
+{
+    v16u8 edge_lim = (v16u8) __msa_fill_b(e);
+    v16u8 inner_lim = (v16u8) __msa_fill_b(i);
+    v16u8 hev_thr = (v16u8) __msa_fill_b(h);
+    v16u8 above3, above2, above1, above0, below0, below1, below2, below3;
+    v16u8 filt_mask, hev_mask, flat_mask;
+    uint8_t *first_row = src - 4 * pitch;
+
+    /* Eight rows are read, src - 4 * pitch first; four are written back. */
+    LD_UB8(first_row, pitch, above3, above2, above1, above0,
+           below0, below1, below2, below3);
+    LPF_MASK_HEV(above3, above2, above1, above0, below0, below1, below2,
+                 below3, inner_lim, edge_lim, hev_thr,
+                 hev_mask, filt_mask, flat_mask);
+    VP8_LPF_FILTER4_4W(above1, above0, below0, below1, filt_mask, hev_mask);
+    ST_UB4(above1, above0, below0, below1, (first_row + 2 * pitch), pitch);
+}
+
+void ff_vp8_h_loop_filter16_inner_msa(uint8_t *src, ptrdiff_t pitch,
+                                      int32_t e, int32_t i, int32_t h)
+{
+    v16u8 mask, hev, flat;
+    v16u8 thresh, b_limit, limit;
+    v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
+    v16u8 row0, row1, row2, row3, row4, row5, row6, row7;
+    v16u8 row8, row9, row10, row11, row12, row13, row14, row15;
+    v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+    /* Loads 16 rows from src - 4; the filtered rows are stored from src - 2. */
+    LD_UB8(src - 4, pitch, row0, row1, row2, row3, row4, row5, row6, row7);
+    LD_UB8(src - 4 + (8 * pitch), pitch,
+           row8, row9, row10, row11, row12, row13, row14, row15);
+    TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7,
+                        row8, row9, row10, row11, row12, row13, row14, row15,
+                        p3, p2, p1, p0, q0, q1, q2, q3);
+
+    thresh = (v16u8) __msa_fill_b(h); /* h feeds thresh */
+    b_limit = (v16u8) __msa_fill_b(e); /* e feeds b_limit */
+    limit = (v16u8) __msa_fill_b(i); /* i feeds limit */
+
+    LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+                 hev, mask, flat);
+    VP8_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev);
+    ILVR_B2_SH(p0, p1, q1, q0, tmp0, tmp1);
+    ILVRL_H2_SH(tmp1, tmp0, tmp2, tmp3);
+    ILVL_B2_SH(p0, p1, q1, q0, tmp0, tmp1);
+    ILVRL_H2_SH(tmp1, tmp0, tmp4, tmp5);
+    /* Two ST4x8_UB calls: tmp2/tmp3 first, tmp4/tmp5 eight rows further. */
+    src -= 2;
+    ST4x8_UB(tmp2, tmp3, src, pitch);
+    src += (8 * pitch);
+    ST4x8_UB(tmp4, tmp5, src, pitch);
+}
diff --git a/libavcodec/mips/vp8_mc_msa.c b/libavcodec/mips/vp8_mc_msa.c
new file mode 100644
index 00000000..2bf0abd8
--- /dev/null
+++ b/libavcodec/mips/vp8_mc_msa.c
@@ -0,0 +1,2332 @@
+/*
+ * Copyright (c) 2015 Manojkumar Bhosale (Manojkumar.Bhosale@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/vp8dsp.h"
+#include "libavutil/mips/generic_macros_msa.h"
+#include "vp8dsp_mips.h"
+
+static const uint8_t mc_filt_mask_arr[16 * 3] = {
+    /* offset 0: loaded by the 8- and 16-wide filters (indices 0..8 only) */
+    0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
+    /* offset 16: loaded by the 4-wide filters (indices 0..4 and 16..20) */
+    0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20,
+    /* offset 32: second 4 width variant (indices 8..12 and 24..28) */
+    8, 9, 9, 10, 10, 11, 11, 12, 24, 25, 25, 26, 26, 27, 27, 28
+};
+
+static const int8_t subpel_filters_msa[7][16] = { /* LD_SH loads 16 bytes */
+    {-6, 123, 12, -1, 0, 0, 0, 0},      /* 4 taps; rest of the row is 0 */
+    {2, -11, 108, 36, -8, 1, 0, 0},     /* New 1/4 pel 6 tap filter */
+    {-9, 93, 50, -6, 0, 0, 0, 0},       /* 4 taps */
+    {3, -16, 77, 77, -16, 3, 0, 0},     /* New 1/2 pel 6 tap filter */
+    {-6, 50, 93, -9, 0, 0, 0, 0},       /* 4 taps */
+    {1, -8, 36, 108, -11, 2, 0, 0},     /* New 1/4 pel 6 tap filter */
+    {-1, 12, 123, -6, 0, 0, 0, 0},      /* 4 taps; padded like every row */
+};
+
+static const int8_t bilinear_filters_msa[7][2] = { /* each pair sums to 128 */
+    {112, 16},
+    {96, 32},
+    {80, 48},
+    {64, 64},
+    {48, 80},
+    {32, 96},
+    {16, 112}
+};
+
+#define HORIZ_6TAP_FILT(src0, src1, mask0, mask1, mask2,                 \
+                        filt_h0, filt_h1, filt_h2)                       \
+( {                                                                      \
+    v16i8 vec0_m, vec1_m, vec2_m;                                        \
+    v8i16 hz_out_m;                                                      \
+    /* Statement expression: its value is hz_out_m (see last line). */   \
+    VSHF_B3_SB(src0, src1, src0, src1, src0, src1, mask0, mask1, mask2,  \
+               vec0_m, vec1_m, vec2_m);                                  \
+    hz_out_m = DPADD_SH3_SH(vec0_m, vec1_m, vec2_m,                      \
+                            filt_h0, filt_h1, filt_h2);                  \
+    /* Rounding shift right by 7 bits, then signed saturation (7). */    \
+    hz_out_m = __msa_srari_h(hz_out_m, 7);                               \
+    hz_out_m = __msa_sat_s_h(hz_out_m, 7);                               \
+                                                                         \
+    hz_out_m;                                                            \
+} )
+
+#define HORIZ_6TAP_4WID_4VECS_FILT(src0, src1, src2, src3,             \
+                                   mask0, mask1, mask2,                \
+                                   filt0, filt1, filt2,                \
+                                   out0, out1)                         \
+{                                                                      \
+    v16i8 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m;              \
+    /* out0/out1 = three summed dot products; no rounding here. */     \
+    VSHF_B2_SB(src0, src1, src2, src3, mask0, mask0, vec0_m, vec1_m);  \
+    DOTP_SB2_SH(vec0_m, vec1_m, filt0, filt0, out0, out1);             \
+    VSHF_B2_SB(src0, src1, src2, src3, mask1, mask1, vec2_m, vec3_m);  \
+    DPADD_SB2_SH(vec2_m, vec3_m, filt1, filt1, out0, out1);            \
+    VSHF_B2_SB(src0, src1, src2, src3, mask2, mask2, vec4_m, vec5_m);  \
+    DPADD_SB2_SH(vec4_m, vec5_m, filt2, filt2, out0, out1);            \
+}
+
+#define HORIZ_6TAP_8WID_4VECS_FILT(src0, src1, src2, src3,                    \
+                                   mask0, mask1, mask2,                       \
+                                   filt0, filt1, filt2,                       \
+                                   out0, out1, out2, out3)                    \
+{                                                                             \
+    v16i8 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m;     \
+    /* Each source is shuffled with itself; out0..out3 are not rounded. */    \
+    VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0_m, vec1_m);         \
+    VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2_m, vec3_m);         \
+    DOTP_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt0, filt0, filt0, filt0,   \
+                out0, out1, out2, out3);                                      \
+    VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec0_m, vec1_m);         \
+    VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec2_m, vec3_m);         \
+    VSHF_B2_SB(src0, src0, src1, src1, mask2, mask2, vec4_m, vec5_m);         \
+    VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, vec6_m, vec7_m);         \
+    DPADD_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt1, filt1, filt1, filt1,  \
+                 out0, out1, out2, out3);                                     \
+    DPADD_SB4_SH(vec4_m, vec5_m, vec6_m, vec7_m, filt2, filt2, filt2, filt2,  \
+                 out0, out1, out2, out3);                                     \
+}
+
+#define FILT_4TAP_DPADD_S_H(vec0, vec1, filt0, filt1)                 \
+( {                                                                   \
+    v8i16 res_m; /* _m suffix: cannot capture a tmp0 argument */      \
+    /* Value: dotp(vec0, filt0) + dotp(vec1, filt1), as v8i16. */     \
+    res_m = __msa_dotp_s_h((v16i8) (vec0), (v16i8) (filt0));          \
+    res_m = __msa_dpadd_s_h(res_m, (v16i8) (vec1), (v16i8) (filt1));  \
+                                                                      \
+    res_m;                                                            \
+} )
+
+#define HORIZ_4TAP_FILT(src0, src1, mask0, mask1, filt_h0, filt_h1)    \
+( {                                                                    \
+    v16i8 vec0_m, vec1_m;                                              \
+    v8i16 hz_out_m;                                                    \
+    /* Statement expression: its value is hz_out_m. */                 \
+    VSHF_B2_SB(src0, src1, src0, src1, mask0, mask1, vec0_m, vec1_m);  \
+    hz_out_m = FILT_4TAP_DPADD_S_H(vec0_m, vec1_m, filt_h0, filt_h1);  \
+                                                                       \
+    hz_out_m = __msa_srari_h(hz_out_m, 7);                             \
+    hz_out_m = __msa_sat_s_h(hz_out_m, 7);                             \
+                                                                       \
+    hz_out_m;                                                          \
+} )
+
+#define HORIZ_4TAP_4WID_4VECS_FILT(src0, src1, src2, src3,             \
+                                   mask0, mask1, filt0, filt1,         \
+                                   out0, out1)                         \
+{                                                                      \
+    v16i8 vec0_m, vec1_m, vec2_m, vec3_m;                              \
+    /* out0/out1 = two summed dot products; no rounding here. */       \
+    VSHF_B2_SB(src0, src1, src2, src3, mask0, mask0, vec0_m, vec1_m);  \
+    DOTP_SB2_SH(vec0_m, vec1_m, filt0, filt0, out0, out1);             \
+    VSHF_B2_SB(src0, src1, src2, src3, mask1, mask1, vec2_m, vec3_m);  \
+    DPADD_SB2_SH(vec2_m, vec3_m, filt1, filt1, out0, out1);            \
+}
+
+#define HORIZ_4TAP_8WID_4VECS_FILT(src0, src1, src2, src3,                    \
+                                   mask0, mask1, filt0, filt1,                \
+                                   out0, out1, out2, out3)                    \
+{                                                                             \
+    v16i8 vec0_m, vec1_m, vec2_m, vec3_m;                                     \
+    /* Each source is shuffled with itself; out0..out3 are not rounded. */    \
+    VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0_m, vec1_m);         \
+    VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2_m, vec3_m);         \
+    DOTP_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt0, filt0, filt0, filt0,   \
+                out0, out1, out2, out3);                                      \
+    VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec0_m, vec1_m);         \
+    VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec2_m, vec3_m);         \
+    DPADD_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt1, filt1, filt1, filt1,  \
+                 out0, out1, out2, out3);                                     \
+}
+
+static void common_hz_6t_4x4_msa(uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 const int8_t *filter)
+{
+    v16i8 src0, src1, src2, src3, filt0, filt1, filt2;
+    v16u8 mask0, mask1, mask2, out;
+    v8i16 filt, out0, out1;
+    /* Filters four rows loaded from src - 2 and stores them with ST4x4_UB. */
+    mask0 = LD_UB(&mc_filt_mask_arr[16]);
+    src -= 2;
+
+    /* rearranging filter */
+    filt = LD_SH(filter); /* NOTE(review): loads a whole vector from filter */
+    SPLATI_H3_SB(filt, 0, 1, 2, filt0, filt1, filt2);
+
+    mask1 = mask0 + 2; /* every index of mask0 plus 2 */
+    mask2 = mask0 + 4; /* every index of mask0 plus 4 */
+
+    LD_SB4(src, src_stride, src0, src1, src2, src3);
+    XORI_B4_128_SB(src0, src1, src2, src3);
+    HORIZ_6TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
+                               filt0, filt1, filt2, out0, out1);
+    SRARI_H2_SH(out0, out1, 7);
+    SAT_SH2_SH(out0, out1, 7);
+    out = PCKEV_XORI128_UB(out0, out1);
+    ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
+}
+
+static void common_hz_6t_4x8_msa(uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 const int8_t *filter)
+{
+    v16i8 src0, src1, src2, src3, filt0, filt1, filt2;
+    v16u8 mask0, mask1, mask2, out;
+    v8i16 filt, out0, out1, out2, out3;
+    /* Filters eight rows from src - 2 as two groups of four rows each. */
+    mask0 = LD_UB(&mc_filt_mask_arr[16]);
+    src -= 2;
+
+    /* rearranging filter */
+    filt = LD_SH(filter); /* NOTE(review): loads a whole vector from filter */
+    SPLATI_H3_SB(filt, 0, 1, 2, filt0, filt1, filt2);
+
+    mask1 = mask0 + 2; /* every index of mask0 plus 2 */
+    mask2 = mask0 + 4; /* every index of mask0 plus 4 */
+
+    LD_SB4(src, src_stride, src0, src1, src2, src3);
+    XORI_B4_128_SB(src0, src1, src2, src3);
+    src += (4 * src_stride);
+    HORIZ_6TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
+                               filt0, filt1, filt2, out0, out1);
+    LD_SB4(src, src_stride, src0, src1, src2, src3); /* second four rows */
+    XORI_B4_128_SB(src0, src1, src2, src3);
+    HORIZ_6TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
+                               filt0, filt1, filt2, out2, out3);
+    SRARI_H4_SH(out0, out1, out2, out3, 7);
+    SAT_SH4_SH(out0, out1, out2, out3, 7);
+    out = PCKEV_XORI128_UB(out0, out1);
+    ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
+    dst += (4 * dst_stride);
+    out = PCKEV_XORI128_UB(out2, out3);
+    ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
+}
+
+void ff_put_vp8_epel4_h6_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                             uint8_t *src, ptrdiff_t src_stride,
+                             int height, int mx, int my)
+{
+    /* Row mx - 1 of the table supplies the taps; my is not used here. */
+    const int8_t *taps = subpel_filters_msa[mx - 1];
+
+    if (height == 4)
+        common_hz_6t_4x4_msa(src, src_stride, dst, dst_stride, taps);
+    else if (height == 8)
+        common_hz_6t_4x8_msa(src, src_stride, dst, dst_stride, taps);
+}
+
+/*
+ * Horizontal 6-tap filter for 8-wide blocks, four rows per pass.
+ *
+ * Each pass loads four rows starting at src - 2, filters them with the
+ * taps from subpel_filters_msa[mx - 1] and stores them with ST8x4_UB.
+ * One loop runs all height >> 2 passes; the count is never decremented
+ * before the loop, so the unsigned counter cannot wrap and a height
+ * below 4 simply runs zero passes.
+ * my is not used.
+ */
+void ff_put_vp8_epel8_h6_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                             uint8_t *src, ptrdiff_t src_stride,
+                             int height, int mx, int my)
+{
+    uint32_t loop_cnt;
+    const int8_t *filter = subpel_filters_msa[mx - 1];
+    v16i8 src0, src1, src2, src3, filt0, filt1, filt2;
+    v16u8 mask0, mask1, mask2, tmp0, tmp1;
+    v8i16 filt, out0, out1, out2, out3;
+
+    mask0 = LD_UB(&mc_filt_mask_arr[0]);
+
+    src -= 2;
+
+    /* rearranging filter */
+    filt = LD_SH(filter);
+    SPLATI_H3_SB(filt, 0, 1, 2, filt0, filt1, filt2);
+
+    /* mask1 and mask2 are mask0 with every index raised by 2 and by 4 */
+    mask1 = mask0 + 2;
+    mask2 = mask0 + 4;
+
+    for (loop_cnt = (height >> 2); loop_cnt--;) {
+        LD_SB4(src, src_stride, src0, src1, src2, src3);
+        XORI_B4_128_SB(src0, src1, src2, src3);
+        src += (4 * src_stride);
+        HORIZ_6TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
+                                   filt0, filt1, filt2, out0, out1, out2, out3);
+        SRARI_H4_SH(out0, out1, out2, out3, 7);
+        SAT_SH4_SH(out0, out1, out2, out3, 7);
+        tmp0 = PCKEV_XORI128_UB(out0, out1);
+        tmp1 = PCKEV_XORI128_UB(out2, out3);
+        ST8x4_UB(tmp0, tmp1, dst, dst_stride);
+        dst += (4 * dst_stride);
+    }
+}
+
+
+void ff_put_vp8_epel16_h6_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                              uint8_t *src, ptrdiff_t src_stride,
+                              int height, int mx, int my)
+{
+    uint32_t loop_cnt;
+    const int8_t *filter = subpel_filters_msa[mx - 1];
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, filt0, filt1, filt2;
+    v16u8 mask0, mask1, mask2, out;
+    v8i16 filt, out0, out1, out2, out3, out4, out5, out6, out7;
+    /* Each pass handles four rows, read as two vectors (src and src + 8). */
+    mask0 = LD_UB(&mc_filt_mask_arr[0]);
+    src -= 2;
+
+    /* rearranging filter */
+    filt = LD_SH(filter);
+    SPLATI_H3_SB(filt, 0, 1, 2, filt0, filt1, filt2);
+
+    mask1 = mask0 + 2;
+    mask2 = mask0 + 4;
+
+    for (loop_cnt = (height >> 2); loop_cnt--;) {
+        LD_SB4(src, src_stride, src0, src2, src4, src6);
+        LD_SB4(src + 8, src_stride, src1, src3, src5, src7);
+        XORI_B8_128_SB(src0, src1, src2, src3, src4, src5, src6, src7);
+        src += (4 * src_stride);
+        /* Even-numbered vectors come from src, odd-numbered from src + 8. */
+        HORIZ_6TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
+                                   filt0, filt1, filt2, out0, out1, out2, out3);
+        HORIZ_6TAP_8WID_4VECS_FILT(src4, src5, src6, src7, mask0, mask1, mask2,
+                                   filt0, filt1, filt2, out4, out5, out6, out7);
+        SRARI_H4_SH(out0, out1, out2, out3, 7);
+        SRARI_H4_SH(out4, out5, out6, out7, 7);
+        SAT_SH4_SH(out0, out1, out2, out3, 7);
+        SAT_SH4_SH(out4, out5, out6, out7, 7);
+        out = PCKEV_XORI128_UB(out0, out1);
+        ST_UB(out, dst);
+        dst += dst_stride;
+        out = PCKEV_XORI128_UB(out2, out3);
+        ST_UB(out, dst);
+        dst += dst_stride;
+        out = PCKEV_XORI128_UB(out4, out5);
+        ST_UB(out, dst);
+        dst += dst_stride;
+        out = PCKEV_XORI128_UB(out6, out7);
+        ST_UB(out, dst);
+        dst += dst_stride;
+    }
+}
+
+void ff_put_vp8_epel4_v6_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                             uint8_t *src, ptrdiff_t src_stride,
+                             int height, int mx, int my)
+{
+    uint32_t loop_cnt;
+    const int8_t *filter = subpel_filters_msa[my - 1];
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16i8 src10_r, src32_r, src54_r, src76_r, src21_r, src43_r, src65_r;
+    v16i8 src87_r, src2110, src4332, src6554, src8776, filt0, filt1, filt2;
+    v16u8 out;
+    v8i16 filt, out10, out32;
+    /* Taps come from row my - 1; loading starts two rows above src. */
+    src -= (2 * src_stride);
+
+    filt = LD_SH(filter);
+    SPLATI_H3_SB(filt, 0, 1, 2, filt0, filt1, filt2);
+
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    src += (5 * src_stride);
+
+    ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r,
+               src32_r, src43_r);
+    ILVR_D2_SB(src21_r, src10_r, src43_r, src32_r, src2110, src4332);
+    XORI_B2_128_SB(src2110, src4332);
+    /* Each pass loads four more rows and stores four rows with ST4x4_UB. */
+    for (loop_cnt = (height >> 2); loop_cnt--;) {
+        LD_SB4(src, src_stride, src5, src6, src7, src8);
+        src += (4 * src_stride);
+
+        ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7, src54_r,
+                   src65_r, src76_r, src87_r);
+        ILVR_D2_SB(src65_r, src54_r, src87_r, src76_r, src6554, src8776);
+        XORI_B2_128_SB(src6554, src8776);
+        out10 = DPADD_SH3_SH(src2110, src4332, src6554, filt0, filt1, filt2);
+        out32 = DPADD_SH3_SH(src4332, src6554, src8776, filt0, filt1, filt2);
+        SRARI_H2_SH(out10, out32, 7);
+        SAT_SH2_SH(out10, out32, 7);
+        out = PCKEV_XORI128_UB(out10, out32);
+        ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
+        dst += (4 * dst_stride);
+        /* The newest vectors become the oldest inputs of the next pass. */
+        src2110 = src6554;
+        src4332 = src8776;
+        src4 = src8;
+    }
+}
+
+void ff_put_vp8_epel8_v6_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                             uint8_t *src, ptrdiff_t src_stride,
+                             int height, int mx, int my)
+{
+    uint32_t loop_cnt;
+    const int8_t *filter = subpel_filters_msa[my - 1];
+    v16i8 src0, src1, src2, src3, src4, src7, src8, src9, src10;
+    v16i8 src10_r, src32_r, src76_r, src98_r, src21_r, src43_r, src87_r;
+    v16i8 src109_r, filt0, filt1, filt2;
+    v16u8 tmp0, tmp1;
+    v8i16 filt, out0_r, out1_r, out2_r, out3_r;
+    /* Taps come from row my - 1; loading starts two rows above src. */
+    src -= (2 * src_stride);
+
+    filt = LD_SH(filter);
+    SPLATI_H3_SB(filt, 0, 1, 2, filt0, filt1, filt2);
+
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    src += (5 * src_stride);
+
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    ILVR_B4_SB(src1, src0, src3, src2, src2, src1, src4, src3,
+               src10_r, src32_r, src21_r, src43_r);
+    /* src7..src10 follow src4 directly; no src5 or src6 is declared here. */
+    for (loop_cnt = (height >> 2); loop_cnt--;) {
+        LD_SB4(src, src_stride, src7, src8, src9, src10);
+        XORI_B4_128_SB(src7, src8, src9, src10);
+        src += (4 * src_stride);
+
+        ILVR_B4_SB(src7, src4, src8, src7, src9, src8, src10, src9, src76_r,
+                   src87_r, src98_r, src109_r);
+        out0_r = DPADD_SH3_SH(src10_r, src32_r, src76_r, filt0, filt1, filt2);
+        out1_r = DPADD_SH3_SH(src21_r, src43_r, src87_r, filt0, filt1, filt2);
+        out2_r = DPADD_SH3_SH(src32_r, src76_r, src98_r, filt0, filt1, filt2);
+        out3_r = DPADD_SH3_SH(src43_r, src87_r, src109_r, filt0, filt1, filt2);
+        SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 7);
+        SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
+        tmp0 = PCKEV_XORI128_UB(out0_r, out1_r);
+        tmp1 = PCKEV_XORI128_UB(out2_r, out3_r);
+        ST8x4_UB(tmp0, tmp1, dst, dst_stride);
+        dst += (4 * dst_stride);
+        /* The newest vectors become the oldest inputs of the next pass. */
+        src10_r = src76_r;
+        src32_r = src98_r;
+        src21_r = src87_r;
+        src43_r = src109_r;
+        src4 = src10;
+    }
+}
+
+void ff_put_vp8_epel16_v6_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                              uint8_t *src, ptrdiff_t src_stride,
+                              int height, int mx, int my)
+{
+    uint32_t loop_cnt;
+    const int8_t *filter = subpel_filters_msa[my - 1];
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16i8 src10_r, src32_r, src54_r, src76_r, src21_r, src43_r, src65_r;
+    v16i8 src87_r, src10_l, src32_l, src54_l, src76_l, src21_l, src43_l;
+    v16i8 src65_l, src87_l, filt0, filt1, filt2;
+    v16u8 tmp0, tmp1, tmp2, tmp3;
+    v8i16 out0_r, out1_r, out2_r, out3_r, out0_l, out1_l, out2_l, out3_l, filt;
+    /* Taps come from row my - 1; loading starts two rows above src. */
+    src -= (2 * src_stride);
+
+    filt = LD_SH(filter);
+    SPLATI_H3_SB(filt, 0, 1, 2, filt0, filt1, filt2);
+
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    src += (5 * src_stride);
+
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    ILVR_B4_SB(src1, src0, src3, src2, src4, src3, src2, src1, src10_r,
+               src32_r, src43_r, src21_r);
+    ILVL_B4_SB(src1, src0, src3, src2, src4, src3, src2, src1, src10_l,
+               src32_l, src43_l, src21_l);
+    /* Every step below runs twice: once on the _r and once on the _l set. */
+    for (loop_cnt = (height >> 2); loop_cnt--;) {
+        LD_SB4(src, src_stride, src5, src6, src7, src8);
+        src += (4 * src_stride);
+
+        XORI_B4_128_SB(src5, src6, src7, src8);
+        ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7, src54_r,
+                   src65_r, src76_r, src87_r);
+        ILVL_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7, src54_l,
+                   src65_l, src76_l, src87_l);
+        out0_r = DPADD_SH3_SH(src10_r, src32_r, src54_r, filt0, filt1,
+                              filt2);
+        out1_r = DPADD_SH3_SH(src21_r, src43_r, src65_r, filt0, filt1,
+                              filt2);
+        out2_r = DPADD_SH3_SH(src32_r, src54_r, src76_r, filt0, filt1,
+                              filt2);
+        out3_r = DPADD_SH3_SH(src43_r, src65_r, src87_r, filt0, filt1,
+                              filt2);
+        out0_l = DPADD_SH3_SH(src10_l, src32_l, src54_l, filt0, filt1,
+                              filt2);
+        out1_l = DPADD_SH3_SH(src21_l, src43_l, src65_l, filt0, filt1,
+                              filt2);
+        out2_l = DPADD_SH3_SH(src32_l, src54_l, src76_l, filt0, filt1,
+                              filt2);
+        out3_l = DPADD_SH3_SH(src43_l, src65_l, src87_l, filt0, filt1,
+                              filt2);
+        SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 7);
+        SRARI_H4_SH(out0_l, out1_l, out2_l, out3_l, 7);
+        SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
+        SAT_SH4_SH(out0_l, out1_l, out2_l, out3_l, 7);
+        PCKEV_B4_UB(out0_l, out0_r, out1_l, out1_r, out2_l, out2_r, out3_l,
+                    out3_r, tmp0, tmp1, tmp2, tmp3);
+        XORI_B4_128_UB(tmp0, tmp1, tmp2, tmp3);
+        ST_UB4(tmp0, tmp1, tmp2, tmp3, dst, dst_stride);
+        dst += (4 * dst_stride);
+        /* The newest vectors become the oldest inputs of the next pass. */
+        src10_r = src54_r;
+        src32_r = src76_r;
+        src21_r = src65_r;
+        src43_r = src87_r;
+        src10_l = src54_l;
+        src32_l = src76_l;
+        src21_l = src65_l;
+        src43_l = src87_l;
+        src4 = src8;
+    }
+}
+
+void ff_put_vp8_epel4_h6v6_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                               uint8_t *src, ptrdiff_t src_stride,
+                               int height, int mx, int my)
+{
+    uint32_t loop_cnt;
+    const int8_t *filter_horiz = subpel_filters_msa[mx - 1];
+    const int8_t *filter_vert = subpel_filters_msa[my - 1];
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16i8 filt_hz0, filt_hz1, filt_hz2;
+    v16u8 mask0, mask1, mask2, out;
+    v8i16 tmp0, tmp1;
+    v8i16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6;
+    v8i16 hz_out7, filt, filt_vt0, filt_vt1, filt_vt2, out0, out1, out2, out3;
+    /* 4-wide block, horizontal pass (mx) feeding a vertical pass (my) */
+    mask0 = LD_UB(&mc_filt_mask_arr[16]);
+    src -= (2 + 2 * src_stride); /* begin 2 columns left and 2 rows up */
+
+    /* load both filters; elements 0..2 of each go to separate vectors */
+    filt = LD_SH(filter_horiz);
+    SPLATI_H3_SB(filt, 0, 1, 2, filt_hz0, filt_hz1, filt_hz2);
+
+    filt = LD_SH(filter_vert);
+    SPLATI_H3_SH(filt, 0, 1, 2, filt_vt0, filt_vt1, filt_vt2);
+
+    mask1 = mask0 + 2;
+    mask2 = mask0 + 4;
+
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    src += (5 * src_stride);
+    /* prologue: horizontally filter the first 5 rows, two rows per call */
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    hz_out0 = HORIZ_6TAP_FILT(src0, src1, mask0, mask1, mask2, filt_hz0,
+                              filt_hz1, filt_hz2);
+    hz_out2 = HORIZ_6TAP_FILT(src2, src3, mask0, mask1, mask2, filt_hz0,
+                              filt_hz1, filt_hz2);
+    hz_out1 = (v8i16) __msa_sldi_b((v16i8) hz_out2, (v16i8) hz_out0, 8);
+    hz_out3 = HORIZ_6TAP_FILT(src3, src4, mask0, mask1, mask2, filt_hz0,
+                              filt_hz1, filt_hz2);
+    ILVEV_B2_SH(hz_out0, hz_out1, hz_out2, hz_out3, out0, out1);
+
+    for (loop_cnt = (height >> 2); loop_cnt--;) { /* 4 rows per pass */
+        LD_SB2(src, src_stride, src5, src6);
+        src += (2 * src_stride);
+
+        XORI_B2_128_SB(src5, src6);
+        hz_out5 = HORIZ_6TAP_FILT(src5, src6, mask0, mask1, mask2, filt_hz0,
+                                  filt_hz1, filt_hz2);
+        hz_out4 = (v8i16) __msa_sldi_b((v16i8) hz_out5, (v16i8) hz_out3, 8);
+
+        LD_SB2(src, src_stride, src7, src8);
+        src += (2 * src_stride);
+
+        XORI_B2_128_SB(src7, src8);
+        hz_out7 = HORIZ_6TAP_FILT(src7, src8, mask0, mask1, mask2, filt_hz0,
+                                  filt_hz1, filt_hz2);
+        hz_out6 = (v8i16) __msa_sldi_b((v16i8) hz_out7, (v16i8) hz_out5, 8);
+
+        out2 = (v8i16) __msa_ilvev_b((v16i8) hz_out5, (v16i8) hz_out4);
+        tmp0 = DPADD_SH3_SH(out0, out1, out2, filt_vt0, filt_vt1, filt_vt2);
+
+        out3 = (v8i16) __msa_ilvev_b((v16i8) hz_out7, (v16i8) hz_out6);
+        tmp1 = DPADD_SH3_SH(out1, out2, out3, filt_vt0, filt_vt1, filt_vt2);
+
+        SRARI_H2_SH(tmp0, tmp1, 7); /* >> 7; taps presumably sum to 128 */
+        SAT_SH2_SH(tmp0, tmp1, 7);
+        out = PCKEV_XORI128_UB(tmp0, tmp1);
+        ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
+        dst += (4 * dst_stride);
+
+        hz_out3 = hz_out7; /* keep filtered rows for the next pass */
+        out0 = out2;
+        out1 = out3;
+    }
+}
+
+void ff_put_vp8_epel8_h6v6_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                               uint8_t *src, ptrdiff_t src_stride,
+                               int height, int mx, int my)
+{
+    uint32_t loop_cnt;
+    const int8_t *filter_horiz = subpel_filters_msa[mx - 1];
+    const int8_t *filter_vert = subpel_filters_msa[my - 1];
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16i8 filt_hz0, filt_hz1, filt_hz2;
+    v16u8 mask0, mask1, mask2, vec0, vec1;
+    v8i16 filt, filt_vt0, filt_vt1, filt_vt2;
+    v8i16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6;
+    v8i16 hz_out7, hz_out8, out0, out1, out2, out3, out4, out5, out6, out7;
+    v8i16 tmp0, tmp1, tmp2, tmp3;
+    /* 8-wide block, horizontal pass (mx) feeding a vertical pass (my) */
+    mask0 = LD_UB(&mc_filt_mask_arr[0]);
+    src -= (2 + 2 * src_stride); /* begin 2 columns left and 2 rows up */
+
+    /* load the horizontal filter; elements 0..2 go to separate vectors */
+    filt = LD_SH(filter_horiz);
+    SPLATI_H3_SB(filt, 0, 1, 2, filt_hz0, filt_hz1, filt_hz2);
+
+    mask1 = mask0 + 2;
+    mask2 = mask0 + 4;
+
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    src += (5 * src_stride);
+    /* prologue: horizontally filter the first 5 rows, one row per call */
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    hz_out0 = HORIZ_6TAP_FILT(src0, src0, mask0, mask1, mask2, filt_hz0,
+                              filt_hz1, filt_hz2);
+    hz_out1 = HORIZ_6TAP_FILT(src1, src1, mask0, mask1, mask2, filt_hz0,
+                              filt_hz1, filt_hz2);
+    hz_out2 = HORIZ_6TAP_FILT(src2, src2, mask0, mask1, mask2, filt_hz0,
+                              filt_hz1, filt_hz2);
+    hz_out3 = HORIZ_6TAP_FILT(src3, src3, mask0, mask1, mask2, filt_hz0,
+                              filt_hz1, filt_hz2);
+    hz_out4 = HORIZ_6TAP_FILT(src4, src4, mask0, mask1, mask2, filt_hz0,
+                              filt_hz1, filt_hz2);
+
+    filt = LD_SH(filter_vert);
+    SPLATI_H3_SH(filt, 0, 1, 2, filt_vt0, filt_vt1, filt_vt2);
+    /* out0/out1 combine hz_out0..3; out3/out4 combine hz_out1..4 */
+    ILVEV_B2_SH(hz_out0, hz_out1, hz_out2, hz_out3, out0, out1);
+    ILVEV_B2_SH(hz_out1, hz_out2, hz_out3, hz_out4, out3, out4);
+
+    for (loop_cnt = (height >> 2); loop_cnt--;) { /* 4 rows per pass */
+        LD_SB4(src, src_stride, src5, src6, src7, src8);
+        src += (4 * src_stride);
+
+        XORI_B4_128_SB(src5, src6, src7, src8);
+        hz_out5 = HORIZ_6TAP_FILT(src5, src5, mask0, mask1, mask2, filt_hz0,
+                                  filt_hz1, filt_hz2);
+        out2 = (v8i16) __msa_ilvev_b((v16i8) hz_out5, (v16i8) hz_out4);
+        tmp0 = DPADD_SH3_SH(out0, out1, out2, filt_vt0, filt_vt1, filt_vt2);
+
+        hz_out6 = HORIZ_6TAP_FILT(src6, src6, mask0, mask1, mask2, filt_hz0,
+                                  filt_hz1, filt_hz2);
+        out5 = (v8i16) __msa_ilvev_b((v16i8) hz_out6, (v16i8) hz_out5);
+        tmp1 = DPADD_SH3_SH(out3, out4, out5, filt_vt0, filt_vt1, filt_vt2);
+
+        hz_out7 = HORIZ_6TAP_FILT(src7, src7, mask0, mask1, mask2, filt_hz0,
+                                  filt_hz1, filt_hz2);
+        out7 = (v8i16) __msa_ilvev_b((v16i8) hz_out7, (v16i8) hz_out6);
+        tmp2 = DPADD_SH3_SH(out1, out2, out7, filt_vt0, filt_vt1, filt_vt2);
+
+        hz_out8 = HORIZ_6TAP_FILT(src8, src8, mask0, mask1, mask2, filt_hz0,
+                                  filt_hz1, filt_hz2);
+        out6 = (v8i16) __msa_ilvev_b((v16i8) hz_out8, (v16i8) hz_out7);
+        tmp3 = DPADD_SH3_SH(out4, out5, out6, filt_vt0, filt_vt1, filt_vt2);
+
+        SRARI_H4_SH(tmp0, tmp1, tmp2, tmp3, 7);
+        SAT_SH4_SH(tmp0, tmp1, tmp2, tmp3, 7);
+        vec0 = PCKEV_XORI128_UB(tmp0, tmp1);
+        vec1 = PCKEV_XORI128_UB(tmp2, tmp3);
+        ST8x4_UB(vec0, vec1, dst, dst_stride);
+        dst += (4 * dst_stride);
+
+        hz_out4 = hz_out8; /* newest filtered row seeds the next pass */
+        out0 = out2;       /* row pairs computed here are reused there */
+        out1 = out7;
+        out3 = out5;
+        out4 = out6;
+    }
+}
+
+
+void ff_put_vp8_epel16_h6v6_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                               uint8_t *src, ptrdiff_t src_stride,
+                               int height, int mx, int my)
+{
+    /* A 16-wide block is produced as two independent 8-wide halves. */
+
+    /* Left half: columns 0-7. */
+    ff_put_vp8_epel8_h6v6_msa(dst, dst_stride, src, src_stride, height,
+                              mx, my);
+
+    /* Right half: columns 8-15, 8 bytes further into src and dst. */
+    ff_put_vp8_epel8_h6v6_msa(dst + 8, dst_stride, src + 8, src_stride,
+                              height, mx, my);
+}
+
+static void common_hz_4t_4x4_msa(uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 const int8_t *filter)
+{
+    v16i8 src0, src1, src2, src3, filt0, filt1, mask0, mask1;
+    v8i16 filt, out0, out1;
+    v16u8 out;
+    /* horizontal-only filter of one 4x4 block; straight-line, no loop */
+    mask0 = LD_SB(&mc_filt_mask_arr[16]);
+    src -= 1; /* begin 1 column left of the block */
+
+    /* load the filter; elements 0 and 1 of filt go to filt0 and filt1 */
+    filt = LD_SH(filter);
+    SPLATI_H2_SB(filt, 0, 1, filt0, filt1);
+
+    mask1 = mask0 + 2;
+
+    LD_SB4(src, src_stride, src0, src1, src2, src3);
+    XORI_B4_128_SB(src0, src1, src2, src3);
+    HORIZ_4TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1,
+                               filt0, filt1, out0, out1);
+    SRARI_H2_SH(out0, out1, 7); /* >> 7; taps presumably sum to 128 */
+    SAT_SH2_SH(out0, out1, 7);
+    out = PCKEV_XORI128_UB(out0, out1);
+    ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
+}
+
+static void common_hz_4t_4x8_msa(uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 const int8_t *filter)
+{
+    v16i8 src0, src1, src2, src3, filt0, filt1, mask0, mask1;
+    v16u8 out;
+    v8i16 filt, out0, out1, out2, out3;
+    /* horizontal-only filter of one 4x8 block, as two groups of 4 rows */
+    mask0 = LD_SB(&mc_filt_mask_arr[16]);
+    src -= 1; /* begin 1 column left of the block */
+
+    /* load the filter; elements 0 and 1 of filt go to filt0 and filt1 */
+    filt = LD_SH(filter);
+    SPLATI_H2_SB(filt, 0, 1, filt0, filt1);
+
+    mask1 = mask0 + 2;
+
+    LD_SB4(src, src_stride, src0, src1, src2, src3);
+    src += (4 * src_stride);
+
+    XORI_B4_128_SB(src0, src1, src2, src3);
+    HORIZ_4TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1,
+                               filt0, filt1, out0, out1);
+    LD_SB4(src, src_stride, src0, src1, src2, src3); /* rows 4..7 */
+    XORI_B4_128_SB(src0, src1, src2, src3);
+    HORIZ_4TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1,
+                               filt0, filt1, out2, out3);
+    SRARI_H4_SH(out0, out1, out2, out3, 7);
+    SAT_SH4_SH(out0, out1, out2, out3, 7);
+    out = PCKEV_XORI128_UB(out0, out1);
+    ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
+    dst += (4 * dst_stride);
+    out = PCKEV_XORI128_UB(out2, out3);
+    ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
+}
+
+static void common_hz_4t_4x16_msa(uint8_t *src, int32_t src_stride,
+                                  uint8_t *dst, int32_t dst_stride,
+                                  const int8_t *filter)
+{
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
+    v16i8 filt0, filt1, mask0, mask1;
+    v16u8 out;
+    v8i16 filt, out0, out1, out2, out3;
+    /* horizontal-only filter of one 4x16 block, as two groups of 8 rows */
+    mask0 = LD_SB(&mc_filt_mask_arr[16]);
+    src -= 1; /* begin 1 column left of the block */
+
+    /* load the filter; elements 0 and 1 of filt go to filt0 and filt1 */
+    filt = LD_SH(filter);
+    SPLATI_H2_SB(filt, 0, 1, filt0, filt1);
+
+    mask1 = mask0 + 2;
+
+    LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
+    src += (8 * src_stride);
+    XORI_B8_128_SB(src0, src1, src2, src3, src4, src5, src6, src7);
+    HORIZ_4TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1,
+                               filt0, filt1, out0, out1);
+    HORIZ_4TAP_4WID_4VECS_FILT(src4, src5, src6, src7, mask0, mask1,
+                               filt0, filt1, out2, out3);
+    SRARI_H4_SH(out0, out1, out2, out3, 7);
+    SAT_SH4_SH(out0, out1, out2, out3, 7);
+    out = PCKEV_XORI128_UB(out0, out1);
+    ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
+    dst += (4 * dst_stride);
+    out = PCKEV_XORI128_UB(out2, out3);
+    ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
+    dst += (4 * dst_stride);
+    /* second group: same sequence for the remaining 8 rows */
+    LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
+    src += (8 * src_stride); /* src is not read again after this point */
+    XORI_B8_128_SB(src0, src1, src2, src3, src4, src5, src6, src7);
+    HORIZ_4TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1,
+                               filt0, filt1, out0, out1);
+    HORIZ_4TAP_4WID_4VECS_FILT(src4, src5, src6, src7, mask0, mask1,
+                               filt0, filt1, out2, out3);
+    SRARI_H4_SH(out0, out1, out2, out3, 7);
+    SAT_SH4_SH(out0, out1, out2, out3, 7);
+    out = PCKEV_XORI128_UB(out0, out1);
+    ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
+    dst += (4 * dst_stride);
+    out = PCKEV_XORI128_UB(out2, out3);
+    ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
+}
+
+void ff_put_vp8_epel4_h4_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                             uint8_t *src, ptrdiff_t src_stride,
+                             int height, int mx, int my)
+{
+    const int8_t *taps = subpel_filters_msa[mx - 1];
+
+    /* One unrolled helper per supported height; other heights do nothing. */
+    if (height == 4)
+        common_hz_4t_4x4_msa(src, src_stride, dst, dst_stride, taps);
+    else if (height == 8)
+        common_hz_4t_4x8_msa(src, src_stride, dst, dst_stride, taps);
+    else if (height == 16)
+        common_hz_4t_4x16_msa(src, src_stride, dst, dst_stride, taps);
+}
+
+void ff_put_vp8_epel8_h4_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                             uint8_t *src, ptrdiff_t src_stride,
+                             int height, int mx, int my)
+{
+    uint32_t loop_cnt;
+    const int8_t *filter = subpel_filters_msa[mx - 1];
+    v16i8 src0, src1, src2, src3, filt0, filt1, mask0, mask1;
+    v16u8 tmp0, tmp1;
+    v8i16 filt, out0, out1, out2, out3;
+    /* 8-wide block, horizontal filter only; my is not used */
+    mask0 = LD_SB(&mc_filt_mask_arr[0]);
+    src -= 1; /* begin 1 column left of the block */
+
+    /* load the filter; elements 0 and 1 of filt go to filt0 and filt1 */
+    filt = LD_SH(filter);
+    SPLATI_H2_SB(filt, 0, 1, filt0, filt1);
+
+    mask1 = mask0 + 2;
+
+    for (loop_cnt = (height >> 2); loop_cnt--;) { /* 4 rows per pass */
+        LD_SB4(src, src_stride, src0, src1, src2, src3);
+        src += (4 * src_stride);
+
+        XORI_B4_128_SB(src0, src1, src2, src3);
+        HORIZ_4TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, filt0,
+                                   filt1, out0, out1, out2, out3);
+        SRARI_H4_SH(out0, out1, out2, out3, 7);
+        SAT_SH4_SH(out0, out1, out2, out3, 7);
+        tmp0 = PCKEV_XORI128_UB(out0, out1);
+        tmp1 = PCKEV_XORI128_UB(out2, out3);
+        ST8x4_UB(tmp0, tmp1, dst, dst_stride);
+        dst += (4 * dst_stride);
+    }
+}
+
+void ff_put_vp8_epel16_h4_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                              uint8_t *src, ptrdiff_t src_stride,
+                              int height, int mx, int my)
+{
+    uint32_t loop_cnt;
+    const int8_t *filter = subpel_filters_msa[mx - 1];
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
+    v16i8 filt0, filt1, mask0, mask1;
+    v8i16 filt, out0, out1, out2, out3, out4, out5, out6, out7;
+    v16u8 out;
+    /* 16-wide block, horizontal filter only; my is not used */
+    mask0 = LD_SB(&mc_filt_mask_arr[0]);
+    src -= 1; /* begin 1 column left of the block */
+
+    /* load the filter; elements 0 and 1 of filt go to filt0 and filt1 */
+    filt = LD_SH(filter);
+    SPLATI_H2_SB(filt, 0, 1, filt0, filt1);
+
+    mask1 = mask0 + 2;
+
+    for (loop_cnt = (height >> 2); loop_cnt--;) { /* 4 rows per pass */
+        LD_SB4(src, src_stride, src0, src2, src4, src6);
+        LD_SB4(src + 8, src_stride, src1, src3, src5, src7);
+        src += (4 * src_stride);
+        /* even-numbered srcN were read at src, odd-numbered at src + 8 */
+        XORI_B8_128_SB(src0, src1, src2, src3, src4, src5, src6, src7);
+        HORIZ_4TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, filt0,
+                                   filt1, out0, out1, out2, out3);
+        HORIZ_4TAP_8WID_4VECS_FILT(src4, src5, src6, src7, mask0, mask1, filt0,
+                                   filt1, out4, out5, out6, out7);
+        SRARI_H4_SH(out0, out1, out2, out3, 7);
+        SRARI_H4_SH(out4, out5, out6, out7, 7);
+        SAT_SH4_SH(out0, out1, out2, out3, 7);
+        SAT_SH4_SH(out4, out5, out6, out7, 7);
+        out = PCKEV_XORI128_UB(out0, out1); /* one full row per store */
+        ST_UB(out, dst);
+        dst += dst_stride;
+        out = PCKEV_XORI128_UB(out2, out3);
+        ST_UB(out, dst);
+        dst += dst_stride;
+        out = PCKEV_XORI128_UB(out4, out5);
+        ST_UB(out, dst);
+        dst += dst_stride;
+        out = PCKEV_XORI128_UB(out6, out7);
+        ST_UB(out, dst);
+        dst += dst_stride;
+    }
+}
+
+void ff_put_vp8_epel4_v4_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                             uint8_t *src, ptrdiff_t src_stride,
+                             int height, int mx, int my)
+{
+    uint32_t loop_cnt;
+    const int8_t *filter = subpel_filters_msa[my - 1];
+    v16i8 src0, src1, src2, src3, src4, src5;
+    v16i8 src10_r, src32_r, src54_r, src21_r, src43_r, src65_r;
+    v16i8 src2110, src4332, filt0, filt1;
+    v8i16 filt, out10, out32;
+    v16u8 out;
+    /* 4-wide block, vertical filter only; mx is not used */
+    src -= src_stride; /* begin 1 row above the block */
+
+    filt = LD_SH(filter);
+    SPLATI_H2_SB(filt, 0, 1, filt0, filt1);
+
+    LD_SB3(src, src_stride, src0, src1, src2);
+    src += (3 * src_stride);
+
+    ILVR_B2_SB(src1, src0, src2, src1, src10_r, src21_r);
+    /* join two interleaved row pairs in one vector, then XOR with 128 */
+    src2110 = (v16i8) __msa_ilvr_d((v2i64) src21_r, (v2i64) src10_r);
+    src2110 = (v16i8) __msa_xori_b((v16u8) src2110, 128);
+
+    for (loop_cnt = (height >> 2); loop_cnt--;) { /* 4 rows per pass */
+        LD_SB3(src, src_stride, src3, src4, src5);
+        src += (3 * src_stride);
+        ILVR_B2_SB(src3, src2, src4, src3, src32_r, src43_r);
+        src4332 = (v16i8) __msa_ilvr_d((v2i64) src43_r, (v2i64) src32_r);
+        src4332 = (v16i8) __msa_xori_b((v16u8) src4332, 128);
+        out10 = FILT_4TAP_DPADD_S_H(src2110, src4332, filt0, filt1);
+
+        src2 = LD_SB(src); /* 4th new row; also read by the next pass */
+        src += (src_stride);
+        ILVR_B2_SB(src5, src4, src2, src5, src54_r, src65_r);
+        src2110 = (v16i8) __msa_ilvr_d((v2i64) src65_r, (v2i64) src54_r);
+        src2110 = (v16i8) __msa_xori_b((v16u8) src2110, 128);
+        out32 = FILT_4TAP_DPADD_S_H(src4332, src2110, filt0, filt1);
+        SRARI_H2_SH(out10, out32, 7);
+        SAT_SH2_SH(out10, out32, 7);
+        out = PCKEV_XORI128_UB(out10, out32);
+        ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
+        dst += (4 * dst_stride);
+    }
+}
+
+void ff_put_vp8_epel8_v4_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                             uint8_t *src, ptrdiff_t src_stride,
+                             int height, int mx, int my)
+{
+    uint32_t loop_cnt;
+    const int8_t *filter = subpel_filters_msa[my - 1];
+    v16i8 src0, src1, src2, src7, src8, src9, src10;
+    v16i8 src10_r, src72_r, src98_r, src21_r, src87_r, src109_r, filt0, filt1;
+    v16u8 tmp0, tmp1;
+    v8i16 filt, out0_r, out1_r, out2_r, out3_r;
+    /* 8-wide block, vertical filter only; mx is not used */
+    src -= src_stride; /* begin 1 row above the block */
+
+    filt = LD_SH(filter);
+    SPLATI_H2_SB(filt, 0, 1, filt0, filt1);
+
+    LD_SB3(src, src_stride, src0, src1, src2);
+    src += (3 * src_stride);
+
+    XORI_B3_128_SB(src0, src1, src2);
+    ILVR_B2_SB(src1, src0, src2, src1, src10_r, src21_r);
+
+    for (loop_cnt = (height >> 2); loop_cnt--;) { /* 4 rows per pass */
+        LD_SB4(src, src_stride, src7, src8, src9, src10);
+        src += (4 * src_stride);
+
+        XORI_B4_128_SB(src7, src8, src9, src10);
+        ILVR_B4_SB(src7, src2, src8, src7, src9, src8, src10, src9,
+                   src72_r, src87_r, src98_r, src109_r);
+        out0_r = FILT_4TAP_DPADD_S_H(src10_r, src72_r, filt0, filt1);
+        out1_r = FILT_4TAP_DPADD_S_H(src21_r, src87_r, filt0, filt1);
+        out2_r = FILT_4TAP_DPADD_S_H(src72_r, src98_r, filt0, filt1);
+        out3_r = FILT_4TAP_DPADD_S_H(src87_r, src109_r, filt0, filt1);
+        SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 7);
+        SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
+        tmp0 = PCKEV_XORI128_UB(out0_r, out1_r);
+        tmp1 = PCKEV_XORI128_UB(out2_r, out3_r);
+        ST8x4_UB(tmp0, tmp1, dst, dst_stride);
+        dst += (4 * dst_stride);
+
+        src10_r = src98_r; /* newest row pairs seed the next pass */
+        src21_r = src109_r;
+        src2 = src10;      /* last loaded row is paired with the next one */
+    }
+}
+
+void ff_put_vp8_epel16_v4_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                              uint8_t *src, ptrdiff_t src_stride,
+                              int height, int mx, int my)
+{
+    uint32_t loop_cnt;
+    const int8_t *filter = subpel_filters_msa[my - 1];
+    v16i8 src0, src1, src2, src3, src4, src5, src6;
+    v16i8 src10_r, src32_r, src54_r, src21_r, src43_r, src65_r, src10_l;
+    v16i8 src32_l, src54_l, src21_l, src43_l, src65_l, filt0, filt1;
+    v16u8 tmp0, tmp1, tmp2, tmp3;
+    v8i16 filt, out0_r, out1_r, out2_r, out3_r, out0_l, out1_l, out2_l, out3_l;
+    /* 16-wide block, vertical filter only; mx is not used */
+    src -= src_stride; /* begin 1 row above the block */
+
+    filt = LD_SH(filter);
+    SPLATI_H2_SB(filt, 0, 1, filt0, filt1);
+
+    LD_SB3(src, src_stride, src0, src1, src2);
+    src += (3 * src_stride);
+
+    XORI_B3_128_SB(src0, src1, src2);
+    ILVR_B2_SB(src1, src0, src2, src1, src10_r, src21_r);
+    ILVL_B2_SB(src1, src0, src2, src1, src10_l, src21_l);
+
+    for (loop_cnt = (height >> 2); loop_cnt--;) { /* 4 rows per pass */
+        LD_SB4(src, src_stride, src3, src4, src5, src6);
+        src += (4 * src_stride);
+        /* the _r and _l vectors carry the two halves of each row pair */
+        XORI_B4_128_SB(src3, src4, src5, src6);
+        ILVR_B4_SB(src3, src2, src4, src3, src5, src4, src6, src5,
+                   src32_r, src43_r, src54_r, src65_r);
+        ILVL_B4_SB(src3, src2, src4, src3, src5, src4, src6, src5,
+                   src32_l, src43_l, src54_l, src65_l);
+        out0_r = FILT_4TAP_DPADD_S_H(src10_r, src32_r, filt0, filt1);
+        out1_r = FILT_4TAP_DPADD_S_H(src21_r, src43_r, filt0, filt1);
+        out2_r = FILT_4TAP_DPADD_S_H(src32_r, src54_r, filt0, filt1);
+        out3_r = FILT_4TAP_DPADD_S_H(src43_r, src65_r, filt0, filt1);
+        out0_l = FILT_4TAP_DPADD_S_H(src10_l, src32_l, filt0, filt1);
+        out1_l = FILT_4TAP_DPADD_S_H(src21_l, src43_l, filt0, filt1);
+        out2_l = FILT_4TAP_DPADD_S_H(src32_l, src54_l, filt0, filt1);
+        out3_l = FILT_4TAP_DPADD_S_H(src43_l, src65_l, filt0, filt1);
+        SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 7);
+        SRARI_H4_SH(out0_l, out1_l, out2_l, out3_l, 7);
+        SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
+        SAT_SH4_SH(out0_l, out1_l, out2_l, out3_l, 7);
+        PCKEV_B4_UB(out0_l, out0_r, out1_l, out1_r, out2_l, out2_r, out3_l,
+                    out3_r, tmp0, tmp1, tmp2, tmp3);
+        XORI_B4_128_UB(tmp0, tmp1, tmp2, tmp3);
+        ST_UB4(tmp0, tmp1, tmp2, tmp3, dst, dst_stride);
+        dst += (4 * dst_stride);
+
+        src10_r = src54_r; /* newest row pairs seed the next pass */
+        src21_r = src65_r;
+        src10_l = src54_l;
+        src21_l = src65_l;
+        src2 = src6;       /* last loaded row is paired with the next one */
+    }
+}
+
+void ff_put_vp8_epel4_h4v4_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                               uint8_t *src, ptrdiff_t src_stride,
+                               int height, int mx, int my)
+{
+    uint32_t loop_cnt;
+    const int8_t *filter_horiz = subpel_filters_msa[mx - 1];
+    const int8_t *filter_vert = subpel_filters_msa[my - 1];
+    v16i8 src0, src1, src2, src3, src4, src5, src6, filt_hz0, filt_hz1;
+    v16u8 mask0, mask1, out;
+    v8i16 filt, filt_vt0, filt_vt1, tmp0, tmp1, vec0, vec1, vec2;
+    v8i16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5;
+    /* 4-wide block, horizontal pass (mx) feeding a vertical pass (my) */
+    mask0 = LD_UB(&mc_filt_mask_arr[16]);
+    src -= (1 + 1 * src_stride); /* begin 1 column left and 1 row up */
+
+    /* load the horizontal filter; elements 0 and 1 go to separate vectors */
+    filt = LD_SH(filter_horiz);
+    SPLATI_H2_SB(filt, 0, 1, filt_hz0, filt_hz1);
+
+    mask1 = mask0 + 2;
+
+    LD_SB3(src, src_stride, src0, src1, src2);
+    src += (3 * src_stride);
+    /* prologue: horizontally filter the first 3 rows, two rows per call */
+    XORI_B3_128_SB(src0, src1, src2);
+    hz_out0 = HORIZ_4TAP_FILT(src0, src1, mask0, mask1, filt_hz0, filt_hz1);
+    hz_out1 = HORIZ_4TAP_FILT(src1, src2, mask0, mask1, filt_hz0, filt_hz1);
+    vec0 = (v8i16) __msa_ilvev_b((v16i8) hz_out1, (v16i8) hz_out0);
+
+    filt = LD_SH(filter_vert);
+    SPLATI_H2_SH(filt, 0, 1, filt_vt0, filt_vt1);
+
+    for (loop_cnt = (height >> 2); loop_cnt--;) { /* 4 rows per pass */
+        LD_SB4(src, src_stride, src3, src4, src5, src6);
+        src += (4 * src_stride);
+
+        XORI_B2_128_SB(src3, src4);
+        hz_out3 = HORIZ_4TAP_FILT(src3, src4, mask0, mask1, filt_hz0, filt_hz1);
+        hz_out2 = (v8i16) __msa_sldi_b((v16i8) hz_out3, (v16i8) hz_out1, 8);
+        vec1 = (v8i16) __msa_ilvev_b((v16i8) hz_out3, (v16i8) hz_out2);
+        tmp0 = FILT_4TAP_DPADD_S_H(vec0, vec1, filt_vt0, filt_vt1);
+
+        XORI_B2_128_SB(src5, src6);
+        hz_out5 = HORIZ_4TAP_FILT(src5, src6, mask0, mask1, filt_hz0, filt_hz1);
+        hz_out4 = (v8i16) __msa_sldi_b((v16i8) hz_out5, (v16i8) hz_out3, 8);
+        vec2 = (v8i16) __msa_ilvev_b((v16i8) hz_out5, (v16i8) hz_out4);
+        tmp1 = FILT_4TAP_DPADD_S_H(vec1, vec2, filt_vt0, filt_vt1);
+
+        SRARI_H2_SH(tmp0, tmp1, 7);
+        SAT_SH2_SH(tmp0, tmp1, 7);
+        out = PCKEV_XORI128_UB(tmp0, tmp1);
+        ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
+        dst += (4 * dst_stride);
+
+        hz_out1 = hz_out5; /* keep filtered rows for the next pass */
+        vec0 = vec2;
+    }
+}
+
+void ff_put_vp8_epel8_h4v4_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                               uint8_t *src, ptrdiff_t src_stride,
+                               int height, int mx, int my)
+{
+    uint32_t loop_cnt;
+    const int8_t *filter_horiz = subpel_filters_msa[mx - 1];
+    const int8_t *filter_vert = subpel_filters_msa[my - 1];
+    v16i8 src0, src1, src2, src3, src4, src5, src6, filt_hz0, filt_hz1;
+    v16u8 mask0, mask1, out0, out1;
+    v8i16 filt, filt_vt0, filt_vt1, tmp0, tmp1, tmp2, tmp3;
+    v8i16 hz_out0, hz_out1, hz_out2, hz_out3;
+    v8i16 vec0, vec1, vec2, vec3, vec4;
+    /* 8-wide block, horizontal pass (mx) feeding a vertical pass (my) */
+    mask0 = LD_UB(&mc_filt_mask_arr[0]);
+    src -= (1 + 1 * src_stride); /* begin 1 column left and 1 row up */
+
+    /* load the horizontal filter; elements 0 and 1 go to separate vectors */
+    filt = LD_SH(filter_horiz);
+    SPLATI_H2_SB(filt, 0, 1, filt_hz0, filt_hz1);
+
+    mask1 = mask0 + 2;
+
+    LD_SB3(src, src_stride, src0, src1, src2);
+    src += (3 * src_stride);
+    /* prologue: horizontally filter the first 3 rows, one row per call */
+    XORI_B3_128_SB(src0, src1, src2);
+    hz_out0 = HORIZ_4TAP_FILT(src0, src0, mask0, mask1, filt_hz0, filt_hz1);
+    hz_out1 = HORIZ_4TAP_FILT(src1, src1, mask0, mask1, filt_hz0, filt_hz1);
+    hz_out2 = HORIZ_4TAP_FILT(src2, src2, mask0, mask1, filt_hz0, filt_hz1);
+    ILVEV_B2_SH(hz_out0, hz_out1, hz_out1, hz_out2, vec0, vec2);
+
+    filt = LD_SH(filter_vert);
+    SPLATI_H2_SH(filt, 0, 1, filt_vt0, filt_vt1);
+
+    for (loop_cnt = (height >> 2); loop_cnt--;) { /* 4 rows per pass */
+        LD_SB4(src, src_stride, src3, src4, src5, src6);
+        src += (4 * src_stride);
+
+        XORI_B4_128_SB(src3, src4, src5, src6);
+        hz_out3 = HORIZ_4TAP_FILT(src3, src3, mask0, mask1, filt_hz0, filt_hz1);
+        vec1 = (v8i16) __msa_ilvev_b((v16i8) hz_out3, (v16i8) hz_out2);
+        tmp0 = FILT_4TAP_DPADD_S_H(vec0, vec1, filt_vt0, filt_vt1);
+
+        hz_out0 = HORIZ_4TAP_FILT(src4, src4, mask0, mask1, filt_hz0, filt_hz1);
+        vec3 = (v8i16) __msa_ilvev_b((v16i8) hz_out0, (v16i8) hz_out3);
+        tmp1 = FILT_4TAP_DPADD_S_H(vec2, vec3, filt_vt0, filt_vt1);
+
+        hz_out1 = HORIZ_4TAP_FILT(src5, src5, mask0, mask1, filt_hz0, filt_hz1);
+        vec4 = (v8i16) __msa_ilvev_b((v16i8) hz_out1, (v16i8) hz_out0);
+        tmp2 = FILT_4TAP_DPADD_S_H(vec1, vec4, filt_vt0, filt_vt1);
+
+        hz_out2 = HORIZ_4TAP_FILT(src6, src6, mask0, mask1, filt_hz0, filt_hz1);
+        ILVEV_B2_SH(hz_out3, hz_out0, hz_out1, hz_out2, vec0, vec1);
+        tmp3 = FILT_4TAP_DPADD_S_H(vec0, vec1, filt_vt0, filt_vt1);
+
+        SRARI_H4_SH(tmp0, tmp1, tmp2, tmp3, 7);
+        SAT_SH4_SH(tmp0, tmp1, tmp2, tmp3, 7);
+        out0 = PCKEV_XORI128_UB(tmp0, tmp1);
+        out1 = PCKEV_XORI128_UB(tmp2, tmp3);
+        ST8x4_UB(out0, out1, dst, dst_stride);
+        dst += (4 * dst_stride);
+
+        vec0 = vec4; /* row pairs reused by the next pass; hz_out2 also */
+        vec2 = vec1; /* carries over, it is read before being rewritten  */
+    }
+}
+
+void ff_put_vp8_epel16_h4v4_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                                uint8_t *src, ptrdiff_t src_stride,
+                                int height, int mx, int my)
+{
+    /* A 16-wide block is produced as two independent 8-wide halves. */
+
+    /* Left half: columns 0-7. */
+    ff_put_vp8_epel8_h4v4_msa(dst, dst_stride, src, src_stride, height,
+                              mx, my);
+
+    /* Right half: columns 8-15, 8 bytes further into src and dst. */
+    ff_put_vp8_epel8_h4v4_msa(dst + 8, dst_stride, src + 8, src_stride,
+                              height, mx, my);
+}
+
+void ff_put_vp8_epel4_h6v4_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                               uint8_t *src, ptrdiff_t src_stride,
+                               int height, int mx, int my)
+{
+    uint32_t loop_cnt;
+    const int8_t *filter_horiz = subpel_filters_msa[mx - 1];
+    const int8_t *filter_vert = subpel_filters_msa[my - 1];
+    v16i8 src0, src1, src2, src3, src4, src5, src6;
+    v16i8 filt_hz0, filt_hz1, filt_hz2;
+    v16u8 res0, res1, mask0, mask1, mask2;
+    v8i16 filt, filt_vt0, filt_vt1, tmp0, tmp1, vec0, vec1, vec2;
+    v8i16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5;
+    /* 4-wide block, horizontal pass (mx) feeding a vertical pass (my) */
+    mask0 = LD_UB(&mc_filt_mask_arr[16]);
+    src -= (2 + 1 * src_stride); /* begin 2 columns left and 1 row up */
+
+    /* load the horizontal filter; elements 0..2 go to separate vectors */
+    filt = LD_SH(filter_horiz);
+    SPLATI_H3_SB(filt, 0, 1, 2, filt_hz0, filt_hz1, filt_hz2);
+
+    mask1 = mask0 + 2;
+    mask2 = mask0 + 4;
+
+    LD_SB3(src, src_stride, src0, src1, src2);
+    src += (3 * src_stride);
+    /* prologue: horizontally filter the first 3 rows, two rows per call */
+    XORI_B3_128_SB(src0, src1, src2);
+    hz_out0 = HORIZ_6TAP_FILT(src0, src1, mask0, mask1, mask2, filt_hz0,
+                              filt_hz1, filt_hz2);
+    hz_out1 = HORIZ_6TAP_FILT(src1, src2, mask0, mask1, mask2, filt_hz0,
+                              filt_hz1, filt_hz2);
+    vec0 = (v8i16) __msa_ilvev_b((v16i8) hz_out1, (v16i8) hz_out0);
+
+    filt = LD_SH(filter_vert);
+    SPLATI_H2_SH(filt, 0, 1, filt_vt0, filt_vt1);
+
+    for (loop_cnt = (height >> 2); loop_cnt--;) { /* 4 rows per pass */
+        LD_SB4(src, src_stride, src3, src4, src5, src6);
+        src += (4 * src_stride);
+
+        XORI_B4_128_SB(src3, src4, src5, src6);
+        hz_out3 = HORIZ_6TAP_FILT(src3, src4, mask0, mask1, mask2, filt_hz0,
+                                  filt_hz1, filt_hz2);
+        hz_out2 = (v8i16) __msa_sldi_b((v16i8) hz_out3, (v16i8) hz_out1, 8);
+        vec1 = (v8i16) __msa_ilvev_b((v16i8) hz_out3, (v16i8) hz_out2);
+        tmp0 = FILT_4TAP_DPADD_S_H(vec0, vec1, filt_vt0, filt_vt1);
+
+        hz_out5 = HORIZ_6TAP_FILT(src5, src6, mask0, mask1, mask2, filt_hz0,
+                                  filt_hz1, filt_hz2);
+        hz_out4 = (v8i16) __msa_sldi_b((v16i8) hz_out5, (v16i8) hz_out3, 8);
+        vec2 = (v8i16) __msa_ilvev_b((v16i8) hz_out5, (v16i8) hz_out4);
+        tmp1 = FILT_4TAP_DPADD_S_H(vec1, vec2, filt_vt0, filt_vt1);
+
+        SRARI_H2_SH(tmp0, tmp1, 7);
+        SAT_SH2_SH(tmp0, tmp1, 7);
+        PCKEV_B2_UB(tmp0, tmp0, tmp1, tmp1, res0, res1);
+        XORI_B2_128_UB(res0, res1);
+        ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
+        dst += (4 * dst_stride);
+
+        hz_out1 = hz_out5; /* keep filtered rows for the next pass */
+        vec0 = vec2;
+    }
+}
+
+void ff_put_vp8_epel8_h6v4_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                               uint8_t *src, ptrdiff_t src_stride,
+                               int height, int mx, int my)
+{
+    uint32_t loop_cnt;
+    const int8_t *filter_horiz = subpel_filters_msa[mx - 1];
+    const int8_t *filter_vert = subpel_filters_msa[my - 1];
+    v16i8 src0, src1, src2, src3, src4, src5, src6;
+    v16i8 filt_hz0, filt_hz1, filt_hz2, mask0, mask1, mask2;
+    v8i16 filt, filt_vt0, filt_vt1, hz_out0, hz_out1, hz_out2, hz_out3;
+    v8i16 tmp0, tmp1, tmp2, tmp3, vec0, vec1, vec2, vec3;
+    v16u8 out0, out1;
+    /* 8-wide block, horizontal pass (mx) feeding a vertical pass (my) */
+    mask0 = LD_SB(&mc_filt_mask_arr[0]);
+    src -= (2 + src_stride); /* begin 2 columns left and 1 row up */
+
+    /* load the horizontal filter; elements 0..2 go to separate vectors */
+    filt = LD_SH(filter_horiz);
+    SPLATI_H3_SB(filt, 0, 1, 2, filt_hz0, filt_hz1, filt_hz2);
+
+    mask1 = mask0 + 2;
+    mask2 = mask0 + 4;
+
+    LD_SB3(src, src_stride, src0, src1, src2);
+    src += (3 * src_stride);
+    /* prologue: horizontally filter the first 3 rows, one row per call */
+    XORI_B3_128_SB(src0, src1, src2);
+    hz_out0 = HORIZ_6TAP_FILT(src0, src0, mask0, mask1, mask2, filt_hz0,
+                              filt_hz1, filt_hz2);
+    hz_out1 = HORIZ_6TAP_FILT(src1, src1, mask0, mask1, mask2, filt_hz0,
+                              filt_hz1, filt_hz2);
+    hz_out2 = HORIZ_6TAP_FILT(src2, src2, mask0, mask1, mask2, filt_hz0,
+                              filt_hz1, filt_hz2);
+    ILVEV_B2_SH(hz_out0, hz_out1, hz_out1, hz_out2, vec0, vec2);
+
+    filt = LD_SH(filter_vert);
+    SPLATI_H2_SH(filt, 0, 1, filt_vt0, filt_vt1);
+
+    for (loop_cnt = (height >> 2); loop_cnt--;) { /* 4 rows per pass */
+        LD_SB4(src, src_stride, src3, src4, src5, src6);
+        src += (4 * src_stride);
+
+        XORI_B4_128_SB(src3, src4, src5, src6);
+
+        hz_out3 = HORIZ_6TAP_FILT(src3, src3, mask0, mask1, mask2, filt_hz0,
+                                  filt_hz1, filt_hz2);
+        vec1 = (v8i16) __msa_ilvev_b((v16i8) hz_out3, (v16i8) hz_out2);
+        tmp0 = FILT_4TAP_DPADD_S_H(vec0, vec1, filt_vt0, filt_vt1);
+
+        hz_out0 = HORIZ_6TAP_FILT(src4, src4, mask0, mask1, mask2, filt_hz0,
+                                  filt_hz1, filt_hz2);
+        vec3 = (v8i16) __msa_ilvev_b((v16i8) hz_out0, (v16i8) hz_out3);
+        tmp1 = FILT_4TAP_DPADD_S_H(vec2, vec3, filt_vt0, filt_vt1);
+
+        hz_out1 = HORIZ_6TAP_FILT(src5, src5, mask0, mask1, mask2, filt_hz0,
+                                  filt_hz1, filt_hz2);
+        vec0 = (v8i16) __msa_ilvev_b((v16i8) hz_out1, (v16i8) hz_out0);
+        tmp2 = FILT_4TAP_DPADD_S_H(vec1, vec0, filt_vt0, filt_vt1);
+
+        hz_out2 = HORIZ_6TAP_FILT(src6, src6, mask0, mask1, mask2, filt_hz0,
+                                  filt_hz1, filt_hz2);
+        ILVEV_B2_SH(hz_out3, hz_out0, hz_out1, hz_out2, vec1, vec2);
+        tmp3 = FILT_4TAP_DPADD_S_H(vec1, vec2, filt_vt0, filt_vt1);
+
+        SRARI_H4_SH(tmp0, tmp1, tmp2, tmp3, 7);
+        SAT_SH4_SH(tmp0, tmp1, tmp2, tmp3, 7);
+        out0 = PCKEV_XORI128_UB(tmp0, tmp1);
+        out1 = PCKEV_XORI128_UB(tmp2, tmp3);
+        ST8x4_UB(out0, out1, dst, dst_stride);
+        dst += (4 * dst_stride); /* vec0, vec2, hz_out2 carry over as-is */
+    }
+}
+
+void ff_put_vp8_epel16_h6v4_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                               uint8_t *src, ptrdiff_t src_stride,
+                               int height, int mx, int my)
+{
+    int32_t col;
+
+    /* The 16-wide block is two 8-wide halves, at columns 0 and 8; both
+     * halves go through the 8-wide h6v4 routine with the same mx/my. */
+    for (col = 0; col < 16; col += 8) {
+        ff_put_vp8_epel8_h6v4_msa(dst + col, dst_stride,
+                                  src + col, src_stride,
+                                  height, mx, my);
+    }
+}
+
+void ff_put_vp8_epel4_h4v6_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                               uint8_t *src, ptrdiff_t src_stride,
+                               int height, int mx, int my)
+{
+    uint32_t loop_cnt;
+    const int8_t *filter_horiz = subpel_filters_msa[mx - 1];
+    const int8_t *filter_vert = subpel_filters_msa[my - 1];
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16i8 filt_hz0, filt_hz1, mask0, mask1;
+    v16u8 out;
+    v8i16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6;
+    v8i16 hz_out7, tmp0, tmp1, out0, out1, out2, out3;
+    v8i16 filt, filt_vt0, filt_vt1, filt_vt2;
+    /* 4-wide block: 4-tap horizontal pass, then 6-tap vertical pass. */
+    mask0 = LD_SB(&mc_filt_mask_arr[16]);
+    /* back up 1 column and 2 rows before reading */
+    src -= (1 + 2 * src_stride);
+
+    /* rearranging filter */
+    filt = LD_SH(filter_horiz);
+    SPLATI_H2_SB(filt, 0, 1, filt_hz0, filt_hz1);
+
+    mask1 = mask0 + 2;
+    /* prologue: 5 rows; each HORIZ_4TAP_FILT call takes two source rows */
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    src += (5 * src_stride);
+
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    hz_out0 = HORIZ_4TAP_FILT(src0, src1, mask0, mask1, filt_hz0, filt_hz1);
+    hz_out2 = HORIZ_4TAP_FILT(src2, src3, mask0, mask1, filt_hz0, filt_hz1);
+    hz_out3 = HORIZ_4TAP_FILT(src3, src4, mask0, mask1, filt_hz0, filt_hz1);
+    hz_out1 = (v8i16) __msa_sldi_b((v16i8) hz_out2, (v16i8) hz_out0, 8);
+    ILVEV_B2_SH(hz_out0, hz_out1, hz_out2, hz_out3, out0, out1);
+
+    filt = LD_SH(filter_vert);
+    SPLATI_H3_SH(filt, 0, 1, 2, filt_vt0, filt_vt1, filt_vt2);
+    /* 4 rows per pass; a height % 4 remainder would not be written */
+    for (loop_cnt = (height >> 2); loop_cnt--;) {
+        LD_SB4(src, src_stride, src5, src6, src7, src8);
+        XORI_B4_128_SB(src5, src6, src7, src8);
+        src += (4 * src_stride);
+
+        hz_out5 = HORIZ_4TAP_FILT(src5, src6, mask0, mask1, filt_hz0, filt_hz1);
+        hz_out4 = (v8i16) __msa_sldi_b((v16i8) hz_out5, (v16i8) hz_out3, 8);
+        out2 = (v8i16) __msa_ilvev_b((v16i8) hz_out5, (v16i8) hz_out4);
+        tmp0 = DPADD_SH3_SH(out0, out1, out2, filt_vt0, filt_vt1, filt_vt2);
+
+        hz_out7 = HORIZ_4TAP_FILT(src7, src8, mask0, mask1, filt_hz0, filt_hz1);
+        hz_out6 = (v8i16) __msa_sldi_b((v16i8) hz_out7, (v16i8) hz_out5, 8);
+        out3 = (v8i16) __msa_ilvev_b((v16i8) hz_out7, (v16i8) hz_out6);
+        tmp1 = DPADD_SH3_SH(out1, out2, out3, filt_vt0, filt_vt1, filt_vt2);
+
+        SRARI_H2_SH(tmp0, tmp1, 7);
+        SAT_SH2_SH(tmp0, tmp1, 7);
+        out = PCKEV_XORI128_UB(tmp0, tmp1);
+        ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
+        dst += (4 * dst_stride);
+        /* values carried into the next pass */
+        hz_out3 = hz_out7;
+        out0 = out2;
+        out1 = out3;
+    }
+}
+
+void ff_put_vp8_epel8_h4v6_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                               uint8_t *src, ptrdiff_t src_stride,
+                               int height, int mx, int my)
+{
+    uint32_t loop_cnt;
+    const int8_t *filter_horiz = subpel_filters_msa[mx - 1];
+    const int8_t *filter_vert = subpel_filters_msa[my - 1];
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16i8 filt_hz0, filt_hz1, mask0, mask1;
+    v8i16 filt, filt_vt0, filt_vt1, filt_vt2, tmp0, tmp1, tmp2, tmp3;
+    v8i16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6;
+    v8i16 hz_out7, hz_out8, out0, out1, out2, out3, out4, out5, out6, out7;
+    v16u8 vec0, vec1;
+    /* 8-wide block: 4-tap horizontal pass, then 6-tap vertical pass. */
+    mask0 = LD_SB(&mc_filt_mask_arr[0]);
+    src -= (1 + 2 * src_stride);  /* back up 1 column and 2 rows */
+
+    /* rearranging filter */
+    filt = LD_SH(filter_horiz);
+    SPLATI_H2_SB(filt, 0, 1, filt_hz0, filt_hz1);
+
+    mask1 = mask0 + 2;
+    /* prologue: horizontally filter the 5 rows consumed before the loop */
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    src += (5 * src_stride);
+
+    XORI_B5_128_SB(src0, src1, src2, src3, src4);
+    hz_out0 = HORIZ_4TAP_FILT(src0, src0, mask0, mask1, filt_hz0, filt_hz1);
+    hz_out1 = HORIZ_4TAP_FILT(src1, src1, mask0, mask1, filt_hz0, filt_hz1);
+    hz_out2 = HORIZ_4TAP_FILT(src2, src2, mask0, mask1, filt_hz0, filt_hz1);
+    hz_out3 = HORIZ_4TAP_FILT(src3, src3, mask0, mask1, filt_hz0, filt_hz1);
+    hz_out4 = HORIZ_4TAP_FILT(src4, src4, mask0, mask1, filt_hz0, filt_hz1);
+    ILVEV_B2_SH(hz_out0, hz_out1, hz_out2, hz_out3, out0, out1);
+    ILVEV_B2_SH(hz_out1, hz_out2, hz_out3, hz_out4, out3, out4);
+
+    filt = LD_SH(filter_vert);
+    SPLATI_H3_SH(filt, 0, 1, 2, filt_vt0, filt_vt1, filt_vt2);
+    /* 4 rows per pass; a height % 4 remainder would not be written */
+    for (loop_cnt = (height >> 2); loop_cnt--;) {
+        LD_SB4(src, src_stride, src5, src6, src7, src8);
+        src += (4 * src_stride);
+
+        XORI_B4_128_SB(src5, src6, src7, src8);
+
+        hz_out5 = HORIZ_4TAP_FILT(src5, src5, mask0, mask1, filt_hz0, filt_hz1);
+        out2 = (v8i16) __msa_ilvev_b((v16i8) hz_out5, (v16i8) hz_out4);
+        tmp0 = DPADD_SH3_SH(out0, out1, out2, filt_vt0, filt_vt1, filt_vt2);
+
+        hz_out6 = HORIZ_4TAP_FILT(src6, src6, mask0, mask1, filt_hz0, filt_hz1);
+        out5 = (v8i16) __msa_ilvev_b((v16i8) hz_out6, (v16i8) hz_out5);
+        tmp1 = DPADD_SH3_SH(out3, out4, out5, filt_vt0, filt_vt1, filt_vt2);
+
+        hz_out7 = HORIZ_4TAP_FILT(src7, src7, mask0, mask1, filt_hz0, filt_hz1);
+        out6 = (v8i16) __msa_ilvev_b((v16i8) hz_out7, (v16i8) hz_out6);
+        tmp2 = DPADD_SH3_SH(out1, out2, out6, filt_vt0, filt_vt1, filt_vt2);
+
+        hz_out8 = HORIZ_4TAP_FILT(src8, src8, mask0, mask1, filt_hz0, filt_hz1);
+        out7 = (v8i16) __msa_ilvev_b((v16i8) hz_out8, (v16i8) hz_out7);
+        tmp3 = DPADD_SH3_SH(out4, out5, out7, filt_vt0, filt_vt1, filt_vt2);
+
+        SRARI_H4_SH(tmp0, tmp1, tmp2, tmp3, 7);
+        SAT_SH4_SH(tmp0, tmp1, tmp2, tmp3, 7);
+        vec0 = PCKEV_XORI128_UB(tmp0, tmp1);
+        vec1 = PCKEV_XORI128_UB(tmp2, tmp3);
+        ST8x4_UB(vec0, vec1, dst, dst_stride);
+        dst += (4 * dst_stride);
+        /* values carried into the next pass */
+        hz_out4 = hz_out8;
+        out0 = out2;
+        out1 = out6;
+        out3 = out5;
+        out4 = out7;
+    }
+}
+
+void ff_put_vp8_epel16_h4v6_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                                uint8_t *src, ptrdiff_t src_stride,
+                                int height, int mx, int my)
+{
+    int32_t col;
+
+    /* The 16-wide block is two 8-wide halves, at columns 0 and 8; both
+     * halves go through the 8-wide h4v6 routine with the same mx/my. */
+    for (col = 0; col < 16; col += 8) {
+        ff_put_vp8_epel8_h4v6_msa(dst + col, dst_stride,
+                                  src + col, src_stride,
+                                  height, mx, my);
+    }
+}
+
+static void common_hz_2t_4x4_msa(uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 const int8_t *filter)
+{
+    v16i8 src0, src1, src2, src3, mask;
+    v16u8 filt0, vec0, vec1, res0, res1;
+    v8u16 vec2, vec3, filt;
+    /* 2-tap horizontal filter over a 4x4 block (4 rows, no loop). */
+    mask = LD_SB(&mc_filt_mask_arr[16]);
+
+    /* rearranging filter: broadcast halfword 0 of the loaded taps */
+    filt = LD_UH(filter);
+    filt0 = (v16u8) __msa_splati_h((v8i16) filt, 0);
+    /* two source rows go into each shuffle, then dot product with filt0 */
+    LD_SB4(src, src_stride, src0, src1, src2, src3);
+    VSHF_B2_UB(src0, src1, src2, src3, mask, mask, vec0, vec1);
+    DOTP_UB2_UH(vec0, vec1, filt0, filt0, vec2, vec3);
+    SRARI_H2_UH(vec2, vec3, 7);
+    PCKEV_B2_UB(vec2, vec2, vec3, vec3, res0, res1);
+    ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
+}
+
+static void common_hz_2t_4x8_msa(uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 const int8_t *filter)
+{
+    v16u8 vec0, vec1, vec2, vec3, filt0;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
+    v16i8 res0, res1, res2, res3;
+    v8u16 vec4, vec5, vec6, vec7, filt;
+    /* 2-tap horizontal filter over a 4x8 block (8 rows, no loop). */
+    mask = LD_SB(&mc_filt_mask_arr[16]);
+
+    /* rearranging filter: broadcast halfword 0 of the loaded taps */
+    filt = LD_UH(filter);
+    filt0 = (v16u8) __msa_splati_h((v8i16) filt, 0);
+    /* 8 rows in; two source rows go into each shuffle */
+    LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
+    VSHF_B2_UB(src0, src1, src2, src3, mask, mask, vec0, vec1);
+    VSHF_B2_UB(src4, src5, src6, src7, mask, mask, vec2, vec3);
+    DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
+                vec4, vec5, vec6, vec7);
+    SRARI_H4_UH(vec4, vec5, vec6, vec7, 7);
+    PCKEV_B4_SB(vec4, vec4, vec5, vec5, vec6, vec6, vec7, vec7,
+                res0, res1, res2, res3);
+    ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
+    dst += (4 * dst_stride);
+    ST4x4_UB(res2, res3, 0, 1, 0, 1, dst, dst_stride);
+}
+
+void ff_put_vp8_bilinear4_h_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                                uint8_t *src, ptrdiff_t src_stride,
+                                int height, int mx, int my)
+{
+    const int8_t *taps = bilinear_filters_msa[mx - 1];
+
+    /* Heights other than 4 and 8 write nothing; my is not used. */
+    if (height == 4)
+        common_hz_2t_4x4_msa(src, src_stride, dst, dst_stride, taps);
+    else if (height == 8)
+        common_hz_2t_4x8_msa(src, src_stride, dst, dst_stride, taps);
+}
+
+static void common_hz_2t_8x4_msa(uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 const int8_t *filter)
+{
+    v16u8 filt0;
+    v16i8 src0, src1, src2, src3, mask;
+    v8u16 vec0, vec1, vec2, vec3, filt;
+    /* 2-tap horizontal filter over an 8x4 block (4 rows, no loop). */
+    mask = LD_SB(&mc_filt_mask_arr[0]);
+
+    /* rearranging filter: broadcast halfword 0 of the loaded taps */
+    filt = LD_UH(filter);
+    filt0 = (v16u8) __msa_splati_h((v8i16) filt, 0);
+    /* each row is shuffled against itself; vec0..vec3 are reused as sums */
+    LD_SB4(src, src_stride, src0, src1, src2, src3);
+    VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
+    VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
+    DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
+                vec0, vec1, vec2, vec3);
+    SRARI_H4_UH(vec0, vec1, vec2, vec3, 7);
+    PCKEV_B2_SB(vec1, vec0, vec3, vec2, src0, src1);  /* reuses src0/src1 */
+    ST8x4_UB(src0, src1, dst, dst_stride);
+}
+
+static void common_hz_2t_8x8mult_msa(uint8_t *src, int32_t src_stride,
+                                     uint8_t *dst, int32_t dst_stride,
+                                     const int8_t *filter, int32_t height)
+{
+    v16u8 filt0;
+    v16i8 src0, src1, src2, src3, mask, out0, out1;
+    v8u16 vec0, vec1, vec2, vec3, filt;
+    /* 2-tap horizontal filter, 8 wide: 8 rows always, 16 if height == 16. */
+    mask = LD_SB(&mc_filt_mask_arr[0]);
+
+    /* rearranging filter: broadcast halfword 0 of the loaded taps */
+    filt = LD_UH(filter);
+    filt0 = (v16u8) __msa_splati_h((v8i16) filt, 0);
+    /* rows 0..3 */
+    LD_SB4(src, src_stride, src0, src1, src2, src3);
+    src += (4 * src_stride);
+
+    VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
+    VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
+    DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
+                vec0, vec1, vec2, vec3);
+    SRARI_H4_UH(vec0, vec1, vec2, vec3, 7);
+    /* rows 4..7 are loaded before rows 0..3 are stored */
+    LD_SB4(src, src_stride, src0, src1, src2, src3);
+    src += (4 * src_stride);
+
+    PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1);
+    ST8x4_UB(out0, out1, dst, dst_stride);
+    dst += (4 * dst_stride);
+
+    VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
+    VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
+    DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
+                vec0, vec1, vec2, vec3);
+    SRARI_H4_UH(vec0, vec1, vec2, vec3, 7);
+    PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1);
+    ST8x4_UB(out0, out1, dst, dst_stride);
+    dst += (4 * dst_stride);
+    /* rows 8..15 are produced only for height == 16 */
+    if (16 == height) {
+        LD_SB4(src, src_stride, src0, src1, src2, src3);
+        src += (4 * src_stride);
+
+        VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
+        VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
+        DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
+                    vec0, vec1, vec2, vec3);
+        SRARI_H4_UH(vec0, vec1, vec2, vec3, 7);
+        LD_SB4(src, src_stride, src0, src1, src2, src3);
+        src += (4 * src_stride);
+
+        PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1);
+        ST8x4_UB(out0, out1, dst, dst_stride);
+
+        VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
+        VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
+        DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
+                    vec0, vec1, vec2, vec3);
+        SRARI_H4_UH(vec0, vec1, vec2, vec3, 7);
+        PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1);
+        ST8x4_UB(out0, out1, dst + 4 * dst_stride, dst_stride);
+    }
+}
+
+void ff_put_vp8_bilinear8_h_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                                uint8_t *src, ptrdiff_t src_stride,
+                                int height, int mx, int my)
+{
+    const int8_t *taps = bilinear_filters_msa[mx - 1];
+
+    /* Height 4 has its own kernel; every other height shares one. */
+    if (height != 4)
+        common_hz_2t_8x8mult_msa(src, src_stride, dst, dst_stride, taps,
+                                 height);
+    else
+        common_hz_2t_8x4_msa(src, src_stride, dst, dst_stride, taps);
+}
+
+void ff_put_vp8_bilinear16_h_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                                 uint8_t *src, ptrdiff_t src_stride,
+                                 int height, int mx, int my)
+{
+    uint32_t loop_cnt;
+    const int8_t *filter = bilinear_filters_msa[mx - 1];
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
+    v16u8 filt0, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v8u16 out0, out1, out2, out3, out4, out5, out6, out7, filt;
+    /* 2-tap horizontal filter, 16 wide: 4 rows up front, then 4 per pass. */
+    mask = LD_SB(&mc_filt_mask_arr[0]);
+    /* NOTE(review): unsigned; height < 4 would wrap to a huge count */
+    loop_cnt = (height >> 2) - 1;
+
+    /* rearranging filter: broadcast halfword 0 of the loaded taps */
+    filt = LD_UH(filter);
+    filt0 = (v16u8) __msa_splati_h((v8i16) filt, 0);
+    /* each row is read as two vectors, at src and at src + 8 */
+    LD_SB4(src, src_stride, src0, src2, src4, src6);
+    LD_SB4(src + 8, src_stride, src1, src3, src5, src7);
+    src += (4 * src_stride);
+
+    VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1);
+    VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3);
+    VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5);
+    VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7);
+    DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
+                out0, out1, out2, out3);
+    DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0,
+                out4, out5, out6, out7);
+    SRARI_H4_UH(out0, out1, out2, out3, 7);
+    SRARI_H4_UH(out4, out5, out6, out7, 7);
+    PCKEV_ST_SB(out0, out1, dst);
+    dst += dst_stride;
+    PCKEV_ST_SB(out2, out3, dst);
+    dst += dst_stride;
+    PCKEV_ST_SB(out4, out5, dst);
+    dst += dst_stride;
+    PCKEV_ST_SB(out6, out7, dst);
+    dst += dst_stride;
+    /* remaining rows, 4 per pass, same steps as above */
+    for (; loop_cnt--;) {
+        LD_SB4(src, src_stride, src0, src2, src4, src6);
+        LD_SB4(src + 8, src_stride, src1, src3, src5, src7);
+        src += (4 * src_stride);
+
+        VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1);
+        VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3);
+        VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5);
+        VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7);
+        DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
+                    out0, out1, out2, out3);
+        DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0,
+                    out4, out5, out6, out7);
+        SRARI_H4_UH(out0, out1, out2, out3, 7);
+        SRARI_H4_UH(out4, out5, out6, out7, 7);
+        PCKEV_ST_SB(out0, out1, dst);
+        dst += dst_stride;
+        PCKEV_ST_SB(out2, out3, dst);
+        dst += dst_stride;
+        PCKEV_ST_SB(out4, out5, dst);
+        dst += dst_stride;
+        PCKEV_ST_SB(out6, out7, dst);
+        dst += dst_stride;
+    }
+}
+
+static void common_vt_2t_4x4_msa(uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 const int8_t *filter)
+{
+    v16i8 src0, src1, src2, src3, src4;
+    v16i8 src10_r, src32_r, src21_r, src43_r, src2110, src4332;
+    v16u8 filt0;
+    v8i16 filt;
+    v8u16 tmp0, tmp1;
+    /* 2-tap vertical filter over a 4x4 block (5 input rows, no loop). */
+    filt = LD_SH(filter);
+    filt0 = (v16u8) __msa_splati_h(filt, 0);
+    /* src is not read again after the pointer bump below */
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    src += (5 * src_stride);
+
+    ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3,
+               src10_r, src21_r, src32_r, src43_r);
+    ILVR_D2_SB(src21_r, src10_r, src43_r, src32_r, src2110, src4332);
+    DOTP_UB2_UH(src2110, src4332, filt0, filt0, tmp0, tmp1);
+    SRARI_H2_UH(tmp0, tmp1, 7);
+    SAT_UH2_UH(tmp0, tmp1, 7);
+    src2110 = __msa_pckev_b((v16i8) tmp1, (v16i8) tmp0);
+    ST4x4_UB(src2110, src2110, 0, 1, 2, 3, dst, dst_stride);
+}
+
+static void common_vt_2t_4x8_msa(uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 const int8_t *filter)
+{
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16i8 src10_r, src32_r, src54_r, src76_r, src21_r, src43_r;
+    v16i8 src65_r, src87_r, src2110, src4332, src6554, src8776;
+    v8u16 tmp0, tmp1, tmp2, tmp3;
+    v16u8 filt0;
+    v8i16 filt;
+    /* 2-tap vertical filter over a 4x8 block (9 input rows, no loop). */
+    filt = LD_SH(filter);
+    filt0 = (v16u8) __msa_splati_h(filt, 0);
+    /* 9 rows in: 8 via LD_SB8 plus src8; src is not read after that */
+    LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
+    src += (8 * src_stride);
+
+    src8 = LD_SB(src);
+    src += src_stride;
+
+    ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r,
+               src32_r, src43_r);
+    ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7, src54_r, src65_r,
+               src76_r, src87_r);
+    ILVR_D4_SB(src21_r, src10_r, src43_r, src32_r, src65_r, src54_r,
+               src87_r, src76_r, src2110, src4332, src6554, src8776);
+    DOTP_UB4_UH(src2110, src4332, src6554, src8776, filt0, filt0, filt0, filt0,
+                tmp0, tmp1, tmp2, tmp3);
+    SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, 7);
+    SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7);
+    PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, src2110, src4332);
+    ST4x4_UB(src2110, src2110, 0, 1, 2, 3, dst, dst_stride);
+    ST4x4_UB(src4332, src4332, 0, 1, 2, 3, dst + 4 * dst_stride, dst_stride);
+}
+
+void ff_put_vp8_bilinear4_v_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                                uint8_t *src, ptrdiff_t src_stride,
+                                int height, int mx, int my)
+{
+    const int8_t *taps = bilinear_filters_msa[my - 1];
+
+    /* Heights other than 4 and 8 write nothing; mx is not used. */
+    if (height == 4)
+        common_vt_2t_4x4_msa(src, src_stride, dst, dst_stride, taps);
+    else if (height == 8)
+        common_vt_2t_4x8_msa(src, src_stride, dst, dst_stride, taps);
+}
+
+static void common_vt_2t_8x4_msa(uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 const int8_t *filter)
+{
+    v16u8 src0, src1, src2, src3, src4, vec0, vec1, vec2, vec3, filt0;
+    v16i8 out0, out1;
+    v8u16 tmp0, tmp1, tmp2, tmp3;
+    v8i16 filt;
+    /* 2-tap vertical filter over an 8x4 block (5 input rows, no loop). */
+    /* rearranging filter_y */
+    filt = LD_SH(filter);
+    filt0 = (v16u8) __msa_splati_h(filt, 0);
+    /* each row is paired with the row below it before the dot product */
+    LD_UB5(src, src_stride, src0, src1, src2, src3, src4);
+    ILVR_B2_UB(src1, src0, src2, src1, vec0, vec1);
+    ILVR_B2_UB(src3, src2, src4, src3, vec2, vec3);
+    DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
+                tmp0, tmp1, tmp2, tmp3);
+    SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, 7);
+    SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7);
+    PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1);
+    ST8x4_UB(out0, out1, dst, dst_stride);
+}
+
+static void common_vt_2t_8x8mult_msa(uint8_t *src, int32_t src_stride,
+                                     uint8_t *dst, int32_t dst_stride,
+                                     const int8_t *filter, int32_t height)
+{
+    uint32_t loop_cnt;
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0;
+    v16i8 out0, out1;
+    v8u16 tmp0, tmp1, tmp2, tmp3;
+    v8i16 filt;
+    /* 2-tap vertical filter, 8 wide, 8 output rows per loop pass. */
+    /* rearranging filter_y */
+    filt = LD_SH(filter);
+    filt0 = (v16u8) __msa_splati_h(filt, 0);
+    /* the first row is loaded once, outside the loop */
+    src0 = LD_UB(src);
+    src += src_stride;
+    /* height >> 3 passes; a height % 8 remainder would not be written */
+    for (loop_cnt = (height >> 3); loop_cnt--;) {
+        LD_UB8(src, src_stride, src1, src2, src3, src4, src5, src6, src7, src8);
+        src += (8 * src_stride);
+
+        ILVR_B4_UB(src1, src0, src2, src1, src3, src2, src4, src3,
+                   vec0, vec1, vec2, vec3);
+        ILVR_B4_UB(src5, src4, src6, src5, src7, src6, src8, src7,
+                   vec4, vec5, vec6, vec7);
+        DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
+                    tmp0, tmp1, tmp2, tmp3);
+        SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, 7);
+        SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7);
+        PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1);
+        ST8x4_UB(out0, out1, dst, dst_stride);
+        dst += (4 * dst_stride);
+
+        DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0,
+                    tmp0, tmp1, tmp2, tmp3);
+        SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, 7);
+        SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7);
+        PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1);
+        ST8x4_UB(out0, out1, dst, dst_stride);
+        dst += (4 * dst_stride);
+        /* the last row of this pass is the first row of the next */
+        src0 = src8;
+    }
+}
+
+void ff_put_vp8_bilinear8_v_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                                uint8_t *src, ptrdiff_t src_stride,
+                                int height, int mx, int my)
+{
+    const int8_t *taps = bilinear_filters_msa[my - 1];
+
+    /* Height 4 has its own kernel; every other height shares one. */
+    if (height != 4)
+        common_vt_2t_8x8mult_msa(src, src_stride, dst, dst_stride, taps,
+                                 height);
+    else
+        common_vt_2t_8x4_msa(src, src_stride, dst, dst_stride, taps);
+}
+
+void ff_put_vp8_bilinear16_v_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                                 uint8_t *src, ptrdiff_t src_stride,
+                                 int height, int mx, int my)
+{
+    uint32_t loop_cnt;
+    const int8_t *filter = bilinear_filters_msa[my - 1];
+    v16u8 src0, src1, src2, src3, src4;
+    v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0;
+    v8u16 tmp0, tmp1, tmp2, tmp3;
+    v8i16 filt;
+    /* 2-tap vertical filter, 16 wide, 4 output rows per loop pass. */
+    /* rearranging filter_y */
+    filt = LD_SH(filter);
+    filt0 = (v16u8) __msa_splati_h(filt, 0);
+
+    src0 = LD_UB(src);
+    src += src_stride;
+    /* height >> 2 passes; a height % 4 remainder would not be written */
+    for (loop_cnt = (height >> 2); loop_cnt--;) {
+        LD_UB4(src, src_stride, src1, src2, src3, src4);
+        src += (4 * src_stride);
+
+        ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2);
+        ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3);
+        DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
+        SRARI_H2_UH(tmp0, tmp1, 7);
+        SAT_UH2_UH(tmp0, tmp1, 7);
+        PCKEV_ST_SB(tmp0, tmp1, dst);
+        dst += dst_stride;
+
+        ILVR_B2_UB(src3, src2, src4, src3, vec4, vec6);
+        ILVL_B2_UB(src3, src2, src4, src3, vec5, vec7);
+        DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
+        SRARI_H2_UH(tmp2, tmp3, 7);
+        SAT_UH2_UH(tmp2, tmp3, 7);
+        PCKEV_ST_SB(tmp2, tmp3, dst);
+        dst += dst_stride;
+
+        DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1);
+        SRARI_H2_UH(tmp0, tmp1, 7);
+        SAT_UH2_UH(tmp0, tmp1, 7);
+        PCKEV_ST_SB(tmp0, tmp1, dst);
+        dst += dst_stride;
+
+        DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3);
+        SRARI_H2_UH(tmp2, tmp3, 7);
+        SAT_UH2_UH(tmp2, tmp3, 7);
+        PCKEV_ST_SB(tmp2, tmp3, dst);
+        dst += dst_stride;
+        /* the last row of this pass is the first row of the next */
+        src0 = src4;
+    }
+}
+
+static void common_hv_2ht_2vt_4x4_msa(uint8_t *src, int32_t src_stride,
+                                      uint8_t *dst, int32_t dst_stride,
+                                      const int8_t *filter_horiz,
+                                      const int8_t *filter_vert)
+{
+    v16i8 src0, src1, src2, src3, src4, mask;
+    v16u8 filt_vt, filt_hz, vec0, vec1, res0, res1;
+    v8u16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, filt, tmp0, tmp1;
+    /* 2-tap horizontal pass, then 2-tap vertical pass, over a 4x4 block. */
+    mask = LD_SB(&mc_filt_mask_arr[16]);
+
+    /* rearranging filter */
+    filt = LD_UH(filter_horiz);
+    filt_hz = (v16u8) __msa_splati_h((v8i16) filt, 0);
+
+    filt = LD_UH(filter_vert);
+    filt_vt = (v16u8) __msa_splati_h((v8i16) filt, 0);
+    /* 5 rows in: two rows per horizontal call, the last row on its own */
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    hz_out0 = HORIZ_2TAP_FILT_UH(src0, src1, mask, filt_hz, 7);
+    hz_out2 = HORIZ_2TAP_FILT_UH(src2, src3, mask, filt_hz, 7);
+    hz_out4 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, 7);
+    hz_out1 = (v8u16) __msa_sldi_b((v16i8) hz_out2, (v16i8) hz_out0, 8);
+    hz_out3 = (v8u16) __msa_pckod_d((v2i64) hz_out4, (v2i64) hz_out2);
+    /* vertical pass over the horizontally filtered rows */
+    ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
+    DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1);
+    SRARI_H2_UH(tmp0, tmp1, 7);
+    SAT_UH2_UH(tmp0, tmp1, 7);
+    PCKEV_B2_UB(tmp0, tmp0, tmp1, tmp1, res0, res1);
+    ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
+}
+
+static void common_hv_2ht_2vt_4x8_msa(uint8_t *src, int32_t src_stride,
+                                      uint8_t *dst, int32_t dst_stride,
+                                      const int8_t *filter_horiz,
+                                      const int8_t *filter_vert)
+{
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, mask;
+    v16i8 res0, res1, res2, res3;
+    v16u8 filt_hz, filt_vt, vec0, vec1, vec2, vec3;
+    v8u16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6;
+    v8u16 hz_out7, hz_out8, vec4, vec5, vec6, vec7, filt;
+    /* 2-tap horizontal pass, then 2-tap vertical pass, over a 4x8 block. */
+    mask = LD_SB(&mc_filt_mask_arr[16]);
+
+    /* rearranging filter */
+    filt = LD_UH(filter_horiz);
+    filt_hz = (v16u8) __msa_splati_h((v8i16) filt, 0);
+
+    filt = LD_UH(filter_vert);
+    filt_vt = (v16u8) __msa_splati_h((v8i16) filt, 0);
+    /* 9 rows in: 8 via LD_SB8 plus src8 */
+    LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
+    src += (8 * src_stride);
+    src8 = LD_SB(src);
+
+    hz_out0 = HORIZ_2TAP_FILT_UH(src0, src1, mask, filt_hz, 7);
+    hz_out2 = HORIZ_2TAP_FILT_UH(src2, src3, mask, filt_hz, 7);
+    hz_out4 = HORIZ_2TAP_FILT_UH(src4, src5, mask, filt_hz, 7);
+    hz_out6 = HORIZ_2TAP_FILT_UH(src6, src7, mask, filt_hz, 7);
+    hz_out8 = HORIZ_2TAP_FILT_UH(src8, src8, mask, filt_hz, 7);
+    SLDI_B3_UH(hz_out2, hz_out4, hz_out6, hz_out0, hz_out2, hz_out4, hz_out1,
+               hz_out3, hz_out5, 8);
+    hz_out7 = (v8u16) __msa_pckod_d((v2i64) hz_out8, (v2i64) hz_out6);
+    /* vertical pass over the horizontally filtered rows */
+    ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
+    ILVEV_B2_UB(hz_out4, hz_out5, hz_out6, hz_out7, vec2, vec3);
+    DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt_vt, filt_vt, filt_vt, filt_vt,
+                vec4, vec5, vec6, vec7);
+    SRARI_H4_UH(vec4, vec5, vec6, vec7, 7);
+    SAT_UH4_UH(vec4, vec5, vec6, vec7, 7);
+    PCKEV_B4_SB(vec4, vec4, vec5, vec5, vec6, vec6, vec7, vec7,
+                res0, res1, res2, res3);
+    ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
+    dst += (4 * dst_stride);
+    ST4x4_UB(res2, res3, 0, 1, 0, 1, dst, dst_stride);
+}
+
+void ff_put_vp8_bilinear4_hv_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                                 uint8_t *src, ptrdiff_t src_stride,
+                                 int height, int mx, int my)
+{
+    const int8_t *taps_h = bilinear_filters_msa[mx - 1];
+    const int8_t *taps_v = bilinear_filters_msa[my - 1];
+
+    /* Only heights 4 and 8 are handled; other heights write nothing. */
+    if (height == 4)
+        common_hv_2ht_2vt_4x4_msa(src, src_stride, dst, dst_stride,
+                                  taps_h, taps_v);
+    else if (height == 8)
+        common_hv_2ht_2vt_4x8_msa(src, src_stride, dst, dst_stride,
+                                  taps_h, taps_v);
+}
+
+static void common_hv_2ht_2vt_8x4_msa(uint8_t *src, int32_t src_stride,
+                                      uint8_t *dst, int32_t dst_stride,
+                                      const int8_t *filter_horiz,
+                                      const int8_t *filter_vert)
+{
+    v16i8 src0, src1, src2, src3, src4, mask, out0, out1;
+    v16u8 filt_hz, filt_vt, vec0, vec1, vec2, vec3;
+    v8u16 hz_out0, hz_out1, tmp0, tmp1, tmp2, tmp3;
+    v8i16 filt;
+    /* 2-tap horizontal pass, then 2-tap vertical pass, over an 8x4 block. */
+    mask = LD_SB(&mc_filt_mask_arr[0]);
+
+    /* rearranging filter */
+    filt = LD_SH(filter_horiz);
+    filt_hz = (v16u8) __msa_splati_h(filt, 0);
+
+    filt = LD_SH(filter_vert);
+    filt_vt = (v16u8) __msa_splati_h(filt, 0);
+
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    /* hz_out0/hz_out1 alternate as previous and current filtered row */
+    hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, 7);
+    hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, 7);
+    vec0 = (v16u8) __msa_ilvev_b((v16i8) hz_out1, (v16i8) hz_out0);
+    tmp0 = __msa_dotp_u_h(vec0, filt_vt);
+
+    hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, 7);
+    vec1 = (v16u8) __msa_ilvev_b((v16i8) hz_out0, (v16i8) hz_out1);
+    tmp1 = __msa_dotp_u_h(vec1, filt_vt);
+
+    hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, 7);
+    vec2 = (v16u8) __msa_ilvev_b((v16i8) hz_out1, (v16i8) hz_out0);
+    tmp2 = __msa_dotp_u_h(vec2, filt_vt);
+
+    hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, 7);
+    vec3 = (v16u8) __msa_ilvev_b((v16i8) hz_out0, (v16i8) hz_out1);
+    tmp3 = __msa_dotp_u_h(vec3, filt_vt);
+
+    SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, 7);
+    SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7);
+    PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1);
+    ST8x4_UB(out0, out1, dst, dst_stride);
+}
+/* 8-wide 2-tap horizontal then 2-tap vertical filter; (height >> 3) passes of 8 output rows each. */
+static void common_hv_2ht_2vt_8x8mult_msa(uint8_t *src, int32_t src_stride,
+                                          uint8_t *dst, int32_t dst_stride,
+                                          const int8_t *filter_horiz,
+                                          const int8_t *filter_vert,
+                                          int32_t height)
+{
+    uint32_t loop_cnt;
+    v16i8 src0, src1, src2, src3, src4, mask, out0, out1;
+    v16u8 filt_hz, filt_vt, vec0;
+    v8u16 hz_out0, hz_out1, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
+    v8i16 filt;
+
+    mask = LD_SB(&mc_filt_mask_arr[0]);
+
+    /* broadcast halfword 0 (the first two taps) of each filter to all lanes */
+    filt = LD_SH(filter_horiz);
+    filt_hz = (v16u8) __msa_splati_h(filt, 0);
+
+    filt = LD_SH(filter_vert);
+    filt_vt = (v16u8) __msa_splati_h(filt, 0);
+
+    src0 = LD_SB(src);  /* first row seeds the vertical filter */
+    src += src_stride;
+
+    hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, 7);
+
+    for (loop_cnt = (height >> 3); loop_cnt--;) {  /* 8 rows per pass */
+        LD_SB4(src, src_stride, src1, src2, src3, src4);
+        src += (4 * src_stride);
+
+        hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, 7);
+        vec0 = (v16u8) __msa_ilvev_b((v16i8) hz_out1, (v16i8) hz_out0);
+        tmp1 = __msa_dotp_u_h(vec0, filt_vt);
+
+        hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, 7);
+        vec0 = (v16u8) __msa_ilvev_b((v16i8) hz_out0, (v16i8) hz_out1);
+        tmp2 = __msa_dotp_u_h(vec0, filt_vt);
+
+        SRARI_H2_UH(tmp1, tmp2, 7);
+        SAT_UH2_UH(tmp1, tmp2, 7);
+
+        hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, 7);
+        vec0 = (v16u8) __msa_ilvev_b((v16i8) hz_out1, (v16i8) hz_out0);
+        tmp3 = __msa_dotp_u_h(vec0, filt_vt);
+
+        hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, 7);
+        LD_SB4(src, src_stride, src1, src2, src3, src4);  /* src1..src4 are consumed; fetch the next 4 rows */
+        src += (4 * src_stride);
+        vec0 = (v16u8) __msa_ilvev_b((v16i8) hz_out0, (v16i8) hz_out1);
+        tmp4 = __msa_dotp_u_h(vec0, filt_vt);
+
+        SRARI_H2_UH(tmp3, tmp4, 7);
+        SAT_UH2_UH(tmp3, tmp4, 7);
+        PCKEV_B2_SB(tmp2, tmp1, tmp4, tmp3, out0, out1);
+        ST8x4_UB(out0, out1, dst, dst_stride);
+        dst += (4 * dst_stride);
+
+        hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, 7);
+        vec0 = (v16u8) __msa_ilvev_b((v16i8) hz_out1, (v16i8) hz_out0);
+        tmp5 = __msa_dotp_u_h(vec0, filt_vt);
+
+        hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, 7);
+        vec0 = (v16u8) __msa_ilvev_b((v16i8) hz_out0, (v16i8) hz_out1);
+        tmp6 = __msa_dotp_u_h(vec0, filt_vt);
+
+        hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, 7);
+        vec0 = (v16u8) __msa_ilvev_b((v16i8) hz_out1, (v16i8) hz_out0);
+        tmp7 = __msa_dotp_u_h(vec0, filt_vt);
+
+        hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, 7);  /* also carried into the next pass */
+        vec0 = (v16u8) __msa_ilvev_b((v16i8) hz_out0, (v16i8) hz_out1);
+        tmp8 = __msa_dotp_u_h(vec0, filt_vt);
+
+        SRARI_H4_UH(tmp5, tmp6, tmp7, tmp8, 7);
+        SAT_UH4_UH(tmp5, tmp6, tmp7, tmp8, 7);
+        PCKEV_B2_SB(tmp6, tmp5, tmp8, tmp7, out0, out1);
+        ST8x4_UB(out0, out1, dst, dst_stride);
+        dst += (4 * dst_stride);
+    }
+}
+
+void ff_put_vp8_bilinear8_hv_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                                 uint8_t *src, ptrdiff_t src_stride,
+                                 int height, int mx, int my)
+{
+    /* mx and my (less one) pick the rows of bilinear_filters_msa to use. */
+    const int8_t *taps_h = bilinear_filters_msa[mx - 1];
+    const int8_t *taps_v = bilinear_filters_msa[my - 1];
+
+    if (height != 4) {
+        common_hv_2ht_2vt_8x8mult_msa(src, src_stride, dst, dst_stride,
+                                      taps_h, taps_v, height);
+        return;
+    }
+    common_hv_2ht_2vt_8x4_msa(src, src_stride, dst, dst_stride, taps_h, taps_v);
+}
+/* 16-wide bilinear H+V put: each row is handled as two vectors (src and src + 8), 4 rows per pass. */
+void ff_put_vp8_bilinear16_hv_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                                  uint8_t *src, ptrdiff_t src_stride,
+                                  int height, int mx, int my)
+{
+    uint32_t loop_cnt;
+    const int8_t *filter_horiz = bilinear_filters_msa[mx - 1];
+    const int8_t *filter_vert = bilinear_filters_msa[my - 1];
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
+    v16u8 filt_hz, filt_vt, vec0, vec1;
+    v8u16 tmp1, tmp2, hz_out0, hz_out1, hz_out2, hz_out3;
+    v8i16 filt;
+
+    mask = LD_SB(&mc_filt_mask_arr[0]);
+
+    /* broadcast halfword 0 (the first two taps) of each filter to all lanes */
+    filt = LD_SH(filter_horiz);
+    filt_hz = (v16u8) __msa_splati_h(filt, 0);
+
+    filt = LD_SH(filter_vert);
+    filt_vt = (v16u8) __msa_splati_h(filt, 0);
+
+    LD_SB2(src, 8, src0, src1);  /* first row: vectors at src and src + 8 */
+    src += src_stride;
+
+    hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, 7);
+    hz_out2 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, 7);
+
+    /* hz_out0/hz_out2 carry the previous row's horizontal results into each pass. */
+    for (loop_cnt = (height >> 2); loop_cnt--;) {  /* 4 rows per pass */
+        LD_SB4(src, src_stride, src0, src2, src4, src6);
+        LD_SB4(src + 8, src_stride, src1, src3, src5, src7);
+        src += (4 * src_stride);
+
+        hz_out1 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, 7);
+        hz_out3 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, 7);
+        ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
+        DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2);
+        SRARI_H2_UH(tmp1, tmp2, 7);
+        SAT_UH2_UH(tmp1, tmp2, 7);
+        PCKEV_ST_SB(tmp1, tmp2, dst);
+        dst += dst_stride;
+
+        hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, 7);
+        hz_out2 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, 7);
+        ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1);  /* older row first, so the order flips */
+        DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2);
+        SRARI_H2_UH(tmp1, tmp2, 7);
+        SAT_UH2_UH(tmp1, tmp2, 7);
+        PCKEV_ST_SB(tmp1, tmp2, dst);
+        dst += dst_stride;
+
+        hz_out1 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, 7);
+        hz_out3 = HORIZ_2TAP_FILT_UH(src5, src5, mask, filt_hz, 7);
+        ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
+        DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2);
+        SRARI_H2_UH(tmp1, tmp2, 7);
+        SAT_UH2_UH(tmp1, tmp2, 7);
+        PCKEV_ST_SB(tmp1, tmp2, dst);
+        dst += dst_stride;
+
+        hz_out0 = HORIZ_2TAP_FILT_UH(src6, src6, mask, filt_hz, 7);
+        hz_out2 = HORIZ_2TAP_FILT_UH(src7, src7, mask, filt_hz, 7);
+        ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1);
+        DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2);
+        SRARI_H2_UH(tmp1, tmp2, 7);
+        SAT_UH2_UH(tmp1, tmp2, 7);
+        PCKEV_ST_SB(tmp1, tmp2, dst);
+        dst += dst_stride;
+    }
+}
+/* Copy 8-byte rows, 8 or 4 at a time; a height that is not a multiple of 4 copies nothing. */
+void ff_put_vp8_pixels8_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                            uint8_t *src, ptrdiff_t src_stride,
+                            int height, int mx, int my)
+{
+    int32_t cnt;
+    uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
+    /* NOTE(review): LD_UB* presumably loads 16 bytes per row though only 8 are stored - confirm. */
+    if (0 == height % 8) {
+        for (cnt = height >> 3; cnt--;) {
+            LD_UB8(src, src_stride,
+                   src0, src1, src2, src3, src4, src5, src6, src7);
+            src += (8 * src_stride);
+
+            out0 = __msa_copy_u_d((v2i64) src0, 0);  /* doubleword 0 of each row vector */
+            out1 = __msa_copy_u_d((v2i64) src1, 0);
+            out2 = __msa_copy_u_d((v2i64) src2, 0);
+            out3 = __msa_copy_u_d((v2i64) src3, 0);
+            out4 = __msa_copy_u_d((v2i64) src4, 0);
+            out5 = __msa_copy_u_d((v2i64) src5, 0);
+            out6 = __msa_copy_u_d((v2i64) src6, 0);
+            out7 = __msa_copy_u_d((v2i64) src7, 0);
+
+            SD4(out0, out1, out2, out3, dst, dst_stride);
+            dst += (4 * dst_stride);
+            SD4(out4, out5, out6, out7, dst, dst_stride);
+            dst += (4 * dst_stride);
+        }
+    } else if (0 == height % 4) {
+        for (cnt = (height / 4); cnt--;) {
+            LD_UB4(src, src_stride, src0, src1, src2, src3);
+            src += (4 * src_stride);
+            out0 = __msa_copy_u_d((v2i64) src0, 0);
+            out1 = __msa_copy_u_d((v2i64) src1, 0);
+            out2 = __msa_copy_u_d((v2i64) src2, 0);
+            out3 = __msa_copy_u_d((v2i64) src3, 0);
+
+            SD4(out0, out1, out2, out3, dst, dst_stride);
+            dst += (4 * dst_stride);
+        }
+    }
+}
+/* Copy (width >> 4) 16-byte-wide columns, each in (height >> 3) groups of 8 rows; remainders are skipped. */
+static void copy_16multx8mult_msa(uint8_t *src, int32_t src_stride,
+                                  uint8_t *dst, int32_t dst_stride,
+                                  int32_t height, int32_t width)
+{
+    int32_t cnt, loop_cnt;
+    uint8_t *src_tmp, *dst_tmp;
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
+
+    for (cnt = (width >> 4); cnt--;) {  /* one 16-byte column per pass */
+        src_tmp = src;
+        dst_tmp = dst;
+
+        for (loop_cnt = (height >> 3); loop_cnt--;) {  /* 8 rows per pass */
+            LD_UB8(src_tmp, src_stride,
+                   src0, src1, src2, src3, src4, src5, src6, src7);
+            src_tmp += (8 * src_stride);
+
+            ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7,
+                   dst_tmp, dst_stride);
+            dst_tmp += (8 * dst_stride);
+        }
+
+        src += 16;  /* step right to the next column */
+        dst += 16;
+    }
+}
+
+void ff_put_vp8_pixels16_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                             uint8_t *src, ptrdiff_t src_stride,
+                             int height, int mx, int my)
+{
+    v16u8 row0, row1, row2, row3;
+    int32_t groups = height >> 2;
+
+    /* mx/my are unused: this is a plain 16-byte-wide row copy. */
+    if (!(height % 8)) {
+        copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 16);
+    } else if (!(height % 4)) {
+        while (groups--) {
+            LD_UB4(src, src_stride, row0, row1, row2, row3);
+            ST_UB4(row0, row1, row2, row3, dst, dst_stride);
+            src += (4 * src_stride);
+            dst += (4 * dst_stride);
+        }
+    }
+}
diff --git a/libavcodec/mips/vp8dsp_init_mips.c b/libavcodec/mips/vp8dsp_init_mips.c
new file mode 100644
index 00000000..58d1b6ce
--- /dev/null
+++ b/libavcodec/mips/vp8dsp_init_mips.c
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2015 Manojkumar Bhosale (Manojkumar.Bhosale@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * VP8 compatible video decoder
+ */
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavcodec/vp8dsp.h"
+#include "vp8dsp_mips.h"
+/* Sets put_vp8_epel_pixels_tab[IDX][v][h] except [0][0]; per the names, index 1 = 4-tap, 2 = 6-tap. */
+#define VP8_MC_MIPS_FUNC(IDX, SIZE)            \
+    dsp->put_vp8_epel_pixels_tab[IDX][0][1] =  \
+        ff_put_vp8_epel##SIZE##_h4_msa;        \
+    dsp->put_vp8_epel_pixels_tab[IDX][0][2] =  \
+        ff_put_vp8_epel##SIZE##_h6_msa;        \
+    dsp->put_vp8_epel_pixels_tab[IDX][1][0] =  \
+        ff_put_vp8_epel##SIZE##_v4_msa;        \
+    dsp->put_vp8_epel_pixels_tab[IDX][1][1] =  \
+        ff_put_vp8_epel##SIZE##_h4v4_msa;      \
+    dsp->put_vp8_epel_pixels_tab[IDX][1][2] =  \
+        ff_put_vp8_epel##SIZE##_h6v4_msa;      \
+    dsp->put_vp8_epel_pixels_tab[IDX][2][0] =  \
+        ff_put_vp8_epel##SIZE##_v6_msa;        \
+    dsp->put_vp8_epel_pixels_tab[IDX][2][1] =  \
+        ff_put_vp8_epel##SIZE##_h4v6_msa;      \
+    dsp->put_vp8_epel_pixels_tab[IDX][2][2] =  \
+        ff_put_vp8_epel##SIZE##_h6v6_msa
+/* Sets put_vp8_bilinear_pixels_tab[IDX][v][h] except [0][0]; indices 1 and 2 get the same routine. */
+#define VP8_BILINEAR_MC_MIPS_FUNC(IDX, SIZE)       \
+    dsp->put_vp8_bilinear_pixels_tab[IDX][0][1] =  \
+        ff_put_vp8_bilinear##SIZE##_h_msa;         \
+    dsp->put_vp8_bilinear_pixels_tab[IDX][0][2] =  \
+        ff_put_vp8_bilinear##SIZE##_h_msa;         \
+    dsp->put_vp8_bilinear_pixels_tab[IDX][1][0] =  \
+        ff_put_vp8_bilinear##SIZE##_v_msa;         \
+    dsp->put_vp8_bilinear_pixels_tab[IDX][1][1] =  \
+        ff_put_vp8_bilinear##SIZE##_hv_msa;        \
+    dsp->put_vp8_bilinear_pixels_tab[IDX][1][2] =  \
+        ff_put_vp8_bilinear##SIZE##_hv_msa;        \
+    dsp->put_vp8_bilinear_pixels_tab[IDX][2][0] =  \
+        ff_put_vp8_bilinear##SIZE##_v_msa;         \
+    dsp->put_vp8_bilinear_pixels_tab[IDX][2][1] =  \
+        ff_put_vp8_bilinear##SIZE##_hv_msa;        \
+    dsp->put_vp8_bilinear_pixels_tab[IDX][2][2] =  \
+        ff_put_vp8_bilinear##SIZE##_hv_msa
+/* Puts the copy routine in slot [0][0] of both MC tables; no trailing ';' (the call site adds it). */
+#define VP8_MC_MIPS_COPY(IDX, SIZE)                \
+    dsp->put_vp8_epel_pixels_tab[IDX][0][0] =      \
+        ff_put_vp8_pixels##SIZE##_msa;             \
+    dsp->put_vp8_bilinear_pixels_tab[IDX][0][0] =  \
+        ff_put_vp8_pixels##SIZE##_msa
+/* Install the MSA routines into dsp: inverse transforms, MC tables and loop filters. */
+#if HAVE_MSA
+static av_cold void vp8dsp_init_msa(VP8DSPContext *dsp)
+{
+    dsp->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_msa;
+    dsp->vp8_idct_add = ff_vp8_idct_add_msa;
+    dsp->vp8_idct_dc_add = ff_vp8_idct_dc_add_msa;
+    dsp->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_msa;
+    dsp->vp8_idct_dc_add4uv = ff_vp8_idct_dc_add4uv_msa;
+
+    VP8_MC_MIPS_FUNC(0, 16);  /* table row 0: 16-wide */
+    VP8_MC_MIPS_FUNC(1, 8);   /* table row 1: 8-wide */
+    VP8_MC_MIPS_FUNC(2, 4);   /* table row 2: 4-wide */
+
+    VP8_BILINEAR_MC_MIPS_FUNC(0, 16);
+    VP8_BILINEAR_MC_MIPS_FUNC(1, 8);
+    VP8_BILINEAR_MC_MIPS_FUNC(2, 4);
+
+    VP8_MC_MIPS_COPY(0, 16);
+    VP8_MC_MIPS_COPY(1, 8);  /* no 4-wide copy is installed: row 2 slot [0][0] is left as it was */
+
+    dsp->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16_msa;
+    dsp->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16_msa;
+    dsp->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_msa;
+    dsp->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_msa;
+
+    dsp->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16_inner_msa;
+    dsp->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16_inner_msa;
+    dsp->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_msa;
+    dsp->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_msa;
+
+    dsp->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_msa;
+    dsp->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_msa;
+}
+#endif  // #if HAVE_MSA
+/* MIPS entry point: calls vp8dsp_init_msa() when built with HAVE_MSA, otherwise leaves dsp untouched. */
+av_cold void ff_vp8dsp_init_mips(VP8DSPContext *dsp)
+{
+#if HAVE_MSA
+    vp8dsp_init_msa(dsp);
+#endif  // #if HAVE_MSA
+}
diff --git a/libavcodec/mips/vp8dsp_mips.h b/libavcodec/mips/vp8dsp_mips.h
new file mode 100644
index 00000000..8e715b58
--- /dev/null
+++ b/libavcodec/mips/vp8dsp_mips.h
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2015 Manojkumar Bhosale (Manojkumar.Bhosale@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MIPS_VP8DSP_MIPS_H
+#define AVCODEC_MIPS_VP8DSP_MIPS_H
+/* Block copies. NOTE(review): nothing is #included above; uint8_t/ptrdiff_t must come from the includer. */
+void ff_put_vp8_pixels4_msa(uint8_t *dst, ptrdiff_t dststride,
+                            uint8_t *src, ptrdiff_t srcstride,
+                            int h, int x, int y);
+void ff_put_vp8_pixels8_msa(uint8_t *dst, ptrdiff_t dststride,
+                            uint8_t *src, ptrdiff_t srcstride,
+                            int h, int x, int y);
+void ff_put_vp8_pixels16_msa(uint8_t *dst, ptrdiff_t dststride,
+                             uint8_t *src, ptrdiff_t srcstride,
+                             int h, int x, int y);
+/* 16-wide sub-pel (epel) MC; the hN/vN suffix presumably names an N-tap horizontal/vertical filter. */
+void ff_put_vp8_epel16_h4_msa(uint8_t *dst, ptrdiff_t dststride,
+                              uint8_t *src, ptrdiff_t srcstride,
+                              int h, int mx, int my);
+void ff_put_vp8_epel16_h6_msa(uint8_t *dst, ptrdiff_t dststride,
+                              uint8_t *src, ptrdiff_t srcstride,
+                              int h, int mx, int my);
+void ff_put_vp8_epel16_v4_msa(uint8_t *dst, ptrdiff_t dststride,
+                              uint8_t *src, ptrdiff_t srcstride,
+                              int h, int mx, int my);
+void ff_put_vp8_epel16_v6_msa(uint8_t *dst, ptrdiff_t dststride,
+                              uint8_t *src, ptrdiff_t srcstride,
+                              int h, int mx, int my);
+void ff_put_vp8_epel16_h4v4_msa(uint8_t *dst, ptrdiff_t dststride,
+                                uint8_t *src, ptrdiff_t srcstride,
+                                int h, int mx, int my);
+void ff_put_vp8_epel16_h6v4_msa(uint8_t *dst, ptrdiff_t dststride,
+                                uint8_t *src, ptrdiff_t srcstride,
+                                int h, int mx, int my);
+void ff_put_vp8_epel16_h4v6_msa(uint8_t *dst, ptrdiff_t dststride,
+                                uint8_t *src, ptrdiff_t srcstride,
+                                int h, int mx, int my);
+void ff_put_vp8_epel16_h6v6_msa(uint8_t *dst, ptrdiff_t dststride,
+                                uint8_t *src, ptrdiff_t srcstride,
+                                int h, int mx, int my);
+/* 8-wide sub-pel (epel) MC prototypes, with the same hN/vN suffix scheme as the 16-wide set. */
+void ff_put_vp8_epel8_h4_msa(uint8_t *dst, ptrdiff_t dststride,
+                             uint8_t *src, ptrdiff_t srcstride,
+                             int h, int mx, int my);
+void ff_put_vp8_epel8_h6_msa(uint8_t *dst, ptrdiff_t dststride,
+                             uint8_t *src, ptrdiff_t srcstride,
+                             int h, int mx, int my);
+void ff_put_vp8_epel8_v4_msa(uint8_t *dst, ptrdiff_t dststride,
+                             uint8_t *src, ptrdiff_t srcstride,
+                             int h, int mx, int my);
+void ff_put_vp8_epel8_v6_msa(uint8_t *dst, ptrdiff_t dststride,
+                             uint8_t *src, ptrdiff_t srcstride,
+                             int h, int mx, int my);
+void ff_put_vp8_epel8_h4v4_msa(uint8_t *dst, ptrdiff_t dststride,
+                               uint8_t *src, ptrdiff_t srcstride,
+                               int h, int mx, int my);
+void ff_put_vp8_epel8_h6v4_msa(uint8_t *dst, ptrdiff_t dststride,
+                               uint8_t *src, ptrdiff_t srcstride,
+                               int h, int mx, int my);
+void ff_put_vp8_epel8_h4v6_msa(uint8_t *dst, ptrdiff_t dststride,
+                               uint8_t *src, ptrdiff_t srcstride,
+                               int h, int mx, int my);
+void ff_put_vp8_epel8_h6v6_msa(uint8_t *dst, ptrdiff_t dststride,
+                               uint8_t *src, ptrdiff_t srcstride,
+                               int h, int mx, int my);
+/* 4-wide sub-pel (epel) MC prototypes, with the same hN/vN suffix scheme as the 16-wide set. */
+void ff_put_vp8_epel4_h4_msa(uint8_t *dst, ptrdiff_t dststride,
+                             uint8_t *src, ptrdiff_t srcstride,
+                             int h, int mx, int my);
+void ff_put_vp8_epel4_h6_msa(uint8_t *dst, ptrdiff_t dststride,
+                             uint8_t *src, ptrdiff_t srcstride,
+                             int h, int mx, int my);
+void ff_put_vp8_epel4_v4_msa(uint8_t *dst, ptrdiff_t dststride,
+                             uint8_t *src, ptrdiff_t srcstride,
+                             int h, int mx, int my);
+void ff_put_vp8_epel4_v6_msa(uint8_t *dst, ptrdiff_t dststride,
+                             uint8_t *src, ptrdiff_t srcstride,
+                             int h, int mx, int my);
+void ff_put_vp8_epel4_h4v4_msa(uint8_t *dst, ptrdiff_t dststride,
+                               uint8_t *src, ptrdiff_t srcstride,
+                               int h, int mx, int my);
+void ff_put_vp8_epel4_h6v4_msa(uint8_t *dst, ptrdiff_t dststride,
+                               uint8_t *src, ptrdiff_t srcstride,
+                               int h, int mx, int my);
+void ff_put_vp8_epel4_h4v6_msa(uint8_t *dst, ptrdiff_t dststride,
+                               uint8_t *src, ptrdiff_t srcstride,
+                               int h, int mx, int my);
+void ff_put_vp8_epel4_h6v6_msa(uint8_t *dst, ptrdiff_t dststride,
+                               uint8_t *src, ptrdiff_t srcstride,
+                               int h, int mx, int my);
+/* Bilinear MC for 16/8/4-wide blocks: _h, _v and _hv presumably mean horizontal, vertical and both. */
+void ff_put_vp8_bilinear16_h_msa(uint8_t *dst, ptrdiff_t dststride,
+                                 uint8_t *src, ptrdiff_t srcstride,
+                                 int h, int mx, int my);
+void ff_put_vp8_bilinear16_v_msa(uint8_t *dst, ptrdiff_t dststride,
+                                 uint8_t *src, ptrdiff_t srcstride,
+                                 int h, int mx, int my);
+void ff_put_vp8_bilinear16_hv_msa(uint8_t *dst, ptrdiff_t dststride,
+                                  uint8_t *src, ptrdiff_t srcstride,
+                                  int h, int mx, int my);
+
+void ff_put_vp8_bilinear8_h_msa(uint8_t *dst, ptrdiff_t dststride,
+                                uint8_t *src, ptrdiff_t srcstride,
+                                int h, int mx, int my);
+void ff_put_vp8_bilinear8_v_msa(uint8_t *dst, ptrdiff_t dststride,
+                                uint8_t *src, ptrdiff_t srcstride,
+                                int h, int mx, int my);
+void ff_put_vp8_bilinear8_hv_msa(uint8_t *dst, ptrdiff_t dststride,
+                                 uint8_t *src, ptrdiff_t srcstride,
+                                 int h, int mx, int my);
+
+void ff_put_vp8_bilinear4_h_msa(uint8_t *dst, ptrdiff_t dststride,
+                                uint8_t *src, ptrdiff_t srcstride,
+                                int h, int mx, int my);
+void ff_put_vp8_bilinear4_v_msa(uint8_t *dst, ptrdiff_t dststride,
+                                uint8_t *src, ptrdiff_t srcstride,
+                                int h, int mx, int my);
+void ff_put_vp8_bilinear4_hv_msa(uint8_t *dst, ptrdiff_t dststride,
+                                 uint8_t *src, ptrdiff_t srcstride,
+                                 int h, int mx, int my);
+
+/* Loop filters. NOTE(review): the 16_inner pair names its args e/i/h, presumably flim_e/flim_i/hev_thresh. */
+void ff_vp8_h_loop_filter16_inner_msa(uint8_t *dst, ptrdiff_t stride,
+                                      int32_t e, int32_t i, int32_t h);
+void ff_vp8_v_loop_filter16_inner_msa(uint8_t *dst, ptrdiff_t stride,
+                                      int32_t e, int32_t i, int32_t h);
+void ff_vp8_h_loop_filter8uv_inner_msa(uint8_t *dst_u, uint8_t *dst_v,
+                                       ptrdiff_t stride,
+                                       int flim_e, int flim_i, int hev_thresh);
+void ff_vp8_v_loop_filter8uv_inner_msa(uint8_t *dst_u, uint8_t *dst_v,
+                                       ptrdiff_t stride,
+                                       int flim_e, int flim_i, int hev_thresh);
+void ff_vp8_h_loop_filter16_msa(uint8_t *dst, ptrdiff_t stride,
+                                int flim_e, int flim_i, int hev_thresh);
+void ff_vp8_v_loop_filter16_msa(uint8_t *dst, ptrdiff_t stride,
+                                int flim_e, int flim_i, int hev_thresh);
+void ff_vp8_h_loop_filter8uv_msa(uint8_t *dst_u, uint8_t *dst_v,
+                                 ptrdiff_t stride,
+                                 int flim_e, int flim_i, int hev_thresh);
+void ff_vp8_v_loop_filter8uv_msa(uint8_t *dst_u, uint8_t *dst_v,
+                                 ptrdiff_t stride,
+                                 int flim_e, int flim_i, int hev_thresh);
+void ff_vp8_h_loop_filter_simple_msa(uint8_t *dst, ptrdiff_t stride, int flim);
+void ff_vp8_v_loop_filter_simple_msa(uint8_t *dst, ptrdiff_t stride, int flim);
+
+/* Inverse transforms; judging by the names: luma DC WHT, IDCT + add, and DC-only add (single, 4y, 4uv). */
+void ff_vp8_luma_dc_wht_msa(int16_t block[4][4][16], int16_t dc[16]);
+void ff_vp8_idct_add_msa(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
+void ff_vp8_idct_dc_add_msa(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
+void ff_vp8_idct_dc_add4uv_msa(uint8_t *dst, int16_t block[4][16],
+                               ptrdiff_t stride);
+void ff_vp8_idct_dc_add4y_msa(uint8_t *dst, int16_t block[4][16],
+                              ptrdiff_t stride);
+
+#endif  // #ifndef AVCODEC_MIPS_VP8DSP_MIPS_H
diff --git a/libavcodec/mips/vp9_idct_msa.c b/libavcodec/mips/vp9_idct_msa.c
new file mode 100644
index 00000000..25ea16c7
--- /dev/null
+++ b/libavcodec/mips/vp9_idct_msa.c
@@ -0,0 +1,2138 @@
+/*
+ * Copyright (c) 2015 Shivraj Patil (Shivraj.Patil@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <string.h>
+#include "libavcodec/vp9dsp.h"
+#include "libavutil/mips/generic_macros_msa.h"
+#include "vp9dsp_mips.h"
+
+#define VP9_DCT_CONST_BITS   14  /* fixed-point scale of the cospi_/sinpi_ constants: 1 << 14 == 16384 */
+#define ROUND_POWER_OF_TWO(value, n)  (((value) + (1 << ((n) - 1))) >> (n))  /* adds 1 << (n - 1), then shifts right by n; n must be >= 1 */
+/* cospi_k_64 == round(16384 * cos(k * pi / 64)); 16384 == 1 << VP9_DCT_CONST_BITS */
+static const int32_t cospi_1_64 = 16364;
+static const int32_t cospi_2_64 = 16305;
+static const int32_t cospi_3_64 = 16207;
+static const int32_t cospi_4_64 = 16069;
+static const int32_t cospi_5_64 = 15893;
+static const int32_t cospi_6_64 = 15679;
+static const int32_t cospi_7_64 = 15426;
+static const int32_t cospi_8_64 = 15137;
+static const int32_t cospi_9_64 = 14811;
+static const int32_t cospi_10_64 = 14449;
+static const int32_t cospi_11_64 = 14053;
+static const int32_t cospi_12_64 = 13623;
+static const int32_t cospi_13_64 = 13160;
+static const int32_t cospi_14_64 = 12665;
+static const int32_t cospi_15_64 = 12140;
+static const int32_t cospi_16_64 = 11585;
+static const int32_t cospi_17_64 = 11003;
+static const int32_t cospi_18_64 = 10394;
+static const int32_t cospi_19_64 = 9760;
+static const int32_t cospi_20_64 = 9102;
+static const int32_t cospi_21_64 = 8423;
+static const int32_t cospi_22_64 = 7723;
+static const int32_t cospi_23_64 = 7005;
+static const int32_t cospi_24_64 = 6270;
+static const int32_t cospi_25_64 = 5520;
+static const int32_t cospi_26_64 = 4756;
+static const int32_t cospi_27_64 = 3981;
+static const int32_t cospi_28_64 = 3196;
+static const int32_t cospi_29_64 = 2404;
+static const int32_t cospi_30_64 = 1606;
+static const int32_t cospi_31_64 = 804;
+
+/* sinpi_k_9 == 16384 * sqrt(2) * sin(k * pi / 9) * 2 / 3 as an integer, for k = 1..4 */
+static const int32_t sinpi_1_9 = 5283;
+static const int32_t sinpi_2_9 = 9929;
+static const int32_t sinpi_3_9 = 13377;
+static const int32_t sinpi_4_9 = 15212;
+/* Two rounded dot products of reg0/reg1 halfword pairs with cnst0/cnst1, delivered in out0 and out1. */
+#define VP9_DOTP_CONST_PAIR(reg0, reg1, cnst0, cnst1, out0, out1)  \
+{                                                                  \
+    v8i16 k0_m = __msa_fill_h(cnst0);                              \
+    v4i32 s0_m, s1_m, s2_m, s3_m;                                  \
+    /* k0_m alternates cnst0 and cnst1 across halfwords */         \
+    s0_m = (v4i32) __msa_fill_h(cnst1);                            \
+    k0_m = __msa_ilvev_h((v8i16) s0_m, k0_m);                      \
+    /* out0: -reg1/reg0 halfword pairs dotted with k0_m */         \
+    ILVRL_H2_SW((-reg1), reg0, s1_m, s0_m);                        \
+    ILVRL_H2_SW(reg0, reg1, s3_m, s2_m);                           \
+    DOTP_SH2_SW(s1_m, s0_m, k0_m, k0_m, s1_m, s0_m);               \
+    SRARI_W2_SW(s1_m, s0_m, VP9_DCT_CONST_BITS);                   \
+    out0 = __msa_pckev_h((v8i16) s0_m, (v8i16) s1_m);              \
+    /* out1: reg0/reg1 halfword pairs dotted with k0_m */          \
+    DOTP_SH2_SW(s3_m, s2_m, k0_m, k0_m, s1_m, s0_m);               \
+    SRARI_W2_SW(s1_m, s0_m, VP9_DCT_CONST_BITS);                   \
+    out1 = __msa_pckev_h((v8i16) s0_m, (v8i16) s1_m);              \
+}
+/* Dot products of in0..in3 with the constants in4..in7, combined by BUTTERFLY_4, rounded and packed into dst0..dst3. */
+#define VP9_DOT_ADD_SUB_SRARI_PCK(in0, in1, in2, in3, in4, in5, in6, in7,  \
+                                      dst0, dst1, dst2, dst3)              \
+{                                                                          \
+    v4i32 tp0_m, tp1_m, tp2_m, tp3_m, tp4_m;                               \
+    v4i32 tp5_m, tp6_m, tp7_m, tp8_m, tp9_m;                               \
+    /* eight dot products: in0/in1 with in4, in5; in2/in3 with in6, in7 */ \
+    DOTP_SH4_SW(in0, in1, in0, in1, in4, in4, in5, in5,                    \
+                tp0_m, tp2_m, tp3_m, tp4_m);                               \
+    DOTP_SH4_SW(in2, in3, in2, in3, in6, in6, in7, in7,                    \
+                tp5_m, tp6_m, tp7_m, tp8_m);                               \
+    BUTTERFLY_4(tp0_m, tp3_m, tp7_m, tp5_m, tp1_m, tp9_m, tp7_m, tp5_m);   \
+    BUTTERFLY_4(tp2_m, tp4_m, tp8_m, tp6_m, tp3_m, tp0_m, tp4_m, tp2_m);   \
+    SRARI_W4_SW(tp1_m, tp9_m, tp7_m, tp5_m, VP9_DCT_CONST_BITS);           \
+    SRARI_W4_SW(tp3_m, tp0_m, tp4_m, tp2_m, VP9_DCT_CONST_BITS);           \
+    PCKEV_H4_SH(tp1_m, tp3_m, tp9_m, tp0_m, tp7_m, tp4_m, tp5_m, tp2_m,    \
+                dst0, dst1, dst2, dst3);                                   \
+}
+/* Statement expression: dots in0 and in1 with in2, rounds by VP9_DCT_CONST_BITS and packs both results into one v8i16. */
+#define VP9_DOT_SHIFT_RIGHT_PCK_H(in0, in1, in2)          \
+( {                                                       \
+    v8i16 dst_m;                                          \
+    v4i32 tp0_m, tp1_m;                                   \
+    /* 32-bit dot products with in2 */                    \
+    DOTP_SH2_SW(in0, in1, in2, in2, tp1_m, tp0_m);        \
+    SRARI_W2_SW(tp1_m, tp0_m, VP9_DCT_CONST_BITS);        \
+    dst_m = __msa_pckev_h((v8i16) tp1_m, (v8i16) tp0_m);  \
+    /* value of the expression */                         \
+    dst_m;                                                \
+} )
+/* 8-point ADST network (per the macro name) on v8i16 vectors; in0..in7 are all overwritten as scratch on the way. */
+#define VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7,                 \
+                  out0, out1, out2, out3, out4, out5, out6, out7)         \
+{                                                                         \
+    v8i16 cnst0_m, cnst1_m, cnst2_m, cnst3_m, cnst4_m;                    \
+    v8i16 vec0_m, vec1_m, vec2_m, vec3_m, s0_m, s1_m;                     \
+    v8i16 coeff0_m = { cospi_2_64, cospi_6_64, cospi_10_64, cospi_14_64,  \
+        cospi_18_64, cospi_22_64, cospi_26_64, cospi_30_64 };             \
+    v8i16 coeff1_m = { cospi_8_64, -cospi_8_64, cospi_16_64,              \
+        -cospi_16_64, cospi_24_64, -cospi_24_64, 0, 0 };                  \
+    /* constant pairs from coeff0_m lanes 0/7 and 4/3 */                  \
+    SPLATI_H2_SH(coeff0_m, 0, 7, cnst0_m, cnst1_m);                       \
+    cnst2_m = -cnst0_m;                                                   \
+    ILVEV_H2_SH(cnst0_m, cnst1_m, cnst1_m, cnst2_m, cnst0_m, cnst1_m);    \
+    SPLATI_H2_SH(coeff0_m, 4, 3, cnst2_m, cnst3_m);                       \
+    cnst4_m = -cnst2_m;                                                   \
+    ILVEV_H2_SH(cnst2_m, cnst3_m, cnst3_m, cnst4_m, cnst2_m, cnst3_m);    \
+    /* in0/in7 and in4/in3 stage; overwrites in7, in0, in4, in3 */        \
+    ILVRL_H2_SH(in0, in7, vec1_m, vec0_m);                                \
+    ILVRL_H2_SH(in4, in3, vec3_m, vec2_m);                                \
+    VP9_DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m,    \
+                              cnst1_m, cnst2_m, cnst3_m, in7, in0,        \
+                              in4, in3);                                  \
+    /* constant pairs from coeff0_m lanes 2/5 and 6/1 */                  \
+    SPLATI_H2_SH(coeff0_m, 2, 5, cnst0_m, cnst1_m);                       \
+    cnst2_m = -cnst0_m;                                                   \
+    ILVEV_H2_SH(cnst0_m, cnst1_m, cnst1_m, cnst2_m, cnst0_m, cnst1_m);    \
+    SPLATI_H2_SH(coeff0_m, 6, 1, cnst2_m, cnst3_m);                       \
+    cnst4_m = -cnst2_m;                                                   \
+    ILVEV_H2_SH(cnst2_m, cnst3_m, cnst3_m, cnst4_m, cnst2_m, cnst3_m);    \
+    /* in2/in5 and in6/in1 stage; overwrites in5, in2, in6, in1 */        \
+    ILVRL_H2_SH(in2, in5, vec1_m, vec0_m);                                \
+    ILVRL_H2_SH(in6, in1, vec3_m, vec2_m);                                \
+                                                                          \
+    VP9_DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m,    \
+                              cnst1_m, cnst2_m, cnst3_m, in5, in2,        \
+                              in6, in1);                                  \
+    BUTTERFLY_4(in7, in0, in2, in5, s1_m, s0_m, in2, in5);                \
+    out7 = -s0_m;                                                         \
+    out0 = s1_m;                                                          \
+    /* coeff1_m lanes 0, 4, 1, 5: +/-cospi_8_64 and +/-cospi_24_64 */     \
+    SPLATI_H4_SH(coeff1_m, 0, 4, 1, 5,                                    \
+                 cnst0_m, cnst1_m, cnst2_m, cnst3_m);                     \
+                                                                          \
+    ILVEV_H2_SH(cnst3_m, cnst0_m, cnst1_m, cnst2_m, cnst3_m, cnst2_m);    \
+    cnst0_m = __msa_ilvev_h(cnst1_m, cnst0_m);                            \
+    cnst1_m = cnst0_m;                                                    \
+    /* in4/in3 and in6/in1 stage: yields out1, out6 and temporaries */    \
+    ILVRL_H2_SH(in4, in3, vec1_m, vec0_m);                                \
+    ILVRL_H2_SH(in6, in1, vec3_m, vec2_m);                                \
+    VP9_DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m,    \
+                              cnst2_m, cnst3_m, cnst1_m, out1, out6,      \
+                              s0_m, s1_m);                                \
+    /* coeff1_m lanes 2, 3: +/-cospi_16_64 */                             \
+    SPLATI_H2_SH(coeff1_m, 2, 3, cnst0_m, cnst1_m);                       \
+    cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m);                            \
+                                                                          \
+    ILVRL_H2_SH(in2, in5, vec1_m, vec0_m);                                \
+    ILVRL_H2_SH(s0_m, s1_m, vec3_m, vec2_m);                              \
+    out3 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m);            \
+    out4 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst1_m);            \
+    out2 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst0_m);            \
+    out5 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst1_m);            \
+    /* out1, out3, out5 are negated last */                               \
+    out1 = -out1;                                                         \
+    out3 = -out3;                                                         \
+    out5 = -out5;                                                         \
+}
+/* res0 = m0/m1 halfword pairs dotted with c0, res1 = the same pairs dotted with c1; both rounded by VP9_DCT_CONST_BITS. */
+#define VP9_MADD_SHORT(m0, m1, c0, c1, res0, res1)                        \
+{                                                                         \
+    v4i32 madd0_m, madd1_m, madd2_m, madd3_m;                             \
+    v8i16 madd_s0_m, madd_s1_m;                                           \
+    /* interleave m0 with m1; each 32-bit dot product spans one pair */   \
+    ILVRL_H2_SH(m1, m0, madd_s0_m, madd_s1_m);                            \
+    DOTP_SH4_SW(madd_s0_m, madd_s1_m, madd_s0_m, madd_s1_m,               \
+                c0, c0, c1, c1, madd0_m, madd1_m, madd2_m, madd3_m);      \
+    SRARI_W4_SW(madd0_m, madd1_m, madd2_m, madd3_m, VP9_DCT_CONST_BITS);  \
+    PCKEV_H2_SH(madd1_m, madd0_m, madd3_m, madd2_m, res0, res1);          \
+}
+/* Dot products of inp0/inp1 and inp2/inp3 pairs, combined by BUTTERFLY_4: cst0/cst2 give out0, out1; cst1/cst3 give out2, out3. */
+#define VP9_MADD_BF(inp0, inp1, inp2, inp3, cst0, cst1, cst2, cst3,       \
+                    out0, out1, out2, out3)                               \
+{                                                                         \
+    v8i16 madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m;                     \
+    v4i32 tmp0_m, tmp1_m, tmp2_m, tmp3_m, m4_m, m5_m;                     \
+    /* interleave inp0 with inp1 and inp2 with inp3 */                    \
+    ILVRL_H2_SH(inp1, inp0, madd_s0_m, madd_s1_m);                        \
+    ILVRL_H2_SH(inp3, inp2, madd_s2_m, madd_s3_m);                        \
+    DOTP_SH4_SW(madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m,               \
+                cst0, cst0, cst2, cst2, tmp0_m, tmp1_m, tmp2_m, tmp3_m);  \
+    BUTTERFLY_4(tmp0_m, tmp1_m, tmp3_m, tmp2_m,                           \
+                m4_m, m5_m, tmp3_m, tmp2_m);                              \
+    SRARI_W4_SW(m4_m, m5_m, tmp2_m, tmp3_m, VP9_DCT_CONST_BITS);          \
+    PCKEV_H2_SH(m5_m, m4_m, tmp3_m, tmp2_m, out0, out1);                  \
+    DOTP_SH4_SW(madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m,               \
+                cst1, cst1, cst3, cst3, tmp0_m, tmp1_m, tmp2_m, tmp3_m);  \
+    BUTTERFLY_4(tmp0_m, tmp1_m, tmp3_m, tmp2_m,                           \
+                m4_m, m5_m, tmp3_m, tmp2_m);                              \
+    SRARI_W4_SW(m4_m, m5_m, tmp2_m, tmp3_m, VP9_DCT_CONST_BITS);          \
+    PCKEV_H2_SH(m5_m, m4_m, tmp3_m, tmp2_m, out2, out3);                  \
+}
+/* Statement expression yielding a v8i16 whose halfwords alternate between c0_h and c1_h. */
+#define VP9_SET_COSPI_PAIR(c0_h, c1_h)   \
+( {                                      \
+    v8i16 out0_m, r0_m, r1_m;            \
+    /* replicate each constant */        \
+    r0_m = __msa_fill_h(c0_h);           \
+    r1_m = __msa_fill_h(c1_h);           \
+    out0_m = __msa_ilvev_h(r1_m, r0_m);  \
+    /* value of the expression */        \
+    out0_m;                              \
+} )
+/* Adds the halfword vectors in0..in3 to four 8-byte rows read from dst, clips to 0..255 and stores the rows back. */
+#define VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3)  \
+{                                                                 \
+    uint8_t *dst_m = (uint8_t *) (dst);                           \
+    v16u8 dst0_m, dst1_m, dst2_m, dst3_m;                         \
+    v16i8 tmp0_m, tmp1_m;                                         \
+    v16i8 zero_m = { 0 };                                         \
+    v8i16 res0_m, res1_m, res2_m, res3_m;                         \
+    /* load four rows and zero-extend the low 8 bytes of each */  \
+    LD_UB4(dst_m, dst_stride, dst0_m, dst1_m, dst2_m, dst3_m);    \
+    ILVR_B4_SH(zero_m, dst0_m, zero_m, dst1_m, zero_m, dst2_m,    \
+               zero_m, dst3_m, res0_m, res1_m, res2_m, res3_m);   \
+    ADD4(res0_m, in0, res1_m, in1, res2_m, in2, res3_m, in3,      \
+         res0_m, res1_m, res2_m, res3_m);                         \
+    CLIP_SH4_0_255(res0_m, res1_m, res2_m, res3_m);               \
+    PCKEV_B2_SB(res1_m, res0_m, res3_m, res2_m, tmp0_m, tmp1_m);  \
+    ST8x4_UB(tmp0_m, tmp1_m, dst_m, dst_stride);                  \
+}
+/* One 4-point inverse DCT pass; only the right-half (ILVR) halfwords of in0..in3 are read. */
+#define VP9_IDCT4x4(in0, in1, in2, in3, out0, out1, out2, out3)       \
+{                                                                     \
+    v8i16 c0_m, c1_m, c2_m, c3_m;                                     \
+    v8i16 step0_m, step1_m;                                           \
+    v4i32 tmp0_m, tmp1_m, tmp2_m, tmp3_m;                             \
+    /* even part: in0/in2 pairs times +/-cospi_16_64 */               \
+    c0_m = VP9_SET_COSPI_PAIR(cospi_16_64, cospi_16_64);              \
+    c1_m = VP9_SET_COSPI_PAIR(cospi_16_64, -cospi_16_64);             \
+    step0_m = __msa_ilvr_h(in2, in0);                                 \
+    DOTP_SH2_SW(step0_m, step0_m, c0_m, c1_m, tmp0_m, tmp1_m);        \
+    /* odd part: in1/in3 pairs times cospi_8_64, cospi_24_64 */       \
+    c2_m = VP9_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64);              \
+    c3_m = VP9_SET_COSPI_PAIR(cospi_8_64, cospi_24_64);               \
+    step1_m = __msa_ilvr_h(in3, in1);                                 \
+    DOTP_SH2_SW(step1_m, step1_m, c2_m, c3_m, tmp2_m, tmp3_m);        \
+    SRARI_W4_SW(tmp0_m, tmp1_m, tmp2_m, tmp3_m, VP9_DCT_CONST_BITS);  \
+    /* pack to halfwords, split halves, final butterfly */            \
+    PCKEV_H2_SW(tmp1_m, tmp0_m, tmp3_m, tmp2_m, tmp0_m, tmp2_m);      \
+    SLDI_B2_0_SW(tmp0_m, tmp2_m, tmp1_m, tmp3_m, 8);                  \
+    BUTTERFLY_4((v8i16) tmp0_m, (v8i16) tmp1_m,                       \
+                (v8i16) tmp2_m, (v8i16) tmp3_m,                       \
+                out0, out1, out2, out3);                              \
+}
+/* One 4-point inverse ADST pass built on the sinpi_k_9 constants; int0_m..int3_m hold the 32-bit sums before rounding. */
+#define VP9_IADST4x4(in0, in1, in2, in3, out0, out1, out2, out3)      \
+{                                                                     \
+    v8i16 res0_m, res1_m, c0_m, c1_m;                                 \
+    v8i16 k1_m, k2_m, k3_m, k4_m;                                     \
+    v8i16 zero_m = { 0 };                                             \
+    v4i32 tmp0_m, tmp1_m, tmp2_m, tmp3_m;                             \
+    v4i32 int0_m, int1_m, int2_m, int3_m;                             \
+    v8i16 mask_m = { sinpi_1_9, sinpi_2_9, sinpi_3_9,                 \
+        sinpi_4_9, -sinpi_1_9, -sinpi_2_9, -sinpi_3_9,                \
+        -sinpi_4_9 };                                                 \
+    /* 32-bit accumulator for out0 */                                 \
+    SPLATI_H4_SH(mask_m, 3, 0, 1, 2, c0_m, c1_m, k1_m, k2_m);         \
+    ILVEV_H2_SH(c0_m, c1_m, k1_m, k2_m, c0_m, c1_m);                  \
+    ILVR_H2_SH(in0, in2, in1, in3, res0_m, res1_m);                   \
+    DOTP_SH2_SW(res0_m, res1_m, c0_m, c1_m, tmp2_m, tmp1_m);          \
+    int0_m = tmp2_m + tmp1_m;                                         \
+    /* 32-bit accumulator for out1 */                                 \
+    SPLATI_H2_SH(mask_m, 4, 7, k4_m, k3_m);                           \
+    ILVEV_H2_SH(k4_m, k1_m, k3_m, k2_m, c0_m, c1_m);                  \
+    DOTP_SH2_SW(res0_m, res1_m, c0_m, c1_m, tmp0_m, tmp1_m);          \
+    int1_m = tmp0_m + tmp1_m;                                         \
+    /* 32-bit accumulator for out2 */                                 \
+    c0_m = __msa_splati_h(mask_m, 6);                                 \
+    ILVL_H2_SH(k2_m, c0_m, zero_m, k2_m, c0_m, c1_m);                 \
+    ILVR_H2_SH(in0, in2, in1, in3, res0_m, res1_m);                   \
+    DOTP_SH2_SW(res0_m, res1_m, c0_m, c1_m, tmp0_m, tmp1_m);          \
+    int2_m = tmp0_m + tmp1_m;                                         \
+    /* out3: tmp2_m still holds the first DOTP result here */         \
+    c0_m = __msa_splati_h(mask_m, 6);                                 \
+    c0_m = __msa_ilvev_h(c0_m, k1_m);                                 \
+                                                                      \
+    res0_m = __msa_ilvr_h((in1), (in3));                              \
+    tmp0_m = __msa_dotp_s_w(res0_m, c0_m);                            \
+    int3_m = tmp2_m + tmp0_m;                                         \
+                                                                      \
+    res0_m = __msa_ilvr_h((in2), (in3));                              \
+    c1_m = __msa_ilvev_h(k4_m, k3_m);                                 \
+                                                                      \
+    tmp2_m = __msa_dotp_s_w(res0_m, c1_m);                            \
+    res1_m = __msa_ilvr_h((in0), (in2));                              \
+    c1_m = __msa_ilvev_h(k1_m, zero_m);                               \
+                                                                      \
+    tmp3_m = __msa_dotp_s_w(res1_m, c1_m);                            \
+    int3_m += tmp2_m;                                                 \
+    int3_m += tmp3_m;                                                 \
+    /* round by VP9_DCT_CONST_BITS and narrow to halfwords */         \
+    SRARI_W4_SW(int0_m, int1_m, int2_m, int3_m, VP9_DCT_CONST_BITS);  \
+    PCKEV_H2_SH(int0_m, int0_m, int1_m, int1_m, out0, out1);          \
+    PCKEV_H2_SH(int2_m, int2_m, int3_m, int3_m, out2, out3);          \
+}
+/* Transposes the right-half (ILVR) halfwords of in0..in7 into out0..out3; out4..out7 are set to zero. */
+#define TRANSPOSE4X8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,          \
+                           out0, out1, out2, out3, out4, out5, out6, out7)  \
+{                                                                           \
+    v8i16 tmp0_m, tmp1_m, tmp2_m, tmp3_m;                                   \
+    v8i16 tmp0_n, tmp1_n, tmp2_n, tmp3_n;                                   \
+    v8i16 zero_m = { 0 };                                                   \
+    /* interleave halfwords of neighbouring inputs, then words */           \
+    ILVR_H4_SH(in1, in0, in3, in2, in5, in4, in7, in6,                      \
+               tmp0_n, tmp1_n, tmp2_n, tmp3_n);                             \
+    ILVRL_W2_SH(tmp1_n, tmp0_n, tmp0_m, tmp2_m);                            \
+    ILVRL_W2_SH(tmp3_n, tmp2_n, tmp1_m, tmp3_m);                            \
+    /* combine the doubleword halves into the four result vectors */        \
+    out0 = (v8i16) __msa_ilvr_d((v2i64) tmp1_m, (v2i64) tmp0_m);            \
+    out1 = (v8i16) __msa_ilvl_d((v2i64) tmp1_m, (v2i64) tmp0_m);            \
+    out2 = (v8i16) __msa_ilvr_d((v2i64) tmp3_m, (v2i64) tmp2_m);            \
+    out3 = (v8i16) __msa_ilvl_d((v2i64) tmp3_m, (v2i64) tmp2_m);            \
+    /* the remaining outputs carry no data */                               \
+    out4 = zero_m;                                                          \
+    out5 = zero_m;                                                          \
+    out6 = zero_m;                                                          \
+    out7 = zero_m;                                                          \
+}
+
+/* DC-only 4x4 inverse DCT: reads just input[0] and adds the resulting constant to the 4x4 block at dst. */
+static void vp9_idct4x4_1_add_msa(int16_t *input, uint8_t *dst,
+                                  int32_t dst_stride)
+{
+    /* one cospi_16_64 scaling per 1-D pass, narrowed to int16_t each time */
+    int16_t dc = ROUND_POWER_OF_TWO((input[0] * cospi_16_64), VP9_DCT_CONST_BITS);
+    v8i16 dc_vec;
+
+    dc = ROUND_POWER_OF_TWO((dc * cospi_16_64), VP9_DCT_CONST_BITS);
+    dc = ROUND_POWER_OF_TWO(dc, 4);      /* final rounding shift by 4 */
+    dc_vec = __msa_fill_h(dc);           /* same value in every halfword lane */
+    ADDBLK_ST4x4_UB(dc_vec, dc_vec, dc_vec, dc_vec, dst, dst_stride);
+}
+
+/* Full 4x4 inverse DCT: two VP9_IDCT4x4 passes with a transpose in between, rounded and added to dst. */
+static void vp9_idct4x4_colcol_addblk_msa(int16_t *input, uint8_t *dst,
+                                          int32_t dst_stride)
+{
+    v8i16 vec0, vec1, vec2, vec3;
+
+    /* fetch the 4x4 coefficient block as four vectors */
+    LD4x4_SH(input, vec0, vec1, vec2, vec3);
+    /* first 1-D pass */
+    VP9_IDCT4x4(vec0, vec1, vec2, vec3, vec0, vec1, vec2, vec3);
+    /* transpose, then second 1-D pass */
+    TRANSPOSE4x4_SH_SH(vec0, vec1, vec2, vec3, vec0, vec1, vec2, vec3);
+    VP9_IDCT4x4(vec0, vec1, vec2, vec3, vec0, vec1, vec2, vec3);
+    SRARI_H4_SH(vec0, vec1, vec2, vec3, 4);     /* rounding shift right by 4 */
+    ADDBLK_ST4x4_UB(vec0, vec1, vec2, vec3, dst, dst_stride);
+}
+
+/* Full 4x4 inverse ADST: two VP9_IADST4x4 passes with a transpose in between, rounded and added to dst. */
+static void vp9_iadst4x4_colcol_addblk_msa(int16_t *input, uint8_t *dst,
+                                           int32_t dst_stride)
+{
+    v8i16 vec0, vec1, vec2, vec3;
+
+    /* fetch the 4x4 coefficient block as four vectors */
+    LD4x4_SH(input, vec0, vec1, vec2, vec3);
+    /* first 1-D pass */
+    VP9_IADST4x4(vec0, vec1, vec2, vec3, vec0, vec1, vec2, vec3);
+    /* transpose, then second 1-D pass */
+    TRANSPOSE4x4_SH_SH(vec0, vec1, vec2, vec3, vec0, vec1, vec2, vec3);
+    VP9_IADST4x4(vec0, vec1, vec2, vec3, vec0, vec1, vec2, vec3);
+    SRARI_H4_SH(vec0, vec1, vec2, vec3, 4);     /* rounding shift right by 4 */
+    ADDBLK_ST4x4_UB(vec0, vec1, vec2, vec3, dst, dst_stride);
+}
+
+/* Hybrid 4x4 inverse transform: IADST in the first 1-D pass, IDCT in the second; eob is not used here. */
+static void vp9_iadst_idct_4x4_add_msa(int16_t *input, uint8_t *dst,
+                                       int32_t dst_stride, int32_t eob)
+{
+    v8i16 vec0, vec1, vec2, vec3;
+
+    /* fetch the 4x4 coefficient block as four vectors */
+    LD4x4_SH(input, vec0, vec1, vec2, vec3);
+    /* first 1-D pass: IADST */
+    VP9_IADST4x4(vec0, vec1, vec2, vec3, vec0, vec1, vec2, vec3);
+    /* transpose, then second 1-D pass: IDCT */
+    TRANSPOSE4x4_SH_SH(vec0, vec1, vec2, vec3, vec0, vec1, vec2, vec3);
+    VP9_IDCT4x4(vec0, vec1, vec2, vec3, vec0, vec1, vec2, vec3);
+    SRARI_H4_SH(vec0, vec1, vec2, vec3, 4);     /* rounding shift right by 4 */
+    ADDBLK_ST4x4_UB(vec0, vec1, vec2, vec3, dst, dst_stride);
+}
+
+/* Hybrid 4x4 inverse transform: IDCT in the first 1-D pass, IADST in the second; eob is not used here. */
+static void vp9_idct_iadst_4x4_add_msa(int16_t *input, uint8_t *dst,
+                                       int32_t dst_stride, int32_t eob)
+{
+    v8i16 vec0, vec1, vec2, vec3;
+
+    /* fetch the 4x4 coefficient block as four vectors */
+    LD4x4_SH(input, vec0, vec1, vec2, vec3);
+    /* first 1-D pass: IDCT */
+    VP9_IDCT4x4(vec0, vec1, vec2, vec3, vec0, vec1, vec2, vec3);
+    /* transpose, then second 1-D pass: IADST */
+    TRANSPOSE4x4_SH_SH(vec0, vec1, vec2, vec3, vec0, vec1, vec2, vec3);
+    VP9_IADST4x4(vec0, vec1, vec2, vec3, vec0, vec1, vec2, vec3);
+    SRARI_H4_SH(vec0, vec1, vec2, vec3, 4);     /* rounding shift right by 4 */
+    ADDBLK_ST4x4_UB(vec0, vec1, vec2, vec3, dst, dst_stride);
+}
+/* Statement expression yielding a v8i16 that alternates lanes idx1_h and idx2_h of mask_h. */
+#define VP9_SET_CONST_PAIR(mask_h, idx1_h, idx2_h)     \
+( {                                                    \
+    v8i16 c0_m, c1_m;                                  \
+    /* splat both lanes, then interleave them */       \
+    SPLATI_H2_SH(mask_h, idx1_h, idx2_h, c0_m, c1_m);  \
+    c0_m = __msa_ilvev_h(c1_m, c0_m);                  \
+    /* value of the expression */                      \
+    c0_m;                                              \
+} )
+
+/* out0, out1 = inp0/inp1 pairs dotted with cst0, cst1; out2, out3 = inp2/inp3 pairs dotted with cst2, cst3; all rounded */
+#define VP9_MADD(inp0, inp1, inp2, inp3, cst0, cst1, cst2, cst3,          \
+                 out0, out1, out2, out3)                                  \
+{                                                                         \
+    v8i16 madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m;                     \
+    v4i32 tmp0_m, tmp1_m, tmp2_m, tmp3_m;                                 \
+    /* interleave inp0 with inp1 and inp2 with inp3 */                    \
+    ILVRL_H2_SH(inp1, inp0, madd_s1_m, madd_s0_m);                        \
+    ILVRL_H2_SH(inp3, inp2, madd_s3_m, madd_s2_m);                        \
+    DOTP_SH4_SW(madd_s1_m, madd_s0_m, madd_s1_m, madd_s0_m,               \
+                cst0, cst0, cst1, cst1, tmp0_m, tmp1_m, tmp2_m, tmp3_m);  \
+    SRARI_W4_SW(tmp0_m, tmp1_m, tmp2_m, tmp3_m, VP9_DCT_CONST_BITS);      \
+    PCKEV_H2_SH(tmp1_m, tmp0_m, tmp3_m, tmp2_m, out0, out1);              \
+    DOTP_SH4_SW(madd_s3_m, madd_s2_m, madd_s3_m, madd_s2_m,               \
+                cst2, cst2, cst3, cst3, tmp0_m, tmp1_m, tmp2_m, tmp3_m);  \
+    SRARI_W4_SW(tmp0_m, tmp1_m, tmp2_m, tmp3_m, VP9_DCT_CONST_BITS);      \
+    PCKEV_H2_SH(tmp1_m, tmp0_m, tmp3_m, tmp2_m, out2, out3);              \
+}
+
+/* One 1-D pass of the 8-point inverse DCT over eight halfword vectors; in0..in7 are overwritten by the VP9_MADD calls. */
+#define VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7,                 \
+                       out0, out1, out2, out3, out4, out5, out6, out7)         \
+{                                                                              \
+    v8i16 tp0_m, tp1_m, tp2_m, tp3_m, tp4_m, tp5_m, tp6_m, tp7_m;              \
+    v8i16 k0_m, k1_m, k2_m, k3_m, res0_m, res1_m, res2_m, res3_m;              \
+    v4i32 tmp0_m, tmp1_m, tmp2_m, tmp3_m;                                      \
+    v8i16 mask_m = { cospi_28_64, cospi_4_64, cospi_20_64, cospi_12_64,        \
+       cospi_16_64, -cospi_4_64, -cospi_20_64, -cospi_16_64 };                 \
+    /* odd inputs in1, in7, in3, in5 are processed in place by VP9_MADD */     \
+    k0_m = VP9_SET_CONST_PAIR(mask_m, 0, 5);                                   \
+    k1_m = VP9_SET_CONST_PAIR(mask_m, 1, 0);                                   \
+    k2_m = VP9_SET_CONST_PAIR(mask_m, 6, 3);                                   \
+    k3_m = VP9_SET_CONST_PAIR(mask_m, 3, 2);                                   \
+    VP9_MADD(in1, in7, in3, in5, k0_m, k1_m, k2_m, k3_m, in1, in7, in3, in5);  \
+    SUB2(in1, in3, in7, in5, res0_m, res1_m);                                  \
+    k0_m = VP9_SET_CONST_PAIR(mask_m, 4, 7);                                   \
+    k1_m = __msa_splati_h(mask_m, 4);                                          \
+    /* res0_m/res1_m pairs times +/-cospi_16_64 give tp5_m, tp6_m */           \
+    ILVRL_H2_SH(res0_m, res1_m, res2_m, res3_m);                               \
+    DOTP_SH4_SW(res2_m, res3_m, res2_m, res3_m, k0_m, k0_m, k1_m, k1_m,        \
+                tmp0_m, tmp1_m, tmp2_m, tmp3_m);                               \
+    SRARI_W4_SW(tmp0_m, tmp1_m, tmp2_m, tmp3_m, VP9_DCT_CONST_BITS);           \
+    tp4_m = in1 + in3;                                                         \
+    PCKEV_H2_SH(tmp1_m, tmp0_m, tmp3_m, tmp2_m, tp5_m, tp6_m);                 \
+    tp7_m = in7 + in5;                                                         \
+    k2_m = VP9_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64);                       \
+    k3_m = VP9_SET_COSPI_PAIR(cospi_8_64, cospi_24_64);                        \
+    VP9_MADD(in0, in4, in2, in6, k1_m, k0_m, k2_m, k3_m,                       \
+             in0, in4, in2, in6);                                              \
+    BUTTERFLY_4(in0, in4, in2, in6, tp0_m, tp1_m, tp2_m, tp3_m);               \
+    BUTTERFLY_8(tp0_m, tp1_m, tp2_m, tp3_m, tp4_m, tp5_m, tp6_m, tp7_m,        \
+                out0, out1, out2, out3, out4, out5, out6, out7);               \
+}
+
+#define VP9_IADST8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7,              \
+                        out0, out1, out2, out3, out4, out5, out6, out7)      \
+{                                                                            \
+    v4i32 r0_m, r1_m, r2_m, r3_m, r4_m, r5_m, r6_m, r7_m;                    \
+    v4i32 m0_m, m1_m, m2_m, m3_m, t0_m, t1_m;                                \
+    v8i16 res0_m, res1_m, res2_m, res3_m, k0_m, k1_m, in_s0, in_s1;          \
+    v8i16 mask1_m = { cospi_2_64, cospi_30_64, -cospi_2_64,                  \
+        cospi_10_64, cospi_22_64, -cospi_10_64, cospi_18_64, cospi_14_64 };  \
+    v8i16 mask2_m = { cospi_14_64, -cospi_18_64, cospi_26_64,                \
+        cospi_6_64, -cospi_26_64, cospi_8_64, cospi_24_64, -cospi_8_64 };    \
+    v8i16 mask3_m = { -cospi_24_64, cospi_8_64, cospi_16_64,                 \
+        -cospi_16_64, 0, 0, 0, 0 };                                          \
+                                                                             \
+    k0_m = VP9_SET_CONST_PAIR(mask1_m, 0, 1);                                \
+    k1_m = VP9_SET_CONST_PAIR(mask1_m, 1, 2);                                \
+    ILVRL_H2_SH(in1, in0, in_s1, in_s0);                                     \
+    DOTP_SH4_SW(in_s1, in_s0, in_s1, in_s0, k0_m, k0_m, k1_m, k1_m,          \
+                r0_m, r1_m, r2_m, r3_m);                                     \
+    k0_m = VP9_SET_CONST_PAIR(mask1_m, 6, 7);                                \
+    k1_m = VP9_SET_CONST_PAIR(mask2_m, 0, 1);                                \
+    ILVRL_H2_SH(in5, in4, in_s1, in_s0);                                     \
+    DOTP_SH4_SW(in_s1, in_s0, in_s1, in_s0, k0_m, k0_m, k1_m, k1_m,          \
+                r4_m, r5_m, r6_m, r7_m);                                     \
+    ADD4(r0_m, r4_m, r1_m, r5_m, r2_m, r6_m, r3_m, r7_m,                     \
+         m0_m, m1_m, m2_m, m3_m);                                            \
+    SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, VP9_DCT_CONST_BITS);                 \
+    PCKEV_H2_SH(m1_m, m0_m, m3_m, m2_m, res0_m, res1_m);                     \
+    SUB4(r0_m, r4_m, r1_m, r5_m, r2_m, r6_m, r3_m, r7_m,                     \
+         m0_m, m1_m, m2_m, m3_m);                                            \
+    SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, VP9_DCT_CONST_BITS);                 \
+    PCKEV_H2_SW(m1_m, m0_m, m3_m, m2_m, t0_m, t1_m);                         \
+    k0_m = VP9_SET_CONST_PAIR(mask1_m, 3, 4);                                \
+    k1_m = VP9_SET_CONST_PAIR(mask1_m, 4, 5);                                \
+    ILVRL_H2_SH(in3, in2, in_s1, in_s0);                                     \
+    DOTP_SH4_SW(in_s1, in_s0, in_s1, in_s0, k0_m, k0_m, k1_m, k1_m,          \
+                r0_m, r1_m, r2_m, r3_m);                                     \
+    k0_m = VP9_SET_CONST_PAIR(mask2_m, 2, 3);                                \
+    k1_m = VP9_SET_CONST_PAIR(mask2_m, 3, 4);                                \
+    ILVRL_H2_SH(in7, in6, in_s1, in_s0);                                     \
+    DOTP_SH4_SW(in_s1, in_s0, in_s1, in_s0, k0_m, k0_m, k1_m, k1_m,          \
+                r4_m, r5_m, r6_m, r7_m);                                     \
+    ADD4(r0_m, r4_m, r1_m, r5_m, r2_m, r6_m, r3_m, r7_m,                     \
+         m0_m, m1_m, m2_m, m3_m);                                            \
+    SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, VP9_DCT_CONST_BITS);                 \
+    PCKEV_H2_SH(m1_m, m0_m, m3_m, m2_m, res2_m, res3_m);                     \
+    SUB4(r0_m, r4_m, r1_m, r5_m, r2_m, r6_m, r3_m, r7_m,                     \
+         m0_m, m1_m, m2_m, m3_m);                                            \
+    SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, VP9_DCT_CONST_BITS);                 \
+    PCKEV_H2_SW(m1_m, m0_m, m3_m, m2_m, r2_m, r3_m);                         \
+    ILVRL_H2_SW(r3_m, r2_m, m2_m, m3_m);                                     \
+    BUTTERFLY_4(res0_m, res1_m, res3_m, res2_m, out0, in7, in4, in3);        \
+    k0_m = VP9_SET_CONST_PAIR(mask2_m, 5, 6);                                \
+    k1_m = VP9_SET_CONST_PAIR(mask2_m, 6, 7);                                \
+    ILVRL_H2_SH(t1_m, t0_m, in_s1, in_s0);                                   \
+    DOTP_SH4_SW(in_s1, in_s0, in_s1, in_s0, k0_m, k0_m, k1_m, k1_m,          \
+                r0_m, r1_m, r2_m, r3_m);                                     \
+    k1_m = VP9_SET_CONST_PAIR(mask3_m, 0, 1);                                \
+    DOTP_SH4_SW(m2_m, m3_m, m2_m, m3_m, k0_m, k0_m, k1_m, k1_m,              \
+                r4_m, r5_m, r6_m, r7_m);                                     \
+    ADD4(r0_m, r6_m, r1_m, r7_m, r2_m, r4_m, r3_m, r5_m,                     \
+         m0_m, m1_m, m2_m, m3_m);                                            \
+    SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, VP9_DCT_CONST_BITS);                 \
+    PCKEV_H2_SH(m1_m, m0_m, m3_m, m2_m, in1, out6);                          \
+    SUB4(r0_m, r6_m, r1_m, r7_m, r2_m, r4_m, r3_m, r5_m,                     \
+         m0_m, m1_m, m2_m, m3_m);                                            \
+    SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, VP9_DCT_CONST_BITS);                 \
+    PCKEV_H2_SH(m1_m, m0_m, m3_m, m2_m, in2, in5);                           \
+    k0_m = VP9_SET_CONST_PAIR(mask3_m, 2, 2);                                \
+    k1_m = VP9_SET_CONST_PAIR(mask3_m, 2, 3);                                \
+    ILVRL_H2_SH(in4, in3, in_s1, in_s0);                                     \
+    DOTP_SH4_SW(in_s1, in_s0, in_s1, in_s0, k0_m, k0_m, k1_m, k1_m,          \
+                m0_m, m1_m, m2_m, m3_m);                                     \
+    SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, VP9_DCT_CONST_BITS);                 \
+    PCKEV_H2_SH(m1_m, m0_m, m3_m, m2_m, in3, out4);                          \
+    ILVRL_H2_SW(in5, in2, m2_m, m3_m);                                       \
+    DOTP_SH4_SW(m2_m, m3_m, m2_m, m3_m, k0_m, k0_m, k1_m, k1_m,              \
+                m0_m, m1_m, m2_m, m3_m);                                     \
+    SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, VP9_DCT_CONST_BITS);                 \
+    PCKEV_H2_SH(m1_m, m0_m, m3_m, m2_m, out2, in5);                          \
+                                                                             \
+    out1 = -in1;                                                             \
+    out3 = -in3;                                                             \
+    out5 = -in5;                                                             \
+    out7 = -in7;                                                             \
+}
+
+static void vp9_idct8x8_1_add_msa(int16_t *input, uint8_t *dst,
+                                  int32_t dst_stride)
+{
+    v8i16 dc_vec;
+    int32_t dc_shifted;
+    int16_t dc;
+    /* DC-only 8x8 case: input[0] is the only coefficient read */
+    dc = ROUND_POWER_OF_TWO((input[0] * cospi_16_64), VP9_DCT_CONST_BITS);
+    dc = ROUND_POWER_OF_TWO((dc * cospi_16_64), VP9_DCT_CONST_BITS);
+    dc_shifted = ROUND_POWER_OF_TWO(dc, 5);
+    dc_vec = __msa_fill_h(dc_shifted);
+    /* the same vector is handed to the add-and-store macro for all 8 rows */
+    VP9_ADDBLK_ST8x4_UB(dst, dst_stride, dc_vec, dc_vec, dc_vec, dc_vec);
+    dst += (4 * dst_stride);
+    VP9_ADDBLK_ST8x4_UB(dst, dst_stride, dc_vec, dc_vec, dc_vec, dc_vec);
+}
+
+static void vp9_idct8x8_12_colcol_addblk_msa(int16_t *input, uint8_t *dst,
+                                             int32_t dst_stride)
+{
+    v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
+    v8i16 s0, s1, s2, s3, s4, s5, s6, s7, k0, k1, k2, k3, m0, m1, m2, m3;
+    v4i32 tmp0, tmp1, tmp2, tmp3;
+    v8i16 zero = { 0 };
+    /* 8x8 inverse DCT added to dst; stages 1-4 appear to stand in for the first VP9_IDCT8x8_1D pass */
+    /* load vector elements of 8x8 block, then fold the eight vectors into in0..in3 */
+    LD_SH8(input, 8, in0, in1, in2, in3, in4, in5, in6, in7);
+    ILVR_D2_SH(in1, in0, in3, in2, in0, in1);
+    ILVR_D2_SH(in5, in4, in7, in6, in2, in3);
+    /* NOTE(review): a TRANSPOSE8X4_SH_SH call on in0..in3 is left disabled here - confirm it is not needed */
+
+    /* stage1: ILVL-interleaved (in3,in0), (in2,in1) dotted with the cospi_28/4 and cospi_20/12 pairs */
+    ILVL_H2_SH(in3, in0, in2, in1, s0, s1);
+    k0 = VP9_SET_COSPI_PAIR(cospi_28_64, -cospi_4_64);
+    k1 = VP9_SET_COSPI_PAIR(cospi_4_64, cospi_28_64);
+    k2 = VP9_SET_COSPI_PAIR(-cospi_20_64, cospi_12_64);
+    k3 = VP9_SET_COSPI_PAIR(cospi_12_64, cospi_20_64);
+    DOTP_SH4_SW(s0, s0, s1, s1, k0, k1, k2, k3, tmp0, tmp1, tmp2, tmp3);
+    SRARI_W4_SW(tmp0, tmp1, tmp2, tmp3, VP9_DCT_CONST_BITS);
+    PCKEV_H2_SH(zero, tmp0, zero, tmp1, s0, s1);
+    PCKEV_H2_SH(zero, tmp2, zero, tmp3, s2, s3);
+    BUTTERFLY_4(s0, s1, s3, s2, s4, s7, s6, s5);
+
+    /* stage2: ILVR-interleaved (in3,in1), (in2,in0) dotted with the cospi_16/16 and cospi_24/8 pairs */
+    ILVR_H2_SH(in3, in1, in2, in0, s1, s0);
+    k0 = VP9_SET_COSPI_PAIR(cospi_16_64, cospi_16_64);
+    k1 = VP9_SET_COSPI_PAIR(cospi_16_64, -cospi_16_64);
+    k2 = VP9_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64);
+    k3 = VP9_SET_COSPI_PAIR(cospi_8_64, cospi_24_64);
+    DOTP_SH4_SW(s0, s0, s1, s1, k0, k1, k2, k3, tmp0, tmp1, tmp2, tmp3);
+    SRARI_W4_SW(tmp0, tmp1, tmp2, tmp3, VP9_DCT_CONST_BITS);
+    PCKEV_H2_SH(zero, tmp0, zero, tmp1, s0, s1);
+    PCKEV_H2_SH(zero, tmp2, zero, tmp3, s2, s3);
+    BUTTERFLY_4(s0, s1, s2, s3, m0, m1, m2, m3);
+
+    /* stage3: the interleaved (s6, s5) pair dotted with +/-cospi_16_64 pairs */
+    s0 = __msa_ilvr_h(s6, s5);
+    /* k0 still holds the (cospi_16_64, cospi_16_64) pair set in stage2 */
+    k1 = VP9_SET_COSPI_PAIR(-cospi_16_64, cospi_16_64);
+    DOTP_SH2_SW(s0, s0, k1, k0, tmp0, tmp1);
+    SRARI_W2_SW(tmp0, tmp1, VP9_DCT_CONST_BITS);
+    PCKEV_H2_SH(zero, tmp0, zero, tmp1, s2, s3);
+
+    /* stage4: 8-way butterfly, 4x8 transpose, then the macro-based second 1-D pass */
+    BUTTERFLY_8(m0, m1, m2, m3, s4, s2, s3, s7,
+                in0, in1, in2, in3, in4, in5, in6, in7);
+    TRANSPOSE4X8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
+                       in0, in1, in2, in3, in4, in5, in6, in7);
+    VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
+                   in0, in1, in2, in3, in4, in5, in6, in7);
+
+    /* final rounding (add 2^4, divide by 2^5) and shift */
+    SRARI_H4_SH(in0, in1, in2, in3, 5);
+    SRARI_H4_SH(in4, in5, in6, in7, 5);
+
+    /* add block and store 8x8: in0..in3 first, in4..in7 four rows further down */
+    VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3);
+    dst += (4 * dst_stride);
+    VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7);
+}
+
+static void vp9_idct8x8_colcol_addblk_msa(int16_t *input, uint8_t *dst,
+                                          int32_t dst_stride)
+{
+    v8i16 v0, v1, v2, v3, v4, v5, v6, v7;
+    /* full 8x8 inverse DCT: 1-D pass, transpose, 1-D pass, then add to dst */
+    /* read eight vectors spaced 8 coefficients apart */
+    LD_SH8(input, 8, v0, v1, v2, v3, v4, v5, v6, v7);
+    /* first 1-D pass, results written back over the inputs */
+    VP9_IDCT8x8_1D(v0, v1, v2, v3, v4, v5, v6, v7,
+                   v0, v1, v2, v3, v4, v5, v6, v7);
+    /* transpose in place ahead of the second pass */
+    TRANSPOSE8x8_SH_SH(v0, v1, v2, v3, v4, v5, v6, v7,
+                       v0, v1, v2, v3, v4, v5, v6, v7);
+    /* second 1-D pass, again in place */
+    VP9_IDCT8x8_1D(v0, v1, v2, v3, v4, v5, v6, v7,
+                   v0, v1, v2, v3, v4, v5, v6, v7);
+    /* first four vectors: rounding shift by 5 (adds 2^4), add to dst, store */
+    SRARI_H4_SH(v0, v1, v2, v3, 5);
+    VP9_ADDBLK_ST8x4_UB(dst, dst_stride, v0, v1, v2, v3);
+    /* remaining four vectors, four rows further down */
+    SRARI_H4_SH(v4, v5, v6, v7, 5);
+    dst += (4 * dst_stride);
+    VP9_ADDBLK_ST8x4_UB(dst, dst_stride, v4, v5, v6, v7);
+}
+
+static void vp9_iadst8x8_colcol_addblk_msa(int16_t *input, uint8_t *dst,
+                                           int32_t dst_stride)
+{
+    v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
+    v8i16 res0, res1, res2, res3, res4, res5, res6, res7;
+    v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+    v8i16 out0, out1, out2, out3, out4, out5, out6, out7;
+    v8i16 cnst0, cnst1, cnst2, cnst3, cnst4;
+    v8i16 temp0, temp1, temp2, temp3, s0, s1;
+    v16i8 zero = { 0 };
+    /* 8x8 ADST in both passes: VP9_ADST8 first, then an open-coded pass fused with the add/store to dst */
+    /* load vector elements of 8x8 block */
+    LD_SH8(input, 8, in0, in1, in2, in3, in4, in5, in6, in7);
+
+    /* first 1-D ADST pass via the VP9_ADST8 macro, in place */
+    VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7,
+              in0, in1, in2, in3, in4, in5, in6, in7);
+
+    /* transpose before the open-coded second pass */
+    TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
+                       in0, in1, in2, in3, in4, in5, in6, in7);
+    /* constant pairs built from cospi_2/30 and cospi_18/14, applied to (in7,in0) and (in3,in4) */
+    cnst0 = __msa_fill_h(cospi_2_64);
+    cnst1 = __msa_fill_h(cospi_30_64);
+    cnst2 = -cnst0;
+    ILVEV_H2_SH(cnst0, cnst1, cnst1, cnst2, cnst0, cnst1);
+    cnst2 = __msa_fill_h(cospi_18_64);
+    cnst3 = __msa_fill_h(cospi_14_64);
+    cnst4 = -cnst2;
+    ILVEV_H2_SH(cnst2, cnst3, cnst3, cnst4, cnst2, cnst3);
+
+    ILVRL_H2_SH(in0, in7, temp1, temp0);
+    ILVRL_H2_SH(in4, in3, temp3, temp2);
+    VP9_DOT_ADD_SUB_SRARI_PCK(temp0, temp1, temp2, temp3, cnst0, cnst1, cnst2,
+                              cnst3, in7, in0, in4, in3);
+    /* same step with cospi_10/22 and cospi_26/6, applied to (in5,in2) and (in1,in6) */
+    cnst0 = __msa_fill_h(cospi_10_64);
+    cnst1 = __msa_fill_h(cospi_22_64);
+    cnst2 = -cnst0;
+    ILVEV_H2_SH(cnst0, cnst1, cnst1, cnst2, cnst0, cnst1);
+    cnst2 = __msa_fill_h(cospi_26_64);
+    cnst3 = __msa_fill_h(cospi_6_64);
+    cnst4 = -cnst2;
+    ILVEV_H2_SH(cnst2, cnst3, cnst3, cnst4, cnst2, cnst3);
+
+    ILVRL_H2_SH(in2, in5, temp1, temp0);
+    ILVRL_H2_SH(in6, in1, temp3, temp2);
+    VP9_DOT_ADD_SUB_SRARI_PCK(temp0, temp1, temp2, temp3, cnst0, cnst1, cnst2,
+                              cnst3, in5, in2, in6, in1);
+    BUTTERFLY_4(in7, in0, in2, in5, s1, s0, in2, in5);
+    out7 = -s0;
+    out0 = s1;
+    SRARI_H2_SH(out0, out7, 5);
+    dst0 = LD_UB(dst + 0 * dst_stride);
+    dst7 = LD_UB(dst + 7 * dst_stride);
+    /* rows 0 and 7: interleave dst bytes with zero, add, clamp to 0..255, pack, store 8 bytes */
+    res0 = (v8i16) __msa_ilvr_b(zero, (v16i8) dst0);
+    res0 += out0;
+    res0 = CLIP_SH_0_255(res0);
+    res0 = (v8i16) __msa_pckev_b((v16i8) res0, (v16i8) res0);
+    ST8x1_UB(res0, dst);
+
+    res7 = (v8i16) __msa_ilvr_b(zero, (v16i8) dst7);
+    res7 += out7;
+    res7 = CLIP_SH_0_255(res7);
+    res7 = (v8i16) __msa_pckev_b((v16i8) res7, (v16i8) res7);
+    ST8x1_UB(res7, dst + 7 * dst_stride);
+    /* rows 1 and 6: cospi_8/24 pairs on (in3,in4) and (in1,in6); s0, s1 are kept for rows 2 and 5 */
+    cnst1 = __msa_fill_h(cospi_24_64);
+    cnst0 = __msa_fill_h(cospi_8_64);
+    cnst3 = -cnst1;
+    cnst2 = -cnst0;
+
+    ILVEV_H2_SH(cnst3, cnst0, cnst1, cnst2, cnst3, cnst2);
+    cnst0 = __msa_ilvev_h(cnst1, cnst0);
+    cnst1 = cnst0;
+
+    ILVRL_H2_SH(in4, in3, temp1, temp0);
+    ILVRL_H2_SH(in6, in1, temp3, temp2);
+    VP9_DOT_ADD_SUB_SRARI_PCK(temp0, temp1, temp2, temp3, cnst0, cnst2, cnst3,
+                              cnst1, out1, out6, s0, s1);
+    out1 = -out1;
+    SRARI_H2_SH(out1, out6, 5);
+    dst1 = LD_UB(dst + 1 * dst_stride);
+    dst6 = LD_UB(dst + 6 * dst_stride);
+    ILVR_B2_SH(zero, dst1, zero, dst6, res1, res6);
+    ADD2(res1, out1, res6, out6, res1, res6);
+    CLIP_SH2_0_255(res1, res6);
+    PCKEV_B2_SH(res1, res1, res6, res6, res1, res6);
+    ST8x1_UB(res1, dst + dst_stride);
+    ST8x1_UB(res6, dst + 6 * dst_stride);
+    /* rows 3 and 4 come from (in5,in2), rows 2 and 5 from (s1,s0); both use cospi_16_64 constants */
+    cnst0 = __msa_fill_h(cospi_16_64);
+    cnst1 = -cnst0;
+    cnst1 = __msa_ilvev_h(cnst1, cnst0);
+
+    ILVRL_H2_SH(in2, in5, temp1, temp0);
+    ILVRL_H2_SH(s0, s1, temp3, temp2);
+    out3 = VP9_DOT_SHIFT_RIGHT_PCK_H(temp0, temp1, cnst0);
+    out4 = VP9_DOT_SHIFT_RIGHT_PCK_H(temp0, temp1, cnst1);
+    out3 = -out3;
+    SRARI_H2_SH(out3, out4, 5);
+    dst3 = LD_UB(dst + 3 * dst_stride);
+    dst4 = LD_UB(dst + 4 * dst_stride);
+    ILVR_B2_SH(zero, dst3, zero, dst4, res3, res4);
+    ADD2(res3, out3, res4, out4, res3, res4);
+    CLIP_SH2_0_255(res3, res4);
+    PCKEV_B2_SH(res3, res3, res4, res4, res3, res4);
+    ST8x1_UB(res3, dst + 3 * dst_stride);
+    ST8x1_UB(res4, dst + 4 * dst_stride);
+    /* rows 2 and 5, from temp2/temp3 prepared above */
+    out2 = VP9_DOT_SHIFT_RIGHT_PCK_H(temp2, temp3, cnst0);
+    out5 = VP9_DOT_SHIFT_RIGHT_PCK_H(temp2, temp3, cnst1);
+    out5 = -out5;
+    SRARI_H2_SH(out2, out5, 5);
+    dst2 = LD_UB(dst + 2 * dst_stride);
+    dst5 = LD_UB(dst + 5 * dst_stride);
+    ILVR_B2_SH(zero, dst2, zero, dst5, res2, res5);
+    ADD2(res2, out2, res5, out5, res2, res5);
+    CLIP_SH2_0_255(res2, res5);
+    PCKEV_B2_SH(res2, res2, res5, res5, res2, res5);
+    ST8x1_UB(res2, dst + 2 * dst_stride);
+    ST8x1_UB(res5, dst + 5 * dst_stride);
+}
+
+static void vp9_iadst_idct_8x8_add_msa(int16_t *input, uint8_t *dst,
+                                       int32_t dst_stride, int32_t eob)
+{
+    v8i16 v0, v1, v2, v3, v4, v5, v6, v7;
+    /* 8x8 hybrid: 1-D ADST pass, transpose, 1-D IDCT pass, add to dst; eob is not used */
+    /* the eight loaded vectors land in the registers in this shuffled order */
+    LD_SH8(input, 8, v1, v6, v3, v4, v5, v2, v7, v0);
+    /* first pass: 1-D ADST, results written back over the inputs */
+    VP9_IADST8x8_1D(v0, v1, v2, v3, v4, v5, v6, v7,
+                    v0, v1, v2, v3, v4, v5, v6, v7);
+    /* transpose in place ahead of the second pass */
+    TRANSPOSE8x8_SH_SH(v0, v1, v2, v3, v4, v5, v6, v7,
+                       v0, v1, v2, v3, v4, v5, v6, v7);
+    /* second pass: 1-D IDCT, in place */
+    VP9_IDCT8x8_1D(v0, v1, v2, v3, v4, v5, v6, v7,
+                   v0, v1, v2, v3, v4, v5, v6, v7);
+    /* first four vectors: rounding shift by 5 (adds 2^4), add to dst, store */
+    SRARI_H4_SH(v0, v1, v2, v3, 5);
+    VP9_ADDBLK_ST8x4_UB(dst, dst_stride, v0, v1, v2, v3);
+    /* remaining four vectors, four rows further down */
+    SRARI_H4_SH(v4, v5, v6, v7, 5);
+    dst += (4 * dst_stride);
+    VP9_ADDBLK_ST8x4_UB(dst, dst_stride, v4, v5, v6, v7);
+}
+
+static void vp9_idct_iadst_8x8_add_msa(int16_t *input, uint8_t *dst,
+                                       int32_t dst_stride, int32_t eob)
+{
+    v8i16 v0, v1, v2, v3, v4, v5, v6, v7;
+    /* 8x8 hybrid: 1-D IDCT pass, transpose, 1-D ADST pass, add to dst; eob is not used */
+    /* read eight vectors spaced 8 coefficients apart */
+    LD_SH8(input, 8, v0, v1, v2, v3, v4, v5, v6, v7);
+
+    /* first pass: 1-D IDCT, results written back over the inputs */
+    VP9_IDCT8x8_1D(v0, v1, v2, v3, v4, v5, v6, v7,
+                   v0, v1, v2, v3, v4, v5, v6, v7);
+    /* transpose; the outputs land in the shuffled register order handed to the ADST pass */
+    TRANSPOSE8x8_SH_SH(v0, v1, v2, v3, v4, v5, v6, v7,
+                       v1, v6, v3, v4, v5, v2, v7, v0);
+    /* second pass: 1-D ADST, in place */
+    VP9_IADST8x8_1D(v0, v1, v2, v3, v4, v5, v6, v7,
+                    v0, v1, v2, v3, v4, v5, v6, v7);
+    /* first four vectors: rounding shift by 5 (adds 2^4), add to dst, store */
+    SRARI_H4_SH(v0, v1, v2, v3, 5);
+    VP9_ADDBLK_ST8x4_UB(dst, dst_stride, v0, v1, v2, v3);
+    /* remaining four vectors, four rows further down */
+    SRARI_H4_SH(v4, v5, v6, v7, 5);
+    dst += (4 * dst_stride);
+    VP9_ADDBLK_ST8x4_UB(dst, dst_stride, v4, v5, v6, v7);
+}
+
+#define VP9_IADST8x16_1D(r0, r1, r2, r3, r4, r5, r6, r7, r8,          \
+                         r9, r10, r11, r12, r13, r14, r15,            \
+                         out0, out1, out2, out3, out4, out5,          \
+                         out6, out7, out8, out9, out10, out11,        \
+                         out12, out13, out14, out15)                  \
+{                                                                     \
+    v8i16 g0_m, g1_m, g2_m, g3_m, g4_m, g5_m, g6_m, g7_m;             \
+    v8i16 g8_m, g9_m, g10_m, g11_m, g12_m, g13_m, g14_m, g15_m;       \
+    v8i16 h0_m, h1_m, h2_m, h3_m, h4_m, h5_m, h6_m, h7_m;             \
+    v8i16 h8_m, h9_m, h10_m, h11_m;                                   \
+    v8i16 k0_m, k1_m, k2_m, k3_m;                                     \
+    /* 16-point ADST; see vp9_iadst16_1d_columns_msa for out order */ \
+    /* stage 1: inputs x odd-numbered cospi pairs (VP9_MADD_BF) */    \
+    k0_m = VP9_SET_COSPI_PAIR(cospi_1_64, cospi_31_64);               \
+    k1_m = VP9_SET_COSPI_PAIR(cospi_31_64, -cospi_1_64);              \
+    k2_m = VP9_SET_COSPI_PAIR(cospi_17_64, cospi_15_64);              \
+    k3_m = VP9_SET_COSPI_PAIR(cospi_15_64, -cospi_17_64);             \
+    VP9_MADD_BF(r15, r0, r7, r8, k0_m, k1_m, k2_m, k3_m,              \
+                g0_m, g1_m, g2_m, g3_m);                              \
+    k0_m = VP9_SET_COSPI_PAIR(cospi_5_64, cospi_27_64);               \
+    k1_m = VP9_SET_COSPI_PAIR(cospi_27_64, -cospi_5_64);              \
+    k2_m = VP9_SET_COSPI_PAIR(cospi_21_64, cospi_11_64);              \
+    k3_m = VP9_SET_COSPI_PAIR(cospi_11_64, -cospi_21_64);             \
+    VP9_MADD_BF(r13, r2, r5, r10, k0_m, k1_m, k2_m, k3_m,             \
+                g4_m, g5_m, g6_m, g7_m);                              \
+    k0_m = VP9_SET_COSPI_PAIR(cospi_9_64, cospi_23_64);               \
+    k1_m = VP9_SET_COSPI_PAIR(cospi_23_64, -cospi_9_64);              \
+    k2_m = VP9_SET_COSPI_PAIR(cospi_25_64, cospi_7_64);               \
+    k3_m = VP9_SET_COSPI_PAIR(cospi_7_64, -cospi_25_64);              \
+    VP9_MADD_BF(r11, r4, r3, r12, k0_m, k1_m, k2_m, k3_m,             \
+                g8_m, g9_m, g10_m, g11_m);                            \
+    k0_m = VP9_SET_COSPI_PAIR(cospi_13_64, cospi_19_64);              \
+    k1_m = VP9_SET_COSPI_PAIR(cospi_19_64, -cospi_13_64);             \
+    k2_m = VP9_SET_COSPI_PAIR(cospi_29_64, cospi_3_64);               \
+    k3_m = VP9_SET_COSPI_PAIR(cospi_3_64, -cospi_29_64);              \
+    VP9_MADD_BF(r9, r6, r1, r14, k0_m, k1_m, k2_m, k3_m,              \
+                g12_m, g13_m, g14_m, g15_m);                          \
+                                                                      \
+    /* stage 2: cospi_4/28, cospi_12/20 pairs + butterflies */        \
+    k0_m = VP9_SET_COSPI_PAIR(cospi_4_64, cospi_28_64);               \
+    k1_m = VP9_SET_COSPI_PAIR(cospi_28_64, -cospi_4_64);              \
+    k2_m = VP9_SET_COSPI_PAIR(-cospi_28_64, cospi_4_64);              \
+    VP9_MADD_BF(g1_m, g3_m, g9_m, g11_m, k0_m, k1_m, k2_m, k0_m,      \
+                h0_m, h1_m, h2_m, h3_m);                              \
+    k0_m = VP9_SET_COSPI_PAIR(cospi_12_64, cospi_20_64);              \
+    k1_m = VP9_SET_COSPI_PAIR(-cospi_20_64, cospi_12_64);             \
+    k2_m = VP9_SET_COSPI_PAIR(cospi_20_64, -cospi_12_64);             \
+    VP9_MADD_BF(g7_m, g5_m, g15_m, g13_m, k0_m, k1_m, k2_m, k0_m,     \
+                h4_m, h5_m, h6_m, h7_m);                              \
+    BUTTERFLY_4(h0_m, h2_m, h6_m, h4_m, out8, out9, out11, out10);    \
+    BUTTERFLY_8(g0_m, g2_m, g4_m, g6_m, g14_m, g12_m, g10_m, g8_m,    \
+                h8_m, h9_m, h10_m, h11_m, h6_m, h4_m, h2_m, h0_m);    \
+                                                                      \
+    /* stage 3: butterfly, then cospi_8/24 pairs */                   \
+    BUTTERFLY_4(h8_m, h9_m, h11_m, h10_m, out0, out1, h11_m, h10_m);  \
+    k0_m = VP9_SET_COSPI_PAIR(cospi_8_64, cospi_24_64);               \
+    k1_m = VP9_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64);              \
+    k2_m = VP9_SET_COSPI_PAIR(-cospi_24_64, cospi_8_64);              \
+    VP9_MADD_BF(h0_m, h2_m, h4_m, h6_m, k0_m, k1_m, k2_m, k0_m,       \
+                out4, out6, out5, out7);                              \
+    VP9_MADD_BF(h1_m, h3_m, h5_m, h7_m, k0_m, k1_m, k2_m, k0_m,       \
+                out12, out14, out13, out15);                          \
+                                                                      \
+    /* stage 4: +/-cospi_16_64 on out2/3, 6/7, 10/11, 14/15 */        \
+    k0_m = VP9_SET_COSPI_PAIR(cospi_16_64, cospi_16_64);              \
+    k1_m = VP9_SET_COSPI_PAIR(-cospi_16_64, -cospi_16_64);            \
+    k2_m = VP9_SET_COSPI_PAIR(cospi_16_64, -cospi_16_64);             \
+    k3_m = VP9_SET_COSPI_PAIR(-cospi_16_64, cospi_16_64);             \
+    VP9_MADD_SHORT(h10_m, h11_m, k1_m, k2_m, out2, out3);             \
+    VP9_MADD_SHORT(out6, out7, k0_m, k3_m, out6, out7);               \
+    VP9_MADD_SHORT(out10, out11, k0_m, k3_m, out10, out11);           \
+    VP9_MADD_SHORT(out14, out15, k1_m, k2_m, out14, out15);           \
+}
+
+static void vp9_idct16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,
+                                             int32_t dst_stride)
+{
+    v8i16 loc0, loc1, loc2, loc3;
+    v8i16 reg0, reg2, reg4, reg6, reg8, reg10, reg12, reg14;
+    v8i16 reg3, reg13, reg11, reg5, reg7, reg9, reg1, reg15;
+    v8i16 tmp5, tmp6, tmp7;
+    /* 16-point 1-D inverse DCT on 16 vectors (row stride 16); result is rounded by 6 and added to 16 rows of dst */
+    /* load the first eight vectors (reg0..reg7) */
+    LD_SH8(input, 16, reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7);
+    input += 8 * 16;
+    /* load the next eight vectors (reg8..reg15), 8 * 16 coefficients further on */
+    LD_SH8(input, 16, reg8, reg9, reg10, reg11, reg12, reg13, reg14, reg15);
+    /* first part: works only on the even-numbered registers reg0, reg2, ..., reg14 */
+    VP9_DOTP_CONST_PAIR(reg2, reg14, cospi_28_64, cospi_4_64, reg2, reg14);
+    VP9_DOTP_CONST_PAIR(reg10, reg6, cospi_12_64, cospi_20_64, reg10, reg6);
+    BUTTERFLY_4(reg2, reg14, reg6, reg10, loc0, loc1, reg14, reg2);
+    VP9_DOTP_CONST_PAIR(reg14, reg2, cospi_16_64, cospi_16_64, loc2, loc3);
+    VP9_DOTP_CONST_PAIR(reg0, reg8, cospi_16_64, cospi_16_64, reg0, reg8);
+    VP9_DOTP_CONST_PAIR(reg4, reg12, cospi_24_64, cospi_8_64, reg4, reg12);
+    BUTTERFLY_4(reg8, reg0, reg4, reg12, reg2, reg6, reg10, reg14);
+
+    reg0 = reg2 - loc1;
+    reg2 = reg2 + loc1;
+    reg12 = reg14 - loc0;
+    reg14 = reg14 + loc0;
+    reg4 = reg6 - loc3;
+    reg6 = reg6 + loc3;
+    reg8 = reg10 - loc2;
+    reg10 = reg10 + loc2;
+
+    /* stage 2: brings in the odd-numbered registers reg1, reg3, ..., reg15 */
+    VP9_DOTP_CONST_PAIR(reg1, reg15, cospi_30_64, cospi_2_64, reg1, reg15);
+    VP9_DOTP_CONST_PAIR(reg9, reg7, cospi_14_64, cospi_18_64, loc2, loc3);
+
+    reg9 = reg1 - loc2;
+    reg1 = reg1 + loc2;
+    reg7 = reg15 - loc3;
+    reg15 = reg15 + loc3;
+
+    VP9_DOTP_CONST_PAIR(reg5, reg11, cospi_22_64, cospi_10_64, reg5, reg11);
+    VP9_DOTP_CONST_PAIR(reg13, reg3, cospi_6_64, cospi_26_64, loc0, loc1);
+    BUTTERFLY_4(loc0, loc1, reg11, reg5, reg13, reg3, reg11, reg5);
+
+    loc1 = reg15 + reg3;
+    reg3 = reg15 - reg3;
+    loc2 = reg2 + loc1;
+    reg15 = reg2 - loc1;
+
+    loc1 = reg1 + reg13;
+    reg13 = reg1 - reg13;
+    loc0 = reg0 + loc1;
+    loc1 = reg0 - loc1;
+    tmp6 = loc0; /* parked; moved into reg14 before the store */
+    tmp7 = loc1; /* parked; moved into reg3 before the store */
+    reg0 = loc2;
+
+    VP9_DOTP_CONST_PAIR(reg7, reg9, cospi_24_64, cospi_8_64, reg7, reg9);
+    VP9_DOTP_CONST_PAIR((-reg5), (-reg11), cospi_8_64, cospi_24_64, reg5,
+                        reg11);
+
+    loc0 = reg9 + reg5;
+    reg5 = reg9 - reg5;
+    reg2 = reg6 + loc0;
+    reg1 = reg6 - loc0;
+
+    loc0 = reg7 + reg11;
+    reg11 = reg7 - reg11;
+    loc1 = reg4 + loc0;
+    loc2 = reg4 - loc0;
+    tmp5 = loc1; /* parked; moved into reg12 before the store */
+
+    VP9_DOTP_CONST_PAIR(reg5, reg11, cospi_16_64, cospi_16_64, reg5, reg11);
+    BUTTERFLY_4(reg8, reg10, reg11, reg5, loc0, reg4, reg9, loc1);
+
+    reg10 = loc0;
+    reg11 = loc1;
+
+    VP9_DOTP_CONST_PAIR(reg3, reg13, cospi_16_64, cospi_16_64, reg3, reg13);
+    BUTTERFLY_4(reg12, reg14, reg13, reg3, reg8, reg6, reg7, reg5);
+    reg13 = loc2;
+
+    /* move the parked values back into registers (this variant does no transpose) */
+    reg12 = tmp5;
+    reg14 = tmp6;
+    reg3 = tmp7;
+    /* rounding shift by 6, then add/store four rows at a time; the register order below is the row order */
+    SRARI_H4_SH(reg0, reg2, reg4, reg6, 6);
+    VP9_ADDBLK_ST8x4_UB(dst, dst_stride, reg0, reg2, reg4, reg6);
+    dst += (4 * dst_stride);
+    SRARI_H4_SH(reg8, reg10, reg12, reg14, 6);
+    VP9_ADDBLK_ST8x4_UB(dst, dst_stride, reg8, reg10, reg12, reg14);
+    dst += (4 * dst_stride);
+    SRARI_H4_SH(reg3, reg13, reg11, reg5, 6);
+    VP9_ADDBLK_ST8x4_UB(dst, dst_stride, reg3, reg13, reg11, reg5);
+    dst += (4 * dst_stride);
+    SRARI_H4_SH(reg7, reg9, reg1, reg15, 6);
+    VP9_ADDBLK_ST8x4_UB(dst, dst_stride, reg7, reg9, reg1, reg15);
+}
+
+static void vp9_idct16_1d_columns_msa(int16_t *input, int16_t *output)
+{
+    v8i16 loc0, loc1, loc2, loc3;
+    v8i16 reg0, reg2, reg4, reg6, reg8, reg10, reg12, reg14;
+    v8i16 reg3, reg13, reg11, reg5, reg7, reg9, reg1, reg15;
+    v8i16 tmp5, tmp6, tmp7;
+    /* 16-point 1-D inverse DCT on 16 vectors (row stride 16); result is transposed into 8 rows of 16 at output */
+    /* load the first eight vectors (reg0..reg7) */
+    LD_SH8(input, 16, reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7);
+    input += 8 * 16;
+    /* load the next eight vectors (reg8..reg15), 8 * 16 coefficients further on */
+    LD_SH8(input, 16, reg8, reg9, reg10, reg11, reg12, reg13, reg14, reg15);
+    /* first part: works only on the even-numbered registers reg0, reg2, ..., reg14 */
+    VP9_DOTP_CONST_PAIR(reg2, reg14, cospi_28_64, cospi_4_64, reg2, reg14);
+    VP9_DOTP_CONST_PAIR(reg10, reg6, cospi_12_64, cospi_20_64, reg10, reg6);
+    BUTTERFLY_4(reg2, reg14, reg6, reg10, loc0, loc1, reg14, reg2);
+    VP9_DOTP_CONST_PAIR(reg14, reg2, cospi_16_64, cospi_16_64, loc2, loc3);
+    VP9_DOTP_CONST_PAIR(reg0, reg8, cospi_16_64, cospi_16_64, reg0, reg8);
+    VP9_DOTP_CONST_PAIR(reg4, reg12, cospi_24_64, cospi_8_64, reg4, reg12);
+    BUTTERFLY_4(reg8, reg0, reg4, reg12, reg2, reg6, reg10, reg14);
+
+    reg0 = reg2 - loc1;
+    reg2 = reg2 + loc1;
+    reg12 = reg14 - loc0;
+    reg14 = reg14 + loc0;
+    reg4 = reg6 - loc3;
+    reg6 = reg6 + loc3;
+    reg8 = reg10 - loc2;
+    reg10 = reg10 + loc2;
+
+    /* stage 2: brings in the odd-numbered registers reg1, reg3, ..., reg15 */
+    VP9_DOTP_CONST_PAIR(reg1, reg15, cospi_30_64, cospi_2_64, reg1, reg15);
+    VP9_DOTP_CONST_PAIR(reg9, reg7, cospi_14_64, cospi_18_64, loc2, loc3);
+
+    reg9 = reg1 - loc2;
+    reg1 = reg1 + loc2;
+    reg7 = reg15 - loc3;
+    reg15 = reg15 + loc3;
+
+    VP9_DOTP_CONST_PAIR(reg5, reg11, cospi_22_64, cospi_10_64, reg5, reg11);
+    VP9_DOTP_CONST_PAIR(reg13, reg3, cospi_6_64, cospi_26_64, loc0, loc1);
+    BUTTERFLY_4(loc0, loc1, reg11, reg5, reg13, reg3, reg11, reg5);
+
+    loc1 = reg15 + reg3;
+    reg3 = reg15 - reg3;
+    loc2 = reg2 + loc1;
+    reg15 = reg2 - loc1;
+
+    loc1 = reg1 + reg13;
+    reg13 = reg1 - reg13;
+    loc0 = reg0 + loc1;
+    loc1 = reg0 - loc1;
+    tmp6 = loc0; /* parked; moved into reg14 before the store */
+    tmp7 = loc1; /* parked; moved into reg3 before the store */
+    reg0 = loc2;
+
+    VP9_DOTP_CONST_PAIR(reg7, reg9, cospi_24_64, cospi_8_64, reg7, reg9);
+    VP9_DOTP_CONST_PAIR((-reg5), (-reg11), cospi_8_64, cospi_24_64, reg5,
+                        reg11);
+
+    loc0 = reg9 + reg5;
+    reg5 = reg9 - reg5;
+    reg2 = reg6 + loc0;
+    reg1 = reg6 - loc0;
+
+    loc0 = reg7 + reg11;
+    reg11 = reg7 - reg11;
+    loc1 = reg4 + loc0;
+    loc2 = reg4 - loc0;
+
+    tmp5 = loc1; /* parked; moved into reg12 before the store */
+
+    VP9_DOTP_CONST_PAIR(reg5, reg11, cospi_16_64, cospi_16_64, reg5, reg11);
+    BUTTERFLY_4(reg8, reg10, reg11, reg5, loc0, reg4, reg9, loc1);
+
+    reg10 = loc0;
+    reg11 = loc1;
+
+    VP9_DOTP_CONST_PAIR(reg3, reg13, cospi_16_64, cospi_16_64, reg3, reg13);
+    BUTTERFLY_4(reg12, reg14, reg13, reg3, reg8, reg6, reg7, reg5);
+    reg13 = loc2;
+
+    /* move the parked values back into registers before the transposed store */
+    reg12 = tmp5;
+    reg14 = tmp6;
+    reg3 = tmp7;
+
+    /* transpose the first eight results and store them at columns 0-7 of eight output rows */
+    TRANSPOSE8x8_SH_SH(reg0, reg2, reg4, reg6, reg8, reg10, reg12, reg14,
+                       reg0, reg2, reg4, reg6, reg8, reg10, reg12, reg14);
+    ST_SH4(reg0, reg2, reg4, reg6, output, 16);
+    ST_SH4(reg8, reg10, reg12, reg14, (output + 4 * 16), 16);
+
+    /* transpose the last eight results and store them at columns 8-15 of the same rows */
+    TRANSPOSE8x8_SH_SH(reg3, reg13, reg11, reg5, reg7, reg9, reg1, reg15,
+                       reg3, reg13, reg11, reg5, reg7, reg9, reg1, reg15);
+    ST_SH4(reg3, reg13, reg11, reg5, (output + 8), 16);
+    ST_SH4(reg7, reg9, reg1, reg15, (output + 8 + 4 * 16), 16);
+}
+
+static void vp9_idct16x16_1_add_msa(int16_t *input, uint8_t *dst,
+                                    int32_t dst_stride)
+{
+    v16u8 px0, px1, px2, px3, pk0, pk1, pk2, pk3;
+    v8i16 dc_vec, w0, w1, w2, w3, w4, w5, w6, w7;
+    int16_t dc;
+    uint8_t band;
+
+    /* DC-only 16x16 case: input[0] is the only coefficient read */
+    dc = ROUND_POWER_OF_TWO((input[0] * cospi_16_64), VP9_DCT_CONST_BITS);
+    dc = ROUND_POWER_OF_TWO((dc * cospi_16_64), VP9_DCT_CONST_BITS);
+    dc = ROUND_POWER_OF_TWO(dc, 6);
+    dc_vec = __msa_fill_h(dc);
+
+    /* four bands of four rows; each row is one 16-byte vector of dst */
+    for (band = 0; band < 4; band++) {
+        LD_UB4(dst, dst_stride, px0, px1, px2, px3);
+        /* widen each row into two halfword vectors */
+        UNPCK_UB_SH(px0, w0, w4);
+        UNPCK_UB_SH(px1, w1, w5);
+        UNPCK_UB_SH(px2, w2, w6);
+        UNPCK_UB_SH(px3, w3, w7);
+        /* add the DC vector to every widened row */
+        ADD4(w0, dc_vec, w1, dc_vec, w2, dc_vec, w3, dc_vec, w0, w1, w2, w3);
+        ADD4(w4, dc_vec, w5, dc_vec, w6, dc_vec, w7, dc_vec, w4, w5, w6, w7);
+        /* clamp to 0..255 and narrow back to bytes */
+        CLIP_SH4_0_255(w0, w1, w2, w3);
+        CLIP_SH4_0_255(w4, w5, w6, w7);
+        PCKEV_B4_UB(w4, w0, w5, w1, w6, w2, w7, w3, pk0, pk1, pk2, pk3);
+        ST_UB4(pk0, pk1, pk2, pk3, dst, dst_stride);
+        dst += (4 * dst_stride);
+    }
+}
+
+static void vp9_idct16x16_10_colcol_addblk_msa(int16_t *input, uint8_t *dst,
+                                               int32_t dst_stride)
+{
+    int32_t i;
+    int16_t out_arr[16 * 16] ALLOC_ALIGNED(ALIGNMENT);
+    int16_t *out = out_arr;
+    /* Reduced 16x16 inverse DCT + add to dst: only one 8-column strip of input is transformed. */
+    /* transform rows: fills the first 8 rows of out_arr */
+    vp9_idct16_1d_columns_msa(input, out);
+    /* short case just considers top 4 rows as valid output: rows 4..15 are */
+    /* cleared below, eight 32-bit stores (one 16-coefficient row) per pass  */
+    out += 4 * 16;
+    for (i = 12; i--;) {
+        __asm__ volatile (
+            "sw     $zero,   0(%[out])     \n\t"
+            "sw     $zero,   4(%[out])     \n\t"
+            "sw     $zero,   8(%[out])     \n\t"
+            "sw     $zero,  12(%[out])     \n\t"
+            "sw     $zero,  16(%[out])     \n\t"
+            "sw     $zero,  20(%[out])     \n\t"
+            "sw     $zero,  24(%[out])     \n\t"
+            "sw     $zero,  28(%[out])     \n\t"
+            : /* no register outputs */
+            : [out] "r" (out)
+            : "memory" /* the stores write out_arr; keep accesses to it ordered around the asm */
+        );
+
+        out += 16;
+    }
+
+    out = out_arr;
+
+    /* transform columns */
+    for (i = 0; i < 2; i++) {
+        /* process 8 * 16 block */
+        vp9_idct16_1d_columns_addblk_msa((out + (i << 3)), (dst + (i << 3)),
+                                         dst_stride);
+    }
+}
+
+static void vp9_idct16x16_colcol_addblk_msa(int16_t *input, uint8_t *dst,
+                                            int32_t dst_stride)
+{
+    int16_t out_arr[16 * 16] ALLOC_ALIGNED(ALIGNMENT);
+    int32_t strip;
+
+    /*
+     * Full 16x16 inverse DCT, done as two passes over an aligned scratch block.
+     */
+    /* pass 1: each call takes an 8-column strip of input and fills 8 rows of out_arr */
+    for (strip = 0; strip < 2; strip++) {
+        vp9_idct16_1d_columns_msa(input + 8 * strip, out_arr + 128 * strip);
+    }
+
+    /* pass 2: each call transforms an 8-column strip of out_arr and adds it to dst */
+    for (strip = 0; strip < 2; strip++) {
+        vp9_idct16_1d_columns_addblk_msa(out_arr + 8 * strip, dst + 8 * strip,
+                                         dst_stride);
+    }
+}
+
+static void vp9_iadst16_1d_columns_msa(int16_t *input, int16_t *output)
+{
+    v8i16 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15;
+    v8i16 l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, l13, l14, l15;
+    /* 16-point 1-D ADST on 16 vectors (row stride 16); result is transposed into 8 rows of 16 at output */
+    /* load input data */
+    LD_SH16(input, 16,
+            l0, l1, l2, l3, l4, l5, l6, l7,
+            l8, l9, l10, l11, l12, l13, l14, l15);
+
+    /* ADST in horizontal; the macro leaves its results in r0..r15, not yet in output order */
+    VP9_IADST8x16_1D(l0, l1, l2, l3, l4, l5, l6, l7,
+                     l8, l9, l10, l11, l12, l13, l14, l15,
+                     r0, r1, r2, r3, r4, r5, r6, r7,
+                     r8, r9, r10, r11, r12, r13, r14, r15);
+    /* four of the results are used with the opposite sign */
+    l1 = -r8;
+    l3 = -r4;
+    l13 = -r13;
+    l15 = -r1;
+    /* order fed to the transposes: r0,-r8,r12,-r4,r6,r14,r10,r2 | r3,r11,r15,r7,r5,-r13,r9,-r1 */
+    TRANSPOSE8x8_SH_SH(r0, l1, r12, l3, r6, r14, r10, r2,
+                       l0, l1, l2, l3, l4, l5, l6, l7);
+    ST_SH8(l0, l1, l2, l3, l4, l5, l6, l7, output, 16);
+    TRANSPOSE8x8_SH_SH(r3, r11, r15, r7, r5, l13, r9, l15,
+                       l8, l9, l10, l11, l12, l13, l14, l15);
+    ST_SH8(l8, l9, l10, l11, l12, l13, l14, l15, (output + 8), 16);
+}
+
+static void vp9_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,
+                                              int32_t dst_stride)
+{
+    v8i16 v0, v2, v4, v6, k0, k1, k2, k3;
+    v8i16 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15;
+    v8i16 out0, out1, out2, out3, out4, out5, out6, out7;
+    v8i16 out8, out9, out10, out11, out12, out13, out14, out15;
+    v8i16 g0, g1, g2, g3, g4, g5, g6, g7, g8, g9, g10, g11, g12, g13, g14, g15;
+    v8i16 h0, h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11;
+    v8i16 res0, res1, res2, res3, res4, res5, res6, res7;
+    v8i16 res8, res9, res10, res11, res12, res13, res14, res15;
+    v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+    v16u8 dst8, dst9, dst10, dst11, dst12, dst13, dst14, dst15;
+    v16i8 zero = { 0 };
+    /* 16-point IADST pass; each finished pair of rows is combined with dst. */
+    r0 = LD_SH(input + 0 * 16);    /* first eight of the sixteen input rows */
+    r3 = LD_SH(input + 3 * 16);
+    r4 = LD_SH(input + 4 * 16);
+    r7 = LD_SH(input + 7 * 16);
+    r8 = LD_SH(input + 8 * 16);
+    r11 = LD_SH(input + 11 * 16);
+    r12 = LD_SH(input + 12 * 16);
+    r15 = LD_SH(input + 15 * 16);
+
+    /* stage 1 */
+    k0 = VP9_SET_COSPI_PAIR(cospi_1_64, cospi_31_64);
+    k1 = VP9_SET_COSPI_PAIR(cospi_31_64, -cospi_1_64);
+    k2 = VP9_SET_COSPI_PAIR(cospi_17_64, cospi_15_64);
+    k3 = VP9_SET_COSPI_PAIR(cospi_15_64, -cospi_17_64);
+    VP9_MADD_BF(r15, r0, r7, r8, k0, k1, k2, k3, g0, g1, g2, g3);
+    k0 = VP9_SET_COSPI_PAIR(cospi_9_64, cospi_23_64);
+    k1 = VP9_SET_COSPI_PAIR(cospi_23_64, -cospi_9_64);
+    k2 = VP9_SET_COSPI_PAIR(cospi_25_64, cospi_7_64);
+    k3 = VP9_SET_COSPI_PAIR(cospi_7_64, -cospi_25_64);
+    VP9_MADD_BF(r11, r4, r3, r12, k0, k1, k2, k3, g8, g9, g10, g11);
+    BUTTERFLY_4(g0, g2, g10, g8, h8, h9, v2, v0);
+    k0 = VP9_SET_COSPI_PAIR(cospi_4_64, cospi_28_64);
+    k1 = VP9_SET_COSPI_PAIR(cospi_28_64, -cospi_4_64);
+    k2 = VP9_SET_COSPI_PAIR(-cospi_28_64, cospi_4_64);
+    VP9_MADD_BF(g1, g3, g9, g11, k0, k1, k2, k0, h0, h1, h2, h3);
+
+    r1 = LD_SH(input + 1 * 16);    /* remaining eight input rows */
+    r2 = LD_SH(input + 2 * 16);
+    r5 = LD_SH(input + 5 * 16);
+    r6 = LD_SH(input + 6 * 16);
+    r9 = LD_SH(input + 9 * 16);
+    r10 = LD_SH(input + 10 * 16);
+    r13 = LD_SH(input + 13 * 16);
+    r14 = LD_SH(input + 14 * 16);
+
+    k0 = VP9_SET_COSPI_PAIR(cospi_5_64, cospi_27_64);
+    k1 = VP9_SET_COSPI_PAIR(cospi_27_64, -cospi_5_64);
+    k2 = VP9_SET_COSPI_PAIR(cospi_21_64, cospi_11_64);
+    k3 = VP9_SET_COSPI_PAIR(cospi_11_64, -cospi_21_64);
+    VP9_MADD_BF(r13, r2, r5, r10, k0, k1, k2, k3, g4, g5, g6, g7);
+    k0 = VP9_SET_COSPI_PAIR(cospi_13_64, cospi_19_64);
+    k1 = VP9_SET_COSPI_PAIR(cospi_19_64, -cospi_13_64);
+    k2 = VP9_SET_COSPI_PAIR(cospi_29_64, cospi_3_64);
+    k3 = VP9_SET_COSPI_PAIR(cospi_3_64, -cospi_29_64);
+    VP9_MADD_BF(r9, r6, r1, r14, k0, k1, k2, k3, g12, g13, g14, g15);
+    BUTTERFLY_4(g4, g6, g14, g12, h10, h11, v6, v4);
+    BUTTERFLY_4(h8, h9, h11, h10, out0, out1, h11, h10);
+    out1 = -out1;
+    SRARI_H2_SH(out0, out1, 6);    /* pair for dst rows 0 and 15 */
+    dst0 = LD_UB(dst + 0 * dst_stride);
+    dst1 = LD_UB(dst + 15 * dst_stride);
+    ILVR_B2_SH(zero, dst0, zero, dst1, res0, res1);
+    ADD2(res0, out0, res1, out1, res0, res1);
+    CLIP_SH2_0_255(res0, res1);
+    PCKEV_B2_SH(res0, res0, res1, res1, res0, res1);
+    ST8x1_UB(res0, dst);
+    ST8x1_UB(res1, dst + 15 * dst_stride);
+
+    k0 = VP9_SET_COSPI_PAIR(cospi_12_64, cospi_20_64);
+    k1 = VP9_SET_COSPI_PAIR(-cospi_20_64, cospi_12_64);
+    k2 = VP9_SET_COSPI_PAIR(cospi_20_64, -cospi_12_64);
+    VP9_MADD_BF(g7, g5, g15, g13, k0, k1, k2, k0, h4, h5, h6, h7);
+    BUTTERFLY_4(h0, h2, h6, h4, out8, out9, out11, out10);
+    out8 = -out8;
+
+    SRARI_H2_SH(out8, out9, 6);    /* pair for dst rows 1 and 14 */
+    dst8 = LD_UB(dst + 1 * dst_stride);
+    dst9 = LD_UB(dst + 14 * dst_stride);
+    ILVR_B2_SH(zero, dst8, zero, dst9, res8, res9);
+    ADD2(res8, out8, res9, out9, res8, res9);
+    CLIP_SH2_0_255(res8, res9);
+    PCKEV_B2_SH(res8, res8, res9, res9, res8, res9);
+    ST8x1_UB(res8, dst + dst_stride);
+    ST8x1_UB(res9, dst + 14 * dst_stride);
+
+    k0 = VP9_SET_COSPI_PAIR(cospi_8_64, cospi_24_64);
+    k1 = VP9_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64);
+    k2 = VP9_SET_COSPI_PAIR(-cospi_24_64, cospi_8_64);
+    VP9_MADD_BF(v0, v2, v4, v6, k0, k1, k2, k0, out4, out6, out5, out7);
+    out4 = -out4;
+    SRARI_H2_SH(out4, out5, 6);    /* pair for dst rows 3 and 12 */
+    dst4 = LD_UB(dst + 3 * dst_stride);
+    dst5 = LD_UB(dst + 12 * dst_stride);
+    ILVR_B2_SH(zero, dst4, zero, dst5, res4, res5);
+    ADD2(res4, out4, res5, out5, res4, res5);
+    CLIP_SH2_0_255(res4, res5);
+    PCKEV_B2_SH(res4, res4, res5, res5, res4, res5);
+    ST8x1_UB(res4, dst + 3 * dst_stride);
+    ST8x1_UB(res5, dst + 12 * dst_stride);
+    /* k0, k1, k2 still hold the cospi_8_64 / cospi_24_64 pairs set above */
+    VP9_MADD_BF(h1, h3, h5, h7, k0, k1, k2, k0, out12, out14, out13, out15);
+    out13 = -out13;
+    SRARI_H2_SH(out12, out13, 6);    /* pair for dst rows 2 and 13 */
+    dst12 = LD_UB(dst + 2 * dst_stride);
+    dst13 = LD_UB(dst + 13 * dst_stride);
+    ILVR_B2_SH(zero, dst12, zero, dst13, res12, res13);
+    ADD2(res12, out12, res13, out13, res12, res13);
+    CLIP_SH2_0_255(res12, res13);
+    PCKEV_B2_SH(res12, res12, res13, res13, res12, res13);
+    ST8x1_UB(res12, dst + 2 * dst_stride);
+    ST8x1_UB(res13, dst + 13 * dst_stride);
+
+    k0 = VP9_SET_COSPI_PAIR(cospi_16_64, cospi_16_64);
+    k3 = VP9_SET_COSPI_PAIR(-cospi_16_64, cospi_16_64);
+    VP9_MADD_SHORT(out6, out7, k0, k3, out6, out7);
+    SRARI_H2_SH(out6, out7, 6);    /* pair for dst rows 4 and 11 */
+    dst6 = LD_UB(dst + 4 * dst_stride);
+    dst7 = LD_UB(dst + 11 * dst_stride);
+    ILVR_B2_SH(zero, dst6, zero, dst7, res6, res7);
+    ADD2(res6, out6, res7, out7, res6, res7);
+    CLIP_SH2_0_255(res6, res7);
+    PCKEV_B2_SH(res6, res6, res7, res7, res6, res7);
+    ST8x1_UB(res6, dst + 4 * dst_stride);
+    ST8x1_UB(res7, dst + 11 * dst_stride);
+    /* k0, k3 still hold the cospi_16_64 pairs set above */
+    VP9_MADD_SHORT(out10, out11, k0, k3, out10, out11);
+    SRARI_H2_SH(out10, out11, 6);    /* pair for dst rows 6 and 9 */
+    dst10 = LD_UB(dst + 6 * dst_stride);
+    dst11 = LD_UB(dst + 9 * dst_stride);
+    ILVR_B2_SH(zero, dst10, zero, dst11, res10, res11);
+    ADD2(res10, out10, res11, out11, res10, res11);
+    CLIP_SH2_0_255(res10, res11);
+    PCKEV_B2_SH(res10, res10, res11, res11, res10, res11);
+    ST8x1_UB(res10, dst + 6 * dst_stride);
+    ST8x1_UB(res11, dst + 9 * dst_stride);
+
+    k1 = VP9_SET_COSPI_PAIR(-cospi_16_64, -cospi_16_64);
+    k2 = VP9_SET_COSPI_PAIR(cospi_16_64, -cospi_16_64);
+    VP9_MADD_SHORT(h10, h11, k1, k2, out2, out3);
+    SRARI_H2_SH(out2, out3, 6);    /* pair for dst rows 7 and 8 */
+    dst2 = LD_UB(dst + 7 * dst_stride);
+    dst3 = LD_UB(dst + 8 * dst_stride);
+    ILVR_B2_SH(zero, dst2, zero, dst3, res2, res3);
+    ADD2(res2, out2, res3, out3, res2, res3);
+    CLIP_SH2_0_255(res2, res3);
+    PCKEV_B2_SH(res2, res2, res3, res3, res2, res3);
+    ST8x1_UB(res2, dst + 7 * dst_stride);
+    ST8x1_UB(res3, dst + 8 * dst_stride);
+    /* k1, k2 still hold the negated cospi_16_64 pairs set above */
+    VP9_MADD_SHORT(out14, out15, k1, k2, out14, out15);
+    SRARI_H2_SH(out14, out15, 6);    /* pair for dst rows 5 and 10 */
+    dst14 = LD_UB(dst + 5 * dst_stride);
+    dst15 = LD_UB(dst + 10 * dst_stride);
+    ILVR_B2_SH(zero, dst14, zero, dst15, res14, res15);
+    ADD2(res14, out14, res15, out15, res14, res15);
+    CLIP_SH2_0_255(res14, res15);
+    PCKEV_B2_SH(res14, res14, res15, res15, res14, res15);
+    ST8x1_UB(res14, dst + 5 * dst_stride);
+    ST8x1_UB(res15, dst + 10 * dst_stride);
+}
+
+static void vp9_iadst16x16_colcol_addblk_msa(int16_t *input, uint8_t *dst,
+                                             int32_t dst_stride)
+{
+    int16_t scratch[16 * 16] ALLOC_ALIGNED(ALIGNMENT);
+    int32_t half;
+
+    /* first pass: each call reads 8 input columns and fills the next
+     * 8 * 16 = 128 entries of the scratch buffer */
+    for (half = 0; half < 2; half++) {
+        vp9_iadst16_1d_columns_msa(input + 8 * half, scratch + 128 * half);
+    }
+
+    /* second pass: 8 columns per call, output is combined with dst */
+    for (half = 0; half < 2; half++) {
+        const int32_t col = 8 * half;
+
+        vp9_iadst16_1d_columns_addblk_msa(scratch + col, dst + col,
+                                          dst_stride);
+    }
+}
+
+static void vp9_iadst_idct_16x16_add_msa(int16_t *input, uint8_t *dst,
+                                         int32_t dst_stride, int32_t eob)
+{
+    int32_t i;
+    /* intermediate block, aligned like the other 16x16 scratch buffers */
+    int16_t out[16 * 16] ALLOC_ALIGNED(ALIGNMENT);
+    int16_t *out_ptr = &out[0];
+
+    /* transform rows (IADST): each call reads 8 input columns and fills
+     * the next 8 * 16 entries of out; eob is not consulted */
+    for (i = 0; i < 2; i++) {
+        vp9_iadst16_1d_columns_msa((input + (i << 3)), (out_ptr + (i << 7)));
+    }
+
+    /* transform columns (IDCT): 8 columns per call, output goes to dst */
+    for (i = 0; i < 2; i++) {
+        vp9_idct16_1d_columns_addblk_msa((out_ptr + (i << 3)),
+                                         (dst + (i << 3)), dst_stride);
+    }
+}
+
+static void vp9_idct_iadst_16x16_add_msa(int16_t *input, uint8_t *dst,
+                                         int32_t dst_stride, int32_t eob)
+{
+    int32_t i;
+    /* intermediate block, aligned like the other 16x16 scratch buffers */
+    int16_t out[16 * 16] ALLOC_ALIGNED(ALIGNMENT);
+    int16_t *out_ptr = &out[0];
+
+    /* transform rows (IDCT): each call gets 8 input columns and the next
+     * 8 * 16 entries of out; eob is not consulted */
+    for (i = 0; i < 2; i++) {
+        vp9_idct16_1d_columns_msa((input + (i << 3)), (out_ptr + (i << 7)));
+    }
+
+    /* transform columns (IADST): 8 columns per call, output goes to dst */
+    for (i = 0; i < 2; i++) {
+        vp9_iadst16_1d_columns_addblk_msa((out_ptr + (i << 3)),
+                                          (dst + (i << 3)), dst_stride);
+    }
+}
+
+static void vp9_idct_butterfly_transpose_store(int16_t *tmp_buf,
+                                               int16_t *tmp_eve_buf,
+                                               int16_t *tmp_odd_buf,
+                                               int16_t *dst)
+{
+    v8i16 vec0, vec1, vec2, vec3, loc0, loc1, loc2, loc3;
+    v8i16 m0, m1, m2, m3, m4, m5, m6, m7, n0, n1, n2, n3, n4, n5, n6, n7;
+    /* Even+odd sums stay in m/n; even-odd differences are parked in tmp_buf */
+    /* FINAL BUTTERFLY : Dependency on Even & Odd */
+    vec0 = LD_SH(tmp_odd_buf);
+    vec1 = LD_SH(tmp_odd_buf + 9 * 8);
+    vec2 = LD_SH(tmp_odd_buf + 14 * 8);
+    vec3 = LD_SH(tmp_odd_buf + 6 * 8);
+    loc0 = LD_SH(tmp_eve_buf);
+    loc1 = LD_SH(tmp_eve_buf + 8 * 8);
+    loc2 = LD_SH(tmp_eve_buf + 4 * 8);
+    loc3 = LD_SH(tmp_eve_buf + 12 * 8);
+
+    ADD4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, m0, m4, m2, m6);
+
+    ST_SH((loc0 - vec3), (tmp_buf + 31 * 8));
+    ST_SH((loc1 - vec2), (tmp_buf + 23 * 8));
+    ST_SH((loc2 - vec1), (tmp_buf + 27 * 8));
+    ST_SH((loc3 - vec0), (tmp_buf + 19 * 8));
+
+    /* Load 8 & Store 8 */
+    vec0 = LD_SH(tmp_odd_buf + 4 * 8);
+    vec1 = LD_SH(tmp_odd_buf + 13 * 8);
+    vec2 = LD_SH(tmp_odd_buf + 10 * 8);
+    vec3 = LD_SH(tmp_odd_buf + 3 * 8);
+    loc0 = LD_SH(tmp_eve_buf + 2 * 8);
+    loc1 = LD_SH(tmp_eve_buf + 10 * 8);
+    loc2 = LD_SH(tmp_eve_buf + 6 * 8);
+    loc3 = LD_SH(tmp_eve_buf + 14 * 8);
+
+    ADD4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, m1, m5, m3, m7);
+
+    ST_SH((loc0 - vec3), (tmp_buf + 29 * 8));
+    ST_SH((loc1 - vec2), (tmp_buf + 21 * 8));
+    ST_SH((loc2 - vec1), (tmp_buf + 25 * 8));
+    ST_SH((loc3 - vec0), (tmp_buf + 17 * 8));
+
+    /* Load 8 & Store 8 */
+    vec0 = LD_SH(tmp_odd_buf + 2 * 8);
+    vec1 = LD_SH(tmp_odd_buf + 11 * 8);
+    vec2 = LD_SH(tmp_odd_buf + 12 * 8);
+    vec3 = LD_SH(tmp_odd_buf + 7 * 8);
+    loc0 = LD_SH(tmp_eve_buf + 1 * 8);
+    loc1 = LD_SH(tmp_eve_buf + 9 * 8);
+    loc2 = LD_SH(tmp_eve_buf + 5 * 8);
+    loc3 = LD_SH(tmp_eve_buf + 13 * 8);
+
+    ADD4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, n0, n4, n2, n6);
+
+    ST_SH((loc0 - vec3), (tmp_buf + 30 * 8));
+    ST_SH((loc1 - vec2), (tmp_buf + 22 * 8));
+    ST_SH((loc2 - vec1), (tmp_buf + 26 * 8));
+    ST_SH((loc3 - vec0), (tmp_buf + 18 * 8));
+
+    /* Load 8 & Store 8 */
+    vec0 = LD_SH(tmp_odd_buf + 5 * 8);
+    vec1 = LD_SH(tmp_odd_buf + 15 * 8);
+    vec2 = LD_SH(tmp_odd_buf + 8 * 8);
+    vec3 = LD_SH(tmp_odd_buf + 1 * 8);
+    loc0 = LD_SH(tmp_eve_buf + 3 * 8);
+    loc1 = LD_SH(tmp_eve_buf + 11 * 8);
+    loc2 = LD_SH(tmp_eve_buf + 7 * 8);
+    loc3 = LD_SH(tmp_eve_buf + 15 * 8);
+
+    ADD4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, n1, n5, n3, n7);
+
+    ST_SH((loc0 - vec3), (tmp_buf + 28 * 8));
+    ST_SH((loc1 - vec2), (tmp_buf + 20 * 8));
+    ST_SH((loc2 - vec1), (tmp_buf + 24 * 8));
+    ST_SH((loc3 - vec0), (tmp_buf + 16 * 8));
+
+    /* Transpose : 16 vectors */
+    /* 1st & 2nd 8x8 : the sums still held in m0..m7 / n0..n7 */
+    TRANSPOSE8x8_SH_SH(m0, n0, m1, n1, m2, n2, m3, n3,
+                       m0, n0, m1, n1, m2, n2, m3, n3);
+    ST_SH4(m0, n0, m1, n1, (dst + 0), 32);
+    ST_SH4(m2, n2, m3, n3, (dst + 4 * 32), 32);
+
+    TRANSPOSE8x8_SH_SH(m4, n4, m5, n5, m6, n6, m7, n7,
+                       m4, n4, m5, n5, m6, n6, m7, n7);
+    ST_SH4(m4, n4, m5, n5, (dst + 8), 32);
+    ST_SH4(m6, n6, m7, n7, (dst + 8 + 4 * 32), 32);
+
+    /* 3rd & 4th 8x8 : differences stored above (8 * 16 == 16 * 8) */
+    LD_SH8((tmp_buf + 8 * 16), 8, m0, n0, m1, n1, m2, n2, m3, n3);
+    LD_SH8((tmp_buf + 12 * 16), 8, m4, n4, m5, n5, m6, n6, m7, n7);
+    TRANSPOSE8x8_SH_SH(m0, n0, m1, n1, m2, n2, m3, n3,
+                       m0, n0, m1, n1, m2, n2, m3, n3);
+    ST_SH4(m0, n0, m1, n1, (dst + 16), 32);
+    ST_SH4(m2, n2, m3, n3, (dst + 16 + 4 * 32), 32);
+
+    TRANSPOSE8x8_SH_SH(m4, n4, m5, n5, m6, n6, m7, n7,
+                       m4, n4, m5, n5, m6, n6, m7, n7);
+    ST_SH4(m4, n4, m5, n5, (dst + 24), 32);
+    ST_SH4(m6, n6, m7, n7, (dst + 24 + 4 * 32), 32);
+}
+
+static void vp9_idct8x32_column_even_process_store(int16_t *tmp_buf,
+                                                   int16_t *tmp_eve_buf)
+{
+    v8i16 vec0, vec1, vec2, vec3, loc0, loc1, loc2, loc3;
+    v8i16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7;
+    v8i16 stp0, stp1, stp2, stp3, stp4, stp5, stp6, stp7;
+    /* Processes the even input rows; 16 vectors are stored to tmp_eve_buf. */
+    /* Even stage 1 : input rows 0, 4, ..., 28 (8 loads, 4 * 32 apart) */
+    LD_SH8(tmp_buf, (4 * 32), reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7);
+    tmp_buf += (2 * 32);
+
+    VP9_DOTP_CONST_PAIR(reg1, reg7, cospi_28_64, cospi_4_64, reg1, reg7);
+    VP9_DOTP_CONST_PAIR(reg5, reg3, cospi_12_64, cospi_20_64, reg5, reg3);
+    BUTTERFLY_4(reg1, reg7, reg3, reg5, vec1, vec3, vec2, vec0);
+    VP9_DOTP_CONST_PAIR(vec2, vec0, cospi_16_64, cospi_16_64, loc2, loc3);
+
+    loc1 = vec3;
+    loc0 = vec1;
+
+    VP9_DOTP_CONST_PAIR(reg0, reg4, cospi_16_64, cospi_16_64, reg0, reg4);
+    VP9_DOTP_CONST_PAIR(reg2, reg6, cospi_24_64, cospi_8_64, reg2, reg6);
+    BUTTERFLY_4(reg4, reg0, reg2, reg6, vec1, vec3, vec2, vec0);
+    BUTTERFLY_4(vec0, vec1, loc1, loc0, stp3, stp0, stp7, stp4);
+    BUTTERFLY_4(vec2, vec3, loc3, loc2, stp2, stp1, stp6, stp5);
+
+    /* Even stage 2 */
+    /* Load 8 : input rows 2, 6, ..., 30 (tmp_buf was advanced by 2 * 32) */
+    LD_SH8(tmp_buf, (4 * 32), reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7);
+
+    VP9_DOTP_CONST_PAIR(reg0, reg7, cospi_30_64, cospi_2_64, reg0, reg7);
+    VP9_DOTP_CONST_PAIR(reg4, reg3, cospi_14_64, cospi_18_64, reg4, reg3);
+    VP9_DOTP_CONST_PAIR(reg2, reg5, cospi_22_64, cospi_10_64, reg2, reg5);
+    VP9_DOTP_CONST_PAIR(reg6, reg1, cospi_6_64, cospi_26_64, reg6, reg1);
+
+    vec0 = reg0 + reg4;
+    reg0 = reg0 - reg4;
+    reg4 = reg6 + reg2;
+    reg6 = reg6 - reg2;
+    reg2 = reg1 + reg5;
+    reg1 = reg1 - reg5;
+    reg5 = reg7 + reg3;
+    reg7 = reg7 - reg3;
+    reg3 = vec0;
+
+    vec1 = reg2;
+    reg2 = reg3 + reg4;
+    reg3 = reg3 - reg4;
+    reg4 = reg5 - vec1;
+    reg5 = reg5 + vec1;
+
+    VP9_DOTP_CONST_PAIR(reg7, reg0, cospi_24_64, cospi_8_64, reg0, reg7);
+    VP9_DOTP_CONST_PAIR((-reg6), reg1, cospi_24_64, cospi_8_64, reg6, reg1);
+
+    vec0 = reg0 - reg6;
+    reg0 = reg0 + reg6;
+    vec1 = reg7 - reg1;
+    reg7 = reg7 + reg1;
+
+    VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, reg6, reg1);
+    VP9_DOTP_CONST_PAIR(reg4, reg3, cospi_16_64, cospi_16_64, reg3, reg4);
+
+    /* Even stage 3 : Dependency on Even stage 1 & Even stage 2 */
+    /* Store 8 : tmp_eve_buf slots 0, 1, 14, 15 then 2, 3, 12, 13 */
+    BUTTERFLY_4(stp0, stp1, reg7, reg5, loc1, loc3, loc2, loc0);
+    ST_SH2(loc1, loc3, tmp_eve_buf, 8);
+    ST_SH2(loc2, loc0, (tmp_eve_buf + 14 * 8), 8);
+
+    BUTTERFLY_4(stp2, stp3, reg4, reg1, loc1, loc3, loc2, loc0);
+    ST_SH2(loc1, loc3, (tmp_eve_buf + 2 * 8), 8);
+    ST_SH2(loc2, loc0, (tmp_eve_buf + 12 * 8), 8);
+
+    /* Store 8 : tmp_eve_buf slots 4, 5, 10, 11 then 6, 7, 8, 9 */
+    BUTTERFLY_4(stp4, stp5, reg6, reg3, loc1, loc3, loc2, loc0);
+    ST_SH2(loc1, loc3, (tmp_eve_buf + 4 * 8), 8);
+    ST_SH2(loc2, loc0, (tmp_eve_buf + 10 * 8), 8);
+
+    BUTTERFLY_4(stp6, stp7, reg2, reg0, loc1, loc3, loc2, loc0);
+    ST_SH2(loc1, loc3, (tmp_eve_buf + 6 * 8), 8);
+    ST_SH2(loc2, loc0, (tmp_eve_buf + 8 * 8), 8);
+}
+
+static void vp9_idct8x32_column_odd_process_store(int16_t *tmp_buf,
+                                                  int16_t *tmp_odd_buf)
+{
+    v8i16 vec0, vec1, vec2, vec3, loc0, loc1, loc2, loc3;
+    v8i16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7;
+    /* Processes the odd input rows; tmp_odd_buf is both scratch and output. */
+    /* Odd stage 1 : input rows 1, 7, 9, 15, 17, 23, 25, 31 */
+    reg0 = LD_SH(tmp_buf + 32);
+    reg1 = LD_SH(tmp_buf + 7 * 32);
+    reg2 = LD_SH(tmp_buf + 9 * 32);
+    reg3 = LD_SH(tmp_buf + 15 * 32);
+    reg4 = LD_SH(tmp_buf + 17 * 32);
+    reg5 = LD_SH(tmp_buf + 23 * 32);
+    reg6 = LD_SH(tmp_buf + 25 * 32);
+    reg7 = LD_SH(tmp_buf + 31 * 32);
+
+    VP9_DOTP_CONST_PAIR(reg0, reg7, cospi_31_64, cospi_1_64, reg0, reg7);
+    VP9_DOTP_CONST_PAIR(reg4, reg3, cospi_15_64, cospi_17_64, reg3, reg4);
+    VP9_DOTP_CONST_PAIR(reg2, reg5, cospi_23_64, cospi_9_64, reg2, reg5);
+    VP9_DOTP_CONST_PAIR(reg6, reg1, cospi_7_64, cospi_25_64, reg1, reg6);
+
+    vec0 = reg0 + reg3;
+    reg0 = reg0 - reg3;
+    reg3 = reg7 + reg4;
+    reg7 = reg7 - reg4;
+    reg4 = reg1 + reg2;
+    reg1 = reg1 - reg2;
+    reg2 = reg6 + reg5;
+    reg6 = reg6 - reg5;
+    reg5 = vec0;
+
+    /* 4 Stores : tmp_odd_buf slots 4, 5 then 0, 1 */
+    ADD2(reg5, reg4, reg3, reg2, vec0, vec1);
+    ST_SH2(vec0, vec1, (tmp_odd_buf + 4 * 8), 8);
+    SUB2(reg5, reg4, reg3, reg2, vec0, vec1);
+    VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_24_64, cospi_8_64, vec0, vec1);
+    ST_SH2(vec0, vec1, tmp_odd_buf, 8);
+
+    /* 4 Stores : tmp_odd_buf slots 6, 7 then 2, 3 */
+    VP9_DOTP_CONST_PAIR(reg7, reg0, cospi_28_64, cospi_4_64, reg0, reg7);
+    VP9_DOTP_CONST_PAIR(reg6, reg1, -cospi_4_64, cospi_28_64, reg1, reg6);
+    BUTTERFLY_4(reg0, reg7, reg6, reg1, vec0, vec1, vec2, vec3);
+    ST_SH2(vec0, vec1, (tmp_odd_buf + 6 * 8), 8);
+    VP9_DOTP_CONST_PAIR(vec2, vec3, cospi_24_64, cospi_8_64, vec2, vec3);
+    ST_SH2(vec2, vec3, (tmp_odd_buf + 2 * 8), 8);
+
+    /* Odd stage 2 */
+    /* 8 loads : input rows 3, 5, 11, 13, 19, 21, 27, 29 */
+    reg0 = LD_SH(tmp_buf + 3 * 32);
+    reg1 = LD_SH(tmp_buf + 5 * 32);
+    reg2 = LD_SH(tmp_buf + 11 * 32);
+    reg3 = LD_SH(tmp_buf + 13 * 32);
+    reg4 = LD_SH(tmp_buf + 19 * 32);
+    reg5 = LD_SH(tmp_buf + 21 * 32);
+    reg6 = LD_SH(tmp_buf + 27 * 32);
+    reg7 = LD_SH(tmp_buf + 29 * 32);
+
+    VP9_DOTP_CONST_PAIR(reg1, reg6, cospi_27_64, cospi_5_64, reg1, reg6);
+    VP9_DOTP_CONST_PAIR(reg5, reg2, cospi_11_64, cospi_21_64, reg2, reg5);
+    VP9_DOTP_CONST_PAIR(reg3, reg4, cospi_19_64, cospi_13_64, reg3, reg4);
+    VP9_DOTP_CONST_PAIR(reg7, reg0, cospi_3_64, cospi_29_64, reg0, reg7);
+
+    /* 4 Stores */
+    SUB4(reg1, reg2, reg6, reg5, reg0, reg3, reg7, reg4,
+         vec0, vec1, vec2, vec3);
+    VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_12_64, cospi_20_64, loc0, loc1);
+    VP9_DOTP_CONST_PAIR(vec3, vec2, -cospi_20_64, cospi_12_64, loc2, loc3);
+    BUTTERFLY_4(loc2, loc3, loc1, loc0, vec0, vec1, vec3, vec2);
+    ST_SH2(vec0, vec1, (tmp_odd_buf + 12 * 8), 3 * 8);   /* slots 12, 15 */
+    VP9_DOTP_CONST_PAIR(vec3, vec2, -cospi_8_64, cospi_24_64, vec0, vec1);
+    ST_SH2(vec0, vec1, (tmp_odd_buf + 10 * 8), 8);
+
+    /* 4 Stores */
+    ADD4(reg0, reg3, reg1, reg2, reg5, reg6, reg4, reg7,
+         vec0, vec1, vec2, vec3);
+    BUTTERFLY_4(vec0, vec3, vec2, vec1, reg0, reg1, reg3, reg2);
+    ST_SH2(reg0, reg1, (tmp_odd_buf + 13 * 8), 8);   /* slots 13, 14 */
+    VP9_DOTP_CONST_PAIR(reg3, reg2, -cospi_8_64, cospi_24_64, reg0, reg1);
+    ST_SH2(reg0, reg1, (tmp_odd_buf + 8 * 8), 8);
+
+    /* Odd stage 3 : Dependency on Odd stage 1 & Odd stage 2 */
+    /* Load 8 & Store 8 : slots 0..3 and 8..11, rewritten in place */
+    LD_SH4(tmp_odd_buf, 8, reg0, reg1, reg2, reg3);
+    LD_SH4((tmp_odd_buf + 8 * 8), 8, reg4, reg5, reg6, reg7);
+
+    ADD4(reg0, reg4, reg1, reg5, reg2, reg6, reg3, reg7,
+         loc0, loc1, loc2, loc3);
+    ST_SH4(loc0, loc1, loc2, loc3, tmp_odd_buf, 8);
+
+    SUB2(reg0, reg4, reg1, reg5, vec0, vec1);
+    VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc0, loc1);
+
+    SUB2(reg2, reg6, reg3, reg7, vec0, vec1);
+    VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc2, loc3);
+    ST_SH4(loc0, loc1, loc2, loc3, (tmp_odd_buf + 8 * 8), 8);
+
+    /* Load 8 & Store 8 : note the reg1, reg2, reg0, reg3 load order */
+    LD_SH4((tmp_odd_buf + 4 * 8), 8, reg1, reg2, reg0, reg3);
+    LD_SH4((tmp_odd_buf + 12 * 8), 8, reg4, reg5, reg6, reg7);
+
+    ADD4(reg0, reg4, reg1, reg5, reg2, reg6, reg3, reg7,
+         loc0, loc1, loc2, loc3);
+    ST_SH4(loc0, loc1, loc2, loc3, (tmp_odd_buf + 4 * 8), 8);
+
+    SUB2(reg0, reg4, reg3, reg7, vec0, vec1);
+    VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc0, loc1);
+
+    SUB2(reg1, reg5, reg2, reg6, vec0, vec1);
+    VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc2, loc3);
+    ST_SH4(loc0, loc1, loc2, loc3, (tmp_odd_buf + 12 * 8), 8);
+}
+
+static void vp9_idct8x32_column_butterfly_addblk(int16_t *tmp_eve_buf,
+                                                 int16_t *tmp_odd_buf,
+                                                 uint8_t *dst,
+                                                 int32_t dst_stride)
+{
+    v8i16 vec0, vec1, vec2, vec3, loc0, loc1, loc2, loc3;
+    v8i16 m0, m1, m2, m3, m4, m5, m6, m7, n0, n1, n2, n3, n4, n5, n6, n7;
+    /* NOTE(review): row lists assume the ADDBLK macro steps by its 2nd arg */
+    /* FINAL BUTTERFLY : Dependency on Even & Odd */
+    vec0 = LD_SH(tmp_odd_buf);
+    vec1 = LD_SH(tmp_odd_buf + 9 * 8);
+    vec2 = LD_SH(tmp_odd_buf + 14 * 8);
+    vec3 = LD_SH(tmp_odd_buf + 6 * 8);
+    loc0 = LD_SH(tmp_eve_buf);
+    loc1 = LD_SH(tmp_eve_buf + 8 * 8);
+    loc2 = LD_SH(tmp_eve_buf + 4 * 8);
+    loc3 = LD_SH(tmp_eve_buf + 12 * 8);
+
+    ADD4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, m0, m4, m2, m6);
+    SRARI_H4_SH(m0, m2, m4, m6, 6);    /* sums: dst rows 0, 4, 8, 12 */
+    VP9_ADDBLK_ST8x4_UB(dst, (4 * dst_stride), m0, m2, m4, m6);
+
+    SUB4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, m6, m2, m4, m0);
+    SRARI_H4_SH(m0, m2, m4, m6, 6);    /* differences: rows 19, 23, 27, 31 */
+    VP9_ADDBLK_ST8x4_UB((dst + 19 * dst_stride), (4 * dst_stride),
+                        m0, m2, m4, m6);
+
+    /* Load 8 & Store 8 : dst rows 2, 6, 10, 14 and 17, 21, 25, 29 */
+    vec0 = LD_SH(tmp_odd_buf + 4 * 8);
+    vec1 = LD_SH(tmp_odd_buf + 13 * 8);
+    vec2 = LD_SH(tmp_odd_buf + 10 * 8);
+    vec3 = LD_SH(tmp_odd_buf + 3 * 8);
+    loc0 = LD_SH(tmp_eve_buf + 2 * 8);
+    loc1 = LD_SH(tmp_eve_buf + 10 * 8);
+    loc2 = LD_SH(tmp_eve_buf + 6 * 8);
+    loc3 = LD_SH(tmp_eve_buf + 14 * 8);
+
+    ADD4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, m1, m5, m3, m7);
+    SRARI_H4_SH(m1, m3, m5, m7, 6);
+    VP9_ADDBLK_ST8x4_UB((dst + 2 * dst_stride), (4 * dst_stride),
+                        m1, m3, m5, m7);
+
+    SUB4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, m7, m3, m5, m1);
+    SRARI_H4_SH(m1, m3, m5, m7, 6);
+    VP9_ADDBLK_ST8x4_UB((dst + 17 * dst_stride), (4 * dst_stride),
+                        m1, m3, m5, m7);
+
+    /* Load 8 & Store 8 : dst rows 1, 5, 9, 13 and 18, 22, 26, 30 */
+    vec0 = LD_SH(tmp_odd_buf + 2 * 8);
+    vec1 = LD_SH(tmp_odd_buf + 11 * 8);
+    vec2 = LD_SH(tmp_odd_buf + 12 * 8);
+    vec3 = LD_SH(tmp_odd_buf + 7 * 8);
+    loc0 = LD_SH(tmp_eve_buf + 1 * 8);
+    loc1 = LD_SH(tmp_eve_buf + 9 * 8);
+    loc2 = LD_SH(tmp_eve_buf + 5 * 8);
+    loc3 = LD_SH(tmp_eve_buf + 13 * 8);
+
+    ADD4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, n0, n4, n2, n6);
+    SRARI_H4_SH(n0, n2, n4, n6, 6);
+    VP9_ADDBLK_ST8x4_UB((dst + 1 * dst_stride), (4 * dst_stride),
+                        n0, n2, n4, n6);
+
+    SUB4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, n6, n2, n4, n0);
+    SRARI_H4_SH(n0, n2, n4, n6, 6);
+    VP9_ADDBLK_ST8x4_UB((dst + 18 * dst_stride), (4 * dst_stride),
+                        n0, n2, n4, n6);
+
+    /* Load 8 & Store 8 : dst rows 3, 7, 11, 15 and 16, 20, 24, 28 */
+    vec0 = LD_SH(tmp_odd_buf + 5 * 8);
+    vec1 = LD_SH(tmp_odd_buf + 15 * 8);
+    vec2 = LD_SH(tmp_odd_buf + 8 * 8);
+    vec3 = LD_SH(tmp_odd_buf + 1 * 8);
+    loc0 = LD_SH(tmp_eve_buf + 3 * 8);
+    loc1 = LD_SH(tmp_eve_buf + 11 * 8);
+    loc2 = LD_SH(tmp_eve_buf + 7 * 8);
+    loc3 = LD_SH(tmp_eve_buf + 15 * 8);
+
+    ADD4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, n1, n5, n3, n7);
+    SRARI_H4_SH(n1, n3, n5, n7, 6);
+    VP9_ADDBLK_ST8x4_UB((dst + 3 * dst_stride), (4 * dst_stride),
+                        n1, n3, n5, n7);
+
+    SUB4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, n7, n3, n5, n1);
+    SRARI_H4_SH(n1, n3, n5, n7, 6);
+    VP9_ADDBLK_ST8x4_UB((dst + 16 * dst_stride), (4 * dst_stride),
+                        n1, n3, n5, n7);
+}
+
+static void vp9_idct8x32_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,
+                                               int32_t dst_stride)
+{
+    int16_t odd_half[16 * 8] ALLOC_ALIGNED(ALIGNMENT);
+    int16_t even_half[16 * 8] ALLOC_ALIGNED(ALIGNMENT);
+    /* both halves are computed from input, then merged and sent to dst */
+    vp9_idct8x32_column_even_process_store(input, even_half);
+    vp9_idct8x32_column_odd_process_store(input, odd_half);
+    vp9_idct8x32_column_butterfly_addblk(even_half, odd_half,
+                                         dst, dst_stride);
+}
+
+static void vp9_idct8x32_1d_columns_msa(int16_t *input, int16_t *output,
+                                        int16_t *tmp_buf)
+{
+    int16_t odd_half[16 * 8] ALLOC_ALIGNED(ALIGNMENT);
+    int16_t even_half[16 * 8] ALLOC_ALIGNED(ALIGNMENT);
+    /* both halves are computed from input, then merged into output */
+    vp9_idct8x32_column_even_process_store(input, even_half);
+    vp9_idct8x32_column_odd_process_store(input, odd_half);
+    vp9_idct_butterfly_transpose_store(tmp_buf, even_half,
+                                       odd_half, output);
+}
+
+static void vp9_idct32x32_1_add_msa(int16_t *input, uint8_t *dst,
+                                    int32_t dst_stride)
+{
+    int32_t pair;
+    int16_t dc;
+    v16u8 px0, px1, px2, px3, pk0, pk1, pk2, pk3;
+    v8i16 w0, w1, w2, w3, w4, w5, w6, w7, dc_vec;
+
+    dc = ROUND_POWER_OF_TWO((input[0] * cospi_16_64), VP9_DCT_CONST_BITS);
+    dc = ROUND_POWER_OF_TWO((dc * cospi_16_64), VP9_DCT_CONST_BITS);
+    dc = ROUND_POWER_OF_TWO(dc, 6);
+
+    dc_vec = __msa_fill_h(dc);
+
+    /* two rows of 32 bytes are updated per iteration, 16 iterations */
+    for (pair = 0; pair < 16; pair++) {
+        LD_UB2(dst, 16, px0, px1);
+        LD_UB2(dst + dst_stride, 16, px2, px3);
+
+        UNPCK_UB_SH(px0, w0, w4);
+        UNPCK_UB_SH(px1, w1, w5);
+        UNPCK_UB_SH(px2, w2, w6);
+        UNPCK_UB_SH(px3, w3, w7);
+        ADD4(w0, dc_vec, w1, dc_vec, w2, dc_vec, w3, dc_vec, w0, w1, w2,
+             w3);
+        ADD4(w4, dc_vec, w5, dc_vec, w6, dc_vec, w7, dc_vec, w4, w5, w6,
+             w7);
+        CLIP_SH4_0_255(w0, w1, w2, w3);
+        CLIP_SH4_0_255(w4, w5, w6, w7);
+        PCKEV_B4_UB(w4, w0, w5, w1, w6, w2, w7, w3,
+                    pk0, pk1, pk2, pk3);
+
+        ST_UB2(pk0, pk1, dst, 16);
+        dst += dst_stride;
+        ST_UB2(pk2, pk3, dst, 16);
+        dst += dst_stride;
+    }
+}
+
+static void vp9_idct32x32_34_colcol_addblk_msa(int16_t *input, uint8_t *dst,
+                                               int32_t dst_stride)
+{
+    int32_t i;
+    int16_t out_arr[32 * 32] ALLOC_ALIGNED(ALIGNMENT);
+    int16_t *out_ptr = out_arr;
+    int16_t tmp_buf[8 * 32] ALLOC_ALIGNED(ALIGNMENT);
+    /* zero all of out_arr: 64 bytes (one 32-entry row) per iteration */
+    for (i = 32; i--;) {
+        __asm__ volatile (
+            "sw     $zero,       (%[out_ptr])     \n\t"
+            "sw     $zero,      4(%[out_ptr])     \n\t"
+            "sw     $zero,      8(%[out_ptr])     \n\t"
+            "sw     $zero,     12(%[out_ptr])     \n\t"
+            "sw     $zero,     16(%[out_ptr])     \n\t"
+            "sw     $zero,     20(%[out_ptr])     \n\t"
+            "sw     $zero,     24(%[out_ptr])     \n\t"
+            "sw     $zero,     28(%[out_ptr])     \n\t"
+            "sw     $zero,     32(%[out_ptr])     \n\t"
+            "sw     $zero,     36(%[out_ptr])     \n\t"
+            "sw     $zero,     40(%[out_ptr])     \n\t"
+            "sw     $zero,     44(%[out_ptr])     \n\t"
+            "sw     $zero,     48(%[out_ptr])     \n\t"
+            "sw     $zero,     52(%[out_ptr])     \n\t"
+            "sw     $zero,     56(%[out_ptr])     \n\t"
+            "sw     $zero,     60(%[out_ptr])     \n\t"
+            :
+            : [out_ptr] "r" (out_ptr)
+            : "memory"   /* the stores write out_arr behind the compiler */
+        );
+
+        out_ptr += 32;
+    }
+
+    out_ptr = out_arr;
+    /* only the first 8 input columns are transformed; the single call fills
+     * the first 8 rows of out_arr, the remaining rows keep the zeros above */
+    vp9_idct8x32_1d_columns_msa(input, out_ptr, &tmp_buf[0]);
+
+    /* transform columns */
+    for (i = 0; i < 4; i++) {
+        /* process 8*32 block */
+        vp9_idct8x32_1d_columns_addblk_msa((out_ptr + (i << 3)),
+                                           (dst + (i << 3)), dst_stride);
+    }
+}
+
+static void vp9_idct32x32_colcol_addblk_msa(int16_t *input, uint8_t *dst,
+                                            int32_t dst_stride)
+{
+    int16_t coeffs[32 * 32] ALLOC_ALIGNED(ALIGNMENT);
+    int16_t spill[8 * 32] ALLOC_ALIGNED(ALIGNMENT);
+    int32_t strip;
+
+    /* first pass: each call takes 8 input columns and fills the next
+     * 8 * 32 = 256 entries of coeffs, using spill as work space */
+    for (strip = 0; strip < 4; strip++) {
+        vp9_idct8x32_1d_columns_msa(input + 8 * strip, coeffs + 256 * strip,
+                                    spill);
+    }
+
+    /* second pass: 8 columns per call, output goes to dst */
+    for (strip = 0; strip < 4; strip++) {
+        const int32_t col = 8 * strip;
+
+        vp9_idct8x32_1d_columns_addblk_msa(coeffs + col, dst + col,
+                                           dst_stride);
+    }
+}
+
+void ff_idct_idct_4x4_add_msa(uint8_t *dst, ptrdiff_t stride,
+                              int16_t *block, int eob)
+{
+    /* eob <= 1 resets only block[0]; otherwise all 16 entries are cleared */
+    if (eob <= 1) {
+        vp9_idct4x4_1_add_msa(block, dst, stride);
+        block[0] = 0;
+    } else {
+        vp9_idct4x4_colcol_addblk_msa(block, dst, stride);
+        memset(block, 0, 16 * sizeof(block[0]));
+    }
+}
+
+void ff_idct_idct_8x8_add_msa(uint8_t *dst, ptrdiff_t stride,
+                              int16_t *block, int eob)
+{
+    if (eob == 1) {
+        vp9_idct8x8_1_add_msa(block, dst, stride);
+        block[0] = 0;
+        return;
+    }
+    if (eob > 12) {
+        vp9_idct8x8_colcol_addblk_msa(block, dst, stride);
+        memset(block, 0, 64 * sizeof(block[0]));
+        return;
+    }
+    vp9_idct8x8_12_colcol_addblk_msa(block, dst, stride);
+    memset(block, 0, 32 * sizeof(block[0]));   /* first 4 * 8 entries only */
+}
+
+void ff_idct_idct_16x16_add_msa(uint8_t *dst, ptrdiff_t stride,
+                                int16_t *block, int eob)
+{
+    int row;
+
+    /* dispatch on eob: each branch runs a different kernel and zeroes a
+     * different portion of block afterwards */
+    if (eob == 1) {
+        /* DC only DCT coefficient: just block[0] is reset */
+        vp9_idct16x16_1_add_msa(block, dst, stride);
+        block[0] = 0;
+    } else if (eob > 10) {
+        vp9_idct16x16_colcol_addblk_msa(block, dst, stride);
+        memset(block, 0, 256 * sizeof(block[0]));
+    } else {
+        vp9_idct16x16_10_colcol_addblk_msa(block, dst, stride);
+        /* clear the leading 4 entries of each of the first 4 rows */
+        for (row = 0; row < 4; row++) {
+            memset(block + 16 * row, 0, 4 * sizeof(block[0]));
+        }
+    }
+}
+
+void ff_idct_idct_32x32_add_msa(uint8_t *dst, ptrdiff_t stride,
+                                int16_t *block, int eob)
+{
+    int row;
+
+    /* dispatch on eob: each branch runs a different kernel and zeroes a
+     * different portion of block afterwards */
+    if (eob == 1) {
+        vp9_idct32x32_1_add_msa(block, dst, stride);
+        block[0] = 0;
+    } else if (eob > 34) {
+        vp9_idct32x32_colcol_addblk_msa(block, dst, stride);
+        memset(block, 0, 1024 * sizeof(block[0]));
+    } else {
+        vp9_idct32x32_34_colcol_addblk_msa(block, dst, stride);
+        for (row = 0; row < 8; row++) {
+            /* leading 8 entries of each of the first 8 rows */
+            memset(block + 32 * row, 0, 8 * sizeof(block[0]));
+        }
+    }
+}
+
+void ff_iadst_iadst_4x4_add_msa(uint8_t *dst, ptrdiff_t stride,
+                                int16_t *block, int eob)
+{
+    vp9_iadst4x4_colcol_addblk_msa(block, dst, stride);
+    memset(block, 0, 16 * sizeof(block[0]));   /* eob is not used here */
+}
+
+void ff_iadst_iadst_8x8_add_msa(uint8_t *dst, ptrdiff_t stride,
+                                int16_t *block, int eob)
+{
+    vp9_iadst8x8_colcol_addblk_msa(block, dst, stride);
+    memset(block, 0, 64 * sizeof(block[0]));   /* eob is not used here */
+}
+
+void ff_iadst_iadst_16x16_add_msa(uint8_t *dst, ptrdiff_t stride,
+                                  int16_t *block, int eob)
+{
+    vp9_iadst16x16_colcol_addblk_msa(block, dst, stride);
+    memset(block, 0, 256 * sizeof(block[0]));   /* eob is not used here */
+}
+
+void ff_idct_iadst_4x4_add_msa(uint8_t *dst, ptrdiff_t stride,
+                               int16_t *block, int eob)
+{
+    vp9_idct_iadst_4x4_add_msa(block, dst, stride, eob);
+    memset(block, 0, 16 * sizeof(block[0]));   /* whole block, any eob */
+}
+
+void ff_idct_iadst_8x8_add_msa(uint8_t *dst, ptrdiff_t stride,
+                               int16_t *block, int eob)
+{
+    vp9_idct_iadst_8x8_add_msa(block, dst, stride, eob);
+    memset(block, 0, 64 * sizeof(block[0]));   /* whole block, any eob */
+}
+
+void ff_idct_iadst_16x16_add_msa(uint8_t *dst, ptrdiff_t stride,
+                                 int16_t *block, int eob)
+{
+    vp9_idct_iadst_16x16_add_msa(block, dst, stride, eob);
+    memset(block, 0, 256 * sizeof(block[0]));   /* whole block, any eob */
+}
+
+void ff_iadst_idct_4x4_add_msa(uint8_t *dst, ptrdiff_t stride,
+                               int16_t *block, int eob)
+{
+    vp9_iadst_idct_4x4_add_msa(block, dst, stride, eob);
+    memset(block, 0, 16 * sizeof(block[0]));   /* whole block, any eob */
+}
+
+void ff_iadst_idct_8x8_add_msa(uint8_t *dst, ptrdiff_t stride,
+                               int16_t *block, int eob)
+{
+    vp9_iadst_idct_8x8_add_msa(block, dst, stride, eob);
+    memset(block, 0, 64 * sizeof(block[0]));   /* whole block, any eob */
+}
+
+void ff_iadst_idct_16x16_add_msa(uint8_t *dst, ptrdiff_t stride,
+                                 int16_t *block, int eob)
+{
+    vp9_iadst_idct_16x16_add_msa(block, dst, stride, eob);
+    memset(block, 0, 256 * sizeof(block[0]));   /* whole block, any eob */
+}
diff --git a/libavcodec/mips/vp9_intra_msa.c b/libavcodec/mips/vp9_intra_msa.c
new file mode 100644
index 00000000..54cf0ae9
--- /dev/null
+++ b/libavcodec/mips/vp9_intra_msa.c
@@ -0,0 +1,533 @@
+/*
+ * Copyright (c) 2015 Shivraj Patil (Shivraj.Patil@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/vp9dsp.h"
+#include "libavutil/mips/generic_macros_msa.h"
+#include "vp9dsp_mips.h"
+
+#define IPRED_SUBS_UH2_UH(in0, in1, out0, out1)  \
+{   /* In place: outN = outN - inN per unsigned halfword, saturating at 0. */ \
+    out0 = __msa_subs_u_h(out0, in0);            \
+    out1 = __msa_subs_u_h(out1, in1);            \
+}
+
+void ff_vert_16x16_msa(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *left,
+                       const uint8_t *src) /* src: row to replicate; left is unused */
+{ /* Vertical prediction: store the one vector read from src into 16 consecutive rows of dst. */
+    uint32_t row;
+    v16u8 src0;
+
+    src0 = LD_UB(src); /* presumably a single 16-byte vector load (macro defined outside this file) */
+
+    for (row = 16; row--;) { /* post-decrement test: the body runs exactly 16 times */
+        ST_UB(src0, dst);
+        dst += dst_stride; /* advance to the next output row */
+    }
+}
+
+void ff_vert_32x32_msa(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *left,
+                       const uint8_t *src) /* src: row to replicate; left is unused */
+{ /* Vertical prediction: store the two vectors read from src and src + 16 into 32 rows of dst. */
+    uint32_t row;
+    v16u8 src1, src2;
+
+    src1 = LD_UB(src);      /* first half of the source row */
+    src2 = LD_UB(src + 16); /* second half of the source row */
+
+    for (row = 32; row--;) { /* post-decrement test: the body runs exactly 32 times */
+        ST_UB2(src1, src2, dst, 16); /* presumably stores src1 at dst and src2 at dst + 16 */
+        dst += dst_stride;
+    }
+}
+
+void ff_hor_16x16_msa(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *src,
+                      const uint8_t *top) /* src: 16 edge bytes, one per output row; top is unused */
+{ /* Horizontal prediction: each output row is filled with a single byte taken from src. */
+    uint32_t row, inp;
+    v16u8 src0, src1, src2, src3;
+
+    src += 12; /* start at the last 4 of the 16 edge bytes and walk backwards */
+    for (row = 4; row--;) { /* 4 iterations x 4 rows = 16 rows */
+        inp = LW(src); /* presumably a 32-bit load; byte order below assumes little-endian - TODO confirm */
+        src -= 4;
+
+        src0 = (v16u8) __msa_fill_b(inp >> 24); /* most significant byte of the word goes to the first row */
+        src1 = (v16u8) __msa_fill_b(inp >> 16); /* fill_b replicates only the low 8 bits of its argument */
+        src2 = (v16u8) __msa_fill_b(inp >> 8);
+        src3 = (v16u8) __msa_fill_b(inp);       /* least significant byte goes to the fourth row */
+
+        ST_UB4(src0, src1, src2, src3, dst, dst_stride); /* presumably one vector per row, 4 rows */
+        dst += (4 * dst_stride);
+    }
+}
+
+void ff_hor_32x32_msa(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *src,
+                      const uint8_t *top) /* src: 32 edge bytes, one per output row; top is unused */
+{ /* Horizontal prediction: each 32-byte output row is filled with a single byte taken from src. */
+    uint32_t row, inp;
+    v16u8 src0, src1, src2, src3;
+
+    src += 28; /* start at the last 4 of the 32 edge bytes and walk backwards */
+    for (row = 8; row--;) { /* 8 iterations x 4 rows = 32 rows */
+        inp = LW(src); /* presumably a 32-bit load; byte order below assumes little-endian - TODO confirm */
+        src -= 4;
+
+        src0 = (v16u8) __msa_fill_b(inp >> 24); /* most significant byte of the word goes to the first row */
+        src1 = (v16u8) __msa_fill_b(inp >> 16); /* fill_b replicates only the low 8 bits of its argument */
+        src2 = (v16u8) __msa_fill_b(inp >> 8);
+        src3 = (v16u8) __msa_fill_b(inp);       /* least significant byte goes to the fourth row */
+
+        ST_UB2(src0, src0, dst, 16); /* same vector written to both 16-byte halves of the row */
+        dst += dst_stride;
+        ST_UB2(src1, src1, dst, 16);
+        dst += dst_stride;
+        ST_UB2(src2, src2, dst, 16);
+        dst += dst_stride;
+        ST_UB2(src3, src3, dst, 16);
+        dst += dst_stride;
+    }
+}
+
+void ff_dc_4x4_msa(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *src_left,
+                   const uint8_t *src_top) /* one 32-bit word is read from each of src_left and src_top */
+{ /* DC prediction: fill the 4x4 block with the rounded mean of the 8 edge bytes. */
+    uint32_t val0, val1;
+    v16i8 store, src = { 0 }; /* lanes not overwritten below stay zero and add nothing to the sum */
+    v8u16 sum_h;
+    v4u32 sum_w;
+    v2u64 sum_d;
+
+    val0 = LW(src_top);
+    val1 = LW(src_left);
+    INSERT_W2_SB(val0, val1, src); /* presumably places val0/val1 in word lanes 0 and 1 - TODO confirm */
+    sum_h = __msa_hadd_u_h((v16u8) src, (v16u8) src); /* adjacent byte pairs -> halfword sums */
+    sum_w = __msa_hadd_u_w(sum_h, sum_h);             /* adjacent halfword pairs -> word sums */
+    sum_d = __msa_hadd_u_d(sum_w, sum_w);             /* doubleword 0 now holds the sum of the 8 bytes */
+    sum_w = (v4u32) __msa_srari_w((v4i32) sum_d, 3);  /* rounding shift by 3 = divide by 8 */
+    store = __msa_splati_b((v16i8) sum_w, 0);         /* broadcast byte 0 (the DC value) */
+    val0 = __msa_copy_u_w((v4i32) store, 0);          /* four copies of the DC byte in one word */
+
+    SW4(val0, val0, val0, val0, dst, dst_stride); /* presumably one word store per row, 4 rows */
+}
+
+#define INTRA_DC_TL_4x4(dir)                                    \
+void ff_dc_##dir##_4x4_msa(uint8_t *dst, ptrdiff_t dst_stride,  \
+                           const uint8_t *left,                 \
+                           const uint8_t *top)                  \
+{   /* dir names both the function suffix and the edge read */  \
+    uint32_t val0;                                              \
+    v16i8 store, data = { 0 };                                  \
+    v8u16 sum_h;                                                \
+    v4u32 sum_w;                                                \
+    /* Sum the 4 bytes of one edge; other lanes stay zero. */   \
+    val0 = LW(dir);                                             \
+    data = (v16i8) __msa_insert_w((v4i32) data, 0, val0);       \
+    sum_h = __msa_hadd_u_h((v16u8) data, (v16u8) data);         \
+    sum_w = __msa_hadd_u_w(sum_h, sum_h);                       \
+    sum_w = (v4u32) __msa_srari_w((v4i32) sum_w, 2); /* /4 */   \
+    store = __msa_splati_b((v16i8) sum_w, 0);                   \
+    val0 = __msa_copy_u_w((v4i32) store, 0);                    \
+    /* val0 now holds four copies of the rounded mean. */       \
+    SW4(val0, val0, val0, val0, dst, dst_stride);               \
+}
+INTRA_DC_TL_4x4(top);  /* defines ff_dc_top_4x4_msa, which reads only the top pointer */
+INTRA_DC_TL_4x4(left); /* defines ff_dc_left_4x4_msa, which reads only the left pointer */
+
+void ff_dc_8x8_msa(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *src_left,
+                   const uint8_t *src_top) /* one 64-bit value is read from each of src_left and src_top */
+{ /* DC prediction: fill the 8x8 block with the rounded mean of the 16 edge bytes. */
+    uint64_t val0, val1;
+    v16i8 store;
+    v16u8 src = { 0 };
+    v8u16 sum_h;
+    v4u32 sum_w;
+    v2u64 sum_d;
+
+    val0 = LD(src_top);
+    val1 = LD(src_left);
+    INSERT_D2_UB(val0, val1, src); /* presumably places val0/val1 in the two doubleword lanes - TODO confirm */
+    sum_h = __msa_hadd_u_h(src, src);     /* adjacent byte pairs -> halfword sums */
+    sum_w = __msa_hadd_u_w(sum_h, sum_h); /* -> word sums */
+    sum_d = __msa_hadd_u_d(sum_w, sum_w); /* -> one partial sum per doubleword */
+    sum_w = (v4u32) __msa_pckev_w((v4i32) sum_d, (v4i32) sum_d); /* put both partial sums side by side */
+    sum_d = __msa_hadd_u_d(sum_w, sum_w); /* add the two partial sums: total of all 16 bytes */
+    sum_w = (v4u32) __msa_srari_w((v4i32) sum_d, 4); /* rounding shift by 4 = divide by 16 */
+    store = __msa_splati_b((v16i8) sum_w, 0); /* broadcast byte 0 (the DC value) */
+    val0 = __msa_copy_u_d((v2i64) store, 0);  /* eight copies of the DC byte in one 64-bit value */
+
+    SD4(val0, val0, val0, val0, dst, dst_stride); /* presumably one 64-bit store per row, rows 0-3 */
+    dst += (4 * dst_stride);
+    SD4(val0, val0, val0, val0, dst, dst_stride); /* rows 4-7 */
+}
+
+#define INTRA_DC_TL_8x8(dir)                                    \
+void ff_dc_##dir##_8x8_msa(uint8_t *dst, ptrdiff_t dst_stride,  \
+                           const uint8_t *left,                 \
+                           const uint8_t *top)                  \
+{   /* dir names both the function suffix and the edge read */  \
+    uint64_t val0;                                              \
+    v16i8 store;                                                \
+    v16u8 data = { 0 };                                         \
+    v8u16 sum_h;                                                \
+    v4u32 sum_w;                                                \
+    v2u64 sum_d;                                                \
+    /* Sum the 8 bytes of one edge; upper lanes stay zero. */   \
+    val0 = LD(dir);                                             \
+    data = (v16u8) __msa_insert_d((v2i64) data, 0, val0);       \
+    sum_h = __msa_hadd_u_h(data, data);                         \
+    sum_w = __msa_hadd_u_w(sum_h, sum_h);                       \
+    sum_d = __msa_hadd_u_d(sum_w, sum_w);                       \
+    sum_w = (v4u32) __msa_srari_w((v4i32) sum_d, 3); /* /8 */   \
+    store = __msa_splati_b((v16i8) sum_w, 0);                   \
+    val0 = __msa_copy_u_d((v2i64) store, 0);                    \
+    /* val0 now holds eight copies of the rounded mean. */      \
+    SD4(val0, val0, val0, val0, dst, dst_stride);               \
+    dst += (4 * dst_stride);                                    \
+    SD4(val0, val0, val0, val0, dst, dst_stride);               \
+}
+
+INTRA_DC_TL_8x8(top);  /* defines ff_dc_top_8x8_msa, which reads only the top pointer */
+INTRA_DC_TL_8x8(left); /* defines ff_dc_left_8x8_msa, which reads only the left pointer */
+
+void ff_dc_16x16_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                     const uint8_t *src_left, const uint8_t *src_top) /* one vector is read from each edge */
+{ /* DC prediction: fill the 16x16 block with the rounded mean of the 32 edge bytes. */
+    v16u8 top, left, out;
+    v8u16 sum_h, sum_top, sum_left;
+    v4u32 sum_w;
+    v2u64 sum_d;
+
+    top = LD_UB(src_top);
+    left = LD_UB(src_left);
+    HADD_UB2_UH(top, left, sum_top, sum_left); /* presumably pairwise byte sums per input vector - TODO confirm */
+    sum_h = sum_top + sum_left;                /* combine both edges lane by lane */
+    sum_w = __msa_hadd_u_w(sum_h, sum_h);      /* -> word sums */
+    sum_d = __msa_hadd_u_d(sum_w, sum_w);      /* -> one partial sum per doubleword */
+    sum_w = (v4u32) __msa_pckev_w((v4i32) sum_d, (v4i32) sum_d); /* put both partial sums side by side */
+    sum_d = __msa_hadd_u_d(sum_w, sum_w);      /* add them: total of all 32 bytes */
+    sum_w = (v4u32) __msa_srari_w((v4i32) sum_d, 5); /* rounding shift by 5 = divide by 32 */
+    out = (v16u8) __msa_splati_b((v16i8) sum_w, 0);  /* broadcast the DC byte to all 16 lanes */
+
+    ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride); /* presumably rows 0-7 */
+    dst += (8 * dst_stride);
+    ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride); /* rows 8-15 */
+}
+
+#define INTRA_DC_TL_16x16(dir)                                        \
+void ff_dc_##dir##_16x16_msa(uint8_t *dst, ptrdiff_t dst_stride,      \
+                             const uint8_t *left,                     \
+                             const uint8_t *top)                      \
+{   /* dir names both the function suffix and the edge read */        \
+    v16u8 data, out;                                                  \
+    v8u16 sum_h;                                                      \
+    v4u32 sum_w;                                                      \
+    v2u64 sum_d;                                                      \
+    /* Sum one edge vector, then divide by 16 with rounding. */       \
+    data = LD_UB(dir);                                                \
+    sum_h = __msa_hadd_u_h(data, data);                               \
+    sum_w = __msa_hadd_u_w(sum_h, sum_h);                             \
+    sum_d = __msa_hadd_u_d(sum_w, sum_w);                             \
+    sum_w = (v4u32) __msa_pckev_w((v4i32) sum_d, (v4i32) sum_d);      \
+    sum_d = __msa_hadd_u_d(sum_w, sum_w);                             \
+    sum_w = (v4u32) __msa_srari_w((v4i32) sum_d, 4);                  \
+    out = (v16u8) __msa_splati_b((v16i8) sum_w, 0);                   \
+    /* Two groups of eight row stores cover the 16 rows. */           \
+    ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);  \
+    dst += (8 * dst_stride);                                          \
+    ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);  \
+}
+INTRA_DC_TL_16x16(top);  /* defines ff_dc_top_16x16_msa, which reads only the top pointer */
+INTRA_DC_TL_16x16(left); /* defines ff_dc_left_16x16_msa, which reads only the left pointer */
+
+void ff_dc_32x32_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                     const uint8_t *src_left, const uint8_t *src_top) /* two vectors are read from each edge */
+{ /* DC prediction: fill the 32x32 block with the rounded mean of the 64 edge bytes. */
+    uint32_t row;
+    v16u8 top0, top1, left0, left1, out;
+    v8u16 sum_h, sum_top0, sum_top1, sum_left0, sum_left1;
+    v4u32 sum_w;
+    v2u64 sum_d;
+
+    LD_UB2(src_top, 16, top0, top1);    /* presumably loads at src_top and src_top + 16 */
+    LD_UB2(src_left, 16, left0, left1); /* presumably loads at src_left and src_left + 16 */
+    HADD_UB2_UH(top0, top1, sum_top0, sum_top1);     /* presumably pairwise byte sums per vector */
+    HADD_UB2_UH(left0, left1, sum_left0, sum_left1);
+    sum_h = sum_top0 + sum_top1;
+    sum_h += sum_left0 + sum_left1;       /* all four partial vectors combined lane by lane */
+    sum_w = __msa_hadd_u_w(sum_h, sum_h); /* -> word sums */
+    sum_d = __msa_hadd_u_d(sum_w, sum_w); /* -> one partial sum per doubleword */
+    sum_w = (v4u32) __msa_pckev_w((v4i32) sum_d, (v4i32) sum_d); /* put both partial sums side by side */
+    sum_d = __msa_hadd_u_d(sum_w, sum_w); /* add them: total of all 64 bytes */
+    sum_w = (v4u32) __msa_srari_w((v4i32) sum_d, 6); /* rounding shift by 6 = divide by 64 */
+    out = (v16u8) __msa_splati_b((v16i8) sum_w, 0);  /* broadcast the DC byte to all 16 lanes */
+
+    for (row = 16; row--;) /* 16 iterations x 2 rows = 32 rows */
+    {
+        ST_UB2(out, out, dst, 16); /* both 16-byte halves of the row */
+        dst += dst_stride;
+        ST_UB2(out, out, dst, 16);
+        dst += dst_stride;
+    }
+}
+
+#define INTRA_DC_TL_32x32(dir)                                    \
+void ff_dc_##dir##_32x32_msa(uint8_t *dst, ptrdiff_t dst_stride,  \
+                             const uint8_t *left,                 \
+                             const uint8_t *top)                  \
+{   /* dir names both the function suffix and the edge read */    \
+    uint32_t row;                                                 \
+    v16u8 data0, data1, out;                                      \
+    v8u16 sum_h, sum_data0, sum_data1;                            \
+    v4u32 sum_w;                                                  \
+    v2u64 sum_d;                                                  \
+    /* Sum two edge vectors, then divide by 32 with rounding. */  \
+    LD_UB2(dir, 16, data0, data1);                                \
+    HADD_UB2_UH(data0, data1, sum_data0, sum_data1);              \
+    sum_h = sum_data0 + sum_data1;                                \
+    sum_w = __msa_hadd_u_w(sum_h, sum_h);                         \
+    sum_d = __msa_hadd_u_d(sum_w, sum_w);                         \
+    sum_w = (v4u32) __msa_pckev_w((v4i32) sum_d, (v4i32) sum_d);  \
+    sum_d = __msa_hadd_u_d(sum_w, sum_w);                         \
+    sum_w = (v4u32) __msa_srari_w((v4i32) sum_d, 5);              \
+    out = (v16u8) __msa_splati_b((v16i8) sum_w, 0);               \
+    /* 16 iterations x 2 rows = 32 rows of output. */             \
+    for (row = 16; row--;)                                        \
+    {                                                             \
+        ST_UB2(out, out, dst, 16);                                \
+        dst += dst_stride;                                        \
+        ST_UB2(out, out, dst, 16);                                \
+        dst += dst_stride;                                        \
+    }                                                             \
+}
+INTRA_DC_TL_32x32(top);  /* defines ff_dc_top_32x32_msa, which reads only the top pointer */
+INTRA_DC_TL_32x32(left); /* defines ff_dc_left_32x32_msa, which reads only the left pointer */
+
+#define INTRA_PREDICT_VALDC_16X16_MSA(val)                             \
+void ff_dc_##val##_16x16_msa(uint8_t *dst, ptrdiff_t dst_stride,       \
+                             const uint8_t *left, const uint8_t *top)  \
+{   /* Fill 16x16 with the constant val; left and top are unused. */   \
+    v16u8 out = (v16u8) __msa_ldi_b(val);                              \
+    /* Two groups of eight row stores cover the 16 rows. */            \
+    ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);   \
+    dst += (8 * dst_stride);                                           \
+    ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);   \
+}
+
+INTRA_PREDICT_VALDC_16X16_MSA(127); /* defines ff_dc_127_16x16_msa */
+INTRA_PREDICT_VALDC_16X16_MSA(128); /* defines ff_dc_128_16x16_msa */
+INTRA_PREDICT_VALDC_16X16_MSA(129); /* defines ff_dc_129_16x16_msa */
+
+#define INTRA_PREDICT_VALDC_32X32_MSA(val)                             \
+void ff_dc_##val##_32x32_msa(uint8_t *dst, ptrdiff_t dst_stride,       \
+                             const uint8_t *left, const uint8_t *top)  \
+{   /* Fill 32x32 with the constant val; left and top are unused. */   \
+    uint32_t row;                                                      \
+    v16u8 out = (v16u8) __msa_ldi_b(val);                              \
+    /* 16 iterations x 2 rows = 32 rows of output. */                  \
+    for (row = 16; row--;)                                             \
+    {                                                                  \
+        ST_UB2(out, out, dst, 16);                                     \
+        dst += dst_stride;                                             \
+        ST_UB2(out, out, dst, 16);                                     \
+        dst += dst_stride;                                             \
+    }                                                                  \
+}
+
+INTRA_PREDICT_VALDC_32X32_MSA(127); /* defines ff_dc_127_32x32_msa */
+INTRA_PREDICT_VALDC_32X32_MSA(128); /* defines ff_dc_128_32x32_msa */
+INTRA_PREDICT_VALDC_32X32_MSA(129); /* defines ff_dc_129_32x32_msa */
+
+void ff_tm_4x4_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                   const uint8_t *src_left, const uint8_t *src_top_ptr) /* src_top_ptr[-1] must be readable */
+{ /* TM prediction, 4x4: each pixel is left + top - top_left, clamped at 0 and then saturated. */
+    uint32_t left;
+    uint8_t top_left = src_top_ptr[-1]; /* corner byte immediately before the top row */
+    v16i8 src_top, src_left0, src_left1, src_left2, src_left3, tmp0, tmp1;
+    v16u8 src0, src1, src2, src3;
+    v8u16 src_top_left, vec0, vec1, vec2, vec3;
+
+    src_top_left = (v8u16) __msa_fill_h(top_left); /* corner value in every halfword lane */
+    src_top = LD_SB(src_top_ptr); /* presumably a full 16-byte load although only 4 columns are stored - TODO confirm padding */
+    left = LW(src_left);
+    src_left0 = __msa_fill_b(left >> 24); /* most significant byte of the word feeds the first row */
+    src_left1 = __msa_fill_b(left >> 16);
+    src_left2 = __msa_fill_b(left >> 8);
+    src_left3 = __msa_fill_b(left);       /* least significant byte feeds the fourth row */
+
+    ILVR_B4_UB(src_left0, src_top, src_left1, src_top, src_left2, src_top,
+               src_left3, src_top, src0, src1, src2, src3); /* presumably pairs each top byte with that row's left byte */
+    HADD_UB4_UH(src0, src1, src2, src3, vec0, vec1, vec2, vec3); /* presumably adds each pair: left + top per halfword */
+    IPRED_SUBS_UH2_UH(src_top_left, src_top_left, vec0, vec1); /* subtract the corner, saturating at 0 */
+    IPRED_SUBS_UH2_UH(src_top_left, src_top_left, vec2, vec3);
+    SAT_UH4_UH(vec0, vec1, vec2, vec3, 7); /* presumably clamps each halfword to the 8-bit range - TODO confirm */
+    PCKEV_B2_SB(vec1, vec0, vec3, vec2, tmp0, tmp1); /* presumably narrows halfwords back to bytes, two rows per vector */
+    ST4x4_UB(tmp0, tmp1, 0, 2, 0, 2, dst, dst_stride); /* presumably stores word lanes 0 and 2 of each vector as 4 rows */
+}
+
+void ff_tm_8x8_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                   const uint8_t *src_left, const uint8_t *src_top_ptr) /* src_top_ptr[-1] must be readable */
+{ /* TM prediction, 8x8: each pixel is left + top - top_left, clamped at 0 and then saturated. */
+    uint8_t top_left = src_top_ptr[-1]; /* corner byte immediately before the top row */
+    uint32_t loop_cnt, left;
+    v16i8 src_top, src_left0, src_left1, src_left2, src_left3, tmp0, tmp1;
+    v8u16 src_top_left, vec0, vec1, vec2, vec3;
+    v16u8 src0, src1, src2, src3;
+
+    src_top = LD_SB(src_top_ptr); /* presumably a full 16-byte load although only 8 columns are stored */
+    src_top_left = (v8u16) __msa_fill_h(top_left); /* corner value in every halfword lane */
+
+    src_left += 4; /* start at the last 4 of the 8 left bytes and walk backwards */
+    for (loop_cnt = 2; loop_cnt--;) { /* 2 iterations x 4 rows = 8 rows */
+        left = LW(src_left);
+        src_left0 = __msa_fill_b(left >> 24); /* most significant byte of the word feeds the first row of the group */
+        src_left1 = __msa_fill_b(left >> 16);
+        src_left2 = __msa_fill_b(left >> 8);
+        src_left3 = __msa_fill_b(left);
+        src_left -= 4;
+
+        ILVR_B4_UB(src_left0, src_top, src_left1, src_top, src_left2, src_top,
+                   src_left3, src_top, src0, src1, src2, src3); /* presumably pairs each top byte with that row's left byte */
+        HADD_UB4_UH(src0, src1, src2, src3, vec0, vec1, vec2, vec3); /* presumably left + top per halfword */
+        IPRED_SUBS_UH2_UH(src_top_left, src_top_left, vec0, vec1); /* subtract the corner, saturating at 0 */
+        IPRED_SUBS_UH2_UH(src_top_left, src_top_left, vec2, vec3);
+        SAT_UH4_UH(vec0, vec1, vec2, vec3, 7); /* presumably clamps each halfword to the 8-bit range */
+        PCKEV_B2_SB(vec1, vec0, vec3, vec2, tmp0, tmp1); /* presumably narrows to bytes, two rows per vector */
+        ST8x4_UB(tmp0, tmp1, dst, dst_stride); /* presumably stores four 8-byte rows */
+        dst += (4 * dst_stride);
+    }
+}
+
+void ff_tm_16x16_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                     const uint8_t *src_left, const uint8_t *src_top_ptr) /* src_top_ptr[-1] must be readable */
+{ /* TM prediction, 16x16: each pixel is left + top - top_left, clamped at 0 and then saturated. */
+    uint8_t top_left = src_top_ptr[-1]; /* corner byte immediately before the top row */
+    uint32_t loop_cnt, left;
+    v16i8 src_top, src_left0, src_left1, src_left2, src_left3;
+    v8u16 src_top_left, res_r, res_l;
+
+    src_top = LD_SB(src_top_ptr);
+    src_top_left = (v8u16) __msa_fill_h(top_left); /* corner value in every halfword lane */
+
+    src_left += 12; /* start at the last 4 of the 16 left bytes and walk backwards */
+    for (loop_cnt = 4; loop_cnt--;) { /* 4 iterations x 4 rows = 16 rows */
+        left = LW(src_left);
+        src_left0 = __msa_fill_b(left >> 24); /* most significant byte of the word feeds the first row of the group */
+        src_left1 = __msa_fill_b(left >> 16);
+        src_left2 = __msa_fill_b(left >> 8);
+        src_left3 = __msa_fill_b(left);
+        src_left -= 4;
+
+        ILVRL_B2_UH(src_left0, src_top, res_r, res_l); /* presumably (top, left) byte pairs for both halves of the row */
+        HADD_UB2_UH(res_r, res_l, res_r, res_l);       /* presumably left + top per halfword, in place */
+        IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r, res_l); /* subtract the corner, saturating at 0 */
+
+        SAT_UH2_UH(res_r, res_l, 7);    /* presumably clamps each halfword to the 8-bit range */
+        PCKEV_ST_SB(res_r, res_l, dst); /* presumably narrows to bytes and stores one 16-byte row */
+        dst += dst_stride;
+
+        ILVRL_B2_UH(src_left1, src_top, res_r, res_l); /* same sequence for the second row of the group */
+        HADD_UB2_UH(res_r, res_l, res_r, res_l);
+        IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r, res_l);
+        SAT_UH2_UH(res_r, res_l, 7);
+        PCKEV_ST_SB(res_r, res_l, dst);
+        dst += dst_stride;
+
+        ILVRL_B2_UH(src_left2, src_top, res_r, res_l); /* third row of the group */
+        HADD_UB2_UH(res_r, res_l, res_r, res_l);
+        IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r, res_l);
+        SAT_UH2_UH(res_r, res_l, 7);
+        PCKEV_ST_SB(res_r, res_l, dst);
+        dst += dst_stride;
+
+        ILVRL_B2_UH(src_left3, src_top, res_r, res_l); /* fourth row of the group */
+        HADD_UB2_UH(res_r, res_l, res_r, res_l);
+        IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r, res_l);
+        SAT_UH2_UH(res_r, res_l, 7);
+        PCKEV_ST_SB(res_r, res_l, dst);
+        dst += dst_stride;
+    }
+}
+
+void ff_tm_32x32_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                     const uint8_t *src_left, const uint8_t *src_top_ptr) /* src_top_ptr[-1] must be readable */
+{ /* TM prediction, 32x32: each pixel is left + top - top_left, clamped at 0 and then saturated. */
+    uint8_t top_left = src_top_ptr[-1]; /* corner byte immediately before the top row */
+    uint32_t loop_cnt, left;
+    v16i8 src_top0, src_top1, src_left0, src_left1, src_left2, src_left3;
+    v8u16 src_top_left, res_r0, res_r1, res_l0, res_l1;
+
+    src_top0 = LD_SB(src_top_ptr);      /* first half of the top row */
+    src_top1 = LD_SB(src_top_ptr + 16); /* second half of the top row */
+    src_top_left = (v8u16) __msa_fill_h(top_left); /* corner value in every halfword lane */
+
+    src_left += 28; /* start at the last 4 of the 32 left bytes and walk backwards */
+    for (loop_cnt = 8; loop_cnt--;) { /* 8 iterations x 4 rows = 32 rows */
+        left = LW(src_left);
+        src_left0 = __msa_fill_b(left >> 24); /* most significant byte of the word feeds the first row of the group */
+        src_left1 = __msa_fill_b(left >> 16);
+        src_left2 = __msa_fill_b(left >> 8);
+        src_left3 = __msa_fill_b(left);
+        src_left -= 4;
+
+        ILVR_B2_UH(src_left0, src_top0, src_left0, src_top1, res_r0, res_r1); /* presumably (top, left) pairs, right halves */
+        ILVL_B2_UH(src_left0, src_top0, src_left0, src_top1, res_l0, res_l1); /* presumably (top, left) pairs, left halves */
+        HADD_UB4_UH(res_r0, res_l0, res_r1, res_l1, res_r0, res_l0, res_r1,
+                    res_l1); /* presumably left + top per halfword, in place */
+        IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r0, res_l0); /* subtract the corner, saturating at 0 */
+        IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r1, res_l1);
+        SAT_UH4_UH(res_r0, res_l0, res_r1, res_l1, 7); /* presumably clamps each halfword to the 8-bit range */
+        PCKEV_ST_SB(res_r0, res_l0, dst);      /* presumably narrows to bytes and stores columns 0-15 */
+        PCKEV_ST_SB(res_r1, res_l1, dst + 16); /* columns 16-31 */
+        dst += dst_stride;
+
+        ILVR_B2_UH(src_left1, src_top0, src_left1, src_top1, res_r0, res_r1); /* same sequence for the second row of the group */
+        ILVL_B2_UH(src_left1, src_top0, src_left1, src_top1, res_l0, res_l1);
+        HADD_UB4_UH(res_r0, res_l0, res_r1, res_l1, res_r0, res_l0, res_r1,
+                    res_l1);
+        IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r0, res_l0);
+        IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r1, res_l1);
+        SAT_UH4_UH(res_r0, res_l0, res_r1, res_l1, 7);
+        PCKEV_ST_SB(res_r0, res_l0, dst);
+        PCKEV_ST_SB(res_r1, res_l1, dst + 16);
+        dst += dst_stride;
+
+        ILVR_B2_UH(src_left2, src_top0, src_left2, src_top1, res_r0, res_r1); /* third row of the group */
+        ILVL_B2_UH(src_left2, src_top0, src_left2, src_top1, res_l0, res_l1);
+        HADD_UB4_UH(res_r0, res_l0, res_r1, res_l1, res_r0, res_l0, res_r1,
+                    res_l1);
+        IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r0, res_l0);
+        IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r1, res_l1);
+        SAT_UH4_UH(res_r0, res_l0, res_r1, res_l1, 7);
+        PCKEV_ST_SB(res_r0, res_l0, dst);
+        PCKEV_ST_SB(res_r1, res_l1, dst + 16);
+        dst += dst_stride;
+
+        ILVR_B2_UH(src_left3, src_top0, src_left3, src_top1, res_r0, res_r1); /* fourth row of the group */
+        ILVL_B2_UH(src_left3, src_top0, src_left3, src_top1, res_l0, res_l1);
+        HADD_UB4_UH(res_r0, res_l0, res_r1, res_l1, res_r0, res_l0, res_r1,
+                    res_l1);
+        IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r0, res_l0);
+        IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r1, res_l1);
+        SAT_UH4_UH(res_r0, res_l0, res_r1, res_l1, 7);
+        PCKEV_ST_SB(res_r0, res_l0, dst);
+        PCKEV_ST_SB(res_r1, res_l1, dst + 16);
+        dst += dst_stride;
+    }
+}
diff --git a/libavcodec/mips/vp9_lpf_msa.c b/libavcodec/mips/vp9_lpf_msa.c
new file mode 100644
index 00000000..eef8afc4
--- /dev/null
+++ b/libavcodec/mips/vp9_lpf_msa.c
@@ -0,0 +1,2599 @@
+/*
+ * Copyright (c) 2015 Shivraj Patil (Shivraj.Patil@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/vp9dsp.h"
+#include "libavutil/mips/generic_macros_msa.h"
+#include "vp9dsp_mips.h"
+
+#define VP9_LPF_FILTER4_8W(p1_in, p0_in, q0_in, q1_in, mask_in, hev_in,  \
+                           p1_out, p0_out, q0_out, q1_out)               \
+{                                                                        \
+    v16i8 p1_m, p0_m, q0_m, q1_m, q0_sub_p0, filt_sign;                  \
+    v16i8 filt, filt1, filt2, cnst4b, cnst3b;                            \
+    v8i16 q0_sub_p0_r, filt_r, cnst3h;                                   \
+    /* 4-pixel filter on 0x80-biased (signed) bytes, low 8 lanes only */ \
+    p1_m = (v16i8) __msa_xori_b(p1_in, 0x80);                            \
+    p0_m = (v16i8) __msa_xori_b(p0_in, 0x80);                            \
+    q0_m = (v16i8) __msa_xori_b(q0_in, 0x80);                            \
+    q1_m = (v16i8) __msa_xori_b(q1_in, 0x80);                            \
+    /* base term: sat(p1 - q1), kept only where hev_in is set */         \
+    filt = __msa_subs_s_b(p1_m, q1_m);                                   \
+    filt = filt & (v16i8) hev_in;                                        \
+    q0_sub_p0 = q0_m - p0_m;                                             \
+    filt_sign = __msa_clti_s_b(filt, 0);                                 \
+    /* widen: filt + 3 * (q0 - p0), then clamp to signed 8 bits */       \
+    cnst3h = __msa_ldi_h(3);                                             \
+    q0_sub_p0_r = (v8i16) __msa_ilvr_b(q0_sub_p0, q0_sub_p0);            \
+    q0_sub_p0_r = __msa_dotp_s_h((v16i8) q0_sub_p0_r, (v16i8) cnst3h);   \
+    filt_r = (v8i16) __msa_ilvr_b(filt_sign, filt);                      \
+    filt_r += q0_sub_p0_r;                                               \
+    filt_r = __msa_sat_s_h(filt_r, 7);                                   \
+                                                                         \
+    /* narrow to bytes; both halves receive the low-half result */       \
+    filt = __msa_pckev_b((v16i8) filt_r, (v16i8) filt_r);                \
+    /* apply mask; filt1 = (filt + 4) >> 3 is the q0 adjustment */       \
+    filt = filt & (v16i8) mask_in;                                       \
+    cnst4b = __msa_ldi_b(4);                                             \
+    filt1 = __msa_adds_s_b(filt, cnst4b);                                \
+    filt1 >>= 3;                                                         \
+    /* filt2 = (filt + 3) >> 3 is the p0 adjustment */                   \
+    cnst3b = __msa_ldi_b(3);                                             \
+    filt2 = __msa_adds_s_b(filt, cnst3b);                                \
+    filt2 >>= 3;                                                         \
+    /* q0 -= filt1, p0 += filt2 (saturating), then remove the bias */    \
+    q0_m = __msa_subs_s_b(q0_m, filt1);                                  \
+    q0_out = __msa_xori_b((v16u8) q0_m, 0x80);                           \
+    p0_m = __msa_adds_s_b(p0_m, filt2);                                  \
+    p0_out = __msa_xori_b((v16u8) p0_m, 0x80);                           \
+    /* outer taps: rounded filt1 >> 1 where !hev (hev_in is inverted) */ \
+    filt = __msa_srari_b(filt1, 1);                                      \
+    hev_in = __msa_xori_b((v16u8) hev_in, 0xff);                         \
+    filt = filt & (v16i8) hev_in;                                        \
+    /* q1 -= filt, p1 += filt (saturating), then remove the bias */      \
+    q1_m = __msa_subs_s_b(q1_m, filt);                                   \
+    q1_out = __msa_xori_b((v16u8) q1_m, 0x80);                           \
+    p1_m = __msa_adds_s_b(p1_m, filt);                                   \
+    p1_out = __msa_xori_b((v16u8) p1_m, 0x80);                           \
+}
+
+#define VP9_LPF_FILTER4_4W(p1_in, p0_in, q0_in, q1_in, mask_in, hev_in,  \
+                           p1_out, p0_out, q0_out, q1_out)               \
+{                                                                        \
+    v16i8 p1_m, p0_m, q0_m, q1_m, q0_sub_p0, filt_sign;                  \
+    v16i8 filt, filt1, filt2, cnst4b, cnst3b;                            \
+    v8i16 q0_sub_p0_r, q0_sub_p0_l, filt_l, filt_r, cnst3h;              \
+    /* 4-pixel filter on 0x80-biased (signed) bytes, all 16 lanes */     \
+    p1_m = (v16i8) __msa_xori_b(p1_in, 0x80);                            \
+    p0_m = (v16i8) __msa_xori_b(p0_in, 0x80);                            \
+    q0_m = (v16i8) __msa_xori_b(q0_in, 0x80);                            \
+    q1_m = (v16i8) __msa_xori_b(q1_in, 0x80);                            \
+    /* base term: sat(p1 - q1), kept only where hev_in is set */         \
+    filt = __msa_subs_s_b(p1_m, q1_m);                                   \
+                                                                         \
+    filt = filt & (v16i8) hev_in;                                        \
+                                                                         \
+    q0_sub_p0 = q0_m - p0_m;                                             \
+    filt_sign = __msa_clti_s_b(filt, 0);                                 \
+    /* low half: filt + 3 * (q0 - p0), clamped to signed 8 bits */       \
+    cnst3h = __msa_ldi_h(3);                                             \
+    q0_sub_p0_r = (v8i16) __msa_ilvr_b(q0_sub_p0, q0_sub_p0);            \
+    q0_sub_p0_r = __msa_dotp_s_h((v16i8) q0_sub_p0_r, (v16i8) cnst3h);   \
+    filt_r = (v8i16) __msa_ilvr_b(filt_sign, filt);                      \
+    filt_r += q0_sub_p0_r;                                               \
+    filt_r = __msa_sat_s_h(filt_r, 7);                                   \
+    /* high half: same computation on the upper 8 lanes */               \
+    q0_sub_p0_l = (v8i16) __msa_ilvl_b(q0_sub_p0, q0_sub_p0);            \
+    q0_sub_p0_l = __msa_dotp_s_h((v16i8) q0_sub_p0_l, (v16i8) cnst3h);   \
+    filt_l = (v8i16) __msa_ilvl_b(filt_sign, filt);                      \
+    filt_l += q0_sub_p0_l;                                               \
+    filt_l = __msa_sat_s_h(filt_l, 7);                                   \
+    /* narrow both halves back to bytes and apply the mask */            \
+    filt = __msa_pckev_b((v16i8) filt_l, (v16i8) filt_r);                \
+    filt = filt & (v16i8) mask_in;                                       \
+    /* filt1 = (filt + 4) >> 3 is the q0 adjustment */                   \
+    cnst4b = __msa_ldi_b(4);                                             \
+    filt1 = __msa_adds_s_b(filt, cnst4b);                                \
+    filt1 >>= 3;                                                         \
+    /* filt2 = (filt + 3) >> 3 is the p0 adjustment */                   \
+    cnst3b = __msa_ldi_b(3);                                             \
+    filt2 = __msa_adds_s_b(filt, cnst3b);                                \
+    filt2 >>= 3;                                                         \
+    /* q0 -= filt1, p0 += filt2 (saturating), then remove the bias */    \
+    q0_m = __msa_subs_s_b(q0_m, filt1);                                  \
+    q0_out = __msa_xori_b((v16u8) q0_m, 0x80);                           \
+    p0_m = __msa_adds_s_b(p0_m, filt2);                                  \
+    p0_out = __msa_xori_b((v16u8) p0_m, 0x80);                           \
+    /* outer taps: rounded filt1 >> 1 where !hev (hev_in is inverted) */ \
+    filt = __msa_srari_b(filt1, 1);                                      \
+    hev_in = __msa_xori_b((v16u8) hev_in, 0xff);                         \
+    filt = filt & (v16i8) hev_in;                                        \
+    /* q1 -= filt, p1 += filt (saturating), then remove the bias */      \
+    q1_m = __msa_subs_s_b(q1_m, filt);                                   \
+    q1_out = __msa_xori_b((v16u8) q1_m, 0x80);                           \
+    p1_m = __msa_adds_s_b(p1_m, filt);                                   \
+    p1_out = __msa_xori_b((v16u8) p1_m, 0x80);                           \
+}
+
+#define VP9_FLAT4(p3_in, p2_in, p0_in, q0_in, q2_in, q3_in, flat_out)  \
+{                                                                      \
+    v16u8 tmp, p2_a_sub_p0, q2_a_sub_q0, p3_a_sub_p0, q3_a_sub_q0;     \
+    v16u8 zero_in = { 0 };                                             \
+    /* NOTE: reads flat_out as input and `mask` from the caller */     \
+    tmp = __msa_ori_b(zero_in, 1);                                     \
+    p2_a_sub_p0 = __msa_asub_u_b(p2_in, p0_in);                        \
+    q2_a_sub_q0 = __msa_asub_u_b(q2_in, q0_in);                        \
+    p3_a_sub_p0 = __msa_asub_u_b(p3_in, p0_in);                        \
+    q3_a_sub_q0 = __msa_asub_u_b(q3_in, q0_in);                        \
+    /* fold the four differences into the incoming flat_out max */     \
+    p2_a_sub_p0 = __msa_max_u_b(p2_a_sub_p0, q2_a_sub_q0);             \
+    flat_out = __msa_max_u_b(p2_a_sub_p0, flat_out);                   \
+    p3_a_sub_p0 = __msa_max_u_b(p3_a_sub_p0, q3_a_sub_q0);             \
+    flat_out = __msa_max_u_b(p3_a_sub_p0, flat_out);                   \
+    /* 0xff where max <= 1, restricted to lanes set in mask */         \
+    flat_out = (tmp < (v16u8) flat_out);                               \
+    flat_out = __msa_xori_b(flat_out, 0xff);                           \
+    flat_out = flat_out & (mask);                                      \
+}
+
+#define VP9_FLAT5(p7_in, p6_in, p5_in, p4_in, p0_in, q0_in, q4_in,  \
+                  q5_in, q6_in, q7_in, flat_in, flat2_out)          \
+{                                                                   \
+    v16u8 tmp, zero_in = { 0 };                                     \
+    v16u8 p4_a_sub_p0, q4_a_sub_q0, p5_a_sub_p0, q5_a_sub_q0;       \
+    v16u8 p6_a_sub_p0, q6_a_sub_q0, p7_a_sub_p0, q7_a_sub_q0;       \
+    /* flat2: p4..p7 within 1 of p0 and q4..q7 within 1 of q0 */    \
+    tmp = __msa_ori_b(zero_in, 1);                                  \
+    p4_a_sub_p0 = __msa_asub_u_b(p4_in, p0_in);                     \
+    q4_a_sub_q0 = __msa_asub_u_b(q4_in, q0_in);                     \
+    p5_a_sub_p0 = __msa_asub_u_b(p5_in, p0_in);                     \
+    q5_a_sub_q0 = __msa_asub_u_b(q5_in, q0_in);                     \
+    p6_a_sub_p0 = __msa_asub_u_b(p6_in, p0_in);                     \
+    q6_a_sub_q0 = __msa_asub_u_b(q6_in, q0_in);                     \
+    p7_a_sub_p0 = __msa_asub_u_b(p7_in, p0_in);                     \
+    q7_a_sub_q0 = __msa_asub_u_b(q7_in, q0_in);                     \
+    /* reduce the eight absolute differences to a per-lane max */   \
+    p4_a_sub_p0 = __msa_max_u_b(p4_a_sub_p0, q4_a_sub_q0);          \
+    flat2_out = __msa_max_u_b(p5_a_sub_p0, q5_a_sub_q0);            \
+    flat2_out = __msa_max_u_b(p4_a_sub_p0, flat2_out);              \
+    p6_a_sub_p0 = __msa_max_u_b(p6_a_sub_p0, q6_a_sub_q0);          \
+    flat2_out = __msa_max_u_b(p6_a_sub_p0, flat2_out);              \
+    p7_a_sub_p0 = __msa_max_u_b(p7_a_sub_p0, q7_a_sub_q0);          \
+    flat2_out = __msa_max_u_b(p7_a_sub_p0, flat2_out);              \
+    /* 0xff where max <= 1, restricted to lanes set in flat_in */   \
+    flat2_out = (tmp < (v16u8) flat2_out);                          \
+    flat2_out = __msa_xori_b(flat2_out, 0xff);                      \
+    flat2_out = flat2_out & flat_in;                                \
+}
+
+#define VP9_FILTER8(p3_in, p2_in, p1_in, p0_in,                \
+                    q0_in, q1_in, q2_in, q3_in,                \
+                    p2_filt8_out, p1_filt8_out, p0_filt8_out,  \
+                    q0_filt8_out, q1_filt8_out, q2_filt8_out)  \
+{                                                              \
+    v8u16 tmp0, tmp1, tmp2;                                    \
+    /* weighted sums (weights total 8), rounded by >> 3 */     \
+    tmp2 = p2_in + p1_in + p0_in;                              \
+    tmp0 = p3_in << 1;                                         \
+    /* p2 out: 3*p3 + 2*p2 + p1 + p0 + q0 */                   \
+    tmp0 = tmp0 + tmp2 + q0_in;                                \
+    tmp1 = tmp0 + p3_in + p2_in;                               \
+    p2_filt8_out = (v8i16) __msa_srari_h((v8i16) tmp1, 3);     \
+    /* p1 out: 2*p3 + p2 + 2*p1 + p0 + q0 + q1 */              \
+    tmp1 = tmp0 + p1_in + q1_in;                               \
+    p1_filt8_out = (v8i16) __msa_srari_h((v8i16) tmp1, 3);     \
+    /* p0 out: p3 + p2 + p1 + 2*p0 + q0 + q1 + q2 */           \
+    tmp1 = q2_in + q1_in + q0_in;                              \
+    tmp2 = tmp2 + tmp1;                                        \
+    tmp0 = tmp2 + (p0_in);                                     \
+    tmp0 = tmp0 + (p3_in);                                     \
+    p0_filt8_out = (v8i16) __msa_srari_h((v8i16) tmp0, 3);     \
+    /* q2 out: p0 + q0 + q1 + 2*q2 + 3*q3 */                   \
+    tmp0 = q2_in + q3_in;                                      \
+    tmp0 = p0_in + tmp1 + tmp0;                                \
+    tmp1 = q3_in + q3_in;                                      \
+    tmp1 = tmp1 + tmp0;                                        \
+    q2_filt8_out = (v8i16) __msa_srari_h((v8i16) tmp1, 3);     \
+    /* q0 out: p2 + p1 + p0 + 2*q0 + q1 + q2 + q3 */           \
+    tmp0 = tmp2 + q3_in;                                       \
+    tmp1 = tmp0 + q0_in;                                       \
+    q0_filt8_out = (v8i16) __msa_srari_h((v8i16) tmp1, 3);     \
+    /* q1 out: p1 + p0 + q0 + 2*q1 + q2 + 2*q3 */              \
+    tmp1 = tmp0 - p2_in;                                       \
+    tmp0 = q1_in + q3_in;                                      \
+    tmp1 = tmp0 + tmp1;                                        \
+    q1_filt8_out = (v8i16) __msa_srari_h((v8i16) tmp1, 3);     \
+}
+
+#define LPF_MASK_HEV(p3_in, p2_in, p1_in, p0_in,                   \
+                     q0_in, q1_in, q2_in, q3_in,                   \
+                     limit_in, b_limit_in, thresh_in,              \
+                     hev_out, mask_out, flat_out)                  \
+{                                                                  \
+    v16u8 p3_asub_p2_m, p2_asub_p1_m, p1_asub_p0_m, q1_asub_q0_m;  \
+    v16u8 p1_asub_q1_m, p0_asub_q0_m, q3_asub_q2_m, q2_asub_q1_m;  \
+    /* outputs hev/mask; flat_out = max(|p1-p0|, |q1-q0|) */       \
+    /* absolute subtraction of pixel values */                     \
+    p3_asub_p2_m = __msa_asub_u_b(p3_in, p2_in);                   \
+    p2_asub_p1_m = __msa_asub_u_b(p2_in, p1_in);                   \
+    p1_asub_p0_m = __msa_asub_u_b(p1_in, p0_in);                   \
+    q1_asub_q0_m = __msa_asub_u_b(q1_in, q0_in);                   \
+    q2_asub_q1_m = __msa_asub_u_b(q2_in, q1_in);                   \
+    q3_asub_q2_m = __msa_asub_u_b(q3_in, q2_in);                   \
+    p0_asub_q0_m = __msa_asub_u_b(p0_in, q0_in);                   \
+    p1_asub_q1_m = __msa_asub_u_b(p1_in, q1_in);                   \
+                                                                   \
+    /* hev: 0xff where max(|p1-p0|, |q1-q0|) > thresh */           \
+    flat_out = __msa_max_u_b(p1_asub_p0_m, q1_asub_q0_m);          \
+    hev_out = thresh_in < (v16u8) flat_out;                        \
+                                                                   \
+    /* mask: edge term 2*|p0-q0| + |p1-q1|/2 (saturating adds) */  \
+    p0_asub_q0_m = __msa_adds_u_b(p0_asub_q0_m, p0_asub_q0_m);     \
+    p1_asub_q1_m >>= 1;                                            \
+    p0_asub_q0_m = __msa_adds_u_b(p0_asub_q0_m, p1_asub_q1_m);     \
+    /* 0xff where edge term > b_limit; max with adjacent diffs */  \
+    mask_out = b_limit_in < p0_asub_q0_m;                          \
+    mask_out = __msa_max_u_b(flat_out, mask_out);                  \
+    p3_asub_p2_m = __msa_max_u_b(p3_asub_p2_m, p2_asub_p1_m);      \
+    mask_out = __msa_max_u_b(p3_asub_p2_m, mask_out);              \
+    q2_asub_q1_m = __msa_max_u_b(q2_asub_q1_m, q3_asub_q2_m);      \
+    mask_out = __msa_max_u_b(q2_asub_q1_m, mask_out);              \
+    /* mask_out = 0xff where the running max stays <= limit */     \
+    mask_out = limit_in < (v16u8) mask_out;                        \
+    mask_out = __msa_xori_b(mask_out, 0xff);                       \
+}
+
+void ff_loop_filter_v_4_8_msa(uint8_t *src, ptrdiff_t pitch,
+                              int32_t b_limit_ptr,
+                              int32_t limit_ptr,
+                              int32_t thresh_ptr)
+{
+    uint64_t p1_d, p0_d, q0_d, q1_d;
+    v16u8 mask, hev, flat, thresh, b_limit, limit;
+    v16u8 p3, p2, p1, p0, q3, q2, q1, q0, p1_out, p0_out, q0_out, q1_out;
+    /* Filters rows p1..q1 around src; 8 bytes per row are written back. */
+    /* load vector elements (8 rows starting at src - 4 * pitch) */
+    LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
+    /* replicate the low byte of each scalar parameter into every lane */
+    thresh = (v16u8) __msa_fill_b(thresh_ptr);
+    b_limit = (v16u8) __msa_fill_b(b_limit_ptr);
+    limit = (v16u8) __msa_fill_b(limit_ptr);
+    /* flat is filled in by LPF_MASK_HEV but not used by this function */
+    LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+                 hev, mask, flat);
+    VP9_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out,
+                       q1_out);
+    /* extract the low 64 bits of each result and store from src - 2 * pitch */
+    p1_d = __msa_copy_u_d((v2i64) p1_out, 0);
+    p0_d = __msa_copy_u_d((v2i64) p0_out, 0);
+    q0_d = __msa_copy_u_d((v2i64) q0_out, 0);
+    q1_d = __msa_copy_u_d((v2i64) q1_out, 0);
+    SD4(p1_d, p0_d, q0_d, q1_d, (src - 2 * pitch), pitch);
+}
+
+
+void ff_loop_filter_v_44_16_msa(uint8_t *src, ptrdiff_t pitch,
+                                int32_t b_limit_ptr,
+                                int32_t limit_ptr,
+                                int32_t thresh_ptr)
+{
+    v16u8 mask, hev, flat, thresh0, b_limit0, limit0, thresh1, b_limit1, limit1;
+    v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
+    /* Filters rows p1..q1 around src over 16 columns (two 8-lane halves). */
+    /* load vector elements (8 rows starting at src - 4 * pitch) */
+    LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
+    /* low byte of each parameter -> lanes 0..7, bits 8..15 -> lanes 8..15 */
+    thresh0 = (v16u8) __msa_fill_b(thresh_ptr);
+    thresh1 = (v16u8) __msa_fill_b(thresh_ptr >> 8);
+    thresh0 = (v16u8) __msa_ilvr_d((v2i64) thresh1, (v2i64) thresh0);
+
+    b_limit0 = (v16u8) __msa_fill_b(b_limit_ptr);
+    b_limit1 = (v16u8) __msa_fill_b(b_limit_ptr >> 8);
+    b_limit0 = (v16u8) __msa_ilvr_d((v2i64) b_limit1, (v2i64) b_limit0);
+
+    limit0 = (v16u8) __msa_fill_b(limit_ptr);
+    limit1 = (v16u8) __msa_fill_b(limit_ptr >> 8);
+    limit0 = (v16u8) __msa_ilvr_d((v2i64) limit1, (v2i64) limit0);
+    /* results overwrite p1..q1 in place; flat is computed but unused here */
+    LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit0, b_limit0, thresh0,
+                 hev, mask, flat);
+    VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1, p0, q0, q1);
+    /* store the four filtered rows starting at src - 2 * pitch */
+    ST_UB4(p1, p0, q0, q1, (src - 2 * pitch), pitch);
+}
+
+void ff_loop_filter_v_8_8_msa(uint8_t *src, ptrdiff_t pitch,
+                              int32_t b_limit_ptr,
+                              int32_t limit_ptr,
+                              int32_t thresh_ptr)
+{
+    uint64_t p2_d, p1_d, p0_d, q0_d, q1_d, q2_d;
+    v16u8 mask, hev, flat, thresh, b_limit, limit;
+    v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
+    v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out;
+    v8i16 p2_filter8, p1_filter8, p0_filter8;
+    v8i16 q0_filter8, q1_filter8, q2_filter8;
+    v8u16 p3_r, p2_r, p1_r, p0_r, q3_r, q2_r, q1_r, q0_r;
+    v16i8 zero = { 0 };
+    /* 8 columns: FILTER4 always runs; FILTER8 replaces it where flat is set. */
+    /* load vector elements */
+    LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
+    /* replicate the low byte of each scalar parameter into every lane */
+    thresh = (v16u8) __msa_fill_b(thresh_ptr);
+    b_limit = (v16u8) __msa_fill_b(b_limit_ptr);
+    limit = (v16u8) __msa_fill_b(limit_ptr);
+    /* flat leaves LPF_MASK_HEV as a running max; VP9_FLAT4 finalises it */
+    LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+                 hev, mask, flat);
+    VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat);
+    VP9_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out,
+                       q1_out);
+    /* keep flat for the low 8 lanes only; the upper half becomes zero */
+    flat = (v16u8) __msa_ilvr_d((v2i64) zero, (v2i64) flat);
+
+    /* if flat is zero for all pixels, then no need to calculate other filter */
+    if (__msa_test_bz_v(flat)) {
+        p1_d = __msa_copy_u_d((v2i64) p1_out, 0);
+        p0_d = __msa_copy_u_d((v2i64) p0_out, 0);
+        q0_d = __msa_copy_u_d((v2i64) q0_out, 0);
+        q1_d = __msa_copy_u_d((v2i64) q1_out, 0);
+        SD4(p1_d, p0_d, q0_d, q1_d, (src - 2 * pitch), pitch);
+    } else {
+        ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1,
+                   zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r,
+                   q2_r, q3_r);
+        VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filter8,
+                    p1_filter8, p0_filter8, q0_filter8, q1_filter8, q2_filter8);
+
+        /* convert 16 bit output data into 8 bit */
+        PCKEV_B4_SH(zero, p2_filter8, zero, p1_filter8, zero, p0_filter8,
+                    zero, q0_filter8, p2_filter8, p1_filter8, p0_filter8,
+                    q0_filter8);
+        PCKEV_B2_SH(zero, q1_filter8, zero, q2_filter8, q1_filter8, q2_filter8);
+
+        /* take the FILTER8 byte wherever flat is set, else keep the operand */
+        p2_out = __msa_bmnz_v(p2, (v16u8) p2_filter8, flat);
+        p1_out = __msa_bmnz_v(p1_out, (v16u8) p1_filter8, flat);
+        p0_out = __msa_bmnz_v(p0_out, (v16u8) p0_filter8, flat);
+        q0_out = __msa_bmnz_v(q0_out, (v16u8) q0_filter8, flat);
+        q1_out = __msa_bmnz_v(q1_out, (v16u8) q1_filter8, flat);
+        q2_out = __msa_bmnz_v(q2, (v16u8) q2_filter8, flat);
+        /* extract the low 64 bits of each of the six result rows */
+        p2_d = __msa_copy_u_d((v2i64) p2_out, 0);
+        p1_d = __msa_copy_u_d((v2i64) p1_out, 0);
+        p0_d = __msa_copy_u_d((v2i64) p0_out, 0);
+        q0_d = __msa_copy_u_d((v2i64) q0_out, 0);
+        q1_d = __msa_copy_u_d((v2i64) q1_out, 0);
+        q2_d = __msa_copy_u_d((v2i64) q2_out, 0);
+
+        src -= 3 * pitch;
+        /* six rows are stored, starting three rows above the original src */
+        SD4(p2_d, p1_d, p0_d, q0_d, src, pitch);
+        src += (4 * pitch);
+        SD(q1_d, src);
+        src += pitch;
+        SD(q2_d, src);
+    }
+}
+
+void ff_loop_filter_v_88_16_msa(uint8_t *src, ptrdiff_t pitch,
+                                int32_t b_limit_ptr,
+                                int32_t limit_ptr,
+                                int32_t thresh_ptr)
+{
+    v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
+    v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out;
+    v16u8 flat, mask, hev, tmp, thresh, b_limit, limit;
+    v8u16 p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r;
+    v8u16 p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l;
+    v8i16 p2_filt8_r, p1_filt8_r, p0_filt8_r;
+    v8i16 q0_filt8_r, q1_filt8_r, q2_filt8_r;
+    v8i16 p2_filt8_l, p1_filt8_l, p0_filt8_l;
+    v8i16 q0_filt8_l, q1_filt8_l, q2_filt8_l;
+    v16u8 zero = { 0 };
+    /* 16 columns: FILTER8 may replace FILTER4 in either 8-lane half. */
+    /* load vector elements */
+    LD_UB8(src - (4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
+    /* low byte of each parameter -> lanes 0..7, bits 8..15 -> lanes 8..15 */
+    thresh = (v16u8) __msa_fill_b(thresh_ptr);
+    tmp = (v16u8) __msa_fill_b(thresh_ptr >> 8);
+    thresh = (v16u8) __msa_ilvr_d((v2i64) tmp, (v2i64) thresh);
+
+    b_limit = (v16u8) __msa_fill_b(b_limit_ptr);
+    tmp = (v16u8) __msa_fill_b(b_limit_ptr >> 8);
+    b_limit = (v16u8) __msa_ilvr_d((v2i64) tmp, (v2i64) b_limit);
+
+    limit = (v16u8) __msa_fill_b(limit_ptr);
+    tmp = (v16u8) __msa_fill_b(limit_ptr >> 8);
+    limit = (v16u8) __msa_ilvr_d((v2i64) tmp, (v2i64) limit);
+
+    /* mask and hev */
+    LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+                 hev, mask, flat);
+    VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat);
+    VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out,
+                       q1_out);
+
+    /* if flat is zero for all pixels, then no need to calculate other filter */
+    if (__msa_test_bz_v(flat)) {
+        ST_UB4(p1_out, p0_out, q0_out, q1_out, (src - 2 * pitch), pitch);
+    } else {
+        ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1,
+                   zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r,
+                   q2_r, q3_r);
+        VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r,
+                    p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r);
+        /* widen and filter the upper 8 lanes the same way */
+        ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l, p2_l, p1_l,
+                   p0_l);
+        ILVL_B4_UH(zero, q0, zero, q1, zero, q2, zero, q3, q0_l, q1_l, q2_l,
+                   q3_l);
+        VP9_FILTER8(p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, p2_filt8_l,
+                    p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l);
+
+        /* convert 16 bit output data into 8 bit */
+        PCKEV_B4_SH(p2_filt8_l, p2_filt8_r, p1_filt8_l, p1_filt8_r, p0_filt8_l,
+                    p0_filt8_r, q0_filt8_l, q0_filt8_r, p2_filt8_r, p1_filt8_r,
+                    p0_filt8_r, q0_filt8_r);
+        PCKEV_B2_SH(q1_filt8_l, q1_filt8_r, q2_filt8_l, q2_filt8_r,
+                    q1_filt8_r, q2_filt8_r);
+
+        /* take the FILTER8 byte wherever flat is set, else keep the operand */
+        p2_out = __msa_bmnz_v(p2, (v16u8) p2_filt8_r, flat);
+        p1_out = __msa_bmnz_v(p1_out, (v16u8) p1_filt8_r, flat);
+        p0_out = __msa_bmnz_v(p0_out, (v16u8) p0_filt8_r, flat);
+        q0_out = __msa_bmnz_v(q0_out, (v16u8) q0_filt8_r, flat);
+        q1_out = __msa_bmnz_v(q1_out, (v16u8) q1_filt8_r, flat);
+        q2_out = __msa_bmnz_v(q2, (v16u8) q2_filt8_r, flat);
+
+        src -= 3 * pitch;
+        /* six rows are stored, starting three rows above the original src */
+        ST_UB4(p2_out, p1_out, p0_out, q0_out, src, pitch);
+        src += (4 * pitch);
+        ST_UB2(q1_out, q2_out, src, pitch);
+        src += (2 * pitch); /* NOTE(review): src is not read after this */
+    }
+}
+
+void ff_loop_filter_v_84_16_msa(uint8_t *src, ptrdiff_t pitch,
+                                int32_t b_limit_ptr,
+                                int32_t limit_ptr,
+                                int32_t thresh_ptr)
+{
+    v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
+    v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out;
+    v16u8 flat, mask, hev, tmp, thresh, b_limit, limit;
+    v8u16 p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r;
+    v8i16 p2_filt8_r, p1_filt8_r, p0_filt8_r;
+    v8i16 q0_filt8_r, q1_filt8_r, q2_filt8_r;
+    v16u8 zero = { 0 };
+    /* 16 columns: FILTER4 on all lanes, FILTER8 only possible in lanes 0..7. */
+    /* load vector elements */
+    LD_UB8(src - (4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
+    /* low byte of each parameter -> lanes 0..7, bits 8..15 -> lanes 8..15 */
+    thresh = (v16u8) __msa_fill_b(thresh_ptr);
+    tmp = (v16u8) __msa_fill_b(thresh_ptr >> 8);
+    thresh = (v16u8) __msa_ilvr_d((v2i64) tmp, (v2i64) thresh);
+
+    b_limit = (v16u8) __msa_fill_b(b_limit_ptr);
+    tmp = (v16u8) __msa_fill_b(b_limit_ptr >> 8);
+    b_limit = (v16u8) __msa_ilvr_d((v2i64) tmp, (v2i64) b_limit);
+
+    limit = (v16u8) __msa_fill_b(limit_ptr);
+    tmp = (v16u8) __msa_fill_b(limit_ptr >> 8);
+    limit = (v16u8) __msa_ilvr_d((v2i64) tmp, (v2i64) limit);
+
+    /* mask and hev */
+    LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+                 hev, mask, flat);
+    VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat);
+    VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out,
+                       q1_out);
+    /* zero the upper half of flat so FILTER8 output is never selected there */
+    flat = (v16u8) __msa_ilvr_d((v2i64) zero, (v2i64) flat);
+
+    /* if flat is zero for all pixels, then no need to calculate other filter */
+    if (__msa_test_bz_v(flat)) {
+        ST_UB4(p1_out, p0_out, q0_out, q1_out, (src - 2 * pitch), pitch);
+    } else {
+        ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1,
+                   zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r,
+                   q2_r, q3_r);
+        VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r,
+                    p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r);
+
+        /* convert 16 bit output data into 8 bit */
+        PCKEV_B4_SH(p2_filt8_r, p2_filt8_r, p1_filt8_r, p1_filt8_r,
+                    p0_filt8_r, p0_filt8_r, q0_filt8_r, q0_filt8_r,
+                    p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r);
+        PCKEV_B2_SH(q1_filt8_r, q1_filt8_r, q2_filt8_r, q2_filt8_r,
+                    q1_filt8_r, q2_filt8_r);
+
+        /* take the FILTER8 byte wherever flat is set, else keep the operand */
+        p2_out = __msa_bmnz_v(p2, (v16u8) p2_filt8_r, flat);
+        p1_out = __msa_bmnz_v(p1_out, (v16u8) p1_filt8_r, flat);
+        p0_out = __msa_bmnz_v(p0_out, (v16u8) p0_filt8_r, flat);
+        q0_out = __msa_bmnz_v(q0_out, (v16u8) q0_filt8_r, flat);
+        q1_out = __msa_bmnz_v(q1_out, (v16u8) q1_filt8_r, flat);
+        q2_out = __msa_bmnz_v(q2, (v16u8) q2_filt8_r, flat);
+
+        src -= 3 * pitch;
+        /* six rows are stored, starting three rows above the original src */
+        ST_UB4(p2_out, p1_out, p0_out, q0_out, src, pitch);
+        src += (4 * pitch);
+        ST_UB2(q1_out, q2_out, src, pitch);
+        src += (2 * pitch); /* NOTE(review): src is not read after this */
+    }
+}
+
+void ff_loop_filter_v_48_16_msa(uint8_t *src, ptrdiff_t pitch,
+                                int32_t b_limit_ptr,
+                                int32_t limit_ptr,
+                                int32_t thresh_ptr)
+{
+    v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
+    v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out;
+    v16u8 flat, mask, hev, tmp, thresh, b_limit, limit;
+    v8u16 p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l;
+    v8i16 p2_filt8_l, p1_filt8_l, p0_filt8_l;
+    v8i16 q0_filt8_l, q1_filt8_l, q2_filt8_l;
+    v16u8 zero = { 0 };
+    /* 16 columns: FILTER4 on all lanes, FILTER8 only possible in lanes 8..15. */
+    /* load vector elements */
+    LD_UB8(src - (4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
+    /* low byte of each parameter -> lanes 0..7, bits 8..15 -> lanes 8..15 */
+    thresh = (v16u8) __msa_fill_b(thresh_ptr);
+    tmp = (v16u8) __msa_fill_b(thresh_ptr >> 8);
+    thresh = (v16u8) __msa_ilvr_d((v2i64) tmp, (v2i64) thresh);
+
+    b_limit = (v16u8) __msa_fill_b(b_limit_ptr);
+    tmp = (v16u8) __msa_fill_b(b_limit_ptr >> 8);
+    b_limit = (v16u8) __msa_ilvr_d((v2i64) tmp, (v2i64) b_limit);
+
+    limit = (v16u8) __msa_fill_b(limit_ptr);
+    tmp = (v16u8) __msa_fill_b(limit_ptr >> 8);
+    limit = (v16u8) __msa_ilvr_d((v2i64) tmp, (v2i64) limit);
+
+    /* mask and hev */
+    LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+                 hev, mask, flat);
+    VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat);
+    VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out,
+                       q1_out);
+    /* zero the low half of flat so FILTER8 is only selected in lanes 8..15 */
+    flat = (v16u8) __msa_insve_d((v2i64) flat, 0, (v2i64) zero);
+
+    /* if flat is zero for all pixels, then no need to calculate other filter */
+    if (__msa_test_bz_v(flat)) {
+        ST_UB4(p1_out, p0_out, q0_out, q1_out, (src - 2 * pitch), pitch);
+    } else {
+        ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l, p2_l, p1_l,
+                   p0_l);
+        ILVL_B4_UH(zero, q0, zero, q1, zero, q2, zero, q3, q0_l, q1_l, q2_l,
+                   q3_l);
+        VP9_FILTER8(p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, p2_filt8_l,
+                    p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l);
+
+        /* convert 16 bit output data into 8 bit */
+        PCKEV_B4_SH(p2_filt8_l, p2_filt8_l, p1_filt8_l, p1_filt8_l,
+                    p0_filt8_l, p0_filt8_l, q0_filt8_l, q0_filt8_l,
+                    p2_filt8_l, p1_filt8_l, p0_filt8_l, q0_filt8_l);
+        PCKEV_B2_SH(q1_filt8_l, q1_filt8_l, q2_filt8_l, q2_filt8_l,
+                    q1_filt8_l, q2_filt8_l);
+
+        /* take the FILTER8 byte wherever flat is set, else keep the operand */
+        p2_out = __msa_bmnz_v(p2, (v16u8) p2_filt8_l, flat);
+        p1_out = __msa_bmnz_v(p1_out, (v16u8) p1_filt8_l, flat);
+        p0_out = __msa_bmnz_v(p0_out, (v16u8) p0_filt8_l, flat);
+        q0_out = __msa_bmnz_v(q0_out, (v16u8) q0_filt8_l, flat);
+        q1_out = __msa_bmnz_v(q1_out, (v16u8) q1_filt8_l, flat);
+        q2_out = __msa_bmnz_v(q2, (v16u8) q2_filt8_l, flat);
+
+        src -= 3 * pitch;
+        /* six rows are stored, starting three rows above the original src */
+        ST_UB4(p2_out, p1_out, p0_out, q0_out, src, pitch);
+        src += (4 * pitch);
+        ST_UB2(q1_out, q2_out, src, pitch);
+        src += (2 * pitch); /* NOTE(review): src is not read after this */
+    }
+}
+
+static int32_t vp9_hz_lpf_t4_and_t8_16w(uint8_t *src, ptrdiff_t pitch,
+                                        uint8_t *filter48,
+                                        int32_t b_limit_ptr,
+                                        int32_t limit_ptr,
+                                        int32_t thresh_ptr)
+{ /* first pass (filter4 + filter8) on 16 columns across the row edge at src */
+    v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
+    v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out;
+    v16u8 flat, mask, hev, thresh, b_limit, limit;
+    v8u16 p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r;
+    v8u16 p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l;
+    v8i16 p2_filt8_r, p1_filt8_r, p0_filt8_r;
+    v8i16 q0_filt8_r, q1_filt8_r, q2_filt8_r;
+    v8i16 p2_filt8_l, p1_filt8_l, p0_filt8_l;
+    v8i16 q0_filt8_l, q1_filt8_l, q2_filt8_l;
+    v16u8 zero = { 0 };
+
+    /* load the 8 rows p3..q3; the first one sits 4 rows above src */
+    LD_UB8(src - (4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
+
+    thresh = (v16u8) __msa_fill_b(thresh_ptr); /* low byte replicated to all lanes */
+    b_limit = (v16u8) __msa_fill_b(b_limit_ptr);
+    limit = (v16u8) __msa_fill_b(limit_ptr);
+
+    /* derive the hev, mask and flat vectors from the rows and the thresholds */
+    LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+                 hev, mask, flat);
+    VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat);
+    VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out,
+                       q1_out); /* filter4 result for rows p1..q1 */
+
+    /* if flat is zero for all pixels, the filter4 result is final */
+    if (__msa_test_bz_v(flat)) {
+        ST_UB4(p1_out, p0_out, q0_out, q1_out, (src - 2 * pitch), pitch);
+
+        return 1; /* rows p1..q1 already written to src */
+    } else {
+        ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1,
+                   zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r,
+                   q2_r, q3_r); /* low 8 bytes of each row widened to 16 bit */
+        VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r,
+                    p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r);
+
+        ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l, p2_l, p1_l,
+                   p0_l); /* same for the high 8 bytes */
+        ILVL_B4_UH(zero, q0, zero, q1, zero, q2, zero, q3, q0_l, q1_l, q2_l,
+                   q3_l);
+        VP9_FILTER8(p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, p2_filt8_l,
+                    p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l);
+
+        /* pack back to 8 bit: _r supplies the low 8 bytes, _l the high 8 */
+        PCKEV_B4_SH(p2_filt8_l, p2_filt8_r, p1_filt8_l, p1_filt8_r, p0_filt8_l,
+                    p0_filt8_r, q0_filt8_l, q0_filt8_r, p2_filt8_r, p1_filt8_r,
+                    p0_filt8_r, q0_filt8_r);
+        PCKEV_B2_SH(q1_filt8_l, q1_filt8_r, q2_filt8_l, q2_filt8_r, q1_filt8_r,
+                    q2_filt8_r);
+
+        /* blend: filter8 value where flat is set, else filter4 / original row */
+        p2_out = __msa_bmnz_v(p2, (v16u8) p2_filt8_r, flat);
+        p1_out = __msa_bmnz_v(p1_out, (v16u8) p1_filt8_r, flat);
+        p0_out = __msa_bmnz_v(p0_out, (v16u8) p0_filt8_r, flat);
+        q0_out = __msa_bmnz_v(q0_out, (v16u8) q0_filt8_r, flat);
+        q1_out = __msa_bmnz_v(q1_out, (v16u8) q1_filt8_r, flat);
+        q2_out = __msa_bmnz_v(q2, (v16u8) q2_filt8_r, flat);
+
+        ST_UB4(p2_out, p1_out, p0_out, q0_out, filter48, 16); /* to scratch, not src */
+        filter48 += (4 * 16);
+        ST_UB2(q1_out, q2_out, filter48, 16);
+        filter48 += (2 * 16);
+        ST_UB(flat, filter48); /* flat lands at byte offset 96 of the scratch */
+
+        return 0; /* src untouched in this branch; results are only in filter48 */
+    }
+}
+
+static void vp9_hz_lpf_t16_16w(uint8_t *src, ptrdiff_t pitch, uint8_t *filter48)
+{ /* second pass: wide filter on 16 columns, fed by the rows and flat in filter48 */
+    v16u8 flat, flat2, filter8;
+    v16i8 zero = { 0 };
+    v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7;
+    v8u16 p7_r_in, p6_r_in, p5_r_in, p4_r_in;
+    v8u16 p3_r_in, p2_r_in, p1_r_in, p0_r_in;
+    v8u16 q7_r_in, q6_r_in, q5_r_in, q4_r_in;
+    v8u16 q3_r_in, q2_r_in, q1_r_in, q0_r_in;
+    v8u16 p7_l_in, p6_l_in, p5_l_in, p4_l_in;
+    v8u16 p3_l_in, p2_l_in, p1_l_in, p0_l_in;
+    v8u16 q7_l_in, q6_l_in, q5_l_in, q4_l_in;
+    v8u16 q3_l_in, q2_l_in, q1_l_in, q0_l_in;
+    v8u16 tmp0_r, tmp1_r, tmp0_l, tmp1_l;
+    v8i16 l_out, r_out;
+
+    flat = LD_UB(filter48 + 96); /* flat mask kept after the six 16-byte rows */
+
+    LD_UB8((src - 8 * pitch), pitch, p7, p6, p5, p4, p3, p2, p1, p0);
+    LD_UB8(src, pitch, q0, q1, q2, q3, q4, q5, q6, q7);
+    VP9_FLAT5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, flat, flat2);
+
+    /* if flat2 is zero for all pixels, just copy the six saved rows to src */
+    if (__msa_test_bz_v(flat2)) {
+        LD_UB4(filter48, 16, p2, p1, p0, q0);
+        LD_UB2(filter48 + 4 * 16, 16, q1, q2);
+
+        src -= 3 * pitch; /* row p2 */
+        ST_UB4(p2, p1, p0, q0, src, pitch);
+        src += (4 * pitch);
+        ST_UB2(q1, q2, src, pitch);
+    } else {
+        src -= 7 * pitch; /* first row written is p6 */
+
+        ILVR_B8_UH(zero, p7, zero, p6, zero, p5, zero, p4, zero, p3, zero, p2,
+                   zero, p1, zero, p0, p7_r_in, p6_r_in, p5_r_in, p4_r_in,
+                   p3_r_in, p2_r_in, p1_r_in, p0_r_in); /* low 8 columns, 16 bit */
+
+        q0_r_in = (v8u16) __msa_ilvr_b(zero, (v16i8) q0);
+
+        tmp0_r = p7_r_in << 3; /* p6 sum: 7*p7 + 2*p6 + p5 + p4 + p3 + p2 + p1 + p0 + q0 */
+        tmp0_r -= p7_r_in;
+        tmp0_r += p6_r_in;
+        tmp0_r += q0_r_in;
+        tmp1_r = p6_r_in + p5_r_in;
+        tmp1_r += p4_r_in;
+        tmp1_r += p3_r_in;
+        tmp1_r += p2_r_in;
+        tmp1_r += p1_r_in;
+        tmp1_r += p0_r_in;
+        tmp1_r += tmp0_r; /* tmp1_r is the running sum reused by every later row */
+        r_out = __msa_srari_h((v8i16) tmp1_r, 4); /* rounding shift: sum / 16 */
+
+        ILVL_B4_UH(zero, p7, zero, p6, zero, p5, zero, p4, p7_l_in, p6_l_in,
+                   p5_l_in, p4_l_in);
+        ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l_in, p2_l_in,
+                   p1_l_in, p0_l_in);
+        q0_l_in = (v8u16) __msa_ilvl_b(zero, (v16i8) q0); /* high 8 columns */
+
+        tmp0_l = p7_l_in << 3; /* same p6 sum for the high 8 columns */
+        tmp0_l -= p7_l_in;
+        tmp0_l += p6_l_in;
+        tmp0_l += q0_l_in;
+        tmp1_l = p6_l_in + p5_l_in;
+        tmp1_l += p4_l_in;
+        tmp1_l += p3_l_in;
+        tmp1_l += p2_l_in;
+        tmp1_l += p1_l_in;
+        tmp1_l += p0_l_in;
+        tmp1_l += tmp0_l;
+        l_out = __msa_srari_h((v8i16) tmp1_l, 4);
+
+        r_out = (v8i16) __msa_pckev_b((v16i8) l_out, (v16i8) r_out); /* join halves, 8 bit */
+        p6 = __msa_bmnz_v(p6, (v16u8) r_out, flat2); /* filtered only where flat2 is set */
+        ST_UB(p6, src);
+        src += pitch;
+
+        /* p5: running sum += p5 - p6 + q1 - p7 */
+        q1_r_in = (v8u16) __msa_ilvr_b(zero, (v16i8) q1);
+        tmp0_r = p5_r_in - p6_r_in;
+        tmp0_r += q1_r_in;
+        tmp0_r -= p7_r_in;
+        tmp1_r += tmp0_r;
+        r_out = __msa_srari_h((v8i16) tmp1_r, 4);
+
+        q1_l_in = (v8u16) __msa_ilvl_b(zero, (v16i8) q1);
+        tmp0_l = p5_l_in - p6_l_in;
+        tmp0_l += q1_l_in;
+        tmp0_l -= p7_l_in;
+        tmp1_l += tmp0_l;
+        l_out = __msa_srari_h((v8i16) tmp1_l, 4);
+
+        r_out = (v8i16) __msa_pckev_b((v16i8) l_out, (v16i8) r_out);
+        p5 = __msa_bmnz_v(p5, (v16u8) r_out, flat2);
+        ST_UB(p5, src);
+        src += pitch;
+
+        /* p4: running sum += p4 - p5 + q2 - p7 */
+        q2_r_in = (v8u16) __msa_ilvr_b(zero, (v16i8) q2);
+        tmp0_r = p4_r_in - p5_r_in;
+        tmp0_r += q2_r_in;
+        tmp0_r -= p7_r_in;
+        tmp1_r += tmp0_r;
+        r_out = (v8i16) __msa_srari_h((v8i16) tmp1_r, 4);
+
+        q2_l_in = (v8u16) __msa_ilvl_b(zero, (v16i8) q2);
+        tmp0_l = p4_l_in - p5_l_in;
+        tmp0_l += q2_l_in;
+        tmp0_l -= p7_l_in;
+        tmp1_l += tmp0_l;
+        l_out = __msa_srari_h((v8i16) tmp1_l, 4);
+
+        r_out = (v8i16) __msa_pckev_b((v16i8) l_out, (v16i8) r_out);
+        p4 = __msa_bmnz_v(p4, (v16u8) r_out, flat2);
+        ST_UB(p4, src);
+        src += pitch;
+
+        /* p3: running sum += p3 - p4 + q3 - p7 */
+        q3_r_in = (v8u16) __msa_ilvr_b(zero, (v16i8) q3);
+        tmp0_r = p3_r_in - p4_r_in;
+        tmp0_r += q3_r_in;
+        tmp0_r -= p7_r_in;
+        tmp1_r += tmp0_r;
+        r_out = __msa_srari_h((v8i16) tmp1_r, 4);
+
+        q3_l_in = (v8u16) __msa_ilvl_b(zero, (v16i8) q3);
+        tmp0_l = p3_l_in - p4_l_in;
+        tmp0_l += q3_l_in;
+        tmp0_l -= p7_l_in;
+        tmp1_l += tmp0_l;
+        l_out = __msa_srari_h((v8i16) tmp1_l, 4);
+
+        r_out = (v8i16) __msa_pckev_b((v16i8) l_out, (v16i8) r_out);
+        p3 = __msa_bmnz_v(p3, (v16u8) r_out, flat2);
+        ST_UB(p3, src);
+        src += pitch;
+
+        /* p2: running sum += p2 - p3 + q4 - p7; fallback is the saved p2 row */
+        q4_r_in = (v8u16) __msa_ilvr_b(zero, (v16i8) q4);
+        filter8 = LD_UB(filter48); /* saved row used where flat2 is clear */
+        tmp0_r = p2_r_in - p3_r_in;
+        tmp0_r += q4_r_in;
+        tmp0_r -= p7_r_in;
+        tmp1_r += tmp0_r;
+        r_out = __msa_srari_h((v8i16) tmp1_r, 4);
+
+        q4_l_in = (v8u16) __msa_ilvl_b(zero, (v16i8) q4);
+        tmp0_l = p2_l_in - p3_l_in;
+        tmp0_l += q4_l_in;
+        tmp0_l -= p7_l_in;
+        tmp1_l += tmp0_l;
+        l_out = __msa_srari_h((v8i16) tmp1_l, 4);
+
+        r_out = (v8i16) __msa_pckev_b((v16i8) l_out, (v16i8) r_out);
+        filter8 = __msa_bmnz_v(filter8, (v16u8) r_out, flat2);
+        ST_UB(filter8, src);
+        src += pitch;
+
+        /* p1: running sum += p1 - p2 + q5 - p7; fallback is the saved p1 row */
+        q5_r_in = (v8u16) __msa_ilvr_b(zero, (v16i8) q5);
+        filter8 = LD_UB(filter48 + 16);
+        tmp0_r = p1_r_in - p2_r_in;
+        tmp0_r += q5_r_in;
+        tmp0_r -= p7_r_in;
+        tmp1_r += tmp0_r;
+        r_out = __msa_srari_h((v8i16) tmp1_r, 4);
+
+        q5_l_in = (v8u16) __msa_ilvl_b(zero, (v16i8) q5);
+        tmp0_l = p1_l_in - p2_l_in;
+        tmp0_l += q5_l_in;
+        tmp0_l -= p7_l_in;
+        tmp1_l += tmp0_l;
+        l_out = __msa_srari_h((v8i16) tmp1_l, 4);
+
+        r_out = (v8i16) __msa_pckev_b((v16i8) l_out, (v16i8) r_out);
+        filter8 = __msa_bmnz_v(filter8, (v16u8) r_out, flat2);
+        ST_UB(filter8, src);
+        src += pitch;
+
+        /* p0: running sum += p0 - p1 + q6 - p7; fallback is the saved p0 row */
+        q6_r_in = (v8u16) __msa_ilvr_b(zero, (v16i8) q6);
+        filter8 = LD_UB(filter48 + 32);
+        tmp0_r = p0_r_in - p1_r_in;
+        tmp0_r += q6_r_in;
+        tmp0_r -= p7_r_in;
+        tmp1_r += tmp0_r;
+        r_out = __msa_srari_h((v8i16) tmp1_r, 4);
+
+        q6_l_in = (v8u16) __msa_ilvl_b(zero, (v16i8) q6);
+        tmp0_l = p0_l_in - p1_l_in;
+        tmp0_l += q6_l_in;
+        tmp0_l -= p7_l_in;
+        tmp1_l += tmp0_l;
+        l_out = __msa_srari_h((v8i16) tmp1_l, 4);
+
+        r_out = (v8i16) __msa_pckev_b((v16i8) l_out, (v16i8) r_out);
+        filter8 = __msa_bmnz_v(filter8, (v16u8) r_out, flat2);
+        ST_UB(filter8, src);
+        src += pitch;
+
+        /* q0: running sum += q7 - p0 + q0 - p7; fallback is the saved q0 row */
+        q7_r_in = (v8u16) __msa_ilvr_b(zero, (v16i8) q7);
+        filter8 = LD_UB(filter48 + 48);
+        tmp0_r = q7_r_in - p0_r_in;
+        tmp0_r += q0_r_in;
+        tmp0_r -= p7_r_in;
+        tmp1_r += tmp0_r;
+        r_out = __msa_srari_h((v8i16) tmp1_r, 4);
+
+        q7_l_in = (v8u16) __msa_ilvl_b(zero, (v16i8) q7);
+        tmp0_l = q7_l_in - p0_l_in;
+        tmp0_l += q0_l_in;
+        tmp0_l -= p7_l_in;
+        tmp1_l += tmp0_l;
+        l_out = __msa_srari_h((v8i16) tmp1_l, 4);
+
+        r_out = (v8i16) __msa_pckev_b((v16i8) l_out, (v16i8) r_out);
+        filter8 = __msa_bmnz_v(filter8, (v16u8) r_out, flat2);
+        ST_UB(filter8, src);
+        src += pitch;
+
+        /* q1: running sum += q7 - q0 + q1 - p6; fallback is the saved q1 row */
+        filter8 = LD_UB(filter48 + 64);
+        tmp0_r = q7_r_in - q0_r_in;
+        tmp0_r += q1_r_in;
+        tmp0_r -= p6_r_in;
+        tmp1_r += tmp0_r;
+        r_out = __msa_srari_h((v8i16) tmp1_r, 4);
+
+        tmp0_l = q7_l_in - q0_l_in;
+        tmp0_l += q1_l_in;
+        tmp0_l -= p6_l_in;
+        tmp1_l += tmp0_l;
+        l_out = __msa_srari_h((v8i16) tmp1_l, 4);
+
+        r_out = (v8i16) __msa_pckev_b((v16i8) l_out, (v16i8) r_out);
+        filter8 = __msa_bmnz_v(filter8, (v16u8) r_out, flat2);
+        ST_UB(filter8, src);
+        src += pitch;
+
+        /* q2: running sum += q7 - q1 + q2 - p5; fallback is the saved q2 row */
+        filter8 = LD_UB(filter48 + 80);
+        tmp0_r = q7_r_in - q1_r_in;
+        tmp0_r += q2_r_in;
+        tmp0_r -= p5_r_in;
+        tmp1_r += tmp0_r;
+        r_out = __msa_srari_h((v8i16) tmp1_r, 4);
+
+        tmp0_l = q7_l_in - q1_l_in;
+        tmp0_l += q2_l_in;
+        tmp0_l -= p5_l_in;
+        tmp1_l += tmp0_l;
+        l_out = __msa_srari_h((v8i16) tmp1_l, 4);
+
+        r_out = (v8i16) __msa_pckev_b((v16i8) l_out, (v16i8) r_out);
+        filter8 = __msa_bmnz_v(filter8, (v16u8) r_out, flat2);
+        ST_UB(filter8, src);
+        src += pitch;
+
+        /* q3: running sum += q7 - q2 + q3 - p4; fallback is the loaded q3 row */
+        tmp0_r = q7_r_in - q2_r_in;
+        tmp0_r += q3_r_in;
+        tmp0_r -= p4_r_in;
+        tmp1_r += tmp0_r;
+        r_out = __msa_srari_h((v8i16) tmp1_r, 4);
+
+        tmp0_l = q7_l_in - q2_l_in;
+        tmp0_l += q3_l_in;
+        tmp0_l -= p4_l_in;
+        tmp1_l += tmp0_l;
+        l_out = __msa_srari_h((v8i16) tmp1_l, 4);
+
+        r_out = (v8i16) __msa_pckev_b((v16i8) l_out, (v16i8) r_out);
+        q3 = __msa_bmnz_v(q3, (v16u8) r_out, flat2);
+        ST_UB(q3, src);
+        src += pitch;
+
+        /* q4: running sum += q7 - q3 + q4 - p3 */
+        tmp0_r = q7_r_in - q3_r_in;
+        tmp0_r += q4_r_in;
+        tmp0_r -= p3_r_in;
+        tmp1_r += tmp0_r;
+        r_out = __msa_srari_h((v8i16) tmp1_r, 4);
+
+        tmp0_l = q7_l_in - q3_l_in;
+        tmp0_l += q4_l_in;
+        tmp0_l -= p3_l_in;
+        tmp1_l += tmp0_l;
+        l_out = __msa_srari_h((v8i16) tmp1_l, 4);
+
+        r_out = (v8i16) __msa_pckev_b((v16i8) l_out, (v16i8) r_out);
+        q4 = __msa_bmnz_v(q4, (v16u8) r_out, flat2);
+        ST_UB(q4, src);
+        src += pitch;
+
+        /* q5: running sum += q7 - q4 + q5 - p2 */
+        tmp0_r = q7_r_in - q4_r_in;
+        tmp0_r += q5_r_in;
+        tmp0_r -= p2_r_in;
+        tmp1_r += tmp0_r;
+        r_out = __msa_srari_h((v8i16) tmp1_r, 4);
+
+        tmp0_l = q7_l_in - q4_l_in;
+        tmp0_l += q5_l_in;
+        tmp0_l -= p2_l_in;
+        tmp1_l += tmp0_l;
+        l_out = __msa_srari_h((v8i16) tmp1_l, 4);
+
+        r_out = (v8i16) __msa_pckev_b((v16i8) l_out, (v16i8) r_out);
+        q5 = __msa_bmnz_v(q5, (v16u8) r_out, flat2);
+        ST_UB(q5, src);
+        src += pitch;
+
+        /* q6: running sum += q7 - q5 + q6 - p1 (last row written) */
+        tmp0_r = q7_r_in - q5_r_in;
+        tmp0_r += q6_r_in;
+        tmp0_r -= p1_r_in;
+        tmp1_r += tmp0_r;
+        r_out = __msa_srari_h((v8i16) tmp1_r, 4);
+
+        tmp0_l = q7_l_in - q5_l_in;
+        tmp0_l += q6_l_in;
+        tmp0_l -= p1_l_in;
+        tmp1_l += tmp0_l;
+        l_out = __msa_srari_h((v8i16) tmp1_l, 4);
+
+        r_out = (v8i16) __msa_pckev_b((v16i8) l_out, (v16i8) r_out);
+        q6 = __msa_bmnz_v(q6, (v16u8) r_out, flat2);
+        ST_UB(q6, src);
+    }
+}
+
+void ff_loop_filter_v_16_16_msa(uint8_t *src, ptrdiff_t pitch,
+                                int32_t b_limit_ptr,
+                                int32_t limit_ptr,
+                                int32_t thresh_ptr)
+{
+    /* scratch shared by both passes: six 16-byte rows, then the flat mask */
+    uint8_t filter48[16 * 8] ALLOC_ALIGNED(ALIGNMENT);
+    /* non-zero when the first pass already wrote its result to src */
+    const uint8_t finished = vp9_hz_lpf_t4_and_t8_16w(src, pitch, filter48,
+                                                      b_limit_ptr, limit_ptr,
+                                                      thresh_ptr);
+    if (!finished) {
+        vp9_hz_lpf_t16_16w(src, pitch, filter48);
+    }
+}
+
+void ff_loop_filter_v_16_8_msa(uint8_t *src, ptrdiff_t pitch,
+                               int32_t b_limit_ptr,
+                               int32_t limit_ptr,
+                               int32_t thresh_ptr)
+{ /* filter4 / filter8 / wide filter across the row edge at src, low 8 columns only */
+    uint64_t p2_d, p1_d, p0_d, q0_d, q1_d, q2_d;
+    uint64_t dword0, dword1;
+    v16u8 flat2, mask, hev, flat, thresh, b_limit, limit;
+    v16u8 p3, p2, p1, p0, q3, q2, q1, q0, p7, p6, p5, p4, q4, q5, q6, q7;
+    v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out;
+    v16u8 p0_filter16, p1_filter16;
+    v8i16 p2_filter8, p1_filter8, p0_filter8;
+    v8i16 q0_filter8, q1_filter8, q2_filter8;
+    v8u16 p7_r, p6_r, p5_r, p4_r, q7_r, q6_r, q5_r, q4_r;
+    v8u16 p3_r, p2_r, p1_r, p0_r, q3_r, q2_r, q1_r, q0_r;
+    v16i8 zero = { 0 };
+    v8u16 tmp0, tmp1, tmp2;
+
+    /* load the 8 rows p3..q3; the first one sits 4 rows above src */
+    LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
+
+    thresh = (v16u8) __msa_fill_b(thresh_ptr); /* low byte replicated to all lanes */
+    b_limit = (v16u8) __msa_fill_b(b_limit_ptr);
+    limit = (v16u8) __msa_fill_b(limit_ptr);
+
+    LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+                 hev, mask, flat);
+    VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat);
+    VP9_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out,
+                       q1_out);
+
+    flat = (v16u8) __msa_ilvr_d((v2i64) zero, (v2i64) flat); /* clear the upper 8 lanes */
+
+    /* if flat is zero for all pixels, the filter4 result is final */
+    if (__msa_test_bz_v(flat)) {
+        p1_d = __msa_copy_u_d((v2i64) p1_out, 0); /* low 8 bytes of each row */
+        p0_d = __msa_copy_u_d((v2i64) p0_out, 0);
+        q0_d = __msa_copy_u_d((v2i64) q0_out, 0);
+        q1_d = __msa_copy_u_d((v2i64) q1_out, 0);
+        SD4(p1_d, p0_d, q0_d, q1_d, src - 2 * pitch, pitch);
+    } else {
+        /* convert 8 bit input data into 16 bit */
+        ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero,
+                   q1, zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r,
+                   q1_r, q2_r, q3_r);
+        VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r,
+                    p2_filter8, p1_filter8, p0_filter8, q0_filter8,
+                    q1_filter8, q2_filter8);
+
+        /* convert 16 bit output data into 8 bit (upper 8 bytes become zero) */
+        PCKEV_B4_SH(zero, p2_filter8, zero, p1_filter8, zero, p0_filter8,
+                    zero, q0_filter8, p2_filter8, p1_filter8, p0_filter8,
+                    q0_filter8);
+        PCKEV_B2_SH(zero, q1_filter8, zero, q2_filter8, q1_filter8,
+                    q2_filter8);
+
+        /* blend: filter8 value where flat is set, else filter4 / original row */
+        p2_out = __msa_bmnz_v(p2, (v16u8) p2_filter8, flat);
+        p1_out = __msa_bmnz_v(p1_out, (v16u8) p1_filter8, flat);
+        p0_out = __msa_bmnz_v(p0_out, (v16u8) p0_filter8, flat);
+        q0_out = __msa_bmnz_v(q0_out, (v16u8) q0_filter8, flat);
+        q1_out = __msa_bmnz_v(q1_out, (v16u8) q1_filter8, flat);
+        q2_out = __msa_bmnz_v(q2, (v16u8) q2_filter8, flat);
+
+        /* load the 8 outer rows p7..p4 and q4..q7 */
+        LD_UB4((src - 8 * pitch), pitch, p7, p6, p5, p4);
+        LD_UB4(src + (4 * pitch), pitch, q4, q5, q6, q7);
+
+        VP9_FLAT5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, flat, flat2);
+
+        /* if flat2 is zero for all pixels, store the six blended rows p2..q2 */
+        if (__msa_test_bz_v(flat2)) {
+            p2_d = __msa_copy_u_d((v2i64) p2_out, 0);
+            p1_d = __msa_copy_u_d((v2i64) p1_out, 0);
+            p0_d = __msa_copy_u_d((v2i64) p0_out, 0);
+            q0_d = __msa_copy_u_d((v2i64) q0_out, 0);
+            q1_d = __msa_copy_u_d((v2i64) q1_out, 0);
+            q2_d = __msa_copy_u_d((v2i64) q2_out, 0);
+
+            SD4(p2_d, p1_d, p0_d, q0_d, src - 3 * pitch, pitch);
+            SD(q1_d, src + pitch);
+            SD(q2_d, src + 2 * pitch);
+        } else {
+            /* widen the low 8 bytes of the outer rows to 16 bit */
+            ILVR_B8_UH(zero, p7, zero, p6, zero, p5, zero, p4, zero, q4,
+                       zero, q5, zero, q6, zero, q7, p7_r, p6_r, p5_r, p4_r,
+                       q4_r, q5_r, q6_r, q7_r);
+
+            tmp0 = p7_r << 3; /* tmp0 becomes 7*p7 + p6 + q0 over these four lines */
+            tmp0 -= p7_r;
+            tmp0 += p6_r;
+            tmp0 += q0_r;
+
+            src -= 7 * pitch; /* first row written is p6 */
+
+            /* p6 and p5; p0_filter16/p1_filter16 are scratch for every row pair */
+            tmp1 = p6_r + p5_r + p4_r + p3_r;
+            tmp1 += (p2_r + p1_r + p0_r);
+            tmp1 += tmp0; /* running sum: 7*p7 + 2*p6 + p5 + p4 + p3 + p2 + p1 + p0 + q0 */
+            p0_filter16 = (v16u8) __msa_srari_h((v8i16) tmp1, 4); /* rounding shift: sum / 16 */
+            tmp0 = p5_r - p6_r + q1_r - p7_r;
+            tmp1 += tmp0;
+            p1_filter16 = (v16u8) __msa_srari_h((v8i16) tmp1, 4);
+            PCKEV_B2_UB(zero, p0_filter16, zero, p1_filter16,
+                        p0_filter16, p1_filter16);
+            p0_filter16 = __msa_bmnz_v(p6, p0_filter16, flat2); /* filtered only where flat2 is set */
+            p1_filter16 = __msa_bmnz_v(p5, p1_filter16, flat2);
+            dword0 = __msa_copy_u_d((v2i64) p0_filter16, 0);
+            dword1 = __msa_copy_u_d((v2i64) p1_filter16, 0);
+            SD(dword0, src);
+            src += pitch;
+            SD(dword1, src);
+            src += pitch;
+
+            /* p4 and p3: slide the running sum tmp1 by tmp0, then by tmp2 */
+            tmp0 = p4_r - p5_r + q2_r - p7_r;
+            tmp2 = p3_r - p4_r + q3_r - p7_r;
+            tmp1 += tmp0;
+            p0_filter16 = (v16u8) __msa_srari_h((v8i16) tmp1, 4);
+            tmp1 += tmp2;
+            p1_filter16 = (v16u8) __msa_srari_h((v8i16) tmp1, 4);
+            PCKEV_B2_UB(zero, p0_filter16, zero, p1_filter16,
+                        p0_filter16, p1_filter16);
+            p0_filter16 = __msa_bmnz_v(p4, p0_filter16, flat2);
+            p1_filter16 = __msa_bmnz_v(p3, p1_filter16, flat2);
+            dword0 = __msa_copy_u_d((v2i64) p0_filter16, 0);
+            dword1 = __msa_copy_u_d((v2i64) p1_filter16, 0);
+            SD(dword0, src);
+            src += pitch;
+            SD(dword1, src);
+            src += pitch;
+
+            /* p2 and p1: fallback is the blended p2_out / p1_out */
+            tmp0 = p2_r - p3_r + q4_r - p7_r;
+            tmp2 = p1_r - p2_r + q5_r - p7_r;
+            tmp1 += tmp0;
+            p0_filter16 = (v16u8) __msa_srari_h((v8i16) tmp1, 4);
+            tmp1 += tmp2;
+            p1_filter16 = (v16u8) __msa_srari_h((v8i16) tmp1, 4);
+            PCKEV_B2_UB(zero, p0_filter16, zero, p1_filter16,
+                        p0_filter16, p1_filter16);
+            p0_filter16 = __msa_bmnz_v(p2_out, p0_filter16, flat2);
+            p1_filter16 = __msa_bmnz_v(p1_out, p1_filter16, flat2);
+            dword0 = __msa_copy_u_d((v2i64) p0_filter16, 0);
+            dword1 = __msa_copy_u_d((v2i64) p1_filter16, 0);
+            SD(dword0, src);
+            src += pitch;
+            SD(dword1, src);
+            src += pitch;
+
+            /* p0 and q0: fallback is the blended p0_out / q0_out */
+            tmp0 = (p0_r - p1_r) + (q6_r - p7_r);
+            tmp2 = (q7_r - p0_r) + (q0_r - p7_r);
+            tmp1 += tmp0;
+            p0_filter16 = (v16u8) __msa_srari_h((v8i16) tmp1, 4);
+            tmp1 += tmp2;
+            p1_filter16 = (v16u8) __msa_srari_h((v8i16) tmp1, 4);
+            PCKEV_B2_UB(zero, p0_filter16, zero, p1_filter16,
+                        p0_filter16, p1_filter16);
+            p0_filter16 = __msa_bmnz_v(p0_out, p0_filter16, flat2);
+            p1_filter16 = __msa_bmnz_v(q0_out, p1_filter16, flat2);
+            dword0 = __msa_copy_u_d((v2i64) p0_filter16, 0);
+            dword1 = __msa_copy_u_d((v2i64) p1_filter16, 0);
+            SD(dword0, src);
+            src += pitch;
+            SD(dword1, src);
+            src += pitch;
+
+            /* q1 and q2: fallback is the blended q1_out / q2_out */
+            tmp0 = q7_r - q0_r + q1_r - p6_r;
+            tmp2 = q7_r - q1_r + q2_r - p5_r;
+            tmp1 += tmp0;
+            p0_filter16 = (v16u8) __msa_srari_h((v8i16) tmp1, 4);
+            tmp1 += tmp2;
+            p1_filter16 = (v16u8) __msa_srari_h((v8i16) tmp1, 4);
+            PCKEV_B2_UB(zero, p0_filter16, zero, p1_filter16,
+                        p0_filter16, p1_filter16);
+            p0_filter16 = __msa_bmnz_v(q1_out, p0_filter16, flat2);
+            p1_filter16 = __msa_bmnz_v(q2_out, p1_filter16, flat2);
+            dword0 = __msa_copy_u_d((v2i64) p0_filter16, 0);
+            dword1 = __msa_copy_u_d((v2i64) p1_filter16, 0);
+            SD(dword0, src);
+            src += pitch;
+            SD(dword1, src);
+            src += pitch;
+
+            /* q3 and q4: fallback is the loaded q3 / q4 */
+            tmp0 = (q7_r - q2_r) + (q3_r - p4_r);
+            tmp2 = (q7_r - q3_r) + (q4_r - p3_r);
+            tmp1 += tmp0;
+            p0_filter16 = (v16u8) __msa_srari_h((v8i16) tmp1, 4);
+            tmp1 += tmp2;
+            p1_filter16 = (v16u8) __msa_srari_h((v8i16) tmp1, 4);
+            PCKEV_B2_UB(zero, p0_filter16, zero, p1_filter16,
+                        p0_filter16, p1_filter16);
+            p0_filter16 = __msa_bmnz_v(q3, p0_filter16, flat2);
+            p1_filter16 = __msa_bmnz_v(q4, p1_filter16, flat2);
+            dword0 = __msa_copy_u_d((v2i64) p0_filter16, 0);
+            dword1 = __msa_copy_u_d((v2i64) p1_filter16, 0);
+            SD(dword0, src);
+            src += pitch;
+            SD(dword1, src);
+            src += pitch;
+
+            /* q5 and q6: fallback is the loaded q5 / q6 (last rows written) */
+            tmp0 = (q7_r - q4_r) + (q5_r - p2_r);
+            tmp2 = (q7_r - q5_r) + (q6_r - p1_r);
+            tmp1 += tmp0;
+            p0_filter16 = (v16u8) __msa_srari_h((v8i16) tmp1, 4);
+            tmp1 += tmp2;
+            p1_filter16 = (v16u8) __msa_srari_h((v8i16) tmp1, 4);
+            PCKEV_B2_UB(zero, p0_filter16, zero, p1_filter16,
+                        p0_filter16, p1_filter16);
+            p0_filter16 = __msa_bmnz_v(q5, p0_filter16, flat2);
+            p1_filter16 = __msa_bmnz_v(q6, p1_filter16, flat2);
+            dword0 = __msa_copy_u_d((v2i64) p0_filter16, 0);
+            dword1 = __msa_copy_u_d((v2i64) p1_filter16, 0);
+            SD(dword0, src);
+            src += pitch;
+            SD(dword1, src);
+        }
+    }
+}
+
+void ff_loop_filter_h_4_8_msa(uint8_t *src, ptrdiff_t pitch,
+                              int32_t b_limit_ptr,
+                              int32_t limit_ptr,
+                              int32_t thresh_ptr)
+{ /* filter4 across the column edge at src, over 8 rows */
+    v16u8 mask, hev, flat, limit, thresh, b_limit;
+    v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
+    v8i16 vec0, vec1, vec2, vec3;
+
+    LD_UB8((src - 4), pitch, p3, p2, p1, p0, q0, q1, q2, q3); /* 8 rows, from 4 bytes left of src */
+
+    thresh = (v16u8) __msa_fill_b(thresh_ptr); /* low byte replicated to all lanes */
+    b_limit = (v16u8) __msa_fill_b(b_limit_ptr);
+    limit = (v16u8) __msa_fill_b(limit_ptr);
+
+    TRANSPOSE8x8_UB_UB(p3, p2, p1, p0, q0, q1, q2, q3,
+                       p3, p2, p1, p0, q0, q1, q2, q3); /* in place; presumably rows -> pixel columns */
+    LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+                 hev, mask, flat); /* flat is produced but not used in this function */
+    VP9_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1, p0, q0, q1); /* outputs overwrite inputs */
+    ILVR_B2_SH(p0, p1, q1, q0, vec0, vec1); /* NOTE(review): presumably rebuilds p1 p0 q0 q1 per row */
+    ILVRL_H2_SH(vec1, vec0, vec2, vec3);
+
+    src -= 2; /* output starts 2 bytes left of the edge */
+    ST4x4_UB(vec2, vec2, 0, 1, 2, 3, src, pitch); /* presumably 4 bytes for each of rows 0..3 */
+    src += 4 * pitch;
+    ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src, pitch); /* then rows 4..7 */
+}
+
+void ff_loop_filter_h_44_16_msa(uint8_t *src, ptrdiff_t pitch,
+                                int32_t b_limit_ptr,
+                                int32_t limit_ptr,
+                                int32_t thresh_ptr)
+{ /* filter4 across the column edge at src, 16 rows; each *_ptr packs two byte values */
+    v16u8 mask, hev, flat;
+    v16u8 thresh0, b_limit0, limit0, thresh1, b_limit1, limit1;
+    v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
+    v16u8 row0, row1, row2, row3, row4, row5, row6, row7;
+    v16u8 row8, row9, row10, row11, row12, row13, row14, row15;
+    v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+
+    LD_UB8(src - 4, pitch, row0, row1, row2, row3, row4, row5, row6, row7); /* from 4 bytes left of src */
+    LD_UB8(src - 4 + (8 * pitch), pitch,
+           row8, row9, row10, row11, row12, row13, row14, row15);
+
+    TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7,
+                        row8, row9, row10, row11, row12, row13, row14, row15,
+                        p3, p2, p1, p0, q0, q1, q2, q3); /* presumably one lane per input row */
+
+    thresh0 = (v16u8) __msa_fill_b(thresh_ptr); /* bits 0..7 -> low 8 lanes */
+    thresh1 = (v16u8) __msa_fill_b(thresh_ptr >> 8); /* bits 8..15 -> high 8 lanes */
+    thresh0 = (v16u8) __msa_ilvr_d((v2i64) thresh1, (v2i64) thresh0);
+
+    b_limit0 = (v16u8) __msa_fill_b(b_limit_ptr); /* same two-byte packing */
+    b_limit1 = (v16u8) __msa_fill_b(b_limit_ptr >> 8);
+    b_limit0 = (v16u8) __msa_ilvr_d((v2i64) b_limit1, (v2i64) b_limit0);
+
+    limit0 = (v16u8) __msa_fill_b(limit_ptr); /* same two-byte packing */
+    limit1 = (v16u8) __msa_fill_b(limit_ptr >> 8);
+    limit0 = (v16u8) __msa_ilvr_d((v2i64) limit1, (v2i64) limit0);
+
+    LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit0, b_limit0, thresh0,
+                 hev, mask, flat); /* flat is produced but not used in this function */
+    VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1, p0, q0, q1); /* outputs overwrite inputs */
+    ILVR_B2_SH(p0, p1, q1, q0, tmp0, tmp1); /* low 8 lanes */
+    ILVRL_H2_SH(tmp1, tmp0, tmp2, tmp3);
+    ILVL_B2_SH(p0, p1, q1, q0, tmp0, tmp1); /* high 8 lanes */
+    ILVRL_H2_SH(tmp1, tmp0, tmp4, tmp5);
+
+    src -= 2; /* output starts 2 bytes left of the edge */
+
+    ST4x8_UB(tmp2, tmp3, src, pitch); /* presumably 4 bytes for each of the first 8 rows */
+    src += (8 * pitch);
+    ST4x8_UB(tmp4, tmp5, src, pitch); /* then the next 8 rows */
+}
+
+void ff_loop_filter_h_8_8_msa(uint8_t *src, ptrdiff_t pitch,
+                              int32_t b_limit_ptr,
+                              int32_t limit_ptr,
+                              int32_t thresh_ptr)
+{ /* filter4 / filter8 across the column edge at src, over 8 rows */
+    v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
+    v16u8 p1_out, p0_out, q0_out, q1_out;
+    v16u8 flat, mask, hev, thresh, b_limit, limit;
+    v8u16 p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r;
+    v8i16 p2_filt8_r, p1_filt8_r, p0_filt8_r;
+    v8i16 q0_filt8_r, q1_filt8_r, q2_filt8_r;
+    v16u8 zero = { 0 };
+    v8i16 vec0, vec1, vec2, vec3, vec4;
+
+    /* load 8 rows, starting 4 bytes left of src */
+    LD_UB8(src - 4, pitch, p3, p2, p1, p0, q0, q1, q2, q3);
+
+    TRANSPOSE8x8_UB_UB(p3, p2, p1, p0, q0, q1, q2, q3,
+                       p3, p2, p1, p0, q0, q1, q2, q3); /* in place; presumably rows -> pixel columns */
+
+    thresh = (v16u8) __msa_fill_b(thresh_ptr); /* low byte replicated to all lanes */
+    b_limit = (v16u8) __msa_fill_b(b_limit_ptr);
+    limit = (v16u8) __msa_fill_b(limit_ptr);
+
+    /* mask and hev */
+    LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+                 hev, mask, flat);
+    /* flat4 */
+    VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat);
+    /* filter4 */
+    VP9_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out,
+                       q1_out);
+
+    flat = (v16u8) __msa_ilvr_d((v2i64) zero, (v2i64) flat); /* clear the upper 8 lanes */
+
+    /* if flat is zero for all pixels, the filter4 result is final */
+    if (__msa_test_bz_v(flat)) {
+        /* store 4 pixels per row: p1, p0, q0, q1 */
+        ILVR_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1);
+        ILVRL_H2_SH(vec1, vec0, vec2, vec3);
+
+        src -= 2; /* 2 bytes left of the edge */
+        ST4x4_UB(vec2, vec2, 0, 1, 2, 3, src, pitch);
+        src += 4 * pitch;
+        ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src, pitch);
+    } else {
+        ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1,
+                   zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r,
+                   q3_r); /* low 8 lanes widened to 16 bit */
+        VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r,
+                    p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r);
+        /* convert 16 bit output data into 8 bit (same data in both halves) */
+        PCKEV_B4_SH(p2_filt8_r, p2_filt8_r, p1_filt8_r, p1_filt8_r, p0_filt8_r,
+                    p0_filt8_r, q0_filt8_r, q0_filt8_r, p2_filt8_r, p1_filt8_r,
+                    p0_filt8_r, q0_filt8_r);
+        PCKEV_B2_SH(q1_filt8_r, q1_filt8_r, q2_filt8_r, q2_filt8_r, q1_filt8_r,
+                    q2_filt8_r);
+
+        /* blend: filter8 value where flat is set, else filter4 / original */
+        p2 = __msa_bmnz_v(p2, (v16u8) p2_filt8_r, flat);
+        p1 = __msa_bmnz_v(p1_out, (v16u8) p1_filt8_r, flat);
+        p0 = __msa_bmnz_v(p0_out, (v16u8) p0_filt8_r, flat);
+        q0 = __msa_bmnz_v(q0_out, (v16u8) q0_filt8_r, flat);
+        q1 = __msa_bmnz_v(q1_out, (v16u8) q1_filt8_r, flat);
+        q2 = __msa_bmnz_v(q2, (v16u8) q2_filt8_r, flat);
+
+        /* store 6 pixels per row: p2, p1, p0, q0 as 4 bytes, then q1, q2 as 2 bytes */
+        ILVR_B2_SH(p1, p2, q0, p0, vec0, vec1);
+        ILVRL_H2_SH(vec1, vec0, vec2, vec3);
+        vec4 = (v8i16) __msa_ilvr_b((v16i8) q2, (v16i8) q1); /* (q1, q2) byte pairs */
+
+        src -= 3; /* 3 bytes left of the edge */
+        ST4x4_UB(vec2, vec2, 0, 1, 2, 3, src, pitch);
+        ST2x4_UB(vec4, 0, src + 4, pitch); /* src + 4 is one byte right of the edge */
+        src += (4 * pitch);
+        ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src, pitch);
+        ST2x4_UB(vec4, 4, src + 4, pitch); /* presumably halfwords 4..7 for rows 4..7 */
+    }
+}
+
+void ff_loop_filter_h_88_16_msa(uint8_t *src, ptrdiff_t pitch,
+                                int32_t b_limit_ptr,
+                                int32_t limit_ptr,
+                                int32_t thresh_ptr)
+{
+    uint8_t *temp_src;
+    v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
+    v16u8 p1_out, p0_out, q0_out, q1_out;
+    v16u8 flat, mask, hev, thresh, b_limit, limit;
+    v16u8 row4, row5, row6, row7, row12, row13, row14, row15;
+    v8u16 p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r;
+    v8u16 p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l;
+    v8i16 p2_filt8_r, p1_filt8_r, p0_filt8_r;
+    v8i16 q0_filt8_r, q1_filt8_r, q2_filt8_r;
+    v8i16 p2_filt8_l, p1_filt8_l, p0_filt8_l;
+    v8i16 q0_filt8_l, q1_filt8_l, q2_filt8_l;
+    v16u8 zero = { 0 };
+    v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    /* 16 rows across the edge at src: filter4, plus filter8 where flat is set */
+    temp_src = src - 4; /* inputs are read starting 4 bytes left of src */
+
+    LD_UB8(temp_src, pitch, p0, p1, p2, p3, row4, row5, row6, row7);
+    temp_src += (8 * pitch);
+    LD_UB8(temp_src, pitch, q3, q2, q1, q0, row12, row13, row14, row15);
+
+    /* transpose 16x8 matrix into 8x16 (p/q names hold raw rows until then) */
+    TRANSPOSE16x8_UB_UB(p0, p1, p2, p3, row4, row5, row6, row7,
+                        q3, q2, q1, q0, row12, row13, row14, row15,
+                        p3, p2, p1, p0, q0, q1, q2, q3);
+    /* low byte of each *_ptr fills the low 8 lanes, bits 8..15 the high 8 */
+    thresh = (v16u8) __msa_fill_b(thresh_ptr);
+    vec0 = (v8i16) __msa_fill_b(thresh_ptr >> 8);
+    thresh = (v16u8) __msa_ilvr_d((v2i64) vec0, (v2i64) thresh);
+
+    b_limit = (v16u8) __msa_fill_b(b_limit_ptr);
+    vec0 = (v8i16) __msa_fill_b(b_limit_ptr >> 8);
+    b_limit = (v16u8) __msa_ilvr_d((v2i64) vec0, (v2i64) b_limit);
+
+    limit = (v16u8) __msa_fill_b(limit_ptr);
+    vec0 = (v8i16) __msa_fill_b(limit_ptr >> 8);
+    limit = (v16u8) __msa_ilvr_d((v2i64) vec0, (v2i64) limit);
+
+    /* mask and hev */
+    LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+                 hev, mask, flat);
+    /* flat4 */
+    VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat);
+    /* filter4 */
+    VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out,
+                       q1_out);
+
+    /* if flat is zero for all pixels, then no need to calculate other filter */
+    if (__msa_test_bz_v(flat)) {
+        ILVR_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1);
+        ILVRL_H2_SH(vec1, vec0, vec2, vec3);
+        ILVL_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1);
+        ILVRL_H2_SH(vec1, vec0, vec4, vec5);
+        /* filter4 result only; presumably 4 bytes per row, from src - 2 */
+        src -= 2;
+        ST4x8_UB(vec2, vec3, src, pitch);
+        src += 8 * pitch;
+        ST4x8_UB(vec4, vec5, src, pitch);
+    } else {
+        ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1,
+                   zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r,
+                   q3_r);
+        VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r,
+                    p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r);
+        /* interleave the left (upper) 8 lanes with zero as well */
+        ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l, p2_l, p1_l,
+                   p0_l);
+        ILVL_B4_UH(zero, q0, zero, q1, zero, q2, zero, q3, q0_l, q1_l, q2_l,
+                   q3_l);
+
+        /* filter8 */
+        VP9_FILTER8(p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, p2_filt8_l,
+                    p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l);
+
+        /* convert 16 bit output data into 8 bit */
+        PCKEV_B4_SH(p2_filt8_l, p2_filt8_r, p1_filt8_l, p1_filt8_r, p0_filt8_l,
+                    p0_filt8_r, q0_filt8_l, q0_filt8_r, p2_filt8_r, p1_filt8_r,
+                    p0_filt8_r, q0_filt8_r);
+        PCKEV_B2_SH(q1_filt8_l, q1_filt8_r, q2_filt8_l, q2_filt8_r, q1_filt8_r,
+                    q2_filt8_r);
+
+        /* take filter8 output where flat is set, else filter4 / unfiltered */
+        p2 = __msa_bmnz_v(p2, (v16u8) p2_filt8_r, flat);
+        p1 = __msa_bmnz_v(p1_out, (v16u8) p1_filt8_r, flat);
+        p0 = __msa_bmnz_v(p0_out, (v16u8) p0_filt8_r, flat);
+        q0 = __msa_bmnz_v(q0_out, (v16u8) q0_filt8_r, flat);
+        q1 = __msa_bmnz_v(q1_out, (v16u8) q1_filt8_r, flat);
+        q2 = __msa_bmnz_v(q2, (v16u8) q2_filt8_r, flat);
+        /* interleave back to row order: p2 p1 p0 q0 groups plus q1 q2 pairs */
+        ILVR_B2_SH(p1, p2, q0, p0, vec0, vec1);
+        ILVRL_H2_SH(vec1, vec0, vec3, vec4);
+        ILVL_B2_SH(p1, p2, q0, p0, vec0, vec1);
+        ILVRL_H2_SH(vec1, vec0, vec6, vec7);
+        ILVRL_B2_SH(q2, q1, vec2, vec5);
+        /* presumably 4 + 2 bytes per row, from src - 3, four rows at a time */
+        src -= 3;
+        ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src, pitch);
+        ST2x4_UB(vec2, 0, src + 4, pitch);
+        src += (4 * pitch);
+        ST4x4_UB(vec4, vec4, 0, 1, 2, 3, src, pitch);
+        ST2x4_UB(vec2, 4, src + 4, pitch);
+        src += (4 * pitch);
+        ST4x4_UB(vec6, vec6, 0, 1, 2, 3, src, pitch);
+        ST2x4_UB(vec5, 0, src + 4, pitch);
+        src += (4 * pitch);
+        ST4x4_UB(vec7, vec7, 0, 1, 2, 3, src, pitch);
+        ST2x4_UB(vec5, 4, src + 4, pitch);
+    }
+}
+
+void ff_loop_filter_h_84_16_msa(uint8_t *src, ptrdiff_t pitch,
+                                int32_t b_limit_ptr,
+                                int32_t limit_ptr,
+                                int32_t thresh_ptr)
+{
+    uint8_t *temp_src;
+    v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
+    v16u8 p1_out, p0_out, q0_out, q1_out;
+    v16u8 flat, mask, hev, thresh, b_limit, limit;
+    v16u8 row4, row5, row6, row7, row12, row13, row14, row15;
+    v8u16 p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r;
+    v8i16 p2_filt8_r, p1_filt8_r, p0_filt8_r;
+    v8i16 q0_filt8_r, q1_filt8_r, q2_filt8_r;
+    v16u8 zero = { 0 };
+    v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    /* 16 rows across the edge at src; filter8 is allowed in the low 8 lanes only */
+    temp_src = src - 4; /* inputs are read starting 4 bytes left of src */
+
+    LD_UB8(temp_src, pitch, p0, p1, p2, p3, row4, row5, row6, row7);
+    temp_src += (8 * pitch);
+    LD_UB8(temp_src, pitch, q3, q2, q1, q0, row12, row13, row14, row15);
+
+    /* transpose 16x8 matrix into 8x16 (p/q names hold raw rows until then) */
+    TRANSPOSE16x8_UB_UB(p0, p1, p2, p3, row4, row5, row6, row7,
+                        q3, q2, q1, q0, row12, row13, row14, row15,
+                        p3, p2, p1, p0, q0, q1, q2, q3);
+    /* low byte of each *_ptr fills the low 8 lanes, bits 8..15 the high 8 */
+    thresh = (v16u8) __msa_fill_b(thresh_ptr);
+    vec0 = (v8i16) __msa_fill_b(thresh_ptr >> 8);
+    thresh = (v16u8) __msa_ilvr_d((v2i64) vec0, (v2i64) thresh);
+
+    b_limit = (v16u8) __msa_fill_b(b_limit_ptr);
+    vec0 = (v8i16) __msa_fill_b(b_limit_ptr >> 8);
+    b_limit = (v16u8) __msa_ilvr_d((v2i64) vec0, (v2i64) b_limit);
+
+    limit = (v16u8) __msa_fill_b(limit_ptr);
+    vec0 = (v8i16) __msa_fill_b(limit_ptr >> 8);
+    limit = (v16u8) __msa_ilvr_d((v2i64) vec0, (v2i64) limit);
+
+    /* mask and hev */
+    LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+                 hev, mask, flat);
+    /* flat4 */
+    VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat);
+    /* filter4 */
+    VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out,
+                       q1_out);
+    /* keep flat for the low 8 lanes, zero the high 8 (those stay filter4) */
+    flat = (v16u8) __msa_ilvr_d((v2i64) zero, (v2i64) flat);
+
+    /* if flat is zero for all pixels, then no need to calculate other filter */
+    if (__msa_test_bz_v(flat)) {
+        ILVR_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1);
+        ILVRL_H2_SH(vec1, vec0, vec2, vec3);
+        ILVL_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1);
+        ILVRL_H2_SH(vec1, vec0, vec4, vec5);
+        /* filter4 result only; presumably 4 bytes per row, from src - 2 */
+        src -= 2;
+        ST4x8_UB(vec2, vec3, src, pitch);
+        src += 8 * pitch;
+        ST4x8_UB(vec4, vec5, src, pitch);
+    } else {
+        ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1,
+                   zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r,
+                   q3_r);
+        VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r,
+                    p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r);
+        /* only the right (low) half was filtered; it is packed with itself */
+        /* convert 16 bit output data into 8 bit */
+        PCKEV_B4_SH(p2_filt8_r, p2_filt8_r, p1_filt8_r, p1_filt8_r,
+                    p0_filt8_r, p0_filt8_r, q0_filt8_r, q0_filt8_r,
+                    p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r);
+        PCKEV_B2_SH(q1_filt8_r, q1_filt8_r, q2_filt8_r, q2_filt8_r,
+                    q1_filt8_r, q2_filt8_r);
+
+        /* take filter8 output where flat is set, else filter4 / unfiltered */
+        p2 = __msa_bmnz_v(p2, (v16u8) p2_filt8_r, flat);
+        p1 = __msa_bmnz_v(p1_out, (v16u8) p1_filt8_r, flat);
+        p0 = __msa_bmnz_v(p0_out, (v16u8) p0_filt8_r, flat);
+        q0 = __msa_bmnz_v(q0_out, (v16u8) q0_filt8_r, flat);
+        q1 = __msa_bmnz_v(q1_out, (v16u8) q1_filt8_r, flat);
+        q2 = __msa_bmnz_v(q2, (v16u8) q2_filt8_r, flat);
+        /* interleave back to row order: p2 p1 p0 q0 groups plus q1 q2 pairs */
+        ILVR_B2_SH(p1, p2, q0, p0, vec0, vec1);
+        ILVRL_H2_SH(vec1, vec0, vec3, vec4);
+        ILVL_B2_SH(p1, p2, q0, p0, vec0, vec1);
+        ILVRL_H2_SH(vec1, vec0, vec6, vec7);
+        ILVRL_B2_SH(q2, q1, vec2, vec5);
+        /* presumably 4 + 2 bytes per row, from src - 3, four rows at a time */
+        src -= 3;
+        ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src, pitch);
+        ST2x4_UB(vec2, 0, src + 4, pitch);
+        src += (4 * pitch);
+        ST4x4_UB(vec4, vec4, 0, 1, 2, 3, src, pitch);
+        ST2x4_UB(vec2, 4, src + 4, pitch);
+        src += (4 * pitch);
+        ST4x4_UB(vec6, vec6, 0, 1, 2, 3, src, pitch);
+        ST2x4_UB(vec5, 0, src + 4, pitch);
+        src += (4 * pitch);
+        ST4x4_UB(vec7, vec7, 0, 1, 2, 3, src, pitch);
+        ST2x4_UB(vec5, 4, src + 4, pitch);
+    }
+}
+
+void ff_loop_filter_h_48_16_msa(uint8_t *src, ptrdiff_t pitch,
+                                int32_t b_limit_ptr,
+                                int32_t limit_ptr,
+                                int32_t thresh_ptr)
+{
+    uint8_t *temp_src;
+    v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
+    v16u8 p1_out, p0_out, q0_out, q1_out;
+    v16u8 flat, mask, hev, thresh, b_limit, limit;
+    v16u8 row4, row5, row6, row7, row12, row13, row14, row15;
+    v8u16 p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l;
+    v8i16 p2_filt8_l, p1_filt8_l, p0_filt8_l;
+    v8i16 q0_filt8_l, q1_filt8_l, q2_filt8_l;
+    v16u8 zero = { 0 };
+    v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    /* 16 rows across the edge at src; filter8 is allowed in the high 8 lanes only */
+    temp_src = src - 4; /* inputs are read starting 4 bytes left of src */
+
+    LD_UB8(temp_src, pitch, p0, p1, p2, p3, row4, row5, row6, row7);
+    temp_src += (8 * pitch);
+    LD_UB8(temp_src, pitch, q3, q2, q1, q0, row12, row13, row14, row15);
+
+    /* transpose 16x8 matrix into 8x16 (p/q names hold raw rows until then) */
+    TRANSPOSE16x8_UB_UB(p0, p1, p2, p3, row4, row5, row6, row7,
+                        q3, q2, q1, q0, row12, row13, row14, row15,
+                        p3, p2, p1, p0, q0, q1, q2, q3);
+    /* low byte of each *_ptr fills the low 8 lanes, bits 8..15 the high 8 */
+    thresh = (v16u8) __msa_fill_b(thresh_ptr);
+    vec0 = (v8i16) __msa_fill_b(thresh_ptr >> 8);
+    thresh = (v16u8) __msa_ilvr_d((v2i64) vec0, (v2i64) thresh);
+
+    b_limit = (v16u8) __msa_fill_b(b_limit_ptr);
+    vec0 = (v8i16) __msa_fill_b(b_limit_ptr >> 8);
+    b_limit = (v16u8) __msa_ilvr_d((v2i64) vec0, (v2i64) b_limit);
+
+    limit = (v16u8) __msa_fill_b(limit_ptr);
+    vec0 = (v8i16) __msa_fill_b(limit_ptr >> 8);
+    limit = (v16u8) __msa_ilvr_d((v2i64) vec0, (v2i64) limit);
+
+    /* mask and hev */
+    LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+                 hev, mask, flat);
+    /* flat4 */
+    VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat);
+    /* filter4 */
+    VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out,
+                       q1_out);
+    /* zero the low 8 lanes of flat (those stay filter4), keep the high 8 */
+    flat = (v16u8) __msa_insve_d((v2i64) flat, 0, (v2i64) zero);
+
+    /* if flat is zero for all pixels, then no need to calculate other filter */
+    if (__msa_test_bz_v(flat)) {
+        ILVR_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1);
+        ILVRL_H2_SH(vec1, vec0, vec2, vec3);
+        ILVL_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1);
+        ILVRL_H2_SH(vec1, vec0, vec4, vec5);
+        /* filter4 result only; presumably 4 bytes per row, from src - 2 */
+        src -= 2;
+        ST4x8_UB(vec2, vec3, src, pitch);
+        src += 8 * pitch;
+        ST4x8_UB(vec4, vec5, src, pitch);
+    } else {
+        ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l, p2_l, p1_l,
+                   p0_l);
+        ILVL_B4_UH(zero, q0, zero, q1, zero, q2, zero, q3, q0_l, q1_l, q2_l,
+                   q3_l);
+        /* only the left (high) half is run through filter8 */
+        VP9_FILTER8(p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, p2_filt8_l,
+                    p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l);
+
+        /* convert 16 bit output data into 8 bit */
+        PCKEV_B4_SH(p2_filt8_l, p2_filt8_l, p1_filt8_l, p1_filt8_l,
+                    p0_filt8_l, p0_filt8_l, q0_filt8_l, q0_filt8_l,
+                    p2_filt8_l, p1_filt8_l, p0_filt8_l, q0_filt8_l);
+        PCKEV_B2_SH(q1_filt8_l, q1_filt8_l, q2_filt8_l, q2_filt8_l,
+                    q1_filt8_l, q2_filt8_l);
+
+        /* take filter8 output where flat is set, else filter4 / unfiltered */
+        p2 = __msa_bmnz_v(p2, (v16u8) p2_filt8_l, flat);
+        p1 = __msa_bmnz_v(p1_out, (v16u8) p1_filt8_l, flat);
+        p0 = __msa_bmnz_v(p0_out, (v16u8) p0_filt8_l, flat);
+        q0 = __msa_bmnz_v(q0_out, (v16u8) q0_filt8_l, flat);
+        q1 = __msa_bmnz_v(q1_out, (v16u8) q1_filt8_l, flat);
+        q2 = __msa_bmnz_v(q2, (v16u8) q2_filt8_l, flat);
+        /* interleave back to row order: p2 p1 p0 q0 groups plus q1 q2 pairs */
+        ILVR_B2_SH(p1, p2, q0, p0, vec0, vec1);
+        ILVRL_H2_SH(vec1, vec0, vec3, vec4);
+        ILVL_B2_SH(p1, p2, q0, p0, vec0, vec1);
+        ILVRL_H2_SH(vec1, vec0, vec6, vec7);
+        ILVRL_B2_SH(q2, q1, vec2, vec5);
+        /* presumably 4 + 2 bytes per row, from src - 3, four rows at a time */
+        src -= 3;
+        ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src, pitch);
+        ST2x4_UB(vec2, 0, src + 4, pitch);
+        src += (4 * pitch);
+        ST4x4_UB(vec4, vec4, 0, 1, 2, 3, src, pitch);
+        ST2x4_UB(vec2, 4, src + 4, pitch);
+        src += (4 * pitch);
+        ST4x4_UB(vec6, vec6, 0, 1, 2, 3, src, pitch);
+        ST2x4_UB(vec5, 0, src + 4, pitch);
+        src += (4 * pitch);
+        ST4x4_UB(vec7, vec7, 0, 1, 2, 3, src, pitch);
+        ST2x4_UB(vec5, 4, src + 4, pitch);
+    }
+}
+
+static void vp9_transpose_16x8_to_8x16(uint8_t *input, int32_t in_pitch,
+                                       uint8_t *output, int32_t out_pitch)
+{
+    v16u8 p7_org, p6_org, p5_org, p4_org, p3_org, p2_org, p1_org, p0_org;
+    v16i8 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+    v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7;
+    /* Read 8 vectors from input and write 16 transposed vectors to output */
+    LD_UB8(input, in_pitch,
+           p7_org, p6_org, p5_org, p4_org, p3_org, p2_org, p1_org, p0_org);
+    /* 8x8 transpose, presumably of the low 8 bytes of each row, into p7..p0 */
+    TRANSPOSE8x8_UB_UB(p7_org, p6_org, p5_org, p4_org, p3_org, p2_org, p1_org,
+                       p0_org, p7, p6, p5, p4, p3, p2, p1, p0);
+    /* 8x8 transpose of the left (high) 8 bytes of each row, into q0..q7 */
+    ILVL_B4_SB(p5_org, p7_org, p4_org, p6_org, p1_org, p3_org, p0_org, p2_org,
+               tmp0, tmp1, tmp2, tmp3);
+    ILVR_B2_SB(tmp1, tmp0, tmp3, tmp2, tmp4, tmp6);
+    ILVL_B2_SB(tmp1, tmp0, tmp3, tmp2, tmp5, tmp7);
+    ILVR_W2_UB(tmp6, tmp4, tmp7, tmp5, q0, q4);
+    ILVL_W2_UB(tmp6, tmp4, tmp7, tmp5, q2, q6);
+    SLDI_B4_0_UB(q0, q2, q4, q6, q1, q3, q5, q7, 8);
+    /* store p7..p0 first, then q0..q7, out_pitch apart */
+    ST_UB8(p7, p6, p5, p4, p3, p2, p1, p0, output, out_pitch);
+    output += (8 * out_pitch);
+    ST_UB8(q0, q1, q2, q3, q4, q5, q6, q7, output, out_pitch);
+}
+
+static void vp9_transpose_8x16_to_16x8(uint8_t *input, int32_t in_pitch,
+                                       uint8_t *output, int32_t out_pitch)
+{
+    v16u8 p7_o, p6_o, p5_o, p4_o, p3_o, p2_o, p1_o, p0_o;
+    v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7;
+    /* Read 16 vectors from input and write 8 transposed vectors to output */
+    LD_UB8(input, in_pitch, p7, p6, p5, p4, p3, p2, p1, p0);
+    LD_UB8(input + (8 * in_pitch), in_pitch, q0, q1, q2, q3, q4, q5, q6, q7);
+    TRANSPOSE16x8_UB_UB(p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5,
+                        q6, q7, p7_o, p6_o, p5_o, p4_o, p3_o, p2_o, p1_o, p0_o);
+    ST_UB8(p7_o, p6_o, p5_o, p4_o, p3_o, p2_o, p1_o, p0_o, output, out_pitch);
+}
+
+static void vp9_transpose_16x16(uint8_t *input, int32_t in_pitch,
+                                uint8_t *output, int32_t out_pitch)
+{
+    v16u8 row0, row1, row2, row3, row4, row5, row6, row7;
+    v16u8 row8, row9, row10, row11, row12, row13, row14, row15;
+    v8i16 tmp0, tmp1, tmp4, tmp5, tmp6, tmp7;
+    v4i32 tmp2, tmp3;
+    v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7;
+    /* Read 16 vectors from input and write 16 transposed vectors to output */
+    LD_UB8(input, in_pitch, row0, row1, row2, row3, row4, row5, row6, row7);
+    input += (8 * in_pitch);
+    LD_UB8(input, in_pitch,
+           row8, row9, row10, row11, row12, row13, row14, row15);
+    /* first 8 output vectors (p7..p0) come straight from the macro */
+    TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7,
+                        row8, row9, row10, row11, row12, row13, row14, row15,
+                        p7, p6, p5, p4, p3, p2, p1, p0);
+
+    /* transpose 16x8 matrix into 8x16 */
+    /* total 8 intermediate register and 32 instructions */
+    q7 = (v16u8) __msa_ilvod_d((v2i64) row8, (v2i64) row0);
+    q6 = (v16u8) __msa_ilvod_d((v2i64) row9, (v2i64) row1);
+    q5 = (v16u8) __msa_ilvod_d((v2i64) row10, (v2i64) row2);
+    q4 = (v16u8) __msa_ilvod_d((v2i64) row11, (v2i64) row3);
+    q3 = (v16u8) __msa_ilvod_d((v2i64) row12, (v2i64) row4);
+    q2 = (v16u8) __msa_ilvod_d((v2i64) row13, (v2i64) row5);
+    q1 = (v16u8) __msa_ilvod_d((v2i64) row14, (v2i64) row6);
+    q0 = (v16u8) __msa_ilvod_d((v2i64) row15, (v2i64) row7);
+    /* each q above pairs the high 8 bytes of rows N and N + 8; interleave bytes */
+    ILVEV_B2_SH(q7, q6, q5, q4, tmp0, tmp1);
+    tmp4 = (v8i16) __msa_ilvod_b((v16i8) q6, (v16i8) q7);
+    tmp5 = (v8i16) __msa_ilvod_b((v16i8) q4, (v16i8) q5);
+
+    ILVEV_B2_UB(q3, q2, q1, q0, q5, q7);
+    tmp6 = (v8i16) __msa_ilvod_b((v16i8) q2, (v16i8) q3);
+    tmp7 = (v8i16) __msa_ilvod_b((v16i8) q0, (v16i8) q1);
+    /* halfword then word interleaves produce the final q0..q7 */
+    ILVEV_H2_SW(tmp0, tmp1, q5, q7, tmp2, tmp3);
+    q0 = (v16u8) __msa_ilvev_w(tmp3, tmp2);
+    q4 = (v16u8) __msa_ilvod_w(tmp3, tmp2);
+
+    tmp2 = (v4i32) __msa_ilvod_h(tmp1, tmp0);
+    tmp3 = (v4i32) __msa_ilvod_h((v8i16) q7, (v8i16) q5);
+    q2 = (v16u8) __msa_ilvev_w(tmp3, tmp2);
+    q6 = (v16u8) __msa_ilvod_w(tmp3, tmp2);
+
+    ILVEV_H2_SW(tmp4, tmp5, tmp6, tmp7, tmp2, tmp3);
+    q1 = (v16u8) __msa_ilvev_w(tmp3, tmp2);
+    q5 = (v16u8) __msa_ilvod_w(tmp3, tmp2);
+
+    tmp2 = (v4i32) __msa_ilvod_h(tmp5, tmp4);
+    tmp3 = (v4i32) __msa_ilvod_h(tmp7, tmp6);
+    q3 = (v16u8) __msa_ilvev_w(tmp3, tmp2);
+    q7 = (v16u8) __msa_ilvod_w(tmp3, tmp2);
+    /* store p7..p0 first, then q0..q7, out_pitch apart */
+    ST_UB8(p7, p6, p5, p4, p3, p2, p1, p0, output, out_pitch);
+    output += (8 * out_pitch);
+    ST_UB8(q0, q1, q2, q3, q4, q5, q6, q7, output, out_pitch);
+}
+
+static int32_t vp9_vt_lpf_t4_and_t8_8w(uint8_t *src, uint8_t *filter48,
+                                       uint8_t *src_org, int32_t pitch_org,
+                                       int32_t b_limit_ptr,
+                                       int32_t limit_ptr,
+                                       int32_t thresh_ptr)
+{
+    v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
+    v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out;
+    v16u8 flat, mask, hev, thresh, b_limit, limit;
+    v8u16 p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r;
+    v8i16 p2_filt8_r, p1_filt8_r, p0_filt8_r;
+    v8i16 q0_filt8_r, q1_filt8_r, q2_filt8_r;
+    v16i8 zero = { 0 };
+    v8i16 vec0, vec1, vec2, vec3;
+    /* Returns 1 if output went to src_org, 0 if results were saved in filter48 */
+    /* load vector elements */
+    LD_UB8(src - (4 * 16), 16, p3, p2, p1, p0, q0, q1, q2, q3);
+    /* the low byte of each argument is replicated into every lane */
+    thresh = (v16u8) __msa_fill_b(thresh_ptr);
+    b_limit = (v16u8) __msa_fill_b(b_limit_ptr);
+    limit = (v16u8) __msa_fill_b(limit_ptr);
+
+    /* mask and hev */
+    LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+                 hev, mask, flat);
+    /* flat4 */
+    VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat);
+    /* filter4 */
+    VP9_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out,
+                       q1_out);
+    /* only the low 8 lanes are in use: clear the high half of flat */
+    flat = (v16u8) __msa_ilvr_d((v2i64) zero, (v2i64) flat);
+
+    /* if flat is zero for all pixels, then no need to calculate other filter */
+    if (__msa_test_bz_v(flat)) {
+        ILVR_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1);
+        ILVRL_H2_SH(vec1, vec0, vec2, vec3);
+        ST4x8_UB(vec2, vec3, (src_org - 2), pitch_org);
+        return 1; /* filter4 result already written to src_org */
+    } else {
+        ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1,
+                   zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r,
+                   q3_r);
+        VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r,
+                    p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r);
+
+        /* convert 16 bit output data into 8 bit */
+        p2_r = (v8u16) __msa_pckev_b((v16i8) p2_filt8_r, (v16i8) p2_filt8_r);
+        p1_r = (v8u16) __msa_pckev_b((v16i8) p1_filt8_r, (v16i8) p1_filt8_r);
+        p0_r = (v8u16) __msa_pckev_b((v16i8) p0_filt8_r, (v16i8) p0_filt8_r);
+        q0_r = (v8u16) __msa_pckev_b((v16i8) q0_filt8_r, (v16i8) q0_filt8_r);
+        q1_r = (v8u16) __msa_pckev_b((v16i8) q1_filt8_r, (v16i8) q1_filt8_r);
+        q2_r = (v8u16) __msa_pckev_b((v16i8) q2_filt8_r, (v16i8) q2_filt8_r);
+
+        /* take filter8 output where flat is set, else filter4 / unfiltered */
+        p2_out = __msa_bmnz_v(p2, (v16u8) p2_r, flat);
+        p1_out = __msa_bmnz_v(p1_out, (v16u8) p1_r, flat);
+        p0_out = __msa_bmnz_v(p0_out, (v16u8) p0_r, flat);
+        q0_out = __msa_bmnz_v(q0_out, (v16u8) q0_r, flat);
+        q1_out = __msa_bmnz_v(q1_out, (v16u8) q1_r, flat);
+        q2_out = __msa_bmnz_v(q2, (v16u8) q2_r, flat);
+        /* filter48 layout, 16 bytes apart: p2 p1 p0 q0 q1 q2, then flat */
+        ST_UB4(p2_out, p1_out, p0_out, q0_out, filter48, 16);
+        filter48 += (4 * 16);
+        ST_UB2(q1_out, q2_out, filter48, 16);
+        filter48 += (2 * 16);
+        ST_UB(flat, filter48);
+
+        return 0;
+    }
+}
+
+static int32_t vp9_vt_lpf_t16_8w(uint8_t *src, uint8_t *src_org, ptrdiff_t pitch,
+                                 uint8_t *filter48)
+{
+    v16i8 zero = { 0 };
+    v16u8 filter8, flat, flat2;
+    v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7;
+    v8u16 p7_r_in, p6_r_in, p5_r_in, p4_r_in;
+    v8u16 p3_r_in, p2_r_in, p1_r_in, p0_r_in;
+    v8u16 q7_r_in, q6_r_in, q5_r_in, q4_r_in;
+    v8u16 q3_r_in, q2_r_in, q1_r_in, q0_r_in;
+    v8u16 tmp0_r, tmp1_r;
+    v8i16 r_out;
+    /* Returns 1 if output went to src_org, 0 if it was written back at src */
+    flat = LD_UB(filter48 + 6 * 16); /* read from slot 6 of filter48 */
+
+    LD_UB8((src - 8 * 16), 16, p7, p6, p5, p4, p3, p2, p1, p0);
+    LD_UB8(src, 16, q0, q1, q2, q3, q4, q5, q6, q7);
+
+    VP9_FLAT5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, flat, flat2);
+
+    /* if flat2 is zero for all pixels, then no need to calculate other filter */
+    if (__msa_test_bz_v(flat2)) {
+        v8i16 vec0, vec1, vec2, vec3, vec4;
+        /* reload the six vectors held in filter48 slots 0..5 */
+        LD_UB4(filter48, 16, p2, p1, p0, q0);
+        LD_UB2(filter48 + 4 * 16, 16, q1, q2);
+        /* interleave back to row order: p2 p1 p0 q0 groups plus q1 q2 pairs */
+        ILVR_B2_SH(p1, p2, q0, p0, vec0, vec1);
+        ILVRL_H2_SH(vec1, vec0, vec3, vec4);
+        vec2 = (v8i16) __msa_ilvr_b((v16i8) q2, (v16i8) q1);
+        /* presumably 4 + 2 bytes per row, from src_org - 3, 8 rows in total */
+        src_org -= 3;
+        ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src_org, pitch);
+        ST2x4_UB(vec2, 0, (src_org + 4), pitch);
+        src_org += (4 * pitch);
+        ST4x4_UB(vec4, vec4, 0, 1, 2, 3, src_org, pitch);
+        ST2x4_UB(vec2, 4, (src_org + 4), pitch);
+
+        return 1;
+    } else {
+        src -= 7 * 16; /* output starts at the p6 row of the 16-byte-pitch buffer */
+        /* interleave the right (low) 8 bytes of each input with zero */
+        ILVR_B8_UH(zero, p7, zero, p6, zero, p5, zero, p4, zero, p3, zero, p2,
+                   zero, p1, zero, p0, p7_r_in, p6_r_in, p5_r_in, p4_r_in,
+                   p3_r_in, p2_r_in, p1_r_in, p0_r_in);
+        q0_r_in = (v8u16) __msa_ilvr_b(zero, (v16i8) q0);
+        /* p6: 7*p7 + 2*p6 + p5 + p4 + p3 + p2 + p1 + p0 + q0, rounded >> 4 */
+        tmp0_r = p7_r_in << 3;
+        tmp0_r -= p7_r_in;
+        tmp0_r += p6_r_in;
+        tmp0_r += q0_r_in;
+        tmp1_r = p6_r_in + p5_r_in;
+        tmp1_r += p4_r_in;
+        tmp1_r += p3_r_in;
+        tmp1_r += p2_r_in;
+        tmp1_r += p1_r_in;
+        tmp1_r += p0_r_in;
+        tmp1_r += tmp0_r;
+        /* tmp1_r is a running sum; each later step adds tmp0_r to it */
+        r_out = __msa_srari_h((v8i16) tmp1_r, 4);
+        r_out = (v8i16) __msa_pckev_b((v16i8) r_out, (v16i8) r_out);
+        p6 = __msa_bmnz_v(p6, (v16u8) r_out, flat2);
+        ST8x1_UB(p6, src);
+        src += 16;
+
+        /* p5 */
+        q1_r_in = (v8u16) __msa_ilvr_b(zero, (v16i8) q1);
+        tmp0_r = p5_r_in - p6_r_in;
+        tmp0_r += q1_r_in;
+        tmp0_r -= p7_r_in;
+        tmp1_r += tmp0_r;
+        r_out = __msa_srari_h((v8i16) tmp1_r, 4);
+        r_out = (v8i16) __msa_pckev_b((v16i8) r_out, (v16i8) r_out);
+        p5 = __msa_bmnz_v(p5, (v16u8) r_out, flat2);
+        ST8x1_UB(p5, src);
+        src += 16;
+
+        /* p4 */
+        q2_r_in = (v8u16) __msa_ilvr_b(zero, (v16i8) q2);
+        tmp0_r = p4_r_in - p5_r_in;
+        tmp0_r += q2_r_in;
+        tmp0_r -= p7_r_in;
+        tmp1_r += tmp0_r;
+        r_out = __msa_srari_h((v8i16) tmp1_r, 4);
+        r_out = (v8i16) __msa_pckev_b((v16i8) r_out, (v16i8) r_out);
+        p4 = __msa_bmnz_v(p4, (v16u8) r_out, flat2);
+        ST8x1_UB(p4, src);
+        src += 16;
+
+        /* p3 */
+        q3_r_in = (v8u16) __msa_ilvr_b(zero, (v16i8) q3);
+        tmp0_r = p3_r_in - p4_r_in;
+        tmp0_r += q3_r_in;
+        tmp0_r -= p7_r_in;
+        tmp1_r += tmp0_r;
+        r_out = __msa_srari_h((v8i16) tmp1_r, 4);
+        r_out = (v8i16) __msa_pckev_b((v16i8) r_out, (v16i8) r_out);
+        p3 = __msa_bmnz_v(p3, (v16u8) r_out, flat2);
+        ST8x1_UB(p3, src);
+        src += 16;
+
+        /* p2 (p2..q2 use the vector loaded from filter48 where flat2 is clear) */
+        q4_r_in = (v8u16) __msa_ilvr_b(zero, (v16i8) q4);
+        filter8 = LD_UB(filter48);
+        tmp0_r = p2_r_in - p3_r_in;
+        tmp0_r += q4_r_in;
+        tmp0_r -= p7_r_in;
+        tmp1_r += tmp0_r;
+        r_out = __msa_srari_h((v8i16) tmp1_r, 4);
+        r_out = (v8i16) __msa_pckev_b((v16i8) r_out, (v16i8) r_out);
+        filter8 = __msa_bmnz_v(filter8, (v16u8) r_out, flat2);
+        ST8x1_UB(filter8, src);
+        src += 16;
+
+        /* p1 */
+        q5_r_in = (v8u16) __msa_ilvr_b(zero, (v16i8) q5);
+        filter8 = LD_UB(filter48 + 16);
+        tmp0_r = p1_r_in - p2_r_in;
+        tmp0_r += q5_r_in;
+        tmp0_r -= p7_r_in;
+        tmp1_r += tmp0_r;
+        r_out = __msa_srari_h((v8i16) tmp1_r, 4);
+        r_out = (v8i16) __msa_pckev_b((v16i8) r_out, (v16i8) r_out);
+        filter8 = __msa_bmnz_v(filter8, (v16u8) r_out, flat2);
+        ST8x1_UB(filter8, src);
+        src += 16;
+
+        /* p0 */
+        q6_r_in = (v8u16) __msa_ilvr_b(zero, (v16i8) q6);
+        filter8 = LD_UB(filter48 + 32);
+        tmp0_r = p0_r_in - p1_r_in;
+        tmp0_r += q6_r_in;
+        tmp0_r -= p7_r_in;
+        tmp1_r += tmp0_r;
+        r_out = __msa_srari_h((v8i16) tmp1_r, 4);
+        r_out = (v8i16) __msa_pckev_b((v16i8) r_out, (v16i8) r_out);
+        filter8 = __msa_bmnz_v(filter8, (v16u8) r_out, flat2);
+        ST8x1_UB(filter8, src);
+        src += 16;
+
+        /* q0 */
+        q7_r_in = (v8u16) __msa_ilvr_b(zero, (v16i8) q7);
+        filter8 = LD_UB(filter48 + 48);
+        tmp0_r = q7_r_in - p0_r_in;
+        tmp0_r += q0_r_in;
+        tmp0_r -= p7_r_in;
+        tmp1_r += tmp0_r;
+        r_out = __msa_srari_h((v8i16) tmp1_r, 4);
+        r_out = (v8i16) __msa_pckev_b((v16i8) r_out, (v16i8) r_out);
+        filter8 = __msa_bmnz_v(filter8, (v16u8) r_out, flat2);
+        ST8x1_UB(filter8, src);
+        src += 16;
+
+        /* q1 */
+        filter8 = LD_UB(filter48 + 64);
+        tmp0_r = q7_r_in - q0_r_in;
+        tmp0_r += q1_r_in;
+        tmp0_r -= p6_r_in;
+        tmp1_r += tmp0_r;
+        r_out = __msa_srari_h((v8i16) tmp1_r, 4);
+        r_out = (v8i16) __msa_pckev_b((v16i8) r_out, (v16i8) r_out);
+        filter8 = __msa_bmnz_v(filter8, (v16u8) r_out, flat2);
+        ST8x1_UB(filter8, src);
+        src += 16;
+
+        /* q2 */
+        filter8 = LD_UB(filter48 + 80);
+        tmp0_r = q7_r_in - q1_r_in;
+        tmp0_r += q2_r_in;
+        tmp0_r -= p5_r_in;
+        tmp1_r += tmp0_r;
+        r_out = __msa_srari_h((v8i16) tmp1_r, 4);
+        r_out = (v8i16) __msa_pckev_b((v16i8) r_out, (v16i8) r_out);
+        filter8 = __msa_bmnz_v(filter8, (v16u8) r_out, flat2);
+        ST8x1_UB(filter8, src);
+        src += 16;
+
+        /* q3 */
+        tmp0_r = q7_r_in - q2_r_in;
+        tmp0_r += q3_r_in;
+        tmp0_r -= p4_r_in;
+        tmp1_r += tmp0_r;
+        r_out = __msa_srari_h((v8i16) tmp1_r, 4);
+        r_out = (v8i16) __msa_pckev_b((v16i8) r_out, (v16i8) r_out);
+        q3 = __msa_bmnz_v(q3, (v16u8) r_out, flat2);
+        ST8x1_UB(q3, src);
+        src += 16;
+
+        /* q4 */
+        tmp0_r = q7_r_in - q3_r_in;
+        tmp0_r += q4_r_in;
+        tmp0_r -= p3_r_in;
+        tmp1_r += tmp0_r;
+        r_out = __msa_srari_h((v8i16) tmp1_r, 4);
+        r_out = (v8i16) __msa_pckev_b((v16i8) r_out, (v16i8) r_out);
+        q4 = __msa_bmnz_v(q4, (v16u8) r_out, flat2);
+        ST8x1_UB(q4, src);
+        src += 16;
+
+        /* q5 */
+        tmp0_r = q7_r_in - q4_r_in;
+        tmp0_r += q5_r_in;
+        tmp0_r -= p2_r_in;
+        tmp1_r += tmp0_r;
+        r_out = __msa_srari_h((v8i16) tmp1_r, 4);
+        r_out = (v8i16) __msa_pckev_b((v16i8) r_out, (v16i8) r_out);
+        q5 = __msa_bmnz_v(q5, (v16u8) r_out, flat2);
+        ST8x1_UB(q5, src);
+        src += 16;
+
+        /* q6 */
+        tmp0_r = q7_r_in - q5_r_in;
+        tmp0_r += q6_r_in;
+        tmp0_r -= p1_r_in;
+        tmp1_r += tmp0_r;
+        r_out = __msa_srari_h((v8i16) tmp1_r, 4);
+        r_out = (v8i16) __msa_pckev_b((v16i8) r_out, (v16i8) r_out);
+        q6 = __msa_bmnz_v(q6, (v16u8) r_out, flat2);
+        ST8x1_UB(q6, src);
+
+        return 0;
+    }
+}
+
+void ff_loop_filter_h_16_8_msa(uint8_t *src, ptrdiff_t pitch,
+                               int32_t b_limit_ptr,
+                               int32_t limit_ptr,
+                               int32_t thresh_ptr)
+{
+    uint8_t early_exit = 0;
+    uint8_t transposed_input[16 * 24] ALLOC_ALIGNED(ALIGNMENT);
+    uint8_t *filter48 = &transposed_input[16 * 16]; /* scratch after 16 rows */
+    /* transpose 8 rows starting at src - 8 into the 16-byte-pitch buffer */
+    vp9_transpose_16x8_to_8x16(src - 8, pitch, transposed_input, 16);
+    /* a non-zero return means the helper already wrote its result to src */
+    early_exit = vp9_vt_lpf_t4_and_t8_8w((transposed_input + 16 * 8),
+                                         &filter48[0], src, pitch,
+                                         b_limit_ptr, limit_ptr, thresh_ptr);
+
+    if (0 == early_exit) {
+        early_exit = vp9_vt_lpf_t16_8w((transposed_input + 16 * 8), src, pitch,
+                                       &filter48[0]);
+        /* otherwise the results are in the buffer: transpose it back to src */
+        if (0 == early_exit) {
+            vp9_transpose_8x16_to_16x8(transposed_input, 16, src - 8, pitch);
+        }
+    }
+}
+
+static int32_t vp9_vt_lpf_t4_and_t8_16w(uint8_t *src, uint8_t *filter48,
+                                        uint8_t *src_org, ptrdiff_t pitch,
+                                        int32_t b_limit_ptr,
+                                        int32_t limit_ptr,
+                                        int32_t thresh_ptr)
+{
+    v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
+    v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out;
+    v16u8 flat, mask, hev, thresh, b_limit, limit;
+    v8u16 p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r;
+    v8u16 p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l;
+    v8i16 p2_filt8_r, p1_filt8_r, p0_filt8_r;
+    v8i16 q0_filt8_r, q1_filt8_r, q2_filt8_r;
+    v8i16 p2_filt8_l, p1_filt8_l, p0_filt8_l;
+    v8i16 q0_filt8_l, q1_filt8_l, q2_filt8_l;
+    v16i8 zero = { 0 };
+    v8i16 vec0, vec1, vec2, vec3, vec4, vec5;
+    /* Returns 1 if output went to src_org, 0 if results were saved in filter48 */
+    /* load vector elements */
+    LD_UB8(src - (4 * 16), 16, p3, p2, p1, p0, q0, q1, q2, q3);
+    /* the low byte of each argument is replicated into every lane */
+    thresh = (v16u8) __msa_fill_b(thresh_ptr);
+    b_limit = (v16u8) __msa_fill_b(b_limit_ptr);
+    limit = (v16u8) __msa_fill_b(limit_ptr);
+
+    /* mask and hev */
+    LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+                 hev, mask, flat);
+    /* flat4 */
+    VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat);
+    /* filter4 */
+    VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out,
+                       q1_out);
+
+    /* if flat is zero for all pixels, then no need to calculate other filter */
+    if (__msa_test_bz_v(flat)) {
+        ILVR_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1);
+        ILVRL_H2_SH(vec1, vec0, vec2, vec3);
+        ILVL_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1);
+        ILVRL_H2_SH(vec1, vec0, vec4, vec5);
+        /* filter4 result only; presumably 4 bytes per row, from src_org - 2 */
+        src_org -= 2;
+        ST4x8_UB(vec2, vec3, src_org, pitch);
+        src_org += 8 * pitch;
+        ST4x8_UB(vec4, vec5, src_org, pitch);
+
+        return 1;
+    } else {
+        ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1,
+                   zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r,
+                   q3_r);
+        VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r,
+                    p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r);
+        ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l, p2_l, p1_l,
+                   p0_l);
+        ILVL_B4_UH(zero, q0, zero, q1, zero, q2, zero, q3, q0_l, q1_l, q2_l,
+                   q3_l);
+        VP9_FILTER8(p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, p2_filt8_l,
+                    p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l);
+
+        /* convert 16 bit output data into 8 bit */
+        PCKEV_B4_SH(p2_filt8_l, p2_filt8_r, p1_filt8_l, p1_filt8_r, p0_filt8_l,
+                    p0_filt8_r, q0_filt8_l, q0_filt8_r, p2_filt8_r, p1_filt8_r,
+                    p0_filt8_r, q0_filt8_r);
+        PCKEV_B2_SH(q1_filt8_l, q1_filt8_r, q2_filt8_l, q2_filt8_r, q1_filt8_r,
+                    q2_filt8_r);
+
+        /* take filter8 output where flat is set, else filter4 / unfiltered */
+        p2_out = __msa_bmnz_v(p2, (v16u8) p2_filt8_r, flat);
+        p1_out = __msa_bmnz_v(p1_out, (v16u8) p1_filt8_r, flat);
+        p0_out = __msa_bmnz_v(p0_out, (v16u8) p0_filt8_r, flat);
+        q0_out = __msa_bmnz_v(q0_out, (v16u8) q0_filt8_r, flat);
+        q1_out = __msa_bmnz_v(q1_out, (v16u8) q1_filt8_r, flat);
+        q2_out = __msa_bmnz_v(q2, (v16u8) q2_filt8_r, flat);
+        /* filter48 layout, 16 bytes apart: p2 p1 p0 q0 q1 q2, then flat */
+        ST_UB4(p2_out, p1_out, p0_out, q0_out, filter48, 16);
+        filter48 += (4 * 16);
+        ST_UB2(q1_out, q2_out, filter48, 16);
+        filter48 += (2 * 16);
+        ST_UB(flat, filter48);
+
+        return 0;
+    }
+}
+
+static int32_t vp9_vt_lpf_t16_16w(uint8_t *src, uint8_t *src_org, ptrdiff_t pitch,
+                                  uint8_t *filter48)
+{
+    v16u8 flat, flat2, filter8;
+    v16i8 zero = { 0 };
+    v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7;
+    v8u16 p7_r_in, p6_r_in, p5_r_in, p4_r_in;
+    v8u16 p3_r_in, p2_r_in, p1_r_in, p0_r_in;
+    v8u16 q7_r_in, q6_r_in, q5_r_in, q4_r_in;
+    v8u16 q3_r_in, q2_r_in, q1_r_in, q0_r_in;
+    v8u16 p7_l_in, p6_l_in, p5_l_in, p4_l_in;
+    v8u16 p3_l_in, p2_l_in, p1_l_in, p0_l_in;
+    v8u16 q7_l_in, q6_l_in, q5_l_in, q4_l_in;
+    v8u16 q3_l_in, q2_l_in, q1_l_in, q0_l_in;
+    v8u16 tmp0_r, tmp1_r, tmp0_l, tmp1_l;
+    v8i16 l_out, r_out;
+    /* Wide filter stage: returns 1 if rows were stored via src_org, 0 if stored via src. */
+    flat = LD_UB(filter48 + 6 * 16); /* mask kept after the six 16-byte rows in filter48 */
+    /* src is the q0 row of a stride-16 buffer; p7..p0 are the 8 rows before it */
+    LD_UB8((src - 8 * 16), 16, p7, p6, p5, p4, p3, p2, p1, p0);
+    LD_UB8(src, 16, q0, q1, q2, q3, q4, q5, q6, q7);
+
+    VP9_FLAT5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, flat, flat2);
+
+    /* if flat2 is zero for all pixels, then no need to calculate other filter */
+    if (__msa_test_bz_v(flat2)) {
+        v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+
+        LD_UB4(filter48, 16, p2, p1, p0, q0); /* reuse the rows already kept in filter48 */
+        LD_UB2(filter48 + 4 * 16, 16, q1, q2);
+        /* interleave p2, p1, p0, q0 (and q1, q2) so they can be stored row by row */
+        ILVR_B2_SH(p1, p2, q0, p0, vec0, vec1);
+        ILVRL_H2_SH(vec1, vec0, vec3, vec4);
+        ILVL_B2_SH(p1, p2, q0, p0, vec0, vec1);
+        ILVRL_H2_SH(vec1, vec0, vec6, vec7);
+        ILVRL_B2_SH(q2, q1, vec2, vec5);
+        /* four groups of stores: ST4x4_UB at src_org, ST2x4_UB at src_org + 4 */
+        src_org -= 3;
+        ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src_org, pitch);
+        ST2x4_UB(vec2, 0, (src_org + 4), pitch);
+        src_org += (4 * pitch);
+        ST4x4_UB(vec4, vec4, 0, 1, 2, 3, src_org, pitch);
+        ST2x4_UB(vec2, 4, (src_org + 4), pitch);
+        src_org += (4 * pitch);
+        ST4x4_UB(vec6, vec6, 0, 1, 2, 3, src_org, pitch);
+        ST2x4_UB(vec5, 0, (src_org + 4), pitch);
+        src_org += (4 * pitch);
+        ST4x4_UB(vec7, vec7, 0, 1, 2, 3, src_org, pitch);
+        ST2x4_UB(vec5, 4, (src_org + 4), pitch);
+
+        return 1;
+    } else {
+        src -= 7 * 16; /* p6 row: the first row rewritten below */
+        /* p6: tmp1 = 7 * p7 + 2 * p6 + p5 + p4 + p3 + p2 + p1 + p0 + q0 (_r / _l halves) */
+        ILVR_B8_UH(zero, p7, zero, p6, zero, p5, zero, p4, zero, p3, zero, p2,
+                   zero, p1, zero, p0, p7_r_in, p6_r_in, p5_r_in, p4_r_in,
+                   p3_r_in, p2_r_in, p1_r_in, p0_r_in);
+        q0_r_in = (v8u16) __msa_ilvr_b(zero, (v16i8) q0);
+
+        tmp0_r = p7_r_in << 3;
+        tmp0_r -= p7_r_in;
+        tmp0_r += p6_r_in;
+        tmp0_r += q0_r_in;
+        tmp1_r = p6_r_in + p5_r_in;
+        tmp1_r += p4_r_in;
+        tmp1_r += p3_r_in;
+        tmp1_r += p2_r_in;
+        tmp1_r += p1_r_in;
+        tmp1_r += p0_r_in;
+        tmp1_r += tmp0_r;
+        r_out = __msa_srari_h((v8i16) tmp1_r, 4);
+
+        ILVL_B4_UH(zero, p7, zero, p6, zero, p5, zero, p4, p7_l_in, p6_l_in,
+                   p5_l_in, p4_l_in);
+        ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l_in, p2_l_in,
+                   p1_l_in, p0_l_in);
+        q0_l_in = (v8u16) __msa_ilvl_b(zero, (v16i8) q0);
+
+        tmp0_l = p7_l_in << 3;
+        tmp0_l -= p7_l_in;
+        tmp0_l += p6_l_in;
+        tmp0_l += q0_l_in;
+        tmp1_l = p6_l_in + p5_l_in;
+        tmp1_l += p4_l_in;
+        tmp1_l += p3_l_in;
+        tmp1_l += p2_l_in;
+        tmp1_l += p1_l_in;
+        tmp1_l += p0_l_in;
+        tmp1_l += tmp0_l;
+        l_out = __msa_srari_h((v8i16) tmp1_l, 4);
+        /* pack both halves to bytes; the new value is used only where flat2 is set */
+        r_out = (v8i16) __msa_pckev_b((v16i8) l_out, (v16i8) r_out);
+        p6 = __msa_bmnz_v(p6, (v16u8) r_out, flat2);
+        ST_UB(p6, src);
+        src += 16;
+
+        /* p5: running sum += p5 + q1 - p6 - p7 */
+        q1_r_in = (v8u16) __msa_ilvr_b(zero, (v16i8) q1);
+        tmp0_r = p5_r_in - p6_r_in;
+        tmp0_r += q1_r_in;
+        tmp0_r -= p7_r_in;
+        tmp1_r += tmp0_r;
+        r_out = __msa_srari_h((v8i16) tmp1_r, 4);
+        q1_l_in = (v8u16) __msa_ilvl_b(zero, (v16i8) q1);
+        tmp0_l = p5_l_in - p6_l_in;
+        tmp0_l += q1_l_in;
+        tmp0_l -= p7_l_in;
+        tmp1_l += tmp0_l;
+        l_out = __msa_srari_h((v8i16) tmp1_l, 4);
+        r_out = (v8i16) __msa_pckev_b((v16i8) l_out, (v16i8) r_out);
+        p5 = __msa_bmnz_v(p5, (v16u8) r_out, flat2);
+        ST_UB(p5, src);
+        src += 16;
+
+        /* p4: running sum += p4 + q2 - p5 - p7 */
+        q2_r_in = (v8u16) __msa_ilvr_b(zero, (v16i8) q2);
+        tmp0_r = p4_r_in - p5_r_in;
+        tmp0_r += q2_r_in;
+        tmp0_r -= p7_r_in;
+        tmp1_r += tmp0_r;
+        r_out = __msa_srari_h((v8i16) tmp1_r, 4);
+        q2_l_in = (v8u16) __msa_ilvl_b(zero, (v16i8) q2);
+        tmp0_l = p4_l_in - p5_l_in;
+        tmp0_l += q2_l_in;
+        tmp0_l -= p7_l_in;
+        tmp1_l += tmp0_l;
+        l_out = __msa_srari_h((v8i16) tmp1_l, 4);
+        r_out = (v8i16) __msa_pckev_b((v16i8) l_out, (v16i8) r_out);
+        p4 = __msa_bmnz_v(p4, (v16u8) r_out, flat2);
+        ST_UB(p4, src);
+        src += 16;
+
+        /* p3: running sum += p3 + q3 - p4 - p7 */
+        q3_r_in = (v8u16) __msa_ilvr_b(zero, (v16i8) q3);
+        tmp0_r = p3_r_in - p4_r_in;
+        tmp0_r += q3_r_in;
+        tmp0_r -= p7_r_in;
+        tmp1_r += tmp0_r;
+        r_out = __msa_srari_h((v8i16) tmp1_r, 4);
+        q3_l_in = (v8u16) __msa_ilvl_b(zero, (v16i8) q3);
+        tmp0_l = p3_l_in - p4_l_in;
+        tmp0_l += q3_l_in;
+        tmp0_l -= p7_l_in;
+        tmp1_l += tmp0_l;
+        l_out = __msa_srari_h((v8i16) tmp1_l, 4);
+        r_out = (v8i16) __msa_pckev_b((v16i8) l_out, (v16i8) r_out);
+        p3 = __msa_bmnz_v(p3, (v16u8) r_out, flat2);
+        ST_UB(p3, src);
+        src += 16;
+
+        /* p2: running sum += p2 + q4 - p3 - p7; from here to q2 the fallback is the filter48 row */
+        q4_r_in = (v8u16) __msa_ilvr_b(zero, (v16i8) q4);
+        filter8 = LD_UB(filter48);
+        tmp0_r = p2_r_in - p3_r_in;
+        tmp0_r += q4_r_in;
+        tmp0_r -= p7_r_in;
+        tmp1_r += tmp0_r;
+        r_out = __msa_srari_h((v8i16) tmp1_r, 4);
+        q4_l_in = (v8u16) __msa_ilvl_b(zero, (v16i8) q4);
+        tmp0_l = p2_l_in - p3_l_in;
+        tmp0_l += q4_l_in;
+        tmp0_l -= p7_l_in;
+        tmp1_l += tmp0_l;
+        l_out = __msa_srari_h((v8i16) tmp1_l, 4);
+        r_out = (v8i16) __msa_pckev_b((v16i8) l_out, (v16i8) r_out);
+        filter8 = __msa_bmnz_v(filter8, (v16u8) r_out, flat2);
+        ST_UB(filter8, src);
+        src += 16;
+
+        /* p1: running sum += p1 + q5 - p2 - p7 */
+        q5_r_in = (v8u16) __msa_ilvr_b(zero, (v16i8) q5);
+        filter8 = LD_UB(filter48 + 16);
+        tmp0_r = p1_r_in - p2_r_in;
+        tmp0_r += q5_r_in;
+        tmp0_r -= p7_r_in;
+        tmp1_r += tmp0_r;
+        r_out = __msa_srari_h((v8i16) tmp1_r, 4);
+        q5_l_in = (v8u16) __msa_ilvl_b(zero, (v16i8) q5);
+        tmp0_l = p1_l_in - p2_l_in;
+        tmp0_l += q5_l_in;
+        tmp0_l -= p7_l_in;
+        tmp1_l += tmp0_l;
+        l_out = __msa_srari_h((v8i16) (tmp1_l), 4);
+        r_out = (v8i16) __msa_pckev_b((v16i8) l_out, (v16i8) r_out);
+        filter8 = __msa_bmnz_v(filter8, (v16u8) r_out, flat2);
+        ST_UB(filter8, src);
+        src += 16;
+
+        /* p0: running sum += p0 + q6 - p1 - p7 */
+        q6_r_in = (v8u16) __msa_ilvr_b(zero, (v16i8) q6);
+        filter8 = LD_UB(filter48 + 32);
+        tmp0_r = p0_r_in - p1_r_in;
+        tmp0_r += q6_r_in;
+        tmp0_r -= p7_r_in;
+        tmp1_r += tmp0_r;
+        r_out = __msa_srari_h((v8i16) tmp1_r, 4);
+        q6_l_in = (v8u16) __msa_ilvl_b(zero, (v16i8) q6);
+        tmp0_l = p0_l_in - p1_l_in;
+        tmp0_l += q6_l_in;
+        tmp0_l -= p7_l_in;
+        tmp1_l += tmp0_l;
+        l_out = __msa_srari_h((v8i16) tmp1_l, 4);
+        r_out = (v8i16) __msa_pckev_b((v16i8) l_out, (v16i8) r_out);
+        filter8 = __msa_bmnz_v(filter8, (v16u8) r_out, flat2);
+        ST_UB(filter8, src);
+        src += 16;
+
+        /* q0: running sum += q7 + q0 - p0 - p7 */
+        q7_r_in = (v8u16) __msa_ilvr_b(zero, (v16i8) q7);
+        filter8 = LD_UB(filter48 + 48);
+        tmp0_r = q7_r_in - p0_r_in;
+        tmp0_r += q0_r_in;
+        tmp0_r -= p7_r_in;
+        tmp1_r += tmp0_r;
+        r_out = __msa_srari_h((v8i16) tmp1_r, 4);
+        q7_l_in = (v8u16) __msa_ilvl_b(zero, (v16i8) q7);
+        tmp0_l = q7_l_in - p0_l_in;
+        tmp0_l += q0_l_in;
+        tmp0_l -= p7_l_in;
+        tmp1_l += tmp0_l;
+        l_out = __msa_srari_h((v8i16) tmp1_l, 4);
+        r_out = (v8i16) __msa_pckev_b((v16i8) l_out, (v16i8) r_out);
+        filter8 = __msa_bmnz_v(filter8, (v16u8) r_out, flat2);
+        ST_UB(filter8, src);
+        src += 16;
+
+        /* q1: running sum += q7 + q1 - q0 - p6 */
+        filter8 = LD_UB(filter48 + 64);
+        tmp0_r = q7_r_in - q0_r_in;
+        tmp0_r += q1_r_in;
+        tmp0_r -= p6_r_in;
+        tmp1_r += tmp0_r;
+        r_out = __msa_srari_h((v8i16) tmp1_r, 4);
+        tmp0_l = q7_l_in - q0_l_in;
+        tmp0_l += q1_l_in;
+        tmp0_l -= p6_l_in;
+        tmp1_l += tmp0_l;
+        l_out = __msa_srari_h((v8i16) tmp1_l, 4);
+        r_out = (v8i16) __msa_pckev_b((v16i8) l_out, (v16i8) r_out);
+        filter8 = __msa_bmnz_v(filter8, (v16u8) r_out, flat2);
+        ST_UB(filter8, src);
+        src += 16;
+
+        /* q2: running sum += q7 + q2 - q1 - p5 */
+        filter8 = LD_UB(filter48 + 80);
+        tmp0_r = q7_r_in - q1_r_in;
+        tmp0_r += q2_r_in;
+        tmp0_r -= p5_r_in;
+        tmp1_r += tmp0_r;
+        r_out = __msa_srari_h((v8i16) tmp1_r, 4);
+        tmp0_l = q7_l_in - q1_l_in;
+        tmp0_l += q2_l_in;
+        tmp0_l -= p5_l_in;
+        tmp1_l += tmp0_l;
+        l_out = __msa_srari_h((v8i16) tmp1_l, 4);
+        r_out = (v8i16) __msa_pckev_b((v16i8) l_out, (v16i8) r_out);
+        filter8 = __msa_bmnz_v(filter8, (v16u8) r_out, flat2);
+        ST_UB(filter8, src);
+        src += 16;
+
+        /* q3: running sum += q7 + q3 - q2 - p4; fallback is the loaded q3 again */
+        tmp0_r = q7_r_in - q2_r_in;
+        tmp0_r += q3_r_in;
+        tmp0_r -= p4_r_in;
+        tmp1_r += tmp0_r;
+        r_out = __msa_srari_h((v8i16) tmp1_r, 4);
+        tmp0_l = q7_l_in - q2_l_in;
+        tmp0_l += q3_l_in;
+        tmp0_l -= p4_l_in;
+        tmp1_l += tmp0_l;
+        l_out = __msa_srari_h((v8i16) tmp1_l, 4);
+        r_out = (v8i16) __msa_pckev_b((v16i8) l_out, (v16i8) r_out);
+        q3 = __msa_bmnz_v(q3, (v16u8) r_out, flat2);
+        ST_UB(q3, src);
+        src += 16;
+
+        /* q4: running sum += q7 + q4 - q3 - p3 */
+        tmp0_r = q7_r_in - q3_r_in;
+        tmp0_r += q4_r_in;
+        tmp0_r -= p3_r_in;
+        tmp1_r += tmp0_r;
+        r_out = __msa_srari_h((v8i16) tmp1_r, 4);
+        tmp0_l = q7_l_in - q3_l_in;
+        tmp0_l += q4_l_in;
+        tmp0_l -= p3_l_in;
+        tmp1_l += tmp0_l;
+        l_out = __msa_srari_h((v8i16) tmp1_l, 4);
+        r_out = (v8i16) __msa_pckev_b((v16i8) l_out, (v16i8) r_out);
+        q4 = __msa_bmnz_v(q4, (v16u8) r_out, flat2);
+        ST_UB(q4, src);
+        src += 16;
+
+        /* q5: running sum += q7 + q5 - q4 - p2 */
+        tmp0_r = q7_r_in - q4_r_in;
+        tmp0_r += q5_r_in;
+        tmp0_r -= p2_r_in;
+        tmp1_r += tmp0_r;
+        r_out = __msa_srari_h((v8i16) tmp1_r, 4);
+        tmp0_l = q7_l_in - q4_l_in;
+        tmp0_l += q5_l_in;
+        tmp0_l -= p2_l_in;
+        tmp1_l += tmp0_l;
+        l_out = __msa_srari_h((v8i16) tmp1_l, 4);
+        r_out = (v8i16) __msa_pckev_b((v16i8) l_out, (v16i8) r_out);
+        q5 = __msa_bmnz_v(q5, (v16u8) r_out, flat2);
+        ST_UB(q5, src);
+        src += 16;
+
+        /* q6: running sum += q7 + q6 - q5 - p1; last row written (p7 and q7 are never stored) */
+        tmp0_r = q7_r_in - q5_r_in;
+        tmp0_r += q6_r_in;
+        tmp0_r -= p1_r_in;
+        tmp1_r += tmp0_r;
+        r_out = __msa_srari_h((v8i16) tmp1_r, 4);
+        tmp0_l = q7_l_in - q5_l_in;
+        tmp0_l += q6_l_in;
+        tmp0_l -= p1_l_in;
+        tmp1_l += tmp0_l;
+        l_out = __msa_srari_h((v8i16) tmp1_l, 4);
+        r_out = (v8i16) __msa_pckev_b((v16i8) l_out, (v16i8) r_out);
+        q6 = __msa_bmnz_v(q6, (v16u8) r_out, flat2);
+        ST_UB(q6, src);
+
+        return 0;
+    }
+}
+
+void ff_loop_filter_h_16_16_msa(uint8_t *src, ptrdiff_t pitch,
+                                int32_t b_limit_ptr,
+                                int32_t limit_ptr,
+                                int32_t thresh_ptr)
+{
+    uint8_t early_exit = 0;
+    uint8_t transposed_input[16 * 24] ALLOC_ALIGNED(ALIGNMENT);
+    uint8_t *filter48 = &transposed_input[16 * 16]; /* scratch rows after the 16x16 area */
+    /* Work on a stride-16 transposed copy of the area that starts at src - 8. */
+    vp9_transpose_16x16((src - 8), pitch, &transposed_input[0], 16);
+
+    early_exit = vp9_vt_lpf_t4_and_t8_16w((transposed_input + 16 * 8),
+                                          &filter48[0], src, pitch,
+                                          b_limit_ptr, limit_ptr, thresh_ptr);
+    /* a stage returning 1 has already stored its result through src / pitch */
+    if (0 == early_exit) {
+        early_exit = vp9_vt_lpf_t16_16w((transposed_input + 16 * 8), src, pitch,
+                                        &filter48[0]);
+
+        if (0 == early_exit) { /* result is still in the copy: transpose it back */
+            vp9_transpose_16x16(transposed_input, 16, (src - 8), pitch);
+        }
+    }
+}
diff --git a/libavcodec/mips/vp9_mc_msa.c b/libavcodec/mips/vp9_mc_msa.c
new file mode 100644
index 00000000..1671d973
--- /dev/null
+++ b/libavcodec/mips/vp9_mc_msa.c
@@ -0,0 +1,4510 @@
+/*
+ * Copyright (c) 2015 Shivraj Patil (Shivraj.Patil@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/vp9dsp.h"
+#include "libavutil/mips/generic_macros_msa.h"
+#include "vp9dsp_mips.h"
+
+static const uint8_t mc_filt_mask_arr[16 * 3] = {
+    /* 8 width cases: byte index pairs (i, i + 1) for i = 0..7 */
+    0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
+    /* 4 width cases: pairs for i = 0..3, then the same pairs offset by 16 */
+    0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20,
+    /* 4 width cases: the previous row with every index raised by 8 */
+    8, 9, 9, 10, 10, 11, 11, 12, 24, 25, 25, 26, 26, 27, 27, 28
+};
+
+static const int8_t vp9_bilinear_filters_msa[15][2] = { /* 2-tap weight pairs, each summing to 128 */
+    {120, 8},
+    {112, 16},
+    {104, 24},
+    {96, 32},
+    {88, 40},
+    {80, 48},
+    {72, 56},
+    {64, 64},
+    {56, 72},
+    {48, 80},
+    {40, 88},
+    {32, 96},
+    {24, 104},
+    {16, 112},
+    {8, 120}
+};
+
+#define FILT_8TAP_DPADD_S_H(vec0, vec1, vec2, vec3,             \
+                            filt0, filt1, filt2, filt3)         \
+( {                                                             \
+    v8i16 tmp0, tmp1;                                           \
+    /* two partial sums, joined by a saturating add below */    \
+    tmp0 = __msa_dotp_s_h((v16i8) vec0, (v16i8) filt0);         \
+    tmp0 = __msa_dpadd_s_h(tmp0, (v16i8) vec1, (v16i8) filt1);  \
+    tmp1 = __msa_dotp_s_h((v16i8) vec2, (v16i8) filt2);         \
+    tmp1 = __msa_dpadd_s_h(tmp1, (v16i8) vec3, (v16i8) filt3);  \
+    tmp0 = __msa_adds_s_h(tmp0, tmp1);                          \
+    /* value of the statement expression */                     \
+    tmp0;                                                       \
+} )
+
+#define HORIZ_8TAP_FILT(src0, src1, mask0, mask1, mask2, mask3,          \
+                        filt_h0, filt_h1, filt_h2, filt_h3)              \
+( {                                                                      \
+    v16i8 vec0_m, vec1_m, vec2_m, vec3_m;                                \
+    v8i16 hz_out_m;                                                      \
+    /* shuffle src0/src1 by each mask, then sum the 8 taps */            \
+    VSHF_B4_SB(src0, src1, mask0, mask1, mask2, mask3,                   \
+               vec0_m, vec1_m, vec2_m, vec3_m);                          \
+    hz_out_m = FILT_8TAP_DPADD_S_H(vec0_m, vec1_m, vec2_m, vec3_m,       \
+                                   filt_h0, filt_h1, filt_h2, filt_h3);  \
+    /* rounding shift by 7, then signed saturation (sat 7) */            \
+    hz_out_m = __msa_srari_h(hz_out_m, 7);                               \
+    hz_out_m = __msa_sat_s_h(hz_out_m, 7);                               \
+                                                                         \
+    hz_out_m;                                                            \
+} )
+
+#define HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3,                  \
+                                   mask0, mask1, mask2, mask3,              \
+                                   filt0, filt1, filt2, filt3,              \
+                                   out0, out1)                              \
+{                                                                           \
+    v16i8 vec0_m, vec1_m, vec2_m, vec3_m,  vec4_m, vec5_m, vec6_m, vec7_m;  \
+    v8i16 res0_m, res1_m, res2_m, res3_m;                                   \
+    /* filt0/filt1 sum in res0/res1, filt2/filt3 in res2/res3; no rounding */ \
+    VSHF_B2_SB(src0, src1, src2, src3, mask0, mask0, vec0_m, vec1_m);       \
+    DOTP_SB2_SH(vec0_m, vec1_m, filt0, filt0, res0_m, res1_m);              \
+    VSHF_B2_SB(src0, src1, src2, src3, mask1, mask1, vec2_m, vec3_m);       \
+    DPADD_SB2_SH(vec2_m, vec3_m, filt1, filt1, res0_m, res1_m);             \
+    VSHF_B2_SB(src0, src1, src2, src3, mask2, mask2, vec4_m, vec5_m);       \
+    DOTP_SB2_SH(vec4_m, vec5_m, filt2, filt2, res2_m, res3_m);              \
+    VSHF_B2_SB(src0, src1, src2, src3, mask3, mask3, vec6_m, vec7_m);       \
+    DPADD_SB2_SH(vec6_m, vec7_m, filt3, filt3, res2_m, res3_m);             \
+    ADDS_SH2_SH(res0_m, res2_m, res1_m, res3_m, out0, out1);                \
+}
+
+#define HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3,                    \
+                                   mask0, mask1, mask2, mask3,                \
+                                   filt0, filt1, filt2, filt3,                \
+                                   out0, out1, out2, out3)                    \
+{                                                                             \
+    v16i8 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m;     \
+    v8i16 res0_m, res1_m, res2_m, res3_m, res4_m, res5_m, res6_m, res7_m;     \
+    /* each source is shuffled against itself; sums are not rounded */        \
+    VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0_m, vec1_m);         \
+    VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2_m, vec3_m);         \
+    DOTP_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt0, filt0, filt0, filt0,   \
+                res0_m, res1_m, res2_m, res3_m);                              \
+    VSHF_B2_SB(src0, src0, src1, src1, mask2, mask2, vec0_m, vec1_m);         \
+    VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, vec2_m, vec3_m);         \
+    DOTP_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt2, filt2, filt2, filt2,   \
+                res4_m, res5_m, res6_m, res7_m);                              \
+    VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec4_m, vec5_m);         \
+    VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec6_m, vec7_m);         \
+    DPADD_SB4_SH(vec4_m, vec5_m, vec6_m, vec7_m, filt1, filt1, filt1, filt1,  \
+                 res0_m, res1_m, res2_m, res3_m);                             \
+    VSHF_B2_SB(src0, src0, src1, src1, mask3, mask3, vec4_m, vec5_m);         \
+    VSHF_B2_SB(src2, src2, src3, src3, mask3, mask3, vec6_m, vec7_m);         \
+    DPADD_SB4_SH(vec4_m, vec5_m, vec6_m, vec7_m, filt3, filt3, filt3, filt3,  \
+                 res4_m, res5_m, res6_m, res7_m);                             \
+    ADDS_SH4_SH(res0_m, res4_m, res1_m, res5_m, res2_m, res6_m, res3_m,       \
+                res7_m, out0, out1, out2, out3);                              \
+}
+
+#define PCKEV_XORI128_AVG_ST_UB(in0, in1, dst, pdst)  \
+{                                                     \
+    v16u8 tmp_m;                                      \
+    /* pack, average with dst, store to pdst */       \
+    tmp_m = PCKEV_XORI128_UB(in1, in0);               \
+    tmp_m = __msa_aver_u_b(tmp_m, (v16u8) dst);       \
+    ST_UB(tmp_m, (pdst));                             \
+}
+
+#define PCKEV_AVG_ST_UB(in0, in1, dst, pdst)                  \
+{                                                             \
+    v16u8 tmp_m;                                              \
+    /* pack, average with dst, store to pdst */               \
+    tmp_m = (v16u8) __msa_pckev_b((v16i8) in0, (v16i8) in1);  \
+    tmp_m = __msa_aver_u_b(tmp_m, (v16u8) dst);               \
+    ST_UB(tmp_m, (pdst));                                     \
+}
+
+#define PCKEV_AVG_ST8x4_UB(in1, dst0, in2, dst1, in3, dst2, in4, dst3,  \
+                           pdst, stride)                                \
+{                                                                       \
+    v16u8 tmp0_m, tmp1_m, tmp2_m, tmp3_m;                               \
+    uint8_t *pdst_m = (uint8_t *) (pdst);                               \
+    /* pack in1..in4, merge dst0..dst3, average, store 8x4 */           \
+    PCKEV_B2_UB(in2, in1, in4, in3, tmp0_m, tmp1_m);                    \
+    PCKEV_D2_UB(dst1, dst0, dst3, dst2, tmp2_m, tmp3_m);                \
+    AVER_UB2_UB(tmp0_m, tmp2_m, tmp1_m, tmp3_m, tmp0_m, tmp1_m);        \
+    ST8x4_UB(tmp0_m, tmp1_m, pdst_m, stride);                           \
+}
+
+static void common_hz_8t_4x4_msa(const uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 const int8_t *filter)
+{
+    v16u8 mask0, mask1, mask2, mask3, out;
+    v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
+    v8i16 filt, out0, out1;
+    /* 8-tap filter along each of 4 source rows; result is stored with ST4x4_UB at dst. */
+    mask0 = LD_UB(&mc_filt_mask_arr[16]); /* first "4 width" row of the shuffle table */
+    src -= 3; /* loads begin 3 bytes before the caller's src */
+
+    /* rearranging filter: each halfword of filt holds two int8 taps; lanes 0..3 are splatted */
+    filt = LD_SH(filter);
+    SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
+
+    mask1 = mask0 + 2;
+    mask2 = mask0 + 4;
+    mask3 = mask0 + 6;
+
+    LD_SB4(src, src_stride, src0, src1, src2, src3);
+    XORI_B4_128_SB(src0, src1, src2, src3);
+    HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
+                               mask3, filt0, filt1, filt2, filt3, out0, out1);
+    SRARI_H2_SH(out0, out1, 7); /* the filter macro leaves the sums unrounded */
+    SAT_SH2_SH(out0, out1, 7);
+    out = PCKEV_XORI128_UB(out0, out1);
+    ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
+}
+
+static void common_hz_8t_4x8_msa(const uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 const int8_t *filter)
+{
+    v16i8 filt0, filt1, filt2, filt3;
+    v16i8 src0, src1, src2, src3;
+    v16u8 mask0, mask1, mask2, mask3, out;
+    v8i16 filt, out0, out1, out2, out3;
+    /* 8-tap filter along each of 8 source rows, handled as two groups of 4 rows. */
+    mask0 = LD_UB(&mc_filt_mask_arr[16]); /* first "4 width" row of the shuffle table */
+    src -= 3; /* loads begin 3 bytes before the caller's src */
+
+    /* rearranging filter: each halfword of filt holds two int8 taps; lanes 0..3 are splatted */
+    filt = LD_SH(filter);
+    SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
+
+    mask1 = mask0 + 2;
+    mask2 = mask0 + 4;
+    mask3 = mask0 + 6;
+    /* rows 0..3 go to out0/out1, rows 4..7 to out2/out3 */
+    LD_SB4(src, src_stride, src0, src1, src2, src3);
+    XORI_B4_128_SB(src0, src1, src2, src3);
+    src += (4 * src_stride);
+    HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
+                               mask3, filt0, filt1, filt2, filt3, out0, out1);
+    LD_SB4(src, src_stride, src0, src1, src2, src3);
+    XORI_B4_128_SB(src0, src1, src2, src3);
+    HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
+                               mask3, filt0, filt1, filt2, filt3, out2, out3);
+    SRARI_H4_SH(out0, out1, out2, out3, 7); /* the filter macro leaves the sums unrounded */
+    SAT_SH4_SH(out0, out1, out2, out3, 7);
+    out = PCKEV_XORI128_UB(out0, out1);
+    ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
+    dst += (4 * dst_stride);
+    out = PCKEV_XORI128_UB(out2, out3);
+    ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
+}
+
+static void common_hz_8t_4x16_msa(const uint8_t *src, int32_t src_stride,
+                                  uint8_t *dst, int32_t dst_stride,
+                                  const int8_t *filter)
+{
+    v16u8 mask0, mask1, mask2, mask3, out;
+    v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
+    v8i16 filt, out0, out1, out2, out3;
+    /* 8-tap filter along each of 16 source rows, handled as two passes of 8 rows. */
+    mask0 = LD_UB(&mc_filt_mask_arr[16]); /* first "4 width" row of the shuffle table */
+    src -= 3; /* loads begin 3 bytes before the caller's src */
+
+    /* rearranging filter: each halfword of filt holds two int8 taps; lanes 0..3 are splatted */
+    filt = LD_SH(filter);
+    SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
+
+    mask1 = mask0 + 2;
+    mask2 = mask0 + 4;
+    mask3 = mask0 + 6;
+    /* first pass: rows 0..7 */
+    LD_SB4(src, src_stride, src0, src1, src2, src3);
+    XORI_B4_128_SB(src0, src1, src2, src3);
+    src += (4 * src_stride);
+    HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
+                               mask3, filt0, filt1, filt2, filt3, out0, out1);
+    LD_SB4(src, src_stride, src0, src1, src2, src3);
+    XORI_B4_128_SB(src0, src1, src2, src3);
+    src += (4 * src_stride);
+    HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
+                               mask3, filt0, filt1, filt2, filt3, out2, out3);
+    SRARI_H4_SH(out0, out1, out2, out3, 7); /* the filter macro leaves the sums unrounded */
+    SAT_SH4_SH(out0, out1, out2, out3, 7);
+    out = PCKEV_XORI128_UB(out0, out1);
+    ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
+    dst += (4 * dst_stride);
+    out = PCKEV_XORI128_UB(out2, out3);
+    ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
+    dst += (4 * dst_stride);
+    /* second pass: rows 8..15, same sequence as above */
+    LD_SB4(src, src_stride, src0, src1, src2, src3);
+    XORI_B4_128_SB(src0, src1, src2, src3);
+    src += (4 * src_stride);
+    HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
+                               mask3, filt0, filt1, filt2, filt3, out0, out1);
+    LD_SB4(src, src_stride, src0, src1, src2, src3);
+    XORI_B4_128_SB(src0, src1, src2, src3);
+    src += (4 * src_stride);
+    HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
+                               mask3, filt0, filt1, filt2, filt3, out2, out3);
+
+    SRARI_H4_SH(out0, out1, out2, out3, 7);
+    SAT_SH4_SH(out0, out1, out2, out3, 7);
+    out = PCKEV_XORI128_UB(out0, out1);
+    ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
+    dst += (4 * dst_stride);
+    out = PCKEV_XORI128_UB(out2, out3);
+    ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
+}
+
+static void common_hz_8t_4w_msa(const uint8_t *src, int32_t src_stride,
+                                uint8_t *dst, int32_t dst_stride,
+                                const int8_t *filter, int32_t height)
+{ /* dispatch on height to the fixed-size 4-wide helpers */
+    if (4 == height) {
+        common_hz_8t_4x4_msa(src, src_stride, dst, dst_stride, filter);
+    } else if (8 == height) {
+        common_hz_8t_4x8_msa(src, src_stride, dst, dst_stride, filter);
+    } else if (16 == height) {
+        common_hz_8t_4x16_msa(src, src_stride, dst, dst_stride, filter);
+    } /* any other height: nothing is called, dst is left untouched */
+}
+
+static void common_hz_8t_8x4_msa(const uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 const int8_t *filter)
+{
+    v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
+    v16u8 mask0, mask1, mask2, mask3, tmp0, tmp1;
+    v8i16 filt, out0, out1, out2, out3;
+    /* 8-tap filter along each of 4 source rows; result is stored with ST8x4_UB at dst. */
+    mask0 = LD_UB(&mc_filt_mask_arr[0]); /* "8 width" row of the shuffle table */
+    src -= 3; /* loads begin 3 bytes before the caller's src */
+
+    /* rearranging filter: each halfword of filt holds two int8 taps; lanes 0..3 are splatted */
+    filt = LD_SH(filter);
+    SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
+
+    mask1 = mask0 + 2;
+    mask2 = mask0 + 4;
+    mask3 = mask0 + 6;
+
+    LD_SB4(src, src_stride, src0, src1, src2, src3);
+    XORI_B4_128_SB(src0, src1, src2, src3);
+    HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
+                               mask3, filt0, filt1, filt2, filt3, out0, out1,
+                               out2, out3);
+    SRARI_H4_SH(out0, out1, out2, out3, 7); /* the filter macro leaves the sums unrounded */
+    SAT_SH4_SH(out0, out1, out2, out3, 7);
+    tmp0 = PCKEV_XORI128_UB(out0, out1);
+    tmp1 = PCKEV_XORI128_UB(out2, out3);
+    ST8x4_UB(tmp0, tmp1, dst, dst_stride);
+}
+
+static void common_hz_8t_8x8mult_msa(const uint8_t *src, int32_t src_stride,
+                                     uint8_t *dst, int32_t dst_stride,
+                                     const int8_t *filter, int32_t height)
+{
+    uint32_t loop_cnt;
+    v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
+    v16u8 mask0, mask1, mask2, mask3, tmp0, tmp1;
+    v8i16 filt, out0, out1, out2, out3;
+    /* 8-tap filter along each source row; height >> 2 passes of 4 rows each. */
+    mask0 = LD_UB(&mc_filt_mask_arr[0]); /* "8 width" row of the shuffle table */
+    src -= 3; /* loads begin 3 bytes before the caller's src */
+
+    /* rearranging filter: each halfword of filt holds two int8 taps; lanes 0..3 are splatted */
+    filt = LD_SH(filter);
+    SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
+
+    mask1 = mask0 + 2;
+    mask2 = mask0 + 4;
+    mask3 = mask0 + 6;
+
+    for (loop_cnt = (height >> 2); loop_cnt--;) { /* rows beyond a multiple of 4 are not processed */
+        LD_SB4(src, src_stride, src0, src1, src2, src3);
+        XORI_B4_128_SB(src0, src1, src2, src3);
+        src += (4 * src_stride);
+        HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
+                                   mask3, filt0, filt1, filt2, filt3, out0,
+                                   out1, out2, out3);
+        SRARI_H4_SH(out0, out1, out2, out3, 7);
+        SAT_SH4_SH(out0, out1, out2, out3, 7);
+        tmp0 = PCKEV_XORI128_UB(out0, out1);
+        tmp1 = PCKEV_XORI128_UB(out2, out3);
+        ST8x4_UB(tmp0, tmp1, dst, dst_stride);
+        dst += (4 * dst_stride);
+    }
+}
+
+static void common_hz_8t_8w_msa(const uint8_t *src, int32_t src_stride,
+                                uint8_t *dst, int32_t dst_stride,
+                                const int8_t *filter, int32_t height)
+{ /* height 4 uses the single-pass helper; every other height uses the looped one */
+    if (4 == height) {
+        common_hz_8t_8x4_msa(src, src_stride, dst, dst_stride, filter);
+    } else {
+        common_hz_8t_8x8mult_msa(src, src_stride, dst, dst_stride, filter,
+                                 height);
+    }
+}
+
+static void common_hz_8t_16w_msa(const uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 const int8_t *filter, int32_t height)
+{
+    uint32_t loop_cnt;
+    v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
+    v16u8 mask0, mask1, mask2, mask3, out;
+    v8i16 filt, out0, out1, out2, out3;
+    /* 8-tap filter along each source row; height >> 1 passes of 2 rows, 16 bytes stored per row. */
+    mask0 = LD_UB(&mc_filt_mask_arr[0]); /* "8 width" row of the shuffle table */
+    src -= 3; /* loads begin 3 bytes before the caller's src */
+
+    /* rearranging filter: each halfword of filt holds two int8 taps; lanes 0..3 are splatted */
+    filt = LD_SH(filter);
+    SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
+
+    mask1 = mask0 + 2;
+    mask2 = mask0 + 4;
+    mask3 = mask0 + 6;
+
+    for (loop_cnt = (height >> 1); loop_cnt--;) {
+        LD_SB2(src, src_stride, src0, src2); /* src0 / src2: the two rows at offset 0 */
+        LD_SB2(src + 8, src_stride, src1, src3); /* src1 / src3: the same rows at offset 8 */
+        XORI_B4_128_SB(src0, src1, src2, src3);
+        src += (2 * src_stride);
+        HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
+                                   mask3, filt0, filt1, filt2, filt3, out0,
+                                   out1, out2, out3);
+        SRARI_H4_SH(out0, out1, out2, out3, 7);
+        SAT_SH4_SH(out0, out1, out2, out3, 7);
+        out = PCKEV_XORI128_UB(out0, out1);
+        ST_UB(out, dst);
+        dst += dst_stride;
+        out = PCKEV_XORI128_UB(out2, out3);
+        ST_UB(out, dst);
+        dst += dst_stride;
+    }
+}
+
+/* Horizontal 8-tap filter over a 32-pixel-wide block, two rows per loop
+ * pass (height >> 1 passes).  The loads for the second row of a pass are
+ * issued before the first row is stored; that ordering is kept as is. */
+static void common_hz_8t_32w_msa(const uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 const int8_t *filter, int32_t height)
+{
+    uint32_t pairs_left;
+    v16i8 seg0, seg1, seg2, seg3;
+    v16i8 coef0, coef1, coef2, coef3;
+    v16u8 shuf0, shuf1, shuf2, shuf3, packed;
+    v8i16 coefs, sum0, sum1, sum2, sum3;
+
+    /* begin three bytes to the left of the first output column */
+    src -= 3;
+
+    /* base shuffle pattern plus copies offset by 2, 4 and 6 */
+    shuf0 = LD_UB(&mc_filt_mask_arr[0]);
+    shuf1 = shuf0 + 2;
+    shuf2 = shuf0 + 4;
+    shuf3 = shuf0 + 6;
+
+    /* spread halfwords 0..3 of the loaded filter into one vector each */
+    coefs = LD_SH(filter);
+    SPLATI_H4_SB(coefs, 0, 1, 2, 3, coef0, coef1, coef2, coef3);
+
+    pairs_left = height >> 1;
+    while (pairs_left--) {
+        /* first row: loads at offsets 0, 16 and 24; seg1 is derived from
+         * seg0 and seg2 by __msa_sldi_b with a shift of 8 */
+        seg0 = LD_SB(src);
+        seg2 = LD_SB(src + 16);
+        seg3 = LD_SB(src + 24);
+        seg1 = __msa_sldi_b(seg2, seg0, 8);
+        src += src_stride;
+        XORI_B4_128_SB(seg0, seg1, seg2, seg3);
+        HORIZ_8TAP_8WID_4VECS_FILT(seg0, seg1, seg2, seg3, shuf0, shuf1,
+                                   shuf2, shuf3, coef0, coef1, coef2, coef3,
+                                   sum0, sum1, sum2, sum3);
+        SRARI_H4_SH(sum0, sum1, sum2, sum3, 7);
+        SAT_SH4_SH(sum0, sum1, sum2, sum3, 7);
+
+        /* fetch the second row before the first one is written out */
+        seg0 = LD_SB(src);
+        seg2 = LD_SB(src + 16);
+        seg3 = LD_SB(src + 24);
+        seg1 = __msa_sldi_b(seg2, seg0, 8);
+        src += src_stride;
+
+        /* store the first row as two 16-byte vectors */
+        packed = PCKEV_XORI128_UB(sum0, sum1);
+        ST_UB(packed, dst);
+        packed = PCKEV_XORI128_UB(sum2, sum3);
+        ST_UB(packed, dst + 16);
+        dst += dst_stride;
+
+        /* filter and store the second row */
+        XORI_B4_128_SB(seg0, seg1, seg2, seg3);
+        HORIZ_8TAP_8WID_4VECS_FILT(seg0, seg1, seg2, seg3, shuf0, shuf1,
+                                   shuf2, shuf3, coef0, coef1, coef2, coef3,
+                                   sum0, sum1, sum2, sum3);
+        SRARI_H4_SH(sum0, sum1, sum2, sum3, 7);
+        SAT_SH4_SH(sum0, sum1, sum2, sum3, 7);
+        packed = PCKEV_XORI128_UB(sum0, sum1);
+        ST_UB(packed, dst);
+        packed = PCKEV_XORI128_UB(sum2, sum3);
+        ST_UB(packed, dst + 16);
+        dst += dst_stride;
+    }
+}
+
+/* Horizontal 8-tap filter over a 64-pixel-wide block, one row per loop pass
+ * (height passes).  Each row is handled as two 32-byte halves: the left
+ * half is filtered and stored before the right half is loaded. */
+static void common_hz_8t_64w_msa(const uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 const int8_t *filter, int32_t height)
+{
+    int32_t rows_left;
+    v16i8 seg0, seg1, seg2, seg3;
+    v16i8 coef0, coef1, coef2, coef3;
+    v16u8 shuf0, shuf1, shuf2, shuf3, packed;
+    v8i16 coefs, sum0, sum1, sum2, sum3;
+
+    /* begin three bytes to the left of the first output column */
+    src -= 3;
+
+    /* base shuffle pattern plus copies offset by 2, 4 and 6 */
+    shuf0 = LD_UB(&mc_filt_mask_arr[0]);
+    shuf1 = shuf0 + 2;
+    shuf2 = shuf0 + 4;
+    shuf3 = shuf0 + 6;
+
+    /* spread halfwords 0..3 of the loaded filter into one vector each */
+    coefs = LD_SH(filter);
+    SPLATI_H4_SB(coefs, 0, 1, 2, 3, coef0, coef1, coef2, coef3);
+
+    rows_left = height;
+    while (rows_left--) {
+        /* left half: loads at offsets 0, 16 and 24 */
+        seg0 = LD_SB(src);
+        seg2 = LD_SB(src + 16);
+        seg3 = LD_SB(src + 24);
+        seg1 = __msa_sldi_b(seg2, seg0, 8);
+
+        XORI_B4_128_SB(seg0, seg1, seg2, seg3);
+        HORIZ_8TAP_8WID_4VECS_FILT(seg0, seg1, seg2, seg3, shuf0, shuf1,
+                                   shuf2, shuf3, coef0, coef1, coef2, coef3,
+                                   sum0, sum1, sum2, sum3);
+        SRARI_H4_SH(sum0, sum1, sum2, sum3, 7);
+        SAT_SH4_SH(sum0, sum1, sum2, sum3, 7);
+        packed = PCKEV_XORI128_UB(sum0, sum1);
+        ST_UB(packed, dst);
+        packed = PCKEV_XORI128_UB(sum2, sum3);
+        ST_UB(packed, dst + 16);
+
+        /* right half: loads at offsets 32, 48 and 56 */
+        seg0 = LD_SB(src + 32);
+        seg2 = LD_SB(src + 48);
+        seg3 = LD_SB(src + 56);
+        seg1 = __msa_sldi_b(seg2, seg0, 8);
+        src += src_stride;
+
+        XORI_B4_128_SB(seg0, seg1, seg2, seg3);
+        HORIZ_8TAP_8WID_4VECS_FILT(seg0, seg1, seg2, seg3, shuf0, shuf1,
+                                   shuf2, shuf3, coef0, coef1, coef2, coef3,
+                                   sum0, sum1, sum2, sum3);
+        SRARI_H4_SH(sum0, sum1, sum2, sum3, 7);
+        SAT_SH4_SH(sum0, sum1, sum2, sum3, 7);
+        packed = PCKEV_XORI128_UB(sum0, sum1);
+        ST_UB(packed, dst + 32);
+        packed = PCKEV_XORI128_UB(sum2, sum3);
+        ST_UB(packed, dst + 48);
+        dst += dst_stride;
+    }
+}
+
+static void common_vt_8t_4w_msa(const uint8_t *src, int32_t src_stride,
+                                uint8_t *dst, int32_t dst_stride,
+                                const int8_t *filter, int32_t height)
+{   /*
+     * Vertical 8-tap filter, 4-wide variant (per the function name).
+     * Seven source rows, starting three rows above src, are loaded before
+     * the loop.  Each of the height >> 2 loop passes loads four more rows,
+     * writes four rows to dst (ST4x4_UB) and carries the newest data into
+     * the next pass; a height remainder below 4 is not processed.
+     */
+    uint32_t loop_cnt;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+    v16i8 src10_r, src32_r, src54_r, src76_r, src98_r, src21_r, src43_r;
+    v16i8 src65_r, src87_r, src109_r, src2110, src4332, src6554, src8776;
+    v16i8 src10998, filt0, filt1, filt2, filt3;
+    v16u8 out;
+    v8i16 filt, out10, out32;
+
+    src -= (3 * src_stride);    /* back up three rows */
+
+    filt = LD_SH(filter);
+    SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
+
+    LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
+    src += (7 * src_stride);    /* advance seven rows */
+
+    ILVR_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_r, src32_r,
+               src54_r, src21_r);   /* pair up neighbouring rows */
+    ILVR_B2_SB(src4, src3, src6, src5, src43_r, src65_r);
+    ILVR_D3_SB(src21_r, src10_r, src43_r, src32_r, src65_r, src54_r, src2110,
+               src4332, src6554);   /* NOTE(review): names suggest two row
+                                     * pairs per vector -- confirm against
+                                     * the ILVR_D3_SB definition */
+    XORI_B3_128_SB(src2110, src4332, src6554);
+
+    for (loop_cnt = (height >> 2); loop_cnt--;) {
+        LD_SB4(src, src_stride, src7, src8, src9, src10);
+        src += (4 * src_stride);
+
+        ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_r,
+                   src87_r, src98_r, src109_r);
+        ILVR_D2_SB(src87_r, src76_r, src109_r, src98_r, src8776, src10998);
+        XORI_B2_128_SB(src8776, src10998);
+        out10 = FILT_8TAP_DPADD_S_H(src2110, src4332, src6554, src8776, filt0,
+                                    filt1, filt2, filt3);
+        out32 = FILT_8TAP_DPADD_S_H(src4332, src6554, src8776, src10998, filt0,
+                                    filt1, filt2, filt3);
+        SRARI_H2_SH(out10, out32, 7);
+        SAT_SH2_SH(out10, out32, 7);
+        out = PCKEV_XORI128_UB(out10, out32);
+        ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
+        dst += (4 * dst_stride);
+
+        src2110 = src6554;  /* rotate the row window for the next pass */
+        src4332 = src8776;
+        src6554 = src10998;
+        src6 = src10;       /* last loaded row pairs with the next src7 */
+    }
+}
+
+static void common_vt_8t_8w_msa(const uint8_t *src, int32_t src_stride,
+                                uint8_t *dst, int32_t dst_stride,
+                                const int8_t *filter, int32_t height)
+{   /*
+     * Vertical 8-tap filter, 8-wide variant (per the function name).
+     * Seven source rows, starting three rows above src, are loaded before
+     * the loop.  Each of the height >> 2 loop passes loads four more rows,
+     * produces four filtered rows, writes them with ST8x4_UB and rotates
+     * the interleaved row pairs for the next pass; a height remainder
+     * below 4 is not processed.
+     */
+    uint32_t loop_cnt;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+    v16i8 src10_r, src32_r, src54_r, src76_r, src98_r, src21_r, src43_r;
+    v16i8 src65_r, src87_r, src109_r, filt0, filt1, filt2, filt3;
+    v16u8 tmp0, tmp1;
+    v8i16 filt, out0_r, out1_r, out2_r, out3_r;
+
+    src -= (3 * src_stride);    /* back up three rows */
+
+    filt = LD_SH(filter);
+    SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
+
+    LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
+    XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
+    src += (7 * src_stride);    /* advance seven rows */
+    ILVR_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_r, src32_r,
+               src54_r, src21_r);   /* pair up neighbouring rows */
+    ILVR_B2_SB(src4, src3, src6, src5, src43_r, src65_r);
+
+    for (loop_cnt = (height >> 2); loop_cnt--;) {
+        LD_SB4(src, src_stride, src7, src8, src9, src10);
+        XORI_B4_128_SB(src7, src8, src9, src10);
+        src += (4 * src_stride);
+
+        ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_r,
+                   src87_r, src98_r, src109_r);
+        out0_r = FILT_8TAP_DPADD_S_H(src10_r, src32_r, src54_r, src76_r, filt0,
+                                     filt1, filt2, filt3);
+        out1_r = FILT_8TAP_DPADD_S_H(src21_r, src43_r, src65_r, src87_r, filt0,
+                                     filt1, filt2, filt3);
+        out2_r = FILT_8TAP_DPADD_S_H(src32_r, src54_r, src76_r, src98_r, filt0,
+                                     filt1, filt2, filt3);
+        out3_r = FILT_8TAP_DPADD_S_H(src43_r, src65_r, src87_r, src109_r, filt0,
+                                     filt1, filt2, filt3);
+        SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 7);
+        SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
+        tmp0 = PCKEV_XORI128_UB(out0_r, out1_r);
+        tmp1 = PCKEV_XORI128_UB(out2_r, out3_r);
+        ST8x4_UB(tmp0, tmp1, dst, dst_stride);
+        dst += (4 * dst_stride);
+
+        src10_r = src54_r;  /* rotate the row-pair window by four rows */
+        src32_r = src76_r;
+        src54_r = src98_r;
+        src21_r = src65_r;
+        src43_r = src87_r;
+        src65_r = src109_r;
+        src6 = src10;       /* last loaded row pairs with the next src7 */
+    }
+}
+
+static void common_vt_8t_16w_msa(const uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 const int8_t *filter, int32_t height)
+{   /*
+     * Vertical 8-tap filter, 16-wide variant (per the function name).
+     * Same structure as a sliding row window: seven rows, starting three
+     * rows above src, are loaded before the loop, and each of the
+     * height >> 2 loop passes loads four more rows and stores four
+     * vectors with ST_UB4.  Row pairs are kept in two sets (_r via ILVR_B,
+     * _l via ILVL_B) that are filtered separately and merged by
+     * PCKEV_B4_UB.  A height remainder below 4 is not processed.
+     */
+    uint32_t loop_cnt;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+    v16i8 filt0, filt1, filt2, filt3;
+    v16i8 src10_r, src32_r, src54_r, src76_r, src98_r, src21_r, src43_r;
+    v16i8 src65_r, src87_r, src109_r, src10_l, src32_l, src54_l, src76_l;
+    v16i8 src98_l, src21_l, src43_l, src65_l, src87_l, src109_l;
+    v16u8 tmp0, tmp1, tmp2, tmp3;
+    v8i16 filt, out0_r, out1_r, out2_r, out3_r, out0_l, out1_l, out2_l, out3_l;
+
+    src -= (3 * src_stride);    /* back up three rows */
+
+    filt = LD_SH(filter);
+    SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
+
+    LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
+    XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
+    src += (7 * src_stride);    /* advance seven rows */
+    ILVR_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_r, src32_r,
+               src54_r, src21_r);   /* "_r" set of row pairs */
+    ILVR_B2_SB(src4, src3, src6, src5, src43_r, src65_r);
+    ILVL_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_l, src32_l,
+               src54_l, src21_l);   /* "_l" set of the same row pairs */
+    ILVL_B2_SB(src4, src3, src6, src5, src43_l, src65_l);
+
+    for (loop_cnt = (height >> 2); loop_cnt--;) {
+        LD_SB4(src, src_stride, src7, src8, src9, src10);
+        XORI_B4_128_SB(src7, src8, src9, src10);
+        src += (4 * src_stride);
+
+        ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_r,
+                   src87_r, src98_r, src109_r);
+        ILVL_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_l,
+                   src87_l, src98_l, src109_l);
+        out0_r = FILT_8TAP_DPADD_S_H(src10_r, src32_r, src54_r, src76_r, filt0,
+                                     filt1, filt2, filt3);
+        out1_r = FILT_8TAP_DPADD_S_H(src21_r, src43_r, src65_r, src87_r, filt0,
+                                     filt1, filt2, filt3);
+        out2_r = FILT_8TAP_DPADD_S_H(src32_r, src54_r, src76_r, src98_r, filt0,
+                                     filt1, filt2, filt3);
+        out3_r = FILT_8TAP_DPADD_S_H(src43_r, src65_r, src87_r, src109_r, filt0,
+                                     filt1, filt2, filt3);
+        out0_l = FILT_8TAP_DPADD_S_H(src10_l, src32_l, src54_l, src76_l, filt0,
+                                     filt1, filt2, filt3);
+        out1_l = FILT_8TAP_DPADD_S_H(src21_l, src43_l, src65_l, src87_l, filt0,
+                                     filt1, filt2, filt3);
+        out2_l = FILT_8TAP_DPADD_S_H(src32_l, src54_l, src76_l, src98_l, filt0,
+                                     filt1, filt2, filt3);
+        out3_l = FILT_8TAP_DPADD_S_H(src43_l, src65_l, src87_l, src109_l, filt0,
+                                     filt1, filt2, filt3);
+        SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 7);
+        SRARI_H4_SH(out0_l, out1_l, out2_l, out3_l, 7);
+        SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
+        SAT_SH4_SH(out0_l, out1_l, out2_l, out3_l, 7);
+        PCKEV_B4_UB(out0_l, out0_r, out1_l, out1_r, out2_l, out2_r, out3_l,
+                    out3_r, tmp0, tmp1, tmp2, tmp3);
+        XORI_B4_128_UB(tmp0, tmp1, tmp2, tmp3);
+        ST_UB4(tmp0, tmp1, tmp2, tmp3, dst, dst_stride);
+        dst += (4 * dst_stride);
+
+        src10_r = src54_r;  /* rotate both row-pair windows by four rows */
+        src32_r = src76_r;
+        src54_r = src98_r;
+        src21_r = src65_r;
+        src43_r = src87_r;
+        src65_r = src109_r;
+        src10_l = src54_l;
+        src32_l = src76_l;
+        src54_l = src98_l;
+        src21_l = src65_l;
+        src43_l = src87_l;
+        src65_l = src109_l;
+        src6 = src10;       /* last loaded row pairs with the next src7 */
+    }
+}
+
+static void common_vt_8t_16w_mult_msa(const uint8_t *src, int32_t src_stride,
+                                      uint8_t *dst, int32_t dst_stride,
+                                      const int8_t *filter, int32_t height,
+                                      int32_t width)
+{   /*
+     * Vertical 8-tap filter applied in 16-byte-wide columns.  The outer
+     * loop runs width >> 4 times and moves src and dst 16 bytes to the
+     * right after each column; the inner loop runs height >> 2 times and
+     * stores four vectors per pass with ST_UB4.  For every column, seven
+     * rows starting three rows above src are loaded before the inner
+     * loop.  Width remainders below 16 and height remainders below 4 are
+     * not processed.
+     */
+    const uint8_t *src_tmp;
+    uint8_t *dst_tmp;
+    uint32_t loop_cnt, cnt;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+    v16i8 filt0, filt1, filt2, filt3;
+    v16i8 src10_r, src32_r, src54_r, src76_r, src98_r, src21_r, src43_r;
+    v16i8 src65_r, src87_r, src109_r, src10_l, src32_l, src54_l, src76_l;
+    v16i8 src98_l, src21_l, src43_l, src65_l, src87_l, src109_l;
+    v16u8 tmp0, tmp1, tmp2, tmp3;
+    v8i16 filt, out0_r, out1_r, out2_r, out3_r, out0_l, out1_l, out2_l, out3_l;
+
+    src -= (3 * src_stride);    /* back up three rows */
+
+    filt = LD_SH(filter);
+    SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
+
+    for (cnt = (width >> 4); cnt--;) {
+        src_tmp = src;  /* per-column cursors; src/dst keep the column top */
+        dst_tmp = dst;
+
+        LD_SB7(src_tmp, src_stride, src0, src1, src2, src3, src4, src5, src6);
+        XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
+        src_tmp += (7 * src_stride);
+        ILVR_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_r,
+                   src32_r, src54_r, src21_r);
+        ILVR_B2_SB(src4, src3, src6, src5, src43_r, src65_r);
+        ILVL_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_l,
+                   src32_l, src54_l, src21_l);
+        ILVL_B2_SB(src4, src3, src6, src5, src43_l, src65_l);
+
+        for (loop_cnt = (height >> 2); loop_cnt--;) {
+            LD_SB4(src_tmp, src_stride, src7, src8, src9, src10);
+            XORI_B4_128_SB(src7, src8, src9, src10);
+            src_tmp += (4 * src_stride);
+            ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_r,
+                       src87_r, src98_r, src109_r);
+            ILVL_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_l,
+                       src87_l, src98_l, src109_l);
+            out0_r = FILT_8TAP_DPADD_S_H(src10_r, src32_r, src54_r, src76_r,
+                                         filt0, filt1, filt2, filt3);
+            out1_r = FILT_8TAP_DPADD_S_H(src21_r, src43_r, src65_r, src87_r,
+                                         filt0, filt1, filt2, filt3);
+            out2_r = FILT_8TAP_DPADD_S_H(src32_r, src54_r, src76_r, src98_r,
+                                         filt0, filt1, filt2, filt3);
+            out3_r = FILT_8TAP_DPADD_S_H(src43_r, src65_r, src87_r, src109_r,
+                                         filt0, filt1, filt2, filt3);
+            out0_l = FILT_8TAP_DPADD_S_H(src10_l, src32_l, src54_l, src76_l,
+                                         filt0, filt1, filt2, filt3);
+            out1_l = FILT_8TAP_DPADD_S_H(src21_l, src43_l, src65_l, src87_l,
+                                         filt0, filt1, filt2, filt3);
+            out2_l = FILT_8TAP_DPADD_S_H(src32_l, src54_l, src76_l, src98_l,
+                                         filt0, filt1, filt2, filt3);
+            out3_l = FILT_8TAP_DPADD_S_H(src43_l, src65_l, src87_l, src109_l,
+                                         filt0, filt1, filt2, filt3);
+            SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 7);
+            SRARI_H4_SH(out0_l, out1_l, out2_l, out3_l, 7);
+            SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
+            SAT_SH4_SH(out0_l, out1_l, out2_l, out3_l, 7);
+            PCKEV_B4_UB(out0_l, out0_r, out1_l, out1_r, out2_l, out2_r, out3_l,
+                        out3_r, tmp0, tmp1, tmp2, tmp3);
+            XORI_B4_128_UB(tmp0, tmp1, tmp2, tmp3);
+            ST_UB4(tmp0, tmp1, tmp2, tmp3, dst_tmp, dst_stride);
+            dst_tmp += (4 * dst_stride);
+
+            src10_r = src54_r;  /* rotate both row-pair windows */
+            src32_r = src76_r;
+            src54_r = src98_r;
+            src21_r = src65_r;
+            src43_r = src87_r;
+            src65_r = src109_r;
+            src10_l = src54_l;
+            src32_l = src76_l;
+            src54_l = src98_l;
+            src21_l = src65_l;
+            src43_l = src87_l;
+            src65_l = src109_l;
+            src6 = src10;       /* last loaded row pairs with next src7 */
+        }
+
+        src += 16;  /* next 16-byte column */
+        dst += 16;
+    }
+}
+
+/* Vertical 8-tap filter for 32-pixel-wide blocks: forwards every argument
+ * to the multi-column 16-wide kernel with the width fixed at 32. */
+static void common_vt_8t_32w_msa(const uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 const int8_t *filter, int32_t height)
+{
+    const int32_t block_width = 32;
+
+    common_vt_8t_16w_mult_msa(src, src_stride, dst, dst_stride, filter, height,
+                              block_width);
+}
+
+/* Vertical 8-tap filter for 64-pixel-wide blocks: forwards every argument
+ * to the multi-column 16-wide kernel with the width fixed at 64. */
+static void common_vt_8t_64w_msa(const uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 const int8_t *filter, int32_t height)
+{
+    const int32_t block_width = 64;
+
+    common_vt_8t_16w_mult_msa(src, src_stride, dst, dst_stride, filter, height,
+                              block_width);
+}
+
+static void common_hv_8ht_8vt_4w_msa(const uint8_t *src, int32_t src_stride,
+                                     uint8_t *dst, int32_t dst_stride,
+                                     const int8_t *filter_horiz,
+                                     const int8_t *filter_vert,
+                                     int32_t height)
+{   /*
+     * Combined horizontal + vertical 8-tap filter, 4-wide variant (per the
+     * function name).  src is moved three rows up and three bytes left.
+     * Seven rows are loaded and run through HORIZ_8TAP_FILT (two rows per
+     * call) before the loop.  Each of the height >> 2 loop passes loads
+     * four more rows, filters them horizontally, applies the filter_vert
+     * coefficients to the horizontal results and writes four rows to dst
+     * (ST4x4_UB).  A height remainder below 4 is not processed.
+     */
+    uint32_t loop_cnt;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+    v16i8 filt_hz0, filt_hz1, filt_hz2, filt_hz3;
+    v16u8 mask0, mask1, mask2, mask3, out;
+    v8i16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6;
+    v8i16 hz_out7, hz_out8, hz_out9, tmp0, tmp1, out0, out1, out2, out3, out4;
+    v8i16 filt, filt_vt0, filt_vt1, filt_vt2, filt_vt3;
+
+    mask0 = LD_UB(&mc_filt_mask_arr[16]);   /* mask table entry at index 16 */
+    src -= (3 + 3 * src_stride);    /* three bytes left, three rows up */
+
+    /* rearranging filter */
+    filt = LD_SH(filter_horiz);
+    SPLATI_H4_SB(filt, 0, 1, 2, 3, filt_hz0, filt_hz1, filt_hz2, filt_hz3);
+
+    mask1 = mask0 + 2;
+    mask2 = mask0 + 4;
+    mask3 = mask0 + 6;
+
+    LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
+    XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
+    src += (7 * src_stride);    /* advance seven rows */
+
+    hz_out0 = HORIZ_8TAP_FILT(src0, src1, mask0, mask1, mask2, mask3, filt_hz0,
+                              filt_hz1, filt_hz2, filt_hz3);
+    hz_out2 = HORIZ_8TAP_FILT(src2, src3, mask0, mask1, mask2, mask3, filt_hz0,
+                              filt_hz1, filt_hz2, filt_hz3);
+    hz_out4 = HORIZ_8TAP_FILT(src4, src5, mask0, mask1, mask2, mask3, filt_hz0,
+                              filt_hz1, filt_hz2, filt_hz3);
+    hz_out5 = HORIZ_8TAP_FILT(src5, src6, mask0, mask1, mask2, mask3, filt_hz0,
+                              filt_hz1, filt_hz2, filt_hz3);
+    SLDI_B2_SH(hz_out2, hz_out4, hz_out0, hz_out2, hz_out1, hz_out3, 8);
+
+    filt = LD_SH(filter_vert);  /* filt is reused for the vertical taps */
+    SPLATI_H4_SH(filt, 0, 1, 2, 3, filt_vt0, filt_vt1, filt_vt2, filt_vt3);
+
+    ILVEV_B2_SH(hz_out0, hz_out1, hz_out2, hz_out3, out0, out1);
+    out2 = (v8i16) __msa_ilvev_b((v16i8) hz_out5, (v16i8) hz_out4);
+
+    for (loop_cnt = (height >> 2); loop_cnt--;) {
+        LD_SB4(src, src_stride, src7, src8, src9, src10);
+        XORI_B4_128_SB(src7, src8, src9, src10);
+        src += (4 * src_stride);
+
+        hz_out7 = HORIZ_8TAP_FILT(src7, src8, mask0, mask1, mask2, mask3,
+                                  filt_hz0, filt_hz1, filt_hz2, filt_hz3);
+        hz_out6 = (v8i16) __msa_sldi_b((v16i8) hz_out7, (v16i8) hz_out5, 8);
+        out3 = (v8i16) __msa_ilvev_b((v16i8) hz_out7, (v16i8) hz_out6);
+        tmp0 = FILT_8TAP_DPADD_S_H(out0, out1, out2, out3, filt_vt0, filt_vt1,
+                                   filt_vt2, filt_vt3);
+
+        hz_out9 = HORIZ_8TAP_FILT(src9, src10, mask0, mask1, mask2, mask3,
+                                  filt_hz0, filt_hz1, filt_hz2, filt_hz3);
+        hz_out8 = (v8i16) __msa_sldi_b((v16i8) hz_out9, (v16i8) hz_out7, 8);
+        out4 = (v8i16) __msa_ilvev_b((v16i8) hz_out9, (v16i8) hz_out8);
+        tmp1 = FILT_8TAP_DPADD_S_H(out1, out2, out3, out4, filt_vt0, filt_vt1,
+                                   filt_vt2, filt_vt3);
+        SRARI_H2_SH(tmp0, tmp1, 7);
+        SAT_SH2_SH(tmp0, tmp1, 7);
+        out = PCKEV_XORI128_UB(tmp0, tmp1);
+        ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
+        dst += (4 * dst_stride);
+
+        hz_out5 = hz_out9;  /* carry the newest results into the next pass */
+        out0 = out2;
+        out1 = out3;
+        out2 = out4;
+    }
+}
+
+static void common_hv_8ht_8vt_8w_msa(const uint8_t *src, int32_t src_stride,
+                                     uint8_t *dst, int32_t dst_stride,
+                                     const int8_t *filter_horiz,
+                                     const int8_t *filter_vert,
+                                     int32_t height)
+{   /*
+     * Combined horizontal + vertical 8-tap filter, 8-wide variant (per the
+     * function name).  src is moved three rows up and three bytes left.
+     * Seven rows are loaded and filtered horizontally, one row per
+     * HORIZ_8TAP_FILT call, before the loop.  Each of the height >> 2 loop
+     * passes loads four more rows, filters each horizontally, applies the
+     * filter_vert coefficients to the interleaved horizontal results and
+     * writes four rows to dst (ST8x4_UB).  A height remainder below 4 is
+     * not processed.
+     */
+    uint32_t loop_cnt;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+    v16i8 filt_hz0, filt_hz1, filt_hz2, filt_hz3;
+    v16u8 mask0, mask1, mask2, mask3, vec0, vec1;
+    v8i16 filt, filt_vt0, filt_vt1, filt_vt2, filt_vt3;
+    v8i16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6;
+    v8i16 hz_out7, hz_out8, hz_out9, hz_out10, tmp0, tmp1, tmp2, tmp3;
+    v8i16 out0, out1, out2, out3, out4, out5, out6, out7, out8, out9;
+
+    mask0 = LD_UB(&mc_filt_mask_arr[0]);    /* mask table entry at index 0 */
+    src -= (3 + 3 * src_stride);    /* three bytes left, three rows up */
+
+    /* rearranging filter */
+    filt = LD_SH(filter_horiz);
+    SPLATI_H4_SB(filt, 0, 1, 2, 3, filt_hz0, filt_hz1, filt_hz2, filt_hz3);
+
+    mask1 = mask0 + 2;
+    mask2 = mask0 + 4;
+    mask3 = mask0 + 6;
+
+    LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
+    src += (7 * src_stride);    /* advance seven rows */
+
+    XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
+    hz_out0 = HORIZ_8TAP_FILT(src0, src0, mask0, mask1, mask2, mask3, filt_hz0,
+                              filt_hz1, filt_hz2, filt_hz3);    /* the same
+                              * row is passed as both source operands */
+    hz_out1 = HORIZ_8TAP_FILT(src1, src1, mask0, mask1, mask2, mask3, filt_hz0,
+                              filt_hz1, filt_hz2, filt_hz3);
+    hz_out2 = HORIZ_8TAP_FILT(src2, src2, mask0, mask1, mask2, mask3, filt_hz0,
+                              filt_hz1, filt_hz2, filt_hz3);
+    hz_out3 = HORIZ_8TAP_FILT(src3, src3, mask0, mask1, mask2, mask3, filt_hz0,
+                              filt_hz1, filt_hz2, filt_hz3);
+    hz_out4 = HORIZ_8TAP_FILT(src4, src4, mask0, mask1, mask2, mask3, filt_hz0,
+                              filt_hz1, filt_hz2, filt_hz3);
+    hz_out5 = HORIZ_8TAP_FILT(src5, src5, mask0, mask1, mask2, mask3, filt_hz0,
+                              filt_hz1, filt_hz2, filt_hz3);
+    hz_out6 = HORIZ_8TAP_FILT(src6, src6, mask0, mask1, mask2, mask3, filt_hz0,
+                              filt_hz1, filt_hz2, filt_hz3);
+
+    filt = LD_SH(filter_vert);  /* filt is reused for the vertical taps */
+    SPLATI_H4_SH(filt, 0, 1, 2, 3, filt_vt0, filt_vt1, filt_vt2, filt_vt3);
+
+    ILVEV_B2_SH(hz_out0, hz_out1, hz_out2, hz_out3, out0, out1);
+    ILVEV_B2_SH(hz_out4, hz_out5, hz_out1, hz_out2, out2, out4);
+    ILVEV_B2_SH(hz_out3, hz_out4, hz_out5, hz_out6, out5, out6);
+
+    for (loop_cnt = (height >> 2); loop_cnt--;) {
+        LD_SB4(src, src_stride, src7, src8, src9, src10);
+        src += (4 * src_stride);
+
+        XORI_B4_128_SB(src7, src8, src9, src10);
+
+        hz_out7 = HORIZ_8TAP_FILT(src7, src7, mask0, mask1, mask2, mask3,
+                                  filt_hz0, filt_hz1, filt_hz2, filt_hz3);
+        out3 = (v8i16) __msa_ilvev_b((v16i8) hz_out7, (v16i8) hz_out6);
+        tmp0 = FILT_8TAP_DPADD_S_H(out0, out1, out2, out3, filt_vt0, filt_vt1,
+                                   filt_vt2, filt_vt3);
+
+        hz_out8 = HORIZ_8TAP_FILT(src8, src8, mask0, mask1, mask2, mask3,
+                                  filt_hz0, filt_hz1, filt_hz2, filt_hz3);
+        out7 = (v8i16) __msa_ilvev_b((v16i8) hz_out8, (v16i8) hz_out7);
+        tmp1 = FILT_8TAP_DPADD_S_H(out4, out5, out6, out7, filt_vt0, filt_vt1,
+                                   filt_vt2, filt_vt3);
+
+        hz_out9 = HORIZ_8TAP_FILT(src9, src9, mask0, mask1, mask2, mask3,
+                                  filt_hz0, filt_hz1, filt_hz2, filt_hz3);
+        out8 = (v8i16) __msa_ilvev_b((v16i8) hz_out9, (v16i8) hz_out8);
+        tmp2 = FILT_8TAP_DPADD_S_H(out1, out2, out3, out8, filt_vt0,
+                                   filt_vt1, filt_vt2, filt_vt3);
+
+        hz_out10 = HORIZ_8TAP_FILT(src10, src10, mask0, mask1, mask2, mask3,
+                                   filt_hz0, filt_hz1, filt_hz2, filt_hz3);
+        out9 = (v8i16) __msa_ilvev_b((v16i8) hz_out10, (v16i8) hz_out9);
+        tmp3 = FILT_8TAP_DPADD_S_H(out5, out6, out7, out9, filt_vt0, filt_vt1,
+                                   filt_vt2, filt_vt3);
+        SRARI_H4_SH(tmp0, tmp1, tmp2, tmp3, 7);
+        SAT_SH4_SH(tmp0, tmp1, tmp2, tmp3, 7);
+        vec0 = PCKEV_XORI128_UB(tmp0, tmp1);
+        vec1 = PCKEV_XORI128_UB(tmp2, tmp3);
+        ST8x4_UB(vec0, vec1, dst, dst_stride);
+        dst += (4 * dst_stride);
+
+        hz_out6 = hz_out10; /* carry the newest results into the next pass */
+        out0 = out2;
+        out1 = out3;
+        out2 = out8;
+        out4 = out6;
+        out5 = out7;
+        out6 = out9;
+    }
+}
+
+/* Combined horizontal + vertical 8-tap filter for 16-wide blocks, done as
+ * two calls of the 8-wide kernel at byte offsets 0 and 8. */
+static void common_hv_8ht_8vt_16w_msa(const uint8_t *src, int32_t src_stride,
+                                      uint8_t *dst, int32_t dst_stride,
+                                      const int8_t *filter_horiz,
+                                      const int8_t *filter_vert,
+                                      int32_t height)
+{
+    int32_t col;
+
+    for (col = 0; col < 16; col += 8) {
+        common_hv_8ht_8vt_8w_msa(src + col, src_stride, dst + col, dst_stride,
+                                 filter_horiz, filter_vert, height);
+    }
+}
+
+/* Combined horizontal + vertical 8-tap filter for 32-wide blocks, done as
+ * four calls of the 8-wide kernel at byte offsets 0, 8, 16 and 24. */
+static void common_hv_8ht_8vt_32w_msa(const uint8_t *src, int32_t src_stride,
+                                      uint8_t *dst, int32_t dst_stride,
+                                      const int8_t *filter_horiz,
+                                      const int8_t *filter_vert,
+                                      int32_t height)
+{
+    int32_t col;
+
+    for (col = 0; col < 32; col += 8) {
+        common_hv_8ht_8vt_8w_msa(src + col, src_stride, dst + col, dst_stride,
+                                 filter_horiz, filter_vert, height);
+    }
+}
+
+/* Combined horizontal + vertical 8-tap filter for 64-wide blocks, done as
+ * eight calls of the 8-wide kernel at byte offsets 0, 8, ..., 56. */
+static void common_hv_8ht_8vt_64w_msa(const uint8_t *src, int32_t src_stride,
+                                      uint8_t *dst, int32_t dst_stride,
+                                      const int8_t *filter_horiz,
+                                      const int8_t *filter_vert,
+                                      int32_t height)
+{
+    int32_t col;
+
+    for (col = 0; col < 64; col += 8) {
+        common_hv_8ht_8vt_8w_msa(src + col, src_stride, dst + col, dst_stride,
+                                 filter_horiz, filter_vert, height);
+    }
+}
+
+/* Horizontal 8-tap filter of a 4x4 block whose result is averaged with the
+ * data already present in dst (AVER_UB2_UB) before it is written back. */
+static void common_hz_8t_and_aver_dst_4x4_msa(const uint8_t *src,
+                                              int32_t src_stride,
+                                              uint8_t *dst, int32_t dst_stride,
+                                              const int8_t *filter)
+{
+    v16i8 row0, row1, row2, row3;
+    v16i8 coef0, coef1, coef2, coef3;
+    v16u8 shuf0, shuf1, shuf2, shuf3;
+    v16u8 old0, old1, old2, old3, pix_a, pix_b;
+    v8i16 coefs, sum_a, sum_b;
+
+    /* begin three bytes to the left of the first output column */
+    src -= 3;
+
+    /* shuffle pattern from table index 16 plus copies offset by 2, 4, 6 */
+    shuf0 = LD_UB(&mc_filt_mask_arr[16]);
+    shuf1 = shuf0 + 2;
+    shuf2 = shuf0 + 4;
+    shuf3 = shuf0 + 6;
+
+    /* spread halfwords 0..3 of the loaded filter into one vector each */
+    coefs = LD_SH(filter);
+    SPLATI_H4_SB(coefs, 0, 1, 2, 3, coef0, coef1, coef2, coef3);
+
+    /* filter the four source rows */
+    LD_SB4(src, src_stride, row0, row1, row2, row3);
+    XORI_B4_128_SB(row0, row1, row2, row3);
+    HORIZ_8TAP_4WID_4VECS_FILT(row0, row1, row2, row3, shuf0, shuf1, shuf2,
+                               shuf3, coef0, coef1, coef2, coef3,
+                               sum_a, sum_b);
+    SRARI_H2_SH(sum_a, sum_b, 7);
+    SAT_SH2_SH(sum_a, sum_b, 7);
+    PCKEV_B2_UB(sum_a, sum_a, sum_b, sum_b, pix_a, pix_b);
+    XORI_B2_128_UB(pix_a, pix_b);
+
+    /* fetch the current destination rows and merge them pairwise in place */
+    LD_UB4(dst, dst_stride, old0, old1, old2, old3);
+    ILVR_W2_UB(old1, old0, old3, old2, old0, old2);
+
+    /* average with the destination and store */
+    AVER_UB2_UB(pix_a, old0, pix_b, old2, pix_a, pix_b);
+    ST4x4_UB(pix_a, pix_b, 0, 1, 0, 1, dst, dst_stride);
+}
+
+/* Horizontal 8-tap filter of a 4x8 block, handled as two groups of four
+ * rows; the result is averaged with the data already present in dst
+ * (AVER_UB2_UB) before it is written back. */
+static void common_hz_8t_and_aver_dst_4x8_msa(const uint8_t *src,
+                                              int32_t src_stride,
+                                              uint8_t *dst, int32_t dst_stride,
+                                              const int8_t *filter)
+{
+    v16i8 row0, row1, row2, row3;
+    v16i8 coef0, coef1, coef2, coef3;
+    v16u8 shuf0, shuf1, shuf2, shuf3;
+    v16u8 pix0, pix1, pix2, pix3;
+    v16u8 old0, old1, old2, old3, old4, old5, old6, old7;
+    v8i16 coefs, sum0, sum1, sum2, sum3;
+
+    /* begin three bytes to the left of the first output column */
+    src -= 3;
+
+    /* shuffle pattern from table index 16 plus copies offset by 2, 4, 6 */
+    shuf0 = LD_UB(&mc_filt_mask_arr[16]);
+    shuf1 = shuf0 + 2;
+    shuf2 = shuf0 + 4;
+    shuf3 = shuf0 + 6;
+
+    /* spread halfwords 0..3 of the loaded filter into one vector each */
+    coefs = LD_SH(filter);
+    SPLATI_H4_SB(coefs, 0, 1, 2, 3, coef0, coef1, coef2, coef3);
+
+    /* first group of four rows */
+    LD_SB4(src, src_stride, row0, row1, row2, row3);
+    XORI_B4_128_SB(row0, row1, row2, row3);
+    src += 4 * src_stride;
+    HORIZ_8TAP_4WID_4VECS_FILT(row0, row1, row2, row3, shuf0, shuf1, shuf2,
+                               shuf3, coef0, coef1, coef2, coef3,
+                               sum0, sum1);
+
+    /* second group of four rows */
+    LD_SB4(src, src_stride, row0, row1, row2, row3);
+    XORI_B4_128_SB(row0, row1, row2, row3);
+    HORIZ_8TAP_4WID_4VECS_FILT(row0, row1, row2, row3, shuf0, shuf1, shuf2,
+                               shuf3, coef0, coef1, coef2, coef3,
+                               sum2, sum3);
+
+    SRARI_H4_SH(sum0, sum1, sum2, sum3, 7);
+    SAT_SH4_SH(sum0, sum1, sum2, sum3, 7);
+    PCKEV_B4_UB(sum0, sum0, sum1, sum1, sum2, sum2, sum3, sum3,
+                pix0, pix1, pix2, pix3);
+    ILVR_D2_UB(pix1, pix0, pix3, pix2, pix0, pix2);
+    XORI_B2_128_UB(pix0, pix2);
+
+    /* fetch the eight destination rows and merge them in place */
+    LD_UB8(dst, dst_stride, old0, old1, old2, old3, old4, old5, old6, old7);
+    ILVR_W4_UB(old1, old0, old3, old2, old5, old4, old7, old6,
+               old0, old2, old4, old6);
+    ILVR_D2_UB(old2, old0, old6, old4, old0, old4);
+
+    /* average with the destination and store */
+    AVER_UB2_UB(pix0, old0, pix2, old4, pix0, pix2);
+    ST4x8_UB(pix0, pix2, dst, dst_stride);
+}
+
+/* Entry point for the 4-wide horizontal 8-tap filter with destination
+ * averaging.  Heights 4 and 8 are routed to their dedicated kernels; any
+ * other height leaves dst untouched. */
+static void common_hz_8t_and_aver_dst_4w_msa(const uint8_t *src,
+                                             int32_t src_stride,
+                                             uint8_t *dst, int32_t dst_stride,
+                                             const int8_t *filter,
+                                             int32_t height)
+{
+    switch (height) {
+    case 4:
+        common_hz_8t_and_aver_dst_4x4_msa(src, src_stride, dst, dst_stride,
+                                          filter);
+        break;
+    case 8:
+        common_hz_8t_and_aver_dst_4x8_msa(src, src_stride, dst, dst_stride,
+                                          filter);
+        break;
+    default:
+        break;
+    }
+}
+
+static void common_hz_8t_and_aver_dst_8w_msa(const uint8_t *src,
+                                             int32_t src_stride,
+                                             uint8_t *dst, int32_t dst_stride,
+                                             const int8_t *filter,
+                                             int32_t height)
+{ /* 8-wide horizontal 8-tap pass; dst rows are read, combined with the result, written back */
+    int32_t loop_cnt;
+    v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
+    v16u8 mask0, mask1, mask2, mask3, dst0, dst1, dst2, dst3;
+    v8i16 filt, out0, out1, out2, out3;
+
+    mask0 = LD_UB(&mc_filt_mask_arr[0]); /* base shuffle mask from offset 0 of the table */
+    src -= 3; /* step back 3 bytes so the tap window begins left of src */
+
+    /* load the filter as halfwords; elements 0..3 feed filt0..filt3 */
+    filt = LD_SH(filter);
+    SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
+
+    mask1 = mask0 + 2; /* mask1..mask3: base mask with 2, 4, 6 added to every lane */
+    mask2 = mask0 + 4;
+    mask3 = mask0 + 6;
+
+    for (loop_cnt = (height >> 2); loop_cnt--;) { /* 4 rows per pass; a height % 4 remainder is not processed */
+        LD_SB4(src, src_stride, src0, src1, src2, src3);
+        XORI_B4_128_SB(src0, src1, src2, src3); /* NOTE(review): name suggests XOR with 128 (signed bias) -- confirm */
+        src += (4 * src_stride);
+        HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
+                                   mask3, filt0, filt1, filt2, filt3, out0,
+                                   out1, out2, out3);
+        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); /* current dst rows, consumed by the AVG/ST macro below */
+        SRARI_H4_SH(out0, out1, out2, out3, 7); /* NOTE(review): presumably a rounding right shift by 7 -- confirm */
+        SAT_SH4_SH(out0, out1, out2, out3, 7);
+        CONVERT_UB_AVG_ST8x4_UB(out0, out1, out2, out3, dst0, dst1, dst2, dst3,
+                                dst, dst_stride); /* presumably packs, averages with dst0..3, stores 8x4 */
+        dst += (4 * dst_stride);
+    }
+}
+
+static void common_hz_8t_and_aver_dst_16w_msa(const uint8_t *src,
+                                              int32_t src_stride,
+                                              uint8_t *dst, int32_t dst_stride,
+                                              const int8_t *filter,
+                                              int32_t height)
+{ /* 16-wide horizontal 8-tap pass, two rows per iteration, combined with existing dst */
+    int32_t loop_cnt;
+    v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
+    v16u8 mask0, mask1, mask2, mask3, dst0, dst1;
+    v8i16 filt, out0, out1, out2, out3;
+    v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v8i16 vec8, vec9, vec10, vec11, vec12, vec13, vec14, vec15;
+
+    mask0 = LD_UB(&mc_filt_mask_arr[0]); /* base shuffle mask from offset 0 of the table */
+    src -= 3; /* step back 3 bytes so the tap window begins left of src */
+
+    /* load the filter as halfwords; elements 0..3 feed filt0..filt3 */
+    filt = LD_SH(filter);
+    SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
+
+    mask1 = mask0 + 2; /* mask1..mask3: base mask with 2, 4, 6 added to every lane */
+    mask2 = mask0 + 4;
+    mask3 = mask0 + 6;
+
+    for (loop_cnt = height >> 1; loop_cnt--;) { /* 2 rows per pass; an odd final row is not processed */
+        LD_SB2(src, src_stride, src0, src2); /* src0/src2 are loaded starting at src */
+        LD_SB2(src + 8, src_stride, src1, src3); /* src1/src3 start 8 bytes to the right */
+        src += (2 * src_stride);
+
+        XORI_B4_128_SB(src0, src1, src2, src3); /* NOTE(review): name suggests XOR with 128 (signed bias) -- confirm */
+        VSHF_B4_SH(src0, src0, mask0, mask1, mask2, mask3, vec0, vec4, vec8,
+                   vec12); /* each source is shuffled with all four masks */
+        VSHF_B4_SH(src1, src1, mask0, mask1, mask2, mask3, vec1, vec5, vec9,
+                   vec13);
+        VSHF_B4_SH(src2, src2, mask0, mask1, mask2, mask3, vec2, vec6, vec10,
+                   vec14);
+        VSHF_B4_SH(src3, src3, mask0, mask1, mask2, mask3, vec3, vec7, vec11,
+                   vec15);
+        DOTP_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0,
+                    vec1, vec2, vec3); /* filt0 products start the vec0..3 sums */
+        DOTP_SB4_SH(vec8, vec9, vec10, vec11, filt2, filt2, filt2, filt2, vec8,
+                    vec9, vec10, vec11); /* filt2 products start the vec8..11 sums */
+        DPADD_SB4_SH(vec4, vec5, vec6, vec7, filt1, filt1, filt1, filt1, vec0,
+                     vec1, vec2, vec3); /* filt1 products accumulate into vec0..3 */
+        DPADD_SB4_SH(vec12, vec13, vec14, vec15, filt3, filt3, filt3, filt3,
+                     vec8, vec9, vec10, vec11); /* filt3 products accumulate into vec8..11 */
+        ADDS_SH4_SH(vec0, vec8, vec1, vec9, vec2, vec10, vec3, vec11, out0,
+                    out1, out2, out3); /* combine the two partial sums per output vector */
+        LD_UB2(dst, dst_stride, dst0, dst1); /* current dst data for the two rows */
+        SRARI_H4_SH(out0, out1, out2, out3, 7); /* NOTE(review): presumably a rounding right shift by 7 -- confirm */
+        SAT_SH4_SH(out0, out1, out2, out3, 7);
+        PCKEV_XORI128_AVG_ST_UB(out1, out0, dst0, dst); /* first row: out0/out1 combined with dst0 */
+        dst += dst_stride;
+        PCKEV_XORI128_AVG_ST_UB(out3, out2, dst1, dst); /* second row: out2/out3 combined with dst1 */
+        dst += dst_stride;
+    }
+}
+
+static void common_hz_8t_and_aver_dst_32w_msa(const uint8_t *src,
+                                              int32_t src_stride,
+                                              uint8_t *dst, int32_t dst_stride,
+                                              const int8_t *filter,
+                                              int32_t height)
+{ /* 32-wide horizontal 8-tap pass, one row per iteration, combined with existing dst */
+    uint32_t loop_cnt;
+    v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
+    v16u8 dst1, dst2, mask0, mask1, mask2, mask3;
+    v8i16 filt, out0, out1, out2, out3;
+    v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v8i16 vec8, vec9, vec10, vec11, vec12, vec13, vec14, vec15;
+
+    mask0 = LD_UB(&mc_filt_mask_arr[0]); /* base shuffle mask from offset 0 of the table */
+    src -= 3; /* step back 3 bytes so the tap window begins left of src */
+
+    /* load the filter as halfwords; elements 0..3 feed filt0..filt3 */
+    filt = LD_SH(filter);
+    SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
+
+    mask1 = mask0 + 2; /* mask1..mask3: base mask with 2, 4, 6 added to every lane */
+    mask2 = mask0 + 4;
+    mask3 = mask0 + 6;
+
+    for (loop_cnt = height; loop_cnt--;) { /* one row per pass */
+        src0 = LD_SB(src);
+        src2 = LD_SB(src + 16);
+        src3 = LD_SB(src + 24);
+        src1 = __msa_sldi_b(src2, src0, 8); /* built from src0/src2 rather than loaded; presumably bytes 8..23 -- confirm */
+        src += src_stride;
+
+        XORI_B4_128_SB(src0, src1, src2, src3); /* NOTE(review): name suggests XOR with 128 (signed bias) -- confirm */
+        VSHF_B4_SH(src0, src0, mask0, mask1, mask2, mask3, vec0, vec4, vec8,
+                   vec12); /* each source is shuffled with all four masks */
+        VSHF_B4_SH(src1, src1, mask0, mask1, mask2, mask3, vec1, vec5, vec9,
+                   vec13);
+        VSHF_B4_SH(src2, src2, mask0, mask1, mask2, mask3, vec2, vec6, vec10,
+                   vec14);
+        VSHF_B4_SH(src3, src3, mask0, mask1, mask2, mask3, vec3, vec7, vec11,
+                   vec15);
+        DOTP_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0,
+                    vec1, vec2, vec3); /* filt0 products start the vec0..3 sums */
+        DOTP_SB4_SH(vec8, vec9, vec10, vec11, filt2, filt2, filt2, filt2, vec8,
+                    vec9, vec10, vec11); /* filt2 products start the vec8..11 sums */
+        DPADD_SB4_SH(vec4, vec5, vec6, vec7, filt1, filt1, filt1, filt1, vec0,
+                     vec1, vec2, vec3); /* filt1 products accumulate into vec0..3 */
+        DPADD_SB4_SH(vec12, vec13, vec14, vec15, filt3, filt3, filt3, filt3,
+                     vec8, vec9, vec10, vec11); /* filt3 products accumulate into vec8..11 */
+        ADDS_SH4_SH(vec0, vec8, vec1, vec9, vec2, vec10, vec3, vec11, out0,
+                    out1, out2, out3); /* combine the two partial sums per output vector */
+        SRARI_H4_SH(out0, out1, out2, out3, 7); /* NOTE(review): presumably a rounding right shift by 7 -- confirm */
+        SAT_SH4_SH(out0, out1, out2, out3, 7);
+        LD_UB2(dst, 16, dst1, dst2); /* step of 16 bytes: presumably the two halves of this dst row */
+        PCKEV_XORI128_AVG_ST_UB(out1, out0, dst1, dst); /* written at dst */
+        PCKEV_XORI128_AVG_ST_UB(out3, out2, dst2, dst + 16); /* written at dst + 16 */
+        dst += dst_stride;
+    }
+}
+
+static void common_hz_8t_and_aver_dst_64w_msa(const uint8_t *src,
+                                              int32_t src_stride,
+                                              uint8_t *dst, int32_t dst_stride,
+                                              const int8_t *filter,
+                                              int32_t height)
+{ /* 64-wide horizontal 8-tap pass: each row is handled as two 32-byte halves, combined with dst */
+    uint32_t loop_cnt, cnt;
+    v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
+    v16u8 dst1, dst2, mask0, mask1, mask2, mask3;
+    v8i16 filt, out0, out1, out2, out3;
+    v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v8i16 vec8, vec9, vec10, vec11, vec12, vec13, vec14, vec15;
+
+    mask0 = LD_UB(&mc_filt_mask_arr[0]); /* base shuffle mask from offset 0 of the table */
+    src -= 3; /* step back 3 bytes so the tap window begins left of src */
+
+    /* load the filter as halfwords; elements 0..3 feed filt0..filt3 */
+    filt = LD_SH(filter);
+    SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
+
+    mask1 = mask0 + 2; /* mask1..mask3: base mask with 2, 4, 6 added to every lane */
+    mask2 = mask0 + 4;
+    mask3 = mask0 + 6;
+
+    for (loop_cnt = height; loop_cnt--;) { /* one row per outer pass */
+        for (cnt = 0; cnt < 2; ++cnt) { /* cnt << 5 selects byte offset 0 or 32 within the row */
+            src0 = LD_SB(&src[cnt << 5]);
+            src2 = LD_SB(&src[16 + (cnt << 5)]);
+            src3 = LD_SB(&src[24 + (cnt << 5)]);
+            src1 = __msa_sldi_b(src2, src0, 8); /* built from src0/src2 rather than loaded -- lane order to be confirmed */
+
+            XORI_B4_128_SB(src0, src1, src2, src3); /* NOTE(review): name suggests XOR with 128 (signed bias) -- confirm */
+            VSHF_B4_SH(src0, src0, mask0, mask1, mask2, mask3, vec0, vec4, vec8,
+                       vec12); /* each source is shuffled with all four masks */
+            VSHF_B4_SH(src1, src1, mask0, mask1, mask2, mask3, vec1, vec5, vec9,
+                       vec13);
+            VSHF_B4_SH(src2, src2, mask0, mask1, mask2, mask3, vec2, vec6,
+                       vec10, vec14);
+            VSHF_B4_SH(src3, src3, mask0, mask1, mask2, mask3, vec3, vec7,
+                       vec11, vec15);
+            DOTP_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
+                        vec0, vec1, vec2, vec3); /* filt0 products start the vec0..3 sums */
+            DOTP_SB4_SH(vec8, vec9, vec10, vec11, filt2, filt2, filt2, filt2,
+                        vec8, vec9, vec10, vec11); /* filt2 products start the vec8..11 sums */
+            DPADD_SB4_SH(vec4, vec5, vec6, vec7, filt1, filt1, filt1, filt1,
+                         vec0, vec1, vec2, vec3); /* filt1 products accumulate into vec0..3 */
+            DPADD_SB4_SH(vec12, vec13, vec14, vec15, filt3, filt3, filt3, filt3,
+                         vec8, vec9, vec10, vec11); /* filt3 products accumulate into vec8..11 */
+            ADDS_SH4_SH(vec0, vec8, vec1, vec9, vec2, vec10, vec3, vec11, out0,
+                        out1, out2, out3); /* combine the two partial sums per output vector */
+            SRARI_H4_SH(out0, out1, out2, out3, 7); /* NOTE(review): presumably a rounding right shift by 7 -- confirm */
+            SAT_SH4_SH(out0, out1, out2, out3, 7);
+            LD_UB2(&dst[cnt << 5], 16, dst1, dst2); /* existing dst data for this 32-byte half */
+            PCKEV_XORI128_AVG_ST_UB(out1, out0, dst1, &dst[cnt << 5]);
+            PCKEV_XORI128_AVG_ST_UB(out3, out2, dst2, &dst[16 + (cnt << 5)]);
+        }
+
+        src += src_stride; /* both pointers advance once per row, after both halves are done */
+        dst += dst_stride;
+    }
+}
+
+static void common_vt_8t_and_aver_dst_4w_msa(const uint8_t *src,
+                                             int32_t src_stride,
+                                             uint8_t *dst, int32_t dst_stride,
+                                             const int8_t *filter,
+                                             int32_t height)
+{ /* 4-wide vertical 8-tap pass, four rows per iteration, averaged with existing dst */
+    uint32_t loop_cnt;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+    v16u8 dst0, dst1, dst2, dst3, out;
+    v16i8 src10_r, src32_r, src54_r, src76_r, src98_r, src21_r, src43_r;
+    v16i8 src65_r, src87_r, src109_r, src2110, src4332, src6554, src8776;
+    v16i8 src10998, filt0, filt1, filt2, filt3;
+    v8i16 filt, out10, out32;
+
+    src -= (3 * src_stride); /* start 3 rows above src */
+
+    filt = LD_SH(filter); /* filter read as halfwords; elements 0..3 feed filt0..filt3 */
+    SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
+
+    LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6); /* seven rows loaded before the loop */
+    src += (7 * src_stride);
+
+    ILVR_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_r, src32_r,
+               src54_r, src21_r); /* adjacent rows are byte-interleaved pairwise */
+    ILVR_B2_SB(src4, src3, src6, src5, src43_r, src65_r);
+    ILVR_D3_SB(src21_r, src10_r, src43_r, src32_r, src65_r, src54_r, src2110,
+               src4332, src6554); /* two row pairs are merged into each vector */
+    XORI_B3_128_SB(src2110, src4332, src6554); /* NOTE(review): name suggests XOR with 128 (signed bias) -- confirm */
+
+    for (loop_cnt = (height >> 2); loop_cnt--;) { /* 4 rows per pass; a height % 4 remainder is not processed */
+        LD_SB4(src, src_stride, src7, src8, src9, src10);
+        src += (4 * src_stride);
+
+        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); /* existing dst rows to average with */
+        ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_r,
+                   src87_r, src98_r, src109_r);
+        ILVR_D2_SB(src87_r, src76_r, src109_r, src98_r, src8776, src10998);
+        XORI_B2_128_SB(src8776, src10998);
+        out10 = FILT_8TAP_DPADD_S_H(src2110, src4332, src6554, src8776, filt0,
+                                    filt1, filt2, filt3);
+        out32 = FILT_8TAP_DPADD_S_H(src4332, src6554, src8776, src10998, filt0,
+                                    filt1, filt2, filt3);
+        SRARI_H2_SH(out10, out32, 7); /* NOTE(review): presumably a rounding right shift by 7 -- confirm */
+        SAT_SH2_SH(out10, out32, 7);
+        out = PCKEV_XORI128_UB(out10, out32);
+        ILVR_W2_UB(dst1, dst0, dst3, dst2, dst0, dst2); /* gather the four dst rows into dst0/dst2 ... */
+
+        dst0 = (v16u8) __msa_ilvr_d((v2i64) dst2, (v2i64) dst0); /* ... then into the single vector dst0 */
+        out = __msa_aver_u_b(out, dst0); /* unsigned byte average of filter output and gathered dst */
+
+        ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
+        dst += (4 * dst_stride);
+
+        src2110 = src6554; /* carry the newest row groups and last row into the next pass */
+        src4332 = src8776;
+        src6554 = src10998;
+        src6 = src10;
+    }
+}
+
+static void common_vt_8t_and_aver_dst_8w_msa(const uint8_t *src,
+                                             int32_t src_stride,
+                                             uint8_t *dst, int32_t dst_stride,
+                                             const int8_t *filter,
+                                             int32_t height)
+{ /* 8-wide vertical 8-tap pass, four rows per iteration, combined with existing dst */
+    uint32_t loop_cnt;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+    v16u8 dst0, dst1, dst2, dst3;
+    v16i8 src10_r, src32_r, src54_r, src76_r, src98_r, src21_r, src43_r;
+    v16i8 src65_r, src87_r, src109_r, filt0, filt1, filt2, filt3;
+    v8i16 filt, out0, out1, out2, out3;
+
+    src -= (3 * src_stride); /* start 3 rows above src */
+
+    filt = LD_SH(filter); /* filter read as halfwords; elements 0..3 feed filt0..filt3 */
+    SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
+
+    LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6); /* seven rows loaded before the loop */
+    src += (7 * src_stride);
+
+    XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6); /* NOTE(review): name suggests XOR with 128 (signed bias) -- confirm */
+    ILVR_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_r, src32_r,
+               src54_r, src21_r); /* adjacent rows are byte-interleaved pairwise */
+    ILVR_B2_SB(src4, src3, src6, src5, src43_r, src65_r);
+
+    for (loop_cnt = (height >> 2); loop_cnt--;) { /* 4 rows per pass; a height % 4 remainder is not processed */
+        LD_SB4(src, src_stride, src7, src8, src9, src10);
+        src += (4 * src_stride);
+
+        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); /* existing dst rows, consumed by the AVG/ST macro below */
+        XORI_B4_128_SB(src7, src8, src9, src10);
+        ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_r,
+                   src87_r, src98_r, src109_r);
+        out0 = FILT_8TAP_DPADD_S_H(src10_r, src32_r, src54_r, src76_r, filt0,
+                                   filt1, filt2, filt3); /* each output row uses four consecutive row pairs */
+        out1 = FILT_8TAP_DPADD_S_H(src21_r, src43_r, src65_r, src87_r, filt0,
+                                   filt1, filt2, filt3);
+        out2 = FILT_8TAP_DPADD_S_H(src32_r, src54_r, src76_r, src98_r, filt0,
+                                   filt1, filt2, filt3);
+        out3 = FILT_8TAP_DPADD_S_H(src43_r, src65_r, src87_r, src109_r, filt0,
+                                   filt1, filt2, filt3);
+        SRARI_H4_SH(out0, out1, out2, out3, 7); /* NOTE(review): presumably a rounding right shift by 7 -- confirm */
+        SAT_SH4_SH(out0, out1, out2, out3, 7);
+        CONVERT_UB_AVG_ST8x4_UB(out0, out1, out2, out3, dst0, dst1, dst2, dst3,
+                                dst, dst_stride); /* presumably packs, averages with dst0..3, stores 8x4 */
+        dst += (4 * dst_stride);
+
+        src10_r = src54_r; /* slide the window: the newest pairs and last row feed the next pass */
+        src32_r = src76_r;
+        src54_r = src98_r;
+        src21_r = src65_r;
+        src43_r = src87_r;
+        src65_r = src109_r;
+        src6 = src10;
+    }
+}
+
+static void common_vt_8t_and_aver_dst_16w_mult_msa(const uint8_t *src,
+                                                   int32_t src_stride,
+                                                   uint8_t *dst,
+                                                   int32_t dst_stride,
+                                                   const int8_t *filter,
+                                                   int32_t height,
+                                                   int32_t width)
+{ /* vertical 8-tap pass for widths that are multiples of 16, averaged with existing dst */
+    const uint8_t *src_tmp;
+    uint8_t *dst_tmp;
+    uint32_t loop_cnt, cnt;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+    v16i8 src10_r, src32_r, src54_r, src76_r, src98_r, src21_r, src43_r;
+    v16i8 src65_r, src87_r, src109_r, src10_l, src32_l, src54_l, src76_l;
+    v16i8 src98_l, src21_l, src43_l, src65_l, src87_l, src109_l;
+    v16i8 filt0, filt1, filt2, filt3;
+    v16u8 dst0, dst1, dst2, dst3, tmp0, tmp1, tmp2, tmp3;
+    v8i16 out0_r, out1_r, out2_r, out3_r, out0_l, out1_l, out2_l, out3_l, filt;
+
+    src -= (3 * src_stride); /* start 3 rows above src */
+
+    filt = LD_SH(filter); /* filter read as halfwords; elements 0..3 feed filt0..filt3 */
+    SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
+
+    for (cnt = (width >> 4); cnt--;) { /* one pass per 16-column strip; a width % 16 remainder is not processed */
+        src_tmp = src; /* per-strip row cursors; src/dst themselves only move sideways */
+        dst_tmp = dst;
+
+        LD_SB7(src_tmp, src_stride, src0, src1, src2, src3, src4, src5, src6); /* seven rows loaded per strip */
+        XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6); /* NOTE(review): name suggests XOR with 128 (signed bias) -- confirm */
+        src_tmp += (7 * src_stride);
+
+        ILVR_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_r,
+                   src32_r, src54_r, src21_r); /* _r vectors: right-half interleave of adjacent rows */
+        ILVR_B2_SB(src4, src3, src6, src5, src43_r, src65_r);
+        ILVL_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_l,
+                   src32_l, src54_l, src21_l); /* _l vectors: left-half interleave of the same rows */
+        ILVL_B2_SB(src4, src3, src6, src5, src43_l, src65_l);
+
+        for (loop_cnt = (height >> 2); loop_cnt--;) { /* 4 rows per pass; a height % 4 remainder is not processed */
+            LD_SB4(src_tmp, src_stride, src7, src8, src9, src10);
+            src_tmp += (4 * src_stride);
+
+            LD_UB4(dst_tmp, dst_stride, dst0, dst1, dst2, dst3); /* existing dst rows to average with */
+            XORI_B4_128_SB(src7, src8, src9, src10);
+            ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_r,
+                       src87_r, src98_r, src109_r);
+            ILVL_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_l,
+                       src87_l, src98_l, src109_l);
+            out0_r = FILT_8TAP_DPADD_S_H(src10_r, src32_r, src54_r, src76_r,
+                                         filt0, filt1, filt2, filt3);
+            out1_r = FILT_8TAP_DPADD_S_H(src21_r, src43_r, src65_r, src87_r,
+                                         filt0, filt1, filt2, filt3);
+            out2_r = FILT_8TAP_DPADD_S_H(src32_r, src54_r, src76_r, src98_r,
+                                         filt0, filt1, filt2, filt3);
+            out3_r = FILT_8TAP_DPADD_S_H(src43_r, src65_r, src87_r, src109_r,
+                                         filt0, filt1, filt2, filt3);
+            out0_l = FILT_8TAP_DPADD_S_H(src10_l, src32_l, src54_l, src76_l,
+                                         filt0, filt1, filt2, filt3);
+            out1_l = FILT_8TAP_DPADD_S_H(src21_l, src43_l, src65_l, src87_l,
+                                         filt0, filt1, filt2, filt3);
+            out2_l = FILT_8TAP_DPADD_S_H(src32_l, src54_l, src76_l, src98_l,
+                                         filt0, filt1, filt2, filt3);
+            out3_l = FILT_8TAP_DPADD_S_H(src43_l, src65_l, src87_l, src109_l,
+                                         filt0, filt1, filt2, filt3);
+            SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, 7); /* NOTE(review): presumably a rounding right shift by 7 -- confirm */
+            SRARI_H4_SH(out0_l, out1_l, out2_l, out3_l, 7);
+            SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
+            SAT_SH4_SH(out0_l, out1_l, out2_l, out3_l, 7);
+            PCKEV_B4_UB(out0_l, out0_r, out1_l, out1_r, out2_l, out2_r, out3_l,
+                        out3_r, tmp0, tmp1, tmp2, tmp3); /* each row's _l/_r halves are packed into one vector */
+            XORI_B4_128_UB(tmp0, tmp1, tmp2, tmp3);
+            AVER_UB4_UB(tmp0, dst0, tmp1, dst1, tmp2, dst2, tmp3, dst3,
+                        dst0, dst1, dst2, dst3); /* averages land in dst0..3, stored next */
+            ST_UB4(dst0, dst1, dst2, dst3, dst_tmp, dst_stride);
+            dst_tmp += (4 * dst_stride);
+
+            src10_r = src54_r; /* slide the window: the newest pairs and last row feed the next pass */
+            src32_r = src76_r;
+            src54_r = src98_r;
+            src21_r = src65_r;
+            src43_r = src87_r;
+            src65_r = src109_r;
+            src10_l = src54_l;
+            src32_l = src76_l;
+            src54_l = src98_l;
+            src21_l = src65_l;
+            src43_l = src87_l;
+            src65_l = src109_l;
+            src6 = src10;
+        }
+
+        src += 16; /* move to the next 16-column strip */
+        dst += 16;
+    }
+}
+
+static void common_vt_8t_and_aver_dst_16w_msa(const uint8_t *src,
+                                              int32_t src_stride,
+                                              uint8_t *dst, int32_t dst_stride,
+                                              const int8_t *filter,
+                                              int32_t height)
+{ /* fixed-width wrapper: forwards every argument to the 16w_mult kernel with width 16 */
+    common_vt_8t_and_aver_dst_16w_mult_msa(src, src_stride, dst, dst_stride,
+                                           filter, height, 16);
+}
+
+static void common_vt_8t_and_aver_dst_32w_msa(const uint8_t *src,
+                                              int32_t src_stride,
+                                              uint8_t *dst, int32_t dst_stride,
+                                              const int8_t *filter,
+                                              int32_t height)
+{ /* fixed-width wrapper: forwards every argument to the 16w_mult kernel with width 32 */
+    common_vt_8t_and_aver_dst_16w_mult_msa(src, src_stride, dst, dst_stride,
+                                           filter, height, 32);
+}
+
+static void common_vt_8t_and_aver_dst_64w_msa(const uint8_t *src,
+                                              int32_t src_stride,
+                                              uint8_t *dst, int32_t dst_stride,
+                                              const int8_t *filter,
+                                              int32_t height)
+{ /* fixed-width wrapper: forwards every argument to the 16w_mult kernel with width 64 */
+    common_vt_8t_and_aver_dst_16w_mult_msa(src, src_stride, dst, dst_stride,
+                                           filter, height, 64);
+}
+
+static void common_hv_8ht_8vt_and_aver_dst_4w_msa(const uint8_t *src,
+                                                  int32_t src_stride,
+                                                  uint8_t *dst,
+                                                  int32_t dst_stride,
+                                                  const int8_t *filter_horiz,
+                                                  const int8_t *filter_vert,
+                                                  int32_t height)
+{ /* 4-wide horizontal-then-vertical 8-tap pass, four rows per iteration, averaged with dst */
+    uint32_t loop_cnt;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+    v16u8 dst0, dst1, dst2, dst3, mask0, mask1, mask2, mask3, tmp0, tmp1;
+    v16i8 filt_hz0, filt_hz1, filt_hz2, filt_hz3;
+    v8i16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6;
+    v8i16 hz_out7, hz_out8, hz_out9, res0, res1, vec0, vec1, vec2, vec3, vec4;
+    v8i16 filt, filt_vt0, filt_vt1, filt_vt2, filt_vt3;
+
+    mask0 = LD_UB(&mc_filt_mask_arr[16]); /* base shuffle mask from offset 16 of the table */
+    src -= (3 + 3 * src_stride); /* back up 3 bytes and 3 rows */
+
+    /* horizontal filter read as halfwords; elements 0..3 feed filt_hz0..filt_hz3 */
+    filt = LD_SH(filter_horiz);
+    SPLATI_H4_SB(filt, 0, 1, 2, 3, filt_hz0, filt_hz1, filt_hz2, filt_hz3);
+
+    mask1 = mask0 + 2; /* mask1..mask3: base mask with 2, 4, 6 added to every lane */
+    mask2 = mask0 + 4;
+    mask3 = mask0 + 6;
+
+    LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6); /* seven rows loaded before the loop */
+    XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6); /* NOTE(review): name suggests XOR with 128 (signed bias) -- confirm */
+    src += (7 * src_stride);
+
+    hz_out0 = HORIZ_8TAP_FILT(src0, src1, mask0, mask1, mask2, mask3, filt_hz0,
+                              filt_hz1, filt_hz2, filt_hz3); /* each call takes two source rows */
+    hz_out2 = HORIZ_8TAP_FILT(src2, src3, mask0, mask1, mask2, mask3, filt_hz0,
+                              filt_hz1, filt_hz2, filt_hz3);
+    hz_out4 = HORIZ_8TAP_FILT(src4, src5, mask0, mask1, mask2, mask3, filt_hz0,
+                              filt_hz1, filt_hz2, filt_hz3);
+    hz_out5 = HORIZ_8TAP_FILT(src5, src6, mask0, mask1, mask2, mask3, filt_hz0,
+                              filt_hz1, filt_hz2, filt_hz3);
+    SLDI_B2_SH(hz_out2, hz_out4, hz_out0, hz_out2, hz_out1, hz_out3, 8); /* hz_out1/hz_out3 are derived by an 8-byte slide, not filtered separately */
+
+    filt = LD_SH(filter_vert); /* 'filt' is reused for the vertical filter */
+    SPLATI_H4_SH(filt, 0, 1, 2, 3, filt_vt0, filt_vt1, filt_vt2, filt_vt3);
+
+    ILVEV_B2_SH(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
+    vec2 = (v8i16) __msa_ilvev_b((v16i8) hz_out5, (v16i8) hz_out4);
+
+    for (loop_cnt = (height >> 2); loop_cnt--;) { /* 4 rows per pass; a height % 4 remainder is not processed */
+        LD_SB4(src, src_stride, src7, src8, src9, src10);
+        XORI_B4_128_SB(src7, src8, src9, src10);
+        src += (4 * src_stride);
+
+        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); /* existing dst rows to average with */
+        hz_out7 = HORIZ_8TAP_FILT(src7, src8, mask0, mask1, mask2, mask3,
+                                  filt_hz0, filt_hz1, filt_hz2, filt_hz3);
+        hz_out6 = (v8i16) __msa_sldi_b((v16i8) hz_out7, (v16i8) hz_out5, 8); /* derived from hz_out5 and hz_out7 by a slide */
+        vec3 = (v8i16) __msa_ilvev_b((v16i8) hz_out7, (v16i8) hz_out6);
+        res0 = FILT_8TAP_DPADD_S_H(vec0, vec1, vec2, vec3, filt_vt0, filt_vt1,
+                                   filt_vt2, filt_vt3); /* vertical taps over vec0..vec3 */
+
+        hz_out9 = HORIZ_8TAP_FILT(src9, src10, mask0, mask1, mask2, mask3,
+                                  filt_hz0, filt_hz1, filt_hz2, filt_hz3);
+        hz_out8 = (v8i16) __msa_sldi_b((v16i8) hz_out9, (v16i8) hz_out7, 8); /* derived from hz_out7 and hz_out9 by a slide */
+        vec4 = (v8i16) __msa_ilvev_b((v16i8) hz_out9, (v16i8) hz_out8);
+        res1 = FILT_8TAP_DPADD_S_H(vec1, vec2, vec3, vec4, filt_vt0, filt_vt1,
+                                   filt_vt2, filt_vt3); /* same taps, window shifted to vec1..vec4 */
+        ILVR_W2_UB(dst1, dst0, dst3, dst2, dst0, dst2); /* gather the four dst rows into dst0/dst2 */
+
+        SRARI_H2_SH(res0, res1, 7); /* NOTE(review): presumably a rounding right shift by 7 -- confirm */
+        SAT_SH2_SH(res0, res1, 7);
+        PCKEV_B2_UB(res0, res0, res1, res1, tmp0, tmp1);
+        XORI_B2_128_UB(tmp0, tmp1);
+        AVER_UB2_UB(tmp0, dst0, tmp1, dst2, tmp0, tmp1); /* averages overwrite tmp0/tmp1 */
+        ST4x4_UB(tmp0, tmp1, 0, 1, 0, 1, dst, dst_stride);
+        dst += (4 * dst_stride);
+
+        hz_out5 = hz_out9; /* carry the newest horizontal output and interleaved pairs forward */
+        vec0 = vec2;
+        vec1 = vec3;
+        vec2 = vec4;
+    }
+}
+
+static void common_hv_8ht_8vt_and_aver_dst_8w_msa(const uint8_t *src,
+                                                  int32_t src_stride,
+                                                  uint8_t *dst,
+                                                  int32_t dst_stride,
+                                                  const int8_t *filter_horiz,
+                                                  const int8_t *filter_vert,
+                                                  int32_t height)
+{ /* 8-wide horizontal-then-vertical 8-tap pass, four rows per iteration, combined with dst */
+    uint32_t loop_cnt;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+    v16i8 filt_hz0, filt_hz1, filt_hz2, filt_hz3;
+    v8i16 filt, filt_vt0, filt_vt1, filt_vt2, filt_vt3;
+    v16u8 dst0, dst1, dst2, dst3, mask0, mask1, mask2, mask3;
+    v8i16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6;
+    v8i16 hz_out7, hz_out8, hz_out9, hz_out10, tmp0, tmp1, tmp2, tmp3;
+    v8i16 out0, out1, out2, out3, out4, out5, out6, out7, out8, out9;
+
+    mask0 = LD_UB(&mc_filt_mask_arr[0]); /* base shuffle mask from offset 0 of the table */
+    src -= (3 + 3 * src_stride); /* back up 3 bytes and 3 rows */
+
+    /* horizontal filter read as halfwords; elements 0..3 feed filt_hz0..filt_hz3 */
+    filt = LD_SH(filter_horiz);
+    SPLATI_H4_SB(filt, 0, 1, 2, 3, filt_hz0, filt_hz1, filt_hz2, filt_hz3);
+
+    mask1 = mask0 + 2; /* mask1..mask3: base mask with 2, 4, 6 added to every lane */
+    mask2 = mask0 + 4;
+    mask3 = mask0 + 6;
+
+    LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6); /* seven rows loaded before the loop */
+    src += (7 * src_stride);
+
+    XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6); /* NOTE(review): name suggests XOR with 128 (signed bias) -- confirm */
+    hz_out0 = HORIZ_8TAP_FILT(src0, src0, mask0, mask1, mask2, mask3, filt_hz0,
+                              filt_hz1, filt_hz2, filt_hz3); /* same row passed as both inputs: one row per call */
+    hz_out1 = HORIZ_8TAP_FILT(src1, src1, mask0, mask1, mask2, mask3, filt_hz0,
+                              filt_hz1, filt_hz2, filt_hz3);
+    hz_out2 = HORIZ_8TAP_FILT(src2, src2, mask0, mask1, mask2, mask3, filt_hz0,
+                              filt_hz1, filt_hz2, filt_hz3);
+    hz_out3 = HORIZ_8TAP_FILT(src3, src3, mask0, mask1, mask2, mask3, filt_hz0,
+                              filt_hz1, filt_hz2, filt_hz3);
+    hz_out4 = HORIZ_8TAP_FILT(src4, src4, mask0, mask1, mask2, mask3, filt_hz0,
+                              filt_hz1, filt_hz2, filt_hz3);
+    hz_out5 = HORIZ_8TAP_FILT(src5, src5, mask0, mask1, mask2, mask3, filt_hz0,
+                              filt_hz1, filt_hz2, filt_hz3);
+    hz_out6 = HORIZ_8TAP_FILT(src6, src6, mask0, mask1, mask2, mask3, filt_hz0,
+                              filt_hz1, filt_hz2, filt_hz3);
+
+    filt = LD_SH(filter_vert); /* 'filt' is reused for the vertical filter */
+    SPLATI_H4_SH(filt, 0, 1, 2, 3, filt_vt0, filt_vt1, filt_vt2, filt_vt3);
+
+    ILVEV_B2_SH(hz_out0, hz_out1, hz_out2, hz_out3, out0, out1); /* out0..out2 pair rows (0,1) (2,3) (4,5) */
+    ILVEV_B2_SH(hz_out4, hz_out5, hz_out1, hz_out2, out2, out4); /* out4..out6 pair rows (1,2) (3,4) (5,6) */
+    ILVEV_B2_SH(hz_out3, hz_out4, hz_out5, hz_out6, out5, out6);
+
+    for (loop_cnt = (height >> 2); loop_cnt--;) { /* 4 rows per pass; a height % 4 remainder is not processed */
+        LD_SB4(src, src_stride, src7, src8, src9, src10);
+        XORI_B4_128_SB(src7, src8, src9, src10);
+        src += (4 * src_stride);
+
+        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); /* existing dst rows, consumed by the AVG/ST macro below */
+
+        hz_out7 = HORIZ_8TAP_FILT(src7, src7, mask0, mask1, mask2, mask3,
+                                  filt_hz0, filt_hz1, filt_hz2, filt_hz3);
+        out3 = (v8i16) __msa_ilvev_b((v16i8) hz_out7, (v16i8) hz_out6);
+        tmp0 = FILT_8TAP_DPADD_S_H(out0, out1, out2, out3, filt_vt0, filt_vt1,
+                                   filt_vt2, filt_vt3); /* first output row: pairs out0..out3 */
+
+        hz_out8 = HORIZ_8TAP_FILT(src8, src8, mask0, mask1, mask2, mask3,
+                                  filt_hz0, filt_hz1, filt_hz2, filt_hz3);
+        out7 = (v8i16) __msa_ilvev_b((v16i8) hz_out8, (v16i8) hz_out7);
+        tmp1 = FILT_8TAP_DPADD_S_H(out4, out5, out6, out7, filt_vt0, filt_vt1,
+                                   filt_vt2, filt_vt3); /* second output row: pairs out4..out7 */
+
+        hz_out9 = HORIZ_8TAP_FILT(src9, src9, mask0, mask1, mask2, mask3,
+                                  filt_hz0, filt_hz1, filt_hz2, filt_hz3);
+        out8 = (v8i16) __msa_ilvev_b((v16i8) hz_out9, (v16i8) hz_out8);
+        tmp2 = FILT_8TAP_DPADD_S_H(out1, out2, out3, out8, filt_vt0, filt_vt1,
+                                   filt_vt2, filt_vt3); /* third output row: out1..out3 plus out8 */
+
+        hz_out10 = HORIZ_8TAP_FILT(src10, src10, mask0, mask1, mask2, mask3,
+                                   filt_hz0, filt_hz1, filt_hz2, filt_hz3);
+        out9 = (v8i16) __msa_ilvev_b((v16i8) hz_out10, (v16i8) hz_out9);
+        tmp3 = FILT_8TAP_DPADD_S_H(out5, out6, out7, out9, filt_vt0, filt_vt1,
+                                   filt_vt2, filt_vt3); /* fourth output row: out5..out7 plus out9 */
+
+        SRARI_H4_SH(tmp0, tmp1, tmp2, tmp3, 7); /* NOTE(review): presumably a rounding right shift by 7 -- confirm */
+        SAT_SH4_SH(tmp0, tmp1, tmp2, tmp3, 7);
+        CONVERT_UB_AVG_ST8x4_UB(tmp0, tmp1, tmp2, tmp3, dst0, dst1, dst2, dst3,
+                                dst, dst_stride); /* presumably packs, averages with dst0..3, stores 8x4 */
+        dst += (4 * dst_stride);
+
+        hz_out6 = hz_out10; /* carry the last horizontal row and the newest pairs forward */
+        out0 = out2;
+        out1 = out3;
+        out2 = out8;
+        out4 = out6;
+        out5 = out7;
+        out6 = out9;
+    }
+}
+
+static void common_hv_8ht_8vt_and_aver_dst_16w_msa(const uint8_t *src,
+                                                   int32_t src_stride,
+                                                   uint8_t *dst,
+                                                   int32_t dst_stride,
+                                                   const int8_t *filter_horiz,
+                                                   const int8_t *filter_vert,
+                                                   int32_t height)
+{
+    int32_t x_off;
+
+    /* 16-wide entry point: the 8-wide kernel is invoked once per 8-byte
+     * column offset (0 and 8), with src and dst shifted by the same amount. */
+    for (x_off = 0; x_off < 16; x_off += 8) {
+        common_hv_8ht_8vt_and_aver_dst_8w_msa(src + x_off, src_stride,
+                                              dst + x_off, dst_stride,
+                                              filter_horiz, filter_vert,
+                                              height);
+    }
+}
+
+static void common_hv_8ht_8vt_and_aver_dst_32w_msa(const uint8_t *src,
+                                                   int32_t src_stride,
+                                                   uint8_t *dst,
+                                                   int32_t dst_stride,
+                                                   const int8_t *filter_horiz,
+                                                   const int8_t *filter_vert,
+                                                   int32_t height)
+{
+    int32_t x_off;
+
+    /* 32-wide entry point: the 8-wide kernel is invoked once per 8-byte
+     * column offset (0, 8, 16, 24), with src and dst shifted identically. */
+    for (x_off = 0; x_off < 32; x_off += 8) {
+        common_hv_8ht_8vt_and_aver_dst_8w_msa(src + x_off, src_stride,
+                                              dst + x_off, dst_stride,
+                                              filter_horiz, filter_vert,
+                                              height);
+    }
+}
+
+static void common_hv_8ht_8vt_and_aver_dst_64w_msa(const uint8_t *src,
+                                                   int32_t src_stride,
+                                                   uint8_t *dst,
+                                                   int32_t dst_stride,
+                                                   const int8_t *filter_horiz,
+                                                   const int8_t *filter_vert,
+                                                   int32_t height)
+{
+    int32_t x_off;
+
+    /* 64-wide entry point: the 8-wide kernel is invoked once per 8-byte
+     * column offset (0, 8, ..., 56), with src and dst shifted identically. */
+    for (x_off = 0; x_off < 64; x_off += 8) {
+        common_hv_8ht_8vt_and_aver_dst_8w_msa(src + x_off, src_stride,
+                                              dst + x_off, dst_stride,
+                                              filter_horiz, filter_vert,
+                                              height);
+    }
+}
+
+static void common_hz_2t_4x4_msa(const uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 const int8_t *filter)
+{
+    v16i8 src0, src1, src2, src3, mask;
+    v16u8 filt0, vec0, vec1, res0, res1;
+    v8u16 vec2, vec3, filt;
+    /* Horizontal 2-tap filter of a 4x4 block (size as given by the name). */
+    mask = LD_SB(&mc_filt_mask_arr[16]);
+
+    /* splat halfword 0 of the vector loaded from filter into every lane */
+    filt = LD_UH(filter);
+    filt0 = (v16u8) __msa_splati_h((v8i16) filt, 0);
+    /* per helper names: load 4 rows, shuffle, dot with taps, round by 7, pack */
+    LD_SB4(src, src_stride, src0, src1, src2, src3);
+    VSHF_B2_UB(src0, src1, src2, src3, mask, mask, vec0, vec1);
+    DOTP_UB2_UH(vec0, vec1, filt0, filt0, vec2, vec3);
+    SRARI_H2_UH(vec2, vec3, 7);
+    PCKEV_B2_UB(vec2, vec2, vec3, vec3, res0, res1);
+    ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
+}
+
+static void common_hz_2t_4x8_msa(const uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 const int8_t *filter)
+{
+    v16u8 vec0, vec1, vec2, vec3, filt0;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
+    v16i8 res0, res1, res2, res3;
+    v8u16 vec4, vec5, vec6, vec7, filt;
+    /* Horizontal 2-tap filter of a 4x8 block (size as given by the name). */
+    mask = LD_SB(&mc_filt_mask_arr[16]);
+
+    /* splat halfword 0 of the vector loaded from filter into every lane */
+    filt = LD_UH(filter);
+    filt0 = (v16u8) __msa_splati_h((v8i16) filt, 0);
+    /* per helper names: load 8 rows, shuffle, dot with taps, round by 7, pack */
+    LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
+    VSHF_B2_UB(src0, src1, src2, src3, mask, mask, vec0, vec1);
+    VSHF_B2_UB(src4, src5, src6, src7, mask, mask, vec2, vec3);
+    DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
+                vec4, vec5, vec6, vec7);
+    SRARI_H4_UH(vec4, vec5, vec6, vec7, 7);
+    PCKEV_B4_SB(vec4, vec4, vec5, vec5, vec6, vec6, vec7, vec7,
+                res0, res1, res2, res3);
+    ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
+    dst += (4 * dst_stride);  /* second store starts four rows further down */
+    ST4x4_UB(res2, res3, 0, 1, 0, 1, dst, dst_stride);
+}
+
+void ff_put_bilin_4h_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                         const uint8_t *src, ptrdiff_t src_stride,
+                         int height, int mx, int my)
+{
+    /* mx selects the horizontal tap pair (table row mx - 1); my is unused. */
+    const int8_t *taps = vp9_bilinear_filters_msa[mx - 1];
+
+    if (height == 4)
+        common_hz_2t_4x4_msa(src, src_stride, dst, dst_stride, taps);
+    else if (height == 8)
+        common_hz_2t_4x8_msa(src, src_stride, dst, dst_stride, taps);
+}
+
+static void common_hz_2t_8x4_msa(const uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 const int8_t *filter)
+{
+    v16u8 filt0;
+    v16i8 src0, src1, src2, src3, mask;
+    v8u16 vec0, vec1, vec2, vec3, filt;
+    /* Horizontal 2-tap filter of an 8x4 block (size as given by the name). */
+    mask = LD_SB(&mc_filt_mask_arr[0]);
+
+    /* splat halfword 0 of the vector loaded from filter into every lane */
+    filt = LD_UH(filter);
+    filt0 = (v16u8) __msa_splati_h((v8i16) filt, 0);
+    /* one vector per row; vec0..vec3 and src0/src1 are reused for the results */
+    LD_SB4(src, src_stride, src0, src1, src2, src3);
+    VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
+    VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
+    DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
+                vec0, vec1, vec2, vec3);
+    SRARI_H4_UH(vec0, vec1, vec2, vec3, 7);
+    PCKEV_B2_SB(vec1, vec0, vec3, vec2, src0, src1);
+    ST8x4_UB(src0, src1, dst, dst_stride);
+}
+
+static void common_hz_2t_8x8mult_msa(const uint8_t *src, int32_t src_stride,
+                                     uint8_t *dst, int32_t dst_stride,
+                                     const int8_t *filter, int32_t height)
+{
+    v16u8 filt0;
+    v16i8 src0, src1, src2, src3, mask, out0, out1;
+    v8u16 vec0, vec1, vec2, vec3, filt;
+    /* Horizontal 2-tap filter, 8 wide: 8 rows always, 8 more if height == 16. */
+    mask = LD_SB(&mc_filt_mask_arr[0]);
+
+    /* splat halfword 0 of the vector loaded from filter into every lane */
+    filt = LD_UH(filter);
+    filt0 = (v16u8) __msa_splati_h((v8i16) filt, 0);
+    /* rows 0-3: filtered first; rows 4-7 are loaded before rows 0-3 are stored */
+    LD_SB4(src, src_stride, src0, src1, src2, src3);
+    src += (4 * src_stride);
+
+    VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
+    VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
+    DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
+                vec0, vec1, vec2, vec3);
+    SRARI_H4_UH(vec0, vec1, vec2, vec3, 7);
+    LD_SB4(src, src_stride, src0, src1, src2, src3);
+    src += (4 * src_stride);
+
+    PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1);
+    ST8x4_UB(out0, out1, dst, dst_stride);
+    dst += (4 * dst_stride);
+    /* rows 4-7: same shuffle / dot / round / pack / store sequence */
+    VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
+    VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
+    DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
+                vec0, vec1, vec2, vec3);
+    SRARI_H4_UH(vec0, vec1, vec2, vec3, 7);
+    PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1);
+    ST8x4_UB(out0, out1, dst, dst_stride);
+    dst += (4 * dst_stride);
+    /* only height == 16 continues; any other height ends after 8 rows */
+    if (16 == height) {
+        LD_SB4(src, src_stride, src0, src1, src2, src3);
+        src += (4 * src_stride);
+
+        VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
+        VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
+        DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
+                    vec0, vec1, vec2, vec3);
+        SRARI_H4_UH(vec0, vec1, vec2, vec3, 7);
+        LD_SB4(src, src_stride, src0, src1, src2, src3);
+        src += (4 * src_stride);
+
+        PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1);
+        ST8x4_UB(out0, out1, dst, dst_stride);
+        /* dst is not advanced here; the last store is offset by 4 rows instead */
+        VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
+        VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
+        DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
+                    vec0, vec1, vec2, vec3);
+        SRARI_H4_UH(vec0, vec1, vec2, vec3, 7);
+        PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1);
+        ST8x4_UB(out0, out1, dst + 4 * dst_stride, dst_stride);
+    }
+}
+
+void ff_put_bilin_8h_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                         const uint8_t *src, ptrdiff_t src_stride,
+                         int height, int mx, int my)
+{
+    const int8_t *taps = vp9_bilinear_filters_msa[mx - 1];
+
+    /* Height 4 has a dedicated kernel; every other height goes to 8x8mult. */
+    if (height == 4) {
+        common_hz_2t_8x4_msa(src, src_stride, dst, dst_stride, taps);
+        return;
+    }
+    common_hz_2t_8x8mult_msa(src, src_stride, dst, dst_stride, taps, height);
+}
+
+void ff_put_bilin_16h_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                          const uint8_t *src, ptrdiff_t src_stride,
+                          int height, int mx, int my)
+{
+    uint32_t loop_cnt;
+    const int8_t *filter = vp9_bilinear_filters_msa[mx - 1];
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
+    v16u8 filt0, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v8u16 out0, out1, out2, out3, out4, out5, out6, out7, filt;
+    /* Horizontal 2-tap filter, 16 wide, 4 rows per step; my is not used. */
+    mask = LD_SB(&mc_filt_mask_arr[0]);
+    /* first 4 rows run before the loop; unsigned count wraps if height < 4 */
+    loop_cnt = (height >> 2) - 1;
+
+    /* splat halfword 0 of the vector loaded from filter into every lane */
+    filt = LD_UH(filter);
+    filt0 = (v16u8) __msa_splati_h((v8i16) filt, 0);
+    /* each row is read twice: at src (even srcN) and at src + 8 (odd srcN) */
+    LD_SB4(src, src_stride, src0, src2, src4, src6);
+    LD_SB4(src + 8, src_stride, src1, src3, src5, src7);
+    src += (4 * src_stride);
+
+    VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1);
+    VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3);
+    VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5);
+    VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7);
+    DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
+                out0, out1, out2, out3);
+    DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0,
+                out4, out5, out6, out7);
+    SRARI_H4_UH(out0, out1, out2, out3, 7);
+    SRARI_H4_UH(out4, out5, out6, out7, 7);
+    PCKEV_ST_SB(out0, out1, dst);
+    dst += dst_stride;
+    PCKEV_ST_SB(out2, out3, dst);
+    dst += dst_stride;
+    PCKEV_ST_SB(out4, out5, dst);
+    dst += dst_stride;
+    PCKEV_ST_SB(out6, out7, dst);
+    dst += dst_stride;
+    /* remaining groups of 4 rows: identical sequence to the block above */
+    for (; loop_cnt--;) {
+        LD_SB4(src, src_stride, src0, src2, src4, src6);
+        LD_SB4(src + 8, src_stride, src1, src3, src5, src7);
+        src += (4 * src_stride);
+
+        VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1);
+        VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3);
+        VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5);
+        VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7);
+        DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
+                    out0, out1, out2, out3);
+        DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0,
+                    out4, out5, out6, out7);
+        SRARI_H4_UH(out0, out1, out2, out3, 7);
+        SRARI_H4_UH(out4, out5, out6, out7, 7);
+        PCKEV_ST_SB(out0, out1, dst);
+        dst += dst_stride;
+        PCKEV_ST_SB(out2, out3, dst);
+        dst += dst_stride;
+        PCKEV_ST_SB(out4, out5, dst);
+        dst += dst_stride;
+        PCKEV_ST_SB(out6, out7, dst);
+        dst += dst_stride;
+    }
+}
+
+void ff_put_bilin_32h_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                          const uint8_t *src, ptrdiff_t src_stride,
+                          int height, int mx, int my)
+{
+    uint32_t loop_cnt;
+    const int8_t *filter = vp9_bilinear_filters_msa[mx - 1];
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
+    v16u8 filt0, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v8u16 out0, out1, out2, out3, out4, out5, out6, out7, filt;
+    /* Horizontal 2-tap filter, 32 wide, 2 rows per iteration; my is not used. */
+    mask = LD_SB(&mc_filt_mask_arr[0]);
+
+    /* splat halfword 0 of the vector loaded from filter into every lane */
+    filt = LD_UH(filter);
+    filt0 = (v16u8) __msa_splati_h((v8i16) filt, 0);
+    /* per row: loads at +0, +16, +24; the +8 vector comes from sldi_b(.., 8) */
+    for (loop_cnt = height >> 1; loop_cnt--;) {
+        src0 = LD_SB(src);
+        src2 = LD_SB(src + 16);
+        src3 = LD_SB(src + 24);
+        src1 = __msa_sldi_b(src2, src0, 8);
+        src += src_stride;
+        src4 = LD_SB(src);
+        src6 = LD_SB(src + 16);
+        src7 = LD_SB(src + 24);
+        src5 = __msa_sldi_b(src6, src4, 8);
+        src += src_stride;
+        /* srcN 0-3 belong to the first row, srcN 4-7 to the second */
+        VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1);
+        VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3);
+        VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5);
+        VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7);
+        DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
+                    out0, out1, out2, out3);
+        DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0,
+                    out4, out5, out6, out7);
+        SRARI_H4_UH(out0, out1, out2, out3, 7);
+        SRARI_H4_UH(out4, out5, out6, out7, 7);
+        PCKEV_ST_SB(out0, out1, dst);
+        PCKEV_ST_SB(out2, out3, dst + 16);
+        dst += dst_stride;
+        PCKEV_ST_SB(out4, out5, dst);
+        PCKEV_ST_SB(out6, out7, dst + 16);
+        dst += dst_stride;
+    }
+}
+
+void ff_put_bilin_64h_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                          const uint8_t *src, ptrdiff_t src_stride,
+                          int height, int mx, int my)
+{
+    uint32_t loop_cnt;
+    const int8_t *filter = vp9_bilinear_filters_msa[mx - 1];
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
+    v16u8 filt0, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v8u16 out0, out1, out2, out3, out4, out5, out6, out7, filt;
+    /* Horizontal 2-tap filter, 64 wide, one row per iteration; my is not used. */
+    mask = LD_SB(&mc_filt_mask_arr[0]);
+
+    /* splat halfword 0 of the vector loaded from filter into every lane */
+    filt = LD_UH(filter);
+    filt0 = (v16u8) __msa_splati_h((v8i16) filt, 0);
+    /* loads at +0/+16/+32/+48/+56; SLDI_B3_SB presumably derives src1/3/5 */
+    for (loop_cnt = height; loop_cnt--;) {
+        src0 = LD_SB(src);
+        src2 = LD_SB(src + 16);
+        src4 = LD_SB(src + 32);
+        src6 = LD_SB(src + 48);
+        src7 = LD_SB(src + 56);
+        SLDI_B3_SB(src2, src4, src6, src0, src2, src4, src1, src3, src5, 8);
+        src += src_stride;
+        /* four 16-byte output groups: stored at dst, +16, +32 and +48 */
+        VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1);
+        VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3);
+        VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5);
+        VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7);
+        DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
+                    out0, out1, out2, out3);
+        DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0,
+                    out4, out5, out6, out7);
+        SRARI_H4_UH(out0, out1, out2, out3, 7);
+        SRARI_H4_UH(out4, out5, out6, out7, 7);
+        PCKEV_ST_SB(out0, out1, dst);
+        PCKEV_ST_SB(out2, out3, dst + 16);
+        PCKEV_ST_SB(out4, out5, dst + 32);
+        PCKEV_ST_SB(out6, out7, dst + 48);
+        dst += dst_stride;
+    }
+}
+
+static void common_vt_2t_4x4_msa(const uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 const int8_t *filter)
+{
+    v16i8 src0, src1, src2, src3, src4;
+    v16i8 src10_r, src32_r, src21_r, src43_r, src2110, src4332;
+    v16u8 filt0;
+    v8i16 filt;
+    v8u16 tmp0, tmp1;
+    /* Vertical 2-tap filter of a 4x4 block; filt0 = halfword 0 of filter, splat. */
+    filt = LD_SH(filter);
+    filt0 = (v16u8) __msa_splati_h(filt, 0);
+    /* five source rows are loaded (src0..src4) for the four output rows */
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    src += (5 * src_stride);  /* src is not read again below */
+    /* adjacent rows are paired (1,0) (2,1) (3,2) (4,3) before the dot product */
+    ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3,
+               src10_r, src21_r, src32_r, src43_r);
+    ILVR_D2_SB(src21_r, src10_r, src43_r, src32_r, src2110, src4332);
+    DOTP_UB2_UH(src2110, src4332, filt0, filt0, tmp0, tmp1);
+    SRARI_H2_UH(tmp0, tmp1, 7);
+    SAT_UH2_UH(tmp0, tmp1, 7);
+    src2110 = __msa_pckev_b((v16i8) tmp1, (v16i8) tmp0);
+    ST4x4_UB(src2110, src2110, 0, 1, 2, 3, dst, dst_stride);
+}
+
+static void common_vt_2t_4x8_msa(const uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 const int8_t *filter)
+{
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16i8 src10_r, src32_r, src54_r, src76_r, src21_r, src43_r;
+    v16i8 src65_r, src87_r, src2110, src4332, src6554, src8776;
+    v8u16 tmp0, tmp1, tmp2, tmp3;
+    v16u8 filt0;
+    v8i16 filt;
+    /* Vertical 2-tap filter of a 4x8 block; filt0 = halfword 0 of filter, splat. */
+    filt = LD_SH(filter);
+    filt0 = (v16u8) __msa_splati_h(filt, 0);
+    /* nine source rows are loaded (src0..src8) for the eight output rows */
+    LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
+    src += (8 * src_stride);
+
+    src8 = LD_SB(src);
+    src += src_stride;  /* src is not read again below */
+    /* adjacent rows are paired (1,0) (2,1) ... (8,7) before the dot product */
+    ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r,
+               src32_r, src43_r);
+    ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7, src54_r, src65_r,
+               src76_r, src87_r);
+    ILVR_D4_SB(src21_r, src10_r, src43_r, src32_r, src65_r, src54_r,
+               src87_r, src76_r, src2110, src4332, src6554, src8776);
+    DOTP_UB4_UH(src2110, src4332, src6554, src8776, filt0, filt0, filt0, filt0,
+                tmp0, tmp1, tmp2, tmp3);
+    SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, 7);
+    SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7);
+    PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, src2110, src4332);
+    ST4x4_UB(src2110, src2110, 0, 1, 2, 3, dst, dst_stride);
+    ST4x4_UB(src4332, src4332, 0, 1, 2, 3, dst + 4 * dst_stride, dst_stride);
+}
+
+void ff_put_bilin_4v_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                         const uint8_t *src, ptrdiff_t src_stride,
+                         int height, int mx, int my)
+{
+    /* my selects the vertical tap pair (table row my - 1); mx is unused. */
+    const int8_t *taps = vp9_bilinear_filters_msa[my - 1];
+
+    if (height == 4)
+        common_vt_2t_4x4_msa(src, src_stride, dst, dst_stride, taps);
+    else if (height == 8)
+        common_vt_2t_4x8_msa(src, src_stride, dst, dst_stride, taps);
+}
+
+static void common_vt_2t_8x4_msa(const uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 const int8_t *filter)
+{
+    v16u8 src0, src1, src2, src3, src4, vec0, vec1, vec2, vec3, filt0;
+    v16i8 out0, out1;
+    v8u16 tmp0, tmp1, tmp2, tmp3;
+    v8i16 filt;
+    /* Vertical 2-tap filter of an 8x4 block (size as given by the name). */
+    /* splat halfword 0 of the vector loaded from filter into every lane */
+    filt = LD_SH(filter);
+    filt0 = (v16u8) __msa_splati_h(filt, 0);
+    /* five rows in; adjacent rows are paired (1,0) (2,1) (3,2) (4,3) */
+    LD_UB5(src, src_stride, src0, src1, src2, src3, src4);
+    ILVR_B2_UB(src1, src0, src2, src1, vec0, vec1);
+    ILVR_B2_UB(src3, src2, src4, src3, vec2, vec3);
+    DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
+                tmp0, tmp1, tmp2, tmp3);
+    SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, 7);
+    SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7);
+    PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1);
+    ST8x4_UB(out0, out1, dst, dst_stride);
+}
+
+static void common_vt_2t_8x8mult_msa(const uint8_t *src, int32_t src_stride,
+                                     uint8_t *dst, int32_t dst_stride,
+                                     const int8_t *filter, int32_t height)
+{
+    uint32_t loop_cnt;
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0;
+    v16i8 out0, out1;
+    v8u16 tmp0, tmp1, tmp2, tmp3;
+    v8i16 filt;
+    /* Vertical 2-tap filter, 8 wide, 8 output rows per loop iteration. */
+    /* splat halfword 0 of the vector loaded from filter into every lane */
+    filt = LD_SH(filter);
+    filt0 = (v16u8) __msa_splati_h(filt, 0);
+    /* the first row is loaded once, outside the loop */
+    src0 = LD_UB(src);
+    src += src_stride;
+    /* height >> 3 iterations: rows beyond a multiple of 8 are not produced */
+    for (loop_cnt = (height >> 3); loop_cnt--;) {
+        LD_UB8(src, src_stride, src1, src2, src3, src4, src5, src6, src7, src8);
+        src += (8 * src_stride);
+
+        ILVR_B4_UB(src1, src0, src2, src1, src3, src2, src4, src3,
+                   vec0, vec1, vec2, vec3);
+        ILVR_B4_UB(src5, src4, src6, src5, src7, src6, src8, src7,
+                   vec4, vec5, vec6, vec7);
+        DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
+                    tmp0, tmp1, tmp2, tmp3);
+        SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, 7);
+        SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7);
+        PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1);
+        ST8x4_UB(out0, out1, dst, dst_stride);
+        dst += (4 * dst_stride);
+        /* second group of four rows, from the pairs held in vec4..vec7 */
+        DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0,
+                    tmp0, tmp1, tmp2, tmp3);
+        SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, 7);
+        SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7);
+        PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1);
+        ST8x4_UB(out0, out1, dst, dst_stride);
+        dst += (4 * dst_stride);
+        /* the last loaded row becomes the first row of the next iteration */
+        src0 = src8;
+    }
+}
+
+void ff_put_bilin_8v_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                         const uint8_t *src, ptrdiff_t src_stride,
+                         int height, int mx, int my)
+{
+    const int8_t *taps = vp9_bilinear_filters_msa[my - 1];
+
+    /* Height 4 has a dedicated kernel; every other height goes to 8x8mult. */
+    if (height == 4) {
+        common_vt_2t_8x4_msa(src, src_stride, dst, dst_stride, taps);
+        return;
+    }
+    common_vt_2t_8x8mult_msa(src, src_stride, dst, dst_stride, taps, height);
+}
+
+void ff_put_bilin_16v_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                          const uint8_t *src, ptrdiff_t src_stride,
+                          int height, int mx, int my)
+{
+    uint32_t loop_cnt;
+    const int8_t *filter = vp9_bilinear_filters_msa[my - 1];
+    v16u8 src0, src1, src2, src3, src4;
+    v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0;
+    v8u16 tmp0, tmp1, tmp2, tmp3;
+    v8i16 filt;
+    /* Vertical 2-tap filter, 16 wide, 4 output rows per iteration; mx unused. */
+    /* splat halfword 0 of the vector loaded from filter into every lane */
+    filt = LD_SH(filter);
+    filt0 = (v16u8) __msa_splati_h(filt, 0);
+    /* the first row is loaded once, outside the loop */
+    src0 = LD_UB(src);
+    src += src_stride;
+    /* height >> 2 iterations: rows beyond a multiple of 4 are not produced */
+    for (loop_cnt = (height >> 2); loop_cnt--;) {
+        LD_UB4(src, src_stride, src1, src2, src3, src4);
+        src += (4 * src_stride);
+        /* ILVR/ILVL presumably give the two halves of each adjacent row pair */
+        ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2);
+        ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3);
+        DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
+        SRARI_H2_UH(tmp0, tmp1, 7);
+        SAT_UH2_UH(tmp0, tmp1, 7);
+        PCKEV_ST_SB(tmp0, tmp1, dst);
+        dst += dst_stride;
+        /* pairs for rows 3 and 4 are formed before row 2 is finished */
+        ILVR_B2_UB(src3, src2, src4, src3, vec4, vec6);
+        ILVL_B2_UB(src3, src2, src4, src3, vec5, vec7);
+        DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
+        SRARI_H2_UH(tmp2, tmp3, 7);
+        SAT_UH2_UH(tmp2, tmp3, 7);
+        PCKEV_ST_SB(tmp2, tmp3, dst);
+        dst += dst_stride;
+
+        DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1);
+        SRARI_H2_UH(tmp0, tmp1, 7);
+        SAT_UH2_UH(tmp0, tmp1, 7);
+        PCKEV_ST_SB(tmp0, tmp1, dst);
+        dst += dst_stride;
+
+        DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3);
+        SRARI_H2_UH(tmp2, tmp3, 7);
+        SAT_UH2_UH(tmp2, tmp3, 7);
+        PCKEV_ST_SB(tmp2, tmp3, dst);
+        dst += dst_stride;
+        /* the last loaded row becomes the first row of the next iteration */
+        src0 = src4;
+    }
+}
+
+void ff_put_bilin_32v_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                          const uint8_t *src, ptrdiff_t src_stride,
+                          int height, int mx, int my)
+{
+    uint32_t loop_cnt;
+    const int8_t *filter = vp9_bilinear_filters_msa[my - 1];
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9;
+    v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0;
+    v8u16 tmp0, tmp1, tmp2, tmp3;
+    v8i16 filt;
+    /* Vertical 2-tap filter, 32 wide, 4 output rows per iteration; mx unused. */
+    /* splat halfword 0 of the vector loaded from filter into every lane */
+    filt = LD_SH(filter);
+    filt0 = (v16u8) __msa_splati_h(filt, 0);
+    /* first row, loaded once: src0 = columns 0-15, src5 = columns 16-31 */
+    src0 = LD_UB(src);
+    src5 = LD_UB(src + 16);
+    src += src_stride;
+    /* height >> 2 iterations: rows beyond a multiple of 4 are not produced */
+    for (loop_cnt = (height >> 2); loop_cnt--;) {
+        LD_UB4(src, src_stride, src1, src2, src3, src4);
+        ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2);
+        ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3);
+        /* the right-half rows are loaded now but only used further down */
+        LD_UB4(src + 16, src_stride, src6, src7, src8, src9);
+        src += (4 * src_stride);
+        /* left 16 columns: four rows stored at dst + k * dst_stride */
+        DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
+        SRARI_H2_UH(tmp0, tmp1, 7);
+        SAT_UH2_UH(tmp0, tmp1, 7);
+        PCKEV_ST_SB(tmp0, tmp1, dst);
+        DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
+        SRARI_H2_UH(tmp2, tmp3, 7);
+        SAT_UH2_UH(tmp2, tmp3, 7);
+        PCKEV_ST_SB(tmp2, tmp3, dst + dst_stride);
+
+        ILVR_B2_UB(src3, src2, src4, src3, vec4, vec6);
+        ILVL_B2_UB(src3, src2, src4, src3, vec5, vec7);
+        DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1);
+        SRARI_H2_UH(tmp0, tmp1, 7);
+        SAT_UH2_UH(tmp0, tmp1, 7);
+        PCKEV_ST_SB(tmp0, tmp1, dst + 2 * dst_stride);
+
+        DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3);
+        SRARI_H2_UH(tmp2, tmp3, 7);
+        SAT_UH2_UH(tmp2, tmp3, 7);
+        PCKEV_ST_SB(tmp2, tmp3, dst + 3 * dst_stride);
+        /* right 16 columns: same four rows, stored at dst + 16 */
+        ILVR_B2_UB(src6, src5, src7, src6, vec0, vec2);
+        ILVL_B2_UB(src6, src5, src7, src6, vec1, vec3);
+        DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
+        SRARI_H2_UH(tmp0, tmp1, 7);
+        SAT_UH2_UH(tmp0, tmp1, 7);
+        PCKEV_ST_SB(tmp0, tmp1, dst + 16);
+
+        DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
+        SRARI_H2_UH(tmp2, tmp3, 7);
+        SAT_UH2_UH(tmp2, tmp3, 7);
+        PCKEV_ST_SB(tmp2, tmp3, dst + 16 + dst_stride);
+
+        ILVR_B2_UB(src8, src7, src9, src8, vec4, vec6);
+        ILVL_B2_UB(src8, src7, src9, src8, vec5, vec7);
+        DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1);
+        SRARI_H2_UH(tmp0, tmp1, 7);
+        SAT_UH2_UH(tmp0, tmp1, 7);
+        PCKEV_ST_SB(tmp0, tmp1, dst + 16 + 2 * dst_stride);
+
+        DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3);
+        SRARI_H2_UH(tmp2, tmp3, 7);
+        SAT_UH2_UH(tmp2, tmp3, 7);
+        PCKEV_ST_SB(tmp2, tmp3, dst + 16 + 3 * dst_stride);
+        dst += (4 * dst_stride);
+        /* the last loaded rows become the first rows of the next iteration */
+        src0 = src4;
+        src5 = src9;
+    }
+}
+
+void ff_put_bilin_64v_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                          const uint8_t *src, ptrdiff_t src_stride,
+                          int height, int mx, int my)
+{
+    uint32_t loop_cnt;
+    const int8_t *filter = vp9_bilinear_filters_msa[my - 1];
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+    v16u8 src11, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0;
+    v8u16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+    v8i16 filt;
+    /* Vertical 2-tap filter, 64 wide, 2 output rows per iteration; mx unused. */
+    /* splat halfword 0 of the vector loaded from filter into every lane */
+    filt = LD_SH(filter);
+    filt0 = (v16u8) __msa_splati_h(filt, 0);
+    /* stride argument is 16 here: presumably four 16-byte groups of one row */
+    LD_UB4(src, 16, src0, src3, src6, src9);
+    src += src_stride;
+    /* height >> 1 iterations: an odd final row would not be produced */
+    for (loop_cnt = (height >> 1); loop_cnt--;) {
+        LD_UB2(src, src_stride, src1, src2);
+        LD_UB2(src + 16, src_stride, src4, src5);
+        LD_UB2(src + 32, src_stride, src7, src8);
+        LD_UB2(src + 48, src_stride, src10, src11);
+        src += (2 * src_stride);
+        /* columns 0-15: rows stored at dst and dst + dst_stride */
+        ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2);
+        ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3);
+        DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
+        SRARI_H2_UH(tmp0, tmp1, 7);
+        SAT_UH2_UH(tmp0, tmp1, 7);
+        PCKEV_ST_SB(tmp0, tmp1, dst);
+
+        DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
+        SRARI_H2_UH(tmp2, tmp3, 7);
+        SAT_UH2_UH(tmp2, tmp3, 7);
+        PCKEV_ST_SB(tmp2, tmp3, dst + dst_stride);
+        /* columns 16-31 */
+        ILVR_B2_UB(src4, src3, src5, src4, vec4, vec6);
+        ILVL_B2_UB(src4, src3, src5, src4, vec5, vec7);
+        DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp4, tmp5);
+        SRARI_H2_UH(tmp4, tmp5, 7);
+        SAT_UH2_UH(tmp4, tmp5, 7);
+        PCKEV_ST_SB(tmp4, tmp5, dst + 16);
+
+        DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp6, tmp7);
+        SRARI_H2_UH(tmp6, tmp7, 7);
+        SAT_UH2_UH(tmp6, tmp7, 7);
+        PCKEV_ST_SB(tmp6, tmp7, dst + 16 + dst_stride);
+        /* columns 32-47 */
+        ILVR_B2_UB(src7, src6, src8, src7, vec0, vec2);
+        ILVL_B2_UB(src7, src6, src8, src7, vec1, vec3);
+        DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
+        SRARI_H2_UH(tmp0, tmp1, 7);
+        SAT_UH2_UH(tmp0, tmp1, 7);
+        PCKEV_ST_SB(tmp0, tmp1, dst + 32);
+
+        DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
+        SRARI_H2_UH(tmp2, tmp3, 7);
+        SAT_UH2_UH(tmp2, tmp3, 7);
+        PCKEV_ST_SB(tmp2, tmp3, dst + 32 + dst_stride);
+        /* columns 48-63 */
+        ILVR_B2_UB(src10, src9, src11, src10, vec4, vec6);
+        ILVL_B2_UB(src10, src9, src11, src10, vec5, vec7);
+        DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp4, tmp5);
+        SRARI_H2_UH(tmp4, tmp5, 7);
+        SAT_UH2_UH(tmp4, tmp5, 7);
+        PCKEV_ST_SB(tmp4, tmp5, dst + 48);
+
+        DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp6, tmp7);
+        SRARI_H2_UH(tmp6, tmp7, 7);
+        SAT_UH2_UH(tmp6, tmp7, 7);
+        PCKEV_ST_SB(tmp6, tmp7, dst + 48 + dst_stride);
+        dst += (2 * dst_stride);
+        /* the last loaded rows become the first rows of the next iteration */
+        src0 = src2;
+        src3 = src5;
+        src6 = src8;
+        src9 = src11;
+    }
+}
+
+static void common_hv_2ht_2vt_4x4_msa(const uint8_t *src, int32_t src_stride,
+                               uint8_t *dst, int32_t dst_stride,
+                               const int8_t *filter_horiz, const int8_t *filter_vert)
+{
+    v16i8 src0, src1, src2, src3, src4, mask;
+    v16u8 filt_vt, filt_hz, vec0, vec1, res0, res1;
+    v8u16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, filt, tmp0, tmp1;
+    /* 2-tap horizontal pass, then 2-tap vertical pass, for a 4x4 block. */
+    mask = LD_SB(&mc_filt_mask_arr[16]);
+
+    /* splat halfword 0 of each loaded filter: filt_hz first, then filt_vt */
+    filt = LD_UH(filter_horiz);
+    filt_hz = (v16u8) __msa_splati_h((v8i16) filt, 0);
+
+    filt = LD_UH(filter_vert);
+    filt_vt = (v16u8) __msa_splati_h((v8i16) filt, 0);
+    /* five rows in; rows go through the horizontal helper in pairs, row 4 alone */
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    hz_out0 = HORIZ_2TAP_FILT_UH(src0, src1, mask, filt_hz, 7);
+    hz_out2 = HORIZ_2TAP_FILT_UH(src2, src3, mask, filt_hz, 7);
+    hz_out4 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, 7);
+    hz_out1 = (v8u16) __msa_sldi_b((v16i8) hz_out2, (v16i8) hz_out0, 8);
+    hz_out3 = (v8u16) __msa_pckod_d((v2i64) hz_out4, (v2i64) hz_out2);
+    /* vertical pass over the horizontal results, then round by 7 and saturate */
+    ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
+    DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1);
+    SRARI_H2_UH(tmp0, tmp1, 7);
+    SAT_UH2_UH(tmp0, tmp1, 7);
+    PCKEV_B2_UB(tmp0, tmp0, tmp1, tmp1, res0, res1);
+    ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
+}
+
+static void common_hv_2ht_2vt_4x8_msa(const uint8_t *src, int32_t src_stride,
+                               uint8_t *dst, int32_t dst_stride,
+                               const int8_t *filter_horiz, const int8_t *filter_vert)
+{
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, mask;
+    v16i8 res0, res1, res2, res3;
+    v16u8 filt_hz, filt_vt, vec0, vec1, vec2, vec3;
+    v8u16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6;
+    v8u16 hz_out7, hz_out8, vec4, vec5, vec6, vec7, filt;
+    /* 2-tap horizontal pass, then 2-tap vertical pass, for a 4x8 block. */
+    mask = LD_SB(&mc_filt_mask_arr[16]);
+
+    /* splat halfword 0 of each loaded filter: filt_hz first, then filt_vt */
+    filt = LD_UH(filter_horiz);
+    filt_hz = (v16u8) __msa_splati_h((v8i16) filt, 0);
+
+    filt = LD_UH(filter_vert);
+    filt_vt = (v16u8) __msa_splati_h((v8i16) filt, 0);
+    /* nine rows in (src0..src8) for the eight output rows */
+    LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
+    src += (8 * src_stride);
+    src8 = LD_SB(src);
+    /* rows go through the horizontal helper in pairs; row 8 is paired with itself */
+    hz_out0 = HORIZ_2TAP_FILT_UH(src0, src1, mask, filt_hz, 7);
+    hz_out2 = HORIZ_2TAP_FILT_UH(src2, src3, mask, filt_hz, 7);
+    hz_out4 = HORIZ_2TAP_FILT_UH(src4, src5, mask, filt_hz, 7);
+    hz_out6 = HORIZ_2TAP_FILT_UH(src6, src7, mask, filt_hz, 7);
+    hz_out8 = HORIZ_2TAP_FILT_UH(src8, src8, mask, filt_hz, 7);
+    SLDI_B3_UH(hz_out2, hz_out4, hz_out6, hz_out0, hz_out2, hz_out4, hz_out1,
+               hz_out3, hz_out5, 8);
+    hz_out7 = (v8u16) __msa_pckod_d((v2i64) hz_out8, (v2i64) hz_out6);
+    /* vertical pass over the horizontal results, then round by 7 and saturate */
+    ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
+    ILVEV_B2_UB(hz_out4, hz_out5, hz_out6, hz_out7, vec2, vec3);
+    DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt_vt, filt_vt, filt_vt, filt_vt,
+                vec4, vec5, vec6, vec7);
+    SRARI_H4_UH(vec4, vec5, vec6, vec7, 7);
+    SAT_UH4_UH(vec4, vec5, vec6, vec7, 7);
+    PCKEV_B4_SB(vec4, vec4, vec5, vec5, vec6, vec6, vec7, vec7,
+                res0, res1, res2, res3);
+    ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
+    dst += (4 * dst_stride);
+    ST4x4_UB(res2, res3, 0, 1, 0, 1, dst, dst_stride);
+}
+
+void ff_put_bilin_4hv_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                          const uint8_t *src, ptrdiff_t src_stride,
+                          int height, int mx, int my)
+{
+    /* Table rows mx - 1 and my - 1 supply the horizontal/vertical taps. */
+    const int8_t *taps_x = vp9_bilinear_filters_msa[mx - 1];
+    const int8_t *taps_y = vp9_bilinear_filters_msa[my - 1];
+
+    if (height == 4)
+        common_hv_2ht_2vt_4x4_msa(src, src_stride, dst, dst_stride,
+                                  taps_x, taps_y);
+    else if (height == 8)
+        common_hv_2ht_2vt_4x8_msa(src, src_stride, dst, dst_stride,
+                                  taps_x, taps_y);
+}
+
+static void common_hv_2ht_2vt_8x4_msa(const uint8_t *src, int32_t src_stride,
+                               uint8_t *dst, int32_t dst_stride,
+                               const int8_t *filter_horiz, const int8_t *filter_vert)
+{
+    v16i8 src0, src1, src2, src3, src4, mask, out0, out1;
+    v16u8 filt_hz, filt_vt, vec0, vec1, vec2, vec3;
+    v8u16 hz_out0, hz_out1, tmp0, tmp1, tmp2, tmp3;
+    v8i16 filt;
+    /* 2-tap horizontal pass, then 2-tap vertical pass, for an 8x4 block. */
+    mask = LD_SB(&mc_filt_mask_arr[0]);
+
+    /* splat halfword 0 of each loaded filter: filt_hz first, then filt_vt */
+    filt = LD_SH(filter_horiz);
+    filt_hz = (v16u8) __msa_splati_h(filt, 0);
+
+    filt = LD_SH(filter_vert);
+    filt_vt = (v16u8) __msa_splati_h(filt, 0);
+    /* five rows in (src0..src4) for the four output rows */
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    /* hz_out0 / hz_out1 alternate as the previous and current filtered row */
+    hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, 7);
+    hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, 7);
+    vec0 = (v16u8) __msa_ilvev_b((v16i8) hz_out1, (v16i8) hz_out0);
+    tmp0 = __msa_dotp_u_h(vec0, filt_vt);
+
+    hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, 7);
+    vec1 = (v16u8) __msa_ilvev_b((v16i8) hz_out0, (v16i8) hz_out1);
+    tmp1 = __msa_dotp_u_h(vec1, filt_vt);
+
+    hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, 7);
+    vec2 = (v16u8) __msa_ilvev_b((v16i8) hz_out1, (v16i8) hz_out0);
+    tmp2 = __msa_dotp_u_h(vec2, filt_vt);
+
+    hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, 7);
+    vec3 = (v16u8) __msa_ilvev_b((v16i8) hz_out0, (v16i8) hz_out1);
+    tmp3 = __msa_dotp_u_h(vec3, filt_vt);
+    /* all four vertical sums are rounded by 7 and saturated together */
+    SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, 7);
+    SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7);
+    PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1);
+    ST8x4_UB(out0, out1, dst, dst_stride);
+}
+
+static void common_hv_2ht_2vt_8x8mult_msa(const uint8_t *src, int32_t src_stride,
+                                   uint8_t *dst, int32_t dst_stride,
+                                   const int8_t *filter_horiz, const int8_t *filter_vert,
+                                   int32_t height)
+{   /* 8-wide hv kernel producing 8 output rows per loop pass; runs height >> 3 passes, so a non-multiple-of-8 remainder is not processed. */
+    uint32_t loop_cnt;
+    v16i8 src0, src1, src2, src3, src4, mask, out0, out1;
+    v16u8 filt_hz, filt_vt, vec0;
+    v8u16 hz_out0, hz_out1, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
+    v8i16 filt;
+
+    mask = LD_SB(&mc_filt_mask_arr[0]); /* shuffle mask handed to HORIZ_2TAP_FILT_UH */
+
+    /* read each filter row as 16-bit lanes and broadcast lane 0 (its first two int8 taps) to every lane */
+    filt = LD_SH(filter_horiz);
+    filt_hz = (v16u8) __msa_splati_h(filt, 0);
+
+    filt = LD_SH(filter_vert);
+    filt_vt = (v16u8) __msa_splati_h(filt, 0);
+
+    src0 = LD_SB(src); /* first source row is loaded and filtered once, ahead of the loop */
+    src += src_stride;
+
+    hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, 7); /* hz_out0 holds the previous filtered row on loop entry and again at the end of every pass */
+
+    for (loop_cnt = (height >> 3); loop_cnt--;) {
+        LD_SB4(src, src_stride, src1, src2, src3, src4); /* rows 1..4 of this pass */
+        src += (4 * src_stride);
+
+        hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, 7);
+        vec0 = (v16u8) __msa_ilvev_b((v16i8) hz_out1, (v16i8) hz_out0); /* pair previous and current row for the vertical dot product */
+        tmp1 = __msa_dotp_u_h(vec0, filt_vt);
+
+        hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, 7);
+        vec0 = (v16u8) __msa_ilvev_b((v16i8) hz_out0, (v16i8) hz_out1);
+        tmp2 = __msa_dotp_u_h(vec0, filt_vt);
+
+        SRARI_H2_UH(tmp1, tmp2, 7);
+        SAT_UH2_UH(tmp1, tmp2, 7);
+
+        hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, 7);
+        vec0 = (v16u8) __msa_ilvev_b((v16i8) hz_out1, (v16i8) hz_out0);
+        tmp3 = __msa_dotp_u_h(vec0, filt_vt);
+
+        hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, 7);
+        LD_SB4(src, src_stride, src1, src2, src3, src4); /* rows 5..8 are fetched here, after src4 has been consumed and before the first store */
+        src += (4 * src_stride);
+        vec0 = (v16u8) __msa_ilvev_b((v16i8) hz_out0, (v16i8) hz_out1);
+        tmp4 = __msa_dotp_u_h(vec0, filt_vt);
+
+        SRARI_H2_UH(tmp3, tmp4, 7);
+        SAT_UH2_UH(tmp3, tmp4, 7);
+        PCKEV_B2_SB(tmp2, tmp1, tmp4, tmp3, out0, out1);
+        ST8x4_UB(out0, out1, dst, dst_stride); /* first four output rows of this pass */
+        dst += (4 * dst_stride);
+
+        hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, 7);
+        vec0 = (v16u8) __msa_ilvev_b((v16i8) hz_out1, (v16i8) hz_out0);
+        tmp5 = __msa_dotp_u_h(vec0, filt_vt);
+
+        hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, 7);
+        vec0 = (v16u8) __msa_ilvev_b((v16i8) hz_out0, (v16i8) hz_out1);
+        tmp6 = __msa_dotp_u_h(vec0, filt_vt);
+
+        hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, 7);
+        vec0 = (v16u8) __msa_ilvev_b((v16i8) hz_out1, (v16i8) hz_out0);
+        tmp7 = __msa_dotp_u_h(vec0, filt_vt);
+
+        hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, 7); /* left in hz_out0 as the "previous row" for the next pass */
+        vec0 = (v16u8) __msa_ilvev_b((v16i8) hz_out0, (v16i8) hz_out1);
+        tmp8 = __msa_dotp_u_h(vec0, filt_vt);
+
+        SRARI_H4_UH(tmp5, tmp6, tmp7, tmp8, 7);
+        SAT_UH4_UH(tmp5, tmp6, tmp7, tmp8, 7);
+        PCKEV_B2_SB(tmp6, tmp5, tmp8, tmp7, out0, out1);
+        ST8x4_UB(out0, out1, dst, dst_stride); /* last four output rows of this pass */
+        dst += (4 * dst_stride);
+    }
+}
+
+void ff_put_bilin_8hv_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                          const uint8_t *src, ptrdiff_t src_stride,
+                          int height, int mx, int my)
+{   /* 8-wide hv entry point: height 4 uses the fixed 8x4 kernel, every other height goes to the 8-rows-per-pass kernel. */
+    const int8_t *filter_horiz = vp9_bilinear_filters_msa[mx - 1]; /* table row mx - 1; assumes mx >= 1 -- TODO confirm with callers */
+    const int8_t *filter_vert = vp9_bilinear_filters_msa[my - 1]; /* table row my - 1; assumes my >= 1 -- TODO confirm with callers */
+
+    if (4 == height) {
+        common_hv_2ht_2vt_8x4_msa(src, src_stride, dst, dst_stride,
+                                  filter_horiz, filter_vert);
+    } else {
+        common_hv_2ht_2vt_8x8mult_msa(src, src_stride, dst, dst_stride,
+                                      filter_horiz, filter_vert, height); /* callee handles height >> 3 groups of 8 rows */
+    }
+}
+
+void ff_put_bilin_16hv_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                           const uint8_t *src, ptrdiff_t src_stride,
+                           int height, int mx, int my)
+{   /* 16-wide hv kernel: each row is handled as two vectors loaded 8 bytes apart; four output rows per loop pass (height >> 2 passes). */
+    uint32_t loop_cnt;
+    const int8_t *filter_horiz = vp9_bilinear_filters_msa[mx - 1]; /* table row mx - 1; assumes mx >= 1 -- TODO confirm with callers */
+    const int8_t *filter_vert = vp9_bilinear_filters_msa[my - 1]; /* table row my - 1; assumes my >= 1 -- TODO confirm with callers */
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
+    v16u8 filt_hz, filt_vt, vec0, vec1;
+    v8u16 tmp1, tmp2, hz_out0, hz_out1, hz_out2, hz_out3;
+    v8i16 filt;
+
+    mask = LD_SB(&mc_filt_mask_arr[0]); /* shuffle mask handed to HORIZ_2TAP_FILT_UH */
+
+    /* read each filter row as 16-bit lanes and broadcast lane 0 (its first two int8 taps) to every lane */
+    filt = LD_SH(filter_horiz);
+    filt_hz = (v16u8) __msa_splati_h(filt, 0);
+
+    filt = LD_SH(filter_vert);
+    filt_vt = (v16u8) __msa_splati_h(filt, 0);
+
+    LD_SB2(src, 8, src0, src1); /* first row: vectors at src and src + 8 */
+    src += src_stride;
+
+    hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, 7); /* previous-row result for the vector loaded at src */
+    hz_out2 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, 7); /* previous-row result for the vector loaded at src + 8 */
+
+
+    for (loop_cnt = (height >> 2); loop_cnt--;) {
+        LD_SB4(src, src_stride, src0, src2, src4, src6); /* even-numbered names: vectors at src for the next four rows */
+        LD_SB4(src + 8, src_stride, src1, src3, src5, src7); /* odd-numbered names: vectors at src + 8 for the same rows */
+        src += (4 * src_stride);
+
+        hz_out1 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, 7);
+        hz_out3 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, 7);
+        ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1); /* pair previous row with current row for both halves */
+        DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2);
+        SRARI_H2_UH(tmp1, tmp2, 7);
+        SAT_UH2_UH(tmp1, tmp2, 7);
+        PCKEV_ST_SB(tmp1, tmp2, dst); /* presumably packs both halves to bytes and stores one 16-byte row -- macro not visible here */
+        dst += dst_stride;
+
+        hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, 7); /* roles swap: hz_out1/3 are now the previous row */
+        hz_out2 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, 7);
+        ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1);
+        DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2);
+        SRARI_H2_UH(tmp1, tmp2, 7);
+        SAT_UH2_UH(tmp1, tmp2, 7);
+        PCKEV_ST_SB(tmp1, tmp2, dst);
+        dst += dst_stride;
+
+        hz_out1 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, 7);
+        hz_out3 = HORIZ_2TAP_FILT_UH(src5, src5, mask, filt_hz, 7);
+        ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
+        DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2);
+        SRARI_H2_UH(tmp1, tmp2, 7);
+        SAT_UH2_UH(tmp1, tmp2, 7);
+        PCKEV_ST_SB(tmp1, tmp2, dst);
+        dst += dst_stride;
+
+        hz_out0 = HORIZ_2TAP_FILT_UH(src6, src6, mask, filt_hz, 7); /* hz_out0/hz_out2 end the pass holding the last row, ready for the next pass */
+        hz_out2 = HORIZ_2TAP_FILT_UH(src7, src7, mask, filt_hz, 7);
+        ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1);
+        DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2);
+        SRARI_H2_UH(tmp1, tmp2, 7);
+        SAT_UH2_UH(tmp1, tmp2, 7);
+        PCKEV_ST_SB(tmp1, tmp2, dst);
+        dst += dst_stride;
+    }
+}
+
+void ff_put_bilin_32hv_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                           const uint8_t *src, ptrdiff_t src_stride,
+                           int height, int mx, int my)
+{   /* 32-wide hv: runs the 16-wide kernel on two adjacent 16-column strips. */
+    int32_t multiple8_cnt; /* counts 16-column strips here, despite the name */
+
+    for (multiple8_cnt = 2; multiple8_cnt--;) {
+        ff_put_bilin_16hv_msa(dst, dst_stride, src, src_stride, height, mx, my);
+
+        src += 16; /* step both pointers to the next strip */
+        dst += 16;
+    }
+}
+
+void ff_put_bilin_64hv_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                           const uint8_t *src, ptrdiff_t src_stride,
+                           int height, int mx, int my)
+{   /* 64-wide hv: runs the 16-wide kernel on four adjacent 16-column strips. */
+    int32_t multiple8_cnt; /* counts 16-column strips here, despite the name */
+
+    for (multiple8_cnt = 4; multiple8_cnt--;) {
+        ff_put_bilin_16hv_msa(dst, dst_stride, src, src_stride, height, mx, my);
+
+        src += 16; /* step both pointers to the next strip */
+        dst += 16;
+    }
+}
+
+static void common_hz_2t_and_aver_dst_4x4_msa(const uint8_t *src,
+                                              int32_t src_stride,
+                                              uint8_t *dst, int32_t dst_stride,
+                                              const int8_t *filter)
+{   /* 4x4 block: horizontal 2-tap filter on four source rows, result averaged with the existing dst pixels and stored back. */
+    v16i8 src0, src1, src2, src3, mask;
+    v16u8 filt0, dst0, dst1, dst2, dst3, vec0, vec1, res0, res1;
+    v8u16 vec2, vec3, filt;
+
+    mask = LD_SB(&mc_filt_mask_arr[16]); /* mask at offset 16 is used with two different rows per shuffle (see VSHF_B2_UB below) */
+
+    /* read the filter row as 16-bit lanes and broadcast lane 0 (its first two int8 taps) to every lane */
+    filt = LD_UH(filter);
+    filt0 = (v16u8) __msa_splati_h((v8i16) filt, 0);
+
+    LD_SB4(src, src_stride, src0, src1, src2, src3);
+    LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); /* current destination rows, needed for the average */
+    VSHF_B2_UB(src0, src1, src2, src3, mask, mask, vec0, vec1); /* rows 0+1 share vec0, rows 2+3 share vec1 */
+    DOTP_UB2_UH(vec0, vec1, filt0, filt0, vec2, vec3);
+    SRARI_H2_UH(vec2, vec3, 7); /* scale the filter sums back down by 7 bits (presumably with rounding) */
+    PCKEV_B2_UB(vec2, vec2, vec3, vec3, res0, res1);
+    ILVR_W2_UB(dst1, dst0, dst3, dst2, dst0, dst2); /* gather the 4-byte dst rows pairwise so they line up with res0/res1 */
+    AVER_UB2_UB(res0, dst0, res1, dst2, res0, res1);
+    ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride); /* words 0 and 1 of each result vector become the four output rows */
+}
+
+static void common_hz_2t_and_aver_dst_4x8_msa(const uint8_t *src,
+                                              int32_t src_stride,
+                                              uint8_t *dst, int32_t dst_stride,
+                                              const int8_t *filter)
+{   /* 4x8 block: horizontal 2-tap filter on eight source rows, result averaged with the existing dst pixels and stored back. */
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
+    v16u8 filt0, vec0, vec1, vec2, vec3, res0, res1, res2, res3;
+    v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+    v8u16 vec4, vec5, vec6, vec7, filt;
+
+    mask = LD_SB(&mc_filt_mask_arr[16]); /* mask at offset 16 is used with two different rows per shuffle (see VSHF_B2_UB below) */
+
+    /* read the filter row as 16-bit lanes and broadcast lane 0 (its first two int8 taps) to every lane */
+    filt = LD_UH(filter);
+    filt0 = (v16u8) __msa_splati_h((v8i16) filt, 0);
+
+    LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
+    LD_UB8(dst, dst_stride, dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7); /* current destination rows, needed for the average */
+    VSHF_B2_UB(src0, src1, src2, src3, mask, mask, vec0, vec1); /* each shuffle combines two consecutive rows into one vector */
+    VSHF_B2_UB(src4, src5, src6, src7, mask, mask, vec2, vec3);
+    DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec4, vec5,
+                vec6, vec7);
+    SRARI_H4_UH(vec4, vec5, vec6, vec7, 7); /* scale the filter sums back down by 7 bits (presumably with rounding) */
+    PCKEV_B4_UB(vec4, vec4, vec5, vec5, vec6, vec6, vec7, vec7, res0, res1,
+                res2, res3);
+    ILVR_W4_UB(dst1, dst0, dst3, dst2, dst5, dst4, dst7, dst6, dst0, dst2,
+               dst4, dst6); /* gather the 4-byte dst rows pairwise so they line up with res0..res3 */
+    AVER_UB4_UB(res0, dst0, res1, dst2, res2, dst4, res3, dst6, res0, res1,
+                res2, res3);
+    ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride); /* output rows 0..3 */
+    dst += (4 * dst_stride);
+    ST4x4_UB(res2, res3, 0, 1, 0, 1, dst, dst_stride); /* output rows 4..7 */
+}
+
+void ff_avg_bilin_4h_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                         const uint8_t *src, ptrdiff_t src_stride,
+                         int height, int mx, int my)
+{   /* 4-wide horizontal filter-and-average entry point: selects the 4x4 or 4x8 kernel by height; my is not used. */
+    const int8_t *filter = vp9_bilinear_filters_msa[mx - 1]; /* table row mx - 1; assumes mx >= 1 -- TODO confirm with callers */
+
+    if (4 == height) {
+        common_hz_2t_and_aver_dst_4x4_msa(src, src_stride, dst, dst_stride,
+                                          filter);
+    } else if (8 == height) {
+        common_hz_2t_and_aver_dst_4x8_msa(src, src_stride, dst, dst_stride,
+                                          filter);
+    } /* any other height matches no branch, so dst is left untouched */
+}
+
+static void common_hz_2t_and_aver_dst_8x4_msa(const uint8_t *src,
+                                              int32_t src_stride,
+                                              uint8_t *dst, int32_t dst_stride,
+                                              const int8_t *filter)
+{   /* 8x4 block: horizontal 2-tap filter on four source rows, then PCKEV_AVG_ST8x4_UB combines the result with the loaded dst rows and stores. */
+    v16i8 src0, src1, src2, src3, mask;
+    v16u8 filt0, dst0, dst1, dst2, dst3;
+    v8u16 vec0, vec1, vec2, vec3, filt;
+
+    mask = LD_SB(&mc_filt_mask_arr[0]); /* mask at offset 0 is applied to a single row per shuffle */
+
+    /* read the filter row as 16-bit lanes and broadcast lane 0 (its first two int8 taps) to every lane */
+    filt = LD_UH(filter);
+    filt0 = (v16u8) __msa_splati_h((v8i16) filt, 0);
+
+    LD_SB4(src, src_stride, src0, src1, src2, src3);
+    VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1); /* one row per vector */
+    VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
+    DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
+                vec0, vec1, vec2, vec3); /* results overwrite the shuffled inputs */
+    SRARI_H4_UH(vec0, vec1, vec2, vec3, 7); /* scale the filter sums back down by 7 bits (presumably with rounding) */
+    LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); /* current destination rows, passed to the averaging store */
+    PCKEV_AVG_ST8x4_UB(vec0, dst0, vec1, dst1, vec2, dst2, vec3, dst3,
+                       dst, dst_stride); /* presumably pack to bytes, average with dst rows, store 8x4 -- macro not visible here */
+}
+
+static void common_hz_2t_and_aver_dst_8x8mult_msa(const uint8_t *src,
+                                                  int32_t src_stride,
+                                                  uint8_t *dst,
+                                                  int32_t dst_stride,
+                                                  const int8_t *filter,
+                                                  int32_t height)
+{   /* 8-wide horizontal filter-and-average, fully unrolled: always processes 8 rows, plus 8 more only when height == 16. */
+    v16i8 src0, src1, src2, src3, mask;
+    v16u8 filt0, dst0, dst1, dst2, dst3;
+    v8u16 vec0, vec1, vec2, vec3, filt;
+
+    mask = LD_SB(&mc_filt_mask_arr[0]); /* mask at offset 0 is applied to a single row per shuffle */
+
+    /* read the filter row as 16-bit lanes and broadcast lane 0 (its first two int8 taps) to every lane */
+    filt = LD_UH(filter);
+    filt0 = (v16u8) __msa_splati_h((v8i16) filt, 0);
+
+    LD_SB4(src, src_stride, src0, src1, src2, src3); /* rows 0..3 */
+    src += (4 * src_stride);
+    VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
+    VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
+    DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1,
+                vec2, vec3);
+    SRARI_H4_UH(vec0, vec1, vec2, vec3, 7);
+    LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+    LD_SB4(src, src_stride, src0, src1, src2, src3); /* rows 4..7 are fetched before the store of rows 0..3 */
+    src += (4 * src_stride);
+    PCKEV_AVG_ST8x4_UB(vec0, dst0, vec1, dst1, vec2, dst2, vec3, dst3,
+                       dst, dst_stride);
+    dst += (4 * dst_stride);
+
+    VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
+    VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
+    DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1,
+                vec2, vec3);
+    SRARI_H4_UH(vec0, vec1, vec2, vec3, 7);
+    LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+    PCKEV_AVG_ST8x4_UB(vec0, dst0, vec1, dst1, vec2, dst2, vec3, dst3,
+                       dst, dst_stride);
+    dst += (4 * dst_stride);
+
+    if (16 == height) { /* second group of 8 rows; heights other than 8 or 16 get only the first 8 rows */
+        LD_SB4(src, src_stride, src0, src1, src2, src3); /* rows 8..11 */
+        src += (4 * src_stride);
+
+        VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
+        VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
+        DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0,
+                    vec1, vec2, vec3);
+        SRARI_H4_UH(vec0, vec1, vec2, vec3, 7);
+        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+        LD_SB4(src, src_stride, src0, src1, src2, src3); /* rows 12..15; src is not advanced again because nothing follows */
+        PCKEV_AVG_ST8x4_UB(vec0, dst0, vec1, dst1, vec2, dst2, vec3, dst3,
+                           dst, dst_stride);
+        dst += (4 * dst_stride);
+
+        VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
+        VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
+        DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0,
+                    vec1, vec2, vec3);
+        SRARI_H4_UH(vec0, vec1, vec2, vec3, 7);
+        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+        PCKEV_AVG_ST8x4_UB(vec0, dst0, vec1, dst1, vec2, dst2, vec3, dst3,
+                           dst, dst_stride);
+    }
+}
+
+void ff_avg_bilin_8h_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                         const uint8_t *src, ptrdiff_t src_stride,
+                         int height, int mx, int my)
+{   /* 8-wide horizontal filter-and-average entry point: height 4 uses the 8x4 kernel, every other height the 8x8mult kernel; my is not used. */
+    const int8_t *filter = vp9_bilinear_filters_msa[mx - 1]; /* table row mx - 1; assumes mx >= 1 -- TODO confirm with callers */
+
+    if (4 == height) {
+        common_hz_2t_and_aver_dst_8x4_msa(src, src_stride, dst, dst_stride,
+                                          filter);
+    } else {
+        common_hz_2t_and_aver_dst_8x8mult_msa(src, src_stride, dst, dst_stride,
+                                              filter, height); /* callee processes 8 rows, or 16 when height == 16 */
+    }
+}
+
+void ff_avg_bilin_16h_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                          const uint8_t *src, ptrdiff_t src_stride,
+                          int height, int mx, int my)
+{   /* 16-wide horizontal filter-and-average: first four rows are handled before the loop, then four rows per loop pass; my is not used. */
+    uint32_t loop_cnt;
+    const int8_t *filter = vp9_bilinear_filters_msa[mx - 1]; /* table row mx - 1; assumes mx >= 1 -- TODO confirm with callers */
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
+    v16u8 filt0, dst0, dst1, dst2, dst3;
+    v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v8u16 res0, res1, res2, res3, res4, res5, res6, res7, filt;
+
+    mask = LD_SB(&mc_filt_mask_arr[0]); /* mask at offset 0 is applied to a single vector per shuffle */
+
+    /* read the filter row as 16-bit lanes and broadcast lane 0 (its first two int8 taps) to every lane */
+    filt = LD_UH(filter);
+    filt0 = (v16u8) __msa_splati_h((v8i16) filt, 0);
+
+    LD_SB4(src, src_stride, src0, src2, src4, src6); /* even-numbered names: vectors at src for rows 0..3 */
+    LD_SB4(src + 8, src_stride, src1, src3, src5, src7); /* odd-numbered names: vectors at src + 8 for the same rows */
+    src += (4 * src_stride);
+
+    VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1);
+    VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3);
+    VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5);
+    VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7);
+    DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, res0, res1,
+                res2, res3);
+    DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, res4, res5,
+                res6, res7);
+    SRARI_H4_UH(res0, res1, res2, res3, 7); /* scale the filter sums back down by 7 bits (presumably with rounding) */
+    SRARI_H4_UH(res4, res5, res6, res7, 7);
+    LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); /* current destination rows, passed to the averaging stores */
+    PCKEV_AVG_ST_UB(res1, res0, dst0, dst); /* presumably packs both halves, averages with the dst row and stores 16 bytes -- macro not visible here */
+    dst += dst_stride;
+    PCKEV_AVG_ST_UB(res3, res2, dst1, dst);
+    dst += dst_stride;
+    PCKEV_AVG_ST_UB(res5, res4, dst2, dst);
+    dst += dst_stride;
+    PCKEV_AVG_ST_UB(res7, res6, dst3, dst);
+    dst += dst_stride;
+
+    for (loop_cnt = (height >> 2) - 1; loop_cnt--;) { /* remaining groups of four rows; NOTE(review): loop_cnt is unsigned, so height < 4 would wrap to a huge count -- callers must pass height >= 4 */
+        LD_SB4(src, src_stride, src0, src2, src4, src6);
+        LD_SB4(src + 8, src_stride, src1, src3, src5, src7);
+        src += (4 * src_stride);
+
+        VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1);
+        VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3);
+        VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5);
+        VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7);
+        DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, res0,
+                    res1, res2, res3);
+        DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, res4,
+                    res5, res6, res7);
+        SRARI_H4_UH(res0, res1, res2, res3, 7);
+        SRARI_H4_UH(res4, res5, res6, res7, 7);
+        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+        PCKEV_AVG_ST_UB(res1, res0, dst0, dst);
+        dst += dst_stride;
+        PCKEV_AVG_ST_UB(res3, res2, dst1, dst);
+        dst += dst_stride;
+        PCKEV_AVG_ST_UB(res5, res4, dst2, dst);
+        dst += dst_stride;
+        PCKEV_AVG_ST_UB(res7, res6, dst3, dst);
+        dst += dst_stride;
+    }
+}
+
+void ff_avg_bilin_32h_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                          const uint8_t *src, ptrdiff_t src_stride,
+                          int height, int mx, int my)
+{   /* 32-wide horizontal filter-and-average: two rows per loop pass (height >> 1 passes); my is not used. */
+    uint32_t loop_cnt;
+    const int8_t *filter = vp9_bilinear_filters_msa[mx - 1]; /* table row mx - 1; assumes mx >= 1 -- TODO confirm with callers */
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
+    v16u8 filt0, dst0, dst1, dst2, dst3;
+    v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v8u16 res0, res1, res2, res3, res4, res5, res6, res7, filt;
+
+    mask = LD_SB(&mc_filt_mask_arr[0]); /* mask at offset 0 is applied to a single vector per shuffle */
+
+    /* read the filter row as 16-bit lanes and broadcast lane 0 (its first two int8 taps) to every lane */
+    filt = LD_UH(filter);
+    filt0 = (v16u8) __msa_splati_h((v8i16) filt, 0);
+
+    for (loop_cnt = (height >> 1); loop_cnt--;) {
+        src0 = LD_SB(src); /* first row of the pair: vectors at offsets 0, 16 and 24 */
+        src2 = LD_SB(src + 16);
+        src3 = LD_SB(src + 24);
+        src1 = __msa_sldi_b(src2, src0, 8); /* built from the two loads instead of a fourth load; presumably equals the vector at src + 8 */
+        src += src_stride;
+        src4 = LD_SB(src); /* second row of the pair, same layout */
+        src6 = LD_SB(src + 16);
+        src7 = LD_SB(src + 24);
+        src5 = __msa_sldi_b(src6, src4, 8);
+        src += src_stride;
+
+        VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1);
+        VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3);
+        VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5);
+        VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7);
+        DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
+                    res0, res1, res2, res3);
+        DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0,
+                    res4, res5, res6, res7);
+        SRARI_H4_UH(res0, res1, res2, res3, 7); /* scale the filter sums back down by 7 bits (presumably with rounding) */
+        SRARI_H4_UH(res4, res5, res6, res7, 7);
+        LD_UB2(dst, 16, dst0, dst1); /* left and right 16-byte halves of the first dst row */
+        PCKEV_AVG_ST_UB(res1, res0, dst0, dst);
+        PCKEV_AVG_ST_UB(res3, res2, dst1, (dst + 16));
+        dst += dst_stride;
+        LD_UB2(dst, 16, dst2, dst3); /* same for the second dst row */
+        PCKEV_AVG_ST_UB(res5, res4, dst2, dst);
+        PCKEV_AVG_ST_UB(res7, res6, dst3, (dst + 16));
+        dst += dst_stride;
+    }
+}
+
+void ff_avg_bilin_64h_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                          const uint8_t *src, ptrdiff_t src_stride,
+                          int height, int mx, int my)
+{   /* 64-wide horizontal filter-and-average: one row per loop pass (height passes); my is not used. */
+    uint32_t loop_cnt;
+    const int8_t *filter = vp9_bilinear_filters_msa[mx - 1]; /* table row mx - 1; assumes mx >= 1 -- TODO confirm with callers */
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
+    v16u8 filt0, dst0, dst1, dst2, dst3;
+    v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v8u16 out0, out1, out2, out3, out4, out5, out6, out7, filt;
+
+    mask = LD_SB(&mc_filt_mask_arr[0]); /* mask at offset 0 is applied to a single vector per shuffle */
+
+    /* read the filter row as 16-bit lanes and broadcast lane 0 (its first two int8 taps) to every lane */
+    filt = LD_UH(filter);
+    filt0 = (v16u8) __msa_splati_h((v8i16) filt, 0);
+
+    for (loop_cnt = height; loop_cnt--;) {
+        LD_SB4(src, 16, src0, src2, src4, src6); /* four vectors 16 bytes apart along the row */
+        src7 = LD_SB(src + 56); /* extra load at offset 56 for the last 8-column group */
+        SLDI_B3_SB(src2, src4, src6, src0, src2, src4, src1, src3, src5, 8); /* presumably builds the vectors at offsets 8, 24 and 40 from neighbouring loads -- macro not visible here */
+        src += src_stride;
+
+        VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1);
+        VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3);
+        VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5);
+        VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7);
+        DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
+                    out0, out1, out2, out3);
+        DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0,
+                    out4, out5, out6, out7);
+        SRARI_H4_UH(out0, out1, out2, out3, 7); /* scale the filter sums back down by 7 bits (presumably with rounding) */
+        SRARI_H4_UH(out4, out5, out6, out7, 7);
+        LD_UB4(dst, 16, dst0, dst1, dst2, dst3); /* the four 16-byte quarters of the current dst row */
+        PCKEV_AVG_ST_UB(out1, out0, dst0, dst);
+        PCKEV_AVG_ST_UB(out3, out2, dst1, dst + 16);
+        PCKEV_AVG_ST_UB(out5, out4, dst2, dst + 32);
+        PCKEV_AVG_ST_UB(out7, out6, dst3, dst + 48);
+        dst += dst_stride;
+    }
+}
+
+static void common_vt_2t_and_aver_dst_4x4_msa(const uint8_t *src,
+                                              int32_t src_stride,
+                                              uint8_t *dst, int32_t dst_stride,
+                                              const int8_t *filter)
+{   /* 4x4 block: vertical 2-tap filter over five source rows, result averaged with the existing dst pixels and stored back. */
+    v16i8 src0, src1, src2, src3, src4;
+    v16u8 dst0, dst1, dst2, dst3, out, filt0, src2110, src4332;
+    v16i8 src10_r, src32_r, src21_r, src43_r;
+    v8i16 filt;
+    v8u16 tmp0, tmp1;
+
+    filt = LD_SH(filter); /* filter row read as 16-bit lanes */
+    filt0 = (v16u8) __msa_splati_h(filt, 0); /* broadcast lane 0 (its first two int8 taps) to every lane */
+
+    LD_SB4(src, src_stride, src0, src1, src2, src3);
+    src += (4 * src_stride);
+
+    src4 = LD_SB(src); /* fifth row: four outputs need one extra row below */
+    src += src_stride; /* NOTE(review): src is not read again in this function, so this increment has no effect */
+
+    LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); /* current destination rows, needed for the average */
+    ILVR_W2_UB(dst1, dst0, dst3, dst2, dst0, dst1); /* rows 0+1 into dst0, rows 2+3 into dst1 */
+    dst0 = (v16u8) __msa_ilvr_d((v2i64) dst1, (v2i64) dst0); /* all four 4-byte dst rows now sit in one vector */
+    ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3,
+               src10_r, src21_r, src32_r, src43_r); /* byte-interleave each row with the row below it */
+    ILVR_D2_UB(src21_r, src10_r, src43_r, src32_r, src2110, src4332); /* two row pairs per vector */
+    DOTP_UB2_UH(src2110, src4332, filt0, filt0, tmp0, tmp1);
+    SRARI_H2_UH(tmp0, tmp1, 7); /* scale the filter sums back down by 7 bits (presumably with rounding) */
+    SAT_UH2_UH(tmp0, tmp1, 7); /* presumably clamps each lane to the 8-bit range before packing -- TODO confirm macro */
+
+    out = (v16u8) __msa_pckev_b((v16i8) tmp1, (v16i8) tmp0);
+    out = __msa_aver_u_b(out, dst0); /* unsigned byte average of filtered pixels and existing dst */
+
+    ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride); /* words 0..3 of the single result vector become the four output rows */
+}
+
+static void common_vt_2t_and_aver_dst_4x8_msa(const uint8_t *src,
+                                              int32_t src_stride,
+                                              uint8_t *dst, int32_t dst_stride,
+                                              const int8_t *filter)
+{   /* 4x8 block: vertical 2-tap filter over nine source rows, result averaged with the existing dst pixels and stored back. */
+    v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src87_r;
+    v16i8 src10_r, src32_r, src54_r, src76_r, src21_r, src43_r, src65_r;
+    v16u8 src2110, src4332, src6554, src8776, filt0;
+    v8u16 tmp0, tmp1, tmp2, tmp3;
+    v8i16 filt;
+
+    filt = LD_SH(filter); /* filter row read as 16-bit lanes */
+    filt0 = (v16u8) __msa_splati_h(filt, 0); /* broadcast lane 0 (its first two int8 taps) to every lane */
+
+    LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
+    src += (8 * src_stride);
+    src8 = LD_SB(src); /* ninth row: eight outputs need one extra row below */
+
+    LD_UB8(dst, dst_stride, dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7); /* current destination rows, needed for the average */
+    ILVR_W4_UB(dst1, dst0, dst3, dst2, dst5, dst4, dst7, dst6, dst0, dst1,
+               dst2, dst3);
+    ILVR_D2_UB(dst1, dst0, dst3, dst2, dst0, dst1); /* dst rows 0..3 gathered in dst0, rows 4..7 in dst1 */
+    ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r,
+               src32_r, src43_r); /* byte-interleave each row with the row below it */
+    ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7, src54_r, src65_r,
+               src76_r, src87_r);
+    ILVR_D4_UB(src21_r, src10_r, src43_r, src32_r, src65_r, src54_r,
+               src87_r, src76_r, src2110, src4332, src6554, src8776); /* two row pairs per vector */
+    DOTP_UB4_UH(src2110, src4332, src6554, src8776, filt0, filt0, filt0, filt0,
+                tmp0, tmp1, tmp2, tmp3);
+    SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, 7); /* scale the filter sums back down by 7 bits (presumably with rounding) */
+    SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7); /* presumably clamps each lane to the 8-bit range before packing -- TODO confirm macro */
+    PCKEV_B2_UB(tmp1, tmp0, tmp3, tmp2, src2110, src4332); /* src2110/src4332 are reused to hold the packed results */
+    AVER_UB2_UB(src2110, dst0, src4332, dst1, src2110, src4332);
+    ST4x4_UB(src2110, src2110, 0, 1, 2, 3, dst, dst_stride); /* output rows 0..3 */
+    dst += (4 * dst_stride);
+    ST4x4_UB(src4332, src4332, 0, 1, 2, 3, dst, dst_stride); /* output rows 4..7 */
+}
+
+void ff_avg_bilin_4v_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                         const uint8_t *src, ptrdiff_t src_stride,
+                         int height, int mx, int my)
+{   /* 4-wide vertical filter-and-average entry point: selects the 4x4 or 4x8 kernel by height; mx is not used. */
+    const int8_t *filter = vp9_bilinear_filters_msa[my - 1]; /* table row my - 1; assumes my >= 1 -- TODO confirm with callers */
+
+    if (4 == height) {
+        common_vt_2t_and_aver_dst_4x4_msa(src, src_stride, dst, dst_stride,
+                                          filter);
+    } else if (8 == height) {
+        common_vt_2t_and_aver_dst_4x8_msa(src, src_stride, dst, dst_stride,
+                                          filter);
+    } /* any other height matches no branch, so dst is left untouched */
+}
+
+static void common_vt_2t_and_aver_dst_8x4_msa(const uint8_t *src,
+                                              int32_t src_stride,
+                                              uint8_t *dst,
+                                              int32_t dst_stride,
+                                              const int8_t *filter)
+{   /* 8x4 block: vertical 2-tap filter over five source rows, then PCKEV_AVG_ST8x4_UB combines the result with the loaded dst rows and stores. */
+    v16u8 src0, src1, src2, src3, src4;
+    v16u8 dst0, dst1, dst2, dst3, vec0, vec1, vec2, vec3, filt0;
+    v8u16 tmp0, tmp1, tmp2, tmp3;
+    v8i16 filt;
+
+    /* read the filter row as 16-bit lanes and broadcast lane 0 (its first two int8 taps) to every lane */
+    filt = LD_SH(filter);
+    filt0 = (v16u8) __msa_splati_h(filt, 0);
+
+    LD_UB5(src, src_stride, src0, src1, src2, src3, src4); /* five rows: four outputs need one extra row below */
+    LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); /* current destination rows, passed to the averaging store */
+    ILVR_B2_UB(src1, src0, src2, src1, vec0, vec1); /* byte-interleave each row with the row below it */
+    ILVR_B2_UB(src3, src2, src4, src3, vec2, vec3);
+    DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
+                tmp0, tmp1, tmp2, tmp3);
+    SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, 7); /* scale the filter sums back down by 7 bits (presumably with rounding) */
+    SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7); /* presumably clamps each lane to the 8-bit range before packing -- TODO confirm macro */
+    PCKEV_AVG_ST8x4_UB(tmp0, dst0, tmp1, dst1, tmp2, dst2, tmp3, dst3,
+                       dst, dst_stride);
+}
+
+static void common_vt_2t_and_aver_dst_8x8mult_msa(const uint8_t *src,
+                                                  int32_t src_stride,
+                                                  uint8_t *dst,
+                                                  int32_t dst_stride,
+                                                  const int8_t *filter,
+                                                  int32_t height)
+{   /* 8-wide vertical filter-and-average producing 8 output rows per loop pass; runs height >> 3 passes, so a non-multiple-of-8 remainder is not processed. */
+    uint32_t loop_cnt;
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+    v16u8 dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst8;
+    v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0;
+    v8u16 tmp0, tmp1, tmp2, tmp3;
+    v8i16 filt;
+
+    /* read the filter row as 16-bit lanes and broadcast lane 0 (its first two int8 taps) to every lane */
+    filt = LD_SH(filter);
+    filt0 = (v16u8) __msa_splati_h(filt, 0);
+
+    src0 = LD_UB(src); /* row above the first output row; refreshed from src8 at the end of each pass */
+    src += src_stride;
+
+    for (loop_cnt = (height >> 3); loop_cnt--;) {
+        LD_UB8(src, src_stride, src1, src2, src3, src4, src5, src6, src7, src8);
+        src += (8 * src_stride);
+        LD_UB8(dst, dst_stride, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst8); /* current destination rows, passed to the averaging stores */
+
+        ILVR_B4_UB(src1, src0, src2, src1, src3, src2, src4, src3,
+                   vec0, vec1, vec2, vec3); /* byte-interleave each row with the row below it */
+        ILVR_B4_UB(src5, src4, src6, src5, src7, src6, src8, src7,
+                   vec4, vec5, vec6, vec7);
+        DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0,
+                    tmp0, tmp1, tmp2, tmp3);
+        SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, 7);
+        SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7);
+        PCKEV_AVG_ST8x4_UB(tmp0, dst1, tmp1, dst2, tmp2, dst3, tmp3,
+                           dst4, dst, dst_stride); /* first four output rows of this pass */
+        dst += (4 * dst_stride);
+
+        DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0,
+                    tmp0, tmp1, tmp2, tmp3);
+        SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, 7);
+        SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7);
+        PCKEV_AVG_ST8x4_UB(tmp0, dst5, tmp1, dst6, tmp2, dst7, tmp3,
+                           dst8, dst, dst_stride); /* last four output rows of this pass */
+        dst += (4 * dst_stride);
+
+        src0 = src8; /* last loaded row becomes the top row of the next pass */
+    }
+}
+
+void ff_avg_bilin_8v_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                         const uint8_t *src, ptrdiff_t src_stride,
+                         int height, int mx, int my)
+{   /* 8-wide vertical filter-and-average entry point: height 4 uses the 8x4 kernel, every other height the 8x8mult kernel; mx is not used. */
+    const int8_t *filter = vp9_bilinear_filters_msa[my - 1]; /* table row my - 1; assumes my >= 1 -- TODO confirm with callers */
+
+    if (4 == height) {
+        common_vt_2t_and_aver_dst_8x4_msa(src, src_stride, dst, dst_stride,
+                                          filter);
+    } else {
+        common_vt_2t_and_aver_dst_8x8mult_msa(src, src_stride, dst, dst_stride,
+                                              filter, height); /* callee handles height >> 3 groups of 8 rows */
+    }
+}
+
+void ff_avg_bilin_16v_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                          const uint8_t *src, ptrdiff_t src_stride,
+                          int height, int mx, int my)
+{   /* 16-wide vertical filter-and-average: four output rows per loop pass (height >> 2 passes); mx is not used. */
+    uint32_t loop_cnt;
+    const int8_t *filter = vp9_bilinear_filters_msa[my - 1]; /* table row my - 1; assumes my >= 1 -- TODO confirm with callers */
+    v16u8 src0, src1, src2, src3, src4, dst0, dst1, dst2, dst3, filt0;
+    v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v8u16 tmp0, tmp1, tmp2, tmp3, filt;
+
+    /* read the filter row as 16-bit lanes and broadcast lane 0 (its first two int8 taps) to every lane */
+    filt = LD_UH(filter);
+    filt0 = (v16u8) __msa_splati_h((v8i16) filt, 0);
+
+    src0 = LD_UB(src); /* row above the first output row; refreshed from src4 at the end of each pass */
+    src += src_stride;
+
+    for (loop_cnt = (height >> 2); loop_cnt--;) {
+        LD_UB4(src, src_stride, src1, src2, src3, src4);
+        src += (4 * src_stride);
+
+        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); /* current destination rows, passed to the averaging stores */
+        ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2); /* right halves of row pairs (0,1) and (1,2) */
+        ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3); /* left halves of the same row pairs */
+        DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
+        SRARI_H2_UH(tmp0, tmp1, 7);
+        SAT_UH2_UH(tmp0, tmp1, 7);
+        PCKEV_AVG_ST_UB(tmp1, tmp0, dst0, dst); /* presumably packs both halves, averages with the dst row and stores 16 bytes -- macro not visible here */
+        dst += dst_stride;
+
+        ILVR_B2_UB(src3, src2, src4, src3, vec4, vec6); /* row pairs (2,3) and (3,4) are interleaved before row 1 is finished */
+        ILVL_B2_UB(src3, src2, src4, src3, vec5, vec7);
+        DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
+        SRARI_H2_UH(tmp2, tmp3, 7);
+        SAT_UH2_UH(tmp2, tmp3, 7);
+        PCKEV_AVG_ST_UB(tmp3, tmp2, dst1, dst);
+        dst += dst_stride;
+
+        DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1);
+        SRARI_H2_UH(tmp0, tmp1, 7);
+        SAT_UH2_UH(tmp0, tmp1, 7);
+        PCKEV_AVG_ST_UB(tmp1, tmp0, dst2, dst);
+        dst += dst_stride;
+
+        DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3);
+        SRARI_H2_UH(tmp2, tmp3, 7);
+        SAT_UH2_UH(tmp2, tmp3, 7);
+        PCKEV_AVG_ST_UB(tmp3, tmp2, dst3, dst);
+        dst += dst_stride;
+
+        src0 = src4; /* last loaded row becomes the top row of the next pass */
+    }
+}
+/* 32-column vertical bilinear filter; results go through PCKEV_AVG_ST_UB. */
+void ff_avg_bilin_32v_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                          const uint8_t *src, ptrdiff_t src_stride,
+                          int height, int mx, int my)
+{
+    uint32_t loop_cnt;
+    const int8_t *filter = vp9_bilinear_filters_msa[my - 1]; /* mx unused */
+    v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9;
+    v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+    v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0;
+    v8u16 tmp0, tmp1, tmp2, tmp3, filt;
+
+    /* rearranging filter_y */
+    filt = LD_UH(filter);
+    filt0 = (v16u8) __msa_splati_h((v8i16) filt, 0);
+    /* Preload the top row of both 16-byte halves (src0 at +0, src5 at +16). */
+    LD_UB2(src, 16, src0, src5);
+    src += src_stride;
+    /* Each pass loads four new source rows per half; dst moves four rows. */
+    for (loop_cnt = (height >> 2); loop_cnt--;) {
+        LD_UB4(src, src_stride, src1, src2, src3, src4);
+        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+        ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2);
+        ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3);
+        /* The half at offset 16 is loaded before src is advanced. */
+        LD_UB4(src + 16, src_stride, src6, src7, src8, src9);
+        LD_UB4(dst + 16, dst_stride, dst4, dst5, dst6, dst7);
+        src += (4 * src_stride);
+        /* Half at offset 0: four rows written at dst + k * dst_stride. */
+        DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
+        SRARI_H2_UH(tmp0, tmp1, 7);
+        SAT_UH2_UH(tmp0, tmp1, 7);
+        PCKEV_AVG_ST_UB(tmp1, tmp0, dst0, dst);
+
+        DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
+        SRARI_H2_UH(tmp2, tmp3, 7);
+        SAT_UH2_UH(tmp2, tmp3, 7);
+        PCKEV_AVG_ST_UB(tmp3, tmp2, dst1, dst + dst_stride);
+
+        ILVR_B2_UB(src3, src2, src4, src3, vec4, vec6);
+        ILVL_B2_UB(src3, src2, src4, src3, vec5, vec7);
+        DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1);
+        SRARI_H2_UH(tmp0, tmp1, 7);
+        SAT_UH2_UH(tmp0, tmp1, 7);
+        PCKEV_AVG_ST_UB(tmp1, tmp0, dst2, dst + 2 * dst_stride);
+
+        DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3);
+        SRARI_H2_UH(tmp2, tmp3, 7);
+        SAT_UH2_UH(tmp2, tmp3, 7);
+        PCKEV_AVG_ST_UB(tmp3, tmp2, dst3, dst + 3 * dst_stride);
+        /* Half at offset 16: same sequence, vec0..vec7 and tmp0..tmp3 reused. */
+        ILVR_B2_UB(src6, src5, src7, src6, vec0, vec2);
+        ILVL_B2_UB(src6, src5, src7, src6, vec1, vec3);
+        DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
+        SRARI_H2_UH(tmp0, tmp1, 7);
+        SAT_UH2_UH(tmp0, tmp1, 7);
+        PCKEV_AVG_ST_UB(tmp1, tmp0, dst4, dst + 16);
+
+        DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
+        SRARI_H2_UH(tmp2, tmp3, 7);
+        SAT_UH2_UH(tmp2, tmp3, 7);
+        PCKEV_AVG_ST_UB(tmp3, tmp2, dst5, dst + 16 + dst_stride);
+
+        ILVR_B2_UB(src8, src7, src9, src8, vec4, vec6);
+        ILVL_B2_UB(src8, src7, src9, src8, vec5, vec7);
+        DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1);
+        SRARI_H2_UH(tmp0, tmp1, 7);
+        SAT_UH2_UH(tmp0, tmp1, 7);
+        PCKEV_AVG_ST_UB(tmp1, tmp0, dst6, dst + 16 + 2 * dst_stride);
+
+        DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3);
+        SRARI_H2_UH(tmp2, tmp3, 7);
+        SAT_UH2_UH(tmp2, tmp3, 7);
+        PCKEV_AVG_ST_UB(tmp3, tmp2, dst7, dst + 16 + 3 * dst_stride);
+        dst += (4 * dst_stride);
+        /* The last loaded row of each half seeds the next pass. */
+        src0 = src4;
+        src5 = src9;
+    }
+}
+/* 64-column vertical bilinear filter; results go through PCKEV_AVG_ST_UB. */
+void ff_avg_bilin_64v_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                          const uint8_t *src, ptrdiff_t src_stride,
+                          int height, int mx, int my)
+{
+    uint32_t loop_cnt;
+    const int8_t *filter = vp9_bilinear_filters_msa[my - 1]; /* mx unused */
+    v16u8 src0, src1, src2, src3, src4, src5;
+    v16u8 src6, src7, src8, src9, src10, src11, filt0;
+    v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+    v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+    v8u16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+    v8u16 filt;
+
+    /* rearranging filter_y */
+    filt = LD_UH(filter);
+    filt0 = (v16u8) __msa_splati_h((v8i16) filt, 0);
+    /* Preload the top row as four vectors taken 16 bytes apart. */
+    LD_UB4(src, 16, src0, src3, src6, src9);
+    src += src_stride;
+    /* Each pass loads two new source rows per group; dst moves two rows. */
+    for (loop_cnt = (height >> 1); loop_cnt--;) {
+        LD_UB2(src, src_stride, src1, src2);
+        LD_UB2(dst, dst_stride, dst0, dst1);
+        LD_UB2(src + 16, src_stride, src4, src5);
+        LD_UB2(dst + 16, dst_stride, dst2, dst3);
+        LD_UB2(src + 32, src_stride, src7, src8);
+        LD_UB2(dst + 32, dst_stride, dst4, dst5);
+        LD_UB2(src + 48, src_stride, src10, src11);
+        LD_UB2(dst + 48, dst_stride, dst6, dst7);
+        src += (2 * src_stride);
+        /* Column group at offset 0 (src0..src2, dst0/dst1). */
+        ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2);
+        ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3);
+        DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
+        SRARI_H2_UH(tmp0, tmp1, 7);
+        SAT_UH2_UH(tmp0, tmp1, 7);
+        PCKEV_AVG_ST_UB(tmp1, tmp0, dst0, dst);
+
+        DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
+        SRARI_H2_UH(tmp2, tmp3, 7);
+        SAT_UH2_UH(tmp2, tmp3, 7);
+        PCKEV_AVG_ST_UB(tmp3, tmp2, dst1, dst + dst_stride);
+        /* Column group at offset 16 (src3..src5, dst2/dst3). */
+        ILVR_B2_UB(src4, src3, src5, src4, vec4, vec6);
+        ILVL_B2_UB(src4, src3, src5, src4, vec5, vec7);
+        DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp4, tmp5);
+        SRARI_H2_UH(tmp4, tmp5, 7);
+        SAT_UH2_UH(tmp4, tmp5, 7);
+        PCKEV_AVG_ST_UB(tmp5, tmp4, dst2, dst + 16);
+
+        DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp6, tmp7);
+        SRARI_H2_UH(tmp6, tmp7, 7);
+        SAT_UH2_UH(tmp6, tmp7, 7);
+        PCKEV_AVG_ST_UB(tmp7, tmp6, dst3, dst + 16 + dst_stride);
+        /* Column group at offset 32 (src6..src8, dst4/dst5). */
+        ILVR_B2_UB(src7, src6, src8, src7, vec0, vec2);
+        ILVL_B2_UB(src7, src6, src8, src7, vec1, vec3);
+        DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
+        SRARI_H2_UH(tmp0, tmp1, 7);
+        SAT_UH2_UH(tmp0, tmp1, 7);
+        PCKEV_AVG_ST_UB(tmp1, tmp0, dst4, dst + 32);
+
+        DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
+        SRARI_H2_UH(tmp2, tmp3, 7);
+        SAT_UH2_UH(tmp2, tmp3, 7);
+        PCKEV_AVG_ST_UB(tmp3, tmp2, dst5, dst + 32 + dst_stride);
+        /* Column group at offset 48 (src9..src11, dst6/dst7). */
+        ILVR_B2_UB(src10, src9, src11, src10, vec4, vec6);
+        ILVL_B2_UB(src10, src9, src11, src10, vec5, vec7);
+        DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp4, tmp5);
+        SRARI_H2_UH(tmp4, tmp5, 7);
+        SAT_UH2_UH(tmp4, tmp5, 7);
+        PCKEV_AVG_ST_UB(tmp5, tmp4, dst6, (dst + 48));
+
+        DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp6, tmp7);
+        SRARI_H2_UH(tmp6, tmp7, 7);
+        SAT_UH2_UH(tmp6, tmp7, 7);
+        PCKEV_AVG_ST_UB(tmp7, tmp6, dst7, dst + 48 + dst_stride);
+        dst += (2 * dst_stride);
+        /* The second loaded row of each group seeds the next pass. */
+        src0 = src2;
+        src3 = src5;
+        src6 = src8;
+        src9 = src11;
+    }
+}
+/* 4x4 H+V 2-tap filter; result is combined with dst via AVER_UB2_UB. */
+static void common_hv_2ht_2vt_and_aver_dst_4x4_msa(const uint8_t *src,
+                                                   int32_t src_stride,
+                                                   uint8_t *dst,
+                                                   int32_t dst_stride,
+                                                   const int8_t *filter_horiz,
+                                                   const int8_t *filter_vert)
+{
+    v16i8 src0, src1, src2, src3, src4, mask;
+    v16u8 filt_hz, filt_vt, vec0, vec1;
+    v16u8 dst0, dst1, dst2, dst3, res0, res1;
+    v8u16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, tmp0, tmp1, filt;
+
+    mask = LD_SB(&mc_filt_mask_arr[16]);
+
+    /* rearranging filter */
+    filt = LD_UH(filter_horiz);
+    filt_hz = (v16u8) __msa_splati_h((v8i16) filt, 0);
+
+    filt = LD_UH(filter_vert);
+    filt_vt = (v16u8) __msa_splati_h((v8i16) filt, 0);
+    /* Five source rows are loaded for the four rows that get stored. */
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+    /* Rows go through the horizontal filter in pairs (src4 with itself). */
+    hz_out0 = HORIZ_2TAP_FILT_UH(src0, src1, mask, filt_hz, 7);
+    hz_out2 = HORIZ_2TAP_FILT_UH(src2, src3, mask, filt_hz, 7);
+    hz_out4 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, 7);
+    hz_out1 = (v8u16) __msa_sldi_b((v16i8) hz_out2, (v16i8) hz_out0, 8);
+    hz_out3 = (v8u16) __msa_pckod_d((v2i64) hz_out4, (v2i64) hz_out2);
+    ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
+    /* Vertical stage: DOTP, SRARI/SAT by 7, pack, AVER with dst, store. */
+    LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+    ILVR_W2_UB(dst1, dst0, dst3, dst2, dst0, dst2);
+    DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1);
+    SRARI_H2_UH(tmp0, tmp1, 7);
+    SAT_UH2_UH(tmp0, tmp1, 7);
+    PCKEV_B2_UB(tmp0, tmp0, tmp1, tmp1, res0, res1);
+    AVER_UB2_UB(res0, dst0, res1, dst2, res0, res1);
+    ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
+}
+/* 4x8 H+V 2-tap filter; result is combined with dst via AVER_UB4_UB. */
+static void common_hv_2ht_2vt_and_aver_dst_4x8_msa(const uint8_t *src,
+                                                   int32_t src_stride,
+                                                   uint8_t *dst,
+                                                   int32_t dst_stride,
+                                                   const int8_t *filter_horiz,
+                                                   const int8_t *filter_vert)
+{
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, mask;
+    v16u8 filt_hz, filt_vt, vec0, vec1, vec2, vec3, res0, res1, res2, res3;
+    v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+    v8u16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6;
+    v8u16 hz_out7, hz_out8, tmp0, tmp1, tmp2, tmp3;
+    v8i16 filt;
+
+    mask = LD_SB(&mc_filt_mask_arr[16]);
+
+    /* rearranging filter */
+    filt = LD_SH(filter_horiz);
+    filt_hz = (v16u8) __msa_splati_h(filt, 0);
+
+    filt = LD_SH(filter_vert);
+    filt_vt = (v16u8) __msa_splati_h(filt, 0);
+    /* Nine source rows in total: eight via LD_SB8, the ninth loaded alone. */
+    LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
+    src += (8 * src_stride);
+    src8 = LD_SB(src);
+    /* Rows go through the horizontal filter in pairs (src8 with itself). */
+    hz_out0 = HORIZ_2TAP_FILT_UH(src0, src1, mask, filt_hz, 7);
+    hz_out2 = HORIZ_2TAP_FILT_UH(src2, src3, mask, filt_hz, 7);
+    hz_out4 = HORIZ_2TAP_FILT_UH(src4, src5, mask, filt_hz, 7);
+    hz_out6 = HORIZ_2TAP_FILT_UH(src6, src7, mask, filt_hz, 7);
+    hz_out8 = HORIZ_2TAP_FILT_UH(src8, src8, mask, filt_hz, 7);
+    SLDI_B3_UH(hz_out2, hz_out4, hz_out6, hz_out0, hz_out2, hz_out4, hz_out1,
+               hz_out3, hz_out5, 8);
+    hz_out7 = (v8u16) __msa_pckod_d((v2i64) hz_out8, (v2i64) hz_out6);
+    /* Vertical stage: DOTP, SRARI/SAT by 7, pack, AVER with dst, store. */
+    LD_UB8(dst, dst_stride, dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7);
+    ILVR_W4_UB(dst1, dst0, dst3, dst2, dst5, dst4, dst7, dst6, dst0, dst2,
+               dst4, dst6);
+    ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
+    ILVEV_B2_UB(hz_out4, hz_out5, hz_out6, hz_out7, vec2, vec3);
+    DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt_vt, filt_vt, filt_vt, filt_vt,
+                tmp0, tmp1, tmp2, tmp3);
+    SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, 7);
+    SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7);
+    PCKEV_B4_UB(tmp0, tmp0, tmp1, tmp1, tmp2, tmp2, tmp3, tmp3, res0, res1,
+                res2, res3);
+    AVER_UB4_UB(res0, dst0, res1, dst2, res2, dst4, res3, dst6, res0, res1,
+                res2, res3);
+    ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
+    dst += (4 * dst_stride);
+    ST4x4_UB(res2, res3, 0, 1, 0, 1, dst, dst_stride);
+}
+/* 4-wide H+V bilinear averaging MC; only heights 4 and 8 have a kernel. */
+void ff_avg_bilin_4hv_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                          const uint8_t *src, ptrdiff_t src_stride,
+                          int height, int mx, int my)
+{
+    const int8_t *hz_taps = vp9_bilinear_filters_msa[mx - 1];
+    const int8_t *vt_taps = vp9_bilinear_filters_msa[my - 1];
+
+    if (height == 8) {
+        common_hv_2ht_2vt_and_aver_dst_4x8_msa(src, src_stride, dst, dst_stride,
+                                               hz_taps, vt_taps);
+    } else if (height == 4) {
+        common_hv_2ht_2vt_and_aver_dst_4x4_msa(src, src_stride, dst, dst_stride,
+                                               hz_taps, vt_taps);
+    }
+}
+/* 8x4 H+V 2-tap filter; rows are combined with dst by PCKEV_AVG_ST8x4_UB. */
+static void common_hv_2ht_2vt_and_aver_dst_8x4_msa(const uint8_t *src,
+                                                   int32_t src_stride,
+                                                   uint8_t *dst,
+                                                   int32_t dst_stride,
+                                                   const int8_t *filter_horiz,
+                                                   const int8_t *filter_vert)
+{
+    v16i8 src0, src1, src2, src3, src4, mask;
+    v16u8 filt_hz, filt_vt, dst0, dst1, dst2, dst3, vec0, vec1, vec2, vec3;
+    v8u16 hz_out0, hz_out1, tmp0, tmp1, tmp2, tmp3;
+    v8i16 filt;
+
+    mask = LD_SB(&mc_filt_mask_arr[0]);
+
+    /* rearranging filter */
+    filt = LD_SH(filter_horiz);
+    filt_hz = (v16u8) __msa_splati_h(filt, 0);
+
+    filt = LD_SH(filter_vert);
+    filt_vt = (v16u8) __msa_splati_h(filt, 0);
+
+    /* All five source rows are loaded at once; src is not read again. */
+    LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+
+    LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+    hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, 7);
+    hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, 7);
+    vec0 = (v16u8) __msa_ilvev_b((v16i8) hz_out1, (v16i8) hz_out0);
+    tmp0 = __msa_dotp_u_h(vec0, filt_vt);
+    /* hz_out0 and hz_out1 alternate as the "previous row" from here on. */
+    hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, 7);
+    vec1 = (v16u8) __msa_ilvev_b((v16i8) hz_out0, (v16i8) hz_out1);
+    tmp1 = __msa_dotp_u_h(vec1, filt_vt);
+
+    hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, 7);
+    vec2 = (v16u8) __msa_ilvev_b((v16i8) hz_out1, (v16i8) hz_out0);
+    tmp2 = __msa_dotp_u_h(vec2, filt_vt);
+
+    hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, 7);
+    vec3 = (v16u8) __msa_ilvev_b((v16i8) hz_out0, (v16i8) hz_out1);
+    tmp3 = __msa_dotp_u_h(vec3, filt_vt);
+    /* tmp0..tmp3 are the four output rows before SRARI/SAT by 7. */
+    SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, 7);
+    SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7);
+    PCKEV_AVG_ST8x4_UB(tmp0, dst0, tmp1, dst1, tmp2, dst2, tmp3, dst3,
+                       dst, dst_stride);
+}
+/* 8-wide H+V 2-tap filter that works through height four rows per pass. */
+static void common_hv_2ht_2vt_and_aver_dst_8x8mult_msa(const uint8_t *src,
+                                                       int32_t src_stride,
+                                                       uint8_t *dst,
+                                                       int32_t dst_stride,
+                                                       const int8_t *filter_horiz,
+                                                       const int8_t *filter_vert,
+                                                       int32_t height)
+{
+    uint32_t loop_cnt;
+    v16i8 src0, src1, src2, src3, src4, mask;
+    v16u8 filt_hz, filt_vt, vec0, dst0, dst1, dst2, dst3;
+    v8u16 hz_out0, hz_out1, tmp0, tmp1, tmp2, tmp3;
+    v8i16 filt;
+
+    mask = LD_SB(&mc_filt_mask_arr[0]);
+
+    /* rearranging filter */
+    filt = LD_SH(filter_horiz);
+    filt_hz = (v16u8) __msa_splati_h(filt, 0);
+
+    filt = LD_SH(filter_vert);
+    filt_vt = (v16u8) __msa_splati_h(filt, 0);
+    /* The first source row is loaded and filtered before the loop starts. */
+    src0 = LD_SB(src);
+    src += src_stride;
+
+    hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, 7);
+    /* height >> 2 passes; hz_out0 survives each pass as the previous row. */
+    for (loop_cnt = (height >> 2); loop_cnt--;) {
+        LD_SB4(src, src_stride, src1, src2, src3, src4);
+        src += (4 * src_stride);
+        /* hz_out0 and hz_out1 alternate roles, so ILVEV operands swap. */
+        hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, 7);
+        vec0 = (v16u8) __msa_ilvev_b((v16i8) hz_out1, (v16i8) hz_out0);
+        tmp0 = __msa_dotp_u_h(vec0, filt_vt);
+
+        hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, 7);
+        vec0 = (v16u8) __msa_ilvev_b((v16i8) hz_out0, (v16i8) hz_out1);
+        tmp1 = __msa_dotp_u_h(vec0, filt_vt);
+
+        SRARI_H2_UH(tmp0, tmp1, 7);
+        SAT_UH2_UH(tmp0, tmp1, 7);
+
+        hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, 7);
+        vec0 = (v16u8) __msa_ilvev_b((v16i8) hz_out1, (v16i8) hz_out0);
+        tmp2 = __msa_dotp_u_h(vec0, filt_vt);
+
+        hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, 7);
+        vec0 = (v16u8) __msa_ilvev_b((v16i8) hz_out0, (v16i8) hz_out1);
+        tmp3 = __msa_dotp_u_h(vec0, filt_vt);
+        /* dst rows are loaded only after tmp0..tmp3 have been computed. */
+        SRARI_H2_UH(tmp2, tmp3, 7);
+        SAT_UH2_UH(tmp2, tmp3, 7);
+        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+        PCKEV_AVG_ST8x4_UB(tmp0, dst0, tmp1, dst1, tmp2, dst2, tmp3,
+                           dst3, dst, dst_stride);
+        dst += (4 * dst_stride);
+    }
+}
+/* 8-wide H+V bilinear averaging MC; height 4 has a dedicated kernel. */
+void ff_avg_bilin_8hv_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                          const uint8_t *src, ptrdiff_t src_stride,
+                          int height, int mx, int my)
+{
+    const int8_t *hz_taps = vp9_bilinear_filters_msa[mx - 1];
+    const int8_t *vt_taps = vp9_bilinear_filters_msa[my - 1];
+
+    if (height != 4) {
+        /* every other height is handed to the looping kernel */
+        common_hv_2ht_2vt_and_aver_dst_8x8mult_msa(src, src_stride,
+                                                   dst, dst_stride,
+                                                   hz_taps, vt_taps, height);
+        return;
+    }
+    common_hv_2ht_2vt_and_aver_dst_8x4_msa(src, src_stride, dst, dst_stride,
+                                           hz_taps, vt_taps);
+}
+/* 16-column H+V bilinear filter; rows are stored via PCKEV_AVG_ST_UB. */
+void ff_avg_bilin_16hv_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                           const uint8_t *src, ptrdiff_t src_stride,
+                           int height, int mx, int my)
+{
+    uint32_t loop_cnt;
+    const int8_t *filter_horiz = vp9_bilinear_filters_msa[mx - 1];
+    const int8_t *filter_vert = vp9_bilinear_filters_msa[my - 1];
+    v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
+    v16u8 filt_hz, filt_vt, vec0, vec1, dst0, dst1, dst2, dst3;
+    v8u16 hz_out0, hz_out1, hz_out2, hz_out3, tmp0, tmp1;
+    v8i16 filt;
+
+    mask = LD_SB(&mc_filt_mask_arr[0]);
+
+    /* rearranging filter */
+    filt = LD_SH(filter_horiz);
+    filt_hz = (v16u8) __msa_splati_h(filt, 0);
+
+    filt = LD_SH(filter_vert);
+    filt_vt = (v16u8) __msa_splati_h(filt, 0);
+    /* Every source row is read as two vectors taken at src and src + 8. */
+    LD_SB2(src, 8, src0, src1);
+    src += src_stride;
+
+    hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, 7);
+    hz_out2 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, 7);
+    /* height >> 2 passes; each one stores four rows and advances dst. */
+    for (loop_cnt = (height >> 2); loop_cnt--;) {
+        LD_SB4(src, src_stride, src0, src2, src4, src6);
+        LD_SB4(src + 8, src_stride, src1, src3, src5, src7);
+        src += (4 * src_stride);
+        LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+        /* Row one: hz_out0/hz_out2 are the older row, hz_out1/hz_out3 newer. */
+        hz_out1 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, 7);
+        hz_out3 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, 7);
+        ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
+        DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1);
+        SRARI_H2_UH(tmp0, tmp1, 7);
+        SAT_UH2_UH(tmp0, tmp1, 7);
+        PCKEV_AVG_ST_UB(tmp1, tmp0, dst0, dst);
+        dst += dst_stride;
+        /* Row two: the register roles flip, so the ILVEV operands swap. */
+        hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, 7);
+        hz_out2 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, 7);
+        ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1);
+        DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1);
+        SRARI_H2_UH(tmp0, tmp1, 7);
+        SAT_UH2_UH(tmp0, tmp1, 7);
+        PCKEV_AVG_ST_UB(tmp1, tmp0, dst1, dst);
+        dst += dst_stride;
+        /* Row three: same operand order as row one. */
+        hz_out1 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, 7);
+        hz_out3 = HORIZ_2TAP_FILT_UH(src5, src5, mask, filt_hz, 7);
+        ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
+        DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1);
+        SRARI_H2_UH(tmp0, tmp1, 7);
+        SAT_UH2_UH(tmp0, tmp1, 7);
+        PCKEV_AVG_ST_UB(tmp1, tmp0, dst2, dst);
+        dst += dst_stride;
+        /* Row four: leaves hz_out0/hz_out2 holding the row for the next pass. */
+        hz_out0 = HORIZ_2TAP_FILT_UH(src6, src6, mask, filt_hz, 7);
+        hz_out2 = HORIZ_2TAP_FILT_UH(src7, src7, mask, filt_hz, 7);
+        ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1);
+        DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1);
+        SRARI_H2_UH(tmp0, tmp1, 7);
+        SAT_UH2_UH(tmp0, tmp1, 7);
+        PCKEV_AVG_ST_UB(tmp1, tmp0, dst3, dst);
+        dst += dst_stride;
+    }
+}
+/* 32-wide H+V bilinear averaging MC, done as two 16-column strips. */
+void ff_avg_bilin_32hv_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                           const uint8_t *src, ptrdiff_t src_stride,
+                           int height, int mx, int my)
+{
+    int32_t col;
+
+    /* The strips start at byte offsets 0 and 16 of both src and dst. */
+    for (col = 0; col < 32; col += 16) {
+        ff_avg_bilin_16hv_msa(dst + col, dst_stride,
+                              src + col, src_stride,
+                              height, mx, my);
+    }
+}
+/* 64-wide H+V bilinear averaging MC, done as four 16-column strips. */
+void ff_avg_bilin_64hv_msa(uint8_t *dst, ptrdiff_t dst_stride,
+                           const uint8_t *src, ptrdiff_t src_stride,
+                           int height, int mx, int my)
+{
+    int32_t col;
+
+    /* The strips start at byte offsets 0, 16, 32 and 48 of src and dst. */
+    for (col = 0; col < 64; col += 16) {
+        ff_avg_bilin_16hv_msa(dst + col, dst_stride,
+                              src + col, src_stride,
+                              height, mx, my);
+    }
+}
+/* Copies 8-byte rows from src to dst; the unroll depth depends on height. */
+static void copy_width8_msa(const uint8_t *src, int32_t src_stride,
+                            uint8_t *dst, int32_t dst_stride,
+                            int32_t height)
+{
+    int32_t left;
+    uint64_t d0, d1, d2, d3, d4, d5, d6, d7;
+    v16u8 row0, row1, row2, row3, row4, row5, row6, row7;
+
+    if (height % 12 == 0) {
+        /* twelve rows per pass: a batch of eight, then a batch of four */
+        left = height / 12;
+        while (left--) {
+            LD_UB8(src, src_stride,
+                   row0, row1, row2, row3, row4, row5, row6, row7);
+            src += (8 * src_stride);
+            d0 = __msa_copy_u_d((v2i64) row0, 0);
+            d1 = __msa_copy_u_d((v2i64) row1, 0);
+            d2 = __msa_copy_u_d((v2i64) row2, 0);
+            d3 = __msa_copy_u_d((v2i64) row3, 0);
+            d4 = __msa_copy_u_d((v2i64) row4, 0);
+            d5 = __msa_copy_u_d((v2i64) row5, 0);
+            d6 = __msa_copy_u_d((v2i64) row6, 0);
+            d7 = __msa_copy_u_d((v2i64) row7, 0);
+            SD4(d0, d1, d2, d3, dst, dst_stride);
+            dst += (4 * dst_stride);
+            SD4(d4, d5, d6, d7, dst, dst_stride);
+            dst += (4 * dst_stride);
+
+            LD_UB4(src, src_stride, row0, row1, row2, row3);
+            src += (4 * src_stride);
+            d0 = __msa_copy_u_d((v2i64) row0, 0);
+            d1 = __msa_copy_u_d((v2i64) row1, 0);
+            d2 = __msa_copy_u_d((v2i64) row2, 0);
+            d3 = __msa_copy_u_d((v2i64) row3, 0);
+            SD4(d0, d1, d2, d3, dst, dst_stride);
+            dst += (4 * dst_stride);
+        }
+    } else if (height % 8 == 0) {
+        /* eight rows per pass */
+        left = height >> 3;
+        while (left--) {
+            LD_UB8(src, src_stride,
+                   row0, row1, row2, row3, row4, row5, row6, row7);
+            src += (8 * src_stride);
+            d0 = __msa_copy_u_d((v2i64) row0, 0);
+            d1 = __msa_copy_u_d((v2i64) row1, 0);
+            d2 = __msa_copy_u_d((v2i64) row2, 0);
+            d3 = __msa_copy_u_d((v2i64) row3, 0);
+            d4 = __msa_copy_u_d((v2i64) row4, 0);
+            d5 = __msa_copy_u_d((v2i64) row5, 0);
+            d6 = __msa_copy_u_d((v2i64) row6, 0);
+            d7 = __msa_copy_u_d((v2i64) row7, 0);
+            SD4(d0, d1, d2, d3, dst, dst_stride);
+            dst += (4 * dst_stride);
+            SD4(d4, d5, d6, d7, dst, dst_stride);
+            dst += (4 * dst_stride);
+        }
+    } else if (height % 4 == 0) {
+        /* four rows per pass */
+        left = height / 4;
+        while (left--) {
+            LD_UB4(src, src_stride, row0, row1, row2, row3);
+            src += (4 * src_stride);
+            d0 = __msa_copy_u_d((v2i64) row0, 0);
+            d1 = __msa_copy_u_d((v2i64) row1, 0);
+            d2 = __msa_copy_u_d((v2i64) row2, 0);
+            d3 = __msa_copy_u_d((v2i64) row3, 0);
+            SD4(d0, d1, d2, d3, dst, dst_stride);
+            dst += (4 * dst_stride);
+        }
+    } else if (height % 2 == 0) {
+        /* two rows per pass */
+        left = height / 2;
+        while (left--) {
+            LD_UB2(src, src_stride, row0, row1);
+            src += (2 * src_stride);
+            d0 = __msa_copy_u_d((v2i64) row0, 0);
+            d1 = __msa_copy_u_d((v2i64) row1, 0);
+            SD(d0, dst);
+            dst += dst_stride;
+            SD(d1, dst);
+            dst += dst_stride;
+        }
+    }
+}
+/* Copies width x height bytes in 16-column strips, eight rows at a time. */
+static void copy_16multx8mult_msa(const uint8_t *src, int32_t src_stride,
+                                  uint8_t *dst, int32_t dst_stride,
+                                  int32_t height, int32_t width)
+{
+    int32_t strips, groups;
+    v16u8 r0, r1, r2, r3, r4, r5, r6, r7;
+
+    /* width >> 4 strips of 16 columns; each one restarts at the top row */
+    strips = width >> 4;
+    while (strips--) {
+        const uint8_t *src_col = src;
+        uint8_t *dst_col = dst;
+
+        groups = height >> 3;
+        while (groups--) {
+            LD_UB8(src_col, src_stride,
+                   r0, r1, r2, r3, r4, r5, r6, r7);
+            src_col += (8 * src_stride);
+            ST_UB8(r0, r1, r2, r3, r4, r5, r6, r7,
+                   dst_col, dst_stride);
+            dst_col += (8 * dst_stride);
+        }
+
+        src += 16;
+        dst += 16;
+    }
+}
+/* Copies 16-byte rows; a 12-, 8- or 4-row unroll is picked from height. */
+static void copy_width16_msa(const uint8_t *src, int32_t src_stride,
+                             uint8_t *dst, int32_t dst_stride,
+                             int32_t height)
+{
+    int32_t left;
+    v16u8 r0, r1, r2, r3, r4, r5, r6, r7;
+
+    if (height % 12 == 0) {
+        left = height / 12;
+        while (left--) {
+            LD_UB8(src, src_stride, r0, r1, r2, r3, r4, r5, r6, r7);
+            src += (8 * src_stride);
+            ST_UB8(r0, r1, r2, r3, r4, r5, r6, r7, dst, dst_stride);
+            dst += (8 * dst_stride);
+
+            LD_UB4(src, src_stride, r0, r1, r2, r3);
+            src += (4 * src_stride);
+            ST_UB4(r0, r1, r2, r3, dst, dst_stride);
+            dst += (4 * dst_stride);
+        }
+    } else if (height % 8 == 0) {
+        /* multiples of eight go through the generic strip copier */
+        copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 16);
+    } else if (height % 4 == 0) {
+        left = height >> 2;
+        while (left--) {
+            LD_UB4(src, src_stride, r0, r1, r2, r3);
+            src += (4 * src_stride);
+            ST_UB4(r0, r1, r2, r3, dst, dst_stride);
+            dst += (4 * dst_stride);
+        }
+    }
+}
+/* Copies 32-byte rows as two 16-byte halves; unroll is picked from height. */
+static void copy_width32_msa(const uint8_t *src, int32_t src_stride,
+                             uint8_t *dst, int32_t dst_stride,
+                             int32_t height)
+{
+    int32_t left;
+    v16u8 lo0, lo1, lo2, lo3, hi0, hi1, hi2, hi3;
+
+    if (height % 12 == 0) {
+        for (left = height / 12; left-- != 0;) {
+            LD_UB4(src, src_stride, lo0, lo1, lo2, lo3);
+            LD_UB4(src + 16, src_stride, hi0, hi1, hi2, hi3);
+            src += (4 * src_stride);
+            ST_UB4(lo0, lo1, lo2, lo3, dst, dst_stride);
+            ST_UB4(hi0, hi1, hi2, hi3, dst + 16, dst_stride);
+            dst += (4 * dst_stride);
+            /* second batch of four rows in this pass */
+            LD_UB4(src, src_stride, lo0, lo1, lo2, lo3);
+            LD_UB4(src + 16, src_stride, hi0, hi1, hi2, hi3);
+            src += (4 * src_stride);
+            ST_UB4(lo0, lo1, lo2, lo3, dst, dst_stride);
+            ST_UB4(hi0, hi1, hi2, hi3, dst + 16, dst_stride);
+            dst += (4 * dst_stride);
+            /* third batch of four rows in this pass */
+            LD_UB4(src, src_stride, lo0, lo1, lo2, lo3);
+            LD_UB4(src + 16, src_stride, hi0, hi1, hi2, hi3);
+            src += (4 * src_stride);
+            ST_UB4(lo0, lo1, lo2, lo3, dst, dst_stride);
+            ST_UB4(hi0, hi1, hi2, hi3, dst + 16, dst_stride);
+            dst += (4 * dst_stride);
+        }
+    } else if (height % 8 == 0) {
+        copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 32);
+    } else if (height % 4 == 0) {
+        for (left = height >> 2; left-- != 0;) {
+            LD_UB4(src, src_stride, lo0, lo1, lo2, lo3);
+            LD_UB4(src + 16, src_stride, hi0, hi1, hi2, hi3);
+            src += (4 * src_stride);
+            ST_UB4(lo0, lo1, lo2, lo3, dst, dst_stride);
+            ST_UB4(hi0, hi1, hi2, hi3, dst + 16, dst_stride);
+            dst += (4 * dst_stride);
+        }
+    }
+}
+/* 64-byte-wide copy, delegated to copy_16multx8mult_msa as 4 * 16 columns. */
+static void copy_width64_msa(const uint8_t *src, int32_t src_stride,
+                             uint8_t *dst, int32_t dst_stride,
+                             int32_t height)
+{
+    copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 4 * 16);
+}
+/* Averages 4-byte src rows into dst; odd heights fall through untouched. */
+static void avg_width4_msa(const uint8_t *src, int32_t src_stride,
+                           uint8_t *dst, int32_t dst_stride,
+                           int32_t height)
+{
+    int32_t left;
+    uint32_t w0, w1, w2, w3;
+    v16u8 s0, s1, s2, s3;
+    v16u8 d0, d1, d2, d3;
+
+    if (height % 4 == 0) {
+        left = height / 4;
+        while (left--) {
+            LD_UB4(src, src_stride, s0, s1, s2, s3);
+            src += (4 * src_stride);
+            LD_UB4(dst, dst_stride, d0, d1, d2, d3);
+
+            AVER_UB4_UB(s0, d0, s1, d1, s2, d2, s3, d3,
+                        d0, d1, d2, d3);
+            /* only word 0 of each result register is stored */
+            w0 = __msa_copy_u_w((v4i32) d0, 0);
+            w1 = __msa_copy_u_w((v4i32) d1, 0);
+            w2 = __msa_copy_u_w((v4i32) d2, 0);
+            w3 = __msa_copy_u_w((v4i32) d3, 0);
+            SW4(w0, w1, w2, w3, dst, dst_stride);
+            dst += (4 * dst_stride);
+        }
+    } else if (height % 2 == 0) {
+        left = height / 2;
+        while (left--) {
+            LD_UB2(src, src_stride, s0, s1);
+            src += (2 * src_stride);
+            LD_UB2(dst, dst_stride, d0, d1);
+
+            AVER_UB2_UB(s0, d0, s1, d1, d0, d1);
+
+            w0 = __msa_copy_u_w((v4i32) d0, 0);
+            w1 = __msa_copy_u_w((v4i32) d1, 0);
+            SW(w0, dst);
+            dst += dst_stride;
+            SW(w1, dst);
+            dst += dst_stride;
+        }
+    }
+}
+/* Averages 8-byte src rows into dst, four rows per pass (height / 4). */
+static void avg_width8_msa(const uint8_t *src, int32_t src_stride,
+                           uint8_t *dst, int32_t dst_stride,
+                           int32_t height)
+{
+    int32_t left = height / 4;
+    uint64_t q0, q1, q2, q3;
+    v16u8 s0, s1, s2, s3;
+    v16u8 d0, d1, d2, d3;
+
+    while (left--) {
+        LD_UB4(src, src_stride, s0, s1, s2, s3);
+        src += (4 * src_stride);
+        LD_UB4(dst, dst_stride, d0, d1, d2, d3);
+        /* the AVER results overwrite the registers that held the dst rows */
+        AVER_UB4_UB(s0, d0, s1, d1, s2, d2, s3, d3,
+                    d0, d1, d2, d3);
+        /* only doubleword 0 of each result register is stored */
+        q0 = __msa_copy_u_d((v2i64) d0, 0);
+        q1 = __msa_copy_u_d((v2i64) d1, 0);
+        q2 = __msa_copy_u_d((v2i64) d2, 0);
+        q3 = __msa_copy_u_d((v2i64) d3, 0);
+        SD4(q0, q1, q2, q3, dst, dst_stride);
+        dst += (4 * dst_stride);
+    }
+}
+/* Averages 16-byte src rows into dst, eight rows per pass (height / 8). */
+static void avg_width16_msa(const uint8_t *src, int32_t src_stride,
+                            uint8_t *dst, int32_t dst_stride,
+                            int32_t height)
+{
+    int32_t left = height / 8;
+    v16u8 s0, s1, s2, s3, s4, s5, s6, s7;
+    v16u8 d0, d1, d2, d3, d4, d5, d6, d7;
+
+    while (left--) {
+        LD_UB8(src, src_stride, s0, s1, s2, s3, s4, s5, s6, s7);
+        src += (8 * src_stride);
+        LD_UB8(dst, dst_stride, d0, d1, d2, d3, d4, d5, d6, d7);
+        /* the AVER results overwrite the registers that held the dst rows */
+        AVER_UB4_UB(s0, d0, s1, d1, s2, d2, s3, d3,
+                    d0, d1, d2, d3);
+        AVER_UB4_UB(s4, d4, s5, d5, s6, d6, s7, d7,
+                    d4, d5, d6, d7);
+        ST_UB8(d0, d1, d2, d3, d4, d5, d6, d7, dst, dst_stride);
+        dst += (8 * dst_stride);
+    }
+}
+/* Averages 32-byte src rows into dst, eight rows per pass (height / 8). */
+static void avg_width32_msa(const uint8_t *src, int32_t src_stride,
+                            uint8_t *dst, int32_t dst_stride,
+                            int32_t height)
+{
+    int32_t left = height / 8;
+    uint8_t *rd = dst;
+    v16u8 s0, s1, s2, s3, s4, s5, s6, s7;
+    v16u8 s8, s9, s10, s11, s12, s13, s14, s15;
+    v16u8 d0, d1, d2, d3, d4, d5, d6, d7;
+    v16u8 d8, d9, d10, d11, d12, d13, d14, d15;
+
+    while (left--) {
+        LD_UB4(src, src_stride, s0, s2, s4, s6);
+        LD_UB4(src + 16, src_stride, s1, s3, s5, s7);
+        src += (4 * src_stride);
+        LD_UB4(rd, dst_stride, d0, d2, d4, d6);
+        LD_UB4(rd + 16, dst_stride, d1, d3, d5, d7);
+        rd += (4 * dst_stride);
+        LD_UB4(src, src_stride, s8, s10, s12, s14);
+        LD_UB4(src + 16, src_stride, s9, s11, s13, s15);
+        src += (4 * src_stride);
+        LD_UB4(rd, dst_stride, d8, d10, d12, d14);
+        LD_UB4(rd + 16, dst_stride, d9, d11, d13, d15);
+        rd += (4 * dst_stride);
+        /* even-numbered registers: offset 0; odd-numbered: offset 16 */
+        AVER_UB4_UB(s0, d0, s1, d1, s2, d2, s3, d3,
+                    d0, d1, d2, d3);
+        AVER_UB4_UB(s4, d4, s5, d5, s6, d6, s7, d7,
+                    d4, d5, d6, d7);
+        AVER_UB4_UB(s8, d8, s9, d9, s10, d10, s11, d11,
+                    d8, d9, d10, d11);
+        AVER_UB4_UB(s12, d12, s13, d13, s14, d14, s15, d15,
+                    d12, d13, d14, d15);
+        /* rd ran ahead for the loads; dst is the cursor used for stores */
+        ST_UB4(d0, d2, d4, d6, dst, dst_stride);
+        ST_UB4(d1, d3, d5, d7, dst + 16, dst_stride);
+        dst += (4 * dst_stride);
+        ST_UB4(d8, d10, d12, d14, dst, dst_stride);
+        ST_UB4(d9, d11, d13, d15, dst + 16, dst_stride);
+        dst += (4 * dst_stride);
+    }
+}
+
+static void avg_width64_msa(const uint8_t *src, int32_t src_stride,
+                            uint8_t *dst, int32_t dst_stride,
+                            int32_t height)
+{
+    v16u8 s0, s1, s2, s3, s4, s5, s6, s7;
+    v16u8 s8, s9, s10, s11, s12, s13, s14, s15;
+    v16u8 d0, d1, d2, d3, d4, d5, d6, d7;
+    v16u8 d8, d9, d10, d11, d12, d13, d14, d15;
+    uint8_t *dst_rd = dst;  /* read cursor; dst itself is the write cursor */
+    int32_t groups = height / 4;
+    /* Four rows per pass; a macro stride of 16 presumably steps across one row. */
+    while (groups--) {
+        LD_UB4(src, 16, s0, s1, s2, s3);
+        src += src_stride;
+        LD_UB4(src, 16, s4, s5, s6, s7);
+        src += src_stride;
+        LD_UB4(src, 16, s8, s9, s10, s11);
+        src += src_stride;
+        LD_UB4(src, 16, s12, s13, s14, s15);
+        src += src_stride;
+
+        LD_UB4(dst_rd, 16, d0, d1, d2, d3);
+        dst_rd += dst_stride;
+        LD_UB4(dst_rd, 16, d4, d5, d6, d7);
+        dst_rd += dst_stride;
+        LD_UB4(dst_rd, 16, d8, d9, d10, d11);
+        dst_rd += dst_stride;
+        LD_UB4(dst_rd, 16, d12, d13, d14, d15);
+        dst_rd += dst_stride;
+
+        AVER_UB4_UB(s0, d0, s1, d1, s2, d2, s3, d3,
+                    d0, d1, d2, d3);
+        AVER_UB4_UB(s4, d4, s5, d5, s6, d6, s7, d7,
+                    d4, d5, d6, d7);
+        AVER_UB4_UB(s8, d8, s9, d9, s10, d10, s11, d11,
+                    d8, d9, d10, d11);
+        AVER_UB4_UB(s12, d12, s13, d13, s14, d14, s15, d15,
+                    d12, d13, d14, d15);
+
+        ST_UB4(d0, d1, d2, d3, dst, 16);
+        dst += dst_stride;
+        ST_UB4(d4, d5, d6, d7, dst, 16);
+        dst += dst_stride;
+        ST_UB4(d8, d9, d10, d11, dst, 16);
+        dst += dst_stride;
+        ST_UB4(d12, d13, d14, d15, dst, 16);
+        dst += dst_stride;
+    }
+}
+
+static const int8_t vp9_subpel_filters_msa[3][15][8] = {  /* [filter type][mx or my, minus 1][tap] */
+    [FILTER_8TAP_REGULAR] = {
+         {0, 1, -5, 126, 8, -3, 1, 0},
+         {-1, 3, -10, 122, 18, -6, 2, 0},
+         {-1, 4, -13, 118, 27, -9, 3, -1},
+         {-1, 4, -16, 112, 37, -11, 4, -1},
+         {-1, 5, -18, 105, 48, -14, 4, -1},
+         {-1, 5, -19, 97, 58, -16, 5, -1},
+         {-1, 6, -19, 88, 68, -18, 5, -1},
+         {-1, 6, -19, 78, 78, -19, 6, -1},
+         {-1, 5, -18, 68, 88, -19, 6, -1},
+         {-1, 5, -16, 58, 97, -19, 5, -1},
+         {-1, 4, -14, 48, 105, -18, 5, -1},
+         {-1, 4, -11, 37, 112, -16, 4, -1},
+         {-1, 3, -9, 27, 118, -13, 4, -1},
+         {0, 2, -6, 18, 122, -10, 3, -1},
+         {0, 1, -3, 8, 126, -5, 1, 0},
+    }, [FILTER_8TAP_SHARP] = {
+        {-1, 3, -7, 127, 8, -3, 1, 0},
+        {-2, 5, -13, 125, 17, -6, 3, -1},
+        {-3, 7, -17, 121, 27, -10, 5, -2},
+        {-4, 9, -20, 115, 37, -13, 6, -2},
+        {-4, 10, -23, 108, 48, -16, 8, -3},
+        {-4, 10, -24, 100, 59, -19, 9, -3},
+        {-4, 11, -24, 90, 70, -21, 10, -4},
+        {-4, 11, -23, 80, 80, -23, 11, -4},
+        {-4, 10, -21, 70, 90, -24, 11, -4},
+        {-3, 9, -19, 59, 100, -24, 10, -4},
+        {-3, 8, -16, 48, 108, -23, 10, -4},
+        {-2, 6, -13, 37, 115, -20, 9, -4},
+        {-2, 5, -10, 27, 121, -17, 7, -3},
+        {-1, 3, -6, 17, 125, -13, 5, -2},
+        {0, 1, -3, 8, 127, -7, 3, -1},
+    }, [FILTER_8TAP_SMOOTH] = {
+        {-3, -1, 32, 64, 38, 1, -3, 0},
+        {-2, -2, 29, 63, 41, 2, -3, 0},
+        {-2, -2, 26, 63, 43, 4, -4, 0},
+        {-2, -3, 24, 62, 46, 5, -4, 0},
+        {-2, -3, 21, 60, 49, 7, -4, 0},
+        {-1, -4, 18, 59, 51, 9, -4, 0},
+        {-1, -4, 16, 57, 53, 12, -4, -1},
+        {-1, -4, 14, 55, 55, 14, -4, -1},
+        {-1, -4, 12, 53, 57, 16, -4, -1},
+        {0, -4, 9, 51, 59, 18, -4, -1},
+        {0, -4, 7, 49, 60, 21, -3, -2},
+        {0, -4, 5, 46, 62, 24, -3, -2},
+        {0, -4, 4, 43, 63, 26, -2, -2},
+        {0, -3, 2, 41, 63, 29, -2, -2},
+        {0, -3, 1, 38, 64, 32, -1, -3},
+    }
+};  /* NOTE(review): spot-checked rows sum to 128 - confirm this holds for every row */
+
+#define VP9_8TAP_MIPS_MSA_FUNC(SIZE, type, type_idx)                           \
+void ff_put_8tap_##type##_##SIZE##h_msa(uint8_t *dst, ptrdiff_t dststride,     \
+                                        const uint8_t *src,                    \
+                                        ptrdiff_t srcstride,                   \
+                                        int h, int mx, int my)                 \
+{                                                                              \
+    const int8_t *filter = vp9_subpel_filters_msa[type_idx][mx-1];             \
+    /* Row mx - 1 of 15: mx must be 1..15 here (0 would index row -1). */      \
+    common_hz_8t_##SIZE##w_msa(src, srcstride, dst, dststride, filter, h);     \
+}                                                                              \
+/* put, vertical: as above, but the tap row is selected by my. */              \
+void ff_put_8tap_##type##_##SIZE##v_msa(uint8_t *dst, ptrdiff_t dststride,     \
+                                        const uint8_t *src,                    \
+                                        ptrdiff_t srcstride,                   \
+                                        int h, int mx, int my)                 \
+{                                                                              \
+    const int8_t *filter = vp9_subpel_filters_msa[type_idx][my-1];             \
+                                                                               \
+    common_vt_8t_##SIZE##w_msa(src, srcstride, dst, dststride, filter, h);     \
+}                                                                              \
+/* put, 2-D: horizontal taps come from mx, vertical taps from my. */           \
+void ff_put_8tap_##type##_##SIZE##hv_msa(uint8_t *dst, ptrdiff_t dststride,    \
+                                         const uint8_t *src,                   \
+                                         ptrdiff_t srcstride,                  \
+                                         int h, int mx, int my)                \
+{                                                                              \
+    const int8_t *hfilter = vp9_subpel_filters_msa[type_idx][mx-1];            \
+    const int8_t *vfilter = vp9_subpel_filters_msa[type_idx][my-1];            \
+    /* int8_t pointers match the table element type (taps are signed). */      \
+    common_hv_8ht_8vt_##SIZE##w_msa(src, srcstride, dst, dststride, hfilter,   \
+                                    vfilter, h);                               \
+}                                                                              \
+/* avg variants: same lookups, handed to the and_aver_dst helpers. */          \
+void ff_avg_8tap_##type##_##SIZE##h_msa(uint8_t *dst, ptrdiff_t dststride,     \
+                                        const uint8_t *src,                    \
+                                        ptrdiff_t srcstride,                   \
+                                        int h, int mx, int my)                 \
+{                                                                              \
+    const int8_t *filter = vp9_subpel_filters_msa[type_idx][mx-1];             \
+                                                                               \
+    common_hz_8t_and_aver_dst_##SIZE##w_msa(src, srcstride, dst,               \
+                                            dststride, filter, h);             \
+}                                                                              \
+                                                                               \
+void ff_avg_8tap_##type##_##SIZE##v_msa(uint8_t *dst, ptrdiff_t dststride,     \
+                                        const uint8_t *src,                    \
+                                        ptrdiff_t srcstride,                   \
+                                        int h, int mx, int my)                 \
+{                                                                              \
+    const int8_t *filter = vp9_subpel_filters_msa[type_idx][my-1];             \
+                                                                               \
+    common_vt_8t_and_aver_dst_##SIZE##w_msa(src, srcstride, dst, dststride,    \
+                                            filter, h);                        \
+}                                                                              \
+                                                                               \
+void ff_avg_8tap_##type##_##SIZE##hv_msa(uint8_t *dst, ptrdiff_t dststride,    \
+                                         const uint8_t *src,                   \
+                                         ptrdiff_t srcstride,                  \
+                                         int h, int mx, int my)                \
+{                                                                              \
+    const int8_t *hfilter = vp9_subpel_filters_msa[type_idx][mx-1];            \
+    const int8_t *vfilter = vp9_subpel_filters_msa[type_idx][my-1];            \
+                                                                               \
+    common_hv_8ht_8vt_and_aver_dst_##SIZE##w_msa(src, srcstride, dst,          \
+                                                 dststride, hfilter,           \
+                                                 vfilter, h);                  \
+}
+
+#define VP9_COPY_AVG_MIPS_MSA_FUNC(SIZE)                           \
+void ff_copy##SIZE##_msa(uint8_t *dst, ptrdiff_t dststride,        \
+                         const uint8_t *src, ptrdiff_t srcstride,  \
+                         int h, int mx, int my)                    \
+{                                                                  \
+    /* Adapter: helper takes src first; mx, my ignored. */         \
+    copy_width##SIZE##_msa(src, srcstride, dst, dststride, h);     \
+}                                                                  \
+                                                                   \
+void ff_avg##SIZE##_msa(uint8_t *dst, ptrdiff_t dststride,         \
+                        const uint8_t *src, ptrdiff_t srcstride,   \
+                        int h, int mx, int my)                     \
+{                                                                  \
+    /* Adapter: helper takes src first; mx, my ignored. */         \
+    avg_width##SIZE##_msa(src, srcstride, dst, dststride, h);      \
+}
+
+#define VP9_AVG_MIPS_MSA_FUNC(SIZE)                               \
+void ff_avg##SIZE##_msa(uint8_t *dst, ptrdiff_t dststride,        \
+                        const uint8_t *src, ptrdiff_t srcstride,  \
+                        int h, int mx, int my)                    \
+{                                                                 \
+    /* Adapter: helper takes src first; mx, my ignored. */        \
+    avg_width##SIZE##_msa(src, srcstride, dst, dststride, h);     \
+}
+
+VP9_8TAP_MIPS_MSA_FUNC(64, regular, FILTER_8TAP_REGULAR);  /* each line emits six functions: put/avg x h/v/hv */
+VP9_8TAP_MIPS_MSA_FUNC(32, regular, FILTER_8TAP_REGULAR);
+VP9_8TAP_MIPS_MSA_FUNC(16, regular, FILTER_8TAP_REGULAR);
+VP9_8TAP_MIPS_MSA_FUNC(8, regular, FILTER_8TAP_REGULAR);
+VP9_8TAP_MIPS_MSA_FUNC(4, regular, FILTER_8TAP_REGULAR);
+
+VP9_8TAP_MIPS_MSA_FUNC(64, sharp, FILTER_8TAP_SHARP);
+VP9_8TAP_MIPS_MSA_FUNC(32, sharp, FILTER_8TAP_SHARP);
+VP9_8TAP_MIPS_MSA_FUNC(16, sharp, FILTER_8TAP_SHARP);
+VP9_8TAP_MIPS_MSA_FUNC(8, sharp, FILTER_8TAP_SHARP);
+VP9_8TAP_MIPS_MSA_FUNC(4, sharp, FILTER_8TAP_SHARP);
+
+VP9_8TAP_MIPS_MSA_FUNC(64, smooth, FILTER_8TAP_SMOOTH);
+VP9_8TAP_MIPS_MSA_FUNC(32, smooth, FILTER_8TAP_SMOOTH);
+VP9_8TAP_MIPS_MSA_FUNC(16, smooth, FILTER_8TAP_SMOOTH);
+VP9_8TAP_MIPS_MSA_FUNC(8, smooth, FILTER_8TAP_SMOOTH);
+VP9_8TAP_MIPS_MSA_FUNC(4, smooth, FILTER_8TAP_SMOOTH);
+
+VP9_COPY_AVG_MIPS_MSA_FUNC(64);  /* emits ff_copy64_msa and ff_avg64_msa, and likewise below */
+VP9_COPY_AVG_MIPS_MSA_FUNC(32);
+VP9_COPY_AVG_MIPS_MSA_FUNC(16);
+VP9_COPY_AVG_MIPS_MSA_FUNC(8);
+VP9_AVG_MIPS_MSA_FUNC(4);  /* width 4: ff_avg4_msa only, no ff_copy4_msa is generated */
+
+#undef VP9_8TAP_MIPS_MSA_FUNC
+#undef VP9_COPY_AVG_MIPS_MSA_FUNC
+#undef VP9_AVG_MIPS_MSA_FUNC
diff --git a/libavcodec/mips/vp9dsp_init_mips.c b/libavcodec/mips/vp9dsp_init_mips.c
new file mode 100644
index 00000000..c8a48908
--- /dev/null
+++ b/libavcodec/mips/vp9dsp_init_mips.c
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2015 Shivraj Patil (Shivraj.Patil@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/common.h"
+#include "libavcodec/vp9dsp.h"
+#include "vp9dsp_mips.h"
+
+#if HAVE_MSA
+static av_cold void vp9dsp_intrapred_init_msa(VP9DSPContext *dsp, int bpp)
+{
+    if (bpp == 8) {  /* no entry is installed for any other bit depth */
+#define init_intra_pred_msa(tx, sz)                             \
+    dsp->intra_pred[tx][VERT_PRED]    = ff_vert_##sz##_msa;     \
+    dsp->intra_pred[tx][HOR_PRED]     = ff_hor_##sz##_msa;      \
+    dsp->intra_pred[tx][DC_PRED]      = ff_dc_##sz##_msa;       \
+    dsp->intra_pred[tx][LEFT_DC_PRED] = ff_dc_left_##sz##_msa;  \
+    dsp->intra_pred[tx][TOP_DC_PRED]  = ff_dc_top_##sz##_msa;   \
+    dsp->intra_pred[tx][DC_128_PRED]  = ff_dc_128_##sz##_msa;   \
+    dsp->intra_pred[tx][DC_127_PRED]  = ff_dc_127_##sz##_msa;   \
+    dsp->intra_pred[tx][DC_129_PRED]  = ff_dc_129_##sz##_msa;   \
+    dsp->intra_pred[tx][TM_VP8_PRED]  = ff_tm_##sz##_msa
+    /* 16x16 and 32x32 get all nine predictors listed in the macro above. */
+    init_intra_pred_msa(TX_16X16, 16x16);
+    init_intra_pred_msa(TX_32X32, 32x32);
+#undef init_intra_pred_msa
+
+#define init_intra_pred_msa(tx, sz)                             \
+    dsp->intra_pred[tx][DC_PRED]      = ff_dc_##sz##_msa;       \
+    dsp->intra_pred[tx][LEFT_DC_PRED] = ff_dc_left_##sz##_msa;  \
+    dsp->intra_pred[tx][TOP_DC_PRED]  = ff_dc_top_##sz##_msa;   \
+    dsp->intra_pred[tx][TM_VP8_PRED]  = ff_tm_##sz##_msa
+    /* 4x4 and 8x8 get only the DC, left-DC, top-DC and TM predictors. */
+    init_intra_pred_msa(TX_4X4, 4x4);
+    init_intra_pred_msa(TX_8X8, 8x8);
+#undef init_intra_pred_msa
+    }
+}
+
+static av_cold void vp9dsp_itxfm_init_msa(VP9DSPContext *dsp, int bpp)
+{
+    if (bpp != 8)
+        return;  /* nothing is installed for other bit depths */
+#define bind_itxfm(tx, sz)                                         \
+    dsp->itxfm_add[tx][ADST_ADST] = ff_iadst_iadst_##sz##_add_msa; \
+    dsp->itxfm_add[tx][ADST_DCT]  = ff_idct_iadst_##sz##_add_msa;  \
+    dsp->itxfm_add[tx][DCT_ADST]  = ff_iadst_idct_##sz##_add_msa;  \
+    dsp->itxfm_add[tx][DCT_DCT]   = ff_idct_idct_##sz##_add_msa
+    /* Variant that points all four transform-type slots at one routine. */
+#define bind_idct_only(tx, nm)                   \
+    dsp->itxfm_add[tx][ADST_ADST] =              \
+    dsp->itxfm_add[tx][DCT_ADST]  =              \
+    dsp->itxfm_add[tx][ADST_DCT]  =              \
+    dsp->itxfm_add[tx][DCT_DCT]   = nm##_add_msa
+
+    bind_itxfm(TX_4X4, 4x4);
+    bind_itxfm(TX_8X8, 8x8);
+    bind_itxfm(TX_16X16, 16x16);
+    bind_idct_only(TX_32X32, ff_idct_idct_32x32);
+#undef bind_itxfm
+#undef bind_idct_only
+}
+
+static av_cold void vp9dsp_mc_init_msa(VP9DSPContext *dsp, int bpp)
+{
+    if (bpp != 8)
+        return;  /* nothing is installed for other bit depths */
+#define init_fpel(idx1, idx2, sz, type)                                    \
+    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] = ff_##type##sz##_msa;  \
+    dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][0][0] = ff_##type##sz##_msa;  \
+    dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] = ff_##type##sz##_msa;  \
+    dsp->mc[idx1][FILTER_BILINEAR    ][idx2][0][0] = ff_##type##sz##_msa
+    /* The [0][0] slot takes the same routine for all four filter types. */
+#define init_copy_avg(idx, sz)    \
+    init_fpel(idx, 1, sz, avg);   \
+    init_fpel(idx, 0, sz, copy)
+    /* Width 4 installs an avg routine only; its copy slot is not written. */
+#define init_avg(idx, sz)  \
+    init_fpel(idx, 1, sz, avg)
+
+    init_avg(4,  4);
+    init_copy_avg(3,  8);
+    init_copy_avg(2, 16);
+    init_copy_avg(1, 32);
+    init_copy_avg(0, 64);
+
+#undef init_fpel
+#undef init_avg
+#undef init_copy_avg
+    /* Sub-pel slots: one routine per filter type, block size and direction. */
+#define init_subpel1(idx1, idx2, idxh, idxv, sz, dir, type)  \
+    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][idxh][idxv] =   \
+        ff_##type##_8tap_regular_##sz##dir##_msa;            \
+    dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][idxh][idxv] =   \
+        ff_##type##_8tap_sharp_##sz##dir##_msa;              \
+    dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][idxh][idxv] =   \
+        ff_##type##_8tap_smooth_##sz##dir##_msa;             \
+    dsp->mc[idx1][FILTER_BILINEAR    ][idx2][idxh][idxv] =   \
+        ff_##type##_bilin_##sz##dir##_msa;
+
+#define init_subpel2(idx, idxh, idxv, dir, type)      \
+    init_subpel1(4, idx, idxh, idxv,  4, dir, type);  \
+    init_subpel1(3, idx, idxh, idxv,  8, dir, type);  \
+    init_subpel1(2, idx, idxh, idxv, 16, dir, type);  \
+    init_subpel1(1, idx, idxh, idxv, 32, dir, type);  \
+    init_subpel1(0, idx, idxh, idxv, 64, dir, type)
+
+#define init_subpel3(idx, type)         \
+    init_subpel2(idx, 1, 0, h, type);   \
+    init_subpel2(idx, 0, 1, v, type);   \
+    init_subpel2(idx, 1, 1, hv, type)
+
+    init_subpel3(1, avg);
+    init_subpel3(0, put);
+
+#undef init_subpel3
+#undef init_subpel2
+#undef init_subpel1
+}
+
+static av_cold void vp9dsp_loopfilter_init_msa(VP9DSPContext *dsp, int bpp)
+{
+    if (bpp != 8)
+        return;  /* MSA loop filters are installed for bpp == 8 only */
+    dsp->loop_filter_8[0][0] = ff_loop_filter_h_4_8_msa;
+    dsp->loop_filter_8[0][1] = ff_loop_filter_v_4_8_msa;
+    dsp->loop_filter_8[1][0] = ff_loop_filter_h_8_8_msa;
+    dsp->loop_filter_8[1][1] = ff_loop_filter_v_8_8_msa;
+    dsp->loop_filter_8[2][0] = ff_loop_filter_h_16_8_msa;
+    dsp->loop_filter_8[2][1] = ff_loop_filter_v_16_8_msa;
+    /* loop_filter_16: index 0 takes the h routine, index 1 the v routine. */
+    dsp->loop_filter_16[0] = ff_loop_filter_h_16_16_msa;
+    dsp->loop_filter_16[1] = ff_loop_filter_v_16_16_msa;
+    /* mix2: first two indices select the 44/48/84/88 routine, the last h or v. */
+    dsp->loop_filter_mix2[0][0][0] = ff_loop_filter_h_44_16_msa;
+    dsp->loop_filter_mix2[0][0][1] = ff_loop_filter_v_44_16_msa;
+    dsp->loop_filter_mix2[0][1][0] = ff_loop_filter_h_48_16_msa;
+    dsp->loop_filter_mix2[0][1][1] = ff_loop_filter_v_48_16_msa;
+    dsp->loop_filter_mix2[1][0][0] = ff_loop_filter_h_84_16_msa;
+    dsp->loop_filter_mix2[1][0][1] = ff_loop_filter_v_84_16_msa;
+    dsp->loop_filter_mix2[1][1][0] = ff_loop_filter_h_88_16_msa;
+    dsp->loop_filter_mix2[1][1][1] = ff_loop_filter_v_88_16_msa;
+}
+
+static av_cold void vp9dsp_init_msa(VP9DSPContext *dsp, int bpp)
+{   /* Runs every MSA sub-initializer; each one checks bpp on its own. */
+    vp9dsp_mc_init_msa(dsp, bpp);
+    vp9dsp_itxfm_init_msa(dsp, bpp);
+    vp9dsp_intrapred_init_msa(dsp, bpp);
+    vp9dsp_loopfilter_init_msa(dsp, bpp);
+}
+#endif  // #if HAVE_MSA
+
+av_cold void ff_vp9dsp_init_mips(VP9DSPContext *dsp, int bpp)
+{   /* MIPS entry point: does nothing unless HAVE_MSA is non-zero at build time. */
+#if HAVE_MSA
+    vp9dsp_init_msa(dsp, bpp);
+#endif  // #if HAVE_MSA
+}
diff --git a/libavcodec/mips/vp9dsp_mips.h b/libavcodec/mips/vp9dsp_mips.h
new file mode 100644
index 00000000..4d730388
--- /dev/null
+++ b/libavcodec/mips/vp9dsp_mips.h
@@ -0,0 +1,237 @@
+/*
+ * Copyright (c) 2015 Shivraj Patil (Shivraj.Patil@imgtec.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MIPS_VP9DSP_MIPS_H
+#define AVCODEC_MIPS_VP9DSP_MIPS_H
+
+#define VP9_8TAP_MIPS_MSA_FUNC(SIZE, type, type_idx)                         \
+void ff_put_8tap_##type##_##SIZE##h_msa(uint8_t *dst, ptrdiff_t dststride,   \
+                                        const uint8_t *src,                  \
+                                        ptrdiff_t srcstride,                 \
+                                        int h, int mx, int my);              \
+/* h, v and hv variants share one prototype shape; put first, then avg. */   \
+void ff_put_8tap_##type##_##SIZE##v_msa(uint8_t *dst, ptrdiff_t dststride,   \
+                                        const uint8_t *src,                  \
+                                        ptrdiff_t srcstride,                 \
+                                        int h, int mx, int my);              \
+/* type_idx is not used by these declarations. */                            \
+void ff_put_8tap_##type##_##SIZE##hv_msa(uint8_t *dst, ptrdiff_t dststride,  \
+                                         const uint8_t *src,                 \
+                                         ptrdiff_t srcstride,                \
+                                         int h, int mx, int my);             \
+                                                                             \
+void ff_avg_8tap_##type##_##SIZE##h_msa(uint8_t *dst, ptrdiff_t dststride,   \
+                                        const uint8_t *src,                  \
+                                        ptrdiff_t srcstride,                 \
+                                        int h, int mx, int my);              \
+                                                                             \
+void ff_avg_8tap_##type##_##SIZE##v_msa(uint8_t *dst, ptrdiff_t dststride,   \
+                                        const uint8_t *src,                  \
+                                        ptrdiff_t srcstride,                 \
+                                        int h, int mx, int my);              \
+                                                                             \
+void ff_avg_8tap_##type##_##SIZE##hv_msa(uint8_t *dst, ptrdiff_t dststride,  \
+                                         const uint8_t *src,                 \
+                                         ptrdiff_t srcstride,                \
+                                         int h, int mx, int my);
+
+#define VP9_BILINEAR_MIPS_MSA_FUNC(SIZE)                                   \
+void ff_put_bilin_##SIZE##h_msa(uint8_t *dst, ptrdiff_t dststride,         \
+                                const uint8_t *src, ptrdiff_t srcstride,   \
+                                int h, int mx, int my);                    \
+/* Same six-function layout as the 8-tap declarations above. */            \
+void ff_put_bilin_##SIZE##v_msa(uint8_t *dst, ptrdiff_t dststride,         \
+                                const uint8_t *src, ptrdiff_t srcstride,   \
+                                int h, int mx, int my);                    \
+                                                                           \
+void ff_put_bilin_##SIZE##hv_msa(uint8_t *dst, ptrdiff_t dststride,        \
+                                 const uint8_t *src, ptrdiff_t srcstride,  \
+                                 int h, int mx, int my);                   \
+                                                                           \
+void ff_avg_bilin_##SIZE##h_msa(uint8_t *dst, ptrdiff_t dststride,         \
+                                const uint8_t *src, ptrdiff_t srcstride,   \
+                                int h, int mx, int my);                    \
+                                                                           \
+void ff_avg_bilin_##SIZE##v_msa(uint8_t *dst, ptrdiff_t dststride,         \
+                                const uint8_t *src, ptrdiff_t srcstride,   \
+                                int h, int mx, int my);                    \
+                                                                           \
+void ff_avg_bilin_##SIZE##hv_msa(uint8_t *dst, ptrdiff_t dststride,        \
+                                 const uint8_t *src, ptrdiff_t srcstride,  \
+                                 int h, int mx, int my);
+
+#define VP9_COPY_AVG_MIPS_MSA_FUNC(SIZE)                           \
+void ff_copy##SIZE##_msa(uint8_t *dst, ptrdiff_t dststride,        \
+                         const uint8_t *src, ptrdiff_t srcstride,  \
+                         int h, int mx, int my);                   \
+/* Same signature as the filtered variants, mx/my included. */     \
+void ff_avg##SIZE##_msa(uint8_t *dst, ptrdiff_t dststride,         \
+                        const uint8_t *src, ptrdiff_t srcstride,   \
+                        int h, int mx, int my);
+
+VP9_8TAP_MIPS_MSA_FUNC(64, regular, FILTER_8TAP_REGULAR);  /* six prototypes per line: put/avg x h/v/hv */
+VP9_8TAP_MIPS_MSA_FUNC(32, regular, FILTER_8TAP_REGULAR);
+VP9_8TAP_MIPS_MSA_FUNC(16, regular, FILTER_8TAP_REGULAR);
+VP9_8TAP_MIPS_MSA_FUNC(8, regular, FILTER_8TAP_REGULAR);
+VP9_8TAP_MIPS_MSA_FUNC(4, regular, FILTER_8TAP_REGULAR);
+
+VP9_8TAP_MIPS_MSA_FUNC(64, sharp, FILTER_8TAP_SHARP);
+VP9_8TAP_MIPS_MSA_FUNC(32, sharp, FILTER_8TAP_SHARP);
+VP9_8TAP_MIPS_MSA_FUNC(16, sharp, FILTER_8TAP_SHARP);
+VP9_8TAP_MIPS_MSA_FUNC(8, sharp, FILTER_8TAP_SHARP);
+VP9_8TAP_MIPS_MSA_FUNC(4, sharp, FILTER_8TAP_SHARP);
+
+VP9_8TAP_MIPS_MSA_FUNC(64, smooth, FILTER_8TAP_SMOOTH);
+VP9_8TAP_MIPS_MSA_FUNC(32, smooth, FILTER_8TAP_SMOOTH);
+VP9_8TAP_MIPS_MSA_FUNC(16, smooth, FILTER_8TAP_SMOOTH);
+VP9_8TAP_MIPS_MSA_FUNC(8, smooth, FILTER_8TAP_SMOOTH);
+VP9_8TAP_MIPS_MSA_FUNC(4, smooth, FILTER_8TAP_SMOOTH);
+
+VP9_BILINEAR_MIPS_MSA_FUNC(64);
+VP9_BILINEAR_MIPS_MSA_FUNC(32);
+VP9_BILINEAR_MIPS_MSA_FUNC(16);
+VP9_BILINEAR_MIPS_MSA_FUNC(8);
+VP9_BILINEAR_MIPS_MSA_FUNC(4);
+
+VP9_COPY_AVG_MIPS_MSA_FUNC(64);
+VP9_COPY_AVG_MIPS_MSA_FUNC(32);
+VP9_COPY_AVG_MIPS_MSA_FUNC(16);
+VP9_COPY_AVG_MIPS_MSA_FUNC(8);
+VP9_COPY_AVG_MIPS_MSA_FUNC(4);  /* NOTE(review): also declares ff_copy4_msa; the visible source instantiates only ff_avg4_msa */
+
+#undef VP9_8TAP_MIPS_MSA_FUNC
+#undef VP9_BILINEAR_MIPS_MSA_FUNC
+#undef VP9_COPY_AVG_MIPS_MSA_FUNC
+
+void ff_loop_filter_h_4_8_msa(uint8_t *dst, ptrdiff_t stride, int32_t e,
+                              int32_t i, int32_t h);  /* dsp->loop_filter_8[0][0] */
+void ff_loop_filter_h_8_8_msa(uint8_t *dst, ptrdiff_t stride, int32_t e,
+                              int32_t i, int32_t h);  /* dsp->loop_filter_8[1][0] */
+void ff_loop_filter_h_16_8_msa(uint8_t *dst, ptrdiff_t stride, int32_t e,
+                               int32_t i, int32_t h);  /* dsp->loop_filter_8[2][0] */
+void ff_loop_filter_v_4_8_msa(uint8_t *dst, ptrdiff_t stride, int32_t e,
+                              int32_t i, int32_t h);  /* dsp->loop_filter_8[0][1] */
+void ff_loop_filter_v_8_8_msa(uint8_t *dst, ptrdiff_t stride, int32_t e,
+                              int32_t i, int32_t h);  /* dsp->loop_filter_8[1][1] */
+void ff_loop_filter_v_16_8_msa(uint8_t *dst, ptrdiff_t stride, int32_t e,
+                               int32_t i, int32_t h);  /* dsp->loop_filter_8[2][1] */
+void ff_loop_filter_h_44_16_msa(uint8_t *dst, ptrdiff_t stride, int32_t e,
+                                int32_t i, int32_t h);  /* dsp->loop_filter_mix2[0][0][0] */
+void ff_loop_filter_h_88_16_msa(uint8_t *dst, ptrdiff_t stride, int32_t e,
+                                int32_t i, int32_t h);  /* dsp->loop_filter_mix2[1][1][0] */
+void ff_loop_filter_h_16_16_msa(uint8_t *dst, ptrdiff_t stride, int32_t e,
+                                int32_t i, int32_t h);  /* dsp->loop_filter_16[0] */
+void ff_loop_filter_v_44_16_msa(uint8_t *dst, ptrdiff_t stride, int32_t e,
+                                int32_t i, int32_t h);  /* dsp->loop_filter_mix2[0][0][1] */
+void ff_loop_filter_v_88_16_msa(uint8_t *dst, ptrdiff_t stride, int32_t e,
+                                int32_t i, int32_t h);  /* dsp->loop_filter_mix2[1][1][1] */
+void ff_loop_filter_v_16_16_msa(uint8_t *dst, ptrdiff_t stride, int32_t e,
+                                int32_t i, int32_t h);  /* dsp->loop_filter_16[1] */
+void ff_loop_filter_h_48_16_msa(uint8_t *dst, ptrdiff_t stride, int32_t e,
+                                int32_t i, int32_t h);  /* dsp->loop_filter_mix2[0][1][0] */
+void ff_loop_filter_h_84_16_msa(uint8_t *dst, ptrdiff_t stride, int32_t e,
+                                int32_t i, int32_t h);  /* dsp->loop_filter_mix2[1][0][0] */
+void ff_loop_filter_v_48_16_msa(uint8_t *dst, ptrdiff_t stride, int32_t e,
+                                int32_t i, int32_t h);  /* dsp->loop_filter_mix2[0][1][1] */
+void ff_loop_filter_v_84_16_msa(uint8_t *dst, ptrdiff_t stride, int32_t e,
+                                int32_t i, int32_t h);  /* dsp->loop_filter_mix2[1][0][1] */
+void ff_idct_idct_4x4_add_msa(uint8_t *dst, ptrdiff_t stride,
+                              int16_t *block, int eob);  /* all below share this signature */
+void ff_idct_idct_8x8_add_msa(uint8_t *dst, ptrdiff_t stride,
+                              int16_t *block, int eob);
+void ff_idct_idct_16x16_add_msa(uint8_t *dst, ptrdiff_t stride,
+                                int16_t *block, int eob);
+void ff_idct_idct_32x32_add_msa(uint8_t *dst, ptrdiff_t stride,
+                                int16_t *block, int eob);  /* fills all four 32x32 itxfm_add slots */
+void ff_iadst_iadst_4x4_add_msa(uint8_t *dst, ptrdiff_t stride,
+                                int16_t *block, int eob);
+void ff_iadst_iadst_8x8_add_msa(uint8_t *dst, ptrdiff_t stride,
+                                int16_t *block, int eob);
+void ff_iadst_iadst_16x16_add_msa(uint8_t *dst, ptrdiff_t stride,
+                                  int16_t *block, int eob);
+void ff_iadst_idct_4x4_add_msa(uint8_t *dst, ptrdiff_t stride,
+                               int16_t *block, int eob);
+void ff_iadst_idct_8x8_add_msa(uint8_t *dst, ptrdiff_t stride,
+                               int16_t *block, int eob);
+void ff_iadst_idct_16x16_add_msa(uint8_t *dst, ptrdiff_t stride,
+                                 int16_t *block, int eob);
+void ff_idct_iadst_4x4_add_msa(uint8_t *dst, ptrdiff_t stride,
+                               int16_t *block, int eob);
+void ff_idct_iadst_8x8_add_msa(uint8_t *dst, ptrdiff_t stride,
+                               int16_t *block, int eob);
+void ff_idct_iadst_16x16_add_msa(uint8_t *dst, ptrdiff_t stride,
+                                 int16_t *block, int eob);
+void ff_iwht_iwht_4x4_add_msa(uint8_t *dst, ptrdiff_t stride,
+                              int16_t *block, int eob);  /* NOTE(review): not installed by vp9dsp_itxfm_init_msa() */
+
+void ff_vert_16x16_msa(uint8_t *dst, ptrdiff_t stride, const uint8_t *left,
+                       const uint8_t *top);  /* all predictors below share this signature */
+void ff_vert_32x32_msa(uint8_t *dst, ptrdiff_t stride, const uint8_t *left,
+                       const uint8_t *top);
+void ff_hor_16x16_msa(uint8_t *dst, ptrdiff_t stride, const uint8_t *left,
+                      const uint8_t *top);
+void ff_hor_32x32_msa(uint8_t *dst, ptrdiff_t stride, const uint8_t *left,
+                      const uint8_t *top);
+void ff_dc_4x4_msa(uint8_t *dst, ptrdiff_t stride, const uint8_t *left,
+                   const uint8_t *top);
+void ff_dc_8x8_msa(uint8_t *dst, ptrdiff_t stride, const uint8_t *left,
+                   const uint8_t *top);
+void ff_dc_16x16_msa(uint8_t *dst, ptrdiff_t stride, const uint8_t *left,
+                     const uint8_t *top);
+void ff_dc_32x32_msa(uint8_t *dst, ptrdiff_t stride, const uint8_t *left,
+                     const uint8_t *top);
+void ff_dc_left_4x4_msa(uint8_t *dst, ptrdiff_t stride, const uint8_t *left,
+                        const uint8_t *top);
+void ff_dc_left_8x8_msa(uint8_t *dst, ptrdiff_t stride, const uint8_t *left,
+                        const uint8_t *top);
+void ff_dc_left_16x16_msa(uint8_t *dst, ptrdiff_t stride,
+                          const uint8_t *left, const uint8_t *top);
+void ff_dc_left_32x32_msa(uint8_t *dst, ptrdiff_t stride,
+                          const uint8_t *left, const uint8_t *top);
+void ff_dc_top_4x4_msa(uint8_t *dst, ptrdiff_t stride, const uint8_t *left,
+                       const uint8_t *top);
+void ff_dc_top_8x8_msa(uint8_t *dst, ptrdiff_t stride, const uint8_t *left,
+                       const uint8_t *top);
+void ff_dc_top_16x16_msa(uint8_t *dst, ptrdiff_t stride,
+                         const uint8_t *left, const uint8_t *top);
+void ff_dc_top_32x32_msa(uint8_t *dst, ptrdiff_t stride,
+                         const uint8_t *left, const uint8_t *top);
+void ff_dc_128_16x16_msa(uint8_t *dst, ptrdiff_t stride,
+                         const uint8_t *left, const uint8_t *top);
+void ff_dc_128_32x32_msa(uint8_t *dst, ptrdiff_t stride,
+                         const uint8_t *left, const uint8_t *top);
+void ff_dc_127_16x16_msa(uint8_t *dst, ptrdiff_t stride,
+                         const uint8_t *left, const uint8_t *top);
+void ff_dc_127_32x32_msa(uint8_t *dst, ptrdiff_t stride,
+                         const uint8_t *left, const uint8_t *top);
+void ff_dc_129_16x16_msa(uint8_t *dst, ptrdiff_t stride,
+                         const uint8_t *left, const uint8_t *top);
+void ff_dc_129_32x32_msa(uint8_t *dst, ptrdiff_t stride,
+                         const uint8_t *left, const uint8_t *top);
+void ff_tm_4x4_msa(uint8_t *dst, ptrdiff_t stride, const uint8_t *left,
+                   const uint8_t *top);
+void ff_tm_8x8_msa(uint8_t *dst, ptrdiff_t stride, const uint8_t *left,
+                   const uint8_t *top);
+void ff_tm_16x16_msa(uint8_t *dst, ptrdiff_t stride, const uint8_t *left,
+                     const uint8_t *top);
+void ff_tm_32x32_msa(uint8_t *dst, ptrdiff_t stride, const uint8_t *left,
+                     const uint8_t *top);  /* every predictor declared here is installed by vp9dsp_intrapred_init_msa() */
+
+#endif  // #ifndef AVCODEC_MIPS_VP9DSP_MIPS_H
diff --git a/libavcodec/mips/xvid_idct_mmi.c b/libavcodec/mips/xvid_idct_mmi.c
new file mode 100644
index 00000000..d3f9acb0
--- /dev/null
+++ b/libavcodec/mips/xvid_idct_mmi.c
@@ -0,0 +1,253 @@
+/*
+ * Loongson SIMD optimized xvid idct
+ *
+ * Copyright (c) 2015 Loongson Technology Corporation Limited
+ * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "idctdsp_mips.h"
+#include "xvididct_mips.h"
+
+#define BITS_INV_ACC    5                           // 4 or 5 for IEEE
+#define SHIFT_INV_ROW   (16 - BITS_INV_ACC)         // 11; the row asm hard-codes it (dli $10, 11)
+#define SHIFT_INV_COL   (1 + BITS_INV_ACC)          // 6; the column asm hard-codes it (dli $10, 6)
+#define RND_INV_ROW     (1024 * (6 - BITS_INV_ACC)) // 1024
+#define RND_INV_COL     (16 * (BITS_INV_ACC - 3))   // 32
+#define RND_INV_CORR    (RND_INV_COL - 1)           // 31
+// NOTE: none of the macros in these two groups is referenced by name in this file.
+#define BITS_FRW_ACC    3                           // 2 or 3 for accuracy
+#define SHIFT_FRW_COL   BITS_FRW_ACC                // 3
+#define SHIFT_FRW_ROW   (BITS_FRW_ACC + 17)         // 20
+#define RND_FRW_ROW     (262144*(BITS_FRW_ACC - 1)) // 524288
+
+DECLARE_ALIGNED(8, static const int16_t, tg_1_16)[4*4] = { // tg_k_16 = tan(k*pi/16); rows read via 0/8/16/24(%3)
+     13036, 13036, 13036, 13036,    //  tg_1_16      * (1<<16), rounded
+     27146, 27146, 27146, 27146,    //  tg_2_16      * (1<<16), rounded
+    -21746,-21746,-21746,-21746,    // (tg_3_16 - 1) * (1<<16), rounded
+     23170, 23170, 23170, 23170     //  cos(4*pi/16) * (1<<15), rounded
+};
+
+DECLARE_ALIGNED(8, static const int32_t, rounder_0)[2*8] = { // A4 of DCT_8_INV_ROW_MMI: one int32 pair per row
+    65536,65536,    // row 0, read as 8*0(%1)
+     3597, 3597,    // row 1, read as 8*1(%1)
+     2260, 2260,    // row 2, read as 8*2(%1)
+     1203, 1203,    // row 3, read as 8*3(%1)
+        0,    0,    // row 4, read as 8*4(%1)
+      120,  120,    // row 5, read as 8*5(%1)
+      512,  512,    // row 6, read as 8*6(%1)
+      512,  512     // row 7, read as 8*7(%1)
+};
+
+DECLARE_ALIGNED(8, static const int16_t, tab_i_04_mmi)[32*4] = { // A3 of DCT_8_INV_ROW_MMI; sub-table 0 (64*0(%2)): rows 0 and 4
+     16384, 21407, 16384,  8867,    // w05 w04 w01 w00
+     16384,  8867,-16384,-21407,    // w07 w06 w03 w02
+     16384, -8867, 16384,-21407,    // w13 w12 w09 w08
+    -16384, 21407, 16384, -8867,    // w15 w14 w11 w10
+     22725, 19266, 19266, -4520,    // w21 w20 w17 w16
+     12873,  4520,-22725,-12873,    // w23 w22 w19 w18
+     12873,-22725,  4520,-12873,    // w29 w28 w25 w24
+      4520, 19266, 19266,-22725,    // w31 w30 w27 w26
+    // sub-table 1 (64*1(%2)): used for rows 1 and 7
+     22725, 29692, 22725, 12299,    // w05 w04 w01 w00
+     22725, 12299,-22725,-29692,    // w07 w06 w03 w02
+     22725,-12299, 22725,-29692,    // w13 w12 w09 w08
+    -22725, 29692, 22725,-12299,    // w15 w14 w11 w10
+     31521, 26722, 26722, -6270,    // w21 w20 w17 w16
+     17855,  6270,-31521,-17855,    // w23 w22 w19 w18
+     17855,-31521,  6270,-17855,    // w29 w28 w25 w24
+      6270, 26722, 26722,-31521,    // w31 w30 w27 w26
+    // sub-table 2 (64*2(%2)): used for rows 2 and 6
+     21407, 27969, 21407, 11585,    // w05 w04 w01 w00
+     21407, 11585,-21407,-27969,    // w07 w06 w03 w02
+     21407,-11585, 21407,-27969,    // w13 w12 w09 w08
+    -21407, 27969, 21407,-11585,    // w15 w14 w11 w10
+     29692, 25172, 25172, -5906,    // w21 w20 w17 w16
+     16819,  5906,-29692,-16819,    // w23 w22 w19 w18
+     16819,-29692,  5906,-16819,    // w29 w28 w25 w24
+      5906, 25172, 25172,-29692,    // w31 w30 w27 w26
+    // sub-table 3 (64*3(%2)): used for rows 3 and 5
+     19266, 25172, 19266, 10426,    // w05 w04 w01 w00
+     19266, 10426,-19266,-25172,    // w07 w06 w03 w02
+     19266,-10426, 19266,-25172,    // w13 w12 w09 w08
+    -19266, 25172, 19266,-10426,    // w15 w14 w11 w10
+     26722, 22654, 22654, -5315,    // w21 w20 w17 w16
+     15137,  5315,-26722,-15137,    // w23 w22 w19 w18
+     15137,-26722,  5315,-15137,    // w29 w28 w25 w24
+      5315, 22654, 22654,-26722,    // w31 w30 w27 w26
+};
+
+#define DCT_8_INV_ROW_MMI(A1,A2,A3,A4) /* A1=in A2=out A3=tab A4=rounder */ \
+    "dli $10, 0x88              \n\t" /* selector for x2 x0 x2 x0         */\
+    "ldc1 $f4, "#A1"            \n\t" /* 0; x3 x2 x1 x0                   */\
+    "dmtc1 $10, $f16            \n\t" /* $f16 = shuffle selector          */\
+    "ldc1 $f10, 8+"#A1"         \n\t" /* 1; x7 x6 x5 x4                   */\
+    "ldc1 $f6, "#A3"            \n\t" /* 3; w05 w04 w01 w00               */\
+    "pshufh $f0, $f4, $f16      \n\t" /* x2 x0 x2 x0                      */\
+    "ldc1 $f8, 8+"#A3"          \n\t" /* 4; w07 w06 w03 w02               */\
+    "ldc1 $f12, 32+"#A3"        \n\t" /* 6; w21 w20 w17 w16               */\
+    "pmaddhw $f6, $f6, $f0      \n\t" /* x2*w05+x0*w04 x2*w01+x0*w00      */\
+    "dli $10, 0xdd              \n\t" /* selector for x3 x1 x3 x1         */\
+    "pshufh $f2, $f10, $f16     \n\t" /* x6 x4 x6 x4                      */\
+    "dmtc1 $10, $f16            \n\t" /* $f16 = shuffle selector          */\
+    "pmaddhw $f8, $f8, $f2      \n\t" /* x6*w07+x4*w06 x6*w03+x4*w02      */\
+    "ldc1 $f14, 40+"#A3"        \n\t" /* 7; w23 w22 w19 w18               */\
+    "pshufh $f4, $f4, $f16      \n\t" /* x3 x1 x3 x1                      */\
+    "pmaddhw $f12, $f12, $f4    \n\t" /* x3*w21+x1*w20 x3*w17+x1*w16      */\
+    "pshufh $f10, $f10, $f16    \n\t" /* x7 x5 x7 x5                      */\
+    "ldc1 $f18, "#A4"           \n\t" /* rounder for this row             */\
+    "pmaddhw $f14, $f14, $f10   \n\t" /* x7*w23+x5*w22 x7*w19+x5*w18      */\
+    "paddw $f6, $f6, $f18       \n\t" /* + rounder (A4)                   */\
+    "ldc1 $f16, 16+"#A3"        \n\t" /* w13 w12 w09 w08                  */\
+    "pmaddhw $f0, $f0, $f16     \n\t" /* x2*w13+x0*w12 x2*w09+x0*w08      */\
+    "ldc1 $f16, 24+"#A3"        \n\t" /* w15 w14 w11 w10                  */\
+    "paddw $f6, $f6, $f8        \n\t" /* 4; a1=sum(even1) a0=sum(even0)   */\
+    "pmaddhw $f2, $f2, $f16     \n\t" /* x6*w15+x4*w14 x6*w11+x4*w10      */\
+    "ldc1 $f16, 48+"#A3"        \n\t" /* w29 w28 w25 w24                  */\
+    "pmaddhw $f4, $f4, $f16     \n\t" /* x3*w29+x1*w28 x3*w25+x1*w24      */\
+    "ldc1 $f16, 56+"#A3"        \n\t" /* w31 w30 w27 w26                  */\
+    "paddw $f12, $f12, $f14     \n\t" /* 7; b1=sum(odd1) b0=sum(odd0)     */\
+    "dli $10, 11                \n\t" /* shift count 11 = SHIFT_INV_ROW   */\
+    "pmaddhw $f10, $f10, $f16   \n\t" /* x7*w31+x5*w30 x7*w27+x5*w26      */\
+    "dmtc1 $10, $f16            \n\t" /* $f16 = shift count               */\
+    "psubw $f8, $f6, $f12       \n\t" /* 6; a1-b1 a0-b0                   */\
+    "paddw $f6, $f6, $f12       \n\t" /* a1+b1 a0+b0                      */\
+    "paddw $f0, $f0, $f18       \n\t" /* + rounder (A4)                   */\
+    "psraw $f6, $f6, $f16       \n\t" /* y1=a1+b1 y0=a0+b0                */\
+    "paddw $f0, $f0, $f2        \n\t" /* 1; a3=sum(even3) a2=sum(even2)   */\
+    "paddw $f4, $f4, $f10       \n\t" /* 5; b3=sum(odd3) b2=sum(odd2)     */\
+    "psraw $f8, $f8, $f16       \n\t" /* y6=a1-b1 y7=a0-b0                */\
+    "psubw $f14, $f0, $f4       \n\t" /* 2; a3-b3 a2-b2                   */\
+    "paddw $f0, $f0, $f4        \n\t" /* a3+b3 a2+b2                      */\
+    "psraw $f0, $f0, $f16       \n\t" /* y3=a3+b3 y2=a2+b2                */\
+    "psraw $f14, $f14, $f16     \n\t" /* y4=a3-b3 y5=a2-b2                */\
+    "dli $10, 0xb1              \n\t" /* selector: swap halfword pairs    */\
+    "packsswh $f6, $f6, $f0     \n\t" /* 0; y3 y2 y1 y0                   */\
+    "dmtc1 $10, $f16            \n\t" /* $f16 = shuffle selector          */\
+    "packsswh $f14, $f14, $f8   \n\t" /* 4; y6 y7 y4 y5                   */\
+    "sdc1 $f6, "#A2"            \n\t" /* 3; save y3 y2 y1 y0              */\
+    "pshufh $f14, $f14, $f16    \n\t" /* y7 y6 y5 y4                      */\
+    "sdc1 $f14, 8+"#A2"         \n\t" /* 7; save y7 y6 y5 y4              */\
+
+
+#define DCT_8_INV_COL(A1,A2) /* A1=in A2=out, 4 columns; %3=tg_1_16[] */    \
+    "ldc1 $f2, 2*8(%3)          \n\t" /* tg_3_16-1                        */\
+    "ldc1 $f6, 16*3+"#A1"       \n\t" /* x3                               */\
+    "ldc1 $f10, 16*5+"#A1"      \n\t" /* x5                               */\
+    "pmulhh $f0, $f2, $f6       \n\t" /* x3*(tg_3_16-1)                   */\
+    "ldc1 $f4, 0(%3)            \n\t" /* tg_1_16                          */\
+    "pmulhh $f2, $f2, $f10      \n\t" /* x5*(tg_3_16-1)                   */\
+    "ldc1 $f14, 16*7+"#A1"      \n\t" /* x7                               */\
+    "ldc1 $f12, 16*1+"#A1"      \n\t" /* x1                               */\
+    "pmulhh $f8, $f4, $f14      \n\t" /* x7*tg_1_16                       */\
+    "paddsh $f0, $f0, $f6       \n\t" /* x3*tg_3_16                       */\
+    "pmulhh $f4, $f4, $f12      \n\t" /* x1*tg_1_16                       */\
+    "paddsh $f2, $f2, $f6       \n\t" /* x3+x5*(tg_3_16-1)                */\
+    "psubsh $f0, $f0, $f10      \n\t" /* x3*tg_3_16-x5 = tm35             */\
+    "ldc1 $f6, 3*8(%3)          \n\t" /* ocos_4_16                        */\
+    "paddsh $f2, $f2, $f10      \n\t" /* x3+x5*tg_3_16 = tp35             */\
+    "paddsh $f8, $f8, $f12      \n\t" /* x1+tg_1_16*x7 = tp17             */\
+    "psubsh $f4, $f4, $f14      \n\t" /* x1*tg_1_16-x7 = tm17             */\
+    "paddsh $f10, $f8, $f2      \n\t" /* tp17+tp35 = b0                   */\
+    "psubsh $f12, $f4, $f0      \n\t" /* tm17-tm35 = b3                   */\
+    "psubsh $f8, $f8, $f2       \n\t" /* tp17-tp35 = t1                   */\
+    "paddsh $f4, $f4, $f0       \n\t" /* tm17+tm35 = t2                   */\
+    "ldc1 $f14, 1*8(%3)         \n\t" /* tg_2_16                          */\
+    "sdc1 $f10, 3*16+"#A2"      \n\t" /* save b0 (out row 3 as scratch)   */\
+    "paddsh $f2, $f8, $f4       \n\t" /* t1+t2                            */\
+    "sdc1 $f12, 5*16+"#A2"      \n\t" /* save b3 (out row 5 as scratch)   */\
+    "psubsh $f8, $f8, $f4       \n\t" /* t1-t2                            */\
+    "ldc1 $f10, 2*16+"#A1"      \n\t" /* x2                               */\
+    "ldc1 $f12, 6*16+"#A1"      \n\t" /* x6                               */\
+    "pmulhh $f0, $f14, $f10     \n\t" /* x2*tg_2_16                       */\
+    "pmulhh $f14, $f14, $f12    \n\t" /* x6*tg_2_16                       */\
+    "pmulhh $f2, $f2, $f6       \n\t" /* ocos_4_16*(t1+t2) = b1/2         */\
+    "ldc1 $f4, 0*16+"#A1"       \n\t" /* x0                               */\
+    "pmulhh $f8, $f8, $f6       \n\t" /* ocos_4_16*(t1-t2) = b2/2         */\
+    "psubsh $f0, $f0, $f12      \n\t" /* x2*tg_2_16-x6 = tm26             */\
+    "ldc1 $f12, 4*16+"#A1"      \n\t" /* x4                               */\
+    "paddsh $f14, $f14, $f10    \n\t" /* x2+x6*tg_2_16 = tp26             */\
+    "psubsh $f6, $f4, $f12      \n\t" /* x0-x4 = tm04                     */\
+    "paddsh $f4, $f4, $f12      \n\t" /* x0+x4 = tp04                     */\
+    "paddsh $f10, $f4, $f14     \n\t" /* tp04+tp26 = a0                   */\
+    "psubsh $f12, $f6, $f0      \n\t" /* tm04-tm26 = a2                   */\
+    "psubsh $f4, $f4, $f14      \n\t" /* tp04-tp26 = a3                   */\
+    "paddsh $f6, $f6, $f0       \n\t" /* tm04+tm26 = a1                   */\
+    "paddsh $f2, $f2, $f2       \n\t" /* b1                               */\
+    "paddsh $f8, $f8, $f8       \n\t" /* b2                               */\
+    "psubsh $f14, $f6, $f2      \n\t" /* a1-b1                            */\
+    "dli $10, 6                 \n\t" /* shift count 6 = SHIFT_INV_COL    */\
+    "paddsh $f6, $f6, $f2       \n\t" /* a1+b1                            */\
+    "dmtc1 $10, $f16            \n\t" /* $f16 = shift count               */\
+    "psubsh $f0, $f12, $f8      \n\t" /* a2-b2                            */\
+    "paddsh $f12, $f12, $f8     \n\t" /* a2+b2                            */\
+    "psrah $f6, $f6, $f16       \n\t" /* dst1                             */\
+    "psrah $f12, $f12, $f16     \n\t" /* dst2                             */\
+    "ldc1 $f2, 3*16+"#A2"       \n\t" /* load b0                          */\
+    "psrah $f14, $f14, $f16     \n\t" /* dst6                             */\
+    "psrah $f0, $f0, $f16       \n\t" /* dst5                             */\
+    "sdc1 $f6, 1*16+"#A2"       \n\t" /* store dst1                       */\
+    "psubsh $f8, $f10, $f2      \n\t" /* a0-b0                            */\
+    "paddsh $f10, $f10, $f2     \n\t" /* a0+b0                            */\
+    "sdc1 $f12, 2*16+"#A2"      \n\t" /* store dst2                       */\
+    "ldc1 $f6, 5*16+"#A2"       \n\t" /* load b3                          */\
+    "psrah $f10, $f10, $f16     \n\t" /* dst0                             */\
+    "psrah $f8, $f8, $f16       \n\t" /* dst7                             */\
+    "sdc1 $f0, 5*16+"#A2"       \n\t" /* store dst5 (b3 already loaded)   */\
+    "psubsh $f12, $f4, $f6      \n\t" /* a3-b3                            */\
+    "paddsh $f4, $f4, $f6       \n\t" /* a3+b3                            */\
+    "sdc1 $f14, 6*16+"#A2"      \n\t" /* store dst6                       */\
+    "sdc1 $f10, 0*16+"#A2"      \n\t" /* store dst0                       */\
+    "psrah $f4, $f4, $f16       \n\t" /* dst3                             */\
+    "sdc1 $f8, 7*16+"#A2"       \n\t" /* store dst7                       */\
+    "psrah $f12, $f12, $f16     \n\t" /* dst4                             */\
+    "sdc1 $f4, 3*16+"#A2"       \n\t" /* store dst3                       */\
+    "sdc1 $f12, 4*16+"#A2"      \n\t" /* store dst4                       */\
+
+
+void ff_xvid_idct_mmi(int16_t *block)
+{
+    /* In-place IDCT of block[0..63]: 8 row passes, then 2 passes of 4 columns. */
+    __asm__ volatile (
+        DCT_8_INV_ROW_MMI(0*16(%0), 0*16(%0), 64*0(%2), 8*0(%1))
+        DCT_8_INV_ROW_MMI(1*16(%0), 1*16(%0), 64*1(%2), 8*1(%1))
+        DCT_8_INV_ROW_MMI(2*16(%0), 2*16(%0), 64*2(%2), 8*2(%1))
+        DCT_8_INV_ROW_MMI(3*16(%0), 3*16(%0), 64*3(%2), 8*3(%1))
+        DCT_8_INV_ROW_MMI(4*16(%0), 4*16(%0), 64*0(%2), 8*4(%1))
+        DCT_8_INV_ROW_MMI(5*16(%0), 5*16(%0), 64*3(%2), 8*5(%1))
+        DCT_8_INV_ROW_MMI(6*16(%0), 6*16(%0), 64*2(%2), 8*6(%1))
+        DCT_8_INV_ROW_MMI(7*16(%0), 7*16(%0), 64*1(%2), 8*7(%1))
+        DCT_8_INV_COL(0(%0), 0(%0))
+        DCT_8_INV_COL(8(%0), 8(%0))
+        ::"r"(block),"r"(rounder_0),"r"(tab_i_04_mmi),"r"(tg_1_16)
+        : "$10", "$f0", "$f2", "$f4", "$f6", "$f8", "$f10", "$f12", "$f14",
+          "$f16", "$f18", "memory" /* macro scratch regs; block[] is rewritten */
+    );
+}
+
+void ff_xvid_idct_put_mmi(uint8_t *dest, int32_t line_size, int16_t *block)
+{
+    ff_xvid_idct_mmi(block);                           /* transform block in place */
+    ff_put_pixels_clamped_mmi(block, dest, line_size); /* then pass the result to the put helper (defined elsewhere) */
+}
+
+void ff_xvid_idct_add_mmi(uint8_t *dest, int32_t line_size, int16_t *block)
+{
+    ff_xvid_idct_mmi(block);                           /* transform block in place */
+    ff_add_pixels_clamped_mmi(block, dest, line_size); /* then pass the result to the add helper (defined elsewhere) */
+}
diff --git a/libavcodec/mips/xvididct_init_mips.c b/libavcodec/mips/xvididct_init_mips.c
new file mode 100644
index 00000000..c1d82cc3
--- /dev/null
+++ b/libavcodec/mips/xvididct_init_mips.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "xvididct_mips.h"
+
+#if HAVE_MMI
+static av_cold void xvid_idct_init_mmi(IDCTDSPContext *c, AVCodecContext *avctx,
+        unsigned high_bit_depth)
+{
+    /* Install the MMI xvid IDCT only for !high_bit_depth with algo auto/xvid. */
+    if (high_bit_depth ||
+        (avctx->idct_algo != FF_IDCT_AUTO && avctx->idct_algo != FF_IDCT_XVID))
+        return;
+
+    c->perm_type = FF_IDCT_PERM_NONE;
+    c->idct      = ff_xvid_idct_mmi;
+    c->idct_put  = ff_xvid_idct_put_mmi;
+    c->idct_add  = ff_xvid_idct_add_mmi;
+}
+#endif /* HAVE_MMI */
+
+av_cold void ff_xvid_idct_init_mips(IDCTDSPContext *c, AVCodecContext *avctx,
+        unsigned high_bit_depth)
+{   /* MIPS hook for the xvid IDCT setup; leaves *c untouched unless HAVE_MMI is set */
+#if HAVE_MMI
+    xvid_idct_init_mmi(c, avctx, high_bit_depth);
+#endif /* HAVE_MMI */
+}
diff --git a/libavcodec/mips/xvididct_mips.h b/libavcodec/mips/xvididct_mips.h
new file mode 100644
index 00000000..0768aaa2
--- /dev/null
+++ b/libavcodec/mips/xvididct_mips.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MIPS_XVIDIDCT_MIPS_H
+#define AVCODEC_MIPS_XVIDIDCT_MIPS_H
+
+#include "libavcodec/xvididct.h"
+
+void ff_xvid_idct_mmi(int16_t *block);
+void ff_xvid_idct_put_mmi(uint8_t *dest, int32_t line_size, int16_t *block);
+void ff_xvid_idct_add_mmi(uint8_t *dest, int32_t line_size, int16_t *block);
+
+#endif /* AVCODEC_MIPS_XVIDIDCT_MIPS_H */
diff --git a/libavcodec/mjpeg2jpeg_bsf.c b/libavcodec/mjpeg2jpeg_bsf.c
index 68640db9..92dc3ca2 100644
--- a/libavcodec/mjpeg2jpeg_bsf.c
+++ b/libavcodec/mjpeg2jpeg_bsf.c
@@ -28,9 +28,11 @@
 
 #include "libavutil/error.h"
 #include "libavutil/mem.h"
+#include "libavutil/intreadwrite.h"
 
 #include "avcodec.h"
 #include "jpegtables.h"
+#include "mjpeg.h"
 
 static const uint8_t jpeg_header[] = {
     0xff, 0xd8,                     // SOI
@@ -88,11 +90,15 @@ static int mjpeg2jpeg_filter(AVBitStreamFilterContext *bsfc,
         av_log(avctx, AV_LOG_ERROR, "input is truncated\n");
         return AVERROR_INVALIDDATA;
     }
-    if (memcmp("AVI1", buf + 6, 4)) {
-        av_log(avctx, AV_LOG_ERROR, "input is not MJPEG/AVI1\n");
+    if (AV_RB16(buf) != 0xffd8) {
+        av_log(avctx, AV_LOG_ERROR, "input is not MJPEG\n");
         return AVERROR_INVALIDDATA;
     }
-    input_skip = (buf[4] << 8) + buf[5] + 4;
+    if (buf[2] == 0xff && buf[3] == APP0) {
+        input_skip = (buf[4] << 8) + buf[5] + 4;
+    } else {
+        input_skip = 2;
+    }
     if (buf_size < input_skip) {
         av_log(avctx, AV_LOG_ERROR, "input is truncated\n");
         return AVERROR_INVALIDDATA;
diff --git a/libavcodec/mjpega_dump_header_bsf.c b/libavcodec/mjpega_dump_header_bsf.c
index 87829fae..d6d41e6b 100644
--- a/libavcodec/mjpega_dump_header_bsf.c
+++ b/libavcodec/mjpega_dump_header_bsf.c
@@ -44,7 +44,7 @@ static int mjpega_dump_header(AVBitStreamFilterContext *bsfc, AVCodecContext *av
     }
 
     *poutbuf_size = 0;
-    *poutbuf = av_malloc(buf_size + 44 + FF_INPUT_BUFFER_PADDING_SIZE);
+    *poutbuf = av_malloc(buf_size + 44 + AV_INPUT_BUFFER_PADDING_SIZE);
     if (!*poutbuf)
         return AVERROR(ENOMEM);
     poutbufp = *poutbuf;
diff --git a/libavcodec/mjpegbdec.c b/libavcodec/mjpegbdec.c
index 8ac60c22..a858707d 100644
--- a/libavcodec/mjpegbdec.c
+++ b/libavcodec/mjpegbdec.c
@@ -166,7 +166,7 @@ AVCodec ff_mjpegb_decoder = {
     .init           = ff_mjpeg_decode_init,
     .close          = ff_mjpeg_decode_end,
     .decode         = mjpegb_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .max_lowres     = 3,
     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE,
 };
diff --git a/libavcodec/mjpegdec.c b/libavcodec/mjpegdec.c
index 2c18a496..bce5496f 100644
--- a/libavcodec/mjpegdec.c
+++ b/libavcodec/mjpegdec.c
@@ -98,6 +98,15 @@ static void parse_avid(MJpegDecodeContext *s, uint8_t *buf, int len)
         av_log(s->avctx, AV_LOG_INFO, "AVID: len:%d %d\n", len, len > 14 ? buf[12] : -1);
 }
 
+static void init_idct(AVCodecContext *avctx) /* used at decoder init and when the SOF bit depth changes */
+{
+    MJpegDecodeContext *s = avctx->priv_data;
+    /* Set up the IDCT DSP context, then rebuild the zigzag scan table with its permutation. */
+    ff_idctdsp_init(&s->idsp, avctx);
+    ff_init_scantable(s->idsp.idct_permutation, &s->scantable,
+                      ff_zigzag_direct);
+}
+
 av_cold int ff_mjpeg_decode_init(AVCodecContext *avctx)
 {
     MJpegDecodeContext *s = avctx->priv_data;
@@ -112,9 +121,7 @@ av_cold int ff_mjpeg_decode_init(AVCodecContext *avctx)
     s->avctx = avctx;
     ff_blockdsp_init(&s->bdsp, avctx);
     ff_hpeldsp_init(&s->hdsp, avctx->flags);
-    ff_idctdsp_init(&s->idsp, avctx);
-    ff_init_scantable(s->idsp.idct_permutation, &s->scantable,
-                      ff_zigzag_direct);
+    init_idct(avctx);
     s->buffer_size   = 0;
     s->buffer        = NULL;
     s->start_code    = -1;
@@ -267,7 +274,6 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
 
     /* XXX: verify len field validity */
     len     = get_bits(&s->gb, 16);
-    s->avctx->bits_per_raw_sample =
     bits    = get_bits(&s->gb, 8);
 
     if (bits > 16 || bits < 1) {
@@ -275,6 +281,11 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
         return AVERROR_INVALIDDATA;
     }
 
+    if (s->avctx->bits_per_raw_sample != bits) {
+        av_log(s->avctx, AV_LOG_INFO, "Changing bps to %d\n", bits);
+        s->avctx->bits_per_raw_sample = bits;
+        init_idct(s->avctx);
+    }
     if (s->pegasus_rct)
         bits = 9;
     if (bits == 9 && !s->pegasus_rct)
@@ -288,9 +299,6 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
     height = get_bits(&s->gb, 16);
     width  = get_bits(&s->gb, 16);
 
-    if (s->avctx->codec_id == AV_CODEC_ID_AMV && (height&15))
-        avpriv_request_sample(s->avctx, "non mod 16 height AMV\n");
-
     // HACK for odd_height.mov
     if (s->interlaced && s->width == width && s->height == height + 1)
         height= s->height;
@@ -345,6 +353,12 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
                i, h_count[i], v_count[i],
                s->component_id[i], s->quant_index[i]);
     }
+    if (   nb_components == 4
+        && s->component_id[0] == 'C' - 1
+        && s->component_id[1] == 'M' - 1
+        && s->component_id[2] == 'Y' - 1
+        && s->component_id[3] == 'K' - 1)
+        s->adobe_transform = 0;
 
     if (s->ls && (s->h_max > 1 || s->v_max > 1)) {
         avpriv_report_missing_feature(s->avctx, "Subsampling in JPEG-LS");
@@ -367,6 +381,7 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
 
         /* test interlaced mode */
         if (s->first_picture   &&
+            (s->multiscope != 2 || s->avctx->time_base.den >= 25 * s->avctx->time_base.num) &&
             s->org_height != 0 &&
             s->height < ((s->org_height * 3) / 4)) {
             s->interlaced                    = 1;
@@ -617,7 +632,8 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
         av_log(s->avctx, AV_LOG_DEBUG, "decode_sof0: error, len(%d) mismatch\n", len);
     }
 
-    if (s->rgb && !s->lossless && !s->ls) {
+    if ((s->rgb && !s->lossless && !s->ls) ||
+        (!s->rgb && s->ls && s->nb_components > 1)) {
         av_log(s->avctx, AV_LOG_ERROR, "Unsupported coding and pixel format combination\n");
         return AVERROR_PATCHWELCOME;
     }
@@ -671,6 +687,7 @@ static int decode_block(MJpegDecodeContext *s, int16_t *block, int component,
         return AVERROR_INVALIDDATA;
     }
     val = val * quant_matrix[0] + s->last_dc[component];
+    val = FFMIN(val, 32767);
     s->last_dc[component] = val;
     block[0] = val;
     /* AC coefs */
@@ -718,7 +735,7 @@ static int decode_dc_progressive(MJpegDecodeContext *s, int16_t *block,
         av_log(s->avctx, AV_LOG_ERROR, "error dc\n");
         return AVERROR_INVALIDDATA;
     }
-    val = (val * quant_matrix[0] << Al) + s->last_dc[component];
+    val = (val * (quant_matrix[0] << Al)) + s->last_dc[component];
     s->last_dc[component] = val;
     block[0] = val;
     return 0;
@@ -761,14 +778,14 @@ static int decode_block_progressive(MJpegDecodeContext *s, int16_t *block,
                 if (i >= se) {
                     if (i == se) {
                         j = s->scantable.permutated[se];
-                        block[j] = level * quant_matrix[j] << Al;
+                        block[j] = level * (quant_matrix[j] << Al);
                         break;
                     }
                     av_log(s->avctx, AV_LOG_ERROR, "error count: %d\n", i);
                     return AVERROR_INVALIDDATA;
                 }
                 j = s->scantable.permutated[i];
-                block[j] = level * quant_matrix[j] << Al;
+                block[j] = level * (quant_matrix[j] << Al);
             } else {
                 if (run == 0xF) {// ZRL - skip 15 coefficients
                     i += 15;
@@ -847,7 +864,7 @@ static int decode_block_refinement(MJpegDecodeContext *s, int16_t *block,
                 ZERO_RUN;
                 j = s->scantable.permutated[i];
                 val--;
-                block[j] = ((quant_matrix[j]^val) - val) << Al;
+                block[j] = ((quant_matrix[j] << Al) ^ val) - val;
                 if (i == se) {
                     if (i > *last_nnz)
                         *last_nnz = i;
@@ -982,7 +999,7 @@ static int ljpeg_decode_rgb_scan(MJpegDecodeContext *s, int nb_components, int p
                     return -1;
 
                 left[i] = buffer[mb_x][i] =
-                    mask & (pred + (dc << point_transform));
+                    mask & (pred + (dc * (1 << point_transform)));
             }
 
             if (s->restart_interval && !--s->restart_count) {
@@ -990,7 +1007,14 @@ static int ljpeg_decode_rgb_scan(MJpegDecodeContext *s, int nb_components, int p
                 skip_bits(&s->gb, 16); /* skip RSTn */
             }
         }
-        if (s->nb_components == 4) {
+        if (s->rct && s->nb_components == 4) {
+            for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
+                ptr[4*mb_x + 2] = buffer[mb_x][0] - ((buffer[mb_x][1] + buffer[mb_x][2] - 0x200) >> 2);
+                ptr[4*mb_x + 1] = buffer[mb_x][1] + ptr[4*mb_x + 2];
+                ptr[4*mb_x + 3] = buffer[mb_x][2] + ptr[4*mb_x + 2];
+                ptr[4*mb_x + 0] = buffer[mb_x][3];
+            }
+        } else if (s->nb_components == 4) {
             for(i=0; i<nb_components; i++) {
                 int c= s->comp_index[i];
                 if (s->bits <= 8) {
@@ -1081,7 +1105,10 @@ static int ljpeg_decode_yuv_scan(MJpegDecodeContext *s, int predictor,
                         dc = mjpeg_decode_dc(s, s->dc_index[i]);
                         if(dc == 0xFFFFF)
                             return -1;
-                        if(bits<=8){
+                        if (   h * mb_x + x >= s->width
+                            || v * mb_y + y >= s->height) {
+                            // Nothing to do
+                        } else if (bits<=8) {
                         ptr = s->picture_ptr->data[c] + (linesize * (v * mb_y + y)) + (h * mb_x + x); //FIXME optimize this crap
                         if(y==0 && toprow){
                             if(x==0 && leftcol){
@@ -1149,7 +1176,10 @@ static int ljpeg_decode_yuv_scan(MJpegDecodeContext *s, int predictor,
                         dc = mjpeg_decode_dc(s, s->dc_index[i]);
                         if(dc == 0xFFFFF)
                             return -1;
-                        if(bits<=8){
+                        if (   h * mb_x + x >= s->width
+                            || v * mb_y + y >= s->height) {
+                            // Nothing to do
+                        } else if (bits<=8) {
                             ptr = s->picture_ptr->data[c] +
                               (linesize * (v * mb_y + y)) +
                               (h * mb_x + x); //FIXME optimize this crap
@@ -1217,7 +1247,7 @@ static int mjpeg_decode_scan(MJpegDecodeContext *s, int nb_components, int Ah,
                              int mb_bitmask_size,
                              const AVFrame *reference)
 {
-    int i, mb_x, mb_y;
+    int i, mb_x, mb_y, chroma_h_shift, chroma_v_shift, chroma_width, chroma_height;
     uint8_t *data[MAX_COMPONENTS];
     const uint8_t *reference_data[MAX_COMPONENTS];
     int linesize[MAX_COMPONENTS];
@@ -1234,6 +1264,11 @@ static int mjpeg_decode_scan(MJpegDecodeContext *s, int nb_components, int Ah,
 
     s->restart_count = 0;
 
+    av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt, &chroma_h_shift,
+                                     &chroma_v_shift);
+    chroma_width  = AV_CEIL_RSHIFT(s->width,  chroma_h_shift);
+    chroma_height = AV_CEIL_RSHIFT(s->height, chroma_v_shift);
+
     for (i = 0; i < nb_components; i++) {
         int c   = s->comp_index[i];
         data[c] = s->picture_ptr->data[c];
@@ -1270,8 +1305,8 @@ static int mjpeg_decode_scan(MJpegDecodeContext *s, int nb_components, int Ah,
 
                     if (s->interlaced && s->bottom_field)
                         block_offset += linesize[c] >> 1;
-                    if (   8*(h * mb_x + x) < s->width
-                        && 8*(v * mb_y + y) < s->height) {
+                    if (   8*(h * mb_x + x) < ((c == 1) || (c == 2) ? chroma_width  : s->width)
+                        && 8*(v * mb_y + y) < ((c == 1) || (c == 2) ? chroma_height : s->height)) {
                         ptr = data[c] + block_offset;
                     } else
                         ptr = NULL;
@@ -1346,12 +1381,10 @@ static int mjpeg_decode_scan_progressive_ac(MJpegDecodeContext *s, int ss,
         return AVERROR_INVALIDDATA;
     }
 
-    if (!Al) {
-        // s->coefs_finished is a bitmask for coefficients coded
-        // ss and se are parameters telling start and end coefficients
-        s->coefs_finished[c] |= (2ULL << se) - (1ULL << ss);
-        last_scan = !~s->coefs_finished[c];
-    }
+    // s->coefs_finished is a bitmask for coefficients coded
+    // ss and se are parameters telling start and end coefficients
+    s->coefs_finished[c] |= (2ULL << se) - (1ULL << ss);
+    last_scan = !Al && !~s->coefs_finished[c];
 
     if (s->interlaced && s->bottom_field)
         data += linesize >> 1;
@@ -1618,7 +1651,11 @@ static int mjpeg_decode_app(MJpegDecodeContext *s)
 
         s->avctx->sample_aspect_ratio.num = get_bits(&s->gb, 16);
         s->avctx->sample_aspect_ratio.den = get_bits(&s->gb, 16);
-        ff_set_sar(s->avctx, s->avctx->sample_aspect_ratio);
+        if (   s->avctx->sample_aspect_ratio.num <= 0
+            || s->avctx->sample_aspect_ratio.den <= 0) {
+            s->avctx->sample_aspect_ratio.num = 0;
+            s->avctx->sample_aspect_ratio.den = 1;
+        }
 
         if (s->avctx->debug & FF_DEBUG_PICT_INFO)
             av_log(s->avctx, AV_LOG_INFO,
@@ -1830,6 +1867,8 @@ static int mjpeg_decode_com(MJpegDecodeContext *s)
             else if ((!strncmp(cbuf, "Intel(R) JPEG Library, version 1", 32) && s->avctx->codec_tag) ||
                      (!strncmp(cbuf, "Metasoft MJPEG Codec", 20)))
                 s->flipped = 1;
+            else if (!strcmp(cbuf, "MULTISCOPE II"))
+                s->multiscope = 2;
 
             av_free(cbuf);
         }
@@ -1880,28 +1919,58 @@ int ff_mjpeg_find_marker(MJpegDecodeContext *s,
     /* unescape buffer of SOS, use special treatment for JPEG-LS */
     if (start_code == SOS && !s->ls) {
         const uint8_t *src = *buf_ptr;
+        const uint8_t *ptr = src;
         uint8_t *dst = s->buffer;
 
-        while (src < buf_end) {
-            uint8_t x = *(src++);
+        #define copy_data_segment(skip) do {       \
+            ptrdiff_t length = (ptr - src) - (skip);  \
+            if (length > 0) {                         \
+                memcpy(dst, src, length);             \
+                dst += length;                        \
+                src = ptr;                            \
+            }                                         \
+        } while (0)
+
+        if (s->avctx->codec_id == AV_CODEC_ID_THP) {
+            ptr = buf_end;
+            copy_data_segment(0);
+        } else {
+            while (ptr < buf_end) {
+                uint8_t x = *(ptr++);
 
-            *(dst++) = x;
-            if (s->avctx->codec_id != AV_CODEC_ID_THP) {
                 if (x == 0xff) {
-                    while (src < buf_end && x == 0xff)
-                        x = *(src++);
+                    ptrdiff_t skip = 0;
+                    while (ptr < buf_end && x == 0xff) {
+                        x = *(ptr++);
+                        skip++;
+                    }
 
-                    if (x >= 0xd0 && x <= 0xd7)
-                        *(dst++) = x;
-                    else if (x)
-                        break;
+                    /* 0xFF, 0xFF, ... */
+                    if (skip > 1) {
+                        copy_data_segment(skip);
+
+                        /* decrement src as it is equal to ptr after the
+                         * copy_data_segment macro and we might want to
+                         * copy the current value of x later on */
+                        src--;
+                    }
+
+                    if (x < 0xd0 || x > 0xd7) {
+                        copy_data_segment(1);
+                        if (x)
+                            break;
+                    }
                 }
             }
+            if (src < ptr)
+                copy_data_segment(0);
         }
+        #undef copy_data_segment
+
         *unescaped_buf_ptr  = s->buffer;
         *unescaped_buf_size = dst - s->buffer;
         memset(s->buffer + *unescaped_buf_size, 0,
-               FF_INPUT_BUFFER_PADDING_SIZE);
+               AV_INPUT_BUFFER_PADDING_SIZE);
 
         av_log(s->avctx, AV_LOG_DEBUG, "escaping removed %"PTRDIFF_SPECIFIER" bytes\n",
                (buf_end - *buf_ptr) - (dst - s->buffer));
@@ -1931,7 +2000,7 @@ int ff_mjpeg_find_marker(MJpegDecodeContext *s,
         while (b < t) {
             uint8_t x = src[b++];
             put_bits(&pb, 8, x);
-            if (x == 0xFF) {
+            if (x == 0xFF && b < t) {
                 x = src[b++];
                 if (x & 0x80) {
                     av_log(s->avctx, AV_LOG_WARNING, "Invalid escape sequence\n");
@@ -1946,7 +2015,7 @@ int ff_mjpeg_find_marker(MJpegDecodeContext *s,
         *unescaped_buf_ptr  = dst;
         *unescaped_buf_size = (bit_count + 7) >> 3;
         memset(s->buffer + *unescaped_buf_size, 0,
-               FF_INPUT_BUFFER_PADDING_SIZE);
+               AV_INPUT_BUFFER_PADDING_SIZE);
     } else {
         *unescaped_buf_ptr  = *buf_ptr;
         *unescaped_buf_size = buf_end - *buf_ptr;
@@ -2024,6 +2093,22 @@ int ff_mjpeg_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
             return AVERROR(ENOSYS);
         }
 
+        if (avctx->skip_frame == AVDISCARD_ALL) {
+            switch(start_code) {
+            case SOF0:
+            case SOF1:
+            case SOF2:
+            case SOF3:
+            case SOF48:
+            case SOI:
+            case SOS:
+            case EOI:
+                break;
+            default:
+                goto skip;
+            }
+        }
+
         switch (start_code) {
         case SOI:
             s->restart_interval = 0;
@@ -2055,6 +2140,7 @@ int ff_mjpeg_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                 goto fail;
             break;
         case SOF3:
+            s->avctx->properties |= FF_CODEC_PROPERTY_LOSSLESS;
             s->lossless    = 1;
             s->ls          = 0;
             s->progressive = 0;
@@ -2062,6 +2148,7 @@ int ff_mjpeg_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                 goto fail;
             break;
         case SOF48:
+            s->avctx->properties |= FF_CODEC_PROPERTY_LOSSLESS;
             s->lossless    = 1;
             s->ls          = 1;
             s->progressive = 0;
@@ -2087,6 +2174,10 @@ int ff_mjpeg_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                 if (s->bottom_field == !s->interlace_polarity)
                     break;
             }
+            if (avctx->skip_frame == AVDISCARD_ALL) {
+                s->got_picture = 0;
+                goto the_end_no_picture;
+            }
             if ((ret = av_frame_ref(frame, s->picture_ptr)) < 0)
                 return ret;
             *got_frame = 1;
@@ -2110,6 +2201,9 @@ int ff_mjpeg_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
             goto the_end;
         case SOS:
             s->cur_scan++;
+            if (avctx->skip_frame == AVDISCARD_ALL)
+                break;
+
             if ((ret = ff_mjpeg_decode_sos(s, NULL, 0, NULL)) < 0 &&
                 (avctx->err_recognition & AV_EF_EXPLODE))
                 goto fail;
@@ -2132,6 +2226,7 @@ int ff_mjpeg_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
             break;
         }
 
+skip:
         /* eof process start code */
         buf_ptr += (get_bits_count(&s->gb) + 7) / 8;
         av_log(avctx, AV_LOG_DEBUG,
@@ -2149,7 +2244,7 @@ int ff_mjpeg_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     return ret;
 the_end:
 
-    is16bit = av_pix_fmt_desc_get(s->avctx->pix_fmt)->comp[0].step_minus1;
+    is16bit = av_pix_fmt_desc_get(s->avctx->pix_fmt)->comp[0].step > 1;
 
     if (AV_RB32(s->upscale_h)) {
         int p;
@@ -2174,8 +2269,8 @@ int ff_mjpeg_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
             if (!s->upscale_h[p])
                 continue;
             if (p==1 || p==2) {
-                w = FF_CEIL_RSHIFT(w, hshift);
-                h = FF_CEIL_RSHIFT(h, vshift);
+                w = AV_CEIL_RSHIFT(w, hshift);
+                h = AV_CEIL_RSHIFT(h, vshift);
             }
             if (s->upscale_v[p])
                 h = (h+1)>>1;
@@ -2232,8 +2327,8 @@ int ff_mjpeg_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
             if (!s->upscale_v[p])
                 continue;
             if (p==1 || p==2) {
-                w = FF_CEIL_RSHIFT(w, hshift);
-                h = FF_CEIL_RSHIFT(h, vshift);
+                w = AV_CEIL_RSHIFT(w, hshift);
+                h = AV_CEIL_RSHIFT(h, vshift);
             }
             dst = &((uint8_t *)s->picture_ptr->data[p])[(h - 1) * s->linesize[p]];
             for (i = h - 1; i; i--) {
@@ -2257,8 +2352,8 @@ int ff_mjpeg_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
             int w = s->picture_ptr->width;
             int h = s->picture_ptr->height;
             if(index && index<3){
-                w = FF_CEIL_RSHIFT(w, hshift);
-                h = FF_CEIL_RSHIFT(h, vshift);
+                w = AV_CEIL_RSHIFT(w, hshift);
+                h = AV_CEIL_RSHIFT(h, vshift);
             }
             if(dst){
                 uint8_t *dst2 = dst + s->picture_ptr->linesize[index]*(h-1);
@@ -2328,6 +2423,7 @@ int ff_mjpeg_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     av_dict_copy(avpriv_frame_get_metadatap(data), s->exif_metadata, 0);
     av_dict_free(&s->exif_metadata);
 
+the_end_no_picture:
     av_log(avctx, AV_LOG_DEBUG, "decode frame unused %"PTRDIFF_SPECIFIER" bytes\n",
            buf_end - buf_ptr);
 //  return buf_end - buf_ptr;
@@ -2377,7 +2473,7 @@ static void decode_flush(AVCodecContext *avctx)
 #define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
 static const AVOption options[] = {
     { "extern_huff", "Use external huffman table.",
-      OFFSET(extern_huff), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VD },
+      OFFSET(extern_huff), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VD },
     { NULL },
 };
 
@@ -2398,10 +2494,11 @@ AVCodec ff_mjpeg_decoder = {
     .close          = ff_mjpeg_decode_end,
     .decode         = ff_mjpeg_decode_frame,
     .flush          = decode_flush,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .max_lowres     = 3,
     .priv_class     = &mjpegdec_class,
-    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE,
+    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE |
+                      FF_CODEC_CAP_SKIP_FRAME_FILL_PARAM,
 };
 #endif
 #if CONFIG_THP_DECODER
@@ -2415,7 +2512,7 @@ AVCodec ff_thp_decoder = {
     .close          = ff_mjpeg_decode_end,
     .decode         = ff_mjpeg_decode_frame,
     .flush          = decode_flush,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .max_lowres     = 3,
     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE,
 };
diff --git a/libavcodec/mjpegdec.h b/libavcodec/mjpegdec.h
index 28d3e4aa..fb811294 100644
--- a/libavcodec/mjpegdec.h
+++ b/libavcodec/mjpegdec.h
@@ -114,6 +114,7 @@ typedef struct MJpegDecodeContext {
     int buggy_avid;
     int cs_itu601;
     int interlace_polarity;
+    int multiscope;
 
     int mjpb_skiptosod;
 
diff --git a/libavcodec/mjpegenc.c b/libavcodec/mjpegenc.c
index 2188725e..3d113770 100644
--- a/libavcodec/mjpegenc.c
+++ b/libavcodec/mjpegenc.c
@@ -224,9 +224,11 @@ static int amv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
 
     av_pix_fmt_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
 
+#if FF_API_EMU_EDGE
     //CODEC_FLAG_EMU_EDGE have to be cleared
     if(s->avctx->flags & CODEC_FLAG_EMU_EDGE)
         return AVERROR(EINVAL);
+#endif
 
     if ((avctx->height & 15) && avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL) {
         av_log(avctx, AV_LOG_ERROR,
@@ -251,8 +253,26 @@ static int amv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
     return ret;
 }
 
+#define OFFSET(x) offsetof(MpegEncContext, x)
+#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
+static const AVOption options[] = {
+FF_MPV_COMMON_OPTS
+{ "pred", "Prediction method", OFFSET(pred), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 3, VE, "pred" },
+    { "left",   NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, INT_MIN, INT_MAX, VE, "pred" },
+    { "plane",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 2 }, INT_MIN, INT_MAX, VE, "pred" },
+    { "median", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 3 }, INT_MIN, INT_MAX, VE, "pred" },
+
+{ NULL},
+};
+
 #if CONFIG_MJPEG_ENCODER
-FF_MPV_GENERIC_CLASS(mjpeg)
+
+static const AVClass mjpeg_class = {
+    .class_name = "mjpeg encoder",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
 
 AVCodec ff_mjpeg_encoder = {
     .name           = "mjpeg",
@@ -263,7 +283,7 @@ AVCodec ff_mjpeg_encoder = {
     .init           = ff_mpv_encode_init,
     .encode2        = ff_mpv_encode_picture,
     .close          = ff_mpv_encode_end,
-    .capabilities   = CODEC_CAP_SLICE_THREADS | CODEC_CAP_FRAME_THREADS | CODEC_CAP_INTRA_ONLY,
+    .capabilities   = AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_INTRA_ONLY,
     .pix_fmts       = (const enum AVPixelFormat[]){
         AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_NONE
     },
@@ -271,7 +291,12 @@ AVCodec ff_mjpeg_encoder = {
 };
 #endif
 #if CONFIG_AMV_ENCODER
-FF_MPV_GENERIC_CLASS(amv)
+static const AVClass amv_class = {
+    .class_name = "amv encoder",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
 
 AVCodec ff_amv_encoder = {
     .name           = "amv",
diff --git a/libavcodec/mjpegenc_common.c b/libavcodec/mjpegenc_common.c
index daa5b692..7a6fe746 100644
--- a/libavcodec/mjpegenc_common.c
+++ b/libavcodec/mjpegenc_common.c
@@ -64,11 +64,14 @@ static void jpeg_table_header(AVCodecContext *avctx, PutBitContext *p,
 {
     int i, j, size;
     uint8_t *ptr;
+    MpegEncContext *s = avctx->priv_data;
 
     if (avctx->codec_id != AV_CODEC_ID_LJPEG) {
         int matrix_count = 1 + !!memcmp(luma_intra_matrix,
                                         chroma_intra_matrix,
                                         sizeof(luma_intra_matrix[0]) * 64);
+    if (s->force_duplicated_matrix)
+        matrix_count = 2;
     /* quant matrixes */
     put_marker(p, DQT);
     put_bits(p, 16, 2 + matrix_count * (1 + 64));
@@ -119,6 +122,16 @@ static void jpeg_put_comments(AVCodecContext *avctx, PutBitContext *p)
     uint8_t *ptr;
 
     if (avctx->sample_aspect_ratio.num > 0 && avctx->sample_aspect_ratio.den > 0) {
+        AVRational sar = avctx->sample_aspect_ratio;
+
+        if (sar.num > 65535 || sar.den > 65535) {
+            if (!av_reduce(&sar.num, &sar.den, avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den, 65535))
+                av_log(avctx, AV_LOG_WARNING,
+                    "Cannot store exact aspect ratio %d:%d\n",
+                    avctx->sample_aspect_ratio.num,
+                    avctx->sample_aspect_ratio.den);
+        }
+
         /* JFIF header */
         put_marker(p, APP0);
         put_bits(p, 16, 16);
@@ -128,14 +141,14 @@ static void jpeg_put_comments(AVCodecContext *avctx, PutBitContext *p)
          * released revision. */
         put_bits(p, 16, 0x0102);
         put_bits(p,  8, 0);              /* units type: 0 - aspect ratio */
-        put_bits(p, 16, avctx->sample_aspect_ratio.num);
-        put_bits(p, 16, avctx->sample_aspect_ratio.den);
+        put_bits(p, 16, sar.num);
+        put_bits(p, 16, sar.den);
         put_bits(p, 8, 0); /* thumbnail width */
         put_bits(p, 8, 0); /* thumbnail height */
     }
 
     /* comment */
-    if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
+    if (!(avctx->flags & AV_CODEC_FLAG_BITEXACT)) {
         put_marker(p, COM);
         flush_put_bits(p);
         ptr = put_bits_ptr(p);
@@ -145,9 +158,10 @@ static void jpeg_put_comments(AVCodecContext *avctx, PutBitContext *p)
         AV_WB16(ptr, size);
     }
 
-    if (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
-        avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
-        avctx->pix_fmt == AV_PIX_FMT_YUV444P) {
+    if (((avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
+          avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
+          avctx->pix_fmt == AV_PIX_FMT_YUV444P) && avctx->color_range != AVCOL_RANGE_JPEG)
+        || avctx->color_range == AVCOL_RANGE_MPEG) {
         put_marker(p, COM);
         flush_put_bits(p);
         ptr = put_bits_ptr(p);
@@ -158,7 +172,7 @@ static void jpeg_put_comments(AVCodecContext *avctx, PutBitContext *p)
     }
 }
 
-void ff_mjpeg_init_hvsample(AVCodecContext *avctx, int hsample[3], int vsample[3])
+void ff_mjpeg_init_hvsample(AVCodecContext *avctx, int hsample[4], int vsample[4])
 {
     int chroma_h_shift, chroma_v_shift;
 
@@ -170,7 +184,8 @@ void ff_mjpeg_init_hvsample(AVCodecContext *avctx, int hsample[3], int vsample[3
          || avctx->pix_fmt == AV_PIX_FMT_BGR24)) {
         vsample[0] = hsample[0] =
         vsample[1] = hsample[1] =
-        vsample[2] = hsample[2] = 1;
+        vsample[2] = hsample[2] =
+        vsample[3] = hsample[3] = 1;
     } else if (avctx->pix_fmt == AV_PIX_FMT_YUV444P || avctx->pix_fmt == AV_PIX_FMT_YUVJ444P) {
         vsample[0] = vsample[1] = vsample[2] = 2;
         hsample[0] = hsample[1] = hsample[2] = 1;
@@ -185,13 +200,14 @@ void ff_mjpeg_init_hvsample(AVCodecContext *avctx, int hsample[3], int vsample[3
 }
 
 void ff_mjpeg_encode_picture_header(AVCodecContext *avctx, PutBitContext *pb,
-                                    ScanTable *intra_scantable,
+                                    ScanTable *intra_scantable, int pred,
                                     uint16_t luma_intra_matrix[64],
                                     uint16_t chroma_intra_matrix[64])
 {
     const int lossless = avctx->codec_id != AV_CODEC_ID_MJPEG && avctx->codec_id != AV_CODEC_ID_AMV;
-    int hsample[3], vsample[3];
+    int hsample[4], vsample[4];
     int i;
+    int components = 3 + (avctx->pix_fmt == AV_PIX_FMT_BGRA);
     int chroma_matrix = !!memcmp(luma_intra_matrix,
                                  chroma_intra_matrix,
                                  sizeof(luma_intra_matrix[0])*64);
@@ -222,7 +238,7 @@ void ff_mjpeg_encode_picture_header(AVCodecContext *avctx, PutBitContext *pb,
         put_bits(pb, 8, 8); /* 8 bits/component */
     put_bits(pb, 16, avctx->height);
     put_bits(pb, 16, avctx->width);
-    put_bits(pb, 8, 3); /* 3 components */
+    put_bits(pb, 8, components); /* 3 or 4 components */
 
     /* Y component */
     put_bits(pb, 8, 1); /* component number */
@@ -242,10 +258,17 @@ void ff_mjpeg_encode_picture_header(AVCodecContext *avctx, PutBitContext *pb,
     put_bits(pb, 4, vsample[2]); /* V factor */
     put_bits(pb, 8, lossless ? 0 : chroma_matrix); /* select matrix */
 
+    if (components == 4) {
+        put_bits(pb, 8, 4); /* component number */
+        put_bits(pb, 4, hsample[3]); /* H factor */
+        put_bits(pb, 4, vsample[3]); /* V factor */
+        put_bits(pb, 8, 0); /* select matrix */
+    }
+
     /* scan header */
     put_marker(pb, SOS);
-    put_bits(pb, 16, 12); /* length */
-    put_bits(pb, 8, 3); /* 3 components */
+    put_bits(pb, 16, 6 + 2*components); /* length */
+    put_bits(pb, 8, components); /* 3 or 4 components */
 
     /* Y component */
     put_bits(pb, 8, 1); /* index */
@@ -262,7 +285,14 @@ void ff_mjpeg_encode_picture_header(AVCodecContext *avctx, PutBitContext *pb,
     put_bits(pb, 4, 1); /* DC huffman table index */
     put_bits(pb, 4, lossless ? 0 : 1); /* AC huffman table index */
 
-    put_bits(pb, 8, lossless ? avctx->prediction_method + 1 : 0); /* Ss (not used) */
+    if (components == 4) {
+        /* Alpha component */
+        put_bits(pb, 8, 4); /* index */
+        put_bits(pb, 4, 0); /* DC huffman table index */
+        put_bits(pb, 4, 0); /* AC huffman table index */
+    }
+
+    put_bits(pb, 8, lossless ? pred : 0); /* Ss: predictor when lossless, else 0 */
 
     switch (avctx->codec_id) {
     case AV_CODEC_ID_MJPEG:  put_bits(pb, 8, 63); break; /* Se (not used) */
diff --git a/libavcodec/mjpegenc_common.h b/libavcodec/mjpegenc_common.h
index 87f15055..6e51ca04 100644
--- a/libavcodec/mjpegenc_common.h
+++ b/libavcodec/mjpegenc_common.h
@@ -29,13 +29,13 @@
 #include "put_bits.h"
 
 void ff_mjpeg_encode_picture_header(AVCodecContext *avctx, PutBitContext *pb,
-                                    ScanTable *intra_scantable,
+                                    ScanTable *intra_scantable, int pred,
                                     uint16_t luma_intra_matrix[64],
                                     uint16_t chroma_intra_matrix[64]);
 void ff_mjpeg_encode_picture_trailer(PutBitContext *pb, int header_bits);
 void ff_mjpeg_escape_FF(PutBitContext *pb, int start);
 int ff_mjpeg_encode_stuffing(MpegEncContext *s);
-void ff_mjpeg_init_hvsample(AVCodecContext *avctx, int hsample[3], int vsample[3]);
+void ff_mjpeg_init_hvsample(AVCodecContext *avctx, int hsample[4], int vsample[4]);
 
 void ff_mjpeg_encode_dc(PutBitContext *pb, int val,
                         uint8_t *huff_size, uint16_t *huff_code);
diff --git a/libavcodec/mlp_parser.c b/libavcodec/mlp_parser.c
index deaa844f..23601c86 100644
--- a/libavcodec/mlp_parser.c
+++ b/libavcodec/mlp_parser.c
@@ -357,15 +357,6 @@ static int mlp_parse(AVCodecParserContext *s,
         if(!avctx->channels || !avctx->channel_layout) {
         if (mh.stream_type == 0xbb) {
             /* MLP stream */
-#if FF_API_REQUEST_CHANNELS
-FF_DISABLE_DEPRECATION_WARNINGS
-            if (avctx->request_channels > 0 && avctx->request_channels <= 2 &&
-                mh.num_substreams > 1) {
-                avctx->channels       = 2;
-                avctx->channel_layout = AV_CH_LAYOUT_STEREO;
-FF_ENABLE_DEPRECATION_WARNINGS
-            } else
-#endif
             if (avctx->request_channel_layout &&
                 (avctx->request_channel_layout & AV_CH_LAYOUT_STEREO) ==
                 avctx->request_channel_layout &&
@@ -378,20 +369,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
             }
         } else { /* mh.stream_type == 0xba */
             /* TrueHD stream */
-#if FF_API_REQUEST_CHANNELS
-FF_DISABLE_DEPRECATION_WARNINGS
-            if (avctx->request_channels > 0 && avctx->request_channels <= 2 &&
-                mh.num_substreams > 1) {
-                avctx->channels       = 2;
-                avctx->channel_layout = AV_CH_LAYOUT_STEREO;
-            } else if (avctx->request_channels > 0 &&
-                       avctx->request_channels <= mh.channels_thd_stream1) {
-                avctx->channels       = mh.channels_thd_stream1;
-                avctx->channel_layout = mh.channel_layout_thd_stream1;
-FF_ENABLE_DEPRECATION_WARNINGS
-            } else
-#endif
-                if (avctx->request_channel_layout &&
+            if (avctx->request_channel_layout &&
                     (avctx->request_channel_layout & AV_CH_LAYOUT_STEREO) ==
                     avctx->request_channel_layout &&
                     mh.num_substreams > 1) {
diff --git a/libavcodec/mlpdec.c b/libavcodec/mlpdec.c
index 490d107e..c93b058d 100644
--- a/libavcodec/mlpdec.c
+++ b/libavcodec/mlpdec.c
@@ -533,19 +533,6 @@ static int read_restart_header(MLPDecodeContext *m, GetBitContext *gbp,
     s->max_channel        = max_channel;
     s->max_matrix_channel = max_matrix_channel;
 
-#if FF_API_REQUEST_CHANNELS
-FF_DISABLE_DEPRECATION_WARNINGS
-    if (m->avctx->request_channels > 0 &&
-        m->avctx->request_channels <= s->max_channel + 1 &&
-        m->max_decoded_substream > substr) {
-        av_log(m->avctx, AV_LOG_DEBUG,
-               "Extracting %d-channel downmix from substream %d. "
-               "Further substreams will be skipped.\n",
-               s->max_channel + 1, substr);
-        m->max_decoded_substream = substr;
-FF_ENABLE_DEPRECATION_WARNINGS
-    } else
-#endif
     if (m->avctx->request_channel_layout && (s->ch_layout & m->avctx->request_channel_layout) ==
         m->avctx->request_channel_layout && m->max_decoded_substream > substr) {
         av_log(m->avctx, AV_LOG_DEBUG,
@@ -615,7 +602,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
 
         /* Default audio coding is 24-bit raw PCM. */
         cp->huff_offset      = 0;
-        cp->sign_huff_offset = (-1) << 23;
+        cp->sign_huff_offset = -(1 << 23);
         cp->codebook         = 0;
         cp->huff_lsbs        = 24;
     }
@@ -1314,7 +1301,7 @@ AVCodec ff_mlp_decoder = {
     .priv_data_size = sizeof(MLPDecodeContext),
     .init           = mlp_decode_init,
     .decode         = read_access_unit,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
 #endif
 #if CONFIG_TRUEHD_DECODER
@@ -1326,6 +1313,6 @@ AVCodec ff_truehd_decoder = {
     .priv_data_size = sizeof(MLPDecodeContext),
     .init           = mlp_decode_init,
     .decode         = read_access_unit,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
 #endif /* CONFIG_TRUEHD_DECODER */
diff --git a/libavcodec/mmaldec.c b/libavcodec/mmaldec.c
index aa46621d..30861d9b 100644
--- a/libavcodec/mmaldec.c
+++ b/libavcodec/mmaldec.c
@@ -26,6 +26,7 @@
 
 #include <bcm_host.h>
 #include <interface/mmal/mmal.h>
+#include <interface/mmal/mmal_parameters_video.h>
 #include <interface/mmal/util/mmal_util.h>
 #include <interface/mmal/util/mmal_util_params.h>
 #include <interface/mmal/util/mmal_default_components.h>
@@ -65,8 +66,7 @@ typedef struct FFBufferRef {
 typedef struct MMALDecodeContext {
     AVClass *av_class;
     int extra_buffers;
-
-    AVBitStreamFilterContext *bsfc;
+    int extra_decoder_buffers;
 
     MMAL_COMPONENT_T *decoder;
     MMAL_QUEUE_T *queue_decoded_frames;
@@ -82,9 +82,11 @@ typedef struct MMALDecodeContext {
     FFBufferEntry *waiting_buffers, *waiting_buffers_tail;
 
     int64_t packets_sent;
+    volatile int packets_buffered;
     int64_t frames_output;
     int eos_received;
     int eos_sent;
+    int extradata_sent;
 } MMALDecodeContext;
 
 // Assume decoder is guaranteed to produce output after at least this many
@@ -159,12 +161,17 @@ static void ffmmal_stop_decoder(AVCodecContext *avctx)
 
         ctx->waiting_buffers = buffer->next;
 
+        if (buffer->flags & MMAL_BUFFER_HEADER_FLAG_FRAME_END)
+            avpriv_atomic_int_add_and_fetch(&ctx->packets_buffered, -1);
+
         av_buffer_unref(&buffer->ref);
         av_free(buffer);
     }
     ctx->waiting_buffers_tail = NULL;
 
-    ctx->frames_output = ctx->eos_received = ctx->eos_sent = ctx->packets_sent = 0;
+    av_assert0(avpriv_atomic_int_get(&ctx->packets_buffered) == 0);
+
+    ctx->frames_output = ctx->eos_received = ctx->eos_sent = ctx->packets_sent = ctx->extradata_sent = 0;
 }
 
 static av_cold int ffmmal_close_decoder(AVCodecContext *avctx)
@@ -180,9 +187,6 @@ static av_cold int ffmmal_close_decoder(AVCodecContext *avctx)
     mmal_pool_destroy(ctx->pool_in);
     ffmmal_poolref_unref(ctx->pool_out);
 
-    if (ctx->bsfc)
-        av_bitstream_filter_close(ctx->bsfc);
-
     mmal_vc_deinit();
 
     return 0;
@@ -190,9 +194,15 @@ static av_cold int ffmmal_close_decoder(AVCodecContext *avctx)
 
 static void input_callback(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer)
 {
+    AVCodecContext *avctx = (AVCodecContext*)port->userdata;
+    MMALDecodeContext *ctx = avctx->priv_data;
+
     if (!buffer->cmd) {
-        AVBufferRef *buf = buffer->user_data;
-        av_buffer_unref(&buf);
+        FFBufferEntry *entry = buffer->user_data;
+        av_buffer_unref(&entry->ref);
+        if (entry->flags & MMAL_BUFFER_HEADER_FLAG_FRAME_END)
+            avpriv_atomic_int_add_and_fetch(&ctx->packets_buffered, -1);
+        av_free(entry);
     }
     mmal_buffer_header_release(buffer);
 }
@@ -277,6 +287,9 @@ static int ffmal_update_format(AVCodecContext *avctx)
     if ((status = mmal_port_parameter_set_uint32(decoder->output[0], MMAL_PARAMETER_EXTRA_BUFFERS, ctx->extra_buffers)))
         goto fail;
 
+    if ((status = mmal_port_parameter_set_boolean(decoder->output[0], MMAL_PARAMETER_VIDEO_INTERPOLATE_TIMESTAMPS, 0)))
+        goto fail;
+
     if (avctx->pix_fmt == AV_PIX_FMT_MMAL) {
         format_out->encoding = MMAL_ENCODING_OPAQUE;
     } else {
@@ -320,6 +333,7 @@ static av_cold int ffmmal_init_decoder(AVCodecContext *avctx)
     MMAL_STATUS_T status;
     MMAL_ES_FORMAT_T *format_in;
     MMAL_COMPONENT_T *decoder;
+    char tmp[32];
     int ret = 0;
 
     bcm_host_init();
@@ -341,7 +355,21 @@ static av_cold int ffmmal_init_decoder(AVCodecContext *avctx)
 
     format_in = decoder->input[0]->format;
     format_in->type = MMAL_ES_TYPE_VIDEO;
-    format_in->encoding = MMAL_ENCODING_H264;
+    switch (avctx->codec_id) {
+        case AV_CODEC_ID_MPEG2VIDEO:
+            format_in->encoding = MMAL_ENCODING_MP2V;
+            break;
+        case AV_CODEC_ID_MPEG4:
+            format_in->encoding = MMAL_ENCODING_MP4V;
+            break;
+        case AV_CODEC_ID_VC1:
+            format_in->encoding = MMAL_ENCODING_WVC1;
+            break;
+        case AV_CODEC_ID_H264:
+        default:
+            format_in->encoding = MMAL_ENCODING_H264;
+            break;
+    }
     format_in->es->video.width = FFALIGN(avctx->width, 32);
     format_in->es->video.height = FFALIGN(avctx->height, 16);
     format_in->es->video.crop.width = avctx->width;
@@ -352,23 +380,12 @@ static av_cold int ffmmal_init_decoder(AVCodecContext *avctx)
     format_in->es->video.par.den = avctx->sample_aspect_ratio.den;
     format_in->flags = MMAL_ES_FORMAT_FLAG_FRAMED;
 
-    if (avctx->codec->id == AV_CODEC_ID_H264 && avctx->extradata && avctx->extradata[0] == 1) {
-        uint8_t *dummy_p;
-        int dummy_int;
-        ctx->bsfc = av_bitstream_filter_init("h264_mp4toannexb");
-        if (!ctx->bsfc) {
-            av_log(avctx, AV_LOG_ERROR, "Cannot open the h264_mp4toannexb BSF!\n");
-            ret = AVERROR(ENOSYS);
-            goto fail;
-        }
-        av_bitstream_filter_filter(ctx->bsfc, avctx, NULL, &dummy_p, &dummy_int, NULL, 0, 0);
-    }
+    av_get_codec_tag_string(tmp, sizeof(tmp), format_in->encoding);
+    av_log(avctx, AV_LOG_DEBUG, "Using MMAL %s encoding.\n", tmp);
 
-    if (avctx->extradata_size) {
-        if ((status = mmal_format_extradata_alloc(format_in, avctx->extradata_size)))
-            goto fail;
-        format_in->extradata_size = avctx->extradata_size;
-        memcpy(format_in->extradata, avctx->extradata, format_in->extradata_size);
+    if (mmal_port_parameter_set_uint32(decoder->input[0], MMAL_PARAMETER_VIDEO_MAX_NUM_CALLBACKS,
+                                       -1 - ctx->extra_decoder_buffers)) {
+        av_log(avctx, AV_LOG_WARNING, "Could not set input buffering limit.\n");
     }
 
     if ((status = mmal_port_format_commit(decoder->input[0])))
@@ -438,7 +455,9 @@ static void ffmmal_flush(AVCodecContext *avctx)
 // (due to us not reading/returning enough output buffers) and won't accept
 // new input. (This wouldn't be an issue if MMAL input buffers always were
 // complete frames - then the input buffer just would have to be big enough.)
-static int ffmmal_add_packet(AVCodecContext *avctx, AVPacket *avpkt)
+// If is_extradata is set, send it as MMAL_BUFFER_HEADER_FLAG_CONFIG.
+static int ffmmal_add_packet(AVCodecContext *avctx, AVPacket *avpkt,
+                             int is_extradata)
 {
     MMALDecodeContext *ctx = avctx->priv_data;
     AVBufferRef *buf = NULL;
@@ -447,33 +466,34 @@ static int ffmmal_add_packet(AVCodecContext *avctx, AVPacket *avpkt)
     uint8_t *start;
     int ret = 0;
 
-    ctx->packets_sent++;
-
     if (avpkt->size) {
-        if (ctx->bsfc) {
-            uint8_t *tmp_data;
-            int tmp_size;
-            if ((ret = av_bitstream_filter_filter(ctx->bsfc, avctx, NULL,
-                                                  &tmp_data, &tmp_size,
-                                                  avpkt->data, avpkt->size,
-                                                  avpkt->flags & AV_PKT_FLAG_KEY)) < 0)
-                goto done;
-            buf = av_buffer_create(tmp_data, tmp_size, NULL, NULL, 0);
+        if (avpkt->buf) {
+            buf = av_buffer_ref(avpkt->buf);
+            size = avpkt->size;
+            data = avpkt->data;
         } else {
-            if (avpkt->buf) {
-                buf = av_buffer_ref(avpkt->buf);
-            } else {
-                buf = av_buffer_alloc(avpkt->size);
-                if (buf)
-                    memcpy(buf->data, avpkt->data, avpkt->size);
+            buf = av_buffer_alloc(avpkt->size);
+            if (buf) {
+                memcpy(buf->data, avpkt->data, avpkt->size);
+                size = buf->size;
+                data = buf->data;
             }
         }
         if (!buf) {
             ret = AVERROR(ENOMEM);
             goto done;
         }
-        size = buf->size;
-        data = buf->data;
+        if (!is_extradata)
+            ctx->packets_sent++;
+    } else {
+        if (ctx->eos_sent)
+            goto done;
+        if (!ctx->packets_sent) {
+            // Short-cut the flush logic to avoid upsetting MMAL.
+            ctx->eos_sent = 1;
+            ctx->eos_received = 1;
+            goto done;
+        }
     }
 
     start = data;
@@ -488,6 +508,9 @@ static int ffmmal_add_packet(AVCodecContext *avctx, AVPacket *avpkt)
         buffer->data = data;
         buffer->length = FFMIN(size, ctx->decoder->input[0]->buffer_size);
 
+        if (is_extradata)
+            buffer->flags |= MMAL_BUFFER_HEADER_FLAG_CONFIG;
+
         if (data == start)
             buffer->flags |= MMAL_BUFFER_HEADER_FLAG_FRAME_START;
 
@@ -497,8 +520,10 @@ static int ffmmal_add_packet(AVCodecContext *avctx, AVPacket *avpkt)
         buffer->pts = avpkt->pts == AV_NOPTS_VALUE ? MMAL_TIME_UNKNOWN : avpkt->pts;
         buffer->dts = avpkt->dts == AV_NOPTS_VALUE ? MMAL_TIME_UNKNOWN : avpkt->dts;
 
-        if (!size)
+        if (!size) {
             buffer->flags |= MMAL_BUFFER_HEADER_FLAG_FRAME_END;
+            avpriv_atomic_int_add_and_fetch(&ctx->packets_buffered, 1);
+        }
 
         if (!buffer->length) {
             buffer->flags |= MMAL_BUFFER_HEADER_FLAG_EOS;
@@ -550,19 +575,21 @@ static int ffmmal_fill_input_port(AVCodecContext *avctx)
         mbuffer->flags = buffer->flags;
         mbuffer->data = buffer->data;
         mbuffer->length = buffer->length;
-        mbuffer->user_data = buffer->ref;
+        mbuffer->user_data = buffer;
         mbuffer->alloc_size = ctx->decoder->input[0]->buffer_size;
 
-        if ((status = mmal_port_send_buffer(ctx->decoder->input[0], mbuffer))) {
-            mmal_buffer_header_release(mbuffer);
-            av_buffer_unref(&buffer->ref);
-        }
-
         // Remove from start of the list
         ctx->waiting_buffers = buffer->next;
         if (ctx->waiting_buffers_tail == buffer)
             ctx->waiting_buffers_tail = NULL;
-        av_free(buffer);
+
+        if ((status = mmal_port_send_buffer(ctx->decoder->input[0], mbuffer))) {
+            mmal_buffer_header_release(mbuffer);
+            av_buffer_unref(&buffer->ref);
+            if (buffer->flags & MMAL_BUFFER_HEADER_FLAG_FRAME_END)
+                avpriv_atomic_int_add_and_fetch(&ctx->packets_buffered, -1);
+            av_free(buffer);
+        }
 
         if (status) {
             av_log(avctx, AV_LOG_ERROR, "MMAL error %d when sending input\n", (int)status);
@@ -611,10 +638,8 @@ static int ffmal_copy_frame(AVCodecContext *avctx,  AVFrame *frame,
         }
     }
 
-    if (buffer->pts != MMAL_TIME_UNKNOWN) {
-        frame->pkt_pts = buffer->pts;
-        frame->pts = buffer->pts;
-    }
+    frame->pkt_pts = buffer->pts == MMAL_TIME_UNKNOWN ? AV_NOPTS_VALUE : buffer->pts;
+    frame->pkt_dts = AV_NOPTS_VALUE;
 
 done:
     return ret;
@@ -643,13 +668,23 @@ static int ffmmal_read_frame(AVCodecContext *avctx, AVFrame *frame, int *got_fra
         // excessive buffering.
         // We also wait if we sent eos, but didn't receive it yet (think of decoding
         // stream with a very low number of frames).
-        if (ctx->frames_output || ctx->packets_sent > MAX_DELAYED_FRAMES || ctx->eos_sent) {
-            buffer = mmal_queue_wait(ctx->queue_decoded_frames);
+        if (avpriv_atomic_int_get(&ctx->packets_buffered) > MAX_DELAYED_FRAMES ||
+            (ctx->packets_sent && ctx->eos_sent)) {
+            // MMAL will ignore broken input packets, which means the frame we
+            // expect here may never arrive. Dealing with this correctly is
+            // complicated, so as a workaround we wait with a timeout rather
+            // than blocking forever in this unlikely situation.
+            buffer = mmal_queue_timedwait(ctx->queue_decoded_frames, 100);
+            if (!buffer) {
+                av_log(avctx, AV_LOG_ERROR, "Did not get output frame from MMAL.\n");
+                ret = AVERROR_UNKNOWN;
+                goto done;
+            }
         } else {
             buffer = mmal_queue_get(ctx->queue_decoded_frames);
+            if (!buffer)
+                goto done;
         }
-        if (!buffer)
-            goto done;
 
         ctx->eos_received |= !!(buffer->flags & MMAL_BUFFER_HEADER_FLAG_EOS);
         if (ctx->eos_received)
@@ -715,10 +750,21 @@ static int ffmmal_read_frame(AVCodecContext *avctx, AVFrame *frame, int *got_fra
 static int ffmmal_decode(AVCodecContext *avctx, void *data, int *got_frame,
                          AVPacket *avpkt)
 {
+    MMALDecodeContext *ctx = avctx->priv_data;
     AVFrame *frame = data;
     int ret = 0;
 
-    if ((ret = ffmmal_add_packet(avctx, avpkt)) < 0)
+    if (avctx->extradata_size && !ctx->extradata_sent) {
+        AVPacket pkt = {0};
+        av_init_packet(&pkt);
+        pkt.data = avctx->extradata;
+        pkt.size = avctx->extradata_size;
+        ctx->extradata_sent = 1;
+        if ((ret = ffmmal_add_packet(avctx, &pkt, 1)) < 0)
+            return ret;
+    }
+
+    if ((ret = ffmmal_add_packet(avctx, avpkt, 0)) < 0)
         return ret;
 
     if ((ret = ffmmal_fill_input_port(avctx)) < 0)
@@ -749,30 +795,61 @@ AVHWAccel ff_h264_mmal_hwaccel = {
     .pix_fmt    = AV_PIX_FMT_MMAL,
 };
 
-static const AVOption options[]={
-    {"extra_buffers", "extra buffers", offsetof(MMALDecodeContext, extra_buffers), AV_OPT_TYPE_INT, {.i64 = 10}, 0, 256, 0},
-    {NULL}
+AVHWAccel ff_mpeg2_mmal_hwaccel = {
+    .name       = "mpeg2_mmal",
+    .type       = AVMEDIA_TYPE_VIDEO,
+    .id         = AV_CODEC_ID_MPEG2VIDEO,
+    .pix_fmt    = AV_PIX_FMT_MMAL,
 };
 
-static const AVClass ffmmaldec_class = {
-    .class_name = "mmaldec",
-    .option     = options,
-    .version    = LIBAVUTIL_VERSION_INT,
+AVHWAccel ff_mpeg4_mmal_hwaccel = {
+    .name       = "mpeg4_mmal",
+    .type       = AVMEDIA_TYPE_VIDEO,
+    .id         = AV_CODEC_ID_MPEG4,
+    .pix_fmt    = AV_PIX_FMT_MMAL,
 };
 
-AVCodec ff_h264_mmal_decoder = {
-    .name           = "h264_mmal",
-    .long_name      = NULL_IF_CONFIG_SMALL("h264 (mmal)"),
-    .type           = AVMEDIA_TYPE_VIDEO,
-    .id             = AV_CODEC_ID_H264,
-    .priv_data_size = sizeof(MMALDecodeContext),
-    .init           = ffmmal_init_decoder,
-    .close          = ffmmal_close_decoder,
-    .decode         = ffmmal_decode,
-    .flush          = ffmmal_flush,
-    .priv_class     = &ffmmaldec_class,
-    .capabilities   = CODEC_CAP_DELAY,
-    .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_MMAL,
-                                                     AV_PIX_FMT_YUV420P,
-                                                     AV_PIX_FMT_NONE},
+AVHWAccel ff_vc1_mmal_hwaccel = {
+    .name       = "vc1_mmal",
+    .type       = AVMEDIA_TYPE_VIDEO,
+    .id         = AV_CODEC_ID_VC1,
+    .pix_fmt    = AV_PIX_FMT_MMAL,
 };
+
+static const AVOption options[]={
+    {"extra_buffers", "extra buffers", offsetof(MMALDecodeContext, extra_buffers), AV_OPT_TYPE_INT, {.i64 = 10}, 0, 256, 0},
+    {"extra_decoder_buffers", "extra MMAL internal buffered frames", offsetof(MMALDecodeContext, extra_decoder_buffers), AV_OPT_TYPE_INT, {.i64 = 10}, 0, 256, 0},
+    {NULL}
+};
+
+#define FFMMAL_DEC_CLASS(NAME) \
+    static const AVClass ffmmal_##NAME##_dec_class = { \
+        .class_name = "mmal_" #NAME "_dec", \
+        .option     = options, \
+        .version    = LIBAVUTIL_VERSION_INT, \
+    };
+
+#define FFMMAL_DEC(NAME, ID) \
+    FFMMAL_DEC_CLASS(NAME) \
+    AVCodec ff_##NAME##_mmal_decoder = { \
+        .name           = #NAME "_mmal", \
+        .long_name      = NULL_IF_CONFIG_SMALL(#NAME " (mmal)"), \
+        .type           = AVMEDIA_TYPE_VIDEO, \
+        .id             = ID, \
+        .priv_data_size = sizeof(MMALDecodeContext), \
+        .init           = ffmmal_init_decoder, \
+        .close          = ffmmal_close_decoder, \
+        .decode         = ffmmal_decode, \
+        .flush          = ffmmal_flush, \
+        .priv_class     = &ffmmal_##NAME##_dec_class, \
+        .capabilities   = AV_CODEC_CAP_DELAY, \
+        .caps_internal  = FF_CODEC_CAP_SETS_PKT_DTS, \
+        .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_MMAL, \
+                                                         AV_PIX_FMT_YUV420P, \
+                                                         AV_PIX_FMT_NONE}, \
+    };
+
+FFMMAL_DEC(h264, AV_CODEC_ID_H264)
+FFMMAL_DEC(mpeg2, AV_CODEC_ID_MPEG2VIDEO)
+FFMMAL_DEC(mpeg4, AV_CODEC_ID_MPEG4)
+FFMMAL_DEC(vc1, AV_CODEC_ID_VC1)
diff --git a/libavcodec/mmvideo.c b/libavcodec/mmvideo.c
index 8b04965d..04de6bb4 100644
--- a/libavcodec/mmvideo.c
+++ b/libavcodec/mmvideo.c
@@ -247,5 +247,5 @@ AVCodec ff_mmvideo_decoder = {
     .init           = mm_decode_init,
     .close          = mm_decode_end,
     .decode         = mm_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/motion-test.c b/libavcodec/motion-test.c
index 7cfe41cf..ebcf4aaf 100644
--- a/libavcodec/motion-test.c
+++ b/libavcodec/motion-test.c
@@ -127,7 +127,7 @@ int main(int argc, char **argv)
     printf("ffmpeg motion test\n");
 
     ctx = avcodec_alloc_context3(NULL);
-    ctx->flags |= CODEC_FLAG_BITEXACT;
+    ctx->flags |= AV_CODEC_FLAG_BITEXACT;
     av_force_cpu_flags(0);
     memset(&cctx, 0, sizeof(cctx));
     ff_me_cmp_init(&cctx, ctx);
diff --git a/libavcodec/motion_est.c b/libavcodec/motion_est.c
index 96aa9ac8..52369384 100644
--- a/libavcodec/motion_est.c
+++ b/libavcodec/motion_est.c
@@ -99,7 +99,7 @@ static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3
 }
 
 static int get_flags(MotionEstContext *c, int direct, int chroma){
-    return   ((c->avctx->flags&CODEC_FLAG_QPEL) ? FLAG_QPEL : 0)
+    return   ((c->avctx->flags&AV_CODEC_FLAG_QPEL) ? FLAG_QPEL : 0)
            + (direct ? FLAG_DIRECT : 0)
            + (chroma ? FLAG_CHROMA : 0);
 }
@@ -183,8 +183,8 @@ static av_always_inline int cmp_inline(MpegEncContext *s, const int x, const int
     const int stride= c->stride;
     const int uvstride= c->uvstride;
     const int dxy= subx + (suby<<(1+qpel)); //FIXME log2_subpel?
-    const int hx= subx + (x<<(1+qpel));
-    const int hy= suby + (y<<(1+qpel));
+    const int hx= subx + x*(1<<(1+qpel));
+    const int hy= suby + y*(1<<(1+qpel));
     uint8_t * const * const ref= c->ref[ref_index];
     uint8_t * const * const src= c->src[src_index];
     int d;
@@ -312,11 +312,26 @@ int ff_init_me(MpegEncContext *s){
         av_log(s->avctx, AV_LOG_ERROR, "ME_MAP size is too small for SAB diamond\n");
         return -1;
     }
+
+#if FF_API_MOTION_EST
     //special case of snow is needed because snow uses its own iterative ME code
-    if(s->me_method!=ME_ZERO && s->me_method!=ME_EPZS && s->me_method!=ME_X1 && s->avctx->codec_id != AV_CODEC_ID_SNOW){
-        av_log(s->avctx, AV_LOG_ERROR, "me_method is only allowed to be set to zero and epzs; for hex,umh,full and others see dia_size\n");
-        return -1;
+FF_DISABLE_DEPRECATION_WARNINGS
+    if (s->motion_est == FF_ME_EPZS) {
+        if (s->me_method == ME_ZERO)
+            s->motion_est = FF_ME_ZERO;
+        else if (s->me_method == ME_EPZS)
+            s->motion_est = FF_ME_EPZS;
+        else if (s->me_method == ME_X1)
+            s->motion_est = FF_ME_XONE;
+        else if (s->avctx->codec_id != AV_CODEC_ID_SNOW) {
+            av_log(s->avctx, AV_LOG_ERROR,
+                   "me_method is only allowed to be set to zero and epzs; "
+                   "for hex,umh,full and others see dia_size\n");
+            return -1;
+        }
     }
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
     c->avctx= s->avctx;
 
@@ -337,7 +352,7 @@ int ff_init_me(MpegEncContext *s){
     c->mb_flags = get_flags(c, 0, c->avctx->mb_cmp    &FF_CMP_CHROMA);
 
 /*FIXME s->no_rounding b_type*/
-    if (s->avctx->flags & CODEC_FLAG_QPEL) {
+    if (s->avctx->flags & AV_CODEC_FLAG_QPEL) {
         c->sub_motion_search= qpel_motion_search;
         c->qpel_avg = s->qdsp.avg_qpel_pixels_tab;
         if (s->no_rounding)
@@ -426,13 +441,13 @@ static int sad_hpel_motion_search(MpegEncContext * s,
         my > ymin && my < ymax) {
         int dx=0, dy=0;
         int d, pen_x, pen_y;
-        const int index= (my<<ME_MAP_SHIFT) + mx;
+        const int index= my*(1<<ME_MAP_SHIFT) + mx;
         const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)];
         const int l= score_map[(index- 1               )&(ME_MAP_SIZE-1)];
         const int r= score_map[(index+ 1               )&(ME_MAP_SIZE-1)];
         const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)];
-        mx<<=1;
-        my<<=1;
+        mx += mx;
+        my += my;
 
 
         pen_x= pred_x + mx;
@@ -490,8 +505,8 @@ static int sad_hpel_motion_search(MpegEncContext * s,
         my+=dy;
 
     }else{
-        mx<<=1;
-        my<<=1;
+        mx += mx;
+        my += my;
     }
 
     *mx_ptr = mx;
@@ -584,7 +599,7 @@ static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift)
     int same=1;
     const int stride= c->stride;
     uint8_t *mv_penalty= c->current_mv_penalty;
-    int saftey_cliping= s->unrestricted_mv && (s->width&15) && (s->height&15);
+    int safety_clipping= s->unrestricted_mv && (s->width&15) && (s->height&15);
 
     init_mv4_ref(c);
 
@@ -596,7 +611,7 @@ static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift)
         const int mot_stride = s->b8_stride;
         const int mot_xy = s->block_index[block];
 
-        if(saftey_cliping){
+        if(safety_clipping){
             c->xmax = - 16*s->mb_x + s->width  - 8*(block &1);
             c->ymax = - 16*s->mb_y + s->height - 8*(block>>1);
         }
@@ -628,7 +643,7 @@ static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift)
         }
         P_MV1[0]= mx;
         P_MV1[1]= my;
-        if(saftey_cliping)
+        if(safety_clipping)
             for(i=1; i<10; i++){
                 if (s->first_slice_line && block<2 && i>1 && i<9)
                     continue;
@@ -891,7 +906,7 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
 {
     MotionEstContext * const c= &s->me;
     uint8_t *pix, *ppix;
-    int sum, mx, my, dmin;
+    int sum, mx = 0, my = 0, dmin = 0;
     int varc;            ///< the variance of the block (sum of squared (p[y][x]-average))
     int vard;            ///< sum of squared differences with the estimated motion vector
     int P[10][2];
@@ -908,7 +923,7 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
     c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
     c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
     c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
-    c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_MV;
+    c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_DMV;
 
     get_limits(s, 16*mb_x, 16*mb_y);
     c->skip=0;
@@ -923,52 +938,43 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
     pic->mb_var [s->mb_stride * mb_y + mb_x] = (varc+128)>>8;
     c->mb_var_sum_temp += (varc+128)>>8;
 
-    switch(s->me_method) {
-    case ME_ZERO:
-    default:
-        mx   = 0;
-        my   = 0;
-        dmin = 0;
-        break;
-    case ME_X1:
-    case ME_EPZS:
-       {
-            const int mot_stride = s->b8_stride;
-            const int mot_xy = s->block_index[0];
-
-            P_LEFT[0] = s->current_picture.motion_val[0][mot_xy - 1][0];
-            P_LEFT[1] = s->current_picture.motion_val[0][mot_xy - 1][1];
-
-            if(P_LEFT[0]       > (c->xmax<<shift)) P_LEFT[0]       = (c->xmax<<shift);
-
-            if(!s->first_slice_line) {
-                P_TOP[0]      = s->current_picture.motion_val[0][mot_xy - mot_stride    ][0];
-                P_TOP[1]      = s->current_picture.motion_val[0][mot_xy - mot_stride    ][1];
-                P_TOPRIGHT[0] = s->current_picture.motion_val[0][mot_xy - mot_stride + 2][0];
-                P_TOPRIGHT[1] = s->current_picture.motion_val[0][mot_xy - mot_stride + 2][1];
-                if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
-                if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
-                if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
+    if (s->motion_est != FF_ME_ZERO) {
+        const int mot_stride = s->b8_stride;
+        const int mot_xy = s->block_index[0];
 
-                P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
-                P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
+        P_LEFT[0] = s->current_picture.motion_val[0][mot_xy - 1][0];
+        P_LEFT[1] = s->current_picture.motion_val[0][mot_xy - 1][1];
 
-                if(s->out_format == FMT_H263){
-                    c->pred_x = P_MEDIAN[0];
-                    c->pred_y = P_MEDIAN[1];
-                }else { /* mpeg1 at least */
-                    c->pred_x= P_LEFT[0];
-                    c->pred_y= P_LEFT[1];
-                }
-            }else{
-                c->pred_x= P_LEFT[0];
-                c->pred_y= P_LEFT[1];
-            }
+        if (P_LEFT[0] > (c->xmax << shift))
+            P_LEFT[0] =  c->xmax << shift;
+
+        if (!s->first_slice_line) {
+            P_TOP[0]      = s->current_picture.motion_val[0][mot_xy - mot_stride    ][0];
+            P_TOP[1]      = s->current_picture.motion_val[0][mot_xy - mot_stride    ][1];
+            P_TOPRIGHT[0] = s->current_picture.motion_val[0][mot_xy - mot_stride + 2][0];
+            P_TOPRIGHT[1] = s->current_picture.motion_val[0][mot_xy - mot_stride + 2][1];
+            if (P_TOP[1] > (c->ymax << shift))
+                P_TOP[1] =  c->ymax << shift;
+            if (P_TOPRIGHT[0] < (c->xmin << shift))
+                P_TOPRIGHT[0] =  c->xmin << shift;
+            if (P_TOPRIGHT[1] > (c->ymax << shift))
+                P_TOPRIGHT[1] =  c->ymax << shift;
+
+            P_MEDIAN[0] = mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
+            P_MEDIAN[1] = mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
 
+            if (s->out_format == FMT_H263) {
+                c->pred_x = P_MEDIAN[0];
+                c->pred_y = P_MEDIAN[1];
+            } else { /* mpeg1 at least */
+                c->pred_x = P_LEFT[0];
+                c->pred_y = P_LEFT[1];
+            }
+        } else {
+            c->pred_x = P_LEFT[0];
+            c->pred_y = P_LEFT[1];
         }
         dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, 0, s->p_mv_table, (1<<16)>>shift, 0, 16);
-
-        break;
     }
 
     /* At this point (mx,my) are full-pell and the relative displacement */
@@ -997,7 +1003,7 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
             mx <<=shift;
             my <<=shift;
         }
-        if ((s->avctx->flags & CODEC_FLAG_4MV)
+        if ((s->avctx->flags & AV_CODEC_FLAG_4MV)
            && !c->skip && varc>50<<8 && vard>10<<8){
             if(h263_mv4_search(s, mx, my, shift) < INT_MAX)
                 mb_type|=CANDIDATE_MB_TYPE_INTER4V;
@@ -1005,7 +1011,7 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
             set_p_mv_tables(s, mx, my, 0);
         }else
             set_p_mv_tables(s, mx, my, 1);
-        if ((s->avctx->flags & CODEC_FLAG_INTERLACED_ME)
+        if ((s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME)
            && !c->skip){ //FIXME varc/d checks
             if(interlaced_search(s, 0, s->p_field_mv_table, s->p_field_select_table, mx, my, 0) < INT_MAX)
                 mb_type |= CANDIDATE_MB_TYPE_INTER_I;
@@ -1018,7 +1024,7 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
         if(c->avctx->me_sub_cmp != c->avctx->mb_cmp && !c->skip)
             dmin= get_mb_score(s, mx, my, 0, 0, 0, 16, 1);
 
-        if ((s->avctx->flags & CODEC_FLAG_4MV)
+        if ((s->avctx->flags & AV_CODEC_FLAG_4MV)
            && !c->skip && varc>50<<8 && vard>10<<8){
             int dmin4= h263_mv4_search(s, mx, my, shift);
             if(dmin4 < dmin){
@@ -1026,7 +1032,7 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
                 dmin=dmin4;
             }
         }
-        if ((s->avctx->flags & CODEC_FLAG_INTERLACED_ME)
+        if ((s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME)
            && !c->skip){ //FIXME varc/d checks
             int dmin_i= interlaced_search(s, 0, s->p_field_mv_table, s->p_field_select_table, mx, my, 0);
             if(dmin_i < dmin){
@@ -1084,7 +1090,7 @@ int ff_pre_estimate_p_frame_motion(MpegEncContext * s,
     av_assert0(s->quarter_sample==0 || s->quarter_sample==1);
 
     c->pre_penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_pre_cmp);
-    c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_MV;
+    c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_DMV;
 
     get_limits(s, 16*mb_x, 16*mb_y);
     c->skip=0;
@@ -1128,12 +1134,12 @@ static int estimate_motion_b(MpegEncContext *s, int mb_x, int mb_y,
                              int16_t (*mv_table)[2], int ref_index, int f_code)
 {
     MotionEstContext * const c= &s->me;
-    int mx, my, dmin;
+    int mx = 0, my = 0, dmin = 0;
     int P[10][2];
     const int shift= 1+s->quarter_sample;
     const int mot_stride = s->mb_stride;
     const int mot_xy = mb_y*mot_stride + mb_x;
-    uint8_t * const mv_penalty= c->mv_penalty[f_code] + MAX_MV;
+    uint8_t * const mv_penalty= c->mv_penalty[f_code] + MAX_DMV;
     int mv_scale;
 
     c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
@@ -1143,15 +1149,7 @@ static int estimate_motion_b(MpegEncContext *s, int mb_x, int mb_y,
 
     get_limits(s, 16*mb_x, 16*mb_y);
 
-    switch(s->me_method) {
-    case ME_ZERO:
-    default:
-        mx   = 0;
-        my   = 0;
-        dmin = 0;
-        break;
-    case ME_X1:
-    case ME_EPZS:
+    if (s->motion_est != FF_ME_ZERO) {
         P_LEFT[0] = mv_table[mot_xy - 1][0];
         P_LEFT[1] = mv_table[mot_xy - 1][1];
 
@@ -1180,8 +1178,6 @@ static int estimate_motion_b(MpegEncContext *s, int mb_x, int mb_y,
         }
 
         dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, ref_index, s->p_mv_table, mv_scale, 0, 16);
-
-        break;
     }
 
     dmin= c->sub_motion_search(s, &mx, &my, dmin, 0, ref_index, 0, 16);
@@ -1207,8 +1203,8 @@ static inline int check_bidir_mv(MpegEncContext * s,
     //FIXME better f_code prediction (max mv & distance)
     //FIXME pointers
     MotionEstContext * const c= &s->me;
-    uint8_t * const mv_penalty_f= c->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
-    uint8_t * const mv_penalty_b= c->mv_penalty[s->b_code] + MAX_MV; // f_code of the prev frame
+    uint8_t * const mv_penalty_f= c->mv_penalty[s->f_code] + MAX_DMV; // f_code of the prev frame
+    uint8_t * const mv_penalty_b= c->mv_penalty[s->b_code] + MAX_DMV; // f_code of the prev frame
     int stride= c->stride;
     uint8_t *dest_y = c->scratchpad;
     uint8_t *ptr;
@@ -1421,7 +1417,7 @@ static inline int direct_search(MpegEncContext * s, int mb_x, int mb_y)
     int mx, my, xmin, xmax, ymin, ymax;
     int16_t (*mv_table)[2]= s->b_direct_mv_table;
 
-    c->current_mv_penalty= c->mv_penalty[1] + MAX_MV;
+    c->current_mv_penalty= c->mv_penalty[1] + MAX_DMV;
     ymin= xmin=(-32)>>shift;
     ymax= xmax=   31>>shift;
 
@@ -1554,14 +1550,14 @@ void ff_estimate_b_frame_motion(MpegEncContext * s,
     fbmin= bidir_refine(s, mb_x, mb_y) + penalty_factor;
     ff_dlog(s, "%d %d %d %d\n", dmin, fmin, bmin, fbmin);
 
-    if (s->avctx->flags & CODEC_FLAG_INTERLACED_ME) {
+    if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
 //FIXME mb type penalty
         c->skip=0;
-        c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_MV;
+        c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_DMV;
         fimin= interlaced_search(s, 0,
                                  s->b_field_mv_table[0], s->b_field_select_table[0],
                                  s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1], 0);
-        c->current_mv_penalty= c->mv_penalty[s->b_code] + MAX_MV;
+        c->current_mv_penalty= c->mv_penalty[s->b_code] + MAX_DMV;
         bimin= interlaced_search(s, 2,
                                  s->b_field_mv_table[1], s->b_field_select_table[1],
                                  s->b_back_mv_table[xy][0], s->b_back_mv_table[xy][1], 0);
@@ -1620,7 +1616,7 @@ void ff_estimate_b_frame_motion(MpegEncContext * s,
 /* find best f_code for ME which do unlimited searches */
 int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type)
 {
-    if(s->me_method>=ME_EPZS){
+    if (s->motion_est != FF_ME_ZERO) {
         int score[8];
         int i, y, range= s->avctx->me_range ? s->avctx->me_range : (INT_MAX/2);
         uint8_t * fcode_tab= s->fcode_tab;
@@ -1685,7 +1681,7 @@ void ff_fix_long_p_mvs(MpegEncContext * s)
 
     if(c->avctx->me_range && range > c->avctx->me_range) range= c->avctx->me_range;
 
-    if (s->avctx->flags & CODEC_FLAG_4MV) {
+    if (s->avctx->flags & AV_CODEC_FLAG_4MV) {
         const int wrap= s->b8_stride;
 
         /* clip / convert to intra 8x8 type MVs */
diff --git a/libavcodec/motion_est.h b/libavcodec/motion_est.h
index c6a1691e..3b3a8d73 100644
--- a/libavcodec/motion_est.h
+++ b/libavcodec/motion_est.h
@@ -18,8 +18,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef AVCODEC_MOTIONEST_H
-#define AVCODEC_MOTIONEST_H
+#ifndef AVCODEC_MOTION_EST_H
+#define AVCODEC_MOTION_EST_H
 
 #include <stdint.h>
 
@@ -29,7 +29,17 @@
 
 struct MpegEncContext;
 
+#if ARCH_IA64 // Limit static arrays to avoid gcc failing "short data segment overflowed"
+#define MAX_MV 1024
+#else
 #define MAX_MV 4096
+#endif
+#define MAX_DMV (2*MAX_MV)
+#define ME_MAP_SIZE 64
+
+#define FF_ME_ZERO 0
+#define FF_ME_EPZS 1
+#define FF_ME_XONE 2
 
 /**
  * Motion estimation context.
@@ -80,7 +90,7 @@ typedef struct MotionEstContext {
     op_pixels_func(*hpel_avg)[4];
     qpel_mc_func(*qpel_put)[16];
     qpel_mc_func(*qpel_avg)[16];
-    uint8_t (*mv_penalty)[MAX_MV * 2 + 1]; ///< bit amount needed to encode a MV
+    uint8_t (*mv_penalty)[MAX_DMV * 2 + 1]; ///< bit amount needed to encode a MV
     uint8_t *current_mv_penalty;
     int (*sub_motion_search)(struct MpegEncContext *s,
                              int *mx_ptr, int *my_ptr, int dmin,
@@ -122,7 +132,4 @@ void ff_fix_long_mvs(struct MpegEncContext *s, uint8_t *field_select_table,
                      int field_select, int16_t (*mv_table)[2], int f_code,
                      int type, int truncate);
 
-extern const uint8_t ff_aic_dc_scale_table[32];
-extern const uint8_t ff_h263_chroma_qscale_table[32];
-
-#endif /* AVCODEC_MOTIONEST_H */
+#endif /* AVCODEC_MOTION_EST_H */
diff --git a/libavcodec/motion_est_template.c b/libavcodec/motion_est_template.c
index ae2cbdea..327a24b1 100644
--- a/libavcodec/motion_est_template.c
+++ b/libavcodec/motion_est_template.c
@@ -24,6 +24,7 @@
  * Motion estimation template.
  */
 
+#include "libavutil/qsort.h"
 #include "mpegvideo.h"
 
 //Let us hope gcc will remove the unused vars ...(gcc 3.2.2 seems to do it ...)
@@ -358,8 +359,8 @@ static int qpel_motion_search(MpegEncContext * s,
 
 #define CHECK_MV(x,y)\
 {\
-    const unsigned key = ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
-    const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
+    const unsigned key = ((unsigned)(y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
+    const int index= (((unsigned)(y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
     av_assert2((x) >= xmin);\
     av_assert2((x) <= xmax);\
     av_assert2((y) >= ymin);\
@@ -368,7 +369,7 @@ static int qpel_motion_search(MpegEncContext * s,
         d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
         map[index]= key;\
         score_map[index]= d;\
-        d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\
+        d += (mv_penalty[((x)*(1<<shift))-pred_x] + mv_penalty[((y)*(1<<shift))-pred_y])*penalty_factor;\
         COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
     }\
 }
@@ -384,13 +385,13 @@ static int qpel_motion_search(MpegEncContext * s,
 
 #define CHECK_MV_DIR(x,y,new_dir)\
 {\
-    const unsigned key = ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
-    const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
+    const unsigned key = ((unsigned)(y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
+    const int index= (((unsigned)(y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
     if(map[index]!=key){\
         d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
         map[index]= key;\
         score_map[index]= d;\
-        d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\
+        d += (mv_penalty[(int)((unsigned)(x)<<shift)-pred_x] + mv_penalty[(int)((unsigned)(y)<<shift)-pred_y])*penalty_factor;\
         if(d<dmin){\
             best[0]=x;\
             best[1]=y;\
@@ -426,8 +427,8 @@ static av_always_inline int small_diamond_search(MpegEncContext * s, int *best,
     chroma_cmpf = s->mecc.me_cmp[size + 1];
 
     { /* ensure that the best point is in the MAP as h/qpel refinement needs it */
-        const unsigned key = (best[1]<<ME_MAP_MV_BITS) + best[0] + map_generation;
-        const int index= ((best[1]<<ME_MAP_SHIFT) + best[0])&(ME_MAP_SIZE-1);
+        const unsigned key = ((unsigned)best[1]<<ME_MAP_MV_BITS) + best[0] + map_generation;
+        const int index= (((unsigned)best[1]<<ME_MAP_SHIFT) + best[0])&(ME_MAP_SIZE-1);
         if(map[index]!=key){ //this will be executed only very rarey
             score_map[index]= cmp(s, best[0], best[1], 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
             map[index]= key;
@@ -702,7 +703,8 @@ static int sab_diamond_search(MpegEncContext * s, int *best, int dmin,
 
         key += (1<<(ME_MAP_MV_BITS-1)) + (1<<(2*ME_MAP_MV_BITS-1));
 
-        if((key&((-1)<<(2*ME_MAP_MV_BITS))) != map_generation) continue;
+        if ((key & (-(1 << (2 * ME_MAP_MV_BITS)))) != map_generation)
+            continue;
 
         minima[j].height= score_map[i];
         minima[j].x= key & ((1<<ME_MAP_MV_BITS)-1); key>>=ME_MAP_MV_BITS;
@@ -722,7 +724,7 @@ static int sab_diamond_search(MpegEncContext * s, int *best, int dmin,
         j++;
     }
 
-    qsort(minima, j, sizeof(Minima), minima_cmp);
+    AV_QSORT(minima, j, Minima, minima_cmp);
 
     for(; j<minima_count; j++){
         minima[j].height=256*256*256*64;
diff --git a/libavcodec/motionpixels.c b/libavcodec/motionpixels.c
index 84517f99..a88b837b 100644
--- a/libavcodec/motionpixels.c
+++ b/libavcodec/motionpixels.c
@@ -351,5 +351,5 @@ AVCodec ff_motionpixels_decoder = {
     .init           = mp_decode_init,
     .close          = mp_decode_end,
     .decode         = mp_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/movsub_bsf.c b/libavcodec/movsub_bsf.c
index 4820b263..8ee7a3a4 100644
--- a/libavcodec/movsub_bsf.c
+++ b/libavcodec/movsub_bsf.c
@@ -28,7 +28,7 @@ static int text2movsub(AVBitStreamFilterContext *bsfc, AVCodecContext *avctx, co
                      const uint8_t *buf, int buf_size, int keyframe){
     if (buf_size > 0xffff) return 0;
     *poutbuf_size = buf_size + 2;
-    *poutbuf = av_malloc(*poutbuf_size + FF_INPUT_BUFFER_PADDING_SIZE);
+    *poutbuf = av_malloc(*poutbuf_size + AV_INPUT_BUFFER_PADDING_SIZE);
     if (!*poutbuf)
         return AVERROR(ENOMEM);
     AV_WB16(*poutbuf, buf_size);
@@ -46,7 +46,7 @@ static int mov2textsub(AVBitStreamFilterContext *bsfc, AVCodecContext *avctx, co
                      const uint8_t *buf, int buf_size, int keyframe){
     if (buf_size < 2) return 0;
     *poutbuf_size = FFMIN(buf_size - 2, AV_RB16(buf));
-    *poutbuf = av_malloc(*poutbuf_size + FF_INPUT_BUFFER_PADDING_SIZE);
+    *poutbuf = av_malloc(*poutbuf_size + AV_INPUT_BUFFER_PADDING_SIZE);
     if (!*poutbuf)
         return AVERROR(ENOMEM);
     memcpy(*poutbuf, buf + 2, *poutbuf_size);
diff --git a/libavcodec/movtextdec.c b/libavcodec/movtextdec.c
index 8dda5cea..8d0e8141 100644
--- a/libavcodec/movtextdec.c
+++ b/libavcodec/movtextdec.c
@@ -27,24 +27,359 @@
 #include "libavutil/intreadwrite.h"
 #include "libavutil/mem.h"
 
-#define STYLE_FLAG_BOLD         1
-#define STYLE_FLAG_ITALIC       2
-#define STYLE_FLAG_UNDERLINE    4
+#define STYLE_FLAG_BOLD         (1<<0)
+#define STYLE_FLAG_ITALIC       (1<<1)
+#define STYLE_FLAG_UNDERLINE    (1<<2)
+
+#define BOX_SIZE_INITIAL    40
+
+#define STYL_BOX   (1<<0)
+#define HLIT_BOX   (1<<1)
+#define HCLR_BOX   (1<<2)
+#define TWRP_BOX   (1<<3)
+
+#define BOTTOM_LEFT     1
+#define BOTTOM_CENTER   2
+#define BOTTOM_RIGHT    3
+#define MIDDLE_LEFT     4
+#define MIDDLE_CENTER   5
+#define MIDDLE_RIGHT    6
+#define TOP_LEFT        7
+#define TOP_CENTER      8
+#define TOP_RIGHT       9
+
+typedef struct {
+    const char *font;   // borrowed font name; this struct never frees it
+    int fontsize;
+    int color;
+    int back_color;
+    int bold;           // nonzero when STYLE_FLAG_BOLD is set in the default style
+    int italic;         // nonzero when STYLE_FLAG_ITALIC is set in the default style
+    int underline;      // nonzero when STYLE_FLAG_UNDERLINE is set in the default style
+    int alignment;      // one of BOTTOM_LEFT .. TOP_RIGHT
+} MovTextDefault;
+
+typedef struct {
+    uint16_t fontID;    // identifier that style records are matched against
+    char *font;         // NUL-terminated font name, heap-allocated
+} FontRecord;
+
+typedef struct {
+    uint16_t style_start;   // text position at which the style is switched on
+    uint16_t style_end;     // text position at which the style is reset
+    uint8_t style_flag;     // STYLE_FLAG_* bits
+    uint8_t fontsize;       // emitted as {\fs<n>} in the ASS output
+    uint16_t style_fontID;  // matched against FontRecord.fontID
+} StyleBox;
+
+typedef struct {
+    uint16_t hlit_start;    // text position where the highlight begins
+    uint16_t hlit_end;      // text position where the highlight ends
+} HighlightBox;
+
+typedef struct {
+   uint8_t hlit_color[4];   // the 4 payload bytes of the 'hclr' box, copied verbatim
+} HilightcolorBox;
+
+typedef struct {
+    uint8_t wrap_flag;      // 1 selects {\q1} in the ASS output, anything else {\q2}
+} TextWrapBox;
+
+typedef struct {
+    StyleBox **s;           // style records of the current packet (count_s entries)
+    StyleBox *s_temp;       // record being filled by decode_styl()
+    HighlightBox h;
+    HilightcolorBox c;
+    FontRecord **ftab;      // font table parsed from extradata (count_f entries)
+    FontRecord *ftab_temp;  // font record being filled by mov_text_tx3g()
+    TextWrapBox w;
+    MovTextDefault d;       // default style parsed from extradata
+    uint8_t box_flags;      // STYL_BOX/HLIT_BOX/HCLR_BOX/TWRP_BOX bits seen in this packet
+    uint16_t style_entries, ftab_entries;
+    uint64_t tracksize;     // running offset used to locate the current box in the packet
+    int size_var;           // box header size: 8, or 16 when a 64-bit size is present
+    int count_s, count_f;
+} MovTextContext;
+
+typedef struct {
+    uint32_t type;      // box tag compared against the parsed tsmb_type
+    size_t base_size;   // payload bytes that must fit in the packet before decode() runs
+    int (*decode)(const uint8_t *tsmb, MovTextContext *m, AVPacket *avpkt);
+} Box;
+
+/* Free the style records gathered by decode_styl() and empty the list. */
+static void mov_text_cleanup(MovTextContext *m)
+{
+    if (m->box_flags & STYL_BOX) {
+        // Counting down leaves count_s at 0, so a repeated call is harmless.
+        while (m->count_s > 0)
+            av_freep(&m->s[--m->count_s]);
+        av_freep(&m->s);
+    }
+}
+
+/* Release the font table: the in-progress record and every stored entry. */
+static void mov_text_cleanup_ftab(MovTextContext *m)
+{
+    int idx;
+    if (m->ftab_temp)
+        av_freep(&m->ftab_temp->font);
+    av_freep(&m->ftab_temp);
+    for (idx = 0; m->ftab && idx < m->count_f; idx++) {
+        FontRecord *rec = m->ftab[idx];
+        av_freep(&rec->font);
+        av_freep(&m->ftab[idx]);
+    }
+    av_freep(&m->ftab);
+}
+
+/**
+ * Parse the tx3g sample description held in avctx->extradata and store
+ * the default alignment, colors, face style flags, font size and font
+ * table in m.
+ *
+ * @param avctx codec context whose extradata/extradata_size are read
+ * @param m     decoder context receiving m->d, m->ftab and m->ftab_entries
+ * @return 0 on success, -1 if extradata is too short, AVERROR(ENOMEM) if
+ *         an allocation fails; a failure while reading the font table
+ *         releases it and resets m->ftab_entries to 0
+ */
+static int mov_text_tx3g(AVCodecContext *avctx, MovTextContext *m)
+{
+    /* Indexed as [h_align + 1][v_align + 1]. */
+    static const uint8_t align_tab[3][3] = {
+        { BOTTOM_RIGHT,  TOP_RIGHT,  MIDDLE_RIGHT  },
+        { BOTTOM_LEFT,   TOP_LEFT,   MIDDLE_LEFT   },
+        { BOTTOM_CENTER, TOP_CENTER, MIDDLE_CENTER },
+    };
+    uint8_t *tx3g_ptr = avctx->extradata;
+    int i, box_size, font_length;
+    int8_t v_align, h_align;
+    int style_fontID;
+    int ret = -1;
+    StyleBox s_default;
+
+    m->count_f = 0;
+    m->ftab_entries = 0;
+    box_size = BOX_SIZE_INITIAL; /* Size till ftab_entries */
+    if (avctx->extradata_size < box_size)
+        return -1;
+
+    // Display Flags
+    tx3g_ptr += 4;
+    // Alignment
+    h_align = *tx3g_ptr++;
+    v_align = *tx3g_ptr++;
+    // Values outside -1..1 leave m->d.alignment untouched.
+    if (h_align >= -1 && h_align <= 1 && v_align >= -1 && v_align <= 1)
+        m->d.alignment = align_tab[h_align + 1][v_align + 1];
+    // Background Color
+    m->d.back_color = AV_RB24(tx3g_ptr);
+    tx3g_ptr += 4;
+    // BoxRecord
+    tx3g_ptr += 8;
+    // StyleRecord
+    tx3g_ptr += 4;
+    // fontID
+    style_fontID = AV_RB16(tx3g_ptr);
+    tx3g_ptr += 2;
+    // face-style-flags
+    s_default.style_flag = *tx3g_ptr++;
+    m->d.bold = s_default.style_flag & STYLE_FLAG_BOLD;
+    m->d.italic = s_default.style_flag & STYLE_FLAG_ITALIC;
+    m->d.underline = s_default.style_flag & STYLE_FLAG_UNDERLINE;
+    // fontsize
+    m->d.fontsize = *tx3g_ptr++;
+    // Primary color
+    m->d.color = AV_RB24(tx3g_ptr);
+    tx3g_ptr += 4;
+    // FontRecord
+    // FontRecord Size
+    tx3g_ptr += 4;
+    // ftab
+    tx3g_ptr += 4;
+
+    m->ftab_entries = AV_RB16(tx3g_ptr);
+    tx3g_ptr += 2;
+
+    for (i = 0; i < m->ftab_entries; i++) {
+        // fontID (2 bytes) + font name length (1 byte)
+        box_size += 3;
+        if (avctx->extradata_size < box_size)
+            goto fail;
+        m->ftab_temp = av_mallocz(sizeof(*m->ftab_temp));
+        if (!m->ftab_temp) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+        m->ftab_temp->fontID = AV_RB16(tx3g_ptr);
+        tx3g_ptr += 2;
+        font_length = *tx3g_ptr++;
+
+        box_size = box_size + font_length;
+        if (avctx->extradata_size < box_size)
+            goto fail;
+        m->ftab_temp->font = av_malloc(font_length + 1);
+        if (!m->ftab_temp->font) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+        memcpy(m->ftab_temp->font, tx3g_ptr, font_length);
+        m->ftab_temp->font[font_length] = '\0';
+        av_dynarray_add(&m->ftab, &m->count_f, m->ftab_temp);
+        if (!m->ftab) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+        m->ftab_temp = NULL;
+        tx3g_ptr = tx3g_ptr + font_length;
+    }
+    // Default font name for when style_fontID matches no ftab entry, so
+    // m->d.font is never left NULL on success.
+    m->d.font = ASS_DEFAULT_FONT;
+    for (i = 0; i < m->ftab_entries; i++) {
+        if (style_fontID == m->ftab[i]->fontID)
+            m->d.font = m->ftab[i]->font;
+    }
+    return 0;
+
+fail:
+    // The table is freed below; zero the count so nothing indexes it.
+    mov_text_cleanup_ftab(m);
+    m->ftab_entries = 0;
+    return ret;
+}
+
+static int decode_twrp(const uint8_t *tsmb, MovTextContext *m, AVPacket *avpkt)
+{   /* 'twrp' box: keep its first payload byte as the wrap flag. */
+    m->w.wrap_flag = tsmb[0];
+    m->box_flags |= TWRP_BOX;
+    return 0;
+}
+
+/* 'hlit' box: two big-endian 16-bit offsets, highlight start then end. */
+static int decode_hlit(const uint8_t *tsmb, MovTextContext *m, AVPacket *avpkt)
+{
+    const uint16_t first = AV_RB16(tsmb), last = AV_RB16(tsmb + 2);
+    m->h.hlit_start = first;
+    m->h.hlit_end   = last;
+    m->box_flags   |= HLIT_BOX;
+    return 0;
+}
+
+/* 'hclr' box: copy the 4 highlight color bytes as they are. */
+static int decode_hclr(const uint8_t *tsmb, MovTextContext *m, AVPacket *avpkt)
+{
+    memcpy(m->c.hlit_color, tsmb, sizeof(m->c.hlit_color));
+    m->box_flags |= HCLR_BOX;
+    return 0;
+}
+
+/* Parse a 'styl' box (entry count, then 12-byte style records) into m->s.
+ * Returns 0, -1 if the records overrun the packet, or AVERROR(ENOMEM). */
+static int decode_styl(const uint8_t *tsmb, MovTextContext *m, AVPacket *avpkt)
+{
+    int i;
+    int entries = AV_RB16(tsmb);
+    tsmb += 2;
+    // A single style record is of length 12 bytes.
+    if (m->tracksize + m->size_var + 2 + entries * 12 > avpkt->size)
+        return -1;
+    m->box_flags |= STYL_BOX;
+    for(i = 0; i < entries; i++) {
+        m->s_temp = av_malloc(sizeof(*m->s_temp));
+        if (!m->s_temp)
+            goto fail;
+        m->s_temp->style_start = AV_RB16(tsmb);
+        m->s_temp->style_end = AV_RB16(tsmb + 2);
+        m->s_temp->style_fontID = AV_RB16(tsmb + 4);
+        m->s_temp->style_flag = AV_RB8(tsmb + 6);
+        m->s_temp->fontsize = AV_RB8(tsmb + 7);
+        tsmb += 12; // the last 4 bytes (text-color-rgba) are skipped
+        av_dynarray_add(&m->s, &m->count_s, m->s_temp);
+        if (!m->s)
+            goto fail;
+    }
+    // Only ever advertise as many entries as are really stored in m->s.
+    m->style_entries = m->count_s;
+    return 0;
+fail:
+    // Drop every style so nothing later walks the freed array.
+    av_freep(&m->s_temp);
+    mov_text_cleanup(m);
+    m->box_flags &= ~STYL_BOX;
+    m->style_entries = m->count_s = 0;
+    return AVERROR(ENOMEM);
+}
+
+// Box handlers: tag, minimum payload size in bytes, parser.
+static const Box box_types[] = {
+    { MKBETAG('s','t','y','l'), 2, decode_styl },
+    { MKBETAG('h','l','i','t'), 4, decode_hlit },
+    { MKBETAG('h','c','l','r'), 4, decode_hclr },
+    { MKBETAG('t','w','r','p'), 1, decode_twrp }
+};
+static const size_t box_count = FF_ARRAY_ELEMS(box_types);
 
 static int text_to_ass(AVBPrint *buf, const char *text, const char *text_end,
-                        char **style_start, char **style_end,
-                        uint8_t **style_flags, int style_entries)
+                        MovTextContext *m)
 {
     int i = 0;
+    int j = 0;
+    int text_pos = 0;
+
+    if (text < text_end && m->box_flags & TWRP_BOX) {
+        if (m->w.wrap_flag == 1) {
+            av_bprintf(buf, "{\\q1}"); /* End of line wrap */
+        } else {
+            av_bprintf(buf, "{\\q2}"); /* No wrap */
+        }
+    }
+
     while (text < text_end) {
-        for (i = 0; i < style_entries; i++) {
-            if (*style_flags[i] && text == style_start[i]) {
-                if (*style_flags[i] & STYLE_FLAG_BOLD)
-                    av_bprintf(buf, "{\\b1}");
-                if (*style_flags[i] & STYLE_FLAG_ITALIC)
-                    av_bprintf(buf, "{\\i1}");
-                if (*style_flags[i] & STYLE_FLAG_UNDERLINE)
-                    av_bprintf(buf, "{\\u1}");
+        if (m->box_flags & STYL_BOX) {
+            for (i = 0; i < m->style_entries; i++) {
+                if (m->s[i]->style_flag && text_pos == m->s[i]->style_end) {
+                    av_bprintf(buf, "{\\r}");
+                }
+            }
+            for (i = 0; i < m->style_entries; i++) {
+                if (m->s[i]->style_flag && text_pos == m->s[i]->style_start) {
+                    if (m->s[i]->style_flag & STYLE_FLAG_BOLD)
+                        av_bprintf(buf, "{\\b1}");
+                    if (m->s[i]->style_flag & STYLE_FLAG_ITALIC)
+                        av_bprintf(buf, "{\\i1}");
+                    if (m->s[i]->style_flag & STYLE_FLAG_UNDERLINE)
+                        av_bprintf(buf, "{\\u1}");
+                    av_bprintf(buf, "{\\fs%d}", m->s[i]->fontsize);
+                    for (j = 0; j < m->ftab_entries; j++) {
+                        if (m->s[i]->style_fontID == m->ftab[j]->fontID)
+                            av_bprintf(buf, "{\\fn%s}", m->ftab[j]->font);
+                    }
+                }
+            }
+        }
+        if (m->box_flags & HLIT_BOX) {
+            if (text_pos == m->h.hlit_start) {
+                /* If hclr box is present, set the secondary color to the color
+                 * specified. Otherwise, set primary color to white and secondary
+                 * color to black. These colors will come from TextSampleModifier
+                 * boxes in future and inverse video technique for highlight will
+                 * be implemented.
+                 */
+                if (m->box_flags & HCLR_BOX) {
+                    av_bprintf(buf, "{\\2c&H%02x%02x%02x&}", m->c.hlit_color[2],
+                                m->c.hlit_color[1], m->c.hlit_color[0]);
+                } else {
+                    av_bprintf(buf, "{\\1c&H000000&}{\\2c&HFFFFFF&}");
+                }
+            }
+            if (text_pos == m->h.hlit_end) {
+                if (m->box_flags & HCLR_BOX) {
+                    av_bprintf(buf, "{\\2c&H000000&}");
+                } else {
+                    av_bprintf(buf, "{\\1c&HFFFFFF&}{\\2c&H000000&}");
+                }
             }
         }
 
@@ -58,18 +393,8 @@ static int text_to_ass(AVBPrint *buf, const char *text, const char *text_end,
             av_bprint_chars(buf, *text, 1);
             break;
         }
-
-        for (i = 0; i < style_entries; i++) {
-            if (*style_flags[i] && text == style_end[i]) {
-                if (*style_flags[i] & STYLE_FLAG_BOLD)
-                    av_bprintf(buf, "{\\b0}");
-                if (*style_flags[i] & STYLE_FLAG_ITALIC)
-                    av_bprintf(buf, "{\\i0}");
-                if (*style_flags[i] & STYLE_FLAG_UNDERLINE)
-                    av_bprintf(buf, "{\\u0}");
-            }
-        }
         text++;
+        text_pos++;
     }
 
     return 0;
@@ -82,27 +407,30 @@ static int mov_text_init(AVCodecContext *avctx) {
      * it's very common to find files where the default style is broken
      * and respecting it results in a worse experience than ignoring it.
      */
-    return ff_ass_subtitle_header_default(avctx);
+    int ret;
+    MovTextContext *m = avctx->priv_data;
+    ret = mov_text_tx3g(avctx, m);
+    if (ret == 0) {
+        return ff_ass_subtitle_header(avctx, m->d.font, m->d.fontsize, m->d.color,
+                                m->d.back_color, m->d.bold, m->d.italic,
+                                m->d.underline, ASS_DEFAULT_BORDERSTYLE,
+                                m->d.alignment);
+    } else
+        return ff_ass_subtitle_header_default(avctx);
 }
 
 static int mov_text_decode_frame(AVCodecContext *avctx,
                             void *data, int *got_sub_ptr, AVPacket *avpkt)
 {
     AVSubtitle *sub = data;
+    MovTextContext *m = avctx->priv_data;
     int ret, ts_start, ts_end;
     AVBPrint buf;
     char *ptr = avpkt->data;
     char *end;
-    //char *ptr_temp;
-    int text_length, tsmb_type, style_entries;
-    uint64_t tsmb_size, tracksize;
-    char **style_start = { 0, };
-    char **style_end = { 0, };
-    uint8_t **style_flags = { 0, };
+    int text_length, tsmb_type, ret_tsmb;
+    uint64_t tsmb_size;
     const uint8_t *tsmb;
-    int index, i, size_var;
-    uint8_t *flag;
-    char *style_pos;
 
     if (!ptr || avpkt->size < 2)
         return AVERROR_INVALIDDATA;
@@ -134,76 +462,49 @@ static int mov_text_decode_frame(AVCodecContext *avctx,
                             (AVRational){1,100});
 
     tsmb_size = 0;
-    tracksize = 2 + text_length;
+    m->tracksize = 2 + text_length;
+    m->style_entries = 0;
+    m->box_flags = 0;
+    m->count_s = 0;
     // Note that the spec recommends lines be no longer than 2048 characters.
     av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);
     if (text_length + 2 != avpkt->size) {
-        while (tracksize + 8 <= avpkt->size) {
+        while (m->tracksize + 8 <= avpkt->size) {
             // A box is a minimum of 8 bytes.
-            tsmb = ptr + tracksize - 2;
+            tsmb = ptr + m->tracksize - 2;
             tsmb_size = AV_RB32(tsmb);
             tsmb += 4;
             tsmb_type = AV_RB32(tsmb);
             tsmb += 4;
 
             if (tsmb_size == 1) {
-                if (tracksize + 16 > avpkt->size)
+                if (m->tracksize + 16 > avpkt->size)
                     break;
                 tsmb_size = AV_RB64(tsmb);
                 tsmb += 8;
-                size_var = 18;
+                m->size_var = 16;
             } else
-                size_var = 10;
-            //size_var is equal to 10 or 18 depending on the size of box
+                m->size_var = 8;
+            //size_var is equal to 8 or 16 depending on the size of box
 
-            if (tracksize + tsmb_size > avpkt->size)
+            if (m->tracksize + tsmb_size > avpkt->size)
                 break;
 
-            if (tsmb_type == MKBETAG('s','t','y','l')) {
-                if (tracksize + size_var > avpkt->size)
-                    break;
-                style_entries = AV_RB16(tsmb);
-                tsmb += 2;
-
-                // A single style record is of length 12 bytes.
-                if (tracksize + size_var + style_entries * 12 > avpkt->size)
-                    break;
-
-                for(i = 0; i < style_entries; i++) {
-                    style_pos = ptr + AV_RB16(tsmb);
-                    index = i;
-                    av_dynarray_add(&style_start, &index, style_pos);
-                    tsmb += 2;
-                    style_pos = ptr + AV_RB16(tsmb);
-                    index = i;
-                    av_dynarray_add(&style_end, &index, style_pos);
-                    tsmb += 2;
-                    // fontID = AV_RB16(tsmb);
-                    tsmb += 2;
-                    flag = av_malloc(1);
-                    if (!flag)
-                        return AVERROR(ENOMEM);
-                    *flag = AV_RB8(tsmb);
-                    index = i;
-                    av_dynarray_add(&style_flags, &index, flag);
-                    //fontsize=AV_RB8(tsmb);
-                    tsmb += 2;
-                    // text-color-rgba
-                    tsmb += 4;
+            for (size_t i = 0; i < box_count; i++) {
+                if (tsmb_type == box_types[i].type) {
+                    if (m->tracksize + m->size_var + box_types[i].base_size > avpkt->size)
+                        break;
+                    ret_tsmb = box_types[i].decode(tsmb, m, avpkt);
+                    if (ret_tsmb == -1)
+                        break;
                 }
-                text_to_ass(&buf, ptr, end, style_start, style_end, style_flags, style_entries);
-
-                for(i = 0; i < style_entries; i++) {
-                    av_freep(&style_flags[i]);
-                }
-                av_freep(&style_start);
-                av_freep(&style_end);
-                av_freep(&style_flags);
             }
-            tracksize = tracksize + tsmb_size;
+            m->tracksize = m->tracksize + tsmb_size;
         }
+        text_to_ass(&buf, ptr, end, m);
+        mov_text_cleanup(m);
     } else
-        text_to_ass(&buf, ptr, end, NULL, NULL, 0, 0);
+        text_to_ass(&buf, ptr, end, m);
 
     ret = ff_ass_add_rect_bprint(sub, &buf, ts_start, ts_end - ts_start);
     av_bprint_finalize(&buf, NULL);
@@ -213,11 +514,20 @@ static int mov_text_decode_frame(AVCodecContext *avctx,
     return avpkt->size;
 }
 
+/* Codec close callback: release the font table kept in the context. */
+static int mov_text_decode_close(AVCodecContext *avctx)
+{
+    mov_text_cleanup_ftab(avctx->priv_data);
+    return 0;
+}
+
 AVCodec ff_movtext_decoder = {
     .name         = "mov_text",
     .long_name    = NULL_IF_CONFIG_SMALL("3GPP Timed Text subtitle"),
     .type         = AVMEDIA_TYPE_SUBTITLE,
     .id           = AV_CODEC_ID_MOV_TEXT,
+    .priv_data_size = sizeof(MovTextContext),
     .init         = mov_text_init,
     .decode       = mov_text_decode_frame,
+    .close        = mov_text_decode_close,
 };
diff --git a/libavcodec/movtextenc.c b/libavcodec/movtextenc.c
index 1b8f454f..6d42d5f3 100644
--- a/libavcodec/movtextenc.c
+++ b/libavcodec/movtextenc.c
@@ -24,16 +24,130 @@
 #include "libavutil/avassert.h"
 #include "libavutil/avstring.h"
 #include "libavutil/intreadwrite.h"
+#include "libavutil/mem.h"
+#include "libavutil/common.h"
 #include "ass_split.h"
 #include "ass.h"
 
+#define STYLE_FLAG_BOLD         (1<<0)
+#define STYLE_FLAG_ITALIC       (1<<1)
+#define STYLE_FLAG_UNDERLINE    (1<<2)
+#define STYLE_RECORD_SIZE       12
+#define SIZE_ADD                10
+
+#define STYL_BOX   (1<<0)
+#define HLIT_BOX   (1<<1)
+#define HCLR_BOX   (1<<2)
+
+#define av_bprint_append_any(buf, data, size)   av_bprint_append_data(buf, ((const char*)data), size)
+
+typedef struct {
+    uint16_t style_start;   // set via AV_RB16(&text_pos); appended to the output as raw bytes
+    uint16_t style_end;     // set via AV_RB16(&text_pos); appended to the output as raw bytes
+    uint8_t style_flag;     // STYLE_FLAG_* bits
+} StyleBox;
+
+typedef struct {
+    uint16_t start;     // set via AV_RB16(&text_pos) at the first secondary-color change
+    uint16_t end;       // set via AV_RB16(&text_pos) at each later secondary-color change
+} HighlightBox;
+
+typedef struct {
+   uint32_t color;      // ASS color value with the top byte forced to 0xFF
+} HilightcolorBox;
+
 typedef struct {
     ASSSplitContext *ass_ctx;
-    char buffer[2048];
-    char *ptr;
-    char *end;
+    AVBPrint buffer;
+    StyleBox **style_attributes;
+    StyleBox *style_attributes_temp;
+    HighlightBox hlit;
+    HilightcolorBox hclr;
+    int count;
+    uint8_t box_flags;
+    uint16_t style_entries;
+    uint16_t style_fontID;
+    uint8_t style_fontsize;
+    uint32_t style_color;
+    uint16_t text_pos;
 } MovTextContext;
 
+typedef struct {
+    uint32_t type;      // box tag, handed to encode() as tsmb_type
+    void (*encode)(MovTextContext *s, uint32_t tsmb_type);  // appends the box to s->buffer
+} Box;
+
+/* Free the collected style records and leave the list empty. */
+static void mov_text_cleanup(MovTextContext *s)
+{
+    if (s->box_flags & STYL_BOX) {
+        // Counting down leaves count at 0 for whoever appends next.
+        while (s->count > 0)
+            av_freep(&s->style_attributes[--s->count]);
+        av_freep(&s->style_attributes);
+    }
+}
+
+/* Append the 'styl' box for the collected style records, then free them.
+ * Fields are serialized explicitly so output is host-endian independent. */
+static void encode_styl(MovTextContext *s, uint32_t tsmb_type)
+{
+    int j;
+    uint8_t hdr[8];
+    if (s->box_flags & STYL_BOX) {
+        AV_WB32(hdr, s->count * STYLE_RECORD_SIZE + SIZE_ADD);
+        AV_WL32(hdr + 4, tsmb_type); // tag was built with MKTAG()
+        AV_WB16(&s->style_entries, s->count);
+        AV_WB16(&s->style_fontID, 1);
+        s->style_fontsize = 0x12;
+        s->style_color = MKTAG(0xFF, 0xFF, 0xFF, 0xFF);
+        /* fontID, fontsize and color are hard coded until taken from the ASS style */
+        av_bprint_append_any(&s->buffer, hdr, sizeof(hdr));
+        av_bprint_append_any(&s->buffer, &s->style_entries, 2);
+        for (j = 0; j < s->count; j++) {
+            av_bprint_append_any(&s->buffer, &s->style_attributes[j]->style_start, 2);
+            av_bprint_append_any(&s->buffer, &s->style_attributes[j]->style_end, 2);
+            av_bprint_append_any(&s->buffer, &s->style_fontID, 2);
+            av_bprint_append_any(&s->buffer, &s->style_attributes[j]->style_flag, 1);
+            av_bprint_append_any(&s->buffer, &s->style_fontsize, 1);
+            av_bprint_append_any(&s->buffer, &s->style_color, 4);
+        }
+        mov_text_cleanup(s);
+    }
+}
+
+/* Append the 'hlit' box: 8-byte header plus highlight start/end offsets. */
+static void encode_hlit(MovTextContext *s, uint32_t tsmb_type)
+{
+    uint8_t hdr[8];
+    if (s->box_flags & HLIT_BOX) {
+        AV_WB32(hdr, 12);            // box size, big-endian
+        AV_WL32(hdr + 4, tsmb_type); // tag was built with MKTAG()
+        av_bprint_append_any(&s->buffer, hdr, sizeof(hdr));
+        av_bprint_append_any(&s->buffer, &s->hlit.start, 2);
+        av_bprint_append_any(&s->buffer, &s->hlit.end, 2);
+    }
+}
+
+/* Append the 'hclr' box: 8-byte header plus the 4-byte highlight color. */
+static void encode_hclr(MovTextContext *s, uint32_t tsmb_type)
+{
+    uint8_t box[12];
+    if (s->box_flags & HCLR_BOX) {
+        AV_WB32(box, sizeof(box));   // box size, big-endian
+        AV_WL32(box + 4, tsmb_type); // tag was built with MKTAG()
+        AV_WL32(box + 8, s->hclr.color); // low byte first on every host
+        av_bprint_append_any(&s->buffer, box, sizeof(box));
+    }
+}
+
+// Box writers, run in this order after each subtitle rect is parsed.
+static const Box box_types[] = {
+    { MKTAG('s','t','y','l'), encode_styl },
+    { MKTAG('h','l','i','t'), encode_hlit },
+    { MKTAG('h','c','l','r'), encode_hclr },
+};
+static const size_t box_count = FF_ARRAY_ELEMS(box_types);
 
 static av_cold int mov_text_encode_init(AVCodecContext *avctx)
 {
@@ -75,36 +189,134 @@ static av_cold int mov_text_encode_init(AVCodecContext *avctx)
     MovTextContext *s = avctx->priv_data;
 
     avctx->extradata_size = sizeof text_sample_entry;
-    avctx->extradata = av_mallocz(avctx->extradata_size + FF_INPUT_BUFFER_PADDING_SIZE);
+    avctx->extradata = av_mallocz(avctx->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
     if (!avctx->extradata)
         return AVERROR(ENOMEM);
 
+    av_bprint_init(&s->buffer, 0, AV_BPRINT_SIZE_UNLIMITED);
+
     memcpy(avctx->extradata, text_sample_entry, avctx->extradata_size);
 
     s->ass_ctx = ff_ass_split(avctx->subtitle_header);
     return s->ass_ctx ? 0 : AVERROR_INVALIDDATA;
 }
 
+/**
+ * Finish the pending style record at the current text position, store it
+ * and allocate the next pending record, which inherits the stored flags.
+ *
+ * @return 0 on success; -1 after an allocation failure, in which case all
+ *         collected style state has been dropped
+ */
+static int mov_text_style_flush(MovTextContext *s)
+{
+    uint8_t flags = s->style_attributes_temp->style_flag;
+
+    s->style_attributes_temp->style_end = AV_RB16(&s->text_pos);
+    av_dynarray_add(&s->style_attributes, &s->count, s->style_attributes_temp);
+    if (s->style_attributes)
+        s->style_attributes_temp = av_malloc(sizeof(*s->style_attributes_temp));
+    else
+        av_freep(&s->style_attributes_temp); // the record was not stored
+    if (!s->style_attributes_temp) {
+        mov_text_cleanup(s);
+        av_bprint_clear(&s->buffer);
+        s->box_flags &= ~STYL_BOX;
+        return -1;
+    }
+    // Give the new record defined contents even if it is never reopened.
+    s->style_attributes_temp->style_flag = flags;
+    s->style_attributes_temp->style_start = AV_RB16(&s->text_pos);
+    return 0;
+}
+
+/**
+ * ASS override-code callback for bold ('b'), italic ('i') and
+ * underline ('u').
+ *
+ * @param priv  the MovTextContext
+ * @param style tag letter; other letters change no flag bit
+ * @param close 0 turns the style on, any other value turns it off
+ */
+static void mov_text_style_cb(void *priv, const char style, int close)
+{
+    MovTextContext *s = priv;
+    uint8_t bit = 0;
+
+    switch (style) {
+    case 'b': bit = STYLE_FLAG_BOLD;      break;
+    case 'i': bit = STYLE_FLAG_ITALIC;    break;
+    case 'u': bit = STYLE_FLAG_UNDERLINE; break;
+    }
+
+    if (!close) {
+        if (!(s->box_flags & STYL_BOX)) {   //first style entry
+            // Whatever is still pending belongs to an earlier event.
+            av_freep(&s->style_attributes_temp);
+            s->style_attributes_temp = av_malloc(sizeof(*s->style_attributes_temp));
+            if (!s->style_attributes_temp) {
+                av_bprint_clear(&s->buffer);
+                s->box_flags &= ~STYL_BOX;
+                return;
+            }
+            s->style_attributes_temp->style_flag = 0;
+        } else if (s->style_attributes_temp->style_flag) {
+            //break the style record here and start a new one
+            if (mov_text_style_flush(s) < 0)
+                return;
+        }
+        s->style_attributes_temp->style_start = AV_RB16(&s->text_pos);
+        s->style_attributes_temp->style_flag |= bit;
+    } else {
+        // No style has been opened yet: there is no record to terminate,
+        // and style_attributes_temp may still be NULL.
+        if (!(s->box_flags & STYL_BOX))
+            return;
+        if (mov_text_style_flush(s) < 0)
+            return;
+        s->style_attributes_temp->style_flag &= ~bit;
+    }
+    s->box_flags |= STYL_BOX;
+}
+
+/* ASS color callback. Only secondary-color (color_id == 2) changes are used:
+ * the first opens the highlight and fixes its color, each later one moves the
+ * end, because movtext allows only one highlight box per sample. */
+static void mov_text_color_cb(void *priv, unsigned int color, unsigned int color_id)
+{
+    MovTextContext *s = priv;
+    if (color_id == 2) {    //secondary color changes
+        if (s->box_flags & HLIT_BOX) {  //close tag
+            s->hlit.end = AV_RB16(&s->text_pos);
+        } else {
+            s->box_flags |= HCLR_BOX;
+            s->box_flags |= HLIT_BOX;
+            s->hlit.start = AV_RB16(&s->text_pos);
+            // 0xFFU: shifting a signed 0xFF into the sign bit is undefined
+            s->hclr.color = color | (0xFFU << 24);  //set alpha value to FF
+        }
+    }
+}
+
 static void mov_text_text_cb(void *priv, const char *text, int len)
 {
     MovTextContext *s = priv;
-    av_assert0(s->end >= s->ptr);
-    av_strlcpy(s->ptr, text, FFMIN(s->end - s->ptr, len + 1));
-    s->ptr += FFMIN(s->end - s->ptr, len);
+    av_bprint_append_data(&s->buffer, text, len);
+    s->text_pos += len;
 }
 
 static void mov_text_new_line_cb(void *priv, int forced)
 {
     MovTextContext *s = priv;
-    av_assert0(s->end >= s->ptr);
-    av_strlcpy(s->ptr, "\n", FFMIN(s->end - s->ptr, 2));
-    if (s->end > s->ptr)
-        s->ptr++;
+    av_bprint_append_data(&s->buffer, "\n", 1);
+    s->text_pos += 1;
 }
 
 static const ASSCodesCallbacks mov_text_callbacks = {
     .text     = mov_text_text_cb,
     .new_line = mov_text_new_line_cb,
+    .style    = mov_text_style_cb,
+    .color    = mov_text_color_cb,
 };
 
 static int mov_text_encode_frame(AVCodecContext *avctx, unsigned char *buf,
@@ -112,11 +324,13 @@ static int mov_text_encode_frame(AVCodecContext *avctx, unsigned char *buf,
 {
     MovTextContext *s = avctx->priv_data;
     ASSDialog *dialog;
-    int i, len, num;
-
-    s->ptr = s->buffer;
-    s->end = s->ptr + sizeof(s->buffer);
+    int i, num, length;
+    size_t j;
 
+    s->text_pos = 0;
+    s->count = 0;
+    s->box_flags = 0;
+    s->style_entries = 0;
     for (i = 0; i < sub->num_rects; i++) {
 
         if (sub->rects[i]->type != SUBTITLE_ASS) {
@@ -128,28 +342,44 @@ static int mov_text_encode_frame(AVCodecContext *avctx, unsigned char *buf,
         for (; dialog && num--; dialog++) {
             ff_ass_split_override_codes(&mov_text_callbacks, s, dialog->text);
         }
-    }
 
-    if (s->ptr == s->buffer)
-        return 0;
+        for (j = 0; j < box_count; j++) {
+            box_types[j].encode(s, box_types[j].type);
+        }
+    }
 
-    AV_WB16(buf, strlen(s->buffer));
+    AV_WB16(buf, s->text_pos);
     buf += 2;
 
-    len = av_strlcpy(buf, s->buffer, bufsize - 2);
+    if (!av_bprint_is_complete(&s->buffer)) {
+        length = AVERROR(ENOMEM);
+        goto exit;
+    }
+
+    if (!s->buffer.len) {
+        length = 0;
+        goto exit;
+    }
 
-    if (len > bufsize-3) {
+    if (s->buffer.len > bufsize - 3) {
         av_log(avctx, AV_LOG_ERROR, "Buffer too small for ASS event.\n");
-        return AVERROR(EINVAL);
+        length = AVERROR(EINVAL);
+        goto exit;
     }
 
-    return len + 2;
+    memcpy(buf, s->buffer.str, s->buffer.len);
+    length = s->buffer.len + 2;
+
+exit:
+    av_bprint_clear(&s->buffer);
+    return length;
 }
 
 static int mov_text_encode_close(AVCodecContext *avctx)
 {
     MovTextContext *s = avctx->priv_data;
     ff_ass_split_free(s->ass_ctx);
+    av_bprint_finalize(&s->buffer, NULL);
     return 0;
 }
 
diff --git a/libavcodec/mp3_header_decompress_bsf.c b/libavcodec/mp3_header_decompress_bsf.c
index df455322..95c0b5b7 100644
--- a/libavcodec/mp3_header_decompress_bsf.c
+++ b/libavcodec/mp3_header_decompress_bsf.c
@@ -71,8 +71,8 @@ static int mp3_header_decompress(AVBitStreamFilterContext *bsfc, AVCodecContext
     header |= (frame_size == buf_size + 4)<<16; //FIXME actually set a correct crc instead of 0
 
     *poutbuf_size= frame_size;
-    *poutbuf= av_malloc(frame_size + FF_INPUT_BUFFER_PADDING_SIZE);
-    memcpy(*poutbuf + frame_size - buf_size, buf, buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
+    *poutbuf= av_malloc(frame_size + AV_INPUT_BUFFER_PADDING_SIZE);
+    memcpy(*poutbuf + frame_size - buf_size, buf, buf_size + AV_INPUT_BUFFER_PADDING_SIZE);
 
     if(avctx->channels==2){
         uint8_t *p= *poutbuf + frame_size - buf_size;
diff --git a/libavcodec/mpc7.c b/libavcodec/mpc7.c
index 0f1e34a8..d38b22a2 100644
--- a/libavcodec/mpc7.c
+++ b/libavcodec/mpc7.c
@@ -337,7 +337,7 @@ AVCodec ff_mpc7_decoder = {
     .close          = mpc7_decode_close,
     .decode         = mpc7_decode_frame,
     .flush          = mpc7_decode_flush,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P,
                                                       AV_SAMPLE_FMT_NONE },
 };
diff --git a/libavcodec/mpc8.c b/libavcodec/mpc8.c
index 29c65f9e..a8feb6c4 100644
--- a/libavcodec/mpc8.c
+++ b/libavcodec/mpc8.c
@@ -440,7 +440,7 @@ AVCodec ff_mpc8_decoder = {
     .init           = mpc8_decode_init,
     .decode         = mpc8_decode_frame,
     .flush          = mpc8_decode_flush,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P,
                                                       AV_SAMPLE_FMT_NONE },
 };
diff --git a/libavcodec/mpeg12.h b/libavcodec/mpeg12.h
index 118410f6..16ca195a 100644
--- a/libavcodec/mpeg12.h
+++ b/libavcodec/mpeg12.h
@@ -22,29 +22,12 @@
 #ifndef AVCODEC_MPEG12_H
 #define AVCODEC_MPEG12_H
 
+#include "mpeg12vlc.h"
 #include "mpegvideo.h"
 
-#define DC_VLC_BITS 9
-#define MV_VLC_BITS 9
-#define TEX_VLC_BITS 9
-
-#define MBINCR_VLC_BITS 9
-#define MB_PAT_VLC_BITS 9
-#define MB_PTYPE_VLC_BITS 6
-#define MB_BTYPE_VLC_BITS 6
-
-extern VLC ff_dc_lum_vlc;
-extern VLC ff_dc_chroma_vlc;
-extern VLC ff_mbincr_vlc;
-extern VLC ff_mb_ptype_vlc;
-extern VLC ff_mb_btype_vlc;
-extern VLC ff_mb_pat_vlc;
-extern VLC ff_mv_vlc;
-
 extern uint8_t ff_mpeg12_static_rl_table_store[2][2][2*MAX_RUN + MAX_LEVEL + 3];
 
 void ff_mpeg12_common_init(MpegEncContext *s);
-void ff_mpeg12_init_vlcs(void);
 
 static inline int decode_dc(GetBitContext *gb, int component)
 {
diff --git a/libavcodec/mpeg12dec.c b/libavcodec/mpeg12dec.c
index b0e5ae9a..cc8ace8e 100644
--- a/libavcodec/mpeg12dec.c
+++ b/libavcodec/mpeg12dec.c
@@ -44,6 +44,7 @@
 #include "mpegutils.h"
 #include "mpegvideo.h"
 #include "mpegvideodata.h"
+#include "profiles.h"
 #include "thread.h"
 #include "version.h"
 #include "vdpau_compat.h"
@@ -96,13 +97,6 @@ static const uint32_t btype2mb_type[11] = {
     MB_TYPE_QUANT | MB_TYPE_L0L1 | MB_TYPE_CBP,
 };
 
-static const uint8_t non_linear_qscale[32] = {
-     0,  1,  2,  3,  4,  5,   6,   7,
-     8, 10, 12, 14, 16, 18,  20,  22,
-    24, 28, 32, 36, 40, 44,  48,  52,
-    56, 64, 72, 80, 88, 96, 104, 112,
-};
-
 /* as H.263, but only 17 codes */
 static int mpeg_decode_motion(MpegEncContext *s, int fcode, int pred)
 {
@@ -719,7 +713,7 @@ static inline int get_qscale(MpegEncContext *s)
 {
     int qscale = get_bits(&s->gb, 5);
     if (s->q_scale_type)
-        return non_linear_qscale[qscale];
+        return ff_mpeg2_non_linear_qscale[qscale];
     else
         return qscale << 1;
 }
@@ -844,7 +838,7 @@ static int mpeg_decode_mb(MpegEncContext *s, int16_t block[12][64])
             ff_xvmc_pack_pblocks(s, -1); // inter are always full blocks
 
         if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
-            if (s->avctx->flags2 & CODEC_FLAG2_FAST) {
+            if (s->avctx->flags2 & AV_CODEC_FLAG2_FAST) {
                 for (i = 0; i < 6; i++)
                     mpeg2_fast_decode_block_intra(s, *s->pblocks[i], i);
             } else {
@@ -1064,7 +1058,7 @@ static int mpeg_decode_mb(MpegEncContext *s, int16_t block[12][64])
                 ff_xvmc_pack_pblocks(s, cbp);
 
             if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
-                if (s->avctx->flags2 & CODEC_FLAG2_FAST) {
+                if (s->avctx->flags2 & AV_CODEC_FLAG2_FAST) {
                     for (i = 0; i < 6; i++) {
                         if (cbp & 32)
                             mpeg2_fast_decode_block_non_intra(s, *s->pblocks[i], i);
@@ -1086,7 +1080,7 @@ static int mpeg_decode_mb(MpegEncContext *s, int16_t block[12][64])
                     }
                 }
             } else {
-                if (s->avctx->flags2 & CODEC_FLAG2_FAST) {
+                if (s->avctx->flags2 & AV_CODEC_FLAG2_FAST) {
                     for (i = 0; i < 6; i++) {
                         if (cbp & 32)
                             mpeg1_fast_decode_block_inter(s, *s->pblocks[i], i);
@@ -1137,6 +1131,7 @@ static av_cold int mpeg_decode_init(AVCodecContext *avctx)
     ff_mpeg12_common_init(&s->mpeg_enc_ctx);
     ff_mpeg12_init_vlcs();
 
+    s2->chroma_format              = 1;
     s->mpeg_enc_ctx_allocated      = 0;
     s->mpeg_enc_ctx.picture_number = 0;
     s->repeat_field                = 0;
@@ -1145,6 +1140,7 @@ static av_cold int mpeg_decode_init(AVCodecContext *avctx)
     return 0;
 }
 
+#if HAVE_THREADS
 static int mpeg_decode_update_thread_context(AVCodecContext *avctx,
                                              const AVCodecContext *avctx_from)
 {
@@ -1169,6 +1165,7 @@ static int mpeg_decode_update_thread_context(AVCodecContext *avctx,
 
     return 0;
 }
+#endif
 
 static void quant_matrix_rebuild(uint16_t *matrix, const uint8_t *old_perm,
                                  const uint8_t *new_perm)
@@ -1186,8 +1183,10 @@ static const enum AVPixelFormat mpeg1_hwaccel_pixfmt_list_420[] = {
 #if CONFIG_MPEG1_XVMC_HWACCEL
     AV_PIX_FMT_XVMC,
 #endif
-#if CONFIG_MPEG1_VDPAU_HWACCEL
+#if CONFIG_MPEG1_VDPAU_DECODER && FF_API_VDPAU
     AV_PIX_FMT_VDPAU_MPEG1,
+#endif
+#if CONFIG_MPEG1_VDPAU_HWACCEL
     AV_PIX_FMT_VDPAU,
 #endif
     AV_PIX_FMT_YUV420P,
@@ -1198,8 +1197,10 @@ static const enum AVPixelFormat mpeg2_hwaccel_pixfmt_list_420[] = {
 #if CONFIG_MPEG2_XVMC_HWACCEL
     AV_PIX_FMT_XVMC,
 #endif
-#if CONFIG_MPEG2_VDPAU_HWACCEL
+#if CONFIG_MPEG_VDPAU_DECODER && FF_API_VDPAU
     AV_PIX_FMT_VDPAU_MPEG2,
+#endif
+#if CONFIG_MPEG2_VDPAU_HWACCEL
     AV_PIX_FMT_VDPAU,
 #endif
 #if CONFIG_MPEG2_DXVA2_HWACCEL
@@ -1209,7 +1210,10 @@ static const enum AVPixelFormat mpeg2_hwaccel_pixfmt_list_420[] = {
     AV_PIX_FMT_D3D11VA_VLD,
 #endif
 #if CONFIG_MPEG2_VAAPI_HWACCEL
-    AV_PIX_FMT_VAAPI_VLD,
+    AV_PIX_FMT_VAAPI,
+#endif
+#if CONFIG_MPEG2_VIDEOTOOLBOX_HWACCEL
+    AV_PIX_FMT_VIDEOTOOLBOX,
 #endif
     AV_PIX_FMT_YUV420P,
     AV_PIX_FMT_NONE
@@ -1225,9 +1229,11 @@ static const enum AVPixelFormat mpeg12_pixfmt_list_444[] = {
     AV_PIX_FMT_NONE
 };
 
+#if FF_API_VDPAU
 static inline int uses_vdpau(AVCodecContext *avctx) {
     return avctx->pix_fmt == AV_PIX_FMT_VDPAU_MPEG1 || avctx->pix_fmt == AV_PIX_FMT_VDPAU_MPEG2;
 }
+#endif
 
 static enum AVPixelFormat mpeg_get_pixelformat(AVCodecContext *avctx)
 {
@@ -1235,7 +1241,7 @@ static enum AVPixelFormat mpeg_get_pixelformat(AVCodecContext *avctx)
     MpegEncContext *s = &s1->mpeg_enc_ctx;
     const enum AVPixelFormat *pix_fmts;
 
-    if (CONFIG_GRAY && (avctx->flags & CODEC_FLAG_GRAY))
+    if (CONFIG_GRAY && (avctx->flags & AV_CODEC_FLAG_GRAY))
         return AV_PIX_FMT_GRAY8;
 
     if (s->chroma_format < 2)
@@ -1253,7 +1259,11 @@ static enum AVPixelFormat mpeg_get_pixelformat(AVCodecContext *avctx)
 static void setup_hwaccel_for_pixfmt(AVCodecContext *avctx)
 {
     // until then pix_fmt may be changed right after codec init
-    if (avctx->hwaccel || uses_vdpau(avctx))
+    if (avctx->hwaccel
+#if FF_API_VDPAU
+        || uses_vdpau(avctx)
+#endif
+        )
         if (avctx->idct_algo == FF_IDCT_AUTO)
             avctx->idct_algo = FF_IDCT_SIMPLE;
 
@@ -1263,7 +1273,9 @@ static void setup_hwaccel_for_pixfmt(AVCodecContext *avctx)
 
         s->pack_pblocks = 1;
 #if FF_API_XVMC
+FF_DISABLE_DEPRECATION_WARNINGS
         avctx->xvmc_acceleration = 2;
+FF_ENABLE_DEPRECATION_WARNINGS
 #endif /* FF_API_XVMC */
     }
 }
@@ -1381,6 +1393,7 @@ static int mpeg_decode_postinit(AVCodecContext *avctx)
             case 1: avctx->chroma_sample_location = AVCHROMA_LOC_LEFT; break;
             case 2:
             case 3: avctx->chroma_sample_location = AVCHROMA_LOC_TOPLEFT; break;
+            default: av_assert0(0);
             }
         } // MPEG-2
 
@@ -1463,17 +1476,23 @@ static void mpeg_decode_sequence_extension(Mpeg1Context *s1)
     s->avctx->level         = get_bits(&s->gb, 4);
     s->progressive_sequence = get_bits1(&s->gb);   /* progressive_sequence */
     s->chroma_format        = get_bits(&s->gb, 2); /* chroma_format 1=420, 2=422, 3=444 */
+
+    if (!s->chroma_format) {
+        s->chroma_format = 1;
+        av_log(s->avctx, AV_LOG_WARNING, "Chroma format invalid\n");
+    }
+
     horiz_size_ext          = get_bits(&s->gb, 2);
     vert_size_ext           = get_bits(&s->gb, 2);
     s->width  |= (horiz_size_ext << 12);
     s->height |= (vert_size_ext  << 12);
     bit_rate_ext = get_bits(&s->gb, 12);  /* XXX: handle it */
-    s->bit_rate += (bit_rate_ext << 18) * 400;
+    s->bit_rate += (bit_rate_ext << 18) * 400LL;
     check_marker(&s->gb, "after bit rate extension");
     s->avctx->rc_buffer_size += get_bits(&s->gb, 8) * 1024 * 16 << 10;
 
     s->low_delay = get_bits1(&s->gb);
-    if (s->avctx->flags & CODEC_FLAG_LOW_DELAY)
+    if (s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY)
         s->low_delay = 1;
 
     s1->frame_rate_ext.num = get_bits(&s->gb, 2) + 1;
@@ -1484,7 +1503,7 @@ static void mpeg_decode_sequence_extension(Mpeg1Context *s1)
 
     if (s->avctx->debug & FF_DEBUG_PICT_INFO)
         av_log(s->avctx, AV_LOG_DEBUG,
-               "profile: %d, level: %d ps: %d cf:%d vbv buffer: %d, bitrate:%d\n",
+               "profile: %d, level: %d ps: %d cf:%d vbv buffer: %d, bitrate:%"PRId64"\n",
                s->avctx->profile, s->avctx->level, s->progressive_sequence, s->chroma_format,
                s->avctx->rc_buffer_size, s->bit_rate);
 }
@@ -1685,6 +1704,7 @@ static int mpeg_field_start(MpegEncContext *s, const uint8_t *buf, int buf_size)
             if (sd)
                 memcpy(sd->data, s1->a53_caption, s1->a53_caption_size);
             av_freep(&s1->a53_caption);
+            avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
         }
 
         if (s1->has_stereo3d) {
@@ -1719,9 +1739,11 @@ static int mpeg_field_start(MpegEncContext *s, const uint8_t *buf, int buf_size)
 
         if (s->avctx->hwaccel &&
             (s->avctx->slice_flags & SLICE_FLAG_ALLOW_FIELD)) {
-            if (s->avctx->hwaccel->end_frame(s->avctx) < 0)
+            if ((ret = s->avctx->hwaccel->end_frame(s->avctx)) < 0) {
                 av_log(avctx, AV_LOG_ERROR,
                        "hardware accelerator failed to decode first field\n");
+                return ret;
+            }
         }
 
         for (i = 0; i < 4; i++) {
@@ -1928,7 +1950,7 @@ static int mpeg_decode_slice(MpegEncContext *s, int mb_y,
                     (left && show_bits(&s->gb, FFMIN(left, 23)) && !is_d10) ||
                     ((avctx->err_recognition & (AV_EF_BITSTREAM | AV_EF_AGGRESSIVE)) && left > 8)) {
                     av_log(avctx, AV_LOG_ERROR, "end mismatch left=%d %0X\n",
-                           left, show_bits(&s->gb, FFMIN(left, 23)));
+                           left, left>0 ? show_bits(&s->gb, FFMIN(left, 23)) : 0);
                     return AVERROR_INVALIDDATA;
                 } else
                     goto eos;
@@ -1938,7 +1960,6 @@ static int mpeg_decode_slice(MpegEncContext *s, int mb_y,
             // area, we detect this here instead of running into the end expecting
             // more data
             if (s->mb_y >= ((s->height + 15) >> 4) &&
-                s->progressive_frame &&
                 !s->progressive_sequence &&
                 get_bits_left(&s->gb) <= 8 &&
                 get_bits_left(&s->gb) >= 0 &&
@@ -2079,9 +2100,12 @@ static int slice_end(AVCodecContext *avctx, AVFrame *pict)
         return 0;
 
     if (s->avctx->hwaccel) {
-        if (s->avctx->hwaccel->end_frame(s->avctx) < 0)
+        int ret = s->avctx->hwaccel->end_frame(s->avctx);
+        if (ret < 0) {
             av_log(avctx, AV_LOG_ERROR,
                    "hardware accelerator failed to decode picture\n");
+            return ret;
+        }
     }
 
     /* end of slice reached */
@@ -2148,12 +2172,10 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx,
                "frame_rate_index %d is invalid\n", s->frame_rate_index);
         s->frame_rate_index = 1;
     }
-    s->bit_rate = get_bits(&s->gb, 18) * 400;
+    s->bit_rate = get_bits(&s->gb, 18) * 400LL;
     if (check_marker(&s->gb, "in sequence header") == 0) {
         return AVERROR_INVALIDDATA;
     }
-    s->width  = width;
-    s->height = height;
 
     s->avctx->rc_buffer_size = get_bits(&s->gb, 10) * 1024 * 16;
     skip_bits(&s->gb, 1);
@@ -2185,6 +2207,9 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx,
         return AVERROR_INVALIDDATA;
     }
 
+    s->width  = width;
+    s->height = height;
+
     /* We set MPEG-2 parameters so that it emulates MPEG-1. */
     s->progressive_sequence = 1;
     s->progressive_frame    = 1;
@@ -2196,11 +2221,11 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx,
     s->avctx->codec_id      = AV_CODEC_ID_MPEG1VIDEO;
     s->out_format           = FMT_MPEG1;
     s->swap_uv              = 0; // AFAIK VCR2 does not have SEQ_HEADER
-    if (s->avctx->flags & CODEC_FLAG_LOW_DELAY)
+    if (s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY)
         s->low_delay = 1;
 
     if (s->avctx->debug & FF_DEBUG_PICT_INFO)
-        av_log(s->avctx, AV_LOG_DEBUG, "vbv buffer: %d, bitrate:%d, aspect_ratio_info: %d \n",
+        av_log(s->avctx, AV_LOG_DEBUG, "vbv buffer: %d, bitrate:%"PRId64", aspect_ratio_info: %d \n",
                s->avctx->rc_buffer_size, s->bit_rate, s->aspect_ratio_info);
 
     return 0;
@@ -2398,7 +2423,13 @@ static void mpeg_decode_gop(AVCodecContext *avctx,
 
     init_get_bits(&s->gb, buf, buf_size * 8);
 
-    tc = avctx->timecode_frame_start = get_bits(&s->gb, 25);
+    tc = s-> timecode_frame_start = get_bits(&s->gb, 25);
+
+#if FF_API_PRIVATE_OPT
+FF_DISABLE_DEPRECATION_WARNINGS
+    avctx->timecode_frame_start = tc;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
     s->closed_gop = get_bits1(&s->gb);
     /* broken_link indicate that after editing the
@@ -2445,9 +2476,11 @@ static int decode_chunks(AVCodecContext *avctx, AVFrame *picture,
                         s2->er.error_count += s2->thread_context[i]->er.error_count;
                 }
 
+#if FF_API_VDPAU
                 if ((CONFIG_MPEG_VDPAU_DECODER || CONFIG_MPEG1_VDPAU_DECODER)
                     && uses_vdpau(avctx))
                     ff_vdpau_mpeg_picture_complete(s2, buf, buf_size, s->slice_count);
+#endif
 
                 ret = slice_end(avctx, picture);
                 if (ret < 0)
@@ -2649,7 +2682,7 @@ static int decode_chunks(AVCodecContext *avctx, AVFrame *picture,
                         }
                     }
                 }
-                if (s2->pict_type == AV_PICTURE_TYPE_I || (s2->avctx->flags2 & CODEC_FLAG2_SHOW_ALL))
+                if (s2->pict_type == AV_PICTURE_TYPE_I || (s2->avctx->flags2 & AV_CODEC_FLAG2_SHOW_ALL))
                     s->sync = 1;
                 if (!s2->next_picture_ptr) {
                     /* Skip P-frames if we do not have a reference frame or
@@ -2696,10 +2729,12 @@ static int decode_chunks(AVCodecContext *avctx, AVFrame *picture,
                     return AVERROR_INVALIDDATA;
                 }
 
+#if FF_API_VDPAU
                 if (uses_vdpau(avctx)) {
                     s->slice_count++;
                     break;
                 }
+#endif
 
                 if (HAVE_THREADS &&
                     (avctx->active_thread_type & FF_THREAD_SLICE) &&
@@ -2770,7 +2805,7 @@ static int mpeg_decode_frame(AVCodecContext *avctx, void *data,
         return buf_size;
     }
 
-    if (s2->avctx->flags & CODEC_FLAG_TRUNCATED) {
+    if (s2->avctx->flags & AV_CODEC_FLAG_TRUNCATED) {
         int next = ff_mpeg1_find_frame_end(&s2->parse_context, buf,
                                            buf_size, NULL);
 
@@ -2802,9 +2837,21 @@ static int mpeg_decode_frame(AVCodecContext *avctx, void *data,
     }
 
     ret = decode_chunks(avctx, picture, got_output, buf, buf_size);
-    if (ret<0 || *got_output)
+    if (ret<0 || *got_output) {
         s2->current_picture_ptr = NULL;
 
+        if (s2->timecode_frame_start != -1 && *got_output) {
+            AVFrameSideData *tcside = av_frame_new_side_data(picture,
+                                                             AV_FRAME_DATA_GOP_TIMECODE,
+                                                             sizeof(int64_t));
+            if (!tcside)
+                return AVERROR(ENOMEM);
+            memcpy(tcside->data, &s2->timecode_frame_start, sizeof(int64_t));
+
+            s2->timecode_frame_start = -1;
+        }
+    }
+
     return ret;
 }
 
@@ -2827,18 +2874,6 @@ static av_cold int mpeg_decode_end(AVCodecContext *avctx)
     return 0;
 }
 
-static const AVProfile mpeg2_video_profiles[] = {
-    { FF_PROFILE_MPEG2_422,          "4:2:2"              },
-    { FF_PROFILE_MPEG2_HIGH,         "High"               },
-    { FF_PROFILE_MPEG2_SS,           "Spatially Scalable" },
-    { FF_PROFILE_MPEG2_SNR_SCALABLE, "SNR Scalable"       },
-    { FF_PROFILE_MPEG2_MAIN,         "Main"               },
-    { FF_PROFILE_MPEG2_SIMPLE,       "Simple"             },
-    { FF_PROFILE_RESERVED,           "Reserved"           },
-    { FF_PROFILE_RESERVED,           "Reserved"           },
-    { FF_PROFILE_UNKNOWN                                  },
-};
-
 AVCodec ff_mpeg1video_decoder = {
     .name                  = "mpeg1video",
     .long_name             = NULL_IF_CONFIG_SMALL("MPEG-1 video"),
@@ -2848,9 +2883,9 @@ AVCodec ff_mpeg1video_decoder = {
     .init                  = mpeg_decode_init,
     .close                 = mpeg_decode_end,
     .decode                = mpeg_decode_frame,
-    .capabilities          = CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 |
-                             CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY |
-                             CODEC_CAP_SLICE_THREADS,
+    .capabilities          = AV_CODEC_CAP_DRAW_HORIZ_BAND | AV_CODEC_CAP_DR1 |
+                             AV_CODEC_CAP_TRUNCATED | AV_CODEC_CAP_DELAY |
+                             AV_CODEC_CAP_SLICE_THREADS,
     .flush                 = flush,
     .max_lowres            = 3,
     .update_thread_context = ONLY_IF_THREADS_ENABLED(mpeg_decode_update_thread_context)
@@ -2865,12 +2900,12 @@ AVCodec ff_mpeg2video_decoder = {
     .init           = mpeg_decode_init,
     .close          = mpeg_decode_end,
     .decode         = mpeg_decode_frame,
-    .capabilities   = CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 |
-                      CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY |
-                      CODEC_CAP_SLICE_THREADS,
+    .capabilities   = AV_CODEC_CAP_DRAW_HORIZ_BAND | AV_CODEC_CAP_DR1 |
+                      AV_CODEC_CAP_TRUNCATED | AV_CODEC_CAP_DELAY |
+                      AV_CODEC_CAP_SLICE_THREADS,
     .flush          = flush,
     .max_lowres     = 3,
-    .profiles       = NULL_IF_CONFIG_SMALL(mpeg2_video_profiles),
+    .profiles       = NULL_IF_CONFIG_SMALL(ff_mpeg2_video_profiles),
 };
 
 //legacy decoder
@@ -2883,13 +2918,14 @@ AVCodec ff_mpegvideo_decoder = {
     .init           = mpeg_decode_init,
     .close          = mpeg_decode_end,
     .decode         = mpeg_decode_frame,
-    .capabilities   = CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY | CODEC_CAP_SLICE_THREADS,
+    .capabilities   = AV_CODEC_CAP_DRAW_HORIZ_BAND | AV_CODEC_CAP_DR1 | AV_CODEC_CAP_TRUNCATED | AV_CODEC_CAP_DELAY | AV_CODEC_CAP_SLICE_THREADS,
     .flush          = flush,
     .max_lowres     = 3,
 };
 
 #if FF_API_XVMC
 #if CONFIG_MPEG_XVMC_DECODER
+FF_DISABLE_DEPRECATION_WARNINGS
 static av_cold int mpeg_mc_decode_init(AVCodecContext *avctx)
 {
     if (avctx->active_thread_type & FF_THREAD_SLICE)
@@ -2916,15 +2952,16 @@ AVCodec ff_mpeg_xvmc_decoder = {
     .init           = mpeg_mc_decode_init,
     .close          = mpeg_decode_end,
     .decode         = mpeg_decode_frame,
-    .capabilities   = CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 |
-                      CODEC_CAP_TRUNCATED | CODEC_CAP_HWACCEL | CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_DRAW_HORIZ_BAND | AV_CODEC_CAP_DR1 |
+                      AV_CODEC_CAP_TRUNCATED | CODEC_CAP_HWACCEL |
+                      AV_CODEC_CAP_DELAY,
     .flush          = flush,
 };
-
+FF_ENABLE_DEPRECATION_WARNINGS
 #endif
 #endif /* FF_API_XVMC */
 
-#if CONFIG_MPEG_VDPAU_DECODER
+#if CONFIG_MPEG_VDPAU_DECODER && FF_API_VDPAU
 AVCodec ff_mpeg_vdpau_decoder = {
     .name           = "mpegvideo_vdpau",
     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-1/2 video (VDPAU acceleration)"),
@@ -2934,13 +2971,13 @@ AVCodec ff_mpeg_vdpau_decoder = {
     .init           = mpeg_decode_init,
     .close          = mpeg_decode_end,
     .decode         = mpeg_decode_frame,
-    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED |
-                      CODEC_CAP_HWACCEL_VDPAU | CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_TRUNCATED |
+                      AV_CODEC_CAP_HWACCEL_VDPAU | AV_CODEC_CAP_DELAY,
     .flush          = flush,
 };
 #endif
 
-#if CONFIG_MPEG1_VDPAU_DECODER
+#if CONFIG_MPEG1_VDPAU_DECODER && FF_API_VDPAU
 AVCodec ff_mpeg1_vdpau_decoder = {
     .name           = "mpeg1video_vdpau",
     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-1 video (VDPAU acceleration)"),
@@ -2950,8 +2987,8 @@ AVCodec ff_mpeg1_vdpau_decoder = {
     .init           = mpeg_decode_init,
     .close          = mpeg_decode_end,
     .decode         = mpeg_decode_frame,
-    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED |
-                      CODEC_CAP_HWACCEL_VDPAU | CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_TRUNCATED |
+                      AV_CODEC_CAP_HWACCEL_VDPAU | AV_CODEC_CAP_DELAY,
     .flush          = flush,
 };
 #endif
diff --git a/libavcodec/mpeg12enc.c b/libavcodec/mpeg12enc.c
index d131e482..ea45c070 100644
--- a/libavcodec/mpeg12enc.c
+++ b/libavcodec/mpeg12enc.c
@@ -42,17 +42,12 @@
 #include "mpegutils.h"
 #include "mpegvideo.h"
 
-
-static const uint8_t inv_non_linear_qscale[] = {
-    0, 2, 4, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16,
-};
-
 static const uint8_t svcd_scan_offset_placeholder[] = {
     0x10, 0x0E, 0x00, 0x80, 0x81, 0x00, 0x80,
     0x81, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 };
 
-static uint8_t mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
+static uint8_t mv_penalty[MAX_FCODE + 1][MAX_DMV * 2 + 1];
 static uint8_t fcode_tab[MAX_MV * 2 + 1];
 
 static uint8_t uni_mpeg1_ac_vlc_len[64 * 64 * 2];
@@ -144,9 +139,6 @@ static av_cold int encode_init(AVCodecContext *avctx)
 {
     MpegEncContext *s = avctx->priv_data;
 
-    if (avctx->codec_id == AV_CODEC_ID_MPEG1VIDEO && avctx->height > 2800)
-        avctx->thread_count = 1;
-
     if (ff_mpv_encode_init(avctx) < 0)
         return -1;
 
@@ -205,7 +197,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
         }
     }
 
-    s->drop_frame_timecode = s->drop_frame_timecode || !!(avctx->flags2 & CODEC_FLAG2_DROP_FRAME_TIMECODE);
+    s->drop_frame_timecode = s->drop_frame_timecode || !!(avctx->flags2 & AV_CODEC_FLAG2_DROP_FRAME_TIMECODE);
     if (s->drop_frame_timecode)
         s->tc.flags |= AV_TIMECODE_FLAG_DROPFRAME;
     if (s->drop_frame_timecode && s->frame_rate_index != 4) {
@@ -214,16 +206,24 @@ static av_cold int encode_init(AVCodecContext *avctx)
         return -1;
     }
 
+#if FF_API_PRIVATE_OPT
+FF_DISABLE_DEPRECATION_WARNINGS
+    if (avctx->timecode_frame_start)
+        s->timecode_frame_start = avctx->timecode_frame_start;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
     if (s->tc_opt_str) {
         AVRational rate = ff_mpeg12_frame_rate_tab[s->frame_rate_index];
         int ret = av_timecode_init_from_string(&s->tc, rate, s->tc_opt_str, s);
         if (ret < 0)
             return ret;
         s->drop_frame_timecode = !!(s->tc.flags & AV_TIMECODE_FLAG_DROPFRAME);
-        s->avctx->timecode_frame_start = s->tc.start;
+        s->timecode_frame_start = s->tc.start;
     } else {
-        s->avctx->timecode_frame_start = 0; // default is -1
+        s->timecode_frame_start = 0; // default is -1
     }
+
     return 0;
 }
 
@@ -371,7 +371,7 @@ static void mpeg1_encode_sequence_header(MpegEncContext *s)
          * fake MPEG frame rate in case of low frame rate */
         fps       = (framerate.num + framerate.den / 2) / framerate.den;
         time_code = s->current_picture_ptr->f->coded_picture_number +
-                    s->avctx->timecode_frame_start;
+                    s->timecode_frame_start;
 
         s->gop_picture_number = s->current_picture_ptr->f->coded_picture_number;
 
@@ -384,7 +384,7 @@ static void mpeg1_encode_sequence_header(MpegEncContext *s)
         put_bits(&s->pb, 1, 1);
         put_bits(&s->pb, 6, (uint32_t)((time_code / fps) % 60));
         put_bits(&s->pb, 6, (uint32_t)((time_code % fps)));
-        put_bits(&s->pb, 1, !!(s->avctx->flags & CODEC_FLAG_CLOSED_GOP) || s->intra_only || !s->gop_picture_number);
+        put_bits(&s->pb, 1, !!(s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP) || s->intra_only || !s->gop_picture_number);
         put_bits(&s->pb, 1, 0);                     // broken link
     }
 }
@@ -401,12 +401,7 @@ static inline void encode_mb_skip_run(MpegEncContext *s, int run)
 
 static av_always_inline void put_qscale(MpegEncContext *s)
 {
-    if (s->q_scale_type) {
-        av_assert2(s->qscale >= 1 && s->qscale <= 12);
-        put_bits(&s->pb, 5, inv_non_linear_qscale[s->qscale]);
-    } else {
-        put_bits(&s->pb, 5, s->qscale);
-    }
+    put_bits(&s->pb, 5, s->qscale);
 }
 
 void ff_mpeg1_encode_slice_header(MpegEncContext *s)
@@ -1051,7 +1046,7 @@ av_cold void ff_mpeg1_encode_init(MpegEncContext *s)
         }
 
         for (f_code = 1; f_code <= MAX_FCODE; f_code++)
-            for (mv = -MAX_MV; mv <= MAX_MV; mv++) {
+            for (mv = -MAX_DMV; mv <= MAX_DMV; mv++) {
                 int len;
 
                 if (mv == 0) {
@@ -1074,7 +1069,7 @@ av_cold void ff_mpeg1_encode_init(MpegEncContext *s)
                               2 + bit_size;
                 }
 
-                mv_penalty[f_code][mv + MAX_MV] = len;
+                mv_penalty[f_code][mv + MAX_DMV] = len;
             }
 
 
@@ -1105,14 +1100,16 @@ av_cold void ff_mpeg1_encode_init(MpegEncContext *s)
 #define OFFSET(x) offsetof(MpegEncContext, x)
 #define VE AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM
 #define COMMON_OPTS                                                           \
-    { "gop_timecode",        "MPEG GOP Timecode in hh:mm:ss[:;.]ff format",   \
+    { "gop_timecode",        "MPEG GOP Timecode in hh:mm:ss[:;.]ff format. Overrides timecode_frame_start.",   \
       OFFSET(tc_opt_str), AV_OPT_TYPE_STRING, {.str=NULL}, CHAR_MIN, CHAR_MAX, VE },\
     { "intra_vlc",           "Use MPEG-2 intra VLC table.",                   \
-      OFFSET(intra_vlc_format),    AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE }, \
+      OFFSET(intra_vlc_format),    AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, \
     { "drop_frame_timecode", "Timecode is in drop frame format.",             \
-      OFFSET(drop_frame_timecode), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE }, \
+      OFFSET(drop_frame_timecode), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, \
     { "scan_offset",         "Reserve space for SVCD scan offset user data.", \
-      OFFSET(scan_offset),         AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
+      OFFSET(scan_offset),         AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, \
+    { "timecode_frame_start", "GOP timecode frame start number, in non-drop-frame format", \
+      OFFSET(timecode_frame_start), AV_OPT_TYPE_INT64, {.i64 = -1 }, -1, INT64_MAX, VE}, \
 
 static const AVOption mpeg1_options[] = {
     COMMON_OPTS
@@ -1122,8 +1119,8 @@ static const AVOption mpeg1_options[] = {
 
 static const AVOption mpeg2_options[] = {
     COMMON_OPTS
-    { "non_linear_quant", "Use nonlinear quantizer.",    OFFSET(q_scale_type),   AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
-    { "alternate_scan",   "Enable alternate scantable.", OFFSET(alternate_scan), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
+    { "non_linear_quant", "Use nonlinear quantizer.",    OFFSET(q_scale_type),   AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
+    { "alternate_scan",   "Enable alternate scantable.", OFFSET(alternate_scan), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
     { "seq_disp_ext",     "Write sequence_display_extension blocks.", OFFSET(seq_disp_ext), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1, VE, "seq_disp_ext" },
     {     "auto",   NULL, 0, AV_OPT_TYPE_CONST,  {.i64 = -1},  0, 0, VE, "seq_disp_ext" },
     {     "never",  NULL, 0, AV_OPT_TYPE_CONST,  {.i64 = 0 },  0, 0, VE, "seq_disp_ext" },
@@ -1155,7 +1152,7 @@ AVCodec ff_mpeg1video_encoder = {
     .supported_framerates = ff_mpeg12_frame_rate_tab + 1,
     .pix_fmts             = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUV420P,
                                                            AV_PIX_FMT_NONE },
-    .capabilities         = CODEC_CAP_DELAY | CODEC_CAP_SLICE_THREADS,
+    .capabilities         = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_SLICE_THREADS,
     .priv_class           = &mpeg1_class,
 };
 
@@ -1172,6 +1169,6 @@ AVCodec ff_mpeg2video_encoder = {
     .pix_fmts             = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUV420P,
                                                            AV_PIX_FMT_YUV422P,
                                                            AV_PIX_FMT_NONE },
-    .capabilities         = CODEC_CAP_DELAY | CODEC_CAP_SLICE_THREADS,
+    .capabilities         = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_SLICE_THREADS,
     .priv_class           = &mpeg2_class,
 };
diff --git a/libavcodec/mpeg12vlc.h b/libavcodec/mpeg12vlc.h
new file mode 100644
index 00000000..31888fdd
--- /dev/null
+++ b/libavcodec/mpeg12vlc.h
@@ -0,0 +1,52 @@
+/*
+ * MPEG1/2 VLC
+ * copyright (c) 2000,2001 Fabrice Bellard
+ * copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * MPEG1/2 VLC.
+ */
+
+#ifndef AVCODEC_MPEG12VLC_H
+#define AVCODEC_MPEG12VLC_H
+
+#include "get_bits.h"
+
+#define DC_VLC_BITS 9
+#define MV_VLC_BITS 9
+#define TEX_VLC_BITS 9
+
+#define MBINCR_VLC_BITS 9
+#define MB_PAT_VLC_BITS 9
+#define MB_PTYPE_VLC_BITS 6
+#define MB_BTYPE_VLC_BITS 6
+
+extern VLC ff_dc_lum_vlc;
+extern VLC ff_dc_chroma_vlc;
+extern VLC ff_mbincr_vlc;
+extern VLC ff_mb_ptype_vlc;
+extern VLC ff_mb_btype_vlc;
+extern VLC ff_mb_pat_vlc;
+extern VLC ff_mv_vlc;
+
+void ff_mpeg12_init_vlcs(void);
+
+#endif /* AVCODEC_MPEG12VLC_H */
diff --git a/libavcodec/mpeg4_unpack_bframes_bsf.c b/libavcodec/mpeg4_unpack_bframes_bsf.c
index e85ea08b..df49d3f4 100644
--- a/libavcodec/mpeg4_unpack_bframes_bsf.c
+++ b/libavcodec/mpeg4_unpack_bframes_bsf.c
@@ -73,11 +73,11 @@ static void scan_buffer(const uint8_t *buf, int buf_size,
 
 /* allocate new buffer and copy size bytes from src */
 static uint8_t *create_new_buffer(const uint8_t *src, int size) {
-    uint8_t *dst = av_malloc(size + FF_INPUT_BUFFER_PADDING_SIZE);
+    uint8_t *dst = av_malloc(size + AV_INPUT_BUFFER_PADDING_SIZE);
 
     if (dst) {
         memcpy(dst, src, size);
-        memset(dst + size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
+        memset(dst + size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
     }
 
     return dst;
diff --git a/libavcodec/mpeg4video.h b/libavcodec/mpeg4video.h
index 49bc13f8..5998c719 100644
--- a/libavcodec/mpeg4video.h
+++ b/libavcodec/mpeg4video.h
@@ -140,7 +140,7 @@ void ff_mpeg4_encode_mb(MpegEncContext *s,
 void ff_mpeg4_pred_ac(MpegEncContext *s, int16_t *block, int n,
                       int dir);
 void ff_set_mpeg4_time(MpegEncContext *s);
-void ff_mpeg4_encode_picture_header(MpegEncContext *s, int picture_number);
+int ff_mpeg4_encode_picture_header(MpegEncContext *s, int picture_number);
 
 int ff_mpeg4_decode_picture_header(Mpeg4DecContext *ctx, GetBitContext *gb);
 void ff_mpeg4_encode_video_packet_header(MpegEncContext *s);
diff --git a/libavcodec/mpeg4videodec.c b/libavcodec/mpeg4videodec.c
index e151f9ee..527cbe96 100644
--- a/libavcodec/mpeg4videodec.c
+++ b/libavcodec/mpeg4videodec.c
@@ -22,6 +22,7 @@
 
 #define UNCHECKED_BITSTREAM_READER 1
 
+#include "libavutil/internal.h"
 #include "libavutil/opt.h"
 #include "error_resilience.h"
 #include "idctdsp.h"
@@ -31,6 +32,7 @@
 #include "mpegvideodata.h"
 #include "mpeg4video.h"
 #include "h263.h"
+#include "profiles.h"
 #include "thread.h"
 #include "xvididct.h"
 
@@ -65,7 +67,7 @@ void ff_mpeg4_pred_ac(MpegEncContext *s, int16_t *block, int n, int dir)
     int8_t *const qscale_table = s->current_picture.qscale_table;
 
     /* find prediction */
-    ac_val  = s->ac_val[0][0] + s->block_index[n] * 16;
+    ac_val  = &s->ac_val[0][0][0] + s->block_index[n] * 16;
     ac_val1 = ac_val;
     if (s->ac_pred) {
         if (dir == 0) {
@@ -882,7 +884,7 @@ int ff_mpeg4_decode_partitions(Mpeg4DecContext *ctx)
     const int part_a_end   = s->pict_type == AV_PICTURE_TYPE_I ? (ER_DC_END   | ER_MV_END)   : ER_MV_END;
 
     mb_num = mpeg4_decode_partition_a(ctx);
-    if (mb_num < 0) {
+    if (mb_num <= 0) {
         ff_er_add_slice(&s->er, s->resync_mb_x, s->resync_mb_y,
                         s->mb_x, s->mb_y, part_a_error);
         return -1;
@@ -1298,7 +1300,7 @@ static int mpeg4_decode_mb(MpegEncContext *s, int16_t block[6][64])
     Mpeg4DecContext *ctx = (Mpeg4DecContext *)s;
     int cbpc, cbpy, i, cbp, pred_x, pred_y, mx, my, dquant;
     int16_t *mot_val;
-    static int8_t quant_tab[4] = { -1, -2, 1, 2 };
+    static const int8_t quant_tab[4] = { -1, -2, 1, 2 };
     const int xy = s->mb_x + s->mb_y * s->mb_stride;
 
     av_assert2(s->h263_pred);
@@ -1354,6 +1356,11 @@ static int mpeg4_decode_mb(MpegEncContext *s, int16_t block[6][64])
         else
             s->mcsel = 0;
         cbpy = get_vlc2(&s->gb, ff_h263_cbpy_vlc.table, CBPY_VLC_BITS, 1) ^ 0x0F;
+        if (cbpy < 0) {
+            av_log(s->avctx, AV_LOG_ERROR,
+                   "P cbpy damaged at %d %d\n", s->mb_x, s->mb_y);
+            return AVERROR_INVALIDDATA;
+        }
 
         cbp = (cbpc & 3) | (cbpy << 2);
         if (dquant)
@@ -1875,6 +1882,10 @@ static int decode_vol_header(Mpeg4DecContext *ctx, GetBitContext *gb)
                 int last = 0;
                 for (i = 0; i < 64; i++) {
                     int j;
+                    if (get_bits_left(gb) < 8) {
+                        av_log(s->avctx, AV_LOG_ERROR, "insufficient data for custom matrix\n");
+                        return AVERROR_INVALIDDATA;
+                    }
                     v = get_bits(gb, 8);
                     if (v == 0)
                         break;
@@ -1898,6 +1909,10 @@ static int decode_vol_header(Mpeg4DecContext *ctx, GetBitContext *gb)
                 int last = 0;
                 for (i = 0; i < 64; i++) {
                     int j;
+                    if (get_bits_left(gb) < 8) {
+                        av_log(s->avctx, AV_LOG_ERROR, "insufficient data for custom matrix\n");
+                        return AVERROR_INVALIDDATA;
+                    }
                     v = get_bits(gb, 8);
                     if (v == 0)
                         break;
@@ -2223,7 +2238,7 @@ static int decode_vop_header(Mpeg4DecContext *ctx, GetBitContext *gb)
 
     s->pict_type = get_bits(gb, 2) + AV_PICTURE_TYPE_I;        /* pict type: I = 0 , P = 1 */
     if (s->pict_type == AV_PICTURE_TYPE_B && s->low_delay &&
-        ctx->vol_control_parameters == 0 && !(s->avctx->flags & CODEC_FLAG_LOW_DELAY)) {
+        ctx->vol_control_parameters == 0 && !(s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY)) {
         av_log(s->avctx, AV_LOG_ERROR, "low_delay flag set incorrectly, clearing it\n");
         s->low_delay = 0;
     }
@@ -2315,9 +2330,7 @@ static int decode_vop_header(Mpeg4DecContext *ctx, GetBitContext *gb)
         pts = ROUNDED_DIV(s->time, s->avctx->framerate.den);
     else
         pts = AV_NOPTS_VALUE;
-    if (s->avctx->debug&FF_DEBUG_PTS)
-        av_log(s->avctx, AV_LOG_DEBUG, "MPEG4 PTS: %"PRId64"\n",
-               pts);
+    ff_dlog(s->avctx, "MPEG4 PTS: %"PRId64"\n", pts);
 
     check_marker(gb, "before vop_coded");
 
@@ -2602,7 +2615,7 @@ int ff_mpeg4_decode_picture_header(Mpeg4DecContext *ctx, GetBitContext *gb)
     }
 
 end:
-    if (s->avctx->flags & CODEC_FLAG_LOW_DELAY)
+    if (s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY)
         s->low_delay = 1;
     s->avctx->has_b_frames = !s->low_delay;
 
@@ -2665,7 +2678,7 @@ int ff_mpeg4_frame_end(AVCodecContext *avctx, const uint8_t *buf, int buf_size)
             if (!ctx->showed_packed_warning) {
                 av_log(s->avctx, AV_LOG_INFO, "Video uses a non-standard and "
                        "wasteful way to store B-frames ('packed B-frames'). "
-                       "Consider using the mpeg4_unpack_bframes bitstream filter to fix it.\n");
+                       "Consider using the mpeg4_unpack_bframes bitstream filter without encoding but stream copy to fix it.\n");
                 ctx->showed_packed_warning = 1;
             }
             av_fast_padded_malloc(&s->bitstream_buffer,
@@ -2684,6 +2697,7 @@ int ff_mpeg4_frame_end(AVCodecContext *avctx, const uint8_t *buf, int buf_size)
     return 0;
 }
 
+#if HAVE_THREADS
 static int mpeg4_update_thread_context(AVCodecContext *dst,
                                        const AVCodecContext *src)
 {
@@ -2703,6 +2717,7 @@ static int mpeg4_update_thread_context(AVCodecContext *dst,
 
     return 0;
 }
+#endif
 
 static av_cold int decode_init(AVCodecContext *avctx)
 {
@@ -2731,29 +2746,9 @@ static av_cold int decode_init(AVCodecContext *avctx)
     return 0;
 }
 
-static const AVProfile mpeg4_video_profiles[] = {
-    { FF_PROFILE_MPEG4_SIMPLE,                    "Simple Profile" },
-    { FF_PROFILE_MPEG4_SIMPLE_SCALABLE,           "Simple Scalable Profile" },
-    { FF_PROFILE_MPEG4_CORE,                      "Core Profile" },
-    { FF_PROFILE_MPEG4_MAIN,                      "Main Profile" },
-    { FF_PROFILE_MPEG4_N_BIT,                     "N-bit Profile" },
-    { FF_PROFILE_MPEG4_SCALABLE_TEXTURE,          "Scalable Texture Profile" },
-    { FF_PROFILE_MPEG4_SIMPLE_FACE_ANIMATION,     "Simple Face Animation Profile" },
-    { FF_PROFILE_MPEG4_BASIC_ANIMATED_TEXTURE,    "Basic Animated Texture Profile" },
-    { FF_PROFILE_MPEG4_HYBRID,                    "Hybrid Profile" },
-    { FF_PROFILE_MPEG4_ADVANCED_REAL_TIME,        "Advanced Real Time Simple Profile" },
-    { FF_PROFILE_MPEG4_CORE_SCALABLE,             "Code Scalable Profile" },
-    { FF_PROFILE_MPEG4_ADVANCED_CODING,           "Advanced Coding Profile" },
-    { FF_PROFILE_MPEG4_ADVANCED_CORE,             "Advanced Core Profile" },
-    { FF_PROFILE_MPEG4_ADVANCED_SCALABLE_TEXTURE, "Advanced Scalable Texture Profile" },
-    { FF_PROFILE_MPEG4_SIMPLE_STUDIO,             "Simple Studio Profile" },
-    { FF_PROFILE_MPEG4_ADVANCED_SIMPLE,           "Advanced Simple Profile" },
-    { FF_PROFILE_UNKNOWN },
-};
-
 static const AVOption mpeg4_options[] = {
-    {"quarter_sample", "1/4 subpel MC", offsetof(MpegEncContext, quarter_sample), FF_OPT_TYPE_INT, {.i64 = 0}, 0, 1, 0},
-    {"divx_packed", "divx style packed b frames", offsetof(MpegEncContext, divx_packed), FF_OPT_TYPE_INT, {.i64 = 0}, 0, 1, 0},
+    {"quarter_sample", "1/4 subpel MC", offsetof(MpegEncContext, quarter_sample), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, 0},
+    {"divx_packed", "divx style packed b frames", offsetof(MpegEncContext, divx_packed), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, 0},
     {NULL}
 };
 
@@ -2773,19 +2768,19 @@ AVCodec ff_mpeg4_decoder = {
     .init                  = decode_init,
     .close                 = ff_h263_decode_end,
     .decode                = ff_h263_decode_frame,
-    .capabilities          = CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 |
-                             CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY |
-                             CODEC_CAP_FRAME_THREADS,
+    .capabilities          = AV_CODEC_CAP_DRAW_HORIZ_BAND | AV_CODEC_CAP_DR1 |
+                             AV_CODEC_CAP_TRUNCATED | AV_CODEC_CAP_DELAY |
+                             AV_CODEC_CAP_FRAME_THREADS,
     .flush                 = ff_mpeg_flush,
     .max_lowres            = 3,
     .pix_fmts              = ff_h263_hwaccel_pixfmt_list_420,
-    .profiles              = NULL_IF_CONFIG_SMALL(mpeg4_video_profiles),
+    .profiles              = NULL_IF_CONFIG_SMALL(ff_mpeg4_video_profiles),
     .update_thread_context = ONLY_IF_THREADS_ENABLED(mpeg4_update_thread_context),
     .priv_class = &mpeg4_class,
 };
 
 
-#if CONFIG_MPEG4_VDPAU_DECODER
+#if CONFIG_MPEG4_VDPAU_DECODER && FF_API_VDPAU
 static const AVClass mpeg4_vdpau_class = {
     "MPEG4 Video VDPAU Decoder",
     av_default_item_name,
@@ -2802,8 +2797,8 @@ AVCodec ff_mpeg4_vdpau_decoder = {
     .init           = decode_init,
     .close          = ff_h263_decode_end,
     .decode         = ff_h263_decode_frame,
-    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY |
-                      CODEC_CAP_HWACCEL_VDPAU,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_TRUNCATED | AV_CODEC_CAP_DELAY |
+                      AV_CODEC_CAP_HWACCEL_VDPAU,
     .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_VDPAU_MPEG4,
                                                   AV_PIX_FMT_NONE },
     .priv_class     = &mpeg4_vdpau_class,
diff --git a/libavcodec/mpeg4videoenc.c b/libavcodec/mpeg4videoenc.c
index bca294eb..1ba92381 100644
--- a/libavcodec/mpeg4videoenc.c
+++ b/libavcodec/mpeg4videoenc.c
@@ -430,7 +430,7 @@ static inline void mpeg4_encode_blocks(MpegEncContext *s, int16_t block[6][64],
     int i;
 
     if (scan_table) {
-        if (s->avctx->flags2 & CODEC_FLAG2_NO_OUTPUT) {
+        if (s->avctx->flags2 & AV_CODEC_FLAG2_NO_OUTPUT) {
             for (i = 0; i < 6; i++)
                 skip_put_bits(&s->pb,
                               mpeg4_get_block_length(s, block[i], i,
@@ -442,7 +442,7 @@ static inline void mpeg4_encode_blocks(MpegEncContext *s, int16_t block[6][64],
                                    intra_dc[i], scan_table[i], dc_pb, ac_pb);
         }
     } else {
-        if (s->avctx->flags2 & CODEC_FLAG2_NO_OUTPUT) {
+        if (s->avctx->flags2 & AV_CODEC_FLAG2_NO_OUTPUT) {
             for (i = 0; i < 6; i++)
                 skip_put_bits(&s->pb,
                               mpeg4_get_block_length(s, block[i], i, 0,
@@ -507,7 +507,7 @@ void ff_mpeg4_encode_mb(MpegEncContext *s, int16_t block[6][64],
     PutBitContext *const pb2    = s->data_partitioning ? &s->pb2 : &s->pb;
     PutBitContext *const tex_pb = s->data_partitioning && s->pict_type != AV_PICTURE_TYPE_B ? &s->tex_pb : &s->pb;
     PutBitContext *const dc_pb  = s->data_partitioning && s->pict_type != AV_PICTURE_TYPE_I ? &s->pb2 : &s->pb;
-    const int interleaved_stats = (s->avctx->flags & CODEC_FLAG_PASS1) && !s->data_partitioning ? 1 : 0;
+    const int interleaved_stats = (s->avctx->flags & AV_CODEC_FLAG_PASS1) && !s->data_partitioning ? 1 : 0;
 
     if (!s->mb_intra) {
         int i, cbp;
@@ -832,7 +832,7 @@ void ff_mpeg4_encode_mb(MpegEncContext *s, int16_t block[6][64],
         for (i = 0; i < 6; i++)
             dc_diff[i] = ff_mpeg4_pred_dc(s, i, block[i][0], &dir[i], 1);
 
-        if (s->avctx->flags & CODEC_FLAG_AC_PRED) {
+        if (s->avctx->flags & AV_CODEC_FLAG_AC_PRED) {
             s->ac_pred = decide_ac_pred(s, block, dir, scan_table, zigzag_last_index);
         } else {
             for (i = 0; i < 6; i++)
@@ -932,7 +932,7 @@ static void mpeg4_encode_gop_header(MpegEncContext *s)
     put_bits(&s->pb, 1, 1);
     put_bits(&s->pb, 6, seconds);
 
-    put_bits(&s->pb, 1, !!(s->avctx->flags & CODEC_FLAG_CLOSED_GOP));
+    put_bits(&s->pb, 1, !!(s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP));
     put_bits(&s->pb, 1, 0);  // broken link == NO
 
     ff_mpeg4_stuffing(&s->pb);
@@ -1078,7 +1078,7 @@ static void mpeg4_encode_vol_header(MpegEncContext *s,
     ff_mpeg4_stuffing(&s->pb);
 
     /* user data */
-    if (!(s->avctx->flags & CODEC_FLAG_BITEXACT)) {
+    if (!(s->avctx->flags & AV_CODEC_FLAG_BITEXACT)) {
         put_bits(&s->pb, 16, 0);
         put_bits(&s->pb, 16, 0x1B2);    /* user_data */
         avpriv_put_string(&s->pb, LIBAVCODEC_IDENT, 0);
@@ -1086,13 +1086,13 @@ static void mpeg4_encode_vol_header(MpegEncContext *s,
 }
 
 /* write mpeg4 VOP header */
-void ff_mpeg4_encode_picture_header(MpegEncContext *s, int picture_number)
+int ff_mpeg4_encode_picture_header(MpegEncContext *s, int picture_number)
 {
-    int time_incr;
-    int time_div, time_mod;
+    uint64_t time_incr;
+    int64_t time_div, time_mod;
 
     if (s->pict_type == AV_PICTURE_TYPE_I) {
-        if (!(s->avctx->flags & CODEC_FLAG_GLOBAL_HEADER)) {
+        if (!(s->avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER)) {
             if (s->strict_std_compliance < FF_COMPLIANCE_VERY_STRICT)  // HACK, the reference sw is buggy
                 mpeg4_encode_visual_object_header(s);
             if (s->strict_std_compliance < FF_COMPLIANCE_VERY_STRICT || picture_number == 0)  // HACK, the reference sw is buggy
@@ -1111,7 +1111,12 @@ void ff_mpeg4_encode_picture_header(MpegEncContext *s, int picture_number)
     time_div  = FFUDIV(s->time, s->avctx->time_base.den);
     time_mod  = FFUMOD(s->time, s->avctx->time_base.den);
     time_incr = time_div - s->last_time_base;
-    av_assert0(time_incr >= 0);
+
+    // This limits the frame duration to max 1 hour
+    if (time_incr > 3600) {
+        av_log(s->avctx, AV_LOG_ERROR, "time_incr %"PRIu64" too large\n", time_incr);
+        return AVERROR(EINVAL);
+    }
     while (time_incr--)
         put_bits(&s->pb, 1, 1);
 
@@ -1137,6 +1142,8 @@ void ff_mpeg4_encode_picture_header(MpegEncContext *s, int picture_number)
         put_bits(&s->pb, 3, s->f_code);  /* fcode_for */
     if (s->pict_type == AV_PICTURE_TYPE_B)
         put_bits(&s->pb, 3, s->b_code);  /* fcode_back */
+
+    return 0;
 }
 
 static av_cold void init_uni_dc_tab(void)
@@ -1325,7 +1332,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
     s->y_dc_scale_table         = ff_mpeg4_y_dc_scale_table;
     s->c_dc_scale_table         = ff_mpeg4_c_dc_scale_table;
 
-    if (s->avctx->flags & CODEC_FLAG_GLOBAL_HEADER) {
+    if (s->avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
         s->avctx->extradata = av_malloc(1024);
         init_put_bits(&s->pb, s->avctx->extradata, 1024);
 
@@ -1394,8 +1401,8 @@ void ff_mpeg4_encode_video_packet_header(MpegEncContext *s)
 #define OFFSET(x) offsetof(MpegEncContext, x)
 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 static const AVOption options[] = {
-    { "data_partitioning", "Use data partitioning.",      OFFSET(data_partitioning), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
-    { "alternate_scan",    "Enable alternate scantable.", OFFSET(alternate_scan),    AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
+    { "data_partitioning", "Use data partitioning.",      OFFSET(data_partitioning), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
+    { "alternate_scan",    "Enable alternate scantable.", OFFSET(alternate_scan),    AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
     FF_MPV_COMMON_OPTS
     { NULL },
 };
@@ -1417,6 +1424,6 @@ AVCodec ff_mpeg4_encoder = {
     .encode2        = ff_mpv_encode_picture,
     .close          = ff_mpv_encode_end,
     .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
-    .capabilities   = CODEC_CAP_DELAY | CODEC_CAP_SLICE_THREADS,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_SLICE_THREADS,
     .priv_class     = &mpeg4enc_class,
 };
diff --git a/libavcodec/mpeg_er.c b/libavcodec/mpeg_er.c
index 3d90582d..dd87ae9c 100644
--- a/libavcodec/mpeg_er.c
+++ b/libavcodec/mpeg_er.c
@@ -58,3 +58,74 @@ void ff_mpeg_er_frame_start(MpegEncContext *s)
 
     ff_er_frame_start(er);
 }
+
+static void mpeg_er_decode_mb(void *opaque, int ref, int mv_dir, int mv_type,
+                              int (*mv)[2][4][2], int mb_x, int mb_y,
+                              int mb_intra, int mb_skipped)
+{
+    MpegEncContext *s = opaque; /* ff_mpeg_er_init() stores this context in er->opaque */
+
+    s->mv_dir     = mv_dir;     /* load the supplied macroblock state into the context */
+    s->mv_type    = mv_type;
+    s->mb_intra   = mb_intra;
+    s->mb_skipped = mb_skipped;
+    s->mb_x       = mb_x;
+    s->mb_y       = mb_y;
+    memcpy(s->mv, mv, sizeof(*mv)); /* sizeof(*mv) covers one int[2][4][2] vector set */
+
+    ff_init_block_index(s);
+    ff_update_block_index(s);
+
+    s->bdsp.clear_blocks(s->block[0]);
+
+    s->dest[0] = s->current_picture.f->data[0] + /* plane 0: offset of (mb_x * 16, mb_y * 16) */
+                 s->mb_y * 16 * s->linesize +
+                 s->mb_x * 16;
+    s->dest[1] = s->current_picture.f->data[1] + /* planes 1 and 2: the 16 step is reduced by the chroma shifts */
+                 s->mb_y * (16 >> s->chroma_y_shift) * s->uvlinesize +
+                 s->mb_x * (16 >> s->chroma_x_shift);
+    s->dest[2] = s->current_picture.f->data[2] +
+                 s->mb_y * (16 >> s->chroma_y_shift) * s->uvlinesize +
+                 s->mb_x * (16 >> s->chroma_x_shift);
+
+    if (ref) /* ref is used for nothing in this function except this debug message */
+        av_log(s->avctx, AV_LOG_DEBUG,
+               "Interlaced error concealment is not fully implemented\n");
+    ff_mpv_decode_mb(s, s->block); /* decode using the state set up above */
+}
+
+int ff_mpeg_er_init(MpegEncContext *s)
+{
+    ERContext *er = &s->er; /* the ERContext embedded in s; this function fills it in */
+    int mb_array_size = s->mb_height * s->mb_stride; /* size passed to both allocations below */
+    int i;
+
+    er->avctx       = s->avctx;
+
+    er->mb_index2xy = s->mb_index2xy; /* mirror the macroblock geometry fields of s */
+    er->mb_num      = s->mb_num;
+    er->mb_width    = s->mb_width;
+    er->mb_height   = s->mb_height;
+    er->mb_stride   = s->mb_stride;
+    er->b8_stride   = s->b8_stride;
+
+    er->er_temp_buffer     = av_malloc(s->mb_height * s->mb_stride); /* same expression as mb_array_size */
+    er->error_status_table = av_mallocz(mb_array_size);
+    if (!er->er_temp_buffer || !er->error_status_table)
+        goto fail;
+
+    er->mbskip_table  = s->mbskip_table; /* assigned straight from s, no new allocation here */
+    er->mbintra_table = s->mbintra_table;
+
+    for (i = 0; i < FF_ARRAY_ELEMS(s->dc_val); i++)
+        er->dc_val[i] = s->dc_val[i];
+
+    er->decode_mb = mpeg_er_decode_mb; /* static callback defined in this file */
+    er->opaque    = s;                 /* mpeg_er_decode_mb() reads this back as its context */
+
+    return 0; /* success */
+fail: /* reached when either allocation failed; both pointers go through av_freep() */
+    av_freep(&er->er_temp_buffer);
+    av_freep(&er->error_status_table);
+    return AVERROR(ENOMEM);
+}
diff --git a/libavcodec/mpeg_er.h b/libavcodec/mpeg_er.h
index bd74fbb0..bb627a4d 100644
--- a/libavcodec/mpeg_er.h
+++ b/libavcodec/mpeg_er.h
@@ -21,6 +21,7 @@
 
 #include "mpegvideo.h"
 
+int ff_mpeg_er_init(MpegEncContext *s);
 void ff_mpeg_er_frame_start(MpegEncContext *s);
 
 #endif /* AVCODEC_MPEG_ER_H */
diff --git a/libavcodec/mpegaudio_parser.c b/libavcodec/mpegaudio_parser.c
index 58098d89..873f9412 100644
--- a/libavcodec/mpegaudio_parser.c
+++ b/libavcodec/mpegaudio_parser.c
@@ -23,7 +23,7 @@
 #include "parser.h"
 #include "mpegaudiodecheader.h"
 #include "libavutil/common.h"
-
+#include "libavformat/id3v1.h" // for ID3v1_TAG_SIZE
 
 typedef struct MpegAudioParseContext {
     ParseContext pc;
@@ -35,7 +35,7 @@ typedef struct MpegAudioParseContext {
 
 #define MPA_HEADER_SIZE 4
 
-/* header + layer + bitrate + freq + lsf/mpeg25 */
+/* header + layer + freq + lsf/mpeg25 */
 #define SAME_HEADER_MASK \
    (0xffe00000 | (3 << 17) | (3 << 10) | (3 << 19))
 
@@ -49,6 +49,7 @@ static int mpegaudio_parse(AVCodecParserContext *s1,
     uint32_t state= pc->state;
     int i;
     int next= END_NOT_FOUND;
+    int flush = !buf_size;
 
     for(i=0; i<buf_size; ){
         if(s->frame_size){
@@ -68,7 +69,7 @@ static int mpegaudio_parse(AVCodecParserContext *s1,
 
                 state= (state<<8) + buf[i++];
 
-                ret = avpriv_mpa_decode_header2(state, &sr, &channels, &frame_size, &bit_rate, &codec_id);
+                ret = ff_mpa_decode_header(state, &sr, &channels, &frame_size, &bit_rate, &codec_id);
                 if (ret < 4) {
                     if (i > 4)
                         s->header_count = -2;
@@ -113,6 +114,12 @@ static int mpegaudio_parse(AVCodecParserContext *s1,
         return buf_size;
     }
 
+    if (flush && buf_size >= ID3v1_TAG_SIZE && memcmp(buf, "TAG", 3) == 0) {
+        *poutbuf = NULL;
+        *poutbuf_size = 0;
+        return next;
+    }
+
     *poutbuf = buf;
     *poutbuf_size = buf_size;
     return next;
diff --git a/libavcodec/mpegaudio_tablegen.c b/libavcodec/mpegaudio_tablegen.c
index 90c9de43..ede7c8e2 100644
--- a/libavcodec/mpegaudio_tablegen.c
+++ b/libavcodec/mpegaudio_tablegen.c
@@ -22,6 +22,7 @@
 
 #include <stdlib.h>
 #define CONFIG_HARDCODED_TABLES 0
+#include "libavutil/tablegen.h"
 #include "mpegaudio_tablegen.h"
 #include "tableprint.h"
 
diff --git a/libavcodec/mpegaudio_tablegen.h b/libavcodec/mpegaudio_tablegen.h
index 86b2cd32..0b0ea406 100644
--- a/libavcodec/mpegaudio_tablegen.h
+++ b/libavcodec/mpegaudio_tablegen.h
@@ -45,14 +45,28 @@ static float expval_table_float[512][16];
 static av_cold void mpegaudio_tableinit(void)
 {
     int i, value, exponent;
+    static const double exp2_lut[4] = {
+        1.00000000000000000000, /* 2 ^ (0 * 0.25) */
+        1.18920711500272106672, /* 2 ^ (1 * 0.25) */
+        M_SQRT2               , /* 2 ^ (2 * 0.25) */
+        1.68179283050742908606, /* 2 ^ (3 * 0.25) */
+    };
+    static double pow43_lut[16];
+    double exp2_base = 2.11758236813575084767080625169910490512847900390625e-22; // 2^(-72)
+    double exp2_val;
+    double pow43_val = 0;
+    for (i = 0; i < 16; ++i)
+        pow43_lut[i] = i * cbrt(i);
+
     for (i = 1; i < TABLE_4_3_SIZE; i++) {
-        double value = i / 4;
         double f, fm;
         int e, m;
-        /* cbrtf() isn't available on all systems, so we use powf(). */
-        f  = value / IMDCT_SCALAR * pow(value, 1.0 / 3.0) * pow(2, (i & 3) * 0.25);
+        double value = i / 4;
+        if ((i & 3) == 0)
+            pow43_val = value / IMDCT_SCALAR * cbrt(value);
+        f  = pow43_val * exp2_lut[i & 3];
         fm = frexp(f, &e);
-        m  = (uint32_t)(fm * (1LL << 31) + 0.5);
+        m  = llrint(fm * (1LL << 31));
         e += FRAC_BITS - 31 + 5 - 100;
 
         /* normalized to FRAC_BITS */
@@ -60,11 +74,12 @@ static av_cold void mpegaudio_tableinit(void)
         table_4_3_exp[i]   = -e;
     }
     for (exponent = 0; exponent < 512; exponent++) {
+        if (exponent && (exponent & 3) == 0)
+            exp2_base *= 2;
+        exp2_val = exp2_base * exp2_lut[exponent & 3] / IMDCT_SCALAR;
         for (value = 0; value < 16; value++) {
-            /* cbrtf() isn't available on all systems, so we use powf(). */
-            double f = (double)value * pow(value, 1.0 / 3.0) * pow(2, (exponent - 400) * 0.25 + FRAC_BITS + 5) / IMDCT_SCALAR;
-            /* llrint() isn't always available, so round and cast manually. */
-            expval_table_fixed[exponent][value] = (long long int) (f < 0xFFFFFFFF ? floor(f + 0.5) : 0xFFFFFFFF);
+            double f = pow43_lut[value] * exp2_val;
+            expval_table_fixed[exponent][value] = (f < 0xFFFFFFFF ? llrint(f) : 0xFFFFFFFF);
             expval_table_float[exponent][value] = f;
         }
         exp_table_fixed[exponent] = expval_table_fixed[exponent][1];
diff --git a/libavcodec/mpegaudiodec_fixed.c b/libavcodec/mpegaudiodec_fixed.c
index 904c8856..9421ffbe 100644
--- a/libavcodec/mpegaudiodec_fixed.c
+++ b/libavcodec/mpegaudiodec_fixed.c
@@ -47,7 +47,7 @@ AVCodec ff_mp1_decoder = {
     .priv_data_size = sizeof(MPADecodeContext),
     .init           = decode_init,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .flush          = flush,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P,
                                                       AV_SAMPLE_FMT_S16,
@@ -63,7 +63,7 @@ AVCodec ff_mp2_decoder = {
     .priv_data_size = sizeof(MPADecodeContext),
     .init           = decode_init,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .flush          = flush,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P,
                                                       AV_SAMPLE_FMT_S16,
@@ -79,7 +79,7 @@ AVCodec ff_mp3_decoder = {
     .priv_data_size = sizeof(MPADecodeContext),
     .init           = decode_init,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .flush          = flush,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P,
                                                       AV_SAMPLE_FMT_S16,
@@ -95,7 +95,7 @@ AVCodec ff_mp3adu_decoder = {
     .priv_data_size = sizeof(MPADecodeContext),
     .init           = decode_init,
     .decode         = decode_frame_adu,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .flush          = flush,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P,
                                                       AV_SAMPLE_FMT_S16,
@@ -112,7 +112,7 @@ AVCodec ff_mp3on4_decoder = {
     .init           = decode_init_mp3on4,
     .close          = decode_close_mp3on4,
     .decode         = decode_frame_mp3on4,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .flush          = flush_mp3on4,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P,
                                                       AV_SAMPLE_FMT_NONE },
diff --git a/libavcodec/mpegaudiodec_float.c b/libavcodec/mpegaudiodec_float.c
index f432c832..ddfa5e0d 100644
--- a/libavcodec/mpegaudiodec_float.c
+++ b/libavcodec/mpegaudiodec_float.c
@@ -48,7 +48,7 @@ AVCodec ff_mp1float_decoder = {
     .init           = decode_init,
     .close          = decode_close,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .flush          = flush,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                       AV_SAMPLE_FMT_FLT,
@@ -65,7 +65,7 @@ AVCodec ff_mp2float_decoder = {
     .init           = decode_init,
     .decode         = decode_frame,
     .close          = decode_close,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .flush          = flush,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                       AV_SAMPLE_FMT_FLT,
@@ -82,7 +82,7 @@ AVCodec ff_mp3float_decoder = {
     .init           = decode_init,
     .close          = decode_close,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .flush          = flush,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                       AV_SAMPLE_FMT_FLT,
@@ -99,7 +99,7 @@ AVCodec ff_mp3adufloat_decoder = {
     .init           = decode_init,
     .close          = decode_close,
     .decode         = decode_frame_adu,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .flush          = flush,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                       AV_SAMPLE_FMT_FLT,
@@ -116,7 +116,7 @@ AVCodec ff_mp3on4float_decoder = {
     .init           = decode_init_mp3on4,
     .close          = decode_close_mp3on4,
     .decode         = decode_frame_mp3on4,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .flush          = flush_mp3on4,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                       AV_SAMPLE_FMT_NONE },
diff --git a/libavcodec/mpegaudiodec_template.c b/libavcodec/mpegaudiodec_template.c
index 8c43f41e..5e3fe7e9 100644
--- a/libavcodec/mpegaudiodec_template.c
+++ b/libavcodec/mpegaudiodec_template.c
@@ -429,7 +429,7 @@ static av_cold int decode_init(AVCodecContext * avctx)
     s->avctx = avctx;
 
 #if USE_FLOATS
-    s->fdsp = avpriv_float_dsp_alloc(avctx->flags & CODEC_FLAG_BITEXACT);
+    s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
     if (!s->fdsp)
         return AVERROR(ENOMEM);
 #endif
@@ -816,13 +816,6 @@ static void exponents_from_scale_factors(MPADecodeContext *s, GranuleDef *g,
     }
 }
 
-/* handle n = 0 too */
-static inline int get_bitsz(GetBitContext *s, int n)
-{
-    return n ? get_bits(s, n) : 0;
-}
-
-
 static void switch_buffer(MPADecodeContext *s, int *pos, int *end_pos,
                           int *end_pos2)
 {
@@ -1172,9 +1165,9 @@ static void compute_stereo(MPADecodeContext *s, GranuleDef *g0, GranuleDef *g1)
 #   include "mips/compute_antialias_float.h"
 #endif /* HAVE_MIPSFPU */
 #else
-#if HAVE_MIPSDSPR1
+#if HAVE_MIPSDSP
 #   include "mips/compute_antialias_fixed.h"
-#endif /* HAVE_MIPSDSPR1 */
+#endif /* HAVE_MIPSDSP */
 #endif /* USE_FLOATS */
 
 #ifndef compute_antialias
@@ -1657,9 +1650,11 @@ static int decode_frame(AVCodecContext * avctx, void *data, int *got_frame_ptr,
     uint32_t header;
     int ret;
 
+    int skipped = 0;
     while(buf_size && !*buf){
         buf++;
         buf_size--;
+        skipped++;
     }
 
     if (buf_size < HEADER_SIZE)
@@ -1670,12 +1665,11 @@ static int decode_frame(AVCodecContext * avctx, void *data, int *got_frame_ptr,
         av_log(avctx, AV_LOG_DEBUG, "discarding ID3 tag\n");
         return buf_size;
     }
-    if (ff_mpa_check_header(header) < 0) {
+    ret = avpriv_mpegaudio_decode_header((MPADecodeHeader *)s, header);
+    if (ret < 0) {
         av_log(avctx, AV_LOG_ERROR, "Header missing\n");
         return AVERROR_INVALIDDATA;
-    }
-
-    if (avpriv_mpegaudio_decode_header((MPADecodeHeader *)s, header) == 1) {
+    } else if (ret == 1) {
         /* free format: prepare to compute frame size */
         s->frame_size = -1;
         return AVERROR_INVALIDDATA;
@@ -1686,7 +1680,7 @@ static int decode_frame(AVCodecContext * avctx, void *data, int *got_frame_ptr,
     if (!avctx->bit_rate)
         avctx->bit_rate = s->bit_rate;
 
-    if (s->frame_size <= 0 || s->frame_size > buf_size) {
+    if (s->frame_size <= 0) {
         av_log(avctx, AV_LOG_ERROR, "incomplete frame\n");
         return AVERROR_INVALIDDATA;
     } else if (s->frame_size < buf_size) {
@@ -1714,7 +1708,7 @@ static int decode_frame(AVCodecContext * avctx, void *data, int *got_frame_ptr,
             return ret;
     }
     s->frame_size = 0;
-    return buf_size;
+    return buf_size + skipped;
 }
 
 static void mp_flush(MPADecodeContext *ctx)
@@ -1756,12 +1750,11 @@ static int decode_frame_adu(AVCodecContext *avctx, void *data,
     // Get header and restore sync word
     header = AV_RB32(buf) | 0xffe00000;
 
-    if (ff_mpa_check_header(header) < 0) { // Bad header, discard frame
+    ret = avpriv_mpegaudio_decode_header((MPADecodeHeader *)s, header);
+    if (ret < 0) {
         av_log(avctx, AV_LOG_ERROR, "Invalid frame header\n");
-        return AVERROR_INVALIDDATA;
+        return ret;
     }
-
-    avpriv_mpegaudio_decode_header((MPADecodeHeader *)s, header);
     /* update codec info */
     avctx->sample_rate = s->sample_rate;
     avctx->channels    = s->nb_channels;
@@ -1952,13 +1945,12 @@ static int decode_frame_mp3on4(AVCodecContext *avctx, void *data,
         }
         header = (AV_RB32(buf) & 0x000fffff) | s->syncword; // patch header
 
-        if (ff_mpa_check_header(header) < 0) {
+        ret = avpriv_mpegaudio_decode_header((MPADecodeHeader *)m, header);
+        if (ret < 0) {
             av_log(avctx, AV_LOG_ERROR, "Bad header, discard block\n");
             return AVERROR_INVALIDDATA;
         }
 
-        avpriv_mpegaudio_decode_header((MPADecodeHeader *)m, header);
-
         if (ch + m->nb_channels > avctx->channels ||
             s->coff[fr] + m->nb_channels > avctx->channels) {
             av_log(avctx, AV_LOG_ERROR, "frame channel count exceeds codec "
diff --git a/libavcodec/mpegaudiodecheader.c b/libavcodec/mpegaudiodecheader.c
index d522c064..ae86b087 100644
--- a/libavcodec/mpegaudiodecheader.c
+++ b/libavcodec/mpegaudiodecheader.c
@@ -37,6 +37,12 @@ int avpriv_mpegaudio_decode_header(MPADecodeHeader *s, uint32_t header)
 {
     int sample_rate, frame_size, mpeg25, padding;
     int sample_rate_index, bitrate_index;
+    int ret;
+
+    ret = ff_mpa_check_header(header);
+    if (ret < 0)
+        return ret;
+
     if (header & (1<<20)) {
         s->lsf = (header & (1<<19)) ? 0 : 1;
         mpeg25 = 0;
@@ -113,13 +119,10 @@ int avpriv_mpegaudio_decode_header(MPADecodeHeader *s, uint32_t header)
     return 0;
 }
 
-int avpriv_mpa_decode_header2(uint32_t head, int *sample_rate, int *channels, int *frame_size, int *bit_rate, enum AVCodecID *codec_id)
+int ff_mpa_decode_header(uint32_t head, int *sample_rate, int *channels, int *frame_size, int *bit_rate, enum AVCodecID *codec_id)
 {
     MPADecodeHeader s1, *s = &s1;
 
-    if (ff_mpa_check_header(head) != 0)
-        return -1;
-
     if (avpriv_mpegaudio_decode_header(s, head) != 0) {
         return -1;
     }
@@ -150,7 +153,14 @@ int avpriv_mpa_decode_header2(uint32_t head, int *sample_rate, int *channels, in
     return s->frame_size;
 }
 
+#if LIBAVCODEC_VERSION_MAJOR < 58
+int avpriv_mpa_decode_header2(uint32_t head, int *sample_rate, int *channels, int *frame_size, int *bit_rate, enum AVCodecID *codec_id)
+{
+    return ff_mpa_decode_header(head, sample_rate, channels, frame_size, bit_rate, codec_id);
+}
+
 int avpriv_mpa_decode_header(AVCodecContext *avctx, uint32_t head, int *sample_rate, int *channels, int *frame_size, int *bit_rate)
 {
-    return avpriv_mpa_decode_header2(head, sample_rate, channels, frame_size, bit_rate, &avctx->codec_id);
+    return ff_mpa_decode_header(head, sample_rate, channels, frame_size, bit_rate, &avctx->codec_id);
 }
+#endif
diff --git a/libavcodec/mpegaudiodecheader.h b/libavcodec/mpegaudiodecheader.h
index 444b85f2..55d5a156 100644
--- a/libavcodec/mpegaudiodecheader.h
+++ b/libavcodec/mpegaudiodecheader.h
@@ -54,9 +54,13 @@ int avpriv_mpegaudio_decode_header(MPADecodeHeader *s, uint32_t header);
 
 /* useful helper to get mpeg audio stream infos. Return -1 if error in
    header, otherwise the coded frame size in bytes */
-int avpriv_mpa_decode_header(AVCodecContext *avctx, uint32_t head, int *sample_rate, int *channels, int *frame_size, int *bitrate);
+int ff_mpa_decode_header(uint32_t head, int *sample_rate,
+                         int *channels, int *frame_size, int *bitrate, enum AVCodecID *codec_id);
 
+#if LIBAVCODEC_VERSION_MAJOR < 58
+int avpriv_mpa_decode_header(AVCodecContext *avctx, uint32_t head, int *sample_rate, int *channels, int *frame_size, int *bitrate);
 int avpriv_mpa_decode_header2(uint32_t head, int *sample_rate, int *channels, int *frame_size, int *bitrate, enum AVCodecID *codec_id);
+#endif
 
 /* fast header check for resync */
 static inline int ff_mpa_check_header(uint32_t header){
diff --git a/libavcodec/mpegaudiodsp.c b/libavcodec/mpegaudiodsp.c
index 5fe34448..a5d20df6 100644
--- a/libavcodec/mpegaudiodsp.c
+++ b/libavcodec/mpegaudiodsp.c
@@ -46,5 +46,5 @@ av_cold void ff_mpadsp_init(MPADSPContext *s)
     if (ARCH_PPC)     ff_mpadsp_init_ppc(s);
     if (ARCH_X86)     ff_mpadsp_init_x86(s);
     if (HAVE_MIPSFPU)   ff_mpadsp_init_mipsfpu(s);
-    if (HAVE_MIPSDSPR1) ff_mpadsp_init_mipsdspr1(s);
+    if (HAVE_MIPSDSP) ff_mpadsp_init_mipsdsp(s);
 }
diff --git a/libavcodec/mpegaudiodsp.h b/libavcodec/mpegaudiodsp.h
index a722a2f3..b827163d 100644
--- a/libavcodec/mpegaudiodsp.h
+++ b/libavcodec/mpegaudiodsp.h
@@ -60,7 +60,7 @@ void ff_mpadsp_init_arm(MPADSPContext *s);
 void ff_mpadsp_init_ppc(MPADSPContext *s);
 void ff_mpadsp_init_x86(MPADSPContext *s);
 void ff_mpadsp_init_mipsfpu(MPADSPContext *s);
-void ff_mpadsp_init_mipsdspr1(MPADSPContext *s);
+void ff_mpadsp_init_mipsdsp(MPADSPContext *s);
 
 void ff_mpa_synth_init_float(float *window);
 void ff_mpa_synth_init_fixed(int32_t *window);
diff --git a/libavcodec/mpegaudioenc_template.c b/libavcodec/mpegaudioenc_template.c
index 5a0897f0..b91d0a88 100644
--- a/libavcodec/mpegaudioenc_template.c
+++ b/libavcodec/mpegaudioenc_template.c
@@ -244,11 +244,11 @@ static void idct32(int *out, int *tab)
     do {
         int x1, x2, x3, x4;
 
-        x3 = MUL(t[16], FIX(SQRT2*0.5));
+        x3 = MUL(t[16], FIX(M_SQRT2*0.5));
         x4 = t[0] - x3;
         x3 = t[0] + x3;
 
-        x2 = MUL(-(t[24] + t[8]), FIX(SQRT2*0.5));
+        x2 = MUL(-(t[24] + t[8]), FIX(M_SQRT2*0.5));
         x1 = MUL((t[8] - x2), xp[0]);
         x2 = MUL((t[8] + x2), xp[1]);
 
@@ -763,7 +763,7 @@ static int MPA_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     }
     compute_bit_allocation(s, smr, bit_alloc, &padding);
 
-    if ((ret = ff_alloc_packet2(avctx, avpkt, MPA_MAX_CODED_FRAME_SIZE)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, avpkt, MPA_MAX_CODED_FRAME_SIZE, 0)) < 0)
         return ret;
 
     init_put_bits(&s->pb, avpkt->data, avpkt->size);
diff --git a/libavcodec/mpegaudiotab.h b/libavcodec/mpegaudiotab.h
index 42d42d88..bb2e5de4 100644
--- a/libavcodec/mpegaudiotab.h
+++ b/libavcodec/mpegaudiotab.h
@@ -33,8 +33,6 @@
 #include <stdint.h>
 #include "mpegaudio.h"
 
-#define SQRT2 1.41421356237309514547
-
 static const int costab32[30] = {
     FIX(0.54119610014619701222),
     FIX(1.3065629648763763537),
diff --git a/libavcodec/mpegpicture.c b/libavcodec/mpegpicture.c
new file mode 100644
index 00000000..16b8f52f
--- /dev/null
+++ b/libavcodec/mpegpicture.c
@@ -0,0 +1,473 @@
+/*
+ * Mpeg video formats-related picture management functions
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/common.h"
+
+#include "avcodec.h"
+#include "motion_est.h"
+#include "mpegpicture.h"
+#include "mpegutils.h"
+
+static int make_tables_writable(Picture *pic)
+{
+    int ret, i; /* ret is assigned inside MAKE_WRITABLE */
+#define MAKE_WRITABLE(table) \
+do {\
+    if (pic->table &&\
+       (ret = av_buffer_make_writable(&pic->table)) < 0)\
+    return ret;\
+} while (0)
+    /* Make every table buffer that is set on pic writable; unset ones are skipped. */
+    MAKE_WRITABLE(mb_var_buf);
+    MAKE_WRITABLE(mc_mb_var_buf);
+    MAKE_WRITABLE(mb_mean_buf);
+    MAKE_WRITABLE(mbskip_table_buf);
+    MAKE_WRITABLE(qscale_table_buf);
+    MAKE_WRITABLE(mb_type_buf);
+
+    for (i = 0; i < 2; i++) {
+        MAKE_WRITABLE(motion_val_buf[i]);
+        MAKE_WRITABLE(ref_index_buf[i]);
+    }
+
+    return 0; /* a failing av_buffer_make_writable() has already returned its error above */
+}
+
+int ff_mpeg_framesize_alloc(AVCodecContext *avctx, MotionEstContext *me,
+                            ScratchpadContext *sc, int linesize)
+{
+    int alloc_size = FFALIGN(FFABS(linesize) + 64, 32);
+    /* Allocate the edge-emulation and scratchpad buffers; nothing is allocated with a hwaccel. */
+    if (avctx->hwaccel
+#if FF_API_CAP_VDPAU
+        || avctx->codec->capabilities & AV_CODEC_CAP_HWACCEL_VDPAU
+#endif
+        )
+        return 0;
+
+    if (linesize < 24) {
+        av_log(avctx, AV_LOG_ERROR, "Image too small, temporary buffers cannot function\n");
+        return AVERROR_PATCHWELCOME;
+    }
+
+    // edge emu needs blocksize + filter length - 1
+    // (= 17x17 for halfpel / 21x21 for h264)
+    // VC1 computes luma and chroma simultaneously and needs 19x19 + 9x9
+    // at uvlinesize. It supports only YUV420, so 24x24 is enough
+    // linesize * interlaced * MBsize
+    // we also use this buffer for encoding in encode_mb_internal(), needing an additional 32 lines
+    FF_ALLOCZ_ARRAY_OR_GOTO(avctx, sc->edge_emu_buffer, alloc_size, 4 * 68,
+                      fail);
+
+    FF_ALLOCZ_ARRAY_OR_GOTO(avctx, me->scratchpad, alloc_size, 4 * 16 * 2,
+                      fail)
+    me->temp            = me->scratchpad; /* the pointers below all refer to this one allocation */
+    sc->rd_scratchpad   = me->scratchpad;
+    sc->b_scratchpad    = me->scratchpad;
+    sc->obmc_scratchpad = me->scratchpad + 16;
+
+    return 0;
+fail: /* reached when either allocation above fails */
+    av_freep(&sc->edge_emu_buffer);
+    return AVERROR(ENOMEM);
+}
+
+/**
+ * Obtain the pixel buffer for pic and validate its strides (0 on success, <0 on error).
+ */
+static int alloc_frame_buffer(AVCodecContext *avctx,  Picture *pic,
+                              MotionEstContext *me, ScratchpadContext *sc,
+                              int chroma_x_shift, int chroma_y_shift,
+                              int linesize, int uvlinesize)
+{
+    int edges_needed = av_codec_is_encoder(avctx->codec); /* encoders request EDGE_WIDTH extra pixels per side */
+    int r, ret;
+
+    pic->tf.f = pic->f;
+    if (avctx->codec_id != AV_CODEC_ID_WMV3IMAGE &&
+        avctx->codec_id != AV_CODEC_ID_VC1IMAGE  &&
+        avctx->codec_id != AV_CODEC_ID_MSS2) {
+        if (edges_needed) {
+            pic->f->width  = avctx->width  + 2 * EDGE_WIDTH;
+            pic->f->height = avctx->height + 2 * EDGE_WIDTH;
+        }
+
+        r = ff_thread_get_buffer(avctx, &pic->tf,
+                                 pic->reference ? AV_GET_BUFFER_FLAG_REF : 0);
+    } else { /* these three codecs always use the default buffer allocator */
+        pic->f->width  = avctx->width;
+        pic->f->height = avctx->height;
+        pic->f->format = avctx->pix_fmt;
+        r = avcodec_default_get_buffer2(avctx, pic->f, 0);
+    }
+
+    if (r < 0 || !pic->f->buf[0]) {
+        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed (%d %p)\n",
+               r, pic->f->data[0]);
+        return -1;
+    }
+    /* Move the plane pointers past the top/left padding, then restore the coded size. */
+    if (edges_needed) {
+        int i;
+        for (i = 0; pic->f->data[i]; i++) {
+            int offset = (EDGE_WIDTH >> (i ? chroma_y_shift : 0)) *
+                         pic->f->linesize[i] +
+                         (EDGE_WIDTH >> (i ? chroma_x_shift : 0));
+            pic->f->data[i] += offset;
+        }
+        pic->f->width  = avctx->width;
+        pic->f->height = avctx->height;
+    }
+
+    if (avctx->hwaccel) {
+        assert(!pic->hwaccel_picture_private);
+        if (avctx->hwaccel->frame_priv_data_size) {
+            pic->hwaccel_priv_buf = av_buffer_allocz(avctx->hwaccel->frame_priv_data_size);
+            if (!pic->hwaccel_priv_buf) {
+                av_log(avctx, AV_LOG_ERROR, "alloc_frame_buffer() failed (hwaccel private data allocation)\n");
+                return -1;
+            }
+            pic->hwaccel_picture_private = pic->hwaccel_priv_buf->data;
+        }
+    }
+    /* A non-zero linesize is a stride recorded by the caller; the new buffer must match it. */
+    if (linesize && (linesize   != pic->f->linesize[0] ||
+                     uvlinesize != pic->f->linesize[1])) {
+        av_log(avctx, AV_LOG_ERROR,
+               "get_buffer() failed (stride changed)\n");
+        ff_mpeg_unref_picture(avctx, pic);
+        return -1;
+    }
+
+    if (pic->f->linesize[1] != pic->f->linesize[2]) {
+        av_log(avctx, AV_LOG_ERROR,
+               "get_buffer() failed (uv stride mismatch)\n");
+        ff_mpeg_unref_picture(avctx, pic);
+        return -1;
+    }
+    /* Allocate the scratch buffers only if the context does not have them yet. */
+    if (!sc->edge_emu_buffer &&
+        (ret = ff_mpeg_framesize_alloc(avctx, me, sc,
+                                       pic->f->linesize[0])) < 0) {
+        av_log(avctx, AV_LOG_ERROR,
+               "get_buffer() failed to allocate context scratch buffers.\n");
+        ff_mpeg_unref_picture(avctx, pic);
+        return ret;
+    }
+
+    return 0;
+}
+
+static int alloc_picture_tables(AVCodecContext *avctx, Picture *pic, int encoding, int out_format,
+                                int mb_stride, int mb_width, int mb_height, int b8_stride)
+{
+    const int big_mb_num    = mb_stride * (mb_height + 1) + 1;
+    const int mb_array_size = mb_stride * mb_height;
+    const int b8_array_size = b8_stride * mb_height * 2;
+    int i;
+    /* Allocate zeroed tables for pic; on ENOMEM, buffers already allocated stay set on pic. */
+
+    pic->mbskip_table_buf = av_buffer_allocz(mb_array_size + 2);
+    pic->qscale_table_buf = av_buffer_allocz(big_mb_num + mb_stride);
+    pic->mb_type_buf      = av_buffer_allocz((big_mb_num + mb_stride) *
+                                             sizeof(uint32_t));
+    if (!pic->mbskip_table_buf || !pic->qscale_table_buf || !pic->mb_type_buf)
+        return AVERROR(ENOMEM);
+
+    if (encoding) {
+        pic->mb_var_buf    = av_buffer_allocz(mb_array_size * sizeof(int16_t));
+        pic->mc_mb_var_buf = av_buffer_allocz(mb_array_size * sizeof(int16_t));
+        pic->mb_mean_buf   = av_buffer_allocz(mb_array_size);
+        if (!pic->mb_var_buf || !pic->mc_mb_var_buf || !pic->mb_mean_buf)
+            return AVERROR(ENOMEM);
+    }
+    /* Motion vector / reference index tables: H.263 output, encoding, or MV debug/export only. */
+    if (out_format == FMT_H263 || encoding || avctx->debug_mv ||
+        (avctx->flags2 & AV_CODEC_FLAG2_EXPORT_MVS)) {
+        int mv_size        = 2 * (b8_array_size + 4) * sizeof(int16_t);
+        int ref_index_size = 4 * mb_array_size;
+
+        for (i = 0; mv_size && i < 2; i++) {
+            pic->motion_val_buf[i] = av_buffer_allocz(mv_size);
+            pic->ref_index_buf[i]  = av_buffer_allocz(ref_index_size);
+            if (!pic->motion_val_buf[i] || !pic->ref_index_buf[i])
+                return AVERROR(ENOMEM);
+        }
+    }
+    /* Record the dimensions so ff_alloc_picture() can detect a later size change. */
+    pic->alloc_mb_width  = mb_width;
+    pic->alloc_mb_height = mb_height;
+
+    return 0;
+}
+
+/**
+ * Allocate a Picture: its tables and, if shared = 0, its pixels via get_buffer().
+ * Returns 0 on success, -1 if the frame buffer fails, AVERROR(ENOMEM) if tables fail.
+ */
+int ff_alloc_picture(AVCodecContext *avctx, Picture *pic, MotionEstContext *me,
+                     ScratchpadContext *sc, int shared, int encoding,
+                     int chroma_x_shift, int chroma_y_shift, int out_format,
+                     int mb_stride, int mb_width, int mb_height, int b8_stride,
+                     ptrdiff_t *linesize, ptrdiff_t *uvlinesize)
+{
+    int i, ret;
+    /* Existing tables sized for different macroblock dimensions are dropped. */
+    if (pic->qscale_table_buf)
+        if (   pic->alloc_mb_width  != mb_width
+            || pic->alloc_mb_height != mb_height)
+            ff_free_picture_tables(pic);
+
+    if (shared) {
+        av_assert0(pic->f->data[0]);
+        pic->shared = 1;
+    } else {
+        av_assert0(!pic->f->buf[0]);
+        if (alloc_frame_buffer(avctx, pic, me, sc,
+                               chroma_x_shift, chroma_y_shift,
+                               *linesize, *uvlinesize) < 0)
+            return -1;
+        /* Hand the strides of the new buffer back to the caller. */
+        *linesize   = pic->f->linesize[0];
+        *uvlinesize = pic->f->linesize[1];
+    }
+
+    if (!pic->qscale_table_buf)
+        ret = alloc_picture_tables(avctx, pic, encoding, out_format,
+                                   mb_stride, mb_width, mb_height, b8_stride);
+    else
+        ret = make_tables_writable(pic);
+    if (ret < 0)
+        goto fail;
+
+    if (encoding) {
+        pic->mb_var    = (uint16_t*)pic->mb_var_buf->data;
+        pic->mc_mb_var = (uint16_t*)pic->mc_mb_var_buf->data;
+        pic->mb_mean   = pic->mb_mean_buf->data;
+    }
+    /* qscale_table and mb_type start 2 * mb_stride + 1 entries into their buffers. */
+    pic->mbskip_table = pic->mbskip_table_buf->data;
+    pic->qscale_table = pic->qscale_table_buf->data + 2 * mb_stride + 1;
+    pic->mb_type      = (uint32_t*)pic->mb_type_buf->data + 2 * mb_stride + 1;
+
+    if (pic->motion_val_buf[0]) {
+        for (i = 0; i < 2; i++) {
+            pic->motion_val[i] = (int16_t (*)[2])pic->motion_val_buf[i]->data + 4;
+            pic->ref_index[i]  = pic->ref_index_buf[i]->data;
+        }
+    }
+
+    return 0;
+fail: /* table allocation or make-writable failed; the value of ret is not propagated */
+    av_log(avctx, AV_LOG_ERROR, "Error allocating a picture.\n");
+    ff_mpeg_unref_picture(avctx, pic);
+    ff_free_picture_tables(pic);
+    return AVERROR(ENOMEM);
+}
+
+/**
+ * Release the frame and hwaccel buffers of pic and zero the members after mb_mean.
+ */
+void ff_mpeg_unref_picture(AVCodecContext *avctx, Picture *pic)
+{
+    int off = offsetof(Picture, mb_mean) + sizeof(pic->mb_mean); /* first byte past mb_mean */
+
+    pic->tf.f = pic->f;
+    /* WM Image / Screen codecs allocate internal buffers with different
+     * dimensions / colorspaces; ignore user-defined callbacks for these. */
+    if (avctx->codec_id != AV_CODEC_ID_WMV3IMAGE &&
+        avctx->codec_id != AV_CODEC_ID_VC1IMAGE  &&
+        avctx->codec_id != AV_CODEC_ID_MSS2)
+        ff_thread_release_buffer(avctx, &pic->tf);
+    else if (pic->f)
+        av_frame_unref(pic->f);
+
+    av_buffer_unref(&pic->hwaccel_priv_buf);
+    /* The table buffers are freed only when a reallocation was requested. */
+    if (pic->needs_realloc)
+        ff_free_picture_tables(pic);
+    /* Clear every member located after mb_mean in the struct. */
+    memset((uint8_t*)pic + off, 0, sizeof(*pic) - off);
+}
+
+int ff_update_picture_tables(Picture *dst, Picture *src)
+{
+     int i;
+    /* Make dst reference src's table buffers; 0 on success, AVERROR(ENOMEM) on failure. */
+#define UPDATE_TABLE(table)                                                   \
+do {                                                                          \
+    if (src->table &&                                                         \
+        (!dst->table || dst->table->buffer != src->table->buffer)) {          \
+        av_buffer_unref(&dst->table);                                         \
+        dst->table = av_buffer_ref(src->table);                               \
+        if (!dst->table) {                                                    \
+            ff_free_picture_tables(dst);                                      \
+            return AVERROR(ENOMEM);                                           \
+        }                                                                     \
+    }                                                                         \
+} while (0)
+    /* A table is re-referenced only when dst does not already share src's buffer. */
+    UPDATE_TABLE(mb_var_buf);
+    UPDATE_TABLE(mc_mb_var_buf);
+    UPDATE_TABLE(mb_mean_buf);
+    UPDATE_TABLE(mbskip_table_buf);
+    UPDATE_TABLE(qscale_table_buf);
+    UPDATE_TABLE(mb_type_buf);
+    for (i = 0; i < 2; i++) {
+        UPDATE_TABLE(motion_val_buf[i]);
+        UPDATE_TABLE(ref_index_buf[i]);
+    }
+    /* The raw table pointers are copied from src unchanged. */
+    dst->mb_var        = src->mb_var;
+    dst->mc_mb_var     = src->mc_mb_var;
+    dst->mb_mean       = src->mb_mean;
+    dst->mbskip_table  = src->mbskip_table;
+    dst->qscale_table  = src->qscale_table;
+    dst->mb_type       = src->mb_type;
+    for (i = 0; i < 2; i++) {
+        dst->motion_val[i] = src->motion_val[i];
+        dst->ref_index[i]  = src->ref_index[i];
+    }
+
+    dst->alloc_mb_width  = src->alloc_mb_width;
+    dst->alloc_mb_height = src->alloc_mb_height;
+
+    return 0;
+}
+
+int ff_mpeg_ref_picture(AVCodecContext *avctx, Picture *dst, Picture *src)
+{
+    int ret;
+    /* Make dst a new reference to src; 0 on success, <0 on error (dst is then unreferenced). */
+    av_assert0(!dst->f->buf[0]);
+    av_assert0(src->f->buf[0]);
+
+    src->tf.f = src->f;
+    dst->tf.f = dst->f;
+    ret = ff_thread_ref_frame(&dst->tf, &src->tf);
+    if (ret < 0)
+        goto fail;
+
+    ret = ff_update_picture_tables(dst, src);
+    if (ret < 0)
+        goto fail;
+
+    if (src->hwaccel_picture_private) {
+        dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
+        if (!dst->hwaccel_priv_buf) {
+            ret = AVERROR(ENOMEM); /* ret is still 0 here; without this the failure returned success */
+            goto fail;
+        }
+        dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
+    }
+
+    dst->field_picture           = src->field_picture;
+    dst->mb_var_sum              = src->mb_var_sum;
+    dst->mc_mb_var_sum           = src->mc_mb_var_sum;
+    dst->b_frame_score           = src->b_frame_score;
+    dst->needs_realloc           = src->needs_realloc;
+    dst->reference               = src->reference;
+    dst->shared                  = src->shared;
+    memcpy(dst->encoding_error, src->encoding_error, sizeof(dst->encoding_error));
+
+    return 0;
+fail:
+    ff_mpeg_unref_picture(avctx, dst);
+    return ret;
+}
+
+static inline int pic_is_unused(Picture *pic)
+{
+    if (!pic->f->buf[0]) /* no frame buffer attached */
+        return 1;
+    if (pic->needs_realloc && !(pic->reference & DELAYED_PIC_REF)) /* flagged for reallocation, DELAYED_PIC_REF not set */
+        return 1;
+    return 0;
+}
+
+static int find_unused_picture(AVCodecContext *avctx, Picture *picture, int shared)
+{
+    int i;
+    /* Return the index of the first free slot among MAX_PICTURE_COUNT; abort() if none. */
+    if (shared) {
+        for (i = 0; i < MAX_PICTURE_COUNT; i++) {
+            if (!picture[i].f->buf[0])
+                return i;
+        }
+    } else {
+        for (i = 0; i < MAX_PICTURE_COUNT; i++) {
+            if (pic_is_unused(&picture[i]))
+                return i;
+        }
+    }
+
+    av_log(avctx, AV_LOG_FATAL,
+           "Internal error, picture buffer overflow\n");
+    /* We could return -1, but the codec would crash trying to draw into a
+     * non-existing frame anyway. This is safer than waiting for a random crash.
+     * Also the return of this is never useful, an encoder must only allocate
+     * as much as allowed in the specification. This has no relationship to how
+     * much libavcodec could allocate (and MAX_PICTURE_COUNT is always large
+     * enough for such valid streams).
+     * Plus, a decoder has to check stream validity and remove frames if too
+     * many reference frames are around. Waiting for "OOM" is not correct at
+     * all. Similarly, missing reference frames have to be replaced by
+     * interpolated/MC frames, anything else is a bug in the codec ...
+     */
+    abort();
+    return -1; /* not reached: abort() does not return */
+}
+
+int ff_find_unused_picture(AVCodecContext *avctx, Picture *picture, int shared)
+{
+    int ret = find_unused_picture(avctx, picture, shared);
+    /* A slot still flagged for reallocation has its tables and frame released before reuse. */
+    if (ret >= 0 && ret < MAX_PICTURE_COUNT) {
+        if (picture[ret].needs_realloc) {
+            picture[ret].needs_realloc = 0;
+            ff_free_picture_tables(&picture[ret]);
+            ff_mpeg_unref_picture(avctx, &picture[ret]);
+        }
+    }
+    return ret;
+}
+
+void ff_free_picture_tables(Picture *pic)
+{
+    int i;
+    /* Unreference every table buffer; the raw table pointers are not cleared here. */
+    pic->alloc_mb_width  =
+    pic->alloc_mb_height = 0;
+
+    av_buffer_unref(&pic->mb_var_buf);
+    av_buffer_unref(&pic->mc_mb_var_buf);
+    av_buffer_unref(&pic->mb_mean_buf);
+    av_buffer_unref(&pic->mbskip_table_buf);
+    av_buffer_unref(&pic->qscale_table_buf);
+    av_buffer_unref(&pic->mb_type_buf);
+
+    for (i = 0; i < 2; i++) {
+        av_buffer_unref(&pic->motion_val_buf[i]);
+        av_buffer_unref(&pic->ref_index_buf[i]);
+    }
+}
diff --git a/libavcodec/mpegpicture.h b/libavcodec/mpegpicture.h
new file mode 100644
index 00000000..2db3d673
--- /dev/null
+++ b/libavcodec/mpegpicture.h
@@ -0,0 +1,114 @@
+/*
+ * Mpeg video formats-related defines and utility functions
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MPEGPICTURE_H
+#define AVCODEC_MPEGPICTURE_H
+
+#include <stdint.h>
+
+#include "libavutil/frame.h"
+
+#include "avcodec.h"
+#include "motion_est.h"
+#include "thread.h"
+
+#define MAX_PICTURE_COUNT 36
+#define EDGE_WIDTH 16
+
+typedef struct ScratchpadContext {
+    uint8_t *edge_emu_buffer;     ///< temporary buffer for if MVs point to out-of-frame data
+    uint8_t *rd_scratchpad;       ///< scratchpad for rate distortion mb decision
+    uint8_t *obmc_scratchpad;     ///< set to me->scratchpad + 16 by ff_mpeg_framesize_alloc()
+    uint8_t *b_scratchpad;        ///< scratchpad used for writing into write only buffers
+} ScratchpadContext;
+
+/**
+ * Picture. Member order matters: ff_mpeg_unref_picture() zeroes everything after mb_mean.
+ */
+typedef struct Picture {
+    struct AVFrame *f;
+    ThreadFrame tf;             ///< tf.f is set to f before the ff_thread_*() buffer calls
+
+    AVBufferRef *qscale_table_buf;
+    int8_t *qscale_table;       ///< qscale_table_buf data + 2 * mb_stride + 1
+
+    AVBufferRef *motion_val_buf[2];
+    int16_t (*motion_val[2])[2]; ///< motion_val_buf data + 4 entries
+
+    AVBufferRef *mb_type_buf;
+    uint32_t *mb_type;          ///< types and macros are defined in mpegutils.h
+
+    AVBufferRef *mbskip_table_buf;
+    uint8_t *mbskip_table;      ///< start of mbskip_table_buf data
+
+    AVBufferRef *ref_index_buf[2];
+    int8_t *ref_index[2];       ///< start of ref_index_buf data
+
+    AVBufferRef *mb_var_buf;
+    uint16_t *mb_var;           ///< Table for MB variances
+
+    AVBufferRef *mc_mb_var_buf;
+    uint16_t *mc_mb_var;        ///< Table for motion compensated MB variances
+
+    int alloc_mb_width;         ///< mb_width used to allocate tables
+    int alloc_mb_height;        ///< mb_height used to allocate tables
+
+    AVBufferRef *mb_mean_buf;
+    uint8_t *mb_mean;           ///< Table for MB luminance
+
+    AVBufferRef *hwaccel_priv_buf;
+    void *hwaccel_picture_private; ///< Hardware accelerator private data
+
+    int field_picture;          ///< whether or not the picture was encoded in separate fields
+
+    int64_t mb_var_sum;         ///< sum of MB variance for current frame
+    int64_t mc_mb_var_sum;      ///< motion compensated MB variance for current frame
+
+    int b_frame_score;
+    int needs_realloc;          ///< Picture needs to be reallocated (eg due to a frame size change)
+
+    int reference;              ///< flags; DELAYED_PIC_REF is tested when searching for an unused picture
+    int shared;                 ///< set to 1 by ff_alloc_picture() when called with shared != 0
+
+    uint64_t encoding_error[AV_NUM_DATA_POINTERS];
+} Picture;
+
+/**
+ * Allocate a Picture.
+ * The pixels are allocated/set by calling get_buffer() if shared = 0.
+ */
+int ff_alloc_picture(AVCodecContext *avctx, Picture *pic, MotionEstContext *me,
+                     ScratchpadContext *sc, int shared, int encoding,
+                     int chroma_x_shift, int chroma_y_shift, int out_format,
+                     int mb_stride, int mb_width, int mb_height, int b8_stride,
+                     ptrdiff_t *linesize, ptrdiff_t *uvlinesize);
+
+int ff_mpeg_framesize_alloc(AVCodecContext *avctx, MotionEstContext *me,
+                            ScratchpadContext *sc, int linesize);
+
+int ff_mpeg_ref_picture(AVCodecContext *avctx, Picture *dst, Picture *src);
+void ff_mpeg_unref_picture(AVCodecContext *avctx, Picture *picture);
+
+void ff_free_picture_tables(Picture *pic);
+int ff_update_picture_tables(Picture *dst, Picture *src);
+
+int ff_find_unused_picture(AVCodecContext *avctx, Picture *picture, int shared);
+
+#endif /* AVCODEC_MPEGPICTURE_H */
diff --git a/libavcodec/mpegutils.h b/libavcodec/mpegutils.h
index 37cc391c..9cfadfc4 100644
--- a/libavcodec/mpegutils.h
+++ b/libavcodec/mpegutils.h
@@ -28,6 +28,10 @@
 #include "avcodec.h"
 #include "version.h"
 
+/**
+ * Return value for header parsers if frame is not coded.
+ * */
+#define FRAME_SKIPPED 100
 
 /* picture type */
 #define PICT_TOP_FIELD     1
@@ -40,6 +44,8 @@
  */
 #define DELAYED_PIC_REF 4
 
+#define MAX_MB_BYTES    (30 * 16 * 16 * 3 / 8 + 120)
+#define MAX_FCODE        7
 
 /* MB types */
 #if !FF_API_MB_TYPE
@@ -114,6 +120,7 @@
 
 #define CANDIDATE_MB_TYPE_DIRECT0    (1 << 12)
 
+#define INPLACE_OFFSET 16
 
 enum OutputFormat {
     FMT_MPEG1,
@@ -132,4 +139,4 @@ void ff_draw_horiz_band(AVCodecContext *avctx, AVFrame *cur, AVFrame *last,
                         int y, int h, int picture_structure, int first_field,
                         int low_delay);
 
-#endif /* AVCODEC_PICTTYPE_H */
+#endif /* AVCODEC_MPEGUTILS_H */
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index 06188a03..236987b3 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -39,6 +39,7 @@
 #include "idctdsp.h"
 #include "internal.h"
 #include "mathops.h"
+#include "mpeg_er.h"
 #include "mpegutils.h"
 #include "mpegvideo.h"
 #include "mpegvideodata.h"
@@ -113,6 +114,9 @@ static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
     int i, level, nCoeffs;
     const uint16_t *quant_matrix;
 
+    if (s->q_scale_type) qscale = ff_mpeg2_non_linear_qscale[qscale];
+    else                 qscale <<= 1;
+
     if(s->alternate_scan) nCoeffs= 63;
     else nCoeffs= s->block_last_index[n];
 
@@ -124,10 +128,10 @@ static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
         if (level) {
             if (level < 0) {
                 level = -level;
-                level = (int)(level * qscale * quant_matrix[j]) >> 3;
+                level = (int)(level * qscale * quant_matrix[j]) >> 4;
                 level = -level;
             } else {
-                level = (int)(level * qscale * quant_matrix[j]) >> 3;
+                level = (int)(level * qscale * quant_matrix[j]) >> 4;
             }
             block[j] = level;
         }
@@ -141,6 +145,9 @@ static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
     const uint16_t *quant_matrix;
     int sum=-1;
 
+    if (s->q_scale_type) qscale = ff_mpeg2_non_linear_qscale[qscale];
+    else                 qscale <<= 1;
+
     if(s->alternate_scan) nCoeffs= 63;
     else nCoeffs= s->block_last_index[n];
 
@@ -153,10 +160,10 @@ static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
         if (level) {
             if (level < 0) {
                 level = -level;
-                level = (int)(level * qscale * quant_matrix[j]) >> 3;
+                level = (int)(level * qscale * quant_matrix[j]) >> 4;
                 level = -level;
             } else {
-                level = (int)(level * qscale * quant_matrix[j]) >> 3;
+                level = (int)(level * qscale * quant_matrix[j]) >> 4;
             }
             block[j] = level;
             sum+=level;
@@ -172,6 +179,9 @@ static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
     const uint16_t *quant_matrix;
     int sum=-1;
 
+    if (s->q_scale_type) qscale = ff_mpeg2_non_linear_qscale[qscale];
+    else                 qscale <<= 1;
+
     if(s->alternate_scan) nCoeffs= 63;
     else nCoeffs= s->block_last_index[n];
 
@@ -183,11 +193,11 @@ static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
             if (level < 0) {
                 level = -level;
                 level = (((level << 1) + 1) * qscale *
-                         ((int) (quant_matrix[j]))) >> 4;
+                         ((int) (quant_matrix[j]))) >> 5;
                 level = -level;
             } else {
                 level = (((level << 1) + 1) * qscale *
-                         ((int) (quant_matrix[j]))) >> 4;
+                         ((int) (quant_matrix[j]))) >> 5;
             }
             block[j] = level;
             sum+=level;
@@ -256,34 +266,6 @@ static void dct_unquantize_h263_inter_c(MpegEncContext *s,
     }
 }
 
-static void mpeg_er_decode_mb(void *opaque, int ref, int mv_dir, int mv_type,
-                              int (*mv)[2][4][2],
-                              int mb_x, int mb_y, int mb_intra, int mb_skipped)
-{
-    MpegEncContext *s = opaque;
-
-    s->mv_dir     = mv_dir;
-    s->mv_type    = mv_type;
-    s->mb_intra   = mb_intra;
-    s->mb_skipped = mb_skipped;
-    s->mb_x       = mb_x;
-    s->mb_y       = mb_y;
-    memcpy(s->mv, mv, sizeof(*mv));
-
-    ff_init_block_index(s);
-    ff_update_block_index(s);
-
-    s->bdsp.clear_blocks(s->block[0]);
-
-    s->dest[0] = s->current_picture.f->data[0] + (s->mb_y *  16                       * s->linesize)   + s->mb_x *  16;
-    s->dest[1] = s->current_picture.f->data[1] + (s->mb_y * (16 >> s->chroma_y_shift) * s->uvlinesize) + s->mb_x * (16 >> s->chroma_x_shift);
-    s->dest[2] = s->current_picture.f->data[2] + (s->mb_y * (16 >> s->chroma_y_shift) * s->uvlinesize) + s->mb_x * (16 >> s->chroma_x_shift);
-
-    if (ref)
-        av_log(s->avctx, AV_LOG_DEBUG,
-               "Interlaced error concealment is not fully implemented\n");
-    ff_mpv_decode_mb(s, s->block);
-}
 
 static void gray16(uint8_t *dst, const uint8_t *src, ptrdiff_t linesize, int h)
 {
@@ -324,7 +306,7 @@ static av_cold int dct_init(MpegEncContext *s)
     s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
     s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
     s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
-    if (s->avctx->flags & CODEC_FLAG_BITEXACT)
+    if (s->avctx->flags & AV_CODEC_FLAG_BITEXACT)
         s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_bitexact;
     s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;
 
@@ -339,6 +321,8 @@ static av_cold int dct_init(MpegEncContext *s)
         ff_mpv_common_init_ppc(s);
     if (ARCH_X86)
         ff_mpv_common_init_x86(s);
+    if (ARCH_MIPS)
+        ff_mpv_common_init_mips(s);
 
     return 0;
 }
@@ -361,215 +345,6 @@ av_cold void ff_mpv_idct_init(MpegEncContext *s)
     ff_init_scantable(s->idsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
 }
 
-int ff_mpeg_framesize_alloc(AVCodecContext *avctx, MotionEstContext *me,
-                            ScratchpadContext *sc, int linesize)
-{
-    int alloc_size = FFALIGN(FFABS(linesize) + 64, 32);
-
-    if (avctx->hwaccel || avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU)
-        return 0;
-
-    if (linesize < 24) {
-        av_log(avctx, AV_LOG_ERROR, "Image too small, temporary buffers cannot function\n");
-        return AVERROR_PATCHWELCOME;
-    }
-
-    // edge emu needs blocksize + filter length - 1
-    // (= 17x17 for  halfpel / 21x21 for  h264)
-    // VC1 computes luma and chroma simultaneously and needs 19X19 + 9x9
-    // at uvlinesize. It supports only YUV420 so 24x24 is enough
-    // linesize * interlaced * MBsize
-    // we also use this buffer for encoding in encode_mb_internal() needig an additional 32 lines
-    FF_ALLOCZ_ARRAY_OR_GOTO(avctx, sc->edge_emu_buffer, alloc_size, 4 * 68,
-                      fail);
-
-    FF_ALLOCZ_ARRAY_OR_GOTO(avctx, me->scratchpad, alloc_size, 4 * 16 * 2,
-                      fail)
-    me->temp            = me->scratchpad;
-    sc->rd_scratchpad   = me->scratchpad;
-    sc->b_scratchpad    = me->scratchpad;
-    sc->obmc_scratchpad = me->scratchpad + 16;
-
-    return 0;
-fail:
-    av_freep(&sc->edge_emu_buffer);
-    return AVERROR(ENOMEM);
-}
-
-/**
- * Allocate a frame buffer
- */
-static int alloc_frame_buffer(AVCodecContext *avctx, Picture *pic,
-                              MotionEstContext *me, ScratchpadContext *sc,
-                              int chroma_x_shift, int chroma_y_shift,
-                              int linesize, int uvlinesize)
-{
-    int edges_needed = av_codec_is_encoder(avctx->codec);
-    int r, ret;
-
-    pic->tf.f = pic->f;
-    if (avctx->codec_id != AV_CODEC_ID_WMV3IMAGE &&
-        avctx->codec_id != AV_CODEC_ID_VC1IMAGE  &&
-        avctx->codec_id != AV_CODEC_ID_MSS2) {
-        if (edges_needed) {
-            pic->f->width  = avctx->width  + 2 * EDGE_WIDTH;
-            pic->f->height = avctx->height + 2 * EDGE_WIDTH;
-        }
-
-        r = ff_thread_get_buffer(avctx, &pic->tf,
-                                 pic->reference ? AV_GET_BUFFER_FLAG_REF : 0);
-    } else {
-        pic->f->width  = avctx->width;
-        pic->f->height = avctx->height;
-        pic->f->format = avctx->pix_fmt;
-        r = avcodec_default_get_buffer2(avctx, pic->f, 0);
-    }
-
-    if (r < 0 || !pic->f->buf[0]) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed (%d %p)\n",
-               r, pic->f->data[0]);
-        return -1;
-    }
-
-    if (edges_needed) {
-        int i;
-        for (i = 0; pic->f->data[i]; i++) {
-            int offset = (EDGE_WIDTH >> (i ? chroma_y_shift : 0)) *
-                         pic->f->linesize[i] +
-                         (EDGE_WIDTH >> (i ? chroma_x_shift : 0));
-            pic->f->data[i] += offset;
-        }
-        pic->f->width  = avctx->width;
-        pic->f->height = avctx->height;
-    }
-
-    if (avctx->hwaccel) {
-        assert(!pic->hwaccel_picture_private);
-        if (avctx->hwaccel->frame_priv_data_size) {
-            pic->hwaccel_priv_buf = av_buffer_allocz(avctx->hwaccel->frame_priv_data_size);
-            if (!pic->hwaccel_priv_buf) {
-                av_log(avctx, AV_LOG_ERROR, "alloc_frame_buffer() failed (hwaccel private data allocation)\n");
-                return -1;
-            }
-            pic->hwaccel_picture_private = pic->hwaccel_priv_buf->data;
-        }
-    }
-
-    if (linesize && (linesize   != pic->f->linesize[0] ||
-                     uvlinesize != pic->f->linesize[1])) {
-        av_log(avctx, AV_LOG_ERROR,
-               "get_buffer() failed (stride changed)\n");
-        ff_mpeg_unref_picture(avctx, pic);
-        return -1;
-    }
-
-    if (pic->f->linesize[1] != pic->f->linesize[2]) {
-        av_log(avctx, AV_LOG_ERROR,
-               "get_buffer() failed (uv stride mismatch)\n");
-        ff_mpeg_unref_picture(avctx, pic);
-        return -1;
-    }
-
-    if (!sc->edge_emu_buffer &&
-        (ret = ff_mpeg_framesize_alloc(avctx, me, sc,
-                                       pic->f->linesize[0])) < 0) {
-        av_log(avctx, AV_LOG_ERROR,
-               "get_buffer() failed to allocate context scratch buffers.\n");
-        ff_mpeg_unref_picture(avctx, pic);
-        return ret;
-    }
-
-    return 0;
-}
-
-void ff_free_picture_tables(Picture *pic)
-{
-    int i;
-
-    pic->alloc_mb_width  =
-    pic->alloc_mb_height = 0;
-
-    av_buffer_unref(&pic->mb_var_buf);
-    av_buffer_unref(&pic->mc_mb_var_buf);
-    av_buffer_unref(&pic->mb_mean_buf);
-    av_buffer_unref(&pic->mbskip_table_buf);
-    av_buffer_unref(&pic->qscale_table_buf);
-    av_buffer_unref(&pic->mb_type_buf);
-
-    for (i = 0; i < 2; i++) {
-        av_buffer_unref(&pic->motion_val_buf[i]);
-        av_buffer_unref(&pic->ref_index_buf[i]);
-    }
-}
-
-static int alloc_picture_tables(AVCodecContext *avctx, Picture *pic, int encoding, int out_format,
-                                int mb_stride, int mb_width, int mb_height, int b8_stride)
-{
-    const int big_mb_num    = mb_stride * (mb_height + 1) + 1;
-    const int mb_array_size = mb_stride * mb_height;
-    const int b8_array_size = b8_stride * mb_height * 2;
-    int i;
-
-
-    pic->mbskip_table_buf = av_buffer_allocz(mb_array_size + 2);
-    pic->qscale_table_buf = av_buffer_allocz(big_mb_num + mb_stride);
-    pic->mb_type_buf      = av_buffer_allocz((big_mb_num + mb_stride) *
-                                             sizeof(uint32_t));
-    if (!pic->mbskip_table_buf || !pic->qscale_table_buf || !pic->mb_type_buf)
-        return AVERROR(ENOMEM);
-
-    if (encoding) {
-        pic->mb_var_buf    = av_buffer_allocz(mb_array_size * sizeof(int16_t));
-        pic->mc_mb_var_buf = av_buffer_allocz(mb_array_size * sizeof(int16_t));
-        pic->mb_mean_buf   = av_buffer_allocz(mb_array_size);
-        if (!pic->mb_var_buf || !pic->mc_mb_var_buf || !pic->mb_mean_buf)
-            return AVERROR(ENOMEM);
-    }
-
-    if (out_format == FMT_H263 || encoding || avctx->debug_mv ||
-        (avctx->flags2 & CODEC_FLAG2_EXPORT_MVS)) {
-        int mv_size        = 2 * (b8_array_size + 4) * sizeof(int16_t);
-        int ref_index_size = 4 * mb_array_size;
-
-        for (i = 0; mv_size && i < 2; i++) {
-            pic->motion_val_buf[i] = av_buffer_allocz(mv_size);
-            pic->ref_index_buf[i]  = av_buffer_allocz(ref_index_size);
-            if (!pic->motion_val_buf[i] || !pic->ref_index_buf[i])
-                return AVERROR(ENOMEM);
-        }
-    }
-
-    pic->alloc_mb_width  = mb_width;
-    pic->alloc_mb_height = mb_height;
-
-    return 0;
-}
-
-static int make_tables_writable(Picture *pic)
-{
-    int ret, i;
-#define MAKE_WRITABLE(table) \
-do {\
-    if (pic->table &&\
-       (ret = av_buffer_make_writable(&pic->table)) < 0)\
-    return ret;\
-} while (0)
-
-    MAKE_WRITABLE(mb_var_buf);
-    MAKE_WRITABLE(mc_mb_var_buf);
-    MAKE_WRITABLE(mb_mean_buf);
-    MAKE_WRITABLE(mbskip_table_buf);
-    MAKE_WRITABLE(qscale_table_buf);
-    MAKE_WRITABLE(mb_type_buf);
-
-    for (i = 0; i < 2; i++) {
-        MAKE_WRITABLE(motion_val_buf[i]);
-        MAKE_WRITABLE(ref_index_buf[i]);
-    }
-
-    return 0;
-}
-
 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared)
 {
     return ff_alloc_picture(s->avctx, pic, &s->me, &s->sc, shared, 0,
@@ -578,178 +353,6 @@ static int alloc_picture(MpegEncContext *s, Picture *pic, int shared)
                             &s->linesize, &s->uvlinesize);
 }
 
-/**
- * Allocate a Picture.
- * The pixels are allocated/set by calling get_buffer() if shared = 0
- */
-int ff_alloc_picture(AVCodecContext *avctx, Picture *pic, MotionEstContext *me,
-                     ScratchpadContext *sc, int shared, int encoding,
-                     int chroma_x_shift, int chroma_y_shift, int out_format,
-                     int mb_stride, int mb_width, int mb_height, int b8_stride,
-                     ptrdiff_t *linesize, ptrdiff_t *uvlinesize)
-{
-    int i, ret;
-
-    if (pic->qscale_table_buf)
-        if (   pic->alloc_mb_width  != mb_width
-            || pic->alloc_mb_height != mb_height)
-            ff_free_picture_tables(pic);
-
-    if (shared) {
-        av_assert0(pic->f->data[0]);
-        pic->shared = 1;
-    } else {
-        av_assert0(!pic->f->buf[0]);
-        if (alloc_frame_buffer(avctx, pic, me, sc,
-                               chroma_x_shift, chroma_y_shift,
-                               *linesize, *uvlinesize) < 0)
-            return -1;
-
-        *linesize   = pic->f->linesize[0];
-        *uvlinesize = pic->f->linesize[1];
-    }
-
-    if (!pic->qscale_table_buf)
-        ret = alloc_picture_tables(avctx, pic, encoding, out_format,
-                                   mb_stride, mb_width, mb_height, b8_stride);
-    else
-        ret = make_tables_writable(pic);
-    if (ret < 0)
-        goto fail;
-
-    if (encoding) {
-        pic->mb_var    = (uint16_t*)pic->mb_var_buf->data;
-        pic->mc_mb_var = (uint16_t*)pic->mc_mb_var_buf->data;
-        pic->mb_mean   = pic->mb_mean_buf->data;
-    }
-
-    pic->mbskip_table = pic->mbskip_table_buf->data;
-    pic->qscale_table = pic->qscale_table_buf->data + 2 * mb_stride + 1;
-    pic->mb_type      = (uint32_t*)pic->mb_type_buf->data + 2 * mb_stride + 1;
-
-    if (pic->motion_val_buf[0]) {
-        for (i = 0; i < 2; i++) {
-            pic->motion_val[i] = (int16_t (*)[2])pic->motion_val_buf[i]->data + 4;
-            pic->ref_index[i]  = pic->ref_index_buf[i]->data;
-        }
-    }
-
-    return 0;
-fail:
-    av_log(avctx, AV_LOG_ERROR, "Error allocating a picture.\n");
-    ff_mpeg_unref_picture(avctx, pic);
-    ff_free_picture_tables(pic);
-    return AVERROR(ENOMEM);
-}
-
-/**
- * Deallocate a picture.
- */
-void ff_mpeg_unref_picture(AVCodecContext *avctx, Picture *pic)
-{
-    int off = offsetof(Picture, mb_mean) + sizeof(pic->mb_mean);
-
-    pic->tf.f = pic->f;
-    /* WM Image / Screen codecs allocate internal buffers with different
-     * dimensions / colorspaces; ignore user-defined callbacks for these. */
-    if (avctx->codec->id != AV_CODEC_ID_WMV3IMAGE &&
-        avctx->codec->id != AV_CODEC_ID_VC1IMAGE  &&
-        avctx->codec->id != AV_CODEC_ID_MSS2)
-        ff_thread_release_buffer(avctx, &pic->tf);
-    else if (pic->f)
-        av_frame_unref(pic->f);
-
-    av_buffer_unref(&pic->hwaccel_priv_buf);
-
-    if (pic->needs_realloc)
-        ff_free_picture_tables(pic);
-
-    memset((uint8_t*)pic + off, 0, sizeof(*pic) - off);
-}
-
-static int update_picture_tables(Picture *dst, Picture *src)
-{
-     int i;
-
-#define UPDATE_TABLE(table)\
-do {\
-    if (src->table &&\
-        (!dst->table || dst->table->buffer != src->table->buffer)) {\
-        av_buffer_unref(&dst->table);\
-        dst->table = av_buffer_ref(src->table);\
-        if (!dst->table) {\
-            ff_free_picture_tables(dst);\
-            return AVERROR(ENOMEM);\
-        }\
-    }\
-} while (0)
-
-    UPDATE_TABLE(mb_var_buf);
-    UPDATE_TABLE(mc_mb_var_buf);
-    UPDATE_TABLE(mb_mean_buf);
-    UPDATE_TABLE(mbskip_table_buf);
-    UPDATE_TABLE(qscale_table_buf);
-    UPDATE_TABLE(mb_type_buf);
-    for (i = 0; i < 2; i++) {
-        UPDATE_TABLE(motion_val_buf[i]);
-        UPDATE_TABLE(ref_index_buf[i]);
-    }
-
-    dst->mb_var        = src->mb_var;
-    dst->mc_mb_var     = src->mc_mb_var;
-    dst->mb_mean       = src->mb_mean;
-    dst->mbskip_table  = src->mbskip_table;
-    dst->qscale_table  = src->qscale_table;
-    dst->mb_type       = src->mb_type;
-    for (i = 0; i < 2; i++) {
-        dst->motion_val[i] = src->motion_val[i];
-        dst->ref_index[i]  = src->ref_index[i];
-    }
-
-    dst->alloc_mb_width  = src->alloc_mb_width;
-    dst->alloc_mb_height = src->alloc_mb_height;
-
-    return 0;
-}
-
-int ff_mpeg_ref_picture(AVCodecContext *avctx, Picture *dst, Picture *src)
-{
-    int ret;
-
-    av_assert0(!dst->f->buf[0]);
-    av_assert0(src->f->buf[0]);
-
-    src->tf.f = src->f;
-    dst->tf.f = dst->f;
-    ret = ff_thread_ref_frame(&dst->tf, &src->tf);
-    if (ret < 0)
-        goto fail;
-
-    ret = update_picture_tables(dst, src);
-    if (ret < 0)
-        goto fail;
-
-    if (src->hwaccel_picture_private) {
-        dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
-        if (!dst->hwaccel_priv_buf)
-            goto fail;
-        dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
-    }
-
-    dst->field_picture           = src->field_picture;
-    dst->mb_var_sum              = src->mb_var_sum;
-    dst->mc_mb_var_sum           = src->mc_mb_var_sum;
-    dst->b_frame_score           = src->b_frame_score;
-    dst->needs_realloc           = src->needs_realloc;
-    dst->reference               = src->reference;
-    dst->shared                  = src->shared;
-
-    return 0;
-fail:
-    ff_mpeg_unref_picture(avctx, dst);
-    return ret;
-}
-
 static int init_duplicate_context(MpegEncContext *s)
 {
     int y_size = s->b8_stride * (2 * s->mb_height + 1);
@@ -772,7 +375,7 @@ static int init_duplicate_context(MpegEncContext *s)
                           ME_MAP_SIZE * sizeof(uint32_t), fail)
         FF_ALLOCZ_OR_GOTO(s->avctx, s->me.score_map,
                           ME_MAP_SIZE * sizeof(uint32_t), fail)
-        if (s->avctx->noise_reduction) {
+        if (s->noise_reduction) {
             FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_error_sum,
                               2 * 64 * sizeof(int), fail)
         }
@@ -930,7 +533,7 @@ int ff_mpeg_update_thread_context(AVCodecContext *dst,
     if(s->picture)
     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
         ff_mpeg_unref_picture(s->avctx, &s->picture[i]);
-        if (s1->picture[i].f->buf[0] &&
+        if (s1->picture && s1->picture[i].f->buf[0] &&
             (ret = ff_mpeg_ref_picture(s->avctx, &s->picture[i], &s1->picture[i])) < 0)
             return ret;
     }
@@ -941,7 +544,7 @@ do {\
     if (s1->pic.f && s1->pic.f->buf[0])\
         ret = ff_mpeg_ref_picture(s->avctx, &s->pic, &s1->pic);\
     else\
-        ret = update_picture_tables(&s->pic, &s1->pic);\
+        ret = ff_update_picture_tables(&s->pic, &s1->pic);\
     if (ret < 0)\
         return ret;\
 } while (0)
@@ -979,7 +582,7 @@ do {\
 
     if (s1->bitstream_buffer) {
         if (s1->bitstream_buffer_size +
-            FF_INPUT_BUFFER_PADDING_SIZE > s->allocated_bitstream_buffer_size) {
+            AV_INPUT_BUFFER_PADDING_SIZE > s->allocated_bitstream_buffer_size) {
             av_fast_malloc(&s->bitstream_buffer,
                            &s->allocated_bitstream_buffer_size,
                            s1->allocated_bitstream_buffer_size);
@@ -992,7 +595,7 @@ do {\
         memcpy(s->bitstream_buffer, s1->bitstream_buffer,
                s1->bitstream_buffer_size);
         memset(s->bitstream_buffer + s->bitstream_buffer_size, 0,
-               FF_INPUT_BUFFER_PADDING_SIZE);
+               AV_INPUT_BUFFER_PADDING_SIZE);
     }
 
     // linesize dependend scratch buffer allocation
@@ -1068,42 +671,6 @@ void ff_mpv_decode_init(MpegEncContext *s, AVCodecContext *avctx)
     s->codec_tag          = avpriv_toupper4(avctx->codec_tag);
 }
 
-static int init_er(MpegEncContext *s)
-{
-    ERContext *er = &s->er;
-    int mb_array_size = s->mb_height * s->mb_stride;
-    int i;
-
-    er->avctx       = s->avctx;
-
-    er->mb_index2xy = s->mb_index2xy;
-    er->mb_num      = s->mb_num;
-    er->mb_width    = s->mb_width;
-    er->mb_height   = s->mb_height;
-    er->mb_stride   = s->mb_stride;
-    er->b8_stride   = s->b8_stride;
-
-    er->er_temp_buffer     = av_malloc(s->mb_height * s->mb_stride);
-    er->error_status_table = av_mallocz(mb_array_size);
-    if (!er->er_temp_buffer || !er->error_status_table)
-        goto fail;
-
-    er->mbskip_table  = s->mbskip_table;
-    er->mbintra_table = s->mbintra_table;
-
-    for (i = 0; i < FF_ARRAY_ELEMS(s->dc_val); i++)
-        er->dc_val[i] = s->dc_val[i];
-
-    er->decode_mb = mpeg_er_decode_mb;
-    er->opaque    = s;
-
-    return 0;
-fail:
-    av_freep(&er->er_temp_buffer);
-    av_freep(&er->error_status_table);
-    return AVERROR(ENOMEM);
-}
-
 /**
  * Initialize and allocates MpegEncContext fields dependent on the resolution.
  */
@@ -1173,7 +740,7 @@ static int init_context_frame(MpegEncContext *s)
     }
 
     if (s->codec_id == AV_CODEC_ID_MPEG4 ||
-        (s->avctx->flags & CODEC_FLAG_INTERLACED_ME)) {
+        (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME)) {
         /* interlaced direct mode decoding tables */
         for (i = 0; i < 2; i++) {
             int j, k;
@@ -1222,7 +789,7 @@ static int init_context_frame(MpegEncContext *s)
     FF_ALLOCZ_OR_GOTO(s->avctx, s->mbskip_table, mb_array_size + 2, fail);
     // Note the + 1 is for  a quicker mpeg4 slice_end detection
 
-    return init_er(s);
+    return ff_mpeg_er_init(s);
 fail:
     return AVERROR(ENOMEM);
 }
@@ -1257,6 +824,7 @@ static void clear_context(MpegEncContext *s)
 
     s->parse_context.buffer = NULL;
     s->parse_context.buffer_size = 0;
+    s->parse_context.overread = 0;
     s->bitstream_buffer = NULL;
     s->allocated_bitstream_buffer_size = 0;
     s->picture          = NULL;
@@ -1549,6 +1117,9 @@ void ff_mpv_common_end(MpegEncContext *s)
 {
     int i;
 
+    if (!s)
+        return ;
+
     if (s->slice_context_count > 1) {
         for (i = 0; i < s->slice_context_count; i++) {
             free_duplicate_context(s->thread_context[i]);
@@ -1595,72 +1166,6 @@ void ff_mpv_common_end(MpegEncContext *s)
     s->linesize = s->uvlinesize = 0;
 }
 
-static void release_unused_pictures(AVCodecContext *avctx, Picture *picture)
-{
-    int i;
-
-    /* release non reference frames */
-    for (i = 0; i < MAX_PICTURE_COUNT; i++) {
-        if (!picture[i].reference)
-            ff_mpeg_unref_picture(avctx, &picture[i]);
-    }
-}
-
-static inline int pic_is_unused(Picture *pic)
-{
-    if (!pic->f->buf[0])
-        return 1;
-    if (pic->needs_realloc && !(pic->reference & DELAYED_PIC_REF))
-        return 1;
-    return 0;
-}
-
-static int find_unused_picture(AVCodecContext *avctx, Picture *picture, int shared)
-{
-    int i;
-
-    if (shared) {
-        for (i = 0; i < MAX_PICTURE_COUNT; i++) {
-            if (!picture[i].f->buf[0])
-                return i;
-        }
-    } else {
-        for (i = 0; i < MAX_PICTURE_COUNT; i++) {
-            if (pic_is_unused(&picture[i]))
-                return i;
-        }
-    }
-
-    av_log(avctx, AV_LOG_FATAL,
-           "Internal error, picture buffer overflow\n");
-    /* We could return -1, but the codec would crash trying to draw into a
-     * non-existing frame anyway. This is safer than waiting for a random crash.
-     * Also the return of this is never useful, an encoder must only allocate
-     * as much as allowed in the specification. This has no relationship to how
-     * much libavcodec could allocate (and MAX_PICTURE_COUNT is always large
-     * enough for such valid streams).
-     * Plus, a decoder has to check stream validity and remove frames if too
-     * many reference frames are around. Waiting for "OOM" is not correct at
-     * all. Similarly, missing reference frames have to be replaced by
-     * interpolated/MC frames, anything else is a bug in the codec ...
-     */
-    abort();
-    return -1;
-}
-
-int ff_find_unused_picture(AVCodecContext *avctx, Picture *picture, int shared)
-{
-    int ret = find_unused_picture(avctx, picture, shared);
-
-    if (ret >= 0 && ret < MAX_PICTURE_COUNT) {
-        if (picture[ret].needs_realloc) {
-            picture[ret].needs_realloc = 0;
-            ff_free_picture_tables(&picture[ret]);
-            ff_mpeg_unref_picture(avctx, &picture[ret]);
-        }
-    }
-    return ret;
-}
 
 static void gray_frame(AVFrame *frame)
 {
@@ -1670,11 +1175,11 @@ static void gray_frame(AVFrame *frame)
 
     for(i=0; i<frame->height; i++)
         memset(frame->data[0] + frame->linesize[0]*i, 0x80, frame->width);
-    for(i=0; i<FF_CEIL_RSHIFT(frame->height, v_chroma_shift); i++) {
+    for(i=0; i<AV_CEIL_RSHIFT(frame->height, v_chroma_shift); i++) {
         memset(frame->data[1] + frame->linesize[1]*i,
-               0x80, FF_CEIL_RSHIFT(frame->width, h_chroma_shift));
+               0x80, AV_CEIL_RSHIFT(frame->width, h_chroma_shift));
         memset(frame->data[2] + frame->linesize[2]*i,
-               0x80, FF_CEIL_RSHIFT(frame->width, h_chroma_shift));
+               0x80, AV_CEIL_RSHIFT(frame->width, h_chroma_shift));
     }
 }
 
@@ -1706,16 +1211,17 @@ int ff_mpv_frame_start(MpegEncContext *s, AVCodecContext *avctx)
         if (&s->picture[i] != s->last_picture_ptr &&
             &s->picture[i] != s->next_picture_ptr &&
             s->picture[i].reference && !s->picture[i].needs_realloc) {
-            if (!(avctx->active_thread_type & FF_THREAD_FRAME))
-                av_log(avctx, AV_LOG_ERROR,
-                       "releasing zombie picture\n");
             ff_mpeg_unref_picture(s->avctx, &s->picture[i]);
         }
     }
 
     ff_mpeg_unref_picture(s->avctx, &s->current_picture);
 
-    release_unused_pictures(s->avctx, s->picture);
+    /* release non-reference frames */
+    for (i = 0; i < MAX_PICTURE_COUNT; i++) {
+        if (!s->picture[i].reference)
+            ff_mpeg_unref_picture(s->avctx, &s->picture[i]);
+    }
 
     if (s->current_picture_ptr && !s->current_picture_ptr->f->buf[0]) {
         // we already have a unused image
@@ -1755,7 +1261,7 @@ int ff_mpv_frame_start(MpegEncContext *s, AVCodecContext *avctx)
     s->current_picture_ptr->field_picture      =  s->picture_structure != PICT_FRAME;
 
     s->current_picture_ptr->f->pict_type = s->pict_type;
-    // if (s->avctx->flags && CODEC_FLAG_QSCALE)
+    // if (s->avctx->flags & AV_CODEC_FLAG_QSCALE)
     //     s->current_picture_ptr->quality = s->new_picture_ptr->quality;
     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
 
@@ -1808,16 +1314,20 @@ int ff_mpv_frame_start(MpegEncContext *s, AVCodecContext *avctx)
             return -1;
         }
 
-        if (!avctx->hwaccel && !(avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)) {
+        if (!avctx->hwaccel
+#if FF_API_CAP_VDPAU
+            && !(avctx->codec->capabilities&AV_CODEC_CAP_HWACCEL_VDPAU)
+#endif
+            ) {
             for(i=0; i<avctx->height; i++)
                 memset(s->last_picture_ptr->f->data[0] + s->last_picture_ptr->f->linesize[0]*i,
                        0x80, avctx->width);
             if (s->last_picture_ptr->f->data[2]) {
-                for(i=0; i<FF_CEIL_RSHIFT(avctx->height, v_chroma_shift); i++) {
+                for(i=0; i<AV_CEIL_RSHIFT(avctx->height, v_chroma_shift); i++) {
                     memset(s->last_picture_ptr->f->data[1] + s->last_picture_ptr->f->linesize[1]*i,
-                        0x80, FF_CEIL_RSHIFT(avctx->width, h_chroma_shift));
+                        0x80, AV_CEIL_RSHIFT(avctx->width, h_chroma_shift));
                     memset(s->last_picture_ptr->f->data[2] + s->last_picture_ptr->f->linesize[2]*i,
-                        0x80, FF_CEIL_RSHIFT(avctx->width, h_chroma_shift));
+                        0x80, AV_CEIL_RSHIFT(avctx->width, h_chroma_shift));
                 }
             }
 
@@ -2047,15 +1557,18 @@ static void draw_arrow(uint8_t *buf, int sx, int sy, int ex,
 
 static int add_mb(AVMotionVector *mb, uint32_t mb_type,
                   int dst_x, int dst_y,
-                  int src_x, int src_y,
+                  int motion_x, int motion_y, int motion_scale,
                   int direction)
 {
     mb->w = IS_8X8(mb_type) || IS_8X16(mb_type) ? 8 : 16;
     mb->h = IS_8X8(mb_type) || IS_16X8(mb_type) ? 8 : 16;
-    mb->src_x = src_x;
-    mb->src_y = src_y;
+    mb->motion_x = motion_x;
+    mb->motion_y = motion_y;
+    mb->motion_scale = motion_scale;
     mb->dst_x = dst_x;
     mb->dst_y = dst_y;
+    mb->src_x = dst_x + motion_x / motion_scale;
+    mb->src_y = dst_y + motion_y / motion_scale;
     mb->source = direction ? 1 : -1;
     mb->flags = 0; // XXX: does mb_type contain extra information that could be exported here?
     return 1;
@@ -2069,8 +1582,9 @@ void ff_print_debug_info2(AVCodecContext *avctx, AVFrame *pict, uint8_t *mbskip_
                          int *low_delay,
                          int mb_width, int mb_height, int mb_stride, int quarter_sample)
 {
-    if ((avctx->flags2 & CODEC_FLAG2_EXPORT_MVS) && mbtype_table && motion_val[0]) {
+    if ((avctx->flags2 & AV_CODEC_FLAG2_EXPORT_MVS) && mbtype_table && motion_val[0]) {
         const int shift = 1 + quarter_sample;
+        const int scale = 1 << shift;
         const int mv_sample_log2 = avctx->codec_id == AV_CODEC_ID_H264 || avctx->codec_id == AV_CODEC_ID_SVQ3 ? 2 : 1;
         const int mv_stride      = (mb_width << mv_sample_log2) +
                                    (avctx->codec->id == AV_CODEC_ID_H264 ? 0 : 1);
@@ -2094,43 +1608,43 @@ void ff_print_debug_info2(AVCodecContext *avctx, AVFrame *pict, uint8_t *mbskip_
                             int sy = mb_y * 16 + 4 + 8 * (i >> 1);
                             int xy = (mb_x * 2 + (i & 1) +
                                       (mb_y * 2 + (i >> 1)) * mv_stride) << (mv_sample_log2 - 1);
-                            int mx = (motion_val[direction][xy][0] >> shift) + sx;
-                            int my = (motion_val[direction][xy][1] >> shift) + sy;
-                            mbcount += add_mb(mvs + mbcount, mb_type, sx, sy, mx, my, direction);
+                            int mx = motion_val[direction][xy][0];
+                            int my = motion_val[direction][xy][1];
+                            mbcount += add_mb(mvs + mbcount, mb_type, sx, sy, mx, my, scale, direction);
                         }
                     } else if (IS_16X8(mb_type)) {
                         for (i = 0; i < 2; i++) {
                             int sx = mb_x * 16 + 8;
                             int sy = mb_y * 16 + 4 + 8 * i;
                             int xy = (mb_x * 2 + (mb_y * 2 + i) * mv_stride) << (mv_sample_log2 - 1);
-                            int mx = (motion_val[direction][xy][0] >> shift);
-                            int my = (motion_val[direction][xy][1] >> shift);
+                            int mx = motion_val[direction][xy][0];
+                            int my = motion_val[direction][xy][1];
 
                             if (IS_INTERLACED(mb_type))
                                 my *= 2;
 
-                            mbcount += add_mb(mvs + mbcount, mb_type, sx, sy, mx + sx, my + sy, direction);
+                            mbcount += add_mb(mvs + mbcount, mb_type, sx, sy, mx, my, scale, direction);
                         }
                     } else if (IS_8X16(mb_type)) {
                         for (i = 0; i < 2; i++) {
                             int sx = mb_x * 16 + 4 + 8 * i;
                             int sy = mb_y * 16 + 8;
                             int xy = (mb_x * 2 + i + mb_y * 2 * mv_stride) << (mv_sample_log2 - 1);
-                            int mx = motion_val[direction][xy][0] >> shift;
-                            int my = motion_val[direction][xy][1] >> shift;
+                            int mx = motion_val[direction][xy][0];
+                            int my = motion_val[direction][xy][1];
 
                             if (IS_INTERLACED(mb_type))
                                 my *= 2;
 
-                            mbcount += add_mb(mvs + mbcount, mb_type, sx, sy, mx + sx, my + sy, direction);
+                            mbcount += add_mb(mvs + mbcount, mb_type, sx, sy, mx, my, scale, direction);
                         }
                     } else {
                           int sx = mb_x * 16 + 8;
                           int sy = mb_y * 16 + 8;
                           int xy = (mb_x + mb_y * mv_stride) << mv_sample_log2;
-                          int mx = (motion_val[direction][xy][0]>>shift) + sx;
-                          int my = (motion_val[direction][xy][1]>>shift) + sy;
-                          mbcount += add_mb(mvs + mbcount, mb_type, sx, sy, mx, my, direction);
+                          int mx = motion_val[direction][xy][0];
+                          int my = motion_val[direction][xy][1];
+                          mbcount += add_mb(mvs + mbcount, mb_type, sx, sy, mx, my, scale, direction);
                     }
                 }
             }
@@ -2153,7 +1667,10 @@ void ff_print_debug_info2(AVCodecContext *avctx, AVFrame *pict, uint8_t *mbskip_
 
     /* TODO: export all the following to make them accessible for users (and filters) */
     if (avctx->hwaccel || !mbtype_table
-        || (avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU))
+#if FF_API_CAP_VDPAU
+        || (avctx->codec->capabilities&AV_CODEC_CAP_HWACCEL_VDPAU)
+#endif
+        )
         return;
 
 
@@ -2358,8 +1875,8 @@ void ff_print_debug_info2(AVCodecContext *avctx, AVFrame *pict, uint8_t *mbskip_
                     uint64_t u,v;
                     int y;
 #define COLOR(theta, r) \
-    u = (int)(128 + r * cos(theta * 3.141592 / 180)); \
-    v = (int)(128 + r * sin(theta * 3.141592 / 180));
+    u = (int)(128 + r * cos(theta * M_PI / 180)); \
+    v = (int)(128 + r * sin(theta * M_PI / 180));
 
 
                     u = v = 128;
@@ -2597,7 +2114,7 @@ static av_always_inline void mpeg_motion_lowres(MpegEncContext *s,
                                 src_x, src_y << field_based, h_edge_pos,
                                 v_edge_pos);
         ptr_y = s->sc.edge_emu_buffer;
-        if (!CONFIG_GRAY || !(s->avctx->flags & CODEC_FLAG_GRAY)) {
+        if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
             uint8_t *ubuf = s->sc.edge_emu_buffer + 18 * s->linesize;
             uint8_t *vbuf =ubuf + 9 * s->uvlinesize;
             s->vdsp.emulated_edge_mc(ubuf,  ptr_cb,
@@ -2632,7 +2149,7 @@ static av_always_inline void mpeg_motion_lowres(MpegEncContext *s,
     sy = (sy << 2) >> lowres;
     pix_op[lowres - 1](dest_y, ptr_y, linesize, h, sx, sy);
 
-    if (!CONFIG_GRAY || !(s->avctx->flags & CODEC_FLAG_GRAY)) {
+    if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
         int hc = s->chroma_y_shift ? (h+1-bottom_field)>>1 : h;
         uvsx = (uvsx << 2) >> lowres;
         uvsy = (uvsy << 2) >> lowres;
@@ -2752,7 +2269,7 @@ static inline void MPV_motion_lowres(MpegEncContext *s,
             my += s->mv[dir][i][1];
         }
 
-        if (!CONFIG_GRAY || !(s->avctx->flags & CODEC_FLAG_GRAY))
+        if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY))
             chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture,
                                      pix_op, mx, my);
         break;
@@ -2846,7 +2363,7 @@ static inline void MPV_motion_lowres(MpegEncContext *s,
 /**
  * find the lowest MB row referenced in the MVs
  */
-int ff_mpv_lowest_referenced_row(MpegEncContext *s, int dir)
+static int lowest_referenced_row(MpegEncContext *s, int dir)
 {
     int my_max = INT_MIN, my_min = INT_MAX, qpel_shift = !s->quarter_sample;
     int my, off, i, mvs;
@@ -2992,7 +2509,7 @@ void mpv_decode_mb_internal(MpegEncContext *s, int16_t block[12][64],
     else if (!is_mpeg12 && (s->h263_pred || s->h263_aic))
         s->mbintra_table[mb_xy]=1;
 
-    if ((s->avctx->flags & CODEC_FLAG_PSNR) || s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor ||
+    if ((s->avctx->flags & AV_CODEC_FLAG_PSNR) || s->frame_skip_threshold || s->frame_skip_factor ||
         !(s->encoding && (s->intra_only || s->pict_type == AV_PICTURE_TYPE_B) &&
           s->avctx->mb_decision != FF_MB_DECISION_RD)) { // FIXME precalc
         uint8_t *dest_y, *dest_cb, *dest_cr;
@@ -3041,12 +2558,12 @@ void mpv_decode_mb_internal(MpegEncContext *s, int16_t block[12][64],
                 if(HAVE_THREADS && s->avctx->active_thread_type&FF_THREAD_FRAME) {
                     if (s->mv_dir & MV_DIR_FORWARD) {
                         ff_thread_await_progress(&s->last_picture_ptr->tf,
-                                                 ff_mpv_lowest_referenced_row(s, 0),
+                                                 lowest_referenced_row(s, 0),
                                                  0);
                     }
                     if (s->mv_dir & MV_DIR_BACKWARD) {
                         ff_thread_await_progress(&s->next_picture_ptr->tf,
-                                                 ff_mpv_lowest_referenced_row(s, 1),
+                                                 lowest_referenced_row(s, 1),
                                                  0);
                     }
                 }
@@ -3095,7 +2612,7 @@ void mpv_decode_mb_internal(MpegEncContext *s, int16_t block[12][64],
                 add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
                 add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
 
-                if (!CONFIG_GRAY || !(s->avctx->flags & CODEC_FLAG_GRAY)) {
+                if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
                     if (s->chroma_y_shift){
                         add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
                         add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
@@ -3114,7 +2631,7 @@ void mpv_decode_mb_internal(MpegEncContext *s, int16_t block[12][64],
                 add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
                 add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);
 
-                if (!CONFIG_GRAY || !(s->avctx->flags & CODEC_FLAG_GRAY)) {
+                if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
                     if(s->chroma_y_shift){//Chroma420
                         add_dct(s, block[4], 4, dest_cb, uvlinesize);
                         add_dct(s, block[5], 5, dest_cr, uvlinesize);
@@ -3147,7 +2664,7 @@ void mpv_decode_mb_internal(MpegEncContext *s, int16_t block[12][64],
                 put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
                 put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);
 
-                if (!CONFIG_GRAY || !(s->avctx->flags & CODEC_FLAG_GRAY)) {
+                if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
                     if(s->chroma_y_shift){
                         put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
                         put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
@@ -3166,7 +2683,7 @@ void mpv_decode_mb_internal(MpegEncContext *s, int16_t block[12][64],
                 s->idsp.idct_put(dest_y + dct_offset,              dct_linesize, block[2]);
                 s->idsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);
 
-                if (!CONFIG_GRAY || !(s->avctx->flags & CODEC_FLAG_GRAY)) {
+                if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
                     if(s->chroma_y_shift){
                         s->idsp.idct_put(dest_cb, uvlinesize, block[4]);
                         s->idsp.idct_put(dest_cr, uvlinesize, block[5]);
@@ -3192,7 +2709,7 @@ void mpv_decode_mb_internal(MpegEncContext *s, int16_t block[12][64],
 skip_idct:
         if(!readable){
             s->hdsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
-            if (!CONFIG_GRAY || !(s->avctx->flags & CODEC_FLAG_GRAY)) {
+            if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
                 s->hdsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
                 s->hdsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
             }
@@ -3251,35 +2768,6 @@ void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
     }
 }
 
-/**
- * Permute an 8x8 block.
- * @param block the block which will be permuted according to the given permutation vector
- * @param permutation the permutation vector
- * @param last the last non zero coefficient in scantable order, used to speed the permutation up
- * @param scantable the used scantable, this is only used to speed the permutation up, the block is not
- *                  (inverse) permutated to scantable order!
- */
-void ff_block_permute(int16_t *block, uint8_t *permutation, const uint8_t *scantable, int last)
-{
-    int i;
-    int16_t temp[64];
-
-    if(last<=0) return;
-    //if(permutation[1]==1) return; //FIXME it is ok but not clean and might fail for some permutations
-
-    for(i=0; i<=last; i++){
-        const int j= scantable[i];
-        temp[j]= block[j];
-        block[j]=0;
-    }
-
-    for(i=0; i<=last; i++){
-        const int j= scantable[i];
-        const int perm_j= permutation[j];
-        block[perm_j]= temp[j];
-    }
-}
-
 void ff_mpeg_flush(AVCodecContext *avctx){
     int i;
     MpegEncContext *s = avctx->priv_data;
diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h
index 749c088a..df5e4bf5 100644
--- a/libavcodec/mpegvideo.h
+++ b/libavcodec/mpegvideo.h
@@ -39,8 +39,10 @@
 #include "h263dsp.h"
 #include "hpeldsp.h"
 #include "idctdsp.h"
+#include "internal.h"
 #include "me_cmp.h"
 #include "motion_est.h"
+#include "mpegpicture.h"
 #include "mpegvideodsp.h"
 #include "mpegvideoencdsp.h"
 #include "pixblockdsp.h"
@@ -56,23 +58,10 @@
 #include "libavutil/opt.h"
 #include "libavutil/timecode.h"
 
-#define FRAME_SKIPPED 100 ///< return value for header parsers if frame is not coded
-
-#define MAX_FCODE 7
-
 #define MAX_THREADS 32
-#define MAX_PICTURE_COUNT 36
 
 #define MAX_B_FRAMES 16
 
-#define ME_MAP_SIZE 64
-
-#define MAX_MB_BYTES (30*16*16*3/8 + 120)
-
-#define INPLACE_OFFSET 16
-
-#define EDGE_WIDTH 16
-
 /* Start codes. */
 #define SEQ_END_CODE            0x000001b7
 #define SEQ_START_CODE          0x000001b3
@@ -83,67 +72,6 @@
 #define EXT_START_CODE          0x000001b5
 #define USER_START_CODE         0x000001b2
 
-/**
- * Picture.
- */
-typedef struct Picture{
-    struct AVFrame *f;
-    ThreadFrame tf;
-
-    AVBufferRef *qscale_table_buf;
-    int8_t *qscale_table;
-
-    AVBufferRef *motion_val_buf[2];
-    int16_t (*motion_val[2])[2];
-
-    AVBufferRef *mb_type_buf;
-    uint32_t *mb_type;          ///< types and macros are defined in mpegutils.h
-
-    AVBufferRef *mbskip_table_buf;
-    uint8_t *mbskip_table;
-
-    AVBufferRef *ref_index_buf[2];
-    int8_t *ref_index[2];
-
-    AVBufferRef *mb_var_buf;
-    uint16_t *mb_var;           ///< Table for MB variances
-
-    AVBufferRef *mc_mb_var_buf;
-    uint16_t *mc_mb_var;        ///< Table for motion compensated MB variances
-
-    int alloc_mb_width;         ///< mb_width used to allocate tables
-    int alloc_mb_height;        ///< mb_height used to allocate tables
-
-    AVBufferRef *mb_mean_buf;
-    uint8_t *mb_mean;           ///< Table for MB luminance
-
-    AVBufferRef *hwaccel_priv_buf;
-    /**
-     * hardware accelerator private data
-     */
-    void *hwaccel_picture_private;
-
-    int field_picture;          ///< whether or not the picture was encoded in separate fields
-
-    int64_t mb_var_sum;         ///< sum of MB variance for current frame
-    int64_t mc_mb_var_sum;      ///< motion compensated MB variance for current frame
-
-    int b_frame_score;
-    int needs_realloc;          ///< Picture needs to be reallocated (eg due to a frame size change)
-
-    int reference;
-    int shared;
-
-    uint64_t error[AV_NUM_DATA_POINTERS];
-} Picture;
-
-typedef struct ScratchpadContext {
-    uint8_t *edge_emu_buffer;     ///< temporary buffer for if MVs point to out-of-frame data
-    uint8_t *rd_scratchpad;       ///< scratchpad for rate distortion mb decision
-    uint8_t *obmc_scratchpad;
-    uint8_t *b_scratchpad;        ///< scratchpad used for writing into write only buffers
-} ScratchpadContext;
-
 /**
  * MpegEncContext.
  */
@@ -169,7 +97,7 @@ typedef struct MpegEncContext {
     int width, height;///< picture size. must be a multiple of 16
     int gop_size;
     int intra_only;   ///< if true, only intra pictures are generated
-    int bit_rate;     ///< wanted bit rate
+    int64_t bit_rate; ///< wanted bit rate
     enum OutputFormat out_format; ///< output format
     int h263_pred;    ///< use mpeg4/h263 ac/dc predictions
     int pb_frame;     ///< PB frame mode (0 = none, 1 = base, 2 = improved)
@@ -324,7 +252,12 @@ typedef struct MpegEncContext {
     int16_t (*b_field_mv_table[2][2][2])[2];///< MV table (4MV per MB) interlaced b-frame encoding
     uint8_t (*p_field_select_table[2]);
     uint8_t (*b_field_select_table[2][2]);
+#if FF_API_MOTION_EST
     int me_method;                       ///< ME algorithm
+#endif
+    int motion_est;                      ///< ME algorithm
+    int me_penalty_compensation;
+    int me_pre;                          ///< prepass for motion estimation
     int mv_dir;
 #define MV_DIR_FORWARD   1
 #define MV_DIR_BACKWARD  2
@@ -368,6 +301,7 @@ typedef struct MpegEncContext {
     uint16_t chroma_intra_matrix[64];
     uint16_t inter_matrix[64];
     uint16_t chroma_inter_matrix[64];
+    int force_duplicated_matrix; ///< Force duplication of mjpeg matrices, useful for rtp streaming
 
     int intra_quant_bias;    ///< bias for the quantizer
     int inter_quant_bias;    ///< bias for the quantizer
@@ -435,6 +369,7 @@ typedef struct MpegEncContext {
     uint8_t *mb_info_ptr;
     int mb_info_size;
     int ehc_mode;
+    int rc_strategy;
 
     /* H.263+ specific */
     int umvplus;                    ///< == H263+ && unrestricted_mv
@@ -486,6 +421,7 @@ typedef struct MpegEncContext {
     /* MJPEG specific */
     struct MJpegContext *mjpeg_ctx;
     int esc_pos;
+    int pred;
 
     /* MSMPEG4 specific */
     int mv_table_index;
@@ -520,11 +456,13 @@ typedef struct MpegEncContext {
     // picture structure defines are loaded from mpegutils.h
     int picture_structure;
 
+    int64_t timecode_frame_start; ///< GOP timecode frame start number, in non drop frame format
     int intra_dc_precision;
     int frame_pred_frame_dct;
     int top_field_first;
     int concealment_motion_vectors;
     int q_scale_type;
+    int brd_scale;
     int intra_vlc_format;
     int alternate_scan;
     int seq_disp_ext;
@@ -546,6 +484,7 @@ typedef struct MpegEncContext {
 
     /* RTP specific */
     int rtp_mode;
+    int rtp_payload_size;
 
     char *tc_opt_str;        ///< timecode option string
     AVTimecode tc;           ///< timecode context
@@ -597,6 +536,7 @@ typedef struct MpegEncContext {
     float rc_buffer_aggressivity;
     float border_masking;
     int lmin, lmax;
+    int vbv_ignore_qmax;
 
     char *rc_eq;
 
@@ -613,6 +553,17 @@ typedef struct MpegEncContext {
 
     /* temporary frames used by b_frame_strategy = 2 */
     AVFrame *tmp_frames[MAX_B_FRAMES + 2];
+    int b_frame_strategy;
+    int b_sensitivity;
+
+    /* frame skip options for encoding */
+    int frame_skip_threshold;
+    int frame_skip_factor;
+    int frame_skip_exp;
+    int frame_skip_cmp;
+
+    int scenechange_threshold;
+    int noise_reduction;
 } MpegEncContext;
 
 /* mpegvideo_enc common options */
@@ -623,11 +574,34 @@ typedef struct MpegEncContext {
 #define FF_MPV_FLAG_NAQ          0x0010
 #define FF_MPV_FLAG_MV0          0x0020
 
+enum rc_strategy { /* values accepted by the "rc_strategy" encoder option */
+    MPV_RC_STRATEGY_FFMPEG, ///< default native rate control
+    MPV_RC_STRATEGY_XVID,   ///< libxvid rate control (2 pass only)
+    NB_MPV_RC_STRATEGY      ///< number of strategies; NB_MPV_RC_STRATEGY-1 is the option's upper bound
+};
+
+#define FF_MPV_OPT_CMP_FUNC \
+{ "sad",    "Sum of absolute differences, fast", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_SAD }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \
+{ "sse",    "Sum of squared errors", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_SSE }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \
+{ "satd",   "Sum of absolute Hadamard transformed differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_SATD }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \
+{ "dct",    "Sum of absolute DCT transformed differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_DCT }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \
+{ "psnr",   "Sum of squared quantization errors, low quality", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_PSNR }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \
+{ "bit",    "Number of bits needed for the block", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_BIT }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \
+{ "rd",     "Rate distortion optimal, slow", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_RD }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \
+{ "zero",   "Zero", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_ZERO }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \
+{ "vsad",   "Sum of absolute vertical differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_VSAD }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \
+{ "vsse",   "Sum of squared vertical differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_VSSE }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \
+{ "nsse",   "Noise preserving sum of squared differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_NSSE }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \
+{ "dct264", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_DCT264 }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \
+{ "dctmax", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_DCTMAX }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \
+{ "chroma", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_CHROMA }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }
+
 #ifndef FF_MPV_OFFSET
 #define FF_MPV_OFFSET(x) offsetof(MpegEncContext, x)
 #endif
 #define FF_MPV_OPT_FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
 #define FF_MPV_COMMON_OPTS \
+FF_MPV_OPT_CMP_FUNC, \
 { "mpv_flags",      "Flags common for all mpegvideo-based encoders.", FF_MPV_OFFSET(mpv_flags), AV_OPT_TYPE_FLAGS, { .i64 = 0 }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "mpv_flags" },\
 { "skip_rd",        "RD optimal MB level residual skipping", 0, AV_OPT_TYPE_CONST, { .i64 = FF_MPV_FLAG_SKIP_RD },    0, 0, FF_MPV_OPT_FLAGS, "mpv_flags" },\
 { "strict_gop",     "Strictly enforce gop size",             0, AV_OPT_TYPE_CONST, { .i64 = FF_MPV_FLAG_STRICT_GOP }, 0, 0, FF_MPV_OPT_FLAGS, "mpv_flags" },\
@@ -656,17 +630,32 @@ typedef struct MpegEncContext {
 {"border_mask", "increase the quantizer for macroblocks close to borders", FF_MPV_OFFSET(border_masking), AV_OPT_TYPE_FLOAT, {.dbl = 0 }, -FLT_MAX, FLT_MAX, FF_MPV_OPT_FLAGS},    \
 {"lmin", "minimum Lagrange factor (VBR)",                           FF_MPV_OFFSET(lmin), AV_OPT_TYPE_INT, {.i64 =  2*FF_QP2LAMBDA }, 0, INT_MAX, FF_MPV_OPT_FLAGS },            \
 {"lmax", "maximum Lagrange factor (VBR)",                           FF_MPV_OFFSET(lmax), AV_OPT_TYPE_INT, {.i64 = 31*FF_QP2LAMBDA }, 0, INT_MAX, FF_MPV_OPT_FLAGS },            \
+{"ibias", "intra quant bias",                                       FF_MPV_OFFSET(intra_quant_bias), AV_OPT_TYPE_INT, {.i64 = FF_DEFAULT_QUANT_BIAS }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS },   \
+{"pbias", "inter quant bias",                                       FF_MPV_OFFSET(inter_quant_bias), AV_OPT_TYPE_INT, {.i64 = FF_DEFAULT_QUANT_BIAS }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS },   \
+{"rc_strategy", "ratecontrol method",                               FF_MPV_OFFSET(rc_strategy), AV_OPT_TYPE_INT, {.i64 = MPV_RC_STRATEGY_FFMPEG }, 0, NB_MPV_RC_STRATEGY-1, FF_MPV_OPT_FLAGS, "rc_strategy" },   \
+    { "ffmpeg", "default native rate control", 0, AV_OPT_TYPE_CONST, { .i64 = MPV_RC_STRATEGY_FFMPEG }, 0, 0, FF_MPV_OPT_FLAGS, "rc_strategy" }, \
+    { "xvid",   "libxvid (2 pass only)",       0, AV_OPT_TYPE_CONST, { .i64 = MPV_RC_STRATEGY_XVID },   0, 0, FF_MPV_OPT_FLAGS, "rc_strategy" }, \
+{"motion_est", "motion estimation algorithm",                       FF_MPV_OFFSET(motion_est), AV_OPT_TYPE_INT, {.i64 = FF_ME_EPZS }, FF_ME_ZERO, FF_ME_XONE, FF_MPV_OPT_FLAGS, "motion_est" },   \
+{ "zero", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_ME_ZERO }, 0, 0, FF_MPV_OPT_FLAGS, "motion_est" }, \
+{ "epzs", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_ME_EPZS }, 0, 0, FF_MPV_OPT_FLAGS, "motion_est" }, \
+{ "xone", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_ME_XONE }, 0, 0, FF_MPV_OPT_FLAGS, "motion_est" }, \
+{ "force_duplicated_matrix", "Always write luma and chroma matrix for mjpeg, useful for rtp streaming.", FF_MPV_OFFSET(force_duplicated_matrix), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, FF_MPV_OPT_FLAGS },   \
+{"b_strategy", "Strategy to choose between I/P/B-frames",           FF_MPV_OFFSET(b_frame_strategy), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 2, FF_MPV_OPT_FLAGS }, \
+{"b_sensitivity", "Adjust sensitivity of b_frame_strategy 1",       FF_MPV_OFFSET(b_sensitivity), AV_OPT_TYPE_INT, {.i64 = 40 }, 1, INT_MAX, FF_MPV_OPT_FLAGS }, \
+{"brd_scale", "Downscale frames for dynamic B-frame decision",      FF_MPV_OFFSET(brd_scale), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 3, FF_MPV_OPT_FLAGS }, \
+{"skip_threshold", "Frame skip threshold",                          FF_MPV_OFFSET(frame_skip_threshold), AV_OPT_TYPE_INT, {.i64 = 0 }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS }, \
+{"skip_factor", "Frame skip factor",                                FF_MPV_OFFSET(frame_skip_factor), AV_OPT_TYPE_INT, {.i64 = 0 }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS }, \
+{"skip_exp", "Frame skip exponent",                                 FF_MPV_OFFSET(frame_skip_exp), AV_OPT_TYPE_INT, {.i64 = 0 }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS }, \
+{"skip_cmp", "Frame skip compare function",                         FF_MPV_OFFSET(frame_skip_cmp), AV_OPT_TYPE_INT, {.i64 = FF_CMP_DCTMAX }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \
+{"sc_threshold", "Scene change threshold",                          FF_MPV_OFFSET(scenechange_threshold), AV_OPT_TYPE_INT, {.i64 = 0 }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS }, \
+{"noise_reduction", "Noise reduction",                              FF_MPV_OFFSET(noise_reduction), AV_OPT_TYPE_INT, {.i64 = 0 }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS }, \
+{"mpeg_quant", "Use MPEG quantizers instead of H.263",              FF_MPV_OFFSET(mpeg_quant), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 1, FF_MPV_OPT_FLAGS }, \
+{"ps", "RTP payload size in bytes",                             FF_MPV_OFFSET(rtp_payload_size), AV_OPT_TYPE_INT, {.i64 = 0 }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS }, \
+{"mepc", "Motion estimation bitrate penalty compensation (1.0 = 256)", FF_MPV_OFFSET(me_penalty_compensation), AV_OPT_TYPE_INT, {.i64 = 256 }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS }, \
+{"mepre", "pre motion estimation", FF_MPV_OFFSET(me_pre), AV_OPT_TYPE_INT, {.i64 = 0 }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS }, \
 
 extern const AVOption ff_mpv_generic_options[];
 
-#define FF_MPV_GENERIC_CLASS(name) \
-static const AVClass name ## _class = {\
-    .class_name = #name " encoder",\
-    .item_name  = av_default_item_name,\
-    .option     = ff_mpv_generic_options,\
-    .version    = LIBAVUTIL_VERSION_INT,\
-};
-
 /**
  * Set the given MpegEncContext to common defaults (same for encoding
  * and decoding).  The changed fields will not depend upon the prior
@@ -682,6 +671,7 @@ void ff_mpv_common_init_axp(MpegEncContext *s);
 void ff_mpv_common_init_neon(MpegEncContext *s);
 void ff_mpv_common_init_ppc(MpegEncContext *s);
 void ff_mpv_common_init_x86(MpegEncContext *s);
+void ff_mpv_common_init_mips(MpegEncContext *s);
 
 int ff_mpv_common_frame_size_change(MpegEncContext *s);
 void ff_mpv_common_end(MpegEncContext *s);
@@ -694,8 +684,6 @@ void ff_mpv_report_decode_progress(MpegEncContext *s);
 int ff_mpv_frame_start(MpegEncContext *s, AVCodecContext *avctx);
 void ff_mpv_frame_end(MpegEncContext *s);
 
-int ff_mpv_lowest_referenced_row(MpegEncContext *s, int dir);
-
 int ff_mpv_encode_init(AVCodecContext *avctx);
 void ff_mpv_encode_init_x86(MpegEncContext *s);
 
@@ -718,7 +706,6 @@ int ff_mpv_export_qp_table(MpegEncContext *s, AVFrame *f, Picture *p, int qp_typ
 
 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix);
 
-int ff_find_unused_picture(AVCodecContext *avctx, Picture *picture, int shared);
 int ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src);
 int ff_mpeg_update_thread_context(AVCodecContext *dst, const AVCodecContext *src);
 void ff_set_qscale(MpegEncContext * s, int qscale);
@@ -728,7 +715,8 @@ int ff_dct_encode_init(MpegEncContext *s);
 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16)[2][64],
                        const uint16_t *quant_matrix, int bias, int qmin, int qmax, int intra);
 int ff_dct_quantize_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
-
+void ff_block_permute(int16_t *block, uint8_t *permutation,
+                      const uint8_t *scantable, int last);
 void ff_init_block_index(MpegEncContext *s);
 
 void ff_mpv_motion(MpegEncContext *s,
@@ -738,24 +726,6 @@ void ff_mpv_motion(MpegEncContext *s,
                    op_pixels_func (*pix_op)[4],
                    qpel_mc_func (*qpix_op)[16]);
 
-/**
- * Allocate a Picture.
- * The pixels are allocated/set by calling get_buffer() if shared = 0.
- */
-int ff_alloc_picture(AVCodecContext *avctx, Picture *pic, MotionEstContext *me,
-                     ScratchpadContext *sc, int shared, int encoding,
-                     int chroma_x_shift, int chroma_y_shift, int out_format,
-                     int mb_stride, int mb_width, int mb_height, int b8_stride,
-                     ptrdiff_t *linesize, ptrdiff_t *uvlinesize);
-
-int ff_mpeg_framesize_alloc(AVCodecContext *avctx, MotionEstContext *me,
-                            ScratchpadContext *sc, int linesize);
-/**
- * permute block according to permuatation.
- * @param last last non zero element in scantable order
- */
-void ff_block_permute(int16_t *block, uint8_t *permutation, const uint8_t *scantable, int last);
-
 static inline void ff_update_block_index(MpegEncContext *s){
     const int block_size= 8 >> s->avctx->lowres;
 
@@ -779,14 +749,4 @@ static inline int get_bits_diff(MpegEncContext *s){
     return bits - last;
 }
 
-/* rv10.c */
-int ff_rv10_encode_picture_header(MpegEncContext *s, int picture_number);
-int ff_rv_decode_dc(MpegEncContext *s, int n);
-void ff_rv20_encode_picture_header(MpegEncContext *s, int picture_number);
-
-int ff_mpeg_ref_picture(AVCodecContext *avctx, Picture *dst, Picture *src);
-void ff_mpeg_unref_picture(AVCodecContext *avctx, Picture *picture);
-void ff_free_picture_tables(Picture *pic);
-
-
 #endif /* AVCODEC_MPEGVIDEO_H */
diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c
index 49fc083d..297ff788 100644
--- a/libavcodec/mpegvideo_enc.c
+++ b/libavcodec/mpegvideo_enc.c
@@ -22,6 +22,10 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+/*
+ * non linear quantizers with large QPs and VBV with restrictive qmin fixes sponsored by NOA GmbH
+ */
+
 /**
  * @file
  * The simplest mpeg encoder (well, it was the simplest!).
@@ -43,6 +47,7 @@
 #include "mpegvideodata.h"
 #include "h261.h"
 #include "h263.h"
+#include "h263data.h"
 #include "mjpegenc_common.h"
 #include "mathops.h"
 #include "mpegutils.h"
@@ -58,6 +63,7 @@
 #include "internal.h"
 #include "bytestream.h"
 #include "wmv2.h"
+#include "rv10.h"
 #include <limits.h>
 #include "sp5x.h"
 
@@ -72,7 +78,7 @@ static int sse_mb(MpegEncContext *s);
 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
 
-static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
+static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_DMV * 2 + 1];
 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
 
 const AVOption ff_mpv_generic_options[] = {
@@ -91,6 +97,11 @@ void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
 
     for (qscale = qmin; qscale <= qmax; qscale++) {
         int i;
+        int qscale2;
+
+        if (s->q_scale_type) qscale2 = ff_mpeg2_non_linear_qscale[qscale];
+        else                 qscale2 = qscale << 1;
+
         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
 #if CONFIG_FAANDCT
             fdsp->fdct == ff_faandct            ||
@@ -98,46 +109,46 @@ void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
             fdsp->fdct == ff_jpeg_fdct_islow_10) {
             for (i = 0; i < 64; i++) {
                 const int j = s->idsp.idct_permutation[i];
-                int64_t den = (int64_t) qscale * quant_matrix[j];
+                int64_t den = (int64_t) qscale2 * quant_matrix[j];
                 /* 16 <= qscale * quant_matrix[i] <= 7905
                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
                  *             19952 <=              x  <= 249205026
                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
                  *           3444240 >= (1 << 36) / (x) >= 275 */
 
-                qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
+                qmat[qscale][i] = (int)((UINT64_C(2) << QMAT_SHIFT) / den);
             }
         } else if (fdsp->fdct == ff_fdct_ifast) {
             for (i = 0; i < 64; i++) {
                 const int j = s->idsp.idct_permutation[i];
-                int64_t den = ff_aanscales[i] * (int64_t) qscale * quant_matrix[j];
+                int64_t den = ff_aanscales[i] * (int64_t) qscale2 * quant_matrix[j];
                 /* 16 <= qscale * quant_matrix[i] <= 7905
                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
                  *             19952 <=              x  <= 249205026
                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
                  *           3444240 >= (1 << 36) / (x) >= 275 */
 
-                qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / den);
+                qmat[qscale][i] = (int)((UINT64_C(2) << (QMAT_SHIFT + 14)) / den);
             }
         } else {
             for (i = 0; i < 64; i++) {
                 const int j = s->idsp.idct_permutation[i];
-                int64_t den = (int64_t) qscale * quant_matrix[j];
+                int64_t den = (int64_t) qscale2 * quant_matrix[j];
                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
                  * Assume x = qscale * quant_matrix[i]
                  * So             16 <=              x  <= 7905
                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
                  * so          32768 >= (1 << 19) / (x) >= 67 */
-                qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
+                qmat[qscale][i] = (int)((UINT64_C(2) << QMAT_SHIFT) / den);
                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
                 //                    (qscale * quant_matrix[i]);
-                qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / den;
+                qmat16[qscale][0][i] = (2 << QMAT_SHIFT_MMX) / den;
 
                 if (qmat16[qscale][0][i] == 0 ||
                     qmat16[qscale][0][i] == 128 * 256)
                     qmat16[qscale][0][i] = 128 * 256 - 1;
                 qmat16[qscale][1][i] =
-                    ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
+                    ROUNDED_DIV(bias * (1<<(16 - QUANT_BIAS_SHIFT)),
                                 qmat16[qscale][0][i]);
             }
         }
@@ -161,9 +172,27 @@ void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
 
 static inline void update_qscale(MpegEncContext *s)
 {
-    s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
-                (FF_LAMBDA_SHIFT + 7);
-    s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
+    if (s->q_scale_type == 1 && 0) {
+        int i;
+        int bestdiff=INT_MAX;
+        int best = 1;
+
+        for (i = 0 ; i<FF_ARRAY_ELEMS(ff_mpeg2_non_linear_qscale); i++) {
+            int diff = FFABS((ff_mpeg2_non_linear_qscale[i]<<(FF_LAMBDA_SHIFT + 6)) - (int)s->lambda * 139);
+            if (ff_mpeg2_non_linear_qscale[i] < s->avctx->qmin ||
+                (ff_mpeg2_non_linear_qscale[i] > s->avctx->qmax && !s->vbv_ignore_qmax))
+                continue;
+            if (diff < bestdiff) {
+                bestdiff = diff;
+                best = i;
+            }
+        }
+        s->qscale = best;
+    } else {
+        s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
+                    (FF_LAMBDA_SHIFT + 7);
+        s->qscale = av_clip(s->qscale, s->avctx->qmin, s->vbv_ignore_qmax ? 31 : s->avctx->qmax);
+    }
 
     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
                  FF_LAMBDA_SHIFT;
@@ -257,6 +286,7 @@ av_cold int ff_dct_encode_init(MpegEncContext *s) {
 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
 {
     MpegEncContext *s = avctx->priv_data;
+    AVCPBProperties *cpb_props;
     int i, ret, format_supported;
 
     mpv_encode_defaults(s);
@@ -317,6 +347,19 @@ av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
         break;
     }
 
+    avctx->bits_per_raw_sample = av_clip(avctx->bits_per_raw_sample, 0, 8);
+
+#if FF_API_PRIVATE_OPT
+FF_DISABLE_DEPRECATION_WARNINGS
+    if (avctx->rtp_payload_size)
+        s->rtp_payload_size = avctx->rtp_payload_size;
+    if (avctx->me_penalty_compensation)
+        s->me_penalty_compensation = avctx->me_penalty_compensation;
+    if (avctx->pre_me)
+        s->me_pre = avctx->pre_me;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
     s->bit_rate = avctx->bit_rate;
     s->width    = avctx->width;
     s->height   = avctx->height;
@@ -337,9 +380,8 @@ av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
     s->max_b_frames = avctx->max_b_frames;
     s->codec_id     = avctx->codec->id;
     s->strict_std_compliance = avctx->strict_std_compliance;
-    s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
-    s->mpeg_quant         = avctx->mpeg_quant;
-    s->rtp_mode           = !!avctx->rtp_payload_size;
+    s->quarter_sample     = (avctx->flags & AV_CODEC_FLAG_QPEL) != 0;
+    s->rtp_mode           = !!s->rtp_payload_size;
     s->intra_dc_precision = avctx->intra_dc_precision;
 
     // workaround some differences between how applications specify dc precision
@@ -368,10 +410,14 @@ av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
         s->intra_only = 0;
     }
 
+#if FF_API_MOTION_EST
+FF_DISABLE_DEPRECATION_WARNINGS
     s->me_method = avctx->me_method;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
     /* Fixed QSCALE */
-    s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
+    s->fixed_qscale = !!(avctx->flags & AV_CODEC_FLAG_QSCALE);
 
 #if FF_API_MPV_OPT
     FF_DISABLE_DEPRECATION_WARNINGS
@@ -389,7 +435,7 @@ av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
                         !s->fixed_qscale;
 
-    s->loop_filter = !!(s->avctx->flags & CODEC_FLAG_LOOP_FILTER);
+    s->loop_filter = !!(s->avctx->flags & AV_CODEC_FLAG_LOOP_FILTER);
 
     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
         switch(avctx->codec_id) {
@@ -455,7 +501,7 @@ av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
         avctx->bit_rate * av_q2d(avctx->time_base) >
             avctx->bit_rate_tolerance) {
         av_log(avctx, AV_LOG_WARNING,
-               "bitrate tolerance %d too small for bitrate %d, overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
+               "bitrate tolerance %d too small for bitrate %"PRId64", overriding\n", avctx->bit_rate_tolerance, (int64_t)avctx->bit_rate);
         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
     }
 
@@ -470,7 +516,7 @@ av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
                "specified vbv buffer is too large for the given bitrate!\n");
     }
 
-    if ((s->avctx->flags & CODEC_FLAG_4MV) && s->codec_id != AV_CODEC_ID_MPEG4 &&
+    if ((s->avctx->flags & AV_CODEC_FLAG_4MV) && s->codec_id != AV_CODEC_ID_MPEG4 &&
         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
         s->codec_id != AV_CODEC_ID_FLV1) {
         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
@@ -563,12 +609,19 @@ av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
          return -1;
     }
 
-    if ((s->avctx->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
+    if ((s->avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT | AV_CODEC_FLAG_INTERLACED_ME)) &&
         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
         return -1;
     }
 
+#if FF_API_PRIVATE_OPT
+    FF_DISABLE_DEPRECATION_WARNINGS
+    if (avctx->mpeg_quant)
+        s->mpeg_quant = avctx->mpeg_quant;
+    FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
     // FIXME mpeg2 uses that too
     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
@@ -588,15 +641,22 @@ av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
         return -1;
     }
 
-    if (s->avctx->scenechange_threshold < 1000000000 &&
-        (s->avctx->flags & CODEC_FLAG_CLOSED_GOP)) {
+#if FF_API_PRIVATE_OPT
+FF_DISABLE_DEPRECATION_WARNINGS
+    if (avctx->scenechange_threshold)
+        s->scenechange_threshold = avctx->scenechange_threshold;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+    if (s->scenechange_threshold < 1000000000 &&
+        (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)) {
         av_log(avctx, AV_LOG_ERROR,
                "closed gop with scene change detection are not supported yet, "
                "set threshold to 1000000000\n");
         return -1;
     }
 
-    if (s->avctx->flags & CODEC_FLAG_LOW_DELAY) {
+    if (s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY) {
         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
             av_log(avctx, AV_LOG_ERROR,
                   "low delay forcing is only available for mpeg2\n");
@@ -610,13 +670,19 @@ av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
     }
 
     if (s->q_scale_type == 1) {
-        if (avctx->qmax > 12) {
+        if (avctx->qmax > 28) {
             av_log(avctx, AV_LOG_ERROR,
-                   "non linear quant only supports qmax <= 12 currently\n");
+                   "non linear quant only supports qmax <= 28 currently\n");
             return -1;
         }
     }
 
+    if (avctx->slices > 1 &&
+        (avctx->codec_id == AV_CODEC_ID_FLV1 || avctx->codec_id == AV_CODEC_ID_H261)) {
+        av_log(avctx, AV_LOG_ERROR, "Multiple slices are not supported by this codec\n");
+        return AVERROR(EINVAL);
+    }
+
     if (s->avctx->thread_count > 1         &&
         s->codec_id != AV_CODEC_ID_MPEG4      &&
         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
@@ -635,21 +701,24 @@ av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
         return -1;
     }
 
-    if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
-        s->rtp_mode = 1;
-
-    if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
-        s->h263_slice_structured = 1;
-
     if (!avctx->time_base.den || !avctx->time_base.num) {
         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
         return -1;
     }
 
-    if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
+#if FF_API_PRIVATE_OPT
+FF_DISABLE_DEPRECATION_WARNINGS
+    if (avctx->b_frame_strategy)
+        s->b_frame_strategy = avctx->b_frame_strategy;
+    if (avctx->b_sensitivity != 40)
+        s->b_sensitivity = avctx->b_sensitivity;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+    if (s->b_frame_strategy && (avctx->flags & AV_CODEC_FLAG_PASS2)) {
         av_log(avctx, AV_LOG_INFO,
                "notice: b_frame_strategy only affects the first pass\n");
-        avctx->b_frame_strategy = 0;
+        s->b_frame_strategy = 0;
     }
 
     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
@@ -675,10 +744,14 @@ av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
         return AVERROR(EINVAL);
     }
 
+#if FF_API_QUANT_BIAS
+FF_DISABLE_DEPRECATION_WARNINGS
     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
         s->intra_quant_bias = avctx->intra_quant_bias;
     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
         s->inter_quant_bias = avctx->inter_quant_bias;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
 
@@ -696,12 +769,12 @@ av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
     switch (avctx->codec->id) {
     case AV_CODEC_ID_MPEG1VIDEO:
         s->out_format = FMT_MPEG1;
-        s->low_delay  = !!(s->avctx->flags & CODEC_FLAG_LOW_DELAY);
+        s->low_delay  = !!(s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
         break;
     case AV_CODEC_ID_MPEG2VIDEO:
         s->out_format = FMT_MPEG1;
-        s->low_delay  = !!(s->avctx->flags & CODEC_FLAG_LOW_DELAY);
+        s->low_delay  = !!(s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
         s->rtp_mode   = 1;
         break;
@@ -750,9 +823,9 @@ av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
         s->out_format = FMT_H263;
         s->h263_plus  = 1;
         /* Fx */
-        s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
+        s->h263_aic        = (avctx->flags & AV_CODEC_FLAG_AC_PRED) ? 1 : 0;
         s->modified_quant  = s->h263_aic;
-        s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
+        s->loop_filter     = (avctx->flags & AV_CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
 
         /* /Fx */
@@ -829,13 +902,20 @@ av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
         return -1;
     }
 
+#if FF_API_PRIVATE_OPT
+    FF_DISABLE_DEPRECATION_WARNINGS
+    if (avctx->noise_reduction)
+        s->noise_reduction = avctx->noise_reduction;
+    FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
     avctx->has_b_frames = !s->low_delay;
 
     s->encoding = 1;
 
     s->progressive_frame    =
-    s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
-                                                CODEC_FLAG_INTERLACED_ME) ||
+    s->progressive_sequence = !(avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT |
+                                                AV_CODEC_FLAG_INTERLACED_ME) ||
                                 s->alternate_scan);
 
     /* init */
@@ -849,8 +929,6 @@ av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
     ff_pixblockdsp_init(&s->pdsp, avctx);
     ff_qpeldsp_init(&s->qdsp);
 
-    s->avctx->coded_frame = s->current_picture.f;
-
     if (s->msmpeg4_version) {
         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
                           2 * 2 * (MAX_LEVEL + 1) *
@@ -869,7 +947,8 @@ av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 
-    if (s->avctx->noise_reduction) {
+
+    if (s->noise_reduction) {
         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
                           2 * 64 * sizeof(uint16_t), fail);
     }
@@ -879,10 +958,30 @@ av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
 
+    if (s->slice_context_count > 1) {
+        s->rtp_mode = 1;
+
+        if (avctx->codec_id == AV_CODEC_ID_H263P)
+            s->h263_slice_structured = 1;
+    }
+
     s->quant_precision = 5;
 
+#if FF_API_PRIVATE_OPT
+FF_DISABLE_DEPRECATION_WARNINGS
+    if (avctx->frame_skip_threshold)
+        s->frame_skip_threshold = avctx->frame_skip_threshold;
+    if (avctx->frame_skip_factor)
+        s->frame_skip_factor = avctx->frame_skip_factor;
+    if (avctx->frame_skip_exp)
+        s->frame_skip_exp = avctx->frame_skip_exp;
+    if (avctx->frame_skip_cmp != FF_CMP_DCTMAX)
+        s->frame_skip_cmp = avctx->frame_skip_cmp;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
-    ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
+    ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->frame_skip_cmp);
 
     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
         ff_h261_encode_init(s);
@@ -978,15 +1077,25 @@ av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
     FF_ENABLE_DEPRECATION_WARNINGS
 #endif
 
-    if (avctx->b_frame_strategy == 2) {
+#if FF_API_PRIVATE_OPT
+    FF_DISABLE_DEPRECATION_WARNINGS
+    if (avctx->brd_scale)
+        s->brd_scale = avctx->brd_scale;
+
+    if (avctx->prediction_method)
+        s->pred = avctx->prediction_method + 1;
+    FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+    if (s->b_frame_strategy == 2) {
         for (i = 0; i < s->max_b_frames + 2; i++) {
             s->tmp_frames[i] = av_frame_alloc();
             if (!s->tmp_frames[i])
                 return AVERROR(ENOMEM);
 
             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
-            s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
-            s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
+            s->tmp_frames[i]->width  = s->width  >> s->brd_scale;
+            s->tmp_frames[i]->height = s->height >> s->brd_scale;
 
             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
             if (ret < 0)
@@ -994,6 +1103,14 @@ av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
         }
     }
 
+    cpb_props = ff_add_cpb_side_data(avctx);
+    if (!cpb_props)
+        return AVERROR(ENOMEM);
+    cpb_props->max_bitrate = avctx->rc_max_rate;
+    cpb_props->min_bitrate = avctx->rc_min_rate;
+    cpb_props->avg_bitrate = avctx->bit_rate;
+    cpb_props->buffer_size = avctx->rc_buffer_size;
+
     return 0;
 fail:
     ff_mpv_encode_end(avctx);
@@ -1088,8 +1205,9 @@ static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
     Picture *pic = NULL;
     int64_t pts;
     int i, display_picture_number = 0, ret;
-    const int encoding_delay = s->max_b_frames ? s->max_b_frames :
-                                                 (s->low_delay ? 0 : 1);
+    int encoding_delay = s->max_b_frames ? s->max_b_frames
+                                         : (s->low_delay ? 0 : 1);
+    int flush_offset = 1;
     int direct = 1;
 
     if (pic_arg) {
@@ -1122,9 +1240,7 @@ static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
                 pts = display_picture_number;
             }
         }
-    }
 
-    if (pic_arg) {
         if (!pic_arg->buf[0] ||
             pic_arg->linesize[0] != s->linesize ||
             pic_arg->linesize[1] != s->uvlinesize ||
@@ -1212,11 +1328,22 @@ static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
 
         pic->f->display_picture_number = display_picture_number;
         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
+    } else {
+        /* Flushing: When we have not received enough input frames,
+         * ensure s->input_picture[0] contains the first picture */
+        for (flush_offset = 0; flush_offset < encoding_delay + 1; flush_offset++)
+            if (s->input_picture[flush_offset])
+                break;
+
+        if (flush_offset <= 1)
+            flush_offset = 1;
+        else
+            encoding_delay = encoding_delay - flush_offset + 1;
     }
 
     /* shift buffer entries */
-    for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
-        s->input_picture[i - 1] = s->input_picture[i];
+    for (i = flush_offset; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
+        s->input_picture[i - flush_offset] = s->input_picture[i];
 
     s->input_picture[encoding_delay] = (Picture*) pic;
 
@@ -1239,7 +1366,7 @@ static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
 
-                switch (FFABS(s->avctx->frame_skip_exp)) {
+                switch (FFABS(s->frame_skip_exp)) {
                 case 0: score    =  FFMAX(score, v);          break;
                 case 1: score   += FFABS(v);                  break;
                 case 2: score64 += v * (int64_t)v;                       break;
@@ -1253,13 +1380,13 @@ static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
 
     if (score)
         score64 = score;
-    if (s->avctx->frame_skip_exp < 0)
+    if (s->frame_skip_exp < 0)
         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
-                      -1.0/s->avctx->frame_skip_exp);
+                      -1.0/s->frame_skip_exp);
 
-    if (score64 < s->avctx->frame_skip_threshold)
+    if (score64 < s->frame_skip_threshold)
         return 1;
-    if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
+    if (score64 < ((s->frame_skip_factor * (int64_t) s->lambda) >> 8))
         return 1;
     return 0;
 }
@@ -1275,7 +1402,7 @@ static int encode_frame(AVCodecContext *c, AVFrame *frame)
         return ret;
 
     ret = pkt.size;
-    av_free_packet(&pkt);
+    av_packet_unref(&pkt);
     return ret;
 }
 
@@ -1283,7 +1410,7 @@ static int estimate_best_b_count(MpegEncContext *s)
 {
     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
     AVCodecContext *c = avcodec_alloc_context3(NULL);
-    const int scale = s->avctx->brd_scale;
+    const int scale = s->brd_scale;
     int i, j, out_size, p_lambda, b_lambda, lambda2;
     int64_t best_rd  = INT64_MAX;
     int best_b_count = -1;
@@ -1304,8 +1431,8 @@ static int estimate_best_b_count(MpegEncContext *s)
 
     c->width        = s->width  >> scale;
     c->height       = s->height >> scale;
-    c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
-    c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
+    c->flags        = AV_CODEC_FLAG_QSCALE | AV_CODEC_FLAG_PSNR;
+    c->flags       |= s->avctx->flags & AV_CODEC_FLAG_QPEL;
     c->mb_decision  = s->avctx->mb_decision;
     c->me_cmp       = s->avctx->me_cmp;
     c->mb_cmp       = s->avctx->mb_cmp;
@@ -1407,7 +1534,7 @@ static int select_input_picture(MpegEncContext *s)
 
     /* set next picture type & ordering */
     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
-        if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
+        if (s->frame_skip_threshold || s->frame_skip_factor) {
             if (s->picture_in_gop_number < s->gop_size &&
                 s->next_picture_ptr &&
                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
@@ -1427,9 +1554,9 @@ static int select_input_picture(MpegEncContext *s)
             s->reordered_input_picture[0]->f->coded_picture_number =
                 s->coded_picture_number++;
         } else {
-            int b_frames;
+            int b_frames = 0;
 
-            if (s->avctx->flags & CODEC_FLAG_PASS2) {
+            if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
                 for (i = 0; i < s->max_b_frames + 1; i++) {
                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
 
@@ -1445,11 +1572,11 @@ static int select_input_picture(MpegEncContext *s)
                 }
             }
 
-            if (s->avctx->b_frame_strategy == 0) {
+            if (s->b_frame_strategy == 0) {
                 b_frames = s->max_b_frames;
                 while (b_frames && !s->input_picture[b_frames])
                     b_frames--;
-            } else if (s->avctx->b_frame_strategy == 1) {
+            } else if (s->b_frame_strategy == 1) {
                 for (i = 1; i < s->max_b_frames + 1; i++) {
                     if (s->input_picture[i] &&
                         s->input_picture[i]->b_frame_score == 0) {
@@ -1463,7 +1590,7 @@ static int select_input_picture(MpegEncContext *s)
                 for (i = 0; i < s->max_b_frames + 1; i++) {
                     if (!s->input_picture[i] ||
                         s->input_picture[i]->b_frame_score - 1 >
-                            s->mb_num / s->avctx->b_sensitivity)
+                            s->mb_num / s->b_sensitivity)
                         break;
                 }
 
@@ -1473,11 +1600,8 @@ static int select_input_picture(MpegEncContext *s)
                 for (i = 0; i < b_frames + 1; i++) {
                     s->input_picture[i]->b_frame_score = 0;
                 }
-            } else if (s->avctx->b_frame_strategy == 2) {
+            } else if (s->b_frame_strategy == 2) {
                 b_frames = estimate_best_b_count(s);
-            } else {
-                av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
-                b_frames = 0;
             }
 
             emms_c();
@@ -1498,13 +1622,13 @@ static int select_input_picture(MpegEncContext *s)
                     s->gop_size > s->picture_in_gop_number) {
                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
                 } else {
-                    if (s->avctx->flags & CODEC_FLAG_CLOSED_GOP)
+                    if (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)
                         b_frames = 0;
                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
                 }
             }
 
-            if ((s->avctx->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
+            if ((s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP) && b_frames &&
                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
                 b_frames--;
 
@@ -1523,12 +1647,13 @@ static int select_input_picture(MpegEncContext *s)
         }
     }
 no_output_pic:
+    ff_mpeg_unref_picture(s->avctx, &s->new_picture);
+
     if (s->reordered_input_picture[0]) {
         s->reordered_input_picture[0]->reference =
            s->reordered_input_picture[0]->f->pict_type !=
                AV_PICTURE_TYPE_B ? 3 : 0;
 
-        ff_mpeg_unref_picture(s->avctx, &s->new_picture);
         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->new_picture, s->reordered_input_picture[0])))
             return ret;
 
@@ -1569,8 +1694,6 @@ static int select_input_picture(MpegEncContext *s)
             return ret;
 
         s->picture_number = s->new_picture.f->display_picture_number;
-    } else {
-        ff_mpeg_unref_picture(s->avctx, &s->new_picture);
     }
     return 0;
 }
@@ -1611,8 +1734,17 @@ static void frame_end(MpegEncContext *s)
     if (s->pict_type!= AV_PICTURE_TYPE_B)
         s->last_non_b_pict_type = s->pict_type;
 
-    s->avctx->coded_frame = s->current_picture_ptr->f;
-
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
+    av_frame_copy_props(s->avctx->coded_frame, s->current_picture.f);
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+#if FF_API_ERROR_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
+    memcpy(s->current_picture.f->error, s->current_picture.encoding_error,
+           sizeof(s->current_picture.encoding_error));
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 }
 
 static void update_noise_reduction(MpegEncContext *s)
@@ -1628,7 +1760,7 @@ static void update_noise_reduction(MpegEncContext *s)
         }
 
         for (i = 0; i < 64; i++) {
-            s->dct_offset[intra][i] = (s->avctx->noise_reduction *
+            s->dct_offset[intra][i] = (s->noise_reduction *
                                        s->dct_count[intra] +
                                        s->dct_error_sum[intra][i] / 2) /
                                       (s->dct_error_sum[intra][i] + 1);
@@ -1701,7 +1833,7 @@ static int frame_start(MpegEncContext *s)
     }
 
     if (s->dct_error_sum) {
-        av_assert2(s->avctx->noise_reduction && s->encoding);
+        av_assert2(s->noise_reduction && s->encoding);
         update_noise_reduction(s);
     }
 
@@ -1715,6 +1847,8 @@ int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
     int i, stuffing_count, ret;
     int context_count = s->slice_context_count;
 
+    s->vbv_ignore_qmax = 0;
+
     s->picture_in_gop_number++;
 
     if (load_input_picture(s, pic_arg) < 0)
@@ -1727,10 +1861,10 @@ int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
     /* output? */
     if (s->new_picture.f->data[0]) {
         int growing_buffer = context_count == 1 && !pkt->data && !s->data_partitioning;
-        int pkt_size = growing_buffer ? FFMAX(s->mb_width*s->mb_height*64+10000, avctx->internal->byte_buffer_size) - FF_INPUT_BUFFER_PADDING_SIZE
+        int pkt_size = growing_buffer ? FFMAX(s->mb_width*s->mb_height*64+10000, avctx->internal->byte_buffer_size) - AV_INPUT_BUFFER_PADDING_SIZE
                                               :
                                               s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000;
-        if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size)) < 0)
+        if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size, 0)) < 0)
             return ret;
         if (s->mb_info) {
             s->mb_info_ptr = av_packet_new_side_data(pkt,
@@ -1764,6 +1898,8 @@ int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
         if (ret < 0)
             return -1;
 
+#if FF_API_STAT_BITS
+FF_DISABLE_DEPRECATION_WARNINGS
         avctx->header_bits = s->header_bits;
         avctx->mv_bits     = s->mv_bits;
         avctx->misc_bits   = s->misc_bits;
@@ -1773,6 +1909,8 @@ int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
         // FIXME f/b_count in avctx
         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
         avctx->skip_count  = s->skip_count;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
         frame_end(s);
 
@@ -1782,16 +1920,18 @@ int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
         if (avctx->rc_buffer_size) {
             RateControlContext *rcc = &s->rc_context;
             int max_size = FFMAX(rcc->buffer_index * avctx->rc_max_available_vbv_use, rcc->buffer_index - 500);
+            int hq = (s->avctx->mb_decision == FF_MB_DECISION_RD || s->avctx->trellis);
+            int min_step = hq ? 1 : (1<<(FF_LAMBDA_SHIFT + 7))/139;
 
             if (put_bits_count(&s->pb) > max_size &&
                 s->lambda < s->lmax) {
-                s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
+                s->next_lambda = FFMAX(s->lambda + min_step, s->lambda *
                                        (s->qscale + 1) / s->qscale);
                 if (s->adaptive_quant) {
                     int i;
                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
                         s->lambda_table[i] =
-                            FFMAX(s->lambda_table[i] + 1,
+                            FFMAX(s->lambda_table[i] + min_step,
                                   s->lambda_table[i] * (s->qscale + 1) /
                                   s->qscale);
                 }
@@ -1811,6 +1951,7 @@ int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
                     PutBitContext *pb = &s->thread_context[i]->pb;
                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
                 }
+                s->vbv_ignore_qmax = 1;
                 av_log(s->avctx, AV_LOG_VERBOSE, "reencoding frame due to VBV\n");
                 goto vbv_retry;
             }
@@ -1818,20 +1959,22 @@ int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
             av_assert0(s->avctx->rc_max_rate);
         }
 
-        if (s->avctx->flags & CODEC_FLAG_PASS1)
+        if (s->avctx->flags & AV_CODEC_FLAG_PASS1)
             ff_write_pass1_stats(s);
 
         for (i = 0; i < 4; i++) {
-            s->current_picture_ptr->f->error[i] =
-            s->current_picture.f->error[i] =
-                s->current_picture.error[i];
-            avctx->error[i] += s->current_picture_ptr->f->error[i];
+            s->current_picture_ptr->encoding_error[i] = s->current_picture.encoding_error[i];
+            avctx->error[i] += s->current_picture_ptr->encoding_error[i];
         }
-
-        if (s->avctx->flags & CODEC_FLAG_PASS1)
-            assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
-                   avctx->i_tex_bits + avctx->p_tex_bits ==
-                       put_bits_count(&s->pb));
+        ff_side_data_set_encoder_stats(pkt, s->current_picture.f->quality,
+                                       s->current_picture_ptr->encoding_error,
+                                       (s->avctx->flags&AV_CODEC_FLAG_PSNR) ? 4 : 0,
+                                       s->pict_type);
+
+        if (s->avctx->flags & AV_CODEC_FLAG_PASS1)
+            assert(put_bits_count(&s->pb) == s->header_bits + s->mv_bits +
+                                             s->misc_bits + s->i_tex_bits +
+                                             s->p_tex_bits);
         flush_put_bits(&s->pb);
         s->frame_bits  = put_bits_count(&s->pb);
 
@@ -1872,6 +2015,9 @@ int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
             s->out_format == FMT_MPEG1                     &&
             90000LL * (avctx->rc_buffer_size - 1) <=
                 s->avctx->rc_max_rate * 0xFFFFLL) {
+            AVCPBProperties *props;
+            size_t props_size;
+
             int vbv_delay, min_delay;
             double inbits  = s->avctx->rc_max_rate *
                              av_q2d(s->avctx->time_base);
@@ -1898,10 +2044,32 @@ int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
             s->vbv_delay_ptr[2] &= 0x07;
             s->vbv_delay_ptr[2] |= vbv_delay << 3;
+
+            props = av_cpb_properties_alloc(&props_size);
+            if (!props)
+                return AVERROR(ENOMEM);
+            props->vbv_delay = vbv_delay * 300;
+
+            ret = av_packet_add_side_data(pkt, AV_PKT_DATA_CPB_PROPERTIES,
+                                          (uint8_t*)props, props_size);
+            if (ret < 0) {
+                av_freep(&props);
+                return ret;
+            }
+
+#if FF_API_VBV_DELAY
+FF_DISABLE_DEPRECATION_WARNINGS
             avctx->vbv_delay     = vbv_delay * 300;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
         }
         s->total_bits     += s->frame_bits;
+#if FF_API_STAT_BITS
+FF_DISABLE_DEPRECATION_WARNINGS
         avctx->frame_bits  = s->frame_bits;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
 
         pkt->pts = s->current_picture.f->pts;
         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
@@ -2130,7 +2298,7 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s,
     }
 
     if (s->mb_intra) {
-        if (s->avctx->flags & CODEC_FLAG_INTERLACED_DCT) {
+        if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
             int progressive_score, interlaced_score;
 
             s->interlaced_dct = 0;
@@ -2161,7 +2329,7 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s,
         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
 
-        if (s->avctx->flags & CODEC_FLAG_GRAY) {
+        if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
             skip_dct[4] = 1;
             skip_dct[5] = 1;
         } else {
@@ -2209,7 +2377,7 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s,
                           op_pix, op_qpix);
         }
 
-        if (s->avctx->flags & CODEC_FLAG_INTERLACED_DCT) {
+        if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
             int progressive_score, interlaced_score;
 
             s->interlaced_dct = 0;
@@ -2247,7 +2415,7 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s,
         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
                             dest_y + dct_offset + 8, wrap_y);
 
-        if (s->avctx->flags & CODEC_FLAG_GRAY) {
+        if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
             skip_dct[4] = 1;
             skip_dct[5] = 1;
         } else {
@@ -2357,7 +2525,7 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s,
         }
     }
 
-    if ((s->avctx->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
+    if ((s->avctx->flags & AV_CODEC_FLAG_GRAY) && s->mb_intra) {
         s->block_last_index[4] =
         s->block_last_index[5] = 0;
         s->block[4][0] =
@@ -2681,7 +2849,7 @@ static void write_slice_end(MpegEncContext *s){
     avpriv_align_put_bits(&s->pb);
     flush_put_bits(&s->pb);
 
-    if ((s->avctx->flags & CODEC_FLAG_PASS1) && !s->partitioned_frame)
+    if ((s->avctx->flags & AV_CODEC_FLAG_PASS1) && !s->partitioned_frame)
         s->misc_bits+= get_bits_diff(s);
 }
 
@@ -2739,6 +2907,11 @@ int ff_mpv_reallocate_putbitbuffer(MpegEncContext *s, size_t threshold, size_t s
         uint8_t *new_buffer = NULL;
         int new_buffer_size = 0;
 
+        if ((s->avctx->internal->byte_buffer_size + size_increase) >= INT_MAX/8) {
+            av_log(s->avctx, AV_LOG_ERROR, "Cannot reallocate putbit buffer\n");
+            return AVERROR(ENOMEM);
+        }
+
         av_fast_padded_malloc(&new_buffer, &new_buffer_size,
                               s->avctx->internal->byte_buffer_size + size_increase);
         if (!new_buffer)
@@ -2791,7 +2964,7 @@ static int encode_thread(AVCodecContext *c, void *arg){
         /* note: quant matrix value (8) is implied here */
         s->last_dc[i] = 128 << s->intra_dc_precision;
 
-        s->current_picture.error[i] = 0;
+        s->current_picture.encoding_error[i] = 0;
     }
     if(s->codec_id==AV_CODEC_ID_AMV){
         s->last_dc[0] = 128*8/13;
@@ -2865,7 +3038,9 @@ static int encode_thread(AVCodecContext *c, void *arg){
 
                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
 
-                is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
+                is_gob_start = s->rtp_payload_size &&
+                               current_packet_size >= s->rtp_payload_size &&
+                               mb_y + mb_x > 0;
 
                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
 
@@ -2907,10 +3082,14 @@ static int encode_thread(AVCodecContext *c, void *arg){
                         }
                     }
 
+#if FF_API_RTP_CALLBACK
+FF_DISABLE_DEPRECATION_WARNINGS
                     if (s->avctx->rtp_callback){
                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
                     }
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
                     update_mb_info(s, 1);
 
                     switch(s->codec_id){
@@ -2934,7 +3113,7 @@ static int encode_thread(AVCodecContext *c, void *arg){
                     break;
                     }
 
-                    if (s->avctx->flags & CODEC_FLAG_PASS1) {
+                    if (s->avctx->flags & AV_CODEC_FLAG_PASS1) {
                         int bits= put_bits_count(&s->pb);
                         s->misc_bits+= bits - s->last_bits;
                         s->last_bits= bits;
@@ -3354,20 +3533,20 @@ static int encode_thread(AVCodecContext *c, void *arg){
                 s->p_mv_table[xy][1]=0;
             }
 
-            if (s->avctx->flags & CODEC_FLAG_PSNR) {
+            if (s->avctx->flags & AV_CODEC_FLAG_PSNR) {
                 int w= 16;
                 int h= 16;
 
                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
 
-                s->current_picture.error[0] += sse(
+                s->current_picture.encoding_error[0] += sse(
                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
                     s->dest[0], w, h, s->linesize);
-                s->current_picture.error[1] += sse(
+                s->current_picture.encoding_error[1] += sse(
                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
-                s->current_picture.error[2] += sse(
+                s->current_picture.encoding_error[2] += sse(
                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
             }
@@ -3386,6 +3565,8 @@ static int encode_thread(AVCodecContext *c, void *arg){
 
     write_slice_end(s);
 
+#if FF_API_RTP_CALLBACK
+FF_DISABLE_DEPRECATION_WARNINGS
     /* Send the last GOB if RTP */
     if (s->avctx->rtp_callback) {
         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
@@ -3394,6 +3575,8 @@ static int encode_thread(AVCodecContext *c, void *arg){
         emms_c();
         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
     }
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
     return 0;
 }
@@ -3420,11 +3603,11 @@ static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src)
     MERGE(misc_bits);
     MERGE(er.error_count);
     MERGE(padding_bug_score);
-    MERGE(current_picture.error[0]);
-    MERGE(current_picture.error[1]);
-    MERGE(current_picture.error[2]);
+    MERGE(current_picture.encoding_error[0]);
+    MERGE(current_picture.encoding_error[1]);
+    MERGE(current_picture.encoding_error[2]);
 
-    if(dst->avctx->noise_reduction){
+    if (dst->noise_reduction){
         for(i=0; i<64; i++){
             MERGE(dct_error_sum[0][i]);
             MERGE(dct_error_sum[1][i]);
@@ -3519,11 +3702,11 @@ static int encode_picture(MpegEncContext *s, int picture_number)
             s->no_rounding ^= 1;
     }
 
-    if (s->avctx->flags & CODEC_FLAG_PASS2) {
+    if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
         if (estimate_qp(s,1) < 0)
             return -1;
         ff_get_2pass_fcode(s);
-    } else if (!(s->avctx->flags & CODEC_FLAG_QSCALE)) {
+    } else if (!(s->avctx->flags & AV_CODEC_FLAG_QSCALE)) {
         if(s->pict_type==AV_PICTURE_TYPE_B)
             s->lambda= s->last_lambda_for[s->pict_type];
         else
@@ -3550,10 +3733,11 @@ static int encode_picture(MpegEncContext *s, int picture_number)
 
     /* Estimate motion for every MB */
     if(s->pict_type != AV_PICTURE_TYPE_I){
-        s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
-        s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
+        s->lambda  = (s->lambda  * s->me_penalty_compensation + 128) >> 8;
+        s->lambda2 = (s->lambda2 * (int64_t) s->me_penalty_compensation + 128) >> 8;
         if (s->pict_type != AV_PICTURE_TYPE_B) {
-            if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
+            if ((s->me_pre && s->last_non_b_pict_type == AV_PICTURE_TYPE_I) ||
+                s->me_pre == 2) {
                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
             }
         }
@@ -3576,7 +3760,8 @@ static int encode_picture(MpegEncContext *s, int picture_number)
     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
     emms_c();
 
-    if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
+    if (s->me.scene_change_score > s->scenechange_threshold &&
+        s->pict_type == AV_PICTURE_TYPE_P) {
         s->pict_type= AV_PICTURE_TYPE_I;
         for(i=0; i<s->mb_stride*s->mb_height; i++)
             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
@@ -3590,7 +3775,7 @@ static int encode_picture(MpegEncContext *s, int picture_number)
         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
 
-            if (s->avctx->flags & CODEC_FLAG_INTERLACED_ME) {
+            if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
                 int a,b;
                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
@@ -3599,7 +3784,7 @@ static int encode_picture(MpegEncContext *s, int picture_number)
 
             ff_fix_long_p_mvs(s);
             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
-            if (s->avctx->flags & CODEC_FLAG_INTERLACED_ME) {
+            if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
                 int j;
                 for(i=0; i<2; i++){
                     for(j=0; j<2; j++)
@@ -3624,7 +3809,7 @@ static int encode_picture(MpegEncContext *s, int picture_number)
             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
-            if (s->avctx->flags & CODEC_FLAG_INTERLACED_ME) {
+            if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
                 int dir, j;
                 for(dir=0; dir<2; dir++){
                     for(i=0; i<2; i++){
@@ -3645,7 +3830,7 @@ static int encode_picture(MpegEncContext *s, int picture_number)
 
     if (s->qscale < 3 && s->max_qcoeff <= 128 &&
         s->pict_type == AV_PICTURE_TYPE_I &&
-        !(s->avctx->flags & CODEC_FLAG_QSCALE))
+        !(s->avctx->flags & AV_CODEC_FLAG_QSCALE))
         s->qscale= 3; //reduce clipping problems
 
     if (s->out_format == FMT_MJPEG) {
@@ -3711,7 +3896,7 @@ static int encode_picture(MpegEncContext *s, int picture_number)
     case FMT_MJPEG:
         if (CONFIG_MJPEG_ENCODER)
             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
-                                           s->intra_matrix, s->chroma_intra_matrix);
+                                           s->pred, s->intra_matrix, s->chroma_intra_matrix);
         break;
     case FMT_H261:
         if (CONFIG_H261_ENCODER)
@@ -3722,9 +3907,11 @@ static int encode_picture(MpegEncContext *s, int picture_number)
             ff_wmv2_encode_picture_header(s, picture_number);
         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
             ff_msmpeg4_encode_picture_header(s, picture_number);
-        else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
-            ff_mpeg4_encode_picture_header(s, picture_number);
-        else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10) {
+        else if (CONFIG_MPEG4_ENCODER && s->h263_pred) {
+            ret = ff_mpeg4_encode_picture_header(s, picture_number);
+            if (ret < 0)
+                return ret;
+        } else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10) {
             ret = ff_rv10_encode_picture_header(s, picture_number);
             if (ret < 0)
                 return ret;
@@ -3809,6 +3996,7 @@ static int dct_quantize_trellis_c(MpegEncContext *s,
     uint8_t * length;
     uint8_t * last_length;
     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
+    int mpeg2_qscale;
 
     s->fdsp.fdct(block);
 
@@ -3817,6 +4005,9 @@ static int dct_quantize_trellis_c(MpegEncContext *s,
     qmul= qscale*16;
     qadd= ((qscale-1)|1)*8;
 
+    if (s->q_scale_type) mpeg2_qscale = ff_mpeg2_non_linear_qscale[qscale];
+    else                 mpeg2_qscale = qscale << 1;
+
     if (s->mb_intra) {
         int q;
         if (!s->h263_aic) {
@@ -3933,10 +4124,10 @@ static int dct_quantize_trellis_c(MpegEncContext *s,
             }else{ //MPEG1
                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
                 if(s->mb_intra){
-                        unquant_coeff = (int)(  alevel  * qscale * matrix[j]) >> 3;
+                        unquant_coeff = (int)(  alevel  * mpeg2_qscale * matrix[j]) >> 4;
                         unquant_coeff =   (unquant_coeff - 1) | 1;
                 }else{
-                        unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) matrix[j])) >> 4;
+                        unquant_coeff = (((  alevel  << 1) + 1) * mpeg2_qscale * ((int) matrix[j])) >> 5;
                         unquant_coeff =   (unquant_coeff - 1) | 1;
                 }
                 unquant_coeff<<= 3;
@@ -4052,7 +4243,7 @@ static int dct_quantize_trellis_c(MpegEncContext *s,
             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
                     unquant_coeff= (alevel*qmul + qadd)>>3;
             }else{ //MPEG1
-                    unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) matrix[0])) >> 4;
+                    unquant_coeff = (((  alevel  << 1) + 1) * mpeg2_qscale * ((int) matrix[0])) >> 5;
                     unquant_coeff =   (unquant_coeff - 1) | 1;
             }
             unquant_coeff = (unquant_coeff + 4) >> 3;
@@ -4489,6 +4680,42 @@ STOP_TIMER("iterative search")
     return last_non_zero;
 }
 
+/**
+ * Permute an 8x8 block according to the given permutation.
+ * @param block the block which will be permuted according to
+ *              the given permutation vector
+ * @param permutation the permutation vector
+ * @param last the last non zero coefficient in scantable order, used to
+ *             speed the permutation up
+ * @param scantable the used scantable, this is only used to speed the
+ *                  permutation up, the block is not (inverse) permuted
+ *                  to scantable order!
+ */
+void ff_block_permute(int16_t *block, uint8_t *permutation,
+                      const uint8_t *scantable, int last)
+{
+    int i;
+    int16_t temp[64];
+
+    if (last <= 0)
+        return;
+    //FIXME it is ok but not clean and might fail for some permutations
+    // if (permutation[1] == 1)
+    // return;
+
+    for (i = 0; i <= last; i++) {
+        const int j = scantable[i];
+        temp[j] = block[j];
+        block[j] = 0;
+    }
+
+    for (i = 0; i <= last; i++) {
+        const int j = scantable[i];
+        const int perm_j = permutation[j];
+        block[perm_j] = temp[j];
+    }
+}
+
 int ff_dct_quantize_c(MpegEncContext *s,
                         int16_t *block, int n,
                         int qscale, int *overflow)
@@ -4521,12 +4748,12 @@ int ff_dct_quantize_c(MpegEncContext *s,
         start_i = 1;
         last_non_zero = 0;
         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
-        bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
+        bias= s->intra_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
     } else {
         start_i = 0;
         last_non_zero = -1;
         qmat = s->q_inter_matrix[qscale];
-        bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
+        bias= s->inter_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
     }
     threshold1= (1<<QMAT_SHIFT) - bias - 1;
     threshold2= (threshold1<<1);
@@ -4565,7 +4792,7 @@ int ff_dct_quantize_c(MpegEncContext *s,
     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
         ff_block_permute(block, s->idsp.idct_permutation,
-                         scantable, last_non_zero);
+                      scantable, last_non_zero);
 
     return last_non_zero;
 }
@@ -4573,8 +4800,7 @@ int ff_dct_quantize_c(MpegEncContext *s,
 #define OFFSET(x) offsetof(MpegEncContext, x)
 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 static const AVOption h263_options[] = {
-    { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
-    { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
+    { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
     FF_MPV_COMMON_OPTS
     { NULL },
@@ -4601,10 +4827,10 @@ AVCodec ff_h263_encoder = {
 };
 
 static const AVOption h263p_options[] = {
-    { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
-    { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
-    { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
-    { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
+    { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus),       AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
+    { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
+    { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
+    { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE},
     FF_MPV_COMMON_OPTS
     { NULL },
 };
@@ -4624,12 +4850,17 @@ AVCodec ff_h263p_encoder = {
     .init           = ff_mpv_encode_init,
     .encode2        = ff_mpv_encode_picture,
     .close          = ff_mpv_encode_end,
-    .capabilities   = CODEC_CAP_SLICE_THREADS,
+    .capabilities   = AV_CODEC_CAP_SLICE_THREADS,
     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
     .priv_class     = &h263p_class,
 };
 
-FF_MPV_GENERIC_CLASS(msmpeg4v2)
+static const AVClass msmpeg4v2_class = {
+    .class_name = "msmpeg4v2 encoder",
+    .item_name  = av_default_item_name,
+    .option     = ff_mpv_generic_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
 
 AVCodec ff_msmpeg4v2_encoder = {
     .name           = "msmpeg4v2",
@@ -4644,7 +4875,12 @@ AVCodec ff_msmpeg4v2_encoder = {
     .priv_class     = &msmpeg4v2_class,
 };
 
-FF_MPV_GENERIC_CLASS(msmpeg4v3)
+static const AVClass msmpeg4v3_class = {
+    .class_name = "msmpeg4v3 encoder",
+    .item_name  = av_default_item_name,
+    .option     = ff_mpv_generic_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
 
 AVCodec ff_msmpeg4v3_encoder = {
     .name           = "msmpeg4",
@@ -4659,7 +4895,12 @@ AVCodec ff_msmpeg4v3_encoder = {
     .priv_class     = &msmpeg4v3_class,
 };
 
-FF_MPV_GENERIC_CLASS(wmv1)
+static const AVClass wmv1_class = {
+    .class_name = "wmv1 encoder",
+    .item_name  = av_default_item_name,
+    .option     = ff_mpv_generic_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
 
 AVCodec ff_wmv1_encoder = {
     .name           = "wmv1",
diff --git a/libavcodec/mpegvideo_motion.c b/libavcodec/mpegvideo_motion.c
index f1956f0e..51ba4352 100644
--- a/libavcodec/mpegvideo_motion.c
+++ b/libavcodec/mpegvideo_motion.c
@@ -88,7 +88,7 @@ static void gmc1_motion(MpegEncContext *s,
         }
     }
 
-    if (CONFIG_GRAY && s->avctx->flags & CODEC_FLAG_GRAY)
+    if (CONFIG_GRAY && s->avctx->flags & AV_CODEC_FLAG_GRAY)
         return;
 
     motion_x   = s->sprite_offset[1][0];
@@ -165,7 +165,7 @@ static void gmc_motion(MpegEncContext *s,
                 a + 1, (1 << (2 * a + 1)) - s->no_rounding,
                 s->h_edge_pos, s->v_edge_pos);
 
-    if (CONFIG_GRAY && s->avctx->flags & CODEC_FLAG_GRAY)
+    if (CONFIG_GRAY && s->avctx->flags & AV_CODEC_FLAG_GRAY)
         return;
 
     ox = s->sprite_offset[1][0] + s->sprite_delta[0][0] * s->mb_x * 8 +
@@ -324,7 +324,7 @@ void mpeg_motion_internal(MpegEncContext *s,
                                  src_x, src_y,
                                  s->h_edge_pos, s->v_edge_pos);
         ptr_y = s->sc.edge_emu_buffer;
-        if (!CONFIG_GRAY || !(s->avctx->flags & CODEC_FLAG_GRAY)) {
+        if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
             uint8_t *ubuf = s->sc.edge_emu_buffer + 18 * s->linesize;
             uint8_t *vbuf = ubuf + 9 * s->uvlinesize;
             uvsrc_y = (unsigned)uvsrc_y << field_based;
@@ -359,7 +359,7 @@ void mpeg_motion_internal(MpegEncContext *s,
 
     pix_op[0][dxy](dest_y, ptr_y, linesize, h);
 
-    if (!CONFIG_GRAY || !(s->avctx->flags & CODEC_FLAG_GRAY)) {
+    if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
         pix_op[s->chroma_x_shift][uvdxy]
             (dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift);
         pix_op[s->chroma_x_shift][uvdxy]
@@ -547,7 +547,7 @@ static inline void qpel_motion(MpegEncContext *s,
                                  src_x, src_y << field_based,
                                  s->h_edge_pos, s->v_edge_pos);
         ptr_y = s->sc.edge_emu_buffer;
-        if (!CONFIG_GRAY || !(s->avctx->flags & CODEC_FLAG_GRAY)) {
+        if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
             uint8_t *ubuf = s->sc.edge_emu_buffer + 18 * s->linesize;
             uint8_t *vbuf = ubuf + 9 * s->uvlinesize;
             s->vdsp.emulated_edge_mc(ubuf, ptr_cb,
@@ -584,7 +584,7 @@ static inline void qpel_motion(MpegEncContext *s,
         qpix_op[1][dxy](dest_y, ptr_y, linesize);
         qpix_op[1][dxy](dest_y + 8, ptr_y + 8, linesize);
     }
-    if (!CONFIG_GRAY || !(s->avctx->flags & CODEC_FLAG_GRAY)) {
+    if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
         pix_op[1][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> 1);
         pix_op[1][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> 1);
     }
@@ -740,7 +740,7 @@ static inline void apply_obmc(MpegEncContext *s,
         mx += mv[0][0];
         my += mv[0][1];
     }
-    if (!CONFIG_GRAY || !(s->avctx->flags & CODEC_FLAG_GRAY))
+    if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY))
         chroma_4mv_motion(s, dest_cb, dest_cr,
                           ref_picture, pix_op[1],
                           mx, my);
@@ -813,7 +813,7 @@ static inline void apply_8x8(MpegEncContext *s,
         }
     }
 
-    if (!CONFIG_GRAY || !(s->avctx->flags & CODEC_FLAG_GRAY))
+    if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY))
         chroma_4mv_motion(s, dest_cb, dest_cr,
                           ref_picture, pix_op[1], mx, my);
 }
diff --git a/libavcodec/mpegvideo_parser.c b/libavcodec/mpegvideo_parser.c
index cbea9b6c..1f74bfb8 100644
--- a/libavcodec/mpegvideo_parser.c
+++ b/libavcodec/mpegvideo_parser.c
@@ -47,6 +47,8 @@ static void mpegvideo_extract_headers(AVCodecParserContext *s,
     int set_dim_ret = 0;
     int bit_rate = 0;
     int vbv_delay = 0;
+    int chroma_format;
+    enum AVPixelFormat pix_fmt = AV_PIX_FMT_NONE;
 //FIXME replace the crap with get_bits()
     s->repeat_pict = 0;
 
@@ -70,6 +72,7 @@ static void mpegvideo_extract_headers(AVCodecParserContext *s,
                     set_dim_ret = ff_set_dimensions(avctx, pc->width, pc->height);
                     did_set_size=1;
                 }
+                pix_fmt = AV_PIX_FMT_YUV420P;
                 frame_rate_index = buf[3] & 0xf;
                 pc->frame_rate = avctx->framerate = ff_mpeg12_frame_rate_tab[frame_rate_index];
                 bit_rate = (buf[4]<<10) | (buf[5]<<2) | (buf[6]>>6);
@@ -91,6 +94,13 @@ static void mpegvideo_extract_headers(AVCodecParserContext *s,
                         pc->progressive_sequence = buf[1] & (1 << 3);
                         avctx->has_b_frames= !(buf[5] >> 7);
 
+                        chroma_format = (buf[1] >> 1) & 3;
+                        switch (chroma_format) {
+                        case 1: pix_fmt = AV_PIX_FMT_YUV420P; break;
+                        case 2: pix_fmt = AV_PIX_FMT_YUV422P; break;
+                        case 3: pix_fmt = AV_PIX_FMT_YUV444P; break;
+                        }
+
                         pc->width  = (pc->width & 0xFFF) | (horiz_size_ext << 12);
                         pc->height = (pc->height& 0xFFF) | ( vert_size_ext << 12);
                         bit_rate = (bit_rate&0x3FFFF) | (bit_rate_ext << 18);
@@ -149,12 +159,19 @@ static void mpegvideo_extract_headers(AVCodecParserContext *s,
         av_log(avctx, AV_LOG_ERROR, "Failed to set dimensions\n");
 
     if (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO && bit_rate) {
-        avctx->rc_max_rate = 400*bit_rate;
+        avctx->rc_max_rate = 400LL*bit_rate;
     }
     if (bit_rate &&
         ((avctx->codec_id == AV_CODEC_ID_MPEG1VIDEO && bit_rate != 0x3FFFF) || vbv_delay != 0xFFFF)) {
-        avctx->bit_rate = 400*bit_rate;
+        avctx->bit_rate = 400LL*bit_rate;
     }
+
+    if (pix_fmt != AV_PIX_FMT_NONE) {
+        s->format = pix_fmt;
+        s->width  = s->coded_width  = pc->width;
+        s->height = s->coded_height = pc->height;
+    }
+
 #if FF_API_AVCTX_TIMEBASE
     if (avctx->framerate.num)
         avctx->time_base = av_inv_q(av_mul_q(avctx->framerate, (AVRational){avctx->ticks_per_frame, 1}));
diff --git a/libavcodec/mpegvideo_xvmc.c b/libavcodec/mpegvideo_xvmc.c
index 10475e05..b469c4e2 100644
--- a/libavcodec/mpegvideo_xvmc.c
+++ b/libavcodec/mpegvideo_xvmc.c
@@ -298,7 +298,7 @@ static void ff_xvmc_decode_mb(struct MpegEncContext *s)
             cbp++;
     }
 
-    if (s->avctx->flags & CODEC_FLAG_GRAY) {
+    if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
         if (s->mb_intra) {                                   // intra frames are always full chroma blocks
             for (i = 4; i < blocks_per_mb; i++) {
                 memset(s->pblocks[i], 0, sizeof(*s->pblocks[i]));  // so we need to clear them
diff --git a/libavcodec/mpegvideodata.c b/libavcodec/mpegvideodata.c
index 5c0c9c52..5f1d8f7c 100644
--- a/libavcodec/mpegvideodata.c
+++ b/libavcodec/mpegvideodata.c
@@ -24,6 +24,13 @@ const uint8_t ff_default_chroma_qscale_table[32] = {
     16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
 };
 
+const uint8_t ff_mpeg2_non_linear_qscale[32] = {
+     0,  1,  2,  3,  4,  5,   6,   7,
+     8, 10, 12, 14, 16, 18,  20,  22,
+    24, 28, 32, 36, 40, 44,  48,  52,
+    56, 64, 72, 80, 88, 96, 104, 112,
+};
+
 const uint8_t ff_mpeg1_dc_scale_table[128] = {
 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
diff --git a/libavcodec/mpegvideodata.h b/libavcodec/mpegvideodata.h
index ebb9bbf1..14f4806d 100644
--- a/libavcodec/mpegvideodata.h
+++ b/libavcodec/mpegvideodata.h
@@ -28,6 +28,8 @@ extern const uint8_t ff_alternate_vertical_scan[64];
 extern const uint8_t ff_mpeg1_dc_scale_table[128];
 extern const uint8_t * const ff_mpeg2_dc_scale_table[4];
 
+extern const uint8_t ff_mpeg2_non_linear_qscale[32];
+
 extern const uint8_t ff_default_chroma_qscale_table[32];
 
 #endif /* AVCODEC_MPEGVIDEODATA_H */
diff --git a/libavcodec/mpegvideoencdsp.c b/libavcodec/mpegvideoencdsp.c
index 860c2d85..a7c6102c 100644
--- a/libavcodec/mpegvideoencdsp.c
+++ b/libavcodec/mpegvideoencdsp.c
@@ -25,7 +25,6 @@
 #include "libavutil/attributes.h"
 #include "libavutil/imgutils.h"
 #include "avcodec.h"
-#include "imgconvert.h"
 #include "me_cmp.h"
 #include "mpegvideoencdsp.h"
 
@@ -154,6 +153,93 @@ static void draw_edges_8_c(uint8_t *buf, int wrap, int width, int height,
             memcpy(last_line + (i + 1) * wrap, last_line, width + w + w);
 }
 
+/* 2x2 -> 1x1 */
+static void shrink22(uint8_t *dst, int dst_wrap,
+                     const uint8_t *src, int src_wrap,
+                     int width, int height)
+{
+    int w;
+    const uint8_t *s1, *s2;
+    uint8_t *d;
+
+    for (; height > 0; height--) {
+        s1 = src;
+        s2 = s1 + src_wrap;
+        d = dst;
+        for (w = width; w >= 4; w -= 4) {
+            d[0] = (s1[0] + s1[1] + s2[0] + s2[1] + 2) >> 2;
+            d[1] = (s1[2] + s1[3] + s2[2] + s2[3] + 2) >> 2;
+            d[2] = (s1[4] + s1[5] + s2[4] + s2[5] + 2) >> 2;
+            d[3] = (s1[6] + s1[7] + s2[6] + s2[7] + 2) >> 2;
+            s1 += 8;
+            s2 += 8;
+            d += 4;
+        }
+        for (; w > 0; w--) {
+            d[0] = (s1[0] + s1[1] + s2[0] + s2[1] + 2) >> 2;
+            s1 += 2;
+            s2 += 2;
+            d++;
+        }
+        src += 2 * src_wrap;
+        dst += dst_wrap;
+    }
+}
+
+/* 4x4 -> 1x1 */
+static void shrink44(uint8_t *dst, int dst_wrap,
+                     const uint8_t *src, int src_wrap,
+                     int width, int height)
+{
+    int w;
+    const uint8_t *s1, *s2, *s3, *s4;
+    uint8_t *d;
+
+    for (; height > 0; height--) {
+        s1 = src;
+        s2 = s1 + src_wrap;
+        s3 = s2 + src_wrap;
+        s4 = s3 + src_wrap;
+        d = dst;
+        for (w = width; w > 0; w--) {
+            d[0] = (s1[0] + s1[1] + s1[2] + s1[3] +
+                    s2[0] + s2[1] + s2[2] + s2[3] +
+                    s3[0] + s3[1] + s3[2] + s3[3] +
+                    s4[0] + s4[1] + s4[2] + s4[3] + 8) >> 4;
+            s1 += 4;
+            s2 += 4;
+            s3 += 4;
+            s4 += 4;
+            d++;
+        }
+        src += 4 * src_wrap;
+        dst += dst_wrap;
+    }
+}
+
+/* 8x8 -> 1x1 */
+static void shrink88(uint8_t *dst, int dst_wrap,
+                     const uint8_t *src, int src_wrap,
+                     int width, int height)
+{
+    int w, i;
+
+    for (; height > 0; height--) {
+        for(w = width;w > 0; w--) {
+            int tmp = 0;
+            for (i = 0; i < 8; i++) {
+                tmp += src[0] + src[1] + src[2] + src[3] +
+                       src[4] + src[5] + src[6] + src[7];
+                src += src_wrap;
+            }
+            *(dst++) = (tmp + 32) >> 6;
+            src += 8 - 8 * src_wrap;
+        }
+        src += 8 * src_wrap - 8 * width;
+        dst += dst_wrap - width;
+    }
+}
+
 av_cold void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c,
                                      AVCodecContext *avctx)
 {
@@ -161,9 +247,9 @@ av_cold void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c,
     c->add_8x8basis = add_8x8basis_c;
 
     c->shrink[0] = av_image_copy_plane;
-    c->shrink[1] = ff_shrink22;
-    c->shrink[2] = ff_shrink44;
-    c->shrink[3] = ff_shrink88;
+    c->shrink[1] = shrink22;
+    c->shrink[2] = shrink44;
+    c->shrink[3] = shrink88;
 
     c->pix_sum   = pix_sum_c;
     c->pix_norm1 = pix_norm1_c;
@@ -176,4 +262,6 @@ av_cold void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c,
         ff_mpegvideoencdsp_init_ppc(c, avctx);
     if (ARCH_X86)
         ff_mpegvideoencdsp_init_x86(c, avctx);
+    if (ARCH_MIPS)
+        ff_mpegvideoencdsp_init_mips(c, avctx);
 }
diff --git a/libavcodec/mpegvideoencdsp.h b/libavcodec/mpegvideoencdsp.h
index e12f4c6a..33f0282f 100644
--- a/libavcodec/mpegvideoencdsp.h
+++ b/libavcodec/mpegvideoencdsp.h
@@ -52,5 +52,7 @@ void ff_mpegvideoencdsp_init_ppc(MpegvideoEncDSPContext *c,
                                  AVCodecContext *avctx);
 void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
                                  AVCodecContext *avctx);
+void ff_mpegvideoencdsp_init_mips(MpegvideoEncDSPContext *c,
+                                  AVCodecContext *avctx);
 
 #endif /* AVCODEC_MPEGVIDEOENCDSP_H */
diff --git a/libavcodec/mqc.h b/libavcodec/mqc.h
index c0827bd5..39104b1f 100644
--- a/libavcodec/mqc.h
+++ b/libavcodec/mqc.h
@@ -43,6 +43,7 @@ typedef struct MqcState {
     unsigned int c;
     unsigned int ct;
     uint8_t cx_states[19];
+    int raw;
 } MqcState;
 
 /* encoder */
@@ -58,6 +59,7 @@ int ff_mqc_length(MqcState *mqc);
 
 /** flush the encoder [returns number of bytes encoded] */
 int ff_mqc_flush(MqcState *mqc);
+int ff_mqc_flush_to(MqcState *mqc, uint8_t *dst, int *dst_len);
 
 /* decoder */
 
@@ -65,8 +67,10 @@ int ff_mqc_flush(MqcState *mqc);
  * Initialize MQ-decoder.
  * @param mqc   MQ decoder state
  * @param bp    byte poiter
+ * @param raw   raw mode
+ * @param reset reset states
  */
-void ff_mqc_initdec(MqcState *mqc, uint8_t *bp);
+void ff_mqc_initdec(MqcState *mqc, uint8_t *bp, int raw, int reset);
 
 /**
  * MQ decoder.
diff --git a/libavcodec/mqcdec.c b/libavcodec/mqcdec.c
index 36250699..34aa5195 100644
--- a/libavcodec/mqcdec.c
+++ b/libavcodec/mqcdec.c
@@ -68,9 +68,11 @@ static int exchange(MqcState *mqc, uint8_t *cxstate, int lps)
     return d;
 }
 
-void ff_mqc_initdec(MqcState *mqc, uint8_t *bp)
+void ff_mqc_initdec(MqcState *mqc, uint8_t *bp, int raw, int reset)
 {
-    ff_mqc_init_contexts(mqc);
+    mqc->raw = raw;
+    if (reset)
+        ff_mqc_init_contexts(mqc);
     mqc->bp = bp;
     mqc->c  = (*mqc->bp ^ 0xff) << 16;
     bytein(mqc);
@@ -78,8 +80,20 @@ void ff_mqc_initdec(MqcState *mqc, uint8_t *bp)
     mqc->a = 0x8000;
 }
 
+static int mqc_decode_bypass(MqcState *mqc) {
+    int bit = !(mqc->c & 0x40000000);
+    if (!(mqc->c & 0xff)) {
+        mqc->c -= 0x100;
+        bytein(mqc);
+    }
+    mqc->c += mqc->c;
+    return bit;
+}
+
 int ff_mqc_decode(MqcState *mqc, uint8_t *cxstate)
 {
+    if (mqc->raw)
+        return mqc_decode_bypass(mqc);
     mqc->a -= ff_mqc_qe[*cxstate];
     if ((mqc->c >> 16) < mqc->a) {
         if (mqc->a & 0x8000)
diff --git a/libavcodec/mqcenc.c b/libavcodec/mqcenc.c
index 97d352be..7c9e1a0d 100644
--- a/libavcodec/mqcenc.c
+++ b/libavcodec/mqcenc.c
@@ -25,6 +25,7 @@
  * @author Kamil Nowosad
  */
 
+#include "libavutil/avassert.h"
 #include "mqc.h"
 
 static void byteout(MqcState *mqc)
@@ -117,3 +118,22 @@ int ff_mqc_flush(MqcState *mqc)
         mqc->bp++;
     return mqc->bp - mqc->bpstart;
 }
+
+int ff_mqc_flush_to(MqcState *mqc, uint8_t *dst, int *dst_len)
+{
+    MqcState mqc2 = *mqc;
+    mqc2.bpstart=
+    mqc2.bp = dst;
+    *mqc2.bp = *mqc->bp;
+    ff_mqc_flush(&mqc2);
+    *dst_len = mqc2.bp - dst;
+    if (mqc->bp < mqc->bpstart) {
+        av_assert1(mqc->bpstart - mqc->bp == 1);
+        av_assert1(*dst_len > 0);
+        av_assert1(mqc->bp[0] == 0 && dst[0] == 0);
+        (*dst_len) --;
+        memmove(dst, dst+1, *dst_len);
+        return mqc->bp - mqc->bpstart + 1 + *dst_len;
+    }
+    return mqc->bp - mqc->bpstart + *dst_len;
+}
diff --git a/libavcodec/msmpeg4dec.c b/libavcodec/msmpeg4dec.c
index 2feb2beb..aaadd9c0 100644
--- a/libavcodec/msmpeg4dec.c
+++ b/libavcodec/msmpeg4dec.c
@@ -541,7 +541,7 @@ int ff_msmpeg4_decode_picture_header(MpegEncContext * s)
             s->no_rounding = 0;
         }
     }
-    ff_dlog(s->avctx, "%d %d %d %d %d\n", s->pict_type, s->bit_rate,
+    ff_dlog(s->avctx, "%d %"PRId64" %d %d %d\n", s->pict_type, s->bit_rate,
             s->inter_intra_pred, s->width, s->height);
 
     s->esc3_level_length= 0;
@@ -924,7 +924,7 @@ AVCodec ff_msmpeg4v1_decoder = {
     .init           = ff_msmpeg4_decode_init,
     .close          = ff_h263_decode_end,
     .decode         = ff_h263_decode_frame,
-    .capabilities   = CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DRAW_HORIZ_BAND | AV_CODEC_CAP_DR1,
     .max_lowres     = 3,
     .pix_fmts       = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_YUV420P,
@@ -941,7 +941,7 @@ AVCodec ff_msmpeg4v2_decoder = {
     .init           = ff_msmpeg4_decode_init,
     .close          = ff_h263_decode_end,
     .decode         = ff_h263_decode_frame,
-    .capabilities   = CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DRAW_HORIZ_BAND | AV_CODEC_CAP_DR1,
     .max_lowres     = 3,
     .pix_fmts       = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_YUV420P,
@@ -958,7 +958,7 @@ AVCodec ff_msmpeg4v3_decoder = {
     .init           = ff_msmpeg4_decode_init,
     .close          = ff_h263_decode_end,
     .decode         = ff_h263_decode_frame,
-    .capabilities   = CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DRAW_HORIZ_BAND | AV_CODEC_CAP_DR1,
     .max_lowres     = 3,
     .pix_fmts       = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_YUV420P,
@@ -975,7 +975,7 @@ AVCodec ff_wmv1_decoder = {
     .init           = ff_msmpeg4_decode_init,
     .close          = ff_h263_decode_end,
     .decode         = ff_h263_decode_frame,
-    .capabilities   = CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DRAW_HORIZ_BAND | AV_CODEC_CAP_DR1,
     .max_lowres     = 3,
     .pix_fmts       = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_YUV420P,
diff --git a/libavcodec/msmpeg4enc.c b/libavcodec/msmpeg4enc.c
index e138f087..e1ade248 100644
--- a/libavcodec/msmpeg4enc.c
+++ b/libavcodec/msmpeg4enc.c
@@ -240,7 +240,7 @@ void ff_msmpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
     s->per_mb_rl_table = 0;
     if(s->msmpeg4_version==4)
         s->inter_intra_pred= (s->width*s->height < 320*240 && s->bit_rate<=II_BITRATE && s->pict_type==AV_PICTURE_TYPE_P);
-    ff_dlog(s, "%d %d %d %d %d\n", s->pict_type, s->bit_rate,
+    ff_dlog(s, "%d %"PRId64" %d %d %d\n", s->pict_type, s->bit_rate,
             s->inter_intra_pred, s->width, s->height);
 
     if (s->pict_type == AV_PICTURE_TYPE_I) {
diff --git a/libavcodec/msrle.c b/libavcodec/msrle.c
index 260ad807..3090a910 100644
--- a/libavcodec/msrle.c
+++ b/libavcodec/msrle.c
@@ -135,7 +135,7 @@ static int msrle_decode_frame(AVCodecContext *avctx,
         }
     } else {
         bytestream2_init(&s->gb, buf, buf_size);
-        ff_msrle_decode(avctx, (AVPicture*)s->frame, avctx->bits_per_coded_sample, &s->gb);
+        ff_msrle_decode(avctx, s->frame, avctx->bits_per_coded_sample, &s->gb);
     }
 
     if ((ret = av_frame_ref(data, s->frame)) < 0)
@@ -166,5 +166,5 @@ AVCodec ff_msrle_decoder = {
     .init           = msrle_decode_init,
     .close          = msrle_decode_end,
     .decode         = msrle_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/msrledec.c b/libavcodec/msrledec.c
index 3aa5e3ce..805802ae 100644
--- a/libavcodec/msrledec.c
+++ b/libavcodec/msrledec.c
@@ -30,7 +30,7 @@
 #include "avcodec.h"
 #include "msrledec.h"
 
-static int msrle_decode_pal4(AVCodecContext *avctx, AVPicture *pic,
+static int msrle_decode_pal4(AVCodecContext *avctx, AVFrame *pic,
                              GetByteContext *gb)
 {
     unsigned char rle_code;
@@ -126,7 +126,7 @@ static int msrle_decode_pal4(AVCodecContext *avctx, AVPicture *pic,
 }
 
 
-static int msrle_decode_8_16_24_32(AVCodecContext *avctx, AVPicture *pic,
+static int msrle_decode_8_16_24_32(AVCodecContext *avctx, AVFrame *pic,
                                    int depth, GetByteContext *gb)
 {
     uint8_t *output, *output_end;
@@ -246,7 +246,7 @@ static int msrle_decode_8_16_24_32(AVCodecContext *avctx, AVPicture *pic,
 }
 
 
-int ff_msrle_decode(AVCodecContext *avctx, AVPicture *pic,
+int ff_msrle_decode(AVCodecContext *avctx, AVFrame *pic,
                     int depth, GetByteContext *gb)
 {
     switch(depth){
diff --git a/libavcodec/msrledec.h b/libavcodec/msrledec.h
index 3f666360..7f7bbcf9 100644
--- a/libavcodec/msrledec.h
+++ b/libavcodec/msrledec.h
@@ -33,7 +33,7 @@
  * @param depth     bit depth
  * @param gb        input bytestream context
  */
-int ff_msrle_decode(AVCodecContext *avctx, AVPicture *pic,
+int ff_msrle_decode(AVCodecContext *avctx, AVFrame *pic,
                     int depth, GetByteContext *gb);
 
 #endif /* AVCODEC_MSRLEDEC_H */
diff --git a/libavcodec/mss1.c b/libavcodec/mss1.c
index 2eb67df6..a579d9d9 100644
--- a/libavcodec/mss1.c
+++ b/libavcodec/mss1.c
@@ -224,5 +224,5 @@ AVCodec ff_mss1_decoder = {
     .init           = mss1_decode_init,
     .close          = mss1_decode_end,
     .decode         = mss1_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/mss2.c b/libavcodec/mss2.c
index f57685d7..c6409349 100644
--- a/libavcodec/mss2.c
+++ b/libavcodec/mss2.c
@@ -52,9 +52,9 @@ static void arith2_normalise(ArithCoder *c)
             c->value ^= 0x8000;
             c->low   ^= 0x8000;
         }
-        c->high  = c->high  << 8 & 0xFFFFFF | 0xFF;
-        c->value = c->value << 8 & 0xFFFFFF | bytestream2_get_byte(c->gbc.gB);
-        c->low   = c->low   << 8 & 0xFFFFFF;
+        c->high  = (uint16_t)c->high  << 8  | 0xFF;
+        c->value = (uint16_t)c->value << 8  | bytestream2_get_byte(c->gbc.gB);
+        c->low   = (uint16_t)c->low   << 8;
     }
 }
 
@@ -210,8 +210,13 @@ static int decode_555(GetByteContext *gB, uint16_t *dst, int stride,
                     last_symbol = b << 8 | bytestream2_get_byte(gB);
                 else if (b > 129) {
                     repeat = 0;
-                    while (b-- > 130)
+                    while (b-- > 130) {
+                        if (repeat >= (INT_MAX >> 8) - 1) {
+                            av_log(NULL, AV_LOG_ERROR, "repeat overflow\n");
+                            return AVERROR_INVALIDDATA;
+                        }
                         repeat = (repeat << 8) + bytestream2_get_byte(gB) + 1;
+                    }
                     if (last_symbol == -2) {
                         int skip = FFMIN((unsigned)repeat, dst + w - p);
                         repeat -= skip;
@@ -477,7 +482,7 @@ static int mss2_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     Rectangle wmv9rects[MAX_WMV9_RECTANGLES], *r;
     int used_rects = 0, i, implicit_rect = 0, av_uninit(wmv9_mask);
 
-    av_assert0(FF_INPUT_BUFFER_PADDING_SIZE >=
+    av_assert0(AV_INPUT_BUFFER_PADDING_SIZE >=
                ARITH2_PADDING + (MIN_CACHE_BITS + 7) / 8);
 
     if ((ret = init_get_bits8(&gb, buf, buf_size)) < 0)
@@ -849,5 +854,5 @@ AVCodec ff_mss2_decoder = {
     .init           = mss2_decode_init,
     .close          = mss2_decode_end,
     .decode         = mss2_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/mss3.c b/libavcodec/mss3.c
index 075685b9..01941967 100644
--- a/libavcodec/mss3.c
+++ b/libavcodec/mss3.c
@@ -869,5 +869,5 @@ AVCodec ff_msa1_decoder = {
     .init           = mss3_decode_init,
     .close          = mss3_decode_end,
     .decode         = mss3_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/mss4.c b/libavcodec/mss4.c
index 00c31dda..9639fc82 100644
--- a/libavcodec/mss4.c
+++ b/libavcodec/mss4.c
@@ -679,5 +679,5 @@ AVCodec ff_mts2_decoder = {
     .init           = mss4_decode_init,
     .close          = mss4_decode_end,
     .decode         = mss4_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/msvideo1.c b/libavcodec/msvideo1.c
index 88397beb..891675fc 100644
--- a/libavcodec/msvideo1.c
+++ b/libavcodec/msvideo1.c
@@ -346,5 +346,5 @@ AVCodec ff_msvideo1_decoder = {
     .init           = msvideo1_decode_init,
     .close          = msvideo1_decode_end,
     .decode         = msvideo1_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/msvideo1enc.c b/libavcodec/msvideo1enc.c
index 5e6cdfa3..b6ae92b2 100644
--- a/libavcodec/msvideo1enc.c
+++ b/libavcodec/msvideo1enc.c
@@ -76,7 +76,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     int skips = 0;
     int quality = 24;
 
-    if ((ret = ff_alloc_packet2(avctx, pkt, avctx->width*avctx->height*9 + FF_MIN_BUFFER_SIZE)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, pkt, avctx->width*avctx->height*9 + AV_INPUT_BUFFER_MIN_SIZE, 0)) < 0)
         return ret;
     dst= buf= pkt->data;
 
diff --git a/libavcodec/mvcdec.c b/libavcodec/mvcdec.c
index 69f0ee25..74f279a6 100644
--- a/libavcodec/mvcdec.c
+++ b/libavcodec/mvcdec.c
@@ -274,7 +274,7 @@ AVCodec ff_mvc1_decoder = {
     .init           = mvc_decode_init,
     .close          = mvc_decode_end,
     .decode         = mvc_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
 #endif
 
@@ -288,6 +288,6 @@ AVCodec ff_mvc2_decoder = {
     .init           = mvc_decode_init,
     .close          = mvc_decode_end,
     .decode         = mvc_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
 #endif
diff --git a/libavcodec/mxpegdec.c b/libavcodec/mxpegdec.c
index 05c74eed..2e3ebe6e 100644
--- a/libavcodec/mxpegdec.c
+++ b/libavcodec/mxpegdec.c
@@ -343,7 +343,7 @@ AVCodec ff_mxpeg_decoder = {
     .init           = mxpeg_decode_init,
     .close          = mxpeg_decode_end,
     .decode         = mxpeg_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .max_lowres     = 3,
     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE,
 };
diff --git a/libavcodec/nellymoserdec.c b/libavcodec/nellymoserdec.c
index d5a69ed0..e6625cb9 100644
--- a/libavcodec/nellymoserdec.c
+++ b/libavcodec/nellymoserdec.c
@@ -75,7 +75,7 @@ static void nelly_decode_block(NellyMoserDecodeContext *s,
     for (i=0 ; i<NELLY_BANDS ; i++) {
         if (i > 0)
             val += ff_nelly_delta_table[get_bits(&s->gb, 5)];
-        pval = -pow(2, val/2048) * s->scale_bias;
+        pval = -exp2(val/2048) * s->scale_bias;
         for (j = 0; j < ff_nelly_band_sizes_table[i]; j++) {
             *bptr++ = val;
             *pptr++ = pval;
@@ -121,7 +121,7 @@ static av_cold int decode_init(AVCodecContext * avctx) {
     av_lfg_init(&s->random_state, 0);
     ff_mdct_init(&s->imdct_ctx, 8, 1, 1.0);
 
-    s->fdsp = avpriv_float_dsp_alloc(avctx->flags & CODEC_FLAG_BITEXACT);
+    s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
     if (!s->fdsp)
         return AVERROR(ENOMEM);
 
@@ -205,7 +205,7 @@ AVCodec ff_nellymoser_decoder = {
     .init           = decode_init,
     .close          = decode_end,
     .decode         = decode_tag,
-    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_PARAM_CHANGE,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_PARAM_CHANGE,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLT,
                                                       AV_SAMPLE_FMT_NONE },
 };
diff --git a/libavcodec/nellymoserenc.c b/libavcodec/nellymoserenc.c
index 7c77ff74..9d22ac8c 100644
--- a/libavcodec/nellymoserenc.c
+++ b/libavcodec/nellymoserenc.c
@@ -171,7 +171,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
     s->avctx = avctx;
     if ((ret = ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0)) < 0)
         goto error;
-    s->fdsp = avpriv_float_dsp_alloc(avctx->flags & CODEC_FLAG_BITEXACT);
+    s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
     if (!s->fdsp) {
         ret = AVERROR(ENOMEM);
         goto error;
@@ -179,8 +179,18 @@ static av_cold int encode_init(AVCodecContext *avctx)
 
     /* Generate overlap window */
     ff_init_ff_sine_windows(7);
+    /* faster way of doing
     for (i = 0; i < POW_TABLE_SIZE; i++)
-        pow_table[i] = pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET);
+       pow_table[i] = 2^(-i / 2048.0 - 3.0 + POW_TABLE_OFFSET); */
+    pow_table[0] = 1;
+    pow_table[1024] = M_SQRT1_2;
+    for (i = 1; i < 513; i++) {
+        double tmp = exp2(-i / 2048.0);
+        pow_table[i] = tmp;
+        pow_table[1024-i] = M_SQRT1_2 / tmp;
+        pow_table[1024+i] = tmp * M_SQRT1_2;
+        pow_table[2048-i] = 0.5 / tmp;
+    }
 
     if (s->avctx->trellis) {
         s->opt  = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float  ));
@@ -318,7 +328,7 @@ static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int
                        + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
         }
         cand[band] =
-            log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2;
+            log2(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0;
     }
 
     if (s->avctx->trellis) {
@@ -397,7 +407,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
         s->last_frame = 1;
     }
 
-    if ((ret = ff_alloc_packet2(avctx, avpkt, NELLY_BLOCK_LEN)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, avpkt, NELLY_BLOCK_LEN, 0)) < 0)
         return ret;
     encode_block(s, avpkt->data, avpkt->size);
 
@@ -418,7 +428,7 @@ AVCodec ff_nellymoser_encoder = {
     .init           = encode_init,
     .encode2        = encode_frame,
     .close          = encode_end,
-    .capabilities   = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_SMALL_LAST_FRAME | AV_CODEC_CAP_DELAY,
     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLT,
                                                      AV_SAMPLE_FMT_NONE },
 };
diff --git a/libavcodec/noise_bsf.c b/libavcodec/noise_bsf.c
index 4f609de7..556ad5c5 100644
--- a/libavcodec/noise_bsf.c
+++ b/libavcodec/noise_bsf.c
@@ -35,11 +35,11 @@ static int noise(AVBitStreamFilterContext *bsfc, AVCodecContext *avctx, const ch
     if(amount <= 0)
         return AVERROR(EINVAL);
 
-    *poutbuf= av_malloc(buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
+    *poutbuf= av_malloc(buf_size + AV_INPUT_BUFFER_PADDING_SIZE);
     if (!*poutbuf)
         return AVERROR(ENOMEM);
 
-    memcpy(*poutbuf, buf, buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
+    memcpy(*poutbuf, buf, buf_size + AV_INPUT_BUFFER_PADDING_SIZE);
     for(i=0; i<buf_size; i++){
         (*state) += (*poutbuf)[i] + 1;
         if(*state % amount == 0)
diff --git a/libavcodec/nuv.c b/libavcodec/nuv.c
index e0cec529..ade3310d 100644
--- a/libavcodec/nuv.c
+++ b/libavcodec/nuv.c
@@ -75,9 +75,12 @@ static const uint8_t fallback_cquant[] = {
  */
 static void copy_frame(AVFrame *f, const uint8_t *src, int width, int height)
 {
-    AVPicture pic;
-    avpicture_fill(&pic, src, AV_PIX_FMT_YUV420P, width, height);
-    av_picture_copy((AVPicture *)f, &pic, AV_PIX_FMT_YUV420P, width, height);
+    uint8_t *src_data[4]; /* plane pointers into src, filled by av_image_fill_arrays() */
+    int src_linesize[4]; /* line sizes matching src_data */
+    av_image_fill_arrays(src_data, src_linesize, src,
+                         f->format, width, height, 1); /* lay out src in the frame's own pixel format (no longer hard-coded YUV420P); last argument is the alignment */
+    av_image_copy(f->data, f->linesize, (const uint8_t **)src_data, src_linesize,
+                  f->format, width, height); /* copy from the src layout into the frame's buffers */
 }
 
 /**
@@ -124,7 +127,7 @@ static int codec_reinit(AVCodecContext *avctx, int width, int height,
     if (width != c->width || height != c->height) {
         // also reserve space for a possible additional header
         int buf_size = height * width * 3 / 2
-                     + FFMAX(AV_LZO_OUTPUT_PADDING, FF_INPUT_BUFFER_PADDING_SIZE)
+                     + FFMAX(AV_LZO_OUTPUT_PADDING, AV_INPUT_BUFFER_PADDING_SIZE)
                      + RTJPEG_HEADER_SIZE;
         if (buf_size > INT_MAX/8)
             return -1;
@@ -208,15 +211,15 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     buf       = &buf[12];
     buf_size -= 12;
     if (comptype == NUV_RTJPEG_IN_LZO || comptype == NUV_LZO) {
-        int outlen = c->decomp_size - FFMAX(FF_INPUT_BUFFER_PADDING_SIZE, AV_LZO_OUTPUT_PADDING);
+        int outlen = c->decomp_size - FFMAX(AV_INPUT_BUFFER_PADDING_SIZE, AV_LZO_OUTPUT_PADDING);
         int inlen  = buf_size;
         if (av_lzo1x_decode(c->decomp_buf, &outlen, buf, &inlen)) {
             av_log(avctx, AV_LOG_ERROR, "error during lzo decompression\n");
             return AVERROR_INVALIDDATA;
         }
         buf      = c->decomp_buf;
-        buf_size = c->decomp_size - FFMAX(FF_INPUT_BUFFER_PADDING_SIZE, AV_LZO_OUTPUT_PADDING) - outlen;
-        memset(c->decomp_buf + buf_size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
+        buf_size = c->decomp_size - FFMAX(AV_INPUT_BUFFER_PADDING_SIZE, AV_LZO_OUTPUT_PADDING) - outlen;
+        memset(c->decomp_buf + buf_size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
     }
     if (c->codec_frameheader) {
         int w, h, q;
@@ -347,5 +350,5 @@ AVCodec ff_nuv_decoder = {
     .init           = decode_init,
     .close          = decode_end,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c
index 87ce6f35..a3b02fa9 100644
--- a/libavcodec/nvenc.c
+++ b/libavcodec/nvenc.c
@@ -93,7 +93,7 @@ typedef struct NvencData
     union {
         int64_t timestamp;
         NvencOutputSurface *surface;
-    };
+    } u;
 } NvencData;
 
 typedef struct NvencDataList
@@ -163,6 +163,7 @@ typedef struct NvencContext
     int cbr;
     int twopass;
     int gpu;
+    int buffer_delay;
 } NvencContext;
 
 static const NvencValuePair nvenc_h264_level_pairs[] = {
@@ -296,7 +297,7 @@ static int data_queue_enqueue(NvencDataList* queue, NvencData *data)
 static int out_surf_queue_enqueue(NvencDataList* queue, NvencOutputSurface* surface)
 {
     NvencData data;
-    data.surface = surface;
+    data.u.surface = surface; /* store the surface pointer in the entry's payload union 'u' */
 
     return data_queue_enqueue(queue, &data);
 }
@@ -308,13 +309,13 @@ static NvencOutputSurface* out_surf_queue_dequeue(NvencDataList* queue)
     if (!res)
         return NULL;
 
-    return res->surface;
+    return res->u.surface;
 }
 
 static int timestamp_queue_enqueue(NvencDataList* queue, int64_t timestamp)
 {
     NvencData data;
-    data.timestamp = timestamp;
+    data.u.timestamp = timestamp; /* store the timestamp in the entry's payload union 'u' */
 
     return data_queue_enqueue(queue, &data);
 }
@@ -326,7 +327,7 @@ static int64_t timestamp_queue_dequeue(NvencDataList* queue)
     if (!res)
         return AV_NOPTS_VALUE;
 
-    return res->timestamp;
+    return res->u.timestamp;
 }
 
 #define CHECK_LOAD_FUNC(t, f, s) \
@@ -401,13 +402,13 @@ static av_cold int nvenc_check_cuda(AVCodecContext *avctx)
 
     switch (avctx->codec->id) {
     case AV_CODEC_ID_H264:
-        target_smver = 0x30;
+        target_smver = avctx->pix_fmt == AV_PIX_FMT_YUV444P ? 0x52 : 0x30;
         break;
     case AV_CODEC_ID_H265:
         target_smver = 0x52;
         break;
     default:
-        av_log(avctx, AV_LOG_FATAL, "nvenc: Unknown codec name\n");
+        av_log(avctx, AV_LOG_FATAL, "Unknown codec name\n");
         goto error;
     }
 
@@ -549,11 +550,14 @@ static av_cold int nvenc_encode_init(AVCodecContext *avctx)
     GUID encoder_preset = NV_ENC_PRESET_HQ_GUID;
     GUID codec;
     NVENCSTATUS nv_status = NV_ENC_SUCCESS;
+    AVCPBProperties *cpb_props;
     int surfaceCount = 0;
     int i, num_mbs;
     int isLL = 0;
+    int lossless = 0;
     int res = 0;
     int dw, dh;
+    int qp_inter_p;
 
     NvencContext *ctx = avctx->priv_data;
     NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
@@ -562,12 +566,6 @@ static av_cold int nvenc_encode_init(AVCodecContext *avctx)
     if (!nvenc_dyload_nvenc(avctx))
         return AVERROR_EXTERNAL;
 
-    avctx->coded_frame = av_frame_alloc();
-    if (!avctx->coded_frame) {
-        res = AVERROR(ENOMEM);
-        goto error;
-    }
-
     ctx->last_dts = AV_NOPTS_VALUE;
 
     ctx->encode_config.version = NV_ENC_CONFIG_VER;
@@ -584,7 +582,7 @@ static av_cold int nvenc_encode_init(AVCodecContext *avctx)
     }
 
     ctx->cu_context = NULL;
-    cu_res = dl_fn->cu_ctx_create(&ctx->cu_context, 0, dl_fn->nvenc_devices[ctx->gpu]);
+    cu_res = dl_fn->cu_ctx_create(&ctx->cu_context, 4, dl_fn->nvenc_devices[ctx->gpu]); // CU_CTX_SCHED_BLOCKING_SYNC=4, avoid CPU spins
 
     if (cu_res != CUDA_SUCCESS) {
         av_log(avctx, AV_LOG_FATAL, "Failed creating CUDA context for NVENC: 0x%x\n", (int)cu_res);
@@ -606,16 +604,25 @@ static av_cold int nvenc_encode_init(AVCodecContext *avctx)
     nv_status = p_nvenc->nvEncOpenEncodeSessionEx(&encode_session_params, &ctx->nvencoder);
     if (nv_status != NV_ENC_SUCCESS) {
         ctx->nvencoder = NULL;
-        av_log(avctx, AV_LOG_FATAL, "OpenEncodeSessionEx failed: 0x%x - invalid license key?\n", (int)nv_status);
+        av_log(avctx, AV_LOG_FATAL, "OpenEncodeSessionEx failed: 0x%x\n", (int)nv_status);
         res = AVERROR_EXTERNAL;
         goto error;
     }
 
     if (ctx->preset) {
-        if (!strcmp(ctx->preset, "hp")) {
+        if (!strcmp(ctx->preset, "slow")) {
+            encoder_preset = NV_ENC_PRESET_HQ_GUID;
+            ctx->twopass = 1;
+        } else if (!strcmp(ctx->preset, "medium")) {
+            encoder_preset = NV_ENC_PRESET_HQ_GUID;
+            ctx->twopass = 0;
+        } else if (!strcmp(ctx->preset, "fast")) {
             encoder_preset = NV_ENC_PRESET_HP_GUID;
+            ctx->twopass = 0;
         } else if (!strcmp(ctx->preset, "hq")) {
             encoder_preset = NV_ENC_PRESET_HQ_GUID;
+        } else if (!strcmp(ctx->preset, "hp")) {
+            encoder_preset = NV_ENC_PRESET_HP_GUID;
         } else if (!strcmp(ctx->preset, "bd")) {
             encoder_preset = NV_ENC_PRESET_BD_GUID;
         } else if (!strcmp(ctx->preset, "ll")) {
@@ -627,15 +634,25 @@ static av_cold int nvenc_encode_init(AVCodecContext *avctx)
         } else if (!strcmp(ctx->preset, "llhq")) {
             encoder_preset = NV_ENC_PRESET_LOW_LATENCY_HQ_GUID;
             isLL = 1;
+        } else if (!strcmp(ctx->preset, "lossless")) {
+            encoder_preset = NV_ENC_PRESET_LOSSLESS_DEFAULT_GUID;
+            lossless = 1; /* checked later in this function to force the lossless rate-control setup */
+        } else if (!strcmp(ctx->preset, "losslesshp")) {
+            encoder_preset = NV_ENC_PRESET_LOSSLESS_HP_GUID;
+            lossless = 1;
         } else if (!strcmp(ctx->preset, "default")) {
             encoder_preset = NV_ENC_PRESET_DEFAULT_GUID;
         } else {
-            av_log(avctx, AV_LOG_FATAL, "Preset \"%s\" is unknown! Supported presets: hp, hq, bd, ll, llhp, llhq, default\n", ctx->preset);
+            av_log(avctx, AV_LOG_FATAL, "Preset \"%s\" is unknown! Supported presets: slow, medium, fast, hp, hq, bd, ll, llhp, llhq, lossless, losslesshp, default\n", ctx->preset);
             res = AVERROR(EINVAL);
             goto error;
         }
     }
 
+    if (ctx->twopass < 0) { /* negative twopass (presumably "not set by the user" - confirm against the option default): enable it only for low-latency presets */
+        ctx->twopass = isLL;
+    }
+
     switch (avctx->codec->id) {
     case AV_CODEC_ID_H264:
         codec = NV_ENC_CODEC_H264_GUID;
@@ -644,7 +661,7 @@ static av_cold int nvenc_encode_init(AVCodecContext *avctx)
         codec = NV_ENC_CODEC_HEVC_GUID;
         break;
     default:
-        av_log(avctx, AV_LOG_ERROR, "nvenc: Unknown codec name\n");
+        av_log(avctx, AV_LOG_ERROR, "Unknown codec name\n");
         res = AVERROR(EINVAL);
         goto error;
     }
@@ -691,6 +708,9 @@ static av_cold int nvenc_encode_init(AVCodecContext *avctx)
     num_mbs = ((avctx->width + 15) >> 4) * ((avctx->height + 15) >> 4);
     ctx->max_surface_count = (num_mbs >= 8160) ? 32 : 48;
 
+    if (ctx->buffer_delay >= ctx->max_surface_count)
+        ctx->buffer_delay = ctx->max_surface_count - 1;
+
     ctx->init_encode_params.enableEncodeAsync = 0;
     ctx->init_encode_params.enablePTD = 1;
 
@@ -747,27 +767,36 @@ static av_cold int nvenc_encode_init(AVCodecContext *avctx)
     if (ctx->encode_config.frameIntervalP >= 2)
         ctx->last_dts = -2;
 
-    if (avctx->bit_rate > 0)
+    if (avctx->bit_rate > 0) {
         ctx->encode_config.rcParams.averageBitRate = avctx->bit_rate;
+    } else if (ctx->encode_config.rcParams.averageBitRate > 0) {
+        ctx->encode_config.rcParams.maxBitRate = ctx->encode_config.rcParams.averageBitRate;
+    }
 
     if (avctx->rc_max_rate > 0)
         ctx->encode_config.rcParams.maxBitRate = avctx->rc_max_rate;
 
-    if (ctx->cbr) {
+    if (lossless) {
+        if (avctx->codec->id == AV_CODEC_ID_H264)
+            ctx->encode_config.encodeCodecConfig.h264Config.qpPrimeYZeroTransformBypassFlag = 1;
+
+        ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;
+        ctx->encode_config.rcParams.constQP.qpInterB = 0;
+        ctx->encode_config.rcParams.constQP.qpInterP = 0;
+        ctx->encode_config.rcParams.constQP.qpIntra = 0;
+
+        avctx->qmin = -1;
+        avctx->qmax = -1;
+    } else if (ctx->cbr) {
         if (!ctx->twopass) {
             ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR;
-        } else if (ctx->twopass == 1 || isLL) {
+        } else {
             ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_2_PASS_QUALITY;
 
             if (avctx->codec->id == AV_CODEC_ID_H264) {
                 ctx->encode_config.encodeCodecConfig.h264Config.adaptiveTransformMode = NV_ENC_H264_ADAPTIVE_TRANSFORM_ENABLE;
                 ctx->encode_config.encodeCodecConfig.h264Config.fmoMode = NV_ENC_H264_FMO_DISABLE;
             }
-
-            if (!isLL)
-                av_log(avctx, AV_LOG_WARNING, "Twopass mode is only known to work with low latency (ll, llhq, llhp) presets.\n");
-        } else {
-            ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR;
         }
     } else if (avctx->global_quality > 0) {
         ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;
@@ -777,25 +806,61 @@ static av_cold int nvenc_encode_init(AVCodecContext *avctx)
 
         avctx->qmin = -1;
         avctx->qmax = -1;
-    } else if (avctx->qmin >= 0 && avctx->qmax >= 0) {
-        ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_VBR;
+    } else {
+        if (avctx->qmin >= 0 && avctx->qmax >= 0) {
+            ctx->encode_config.rcParams.enableMinQP = 1;
+            ctx->encode_config.rcParams.enableMaxQP = 1;
+
+            ctx->encode_config.rcParams.minQP.qpInterB = avctx->qmin;
+            ctx->encode_config.rcParams.minQP.qpInterP = avctx->qmin;
+            ctx->encode_config.rcParams.minQP.qpIntra = avctx->qmin;
+
+            ctx->encode_config.rcParams.maxQP.qpInterB = avctx->qmax;
+            ctx->encode_config.rcParams.maxQP.qpInterP = avctx->qmax;
+            ctx->encode_config.rcParams.maxQP.qpIntra = avctx->qmax;
+
+            qp_inter_p = (avctx->qmax + 3 * avctx->qmin) / 4; // biased towards Qmin
+
+            if (ctx->twopass) {
+                ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_2_PASS_VBR;
+                if (avctx->codec->id == AV_CODEC_ID_H264) {
+                    ctx->encode_config.encodeCodecConfig.h264Config.adaptiveTransformMode = NV_ENC_H264_ADAPTIVE_TRANSFORM_ENABLE;
+                    ctx->encode_config.encodeCodecConfig.h264Config.fmoMode = NV_ENC_H264_FMO_DISABLE;
+                }
+            } else {
+                ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_VBR_MINQP;
+            }
+        } else {
+            qp_inter_p = 26; // default to 26
 
-        ctx->encode_config.rcParams.enableMinQP = 1;
-        ctx->encode_config.rcParams.enableMaxQP = 1;
+            if (ctx->twopass) {
+                ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_2_PASS_VBR;
+            } else {
+                ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_VBR;
+            }
+        }
 
-        ctx->encode_config.rcParams.minQP.qpInterB = avctx->qmin;
-        ctx->encode_config.rcParams.minQP.qpInterP = avctx->qmin;
-        ctx->encode_config.rcParams.minQP.qpIntra = avctx->qmin;
+        ctx->encode_config.rcParams.enableInitialRCQP = 1;
+        ctx->encode_config.rcParams.initialRCQP.qpInterP  = qp_inter_p;
 
-        ctx->encode_config.rcParams.maxQP.qpInterB = avctx->qmax;
-        ctx->encode_config.rcParams.maxQP.qpInterP = avctx->qmax;
-        ctx->encode_config.rcParams.maxQP.qpIntra = avctx->qmax;
+        if(avctx->i_quant_factor != 0.0 && avctx->b_quant_factor != 0.0) {
+            ctx->encode_config.rcParams.initialRCQP.qpIntra = av_clip(
+                qp_inter_p * fabs(avctx->i_quant_factor) + avctx->i_quant_offset, 0, 51);
+            ctx->encode_config.rcParams.initialRCQP.qpInterB = av_clip(
+                qp_inter_p * fabs(avctx->b_quant_factor) + avctx->b_quant_offset, 0, 51);
+        } else {
+            ctx->encode_config.rcParams.initialRCQP.qpIntra = qp_inter_p;
+            ctx->encode_config.rcParams.initialRCQP.qpInterB = qp_inter_p;
+        }
     }
 
-    if (avctx->rc_buffer_size > 0)
+    if (avctx->rc_buffer_size > 0) {
         ctx->encode_config.rcParams.vbvBufferSize = avctx->rc_buffer_size;
+    } else if (ctx->encode_config.rcParams.averageBitRate > 0) {
+        ctx->encode_config.rcParams.vbvBufferSize = 2 * ctx->encode_config.rcParams.averageBitRate;
+    }
 
-    if (avctx->flags & CODEC_FLAG_INTERLACED_DCT) {
+    if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
         ctx->encode_config.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FIELD;
     } else {
         ctx->encode_config.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FRAME;
@@ -812,11 +877,17 @@ static av_cold int nvenc_encode_init(AVCodecContext *avctx)
 
         ctx->encode_config.encodeCodecConfig.h264Config.h264VUIParameters.videoFullRangeFlag = avctx->color_range == AVCOL_RANGE_JPEG;
 
-        ctx->encode_config.encodeCodecConfig.h264Config.disableSPSPPS = (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) ? 1 : 0;
-        ctx->encode_config.encodeCodecConfig.h264Config.repeatSPSPPS = (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) ? 0 : 1;
+        ctx->encode_config.encodeCodecConfig.h264Config.sliceMode = 3;
+        ctx->encode_config.encodeCodecConfig.h264Config.sliceModeData = 1;
+
+        ctx->encode_config.encodeCodecConfig.h264Config.disableSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 1 : 0;
+        ctx->encode_config.encodeCodecConfig.h264Config.repeatSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 0 : 1;
 
         if (!ctx->profile) {
             switch (avctx->profile) {
+            case FF_PROFILE_H264_HIGH_444_PREDICTIVE:
+                ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_HIGH_444_GUID;
+                break;
             case FF_PROFILE_H264_BASELINE:
                 ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_BASELINE_GUID;
                 break;
@@ -842,6 +913,9 @@ static av_cold int nvenc_encode_init(AVCodecContext *avctx)
             } else if (!strcmp(ctx->profile, "baseline")) {
                 ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_BASELINE_GUID;
                 avctx->profile = FF_PROFILE_H264_BASELINE;
+            } else if (!strcmp(ctx->profile, "high444p")) {
+                ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_HIGH_444_GUID;
+                avctx->profile = FF_PROFILE_H264_HIGH_444_PREDICTIVE;
             } else {
                 av_log(avctx, AV_LOG_FATAL, "Profile \"%s\" is unknown! Supported profiles: high, main, baseline\n", ctx->profile);
                 res = AVERROR(EINVAL);
@@ -849,6 +923,14 @@ static av_cold int nvenc_encode_init(AVCodecContext *avctx)
             }
         }
 
+        // force setting profile as high444p if input is AV_PIX_FMT_YUV444P
+        if (avctx->pix_fmt == AV_PIX_FMT_YUV444P) {
+            ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_HIGH_444_GUID;
+            avctx->profile = FF_PROFILE_H264_HIGH_444_PREDICTIVE;
+        }
+
+        ctx->encode_config.encodeCodecConfig.h264Config.chromaFormatIDC = avctx->profile == FF_PROFILE_H264_HIGH_444_PREDICTIVE ? 3 : 1;
+
         if (ctx->level) {
             res = input_string_to_uint32(avctx, nvenc_h264_level_pairs, ctx->level, &ctx->encode_config.encodeCodecConfig.h264Config.level);
 
@@ -862,8 +944,11 @@ static av_cold int nvenc_encode_init(AVCodecContext *avctx)
 
         break;
     case AV_CODEC_ID_H265:
-        ctx->encode_config.encodeCodecConfig.hevcConfig.disableSPSPPS = (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) ? 1 : 0;
-        ctx->encode_config.encodeCodecConfig.hevcConfig.repeatSPSPPS = (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) ? 0 : 1;
+        ctx->encode_config.encodeCodecConfig.hevcConfig.sliceMode = 3;
+        ctx->encode_config.encodeCodecConfig.hevcConfig.sliceModeData = 1;
+
+        ctx->encode_config.encodeCodecConfig.hevcConfig.disableSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 1 : 0;
+        ctx->encode_config.encodeCodecConfig.hevcConfig.repeatSPSPPS = (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ? 0 : 1;
 
         /* No other profile is supported in the current SDK version 5 */
         ctx->encode_config.profileGUID = NV_ENC_HEVC_PROFILE_MAIN_GUID;
@@ -978,7 +1063,7 @@ static av_cold int nvenc_encode_init(AVCodecContext *avctx)
         ctx->output_surfaces[surfaceCount].busy = 0;
     }
 
-    if (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) {
+    if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
         uint32_t outSize = 0;
         char tmpHeader[256];
         NV_ENC_SEQUENCE_PARAM_PAYLOAD payload = { 0 };
@@ -995,7 +1080,7 @@ static av_cold int nvenc_encode_init(AVCodecContext *avctx)
         }
 
         avctx->extradata_size = outSize;
-        avctx->extradata = av_mallocz(outSize + FF_INPUT_BUFFER_PADDING_SIZE);
+        avctx->extradata = av_mallocz(outSize + AV_INPUT_BUFFER_PADDING_SIZE);
 
         if (!avctx->extradata) {
             res = AVERROR(ENOMEM);
@@ -1011,6 +1096,13 @@ static av_cold int nvenc_encode_init(AVCodecContext *avctx)
     if (ctx->encode_config.rcParams.averageBitRate > 0)
         avctx->bit_rate = ctx->encode_config.rcParams.averageBitRate;
 
+    cpb_props = ff_add_cpb_side_data(avctx);
+    if (!cpb_props)
+        return AVERROR(ENOMEM);
+    cpb_props->max_bitrate = ctx->encode_config.rcParams.maxBitRate;
+    cpb_props->avg_bitrate = avctx->bit_rate;
+    cpb_props->buffer_size = ctx->encode_config.rcParams.vbvBufferSize;
+
     return 0;
 
 error:
@@ -1027,8 +1119,6 @@ static av_cold int nvenc_encode_init(AVCodecContext *avctx)
     if (ctx->cu_context)
         dl_fn->cu_ctx_destroy(ctx->cu_context);
 
-    av_frame_free(&avctx->coded_frame);
-
     nvenc_unload_nvenc(avctx);
 
     ctx->nvencoder = NULL;
@@ -1062,12 +1152,10 @@ static av_cold int nvenc_encode_close(AVCodecContext *avctx)
 
     nvenc_unload_nvenc(avctx);
 
-    av_frame_free(&avctx->coded_frame);
-
     return 0;
 }
 
-static int process_output_surface(AVCodecContext *avctx, AVPacket *pkt, AVFrame *coded_frame, NvencOutputSurface *tmpoutsurf)
+static int process_output_surface(AVCodecContext *avctx, AVPacket *pkt, NvencOutputSurface *tmpoutsurf)
 {
     NvencContext *ctx = avctx->priv_data;
     NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
@@ -1087,7 +1175,7 @@ static int process_output_surface(AVCodecContext *avctx, AVPacket *pkt, AVFrame
       slice_mode_data = ctx->encode_config.encodeCodecConfig.hevcConfig.sliceModeData;
       break;
     default:
-      av_log(avctx, AV_LOG_ERROR, "nvenc: Unknown codec name\n");
+      av_log(avctx, AV_LOG_ERROR, "Unknown codec name\n");
       res = AVERROR(EINVAL);
       goto error;
     }
@@ -1109,7 +1197,7 @@ static int process_output_surface(AVCodecContext *avctx, AVPacket *pkt, AVFrame
         goto error;
     }
 
-    if (res = ff_alloc_packet2(avctx, pkt, lock_params.bitstreamSizeInBytes)) {
+    if (res = ff_alloc_packet2(avctx, pkt, lock_params.bitstreamSizeInBytes,0)) {
         p_nvenc->nvEncUnlockBitstream(ctx->nvencoder, tmpoutsurf->output_surface);
         goto error;
     }
@@ -1123,6 +1211,8 @@ static int process_output_surface(AVCodecContext *avctx, AVPacket *pkt, AVFrame
     switch (lock_params.pictureType) {
     case NV_ENC_PIC_TYPE_IDR:
         pkt->flags |= AV_PKT_FLAG_KEY;
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
     case NV_ENC_PIC_TYPE_I:
         avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
         break;
@@ -1140,6 +1230,8 @@ static int process_output_surface(AVCodecContext *avctx, AVPacket *pkt, AVFrame
         av_log(avctx, AV_LOG_ERROR, "Please report this error and include as much information on how to reproduce it as possible.\n");
         res = AVERROR_EXTERNAL;
         goto error;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
     }
 
     pkt->pts = lock_params.outputTimeStamp;
@@ -1285,7 +1377,7 @@ static int nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         pic_params.outputBitstream = ctx->output_surfaces[i].output_surface;
         pic_params.completionEvent = 0;
 
-        if (avctx->flags & CODEC_FLAG_INTERLACED_DCT) {
+        if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
             if (frame->top_field_first) {
                 pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_TOP_BOTTOM;
             } else {
@@ -1308,7 +1400,7 @@ static int nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
           pic_params.codecPicParams.hevcPicParams.sliceModeData = ctx->encode_config.encodeCodecConfig.hevcConfig.sliceModeData;
           break;
         default:
-          av_log(avctx, AV_LOG_ERROR, "nvenc: Unknown codec name\n");
+          av_log(avctx, AV_LOG_ERROR, "Unknown codec name\n");
           return AVERROR(EINVAL);
         }
 
@@ -1355,10 +1447,10 @@ static int nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         }
     }
 
-    if (ctx->output_surface_ready_queue.count) {
+    if (ctx->output_surface_ready_queue.count && (!frame || ctx->output_surface_ready_queue.count + ctx->output_surface_queue.count >= ctx->buffer_delay)) {
         tmpoutsurf = out_surf_queue_dequeue(&ctx->output_surface_ready_queue);
 
-        res = process_output_surface(avctx, pkt, avctx->coded_frame, tmpoutsurf);
+        res = process_output_surface(avctx, pkt, tmpoutsurf);
 
         if (res)
             return res;
@@ -1376,30 +1468,35 @@ static int nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 }
 
 static const enum AVPixelFormat pix_fmts_nvenc[] = {
+    AV_PIX_FMT_YUV420P,
     AV_PIX_FMT_NV12,
+    AV_PIX_FMT_YUV444P,
     AV_PIX_FMT_NONE
 };
 
 #define OFFSET(x) offsetof(NvencContext, x)
 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 static const AVOption options[] = {
-    { "preset", "Set the encoding preset (one of hq, hp, bd, ll, llhq, llhp, default)", OFFSET(preset), AV_OPT_TYPE_STRING, { .str = "hq" }, 0, 0, VE },
-    { "profile", "Set the encoding profile (high, main or baseline)", OFFSET(profile), AV_OPT_TYPE_STRING, { 0 }, 0, 0, VE },
-    { "level", "Set the encoding level restriction (auto, 1.0, 1.0b, 1.1, 1.2, ..., 4.2, 5.0, 5.1)", OFFSET(level), AV_OPT_TYPE_STRING, { 0 }, 0, 0, VE },
-    { "tier", "Set the encoding tier (main or high)", OFFSET(tier), AV_OPT_TYPE_STRING, { 0 }, 0, 0, VE },
-    { "cbr", "Use cbr encoding mode", OFFSET(cbr), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
-    { "2pass", "Use 2pass cbr encoding mode (low latency mode only)", OFFSET(twopass), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1, VE },
+    { "preset", "Set the encoding preset (one of slow = hq 2pass, medium = hq, fast = hp, hq, hp, bd, ll, llhq, llhp, default)", OFFSET(preset), AV_OPT_TYPE_STRING, { .str = "medium" }, 0, 0, VE },
+    { "profile", "Set the encoding profile (high, main, baseline or high444p)", OFFSET(profile), AV_OPT_TYPE_STRING, { .str = "main" }, 0, 0, VE },
+    { "level", "Set the encoding level restriction (auto, 1.0, 1.0b, 1.1, 1.2, ..., 4.2, 5.0, 5.1)", OFFSET(level), AV_OPT_TYPE_STRING, { .str = "auto" }, 0, 0, VE },
+    { "tier", "Set the encoding tier (main or high)", OFFSET(tier), AV_OPT_TYPE_STRING, { .str = "main" }, 0, 0, VE },
+    { "cbr", "Use cbr encoding mode", OFFSET(cbr), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
+    { "2pass", "Use 2pass encoding mode", OFFSET(twopass), AV_OPT_TYPE_BOOL, { .i64 = -1 }, -1, 1, VE },
     { "gpu", "Selects which NVENC capable GPU to use. First GPU is 0, second is 1, and so on.", OFFSET(gpu), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
+    { "delay", "Delays frame output by the given amount of frames.", OFFSET(buffer_delay), AV_OPT_TYPE_INT, { .i64 = INT_MAX }, 0, INT_MAX, VE },
     { NULL }
 };
 
 static const AVCodecDefault nvenc_defaults[] = {
-    { "b", "0" },
+    { "b", "2M" },
     { "qmin", "-1" },
     { "qmax", "-1" },
     { "qdiff", "-1" },
     { "qblur", "-1" },
     { "qcomp", "-1" },
+    { "g", "250" },
+    { "bf", "0" },
     { NULL },
 };
 
@@ -1413,14 +1510,14 @@ static const AVClass nvenc_class = {
 
 AVCodec ff_nvenc_encoder = {
     .name = "nvenc",
-    .long_name = NULL_IF_CONFIG_SMALL("Nvidia NVENC h264 encoder"),
+    .long_name = NULL_IF_CONFIG_SMALL("NVIDIA NVENC h264 encoder"),
     .type = AVMEDIA_TYPE_VIDEO,
     .id = AV_CODEC_ID_H264,
     .priv_data_size = sizeof(NvencContext),
     .init = nvenc_encode_init,
     .encode2 = nvenc_encode_frame,
     .close = nvenc_encode_close,
-    .capabilities = CODEC_CAP_DELAY,
+    .capabilities = AV_CODEC_CAP_DELAY,
     .priv_class = &nvenc_class,
     .defaults = nvenc_defaults,
     .pix_fmts = pix_fmts_nvenc,
@@ -1438,14 +1535,14 @@ static const AVClass nvenc_h264_class = {
 
 AVCodec ff_nvenc_h264_encoder = {
     .name = "nvenc_h264",
-    .long_name = NULL_IF_CONFIG_SMALL("Nvidia NVENC h264 encoder"),
+    .long_name = NULL_IF_CONFIG_SMALL("NVIDIA NVENC h264 encoder"),
     .type = AVMEDIA_TYPE_VIDEO,
     .id = AV_CODEC_ID_H264,
     .priv_data_size = sizeof(NvencContext),
     .init = nvenc_encode_init,
     .encode2 = nvenc_encode_frame,
     .close = nvenc_encode_close,
-    .capabilities = CODEC_CAP_DELAY,
+    .capabilities = AV_CODEC_CAP_DELAY,
     .priv_class = &nvenc_h264_class,
     .defaults = nvenc_defaults,
     .pix_fmts = pix_fmts_nvenc,
@@ -1462,14 +1559,14 @@ static const AVClass nvenc_hevc_class = {
 
 AVCodec ff_nvenc_hevc_encoder = {
     .name = "nvenc_hevc",
-    .long_name = NULL_IF_CONFIG_SMALL("Nvidia NVENC hevc encoder"),
+    .long_name = NULL_IF_CONFIG_SMALL("NVIDIA NVENC hevc encoder"),
     .type = AVMEDIA_TYPE_VIDEO,
     .id = AV_CODEC_ID_H265,
     .priv_data_size = sizeof(NvencContext),
     .init = nvenc_encode_init,
     .encode2 = nvenc_encode_frame,
     .close = nvenc_encode_close,
-    .capabilities = CODEC_CAP_DELAY,
+    .capabilities = AV_CODEC_CAP_DELAY,
     .priv_class = &nvenc_hevc_class,
     .defaults = nvenc_defaults,
     .pix_fmts = pix_fmts_nvenc,
diff --git a/libavcodec/old_codec_ids.h b/libavcodec/old_codec_ids.h
deleted file mode 100644
index c7aa0e0a..00000000
--- a/libavcodec/old_codec_ids.h
+++ /dev/null
@@ -1,397 +0,0 @@
-/*
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVCODEC_OLD_CODEC_IDS_H
-#define AVCODEC_OLD_CODEC_IDS_H
-
-/*
- * This header exists to prevent new codec IDs from being accidentally added to
- * the deprecated list.
- * Do not include it directly. It will be removed on next major bump
- *
- * Do not add new items to this list. Use the AVCodecID enum instead.
- */
-
-    CODEC_ID_NONE = AV_CODEC_ID_NONE,
-
-    /* video codecs */
-    CODEC_ID_MPEG1VIDEO,
-    CODEC_ID_MPEG2VIDEO, ///< preferred ID for MPEG-1/2 video decoding
-#if FF_API_XVMC
-    CODEC_ID_MPEG2VIDEO_XVMC,
-#endif
-    CODEC_ID_H261,
-    CODEC_ID_H263,
-    CODEC_ID_RV10,
-    CODEC_ID_RV20,
-    CODEC_ID_MJPEG,
-    CODEC_ID_MJPEGB,
-    CODEC_ID_LJPEG,
-    CODEC_ID_SP5X,
-    CODEC_ID_JPEGLS,
-    CODEC_ID_MPEG4,
-    CODEC_ID_RAWVIDEO,
-    CODEC_ID_MSMPEG4V1,
-    CODEC_ID_MSMPEG4V2,
-    CODEC_ID_MSMPEG4V3,
-    CODEC_ID_WMV1,
-    CODEC_ID_WMV2,
-    CODEC_ID_H263P,
-    CODEC_ID_H263I,
-    CODEC_ID_FLV1,
-    CODEC_ID_SVQ1,
-    CODEC_ID_SVQ3,
-    CODEC_ID_DVVIDEO,
-    CODEC_ID_HUFFYUV,
-    CODEC_ID_CYUV,
-    CODEC_ID_H264,
-    CODEC_ID_INDEO3,
-    CODEC_ID_VP3,
-    CODEC_ID_THEORA,
-    CODEC_ID_ASV1,
-    CODEC_ID_ASV2,
-    CODEC_ID_FFV1,
-    CODEC_ID_4XM,
-    CODEC_ID_VCR1,
-    CODEC_ID_CLJR,
-    CODEC_ID_MDEC,
-    CODEC_ID_ROQ,
-    CODEC_ID_INTERPLAY_VIDEO,
-    CODEC_ID_XAN_WC3,
-    CODEC_ID_XAN_WC4,
-    CODEC_ID_RPZA,
-    CODEC_ID_CINEPAK,
-    CODEC_ID_WS_VQA,
-    CODEC_ID_MSRLE,
-    CODEC_ID_MSVIDEO1,
-    CODEC_ID_IDCIN,
-    CODEC_ID_8BPS,
-    CODEC_ID_SMC,
-    CODEC_ID_FLIC,
-    CODEC_ID_TRUEMOTION1,
-    CODEC_ID_VMDVIDEO,
-    CODEC_ID_MSZH,
-    CODEC_ID_ZLIB,
-    CODEC_ID_QTRLE,
-    CODEC_ID_TSCC,
-    CODEC_ID_ULTI,
-    CODEC_ID_QDRAW,
-    CODEC_ID_VIXL,
-    CODEC_ID_QPEG,
-    CODEC_ID_PNG,
-    CODEC_ID_PPM,
-    CODEC_ID_PBM,
-    CODEC_ID_PGM,
-    CODEC_ID_PGMYUV,
-    CODEC_ID_PAM,
-    CODEC_ID_FFVHUFF,
-    CODEC_ID_RV30,
-    CODEC_ID_RV40,
-    CODEC_ID_VC1,
-    CODEC_ID_WMV3,
-    CODEC_ID_LOCO,
-    CODEC_ID_WNV1,
-    CODEC_ID_AASC,
-    CODEC_ID_INDEO2,
-    CODEC_ID_FRAPS,
-    CODEC_ID_TRUEMOTION2,
-    CODEC_ID_BMP,
-    CODEC_ID_CSCD,
-    CODEC_ID_MMVIDEO,
-    CODEC_ID_ZMBV,
-    CODEC_ID_AVS,
-    CODEC_ID_SMACKVIDEO,
-    CODEC_ID_NUV,
-    CODEC_ID_KMVC,
-    CODEC_ID_FLASHSV,
-    CODEC_ID_CAVS,
-    CODEC_ID_JPEG2000,
-    CODEC_ID_VMNC,
-    CODEC_ID_VP5,
-    CODEC_ID_VP6,
-    CODEC_ID_VP6F,
-    CODEC_ID_TARGA,
-    CODEC_ID_DSICINVIDEO,
-    CODEC_ID_TIERTEXSEQVIDEO,
-    CODEC_ID_TIFF,
-    CODEC_ID_GIF,
-    CODEC_ID_DXA,
-    CODEC_ID_DNXHD,
-    CODEC_ID_THP,
-    CODEC_ID_SGI,
-    CODEC_ID_C93,
-    CODEC_ID_BETHSOFTVID,
-    CODEC_ID_PTX,
-    CODEC_ID_TXD,
-    CODEC_ID_VP6A,
-    CODEC_ID_AMV,
-    CODEC_ID_VB,
-    CODEC_ID_PCX,
-    CODEC_ID_SUNRAST,
-    CODEC_ID_INDEO4,
-    CODEC_ID_INDEO5,
-    CODEC_ID_MIMIC,
-    CODEC_ID_RL2,
-    CODEC_ID_ESCAPE124,
-    CODEC_ID_DIRAC,
-    CODEC_ID_BFI,
-    CODEC_ID_CMV,
-    CODEC_ID_MOTIONPIXELS,
-    CODEC_ID_TGV,
-    CODEC_ID_TGQ,
-    CODEC_ID_TQI,
-    CODEC_ID_AURA,
-    CODEC_ID_AURA2,
-    CODEC_ID_V210X,
-    CODEC_ID_TMV,
-    CODEC_ID_V210,
-    CODEC_ID_DPX,
-    CODEC_ID_MAD,
-    CODEC_ID_FRWU,
-    CODEC_ID_FLASHSV2,
-    CODEC_ID_CDGRAPHICS,
-    CODEC_ID_R210,
-    CODEC_ID_ANM,
-    CODEC_ID_BINKVIDEO,
-    CODEC_ID_IFF_ILBM,
-    CODEC_ID_IFF_BYTERUN1,
-    CODEC_ID_KGV1,
-    CODEC_ID_YOP,
-    CODEC_ID_VP8,
-    CODEC_ID_PICTOR,
-    CODEC_ID_ANSI,
-    CODEC_ID_A64_MULTI,
-    CODEC_ID_A64_MULTI5,
-    CODEC_ID_R10K,
-    CODEC_ID_MXPEG,
-    CODEC_ID_LAGARITH,
-    CODEC_ID_PRORES,
-    CODEC_ID_JV,
-    CODEC_ID_DFA,
-    CODEC_ID_WMV3IMAGE,
-    CODEC_ID_VC1IMAGE,
-    CODEC_ID_UTVIDEO,
-    CODEC_ID_BMV_VIDEO,
-    CODEC_ID_VBLE,
-    CODEC_ID_DXTORY,
-    CODEC_ID_V410,
-    CODEC_ID_XWD,
-    CODEC_ID_CDXL,
-    CODEC_ID_XBM,
-    CODEC_ID_ZEROCODEC,
-    CODEC_ID_MSS1,
-    CODEC_ID_MSA1,
-    CODEC_ID_TSCC2,
-    CODEC_ID_MTS2,
-    CODEC_ID_CLLC,
-    CODEC_ID_Y41P       = MKBETAG('Y','4','1','P'),
-    CODEC_ID_ESCAPE130  = MKBETAG('E','1','3','0'),
-    CODEC_ID_EXR        = MKBETAG('0','E','X','R'),
-    CODEC_ID_AVRP       = MKBETAG('A','V','R','P'),
-
-    CODEC_ID_G2M        = MKBETAG( 0 ,'G','2','M'),
-    CODEC_ID_AVUI       = MKBETAG('A','V','U','I'),
-    CODEC_ID_AYUV       = MKBETAG('A','Y','U','V'),
-    CODEC_ID_V308       = MKBETAG('V','3','0','8'),
-    CODEC_ID_V408       = MKBETAG('V','4','0','8'),
-    CODEC_ID_YUV4       = MKBETAG('Y','U','V','4'),
-    CODEC_ID_SANM       = MKBETAG('S','A','N','M'),
-    CODEC_ID_PAF_VIDEO  = MKBETAG('P','A','F','V'),
-    CODEC_ID_SNOW       = AV_CODEC_ID_SNOW,
-
-    /* various PCM "codecs" */
-    CODEC_ID_FIRST_AUDIO = 0x10000,     ///< A dummy id pointing at the start of audio codecs
-    CODEC_ID_PCM_S16LE = 0x10000,
-    CODEC_ID_PCM_S16BE,
-    CODEC_ID_PCM_U16LE,
-    CODEC_ID_PCM_U16BE,
-    CODEC_ID_PCM_S8,
-    CODEC_ID_PCM_U8,
-    CODEC_ID_PCM_MULAW,
-    CODEC_ID_PCM_ALAW,
-    CODEC_ID_PCM_S32LE,
-    CODEC_ID_PCM_S32BE,
-    CODEC_ID_PCM_U32LE,
-    CODEC_ID_PCM_U32BE,
-    CODEC_ID_PCM_S24LE,
-    CODEC_ID_PCM_S24BE,
-    CODEC_ID_PCM_U24LE,
-    CODEC_ID_PCM_U24BE,
-    CODEC_ID_PCM_S24DAUD,
-    CODEC_ID_PCM_ZORK,
-    CODEC_ID_PCM_S16LE_PLANAR,
-    CODEC_ID_PCM_DVD,
-    CODEC_ID_PCM_F32BE,
-    CODEC_ID_PCM_F32LE,
-    CODEC_ID_PCM_F64BE,
-    CODEC_ID_PCM_F64LE,
-    CODEC_ID_PCM_BLURAY,
-    CODEC_ID_PCM_LXF,
-    CODEC_ID_S302M,
-    CODEC_ID_PCM_S8_PLANAR,
-
-    /* various ADPCM codecs */
-    CODEC_ID_ADPCM_IMA_QT = 0x11000,
-    CODEC_ID_ADPCM_IMA_WAV,
-    CODEC_ID_ADPCM_IMA_DK3,
-    CODEC_ID_ADPCM_IMA_DK4,
-    CODEC_ID_ADPCM_IMA_WS,
-    CODEC_ID_ADPCM_IMA_SMJPEG,
-    CODEC_ID_ADPCM_MS,
-    CODEC_ID_ADPCM_4XM,
-    CODEC_ID_ADPCM_XA,
-    CODEC_ID_ADPCM_ADX,
-    CODEC_ID_ADPCM_EA,
-    CODEC_ID_ADPCM_G726,
-    CODEC_ID_ADPCM_CT,
-    CODEC_ID_ADPCM_SWF,
-    CODEC_ID_ADPCM_YAMAHA,
-    CODEC_ID_ADPCM_SBPRO_4,
-    CODEC_ID_ADPCM_SBPRO_3,
-    CODEC_ID_ADPCM_SBPRO_2,
-    CODEC_ID_ADPCM_THP,
-    CODEC_ID_ADPCM_IMA_AMV,
-    CODEC_ID_ADPCM_EA_R1,
-    CODEC_ID_ADPCM_EA_R3,
-    CODEC_ID_ADPCM_EA_R2,
-    CODEC_ID_ADPCM_IMA_EA_SEAD,
-    CODEC_ID_ADPCM_IMA_EA_EACS,
-    CODEC_ID_ADPCM_EA_XAS,
-    CODEC_ID_ADPCM_EA_MAXIS_XA,
-    CODEC_ID_ADPCM_IMA_ISS,
-    CODEC_ID_ADPCM_G722,
-    CODEC_ID_ADPCM_IMA_APC,
-    CODEC_ID_VIMA       = MKBETAG('V','I','M','A'),
-
-    /* AMR */
-    CODEC_ID_AMR_NB = 0x12000,
-    CODEC_ID_AMR_WB,
-
-    /* RealAudio codecs*/
-    CODEC_ID_RA_144 = 0x13000,
-    CODEC_ID_RA_288,
-
-    /* various DPCM codecs */
-    CODEC_ID_ROQ_DPCM = 0x14000,
-    CODEC_ID_INTERPLAY_DPCM,
-    CODEC_ID_XAN_DPCM,
-    CODEC_ID_SOL_DPCM,
-
-    /* audio codecs */
-    CODEC_ID_MP2 = 0x15000,
-    CODEC_ID_MP3, ///< preferred ID for decoding MPEG audio layer 1, 2 or 3
-    CODEC_ID_AAC,
-    CODEC_ID_AC3,
-    CODEC_ID_DTS,
-    CODEC_ID_VORBIS,
-    CODEC_ID_DVAUDIO,
-    CODEC_ID_WMAV1,
-    CODEC_ID_WMAV2,
-    CODEC_ID_MACE3,
-    CODEC_ID_MACE6,
-    CODEC_ID_VMDAUDIO,
-    CODEC_ID_FLAC,
-    CODEC_ID_MP3ADU,
-    CODEC_ID_MP3ON4,
-    CODEC_ID_SHORTEN,
-    CODEC_ID_ALAC,
-    CODEC_ID_WESTWOOD_SND1,
-    CODEC_ID_GSM, ///< as in Berlin toast format
-    CODEC_ID_QDM2,
-    CODEC_ID_COOK,
-    CODEC_ID_TRUESPEECH,
-    CODEC_ID_TTA,
-    CODEC_ID_SMACKAUDIO,
-    CODEC_ID_QCELP,
-    CODEC_ID_WAVPACK,
-    CODEC_ID_DSICINAUDIO,
-    CODEC_ID_IMC,
-    CODEC_ID_MUSEPACK7,
-    CODEC_ID_MLP,
-    CODEC_ID_GSM_MS, /* as found in WAV */
-    CODEC_ID_ATRAC3,
-    CODEC_ID_VOXWARE,
-    CODEC_ID_APE,
-    CODEC_ID_NELLYMOSER,
-    CODEC_ID_MUSEPACK8,
-    CODEC_ID_SPEEX,
-    CODEC_ID_WMAVOICE,
-    CODEC_ID_WMAPRO,
-    CODEC_ID_WMALOSSLESS,
-    CODEC_ID_ATRAC3P,
-    CODEC_ID_EAC3,
-    CODEC_ID_SIPR,
-    CODEC_ID_MP1,
-    CODEC_ID_TWINVQ,
-    CODEC_ID_TRUEHD,
-    CODEC_ID_MP4ALS,
-    CODEC_ID_ATRAC1,
-    CODEC_ID_BINKAUDIO_RDFT,
-    CODEC_ID_BINKAUDIO_DCT,
-    CODEC_ID_AAC_LATM,
-    CODEC_ID_QDMC,
-    CODEC_ID_CELT,
-    CODEC_ID_G723_1,
-    CODEC_ID_G729,
-    CODEC_ID_8SVX_EXP,
-    CODEC_ID_8SVX_FIB,
-    CODEC_ID_BMV_AUDIO,
-    CODEC_ID_RALF,
-    CODEC_ID_IAC,
-    CODEC_ID_ILBC,
-    CODEC_ID_FFWAVESYNTH = MKBETAG('F','F','W','S'),
-    CODEC_ID_SONIC       = MKBETAG('S','O','N','C'),
-    CODEC_ID_SONIC_LS    = MKBETAG('S','O','N','L'),
-    CODEC_ID_PAF_AUDIO   = MKBETAG('P','A','F','A'),
-    CODEC_ID_OPUS        = MKBETAG('O','P','U','S'),
-
-    /* subtitle codecs */
-    CODEC_ID_FIRST_SUBTITLE = 0x17000,          ///< A dummy ID pointing at the start of subtitle codecs.
-    CODEC_ID_DVD_SUBTITLE = 0x17000,
-    CODEC_ID_DVB_SUBTITLE,
-    CODEC_ID_TEXT,  ///< raw UTF-8 text
-    CODEC_ID_XSUB,
-    CODEC_ID_SSA,
-    CODEC_ID_MOV_TEXT,
-    CODEC_ID_HDMV_PGS_SUBTITLE,
-    CODEC_ID_DVB_TELETEXT,
-    CODEC_ID_SRT,
-    CODEC_ID_MICRODVD   = MKBETAG('m','D','V','D'),
-    CODEC_ID_EIA_608    = MKBETAG('c','6','0','8'),
-    CODEC_ID_JACOSUB    = MKBETAG('J','S','U','B'),
-    CODEC_ID_SAMI       = MKBETAG('S','A','M','I'),
-    CODEC_ID_REALTEXT   = MKBETAG('R','T','X','T'),
-    CODEC_ID_SUBVIEWER  = MKBETAG('S','u','b','V'),
-
-    /* other specific kind of codecs (generally used for attachments) */
-    CODEC_ID_FIRST_UNKNOWN = 0x18000,           ///< A dummy ID pointing at the start of various fake codecs.
-    CODEC_ID_TTF = 0x18000,
-    CODEC_ID_BINTEXT    = MKBETAG('B','T','X','T'),
-    CODEC_ID_XBIN       = MKBETAG('X','B','I','N'),
-    CODEC_ID_IDF        = MKBETAG( 0 ,'I','D','F'),
-    CODEC_ID_OTF        = MKBETAG( 0 ,'O','T','F'),
-
-    CODEC_ID_PROBE = 0x19000, ///< codec_id is not known (like CODEC_ID_NONE) but lavf should attempt to identify it
-
-    CODEC_ID_MPEG2TS = 0x20000, /**< _FAKE_ codec to indicate a raw MPEG-2 TS
-                                * stream (only used by libavformat) */
-    CODEC_ID_MPEG4SYSTEMS = 0x20001, /**< _FAKE_ codec to indicate a MPEG-4 Systems
-                                * stream (only used by libavformat) */
-    CODEC_ID_FFMETADATA = 0x21000,   ///< Dummy codec for streams containing only metadata information.
-
-#endif /* AVCODEC_OLD_CODEC_IDS_H */
diff --git a/libavcodec/on2avc.c b/libavcodec/on2avc.c
index 1d8fcbc5..62c71cce 100644
--- a/libavcodec/on2avc.c
+++ b/libavcodec/on2avc.c
@@ -22,6 +22,7 @@
 
 #include "libavutil/channel_layout.h"
 #include "libavutil/float_dsp.h"
+#include "libavutil/internal.h"
 #include "avcodec.h"
 #include "bytestream.h"
 #include "fft.h"
@@ -186,7 +187,7 @@ static int on2avc_decode_band_scales(On2AVCContext *c, GetBitContext *gb)
 
 static inline float on2avc_scale(int v, float scale)
 {
-    return v * sqrtf(fabsf(v)) * scale;
+    return v * sqrtf(abs(v)) * scale;
 }
 
 // spectral data is coded completely differently - there are no unsigned codebooks
@@ -211,9 +212,16 @@ static inline int get_egolomb(GetBitContext *gb)
 {
     int v = 4;
 
-    while (get_bits1(gb)) v++;
+    while (get_bits1(gb)) {
+        v++;
+        if (v > 30) {
+            av_log(NULL, AV_LOG_WARNING, "Too large golomb code in get_egolomb.\n");
+            v = 30;
+            break;
+        }
+    }
 
-    return (1 << v) + get_bits(gb, v);
+    return (1 << v) + get_bits_long(gb, v);
 }
 
 static int on2avc_decode_pairs(On2AVCContext *c, GetBitContext *gb, float *dst,
@@ -926,10 +934,13 @@ static av_cold int on2avc_decode_init(AVCodecContext *avctx)
         av_log(avctx, AV_LOG_WARNING,
                "Stereo mode support is not good, patch is welcome\n");
 
+    // We add -0.01 before ceil() to avoid any values to fall at exactly the
+    // midpoint between different ceil values. The results are identical to
+    // using pow(10, i / 10.0) without such bias
     for (i = 0; i < 20; i++)
-        c->scale_tab[i] = ceil(pow(10.0, i * 0.1) * 16) / 32;
+        c->scale_tab[i] = ceil(ff_exp10(i * 0.1) * 16 - 0.01) / 32;
     for (; i < 128; i++)
-        c->scale_tab[i] = ceil(pow(10.0, i * 0.1) * 0.5);
+        c->scale_tab[i] = ceil(ff_exp10(i * 0.1) * 0.5 - 0.01);
 
     if (avctx->sample_rate < 32000 || avctx->channels == 1)
         memcpy(c->long_win, ff_on2avc_window_long_24000,
@@ -951,7 +962,7 @@ static av_cold int on2avc_decode_init(AVCodecContext *avctx)
     ff_fft_init(&c->fft256,  7, 0);
     ff_fft_init(&c->fft512,  8, 1);
     ff_fft_init(&c->fft1024, 9, 1);
-    c->fdsp = avpriv_float_dsp_alloc(avctx->flags & CODEC_FLAG_BITEXACT);
+    c->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
     if (!c->fdsp)
         return AVERROR(ENOMEM);
 
@@ -1016,7 +1027,7 @@ AVCodec ff_on2avc_decoder = {
     .init           = on2avc_decode_init,
     .decode         = on2avc_decode_frame,
     .close          = on2avc_decode_close,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                       AV_SAMPLE_FMT_NONE },
 };
diff --git a/libavcodec/on2avcdata.h b/libavcodec/on2avcdata.h
index 7f498e58..95d88e02 100644
--- a/libavcodec/on2avcdata.h
+++ b/libavcodec/on2avcdata.h
@@ -20,8 +20,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef AVCODEC_ON2AVC_DATA_H
-#define AVCODEC_ON2AVC_DATA_H
+#ifndef AVCODEC_ON2AVCDATA_H
+#define AVCODEC_ON2AVCDATA_H
 
 #include <stdint.h>
 
@@ -79,4 +79,4 @@ extern const float ff_on2avc_ctab_2[2048];
 extern const float ff_on2avc_ctab_3[2048];
 extern const float ff_on2avc_ctab_4[2048];
 
-#endif /* AVCODEC_ON2AVC_DATA_H */
+#endif /* AVCODEC_ON2AVCDATA_H */
diff --git a/libavcodec/options.c b/libavcodec/options.c
index 17dca080..ea2563b5 100644
--- a/libavcodec/options.c
+++ b/libavcodec/options.c
@@ -33,7 +33,9 @@
 #include <float.h>              /* FLT_MIN, FLT_MAX */
 #include <string.h>
 
+FF_DISABLE_DEPRECATION_WARNINGS
 #include "options_table.h"
+FF_ENABLE_DEPRECATION_WARNINGS
 
 static const char* context_to_name(void* ptr) {
     AVCodecContext *avc= ptr;
@@ -211,7 +213,11 @@ int avcodec_copy_context(AVCodecContext *dest, const AVCodecContext *src)
     dest->slice_offset    = NULL;
     dest->hwaccel         = NULL;
     dest->internal        = NULL;
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
     dest->coded_frame     = NULL;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
     /* reallocate values that should be allocated separately */
     dest->extradata       = NULL;
@@ -230,7 +236,7 @@ int avcodec_copy_context(AVCodecContext *dest, const AVCodecContext *src)
             memset(((uint8_t *) dest->obj) + size, 0, pad); \
     }
     alloc_and_copy_or_fail(extradata,    src->extradata_size,
-                           FF_INPUT_BUFFER_PADDING_SIZE);
+                           AV_INPUT_BUFFER_PADDING_SIZE);
     dest->extradata_size  = src->extradata_size;
     alloc_and_copy_or_fail(intra_matrix, 64 * sizeof(int16_t), 0);
     alloc_and_copy_or_fail(inter_matrix, 64 * sizeof(int16_t), 0);
@@ -316,7 +322,6 @@ static int dummy_init(AVCodecContext *ctx)
     //TODO: this code should set every possible pointer that could be set by codec and is not an option;
     ctx->extradata_size = 8;
     ctx->extradata = av_malloc(ctx->extradata_size);
-    ctx->coded_frame = av_frame_alloc();
     return 0;
 }
 
@@ -324,7 +329,6 @@ static int dummy_close(AVCodecContext *ctx)
 {
     av_freep(&ctx->extradata);
     ctx->extradata_size = 0;
-    av_frame_free(&ctx->coded_frame);
     return 0;
 }
 
@@ -368,7 +372,7 @@ static const AVClass dummy_v2_class = {
 };
 
 /* codec with options */
-AVCodec dummy_v1_encoder = {
+static AVCodec dummy_v1_encoder = {
     .name             = "dummy_v1_codec",
     .type             = AVMEDIA_TYPE_VIDEO,
     .id               = AV_CODEC_ID_NONE - 1,
@@ -380,7 +384,7 @@ AVCodec dummy_v1_encoder = {
 };
 
 /* codec with options, different class */
-AVCodec dummy_v2_encoder = {
+static AVCodec dummy_v2_encoder = {
     .name             = "dummy_v2_codec",
     .type             = AVMEDIA_TYPE_VIDEO,
     .id               = AV_CODEC_ID_NONE - 2,
@@ -392,7 +396,7 @@ AVCodec dummy_v2_encoder = {
 };
 
 /* codec with priv data, but no class */
-AVCodec dummy_v3_encoder = {
+static AVCodec dummy_v3_encoder = {
     .name             = "dummy_v3_codec",
     .type             = AVMEDIA_TYPE_VIDEO,
     .id               = AV_CODEC_ID_NONE - 3,
@@ -403,7 +407,7 @@ AVCodec dummy_v3_encoder = {
 };
 
 /* codec without priv data */
-AVCodec dummy_v4_encoder = {
+static AVCodec dummy_v4_encoder = {
     .name             = "dummy_v4_codec",
     .type             = AVMEDIA_TYPE_VIDEO,
     .id               = AV_CODEC_ID_NONE - 4,
diff --git a/libavcodec/options_table.h b/libavcodec/options_table.h
index a906864d..aa8bfacc 100644
--- a/libavcodec/options_table.h
+++ b/libavcodec/options_table.h
@@ -42,18 +42,18 @@
 #define AV_CODEC_DEFAULT_BITRATE 200*1000
 
 static const AVOption avcodec_options[] = {
-{"b", "set bitrate (in bits/s)", OFFSET(bit_rate), AV_OPT_TYPE_INT, {.i64 = AV_CODEC_DEFAULT_BITRATE }, 0, INT_MAX, A|V|E},
-{"ab", "set bitrate (in bits/s)", OFFSET(bit_rate), AV_OPT_TYPE_INT, {.i64 = 128*1000 }, 0, INT_MAX, A|E},
+{"b", "set bitrate (in bits/s)", OFFSET(bit_rate), AV_OPT_TYPE_INT64, {.i64 = AV_CODEC_DEFAULT_BITRATE }, 0, INT64_MAX, A|V|E},
+{"ab", "set bitrate (in bits/s)", OFFSET(bit_rate), AV_OPT_TYPE_INT64, {.i64 = 128*1000 }, 0, INT_MAX, A|E},
 {"bt", "Set video bitrate tolerance (in bits/s). In 1-pass mode, bitrate tolerance specifies how far "
        "ratecontrol is willing to deviate from the target average bitrate value. This is not related "
        "to minimum/maximum bitrate. Lowering tolerance too much has an adverse effect on quality.",
        OFFSET(bit_rate_tolerance), AV_OPT_TYPE_INT, {.i64 = AV_CODEC_DEFAULT_BITRATE*20 }, 1, INT_MAX, V|E},
 {"flags", NULL, OFFSET(flags), AV_OPT_TYPE_FLAGS, {.i64 = DEFAULT }, 0, UINT_MAX, V|A|S|E|D, "flags"},
-{"unaligned", "allow decoders to produce unaligned output", 0, AV_OPT_TYPE_CONST, { .i64 = CODEC_FLAG_UNALIGNED }, INT_MIN, INT_MAX, V | D, "flags" },
-{"mv4", "use four motion vectors per macroblock (MPEG-4)", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG_4MV }, INT_MIN, INT_MAX, V|E, "flags"},
-{"qpel", "use 1/4-pel motion compensation", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG_QPEL }, INT_MIN, INT_MAX, V|E, "flags"},
-{"loop", "use loop filter", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG_LOOP_FILTER }, INT_MIN, INT_MAX, V|E, "flags"},
-{"qscale", "use fixed qscale", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG_QSCALE }, INT_MIN, INT_MAX, 0, "flags"},
+{"unaligned", "allow decoders to produce unaligned output", 0, AV_OPT_TYPE_CONST, { .i64 = AV_CODEC_FLAG_UNALIGNED }, INT_MIN, INT_MAX, V | D, "flags" },
+{"mv4", "use four motion vectors per macroblock (MPEG-4)", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_4MV }, INT_MIN, INT_MAX, V|E, "flags"},
+{"qpel", "use 1/4-pel motion compensation", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_QPEL }, INT_MIN, INT_MAX, V|E, "flags"},
+{"loop", "use loop filter", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_LOOP_FILTER }, INT_MIN, INT_MAX, V|E, "flags"},
+{"qscale", "use fixed qscale", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_QSCALE }, INT_MIN, INT_MAX, 0, "flags"},
 #if FF_API_GMC
 {"gmc", "use gmc", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG_GMC }, INT_MIN, INT_MAX, V|E, "flags"},
 #endif
@@ -63,37 +63,38 @@ static const AVOption avcodec_options[] = {
 #if FF_API_INPUT_PRESERVED
 {"input_preserved", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG_INPUT_PRESERVED }, INT_MIN, INT_MAX, 0, "flags"},
 #endif
-{"pass1", "use internal 2-pass ratecontrol in first  pass mode", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG_PASS1 }, INT_MIN, INT_MAX, 0, "flags"},
-{"pass2", "use internal 2-pass ratecontrol in second pass mode", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG_PASS2 }, INT_MIN, INT_MAX, 0, "flags"},
-{"gray", "only decode/encode grayscale", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG_GRAY }, INT_MIN, INT_MAX, V|E|D, "flags"},
+{"pass1", "use internal 2-pass ratecontrol in first  pass mode", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_PASS1 }, INT_MIN, INT_MAX, 0, "flags"},
+{"pass2", "use internal 2-pass ratecontrol in second pass mode", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_PASS2 }, INT_MIN, INT_MAX, 0, "flags"},
+{"gray", "only decode/encode grayscale", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_GRAY }, INT_MIN, INT_MAX, V|E|D, "flags"},
 #if FF_API_EMU_EDGE
 {"emu_edge", "do not draw edges", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG_EMU_EDGE }, INT_MIN, INT_MAX, 0, "flags"},
 #endif
-{"psnr", "error[?] variables will be set during encoding", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG_PSNR }, INT_MIN, INT_MAX, V|E, "flags"},
-{"truncated", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG_TRUNCATED }, INT_MIN, INT_MAX, 0, "flags"},
+{"psnr", "error[?] variables will be set during encoding", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_PSNR }, INT_MIN, INT_MAX, V|E, "flags"},
+{"truncated", "Input bitstream might be randomly truncated", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_TRUNCATED }, INT_MIN, INT_MAX, V|D, "flags"},
 #if FF_API_NORMALIZE_AQP
 {"naq", "normalize adaptive quantization", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG_NORMALIZE_AQP }, INT_MIN, INT_MAX, V|E, "flags"},
 #endif
-{"ildct", "use interlaced DCT", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG_INTERLACED_DCT }, INT_MIN, INT_MAX, V|E, "flags"},
-{"low_delay", "force low delay", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG_LOW_DELAY }, INT_MIN, INT_MAX, V|D|E, "flags"},
-{"global_header", "place global headers in extradata instead of every keyframe", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG_GLOBAL_HEADER }, INT_MIN, INT_MAX, V|A|E, "flags"},
-{"bitexact", "use only bitexact functions (except (I)DCT)", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG_BITEXACT }, INT_MIN, INT_MAX, A|V|S|D|E, "flags"},
-{"aic", "H.263 advanced intra coding / MPEG-4 AC prediction", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG_AC_PRED }, INT_MIN, INT_MAX, V|E, "flags"},
-{"ilme", "interlaced motion estimation", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG_INTERLACED_ME }, INT_MIN, INT_MAX, V|E, "flags"},
-{"cgop", "closed GOP", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG_CLOSED_GOP }, INT_MIN, INT_MAX, V|E, "flags"},
-{"output_corrupt", "Output even potentially corrupted frames", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG_OUTPUT_CORRUPT }, INT_MIN, INT_MAX, V|D, "flags"},
-{"fast", "allow non-spec-compliant speedup tricks", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG2_FAST }, INT_MIN, INT_MAX, V|E, "flags2"},
-{"noout", "skip bitstream encoding", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG2_NO_OUTPUT }, INT_MIN, INT_MAX, V|E, "flags2"},
-{"ignorecrop", "ignore cropping information from sps", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG2_IGNORE_CROP }, INT_MIN, INT_MAX, V|D, "flags2"},
-{"local_header", "place global headers at every keyframe instead of in extradata", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG2_LOCAL_HEADER }, INT_MIN, INT_MAX, V|E, "flags2"},
-{"chunks", "Frame data might be split into multiple chunks", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG2_CHUNKS }, INT_MIN, INT_MAX, V|D, "flags2"},
-{"showall", "Show all frames before the first keyframe", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG2_SHOW_ALL }, INT_MIN, INT_MAX, V|D, "flags2"},
-{"export_mvs", "export motion vectors through frame side data", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG2_EXPORT_MVS}, INT_MIN, INT_MAX, V|D, "flags2"},
-{"skip_manual", "do not skip samples and export skip information as frame side data", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG2_SKIP_MANUAL}, INT_MIN, INT_MAX, V|D, "flags2"},
+{"ildct", "use interlaced DCT", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_INTERLACED_DCT }, INT_MIN, INT_MAX, V|E, "flags"},
+{"low_delay", "force low delay", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_LOW_DELAY }, INT_MIN, INT_MAX, V|D|E, "flags"},
+{"global_header", "place global headers in extradata instead of every keyframe", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_GLOBAL_HEADER }, INT_MIN, INT_MAX, V|A|E, "flags"},
+{"bitexact", "use only bitexact functions (except (I)DCT)", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_BITEXACT }, INT_MIN, INT_MAX, A|V|S|D|E, "flags"},
+{"aic", "H.263 advanced intra coding / MPEG-4 AC prediction", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_AC_PRED }, INT_MIN, INT_MAX, V|E, "flags"},
+{"ilme", "interlaced motion estimation", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_INTERLACED_ME }, INT_MIN, INT_MAX, V|E, "flags"},
+{"cgop", "closed GOP", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_CLOSED_GOP }, INT_MIN, INT_MAX, V|E, "flags"},
+{"output_corrupt", "Output even potentially corrupted frames", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG_OUTPUT_CORRUPT }, INT_MIN, INT_MAX, V|D, "flags"},
+{"fast", "allow non-spec-compliant speedup tricks", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG2_FAST }, INT_MIN, INT_MAX, V|E, "flags2"},
+{"noout", "skip bitstream encoding", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG2_NO_OUTPUT }, INT_MIN, INT_MAX, V|E, "flags2"},
+{"ignorecrop", "ignore cropping information from sps", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG2_IGNORE_CROP }, INT_MIN, INT_MAX, V|D, "flags2"},
+{"local_header", "place global headers at every keyframe instead of in extradata", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG2_LOCAL_HEADER }, INT_MIN, INT_MAX, V|E, "flags2"},
+{"chunks", "Frame data might be split into multiple chunks", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG2_CHUNKS }, INT_MIN, INT_MAX, V|D, "flags2"},
+{"showall", "Show all frames before the first keyframe", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG2_SHOW_ALL }, INT_MIN, INT_MAX, V|D, "flags2"},
+{"export_mvs", "export motion vectors through frame side data", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG2_EXPORT_MVS}, INT_MIN, INT_MAX, V|D, "flags2"},
+{"skip_manual", "do not skip samples and export skip information as frame side data", 0, AV_OPT_TYPE_CONST, {.i64 = AV_CODEC_FLAG2_SKIP_MANUAL}, INT_MIN, INT_MAX, V|D, "flags2"},
+#if FF_API_MOTION_EST
 {"me_method", "set motion estimation method", OFFSET(me_method), AV_OPT_TYPE_INT, {.i64 = ME_EPZS }, INT_MIN, INT_MAX, V|E, "me_method"},
 {"zero", "zero motion estimation (fastest)", 0, AV_OPT_TYPE_CONST, {.i64 = ME_ZERO }, INT_MIN, INT_MAX, V|E, "me_method" },
 {"full", "full motion estimation (slowest)", 0, AV_OPT_TYPE_CONST, {.i64 = ME_FULL }, INT_MIN, INT_MAX, V|E, "me_method" },
-{"epzs", "EPZS motion estimation (default)", 0, AV_OPT_TYPE_CONST, {.i64 = ME_EPZS }, INT_MIN, INT_MAX, V|E, "me_method" },
+{"epzs", "EPZS motion estimation", 0, AV_OPT_TYPE_CONST, {.i64 = ME_EPZS }, INT_MIN, INT_MAX, V|E, "me_method" },
 {"esa", "esa motion estimation (alias for full)", 0, AV_OPT_TYPE_CONST, {.i64 = ME_FULL }, INT_MIN, INT_MAX, V|E, "me_method" },
 {"tesa", "tesa motion estimation", 0, AV_OPT_TYPE_CONST, {.i64 = ME_TESA }, INT_MIN, INT_MAX, V|E, "me_method" },
 {"dia", "diamond motion estimation (alias for EPZS)", 0, AV_OPT_TYPE_CONST, {.i64 = ME_EPZS }, INT_MIN, INT_MAX, V|E, "me_method" },
@@ -103,6 +104,7 @@ static const AVOption avcodec_options[] = {
 {"hex", "hex motion estimation", 0, AV_OPT_TYPE_CONST, {.i64 = ME_HEX }, INT_MIN, INT_MAX, V|E, "me_method" },
 {"umh", "umh motion estimation", 0, AV_OPT_TYPE_CONST, {.i64 = ME_UMH }, INT_MIN, INT_MAX, V|E, "me_method" },
 {"iter", "iter motion estimation", 0, AV_OPT_TYPE_CONST, {.i64 = ME_ITER }, INT_MIN, INT_MAX, V|E, "me_method" },
+#endif
 {"time_base", NULL, OFFSET(time_base), AV_OPT_TYPE_RATIONAL, {.dbl = 0}, INT_MIN, INT_MAX},
 {"g", "set the group of picture (GOP) size", OFFSET(gop_size), AV_OPT_TYPE_INT, {.i64 = 12 }, INT_MIN, INT_MAX, V|E},
 {"ar", "set audio sampling rate (in Hz)", OFFSET(sample_rate), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, INT_MAX, A|D|E},
@@ -120,9 +122,14 @@ static const AVOption avcodec_options[] = {
 {"qdiff", "maximum difference between the quantizer scales (VBR)", OFFSET(max_qdiff), AV_OPT_TYPE_INT, {.i64 = 3 }, INT_MIN, INT_MAX, V|E},
 {"bf", "set maximum number of B frames between non-B-frames", OFFSET(max_b_frames), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, -1, INT_MAX, V|E},
 {"b_qfactor", "QP factor between P- and B-frames", OFFSET(b_quant_factor), AV_OPT_TYPE_FLOAT, {.dbl = 1.25 }, -FLT_MAX, FLT_MAX, V|E},
+#if FF_API_RC_STRATEGY
 {"rc_strategy", "ratecontrol method", OFFSET(rc_strategy), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
+#endif
+#if FF_API_PRIVATE_OPT
 {"b_strategy", "strategy to choose between I/P/B-frames", OFFSET(b_frame_strategy), AV_OPT_TYPE_INT, {.i64 = 0 }, INT_MIN, INT_MAX, V|E},
 {"ps", "RTP payload size in bytes", OFFSET(rtp_payload_size), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
+#endif
+#if FF_API_STAT_BITS
 {"mv_bits", NULL, OFFSET(mv_bits), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX},
 {"header_bits", NULL, OFFSET(header_bits), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX},
 {"i_tex_bits", NULL, OFFSET(i_tex_bits), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX},
@@ -132,6 +139,7 @@ static const AVOption avcodec_options[] = {
 {"skip_count", NULL, OFFSET(skip_count), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX},
 {"misc_bits", NULL, OFFSET(misc_bits), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX},
 {"frame_bits", NULL, OFFSET(frame_bits), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX},
+#endif
 {"codec_tag", NULL, OFFSET(codec_tag), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX},
 {"bug", "work around not autodetected encoder bugs", OFFSET(workaround_bugs), AV_OPT_TYPE_FLAGS, {.i64 = FF_BUG_AUTODETECT }, INT_MIN, INT_MAX, V|D, "bug"},
 {"autodetect", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_BUG_AUTODETECT }, INT_MIN, INT_MAX, V|D, "bug"},
@@ -172,7 +180,9 @@ static const AVOption avcodec_options[] = {
 {"aggressive", "consider things that a sane encoder should not do as an error", 0, AV_OPT_TYPE_CONST, {.i64 = AV_EF_AGGRESSIVE }, INT_MIN, INT_MAX, A|V|D, "err_detect"},
 {"has_b_frames", NULL, OFFSET(has_b_frames), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX},
 {"block_align", NULL, OFFSET(block_align), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX},
+#if FF_API_PRIVATE_OPT
 {"mpeg_quant", "use MPEG quantizers instead of H.263", OFFSET(mpeg_quant), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
+#endif
 #if FF_API_MPV_OPT
 {"qsquish", "deprecated, use encoder private options instead", OFFSET(rc_qsquish), AV_OPT_TYPE_FLOAT, {.dbl = DEFAULT }, 0, 99, V|E},
 {"rc_qmod_amp",  "deprecated, use encoder private options instead", OFFSET(rc_qmod_amp), AV_OPT_TYPE_FLOAT, {.dbl = DEFAULT }, -FLT_MAX, FLT_MAX, V|E},
@@ -182,9 +192,9 @@ static const AVOption avcodec_options[] = {
 #if FF_API_MPV_OPT
 {"rc_eq", "deprecated, use encoder private options instead", OFFSET(rc_eq), AV_OPT_TYPE_STRING, {.str = NULL}, CHAR_MIN, CHAR_MAX, V|E},
 #endif
-{"maxrate", "maximum bitrate (in bits/s). Used for VBV together with bufsize.", OFFSET(rc_max_rate), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, INT_MAX, V|A|E},
+{"maxrate", "maximum bitrate (in bits/s). Used for VBV together with bufsize.", OFFSET(rc_max_rate), AV_OPT_TYPE_INT64, {.i64 = DEFAULT }, 0, INT_MAX, V|A|E},
 {"minrate", "minimum bitrate (in bits/s). Most useful in setting up a CBR encode. It is of little use otherwise.",
-            OFFSET(rc_min_rate), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|A|E},
+            OFFSET(rc_min_rate), AV_OPT_TYPE_INT64, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|A|E},
 {"bufsize", "set ratecontrol buffer size (in bits)", OFFSET(rc_buffer_size), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, A|V|E},
 #if FF_API_MPV_OPT
 {"rc_buf_aggressivity", "deprecated, use encoder private options instead", OFFSET(rc_buffer_aggressivity), AV_OPT_TYPE_FLOAT, {.dbl = 1.0 }, -FLT_MAX, FLT_MAX, V|E},
@@ -195,11 +205,9 @@ static const AVOption avcodec_options[] = {
 {"rc_init_cplx", "deprecated, use encoder private options instead", OFFSET(rc_initial_cplx), AV_OPT_TYPE_FLOAT, {.dbl = DEFAULT }, -FLT_MAX, FLT_MAX, V|E},
 #endif
 {"dct", "DCT algorithm", OFFSET(dct_algo), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, INT_MAX, V|E, "dct"},
-{"auto", "autoselect a good one (default)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_AUTO }, INT_MIN, INT_MAX, V|E, "dct"},
+{"auto", "autoselect a good one", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_AUTO }, INT_MIN, INT_MAX, V|E, "dct"},
 {"fastint", "fast integer", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_FASTINT }, INT_MIN, INT_MAX, V|E, "dct"},
-#if FF_API_UNUSED_MEMBERS
 {"int", "accurate integer", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_INT }, INT_MIN, INT_MAX, V|E, "dct"},
-#endif /* FF_API_UNUSED_MEMBERS */
 {"mmx", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_MMX }, INT_MIN, INT_MAX, V|E, "dct"},
 {"altivec", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_ALTIVEC }, INT_MIN, INT_MAX, V|E, "dct"},
 {"faan", "floating point AAN DCT", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_FAAN }, INT_MIN, INT_MAX, V|E, "dct"},
@@ -238,10 +246,12 @@ static const AVOption avcodec_options[] = {
 {"deblock", "use strong deblock filter for damaged MBs", 0, AV_OPT_TYPE_CONST, {.i64 = FF_EC_DEBLOCK }, INT_MIN, INT_MAX, V|D, "ec"},
 {"favor_inter", "favor predicting from the previous frame", 0, AV_OPT_TYPE_CONST, {.i64 = FF_EC_FAVOR_INTER }, INT_MIN, INT_MAX, V|D, "ec"},
 {"bits_per_coded_sample", NULL, OFFSET(bits_per_coded_sample), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX},
+#if FF_API_PRIVATE_OPT
 {"pred", "prediction method", OFFSET(prediction_method), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E, "pred"},
 {"left", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_PRED_LEFT }, INT_MIN, INT_MAX, V|E, "pred"},
 {"plane", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_PRED_PLANE }, INT_MIN, INT_MAX, V|E, "pred"},
 {"median", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_PRED_MEDIAN }, INT_MIN, INT_MAX, V|E, "pred"},
+#endif
 {"aspect", "sample aspect ratio", OFFSET(sample_aspect_ratio), AV_OPT_TYPE_RATIONAL, {.dbl = 0}, 0, 10, V|E},
 {"debug", "print specific debug info", OFFSET(debug), AV_OPT_TYPE_FLAGS, {.i64 = DEFAULT }, 0, INT_MAX, V|A|S|E|D, "debug"},
 {"pict", "picture info", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_PICT_INFO }, INT_MIN, INT_MAX, V|D, "debug"},
@@ -253,6 +263,7 @@ static const AVOption avcodec_options[] = {
 {"mv", "motion vector", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_MV }, INT_MIN, INT_MAX, V|D, "debug"},
 #endif
 {"dct_coeff", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_DCT_COEFF }, INT_MIN, INT_MAX, V|D, "debug"},
+{"green_metadata", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_GREEN_MD }, INT_MIN, INT_MAX, V|D, "debug"},
 {"skip", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_SKIP }, INT_MIN, INT_MAX, V|D, "debug"},
 {"startcode", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_STARTCODE }, INT_MIN, INT_MAX, V|D, "debug"},
 #if FF_API_UNUSED_MEMBERS
@@ -278,9 +289,11 @@ static const AVOption avcodec_options[] = {
 {"ildctcmp", "interlaced DCT compare function", OFFSET(ildct_cmp), AV_OPT_TYPE_INT, {.i64 = FF_CMP_VSAD }, INT_MIN, INT_MAX, V|E, "cmp_func"},
 {"dia_size", "diamond type & size for motion estimation", OFFSET(dia_size), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
 {"last_pred", "amount of motion predictors from the previous frame", OFFSET(last_predictor_count), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
+#if FF_API_PRIVATE_OPT
 {"preme", "pre motion estimation", OFFSET(pre_me), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
+#endif
 {"precmp", "pre motion estimation compare function", OFFSET(me_pre_cmp), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E, "cmp_func"},
-{"sad", "sum of absolute differences, fast (default)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_SAD }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"sad", "sum of absolute differences, fast", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_SAD }, INT_MIN, INT_MAX, V|E, "cmp_func"},
 {"sse", "sum of squared errors", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_SSE }, INT_MIN, INT_MAX, V|E, "cmp_func"},
 {"satd", "sum of absolute Hadamard transformed differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_SATD }, INT_MIN, INT_MAX, V|E, "cmp_func"},
 {"dct", "sum of absolute DCT transformed differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_DCT }, INT_MIN, INT_MAX, V|E, "cmp_func"},
@@ -299,11 +312,16 @@ static const AVOption avcodec_options[] = {
 {"chroma", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_CHROMA }, INT_MIN, INT_MAX, V|E, "cmp_func"},
 {"pre_dia_size", "diamond type & size for motion estimation pre-pass", OFFSET(pre_dia_size), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
 {"subq", "sub-pel motion estimation quality", OFFSET(me_subpel_quality), AV_OPT_TYPE_INT, {.i64 = 8 }, INT_MIN, INT_MAX, V|E},
+#if FF_API_AFD
 {"dtg_active_format", NULL, OFFSET(dtg_active_format), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX},
+#endif
 {"me_range", "limit motion vectors range (1023 for DivX player)", OFFSET(me_range), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
+#if FF_API_QUANT_BIAS
 {"ibias", "intra quant bias", OFFSET(intra_quant_bias), AV_OPT_TYPE_INT, {.i64 = FF_DEFAULT_QUANT_BIAS }, INT_MIN, INT_MAX, V|E},
 {"pbias", "inter quant bias", OFFSET(inter_quant_bias), AV_OPT_TYPE_INT, {.i64 = FF_DEFAULT_QUANT_BIAS }, INT_MIN, INT_MAX, V|E},
+#endif
 {"global_quality", NULL, OFFSET(global_quality), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|A|E},
+#if FF_API_CODER_TYPE
 {"coder", NULL, OFFSET(coder_type), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E, "coder"},
 {"vlc", "variable length coder / Huffman coder", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CODER_TYPE_VLC }, INT_MIN, INT_MAX, V|E, "coder"},
 {"ac", "arithmetic coder", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CODER_TYPE_AC }, INT_MIN, INT_MAX, V|E, "coder"},
@@ -312,30 +330,37 @@ static const AVOption avcodec_options[] = {
 #if FF_API_UNUSED_MEMBERS
 {"deflate", "deflate-based coder", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CODER_TYPE_DEFLATE }, INT_MIN, INT_MAX, V|E, "coder"},
 #endif /* FF_API_UNUSED_MEMBERS */
+#endif /* FF_API_CODER_TYPE */
+#if FF_API_PRIVATE_OPT
 {"context", "context model", OFFSET(context_model), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
+#endif
 {"slice_flags", NULL, OFFSET(slice_flags), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX},
 #if FF_API_XVMC
 {"xvmc_acceleration", NULL, OFFSET(xvmc_acceleration), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX},
 #endif /* FF_API_XVMC */
 {"mbd", "macroblock decision algorithm (high quality mode)", OFFSET(mb_decision), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, 2, V|E, "mbd"},
-{"simple", "use mbcmp (default)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MB_DECISION_SIMPLE }, INT_MIN, INT_MAX, V|E, "mbd"},
+{"simple", "use mbcmp", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MB_DECISION_SIMPLE }, INT_MIN, INT_MAX, V|E, "mbd"},
 {"bits", "use fewest bits", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MB_DECISION_BITS }, INT_MIN, INT_MAX, V|E, "mbd"},
 {"rd", "use best rate distortion", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MB_DECISION_RD }, INT_MIN, INT_MAX, V|E, "mbd"},
 #if FF_API_STREAM_CODEC_TAG
 {"stream_codec_tag", NULL, OFFSET(stream_codec_tag), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX},
 #endif
+#if FF_API_PRIVATE_OPT
 {"sc_threshold", "scene change threshold", OFFSET(scenechange_threshold), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
+#endif
 #if FF_API_MPV_OPT
 {"lmin", "deprecated, use encoder private options instead", OFFSET(lmin), AV_OPT_TYPE_INT, {.i64 =  0 }, 0, INT_MAX, V|E},
 {"lmax", "deprecated, use encoder private options instead", OFFSET(lmax), AV_OPT_TYPE_INT, {.i64 =  0 }, 0, INT_MAX, V|E},
 #endif
+#if FF_API_PRIVATE_OPT
 {"nr", "noise reduction", OFFSET(noise_reduction), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
+#endif
 {"rc_init_occupancy", "number of bits which should be loaded into the rc buffer before decoding starts", OFFSET(rc_initial_buffer_occupancy), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
 {"flags2", NULL, OFFSET(flags2), AV_OPT_TYPE_FLAGS, {.i64 = DEFAULT}, 0, UINT_MAX, V|A|E|D, "flags2"},
 #if FF_API_ERROR_RATE
 {"error", NULL, OFFSET(error_rate), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
 #endif
-{"threads", NULL, OFFSET(thread_count), AV_OPT_TYPE_INT, {.i64 = 1 }, 0, INT_MAX, V|A|E|D, "threads"},
+{"threads", "set the number of threads", OFFSET(thread_count), AV_OPT_TYPE_INT, {.i64 = 1 }, 0, INT_MAX, V|A|E|D, "threads"},
 {"auto", "autodetect a suitable number of threads to use", 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, INT_MIN, INT_MAX, V|E|D, "threads"},
 #if FF_API_MPV_OPT
 {"me_threshold", "motion estimation threshold", OFFSET(me_threshold), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
@@ -369,16 +394,20 @@ static const AVOption avcodec_options[] = {
 {"level", NULL, OFFSET(level), AV_OPT_TYPE_INT, {.i64 = FF_LEVEL_UNKNOWN }, INT_MIN, INT_MAX, V|A|E, "level"},
 {"unknown", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_LEVEL_UNKNOWN }, INT_MIN, INT_MAX, V|A|E, "level"},
 {"lowres", "decode at 1= 1/2, 2=1/4, 3=1/8 resolutions", OFFSET(lowres), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, V|A|D},
+#if FF_API_PRIVATE_OPT
 {"skip_threshold", "frame skip threshold", OFFSET(frame_skip_threshold), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
 {"skip_factor", "frame skip factor", OFFSET(frame_skip_factor), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
 {"skip_exp", "frame skip exponent", OFFSET(frame_skip_exp), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
 {"skipcmp", "frame skip compare function", OFFSET(frame_skip_cmp), AV_OPT_TYPE_INT, {.i64 = FF_CMP_DCTMAX }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+#endif
 #if FF_API_MPV_OPT
 {"border_mask", "deprecated, use encoder private options instead", OFFSET(border_masking), AV_OPT_TYPE_FLOAT, {.dbl = DEFAULT }, -FLT_MAX, FLT_MAX, V|E},
 #endif
 {"mblmin", "minimum macroblock Lagrange factor (VBR)", OFFSET(mb_lmin), AV_OPT_TYPE_INT, {.i64 = FF_QP2LAMBDA * 2 }, 1, FF_LAMBDA_MAX, V|E},
 {"mblmax", "maximum macroblock Lagrange factor (VBR)", OFFSET(mb_lmax), AV_OPT_TYPE_INT, {.i64 = FF_QP2LAMBDA * 31 }, 1, FF_LAMBDA_MAX, V|E},
+#if FF_API_PRIVATE_OPT
 {"mepc", "motion estimation bitrate penalty compensation (1.0 = 256)", OFFSET(me_penalty_compensation), AV_OPT_TYPE_INT, {.i64 = 256 }, INT_MIN, INT_MAX, V|E},
+#endif
 {"skip_loop_filter", "skip loop filtering process for the selected frames", OFFSET(skip_loop_filter), AV_OPT_TYPE_INT, {.i64 = AVDISCARD_DEFAULT }, INT_MIN, INT_MAX, V|D, "avdiscard"},
 {"skip_idct"       , "skip IDCT/dequantization for the selected frames",    OFFSET(skip_idct),        AV_OPT_TYPE_INT, {.i64 = AVDISCARD_DEFAULT }, INT_MIN, INT_MAX, V|D, "avdiscard"},
 {"skip_frame"      , "skip decoding for the selected frames",               OFFSET(skip_frame),       AV_OPT_TYPE_INT, {.i64 = AVDISCARD_DEFAULT }, INT_MIN, INT_MAX, V|D, "avdiscard"},
@@ -390,22 +419,27 @@ static const AVOption avcodec_options[] = {
 {"nointra"         , "discard all frames except I frames",  0, AV_OPT_TYPE_CONST, {.i64 = AVDISCARD_NONINTRA}, INT_MIN, INT_MAX, V|D, "avdiscard"},
 {"all"             , "discard all frames",                  0, AV_OPT_TYPE_CONST, {.i64 = AVDISCARD_ALL     }, INT_MIN, INT_MAX, V|D, "avdiscard"},
 {"bidir_refine", "refine the two motion vectors used in bidirectional macroblocks", OFFSET(bidir_refine), AV_OPT_TYPE_INT, {.i64 = 1 }, 0, 4, V|E},
+#if FF_API_PRIVATE_OPT
 {"brd_scale", "downscale frames for dynamic B-frame decision", OFFSET(brd_scale), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, 10, V|E},
+#endif
 {"keyint_min", "minimum interval between IDR-frames", OFFSET(keyint_min), AV_OPT_TYPE_INT, {.i64 = 25 }, INT_MIN, INT_MAX, V|E},
 {"refs", "reference frames to consider for motion compensation", OFFSET(refs), AV_OPT_TYPE_INT, {.i64 = 1 }, INT_MIN, INT_MAX, V|E},
+#if FF_API_PRIVATE_OPT
 {"chromaoffset", "chroma QP offset from luma", OFFSET(chromaoffset), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
+#endif
 {"trellis", "rate-distortion optimal quantization", OFFSET(trellis), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|A|E},
 #if FF_API_UNUSED_MEMBERS
 {"sc_factor", "multiplied by qscale for each frame and added to scene_change_score", OFFSET(scenechange_factor), AV_OPT_TYPE_INT, {.i64 = 6 }, 0, INT_MAX, V|E},
 #endif /* FF_API_UNUSED_MEMBERS */
 {"mv0_threshold", NULL, OFFSET(mv0_threshold), AV_OPT_TYPE_INT, {.i64 = 256 }, 0, INT_MAX, V|E},
+#if FF_API_PRIVATE_OPT
 {"b_sensitivity", "adjust sensitivity of b_frame_strategy 1", OFFSET(b_sensitivity), AV_OPT_TYPE_INT, {.i64 = 40 }, 1, INT_MAX, V|E},
+#endif
 {"compression_level", NULL, OFFSET(compression_level), AV_OPT_TYPE_INT, {.i64 = FF_COMPRESSION_DEFAULT }, INT_MIN, INT_MAX, V|A|E},
+#if FF_API_PRIVATE_OPT
 {"min_prediction_order", NULL, OFFSET(min_prediction_order), AV_OPT_TYPE_INT, {.i64 = -1 }, INT_MIN, INT_MAX, A|E},
 {"max_prediction_order", NULL, OFFSET(max_prediction_order), AV_OPT_TYPE_INT, {.i64 = -1 }, INT_MIN, INT_MAX, A|E},
 {"timecode_frame_start", "GOP timecode frame start number, in non-drop-frame format", OFFSET(timecode_frame_start), AV_OPT_TYPE_INT64, {.i64 = -1 }, -1, INT64_MAX, V|E},
-#if FF_API_REQUEST_CHANNELS
-{"request_channels", "set desired number of audio channels", OFFSET(request_channels), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, INT_MAX, A|D},
 #endif
 {"bits_per_raw_sample", NULL, OFFSET(bits_per_raw_sample), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX},
 {"channel_layout", NULL, OFFSET(channel_layout), AV_OPT_TYPE_INT64, {.i64 = DEFAULT }, 0, INT64_MAX, A|E|D, "channel_layout"},
@@ -414,14 +448,15 @@ static const AVOption avcodec_options[] = {
 {"rc_min_vbv_use", NULL, OFFSET(rc_min_vbv_overflow_use),  AV_OPT_TYPE_FLOAT, {.dbl = 3 },     0.0, FLT_MAX, V|E},
 {"ticks_per_frame", NULL, OFFSET(ticks_per_frame), AV_OPT_TYPE_INT, {.i64 = 1 }, 1, INT_MAX, A|V|E|D},
 {"color_primaries", "color primaries", OFFSET(color_primaries), AV_OPT_TYPE_INT, {.i64 = AVCOL_PRI_UNSPECIFIED }, 1, AVCOL_PRI_NB-1, V|E|D, "color_primaries_type"},
-{"bt709",       "BT.709",      0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_BT709 },       INT_MIN, INT_MAX, V|E|D, "color_primaries_type"},
-{"unspecified", "Unspecified", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_UNSPECIFIED }, INT_MIN, INT_MAX, V|E|D, "color_primaries_type"},
-{"bt470m",      "BT.470 M",    0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_BT470M },      INT_MIN, INT_MAX, V|E|D, "color_primaries_type"},
-{"bt470bg",     "BT.470 BG",   0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_BT470BG },     INT_MIN, INT_MAX, V|E|D, "color_primaries_type"},
-{"smpte170m",   "SMPTE 170 M", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_SMPTE170M },   INT_MIN, INT_MAX, V|E|D, "color_primaries_type"},
-{"smpte240m",   "SMPTE 240 M", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_SMPTE240M },   INT_MIN, INT_MAX, V|E|D, "color_primaries_type"},
-{"film",        "Film",        0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_FILM },        INT_MIN, INT_MAX, V|E|D, "color_primaries_type"},
-{"bt2020",      "BT.2020",     0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_BT2020 },      INT_MIN, INT_MAX, V|E|D, "color_primaries_type"},
+{"bt709",       "BT.709",         0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_BT709 },        INT_MIN, INT_MAX, V|E|D, "color_primaries_type"},
+{"unspecified", "Unspecified",    0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_UNSPECIFIED },  INT_MIN, INT_MAX, V|E|D, "color_primaries_type"},
+{"bt470m",      "BT.470 M",       0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_BT470M },       INT_MIN, INT_MAX, V|E|D, "color_primaries_type"},
+{"bt470bg",     "BT.470 BG",      0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_BT470BG },      INT_MIN, INT_MAX, V|E|D, "color_primaries_type"},
+{"smpte170m",   "SMPTE 170 M",    0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_SMPTE170M },    INT_MIN, INT_MAX, V|E|D, "color_primaries_type"},
+{"smpte240m",   "SMPTE 240 M",    0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_SMPTE240M },    INT_MIN, INT_MAX, V|E|D, "color_primaries_type"},
+{"film",        "Film",           0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_FILM },         INT_MIN, INT_MAX, V|E|D, "color_primaries_type"},
+{"bt2020",      "BT.2020",        0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_BT2020 },       INT_MIN, INT_MAX, V|E|D, "color_primaries_type"},
+{"smpte428_1",  "SMPTE ST 428-1", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_PRI_SMPTEST428_1 }, INT_MIN, INT_MAX, V|E|D, "color_primaries_type"},
 {"color_trc", "color transfer characteristics", OFFSET(color_trc), AV_OPT_TYPE_INT, {.i64 = AVCOL_TRC_UNSPECIFIED }, 1, AVCOL_TRC_NB-1, V|E|D, "color_trc_type"},
 {"bt709",        "BT.709",           0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT709 },        INT_MIN, INT_MAX, V|E|D, "color_trc_type"},
 {"unspecified",  "Unspecified",      0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_UNSPECIFIED },  INT_MIN, INT_MAX, V|E|D, "color_trc_type"},
@@ -437,6 +472,8 @@ static const AVOption avcodec_options[] = {
 {"iec61966_2_1", "IEC 61966-2-1",    0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_IEC61966_2_1 }, INT_MIN, INT_MAX, V|E|D, "color_trc_type"},
 {"bt2020_10bit", "BT.2020 - 10 bit", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT2020_10 },    INT_MIN, INT_MAX, V|E|D, "color_trc_type"},
 {"bt2020_12bit", "BT.2020 - 12 bit", 0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_BT2020_12 },    INT_MIN, INT_MAX, V|E|D, "color_trc_type"},
+{"smpte2084",    "SMPTE ST 2084",    0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_SMPTEST2084 },  INT_MIN, INT_MAX, V|E|D, "color_trc_type"},
+{"smpte428_1",   "SMPTE ST 428-1",   0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_TRC_SMPTEST428_1 }, INT_MIN, INT_MAX, V|E|D, "color_trc_type"},
 {"colorspace", "color space", OFFSET(colorspace), AV_OPT_TYPE_INT, {.i64 = AVCOL_SPC_UNSPECIFIED }, 0, AVCOL_SPC_NB-1, V|E|D, "colorspace_type"},
 {"rgb",         "RGB",         0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_RGB },         INT_MIN, INT_MAX, V|E|D, "colorspace_type"},
 {"bt709",       "BT.709",      0, AV_OPT_TYPE_CONST, {.i64 = AVCOL_SPC_BT709 },       INT_MIN, INT_MAX, V|E|D, "colorspace_type"},
@@ -461,7 +498,7 @@ static const AVOption avcodec_options[] = {
 {"bottomleft",  "Bottom-left", 0, AV_OPT_TYPE_CONST, {.i64 = AVCHROMA_LOC_BOTTOMLEFT },  INT_MIN, INT_MAX, V|E|D, "chroma_sample_location_type"},
 {"bottom",      "Bottom",      0, AV_OPT_TYPE_CONST, {.i64 = AVCHROMA_LOC_BOTTOM },      INT_MIN, INT_MAX, V|E|D, "chroma_sample_location_type"},
 {"log_level_offset", "set the log level offset", OFFSET(log_level_offset), AV_OPT_TYPE_INT, {.i64 = 0 }, INT_MIN, INT_MAX },
-{"slices", "number of slices, used in parallelized encoding", OFFSET(slices), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, V|E},
+{"slices", "set the number of slices, used in parallelized encoding", OFFSET(slices), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, V|E},
 {"thread_type", "select multithreading type", OFFSET(thread_type), AV_OPT_TYPE_FLAGS, {.i64 = FF_THREAD_SLICE|FF_THREAD_FRAME }, 0, INT_MAX, V|A|E|D, "thread_type"},
 {"slice", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_THREAD_SLICE }, INT_MIN, INT_MAX, V|E|D, "thread_type"},
 {"frame", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_THREAD_FRAME }, INT_MIN, INT_MAX, V|E|D, "thread_type"},
@@ -482,9 +519,11 @@ static const AVOption avcodec_options[] = {
 {"do_nothing",  NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_SUB_CHARENC_MODE_DO_NOTHING},  INT_MIN, INT_MAX, S|D, "sub_charenc_mode"},
 {"auto",        NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_SUB_CHARENC_MODE_AUTOMATIC},   INT_MIN, INT_MAX, S|D, "sub_charenc_mode"},
 {"pre_decoder", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_SUB_CHARENC_MODE_PRE_DECODER}, INT_MIN, INT_MAX, S|D, "sub_charenc_mode"},
-{"refcounted_frames", NULL, OFFSET(refcounted_frames), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, A|V|D },
-{"side_data_only_packets", NULL, OFFSET(side_data_only_packets), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, A|V|E },
-{"skip_alpha", "Skip processing alpha", OFFSET(skip_alpha), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 1, V|D },
+{"refcounted_frames", NULL, OFFSET(refcounted_frames), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, A|V|D },
+#if FF_API_SIDEDATA_ONLY_PKT
+{"side_data_only_packets", NULL, OFFSET(side_data_only_packets), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, A|V|E },
+#endif
+{"skip_alpha", "Skip processing alpha", OFFSET(skip_alpha), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, V|D },
 {"field_order", "Field order", OFFSET(field_order), AV_OPT_TYPE_INT, {.i64 = AV_FIELD_UNKNOWN }, 0, 5, V|D|E, "field_order" },
 {"progressive", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = AV_FIELD_PROGRESSIVE }, 0, 0, V|D|E, "field_order" },
 {"tt", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = AV_FIELD_TT }, 0, 0, V|D|E, "field_order" },
diff --git a/libavcodec/opus.c b/libavcodec/opus.c
index 6b3d3c31..f2b8ecc4 100644
--- a/libavcodec/opus.c
+++ b/libavcodec/opus.c
@@ -27,6 +27,7 @@
 #include <stdint.h>
 
 #include "libavutil/error.h"
+#include "libavutil/internal.h"
 
 #include "opus.h"
 #include "vorbis.h"
@@ -333,7 +334,7 @@ av_cold int ff_opus_parse_extradata(AVCodecContext *avctx,
 
     s->gain_i = AV_RL16(extradata + 16);
     if (s->gain_i)
-        s->gain = pow(10, s->gain_i / (20.0 * 256));
+        s->gain = ff_exp10(s->gain_i / (20.0 * 256));
 
     map_type = extradata[18];
     if (!map_type) {
diff --git a/libavcodec/opus.h b/libavcodec/opus.h
index 92bb28a3..3a7ea9f5 100644
--- a/libavcodec/opus.h
+++ b/libavcodec/opus.h
@@ -173,6 +173,16 @@ typedef struct ChannelMap {
 
 typedef struct OpusContext {
     OpusStreamContext *streams;
+
+    /* current output buffers for each stream */
+    float **out;
+    int   *out_size;
+    /* Buffers for synchronizing the streams when they have different
+     * resampling delays */
+    AVAudioFifo **sync_buffers;
+    /* number of decoded samples for each stream */
+    int         *decoded_samples;
+
     int             nb_streams;
     int      nb_stereo_streams;
 
diff --git a/libavcodec/opus_celt.c b/libavcodec/opus_celt.c
index 42623d91..61a9dc61 100644
--- a/libavcodec/opus_celt.c
+++ b/libavcodec/opus_celt.c
@@ -27,6 +27,7 @@
 #include <stdint.h>
 
 #include "libavutil/float_dsp.h"
+#include "libavutil/libm.h"
 
 #include "imdct15.h"
 #include "opus.h"
@@ -1677,7 +1678,7 @@ static void celt_denormalize(CeltContext *s, CeltFrame *frame, float *data)
 
     for (i = s->startband; i < s->endband; i++) {
         float *dst = data + (celt_freq_bands[i] << s->duration);
-        float norm = pow(2, frame->energy[i] + celt_mean_energy[i]);
+        float norm = exp2(frame->energy[i] + celt_mean_energy[i]);
 
         for (j = 0; j < celt_freq_range[i] << s->duration; j++)
             dst[j] *= norm;
@@ -1839,7 +1840,7 @@ static void process_anticollapse(CeltContext *s, CeltFrame *frame, float *X)
 
         /* depth in 1/8 bits */
         depth = (1 + s->pulses[i]) / (celt_freq_range[i] << s->duration);
-        thresh = pow(2, -1.0 - 0.125f * depth);
+        thresh = exp2f(-1.0 - 0.125f * depth);
         sqrt_1 = 1.0f / sqrtf(celt_freq_range[i] << s->duration);
 
         xptr = X + (celt_freq_bands[i] << s->duration);
@@ -1857,7 +1858,7 @@ static void process_anticollapse(CeltContext *s, CeltFrame *frame, float *X)
 
         /* r needs to be multiplied by 2 or 2*sqrt(2) depending on LM because
         short blocks don't have the same energy as long */
-        r = pow(2, 1 - Ediff);
+        r = exp2(1 - Ediff);
         if (s->duration == 3)
             r *= M_SQRT2;
         r = FFMIN(thresh, r) * sqrt_1;
@@ -2209,7 +2210,7 @@ int ff_celt_init(AVCodecContext *avctx, CeltContext **ps, int output_channels)
             goto fail;
     }
 
-    s->dsp = avpriv_float_dsp_alloc(avctx->flags & CODEC_FLAG_BITEXACT);
+    s->dsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
     if (!s->dsp) {
         ret = AVERROR(ENOMEM);
         goto fail;
diff --git a/libavcodec/opus_silk.c b/libavcodec/opus_silk.c
index 841d1ed2..73526f98 100644
--- a/libavcodec/opus_silk.c
+++ b/libavcodec/opus_silk.c
@@ -824,7 +824,7 @@ static inline void silk_stabilize_lsf(int16_t nlsf[16], int order, const uint16_
 
             /* upper extent */
             for (i = order; i > k; i--)
-                max_center -= min_delta[k];
+                max_center -= min_delta[i];
             max_center -= min_delta[k] >> 1;
 
             /* move apart */
diff --git a/libavcodec/opusdec.c b/libavcodec/opusdec.c
index 2ee3f2ad..95a2435e 100644
--- a/libavcodec/opusdec.c
+++ b/libavcodec/opusdec.c
@@ -364,12 +364,17 @@ static int opus_decode_frame(OpusStreamContext *s, const uint8_t *data, int size
 
 static int opus_decode_subpacket(OpusStreamContext *s,
                                  const uint8_t *buf, int buf_size,
+                                 float **out, int out_size,
                                  int nb_samples)
 {
     int output_samples = 0;
     int flush_needed   = 0;
     int i, j, ret;
 
+    s->out[0]   = out[0];
+    s->out[1]   = out[1];
+    s->out_size = out_size;
+
     /* check if we need to flush the resampler */
     if (swr_is_initialized(s->swr)) {
         if (buf) {
@@ -447,15 +452,17 @@ static int opus_decode_packet(AVCodecContext *avctx, void *data,
     const uint8_t *buf  = avpkt->data;
     int buf_size        = avpkt->size;
     int coded_samples   = 0;
-    int decoded_samples = 0;
-    int i, ret;
+    int decoded_samples = INT_MAX;
     int delayed_samples = 0;
+    int i, ret;
 
+    /* calculate the number of delayed samples */
     for (i = 0; i < c->nb_streams; i++) {
         OpusStreamContext *s = &c->streams[i];
         s->out[0] =
         s->out[1] = NULL;
-        delayed_samples = FFMAX(delayed_samples, s->delayed_samples);
+        delayed_samples = FFMAX(delayed_samples,
+                                s->delayed_samples + av_audio_fifo_size(c->sync_buffers[i]));
     }
 
     /* decode the header of the first sub-packet to find out the sample count */
@@ -484,14 +491,43 @@ static int opus_decode_packet(AVCodecContext *avctx, void *data,
         return ret;
     frame->nb_samples = 0;
 
+    memset(c->out, 0, c->nb_streams * 2 * sizeof(*c->out));
     for (i = 0; i < avctx->channels; i++) {
         ChannelMap *map = &c->channel_maps[i];
         if (!map->copy)
-            c->streams[map->stream_idx].out[map->channel_idx] = (float*)frame->extended_data[i];
+            c->out[2 * map->stream_idx + map->channel_idx] = (float*)frame->extended_data[i];
     }
 
-    for (i = 0; i < c->nb_streams; i++)
-        c->streams[i].out_size = frame->linesize[0];
+    /* read the data from the sync buffers */
+    for (i = 0; i < c->nb_streams; i++) {
+        float          **out = c->out + 2 * i;
+        int sync_size = av_audio_fifo_size(c->sync_buffers[i]);
+
+        float sync_dummy[32];
+        int out_dummy = (!out[0]) | ((!out[1]) << 1);
+
+        if (!out[0])
+            out[0] = sync_dummy;
+        if (!out[1])
+            out[1] = sync_dummy;
+        if (out_dummy && sync_size > FF_ARRAY_ELEMS(sync_dummy))
+            return AVERROR_BUG;
+
+        ret = av_audio_fifo_read(c->sync_buffers[i], (void**)out, sync_size);
+        if (ret < 0)
+            return ret;
+
+        if (out_dummy & 1)
+            out[0] = NULL;
+        else
+            out[0] += ret;
+        if (out_dummy & 2)
+            out[1] = NULL;
+        else
+            out[1] += ret;
+
+        c->out_size[i] = frame->linesize[0] - ret * sizeof(float);
+    }
 
     /* decode each sub-packet */
     for (i = 0; i < c->nb_streams; i++) {
@@ -512,20 +548,31 @@ static int opus_decode_packet(AVCodecContext *avctx, void *data,
             s->silk_samplerate = get_silk_samplerate(s->packet.config);
         }
 
-        ret = opus_decode_subpacket(&c->streams[i], buf,
-                                    s->packet.data_size, coded_samples);
+        ret = opus_decode_subpacket(&c->streams[i], buf, s->packet.data_size,
+                                    c->out + 2 * i, c->out_size[i], coded_samples);
         if (ret < 0)
             return ret;
-        if (decoded_samples && ret != decoded_samples) {
-            av_log(avctx, AV_LOG_ERROR, "Different numbers of decoded samples "
-                   "in a multi-channel stream\n");
-            return AVERROR_INVALIDDATA;
-        }
-        decoded_samples = ret;
+        c->decoded_samples[i] = ret;
+        decoded_samples       = FFMIN(decoded_samples, ret);
+
         buf      += s->packet.packet_size;
         buf_size -= s->packet.packet_size;
     }
 
+    /* buffer the extra samples */
+    for (i = 0; i < c->nb_streams; i++) {
+        int buffer_samples = c->decoded_samples[i] - decoded_samples;
+        if (buffer_samples) {
+            float *buf[2] = { c->out[2 * i + 0] ? c->out[2 * i + 0] : (float*)frame->extended_data[0],
+                              c->out[2 * i + 1] ? c->out[2 * i + 1] : (float*)frame->extended_data[0] };
+            buf[0] += decoded_samples;
+            buf[1] += decoded_samples;
+            ret = av_audio_fifo_write(c->sync_buffers[i], (void**)buf, buffer_samples);
+            if (ret < 0)
+                return ret;
+        }
+    }
+
     for (i = 0; i < avctx->channels; i++) {
         ChannelMap *map = &c->channel_maps[i];
 
@@ -538,7 +585,7 @@ static int opus_decode_packet(AVCodecContext *avctx, void *data,
             memset(frame->extended_data[i], 0, frame->linesize[0]);
         }
 
-        if (c->gain_i) {
+        if (c->gain_i && decoded_samples > 0) {
             c->fdsp->vector_fmul_scalar((float*)frame->extended_data[i],
                                        (float*)frame->extended_data[i],
                                        c->gain, FFALIGN(decoded_samples, 8));
@@ -566,6 +613,8 @@ static av_cold void opus_decode_flush(AVCodecContext *ctx)
             av_audio_fifo_drain(s->celt_delay, av_audio_fifo_size(s->celt_delay));
         swr_close(s->swr);
 
+        av_audio_fifo_drain(c->sync_buffers[i], av_audio_fifo_size(c->sync_buffers[i]));
+
         ff_silk_flush(s->silk);
         ff_celt_flush(s->celt);
     }
@@ -590,6 +639,16 @@ static av_cold int opus_decode_close(AVCodecContext *avctx)
     }
 
     av_freep(&c->streams);
+
+    if (c->sync_buffers) {
+        for (i = 0; i < c->nb_streams; i++)
+            av_audio_fifo_free(c->sync_buffers[i]);
+    }
+    av_freep(&c->sync_buffers);
+    av_freep(&c->decoded_samples);
+    av_freep(&c->out);
+    av_freep(&c->out_size);
+
     c->nb_streams = 0;
 
     av_freep(&c->channel_maps);
@@ -612,12 +671,19 @@ static av_cold int opus_decode_init(AVCodecContext *avctx)
 
     /* find out the channel configuration */
     ret = ff_opus_parse_extradata(avctx, c);
-    if (ret < 0)
+    if (ret < 0) {
+        av_freep(&c->channel_maps);
+        av_freep(&c->fdsp);
         return ret;
+    }
 
     /* allocate and init each independent decoder */
     c->streams = av_mallocz_array(c->nb_streams, sizeof(*c->streams));
-    if (!c->streams) {
+    c->out             = av_mallocz_array(c->nb_streams, 2 * sizeof(*c->out));
+    c->out_size        = av_mallocz_array(c->nb_streams, sizeof(*c->out_size));
+    c->sync_buffers    = av_mallocz_array(c->nb_streams, sizeof(*c->sync_buffers));
+    c->decoded_samples = av_mallocz_array(c->nb_streams, sizeof(*c->decoded_samples));
+    if (!c->streams || !c->sync_buffers || !c->decoded_samples || !c->out || !c->out_size) {
         c->nb_streams = 0;
         ret = AVERROR(ENOMEM);
         goto fail;
@@ -665,6 +731,13 @@ static av_cold int opus_decode_init(AVCodecContext *avctx)
             ret = AVERROR(ENOMEM);
             goto fail;
         }
+
+        c->sync_buffers[i] = av_audio_fifo_alloc(avctx->sample_fmt,
+                                                 s->output_channels, 32);
+        if (!c->sync_buffers[i]) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
     }
 
     return 0;
@@ -683,5 +756,5 @@ AVCodec ff_opus_decoder = {
     .close           = opus_decode_close,
     .decode          = opus_decode_packet,
     .flush           = opus_decode_flush,
-    .capabilities    = CODEC_CAP_DR1 | CODEC_CAP_DELAY,
+    .capabilities    = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
 };
diff --git a/libavcodec/pafaudio.c b/libavcodec/pafaudio.c
index aaaef5e9..12f473ae 100644
--- a/libavcodec/pafaudio.c
+++ b/libavcodec/pafaudio.c
@@ -78,5 +78,5 @@ AVCodec ff_paf_audio_decoder = {
     .id           = AV_CODEC_ID_PAF_AUDIO,
     .init         = paf_audio_init,
     .decode       = paf_audio_decode,
-    .capabilities = CODEC_CAP_DR1,
+    .capabilities = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/pafvideo.c b/libavcodec/pafvideo.c
index a27afed5..cab3129f 100644
--- a/libavcodec/pafvideo.c
+++ b/libavcodec/pafvideo.c
@@ -393,5 +393,5 @@ AVCodec ff_paf_video_decoder = {
     .init           = paf_video_init,
     .close          = paf_video_close,
     .decode         = paf_video_decode,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/pamenc.c b/libavcodec/pamenc.c
index 7a51fe62..50c9fcb4 100644
--- a/libavcodec/pamenc.c
+++ b/libavcodec/pamenc.c
@@ -91,7 +91,7 @@ static int pam_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         return -1;
     }
 
-    if ((ret = ff_alloc_packet2(avctx, pkt, n*h + 200)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, pkt, n*h + 200, 0)) < 0)
         return ret;
 
     bytestream_start =
@@ -129,29 +129,22 @@ static int pam_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 
 static av_cold int pam_encode_init(AVCodecContext *avctx)
 {
-    avctx->coded_frame = av_frame_alloc();
-    if (!avctx->coded_frame)
-        return AVERROR(ENOMEM);
-
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
     avctx->coded_frame->key_frame = 1;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
     return 0;
 }
 
-static av_cold int pam_encode_close(AVCodecContext *avctx)
-{
-    av_frame_free(&avctx->coded_frame);
-    return 0;
-}
-
 AVCodec ff_pam_encoder = {
     .name           = "pam",
     .long_name      = NULL_IF_CONFIG_SMALL("PAM (Portable AnyMap) image"),
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_PAM,
     .init           = pam_encode_init,
-    .close          = pam_encode_close,
     .encode2        = pam_encode_frame,
     .pix_fmts       = (const enum AVPixelFormat[]){
         AV_PIX_FMT_RGB24, AV_PIX_FMT_RGBA,
diff --git a/libavcodec/parser.c b/libavcodec/parser.c
index 1ef207f5..d25d261a 100644
--- a/libavcodec/parser.c
+++ b/libavcodec/parser.c
@@ -25,6 +25,7 @@
 
 #include "libavutil/avassert.h"
 #include "libavutil/atomic.h"
+#include "libavutil/internal.h"
 #include "libavutil/mem.h"
 
 #include "internal.h"
@@ -82,7 +83,11 @@ AVCodecParserContext *av_parser_init(int codec_id)
             goto err_out;
     }
     s->key_frame            = -1;
+#if FF_API_CONVERGENCE_DURATION
+FF_DISABLE_DEPRECATION_WARNINGS
     s->convergence_duration = 0;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
     s->dts_sync_point       = INT_MIN;
     s->dts_ref_dts_delta    = INT_MIN;
     s->pts_dts_delta        = INT_MIN;
@@ -134,7 +139,14 @@ int av_parser_parse2(AVCodecParserContext *s, AVCodecContext *avctx,
                      int64_t pts, int64_t dts, int64_t pos)
 {
     int index, i;
-    uint8_t dummy_buf[FF_INPUT_BUFFER_PADDING_SIZE];
+    uint8_t dummy_buf[AV_INPUT_BUFFER_PADDING_SIZE];
+
+    /* Parsers only work for the specified codec ids. */
+    av_assert1(avctx->codec_id == s->parser->codec_ids[0] ||
+               avctx->codec_id == s->parser->codec_ids[1] ||
+               avctx->codec_id == s->parser->codec_ids[2] ||
+               avctx->codec_id == s->parser->codec_ids[3] ||
+               avctx->codec_id == s->parser->codec_ids[4]);
 
     if (!(s->flags & PARSER_FLAG_FETCHED_OFFSET)) {
         s->next_frame_offset =
@@ -188,8 +200,8 @@ int av_parser_change(AVCodecParserContext *s, AVCodecContext *avctx,
                      const uint8_t *buf, int buf_size, int keyframe)
 {
     if (s && s->parser->split) {
-        if (avctx->flags  & CODEC_FLAG_GLOBAL_HEADER ||
-            avctx->flags2 & CODEC_FLAG2_LOCAL_HEADER) {
+        if (avctx->flags  & AV_CODEC_FLAG_GLOBAL_HEADER ||
+            avctx->flags2 & AV_CODEC_FLAG2_LOCAL_HEADER) {
             int i = s->parser->split(avctx, buf, buf_size);
             buf      += i;
             buf_size -= i;
@@ -200,17 +212,17 @@ int av_parser_change(AVCodecParserContext *s, AVCodecContext *avctx,
     *poutbuf      = (uint8_t *) buf;
     *poutbuf_size = buf_size;
     if (avctx->extradata) {
-        if (keyframe && (avctx->flags2 & CODEC_FLAG2_LOCAL_HEADER)) {
+        if (keyframe && (avctx->flags2 & AV_CODEC_FLAG2_LOCAL_HEADER)) {
             int size = buf_size + avctx->extradata_size;
 
             *poutbuf_size = size;
-            *poutbuf      = av_malloc(size + FF_INPUT_BUFFER_PADDING_SIZE);
+            *poutbuf      = av_malloc(size + AV_INPUT_BUFFER_PADDING_SIZE);
             if (!*poutbuf)
                 return AVERROR(ENOMEM);
 
             memcpy(*poutbuf, avctx->extradata, avctx->extradata_size);
             memcpy(*poutbuf + avctx->extradata_size, buf,
-                   buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
+                   buf_size + AV_INPUT_BUFFER_PADDING_SIZE);
             return 1;
         }
     }
@@ -252,10 +264,10 @@ int ff_combine_frame(ParseContext *pc, int next,
     if (next == END_NOT_FOUND) {
         void *new_buffer = av_fast_realloc(pc->buffer, &pc->buffer_size,
                                            *buf_size + pc->index +
-                                           FF_INPUT_BUFFER_PADDING_SIZE);
+                                           AV_INPUT_BUFFER_PADDING_SIZE);
 
         if (!new_buffer) {
-            av_log(NULL, AV_LOG_ERROR, "Failed to reallocate parser buffer to %d\n", *buf_size + pc->index + FF_INPUT_BUFFER_PADDING_SIZE);
+            av_log(NULL, AV_LOG_ERROR, "Failed to reallocate parser buffer to %d\n", *buf_size + pc->index + AV_INPUT_BUFFER_PADDING_SIZE);
             pc->index = 0;
             return AVERROR(ENOMEM);
         }
@@ -272,17 +284,17 @@ int ff_combine_frame(ParseContext *pc, int next,
     if (pc->index) {
         void *new_buffer = av_fast_realloc(pc->buffer, &pc->buffer_size,
                                            next + pc->index +
-                                           FF_INPUT_BUFFER_PADDING_SIZE);
+                                           AV_INPUT_BUFFER_PADDING_SIZE);
         if (!new_buffer) {
-            av_log(NULL, AV_LOG_ERROR, "Failed to reallocate parser buffer to %d\n", next + pc->index + FF_INPUT_BUFFER_PADDING_SIZE);
+            av_log(NULL, AV_LOG_ERROR, "Failed to reallocate parser buffer to %d\n", next + pc->index + AV_INPUT_BUFFER_PADDING_SIZE);
             pc->overread_index =
             pc->index = 0;
             return AVERROR(ENOMEM);
         }
         pc->buffer = new_buffer;
-        if (next > -FF_INPUT_BUFFER_PADDING_SIZE)
+        if (next > -AV_INPUT_BUFFER_PADDING_SIZE)
             memcpy(&pc->buffer[pc->index], *buf,
-                   next + FF_INPUT_BUFFER_PADDING_SIZE);
+                   next + AV_INPUT_BUFFER_PADDING_SIZE);
         pc->index = 0;
         *buf      = pc->buffer;
     }
diff --git a/libavcodec/pcm-bluray.c b/libavcodec/pcm-bluray.c
index 5819e537..22c1c08b 100644
--- a/libavcodec/pcm-bluray.c
+++ b/libavcodec/pcm-bluray.c
@@ -117,9 +117,9 @@ static int pcm_bluray_parse_header(AVCodecContext *avctx,
 
     if (avctx->debug & FF_DEBUG_PICT_INFO)
         ff_dlog(avctx,
-                "pcm_bluray_parse_header: %d channels, %d bits per sample, %d Hz, %d bit/s\n",
+                "pcm_bluray_parse_header: %d channels, %d bits per sample, %d Hz, %"PRId64" bit/s\n",
                 avctx->channels, avctx->bits_per_coded_sample,
-                avctx->sample_rate, avctx->bit_rate);
+                avctx->sample_rate, (int64_t)avctx->bit_rate);
     return 0;
 }
 
@@ -307,7 +307,7 @@ AVCodec ff_pcm_bluray_decoder = {
     .type           = AVMEDIA_TYPE_AUDIO,
     .id             = AV_CODEC_ID_PCM_BLURAY,
     .decode         = pcm_bluray_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .sample_fmts    = (const enum AVSampleFormat[]){
         AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_NONE
     },
diff --git a/libavcodec/pcm-dvd.c b/libavcodec/pcm-dvd.c
index 47d5d68d..a78c05db 100644
--- a/libavcodec/pcm-dvd.c
+++ b/libavcodec/pcm-dvd.c
@@ -73,7 +73,7 @@ static int pcm_dvd_parse_header(AVCodecContext *avctx, const uint8_t *header)
     s->last_header = -1;
 
     if (avctx->debug & FF_DEBUG_PICT_INFO)
-        ff_dlog(avctx, "pcm_dvd_parse_header: header = %02x%02x%02x\n",
+        av_log(avctx, AV_LOG_DEBUG, "pcm_dvd_parse_header: header = %02x%02x%02x\n",
                 header[0], header[1], header[2]);
     /*
      * header[0] emphasis (1), muse(1), reserved(1), frame number(5)
@@ -140,9 +140,9 @@ static int pcm_dvd_parse_header(AVCodecContext *avctx, const uint8_t *header)
 
     if (avctx->debug & FF_DEBUG_PICT_INFO)
         ff_dlog(avctx,
-                "pcm_dvd_parse_header: %d channels, %d bits per sample, %d Hz, %d bit/s\n",
+                "pcm_dvd_parse_header: %d channels, %d bits per sample, %d Hz, %"PRId64" bit/s\n",
                 avctx->channels, avctx->bits_per_coded_sample,
-                avctx->sample_rate, avctx->bit_rate);
+                avctx->sample_rate, (int64_t)avctx->bit_rate);
 
     s->last_header = header_int;
 
@@ -311,7 +311,7 @@ AVCodec ff_pcm_dvd_decoder = {
     .init           = pcm_dvd_decode_init,
     .decode         = pcm_dvd_decode_frame,
     .close          = pcm_dvd_decode_uninit,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .sample_fmts    = (const enum AVSampleFormat[]) {
         AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_NONE
     }
diff --git a/libavcodec/pcm.c b/libavcodec/pcm.c
index 0a4ad0b9..9a136025 100644
--- a/libavcodec/pcm.c
+++ b/libavcodec/pcm.c
@@ -47,7 +47,7 @@ static av_cold int pcm_encode_init(AVCodecContext *avctx)
 
     avctx->bits_per_coded_sample = av_get_bits_per_sample(avctx->codec->id);
     avctx->block_align           = avctx->channels * avctx->bits_per_coded_sample / 8;
-    avctx->bit_rate              = avctx->block_align * avctx->sample_rate * 8;
+    avctx->bit_rate              = avctx->block_align * 8LL * avctx->sample_rate;
 
     return 0;
 }
@@ -97,7 +97,7 @@ static int pcm_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     n           = frame->nb_samples * avctx->channels;
     samples     = (const short *)frame->data[0];
 
-    if ((ret = ff_alloc_packet2(avctx, avpkt, n * sample_size)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, avpkt, n * sample_size, n * sample_size)) < 0)
         return ret;
     dst = avpkt->data;
 
@@ -501,7 +501,7 @@ AVCodec ff_ ## name_ ## _encoder = {                                        \
     .id           = AV_CODEC_ID_ ## id_,                                    \
     .init         = pcm_encode_init,                                        \
     .encode2      = pcm_encode_frame,                                       \
-    .capabilities = CODEC_CAP_VARIABLE_FRAME_SIZE,                          \
+    .capabilities = AV_CODEC_CAP_VARIABLE_FRAME_SIZE,                       \
     .sample_fmts  = (const enum AVSampleFormat[]){ sample_fmt_,             \
                                                    AV_SAMPLE_FMT_NONE },    \
 }
@@ -523,7 +523,7 @@ AVCodec ff_ ## name_ ## _decoder = {                                        \
     .priv_data_size = sizeof(PCMDecode),                                    \
     .init           = pcm_decode_init,                                      \
     .decode         = pcm_decode_frame,                                     \
-    .capabilities   = CODEC_CAP_DR1,                                        \
+    .capabilities   = AV_CODEC_CAP_DR1,                                     \
     .sample_fmts    = (const enum AVSampleFormat[]){ sample_fmt_,           \
                                                      AV_SAMPLE_FMT_NONE },  \
 }
diff --git a/libavcodec/pcm_tablegen.h b/libavcodec/pcm_tablegen.h
index 1387210a..72699779 100644
--- a/libavcodec/pcm_tablegen.h
+++ b/libavcodec/pcm_tablegen.h
@@ -87,21 +87,21 @@ static av_cold void build_xlaw_table(uint8_t *linear_to_xlaw,
 {
     int i, j, v, v1, v2;
 
-    j = 0;
-    for(i=0;i<128;i++) {
-        if (i != 127) {
-            v1 = xlaw2linear(i ^ mask);
-            v2 = xlaw2linear((i + 1) ^ mask);
-            v = (v1 + v2 + 4) >> 3;
-        } else {
-            v = 8192;
-        }
-        for(;j<v;j++) {
+    j = 1;
+    linear_to_xlaw[8192] = mask;
+    for(i=0;i<127;i++) {
+        v1 = xlaw2linear(i ^ mask);
+        v2 = xlaw2linear((i + 1) ^ mask);
+        v = (v1 + v2 + 4) >> 3;
+        for(;j<v;j+=1) {
+            linear_to_xlaw[8192 - j] = (i ^ (mask ^ 0x80));
             linear_to_xlaw[8192 + j] = (i ^ mask);
-            if (j > 0)
-                linear_to_xlaw[8192 - j] = (i ^ (mask ^ 0x80));
         }
     }
+    for(;j<8192;j++) {
+        linear_to_xlaw[8192 - j] = (127 ^ (mask ^ 0x80));
+        linear_to_xlaw[8192 + j] = (127 ^ mask);
+    }
     linear_to_xlaw[0] = linear_to_xlaw[1];
 }
 
diff --git a/libavcodec/pcx.c b/libavcodec/pcx.c
index 6487aa57..1d3ee8d9 100644
--- a/libavcodec/pcx.c
+++ b/libavcodec/pcx.c
@@ -143,7 +143,7 @@ static int pcx_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     ptr    = p->data[0];
     stride = p->linesize[0];
 
-    scanline = av_malloc(bytes_per_scanline + FF_INPUT_BUFFER_PADDING_SIZE);
+    scanline = av_malloc(bytes_per_scanline + AV_INPUT_BUFFER_PADDING_SIZE);
     if (!scanline)
         return AVERROR(ENOMEM);
 
@@ -239,5 +239,5 @@ AVCodec ff_pcx_decoder = {
     .type         = AVMEDIA_TYPE_VIDEO,
     .id           = AV_CODEC_ID_PCX,
     .decode       = pcx_decode_frame,
-    .capabilities = CODEC_CAP_DR1,
+    .capabilities = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/pcxenc.c b/libavcodec/pcxenc.c
index a718428d..f0ffedfa 100644
--- a/libavcodec/pcxenc.c
+++ b/libavcodec/pcxenc.c
@@ -35,22 +35,16 @@ static const uint32_t monoblack_pal[16] = { 0x000000, 0xFFFFFF };
 
 static av_cold int pcx_encode_init(AVCodecContext *avctx)
 {
-    avctx->coded_frame = av_frame_alloc();
-    if (!avctx->coded_frame)
-        return AVERROR(ENOMEM);
-
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
     avctx->coded_frame->key_frame = 1;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
     return 0;
 }
 
-static av_cold int pcx_encode_close(AVCodecContext *avctx)
-{
-    av_frame_free(&avctx->coded_frame);
-    return 0;
-}
-
 /**
  * PCX run-length encoder
  * @param dst output buffer
@@ -151,7 +145,7 @@ static int pcx_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     line_bytes = (line_bytes + 1) & ~1;
 
     max_pkt_size = 128 + avctx->height * 2 * line_bytes * nplanes + (pal ? 256*3 + 1 : 0);
-    if ((ret = ff_alloc_packet2(avctx, pkt, max_pkt_size)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, pkt, max_pkt_size, 0)) < 0)
         return ret;
     buf     = pkt->data;
     buf_end = pkt->data + pkt->size;
@@ -216,7 +210,6 @@ AVCodec ff_pcx_encoder = {
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_PCX,
     .init           = pcx_encode_init,
-    .close          = pcx_encode_close,
     .encode2        = pcx_encode_frame,
     .pix_fmts       = (const enum AVPixelFormat[]){
         AV_PIX_FMT_RGB24,
diff --git a/libavcodec/pgssubdec.c b/libavcodec/pgssubdec.c
index 0d307f53..133d08bf 100644
--- a/libavcodec/pgssubdec.c
+++ b/libavcodec/pgssubdec.c
@@ -33,7 +33,7 @@
 #include "libavutil/imgutils.h"
 #include "libavutil/opt.h"
 
-#define RGBA(r,g,b,a) (((a) << 24) | ((r) << 16) | ((g) << 8) | (b))
+#define RGBA(r,g,b,a) (((unsigned)(a) << 24) | ((r) << 16) | ((g) << 8) | (b))
 #define MAX_EPOCH_PALETTES 8   // Max 8 allowed per PGS epoch
 #define MAX_EPOCH_OBJECTS  64  // Max 64 allowed per PGS epoch
 #define MAX_OBJECT_REFS    2   // Max objects per display set
@@ -166,9 +166,9 @@ static int decode_rle(AVCodecContext *avctx, AVSubtitleRect *rect,
 
     rle_bitmap_end = buf + buf_size;
 
-    rect->pict.data[0] = av_malloc_array(rect->w, rect->h);
+    rect->data[0] = av_malloc_array(rect->w, rect->h);
 
-    if (!rect->pict.data[0])
+    if (!rect->data[0])
         return AVERROR(ENOMEM);
 
     pixel_count = 0;
@@ -190,7 +190,7 @@ static int decode_rle(AVCodecContext *avctx, AVSubtitleRect *rect,
         }
 
         if (run > 0 && pixel_count + run <= rect->w * rect->h) {
-            memset(rect->pict.data[0] + pixel_count, color, run);
+            memset(rect->data[0] + pixel_count, color, run);
             pixel_count += run;
         } else if (!run) {
             /*
@@ -290,8 +290,8 @@ static int parse_object_segment(AVCodecContext *avctx,
     height = bytestream_get_be16(&buf);
 
     /* Make sure the bitmap is not too large */
-    if (avctx->width < width || avctx->height < height) {
-        av_log(avctx, AV_LOG_ERROR, "Bitmap dimensions larger than video.\n");
+    if (avctx->width < width || avctx->height < height || !width || !height) {
+        av_log(avctx, AV_LOG_ERROR, "Bitmap dimensions (%dx%d) invalid.\n", width, height);
         return AVERROR_INVALIDDATA;
     }
 
@@ -354,8 +354,14 @@ static int parse_palette_segment(AVCodecContext *avctx,
         cb        = bytestream_get_byte(&buf);
         alpha     = bytestream_get_byte(&buf);
 
-        YUV_TO_RGB1(cb, cr);
-        YUV_TO_RGB2(r, g, b, y);
+        /* Default to BT.709 colorimetry (also when height is <= 0); use BT.601 for heights from 1 to 576 */
+        if (avctx->height <= 0 || avctx->height > 576) {
+            YUV_TO_RGB1_CCIR_BT709(cb, cr);
+        } else {
+            YUV_TO_RGB1_CCIR(cb, cr);
+        }
+
+        YUV_TO_RGB2_CCIR(r, g, b, y);
 
         ff_dlog(avctx, "Color %d := (%d,%d,%d,%d)\n", color_id, r, g, b, alpha);
 
@@ -523,6 +529,8 @@ static int display_end_segment(AVCodecContext *avctx, void *data,
     }
     for (i = 0; i < ctx->presentation.object_count; i++) {
         PGSSubObject *object;
+        AVSubtitleRect *rect;
+        int j;
 
         sub->rects[i]  = av_mallocz(sizeof(*sub->rects[0]));
         if (!sub->rects[i]) {
@@ -553,7 +561,7 @@ static int display_end_segment(AVCodecContext *avctx, void *data,
         sub->rects[i]->w    = object->w;
         sub->rects[i]->h    = object->h;
 
-        sub->rects[i]->pict.linesize[0] = object->w;
+        sub->rects[i]->linesize[0] = object->w;
 
         if (object->rle) {
             if (object->rle_remaining_len) {
@@ -578,15 +586,24 @@ static int display_end_segment(AVCodecContext *avctx, void *data,
         }
         /* Allocate memory for colors */
         sub->rects[i]->nb_colors    = 256;
-        sub->rects[i]->pict.data[1] = av_mallocz(AVPALETTE_SIZE);
-        if (!sub->rects[i]->pict.data[1]) {
+        sub->rects[i]->data[1] = av_mallocz(AVPALETTE_SIZE);
+        if (!sub->rects[i]->data[1]) {
             avsubtitle_free(sub);
             return AVERROR(ENOMEM);
         }
 
         if (!ctx->forced_subs_only || ctx->presentation.objects[i].composition_flag & 0x40)
-        memcpy(sub->rects[i]->pict.data[1], palette->clut, sub->rects[i]->nb_colors * sizeof(uint32_t));
-
+        memcpy(sub->rects[i]->data[1], palette->clut, sub->rects[i]->nb_colors * sizeof(uint32_t));
+
+#if FF_API_AVPICTURE
+FF_DISABLE_DEPRECATION_WARNINGS
+        rect = sub->rects[i];
+        for (j = 0; j < 4; j++) {
+            rect->pict.data[j] = rect->data[j];
+            rect->pict.linesize[j] = rect->linesize[j];
+        }
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
     }
     return 1;
 }
@@ -675,7 +692,7 @@ static int decode(AVCodecContext *avctx, void *data, int *data_size,
 #define OFFSET(x) offsetof(PGSSubContext, x)
 #define SD AV_OPT_FLAG_SUBTITLE_PARAM | AV_OPT_FLAG_DECODING_PARAM
 static const AVOption options[] = {
-    {"forced_subs_only", "Only show forced subtitles", OFFSET(forced_subs_only), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, SD},
+    {"forced_subs_only", "Only show forced subtitles", OFFSET(forced_subs_only), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, SD},
     { NULL },
 };
 
diff --git a/libavcodec/pictordec.c b/libavcodec/pictordec.c
index 1bc51bcf..ff6eb7f4 100644
--- a/libavcodec/pictordec.c
+++ b/libavcodec/pictordec.c
@@ -263,5 +263,5 @@ AVCodec ff_pictor_decoder = {
     .id             = AV_CODEC_ID_PICTOR,
     .priv_data_size = sizeof(PicContext),
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/pixblockdsp.c b/libavcodec/pixblockdsp.c
index ebde68b6..f0883d3d 100644
--- a/libavcodec/pixblockdsp.c
+++ b/libavcodec/pixblockdsp.c
@@ -20,15 +20,42 @@
 
 #include "config.h"
 #include "libavutil/attributes.h"
+#include "libavutil/intreadwrite.h"
 #include "avcodec.h"
 #include "pixblockdsp.h"
 
-#define BIT_DEPTH 16
-#include "pixblockdsp_template.c"
-#undef BIT_DEPTH
+static void get_pixels_16_c(int16_t *av_restrict block, const uint8_t *pixels,
+                            ptrdiff_t line_size)
+{
+    int row;
+
+    /* one unaligned 128-bit copy fills a row of eight int16_t coefficients */
+    for (row = 0; row < 8; row++) {
+        int16_t *dst       = block  + row * 8;
+        const uint8_t *src = pixels + row * line_size;
+        AV_COPY128U(dst, src);
+    }
+}
+
+static void get_pixels_8_c(int16_t *av_restrict block, const uint8_t *pixels,
+                           ptrdiff_t line_size)
+{
+    int row, col;
 
-#define BIT_DEPTH 8
-#include "pixblockdsp_template.c"
+    /*
+     * Widen an 8x8 block of 8-bit samples to int16_t.
+     * Source rows are line_size bytes apart; destination rows are
+     * stored back to back, eight coefficients per row.
+     */
+    for (row = 0; row < 8; row++) {
+        int16_t *dst       = block  + row * 8;
+        const uint8_t *src = pixels + row * line_size;
+
+        for (col = 0; col < 8; col++) {
+            dst[col] = src[col];
+        }
+    }
+}
 
 static void diff_pixels_c(int16_t *av_restrict block, const uint8_t *s1,
                           const uint8_t *s2, int stride)
@@ -79,4 +106,6 @@ av_cold void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx)
         ff_pixblockdsp_init_ppc(c, avctx, high_bit_depth);
     if (ARCH_X86)
         ff_pixblockdsp_init_x86(c, avctx, high_bit_depth);
+    if (ARCH_MIPS)
+        ff_pixblockdsp_init_mips(c, avctx, high_bit_depth);
 }
diff --git a/libavcodec/pixblockdsp.h b/libavcodec/pixblockdsp.h
index d4b85903..79ed86c3 100644
--- a/libavcodec/pixblockdsp.h
+++ b/libavcodec/pixblockdsp.h
@@ -42,5 +42,7 @@ void ff_pixblockdsp_init_ppc(PixblockDSPContext *c, AVCodecContext *avctx,
                              unsigned high_bit_depth);
 void ff_pixblockdsp_init_x86(PixblockDSPContext *c, AVCodecContext *avctx,
                              unsigned high_bit_depth);
+void ff_pixblockdsp_init_mips(PixblockDSPContext *c, AVCodecContext *avctx,
+                              unsigned high_bit_depth);
 
 #endif /* AVCODEC_PIXBLOCKDSP_H */
diff --git a/libavcodec/pngdec.c b/libavcodec/pngdec.c
index 60c49758..24318fbe 100644
--- a/libavcodec/pngdec.c
+++ b/libavcodec/pngdec.c
@@ -21,8 +21,11 @@
 
 //#define DEBUG
 
+#include "libavutil/avassert.h"
 #include "libavutil/bprint.h"
 #include "libavutil/imgutils.h"
+#include "libavutil/stereo3d.h"
+
 #include "avcodec.h"
 #include "bytestream.h"
 #include "internal.h"
@@ -59,6 +62,7 @@ typedef struct PNGDecContext {
     int bits_per_pixel;
     int bpp;
     int has_trns;
+    uint8_t transparent_color_be[6];
 
     uint8_t *image_buf;
     int image_linesize;
@@ -590,6 +594,7 @@ static int decode_idat_chunk(AVCodecContext *avctx, PNGDecContext *s,
                              uint32_t length, AVFrame *p)
 {
     int ret;
+    size_t byte_depth = s->bit_depth > 8 ? 2 : 1;
 
     if (!(s->state & PNG_IHDR)) {
         av_log(avctx, AV_LOG_ERROR, "IDAT without IHDR\n");
@@ -641,8 +646,41 @@ static int decode_idat_chunk(AVCodecContext *avctx, PNGDecContext *s,
             return AVERROR_INVALIDDATA;
         }
 
+        if (s->has_trns && s->color_type != PNG_COLOR_TYPE_PALETTE) {
+            switch (avctx->pix_fmt) {
+            case AV_PIX_FMT_RGB24:
+                avctx->pix_fmt = AV_PIX_FMT_RGBA;
+                break;
+
+            case AV_PIX_FMT_RGB48BE:
+                avctx->pix_fmt = AV_PIX_FMT_RGBA64BE;
+                break;
+
+            case AV_PIX_FMT_GRAY8:
+                avctx->pix_fmt = AV_PIX_FMT_YA8;
+                break;
+
+            case AV_PIX_FMT_GRAY16BE:
+                avctx->pix_fmt = AV_PIX_FMT_YA16BE;
+                break;
+
+            default:
+                avpriv_request_sample(avctx, "bit depth %d "
+                        "and color type %d with TRNS",
+                        s->bit_depth, s->color_type);
+                return AVERROR_INVALIDDATA;
+            }
+
+            s->bpp += byte_depth;
+        }
+
         if ((ret = ff_thread_get_buffer(avctx, &s->picture, AV_GET_BUFFER_FLAG_REF)) < 0)
             return ret;
+        if (avctx->codec_id == AV_CODEC_ID_APNG && s->last_dispose_op != APNG_DISPOSE_OP_PREVIOUS) {
+            ff_thread_release_buffer(avctx, &s->previous_picture);
+            if ((ret = ff_thread_get_buffer(avctx, &s->previous_picture, AV_GET_BUFFER_FLAG_REF)) < 0)
+                return ret;
+        }
         ff_thread_finish_setup(avctx);
 
         p->pict_type        = AV_PICTURE_TYPE_I;
@@ -686,9 +724,21 @@ static int decode_idat_chunk(AVCodecContext *avctx, PNGDecContext *s,
         s->zstream.avail_out = s->crow_size;
         s->zstream.next_out  = s->crow_buf;
     }
+
     s->state |= PNG_IDAT;
-    if ((ret = png_decode_idat(s, length)) < 0)
+
+    /* set image to non-transparent bpp while decompressing */
+    if (s->has_trns && s->color_type != PNG_COLOR_TYPE_PALETTE)
+        s->bpp -= byte_depth;
+
+    ret = png_decode_idat(s, length);
+
+    if (s->has_trns && s->color_type != PNG_COLOR_TYPE_PALETTE)
+        s->bpp += byte_depth;
+
+    if (ret < 0)
         return ret;
+
     bytestream2_skip(&s->gb, 4); /* crc */
 
     return 0;
@@ -722,17 +772,33 @@ static int decode_trns_chunk(AVCodecContext *avctx, PNGDecContext *s,
 {
     int v, i;
 
-    /* read the transparency. XXX: Only palette mode supported */
-    if (s->color_type != PNG_COLOR_TYPE_PALETTE ||
-            length > 256 ||
-            !(s->state & PNG_PLTE))
+    if (s->color_type == PNG_COLOR_TYPE_PALETTE) {
+        if (length > 256 || !(s->state & PNG_PLTE))
+            return AVERROR_INVALIDDATA;
+
+        for (i = 0; i < length; i++) {
+            v = bytestream2_get_byte(&s->gb);
+            s->palette[i] = (s->palette[i] & 0x00ffffff) | (v << 24);
+        }
+    } else if (s->color_type == PNG_COLOR_TYPE_GRAY || s->color_type == PNG_COLOR_TYPE_RGB) {
+        if ((s->color_type == PNG_COLOR_TYPE_GRAY && length != 2) ||
+            (s->color_type == PNG_COLOR_TYPE_RGB && length != 6))
+            return AVERROR_INVALIDDATA;
+
+        for (i = 0; i < length / 2; i++) {
+            /* only use the least significant bits */
+            v = av_mod_uintp2(bytestream2_get_be16(&s->gb), s->bit_depth);
+
+            if (s->bit_depth > 8)
+                AV_WB16(&s->transparent_color_be[2 * i], v);
+            else
+                s->transparent_color_be[i] = v;
+        }
+    } else {
         return AVERROR_INVALIDDATA;
-    for (i = 0; i < length; i++) {
-        v = bytestream2_get_byte(&s->gb);
-        s->palette[i] = (s->palette[i] & 0x00ffffff) | (v << 24);
     }
-    bytestream2_skip(&s->gb, 4);     /* crc */
 
+    bytestream2_skip(&s->gb, 4); /* crc */
     s->has_trns = 1;
 
     return 0;
@@ -851,13 +917,18 @@ static int decode_fctl_chunk(AVCodecContext *avctx, PNGDecContext *s,
         cur_w > s->width - x_offset|| cur_h > s->height - y_offset)
             return AVERROR_INVALIDDATA;
 
-    if (sequence_number == 0 && s->dispose_op == APNG_DISPOSE_OP_PREVIOUS) {
+    if (blend_op != APNG_BLEND_OP_OVER && blend_op != APNG_BLEND_OP_SOURCE) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid blend_op %d\n", blend_op);
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (sequence_number == 0 && dispose_op == APNG_DISPOSE_OP_PREVIOUS) {
         // No previous frame to revert to for the first frame
         // Spec says to just treat it as a APNG_DISPOSE_OP_BACKGROUND
-        s->dispose_op = APNG_DISPOSE_OP_BACKGROUND;
+        dispose_op = APNG_DISPOSE_OP_BACKGROUND;
     }
 
-    if (s->dispose_op == APNG_BLEND_OP_OVER && !s->has_trns && (
+    if (blend_op == APNG_BLEND_OP_OVER && !s->has_trns && (
             avctx->pix_fmt == AV_PIX_FMT_RGB24 ||
             avctx->pix_fmt == AV_PIX_FMT_RGB48BE ||
             avctx->pix_fmt == AV_PIX_FMT_PAL8 ||
@@ -865,8 +936,8 @@ static int decode_fctl_chunk(AVCodecContext *avctx, PNGDecContext *s,
             avctx->pix_fmt == AV_PIX_FMT_GRAY16BE ||
             avctx->pix_fmt == AV_PIX_FMT_MONOBLACK
         )) {
-        // APNG_DISPOSE_OP_OVER is the same as APNG_DISPOSE_OP_SOURCE when there is no alpha channel
-        s->dispose_op = APNG_BLEND_OP_SOURCE;
+        // APNG_BLEND_OP_OVER is the same as APNG_BLEND_OP_SOURCE when there is no alpha channel
+        blend_op = APNG_BLEND_OP_SOURCE;
     }
 
     s->cur_w      = cur_w;
@@ -903,10 +974,7 @@ static int handle_p_frame_apng(AVCodecContext *avctx, PNGDecContext *s,
                                AVFrame *p)
 {
     size_t x, y;
-    uint8_t *buffer = av_malloc(s->image_linesize * s->height);
-
-    if (!buffer)
-        return AVERROR(ENOMEM);
+    uint8_t *buffer;
 
     if (s->blend_op == APNG_BLEND_OP_OVER &&
         avctx->pix_fmt != AV_PIX_FMT_RGBA &&
@@ -917,20 +985,25 @@ static int handle_p_frame_apng(AVCodecContext *avctx, PNGDecContext *s,
         return AVERROR_PATCHWELCOME;
     }
 
-    // Copy the previous frame to the buffer
-    ff_thread_await_progress(&s->last_picture, INT_MAX, 0);
-    memcpy(buffer, s->last_picture.f->data[0], s->image_linesize * s->height);
+    buffer = av_malloc_array(s->image_linesize, s->height);
+    if (!buffer)
+        return AVERROR(ENOMEM);
+
 
     // Do the disposal operation specified by the last frame on the frame
-    if (s->last_dispose_op == APNG_DISPOSE_OP_BACKGROUND) {
-        for (y = s->last_y_offset; y < s->last_y_offset + s->last_h; ++y)
-            memset(buffer + s->image_linesize * y + s->bpp * s->last_x_offset, 0, s->bpp * s->last_w);
-    } else if (s->last_dispose_op == APNG_DISPOSE_OP_PREVIOUS) {
+    if (s->last_dispose_op != APNG_DISPOSE_OP_PREVIOUS) {
+        ff_thread_await_progress(&s->last_picture, INT_MAX, 0);
+        memcpy(buffer, s->last_picture.f->data[0], s->image_linesize * s->height);
+
+        if (s->last_dispose_op == APNG_DISPOSE_OP_BACKGROUND)
+            for (y = s->last_y_offset; y < s->last_y_offset + s->last_h; ++y)
+                memset(buffer + s->image_linesize * y + s->bpp * s->last_x_offset, 0, s->bpp * s->last_w);
+
+        memcpy(s->previous_picture.f->data[0], buffer, s->image_linesize * s->height);
+        ff_thread_report_progress(&s->previous_picture, INT_MAX, 0);
+    } else {
         ff_thread_await_progress(&s->previous_picture, INT_MAX, 0);
-        for (y = s->last_y_offset; y < s->last_y_offset + s->last_h; ++y) {
-            size_t row_start = s->image_linesize * y + s->bpp * s->last_x_offset;
-            memcpy(buffer + row_start, s->previous_picture.f->data[0] + row_start, s->bpp * s->last_w);
-        }
+        memcpy(buffer, s->previous_picture.f->data[0], s->image_linesize * s->height);
     }
 
     // Perform blending
@@ -946,7 +1019,7 @@ static int handle_p_frame_apng(AVCodecContext *avctx, PNGDecContext *s,
             for (x = s->x_offset; x < s->x_offset + s->cur_w; ++x, foreground += s->bpp, background += s->bpp) {
                 size_t b;
                 uint8_t foreground_alpha, background_alpha, output_alpha;
-                uint8_t output[4];
+                uint8_t output[10];
 
                 // Since we might be blending alpha onto alpha, we use the following equations:
                 // output_alpha = foreground_alpha + (1 - foreground_alpha) * background_alpha
@@ -986,6 +1059,8 @@ static int handle_p_frame_apng(AVCodecContext *avctx, PNGDecContext *s,
 
                 output_alpha = foreground_alpha + FAST_DIV255((255 - foreground_alpha) * background_alpha);
 
+                av_assert0(s->bpp <= 10);
+
                 for (b = 0; b < s->bpp - 1; ++b) {
                     if (output_alpha == 0) {
                         output[b] = 0;
@@ -1019,6 +1094,13 @@ static int decode_frame_common(AVCodecContext *avctx, PNGDecContext *s,
     for (;;) {
         length = bytestream2_get_bytes_left(&s->gb);
         if (length <= 0) {
+
+            if (avctx->codec_id == AV_CODEC_ID_PNG &&
+                avctx->skip_frame == AVDISCARD_ALL) {
+                av_frame_set_metadata(p, metadata);
+                return 0;
+            }
+
             if (CONFIG_APNG_DECODER && avctx->codec_id == AV_CODEC_ID_APNG && length == 0) {
                 if (!(s->state & PNG_IDAT))
                     return 0;
@@ -1046,6 +1128,21 @@ static int decode_frame_common(AVCodecContext *avctx, PNGDecContext *s,
                 ((tag >> 8) & 0xff),
                 ((tag >> 16) & 0xff),
                 ((tag >> 24) & 0xff), length);
+
+        if (avctx->codec_id == AV_CODEC_ID_PNG &&
+            avctx->skip_frame == AVDISCARD_ALL) {
+            switch(tag) {
+            case MKTAG('I', 'H', 'D', 'R'):
+            case MKTAG('p', 'H', 'Y', 's'):
+            case MKTAG('t', 'E', 'X', 't'):
+            case MKTAG('I', 'D', 'A', 'T'):
+            case MKTAG('t', 'R', 'N', 'S'):
+                break;
+            default:
+                goto skip_tag;
+            }
+        }
+
         switch (tag) {
         case MKTAG('I', 'H', 'D', 'R'):
             if ((ret = decode_ihdr_chunk(avctx, s, length)) < 0)
@@ -1096,6 +1193,22 @@ static int decode_frame_common(AVCodecContext *avctx, PNGDecContext *s,
                 av_log(avctx, AV_LOG_WARNING, "Broken zTXt chunk\n");
             bytestream2_skip(&s->gb, length + 4);
             break;
+        case MKTAG('s', 'T', 'E', 'R'): { /* stereo layout indicator chunk */
+            int mode = bytestream2_get_byte(&s->gb);
+            AVStereo3D *stereo3d = av_stereo3d_create_side_data(p);
+            if (!stereo3d) {
+                ret = AVERROR(ENOMEM); /* ret is not otherwise set on this path */
+                goto fail;
+            }
+            if (mode == 0 || mode == 1) { /* mode 0 sets the invert flag, mode 1 leaves it clear */
+                stereo3d->type  = AV_STEREO3D_SIDEBYSIDE;
+                stereo3d->flags = mode ? 0 : AV_STEREO3D_FLAG_INVERT;
+            } else {
+                av_log(avctx, AV_LOG_WARNING, "Unknown value in sTER chunk (%d)\n", mode);
+            }
+            bytestream2_skip(&s->gb, 4); /* crc */
+            break;
+        }
         case MKTAG('I', 'E', 'N', 'D'):
             if (!(s->state & PNG_ALLIMAGE))
                 av_log(avctx, AV_LOG_ERROR, "IEND without all image\n");
@@ -1113,10 +1226,38 @@ static int decode_frame_common(AVCodecContext *avctx, PNGDecContext *s,
         }
     }
 exit_loop:
+    if (avctx->codec_id == AV_CODEC_ID_PNG &&
+        avctx->skip_frame == AVDISCARD_ALL) {
+        av_frame_set_metadata(p, metadata);
+        return 0;
+    }
 
     if (s->bits_per_pixel <= 4)
         handle_small_bpp(s, p);
 
+    /* apply transparency if needed */
+    if (s->has_trns && s->color_type != PNG_COLOR_TYPE_PALETTE) {
+        size_t byte_depth = s->bit_depth > 8 ? 2 : 1;
+        size_t raw_bpp = s->bpp - byte_depth;
+        unsigned x, y;
+
+        for (y = 0; y < s->height; ++y) {
+            uint8_t *row = &s->image_buf[s->image_linesize * y];
+
+            /* since we're updating in-place, we have to go from right to left */
+            for (x = s->width; x > 0; --x) {
+                uint8_t *pixel = &row[s->bpp * (x - 1)];
+                memmove(pixel, &row[raw_bpp * (x - 1)], raw_bpp);
+
+                if (!memcmp(pixel, s->transparent_color_be, raw_bpp)) {
+                    memset(&pixel[raw_bpp], 0, byte_depth);
+                } else {
+                    memset(&pixel[raw_bpp], 0xff, byte_depth);
+                }
+            }
+        }
+    }
+
     /* handle p-frames only if a predecessor frame is available */
     if (s->last_picture.f->data[0]) {
         if (   !(avpkt->flags & AV_PKT_FLAG_KEY) && avctx->codec_tag != AV_RL32("MPNG")
@@ -1133,6 +1274,7 @@ static int decode_frame_common(AVCodecContext *avctx, PNGDecContext *s,
         }
     }
     ff_thread_report_progress(&s->picture, INT_MAX, 0);
+    ff_thread_report_progress(&s->previous_picture, INT_MAX, 0);
 
     av_frame_set_metadata(p, metadata);
     metadata   = NULL;
@@ -1141,6 +1283,7 @@ static int decode_frame_common(AVCodecContext *avctx, PNGDecContext *s,
 fail:
     av_dict_free(&metadata);
     ff_thread_report_progress(&s->picture, INT_MAX, 0);
+    ff_thread_report_progress(&s->previous_picture, INT_MAX, 0);
     return ret;
 }
 
@@ -1166,11 +1309,11 @@ static int decode_frame_png(AVCodecContext *avctx,
     sig = bytestream2_get_be64(&s->gb);
     if (sig != PNGSIG &&
         sig != MNGSIG) {
-        av_log(avctx, AV_LOG_ERROR, "Invalid PNG signature (%d).\n", buf_size);
+        av_log(avctx, AV_LOG_ERROR, "Invalid PNG signature 0x%08"PRIX64".\n", sig);
         return AVERROR_INVALIDDATA;
     }
 
-    s->y = s->state = 0;
+    s->y = s->state = s->has_trns = 0;
 
     /* init the zlib */
     s->zstream.zalloc = ff_png_zalloc;
@@ -1185,6 +1328,12 @@ static int decode_frame_png(AVCodecContext *avctx,
     if ((ret = decode_frame_common(avctx, s, p, avpkt)) < 0)
         goto the_end;
 
+    if (avctx->skip_frame == AVDISCARD_ALL) {
+        *got_frame = 0;
+        ret = bytestream2_tell(&s->gb);
+        goto the_end;
+    }
+
     if ((ret = av_frame_ref(data, s->picture.f)) < 0)
         return ret;
 
@@ -1206,13 +1355,9 @@ static int decode_frame_apng(AVCodecContext *avctx,
     PNGDecContext *const s = avctx->priv_data;
     int ret;
     AVFrame *p;
-    ThreadFrame tmp;
 
-    ff_thread_release_buffer(avctx, &s->previous_picture);
-    tmp = s->previous_picture;
-    s->previous_picture = s->last_picture;
-    s->last_picture = s->picture;
-    s->picture = tmp;
+    ff_thread_release_buffer(avctx, &s->last_picture);
+    FFSWAP(ThreadFrame, s->picture, s->last_picture);
     p = s->picture.f;
 
     if (!(s->state & PNG_IHDR)) {
@@ -1258,6 +1403,7 @@ static int decode_frame_apng(AVCodecContext *avctx,
 }
 #endif
 
+#if HAVE_THREADS
 static int update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
 {
     PNGDecContext *psrc = src->priv_data;
@@ -1283,6 +1429,8 @@ static int update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
         pdst->cur_h = psrc->cur_h;
         pdst->x_offset = psrc->x_offset;
         pdst->y_offset = psrc->y_offset;
+        pdst->has_trns = psrc->has_trns;
+        memcpy(pdst->transparent_color_be, psrc->transparent_color_be, sizeof(pdst->transparent_color_be));
 
         pdst->dispose_op = psrc->dispose_op;
 
@@ -1291,12 +1439,19 @@ static int update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
         pdst->state |= psrc->state & (PNG_IHDR | PNG_PLTE);
 
         ff_thread_release_buffer(dst, &pdst->last_picture);
-        if (psrc->last_picture.f->data[0])
-            return ff_thread_ref_frame(&pdst->last_picture, &psrc->last_picture);
+        if (psrc->last_picture.f->data[0] &&
+            (ret = ff_thread_ref_frame(&pdst->last_picture, &psrc->last_picture)) < 0)
+            return ret;
+
+        ff_thread_release_buffer(dst, &pdst->previous_picture);
+        if (psrc->previous_picture.f->data[0] &&
+            (ret = ff_thread_ref_frame(&pdst->previous_picture, &psrc->previous_picture)) < 0)
+            return ret;
     }
 
     return 0;
 }
+#endif
 
 static av_cold int png_dec_init(AVCodecContext *avctx)
 {
@@ -1355,7 +1510,7 @@ AVCodec ff_apng_decoder = {
     .decode         = decode_frame_apng,
     .init_thread_copy = ONLY_IF_THREADS_ENABLED(png_dec_init),
     .update_thread_context = ONLY_IF_THREADS_ENABLED(update_thread_context),
-    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS /*| AV_CODEC_CAP_DRAW_HORIZ_BAND*/,
 };
 #endif
 
@@ -1371,6 +1526,7 @@ AVCodec ff_png_decoder = {
     .decode         = decode_frame_png,
     .init_thread_copy = ONLY_IF_THREADS_ENABLED(png_dec_init),
     .update_thread_context = ONLY_IF_THREADS_ENABLED(update_thread_context),
-    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS /*| AV_CODEC_CAP_DRAW_HORIZ_BAND*/,
+    .caps_internal  = FF_CODEC_CAP_SKIP_FRAME_FILL_PARAM,
 };
 #endif
diff --git a/libavcodec/pngdsp.h b/libavcodec/pngdsp.h
index fbc1a508..5475d0d9 100644
--- a/libavcodec/pngdsp.h
+++ b/libavcodec/pngdsp.h
@@ -37,4 +37,4 @@ typedef struct PNGDSPContext {
 void ff_pngdsp_init(PNGDSPContext *dsp);
 void ff_pngdsp_init_x86(PNGDSPContext *dsp);
 
-#endif /* AVCDODEC_PNGDSP_H */
+#endif /* AVCODEC_PNGDSP_H */
diff --git a/libavcodec/pngenc.c b/libavcodec/pngenc.c
index 7a9d0b0f..e9e8bc27 100644
--- a/libavcodec/pngenc.c
+++ b/libavcodec/pngenc.c
@@ -31,11 +31,20 @@
 #include "libavutil/libm.h"
 #include "libavutil/opt.h"
 #include "libavutil/color_utils.h"
+#include "libavutil/stereo3d.h"
 
 #include <zlib.h>
 
 #define IOBUF_SIZE 4096
 
+typedef struct APNGFctlChunk { // per-frame control data; field names match the APNG fcTL chunk (TODO confirm against the spec)
+    uint32_t sequence_number;
+    uint32_t width, height;        // presumably the size of the encoded frame region, in pixels
+    uint32_t x_offset, y_offset;   // presumably the region's top-left position inside the canvas
+    uint16_t delay_num, delay_den; // presumably the frame delay as the fraction delay_num / delay_den
+    uint8_t dispose_op, blend_op;  // blend_op is compared against APNG_BLEND_OP_*; dispose_op presumably holds APNG_DISPOSE_OP_*
+} APNGFctlChunk;
+
 typedef struct PNGEncContext {
     AVClass *class;
     HuffYUVEncDSPContext hdsp;
@@ -59,6 +68,12 @@ typedef struct PNGEncContext {
     // APNG
     uint32_t palette_checksum;   // Used to ensure a single unique palette
     uint32_t sequence_number;
+
+    AVFrame *prev_frame;
+    AVFrame *last_frame;
+    APNGFctlChunk last_frame_fctl;
+    uint8_t *last_frame_packet;
+    size_t last_frame_packet_size;
 } PNGEncContext;
 
 static void png_get_interlaced_row(uint8_t *dst, int row_size,
@@ -326,6 +341,7 @@ static int png_get_gama(enum AVColorTransferCharacteristic trc, uint8_t *buf)
 
 static int encode_headers(AVCodecContext *avctx, const AVFrame *pict)
 {
+    AVFrameSideData *side_data;
     PNGEncContext *s = avctx->priv_data;
 
     /* write png header */
@@ -350,6 +366,23 @@ static int encode_headers(AVCodecContext *avctx, const AVFrame *pict)
     }
     png_write_chunk(&s->bytestream, MKTAG('p', 'H', 'Y', 's'), s->buf, 9);
 
+    /* write stereoscopic information */
+    side_data = av_frame_get_side_data(pict, AV_FRAME_DATA_STEREO3D);
+    if (side_data) {
+        AVStereo3D *stereo3d = (AVStereo3D *)side_data->data;
+        switch (stereo3d->type) {
+            case AV_STEREO3D_SIDEBYSIDE:
+                s->buf[0] = ((stereo3d->flags & AV_STEREO3D_FLAG_INVERT) == 0) ? 1 : 0;
+                png_write_chunk(&s->bytestream, MKTAG('s', 'T', 'E', 'R'), s->buf, 1);
+                break;
+            case AV_STEREO3D_2D:
+                break;
+            default:
+                av_log(avctx, AV_LOG_WARNING, "Only side-by-side stereo3d flag can be defined within sTER chunk\n");
+                break;
+        }
+    }
+
     /* write colorspace information */
     if (pict->color_primaries == AVCOL_PRI_BT709 &&
         pict->color_trc == AVCOL_TRC_IEC61966_2_1) {
@@ -403,7 +436,7 @@ static int encode_frame(AVCodecContext *avctx, const AVFrame *pict)
     uint8_t *progressive_buf = NULL;
     uint8_t *top_buf         = NULL;
 
-    row_size = (avctx->width * s->bits_per_pixel + 7) >> 3;
+    row_size = (pict->width * s->bits_per_pixel + 7) >> 3;
 
     crow_base = av_malloc((row_size + 32) << (s->filter_type == PNG_FILTER_VALUE_MIXED));
     if (!crow_base) {
@@ -430,16 +463,16 @@ static int encode_frame(AVCodecContext *avctx, const AVFrame *pict)
         for (pass = 0; pass < NB_PASSES; pass++) {
             /* NOTE: a pass is completely omitted if no pixels would be
              * output */
-            pass_row_size = ff_png_pass_row_size(pass, s->bits_per_pixel, avctx->width);
+            pass_row_size = ff_png_pass_row_size(pass, s->bits_per_pixel, pict->width);
             if (pass_row_size > 0) {
                 top = NULL;
-                for (y = 0; y < avctx->height; y++)
+                for (y = 0; y < pict->height; y++)
                     if ((ff_png_pass_ymask[pass] << (y & 7)) & 0x80) {
                         ptr = p->data[0] + y * p->linesize[0];
                         FFSWAP(uint8_t *, progressive_buf, top_buf);
                         png_get_interlaced_row(progressive_buf, pass_row_size,
                                                s->bits_per_pixel, pass,
-                                               ptr, avctx->width);
+                                               ptr, pict->width);
                         crow = png_choose_filter(s, crow_buf, progressive_buf,
                                                  top, pass_row_size, s->bits_per_pixel >> 3);
                         png_write_row(avctx, crow, pass_row_size + 1);
@@ -449,7 +482,7 @@ static int encode_frame(AVCodecContext *avctx, const AVFrame *pict)
         }
     } else {
         top = NULL;
-        for (y = 0; y < avctx->height; y++) {
+        for (y = 0; y < pict->height; y++) {
             ptr = p->data[0] + y * p->linesize[0];
             crow = png_choose_filter(s, crow_buf, ptr, top,
                                      row_size, s->bits_per_pixel >> 3);
@@ -495,14 +528,14 @@ static int encode_png(AVCodecContext *avctx, AVPacket *pkt,
 
     enc_row_size    = deflateBound(&s->zstream, (avctx->width * s->bits_per_pixel + 7) >> 3);
     max_packet_size =
-        FF_MIN_BUFFER_SIZE + // headers
+        AV_INPUT_BUFFER_MIN_SIZE + // headers
         avctx->height * (
             enc_row_size +
             12 * (((int64_t)enc_row_size + IOBUF_SIZE - 1) / IOBUF_SIZE) // IDAT * ceil(enc_row_size / IOBUF_SIZE)
         );
     if (max_packet_size > INT_MAX)
         return AVERROR(ENOMEM);
-    ret = ff_alloc_packet2(avctx, pkt, max_packet_size);
+    ret = ff_alloc_packet2(avctx, pkt, max_packet_size, 0);
     if (ret < 0)
         return ret;
 
@@ -530,6 +563,270 @@ static int encode_png(AVCodecContext *avctx, AVPacket *pkt,
     return 0;
 }
 
+static int apng_do_inverse_blend(AVFrame *output, const AVFrame *input,
+                                  APNGFctlChunk *fctl_chunk, uint8_t bpp)
+{
+    // output: background (the disposed canvas), input: foreground (the new frame), bpp: bytes per pixel.
+    // Overwrites output, from its top-left, with the cropped image that, when blended with the background,
+    // will produce the foreground; stores its size/offset in output and fctl_chunk. Returns 0, or -1 if impossible.
+    unsigned int x, y;
+    unsigned int leftmost_x = input->width;
+    unsigned int rightmost_x = 0; // exclusive bound (last changed column + 1)
+    unsigned int topmost_y = input->height;
+    unsigned int bottommost_y = 0; // exclusive bound (last changed row + 1)
+    const uint8_t *input_data = input->data[0];
+    uint8_t *output_data = output->data[0];
+    ptrdiff_t input_linesize = input->linesize[0];
+    ptrdiff_t output_linesize = output->linesize[0];
+
+    // Find bounding box of changes
+    for (y = 0; y < input->height; ++y) {
+        for (x = 0; x < input->width; ++x) {
+            if (!memcmp(input_data + bpp * x, output_data + bpp * x, bpp))
+                continue;
+
+            if (x < leftmost_x)
+                leftmost_x = x;
+            if (x >= rightmost_x)
+                rightmost_x = x + 1;
+            if (y < topmost_y)
+                topmost_y = y;
+            if (y >= bottommost_y)
+                bottommost_y = y + 1;
+        }
+
+        input_data += input_linesize;
+        output_data += output_linesize;
+    }
+
+    if (leftmost_x == input->width && rightmost_x == 0) {
+        // Empty frame
+        // APNG does not support empty frames, so we make it a 1x1 frame
+        leftmost_x = topmost_y = 0;
+        rightmost_x = bottommost_y = 1;
+    }
+
+    // Do actual inverse blending
+    if (fctl_chunk->blend_op == APNG_BLEND_OP_SOURCE) {
+        output_data = output->data[0]; // cropped rows are packed starting at the top-left of output
+        for (y = topmost_y; y < bottommost_y; ++y) {
+            memcpy(output_data,
+                   input->data[0] + input_linesize * y + bpp * leftmost_x,
+                   bpp * (rightmost_x - leftmost_x));
+            output_data += output_linesize;
+        }
+    } else { // APNG_BLEND_OP_OVER
+        size_t transparent_palette_index; // PAL8 only: first entry whose top byte is 0, or 256 if there is none
+        uint32_t *palette; // PAL8 only: input->data[1] viewed as 32-bit entries
+
+        switch (input->format) {
+        case AV_PIX_FMT_RGBA64BE:
+        case AV_PIX_FMT_YA16BE:
+        case AV_PIX_FMT_RGBA:
+        case AV_PIX_FMT_GRAY8A:
+            break;
+
+        case AV_PIX_FMT_PAL8:
+            palette = (uint32_t*)input->data[1];
+            for (transparent_palette_index = 0; transparent_palette_index < 256; ++transparent_palette_index)
+                if (palette[transparent_palette_index] >> 24 == 0)
+                    break;
+            break;
+
+        default:
+            // No alpha, so blending not possible
+            return -1;
+        }
+
+        for (y = topmost_y; y < bottommost_y; ++y) {
+            uint8_t *foreground = input->data[0] + input_linesize * y + bpp * leftmost_x;
+            uint8_t *background = output->data[0] + output_linesize * y + bpp * leftmost_x;
+            output_data = output->data[0] + output_linesize * (y - topmost_y); // destination row of the cropped image
+            for (x = leftmost_x; x < rightmost_x; ++x, foreground += bpp, background += bpp, output_data += bpp) {
+                if (!memcmp(foreground, background, bpp)) { // unchanged pixel: write a transparent one
+                    if (input->format == AV_PIX_FMT_PAL8) {
+                        if (transparent_palette_index == 256) {
+                            // Need fully transparent colour, but none exists
+                            return -1;
+                        }
+
+                        *output_data = transparent_palette_index;
+                    } else {
+                        memset(output_data, 0, bpp);
+                    }
+                    continue;
+                }
+
+                // Check for special alpha values, since full inverse
+                // alpha-on-alpha blending is rarely possible, and when
+                // possible, doesn't compress much better than
+                // APNG_BLEND_OP_SOURCE blending
+                switch (input->format) {
+                case AV_PIX_FMT_RGBA64BE:
+                    if (((uint16_t*)foreground)[3] == 0xffff ||
+                        ((uint16_t*)background)[3] == 0)
+                        break;
+                    return -1;
+
+                case AV_PIX_FMT_YA16BE:
+                    if (((uint16_t*)foreground)[1] == 0xffff ||
+                        ((uint16_t*)background)[1] == 0)
+                        break;
+                    return -1;
+
+                case AV_PIX_FMT_RGBA:
+                    if (foreground[3] == 0xff || background[3] == 0)
+                        break;
+                    return -1;
+
+                case AV_PIX_FMT_GRAY8A:
+                    if (foreground[1] == 0xff || background[1] == 0)
+                        break;
+                    return -1;
+
+                case AV_PIX_FMT_PAL8:
+                    if (palette[*foreground] >> 24 == 0xff ||
+                        palette[*background] >> 24 == 0)
+                        break;
+                    return -1;
+                }
+
+                memmove(output_data, foreground, bpp); // changed pixel: copy the foreground as is
+            }
+        }
+    }
+
+    output->width = rightmost_x - leftmost_x;
+    output->height = bottommost_y - topmost_y;
+    fctl_chunk->width = output->width;
+    fctl_chunk->height = output->height;
+    fctl_chunk->x_offset = leftmost_x;
+    fctl_chunk->y_offset = topmost_y;
+
+    return 0;
+}
+
+/* Encode pict into s->bytestream as an APNG frame: try each dispose_op (previous frame) x blend_op (this frame)
+ * pair, keep the smallest encoding and store its settings in *best_fctl_chunk / *best_last_fctl_chunk.
+ * Returns 0 (encode_frame()'s result for the first frame) on success, a negative value on failure. */
+static int apng_encode_frame(AVCodecContext *avctx, const AVFrame *pict,
+                             APNGFctlChunk *best_fctl_chunk, APNGFctlChunk *best_last_fctl_chunk)
+{
+    PNGEncContext *s = avctx->priv_data;
+    int ret;
+    unsigned int y;
+    AVFrame *diffFrame;
+    uint8_t bpp = (s->bits_per_pixel + 7) >> 3;
+    uint8_t *original_bytestream, *original_bytestream_end;
+    uint8_t *temp_bytestream = NULL, *temp_bytestream_end, *best_bytestream = NULL;
+    uint32_t best_sequence_number = 0;
+    size_t best_bytestream_size = SIZE_MAX;
+    APNGFctlChunk last_fctl_chunk = *best_last_fctl_chunk;
+    APNGFctlChunk fctl_chunk = *best_fctl_chunk;
+
+    if (avctx->frame_number == 0) { // first frame: nothing to diff against, encode it whole
+        best_fctl_chunk->width = pict->width;
+        best_fctl_chunk->height = pict->height;
+        best_fctl_chunk->x_offset = 0;
+        best_fctl_chunk->y_offset = 0;
+        best_fctl_chunk->blend_op = APNG_BLEND_OP_SOURCE;
+        return encode_frame(avctx, pict);
+    }
+
+    diffFrame = av_frame_alloc();
+    if (!diffFrame)
+        return AVERROR(ENOMEM);
+
+    diffFrame->format = pict->format;
+    diffFrame->width = pict->width;
+    diffFrame->height = pict->height;
+    if ((ret = av_frame_get_buffer(diffFrame, 32)) < 0)
+        goto fail;
+
+    original_bytestream = s->bytestream;
+    original_bytestream_end = s->bytestream_end;
+    // Second buffer of the same size, so a new candidate never overwrites the best encoding found so far
+    temp_bytestream = av_malloc(original_bytestream_end - original_bytestream);
+    if (!temp_bytestream) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+    temp_bytestream_end = temp_bytestream + (original_bytestream_end - original_bytestream);
+
+    for (last_fctl_chunk.dispose_op = 0; last_fctl_chunk.dispose_op < 3; ++last_fctl_chunk.dispose_op) {
+        // dispose_op: 0 = APNG_DISPOSE_OP_NONE, 1 = APNG_DISPOSE_OP_BACKGROUND, 2 = APNG_DISPOSE_OP_PREVIOUS
+        for (fctl_chunk.blend_op = 0; fctl_chunk.blend_op < 2; ++fctl_chunk.blend_op) {
+            // blend_op: 0 = APNG_BLEND_OP_SOURCE, 1 = APNG_BLEND_OP_OVER
+            uint32_t original_sequence_number = s->sequence_number, sequence_number;
+            uint8_t *bytestream_start = s->bytestream;
+            size_t bytestream_size;
+
+            // Do disposal (restore the full canvas size first: the previous inverse blend cropped diffFrame)
+            diffFrame->width = pict->width;
+            diffFrame->height = pict->height;
+            if (last_fctl_chunk.dispose_op != APNG_DISPOSE_OP_PREVIOUS) {
+                if ((ret = av_frame_copy(diffFrame, s->last_frame)) < 0)
+                    goto fail;
+
+                if (last_fctl_chunk.dispose_op == APNG_DISPOSE_OP_BACKGROUND) {
+                    for (y = last_fctl_chunk.y_offset; y < last_fctl_chunk.y_offset + last_fctl_chunk.height; ++y) {
+                        size_t row_start = diffFrame->linesize[0] * y + bpp * last_fctl_chunk.x_offset;
+                        memset(diffFrame->data[0] + row_start, 0, bpp * last_fctl_chunk.width);
+                    }
+                }
+            } else {
+                if (!s->prev_frame)
+                    continue;
+
+                if ((ret = av_frame_copy(diffFrame, s->prev_frame)) < 0)
+                    goto fail;
+            }
+
+            // Do inverse blending
+            if (apng_do_inverse_blend(diffFrame, pict, &fctl_chunk, bpp) < 0)
+                continue;
+
+            // Do encoding
+            ret = encode_frame(avctx, diffFrame);
+            sequence_number = s->sequence_number;
+            s->sequence_number = original_sequence_number;
+            bytestream_size = s->bytestream - bytestream_start;
+            s->bytestream = bytestream_start;
+            if (ret < 0)
+                goto fail;
+
+            if (bytestream_size < best_bytestream_size) {
+                *best_fctl_chunk = fctl_chunk;
+                *best_last_fctl_chunk = last_fctl_chunk;
+                best_sequence_number = sequence_number;
+                best_bytestream = s->bytestream;
+                best_bytestream_size = bytestream_size;
+
+                if (best_bytestream == original_bytestream) {
+                    s->bytestream = temp_bytestream;
+                    s->bytestream_end = temp_bytestream_end;
+                } else {
+                    s->bytestream = original_bytestream;
+                    s->bytestream_end = original_bytestream_end;
+                }
+            }
+        }
+    }
+
+    s->sequence_number = best_sequence_number;
+    s->bytestream = original_bytestream + best_bytestream_size;
+    s->bytestream_end = original_bytestream_end;
+    if (best_bytestream != original_bytestream)
+        memcpy(original_bytestream, best_bytestream, best_bytestream_size);
+
+    ret = 0;
+
+fail:
+    av_freep(&temp_bytestream);
+    av_frame_free(&diffFrame);
+    return ret;
+}
+
 static int encode_apng(AVCodecContext *avctx, AVPacket *pkt,
                        const AVFrame *pict, int *got_packet)
 {
@@ -537,9 +834,9 @@ static int encode_apng(AVCodecContext *avctx, AVPacket *pkt,
     int ret;
     int enc_row_size;
     size_t max_packet_size;
-    uint8_t buf[26];
+    APNGFctlChunk fctl_chunk = {0};
 
-    if (avctx->codec_id == AV_CODEC_ID_APNG && s->color_type == PNG_COLOR_TYPE_PALETTE) {
+    if (pict && avctx->codec_id == AV_CODEC_ID_APNG && s->color_type == PNG_COLOR_TYPE_PALETTE) {
         uint32_t checksum = ~av_crc(av_crc_get_table(AV_CRC_32_IEEE_LE), ~0U, pict->data[1], 256 * sizeof(uint32_t));
 
         if (avctx->frame_number == 0) {
@@ -553,54 +850,120 @@ static int encode_apng(AVCodecContext *avctx, AVPacket *pkt,
 
     enc_row_size    = deflateBound(&s->zstream, (avctx->width * s->bits_per_pixel + 7) >> 3);
     max_packet_size =
-        FF_MIN_BUFFER_SIZE + // headers
+        AV_INPUT_BUFFER_MIN_SIZE + // headers
         avctx->height * (
             enc_row_size +
             (4 + 12) * (((int64_t)enc_row_size + IOBUF_SIZE - 1) / IOBUF_SIZE) // fdAT * ceil(enc_row_size / IOBUF_SIZE)
         );
     if (max_packet_size > INT_MAX)
         return AVERROR(ENOMEM);
-    ret = ff_alloc_packet2(avctx, pkt, max_packet_size);
-    if (ret < 0)
-        return ret;
-
-    s->bytestream_start =
-    s->bytestream       = pkt->data;
-    s->bytestream_end   = pkt->data + pkt->size;
 
     if (avctx->frame_number == 0) {
+        if (!pict)
+            return AVERROR(EINVAL);
+
+        s->bytestream = avctx->extradata = av_malloc(FF_MIN_BUFFER_SIZE);
+        if (!avctx->extradata)
+            return AVERROR(ENOMEM);
+
         ret = encode_headers(avctx, pict);
         if (ret < 0)
             return ret;
 
-        avctx->extradata = av_malloc(s->bytestream - s->bytestream_start);
-        if (!avctx->extradata)
+        avctx->extradata_size = s->bytestream - avctx->extradata;
+
+        s->last_frame_packet = av_malloc(max_packet_size);
+        if (!s->last_frame_packet)
             return AVERROR(ENOMEM);
-        avctx->extradata_size = s->bytestream - s->bytestream_start;
-        memcpy(avctx->extradata, s->bytestream_start, s->bytestream - s->bytestream_start);
+    } else if (s->last_frame) {
+        ret = ff_alloc_packet2(avctx, pkt, max_packet_size, 0);
+        if (ret < 0)
+            return ret;
 
-        s->bytestream = s->bytestream_start;
+        memcpy(pkt->data, s->last_frame_packet, s->last_frame_packet_size);
+        pkt->size = s->last_frame_packet_size;
+        pkt->pts = pkt->dts = s->last_frame->pts;
     }
 
-    AV_WB32(buf, s->sequence_number);
-    AV_WB32(buf + 4, avctx->width);
-    AV_WB32(buf + 8, avctx->height);
-    AV_WB32(buf + 12, 0); // x offset
-    AV_WB32(buf + 16, 0); // y offset
-    AV_WB16(buf + 20, 0); // delay numerator (filled in during muxing)
-    AV_WB16(buf + 22, 0); // delay denominator
-    buf[24] = APNG_DISPOSE_OP_BACKGROUND;
-    buf[25] = APNG_BLEND_OP_SOURCE;
-    png_write_chunk(&s->bytestream, MKTAG('f', 'c', 'T', 'L'), buf, 26);
-    ++s->sequence_number;
+    if (pict) {
+        s->bytestream_start =
+        s->bytestream       = s->last_frame_packet;
+        s->bytestream_end   = s->bytestream + max_packet_size;
 
-    ret = encode_frame(avctx, pict);
-    if (ret < 0)
-        return ret;
+        // We're encoding the frame first, so we have to do a bit of shuffling around
+        // to have the image data write to the correct place in the buffer
+        fctl_chunk.sequence_number = s->sequence_number;
+        ++s->sequence_number;
+        s->bytestream += 26 + 12;
 
-    pkt->size = s->bytestream - s->bytestream_start;
-    pkt->flags |= AV_PKT_FLAG_KEY;
-    *got_packet = 1;
+        ret = apng_encode_frame(avctx, pict, &fctl_chunk, &s->last_frame_fctl);
+        if (ret < 0)
+            return ret;
+
+        fctl_chunk.delay_num = 0; // delay filled in during muxing
+        fctl_chunk.delay_den = 0;
+    } else {
+        s->last_frame_fctl.dispose_op = APNG_DISPOSE_OP_NONE;
+    }
+
+    if (s->last_frame) {
+        uint8_t* last_fctl_chunk_start = pkt->data;
+        uint8_t buf[26];
+
+        AV_WB32(buf + 0, s->last_frame_fctl.sequence_number);
+        AV_WB32(buf + 4, s->last_frame_fctl.width);
+        AV_WB32(buf + 8, s->last_frame_fctl.height);
+        AV_WB32(buf + 12, s->last_frame_fctl.x_offset);
+        AV_WB32(buf + 16, s->last_frame_fctl.y_offset);
+        AV_WB16(buf + 20, s->last_frame_fctl.delay_num);
+        AV_WB16(buf + 22, s->last_frame_fctl.delay_den);
+        buf[24] = s->last_frame_fctl.dispose_op;
+        buf[25] = s->last_frame_fctl.blend_op;
+        png_write_chunk(&last_fctl_chunk_start, MKTAG('f', 'c', 'T', 'L'), buf, 26);
+
+        *got_packet = 1;
+    }
+
+    if (pict) {
+        if (!s->last_frame) {
+            s->last_frame = av_frame_alloc();
+            if (!s->last_frame)
+                return AVERROR(ENOMEM);
+        } else if (s->last_frame_fctl.dispose_op != APNG_DISPOSE_OP_PREVIOUS) {
+            if (!s->prev_frame) {
+                s->prev_frame = av_frame_alloc();
+                if (!s->prev_frame)
+                    return AVERROR(ENOMEM);
+
+                s->prev_frame->format = pict->format;
+                s->prev_frame->width = pict->width;
+                s->prev_frame->height = pict->height;
+                if ((ret = av_frame_get_buffer(s->prev_frame, 32)) < 0)
+                    return ret;
+            }
+
+            // Do disposal, but not blending
+            av_frame_copy(s->prev_frame, s->last_frame);
+            if (s->last_frame_fctl.dispose_op == APNG_DISPOSE_OP_BACKGROUND) {
+                uint32_t y;
+                uint8_t bpp = (s->bits_per_pixel + 7) >> 3;
+                for (y = s->last_frame_fctl.y_offset; y < s->last_frame_fctl.y_offset + s->last_frame_fctl.height; ++y) {
+                    size_t row_start = s->prev_frame->linesize[0] * y + bpp * s->last_frame_fctl.x_offset;
+                    memset(s->prev_frame->data[0] + row_start, 0, bpp * s->last_frame_fctl.width);
+                }
+            }
+        }
+
+        av_frame_unref(s->last_frame);
+        ret = av_frame_ref(s->last_frame, (AVFrame*)pict);
+        if (ret < 0)
+            return ret;
+
+        s->last_frame_fctl = fctl_chunk;
+        s->last_frame_packet_size = s->bytestream - s->bytestream_start;
+    } else {
+        av_frame_free(&s->last_frame);
+    }
 
     return 0;
 }
@@ -627,18 +990,24 @@ static av_cold int png_enc_init(AVCodecContext *avctx)
         avctx->bits_per_coded_sample = 8;
     }
 
-    avctx->coded_frame = av_frame_alloc();
-    if (!avctx->coded_frame)
-        return AVERROR(ENOMEM);
-
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
     avctx->coded_frame->key_frame = 1;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
     ff_huffyuvencdsp_init(&s->hdsp);
 
-    s->filter_type = av_clip(avctx->prediction_method,
-                             PNG_FILTER_VALUE_NONE,
-                             PNG_FILTER_VALUE_MIXED);
+#if FF_API_PRIVATE_OPT
+FF_DISABLE_DEPRECATION_WARNINGS
+    if (avctx->prediction_method)
+        s->filter_type = av_clip(avctx->prediction_method,
+                                 PNG_FILTER_VALUE_NONE,
+                                 PNG_FILTER_VALUE_MIXED);
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
     if (avctx->pix_fmt == AV_PIX_FMT_MONOBLACK)
         s->filter_type = PNG_FILTER_VALUE_NONE;
 
@@ -649,7 +1018,7 @@ static av_cold int png_enc_init(AVCodecContext *avctx)
       s->dpm = s->dpi * 10000 / 254;
     }
 
-    s->is_progressive = !!(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
+    s->is_progressive = !!(avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT);
     switch (avctx->pix_fmt) {
     case AV_PIX_FMT_RGBA64BE:
         s->bit_depth = 16;
@@ -713,7 +1082,9 @@ static av_cold int png_enc_close(AVCodecContext *avctx)
     PNGEncContext *s = avctx->priv_data;
 
     deflateEnd(&s->zstream);
-    av_frame_free(&avctx->coded_frame);
+    av_frame_free(&s->last_frame);
+    av_frame_free(&s->prev_frame);
+    av_freep(&s->last_frame_packet);
     return 0;
 }
 
@@ -722,7 +1093,14 @@ static av_cold int png_enc_close(AVCodecContext *avctx)
 static const AVOption options[] = {
     {"dpi", "Set image resolution (in dots per inch)",  OFFSET(dpi), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 0x10000, VE},
     {"dpm", "Set image resolution (in dots per meter)", OFFSET(dpm), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 0x10000, VE},
-    { NULL }
+    { "pred", "Prediction method", OFFSET(filter_type), AV_OPT_TYPE_INT, { .i64 = PNG_FILTER_VALUE_NONE }, PNG_FILTER_VALUE_NONE, PNG_FILTER_VALUE_MIXED, VE, "pred" },
+        { "none",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PNG_FILTER_VALUE_NONE },  INT_MIN, INT_MAX, VE, "pred" },
+        { "sub",   NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PNG_FILTER_VALUE_SUB },   INT_MIN, INT_MAX, VE, "pred" },
+        { "up",    NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PNG_FILTER_VALUE_UP },    INT_MIN, INT_MAX, VE, "pred" },
+        { "avg",   NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PNG_FILTER_VALUE_AVG },   INT_MIN, INT_MAX, VE, "pred" },
+        { "paeth", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PNG_FILTER_VALUE_PAETH }, INT_MIN, INT_MAX, VE, "pred" },
+        { "mixed", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PNG_FILTER_VALUE_MIXED }, INT_MIN, INT_MAX, VE, "pred" },
+    { NULL},
 };
 
 static const AVClass pngenc_class = {
@@ -748,7 +1126,7 @@ AVCodec ff_png_encoder = {
     .init           = png_enc_init,
     .close          = png_enc_close,
     .encode2        = encode_png,
-    .capabilities   = CODEC_CAP_FRAME_THREADS | CODEC_CAP_INTRA_ONLY,
+    .capabilities   = AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_INTRA_ONLY,
     .pix_fmts       = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_RGB24, AV_PIX_FMT_RGBA,
         AV_PIX_FMT_RGB48BE, AV_PIX_FMT_RGBA64BE,
@@ -769,6 +1147,7 @@ AVCodec ff_apng_encoder = {
     .init           = png_enc_init,
     .close          = png_enc_close,
     .encode2        = encode_apng,
+    .capabilities   = CODEC_CAP_DELAY,
     .pix_fmts       = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_RGB24, AV_PIX_FMT_RGBA,
         AV_PIX_FMT_RGB48BE, AV_PIX_FMT_RGBA64BE,
diff --git a/libavcodec/pnm_parser.c b/libavcodec/pnm_parser.c
index 2a9e3e1c..a7d70b99 100644
--- a/libavcodec/pnm_parser.c
+++ b/libavcodec/pnm_parser.c
@@ -19,6 +19,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "libavutil/imgutils.h"
+
 #include "parser.h" //for ParseContext
 #include "pnm.h"
 
@@ -55,7 +57,7 @@ static int pnm_parse(AVCodecParserContext *s, AVCodecContext *avctx,
             goto retry;
         }
 #if 0
-        if (pc->index && pc->index * 2 + FF_INPUT_BUFFER_PADDING_SIZE < pc->buffer_size && buf_size > pc->index) {
+        if (pc->index && pc->index * 2 + AV_INPUT_BUFFER_PADDING_SIZE < pc->buffer_size && buf_size > pc->index) {
             memcpy(pc->buffer + pc->index, buf, pc->index);
             pc->index += pc->index;
             buf       += pc->index;
@@ -66,7 +68,7 @@ static int pnm_parse(AVCodecParserContext *s, AVCodecContext *avctx,
         next = END_NOT_FOUND;
     } else {
         next = pnmctx.bytestream - pnmctx.bytestream_start
-               + avpicture_get_size(avctx->pix_fmt, avctx->width, avctx->height);
+               + av_image_get_buffer_size(avctx->pix_fmt, avctx->width, avctx->height, 1);
         if (pnmctx.bytestream_start != buf)
             next -= pc->index;
         if (next > buf_size)
diff --git a/libavcodec/pnmdec.c b/libavcodec/pnmdec.c
index e6345006..d4261a45 100644
--- a/libavcodec/pnmdec.c
+++ b/libavcodec/pnmdec.c
@@ -265,7 +265,7 @@ AVCodec ff_pgm_decoder = {
     .id             = AV_CODEC_ID_PGM,
     .priv_data_size = sizeof(PNMContext),
     .decode         = pnm_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
 #endif
 
@@ -277,7 +277,7 @@ AVCodec ff_pgmyuv_decoder = {
     .id             = AV_CODEC_ID_PGMYUV,
     .priv_data_size = sizeof(PNMContext),
     .decode         = pnm_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
 #endif
 
@@ -289,7 +289,7 @@ AVCodec ff_ppm_decoder = {
     .id             = AV_CODEC_ID_PPM,
     .priv_data_size = sizeof(PNMContext),
     .decode         = pnm_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
 #endif
 
@@ -301,7 +301,7 @@ AVCodec ff_pbm_decoder = {
     .id             = AV_CODEC_ID_PBM,
     .priv_data_size = sizeof(PNMContext),
     .decode         = pnm_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
 #endif
 
@@ -313,6 +313,6 @@ AVCodec ff_pam_decoder = {
     .id             = AV_CODEC_ID_PAM,
     .priv_data_size = sizeof(PNMContext),
     .decode         = pnm_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
 #endif
diff --git a/libavcodec/pnmenc.c b/libavcodec/pnmenc.c
index e6c3635e..ba9478d0 100644
--- a/libavcodec/pnmenc.c
+++ b/libavcodec/pnmenc.c
@@ -19,6 +19,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "libavutil/imgutils.h"
 #include "libavutil/pixdesc.h"
 #include "avcodec.h"
 #include "internal.h"
@@ -29,10 +30,10 @@ static int pnm_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     uint8_t *bytestream, *bytestream_start, *bytestream_end;
     int i, h, h1, c, n, linesize, ret;
     uint8_t *ptr, *ptr1, *ptr2;
+    int size = av_image_get_buffer_size(avctx->pix_fmt,
+                                        avctx->width, avctx->height, 1);
 
-    if ((ret = ff_alloc_packet2(avctx, pkt, avpicture_get_size(avctx->pix_fmt,
-                                                       avctx->width,
-                                                       avctx->height) + 200)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, pkt, size + 200, 0)) < 0)
         return ret;
 
     bytestream_start =
@@ -83,7 +84,7 @@ static int pnm_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
              "P%c\n%d %d\n", c, avctx->width, h1);
     bytestream += strlen(bytestream);
     if (avctx->pix_fmt != AV_PIX_FMT_MONOWHITE) {
-        int maxdepth = (1 << (av_pix_fmt_desc_get(avctx->pix_fmt)->comp[0].depth_minus1 + 1)) - 1;
+        int maxdepth = (1 << av_pix_fmt_desc_get(avctx->pix_fmt)->comp[0].depth) - 1;
         snprintf(bytestream, bytestream_end - bytestream,
                  "%d\n", maxdepth);
         bytestream += strlen(bytestream);
@@ -120,22 +121,16 @@ static int pnm_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 
 static av_cold int pnm_encode_init(AVCodecContext *avctx)
 {
-    avctx->coded_frame = av_frame_alloc();
-    if (!avctx->coded_frame)
-        return AVERROR(ENOMEM);
-
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
     avctx->coded_frame->key_frame = 1;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
     return 0;
 }
 
-static av_cold int pnm_encode_close(AVCodecContext *avctx)
-{
-    av_frame_free(&avctx->coded_frame);
-    return 0;
-}
-
 #if CONFIG_PGM_ENCODER
 AVCodec ff_pgm_encoder = {
     .name           = "pgm",
@@ -143,7 +138,6 @@ AVCodec ff_pgm_encoder = {
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_PGM,
     .init           = pnm_encode_init,
-    .close          = pnm_encode_close,
     .encode2        = pnm_encode_frame,
     .pix_fmts       = (const enum AVPixelFormat[]){
         AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY16BE, AV_PIX_FMT_NONE
@@ -158,7 +152,6 @@ AVCodec ff_pgmyuv_encoder = {
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_PGMYUV,
     .init           = pnm_encode_init,
-    .close          = pnm_encode_close,
     .encode2        = pnm_encode_frame,
     .pix_fmts       = (const enum AVPixelFormat[]){
         AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV420P16BE, AV_PIX_FMT_NONE
@@ -173,7 +166,6 @@ AVCodec ff_ppm_encoder = {
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_PPM,
     .init           = pnm_encode_init,
-    .close          = pnm_encode_close,
     .encode2        = pnm_encode_frame,
     .pix_fmts       = (const enum AVPixelFormat[]){
         AV_PIX_FMT_RGB24, AV_PIX_FMT_RGB48BE, AV_PIX_FMT_NONE
@@ -188,7 +180,6 @@ AVCodec ff_pbm_encoder = {
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_PBM,
     .init           = pnm_encode_init,
-    .close          = pnm_encode_close,
     .encode2        = pnm_encode_frame,
     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_MONOWHITE,
                                                   AV_PIX_FMT_NONE },
diff --git a/libavcodec/ppc/Makefile b/libavcodec/ppc/Makefile
index 30d04b84..bd4f4278 100644
--- a/libavcodec/ppc/Makefile
+++ b/libavcodec/ppc/Makefile
@@ -20,6 +20,7 @@ OBJS-$(CONFIG_MPEGVIDEOENC)            += ppc/mpegvideoencdsp.o
 OBJS-$(CONFIG_PIXBLOCKDSP)             += ppc/pixblockdsp.o
 OBJS-$(CONFIG_VIDEODSP)                += ppc/videodsp_ppc.o
 OBJS-$(CONFIG_VP3DSP)                  += ppc/vp3dsp_altivec.o
+OBJS-$(CONFIG_VP8DSP)                  += ppc/vp8dsp_altivec.o
 
 # decoders/encoders
 OBJS-$(CONFIG_LLAUDDSP)                += ppc/lossless_audiodsp_altivec.o
@@ -27,4 +28,3 @@ OBJS-$(CONFIG_SVQ1_ENCODER)            += ppc/svq1enc_altivec.o
 OBJS-$(CONFIG_VC1_DECODER)             += ppc/vc1dsp_altivec.o
 OBJS-$(CONFIG_VORBIS_DECODER)          += ppc/vorbisdsp_altivec.o
 OBJS-$(CONFIG_VP7_DECODER)             += ppc/vp8dsp_altivec.o
-OBJS-$(CONFIG_VP8_DECODER)             += ppc/vp8dsp_altivec.o
diff --git a/libavcodec/ppc/blockdsp.c b/libavcodec/ppc/blockdsp.c
index 0059b3b4..45c492ab 100644
--- a/libavcodec/ppc/blockdsp.c
+++ b/libavcodec/ppc/blockdsp.c
@@ -143,27 +143,24 @@ static void clear_block_altivec(int16_t *block)
 }
 #endif /* HAVE_ALTIVEC */
 
-av_cold void ff_blockdsp_init_ppc(BlockDSPContext *c, unsigned high_bit_depth)
+av_cold void ff_blockdsp_init_ppc(BlockDSPContext *c)
 {
     // common optimizations whether AltiVec is available or not
-    if (!high_bit_depth) {
-        switch (check_dcbzl_effect()) {
-        case 32:
-            c->clear_blocks = clear_blocks_dcbz32_ppc;
-            break;
-        case 128:
-            c->clear_blocks = clear_blocks_dcbz128_ppc;
-            break;
-        default:
-            break;
-        }
+    switch (check_dcbzl_effect()) {
+    case 32:
+        c->clear_blocks = clear_blocks_dcbz32_ppc;
+        break;
+    case 128:
+        c->clear_blocks = clear_blocks_dcbz128_ppc;
+        break;
+    default:
+        break;
     }
 
 #if HAVE_ALTIVEC
     if (!PPC_ALTIVEC(av_get_cpu_flags()))
         return;
 
-    if (!high_bit_depth)
-        c->clear_block = clear_block_altivec;
+    c->clear_block = clear_block_altivec;
 #endif /* HAVE_ALTIVEC */
 }
diff --git a/libavcodec/ppc/fdctdsp.c b/libavcodec/ppc/fdctdsp.c
index 40f4c6c9..6659046f 100644
--- a/libavcodec/ppc/fdctdsp.c
+++ b/libavcodec/ppc/fdctdsp.c
@@ -37,27 +37,26 @@
 #define vu16(v) ((vector unsigned short) (v))
 #define vu32(v)   ((vector unsigned int) (v))
 
-#define C1     0.98078525066375732421875000 /* cos(1 * PI / 16) */
-#define C2     0.92387950420379638671875000 /* cos(2 * PI / 16) */
-#define C3     0.83146959543228149414062500 /* cos(3 * PI / 16) */
-#define C4     0.70710676908493041992187500 /* cos(4 * PI / 16) */
-#define C5     0.55557024478912353515625000 /* cos(5 * PI / 16) */
-#define C6     0.38268342614173889160156250 /* cos(6 * PI / 16) */
-#define C7     0.19509032368659973144531250 /* cos(7 * PI / 16) */
-#define SQRT_2 1.41421353816986083984375000 /* sqrt(2)          */
+#define C1     0.98078528040323044912618224 /* cos(1 * PI / 16) */
+#define C2     0.92387953251128675612818319 /* cos(2 * PI / 16) */
+#define C3     0.83146961230254523707878838 /* cos(3 * PI / 16) */
+#define C4     0.70710678118654752440084436 /* cos(4 * PI / 16) */
+#define C5     0.55557023301960222474283081 /* cos(5 * PI / 16) */
+#define C6     0.38268343236508977172845998 /* cos(6 * PI / 16) */
+#define C7     0.19509032201612826784828487 /* cos(7 * PI / 16) */
 
 #define W0 -(2 * C2)
 #define W1  (2 * C6)
-#define W2 (SQRT_2 * C6)
-#define W3 (SQRT_2 * C3)
-#define W4 (SQRT_2 * (-C1 + C3 + C5 - C7))
-#define W5 (SQRT_2 *  (C1 + C3 - C5 + C7))
-#define W6 (SQRT_2 *  (C1 + C3 + C5 - C7))
-#define W7 (SQRT_2 *  (C1 + C3 - C5 - C7))
-#define W8 (SQRT_2 *  (C7 - C3))
-#define W9 (SQRT_2 * (-C1 - C3))
-#define WA (SQRT_2 * (-C3 - C5))
-#define WB (SQRT_2 *  (C5 - C3))
+#define W2 (M_SQRT2 * C6)
+#define W3 (M_SQRT2 * C3)
+#define W4 (M_SQRT2 * (-C1 + C3 + C5 - C7))
+#define W5 (M_SQRT2 *  (C1 + C3 - C5 + C7))
+#define W6 (M_SQRT2 *  (C1 + C3 + C5 - C7))
+#define W7 (M_SQRT2 *  (C1 + C3 - C5 - C7))
+#define W8 (M_SQRT2 *  (C7 - C3))
+#define W9 (M_SQRT2 * (-C1 - C3))
+#define WA (M_SQRT2 * (-C3 - C5))
+#define WB (M_SQRT2 *  (C5 - C3))
 
 static const vector float fdctconsts[3] = {
     { W0, W1, W2, W3 },
diff --git a/libavcodec/ppc/h264dsp.c b/libavcodec/ppc/h264dsp.c
index 3822c7f5..4a048a17 100644
--- a/libavcodec/ppc/h264dsp.c
+++ b/libavcodec/ppc/h264dsp.c
@@ -524,7 +524,7 @@ static inline vec_u8 h264_deblock_q1(register vec_u8 p0,
 
     register vec_u8 average = vec_avg(p0, q0);
     register vec_u8 temp;
-    register vec_u8 uncliped;
+    register vec_u8 unclipped;
     register vec_u8 ones;
     register vec_u8 max;
     register vec_u8 min;
@@ -534,10 +534,10 @@ static inline vec_u8 h264_deblock_q1(register vec_u8 p0,
     average = vec_avg(average, p2);     /*avg(p2, avg(p0, q0)) */
     ones = vec_splat_u8(1);
     temp = vec_and(temp, ones);         /*(p2^avg(p0, q0)) & 1 */
-    uncliped = vec_subs(average, temp); /*(p2+((p0+q0+1)>>1))>>1 */
+    unclipped = vec_subs(average, temp); /*(p2+((p0+q0+1)>>1))>>1 */
     max = vec_adds(p1, tc0);
     min = vec_subs(p1, tc0);
-    newp1 = vec_max(min, uncliped);
+    newp1 = vec_max(min, unclipped);
     newp1 = vec_min(max, newp1);
     return newp1;
 }
diff --git a/libavcodec/ppc/idctdsp.c b/libavcodec/ppc/idctdsp.c
index ea56a709..80e71fda 100644
--- a/libavcodec/ppc/idctdsp.c
+++ b/libavcodec/ppc/idctdsp.c
@@ -261,7 +261,7 @@ av_cold void ff_idctdsp_init_ppc(IDCTDSPContext *c, AVCodecContext *avctx,
         return;
 
     if (!high_bit_depth && avctx->lowres == 0) {
-        if ((avctx->idct_algo == FF_IDCT_AUTO && !(avctx->flags & CODEC_FLAG_BITEXACT)) ||
+        if ((avctx->idct_algo == FF_IDCT_AUTO && !(avctx->flags & AV_CODEC_FLAG_BITEXACT)) ||
             (avctx->idct_algo == FF_IDCT_ALTIVEC)) {
             c->idct      = idct_altivec;
             c->idct_add  = idct_add_altivec;
diff --git a/libavcodec/profiles.c b/libavcodec/profiles.c
new file mode 100644
index 00000000..94069fd6
--- /dev/null
+++ b/libavcodec/profiles.c
@@ -0,0 +1,132 @@
+/*
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include "avcodec.h"
+#include "profiles.h"
+
+#if !CONFIG_SMALL
+
+const AVProfile ff_aac_profiles[] = {
+    { FF_PROFILE_AAC_LOW,   "LC"       },
+    { FF_PROFILE_AAC_HE,    "HE-AAC"   },
+    { FF_PROFILE_AAC_HE_V2, "HE-AACv2" },
+    { FF_PROFILE_AAC_LD,    "LD"       },
+    { FF_PROFILE_AAC_ELD,   "ELD"      },
+    { FF_PROFILE_AAC_MAIN,  "Main" },
+    { FF_PROFILE_AAC_LOW,   "LC"   },
+    { FF_PROFILE_AAC_SSR,   "SSR"  },
+    { FF_PROFILE_AAC_LTP,   "LTP"  },
+    { FF_PROFILE_UNKNOWN },
+};
+
+const AVProfile ff_dca_profiles[] = {
+    { FF_PROFILE_DTS,         "DTS"         },
+    { FF_PROFILE_DTS_ES,      "DTS-ES"      },
+    { FF_PROFILE_DTS_96_24,   "DTS 96/24"   },
+    { FF_PROFILE_DTS_HD_HRA,  "DTS-HD HRA"  },
+    { FF_PROFILE_DTS_HD_MA,   "DTS-HD MA"   },
+    { FF_PROFILE_DTS_EXPRESS, "DTS Express" },
+    { FF_PROFILE_UNKNOWN },
+};
+
+const AVProfile ff_h264_profiles[] = {
+    { FF_PROFILE_H264_BASELINE,             "Baseline"              },
+    { FF_PROFILE_H264_CONSTRAINED_BASELINE, "Constrained Baseline"  },
+    { FF_PROFILE_H264_MAIN,                 "Main"                  },
+    { FF_PROFILE_H264_EXTENDED,             "Extended"              },
+    { FF_PROFILE_H264_HIGH,                 "High"                  },
+    { FF_PROFILE_H264_HIGH_10,              "High 10"               },
+    { FF_PROFILE_H264_HIGH_10_INTRA,        "High 10 Intra"         },
+    { FF_PROFILE_H264_HIGH_422,             "High 4:2:2"            },
+    { FF_PROFILE_H264_HIGH_422_INTRA,       "High 4:2:2 Intra"      },
+    { FF_PROFILE_H264_HIGH_444,             "High 4:4:4"            },
+    { FF_PROFILE_H264_HIGH_444_PREDICTIVE,  "High 4:4:4 Predictive" },
+    { FF_PROFILE_H264_HIGH_444_INTRA,       "High 4:4:4 Intra"      },
+    { FF_PROFILE_H264_CAVLC_444,            "CAVLC 4:4:4"           },
+    { FF_PROFILE_UNKNOWN },
+};
+
+const AVProfile ff_hevc_profiles[] = {
+    { FF_PROFILE_HEVC_MAIN,                 "Main"                },
+    { FF_PROFILE_HEVC_MAIN_10,              "Main 10"             },
+    { FF_PROFILE_HEVC_MAIN_STILL_PICTURE,   "Main Still Picture"  },
+    { FF_PROFILE_HEVC_REXT,                 "Rext"                },
+    { FF_PROFILE_UNKNOWN },
+};
+
+const AVProfile ff_jpeg2000_profiles[] = {
+    { FF_PROFILE_JPEG2000_CSTREAM_RESTRICTION_0,  "JPEG 2000 codestream restriction 0"   },
+    { FF_PROFILE_JPEG2000_CSTREAM_RESTRICTION_1,  "JPEG 2000 codestream restriction 1"   },
+    { FF_PROFILE_JPEG2000_CSTREAM_NO_RESTRICTION, "JPEG 2000 no codestream restrictions" },
+    { FF_PROFILE_JPEG2000_DCINEMA_2K,             "JPEG 2000 digital cinema 2K"          },
+    { FF_PROFILE_JPEG2000_DCINEMA_4K,             "JPEG 2000 digital cinema 4K"          },
+    { FF_PROFILE_UNKNOWN },
+};
+
+const AVProfile ff_mpeg2_video_profiles[] = {
+    { FF_PROFILE_MPEG2_422,          "4:2:2"              },
+    { FF_PROFILE_MPEG2_HIGH,         "High"               },
+    { FF_PROFILE_MPEG2_SS,           "Spatially Scalable" },
+    { FF_PROFILE_MPEG2_SNR_SCALABLE, "SNR Scalable"       },
+    { FF_PROFILE_MPEG2_MAIN,         "Main"               },
+    { FF_PROFILE_MPEG2_SIMPLE,       "Simple"             },
+    { FF_PROFILE_RESERVED,           "Reserved"           },
+    { FF_PROFILE_RESERVED,           "Reserved"           },
+    { FF_PROFILE_UNKNOWN                                  },
+};
+
+const AVProfile ff_mpeg4_video_profiles[] = {
+    { FF_PROFILE_MPEG4_SIMPLE,                    "Simple Profile" },
+    { FF_PROFILE_MPEG4_SIMPLE_SCALABLE,           "Simple Scalable Profile" },
+    { FF_PROFILE_MPEG4_CORE,                      "Core Profile" },
+    { FF_PROFILE_MPEG4_MAIN,                      "Main Profile" },
+    { FF_PROFILE_MPEG4_N_BIT,                     "N-bit Profile" },
+    { FF_PROFILE_MPEG4_SCALABLE_TEXTURE,          "Scalable Texture Profile" },
+    { FF_PROFILE_MPEG4_SIMPLE_FACE_ANIMATION,     "Simple Face Animation Profile" },
+    { FF_PROFILE_MPEG4_BASIC_ANIMATED_TEXTURE,    "Basic Animated Texture Profile" },
+    { FF_PROFILE_MPEG4_HYBRID,                    "Hybrid Profile" },
+    { FF_PROFILE_MPEG4_ADVANCED_REAL_TIME,        "Advanced Real Time Simple Profile" },
+    { FF_PROFILE_MPEG4_CORE_SCALABLE,             "Core Scalable Profile" },
+    { FF_PROFILE_MPEG4_ADVANCED_CODING,           "Advanced Coding Profile" },
+    { FF_PROFILE_MPEG4_ADVANCED_CORE,             "Advanced Core Profile" },
+    { FF_PROFILE_MPEG4_ADVANCED_SCALABLE_TEXTURE, "Advanced Scalable Texture Profile" },
+    { FF_PROFILE_MPEG4_SIMPLE_STUDIO,             "Simple Studio Profile" },
+    { FF_PROFILE_MPEG4_ADVANCED_SIMPLE,           "Advanced Simple Profile" },
+    { FF_PROFILE_UNKNOWN },
+};
+
+const AVProfile ff_vc1_profiles[] = {
+    { FF_PROFILE_VC1_SIMPLE,   "Simple"   },
+    { FF_PROFILE_VC1_MAIN,     "Main"     },
+    { FF_PROFILE_VC1_COMPLEX,  "Complex"  },
+    { FF_PROFILE_VC1_ADVANCED, "Advanced" },
+    { FF_PROFILE_UNKNOWN },
+};
+
+const AVProfile ff_vp9_profiles[] = {
+    { FF_PROFILE_VP9_0, "Profile 0" },
+    { FF_PROFILE_VP9_1, "Profile 1" },
+    { FF_PROFILE_VP9_2, "Profile 2" },
+    { FF_PROFILE_VP9_3, "Profile 3" },
+    { FF_PROFILE_UNKNOWN },
+};
+
+#endif /* !CONFIG_SMALL */
diff --git a/libavcodec/profiles.h b/libavcodec/profiles.h
new file mode 100644
index 00000000..7e1f74d0
--- /dev/null
+++ b/libavcodec/profiles.h
@@ -0,0 +1,35 @@
+/*
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_PROFILES_H
+#define AVCODEC_PROFILES_H
+
+#include "avcodec.h"
+
+extern const AVProfile ff_aac_profiles[];
+extern const AVProfile ff_dca_profiles[];
+extern const AVProfile ff_h264_profiles[];
+extern const AVProfile ff_hevc_profiles[];
+extern const AVProfile ff_jpeg2000_profiles[];
+extern const AVProfile ff_mpeg2_video_profiles[];
+extern const AVProfile ff_mpeg4_video_profiles[];
+extern const AVProfile ff_vc1_profiles[];
+extern const AVProfile ff_vp9_profiles[];
+
+#endif
diff --git a/libavcodec/proresdec2.c b/libavcodec/proresdec2.c
index a1d497f0..2d47a130 100644
--- a/libavcodec/proresdec2.c
+++ b/libavcodec/proresdec2.c
@@ -28,6 +28,7 @@
 
 #define LONG_BITSTREAM_READER
 
+#include "libavutil/internal.h"
 #include "avcodec.h"
 #include "get_bits.h"
 #include "idctdsp.h"
@@ -70,14 +71,14 @@ static int decode_frame_header(ProresContext *ctx, const uint8_t *buf,
     const uint8_t *ptr;
 
     hdr_size = AV_RB16(buf);
-    av_dlog(avctx, "header size %d\n", hdr_size);
+    ff_dlog(avctx, "header size %d\n", hdr_size);
     if (hdr_size > data_size) {
         av_log(avctx, AV_LOG_ERROR, "error, wrong header size\n");
         return AVERROR_INVALIDDATA;
     }
 
     version = AV_RB16(buf + 2);
-    av_dlog(avctx, "%.4s version %d\n", buf+4, version);
+    ff_dlog(avctx, "%.4s version %d\n", buf+4, version);
     if (version > 1) {
         av_log(avctx, AV_LOG_ERROR, "unsupported version: %d\n", version);
         return AVERROR_PATCHWELCOME;
@@ -100,7 +101,7 @@ static int decode_frame_header(ProresContext *ctx, const uint8_t *buf,
     }
     if (avctx->skip_alpha) ctx->alpha_info = 0;
 
-    av_dlog(avctx, "frame type %d\n", ctx->frame_type);
+    ff_dlog(avctx, "frame type %d\n", ctx->frame_type);
 
     if (ctx->frame_type == 0) {
         ctx->scan = ctx->progressive_scan; // permuted
@@ -118,7 +119,7 @@ static int decode_frame_header(ProresContext *ctx, const uint8_t *buf,
 
     ptr   = buf + 20;
     flags = buf[19];
-    av_dlog(avctx, "flags %x\n", flags);
+    ff_dlog(avctx, "flags %x\n", flags);
 
     if (flags & 2) {
         if(buf + data_size - ptr < 64) {
@@ -179,7 +180,10 @@ static int decode_picture_header(AVCodecContext *avctx, const uint8_t *buf, cons
     else
         ctx->mb_height = (avctx->height + 15) >> 4;
 
-    slice_count = AV_RB16(buf + 5);
+    // QT ignores the written value
+    // slice_count = AV_RB16(buf + 5);
+    slice_count = ctx->mb_height * ((ctx->mb_width >> log2_slice_mb_width) +
+                                    av_popcount(ctx->mb_width & (1 << log2_slice_mb_width) - 1));
 
     if (ctx->slice_count != slice_count || !ctx->slices) {
         av_freep(&ctx->slices);
@@ -574,7 +578,7 @@ static int decode_slice_thread(AVCodecContext *avctx, void *arg, int jobnr, int
     if (ret < 0)
         return ret;
 
-    if (!(avctx->flags & CODEC_FLAG_GRAY)) {
+    if (!(avctx->flags & AV_CODEC_FLAG_GRAY)) {
         ret = decode_slice_chroma(avctx, slice, (uint16_t*)dest_u, chroma_stride,
                                   buf + y_data_size, u_data_size,
                                   qmat_chroma_scaled, log2_chroma_blocks_per_mb);
@@ -686,5 +690,5 @@ AVCodec ff_prores_decoder = {
     .init           = decode_init,
     .close          = decode_close,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_SLICE_THREADS,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_SLICE_THREADS,
 };
diff --git a/libavcodec/proresdec_lgpl.c b/libavcodec/proresdec_lgpl.c
index 4bdf3924..467a423f 100644
--- a/libavcodec/proresdec_lgpl.c
+++ b/libavcodec/proresdec_lgpl.c
@@ -251,7 +251,7 @@ static int decode_picture_header(ProresContext *ctx, const uint8_t *buf,
                       (1 << (4 + ctx->frame->interlaced_frame)) - 1) >>
                      (4 + ctx->frame->interlaced_frame);
 
-    remainder    = ctx->num_x_mbs & ((1 << slice_width_factor) - 1);
+    remainder    = av_mod_uintp2(ctx->num_x_mbs, slice_width_factor);
     num_x_slices = (ctx->num_x_mbs >> slice_width_factor) + (remainder & 1) +
                    ((remainder >> 1) & 1) + ((remainder >> 2) & 1);
 
@@ -780,5 +780,5 @@ AVCodec ff_prores_lgpl_decoder = {
     .init           = decode_init,
     .close          = decode_close,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_SLICE_THREADS,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_SLICE_THREADS,
 };
diff --git a/libavcodec/proresenc_anatoliy.c b/libavcodec/proresenc_anatoliy.c
index 48eb44ec..05160661 100644
--- a/libavcodec/proresenc_anatoliy.c
+++ b/libavcodec/proresenc_anatoliy.c
@@ -323,7 +323,7 @@ static av_always_inline unsigned encode_slice_data(AVCodecContext *avctx,
     *y_data_size = encode_slice_plane(avctx, mb_count, dest_y, luma_stride,
             buf, data_size, ctx->qmat_luma[qp - 1], 0);
 
-    if (!(avctx->flags & CODEC_FLAG_GRAY)) {
+    if (!(avctx->flags & AV_CODEC_FLAG_GRAY)) {
         *u_data_size = encode_slice_plane(avctx, mb_count, dest_u,
                 chroma_stride, buf + *y_data_size, data_size - *y_data_size,
                 ctx->qmat_chroma[qp - 1], 1);
@@ -491,10 +491,10 @@ static int prores_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     int header_size = 148;
     uint8_t *buf;
     int pic_size, ret;
-    int frame_size = FFALIGN(avctx->width, 16) * FFALIGN(avctx->height, 16)*16 + 500 + FF_MIN_BUFFER_SIZE; //FIXME choose tighter limit
+    int frame_size = FFALIGN(avctx->width, 16) * FFALIGN(avctx->height, 16)*16 + 500 + AV_INPUT_BUFFER_MIN_SIZE; //FIXME choose tighter limit
 
 
-    if ((ret = ff_alloc_packet2(avctx, pkt, frame_size + FF_MIN_BUFFER_SIZE)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, pkt, frame_size + AV_INPUT_BUFFER_MIN_SIZE, 0)) < 0)
         return ret;
 
     buf = pkt->data;
@@ -590,19 +590,12 @@ static av_cold int prores_encode_init(AVCodecContext *avctx)
         scale_mat(QMAT_CHROMA[avctx->profile], ctx->qmat_chroma[i - 1], i);
     }
 
-    avctx->coded_frame = av_frame_alloc();
-    if (!avctx->coded_frame)
-        return AVERROR(ENOMEM);
-    avctx->coded_frame->key_frame = 1;
-    avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
-
     return 0;
 }
 
 static av_cold int prores_encode_close(AVCodecContext *avctx)
 {
     ProresContext* ctx = avctx->priv_data;
-    av_frame_free(&avctx->coded_frame);
     av_freep(&ctx->fill_y);
 
     return 0;
@@ -618,7 +611,7 @@ AVCodec ff_prores_aw_encoder = {
     .close          = prores_encode_close,
     .encode2        = prores_encode_frame,
     .pix_fmts       = (const enum AVPixelFormat[]){AV_PIX_FMT_YUV422P10, AV_PIX_FMT_NONE},
-    .capabilities   = CODEC_CAP_FRAME_THREADS | CODEC_CAP_INTRA_ONLY,
+    .capabilities   = AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_INTRA_ONLY,
     .profiles       = profiles
 };
 
@@ -632,6 +625,6 @@ AVCodec ff_prores_encoder = {
     .close          = prores_encode_close,
     .encode2        = prores_encode_frame,
     .pix_fmts       = (const enum AVPixelFormat[]){AV_PIX_FMT_YUV422P10, AV_PIX_FMT_NONE},
-    .capabilities   = CODEC_CAP_FRAME_THREADS | CODEC_CAP_INTRA_ONLY,
+    .capabilities   = AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_INTRA_ONLY,
     .profiles       = profiles
 };
diff --git a/libavcodec/proresenc_kostya.c b/libavcodec/proresenc_kostya.c
index 18413681..3bc1d5d7 100644
--- a/libavcodec/proresenc_kostya.c
+++ b/libavcodec/proresenc_kostya.c
@@ -199,6 +199,7 @@ typedef struct ProresContext {
                  int linesize, int16_t *block);
     FDCTDSPContext fdsp;
 
+    const AVFrame *pic;
     int mb_width, mb_height;
     int mbs_per_slice;
     int num_chroma_blocks, chroma_factor;
@@ -745,7 +746,7 @@ static int estimate_alpha_plane(ProresContext *ctx, int *error,
     return bits;
 }
 
-static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic,
+static int find_slice_quant(AVCodecContext *avctx,
                             int trellis_node, int x, int y, int mbs_per_slice,
                             ProresThreadData *td)
 {
@@ -767,7 +768,7 @@ static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic,
     if (ctx->pictures_per_frame == 1)
         line_add = 0;
     else
-        line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
+        line_add = ctx->cur_picture_idx ^ !ctx->pic->top_field_first;
     mbs = x + mbs_per_slice;
 
     for (i = 0; i < ctx->num_planes; i++) {
@@ -787,9 +788,9 @@ static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic,
             pwidth         = avctx->width >> 1;
         }
 
-        linesize[i] = pic->linesize[i] * ctx->pictures_per_frame;
-        src = (const uint16_t*)(pic->data[i] + yp * linesize[i] +
-                                line_add * pic->linesize[i]) + xp;
+        linesize[i] = ctx->pic->linesize[i] * ctx->pictures_per_frame;
+        src = (const uint16_t *)(ctx->pic->data[i] + yp * linesize[i] +
+                                 line_add * ctx->pic->linesize[i]) + xp;
 
         if (i < 3) {
             get_slice_data(ctx, src, linesize[i], xp, yp,
@@ -912,7 +913,7 @@ static int find_quant_thread(AVCodecContext *avctx, void *arg,
     for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
         while (ctx->mb_width - x < mbs_per_slice)
             mbs_per_slice >>= 1;
-        q = find_slice_quant(avctx, arg,
+        q = find_slice_quant(avctx,
                              (mb + 1) * TRELLIS_WIDTH, x, y,
                              mbs_per_slice, td);
     }
@@ -940,9 +941,10 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     int max_slice_size = (ctx->frame_size_upper_bound - 200) / (ctx->pictures_per_frame * ctx->slices_per_picture + 1);
     uint8_t frame_flags;
 
+    ctx->pic = pic;
     pkt_size = ctx->frame_size_upper_bound;
 
-    if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size + FF_MIN_BUFFER_SIZE)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size + AV_INPUT_BUFFER_MIN_SIZE, 0)) < 0)
         return ret;
 
     orig_buf = pkt->data;
@@ -961,7 +963,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     bytestream_put_be16  (&buf, avctx->height);
 
     frame_flags = ctx->chroma_factor << 6;
-    if (avctx->flags & CODEC_FLAG_INTERLACED_DCT)
+    if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT)
         frame_flags |= pic->top_field_first ? 0x04 : 0x08;
     bytestream_put_byte  (&buf, frame_flags);
 
@@ -1090,8 +1092,6 @@ static av_cold int encode_close(AVCodecContext *avctx)
     ProresContext *ctx = avctx->priv_data;
     int i;
 
-    av_frame_free(&avctx->coded_frame);
-
     if (ctx->tdata) {
         for (i = 0; i < avctx->thread_count; i++)
             av_freep(&ctx->tdata[i].nodes);
@@ -1122,14 +1122,15 @@ static av_cold int encode_init(AVCodecContext *avctx)
     int mps;
     int i, j;
     int min_quant, max_quant;
-    int interlaced = !!(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
+    int interlaced = !!(avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT);
 
     avctx->bits_per_raw_sample = 10;
-    avctx->coded_frame = av_frame_alloc();
-    if (!avctx->coded_frame)
-        return AVERROR(ENOMEM);
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
     avctx->coded_frame->key_frame = 1;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
     ctx->fdct      = prores_fdct;
     ctx->scantable = interlaced ? ff_prores_interlaced_scan
@@ -1346,7 +1347,7 @@ AVCodec ff_prores_ks_encoder = {
     .init           = encode_init,
     .close          = encode_close,
     .encode2        = encode_frame,
-    .capabilities   = CODEC_CAP_SLICE_THREADS,
+    .capabilities   = AV_CODEC_CAP_SLICE_THREADS,
     .pix_fmts       = (const enum AVPixelFormat[]) {
                           AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
                           AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE
diff --git a/libavcodec/psymodel.c b/libavcodec/psymodel.c
index 824eefb7..6274a49e 100644
--- a/libavcodec/psymodel.c
+++ b/libavcodec/psymodel.c
@@ -39,6 +39,7 @@ av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, int num_lens,
     ctx->group     = av_mallocz_array(sizeof(ctx->group[0]), num_groups);
     ctx->bands     = av_malloc_array (sizeof(ctx->bands[0]),      num_lens);
     ctx->num_bands = av_malloc_array (sizeof(ctx->num_bands[0]),  num_lens);
+    ctx->cutoff    = avctx->cutoff;
 
     if (!ctx->ch || !ctx->group || !ctx->bands || !ctx->num_bands) {
         ff_psy_end(ctx);
@@ -109,24 +110,24 @@ av_cold struct FFPsyPreprocessContext* ff_psy_preprocess_init(AVCodecContext *av
         return NULL;
     ctx->avctx = avctx;
 
-    if (avctx->cutoff > 0)
-        cutoff_coeff = 2.0 * avctx->cutoff / avctx->sample_rate;
-
-    if (!cutoff_coeff && avctx->codec_id == AV_CODEC_ID_AAC)
-        cutoff_coeff = 2.0 * AAC_CUTOFF(avctx) / avctx->sample_rate;
-
-    if (cutoff_coeff && cutoff_coeff < 0.98)
-    ctx->fcoeffs = ff_iir_filter_init_coeffs(avctx, FF_FILTER_TYPE_BUTTERWORTH,
-                                             FF_FILTER_MODE_LOWPASS, FILT_ORDER,
-                                             cutoff_coeff, 0.0, 0.0);
-    if (ctx->fcoeffs) {
-        ctx->fstate = av_mallocz_array(sizeof(ctx->fstate[0]), avctx->channels);
-        if (!ctx->fstate) {
-            av_free(ctx);
-            return NULL;
+    /* AAC has its own LP method */
+    if (avctx->codec_id != AV_CODEC_ID_AAC) {
+        if (avctx->cutoff > 0)
+            cutoff_coeff = 2.0 * avctx->cutoff / avctx->sample_rate;
+
+        if (cutoff_coeff && cutoff_coeff < 0.98)
+        ctx->fcoeffs = ff_iir_filter_init_coeffs(avctx, FF_FILTER_TYPE_BUTTERWORTH,
+                                                 FF_FILTER_MODE_LOWPASS, FILT_ORDER,
+                                                 cutoff_coeff, 0.0, 0.0);
+        if (ctx->fcoeffs) {
+            ctx->fstate = av_mallocz_array(sizeof(ctx->fstate[0]), avctx->channels);
+            if (!ctx->fstate) { /* allocation failed: abort init instead of writing through NULL below */
+                av_free(ctx);
+                return NULL;
+            }
+            for (i = 0; i < avctx->channels; i++)
+                ctx->fstate[i] = ff_iir_filter_init_state(FILT_ORDER);
         }
-        for (i = 0; i < avctx->channels; i++)
-            ctx->fstate[i] = ff_iir_filter_init_state(FILT_ORDER);
     }
 
     ff_iir_filter_init(&ctx->fiir);
diff --git a/libavcodec/psymodel.h b/libavcodec/psymodel.h
index 75261ba4..35d184c7 100644
--- a/libavcodec/psymodel.h
+++ b/libavcodec/psymodel.h
@@ -29,7 +29,20 @@
 /** maximum number of channels */
 #define PSY_MAX_CHANS 20
 
-#define AAC_CUTOFF(s) ((s)->bit_rate ? FFMIN3(4000 + (s)->bit_rate/8, 12000 + (s)->bit_rate/32, (s)->sample_rate / 2) : ((s)->sample_rate / 2))
+/* cutoff for VBR is purposely increased, since LP filtering actually
+ * hinders VBR performance rather than the opposite
+ */
+#define AAC_CUTOFF_FROM_BITRATE(bit_rate,channels,sample_rate) ((bit_rate) ? FFMIN3(FFMIN3( \
+    FFMAX((bit_rate)/(channels)/5, (bit_rate)/(channels)*15/32 - 5500), \
+    3000 + (bit_rate)/(channels)/4, \
+    12000 + (bit_rate)/(channels)/16), \
+    22000, \
+    (sample_rate) / 2): ((sample_rate) / 2))
+#define AAC_CUTOFF(s) ( \
+    ((s)->flags & AV_CODEC_FLAG_QSCALE) \
+    ? (s)->sample_rate / 2 \
+    : AAC_CUTOFF_FROM_BITRATE((s)->bit_rate, (s)->channels, (s)->sample_rate) \
+)
 
 /**
  * single band psychoacoustic information
@@ -38,8 +51,7 @@ typedef struct FFPsyBand {
     int   bits;
     float energy;
     float threshold;
-    float distortion;
-    float perceptual_weight;
+    float spread;    /* Energy spread over the band */
 } FFPsyBand;
 
 /**
@@ -67,6 +79,7 @@ typedef struct FFPsyWindowInfo {
     int window_shape;                 ///< window shape (sine/KBD/whatever)
     int num_windows;                  ///< number of windows in a frame
     int grouping[8];                  ///< window grouping (for e.g. AAC)
+    float clipping[8];                ///< maximum absolute normalized intensity in the given window for clip avoidance
     int *window_sizes;                ///< sequence of window sizes inside one frame (for eg. WMA)
 } FFPsyWindowInfo;
 
@@ -80,6 +93,7 @@ typedef struct FFPsyContext {
     FFPsyChannel      *ch;            ///< single channel information
     FFPsyChannelGroup *group;         ///< channel group information
     int num_groups;                   ///< number of channel groups
+    int cutoff;                       ///< lowpass frequency cutoff for analysis
 
     uint8_t **bands;                  ///< scalefactor band sizes for possible frame sizes
     int     *num_bands;               ///< number of scalefactor bands for possible frame sizes
@@ -88,6 +102,7 @@ typedef struct FFPsyContext {
     struct {
         int size;                     ///< size of the bitresevoir in bits
         int bits;                     ///< number of bits used in the bitresevoir
+        int alloc;                    ///< number of bits allocated by the psy, or -1 if no allocation was done
     } bitres;
 
     void* model_priv_data;            ///< psychoacoustic model implementation private data
diff --git a/libavcodec/pthread.c b/libavcodec/pthread.c
index 407ca2e2..57247158 100644
--- a/libavcodec/pthread.c
+++ b/libavcodec/pthread.c
@@ -45,18 +45,18 @@
  */
 static void validate_thread_parameters(AVCodecContext *avctx)
 {
-    int frame_threading_supported = (avctx->codec->capabilities & CODEC_CAP_FRAME_THREADS)
-                                && !(avctx->flags & CODEC_FLAG_TRUNCATED)
-                                && !(avctx->flags & CODEC_FLAG_LOW_DELAY)
-                                && !(avctx->flags2 & CODEC_FLAG2_CHUNKS);
+    int frame_threading_supported = (avctx->codec->capabilities & AV_CODEC_CAP_FRAME_THREADS)
+                                && !(avctx->flags  & AV_CODEC_FLAG_TRUNCATED)
+                                && !(avctx->flags  & AV_CODEC_FLAG_LOW_DELAY)
+                                && !(avctx->flags2 & AV_CODEC_FLAG2_CHUNKS);
     if (avctx->thread_count == 1) {
         avctx->active_thread_type = 0;
     } else if (frame_threading_supported && (avctx->thread_type & FF_THREAD_FRAME)) {
         avctx->active_thread_type = FF_THREAD_FRAME;
-    } else if (avctx->codec->capabilities & CODEC_CAP_SLICE_THREADS &&
+    } else if (avctx->codec->capabilities & AV_CODEC_CAP_SLICE_THREADS &&
                avctx->thread_type & FF_THREAD_SLICE) {
         avctx->active_thread_type = FF_THREAD_SLICE;
-    } else if (!(avctx->codec->capabilities & CODEC_CAP_AUTO_THREADS)) {
+    } else if (!(avctx->codec->capabilities & AV_CODEC_CAP_AUTO_THREADS)) {
         avctx->thread_count       = 1;
         avctx->active_thread_type = 0;
     }
diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c
index 77bb6fdb..b77dd1e5 100644
--- a/libavcodec/pthread_frame.c
+++ b/libavcodec/pthread_frame.c
@@ -26,14 +26,6 @@
 
 #include <stdint.h>
 
-#if HAVE_PTHREADS
-#include <pthread.h>
-#elif HAVE_W32THREADS
-#include "compat/w32pthreads.h"
-#elif HAVE_OS2THREADS
-#include "compat/os2threads.h"
-#endif
-
 #include "avcodec.h"
 #include "internal.h"
 #include "pthread_internal.h"
@@ -49,6 +41,7 @@
 #include "libavutil/log.h"
 #include "libavutil/mem.h"
 #include "libavutil/opt.h"
+#include "libavutil/thread.h"
 
 /**
  * Context used by codec threads and stored in their AVCodecInternal thread_ctx.
@@ -122,13 +115,8 @@ typedef struct FrameThreadContext {
     int die;                       ///< Set when threads should exit.
 } FrameThreadContext;
 
-#if FF_API_GET_BUFFER
-#define THREAD_SAFE_CALLBACKS(avctx) \
-((avctx)->thread_safe_callbacks || (!(avctx)->get_buffer && (avctx)->get_buffer2 == avcodec_default_get_buffer2))
-#else
 #define THREAD_SAFE_CALLBACKS(avctx) \
 ((avctx)->thread_safe_callbacks || (avctx)->get_buffer2 == avcodec_default_get_buffer2)
-#endif
 
 /**
  * Codec worker thread.
@@ -192,6 +180,7 @@ static attribute_align_arg void *frame_worker_thread(void *arg)
  * @param dst The destination context.
  * @param src The source context.
  * @param for_user 0 if the destination is a codec thread, 1 if the destination is the user's thread
+ * @return 0 on success, negative error code on failure
  */
 static int update_context_from_thread(AVCodecContext *dst, AVCodecContext *src, int for_user)
 {
@@ -242,7 +231,11 @@ FF_ENABLE_DEPRECATION_WARNINGS
 
     if (for_user) {
         dst->delay       = src->thread_count - 1;
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
         dst->coded_frame = src->coded_frame;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
     } else {
         if (dst->codec->update_thread_context)
             err = dst->codec->update_thread_context(dst, src);
@@ -265,12 +258,6 @@ static int update_context_from_user(AVCodecContext *dst, AVCodecContext *src)
 
     dst->draw_horiz_band= src->draw_horiz_band;
     dst->get_buffer2    = src->get_buffer2;
-#if FF_API_GET_BUFFER
-FF_DISABLE_DEPRECATION_WARNINGS
-    dst->get_buffer     = src->get_buffer;
-    dst->release_buffer = src->release_buffer;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
 
     dst->opaque   = src->opaque;
     dst->debug    = src->debug;
@@ -327,7 +314,8 @@ static int submit_packet(PerThreadContext *p, AVPacket *avpkt)
     PerThreadContext *prev_thread = fctx->prev_thread;
     const AVCodec *codec = p->avctx->codec;
 
-    if (!avpkt->size && !(codec->capabilities & CODEC_CAP_DELAY)) return 0;
+    if (!avpkt->size && !(codec->capabilities & AV_CODEC_CAP_DELAY))
+        return 0;
 
     pthread_mutex_lock(&p->mutex);
 
@@ -362,14 +350,9 @@ static int submit_packet(PerThreadContext *p, AVPacket *avpkt)
      * and it calls back to the client here.
      */
 
-FF_DISABLE_DEPRECATION_WARNINGS
     if (!p->avctx->thread_safe_callbacks && (
          p->avctx->get_format != avcodec_default_get_format ||
-#if FF_API_GET_BUFFER
-         p->avctx->get_buffer ||
-#endif
          p->avctx->get_buffer2 != avcodec_default_get_buffer2)) {
-FF_ENABLE_DEPRECATION_WARNINGS
         while (p->state != STATE_SETUP_FINISHED && p->state != STATE_INPUT_READY) {
             int call_done = 1;
             pthread_mutex_lock(&p->progress_mutex);
@@ -475,7 +458,7 @@ int ff_thread_decode_frame(AVCodecContext *avctx,
     fctx->next_finished = finished;
 
     /*
-     * When no frame was found while flushing, but an error occured in
+     * When no frame was found while flushing, but an error occurred in
      * any thread, return it instead of 0.
      * Otherwise the error can get lost.
      */
@@ -803,13 +786,8 @@ static int thread_get_buffer_internal(AVCodecContext *avctx, ThreadFrame *f, int
     }
 
     pthread_mutex_lock(&p->parent->buffer_mutex);
-FF_DISABLE_DEPRECATION_WARNINGS
-    if (avctx->thread_safe_callbacks || (
-#if FF_API_GET_BUFFER
-        !avctx->get_buffer &&
-#endif
-        avctx->get_buffer2 == avcodec_default_get_buffer2)) {
-FF_ENABLE_DEPRECATION_WARNINGS
+    if (avctx->thread_safe_callbacks ||
+        avctx->get_buffer2 == avcodec_default_get_buffer2) {
         err = ff_get_buffer(avctx, f->f, flags);
     } else {
         pthread_mutex_lock(&p->progress_mutex);
@@ -828,7 +806,6 @@ FF_ENABLE_DEPRECATION_WARNINGS
     }
     if (!THREAD_SAFE_CALLBACKS(avctx) && !avctx->codec->update_thread_context)
         ff_thread_finish_setup(avctx);
-
     if (err)
         av_buffer_unref(&f->progress);
 
@@ -876,15 +853,9 @@ void ff_thread_release_buffer(AVCodecContext *avctx, ThreadFrame *f)
     PerThreadContext *p = avctx->internal->thread_ctx;
     FrameThreadContext *fctx;
     AVFrame *dst, *tmp;
-FF_DISABLE_DEPRECATION_WARNINGS
     int can_direct_free = !(avctx->active_thread_type & FF_THREAD_FRAME) ||
                           avctx->thread_safe_callbacks                   ||
-                          (
-#if FF_API_GET_BUFFER
-                           !avctx->get_buffer &&
-#endif
-                           avctx->get_buffer2 == avcodec_default_get_buffer2);
-FF_ENABLE_DEPRECATION_WARNINGS
+                          avctx->get_buffer2 == avcodec_default_get_buffer2;
 
     if (!f->f || !f->f->buf[0])
         return;
diff --git a/libavcodec/pthread_slice.c b/libavcodec/pthread_slice.c
index c8e69f0a..96a7643f 100644
--- a/libavcodec/pthread_slice.c
+++ b/libavcodec/pthread_slice.c
@@ -24,22 +24,16 @@
 
 #include "config.h"
 
-#if HAVE_PTHREADS
-#include <pthread.h>
-#elif HAVE_W32THREADS
-#include "compat/w32pthreads.h"
-#elif HAVE_OS2THREADS
-#include "compat/os2threads.h"
-#endif
-
 #include "avcodec.h"
 #include "internal.h"
 #include "pthread_internal.h"
 #include "thread.h"
 
+#include "libavutil/avassert.h"
 #include "libavutil/common.h"
 #include "libavutil/cpu.h"
 #include "libavutil/mem.h"
+#include "libavutil/thread.h"
 
 typedef int (action_func)(AVCodecContext *c, void *arg);
 typedef int (action_func2)(AVCodecContext *c, void *arg, int jobnr, int threadnr);
@@ -50,7 +44,6 @@ typedef struct SliceThreadContext {
     action_func2 *func2;
     void *args;
     int *rets;
-    int rets_count;
     int job_count;
     int job_size;
 
@@ -80,6 +73,7 @@ static void* attribute_align_arg worker(void *v)
     pthread_mutex_lock(&c->current_job_lock);
     self_id = c->current_job++;
     for (;;){
+        int ret;
         while (our_job >= c->job_count) {
             if (c->current_job == thread_count + c->job_count)
                 pthread_cond_signal(&c->last_job_cond);
@@ -96,8 +90,10 @@ static void* attribute_align_arg worker(void *v)
         }
         pthread_mutex_unlock(&c->current_job_lock);
 
-        c->rets[our_job%c->rets_count] = c->func ? c->func(avctx, (char*)c->args + our_job*c->job_size):
-                                                   c->func2(avctx, c->args, our_job, self_id);
+        ret = c->func ? c->func(avctx, (char*)c->args + our_job*c->job_size):
+                                c->func2(avctx, c->args, our_job, self_id);
+        if (c->rets)
+            c->rets[our_job%c->job_count] = ret;
 
         pthread_mutex_lock(&c->current_job_lock);
         our_job = c->current_job++;
@@ -146,7 +142,6 @@ static av_always_inline void thread_park_workers(SliceThreadContext *c, int thre
 static int thread_execute(AVCodecContext *avctx, action_func* func, void *arg, int *ret, int job_count, int job_size)
 {
     SliceThreadContext *c = avctx->internal->thread_ctx;
-    int dummy_ret;
 
     if (!(avctx->active_thread_type&FF_THREAD_SLICE) || avctx->thread_count <= 1)
         return avcodec_default_execute(avctx, func, arg, ret, job_count, job_size);
@@ -163,10 +158,8 @@ static int thread_execute(AVCodecContext *avctx, action_func* func, void *arg, i
     c->func = func;
     if (ret) {
         c->rets = ret;
-        c->rets_count = job_count;
     } else {
-        c->rets = &dummy_ret;
-        c->rets_count = 1;
+        c->rets = NULL;
     }
     c->current_execute++;
     pthread_cond_broadcast(&c->current_job_cond);
@@ -193,6 +186,12 @@ int ff_slice_thread_init(AVCodecContext *avctx)
     w32thread_init();
 #endif
 
+    // We cannot do this in the encoder init as the threads are created before
+    if (av_codec_is_encoder(avctx->codec) &&
+        avctx->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
+        avctx->height > 2800)
+        thread_count = avctx->thread_count = 1;
+
     if (!thread_count) {
         int nb_cpus = av_cpu_count();
         if  (avctx->height)
@@ -277,11 +276,19 @@ int ff_alloc_entries(AVCodecContext *avctx, int count)
 
     if (avctx->active_thread_type & FF_THREAD_SLICE)  {
         SliceThreadContext *p = avctx->internal->thread_ctx;
+
+        if (p->entries) {
+            av_assert0(p->thread_count == avctx->thread_count);
+            av_freep(&p->entries);
+        }
+
         p->thread_count  = avctx->thread_count;
         p->entries       = av_mallocz_array(count, sizeof(int));
 
-        p->progress_mutex = av_malloc_array(p->thread_count, sizeof(pthread_mutex_t));
-        p->progress_cond  = av_malloc_array(p->thread_count, sizeof(pthread_cond_t));
+        if (!p->progress_mutex) {
+            p->progress_mutex = av_malloc_array(p->thread_count, sizeof(pthread_mutex_t));
+            p->progress_cond  = av_malloc_array(p->thread_count, sizeof(pthread_cond_t));
+        }
 
         if (!p->entries || !p->progress_mutex || !p->progress_cond) {
             av_freep(&p->entries);
diff --git a/libavcodec/ptx.c b/libavcodec/ptx.c
index 8c3abd7d..42147f4a 100644
--- a/libavcodec/ptx.c
+++ b/libavcodec/ptx.c
@@ -88,5 +88,5 @@ AVCodec ff_ptx_decoder = {
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_PTX,
     .decode         = ptx_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/put_bits.h b/libavcodec/put_bits.h
index 5b1bc8b8..68ed3911 100644
--- a/libavcodec/put_bits.h
+++ b/libavcodec/put_bits.h
@@ -105,7 +105,7 @@ static inline void flush_put_bits(PutBitContext *s)
         s->bit_buf <<= s->bit_left;
 #endif
     while (s->bit_left < 32) {
-        /* XXX: should test end of buffer */
+        av_assert0(s->buf_ptr < s->buf_end);
 #ifdef BITSTREAM_WRITER_LE
         *s->buf_ptr++ = s->bit_buf;
         s->bit_buf  >>= 8;
@@ -163,9 +163,13 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value)
 #ifdef BITSTREAM_WRITER_LE
     bit_buf |= value << (32 - bit_left);
     if (n >= bit_left) {
-        av_assert2(s->buf_ptr+3<s->buf_end);
-        AV_WL32(s->buf_ptr, bit_buf);
-        s->buf_ptr += 4;
+        if (3 < s->buf_end - s->buf_ptr) {
+            AV_WL32(s->buf_ptr, bit_buf);
+            s->buf_ptr += 4;
+        } else {
+            av_log(NULL, AV_LOG_ERROR, "Internal error, put_bits buffer too small\n");
+            av_assert2(0);
+        }
         bit_buf     = value >> bit_left;
         bit_left   += 32;
     }
@@ -177,9 +181,13 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value)
     } else {
         bit_buf   <<= bit_left;
         bit_buf    |= value >> (n - bit_left);
-        av_assert2(s->buf_ptr+3<s->buf_end);
-        AV_WB32(s->buf_ptr, bit_buf);
-        s->buf_ptr += 4;
+        if (3 < s->buf_end - s->buf_ptr) {
+            AV_WB32(s->buf_ptr, bit_buf);
+            s->buf_ptr += 4;
+        } else {
+            av_log(NULL, AV_LOG_ERROR, "Internal error, put_bits buffer too small\n");
+            av_assert2(0);
+        }
         bit_left   += 32 - n;
         bit_buf     = value;
     }
diff --git a/libavcodec/qcelpdec.c b/libavcodec/qcelpdec.c
index 22564edb..adb3e823 100644
--- a/libavcodec/qcelpdec.c
+++ b/libavcodec/qcelpdec.c
@@ -797,6 +797,6 @@ AVCodec ff_qcelp_decoder = {
     .id             = AV_CODEC_ID_QCELP,
     .init           = qcelp_decode_init,
     .decode         = qcelp_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .priv_data_size = sizeof(QCELPContext),
 };
diff --git a/libavcodec/qdm2.c b/libavcodec/qdm2.c
index a02c5e53..0b6dcd68 100644
--- a/libavcodec/qdm2.c
+++ b/libavcodec/qdm2.c
@@ -1888,5 +1888,5 @@ AVCodec ff_qdm2_decoder = {
     .init             = qdm2_decode_init,
     .close            = qdm2_decode_close,
     .decode           = qdm2_decode_frame,
-    .capabilities     = CODEC_CAP_DR1,
+    .capabilities     = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/qdrw.c b/libavcodec/qdrw.c
index 838f8365..0a31b416 100644
--- a/libavcodec/qdrw.c
+++ b/libavcodec/qdrw.c
@@ -339,5 +339,5 @@ AVCodec ff_qdraw_decoder = {
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_QDRAW,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/qpeg.c b/libavcodec/qpeg.c
index 71f322b8..9eaf9b80 100644
--- a/libavcodec/qpeg.c
+++ b/libavcodec/qpeg.c
@@ -351,5 +351,5 @@ AVCodec ff_qpeg_decoder = {
     .close          = decode_end,
     .decode         = decode_frame,
     .flush          = decode_flush,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/qpeldsp.c b/libavcodec/qpeldsp.c
index 1c0ec637..6e52b336 100644
--- a/libavcodec/qpeldsp.c
+++ b/libavcodec/qpeldsp.c
@@ -811,4 +811,6 @@ av_cold void ff_qpeldsp_init(QpelDSPContext *c)
 
     if (ARCH_X86)
         ff_qpeldsp_init_x86(c);
+    if (ARCH_MIPS)
+        ff_qpeldsp_init_mips(c);
 }
diff --git a/libavcodec/qpeldsp.h b/libavcodec/qpeldsp.h
index b51420a6..91019eda 100644
--- a/libavcodec/qpeldsp.h
+++ b/libavcodec/qpeldsp.h
@@ -78,5 +78,6 @@ typedef struct QpelDSPContext {
 void ff_qpeldsp_init(QpelDSPContext *c);
 
 void ff_qpeldsp_init_x86(QpelDSPContext *c);
+void ff_qpeldsp_init_mips(QpelDSPContext *c);
 
 #endif /* AVCODEC_QPELDSP_H */
diff --git a/libavcodec/qsv.c b/libavcodec/qsv.c
index 31be9d1f..4c8e6b01 100644
--- a/libavcodec/qsv.c
+++ b/libavcodec/qsv.c
@@ -19,7 +19,12 @@
  */
 
 #include <mfx/mfxvideo.h>
+#include <mfx/mfxplugin.h>
 
+#include <stdio.h>
+#include <string.h>
+
+#include "libavutil/avstring.h"
 #include "libavutil/error.h"
 
 #include "avcodec.h"
@@ -30,6 +35,10 @@ int ff_qsv_codec_id_to_mfx(enum AVCodecID codec_id)
     switch (codec_id) {
     case AV_CODEC_ID_H264:
         return MFX_CODEC_AVC;
+#if QSV_VERSION_ATLEAST(1, 8)
+    case AV_CODEC_ID_HEVC:
+        return MFX_CODEC_HEVC;
+#endif
     case AV_CODEC_ID_MPEG1VIDEO:
     case AV_CODEC_ID_MPEG2VIDEO:
         return MFX_CODEC_MPEG2;
@@ -76,8 +85,91 @@ int ff_qsv_error(int mfx_err)
         return AVERROR_UNKNOWN;
     }
 }
+static int ff_qsv_set_display_handle(AVCodecContext *avctx, QSVSession *qs)
+{
+    // this code is only required for Linux.  It searches for a valid
+    // display handle.  First in /dev/dri/renderD then in /dev/dri/card
+#ifdef AVCODEC_QSV_LINUX_SESSION_HANDLE
+    // VAAPI display handle
+    int ret = 0;
+    VADisplay va_dpy = NULL;
+    VAStatus va_res = VA_STATUS_SUCCESS;
+    int major_version = 0, minor_version = 0;
+    int fd = -1;
+    char adapterpath[256];
+    int adapter_num;
+
+    qs->fd_display = -1;
+    qs->va_display = NULL;
+
+    //search for valid graphics device
+    for (adapter_num = 0;adapter_num < 6;adapter_num++) {
+
+        if (adapter_num<3) {
+            snprintf(adapterpath,sizeof(adapterpath),
+                "/dev/dri/renderD%d", adapter_num+128);
+        } else {
+            snprintf(adapterpath,sizeof(adapterpath),
+                "/dev/dri/card%d", adapter_num-3);
+        }
+
+        fd = open(adapterpath, O_RDWR);
+        if (fd < 0) {
+            av_log(avctx, AV_LOG_ERROR,
+                "mfx init: %s fd open failed\n", adapterpath);
+            continue;
+        }
+
+        va_dpy = vaGetDisplayDRM(fd);
+        if (!va_dpy) {
+            av_log(avctx, AV_LOG_ERROR,
+                "mfx init: %s vaGetDisplayDRM failed\n", adapterpath);
+            close(fd);
+            continue;
+        }
 
-int ff_qsv_init_internal_session(AVCodecContext *avctx, mfxSession *session)
+        va_res = vaInitialize(va_dpy, &major_version, &minor_version);
+        if (VA_STATUS_SUCCESS != va_res) {
+            av_log(avctx, AV_LOG_ERROR,
+                "mfx init: %s vaInitialize failed\n", adapterpath);
+            close(fd);
+            fd = -1;
+            continue;
+        } else {
+            av_log(avctx, AV_LOG_VERBOSE,
+            "mfx initialization: %s vaInitialize successful\n",adapterpath);
+            qs->fd_display = fd;
+            qs->va_display = va_dpy;
+            ret = MFXVideoCORE_SetHandle(qs->session,
+                  (mfxHandleType)MFX_HANDLE_VA_DISPLAY, (mfxHDL)va_dpy);
+            if (ret < 0) {
+                av_log(avctx, AV_LOG_ERROR,
+                "Error %d during set display handle\n", ret);
+                return ff_qsv_error(ret);
+            }
+            break;
+        }
+    }
+#endif //AVCODEC_QSV_LINUX_SESSION_HANDLE
+    return 0;
+}
+/**
+ * @brief Initialize a MSDK session
+ *
+ * Media SDK is based on sessions, so this is the prerequisite
+ * initialization for HW acceleration.  For Windows the session is
+ * complete and ready to use, for Linux a display handle is
+ * required.  For releases of Media Server Studio >= 2015 R4 the
+ * render nodes interface is preferred (/dev/dri/renderD).
+ * Using Media Server Studio 2015 R4 or newer is recommended
+ * but the older /dev/dri/card interface is also searched
+ * for broader compatibility.
+ *
+ * @param avctx    ffmpeg metadata for this codec context
+ * @param qs       wrapper holding the MSDK session that gets initialized
+ */
+int ff_qsv_init_internal_session(AVCodecContext *avctx, QSVSession *qs,
+                                 const char *load_plugins)
 {
     mfxIMPL impl   = MFX_IMPL_AUTO_ANY;
     mfxVersion ver = { { QSV_VERSION_MINOR, QSV_VERSION_MAJOR } };
@@ -85,13 +177,17 @@ int ff_qsv_init_internal_session(AVCodecContext *avctx, mfxSession *session)
     const char *desc;
     int ret;
 
-    ret = MFXInit(impl, &ver, session);
+    ret = MFXInit(impl, &ver, &qs->session);
     if (ret < 0) {
         av_log(avctx, AV_LOG_ERROR, "Error initializing an internal MFX session\n");
         return ff_qsv_error(ret);
     }
 
-    MFXQueryIMPL(*session, &impl);
+    ret = ff_qsv_set_display_handle(avctx, qs);
+    if (ret < 0)
+        return ret;
+
+    MFXQueryIMPL(qs->session, &impl);
 
     switch (MFX_IMPL_BASETYPE(impl)) {
     case MFX_IMPL_SOFTWARE:
@@ -107,9 +203,88 @@ int ff_qsv_init_internal_session(AVCodecContext *avctx, mfxSession *session)
         desc = "unknown";
     }
 
+    /* load_plugins is a ':'-separated list of plugin UIDs, each written as
+     * 2 * sizeof(uid.Data) hexadecimal digits */
+    if (load_plugins && *load_plugins) {
+        while (*load_plugins) {
+            mfxPluginUID uid;
+            int i, err = 0;
+
+            char *plugin = av_get_token(&load_plugins, ":");
+            if (!plugin)
+                return AVERROR(ENOMEM);
+            if (strlen(plugin) != 2 * sizeof(uid.Data)) {
+                av_log(avctx, AV_LOG_ERROR, "Invalid plugin UID length\n");
+                err = AVERROR(EINVAL);
+                goto load_plugin_fail;
+            }
+
+            for (i = 0; i < sizeof(uid.Data); i++) {
+                err = sscanf(plugin + 2 * i, "%2hhx", uid.Data + i);
+                if (err != 1) {
+                    av_log(avctx, AV_LOG_ERROR, "Invalid plugin UID\n");
+                    err = AVERROR(EINVAL);
+                    goto load_plugin_fail;
+                }
+
+            }
+
+            ret = MFXVideoUSER_Load(qs->session, &uid, 1);
+            if (ret < 0) {
+                av_log(avctx, AV_LOG_ERROR, "Could not load the requested plugin: %s\n",
+                       plugin);
+                err = ff_qsv_error(ret);
+                goto load_plugin_fail;
+            }
+
+            /* av_get_token() stops on the ':' separator without consuming
+             * it; step over it so the next pass parses the next UID */
+            if (*load_plugins)
+                load_plugins++;
+
+load_plugin_fail:
+            av_freep(&plugin);
+            if (err < 0)
+                return err;
+        }
+    }
+
     av_log(avctx, AV_LOG_VERBOSE,
            "Initialized an internal MFX session using %s implementation\n",
            desc);
 
     return 0;
 }
+
+/**
+ * Release the resources held by an internal QSV session.
+ *
+ * Closes the MFX session if one is set and, when
+ * AVCODEC_QSV_LINUX_SESSION_HANDLE is defined, also terminates the VA
+ * display and closes the display file descriptor. Every released field is
+ * reset, so a second call on the same QSVSession does nothing.
+ *
+ * @param qs session wrapper to tear down
+ * @return always 0
+ */
+int ff_qsv_close_internal_session(QSVSession *qs)
+{
+    if (qs->session) {
+        MFXClose(qs->session);
+        qs->session = NULL;
+    }
+#ifdef AVCODEC_QSV_LINUX_SESSION_HANDLE
+    if (qs->va_display) {
+        vaTerminate(qs->va_display);
+        qs->va_display = NULL;
+    }
+    /* NOTE(review): only descriptors > 0 are closed; presumably this keeps a
+     * zero-filled QSVSession that was never probed from closing fd 0 --
+     * confirm against the callers */
+    if (qs->fd_display > 0) {
+        close(qs->fd_display);
+        qs->fd_display = -1;
+    }
+#endif
+    return 0;
+}
diff --git a/libavcodec/qsv.h b/libavcodec/qsv.h
index e7487c88..b77158ec 100644
--- a/libavcodec/qsv.h
+++ b/libavcodec/qsv.h
@@ -23,12 +23,78 @@
 
 #include <mfx/mfxvideo.h>
 
+#include "libavutil/buffer.h"
+
+/**
+ * This struct is used for communicating QSV parameters between libavcodec and
+ * the caller. It is managed by the caller and must be assigned to
+ * AVCodecContext.hwaccel_context.
+ * - decoding: hwaccel_context must be set on return from the get_format()
+ *             callback
+ * - encoding: hwaccel_context must be set before avcodec_open2()
+ */
 typedef struct AVQSVContext {
+    /**
+     * If non-NULL, the session to use for encoding or decoding.
+     * Otherwise, libavcodec will try to create an internal session.
+     */
     mfxSession session;
+
+    /**
+     * The IO pattern to use.
+     */
     int iopattern;
 
+    /**
+     * Extra buffers to pass to encoder or decoder initialization.
+     */
     mfxExtBuffer **ext_buffers;
     int         nb_ext_buffers;
+
+    /**
+     * Encoding only. If this field is set to non-zero by the caller, libavcodec
+     * will create an mfxExtOpaqueSurfaceAlloc extended buffer and pass it to
+     * the encoder initialization. This only makes sense if iopattern is also
+     * set to MFX_IOPATTERN_IN_OPAQUE_MEMORY.
+     *
+     * The number of allocated opaque surfaces will be the sum of the number
+     * required by the encoder and the user-provided value nb_opaque_surfaces.
+     * The array of the opaque surfaces will be exported to the caller through
+     * the opaque_surfaces field.
+     */
+    int opaque_alloc;
+
+    /**
+     * Encoding only, and only if opaque_alloc is set to non-zero. Before
+     * calling avcodec_open2(), the caller should set this field to the number
+     * of extra opaque surfaces to allocate beyond what is required by the
+     * encoder.
+     *
+     * On return from avcodec_open2(), this field will be set by libavcodec to
+     * the total number of allocated opaque surfaces.
+     */
+    int nb_opaque_surfaces;
+
+    /**
+     * Encoding only, and only if opaque_alloc is set to non-zero. On return
+     * from avcodec_open2(), this field will be used by libavcodec to export the
+     * array of the allocated opaque surfaces to the caller, so they can be
+     * passed to other parts of the pipeline.
+     *
+     * The buffer reference exported here is owned and managed by libavcodec,
+     * the callers should make their own reference with av_buffer_ref() and free
+     * it with av_buffer_unref() when it is no longer needed.
+     *
+     * The buffer data is an nb_opaque_surfaces-sized array of mfxFrameSurface1.
+     */
+    AVBufferRef *opaque_surfaces;
+
+    /**
+     * Encoding only, and only if opaque_alloc is set to non-zero. On return
+     * from avcodec_open2(), this field will be set to the surface type used in
+     * the opaque allocation request.
+     */
+    int opaque_alloc_type;
 } AVQSVContext;
 
 /**
diff --git a/libavcodec/qsv_internal.h b/libavcodec/qsv_internal.h
index 86fca5fd..c235e07c 100644
--- a/libavcodec/qsv_internal.h
+++ b/libavcodec/qsv_internal.h
@@ -21,24 +21,56 @@
 #ifndef AVCODEC_QSV_INTERNAL_H
 #define AVCODEC_QSV_INTERNAL_H
 
+#if CONFIG_VAAPI
+#define AVCODEC_QSV_LINUX_SESSION_HANDLE
+#endif //CONFIG_VAAPI
+
+#ifdef AVCODEC_QSV_LINUX_SESSION_HANDLE
+#include <stdio.h>
+#include <string.h>
+#if HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <fcntl.h>
+#include <va/va.h>
+#include <va/va_drm.h>
+#endif
+
 #include <mfx/mfxvideo.h>
 
 #include "libavutil/frame.h"
 
 #define QSV_VERSION_MAJOR 1
-#define QSV_VERSION_MINOR 1
+#define QSV_VERSION_MINOR 9
 
 #define ASYNC_DEPTH_DEFAULT 4       // internal parallelism
 
+#define QSV_MAX_ENC_PAYLOAD 2       // # of mfxEncodeCtrl payloads supported
+
+#define QSV_VERSION_ATLEAST(MAJOR, MINOR)   \
+    (MFX_VERSION_MAJOR > (MAJOR) ||         \
+     MFX_VERSION_MAJOR == (MAJOR) && MFX_VERSION_MINOR >= (MINOR))
+
 typedef struct QSVFrame {
     AVFrame *frame;
     mfxFrameSurface1 *surface;
+    mfxEncodeCtrl enc_ctrl;
 
     mfxFrameSurface1 surface_internal;
 
+    int queued;
+
     struct QSVFrame *next;
 } QSVFrame;
 
+typedef struct QSVSession {
+    mfxSession session;
+#ifdef AVCODEC_QSV_LINUX_SESSION_HANDLE
+    int        fd_display;
+    VADisplay  va_display;
+#endif
+} QSVSession;
+
 /**
  * Convert a libmfx error code into a ffmpeg error code.
  */
@@ -46,6 +78,8 @@ int ff_qsv_error(int mfx_err);
 
 int ff_qsv_codec_id_to_mfx(enum AVCodecID codec_id);
 
-int ff_qsv_init_internal_session(AVCodecContext *avctx, mfxSession *session);
+int ff_qsv_init_internal_session(AVCodecContext *avctx, QSVSession *qs,
+                                 const char *load_plugins);
+int ff_qsv_close_internal_session(QSVSession *qs);
 
 #endif /* AVCODEC_QSV_INTERNAL_H */
diff --git a/libavcodec/qsvdec.c b/libavcodec/qsvdec.c
index 47709b50..9125700e 100644
--- a/libavcodec/qsvdec.c
+++ b/libavcodec/qsvdec.c
@@ -34,6 +34,7 @@
 
 #include "avcodec.h"
 #include "internal.h"
+#include "qsv.h"
 #include "qsv_internal.h"
 #include "qsvdec.h"
 
@@ -48,65 +49,117 @@ int ff_qsv_map_pixfmt(enum AVPixelFormat format)
     }
 }
 
-static int qsv_init_session(AVCodecContext *avctx, QSVContext *q, mfxSession session)
+static int qsv_decode_init(AVCodecContext *avctx, QSVContext *q, AVPacket *avpkt)
 {
-    if (!session) {
-        if (!q->internal_session) {
-            int ret = ff_qsv_init_internal_session(avctx, &q->internal_session);
-            if (ret < 0)
-                return ret;
-        }
+    mfxVideoParam param = { { 0 } };
+    mfxBitstream bs   = { { { 0 } } };
+    int ret;
+    enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,
+                                       AV_PIX_FMT_NV12,
+                                       AV_PIX_FMT_NONE };
 
-        q->session = q->internal_session;
-    } else {
-        q->session = session;
-    }
+    ret = ff_get_format(avctx, pix_fmts);
+    if (ret < 0)
+        return ret;
 
-    /* make sure the decoder is uninitialized */
-    MFXVideoDECODE_Close(q->session);
+    avctx->pix_fmt      = ret;
 
-    return 0;
-}
+    q->iopattern  = MFX_IOPATTERN_OUT_SYSTEM_MEMORY;
+    if (avctx->hwaccel_context) {
+        AVQSVContext *qsv = avctx->hwaccel_context;
 
-int ff_qsv_decode_init(AVCodecContext *avctx, QSVContext *q, mfxSession session)
-{
-    mfxVideoParam param = { { 0 } };
-    int ret;
+        q->session        = qsv->session;
+        q->iopattern      = qsv->iopattern;
+        q->ext_buffers    = qsv->ext_buffers;
+        q->nb_ext_buffers = qsv->nb_ext_buffers;
+    }
+    if (!q->session) {
+        if (!q->internal_qs.session) {
+            ret = ff_qsv_init_internal_session(avctx, &q->internal_qs,
+                                               q->load_plugins);
+            if (ret < 0)
+                return ret;
+        }
 
-    ret = qsv_init_session(avctx, q, session);
-    if (ret < 0) {
-        av_log(avctx, AV_LOG_ERROR, "Error initializing an MFX session\n");
-        return ret;
+        q->session = q->internal_qs.session;
     }
 
+    if (avpkt->size) {
+        bs.Data       = avpkt->data;
+        bs.DataLength = avpkt->size;
+        bs.MaxLength  = bs.DataLength;
+        bs.TimeStamp  = avpkt->pts;
+    } else
+        return AVERROR_INVALIDDATA;
 
     ret = ff_qsv_codec_id_to_mfx(avctx->codec_id);
-    if (ret < 0)
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Unsupported codec_id %08x\n", avctx->codec_id);
         return ret;
+    }
 
-    param.mfx.CodecId      = ret;
-    param.mfx.CodecProfile = avctx->profile;
-    param.mfx.CodecLevel   = avctx->level;
-
-    param.mfx.FrameInfo.BitDepthLuma   = 8;
-    param.mfx.FrameInfo.BitDepthChroma = 8;
-    param.mfx.FrameInfo.Shift          = 0;
-    param.mfx.FrameInfo.FourCC         = MFX_FOURCC_NV12;
-    param.mfx.FrameInfo.Width          = avctx->coded_width;
-    param.mfx.FrameInfo.Height         = avctx->coded_height;
-    param.mfx.FrameInfo.ChromaFormat   = MFX_CHROMAFORMAT_YUV420;
+    param.mfx.CodecId = ret;
 
+    ret = MFXVideoDECODE_DecodeHeader(q->session, &bs, &param);
+    if (MFX_ERR_MORE_DATA==ret) {
+        /* this code means that no header was found, so we return the packet
+           size to skip the current packet
+         */
+        return avpkt->size;
+    } else if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Decode header error %d\n", ret);
+        return ff_qsv_error(ret);
+    }
     param.IOPattern   = q->iopattern;
     param.AsyncDepth  = q->async_depth;
     param.ExtParam    = q->ext_buffers;
     param.NumExtParam = q->nb_ext_buffers;
+    param.mfx.FrameInfo.BitDepthLuma   = 8;
+    param.mfx.FrameInfo.BitDepthChroma = 8;
 
     ret = MFXVideoDECODE_Init(q->session, &param);
     if (ret < 0) {
-        av_log(avctx, AV_LOG_ERROR, "Error initializing the MFX video decoder\n");
+        if (MFX_ERR_INVALID_VIDEO_PARAM==ret) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "Error initializing the MFX video decoder, unsupported video\n");
+        } else {
+            av_log(avctx, AV_LOG_ERROR,
+                   "Error initializing the MFX video decoder %d\n", ret);
+        }
         return ff_qsv_error(ret);
     }
 
+    avctx->profile      = param.mfx.CodecProfile;
+    avctx->level        = param.mfx.CodecLevel;
+    avctx->coded_width  = param.mfx.FrameInfo.Width;
+    avctx->coded_height = param.mfx.FrameInfo.Height;
+    avctx->width        = param.mfx.FrameInfo.CropW;
+    avctx->height       = param.mfx.FrameInfo.CropH;
+
+    /* maximum decoder latency should not exceed the max DPB size for h.264 and
+       HEVC, which is 16 in both cases.
+       So we are pre-allocating a fifo big enough for 17 elements:
+     */
+    if (!q->async_fifo) {
+        q->async_fifo = av_fifo_alloc((1 + 16) *
+                                      (sizeof(mfxSyncPoint) + sizeof(QSVFrame*)));
+        if (!q->async_fifo)
+            return AVERROR(ENOMEM);
+    }
+
+    if (!q->input_fifo) {
+        q->input_fifo = av_fifo_alloc(1024*16);
+        if (!q->input_fifo)
+            return AVERROR(ENOMEM);
+    }
+
+    if (!q->pkt_fifo) {
+        q->pkt_fifo = av_fifo_alloc( sizeof(AVPacket) * (1 + 16) );
+        if (!q->pkt_fifo)
+            return AVERROR(ENOMEM);
+    }
+    q->engine_ready = 1;
+
     return 0;
 }
 
@@ -142,7 +195,7 @@ static void qsv_clear_unused_frames(QSVContext *q)
 {
     QSVFrame *cur = q->work_frames;
     while (cur) {
-        if (cur->surface && !cur->surface->Data.Locked) {
+        if (cur->surface && !cur->surface->Data.Locked && !cur->queued) {
             cur->surface = NULL;
             av_frame_unref(cur->frame);
         }
@@ -191,70 +244,190 @@ static int get_surface(AVCodecContext *avctx, QSVContext *q, mfxFrameSurface1 **
     return 0;
 }
 
-static AVFrame *find_frame(QSVContext *q, mfxFrameSurface1 *surf)
+static QSVFrame *find_frame(QSVContext *q, mfxFrameSurface1 *surf)
 {
     QSVFrame *cur = q->work_frames;
     while (cur) {
         if (surf == cur->surface)
-            return cur->frame;
+            return cur;
         cur = cur->next;
     }
     return NULL;
 }
 
-int ff_qsv_decode(AVCodecContext *avctx, QSVContext *q,
+/*  This function performs a 'smart' release of consumed data
+    from the input bitstream fifo.
+    Since the input fifo is mapped to an mfxBitstream, which does not
+    understand data wrapping around the fifo end, we also have to relocate any
+    remaining data to the fifo start. If no data remains, we just reset the
+    fifo's pointers to their initial positions.
+    NOTE the case when the fifo still contains unconsumed data is rare, and the
+    typical amount of such data is 1..4 bytes.
+*/
+static void qsv_fifo_relocate(AVFifoBuffer *f, int bytes_to_free)
+{
+    int data_size;
+    int data_rest = 0;
+
+    av_fifo_drain(f, bytes_to_free);
+
+    data_size = av_fifo_size(f);
+    if (data_size > 0) {
+        if (f->buffer!=f->rptr) {
+            if ( (f->end - f->rptr) < data_size) {
+                data_rest = data_size - (f->end - f->rptr);
+                data_size-=data_rest;
+                memmove(f->buffer+data_size, f->buffer, data_rest);
+            }
+            memmove(f->buffer, f->rptr, data_size);
+            data_size+= data_rest;
+        }
+    }
+    f->rptr = f->buffer;
+    f->wptr = f->buffer + data_size;
+    f->wndx = data_size;
+    f->rndx = 0;
+}
+
+
+static void close_decoder(QSVContext *q)
+{
+    QSVFrame *cur;
+
+    if (q->session)
+        MFXVideoDECODE_Close(q->session);
+
+    cur = q->work_frames;
+    while (cur) {
+        q->work_frames = cur->next;
+        av_frame_free(&cur->frame);
+        av_freep(&cur);
+        cur = q->work_frames;
+    }
+
+    q->engine_ready   = 0;
+    q->reinit_pending = 0;
+}
+
+static int do_qsv_decode(AVCodecContext *avctx, QSVContext *q,
                   AVFrame *frame, int *got_frame,
                   AVPacket *avpkt)
 {
+    QSVFrame *out_frame;
     mfxFrameSurface1 *insurf;
     mfxFrameSurface1 *outsurf;
     mfxSyncPoint sync;
     mfxBitstream bs = { { { 0 } } };
     int ret;
+    int n_out_frames;
+    int buffered = 0;
+    int flush    = !avpkt->size || q->reinit_pending;
 
-    if (avpkt->size) {
-        bs.Data       = avpkt->data;
-        bs.DataLength = avpkt->size;
+    if (!q->engine_ready) {
+        ret = qsv_decode_init(avctx, q, avpkt);
+        if (ret)
+            return ret;
+    }
+
+    if (!flush) {
+        if (av_fifo_size(q->input_fifo)) {
+            /* we have got rest of previous packet into buffer */
+            if (av_fifo_space(q->input_fifo) < avpkt->size) {
+                ret = av_fifo_grow(q->input_fifo, avpkt->size);
+                if (ret < 0)
+                    return ret;
+            }
+            av_fifo_generic_write(q->input_fifo, avpkt->data, avpkt->size, NULL);
+            bs.Data       = q->input_fifo->rptr;
+            bs.DataLength = av_fifo_size(q->input_fifo);
+            buffered = 1;
+        } else {
+            bs.Data       = avpkt->data;
+            bs.DataLength = avpkt->size;
+        }
         bs.MaxLength  = bs.DataLength;
         bs.TimeStamp  = avpkt->pts;
     }
 
-    do {
+    while (1) {
         ret = get_surface(avctx, q, &insurf);
         if (ret < 0)
             return ret;
+        do {
+            ret = MFXVideoDECODE_DecodeFrameAsync(q->session, flush ? NULL : &bs,
+                                                  insurf, &outsurf, &sync);
+            if (ret != MFX_WRN_DEVICE_BUSY)
+                break;
+            av_usleep(500);
+        } while (1);
+
+        if (MFX_WRN_VIDEO_PARAM_CHANGED==ret) {
+            /* TODO: handle here minor sequence header changing */
+        } else if (MFX_ERR_INCOMPATIBLE_VIDEO_PARAM==ret) {
+            av_fifo_reset(q->input_fifo);
+            flush = q->reinit_pending = 1;
+            continue;
+        }
+
+        if (sync) {
+            QSVFrame *out_frame = find_frame(q, outsurf);
 
-        ret = MFXVideoDECODE_DecodeFrameAsync(q->session, avpkt->size ? &bs : NULL,
-                                              insurf, &outsurf, &sync);
-        if (ret == MFX_WRN_DEVICE_BUSY)
-            av_usleep(1);
+            if (!out_frame) {
+                av_log(avctx, AV_LOG_ERROR,
+                       "The returned surface does not correspond to any frame\n");
+                return AVERROR_BUG;
+            }
 
-    } while (ret == MFX_WRN_DEVICE_BUSY || ret == MFX_ERR_MORE_SURFACE);
+            out_frame->queued = 1;
+            av_fifo_generic_write(q->async_fifo, &out_frame, sizeof(out_frame), NULL);
+            av_fifo_generic_write(q->async_fifo, &sync,      sizeof(sync),      NULL);
 
-    if (ret != MFX_ERR_NONE &&
-        ret != MFX_ERR_MORE_DATA &&
-        ret != MFX_WRN_VIDEO_PARAM_CHANGED &&
-        ret != MFX_ERR_MORE_SURFACE) {
-        av_log(avctx, AV_LOG_ERROR, "Error during QSV decoding.\n");
+            continue;
+        }
+        if (MFX_ERR_MORE_SURFACE != ret && ret < 0)
+            break;
+    }
+
+    /* make sure we do not enter an infinite loop if the SDK
+     * did not consume any data and did not return anything */
+    if (!sync && !bs.DataOffset && !flush) {
+        av_log(avctx, AV_LOG_WARNING, "A decode call did not consume any data\n");
+        bs.DataOffset = avpkt->size;
+    }
+
+    if (buffered) {
+        qsv_fifo_relocate(q->input_fifo, bs.DataOffset);
+    } else if (bs.DataOffset!=avpkt->size) {
+        /* some data of packet was not consumed. store it to local buffer */
+        av_fifo_generic_write(q->input_fifo, avpkt->data+bs.DataOffset,
+                              avpkt->size - bs.DataOffset, NULL);
+    }
+
+    if (MFX_ERR_MORE_DATA!=ret && ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Error %d during QSV decoding.\n", ret);
         return ff_qsv_error(ret);
     }
+    n_out_frames = av_fifo_size(q->async_fifo) / (sizeof(out_frame)+sizeof(sync));
 
-    if (sync) {
+    if (n_out_frames > q->async_depth || (flush && n_out_frames) ) {
         AVFrame *src_frame;
 
-        MFXVideoCORE_SyncOperation(q->session, sync, 60000);
+        av_fifo_generic_read(q->async_fifo, &out_frame, sizeof(out_frame), NULL);
+        av_fifo_generic_read(q->async_fifo, &sync,      sizeof(sync),      NULL);
+        out_frame->queued = 0;
 
-        src_frame = find_frame(q, outsurf);
-        if (!src_frame) {
-            av_log(avctx, AV_LOG_ERROR,
-                   "The returned surface does not correspond to any frame\n");
-            return AVERROR_BUG;
-        }
+        do {
+            ret = MFXVideoCORE_SyncOperation(q->session, sync, 1000);
+        } while (ret == MFX_WRN_IN_EXECUTION);
+
+        src_frame = out_frame->frame;
 
         ret = av_frame_ref(frame, src_frame);
         if (ret < 0)
             return ret;
 
+        outsurf = out_frame->surface;
+
         frame->pkt_pts = frame->pts = outsurf->Data.TimeStamp;
 
         frame->repeat_pict =
@@ -269,22 +442,147 @@ int ff_qsv_decode(AVCodecContext *avctx, QSVContext *q,
         *got_frame = 1;
     }
 
-    return bs.DataOffset;
+    return avpkt->size;
 }
+/*
+ Insert avpkt at the front of q->pkt_fifo so that it is the next packet read back.
+*/
+static void qsv_packet_push_front(QSVContext *q, AVPacket *avpkt)
+{
+    int fifo_size = av_fifo_size(q->pkt_fifo);
+    if (!fifo_size) {
+    /* fifo is empty: a plain write already puts the packet first */
+        av_fifo_generic_write(q->pkt_fifo, avpkt, sizeof(*avpkt), NULL);
+    } else {
+    /* rebuild into a fifo of equal capacity; NOTE(review): av_fifo_alloc() result is not NULL-checked */
+        AVPacket pkt;
+        AVFifoBuffer *fifo = av_fifo_alloc(fifo_size+av_fifo_space(q->pkt_fifo));
 
-int ff_qsv_decode_close(QSVContext *q)
+        av_fifo_generic_write(fifo, avpkt, sizeof(*avpkt), NULL);
+        /* then move the previously buffered packets behind it, keeping their order */
+        while (av_fifo_size(q->pkt_fifo)) {
+            av_fifo_generic_read(q->pkt_fifo, &pkt, sizeof(pkt), NULL);
+            av_fifo_generic_write(fifo,       &pkt, sizeof(pkt), NULL);
+        }
+        av_fifo_free(q->pkt_fifo);
+        q->pkt_fifo = fifo;
+    }
+}
+int ff_qsv_decode(AVCodecContext *avctx, QSVContext *q,
+                  AVFrame *frame, int *got_frame,
+                  AVPacket *avpkt)
 {
-    QSVFrame *cur = q->work_frames;
+    AVPacket pkt_ref = { 0 };
+    int ret = 0;
 
-    while (cur) {
-        q->work_frames = cur->next;
-        av_frame_free(&cur->frame);
-        av_freep(&cur);
+    if (q->pkt_fifo && av_fifo_size(q->pkt_fifo) >= sizeof(AVPacket)) {
+        /* packets are already buffered: queue a new reference to avpkt at the tail */
+        ret = av_packet_ref(&pkt_ref, avpkt);
+        if (ret < 0)
+            return ret;
+        av_fifo_generic_write(q->pkt_fifo, &pkt_ref, sizeof(pkt_ref), NULL);
+    }
+    if (q->reinit_pending) {
+        ret = do_qsv_decode(avctx, q, frame, got_frame, avpkt);
+        /* reinit pending: keep draining; a call that yields no frame means the drain is done */
+        if (!*got_frame) {
+            /* Flushing complete, no more frames */
+            close_decoder(q);
+            /* NOTE(review): no recursion here; the block below runs if close_decoder() cleared reinit_pending -- confirm */
+        }
+    }
+    if (!q->reinit_pending) {
+        if (q->pkt_fifo && av_fifo_size(q->pkt_fifo) >= sizeof(AVPacket)) {
+            /* decode buffered packets, oldest first, until one produces a frame */
+            while (!*got_frame && av_fifo_size(q->pkt_fifo) >= sizeof(AVPacket)) {
+                av_fifo_generic_read(q->pkt_fifo, &pkt_ref, sizeof(pkt_ref), NULL);
+                ret = do_qsv_decode(avctx, q, frame, got_frame, &pkt_ref);
+                if (q->reinit_pending) {
+                    /*
+                       Rare case: a new reinit became pending while draining the buffer.
+                       Put pkt_ref back at the front of the fifo so it is read again first.
+                    */
+                    qsv_packet_push_front(q, &pkt_ref);
+                } else {
+                    av_packet_unref(&pkt_ref);
+                }
+           }
+        } else {
+            /* nothing buffered: decode the caller's packet directly */
+            ret = do_qsv_decode(avctx, q, frame, got_frame, avpkt);
+            if (q->reinit_pending) {
+                ret = av_packet_ref(&pkt_ref, avpkt); /* NOTE(review): overwrites the decode result; 0 is returned on success */
+                if (ret < 0)
+                    return ret;
+                av_fifo_generic_write(q->pkt_fifo, &pkt_ref, sizeof(pkt_ref), NULL);
+            }
+        }
+    }
+
+    return ret;
+}
+/*
+ Reset the decoder and drop all buffered input/output state (flush callback, e.g. before a seek).
+*/
+void ff_qsv_decode_reset(AVCodecContext *avctx, QSVContext *q)
+{
+    QSVFrame *cur;
+    AVPacket pkt;
+    int ret = 0;
+    mfxVideoParam param = { { 0 } };
+
+    if (q->reinit_pending) {
+        close_decoder(q);
+    } else if (q->engine_ready) {
+        ret = MFXVideoDECODE_GetVideoParam(q->session, &param);
+        if (ret < 0) {
+            av_log(avctx, AV_LOG_ERROR, "MFX decode get param error %d\n", ret);
+        }
+
+        ret = MFXVideoDECODE_Reset(q->session, &param);
+        if (ret < 0) {
+            av_log(avctx, AV_LOG_ERROR, "MFX decode reset error %d\n", ret);
+        }
+
+        /* Free all frames */
         cur = q->work_frames;
+        while (cur) {
+            q->work_frames = cur->next;
+            av_frame_free(&cur->frame);
+            av_freep(&cur);
+            cur = q->work_frames;
+        }
     }
 
-    if (q->internal_session)
-        MFXClose(q->internal_session);
+    /* Reset output surfaces; a FIFO may still be NULL if flush arrives before the first decode */
+    if (q->async_fifo)
+        av_fifo_reset(q->async_fifo);
+    /* Reset input packets fifo, releasing every buffered packet reference */
+    while (q->pkt_fifo && av_fifo_size(q->pkt_fifo)) {
+        av_fifo_generic_read(q->pkt_fifo, &pkt, sizeof(pkt), NULL);
+        av_packet_unref(&pkt);
+    }
+    /* Reset input bitstream fifo */
+    if (q->input_fifo)
+        av_fifo_reset(q->input_fifo);
+}
+/* Tear down the decoder, drop the session and free the three FIFOs. Always returns 0. */
+int ff_qsv_decode_close(QSVContext *q)
+{
+    close_decoder(q);
+
+    q->session = NULL;
+
+    ff_qsv_close_internal_session(&q->internal_qs);
+
+    av_fifo_free(q->async_fifo);
+    q->async_fifo = NULL;
+
+    av_fifo_free(q->input_fifo);
+    q->input_fifo = NULL;
+    /* NOTE(review): packets still queued in pkt_fifo are not unreferenced here -- confirm close_decoder() drains them */
+    av_fifo_free(q->pkt_fifo);
+    q->pkt_fifo = NULL;
 
     return 0;
 }
diff --git a/libavcodec/qsvdec.h b/libavcodec/qsvdec.h
index 373cc72b..97a3315b 100644
--- a/libavcodec/qsvdec.h
+++ b/libavcodec/qsvdec.h
@@ -28,6 +28,7 @@
 
 #include <mfx/mfxvideo.h>
 
+#include "libavutil/fifo.h"
 #include "libavutil/frame.h"
 #include "libavutil/pixfmt.h"
 
@@ -40,29 +41,49 @@ typedef struct QSVContext {
 
     // the session we allocated internally, in case the caller did not provide
     // one
-    mfxSession internal_session;
+    QSVSession internal_qs;
 
     /**
      * a linked list of frames currently being used by QSV
      */
     QSVFrame *work_frames;
 
+    AVFifoBuffer *async_fifo;
+    AVFifoBuffer *input_fifo;
+
+    // input packets must be buffered in some cases,
+    // otherwise dynamic stream changes cannot be handled correctly;
+    // this fifo is used for that input packet buffering
+    AVFifoBuffer *pkt_fifo;
+
+    // this flag indicates that the header has been parsed and that the
+    // decoder instance has been created and is ready for general decoding
+    int engine_ready;
+
+    // we cannot simply re-init the decoder when a different sequence header arrives:
+    // all buffered frames must be delivered first, and new packets cannot be decoded
+    // during that time. So while reinit_pending is non-zero we flush the decoder and
+    // accumulate newly arrived packets in pkt_fifo
+    int reinit_pending;
+
     // options set by the caller
     int async_depth;
     int iopattern;
 
+    char *load_plugins;
+
     mfxExtBuffer **ext_buffers;
     int         nb_ext_buffers;
 } QSVContext;
 
 int ff_qsv_map_pixfmt(enum AVPixelFormat format);
 
-int ff_qsv_decode_init(AVCodecContext *s, QSVContext *q, mfxSession session);
-
 int ff_qsv_decode(AVCodecContext *s, QSVContext *q,
                   AVFrame *frame, int *got_frame,
                   AVPacket *avpkt);
 
+void ff_qsv_decode_reset(AVCodecContext *avctx, QSVContext *q);
+
 int ff_qsv_decode_close(QSVContext *q);
 
 #endif /* AVCODEC_QSVDEC_H */
diff --git a/libavcodec/qsvdec_h264.c b/libavcodec/qsvdec_h264.c
deleted file mode 100644
index 7eb7a6c1..00000000
--- a/libavcodec/qsvdec_h264.c
+++ /dev/null
@@ -1,314 +0,0 @@
-/*
- * Intel MediaSDK QSV based H.264 decoder
- *
- * copyright (c) 2013 Luca Barbato
- * copyright (c) 2015 Anton Khirnov
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-
-#include <stdint.h>
-#include <string.h>
-
-#include <mfx/mfxvideo.h>
-
-#include "libavutil/common.h"
-#include "libavutil/fifo.h"
-#include "libavutil/opt.h"
-
-#include "avcodec.h"
-#include "internal.h"
-#include "qsv_internal.h"
-#include "qsvdec.h"
-#include "qsv.h"
-
-typedef struct QSVH264Context {
-    AVClass *class;
-    QSVContext qsv;
-
-    // the internal parser and codec context for parsing the data
-    AVCodecParserContext *parser;
-    AVCodecContext *avctx_internal;
-    enum AVPixelFormat orig_pix_fmt;
-
-    // the filter for converting to Annex B
-    AVBitStreamFilterContext *bsf;
-
-    AVFifoBuffer *packet_fifo;
-
-    AVPacket input_ref;
-    AVPacket pkt_filtered;
-    uint8_t *filtered_data;
-} QSVH264Context;
-
-static void qsv_clear_buffers(QSVH264Context *s)
-{
-    AVPacket pkt;
-    while (av_fifo_size(s->packet_fifo) >= sizeof(pkt)) {
-        av_fifo_generic_read(s->packet_fifo, &pkt, sizeof(pkt), NULL);
-        av_packet_unref(&pkt);
-    }
-
-    if (s->filtered_data != s->input_ref.data)
-        av_freep(&s->filtered_data);
-    s->filtered_data = NULL;
-    av_packet_unref(&s->input_ref);
-}
-
-static av_cold int qsv_decode_close(AVCodecContext *avctx)
-{
-    QSVH264Context *s = avctx->priv_data;
-
-    ff_qsv_decode_close(&s->qsv);
-
-    qsv_clear_buffers(s);
-
-    av_fifo_free(s->packet_fifo);
-
-    av_bitstream_filter_close(s->bsf);
-    av_parser_close(s->parser);
-    avcodec_free_context(&s->avctx_internal);
-
-    return 0;
-}
-
-static av_cold int qsv_decode_init(AVCodecContext *avctx)
-{
-    QSVH264Context *s = avctx->priv_data;
-    int ret;
-
-    s->orig_pix_fmt = AV_PIX_FMT_NONE;
-
-    s->packet_fifo = av_fifo_alloc(sizeof(AVPacket));
-    if (!s->packet_fifo) {
-        ret = AVERROR(ENOMEM);
-        goto fail;
-    }
-
-    s->bsf = av_bitstream_filter_init("h264_mp4toannexb");
-    if (!s->bsf) {
-        ret = AVERROR(ENOMEM);
-        goto fail;
-    }
-
-    s->avctx_internal = avcodec_alloc_context3(NULL);
-    if (!s->avctx_internal) {
-        ret = AVERROR(ENOMEM);
-        goto fail;
-    }
-
-    if (avctx->extradata) {
-        s->avctx_internal->extradata = av_mallocz(avctx->extradata_size + FF_INPUT_BUFFER_PADDING_SIZE);
-        if (!s->avctx_internal->extradata) {
-            ret = AVERROR(ENOMEM);
-            goto fail;
-        }
-        memcpy(s->avctx_internal->extradata, avctx->extradata,
-               avctx->extradata_size);
-        s->avctx_internal->extradata_size = avctx->extradata_size;
-    }
-
-    s->parser = av_parser_init(AV_CODEC_ID_H264);
-    if (!s->parser) {
-        ret = AVERROR(ENOMEM);
-        goto fail;
-    }
-    s->parser->flags |= PARSER_FLAG_COMPLETE_FRAMES;
-
-    s->qsv.iopattern = MFX_IOPATTERN_OUT_SYSTEM_MEMORY;
-
-    return 0;
-fail:
-    qsv_decode_close(avctx);
-    return ret;
-}
-
-static int qsv_process_data(AVCodecContext *avctx, AVFrame *frame,
-                            int *got_frame, AVPacket *pkt)
-{
-    QSVH264Context *s = avctx->priv_data;
-    uint8_t *dummy_data;
-    int dummy_size;
-    int ret;
-
-    /* we assume the packets are already split properly and want
-     * just the codec parameters here */
-    av_parser_parse2(s->parser, s->avctx_internal,
-                     &dummy_data, &dummy_size,
-                     pkt->data, pkt->size, pkt->pts, pkt->dts,
-                     pkt->pos);
-
-    /* TODO: flush delayed frames on reinit */
-    if (s->parser->format       != s->orig_pix_fmt    ||
-        s->parser->coded_width  != avctx->coded_width ||
-        s->parser->coded_height != avctx->coded_height) {
-        mfxSession session = NULL;
-
-        enum AVPixelFormat pix_fmts[3] = { AV_PIX_FMT_QSV,
-                                           AV_PIX_FMT_NONE,
-                                           AV_PIX_FMT_NONE };
-        enum AVPixelFormat qsv_format;
-
-        qsv_format = ff_qsv_map_pixfmt(s->parser->format);
-        if (qsv_format < 0) {
-            av_log(avctx, AV_LOG_ERROR,
-                   "Only 8-bit YUV420 streams are supported.\n");
-            ret = AVERROR(ENOSYS);
-            goto reinit_fail;
-        }
-
-        s->orig_pix_fmt     = s->parser->format;
-        avctx->pix_fmt      = pix_fmts[1] = qsv_format;
-        avctx->width        = s->parser->width;
-        avctx->height       = s->parser->height;
-        avctx->coded_width  = s->parser->coded_width;
-        avctx->coded_height = s->parser->coded_height;
-        avctx->level        = s->avctx_internal->level;
-        avctx->profile      = s->avctx_internal->profile;
-
-        ret = ff_get_format(avctx, pix_fmts);
-        if (ret < 0)
-            goto reinit_fail;
-
-        avctx->pix_fmt = ret;
-
-        if (avctx->hwaccel_context) {
-            AVQSVContext *user_ctx = avctx->hwaccel_context;
-            session               = user_ctx->session;
-            s->qsv.iopattern      = user_ctx->iopattern;
-            s->qsv.ext_buffers    = user_ctx->ext_buffers;
-            s->qsv.nb_ext_buffers = user_ctx->nb_ext_buffers;
-        }
-
-        ret = ff_qsv_decode_init(avctx, &s->qsv, session);
-        if (ret < 0)
-            goto reinit_fail;
-    }
-
-    return ff_qsv_decode(avctx, &s->qsv, frame, got_frame, &s->pkt_filtered);
-
-reinit_fail:
-    s->orig_pix_fmt = s->parser->format = avctx->pix_fmt = AV_PIX_FMT_NONE;
-    return ret;
-}
-
-static int qsv_decode_frame(AVCodecContext *avctx, void *data,
-                            int *got_frame, AVPacket *avpkt)
-{
-    QSVH264Context *s = avctx->priv_data;
-    AVFrame *frame    = data;
-    int ret;
-
-    /* buffer the input packet */
-    if (avpkt->size) {
-        AVPacket input_ref = { 0 };
-
-        if (av_fifo_space(s->packet_fifo) < sizeof(input_ref)) {
-            ret = av_fifo_realloc2(s->packet_fifo,
-                                   av_fifo_size(s->packet_fifo) + sizeof(input_ref));
-            if (ret < 0)
-                return ret;
-        }
-
-        ret = av_packet_ref(&input_ref, avpkt);
-        if (ret < 0)
-            return ret;
-        av_fifo_generic_write(s->packet_fifo, &input_ref, sizeof(input_ref), NULL);
-    }
-
-    /* process buffered data */
-    while (!*got_frame) {
-        /* prepare the input data -- convert to Annex B if needed */
-        if (s->pkt_filtered.size <= 0) {
-            int size;
-
-            /* no more data */
-            if (av_fifo_size(s->packet_fifo) < sizeof(AVPacket))
-                return avpkt->size ? avpkt->size : ff_qsv_decode(avctx, &s->qsv, frame, got_frame, avpkt);
-
-            if (s->filtered_data != s->input_ref.data)
-                av_freep(&s->filtered_data);
-            s->filtered_data = NULL;
-            av_packet_unref(&s->input_ref);
-
-            av_fifo_generic_read(s->packet_fifo, &s->input_ref, sizeof(s->input_ref), NULL);
-            ret = av_bitstream_filter_filter(s->bsf, avctx, NULL,
-                                             &s->filtered_data, &size,
-                                             s->input_ref.data, s->input_ref.size, 0);
-            if (ret < 0) {
-                s->filtered_data = s->input_ref.data;
-                size             = s->input_ref.size;
-            }
-            s->pkt_filtered      = s->input_ref;
-            s->pkt_filtered.data = s->filtered_data;
-            s->pkt_filtered.size = size;
-        }
-
-        ret = qsv_process_data(avctx, frame, got_frame, &s->pkt_filtered);
-        if (ret < 0)
-            return ret;
-
-        s->pkt_filtered.size -= ret;
-        s->pkt_filtered.data += ret;
-    }
-
-    return avpkt->size;
-}
-
-static void qsv_decode_flush(AVCodecContext *avctx)
-{
-    QSVH264Context *s = avctx->priv_data;
-
-    qsv_clear_buffers(s);
-    s->orig_pix_fmt = AV_PIX_FMT_NONE;
-}
-
-AVHWAccel ff_h264_qsv_hwaccel = {
-    .name           = "h264_qsv",
-    .type           = AVMEDIA_TYPE_VIDEO,
-    .id             = AV_CODEC_ID_H264,
-    .pix_fmt        = AV_PIX_FMT_QSV,
-};
-
-#define OFFSET(x) offsetof(QSVH264Context, x)
-#define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
-static const AVOption options[] = {
-    { "async_depth", "Internal parallelization depth, the higher the value the higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = ASYNC_DEPTH_DEFAULT }, 0, INT_MAX, VD },
-    { NULL },
-};
-
-static const AVClass class = {
-    .class_name = "h264_qsv",
-    .item_name  = av_default_item_name,
-    .option     = options,
-    .version    = LIBAVUTIL_VERSION_INT,
-};
-
-AVCodec ff_h264_qsv_decoder = {
-    .name           = "h264_qsv",
-    .long_name      = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (Intel Quick Sync Video acceleration)"),
-    .priv_data_size = sizeof(QSVH264Context),
-    .type           = AVMEDIA_TYPE_VIDEO,
-    .id             = AV_CODEC_ID_H264,
-    .init           = qsv_decode_init,
-    .decode         = qsv_decode_frame,
-    .flush          = qsv_decode_flush,
-    .close          = qsv_decode_close,
-    .capabilities   = CODEC_CAP_DELAY,
-    .priv_class     = &class,
-};
diff --git a/libavcodec/qsvdec_h2645.c b/libavcodec/qsvdec_h2645.c
new file mode 100644
index 00000000..a396f31e
--- /dev/null
+++ b/libavcodec/qsvdec_h2645.c
@@ -0,0 +1,228 @@
+/*
+ * Intel MediaSDK QSV based H.264 / HEVC decoder
+ *
+ * copyright (c) 2013 Luca Barbato
+ * copyright (c) 2015 Anton Khirnov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+
+#include <stdint.h>
+#include <string.h>
+
+#include <mfx/mfxvideo.h>
+
+#include "libavutil/common.h"
+#include "libavutil/fifo.h"
+#include "libavutil/opt.h"
+
+#include "avcodec.h"
+#include "internal.h"
+#include "qsvdec.h"
+
+enum LoadPlugin { // values of the "load_plugin" option
+    LOAD_PLUGIN_NONE, // init leaves load_plugins untouched
+    LOAD_PLUGIN_HEVC_SW, // HEVC init fills an empty load_plugins with a built-in plugin UID
+};
+
+typedef struct QSVH2645Context { // private context shared by h264_qsv and hevc_qsv
+    AVClass *class;
+    QSVContext qsv;
+
+    int load_plugin; // enum LoadPlugin value; only exposed as an option by hevc_qsv
+
+    // the filter for converting to Annex B (h264_mp4toannexb or hevc_mp4toannexb, chosen at init)
+    AVBitStreamFilterContext *bsf;
+
+} QSVH2645Context;
+/* Close callback: shuts down the shared QSV decoder state, then the Annex B filter. Always returns 0. */
+static av_cold int qsv_decode_close(AVCodecContext *avctx)
+{
+    QSVH2645Context *s = avctx->priv_data;
+
+    ff_qsv_decode_close(&s->qsv);
+
+    av_bitstream_filter_close(s->bsf);
+
+    return 0;
+}
+/* Init callback for h264_qsv and hevc_qsv: picks the default HEVC plugin and creates the Annex B filter. */
+static av_cold int qsv_decode_init(AVCodecContext *avctx)
+{
+    QSVH2645Context *s = avctx->priv_data;
+    int ret;
+
+    if (avctx->codec_id == AV_CODEC_ID_HEVC && s->load_plugin != LOAD_PLUGIN_NONE) {
+        static const char *uid_hevcdec_sw = "15dd936825ad475ea34e35f3f54217a6";
+
+        if (s->qsv.load_plugins[0]) {
+            av_log(avctx, AV_LOG_WARNING,
+                   "load_plugins is not empty, but load_plugin is not set to 'none'. "
+                   "The load_plugin value will be ignored.\n");
+        } else {
+            av_freep(&s->qsv.load_plugins);
+            s->qsv.load_plugins = av_strdup(uid_hevcdec_sw);
+            if (!s->qsv.load_plugins)
+                return AVERROR(ENOMEM);
+        }
+    }
+
+    if (avctx->codec_id == AV_CODEC_ID_H264)
+        s->bsf = av_bitstream_filter_init("h264_mp4toannexb");
+    else
+        s->bsf = av_bitstream_filter_init("hevc_mp4toannexb");
+    if (!s->bsf) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    return 0;
+fail:
+    qsv_decode_close(avctx);
+    return ret;
+}
+/* Decode callback: hands Annex B data to ff_qsv_decode(), running the mp4toannexb filter first when needed. */
+static int qsv_decode_frame(AVCodecContext *avctx, void *data,
+                            int *got_frame, AVPacket *avpkt)
+{
+    QSVH2645Context *s = avctx->priv_data;
+    AVFrame *frame    = data;
+    int ret;
+    uint8_t *p_filtered = NULL;
+    int      n_filtered = 0;
+    AVPacket pkt_filtered = { 0 };
+
+    if (avpkt->size) {
+        if (avpkt->size > 3 && !avpkt->data[0] &&
+            !avpkt->data[1] && !avpkt->data[2] && 1==avpkt->data[3]) {
+            /* packet starts with the 4-byte 00 00 00 01 start code: already Annex B */
+            return ff_qsv_decode(avctx, &s->qsv, frame, got_frame, avpkt);
+
+        } else {
+            /* no annex-b prefix. try to restore it with the bitstream filter: */
+            ret = av_bitstream_filter_filter(s->bsf, avctx, "private_spspps_buf",
+                                         &p_filtered, &n_filtered,
+                                         avpkt->data, avpkt->size, 0);
+            if (ret>=0) {
+                pkt_filtered.pts  = avpkt->pts;
+                pkt_filtered.data = p_filtered;
+                pkt_filtered.size = n_filtered;
+
+                ret = ff_qsv_decode(avctx, &s->qsv, frame, got_frame, &pkt_filtered);
+                /* free the filter output only if it is a separate buffer from the input */
+                if (p_filtered != avpkt->data)
+                    av_free(p_filtered);
+                return ret > 0 ? avpkt->size : ret;
+            }
+        }
+    }
+    /* empty packet, or the filter failed: pass the packet through unchanged */
+    return ff_qsv_decode(avctx, &s->qsv, frame, got_frame, avpkt);
+}
+/* Flush callback: forwards to ff_qsv_decode_reset() on the shared QSV state. */
+static void qsv_decode_flush(AVCodecContext *avctx)
+{
+    QSVH2645Context *s = avctx->priv_data;
+    ff_qsv_decode_reset(avctx, &s->qsv);
+}
+/* Helpers for the option tables below: field offset in QSVH2645Context, and the video+decoding option flags. */
+#define OFFSET(x) offsetof(QSVH2645Context, x)
+#define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
+/* hevc_qsv: hwaccel entry, options (including plugin selection), AVClass and AVCodec registration. */
+#if CONFIG_HEVC_QSV_DECODER
+AVHWAccel ff_hevc_qsv_hwaccel = {
+    .name           = "hevc_qsv",
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_HEVC,
+    .pix_fmt        = AV_PIX_FMT_QSV,
+};
+
+static const AVOption hevc_options[] = {
+    { "async_depth", "Internal parallelization depth, the higher the value the higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = ASYNC_DEPTH_DEFAULT }, 0, INT_MAX, VD },
+    /* load_plugin defaults to hevc_sw; the two named constants below belong to its "load_plugin" unit */
+    { "load_plugin", "A user plugin to load in an internal session", OFFSET(load_plugin), AV_OPT_TYPE_INT, { .i64 = LOAD_PLUGIN_HEVC_SW }, LOAD_PLUGIN_NONE, LOAD_PLUGIN_HEVC_SW, VD, "load_plugin" },
+    { "none",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = LOAD_PLUGIN_NONE },    0, 0, VD, "load_plugin" },
+    { "hevc_sw",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = LOAD_PLUGIN_HEVC_SW }, 0, 0, VD, "load_plugin" },
+    /* load_plugins defaults to the empty string, which qsv_decode_init() treats as "not set" */
+    { "load_plugins", "A :-separate list of hexadecimal plugin UIDs to load in an internal session",
+        OFFSET(qsv.load_plugins), AV_OPT_TYPE_STRING, { .str = "" }, 0, 0, VD },
+    { NULL },
+};
+
+static const AVClass hevc_class = {
+    .class_name = "hevc_qsv",
+    .item_name  = av_default_item_name,
+    .option     = hevc_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+AVCodec ff_hevc_qsv_decoder = {
+    .name           = "hevc_qsv",
+    .long_name      = NULL_IF_CONFIG_SMALL("HEVC (Intel Quick Sync Video acceleration)"),
+    .priv_data_size = sizeof(QSVH2645Context),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_HEVC,
+    .init           = qsv_decode_init,
+    .decode         = qsv_decode_frame,
+    .flush          = qsv_decode_flush,
+    .close          = qsv_decode_close,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_DR1,
+    .priv_class     = &hevc_class,
+    .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_NV12,
+                                                    AV_PIX_FMT_QSV,
+                                                    AV_PIX_FMT_NONE },
+};
+#endif
+/* h264_qsv: same callbacks and context as hevc_qsv, but only the async_depth option is exposed. */
+#if CONFIG_H264_QSV_DECODER
+AVHWAccel ff_h264_qsv_hwaccel = {
+    .name           = "h264_qsv",
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_H264,
+    .pix_fmt        = AV_PIX_FMT_QSV,
+};
+
+static const AVOption options[] = {
+    { "async_depth", "Internal parallelization depth, the higher the value the higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = ASYNC_DEPTH_DEFAULT }, 0, INT_MAX, VD },
+    { NULL },
+};
+
+static const AVClass class = {
+    .class_name = "h264_qsv",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+AVCodec ff_h264_qsv_decoder = {
+    .name           = "h264_qsv",
+    .long_name      = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (Intel Quick Sync Video acceleration)"),
+    .priv_data_size = sizeof(QSVH2645Context),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_H264,
+    .init           = qsv_decode_init,
+    .decode         = qsv_decode_frame,
+    .flush          = qsv_decode_flush,
+    .close          = qsv_decode_close,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_DR1,
+    .priv_class     = &class,
+    .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_NV12,
+                                                    AV_PIX_FMT_QSV,
+                                                    AV_PIX_FMT_NONE },
+};
+#endif
diff --git a/libavcodec/qsvdec_mpeg2.c b/libavcodec/qsvdec_mpeg2.c
new file mode 100644
index 00000000..d9052e0c
--- /dev/null
+++ b/libavcodec/qsvdec_mpeg2.c
@@ -0,0 +1,100 @@
+/*
+ * Intel MediaSDK QSV based MPEG2 video decoder
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+#include <string.h>
+
+#include "libavutil/common.h"
+#include "libavutil/opt.h"
+
+#include "avcodec.h"
+#include "qsvdec.h"
+/* Private context of mpeg2_qsv: the AVClass pointer for options plus the shared QSV decoder state. */
+typedef struct QSVMPEG2Context {
+    AVClass *class;
+    QSVContext qsv;
+} QSVMPEG2Context;
+/* Close callback: shuts down the shared QSV decoder state. Always returns 0. */
+static av_cold int qsv_decode_close(AVCodecContext *avctx)
+{
+    QSVMPEG2Context *s = avctx->priv_data;
+
+    ff_qsv_decode_close(&s->qsv);
+
+    return 0;
+}
+/* Init callback: performs no codec-specific setup and always returns 0. */
+static av_cold int qsv_decode_init(AVCodecContext *avctx)
+{
+    return 0;
+}
+/* Decode callback: passes the packet unmodified to ff_qsv_decode() and returns its result. */
+static int qsv_decode_frame(AVCodecContext *avctx, void *data,
+                            int *got_frame, AVPacket *avpkt)
+{
+    QSVMPEG2Context *s = avctx->priv_data;
+    AVFrame *frame    = data;
+
+    return ff_qsv_decode(avctx, &s->qsv, frame, got_frame, avpkt);
+}
+/* Flush callback: forwards to ff_qsv_decode_reset() on the shared QSV state. */
+static void qsv_decode_flush(AVCodecContext *avctx)
+{
+    QSVMPEG2Context *s = avctx->priv_data;
+    ff_qsv_decode_reset(avctx, &s->qsv);
+}
+/* mpeg2_qsv: hwaccel entry, the single async_depth option, AVClass and AVCodec registration. */
+AVHWAccel ff_mpeg2_qsv_hwaccel = {
+    .name           = "mpeg2_qsv",
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_MPEG2VIDEO,
+    .pix_fmt        = AV_PIX_FMT_QSV,
+};
+
+#define OFFSET(x) offsetof(QSVMPEG2Context, x)
+#define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
+static const AVOption options[] = {
+    { "async_depth", "Internal parallelization depth, the higher the value the higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = ASYNC_DEPTH_DEFAULT }, 0, INT_MAX, VD },
+    { NULL },
+};
+
+static const AVClass class = {
+    .class_name = "mpeg2_qsv",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+AVCodec ff_mpeg2_qsv_decoder = {
+    .name           = "mpeg2_qsv",
+    .long_name      = NULL_IF_CONFIG_SMALL("MPEG-2 video (Intel Quick Sync Video acceleration)"),
+    .priv_data_size = sizeof(QSVMPEG2Context),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_MPEG2VIDEO,
+    .init           = qsv_decode_init,
+    .decode         = qsv_decode_frame,
+    .flush          = qsv_decode_flush,
+    .close          = qsv_decode_close,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_DR1,
+    .priv_class     = &class,
+    .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_NV12,
+                                                    AV_PIX_FMT_QSV,
+                                                    AV_PIX_FMT_NONE },
+};
diff --git a/libavcodec/qsvdec_vc1.c b/libavcodec/qsvdec_vc1.c
new file mode 100644
index 00000000..fcf101f7
--- /dev/null
+++ b/libavcodec/qsvdec_vc1.c
@@ -0,0 +1,97 @@
+/*
+ * Intel MediaSDK QSV based VC-1 video decoder
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+#include <string.h>
+
+#include "libavutil/common.h"
+#include "libavutil/fifo.h"
+#include "libavutil/opt.h"
+
+#include "avcodec.h"
+#include "qsvdec.h"
+/* Private context of vc1_qsv: the AVClass pointer for options plus the shared QSV decoder state. */
+typedef struct QSVVC1Context {
+    AVClass *class;
+    QSVContext qsv;
+} QSVVC1Context;
+
+/* Close callback: shuts down the shared QSV decoder state. Always returns 0. */
+static av_cold int qsv_decode_close(AVCodecContext *avctx)
+{
+    QSVVC1Context *s = avctx->priv_data;
+
+    ff_qsv_decode_close(&s->qsv);
+
+    return 0;
+}
+/* Decode callback: passes the packet unmodified to ff_qsv_decode() and returns its result. */
+static int qsv_decode_frame(AVCodecContext *avctx, void *data,
+                            int *got_frame, AVPacket *avpkt)
+{
+    QSVVC1Context *s = avctx->priv_data;
+    AVFrame *frame    = data;
+
+    return ff_qsv_decode(avctx, &s->qsv, frame, got_frame, avpkt);
+}
+/* Flush callback: forwards to ff_qsv_decode_reset() on the shared QSV state. */
+static void qsv_decode_flush(AVCodecContext *avctx)
+{
+    QSVVC1Context *s = avctx->priv_data;
+    ff_qsv_decode_reset(avctx, &s->qsv);
+}
+/* vc1_qsv: hwaccel entry, the single async_depth option, AVClass and AVCodec registration. */
+AVHWAccel ff_vc1_qsv_hwaccel = {
+    .name           = "vc1_qsv",
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_VC1,
+    .pix_fmt        = AV_PIX_FMT_QSV,
+};
+
+#define OFFSET(x) offsetof(QSVVC1Context, x)
+#define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
+static const AVOption options[] = {
+    { "async_depth", "Internal parallelization depth, the higher the value the higher the latency.", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = ASYNC_DEPTH_DEFAULT }, 0, INT_MAX, VD },
+    { NULL },
+};
+
+static const AVClass class = {
+    .class_name = "vc1_qsv",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+AVCodec ff_vc1_qsv_decoder = {
+    .name           = "vc1_qsv",
+    .long_name      = NULL_IF_CONFIG_SMALL("VC-1 video (Intel Quick Sync Video acceleration)"),
+    .priv_data_size = sizeof(QSVVC1Context),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_VC1,
+    .init           = NULL, /* no init callback; there is nothing codec-specific to set up */
+    .decode         = qsv_decode_frame,
+    .flush          = qsv_decode_flush,
+    .close          = qsv_decode_close,
+    .capabilities   = AV_CODEC_CAP_DELAY, /* NOTE(review): the other QSV decoders in this patch also set AV_CODEC_CAP_DR1 -- confirm the omission is intended */
+    .priv_class     = &class,
+    .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_NV12,
+                                                    AV_PIX_FMT_QSV,
+                                                    AV_PIX_FMT_NONE },
+};
diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c
index bcf3d73c..3bac8cf6 100644
--- a/libavcodec/qsvenc.c
+++ b/libavcodec/qsvenc.c
@@ -30,6 +30,7 @@
 #include "libavutil/log.h"
 #include "libavutil/time.h"
 #include "libavutil/imgutils.h"
+#include "libavcodec/bytestream.h"
 
 #include "avcodec.h"
 #include "internal.h"
@@ -37,10 +38,322 @@
 #include "qsv_internal.h"
 #include "qsvenc.h"
 
-static int init_video_param(AVCodecContext *avctx, QSVEncContext *q)
+static const struct { /* pairs a libmfx profile constant with a printable name; searched by print_profile() */
+    mfxU16 profile;
+    const char *name;
+} profile_names[] = { /* NOTE(review): one table for several codecs, searched by numeric value only - confirm the constants of different codecs never collide */
+    { MFX_PROFILE_AVC_BASELINE,                 "baseline"              },
+    { MFX_PROFILE_AVC_MAIN,                     "main"                  },
+    { MFX_PROFILE_AVC_EXTENDED,                 "extended"              },
+    { MFX_PROFILE_AVC_HIGH,                     "high"                  },
+#if QSV_VERSION_ATLEAST(1, 15)
+    { MFX_PROFILE_AVC_HIGH_422,                 "high 422"              },
+#endif
+#if QSV_VERSION_ATLEAST(1, 4)
+    { MFX_PROFILE_AVC_CONSTRAINED_BASELINE,     "constrained baseline"  },
+    { MFX_PROFILE_AVC_CONSTRAINED_HIGH,         "constrained high"      },
+    { MFX_PROFILE_AVC_PROGRESSIVE_HIGH,         "progressive high"      },
+#endif
+    { MFX_PROFILE_MPEG2_SIMPLE,                 "simple"                },
+    { MFX_PROFILE_MPEG2_MAIN,                   "main"                  },
+    { MFX_PROFILE_MPEG2_HIGH,                   "high"                  },
+    { MFX_PROFILE_VC1_SIMPLE,                   "simple"                },
+    { MFX_PROFILE_VC1_MAIN,                     "main"                  },
+    { MFX_PROFILE_VC1_ADVANCED,                 "advanced"              },
+#if QSV_VERSION_ATLEAST(1, 8)
+    { MFX_PROFILE_HEVC_MAIN,                    "main"                  },
+    { MFX_PROFILE_HEVC_MAIN10,                  "main10"                },
+    { MFX_PROFILE_HEVC_MAINSP,                  "mainsp"                },
+#endif
+};
+
+/* Linear lookup by numeric value only: name of the first matching profile_names[] entry, else "unknown". */
+static const char *print_profile(mfxU16 profile)
+{
+    int k = 0;
+    while (k < FF_ARRAY_ELEMS(profile_names) && profile_names[k].profile != profile)
+        k++;
+    return k < FF_ARRAY_ELEMS(profile_names) ? profile_names[k].name : "unknown";
+}
+
+static const struct { /* pairs a libmfx ratecontrol constant with a printable name; searched by print_ratecontrol() */
+    mfxU16      rc_mode;
+    const char *name;
+} rc_names[] = { /* optional modes are listed only when the matching QSV_HAVE_* / version check is true */
+    { MFX_RATECONTROL_CBR,     "CBR" },
+    { MFX_RATECONTROL_VBR,     "VBR" },
+    { MFX_RATECONTROL_CQP,     "CQP" },
+    { MFX_RATECONTROL_AVBR,    "AVBR" },
+#if QSV_HAVE_LA
+    { MFX_RATECONTROL_LA,      "LA" },
+#endif
+#if QSV_HAVE_ICQ
+    { MFX_RATECONTROL_ICQ,     "ICQ" },
+    { MFX_RATECONTROL_LA_ICQ,  "LA_ICQ" },
+#endif
+#if QSV_HAVE_VCM
+    { MFX_RATECONTROL_VCM,     "VCM" },
+#endif
+#if QSV_VERSION_ATLEAST(1, 10)
+    { MFX_RATECONTROL_LA_EXT,  "LA_EXT" },
+#endif
+#if QSV_HAVE_LA_HRD
+    { MFX_RATECONTROL_LA_HRD,  "LA_HRD" },
+#endif
+#if QSV_HAVE_QVBR
+    { MFX_RATECONTROL_QVBR,    "QVBR" },
+#endif
+};
+
+/* Linear lookup in rc_names[]: name of the first entry whose rc_mode matches, else "unknown". */
+static const char *print_ratecontrol(mfxU16 rc_mode)
+{
+    int k = 0;
+    while (k < FF_ARRAY_ELEMS(rc_names) && rc_names[k].rc_mode != rc_mode)
+        k++;
+    return k < FF_ARRAY_ELEMS(rc_names) ? rc_names[k].name : "unknown";
+}
+
+/* Map a coding-option value to "ON" or "OFF";
+ * every other value prints as "unknown". */
+static const char *print_threestate(mfxU16 val)
+{
+    return val == MFX_CODINGOPTION_ON  ? "ON"  :
+           val == MFX_CODINGOPTION_OFF ? "OFF" :
+                                         "unknown";
+}
+
+/* Log (AV_LOG_VERBOSE) the encoder parameters held in q->param.mfx and in the coding-option buffers. */
+static void dump_video_param(AVCodecContext *avctx, QSVEncContext *q, mfxExtBuffer **coding_opts)
+{
+    mfxInfoMFX *info = &q->param.mfx;
+
+    mfxExtCodingOption   *co = (mfxExtCodingOption*)coding_opts[0]; /* slot 0: mfxExtCodingOption */
+#if QSV_HAVE_CO2
+    mfxExtCodingOption2 *co2 = (mfxExtCodingOption2*)coding_opts[1]; /* slot 1: mfxExtCodingOption2 */
+#endif
+#if QSV_HAVE_CO3
+    mfxExtCodingOption3 *co3 = (mfxExtCodingOption3*)coding_opts[2]; /* slot 2: mfxExtCodingOption3 */
+#endif
+
+    av_log(avctx, AV_LOG_VERBOSE, "profile: %s; level: %"PRIu16"\n",
+           print_profile(info->CodecProfile), info->CodecLevel);
+
+    av_log(avctx, AV_LOG_VERBOSE, "GopPicSize: %"PRIu16"; GopRefDist: %"PRIu16"; GopOptFlag: ",
+           info->GopPicSize, info->GopRefDist);
+    if (info->GopOptFlag & MFX_GOP_CLOSED)
+        av_log(avctx, AV_LOG_VERBOSE, "closed ");
+    if (info->GopOptFlag & MFX_GOP_STRICT)
+        av_log(avctx, AV_LOG_VERBOSE, "strict ");
+    av_log(avctx, AV_LOG_VERBOSE, "; IdrInterval: %"PRIu16"\n", info->IdrInterval);
+
+    av_log(avctx, AV_LOG_VERBOSE, "TargetUsage: %"PRIu16"; RateControlMethod: %s\n",
+           info->TargetUsage, print_ratecontrol(info->RateControlMethod));
+
+    if (info->RateControlMethod == MFX_RATECONTROL_CBR || /* the fields printed below depend on the ratecontrol method */
+        info->RateControlMethod == MFX_RATECONTROL_VBR
+#if QSV_HAVE_VCM
+        || info->RateControlMethod == MFX_RATECONTROL_VCM
+#endif
+        ) {
+        av_log(avctx, AV_LOG_VERBOSE,
+               "InitialDelayInKB: %"PRIu16"; TargetKbps: %"PRIu16"; MaxKbps: %"PRIu16"\n",
+               info->InitialDelayInKB, info->TargetKbps, info->MaxKbps);
+    } else if (info->RateControlMethod == MFX_RATECONTROL_CQP) {
+        av_log(avctx, AV_LOG_VERBOSE, "QPI: %"PRIu16"; QPP: %"PRIu16"; QPB: %"PRIu16"\n",
+               info->QPI, info->QPP, info->QPB);
+    } else if (info->RateControlMethod == MFX_RATECONTROL_AVBR) {
+        av_log(avctx, AV_LOG_VERBOSE,
+               "TargetKbps: %"PRIu16"; Accuracy: %"PRIu16"; Convergence: %"PRIu16"\n",
+               info->TargetKbps, info->Accuracy, info->Convergence);
+    }
+#if QSV_HAVE_LA
+    else if (info->RateControlMethod == MFX_RATECONTROL_LA
+#if QSV_HAVE_LA_HRD
+             || info->RateControlMethod == MFX_RATECONTROL_LA_HRD
+#endif
+             ) {
+        av_log(avctx, AV_LOG_VERBOSE,
+               "TargetKbps: %"PRIu16"; LookAheadDepth: %"PRIu16"\n",
+               info->TargetKbps, co2->LookAheadDepth);
+    }
+#endif
+#if QSV_HAVE_ICQ
+    else if (info->RateControlMethod == MFX_RATECONTROL_ICQ) {
+        av_log(avctx, AV_LOG_VERBOSE, "ICQQuality: %"PRIu16"\n", info->ICQQuality);
+    } else if (info->RateControlMethod == MFX_RATECONTROL_LA_ICQ) {
+        av_log(avctx, AV_LOG_VERBOSE, "ICQQuality: %"PRIu16"; LookAheadDepth: %"PRIu16"\n",
+               info->ICQQuality, co2->LookAheadDepth);
+    }
+#endif
+#if QSV_HAVE_QVBR
+    else if (info->RateControlMethod == MFX_RATECONTROL_QVBR) {
+        av_log(avctx, AV_LOG_VERBOSE, "QVBRQuality: %"PRIu16"\n",
+               co3->QVBRQuality);
+    }
+#endif
+
+    av_log(avctx, AV_LOG_VERBOSE, "NumSlice: %"PRIu16"; NumRefFrame: %"PRIu16"\n",
+           info->NumSlice, info->NumRefFrame);
+    av_log(avctx, AV_LOG_VERBOSE, "RateDistortionOpt: %s\n",
+           print_threestate(co->RateDistortionOpt));
+
+#if QSV_HAVE_CO2
+    av_log(avctx, AV_LOG_VERBOSE,
+           "RecoveryPointSEI: %s IntRefType: %"PRIu16"; IntRefCycleSize: %"PRIu16"; IntRefQPDelta: %"PRId16"\n",
+           print_threestate(co->RecoveryPointSEI), co2->IntRefType, co2->IntRefCycleSize, co2->IntRefQPDelta);
+
+    av_log(avctx, AV_LOG_VERBOSE, "MaxFrameSize: %"PRIu32"; ", (uint32_t)co2->MaxFrameSize); /* 32-bit field per the SDK headers: a 16-bit specifier could truncate */
+#if QSV_HAVE_MAX_SLICE_SIZE
+    av_log(avctx, AV_LOG_VERBOSE, "MaxSliceSize: %"PRIu32"; ", (uint32_t)co2->MaxSliceSize); /* 32-bit field as well */
+#endif
+    av_log(avctx, AV_LOG_VERBOSE, "\n");
+
+    av_log(avctx, AV_LOG_VERBOSE,
+           "BitrateLimit: %s; MBBRC: %s; ExtBRC: %s\n",
+           print_threestate(co2->BitrateLimit), print_threestate(co2->MBBRC),
+           print_threestate(co2->ExtBRC));
+
+#if QSV_HAVE_TRELLIS
+    av_log(avctx, AV_LOG_VERBOSE, "Trellis: ");
+    if (co2->Trellis & MFX_TRELLIS_OFF) {
+        av_log(avctx, AV_LOG_VERBOSE, "off");
+    } else if (!co2->Trellis) {
+        av_log(avctx, AV_LOG_VERBOSE, "auto");
+    } else {
+        if (co2->Trellis & MFX_TRELLIS_I) av_log(avctx, AV_LOG_VERBOSE, "I");
+        if (co2->Trellis & MFX_TRELLIS_P) av_log(avctx, AV_LOG_VERBOSE, "P");
+        if (co2->Trellis & MFX_TRELLIS_B) av_log(avctx, AV_LOG_VERBOSE, "B");
+    }
+    av_log(avctx, AV_LOG_VERBOSE, "\n");
+#endif
+
+#if QSV_VERSION_ATLEAST(1, 8)
+    av_log(avctx, AV_LOG_VERBOSE,
+           "RepeatPPS: %s; NumMbPerSlice: %"PRIu16"; LookAheadDS: ",
+           print_threestate(co2->RepeatPPS), co2->NumMbPerSlice);
+    switch (co2->LookAheadDS) {
+    case MFX_LOOKAHEAD_DS_OFF: av_log(avctx, AV_LOG_VERBOSE, "off");     break;
+    case MFX_LOOKAHEAD_DS_2x:  av_log(avctx, AV_LOG_VERBOSE, "2x");      break;
+    case MFX_LOOKAHEAD_DS_4x:  av_log(avctx, AV_LOG_VERBOSE, "4x");      break;
+    default:                   av_log(avctx, AV_LOG_VERBOSE, "unknown"); break;
+    }
+    av_log(avctx, AV_LOG_VERBOSE, "\n");
+
+    av_log(avctx, AV_LOG_VERBOSE, "AdaptiveI: %s; AdaptiveB: %s; BRefType: ",
+           print_threestate(co2->AdaptiveI), print_threestate(co2->AdaptiveB));
+    switch (co2->BRefType) {
+    case MFX_B_REF_OFF:     av_log(avctx, AV_LOG_VERBOSE, "off");       break;
+    case MFX_B_REF_PYRAMID: av_log(avctx, AV_LOG_VERBOSE, "pyramid");   break;
+    default:                av_log(avctx, AV_LOG_VERBOSE, "auto");      break;
+    }
+    av_log(avctx, AV_LOG_VERBOSE, "\n");
+#endif
+
+#if QSV_VERSION_ATLEAST(1, 9)
+    av_log(avctx, AV_LOG_VERBOSE,
+           "MinQPI: %"PRIu8"; MaxQPI: %"PRIu8"; MinQPP: %"PRIu8"; MaxQPP: %"PRIu8"; MinQPB: %"PRIu8"; MaxQPB: %"PRIu8"\n",
+           co2->MinQPI, co2->MaxQPI, co2->MinQPP, co2->MaxQPP, co2->MinQPB, co2->MaxQPB);
+#endif
+#endif
+
+    if (avctx->codec_id == AV_CODEC_ID_H264) { /* the remaining fields are only printed for H.264 */
+        av_log(avctx, AV_LOG_VERBOSE, "Entropy coding: %s; MaxDecFrameBuffering: %"PRIu16"\n",
+               co->CAVLC == MFX_CODINGOPTION_ON ? "CAVLC" : "CABAC", co->MaxDecFrameBuffering);
+        av_log(avctx, AV_LOG_VERBOSE,
+               "NalHrdConformance: %s; SingleSeiNalUnit: %s; VuiVclHrdParameters: %s VuiNalHrdParameters: %s\n",
+               print_threestate(co->NalHrdConformance), print_threestate(co->SingleSeiNalUnit),
+               print_threestate(co->VuiVclHrdParameters), print_threestate(co->VuiNalHrdParameters));
+    }
+}
+
+static int select_rc_mode(AVCodecContext *avctx, QSVEncContext *q) /* chooses q->param.mfx.RateControlMethod; returns 0 or an AVERROR code */
 {
-    const char *ratecontrol_desc;
+    const char *rc_desc;
+    mfxU16      rc_mode;
 
+    int want_la     = q->look_ahead;
+    int want_qscale = !!(avctx->flags & AV_CODEC_FLAG_QSCALE);
+    int want_vcm    = q->vcm;
+
+    if (want_la && !QSV_HAVE_LA) { /* requested mode was compiled out */
+        av_log(avctx, AV_LOG_ERROR,
+               "Lookahead ratecontrol mode requested, but is not supported by this SDK version\n");
+        return AVERROR(ENOSYS);
+    }
+    if (want_vcm && !QSV_HAVE_VCM) {
+        av_log(avctx, AV_LOG_ERROR,
+               "VCM ratecontrol mode requested, but is not supported by this SDK version\n");
+        return AVERROR(ENOSYS);
+    }
+
+    if (want_la + want_qscale + want_vcm > 1) { /* the three explicit requests are mutually exclusive */
+        av_log(avctx, AV_LOG_ERROR,
+               "More than one of: { constant qscale, lookahead, VCM } requested, "
+               "only one of them can be used at a time.\n");
+        return AVERROR(EINVAL);
+    }
+
+    if (want_qscale) { /* explicit requests are tested before the bitrate-derived modes below */
+        rc_mode = MFX_RATECONTROL_CQP;
+        rc_desc = "constant quantization parameter (CQP)";
+    }
+#if QSV_HAVE_VCM
+    else if (want_vcm) {
+        rc_mode = MFX_RATECONTROL_VCM;
+        rc_desc = "video conferencing mode (VCM)";
+    }
+#endif
+#if QSV_HAVE_LA
+    else if (want_la) {
+        rc_mode = MFX_RATECONTROL_LA;
+        rc_desc = "VBR with lookahead (LA)";
+
+#if QSV_HAVE_ICQ
+        if (avctx->global_quality > 0) { /* a positive global_quality turns LA into LA_ICQ */
+            rc_mode = MFX_RATECONTROL_LA_ICQ;
+            rc_desc = "intelligent constant quality with lookahead (LA_ICQ)";
+        }
+#endif
+    }
+#endif
+#if QSV_HAVE_ICQ
+    else if (avctx->global_quality > 0) {
+        rc_mode = MFX_RATECONTROL_ICQ;
+        rc_desc = "intelligent constant quality (ICQ)";
+    }
+#endif
+    else if (avctx->rc_max_rate == avctx->bit_rate) { /* max rate equal to the target rate selects CBR */
+        rc_mode = MFX_RATECONTROL_CBR;
+        rc_desc = "constant bitrate (CBR)";
+    } else if (!avctx->rc_max_rate) { /* no max rate given selects AVBR */
+        rc_mode = MFX_RATECONTROL_AVBR;
+        rc_desc = "average variable bitrate (AVBR)";
+    } else {
+        rc_mode = MFX_RATECONTROL_VBR;
+        rc_desc = "variable bitrate (VBR)";
+    }
+
+    q->param.mfx.RateControlMethod = rc_mode;
+    av_log(avctx, AV_LOG_VERBOSE, "Using the %s ratecontrol method\n", rc_desc);
+
+    return 0;
+}
+
+/* Returns 1 when MFXVideoENCODE_Query() succeeds and echoes back the
+ * RateControlMethod requested in q->param, 0 otherwise. */
+static int rc_supported(QSVEncContext *q)
+{
+    mfxVideoParam queried = { .mfx.CodecId = q->param.mfx.CodecId };
+    mfxStatus status      = MFXVideoENCODE_Query(q->session, &q->param, &queried);
+
+    if (status < 0)
+        return 0;
+    return queried.mfx.RateControlMethod == q->param.mfx.RateControlMethod;
+}
+
+static int init_video_param(AVCodecContext *avctx, QSVEncContext *q)
+{
     float quant;
     int ret;
 
@@ -49,6 +362,8 @@ static int init_video_param(AVCodecContext *avctx, QSVEncContext *q)
         return AVERROR_BUG;
     q->param.mfx.CodecId = ret;
 
+    q->width_align = avctx->codec_id == AV_CODEC_ID_HEVC ? 32 : 16;
+
     if (avctx->level > 0)
         q->param.mfx.CodecLevel = avctx->level;
 
@@ -56,7 +371,7 @@ static int init_video_param(AVCodecContext *avctx, QSVEncContext *q)
     q->param.mfx.TargetUsage        = q->preset;
     q->param.mfx.GopPicSize         = FFMAX(0, avctx->gop_size);
     q->param.mfx.GopRefDist         = FFMAX(-1, avctx->max_b_frames) + 1;
-    q->param.mfx.GopOptFlag         = avctx->flags & CODEC_FLAG_CLOSED_GOP ?
+    q->param.mfx.GopOptFlag         = avctx->flags & AV_CODEC_FLAG_CLOSED_GOP ?
                                       MFX_GOP_CLOSED : 0;
     q->param.mfx.IdrInterval        = q->idr_interval;
     q->param.mfx.NumSlice           = avctx->slices;
@@ -65,18 +380,30 @@ static int init_video_param(AVCodecContext *avctx, QSVEncContext *q)
     q->param.mfx.BufferSizeInKB     = 0;
 
     q->param.mfx.FrameInfo.FourCC         = MFX_FOURCC_NV12;
-    q->param.mfx.FrameInfo.Width          = FFALIGN(avctx->width, 16);
-    q->param.mfx.FrameInfo.Height         = FFALIGN(avctx->height, 32);
     q->param.mfx.FrameInfo.CropX          = 0;
     q->param.mfx.FrameInfo.CropY          = 0;
     q->param.mfx.FrameInfo.CropW          = avctx->width;
     q->param.mfx.FrameInfo.CropH          = avctx->height;
     q->param.mfx.FrameInfo.AspectRatioW   = avctx->sample_aspect_ratio.num;
     q->param.mfx.FrameInfo.AspectRatioH   = avctx->sample_aspect_ratio.den;
-    q->param.mfx.FrameInfo.PicStruct      = MFX_PICSTRUCT_PROGRESSIVE;
     q->param.mfx.FrameInfo.ChromaFormat   = MFX_CHROMAFORMAT_YUV420;
     q->param.mfx.FrameInfo.BitDepthLuma   = 8;
     q->param.mfx.FrameInfo.BitDepthChroma = 8;
+    q->param.mfx.FrameInfo.Width          = FFALIGN(avctx->width, q->width_align);
+
+    if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
+       /* The actual field order (TFF or BFF) is not important here;
+          it will be specified later during frame encoding. But it is important
+          to specify whether the frame is progressive or not, because the allowed
+          height alignment depends on this.
+        */
+        q->param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_FIELD_TFF;
+        q->height_align = 32;
+    } else {
+        q->param.mfx.FrameInfo.PicStruct = MFX_PICSTRUCT_PROGRESSIVE;
+        q->height_align = 16;
+    }
+   q->param.mfx.FrameInfo.Height    = FFALIGN(avctx->height, q->height_align);
 
     if (avctx->framerate.den > 0 && avctx->framerate.num > 0) {
         q->param.mfx.FrameInfo.FrameRateExtN = avctx->framerate.num;
@@ -86,28 +413,19 @@ static int init_video_param(AVCodecContext *avctx, QSVEncContext *q)
         q->param.mfx.FrameInfo.FrameRateExtD  = avctx->time_base.num;
     }
 
-    if (avctx->flags & CODEC_FLAG_QSCALE) {
-        q->param.mfx.RateControlMethod = MFX_RATECONTROL_CQP;
-        ratecontrol_desc = "constant quantization parameter (CQP)";
-    } else if (avctx->rc_max_rate == avctx->bit_rate) {
-        q->param.mfx.RateControlMethod = MFX_RATECONTROL_CBR;
-        ratecontrol_desc = "constant bitrate (CBR)";
-    } else if (!avctx->rc_max_rate) {
-        q->param.mfx.RateControlMethod = MFX_RATECONTROL_AVBR;
-        ratecontrol_desc = "average variable bitrate (AVBR)";
-    } else {
-        q->param.mfx.RateControlMethod = MFX_RATECONTROL_VBR;
-        ratecontrol_desc = "variable bitrate (VBR)";
-    }
-
-    av_log(avctx, AV_LOG_VERBOSE, "Using the %s ratecontrol method\n", ratecontrol_desc);
+    ret = select_rc_mode(avctx, q);
+    if (ret < 0)
+        return ret;
 
     switch (q->param.mfx.RateControlMethod) {
     case MFX_RATECONTROL_CBR:
     case MFX_RATECONTROL_VBR:
+#if QSV_HAVE_VCM
+    case MFX_RATECONTROL_VCM:
+#endif
         q->param.mfx.InitialDelayInKB = avctx->rc_initial_buffer_occupancy / 1000;
         q->param.mfx.TargetKbps       = avctx->bit_rate / 1000;
-        q->param.mfx.MaxKbps          = avctx->bit_rate / 1000;
+        q->param.mfx.MaxKbps          = avctx->rc_max_rate / 1000;
         break;
     case MFX_RATECONTROL_CQP:
         quant = avctx->global_quality / FF_QP2LAMBDA;
@@ -122,23 +440,117 @@ static int init_video_param(AVCodecContext *avctx, QSVEncContext *q)
         q->param.mfx.Convergence = q->avbr_convergence;
         q->param.mfx.Accuracy    = q->avbr_accuracy;
         break;
+#if QSV_HAVE_LA
+    case MFX_RATECONTROL_LA:
+        q->param.mfx.TargetKbps  = avctx->bit_rate / 1000;
+        q->extco2.LookAheadDepth = q->look_ahead_depth;
+        break;
+#if QSV_HAVE_ICQ
+    case MFX_RATECONTROL_LA_ICQ:
+        q->extco2.LookAheadDepth = q->look_ahead_depth;
+    case MFX_RATECONTROL_ICQ:
+        q->param.mfx.ICQQuality  = avctx->global_quality;
+        break;
+#endif
+#endif
     }
 
-    q->extco.Header.BufferId      = MFX_EXTBUFF_CODING_OPTION;
-    q->extco.Header.BufferSz      = sizeof(q->extco);
-    q->extco.CAVLC                = avctx->coder_type == FF_CODER_TYPE_VLC ?
-                                    MFX_CODINGOPTION_ON : MFX_CODINGOPTION_UNKNOWN;
+    // the HEVC encoder plugin currently fails if coding options
+    // are provided
+    if (avctx->codec_id != AV_CODEC_ID_HEVC) {
+        q->extco.Header.BufferId      = MFX_EXTBUFF_CODING_OPTION;
+        q->extco.Header.BufferSz      = sizeof(q->extco);
+        q->extco.CAVLC                = avctx->coder_type == FF_CODER_TYPE_VLC ?
+                                        MFX_CODINGOPTION_ON : MFX_CODINGOPTION_UNKNOWN;
+
+        q->extco.PicTimingSEI         = q->pic_timing_sei ?
+                                        MFX_CODINGOPTION_ON : MFX_CODINGOPTION_UNKNOWN;
+
+        if (q->rdo >= 0)
+            q->extco.RateDistortionOpt = q->rdo > 0 ? MFX_CODINGOPTION_ON : MFX_CODINGOPTION_OFF;
+
+        if (avctx->codec_id == AV_CODEC_ID_H264) {
+            if (avctx->strict_std_compliance != FF_COMPLIANCE_NORMAL)
+                q->extco.NalHrdConformance = avctx->strict_std_compliance > FF_COMPLIANCE_NORMAL ?
+                                             MFX_CODINGOPTION_ON : MFX_CODINGOPTION_OFF;
+
+            if (q->single_sei_nal_unit >= 0)
+                q->extco.SingleSeiNalUnit = q->single_sei_nal_unit ? MFX_CODINGOPTION_ON : MFX_CODINGOPTION_OFF;
+            if (q->recovery_point_sei >= 0)
+                q->extco.RecoveryPointSEI = q->recovery_point_sei ? MFX_CODINGOPTION_ON : MFX_CODINGOPTION_OFF;
+            q->extco.MaxDecFrameBuffering = q->max_dec_frame_buffering;
+        }
 
-    q->extparam[0] = (mfxExtBuffer *)&q->extco;
+        q->extparam_internal[q->nb_extparam_internal++] = (mfxExtBuffer *)&q->extco;
+
+#if QSV_HAVE_CO2
+        if (avctx->codec_id == AV_CODEC_ID_H264) {
+            q->extco2.Header.BufferId     = MFX_EXTBUFF_CODING_OPTION2;
+            q->extco2.Header.BufferSz     = sizeof(q->extco2);
+
+            if (q->int_ref_type >= 0)
+                q->extco2.IntRefType = q->int_ref_type;
+            if (q->int_ref_cycle_size >= 0)
+                q->extco2.IntRefCycleSize = q->int_ref_cycle_size;
+            if (q->int_ref_qp_delta != INT16_MIN)
+                q->extco2.IntRefQPDelta = q->int_ref_qp_delta;
+
+            if (q->bitrate_limit >= 0)
+                q->extco2.BitrateLimit = q->bitrate_limit ? MFX_CODINGOPTION_ON : MFX_CODINGOPTION_OFF;
+            if (q->mbbrc >= 0)
+                q->extco2.MBBRC = q->mbbrc ? MFX_CODINGOPTION_ON : MFX_CODINGOPTION_OFF;
+            if (q->extbrc >= 0)
+                q->extco2.ExtBRC = q->extbrc ? MFX_CODINGOPTION_ON : MFX_CODINGOPTION_OFF;
+
+            if (q->max_frame_size >= 0)
+                q->extco2.MaxFrameSize = q->max_frame_size;
+#if QSV_HAVE_MAX_SLICE_SIZE
+            if (q->max_slice_size >= 0)
+                q->extco2.MaxSliceSize = q->max_slice_size;
+#endif
+
+#if QSV_HAVE_TRELLIS
+            q->extco2.Trellis = q->trellis;
+#endif
+
+#if QSV_HAVE_BREF_TYPE
+#if FF_API_PRIVATE_OPT
+FF_DISABLE_DEPRECATION_WARNINGS
+            if (avctx->b_frame_strategy >= 0)
+                q->b_strategy = avctx->b_frame_strategy;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+            if (q->b_strategy >= 0)
+                q->extco2.BRefType = q->b_strategy ? MFX_B_REF_PYRAMID : MFX_B_REF_OFF;
+            if (q->adaptive_i >= 0)
+                q->extco2.AdaptiveI = q->adaptive_i ? MFX_CODINGOPTION_ON : MFX_CODINGOPTION_OFF;
+            if (q->adaptive_b >= 0)
+                q->extco2.AdaptiveB = q->adaptive_b ? MFX_CODINGOPTION_ON : MFX_CODINGOPTION_OFF;
+#endif
+
+            q->extparam_internal[q->nb_extparam_internal++] = (mfxExtBuffer *)&q->extco2;
+
+#if QSV_VERSION_ATLEAST(1,8)
+            q->extco2.LookAheadDS           = q->look_ahead_downsampling;
+#endif
+        }
+#endif
+    }
 
-    q->param.ExtParam    = q->extparam;
-    q->param.NumExtParam = FF_ARRAY_ELEMS(q->extparam);
+    if (!rc_supported(q)) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Selected ratecontrol mode is not supported by the QSV "
+               "runtime. Choose a different mode.\n");
+        return AVERROR(ENOSYS);
+    }
 
     return 0;
 }
 
 static int qsv_retrieve_enc_params(AVCodecContext *avctx, QSVEncContext *q)
 {
+    AVCPBProperties *cpb_props;
+
     uint8_t sps_buf[128];
     uint8_t pps_buf[128];
 
@@ -149,10 +561,35 @@ static int qsv_retrieve_enc_params(AVCodecContext *avctx, QSVEncContext *q)
         .PPSBuffer = pps_buf, .PPSBufSize = sizeof(pps_buf)
     };
 
+    mfxExtCodingOption co = {
+        .Header.BufferId = MFX_EXTBUFF_CODING_OPTION,
+        .Header.BufferSz = sizeof(co),
+    };
+#if QSV_HAVE_CO2
+    mfxExtCodingOption2 co2 = {
+        .Header.BufferId = MFX_EXTBUFF_CODING_OPTION2,
+        .Header.BufferSz = sizeof(co2),
+    };
+#endif
+#if QSV_HAVE_CO3
+    mfxExtCodingOption3 co3 = {
+        .Header.BufferId = MFX_EXTBUFF_CODING_OPTION3,
+        .Header.BufferSz = sizeof(co3),
+    };
+#endif
+
     mfxExtBuffer *ext_buffers[] = {
         (mfxExtBuffer*)&extradata,
+        (mfxExtBuffer*)&co,
+#if QSV_HAVE_CO2
+        (mfxExtBuffer*)&co2,
+#endif
+#if QSV_HAVE_CO3
+        (mfxExtBuffer*)&co3,
+#endif
     };
 
+    int need_pps = avctx->codec_id != AV_CODEC_ID_MPEG2VIDEO;
     int ret;
 
     q->param.ExtParam    = ext_buffers;
@@ -164,58 +601,160 @@ static int qsv_retrieve_enc_params(AVCodecContext *avctx, QSVEncContext *q)
 
     q->packet_size = q->param.mfx.BufferSizeInKB * 1000;
 
-    if (!extradata.SPSBufSize || !extradata.PPSBufSize) {
+    if (!extradata.SPSBufSize || (need_pps && !extradata.PPSBufSize)) {
         av_log(avctx, AV_LOG_ERROR, "No extradata returned from libmfx.\n");
         return AVERROR_UNKNOWN;
     }
 
-    avctx->extradata = av_malloc(extradata.SPSBufSize + extradata.PPSBufSize +
-                                 FF_INPUT_BUFFER_PADDING_SIZE);
+    avctx->extradata = av_malloc(extradata.SPSBufSize + need_pps * extradata.PPSBufSize +
+                                 AV_INPUT_BUFFER_PADDING_SIZE);
     if (!avctx->extradata)
         return AVERROR(ENOMEM);
 
     memcpy(avctx->extradata,                        sps_buf, extradata.SPSBufSize);
-    memcpy(avctx->extradata + extradata.SPSBufSize, pps_buf, extradata.PPSBufSize);
-    avctx->extradata_size = extradata.SPSBufSize + extradata.PPSBufSize;
-    memset(avctx->extradata + avctx->extradata_size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
+    if (need_pps)
+        memcpy(avctx->extradata + extradata.SPSBufSize, pps_buf, extradata.PPSBufSize);
+    avctx->extradata_size = extradata.SPSBufSize + need_pps * extradata.PPSBufSize;
+    memset(avctx->extradata + avctx->extradata_size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+
+    cpb_props = ff_add_cpb_side_data(avctx);
+    if (!cpb_props)
+        return AVERROR(ENOMEM);
+    cpb_props->max_bitrate = avctx->rc_max_rate;
+    cpb_props->min_bitrate = avctx->rc_min_rate;
+    cpb_props->avg_bitrate = avctx->bit_rate;
+    cpb_props->buffer_size = avctx->rc_buffer_size;
+
+    dump_video_param(avctx, q, ext_buffers + 1);
+
+    return 0;
+}
+
+static int qsv_init_opaque_alloc(AVCodecContext *avctx, QSVEncContext *q) /* builds the opaque surface pool and queues it as an ext buffer; returns 0 or AVERROR(ENOMEM) */
+{
+    AVQSVContext *qsv = avctx->hwaccel_context;
+    mfxFrameSurface1 *surfaces;
+    int nb_surfaces, i;
+
+    nb_surfaces = qsv->nb_opaque_surfaces + q->req.NumFrameSuggested + q->async_depth; /* count from hwaccel_context + suggested frames + async depth */
+
+    q->opaque_alloc_buf = av_buffer_allocz(sizeof(*surfaces) * nb_surfaces);
+    if (!q->opaque_alloc_buf)
+        return AVERROR(ENOMEM);
+
+    q->opaque_surfaces = av_malloc_array(nb_surfaces, sizeof(*q->opaque_surfaces));
+    if (!q->opaque_surfaces)
+        return AVERROR(ENOMEM); /* NOTE(review): opaque_alloc_buf stays allocated on this path - presumably released by the close path; confirm */
+
+    surfaces = (mfxFrameSurface1*)q->opaque_alloc_buf->data;
+    for (i = 0; i < nb_surfaces; i++) { /* each surface gets the queried frame info; the pointer array points into the buffer */
+        surfaces[i].Info      = q->req.Info;
+        q->opaque_surfaces[i] = surfaces + i;
+    }
+
+    q->opaque_alloc.Header.BufferId = MFX_EXTBUFF_OPAQUE_SURFACE_ALLOCATION;
+    q->opaque_alloc.Header.BufferSz = sizeof(q->opaque_alloc);
+    q->opaque_alloc.In.Surfaces     = q->opaque_surfaces;
+    q->opaque_alloc.In.NumSurface   = nb_surfaces;
+    q->opaque_alloc.In.Type         = q->req.Type;
+
+    q->extparam_internal[q->nb_extparam_internal++] = (mfxExtBuffer *)&q->opaque_alloc;
+
+    qsv->nb_opaque_surfaces = nb_surfaces; /* write the pool description back into the AVQSVContext */
+    qsv->opaque_surfaces    = q->opaque_alloc_buf;
+    qsv->opaque_alloc_type  = q->req.Type;
 
     return 0;
 }
 
 int ff_qsv_enc_init(AVCodecContext *avctx, QSVEncContext *q)
 {
+    int opaque_alloc = 0;
     int ret;
 
     q->param.IOPattern  = MFX_IOPATTERN_IN_SYSTEM_MEMORY;
     q->param.AsyncDepth = q->async_depth;
 
+    q->async_fifo = av_fifo_alloc((1 + q->async_depth) *
+                                  (sizeof(AVPacket) + sizeof(mfxSyncPoint) + sizeof(mfxBitstream*)));
+    if (!q->async_fifo)
+        return AVERROR(ENOMEM);
+
     if (avctx->hwaccel_context) {
         AVQSVContext *qsv = avctx->hwaccel_context;
 
         q->session         = qsv->session;
         q->param.IOPattern = qsv->iopattern;
+
+        opaque_alloc = qsv->opaque_alloc;
     }
 
     if (!q->session) {
-        ret = ff_qsv_init_internal_session(avctx, &q->internal_session);
+        ret = ff_qsv_init_internal_session(avctx, &q->internal_qs,
+                                           q->load_plugins);
         if (ret < 0)
             return ret;
 
-        q->session = q->internal_session;
+        q->session = q->internal_qs.session;
     }
 
     ret = init_video_param(avctx, q);
     if (ret < 0)
         return ret;
 
+    ret = MFXVideoENCODE_Query(q->session, &q->param,&q->param);
+    if (MFX_WRN_PARTIAL_ACCELERATION==ret) {
+        av_log(avctx, AV_LOG_WARNING, "Encoder will work with partial HW acceleration\n");
+    } else if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Error %d querying encoder params\n", ret);
+        return ff_qsv_error(ret);
+    }
+
     ret = MFXVideoENCODE_QueryIOSurf(q->session, &q->param, &q->req);
     if (ret < 0) {
         av_log(avctx, AV_LOG_ERROR, "Error querying the encoding parameters\n");
         return ff_qsv_error(ret);
     }
 
+    if (opaque_alloc) {
+        ret = qsv_init_opaque_alloc(avctx, q);
+        if (ret < 0)
+            return ret;
+    }
+
+    if (avctx->hwaccel_context) {
+        AVQSVContext *qsv = avctx->hwaccel_context;
+        int i, j;
+
+        q->extparam = av_mallocz_array(qsv->nb_ext_buffers + q->nb_extparam_internal,
+                                       sizeof(*q->extparam));
+        if (!q->extparam)
+            return AVERROR(ENOMEM);
+
+        q->param.ExtParam = q->extparam;
+        for (i = 0; i < qsv->nb_ext_buffers; i++)
+            q->param.ExtParam[i] = qsv->ext_buffers[i];
+        q->param.NumExtParam = qsv->nb_ext_buffers;
+
+        for (i = 0; i < q->nb_extparam_internal; i++) {
+            for (j = 0; j < qsv->nb_ext_buffers; j++) {
+                if (qsv->ext_buffers[j]->BufferId == q->extparam_internal[i]->BufferId)
+                    break;
+            }
+            if (j < qsv->nb_ext_buffers)
+                continue;
+
+            q->param.ExtParam[q->param.NumExtParam++] = q->extparam_internal[i];
+        }
+    } else {
+        q->param.ExtParam    = q->extparam_internal;
+        q->param.NumExtParam = q->nb_extparam_internal;
+    }
+
     ret = MFXVideoENCODE_Init(q->session, &q->param);
-    if (ret < 0) {
+    if (MFX_WRN_PARTIAL_ACCELERATION==ret) {
+        av_log(avctx, AV_LOG_WARNING, "Encoder will work with partial HW acceleration\n");
+    } else if (ret < 0) {
         av_log(avctx, AV_LOG_ERROR, "Error initializing the encoder\n");
         return ff_qsv_error(ret);
     }
@@ -226,21 +765,31 @@ int ff_qsv_enc_init(AVCodecContext *avctx, QSVEncContext *q)
         return ret;
     }
 
-    avctx->coded_frame = av_frame_alloc();
-    if (!avctx->coded_frame)
-        return AVERROR(ENOMEM);
-
     q->avctx = avctx;
 
     return 0;
 }
 
+/* Free each payload (Data, then the mfxPayload), NULL its slot and reset NumPayload; NULL enc_ctrl or entries are tolerated. */
+static void free_encoder_ctrl_payloads(mfxEncodeCtrl* enc_ctrl)
+{
+    int i;
+    for (i = 0; enc_ctrl && i < enc_ctrl->NumPayload && i < QSV_MAX_ENC_PAYLOAD; i++) {
+        if (enc_ctrl->Payload[i])
+            av_free(enc_ctrl->Payload[i]->Data);
+        av_freep(&enc_ctrl->Payload[i]); /* frees the payload and leaves no dangling pointer behind */
+    }
+    if (enc_ctrl)
+        enc_ctrl->NumPayload = 0;
+}
+
 static void clear_unused_frames(QSVEncContext *q)
 {
     QSVFrame *cur = q->work_frames;
     while (cur) {
         if (cur->surface && !cur->surface->Data.Locked) {
             cur->surface = NULL;
+            free_encoder_ctrl_payloads(&cur->enc_ctrl);
             av_frame_unref(cur->frame);
         }
         cur = cur->next;
@@ -273,6 +822,11 @@ static int get_free_frame(QSVEncContext *q, QSVFrame **f)
         av_freep(&frame);
         return AVERROR(ENOMEM);
     }
+    frame->enc_ctrl.Payload = av_mallocz(sizeof(mfxPayload*) * QSV_MAX_ENC_PAYLOAD);
+    if (!frame->enc_ctrl.Payload) {
+        av_freep(&frame);
+        return AVERROR(ENOMEM);
+    }
     *last = frame;
 
     *f = frame;
@@ -281,7 +835,7 @@ static int get_free_frame(QSVEncContext *q, QSVFrame **f)
 }
 
 static int submit_frame(QSVEncContext *q, const AVFrame *frame,
-                        mfxFrameSurface1 **surface)
+                        QSVFrame **new_frame)
 {
     QSVFrame *qf;
     int ret;
@@ -296,53 +850,53 @@ static int submit_frame(QSVEncContext *q, const AVFrame *frame,
             return ret;
 
         qf->surface = (mfxFrameSurface1*)qf->frame->data[3];
-        *surface = qf->surface;
-        return 0;
-    }
-
-    /* make a copy if the input is not padded as libmfx requires */
-    if (frame->height & 31 || frame->linesize[0] & 15) {
-        qf->frame->height = FFALIGN(frame->height, 32);
-        qf->frame->width  = FFALIGN(frame->width, 16);
-
-        ret = ff_get_buffer(q->avctx, qf->frame, AV_GET_BUFFER_FLAG_REF);
-        if (ret < 0)
-            return ret;
-
-        qf->frame->height = frame->height;
-        qf->frame->width  = frame->width;
-        ret = av_frame_copy(qf->frame, frame);
-        if (ret < 0) {
-            av_frame_unref(qf->frame);
-            return ret;
-        }
     } else {
-        ret = av_frame_ref(qf->frame, frame);
-        if (ret < 0)
-            return ret;
-    }
+        /* make a copy if the input is not padded as libmfx requires */
+        if (     frame->height & (q->height_align - 1) ||
+            frame->linesize[0] & (q->width_align - 1)) {
+            qf->frame->height = FFALIGN(frame->height, q->height_align);
+            qf->frame->width  = FFALIGN(frame->width, q->width_align);
+
+            ret = ff_get_buffer(q->avctx, qf->frame, AV_GET_BUFFER_FLAG_REF);
+            if (ret < 0)
+                return ret;
+
+            qf->frame->height = frame->height;
+            qf->frame->width  = frame->width;
+            ret = av_frame_copy(qf->frame, frame);
+            if (ret < 0) {
+                av_frame_unref(qf->frame);
+                return ret;
+            }
+        } else {
+            ret = av_frame_ref(qf->frame, frame);
+            if (ret < 0)
+                return ret;
+        }
+
+        qf->surface_internal.Info = q->param.mfx.FrameInfo;
 
-    qf->surface_internal.Info = q->param.mfx.FrameInfo;
+        qf->surface_internal.Info.PicStruct =
+            !frame->interlaced_frame ? MFX_PICSTRUCT_PROGRESSIVE :
+            frame->top_field_first   ? MFX_PICSTRUCT_FIELD_TFF :
+                                       MFX_PICSTRUCT_FIELD_BFF;
+        if (frame->repeat_pict == 1)
+            qf->surface_internal.Info.PicStruct |= MFX_PICSTRUCT_FIELD_REPEATED;
+        else if (frame->repeat_pict == 2)
+            qf->surface_internal.Info.PicStruct |= MFX_PICSTRUCT_FRAME_DOUBLING;
+        else if (frame->repeat_pict == 4)
+            qf->surface_internal.Info.PicStruct |= MFX_PICSTRUCT_FRAME_TRIPLING;
 
-    qf->surface_internal.Info.PicStruct =
-        !frame->interlaced_frame ? MFX_PICSTRUCT_PROGRESSIVE :
-        frame->top_field_first   ? MFX_PICSTRUCT_FIELD_TFF :
-                                   MFX_PICSTRUCT_FIELD_BFF;
-    if (frame->repeat_pict == 1)
-        qf->surface_internal.Info.PicStruct |= MFX_PICSTRUCT_FIELD_REPEATED;
-    else if (frame->repeat_pict == 2)
-        qf->surface_internal.Info.PicStruct |= MFX_PICSTRUCT_FRAME_DOUBLING;
-    else if (frame->repeat_pict == 4)
-        qf->surface_internal.Info.PicStruct |= MFX_PICSTRUCT_FRAME_TRIPLING;
+        qf->surface_internal.Data.PitchLow  = qf->frame->linesize[0];
+        qf->surface_internal.Data.Y         = qf->frame->data[0];
+        qf->surface_internal.Data.UV        = qf->frame->data[1];
 
-    qf->surface_internal.Data.PitchLow  = qf->frame->linesize[0];
-    qf->surface_internal.Data.Y         = qf->frame->data[0];
-    qf->surface_internal.Data.UV        = qf->frame->data[1];
-    qf->surface_internal.Data.TimeStamp = av_rescale_q(frame->pts, q->avctx->time_base, (AVRational){1, 90000});
+        qf->surface = &qf->surface_internal;
+    }
 
-    qf->surface = &qf->surface_internal;
+    qf->surface->Data.TimeStamp = av_rescale_q(frame->pts, q->avctx->time_base, (AVRational){1, 90000});
 
-    *surface = qf->surface;
+    *new_frame = qf;
 
     return 0;
 }
@@ -359,60 +913,146 @@ static void print_interlace_msg(AVCodecContext *avctx, QSVEncContext *q)
     }
 }
 
-int ff_qsv_encode(AVCodecContext *avctx, QSVEncContext *q,
-                  AVPacket *pkt, const AVFrame *frame, int *got_packet)
+static int encode_frame(AVCodecContext *avctx, QSVEncContext *q,
+                        const AVFrame *frame) /* frame == NULL: nothing is submitted, the encoder is called with a NULL surface */
 {
-    mfxBitstream bs = { { { 0 } } };
+    AVPacket new_pkt = { 0 };
+    mfxBitstream *bs;
 
     mfxFrameSurface1 *surf = NULL;
     mfxSyncPoint sync      = NULL;
+    QSVFrame *qsv_frame = NULL;
+    mfxEncodeCtrl* enc_ctrl = NULL;
     int ret;
 
     if (frame) {
-        ret = submit_frame(q, frame, &surf);
+        ret = submit_frame(q, frame, &qsv_frame);
         if (ret < 0) {
             av_log(avctx, AV_LOG_ERROR, "Error submitting the frame for encoding.\n");
             return ret;
         }
     }
+    if (qsv_frame) {
+        surf = qsv_frame->surface;
+        enc_ctrl = &qsv_frame->enc_ctrl;
+    }
 
-    ret = ff_alloc_packet(pkt, q->packet_size);
+    ret = av_new_packet(&new_pkt, q->packet_size);
     if (ret < 0) {
         av_log(avctx, AV_LOG_ERROR, "Error allocating the output packet\n");
         return ret;
     }
-    bs.Data      = pkt->data;
-    bs.MaxLength = pkt->size;
+    /* bs is heap-allocated because its pointer is queued in async_fifo and read back later */
+    bs = av_mallocz(sizeof(*bs));
+    if (!bs) {
+        av_packet_unref(&new_pkt);
+        return AVERROR(ENOMEM);
+    }
+    bs->Data      = new_pkt.data;
+    bs->MaxLength = new_pkt.size;
+
+    /* No frame was submitted (enc_ctrl == NULL) => nothing to attach, so skip the hook. */
+    if (q->set_encode_ctrl_cb && enc_ctrl)
+        q->set_encode_ctrl_cb(avctx, frame, enc_ctrl);
 
     do {
-        ret = MFXVideoENCODE_EncodeFrameAsync(q->session, NULL, surf, &bs, &sync);
-        if (ret == MFX_WRN_DEVICE_BUSY)
-            av_usleep(1);
-    } while (ret > 0);
+        ret = MFXVideoENCODE_EncodeFrameAsync(q->session, enc_ctrl, surf, bs, &sync);
+        if (ret == MFX_WRN_DEVICE_BUSY) {
+            av_usleep(500);
+            continue;
+        }
+        break;
+    } while ( 1 );
+    /* negative codes release the packet; MFX_ERR_MORE_DATA is still reported as success (0) */
+    if (ret < 0) {
+        av_packet_unref(&new_pkt);
+        av_freep(&bs);
+        if (ret == MFX_ERR_MORE_DATA)
+            return 0;
+        av_log(avctx, AV_LOG_ERROR, "EncodeFrameAsync returned %d\n", ret);
+        return ff_qsv_error(ret);
+    }
+
+    if (ret == MFX_WRN_INCOMPATIBLE_VIDEO_PARAM) {
+        if (frame && frame->interlaced_frame)
+            print_interlace_msg(avctx, q);
+        else
+            av_log(avctx, AV_LOG_WARNING,
+                   "EncodeFrameAsync returned 'incompatible param' code\n");
+    }
+    if (sync) {
+        av_fifo_generic_write(q->async_fifo, &new_pkt, sizeof(new_pkt), NULL);
+        av_fifo_generic_write(q->async_fifo, &sync,    sizeof(sync),    NULL);
+        av_fifo_generic_write(q->async_fifo, &bs,      sizeof(bs),    NULL);
+    } else {
+        av_packet_unref(&new_pkt);
+        av_freep(&bs);
+    }
 
+    return 0;
+}
+
+int ff_qsv_encode(AVCodecContext *avctx, QSVEncContext *q,
+                  AVPacket *pkt, const AVFrame *frame, int *got_packet)
+{
+    int ret;
+
+    ret = encode_frame(avctx, q, frame);
     if (ret < 0)
-        return (ret == MFX_ERR_MORE_DATA) ? 0 : ff_qsv_error(ret);
+        return ret;
 
-    if (ret == MFX_WRN_INCOMPATIBLE_VIDEO_PARAM && frame->interlaced_frame)
-        print_interlace_msg(avctx, q);
+    if (!av_fifo_space(q->async_fifo) ||
+        (!frame && av_fifo_size(q->async_fifo))) {
+        AVPacket new_pkt;
+        mfxBitstream *bs;
+        mfxSyncPoint sync;
 
-    if (sync) {
-        MFXVideoCORE_SyncOperation(q->session, sync, 60000);
+        av_fifo_generic_read(q->async_fifo, &new_pkt, sizeof(new_pkt), NULL);
+        av_fifo_generic_read(q->async_fifo, &sync,    sizeof(sync),    NULL);
+        av_fifo_generic_read(q->async_fifo, &bs,      sizeof(bs),      NULL);
+
+        do {
+            ret = MFXVideoCORE_SyncOperation(q->session, sync, 1000);
+        } while (ret == MFX_WRN_IN_EXECUTION);
 
-        if (bs.FrameType & MFX_FRAMETYPE_I || bs.FrameType & MFX_FRAMETYPE_xI)
+        new_pkt.dts  = av_rescale_q(bs->DecodeTimeStamp, (AVRational){1, 90000}, avctx->time_base);
+        new_pkt.pts  = av_rescale_q(bs->TimeStamp,       (AVRational){1, 90000}, avctx->time_base);
+        new_pkt.size = bs->DataLength;
+
+        if (bs->FrameType & MFX_FRAMETYPE_IDR ||
+            bs->FrameType & MFX_FRAMETYPE_xIDR)
+            new_pkt.flags |= AV_PKT_FLAG_KEY;
+
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
+        if (bs->FrameType & MFX_FRAMETYPE_I || bs->FrameType & MFX_FRAMETYPE_xI)
             avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
-        else if (bs.FrameType & MFX_FRAMETYPE_P || bs.FrameType & MFX_FRAMETYPE_xP)
+        else if (bs->FrameType & MFX_FRAMETYPE_P || bs->FrameType & MFX_FRAMETYPE_xP)
             avctx->coded_frame->pict_type = AV_PICTURE_TYPE_P;
-        else if (bs.FrameType & MFX_FRAMETYPE_B || bs.FrameType & MFX_FRAMETYPE_xB)
+        else if (bs->FrameType & MFX_FRAMETYPE_B || bs->FrameType & MFX_FRAMETYPE_xB)
             avctx->coded_frame->pict_type = AV_PICTURE_TYPE_B;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+        av_freep(&bs);
 
-        pkt->dts  = av_rescale_q(bs.DecodeTimeStamp, (AVRational){1, 90000}, avctx->time_base);
-        pkt->pts  = av_rescale_q(bs.TimeStamp,       (AVRational){1, 90000}, avctx->time_base);
-        pkt->size = bs.DataLength;
+        if (pkt->data) {
+            if (pkt->size < new_pkt.size) {
+                av_log(avctx, AV_LOG_ERROR, "Submitted buffer not large enough: %d < %d\n",
+                       pkt->size, new_pkt.size);
+                av_packet_unref(&new_pkt);
+                return AVERROR(EINVAL);
+            }
 
-        if (bs.FrameType & MFX_FRAMETYPE_IDR ||
-            bs.FrameType & MFX_FRAMETYPE_xIDR)
-            pkt->flags |= AV_PKT_FLAG_KEY;
+            memcpy(pkt->data, new_pkt.data, new_pkt.size);
+            pkt->size = new_pkt.size;
+
+            ret = av_packet_copy_props(pkt, &new_pkt);
+            av_packet_unref(&new_pkt);
+            if (ret < 0)
+                return ret;
+        } else
+            *pkt = new_pkt;
 
         *got_packet = 1;
     }
@@ -424,21 +1064,40 @@ int ff_qsv_enc_close(AVCodecContext *avctx, QSVEncContext *q)
 {
     QSVFrame *cur;
 
-    MFXVideoENCODE_Close(q->session);
-    if (q->internal_session)
-        MFXClose(q->internal_session);
-    q->session          = NULL;
-    q->internal_session = NULL;
+    if (q->session)
+        MFXVideoENCODE_Close(q->session);
+    q->session = NULL;
+
+    ff_qsv_close_internal_session(&q->internal_qs);
 
     cur = q->work_frames;
     while (cur) {
         q->work_frames = cur->next;
         av_frame_free(&cur->frame);
+        av_free(cur->enc_ctrl.Payload);
         av_freep(&cur);
         cur = q->work_frames;
     }
 
-    av_frame_free(&avctx->coded_frame);
+    while (q->async_fifo && av_fifo_size(q->async_fifo)) {
+        AVPacket pkt;
+        mfxSyncPoint sync;
+        mfxBitstream *bs;
+
+        av_fifo_generic_read(q->async_fifo, &pkt,  sizeof(pkt),  NULL);
+        av_fifo_generic_read(q->async_fifo, &sync, sizeof(sync), NULL);
+        av_fifo_generic_read(q->async_fifo, &bs,   sizeof(bs),   NULL);
+
+        av_freep(&bs);
+        av_packet_unref(&pkt);
+    }
+    av_fifo_free(q->async_fifo);
+    q->async_fifo = NULL;
+
+    av_freep(&q->opaque_surfaces);
+    av_buffer_unref(&q->opaque_alloc_buf);
+
+    av_freep(&q->extparam);
 
     return 0;
 }
diff --git a/libavcodec/qsvenc.h b/libavcodec/qsvenc.h
index 9f7f8ccf..806dc060 100644
--- a/libavcodec/qsvenc.h
+++ b/libavcodec/qsvenc.h
@@ -29,25 +29,79 @@
 #include <mfx/mfxvideo.h>
 
 #include "libavutil/avutil.h"
+#include "libavutil/fifo.h"
 
 #include "avcodec.h"
 #include "qsv_internal.h"
 
+#define QSV_HAVE_CO2 QSV_VERSION_ATLEAST(1, 6)  /* gates the mfxExtCodingOption2 member and one extparam_internal slot of QSVEncContext */
+#define QSV_HAVE_CO3 QSV_VERSION_ATLEAST(1, 11)
+/* NOTE(review): the groups below look like per-feature minimum API versions — confirm against the code that tests them */
+#define QSV_HAVE_TRELLIS QSV_VERSION_ATLEAST(1, 8)
+#define QSV_HAVE_MAX_SLICE_SIZE QSV_VERSION_ATLEAST(1, 9)
+#define QSV_HAVE_BREF_TYPE      QSV_VERSION_ATLEAST(1, 8)
+
+#define QSV_HAVE_LA     QSV_VERSION_ATLEAST(1, 7)  /* gates the look_ahead / look_ahead_depth options of the H.264 encoder */
+#define QSV_HAVE_LA_HRD QSV_VERSION_ATLEAST(1, 11)
+#define QSV_HAVE_ICQ    QSV_VERSION_ATLEAST(1, 8)
+#define QSV_HAVE_VCM    QSV_VERSION_ATLEAST(1, 8)
+#define QSV_HAVE_QVBR   QSV_VERSION_ATLEAST(1, 11)
+
+#define QSV_COMMON_OPTS \
+{ "async_depth", "Maximum processing parallelism", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = ASYNC_DEPTH_DEFAULT }, 0, INT_MAX, VE },                          \
+{ "avbr_accuracy",    "Accuracy of the AVBR ratecontrol",    OFFSET(qsv.avbr_accuracy),    AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },                             \
+{ "avbr_convergence", "Convergence of the AVBR ratecontrol", OFFSET(qsv.avbr_convergence), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },                             \
+{ "preset", NULL, OFFSET(qsv.preset), AV_OPT_TYPE_INT, { .i64 = MFX_TARGETUSAGE_BALANCED }, MFX_TARGETUSAGE_BEST_QUALITY, MFX_TARGETUSAGE_BEST_SPEED,   VE, "preset" }, \
+{ "veryfast",    NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_TARGETUSAGE_BEST_SPEED  },   INT_MIN, INT_MAX, VE, "preset" },                                                \
+{ "faster",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_TARGETUSAGE_6  },            INT_MIN, INT_MAX, VE, "preset" },                                                \
+{ "fast",        NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_TARGETUSAGE_5  },            INT_MIN, INT_MAX, VE, "preset" },                                                \
+{ "medium",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_TARGETUSAGE_BALANCED  },     INT_MIN, INT_MAX, VE, "preset" },                                                \
+{ "slow",        NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_TARGETUSAGE_3  },            INT_MIN, INT_MAX, VE, "preset" },                                                \
+{ "slower",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_TARGETUSAGE_2  },            INT_MIN, INT_MAX, VE, "preset" },                                                \
+{ "veryslow",    NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_TARGETUSAGE_BEST_QUALITY  }, INT_MIN, INT_MAX, VE, "preset" },                                                \
+{ "vcm",      "Use the video conferencing mode ratecontrol",  OFFSET(qsv.vcm),      AV_OPT_TYPE_INT, { .i64 = 0  },  0, 1,         VE },                                \
+{ "rdo",            "Enable rate distortion optimization",    OFFSET(qsv.rdo),            AV_OPT_TYPE_INT, { .i64 = -1 }, -1,          1, VE },                         \
+{ "max_frame_size", "Maximum encoded frame size in bytes",    OFFSET(qsv.max_frame_size), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, UINT16_MAX, VE },                         \
+{ "max_slice_size", "Maximum encoded slice size in bytes",    OFFSET(qsv.max_slice_size), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, UINT16_MAX, VE },                         \
+{ "bitrate_limit",  "Toggle bitrate limitations",             OFFSET(qsv.bitrate_limit),  AV_OPT_TYPE_INT, { .i64 = -1 }, -1,          1, VE },                         \
+{ "mbbrc",          "MB level bitrate control",               OFFSET(qsv.mbbrc),          AV_OPT_TYPE_INT, { .i64 = -1 }, -1,          1, VE },                         \
+{ "extbrc",         "Extended bitrate control",               OFFSET(qsv.extbrc),         AV_OPT_TYPE_INT, { .i64 = -1 }, -1,          1, VE },                         \
+{ "adaptive_i",     "Adaptive I-frame placement",             OFFSET(qsv.adaptive_i),     AV_OPT_TYPE_INT, { .i64 = -1 }, -1,          1, VE },                         \
+{ "adaptive_b",     "Adaptive B-frame placement",             OFFSET(qsv.adaptive_b),     AV_OPT_TYPE_INT, { .i64 = -1 }, -1,          1, VE },                         \
+{ "b_strategy",     "Strategy to choose between I/P/B-frames", OFFSET(qsv.b_strategy),    AV_OPT_TYPE_INT, { .i64 = -1 }, -1,          1, VE },                         \
+
+typedef int SetEncodeCtrlCB (AVCodecContext *avctx,
+                             const AVFrame *frame, mfxEncodeCtrl* enc_ctrl); /* per-frame hook: may fill enc_ctrl before the frame goes to the encoder */
 typedef struct QSVEncContext {
     AVCodecContext *avctx;
 
     QSVFrame *work_frames;
 
     mfxSession session;
-    mfxSession internal_session;
+    QSVSession internal_qs;
 
     int packet_size;
+    int width_align;
+    int height_align;
 
     mfxVideoParam param;
     mfxFrameAllocRequest req;
 
     mfxExtCodingOption  extco;
-    mfxExtBuffer *extparam[1];
+#if QSV_HAVE_CO2
+    mfxExtCodingOption2 extco2;
+#endif
+
+    mfxExtOpaqueSurfaceAlloc opaque_alloc;
+    mfxFrameSurface1       **opaque_surfaces;
+    AVBufferRef             *opaque_alloc_buf;
+
+    mfxExtBuffer  *extparam_internal[2 + QSV_HAVE_CO2];
+    int         nb_extparam_internal;
+
+    mfxExtBuffer **extparam;
+
+    AVFifoBuffer *async_fifo;
 
     // options set by the caller
     int async_depth;
@@ -56,6 +110,34 @@ typedef struct QSVEncContext {
     int preset;
     int avbr_accuracy;
     int avbr_convergence;
+    int pic_timing_sei;
+    int look_ahead;
+    int look_ahead_depth;
+    int look_ahead_downsampling;
+    int vcm;
+    int rdo;
+    int max_frame_size;
+    int max_slice_size;
+
+    int single_sei_nal_unit;
+    int max_dec_frame_buffering;
+    int trellis;
+
+    int bitrate_limit;
+    int mbbrc;
+    int extbrc;
+    int adaptive_i;
+    int adaptive_b;
+    int b_strategy;
+
+    int int_ref_type;
+    int int_ref_cycle_size;
+    int int_ref_qp_delta;
+    int recovery_point_sei;
+
+    int a53_cc;
+    char *load_plugins;
+    SetEncodeCtrlCB *set_encode_ctrl_cb;
 } QSVEncContext;
 
 int ff_qsv_enc_init(AVCodecContext *avctx, QSVEncContext *q);
diff --git a/libavcodec/qsvenc_h264.c b/libavcodec/qsvenc_h264.c
index d0b9b03f..66e5ee87 100644
--- a/libavcodec/qsvenc_h264.c
+++ b/libavcodec/qsvenc_h264.c
@@ -40,10 +40,75 @@ typedef struct QSVH264EncContext {
     QSVEncContext qsv;
 } QSVH264EncContext;
 
+static int qsv_h264_set_encode_ctrl(AVCodecContext *avctx,
+                                    const AVFrame *frame, mfxEncodeCtrl* enc_ctrl)
+{
+    /* Per-frame hook: when a53_cc is enabled and the frame carries A53 caption
+     * side data, wrap it in an SEI payload (type 4) and attach it to enc_ctrl. */
+    AVFrameSideData *side_data = NULL;
+    QSVH264EncContext *qh264 = avctx->priv_data;
+    QSVEncContext *q = &qh264->qsv;
+
+    if (q->a53_cc && frame) {
+        side_data = av_frame_get_side_data(frame, AV_FRAME_DATA_A53_CC);
+        if (side_data) {
+            int sei_payload_size = 0;
+            mfxU8* sei_data = NULL;
+            mfxPayload* payload = NULL;
+            /* cc_count is a 5-bit field and the SEI size is stored in one byte,
+             * so more than 31 caption triples cannot be represented. */
+            if (side_data->size > 0x1f * 3) {
+                av_log(avctx, AV_LOG_WARNING, "Too much A53 CC data, skipping captions\n");
+                return 0;
+            }
+            sei_payload_size = side_data->size + 13;
+            sei_data = av_mallocz(sei_payload_size);
+            if (!sei_data) {
+                av_log(avctx, AV_LOG_ERROR, "No memory for CC, skipping...\n");
+                return AVERROR(ENOMEM);
+            }
+            // SEI header
+            sei_data[0] = 4;
+            sei_data[1] = sei_payload_size - 2; // size of SEI data
+
+            // country code
+            sei_data[2] = 181;
+            sei_data[3] = 0;
+            sei_data[4] = 49;
+
+            // ATSC_identifier - using 'GA94' only
+            AV_WL32(sei_data + 5, MKTAG('G', 'A', '9', '4'));
+            sei_data[9] = 3;
+            sei_data[10] = ((side_data->size/3) & 0x1f) | 0xC0;
+            sei_data[11] = 0xFF; // reserved
+
+            memcpy(sei_data + 12, side_data->data, side_data->size);
+            sei_data[side_data->size+12] = 255;
+
+            payload = av_mallocz(sizeof(mfxPayload));
+            if (!payload) {
+                av_log(avctx, AV_LOG_ERROR, "No memory, skipping captions\n");
+                av_freep(&sei_data);
+                return AVERROR(ENOMEM);
+            }
+            payload->BufSize = side_data->size + 13;
+            payload->NumBit = payload->BufSize * 8;
+            payload->Type = 4;
+            payload->Data = sei_data;
+
+            enc_ctrl->NumExtParam = 0;
+            enc_ctrl->NumPayload = 1;
+            enc_ctrl->Payload[0] = payload;
+        }
+    }
+    return 0;
+}
+
 static av_cold int qsv_enc_init(AVCodecContext *avctx)
 {
     QSVH264EncContext *q = avctx->priv_data;
 
+    q->qsv.set_encode_ctrl_cb = qsv_h264_set_encode_ctrl; /* hook the common encode path calls before submitting each frame */
     return ff_qsv_enc_init(avctx, &q->qsv);
 }
 
@@ -65,10 +130,37 @@ static av_cold int qsv_enc_close(AVCodecContext *avctx)
 #define OFFSET(x) offsetof(QSVH264EncContext, x)
 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 static const AVOption options[] = {
-    { "async_depth", "Maximum processing parallelism", OFFSET(qsv.async_depth), AV_OPT_TYPE_INT, { .i64 = ASYNC_DEPTH_DEFAULT }, 0, INT_MAX, VE },
+    QSV_COMMON_OPTS
+
     { "idr_interval", "Distance (in I-frames) between IDR frames", OFFSET(qsv.idr_interval), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
-    { "avbr_accuracy",    "Accuracy of the AVBR ratecontrol",    OFFSET(qsv.avbr_accuracy),    AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
-    { "avbr_convergence", "Convergence of the AVBR ratecontrol", OFFSET(qsv.avbr_convergence), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
+    { "pic_timing_sei",    "Insert picture timing SEI with pic_struct_syntax element", OFFSET(qsv.pic_timing_sei), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, VE },
+    { "single_sei_nal_unit",    "Put all the SEI messages into one NALU",        OFFSET(qsv.single_sei_nal_unit),     AV_OPT_TYPE_INT, { .i64 = -1 }, -1,          1, VE },
+    { "max_dec_frame_buffering", "Maximum number of frames buffered in the DPB", OFFSET(qsv.max_dec_frame_buffering), AV_OPT_TYPE_INT, { .i64 = 0 },   0, UINT16_MAX, VE },
+
+#if QSV_HAVE_LA
+    { "look_ahead",       "Use VBR algorithm with look ahead",    OFFSET(qsv.look_ahead),       AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, VE },
+    { "look_ahead_depth", "Depth of look ahead in number frames", OFFSET(qsv.look_ahead_depth), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 100, VE },
+#endif
+
+#if QSV_VERSION_ATLEAST(1,8)
+    { "look_ahead_downsampling", NULL, OFFSET(qsv.look_ahead_downsampling), AV_OPT_TYPE_INT, { .i64 = MFX_LOOKAHEAD_DS_UNKNOWN }, MFX_LOOKAHEAD_DS_UNKNOWN, MFX_LOOKAHEAD_DS_2x, VE, "look_ahead_downsampling" },
+    { "unknown"                , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_LOOKAHEAD_DS_UNKNOWN }, INT_MIN, INT_MAX,     VE, "look_ahead_downsampling" },
+    { "off"                    , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_LOOKAHEAD_DS_OFF     }, INT_MIN, INT_MAX,     VE, "look_ahead_downsampling" },
+    { "2x"                     , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_LOOKAHEAD_DS_2x      }, INT_MIN, INT_MAX,     VE, "look_ahead_downsampling" },
+#endif
+
+    { "int_ref_type", "Intra refresh type",                                      OFFSET(qsv.int_ref_type),            AV_OPT_TYPE_INT, { .i64 = -1 }, -1, UINT16_MAX, VE, "int_ref_type" },
+        { "none",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, .flags = VE, "int_ref_type" },
+        { "vertical", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, .flags = VE, "int_ref_type" },
+    { "int_ref_cycle_size", "Number of frames in the intra refresh cycle",       OFFSET(qsv.int_ref_cycle_size),      AV_OPT_TYPE_INT, { .i64 = -1 },               -1, UINT16_MAX, VE },
+    { "int_ref_qp_delta",   "QP difference for the refresh MBs",                 OFFSET(qsv.int_ref_qp_delta),        AV_OPT_TYPE_INT, { .i64 = INT16_MIN }, INT16_MIN,  INT16_MAX, VE },
+    { "recovery_point_sei", "Insert recovery point SEI messages",                OFFSET(qsv.recovery_point_sei),      AV_OPT_TYPE_INT, { .i64 = -1 },               -1,          1, VE },
+
+    { "trellis",             "Trellis quantization",                             OFFSET(qsv.trellis),                 AV_OPT_TYPE_FLAGS, { .i64 = 0 }, 0, UINT_MAX, VE, "trellis" },
+        { "off", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_TRELLIS_OFF }, .flags = VE, "trellis" },
+        { "I",   NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_TRELLIS_I },   .flags = VE, "trellis" },
+        { "P",   NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_TRELLIS_P },   .flags = VE, "trellis" },
+        { "B",   NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_TRELLIS_B },   .flags = VE, "trellis" },
 
     { "profile", NULL, OFFSET(qsv.profile), AV_OPT_TYPE_INT, { .i64 = MFX_PROFILE_UNKNOWN }, 0, INT_MAX, VE, "profile" },
     { "unknown" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_PROFILE_UNKNOWN      }, INT_MIN, INT_MAX,     VE, "profile" },
@@ -76,11 +168,7 @@ static const AVOption options[] = {
     { "main"    , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_PROFILE_AVC_MAIN     }, INT_MIN, INT_MAX,     VE, "profile" },
     { "high"    , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_PROFILE_AVC_HIGH     }, INT_MIN, INT_MAX,     VE, "profile" },
 
-    { "preset", NULL, OFFSET(qsv.preset), AV_OPT_TYPE_INT, { .i64 = MFX_TARGETUSAGE_BALANCED }, 0, 7,   VE, "preset" },
-    { "fast",   NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_TARGETUSAGE_BEST_SPEED  },   INT_MIN, INT_MAX, VE, "preset" },
-    { "medium", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_TARGETUSAGE_BALANCED  },     INT_MIN, INT_MAX, VE, "preset" },
-    { "slow",   NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_TARGETUSAGE_BEST_QUALITY  }, INT_MIN, INT_MAX, VE, "preset" },
-
+    { "a53cc" , "Use A53 Closed Captions (if available)", OFFSET(qsv.a53_cc), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, VE},
     { NULL },
 };
 
@@ -100,6 +188,9 @@ static const AVCodecDefault qsv_enc_defaults[] = {
     { "coder",     "ac"    },
 
     { "flags",     "+cgop" },
+#if FF_API_PRIVATE_OPT
+    { "b_strategy", "-1"   },
+#endif
     { NULL },
 };
 
@@ -112,10 +203,11 @@ AVCodec ff_h264_qsv_encoder = {
     .init           = qsv_enc_init,
     .encode2        = qsv_enc_frame,
     .close          = qsv_enc_close,
-    .capabilities   = CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_DELAY,
     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_NV12,
                                                     AV_PIX_FMT_QSV,
                                                     AV_PIX_FMT_NONE },
     .priv_class     = &class,
     .defaults       = qsv_enc_defaults,
+    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
 };
diff --git a/libavcodec/qsvenc_hevc.c b/libavcodec/qsvenc_hevc.c
new file mode 100644
index 00000000..30fde72d
--- /dev/null
+++ b/libavcodec/qsvenc_hevc.c
@@ -0,0 +1,269 @@
+/*
+ * Intel MediaSDK QSV based HEVC encoder
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+
+#include <stdint.h>
+#include <sys/types.h>
+
+#include <mfx/mfxvideo.h>
+
+#include "libavutil/common.h"
+#include "libavutil/opt.h"
+
+#include "avcodec.h"
+#include "bytestream.h"
+#include "get_bits.h"
+#include "hevc.h"
+#include "internal.h"
+#include "qsv.h"
+#include "qsv_internal.h"
+#include "qsvenc.h"
+
+enum LoadPlugin {
+    LOAD_PLUGIN_NONE,    /* "none": qsv_enc_init leaves load_plugins untouched */
+    LOAD_PLUGIN_HEVC_SW, /* "hevc_sw": default value of the load_plugin option */
+    LOAD_PLUGIN_HEVC_HW, /* "hevc_hw" */
+};
+
+typedef struct QSVHEVCEncContext {
+    AVClass *class;
+    QSVEncContext qsv;   /* shared QSV encoder state handed to the ff_qsv_* helpers */
+    int load_plugin;     /* enum LoadPlugin value, set through the "load_plugin" option */
+} QSVHEVCEncContext;
+
+static int generate_fake_vps(QSVEncContext *q, AVCodecContext *avctx)
+{
+    GetByteContext gbc;
+    PutByteContext pbc;
+    /* Builds a VPS from the SPS found in avctx->extradata and prepends it there. */
+    GetBitContext gb;
+    HEVCNAL sps_nal = { NULL };
+    HEVCSPS sps = { 0 };
+    HEVCVPS vps = { 0 };
+    uint8_t vps_buf[128], vps_rbsp_buf[128];
+    uint8_t *new_extradata;
+    unsigned int sps_id;
+    int ret, i, type, vps_size;
+    /* the SPS is read past a 4-byte startcode, so the startcode alone is not enough */
+    if (avctx->extradata_size <= 4) {
+        av_log(avctx, AV_LOG_ERROR, "No extradata returned from libmfx\n");
+        return AVERROR_UNKNOWN;
+    }
+
+    /* parse the SPS */
+    ret = ff_hevc_extract_rbsp(NULL, avctx->extradata + 4, avctx->extradata_size - 4, &sps_nal);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Error unescaping the SPS buffer\n");
+        return ret;
+    }
+
+    ret = init_get_bits8(&gb, sps_nal.data, sps_nal.size);
+    if (ret < 0) {
+        av_freep(&sps_nal.rbsp_buffer);
+        return ret;
+    }
+
+    get_bits(&gb, 1);
+    type = get_bits(&gb, 6);
+    if (type != NAL_SPS) {
+        av_log(avctx, AV_LOG_ERROR, "Unexpected NAL type in the extradata: %d\n",
+               type);
+        av_freep(&sps_nal.rbsp_buffer);
+        return AVERROR_INVALIDDATA;
+    }
+    get_bits(&gb, 9);
+
+    ret = ff_hevc_parse_sps(&sps, &gb, &sps_id, 0, NULL, avctx);
+    av_freep(&sps_nal.rbsp_buffer);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Error parsing the SPS\n");
+        return ret;
+    }
+
+    /* generate the VPS */
+    vps.vps_max_layers     = 1;
+    vps.vps_max_sub_layers = sps.max_sub_layers;
+    memcpy(&vps.ptl, &sps.ptl, sizeof(vps.ptl));
+    vps.vps_sub_layer_ordering_info_present_flag = 1;
+    for (i = 0; i < MAX_SUB_LAYERS; i++) {
+        vps.vps_max_dec_pic_buffering[i] = sps.temporal_layer[i].max_dec_pic_buffering;
+        vps.vps_num_reorder_pics[i]      = sps.temporal_layer[i].num_reorder_pics;
+        vps.vps_max_latency_increase[i]  = sps.temporal_layer[i].max_latency_increase;
+    }
+
+    vps.vps_num_layer_sets                  = 1;
+    vps.vps_timing_info_present_flag        = sps.vui.vui_timing_info_present_flag;
+    vps.vps_num_units_in_tick               = sps.vui.vui_num_units_in_tick;
+    vps.vps_time_scale                      = sps.vui.vui_time_scale;
+    vps.vps_poc_proportional_to_timing_flag = sps.vui.vui_poc_proportional_to_timing_flag;
+    vps.vps_num_ticks_poc_diff_one          = sps.vui.vui_num_ticks_poc_diff_one_minus1 + 1;
+
+    /* generate the encoded RBSP form of the VPS */
+    ret = ff_hevc_encode_nal_vps(&vps, sps.vps_id, vps_rbsp_buf, sizeof(vps_rbsp_buf));
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Error writing the VPS\n");
+        return ret;
+    }
+
+    /* escape and add the startcode */
+    bytestream2_init(&gbc, vps_rbsp_buf, ret);
+    bytestream2_init_writer(&pbc, vps_buf, sizeof(vps_buf));
+
+    bytestream2_put_be32(&pbc, 1);              // startcode
+    bytestream2_put_byte(&pbc, NAL_VPS << 1);   // NAL
+    bytestream2_put_byte(&pbc, 1);              // header
+
+    while (bytestream2_get_bytes_left(&gbc)) {
+        uint32_t b = bytestream2_peek_be24(&gbc);
+        if (b <= 3) {
+            bytestream2_put_be24(&pbc, 3);
+            bytestream2_skip(&gbc, 2);
+        } else
+            bytestream2_put_byte(&pbc, bytestream2_get_byte(&gbc));
+    }
+
+    vps_size = bytestream2_tell_p(&pbc);
+    new_extradata = av_mallocz(vps_size + avctx->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
+    if (!new_extradata)
+        return AVERROR(ENOMEM);
+    memcpy(new_extradata, vps_buf, vps_size);
+    memcpy(new_extradata + vps_size, avctx->extradata, avctx->extradata_size);
+
+    av_freep(&avctx->extradata);
+    avctx->extradata       = new_extradata;
+    avctx->extradata_size += vps_size;
+
+    return 0;
+}
+
+static av_cold int qsv_enc_init(AVCodecContext *avctx)
+{
+    QSVHEVCEncContext *q = avctx->priv_data;
+    int ret;
+
+    if (q->load_plugin != LOAD_PLUGIN_NONE) {
+        static const char *uid_hevcenc_sw = "2fca99749fdb49aeb121a5b63ef568f7";
+        static const char *uid_hevcenc_hw = "6fadc791a0c2eb479ab6dcd5ea9da347";
+        /* an explicit, non-empty load_plugins list wins over the load_plugin shortcut */
+        if (q->qsv.load_plugins && q->qsv.load_plugins[0]) {
+            av_log(avctx, AV_LOG_WARNING,
+                   "load_plugins is not empty, but load_plugin is not set to 'none'."
+                   " The load_plugin value will be ignored.\n");
+        } else {
+            av_freep(&q->qsv.load_plugins);
+
+            if (q->load_plugin == LOAD_PLUGIN_HEVC_SW)
+                q->qsv.load_plugins = av_strdup(uid_hevcenc_sw);
+            else
+                q->qsv.load_plugins = av_strdup(uid_hevcenc_hw);
+
+            if (!q->qsv.load_plugins)
+                return AVERROR(ENOMEM);
+        }
+    }
+
+    ret = ff_qsv_enc_init(avctx, &q->qsv);
+    if (ret < 0)
+        return ret;
+    /* prepend a VPS generated from the SPS that is now in avctx->extradata */
+    ret = generate_fake_vps(&q->qsv, avctx);
+    if (ret < 0) {
+        ff_qsv_enc_close(avctx, &q->qsv);
+        return ret;
+    }
+
+    return 0;
+}
+
+static int qsv_enc_frame(AVCodecContext *avctx, AVPacket *pkt,
+                         const AVFrame *frame, int *got_packet)
+{
+    /* Forward to the shared QSV encode path using the embedded common context. */
+    QSVHEVCEncContext *ctx = avctx->priv_data;
+    return ff_qsv_encode(avctx, &ctx->qsv, pkt, frame, got_packet);
+}
+
+static av_cold int qsv_enc_close(AVCodecContext *avctx)
+{
+    /* Teardown is handled entirely by the shared QSV close helper. */
+    QSVHEVCEncContext *ctx = avctx->priv_data;
+    return ff_qsv_enc_close(avctx, &ctx->qsv);
+}
+
+#define OFFSET(x) offsetof(QSVHEVCEncContext, x)
+#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
+static const AVOption options[] = {
+    QSV_COMMON_OPTS
+    /* load_plugin picks one of two built-in UIDs; a non-empty load_plugins overrides it (see qsv_enc_init) */
+    { "load_plugin", "A user plugin to load in an internal session", OFFSET(load_plugin), AV_OPT_TYPE_INT, { .i64 = LOAD_PLUGIN_HEVC_SW }, LOAD_PLUGIN_NONE, LOAD_PLUGIN_HEVC_HW, VE, "load_plugin" },
+    { "none",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = LOAD_PLUGIN_NONE },    0, 0, VE, "load_plugin" },
+    { "hevc_sw",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = LOAD_PLUGIN_HEVC_SW }, 0, 0, VE, "load_plugin" },
+    { "hevc_hw",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = LOAD_PLUGIN_HEVC_HW }, 0, 0, VE, "load_plugin" },
+
+    { "load_plugins", "A :-separated list of hexadecimal plugin UIDs to load in an internal session",
+        OFFSET(qsv.load_plugins), AV_OPT_TYPE_STRING, { .str = "" }, 0, 0, VE },
+
+    { "profile", NULL, OFFSET(qsv.profile), AV_OPT_TYPE_INT, { .i64 = MFX_PROFILE_UNKNOWN }, 0, INT_MAX, VE, "profile" },
+    { "unknown", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_PROFILE_UNKNOWN      }, INT_MIN, INT_MAX,     VE, "profile" },
+    { "main",    NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_PROFILE_HEVC_MAIN    }, INT_MIN, INT_MAX,     VE, "profile" },
+    { "main10",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_PROFILE_HEVC_MAIN10  }, INT_MIN, INT_MAX,     VE, "profile" },
+    { "mainsp",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_PROFILE_HEVC_MAINSP  }, INT_MIN, INT_MAX,     VE, "profile" },
+
+    { NULL },
+};
+
+static const AVClass class = { /* referenced by .priv_class of ff_hevc_qsv_encoder */
+    .class_name = "hevc_qsv encoder",
+    .item_name  = av_default_item_name,
+    .option     = options, /* the private option table defined above */
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+static const AVCodecDefault qsv_enc_defaults[] = { /* option-name / value pairs, installed through .defaults of ff_hevc_qsv_encoder */
+    { "b",         "1M"    },
+    { "refs",      "0"     },
+    // same as the x264 default
+    { "g",         "248"   },
+    { "bf",        "8"     },
+
+    { "flags",     "+cgop" },
+#if FF_API_PRIVATE_OPT
+    { "b_strategy", "-1"   }, /* -1 matches the default of the private qsv.b_strategy option */
+#endif
+    { NULL },
+};
+
+AVCodec ff_hevc_qsv_encoder = {
+    .name           = "hevc_qsv",
+    .long_name      = NULL_IF_CONFIG_SMALL("HEVC (Intel Quick Sync Video acceleration)"),
+    .priv_data_size = sizeof(QSVHEVCEncContext),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_HEVC,
+    .init           = qsv_enc_init,  /* selects the plugin UID, runs the common init, then prepends a generated VPS */
+    .encode2        = qsv_enc_frame, /* thin wrapper around ff_qsv_encode */
+    .close          = qsv_enc_close, /* thin wrapper around ff_qsv_enc_close */
+    .capabilities   = AV_CODEC_CAP_DELAY,
+    .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_NV12,
+                                                    AV_PIX_FMT_QSV,
+                                                    AV_PIX_FMT_NONE },
+    .priv_class     = &class,
+    .defaults       = qsv_enc_defaults,
+    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP, /* NOTE(review): presumably makes the generic code call .close after a failed .init — confirm */
+};
diff --git a/libavcodec/qsvenc_mpeg2.c b/libavcodec/qsvenc_mpeg2.c
new file mode 100644
index 00000000..5b583fb4
--- /dev/null
+++ b/libavcodec/qsvenc_mpeg2.c
@@ -0,0 +1,114 @@
+/*
+ * Intel MediaSDK QSV based MPEG-2 encoder
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+
+#include <stdint.h>
+#include <sys/types.h>
+
+#include <mfx/mfxvideo.h>
+
+#include "libavutil/common.h"
+#include "libavutil/opt.h"
+
+#include "avcodec.h"
+#include "internal.h"
+#include "qsv.h"
+#include "qsv_internal.h"
+#include "qsvenc.h"
+
+typedef struct QSVMpeg2EncContext { /* private data of the mpeg2_qsv encoder (sized via .priv_data_size) */
+    AVClass *class;    /* NOTE(review): presumably has to stay the first member for AVOptions -- confirm */
+    QSVEncContext qsv; /* state handed to ff_qsv_enc_init/ff_qsv_encode/ff_qsv_enc_close; option fields live here */
+} QSVMpeg2EncContext;
+
+/* AVCodec.init hook: forwards to ff_qsv_enc_init() with the embedded QSVEncContext. */
+static av_cold int qsv_enc_init(AVCodecContext *avctx)
+{
+    QSVMpeg2EncContext *const enc = avctx->priv_data;
+    return ff_qsv_enc_init(avctx, &enc->qsv);
+}
+
+/* AVCodec.encode2 hook: passes frame, packet and got_packet straight to ff_qsv_encode(). */
+static int qsv_enc_frame(AVCodecContext *avctx, AVPacket *pkt,
+                         const AVFrame *frame, int *got_packet)
+{
+    QSVMpeg2EncContext *const enc = avctx->priv_data;
+    return ff_qsv_encode(avctx, &enc->qsv, pkt, frame, got_packet);
+}
+
+/* AVCodec.close hook: forwards to ff_qsv_enc_close() with the embedded QSVEncContext. */
+static av_cold int qsv_enc_close(AVCodecContext *avctx)
+{
+    QSVMpeg2EncContext *const enc = avctx->priv_data;
+    return ff_qsv_enc_close(avctx, &enc->qsv);
+}
+
+#define OFFSET(x) offsetof(QSVMpeg2EncContext, x) /* byte offset of an option's field inside the private context */
+#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM) /* parenthesized so the OR stays one operand wherever VE expands */
+static const AVOption options[] = { /* AVOption table referenced by the mpeg2_qsv AVClass */
+    QSV_COMMON_OPTS
+    /* "profile" is stored in qsv.profile; the AV_OPT_TYPE_CONST rows carry the same "profile" tag and name MFX_PROFILE_* values */
+    { "profile", NULL, OFFSET(qsv.profile), AV_OPT_TYPE_INT, { .i64 = MFX_PROFILE_UNKNOWN }, 0, INT_MAX, VE, "profile" },
+    { "unknown", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_PROFILE_UNKNOWN        }, INT_MIN, INT_MAX,     VE, "profile" },
+    { "simple",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_PROFILE_MPEG2_SIMPLE   }, INT_MIN, INT_MAX,     VE, "profile" },
+    { "main",    NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_PROFILE_MPEG2_MAIN     }, INT_MIN, INT_MAX,     VE, "profile" },
+    { "high",    NULL, 0, AV_OPT_TYPE_CONST, { .i64 = MFX_PROFILE_MPEG2_HIGH     }, INT_MIN, INT_MAX,     VE, "profile" },
+
+    { NULL },
+};
+
+static const AVClass class = { /* AVClass attached to the mpeg2_qsv encoder through .priv_class */
+    .class_name = "mpeg2_qsv encoder",
+    .item_name  = av_default_item_name,
+    .option     = options,               /* the AVOption table defined above */
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+static const AVCodecDefault qsv_enc_defaults[] = { /* name/value string pairs used as codec defaults (see .defaults) */
+    { "b",         "1M"    },
+    { "refs",      "0"     },
+    // same as the x264 default
+    { "g",         "250"   },
+    { "bf",        "3"     },
+
+    { "flags",     "+cgop" },
+#if FF_API_PRIVATE_OPT
+    { "b_strategy", "-1"   },
+#endif
+    { NULL }, /* terminating entry */
+};
+
+AVCodec ff_mpeg2_qsv_encoder = { /* codec descriptor for the "mpeg2_qsv" encoder */
+    .name           = "mpeg2_qsv",
+    .long_name      = NULL_IF_CONFIG_SMALL("MPEG-2 video (Intel Quick Sync Video acceleration)"),
+    .priv_data_size = sizeof(QSVMpeg2EncContext),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_MPEG2VIDEO,
+    .init           = qsv_enc_init,
+    .encode2        = qsv_enc_frame,
+    .close          = qsv_enc_close,
+    .capabilities   = AV_CODEC_CAP_DELAY,
+    .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_NV12, /* pixel formats listed for this encoder */
+                                                    AV_PIX_FMT_QSV,
+                                                    AV_PIX_FMT_NONE }, /* AV_PIX_FMT_NONE ends the list */
+    .priv_class     = &class,
+    .defaults       = qsv_enc_defaults,
+    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP, /* NOTE(review): presumably lets .close run after a failed .init -- confirm */
+};
diff --git a/libavcodec/qtrle.c b/libavcodec/qtrle.c
index b3676437..3f482f44 100644
--- a/libavcodec/qtrle.c
+++ b/libavcodec/qtrle.c
@@ -83,9 +83,9 @@ static void qtrle_decode_1bpp(QtrleContext *s, int row_ptr, int lines_to_change)
         if(skip & 0x80) {
             lines_to_change--;
             row_ptr += row_inc;
-            pixel_ptr = row_ptr + 2 * (skip & 0x7f);
+            pixel_ptr = row_ptr + 2 * 8 * (skip & 0x7f);
         } else
-            pixel_ptr += 2 * skip;
+            pixel_ptr += 2 * 8 * skip;
         CHECK_PIXEL_PTR(0);  /* make sure pixel_ptr is positive */
 
         if(rle_code == -1)
@@ -99,19 +99,42 @@ static void qtrle_decode_1bpp(QtrleContext *s, int row_ptr, int lines_to_change)
 
             pi0 = bytestream2_get_byte(&s->g);
             pi1 = bytestream2_get_byte(&s->g);
-            CHECK_PIXEL_PTR(rle_code * 2);
+            CHECK_PIXEL_PTR(rle_code * 2 * 8);
 
             while (rle_code--) {
-                rgb[pixel_ptr++] = pi0;
-                rgb[pixel_ptr++] = pi1;
+                rgb[pixel_ptr++] = (pi0 >> 7) & 0x01;
+                rgb[pixel_ptr++] = (pi0 >> 6) & 0x01;
+                rgb[pixel_ptr++] = (pi0 >> 5) & 0x01;
+                rgb[pixel_ptr++] = (pi0 >> 4) & 0x01;
+                rgb[pixel_ptr++] = (pi0 >> 3) & 0x01;
+                rgb[pixel_ptr++] = (pi0 >> 2) & 0x01;
+                rgb[pixel_ptr++] = (pi0 >> 1) & 0x01;
+                rgb[pixel_ptr++] =  pi0       & 0x01;
+                rgb[pixel_ptr++] = (pi1 >> 7) & 0x01;
+                rgb[pixel_ptr++] = (pi1 >> 6) & 0x01;
+                rgb[pixel_ptr++] = (pi1 >> 5) & 0x01;
+                rgb[pixel_ptr++] = (pi1 >> 4) & 0x01;
+                rgb[pixel_ptr++] = (pi1 >> 3) & 0x01;
+                rgb[pixel_ptr++] = (pi1 >> 2) & 0x01;
+                rgb[pixel_ptr++] = (pi1 >> 1) & 0x01;
+                rgb[pixel_ptr++] =  pi1       & 0x01;
             }
         } else {
             /* copy the same pixel directly to output 2 times */
             rle_code *= 2;
-            CHECK_PIXEL_PTR(rle_code);
+            CHECK_PIXEL_PTR(rle_code * 8);
 
-            bytestream2_get_buffer(&s->g, &rgb[pixel_ptr], rle_code);
-            pixel_ptr += rle_code;
+            while (rle_code--) {
+                int x = bytestream2_get_byte(&s->g);
+                rgb[pixel_ptr++] = (x >> 7) & 0x01;
+                rgb[pixel_ptr++] = (x >> 6) & 0x01;
+                rgb[pixel_ptr++] = (x >> 5) & 0x01;
+                rgb[pixel_ptr++] = (x >> 4) & 0x01;
+                rgb[pixel_ptr++] = (x >> 3) & 0x01;
+                rgb[pixel_ptr++] = (x >> 2) & 0x01;
+                rgb[pixel_ptr++] = (x >> 1) & 0x01;
+                rgb[pixel_ptr++] =  x       & 0x01;
+            }
         }
     }
 }
@@ -364,13 +387,10 @@ static av_cold int qtrle_decode_init(AVCodecContext *avctx)
     s->avctx = avctx;
     switch (avctx->bits_per_coded_sample) {
     case 1:
-    case 33:
-        avctx->pix_fmt = AV_PIX_FMT_MONOWHITE;
-        break;
-
     case 2:
     case 4:
     case 8:
+    case 33:
     case 34:
     case 36:
     case 40:
@@ -446,6 +466,7 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
     case 1:
     case 33:
         qtrle_decode_1bpp(s, row_ptr, height);
+        has_palette = 1;
         break;
 
     case 2:
@@ -523,5 +544,5 @@ AVCodec ff_qtrle_decoder = {
     .init           = qtrle_decode_init,
     .close          = qtrle_decode_end,
     .decode         = qtrle_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/qtrleenc.c b/libavcodec/qtrleenc.c
index d7231884..a4ed85aa 100644
--- a/libavcodec/qtrleenc.c
+++ b/libavcodec/qtrleenc.c
@@ -37,7 +37,7 @@
 typedef struct QtrleEncContext {
     AVCodecContext *avctx;
     int pixel_size;
-    AVPicture previous_frame;
+    AVFrame *previous_frame;
     unsigned int max_buf_size;
     int logical_width;
     /**
@@ -58,15 +58,16 @@ typedef struct QtrleEncContext {
      * Will contain at ith position the number of consecutive pixels equal to the previous
      * frame starting from pixel i */
     uint8_t* skip_table;
+
+    /** Encoded frame is a key frame */
+    int key_frame;
 } QtrleEncContext;
 
 static av_cold int qtrle_encode_end(AVCodecContext *avctx)
 {
     QtrleEncContext *s = avctx->priv_data;
 
-    av_frame_free(&avctx->coded_frame);
-
-    avpicture_free(&s->previous_frame);
+    av_frame_free(&s->previous_frame);
     av_free(s->rlecode_table);
     av_free(s->length_table);
     av_free(s->skip_table);
@@ -76,7 +77,6 @@ static av_cold int qtrle_encode_end(AVCodecContext *avctx)
 static av_cold int qtrle_encode_init(AVCodecContext *avctx)
 {
     QtrleEncContext *s = avctx->priv_data;
-    int ret;
 
     if (av_image_check_size(avctx->width, avctx->height, 0, avctx) < 0) {
         return AVERROR(EINVAL);
@@ -115,9 +115,10 @@ static av_cold int qtrle_encode_init(AVCodecContext *avctx)
         av_log(avctx, AV_LOG_ERROR, "Error allocating memory.\n");
         return AVERROR(ENOMEM);
     }
-    if ((ret = avpicture_alloc(&s->previous_frame, avctx->pix_fmt, avctx->width, avctx->height)) < 0) {
+    s->previous_frame = av_frame_alloc();
+    if (!s->previous_frame) {
         av_log(avctx, AV_LOG_ERROR, "Error allocating picture\n");
-        return ret;
+        return AVERROR(ENOMEM);
     }
 
     s->max_buf_size = s->logical_width*s->avctx->height*s->pixel_size*2 /* image base material */
@@ -125,12 +126,6 @@ static av_cold int qtrle_encode_init(AVCodecContext *avctx)
                       + s->avctx->height*2                            /* skip code+rle end */
                       + s->logical_width/MAX_RLE_BULK + 1             /* rle codes */;
 
-    avctx->coded_frame = av_frame_alloc();
-    if (!avctx->coded_frame) {
-        qtrle_encode_end(avctx);
-        return AVERROR(ENOMEM);
-    }
-
     return 0;
 }
 
@@ -162,7 +157,7 @@ static void qtrle_encode_line(QtrleEncContext *s, const AVFrame *p, int line, ui
 
     uint8_t *this_line = p->               data[0] + line*p->               linesize[0] +
         (width - 1)*s->pixel_size;
-    uint8_t *prev_line = s->previous_frame.data[0] + line*s->previous_frame.linesize[0] +
+    uint8_t *prev_line = s->previous_frame->data[0] + line * s->previous_frame->linesize[0] +
         (width - 1)*s->pixel_size;
 
     s->length_table[width] = 0;
@@ -219,7 +214,7 @@ static void qtrle_encode_line(QtrleEncContext *s, const AVFrame *p, int line, ui
             }
         }
 
-        if (!s->avctx->coded_frame->key_frame && !memcmp(this_line, prev_line, s->pixel_size))
+        if (!s->key_frame && !memcmp(this_line, prev_line, s->pixel_size))
             skipcount = FFMIN(skipcount + 1, MAX_RLE_SKIP);
         else
             skipcount = 0;
@@ -330,17 +325,17 @@ static int encode_frame(QtrleEncContext *s, const AVFrame *p, uint8_t *buf)
     int end_line = s->avctx->height;
     uint8_t *orig_buf = buf;
 
-    if (!s->avctx->coded_frame->key_frame) {
+    if (!s->key_frame) {
         unsigned line_size = s->logical_width * s->pixel_size;
         for (start_line = 0; start_line < s->avctx->height; start_line++)
             if (memcmp(p->data[0] + start_line*p->linesize[0],
-                       s->previous_frame.data[0] + start_line*s->previous_frame.linesize[0],
+                       s->previous_frame->data[0] + start_line * s->previous_frame->linesize[0],
                        line_size))
                 break;
 
         for (end_line=s->avctx->height; end_line > start_line; end_line--)
             if (memcmp(p->data[0] + (end_line - 1)*p->linesize[0],
-                       s->previous_frame.data[0] + (end_line - 1)*s->previous_frame.linesize[0],
+                       s->previous_frame->data[0] + (end_line - 1) * s->previous_frame->linesize[0],
                        line_size))
                 break;
     }
@@ -368,29 +363,40 @@ static int qtrle_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
                               const AVFrame *pict, int *got_packet)
 {
     QtrleEncContext * const s = avctx->priv_data;
-    AVFrame * const p = avctx->coded_frame;
+    enum AVPictureType pict_type;
     int ret;
 
-    if ((ret = ff_alloc_packet2(avctx, pkt, s->max_buf_size)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, pkt, s->max_buf_size, 0)) < 0)
         return ret;
 
     if (avctx->gop_size == 0 || (s->avctx->frame_number % avctx->gop_size) == 0) {
         /* I-Frame */
-        p->pict_type = AV_PICTURE_TYPE_I;
-        p->key_frame = 1;
+        pict_type = AV_PICTURE_TYPE_I;
+        s->key_frame = 1;
     } else {
         /* P-Frame */
-        p->pict_type = AV_PICTURE_TYPE_P;
-        p->key_frame = 0;
+        pict_type = AV_PICTURE_TYPE_P;
+        s->key_frame = 0;
     }
 
     pkt->size = encode_frame(s, pict, pkt->data);
 
     /* save the current frame */
-    av_picture_copy(&s->previous_frame, (const AVPicture *)pict,
-                    avctx->pix_fmt, avctx->width, avctx->height);
+    av_frame_unref(s->previous_frame);
+    ret = av_frame_ref(s->previous_frame, pict);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "cannot add reference\n");
+        return ret;
+    }
+
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
+    avctx->coded_frame->key_frame = s->key_frame;
+    avctx->coded_frame->pict_type = pict_type;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
-    if (p->key_frame)
+    if (s->key_frame)
         pkt->flags |= AV_PKT_FLAG_KEY;
     *got_packet = 1;
 
diff --git a/libavcodec/r210dec.c b/libavcodec/r210dec.c
index fc9e7e5c..9c868cd1 100644
--- a/libavcodec/r210dec.c
+++ b/libavcodec/r210dec.c
@@ -103,7 +103,7 @@ AVCodec ff_r210_decoder = {
     .id             = AV_CODEC_ID_R210,
     .init           = decode_init,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
 #endif
 #if CONFIG_R10K_DECODER
@@ -114,7 +114,7 @@ AVCodec ff_r10k_decoder = {
     .id             = AV_CODEC_ID_R10K,
     .init           = decode_init,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
 #endif
 #if CONFIG_AVRP_DECODER
@@ -125,6 +125,6 @@ AVCodec ff_avrp_decoder = {
     .id             = AV_CODEC_ID_AVRP,
     .init           = decode_init,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
 #endif
diff --git a/libavcodec/r210enc.c b/libavcodec/r210enc.c
index b7d5a07d..65b3c069 100644
--- a/libavcodec/r210enc.c
+++ b/libavcodec/r210enc.c
@@ -24,16 +24,6 @@
 #include "internal.h"
 #include "bytestream.h"
 
-static av_cold int encode_init(AVCodecContext *avctx)
-{
-    avctx->coded_frame = av_frame_alloc();
-
-    if (!avctx->coded_frame)
-        return AVERROR(ENOMEM);
-
-    return 0;
-}
-
 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
                         const AVFrame *pic, int *got_packet)
 {
@@ -44,11 +34,9 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     uint8_t *src_line;
     uint8_t *dst;
 
-    if ((ret = ff_alloc_packet2(avctx, pkt, 4 * aligned_width * avctx->height)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, pkt, 4 * aligned_width * avctx->height, 0)) < 0)
         return ret;
 
-    avctx->coded_frame->key_frame = 1;
-    avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
     src_line = pic->data[0];
     dst = pkt->data;
 
@@ -78,12 +66,6 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     return 0;
 }
 
-static av_cold int encode_close(AVCodecContext *avctx)
-{
-    av_frame_free(&avctx->coded_frame);
-
-    return 0;
-}
 
 #if CONFIG_R210_ENCODER
 AVCodec ff_r210_encoder = {
@@ -91,10 +73,9 @@ AVCodec ff_r210_encoder = {
     .long_name      = NULL_IF_CONFIG_SMALL("Uncompressed RGB 10-bit"),
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_R210,
-    .init           = encode_init,
     .encode2        = encode_frame,
-    .close          = encode_close,
     .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_RGB48, AV_PIX_FMT_NONE },
+    .capabilities   = AV_CODEC_CAP_INTRA_ONLY,
 };
 #endif
 #if CONFIG_R10K_ENCODER
@@ -103,10 +84,9 @@ AVCodec ff_r10k_encoder = {
     .long_name      = NULL_IF_CONFIG_SMALL("AJA Kona 10-bit RGB Codec"),
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_R10K,
-    .init           = encode_init,
     .encode2        = encode_frame,
-    .close          = encode_close,
     .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_RGB48, AV_PIX_FMT_NONE },
+    .capabilities   = AV_CODEC_CAP_INTRA_ONLY,
 };
 #endif
 #if CONFIG_AVRP_ENCODER
@@ -115,9 +95,8 @@ AVCodec ff_avrp_encoder = {
     .long_name      = NULL_IF_CONFIG_SMALL("Avid 1:1 10-bit RGB Packer"),
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_AVRP,
-    .init           = encode_init,
     .encode2        = encode_frame,
-    .close          = encode_close,
     .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_RGB48, AV_PIX_FMT_NONE },
+    .capabilities   = AV_CODEC_CAP_INTRA_ONLY,
 };
 #endif
diff --git a/libavcodec/ra144dec.c b/libavcodec/ra144dec.c
index 29c78229..3eed17c0 100644
--- a/libavcodec/ra144dec.c
+++ b/libavcodec/ra144dec.c
@@ -134,5 +134,5 @@ AVCodec ff_ra_144_decoder = {
     .priv_data_size = sizeof(RA144Context),
     .init           = ra144_decode_init,
     .decode         = ra144_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/ra144enc.c b/libavcodec/ra144enc.c
index 3ad3f4ea..32755d22 100644
--- a/libavcodec/ra144enc.c
+++ b/libavcodec/ra144enc.c
@@ -447,7 +447,7 @@ static int ra144_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     if (ractx->last_frame)
         return 0;
 
-    if ((ret = ff_alloc_packet2(avctx, avpkt, FRAME_SIZE)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, avpkt, FRAME_SIZE, 0)) < 0)
         return ret;
 
     /**
@@ -551,7 +551,7 @@ AVCodec ff_ra_144_encoder = {
     .init           = ra144_encode_init,
     .encode2        = ra144_encode_frame,
     .close          = ra144_encode_close,
-    .capabilities   = CODEC_CAP_DELAY | CODEC_CAP_SMALL_LAST_FRAME,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_SMALL_LAST_FRAME,
     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
                                                      AV_SAMPLE_FMT_NONE },
     .supported_samplerates = (const int[]){ 8000, 0 },
diff --git a/libavcodec/ra288.c b/libavcodec/ra288.c
index 189d5c51..8f5a7f22 100644
--- a/libavcodec/ra288.c
+++ b/libavcodec/ra288.c
@@ -81,7 +81,7 @@ static av_cold int ra288_decode_init(AVCodecContext *avctx)
         return AVERROR_PATCHWELCOME;
     }
 
-    ractx->fdsp = avpriv_float_dsp_alloc(avctx->flags & CODEC_FLAG_BITEXACT);
+    ractx->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
     if (!ractx->fdsp)
         return AVERROR(ENOMEM);
 
@@ -207,14 +207,16 @@ static int ra288_decode_frame(AVCodecContext * avctx, void *data,
         return AVERROR_INVALIDDATA;
     }
 
+    ret = init_get_bits8(&gb, buf, avctx->block_align);
+    if (ret < 0)
+        return ret;
+
     /* get output buffer */
     frame->nb_samples = RA288_BLOCK_SIZE * RA288_BLOCKS_PER_FRAME;
     if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
     out = (float *)frame->data[0];
 
-    init_get_bits8(&gb, buf, avctx->block_align);
-
     for (i=0; i < RA288_BLOCKS_PER_FRAME; i++) {
         float gain = amptable[get_bits(&gb, 3)];
         int cb_coef = get_bits(&gb, 6 + (i&1));
@@ -247,5 +249,5 @@ AVCodec ff_ra_288_decoder = {
     .init           = ra288_decode_init,
     .decode         = ra288_decode_frame,
     .close          = ra288_decode_close,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/ralf.c b/libavcodec/ralf.c
index 8a319ac8..8cd9f88d 100644
--- a/libavcodec/ralf.c
+++ b/libavcodec/ralf.c
@@ -530,7 +530,7 @@ AVCodec ff_ralf_decoder = {
     .close          = decode_close,
     .decode         = decode_frame,
     .flush          = decode_flush,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P,
                                                       AV_SAMPLE_FMT_NONE },
 };
diff --git a/libavcodec/rangecoder.c b/libavcodec/rangecoder.c
index e4c5763e..200217a7 100644
--- a/libavcodec/rangecoder.c
+++ b/libavcodec/rangecoder.c
@@ -35,9 +35,10 @@
 
 #include "libavutil/attributes.h"
 #include "libavutil/avassert.h"
+#include "libavutil/intreadwrite.h"
+
 #include "avcodec.h"
 #include "rangecoder.h"
-#include "bytestream.h"
 
 av_cold void ff_init_range_encoder(RangeCoder *c, uint8_t *buf, int buf_size)
 {
@@ -56,7 +57,8 @@ av_cold void ff_init_range_decoder(RangeCoder *c, const uint8_t *buf,
     /* cast to avoid compiler warning */
     ff_init_range_encoder(c, (uint8_t *)buf, buf_size);
 
-    c->low = bytestream_get_be16((const uint8_t **)&c->bytestream);
+    c->low = AV_RB16(c->bytestream);
+    c->bytestream += 2;
 }
 
 void ff_build_rac_states(RangeCoder *c, int factor, int max_p)
diff --git a/libavcodec/ratecontrol.c b/libavcodec/ratecontrol.c
index c6a51a24..6c2b4af1 100644
--- a/libavcodec/ratecontrol.c
+++ b/libavcodec/ratecontrol.c
@@ -26,6 +26,8 @@
  */
 
 #include "libavutil/attributes.h"
+#include "libavutil/internal.h"
+
 #include "avcodec.h"
 #include "internal.h"
 #include "ratecontrol.h"
@@ -33,10 +35,6 @@
 #include "mpegvideo.h"
 #include "libavutil/eval.h"
 
-#ifndef M_E
-#define M_E 2.718281828
-#endif
-
 static int init_pass2(MpegEncContext *s);
 static double get_qscale(MpegEncContext *s, RateControlEntry *rce,
                          double rate_factor, int frame_num);
@@ -144,6 +142,13 @@ av_cold int ff_rate_control_init(MpegEncContext *s)
         return res;
     }
 
+#if FF_API_RC_STRATEGY
+FF_DISABLE_DEPRECATION_WARNINGS
+    if (!s->rc_strategy)
+        s->rc_strategy = s->avctx->rc_strategy;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
     for (i = 0; i < 5; i++) {
         rcc->pred[i].coeff = FF_QP2LAMBDA * 7.0;
         rcc->pred[i].count = 1.0;
@@ -161,7 +166,7 @@ av_cold int ff_rate_control_init(MpegEncContext *s)
     if (!rcc->buffer_index)
         rcc->buffer_index = s->avctx->rc_buffer_size * 3 / 4;
 
-    if (s->avctx->flags & CODEC_FLAG_PASS2) {
+    if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
         int i;
         char *p;
 
@@ -228,8 +233,12 @@ av_cold int ff_rate_control_init(MpegEncContext *s)
             return -1;
         }
 
+#if FF_API_RC_STRATEGY
+        av_assert0(MPV_RC_STRATEGY_XVID == FF_RC_STRATEGY_XVID);
+#endif
+
         // FIXME maybe move to end
-        if ((s->avctx->flags & CODEC_FLAG_PASS2) && s->avctx->rc_strategy == FF_RC_STRATEGY_XVID) {
+        if ((s->avctx->flags & AV_CODEC_FLAG_PASS2) && s->rc_strategy == MPV_RC_STRATEGY_XVID) {
 #if CONFIG_LIBXVID
             return ff_xvid_rate_control_init(s);
 #else
@@ -240,7 +249,7 @@ av_cold int ff_rate_control_init(MpegEncContext *s)
         }
     }
 
-    if (!(s->avctx->flags & CODEC_FLAG_PASS2)) {
+    if (!(s->avctx->flags & AV_CODEC_FLAG_PASS2)) {
         rcc->short_term_qsum   = 0.001;
         rcc->short_term_qcount = 0.001;
 
@@ -309,7 +318,7 @@ av_cold void ff_rate_control_uninit(MpegEncContext *s)
     av_freep(&rcc->entry);
 
 #if CONFIG_LIBXVID
-    if ((s->avctx->flags & CODEC_FLAG_PASS2) && s->avctx->rc_strategy == FF_RC_STRATEGY_XVID)
+    if ((s->avctx->flags & AV_CODEC_FLAG_PASS2) && s->rc_strategy == MPV_RC_STRATEGY_XVID)
         ff_xvid_rate_control_uninit(s);
 #endif
 }
@@ -762,8 +771,7 @@ float ff_rate_estimate_qscale(MpegEncContext *s, int dry_run)
     emms_c();
 
 #if CONFIG_LIBXVID
-    if ((s->avctx->flags & CODEC_FLAG_PASS2) &&
-        s->avctx->rc_strategy == FF_RC_STRATEGY_XVID)
+    if ((s->avctx->flags & AV_CODEC_FLAG_PASS2) && s->rc_strategy == MPV_RC_STRATEGY_XVID)
         return ff_xvid_rate_estimate_qscale(s, dry_run);
 #endif
 
@@ -782,7 +790,7 @@ float ff_rate_estimate_qscale(MpegEncContext *s, int dry_run)
                          s->frame_bits - s->stuffing_bits);
     }
 
-    if (s->avctx->flags & CODEC_FLAG_PASS2) {
+    if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
         av_assert0(picture_number >= 0);
         if (picture_number >= rcc->num_entries) {
             av_log(s, AV_LOG_ERROR, "Input is longer than 2-pass log file\n");
@@ -816,7 +824,7 @@ float ff_rate_estimate_qscale(MpegEncContext *s, int dry_run)
     var = pict_type == AV_PICTURE_TYPE_I ? pic->mb_var_sum : pic->mc_mb_var_sum;
 
     short_term_q = 0; /* avoid warning */
-    if (s->avctx->flags & CODEC_FLAG_PASS2) {
+    if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
         if (pict_type != AV_PICTURE_TYPE_I)
             av_assert0(pict_type == rce->new_pict_type);
 
@@ -882,7 +890,7 @@ float ff_rate_estimate_qscale(MpegEncContext *s, int dry_run)
     if (s->avctx->debug & FF_DEBUG_RC) {
         av_log(s->avctx, AV_LOG_DEBUG,
                "%c qp:%d<%2.1f<%d %d want:%d total:%d comp:%f st_q:%2.2f "
-               "size:%d var:%"PRId64"/%"PRId64" br:%d fps:%d\n",
+               "size:%d var:%"PRId64"/%"PRId64" br:%"PRId64" fps:%d\n",
                av_get_picture_type_char(pict_type),
                qmin, q, qmax, picture_number,
                (int)wanted_bits / 1000, (int)s->total_bits / 1000,
@@ -1049,9 +1057,9 @@ static int init_pass2(MpegEncContext *s)
     }
     av_assert0(toobig <= 40);
     av_log(s->avctx, AV_LOG_DEBUG,
-           "[lavc rc] requested bitrate: %d bps  expected bitrate: %d bps\n",
+           "[lavc rc] requested bitrate: %"PRId64" bps  expected bitrate: %"PRId64" bps\n",
            s->bit_rate,
-           (int)(expected_bits / ((double)all_available_bits / s->bit_rate)));
+           (int64_t)(expected_bits / ((double)all_available_bits / s->bit_rate)));
     av_log(s->avctx, AV_LOG_DEBUG,
            "[lavc rc] estimated target average qp: %.3f\n",
            (float)qscale_sum / rcc->num_entries);
diff --git a/libavcodec/raw.c b/libavcodec/raw.c
index 62ad338b..bfa2537b 100644
--- a/libavcodec/raw.c
+++ b/libavcodec/raw.c
@@ -53,6 +53,7 @@ const PixelFormatTag ff_raw_pix_fmt_tags[] = {
     { AV_PIX_FMT_YUYV422, MKTAG('V', '4', '2', '2') },
     { AV_PIX_FMT_YUYV422, MKTAG('V', 'Y', 'U', 'Y') },
     { AV_PIX_FMT_YUYV422, MKTAG('Y', 'U', 'N', 'V') },
+    { AV_PIX_FMT_YUYV422, MKTAG('Y', 'U', 'Y', 'V') },
     { AV_PIX_FMT_YVYU422, MKTAG('Y', 'V', 'Y', 'U') }, /* Philips */
     { AV_PIX_FMT_UYVY422, MKTAG('U', 'Y', 'V', 'Y') },
     { AV_PIX_FMT_UYVY422, MKTAG('H', 'D', 'Y', 'C') },
@@ -119,6 +120,12 @@ const PixelFormatTag ff_raw_pix_fmt_tags[] = {
     { AV_PIX_FMT_BGR48BE,  MKTAG( 48, 'B', 'G', 'R') },
     { AV_PIX_FMT_GRAY16LE,    MKTAG('Y', '1',  0 , 16 ) },
     { AV_PIX_FMT_GRAY16BE,    MKTAG(16 ,  0 , '1', 'Y') },
+    { AV_PIX_FMT_YUV420P9LE,  MKTAG('Y', '3', 11 ,  9 ) },
+    { AV_PIX_FMT_YUV420P9BE,  MKTAG( 9 , 11 , '3', 'Y') },
+    { AV_PIX_FMT_YUV422P9LE,  MKTAG('Y', '3', 10 ,  9 ) },
+    { AV_PIX_FMT_YUV422P9BE,  MKTAG( 9 , 10 , '3', 'Y') },
+    { AV_PIX_FMT_YUV444P9LE,  MKTAG('Y', '3',  0 ,  9 ) },
+    { AV_PIX_FMT_YUV444P9BE,  MKTAG( 9 ,  0 , '3', 'Y') },
     { AV_PIX_FMT_YUV420P10LE, MKTAG('Y', '3', 11 , 10 ) },
     { AV_PIX_FMT_YUV420P10BE, MKTAG(10 , 11 , '3', 'Y') },
     { AV_PIX_FMT_YUV422P10LE, MKTAG('Y', '3', 10 , 10 ) },
@@ -147,6 +154,7 @@ const PixelFormatTag ff_raw_pix_fmt_tags[] = {
     { AV_PIX_FMT_YUVA422P,    MKTAG('Y', '4', 10 ,  8 ) },
     { AV_PIX_FMT_YUVA444P,    MKTAG('Y', '4',  0 ,  8 ) },
     { AV_PIX_FMT_YA8,         MKTAG('Y', '2',  0 ,  8 ) },
+    { AV_PIX_FMT_PAL8,        MKTAG('P', 'A', 'L',  8 ) },
 
     { AV_PIX_FMT_YUVA420P9LE,  MKTAG('Y', '4', 11 ,  9 ) },
     { AV_PIX_FMT_YUVA420P9BE,  MKTAG( 9 , 11 , '4', 'Y') },
@@ -217,6 +225,40 @@ const PixelFormatTag ff_raw_pix_fmt_tags[] = {
     { AV_PIX_FMT_GRAY16BE,MKTAG('b', '1', '6', 'g') },
     { AV_PIX_FMT_RGB48BE, MKTAG('b', '4', '8', 'r') },
 
+    /* vlc */
+    { AV_PIX_FMT_YUV410P,     MKTAG('I', '4', '1', '0') },
+    { AV_PIX_FMT_YUV411P,     MKTAG('I', '4', '1', '1') },
+    { AV_PIX_FMT_YUV422P,     MKTAG('I', '4', '2', '2') },
+    { AV_PIX_FMT_YUV440P,     MKTAG('I', '4', '4', '0') },
+    { AV_PIX_FMT_YUV444P,     MKTAG('I', '4', '4', '4') },
+    { AV_PIX_FMT_YUVJ420P,    MKTAG('J', '4', '2', '0') },
+    { AV_PIX_FMT_YUVJ422P,    MKTAG('J', '4', '2', '2') },
+    { AV_PIX_FMT_YUVJ440P,    MKTAG('J', '4', '4', '0') },
+    { AV_PIX_FMT_YUVJ444P,    MKTAG('J', '4', '4', '4') },
+    { AV_PIX_FMT_YUVA444P,    MKTAG('Y', 'U', 'V', 'A') },
+    { AV_PIX_FMT_YUVA420P,    MKTAG('I', '4', '0', 'A') },
+    { AV_PIX_FMT_YUVA422P,    MKTAG('I', '4', '2', 'A') },
+    { AV_PIX_FMT_RGB8,        MKTAG('R', 'G', 'B', '2') },
+    { AV_PIX_FMT_RGB555LE,    MKTAG('R', 'V', '1', '5') },
+    { AV_PIX_FMT_RGB565LE,    MKTAG('R', 'V', '1', '6') },
+    { AV_PIX_FMT_BGR24,       MKTAG('R', 'V', '2', '4') },
+    { AV_PIX_FMT_BGR0,        MKTAG('R', 'V', '3', '2') },
+    { AV_PIX_FMT_RGBA,        MKTAG('A', 'V', '3', '2') },
+    { AV_PIX_FMT_YUV420P9LE,  MKTAG('I', '0', '9', 'L') },
+    { AV_PIX_FMT_YUV420P9BE,  MKTAG('I', '0', '9', 'B') },
+    { AV_PIX_FMT_YUV422P9LE,  MKTAG('I', '2', '9', 'L') },
+    { AV_PIX_FMT_YUV422P9BE,  MKTAG('I', '2', '9', 'B') },
+    { AV_PIX_FMT_YUV444P9LE,  MKTAG('I', '4', '9', 'L') },
+    { AV_PIX_FMT_YUV444P9BE,  MKTAG('I', '4', '9', 'B') },
+    { AV_PIX_FMT_YUV420P10LE, MKTAG('I', '0', 'A', 'L') },
+    { AV_PIX_FMT_YUV420P10BE, MKTAG('I', '0', 'A', 'B') },
+    { AV_PIX_FMT_YUV422P10LE, MKTAG('I', '2', 'A', 'L') },
+    { AV_PIX_FMT_YUV422P10BE, MKTAG('I', '2', 'A', 'B') },
+    { AV_PIX_FMT_YUV444P10LE, MKTAG('I', '4', 'A', 'L') },
+    { AV_PIX_FMT_YUV444P10BE, MKTAG('I', '4', 'A', 'B') },
+    { AV_PIX_FMT_YUV444P16LE, MKTAG('I', '4', 'F', 'L') },
+    { AV_PIX_FMT_YUV444P16BE, MKTAG('I', '4', 'F', 'B') },
+
     /* special */
     { AV_PIX_FMT_RGB565LE,MKTAG( 3 ,  0 ,  0 ,  0 ) }, /* flipped RGB565LE */
     { AV_PIX_FMT_YUV444P, MKTAG('Y', 'V', '2', '4') }, /* YUV444P, swapped UV */
@@ -241,7 +283,7 @@ unsigned int avcodec_pix_fmt_to_codec_tag(enum AVPixelFormat fmt)
 }
 
 const PixelFormatTag avpriv_pix_fmt_bps_avi[] = {
-    { AV_PIX_FMT_MONOWHITE, 1 },
+    { AV_PIX_FMT_PAL8,    1 },
     { AV_PIX_FMT_PAL8,    2 },
     { AV_PIX_FMT_PAL8,    4 },
     { AV_PIX_FMT_PAL8,    8 },
@@ -254,13 +296,13 @@ const PixelFormatTag avpriv_pix_fmt_bps_avi[] = {
 };
 
 const PixelFormatTag avpriv_pix_fmt_bps_mov[] = {
-    { AV_PIX_FMT_MONOWHITE, 1 },
+    { AV_PIX_FMT_PAL8,      1 },
     { AV_PIX_FMT_PAL8,      2 },
     { AV_PIX_FMT_PAL8,      4 },
     { AV_PIX_FMT_PAL8,      8 },
     { AV_PIX_FMT_RGB555BE, 16 },
     { AV_PIX_FMT_RGB24,    24 },
     { AV_PIX_FMT_ARGB,     32 },
-    { AV_PIX_FMT_MONOWHITE,33 },
+    { AV_PIX_FMT_PAL8,     33 },
     { AV_PIX_FMT_NONE,      0 },
 };
diff --git a/libavcodec/rawdec.c b/libavcodec/rawdec.c
index 647dfa9a..287be969 100644
--- a/libavcodec/rawdec.c
+++ b/libavcodec/rawdec.c
@@ -41,7 +41,11 @@ typedef struct RawVideoContext {
     AVBufferRef *palette;
     int frame_size;  /* size of the frame in bytes */
     int flip;
-    int is_2_4_bpp; // 2 or 4 bpp raw in avi/mov
+    int is_1_2_4_8_bpp; // 1, 2, 4 and 8 bpp in avi/mov, 1 and 8 bpp in nut
+    int is_mono;
+    int is_pal8;
+    int is_nut_mono;
+    int is_nut_pal8;
     int is_yuv2;
     int is_lt_16bpp; // 16bpp pixfmt and bits_per_coded_sample < 16
     int tff;
@@ -52,7 +56,7 @@ typedef struct RawVideoContext {
 } RawVideoContext;
 
 static const AVOption options[]={
-{"top", "top field first", offsetof(RawVideoContext, tff), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 1, AV_OPT_FLAG_DECODING_PARAM|AV_OPT_FLAG_VIDEO_PARAM},
+{"top", "top field first", offsetof(RawVideoContext, tff), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, AV_OPT_FLAG_DECODING_PARAM|AV_OPT_FLAG_VIDEO_PARAM},
 {NULL}
 };
 
@@ -94,8 +98,11 @@ static av_cold int raw_init_decoder(AVCodecContext *avctx)
             return AVERROR(ENOMEM);
         if (desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL)
             avpriv_set_systematic_pal2((uint32_t*)context->palette->data, avctx->pix_fmt);
-        else
+        else {
             memset(context->palette->data, 0, AVPALETTE_SIZE);
+            if (avctx->bits_per_coded_sample == 1)
+                memset(context->palette->data, 0xff, 4);
+        }
     }
 
     if ((avctx->extradata_size >= 9 &&
@@ -105,17 +112,33 @@ static av_cold int raw_init_decoder(AVCodecContext *avctx)
         avctx->codec_tag == MKTAG('W','R','A','W'))
         context->flip = 1;
 
+    if (avctx->pix_fmt == AV_PIX_FMT_MONOWHITE ||
+        avctx->pix_fmt == AV_PIX_FMT_MONOBLACK)
+        context->is_mono = 1;
+    else if (avctx->pix_fmt == AV_PIX_FMT_PAL8)
+        context->is_pal8 = 1;
+
+    if (avctx->codec_tag == MKTAG('B','1','W','0') ||
+        avctx->codec_tag == MKTAG('B','0','W','1'))
+        context->is_nut_mono = 1;
+    else if (avctx->codec_tag == MKTAG('P','A','L',8))
+        context->is_nut_pal8 = 1;
+
     if (avctx->codec_tag == AV_RL32("yuv2") &&
         avctx->pix_fmt   == AV_PIX_FMT_YUYV422)
         context->is_yuv2 = 1;
 
+    /* Temporary solution until PAL8 is implemented in nut */
+    if (context->is_pal8 && avctx->bits_per_coded_sample == 1)
+        avctx->pix_fmt = AV_PIX_FMT_NONE;
+
     return 0;
 }
 
-static void flip(AVCodecContext *avctx, AVPicture *picture)
+static void flip(AVCodecContext *avctx, AVFrame *frame)
 {
-    picture->data[0]     += picture->linesize[0] * (avctx->height - 1);
-    picture->linesize[0] *= -1;
+    frame->data[0]     += frame->linesize[0] * (avctx->height - 1);
+    frame->linesize[0] *= -1;
 }
 
 /*
@@ -149,33 +172,81 @@ MKSCALE16(scale16le, AV_RL16, AV_WL16)
 static int raw_decode(AVCodecContext *avctx, void *data, int *got_frame,
                       AVPacket *avpkt)
 {
-    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
+    const AVPixFmtDescriptor *desc;
     RawVideoContext *context       = avctx->priv_data;
     const uint8_t *buf             = avpkt->data;
     int buf_size                   = avpkt->size;
     int linesize_align             = 4;
+    int stride;
     int res, len;
     int need_copy;
 
     AVFrame   *frame   = data;
-    AVPicture *picture = data;
-
-    if ((avctx->bits_per_coded_sample == 4 || avctx->bits_per_coded_sample == 2) &&
-        avctx->pix_fmt == AV_PIX_FMT_PAL8 &&
-       (!avctx->codec_tag || avctx->codec_tag == MKTAG('r','a','w',' '))) {
-        context->is_2_4_bpp = 1;
-        context->frame_size = avpicture_get_size(avctx->pix_fmt,
-                                                 FFALIGN(avctx->width, 16),
-                                                 avctx->height);
+
+    if (avctx->width <= 0) {
+        av_log(avctx, AV_LOG_ERROR, "width is not set\n");
+        return AVERROR_INVALIDDATA;
+    }
+    if (avctx->height <= 0) {
+        av_log(avctx, AV_LOG_ERROR, "height is not set\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (context->is_nut_mono)
+        stride = avctx->width / 8 + (avctx->width & 7 ? 1 : 0);
+    else if (context->is_nut_pal8)
+        stride = avctx->width;
+    else
+        stride = avpkt->size / avctx->height;
+
+    av_log(avctx, AV_LOG_DEBUG, "PACKET SIZE: %d, STRIDE: %d\n", avpkt->size, stride);
+
+    if (stride == 0 || avpkt->size < stride * avctx->height) {
+        av_log(avctx, AV_LOG_ERROR, "Packet too small (%d)\n", avpkt->size);
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* Temporary solution until PAL8 is implemented in nut */
+    if (avctx->pix_fmt == AV_PIX_FMT_NONE &&
+        avctx->bits_per_coded_sample == 1 &&
+        avctx->frame_number == 0 &&
+        context->palette &&
+        AV_RB64(context->palette->data) == 0xFFFFFFFF00000000
+    ) {
+        const uint8_t *pal = av_packet_get_side_data(avpkt, AV_PKT_DATA_PALETTE, NULL);
+        if (!pal) {
+            avctx->pix_fmt = AV_PIX_FMT_MONOWHITE;
+            context->is_pal8 = 0;
+            context->is_mono = 1;
+        } else
+            avctx->pix_fmt = AV_PIX_FMT_PAL8;
+    }
+    desc = av_pix_fmt_desc_get(avctx->pix_fmt);
+
+    if ((avctx->bits_per_coded_sample == 8 || avctx->bits_per_coded_sample == 4
+            || avctx->bits_per_coded_sample <= 2) &&
+        (context->is_mono || context->is_pal8) &&
+        (!avctx->codec_tag || avctx->codec_tag == MKTAG('r','a','w',' ') ||
+                context->is_nut_mono || context->is_nut_pal8)) {
+        context->is_1_2_4_8_bpp = 1;
+        if (context->is_mono) {
+            int row_bytes = avctx->width / 8 + (avctx->width & 7 ? 1 : 0);
+            context->frame_size = av_image_get_buffer_size(avctx->pix_fmt,
+                                                           FFALIGN(row_bytes, 16) * 8,
+                                                           avctx->height, 1);
+        } else
+            context->frame_size = av_image_get_buffer_size(avctx->pix_fmt,
+                                                           FFALIGN(avctx->width, 16),
+                                                           avctx->height, 1);
     } else {
         context->is_lt_16bpp = av_get_bits_per_pixel(desc) == 16 && avctx->bits_per_coded_sample && avctx->bits_per_coded_sample < 16;
-        context->frame_size = avpicture_get_size(avctx->pix_fmt, avctx->width,
-                                                 avctx->height);
+        context->frame_size = av_image_get_buffer_size(avctx->pix_fmt, avctx->width,
+                                                       avctx->height, 1);
     }
     if (context->frame_size < 0)
         return context->frame_size;
 
-    need_copy = !avpkt->buf || context->is_2_4_bpp || context->is_yuv2 || context->is_lt_16bpp;
+    need_copy = !avpkt->buf || context->is_1_2_4_8_bpp || context->is_yuv2 || context->is_lt_16bpp;
 
     frame->pict_type        = AV_PICTURE_TYPE_I;
     frame->key_frame        = 1;
@@ -202,27 +273,66 @@ static int raw_decode(AVCodecContext *avctx, void *data, int *got_frame,
     if (!frame->buf[0])
         return AVERROR(ENOMEM);
 
-    //2bpp and 4bpp raw in avi and mov (yes this is ugly ...)
-    if (context->is_2_4_bpp) {
-        int i;
+    // 1, 2, 4 and 8 bpp in avi/mov, 1 and 8 bpp in nut
+    if (context->is_1_2_4_8_bpp) {
+        int i, j, row_pix = 0;
         uint8_t *dst = frame->buf[0]->data;
-        buf_size = context->frame_size - AVPALETTE_SIZE;
-        if (avctx->bits_per_coded_sample == 4) {
-            for (i = 0; 2 * i + 1 < buf_size && i<avpkt->size; i++) {
-                dst[2 * i + 0] = buf[i] >> 4;
-                dst[2 * i + 1] = buf[i] & 15;
+        buf_size = context->frame_size - (context->is_pal8 ? AVPALETTE_SIZE : 0);
+        if (avctx->bits_per_coded_sample == 8 || context->is_nut_pal8 || context->is_mono) {
+            int pix_per_byte = context->is_mono ? 8 : 1;
+            for (i = 0, j = 0; j < buf_size && i<avpkt->size; i++, j++) {
+                dst[j] = buf[i];
+                row_pix += pix_per_byte;
+                if (row_pix >= avctx->width) {
+                    i += stride - (i % stride) - 1;
+                    j += 16 - (j % 16) - 1;
+                    row_pix = 0;
+                }
+            }
+        } else if (avctx->bits_per_coded_sample == 4) {
+            for (i = 0, j = 0; 2 * j + 1 < buf_size && i<avpkt->size; i++, j++) {
+                dst[2 * j + 0] = buf[i] >> 4;
+                dst[2 * j + 1] = buf[i] & 15;
+                row_pix += 2;
+                if (row_pix >= avctx->width) {
+                    i += stride - (i % stride) - 1;
+                    j += 8 - (j % 8) - 1;
+                    row_pix = 0;
+                }
+            }
+        } else if (avctx->bits_per_coded_sample == 2) {
+            for (i = 0, j = 0; 4 * j + 3 < buf_size && i<avpkt->size; i++, j++) {
+                dst[4 * j + 0] = buf[i] >> 6;
+                dst[4 * j + 1] = buf[i] >> 4 & 3;
+                dst[4 * j + 2] = buf[i] >> 2 & 3;
+                dst[4 * j + 3] = buf[i]      & 3;
+                row_pix += 4;
+                if (row_pix >= avctx->width) {
+                    i += stride - (i % stride) - 1;
+                    j += 4 - (j % 4) - 1;
+                    row_pix = 0;
+                }
             }
-            linesize_align = 8;
         } else {
-            av_assert0(avctx->bits_per_coded_sample == 2);
-            for (i = 0; 4 * i + 3 < buf_size && i<avpkt->size; i++) {
-                dst[4 * i + 0] = buf[i] >> 6;
-                dst[4 * i + 1] = buf[i] >> 4 & 3;
-                dst[4 * i + 2] = buf[i] >> 2 & 3;
-                dst[4 * i + 3] = buf[i]      & 3;
+            av_assert0(avctx->bits_per_coded_sample == 1);
+            for (i = 0, j = 0; 8 * j + 7 < buf_size && i<avpkt->size; i++, j++) {
+                dst[8 * j + 0] = buf[i] >> 7;
+                dst[8 * j + 1] = buf[i] >> 6 & 1;
+                dst[8 * j + 2] = buf[i] >> 5 & 1;
+                dst[8 * j + 3] = buf[i] >> 4 & 1;
+                dst[8 * j + 4] = buf[i] >> 3 & 1;
+                dst[8 * j + 5] = buf[i] >> 2 & 1;
+                dst[8 * j + 6] = buf[i] >> 1 & 1;
+                dst[8 * j + 7] = buf[i]      & 1;
+                row_pix += 8;
+                if (row_pix >= avctx->width) {
+                    i += stride - (i % stride) - 1;
+                    j += 2 - (j % 2) - 1;
+                    row_pix = 0;
+                }
             }
-            linesize_align = 16;
         }
+        linesize_align = 16;
         buf = dst;
     } else if (context->is_lt_16bpp) {
         uint8_t *dst = frame->buf[0]->data;
@@ -258,14 +368,15 @@ static int raw_decode(AVCodecContext *avctx, void *data, int *got_frame,
         buf += buf_size - context->frame_size;
 
     len = context->frame_size - (avctx->pix_fmt==AV_PIX_FMT_PAL8 ? AVPALETTE_SIZE : 0);
-    if (buf_size < len && (avctx->codec_tag & 0xFFFFFF) != MKTAG('B','I','T', 0)) {
+    if (buf_size < len && ((avctx->codec_tag & 0xFFFFFF) != MKTAG('B','I','T', 0) || !need_copy)) {
         av_log(avctx, AV_LOG_ERROR, "Invalid buffer size, packet size %d < expected frame_size %d\n", buf_size, len);
         av_buffer_unref(&frame->buf[0]);
         return AVERROR(EINVAL);
     }
 
-    if ((res = avpicture_fill(picture, buf, avctx->pix_fmt,
-                              avctx->width, avctx->height)) < 0) {
+    if ((res = av_image_fill_arrays(frame->data, frame->linesize,
+                                    buf, avctx->pix_fmt,
+                                    avctx->width, avctx->height, 1)) < 0) {
         av_buffer_unref(&frame->buf[0]);
         return res;
     }
@@ -292,6 +403,7 @@ static int raw_decode(AVCodecContext *avctx, void *data, int *got_frame,
         avctx->pix_fmt==AV_PIX_FMT_RGB555BE ||
         avctx->pix_fmt==AV_PIX_FMT_RGB565LE ||
         avctx->pix_fmt==AV_PIX_FMT_MONOWHITE ||
+        avctx->pix_fmt==AV_PIX_FMT_MONOBLACK ||
         avctx->pix_fmt==AV_PIX_FMT_PAL8) &&
         FFALIGN(frame->linesize[0], linesize_align) * avctx->height <= buf_size)
         frame->linesize[0] = FFALIGN(frame->linesize[0], linesize_align);
@@ -320,27 +432,27 @@ static int raw_decode(AVCodecContext *avctx, void *data, int *got_frame,
         frame->linesize[0] = (frame->linesize[0] + 3) & ~3;
 
     if (context->flip)
-        flip(avctx, picture);
+        flip(avctx, frame);
 
     if (avctx->codec_tag == MKTAG('Y', 'V', '1', '2') ||
         avctx->codec_tag == MKTAG('Y', 'V', '1', '6') ||
         avctx->codec_tag == MKTAG('Y', 'V', '2', '4') ||
         avctx->codec_tag == MKTAG('Y', 'V', 'U', '9'))
-        FFSWAP(uint8_t *, picture->data[1], picture->data[2]);
+        FFSWAP(uint8_t *, frame->data[1], frame->data[2]);
 
     if (avctx->codec_tag == AV_RL32("I420") && (avctx->width+1)*(avctx->height+1) * 3/2 == buf_size) {
-        picture->data[1] = picture->data[1] +  (avctx->width+1)*(avctx->height+1) -avctx->width*avctx->height;
-        picture->data[2] = picture->data[2] + ((avctx->width+1)*(avctx->height+1) -avctx->width*avctx->height)*5/4;
+        frame->data[1] = frame->data[1] +  (avctx->width+1)*(avctx->height+1) -avctx->width*avctx->height;
+        frame->data[2] = frame->data[2] + ((avctx->width+1)*(avctx->height+1) -avctx->width*avctx->height)*5/4;
     }
 
     if (avctx->codec_tag == AV_RL32("yuv2") &&
         avctx->pix_fmt   == AV_PIX_FMT_YUYV422) {
         int x, y;
-        uint8_t *line = picture->data[0];
+        uint8_t *line = frame->data[0];
         for (y = 0; y < avctx->height; y++) {
             for (x = 0; x < avctx->width; x++)
                 line[2 * x + 1] ^= 0x80;
-            line += picture->linesize[0];
+            line += frame->linesize[0];
         }
     }
 
@@ -372,5 +484,5 @@ AVCodec ff_rawvideo_decoder = {
     .close          = raw_close_decoder,
     .decode         = raw_decode,
     .priv_class     = &rawdec_class,
-    .capabilities   = CODEC_CAP_PARAM_CHANGE,
+    .capabilities   = AV_CODEC_CAP_PARAM_CHANGE,
 };
diff --git a/libavcodec/rawenc.c b/libavcodec/rawenc.c
index 71c1de5b..d8370564 100644
--- a/libavcodec/rawenc.c
+++ b/libavcodec/rawenc.c
@@ -29,17 +29,18 @@
 #include "internal.h"
 #include "libavutil/pixdesc.h"
 #include "libavutil/intreadwrite.h"
+#include "libavutil/imgutils.h"
 #include "libavutil/internal.h"
 
 static av_cold int raw_encode_init(AVCodecContext *avctx)
 {
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
 
-    avctx->coded_frame            = av_frame_alloc();
-    if (!avctx->coded_frame)
-        return AVERROR(ENOMEM);
-
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
     avctx->bits_per_coded_sample = av_get_bits_per_pixel(desc);
     if(!avctx->codec_tag)
         avctx->codec_tag = avcodec_pix_fmt_to_codec_tag(avctx->pix_fmt);
@@ -49,21 +50,24 @@ static av_cold int raw_encode_init(AVCodecContext *avctx)
 static int raw_encode(AVCodecContext *avctx, AVPacket *pkt,
                       const AVFrame *frame, int *got_packet)
 {
-    int ret = avpicture_get_size(avctx->pix_fmt, avctx->width, avctx->height);
+    int ret = av_image_get_buffer_size(frame->format,
+                                       frame->width, frame->height, 1);
 
     if (ret < 0)
         return ret;
 
-    if ((ret = ff_alloc_packet(pkt, ret)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, pkt, ret, ret)) < 0)
         return ret;
-    if ((ret = avpicture_layout((const AVPicture *)frame, avctx->pix_fmt, avctx->width,
-                                avctx->height, pkt->data, pkt->size)) < 0)
+    if ((ret = av_image_copy_to_buffer(pkt->data, pkt->size,
+                                       (const uint8_t **)frame->data, frame->linesize,
+                                       frame->format,
+                                       frame->width, frame->height, 1)) < 0)
         return ret;
 
     if(avctx->codec_tag == AV_RL32("yuv2") && ret > 0 &&
-       avctx->pix_fmt   == AV_PIX_FMT_YUYV422) {
+       frame->format   == AV_PIX_FMT_YUYV422) {
         int x;
-        for(x = 1; x < avctx->height*avctx->width*2; x += 2)
+        for(x = 1; x < frame->height*frame->width*2; x += 2)
             pkt->data[x] ^= 0x80;
     }
     pkt->flags |= AV_PKT_FLAG_KEY;
@@ -71,18 +75,11 @@ static int raw_encode(AVCodecContext *avctx, AVPacket *pkt,
     return 0;
 }
 
-static av_cold int raw_encode_close(AVCodecContext *avctx)
-{
-    av_frame_free(&avctx->coded_frame);
-    return 0;
-}
-
 AVCodec ff_rawvideo_encoder = {
     .name           = "rawvideo",
     .long_name      = NULL_IF_CONFIG_SMALL("raw video"),
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_RAWVIDEO,
     .init           = raw_encode_init,
-    .close          = raw_encode_close,
     .encode2        = raw_encode,
 };
diff --git a/libavcodec/remove_extradata_bsf.c b/libavcodec/remove_extradata_bsf.c
index e880b958..6bb3576c 100644
--- a/libavcodec/remove_extradata_bsf.c
+++ b/libavcodec/remove_extradata_bsf.c
@@ -33,7 +33,8 @@ static int remove_extradata(AVBitStreamFilterContext *bsfc, AVCodecContext *avct
     s= bsfc->parser;
 
     if(s && s->parser->split){
-        if(  (((avctx->flags & CODEC_FLAG_GLOBAL_HEADER) || (avctx->flags2 & CODEC_FLAG2_LOCAL_HEADER)) && cmd=='a')
+        if(  (((avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) ||
+               (avctx->flags2 & AV_CODEC_FLAG2_LOCAL_HEADER)) && cmd == 'a')
            ||(!keyframe && cmd=='k')
            ||(cmd=='e' || !cmd)
           ){
diff --git a/libavcodec/resample.c b/libavcodec/resample.c
index c45aa16c..4c5eb9f1 100644
--- a/libavcodec/resample.c
+++ b/libavcodec/resample.c
@@ -33,6 +33,7 @@
 #include "libavutil/samplefmt.h"
 
 #if FF_API_AVCODEC_RESAMPLE
+FF_DISABLE_DEPRECATION_WARNINGS
 
 #define MAX_CHANNELS 8
 
@@ -290,12 +291,6 @@ int audio_resample(ReSampleContext *s, short *output, short *input, int nb_sampl
     short *output_bak = NULL;
     int lenout;
 
-    if (s->input_channels == s->output_channels && s->ratio == 1.0 && 0) {
-        /* nothing to do */
-        memcpy(output, input, nb_samples * s->input_channels * sizeof(short));
-        return nb_samples;
-    }
-
     if (s->sample_fmt[0] != AV_SAMPLE_FMT_S16) {
         int istride[1] = { s->sample_size[0] };
         int ostride[1] = { 2 };
@@ -440,4 +435,5 @@ void audio_resample_close(ReSampleContext *s)
     av_free(s);
 }
 
+FF_ENABLE_DEPRECATION_WARNINGS
 #endif
diff --git a/libavcodec/resample2.c b/libavcodec/resample2.c
index cd9fe1ce..56ae9f72 100644
--- a/libavcodec/resample2.c
+++ b/libavcodec/resample2.c
@@ -34,9 +34,9 @@
 #ifndef CONFIG_RESAMPLE_HP
 #define FILTER_SHIFT 15
 
-#define FELEM int16_t
-#define FELEM2 int32_t
-#define FELEML int64_t
+typedef int16_t FELEM;
+typedef int32_t FELEM2;
+typedef int64_t FELEML;
 #define FELEM_MAX INT16_MAX
 #define FELEM_MIN INT16_MIN
 #define WINDOW_TYPE 9
@@ -52,9 +52,9 @@
 #else
 #define FILTER_SHIFT 0
 
-#define FELEM double
-#define FELEM2 double
-#define FELEML double
+typedef double FELEM;
+typedef double FELEM2;
+typedef double FELEML;
 #define WINDOW_TYPE 24
 #endif
 
diff --git a/libavcodec/reverse.c b/libavcodec/reverse.c
new file mode 100644
index 00000000..440badaf
--- /dev/null
+++ b/libavcodec/reverse.c
@@ -0,0 +1 @@
+#include "libavutil/reverse.c"
diff --git a/libavcodec/rl2.c b/libavcodec/rl2.c
index eaf31b63..6662979c 100644
--- a/libavcodec/rl2.c
+++ b/libavcodec/rl2.c
@@ -222,5 +222,5 @@ AVCodec ff_rl2_decoder = {
     .init           = rl2_decode_init,
     .close          = rl2_decode_end,
     .decode         = rl2_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/rle.c b/libavcodec/rle.c
index d2ec68c4..7924ea78 100644
--- a/libavcodec/rle.c
+++ b/libavcodec/rle.c
@@ -22,16 +22,7 @@
 #include "rle.h"
 #include "libavutil/common.h"
 
-/**
- * Count up to 127 consecutive pixels which are either all the same or
- * all differ from the previous and next pixels.
- * @param start Pointer to the first pixel
- * @param len Maximum number of pixels
- * @param bpp Bytes per pixel
- * @param same 1 if searching for identical pixel values.  0 for differing
- * @return Number of matching consecutive pixels found
- */
-static int count_pixels(const uint8_t *start, int len, int bpp, int same)
+int ff_rle_count_pixels(const uint8_t *start, int len, int bpp, int same)
 {
     const uint8_t *pos;
     int count = 1;
@@ -63,14 +54,14 @@ int ff_rle_encode(uint8_t *outbuf, int out_size, const uint8_t *ptr , int bpp, i
 
     for(x = 0; x < w; x += count) {
         /* see if we can encode the next set of pixels with RLE */
-        if((count = count_pixels(ptr, w-x, bpp, 1)) > 1) {
+        if ((count = ff_rle_count_pixels(ptr, w - x, bpp, 1)) > 1) {
             if(out + bpp + 1 > outbuf + out_size) return -1;
             *out++ = (count ^ xor_rep) + add_rep;
             memcpy(out, ptr, bpp);
             out += bpp;
         } else {
             /* fall back on uncompressed */
-            count = count_pixels(ptr, w-x, bpp, 0);
+            count = ff_rle_count_pixels(ptr, w - x, bpp, 0);
             if(out + bpp*count >= outbuf + out_size) return -1;
             *out++ = (count ^ xor_raw) + add_raw;
 
diff --git a/libavcodec/rle.h b/libavcodec/rle.h
index 24851321..cb516249 100644
--- a/libavcodec/rle.h
+++ b/libavcodec/rle.h
@@ -23,6 +23,17 @@
 
 #include <stdint.h>
 
+/**
+ * Count up to 127 consecutive pixels which are either all the same or
+ * all differ from the previous and next pixels.
+ * @param start Pointer to the first pixel
+ * @param len Maximum number of pixels
+ * @param bpp Bytes per pixel
+ * @param same 1 if searching for identical pixel values, 0 for differing
+ * @return Number of matching consecutive pixels found
+ */
+int ff_rle_count_pixels(const uint8_t *start, int len, int bpp, int same);
+
 /**
  * RLE compress the row, with maximum size of out_size. Value before repeated bytes is (count ^ xor_rep) + add_rep.
  *                                                      Value before raw bytes is      (count ^ xor_raw) + add_raw.
diff --git a/libavcodec/roqaudioenc.c b/libavcodec/roqaudioenc.c
index c373ccca..5154604b 100644
--- a/libavcodec/roqaudioenc.c
+++ b/libavcodec/roqaudioenc.c
@@ -160,7 +160,7 @@ static int roq_dpcm_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     else
         data_size = avctx->channels * avctx->frame_size;
 
-    if ((ret = ff_alloc_packet2(avctx, avpkt, ROQ_HEADER_SIZE + data_size)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, avpkt, ROQ_HEADER_SIZE + data_size, 0)) < 0)
         return ret;
     out = avpkt->data;
 
@@ -198,7 +198,7 @@ AVCodec ff_roq_dpcm_encoder = {
     .init           = roq_dpcm_encode_init,
     .encode2        = roq_dpcm_encode_frame,
     .close          = roq_dpcm_encode_close,
-    .capabilities   = CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_DELAY,
     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
                                                      AV_SAMPLE_FMT_NONE },
 };
diff --git a/libavcodec/roqvideodec.c b/libavcodec/roqvideodec.c
index b716e258..5f0b2047 100644
--- a/libavcodec/roqvideodec.c
+++ b/libavcodec/roqvideodec.c
@@ -26,6 +26,8 @@
  */
 
 #include "libavutil/avassert.h"
+#include "libavutil/imgutils.h"
+
 #include "avcodec.h"
 #include "bytestream.h"
 #include "internal.h"
@@ -202,15 +204,17 @@ static int roq_decode_frame(AVCodecContext *avctx,
     const uint8_t *buf = avpkt->data;
     int buf_size = avpkt->size;
     RoqContext *s = avctx->priv_data;
-    int copy= !s->current_frame->data[0];
+    int copy = !s->current_frame->data[0] && s->last_frame->data[0];
     int ret;
 
     if ((ret = ff_reget_buffer(avctx, s->current_frame)) < 0)
         return ret;
 
-    if(copy)
-        av_picture_copy((AVPicture*)s->current_frame, (AVPicture*)s->last_frame,
-                        avctx->pix_fmt, avctx->width, avctx->height);
+    if (copy) {
+        ret = av_frame_copy(s->current_frame, s->last_frame);
+        if (ret < 0)
+            return ret;
+    }
 
     bytestream2_init(&s->gb, buf, buf_size);
     roqvideo_decode_frame(s);
@@ -244,5 +248,5 @@ AVCodec ff_roq_decoder = {
     .init           = roq_decode_init,
     .close          = roq_decode_end,
     .decode         = roq_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/roqvideoenc.c b/libavcodec/roqvideoenc.c
index 89879e81..b6ed1f98 100644
--- a/libavcodec/roqvideoenc.c
+++ b/libavcodec/roqvideoenc.c
@@ -960,9 +960,6 @@ static int roq_encode_video(RoqContext *enc)
     reconstruct_and_encode_image(enc, tempData, enc->width, enc->height,
                                  enc->width*enc->height/64);
 
-    av_frame_unref(enc->avctx->coded_frame);
-    av_frame_ref(enc->avctx->coded_frame, enc->current_frame);
-
     /* Rotate frame history */
     FFSWAP(AVFrame *, enc->current_frame, enc->last_frame);
     FFSWAP(motion_vect *, enc->last_motion4, enc->this_motion4);
@@ -982,7 +979,6 @@ static av_cold int roq_encode_end(AVCodecContext *avctx)
 
     av_frame_free(&enc->current_frame);
     av_frame_free(&enc->last_frame);
-    av_frame_free(&enc->avctx->coded_frame);
 
     av_freep(&enc->tmpData);
     av_freep(&enc->this_motion4);
@@ -1023,8 +1019,7 @@ static av_cold int roq_encode_init(AVCodecContext *avctx)
 
     enc->last_frame    = av_frame_alloc();
     enc->current_frame = av_frame_alloc();
-    avctx->coded_frame = av_frame_alloc();
-    if (!enc->last_frame || !enc->current_frame || !avctx->coded_frame) {
+    if (!enc->last_frame || !enc->current_frame) {
         roq_encode_end(avctx);
         return AVERROR(ENOMEM);
     }
@@ -1095,7 +1090,7 @@ static int roq_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     /* 138 bits max per 8x8 block +
      *     256 codebooks*(6 bytes 2x2 + 4 bytes 4x4) + 8 bytes frame header */
     size = ((enc->width * enc->height / 64) * 138 + 7) / 8 + 256 * (6 + 4) + 8;
-    if ((ret = ff_alloc_packet2(avctx, pkt, size)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, pkt, size, 0)) < 0)
         return ret;
     enc->out_buf = pkt->data;
 
@@ -1132,7 +1127,7 @@ static int roq_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 #define OFFSET(x) offsetof(RoqContext, x)
 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 static const AVOption options[] = {
-    { "quake3_compat", "Whether to respect known limitations in Quake 3 decoder", OFFSET(quake3_compat), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, VE },
+    { "quake3_compat", "Whether to respect known limitations in Quake 3 decoder", OFFSET(quake3_compat), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, VE },
     { NULL },
 };
 
diff --git a/libavcodec/rpza.c b/libavcodec/rpza.c
index 732b09ac..315ad23d 100644
--- a/libavcodec/rpza.c
+++ b/libavcodec/rpza.c
@@ -52,23 +52,25 @@ typedef struct RpzaContext {
     GetByteContext gb;
 } RpzaContext;
 
-#define ADVANCE_BLOCK() \
-{ \
-    pixel_ptr += 4; \
-    if (pixel_ptr >= width) \
-    { \
-        pixel_ptr = 0; \
-        row_ptr += stride * 4; \
-    } \
-    total_blocks--; \
-    if (total_blocks < 0) \
-    { \
-        av_log(s->avctx, AV_LOG_ERROR, "warning: block counter just went negative (this should not happen)\n"); \
-        return; \
-    } \
-}
+#define CHECK_BLOCK()                                                         \
+    if (total_blocks < 1) {                                                    \
+        av_log(s->avctx, AV_LOG_ERROR,                                         \
+               "Block counter just went negative (this should not happen)\n"); \
+        return AVERROR_INVALIDDATA;                                            \
+    }                                                                          \
+
+#define ADVANCE_BLOCK()             \
+    {                               \
+        pixel_ptr += 4;             \
+        if (pixel_ptr >= width)     \
+        {                           \
+            pixel_ptr = 0;          \
+            row_ptr  += stride * 4; \
+        }                           \
+        total_blocks--;             \
+    }
 
-static void rpza_decode_stream(RpzaContext *s)
+static int rpza_decode_stream(RpzaContext *s)
 {
     int width = s->avctx->width;
     int stride = s->frame->linesize[0] / 2;
@@ -80,7 +82,7 @@ static void rpza_decode_stream(RpzaContext *s)
     uint16_t *pixels = (uint16_t *)s->frame->data[0];
 
     int row_ptr = 0;
-    int pixel_ptr = -4;
+    int pixel_ptr = 0;
     int block_ptr;
     int pixel_x, pixel_y;
     int total_blocks;
@@ -130,7 +132,8 @@ static void rpza_decode_stream(RpzaContext *s)
         /* Skip blocks */
         case 0x80:
             while (n_blocks--) {
-              ADVANCE_BLOCK();
+                CHECK_BLOCK();
+                ADVANCE_BLOCK();
             }
             break;
 
@@ -138,7 +141,7 @@ static void rpza_decode_stream(RpzaContext *s)
         case 0xa0:
             colorA = bytestream2_get_be16(&s->gb);
             while (n_blocks--) {
-                ADVANCE_BLOCK()
+                CHECK_BLOCK();
                 block_ptr = row_ptr + pixel_ptr;
                 for (pixel_y = 0; pixel_y < 4; pixel_y++) {
                     for (pixel_x = 0; pixel_x < 4; pixel_x++){
@@ -147,6 +150,7 @@ static void rpza_decode_stream(RpzaContext *s)
                     }
                     block_ptr += row_inc;
                 }
+                ADVANCE_BLOCK();
             }
             break;
 
@@ -181,9 +185,9 @@ static void rpza_decode_stream(RpzaContext *s)
             color4[2] |= ((21 * ta + 11 * tb) >> 5);
 
             if (bytestream2_get_bytes_left(&s->gb) < n_blocks * 4)
-                return;
+                return AVERROR_INVALIDDATA;
             while (n_blocks--) {
-                ADVANCE_BLOCK();
+                CHECK_BLOCK();
                 block_ptr = row_ptr + pixel_ptr;
                 for (pixel_y = 0; pixel_y < 4; pixel_y++) {
                     uint8_t index = bytestream2_get_byteu(&s->gb);
@@ -194,14 +198,15 @@ static void rpza_decode_stream(RpzaContext *s)
                     }
                     block_ptr += row_inc;
                 }
+                ADVANCE_BLOCK();
             }
             break;
 
         /* Fill block with 16 colors */
         case 0x00:
             if (bytestream2_get_bytes_left(&s->gb) < 30)
-                return;
-            ADVANCE_BLOCK();
+                return AVERROR_INVALIDDATA;
+            CHECK_BLOCK();
             block_ptr = row_ptr + pixel_ptr;
             for (pixel_y = 0; pixel_y < 4; pixel_y++) {
                 for (pixel_x = 0; pixel_x < 4; pixel_x++){
@@ -213,6 +218,7 @@ static void rpza_decode_stream(RpzaContext *s)
                 }
                 block_ptr += row_inc;
             }
+            ADVANCE_BLOCK();
             break;
 
         /* Unknown opcode */
@@ -220,9 +226,11 @@ static void rpza_decode_stream(RpzaContext *s)
             av_log(s->avctx, AV_LOG_ERROR, "Unknown opcode %d in rpza chunk."
                  " Skip remaining %d bytes of chunk data.\n", opcode,
                  bytestream2_get_bytes_left(&s->gb));
-            return;
+            return AVERROR_INVALIDDATA;
         } /* Opcode switch */
     }
+
+    return 0;
 }
 
 static av_cold int rpza_decode_init(AVCodecContext *avctx)
@@ -251,7 +259,9 @@ static int rpza_decode_frame(AVCodecContext *avctx,
     if ((ret = ff_reget_buffer(avctx, s->frame)) < 0)
         return ret;
 
-    rpza_decode_stream(s);
+    ret = rpza_decode_stream(s);
+    if (ret < 0)
+        return ret;
 
     if ((ret = av_frame_ref(data, s->frame)) < 0)
         return ret;
@@ -280,5 +290,5 @@ AVCodec ff_rpza_decoder = {
     .init           = rpza_decode_init,
     .close          = rpza_decode_end,
     .decode         = rpza_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/rscc.c b/libavcodec/rscc.c
new file mode 100644
index 00000000..a2f7a0dc
--- /dev/null
+++ b/libavcodec/rscc.c
@@ -0,0 +1,356 @@
+/*
+ * innoHeim/Rsupport Screen Capture Codec
+ * Copyright (C) 2015 Vittorio Giovara <vittorio.giovara@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * innoHeim/Rsupport Screen Capture Codec decoder
+ *
+ * Fourcc: ISCC, RSCC
+ *
+ * Lossless codec, data stored in tiles, with optional deflate compression.
+ *
+ * Header contains the number of tiles in a frame with the tile coordinates,
+ * and it can be deflated or not. Similarly, pixel data comes after the header
+ * and a variable size value, and it can be deflated or just raw.
+ *
+ * Supports: BGRA
+ */
+
+#include <limits.h>
+#include <stdint.h>
+#include <string.h>
+#include <zlib.h>
+
+#include "libavutil/imgutils.h"
+#include "libavutil/internal.h"
+
+#include "avcodec.h"
+#include "bytestream.h"
+#include "internal.h"
+
+/* Size in bytes of one tile descriptor: four 16-bit fields (x, w, y, h) */
+#define TILE_SIZE 8
+
+/* Rectangle of the picture updated by a packet */
+typedef struct Tile {
+    int x, y;
+    int w, h;
+} Tile;
+
+typedef struct RsccContext {
+    GetByteContext gbc;
+    AVFrame *reference;      /* picture kept across packets, updated in place */
+    Tile *tiles;             /* tile array, reused between packets */
+    unsigned int tiles_size; /* allocated size of tiles, in bytes */
+
+    /* zlib interaction */
+    uint8_t *inflated_buf;   /* destination for inflated pixel data */
+    uLongf inflated_size;    /* size of inflated_buf: width * height * 4 */
+} RsccContext;
+
+/**
+ * Validate the picture size, allocate the reference frame and a scratch
+ * buffer large enough for one full picture, and select the pixel format
+ * from the codec tag.
+ *
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static av_cold int rscc_init(AVCodecContext *avctx)
+{
+    RsccContext *ctx = avctx->priv_data;
+
+    /* Width and height need to be set to estimate the uncompressed buffer */
+    int ret = av_image_check_size(avctx->width, avctx->height, 0, avctx);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid image size %dx%d.\n",
+               avctx->width, avctx->height);
+        return ret;
+    }
+
+    /* Allocate reference frame */
+    ctx->reference = av_frame_alloc();
+    if (!ctx->reference)
+        return AVERROR(ENOMEM);
+
+    if (avctx->codec_tag == MKTAG('I','S','C','C')) {
+        avctx->pix_fmt = AV_PIX_FMT_BGRA;
+    } else {
+        avctx->pix_fmt = AV_PIX_FMT_BGR0;
+    }
+
+    /* Store the value to check for keyframes */
+    ctx->inflated_size = avctx->width * avctx->height * 4;
+
+    /* Allocate maximum size possible, a full frame */
+    ctx->inflated_buf = av_malloc(ctx->inflated_size);
+    if (!ctx->inflated_buf)
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
+/** Free the tile array, the inflate buffer and the reference frame. */
+static av_cold int rscc_close(AVCodecContext *avctx)
+{
+    RsccContext *ctx = avctx->priv_data;
+
+    av_freep(&ctx->tiles);
+    av_freep(&ctx->inflated_buf);
+    av_frame_free(&ctx->reference);
+
+    return 0;
+}
+
+/**
+ * Decode one packet: read the tile list, locate or inflate the pixel data,
+ * copy every tile onto the reference frame and output a reference to it.
+ *
+ * @return the packet size on success, a negative AVERROR code on failure
+ */
+static int rscc_decode_frame(AVCodecContext *avctx, void *data,
+                                     int *got_frame, AVPacket *avpkt)
+{
+    RsccContext *ctx = avctx->priv_data;
+    GetByteContext *gbc = &ctx->gbc;
+    GetByteContext tiles_gbc;
+    AVFrame *frame = data;
+    const uint8_t *pixels, *raw;
+    uint8_t *inflated_tiles = NULL;
+    int tiles_nb, packed_size, pixel_size = 0;
+    int i, ret = 0;
+
+    bytestream2_init(gbc, avpkt->data, avpkt->size);
+
+    /* Size check */
+    if (bytestream2_get_bytes_left(gbc) < 12) {
+        av_log(avctx, AV_LOG_ERROR, "Packet too small (%d)\n", avpkt->size);
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* Read number of tiles, and allocate the array */
+    tiles_nb = bytestream2_get_le16(gbc);
+    av_fast_malloc(&ctx->tiles, &ctx->tiles_size,
+                   tiles_nb * sizeof(*ctx->tiles));
+    if (!ctx->tiles) {
+        ret = AVERROR(ENOMEM);
+        goto end;
+    }
+
+    av_log(avctx, AV_LOG_DEBUG, "Frame with %d tiles.\n", tiles_nb);
+
+    /* When there are more than 5 tiles, they are packed together with
+     * a size header. When that size does not match the number of tiles
+     * times the tile size, it means it needs to be inflated as well */
+    if (tiles_nb > 5) {
+        uLongf packed_tiles_size;
+
+        if (tiles_nb < 32)
+            packed_tiles_size = bytestream2_get_byte(gbc);
+        else
+            packed_tiles_size = bytestream2_get_le16(gbc);
+
+        ff_dlog(avctx, "packed tiles of size %lu.\n", packed_tiles_size);
+
+        /* If necessary, uncompress tiles, and hijack the bytestream reader */
+        if (packed_tiles_size != tiles_nb * TILE_SIZE) {
+            uLongf length = tiles_nb * TILE_SIZE;
+
+            /* uncompress() reads packed_tiles_size bytes straight from the
+             * packet, so make sure they are really there */
+            if (bytestream2_get_bytes_left(gbc) < packed_tiles_size) {
+                av_log(avctx, AV_LOG_ERROR,
+                       "Insufficient input for %lu\n", packed_tiles_size);
+                ret = AVERROR_INVALIDDATA;
+                goto end;
+            }
+
+            inflated_tiles = av_malloc(length);
+            if (!inflated_tiles) {
+                ret = AVERROR(ENOMEM);
+                goto end;
+            }
+
+            ret = uncompress(inflated_tiles, &length,
+                             gbc->buffer, packed_tiles_size);
+            if (ret) {
+                av_log(avctx, AV_LOG_ERROR, "Tile deflate error %d.\n", ret);
+                ret = AVERROR_UNKNOWN;
+                goto end;
+            }
+
+            /* Skip the compressed tile section in the main byte reader,
+             * and point it to read the newly uncompressed data */
+            bytestream2_skip(gbc, packed_tiles_size);
+            bytestream2_init(&tiles_gbc, inflated_tiles, length);
+            gbc = &tiles_gbc;
+        }
+    }
+
+    /* Fill in array of tiles, keeping track of how many pixels are updated */
+    for (i = 0; i < tiles_nb; i++) {
+        ctx->tiles[i].x = bytestream2_get_le16(gbc);
+        ctx->tiles[i].w = bytestream2_get_le16(gbc);
+        ctx->tiles[i].y = bytestream2_get_le16(gbc);
+        ctx->tiles[i].h = bytestream2_get_le16(gbc);
+
+        ff_dlog(avctx, "tile %d orig(%d,%d) %dx%d.\n", i,
+                ctx->tiles[i].x, ctx->tiles[i].y,
+                ctx->tiles[i].w, ctx->tiles[i].h);
+
+        if (ctx->tiles[i].w == 0 || ctx->tiles[i].h == 0) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "invalid tile %d at (%d.%d) with size %dx%d.\n", i,
+                   ctx->tiles[i].x, ctx->tiles[i].y,
+                   ctx->tiles[i].w, ctx->tiles[i].h);
+            ret = AVERROR_INVALIDDATA;
+            goto end;
+        } else if (ctx->tiles[i].x + ctx->tiles[i].w > avctx->width ||
+                   ctx->tiles[i].y + ctx->tiles[i].h > avctx->height) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "out of bounds tile %d at (%d.%d) with size %dx%d.\n", i,
+                   ctx->tiles[i].x, ctx->tiles[i].y,
+                   ctx->tiles[i].w, ctx->tiles[i].h);
+            ret = AVERROR_INVALIDDATA;
+            goto end;
+        }
+
+        /* Only count the tile once it is known to fit in the picture. Tiles
+         * may overlap, so the total is not bounded by the picture size: do the
+         * check in 64 bits to keep the int accumulator from overflowing */
+        if (pixel_size + ctx->tiles[i].w * (int64_t)ctx->tiles[i].h * 4 >
+            INT_MAX) {
+            av_log(avctx, AV_LOG_ERROR, "Invalid tile dimensions\n");
+            ret = AVERROR_INVALIDDATA;
+            goto end;
+        }
+        pixel_size += ctx->tiles[i].w * ctx->tiles[i].h * 4;
+    }
+
+    /* Reset the reader in case it had been modified before */
+    gbc = &ctx->gbc;
+
+    /* Extract how much pixel data the tiles contain */
+    if (pixel_size < 0x100)
+        packed_size = bytestream2_get_byte(gbc);
+    else if (pixel_size < 0x10000)
+        packed_size = bytestream2_get_le16(gbc);
+    else if (pixel_size < 0x1000000)
+        packed_size = bytestream2_get_le24(gbc);
+    else
+        packed_size = bytestream2_get_le32(gbc);
+
+    ff_dlog(avctx, "pixel_size %d packed_size %d.\n", pixel_size, packed_size);
+
+    /* A 32-bit size with the top bit set does not fit in an int */
+    if (packed_size < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid tile size %d\n", packed_size);
+        ret = AVERROR_INVALIDDATA;
+        goto end;
+    }
+
+    /* Get pixels buffer, it may be deflated or just raw */
+    if (pixel_size == packed_size) {
+        if (bytestream2_get_bytes_left(gbc) < pixel_size) {
+            av_log(avctx, AV_LOG_ERROR, "Insufficient input for %d\n", pixel_size);
+            ret = AVERROR_INVALIDDATA;
+            goto end;
+        }
+        pixels = gbc->buffer;
+    } else {
+        uLongf len = ctx->inflated_size;
+
+        /* uncompress() reads packed_size bytes straight from the packet */
+        if (bytestream2_get_bytes_left(gbc) < packed_size) {
+            av_log(avctx, AV_LOG_ERROR, "Insufficient input for %d\n", packed_size);
+            ret = AVERROR_INVALIDDATA;
+            goto end;
+        }
+        /* The tiles are copied out of inflated_buf below, so together they
+         * must not need more bytes than the buffer holds */
+        if (ctx->inflated_size < pixel_size) {
+            av_log(avctx, AV_LOG_ERROR, "Invalid pixel size %d\n", pixel_size);
+            ret = AVERROR_INVALIDDATA;
+            goto end;
+        }
+
+        ret = uncompress(ctx->inflated_buf, &len, gbc->buffer, packed_size);
+        if (ret) {
+            av_log(avctx, AV_LOG_ERROR, "Pixel deflate error %d.\n", ret);
+            ret = AVERROR_UNKNOWN;
+            goto end;
+        }
+        pixels = ctx->inflated_buf;
+    }
+
+    /* Allocate when needed */
+    ret = ff_reget_buffer(avctx, ctx->reference);
+    if (ret < 0)
+        goto end;
+
+    /* Pointer to actual pixels, will be updated when data is consumed.
+     * dst points at row (height - y - 1) and the destination stride is
+     * negative, so successive tile rows go towards the top of the picture */
+    raw = pixels;
+    for (i = 0; i < tiles_nb; i++) {
+        uint8_t *dst = ctx->reference->data[0] + ctx->reference->linesize[0] *
+                       (avctx->height - ctx->tiles[i].y - 1) +
+                       ctx->tiles[i].x * 4;
+        av_image_copy_plane(dst, -1 * ctx->reference->linesize[0],
+                            raw, ctx->tiles[i].w * 4,
+                            ctx->tiles[i].w * 4, ctx->tiles[i].h);
+        raw += ctx->tiles[i].w * 4 * ctx->tiles[i].h;
+    }
+
+    /* Frame is ready to be output */
+    ret = av_frame_ref(frame, ctx->reference);
+    if (ret < 0)
+        goto end;
+
+    /* Keyframe when the number of pixels updated matches the whole surface */
+    if (pixel_size == ctx->inflated_size) {
+        frame->pict_type = AV_PICTURE_TYPE_I;
+        frame->key_frame = 1;
+    } else {
+        frame->pict_type = AV_PICTURE_TYPE_P;
+    }
+    *got_frame = 1;
+
+    /* Report the whole packet as consumed */
+    ret = avpkt->size;
+
+end:
+    av_free(inflated_tiles);
+    return ret;
+}
+
+AVCodec ff_rscc_decoder = {
+    .name           = "rscc",
+    .long_name      = NULL_IF_CONFIG_SMALL("innoHeim/Rsupport Screen Capture Codec"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_RSCC,
+    .init           = rscc_init,
+    .decode         = rscc_decode_frame,
+    .close          = rscc_close,
+    .priv_data_size = sizeof(RsccContext),
+    .capabilities   = AV_CODEC_CAP_DR1,
+    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE |
+                      FF_CODEC_CAP_INIT_CLEANUP,
+};
diff --git a/libavcodec/rv10.c b/libavcodec/rv10.c
index b53e602d..b56bb588 100644
--- a/libavcodec/rv10.c
+++ b/libavcodec/rv10.c
@@ -32,12 +32,14 @@
 #include "avcodec.h"
 #include "error_resilience.h"
 #include "h263.h"
+#include "h263data.h"
 #include "internal.h"
 #include "mpeg_er.h"
 #include "mpegutils.h"
 #include "mpegvideo.h"
 #include "mpeg4video.h"
 #include "mpegvideodata.h"
+#include "rv10.h"
 
 #define RV_GET_MAJOR_VER(x)  ((x) >> 28)
 #define RV_GET_MINOR_VER(x) (((x) >> 20) & 0xFF)
@@ -795,7 +797,7 @@ AVCodec ff_rv10_decoder = {
     .init           = rv10_decode_init,
     .close          = rv10_decode_end,
     .decode         = rv10_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .max_lowres     = 3,
     .pix_fmts       = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_YUV420P,
@@ -812,7 +814,7 @@ AVCodec ff_rv20_decoder = {
     .init           = rv10_decode_init,
     .close          = rv10_decode_end,
     .decode         = rv10_decode_frame,
-    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
     .flush          = ff_mpeg_flush,
     .max_lowres     = 3,
     .pix_fmts       = (const enum AVPixelFormat[]) {
diff --git a/libavcodec/vda_internal.h b/libavcodec/rv10.h
similarity index 62%
rename from libavcodec/vda_internal.h
rename to libavcodec/rv10.h
index 457916b0..364270e7 100644
--- a/libavcodec/vda_internal.h
+++ b/libavcodec/rv10.h
@@ -1,4 +1,6 @@
 /*
+ * RV10/RV20 decoder
+ *
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
@@ -16,18 +18,16 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef AVCODEC_VDA_INTERNAL_H
-#define AVCODEC_VDA_INTERNAL_H
+#ifndef AVCODEC_RV10_H
+#define AVCODEC_RV10_H
+
+#include <stdint.h>
 
-#include "vda.h"
+#include "mpegvideo.h"
 
-void ff_vda_output_callback(void *vda_hw_ctx,
-                            CFDictionaryRef user_info,
-                            OSStatus status,
-                            uint32_t infoFlags,
-                            CVImageBufferRef image_buffer);
+int ff_rv_decode_dc(MpegEncContext *s, int n);
 
-int ff_vda_default_init(AVCodecContext *avctx);
-void ff_vda_default_free(AVCodecContext *avctx);
+int ff_rv10_encode_picture_header(MpegEncContext *s, int picture_number);
+void ff_rv20_encode_picture_header(MpegEncContext *s, int picture_number);
 
-#endif /* AVCODEC_VDA_INTERNAL_H */
+#endif /* AVCODEC_RV10_H */
diff --git a/libavcodec/rv10enc.c b/libavcodec/rv10enc.c
index 45f11365..b17acbc9 100644
--- a/libavcodec/rv10enc.c
+++ b/libavcodec/rv10enc.c
@@ -27,6 +27,7 @@
 
 #include "mpegvideo.h"
 #include "put_bits.h"
+#include "rv10.h"
 
 int ff_rv10_encode_picture_header(MpegEncContext *s, int picture_number)
 {
@@ -62,7 +63,12 @@ int ff_rv10_encode_picture_header(MpegEncContext *s, int picture_number)
     return 0;
 }
 
-FF_MPV_GENERIC_CLASS(rv10)
+static const AVClass rv10_class = {
+    .class_name = "rv10 encoder",
+    .item_name  = av_default_item_name,
+    .option     = ff_mpv_generic_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
 
 AVCodec ff_rv10_encoder = {
     .name           = "rv10",
diff --git a/libavcodec/rv20enc.c b/libavcodec/rv20enc.c
index f9926d80..81fb4fc1 100644
--- a/libavcodec/rv20enc.c
+++ b/libavcodec/rv20enc.c
@@ -28,7 +28,9 @@
 #include "mpegvideo.h"
 #include "mpegvideodata.h"
 #include "h263.h"
+#include "h263data.h"
 #include "put_bits.h"
+#include "rv10.h"
 
 void ff_rv20_encode_picture_header(MpegEncContext *s, int picture_number){
     put_bits(&s->pb, 2, s->pict_type); //I 0 vs. 1 ?
@@ -58,7 +60,12 @@ void ff_rv20_encode_picture_header(MpegEncContext *s, int picture_number){
     }
 }
 
-FF_MPV_GENERIC_CLASS(rv20)
+static const AVClass rv20_class = {
+    .class_name = "rv20 encoder",
+    .item_name  = av_default_item_name,
+    .option     = ff_mpv_generic_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
 
 AVCodec ff_rv20_encoder = {
     .name           = "rv20",
diff --git a/libavcodec/rv30.c b/libavcodec/rv30.c
index 1483107c..3b9868cd 100644
--- a/libavcodec/rv30.c
+++ b/libavcodec/rv30.c
@@ -67,6 +67,9 @@ static int rv30_parse_slice_header(RV34DecContext *r, GetBitContext *gb, SliceIn
 
         w = r->s.avctx->extradata[6 + rpr*2] << 2;
         h = r->s.avctx->extradata[7 + rpr*2] << 2;
+    } else {
+        w = r->orig_width;
+        h = r->orig_height;
     }
     si->width  = w;
     si->height = h;
@@ -259,6 +262,9 @@ static av_cold int rv30_decode_init(AVCodecContext *avctx)
     RV34DecContext *r = avctx->priv_data;
     int ret;
 
+    r->orig_width  = avctx->coded_width;
+    r->orig_height = avctx->coded_height;
+
     if (avctx->extradata_size < 2) {
         av_log(avctx, AV_LOG_ERROR, "Extradata is too small.\n");
         return AVERROR(EINVAL);
@@ -291,8 +297,8 @@ AVCodec ff_rv30_decoder = {
     .init                  = rv30_decode_init,
     .close                 = ff_rv34_decode_end,
     .decode                = ff_rv34_decode_frame,
-    .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
-                             CODEC_CAP_FRAME_THREADS,
+    .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
+                             AV_CODEC_CAP_FRAME_THREADS,
     .flush                 = ff_mpeg_flush,
     .pix_fmts              = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_YUV420P,
diff --git a/libavcodec/rv34.c b/libavcodec/rv34.c
index 51e0f401..c2e84a3b 100644
--- a/libavcodec/rv34.c
+++ b/libavcodec/rv34.c
@@ -1645,6 +1645,7 @@ int ff_rv34_decode_frame(AVCodecContext *avctx,
     int slice_count;
     const uint8_t *slices_hdr = NULL;
     int last = 0;
+    int faulty_b = 0;
 
     /* no supplementary picture */
     if (buf_size == 0) {
@@ -1682,7 +1683,7 @@ int ff_rv34_decode_frame(AVCodecContext *avctx,
         si.type == AV_PICTURE_TYPE_B) {
         av_log(avctx, AV_LOG_ERROR, "Invalid decoder state: B-frame without "
                "reference data.\n");
-        return AVERROR_INVALIDDATA;
+        faulty_b = 1;
     }
     if(   (avctx->skip_frame >= AVDISCARD_NONREF && si.type==AV_PICTURE_TYPE_B)
        || (avctx->skip_frame >= AVDISCARD_NONKEY && si.type!=AV_PICTURE_TYPE_I)
@@ -1772,6 +1773,8 @@ int ff_rv34_decode_frame(AVCodecContext *avctx,
                "multithreading mode (start MB is %d).\n", si.start);
         return AVERROR_INVALIDDATA;
     }
+    if (faulty_b)
+        return AVERROR_INVALIDDATA;
 
     for(i = 0; i < slice_count; i++){
         int offset = get_slice_offset(avctx, slices_hdr, i);
diff --git a/libavcodec/rv34.h b/libavcodec/rv34.h
index 870164c6..e2f40c8e 100644
--- a/libavcodec/rv34.h
+++ b/libavcodec/rv34.h
@@ -109,6 +109,8 @@ typedef struct RV34DecContext{
     int weight1, weight2;    ///< B frame distance fractions (0.14) used in motion compensation
     int mv_weight1, mv_weight2;
 
+    int orig_width, orig_height;
+
     uint16_t *cbp_luma;      ///< CBP values for luma subblocks
     uint8_t  *cbp_chroma;    ///< CBP values for chroma subblocks
     uint16_t *deblock_coefs; ///< deblock coefficients for each macroblock
diff --git a/libavcodec/rv40.c b/libavcodec/rv40.c
index e9cd1109..3ff1554d 100644
--- a/libavcodec/rv40.c
+++ b/libavcodec/rv40.c
@@ -574,8 +574,8 @@ AVCodec ff_rv40_decoder = {
     .init                  = rv40_decode_init,
     .close                 = ff_rv34_decode_end,
     .decode                = ff_rv34_decode_frame,
-    .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
-                             CODEC_CAP_FRAME_THREADS,
+    .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
+                             AV_CODEC_CAP_FRAME_THREADS,
     .flush                 = ff_mpeg_flush,
     .pix_fmts              = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_YUV420P,
diff --git a/libavcodec/s302m.c b/libavcodec/s302m.c
index 24130d87..ccfb5913 100644
--- a/libavcodec/s302m.c
+++ b/libavcodec/s302m.c
@@ -203,11 +203,11 @@ static int s302m_decode_frame(AVCodecContext *avctx, void *data,
 
 #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_DECODING_PARAM
 static const AVOption s302m_options[] = {
-    {"non_pcm_mode", "Chooses what to do with NON-PCM", offsetof(S302Context, non_pcm_mode), FF_OPT_TYPE_INT, {.i64 = 3}, 0, 3, FLAGS, "non_pcm_mode"},
-    {"copy"        , "Pass NON-PCM through unchanged"     , 0, FF_OPT_TYPE_CONST, {.i64 = 0}, 0, 3, FLAGS, "non_pcm_mode"},
-    {"drop"        , "Drop NON-PCM"                       , 0, FF_OPT_TYPE_CONST, {.i64 = 1}, 0, 3, FLAGS, "non_pcm_mode"},
-    {"decode_copy" , "Decode if possible else passthrough", 0, FF_OPT_TYPE_CONST, {.i64 = 2}, 0, 3, FLAGS, "non_pcm_mode"},
-    {"decode_drop" , "Decode if possible else drop"       , 0, FF_OPT_TYPE_CONST, {.i64 = 3}, 0, 3, FLAGS, "non_pcm_mode"},
+    {"non_pcm_mode", "Chooses what to do with NON-PCM", offsetof(S302Context, non_pcm_mode), AV_OPT_TYPE_INT, {.i64 = 3}, 0, 3, FLAGS, "non_pcm_mode"},
+    {"copy"        , "Pass NON-PCM through unchanged"     , 0, AV_OPT_TYPE_CONST, {.i64 = 0}, 0, 3, FLAGS, "non_pcm_mode"},
+    {"drop"        , "Drop NON-PCM"                       , 0, AV_OPT_TYPE_CONST, {.i64 = 1}, 0, 3, FLAGS, "non_pcm_mode"},
+    {"decode_copy" , "Decode if possible else passthrough", 0, AV_OPT_TYPE_CONST, {.i64 = 2}, 0, 3, FLAGS, "non_pcm_mode"},
+    {"decode_drop" , "Decode if possible else drop"       , 0, AV_OPT_TYPE_CONST, {.i64 = 3}, 0, 3, FLAGS, "non_pcm_mode"},
     {NULL}
 };
 
@@ -225,6 +225,6 @@ AVCodec ff_s302m_decoder = {
     .id             = AV_CODEC_ID_S302M,
     .priv_data_size = sizeof(S302Context),
     .decode         = s302m_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .priv_class     = &s302m_class,
 };
diff --git a/libavcodec/s302menc.c b/libavcodec/s302menc.c
index e738f09d..b04a54e4 100644
--- a/libavcodec/s302menc.c
+++ b/libavcodec/s302menc.c
@@ -78,7 +78,12 @@ static int s302m_encode2_frame(AVCodecContext *avctx, AVPacket *avpkt,
     uint8_t *o;
     PutBitContext pb;
 
-    if ((ret = ff_alloc_packet2(avctx, avpkt, buf_size)) < 0)
+    if (buf_size - AES3_HEADER_LEN > UINT16_MAX) {
+        av_log(avctx, AV_LOG_ERROR, "number of samples in frame too big\n");
+        return AVERROR(EINVAL);
+    }
+
+    if ((ret = ff_alloc_packet2(avctx, avpkt, buf_size, 0)) < 0)
         return ret;
 
     o = avpkt->data;
@@ -173,6 +178,11 @@ AVCodec ff_s302m_encoder = {
     .sample_fmts           = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S32,
                                                             AV_SAMPLE_FMT_S16,
                                                             AV_SAMPLE_FMT_NONE },
-    .capabilities          = CODEC_CAP_VARIABLE_FRAME_SIZE | CODEC_CAP_EXPERIMENTAL,
+    .capabilities          = AV_CODEC_CAP_VARIABLE_FRAME_SIZE | AV_CODEC_CAP_EXPERIMENTAL,
     .supported_samplerates = (const int[]) { 48000, 0 },
+ /* .channel_layouts       = (const uint64_t[]) { AV_CH_LAYOUT_STEREO,
+                                                  AV_CH_LAYOUT_QUAD,
+                                                  AV_CH_LAYOUT_5POINT1_BACK,
+                                                  AV_CH_LAYOUT_5POINT1_BACK | AV_CH_LAYOUT_STEREO_DOWNMIX,
+                                                  0 }, */
 };
diff --git a/libavcodec/s3tc.c b/libavcodec/s3tc.c
deleted file mode 100644
index a422874d..00000000
--- a/libavcodec/s3tc.c
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * S3 Texture Compression (S3TC) decoding functions
- * Copyright (c) 2007 by Ivo van Poorten
- *
- * see also: http://wiki.multimedia.cx/index.php?title=S3TC
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "avcodec.h"
-#include "bytestream.h"
-#include "s3tc.h"
-
-static inline void dxt1_decode_pixels(GetByteContext *gb, uint32_t *d,
-                                      unsigned int w, unsigned int h,
-                                      unsigned int qstride, unsigned int flag,
-                                      uint64_t alpha) {
-    unsigned int x, y, c0, c1, a = (!flag * 255u) << 24;
-    unsigned int rb0, rb1, rb2, rb3, g0, g1, g2, g3;
-    uint32_t colors[4], pixels;
-
-    c0 = bytestream2_get_le16(gb);
-    c1 = bytestream2_get_le16(gb);
-
-    rb0  = (c0<<3 | c0<<8) & 0xf800f8;
-    rb1  = (c1<<3 | c1<<8) & 0xf800f8;
-    rb0 +=        (rb0>>5) & 0x070007;
-    rb1 +=        (rb1>>5) & 0x070007;
-    g0   =        (c0 <<5) & 0x00fc00;
-    g1   =        (c1 <<5) & 0x00fc00;
-    g0  +=        (g0 >>6) & 0x000300;
-    g1  +=        (g1 >>6) & 0x000300;
-
-    colors[0] = rb0 + g0 + a;
-    colors[1] = rb1 + g1 + a;
-
-    if (c0 > c1 || flag) {
-        rb2 = (((2*rb0+rb1) * 21) >> 6) & 0xff00ff;
-        rb3 = (((2*rb1+rb0) * 21) >> 6) & 0xff00ff;
-        g2  = (((2*g0 +g1 ) * 21) >> 6) & 0x00ff00;
-        g3  = (((2*g1 +g0 ) * 21) >> 6) & 0x00ff00;
-        colors[3] = rb3 + g3 + a;
-    } else {
-        rb2 = ((rb0+rb1) >> 1) & 0xff00ff;
-        g2  = ((g0 +g1 ) >> 1) & 0x00ff00;
-        colors[3] = 0;
-    }
-
-    colors[2] = rb2 + g2 + a;
-
-    pixels = bytestream2_get_le32(gb);
-    for (y=0; y<h; y++) {
-        for (x=0; x<w; x++) {
-            a        = (alpha & 0x0f) << 28;
-            a       += a >> 4;
-            d[x]     = a + colors[pixels&3];
-            pixels >>= 2;
-            alpha  >>= 4;
-        }
-        for (; x<4; x++) {
-            pixels >>= 2;
-            alpha  >>= 4;
-        }
-        d += qstride;
-    }
-}
-
-void ff_decode_dxt1(GetByteContext *gb, uint8_t *dst,
-                    const unsigned int w, const unsigned int h,
-                    const unsigned int stride) {
-    unsigned int x, y, qstride = stride/4;
-    uint32_t *d = (uint32_t *) dst;
-
-    for (y=0; y < h; y += 4, d += stride-w)
-        for (x = 0; x < w; d += FFMIN(4, w-x), x += 4)
-            dxt1_decode_pixels(gb, d, FFMIN(4, w-x), FFMIN(4, h-y), qstride, 0, 0LL);
-}
-
-void ff_decode_dxt3(GetByteContext *gb, uint8_t *dst,
-                    const unsigned int w, const unsigned int h,
-                    const unsigned int stride) {
-    unsigned int x, y, qstride = stride/4;
-    uint32_t *d = (uint32_t *) dst;
-
-    for (y=0; y < h; y += 4, d += stride-w)
-        for (x = 0; x < w; d += FFMIN(4, w-x), x += 4)
-            dxt1_decode_pixels(gb, d, FFMIN(4, w-x), FFMIN(4, h-y), qstride, 1, bytestream2_get_le64(gb));
-}
diff --git a/libavcodec/s3tc.h b/libavcodec/s3tc.h
deleted file mode 100644
index 2d77b3ab..00000000
--- a/libavcodec/s3tc.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * S3 Texture Compression (S3TC) decoding functions
- * Copyright (c) 2007 by Ivo van Poorten
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVCODEC_S3TC_H
-#define AVCODEC_S3TC_H
-
-#include <stdint.h>
-
-#include "bytestream.h"
-
-#define FF_S3TC_DXT1    0x31545844
-#define FF_S3TC_DXT3    0x33545844
-
-/**
- * Decode DXT1 encoded data to RGB32
- * @param gb GetByteContext
- * @param dst destination buffer
- * @param w width of output image
- * @param h height of output image
- * @param stride line size of output image
- */
-void ff_decode_dxt1(GetByteContext *gb, uint8_t *dst,
-                    const unsigned int w, const unsigned int h,
-                    const unsigned int stride);
-/**
- * Decode DXT3 encoded data to RGB32
- * @param gb GetByteContext
- * @param dst destination buffer
- * @param w width of output image
- * @param h height of output image
- * @param stride line size of output image
- */
-void ff_decode_dxt3(GetByteContext *gb, uint8_t *dst,
-                    const unsigned int w, const unsigned int h,
-                    const unsigned int stride);
-
-#endif /* AVCODEC_S3TC_H */
diff --git a/libavcodec/samidec.c b/libavcodec/samidec.c
index 47850e21..95f35abd 100644
--- a/libavcodec/samidec.c
+++ b/libavcodec/samidec.c
@@ -27,10 +27,13 @@
 #include "ass.h"
 #include "libavutil/avstring.h"
 #include "libavutil/bprint.h"
+#include "htmlsubtitles.h"
 
 typedef struct {
     AVBPrint source;
     AVBPrint content;
+    AVBPrint encoded_source;
+    AVBPrint encoded_content;
     AVBPrint full;
 } SAMIContext;
 
@@ -41,8 +44,12 @@ static int sami_paragraph_to_ass(AVCodecContext *avctx, const char *src)
     char *tag = NULL;
     char *dupsrc = av_strdup(src);
     char *p = dupsrc;
+    AVBPrint *dst_content = &sami->encoded_content;
+    AVBPrint *dst_source = &sami->encoded_source;
 
+    av_bprint_clear(&sami->encoded_content);
     av_bprint_clear(&sami->content);
+    av_bprint_clear(&sami->encoded_source);
     for (;;) {
         char *saveptr = NULL;
         int prev_chr_is_space = 0;
@@ -82,8 +89,9 @@ static int sami_paragraph_to_ass(AVCodecContext *avctx, const char *src)
             if (*p == '<') {
                 if (!av_strncasecmp(p, "<P", 2) && (p[2] == '>' || av_isspace(p[2])))
                     break;
-                if (!av_strncasecmp(p, "<BR", 3))
-                    av_bprintf(dst, "\\N");
+            }
+            if (!av_strncasecmp(p, "<BR", 3)) {
+                av_bprintf(dst, "\\N");
                 p++;
                 while (*p && *p != '>')
                     p++;
@@ -103,9 +111,12 @@ static int sami_paragraph_to_ass(AVCodecContext *avctx, const char *src)
     }
 
     av_bprint_clear(&sami->full);
-    if (sami->source.len)
-        av_bprintf(&sami->full, "{\\i1}%s{\\i0}\\N", sami->source.str);
-    av_bprintf(&sami->full, "%s", sami->content.str);
+    if (sami->source.len) {
+        ff_htmlmarkup_to_ass(avctx, dst_source, sami->source.str);
+        av_bprintf(&sami->full, "{\\i1}%s{\\i0}\\N", sami->encoded_source.str);
+    }
+    ff_htmlmarkup_to_ass(avctx, dst_content, sami->content.str);
+    av_bprintf(&sami->full, "%s", sami->encoded_content.str);
 
 end:
     av_free(dupsrc);
@@ -136,6 +147,8 @@ static av_cold int sami_init(AVCodecContext *avctx)
     SAMIContext *sami = avctx->priv_data;
     av_bprint_init(&sami->source,  0, 2048);
     av_bprint_init(&sami->content, 0, 2048);
+    av_bprint_init(&sami->encoded_source,  0, 2048);
+    av_bprint_init(&sami->encoded_content, 0, 2048);
     av_bprint_init(&sami->full,    0, 2048);
     return ff_ass_subtitle_header_default(avctx);
 }
@@ -145,6 +158,8 @@ static av_cold int sami_close(AVCodecContext *avctx)
     SAMIContext *sami = avctx->priv_data;
     av_bprint_finalize(&sami->source,  NULL);
     av_bprint_finalize(&sami->content, NULL);
+    av_bprint_finalize(&sami->encoded_source,  NULL);
+    av_bprint_finalize(&sami->encoded_content, NULL);
     av_bprint_finalize(&sami->full,    NULL);
     return 0;
 }
diff --git a/libavcodec/sanm.c b/libavcodec/sanm.c
index 2547abb8..1aa002b6 100644
--- a/libavcodec/sanm.c
+++ b/libavcodec/sanm.c
@@ -1525,5 +1525,5 @@ AVCodec ff_sanm_decoder = {
     .init           = decode_init,
     .close          = decode_end,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/sbr.h b/libavcodec/sbr.h
index ff00acba..eb7d1aec 100644
--- a/libavcodec/sbr.h
+++ b/libavcodec/sbr.h
@@ -66,9 +66,9 @@ typedef struct SBRData {
      */
     unsigned           bs_frame_class;
     unsigned           bs_add_harmonic_flag;
-    unsigned           bs_num_env;
+    AAC_SIGNE          bs_num_env;
     uint8_t            bs_freq_res[7];
-    unsigned           bs_num_noise;
+    AAC_SIGNE          bs_num_noise;
     uint8_t            bs_df_env[5];
     uint8_t            bs_df_noise[2];
     uint8_t            bs_invf_mode[2][5];
@@ -80,25 +80,27 @@ typedef struct SBRData {
      * @name State variables
      * @{
      */
-    DECLARE_ALIGNED(32, float, synthesis_filterbank_samples)[SBR_SYNTHESIS_BUF_SIZE];
-    DECLARE_ALIGNED(32, float, analysis_filterbank_samples) [1312];
+    DECLARE_ALIGNED(32, INTFLOAT, synthesis_filterbank_samples)[SBR_SYNTHESIS_BUF_SIZE];
+    DECLARE_ALIGNED(32, INTFLOAT, analysis_filterbank_samples) [1312];
     int                synthesis_filterbank_samples_offset;
     ///l_APrev and l_A
     int                e_a[2];
     ///Chirp factors
-    float              bw_array[5];
+    INTFLOAT              bw_array[5];
     ///QMF values of the original signal
-    float              W[2][32][32][2];
+    INTFLOAT              W[2][32][32][2];
     ///QMF output of the HF adjustor
     int                Ypos;
-    DECLARE_ALIGNED(16, float, Y)[2][38][64][2];
-    DECLARE_ALIGNED(16, float, g_temp)[42][48];
-    float              q_temp[42][48];
+    DECLARE_ALIGNED(16, INTFLOAT, Y)[2][38][64][2];
+    DECLARE_ALIGNED(16, AAC_FLOAT, g_temp)[42][48];
+    AAC_FLOAT          q_temp[42][48];
     uint8_t            s_indexmapped[8][48];
     ///Envelope scalefactors
-    float              env_facs[6][48];
+    uint8_t            env_facs_q[6][48];
+    AAC_FLOAT          env_facs[6][48];
     ///Noise scalefactors
-    float              noise_facs[3][5];
+    uint8_t            noise_facs_q[3][5];
+    AAC_FLOAT          noise_facs[3][5];
     ///Envelope time borders
     uint8_t            t_env[8];
     ///Envelope time border of the last envelope of the previous frame
@@ -117,18 +119,18 @@ typedef struct SpectralBandReplication SpectralBandReplication;
  */
 typedef struct AACSBRContext {
     int (*sbr_lf_gen)(AACContext *ac, SpectralBandReplication *sbr,
-                      float X_low[32][40][2], const float W[2][32][32][2],
+                      INTFLOAT X_low[32][40][2], const INTFLOAT W[2][32][32][2],
                       int buf_idx);
-    void (*sbr_hf_assemble)(float Y1[38][64][2],
-                            const float X_high[64][40][2],
+    void (*sbr_hf_assemble)(INTFLOAT Y1[38][64][2],
+                            const INTFLOAT X_high[64][40][2],
                             SpectralBandReplication *sbr, SBRData *ch_data,
                             const int e_a[2]);
-    int (*sbr_x_gen)(SpectralBandReplication *sbr, float X[2][38][64],
-                     const float Y0[38][64][2], const float Y1[38][64][2],
-                     const float X_low[32][40][2], int ch);
+    int (*sbr_x_gen)(SpectralBandReplication *sbr, INTFLOAT X[2][38][64],
+                     const INTFLOAT Y0[38][64][2], const INTFLOAT Y1[38][64][2],
+                     const INTFLOAT X_low[32][40][2], int ch);
     void (*sbr_hf_inverse_filter)(SBRDSPContext *dsp,
-                                  float (*alpha0)[2], float (*alpha1)[2],
-                                  const float X_low[32][40][2], int k0);
+                                  INTFLOAT (*alpha0)[2], INTFLOAT (*alpha1)[2],
+                                  const INTFLOAT X_low[32][40][2], int k0);
 } AACSBRContext;
 
 /**
@@ -137,6 +139,7 @@ typedef struct AACSBRContext {
 struct SpectralBandReplication {
     int                sample_rate;
     int                start;
+    int                ready_for_dequant;
     int                id_aac;
     int                reset;
     SpectrumParameters spectrum_params;
@@ -151,23 +154,23 @@ struct SpectralBandReplication {
     unsigned           bs_smoothing_mode;
     /** @} */
     unsigned           bs_coupling;
-    unsigned           k[5]; ///< k0, k1, k2
+    AAC_SIGNE          k[5]; ///< k0, k1, k2
     ///kx', and kx respectively, kx is the first QMF subband where SBR is used.
     ///kx' is its value from the previous frame
-    unsigned           kx[2];
+    AAC_SIGNE          kx[2];
     ///M' and M respectively, M is the number of QMF subbands that use SBR.
-    unsigned           m[2];
+    AAC_SIGNE          m[2];
     unsigned           kx_and_m_pushed;
     ///The number of frequency bands in f_master
-    unsigned           n_master;
+    AAC_SIGNE          n_master;
     SBRData            data[2];
     PSContext          ps;
     ///N_Low and N_High respectively, the number of frequency bands for low and high resolution
-    unsigned           n[2];
+    AAC_SIGNE          n[2];
     ///Number of noise floor bands
-    unsigned           n_q;
+    AAC_SIGNE          n_q;
     ///Number of limiter bands
-    unsigned           n_lim;
+    AAC_SIGNE          n_lim;
     ///The master QMF frequency grouping
     uint16_t           f_master[49];
     ///Frequency borders for low resolution SBR
@@ -178,33 +181,33 @@ struct SpectralBandReplication {
     uint16_t           f_tablenoise[6];
     ///Frequency borders for the limiter
     uint16_t           f_tablelim[30];
-    unsigned           num_patches;
+    AAC_SIGNE          num_patches;
     uint8_t            patch_num_subbands[6];
     uint8_t            patch_start_subband[6];
     ///QMF low frequency input to the HF generator
-    DECLARE_ALIGNED(16, float, X_low)[32][40][2];
+    DECLARE_ALIGNED(16, INTFLOAT, X_low)[32][40][2];
     ///QMF output of the HF generator
-    DECLARE_ALIGNED(16, float, X_high)[64][40][2];
+    DECLARE_ALIGNED(16, INTFLOAT, X_high)[64][40][2];
     ///QMF values of the reconstructed signal
-    DECLARE_ALIGNED(16, float, X)[2][2][38][64];
+    DECLARE_ALIGNED(16, INTFLOAT, X)[2][2][38][64];
     ///Zeroth coefficient used to filter the subband signals
-    DECLARE_ALIGNED(16, float, alpha0)[64][2];
+    DECLARE_ALIGNED(16, INTFLOAT, alpha0)[64][2];
     ///First coefficient used to filter the subband signals
-    DECLARE_ALIGNED(16, float, alpha1)[64][2];
+    DECLARE_ALIGNED(16, INTFLOAT, alpha1)[64][2];
     ///Dequantized envelope scalefactors, remapped
-    float              e_origmapped[7][48];
+    AAC_FLOAT          e_origmapped[7][48];
     ///Dequantized noise scalefactors, remapped
-    float              q_mapped[7][48];
+    AAC_FLOAT          q_mapped[7][48];
     ///Sinusoidal presence, remapped
     uint8_t            s_mapped[7][48];
     ///Estimated envelope
-    float              e_curr[7][48];
+    AAC_FLOAT          e_curr[7][48];
     ///Amplitude adjusted noise scalefactors
-    float              q_m[7][48];
+    AAC_FLOAT          q_m[7][48];
     ///Sinusoidal levels
-    float              s_m[7][48];
-    float              gain[7][48];
-    DECLARE_ALIGNED(32, float, qmf_filter_scratch)[5][64];
+    AAC_FLOAT          s_m[7][48];
+    AAC_FLOAT          gain[7][48];
+    DECLARE_ALIGNED(32, INTFLOAT, qmf_filter_scratch)[5][64];
     FFTContext         mdct_ana;
     FFTContext         mdct;
     SBRDSPContext      dsp;
diff --git a/libavcodec/sbrdsp.c b/libavcodec/sbrdsp.c
index e4f053b1..cc432b65 100644
--- a/libavcodec/sbrdsp.c
+++ b/libavcodec/sbrdsp.c
@@ -20,20 +20,14 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#define USE_FIXED 0
+
+#include "aac.h"
 #include "config.h"
 #include "libavutil/attributes.h"
 #include "libavutil/intfloat.h"
 #include "sbrdsp.h"
 
-static void sbr_sum64x5_c(float *z)
-{
-    int k;
-    for (k = 0; k < 64; k++) {
-        float f = z[k] + z[k + 64] + z[k + 128] + z[k + 192] + z[k + 256];
-        z[k] = f;
-    }
-}
-
 static float sbr_sum_square_c(float (*x)[2], int n)
 {
     float sum0 = 0.0f, sum1 = 0.0f;
@@ -101,16 +95,6 @@ static void sbr_qmf_deint_neg_c(float *v, const float *src)
     }
 }
 
-static void sbr_qmf_deint_bfly_c(float *v, const float *src0, const float *src1)
-{
-    int i;
-    for (i = 0; i < 64; i++) {
-        v[      i] = src0[i] - src1[63 - i];
-        v[127 - i] = src0[i] + src1[63 - i];
-    }
-}
-
-
 #if 0
     /* This code is slower because it multiplies memory accesses.
      * It is left for educational purposes and because it may offer
@@ -238,58 +222,4 @@ static av_always_inline void sbr_hf_apply_noise(float (*Y)[2],
     }
 }
 
-static void sbr_hf_apply_noise_0(float (*Y)[2], const float *s_m,
-                                 const float *q_filt, int noise,
-                                 int kx, int m_max)
-{
-    sbr_hf_apply_noise(Y, s_m, q_filt, noise, 1.0, 0.0, m_max);
-}
-
-static void sbr_hf_apply_noise_1(float (*Y)[2], const float *s_m,
-                                 const float *q_filt, int noise,
-                                 int kx, int m_max)
-{
-    float phi_sign = 1 - 2 * (kx & 1);
-    sbr_hf_apply_noise(Y, s_m, q_filt, noise, 0.0, phi_sign, m_max);
-}
-
-static void sbr_hf_apply_noise_2(float (*Y)[2], const float *s_m,
-                                 const float *q_filt, int noise,
-                                 int kx, int m_max)
-{
-    sbr_hf_apply_noise(Y, s_m, q_filt, noise, -1.0, 0.0, m_max);
-}
-
-static void sbr_hf_apply_noise_3(float (*Y)[2], const float *s_m,
-                                 const float *q_filt, int noise,
-                                 int kx, int m_max)
-{
-    float phi_sign = 1 - 2 * (kx & 1);
-    sbr_hf_apply_noise(Y, s_m, q_filt, noise, 0.0, -phi_sign, m_max);
-}
-
-av_cold void ff_sbrdsp_init(SBRDSPContext *s)
-{
-    s->sum64x5 = sbr_sum64x5_c;
-    s->sum_square = sbr_sum_square_c;
-    s->neg_odd_64 = sbr_neg_odd_64_c;
-    s->qmf_pre_shuffle = sbr_qmf_pre_shuffle_c;
-    s->qmf_post_shuffle = sbr_qmf_post_shuffle_c;
-    s->qmf_deint_neg = sbr_qmf_deint_neg_c;
-    s->qmf_deint_bfly = sbr_qmf_deint_bfly_c;
-    s->autocorrelate = sbr_autocorrelate_c;
-    s->hf_gen = sbr_hf_gen_c;
-    s->hf_g_filt = sbr_hf_g_filt_c;
-
-    s->hf_apply_noise[0] = sbr_hf_apply_noise_0;
-    s->hf_apply_noise[1] = sbr_hf_apply_noise_1;
-    s->hf_apply_noise[2] = sbr_hf_apply_noise_2;
-    s->hf_apply_noise[3] = sbr_hf_apply_noise_3;
-
-    if (ARCH_ARM)
-        ff_sbrdsp_init_arm(s);
-    if (ARCH_X86)
-        ff_sbrdsp_init_x86(s);
-    if (ARCH_MIPS)
-        ff_sbrdsp_init_mips(s);
-}
+#include "sbrdsp_template.c"
diff --git a/libavcodec/sbrdsp.h b/libavcodec/sbrdsp.h
index 1c1bcdfa..66852de6 100644
--- a/libavcodec/sbrdsp.h
+++ b/libavcodec/sbrdsp.h
@@ -22,29 +22,31 @@
 #define AVCODEC_SBRDSP_H
 
 #include <stdint.h>
+#include "aac_defines.h"
+#include "libavutil/softfloat.h"
 
 typedef struct SBRDSPContext {
-    void (*sum64x5)(float *z);
-    float (*sum_square)(float (*x)[2], int n);
-    void (*neg_odd_64)(float *x);
-    void (*qmf_pre_shuffle)(float *z);
-    void (*qmf_post_shuffle)(float W[32][2], const float *z);
-    void (*qmf_deint_neg)(float *v, const float *src);
-    void (*qmf_deint_bfly)(float *v, const float *src0, const float *src1);
-    void (*autocorrelate)(const float x[40][2], float phi[3][2][2]);
-    void (*hf_gen)(float (*X_high)[2], const float (*X_low)[2],
-                   const float alpha0[2], const float alpha1[2],
-                   float bw, int start, int end);
-    void (*hf_g_filt)(float (*Y)[2], const float (*X_high)[40][2],
-                      const float *g_filt, int m_max, intptr_t ixh);
-    void (*hf_apply_noise[4])(float (*Y)[2], const float *s_m,
-                              const float *q_filt, int noise,
+    void (*sum64x5)(INTFLOAT *z);
+    AAC_FLOAT (*sum_square)(INTFLOAT (*x)[2], int n);
+    void (*neg_odd_64)(INTFLOAT *x);
+    void (*qmf_pre_shuffle)(INTFLOAT *z);
+    void (*qmf_post_shuffle)(INTFLOAT W[32][2], const INTFLOAT *z);
+    void (*qmf_deint_neg)(INTFLOAT *v, const INTFLOAT *src);
+    void (*qmf_deint_bfly)(INTFLOAT *v, const INTFLOAT *src0, const INTFLOAT *src1);
+    void (*autocorrelate)(const INTFLOAT x[40][2], AAC_FLOAT phi[3][2][2]);
+    void (*hf_gen)(INTFLOAT (*X_high)[2], const INTFLOAT (*X_low)[2],
+                   const INTFLOAT alpha0[2], const INTFLOAT alpha1[2],
+                   INTFLOAT bw, int start, int end);
+    void (*hf_g_filt)(INTFLOAT (*Y)[2], const INTFLOAT (*X_high)[40][2],
+                      const AAC_FLOAT *g_filt, int m_max, intptr_t ixh);
+    void (*hf_apply_noise[4])(INTFLOAT (*Y)[2], const AAC_FLOAT *s_m,
+                              const AAC_FLOAT *q_filt, int noise,
                               int kx, int m_max);
 } SBRDSPContext;
 
-extern const float ff_sbr_noise_table[][2];
+extern const INTFLOAT AAC_RENAME(ff_sbr_noise_table)[][2];
 
-void ff_sbrdsp_init(SBRDSPContext *s);
+void AAC_RENAME(ff_sbrdsp_init)(SBRDSPContext *s);
 void ff_sbrdsp_init_arm(SBRDSPContext *s);
 void ff_sbrdsp_init_x86(SBRDSPContext *s);
 void ff_sbrdsp_init_mips(SBRDSPContext *s);
diff --git a/libavcodec/sbrdsp_fixed.c b/libavcodec/sbrdsp_fixed.c
new file mode 100644
index 00000000..f4e3de0c
--- /dev/null
+++ b/libavcodec/sbrdsp_fixed.c
@@ -0,0 +1,291 @@
+/*
+ * AAC Spectral Band Replication decoding functions
+ * Copyright (c) 2008-2009 Robert Swain ( rob opendot cl )
+ * Copyright (c) 2009-2010 Alex Converse <alex.converse@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Note: Rounding-to-nearest used unless otherwise stated
+ *
+ */
+
+#define USE_FIXED 1
+
+#include "aac.h"
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/intfloat.h"
+#include "sbrdsp.h"
+
+static SoftFloat sbr_sum_square_c(int (*x)[2], int n)
+{
+    SoftFloat ret;
+    int64_t accu = 0;
+    int i, nz, round;
+
+    for (i = 0; i < n; i += 2) {
+        // Larger values are invalid and could cause overflows of accu.
+        av_assert2(FFABS(x[i + 0][0]) >> 29 == 0);
+        accu += (int64_t)x[i + 0][0] * x[i + 0][0];
+        av_assert2(FFABS(x[i + 0][1]) >> 29 == 0);
+        accu += (int64_t)x[i + 0][1] * x[i + 0][1];
+        av_assert2(FFABS(x[i + 1][0]) >> 29 == 0);
+        accu += (int64_t)x[i + 1][0] * x[i + 1][0];
+        av_assert2(FFABS(x[i + 1][1]) >> 29 == 0);
+        accu += (int64_t)x[i + 1][1] * x[i + 1][1];
+    }
+
+    i = (int)(accu >> 32);
+    if (i == 0) {
+        nz = 1;
+    } else {
+        nz = 0;
+        while (FFABS(i) < 0x40000000) {
+            i <<= 1;
+            nz++;
+        }
+        nz = 32 - nz;
+    }
+
+    round = 1 << (nz-1);
+    i = (int)((accu + round) >> nz);
+    i >>= 1;
+    ret = av_int2sf(i, 15 - nz);
+
+    return ret;
+}
+
+static void sbr_neg_odd_64_c(int *x)
+{
+    int i;
+    for (i = 1; i < 64; i += 2)
+        x[i] = -x[i];
+}
+
+static void sbr_qmf_pre_shuffle_c(int *z)
+{
+    int k;
+    z[64] = z[0];
+    z[65] = z[1];
+    for (k = 1; k < 32; k++) {
+        z[64+2*k  ] = -z[64 - k];
+        z[64+2*k+1] =  z[ k + 1];
+    }
+}
+
+static void sbr_qmf_post_shuffle_c(int W[32][2], const int *z)
+{
+    int k;
+    for (k = 0; k < 32; k++) {
+        W[k][0] = -z[63-k];
+        W[k][1] = z[k];
+    }
+}
+
+static void sbr_qmf_deint_neg_c(int *v, const int *src)
+{
+    int i;
+    for (i = 0; i < 32; i++) {
+        v[     i] = ( src[63 - 2*i    ] + 0x10) >> 5;
+        v[63 - i] = (-src[63 - 2*i - 1] + 0x10) >> 5;
+    }
+}
+
+static av_always_inline SoftFloat autocorr_calc(int64_t accu)
+{
+        int nz, mant, expo, round;
+        int i = (int)(accu >> 32);
+        if (i == 0) {
+            nz = 1;
+        } else {
+            nz = 0;
+            while (FFABS(i) < 0x40000000) {
+                i <<= 1;
+                nz++;
+            }
+            nz = 32-nz;
+        }
+
+        round = 1 << (nz-1);
+        mant = (int)((accu + round) >> nz);
+        mant = (mant + 0x40)>>7;
+        mant <<= 6;
+        expo = nz + 15;
+        return av_int2sf(mant, 30 - expo);
+}
+
+static av_always_inline void autocorrelate(const int x[40][2], SoftFloat phi[3][2][2], int lag)
+{
+    int i;
+    int64_t real_sum, imag_sum;
+    int64_t accu_re = 0, accu_im = 0;
+
+    if (lag) {
+        for (i = 1; i < 38; i++) {
+            accu_re += (int64_t)x[i][0] * x[i+lag][0];
+            accu_re += (int64_t)x[i][1] * x[i+lag][1];
+            accu_im += (int64_t)x[i][0] * x[i+lag][1];
+            accu_im -= (int64_t)x[i][1] * x[i+lag][0];
+        }
+
+        real_sum = accu_re;
+        imag_sum = accu_im;
+
+        accu_re += (int64_t)x[ 0][0] * x[lag][0];
+        accu_re += (int64_t)x[ 0][1] * x[lag][1];
+        accu_im += (int64_t)x[ 0][0] * x[lag][1];
+        accu_im -= (int64_t)x[ 0][1] * x[lag][0];
+
+        phi[2-lag][1][0] = autocorr_calc(accu_re);
+        phi[2-lag][1][1] = autocorr_calc(accu_im);
+
+        if (lag == 1) {
+            accu_re = real_sum;
+            accu_im = imag_sum;
+            accu_re += (int64_t)x[38][0] * x[39][0];
+            accu_re += (int64_t)x[38][1] * x[39][1];
+            accu_im += (int64_t)x[38][0] * x[39][1];
+            accu_im -= (int64_t)x[38][1] * x[39][0];
+
+            phi[0][0][0] = autocorr_calc(accu_re);
+            phi[0][0][1] = autocorr_calc(accu_im);
+        }
+    } else {
+        for (i = 1; i < 38; i++) {
+            accu_re += (int64_t)x[i][0] * x[i][0];
+            accu_re += (int64_t)x[i][1] * x[i][1];
+        }
+        real_sum = accu_re;
+        accu_re += (int64_t)x[ 0][0] * x[ 0][0];
+        accu_re += (int64_t)x[ 0][1] * x[ 0][1];
+
+        phi[2][1][0] = autocorr_calc(accu_re);
+
+        accu_re = real_sum;
+        accu_re += (int64_t)x[38][0] * x[38][0];
+        accu_re += (int64_t)x[38][1] * x[38][1];
+
+        phi[1][0][0] = autocorr_calc(accu_re);
+    }
+}
+
+static void sbr_autocorrelate_c(const int x[40][2], SoftFloat phi[3][2][2])
+{
+    autocorrelate(x, phi, 0);
+    autocorrelate(x, phi, 1);
+    autocorrelate(x, phi, 2);
+}
+
+static void sbr_hf_gen_c(int (*X_high)[2], const int (*X_low)[2],
+                       const int alpha0[2], const int alpha1[2],
+                       int bw, int start, int end)
+{
+    int alpha[4];
+    int i;
+    int64_t accu;
+
+    accu = (int64_t)alpha0[0] * bw;
+    alpha[2] = (int)((accu + 0x40000000) >> 31);
+    accu = (int64_t)alpha0[1] * bw;
+    alpha[3] = (int)((accu + 0x40000000) >> 31);
+    accu = (int64_t)bw * bw;
+    bw = (int)((accu + 0x40000000) >> 31);
+    accu = (int64_t)alpha1[0] * bw;
+    alpha[0] = (int)((accu + 0x40000000) >> 31);
+    accu = (int64_t)alpha1[1] * bw;
+    alpha[1] = (int)((accu + 0x40000000) >> 31);
+
+    for (i = start; i < end; i++) {
+        accu  = (int64_t)X_low[i][0] * 0x20000000;
+        accu += (int64_t)X_low[i - 2][0] * alpha[0];
+        accu -= (int64_t)X_low[i - 2][1] * alpha[1];
+        accu += (int64_t)X_low[i - 1][0] * alpha[2];
+        accu -= (int64_t)X_low[i - 1][1] * alpha[3];
+        X_high[i][0] = (int)((accu + 0x10000000) >> 29);
+
+        accu  = (int64_t)X_low[i][1] * 0x20000000;
+        accu += (int64_t)X_low[i - 2][1] * alpha[0];
+        accu += (int64_t)X_low[i - 2][0] * alpha[1];
+        accu += (int64_t)X_low[i - 1][1] * alpha[2];
+        accu += (int64_t)X_low[i - 1][0] * alpha[3];
+        X_high[i][1] = (int)((accu + 0x10000000) >> 29);
+    }
+}
+
+static void sbr_hf_g_filt_c(int (*Y)[2], const int (*X_high)[40][2],
+                          const SoftFloat *g_filt, int m_max, intptr_t ixh)
+{
+    int m, r;
+    int64_t accu;
+
+    for (m = 0; m < m_max; m++) {
+        r = 1 << (22-g_filt[m].exp);
+        accu = (int64_t)X_high[m][ixh][0] * ((g_filt[m].mant + 0x40)>>7);
+        Y[m][0] = (int)((accu + r) >> (23-g_filt[m].exp));
+
+        accu = (int64_t)X_high[m][ixh][1] * ((g_filt[m].mant + 0x40)>>7);
+        Y[m][1] = (int)((accu + r) >> (23-g_filt[m].exp));
+    }
+}
+
+static av_always_inline void sbr_hf_apply_noise(int (*Y)[2],
+                                                const SoftFloat *s_m,
+                                                const SoftFloat *q_filt,
+                                                int noise,
+                                                int phi_sign0,
+                                                int phi_sign1,
+                                                int m_max)
+{
+    int m;
+
+    for (m = 0; m < m_max; m++) {
+        int y0 = Y[m][0];
+        int y1 = Y[m][1];
+        noise = (noise + 1) & 0x1ff;
+        if (s_m[m].mant) {
+            int shift, round;
+
+            shift = 22 - s_m[m].exp;
+            if (shift < 30) {
+                round = 1 << (shift-1);
+                y0 += (s_m[m].mant * phi_sign0 + round) >> shift;
+                y1 += (s_m[m].mant * phi_sign1 + round) >> shift;
+            }
+        } else {
+            int shift, round, tmp;
+            int64_t accu;
+
+            shift = 22 - q_filt[m].exp;
+            if (shift < 30) {
+                round = 1 << (shift-1);
+
+                accu = (int64_t)q_filt[m].mant * ff_sbr_noise_table_fixed[noise][0];
+                tmp = (int)((accu + 0x40000000) >> 31);
+                y0 += (tmp + round) >> shift;
+
+                accu = (int64_t)q_filt[m].mant * ff_sbr_noise_table_fixed[noise][1];
+                tmp = (int)((accu + 0x40000000) >> 31);
+                y1 += (tmp + round) >> shift;
+            }
+        }
+        Y[m][0] = y0;
+        Y[m][1] = y1;
+        phi_sign1 = -phi_sign1;
+    }
+}
+
+#include "sbrdsp_template.c"
diff --git a/libavcodec/sbrdsp_template.c b/libavcodec/sbrdsp_template.c
new file mode 100644
index 00000000..b649dfd7
--- /dev/null
+++ b/libavcodec/sbrdsp_template.c
@@ -0,0 +1,97 @@
+/*
+ * AAC Spectral Band Replication decoding functions
+ * Copyright (c) 2008-2009 Robert Swain ( rob opendot cl )
+ * Copyright (c) 2009-2010 Alex Converse <alex.converse@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+static void sbr_sum64x5_c(INTFLOAT *z)
+{
+    int k;
+    for (k = 0; k < 64; k++) {
+        INTFLOAT f = z[k] + z[k + 64] + z[k + 128] + z[k + 192] + z[k + 256];
+        z[k] = f;
+    }
+}
+
+static void sbr_qmf_deint_bfly_c(INTFLOAT *v, const INTFLOAT *src0, const INTFLOAT *src1)
+{
+    int i;
+    for (i = 0; i < 64; i++) {
+        v[      i] = AAC_SRA_R((src0[i] - src1[63 - i]), 5);
+        v[127 - i] = AAC_SRA_R((src0[i] + src1[63 - i]), 5);
+    }
+}
+
+static void sbr_hf_apply_noise_0(INTFLOAT (*Y)[2], const AAC_FLOAT *s_m,
+                                 const AAC_FLOAT *q_filt, int noise,
+                                 int kx, int m_max)
+{
+    sbr_hf_apply_noise(Y, s_m, q_filt, noise, (INTFLOAT)1.0, (INTFLOAT)0.0, m_max);
+}
+
+static void sbr_hf_apply_noise_1(INTFLOAT (*Y)[2], const AAC_FLOAT *s_m,
+                                 const AAC_FLOAT *q_filt, int noise,
+                                 int kx, int m_max)
+{
+    INTFLOAT phi_sign = 1 - 2 * (kx & 1);
+    sbr_hf_apply_noise(Y, s_m, q_filt, noise, (INTFLOAT)0.0, phi_sign, m_max);
+}
+
+static void sbr_hf_apply_noise_2(INTFLOAT (*Y)[2], const AAC_FLOAT *s_m,
+                                 const AAC_FLOAT *q_filt, int noise,
+                                 int kx, int m_max)
+{
+    sbr_hf_apply_noise(Y, s_m, q_filt, noise, (INTFLOAT)-1.0, (INTFLOAT)0.0, m_max);
+}
+
+static void sbr_hf_apply_noise_3(INTFLOAT (*Y)[2], const AAC_FLOAT *s_m,
+                                 const AAC_FLOAT *q_filt, int noise,
+                                 int kx, int m_max)
+{
+    INTFLOAT phi_sign = 1 - 2 * (kx & 1);
+    sbr_hf_apply_noise(Y, s_m, q_filt, noise, (INTFLOAT)0.0, -phi_sign, m_max);
+}
+
+av_cold void AAC_RENAME(ff_sbrdsp_init)(SBRDSPContext *s)
+{
+    s->sum64x5 = sbr_sum64x5_c;
+    s->sum_square = sbr_sum_square_c;
+    s->neg_odd_64 = sbr_neg_odd_64_c;
+    s->qmf_pre_shuffle = sbr_qmf_pre_shuffle_c;
+    s->qmf_post_shuffle = sbr_qmf_post_shuffle_c;
+    s->qmf_deint_neg = sbr_qmf_deint_neg_c;
+    s->qmf_deint_bfly = sbr_qmf_deint_bfly_c;
+    s->autocorrelate = sbr_autocorrelate_c;
+    s->hf_gen = sbr_hf_gen_c;
+    s->hf_g_filt = sbr_hf_g_filt_c;
+
+    s->hf_apply_noise[0] = sbr_hf_apply_noise_0;
+    s->hf_apply_noise[1] = sbr_hf_apply_noise_1;
+    s->hf_apply_noise[2] = sbr_hf_apply_noise_2;
+    s->hf_apply_noise[3] = sbr_hf_apply_noise_3;
+
+#if !USE_FIXED
+    if (ARCH_ARM)
+        ff_sbrdsp_init_arm(s);
+    if (ARCH_X86)
+        ff_sbrdsp_init_x86(s);
+    if (ARCH_MIPS)
+        ff_sbrdsp_init_mips(s);
+#endif /* !USE_FIXED */
+}
diff --git a/libavcodec/screenpresso.c b/libavcodec/screenpresso.c
new file mode 100644
index 00000000..2c4bb19b
--- /dev/null
+++ b/libavcodec/screenpresso.c
@@ -0,0 +1,182 @@
+/*
+ * Screenpresso decoder
+ * Copyright (C) 2015 Vittorio Giovara <vittorio.giovara@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Screenpresso decoder
+ *
+ * Fourcc: SPV1
+ *
+ * Screenpresso simply horizontally flips and then deflates frames,
+ * alternating full pictures and deltas. Deltas are related to the currently
+ * rebuilt frame (not the reference), and since there is no coordinate system
+ * they contain exactly as many pixels as the keyframe.
+ *
+ * Supports: BGR24
+ */
+
+#include <stdint.h>
+#include <string.h>
+#include <zlib.h>
+
+#include "libavutil/imgutils.h"
+#include "libavutil/internal.h"
+#include "libavutil/mem.h"
+
+#include "avcodec.h"
+#include "internal.h"
+
+typedef struct ScreenpressoContext {
+    AVFrame *current;
+
+    /* zlib interaction */
+    uint8_t *inflated_buf;
+    uLongf inflated_size;
+} ScreenpressoContext;
+
+static av_cold int screenpresso_close(AVCodecContext *avctx)
+{
+    ScreenpressoContext *ctx = avctx->priv_data;
+
+    av_frame_free(&ctx->current);
+    av_freep(&ctx->inflated_buf);
+
+    return 0;
+}
+
+static av_cold int screenpresso_init(AVCodecContext *avctx)
+{
+    ScreenpressoContext *ctx = avctx->priv_data;
+
+    /* These need to be set to estimate the uncompressed buffer size */
+    int ret = av_image_check_size(avctx->width, avctx->height, 0, avctx);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid image size %dx%d.\n",
+               avctx->width, avctx->height);
+        return ret;
+    }
+
+    /* Allocate current frame */
+    ctx->current = av_frame_alloc();
+    if (!ctx->current)
+        return AVERROR(ENOMEM);
+
+    avctx->pix_fmt = AV_PIX_FMT_BGR24;
+
+    /* Allocate maximum size possible, a full frame */
+    ctx->inflated_size = avctx->width * avctx->height * 3;
+    ctx->inflated_buf  = av_malloc(ctx->inflated_size);
+    if (!ctx->inflated_buf)
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
+static void sum_delta_flipped(uint8_t       *dst, int dst_linesize,
+                              const uint8_t *src, int src_linesize,
+                              int bytewidth, int height)
+{
+    int i;
+    for (; height > 0; height--) {
+        for (i = 0; i < bytewidth; i++)
+            dst[i] += src[(height - 1) * src_linesize + i];
+        dst += dst_linesize;
+    }
+}
+
+static int screenpresso_decode_frame(AVCodecContext *avctx, void *data,
+                                     int *got_frame, AVPacket *avpkt)
+{
+    ScreenpressoContext *ctx = avctx->priv_data;
+    AVFrame *frame = data;
+    uLongf length = ctx->inflated_size;
+    int keyframe;
+    int ret;
+
+    /* Size check */
+    if (avpkt->size < 3) {
+        av_log(avctx, AV_LOG_ERROR, "Packet too small (%d)\n", avpkt->size);
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* Basic sanity check, but not really harmful */
+    if ((avpkt->data[0] != 0x73 && avpkt->data[0] != 0x72) ||
+        avpkt->data[1] != 8) { // bpp probably
+        av_log(avctx, AV_LOG_WARNING, "Unknown header 0x%02X%02X\n",
+               avpkt->data[0], avpkt->data[1]);
+    }
+    keyframe = (avpkt->data[0] == 0x73);
+
+    /* Inflate the frame after the 2 byte header */
+    ret = uncompress(ctx->inflated_buf, &length,
+                     avpkt->data + 2, avpkt->size - 2);
+    if (ret) {
+        av_log(avctx, AV_LOG_ERROR, "Deflate error %d.\n", ret);
+        return AVERROR_UNKNOWN;
+    }
+
+    ret = ff_reget_buffer(avctx, ctx->current);
+    if (ret < 0)
+        return ret;
+
+    /* When a keyframe is found, copy it (flipped) */
+    if (keyframe)
+        av_image_copy_plane(ctx->current->data[0] +
+                            ctx->current->linesize[0] * (avctx->height - 1),
+                            -1 * ctx->current->linesize[0],
+                            ctx->inflated_buf, avctx->width * 3,
+                            avctx->width * 3, avctx->height);
+    /* Otherwise sum the delta on top of the current frame */
+    else
+        sum_delta_flipped(ctx->current->data[0], ctx->current->linesize[0],
+                          ctx->inflated_buf, avctx->width * 3,
+                          avctx->width * 3, avctx->height);
+
+    /* Frame is ready to be output */
+    ret = av_frame_ref(frame, ctx->current);
+    if (ret < 0)
+        return ret;
+
+    /* Usual properties */
+    if (keyframe) {
+        frame->pict_type = AV_PICTURE_TYPE_I;
+        frame->key_frame = 1;
+    } else {
+        frame->pict_type = AV_PICTURE_TYPE_P;
+    }
+    *got_frame = 1;
+
+    return 0;
+}
+
+AVCodec ff_screenpresso_decoder = {
+    .name           = "screenpresso",
+    .long_name      = NULL_IF_CONFIG_SMALL("Screenpresso"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_SCREENPRESSO,
+    .init           = screenpresso_init,
+    .decode         = screenpresso_decode_frame,
+    .close          = screenpresso_close,
+    .priv_data_size = sizeof(ScreenpressoContext),
+    .capabilities   = AV_CODEC_CAP_DR1,
+    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE |
+                      FF_CODEC_CAP_INIT_CLEANUP,
+};
diff --git a/libavcodec/sgidec.c b/libavcodec/sgidec.c
index 3ddbf77b..02ad1e11 100644
--- a/libavcodec/sgidec.c
+++ b/libavcodec/sgidec.c
@@ -296,5 +296,5 @@ AVCodec ff_sgi_decoder = {
     .priv_data_size = sizeof(SgiState),
     .decode         = decode_frame,
     .init           = sgi_decode_init,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/sgienc.c b/libavcodec/sgienc.c
index 2f45eb39..13756f16 100644
--- a/libavcodec/sgienc.c
+++ b/libavcodec/sgienc.c
@@ -19,6 +19,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "libavutil/opt.h"
+
 #include "avcodec.h"
 #include "bytestream.h"
 #include "internal.h"
@@ -28,6 +30,12 @@
 #define SGI_SINGLE_CHAN 2
 #define SGI_MULTI_CHAN 3
 
+typedef struct SgiContext {
+    AVClass *class;
+
+    int rle;
+} SgiContext;
+
 static av_cold int encode_init(AVCodecContext *avctx)
 {
     if (avctx->width > 65535 || avctx->height > 65535) {
@@ -37,25 +45,74 @@ static av_cold int encode_init(AVCodecContext *avctx)
         return AVERROR_INVALIDDATA;
     }
 
-    avctx->coded_frame = av_frame_alloc();
-    if (!avctx->coded_frame)
-        return AVERROR(ENOMEM);
-
     return 0;
 }
 
+/* Run-length encode one row of w pixels (bpp bytes each) from src into pbc.
+ * Returns the bytes written, or AVERROR_INVALIDDATA if pbc lacks room. */
+static int sgi_rle_encode(PutByteContext *pbc, const uint8_t *src,
+                          int w, int bpp)
+{
+    const int first = bytestream2_tell_p(pbc);
+    const int wide  = bpp != 1; /* values are then read with AV_RB16 */
+    void (*emit)(PutByteContext *, unsigned int) =
+        wide ? bytestream2_put_be16 : bytestream2_put_byte;
+    int val, run, pos = 0;
+
+    while (pos < w) {
+        /* try to code the upcoming pixels as one run: (count, value) */
+        run = ff_rle_count_pixels(src, w - pos, bpp, 1);
+        if (run > 1) {
+            if (bytestream2_get_bytes_left_p(pbc) < bpp * 2)
+                return AVERROR_INVALIDDATA;
+
+            val = wide ? AV_RB16(src) : *src;
+            emit(pbc, run);
+            emit(pbc, val);
+        } else {
+            int k;
+            /* no run: store count + 0x80 followed by the raw values */
+            run = ff_rle_count_pixels(src, w - pos, bpp, 0);
+            if (bytestream2_get_bytes_left_p(pbc) < bpp * (run + 1))
+                return AVERROR_INVALIDDATA;
+
+            emit(pbc, run + 0x80);
+            for (k = 0; k < run; k++) {
+                val = wide ? AV_RB16(src + k * bpp) : src[k];
+                emit(pbc, val);
+            }
+        }
+        src += run * bpp;
+        pos += run;
+    }
+
+    return bytestream2_tell_p(pbc) - first;
+}
+
 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
                         const AVFrame *frame, int *got_packet)
 {
+    SgiContext *s = avctx->priv_data;
     const AVFrame * const p = frame;
-    uint8_t *offsettab, *lengthtab, *in_buf, *encode_buf, *buf;
-    int x, y, z, length, tablesize, ret;
+    PutByteContext pbc;
+    uint8_t *in_buf, *encode_buf;
+    int x, y, z, length, tablesize, ret, i;
     unsigned int width, height, depth, dimension;
     unsigned int bytes_per_channel, pixmax, put_be;
-    unsigned char *end_buf;
 
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
     avctx->coded_frame->key_frame = 1;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+#if FF_API_CODER_TYPE
+FF_DISABLE_DEPRECATION_WARNINGS
+    if (avctx->coder_type == FF_CODER_TYPE_RAW)
+        s->rle = 0;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
     width  = avctx->width;
     height = avctx->height;
@@ -79,7 +136,6 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     case AV_PIX_FMT_GRAY16LE:
         put_be = !HAVE_BIGENDIAN;
     case AV_PIX_FMT_GRAY16BE:
-        avctx->coder_type = FF_CODER_TYPE_RAW;
         bytes_per_channel = 2;
         pixmax = 0xFFFF;
         dimension = SGI_SINGLE_CHAN;
@@ -88,7 +144,6 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     case AV_PIX_FMT_RGB48LE:
         put_be = !HAVE_BIGENDIAN;
     case AV_PIX_FMT_RGB48BE:
-        avctx->coder_type = FF_CODER_TYPE_RAW;
         bytes_per_channel = 2;
         pixmax = 0xFFFF;
         dimension = SGI_MULTI_CHAN;
@@ -97,7 +152,6 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     case AV_PIX_FMT_RGBA64LE:
         put_be = !HAVE_BIGENDIAN;
     case AV_PIX_FMT_RGBA64BE:
-        avctx->coder_type = FF_CODER_TYPE_RAW;
         bytes_per_channel = 2;
         pixmax = 0xFFFF;
         dimension = SGI_MULTI_CHAN;
@@ -109,69 +163,72 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 
     tablesize = depth * height * 4;
     length = SGI_HEADER_SIZE;
-    if (avctx->coder_type == FF_CODER_TYPE_RAW)
+    if (!s->rle)
         length += depth * height * width;
-    else // assume ff_rl_encode() produces at most 2x size of input
+    else // assume sgi_rle_encode() produces at most 2x size of input
         length += tablesize * 2 + depth * height * (2 * width + 1);
 
-    if ((ret = ff_alloc_packet2(avctx, pkt, bytes_per_channel * length)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, pkt, bytes_per_channel * length, 0)) < 0)
         return ret;
-    buf     = pkt->data;
-    end_buf = pkt->data + pkt->size;
+
+    bytestream2_init_writer(&pbc, pkt->data, pkt->size);
 
     /* Encode header. */
-    bytestream_put_be16(&buf, SGI_MAGIC);
-    bytestream_put_byte(&buf, avctx->coder_type != FF_CODER_TYPE_RAW); /* RLE 1 - VERBATIM 0*/
-    bytestream_put_byte(&buf, bytes_per_channel);
-    bytestream_put_be16(&buf, dimension);
-    bytestream_put_be16(&buf, width);
-    bytestream_put_be16(&buf, height);
-    bytestream_put_be16(&buf, depth);
-
-    bytestream_put_be32(&buf, 0L); /* pixmin */
-    bytestream_put_be32(&buf, pixmax);
-    bytestream_put_be32(&buf, 0L); /* dummy */
+    bytestream2_put_be16(&pbc, SGI_MAGIC);
+    bytestream2_put_byte(&pbc, s->rle); /* RLE 1 - VERBATIM 0 */
+    bytestream2_put_byte(&pbc, bytes_per_channel);
+    bytestream2_put_be16(&pbc, dimension);
+    bytestream2_put_be16(&pbc, width);
+    bytestream2_put_be16(&pbc, height);
+    bytestream2_put_be16(&pbc, depth);
+
+    bytestream2_put_be32(&pbc, 0L); /* pixmin */
+    bytestream2_put_be32(&pbc, pixmax);
+    bytestream2_put_be32(&pbc, 0L); /* dummy */
 
     /* name */
-    memset(buf, 0, SGI_HEADER_SIZE);
-    buf += 80;
+    for (i = 0; i < 80; i++)
+        bytestream2_put_byte(&pbc, 0L);
 
     /* colormap */
-    bytestream_put_be32(&buf, 0L);
+    bytestream2_put_be32(&pbc, 0L);
 
     /* The rest of the 512 byte header is unused. */
-    buf += 404;
-    offsettab = buf;
+    for (i = 0; i < 404; i++)
+        bytestream2_put_byte(&pbc, 0L);
+
+    if (s->rle) {
+        PutByteContext taboff_pcb, tablen_pcb;
 
-    if (avctx->coder_type != FF_CODER_TYPE_RAW) {
         /* Skip RLE offset table. */
-        buf += tablesize;
-        lengthtab = buf;
+        bytestream2_init_writer(&taboff_pcb, pbc.buffer, tablesize);
+        bytestream2_skip_p(&pbc, tablesize);
 
         /* Skip RLE length table. */
-        buf += tablesize;
+        bytestream2_init_writer(&tablen_pcb, pbc.buffer, tablesize);
+        bytestream2_skip_p(&pbc, tablesize);
 
         /* Make an intermediate consecutive buffer. */
-        if (!(encode_buf = av_malloc(width)))
+        if (!(encode_buf = av_malloc(width * bytes_per_channel)))
             return AVERROR(ENOMEM);
 
         for (z = 0; z < depth; z++) {
-            in_buf = p->data[0] + p->linesize[0] * (height - 1) + z;
+            in_buf = p->data[0] + p->linesize[0] * (height - 1) + z * bytes_per_channel;
 
             for (y = 0; y < height; y++) {
-                bytestream_put_be32(&offsettab, buf - pkt->data);
+                bytestream2_put_be32(&taboff_pcb, bytestream2_tell_p(&pbc));
 
-                for (x = 0; x < width; x++)
+                for (x = 0; x < width * bytes_per_channel; x += bytes_per_channel)
                     encode_buf[x] = in_buf[depth * x];
 
-                if ((length = ff_rle_encode(buf, end_buf - buf - 1, encode_buf, 1, width, 0, 0, 0x80, 0)) < 1) {
+                length = sgi_rle_encode(&pbc, encode_buf, width,
+                                        bytes_per_channel);
+                if (length < 1) {
                     av_free(encode_buf);
-                    return -1;
+                    return AVERROR_INVALIDDATA;
                 }
 
-                buf += length;
-                bytestream_put_byte(&buf, 0);
-                bytestream_put_be32(&lengthtab, length + 1);
+                bytestream2_put_be32(&tablen_pcb, length);
                 in_buf -= p->linesize[0];
             }
         }
@@ -183,15 +240,13 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 
             for (y = 0; y < height; y++) {
                 for (x = 0; x < width * depth; x += depth)
-                    if (bytes_per_channel == 1) {
-                        bytestream_put_byte(&buf, in_buf[x]);
-                    } else {
-                        if (put_be) {
-                            bytestream_put_be16(&buf, ((uint16_t *)in_buf)[x]);
-                        } else {
-                            bytestream_put_le16(&buf, ((uint16_t *)in_buf)[x]);
-                        }
-                    }
+                    if (bytes_per_channel == 1)
+                        bytestream2_put_byte(&pbc, in_buf[x]);
+                    else
+                        if (put_be)
+                            bytestream2_put_be16(&pbc, ((uint16_t *)in_buf)[x]);
+                        else
+                            bytestream2_put_le16(&pbc, ((uint16_t *)in_buf)[x]);
 
                 in_buf -= p->linesize[0];
             }
@@ -199,27 +254,37 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     }
 
     /* total length */
-    pkt->size   = buf - pkt->data;
+    pkt->size   = bytestream2_tell_p(&pbc);
     pkt->flags |= AV_PKT_FLAG_KEY;
     *got_packet = 1;
 
     return 0;
 }
 
-static av_cold int encode_close(AVCodecContext *avctx)
-{
-    av_frame_free(&avctx->coded_frame);
-    return 0;
-}
+#define OFFSET(x) offsetof(SgiContext, x)
+#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
+static const AVOption options[] = {
+    { "rle", "Use run-length compression", OFFSET(rle), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, VE },
+
+    { NULL },
+};
+
+static const AVClass sgi_class = {
+    .class_name = "sgi",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
 
 AVCodec ff_sgi_encoder = {
     .name      = "sgi",
     .long_name = NULL_IF_CONFIG_SMALL("SGI image"),
     .type      = AVMEDIA_TYPE_VIDEO,
     .id        = AV_CODEC_ID_SGI,
+    .priv_data_size = sizeof(SgiContext),
+    .priv_class = &sgi_class,
     .init      = encode_init,
     .encode2   = encode_frame,
-    .close     = encode_close,
     .pix_fmts  = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_RGB24, AV_PIX_FMT_RGBA,
         AV_PIX_FMT_RGB48LE, AV_PIX_FMT_RGB48BE,
diff --git a/libavcodec/sgirledec.c b/libavcodec/sgirledec.c
index 69d012e8..e7b281ac 100644
--- a/libavcodec/sgirledec.c
+++ b/libavcodec/sgirledec.c
@@ -156,5 +156,5 @@ AVCodec ff_sgirle_decoder = {
     .init           = sgirle_decode_init,
     .close          = sgirle_decode_end,
     .decode         = sgirle_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/shorten.c b/libavcodec/shorten.c
index db2e3c5f..0f5be96e 100644
--- a/libavcodec/shorten.c
+++ b/libavcodec/shorten.c
@@ -431,7 +431,7 @@ static int shorten_decode_frame(AVCodecContext *avctx, void *data,
         void *tmp_ptr;
         s->max_framesize = 8192; // should hopefully be enough for the first header
         tmp_ptr = av_fast_realloc(s->bitstream, &s->allocated_bitstream_size,
-                                  s->max_framesize + FF_INPUT_BUFFER_PADDING_SIZE);
+                                  s->max_framesize + AV_INPUT_BUFFER_PADDING_SIZE);
         if (!tmp_ptr) {
             av_log(avctx, AV_LOG_ERROR, "error allocating bitstream buffer\n");
             return AVERROR(ENOMEM);
@@ -445,7 +445,7 @@ static int shorten_decode_frame(AVCodecContext *avctx, void *data,
         buf_size       = FFMIN(buf_size, s->max_framesize - s->bitstream_size);
         input_buf_size = buf_size;
 
-        if (s->bitstream_index + s->bitstream_size + buf_size + FF_INPUT_BUFFER_PADDING_SIZE >
+        if (s->bitstream_index + s->bitstream_size + buf_size + AV_INPUT_BUFFER_PADDING_SIZE >
             s->allocated_bitstream_size) {
             memmove(s->bitstream, &s->bitstream[s->bitstream_index],
                     s->bitstream_size);
@@ -466,7 +466,8 @@ static int shorten_decode_frame(AVCodecContext *avctx, void *data,
         }
     }
     /* init and position bitstream reader */
-    init_get_bits(&s->gb, buf, buf_size * 8);
+    if ((ret = init_get_bits8(&s->gb, buf, buf_size)) < 0)
+        return ret;
     skip_bits(&s->gb, s->bitindex);
 
     /* process header or next subblock */
@@ -679,7 +680,7 @@ AVCodec ff_shorten_decoder = {
     .init           = shorten_decode_init,
     .close          = shorten_decode_close,
     .decode         = shorten_decode_frame,
-    .capabilities   = CODEC_CAP_DELAY | CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_DR1,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P,
                                                       AV_SAMPLE_FMT_U8P,
                                                       AV_SAMPLE_FMT_NONE },
diff --git a/libavcodec/simple_idct.c b/libavcodec/simple_idct.c
index eeb62799..0711e167 100644
--- a/libavcodec/simple_idct.c
+++ b/libavcodec/simple_idct.c
@@ -36,6 +36,11 @@
 
 #define BIT_DEPTH 10
 #include "simple_idct_template.c"
+
+#define EXTRA_SHIFT  2
+#include "simple_idct_template.c"
+
+#undef EXTRA_SHIFT
 #undef BIT_DEPTH
 
 #define BIT_DEPTH 12
@@ -127,7 +132,7 @@ void ff_simple_idct248_put(uint8_t *dest, int line_size, int16_t *block)
 #undef C1
 #undef C2
 #define CN_SHIFT 12
-#define C_FIX(x) ((int)((x) * 1.414213562 * (1 << CN_SHIFT) + 0.5))
+#define C_FIX(x) ((int)((x) * M_SQRT2 * (1 << CN_SHIFT) + 0.5))
 #define C1 C_FIX(0.6532814824)
 #define C2 C_FIX(0.2705980501)
 #define C3 C_FIX(0.5)
@@ -154,7 +159,7 @@ static inline void idct4col_add(uint8_t *dest, int line_size, const int16_t *col
 }
 
 #define RN_SHIFT 15
-#define R_FIX(x) ((int)((x) * 1.414213562 * (1 << RN_SHIFT) + 0.5))
+#define R_FIX(x) ((int)((x) * M_SQRT2 * (1 << RN_SHIFT) + 0.5))
 #define R1 R_FIX(0.6532814824)
 #define R2 R_FIX(0.2705980501)
 #define R3 R_FIX(0.5)
@@ -230,10 +235,10 @@ void ff_prores_idct(int16_t *block, const int16_t *qmat)
         block[i] *= qmat[i];
 
     for (i = 0; i < 8; i++)
-        idctRowCondDC_10(block + i*8, 2);
+        idctRowCondDC_extrashift_10(block + i*8, 2);
 
     for (i = 0; i < 8; i++) {
         block[i] += 8192;
-        idctSparseCol_10(block + i);
+        idctSparseCol_extrashift_10(block + i);
     }
 }
diff --git a/libavcodec/simple_idct_template.c b/libavcodec/simple_idct_template.c
index 789db8d0..0585679b 100644
--- a/libavcodec/simple_idct_template.c
+++ b/libavcodec/simple_idct_template.c
@@ -66,19 +66,26 @@
 
 #elif BIT_DEPTH == 10 || BIT_DEPTH == 12
 
-#if BIT_DEPTH == 10
-#define W1 (22725*4)  // 90901
-#define W2 (21407*4) //  85627
-#define W3 (19265*4) //  77062
-#define W4 (16384*4) //  65535
-#define W5 (12873*4) //  51491
-#define W6 ( 8867*4) //  35468
-#define W7 ( 4520*4) //  18081
-
-#define ROW_SHIFT 15
-#define COL_SHIFT 20
-#define DC_SHIFT 1
-#else
+# if BIT_DEPTH == 10
+#define W1 22725 // 90901
+#define W2 21407 //  85627
+#define W3 19265 //  77062
+#define W4 16384 //  65535
+#define W5 12873 //  51491
+#define W6  8867 //  35468
+#define W7  4520 //  18081
+
+#   ifdef EXTRA_SHIFT
+#define ROW_SHIFT 13
+#define COL_SHIFT 18
+#define DC_SHIFT  1
+#   else
+#define ROW_SHIFT 12
+#define COL_SHIFT 19
+#define DC_SHIFT  2
+#   endif
+
+# else
 #define W1 45451
 #define W2 42813
 #define W3 38531
@@ -90,7 +97,7 @@
 #define ROW_SHIFT 16
 #define COL_SHIFT 17
 #define DC_SHIFT -1
-#endif
+# endif
 
 #define MUL(a, b)    ((a) * (b))
 #define MAC(a, b, c) ((a) += (b) * (c))
@@ -101,7 +108,11 @@
 
 #endif
 
+#ifdef EXTRA_SHIFT
+static inline void FUNC(idctRowCondDC_extrashift)(int16_t *row, int extra_shift)
+#else
 static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift)
+#endif
 {
     int a0, a1, a2, a3, b0, b1, b2, b3;
 
@@ -236,6 +247,9 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift)
         }                                               \
     } while (0)
 
+#ifdef EXTRA_SHIFT
+static inline void FUNC(idctSparseCol_extrashift)(int16_t *col)
+#else
 static inline void FUNC(idctSparseColPut)(pixel *dest, int line_size,
                                           int16_t *col)
 {
@@ -285,6 +299,7 @@ static inline void FUNC(idctSparseColAdd)(pixel *dest, int line_size,
 }
 
 static inline void FUNC(idctSparseCol)(int16_t *col)
+#endif
 {
     int a0, a1, a2, a3, b0, b1, b2, b3;
 
@@ -300,6 +315,7 @@ static inline void FUNC(idctSparseCol)(int16_t *col)
     col[56] = ((a0 - b0) >> COL_SHIFT);
 }
 
+#ifndef EXTRA_SHIFT
 void FUNC(ff_simple_idct_put)(uint8_t *dest_, int line_size, int16_t *block)
 {
     pixel *dest = (pixel *)dest_;
@@ -338,3 +354,4 @@ void FUNC(ff_simple_idct)(int16_t *block)
     for (i = 0; i < 8; i++)
         FUNC(idctSparseCol)(block + i);
 }
+#endif
diff --git a/libavcodec/sinewin.c b/libavcodec/sinewin.c
index 1fa0e953..4532dc73 100644
--- a/libavcodec/sinewin.c
+++ b/libavcodec/sinewin.c
@@ -16,5 +16,6 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#define USE_FIXED 0
 #include "sinewin.h"
 #include "sinewin_tablegen.h"
diff --git a/libavcodec/sinewin.h b/libavcodec/sinewin.h
index 2268fd52..27c107ce 100644
--- a/libavcodec/sinewin.h
+++ b/libavcodec/sinewin.h
@@ -23,6 +23,7 @@
 
 #include "config.h"
 #include "libavutil/mem.h"
+#include "libavcodec/aac_defines.h"
 
 #if CONFIG_HARDCODED_TABLES
 #   define SINETABLE_CONST const
@@ -30,20 +31,24 @@
 #   define SINETABLE_CONST
 #endif
 
+#ifndef USE_FIXED
+#define USE_FIXED 0
+#endif
+
 #define SINETABLE(size) \
-    SINETABLE_CONST DECLARE_ALIGNED(32, float, ff_sine_##size)[size]
+    SINETABLE_CONST DECLARE_ALIGNED(32, INTFLOAT, AAC_RENAME(ff_sine_##size))[size]
 
 /**
  * Generate a sine window.
  * @param   window  pointer to half window
  * @param   n       size of half window
  */
-void ff_sine_window_init(float *window, int n);
+void AAC_RENAME(ff_sine_window_init)(INTFLOAT *window, int n);
 
 /**
  * initialize the specified entry of ff_sine_windows
  */
-void ff_init_ff_sine_windows(int index);
+void AAC_RENAME(ff_init_ff_sine_windows)(int index);
 
 extern SINETABLE(  32);
 extern SINETABLE(  64);
@@ -55,6 +60,6 @@ extern SINETABLE(2048);
 extern SINETABLE(4096);
 extern SINETABLE(8192);
 
-extern SINETABLE_CONST float * const ff_sine_windows[14];
+extern SINETABLE_CONST INTFLOAT * const AAC_RENAME(ff_sine_windows)[14];
 
 #endif /* AVCODEC_SINEWIN_H */
diff --git a/libavcodec/sinewin_fixed.c b/libavcodec/sinewin_fixed.c
new file mode 100644
index 00000000..27ead29e
--- /dev/null
+++ b/libavcodec/sinewin_fixed.c
@@ -0,0 +1,21 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define USE_FIXED 1
+#include "sinewin.h"
+#include "sinewin_tablegen.h"
diff --git a/libavcodec/sinewin_fixed_tablegen.c b/libavcodec/sinewin_fixed_tablegen.c
new file mode 100644
index 00000000..977e6f3c
--- /dev/null
+++ b/libavcodec/sinewin_fixed_tablegen.c
@@ -0,0 +1,24 @@
+/*
+ * Generate a header file for hardcoded sine windows
+ *
+ * Copyright (c) 2009 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define USE_FIXED 1
+#include "sinewin_tablegen_template.c"
diff --git a/libavcodec/sinewin_tablegen.c b/libavcodec/sinewin_tablegen.c
index 561ae3ea..dd602668 100644
--- a/libavcodec/sinewin_tablegen.c
+++ b/libavcodec/sinewin_tablegen.c
@@ -20,27 +20,5 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include <stdlib.h>
-#define CONFIG_HARDCODED_TABLES 0
-#define SINETABLE_CONST
-#define SINETABLE(size) \
-    float ff_sine_##size[size]
-#define FF_ARRAY_ELEMS(a) (sizeof(a) / sizeof((a)[0]))
-#include "sinewin_tablegen.h"
-#include "tableprint.h"
-
-int main(void)
-{
-    int i;
-
-    write_fileheader();
-
-    for (i = 5; i <= 13; i++) {
-        ff_init_ff_sine_windows(i);
-        printf("SINETABLE(%4i) = {\n", 1 << i);
-        write_float_array(ff_sine_windows[i], 1 << i);
-        printf("};\n");
-    }
-
-    return 0;
-}
+#define USE_FIXED 0
+#include "sinewin_tablegen_template.c"
diff --git a/libavcodec/sinewin_tablegen.h b/libavcodec/sinewin_tablegen.h
index 2b9c4f23..4432135f 100644
--- a/libavcodec/sinewin_tablegen.h
+++ b/libavcodec/sinewin_tablegen.h
@@ -27,6 +27,7 @@
 // do not use libavutil/libm.h since this is compiled both
 // for the host and the target and config.h is only valid for the target
 #include <math.h>
+#include "libavcodec/aac_defines.h"
 #include "libavutil/attributes.h"
 #include "libavutil/common.h"
 
@@ -41,26 +42,37 @@ SINETABLE(2048);
 SINETABLE(4096);
 SINETABLE(8192);
 #else
+#if USE_FIXED
+#include "libavcodec/sinewin_fixed_tables.h"
+#else
 #include "libavcodec/sinewin_tables.h"
 #endif
+#endif
+
+#if USE_FIXED
+#define SIN_FIX(a) (int)floor((a) * 0x80000000 + 0.5)
+#else
+#define SIN_FIX(a) a
+#endif
 
-SINETABLE_CONST float * const ff_sine_windows[] = {
+SINETABLE_CONST INTFLOAT * const AAC_RENAME(ff_sine_windows)[] = {
     NULL, NULL, NULL, NULL, NULL, // unused
-    ff_sine_32 , ff_sine_64 ,
-    ff_sine_128, ff_sine_256, ff_sine_512, ff_sine_1024, ff_sine_2048, ff_sine_4096, ff_sine_8192
+    AAC_RENAME(ff_sine_32) , AAC_RENAME(ff_sine_64), AAC_RENAME(ff_sine_128),
+    AAC_RENAME(ff_sine_256), AAC_RENAME(ff_sine_512), AAC_RENAME(ff_sine_1024),
+    AAC_RENAME(ff_sine_2048), AAC_RENAME(ff_sine_4096), AAC_RENAME(ff_sine_8192)
 };
 
 // Generate a sine window.
-av_cold void ff_sine_window_init(float *window, int n) {
+av_cold void AAC_RENAME(ff_sine_window_init)(INTFLOAT *window, int n) {
     int i;
     for(i = 0; i < n; i++)
-        window[i] = sinf((i + 0.5) * (M_PI / (2.0 * n)));
+        window[i] = SIN_FIX(sinf((i + 0.5) * (M_PI / (2.0 * n))));
 }
 
-av_cold void ff_init_ff_sine_windows(int index) {
-    assert(index >= 0 && index < FF_ARRAY_ELEMS(ff_sine_windows));
+av_cold void AAC_RENAME(ff_init_ff_sine_windows)(int index) {
+    assert(index >= 0 && index < FF_ARRAY_ELEMS(AAC_RENAME(ff_sine_windows)));
 #if !CONFIG_HARDCODED_TABLES
-    ff_sine_window_init(ff_sine_windows[index], 1 << index);
+    AAC_RENAME(ff_sine_window_init)(AAC_RENAME(ff_sine_windows)[index], 1 << index);
 #endif
 }
 
diff --git a/libavcodec/sinewin_tablegen_template.c b/libavcodec/sinewin_tablegen_template.c
new file mode 100644
index 00000000..43ce1ba8
--- /dev/null
+++ b/libavcodec/sinewin_tablegen_template.c
@@ -0,0 +1,54 @@
+/*
+ * Generate a header file for hardcoded sine windows
+ *
+ * Copyright (c) 2009 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdlib.h>
+#include "libavcodec/aac_defines.h"
+#define CONFIG_HARDCODED_TABLES 0
+
+#if USE_FIXED
+#define WRITE_FUNC write_int32_t_array
+#else
+#define WRITE_FUNC write_float_array
+#endif
+
+#define SINETABLE_CONST
+#define SINETABLE(size) \
+    INTFLOAT AAC_RENAME(ff_sine_##size)[size]
+#define FF_ARRAY_ELEMS(a) (sizeof(a) / sizeof((a)[0]))
+#include "sinewin_tablegen.h"
+#include "tableprint.h"
+
+/* Print a SINETABLE() initializer for each window of 2^5..2^13 entries. */
+int main(void)
+{
+    int bits;
+
+    write_fileheader();
+    for (bits = 5; bits <= 13; bits++) {
+        const int len = 1 << bits;
+        AAC_RENAME(ff_init_ff_sine_windows)(bits);
+        printf("SINETABLE(%4i) = {\n", len);
+        WRITE_FUNC(AAC_RENAME(ff_sine_windows)[bits], len);
+        printf("};\n");
+    }
+    return 0;
+}
diff --git a/libavcodec/sipr.c b/libavcodec/sipr.c
index af1edf50..595097a6 100644
--- a/libavcodec/sipr.c
+++ b/libavcodec/sipr.c
@@ -493,8 +493,8 @@ static av_cold int sipr_decoder_init(AVCodecContext * avctx)
         else if (avctx->bit_rate > 5750 ) ctx->mode = MODE_6k5;
         else                              ctx->mode = MODE_5k0;
         av_log(avctx, AV_LOG_WARNING,
-               "Invalid block_align: %d. Mode %s guessed based on bitrate: %d\n",
-               avctx->block_align, modes[ctx->mode].mode_name, avctx->bit_rate);
+               "Invalid block_align: %d. Mode %s guessed based on bitrate: %"PRId64"\n",
+               avctx->block_align, modes[ctx->mode].mode_name, (int64_t)avctx->bit_rate);
     }
 
     av_log(avctx, AV_LOG_DEBUG, "Mode: %s\n", modes[ctx->mode].mode_name);
@@ -537,7 +537,7 @@ static int sipr_decode_frame(AVCodecContext *avctx, void *data,
         av_log(avctx, AV_LOG_ERROR,
                "Error processing packet: packet size (%d) too small\n",
                avpkt->size);
-        return -1;
+        return AVERROR_INVALIDDATA;
     }
 
     /* get output buffer */
@@ -570,5 +570,5 @@ AVCodec ff_sipr_decoder = {
     .priv_data_size = sizeof(SiprContext),
     .init           = sipr_decoder_init,
     .decode         = sipr_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/smacker.c b/libavcodec/smacker.c
index b5538c74..4014e8d0 100644
--- a/libavcodec/smacker.c
+++ b/libavcodec/smacker.c
@@ -316,7 +316,9 @@ static int decode_header_trees(SmackVContext *smk) {
     full_size = AV_RL32(smk->avctx->extradata + 8);
     type_size = AV_RL32(smk->avctx->extradata + 12);
 
-    init_get_bits8(&gb, smk->avctx->extradata + 16, smk->avctx->extradata_size - 16);
+    ret = init_get_bits8(&gb, smk->avctx->extradata + 16, smk->avctx->extradata_size - 16);
+    if (ret < 0)
+        return ret;
 
     if(!get_bits1(&gb)) {
         av_log(smk->avctx, AV_LOG_INFO, "Skipping MMAP tree\n");
@@ -668,6 +670,10 @@ static int smka_decode_frame(AVCodecContext *avctx, void *data,
 
     /* get output buffer */
     frame->nb_samples = unp_size / (avctx->channels * (bits + 1));
+    if (unp_size % (avctx->channels * (bits + 1))) {
+        av_log(avctx, AV_LOG_ERROR, "unp_size %d is odd\n", unp_size);
+        return AVERROR(EINVAL);
+    }
     if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
     samples  = (int16_t *)frame->data[0];
@@ -813,7 +819,7 @@ AVCodec ff_smacker_decoder = {
     .init           = decode_init,
     .close          = decode_end,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
 
 AVCodec ff_smackaud_decoder = {
@@ -823,5 +829,5 @@ AVCodec ff_smackaud_decoder = {
     .id             = AV_CODEC_ID_SMACKAUDIO,
     .init           = smka_decode_init,
     .decode         = smka_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/smc.c b/libavcodec/smc.c
index 131300a5..a423c455 100644
--- a/libavcodec/smc.c
+++ b/libavcodec/smc.c
@@ -472,5 +472,5 @@ AVCodec ff_smc_decoder = {
     .init           = smc_decode_init,
     .close          = smc_decode_end,
     .decode         = smc_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/smvjpegdec.c b/libavcodec/smvjpegdec.c
index 45faff29..9057e861 100644
--- a/libavcodec/smvjpegdec.c
+++ b/libavcodec/smvjpegdec.c
@@ -65,7 +65,7 @@ static inline void smv_img_pnt(uint8_t *dst_data[4], uint8_t *src_data[4],
     for (i = 0; i < planes_nb; i++) {
         int h = height;
         if (i == 1 || i == 2) {
-            h = FF_CEIL_RSHIFT(height, desc->log2_chroma_h);
+            h = AV_CEIL_RSHIFT(height, desc->log2_chroma_h);
         }
         smv_img_pnt_plane(&dst_data[i], src_data[i],
             src_linesizes[i], h, nlines);
@@ -164,7 +164,9 @@ static int smvjpeg_decode_frame(AVCodecContext *avctx, void *data, int *data_siz
         return AVERROR(EINVAL);
 
     desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
-    if (desc && mjpeg_data->height % (s->frames_per_jpeg << desc->log2_chroma_h)) {
+    av_assert0(desc);
+
+    if (mjpeg_data->height % (s->frames_per_jpeg << desc->log2_chroma_h)) {
         av_log(avctx, AV_LOG_ERROR, "Invalid height\n");
         return AVERROR_INVALIDDATA;
     }
diff --git a/libavcodec/snappy.c b/libavcodec/snappy.c
new file mode 100644
index 00000000..7900b0f9
--- /dev/null
+++ b/libavcodec/snappy.c
@@ -0,0 +1,183 @@
+/*
+ * Snappy decompression algorithm
+ * Copyright (c) 2015 Luca Barbato
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/mem.h"
+
+#include "bytestream.h"
+#include "snappy.h"
+
+enum { /* element kinds, selected by the low two bits of a tag byte */
+    SNAPPY_LITERAL, /* run of bytes copied from the input */
+    SNAPPY_COPY_1,  /* back-reference, low 8 offset bits in one extra byte */
+    SNAPPY_COPY_2,  /* back-reference, 16-bit little-endian offset */
+    SNAPPY_COPY_4,  /* back-reference, 32-bit little-endian offset */
+};
+
+/* Read a little-endian base-128 varint; AVERROR_INVALIDDATA if it exceeds INT_MAX
+ * (checked in 64 bits first, because the accumulating shift below is an int shift). */
+static int64_t bytestream2_get_levarint(GetByteContext *gb)
+{
+    uint64_t val = 0;
+    int shift = 0, tmp;
+    do {
+        tmp = bytestream2_get_byte(gb);
+        if (shift > 31 || ((tmp & 127LL) << shift) > INT_MAX)
+            return AVERROR_INVALIDDATA;
+        val |= (tmp & 127) << shift;
+        shift += 7;
+    } while (tmp & 128);
+    return val;
+}
+
+static int snappy_literal(GetByteContext *gb, uint8_t *p, int size, int val)
+{
+    unsigned int len = 1;
+    /* Copy one literal run from gb to p; len is 1 + val, or 1 + an extra length field */
+    switch (val) {
+    case 63: /* 32-bit little-endian length field follows */
+        len += bytestream2_get_le32(gb);
+        break;
+    case 62: /* 24-bit little-endian length field follows */
+        len += bytestream2_get_le24(gb);
+        break;
+    case 61: /* 16-bit little-endian length field follows */
+        len += bytestream2_get_le16(gb);
+        break;
+    case 60: /* single length byte follows */
+        len += bytestream2_get_byte(gb);
+        break;
+    default: // val < 60
+        len += val;
+    }
+    /* size is the space the caller reports as left at p; never write past it */
+    if (size < len)
+        return AVERROR_INVALIDDATA;
+
+    bytestream2_get_buffer(gb, p, len);
+    /* the result of bytestream2_get_buffer() is ignored: len is returned as declared */
+    return len;
+}
+
+/* Copy len bytes from off bytes behind p to p; returns len or AVERROR_INVALIDDATA. */
+static int snappy_copy(uint8_t *start, uint8_t *p, int size,
+                       unsigned int off, int len)
+{
+    uint8_t *q;
+    int i;
+    /* off == 0 would "copy" output bytes that have not been written yet */
+    if (!off || off > p - start || size < len)
+        return AVERROR_INVALIDDATA;
+    q = p - off;
+    /* byte by byte on purpose: source and destination overlap when off < len */
+    for (i = 0; i < len; i++)
+        p[i] = q[i];
+    return len;
+}
+
+/* Copy element with a 3-bit length (4..11) and an 11-bit offset (8 bits from gb). */
+static int snappy_copy1(GetByteContext *gb, uint8_t *start, uint8_t *p,
+                        int size, int val)
+{
+    const unsigned int off_hi = (val & 0x38) << 5;
+    const unsigned int off_lo = bytestream2_get_byte(gb);
+    return snappy_copy(start, p, size, off_lo | off_hi, 4 + (val & 0x7));
+}
+
+/* Copy element: length is val + 1, offset is a 16-bit little-endian field. */
+static int snappy_copy2(GetByteContext *gb, uint8_t *start, uint8_t *p,
+                        int size, int val)
+{
+    const unsigned int back = bytestream2_get_le16(gb);
+
+    return snappy_copy(start, p, size, back, val + 1);
+}
+
+/* Copy element: length is val + 1, offset is a 32-bit little-endian field. */
+static int snappy_copy4(GetByteContext *gb, uint8_t *start, uint8_t *p,
+                        int size, int val)
+{
+    const unsigned int back = bytestream2_get_le32(gb);
+
+    return snappy_copy(start, p, size, back, val + 1);
+}
+
+/* Read the varint length preamble; AVERROR_INVALIDDATA if outside 0..UINT_MAX. */
+static int64_t decode_len(GetByteContext *gb)
+{
+    const int64_t n = bytestream2_get_levarint(gb);
+
+    if (n >= 0 && n <= UINT_MAX)
+        return n;
+    return AVERROR_INVALIDDATA;
+}
+
+/* Decode the length preamble, then rewind gb to where it was on entry. */
+int64_t ff_snappy_peek_uncompressed_length(GetByteContext *gb)
+{
+    const int remaining = bytestream2_get_bytes_left(gb);
+    const int64_t uncompressed = decode_len(gb);
+
+    bytestream2_seek(gb, -remaining, SEEK_END);
+    return uncompressed;
+}
+
+int ff_snappy_uncompress(GetByteContext *gb, uint8_t *buf, int64_t *size)
+{
+    int64_t len = decode_len(gb);
+    int ret     = 0;
+    uint8_t *p;
+    /* a negative len is the error code from decode_len(): pass it on */
+    if (len < 0)
+        return len;
+    /* on entry *size is compared against the decoded length as the capacity of buf */
+    if (len > *size)
+        return AVERROR_BUFFER_TOO_SMALL;
+
+    *size = len;
+    p     = buf;
+    /* one element per iteration: a tag byte followed by its payload */
+    while (bytestream2_get_bytes_left(gb) > 0) {
+        uint8_t s = bytestream2_get_byte(gb);
+        int val   = s >> 2;
+        /* low two bits select the element kind, val holds the upper six bits */
+        switch (s & 0x03) {
+        case SNAPPY_LITERAL:
+            ret = snappy_literal(gb, p, len, val);
+            break;
+        case SNAPPY_COPY_1:
+            ret = snappy_copy1(gb, buf, p, len, val);
+            break;
+        case SNAPPY_COPY_2:
+            ret = snappy_copy2(gb, buf, p, len, val);
+            break;
+        case SNAPPY_COPY_4:
+            ret = snappy_copy4(gb, buf, p, len, val);
+            break;
+        }
+
+        if (ret < 0)
+            return ret;
+        /* ret is the number of output bytes produced; len tracks the room left */
+        p   += ret;
+        len -= ret;
+    }
+    /* NOTE(review): input that runs out while len > 0 still returns 0 here */
+    return 0;
+}
diff --git a/libavcodec/snappy.h b/libavcodec/snappy.h
new file mode 100644
index 00000000..a65cb3aa
--- /dev/null
+++ b/libavcodec/snappy.h
@@ -0,0 +1,60 @@
+/*
+ * Snappy module
+ * Copyright (c) Luca Barbato
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Snappy decompression
+ *
+ * Snappy is a compression/decompression algorithm that does not aim for
+ * maximum compression, but rather for very high speeds and reasonable
+ * compression.
+ *
+ * http://en.wikipedia.org/wiki/Snappy_%28software%29
+ */
+
+#ifndef AVCODEC_SNAPPY_H
+#define AVCODEC_SNAPPY_H
+
+#include <stdint.h>
+
+#include "bytestream.h"
+
+/**
+ * Get the uncompressed length of an input buffer compressed using the Snappy
+ * algorithm. The GetByteContext is not advanced.
+ *
+ * @param gb    input GetByteContext.
+ * @return      The uncompressed length (>= 0) on success, a negative AVERROR otherwise.
+ */
+ int64_t ff_snappy_peek_uncompressed_length(GetByteContext *gb);
+
+/**
+ * Decompress an input buffer using Snappy algorithm.
+ *
+ * @param gb    input GetByteContext.
+ * @param buf   output buffer pointer; the uncompressed data is written here.
+ * @param size  input/output: on input, the size of buf; on output, the size
+ *              of the uncompressed data.
+ * @return      0 if success, AVERROR otherwise.
+ */
+int ff_snappy_uncompress(GetByteContext *gb, uint8_t *buf, int64_t *size);
+
+#endif /* AVCODEC_SNAPPY_H */
diff --git a/libavcodec/snow.c b/libavcodec/snow.c
index fc2e7279..a3e6afc8 100644
--- a/libavcodec/snow.c
+++ b/libavcodec/snow.c
@@ -108,8 +108,8 @@ void ff_snow_reset_contexts(SnowContext *s){ //FIXME better initial contexts
 }
 
 int ff_snow_alloc_blocks(SnowContext *s){
-    int w= FF_CEIL_RSHIFT(s->avctx->width,  LOG2_MB_SIZE);
-    int h= FF_CEIL_RSHIFT(s->avctx->height, LOG2_MB_SIZE);
+    int w= AV_CEIL_RSHIFT(s->avctx->width,  LOG2_MB_SIZE);
+    int h= AV_CEIL_RSHIFT(s->avctx->height, LOG2_MB_SIZE);
 
     s->b_width = w;
     s->b_height= h;
@@ -535,8 +535,8 @@ int ff_snow_common_init_after_header(AVCodecContext *avctx) {
         int h= s->avctx->height;
 
         if(plane_index){
-            w = FF_CEIL_RSHIFT(w, s->chroma_h_shift);
-            h = FF_CEIL_RSHIFT(h, s->chroma_v_shift);
+            w = AV_CEIL_RSHIFT(w, s->chroma_h_shift);
+            h = AV_CEIL_RSHIFT(h, s->chroma_v_shift);
         }
         s->plane[plane_index].width = w;
         s->plane[plane_index].height= h;
@@ -590,8 +590,8 @@ static int halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *fra
 
     for(p=0; p < s->nb_planes; p++){
         int is_chroma= !!p;
-        int w= is_chroma ? FF_CEIL_RSHIFT(s->avctx->width,  s->chroma_h_shift) : s->avctx->width;
-        int h= is_chroma ? FF_CEIL_RSHIFT(s->avctx->height, s->chroma_v_shift) : s->avctx->height;
+        int w= is_chroma ? AV_CEIL_RSHIFT(s->avctx->width,  s->chroma_h_shift) : s->avctx->width;
+        int h= is_chroma ? AV_CEIL_RSHIFT(s->avctx->height, s->chroma_v_shift) : s->avctx->height;
         int ls= frame->linesize[p];
         uint8_t *src= frame->data[p];
 
diff --git a/libavcodec/snow.h b/libavcodec/snow.h
index 447859f4..d126d68a 100644
--- a/libavcodec/snow.h
+++ b/libavcodec/snow.h
@@ -121,7 +121,6 @@ typedef struct SnowContext{
     H264QpelContext h264qpel;
     MpegvideoEncDSPContext mpvencdsp;
     SnowDWTContext dwt;
-    const AVFrame *new_picture;
     AVFrame *input_picture;              ///< new_picture with the internal linesizes
     AVFrame *current_picture;
     AVFrame *last_picture[MAX_REF_FRAMES];
@@ -176,6 +175,9 @@ typedef struct SnowContext{
     int memc_only;
     int no_bitstream;
     int intra_penalty;
+    int motion_est;
+    int iterative_dia_size;
+    int scenechange_threshold;
 
     MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
 
@@ -184,6 +186,9 @@ typedef struct SnowContext{
 
     AVMotionVector *avmv;
     int avmv_index;
+    uint64_t encoding_error[AV_NUM_DATA_POINTERS];
+
+    int pred;
 }SnowContext;
 
 /* Tables */
@@ -563,6 +568,8 @@ static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
         e= 0;
         while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
             e++;
+            if (e > 31)
+                return AVERROR_INVALIDDATA;
         }
 
         a= 1;
diff --git a/libavcodec/snowdec.c b/libavcodec/snowdec.c
index e12cb21f..042aecbb 100644
--- a/libavcodec/snowdec.c
+++ b/libavcodec/snowdec.c
@@ -104,8 +104,11 @@ static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer
             avmv->h = block_h;
             avmv->dst_x = block_w*mb_x - block_w/2;
             avmv->dst_y = block_h*mb_y - block_h/2;
-            avmv->src_x = avmv->dst_x + (bn->mx * s->mv_scale)/8;
-            avmv->src_y = avmv->dst_y + (bn->my * s->mv_scale)/8;
+            avmv->motion_scale = 8;
+            avmv->motion_x = bn->mx * s->mv_scale;
+            avmv->motion_y = bn->my * s->mv_scale;
+            avmv->src_x = avmv->dst_x + avmv->motion_x / 8;
+            avmv->src_y = avmv->dst_y + avmv->motion_y / 8;
             avmv->source= -1 - bn->ref;
             avmv->flags = 0;
         }
@@ -477,7 +480,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
               );
 
     av_assert0(!s->avmv);
-    if (s->avctx->flags2 & CODEC_FLAG2_EXPORT_MVS) {
+    if (s->avctx->flags2 & AV_CODEC_FLAG2_EXPORT_MVS) {
         s->avmv = av_malloc_array(s->b_width * s->b_height, sizeof(AVMotionVector) << (s->block_max_depth*2));
     }
     s->avmv_index = 0;
@@ -642,7 +645,7 @@ AVCodec ff_snow_decoder = {
     .init           = decode_init,
     .close          = decode_end,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1 /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
+    .capabilities   = AV_CODEC_CAP_DR1 /*| AV_CODEC_CAP_DRAW_HORIZ_BAND*/,
     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE |
                       FF_CODEC_CAP_INIT_CLEANUP,
 };
diff --git a/libavcodec/snowenc.c b/libavcodec/snowenc.c
index bd5c0fde..b1d177d3 100644
--- a/libavcodec/snowenc.c
+++ b/libavcodec/snowenc.c
@@ -19,6 +19,7 @@
  */
 
 #include "libavutil/intmath.h"
+#include "libavutil/libm.h"
 #include "libavutil/log.h"
 #include "libavutil/opt.h"
 #include "avcodec.h"
@@ -32,23 +33,38 @@
 #include "mpegvideo.h"
 #include "h263.h"
 
+#define FF_ME_ITER 50
+
 static av_cold int encode_init(AVCodecContext *avctx)
 {
     SnowContext *s = avctx->priv_data;
     int plane_index, ret;
     int i;
 
-    if(avctx->prediction_method == DWT_97
-       && (avctx->flags & CODEC_FLAG_QSCALE)
+#if FF_API_PRIVATE_OPT
+FF_DISABLE_DEPRECATION_WARNINGS
+    if (avctx->prediction_method)
+        s->pred = avctx->prediction_method;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+    if(s->pred == DWT_97
+       && (avctx->flags & AV_CODEC_FLAG_QSCALE)
        && avctx->global_quality == 0){
         av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
         return -1;
     }
+#if FF_API_MOTION_EST
+FF_DISABLE_DEPRECATION_WARNINGS
+    if (avctx->me_method == ME_ITER)
+        s->motion_est = FF_ME_ITER;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
-    s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
+    s->spatial_decomposition_type= s->pred; //FIXME add decorrelator type r transform_type
 
-    s->mv_scale       = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
-    s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
+    s->mv_scale       = (avctx->flags & AV_CODEC_FLAG_QPEL) ? 2 : 4;
+    s->block_max_depth= (avctx->flags & AV_CODEC_FLAG_4MV ) ? 1 : 0;
 
     for(plane_index=0; plane_index<3; plane_index++){
         s->plane[plane_index].diag_mc= 1;
@@ -83,18 +99,18 @@ static av_cold int encode_init(AVCodecContext *avctx)
 
     s->max_ref_frames = av_clip(avctx->refs, 1, MAX_REF_FRAMES);
 
-    if(avctx->flags&CODEC_FLAG_PASS1){
+    if(avctx->flags&AV_CODEC_FLAG_PASS1){
         if(!avctx->stats_out)
             avctx->stats_out = av_mallocz(256);
 
         if (!avctx->stats_out)
             return AVERROR(ENOMEM);
     }
-    if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
+    if((avctx->flags&AV_CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
         if(ff_rate_control_init(&s->m) < 0)
             return -1;
     }
-    s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
+    s->pass1_rc= !(avctx->flags & (AV_CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
 
     switch(avctx->pix_fmt){
     case AV_PIX_FMT_YUV444P:
@@ -122,14 +138,13 @@ static av_cold int encode_init(AVCodecContext *avctx)
     ff_set_cmp(&s->mecc, s->mecc.me_sub_cmp, s->avctx->me_sub_cmp);
 
     s->input_picture = av_frame_alloc();
-    avctx->coded_frame = av_frame_alloc();
-    if (!s->input_picture || !avctx->coded_frame)
+    if (!s->input_picture)
         return AVERROR(ENOMEM);
 
     if ((ret = ff_snow_get_buffer(s, s->input_picture)) < 0)
         return ret;
 
-    if(s->avctx->me_method == ME_ITER){
+    if(s->motion_est == FF_ME_ITER){
         int size= s->b_width * s->b_height << 2*s->block_max_depth;
         for(i=0; i<s->max_ref_frames; i++){
             s->ref_mvs[i]= av_mallocz_array(size, sizeof(int16_t[2]));
@@ -242,7 +257,7 @@ static int encode_q_branch(SnowContext *s, int level, int x, int y){
                                 s->input_picture->data[2] + ((x*block_w)>>s->chroma_h_shift) + ((y*uvstride*block_w)>>s->chroma_v_shift)};
     int P[10][2];
     int16_t last_mv[3][2];
-    int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
+    int qpel= !!(s->avctx->flags & AV_CODEC_FLAG_QPEL); //unused
     const int shift= 1+qpel;
     MotionEstContext *c= &s->m.me;
     int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
@@ -284,7 +299,7 @@ static int encode_q_branch(SnowContext *s, int level, int x, int y){
     c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
     c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
     c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
-    c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
+    c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_DMV;
 
     c->xmin = - x*block_w - 16+3;
     c->ymin = - y*block_w - 16+3;
@@ -1113,8 +1128,9 @@ static void iterative_me(SnowContext *s){
                     do{
                         int newx = block->mx;
                         int newy = block->my;
+                        int dia_size = s->iterative_dia_size ? s->iterative_dia_size : FFMAX(s->avctx->dia_size, 1);
                         dia_change=0;
-                        for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
+                        for(i=0; i < dia_size; i++){
                             for(j=0; j<i; j++){
                                 dia_change |= check_block_inter(s, mb_x, mb_y, newx+4*(i-j), newy+(4*j), obmc_edged, &best_rd);
                                 dia_change |= check_block_inter(s, mb_x, mb_y, newx-4*(i-j), newy-(4*j), obmc_edged, &best_rd);
@@ -1207,7 +1223,7 @@ static void encode_blocks(SnowContext *s, int search){
     int w= s->b_width;
     int h= s->b_height;
 
-    if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
+    if(s->motion_est == FF_ME_ITER && !s->keyframe && search)
         iterative_me(s);
 
     for(y=0; y<h; y++){
@@ -1216,7 +1232,7 @@ static void encode_blocks(SnowContext *s, int search){
             return;
         }
         for(x=0; x<w; x++){
-            if(s->avctx->me_method == ME_ITER || !search)
+            if(s->motion_est == FF_ME_ITER || !search)
                 encode_q_branch2(s, 0, x, y);
             else
                 encode_q_branch (s, 0, x, y);
@@ -1462,7 +1478,7 @@ static void update_last_header_values(SnowContext *s){
 }
 
 static int qscale2qlog(int qscale){
-    return rint(QROOT*log2(qscale / (float)FF_QP2LAMBDA))
+    return lrint(QROOT*log2(qscale / (float)FF_QP2LAMBDA))
            + 61*QROOT/8; ///< 64 > 60
 }
 
@@ -1539,7 +1555,7 @@ static void calculate_visual_weight(SnowContext *s, Plane *p){
                 }
             }
 
-            b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
+            b->qlog= (int)(QROOT * log2(352256.0/sqrt(error)) + 0.5);
         }
     }
 }
@@ -1549,14 +1565,14 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 {
     SnowContext *s = avctx->priv_data;
     RangeCoder * const c= &s->c;
-    AVFrame *pic = pict;
+    AVFrame *pic;
     const int width= s->avctx->width;
     const int height= s->avctx->height;
     int level, orientation, plane_index, i, y, ret;
     uint8_t rc_header_bak[sizeof(s->header_state)];
     uint8_t rc_block_bak[sizeof(s->block_state)];
 
-    if ((ret = ff_alloc_packet2(avctx, pkt, s->b_width*s->b_height*MB_SIZE*MB_SIZE*3 + FF_MIN_BUFFER_SIZE)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, pkt, s->b_width*s->b_height*MB_SIZE*MB_SIZE*3 + AV_INPUT_BUFFER_MIN_SIZE, 0)) < 0)
         return ret;
 
     ff_init_range_encoder(c, pkt->data, pkt->size);
@@ -1565,24 +1581,26 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     for(i=0; i < s->nb_planes; i++){
         int hshift= i ? s->chroma_h_shift : 0;
         int vshift= i ? s->chroma_v_shift : 0;
-        for(y=0; y<FF_CEIL_RSHIFT(height, vshift); y++)
+        for(y=0; y<AV_CEIL_RSHIFT(height, vshift); y++)
             memcpy(&s->input_picture->data[i][y * s->input_picture->linesize[i]],
                    &pict->data[i][y * pict->linesize[i]],
-                   FF_CEIL_RSHIFT(width, hshift));
+                   AV_CEIL_RSHIFT(width, hshift));
         s->mpvencdsp.draw_edges(s->input_picture->data[i], s->input_picture->linesize[i],
-                                FF_CEIL_RSHIFT(width, hshift), FF_CEIL_RSHIFT(height, vshift),
+                                AV_CEIL_RSHIFT(width, hshift), AV_CEIL_RSHIFT(height, vshift),
                                 EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
                                 EDGE_TOP | EDGE_BOTTOM);
 
     }
     emms_c();
-    s->new_picture = pict;
+    pic = s->input_picture;
+    pic->pict_type = pict->pict_type;
+    pic->quality = pict->quality;
 
     s->m.picture_number= avctx->frame_number;
-    if(avctx->flags&CODEC_FLAG_PASS2){
+    if(avctx->flags&AV_CODEC_FLAG_PASS2){
         s->m.pict_type = pic->pict_type = s->m.rc_context.entry[avctx->frame_number].new_pict_type;
         s->keyframe = pic->pict_type == AV_PICTURE_TYPE_I;
-        if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
+        if(!(avctx->flags&AV_CODEC_FLAG_QSCALE)) {
             pic->quality = ff_rate_estimate_qscale(&s->m, 0);
             if (pic->quality < 0)
                 return -1;
@@ -1598,12 +1616,16 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         s->qlog   = qscale2qlog(pic->quality);
         s->lambda = pic->quality * 3/2;
     }
-    if (s->qlog < 0 || (!pic->quality && (avctx->flags & CODEC_FLAG_QSCALE))) {
+    if (s->qlog < 0 || (!pic->quality && (avctx->flags & AV_CODEC_FLAG_QSCALE))) {
         s->qlog= LOSSLESS_QLOG;
         s->lambda = 0;
     }//else keep previous frame's qlog until after motion estimation
 
-    if (s->current_picture->data[0] && !(s->avctx->flags&CODEC_FLAG_EMU_EDGE)) {
+    if (s->current_picture->data[0]
+#if FF_API_EMU_EDGE
+        && !(s->avctx->flags&CODEC_FLAG_EMU_EDGE)
+#endif
+        ) {
         int w = s->avctx->width;
         int h = s->avctx->height;
 
@@ -1651,10 +1673,13 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         s->m.b8_stride= 2*s->m.mb_width+1;
         s->m.f_code=1;
         s->m.pict_type = pic->pict_type;
+#if FF_API_MOTION_EST
         s->m.me_method= s->avctx->me_method;
+#endif
+        s->m.motion_est= s->motion_est;
         s->m.me.scene_change_score=0;
         s->m.me.dia_size = avctx->dia_size;
-        s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
+        s->m.quarter_sample= (s->avctx->flags & AV_CODEC_FLAG_QPEL)!=0;
         s->m.out_format= FMT_H263;
         s->m.unrestricted_mv= 1;
 
@@ -1721,10 +1746,17 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
                 }
             predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
 
+#if FF_API_PRIVATE_OPT
+FF_DISABLE_DEPRECATION_WARNINGS
+            if(s->avctx->scenechange_threshold)
+                s->scenechange_threshold = s->avctx->scenechange_threshold;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
             if(   plane_index==0
                && pic->pict_type == AV_PICTURE_TYPE_P
-               && !(avctx->flags&CODEC_FLAG_PASS2)
-               && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
+               && !(avctx->flags&AV_CODEC_FLAG_PASS2)
+               && s->m.me.scene_change_score > s->scenechange_threshold){
                 ff_init_range_encoder(c, pkt->data, pkt->size);
                 ff_build_rac_states(c, (1LL<<32)/20, 256-8);
                 pic->pict_type= AV_PICTURE_TYPE_I;
@@ -1809,7 +1841,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
                 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
             }
         }
-        if(s->avctx->flags&CODEC_FLAG_PSNR){
+        if(s->avctx->flags&AV_CODEC_FLAG_PSNR){
             int64_t error= 0;
 
             if(pict->data[plane_index]) //FIXME gray hack
@@ -1820,7 +1852,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
                     }
                 }
             s->avctx->error[plane_index] += error;
-            s->current_picture->error[plane_index] = error;
+            s->encoding_error[plane_index] = error;
         }
 
     }
@@ -1830,8 +1862,8 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     ff_snow_release_buffer(avctx);
 
     s->current_picture->coded_picture_number = avctx->frame_number;
-    s->current_picture->pict_type = pict->pict_type;
-    s->current_picture->quality = pict->quality;
+    s->current_picture->pict_type = pic->pict_type;
+    s->current_picture->quality = pic->quality;
     s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
     s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
     s->m.current_picture.f->display_picture_number =
@@ -1841,7 +1873,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     if(s->pass1_rc)
         if (ff_rate_estimate_qscale(&s->m, 0) < 0)
             return -1;
-    if(avctx->flags&CODEC_FLAG_PASS1)
+    if(avctx->flags&AV_CODEC_FLAG_PASS1)
         ff_write_pass1_stats(&s->m);
     s->m.last_pict_type = s->m.pict_type;
     avctx->frame_bits = s->m.frame_bits;
@@ -1851,8 +1883,19 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 
     emms_c();
 
+    ff_side_data_set_encoder_stats(pkt, s->current_picture->quality,
+                                   s->encoding_error,
+                                   (s->avctx->flags&AV_CODEC_FLAG_PSNR) ? 4 : 0,
+                                   s->current_picture->pict_type);
+
+#if FF_API_ERROR_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
+    memcpy(s->current_picture->error, s->encoding_error, sizeof(s->encoding_error));
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
     pkt->size = ff_rac_terminate(c);
-    if (avctx->coded_frame->key_frame)
+    if (s->current_picture->key_frame)
         pkt->flags |= AV_PKT_FLAG_KEY;
     *got_packet = 1;
 
@@ -1866,7 +1909,6 @@ static av_cold int encode_end(AVCodecContext *avctx)
     ff_snow_common_end(s);
     ff_rate_control_uninit(&s->m);
     av_frame_free(&s->input_picture);
-    av_frame_free(&avctx->coded_frame);
     av_freep(&avctx->stats_out);
 
     return 0;
@@ -1876,9 +1918,15 @@ static av_cold int encode_end(AVCodecContext *avctx)
 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 static const AVOption options[] = {
     FF_MPV_COMMON_OPTS
-    { "memc_only",      "Only do ME/MC (I frames -> ref, P frame -> ME+MC).",   OFFSET(memc_only), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
-    { "no_bitstream",   "Skip final bitstream writeout.",                    OFFSET(no_bitstream), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
+    { "iter",           NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_ME_ITER }, 0, 0, FF_MPV_OPT_FLAGS, "motion_est" },
+    { "memc_only",      "Only do ME/MC (I frames -> ref, P frame -> ME+MC).",   OFFSET(memc_only), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
+    { "no_bitstream",   "Skip final bitstream writeout.",                    OFFSET(no_bitstream), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
     { "intra_penalty",  "Penalty for intra blocks in block decission",      OFFSET(intra_penalty), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
+    { "iterative_dia_size",  "Dia size for the iterative ME",          OFFSET(iterative_dia_size), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
+    { "sc_threshold",   "Scene change threshold",                   OFFSET(scenechange_threshold), AV_OPT_TYPE_INT, { .i64 = 0 }, INT_MIN, INT_MAX, VE },
+    { "pred",           "Spatial decomposition type",                                OFFSET(pred), AV_OPT_TYPE_INT, { .i64 = 0 }, DWT_97, DWT_53, VE, "pred" },
+        { "dwt97", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, INT_MIN, INT_MAX, VE, "pred" },
+        { "dwt53", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, INT_MIN, INT_MAX, VE, "pred" },
     { NULL },
 };
 
diff --git a/libavcodec/sonic.c b/libavcodec/sonic.c
index c5076f9d..2e3ca79f 100644
--- a/libavcodec/sonic.c
+++ b/libavcodec/sonic.c
@@ -727,7 +727,7 @@ static int sonic_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     const short *samples = (const int16_t*)frame->data[0];
     uint8_t state[32];
 
-    if ((ret = ff_alloc_packet2(avctx, avpkt, s->frame_size * 5 + 1000)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, avpkt, s->frame_size * 5 + 1000, 0)) < 0)
         return ret;
 
     ff_init_range_encoder(&c, avpkt->data, avpkt->size);
@@ -858,6 +858,7 @@ static av_cold int sonic_decode_init(AVCodecContext *avctx)
     SonicContext *s = avctx->priv_data;
     GetBitContext gb;
     int i;
+    int ret;
 
     s->channels = avctx->channels;
     s->samplerate = avctx->sample_rate;
@@ -868,7 +869,9 @@ static av_cold int sonic_decode_init(AVCodecContext *avctx)
         return AVERROR_INVALIDDATA;
     }
 
-    init_get_bits8(&gb, avctx->extradata, avctx->extradata_size);
+    ret = init_get_bits8(&gb, avctx->extradata, avctx->extradata_size);
+    if (ret < 0)
+        return ret;
 
     s->version = get_bits(&gb, 2);
     if (s->version >= 2) {
@@ -925,6 +928,13 @@ static av_cold int sonic_decode_init(AVCodecContext *avctx)
     s->frame_size = s->channels*s->block_align*s->downsampling;
 //    avctx->frame_size = s->block_align;
 
+    if (s->num_taps * s->channels > s->frame_size) {
+        av_log(avctx, AV_LOG_ERROR,
+               "number of taps times channels (%d * %d) larger than frame size %d\n",
+               s->num_taps, s->channels, s->frame_size);
+        return AVERROR_INVALIDDATA;
+    }
+
     av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d.%d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
         s->version, s->minor_version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
 
@@ -1081,7 +1091,7 @@ AVCodec ff_sonic_decoder = {
     .init           = sonic_decode_init,
     .close          = sonic_decode_close,
     .decode         = sonic_decode_frame,
-    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_EXPERIMENTAL,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_EXPERIMENTAL,
 };
 #endif /* CONFIG_SONIC_DECODER */
 
@@ -1095,7 +1105,7 @@ AVCodec ff_sonic_encoder = {
     .init           = sonic_encode_init,
     .encode2        = sonic_encode_frame,
     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE },
-    .capabilities   = CODEC_CAP_EXPERIMENTAL,
+    .capabilities   = AV_CODEC_CAP_EXPERIMENTAL,
     .close          = sonic_encode_close,
 };
 #endif
@@ -1110,7 +1120,7 @@ AVCodec ff_sonic_ls_encoder = {
     .init           = sonic_encode_init,
     .encode2        = sonic_encode_frame,
     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE },
-    .capabilities   = CODEC_CAP_EXPERIMENTAL,
+    .capabilities   = AV_CODEC_CAP_EXPERIMENTAL,
     .close          = sonic_encode_close,
 };
 #endif
diff --git a/libavcodec/sp5xdec.c b/libavcodec/sp5xdec.c
index 3e49c59e..815f9ad5 100644
--- a/libavcodec/sp5xdec.c
+++ b/libavcodec/sp5xdec.c
@@ -104,7 +104,7 @@ AVCodec ff_sp5x_decoder = {
     .init           = ff_mjpeg_decode_init,
     .close          = ff_mjpeg_decode_end,
     .decode         = sp5x_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .max_lowres     = 3,
     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE,
 };
@@ -120,6 +120,7 @@ AVCodec ff_amv_decoder = {
     .close          = ff_mjpeg_decode_end,
     .decode         = sp5x_decode_frame,
     .max_lowres     = 3,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE,
 };
 #endif
diff --git a/libavcodec/srtdec.c b/libavcodec/srtdec.c
index ed3af950..542dd357 100644
--- a/libavcodec/srtdec.c
+++ b/libavcodec/srtdec.c
@@ -25,46 +25,11 @@
 #include "libavutil/parseutils.h"
 #include "avcodec.h"
 #include "ass.h"
-
-static int html_color_parse(AVCodecContext *avctx, const char *str)
-{
-    uint8_t rgba[4];
-    if (av_parse_color(rgba, str, strcspn(str, "\" >"), avctx) < 0)
-        return -1;
-    return rgba[0] | rgba[1] << 8 | rgba[2] << 16;
-}
-
-enum {
-    PARAM_UNKNOWN = -1,
-    PARAM_SIZE,
-    PARAM_COLOR,
-    PARAM_FACE,
-    PARAM_NUMBER
-};
-
-typedef struct SrtStack {
-    char tag[128];
-    char param[PARAM_NUMBER][128];
-} SrtStack;
-
-static void rstrip_spaces_buf(AVBPrint *buf)
-{
-    while (buf->len > 0 && buf->str[buf->len - 1] == ' ')
-        buf->str[--buf->len] = 0;
-}
+#include "htmlsubtitles.h"
 
 static void srt_to_ass(AVCodecContext *avctx, AVBPrint *dst,
                        const char *in, int x1, int y1, int x2, int y2)
 {
-    char *param, buffer[128], tmp[128];
-    int len, tag_close, sptr = 1, line_start = 1, an = 0, end = 0;
-    SrtStack stack[16];
-
-    stack[0].tag[0] = 0;
-    strcpy(stack[0].param[PARAM_SIZE],  "{\\fs}");
-    strcpy(stack[0].param[PARAM_COLOR], "{\\c}");
-    strcpy(stack[0].param[PARAM_FACE],  "{\\fn}");
-
     if (x1 >= 0 && y1 >= 0) {
         /* XXX: here we rescale coordinate assuming they are in DVD resolution
          * (720x480) since we don't have anything better */
@@ -84,117 +49,7 @@ static void srt_to_ass(AVCodecContext *avctx, AVBPrint *dst,
         }
     }
 
-    for (; !end && *in; in++) {
-        switch (*in) {
-        case '\r':
-            break;
-        case '\n':
-            if (line_start) {
-                end = 1;
-                break;
-            }
-            rstrip_spaces_buf(dst);
-            av_bprintf(dst, "\\N");
-            line_start = 1;
-            break;
-        case ' ':
-            if (!line_start)
-                av_bprint_chars(dst, *in, 1);
-            break;
-        case '{':    /* skip all {\xxx} substrings except for {\an%d}
-                        and all microdvd like styles such as {Y:xxx} */
-            len = 0;
-            an += sscanf(in, "{\\an%*1u}%n", &len) >= 0 && len > 0;
-            if ((an != 1 && (len = 0, sscanf(in, "{\\%*[^}]}%n", &len) >= 0 && len > 0)) ||
-                (len = 0, sscanf(in, "{%*1[CcFfoPSsYy]:%*[^}]}%n", &len) >= 0 && len > 0)) {
-                in += len - 1;
-            } else
-                av_bprint_chars(dst, *in, 1);
-            break;
-        case '<':
-            tag_close = in[1] == '/';
-            len = 0;
-            if (sscanf(in+tag_close+1, "%127[^>]>%n", buffer, &len) >= 1 && len > 0) {
-                if ((param = strchr(buffer, ' ')))
-                    *param++ = 0;
-                if ((!tag_close && sptr < FF_ARRAY_ELEMS(stack)) ||
-                    ( tag_close && sptr > 0 && !strcmp(stack[sptr-1].tag, buffer))) {
-                    int i, j, unknown = 0;
-                    in += len + tag_close;
-                    if (!tag_close)
-                        memset(stack+sptr, 0, sizeof(*stack));
-                    if (!strcmp(buffer, "font")) {
-                        if (tag_close) {
-                            for (i=PARAM_NUMBER-1; i>=0; i--)
-                                if (stack[sptr-1].param[i][0])
-                                    for (j=sptr-2; j>=0; j--)
-                                        if (stack[j].param[i][0]) {
-                                            av_bprintf(dst, "%s", stack[j].param[i]);
-                                            break;
-                                        }
-                        } else {
-                            while (param) {
-                                if (!strncmp(param, "size=", 5)) {
-                                    unsigned font_size;
-                                    param += 5 + (param[5] == '"');
-                                    if (sscanf(param, "%u", &font_size) == 1) {
-                                        snprintf(stack[sptr].param[PARAM_SIZE],
-                                             sizeof(stack[0].param[PARAM_SIZE]),
-                                             "{\\fs%u}", font_size);
-                                    }
-                                } else if (!strncmp(param, "color=", 6)) {
-                                    param += 6 + (param[6] == '"');
-                                    snprintf(stack[sptr].param[PARAM_COLOR],
-                                         sizeof(stack[0].param[PARAM_COLOR]),
-                                         "{\\c&H%X&}",
-                                         html_color_parse(avctx, param));
-                                } else if (!strncmp(param, "face=", 5)) {
-                                    param += 5 + (param[5] == '"');
-                                    len = strcspn(param,
-                                                  param[-1] == '"' ? "\"" :" ");
-                                    av_strlcpy(tmp, param,
-                                               FFMIN(sizeof(tmp), len+1));
-                                    param += len;
-                                    snprintf(stack[sptr].param[PARAM_FACE],
-                                             sizeof(stack[0].param[PARAM_FACE]),
-                                             "{\\fn%s}", tmp);
-                                }
-                                if ((param = strchr(param, ' ')))
-                                    param++;
-                            }
-                            for (i=0; i<PARAM_NUMBER; i++)
-                                if (stack[sptr].param[i][0])
-                                    av_bprintf(dst, "%s", stack[sptr].param[i]);
-                        }
-                    } else if (!buffer[1] && strspn(buffer, "bisu") == 1) {
-                        av_bprintf(dst, "{\\%c%d}", buffer[0], !tag_close);
-                    } else {
-                        unknown = 1;
-                        snprintf(tmp, sizeof(tmp), "</%s>", buffer);
-                    }
-                    if (tag_close) {
-                        sptr--;
-                    } else if (unknown && !strstr(in, tmp)) {
-                        in -= len + tag_close;
-                        av_bprint_chars(dst, *in, 1);
-                    } else
-                        av_strlcpy(stack[sptr++].tag, buffer,
-                                   sizeof(stack[0].tag));
-                    break;
-                }
-            }
-        default:
-            av_bprint_chars(dst, *in, 1);
-            break;
-        }
-        if (*in != ' ' && *in != '\r' && *in != '\n')
-            line_start = 0;
-    }
-
-    while (dst->len >= 2 && !strncmp(&dst->str[dst->len - 2], "\\N", 2))
-        dst->len -= 2;
-    dst->str[dst->len] = 0;
-    rstrip_spaces_buf(dst);
+    ff_htmlmarkup_to_ass(avctx, dst, in);
 }
 
 static int srt_decode_frame(AVCodecContext *avctx,
@@ -218,14 +73,13 @@ static int srt_decode_frame(AVCodecContext *avctx,
 
     av_bprint_init(&buffer, 0, AV_BPRINT_SIZE_UNLIMITED);
 
-        // TODO: reindent
-            // Do final divide-by-10 outside rescale to force rounding down.
-            ts_start = av_rescale_q(avpkt->pts,
-                                    avctx->time_base,
-                                    (AVRational){1,100});
-            ts_end   = av_rescale_q(avpkt->pts + avpkt->duration,
-                                    avctx->time_base,
-                                    (AVRational){1,100});
+    // Do final divide-by-10 outside rescale to force rounding down.
+    ts_start = av_rescale_q(avpkt->pts,
+                            avctx->time_base,
+                            (AVRational){1,100});
+    ts_end   = av_rescale_q(avpkt->pts + avpkt->duration,
+                            avctx->time_base,
+                            (AVRational){1,100});
 
     srt_to_ass(avctx, &buffer, avpkt->data, x1, y1, x2, y2);
     ret = ff_ass_add_rect_bprint(sub, &buffer, ts_start, ts_end-ts_start);
diff --git a/libavcodec/srtenc.c b/libavcodec/srtenc.c
index 32879708..0a6875a5 100644
--- a/libavcodec/srtenc.c
+++ b/libavcodec/srtenc.c
@@ -221,8 +221,14 @@ static const ASSCodesCallbacks srt_callbacks = {
     .end              = srt_end_cb,
 };
 
-static int srt_encode_frame(AVCodecContext *avctx,
-                            unsigned char *buf, int bufsize, const AVSubtitle *sub)
+static const ASSCodesCallbacks text_callbacks = {
+    .text             = srt_text_cb,
+    .new_line         = srt_new_line_cb,
+};
+
+static int encode_frame(AVCodecContext *avctx,
+                        unsigned char *buf, int bufsize, const AVSubtitle *sub,
+                        const ASSCodesCallbacks *cb)
 {
     SRTContext *s = avctx->priv_data;
     ASSDialog *dialog;
@@ -241,7 +247,7 @@ static int srt_encode_frame(AVCodecContext *avctx,
         for (; dialog && num--; dialog++) {
             s->alignment_applied = 0;
             srt_style_apply(s, dialog->style);
-            ff_ass_split_override_codes(&srt_callbacks, s, dialog->text);
+            ff_ass_split_override_codes(cb, s, dialog->text);
         }
     }
 
@@ -259,6 +265,18 @@ static int srt_encode_frame(AVCodecContext *avctx,
     return s->buffer.len;
 }
 
+static int srt_encode_frame(AVCodecContext *avctx,
+                               unsigned char *buf, int bufsize, const AVSubtitle *sub)
+{
+    return encode_frame(avctx, buf, bufsize, sub, &srt_callbacks);
+}
+
+static int text_encode_frame(AVCodecContext *avctx,
+                             unsigned char *buf, int bufsize, const AVSubtitle *sub)
+{
+    return encode_frame(avctx, buf, bufsize, sub, &text_callbacks);
+}
+
 static int srt_encode_close(AVCodecContext *avctx)
 {
     SRTContext *s = avctx->priv_data;
@@ -293,3 +311,16 @@ AVCodec ff_subrip_encoder = {
     .close          = srt_encode_close,
 };
 #endif
+
+#if CONFIG_TEXT_ENCODER
+AVCodec ff_text_encoder = {
+    .name           = "text",
+    .long_name      = NULL_IF_CONFIG_SMALL("Raw text subtitle"),
+    .type           = AVMEDIA_TYPE_SUBTITLE,
+    .id             = AV_CODEC_ID_TEXT,
+    .priv_data_size = sizeof(SRTContext),
+    .init           = srt_encode_init,
+    .encode_sub     = text_encode_frame,
+    .close          = srt_encode_close,
+};
+#endif
diff --git a/libavcodec/startcode.c b/libavcodec/startcode.c
index 940bbb71..9efdffe8 100644
--- a/libavcodec/startcode.c
+++ b/libavcodec/startcode.c
@@ -33,7 +33,7 @@ int ff_startcode_find_candidate_c(const uint8_t *buf, int size)
     int i = 0;
 #if HAVE_FAST_UNALIGNED
     /* we check i < size instead of i + 3 / 7 because it is
-     * simpler and there must be FF_INPUT_BUFFER_PADDING_SIZE
+     * simpler and there must be AV_INPUT_BUFFER_PADDING_SIZE
      * bytes at the end.
      */
 #if HAVE_FAST_64BIT
diff --git a/libavcodec/sunrast.c b/libavcodec/sunrast.c
index 3fbec1df..25e11f6c 100644
--- a/libavcodec/sunrast.c
+++ b/libavcodec/sunrast.c
@@ -211,5 +211,5 @@ AVCodec ff_sunrast_decoder = {
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_SUNRAST,
     .decode         = sunrast_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/sunrastenc.c b/libavcodec/sunrastenc.c
index cff8c85c..97b2242a 100644
--- a/libavcodec/sunrastenc.c
+++ b/libavcodec/sunrastenc.c
@@ -19,12 +19,16 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "libavutil/opt.h"
+
 #include "avcodec.h"
 #include "bytestream.h"
 #include "internal.h"
 #include "sunrast.h"
 
 typedef struct SUNRASTContext {
+    AVClass *class;
+
     PutByteContext p;
     int depth;      ///< depth of pixel
     int length;     ///< length (bytes) of image
@@ -136,6 +140,8 @@ static av_cold int sunrast_encode_init(AVCodecContext *avctx)
 {
     SUNRASTContext *s = avctx->priv_data;
 
+#if FF_API_CODER_TYPE
+FF_DISABLE_DEPRECATION_WARNINGS
     switch (avctx->coder_type) {
     case FF_CODER_TYPE_RLE:
         s->type = RT_BYTE_ENCODED;
@@ -147,6 +153,11 @@ static av_cold int sunrast_encode_init(AVCodecContext *avctx)
         av_log(avctx, AV_LOG_ERROR, "invalid coder_type\n");
         return AVERROR(EINVAL);
     }
+FF_ENABLE_DEPRECATION_WARNINGS
+    if (s->type != RT_BYTE_ENCODED && s->type != RT_STANDARD)
+#endif
+    // adjust boolean option to RT equivalent
+    s->type++;
 
     s->maptype                    = RMT_NONE;
     s->maplength                  = 0;
@@ -169,8 +180,7 @@ static av_cold int sunrast_encode_init(AVCodecContext *avctx)
         return AVERROR_BUG;
     }
     s->length = avctx->height * (FFALIGN(avctx->width * s->depth, 16) >> 3);
-    s->size   = 32 + s->maplength +
-                s->length * (s->type == RT_BYTE_ENCODED ? 2 : 1);
+    s->size   = 32 + s->maplength + s->length * s->type;
 
     return 0;
 }
@@ -181,7 +191,7 @@ static int sunrast_encode_frame(AVCodecContext *avctx,  AVPacket *avpkt,
     SUNRASTContext *s = avctx->priv_data;
     int ret;
 
-    if ((ret = ff_alloc_packet2(avctx, avpkt, s->size)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, avpkt, s->size, 0)) < 0)
         return ret;
 
     bytestream2_init_writer(&s->p, avpkt->data, avpkt->size);
@@ -199,16 +209,27 @@ static int sunrast_encode_frame(AVCodecContext *avctx,  AVPacket *avpkt,
     return 0;
 }
 
-static av_cold int sunrast_encode_close(AVCodecContext *avctx)
-{
-    av_frame_free(&avctx->coded_frame);
-    return 0;
-}
+#define OFFSET(x) offsetof(SUNRASTContext, x)
+#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
+static const AVOption options[] = {
+    { "rle", "Use run-length compression", OFFSET(type), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, VE },
+
+    { NULL },
+};
+
+static const AVClass sunrast_class = {
+    .class_name = "sunrast",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
 
+#if FF_API_CODER_TYPE
 static const AVCodecDefault sunrast_defaults[] = {
      { "coder", "rle" },
      { NULL },
 };
+#endif
 
 AVCodec ff_sunrast_encoder = {
     .name           = "sunrast",
@@ -217,9 +238,11 @@ AVCodec ff_sunrast_encoder = {
     .id             = AV_CODEC_ID_SUNRAST,
     .priv_data_size = sizeof(SUNRASTContext),
     .init           = sunrast_encode_init,
-    .close          = sunrast_encode_close,
     .encode2        = sunrast_encode_frame,
+#if FF_API_CODER_TYPE
     .defaults       = sunrast_defaults,
+#endif
+    .priv_class     = &sunrast_class,
     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_BGR24,
                                                   AV_PIX_FMT_PAL8,
                                                   AV_PIX_FMT_GRAY8,
diff --git a/libavcodec/svq1dec.c b/libavcodec/svq1dec.c
index 21d4acf8..dca99fae 100644
--- a/libavcodec/svq1dec.c
+++ b/libavcodec/svq1dec.c
@@ -618,9 +618,12 @@ static int svq1_decode_frame(AVCodecContext *avctx, void *data,
     uint8_t *current;
     int result, i, x, y, width, height;
     svq1_pmv *pmv;
+    int ret;
 
     /* initialize bit buffer */
-    init_get_bits8(&s->gb, buf, buf_size);
+    ret = init_get_bits8(&s->gb, buf, buf_size);
+    if (ret < 0)
+        return ret;
 
     /* decode frame header */
     s->frame_code = get_bits(&s->gb, 22);
@@ -685,7 +688,7 @@ static int svq1_decode_frame(AVCodecContext *avctx, void *data,
             width    = FFALIGN(s->width,  16);
             height   = FFALIGN(s->height, 16);
         } else {
-            if (avctx->flags & CODEC_FLAG_GRAY)
+            if (avctx->flags & AV_CODEC_FLAG_GRAY)
                 break;
             width    = FFALIGN(s->width  / 4, 16);
             height   = FFALIGN(s->height / 4, 16);
@@ -839,7 +842,7 @@ AVCodec ff_svq1_decoder = {
     .init           = svq1_decode_init,
     .close          = svq1_decode_end,
     .decode         = svq1_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .flush          = svq1_flush,
     .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUV410P,
                                                      AV_PIX_FMT_NONE },
diff --git a/libavcodec/svq1enc.c b/libavcodec/svq1enc.c
index 56031451..d968d36a 100644
--- a/libavcodec/svq1enc.c
+++ b/libavcodec/svq1enc.c
@@ -104,7 +104,9 @@ static int encode_block(SVQ1EncContext *s, uint8_t *src, uint8_t *ref,
     best_score = 0;
     // FIXME: Optimize, this does not need to be done multiple times.
     if (intra) {
-        codebook_sum   = svq1_intra_codebook_sum[level];
+        // level is 5 when encode_block is called from svq1_encode_plane
+        // and always < 4 when called recursively from this function.
+        codebook_sum   = level < 4 ? svq1_intra_codebook_sum[level] : NULL;
         codebook       = ff_svq1_intra_codebooks[level];
         mean_vlc       = ff_svq1_intra_mean_vlc;
         multistage_vlc = ff_svq1_intra_multistage_vlc[level];
@@ -117,7 +119,8 @@ static int encode_block(SVQ1EncContext *s, uint8_t *src, uint8_t *ref,
             }
         }
     } else {
-        codebook_sum   = svq1_inter_codebook_sum[level];
+        // level is 5 or < 4, see above for details.
+        codebook_sum   = level < 4 ? svq1_inter_codebook_sum[level] : NULL;
         codebook       = ff_svq1_inter_codebooks[level];
         mean_vlc       = ff_svq1_inter_mean_vlc + 256;
         multistage_vlc = ff_svq1_inter_multistage_vlc[level];
@@ -246,14 +249,13 @@ static int svq1_encode_plane(SVQ1EncContext *s, int plane,
                              unsigned char *decoded_plane,
                              int width, int height, int src_stride, int stride)
 {
-    const AVFrame *f = s->avctx->coded_frame;
     int x, y;
     int i;
     int block_width, block_height;
     int level;
     int threshold[6];
     uint8_t *src     = s->scratchbuf + stride * 32;
-    const int lambda = (f->quality * f->quality) >>
+    const int lambda = (s->quality * s->quality) >>
                        (2 * FF_LAMBDA_SHIFT);
 
     /* figure out the acceptable level thresholds in advance */
@@ -264,7 +266,7 @@ static int svq1_encode_plane(SVQ1EncContext *s, int plane,
     block_width  = (width  + 15) / 16;
     block_height = (height + 15) / 16;
 
-    if (f->pict_type == AV_PICTURE_TYPE_P) {
+    if (s->pict_type == AV_PICTURE_TYPE_P) {
         s->m.avctx                         = s->avctx;
         s->m.current_picture_ptr           = &s->m.current_picture;
         s->m.last_picture_ptr              = &s->m.last_picture;
@@ -280,12 +282,25 @@ static int svq1_encode_plane(SVQ1EncContext *s, int plane,
         s->m.mb_stride                     = s->m.mb_width + 1;
         s->m.b8_stride                     = 2 * s->m.mb_width + 1;
         s->m.f_code                        = 1;
-        s->m.pict_type                     = f->pict_type;
+        s->m.pict_type                     = s->pict_type;
+#if FF_API_MOTION_EST
+FF_DISABLE_DEPRECATION_WARNINGS
         s->m.me_method                     = s->avctx->me_method;
+        if (s->motion_est == FF_ME_EPZS) {
+            if (s->avctx->me_method == ME_ZERO)
+                s->motion_est = FF_ME_ZERO;
+            else if (s->avctx->me_method == ME_EPZS)
+                s->motion_est = FF_ME_EPZS;
+            else if (s->avctx->me_method == ME_X1)
+                s->motion_est = FF_ME_XONE;
+        }
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+        s->m.motion_est                    = s->motion_est;
         s->m.me.scene_change_score         = 0;
         // s->m.out_format                    = FMT_H263;
         // s->m.unrestricted_mv               = 1;
-        s->m.lambda                        = f->quality;
+        s->m.lambda                        = s->quality;
         s->m.qscale                        = s->m.lambda * 139 +
                                              FF_LAMBDA_SCALE * 64 >>
                                              FF_LAMBDA_SHIFT + 7;
@@ -378,13 +393,13 @@ static int svq1_encode_plane(SVQ1EncContext *s, int plane,
             s->m.mb_x = x;
             init_block_index(&s->m);
 
-            if (f->pict_type == AV_PICTURE_TYPE_I ||
+            if (s->pict_type == AV_PICTURE_TYPE_I ||
                 (s->m.mb_type[x + y * s->m.mb_stride] &
                  CANDIDATE_MB_TYPE_INTRA)) {
                 for (i = 0; i < 6; i++)
                     init_put_bits(&s->reorder_pb[i], reorder_buffer[0][i],
                                   7 * 32);
-                if (f->pict_type == AV_PICTURE_TYPE_P) {
+                if (s->pict_type == AV_PICTURE_TYPE_P) {
                     const uint8_t *vlc = ff_svq1_block_type_vlc[SVQ1_BLOCK_INTRA];
                     put_bits(&s->reorder_pb[5], vlc[1], vlc[0]);
                     score[0] = vlc[1] * lambda;
@@ -400,7 +415,7 @@ static int svq1_encode_plane(SVQ1EncContext *s, int plane,
 
             best = 0;
 
-            if (f->pict_type == AV_PICTURE_TYPE_P) {
+            if (s->pict_type == AV_PICTURE_TYPE_P) {
                 const uint8_t *vlc = ff_svq1_block_type_vlc[SVQ1_BLOCK_INTER];
                 int mx, my, pred_x, pred_y, dxy;
                 int16_t *motion_ptr;
@@ -505,7 +520,6 @@ static av_cold int svq1_encode_end(AVCodecContext *avctx)
 
     av_frame_free(&s->current_picture);
     av_frame_free(&s->last_picture);
-    av_frame_free(&avctx->coded_frame);
 
     return 0;
 }
@@ -515,14 +529,18 @@ static av_cold int svq1_encode_init(AVCodecContext *avctx)
     SVQ1EncContext *const s = avctx->priv_data;
     int ret;
 
+    if (avctx->width >= 4096 || avctx->height >= 4096) {
+        av_log(avctx, AV_LOG_ERROR, "Dimensions too large, maximum is 4095x4095\n");
+        return AVERROR(EINVAL);
+    }
+
     ff_hpeldsp_init(&s->hdsp, avctx->flags);
     ff_me_cmp_init(&s->mecc, avctx);
     ff_mpegvideoencdsp_init(&s->m.mpvencdsp, avctx);
 
-    avctx->coded_frame = av_frame_alloc();
     s->current_picture = av_frame_alloc();
     s->last_picture    = av_frame_alloc();
-    if (!avctx->coded_frame || !s->current_picture || !s->last_picture) {
+    if (!s->current_picture || !s->last_picture) {
         svq1_encode_end(avctx);
         return AVERROR(ENOMEM);
     }
@@ -576,11 +594,10 @@ static int svq1_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
                              const AVFrame *pict, int *got_packet)
 {
     SVQ1EncContext *const s = avctx->priv_data;
-    AVFrame *const p        = avctx->coded_frame;
     int i, ret;
 
     if ((ret = ff_alloc_packet2(avctx, pkt, s->y_block_width * s->y_block_height *
-                             MAX_MB_BYTES*3 + FF_MIN_BUFFER_SIZE)) < 0)
+                             MAX_MB_BYTES*3 + AV_INPUT_BUFFER_MIN_SIZE, 0)) < 0)
         return ret;
 
     if (avctx->pix_fmt != AV_PIX_FMT_YUV410P) {
@@ -608,12 +625,22 @@ static int svq1_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 
     init_put_bits(&s->pb, pkt->data, pkt->size);
 
-    p->pict_type = avctx->gop_size && avctx->frame_number % avctx->gop_size ?
-                   AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_I;
-    p->key_frame = p->pict_type == AV_PICTURE_TYPE_I;
-    p->quality   = pict->quality;
+    if (avctx->gop_size && (avctx->frame_number % avctx->gop_size))
+        s->pict_type = AV_PICTURE_TYPE_P;
+    else
+        s->pict_type = AV_PICTURE_TYPE_I;
+    s->quality = pict->quality;
+
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
+    avctx->coded_frame->pict_type = s->pict_type;
+    avctx->coded_frame->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+    ff_side_data_set_encoder_stats(pkt, pict->quality, NULL, 0, s->pict_type);
 
-    svq1_write_header(s, p->pict_type);
+    svq1_write_header(s, s->pict_type);
     for (i = 0; i < 3; i++)
         if (svq1_encode_plane(s, i,
                               pict->data[i],
@@ -639,19 +666,38 @@ static int svq1_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     flush_put_bits(&s->pb);
 
     pkt->size = put_bits_count(&s->pb) / 8;
-    if (p->pict_type == AV_PICTURE_TYPE_I)
+    if (s->pict_type == AV_PICTURE_TYPE_I)
         pkt->flags |= AV_PKT_FLAG_KEY;
     *got_packet = 1;
 
     return 0;
 }
 
+#define OFFSET(x) offsetof(struct SVQ1EncContext, x)
+#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
+static const AVOption options[] = {
+    { "motion-est", "Motion estimation algorithm", OFFSET(motion_est), AV_OPT_TYPE_INT, { .i64 = FF_ME_EPZS }, FF_ME_ZERO, FF_ME_XONE, VE, "motion-est"},
+        { "zero", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_ME_ZERO }, 0, 0, FF_MPV_OPT_FLAGS, "motion-est" },
+        { "epzs", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_ME_EPZS }, 0, 0, FF_MPV_OPT_FLAGS, "motion-est" },
+        { "xone", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_ME_XONE }, 0, 0, FF_MPV_OPT_FLAGS, "motion-est" },
+
+    { NULL },
+};
+
+static const AVClass svq1enc_class = {
+    .class_name = "svq1enc",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
 AVCodec ff_svq1_encoder = {
     .name           = "svq1",
     .long_name      = NULL_IF_CONFIG_SMALL("Sorenson Vector Quantizer 1 / Sorenson Video 1 / SVQ1"),
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_SVQ1,
     .priv_data_size = sizeof(SVQ1EncContext),
+    .priv_class     = &svq1enc_class,
     .init           = svq1_encode_init,
     .encode2        = svq1_encode_frame,
     .close          = svq1_encode_end,
diff --git a/libavcodec/svq1enc.h b/libavcodec/svq1enc.h
index 8e748851..37f05a04 100644
--- a/libavcodec/svq1enc.h
+++ b/libavcodec/svq1enc.h
@@ -44,6 +44,10 @@ typedef struct SVQ1EncContext {
     PutBitContext pb;
     GetBitContext gb;
 
+    /* Some compression statistics */
+    enum AVPictureType pict_type;
+    int quality;
+
     /* why ooh why this sick breadth first order,
      * everything is slower and more complex */
     PutBitContext reorder_pb[6];
@@ -70,6 +74,8 @@ typedef struct SVQ1EncContext {
 
     uint8_t *scratchbuf;
 
+    int motion_est;
+
     int (*ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2,
                              intptr_t size);
 } SVQ1EncContext;
diff --git a/libavcodec/svq3.c b/libavcodec/svq3.c
index 0683b0db..57205c6a 100644
--- a/libavcodec/svq3.c
+++ b/libavcodec/svq3.c
@@ -339,7 +339,7 @@ static inline void svq3_mc_dir_part(SVQ3Context *s,
              : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src, sl->linesize,
                                                        height);
 
-    if (!(h->flags & CODEC_FLAG_GRAY)) {
+    if (!(h->flags & AV_CODEC_FLAG_GRAY)) {
         mx     = mx + (mx < (int) x) >> 1;
         my     = my + (my < (int) y) >> 1;
         width  = width  >> 1;
@@ -1414,9 +1414,9 @@ AVCodec ff_svq3_decoder = {
     .init           = svq3_decode_init,
     .close          = svq3_decode_end,
     .decode         = svq3_decode_frame,
-    .capabilities   = CODEC_CAP_DRAW_HORIZ_BAND |
-                      CODEC_CAP_DR1             |
-                      CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_DRAW_HORIZ_BAND |
+                      AV_CODEC_CAP_DR1             |
+                      AV_CODEC_CAP_DELAY,
     .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUVJ420P,
                                                      AV_PIX_FMT_NONE},
 };
diff --git a/libavcodec/synth_filter.c b/libavcodec/synth_filter.c
index d49ffe64..1c5dab5c 100644
--- a/libavcodec/synth_filter.c
+++ b/libavcodec/synth_filter.c
@@ -1,5 +1,6 @@
 /*
  * copyright (c) 2008 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (C) 2016 foo86
  *
  * This file is part of FFmpeg.
  *
@@ -19,47 +20,165 @@
  */
 
 #include "fft.h"
+#include "dcadct.h"
+#include "dcamath.h"
 #include "synth_filter.h"
 
 static void synth_filter_float(FFTContext *imdct,
-                           float *synth_buf_ptr, int *synth_buf_offset,
-                           float synth_buf2[32], const float window[512],
-                           float out[32], const float in[32], float scale)
+                               float *synth_buf_ptr, int *synth_buf_offset,
+                               float synth_buf2[32], const float window[512],
+                               float out[32], const float in[32], float scale)
 {
-    float *synth_buf= synth_buf_ptr + *synth_buf_offset;
+    float *synth_buf = synth_buf_ptr + *synth_buf_offset;
     int i, j;
 
     imdct->imdct_half(imdct, synth_buf, in);
 
-    for (i = 0; i < 16; i++){
-        float a= synth_buf2[i     ];
-        float b= synth_buf2[i + 16];
-        float c= 0;
-        float d= 0;
-        for (j = 0; j < 512 - *synth_buf_offset; j += 64){
-            a += window[i + j     ]*(-synth_buf[15 - i + j      ]);
-            b += window[i + j + 16]*( synth_buf[     i + j      ]);
-            c += window[i + j + 32]*( synth_buf[16 + i + j      ]);
-            d += window[i + j + 48]*( synth_buf[31 - i + j      ]);
+    for (i = 0; i < 16; i++) {
+        float a = synth_buf2[i     ];
+        float b = synth_buf2[i + 16];
+        float c = 0;
+        float d = 0;
+        for (j = 0; j < 512 - *synth_buf_offset; j += 64) {
+            a += window[i + j     ] * (-synth_buf[15 - i + j      ]);
+            b += window[i + j + 16] * ( synth_buf[     i + j      ]);
+            c += window[i + j + 32] * ( synth_buf[16 + i + j      ]);
+            d += window[i + j + 48] * ( synth_buf[31 - i + j      ]);
         }
-        for (     ; j < 512; j += 64){
-            a += window[i + j     ]*(-synth_buf[15 - i + j - 512]);
-            b += window[i + j + 16]*( synth_buf[     i + j - 512]);
-            c += window[i + j + 32]*( synth_buf[16 + i + j - 512]);
-            d += window[i + j + 48]*( synth_buf[31 - i + j - 512]);
+        for (     ; j < 512; j += 64) {
+            a += window[i + j     ] * (-synth_buf[15 - i + j - 512]);
+            b += window[i + j + 16] * ( synth_buf[     i + j - 512]);
+            c += window[i + j + 32] * ( synth_buf[16 + i + j - 512]);
+            d += window[i + j + 48] * ( synth_buf[31 - i + j - 512]);
         }
-        out[i     ] = a*scale;
-        out[i + 16] = b*scale;
+        out[i     ] = a * scale;
+        out[i + 16] = b * scale;
         synth_buf2[i     ] = c;
         synth_buf2[i + 16] = d;
     }
-    *synth_buf_offset= (*synth_buf_offset - 32)&511;
+
+    *synth_buf_offset = (*synth_buf_offset - 32) & 511;
+}
+
+static void synth_filter_float_64(FFTContext *imdct,
+                                  float *synth_buf_ptr, int *synth_buf_offset,
+                                  float synth_buf2[64], const float window[1024],
+                                  float out[64], const float in[64], float scale)
+{
+    float *synth_buf = synth_buf_ptr + *synth_buf_offset;
+    int i, j;
+
+    imdct->imdct_half(imdct, synth_buf, in);
+
+    for (i = 0; i < 32; i++) {
+        float a = synth_buf2[i     ];
+        float b = synth_buf2[i + 32];
+        float c = 0;
+        float d = 0;
+        for (j = 0; j < 1024 - *synth_buf_offset; j += 128) {
+            a += window[i + j     ] * (-synth_buf[31 - i + j       ]);
+            b += window[i + j + 32] * ( synth_buf[     i + j       ]);
+            c += window[i + j + 64] * ( synth_buf[32 + i + j       ]);
+            d += window[i + j + 96] * ( synth_buf[63 - i + j       ]);
+        }
+        for (     ; j < 1024; j += 128) {
+            a += window[i + j     ] * (-synth_buf[31 - i + j - 1024]);
+            b += window[i + j + 32] * ( synth_buf[     i + j - 1024]);
+            c += window[i + j + 64] * ( synth_buf[32 + i + j - 1024]);
+            d += window[i + j + 96] * ( synth_buf[63 - i + j - 1024]);
+        }
+        out[i     ] = a * scale;
+        out[i + 32] = b * scale;
+        synth_buf2[i     ] = c;
+        synth_buf2[i + 32] = d;
+    }
+
+    *synth_buf_offset = (*synth_buf_offset - 64) & 1023;
+}
+
+static void synth_filter_fixed(DCADCTContext *imdct,
+                               int32_t *synth_buf_ptr, int *synth_buf_offset,
+                               int32_t synth_buf2[32], const int32_t window[512],
+                               int32_t out[32], const int32_t in[32])
+{
+    int32_t *synth_buf = synth_buf_ptr + *synth_buf_offset;
+    int i, j;
+
+    imdct->imdct_half[0](synth_buf, in);
+
+    for (i = 0; i < 16; i++) {
+        int64_t a = synth_buf2[i     ] * (INT64_C(1) << 21);
+        int64_t b = synth_buf2[i + 16] * (INT64_C(1) << 21);
+        int64_t c = 0;
+        int64_t d = 0;
+        for (j = 0; j < 512 - *synth_buf_offset; j += 64) {
+            a += (int64_t)window[i + j     ] * synth_buf[     i + j      ];
+            b += (int64_t)window[i + j + 16] * synth_buf[15 - i + j      ];
+            c += (int64_t)window[i + j + 32] * synth_buf[16 + i + j      ];
+            d += (int64_t)window[i + j + 48] * synth_buf[31 - i + j      ];
+        }
+        for (     ; j < 512; j += 64) {
+            a += (int64_t)window[i + j     ] * synth_buf[     i + j - 512];
+            b += (int64_t)window[i + j + 16] * synth_buf[15 - i + j - 512];
+            c += (int64_t)window[i + j + 32] * synth_buf[16 + i + j - 512];
+            d += (int64_t)window[i + j + 48] * synth_buf[31 - i + j - 512];
+        }
+        out[i     ] = clip23(norm21(a));
+        out[i + 16] = clip23(norm21(b));
+        synth_buf2[i     ] = norm21(c);
+        synth_buf2[i + 16] = norm21(d);
+    }
+
+    *synth_buf_offset = (*synth_buf_offset - 32) & 511;
+}
+
+static void synth_filter_fixed_64(DCADCTContext *imdct,
+                                  int32_t *synth_buf_ptr, int *synth_buf_offset,
+                                  int32_t synth_buf2[64], const int32_t window[1024],
+                                  int32_t out[64], const int32_t in[64])
+{
+    int32_t *synth_buf = synth_buf_ptr + *synth_buf_offset;
+    int i, j;
+
+    imdct->imdct_half[1](synth_buf, in);
+
+    for (i = 0; i < 32; i++) {
+        int64_t a = synth_buf2[i     ] * (INT64_C(1) << 20);
+        int64_t b = synth_buf2[i + 32] * (INT64_C(1) << 20);
+        int64_t c = 0;
+        int64_t d = 0;
+        for (j = 0; j < 1024 - *synth_buf_offset; j += 128) {
+            a += (int64_t)window[i + j     ] * synth_buf[     i + j       ];
+            b += (int64_t)window[i + j + 32] * synth_buf[31 - i + j       ];
+            c += (int64_t)window[i + j + 64] * synth_buf[32 + i + j       ];
+            d += (int64_t)window[i + j + 96] * synth_buf[63 - i + j       ];
+        }
+        for (     ; j < 1024; j += 128) {
+            a += (int64_t)window[i + j     ] * synth_buf[     i + j - 1024];
+            b += (int64_t)window[i + j + 32] * synth_buf[31 - i + j - 1024];
+            c += (int64_t)window[i + j + 64] * synth_buf[32 + i + j - 1024];
+            d += (int64_t)window[i + j + 96] * synth_buf[63 - i + j - 1024];
+        }
+        out[i     ] = clip23(norm20(a));
+        out[i + 32] = clip23(norm20(b));
+        synth_buf2[i     ] = norm20(c);
+        synth_buf2[i + 32] = norm20(d);
+    }
+
+    *synth_buf_offset = (*synth_buf_offset - 64) & 1023;
 }
 
 av_cold void ff_synth_filter_init(SynthFilterContext *c)
 {
-    c->synth_filter_float = synth_filter_float;
+    c->synth_filter_float    = synth_filter_float;
+    c->synth_filter_float_64 = synth_filter_float_64;
+    c->synth_filter_fixed    = synth_filter_fixed;
+    c->synth_filter_fixed_64 = synth_filter_fixed_64;
 
-    if (ARCH_ARM) ff_synth_filter_init_arm(c);
-    if (ARCH_X86) ff_synth_filter_init_x86(c);
+    if (ARCH_AARCH64)
+        ff_synth_filter_init_aarch64(c);
+    if (ARCH_ARM)
+        ff_synth_filter_init_arm(c);
+    if (ARCH_X86)
+        ff_synth_filter_init_x86(c);
 }
diff --git a/libavcodec/synth_filter.h b/libavcodec/synth_filter.h
index b63fd779..df3589a8 100644
--- a/libavcodec/synth_filter.h
+++ b/libavcodec/synth_filter.h
@@ -22,6 +22,7 @@
 #define AVCODEC_SYNTH_FILTER_H
 
 #include "fft.h"
+#include "dcadct.h"
 
 typedef struct SynthFilterContext {
     void (*synth_filter_float)(FFTContext *imdct,
@@ -29,9 +30,22 @@ typedef struct SynthFilterContext {
                                float synth_buf2[32], const float window[512],
                                float out[32], const float in[32],
                                float scale);
+    void (*synth_filter_float_64)(FFTContext *imdct,
+                                  float *synth_buf_ptr, int *synth_buf_offset,
+                                  float synth_buf2[64], const float window[1024],
+                                  float out[64], const float in[64], float scale);
+    void (*synth_filter_fixed)(DCADCTContext *imdct,
+                               int32_t *synth_buf_ptr, int *synth_buf_offset,
+                               int32_t synth_buf2[32], const int32_t window[512],
+                               int32_t out[32], const int32_t in[32]);
+    void (*synth_filter_fixed_64)(DCADCTContext *imdct,
+                                  int32_t *synth_buf_ptr, int *synth_buf_offset,
+                                  int32_t synth_buf2[64], const int32_t window[1024],
+                                  int32_t out[64], const int32_t in[64]);
 } SynthFilterContext;
 
 void ff_synth_filter_init(SynthFilterContext *c);
+void ff_synth_filter_init_aarch64(SynthFilterContext *c);
 void ff_synth_filter_init_arm(SynthFilterContext *c);
 void ff_synth_filter_init_x86(SynthFilterContext *c);
 
diff --git a/libavcodec/tableprint.h b/libavcodec/tableprint.h
index 667985f6..6f61c712 100644
--- a/libavcodec/tableprint.h
+++ b/libavcodec/tableprint.h
@@ -64,6 +64,7 @@ void write_int8_t_array     (const int8_t   *, int);
 void write_uint8_t_array    (const uint8_t  *, int);
 void write_uint16_t_array   (const uint16_t *, int);
 void write_uint32_t_array   (const uint32_t *, int);
+void write_int32_t_array    (const int32_t  *, int);
 void write_float_array      (const float    *, int);
 void write_int8_t_2d_array  (const void *, int, int);
 void write_uint8_t_2d_array (const void *, int, int);
@@ -116,6 +117,7 @@ WRITE_1D_FUNC(uint8_t,  "0x%02"PRIx8, 15)
 WRITE_1D_FUNC(uint16_t, "0x%08"PRIx16, 7)
 WRITE_1D_FUNC(int16_t,  "%5"PRIi16, 7)
 WRITE_1D_FUNC(uint32_t, "0x%08"PRIx32, 7)
+WRITE_1D_FUNC(int32_t,  "0x%08"PRIx32, 7)
 WRITE_1D_FUNC(float,    "%.18e", 3)
 
 WRITE_2D_FUNC(int8_t)
diff --git a/libavcodec/tableprint_vlc.h b/libavcodec/tableprint_vlc.h
index 33a9c0e7..675251a8 100644
--- a/libavcodec/tableprint_vlc.h
+++ b/libavcodec/tableprint_vlc.h
@@ -38,6 +38,7 @@
 #include "tableprint.h"
 #include "get_bits.h"
 #include "mathtables.c"
+#include "libavutil/reverse.c"
 #include "bitstream.c"
 
 #define REPLACE_DEFINE2(type) write_##type##_array
diff --git a/libavcodec/takdec.c b/libavcodec/takdec.c
index 42250303..001086bc 100644
--- a/libavcodec/takdec.c
+++ b/libavcodec/takdec.c
@@ -28,6 +28,7 @@
 #include "libavutil/internal.h"
 #include "libavutil/samplefmt.h"
 #include "tak.h"
+#include "takdsp.h"
 #include "audiodsp.h"
 #include "thread.h"
 #include "avcodec.h"
@@ -47,6 +48,7 @@ typedef struct MCDParam {
 typedef struct TAKDecContext {
     AVCodecContext *avctx;                          ///< parent AVCodecContext
     AudioDSPContext adsp;
+    TAKDSPContext   tdsp;
     TAKStreamInfo   ti;
     GetBitContext   gb;                             ///< bitstream reader initialized to start at the current frame
 
@@ -172,6 +174,7 @@ static av_cold int tak_decode_init(AVCodecContext *avctx)
     TAKDecContext *s = avctx->priv_data;
 
     ff_audiodsp_init(&s->adsp);
+    ff_takdsp_init(&s->tdsp);
 
     s->avctx = avctx;
     avctx->bits_per_raw_sample = avctx->bits_per_coded_sample;
@@ -224,6 +227,7 @@ static void decode_lpc(int32_t *coeffs, int mode, int length)
             int a3  = coeffs[2];
             int a4  = a3 + a1;
             int a5  = a4 + a2;
+            coeffs[2] = a5;
             coeffs += 3;
             for (i = 0; i < length - 3; i++) {
                 a3     += *coeffs;
@@ -541,46 +545,32 @@ static int decode_channel(TAKDecContext *s, int chan)
 static int decorrelate(TAKDecContext *s, int c1, int c2, int length)
 {
     GetBitContext *gb = &s->gb;
-    int32_t *p1       = s->decoded[c1] + 1;
-    int32_t *p2       = s->decoded[c2] + 1;
+    int32_t *p1       = s->decoded[c1] + (s->dmode > 5);
+    int32_t *p2       = s->decoded[c2] + (s->dmode > 5);
+    int32_t bp1       = p1[0];
+    int32_t bp2       = p2[0];
     int i;
     int dshift, dfactor;
 
+    length += s->dmode < 6;
+
     switch (s->dmode) {
     case 1: /* left/side */
-        for (i = 0; i < length; i++) {
-            int32_t a = p1[i];
-            int32_t b = p2[i];
-            p2[i]     = a + b;
-        }
+        s->tdsp.decorrelate_ls(p1, p2, length);
         break;
     case 2: /* side/right */
-        for (i = 0; i < length; i++) {
-            int32_t a = p1[i];
-            int32_t b = p2[i];
-            p1[i]     = b - a;
-        }
+        s->tdsp.decorrelate_sr(p1, p2, length);
         break;
     case 3: /* side/mid */
-        for (i = 0; i < length; i++) {
-            int32_t a = p1[i];
-            int32_t b = p2[i];
-            a        -= b >> 1;
-            p1[i]     = a;
-            p2[i]     = a + b;
-        }
+        s->tdsp.decorrelate_sm(p1, p2, length);
         break;
     case 4: /* side/left with scale factor */
         FFSWAP(int32_t*, p1, p2);
+        FFSWAP(int32_t, bp1, bp2);
     case 5: /* side/right with scale factor */
         dshift  = get_bits_esc4(gb);
         dfactor = get_sbits(gb, 10);
-        for (i = 0; i < length; i++) {
-            int32_t a = p1[i];
-            int32_t b = p2[i];
-            b         = dfactor * (b >> dshift) + 128 >> 8 << dshift;
-            p1[i]     = b - a;
-        }
+        s->tdsp.decorrelate_sf(p1, p2, length, dshift, dfactor);
         break;
     case 6:
         FFSWAP(int32_t*, p1, p2);
@@ -632,7 +622,7 @@ static int decorrelate(TAKDecContext *s, int c1, int c2, int length)
         for (; length2 > 0; length2 -= tmp) {
             tmp = FFMIN(length2, x);
 
-            for (i = 0; i < tmp; i++)
+            for (i = 0; i < tmp - (tmp == length2); i++)
                 s->residues[filter_order + i] = *p2++ >> dshift;
 
             for (i = 0; i < tmp; i++) {
@@ -656,7 +646,7 @@ static int decorrelate(TAKDecContext *s, int c1, int c2, int length)
                 *p1++ = v;
             }
 
-            memcpy(s->residues, &s->residues[tmp], 2 * filter_order);
+            memmove(s->residues, &s->residues[tmp], 2 * filter_order);
         }
 
         emms_c();
@@ -664,6 +654,11 @@ static int decorrelate(TAKDecContext *s, int c1, int c2, int length)
     }
     }
 
+    if (s->dmode > 0 && s->dmode < 6) {
+        p1[0] = bp1;
+        p2[0] = bp2;
+    }
+
     return 0;
 }
 
@@ -908,6 +903,7 @@ static int tak_decode_frame(AVCodecContext *avctx, void *data,
     return pkt->size;
 }
 
+#if HAVE_THREADS
 static int init_thread_copy(AVCodecContext *avctx)
 {
     TAKDecContext *s = avctx->priv_data;
@@ -926,6 +922,7 @@ static int update_thread_context(AVCodecContext *dst,
     memcpy(&tdst->ti, &tsrc->ti, sizeof(TAKStreamInfo));
     return 0;
 }
+#endif
 
 static av_cold int tak_decode_close(AVCodecContext *avctx)
 {
@@ -947,7 +944,7 @@ AVCodec ff_tak_decoder = {
     .decode           = tak_decode_frame,
     .init_thread_copy = ONLY_IF_THREADS_ENABLED(init_thread_copy),
     .update_thread_context = ONLY_IF_THREADS_ENABLED(update_thread_context),
-    .capabilities     = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
+    .capabilities     = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
     .sample_fmts      = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_U8P,
                                                         AV_SAMPLE_FMT_S16P,
                                                         AV_SAMPLE_FMT_S32P,
diff --git a/libavcodec/takdsp.c b/libavcodec/takdsp.c
new file mode 100644
index 00000000..2441c2ba
--- /dev/null
+++ b/libavcodec/takdsp.c
@@ -0,0 +1,82 @@
+/*
+ * TAK decoder
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "takdsp.h"
+#include "config.h"
+
+/* TAK dmode 1 (left/side): p2[i] += p1[i]; p1 is only read. */
+static void decorrelate_ls(int32_t *p1, int32_t *p2, int length)
+{
+    int i;
+
+    for (i = 0; i < length; i++) {
+        /* Add as unsigned so an overflow wraps instead of being signed-overflow UB. */
+        p2[i] = (uint32_t)p1[i] + (uint32_t)p2[i];
+    }
+}
+
+/* TAK dmode 2 (side/right): p1[i] = p2[i] - p1[i]; p2 is only read. */
+static void decorrelate_sr(int32_t *p1, int32_t *p2, int length)
+{
+    int i;
+
+    for (i = 0; i < length; i++) {
+        /* Subtract as unsigned so an overflow wraps instead of being signed-overflow UB. */
+        p1[i] = (uint32_t)p2[i] - (uint32_t)p1[i];
+    }
+}
+
+/* TAK dmode 3 (side/mid): p1[i] -= p2[i] >> 1, then p2[i] += updated p1[i]. */
+static void decorrelate_sm(int32_t *p1, int32_t *p2, int length)
+{
+    int i;
+
+    for (i = 0; i < length; i++) {
+        int32_t  b = p2[i];                       /* kept signed for the >> 1 */
+        uint32_t a = (uint32_t)p1[i] - (b >> 1);  /* unsigned: wraps, no signed-overflow UB */
+        p1[i]      = a;
+        p2[i]      = a + b;
+    }
+}
+
+/* TAK dmodes 4/5 (side with scale factor): p1[i] = scaled(p2[i]) - p1[i]; p2 is only read. */
+static void decorrelate_sf(int32_t *p1, int32_t *p2, int length, int dshift, int dfactor)
+{
+    int i;
+
+    for (i = 0; i < length; i++) {
+        /* Multiply, add and shift left as unsigned: same bits, but no signed overflow or negative << UB. */
+        int32_t b = (unsigned)((int)(dfactor * (unsigned)(p2[i] >> dshift) + 128) >> 8) << dshift;
+        p1[i]     = (uint32_t)b - (uint32_t)p1[i];
+    }
+}
+
+av_cold void ff_takdsp_init(TAKDSPContext *c) /* fill every entry of the TAK DSP function table */
+{
+    c->decorrelate_ls = decorrelate_ls; /* start from the C implementations defined in this file */
+    c->decorrelate_sr = decorrelate_sr;
+    c->decorrelate_sm = decorrelate_sm;
+    c->decorrelate_sf = decorrelate_sf;
+
+    if (ARCH_X86) /* NOTE(review): presumably the x86 init replaces entries with optimized versions -- confirm */
+        ff_takdsp_init_x86(c);
+}
diff --git a/libavcodec/takdsp.h b/libavcodec/takdsp.h
new file mode 100644
index 00000000..c05b5741
--- /dev/null
+++ b/libavcodec/takdsp.h
@@ -0,0 +1,34 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_TAKDSP_H
+#define AVCODEC_TAKDSP_H
+
+#include <stdint.h>
+
+typedef struct TAKDSPContext { /* function table for decorrelating a pair of TAK channels */
+    void (*decorrelate_ls)(int32_t *p1, int32_t *p2, int length); /* dmode 1, left/side */
+    void (*decorrelate_sr)(int32_t *p1, int32_t *p2, int length); /* dmode 2, side/right */
+    void (*decorrelate_sm)(int32_t *p1, int32_t *p2, int length); /* dmode 3, side/mid */
+    void (*decorrelate_sf)(int32_t *p1, int32_t *p2, int length, int dshift, int dfactor); /* dmodes 4/5, side with scale factor */
+} TAKDSPContext;
+
+void ff_takdsp_init(TAKDSPContext *c);
+void ff_takdsp_init_x86(TAKDSPContext *c);
+
+#endif /* AVCODEC_TAKDSP_H */
diff --git a/libavcodec/targa.c b/libavcodec/targa.c
index b0c9b55f..215c0f51 100644
--- a/libavcodec/targa.c
+++ b/libavcodec/targa.c
@@ -303,5 +303,5 @@ AVCodec ff_targa_decoder = {
     .id             = AV_CODEC_ID_TARGA,
     .priv_data_size = sizeof(TargaContext),
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/targa_y216dec.c b/libavcodec/targa_y216dec.c
index 5f4eeaaa..21b3d35d 100644
--- a/libavcodec/targa_y216dec.c
+++ b/libavcodec/targa_y216dec.c
@@ -79,5 +79,5 @@ AVCodec ff_targa_y216_decoder = {
     .id           = AV_CODEC_ID_TARGA_Y216,
     .init         = y216_decode_init,
     .decode       = y216_decode_frame,
-    .capabilities = CODEC_CAP_DR1,
+    .capabilities = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/targaenc.c b/libavcodec/targaenc.c
index d4483ec3..66bc55c3 100644
--- a/libavcodec/targaenc.c
+++ b/libavcodec/targaenc.c
@@ -21,14 +21,22 @@
 
 #include <string.h>
 
+#include "libavutil/imgutils.h"
 #include "libavutil/internal.h"
 #include "libavutil/intreadwrite.h"
+#include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
 #include "avcodec.h"
 #include "internal.h"
 #include "rle.h"
 #include "targa.h"
 
+typedef struct TargaContext {
+    AVClass *class;
+
+    int rle;
+} TargaContext;
+
 /**
  * RLE compress the image, with maximum size of out_size
  * @param outbuf Output buffer
@@ -77,6 +85,7 @@ static int targa_encode_normal(uint8_t *outbuf, const AVFrame *pic, int bpp, int
 static int targa_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
                               const AVFrame *p, int *got_packet)
 {
+    TargaContext *s = avctx->priv_data;
     int bpp, picsize, datasize = -1, ret, i;
     uint8_t *out;
 
@@ -84,8 +93,9 @@ static int targa_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         av_log(avctx, AV_LOG_ERROR, "image dimensions too large\n");
         return AVERROR(EINVAL);
     }
-    picsize = avpicture_get_size(avctx->pix_fmt, avctx->width, avctx->height);
-    if ((ret = ff_alloc_packet2(avctx, pkt, picsize + 45)) < 0)
+    picsize = av_image_get_buffer_size(avctx->pix_fmt,
+                                       avctx->width, avctx->height, 1);
+    if ((ret = ff_alloc_packet2(avctx, pkt, picsize + 45, 0)) < 0)
         return ret;
 
     /* zero out the header and only set applicable fields */
@@ -145,8 +155,16 @@ static int targa_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     }
     bpp = pkt->data[16] >> 3;
 
+
+#if FF_API_CODER_TYPE
+FF_DISABLE_DEPRECATION_WARNINGS
+    if (avctx->coder_type == FF_CODER_TYPE_RAW)
+        s->rle = 0;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
     /* try RLE compression */
-    if (avctx->coder_type != FF_CODER_TYPE_RAW)
+    if (s->rle)
         datasize = targa_encode_rle(out, picsize, p, bpp, avctx->width, avctx->height);
 
     /* if that worked well, mark the picture as RLE compressed */
@@ -172,29 +190,39 @@ static int targa_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 
 static av_cold int targa_encode_init(AVCodecContext *avctx)
 {
-    avctx->coded_frame = av_frame_alloc();
-    if (!avctx->coded_frame)
-        return AVERROR(ENOMEM);
-
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
     avctx->coded_frame->key_frame = 1;
     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
     return 0;
 }
 
-static av_cold int targa_encode_close(AVCodecContext *avctx)
-{
-    av_frame_free(&avctx->coded_frame);
-    return 0;
-}
+#define OFFSET(x) offsetof(TargaContext, x)
+#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
+static const AVOption options[] = {
+    { "rle", "Use run-length compression", OFFSET(rle), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, VE },
+
+    { NULL },
+};
+
+static const AVClass targa_class = {
+    .class_name = "targa",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
 
 AVCodec ff_targa_encoder = {
     .name           = "targa",
     .long_name      = NULL_IF_CONFIG_SMALL("Truevision Targa image"),
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_TARGA,
+    .priv_data_size = sizeof(TargaContext),
+    .priv_class     = &targa_class,
     .init           = targa_encode_init,
-    .close          = targa_encode_close,
     .encode2        = targa_encode_frame,
     .pix_fmts       = (const enum AVPixelFormat[]){
         AV_PIX_FMT_BGR24, AV_PIX_FMT_BGRA, AV_PIX_FMT_RGB555LE, AV_PIX_FMT_GRAY8, AV_PIX_FMT_PAL8,
diff --git a/libavcodec/tdsc.c b/libavcodec/tdsc.c
index 8f0ebe7c..63cd4434 100644
--- a/libavcodec/tdsc.c
+++ b/libavcodec/tdsc.c
@@ -621,7 +621,7 @@ AVCodec ff_tdsc_decoder = {
     .decode         = tdsc_decode_frame,
     .close          = tdsc_close,
     .priv_data_size = sizeof(TDSCContext),
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE |
                       FF_CODEC_CAP_INIT_CLEANUP,
 };
diff --git a/libavcodec/textdec.c b/libavcodec/textdec.c
index c9f02a2a..a6c8722c 100644
--- a/libavcodec/textdec.c
+++ b/libavcodec/textdec.c
@@ -37,7 +37,7 @@ typedef struct {
 #define OFFSET(x) offsetof(TextContext, x)
 #define SD AV_OPT_FLAG_SUBTITLE_PARAM | AV_OPT_FLAG_DECODING_PARAM
 static const AVOption options[] = {
-    { "keep_ass_markup", "Set if ASS tags must be escaped", OFFSET(keep_ass_markup), AV_OPT_TYPE_INT,    {.i64=0}, 0, 1, .flags=SD },
+    { "keep_ass_markup", "Set if ASS tags must be escaped", OFFSET(keep_ass_markup), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, .flags=SD },
     { NULL }
 };
 
diff --git a/libavcodec/texturedsp.c b/libavcodec/texturedsp.c
new file mode 100644
index 00000000..c5e6cc62
--- /dev/null
+++ b/libavcodec/texturedsp.c
@@ -0,0 +1,614 @@
+/*
+ * Texture block decompression
+ * Copyright (C) 2009 Benjamin Dobell, Glass Echidna
+ * Copyright (C) 2012 Matthäus G. "Anteru" Chajdas (http://anteru.net)
+ * Copyright (C) 2015 Vittorio Giovara <vittorio.giovara@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/libm.h"
+
+#include "texturedsp.h"
+
+/* Pack four 8-bit channels into 32 bits: r in bits 0-7, g 8-15, b 16-23, a 24-31. */
+#define RGBA(r, g, b, a) (((uint8_t)(r) <<  0) | ((uint8_t)(g) <<  8) | \
+                          ((uint8_t)(b) << 16) | \
+                          ((unsigned)(uint8_t)(a) << 24)) /* unsigned: a >= 128 must not shift into int's sign bit */
+
+static av_always_inline void extract_color(uint32_t colors[4], /* out: 4-entry RGBA palette of one block */
+                                           uint16_t color0,    /* first packed 5:6:5 endpoint */
+                                           uint16_t color1,    /* second packed 5:6:5 endpoint */
+                                           int dxtn, int alpha) /* dxtn: always use the 4-colour branch; alpha: alpha of colors[3] in the 3-colour branch */
+{
+    int tmp; /* scratch for widening each 5- or 6-bit field to 8 bits */
+    uint8_t r0, g0, b0, r1, g1, b1;
+    uint8_t a = dxtn ? 0 : 255; /* alpha of the endpoint and blended entries; 0 lets DXT3/DXT5 callers OR in their own */
+
+    tmp = (color0 >> 11) * 255 + 16; /* top 5 bits; +16 rounds the /32 steps below */
+    r0  = (uint8_t) ((tmp / 32 + tmp) / 32);
+    tmp = ((color0 & 0x07E0) >> 5) * 255 + 32; /* middle 6 bits; +32 rounds the /64 steps below */
+    g0  = (uint8_t) ((tmp / 64 + tmp) / 64);
+    tmp = (color0 & 0x001F) * 255 + 16; /* low 5 bits */
+    b0  = (uint8_t) ((tmp / 32 + tmp) / 32);
+
+    tmp = (color1 >> 11) * 255 + 16; /* same expansion for the second endpoint */
+    r1  = (uint8_t) ((tmp / 32 + tmp) / 32);
+    tmp = ((color1 & 0x07E0) >> 5) * 255 + 32;
+    g1  = (uint8_t) ((tmp / 64 + tmp) / 64);
+    tmp = (color1 & 0x001F) * 255 + 16;
+    b1  = (uint8_t) ((tmp / 32 + tmp) / 32);
+
+    if (dxtn || color0 > color1) { /* 4-colour branch: both endpoints plus two blends weighted 2:1 and 1:2 */
+        colors[0] = RGBA(r0, g0, b0, a);
+        colors[1] = RGBA(r1, g1, b1, a);
+        colors[2] = RGBA((2 * r0 + r1) / 3,
+                         (2 * g0 + g1) / 3,
+                         (2 * b0 + b1) / 3,
+                         a);
+        colors[3] = RGBA((2 * r1 + r0) / 3,
+                         (2 * g1 + g0) / 3,
+                         (2 * b1 + b0) / 3,
+                         a);
+    } else { /* 3-colour branch: both endpoints, their average, and black carrying the caller's alpha */
+        colors[0] = RGBA(r0, g0, b0, a);
+        colors[1] = RGBA(r1, g1, b1, a);
+        colors[2] = RGBA((r0 + r1) / 2,
+                         (g0 + g1) / 2,
+                         (b0 + b1) / 2,
+                         a);
+        colors[3] = RGBA(0, 0, 0, alpha);
+    }
+}
+
+static inline void dxt1_block_internal(uint8_t *dst, ptrdiff_t stride,
+                                       const uint8_t *block, uint8_t alpha)
+{
+    /* Block layout: two 16-bit endpoints, then sixteen 2-bit palette selectors. */
+    const uint16_t endpoint0 = AV_RL16(block + 0);
+    const uint16_t endpoint1 = AV_RL16(block + 2);
+    uint32_t selectors       = AV_RL32(block + 4);
+    uint32_t palette[4];
+    int row, col;
+
+    extract_color(palette, endpoint0, endpoint1, 0, alpha);
+
+    /* Selectors are consumed lowest bits first, left to right, top to bottom. */
+    for (row = 0; row < 4; row++, dst += stride) {
+        for (col = 0; col < 4; col++, selectors >>= 2) {
+            /* Each output pixel occupies 4 bytes of the current row. */
+            AV_WL32(dst + col * 4, palette[selectors & 3]);
+        }
+    }
+}
+
+/**
+ * Decompress one block of a DXT1 texture and store the resulting 4x4
+ * RGBA pixels in 'dst'. Alpha component is fully opaque.
+ *
+ * @param dst    output buffer; 4 rows of 16 bytes each are written.
+ * @param stride distance in bytes between two output rows.
+ * @param block  block to decompress; 8 bytes are read.
+ * @return number of bytes of texture data consumed (always 8).
+ */
+static int dxt1_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
+{
+    dxt1_block_internal(dst, stride, block, 255); /* 255: the black entry of 3-colour blocks stays opaque */
+
+    return 8;
+}
+
+/**
+ * Decompress one block of a DXT1 with 1-bit alpha texture and store
+ * the resulting 4x4 RGBA pixels in 'dst'. Alpha is either fully opaque or
+ * fully transparent.
+ *
+ * @param dst    output buffer; 4 rows of 16 bytes each are written.
+ * @param stride distance in bytes between two output rows.
+ * @param block  block to decompress; 8 bytes are read.
+ * @return number of bytes of texture data consumed (always 8).
+ */
+static int dxt1a_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
+{
+    dxt1_block_internal(dst, stride, block, 0); /* 0: the black entry of 3-colour blocks becomes transparent */
+
+    return 8;
+}
+
+static inline void dxt3_block_internal(uint8_t *dst, ptrdiff_t stride,
+                                       const uint8_t *block)
+{
+    int x, y;
+    uint32_t colors[4];
+    uint16_t color0 = AV_RL16(block +  8); /* colour data sits in bytes 8-15 */
+    uint16_t color1 = AV_RL16(block + 10);
+    uint32_t code   = AV_RL32(block + 12); /* sixteen 2-bit palette selectors */
+
+    extract_color(colors, color0, color1, 1, 0); /* palette alpha is 0, so it can be ORed in below */
+
+    for (y = 0; y < 4; y++) {
+        /* Bytes 0-7 hold the alphas: one 16-bit word of four nibbles per row. */
+        const uint16_t alpha_code = AV_RL16(block + 2 * y);
+        uint8_t alpha_values[4];
+
+        alpha_values[0] = ((alpha_code >>  0) & 0x0F) * 17; /* 0..15 -> 0..255 */
+        alpha_values[1] = ((alpha_code >>  4) & 0x0F) * 17;
+        alpha_values[2] = ((alpha_code >>  8) & 0x0F) * 17;
+        alpha_values[3] = ((alpha_code >> 12) & 0x0F) * 17;
+
+        for (x = 0; x < 4; x++) {
+            /* Widen first: uint8_t promotes to int, so alpha >= 128 << 24 would be UB. */
+            uint32_t pixel = colors[code & 3] | ((uint32_t)alpha_values[x] << 24);
+            code >>= 2;
+            AV_WL32(dst + x * 4, pixel);
+        }
+        dst += stride;
+    }
+}
+
+/** Convert a premultiplied alpha pixel to a straight alpha pixel, in place. */
+static av_always_inline void premult2straight(uint8_t *src)
+{
+    int a = src[3]; /* the alpha byte itself is left unchanged */
+    int i;
+
+    if (a == 0) /* nothing to recover from a fully transparent pixel, and no divide by 0 */
+        return;
+
+    for (i = 0; i < 3; i++) /* undo c * a / 255; clamp in case the input has c > a */
+        src[i] = (uint8_t) FFMIN(src[i] * 255 / a, 255);
+}
+
+/**
+ * Decode a single DXT2 block into a 4x4 tile of RGBA pixels at 'dst'.
+ * Same layout as DXT3; every pixel then goes through premult2straight().
+ *
+ * @param dst    destination of the 4x4 tile.
+ * @param stride byte offset between consecutive rows of 'dst'.
+ * @param block  compressed input block.
+ * @return number of input bytes used, always 16.
+ */
+static int dxt2_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
+{
+    int px;
+
+    dxt3_block_internal(dst, stride, block);
+
+    /* The DXT3 decode leaves premultiplied alpha behind; lavc outputs (and
+     * swscale expects) straight alpha, so fix up all 16 pixels row by row. */
+    for (px = 0; px < 16; px++) {
+        uint8_t *pixel = dst + (px & 3) * 4 + (px >> 2) * stride;
+        premult2straight(pixel);
+    }
+    return 16;
+}
+
+/**
+ * Decompress one block of a DXT3 texture and store the resulting 4x4
+ * RGBA pixels in 'dst'.
+ *
+ * @param dst    output buffer; 4 rows of 16 bytes each are written.
+ * @param stride distance in bytes between two output rows.
+ * @param block  block to decompress; 16 bytes are read.
+ * @return number of bytes of texture data consumed (always 16).
+ */
+static int dxt3_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
+{
+    dxt3_block_internal(dst, stride, block); /* output is used as decoded, no post-processing */
+
+    return 16;
+}
+
+/**
+ * Unpack sixteen 3-bit indices, read as two 24-bit groups, into one
+ * byte each. The BC 16x3 index block is stored as
+ *   h g f e
+ *   d c b a
+ *   p o n m
+ *   l k j i
+ *
+ * Bits packed as
+ *  | h | g | f | e | d | c | b | a | // Entry
+ *  |765 432 107 654 321 076 543 210| // Bit
+ *  |0000000000111111111112222222222| // Byte
+ *
+ * @param dst receives 16 bytes, each holding an index in the range 0..7.
+ * @param src packed input; 6 bytes are read.
+ */
+static void decompress_indices(uint8_t *dst, const uint8_t *src)
+{
+    int half, n;
+
+    for (half = 0; half < 2; half++, src += 3, dst += 8) {
+        /* Eight entries per 3-byte group; the lowest three bits of the
+         * group belong to the first entry. */
+        int bits = AV_RL24(src);
+
+        for (n = 0; n < 8; n++, bits >>= 3)
+            dst[n] = bits & 0x7;
+    }
+}
+
+static inline void dxt5_block_internal(uint8_t *dst, ptrdiff_t stride,
+                                       const uint8_t *block)
+{
+    int x, y;
+    uint32_t colors[4];
+    uint8_t alpha_indices[16];
+    uint16_t color0 = AV_RL16(block + 8);  /* colour data sits in bytes 8-15 */
+    uint16_t color1 = AV_RL16(block + 10);
+    uint32_t code   = AV_RL32(block + 12); /* sixteen 2-bit palette selectors */
+    uint8_t alpha0  = *(block);            /* the two alpha endpoints */
+    uint8_t alpha1  = *(block + 1);
+
+    decompress_indices(alpha_indices, block + 2); /* sixteen 3-bit alpha selectors */
+
+    extract_color(colors, color0, color1, 1, 0); /* palette alpha is 0, so it can be ORed in below */
+
+    for (y = 0; y < 4; y++) {
+        for (x = 0; x < 4; x++) {
+            int alpha_code = alpha_indices[x + y * 4];
+            uint32_t pixel;
+            uint8_t alpha;
+
+            if (alpha_code == 0) {
+                alpha = alpha0;
+            } else if (alpha_code == 1) {
+                alpha = alpha1;
+            } else {
+                if (alpha0 > alpha1) { /* six values interpolated in sevenths */
+                    alpha = (uint8_t) (((8 - alpha_code) * alpha0 +
+                                        (alpha_code - 1) * alpha1) / 7);
+                } else { /* four values interpolated in fifths, plus fixed 0 and 255 */
+                    if (alpha_code == 6) {
+                        alpha = 0;
+                    } else if (alpha_code == 7) {
+                        alpha = 255;
+                    } else {
+                        alpha = (uint8_t) (((6 - alpha_code) * alpha0 +
+                                            (alpha_code - 1) * alpha1) / 5);
+                    }
+                }
+            }
+            pixel = colors[code & 3] | ((uint32_t)alpha << 24); /* cast: as int, alpha >= 128 << 24 would be UB */
+            code >>= 2;
+            AV_WL32(dst + x * 4, pixel);
+        }
+        dst += stride;
+    }
+}
+
+/**
+ * Decode a single DXT4 block into a 4x4 tile of RGBA pixels at 'dst'.
+ * Same layout as DXT5; every pixel then goes through premult2straight().
+ *
+ * @param dst    destination of the 4x4 tile.
+ * @param stride byte offset between consecutive rows of 'dst'.
+ * @param block  compressed input block.
+ * @return number of input bytes used, always 16.
+ */
+static int dxt4_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
+{
+    int px;
+
+    dxt5_block_internal(dst, stride, block);
+
+    /* The DXT5 decode leaves premultiplied alpha behind; lavc outputs (and
+     * swscale expects) straight alpha, so fix up all 16 pixels row by row. */
+    for (px = 0; px < 16; px++) {
+        uint8_t *pixel = dst + (px & 3) * 4 + (px >> 2) * stride;
+        premult2straight(pixel);
+    }
+    return 16;
+}
+
+/**
+ * Decompress one block of a DXT5 texture and store the resulting 4x4
+ * RGBA pixels in 'dst'.
+ *
+ * @param dst    output buffer; 4 rows of 16 bytes each are written.
+ * @param stride distance in bytes between two output rows.
+ * @param block  block to decompress; 16 bytes are read.
+ * @return number of bytes of texture data consumed (always 16).
+ */
+static int dxt5_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
+{
+    dxt5_block_internal(dst, stride, block); /* output is used as decoded, no post-processing */
+
+    return 16;
+}
+
+/**
+ * Convert one pixel from YCoCg to RGBA, in place.
+ *
+ * @param src    4-byte pixel, read as Co, Cg, scale-or-alpha, Y.
+ * @param scaled non-zero: byte 2 scales the chroma and alpha is forced to 255.
+ */
+static av_always_inline void ycocg2rgba(uint8_t *src, int scaled)
+{
+    const int luma    = src[3];
+    const int third   = src[2];
+    const int divisor = scaled ? (third >> 3) + 1 : 1;
+    /* Chroma is centred on 128; remove the offset, then undo the scaling. */
+    const int co      = (src[0] - 128) / divisor;
+    const int cg      = (src[1] - 128) / divisor;
+    const uint8_t out_r = av_clip_uint8(luma + co - cg);
+    const uint8_t out_g = av_clip_uint8(luma + cg);
+    const uint8_t out_b = av_clip_uint8(luma - co - cg);
+
+    src[0] = out_r;
+    src[1] = out_g;
+    src[2] = out_b;
+    src[3] = scaled ? 255 : third;
+}
+
+/**
+ * Decode a single DXT5 block carrying classic YCoCg data into a 4x4 tile
+ * of RGBA pixels at 'dst'. The output alpha is the decoded third channel.
+ *
+ * @param dst    destination of the 4x4 tile.
+ * @param stride byte offset between consecutive rows of 'dst'.
+ * @param block  compressed input block.
+ * @return number of input bytes used, always 16.
+ */
+static int dxt5y_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
+{
+    int px;
+
+    /* Plain DXT5 decode first: luma arrives in the alpha byte and the
+     * chroma in the first two bytes, so every pixel is converted afterwards. */
+    dxt5_block_internal(dst, stride, block);
+
+    for (px = 0; px < 16; px++) {
+        uint8_t *pixel = dst + (px & 3) * 4 + (px >> 2) * stride;
+        ycocg2rgba(pixel, 0);
+    }
+    return 16;
+}
+
+/**
+ * Decode a single DXT5 block carrying scaled YCoCg data into a 4x4 tile
+ * of RGBA pixels at 'dst'. The output alpha is always fully opaque.
+ *
+ * @param dst    destination of the 4x4 tile.
+ * @param stride byte offset between consecutive rows of 'dst'.
+ * @param block  compressed input block.
+ * @return number of input bytes used, always 16.
+ */
+static int dxt5ys_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
+{
+    int px;
+
+    /* Plain DXT5 decode first: luma arrives in the alpha byte and the
+     * chroma in the first two bytes, so every pixel is converted afterwards. */
+    dxt5_block_internal(dst, stride, block);
+
+    for (px = 0; px < 16; px++) {
+        uint8_t *pixel = dst + (px & 3) * 4 + (px >> 2) * stride;
+        ycocg2rgba(pixel, 1);
+    }
+    return 16;
+}
+
+static inline void rgtc_block_internal(uint8_t *dst, ptrdiff_t stride,
+                                       const uint8_t *block,
+                                       const int *color_tab)
+{
+    uint8_t selectors[16];
+    int px;
+
+    /* The 3-bit selectors start after the two endpoint bytes. */
+    decompress_indices(selectors, block + 2);
+
+    /* At most one or two channels are stored, as the format is only used
+     * for specular (black and white) or normal (red and green) maps. The
+     * standard asks for unused components to be zeroed, but many
+     * implementations replicate the value instead, and so does this one. */
+    for (px = 0; px < 16; px++) {
+        /* color_tab entries are looked up as-is and copied to R, G and B. */
+        const int level      = color_tab[selectors[px]];
+        const uint32_t pixel = RGBA(level, level, level, 255U);
+        uint8_t *out         = dst + (px & 3) * 4 + (px >> 2) * stride;
+
+        AV_WL32(out, pixel);
+    }
+}
+
+/*
+ * Build the 8-entry value table of one RGTC1 block from its two endpoint
+ * bytes and hand the block to rgtc_block_internal() for pixel output.
+ *
+ * sign selects the signed variant, whose endpoints are read as int8_t;
+ * dst, stride and block are forwarded unchanged.
+ */
+static inline void rgtc1_block_internal(uint8_t *dst, ptrdiff_t stride,
+                                        const uint8_t *block, int sign)
+{
+    int color_table[8];
+    int k;
+    /* Signed endpoints lie in [-128 127]; adding 128 maps them onto the
+     * unsigned range, after which both variants share the same code. */
+    const int r0 = sign ? (int8_t) block[0] + 128 : block[0];
+    const int r1 = sign ? (int8_t) block[1] + 128 : block[1];
+
+    /* Bit codes 000 and 001 select the endpoints themselves. */
+    color_table[0] = r0;
+    color_table[1] = r1;
+
+    if (r0 > r1) {
+        /* Bit codes 010..111: six values interpolated in sevenths, the
+         * weight moving step by step from r0 towards r1. */
+        for (k = 1; k <= 6; k++)
+            color_table[k + 1] = ((7 - k) * r0 + k * r1) / 7;
+    } else {
+        /* Bit codes 010..101: four values interpolated in fifths. */
+        for (k = 1; k <= 4; k++)
+            color_table[k + 1] = ((5 - k) * r0 + k * r1) / 5;
+
+        /* Bit codes 110 and 111 are pinned to the range limits. */
+        color_table[6] = 0;
+        color_table[7] = 255;
+    }
+
+    rgtc_block_internal(dst, stride, block, color_table);
+}
+
+/**
+ * Decompress one block of a RGTC1 texture with signed components
+ * and store the resulting RGBA pixels in 'dst'.
+ *
+ * @param dst    output buffer.
+ * @param stride scanline of 'dst' in bytes.
+ * @param block  block to decompress.
+ * @return how much texture data has been consumed (always 8 bytes).
+ */
+static int rgtc1s_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
+{
+    rgtc1_block_internal(dst, stride, block, 1);
+
+    return 8;
+}
+
+/**
+ * Decompress one block of a RGTC1 texture with unsigned components
+ * and store the resulting RGBA pixels in 'dst'.
+ *
+ * @param dst    output buffer.
+ * @param stride scanline of 'dst' in bytes.
+ * @param block  block to decompress.
+ * @return how much texture data has been consumed (always 8 bytes).
+ */
+static int rgtc1u_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
+{
+    rgtc1_block_internal(dst, stride, block, 0);
+
+    return 8;
+}
+
+/*
+ * Decode a 16-byte two-channel RGTC2 block: bytes 0-7 yield R, bytes 8-15
+ * yield G, B is derived from both and alpha is set fully opaque.
+ */
+static inline void rgtc2_block_internal(uint8_t *dst, ptrdiff_t stride,
+                                        const uint8_t *block, int sign)
+{
+    /* Scratch 4x4 RGBA blocks (16 bytes per row), one per stored channel. */
+    uint8_t red[4 * 4 * 4];
+    uint8_t green[4 * 4 * 4];
+    int i;
+
+    /* Each half of the input is decoded as an independent RGTC1 channel. */
+    rgtc1_block_internal(red,   16, block,     sign);
+    rgtc1_block_internal(green, 16, block + 8, sign);
+
+    /* Interleave the two channels; B is rebuilt exactly like a normal map. */
+    for (i = 0; i < 16; i++) {
+        uint8_t *p = dst + (i & 3) * 4 + (i >> 2) * stride;
+        int r = red[i * 4];
+        int g = green[i * 4];
+        int d = (255 * 255 - r * r - g * g) / 2;
+        /* Mid-range is used whenever the radicand is not positive. */
+        int b = d > 0 ? lrint(sqrtf(d)) : 127;
+
+        p[0] = r;
+        p[1] = g;
+        p[2] = b;
+        p[3] = 255;
+    }
+}
+
+/**
+ * Decompress one block of a RGTC2 texture with signed components
+ * and store the resulting RGBA pixels in 'dst'. Alpha is fully opaque.
+ *
+ * @param dst    output buffer.
+ * @param stride scanline of 'dst' in bytes.
+ * @param block  block to decompress.
+ * @return how much texture data has been consumed (always 16 bytes).
+ */
+static int rgtc2s_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
+{
+    rgtc2_block_internal(dst, stride, block, 1);
+
+    return 16;
+}
+
+/**
+ * Decompress one block of a RGTC2 texture with unsigned components
+ * and store the resulting RGBA pixels in 'dst'. Alpha is fully opaque.
+ *
+ * @param dst    output buffer.
+ * @param stride scanline of 'dst' in bytes.
+ * @param block  block to decompress.
+ * @return how much texture data has been consumed (always 16 bytes).
+ */
+static int rgtc2u_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
+{
+    rgtc2_block_internal(dst, stride, block, 0);
+
+    return 16;
+}
+
+/**
+ * Decompress one block of a 3Dc texture with unsigned components
+ * and store the resulting RGBA pixels in 'dst'. Alpha is fully opaque.
+ *
+ * @param dst    output buffer.
+ * @param stride scanline in bytes.
+ * @param block  block to decompress.
+ * @return how much texture data has been consumed.
+ */
+static int dxn3dc_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
+{
+    int i;
+
+    /* 3Dc is RGTC2 with R and G stored the other way around: decode it
+     * as unsigned RGTC2 first, then exchange the two channels. */
+    rgtc2_block_internal(dst, stride, block, 0);
+
+    for (i = 0; i < 16; i++) {
+        uint8_t *p = dst + (i & 3) * 4 + (i >> 2) * stride;
+        FFSWAP(uint8_t, p[0], p[1]);
+    }
+
+    return 16;
+}
+
+av_cold void ff_texturedsp_init(TextureDSPContext *c)
+{
+    c->dxt1_block   = dxt1_block;   /* every entry below is a decoder */
+    c->dxt1a_block  = dxt1a_block;
+    c->dxt2_block   = dxt2_block;
+    c->dxt3_block   = dxt3_block;
+    c->dxt4_block   = dxt4_block;
+    c->dxt5_block   = dxt5_block;
+    c->dxt5y_block  = dxt5y_block;
+    c->dxt5ys_block = dxt5ys_block; /* DXT5 with luma in alpha, 16 bytes */
+    c->rgtc1s_block = rgtc1s_block; /* signed,   8 bytes per block */
+    c->rgtc1u_block = rgtc1u_block; /* unsigned, 8 bytes per block */
+    c->rgtc2s_block = rgtc2s_block; /* signed,   16 bytes per block */
+    c->rgtc2u_block = rgtc2u_block; /* unsigned, 16 bytes per block */
+    c->dxn3dc_block = dxn3dc_block; /* RGTC2 with R and G swapped */
+}
diff --git a/libavcodec/texturedsp.h b/libavcodec/texturedsp.h
new file mode 100644
index 00000000..26f3b647
--- /dev/null
+++ b/libavcodec/texturedsp.h
@@ -0,0 +1,64 @@
+/*
+ * Texture block module
+ * Copyright (C) 2015 Vittorio Giovara <vittorio.giovara@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Texture block (4x4) module
+ *
+ * References:
+ *   https://www.opengl.org/wiki/S3_Texture_Compression
+ *   https://www.opengl.org/wiki/Red_Green_Texture_Compression
+ *   https://msdn.microsoft.com/en-us/library/bb694531%28v=vs.85%29.aspx
+ *
+ * All functions return how much data has been written or read.
+ *
+ * Pixel input or output format is always AV_PIX_FMT_RGBA.
+ */
+
+#ifndef AVCODEC_TEXTUREDSP_H
+#define AVCODEC_TEXTUREDSP_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#define TEXTURE_BLOCK_W 4
+#define TEXTURE_BLOCK_H 4
+
+typedef struct TextureDSPContext { /* one 4x4 block callback per format */
+    int (*dxt1_block)  (uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*dxt1a_block) (uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*dxt2_block)  (uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*dxt3_block)  (uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*dxt4_block)  (uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*dxt5_block)  (uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*dxt5y_block) (uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*dxt5ys_block)(uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*rgtc1s_block)(uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*rgtc1u_block)(uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*rgtc2s_block)(uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*rgtc2u_block)(uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*dxn3dc_block)(uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+} TextureDSPContext;
+
+void ff_texturedsp_init(TextureDSPContext *c);    /* installs decompressors */
+void ff_texturedspenc_init(TextureDSPContext *c); /* dxt1, dxt5, dxt5ys only */
+
+#endif /* AVCODEC_TEXTUREDSP_H */
diff --git a/libavcodec/texturedspenc.c b/libavcodec/texturedspenc.c
new file mode 100644
index 00000000..7160396f
--- /dev/null
+++ b/libavcodec/texturedspenc.c
@@ -0,0 +1,655 @@
+/*
+ * Texture block compression
+ * Copyright (C) 2015 Vittorio Giovara <vittorio.giovara@gmail.com>
+ * Based on public domain code by Fabian Giesen, Sean Barrett and Yann Collet.
+ *
+ * This file is part of FFmpeg
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
+
+#include "texturedsp.h"
+
+static const uint8_t expand5[32] = { /* 5-bit channel value -> 8 bits */
+      0,   8,  16,  24,  33,  41,  49,  57,  66,  74,  82,  90,
+     99, 107, 115, 123, 132, 140, 148, 156, 165, 173, 181, 189,
+    198, 206, 214, 222, 231, 239, 247, 255,
+};
+
+static const uint8_t expand6[64] = { /* 6-bit channel value -> 8 bits */
+      0,   4,   8,  12,  16,  20,  24,  28,  32,  36,  40,  44,
+     48,  52,  56,  60,  65,  69,  73,  77,  81,  85,  89,  93,
+     97, 101, 105, 109, 113, 117, 121, 125, 130, 134, 138, 142,
+    146, 150, 154, 158, 162, 166, 170, 174, 178, 182, 186, 190,
+    195, 199, 203, 207, 211, 215, 219, 223, 227, 231, 235, 239,
+    243, 247, 251, 255,
+};
+
+static const uint8_t match5[256][2] = { /* 8-bit value -> { max, min } 5-bit */
+    {  0,  0 }, {  0,  0 }, {  0,  1 }, {  0,  1 }, {  1,  0 }, {  1,  0 },
+    {  1,  0 }, {  1,  1 }, {  1,  1 }, {  2,  0 }, {  2,  0 }, {  0,  4 },
+    {  2,  1 }, {  2,  1 }, {  2,  1 }, {  3,  0 }, {  3,  0 }, {  3,  0 },
+    {  3,  1 }, {  1,  5 }, {  3,  2 }, {  3,  2 }, {  4,  0 }, {  4,  0 },
+    {  4,  1 }, {  4,  1 }, {  4,  2 }, {  4,  2 }, {  4,  2 }, {  3,  5 },
+    {  5,  1 }, {  5,  1 }, {  5,  2 }, {  4,  4 }, {  5,  3 }, {  5,  3 },
+    {  5,  3 }, {  6,  2 }, {  6,  2 }, {  6,  2 }, {  6,  3 }, {  5,  5 },
+    {  6,  4 }, {  6,  4 }, {  4,  8 }, {  7,  3 }, {  7,  3 }, {  7,  3 },
+    {  7,  4 }, {  7,  4 }, {  7,  4 }, {  7,  5 }, {  5,  9 }, {  7,  6 },
+    {  7,  6 }, {  8,  4 }, {  8,  4 }, {  8,  5 }, {  8,  5 }, {  8,  6 },
+    {  8,  6 }, {  8,  6 }, {  7,  9 }, {  9,  5 }, {  9,  5 }, {  9,  6 },
+    {  8,  8 }, {  9,  7 }, {  9,  7 }, {  9,  7 }, { 10,  6 }, { 10,  6 },
+    { 10,  6 }, { 10,  7 }, {  9,  9 }, { 10,  8 }, { 10,  8 }, {  8, 12 },
+    { 11,  7 }, { 11,  7 }, { 11,  7 }, { 11,  8 }, { 11,  8 }, { 11,  8 },
+    { 11,  9 }, {  9, 13 }, { 11, 10 }, { 11, 10 }, { 12,  8 }, { 12,  8 },
+    { 12,  9 }, { 12,  9 }, { 12, 10 }, { 12, 10 }, { 12, 10 }, { 11, 13 },
+    { 13,  9 }, { 13,  9 }, { 13, 10 }, { 12, 12 }, { 13, 11 }, { 13, 11 },
+    { 13, 11 }, { 14, 10 }, { 14, 10 }, { 14, 10 }, { 14, 11 }, { 13, 13 },
+    { 14, 12 }, { 14, 12 }, { 12, 16 }, { 15, 11 }, { 15, 11 }, { 15, 11 },
+    { 15, 12 }, { 15, 12 }, { 15, 12 }, { 15, 13 }, { 13, 17 }, { 15, 14 },
+    { 15, 14 }, { 16, 12 }, { 16, 12 }, { 16, 13 }, { 16, 13 }, { 16, 14 },
+    { 16, 14 }, { 16, 14 }, { 15, 17 }, { 17, 13 }, { 17, 13 }, { 17, 14 },
+    { 16, 16 }, { 17, 15 }, { 17, 15 }, { 17, 15 }, { 18, 14 }, { 18, 14 },
+    { 18, 14 }, { 18, 15 }, { 17, 17 }, { 18, 16 }, { 18, 16 }, { 16, 20 },
+    { 19, 15 }, { 19, 15 }, { 19, 15 }, { 19, 16 }, { 19, 16 }, { 19, 16 },
+    { 19, 17 }, { 17, 21 }, { 19, 18 }, { 19, 18 }, { 20, 16 }, { 20, 16 },
+    { 20, 17 }, { 20, 17 }, { 20, 18 }, { 20, 18 }, { 20, 18 }, { 19, 21 },
+    { 21, 17 }, { 21, 17 }, { 21, 18 }, { 20, 20 }, { 21, 19 }, { 21, 19 },
+    { 21, 19 }, { 22, 18 }, { 22, 18 }, { 22, 18 }, { 22, 19 }, { 21, 21 },
+    { 22, 20 }, { 22, 20 }, { 20, 24 }, { 23, 19 }, { 23, 19 }, { 23, 19 },
+    { 23, 20 }, { 23, 20 }, { 23, 20 }, { 23, 21 }, { 21, 25 }, { 23, 22 },
+    { 23, 22 }, { 24, 20 }, { 24, 20 }, { 24, 21 }, { 24, 21 }, { 24, 22 },
+    { 24, 22 }, { 24, 22 }, { 23, 25 }, { 25, 21 }, { 25, 21 }, { 25, 22 },
+    { 24, 24 }, { 25, 23 }, { 25, 23 }, { 25, 23 }, { 26, 22 }, { 26, 22 },
+    { 26, 22 }, { 26, 23 }, { 25, 25 }, { 26, 24 }, { 26, 24 }, { 24, 28 },
+    { 27, 23 }, { 27, 23 }, { 27, 23 }, { 27, 24 }, { 27, 24 }, { 27, 24 },
+    { 27, 25 }, { 25, 29 }, { 27, 26 }, { 27, 26 }, { 28, 24 }, { 28, 24 },
+    { 28, 25 }, { 28, 25 }, { 28, 26 }, { 28, 26 }, { 28, 26 }, { 27, 29 },
+    { 29, 25 }, { 29, 25 }, { 29, 26 }, { 28, 28 }, { 29, 27 }, { 29, 27 },
+    { 29, 27 }, { 30, 26 }, { 30, 26 }, { 30, 26 }, { 30, 27 }, { 29, 29 },
+    { 30, 28 }, { 30, 28 }, { 30, 28 }, { 31, 27 }, { 31, 27 }, { 31, 27 },
+    { 31, 28 }, { 31, 28 }, { 31, 28 }, { 31, 29 }, { 31, 29 }, { 31, 30 },
+    { 31, 30 }, { 31, 30 }, { 31, 31 }, { 31, 31 },
+};
+
+static const uint8_t match6[256][2] = { /* 8-bit value -> { max, min } 6-bit */
+    {  0,  0 }, {  0,  1 }, {  1,  0 }, {  1,  0 }, {  1,  1 }, {  2,  0 },
+    {  2,  1 }, {  3,  0 }, {  3,  0 }, {  3,  1 }, {  4,  0 }, {  4,  0 },
+    {  4,  1 }, {  5,  0 }, {  5,  1 }, {  6,  0 }, {  6,  0 }, {  6,  1 },
+    {  7,  0 }, {  7,  0 }, {  7,  1 }, {  8,  0 }, {  8,  1 }, {  8,  1 },
+    {  8,  2 }, {  9,  1 }, {  9,  2 }, {  9,  2 }, {  9,  3 }, { 10,  2 },
+    { 10,  3 }, { 10,  3 }, { 10,  4 }, { 11,  3 }, { 11,  4 }, { 11,  4 },
+    { 11,  5 }, { 12,  4 }, { 12,  5 }, { 12,  5 }, { 12,  6 }, { 13,  5 },
+    { 13,  6 }, {  8, 16 }, { 13,  7 }, { 14,  6 }, { 14,  7 }, {  9, 17 },
+    { 14,  8 }, { 15,  7 }, { 15,  8 }, { 11, 16 }, { 15,  9 }, { 15, 10 },
+    { 16,  8 }, { 16,  9 }, { 16, 10 }, { 15, 13 }, { 17,  9 }, { 17, 10 },
+    { 17, 11 }, { 15, 16 }, { 18, 10 }, { 18, 11 }, { 18, 12 }, { 16, 16 },
+    { 19, 11 }, { 19, 12 }, { 19, 13 }, { 17, 17 }, { 20, 12 }, { 20, 13 },
+    { 20, 14 }, { 19, 16 }, { 21, 13 }, { 21, 14 }, { 21, 15 }, { 20, 17 },
+    { 22, 14 }, { 22, 15 }, { 25, 10 }, { 22, 16 }, { 23, 15 }, { 23, 16 },
+    { 26, 11 }, { 23, 17 }, { 24, 16 }, { 24, 17 }, { 27, 12 }, { 24, 18 },
+    { 25, 17 }, { 25, 18 }, { 28, 13 }, { 25, 19 }, { 26, 18 }, { 26, 19 },
+    { 29, 14 }, { 26, 20 }, { 27, 19 }, { 27, 20 }, { 30, 15 }, { 27, 21 },
+    { 28, 20 }, { 28, 21 }, { 28, 21 }, { 28, 22 }, { 29, 21 }, { 29, 22 },
+    { 24, 32 }, { 29, 23 }, { 30, 22 }, { 30, 23 }, { 25, 33 }, { 30, 24 },
+    { 31, 23 }, { 31, 24 }, { 27, 32 }, { 31, 25 }, { 31, 26 }, { 32, 24 },
+    { 32, 25 }, { 32, 26 }, { 31, 29 }, { 33, 25 }, { 33, 26 }, { 33, 27 },
+    { 31, 32 }, { 34, 26 }, { 34, 27 }, { 34, 28 }, { 32, 32 }, { 35, 27 },
+    { 35, 28 }, { 35, 29 }, { 33, 33 }, { 36, 28 }, { 36, 29 }, { 36, 30 },
+    { 35, 32 }, { 37, 29 }, { 37, 30 }, { 37, 31 }, { 36, 33 }, { 38, 30 },
+    { 38, 31 }, { 41, 26 }, { 38, 32 }, { 39, 31 }, { 39, 32 }, { 42, 27 },
+    { 39, 33 }, { 40, 32 }, { 40, 33 }, { 43, 28 }, { 40, 34 }, { 41, 33 },
+    { 41, 34 }, { 44, 29 }, { 41, 35 }, { 42, 34 }, { 42, 35 }, { 45, 30 },
+    { 42, 36 }, { 43, 35 }, { 43, 36 }, { 46, 31 }, { 43, 37 }, { 44, 36 },
+    { 44, 37 }, { 44, 37 }, { 44, 38 }, { 45, 37 }, { 45, 38 }, { 40, 48 },
+    { 45, 39 }, { 46, 38 }, { 46, 39 }, { 41, 49 }, { 46, 40 }, { 47, 39 },
+    { 47, 40 }, { 43, 48 }, { 47, 41 }, { 47, 42 }, { 48, 40 }, { 48, 41 },
+    { 48, 42 }, { 47, 45 }, { 49, 41 }, { 49, 42 }, { 49, 43 }, { 47, 48 },
+    { 50, 42 }, { 50, 43 }, { 50, 44 }, { 48, 48 }, { 51, 43 }, { 51, 44 },
+    { 51, 45 }, { 49, 49 }, { 52, 44 }, { 52, 45 }, { 52, 46 }, { 51, 48 },
+    { 53, 45 }, { 53, 46 }, { 53, 47 }, { 52, 49 }, { 54, 46 }, { 54, 47 },
+    { 57, 42 }, { 54, 48 }, { 55, 47 }, { 55, 48 }, { 58, 43 }, { 55, 49 },
+    { 56, 48 }, { 56, 49 }, { 59, 44 }, { 56, 50 }, { 57, 49 }, { 57, 50 },
+    { 60, 45 }, { 57, 51 }, { 58, 50 }, { 58, 51 }, { 61, 46 }, { 58, 52 },
+    { 59, 51 }, { 59, 52 }, { 62, 47 }, { 59, 53 }, { 60, 52 }, { 60, 53 },
+    { 60, 53 }, { 60, 54 }, { 61, 53 }, { 61, 54 }, { 61, 54 }, { 61, 55 },
+    { 62, 54 }, { 62, 55 }, { 62, 55 }, { 62, 56 }, { 63, 55 }, { 63, 56 },
+    { 63, 56 }, { 63, 57 }, { 63, 58 }, { 63, 59 }, { 63, 59 }, { 63, 60 },
+    { 63, 61 }, { 63, 62 }, { 63, 62 }, { 63, 63 },
+};
+
+/* Division-free rounded (a * b) / 255; both arguments are evaluated twice */
+#define mul8(a, b) (((a) * (b) + 128 + (((a) * (b) + 128) >> 8)) >> 8)
+
+/* Pack 8-bit r, g, b into an rgb565 word, red in the five topmost bits */
+#define rgb2rgb565(r, g, b) \
+    ((mul8(r, 31) << 11) | (mul8(g, 63) << 5) | (mul8(b, 31) << 0))
+
+/* (2 * a + b) / 3 with integer division: one third of the way from a to b */
+#define lerp13(a, b) ((2 * (a) + (b)) / 3)
+
+/* Blend two RGB triplets channel by channel: out = (2 * p1 + p2) / 3 */
+static inline void lerp13rgb(uint8_t *out, uint8_t *p1, uint8_t *p2)
+{
+    int ch;
+    for (ch = 0; ch < 3; ch++)
+        out[ch] = lerp13(p1[ch], p2[ch]);
+}
+
+/* Unpack an rgb565 word (5 red, 6 green, 5 blue bits, red on top) to bytes */
+static inline void rgb5652rgb(uint8_t *out, uint16_t v)
+{
+    const int red5   = (v >> 11) & 0x1f;
+    const int green6 = (v >>  5) & 0x3f;
+    const int blue5  =  v        & 0x1f;
+
+    out[0] = expand5[red5];
+    out[1] = expand6[green6];
+    out[2] = expand5[blue5];
+    out[3] = 0;
+}
+
+/* For each pixel of a 4x4 RGBA block pick one of the four colors implied by
+ * endpoints c0/c1; returns 16 two-bit indices, first pixel in the low bits */
+static unsigned int match_colors(const uint8_t *block, ptrdiff_t stride,
+                                 uint16_t c0, uint16_t c1)
+{
+    uint32_t mask = 0;
+    int dirr, dirg, dirb;
+    int dots[16], stops[4];
+    int x, y, k = 0;
+    int c0_point, half_point, c3_point;
+    uint8_t color[16];
+    /* Unsigned on purpose: 2 << 30 and 3 << 30 do not fit in a signed int */
+    static const uint32_t indexMap[8] = {
+        0U << 30, 2U << 30, 0U << 30, 2U << 30,
+        3U << 30, 3U << 30, 1U << 30, 1U << 30,
+    };
+
+    /* Fill color and compute direction for each component */
+    rgb5652rgb(color + 0, c0);
+    rgb5652rgb(color + 4, c1);
+    lerp13rgb(color + 8, color + 0, color + 4);
+    lerp13rgb(color + 12, color + 4, color + 0);
+
+    dirr = color[0 * 4 + 0] - color[1 * 4 + 0];
+    dirg = color[0 * 4 + 1] - color[1 * 4 + 1];
+    dirb = color[0 * 4 + 2] - color[1 * 4 + 2];
+
+    for (y = 0; y < 4; y++) {
+        for (x = 0; x < 4; x++)
+            dots[k++] = block[0 + x * 4 + y * stride] * dirr +
+                        block[1 + x * 4 + y * stride] * dirg +
+                        block[2 + x * 4 + y * stride] * dirb;
+
+        stops[y] = color[0 + y * 4] * dirr +
+                   color[1 + y * 4] * dirg +
+                   color[2 + y * 4] * dirb;
+    }
+
+    /* Think of the colors as arranged on a line; project point onto that line,
+     * then choose next color out of available ones. we compute the crossover
+     * points for 'best color in top half'/'best in bottom half' and then
+     * the same inside that subinterval.
+     *
+     * Relying on this 1d approximation isn't always optimal in terms of
+     * euclidean distance, but it's very close and a lot faster.
+     * http://cbloomrants.blogspot.com/2008/12/12-08-08-dxtc-summary.html */
+    c0_point   = (stops[1] + stops[3]) >> 1;
+    half_point = (stops[3] + stops[2]) >> 1;
+    c3_point   = (stops[2] + stops[0]) >> 1;
+
+    for (x = 0; x < 16; x++) {
+        int dot  = dots[x];
+        int bits = (dot < half_point ? 4 : 0) |
+                   (dot < c0_point   ? 2 : 0) |
+                   (dot < c3_point   ? 1 : 0);
+
+        mask >>= 2;
+        mask  |= indexMap[bits];
+    }
+
+    return mask;
+}
+
+/* Choose the rgb565 endpoints of a 4x4 RGBA block: the two pixels farthest
+ * apart along its principal color axis (luma as a fallback) are returned */
+static void optimize_colors(const uint8_t *block, ptrdiff_t stride,
+                            uint16_t *pmax16, uint16_t *pmin16)
+{
+    const uint8_t *minp, *maxp;
+    const int iter_power = 4;
+    double magn;
+    int v_r, v_g, v_b;
+    float covf[6], vfr, vfg, vfb;
+    int mind, maxd;
+    int cov[6] = { 0 };
+    int mu[3], min[3], max[3];
+    int ch, iter, x, y;
+
+    /* Per-channel mean and range, gathered over all 16 pixels of the block */
+    for (ch = 0; ch < 3; ch++) {
+        const uint8_t *bp = &block[ch];
+        int muv = 0, minv = bp[0], maxv = bp[0];
+
+        for (y = 0; y < 4; y++) {
+            for (x = 0; x < 4; x++) {
+                int v = bp[x * 4 + y * stride];
+                muv += v;
+                if (v < minv)
+                    minv = v;
+                else if (v > maxv)
+                    maxv = v;
+            }
+        }
+
+        mu[ch]  = (muv + 8) >> 4;
+        min[ch] = minv;
+        max[ch] = maxv;
+    }
+
+    /* Determine covariance matrix */
+    for (y = 0; y < 4; y++) {
+        for (x = 0; x < 4; x++) {
+            int r = block[x * 4 + stride * y + 0] - mu[0];
+            int g = block[x * 4 + stride * y + 1] - mu[1];
+            int b = block[x * 4 + stride * y + 2] - mu[2];
+
+            cov[0] += r * r;
+            cov[1] += r * g;
+            cov[2] += r * b;
+            cov[3] += g * g;
+            cov[4] += g * b;
+            cov[5] += b * b;
+        }
+    }
+
+    /* Convert covariance matrix to float, find principal axis via power iter */
+    for (x = 0; x < 6; x++)
+        covf[x] = cov[x] / 255.0f;
+
+    vfr = (float) (max[0] - min[0]);
+    vfg = (float) (max[1] - min[1]);
+    vfb = (float) (max[2] - min[2]);
+
+    for (iter = 0; iter < iter_power; iter++) {
+        float r = vfr * covf[0] + vfg * covf[1] + vfb * covf[2];
+        float g = vfr * covf[1] + vfg * covf[3] + vfb * covf[4];
+        float b = vfr * covf[2] + vfg * covf[4] + vfb * covf[5];
+
+        vfr = r;
+        vfg = g;
+        vfb = b;
+    }
+
+    magn = fabs(vfr);
+    if (fabs(vfg) > magn)
+        magn = fabs(vfg);
+    if (fabs(vfb) > magn)
+        magn = fabs(vfb);
+
+    /* if magnitude is too small, default to luminance */
+    if (magn < 4.0f) {
+        /* JPEG YCbCr luma coefs, scaled by 1000 */
+        v_r = 299;
+        v_g = 587;
+        v_b = 114;
+    } else {
+        magn = 512.0 / magn;
+        v_r  = (int) (vfr * magn);
+        v_g  = (int) (vfg * magn);
+        v_b  = (int) (vfb * magn);
+    }
+
+    /* Pick colors at extreme points */
+    mind = maxd = block[0] * v_r + block[1] * v_g + block[2] * v_b;
+    minp = maxp = block;
+    for (y = 0; y < 4; y++) {
+        for (x = 0; x < 4; x++) {
+            int dot = block[x * 4 + y * stride + 0] * v_r +
+                      block[x * 4 + y * stride + 1] * v_g +
+                      block[x * 4 + y * stride + 2] * v_b;
+
+            if (dot < mind) {
+                mind = dot;
+                minp = block + x * 4 + y * stride;
+            } else if (dot > maxd) {
+                maxd = dot;
+                maxp = block + x * 4 + y * stride;
+            }
+        }
+    }
+
+    *pmax16 = rgb2rgb565(maxp[0], maxp[1], maxp[2]);
+    *pmin16 = rgb2rgb565(minp[0], minp[1], minp[2]);
+}
+
+/* Try to optimize colors to suit block contents better, by solving
+ * a least squares system via normal equations + Cramer's rule. */
+static int refine_colors(const uint8_t *block, ptrdiff_t stride,
+                         uint16_t *pmax16, uint16_t *pmin16, uint32_t mask)
+{
+    uint32_t cm = mask;
+    uint16_t oldMin = *pmin16;
+    uint16_t oldMax = *pmax16;
+    uint16_t min16, max16;
+    int x, y;
+
+    /* Additional magic to save a lot of multiplies in the accumulating loop.
+     * w1tab is the weight w1 attached to each index (in thirds); prods packs
+     * the products w1*w1, w2*w2 and w1*w2 (w2 = 3 - w1) in one 32-bit value */
+    const int w1tab[4] = { 3, 0, 2, 1 };
+    const int prods[4] = { 0x090000, 0x000900, 0x040102, 0x010402 };
+
+    /* Check if all pixels have the same index */
+    if ((mask ^ (mask << 2)) < 4) {
+        /* If so, linear system would be singular; solve using optimal
+         * single-color match on average color. */
+        int r = 8, g = 8, b = 8; /* 8: rounding term for the >> 4 below */
+        for (y = 0; y < 4; y++) {
+            for (x = 0; x < 4; x++) {
+                r += block[0 + x * 4 + y * stride];
+                g += block[1 + x * 4 + y * stride];
+                b += block[2 + x * 4 + y * stride];
+            }
+        }
+
+        r >>= 4;
+        g >>= 4;
+        b >>= 4;
+
+        max16 = (match5[r][0] << 11) | (match6[g][0] << 5) | match5[b][0];
+        min16 = (match5[r][1] << 11) | (match6[g][1] << 5) | match5[b][1];
+    } else {
+        float fr, fg, fb;
+        int at1_r = 0, at1_g = 0, at1_b = 0;
+        int at2_r = 0, at2_g = 0, at2_b = 0;
+        int akku = 0;
+        int xx, xy, yy;
+
+        for (y = 0; y < 4; y++) {
+            for (x = 0; x < 4; x++) {
+                int step = cm & 3; /* index of this pixel */
+                int w1 = w1tab[step];
+                int r = block[0 + x * 4 + y * stride];
+                int g = block[1 + x * 4 + y * stride];
+                int b = block[2 + x * 4 + y * stride];
+
+                akku  += prods[step];
+                at1_r += w1 * r;
+                at1_g += w1 * g;
+                at1_b += w1 * b;
+                at2_r += r;
+                at2_g += g;
+                at2_b += b;
+
+                cm >>= 2;
+            }
+        }
+
+        /* Turn the plain sums into sums weighted by w2 = 3 - w1 */
+        at2_r = 3 * at2_r - at1_r;
+        at2_g = 3 * at2_g - at1_g;
+        at2_b = 3 * at2_b - at1_b;
+
+        /* Extract solutions and decide solvability */
+        xx =  akku >> 16;
+        yy = (akku >>  8) & 0xFF;
+        xy = (akku >>  0) & 0xFF;
+
+        fr = 3.0f * 31.0f / 255.0f / (xx * yy - xy * xy);
+        fg = fr * 63.0f / 31.0f;
+        fb = fr;
+
+        /* Solve */
+        max16  = av_clip_uintp2((at1_r * yy - at2_r * xy) * fr + 0.5f, 5) << 11;
+        max16 |= av_clip_uintp2((at1_g * yy - at2_g * xy) * fg + 0.5f, 6) <<  5;
+        max16 |= av_clip_uintp2((at1_b * yy - at2_b * xy) * fb + 0.5f, 5) <<  0;
+
+        min16  = av_clip_uintp2((at2_r * xx - at1_r * xy) * fr + 0.5f, 5) << 11;
+        min16 |= av_clip_uintp2((at2_g * xx - at1_g * xy) * fg + 0.5f, 6) <<  5;
+        min16 |= av_clip_uintp2((at2_b * xx - at1_b * xy) * fb + 0.5f, 5) <<  0;
+    }
+
+    *pmin16 = min16;
+    *pmax16 = max16;
+    return oldMin != min16 || oldMax != max16; /* non-zero: endpoints moved */
+}
+
+/* Tell whether all 16 pixels of the block carry the same 32-bit value */
+static int constant_color(const uint8_t *block, ptrdiff_t stride)
+{
+    const uint32_t ref = AV_RL32(block);
+    int i;
+
+    /* Whole pixels are compared, so the fourth byte takes part as well. */
+    for (i = 0; i < 16; i++)
+        if (AV_RL32(block + (i & 3) * 4 + (i >> 2) * stride) != ref)
+            return 0;
+    return 1;
+}
+
+/* Main color compression function: emits two rgb565 endpoints followed by
+ * the 32-bit index mask, all little-endian (8 bytes in total) */
+static void compress_color(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
+{
+    uint32_t mask = 0;
+    uint16_t max16, min16;
+
+    if (constant_color(block, stride)) {
+        /* A flat block skips the search: the endpoints come straight from
+         * the match tables and every pixel uses index 2. */
+        const int r = block[0];
+        const int g = block[1];
+        const int b = block[2];
+
+        max16 = (match5[r][0] << 11) | (match6[g][0] << 5) | match5[b][0];
+        min16 = (match5[r][1] << 11) | (match6[g][1] << 5) | match5[b][1];
+        mask  = 0xAAAAAAAA;
+    } else {
+        /* Otherwise find pca and map along principal axis; identical
+         * endpoints leave nothing to choose from, so the mask stays 0. */
+        optimize_colors(block, stride, &max16, &min16);
+        if (max16 != min16)
+            mask = match_colors(block, stride, max16, min16);
+
+        /* One pass refinement; the indices are only recomputed when it
+         * actually moved an endpoint. */
+        if (refine_colors(block, stride, &max16, &min16, mask)) {
+            if (max16 != min16)
+                mask = match_colors(block, stride, max16, min16);
+            else
+                mask = 0;
+        }
+    }
+
+    /* Finally write the color block, largest endpoint first; swapping them
+     * is compensated by flipping the low bit of every two-bit index. */
+    if (max16 < min16) {
+        FFSWAP(uint16_t, min16, max16);
+        mask ^= 0x55555555;
+    }
+
+    AV_WL16(dst + 0, max16);
+    AV_WL16(dst + 2, min16);
+    AV_WL32(dst + 4, mask);
+}
+
+/* Alpha compression: writes 8 bytes (max, min, then 16 3-bit indices) */
+static void compress_alpha(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
+{
+    int x, y;
+    int dist, bias, dist4, dist2;
+    int mn, mx;
+    int bits = 0;
+    int mask = 0;
+
+    memset(dst, 0, 8);
+
+    /* Find min/max alpha (byte 3 of every pixel) */
+    mn = mx = block[3];
+    for (y = 0; y < 4; y++) {
+        for (x = 0; x < 4; x++) {
+            int val = block[3 + x * 4 + y * stride];
+            if (val < mn)
+                mn = val;
+            else if (val > mx)
+                mx = val;
+        }
+    }
+
+    /* Encode them */
+    dst[0] = (uint8_t) mx;
+    dst[1] = (uint8_t) mn;
+    dst += 2;
+
+    /* Mono-alpha shortcut: the index bytes keep the zeros written above */
+    if (mn == mx)
+        return;
+
+    /* Determine bias and emit color indices.
+     * Given the choice of mx/mn, these indices are optimal:
+     * fgiesen.wordpress.com/2009/12/15/dxt5-alpha-block-index-determination */
+    dist = mx - mn;
+
+    dist4 = dist * 4;
+    dist2 = dist * 2;
+    if (dist < 8)
+        bias = dist - 1 - mn * 7;
+    else
+        bias = dist / 2 + 2 - mn * 7;
+
+    for (y = 0; y < 4; y++) {
+        for (x = 0; x < 4; x++) {
+            int alp = block[3 + x * 4 + y * stride] * 7 + bias;
+            int ind, tmp;
+
+            /* This is a "linear scale" lerp factor between 0 (val=min)
+             * and 7 (val=max) to select index. */
+            tmp  = (alp >= dist4) ? -1 : 0; /* all-ones mask when true */
+            ind  = tmp & 4;
+            alp -= dist4 & tmp;
+            tmp  = (alp >= dist2) ? -1 : 0;
+            ind += tmp & 2;
+            alp -= dist2 & tmp;
+            ind += (alp >= dist);
+
+            /* Turn linear scale into DXT index (0/1 are extreme points) */
+            ind  = -ind & 7;
+            ind ^= (2 > ind);
+
+            /* Write index: flush every completed byte, low bits first */
+            mask |= ind << bits;
+            bits += 3;
+            if (bits >= 8) {
+                *dst++ = mask;
+                mask >>= 8;
+                bits  -= 8;
+            }
+        }
+    }
+}
+
+/**
+ * Convert a RGBA buffer to unscaled YCoCg.
+ * Scale is usually introduced to avoid banding over a certain range of colors,
+ * but this version of the algorithm does not introduce it as much as other
+ * implementations, allowing for a simpler and faster conversion.
+ */
+static void rgba2ycocg(uint8_t *dst, const uint8_t *pixel)
+{
+    const int red     =  pixel[0];
+    const int half_g  = (pixel[1] + 1) >> 1;        /* G / 2, rounded */
+    const int blue    =  pixel[2];
+    const int quarter = (2 + red + blue) >> 2;      /* (R + B) / 4, rounded */
+
+    dst[0] = av_clip_uint8(128 + ((red - blue + 1) >> 1)); /* Co */
+    dst[1] = av_clip_uint8(128 + half_g - quarter);        /* Cg */
+    dst[2] = 0;                                            /* left at zero */
+    dst[3] = av_clip_uint8(half_g + quarter);              /* Y */
+}
+
+/**
+ * Compress one block of RGBA pixels in a DXT1 texture and store the
+ * resulting bytes in 'dst'. Alpha is not preserved.
+ *
+ * @param dst    output buffer, receives the compressed block.
+ * @param stride scanline of 'block' in bytes.
+ * @param block  block to compress.
+ * @return how much texture data has been written (always 8 bytes).
+ */
+static int dxt1_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
+{
+    compress_color(dst, stride, block);
+
+    return 8;
+}
+
+/**
+ * Compress one block of RGBA pixels in a DXT5 texture and store the
+ * resulting bytes in 'dst'. Alpha is preserved.
+ *
+ * @param dst    output buffer, receives alpha data first, then color data.
+ * @param stride scanline of 'block' in bytes.
+ * @param block  block to compress.
+ * @return how much texture data has been written (always 16 bytes).
+ */
+static int dxt5_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
+{
+    compress_alpha(dst, stride, block);
+    compress_color(dst + 8, stride, block);
+
+    return 16;
+}
+
+/**
+ * Compress one block of RGBA pixels in a DXT5-YCoCg texture and store the
+ * resulting bytes in 'dst'. Alpha is not preserved.
+ *
+ * @param dst    output buffer.
+ * @param stride scanline in bytes.
+ * @param block  block to compress.
+ * @return how much texture data has been written.
+ */
+static int dxt5ys_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
+{
+    uint8_t ycocg[64];
+    int i;
+
+    /* Convert into a packed 4x4 scratch block (16 bytes per row), which
+     * leaves luma in the alpha slot, then run a normal DXT5 compression. */
+    for (i = 0; i < 16; i++)
+        rgba2ycocg(ycocg + i * 4, block + (i & 3) * 4 + (i >> 2) * stride);
+
+    compress_alpha(dst + 0, 16, ycocg);
+    compress_color(dst + 8, 16, ycocg);
+
+    return 16;
+}
+
+av_cold void ff_texturedspenc_init(TextureDSPContext *c)
+{
+    c->dxt1_block   = dxt1_block;   /* color only, writes 8 bytes */
+    c->dxt5_block   = dxt5_block;   /* alpha + color, writes 16 bytes */
+    c->dxt5ys_block = dxt5ys_block; /* YCoCg in DXT5, writes 16 bytes */
+}
diff --git a/libavcodec/tiertexseqv.c b/libavcodec/tiertexseqv.c
index 7c62208d..df12ee38 100644
--- a/libavcodec/tiertexseqv.c
+++ b/libavcodec/tiertexseqv.c
@@ -265,5 +265,5 @@ AVCodec ff_tiertexseqvideo_decoder = {
     .init           = seqvideo_decode_init,
     .close          = seqvideo_decode_end,
     .decode         = seqvideo_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/tiff.c b/libavcodec/tiff.c
index 7a7f9b74..4be587d5 100644
--- a/libavcodec/tiff.c
+++ b/libavcodec/tiff.c
@@ -453,25 +453,21 @@ static int tiff_unpack_fax(TiffContext *s, uint8_t *dst, int stride,
     int i, ret = 0;
     int line;
     uint8_t *src2 = av_malloc((unsigned)size +
-                              FF_INPUT_BUFFER_PADDING_SIZE);
+                              AV_INPUT_BUFFER_PADDING_SIZE);
 
     if (!src2) {
         av_log(s->avctx, AV_LOG_ERROR,
                "Error allocating temporary buffer\n");
         return AVERROR(ENOMEM);
     }
-    if (s->fax_opts & 2) {
-        avpriv_request_sample(s->avctx, "Uncompressed fax mode");
-        av_free(src2);
-        return AVERROR_PATCHWELCOME;
-    }
+
     if (!s->fill_order) {
         memcpy(src2, src, size);
     } else {
         for (i = 0; i < size; i++)
             src2[i] = ff_reverse[src[i]];
     }
-    memset(src2 + size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
+    memset(src2 + size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
     ret = ff_ccitt_unpack(s->avctx, src2, size, dst, lines, stride,
                           s->compr, s->fax_opts);
     if (s->bpp < 8 && s->avctx->pix_fmt == AV_PIX_FMT_PAL8)
@@ -1008,8 +1004,13 @@ static int tiff_decode_tag(TiffContext *s, AVFrame *frame)
             av_log(s->avctx, AV_LOG_ERROR, "subsample count invalid\n");
             return AVERROR_INVALIDDATA;
         }
-        for (i = 0; i < count; i++)
+        for (i = 0; i < count; i++) {
             s->subsampling[i] = ff_tget(&s->gb, type, s->le);
+            if (s->subsampling[i] <= 0) {
+                av_log(s->avctx, AV_LOG_ERROR, "subsampling %d is invalid\n", s->subsampling[i]);
+                return AVERROR_INVALIDDATA;
+            }
+        }
         break;
     case TIFF_T4OPTIONS:
         if (s->compr == TIFF_G3)
@@ -1257,7 +1258,7 @@ static int decode_frame(AVCodecContext *avctx,
                          avpkt->size - s->strippos);
     }
 
-    if (s->rps <= 0) {
+    if (s->rps <= 0 || s->rps % s->subsampling[1]) {
         av_log(avctx, AV_LOG_ERROR, "rps %d invalid\n", s->rps);
         return AVERROR_INVALIDDATA;
     }
@@ -1388,5 +1389,5 @@ AVCodec ff_tiff_decoder = {
     .close          = tiff_end,
     .decode         = decode_frame,
     .init_thread_copy = ONLY_IF_THREADS_ENABLED(tiff_init),
-    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
 };
diff --git a/libavcodec/tiffenc.c b/libavcodec/tiffenc.c
index 2cdac0b2..3d37d2e0 100644
--- a/libavcodec/tiffenc.c
+++ b/libavcodec/tiffenc.c
@@ -311,7 +311,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     }
 
     for (i = 0; i < s->bpp_tab_size; i++)
-        bpp_tab[i] = desc->comp[i].depth_minus1 + 1;
+        bpp_tab[i] = desc->comp[i].depth;
 
     if (s->compr == TIFF_DEFLATE       ||
         s->compr == TIFF_ADOBE_DEFLATE ||
@@ -329,9 +329,9 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     bytes_per_row = (((s->width - 1) / s->subsampling[0] + 1) * s->bpp *
                      s->subsampling[0] * s->subsampling[1] + 7) >> 3;
     packet_size = avctx->height * bytes_per_row * 2 +
-                  avctx->height * 4 + FF_MIN_BUFFER_SIZE;
+                  avctx->height * 4 + AV_INPUT_BUFFER_MIN_SIZE;
 
-    if ((ret = ff_alloc_packet2(avctx, pkt, packet_size)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, pkt, packet_size, 0)) < 0)
         return ret;
     ptr          = pkt->data;
     s->buf_start = pkt->data;
@@ -475,7 +475,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     ADD_ENTRY(s,  TIFF_YRES,         TIFF_RATIONAL, 1,      res);
     ADD_ENTRY1(s, TIFF_RES_UNIT,     TIFF_SHORT,    2);
 
-    if (!(avctx->flags & CODEC_FLAG_BITEXACT))
+    if (!(avctx->flags & AV_CODEC_FLAG_BITEXACT))
         ADD_ENTRY(s, TIFF_SOFTWARE_NAME, TIFF_STRING,
                   strlen(LIBAVCODEC_IDENT) + 1, LIBAVCODEC_IDENT);
 
@@ -521,13 +521,12 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 static av_cold int encode_init(AVCodecContext *avctx)
 {
     TiffEncoderContext *s = avctx->priv_data;
-
-    avctx->coded_frame = av_frame_alloc();
-    if (!avctx->coded_frame)
-        return AVERROR(ENOMEM);
-
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
     avctx->coded_frame->key_frame = 1;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
     s->avctx = avctx;
 
     return 0;
@@ -537,7 +536,6 @@ static av_cold int encode_close(AVCodecContext *avctx)
 {
     TiffEncoderContext *s = avctx->priv_data;
 
-    av_frame_free(&avctx->coded_frame);
     av_freep(&s->strip_sizes);
     av_freep(&s->strip_offsets);
     av_freep(&s->yuv_line);
@@ -574,7 +572,7 @@ AVCodec ff_tiff_encoder = {
     .priv_data_size = sizeof(TiffEncoderContext),
     .init           = encode_init,
     .close          = encode_close,
-    .capabilities   = CODEC_CAP_FRAME_THREADS | CODEC_CAP_INTRA_ONLY,
+    .capabilities   = AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_INTRA_ONLY,
     .encode2        = encode_frame,
     .pix_fmts       = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_RGB24, AV_PIX_FMT_RGB48LE, AV_PIX_FMT_PAL8,
diff --git a/libavcodec/tmv.c b/libavcodec/tmv.c
index e525a735..b738fcb1 100644
--- a/libavcodec/tmv.c
+++ b/libavcodec/tmv.c
@@ -93,5 +93,5 @@ AVCodec ff_tmv_decoder = {
     .id             = AV_CODEC_ID_TMV,
     .init           = tmv_decode_init,
     .decode         = tmv_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/truemotion1.c b/libavcodec/truemotion1.c
index 660ecf54..da843c44 100644
--- a/libavcodec/truemotion1.c
+++ b/libavcodec/truemotion1.c
@@ -215,7 +215,7 @@ static int make_cdt16_entry(int p1, int p2, int16_t *cdt)
     b = cdt[p2];
     r = cdt[p1] << 11;
     lo = b + r;
-    return (lo + (lo << 16)) << 1;
+    return (lo + (lo * (1 << 16))) * 2;
 }
 
 static int make_ydt24_entry(int p1, int p2, int16_t *ydt)
@@ -224,7 +224,7 @@ static int make_ydt24_entry(int p1, int p2, int16_t *ydt)
 
     lo = ydt[p1];
     hi = ydt[p2];
-    return (lo + (hi << 8) + (hi << 16)) << 1;
+    return (lo + (hi * (1 << 8)) + (hi * (1 << 16))) * 2;
 }
 
 static int make_cdt24_entry(int p1, int p2, int16_t *cdt)
@@ -232,8 +232,8 @@ static int make_cdt24_entry(int p1, int p2, int16_t *cdt)
     int r, b;
 
     b = cdt[p2];
-    r = cdt[p1]<<16;
-    return (b+r) << 1;
+    r = cdt[p1] * (1 << 16);
+    return (b+r) * 2;
 }
 
 static void gen_vector_table15(TrueMotion1Context *s, const uint8_t *sel_vector_table)
@@ -396,12 +396,16 @@ static int truemotion1_decode_header(TrueMotion1Context *s)
     }
 
     if (compression_types[header.compression].algorithm == ALGO_RGB24H) {
-        new_pix_fmt = AV_PIX_FMT_RGB32;
+        new_pix_fmt = AV_PIX_FMT_0RGB32;
         width_shift = 1;
     } else
         new_pix_fmt = AV_PIX_FMT_RGB555; // RGB565 is supported as well
 
     s->w >>= width_shift;
+    if (s->w & 1) {
+        avpriv_request_sample(s->avctx, "Frame with odd width");
+        return AVERROR_PATCHWELCOME;
+    }
 
     if (s->w != s->avctx->width || s->h != s->avctx->height ||
         new_pix_fmt != s->avctx->pix_fmt) {
@@ -641,7 +645,8 @@ static void truemotion1_decode_16bit(TrueMotion1Context *s)
         current_pixel_pair = (unsigned int *)current_line;
         vert_pred = s->vert_pred;
         mb_change_index = 0;
-        mb_change_byte = mb_change_bits[mb_change_index++];
+        if (!keyframe)
+            mb_change_byte = mb_change_bits[mb_change_index++];
         mb_change_byte_mask = 0x01;
         pixels_left = s->avctx->width;
 
@@ -912,5 +917,5 @@ AVCodec ff_truemotion1_decoder = {
     .init           = truemotion1_decode_init,
     .close          = truemotion1_decode_end,
     .decode         = truemotion1_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/truemotion2.c b/libavcodec/truemotion2.c
index f70c79c3..245a32a8 100644
--- a/libavcodec/truemotion2.c
+++ b/libavcodec/truemotion2.c
@@ -303,8 +303,8 @@ static int tm2_read_stream(TM2Context *ctx, const uint8_t *buf, int stream_id, i
     if (len == 0)
         return 4;
 
-    if (len >= INT_MAX/4-1 || len < 0 || skip > buf_size) {
-        av_log(ctx->avctx, AV_LOG_ERROR, "invalid stream size\n");
+    if (len >= INT_MAX / 4 - 1 || len < 0 || skip > buf_size) {
+        av_log(ctx->avctx, AV_LOG_ERROR, "Error, invalid stream size.\n");
         return AVERROR_INVALIDDATA;
     }
 
@@ -344,31 +344,35 @@ static int tm2_read_stream(TM2Context *ctx, const uint8_t *buf, int stream_id, i
     /* check if we have sane number of tokens */
     if ((toks < 0) || (toks > 0xFFFFFF)) {
         av_log(ctx->avctx, AV_LOG_ERROR, "Incorrect number of tokens: %i\n", toks);
-        tm2_free_codes(&codes);
-        return AVERROR_INVALIDDATA;
+        ret = AVERROR_INVALIDDATA;
+        goto end;
     }
     ret = av_reallocp_array(&ctx->tokens[stream_id], toks, sizeof(int));
     if (ret < 0) {
         ctx->tok_lens[stream_id] = 0;
-        return ret;
+        goto end;
     }
     ctx->tok_lens[stream_id] = toks;
     len = bytestream2_get_be32(&gb);
     if (len > 0) {
         pos = bytestream2_tell(&gb);
-        if (skip <= pos)
-            return AVERROR_INVALIDDATA;
+        if (skip <= pos) {
+            ret = AVERROR_INVALIDDATA;
+            goto end;
+        }
         init_get_bits(&ctx->gb, buf + pos, (skip - pos) * 8);
         for (i = 0; i < toks; i++) {
             if (get_bits_left(&ctx->gb) <= 0) {
                 av_log(ctx->avctx, AV_LOG_ERROR, "Incorrect number of tokens: %i\n", toks);
-                return AVERROR_INVALIDDATA;
+                ret = AVERROR_INVALIDDATA;
+                goto end;
             }
             ctx->tokens[stream_id][i] = tm2_get_token(&ctx->gb, &codes);
             if (stream_id <= TM2_MOT && ctx->tokens[stream_id][i] >= TM2_DELTAS || ctx->tokens[stream_id][i]<0) {
                 av_log(ctx->avctx, AV_LOG_ERROR, "Invalid delta token index %d for type %d, n=%d\n",
                        ctx->tokens[stream_id][i], stream_id, i);
-                return AVERROR_INVALIDDATA;
+                ret = AVERROR_INVALIDDATA;
+                goto end;
             }
         }
     } else {
@@ -377,13 +381,17 @@ static int tm2_read_stream(TM2Context *ctx, const uint8_t *buf, int stream_id, i
             if (stream_id <= TM2_MOT && ctx->tokens[stream_id][i] >= TM2_DELTAS) {
                 av_log(ctx->avctx, AV_LOG_ERROR, "Invalid delta token index %d for type %d, n=%d\n",
                        ctx->tokens[stream_id][i], stream_id, i);
-                return AVERROR_INVALIDDATA;
+                ret = AVERROR_INVALIDDATA;
+                goto end;
             }
         }
     }
-    tm2_free_codes(&codes);
 
-    return skip;
+    ret = skip;
+
+end:
+    tm2_free_codes(&codes);
+    return ret;
 }
 
 static inline int GET_TOK(TM2Context *ctx,int type)
@@ -1023,5 +1031,5 @@ AVCodec ff_truemotion2_decoder = {
     .init           = decode_init,
     .close          = decode_end,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/truespeech.c b/libavcodec/truespeech.c
index f9e86025..d4ddfcbf 100644
--- a/libavcodec/truespeech.c
+++ b/libavcodec/truespeech.c
@@ -362,5 +362,5 @@ AVCodec ff_truespeech_decoder = {
     .priv_data_size = sizeof(TSContext),
     .init           = truespeech_decode_init,
     .decode         = truespeech_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/tscc.c b/libavcodec/tscc.c
index f9b325b6..8c5ec68c 100644
--- a/libavcodec/tscc.c
+++ b/libavcodec/tscc.c
@@ -94,7 +94,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     if (ret != Z_DATA_ERROR) {
         bytestream2_init(&c->gb, c->decomp_buf,
                          c->decomp_size - c->zstream.avail_out);
-        ff_msrle_decode(avctx, (AVPicture*)frame, c->bpp, &c->gb);
+        ff_msrle_decode(avctx, frame, c->bpp, &c->gb);
     }
 
     /* make the palette available on the way out */
@@ -184,5 +184,5 @@ AVCodec ff_tscc_decoder = {
     .init           = decode_init,
     .close          = decode_end,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/tscc2.c b/libavcodec/tscc2.c
index 92210f7d..9bb7ab3b 100644
--- a/libavcodec/tscc2.c
+++ b/libavcodec/tscc2.c
@@ -384,5 +384,5 @@ AVCodec ff_tscc2_decoder = {
     .init           = tscc2_decode_init,
     .close          = tscc2_decode_end,
     .decode         = tscc2_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/tscc2data.h b/libavcodec/tscc2data.h
index 4586da77..78062675 100644
--- a/libavcodec/tscc2data.h
+++ b/libavcodec/tscc2data.h
@@ -19,8 +19,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef AVCODEC_TSCC2_DATA_H
-#define AVCODEC_TSCC2_DATA_H
+#ifndef AVCODEC_TSCC2DATA_H
+#define AVCODEC_TSCC2DATA_H
 
 #include <stdint.h>
 
@@ -932,4 +932,4 @@ static const uint8_t *tscc2_ac_vlc_bits[NUM_VLC_SETS] = {
     ac_vlc_descC_bits,
 };
 
-#endif /* AVCODEC_TSCC2_DATA_H */
+#endif /* AVCODEC_TSCC2DATA_H */
diff --git a/libavcodec/tta.c b/libavcodec/tta.c
index 01584d95..1e2e9c4e 100644
--- a/libavcodec/tta.c
+++ b/libavcodec/tta.c
@@ -123,6 +123,7 @@ static av_cold int tta_decode_init(AVCodecContext * avctx)
     TTAContext *s = avctx->priv_data;
     GetBitContext gb;
     int total_frames;
+    int ret;
 
     s->avctx = avctx;
 
@@ -131,7 +132,10 @@ static av_cold int tta_decode_init(AVCodecContext * avctx)
         return AVERROR_INVALIDDATA;
 
     s->crc_table = av_crc_get_table(AV_CRC_32_IEEE_LE);
-    init_get_bits8(&gb, avctx->extradata, avctx->extradata_size);
+    ret = init_get_bits8(&gb, avctx->extradata, avctx->extradata_size);
+    if (ret < 0)
+        return ret;
+
     if (show_bits_long(&gb, 32) == AV_RL32("TTA1")) {
         /* signature */
         skip_bits_long(&gb, 32);
@@ -425,6 +429,6 @@ AVCodec ff_tta_decoder = {
     .close          = tta_decode_close,
     .decode         = tta_decode_frame,
     .init_thread_copy = ONLY_IF_THREADS_ENABLED(init_thread_copy),
-    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
     .priv_class     = &tta_decoder_class,
 };
diff --git a/libavcodec/ttaenc.c b/libavcodec/ttaenc.c
index ccd41a90..2f1c8db5 100644
--- a/libavcodec/ttaenc.c
+++ b/libavcodec/ttaenc.c
@@ -114,9 +114,12 @@ static int tta_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
 {
     TTAEncContext *s = avctx->priv_data;
     PutBitContext pb;
-    int ret, i, out_bytes, cur_chan = 0, res = 0, samples = 0;
+    int ret, i, out_bytes, cur_chan, res, samples;
+    int64_t pkt_size =  frame->nb_samples * 2LL * avctx->channels * s->bps;
 
-    if ((ret = ff_alloc_packet2(avctx, avpkt, frame->nb_samples * 2 * avctx->channels * s->bps)) < 0)
+pkt_alloc:
+    cur_chan = 0, res = 0, samples = 0;
+    if ((ret = ff_alloc_packet2(avctx, avpkt, pkt_size, 0)) < 0)
         return ret;
     init_put_bits(&pb, avpkt->data, avpkt->size);
 
@@ -174,6 +177,14 @@ static int tta_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
                 rice->k1++;
 
             unary = 1 + (outval >> k);
+            if (unary + 100LL > put_bits_left(&pb)) {
+                if (pkt_size < INT_MAX/2) {
+                    pkt_size *= 2;
+                    av_packet_unref(avpkt);
+                    goto pkt_alloc;
+                } else
+                    return AVERROR(ENOMEM);
+            }
             do {
                 if (unary > 31) {
                     put_bits(&pb, 31, 0x7FFFFFFF);
@@ -224,7 +235,7 @@ AVCodec ff_tta_encoder = {
     .init           = tta_encode_init,
     .close          = tta_encode_close,
     .encode2        = tta_encode_frame,
-    .capabilities   = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_LOSSLESS,
+    .capabilities   = AV_CODEC_CAP_SMALL_LAST_FRAME | AV_CODEC_CAP_LOSSLESS,
     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_U8,
                                                      AV_SAMPLE_FMT_S16,
                                                      AV_SAMPLE_FMT_S32,
diff --git a/libavcodec/twinvq.c b/libavcodec/twinvq.c
index 4c289b0c..7b2e19e5 100644
--- a/libavcodec/twinvq.c
+++ b/libavcodec/twinvq.c
@@ -789,7 +789,7 @@ av_cold int ff_twinvq_decode_init(AVCodecContext *avctx)
         return AVERROR_INVALIDDATA;
     }
 
-    tctx->fdsp = avpriv_float_dsp_alloc(avctx->flags & CODEC_FLAG_BITEXACT);
+    tctx->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
     if (!tctx->fdsp) {
         ff_twinvq_decode_close(avctx);
         return AVERROR(ENOMEM);
diff --git a/libavcodec/twinvq.h b/libavcodec/twinvq.h
index 206eeaaf..228a4894 100644
--- a/libavcodec/twinvq.h
+++ b/libavcodec/twinvq.h
@@ -200,4 +200,4 @@ int ff_twinvq_decode_frame(AVCodecContext *avctx, void *data,
 int ff_twinvq_decode_close(AVCodecContext *avctx);
 int ff_twinvq_decode_init(AVCodecContext *avctx);
 
-#endif /* AVCODEC_TWINVQ_DATA_H */
+#endif /* AVCODEC_TWINVQ_H */
diff --git a/libavcodec/twinvqdec.c b/libavcodec/twinvqdec.c
index 3ea4dfa1..5f4dd350 100644
--- a/libavcodec/twinvqdec.c
+++ b/libavcodec/twinvqdec.c
@@ -422,7 +422,7 @@ AVCodec ff_twinvq_decoder = {
     .init           = twinvq_decode_init,
     .close          = ff_twinvq_decode_close,
     .decode         = ff_twinvq_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                       AV_SAMPLE_FMT_NONE },
 };
diff --git a/libavcodec/txd.c b/libavcodec/txd.c
index d2884ad1..d7fdde08 100644
--- a/libavcodec/txd.c
+++ b/libavcodec/txd.c
@@ -26,18 +26,25 @@
 #include "bytestream.h"
 #include "avcodec.h"
 #include "internal.h"
-#include "s3tc.h"
+#include "texturedsp.h"
+
+#define TXD_DXT1 0x31545844 /* bytes 'D','X','T','1', lowest byte first */
+#define TXD_DXT3 0x33545844 /* bytes 'D','X','T','3', lowest byte first */
 
 static int txd_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                             AVPacket *avpkt) {
     GetByteContext gb;
+    TextureDSPContext dxtc;
     AVFrame * const p = data;
     unsigned int version, w, h, d3d_format, depth, stride, flags;
     unsigned int y, v;
     uint8_t *ptr;
     uint32_t *pal;
+    int i, j;
     int ret;
 
+    ff_texturedsp_init(&dxtc);
+
     bytestream2_init(&gb, avpkt->data, avpkt->size);
     version         = bytestream2_get_le32(&gb);
     bytestream2_skip(&gb, 72);
@@ -57,7 +64,7 @@ static int txd_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     if (depth == 8) {
         avctx->pix_fmt = AV_PIX_FMT_PAL8;
     } else if (depth == 16 || depth == 32) {
-        avctx->pix_fmt = AV_PIX_FMT_RGB32;
+        avctx->pix_fmt = AV_PIX_FMT_RGBA;
     } else {
         av_log(avctx, AV_LOG_ERROR, "depth of %i is unsupported\n", depth);
         return AVERROR_PATCHWELCOME;
@@ -66,6 +73,9 @@ static int txd_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     if ((ret = ff_set_dimensions(avctx, w, h)) < 0)
         return ret;
 
+    avctx->coded_width  = FFALIGN(w, 4);
+    avctx->coded_height = FFALIGN(h, 4);
+
     if ((ret = ff_get_buffer(avctx, p, 0)) < 0)
         return ret;
 
@@ -93,15 +103,27 @@ static int txd_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         case 0:
             if (!(flags & 1))
                 goto unsupported;
-        case FF_S3TC_DXT1:
-            if (bytestream2_get_bytes_left(&gb) < FF_CEIL_RSHIFT(w, 2) * FF_CEIL_RSHIFT(h, 2) * 8)
+        case TXD_DXT1:
+            if (bytestream2_get_bytes_left(&gb) < AV_CEIL_RSHIFT(w, 2) * AV_CEIL_RSHIFT(h, 2) * 8)
                 return AVERROR_INVALIDDATA;
-            ff_decode_dxt1(&gb, ptr, w, h, stride);
+            for (j = 0; j < avctx->height; j += 4) {
+                for (i = 0; i < avctx->width; i += 4) {
+                    uint8_t *p = ptr + i * 4 + j * stride;
+                    int step = dxtc.dxt1_block(p, stride, gb.buffer);
+                    bytestream2_skip(&gb, step);
+                }
+            }
             break;
-        case FF_S3TC_DXT3:
-            if (bytestream2_get_bytes_left(&gb) < FF_CEIL_RSHIFT(w, 2) * FF_CEIL_RSHIFT(h, 2) * 16)
+        case TXD_DXT3:
+            if (bytestream2_get_bytes_left(&gb) < AV_CEIL_RSHIFT(w, 2) * AV_CEIL_RSHIFT(h, 2) * 16)
                 return AVERROR_INVALIDDATA;
-            ff_decode_dxt3(&gb, ptr, w, h, stride);
+            for (j = 0; j < avctx->height; j += 4) {
+                for (i = 0; i < avctx->width; i += 4) {
+                    uint8_t *p = ptr + i * 4 + j * stride;
+                    int step = dxtc.dxt3_block(p, stride, gb.buffer);
+                    bytestream2_skip(&gb, step);
+                }
+            }
             break;
         default:
             goto unsupported;
@@ -137,5 +159,5 @@ AVCodec ff_txd_decoder = {
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_TXD,
     .decode         = txd_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/ulti.c b/libavcodec/ulti.c
index d14603a2..e6f43749 100644
--- a/libavcodec/ulti.c
+++ b/libavcodec/ulti.c
@@ -424,5 +424,5 @@ AVCodec ff_ulti_decoder = {
     .init           = ulti_decode_init,
     .close          = ulti_decode_end,
     .decode         = ulti_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index 6596aeae..f532824f 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -36,10 +36,12 @@
 #include "libavutil/frame.h"
 #include "libavutil/internal.h"
 #include "libavutil/mathematics.h"
+#include "libavutil/mem_internal.h"
 #include "libavutil/pixdesc.h"
 #include "libavutil/imgutils.h"
 #include "libavutil/samplefmt.h"
 #include "libavutil/dict.h"
+#include "libavutil/thread.h"
 #include "avcodec.h"
 #include "libavutil/opt.h"
 #include "me_cmp.h"
@@ -58,14 +60,6 @@
 # include <iconv.h>
 #endif
 
-#if HAVE_PTHREADS
-#include <pthread.h>
-#elif HAVE_W32THREADS
-#include "compat/w32pthreads.h"
-#elif HAVE_OS2THREADS
-#include "compat/os2threads.h"
-#endif
-
 #include "libavutil/ffversion.h"
 const char av_codec_ffversion[] = "FFmpeg version " FFMPEG_VERSION;
 
@@ -122,42 +116,28 @@ static int volatile entangled_thread_counter = 0;
 static void *codec_mutex;
 static void *avformat_mutex;
 
-static inline int ff_fast_malloc(void *ptr, unsigned int *size, size_t min_size, int zero_realloc)
-{
-    void **p = ptr;
-    if (min_size <= *size && *p)
-        return 0;
-    min_size = FFMAX(17 * min_size / 16 + 32, min_size);
-    av_free(*p);
-    *p = zero_realloc ? av_mallocz(min_size) : av_malloc(min_size);
-    if (!*p)
-        min_size = 0;
-    *size = min_size;
-    return 1;
-}
-
 void av_fast_padded_malloc(void *ptr, unsigned int *size, size_t min_size)
 {
     uint8_t **p = ptr;
-    if (min_size > SIZE_MAX - FF_INPUT_BUFFER_PADDING_SIZE) {
+    if (min_size > SIZE_MAX - AV_INPUT_BUFFER_PADDING_SIZE) {
         av_freep(p);
         *size = 0;
         return;
     }
-    if (!ff_fast_malloc(p, size, min_size + FF_INPUT_BUFFER_PADDING_SIZE, 1))
-        memset(*p + min_size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
+    if (!ff_fast_malloc(p, size, min_size + AV_INPUT_BUFFER_PADDING_SIZE, 1))
+        memset(*p + min_size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
 }
 
 void av_fast_padded_mallocz(void *ptr, unsigned int *size, size_t min_size)
 {
     uint8_t **p = ptr;
-    if (min_size > SIZE_MAX - FF_INPUT_BUFFER_PADDING_SIZE) {
+    if (min_size > SIZE_MAX - AV_INPUT_BUFFER_PADDING_SIZE) {
         av_freep(p);
         *size = 0;
         return;
     }
-    if (!ff_fast_malloc(p, size, min_size + FF_INPUT_BUFFER_PADDING_SIZE, 1))
-        memset(*p, 0, min_size + FF_INPUT_BUFFER_PADDING_SIZE);
+    if (!ff_fast_malloc(p, size, min_size + AV_INPUT_BUFFER_PADDING_SIZE, 1))
+        memset(*p, 0, min_size + AV_INPUT_BUFFER_PADDING_SIZE);
 }
 
 /* encoder management */
@@ -235,8 +215,8 @@ int ff_set_dimensions(AVCodecContext *s, int width, int height)
 
     s->coded_width  = width;
     s->coded_height = height;
-    s->width        = FF_CEIL_RSHIFT(width,  s->lowres);
-    s->height       = FF_CEIL_RSHIFT(height, s->lowres);
+    s->width        = AV_CEIL_RSHIFT(width,  s->lowres);
+    s->height       = AV_CEIL_RSHIFT(height, s->lowres);
 
     return ret;
 }
@@ -371,6 +351,8 @@ void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height,
     case AV_PIX_FMT_GBRP14BE:
     case AV_PIX_FMT_GBRP16LE:
     case AV_PIX_FMT_GBRP16BE:
+    case AV_PIX_FMT_GBRAP16LE:
+    case AV_PIX_FMT_GBRAP16BE:
         w_align = 16; //FIXME assume 16 pixel per macroblock
         h_align = 16 * 2; // interlaced needs 2 macroblocks height
         break;
@@ -422,7 +404,7 @@ void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height,
         break;
     }
 
-    if (s->codec_id == AV_CODEC_ID_IFF_ILBM || s->codec_id == AV_CODEC_ID_IFF_BYTERUN1) {
+    if (s->codec_id == AV_CODEC_ID_IFF_ILBM) {
         w_align = FFMAX(w_align, 8);
     }
 
@@ -432,6 +414,11 @@ void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height,
         // some of the optimized chroma MC reads one line too much
         // which is also done in mpeg decoders with lowres > 0
         *height += 2;
+
+        // H.264 uses edge emulation for out of frame motion vectors, for this
+        // it requires a temporary area large enough to hold a 21x21 block,
+        // increasing width ensures that the temporary area is large enough,
+        // the next rounded up width is 32
         *width = FFMAX(*width, 32);
     }
 
@@ -520,7 +507,8 @@ static int update_frame_pool(AVCodecContext *avctx, AVFrame *frame)
 
     switch (avctx->codec_type) {
     case AVMEDIA_TYPE_VIDEO: {
-        AVPicture picture;
+        uint8_t *data[4];
+        int linesize[4];
         int size[4] = { 0 };
         int w = frame->width;
         int h = frame->height;
@@ -535,27 +523,29 @@ static int update_frame_pool(AVCodecContext *avctx, AVFrame *frame)
         do {
             // NOTE: do not align linesizes individually, this breaks e.g. assumptions
             // that linesize[0] == 2*linesize[1] in the MPEG-encoder for 4:2:2
-            av_image_fill_linesizes(picture.linesize, avctx->pix_fmt, w);
+            ret = av_image_fill_linesizes(linesize, avctx->pix_fmt, w);
+            if (ret < 0)
+                return ret;
             // increase alignment of w for next try (rhs gives the lowest bit set in w)
             w += w & ~(w - 1);
 
             unaligned = 0;
             for (i = 0; i < 4; i++)
-                unaligned |= picture.linesize[i] % pool->stride_align[i];
+                unaligned |= linesize[i] % pool->stride_align[i];
         } while (unaligned);
 
-        tmpsize = av_image_fill_pointers(picture.data, avctx->pix_fmt, h,
-                                         NULL, picture.linesize);
+        tmpsize = av_image_fill_pointers(data, avctx->pix_fmt, h,
+                                         NULL, linesize);
         if (tmpsize < 0)
             return -1;
 
-        for (i = 0; i < 3 && picture.data[i + 1]; i++)
-            size[i] = picture.data[i + 1] - picture.data[i];
-        size[i] = tmpsize - (picture.data[i] - picture.data[0]);
+        for (i = 0; i < 3 && data[i + 1]; i++)
+            size[i] = data[i + 1] - data[i];
+        size[i] = tmpsize - (data[i] - data[0]);
 
         for (i = 0; i < 4; i++) {
             av_buffer_pool_uninit(&pool->pools[i]);
-            pool->linesize[i] = picture.linesize[i];
+            pool->linesize[i] = linesize[i];
             if (size[i]) {
                 pool->pools[i] = av_buffer_pool_init(size[i] + 16 + STRIDE_ALIGN - 1,
                                                      CONFIG_MEMORY_POISONING ?
@@ -660,6 +650,7 @@ static int audio_get_buffer(AVCodecContext *avctx, AVFrame *frame)
 static int video_get_buffer(AVCodecContext *s, AVFrame *pic)
 {
     FramePool *pool = s->internal->pool;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pic->format);
     int i;
 
     if (pic->data[0]) {
@@ -667,6 +658,13 @@ static int video_get_buffer(AVCodecContext *s, AVFrame *pic)
         return -1;
     }
 
+    if (!desc) {
+        av_log(s, AV_LOG_ERROR,
+            "Unable to get pixel format descriptor for format %s\n",
+            av_get_pix_fmt_name(pic->format));
+        return AVERROR(EINVAL);
+    }
+
     memset(pic->data, 0, sizeof(pic->data));
     pic->extended_data = pic->data;
 
@@ -683,8 +681,9 @@ static int video_get_buffer(AVCodecContext *s, AVFrame *pic)
         pic->data[i] = NULL;
         pic->linesize[i] = 0;
     }
-    if (pic->data[1] && !pic->data[2])
-        avpriv_set_systematic_pal2((uint32_t *)pic->data[1], s->pix_fmt);
+    if (desc->flags & AV_PIX_FMT_FLAG_PAL ||
+        desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL)
+        avpriv_set_systematic_pal2((uint32_t *)pic->data[1], pic->format);
 
     if (s->debug & FF_DEBUG_BUFFERS)
         av_log(s, AV_LOG_DEBUG, "default_get_buffer called on pic %p\n", pic);
@@ -695,7 +694,7 @@ static int video_get_buffer(AVCodecContext *s, AVFrame *pic)
     return AVERROR(ENOMEM);
 }
 
-void avpriv_color_frame(AVFrame *frame, const int c[4])
+void ff_color_frame(AVFrame *frame, const int c[4])
 {
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
     int p, y, x;
@@ -705,10 +704,10 @@ void avpriv_color_frame(AVFrame *frame, const int c[4])
     for (p = 0; p<desc->nb_components; p++) {
         uint8_t *dst = frame->data[p];
         int is_chroma = p == 1 || p == 2;
-        int bytes  = is_chroma ? FF_CEIL_RSHIFT(frame->width,  desc->log2_chroma_w) : frame->width;
-        int height = is_chroma ? FF_CEIL_RSHIFT(frame->height, desc->log2_chroma_h) : frame->height;
+        int bytes  = is_chroma ? AV_CEIL_RSHIFT(frame->width,  desc->log2_chroma_w) : frame->width;
+        int height = is_chroma ? AV_CEIL_RSHIFT(frame->height, desc->log2_chroma_h) : frame->height;
         for (y = 0; y < height; y++) {
-            if (desc->comp[0].depth_minus1 >= 8) {
+            if (desc->comp[0].depth >= 9) {
                 for (x = 0; x<bytes; x++)
                     ((uint16_t*)dst)[x] = c[p];
             }else
@@ -725,12 +724,6 @@ int avcodec_default_get_buffer2(AVCodecContext *avctx, AVFrame *frame, int flags
     if ((ret = update_frame_pool(avctx, frame)) < 0)
         return ret;
 
-#if FF_API_GET_BUFFER
-FF_DISABLE_DEPRECATION_WARNINGS
-    frame->type = FF_BUFFER_TYPE_INTERNAL;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
-
     switch (avctx->codec_type) {
     case AVMEDIA_TYPE_VIDEO:
         return video_get_buffer(avctx, frame);
@@ -741,6 +734,18 @@ FF_ENABLE_DEPRECATION_WARNINGS
     }
 }
 
+static int add_metadata_from_side_data(AVPacket *avpkt, AVFrame *frame)
+{
+    int size;
+    const uint8_t *side_metadata;
+
+    AVDictionary **frame_md = avpriv_frame_get_metadatap(frame);
+    /* Unpack the packet's strings-metadata side data into frame_md. */
+    side_metadata = av_packet_get_side_data(avpkt,
+                                            AV_PKT_DATA_STRINGS_METADATA, &size);
+    return av_packet_unpack_dictionary(side_metadata, size, frame_md);
+}
+
 int ff_init_buffer_info(AVCodecContext *avctx, AVFrame *frame)
 {
     AVPacket *pkt = avctx->internal->pkt;
@@ -774,6 +779,7 @@ int ff_init_buffer_info(AVCodecContext *avctx, AVFrame *frame)
                 memcpy(frame_sd->data, packet_sd, size);
             }
         }
+        add_metadata_from_side_data(pkt, frame);
     } else {
         frame->pkt_pts = AV_NOPTS_VALUE;
         av_frame_set_pkt_pos     (frame, -1);
@@ -838,35 +844,6 @@ int ff_init_buffer_info(AVCodecContext *avctx, AVFrame *frame)
     return 0;
 }
 
-#if FF_API_GET_BUFFER
-FF_DISABLE_DEPRECATION_WARNINGS
-int avcodec_default_get_buffer(AVCodecContext *avctx, AVFrame *frame)
-{
-    return avcodec_default_get_buffer2(avctx, frame, 0);
-}
-
-typedef struct CompatReleaseBufPriv {
-    AVCodecContext avctx;
-    AVFrame frame;
-    uint8_t avframe_padding[1024]; // hack to allow linking to a avutil with larger AVFrame
-} CompatReleaseBufPriv;
-
-static void compat_free_buffer(void *opaque, uint8_t *data)
-{
-    CompatReleaseBufPriv *priv = opaque;
-    if (priv->avctx.release_buffer)
-        priv->avctx.release_buffer(&priv->avctx, &priv->frame);
-    av_freep(&priv);
-}
-
-static void compat_release_buffer(void *opaque, uint8_t *data)
-{
-    AVBufferRef *buf = opaque;
-    av_buffer_unref(&buf);
-}
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
-
 int ff_decode_frame_props(AVCodecContext *avctx, AVFrame *frame)
 {
     return ff_init_buffer_info(avctx, frame);
@@ -886,16 +863,14 @@ static int get_buffer_internal(AVCodecContext *avctx, AVFrame *frame, int flags)
     }
     if (avctx->codec_type == AVMEDIA_TYPE_VIDEO) {
         if (frame->width <= 0 || frame->height <= 0) {
-            frame->width  = FFMAX(avctx->width,  FF_CEIL_RSHIFT(avctx->coded_width,  avctx->lowres));
-            frame->height = FFMAX(avctx->height, FF_CEIL_RSHIFT(avctx->coded_height, avctx->lowres));
+            frame->width  = FFMAX(avctx->width,  AV_CEIL_RSHIFT(avctx->coded_width,  avctx->lowres));
+            frame->height = FFMAX(avctx->height, AV_CEIL_RSHIFT(avctx->coded_height, avctx->lowres));
             override_dimensions = 0;
         }
     }
     ret = ff_decode_frame_props(avctx, frame);
     if (ret < 0)
         return ret;
-    if ((ret = ff_init_buffer_info(avctx, frame)) < 0)
-        return ret;
 
     if (hwaccel) {
         if (hwaccel->alloc_frame) {
@@ -905,124 +880,6 @@ static int get_buffer_internal(AVCodecContext *avctx, AVFrame *frame, int flags)
     } else
         avctx->sw_pix_fmt = avctx->pix_fmt;
 
-#if FF_API_GET_BUFFER
-FF_DISABLE_DEPRECATION_WARNINGS
-    /*
-     * Wrap an old get_buffer()-allocated buffer in a bunch of AVBuffers.
-     * We wrap each plane in its own AVBuffer. Each of those has a reference to
-     * a dummy AVBuffer as its private data, unreffing it on free.
-     * When all the planes are freed, the dummy buffer's free callback calls
-     * release_buffer().
-     */
-    if (avctx->get_buffer) {
-        CompatReleaseBufPriv *priv = NULL;
-        AVBufferRef *dummy_buf = NULL;
-        int planes, i, ret;
-
-        if (flags & AV_GET_BUFFER_FLAG_REF)
-            frame->reference    = 1;
-
-        ret = avctx->get_buffer(avctx, frame);
-        if (ret < 0)
-            return ret;
-
-        /* return if the buffers are already set up
-         * this would happen e.g. when a custom get_buffer() calls
-         * avcodec_default_get_buffer
-         */
-        if (frame->buf[0])
-            goto end0;
-
-        priv = av_mallocz(sizeof(*priv));
-        if (!priv) {
-            ret = AVERROR(ENOMEM);
-            goto fail;
-        }
-        priv->avctx = *avctx;
-        priv->frame = *frame;
-
-        dummy_buf = av_buffer_create(NULL, 0, compat_free_buffer, priv, 0);
-        if (!dummy_buf) {
-            ret = AVERROR(ENOMEM);
-            goto fail;
-        }
-
-#define WRAP_PLANE(ref_out, data, data_size)                            \
-do {                                                                    \
-    AVBufferRef *dummy_ref = av_buffer_ref(dummy_buf);                  \
-    if (!dummy_ref) {                                                   \
-        ret = AVERROR(ENOMEM);                                          \
-        goto fail;                                                      \
-    }                                                                   \
-    ref_out = av_buffer_create(data, data_size, compat_release_buffer,  \
-                               dummy_ref, 0);                           \
-    if (!ref_out) {                                                     \
-        av_buffer_unref(&dummy_ref);                                    \
-        av_frame_unref(frame);                                          \
-        ret = AVERROR(ENOMEM);                                          \
-        goto fail;                                                      \
-    }                                                                   \
-} while (0)
-
-        if (avctx->codec_type == AVMEDIA_TYPE_VIDEO) {
-            const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
-
-            planes = av_pix_fmt_count_planes(frame->format);
-            /* workaround for AVHWAccel plane count of 0, buf[0] is used as
-               check for allocated buffers: make libavcodec happy */
-            if (desc && desc->flags & AV_PIX_FMT_FLAG_HWACCEL)
-                planes = 1;
-            if (!desc || planes <= 0) {
-                ret = AVERROR(EINVAL);
-                goto fail;
-            }
-
-            for (i = 0; i < planes; i++) {
-                int v_shift    = (i == 1 || i == 2) ? desc->log2_chroma_h : 0;
-                int plane_size = (frame->height >> v_shift) * frame->linesize[i];
-
-                WRAP_PLANE(frame->buf[i], frame->data[i], plane_size);
-            }
-        } else {
-            int planar = av_sample_fmt_is_planar(frame->format);
-            planes = planar ? avctx->channels : 1;
-
-            if (planes > FF_ARRAY_ELEMS(frame->buf)) {
-                frame->nb_extended_buf = planes - FF_ARRAY_ELEMS(frame->buf);
-                frame->extended_buf = av_malloc_array(sizeof(*frame->extended_buf),
-                                                frame->nb_extended_buf);
-                if (!frame->extended_buf) {
-                    ret = AVERROR(ENOMEM);
-                    goto fail;
-                }
-            }
-
-            for (i = 0; i < FFMIN(planes, FF_ARRAY_ELEMS(frame->buf)); i++)
-                WRAP_PLANE(frame->buf[i], frame->extended_data[i], frame->linesize[0]);
-
-            for (i = 0; i < frame->nb_extended_buf; i++)
-                WRAP_PLANE(frame->extended_buf[i],
-                           frame->extended_data[i + FF_ARRAY_ELEMS(frame->buf)],
-                           frame->linesize[0]);
-        }
-
-        av_buffer_unref(&dummy_buf);
-
-end0:
-        frame->width  = avctx->width;
-        frame->height = avctx->height;
-
-        return 0;
-
-fail:
-        avctx->release_buffer(avctx, frame);
-        av_freep(&priv);
-        av_buffer_unref(&dummy_buf);
-        return ret;
-    }
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
-
     ret = avctx->get_buffer2(avctx, frame, flags);
 
 end:
@@ -1037,8 +894,10 @@ FF_ENABLE_DEPRECATION_WARNINGS
 int ff_get_buffer(AVCodecContext *avctx, AVFrame *frame, int flags)
 {
     int ret = get_buffer_internal(avctx, frame, flags);
-    if (ret < 0)
+    if (ret < 0) {
         av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        frame->width = frame->height = 0; /* get_buffer_internal() may have filled these in before failing */
+    }
     return ret;
 }
 
@@ -1089,21 +948,6 @@ int ff_reget_buffer(AVCodecContext *avctx, AVFrame *frame)
     return ret;
 }
 
-#if FF_API_GET_BUFFER
-void avcodec_default_release_buffer(AVCodecContext *s, AVFrame *pic)
-{
-    av_assert0(s->codec_type == AVMEDIA_TYPE_VIDEO);
-
-    av_frame_unref(pic);
-}
-
-int avcodec_default_reget_buffer(AVCodecContext *s, AVFrame *pic)
-{
-    av_assert0(0);
-    return AVERROR_BUG;
-}
-#endif
-
 int avcodec_default_execute(AVCodecContext *c, int (*func)(AVCodecContext *c2, void *arg2), void *arg, int *ret, int count, int size)
 {
     int i;
@@ -1171,6 +1015,11 @@ static int setup_hwaccel(AVCodecContext *avctx,
     AVHWAccel *hwa = find_hwaccel(avctx->codec_id, fmt);
     int ret        = 0;
 
+    if (avctx->active_thread_type & FF_THREAD_FRAME) {
+        av_log(avctx, AV_LOG_WARNING,
+               "Hardware accelerated decoding with frame threading is known to be unstable and its use is discouraged.\n");
+    }
+
     if (!hwa) {
         av_log(avctx, AV_LOG_ERROR,
                "Could not find an AVHWAccel for the pixel format: %s",
@@ -1178,6 +1027,13 @@ static int setup_hwaccel(AVCodecContext *avctx,
         return AVERROR(ENOENT);
     }
 
+    if (hwa->capabilities & HWACCEL_CODEC_CAP_EXPERIMENTAL &&
+        avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
+        av_log(avctx, AV_LOG_WARNING, "Ignoring experimental hwaccel: %s\n",
+               hwa->name);
+        return AVERROR_PATCHWELCOME;
+    }
+
     if (hwa->priv_data_size) {
         avctx->internal->hwaccel_priv_data = av_mallocz(hwa->priv_data_size);
         if (!avctx->internal->hwaccel_priv_data)
@@ -1233,8 +1089,10 @@ int ff_get_format(AVCodecContext *avctx, const enum AVPixelFormat *fmt)
 
         if (!(desc->flags & AV_PIX_FMT_FLAG_HWACCEL))
             break;
-        if (avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
+#if FF_API_CAP_VDPAU
+        if (avctx->codec->capabilities&AV_CODEC_CAP_HWACCEL_VDPAU)
             break;
+#endif
 
         if (!setup_hwaccel(avctx, ret, desc->name))
             break;
@@ -1252,37 +1110,17 @@ int ff_get_format(AVCodecContext *avctx, const enum AVPixelFormat *fmt)
     return ret;
 }
 
-#if FF_API_AVFRAME_LAVC
-void avcodec_get_frame_defaults(AVFrame *frame)
-{
-#if LIBAVCODEC_VERSION_MAJOR >= 55
-     // extended_data should explicitly be freed when needed, this code is unsafe currently
-     // also this is not compatible to the <55 ABI/API
-    if (frame->extended_data != frame->data && 0)
-        av_freep(&frame->extended_data);
-#endif
-
-    memset(frame, 0, sizeof(AVFrame));
-    av_frame_unref(frame);
-}
-
-AVFrame *avcodec_alloc_frame(void)
-{
-    return av_frame_alloc();
-}
-
-void avcodec_free_frame(AVFrame **frame)
-{
-    av_frame_free(frame);
-}
-#endif
-
 MAKE_ACCESSORS(AVCodecContext, codec, AVRational, pkt_timebase)
 MAKE_ACCESSORS(AVCodecContext, codec, const AVCodecDescriptor *, codec_descriptor)
 MAKE_ACCESSORS(AVCodecContext, codec, int, lowres)
 MAKE_ACCESSORS(AVCodecContext, codec, int, seek_preroll)
 MAKE_ACCESSORS(AVCodecContext, codec, uint16_t*, chroma_intra_matrix)
 
+unsigned av_codec_get_codec_properties(const AVCodecContext *codec)
+{
+    return codec->properties; /* plain read accessor for the context's properties field */
+}
+
 int av_codec_get_max_lowres(const AVCodec *codec)
 {
     return codec->max_lowres;
@@ -1294,9 +1132,9 @@ static void get_subtitle_defaults(AVSubtitle *sub)
     sub->pts = AV_NOPTS_VALUE;
 }
 
-static int get_bit_rate(AVCodecContext *ctx)
+static int64_t get_bit_rate(AVCodecContext *ctx)
 {
-    int bit_rate;
+    int64_t bit_rate;
     int bits_per_sample;
 
     switch (ctx->codec_type) {
@@ -1308,7 +1146,7 @@ static int get_bit_rate(AVCodecContext *ctx)
         break;
     case AVMEDIA_TYPE_AUDIO:
         bits_per_sample = av_get_bits_per_sample(ctx->codec_id);
-        bit_rate = bits_per_sample ? ctx->sample_rate * ctx->channels * bits_per_sample : ctx->bit_rate;
+        bit_rate = bits_per_sample ? ctx->sample_rate * (int64_t)ctx->channels * bits_per_sample : ctx->bit_rate;
         break;
     default:
         bit_rate = 0;
@@ -1321,7 +1159,7 @@ int attribute_align_arg ff_codec_open2_recursive(AVCodecContext *avctx, const AV
 {
     int ret = 0;
 
-    ff_unlock_avcodec();
+    ff_unlock_avcodec(codec);
 
     ret = avcodec_open2(avctx, codec, options);
 
@@ -1333,6 +1171,7 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *code
 {
     int ret = 0;
     AVDictionary *tmp = NULL;
+    const AVPixFmtDescriptor *pixdesc;
 
     if (avcodec_is_open(avctx))
         return 0;
@@ -1398,14 +1237,14 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *code
         goto free_and_end;
 
     if (avctx->codec_whitelist && av_match_list(codec->name, avctx->codec_whitelist, ',') <= 0) {
-        av_log(avctx, AV_LOG_ERROR, "Codec (%s) not on whitelist\n", codec->name);
+        av_log(avctx, AV_LOG_ERROR, "Codec (%s) not on whitelist \'%s\'\n", codec->name, avctx->codec_whitelist);
         ret = AVERROR(EINVAL);
         goto free_and_end;
     }
 
-    // only call ff_set_dimensions() for non H.264/VP6F codecs so as not to overwrite previously setup dimensions
+    // only call ff_set_dimensions() for non H.264/VP6F/DXV codecs so as not to overwrite previously setup dimensions
     if (!(avctx->coded_width && avctx->coded_height && avctx->width && avctx->height &&
-          (avctx->codec_id == AV_CODEC_ID_H264 || avctx->codec_id == AV_CODEC_ID_VP6F))) {
+          (avctx->codec_id == AV_CODEC_ID_H264 || avctx->codec_id == AV_CODEC_ID_VP6F || avctx->codec_id == AV_CODEC_ID_DXV))) {
     if (avctx->coded_width && avctx->coded_height)
         ret = ff_set_dimensions(avctx, avctx->coded_width, avctx->coded_height);
     else if (avctx->width && avctx->height)
@@ -1456,7 +1295,7 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *code
     avctx->frame_number = 0;
     avctx->codec_descriptor = avcodec_descriptor_get(avctx->codec_id);
 
-    if (avctx->codec->capabilities & CODEC_CAP_EXPERIMENTAL &&
+    if ((avctx->codec->capabilities & AV_CODEC_CAP_EXPERIMENTAL) &&
         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
         const char *codec_string = av_codec_is_encoder(codec) ? "encoder" : "decoder";
         AVCodec *codec2;
@@ -1465,7 +1304,7 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *code
                "add '-strict %d' if you want to use it.\n",
                codec_string, codec->name, FF_COMPLIANCE_EXPERIMENTAL);
         codec2 = av_codec_is_encoder(codec) ? avcodec_find_encoder(codec->id) : avcodec_find_decoder(codec->id);
-        if (!(codec2->capabilities & CODEC_CAP_EXPERIMENTAL))
+        if (!(codec2->capabilities & AV_CODEC_CAP_EXPERIMENTAL))
             av_log(avctx, AV_LOG_ERROR, "Alternatively use the non experimental %s '%s'.\n",
                 codec_string, codec2->name);
         ret = AVERROR_EXPERIMENTAL;
@@ -1481,8 +1320,8 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *code
     if (!HAVE_THREADS)
         av_log(avctx, AV_LOG_WARNING, "Warning: not compiled with thread support, using thread emulation\n");
 
-    if (CONFIG_FRAME_THREAD_ENCODER) {
-        ff_unlock_avcodec(); //we will instanciate a few encoders thus kick the counter to prevent false detection of a problem
+    if (CONFIG_FRAME_THREAD_ENCODER && av_codec_is_encoder(avctx->codec)) {
+        ff_unlock_avcodec(codec); //we will instantiate a few encoders thus kick the counter to prevent false detection of a problem
         ret = ff_frame_thread_encoder_init(avctx, options ? *options : NULL);
         ff_lock_avcodec(avctx, codec);
         if (ret < 0)
@@ -1496,7 +1335,7 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *code
             goto free_and_end;
         }
     }
-    if (!HAVE_THREADS && !(codec->capabilities & CODEC_CAP_AUTO_THREADS))
+    if (!HAVE_THREADS && !(codec->capabilities & AV_CODEC_CAP_AUTO_THREADS))
         avctx->thread_count = 1;
 
     if (avctx->codec->max_lowres < avctx->lowres || avctx->lowres < 0) {
@@ -1514,6 +1353,15 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *code
 
     if (av_codec_is_encoder(avctx->codec)) {
         int i;
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
+        avctx->coded_frame = av_frame_alloc();
+        if (!avctx->coded_frame) {
+            ret = AVERROR(ENOMEM);
+            goto free_and_end;
+        }
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
         if (avctx->codec->sample_fmts) {
             for (i = 0; avctx->codec->sample_fmts[i] != AV_SAMPLE_FMT_NONE; i++) {
                 if (avctx->sample_fmt == avctx->codec->sample_fmts[i])
@@ -1566,6 +1414,12 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *code
                 goto free_and_end;
             }
         }
+        if (avctx->sample_rate < 0) {
+            av_log(avctx, AV_LOG_ERROR, "Specified sample rate %d is not supported\n",
+                    avctx->sample_rate);
+            ret = AVERROR(EINVAL);
+            goto free_and_end;
+        }
         if (avctx->codec->channel_layouts) {
             if (!avctx->channel_layout) {
                 av_log(avctx, AV_LOG_WARNING, "Channel layout not specified\n");
@@ -1596,7 +1450,20 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *code
         } else if (avctx->channel_layout) {
             avctx->channels = av_get_channel_layout_nb_channels(avctx->channel_layout);
         }
+        if (avctx->channels < 0) {
+            av_log(avctx, AV_LOG_ERROR, "Specified number of channels %d is not supported\n",
+                    avctx->channels);
+            ret = AVERROR(EINVAL);
+            goto free_and_end;
+        }
         if(avctx->codec_type == AVMEDIA_TYPE_VIDEO) {
+            pixdesc = av_pix_fmt_desc_get(avctx->pix_fmt);
+            if (    avctx->bits_per_raw_sample < 0
+                || (avctx->bits_per_raw_sample > 8 && pixdesc->comp[0].depth <= 8)) {
+                av_log(avctx, AV_LOG_WARNING, "Specified bit depth %d not possible with the specified pixel formats depth %d\n",
+                    avctx->bits_per_raw_sample, pixdesc->comp[0].depth);
+                avctx->bits_per_raw_sample = pixdesc->comp[0].depth;
+            }
             if (avctx->width <= 0 || avctx->height <= 0) {
                 av_log(avctx, AV_LOG_ERROR, "dimensions not set\n");
                 ret = AVERROR(EINVAL);
@@ -1605,11 +1472,21 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *code
         }
         if (   (avctx->codec_type == AVMEDIA_TYPE_VIDEO || avctx->codec_type == AVMEDIA_TYPE_AUDIO)
             && avctx->bit_rate>0 && avctx->bit_rate<1000) {
-            av_log(avctx, AV_LOG_WARNING, "Bitrate %d is extremely low, maybe you mean %dk\n", avctx->bit_rate, avctx->bit_rate);
+            av_log(avctx, AV_LOG_WARNING, "Bitrate %"PRId64" is extremely low, maybe you mean %"PRId64"k\n", (int64_t)avctx->bit_rate, (int64_t)avctx->bit_rate);
         }
 
         if (!avctx->rc_initial_buffer_occupancy)
             avctx->rc_initial_buffer_occupancy = avctx->rc_buffer_size * 3 / 4;
+
+        /* Reject settings where ticks_per_frame * time_base.num would exceed INT_MAX. */
+        if (avctx->ticks_per_frame && avctx->time_base.num &&
+            avctx->ticks_per_frame > INT_MAX / avctx->time_base.num) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "ticks_per_frame %d too large for the timebase %d/%d.\n",
+                   avctx->ticks_per_frame, avctx->time_base.num, avctx->time_base.den);
+            ret = AVERROR(EINVAL); /* without this the failure path returns the stale ret */
+            goto free_and_end;
+        }
     }
 
     avctx->pts_correction_num_faulty_pts =
@@ -1617,7 +1494,7 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *code
     avctx->pts_correction_last_pts =
     avctx->pts_correction_last_dts = INT64_MIN;
 
-    if (   !CONFIG_GRAY && avctx->flags & CODEC_FLAG_GRAY
+    if (   !CONFIG_GRAY && avctx->flags & AV_CODEC_FLAG_GRAY
         && avctx->codec_descriptor->type == AVMEDIA_TYPE_VIDEO)
         av_log(avctx, AV_LOG_WARNING,
                "gray decoding requested but not enabled at configuration time\n");
@@ -1708,7 +1585,7 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *code
     }
 
 end:
-    ff_unlock_avcodec();
+    ff_unlock_avcodec(codec);
     if (options) {
         av_dict_free(options);
         *options = tmp;
@@ -1724,6 +1601,12 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *code
         av_opt_free(avctx->priv_data);
     av_opt_free(avctx);
 
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
+    av_frame_free(&avctx->coded_frame);
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
     av_dict_free(&tmp);
     av_freep(&avctx->priv_data);
     if (avctx->internal) {
@@ -1735,39 +1618,29 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *code
     goto end;
 }
 
-int ff_alloc_packet2(AVCodecContext *avctx, AVPacket *avpkt, int64_t size)
+int ff_alloc_packet2(AVCodecContext *avctx, AVPacket *avpkt, int64_t size, int64_t min_size)
 {
     if (avpkt->size < 0) {
         av_log(avctx, AV_LOG_ERROR, "Invalid negative user packet size %d\n", avpkt->size);
         return AVERROR(EINVAL);
     }
-    if (size < 0 || size > INT_MAX - FF_INPUT_BUFFER_PADDING_SIZE) {
+    if (size < 0 || size > INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE) {
         av_log(avctx, AV_LOG_ERROR, "Invalid minimum required packet size %"PRId64" (max allowed is %d)\n",
-               size, INT_MAX - FF_INPUT_BUFFER_PADDING_SIZE);
+               size, INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE);
         return AVERROR(EINVAL);
     }
 
-    if (avctx) {
+    if (avctx && 2*min_size < size) { // FIXME The factor needs to be finetuned
         av_assert0(!avpkt->data || avpkt->data != avctx->internal->byte_buffer);
         if (!avpkt->data || avpkt->size < size) {
             av_fast_padded_malloc(&avctx->internal->byte_buffer, &avctx->internal->byte_buffer_size, size);
             avpkt->data = avctx->internal->byte_buffer;
             avpkt->size = avctx->internal->byte_buffer_size;
-#if FF_API_DESTRUCT_PACKET
-FF_DISABLE_DEPRECATION_WARNINGS
-            avpkt->destruct = NULL;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
         }
     }
 
     if (avpkt->data) {
         AVBufferRef *buf = avpkt->buf;
-#if FF_API_DESTRUCT_PACKET
-FF_DISABLE_DEPRECATION_WARNINGS
-        void *destruct = avpkt->destruct;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
 
         if (avpkt->size < size) {
             av_log(avctx, AV_LOG_ERROR, "User packet is too small (%d < %"PRId64")\n", avpkt->size, size);
@@ -1775,11 +1648,6 @@ FF_ENABLE_DEPRECATION_WARNINGS
         }
 
         av_init_packet(avpkt);
-#if FF_API_DESTRUCT_PACKET
-FF_DISABLE_DEPRECATION_WARNINGS
-        avpkt->destruct = destruct;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
         avpkt->buf      = buf;
         avpkt->size     = size;
         return 0;
@@ -1793,7 +1661,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
 
 int ff_alloc_packet(AVPacket *avpkt, int size)
 {
-    return ff_alloc_packet2(NULL, avpkt, size);
+    return ff_alloc_packet2(NULL, avpkt, size, 0); /* NULL avctx: ff_alloc_packet2() skips its avctx-owned byte_buffer path */
 }
 
 /**
@@ -1849,8 +1717,8 @@ int attribute_align_arg avcodec_encode_audio2(AVCodecContext *avctx,
 
     *got_packet_ptr = 0;
 
-    if (!(avctx->codec->capabilities & CODEC_CAP_DELAY) && !frame) {
-        av_free_packet(avpkt);
+    if (!(avctx->codec->capabilities & AV_CODEC_CAP_DELAY) && !frame) {
+        av_packet_unref(avpkt);
         av_init_packet(avpkt);
         return 0;
     }
@@ -1884,13 +1752,13 @@ int attribute_align_arg avcodec_encode_audio2(AVCodecContext *avctx,
 
     /* check for valid frame size */
     if (frame) {
-        if (avctx->codec->capabilities & CODEC_CAP_SMALL_LAST_FRAME) {
+        if (avctx->codec->capabilities & AV_CODEC_CAP_SMALL_LAST_FRAME) {
             if (frame->nb_samples > avctx->frame_size) {
                 av_log(avctx, AV_LOG_ERROR, "more samples than frame size (avcodec_encode_audio2)\n");
                 ret = AVERROR(EINVAL);
                 goto end;
             }
-        } else if (!(avctx->codec->capabilities & CODEC_CAP_VARIABLE_FRAME_SIZE)) {
+        } else if (!(avctx->codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE)) {
             if (frame->nb_samples < avctx->frame_size &&
                 !avctx->internal->last_audio_frame) {
                 ret = pad_last_frame(avctx, &padded_frame, frame);
@@ -1909,10 +1777,12 @@ int attribute_align_arg avcodec_encode_audio2(AVCodecContext *avctx,
         }
     }
 
+    av_assert0(avctx->codec->encode2);
+
     ret = avctx->codec->encode2(avctx, avpkt, frame, got_packet_ptr);
     if (!ret) {
         if (*got_packet_ptr) {
-            if (!(avctx->codec->capabilities & CODEC_CAP_DELAY)) {
+            if (!(avctx->codec->capabilities & AV_CODEC_CAP_DELAY)) {
                 if (avpkt->pts == AV_NOPTS_VALUE)
                     avpkt->pts = frame->pts;
                 if (!avpkt->duration)
@@ -1936,11 +1806,6 @@ int attribute_align_arg avcodec_encode_audio2(AVCodecContext *avctx,
             }
             avpkt->buf      = user_pkt.buf;
             avpkt->data     = user_pkt.data;
-#if FF_API_DESTRUCT_PACKET
-FF_DISABLE_DEPRECATION_WARNINGS
-            avpkt->destruct = user_pkt.destruct;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
         } else {
             if (av_dup_packet(avpkt) < 0) {
                 ret = AVERROR(ENOMEM);
@@ -1950,7 +1815,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
 
     if (!ret) {
         if (needs_realloc && avpkt->data) {
-            ret = av_buffer_realloc(&avpkt->buf, avpkt->size + FF_INPUT_BUFFER_PADDING_SIZE);
+            ret = av_buffer_realloc(&avpkt->buf, avpkt->size + AV_INPUT_BUFFER_PADDING_SIZE);
             if (ret >= 0)
                 avpkt->data = avpkt->buf->data;
         }
@@ -1959,7 +1824,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
     }
 
     if (ret < 0 || !*got_packet_ptr) {
-        av_free_packet(avpkt);
+        av_packet_unref(avpkt);
         av_init_packet(avpkt);
         goto end;
     }
@@ -1980,126 +1845,6 @@ FF_ENABLE_DEPRECATION_WARNINGS
     return ret;
 }
 
-#if FF_API_OLD_ENCODE_AUDIO
-int attribute_align_arg avcodec_encode_audio(AVCodecContext *avctx,
-                                             uint8_t *buf, int buf_size,
-                                             const short *samples)
-{
-    AVPacket pkt;
-    AVFrame *frame;
-    int ret, samples_size, got_packet;
-
-    av_init_packet(&pkt);
-    pkt.data = buf;
-    pkt.size = buf_size;
-
-    if (samples) {
-        frame = av_frame_alloc();
-        if (!frame)
-            return AVERROR(ENOMEM);
-
-        if (avctx->frame_size) {
-            frame->nb_samples = avctx->frame_size;
-        } else {
-            /* if frame_size is not set, the number of samples must be
-             * calculated from the buffer size */
-            int64_t nb_samples;
-            if (!av_get_bits_per_sample(avctx->codec_id)) {
-                av_log(avctx, AV_LOG_ERROR, "avcodec_encode_audio() does not "
-                                            "support this codec\n");
-                av_frame_free(&frame);
-                return AVERROR(EINVAL);
-            }
-            nb_samples = (int64_t)buf_size * 8 /
-                         (av_get_bits_per_sample(avctx->codec_id) *
-                          avctx->channels);
-            if (nb_samples >= INT_MAX) {
-                av_frame_free(&frame);
-                return AVERROR(EINVAL);
-            }
-            frame->nb_samples = nb_samples;
-        }
-
-        /* it is assumed that the samples buffer is large enough based on the
-         * relevant parameters */
-        samples_size = av_samples_get_buffer_size(NULL, avctx->channels,
-                                                  frame->nb_samples,
-                                                  avctx->sample_fmt, 1);
-        if ((ret = avcodec_fill_audio_frame(frame, avctx->channels,
-                                            avctx->sample_fmt,
-                                            (const uint8_t *)samples,
-                                            samples_size, 1)) < 0) {
-            av_frame_free(&frame);
-            return ret;
-        }
-
-        /* fabricate frame pts from sample count.
-         * this is needed because the avcodec_encode_audio() API does not have
-         * a way for the user to provide pts */
-        if (avctx->sample_rate && avctx->time_base.num)
-            frame->pts = ff_samples_to_time_base(avctx,
-                                                 avctx->internal->sample_count);
-        else
-            frame->pts = AV_NOPTS_VALUE;
-        avctx->internal->sample_count += frame->nb_samples;
-    } else {
-        frame = NULL;
-    }
-
-    got_packet = 0;
-    ret = avcodec_encode_audio2(avctx, &pkt, frame, &got_packet);
-    if (!ret && got_packet && avctx->coded_frame) {
-        avctx->coded_frame->pts       = pkt.pts;
-        avctx->coded_frame->key_frame = !!(pkt.flags & AV_PKT_FLAG_KEY);
-    }
-    /* free any side data since we cannot return it */
-    av_packet_free_side_data(&pkt);
-
-    if (frame && frame->extended_data != frame->data)
-        av_freep(&frame->extended_data);
-
-    av_frame_free(&frame);
-    return ret ? ret : pkt.size;
-}
-
-#endif
-
-#if FF_API_OLD_ENCODE_VIDEO
-int attribute_align_arg avcodec_encode_video(AVCodecContext *avctx, uint8_t *buf, int buf_size,
-                                             const AVFrame *pict)
-{
-    AVPacket pkt;
-    int ret, got_packet = 0;
-
-    if (buf_size < FF_MIN_BUFFER_SIZE) {
-        av_log(avctx, AV_LOG_ERROR, "buffer smaller than minimum size\n");
-        return -1;
-    }
-
-    av_init_packet(&pkt);
-    pkt.data = buf;
-    pkt.size = buf_size;
-
-    ret = avcodec_encode_video2(avctx, &pkt, pict, &got_packet);
-    if (!ret && got_packet && avctx->coded_frame) {
-        avctx->coded_frame->pts       = pkt.pts;
-        avctx->coded_frame->key_frame = !!(pkt.flags & AV_PKT_FLAG_KEY);
-    }
-
-    /* free any side data since we cannot return it */
-    if (pkt.side_data_elems > 0) {
-        int i;
-        for (i = 0; i < pkt.side_data_elems; i++)
-            av_free(pkt.side_data[i].data);
-        av_freep(&pkt.side_data);
-        pkt.side_data_elems = 0;
-    }
-
-    return ret ? ret : pkt.size;
-}
-
-#endif
-
 int attribute_align_arg avcodec_encode_video2(AVCodecContext *avctx,
                                               AVPacket *avpkt,
                                               const AVFrame *frame,
@@ -2115,11 +1860,11 @@ int attribute_align_arg avcodec_encode_video2(AVCodecContext *avctx,
        avctx->internal->frame_thread_encoder && (avctx->active_thread_type&FF_THREAD_FRAME))
         return ff_thread_video_encode_frame(avctx, avpkt, frame, got_packet_ptr);
 
-    if ((avctx->flags&CODEC_FLAG_PASS1) && avctx->stats_out)
+    if ((avctx->flags&AV_CODEC_FLAG_PASS1) && avctx->stats_out)
         avctx->stats_out[0] = '\0';
 
-    if (!(avctx->codec->capabilities & CODEC_CAP_DELAY) && !frame) {
-        av_free_packet(avpkt);
+    if (!(avctx->codec->capabilities & AV_CODEC_CAP_DELAY) && !frame) {
+        av_packet_unref(avpkt);
         av_init_packet(avpkt);
         avpkt->size = 0;
         return 0;
@@ -2150,11 +1895,6 @@ int attribute_align_arg avcodec_encode_video2(AVCodecContext *avctx,
             }
             avpkt->buf      = user_pkt.buf;
             avpkt->data     = user_pkt.data;
-#if FF_API_DESTRUCT_PACKET
-FF_DISABLE_DEPRECATION_WARNINGS
-            avpkt->destruct = user_pkt.destruct;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
         } else {
             if (av_dup_packet(avpkt) < 0) {
                 ret = AVERROR(ENOMEM);
@@ -2165,11 +1905,11 @@ FF_ENABLE_DEPRECATION_WARNINGS
     if (!ret) {
         if (!*got_packet_ptr)
             avpkt->size = 0;
-        else if (!(avctx->codec->capabilities & CODEC_CAP_DELAY))
+        else if (!(avctx->codec->capabilities & AV_CODEC_CAP_DELAY))
             avpkt->pts = avpkt->dts = frame->pts;
 
         if (needs_realloc && avpkt->data) {
-            ret = av_buffer_realloc(&avpkt->buf, avpkt->size + FF_INPUT_BUFFER_PADDING_SIZE);
+            ret = av_buffer_realloc(&avpkt->buf, avpkt->size + AV_INPUT_BUFFER_PADDING_SIZE);
             if (ret >= 0)
                 avpkt->data = avpkt->buf->data;
         }
@@ -2178,9 +1918,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
     }
 
     if (ret < 0 || !*got_packet_ptr)
-        av_free_packet(avpkt);
-    else
-        av_packet_merge_side_data(avpkt);
+        av_packet_unref(avpkt);
 
     emms_c();
     return ret;
@@ -2241,12 +1979,13 @@ static int apply_param_change(AVCodecContext *avctx, AVPacket *avpkt)
     int size = 0, ret;
     const uint8_t *data;
     uint32_t flags;
+    int64_t val;
 
     data = av_packet_get_side_data(avpkt, AV_PKT_DATA_PARAM_CHANGE, &size);
     if (!data)
         return 0;
 
-    if (!(avctx->codec->capabilities & CODEC_CAP_PARAM_CHANGE)) {
+    if (!(avctx->codec->capabilities & AV_CODEC_CAP_PARAM_CHANGE)) {
         av_log(avctx, AV_LOG_ERROR, "This decoder does not support parameter "
                "changes, but PARAM_CHANGE side data was sent to it.\n");
         return AVERROR(EINVAL);
@@ -2261,7 +2000,12 @@ static int apply_param_change(AVCodecContext *avctx, AVPacket *avpkt)
     if (flags & AV_SIDE_DATA_PARAM_CHANGE_CHANNEL_COUNT) {
         if (size < 4)
             goto fail;
-        avctx->channels = bytestream_get_le32(&data);
+        val = bytestream_get_le32(&data); /* held in int64_t so the range check below is meaningful */
+        if (val <= 0 || val > INT_MAX) {  /* reject zero and anything that does not fit in int */
+            av_log(avctx, AV_LOG_ERROR, "Invalid channel count\n");
+            return AVERROR_INVALIDDATA;
+        }
+        avctx->channels = val;
         size -= 4;
     }
     if (flags & AV_SIDE_DATA_PARAM_CHANGE_CHANNEL_LAYOUT) {
@@ -2273,7 +2017,12 @@ static int apply_param_change(AVCodecContext *avctx, AVPacket *avpkt)
     if (flags & AV_SIDE_DATA_PARAM_CHANGE_SAMPLE_RATE) {
         if (size < 4)
             goto fail;
-        avctx->sample_rate = bytestream_get_le32(&data);
+        val = bytestream_get_le32(&data); /* held in int64_t so the range check below is meaningful */
+        if (val <= 0 || val > INT_MAX) {  /* reject zero and anything that does not fit in int */
+            av_log(avctx, AV_LOG_ERROR, "Invalid sample rate\n");
+            return AVERROR_INVALIDDATA;
+        }
+        avctx->sample_rate = val;
         size -= 4;
     }
     if (flags & AV_SIDE_DATA_PARAM_CHANGE_DIMENSIONS) {
@@ -2293,18 +2042,6 @@ static int apply_param_change(AVCodecContext *avctx, AVPacket *avpkt)
     return AVERROR_INVALIDDATA;
 }
 
-static int add_metadata_from_side_data(AVCodecContext *avctx, AVFrame *frame)
-{
-    int size;
-    const uint8_t *side_metadata;
-
-    AVDictionary **frame_md = avpriv_frame_get_metadatap(frame);
-
-    side_metadata = av_packet_get_side_data(avctx->internal->pkt,
-                                            AV_PKT_DATA_STRINGS_METADATA, &size);
-    return av_packet_unpack_dictionary(side_metadata, size, frame_md);
-}
-
 static int unrefcount_frame(AVCodecInternal *avci, AVFrame *frame)
 {
     int ret;
@@ -2373,7 +2110,8 @@ int attribute_align_arg avcodec_decode_video2(AVCodecContext *avctx, AVFrame *pi
 
     av_frame_unref(picture);
 
-    if ((avctx->codec->capabilities & CODEC_CAP_DELAY) || avpkt->size || (avctx->active_thread_type & FF_THREAD_FRAME)) {
+    if ((avctx->codec->capabilities & AV_CODEC_CAP_DELAY) || avpkt->size ||
+        (avctx->active_thread_type & FF_THREAD_FRAME)) {
         int did_split = av_packet_split_side_data(&tmp);
         ret = apply_param_change(avctx, &tmp);
         if (ret < 0) {
@@ -2389,21 +2127,21 @@ int attribute_align_arg avcodec_decode_video2(AVCodecContext *avctx, AVFrame *pi
         else {
             ret = avctx->codec->decode(avctx, picture, got_picture_ptr,
                                        &tmp);
-            picture->pkt_dts = avpkt->dts;
+            if (!(avctx->codec->caps_internal & FF_CODEC_CAP_SETS_PKT_DTS))
+                picture->pkt_dts = avpkt->dts;
 
             if(!avctx->has_b_frames){
                 av_frame_set_pkt_pos(picture, avpkt->pos);
             }
             //FIXME these should be under if(!avctx->has_b_frames)
             /* get_buffer is supposed to set frame parameters */
-            if (!(avctx->codec->capabilities & CODEC_CAP_DR1)) {
+            if (!(avctx->codec->capabilities & AV_CODEC_CAP_DR1)) {
                 if (!picture->sample_aspect_ratio.num)    picture->sample_aspect_ratio = avctx->sample_aspect_ratio;
                 if (!picture->width)                      picture->width               = avctx->width;
                 if (!picture->height)                     picture->height              = avctx->height;
                 if (picture->format == AV_PIX_FMT_NONE)   picture->format              = avctx->pix_fmt;
             }
         }
-        add_metadata_from_side_data(avctx, picture);
 
 fail:
         emms_c(); //needed to avoid an emms_c() call before every return;
@@ -2444,59 +2182,6 @@ int attribute_align_arg avcodec_decode_video2(AVCodecContext *avctx, AVFrame *pi
     return ret;
 }
 
-#if FF_API_OLD_DECODE_AUDIO
-int attribute_align_arg avcodec_decode_audio3(AVCodecContext *avctx, int16_t *samples,
-                                              int *frame_size_ptr,
-                                              AVPacket *avpkt)
-{
-    AVFrame *frame = av_frame_alloc();
-    int ret, got_frame = 0;
-
-    if (!frame)
-        return AVERROR(ENOMEM);
-    if (avctx->get_buffer != avcodec_default_get_buffer) {
-        av_log(avctx, AV_LOG_ERROR, "Custom get_buffer() for use with"
-                                    "avcodec_decode_audio3() detected. Overriding with avcodec_default_get_buffer\n");
-        av_log(avctx, AV_LOG_ERROR, "Please port your application to "
-                                    "avcodec_decode_audio4()\n");
-        avctx->get_buffer = avcodec_default_get_buffer;
-        avctx->release_buffer = avcodec_default_release_buffer;
-    }
-
-    ret = avcodec_decode_audio4(avctx, frame, &got_frame, avpkt);
-
-    if (ret >= 0 && got_frame) {
-        int ch, plane_size;
-        int planar    = av_sample_fmt_is_planar(avctx->sample_fmt);
-        int data_size = av_samples_get_buffer_size(&plane_size, avctx->channels,
-                                                   frame->nb_samples,
-                                                   avctx->sample_fmt, 1);
-        if (*frame_size_ptr < data_size) {
-            av_log(avctx, AV_LOG_ERROR, "output buffer size is too small for "
-                                        "the current frame (%d < %d)\n", *frame_size_ptr, data_size);
-            av_frame_free(&frame);
-            return AVERROR(EINVAL);
-        }
-
-        memcpy(samples, frame->extended_data[0], plane_size);
-
-        if (planar && avctx->channels > 1) {
-            uint8_t *out = ((uint8_t *)samples) + plane_size;
-            for (ch = 1; ch < avctx->channels; ch++) {
-                memcpy(out, frame->extended_data[ch], plane_size);
-                out += plane_size;
-            }
-        }
-        *frame_size_ptr = data_size;
-    } else {
-        *frame_size_ptr = 0;
-    }
-    av_frame_free(&frame);
-    return ret;
-}
-
-#endif
-
 int attribute_align_arg avcodec_decode_audio4(AVCodecContext *avctx,
                                               AVFrame *frame,
                                               int *got_frame_ptr,
@@ -2520,7 +2205,7 @@ int attribute_align_arg avcodec_decode_audio4(AVCodecContext *avctx,
 
     av_frame_unref(frame);
 
-    if ((avctx->codec->capabilities & CODEC_CAP_DELAY) || avpkt->size || (avctx->active_thread_type & FF_THREAD_FRAME)) {
+    if ((avctx->codec->capabilities & AV_CODEC_CAP_DELAY) || avpkt->size || (avctx->active_thread_type & FF_THREAD_FRAME)) {
         uint8_t *side;
         int side_size;
         uint32_t discard_padding = 0;
@@ -2545,7 +2230,6 @@ int attribute_align_arg avcodec_decode_audio4(AVCodecContext *avctx,
             frame->pkt_dts = avpkt->dts;
         }
         if (ret >= 0 && *got_frame_ptr) {
-            add_metadata_from_side_data(avctx, frame);
             avctx->frame_number++;
             av_frame_set_best_effort_timestamp(frame,
                                                guess_correct_pts(avctx,
@@ -2571,7 +2255,7 @@ int attribute_align_arg avcodec_decode_audio4(AVCodecContext *avctx,
             discard_reason = AV_RL8(side + 9);
         }
         if (avctx->internal->skip_samples && *got_frame_ptr &&
-            !(avctx->flags2 & CODEC_FLAG2_SKIP_MANUAL)) {
+            !(avctx->flags2 & AV_CODEC_FLAG2_SKIP_MANUAL)) {
             if(frame->nb_samples <= avctx->internal->skip_samples){
                 *got_frame_ptr = 0;
                 avctx->internal->skip_samples -= frame->nb_samples;
@@ -2601,7 +2285,7 @@ int attribute_align_arg avcodec_decode_audio4(AVCodecContext *avctx,
         }
 
         if (discard_padding > 0 && discard_padding <= frame->nb_samples && *got_frame_ptr &&
-            !(avctx->flags2 & CODEC_FLAG2_SKIP_MANUAL)) {
+            !(avctx->flags2 & AV_CODEC_FLAG2_SKIP_MANUAL)) {
             if (discard_padding == frame->nb_samples) {
                 *got_frame_ptr = 0;
             } else {
@@ -2620,7 +2304,7 @@ int attribute_align_arg avcodec_decode_audio4(AVCodecContext *avctx,
             }
         }
 
-        if ((avctx->flags2 & CODEC_FLAG2_SKIP_MANUAL) && *got_frame_ptr) {
+        if ((avctx->flags2 & AV_CODEC_FLAG2_SKIP_MANUAL) && *got_frame_ptr) {
             AVFrameSideData *fside = av_frame_new_side_data(frame, AV_FRAME_DATA_SKIP_SAMPLES, 10);
             if (fside) {
                 AV_WL32(fside->data, avctx->internal->skip_samples);
@@ -2673,7 +2357,7 @@ static int recode_subtitle(AVCodecContext *avctx,
     inb = inpkt->data;
     inl = inpkt->size;
 
-    if (inl >= INT_MAX / UTF8_MAX_BYTES - FF_INPUT_BUFFER_PADDING_SIZE) {
+    if (inl >= INT_MAX / UTF8_MAX_BYTES - AV_INPUT_BUFFER_PADDING_SIZE) {
         av_log(avctx, AV_LOG_ERROR, "Subtitles packet is too big for recoding\n");
         ret = AVERROR(ENOMEM);
         goto end;
@@ -2694,7 +2378,7 @@ static int recode_subtitle(AVCodecContext *avctx,
         ret = FFMIN(AVERROR(errno), -1);
         av_log(avctx, AV_LOG_ERROR, "Unable to recode subtitle event \"%s\" "
                "from %s to UTF-8\n", inpkt->data, avctx->sub_charenc);
-        av_free_packet(&tmp);
+        av_packet_unref(&tmp);
         goto end;
     }
     outpkt->size -= outl;
@@ -2749,7 +2433,7 @@ int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
     *got_sub_ptr = 0;
     get_subtitle_defaults(sub);
 
-    if ((avctx->codec->capabilities & CODEC_CAP_DELAY) || avpkt->size) {
+    if ((avctx->codec->capabilities & AV_CODEC_CAP_DELAY) || avpkt->size) {
         AVPacket pkt_recoded;
         AVPacket tmp = *avpkt;
         int did_split = av_packet_split_side_data(&tmp);
@@ -2762,7 +2446,7 @@ int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
              * remaining bytes should have already been filled with zeros by the
              * original packet allocation anyway. */
             memset(tmp.data + tmp.size, 0,
-                   FFMIN(avpkt->size - tmp.size, FF_INPUT_BUFFER_PADDING_SIZE));
+                   FFMIN(avpkt->size - tmp.size, AV_INPUT_BUFFER_PADDING_SIZE));
         }
 
         pkt_recoded = tmp;
@@ -2772,7 +2456,7 @@ int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
         } else {
             avctx->internal->pkt = &pkt_recoded;
 
-            if (avctx->pkt_timebase.den && avpkt->pts != AV_NOPTS_VALUE)
+            if (avctx->pkt_timebase.num && avpkt->pts != AV_NOPTS_VALUE)
                 sub->pts = av_rescale_q(avpkt->pts,
                                         avctx->pkt_timebase, AV_TIME_BASE_Q);
             ret = avctx->codec->decode(avctx, sub, got_sub_ptr, &pkt_recoded);
@@ -2801,7 +2485,7 @@ int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
                 pkt_recoded.side_data = NULL;
                 pkt_recoded.side_data_elems = 0;
 
-                av_free_packet(&pkt_recoded);
+                av_packet_unref(&pkt_recoded);
             }
             if (avctx->codec_descriptor->props & AV_CODEC_PROP_BITMAP_SUB)
                 sub->format = 0;
@@ -2828,10 +2512,10 @@ void avsubtitle_free(AVSubtitle *sub)
     int i;
 
     for (i = 0; i < sub->num_rects; i++) {
-        av_freep(&sub->rects[i]->pict.data[0]);
-        av_freep(&sub->rects[i]->pict.data[1]);
-        av_freep(&sub->rects[i]->pict.data[2]);
-        av_freep(&sub->rects[i]->pict.data[3]);
+        av_freep(&sub->rects[i]->data[0]);
+        av_freep(&sub->rects[i]->data[1]);
+        av_freep(&sub->rects[i]->data[2]);
+        av_freep(&sub->rects[i]->data[3]);
         av_freep(&sub->rects[i]->text);
         av_freep(&sub->rects[i]->ass);
         av_freep(&sub->rects[i]);
@@ -2844,12 +2528,13 @@ void avsubtitle_free(AVSubtitle *sub)
 
 av_cold int avcodec_close(AVCodecContext *avctx)
 {
+    int i;
+
     if (!avctx)
         return 0;
 
     if (avcodec_is_open(avctx)) {
         FramePool *pool = avctx->internal->pool;
-        int i;
         if (CONFIG_FRAME_THREAD_ENCODER &&
             avctx->internal->frame_thread_encoder && avctx->thread_count > 1) {
             ff_frame_thread_encoder_free(avctx);
@@ -2858,7 +2543,6 @@ av_cold int avcodec_close(AVCodecContext *avctx)
             ff_thread_free(avctx);
         if (avctx->codec && avctx->codec->close)
             avctx->codec->close(avctx);
-        avctx->coded_frame = NULL;
         avctx->internal->byte_buffer_size = 0;
         av_freep(&avctx->internal->byte_buffer);
         av_frame_free(&avctx->internal->to_free);
@@ -2873,12 +2557,23 @@ av_cold int avcodec_close(AVCodecContext *avctx)
         av_freep(&avctx->internal);
     }
 
+    for (i = 0; i < avctx->nb_coded_side_data; i++)
+        av_freep(&avctx->coded_side_data[i].data);
+    av_freep(&avctx->coded_side_data);
+    avctx->nb_coded_side_data = 0;
+
     if (avctx->priv_data && avctx->codec && avctx->codec->priv_class)
         av_opt_free(avctx->priv_data);
     av_opt_free(avctx);
     av_freep(&avctx->priv_data);
-    if (av_codec_is_encoder(avctx->codec))
+    if (av_codec_is_encoder(avctx->codec)) {
         av_freep(&avctx->extradata);
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
+        av_frame_free(&avctx->coded_frame);
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+    }
     avctx->codec = NULL;
     avctx->active_thread_type = 0;
 
@@ -2890,25 +2585,6 @@ static enum AVCodecID remap_deprecated_codec_id(enum AVCodecID id)
     switch(id){
         //This is for future deprecatec codec ids, its empty since
         //last major bump but will fill up again over time, please don't remove it
-//         case AV_CODEC_ID_UTVIDEO_DEPRECATED: return AV_CODEC_ID_UTVIDEO;
-        case AV_CODEC_ID_BRENDER_PIX_DEPRECATED         : return AV_CODEC_ID_BRENDER_PIX;
-        case AV_CODEC_ID_OPUS_DEPRECATED                : return AV_CODEC_ID_OPUS;
-        case AV_CODEC_ID_TAK_DEPRECATED                 : return AV_CODEC_ID_TAK;
-        case AV_CODEC_ID_PAF_AUDIO_DEPRECATED           : return AV_CODEC_ID_PAF_AUDIO;
-        case AV_CODEC_ID_PCM_S24LE_PLANAR_DEPRECATED    : return AV_CODEC_ID_PCM_S24LE_PLANAR;
-        case AV_CODEC_ID_PCM_S32LE_PLANAR_DEPRECATED    : return AV_CODEC_ID_PCM_S32LE_PLANAR;
-        case AV_CODEC_ID_ADPCM_VIMA_DEPRECATED          : return AV_CODEC_ID_ADPCM_VIMA;
-        case AV_CODEC_ID_ESCAPE130_DEPRECATED           : return AV_CODEC_ID_ESCAPE130;
-        case AV_CODEC_ID_EXR_DEPRECATED                 : return AV_CODEC_ID_EXR;
-        case AV_CODEC_ID_G2M_DEPRECATED                 : return AV_CODEC_ID_G2M;
-        case AV_CODEC_ID_PAF_VIDEO_DEPRECATED           : return AV_CODEC_ID_PAF_VIDEO;
-        case AV_CODEC_ID_WEBP_DEPRECATED                : return AV_CODEC_ID_WEBP;
-        case AV_CODEC_ID_HEVC_DEPRECATED                : return AV_CODEC_ID_HEVC;
-        case AV_CODEC_ID_MVC1_DEPRECATED                : return AV_CODEC_ID_MVC1;
-        case AV_CODEC_ID_MVC2_DEPRECATED                : return AV_CODEC_ID_MVC2;
-        case AV_CODEC_ID_SANM_DEPRECATED                : return AV_CODEC_ID_SANM;
-        case AV_CODEC_ID_SGIRLE_DEPRECATED              : return AV_CODEC_ID_SGIRLE;
-        case AV_CODEC_ID_VP7_DEPRECATED                 : return AV_CODEC_ID_VP7;
         default                                         : return id;
     }
 }
@@ -2921,7 +2597,7 @@ static AVCodec *find_encdec(enum AVCodecID id, int encoder)
     while (p) {
         if ((encoder ? av_codec_is_encoder(p) : av_codec_is_decoder(p)) &&
             p->id == id) {
-            if (p->capabilities & CODEC_CAP_EXPERIMENTAL && !experimental) {
+            if (p->capabilities & AV_CODEC_CAP_EXPERIMENTAL && !experimental) {
                 experimental = p;
             } else
                 return p;
@@ -3014,8 +2690,7 @@ void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode)
     const char *codec_type;
     const char *codec_name;
     const char *profile = NULL;
-    const AVCodec *p;
-    int bitrate;
+    int64_t bitrate;
     int new_line = 0;
     AVRational display_aspect_ratio;
     const char *separator = enc->dump_separator ? (const char *)enc->dump_separator : ", ";
@@ -3024,15 +2699,7 @@ void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode)
         return;
     codec_type = av_get_media_type_string(enc->codec_type);
     codec_name = avcodec_get_name(enc->codec_id);
-    if (enc->profile != FF_PROFILE_UNKNOWN) {
-        if (enc->codec)
-            p = enc->codec;
-        else
-            p = encode ? avcodec_find_encoder(enc->codec_id) :
-                        avcodec_find_decoder(enc->codec_id);
-        if (p)
-            profile = av_get_profile_name(p, enc->profile);
-    }
+    profile = avcodec_profile_name(enc->codec_id, enc->profile);
 
     snprintf(buf, buf_size, "%s: %s", codec_type ? codec_type : "unknown",
              codec_name);
@@ -3068,7 +2735,7 @@ void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode)
                  "%s", enc->pix_fmt == AV_PIX_FMT_NONE ? "none" :
                      av_get_pix_fmt_name(enc->pix_fmt));
             if (enc->bits_per_raw_sample && enc->pix_fmt != AV_PIX_FMT_NONE &&
-                enc->bits_per_raw_sample <= av_pix_fmt_desc_get(enc->pix_fmt)->comp[0].depth_minus1)
+                enc->bits_per_raw_sample < av_pix_fmt_desc_get(enc->pix_fmt)->comp[0].depth)
                 av_strlcatf(detail, sizeof(detail), "%d bpc, ", enc->bits_per_raw_sample);
             if (enc->color_range != AVCOL_RANGE_UNSPECIFIED)
                 av_strlcatf(detail, sizeof(detail), "%s, ",
@@ -3115,8 +2782,8 @@ void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode)
 
             if (enc->sample_aspect_ratio.num) {
                 av_reduce(&display_aspect_ratio.num, &display_aspect_ratio.den,
-                          enc->width * enc->sample_aspect_ratio.num,
-                          enc->height * enc->sample_aspect_ratio.den,
+                          enc->width * (int64_t)enc->sample_aspect_ratio.num,
+                          enc->height * (int64_t)enc->sample_aspect_ratio.den,
                           1024 * 1024);
                 snprintf(buf + strlen(buf), buf_size - strlen(buf),
                          " [SAR %d:%d DAR %d:%d]",
@@ -3133,6 +2800,13 @@ void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode)
         if (encode) {
             snprintf(buf + strlen(buf), buf_size - strlen(buf),
                      ", q=%d-%d", enc->qmin, enc->qmax);
+        } else {
+            if (enc->properties & FF_CODEC_PROPERTY_CLOSED_CAPTIONS)
+                snprintf(buf + strlen(buf), buf_size - strlen(buf),
+                         ", Closed Captions");
+            if (enc->properties & FF_CODEC_PROPERTY_LOSSLESS)
+                snprintf(buf + strlen(buf), buf_size - strlen(buf),
+                         ", lossless");
         }
         break;
     case AVMEDIA_TYPE_AUDIO:
@@ -3170,20 +2844,20 @@ void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode)
         return;
     }
     if (encode) {
-        if (enc->flags & CODEC_FLAG_PASS1)
+        if (enc->flags & AV_CODEC_FLAG_PASS1)
             snprintf(buf + strlen(buf), buf_size - strlen(buf),
                      ", pass 1");
-        if (enc->flags & CODEC_FLAG_PASS2)
+        if (enc->flags & AV_CODEC_FLAG_PASS2)
             snprintf(buf + strlen(buf), buf_size - strlen(buf),
                      ", pass 2");
     }
     bitrate = get_bit_rate(enc);
     if (bitrate != 0) {
         snprintf(buf + strlen(buf), buf_size - strlen(buf),
-                 ", %d kb/s", bitrate / 1000);
+                 ", %"PRId64" kb/s", bitrate / 1000);
     } else if (enc->rc_max_rate > 0) {
         snprintf(buf + strlen(buf), buf_size - strlen(buf),
-                 ", max. %d kb/s", enc->rc_max_rate / 1000);
+                 ", max. %"PRId64" kb/s", (int64_t)enc->rc_max_rate / 1000);
     }
 }
 
@@ -3200,6 +2874,21 @@ const char *av_get_profile_name(const AVCodec *codec, int profile)
     return NULL;
 }
 
+/* Name of `profile` in the descriptor of `codec_id`, or NULL if not found. */
+const char *avcodec_profile_name(enum AVCodecID codec_id, int profile)
+{
+    const AVCodecDescriptor *desc = avcodec_descriptor_get(codec_id);
+    const AVProfile *entry = (desc && profile != FF_PROFILE_UNKNOWN) ? desc->profiles : NULL;
+
+    /* Scan entries until one with profile FF_PROFILE_UNKNOWN is reached. */
+    while (entry && entry->profile != FF_PROFILE_UNKNOWN) {
+        if (entry->profile == profile)
+            return entry->name;
+        entry++;
+    }
+    return NULL;
+}
+
 unsigned avcodec_version(void)
 {
 //    av_assert0(AV_CODEC_ID_V410==164);
@@ -3209,11 +2898,6 @@ unsigned avcodec_version(void)
     av_assert0(AV_CODEC_ID_SRT==94216);
     av_assert0(LIBAVCODEC_VERSION_MICRO >= 100);
 
-    av_assert0(CODEC_ID_CLLC == AV_CODEC_ID_CLLC);
-    av_assert0(CODEC_ID_PCM_S8_PLANAR == AV_CODEC_ID_PCM_S8_PLANAR);
-    av_assert0(CODEC_ID_ADPCM_IMA_APC == AV_CODEC_ID_ADPCM_IMA_APC);
-    av_assert0(CODEC_ID_ILBC == AV_CODEC_ID_ILBC);
-    av_assert0(CODEC_ID_SRT == AV_CODEC_ID_SRT);
     return LIBAVCODEC_VERSION_INT;
 }
 
@@ -3254,6 +2938,7 @@ int av_get_exact_bits_per_sample(enum AVCodecID codec_id)
     case AV_CODEC_ID_ADPCM_IMA_WS:
     case AV_CODEC_ID_ADPCM_G722:
     case AV_CODEC_ID_ADPCM_YAMAHA:
+    case AV_CODEC_ID_ADPCM_AICA:
         return 4;
     case AV_CODEC_ID_DSD_LSBF:
     case AV_CODEC_ID_DSD_MSBF:
@@ -3265,6 +2950,7 @@ int av_get_exact_bits_per_sample(enum AVCodecID codec_id)
     case AV_CODEC_ID_PCM_S8_PLANAR:
     case AV_CODEC_ID_PCM_U8:
     case AV_CODEC_ID_PCM_ZORK:
+    case AV_CODEC_ID_SDX2_DPCM:
         return 8;
     case AV_CODEC_ID_PCM_S16BE:
     case AV_CODEC_ID_PCM_S16BE_PLANAR:
@@ -3418,11 +3104,12 @@ int av_get_audio_frame_duration(AVCodecContext *avctx, int frame_bytes)
                 return frame_bytes * 8 / bps;
         }
 
-        if (ch > 0) {
+        if (ch > 0 && ch < INT_MAX/16) {
             /* calc from frame_bytes and channels */
             switch (id) {
             case AV_CODEC_ID_ADPCM_AFC:
                 return frame_bytes / (9 * ch) * 16;
+            case AV_CODEC_ID_ADPCM_PSX:
             case AV_CODEC_ID_ADPCM_DTK:
                 return frame_bytes / (16 * ch) * 28;
             case AV_CODEC_ID_ADPCM_4XM:
@@ -3432,6 +3119,11 @@ int av_get_audio_frame_duration(AVCodecContext *avctx, int frame_bytes)
                 return (frame_bytes - 4) * 2 / ch;
             case AV_CODEC_ID_ADPCM_IMA_AMV:
                 return (frame_bytes - 8) * 2 / ch;
+            case AV_CODEC_ID_ADPCM_THP:
+            case AV_CODEC_ID_ADPCM_THP_LE:
+                if (avctx->extradata)
+                    return frame_bytes * 14 / (8 * ch);
+                break;
             case AV_CODEC_ID_ADPCM_XA:
                 return (frame_bytes / 128) * 224 / ch;
             case AV_CODEC_ID_INTERPLAY_DPCM:
@@ -3621,13 +3313,15 @@ int av_lockmgr_register(int (*cb)(void **mutex, enum AVLockOp op))
 
 int ff_lock_avcodec(AVCodecContext *log_ctx, const AVCodec *codec)
 {
+    if (codec->caps_internal & FF_CODEC_CAP_INIT_THREADSAFE || !codec->init)
+        return 0;
+
     if (lockmgr_cb) {
         if ((*lockmgr_cb)(&codec_mutex, AV_LOCK_OBTAIN))
             return -1;
     }
 
-    if (avpriv_atomic_int_add_and_fetch(&entangled_thread_counter, 1) != 1 &&
-        !(codec->caps_internal & FF_CODEC_CAP_INIT_THREADSAFE)) {
+    if (avpriv_atomic_int_add_and_fetch(&entangled_thread_counter, 1) != 1) {
         av_log(log_ctx, AV_LOG_ERROR,
                "Insufficient thread locking. At least %d threads are "
                "calling avcodec_open2() at the same time right now.\n",
@@ -3635,7 +3329,7 @@ int ff_lock_avcodec(AVCodecContext *log_ctx, const AVCodec *codec)
         if (!lockmgr_cb)
             av_log(log_ctx, AV_LOG_ERROR, "No lock manager is set, please see av_lockmgr_register()\n");
         ff_avcodec_locked = 1;
-        ff_unlock_avcodec();
+        ff_unlock_avcodec(codec);
         return AVERROR(EINVAL);
     }
     av_assert0(!ff_avcodec_locked);
@@ -3643,8 +3337,11 @@ int ff_lock_avcodec(AVCodecContext *log_ctx, const AVCodec *codec)
     return 0;
 }
 
-int ff_unlock_avcodec(void)
+int ff_unlock_avcodec(const AVCodec *codec)
 {
+    if (codec->caps_internal & FF_CODEC_CAP_INIT_THREADSAFE || !codec->init)
+        return 0;
+
     av_assert0(ff_avcodec_locked);
     ff_avcodec_locked = 0;
     avpriv_atomic_int_add_and_fetch(&entangled_thread_counter, -1);
@@ -3758,26 +3455,6 @@ void ff_thread_report_progress2(AVCodecContext *avctx, int field, int thread, in
 
 #endif
 
-enum AVMediaType avcodec_get_type(enum AVCodecID codec_id)
-{
-    AVCodec *c= avcodec_find_decoder(codec_id);
-    if(!c)
-        c= avcodec_find_encoder(codec_id);
-    if(c)
-        return c->type;
-
-    if (codec_id <= AV_CODEC_ID_NONE)
-        return AVMEDIA_TYPE_UNKNOWN;
-    else if (codec_id < AV_CODEC_ID_FIRST_AUDIO)
-        return AVMEDIA_TYPE_VIDEO;
-    else if (codec_id < AV_CODEC_ID_FIRST_SUBTITLE)
-        return AVMEDIA_TYPE_AUDIO;
-    else if (codec_id < AV_CODEC_ID_FIRST_UNKNOWN)
-        return AVMEDIA_TYPE_SUBTITLE;
-
-    return AVMEDIA_TYPE_UNKNOWN;
-}
-
 int avcodec_is_open(AVCodecContext *s)
 {
     return !!s->internal;
@@ -3800,7 +3477,7 @@ int avpriv_bprint_to_extradata(AVCodecContext *avctx, struct AVBPrint *buf)
     /* Note: the string is NUL terminated (so extradata can be read as a
      * string), but the ending character is not accounted in the size (in
      * binary formats you are likely not supposed to mux that character). When
-     * extradata is copied, it is also padded with FF_INPUT_BUFFER_PADDING_SIZE
+     * extradata is copied, it is also padded with AV_INPUT_BUFFER_PADDING_SIZE
      * zeros. */
     avctx->extradata_size = buf->len;
     return 0;
@@ -3838,3 +3515,43 @@ const uint8_t *avpriv_find_start_code(const uint8_t *av_restrict p,
 
     return p + 4;
 }
+
+/* Allocate a zeroed AVCPBProperties; if `size` is non-NULL, store its size there. */
+AVCPBProperties *av_cpb_properties_alloc(size_t *size)
+{
+    AVCPBProperties *cpb = av_mallocz(sizeof(AVCPBProperties));
+
+    if (cpb) {
+        /* Everything stays zeroed except vbv_delay, which is set to UINT64_MAX. */
+        cpb->vbv_delay = UINT64_MAX;
+        if (size)
+            *size = sizeof(*cpb);
+    }
+    return cpb; /* NULL when the allocation failed; *size is then left untouched */
+}
+
+/* Append a new AV_PKT_DATA_CPB_PROPERTIES entry to avctx->coded_side_data and
+ * return it; returns NULL without modifying avctx if an allocation fails. */
+AVCPBProperties *ff_add_cpb_side_data(AVCodecContext *avctx)
+{
+    AVPacketSideData *grown, *slot;
+    size_t props_size;
+    AVCPBProperties *cpb = av_cpb_properties_alloc(&props_size);
+
+    if (!cpb)
+        return NULL;
+
+    grown = av_realloc_array(avctx->coded_side_data,
+                             avctx->nb_coded_side_data + 1, sizeof(*grown));
+    if (!grown) {
+        av_freep(&cpb);
+        return NULL;
+    }
+    avctx->coded_side_data = grown;
+
+    slot       = &grown[avctx->nb_coded_side_data++];
+    slot->type = AV_PKT_DATA_CPB_PROPERTIES;
+    slot->data = (uint8_t*)cpb;
+    slot->size = props_size;
+    return cpb;
+}
diff --git a/libavcodec/utvideo.c b/libavcodec/utvideo.c
index 308adb75..b14e56e0 100644
--- a/libavcodec/utvideo.c
+++ b/libavcodec/utvideo.c
@@ -26,9 +26,11 @@
 
 #include "utvideo.h"
 
+#if FF_API_PRIVATE_OPT
 const int ff_ut_pred_order[5] = {
     PRED_LEFT, PRED_MEDIAN, PRED_MEDIAN, PRED_NONE, PRED_GRADIENT
 };
+#endif
 
 const int ff_ut_rgb_order[4]  = { 1, 2, 0, 3 }; // G, B, R, A
 
diff --git a/libavcodec/utvideo.h b/libavcodec/utvideo.h
index 78c3ec54..84eec663 100644
--- a/libavcodec/utvideo.h
+++ b/libavcodec/utvideo.h
@@ -65,6 +65,7 @@ extern const int ff_ut_pred_order[5];
 extern const int ff_ut_rgb_order[4];
 
 typedef struct UtvideoContext {
+    const AVClass *class;
     AVCodecContext *avctx;
     BswapDSPContext bdsp;
     HuffYUVEncDSPContext hdsp;
diff --git a/libavcodec/utvideodec.c b/libavcodec/utvideodec.c
index 3a3c46e0..760d9e5a 100644
--- a/libavcodec/utvideodec.c
+++ b/libavcodec/utvideodec.c
@@ -143,7 +143,7 @@ static int decode_plane(UtvideoContext *c, int plane_no,
 
         memcpy(c->slice_bits, src + slice_data_start + c->slices * 4,
                slice_size);
-        memset(c->slice_bits + slice_size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
+        memset(c->slice_bits + slice_size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
         c->bdsp.bswap_buf((uint32_t *) c->slice_bits,
                           (uint32_t *) c->slice_bits,
                           (slice_data_end - slice_data_start + 3) >> 2);
@@ -385,7 +385,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     }
 
     av_fast_malloc(&c->slice_bits, &c->slice_bits_size,
-                   max_slice_size + FF_INPUT_BUFFER_PADDING_SIZE);
+                   max_slice_size + AV_INPUT_BUFFER_PADDING_SIZE);
 
     if (!c->slice_bits) {
         av_log(avctx, AV_LOG_ERROR, "Cannot allocate temporary buffer\n");
@@ -559,5 +559,5 @@ AVCodec ff_utvideo_decoder = {
     .init           = decode_init,
     .close          = decode_end,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
 };
diff --git a/libavcodec/utvideoenc.c b/libavcodec/utvideoenc.c
index 99791ba1..00185cfd 100644
--- a/libavcodec/utvideoenc.c
+++ b/libavcodec/utvideoenc.c
@@ -26,6 +26,8 @@
 
 #include "libavutil/imgutils.h"
 #include "libavutil/intreadwrite.h"
+#include "libavutil/opt.h"
+
 #include "avcodec.h"
 #include "internal.h"
 #include "bswapdsp.h"
@@ -48,7 +50,6 @@ static av_cold int utvideo_encode_close(AVCodecContext *avctx)
     UtvideoContext *c = avctx->priv_data;
     int i;
 
-    av_frame_free(&avctx->coded_frame);
     av_freep(&c->slice_bits);
     for (i = 0; i < 4; i++)
         av_freep(&c->slice_buffer[i]);
@@ -112,6 +113,8 @@ static av_cold int utvideo_encode_init(AVCodecContext *avctx)
     ff_bswapdsp_init(&c->bdsp);
     ff_huffyuvencdsp_init(&c->hdsp);
 
+#if FF_API_PRIVATE_OPT
+FF_DISABLE_DEPRECATION_WARNINGS
     /* Check the prediction method, and error out if unsupported */
     if (avctx->prediction_method < 0 || avctx->prediction_method > 4) {
         av_log(avctx, AV_LOG_WARNING,
@@ -127,7 +130,10 @@ static av_cold int utvideo_encode_init(AVCodecContext *avctx)
     }
 
     /* Convert from libavcodec prediction type to Ut Video's */
-    c->frame_pred = ff_ut_pred_order[avctx->prediction_method];
+    if (avctx->prediction_method)
+        c->frame_pred = ff_ut_pred_order[avctx->prediction_method];
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
     if (c->frame_pred == PRED_GRADIENT) {
         av_log(avctx, AV_LOG_ERROR, "Gradient prediction is not supported.\n");
@@ -154,19 +160,11 @@ static av_cold int utvideo_encode_init(AVCodecContext *avctx)
         return AVERROR(EINVAL);
     }
 
-    avctx->coded_frame = av_frame_alloc();
-
-    if (!avctx->coded_frame) {
-        av_log(avctx, AV_LOG_ERROR, "Could not allocate frame.\n");
-        utvideo_encode_close(avctx);
-        return AVERROR(ENOMEM);
-    }
-
     /* extradata size is 4 * 32bit */
     avctx->extradata_size = 16;
 
     avctx->extradata = av_mallocz(avctx->extradata_size +
-                                  FF_INPUT_BUFFER_PADDING_SIZE);
+                                  AV_INPUT_BUFFER_PADDING_SIZE);
 
     if (!avctx->extradata) {
         av_log(avctx, AV_LOG_ERROR, "Could not allocate extradata.\n");
@@ -176,7 +174,7 @@ static av_cold int utvideo_encode_init(AVCodecContext *avctx)
 
     for (i = 0; i < c->planes; i++) {
         c->slice_buffer[i] = av_malloc(c->slice_stride * (avctx->height + 2) +
-                                       FF_INPUT_BUFFER_PADDING_SIZE);
+                                       AV_INPUT_BUFFER_PADDING_SIZE);
         if (!c->slice_buffer[i]) {
             av_log(avctx, AV_LOG_ERROR, "Cannot allocate temporary buffer 1.\n");
             utvideo_encode_close(avctx);
@@ -545,7 +543,7 @@ static int utvideo_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 
     /* Allocate a new packet if needed, and set it to the pointer dst */
     ret = ff_alloc_packet2(avctx, pkt, (256 + 4 * c->slices + width * height) *
-                           c->planes + 4);
+                           c->planes + 4, 0);
 
     if (ret < 0)
         return ret;
@@ -622,8 +620,12 @@ static int utvideo_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
      * At least currently Ut Video is IDR only.
      * Set flags accordingly.
      */
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
     avctx->coded_frame->key_frame = 1;
     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
     pkt->size   = bytestream2_tell_p(&pb);
     pkt->flags |= AV_PKT_FLAG_KEY;
@@ -634,16 +636,36 @@ static int utvideo_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     return 0;
 }
 
+#define OFFSET(x) offsetof(UtvideoContext, x)
+#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
+static const AVOption options[] = {
+{ "pred", "Prediction method", OFFSET(frame_pred), AV_OPT_TYPE_INT, { .i64 = PRED_LEFT }, PRED_NONE, PRED_MEDIAN, VE, "pred" },
+    { "none",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRED_NONE }, INT_MIN, INT_MAX, VE, "pred" },
+    { "left",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRED_LEFT }, INT_MIN, INT_MAX, VE, "pred" },
+    { "gradient", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRED_GRADIENT }, INT_MIN, INT_MAX, VE, "pred" },
+    { "median",   NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRED_MEDIAN }, INT_MIN, INT_MAX, VE, "pred" },
+
+    { NULL},
+};
+
+static const AVClass utvideo_class = {
+    .class_name = "utvideo",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
 AVCodec ff_utvideo_encoder = {
     .name           = "utvideo",
     .long_name      = NULL_IF_CONFIG_SMALL("Ut Video"),
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_UTVIDEO,
     .priv_data_size = sizeof(UtvideoContext),
+    .priv_class     = &utvideo_class,
     .init           = utvideo_encode_init,
     .encode2        = utvideo_encode_frame,
     .close          = utvideo_encode_close,
-    .capabilities   = CODEC_CAP_FRAME_THREADS | CODEC_CAP_INTRA_ONLY,
+    .capabilities   = AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_INTRA_ONLY,
     .pix_fmts       = (const enum AVPixelFormat[]) {
                           AV_PIX_FMT_RGB24, AV_PIX_FMT_RGBA, AV_PIX_FMT_YUV422P,
                           AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE
diff --git a/libavcodec/v210dec.c b/libavcodec/v210dec.c
index f1e968d9..9af9af67 100644
--- a/libavcodec/v210dec.c
+++ b/libavcodec/v210dec.c
@@ -53,10 +53,6 @@ static av_cold int decode_init(AVCodecContext *avctx)
 {
     V210DecContext *s = avctx->priv_data;
 
-    if (avctx->width & 1) {
-        av_log(avctx, AV_LOG_ERROR, "v210 needs even width\n");
-        return AVERROR_INVALIDDATA;
-    }
     avctx->pix_fmt             = AV_PIX_FMT_YUV422P10;
     avctx->bits_per_raw_sample = 10;
 
@@ -141,7 +137,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         }
 
         psrc += stride;
-        y += pic->linesize[0] / 2 - avctx->width;
+        y += pic->linesize[0] / 2 - avctx->width + (avctx->width & 1);
         u += pic->linesize[1] / 2 - avctx->width / 2;
         v += pic->linesize[2] / 2 - avctx->width / 2;
     }
@@ -160,7 +156,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
 
 #define V210DEC_FLAGS AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM
 static const AVOption v210dec_options[] = {
-    {"custom_stride", "Custom V210 stride", offsetof(V210DecContext, custom_stride), FF_OPT_TYPE_INT,
+    {"custom_stride", "Custom V210 stride", offsetof(V210DecContext, custom_stride), AV_OPT_TYPE_INT,
      {.i64 = 0}, INT_MIN, INT_MAX, V210DEC_FLAGS},
     {NULL}
 };
@@ -180,6 +176,6 @@ AVCodec ff_v210_decoder = {
     .priv_data_size = sizeof(V210DecContext),
     .init           = decode_init,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .priv_class     = &v210dec_class,
 };
diff --git a/libavcodec/v210enc.c b/libavcodec/v210enc.c
index 2e0fd43b..00c89dc1 100644
--- a/libavcodec/v210enc.c
+++ b/libavcodec/v210enc.c
@@ -82,6 +82,16 @@ static void v210_planar_pack_10_c(const uint16_t *y, const uint16_t *u,
     }
 }
 
+av_cold void ff_v210enc_init(V210EncContext *s)
+{
+    s->pack_line_8  = v210_planar_pack_8_c;
+    s->pack_line_10 = v210_planar_pack_10_c;
+    s->sample_factor = 1;
+
+    if (ARCH_X86)
+        ff_v210enc_init_x86(s);
+}
+
 static av_cold int encode_init(AVCodecContext *avctx)
 {
     V210EncContext *s = avctx->priv_data;
@@ -91,17 +101,13 @@ static av_cold int encode_init(AVCodecContext *avctx)
         return AVERROR(EINVAL);
     }
 
-    avctx->coded_frame = av_frame_alloc();
-    if (!avctx->coded_frame)
-        return AVERROR(ENOMEM);
-
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
-    s->pack_line_8  = v210_planar_pack_8_c;
-    s->pack_line_10 = v210_planar_pack_10_c;
-
-    if (ARCH_X86)
-        ff_v210enc_init_x86(s);
+    ff_v210enc_init(s);
 
     return 0;
 }
@@ -116,7 +122,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     int h, w, ret;
     uint8_t *dst;
 
-    ret = ff_alloc_packet(pkt, avctx->height * stride);
+    ret = ff_alloc_packet2(avctx, pkt, avctx->height * stride, avctx->height * stride);
     if (ret < 0) {
         av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
         return ret;
@@ -129,13 +135,20 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         const uint16_t *v = (const uint16_t *)pic->data[2];
         for (h = 0; h < avctx->height; h++) {
             uint32_t val;
-            w = (avctx->width / 6) * 6;
+            w = (avctx->width / (6 * s->sample_factor)) * 6 * s->sample_factor;
             s->pack_line_10(y, u, v, dst, w);
 
             y += w;
             u += w >> 1;
             v += w >> 1;
-            dst += (w / 6) * 16;
+            dst += (w / (6 * s->sample_factor)) * 16 * s->sample_factor;
+
+            for (; w < avctx->width - 5; w += 6) {
+                WRITE_PIXELS(u, y, v);
+                WRITE_PIXELS(y, u, y);
+                WRITE_PIXELS(v, y, u);
+                WRITE_PIXELS(y, v, y);
+            }
             if (w < avctx->width - 1) {
                 WRITE_PIXELS(u, y, v);
 
@@ -167,13 +180,13 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         const uint8_t *v = pic->data[2];
         for (h = 0; h < avctx->height; h++) {
             uint32_t val;
-            w = (avctx->width / 12) * 12;
+            w = (avctx->width / (12 * s->sample_factor)) * 12 * s->sample_factor;
             s->pack_line_8(y, u, v, dst, w);
 
             y += w;
             u += w >> 1;
             v += w >> 1;
-            dst += (w / 12) * 32;
+            dst += (w / (12 * s->sample_factor)) * 32 * s->sample_factor;
 
             for (; w < avctx->width - 5; w += 6) {
                 WRITE_PIXELS8(u, y, v);
@@ -213,13 +226,6 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     return 0;
 }
 
-static av_cold int encode_close(AVCodecContext *avctx)
-{
-    av_frame_free(&avctx->coded_frame);
-
-    return 0;
-}
-
 AVCodec ff_v210_encoder = {
     .name           = "v210",
     .long_name      = NULL_IF_CONFIG_SMALL("Uncompressed 4:2:2 10-bit"),
@@ -228,6 +234,5 @@ AVCodec ff_v210_encoder = {
     .priv_data_size = sizeof(V210EncContext),
     .init           = encode_init,
     .encode2        = encode_frame,
-    .close          = encode_close,
     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV422P, AV_PIX_FMT_NONE },
 };
diff --git a/libavcodec/v210enc.h b/libavcodec/v210enc.h
index 9179d735..899a7d95 100644
--- a/libavcodec/v210enc.h
+++ b/libavcodec/v210enc.h
@@ -28,8 +28,12 @@ typedef struct V210EncContext {
                         const uint8_t *v, uint8_t *dst, ptrdiff_t width);
     void (*pack_line_10)(const uint16_t *y, const uint16_t *u,
                          const uint16_t *v, uint8_t *dst, ptrdiff_t width);
+    int sample_factor; /* This value must be the same for both 8- and 10-bit
+                          functions, otherwise the output will be incorrect. */
 } V210EncContext;
 
+void ff_v210enc_init(V210EncContext *s);
+
 void ff_v210enc_init_x86(V210EncContext *s);
 
 #endif /* AVCODEC_V210ENC_H */
diff --git a/libavcodec/v210x.c b/libavcodec/v210x.c
index 63307157..f6a453aa 100644
--- a/libavcodec/v210x.c
+++ b/libavcodec/v210x.c
@@ -127,5 +127,5 @@ AVCodec ff_v210x_decoder = {
     .id             = AV_CODEC_ID_V210X,
     .init           = decode_init,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/v308dec.c b/libavcodec/v308dec.c
index 1d31f0a9..dd53fbde 100644
--- a/libavcodec/v308dec.c
+++ b/libavcodec/v308dec.c
@@ -79,5 +79,5 @@ AVCodec ff_v308_decoder = {
     .id           = AV_CODEC_ID_V308,
     .init         = v308_decode_init,
     .decode       = v308_decode_frame,
-    .capabilities = CODEC_CAP_DR1,
+    .capabilities = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/v308enc.c b/libavcodec/v308enc.c
index 408784b0..b60a72ce 100644
--- a/libavcodec/v308enc.c
+++ b/libavcodec/v308enc.c
@@ -31,13 +31,6 @@ static av_cold int v308_encode_init(AVCodecContext *avctx)
         return AVERROR_INVALIDDATA;
     }
 
-    avctx->coded_frame = av_frame_alloc();
-
-    if (!avctx->coded_frame) {
-        av_log(avctx, AV_LOG_ERROR, "Could not allocate frame.\n");
-        return AVERROR(ENOMEM);
-    }
-
     return 0;
 }
 
@@ -48,13 +41,10 @@ static int v308_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     uint8_t *y, *u, *v;
     int i, j, ret;
 
-    if ((ret = ff_alloc_packet2(avctx, pkt, avctx->width * avctx->height * 3)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, pkt, avctx->width * avctx->height * 3, 0)) < 0)
         return ret;
     dst = pkt->data;
 
-    avctx->coded_frame->key_frame = 1;
-    avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
-
     y = pic->data[0];
     u = pic->data[1];
     v = pic->data[2];
@@ -77,8 +67,6 @@ static int v308_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 
 static av_cold int v308_encode_close(AVCodecContext *avctx)
 {
-    av_frame_free(&avctx->coded_frame);
-
     return 0;
 }
 
@@ -91,4 +79,5 @@ AVCodec ff_v308_encoder = {
     .encode2      = v308_encode_frame,
     .close        = v308_encode_close,
     .pix_fmts     = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV444P, AV_PIX_FMT_NONE },
+    .capabilities = AV_CODEC_CAP_INTRA_ONLY,
 };
diff --git a/libavcodec/v408dec.c b/libavcodec/v408dec.c
index be442faa..acff95d6 100644
--- a/libavcodec/v408dec.c
+++ b/libavcodec/v408dec.c
@@ -87,7 +87,7 @@ AVCodec ff_ayuv_decoder = {
     .id           = AV_CODEC_ID_AYUV,
     .init         = v408_decode_init,
     .decode       = v408_decode_frame,
-    .capabilities = CODEC_CAP_DR1,
+    .capabilities = AV_CODEC_CAP_DR1,
 };
 #endif
 #if CONFIG_V408_DECODER
@@ -98,6 +98,6 @@ AVCodec ff_v408_decoder = {
     .id           = AV_CODEC_ID_V408,
     .init         = v408_decode_init,
     .decode       = v408_decode_frame,
-    .capabilities = CODEC_CAP_DR1,
+    .capabilities = AV_CODEC_CAP_DR1,
 };
 #endif
diff --git a/libavcodec/v408enc.c b/libavcodec/v408enc.c
index cdb2efaf..f37f360b 100644
--- a/libavcodec/v408enc.c
+++ b/libavcodec/v408enc.c
@@ -26,12 +26,6 @@
 
 static av_cold int v408_encode_init(AVCodecContext *avctx)
 {
-    avctx->coded_frame = av_frame_alloc();
-
-    if (!avctx->coded_frame) {
-        av_log(avctx, AV_LOG_ERROR, "Could not allocate frame.\n");
-        return AVERROR(ENOMEM);
-    }
 
     return 0;
 }
@@ -43,13 +37,10 @@ static int v408_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     uint8_t *y, *u, *v, *a;
     int i, j, ret;
 
-    if ((ret = ff_alloc_packet2(avctx, pkt, avctx->width * avctx->height * 4)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, pkt, avctx->width * avctx->height * 4, 0)) < 0)
         return ret;
     dst = pkt->data;
 
-    avctx->coded_frame->key_frame = 1;
-    avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
-
     y = pic->data[0];
     u = pic->data[1];
     v = pic->data[2];
@@ -82,8 +73,6 @@ static int v408_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 
 static av_cold int v408_encode_close(AVCodecContext *avctx)
 {
-    av_frame_free(&avctx->coded_frame);
-
     return 0;
 }
 
@@ -97,6 +86,7 @@ AVCodec ff_ayuv_encoder = {
     .encode2      = v408_encode_frame,
     .close        = v408_encode_close,
     .pix_fmts     = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUVA444P, AV_PIX_FMT_NONE },
+    .capabilities = AV_CODEC_CAP_INTRA_ONLY,
 };
 #endif
 #if CONFIG_V408_ENCODER
@@ -109,5 +99,6 @@ AVCodec ff_v408_encoder = {
     .encode2      = v408_encode_frame,
     .close        = v408_encode_close,
     .pix_fmts     = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUVA444P, AV_PIX_FMT_NONE },
+    .capabilities = AV_CODEC_CAP_INTRA_ONLY,
 };
 #endif
diff --git a/libavcodec/v410dec.c b/libavcodec/v410dec.c
index e7a9c0e1..48fab682 100644
--- a/libavcodec/v410dec.c
+++ b/libavcodec/v410dec.c
@@ -94,5 +94,5 @@ AVCodec ff_v410_decoder = {
     .id           = AV_CODEC_ID_V410,
     .init         = v410_decode_init,
     .decode       = v410_decode_frame,
-    .capabilities = CODEC_CAP_DR1,
+    .capabilities = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/v410enc.c b/libavcodec/v410enc.c
index f2f7d734..f35ff759 100644
--- a/libavcodec/v410enc.c
+++ b/libavcodec/v410enc.c
@@ -32,13 +32,6 @@ static av_cold int v410_encode_init(AVCodecContext *avctx)
         return AVERROR_INVALIDDATA;
     }
 
-    avctx->coded_frame = av_frame_alloc();
-
-    if (!avctx->coded_frame) {
-        av_log(avctx, AV_LOG_ERROR, "Could not allocate frame.\n");
-        return AVERROR(ENOMEM);
-    }
-
     return 0;
 }
 
@@ -50,12 +43,17 @@ static int v410_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     uint32_t val;
     int i, j, ret;
 
-    if ((ret = ff_alloc_packet2(avctx, pkt, avctx->width * avctx->height * 4)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, pkt, avctx->width * avctx->height * 4,
+                                            avctx->width * avctx->height * 4)) < 0)
         return ret;
     dst = pkt->data;
 
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
     avctx->coded_frame->key_frame = 1;
     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
     y = (uint16_t *)pic->data[0];
     u = (uint16_t *)pic->data[1];
@@ -79,13 +77,6 @@ static int v410_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     return 0;
 }
 
-static av_cold int v410_encode_close(AVCodecContext *avctx)
-{
-    av_frame_free(&avctx->coded_frame);
-
-    return 0;
-}
-
 AVCodec ff_v410_encoder = {
     .name         = "v410",
     .long_name    = NULL_IF_CONFIG_SMALL("Uncompressed 4:4:4 10-bit"),
@@ -93,6 +84,5 @@ AVCodec ff_v410_encoder = {
     .id           = AV_CODEC_ID_V410,
     .init         = v410_encode_init,
     .encode2      = v410_encode_frame,
-    .close        = v410_encode_close,
     .pix_fmts     = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV444P10, AV_PIX_FMT_NONE },
 };
diff --git a/libavcodec/vaapi.c b/libavcodec/vaapi.c
index 6ac22e64..a1ea790a 100644
--- a/libavcodec/vaapi.c
+++ b/libavcodec/vaapi.c
@@ -21,7 +21,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include "h264.h"
+#include "libavutil/log.h"
+#include "mpegvideo.h"
 #include "vaapi_internal.h"
 
 /**
@@ -34,30 +35,56 @@ static void destroy_buffers(VADisplay display, VABufferID *buffers, unsigned int
 {
     unsigned int i;
     for (i = 0; i < n_buffers; i++) {
-        if (buffers[i]) {
+        if (buffers[i] != VA_INVALID_ID) {
             vaDestroyBuffer(display, buffers[i]);
-            buffers[i] = 0;
+            buffers[i] = VA_INVALID_ID;
         }
     }
 }
 
-int ff_vaapi_render_picture(struct vaapi_context *vactx, VASurfaceID surface)
+int ff_vaapi_context_init(AVCodecContext *avctx)
+{
+    FFVAContext * const vactx = ff_vaapi_get_context(avctx);
+    const struct vaapi_context * const user_vactx = avctx->hwaccel_context;
+
+    if (!user_vactx) {
+        av_log(avctx, AV_LOG_ERROR, "Hardware acceleration context (hwaccel_context) does not exist.\n");
+        return AVERROR(ENOSYS);
+    }
+
+    vactx->display              = user_vactx->display;
+    vactx->config_id            = user_vactx->config_id;
+    vactx->context_id           = user_vactx->context_id;
+
+    vactx->pic_param_buf_id     = VA_INVALID_ID;
+    vactx->iq_matrix_buf_id     = VA_INVALID_ID;
+    vactx->bitplane_buf_id      = VA_INVALID_ID;
+
+    return 0;
+}
+
+int ff_vaapi_context_fini(AVCodecContext *avctx)
+{
+    return 0;
+}
+
+int ff_vaapi_render_picture(FFVAContext *vactx, VASurfaceID surface)
 {
     VABufferID va_buffers[3];
     unsigned int n_va_buffers = 0;
 
-    if (!vactx->pic_param_buf_id)
+    if (vactx->pic_param_buf_id == VA_INVALID_ID)
         return 0;
 
     vaUnmapBuffer(vactx->display, vactx->pic_param_buf_id);
     va_buffers[n_va_buffers++] = vactx->pic_param_buf_id;
 
-    if (vactx->iq_matrix_buf_id) {
+    if (vactx->iq_matrix_buf_id != VA_INVALID_ID) {
         vaUnmapBuffer(vactx->display, vactx->iq_matrix_buf_id);
         va_buffers[n_va_buffers++] = vactx->iq_matrix_buf_id;
     }
 
-    if (vactx->bitplane_buf_id) {
+    if (vactx->bitplane_buf_id != VA_INVALID_ID) {
         vaUnmapBuffer(vactx->display, vactx->bitplane_buf_id);
         va_buffers[n_va_buffers++] = vactx->bitplane_buf_id;
     }
@@ -81,7 +108,7 @@ int ff_vaapi_render_picture(struct vaapi_context *vactx, VASurfaceID surface)
     return 0;
 }
 
-int ff_vaapi_commit_slices(struct vaapi_context *vactx)
+int ff_vaapi_commit_slices(FFVAContext *vactx)
 {
     VABufferID *slice_buf_ids;
     VABufferID slice_param_buf_id, slice_data_buf_id;
@@ -97,7 +124,7 @@ int ff_vaapi_commit_slices(struct vaapi_context *vactx)
         return -1;
     vactx->slice_buf_ids = slice_buf_ids;
 
-    slice_param_buf_id = 0;
+    slice_param_buf_id = VA_INVALID_ID;
     if (vaCreateBuffer(vactx->display, vactx->context_id,
                        VASliceParameterBufferType,
                        vactx->slice_param_size,
@@ -106,7 +133,7 @@ int ff_vaapi_commit_slices(struct vaapi_context *vactx)
         return -1;
     vactx->slice_count = 0;
 
-    slice_data_buf_id = 0;
+    slice_data_buf_id = VA_INVALID_ID;
     if (vaCreateBuffer(vactx->display, vactx->context_id,
                        VASliceDataBufferType,
                        vactx->slice_data_size,
@@ -121,11 +148,11 @@ int ff_vaapi_commit_slices(struct vaapi_context *vactx)
     return 0;
 }
 
-static void *alloc_buffer(struct vaapi_context *vactx, int type, unsigned int size, uint32_t *buf_id)
+static void *alloc_buffer(FFVAContext *vactx, int type, unsigned int size, uint32_t *buf_id)
 {
     void *data = NULL;
 
-    *buf_id = 0;
+    *buf_id = VA_INVALID_ID;
     if (vaCreateBuffer(vactx->display, vactx->context_id,
                        type, size, 1, NULL, buf_id) == VA_STATUS_SUCCESS)
         vaMapBuffer(vactx->display, *buf_id, &data);
@@ -133,22 +160,22 @@ static void *alloc_buffer(struct vaapi_context *vactx, int type, unsigned int si
     return data;
 }
 
-void *ff_vaapi_alloc_pic_param(struct vaapi_context *vactx, unsigned int size)
+void *ff_vaapi_alloc_pic_param(FFVAContext *vactx, unsigned int size)
 {
     return alloc_buffer(vactx, VAPictureParameterBufferType, size, &vactx->pic_param_buf_id);
 }
 
-void *ff_vaapi_alloc_iq_matrix(struct vaapi_context *vactx, unsigned int size)
+void *ff_vaapi_alloc_iq_matrix(FFVAContext *vactx, unsigned int size)
 {
     return alloc_buffer(vactx, VAIQMatrixBufferType, size, &vactx->iq_matrix_buf_id);
 }
 
-uint8_t *ff_vaapi_alloc_bitplane(struct vaapi_context *vactx, uint32_t size)
+uint8_t *ff_vaapi_alloc_bitplane(FFVAContext *vactx, uint32_t size)
 {
     return alloc_buffer(vactx, VABitPlaneBufferType, size, &vactx->bitplane_buf_id);
 }
 
-VASliceParameterBufferBase *ff_vaapi_alloc_slice(struct vaapi_context *vactx, const uint8_t *buffer, uint32_t size)
+VASliceParameterBufferBase *ff_vaapi_alloc_slice(FFVAContext *vactx, const uint8_t *buffer, uint32_t size)
 {
     uint8_t *slice_params;
     VASliceParameterBufferBase *slice_param;
@@ -181,7 +208,7 @@ VASliceParameterBufferBase *ff_vaapi_alloc_slice(struct vaapi_context *vactx, co
 
 void ff_vaapi_common_end_frame(AVCodecContext *avctx)
 {
-    struct vaapi_context * const vactx = avctx->hwaccel_context;
+    FFVAContext * const vactx = ff_vaapi_get_context(avctx);
 
     ff_dlog(avctx, "ff_vaapi_common_end_frame()\n");
 
@@ -202,7 +229,7 @@ void ff_vaapi_common_end_frame(AVCodecContext *avctx)
     CONFIG_VC1_VAAPI_HWACCEL   || CONFIG_WMV3_VAAPI_HWACCEL
 int ff_vaapi_mpeg_end_frame(AVCodecContext *avctx)
 {
-    struct vaapi_context * const vactx = avctx->hwaccel_context;
+    FFVAContext * const vactx = ff_vaapi_get_context(avctx);
     MpegEncContext *s = avctx->priv_data;
     int ret;
 
diff --git a/libavcodec/vaapi.h b/libavcodec/vaapi.h
index 815a27e2..7a29f6f8 100644
--- a/libavcodec/vaapi.h
+++ b/libavcodec/vaapi.h
@@ -31,6 +31,8 @@
  */
 
 #include <stdint.h>
+#include "libavutil/attributes.h"
+#include "version.h"
 
 /**
  * @defgroup lavc_codec_hwaccel_vaapi VA API Decoding
@@ -72,12 +74,14 @@ struct vaapi_context {
      */
     uint32_t context_id;
 
+#if FF_API_VAAPI_CONTEXT
     /**
      * VAPictureParameterBuffer ID
      *
      * - encoding: unused
      * - decoding: Set by libavcodec
      */
+    attribute_deprecated
     uint32_t pic_param_buf_id;
 
     /**
@@ -86,6 +90,7 @@ struct vaapi_context {
      * - encoding: unused
      * - decoding: Set by libavcodec
      */
+    attribute_deprecated
     uint32_t iq_matrix_buf_id;
 
     /**
@@ -94,6 +99,7 @@ struct vaapi_context {
      * - encoding: unused
      * - decoding: Set by libavcodec
      */
+    attribute_deprecated
     uint32_t bitplane_buf_id;
 
     /**
@@ -102,6 +108,7 @@ struct vaapi_context {
      * - encoding: unused
      * - decoding: Set by libavcodec
      */
+    attribute_deprecated
     uint32_t *slice_buf_ids;
 
     /**
@@ -110,6 +117,7 @@ struct vaapi_context {
      * - encoding: unused
      * - decoding: Set by libavcodec
      */
+    attribute_deprecated
     unsigned int n_slice_buf_ids;
 
     /**
@@ -118,6 +126,7 @@ struct vaapi_context {
      * - encoding: unused
      * - decoding: Set by libavcodec
      */
+    attribute_deprecated
     unsigned int slice_buf_ids_alloc;
 
     /**
@@ -126,6 +135,7 @@ struct vaapi_context {
      * - encoding: unused
      * - decoding: Set by libavcodec
      */
+    attribute_deprecated
     void *slice_params;
 
     /**
@@ -134,6 +144,7 @@ struct vaapi_context {
      * - encoding: unused
      * - decoding: Set by libavcodec
      */
+    attribute_deprecated
     unsigned int slice_param_size;
 
     /**
@@ -142,6 +153,7 @@ struct vaapi_context {
      * - encoding: unused
      * - decoding: Set by libavcodec
      */
+    attribute_deprecated
     unsigned int slice_params_alloc;
 
     /**
@@ -150,6 +162,7 @@ struct vaapi_context {
      * - encoding: unused
      * - decoding: Set by libavcodec
      */
+    attribute_deprecated
     unsigned int slice_count;
 
     /**
@@ -157,6 +170,7 @@ struct vaapi_context {
      * - encoding: unused
      * - decoding: Set by libavcodec
      */
+    attribute_deprecated
     const uint8_t *slice_data;
 
     /**
@@ -165,7 +179,9 @@ struct vaapi_context {
      * - encoding: unused
      * - decoding: Set by libavcodec
      */
+    attribute_deprecated
     uint32_t slice_data_size;
+#endif
 };
 
 /* @} */
diff --git a/libavcodec/vaapi_h264.c b/libavcodec/vaapi_h264.c
index 151aca9e..ded2cb3d 100644
--- a/libavcodec/vaapi_h264.c
+++ b/libavcodec/vaapi_h264.c
@@ -227,7 +227,7 @@ static int vaapi_h264_start_frame(AVCodecContext          *avctx,
                                   av_unused uint32_t       size)
 {
     H264Context * const h = avctx->priv_data;
-    struct vaapi_context * const vactx = avctx->hwaccel_context;
+    FFVAContext * const vactx = ff_vaapi_get_context(avctx);
     VAPictureParameterBufferH264 *pic_param;
     VAIQMatrixBufferH264 *iq_matrix;
 
@@ -292,7 +292,7 @@ static int vaapi_h264_start_frame(AVCodecContext          *avctx,
 /** End a hardware decoding based frame. */
 static int vaapi_h264_end_frame(AVCodecContext *avctx)
 {
-    struct vaapi_context * const vactx = avctx->hwaccel_context;
+    FFVAContext * const vactx = ff_vaapi_get_context(avctx);
     H264Context * const h = avctx->priv_data;
     H264SliceContext *sl = &h->slice_ctx[0];
     int ret;
@@ -318,6 +318,7 @@ static int vaapi_h264_decode_slice(AVCodecContext *avctx,
                                    const uint8_t  *buffer,
                                    uint32_t        size)
 {
+    FFVAContext * const vactx = ff_vaapi_get_context(avctx);
     H264Context * const h = avctx->priv_data;
     H264SliceContext *sl  = &h->slice_ctx[0];
     VASliceParameterBufferH264 *slice_param;
@@ -326,7 +327,7 @@ static int vaapi_h264_decode_slice(AVCodecContext *avctx,
             buffer, size);
 
     /* Fill in VASliceParameterBufferH264. */
-    slice_param = (VASliceParameterBufferH264 *)ff_vaapi_alloc_slice(avctx->hwaccel_context, buffer, size);
+    slice_param = (VASliceParameterBufferH264 *)ff_vaapi_alloc_slice(vactx, buffer, size);
     if (!slice_param)
         return -1;
     slice_param->slice_data_bit_offset          = get_bits_count(&sl->gb) + 8; /* bit buffer started beyond nal_unit_type */
@@ -359,8 +360,11 @@ AVHWAccel ff_h264_vaapi_hwaccel = {
     .name           = "h264_vaapi",
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_H264,
-    .pix_fmt        = AV_PIX_FMT_VAAPI_VLD,
+    .pix_fmt        = AV_PIX_FMT_VAAPI,
     .start_frame    = vaapi_h264_start_frame,
     .end_frame      = vaapi_h264_end_frame,
     .decode_slice   = vaapi_h264_decode_slice,
+    .init           = ff_vaapi_context_init,
+    .uninit         = ff_vaapi_context_fini,
+    .priv_data_size = sizeof(FFVAContext),
 };
diff --git a/libavcodec/vaapi_hevc.c b/libavcodec/vaapi_hevc.c
new file mode 100644
index 00000000..62f783e3
--- /dev/null
+++ b/libavcodec/vaapi_hevc.c
@@ -0,0 +1,490 @@
+/*
+ * HEVC HW decode acceleration through VA API
+ *
+ * Copyright (C) 2015 Timo Rothenpieler <timo@rothenpieler.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "vaapi_internal.h"
+#include "hevc.h"
+#include "mpegutils.h"
+
+/**
+ * @file
+ * This file implements the glue code between FFmpeg's and VA API's
+ * structures for HEVC decoding.
+ */
+
+typedef struct vaapi_hevc_frame_data { /* per-frame state, read through HEVCFrame.hwaccel_picture_private */
+    VAPictureParameterBufferHEVC *pic_param; /* picture parameters of this frame, stored by vaapi_hevc_start_frame() */
+    VASliceParameterBufferHEVC *last_slice_param; /* slice parameters most recently allocated by vaapi_hevc_decode_slice() */
+} vaapi_hevc_frame_data;
+
+/**
+ * Reset a VA API picture entry to the "unused" state.
+ *
+ * Used to blank the slots of the fixed-size reference picture array.
+ */
+static void init_vaapi_pic(VAPictureHEVC *va_pic)
+{
+    va_pic->pic_order_cnt = 0;
+    va_pic->flags         = VA_PICTURE_HEVC_INVALID;
+    va_pic->picture_id    = VA_INVALID_ID;
+}
+
+/** Describe pic to VA API: surface ID, POC, and RPS / long-term / field flags. */
+static void fill_vaapi_pic(VAPictureHEVC *va_pic, const HEVCFrame *pic, int rps_type)
+{
+    AVFrame *frame = pic->frame;
+    int flags      = rps_type;
+
+    if (pic->flags & HEVC_FRAME_FLAG_LONG_REF)
+        flags |= VA_PICTURE_HEVC_LONG_TERM_REFERENCE;
+    if (frame->interlaced_frame)
+        flags |= VA_PICTURE_HEVC_FIELD_PIC;
+    if (frame->interlaced_frame && !frame->top_field_first)
+        flags |= VA_PICTURE_HEVC_BOTTOM_FIELD;
+
+    va_pic->picture_id    = ff_vaapi_get_surface_id(frame);
+    va_pic->pic_order_cnt = pic->poc;
+    va_pic->flags         = flags;
+}
+
+/** Return the VA_PICTURE_HEVC_RPS_* flag of the current RPS list holding pic's surface, else 0. */
+static int find_frame_rps_type(const HEVCContext *h, const HEVCFrame *pic)
+{
+    const int rps_map[][2] = {
+        { ST_CURR_BEF, VA_PICTURE_HEVC_RPS_ST_CURR_BEFORE },
+        { ST_CURR_AFT, VA_PICTURE_HEVC_RPS_ST_CURR_AFTER  },
+        { LT_CURR,     VA_PICTURE_HEVC_RPS_LT_CURR        },
+    };
+    const VASurfaceID wanted = ff_vaapi_get_surface_id(pic->frame);
+    int k, r;
+
+    /* Lists are probed in the order of the table; the first match wins. */
+    for (k = 0; k < FF_ARRAY_ELEMS(rps_map); k++) {
+        const int list = rps_map[k][0];
+
+        for (r = 0; r < h->rps[list].nb_refs; r++) {
+            if (wanted == ff_vaapi_get_surface_id(h->rps[list].ref[r]->frame))
+                return rps_map[k][1];
+        }
+    }
+    return 0;
+}
+
+/** Fill pp->ReferenceFrames from the DPB (current picture excluded); unused slots are invalidated. */
+static void fill_vaapi_ReferenceFrames(const HEVCContext *h, VAPictureParameterBufferHEVC *pp)
+{
+    const int ref_mask = HEVC_FRAME_FLAG_LONG_REF | HEVC_FRAME_FLAG_SHORT_REF;
+    int slot, dpb_idx = 0;
+
+    for (slot = 0; slot < FF_ARRAY_ELEMS(pp->ReferenceFrames); slot++) {
+        VAPictureHEVC *va_pic = &pp->ReferenceFrames[slot];
+        const HEVCFrame *found = NULL;
+
+        /* Resume the DPB scan where the previous slot stopped. */
+        for (; !found && dpb_idx < FF_ARRAY_ELEMS(h->DPB); dpb_idx++) {
+            const HEVCFrame *cand = &h->DPB[dpb_idx];
+
+            if (cand != h->ref && (cand->flags & ref_mask))
+                found = cand;
+        }
+        init_vaapi_pic(va_pic);
+        if (found)
+            fill_vaapi_pic(va_pic, found, find_frame_rps_type(h, found));
+    }
+}
+
+/** Return the index of frame in the current picture's ReferenceFrames, or 0xff if it is absent. */
+static uint8_t get_ref_pic_index(const HEVCContext *h, const HEVCFrame *frame)
+{
+    const vaapi_hevc_frame_data *cur_data = h->ref->hwaccel_picture_private;
+    const VAPictureParameterBufferHEVC *pic_param = cur_data->pic_param;
+    uint8_t idx = 0;
+
+    if (!frame)
+        return 0xff;
+    for (; idx < FF_ARRAY_ELEMS(pic_param->ReferenceFrames); idx++) {
+        const VAPictureHEVC *slot = &pic_param->ReferenceFrames[idx];
+        /* A slot matches only when both the surface and the POC agree. */
+        if (slot->picture_id != VA_INVALID_ID && slot->pic_order_cnt == frame->poc &&
+            slot->picture_id == ff_vaapi_get_surface_id(frame->frame))
+            return idx;
+    }
+    return 0xff;
+}
+
+/**
+ * Fill the VA API picture parameter buffer from the active SPS/PPS and the
+ * current slice header.
+ *
+ * @param h   decoder context; h->ref is described as the current picture
+ * @param pp  picture parameter buffer to fill
+ */
+static void fill_picture_parameters(const HEVCContext *h, VAPictureParameterBufferHEVC *pp)
+{
+    int i;
+
+    /* Start from cleared bitfields; only the bits assigned below end up set. */
+    pp->pic_fields.value = 0;
+    pp->slice_parsing_fields.value = 0;
+
+    fill_vaapi_pic(&pp->CurrPic, h->ref, 0);
+    fill_vaapi_ReferenceFrames(h, pp);
+
+    pp->pic_width_in_luma_samples  = h->ps.sps->width;
+    pp->pic_height_in_luma_samples = h->ps.sps->height;
+
+    pp->log2_min_luma_coding_block_size_minus3 = h->ps.sps->log2_min_cb_size - 3;
+
+    pp->pic_fields.bits.chroma_format_idc = h->ps.sps->chroma_format_idc;
+
+    pp->sps_max_dec_pic_buffering_minus1 = h->ps.sps->temporal_layer[h->ps.sps->max_sub_layers - 1].max_dec_pic_buffering - 1;
+    pp->log2_diff_max_min_luma_coding_block_size = h->ps.sps->log2_diff_max_min_coding_block_size;
+    pp->log2_min_transform_block_size_minus2 = h->ps.sps->log2_min_tb_size - 2;
+    pp->log2_diff_max_min_transform_block_size = h->ps.sps->log2_max_trafo_size  - h->ps.sps->log2_min_tb_size;
+    pp->max_transform_hierarchy_depth_inter = h->ps.sps->max_transform_hierarchy_depth_inter;
+    pp->max_transform_hierarchy_depth_intra = h->ps.sps->max_transform_hierarchy_depth_intra;
+    pp->num_short_term_ref_pic_sets = h->ps.sps->nb_st_rps;
+    pp->num_long_term_ref_pic_sps = h->ps.sps->num_long_term_ref_pics_sps;
+
+    pp->num_ref_idx_l0_default_active_minus1 = h->ps.pps->num_ref_idx_l0_default_active - 1;
+    pp->num_ref_idx_l1_default_active_minus1 = h->ps.pps->num_ref_idx_l1_default_active - 1;
+    pp->init_qp_minus26 = h->ps.pps->pic_init_qp_minus26;
+
+    pp->pps_cb_qp_offset = h->ps.pps->cb_qp_offset;
+    pp->pps_cr_qp_offset = h->ps.pps->cr_qp_offset;
+
+    pp->pic_fields.bits.tiles_enabled_flag = h->ps.pps->tiles_enabled_flag;
+    pp->pic_fields.bits.separate_colour_plane_flag = h->ps.sps->separate_colour_plane_flag;
+    pp->pic_fields.bits.pcm_enabled_flag = h->ps.sps->pcm_enabled_flag;
+    pp->pic_fields.bits.scaling_list_enabled_flag = h->ps.sps->scaling_list_enable_flag;
+    pp->pic_fields.bits.transform_skip_enabled_flag = h->ps.pps->transform_skip_enabled_flag;
+    pp->pic_fields.bits.amp_enabled_flag = h->ps.sps->amp_enabled_flag;
+    pp->pic_fields.bits.strong_intra_smoothing_enabled_flag = h->ps.sps->sps_strong_intra_smoothing_enable_flag;
+    pp->pic_fields.bits.sign_data_hiding_enabled_flag = h->ps.pps->sign_data_hiding_flag;
+    pp->pic_fields.bits.constrained_intra_pred_flag = h->ps.pps->constrained_intra_pred_flag;
+    pp->pic_fields.bits.cu_qp_delta_enabled_flag = h->ps.pps->cu_qp_delta_enabled_flag;
+    pp->pic_fields.bits.weighted_pred_flag = h->ps.pps->weighted_pred_flag;
+    pp->pic_fields.bits.weighted_bipred_flag = h->ps.pps->weighted_bipred_flag;
+    pp->pic_fields.bits.transquant_bypass_enabled_flag = h->ps.pps->transquant_bypass_enable_flag;
+    pp->pic_fields.bits.entropy_coding_sync_enabled_flag = h->ps.pps->entropy_coding_sync_enabled_flag;
+    pp->pic_fields.bits.pps_loop_filter_across_slices_enabled_flag = h->ps.pps->seq_loop_filter_across_slices_enabled_flag;
+    pp->pic_fields.bits.loop_filter_across_tiles_enabled_flag = h->ps.pps->loop_filter_across_tiles_enabled_flag;
+
+    pp->pic_fields.bits.pcm_loop_filter_disabled_flag = h->ps.sps->pcm.loop_filter_disable_flag;
+    pp->pcm_sample_bit_depth_luma_minus1 = h->ps.sps->pcm.bit_depth - 1;
+    pp->pcm_sample_bit_depth_chroma_minus1 = h->ps.sps->pcm.bit_depth_chroma - 1;
+    pp->log2_min_pcm_luma_coding_block_size_minus3 = h->ps.sps->pcm.log2_min_pcm_cb_size - 3;
+    pp->log2_diff_max_min_pcm_luma_coding_block_size = h->ps.sps->pcm.log2_max_pcm_cb_size - h->ps.sps->pcm.log2_min_pcm_cb_size;
+
+    /* Without tiles the picture is a single tile: zero counts, zeroed size arrays. */
+    memset(pp->column_width_minus1, 0, sizeof(pp->column_width_minus1));
+    memset(pp->row_height_minus1, 0, sizeof(pp->row_height_minus1));
+    pp->num_tile_columns_minus1 = 0;
+    pp->num_tile_rows_minus1 = 0;
+
+    if (h->ps.pps->tiles_enabled_flag) {
+        pp->num_tile_columns_minus1 = h->ps.pps->num_tile_columns - 1;
+        pp->num_tile_rows_minus1 = h->ps.pps->num_tile_rows - 1;
+
+        /* Never write past the fixed-size VA API arrays, whatever the bitstream claims. */
+        for (i = 0; i < h->ps.pps->num_tile_columns && i < FF_ARRAY_ELEMS(pp->column_width_minus1); i++)
+            pp->column_width_minus1[i] = h->ps.pps->column_width[i] - 1;
+
+        for (i = 0; i < h->ps.pps->num_tile_rows && i < FF_ARRAY_ELEMS(pp->row_height_minus1); i++)
+            pp->row_height_minus1[i] = h->ps.pps->row_height[i] - 1;
+    }
+
+    pp->diff_cu_qp_delta_depth = h->ps.pps->diff_cu_qp_delta_depth;
+    pp->pps_beta_offset_div2 = h->ps.pps->beta_offset / 2;
+    pp->pps_tc_offset_div2 = h->ps.pps->tc_offset / 2;
+    pp->log2_parallel_merge_level_minus2 = h->ps.pps->log2_parallel_merge_level - 2;
+
+    /* Different chroma/luma bit depths are currently not supported by ffmpeg. */
+    pp->bit_depth_luma_minus8 = h->ps.sps->bit_depth - 8;
+    pp->bit_depth_chroma_minus8 = h->ps.sps->bit_depth - 8;
+
+    pp->slice_parsing_fields.bits.lists_modification_present_flag = h->ps.pps->lists_modification_present_flag;
+    pp->slice_parsing_fields.bits.long_term_ref_pics_present_flag = h->ps.sps->long_term_ref_pics_present_flag;
+    pp->slice_parsing_fields.bits.sps_temporal_mvp_enabled_flag = h->ps.sps->sps_temporal_mvp_enabled_flag;
+    pp->slice_parsing_fields.bits.cabac_init_present_flag = h->ps.pps->cabac_init_present_flag;
+    pp->slice_parsing_fields.bits.output_flag_present_flag = h->ps.pps->output_flag_present_flag;
+    pp->slice_parsing_fields.bits.dependent_slice_segments_enabled_flag = h->ps.pps->dependent_slice_segments_enabled_flag;
+    pp->slice_parsing_fields.bits.pps_slice_chroma_qp_offsets_present_flag = h->ps.pps->pic_slice_level_chroma_qp_offsets_present_flag;
+    pp->slice_parsing_fields.bits.sample_adaptive_offset_enabled_flag = h->ps.sps->sao_enabled;
+    pp->slice_parsing_fields.bits.deblocking_filter_override_enabled_flag = h->ps.pps->deblocking_filter_override_enabled_flag;
+    pp->slice_parsing_fields.bits.pps_disable_deblocking_filter_flag = h->ps.pps->disable_dbf;
+    pp->slice_parsing_fields.bits.slice_segment_header_extension_present_flag = h->ps.pps->slice_header_extension_present_flag;
+
+    pp->log2_max_pic_order_cnt_lsb_minus4 = h->ps.sps->log2_max_poc_lsb - 4;
+    pp->num_extra_slice_header_bits = h->ps.pps->num_extra_slice_header_bits;
+
+    /* Picture-type flags. */
+    pp->slice_parsing_fields.bits.RapPicFlag = (h->nal_unit_type >= NAL_BLA_W_LP && h->nal_unit_type <= NAL_CRA_NUT) ? 1 : 0;
+    pp->slice_parsing_fields.bits.IdrPicFlag = IS_IDR(h) ? 1 : 0;
+    pp->slice_parsing_fields.bits.IntraPicFlag = IS_IRAP(h) ? 1 : 0;
+
+    /* Only non-zero when the slice header carries its own short-term RPS. */
+    if (h->sh.short_term_ref_pic_set_sps_flag == 0 && h->sh.short_term_rps) {
+        pp->st_rps_bits = h->sh.short_term_ref_pic_set_size;
+    } else {
+        pp->st_rps_bits = 0;
+    }
+
+    /* TODO */
+    pp->pic_fields.bits.NoPicReorderingFlag = 0;
+    pp->pic_fields.bits.NoBiPredFlag = 0;
+}
+
+
+/** Begin a frame: fill the picture parameters and, if a scaling list is in use, the IQ matrix. */
+static int vaapi_hevc_start_frame(AVCodecContext          *avctx,
+                                  av_unused const uint8_t *buffer,
+                                  av_unused uint32_t       size)
+{
+    HEVCContext * const h = avctx->priv_data;
+    FFVAContext * const vactx = ff_vaapi_get_context(avctx);
+    vaapi_hevc_frame_data *frame_data = h->ref->hwaccel_picture_private;
+    VAPictureParameterBufferHEVC *pp;
+    VAIQMatrixBufferHEVC *iq;
+    const ScalingList *lists = NULL;
+    int m, k, pos;
+
+    ff_dlog(avctx, "vaapi_hevc_start_frame()\n");
+
+    vactx->slice_param_size = sizeof(VASliceParameterBufferHEVC);
+
+    /* Picture parameters; the pointer is kept on the frame for the slice code. */
+    pp = ff_vaapi_alloc_pic_param(vactx, sizeof(*pp));
+    if (!pp)
+        return -1;
+    fill_picture_parameters(h, pp);
+    frame_data->pic_param = pp;
+
+    /* Scaling lists: the PPS one is used when present, else the SPS one if enabled. */
+    if (h->ps.pps->scaling_list_data_present_flag)
+        lists = &h->ps.pps->scaling_list;
+    else if (h->ps.sps->scaling_list_enable_flag)
+        lists = &h->ps.sps->scaling_list;
+    if (!lists)
+        return 0;
+
+    iq = ff_vaapi_alloc_iq_matrix(vactx, sizeof(*iq));
+    if (!iq)
+        return -1;
+
+    for (m = 0; m < 6; m++) {
+        for (k = 0; k < 16; k++) {
+            pos = 4 * ff_hevc_diag_scan4x4_y[k] + ff_hevc_diag_scan4x4_x[k];
+            iq->ScalingList4x4[m][k] = lists->sl[0][m][pos];
+        }
+        for (k = 0; k < 64; k++) {
+            pos = 8 * ff_hevc_diag_scan8x8_y[k] + ff_hevc_diag_scan8x8_x[k];
+            iq->ScalingList8x8[m][k]   = lists->sl[1][m][pos];
+            iq->ScalingList16x16[m][k] = lists->sl[2][m][pos];
+        }
+        iq->ScalingListDC16x16[m] = lists->sl_dc[0][m];
+    }
+    /* The two 32x32 matrices are read from source entries 0 and 3. */
+    for (m = 0; m < 2; m++) {
+        for (k = 0; k < 64; k++) {
+            pos = 8 * ff_hevc_diag_scan8x8_y[k] + ff_hevc_diag_scan8x8_x[k];
+            iq->ScalingList32x32[m][k] = lists->sl[3][m * 3][pos];
+        }
+        iq->ScalingListDC32x32[m] = lists->sl_dc[1][m * 3];
+    }
+    return 0;
+}
+
+/**
+ * End a frame: flag the last slice, commit pending slices, render the picture.
+ * @return the result of the commit/render step (negative on failure)
+ */
+static int vaapi_hevc_end_frame(AVCodecContext *avctx)
+{
+    FFVAContext * const vactx = ff_vaapi_get_context(avctx);
+    HEVCContext * const h = avctx->priv_data;
+    vaapi_hevc_frame_data *frame_data = h->ref->hwaccel_picture_private;
+    int ret;
+
+    ff_dlog(avctx, "vaapi_hevc_end_frame()\n");
+
+    /* A frame may reach this point without any pending slice, e.g. if slice allocation failed. */
+    if (vactx->slice_count > 0 && frame_data->last_slice_param)
+        frame_data->last_slice_param->LongSliceFlags.fields.LastSliceOfPic = 1;
+
+    ret = ff_vaapi_commit_slices(vactx);
+    if (ret >= 0)
+        ret = ff_vaapi_render_picture(vactx, ff_vaapi_get_surface_id(h->ref->frame));
+
+    ff_vaapi_common_end_frame(avctx);
+    return ret;
+}
+
+/** Fill the weighted-prediction tables of a slice; they stay zeroed when weighting does not apply. Always returns 0. */
+static int fill_pred_weight_table(HEVCContext * const h,
+                                  VASliceParameterBufferHEVC *slice_param,
+                                  SliceHeader * const sh)
+{
+    int n, luma_one, chroma_one;
+
+    memset(slice_param->delta_luma_weight_l0,   0, sizeof(slice_param->delta_luma_weight_l0));
+    memset(slice_param->delta_luma_weight_l1,   0, sizeof(slice_param->delta_luma_weight_l1));
+    memset(slice_param->delta_chroma_weight_l0, 0, sizeof(slice_param->delta_chroma_weight_l0));
+    memset(slice_param->delta_chroma_weight_l1, 0, sizeof(slice_param->delta_chroma_weight_l1));
+    memset(slice_param->luma_offset_l0,         0, sizeof(slice_param->luma_offset_l0));
+    memset(slice_param->luma_offset_l1,         0, sizeof(slice_param->luma_offset_l1));
+    memset(slice_param->ChromaOffsetL0,         0, sizeof(slice_param->ChromaOffsetL0));
+    memset(slice_param->ChromaOffsetL1,         0, sizeof(slice_param->ChromaOffsetL1));
+    slice_param->luma_log2_weight_denom         = 0;
+    slice_param->delta_chroma_log2_weight_denom = 0;
+
+    /* Nothing more to fill for I slices, or for P/B slices whose PPS weighting flag is off. */
+    if (sh->slice_type == I_SLICE ||
+        (sh->slice_type == P_SLICE && !h->ps.pps->weighted_pred_flag) ||
+        (sh->slice_type == B_SLICE && !h->ps.pps->weighted_bipred_flag))
+        return 0;
+
+    luma_one   = 1 << sh->luma_log2_weight_denom;
+    chroma_one = 1 << sh->chroma_log2_weight_denom;
+
+    slice_param->luma_log2_weight_denom = sh->luma_log2_weight_denom;
+    if (h->ps.sps->chroma_format_idc)
+        slice_param->delta_chroma_log2_weight_denom = sh->chroma_log2_weight_denom - sh->luma_log2_weight_denom;
+
+    for (n = 0; n < 15 && n < sh->nb_refs[L0]; n++) {
+        slice_param->delta_luma_weight_l0[n]      = sh->luma_weight_l0[n] - luma_one;
+        slice_param->luma_offset_l0[n]            = sh->luma_offset_l0[n];
+        slice_param->delta_chroma_weight_l0[n][0] = sh->chroma_weight_l0[n][0] - chroma_one;
+        slice_param->delta_chroma_weight_l0[n][1] = sh->chroma_weight_l0[n][1] - chroma_one;
+        slice_param->ChromaOffsetL0[n][0]         = sh->chroma_offset_l0[n][0];
+        slice_param->ChromaOffsetL0[n][1]         = sh->chroma_offset_l0[n][1];
+    }
+
+    /* The L1 tables are only filled for B slices. */
+    for (n = 0; sh->slice_type == B_SLICE && n < 15 && n < sh->nb_refs[L1]; n++) {
+        slice_param->delta_luma_weight_l1[n]      = sh->luma_weight_l1[n] - luma_one;
+        slice_param->luma_offset_l1[n]            = sh->luma_offset_l1[n];
+        slice_param->delta_chroma_weight_l1[n][0] = sh->chroma_weight_l1[n][0] - chroma_one;
+        slice_param->delta_chroma_weight_l1[n][1] = sh->chroma_weight_l1[n][1] - chroma_one;
+        slice_param->ChromaOffsetL1[n][0]         = sh->chroma_offset_l1[n][0];
+        slice_param->ChromaOffsetL1[n][1]         = sh->chroma_offset_l1[n][1];
+    }
+
+    return 0;
+}
+
+/**
+ * Decode the given HEVC slice with VA API by filling a new slice parameter buffer.
+ * @return 0 on success, -1 if the slice parameter buffer cannot be allocated
+ */
+static int vaapi_hevc_decode_slice(AVCodecContext *avctx,
+                                   const uint8_t  *buffer,
+                                   uint32_t        size)
+{
+    FFVAContext * const vactx = ff_vaapi_get_context(avctx);
+    HEVCContext * const h = avctx->priv_data;
+    vaapi_hevc_frame_data *frame_data = h->ref->hwaccel_picture_private;
+    SliceHeader * const sh = &h->sh;
+    VASliceParameterBufferHEVC *slice_param;
+    int i, list_idx;
+    uint8_t nb_list = sh->slice_type == B_SLICE ? 2 : 1;
+
+    if (sh->slice_type == I_SLICE)
+        nb_list = 0;
+
+    ff_dlog(avctx, "vaapi_hevc_decode_slice(): buffer %p, size %d\n", buffer, size);
+
+    /* Fill in VASliceParameterBufferHEVC. */
+    slice_param = (VASliceParameterBufferHEVC *)ff_vaapi_alloc_slice(vactx, buffer, size);
+    if (!slice_param)
+        return -1;
+    frame_data->last_slice_param = slice_param;
+
+    /* The base structure changed, so this has to be re-set in order to be valid on every byte order. */
+    slice_param->slice_data_flag = VA_SLICE_DATA_FLAG_ALL;
+
+    /* Add 1 to the bits count here to account for the byte_alignment bit, which always is at least one bit and not accounted for otherwise. */
+    slice_param->slice_data_byte_offset = (get_bits_count(&h->HEVClc->gb) + 1 + 7) / 8;
+
+    slice_param->slice_segment_address = sh->slice_segment_addr;
+
+    slice_param->LongSliceFlags.value = 0;
+    slice_param->LongSliceFlags.fields.dependent_slice_segment_flag = sh->dependent_slice_segment_flag;
+    slice_param->LongSliceFlags.fields.slice_type = sh->slice_type;
+    slice_param->LongSliceFlags.fields.color_plane_id = sh->colour_plane_id;
+    slice_param->LongSliceFlags.fields.mvd_l1_zero_flag = sh->mvd_l1_zero_flag;
+    slice_param->LongSliceFlags.fields.cabac_init_flag = sh->cabac_init_flag;
+    slice_param->LongSliceFlags.fields.slice_temporal_mvp_enabled_flag = sh->slice_temporal_mvp_enabled_flag;
+    slice_param->LongSliceFlags.fields.slice_deblocking_filter_disabled_flag = sh->disable_deblocking_filter_flag;
+    slice_param->LongSliceFlags.fields.collocated_from_l0_flag = sh->collocated_list == L0 ? 1 : 0;
+    slice_param->LongSliceFlags.fields.slice_loop_filter_across_slices_enabled_flag = sh->slice_loop_filter_across_slices_enabled_flag;
+
+    slice_param->LongSliceFlags.fields.slice_sao_luma_flag = sh->slice_sample_adaptive_offset_flag[0];
+    if (h->ps.sps->chroma_format_idc)
+        slice_param->LongSliceFlags.fields.slice_sao_chroma_flag = sh->slice_sample_adaptive_offset_flag[1];
+
+    if (sh->slice_temporal_mvp_enabled_flag) {
+        slice_param->collocated_ref_idx = sh->collocated_ref_idx;
+    } else {
+        slice_param->collocated_ref_idx = 0xFF;
+    }
+
+    slice_param->slice_qp_delta = sh->slice_qp_delta;
+    slice_param->slice_cb_qp_offset = sh->slice_cb_qp_offset;
+    slice_param->slice_cr_qp_offset = sh->slice_cr_qp_offset;
+    slice_param->slice_beta_offset_div2 = sh->beta_offset / 2;
+    slice_param->slice_tc_offset_div2 = sh->tc_offset / 2;
+
+    if (sh->slice_type == I_SLICE) {
+        slice_param->five_minus_max_num_merge_cand = 0;
+    } else {
+        slice_param->five_minus_max_num_merge_cand = 5 - sh->max_num_merge_cand;
+    }
+
+    slice_param->num_ref_idx_l0_active_minus1 = sh->nb_refs[L0] ? sh->nb_refs[L0] - 1 : 0;
+    slice_param->num_ref_idx_l1_active_minus1 = sh->nb_refs[L1] ? sh->nb_refs[L1] - 1 : 0;
+
+    memset(slice_param->RefPicList, 0xFF, sizeof(slice_param->RefPicList));
+
+    /* h->ref->refPicList is updated before each slice; stay within the fixed-size VA API lists. */
+    for (list_idx = 0; list_idx < nb_list; ++list_idx) {
+        RefPicList *rpl = &h->ref->refPicList[list_idx];
+
+        for (i = 0; i < rpl->nb_refs && i < FF_ARRAY_ELEMS(slice_param->RefPicList[list_idx]); ++i)
+            slice_param->RefPicList[list_idx][i] = get_ref_pic_index(h, rpl->ref[i]);
+    }
+
+    return fill_pred_weight_table(h, slice_param, sh);
+}
+
+AVHWAccel ff_hevc_vaapi_hwaccel = {
+    .name                 = "hevc_vaapi",
+    .id                   = AV_CODEC_ID_HEVC,
+    .type                 = AVMEDIA_TYPE_VIDEO,
+    .pix_fmt              = AV_PIX_FMT_VAAPI,
+    .priv_data_size       = sizeof(FFVAContext),           /* presumably sizes the context returned by ff_vaapi_get_context() - confirm */
+    .frame_priv_data_size = sizeof(vaapi_hevc_frame_data), /* presumably sizes HEVCFrame.hwaccel_picture_private - confirm */
+    .init                 = ff_vaapi_context_init,
+    .uninit               = ff_vaapi_context_fini,
+    .start_frame          = vaapi_hevc_start_frame,
+    .decode_slice         = vaapi_hevc_decode_slice,
+    .end_frame            = vaapi_hevc_end_frame,
+};
diff --git a/libavcodec/vaapi_internal.h b/libavcodec/vaapi_internal.h
index 918c718d..306ae13b 100644
--- a/libavcodec/vaapi_internal.h
+++ b/libavcodec/vaapi_internal.h
@@ -27,7 +27,7 @@
 #include <va/va.h>
 #include "vaapi.h"
 #include "avcodec.h"
-#include "mpegvideo.h"
+#include "internal.h"
 
 /**
  * @addtogroup VAAPI_Decoding
@@ -35,23 +35,53 @@
  * @{
  */
 
+typedef struct { /* VA API decoding state; the hwaccels obtain it through ff_vaapi_get_context() */
+    VADisplay display;                  ///< Windowing system dependent handle
+    VAConfigID config_id;               ///< Configuration ID
+    VAContextID context_id;             ///< Context ID (video decode pipeline)
+    VABufferID pic_param_buf_id;        ///< Picture parameter buffer
+    VABufferID iq_matrix_buf_id;        ///< Inverse quantiser matrix buffer
+    VABufferID bitplane_buf_id;         ///< Bitplane buffer (for VC-1 decoding)
+    VABufferID *slice_buf_ids;          ///< Slice parameter/data buffers
+    unsigned int n_slice_buf_ids;       ///< Number of effective slice buffers
+    unsigned int slice_buf_ids_alloc;   ///< Number of allocated slice buffers
+    void *slice_params;                 ///< Pointer to slice parameter buffers
+    unsigned int slice_param_size;      ///< Size of a slice parameter element
+    unsigned int slice_params_alloc;    ///< Number of allocated slice parameters
+    unsigned int slice_count;           ///< Number of slices currently filled in
+    const uint8_t *slice_data;          ///< Pointer to slice data buffer base
+    unsigned int slice_data_size;       ///< Current size of slice data
+} FFVAContext;
+
+/** Extract the FFVAContext (the hwaccel private data) from an AVCodecContext */
+static inline FFVAContext *ff_vaapi_get_context(AVCodecContext *avctx)
+{
+    return avctx->internal->hwaccel_priv_data;
+}
+
 /** Extract VASurfaceID from an AVFrame */
 static inline VASurfaceID ff_vaapi_get_surface_id(AVFrame *pic)
 {
     return (uintptr_t)pic->data[3];
 }
 
+/** Common AVHWAccel.init() implementation */
+int ff_vaapi_context_init(AVCodecContext *avctx);
+
+/** Common AVHWAccel.uninit() implementation */
+int ff_vaapi_context_fini(AVCodecContext *avctx);
+
 /** Common AVHWAccel.end_frame() implementation */
 void ff_vaapi_common_end_frame(AVCodecContext *avctx);
 
 /** Allocate a new picture parameter buffer */
-void *ff_vaapi_alloc_pic_param(struct vaapi_context *vactx, unsigned int size);
+void *ff_vaapi_alloc_pic_param(FFVAContext *vactx, unsigned int size);
 
 /** Allocate a new IQ matrix buffer */
-void *ff_vaapi_alloc_iq_matrix(struct vaapi_context *vactx, unsigned int size);
+void *ff_vaapi_alloc_iq_matrix(FFVAContext *vactx, unsigned int size);
 
 /** Allocate a new bit-plane buffer */
-uint8_t *ff_vaapi_alloc_bitplane(struct vaapi_context *vactx, uint32_t size);
+uint8_t *ff_vaapi_alloc_bitplane(FFVAContext *vactx, uint32_t size);
 
 /**
  * Allocate a new slice descriptor for the input slice.
@@ -61,11 +91,11 @@ uint8_t *ff_vaapi_alloc_bitplane(struct vaapi_context *vactx, uint32_t size);
  * @param size the size of the slice in bytes
  * @return the newly allocated slice parameter
  */
-VASliceParameterBufferBase *ff_vaapi_alloc_slice(struct vaapi_context *vactx, const uint8_t *buffer, uint32_t size);
+VASliceParameterBufferBase *ff_vaapi_alloc_slice(FFVAContext *vactx, const uint8_t *buffer, uint32_t size);
 
 int ff_vaapi_mpeg_end_frame(AVCodecContext *avctx);
-int ff_vaapi_commit_slices(struct vaapi_context *vactx);
-int ff_vaapi_render_picture(struct vaapi_context *vactx, VASurfaceID surface);
+int ff_vaapi_commit_slices(FFVAContext *vactx);
+int ff_vaapi_render_picture(FFVAContext *vactx, VASurfaceID surface);
 
 /* @} */
 
diff --git a/libavcodec/vaapi_mpeg2.c b/libavcodec/vaapi_mpeg2.c
index 87fab898..518fec04 100644
--- a/libavcodec/vaapi_mpeg2.c
+++ b/libavcodec/vaapi_mpeg2.c
@@ -21,6 +21,7 @@
  */
 
 #include "mpegutils.h"
+#include "mpegvideo.h"
 #include "vaapi_internal.h"
 #include "internal.h"
 
@@ -40,7 +41,7 @@ static inline int mpeg2_get_is_frame_start(MpegEncContext *s)
 static int vaapi_mpeg2_start_frame(AVCodecContext *avctx, av_unused const uint8_t *buffer, av_unused uint32_t size)
 {
     struct MpegEncContext * const s = avctx->priv_data;
-    struct vaapi_context * const vactx = avctx->hwaccel_context;
+    FFVAContext * const vactx = ff_vaapi_get_context(avctx);
     VAPictureParameterBufferMPEG2 *pic_param;
     VAIQMatrixBufferMPEG2 *iq_matrix;
     int i;
@@ -103,6 +104,7 @@ static int vaapi_mpeg2_start_frame(AVCodecContext *avctx, av_unused const uint8_
 static int vaapi_mpeg2_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size)
 {
     MpegEncContext * const s = avctx->priv_data;
+    FFVAContext * const vactx = ff_vaapi_get_context(avctx);
     VASliceParameterBufferMPEG2 *slice_param;
     GetBitContext gb;
     uint32_t quantiser_scale_code, intra_slice_flag, macroblock_offset;
@@ -123,7 +125,7 @@ static int vaapi_mpeg2_decode_slice(AVCodecContext *avctx, const uint8_t *buffer
     macroblock_offset = get_bits_count(&gb);
 
     /* Fill in VASliceParameterBufferMPEG2 */
-    slice_param = (VASliceParameterBufferMPEG2 *)ff_vaapi_alloc_slice(avctx->hwaccel_context, buffer, size);
+    slice_param = (VASliceParameterBufferMPEG2 *)ff_vaapi_alloc_slice(vactx, buffer, size);
     if (!slice_param)
         return -1;
     slice_param->macroblock_offset              = macroblock_offset;
@@ -138,8 +140,11 @@ AVHWAccel ff_mpeg2_vaapi_hwaccel = {
     .name           = "mpeg2_vaapi",
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_MPEG2VIDEO,
-    .pix_fmt        = AV_PIX_FMT_VAAPI_VLD,
+    .pix_fmt        = AV_PIX_FMT_VAAPI,
     .start_frame    = vaapi_mpeg2_start_frame,
     .end_frame      = ff_vaapi_mpeg_end_frame,
     .decode_slice   = vaapi_mpeg2_decode_slice,
+    .init           = ff_vaapi_context_init,
+    .uninit         = ff_vaapi_context_fini,
+    .priv_data_size = sizeof(FFVAContext),
 };
diff --git a/libavcodec/vaapi_mpeg4.c b/libavcodec/vaapi_mpeg4.c
index 9b283f78..b5b946db 100644
--- a/libavcodec/vaapi_mpeg4.c
+++ b/libavcodec/vaapi_mpeg4.c
@@ -24,6 +24,7 @@
 #include "internal.h"
 #include "h263.h"
 #include "mpeg4video.h"
+#include "mpegvideo.h"
 
 /** Reconstruct bitstream intra_dc_vlc_thr */
 static int mpeg4_get_intra_dc_vlc_thr(Mpeg4DecContext *s)
@@ -45,7 +46,7 @@ static int vaapi_mpeg4_start_frame(AVCodecContext *avctx, av_unused const uint8_
 {
     Mpeg4DecContext *ctx = avctx->priv_data;
     MpegEncContext * const s = &ctx->m;
-    struct vaapi_context * const vactx = avctx->hwaccel_context;
+    FFVAContext * const vactx = ff_vaapi_get_context(avctx);
     VAPictureParameterBufferMPEG4 *pic_param;
     VAIQMatrixBufferMPEG4 *iq_matrix;
     int i;
@@ -121,12 +122,13 @@ static int vaapi_mpeg4_start_frame(AVCodecContext *avctx, av_unused const uint8_
 static int vaapi_mpeg4_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size)
 {
     MpegEncContext * const s = avctx->priv_data;
+    FFVAContext * const vactx = ff_vaapi_get_context(avctx);
     VASliceParameterBufferMPEG4 *slice_param;
 
     ff_dlog(avctx, "vaapi_mpeg4_decode_slice(): buffer %p, size %d\n", buffer, size);
 
     /* Fill in VASliceParameterBufferMPEG4 */
-    slice_param = (VASliceParameterBufferMPEG4 *)ff_vaapi_alloc_slice(avctx->hwaccel_context, buffer, size);
+    slice_param = (VASliceParameterBufferMPEG4 *)ff_vaapi_alloc_slice(vactx, buffer, size);
     if (!slice_param)
         return -1;
     slice_param->macroblock_offset      = get_bits_count(&s->gb) % 8;
@@ -141,10 +143,13 @@ AVHWAccel ff_mpeg4_vaapi_hwaccel = {
     .name           = "mpeg4_vaapi",
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_MPEG4,
-    .pix_fmt        = AV_PIX_FMT_VAAPI_VLD,
+    .pix_fmt        = AV_PIX_FMT_VAAPI,
     .start_frame    = vaapi_mpeg4_start_frame,
     .end_frame      = ff_vaapi_mpeg_end_frame,
     .decode_slice   = vaapi_mpeg4_decode_slice,
+    .init           = ff_vaapi_context_init,
+    .uninit         = ff_vaapi_context_fini,
+    .priv_data_size = sizeof(FFVAContext),
 };
 #endif
 
@@ -153,9 +158,12 @@ AVHWAccel ff_h263_vaapi_hwaccel = {
     .name           = "h263_vaapi",
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_H263,
-    .pix_fmt        = AV_PIX_FMT_VAAPI_VLD,
+    .pix_fmt        = AV_PIX_FMT_VAAPI,
     .start_frame    = vaapi_mpeg4_start_frame,
     .end_frame      = ff_vaapi_mpeg_end_frame,
     .decode_slice   = vaapi_mpeg4_decode_slice,
+    .init           = ff_vaapi_context_init,
+    .uninit         = ff_vaapi_context_fini,
+    .priv_data_size = sizeof(FFVAContext),
 };
 #endif
diff --git a/libavcodec/vaapi_vc1.c b/libavcodec/vaapi_vc1.c
index 7ef9f2a0..5ded5dba 100644
--- a/libavcodec/vaapi_vc1.c
+++ b/libavcodec/vaapi_vc1.c
@@ -148,7 +148,7 @@ static int vaapi_vc1_start_frame(AVCodecContext *avctx, av_unused const uint8_t
 {
     VC1Context * const v = avctx->priv_data;
     MpegEncContext * const s = &v->s;
-    struct vaapi_context * const vactx = avctx->hwaccel_context;
+    FFVAContext * const vactx = ff_vaapi_get_context(avctx);
     VAPictureParameterBufferVC1 *pic_param;
 
     ff_dlog(avctx, "vaapi_vc1_start_frame()\n");
@@ -315,6 +315,7 @@ static int vaapi_vc1_decode_slice(AVCodecContext *avctx, const uint8_t *buffer,
 {
     VC1Context * const v = avctx->priv_data;
     MpegEncContext * const s = &v->s;
+    FFVAContext * const vactx = ff_vaapi_get_context(avctx);
     VASliceParameterBufferVC1 *slice_param;
 
     ff_dlog(avctx, "vaapi_vc1_decode_slice(): buffer %p, size %d\n", buffer, size);
@@ -326,7 +327,7 @@ static int vaapi_vc1_decode_slice(AVCodecContext *avctx, const uint8_t *buffer,
     }
 
     /* Fill in VASliceParameterBufferVC1 */
-    slice_param = (VASliceParameterBufferVC1 *)ff_vaapi_alloc_slice(avctx->hwaccel_context, buffer, size);
+    slice_param = (VASliceParameterBufferVC1 *)ff_vaapi_alloc_slice(vactx, buffer, size);
     if (!slice_param)
         return -1;
     slice_param->macroblock_offset       = get_bits_count(&s->gb);
@@ -339,10 +340,13 @@ AVHWAccel ff_wmv3_vaapi_hwaccel = {
     .name           = "wmv3_vaapi",
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_WMV3,
-    .pix_fmt        = AV_PIX_FMT_VAAPI_VLD,
+    .pix_fmt        = AV_PIX_FMT_VAAPI,
     .start_frame    = vaapi_vc1_start_frame,
     .end_frame      = ff_vaapi_mpeg_end_frame,
     .decode_slice   = vaapi_vc1_decode_slice,
+    .init           = ff_vaapi_context_init,
+    .uninit         = ff_vaapi_context_fini,
+    .priv_data_size = sizeof(FFVAContext),
 };
 #endif
 
@@ -350,8 +354,11 @@ AVHWAccel ff_vc1_vaapi_hwaccel = {
     .name           = "vc1_vaapi",
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_VC1,
-    .pix_fmt        = AV_PIX_FMT_VAAPI_VLD,
+    .pix_fmt        = AV_PIX_FMT_VAAPI,
     .start_frame    = vaapi_vc1_start_frame,
     .end_frame      = ff_vaapi_mpeg_end_frame,
     .decode_slice   = vaapi_vc1_decode_slice,
+    .init           = ff_vaapi_context_init,
+    .uninit         = ff_vaapi_context_fini,
+    .priv_data_size = sizeof(FFVAContext),
 };
diff --git a/libavcodec/vaapi_vp9.c b/libavcodec/vaapi_vp9.c
new file mode 100644
index 00000000..b360dcb7
--- /dev/null
+++ b/libavcodec/vaapi_vp9.c
@@ -0,0 +1,168 @@
+/*
+ * VP9 HW decode acceleration through VA API
+ *
+ * Copyright (C) 2015 Timo Rothenpieler <timo@rothenpieler.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/pixdesc.h"
+#include "vaapi_internal.h"
+#include "vp9.h"
+
+static void fill_picture_parameters(AVCodecContext                 *avctx,
+                                    const VP9SharedContext         *h,
+                                    VADecPictureParameterBufferVP9 *pp)
+{
+    const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
+    int i;
+    /* Fill the VA picture parameter buffer pp from avctx (geometry) and the frame header h->h. */
+    pp->frame_width = avctx->width;
+    pp->frame_height = avctx->height;
+
+    pp->frame_header_length_in_bytes = h->h.uncompressed_header_size;
+    pp->first_partition_size = h->h.compressed_header_size;
+
+    pp->profile = h->h.profile;
+
+    pp->filter_level = h->h.filter.level;
+    pp->sharpness_level = h->h.filter.sharpness;
+    pp->log2_tile_rows = h->h.tiling.log2_tile_rows;
+    pp->log2_tile_columns = h->h.tiling.log2_tile_cols;
+    /* Chroma subsampling is taken from the pixel format descriptor of avctx->sw_pix_fmt. */
+    pp->pic_fields.bits.subsampling_x = pixdesc->log2_chroma_w;
+    pp->pic_fields.bits.subsampling_y = pixdesc->log2_chroma_h;
+    pp->pic_fields.bits.frame_type = !h->h.keyframe; /* 0 when keyframe is set, 1 otherwise */
+    pp->pic_fields.bits.show_frame = !h->h.invisible; /* 0 when invisible is set, 1 otherwise */
+    pp->pic_fields.bits.error_resilient_mode = h->h.errorres;
+    pp->pic_fields.bits.intra_only = h->h.intraonly;
+    pp->pic_fields.bits.allow_high_precision_mv = h->h.keyframe ? 0 : h->h.highprecisionmvs;
+    pp->pic_fields.bits.mcomp_filter_type = h->h.filtermode ^ (h->h.filtermode <= 1); /* swaps 0 and 1; values above 1 are unchanged (presumably the two enums order these differently - confirm) */
+    pp->pic_fields.bits.frame_parallel_decoding_mode = h->h.parallelmode;
+    pp->pic_fields.bits.reset_frame_context = h->h.resetctx;
+    pp->pic_fields.bits.refresh_frame_context = h->h.refreshctx;
+    pp->pic_fields.bits.frame_context_idx = h->h.framectxid;
+
+    pp->pic_fields.bits.segmentation_enabled = h->h.segmentation.enabled;
+    pp->pic_fields.bits.segmentation_temporal_update = h->h.segmentation.temporal;
+    pp->pic_fields.bits.segmentation_update_map = h->h.segmentation.update_map;
+    /* refidx[] / signbias[] index mapping used below: 0 -> last, 1 -> golden, 2 -> alt. */
+    pp->pic_fields.bits.last_ref_frame = h->h.refidx[0];
+    pp->pic_fields.bits.last_ref_frame_sign_bias = h->h.signbias[0];
+    pp->pic_fields.bits.golden_ref_frame = h->h.refidx[1];
+    pp->pic_fields.bits.golden_ref_frame_sign_bias = h->h.signbias[1];
+    pp->pic_fields.bits.alt_ref_frame = h->h.refidx[2];
+    pp->pic_fields.bits.alt_ref_frame_sign_bias = h->h.signbias[2];
+    pp->pic_fields.bits.lossless_flag = h->h.lossless;
+    /* The 7 segmentation tree probabilities are copied as-is. */
+    for (i = 0; i < 7; i++)
+        pp->mb_segment_tree_probs[i] = h->h.segmentation.prob[i];
+    /* The 3 prediction probabilities are copied only when temporal is set; otherwise every byte is set to 255. */
+    if (h->h.segmentation.temporal) {
+        for (i = 0; i < 3; i++)
+            pp->segment_pred_probs[i] = h->h.segmentation.pred_prob[i];
+    } else {
+        memset(pp->segment_pred_probs, 255, sizeof(pp->segment_pred_probs));
+    }
+    /* One surface ID per h->refs[0..7]; entries whose frame has no buf[0] get VA_INVALID_ID. */
+    for (i = 0; i < 8; i++) {
+        if (h->refs[i].f->buf[0]) {
+            pp->reference_frames[i] = ff_vaapi_get_surface_id(h->refs[i].f);
+        } else {
+            pp->reference_frames[i] = VA_INVALID_ID;
+        }
+    }
+}
+
+static int vaapi_vp9_start_frame(AVCodecContext          *avctx,
+                                 av_unused const uint8_t *buffer,
+                                 av_unused uint32_t       size)
+{
+    const VP9SharedContext *h = avctx->priv_data;
+    FFVAContext * const vactx = ff_vaapi_get_context(avctx);
+    VADecPictureParameterBufferVP9 *pic_param;
+    /* Start of frame: record the VP9 slice parameter size (presumably consumed by ff_vaapi_alloc_slice() - confirm). */
+    vactx->slice_param_size = sizeof(VASliceParameterBufferVP9);
+    /* Allocate the picture parameter buffer and fill it from the parsed header; returns -1 if the allocation yields NULL. */
+    pic_param = ff_vaapi_alloc_pic_param(vactx, sizeof(VADecPictureParameterBufferVP9));
+    if (!pic_param)
+        return -1;
+    fill_picture_parameters(avctx, h, pic_param);
+
+    return 0;
+}
+
+static int vaapi_vp9_end_frame(AVCodecContext *avctx)
+{
+    FFVAContext * const vactx = ff_vaapi_get_context(avctx);
+    const VP9SharedContext *h = avctx->priv_data;
+    int ret;
+    /* End of frame: commit the queued slices, then render into the surface of the current frame; stop at the first negative result. */
+    ret = ff_vaapi_commit_slices(vactx);
+    if (ret < 0)
+        goto finish;
+
+    ret = ff_vaapi_render_picture(vactx, ff_vaapi_get_surface_id(h->frames[CUR_FRAME].tf.f));
+    if (ret < 0)
+        goto finish;
+    /* Reached on success and on failure alike: the common end-of-frame call always runs and ret is returned unchanged. */
+finish:
+    ff_vaapi_common_end_frame(avctx);
+    return ret;
+}
+
+static int vaapi_vp9_decode_slice(AVCodecContext *avctx,
+                                  const uint8_t  *buffer,
+                                  uint32_t        size)
+{
+    FFVAContext * const vactx = ff_vaapi_get_context(avctx);
+    const VP9SharedContext *h = avctx->priv_data;
+    VASliceParameterBufferVP9 *slice_param;
+    int i;
+    /* Request a slice parameter buffer for (buffer, size); returns -1 if none is provided, 0 once it has been filled. */
+    slice_param = (VASliceParameterBufferVP9*)ff_vaapi_alloc_slice(vactx, buffer, size);
+    if (!slice_param)
+        return -1;
+    /* Fill all 8 seg_param entries from the matching h->h.segmentation.feat[] entries. */
+    for (i = 0; i < 8; i++) {
+        slice_param->seg_param[i].segment_flags.fields.segment_reference_enabled = h->h.segmentation.feat[i].ref_enabled;
+        slice_param->seg_param[i].segment_flags.fields.segment_reference = h->h.segmentation.feat[i].ref_val;
+        slice_param->seg_param[i].segment_flags.fields.segment_reference_skipped = h->h.segmentation.feat[i].skip_enabled;
+        /* Raw copy sized by the destination; assumes lflvl is at least as large and laid out the same way - TODO confirm. */
+        memcpy(slice_param->seg_param[i].filter_level, h->h.segmentation.feat[i].lflvl, sizeof(slice_param->seg_param[i].filter_level));
+        /* qmul first index: 0 -> luma, 1 -> chroma; second index: 0 -> DC, 1 -> AC. */
+        slice_param->seg_param[i].luma_dc_quant_scale = h->h.segmentation.feat[i].qmul[0][0];
+        slice_param->seg_param[i].luma_ac_quant_scale = h->h.segmentation.feat[i].qmul[0][1];
+        slice_param->seg_param[i].chroma_dc_quant_scale = h->h.segmentation.feat[i].qmul[1][0];
+        slice_param->seg_param[i].chroma_ac_quant_scale = h->h.segmentation.feat[i].qmul[1][1];
+    }
+
+    return 0;
+}
+
+AVHWAccel ff_vp9_vaapi_hwaccel = { /* hwaccel descriptor: VP9 video with the VAAPI pixel format */
+    .name                 = "vp9_vaapi",
+    .type                 = AVMEDIA_TYPE_VIDEO,
+    .id                   = AV_CODEC_ID_VP9,
+    .pix_fmt              = AV_PIX_FMT_VAAPI,
+    .start_frame          = vaapi_vp9_start_frame, /* allocates and fills the picture parameters */
+    .end_frame            = vaapi_vp9_end_frame, /* commits slices and renders the current frame */
+    .decode_slice         = vaapi_vp9_decode_slice, /* allocates and fills one slice parameter buffer */
+    .init                 = ff_vaapi_context_init, /* not defined in this file */
+    .uninit               = ff_vaapi_context_fini, /* not defined in this file */
+    .priv_data_size       = sizeof(FFVAContext), /* presumably the storage returned by ff_vaapi_get_context() - confirm */
+};
diff --git a/libavcodec/vb.c b/libavcodec/vb.c
index 41ee42ec..560165ad 100644
--- a/libavcodec/vb.c
+++ b/libavcodec/vb.c
@@ -279,5 +279,5 @@ AVCodec ff_vb_decoder = {
     .init           = decode_init,
     .close          = decode_end,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/vble.c b/libavcodec/vble.c
index e7331b19..bb9c81c1 100644
--- a/libavcodec/vble.c
+++ b/libavcodec/vble.c
@@ -26,6 +26,8 @@
 
 #define BITSTREAM_READER_LE
 
+#include "libavutil/imgutils.h"
+
 #include "avcodec.h"
 #include "get_bits.h"
 #include "huffyuvdsp.h"
@@ -155,7 +157,7 @@ static int vble_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     vble_restore_plane(ctx, pic, &gb, 0, offset, avctx->width, avctx->height);
 
     /* Chroma */
-    if (!(ctx->avctx->flags & CODEC_FLAG_GRAY)) {
+    if (!(ctx->avctx->flags & AV_CODEC_FLAG_GRAY)) {
         offset += avctx->width * avctx->height;
         vble_restore_plane(ctx, pic, &gb, 1, offset, width_uv, height_uv);
 
@@ -187,8 +189,8 @@ static av_cold int vble_decode_init(AVCodecContext *avctx)
     avctx->pix_fmt = AV_PIX_FMT_YUV420P;
     avctx->bits_per_raw_sample = 8;
 
-    ctx->size = avpicture_get_size(avctx->pix_fmt,
-                                   avctx->width, avctx->height);
+    ctx->size = av_image_get_buffer_size(avctx->pix_fmt,
+                                         avctx->width, avctx->height, 1);
 
     ctx->val = av_malloc_array(ctx->size, sizeof(*ctx->val));
 
@@ -210,5 +212,5 @@ AVCodec ff_vble_decoder = {
     .init           = vble_decode_init,
     .close          = vble_decode_close,
     .decode         = vble_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/vc1_block.c b/libavcodec/vc1_block.c
index 3541ba78..255ba1da 100644
--- a/libavcodec/vc1_block.c
+++ b/libavcodec/vc1_block.c
@@ -99,7 +99,7 @@ static void vc1_put_signed_blocks_clamped(VC1Context *v)
             s->idsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][3],
                                               s->dest[0] - v_dist * s->linesize - 8,
                                               stride_y);
-            if (!CONFIG_GRAY || !(s->avctx->flags & CODEC_FLAG_GRAY)) {
+            if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
             s->idsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][4],
                                               s->dest[1] - 8 * s->uvlinesize - 8,
                                               s->uvlinesize);
@@ -126,7 +126,7 @@ static void vc1_put_signed_blocks_clamped(VC1Context *v)
             s->idsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][3],
                                               s->dest[0] - v_dist * s->linesize + 8,
                                               stride_y);
-            if (!CONFIG_GRAY || !(s->avctx->flags & CODEC_FLAG_GRAY)) {
+            if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
             s->idsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][4],
                                               s->dest[1] - 8 * s->uvlinesize,
                                               s->uvlinesize);
@@ -961,7 +961,7 @@ static int vc1_decode_intra_block(VC1Context *v, int16_t block[64], int n,
         q2 = s->current_picture.qscale_table[mb_pos - 1];
     if (!dc_pred_dir && a_avail && mb_pos >= s->mb_stride)
         q2 = s->current_picture.qscale_table[mb_pos - s->mb_stride];
-    if ( dc_pred_dir && n == 1)
+    if (dc_pred_dir && n == 1)
         q2 = q1;
     if (!dc_pred_dir && n == 2)
         q2 = q1;
@@ -1327,7 +1327,7 @@ static int vc1_decode_p_mb(VC1Context *v)
 
                     vc1_decode_intra_block(v, s->block[i], i, val, mquant,
                                            (i & 4) ? v->codingset2 : v->codingset);
-                    if (CONFIG_GRAY && (i > 3) && (s->avctx->flags & CODEC_FLAG_GRAY))
+                    if (CONFIG_GRAY && (i > 3) && (s->avctx->flags & AV_CODEC_FLAG_GRAY))
                         continue;
                     v->vc1dsp.vc1_inv_trans_8x8(s->block[i]);
                     if (v->rangeredfrm)
@@ -1348,7 +1348,7 @@ static int vc1_decode_p_mb(VC1Context *v)
                 } else if (val) {
                     pat = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb, first_block,
                                              s->dest[dst_idx] + off, (i & 4) ? s->uvlinesize : s->linesize,
-                                             CONFIG_GRAY && (i & 4) && (s->avctx->flags & CODEC_FLAG_GRAY), &block_tt);
+                                             CONFIG_GRAY && (i & 4) && (s->avctx->flags & AV_CODEC_FLAG_GRAY), &block_tt);
                     block_cbp |= pat << (i << 2);
                     if (!v->ttmbf && ttmb < 8)
                         ttmb = -1;
@@ -1438,7 +1438,7 @@ static int vc1_decode_p_mb(VC1Context *v)
 
                     vc1_decode_intra_block(v, s->block[i], i, is_coded[i], mquant,
                                            (i & 4) ? v->codingset2 : v->codingset);
-                    if (CONFIG_GRAY && (i > 3) && (s->avctx->flags & CODEC_FLAG_GRAY))
+                    if (CONFIG_GRAY && (i > 3) && (s->avctx->flags & AV_CODEC_FLAG_GRAY))
                         continue;
                     v->vc1dsp.vc1_inv_trans_8x8(s->block[i]);
                     if (v->rangeredfrm)
@@ -1460,7 +1460,7 @@ static int vc1_decode_p_mb(VC1Context *v)
                     pat = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb,
                                              first_block, s->dest[dst_idx] + off,
                                              (i & 4) ? s->uvlinesize : s->linesize,
-                                             CONFIG_GRAY && (i & 4) && (s->avctx->flags & CODEC_FLAG_GRAY),
+                                             CONFIG_GRAY && (i & 4) && (s->avctx->flags & AV_CODEC_FLAG_GRAY),
                                              &block_tt);
                     block_cbp |= pat << (i << 2);
                     if (!v->ttmbf && ttmb < 8)
@@ -1586,7 +1586,7 @@ static int vc1_decode_p_mb_intfr(VC1Context *v)
 
                 vc1_decode_intra_block(v, s->block[i], i, val, mquant,
                                        (i & 4) ? v->codingset2 : v->codingset);
-                if (CONFIG_GRAY && (i > 3) && (s->avctx->flags & CODEC_FLAG_GRAY))
+                if (CONFIG_GRAY && (i > 3) && (s->avctx->flags & AV_CODEC_FLAG_GRAY))
                     continue;
                 v->vc1dsp.vc1_inv_trans_8x8(s->block[i]);
                 if (i < 4) {
@@ -1673,7 +1673,7 @@ static int vc1_decode_p_mb_intfr(VC1Context *v)
                     pat = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb,
                                              first_block, s->dest[dst_idx] + off,
                                              (i & 4) ? s->uvlinesize : (s->linesize << fieldtx),
-                                             CONFIG_GRAY && (i & 4) && (s->avctx->flags & CODEC_FLAG_GRAY), &block_tt);
+                                             CONFIG_GRAY && (i & 4) && (s->avctx->flags & AV_CODEC_FLAG_GRAY), &block_tt);
                     block_cbp |= pat << (i << 2);
                     if (!v->ttmbf && ttmb < 8)
                         ttmb = -1;
@@ -1752,7 +1752,7 @@ static int vc1_decode_p_mb_intfi(VC1Context *v)
 
             vc1_decode_intra_block(v, s->block[i], i, val, mquant,
                                    (i & 4) ? v->codingset2 : v->codingset);
-            if (CONFIG_GRAY && (i > 3) && (s->avctx->flags & CODEC_FLAG_GRAY))
+            if (CONFIG_GRAY && (i > 3) && (s->avctx->flags & AV_CODEC_FLAG_GRAY))
                 continue;
             v->vc1dsp.vc1_inv_trans_8x8(s->block[i]);
             off  = (i & 4) ? 0 : ((i & 1) * 8 + (i & 2) * 4 * s->linesize);
@@ -1806,7 +1806,7 @@ static int vc1_decode_p_mb_intfi(VC1Context *v)
                 pat = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb,
                                          first_block, s->dest[dst_idx] + off,
                                          (i & 4) ? s->uvlinesize : s->linesize,
-                                         CONFIG_GRAY && (i & 4) && (s->avctx->flags & CODEC_FLAG_GRAY),
+                                         CONFIG_GRAY && (i & 4) && (s->avctx->flags & AV_CODEC_FLAG_GRAY),
                                          &block_tt);
                 block_cbp |= pat << (i << 2);
                 if (!v->ttmbf && ttmb < 8)
@@ -1953,7 +1953,7 @@ static void vc1_decode_b_mb(VC1Context *v)
 
             vc1_decode_intra_block(v, s->block[i], i, val, mquant,
                                    (i & 4) ? v->codingset2 : v->codingset);
-            if (CONFIG_GRAY && (i > 3) && (s->avctx->flags & CODEC_FLAG_GRAY))
+            if (CONFIG_GRAY && (i > 3) && (s->avctx->flags & AV_CODEC_FLAG_GRAY))
                 continue;
             v->vc1dsp.vc1_inv_trans_8x8(s->block[i]);
             if (v->rangeredfrm)
@@ -1967,7 +1967,7 @@ static void vc1_decode_b_mb(VC1Context *v)
             vc1_decode_p_block(v, s->block[i], i, mquant, ttmb,
                                first_block, s->dest[dst_idx] + off,
                                (i & 4) ? s->uvlinesize : s->linesize,
-                               CONFIG_GRAY && (i & 4) && (s->avctx->flags & CODEC_FLAG_GRAY), NULL);
+                               CONFIG_GRAY && (i & 4) && (s->avctx->flags & AV_CODEC_FLAG_GRAY), NULL);
             if (!v->ttmbf && ttmb < 8)
                 ttmb = -1;
             first_block = 0;
@@ -2028,7 +2028,7 @@ static void vc1_decode_b_mb_intfi(VC1Context *v)
 
             vc1_decode_intra_block(v, s->block[i], i, val, mquant,
                                    (i & 4) ? v->codingset2 : v->codingset);
-            if (CONFIG_GRAY && (i > 3) && (s->avctx->flags & CODEC_FLAG_GRAY))
+            if (CONFIG_GRAY && (i > 3) && (s->avctx->flags & AV_CODEC_FLAG_GRAY))
                 continue;
             v->vc1dsp.vc1_inv_trans_8x8(s->block[i]);
             if (v->rangeredfrm)
@@ -2126,7 +2126,7 @@ static void vc1_decode_b_mb_intfi(VC1Context *v)
                 vc1_decode_p_block(v, s->block[i], i, mquant, ttmb,
                                    first_block, s->dest[dst_idx] + off,
                                    (i & 4) ? s->uvlinesize : s->linesize,
-                                   CONFIG_GRAY && (i & 4) && (s->avctx->flags & CODEC_FLAG_GRAY), NULL);
+                                   CONFIG_GRAY && (i & 4) && (s->avctx->flags & AV_CODEC_FLAG_GRAY), NULL);
                 if (!v->ttmbf && ttmb < 8)
                     ttmb = -1;
                 first_block = 0;
@@ -2251,7 +2251,7 @@ static int vc1_decode_b_mb_intfr(VC1Context *v)
 
             vc1_decode_intra_block(v, s->block[i], i, val, mquant,
                                    (i & 4) ? v->codingset2 : v->codingset);
-            if (CONFIG_GRAY && i > 3 && (s->avctx->flags & CODEC_FLAG_GRAY))
+            if (CONFIG_GRAY && i > 3 && (s->avctx->flags & AV_CODEC_FLAG_GRAY))
                 continue;
             v->vc1dsp.vc1_inv_trans_8x8(s->block[i]);
             if (i < 4) {
@@ -2417,7 +2417,7 @@ static int vc1_decode_b_mb_intfr(VC1Context *v)
                     pat = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb,
                                              first_block, s->dest[dst_idx] + off,
                                              (i & 4) ? s->uvlinesize : (s->linesize << fieldtx),
-                                             CONFIG_GRAY && (i & 4) && (s->avctx->flags & CODEC_FLAG_GRAY), &block_tt);
+                                             CONFIG_GRAY && (i & 4) && (s->avctx->flags & AV_CODEC_FLAG_GRAY), &block_tt);
                     block_cbp |= pat << (i << 2);
                     if (!v->ttmbf && ttmb < 8)
                         ttmb = -1;
@@ -2560,7 +2560,7 @@ static void vc1_decode_i_blocks(VC1Context *v)
 
                 vc1_decode_i_block(v, s->block[k], k, val, (k < 4) ? v->codingset : v->codingset2);
 
-                if (CONFIG_GRAY && k > 3 && (s->avctx->flags & CODEC_FLAG_GRAY))
+                if (CONFIG_GRAY && k > 3 && (s->avctx->flags & AV_CODEC_FLAG_GRAY))
                     continue;
                 v->vc1dsp.vc1_inv_trans_8x8(s->block[k]);
                 if (v->pq >= 9 && v->overlap) {
@@ -2584,7 +2584,7 @@ static void vc1_decode_i_blocks(VC1Context *v)
                 if (s->mb_x) {
                     v->vc1dsp.vc1_h_overlap(s->dest[0], s->linesize);
                     v->vc1dsp.vc1_h_overlap(s->dest[0] + 8 * s->linesize, s->linesize);
-                    if (!CONFIG_GRAY || !(s->avctx->flags & CODEC_FLAG_GRAY)) {
+                    if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
                         v->vc1dsp.vc1_h_overlap(s->dest[1], s->uvlinesize);
                         v->vc1dsp.vc1_h_overlap(s->dest[2], s->uvlinesize);
                     }
@@ -2594,7 +2594,7 @@ static void vc1_decode_i_blocks(VC1Context *v)
                 if (!s->first_slice_line) {
                     v->vc1dsp.vc1_v_overlap(s->dest[0], s->linesize);
                     v->vc1dsp.vc1_v_overlap(s->dest[0] + 8, s->linesize);
-                    if (!CONFIG_GRAY || !(s->avctx->flags & CODEC_FLAG_GRAY)) {
+                    if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
                         v->vc1dsp.vc1_v_overlap(s->dest[1], s->uvlinesize);
                         v->vc1dsp.vc1_v_overlap(s->dest[2], s->uvlinesize);
                     }
@@ -2692,7 +2692,7 @@ static void vc1_decode_i_blocks_adv(VC1Context *v)
             if (v->fieldtx_is_raw)
                 v->fieldtx_plane[mb_pos] = get_bits1(&v->s.gb);
             cbp = get_vlc2(&v->s.gb, ff_msmp4_mb_i_vlc.table, MB_INTRA_VLC_BITS, 2);
-            if ( v->acpred_is_raw)
+            if (v->acpred_is_raw)
                 v->s.ac_pred = get_bits1(&v->s.gb);
             else
                 v->s.ac_pred = v->acpred_plane[mb_pos];
@@ -2723,7 +2723,7 @@ static void vc1_decode_i_blocks_adv(VC1Context *v)
                 vc1_decode_i_block_adv(v, block[k], k, val,
                                        (k < 4) ? v->codingset : v->codingset2, mquant);
 
-                if (CONFIG_GRAY && k > 3 && (s->avctx->flags & CODEC_FLAG_GRAY))
+                if (CONFIG_GRAY && k > 3 && (s->avctx->flags & AV_CODEC_FLAG_GRAY))
                     continue;
                 v->vc1dsp.vc1_inv_trans_8x8(block[k]);
             }
diff --git a/libavcodec/vc1_loopfilter.c b/libavcodec/vc1_loopfilter.c
index ad0945ff..025776ba 100644
--- a/libavcodec/vc1_loopfilter.c
+++ b/libavcodec/vc1_loopfilter.c
@@ -40,7 +40,7 @@ void ff_vc1_loop_filter_iblk(VC1Context *v, int pq)
         if (s->mb_x)
             v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize, s->linesize, pq);
         v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize + 8, s->linesize, pq);
-        if (!CONFIG_GRAY || !(s->avctx->flags & CODEC_FLAG_GRAY))
+        if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY))
         for (j = 0; j < 2; j++) {
             v->vc1dsp.vc1_v_loop_filter8(s->dest[j + 1], s->uvlinesize, pq);
             if (s->mb_x)
@@ -52,7 +52,7 @@ void ff_vc1_loop_filter_iblk(VC1Context *v, int pq)
     if (s->mb_y == s->end_mb_y - 1) {
         if (s->mb_x) {
             v->vc1dsp.vc1_h_loop_filter16(s->dest[0], s->linesize, pq);
-            if (!CONFIG_GRAY || !(s->avctx->flags & CODEC_FLAG_GRAY)) {
+            if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
             v->vc1dsp.vc1_h_loop_filter8(s->dest[1], s->uvlinesize, pq);
             v->vc1dsp.vc1_h_loop_filter8(s->dest[2], s->uvlinesize, pq);
             }
@@ -76,7 +76,7 @@ void ff_vc1_loop_filter_iblk_delayed(VC1Context *v, int pq)
                 if (s->mb_x >= 2)
                     v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize - 16, s->linesize, pq);
                 v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize - 8, s->linesize, pq);
-                if (!CONFIG_GRAY || !(s->avctx->flags & CODEC_FLAG_GRAY))
+                if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY))
                 for (j = 0; j < 2; j++) {
                     v->vc1dsp.vc1_v_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize - 8, s->uvlinesize, pq);
                     if (s->mb_x >= 2) {
@@ -94,7 +94,7 @@ void ff_vc1_loop_filter_iblk_delayed(VC1Context *v, int pq)
                 if (s->mb_x)
                     v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize, s->linesize, pq);
                 v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize + 8, s->linesize, pq);
-                if (!CONFIG_GRAY || !(s->avctx->flags & CODEC_FLAG_GRAY))
+                if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY))
                 for (j = 0; j < 2; j++) {
                     v->vc1dsp.vc1_v_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize, s->uvlinesize, pq);
                     if (s->mb_x >= 2) {
@@ -110,7 +110,7 @@ void ff_vc1_loop_filter_iblk_delayed(VC1Context *v, int pq)
                 if (s->mb_x >= 2)
                     v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize - 16, s->linesize, pq);
                 v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize - 8, s->linesize, pq);
-                if (s->mb_x >= 2 && (!CONFIG_GRAY || !(s->avctx->flags & CODEC_FLAG_GRAY))) {
+                if (s->mb_x >= 2 && (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY))) {
                     for (j = 0; j < 2; j++) {
                         v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize - 8, s->uvlinesize, pq);
                     }
@@ -121,7 +121,7 @@ void ff_vc1_loop_filter_iblk_delayed(VC1Context *v, int pq)
                 if (s->mb_x)
                     v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize, s->linesize, pq);
                 v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize + 8, s->linesize, pq);
-                if (s->mb_x && (!CONFIG_GRAY || !(s->avctx->flags & CODEC_FLAG_GRAY))) {
+                if (s->mb_x && (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY))) {
                     for (j = 0; j < 2; j++) {
                         v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize, s->uvlinesize, pq);
                     }
@@ -155,7 +155,7 @@ void ff_vc1_smooth_overlap_filter_iblk(VC1Context *v)
                                       v->block[v->cur_blk_idx][0]);
             v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][3],
                                       v->block[v->cur_blk_idx][2]);
-            if (!CONFIG_GRAY || !(s->avctx->flags & CODEC_FLAG_GRAY)) {
+            if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
                 v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][4],
                                           v->block[v->cur_blk_idx][4]);
                 v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][5],
@@ -174,7 +174,7 @@ void ff_vc1_smooth_overlap_filter_iblk(VC1Context *v)
                                           v->block[v->cur_blk_idx][0]);
                 v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][3],
                                           v->block[v->cur_blk_idx][1]);
-                if (!CONFIG_GRAY || !(s->avctx->flags & CODEC_FLAG_GRAY)) {
+                if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
                     v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][4],
                                               v->block[v->cur_blk_idx][4]);
                     v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][5],
@@ -194,7 +194,7 @@ void ff_vc1_smooth_overlap_filter_iblk(VC1Context *v)
                                       v->block[v->left_blk_idx][0]);
             v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][3],
                                       v->block[v->left_blk_idx][1]);
-            if (!CONFIG_GRAY || !(s->avctx->flags & CODEC_FLAG_GRAY)) {
+            if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
                 v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][4],
                                           v->block[v->left_blk_idx][4]);
                 v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][5],
@@ -336,7 +336,7 @@ void ff_vc1_apply_p_loop_filter(VC1Context *v)
 {
     MpegEncContext *s = &v->s;
     int i;
-    int block_count = CONFIG_GRAY && (s->avctx->flags & CODEC_FLAG_GRAY) ? 4 : 6;
+    int block_count = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
 
     for (i = 0; i < block_count; i++) {
         vc1_apply_p_v_loop_filter(v, i);
diff --git a/libavcodec/vc1_mc.c b/libavcodec/vc1_mc.c
index 53582433..4467646e 100644
--- a/libavcodec/vc1_mc.c
+++ b/libavcodec/vc1_mc.c
@@ -270,7 +270,7 @@ void ff_vc1_mc_1mv(VC1Context *v, int dir)
     }
 
     /* for grayscale we should not try to read from unknown area */
-    if (CONFIG_GRAY && s->avctx->flags & CODEC_FLAG_GRAY) {
+    if (CONFIG_GRAY && s->avctx->flags & AV_CODEC_FLAG_GRAY) {
         srcU = s->sc.edge_emu_buffer + 18 * s->linesize;
         srcV = s->sc.edge_emu_buffer + 18 * s->linesize;
     }
@@ -332,7 +332,7 @@ void ff_vc1_mc_1mv(VC1Context *v, int dir)
             s->hdsp.put_no_rnd_pixels_tab[0][dxy](s->dest[0], srcY, s->linesize, 16);
     }
 
-    if (CONFIG_GRAY && s->avctx->flags & CODEC_FLAG_GRAY)
+    if (CONFIG_GRAY && s->avctx->flags & AV_CODEC_FLAG_GRAY)
         return;
     /* Chroma MC always uses qpel bilinear */
     uvmx = (uvmx & 3) << 1;
@@ -518,7 +518,7 @@ void ff_vc1_mc_4mv_chroma(VC1Context *v, int dir)
 
     if (!v->field_mode && !v->s.last_picture.f->data[0])
         return;
-    if (CONFIG_GRAY && s->avctx->flags & CODEC_FLAG_GRAY)
+    if (CONFIG_GRAY && s->avctx->flags & AV_CODEC_FLAG_GRAY)
         return;
 
     /* calculate chroma MV vector from four luma MVs */
@@ -655,7 +655,7 @@ void ff_vc1_mc_4mv_chroma4(VC1Context *v, int dir, int dir2, int avg)
     int use_ic;
     uint8_t (*lutuv)[256];
 
-    if (CONFIG_GRAY && s->avctx->flags & CODEC_FLAG_GRAY)
+    if (CONFIG_GRAY && s->avctx->flags & AV_CODEC_FLAG_GRAY)
         return;
 
     for (i = 0; i < 4; i++) {
@@ -801,7 +801,7 @@ void ff_vc1_interp_mc(VC1Context *v)
     }
 
     /* for grayscale we should not try to read from unknown area */
-    if (CONFIG_GRAY && s->avctx->flags & CODEC_FLAG_GRAY) {
+    if (CONFIG_GRAY && s->avctx->flags & AV_CODEC_FLAG_GRAY) {
         srcU = s->sc.edge_emu_buffer + 18 * s->linesize;
         srcV = s->sc.edge_emu_buffer + 18 * s->linesize;
     }
@@ -865,7 +865,7 @@ void ff_vc1_interp_mc(VC1Context *v)
             s->hdsp.avg_no_rnd_pixels_tab[dxy](s->dest[0], srcY, s->linesize, 16);
     }
 
-    if (CONFIG_GRAY && s->avctx->flags & CODEC_FLAG_GRAY)
+    if (CONFIG_GRAY && s->avctx->flags & AV_CODEC_FLAG_GRAY)
         return;
     /* Chroma MC always uses qpel blilinear */
     uvmx = (uvmx & 3) << 1;
diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c
index d7a0cefa..f66afb9d 100644
--- a/libavcodec/vc1dec.c
+++ b/libavcodec/vc1dec.c
@@ -34,6 +34,7 @@
 #include "mpegvideo.h"
 #include "msmpeg4.h"
 #include "msmpeg4data.h"
+#include "profiles.h"
 #include "vc1.h"
 #include "vc1data.h"
 #include "vdpau_compat.h"
@@ -191,7 +192,7 @@ static void vc1_draw_sprites(VC1Context *v, SpriteData* sd)
     }
     alpha = av_clip_uint16(sd->coefs[1][6]);
 
-    for (plane = 0; plane < (CONFIG_GRAY && s->avctx->flags & CODEC_FLAG_GRAY ? 1 : 3); plane++) {
+    for (plane = 0; plane < (CONFIG_GRAY && s->avctx->flags & AV_CODEC_FLAG_GRAY ? 1 : 3); plane++) {
         int width = v->output_width>>!!plane;
 
         for (row = 0; row < v->output_height>>!!plane; row++) {
@@ -283,7 +284,7 @@ static int vc1_decode_sprites(VC1Context *v, GetBitContext* gb)
 
     if (!s->current_picture.f || !s->current_picture.f->data[0]) {
         av_log(avctx, AV_LOG_ERROR, "Got no sprites\n");
-        return -1;
+        return AVERROR_UNKNOWN;
     }
 
     if (v->two_sprites && (!s->last_picture_ptr || !s->last_picture.f->data[0])) {
@@ -312,7 +313,7 @@ static void vc1_sprite_flush(AVCodecContext *avctx)
        wrong but it looks better than doing nothing. */
 
     if (f && f->data[0])
-        for (plane = 0; plane < (CONFIG_GRAY && s->avctx->flags & CODEC_FLAG_GRAY ? 1 : 3); plane++)
+        for (plane = 0; plane < (CONFIG_GRAY && s->avctx->flags & AV_CODEC_FLAG_GRAY ? 1 : 3); plane++)
             for (i = 0; i < v->sprite_height>>!!plane; i++)
                 memset(f->data[plane] + i * f->linesize[plane],
                        plane ? 128 : 0, f->linesize[plane]);
@@ -428,30 +429,10 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx)
 
     if (!avctx->extradata_size || !avctx->extradata)
         return -1;
-    if (!CONFIG_GRAY || !(avctx->flags & CODEC_FLAG_GRAY))
-        avctx->pix_fmt = ff_get_format(avctx, avctx->codec->pix_fmts);
-    else {
-        avctx->pix_fmt = AV_PIX_FMT_GRAY8;
-        if (avctx->color_range == AVCOL_RANGE_UNSPECIFIED)
-            avctx->color_range = AVCOL_RANGE_MPEG;
-    }
     v->s.avctx = avctx;
 
     if ((ret = ff_vc1_init_common(v)) < 0)
         return ret;
-    // ensure static VLC tables are initialized
-    if ((ret = ff_msmpeg4_decode_init(avctx)) < 0)
-        return ret;
-    if ((ret = ff_vc1_decode_init_alloc_tables(v)) < 0)
-        return ret;
-    // Hack to ensure the above functions will be called
-    // again once we know all necessary settings.
-    // That this is necessary might indicate a bug.
-    ff_vc1_decode_end(avctx);
-
-    ff_blockdsp_init(&s->bdsp, avctx);
-    ff_h264chroma_init(&v->h264chroma, 8);
-    ff_qpeldsp_init(&s->qdsp);
 
     if (avctx->codec_id == AV_CODEC_ID_WMV3 || avctx->codec_id == AV_CODEC_ID_WMV3IMAGE) {
         int count = 0;
@@ -486,7 +467,7 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx)
             return -1;
         }
 
-        buf2  = av_mallocz(avctx->extradata_size + FF_INPUT_BUFFER_PADDING_SIZE);
+        buf2  = av_mallocz(avctx->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
         if (!buf2)
             return AVERROR(ENOMEM);
 
@@ -524,14 +505,38 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx)
         v->res_sprite = (avctx->codec_id == AV_CODEC_ID_VC1IMAGE);
     }
 
-    v->sprite_output_frame = av_frame_alloc();
-    if (!v->sprite_output_frame)
-        return AVERROR(ENOMEM);
-
     avctx->profile = v->profile;
     if (v->profile == PROFILE_ADVANCED)
         avctx->level = v->level;
 
+    if (!CONFIG_GRAY || !(avctx->flags & AV_CODEC_FLAG_GRAY))
+        avctx->pix_fmt = ff_get_format(avctx, avctx->codec->pix_fmts);
+    else {
+        avctx->pix_fmt = AV_PIX_FMT_GRAY8;
+        if (avctx->color_range == AVCOL_RANGE_UNSPECIFIED)
+            avctx->color_range = AVCOL_RANGE_MPEG;
+    }
+
+    // ensure static VLC tables are initialized
+    if ((ret = ff_msmpeg4_decode_init(avctx)) < 0)
+        return ret;
+    if ((ret = ff_vc1_decode_init_alloc_tables(v)) < 0)
+        return ret;
+    // Hack to ensure the above functions will be called
+    // again once we know all necessary settings.
+    // That this is necessary might indicate a bug.
+    ff_vc1_decode_end(avctx);
+
+    ff_blockdsp_init(&s->bdsp, avctx);
+    ff_h264chroma_init(&v->h264chroma, 8);
+    ff_qpeldsp_init(&s->qdsp);
+
+    // Must happen after calling ff_vc1_decode_end
+    // to avoid de-allocating the sprite_output_frame
+    v->sprite_output_frame = av_frame_alloc();
+    if (!v->sprite_output_frame)
+        return AVERROR(ENOMEM);
+
     avctx->has_b_frames = !!avctx->max_b_frames;
 
     if (v->color_prim == 1 || v->color_prim == 5 || v->color_prim == 6)
@@ -630,7 +635,7 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
 
     v->second_field = 0;
 
-    if(s->avctx->flags & CODEC_FLAG_LOW_DELAY)
+    if(s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY)
         s->low_delay = 1;
 
     /* no supplementary picture */
@@ -647,17 +652,19 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
         return buf_size;
     }
 
-    if (s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU) {
+#if FF_API_CAP_VDPAU
+    if (s->avctx->codec->capabilities&AV_CODEC_CAP_HWACCEL_VDPAU) {
         if (v->profile < PROFILE_ADVANCED)
             avctx->pix_fmt = AV_PIX_FMT_VDPAU_WMV3;
         else
             avctx->pix_fmt = AV_PIX_FMT_VDPAU_VC1;
     }
+#endif
 
     //for advanced profile we may need to parse and unescape data
     if (avctx->codec_id == AV_CODEC_ID_VC1 || avctx->codec_id == AV_CODEC_ID_VC1IMAGE) {
         int buf_size2 = 0;
-        buf2 = av_mallocz(buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
+        buf2 = av_mallocz(buf_size + AV_INPUT_BUFFER_PADDING_SIZE);
         if (!buf2)
             return AVERROR(ENOMEM);
 
@@ -672,23 +679,33 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
                 if (size <= 0) continue;
                 switch (AV_RB32(start)) {
                 case VC1_CODE_FRAME:
-                    if (avctx->hwaccel ||
-                        s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
+                    if (avctx->hwaccel
+#if FF_API_CAP_VDPAU
+                        || s->avctx->codec->capabilities&AV_CODEC_CAP_HWACCEL_VDPAU
+#endif
+                        )
                         buf_start = start;
                     buf_size2 = vc1_unescape_buffer(start + 4, size, buf2);
                     break;
                 case VC1_CODE_FIELD: {
                     int buf_size3;
-                    if (avctx->hwaccel ||
-                        s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
+                    if (avctx->hwaccel
+#if FF_API_CAP_VDPAU
+                        || s->avctx->codec->capabilities&AV_CODEC_CAP_HWACCEL_VDPAU
+#endif
+                        )
                         buf_start_second_field = start;
                     tmp = av_realloc_array(slices, sizeof(*slices), (n_slices+1));
-                    if (!tmp)
+                    if (!tmp) {
+                        ret = AVERROR(ENOMEM);
                         goto err;
+                    }
                     slices = tmp;
-                    slices[n_slices].buf = av_mallocz(buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
-                    if (!slices[n_slices].buf)
+                    slices[n_slices].buf = av_mallocz(buf_size + AV_INPUT_BUFFER_PADDING_SIZE);
+                    if (!slices[n_slices].buf) {
+                        ret = AVERROR(ENOMEM);
                         goto err;
+                    }
                     buf_size3 = vc1_unescape_buffer(start + 4, size,
                                                     slices[n_slices].buf);
                     init_get_bits(&slices[n_slices].gb, slices[n_slices].buf,
@@ -708,12 +725,16 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
                 case VC1_CODE_SLICE: {
                     int buf_size3;
                     tmp = av_realloc_array(slices, sizeof(*slices), (n_slices+1));
-                    if (!tmp)
+                    if (!tmp) {
+                        ret = AVERROR(ENOMEM);
                         goto err;
+                    }
                     slices = tmp;
-                    slices[n_slices].buf = av_mallocz(buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
-                    if (!slices[n_slices].buf)
+                    slices[n_slices].buf = av_mallocz(buf_size + AV_INPUT_BUFFER_PADDING_SIZE);
+                    if (!slices[n_slices].buf) {
+                        ret = AVERROR(ENOMEM);
                         goto err;
+                    }
                     buf_size3 = vc1_unescape_buffer(start + 4, size,
                                                     slices[n_slices].buf);
                     init_get_bits(&slices[n_slices].gb, slices[n_slices].buf,
@@ -731,18 +752,26 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
             divider = find_next_marker(buf, buf + buf_size);
             if ((divider == (buf + buf_size)) || AV_RB32(divider) != VC1_CODE_FIELD) {
                 av_log(avctx, AV_LOG_ERROR, "Error in WVC1 interlaced frame\n");
+                ret = AVERROR_INVALIDDATA;
                 goto err;
             } else { // found field marker, unescape second field
-                if (avctx->hwaccel ||
-                    s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
+                if (avctx->hwaccel
+#if FF_API_CAP_VDPAU
+                    || s->avctx->codec->capabilities&AV_CODEC_CAP_HWACCEL_VDPAU
+#endif
+                    )
                     buf_start_second_field = divider;
                 tmp = av_realloc_array(slices, sizeof(*slices), (n_slices+1));
-                if (!tmp)
+                if (!tmp) {
+                    ret = AVERROR(ENOMEM);
                     goto err;
+                }
                 slices = tmp;
-                slices[n_slices].buf = av_mallocz(buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
-                if (!slices[n_slices].buf)
+                slices[n_slices].buf = av_mallocz(buf_size + AV_INPUT_BUFFER_PADDING_SIZE);
+                if (!slices[n_slices].buf) {
+                    ret = AVERROR(ENOMEM);
                     goto err;
+                }
                 buf_size3 = vc1_unescape_buffer(divider + 4, buf + buf_size - divider - 4, slices[n_slices].buf);
                 init_get_bits(&slices[n_slices].gb, slices[n_slices].buf,
                               buf_size3 << 3);
@@ -783,9 +812,9 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
     }
 
     if (!s->context_initialized) {
-        if (ff_msmpeg4_decode_init(avctx) < 0)
+        if ((ret = ff_msmpeg4_decode_init(avctx)) < 0)
             goto err;
-        if (ff_vc1_decode_init_alloc_tables(v) < 0) {
+        if ((ret = ff_vc1_decode_init_alloc_tables(v)) < 0) {
             ff_mpv_common_end(s);
             goto err;
         }
@@ -793,8 +822,10 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
         s->low_delay = !avctx->has_b_frames || v->res_sprite;
 
         if (v->profile == PROFILE_ADVANCED) {
-            if(avctx->coded_width<=1 || avctx->coded_height<=1)
+            if(avctx->coded_width<=1 || avctx->coded_height<=1) {
+                ret = AVERROR_INVALIDDATA;
                 goto err;
+            }
             s->h_edge_pos = avctx->coded_width;
             s->v_edge_pos = avctx->coded_height;
         }
@@ -804,11 +835,11 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
     v->pic_header_flag = 0;
     v->first_pic_header_flag = 1;
     if (v->profile < PROFILE_ADVANCED) {
-        if (ff_vc1_parse_frame_header(v, &s->gb) < 0) {
+        if ((ret = ff_vc1_parse_frame_header(v, &s->gb)) < 0) {
             goto err;
         }
     } else {
-        if (ff_vc1_parse_frame_header_adv(v, &s->gb) < 0) {
+        if ((ret = ff_vc1_parse_frame_header_adv(v, &s->gb)) < 0) {
             goto err;
         }
     }
@@ -820,11 +851,13 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
     if ((avctx->codec_id == AV_CODEC_ID_WMV3IMAGE || avctx->codec_id == AV_CODEC_ID_VC1IMAGE)
         && s->pict_type != AV_PICTURE_TYPE_I) {
         av_log(v->s.avctx, AV_LOG_ERROR, "Sprite decoder: expected I-frame\n");
+        ret = AVERROR_INVALIDDATA;
         goto err;
     }
 
     if ((s->mb_height >> v->field_mode) == 0) {
         av_log(v->s.avctx, AV_LOG_ERROR, "image too short\n");
+        ret = AVERROR_INVALIDDATA;
         goto err;
     }
 
@@ -850,7 +883,7 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
             s->next_p_frame_damaged = 0;
     }
 
-    if (ff_mpv_frame_start(s, avctx) < 0) {
+    if ((ret = ff_mpv_frame_start(s, avctx)) < 0) {
         goto err;
     }
 
@@ -873,23 +906,26 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
     s->me.qpel_put = s->qdsp.put_qpel_pixels_tab;
     s->me.qpel_avg = s->qdsp.avg_qpel_pixels_tab;
 
+#if FF_API_CAP_VDPAU
     if ((CONFIG_VC1_VDPAU_DECODER)
-        &&s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU) {
+        &&s->avctx->codec->capabilities&AV_CODEC_CAP_HWACCEL_VDPAU) {
         if (v->field_mode && buf_start_second_field) {
             ff_vdpau_vc1_decode_picture(s, buf_start, buf_start_second_field - buf_start);
             ff_vdpau_vc1_decode_picture(s, buf_start_second_field, (buf + buf_size) - buf_start_second_field);
         } else {
             ff_vdpau_vc1_decode_picture(s, buf_start, (buf + buf_size) - buf_start);
         }
-    } else if (avctx->hwaccel) {
+    } else
+#endif
+    if (avctx->hwaccel) {
         if (v->field_mode && buf_start_second_field) {
             // decode first field
             s->picture_structure = PICT_BOTTOM_FIELD - v->tff;
-            if (avctx->hwaccel->start_frame(avctx, buf_start, buf_start_second_field - buf_start) < 0)
+            if ((ret = avctx->hwaccel->start_frame(avctx, buf_start, buf_start_second_field - buf_start)) < 0)
                 goto err;
-            if (avctx->hwaccel->decode_slice(avctx, buf_start, buf_start_second_field - buf_start) < 0)
+            if ((ret = avctx->hwaccel->decode_slice(avctx, buf_start, buf_start_second_field - buf_start)) < 0)
                 goto err;
-            if (avctx->hwaccel->end_frame(avctx) < 0)
+            if ((ret = avctx->hwaccel->end_frame(avctx)) < 0)
                 goto err;
 
             // decode second field
@@ -899,23 +935,24 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
             v->pic_header_flag = 0;
             if (ff_vc1_parse_frame_header_adv(v, &s->gb) < 0) {
                 av_log(avctx, AV_LOG_ERROR, "parsing header for second field failed");
+                ret = AVERROR_INVALIDDATA;
                 goto err;
             }
             v->s.current_picture_ptr->f->pict_type = v->s.pict_type;
 
-            if (avctx->hwaccel->start_frame(avctx, buf_start_second_field, (buf + buf_size) - buf_start_second_field) < 0)
+            if ((ret = avctx->hwaccel->start_frame(avctx, buf_start_second_field, (buf + buf_size) - buf_start_second_field)) < 0)
                 goto err;
-            if (avctx->hwaccel->decode_slice(avctx, buf_start_second_field, (buf + buf_size) - buf_start_second_field) < 0)
+            if ((ret = avctx->hwaccel->decode_slice(avctx, buf_start_second_field, (buf + buf_size) - buf_start_second_field)) < 0)
                 goto err;
-            if (avctx->hwaccel->end_frame(avctx) < 0)
+            if ((ret = avctx->hwaccel->end_frame(avctx)) < 0)
                 goto err;
         } else {
             s->picture_structure = PICT_FRAME;
-            if (avctx->hwaccel->start_frame(avctx, buf_start, (buf + buf_size) - buf_start) < 0)
+            if ((ret = avctx->hwaccel->start_frame(avctx, buf_start, (buf + buf_size) - buf_start)) < 0)
                 goto err;
-            if (avctx->hwaccel->decode_slice(avctx, buf_start, (buf + buf_size) - buf_start) < 0)
+            if ((ret = avctx->hwaccel->decode_slice(avctx, buf_start, (buf + buf_size) - buf_start)) < 0)
                 goto err;
-            if (avctx->hwaccel->end_frame(avctx) < 0)
+            if ((ret = avctx->hwaccel->end_frame(avctx)) < 0)
                 goto err;
         }
     } else {
@@ -958,6 +995,7 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
                 if (v->field_mode && i == n_slices1 + 2) {
                     if ((header_ret = ff_vc1_parse_frame_header_adv(v, &s->gb)) < 0) {
                         av_log(v->s.avctx, AV_LOG_ERROR, "Field header damaged\n");
+                        ret = AVERROR_INVALIDDATA;
                         if (avctx->err_recognition & AV_EF_EXPLODE)
                             goto err;
                         continue;
@@ -966,6 +1004,7 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
                     v->pic_header_flag = 1;
                     if ((header_ret = ff_vc1_parse_frame_header_adv(v, &s->gb)) < 0) {
                         av_log(v->s.avctx, AV_LOG_ERROR, "Slice header damaged\n");
+                        ret = AVERROR_INVALIDDATA;
                         if (avctx->err_recognition & AV_EF_EXPLODE)
                             goto err;
                         continue;
@@ -1012,8 +1051,10 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
                 get_bits_count(&s->gb), s->gb.size_in_bits);
 //  if (get_bits_count(&s->gb) > buf_size * 8)
 //      return -1;
-        if(s->er.error_occurred && s->pict_type == AV_PICTURE_TYPE_B)
+        if(s->er.error_occurred && s->pict_type == AV_PICTURE_TYPE_B) {
+            ret = AVERROR_INVALIDDATA;
             goto err;
+        }
         if (!v->field_mode)
             ff_er_frame_end(&s->er);
     }
@@ -1027,7 +1068,7 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
         if (avctx->skip_frame >= AVDISCARD_NONREF)
             goto end;
 #if CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER
-        if (vc1_decode_sprites(v, &s->gb))
+        if ((ret = vc1_decode_sprites(v, &s->gb)) < 0)
             goto err;
 #endif
         if ((ret = av_frame_ref(pict, v->sprite_output_frame)) < 0)
@@ -1059,18 +1100,10 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
     for (i = 0; i < n_slices; i++)
         av_free(slices[i].buf);
     av_free(slices);
-    return -1;
+    return ret;
 }
 
 
-static const AVProfile profiles[] = {
-    { FF_PROFILE_VC1_SIMPLE,   "Simple"   },
-    { FF_PROFILE_VC1_MAIN,     "Main"     },
-    { FF_PROFILE_VC1_COMPLEX,  "Complex"  },
-    { FF_PROFILE_VC1_ADVANCED, "Advanced" },
-    { FF_PROFILE_UNKNOWN },
-};
-
 static const enum AVPixelFormat vc1_hwaccel_pixfmt_list_420[] = {
 #if CONFIG_VC1_DXVA2_HWACCEL
     AV_PIX_FMT_DXVA2_VLD,
@@ -1079,7 +1112,7 @@ static const enum AVPixelFormat vc1_hwaccel_pixfmt_list_420[] = {
     AV_PIX_FMT_D3D11VA_VLD,
 #endif
 #if CONFIG_VC1_VAAPI_HWACCEL
-    AV_PIX_FMT_VAAPI_VLD,
+    AV_PIX_FMT_VAAPI,
 #endif
 #if CONFIG_VC1_VDPAU_HWACCEL
     AV_PIX_FMT_VDPAU,
@@ -1098,9 +1131,9 @@ AVCodec ff_vc1_decoder = {
     .close          = ff_vc1_decode_end,
     .decode         = vc1_decode_frame,
     .flush          = ff_mpeg_flush,
-    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
     .pix_fmts       = vc1_hwaccel_pixfmt_list_420,
-    .profiles       = NULL_IF_CONFIG_SMALL(profiles)
+    .profiles       = NULL_IF_CONFIG_SMALL(ff_vc1_profiles)
 };
 
 #if CONFIG_WMV3_DECODER
@@ -1114,13 +1147,13 @@ AVCodec ff_wmv3_decoder = {
     .close          = ff_vc1_decode_end,
     .decode         = vc1_decode_frame,
     .flush          = ff_mpeg_flush,
-    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
     .pix_fmts       = vc1_hwaccel_pixfmt_list_420,
-    .profiles       = NULL_IF_CONFIG_SMALL(profiles)
+    .profiles       = NULL_IF_CONFIG_SMALL(ff_vc1_profiles)
 };
 #endif
 
-#if CONFIG_WMV3_VDPAU_DECODER
+#if CONFIG_WMV3_VDPAU_DECODER && FF_API_VDPAU
 AVCodec ff_wmv3_vdpau_decoder = {
     .name           = "wmv3_vdpau",
     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 9 VDPAU"),
@@ -1130,13 +1163,13 @@ AVCodec ff_wmv3_vdpau_decoder = {
     .init           = vc1_decode_init,
     .close          = ff_vc1_decode_end,
     .decode         = vc1_decode_frame,
-    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HWACCEL_VDPAU,
     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_VDPAU_WMV3, AV_PIX_FMT_NONE },
-    .profiles       = NULL_IF_CONFIG_SMALL(profiles)
+    .profiles       = NULL_IF_CONFIG_SMALL(ff_vc1_profiles)
 };
 #endif
 
-#if CONFIG_VC1_VDPAU_DECODER
+#if CONFIG_VC1_VDPAU_DECODER && FF_API_VDPAU
 AVCodec ff_vc1_vdpau_decoder = {
     .name           = "vc1_vdpau",
     .long_name      = NULL_IF_CONFIG_SMALL("SMPTE VC-1 VDPAU"),
@@ -1146,9 +1179,9 @@ AVCodec ff_vc1_vdpau_decoder = {
     .init           = vc1_decode_init,
     .close          = ff_vc1_decode_end,
     .decode         = vc1_decode_frame,
-    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HWACCEL_VDPAU,
     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_VDPAU_VC1, AV_PIX_FMT_NONE },
-    .profiles       = NULL_IF_CONFIG_SMALL(profiles)
+    .profiles       = NULL_IF_CONFIG_SMALL(ff_vc1_profiles)
 };
 #endif
 
@@ -1162,7 +1195,7 @@ AVCodec ff_wmv3image_decoder = {
     .init           = vc1_decode_init,
     .close          = ff_vc1_decode_end,
     .decode         = vc1_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .flush          = vc1_sprite_flush,
     .pix_fmts       = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_YUV420P,
@@ -1181,7 +1214,7 @@ AVCodec ff_vc1image_decoder = {
     .init           = vc1_decode_init,
     .close          = ff_vc1_decode_end,
     .decode         = vc1_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .flush          = vc1_sprite_flush,
     .pix_fmts       = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_YUV420P,
diff --git a/libavcodec/vc2enc.c b/libavcodec/vc2enc.c
new file mode 100644
index 00000000..bf3f3a98
--- /dev/null
+++ b/libavcodec/vc2enc.c
@@ -0,0 +1,1196 @@
+/*
+ * Copyright (C) 2016 Open Broadcast Systems Ltd.
+ * Author        2016 Rostislav Pehlivanov <atomnuker@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/ffversion.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/opt.h"
+#include "dirac.h"
+#include "put_bits.h"
+#include "internal.h"
+
+#include "vc2enc_dwt.h"
+#include "diractab.h"
+
+/* Quantizations above this usually zero coefficients and lower the quality */
+#define MAX_QUANT_INDEX 100
+/* Presumably the entry count of coef_lut_val/coef_lut_len -- confirm at the allocation site */
+#define COEF_LUT_TAB 2048
+
+enum VC2_QM { /* choices for VC2EncContext.quant_matrix */
+    VC2_QM_DEF = 0, /* init_custom_qm() reads ff_dirac_default_qmat for this one */
+    VC2_QM_COL,     /* presumably maps to vc2_qm_col_tab -- confirm in init_custom_qm() */
+    VC2_QM_FLAT,    /* presumably maps to vc2_qm_flat_tab -- confirm in init_custom_qm() */
+
+    VC2_QM_NB       /* count of the enumerators above */
+};
+
+typedef struct SubBand { /* element type of Plane.band[MAX_DWT_LEVELS][4] */
+    dwtcoef *buf;     /* coefficient storage; presumably points into Plane.coef_buf -- confirm */
+    ptrdiff_t stride; /* row pitch that goes with buf; units are not visible here */
+    int width;
+    int height;
+} SubBand;
+
+typedef struct Plane { /* VC2EncContext holds three of these (plane[3]) */
+    SubBand band[MAX_DWT_LEVELS][4]; /* same [MAX_DWT_LEVELS][4] shape as VC2EncContext.quant */
+    dwtcoef *coef_buf;
+    int width;
+    int height;
+    int dwt_width;   /* presumably the width padded for the transform -- confirm at the init site */
+    int dwt_height;  /* presumably the height padded for the transform -- confirm at the init site */
+    ptrdiff_t coef_stride;
+} Plane;
+
+typedef struct SliceArgs { /* element type of VC2EncContext.slice_args */
+    PutBitContext pb;
+    void *ctx;       /* untyped back-pointer; presumably the VC2EncContext -- confirm */
+    int x;
+    int y;
+    int quant_idx;
+    int bits_ceil;
+    int bytes;
+} SliceArgs;
+
+typedef struct TransformArgs { /* element type of VC2EncContext.transform_args[3] */
+    void *ctx;         /* untyped back-pointer; presumably the VC2EncContext -- confirm */
+    Plane *plane;
+    void *idata;       /* untyped input pointer; presumably the source pixels -- confirm */
+    ptrdiff_t istride; /* stride that goes with idata; units are not visible here */
+    int field;
+    VC2TransformContext t;
+} TransformArgs;
+
+typedef struct VC2EncContext { /* encoder state; the encode_*() helpers take it as s */
+    AVClass *av_class;
+    PutBitContext pb;                 /* bitstream writer used by every encode_*() helper */
+    Plane plane[3];
+    AVCodecContext *avctx;
+    DiracVersionInfo ver;             /* major/minor are written by encode_parse_params() */
+
+    SliceArgs *slice_args;
+    TransformArgs transform_args[3];
+
+    /* For conversion from unsigned pixel values to signed */
+    int diff_offset;                  /* set by encode_signal_range(): 128, 512 or 2048 */
+    int bpp;                          /* set by encode_signal_range(): 1 or 2 (presumably bytes per sample) */
+
+    /* Picture number */
+    uint32_t picture_number;          /* post-incremented by encode_picture_header() */
+
+    /* Base video format */
+    int base_vf;                      /* written by encode_seq_header() */
+    int level;                        /* written by encode_parse_params() */
+    int profile;                      /* written by encode_parse_params() */
+
+    /* Quantization matrix */
+    uint8_t quant[MAX_DWT_LEVELS][4]; /* filled by init_custom_qm() as quant[level][orientation] */
+
+    /* Coefficient LUT */
+    uint32_t *coef_lut_val;
+    uint8_t  *coef_lut_len;
+
+    int num_x; /* #slices horizontally */
+    int num_y; /* #slices vertically */
+    int prefix_bytes;
+    int size_scaler;
+    int chroma_x_shift;               /* with chroma_y_shift selects 444/422/420 in encode_sample_fmt() */
+    int chroma_y_shift;
+
+    /* Rate control stuff */
+    int slice_max_bytes;
+    int q_ceil;
+    int q_start;
+
+    /* Options */
+    double tolerance;
+    int wavelet_idx;                  /* first index into ff_dirac_default_qmat in init_custom_qm() */
+    int wavelet_depth;                /* number of levels init_custom_qm() iterates over */
+    int strict_compliance;            /* when 0, the source parameter writers emit explicit values */
+    int slice_height;
+    int slice_width;
+    int interlaced;                   /* written by encode_scan_format() and encode_seq_header() */
+    enum VC2_QM quant_matrix;
+
+    /* Parse code state */
+    uint32_t next_parse_offset;       /* byte offset of the last parse info header written */
+    enum DiracParseCodes last_parse_code; /* pcode of the last parse info header written */
+} VC2EncContext;
+
+/* Write `bytes` bytes of zero bits. A single put_bits() call is kept to at
+ * most 31 bits here, so the run is emitted in 31-bit pieces plus a remainder. */
+static av_always_inline void put_padding(PutBitContext *pb, int bytes)
+{
+    int left;
+
+    /* full 31-bit pieces first */
+    for (left = bytes*8; left > 31; left -= 31)
+        put_bits(pb, 31, 0);
+    if (left)
+        put_bits(pb, left, 0);
+}
+
+/* Write val as a VC-2 interleaved exp-Golomb code: every bit of val + 1 below
+ * its leading one is sent as the pair "0 b", and a single 1 ends the code. */
+static av_always_inline void put_vc2_ue_uint(PutBitContext *pb, uint32_t val)
+{
+    /* 64-bit so that val + 1 cannot wrap to 0 for val == 0xFFFFFFFF */
+    const uint64_t v = (uint64_t)val + 1;
+    unsigned pbits = 0;
+    int i, bits = 0;
+
+    /* bits = floor(log2(v)): number of payload bits below the leading one */
+    while (v >> (bits + 1))
+        bits++;
+
+    if (bits*2 + 1 > 31) {
+        /* too long for one put_bits() call: go pair by pair (MSB-first writer assumed) */
+        for (i = bits - 1; i >= 0; i--)
+            put_bits(pb, 2, (unsigned)((v >> i) & 1));
+        put_bits(pb, 1, 1);
+        return;
+    }
+
+    /* codes of up to 31 bits are assembled and written with a single call */
+    for (i = bits - 1; i >= 0; i--)
+        pbits = (pbits << 2) | (unsigned)((v >> i) & 1);
+
+    put_bits(pb, bits*2 + 1, (pbits << 1) | 1);
+}
+
+/* Bit length of the code put_vc2_ue_uint() writes for val (val below 0xFFFF). */
+static av_always_inline int count_vc2_ue_uint(uint16_t val)
+{
+    int lead = 1, limit = 1;
+
+    if (val == 0)
+        return 1;
+    val++; /* the code represents val + 1; uint16_t, so 0xFFFF wraps to 0 */
+
+    /* double lead until limit (always 2*lead - 1) is no longer below val */
+    for (; val > limit; limit = (limit << 1) | 1)
+        lead <<= 1;
+
+    return 2*ff_log2(lead) + 1;
+}
+
+/* Compute, without writing anything, the code word (*eval) and its length in
+ * bits (*nbits) that put_vc2_ue_uint() emits for val (val below 0xFFFF). */
+static av_always_inline void get_vc2_ue_uint(uint16_t val, uint8_t *nbits,
+                                               uint32_t *eval)
+{
+    int n, k, code = 0, lead = 1, limit = 1;
+
+    if (val == 0) {
+        /* shortest code: a lone terminating 1 */
+        *nbits = 1;
+        *eval = 1;
+        return;
+    }
+    val++; /* uint16_t arithmetic: 0xFFFF wraps to 0 here */
+
+    /* double lead until limit (always 2*lead - 1) is no longer below val */
+    for (; val > limit; limit = (limit << 1) | 1)
+        lead <<= 1;
+    n = ff_log2(lead);
+
+    /* walk the n bits below the leading one, MSB first; every bit takes a
+     * two-bit slot whose upper bit stays 0 */
+    for (k = 0; k < n; k++) {
+        lead >>= 1;
+        code = (code << 2) | !!(val & lead);
+    }
+
+    *nbits = 2*n + 1;
+    *eval = (code << 1) | 1;
+}
+
+/* VC-2 10.4 - parse_info(): write the parse info header for pcode */
+static void encode_parse_info(VC2EncContext *s, enum DiracParseCodes pcode)
+{
+    uint32_t cur_pos, dist;
+
+    avpriv_align_put_bits(&s->pb);
+    /* byte offset of this header, counted from the start of s->pb.buf */
+    cur_pos = put_bits_count(&s->pb) >> 3;
+
+    /* Magic string */
+    avpriv_put_string(&s->pb, "BBCD", 0);
+
+    /* Parse code */
+    put_bits(&s->pb, 8, pcode);
+    /* Next parse offset: first back-patch the previous header's field, which
+     * sits 5 bytes (4-byte magic + 1-byte parse code) past that header's start */
+    dist = cur_pos - s->next_parse_offset;
+    AV_WB32(s->pb.buf + s->next_parse_offset + 5, dist);
+    s->next_parse_offset = cur_pos;
+    put_bits32(&s->pb, pcode == DIRAC_PCODE_END_SEQ ? 13 : 0); /* a 0 here is back-patched by the next call */
+
+    /* Previous parse offset; 13 = magic (4) + parse code (1) + the two offsets (4 + 4) */
+    put_bits32(&s->pb, s->last_parse_code == DIRAC_PCODE_END_SEQ ? 13 : dist);
+
+    s->last_parse_code = pcode;
+}
+
+/* VC-2 11.1 - parse_parameters()
+ * Writes version, profile and level. The level tells a decoder what
+ * resolutions to expect so that it can reject streams it cannot handle early
+ * (the codec targets small, memory-constrained FPGAs). It also restricts the
+ * usable formats, which is why strict_compliance can be switched off. */
+static void encode_parse_params(VC2EncContext *s)
+{
+    PutBitContext *pb = &s->pb;
+    put_vc2_ue_uint(pb, s->ver.major); /* VC-2 demands this to be 2 */
+    put_vc2_ue_uint(pb, s->ver.minor); /* ^^ and this to be 0       */
+    put_vc2_ue_uint(pb, s->profile);   /* 3 to signal HQ profile    */
+    put_vc2_ue_uint(pb, s->level);     /* 3 - 1080/720, 6 - 4K      */
+}
+
+/* VC-2 11.3 - frame_size(): one flag bit, then width and height if it is set */
+static void encode_frame_size(VC2EncContext *s)
+{
+    const int custom = !s->strict_compliance;
+    put_bits(&s->pb, 1, custom);
+    if (!custom)
+        return;
+    put_vc2_ue_uint(&s->pb, s->avctx->width);
+    put_vc2_ue_uint(&s->pb, s->avctx->height);
+}
+
+/* VC-2 11.3.3 - color_diff_sampling_format() */
+static void encode_sample_fmt(VC2EncContext *s)
+{
+    int idx = 0; /* 444, also used for any shift pair not matched below */
+
+    put_bits(&s->pb, 1, !s->strict_compliance);
+    if (s->strict_compliance)
+        return;
+
+    if (s->chroma_x_shift == 1 && s->chroma_y_shift == 0)
+        idx = 1; /* 422 */
+    else if (s->chroma_x_shift == 1 && s->chroma_y_shift == 1)
+        idx = 2; /* 420 */
+    put_vc2_ue_uint(&s->pb, idx);
+}
+
+/* VC-2 11.3.4 - scan_format() */
+static void encode_scan_format(VC2EncContext *s)
+{
+    put_bits(&s->pb, 1, !s->strict_compliance); /* flag: 1 means a value follows */
+    if (!s->strict_compliance)
+        put_vc2_ue_uint(&s->pb, s->interlaced); /* same field encode_seq_header() writes last */
+}
+
+/* VC-2 11.3.5 - frame_rate(): the rate goes out as time_base.den over .num */
+static void encode_frame_rate(VC2EncContext *s)
+{
+    const AVCodecContext *avctx = s->avctx;
+    put_bits(&s->pb, 1, !s->strict_compliance);
+    if (s->strict_compliance)
+        return;
+    put_vc2_ue_uint(&s->pb, 0); /* presumably preset index 0 = explicit values follow; confirm in spec */
+    put_vc2_ue_uint(&s->pb, avctx->time_base.den);
+    put_vc2_ue_uint(&s->pb, avctx->time_base.num);
+}
+
+/* VC-2 11.3.6 - aspect_ratio(): sample aspect ratio as numerator, denominator */
+static void encode_aspect_ratio(VC2EncContext *s)
+{
+    const AVCodecContext *avctx = s->avctx;
+    put_bits(&s->pb, 1, !s->strict_compliance);
+    if (s->strict_compliance)
+        return;
+    put_vc2_ue_uint(&s->pb, 0); /* presumably preset index 0 = explicit values follow; confirm in spec */
+    put_vc2_ue_uint(&s->pb, avctx->sample_aspect_ratio.num);
+    put_vc2_ue_uint(&s->pb, avctx->sample_aspect_ratio.den);
+}
+
+/* VC-2 11.3.7 - clean_area() */
+static void encode_clean_area(VC2EncContext *s)
+{
+    put_bits(&s->pb, 1, 0); /* flag is always 0, so no clean-area values are ever written */
+}
+
+/* VC-2 11.3.8 - signal_range()
+ * Also derives s->bpp and s->diff_offset from the pixel format depth. */
+static void encode_signal_range(VC2EncContext *s)
+{
+    const AVCodecContext *avctx = s->avctx;
+    const int depth = av_pix_fmt_desc_get(avctx->pix_fmt)->comp[0].depth;
+    const int full  = avctx->color_range == AVCOL_RANGE_JPEG;
+    const int video = avctx->color_range == AVCOL_RANGE_MPEG ||
+                      avctx->color_range == AVCOL_RANGE_UNSPECIFIED;
+    int idx;
+
+    if (depth == 8 && (full || video)) {
+        idx = full ? 1 : 2;
+        s->bpp = 1;
+        s->diff_offset = 128;
+    } else if (depth == 10) {
+        idx = 3;
+        s->bpp = 2;
+        s->diff_offset = 512;
+    } else {
+        idx = 4;
+        s->bpp = 2;
+        s->diff_offset = 2048;
+    }
+    put_bits(&s->pb, 1, !s->strict_compliance);
+    if (!s->strict_compliance)
+        put_vc2_ue_uint(&s->pb, idx);
+}
+
+/* VC-2 11.3.9 - color_spec() */
+static void encode_color_spec(VC2EncContext *s)
+{
+    const AVCodecContext *avctx = s->avctx;
+    PutBitContext *pb = &s->pb;
+    int prim, matrix, trc;
+
+    put_bits(pb, 1, !s->strict_compliance);
+    if (s->strict_compliance)
+        return;
+
+    /* translate the AVCOL_* values into the indices written below; 0 if unlisted */
+    switch (avctx->color_primaries) {
+    case AVCOL_PRI_BT470BG:   prim = 2; break;
+    case AVCOL_PRI_SMPTE170M:
+    case AVCOL_PRI_SMPTE240M: prim = 1; break;
+    default:                  prim = 0; break;
+    }
+    switch (avctx->colorspace) {
+    case AVCOL_SPC_RGB:     matrix = 3; break;
+    case AVCOL_SPC_YCOCG:   matrix = 2; break;
+    case AVCOL_SPC_BT470BG: matrix = 1; break;
+    default:                matrix = 0; break;
+    }
+    switch (avctx->color_trc) {
+    case AVCOL_TRC_LINEAR:     trc = 2; break;
+    case AVCOL_TRC_BT1361_ECG: trc = 1; break;
+    default:                   trc = 0; break;
+    }
+
+    put_vc2_ue_uint(pb, 0);
+
+    /* primaries */
+    put_bits(pb, 1, 1);
+    put_vc2_ue_uint(pb, prim);
+
+    /* color matrix */
+    put_bits(pb, 1, 1);
+    put_vc2_ue_uint(pb, matrix);
+
+    /* transfer function */
+    put_bits(pb, 1, 1);
+    put_vc2_ue_uint(pb, trc);
+}
+
+/* VC-2 11.3 - source_parameters()
+ * Calls one helper per parameter group, always in this fixed order. */
+static void encode_source_params(VC2EncContext *s)
+{
+    encode_frame_size(s);
+    encode_sample_fmt(s);
+    encode_scan_format(s);
+    encode_frame_rate(s);
+    encode_aspect_ratio(s);
+    encode_clean_area(s);
+    /* Besides writing bits, this one also sets s->bpp and s->diff_offset */
+    encode_signal_range(s);
+    encode_color_spec(s);
+}
+
+/* VC-2 11 - sequence_header()
+ * Byte-aligns the writer, then emits the parse parameters, the base video
+ * format index, the source parameters and the picture coding mode. */
+static void encode_seq_header(VC2EncContext *s)
+{
+    PutBitContext *pb = &s->pb;
+
+    avpriv_align_put_bits(pb);
+    encode_parse_params(s);
+    put_vc2_ue_uint(pb, s->base_vf);
+    encode_source_params(s);
+    /* Frames or fields coding */
+    put_vc2_ue_uint(pb, s->interlaced);
+}
+
+/* VC-2 12.1 - picture_header()
+ * Writes the current picture number as a byte-aligned 32-bit value and
+ * advances the counter for the next picture. */
+static void encode_picture_header(VC2EncContext *s)
+{
+    PutBitContext *pb = &s->pb;
+
+    avpriv_align_put_bits(pb);
+    put_bits32(pb, s->picture_number);
+    s->picture_number++;
+}
+
+/* VC-2 12.3.4.1 - slice_parameters()
+ * Emits the slice grid dimensions followed by the per-slice prefix length
+ * and the slice size scaler. */
+static void encode_slice_params(VC2EncContext *s)
+{
+    PutBitContext *pb = &s->pb;
+
+    put_vc2_ue_uint(pb, s->num_x);
+    put_vc2_ue_uint(pb, s->num_y);
+    put_vc2_ue_uint(pb, s->prefix_bytes);
+    put_vc2_ue_uint(pb, s->size_scaler);
+}
+
+/* "Color" quantization matrix, indexed as [wavelet level][orientation].
+ * Within a row: 1st idx = LL, second - vertical, third - horizontal,
+ * fourth - total.
+ * Only used inside this file, hence internal linkage. */
+static const uint8_t vc2_qm_col_tab[][4] = {
+    {20,  9, 15,  4},
+    { 0,  6,  6,  4},
+    { 0,  3,  3,  5},
+    { 0,  3,  5,  1},
+    { 0, 11, 10, 11}
+};
+
+/* Flat quantization matrix, indexed as [wavelet level][orientation]: every
+ * subband gets a zero offset.
+ * Only used inside this file, hence internal linkage. */
+static const uint8_t vc2_qm_flat_tab[][4] = {
+    { 0,  0,  0,  0},
+    { 0,  0,  0,  0},
+    { 0,  0,  0,  0},
+    { 0,  0,  0,  0},
+    { 0,  0,  0,  0}
+};
+
+/* Fills s->quant for every level below s->wavelet_depth from the matrix
+ * selected by s->quant_matrix:
+ *   VC2_QM_DEF - default matrix for levels 0..3, color table above that
+ *   VC2_QM_COL - color table
+ *   otherwise  - flat table */
+static void init_custom_qm(VC2EncContext *s)
+{
+    int lvl, orient;
+
+    for (lvl = 0; lvl < s->wavelet_depth; lvl++) {
+        for (orient = 0; orient < 4; orient++) {
+            if (s->quant_matrix == VC2_QM_DEF) {
+                if (lvl <= 3)
+                    s->quant[lvl][orient] = ff_dirac_default_qmat[s->wavelet_idx][lvl][orient];
+                else
+                    s->quant[lvl][orient] = vc2_qm_col_tab[lvl][orient];
+            } else if (s->quant_matrix == VC2_QM_COL) {
+                s->quant[lvl][orient] = vc2_qm_col_tab[lvl][orient];
+            } else {
+                s->quant[lvl][orient] = vc2_qm_flat_tab[lvl][orient];
+            }
+        }
+    }
+}
+
+/* VC-2 12.3.4.2 - quant_matrix()
+ * Writes the custom-matrix flag. A custom matrix is signalled when the
+ * transform is deeper than 4 levels or a non-default matrix was requested;
+ * its entries are then written out. Otherwise s->quant is loaded from the
+ * default matrix and nothing further is written. */
+static void encode_quant_matrix(VC2EncContext *s)
+{
+    int lvl, orient;
+    const int use_custom = s->wavelet_depth > 4 ||
+                           s->quant_matrix != VC2_QM_DEF;
+
+    put_bits(&s->pb, 1, use_custom);
+
+    if (!use_custom) {
+        for (lvl = 0; lvl < s->wavelet_depth; lvl++)
+            for (orient = 0; orient < 4; orient++)
+                s->quant[lvl][orient] = ff_dirac_default_qmat[s->wavelet_idx][lvl][orient];
+        return;
+    }
+
+    init_custom_qm(s);
+
+    /* The LL entry is only sent once, for level 0 */
+    put_vc2_ue_uint(&s->pb, s->quant[0][0]);
+    for (lvl = 0; lvl < s->wavelet_depth; lvl++)
+        for (orient = 1; orient < 4; orient++)
+            put_vc2_ue_uint(&s->pb, s->quant[lvl][orient]);
+}
+
+/* VC-2 12.3 - transform_parameters()
+ * Emits the wavelet index and depth, then the slice parameters and the
+ * quantization matrix. */
+static void encode_transform_params(VC2EncContext *s)
+{
+    PutBitContext *pb = &s->pb;
+
+    put_vc2_ue_uint(pb, s->wavelet_idx);
+    put_vc2_ue_uint(pb, s->wavelet_depth);
+    encode_slice_params(s);
+    encode_quant_matrix(s);
+}
+
+/* VC-2 12.2 - wavelet_transform()
+ * Writes the transform parameters and byte-aligns the writer. The coefficient
+ * data itself is not written here. */
+static void encode_wavelet_transform(VC2EncContext *s)
+{
+    encode_transform_params(s);
+    avpriv_align_put_bits(&s->pb);
+    /* Continued after DWT in encode_transform_data() */
+}
+
+/* VC-2 12 - picture_parse()
+ * Writes the picture header followed by the wavelet transform parameters,
+ * byte-aligning the writer before each part. */
+static void encode_picture_start(VC2EncContext *s)
+{
+    avpriv_align_put_bits(&s->pb);
+    encode_picture_header(s);
+    avpriv_align_put_bits(&s->pb);
+    encode_wavelet_transform(s);
+}
+
+/* Quantizes the variable c in place: scales it by 4, then divides it by the
+ * integer named qfactor that must be in scope at the point of use.
+ * Expands to a complete block statement, so it is written without a trailing
+ * semicolon. Multiplication is used instead of a left shift because shifting
+ * a negative value is undefined behaviour. */
+#define QUANT(c)        \
+    {                   \
+        (c) *= 4;       \
+        (c) /= qfactor; \
+    }
+
+/* Quantizes coeff by qfactor and returns, through len and eval, the bit
+ * length and bit pattern of its code: the unsigned code of the magnitude,
+ * followed by one sign bit (1 = negative) when the quantized value is
+ * nonzero. */
+static av_always_inline void coeff_quantize_get(qcoef coeff, int qfactor,
+                                                uint8_t *len, uint32_t *eval)
+{
+    QUANT(coeff)
+    get_vc2_ue_uint(abs(coeff), len, eval);
+
+    /* A zero has no sign bit */
+    if (!coeff)
+        return;
+
+    *eval = (*eval << 1) | (coeff < 0);
+    *len += 1;
+}
+
+/* Quantizes coeff by qfactor and writes it to pb: the unsigned code of the
+ * magnitude, followed by one sign bit (1 = negative) when the quantized
+ * value is nonzero. */
+static av_always_inline void coeff_quantize_encode(PutBitContext *pb, qcoef coeff,
+                                                   int qfactor)
+{
+    QUANT(coeff)
+    put_vc2_ue_uint(pb, abs(coeff));
+
+    /* A zero has no sign bit */
+    if (!coeff)
+        return;
+
+    put_bits(pb, 1, coeff < 0);
+}
+
+/* VC-2 13.5.5.2 - slice_band()
+ * Writes the coefficients of subband b that belong to slice (sx, sy) into
+ * pb, quantized with index quant. Coefficients inside the LUT range are
+ * emitted straight from the precomputed tables; the rest are quantized and
+ * coded on the fly. */
+static void encode_subband(VC2EncContext *s, PutBitContext *pb, int sx, int sy,
+                           SubBand *b, int quant)
+{
+    int col, row_idx;
+
+    /* Rectangle of the subband covered by this slice */
+    const int x_begin = b->width  * (sx+0) / s->num_x;
+    const int x_end   = b->width  * (sx+1) / s->num_x;
+    const int y_begin = b->height * (sy+0) / s->num_y;
+    const int y_end   = b->height * (sy+1) / s->num_y;
+
+    const int qfactor = ff_dirac_qscale_tab[quant];
+
+    /* Point at the middle of this quantizer's table so that it can be
+     * indexed directly with signed coefficient values */
+    const uint8_t  *lens = &s->coef_lut_len[2*quant*COEF_LUT_TAB + COEF_LUT_TAB];
+    const uint32_t *vals = &s->coef_lut_val[2*quant*COEF_LUT_TAB + COEF_LUT_TAB];
+
+    const dwtcoef *row = b->buf + y_begin * b->stride;
+
+    for (row_idx = y_begin; row_idx < y_end; row_idx++) {
+        for (col = x_begin; col < x_end; col++) {
+            if (row[col] < -COEF_LUT_TAB || row[col] >= COEF_LUT_TAB)
+                coeff_quantize_encode(pb, row[col], qfactor);
+            else
+                put_bits(pb, lens[row[col]], vals[row[col]]);
+        }
+        row += b->stride;
+    }
+}
+
+/* Computes, without writing anything, the size in bits of slice
+ * (slice_x, slice_y) when coded with quantization index quant_idx. The
+ * accounting follows encode_hq_slice(): prefix bytes, one quantizer byte,
+ * then per plane one length byte, the coefficients, alignment to a byte and
+ * padding up to a multiple of s->size_scaler bytes. */
+static int count_hq_slice(VC2EncContext *s, int slice_x,
+                          int slice_y, int quant_idx)
+{
+    uint8_t quants[MAX_DWT_LEVELS][4];
+    int plane, level, orientation, x, y;
+    int bits = 0;
+
+    bits += 8*s->prefix_bytes;
+    bits += 8; /* quant_idx */
+
+    /* Per-subband quantizers, as in encode_hq_slice() */
+    for (level = 0; level < s->wavelet_depth; level++)
+        for (orientation = !!level; orientation < 4; orientation++)
+            quants[level][orientation] = FFMAX(quant_idx - s->quant[level][orientation], 0);
+
+    for (plane = 0; plane < 3; plane++) {
+        const int bytes_start = bits >> 3;
+        int bytes_len, pad_s, pad_c;
+
+        bits += 8; /* length byte */
+
+        for (level = 0; level < s->wavelet_depth; level++) {
+            for (orientation = !!level; orientation < 4; orientation++) {
+                const SubBand *b = &s->plane[plane].band[level][orientation];
+                const int q = quants[level][orientation];
+                const int qfactor = ff_dirac_qscale_tab[q];
+
+                const int left   = b->width  * slice_x    / s->num_x;
+                const int right  = b->width  *(slice_x+1) / s->num_x;
+                const int top    = b->height * slice_y    / s->num_y;
+                const int bottom = b->height *(slice_y+1) / s->num_y;
+
+                const dwtcoef *row = b->buf + top * b->stride;
+
+                for (y = top; y < bottom; y++) {
+                    for (x = left; x < right; x++) {
+                        qcoef coeff = (qcoef)row[x];
+                        if (coeff < -COEF_LUT_TAB || coeff >= COEF_LUT_TAB) {
+                            /* Outside the LUT: quantize and measure */
+                            QUANT(coeff)
+                            bits += count_vc2_ue_uint(abs(coeff));
+                            bits += !!coeff;
+                        } else {
+                            bits += s->coef_lut_len[2*q*COEF_LUT_TAB + coeff + COEF_LUT_TAB];
+                        }
+                    }
+                    row += b->stride;
+                }
+            }
+        }
+
+        /* Round up to a whole byte */
+        bits += FFALIGN(bits, 8) - bits;
+        /* Payload length, not counting the length byte itself */
+        bytes_len = (bits >> 3) - bytes_start - 1;
+        pad_s = FFALIGN(bytes_len, s->size_scaler)/s->size_scaler;
+        pad_c = (pad_s*s->size_scaler) - bytes_len;
+        bits += pad_c*8;
+    }
+
+    return bits;
+}
+
+/* Approaches the best possible quantizer asymptotically. It's kind of
+ * exhaustive, but we have a LUT to get the coefficient size in bits.
+ * Guaranteed to never overshoot, which is apparently very important when
+ * streaming.
+ *
+ * Runs once per slice (arg is that slice's SliceArgs). Starting from
+ * s->q_start, the quantizer is moved by a step that halves every iteration
+ * until the slice size lies between bits_ceil and bits_ceil reduced by
+ * s->tolerance percent, or until the search revisits the quantizer from two
+ * iterations back. Stores the chosen index in quant_idx and the resulting
+ * slice size in bytes. Always returns 0. */
+static int rate_control(AVCodecContext *avctx, void *arg)
+{
+    SliceArgs *slice_dat = arg;
+    VC2EncContext *s = slice_dat->ctx;
+    const int sx = slice_dat->x;
+    const int sy = slice_dat->y;
+    /* The coefficient LUTs built in vc2_encode_init() cover the indices
+     * 0 .. q_ceil-1 only, so q_ceil itself must never be tried. */
+    const int quant_max = FFMAX(s->q_ceil - 1, 0);
+    int bits_last = INT_MAX, quant_buf[2] = {-1, -1};
+    int quant = av_clip(s->q_start, 0, quant_max), range = s->q_start/3;
+    const int64_t top = slice_dat->bits_ceil;
+    const double percent = s->tolerance;
+    const double bottom = top - top*(percent/100.0f);
+    int bits = count_hq_slice(s, sx, sy, quant);
+    range -= range & 1; /* Make it an even number */
+    while ((bits > top) || (bits < bottom)) {
+        /* Too big: quantize harder. Too small: quantize less. */
+        range *= bits > top ? +1 : -1;
+        quant = av_clip(quant + range, 0, quant_max);
+        bits = count_hq_slice(s, sx, sy, quant);
+        range = av_clip(range/2, 1, s->q_ceil);
+        /* Same quantizer as two steps ago: the search is oscillating, so
+         * keep whichever of the last two candidates is smaller. */
+        if (quant_buf[1] == quant) {
+            quant = bits_last < bits ? quant_buf[0] : quant;
+            bits  = bits_last < bits ? bits_last : bits;
+            break;
+        }
+        quant_buf[1] = quant_buf[0];
+        quant_buf[0] = quant;
+        bits_last = bits;
+    }
+    slice_dat->quant_idx = av_clip(quant, 0, quant_max);
+    slice_dat->bytes = FFALIGN((bits >> 3), s->size_scaler) + 4 + s->prefix_bytes;
+
+    return 0;
+}
+
+/* Prepares one SliceArgs entry per slice (context, grid position and bit
+ * budget) and runs rate_control() on all of them through the codec's
+ * execute() callback. */
+static void calc_slice_sizes(VC2EncContext *s)
+{
+    int i;
+    const int num_slices = s->num_x*s->num_y;
+    SliceArgs *enc_args = s->slice_args;
+
+    /* Entries are stored row by row: index = num_x*y + x */
+    for (i = 0; i < num_slices; i++) {
+        SliceArgs *args = &enc_args[i];
+        args->ctx = s;
+        args->x = i % s->num_x;
+        args->y = i / s->num_x;
+        args->bits_ceil = s->slice_max_bytes << 3;
+    }
+
+    /* Determine quantization indices and bytes per slice */
+    s->avctx->execute(s->avctx, rate_control, enc_args, NULL, s->num_x*s->num_y,
+                      sizeof(SliceArgs));
+}
+
+/* VC-2 13.5.3 - hq_slice
+ * Writes one slice into its own PutBitContext: prefix bytes, the quantizer
+ * index, then for each of the three planes a length byte, the coefficients
+ * and padding. The last plane is padded so that the slice occupies exactly
+ * the byte count chosen by rate_control(). Always returns 0. */
+static int encode_hq_slice(AVCodecContext *avctx, void *arg)
+{
+    SliceArgs *slice_dat = arg;
+    VC2EncContext *s = slice_dat->ctx;
+    PutBitContext *pb = &slice_dat->pb;
+    const int quant_idx = slice_dat->quant_idx;
+    uint8_t quants[MAX_DWT_LEVELS][4];
+    int plane, level, orientation;
+
+    avpriv_align_put_bits(pb);
+    put_padding(pb, s->prefix_bytes);
+    put_bits(pb, 8, quant_idx);
+
+    /* Slice quantization (slice_quantizers() in the specs) */
+    for (level = 0; level < s->wavelet_depth; level++)
+        for (orientation = !!level; orientation < 4; orientation++)
+            quants[level][orientation] = FFMAX(quant_idx - s->quant[level][orientation], 0);
+
+    /* Luma + 2 Chroma planes */
+    for (plane = 0; plane < 3; plane++) {
+        const int bytes_start = put_bits_count(pb) >> 3;
+        int bytes_len, pad_s, pad_c, len_diff = 0;
+
+        /* Placeholder for the length byte, patched once the size is known */
+        put_bits(pb, 8, 0);
+
+        for (level = 0; level < s->wavelet_depth; level++)
+            for (orientation = !!level; orientation < 4; orientation++)
+                encode_subband(s, pb, slice_dat->x, slice_dat->y,
+                               &s->plane[plane].band[level][orientation],
+                               quants[level][orientation]);
+
+        avpriv_align_put_bits(pb);
+        bytes_len = (put_bits_count(pb) >> 3) - bytes_start - 1;
+
+        /* The last plane also absorbs whatever is left of the slice budget */
+        if (plane == 2)
+            len_diff = slice_dat->bytes - (put_bits_count(pb) >> 3);
+
+        pad_s = FFALIGN((bytes_len + len_diff), s->size_scaler)/s->size_scaler;
+        pad_c = (pad_s*s->size_scaler) - bytes_len;
+
+        pb->buf[bytes_start] = pad_s;
+        put_padding(pb, pad_c);
+    }
+
+    return 0;
+}
+
+/* VC-2 13.5.1 - low_delay_transform_data()
+ * Gives every slice a writer over its own consecutive region of the output
+ * buffer, sized by rate_control(), encodes all slices through the codec's
+ * execute() callback and then advances the main writer past them. Also
+ * folds each slice's quantizer into s->q_start. Always returns 0. */
+static int encode_slices(VC2EncContext *s)
+{
+    uint8_t *out;
+    int i, offset = 0;
+    const int num_slices = s->num_x*s->num_y;
+    SliceArgs *enc_args = s->slice_args;
+
+    avpriv_align_put_bits(&s->pb);
+    flush_put_bits(&s->pb);
+    out = put_bits_ptr(&s->pb);
+
+    /* Entries are stored row by row, so a linear walk visits them in the
+     * same order as a y-then-x nested loop */
+    for (i = 0; i < num_slices; i++) {
+        SliceArgs *args = &enc_args[i];
+        init_put_bits(&args->pb, out + offset, args->bytes);
+        s->q_start = (s->q_start + args->quant_idx)/2;
+        offset += args->bytes;
+    }
+
+    s->avctx->execute(s->avctx, encode_hq_slice, enc_args, NULL, s->num_x*s->num_y,
+                      sizeof(SliceArgs));
+
+    skip_put_bytes(&s->pb, offset);
+
+    return 0;
+}
+
+/*
+ * Transform basics for a 3 level transform
+ * |---------------------------------------------------------------------|
+ * |  LL-0  | HL-0  |                 |                                  |
+ * |--------|-------|      HL-1       |                                  |
+ * |  LH-0  | HH-0  |                 |                                  |
+ * |----------------|-----------------|              HL-2                |
+ * |                |                 |                                  |
+ * |     LH-1       |      HH-1       |                                  |
+ * |                |                 |                                  |
+ * |----------------------------------|----------------------------------|
+ * |                                  |                                  |
+ * |                                  |                                  |
+ * |                                  |                                  |
+ * |              LH-2                |              HH-2                |
+ * |                                  |                                  |
+ * |                                  |                                  |
+ * |                                  |                                  |
+ * |---------------------------------------------------------------------|
+ *
+ * DWT transforms are generally applied by splitting the image in two vertically
+ * and applying a low pass transform on the left part and a corresponding high
+ * pass transform on the right hand side. This is known as the horizontal filter
+ * stage.
+ * After that, the same operation is performed except the image is divided
+ * horizontally, with the high pass on the lower and the low pass on the higher
+ * side.
+ * Therefore, you're left with 4 subdivisions - known as low-low, low-high,
+ * high-low and high-high. They're referred to as orientations in the decoder
+ * and encoder.
+ *
+ * The LL (low-low) area contains the original image downsampled by a factor
+ * of two for every transform level. The remaining areas can be thought of as
+ * the details needed to restore the image perfectly to its original size.
+ */
+
+
+/* Per-plane worker: copies the input samples of one plane into its
+ * coefficient buffer, removing the DC offset, zeroes all padding and applies
+ * the forward wavelet transform in place, once per level.
+ *
+ * arg is the TransformArgs of the plane. Its field member selects the lines
+ * to read: 1 takes every other line starting at the first, 2 every other
+ * line starting at the second, anything else takes all lines.
+ * Always returns 0. */
+static int dwt_plane(AVCodecContext *avctx, void *arg)
+{
+    TransformArgs *transform_dat = arg;
+    VC2EncContext *s = transform_dat->ctx;
+    const void *frame_data = transform_dat->idata;
+    const ptrdiff_t linesize = transform_dat->istride;
+    const int field = transform_dat->field;
+    const Plane *p = transform_dat->plane;
+    VC2TransformContext *t = &transform_dat->t;
+    dwtcoef *buf = p->coef_buf;
+    const int idx = s->wavelet_idx;
+    const int skip = 1 + s->interlaced;
+    /* Bytes between the last sample of a row and the start of the next row.
+     * The buffer is transformed in place and reused between calls, so this
+     * area holds stale or uninitialized values unless it is cleared here. */
+    const size_t row_pad = (p->coef_stride - p->width) * sizeof(dwtcoef);
+
+    int x, y, level, offset;
+    /* Line stride in samples rather than bytes */
+    ptrdiff_t pix_stride = linesize >> (s->bpp - 1);
+
+    if (field == 1) {
+        offset = 0;
+        pix_stride <<= 1;
+    } else if (field == 2) {
+        offset = pix_stride;
+        pix_stride <<= 1;
+    } else {
+        offset = 0;
+    }
+
+    if (s->bpp == 1) {
+        const uint8_t *pix = (const uint8_t *)frame_data + offset;
+        for (y = 0; y < p->height*skip; y+=skip) {
+            for (x = 0; x < p->width; x++) {
+                buf[x] = pix[x] - s->diff_offset;
+            }
+            memset(buf + p->width, 0, row_pad);
+            buf += p->coef_stride;
+            pix += pix_stride;
+        }
+    } else {
+        const uint16_t *pix = (const uint16_t *)frame_data + offset;
+        for (y = 0; y < p->height*skip; y+=skip) {
+            for (x = 0; x < p->width; x++) {
+                buf[x] = pix[x] - s->diff_offset;
+            }
+            memset(buf + p->width, 0, row_pad);
+            buf += p->coef_stride;
+            pix += pix_stride;
+        }
+    }
+
+    /* Clear the rows below the picture */
+    memset(buf, 0, p->coef_stride * (p->dwt_height - p->height) * sizeof(dwtcoef));
+
+    /* Each pass works on an area twice the size of that level's subbands */
+    for (level = s->wavelet_depth-1; level >= 0; level--) {
+        const SubBand *b = &p->band[level][0];
+        t->vc2_subband_dwt[idx](t, p->coef_buf, p->coef_stride,
+                                b->width, b->height);
+    }
+
+    return 0;
+}
+
+/* Writes one complete coded picture to s->pb: sequence header, optional
+ * auxiliary data unit, picture header, slices and an end-of-sequence unit.
+ *
+ * frame    - input picture; data[] and linesize[] of the first three planes
+ *            are read
+ * aux_data - string stored in an auxiliary data unit, or NULL to omit it
+ * field    - handed to dwt_plane() through the transform arguments to select
+ *            which lines of the frame are coded */
+static void encode_frame(VC2EncContext *s, const AVFrame *frame,
+                         const char *aux_data, int field)
+{
+    int i;
+
+    /* Sequence header */
+    encode_parse_info(s, DIRAC_PCODE_SEQ_HEADER);
+    encode_seq_header(s);
+
+    /* Encoder version */
+    if (aux_data) {
+        encode_parse_info(s, DIRAC_PCODE_AUX);
+        avpriv_put_string(&s->pb, aux_data, 1);
+    }
+
+    /* Picture header */
+    encode_parse_info(s, DIRAC_PCODE_PICTURE_HQ);
+    encode_picture_start(s);
+
+    /* One transform job per plane */
+    for (i = 0; i < 3; i++) {
+        s->transform_args[i].ctx   = s;
+        s->transform_args[i].field = field;
+        s->transform_args[i].plane = &s->plane[i];
+        s->transform_args[i].idata = frame->data[i];
+        s->transform_args[i].istride = frame->linesize[i];
+    }
+
+    /* Do a DWT transform */
+    s->avctx->execute(s->avctx, dwt_plane, s->transform_args, NULL, 3,
+                      sizeof(TransformArgs));
+
+    /* Calculate per-slice quantizers and sizes */
+    calc_slice_sizes(s);
+
+    /* Init planes and encode slices */
+    encode_slices(s);
+
+    /* End sequence */
+    encode_parse_info(s, DIRAC_PCODE_END_SEQ);
+}
+
+/* Encodes one input frame into avpkt.
+ *
+ * Derives the byte budget of the picture from the bitrate and time base,
+ * picks a slice size scaler so that the signalled slice size fits in one
+ * byte, allocates the packet and writes one picture, or two (one per field)
+ * when interlaced.
+ *
+ * Returns 0 and sets *got_packet_ptr to 1 on success, or a negative error
+ * code when the budget is not positive or the packet cannot be allocated.
+ *
+ * Runs for every frame, so it is deliberately not marked av_cold. */
+static int vc2_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
+                            const AVFrame *frame, int *got_packet_ptr)
+{
+    int ret;
+    int max_frame_bytes, sig_size = 256;
+    VC2EncContext *s = avctx->priv_data;
+    const char aux_data[] = "FFmpeg version "FFMPEG_VERSION;
+    const int aux_data_size = sizeof(aux_data);
+    const int header_size = 100 + aux_data_size;
+    /* When interlaced, the budget is split between the two fields */
+    int64_t r_bitrate = avctx->bit_rate >> (s->interlaced);
+
+    s->avctx = avctx;
+    s->size_scaler = 1;
+    s->prefix_bytes = 0;
+    s->last_parse_code = 0;
+    s->next_parse_offset = 0;
+
+    /* Rate control */
+    max_frame_bytes = (av_rescale(r_bitrate, s->avctx->time_base.num,
+                                  s->avctx->time_base.den) >> 3) - header_size;
+
+    /* Nothing is left for the slices once the headers are accounted for */
+    if (max_frame_bytes <= 0) {
+        av_log(avctx, AV_LOG_ERROR, "Bitrate too low to encode a picture.\n");
+        return AVERROR(EINVAL);
+    }
+
+    /* Find an appropriate size scaler */
+    while (sig_size > 255) {
+        s->slice_max_bytes = FFALIGN(av_rescale(max_frame_bytes, 1,
+                                     s->num_x*s->num_y), s->size_scaler);
+        s->slice_max_bytes += 4 + s->prefix_bytes;
+        sig_size = s->slice_max_bytes/s->size_scaler; /* Signalled slice size */
+        s->size_scaler <<= 1;
+    }
+
+    ret = ff_alloc_packet2(avctx, avpkt, max_frame_bytes*2, 0);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
+        return ret;
+    } else {
+        init_put_bits(&s->pb, avpkt->data, avpkt->size);
+    }
+
+    /* Only the first picture carries the auxiliary data */
+    encode_frame(s, frame, aux_data, s->interlaced);
+    if (s->interlaced)
+        encode_frame(s, frame, NULL, 2);
+
+    flush_put_bits(&s->pb);
+    avpkt->size = put_bits_count(&s->pb) >> 3;
+
+    *got_packet_ptr = 1;
+
+    return 0;
+}
+
+/* Releases everything vc2_encode_init() allocated: the transform state and
+ * coefficient buffer of each plane, the slice argument array and both
+ * coefficient lookup tables. Always returns 0. */
+static av_cold int vc2_encode_end(AVCodecContext *avctx)
+{
+    VC2EncContext *s = avctx->priv_data;
+    int plane;
+
+    for (plane = 0; plane < 3; plane++) {
+        ff_vc2enc_free_transforms(&s->transform_args[plane].t);
+        av_freep(&s->plane[plane].coef_buf);
+    }
+
+    av_freep(&s->slice_args);
+    av_freep(&s->coef_lut_len);
+    av_freep(&s->coef_lut_val);
+
+    return 0;
+}
+
+
+/* Encoder setup: selects the base video format and level, validates the
+ * slice size options, lays out the planes and their subbands, allocates the
+ * coefficient buffers, transform state and slice arguments, and precomputes
+ * the coefficient lookup tables.
+ *
+ * Returns 0 on success, AVERROR_UNKNOWN for an unsupported configuration and
+ * AVERROR(ENOMEM) after releasing everything when an allocation or the
+ * transform setup fails. */
+static av_cold int vc2_encode_init(AVCodecContext *avctx)
+{
+    Plane *p;
+    SubBand *b;
+    int i, j, level, o, shift;
+    VC2EncContext *s = avctx->priv_data;
+
+    s->picture_number = 0;
+
+    /* Total allowed quantization range */
+    s->q_ceil    = MAX_QUANT_INDEX;
+
+    s->ver.major = 2;
+    s->ver.minor = 0;
+    s->profile   = 3;
+    s->level     = 3;
+
+    /* -1 means no standard base video format matched so far */
+    s->base_vf   = -1;
+    s->strict_compliance = 1;
+
+    /* Mark unknown as progressive */
+    s->interlaced = !((avctx->field_order == AV_FIELD_UNKNOWN) ||
+                      (avctx->field_order == AV_FIELD_PROGRESSIVE));
+
+    /* Base video formats are only matched for 10-bit 4:2:2 input, by frame
+     * size and time base */
+    if (avctx->pix_fmt == AV_PIX_FMT_YUV422P10) {
+        if (avctx->width == 1280 && avctx->height == 720) {
+            s->level = 3;
+            if (avctx->time_base.num == 1001 && avctx->time_base.den == 60000)
+                s->base_vf = 9;
+            if (avctx->time_base.num == 1 && avctx->time_base.den == 50)
+                s->base_vf = 10;
+        } else if (avctx->width == 1920 && avctx->height == 1080) {
+            s->level = 3;
+            if (s->interlaced) {
+                if (avctx->time_base.num == 1001 && avctx->time_base.den == 30000)
+                    s->base_vf = 11;
+                if (avctx->time_base.num == 1 && avctx->time_base.den == 50)
+                    s->base_vf = 12;
+            } else {
+                if (avctx->time_base.num == 1001 && avctx->time_base.den == 60000)
+                    s->base_vf = 13;
+                if (avctx->time_base.num == 1 && avctx->time_base.den == 50)
+                    s->base_vf = 14;
+                if (avctx->time_base.num == 1001 && avctx->time_base.den == 24000)
+                    s->base_vf = 21;
+            }
+        } else if (avctx->width == 3840 && avctx->height == 2160) {
+            s->level = 6;
+            if (avctx->time_base.num == 1001 && avctx->time_base.den == 60000)
+                s->base_vf = 17;
+            if (avctx->time_base.num == 1 && avctx->time_base.den == 50)
+                s->base_vf = 18;
+        }
+    }
+
+    if (s->interlaced && s->base_vf <= 0) {
+        av_log(avctx, AV_LOG_ERROR, "Interlacing not supported with non standard formats!\n");
+        return AVERROR_UNKNOWN;
+    }
+
+    if (s->interlaced)
+        av_log(avctx, AV_LOG_WARNING, "Interlacing enabled!\n");
+
+    /* x & (x - 1) is zero only for powers of two */
+    if ((s->slice_width  & (s->slice_width  - 1)) ||
+        (s->slice_height & (s->slice_height - 1))) {
+        av_log(avctx, AV_LOG_ERROR, "Slice size is not a power of two!\n");
+        return AVERROR_UNKNOWN;
+    }
+
+    if ((s->slice_width > avctx->width) ||
+        (s->slice_height > avctx->height)) {
+        av_log(avctx, AV_LOG_ERROR, "Slice size is bigger than the image!\n");
+        return AVERROR_UNKNOWN;
+    }
+
+    /* A non-standard format is only accepted when the user relaxed the
+     * compliance level */
+    if (s->base_vf <= 0) {
+        if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL) {
+            s->strict_compliance = s->base_vf = 0;
+            av_log(avctx, AV_LOG_WARNING, "Disabling strict compliance\n");
+        } else {
+            av_log(avctx, AV_LOG_WARNING, "Given format does not strictly comply with "
+                   "the specifications, please add a -strict -1 flag to use it\n");
+            return AVERROR_UNKNOWN;
+        }
+    } else {
+        av_log(avctx, AV_LOG_INFO, "Selected base video format = %i\n", s->base_vf);
+    }
+
+    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_x_shift, &s->chroma_y_shift);
+
+    /* Planes initialization */
+    for (i = 0; i < 3; i++) {
+        int w, h;
+        p = &s->plane[i];
+        p->width      = avctx->width  >> (i ? s->chroma_x_shift : 0);
+        p->height     = avctx->height >> (i ? s->chroma_y_shift : 0);
+        /* Each field holds half of the lines */
+        if (s->interlaced)
+            p->height >>= 1;
+        /* Transform dimensions are padded to a multiple of 2^depth */
+        p->dwt_width  = w = FFALIGN(p->width,  (1 << s->wavelet_depth));
+        p->dwt_height = h = FFALIGN(p->height, (1 << s->wavelet_depth));
+        p->coef_stride = FFALIGN(p->dwt_width, 32);
+        p->coef_buf = av_malloc(p->coef_stride*p->dwt_height*sizeof(dwtcoef));
+        if (!p->coef_buf)
+            goto alloc_fail;
+        /* Subband dimensions halve with every level, deepest index first */
+        for (level = s->wavelet_depth-1; level >= 0; level--) {
+            w = w >> 1;
+            h = h >> 1;
+            for (o = 0; o < 4; o++) {
+                b = &p->band[level][o];
+                b->width  = w;
+                b->height = h;
+                b->stride = p->coef_stride;
+                /* Bit 0 of o selects the right half, o > 1 the lower half */
+                shift = (o > 1)*b->height*b->stride + (o & 1)*b->width;
+                b->buf = p->coef_buf + shift;
+            }
+        }
+
+        /* DWT init */
+        /* Every plane's transform is sized from plane 0 */
+        if (ff_vc2enc_init_transforms(&s->transform_args[i].t,
+                                        s->plane[0].coef_stride,
+                                        s->plane[0].dwt_height))
+            goto alloc_fail;
+    }
+
+    /* Slices */
+    s->num_x = s->plane[0].dwt_width/s->slice_width;
+    s->num_y = s->plane[0].dwt_height/s->slice_height;
+
+    s->slice_args = av_malloc(s->num_x*s->num_y*sizeof(SliceArgs));
+    if (!s->slice_args)
+        goto alloc_fail;
+
+    /* Lookup tables */
+    /* One table of 2*COEF_LUT_TAB entries per quantizer index below q_ceil */
+    s->coef_lut_len = av_malloc(2*COEF_LUT_TAB*s->q_ceil*sizeof(*s->coef_lut_len));
+    if (!s->coef_lut_len)
+        goto alloc_fail;
+
+    s->coef_lut_val = av_malloc(2*COEF_LUT_TAB*s->q_ceil*sizeof(*s->coef_lut_val));
+    if (!s->coef_lut_val)
+        goto alloc_fail;
+
+    for (i = 0; i < s->q_ceil; i++) {
+        for (j = -COEF_LUT_TAB; j < COEF_LUT_TAB; j++) {
+            /* Pointers to the middle of table i, so j can be negative */
+            uint8_t  *len_lut = &s->coef_lut_len[2*i*COEF_LUT_TAB + COEF_LUT_TAB];
+            uint32_t *val_lut = &s->coef_lut_val[2*i*COEF_LUT_TAB + COEF_LUT_TAB];
+            coeff_quantize_get(j, ff_dirac_qscale_tab[i], &len_lut[j], &val_lut[j]);
+        }
+    }
+
+    return 0;
+
+alloc_fail:
+    vc2_encode_end(avctx);
+    av_log(avctx, AV_LOG_ERROR, "Unable to allocate memory!\n");
+    return AVERROR(ENOMEM);
+}
+
+#define VC2ENC_FLAGS (AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
+/* Private options of the encoder. Each entry stores its value in the
+ * VC2EncContext field named by its offsetof(); the indented AV_OPT_TYPE_CONST
+ * entries are named values for the option listed above them.
+ * NOTE(review): the upper bounds of "wavelet_type" and "qm" assume that
+ * VC2_TRANSFORMS_NB and VC2_QM_NB are one past the last valid value - confirm
+ * against their declarations. */
+static const AVOption vc2enc_options[] = {
+    {"tolerance",     "Max undershoot in percent", offsetof(VC2EncContext, tolerance), AV_OPT_TYPE_DOUBLE, {.dbl = 10.0f}, 0.0f, 45.0f, VC2ENC_FLAGS, "tolerance"},
+    {"slice_width",   "Slice width",  offsetof(VC2EncContext, slice_width), AV_OPT_TYPE_INT, {.i64 = 128}, 32, 1024, VC2ENC_FLAGS, "slice_width"},
+    {"slice_height",  "Slice height", offsetof(VC2EncContext, slice_height), AV_OPT_TYPE_INT, {.i64 = 64}, 8, 1024, VC2ENC_FLAGS, "slice_height"},
+    {"wavelet_depth", "Transform depth", offsetof(VC2EncContext, wavelet_depth), AV_OPT_TYPE_INT, {.i64 = 5}, 1, 5, VC2ENC_FLAGS, "wavelet_depth"},
+    {"wavelet_type",  "Transform type",  offsetof(VC2EncContext, wavelet_idx), AV_OPT_TYPE_INT, {.i64 = VC2_TRANSFORM_9_7}, 0, VC2_TRANSFORMS_NB - 1, VC2ENC_FLAGS, "wavelet_idx"},
+        {"9_7",       "Deslauriers-Dubuc (9,7)", 0, AV_OPT_TYPE_CONST, {.i64 = VC2_TRANSFORM_9_7}, INT_MIN, INT_MAX, VC2ENC_FLAGS, "wavelet_idx"},
+        {"5_3",       "LeGall (5,3)",            0, AV_OPT_TYPE_CONST, {.i64 = VC2_TRANSFORM_5_3}, INT_MIN, INT_MAX, VC2ENC_FLAGS, "wavelet_idx"},
+    {"qm", "Custom quantization matrix", offsetof(VC2EncContext, quant_matrix), AV_OPT_TYPE_INT, {.i64 = VC2_QM_DEF}, 0, VC2_QM_NB - 1, VC2ENC_FLAGS, "quant_matrix"},
+        {"default",   "Default from the specifications", 0, AV_OPT_TYPE_CONST, {.i64 = VC2_QM_DEF}, INT_MIN, INT_MAX, VC2ENC_FLAGS, "quant_matrix"},
+        {"color",     "Prevents low bitrate discoloration", 0, AV_OPT_TYPE_CONST, {.i64 = VC2_QM_COL}, INT_MIN, INT_MAX, VC2ENC_FLAGS, "quant_matrix"},
+        {"flat",      "Optimize for PSNR", 0, AV_OPT_TYPE_CONST, {.i64 = VC2_QM_FLAT}, INT_MIN, INT_MAX, VC2ENC_FLAGS, "quant_matrix"},
+    {NULL}
+};
+
+/* AVClass of the encoder's private context; ties the option table above to
+ * the codec (referenced by .priv_class of the codec definition). */
+static const AVClass vc2enc_class = {
+    .class_name = "SMPTE VC-2 encoder",
+    .category = AV_CLASS_CATEGORY_ENCODER,
+    .option = vc2enc_options,
+    .item_name = av_default_item_name,
+    .version = LIBAVUTIL_VERSION_INT
+};
+
+/* Codec-specific defaults for generic options: "b" is set to 600000000. */
+static const AVCodecDefault vc2enc_defaults[] = {
+    { "b",              "600000000"   },
+    { NULL },
+};
+
+/* Accepted input formats: planar YUV 4:2:0, 4:2:2 and 4:4:4 at 8, 10 and
+ * 12 bits. The list is terminated by AV_PIX_FMT_NONE. */
+static const enum AVPixelFormat allowed_pix_fmts[] = {
+    AV_PIX_FMT_YUV420P,   AV_PIX_FMT_YUV422P,   AV_PIX_FMT_YUV444P,
+    AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
+    AV_PIX_FMT_YUV420P12, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV444P12,
+    AV_PIX_FMT_NONE
+};
+
+/* Codec definition: wires the init, per-frame and close callbacks defined
+ * above to the option class, defaults and pixel format list. The encoder is
+ * named "vc2" and uses the Dirac codec ID. */
+AVCodec ff_vc2_encoder = {
+    .name = "vc2",
+    .long_name = NULL_IF_CONFIG_SMALL("SMPTE VC-2"),
+    .type = AVMEDIA_TYPE_VIDEO,
+    .id = AV_CODEC_ID_DIRAC,
+    .priv_data_size = sizeof(VC2EncContext),
+    .init = vc2_encode_init,
+    .close = vc2_encode_end,
+    .capabilities = AV_CODEC_CAP_SLICE_THREADS,
+    .encode2 = vc2_encode_frame,
+    .priv_class = &vc2enc_class,
+    .defaults = vc2enc_defaults,
+    .pix_fmts = allowed_pix_fmts
+};
diff --git a/libavcodec/vc2enc_dwt.c b/libavcodec/vc2enc_dwt.c
new file mode 100644
index 00000000..eb341684
--- /dev/null
+++ b/libavcodec/vc2enc_dwt.c
@@ -0,0 +1,229 @@
+/*
+ * Copyright (C) 2007 Marco Gerards <marco@gnu.org>
+ * Copyright (C) 2016 Open Broadcast Systems Ltd.
+ * Author        2016 Rostislav Pehlivanov <atomnuker@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/mem.h"
+#include "vc2enc_dwt.h"
+
+/* The transforms leave their output interleaved: even rows alternate LL/HL
+ * samples and odd rows alternate LH/HH samples. This helper gathers every
+ * band into its own quadrant of the plane, which makes the bands easier to
+ * encode and lets another transform level run on the LL quadrant. */
+static av_always_inline void deinterleave(dwtcoef *linell, ptrdiff_t stride,
+                                          int width, int height, dwtcoef *synthl)
+{
+    int x, y;
+    const ptrdiff_t synthw = width << 1;              /* interleaved row length */
+    dwtcoef *const linehl  = linell + width;          /* top-right quadrant     */
+    dwtcoef *const linelh  = linell + height*stride;  /* bottom-left quadrant   */
+    dwtcoef *const linehh  = linelh + width;          /* bottom-right quadrant  */
+
+    /* Every output row consumes one even and one odd interleaved row. */
+    for (y = 0; y < height; y++) {
+        const dwtcoef *even = synthl + 2*y*synthw;
+        const dwtcoef *odd  = even + synthw;
+        const ptrdiff_t row = y*stride;
+
+        for (x = 0; x < width; x++) {
+            linell[row + x] = even[2*x];
+            linehl[row + x] = even[2*x + 1];
+            linelh[row + x] = odd[2*x];
+            linehh[row + x] = odd[2*x + 1];
+        }
+    }
+}
+
+/* One level of the forward Deslauriers-Dubuc (9,7) transform. A region of
+ * (2*width) x (2*height) coefficients of data (rows stride apart) is copied to
+ * t->buffer, filtered there and written back deinterleaved into four subbands
+ * of width x height. The edge filters need width >= 3 and height >= 3. */
+static void vc2_subband_dwt_97(VC2TransformContext *t, dwtcoef *data,
+                               ptrdiff_t stride, int width, int height)
+{
+    int x, y;
+    dwtcoef *datal = data, *synth = t->buffer, *synthl = synth;
+    const ptrdiff_t synth_width  = width  << 1;
+    const ptrdiff_t synth_height = height << 1;
+
+    /* Copy the data to the buffer with one extra bit of precision. Multiply
+     * instead of shifting: left-shifting a negative coefficient is undefined. */
+    for (y = 0; y < synth_height; y++) {
+        for (x = 0; x < synth_width; x++)
+            synthl[x] = datal[x] * 2;
+        synthl += synth_width;
+        datal += stride;
+    }
+
+    /* Horizontal synthesis. */
+    synthl = synth;
+    for (y = 0; y < synth_height; y++) {
+        /* Lifting stage 2. */
+        synthl[1] -= (8*synthl[0] + 9*synthl[2] - synthl[4] + 8) >> 4;
+        for (x = 1; x < width - 2; x++)
+            synthl[2*x + 1] -= (9*synthl[2*x] + 9*synthl[2*x + 2] - synthl[2*x + 4] -
+                                synthl[2 * x - 2] + 8) >> 4;
+        synthl[synth_width - 1] -= (17*synthl[synth_width - 2] - synthl[synth_width - 4] + 8) >> 4;
+        synthl[synth_width - 3] -= (8*synthl[synth_width - 2] +
+                                    9*synthl[synth_width - 4] -
+                                    synthl[synth_width - 6] + 8) >> 4;
+        /* Lifting stage 1. */
+        synthl[0] += (synthl[1] + synthl[1] + 2) >> 2;
+        for (x = 1; x < width - 1; x++)
+            synthl[2*x] += (synthl[2*x - 1] + synthl[2*x + 1] + 2) >> 2;
+        synthl[synth_width - 2] += (synthl[synth_width - 3] + synthl[synth_width - 1] + 2) >> 2;
+        synthl += synth_width;
+    }
+
+    /* Vertical synthesis: Lifting stage 2. */
+    synthl = synth + synth_width;
+    for (x = 0; x < synth_width; x++)
+        synthl[x] -= (8*synthl[x - synth_width] + 9*synthl[x + synth_width] -
+                      synthl[x + 3 * synth_width] + 8) >> 4;
+
+    synthl = synth + (synth_width << 1);
+    for (y = 1; y < height - 2; y++) {
+        for (x = 0; x < synth_width; x++)
+            synthl[x + synth_width] -= (9*synthl[x] +
+                                        9*synthl[x + 2 * synth_width] -
+                                        synthl[x - 2 * synth_width] -
+                                        synthl[x + 4 * synth_width] + 8) >> 4;
+        synthl += synth_width << 1;
+    }
+
+    /* Bottom edge: the last two odd rows use shortened filters. */
+    synthl = synth + (synth_height - 1) * synth_width;
+    for (x = 0; x < synth_width; x++) {
+        synthl[x] -= (17*synthl[x - synth_width] - synthl[x - 3*synth_width] + 8) >> 4;
+        synthl[x - 2*synth_width] -= (9*synthl[x - 3*synth_width] +
+                                      8*synthl[x - 1*synth_width] -
+                                      synthl[x - 5*synth_width] + 8) >> 4;
+    }
+
+    /* Vertical synthesis: Lifting stage 1. */
+    synthl = synth;
+    for (x = 0; x < synth_width; x++)
+        synthl[x] += (synthl[x + synth_width] + synthl[x + synth_width] + 2) >> 2;
+
+    synthl = synth + (synth_width << 1);
+    for (y = 1; y < height - 1; y++) {
+        for (x = 0; x < synth_width; x++)
+            synthl[x] += (synthl[x - synth_width] + synthl[x + synth_width] + 2) >> 2;
+        synthl += synth_width << 1;
+    }
+
+    synthl = synth + (synth_height - 2) * synth_width;
+    for (x = 0; x < synth_width; x++)
+        synthl[x] += (synthl[x - synth_width] + synthl[x + synth_width] + 2) >> 2;
+
+    deinterleave(data, stride, width, height, synth);
+}
+
+/* One level of the forward LeGall (5,3) transform. A region of
+ * (2*width) x (2*height) coefficients of data (rows stride apart) is copied to
+ * t->buffer, filtered there and written back deinterleaved into four subbands
+ * of width x height. The edge filters need width >= 2 and height >= 2. */
+static void vc2_subband_dwt_53(VC2TransformContext *t, dwtcoef *data,
+                               ptrdiff_t stride, int width, int height)
+{
+    int x, y;
+    dwtcoef *synth = t->buffer, *synthl = synth, *datal = data;
+    const ptrdiff_t synth_width  = width  << 1;
+    const ptrdiff_t synth_height = height << 1;
+
+    /* Copy the data to the buffer with one extra bit of precision. Multiply
+     * instead of shifting: left-shifting a negative coefficient is undefined. */
+    for (y = 0; y < synth_height; y++) {
+        for (x = 0; x < synth_width; x++)
+            synthl[x] = datal[x] * 2;
+        synthl += synth_width;
+        datal  += stride;
+    }
+
+    /* Horizontal synthesis. */
+    synthl = synth;
+    for (y = 0; y < synth_height; y++) {
+        /* Lifting stage 2. */
+        for (x = 0; x < width - 1; x++)
+            synthl[2 * x + 1] -= (synthl[2 * x] + synthl[2 * x + 2] + 1) >> 1;
+
+        synthl[synth_width - 1] -= (2*synthl[synth_width - 2] + 1) >> 1;
+
+        /* Lifting stage 1. */
+        synthl[0] += (2*synthl[1] + 2) >> 2;
+        for (x = 1; x < width - 1; x++)
+            synthl[2 * x] += (synthl[2 * x - 1] + synthl[2 * x + 1] + 2) >> 2;
+
+        synthl[synth_width - 2] += (synthl[synth_width - 3] + synthl[synth_width - 1] + 2) >> 2;
+        synthl += synth_width;
+    }
+
+    /* Vertical synthesis: Lifting stage 2. */
+    synthl = synth + synth_width;
+    for (x = 0; x < synth_width; x++)
+        synthl[x] -= (synthl[x - synth_width] + synthl[x + synth_width] + 1) >> 1;
+
+    synthl = synth + (synth_width << 1);
+    for (y = 1; y < height - 1; y++) {
+        for (x = 0; x < synth_width; x++)
+            synthl[x + synth_width] -= (synthl[x] + synthl[x + synth_width * 2] + 1) >> 1;
+        synthl += (synth_width << 1);
+    }
+
+    synthl = synth + (synth_height - 1) * synth_width;
+    for (x = 0; x < synth_width; x++)
+        synthl[x] -= (2*synthl[x - synth_width] + 1) >> 1;
+
+    /* Vertical synthesis: Lifting stage 1. */
+    synthl = synth;
+    for (x = 0; x < synth_width; x++)
+        synthl[x] += (2*synthl[synth_width + x] + 2) >> 2;
+
+    synthl = synth + (synth_width << 1);
+    for (y = 1; y < height - 1; y++) {
+        for (x = 0; x < synth_width; x++)
+            synthl[x] += (synthl[x + synth_width] + synthl[x - synth_width] + 2) >> 2;
+        synthl += (synth_width << 1);
+    }
+
+    synthl = synth + (synth_height - 2)*synth_width;
+    for (x = 0; x < synth_width; x++)
+        synthl[x] += (synthl[x - synth_width] + synthl[x + synth_width] + 2) >> 2;
+
+    deinterleave(data, stride, width, height, synth);
+}
+
+/* Registers the supported transforms and allocates their shared scratch buffer
+ * of 2*p_width*p_height coefficients. Returns 0 on success, 1 on failure. */
+av_cold int ff_vc2enc_init_transforms(VC2TransformContext *s, int p_width, int p_height)
+{
+    s->vc2_subband_dwt[VC2_TRANSFORM_9_7]    = vc2_subband_dwt_97;
+    s->vc2_subband_dwt[VC2_TRANSFORM_5_3]    = vc2_subband_dwt_53;
+
+    /* Sizes are multiplied as size_t; av_malloc_array() rejects overflow. */
+    s->buffer = av_malloc_array((size_t)p_width * p_height, 2 * sizeof(dwtcoef));
+    return !s->buffer;
+}
+
+av_cold void ff_vc2enc_free_transforms(VC2TransformContext *s)
+{
+    av_freep(&s->buffer); /* release the scratch buffer from ff_vc2enc_init_transforms() */
+}
diff --git a/libavcodec/vc2enc_dwt.h b/libavcodec/vc2enc_dwt.h
new file mode 100644
index 00000000..8e1b6149
--- /dev/null
+++ b/libavcodec/vc2enc_dwt.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2016 Open Broadcast Systems Ltd.
+ * Author        2016 Rostislav Pehlivanov <atomnuker@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VC2ENC_DWT_H
+#define AVCODEC_VC2ENC_DWT_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+typedef int16_t dwtcoef;
+typedef int32_t qcoef;   /* Quantization needs more precision */
+
+/* Transform identifiers. Only Deslauriers-Dubuc (9,7) and LeGall (5,3) are
+ * supported; ff_vc2enc_init_transforms() installs no function for the rest. */
+enum VC2TransformType {
+    VC2_TRANSFORM_9_7    = 0,   /* Deslauriers-Dubuc (9,7)  */
+    VC2_TRANSFORM_5_3    = 1,   /* LeGall (5,3)             */
+    VC2_TRANSFORM_13_7   = 2,   /* Deslauriers-Dubuc (13,7) */
+    VC2_TRANSFORM_HAAR   = 3,   /* Haar without shift       */
+    VC2_TRANSFORM_HAAR_S = 4,   /* Haar with 1 shift/lvl    */
+    VC2_TRANSFORM_FIDEL  = 5,   /* Fidelity filter          */
+    VC2_TRANSFORM_9_7_I  = 6,   /* Daubechies (9,7)         */
+
+    VC2_TRANSFORMS_NB           /* entry count; sizes the vc2_subband_dwt table */
+};
+
+typedef struct VC2TransformContext {
+    dwtcoef *buffer; /* scratch memory, allocated by ff_vc2enc_init_transforms() */
+    void (*vc2_subband_dwt[VC2_TRANSFORMS_NB])(struct VC2TransformContext *t,
+                                               dwtcoef *data, ptrdiff_t stride,
+                                               int width, int height); /* one slot per VC2TransformType */
+} VC2TransformContext;
+
+int  ff_vc2enc_init_transforms(VC2TransformContext *t, int p_width, int p_height);
+void ff_vc2enc_free_transforms(VC2TransformContext *t);
+
+#endif /* AVCODEC_VC2ENC_DWT_H */
diff --git a/libavcodec/vcr1.c b/libavcodec/vcr1.c
index f8281ea0..28a5eec7 100644
--- a/libavcodec/vcr1.c
+++ b/libavcodec/vcr1.c
@@ -129,5 +129,5 @@ AVCodec ff_vcr1_decoder = {
     .priv_data_size = sizeof(VCR1Context),
     .init           = vcr1_decode_init,
     .decode         = vcr1_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/vda.c b/libavcodec/vda.c
index 5867cae1..4670140c 100644
--- a/libavcodec/vda.c
+++ b/libavcodec/vda.c
@@ -21,7 +21,7 @@
 #include "libavutil/mem.h"
 
 #include "vda.h"
-#include "vda_internal.h"
+#include "vda_vt_internal.h"
 
 #if CONFIG_H264_VDA_HWACCEL
 AVVDAContext *av_vda_alloc_context(void)
diff --git a/libavcodec/vda_h264.c b/libavcodec/vda_h264.c
index 4d2274d6..8c526c07 100644
--- a/libavcodec/vda_h264.c
+++ b/libavcodec/vda_h264.c
@@ -32,20 +32,7 @@ struct vda_buffer {
     CVPixelBufferRef cv_buffer;
 };
 #include "internal.h"
-#include "vda_internal.h"
-
-typedef struct VDAContext {
-    // The current bitstream buffer.
-    uint8_t             *bitstream;
-
-    // The current size of the bitstream.
-    int                  bitstream_size;
-
-    // The reference size used for fast reallocation.
-    int                  allocated_size;
-
-    CVImageBufferRef frame;
-} VDAContext;
+#include "vda_vt_internal.h"
 
 /* Decoder callback that adds the vda frame to the queue in display order. */
 static void vda_decoder_callback(void *vda_hw_ctx,
@@ -68,7 +55,7 @@ static void vda_decoder_callback(void *vda_hw_ctx,
     vda_ctx->cv_buffer = CVPixelBufferRetain(image_buffer);
 }
 
-static int vda_sync_decode(VDAContext *ctx, struct vda_context *vda_ctx)
+static int vda_sync_decode(VTContext *ctx, struct vda_context *vda_ctx)
 {
     OSStatus status;
     CFDataRef coded_frame;
@@ -93,7 +80,7 @@ static int vda_old_h264_start_frame(AVCodecContext *avctx,
                                 av_unused const uint8_t *buffer,
                                 av_unused uint32_t size)
 {
-    VDAContext *vda = avctx->internal->hwaccel_priv_data;
+    VTContext *vda = avctx->internal->hwaccel_priv_data;
     struct vda_context *vda_ctx = avctx->hwaccel_context;
 
     if (!vda_ctx->decoder)
@@ -108,7 +95,7 @@ static int vda_old_h264_decode_slice(AVCodecContext *avctx,
                                  const uint8_t *buffer,
                                  uint32_t size)
 {
-    VDAContext *vda             = avctx->internal->hwaccel_priv_data;
+    VTContext *vda              = avctx->internal->hwaccel_priv_data;
     struct vda_context *vda_ctx = avctx->hwaccel_context;
     void *tmp;
 
@@ -141,7 +128,7 @@ static void vda_h264_release_buffer(void *opaque, uint8_t *data)
 static int vda_old_h264_end_frame(AVCodecContext *avctx)
 {
     H264Context *h                      = avctx->priv_data;
-    VDAContext *vda                     = avctx->internal->hwaccel_priv_data;
+    VTContext *vda                      = avctx->internal->hwaccel_priv_data;
     struct vda_context *vda_ctx         = avctx->hwaccel_context;
     AVFrame *frame                      = h->cur_pic_ptr->f;
     struct vda_buffer *context;
@@ -271,17 +258,6 @@ int ff_vda_destroy_decoder(struct vda_context *vda_ctx)
     return status;
 }
 
-static int vda_h264_uninit(AVCodecContext *avctx)
-{
-    VDAContext *vda = avctx->internal->hwaccel_priv_data;
-    if (vda) {
-        av_freep(&vda->bitstream);
-        if (vda->frame)
-            CVPixelBufferRelease(vda->frame);
-    }
-    return 0;
-}
-
 AVHWAccel ff_h264_vda_old_hwaccel = {
     .name           = "h264_vda",
     .type           = AVMEDIA_TYPE_VIDEO,
@@ -290,8 +266,8 @@ AVHWAccel ff_h264_vda_old_hwaccel = {
     .start_frame    = vda_old_h264_start_frame,
     .decode_slice   = vda_old_h264_decode_slice,
     .end_frame      = vda_old_h264_end_frame,
-    .uninit         = vda_h264_uninit,
-    .priv_data_size = sizeof(VDAContext),
+    .uninit         = ff_videotoolbox_uninit,
+    .priv_data_size = sizeof(VTContext),
 };
 
 void ff_vda_output_callback(void *opaque,
@@ -301,7 +277,7 @@ void ff_vda_output_callback(void *opaque,
                             CVImageBufferRef image_buffer)
 {
     AVCodecContext *ctx = opaque;
-    VDAContext *vda = ctx->internal->hwaccel_priv_data;
+    VTContext *vda = ctx->internal->hwaccel_priv_data;
 
 
     if (vda->frame) {
@@ -315,65 +291,10 @@ void ff_vda_output_callback(void *opaque,
     vda->frame = CVPixelBufferRetain(image_buffer);
 }
 
-static int vda_h264_start_frame(AVCodecContext *avctx,
-                                const uint8_t *buffer,
-                                uint32_t size)
-{
-    VDAContext *vda = avctx->internal->hwaccel_priv_data;
-    H264Context *h  = avctx->priv_data;
-
-    if (h->is_avc == 1) {
-        void *tmp;
-        vda->bitstream_size = 0;
-        tmp = av_fast_realloc(vda->bitstream,
-                              &vda->allocated_size,
-                              size);
-        vda->bitstream = tmp;
-        memcpy(vda->bitstream, buffer, size);
-        vda->bitstream_size = size;
-    } else {
-        vda->bitstream_size = 0;
-    }
-    return 0;
-}
-
-static int vda_h264_decode_slice(AVCodecContext *avctx,
-                                 const uint8_t *buffer,
-                                 uint32_t size)
-{
-    VDAContext *vda       = avctx->internal->hwaccel_priv_data;
-    H264Context *h  = avctx->priv_data;
-    void *tmp;
-
-    if (h->is_avc == 1)
-        return 0;
-
-    tmp = av_fast_realloc(vda->bitstream,
-                          &vda->allocated_size,
-                          vda->bitstream_size + size + 4);
-    if (!tmp)
-        return AVERROR(ENOMEM);
-
-    vda->bitstream = tmp;
-
-    AV_WB32(vda->bitstream + vda->bitstream_size, size);
-    memcpy(vda->bitstream + vda->bitstream_size + 4, buffer, size);
-
-    vda->bitstream_size += size + 4;
-
-    return 0;
-}
-
-static void release_buffer(void *opaque, uint8_t *data)
-{
-    CVImageBufferRef frame = (CVImageBufferRef)data;
-    CVPixelBufferRelease(frame);
-}
-
 static int vda_h264_end_frame(AVCodecContext *avctx)
 {
     H264Context *h        = avctx->priv_data;
-    VDAContext *vda       = avctx->internal->hwaccel_priv_data;
+    VTContext *vda        = avctx->internal->hwaccel_priv_data;
     AVVDAContext *vda_ctx = avctx->hwaccel_context;
     AVFrame *frame        = h->cur_pic_ptr->f;
     uint32_t flush_flags  = 1 << 0; ///< kVDADecoderFlush_emitFrames
@@ -403,19 +324,7 @@ static int vda_h264_end_frame(AVCodecContext *avctx)
         return AVERROR_UNKNOWN;
     }
 
-    av_buffer_unref(&frame->buf[0]);
-
-    frame->buf[0] = av_buffer_create((uint8_t*)vda->frame,
-                                     sizeof(vda->frame),
-                                     release_buffer, NULL,
-                                     AV_BUFFER_FLAG_READONLY);
-    if (!frame->buf)
-        return AVERROR(ENOMEM);
-
-    frame->data[3] = (uint8_t*)vda->frame;
-    vda->frame = NULL;
-
-    return 0;
+    return ff_videotoolbox_buffer_create(vda, frame);
 }
 
 int ff_vda_default_init(AVCodecContext *avctx)
@@ -434,26 +343,7 @@ int ff_vda_default_init(AVCodecContext *avctx)
 
     // kCVPixelFormatType_420YpCbCr8Planar;
 
-    /* Each VCL NAL in the bitstream sent to the decoder
-     * is preceded by a 4 bytes length header.
-     * Change the avcC atom header if needed, to signal headers of 4 bytes. */
-    if (avctx->extradata_size >= 4 && (avctx->extradata[4] & 0x03) != 0x03) {
-        uint8_t *rw_extradata;
-
-        if (!(rw_extradata = av_malloc(avctx->extradata_size)))
-            return AVERROR(ENOMEM);
-
-        memcpy(rw_extradata, avctx->extradata, avctx->extradata_size);
-
-        rw_extradata[4] |= 0x03;
-
-        avc_data = CFDataCreate(kCFAllocatorDefault, rw_extradata, avctx->extradata_size);
-
-        av_freep(&rw_extradata);
-    } else {
-        avc_data = CFDataCreate(kCFAllocatorDefault,
-                                avctx->extradata, avctx->extradata_size);
-    }
+    avc_data = ff_videotoolbox_avcc_extradata_create(avctx);
 
     config_info = CFDictionaryCreateMutable(kCFAllocatorDefault,
                                             4,
@@ -521,27 +411,15 @@ int ff_vda_default_init(AVCodecContext *avctx)
     }
 }
 
-static int vda_h264_alloc_frame(AVCodecContext *avctx, AVFrame *frame)
-{
-    frame->width  = avctx->width;
-    frame->height = avctx->height;
-    frame->format = avctx->pix_fmt;
-    frame->buf[0] = av_buffer_alloc(1);
-
-    if (!frame->buf[0])
-        return AVERROR(ENOMEM);
-    return 0;
-}
-
 AVHWAccel ff_h264_vda_hwaccel = {
     .name           = "h264_vda",
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_H264,
     .pix_fmt        = AV_PIX_FMT_VDA,
-    .alloc_frame    = vda_h264_alloc_frame,
-    .start_frame    = vda_h264_start_frame,
-    .decode_slice   = vda_h264_decode_slice,
+    .alloc_frame    = ff_videotoolbox_alloc_frame,
+    .start_frame    = ff_videotoolbox_h264_start_frame,
+    .decode_slice   = ff_videotoolbox_h264_decode_slice,
     .end_frame      = vda_h264_end_frame,
-    .uninit         = vda_h264_uninit,
-    .priv_data_size = sizeof(VDAContext),
+    .uninit         = ff_videotoolbox_uninit,
+    .priv_data_size = sizeof(VTContext),
 };
diff --git a/libavcodec/vda_h264_dec.c b/libavcodec/vda_h264_dec.c
index c00e7e4e..a092693d 100644
--- a/libavcodec/vda_h264_dec.c
+++ b/libavcodec/vda_h264_dec.c
@@ -62,9 +62,6 @@ typedef struct {
     void *hwaccel_context;
     enum AVPixelFormat (*get_format)(struct AVCodecContext *s, const enum AVPixelFormat * fmt);
     int (*get_buffer2)(struct AVCodecContext *s, AVFrame *frame, int flags);
-#if FF_API_GET_BUFFER
-    int (*get_buffer)(struct AVCodecContext *c, AVFrame *pic);
-#endif
 } VDADecoderContext;
 
 static enum AVPixelFormat get_format(struct AVCodecContext *avctx,
@@ -108,10 +105,6 @@ static inline void set_context(AVCodecContext *avctx)
     avctx->get_format = get_format;
     ctx->get_buffer2 = avctx->get_buffer2;
     avctx->get_buffer2 = get_buffer2;
-#if FF_API_GET_BUFFER
-    ctx->get_buffer = avctx->get_buffer;
-    avctx->get_buffer = NULL;
-#endif
 }
 
 static inline void restore_context(AVCodecContext *avctx)
@@ -120,9 +113,6 @@ static inline void restore_context(AVCodecContext *avctx)
     avctx->hwaccel_context = ctx->hwaccel_context;
     avctx->get_format = ctx->get_format;
     avctx->get_buffer2 = ctx->get_buffer2;
-#if FF_API_GET_BUFFER
-    avctx->get_buffer = ctx->get_buffer;
-#endif
 }
 
 static int vdadec_decode(AVCodecContext *avctx,
@@ -267,7 +257,7 @@ AVCodec ff_h264_vda_decoder = {
     .init           = vdadec_init,
     .close          = vdadec_close,
     .decode         = vdadec_decode,
-    .capabilities   = CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_DELAY,
     .flush          = vdadec_flush,
     .long_name      = NULL_IF_CONFIG_SMALL("H.264 (VDA acceleration)"),
 };
diff --git a/libavcodec/vda_vt_internal.h b/libavcodec/vda_vt_internal.h
new file mode 100644
index 00000000..9ff63ccc
--- /dev/null
+++ b/libavcodec/vda_vt_internal.h
@@ -0,0 +1,55 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VDA_VT_INTERNAL_H
+#define AVCODEC_VDA_VT_INTERNAL_H
+
+void ff_vda_output_callback(void *vda_hw_ctx,
+                            CFDictionaryRef user_info,
+                            OSStatus status,
+                            uint32_t infoFlags,
+                            CVImageBufferRef image_buffer);
+
+int ff_vda_default_init(AVCodecContext *avctx);
+void ff_vda_default_free(AVCodecContext *avctx);
+
+typedef struct VTContext {
+    // The current bitstream buffer.
+    uint8_t                     *bitstream;
+
+    // The current size of the bitstream.
+    int                         bitstream_size;
+
+    // The reference size used for fast reallocation.
+    int                         allocated_size;
+
+    // The Core Video buffer retained by the decoder output callback.
+    CVImageBufferRef            frame;
+} VTContext;
+
+int ff_videotoolbox_alloc_frame(AVCodecContext *avctx, AVFrame *frame);
+int ff_videotoolbox_uninit(AVCodecContext *avctx);
+int ff_videotoolbox_buffer_create(VTContext *vtctx, AVFrame *frame);
+int ff_videotoolbox_h264_start_frame(AVCodecContext *avctx,
+                                     const uint8_t *buffer,
+                                     uint32_t size);
+int ff_videotoolbox_h264_decode_slice(AVCodecContext *avctx,
+                                      const uint8_t *buffer,
+                                      uint32_t size);
+CFDataRef ff_videotoolbox_avcc_extradata_create(AVCodecContext *avctx);
+#endif /* AVCODEC_VDA_VT_INTERNAL_H */
diff --git a/libavcodec/vdpau.c b/libavcodec/vdpau.c
index 62d99601..a01c445c 100644
--- a/libavcodec/vdpau.c
+++ b/libavcodec/vdpau.c
@@ -187,8 +187,7 @@ int ff_vdpau_common_init(AVCodecContext *avctx, VdpDecoderProfile profile,
     status = decoder_query_caps(vdctx->device, profile, &supported, &max_level,
                                 &max_mb, &max_width, &max_height);
 #ifdef VDP_DECODER_PROFILE_H264_CONSTRAINED_BASELINE
-    if (status != VDP_STATUS_OK && profile == VDP_DECODER_PROFILE_H264_CONSTRAINED_BASELINE) {
-        /* Run-time backward compatibility for libvdpau 0.8 and earlier */
+    if ((status != VDP_STATUS_OK || supported != VDP_TRUE) && profile == VDP_DECODER_PROFILE_H264_CONSTRAINED_BASELINE) {
         profile = VDP_DECODER_PROFILE_H264_MAIN;
         status = decoder_query_caps(vdctx->device, profile, &supported,
                                     &max_level, &max_mb,
@@ -318,7 +317,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
     return vdpau_error(status);
 }
 
-#if CONFIG_H263_VDPAU_HWACCEL  || CONFIG_MPEG1_VDPAU_HWACCEL || \
+#if CONFIG_MPEG1_VDPAU_HWACCEL || \
     CONFIG_MPEG2_VDPAU_HWACCEL || CONFIG_MPEG4_VDPAU_HWACCEL || \
     CONFIG_VC1_VDPAU_HWACCEL   || CONFIG_WMV3_VDPAU_HWACCEL
 int ff_vdpau_mpeg_end_frame(AVCodecContext *avctx)
@@ -358,6 +357,25 @@ int ff_vdpau_add_buffer(struct vdpau_picture_context *pic_ctx,
 
 /* Obsolete non-hwaccel VDPAU support below... */
 
+#if FF_API_VDPAU
+/* Appends buf/buf_size to the bitstream buffer list of the render state in data. */
+void ff_vdpau_add_data_chunk(uint8_t *data, const uint8_t *buf, int buf_size)
+{
+    struct vdpau_render_state *render = (struct vdpau_render_state*)data;
+    VdpBitstreamBuffer *buffers;
+    assert(render);
+    buffers = av_fast_realloc(render->bitstream_buffers, &render->bitstream_buffers_allocated,
+                              sizeof(*buffers)*(render->bitstream_buffers_used + 1));
+    if (!buffers) /* keep the old array; drop the chunk rather than write through NULL */
+        return;
+    render->bitstream_buffers = buffers;
+    buffers[render->bitstream_buffers_used].struct_version  = VDP_BITSTREAM_BUFFER_VERSION;
+    buffers[render->bitstream_buffers_used].bitstream       = buf;
+    buffers[render->bitstream_buffers_used].bitstream_bytes = buf_size;
+    render->bitstream_buffers_used++;
+}
+
+#if CONFIG_H264_VDPAU_DECODER
 void ff_vdpau_h264_set_reference_frames(H264Context *h)
 {
     struct vdpau_render_state *render, *render_ref;
@@ -426,24 +444,6 @@ void ff_vdpau_h264_set_reference_frames(H264Context *h)
     }
 }
 
-void ff_vdpau_add_data_chunk(uint8_t *data, const uint8_t *buf, int buf_size)
-{
-    struct vdpau_render_state *render = (struct vdpau_render_state*)data;
-    assert(render);
-
-    render->bitstream_buffers= av_fast_realloc(
-        render->bitstream_buffers,
-        &render->bitstream_buffers_allocated,
-        sizeof(*render->bitstream_buffers)*(render->bitstream_buffers_used + 1)
-    );
-
-    render->bitstream_buffers[render->bitstream_buffers_used].struct_version  = VDP_BITSTREAM_BUFFER_VERSION;
-    render->bitstream_buffers[render->bitstream_buffers_used].bitstream       = buf;
-    render->bitstream_buffers[render->bitstream_buffers_used].bitstream_bytes = buf_size;
-    render->bitstream_buffers_used++;
-}
-
-#if CONFIG_H264_VDPAU_DECODER
 void ff_vdpau_h264_picture_start(H264Context *h)
 {
     struct vdpau_render_state *render;
@@ -692,6 +692,7 @@ void ff_vdpau_mpeg4_decode_picture(Mpeg4DecContext *ctx, const uint8_t *buf,
     render->bitstream_buffers_used = 0;
 }
 #endif /* CONFIG_MPEG4_VDPAU_DECODER */
+#endif /* FF_API_VDPAU */
 
 int av_vdpau_get_profile(AVCodecContext *avctx, VdpDecoderProfile *profile)
 {
diff --git a/libavcodec/vdpau.h b/libavcodec/vdpau.h
index a42ca013..e85e4d9e 100644
--- a/libavcodec/vdpau.h
+++ b/libavcodec/vdpau.h
@@ -196,10 +196,13 @@ int av_vdpau_get_surface_parameters(AVCodecContext *avctx, VdpChromaType *type,
  */
 AVVDPAUContext *av_vdpau_alloc_context(void);
 
+#if FF_API_VDPAU_PROFILE
 /**
  * Get a decoder profile that should be used for initializing a VDPAU decoder.
  * Should be called from the AVCodecContext.get_format() callback.
  *
+ * @deprecated Use av_vdpau_bind_context() instead.
+ *
  * @param avctx the codec context being used for decoding the stream
  * @param profile a pointer into which the result will be written on success.
  *                The contents of profile are undefined if this function returns
@@ -207,7 +210,9 @@ AVVDPAUContext *av_vdpau_alloc_context(void);
  *
  * @return 0 on success (non-negative), a negative AVERROR on failure.
  */
+attribute_deprecated
 int av_vdpau_get_profile(AVCodecContext *avctx, VdpDecoderProfile *profile);
+#endif
 
 #if FF_API_CAP_VDPAU
 /** @brief The videoSurface is used for rendering. */
@@ -231,10 +236,8 @@ struct vdpau_render_state {
 
     int state; ///< Holds FF_VDPAU_STATE_* values.
 
-#if AV_HAVE_INCOMPATIBLE_LIBAV_ABI
     /** picture parameter information for all supported codecs */
     union AVVDPAUPictureInfo info;
-#endif
 
     /** Describe size/location of the compressed video data.
         Set to 0 when freeing bitstream_buffers. */
@@ -242,11 +245,6 @@ struct vdpau_render_state {
     int bitstream_buffers_used;
     /** The user is responsible for freeing this buffer using av_freep(). */
     VdpBitstreamBuffer *bitstream_buffers;
-
-#if !AV_HAVE_INCOMPATIBLE_LIBAV_ABI
-    /** picture parameter information for all supported codecs */
-    union AVVDPAUPictureInfo info;
-#endif
 };
 #endif
 
diff --git a/libavcodec/vdpau_hevc.c b/libavcodec/vdpau_hevc.c
new file mode 100644
index 00000000..3c1dc5f2
--- /dev/null
+++ b/libavcodec/vdpau_hevc.c
@@ -0,0 +1,437 @@
+/*
+ * MPEG-H Part 2 / HEVC / H.265 HW decode acceleration through VDPAU
+ *
+ * Copyright (c) 2013 Philip Langdale
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <vdpau/vdpau.h>
+
+#include "avcodec.h"
+#include "internal.h"
+#include "hevc.h"
+#include "vdpau.h"
+#include "vdpau_internal.h"
+
+/* Fill the VdpPictureInfoHEVC of the current picture from the active SPS/PPS,
+ * slice header and DPB, then return ff_vdpau_common_start_frame()'s result. */
+static int vdpau_hevc_start_frame(AVCodecContext *avctx,
+                                  const uint8_t *buffer, uint32_t size)
+{
+    HEVCContext *h = avctx->priv_data;
+    HEVCFrame *pic = h->ref;
+    struct vdpau_picture_context *pic_ctx = pic->hwaccel_picture_private;
+
+    VdpPictureInfoHEVC *info = &pic_ctx->info.hevc;
+
+    const HEVCSPS *sps = h->ps.sps;
+    const HEVCPPS *pps = h->ps.pps;
+    const SliceHeader *sh = &h->sh;
+    const ScalingList *sl = pps->scaling_list_data_present_flag ?
+                            &pps->scaling_list : &sps->scaling_list;
+
+    /* init VdpPictureInfoHEVC */
+
+    /* SPS */
+    info->chroma_format_idc = sps->chroma_format_idc;
+    info->separate_colour_plane_flag = sps->separate_colour_plane_flag;
+    info->pic_width_in_luma_samples = sps->width;
+    info->pic_height_in_luma_samples = sps->height;
+    info->bit_depth_luma_minus8 = sps->bit_depth - 8;
+    info->bit_depth_chroma_minus8 = sps->bit_depth - 8;
+    info->log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_poc_lsb - 4;
+    /** Provides the value corresponding to the nuh_temporal_id of the frame
+        to be decoded. */
+    info->sps_max_dec_pic_buffering_minus1 = sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering - 1;
+    info->log2_min_luma_coding_block_size_minus3 = sps->log2_min_cb_size - 3;
+    info->log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_coding_block_size;
+    info->log2_min_transform_block_size_minus2 = sps->log2_min_tb_size - 2;
+    info->log2_diff_max_min_transform_block_size = sps->log2_max_trafo_size - sps->log2_min_tb_size;
+    info->max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter;
+    info->max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra;
+    info->scaling_list_enabled_flag = sps->scaling_list_enable_flag;
+    /** Scaling lists, in diagonal order, to be used for this frame. */
+    for (size_t i = 0; i < 6; i++) {
+        for (size_t j = 0; j < 16; j++) {
+            /** Scaling List for 4x4 quantization matrix,
+                indexed as ScalingList4x4[matrixId][i]. */
+            uint8_t pos = 4 * ff_hevc_diag_scan4x4_y[j] + ff_hevc_diag_scan4x4_x[j];
+            info->ScalingList4x4[i][j] = sl->sl[0][i][pos];
+        }
+        for (size_t j = 0; j < 64; j++) {
+            uint8_t pos = 8 * ff_hevc_diag_scan8x8_y[j] + ff_hevc_diag_scan8x8_x[j];
+            /** Scaling List for 8x8 quantization matrix,
+                indexed as ScalingList8x8[matrixId][i]. */
+            info->ScalingList8x8[i][j] = sl->sl[1][i][pos];
+            /** Scaling List for 16x16 quantization matrix,
+                indexed as ScalingList16x16[matrixId][i]. */
+            info->ScalingList16x16[i][j] = sl->sl[2][i][pos];
+            if (i < 2) {
+                /** Scaling List for 32x32 quantization matrix,
+                    indexed as ScalingList32x32[matrixId][i]. */
+                info->ScalingList32x32[i][j] = sl->sl[3][i * 3][pos];
+            }
+        }
+        /** Scaling List DC Coefficients for 16x16,
+            indexed as ScalingListDCCoeff16x16[matrixId]. */
+        info->ScalingListDCCoeff16x16[i] = sl->sl_dc[0][i];
+        if (i < 2) {
+            /** Scaling List DC Coefficients for 32x32,
+                indexed as ScalingListDCCoeff32x32[matrixId]. */
+            info->ScalingListDCCoeff32x32[i] = sl->sl_dc[1][i * 3];
+        }
+    }
+    info->amp_enabled_flag = sps->amp_enabled_flag;
+    info->sample_adaptive_offset_enabled_flag = sps->sao_enabled;
+    info->pcm_enabled_flag = sps->pcm_enabled_flag;
+    if (info->pcm_enabled_flag) {
+        /** Only needs to be set if pcm_enabled_flag is set. Ignored otherwise. */
+        info->pcm_sample_bit_depth_luma_minus1 = sps->pcm.bit_depth - 1;
+        /** Only needs to be set if pcm_enabled_flag is set. Ignored otherwise. */
+        info->pcm_sample_bit_depth_chroma_minus1 = sps->pcm.bit_depth_chroma - 1;
+        /** Only needs to be set if pcm_enabled_flag is set. Ignored otherwise. */
+        info->log2_min_pcm_luma_coding_block_size_minus3 = sps->pcm.log2_min_pcm_cb_size - 3;
+        /** Only needs to be set if pcm_enabled_flag is set. Ignored otherwise. */
+        info->log2_diff_max_min_pcm_luma_coding_block_size = sps->pcm.log2_max_pcm_cb_size - sps->pcm.log2_min_pcm_cb_size;
+        /** Only needs to be set if pcm_enabled_flag is set. Ignored otherwise. */
+        info->pcm_loop_filter_disabled_flag = sps->pcm.loop_filter_disable_flag;
+    }
+    /** Per spec, when zero, assume short_term_ref_pic_set_sps_flag
+        is also zero. */
+    info->num_short_term_ref_pic_sets = sps->nb_st_rps;
+    info->long_term_ref_pics_present_flag = sps->long_term_ref_pics_present_flag;
+    /** Only needed if long_term_ref_pics_present_flag is set. Ignored
+        otherwise. */
+    info->num_long_term_ref_pics_sps = sps->num_long_term_ref_pics_sps;
+    info->sps_temporal_mvp_enabled_flag = sps->sps_temporal_mvp_enabled_flag;
+    info->strong_intra_smoothing_enabled_flag = sps->sps_strong_intra_smoothing_enable_flag;
+
+    /** \name HEVC Picture Parameter Set
+     *
+     * Copies of the HEVC Picture Parameter Set bitstream fields.
+     */
+    info->dependent_slice_segments_enabled_flag = pps->dependent_slice_segments_enabled_flag;
+    info->output_flag_present_flag = pps->output_flag_present_flag;
+    info->num_extra_slice_header_bits = pps->num_extra_slice_header_bits;
+    info->sign_data_hiding_enabled_flag = pps->sign_data_hiding_flag;
+    info->cabac_init_present_flag = pps->cabac_init_present_flag;
+    info->num_ref_idx_l0_default_active_minus1 = pps->num_ref_idx_l0_default_active - 1;
+    info->num_ref_idx_l1_default_active_minus1 = pps->num_ref_idx_l1_default_active - 1;
+    info->init_qp_minus26 = pps->pic_init_qp_minus26;
+    info->constrained_intra_pred_flag = pps->constrained_intra_pred_flag;
+    info->transform_skip_enabled_flag = pps->transform_skip_enabled_flag;
+    info->cu_qp_delta_enabled_flag = pps->cu_qp_delta_enabled_flag;
+    /** Only needed if cu_qp_delta_enabled_flag is set. Ignored otherwise. */
+    info->diff_cu_qp_delta_depth = pps->diff_cu_qp_delta_depth;
+    info->pps_cb_qp_offset = pps->cb_qp_offset;
+    info->pps_cr_qp_offset = pps->cr_qp_offset;
+    info->pps_slice_chroma_qp_offsets_present_flag = pps->pic_slice_level_chroma_qp_offsets_present_flag;
+    info->weighted_pred_flag = pps->weighted_pred_flag;
+    info->weighted_bipred_flag = pps->weighted_bipred_flag;
+    info->transquant_bypass_enabled_flag = pps->transquant_bypass_enable_flag;
+    info->tiles_enabled_flag = pps->tiles_enabled_flag;
+    info->entropy_coding_sync_enabled_flag = pps->entropy_coding_sync_enabled_flag;
+    if (info->tiles_enabled_flag) {
+        /** Only valid if tiles_enabled_flag is set. Ignored otherwise. */
+        info->num_tile_columns_minus1 = pps->num_tile_columns - 1;
+        /** Only valid if tiles_enabled_flag is set. Ignored otherwise. */
+        info->num_tile_rows_minus1 = pps->num_tile_rows - 1;
+        /** Only valid if tiles_enabled_flag is set. Ignored otherwise. */
+        info->uniform_spacing_flag = pps->uniform_spacing_flag;
+        /** Only need to set 0..num_tile_columns_minus1. The struct
+            definition reserves up to the maximum of 20. Invalid values are
+            ignored. */
+        for (ssize_t i = 0; i < pps->num_tile_columns; i++) {
+            info->column_width_minus1[i] = pps->column_width[i] - 1;
+        }
+        /** Only need to set 0..num_tile_rows_minus1. The struct
+          definition reserves up to the maximum of 22. Invalid values are
+          ignored.*/
+        for (ssize_t i = 0; i < pps->num_tile_rows; i++) {
+            info->row_height_minus1[i] = pps->row_height[i] - 1;
+        }
+        /** Only needed if tiles_enabled_flag is set. Invalid values are
+          ignored. */
+        info->loop_filter_across_tiles_enabled_flag = pps->loop_filter_across_tiles_enabled_flag;
+    }
+    info->pps_loop_filter_across_slices_enabled_flag = pps->seq_loop_filter_across_slices_enabled_flag;
+    info->deblocking_filter_control_present_flag = pps->deblocking_filter_control_present_flag;
+    /** Only valid if deblocking_filter_control_present_flag is set. Ignored
+        otherwise. */
+    info->deblocking_filter_override_enabled_flag = pps->deblocking_filter_override_enabled_flag;
+    /** Only valid if deblocking_filter_control_present_flag is set. Ignored
+        otherwise. */
+    info->pps_deblocking_filter_disabled_flag = pps->disable_dbf;
+    /** Only valid if deblocking_filter_control_present_flag is set and
+        pps_deblocking_filter_disabled_flag is not set. Ignored otherwise.*/
+    info->pps_beta_offset_div2 = pps->beta_offset / 2;
+    /** Only valid if deblocking_filter_control_present_flag is set and
+        pps_deblocking_filter_disabled_flag is not set. Ignored otherwise. */
+    info->pps_tc_offset_div2 = pps->tc_offset / 2;
+    info->lists_modification_present_flag = pps->lists_modification_present_flag;
+    info->log2_parallel_merge_level_minus2 = pps->log2_parallel_merge_level - 2;
+    info->slice_segment_header_extension_present_flag = pps->slice_header_extension_present_flag;
+
+    /** \name HEVC Slice Segment Header
+     *
+     * Copies of the HEVC Slice Segment Header bitstream fields and calculated
+     * values detailed in the specification.
+     */
+    /** Set to 1 if nal_unit_type is equal to IDR_W_RADL or IDR_N_LP.
+        Set to zero otherwise. */
+    info->IDRPicFlag = IS_IDR(h);
+    /** Set to 1 if nal_unit_type in the range of BLA_W_LP to
+        RSV_IRAP_VCL23, inclusive. Set to zero otherwise.*/
+    info->RAPPicFlag = IS_IRAP(h);
+    /** See section 7.4.7.1 of the specification. */
+    info->CurrRpsIdx = sps->nb_st_rps;
+    if (sh->short_term_ref_pic_set_sps_flag == 1) {
+        for (size_t i = 0; i < sps->nb_st_rps; i++) {
+            if (sh->short_term_rps == &sps->st_rps[i]) {
+                info->CurrRpsIdx = i;
+                break;
+            }
+        }
+    }
+    /** See section 7.4.7.2 of the specification. */
+    info->NumPocTotalCurr = ff_hevc_frame_nb_refs(h);
+    if (sh->short_term_ref_pic_set_sps_flag == 0 && sh->short_term_rps) {
+        /** Corresponds to specification field, NumDeltaPocs[RefRpsIdx].
+            Only applicable when short_term_ref_pic_set_sps_flag == 0.
+            Implementations will ignore this value in other cases. See 7.4.8. */
+        info->NumDeltaPocsOfRefRpsIdx = sh->short_term_rps->rps_idx_num_delta_pocs;
+    }
+    /** Section 7.6.3.1 of the H.265/HEVC Specification defines the syntax of
+        the slice_segment_header. This header contains information that
+        some VDPAU implementations may choose to skip. The VDPAU API
+        requires client applications to track the number of bits used in the
+        slice header for structures associated with short term and long term
+        reference pictures. First, VDPAU requires the number of bits used by
+        the short_term_ref_pic_set array in the slice_segment_header. */
+    info->NumShortTermPictureSliceHeaderBits = sh->short_term_ref_pic_set_size;
+    /** Second, VDPAU requires the number of bits used for long term reference
+        pictures in the slice_segment_header. This is equal to the number
+        of bits used for the contents of the block beginning with
+        "if(long_term_ref_pics_present_flag)". */
+    info->NumLongTermPictureSliceHeaderBits = sh->long_term_ref_pic_set_size;
+
+    /** Slice Decoding Process - Picture Order Count */
+    /** The value of PicOrderCntVal of the picture in the access unit
+        containing the SEI message. The picture being decoded. */
+    info->CurrPicOrderCntVal = h->poc;
+
+    /** Slice Decoding Process - Reference Picture Sets */
+    for (size_t i = 0; i < 16; i++) {
+        info->RefPics[i] = VDP_INVALID_HANDLE;
+        info->PicOrderCntVal[i] = 0;
+        info->IsLongTerm[i] = 0;
+    }
+    for (size_t i = 0, j = 0; i < FF_ARRAY_ELEMS(h->DPB); i++) {
+        const HEVCFrame *frame = &h->DPB[i];
+        if (frame != h->ref && (frame->flags & (HEVC_FRAME_FLAG_LONG_REF |
+                                                HEVC_FRAME_FLAG_SHORT_REF))) {
+            if (j >= FF_ARRAY_ELEMS(info->RefPics)) { /* slots are 0..15 */
+                av_log(avctx, AV_LOG_WARNING,
+                     "VDPAU only supports up to 16 references in the DPB. "
+                     "This frame may not be decoded correctly.\n");
+                break;
+            }
+            /** Array of video reference surfaces.
+                Set any unused positions to VDP_INVALID_HANDLE. */
+            info->RefPics[j] = ff_vdpau_get_surface_id(frame->frame);
+            /** Array of picture order counts. These correspond to positions
+                in the RefPics array. */
+            info->PicOrderCntVal[j] = frame->poc;
+            /** Array used to specify whether a particular RefPic is
+                a long term reference. A value of "1" indicates a long-term
+                reference. */
+            // XXX: Setting this caused glitches in the nvidia implementation
+            // Always setting it to zero, produces correct results
+            //info->IsLongTerm[j] = frame->flags & HEVC_FRAME_FLAG_LONG_REF;
+            info->IsLongTerm[j] = 0;
+            j++;
+        }
+    }
+    /** Copy of specification field, see Section 8.3.2 of the
+        H.265/HEVC Specification. */
+    info->NumPocStCurrBefore = h->rps[ST_CURR_BEF].nb_refs;
+    if (info->NumPocStCurrBefore > 8) {
+        av_log(avctx, AV_LOG_WARNING,
+             "VDPAU only supports up to 8 references in StCurrBefore. "
+             "This frame may not be decoded correctly.\n");
+        info->NumPocStCurrBefore = 8;
+    }
+    /** Copy of specification field, see Section 8.3.2 of the
+        H.265/HEVC Specification. */
+    info->NumPocStCurrAfter = h->rps[ST_CURR_AFT].nb_refs;
+    if (info->NumPocStCurrAfter > 8) {
+        av_log(avctx, AV_LOG_WARNING,
+             "VDPAU only supports up to 8 references in StCurrAfter. "
+             "This frame may not be decoded correctly.\n");
+        info->NumPocStCurrAfter = 8;
+    }
+    /** Copy of specification field, see Section 8.3.2 of the
+        H.265/HEVC Specification. */
+    info->NumPocLtCurr = h->rps[LT_CURR].nb_refs;
+    if (info->NumPocLtCurr > 8) {
+        av_log(avctx, AV_LOG_WARNING,
+             "VDPAU only supports up to 8 references in LtCurr. "
+             "This frame may not be decoded correctly.\n");
+        info->NumPocLtCurr = 8;
+    }
+    /** StCurrBefore entries, as indices into RefPics[]. Walk the clamped
+        count so no more than 8 slots of RefPicSetStCurrBefore are written. */
+    for (ssize_t i = 0, j = 0; i < info->NumPocStCurrBefore; i++) {
+        HEVCFrame *frame = h->rps[ST_CURR_BEF].ref[i];
+        if (frame) {
+            uint8_t found = 0;
+            uintptr_t id = ff_vdpau_get_surface_id(frame->frame);
+            for (size_t k = 0; k < 16; k++) {
+                if (id == info->RefPics[k]) {
+                    info->RefPicSetStCurrBefore[j] = k;
+                    j++;
+                    found = 1;
+                    break;
+                }
+            }
+            if (!found) {
+                av_log(avctx, AV_LOG_WARNING, "missing surface: %p\n",
+                       (void *)id);
+            }
+        } else {
+            av_log(avctx, AV_LOG_WARNING, "missing STR Before frame: %zd\n", i);
+        }
+    }
+    /** StCurrAfter entries, as indices into RefPics[]. Walk the clamped
+        count so no more than 8 slots of RefPicSetStCurrAfter are written. */
+    for (ssize_t i = 0, j = 0; i < info->NumPocStCurrAfter; i++) {
+        HEVCFrame *frame = h->rps[ST_CURR_AFT].ref[i];
+        if (frame) {
+            uint8_t found = 0;
+            uintptr_t id = ff_vdpau_get_surface_id(frame->frame);
+            for (size_t k = 0; k < 16; k++) {
+                if (id == info->RefPics[k]) {
+                    info->RefPicSetStCurrAfter[j] = k;
+                    j++;
+                    found = 1;
+                    break;
+                }
+            }
+            if (!found) {
+                av_log(avctx, AV_LOG_WARNING, "missing surface: %p\n",
+                       (void *)id);
+            }
+        } else {
+            av_log(avctx, AV_LOG_WARNING, "missing STR After frame: %zd\n", i);
+        }
+    }
+    /** LtCurr entries, as indices into RefPics[]. Walk the clamped
+        count so no more than 8 slots of RefPicSetLtCurr are written. */
+    for (ssize_t i = 0, j = 0; i < info->NumPocLtCurr; i++) {
+        HEVCFrame *frame = h->rps[LT_CURR].ref[i];
+        if (frame) {
+            uint8_t found = 0;
+            uintptr_t id = ff_vdpau_get_surface_id(frame->frame);
+            for (size_t k = 0; k < 16; k++) {
+                if (id == info->RefPics[k]) {
+                    info->RefPicSetLtCurr[j] = k;
+                    j++;
+                    found = 1;
+                    break;
+                }
+            }
+            if (!found) {
+                av_log(avctx, AV_LOG_WARNING, "missing surface: %p\n",
+                       (void *)id);
+            }
+        } else {
+            av_log(avctx, AV_LOG_WARNING, "missing LTR frame: %zd\n", i);
+        }
+    }
+
+    return ff_vdpau_common_start_frame(pic_ctx, buffer, size);
+}
+
+/* 3-byte start code that is queued ahead of every slice payload. */
+static const uint8_t start_code_prefix[3] = { 0x00, 0x00, 0x01 };
+
+/* Queue one slice: start_code_prefix first, then the slice bytes.
+ * Returns 0 on success or the first non-zero ff_vdpau_add_buffer() result. */
+static int vdpau_hevc_decode_slice(AVCodecContext *avctx,
+                                   const uint8_t *buffer, uint32_t size)
+{
+    HEVCContext *s = avctx->priv_data;
+    struct vdpau_picture_context *pic_ctx = s->ref->hwaccel_picture_private;
+    int ret;
+
+    ret = ff_vdpau_add_buffer(pic_ctx, start_code_prefix,
+                              sizeof(start_code_prefix));
+    if (ret != 0)
+        return ret;
+
+    return ff_vdpau_add_buffer(pic_ctx, buffer, size);
+}
+
+/* Finish the current picture through ff_vdpau_common_end_frame().
+ * A negative result is passed on; any other result is reported as 0. */
+static int vdpau_hevc_end_frame(AVCodecContext *avctx)
+{
+    HEVCContext *s = avctx->priv_data;
+    HEVCFrame *cur = s->ref;
+    struct vdpau_picture_context *pic_ctx = cur->hwaccel_picture_private;
+    int ret;
+
+    ret = ff_vdpau_common_end_frame(avctx, cur->frame, pic_ctx);
+    return ret < 0 ? ret : 0;
+}
+
+/*
+ * Pick the VDPAU decoder profile that matches avctx->profile and hand it,
+ * together with avctx->level, to ff_vdpau_common_init().
+ * Only Main, Main 10 and Main Still Picture are mapped; any other profile
+ * makes the function return AVERROR(ENOTSUP) without calling the common init.
+ */
+static int vdpau_hevc_init(AVCodecContext *avctx)
+{
+    const uint32_t level = avctx->level;
+    VdpDecoderProfile profile;
+
+    if (avctx->profile == FF_PROFILE_HEVC_MAIN)
+        profile = VDP_DECODER_PROFILE_HEVC_MAIN;
+    else if (avctx->profile == FF_PROFILE_HEVC_MAIN_10)
+        profile = VDP_DECODER_PROFILE_HEVC_MAIN_10;
+    else if (avctx->profile == FF_PROFILE_HEVC_MAIN_STILL_PICTURE)
+        profile = VDP_DECODER_PROFILE_HEVC_MAIN_STILL;
+    else
+        return AVERROR(ENOTSUP);
+    return ff_vdpau_common_init(avctx, profile, level);
+}
+
+AVHWAccel ff_hevc_vdpau_hwaccel = { /* HEVC hwaccel entry for AV_PIX_FMT_VDPAU */
+    .name                 = "hevc_vdpau",
+    .type                 = AVMEDIA_TYPE_VIDEO,
+    .id                   = AV_CODEC_ID_HEVC,
+    .pix_fmt              = AV_PIX_FMT_VDPAU,
+    .init                 = vdpau_hevc_init,
+    .uninit               = ff_vdpau_common_uninit,
+    .start_frame          = vdpau_hevc_start_frame,
+    .decode_slice         = vdpau_hevc_decode_slice,
+    .end_frame            = vdpau_hevc_end_frame,
+    .frame_priv_data_size = sizeof(struct vdpau_picture_context),
+    .priv_data_size       = sizeof(VDPAUContext),
+};
diff --git a/libavcodec/vdpau_internal.h b/libavcodec/vdpau_internal.h
index 6f762e41..8a637335 100644
--- a/libavcodec/vdpau_internal.h
+++ b/libavcodec/vdpau_internal.h
@@ -50,6 +50,9 @@ union VDPAUPictureInfo {
 #ifdef VDP_DECODER_PROFILE_H264_HIGH_444_PREDICTIVE
     VdpPictureInfoH264Predictive h264_predictive;
 #endif
+#ifdef VDP_DECODER_PROFILE_HEVC_MAIN
+    VdpPictureInfoHEVC        hevc;
+#endif
 };
 
 #include "vdpau.h"
diff --git a/libavcodec/vdpau_mpeg4.c b/libavcodec/vdpau_mpeg4.c
index 9141becd..46a00cb2 100644
--- a/libavcodec/vdpau_mpeg4.c
+++ b/libavcodec/vdpau_mpeg4.c
@@ -88,29 +88,6 @@ static int vdpau_mpeg4_decode_slice(av_unused AVCodecContext *avctx,
      return 0;
 }
 
-#if CONFIG_H263_VDPAU_HWACCEL
-static int vdpau_h263_init(AVCodecContext *avctx)
-{
-    return ff_vdpau_common_init(avctx, VDP_DECODER_PROFILE_MPEG4_PART2_ASP,
-                                VDP_DECODER_LEVEL_MPEG4_PART2_ASP_L5);
-}
-
-AVHWAccel ff_h263_vdpau_hwaccel = {
-    .name           = "h263_vdpau",
-    .type           = AVMEDIA_TYPE_VIDEO,
-    .id             = AV_CODEC_ID_H263,
-    .pix_fmt        = AV_PIX_FMT_VDPAU,
-    .start_frame    = vdpau_mpeg4_start_frame,
-    .end_frame      = ff_vdpau_mpeg_end_frame,
-    .decode_slice   = vdpau_mpeg4_decode_slice,
-    .frame_priv_data_size = sizeof(struct vdpau_picture_context),
-    .init           = vdpau_h263_init,
-    .uninit         = ff_vdpau_common_uninit,
-    .priv_data_size = sizeof(VDPAUContext),
-};
-#endif
-
-#if CONFIG_MPEG4_VDPAU_HWACCEL
 static int vdpau_mpeg4_init(AVCodecContext *avctx)
 {
     VdpDecoderProfile profile;
@@ -145,4 +122,3 @@ AVHWAccel ff_mpeg4_vdpau_hwaccel = {
     .uninit         = ff_vdpau_common_uninit,
     .priv_data_size = sizeof(VDPAUContext),
 };
-#endif
diff --git a/libavcodec/version.h b/libavcodec/version.h
index 74a1b304..37a35e05 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -28,9 +28,9 @@
 
 #include "libavutil/version.h"
 
-#define LIBAVCODEC_VERSION_MAJOR 56
-#define LIBAVCODEC_VERSION_MINOR  41
-#define LIBAVCODEC_VERSION_MICRO 100
+#define LIBAVCODEC_VERSION_MAJOR  57
+#define LIBAVCODEC_VERSION_MINOR  24
+#define LIBAVCODEC_VERSION_MICRO 102
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
                                                LIBAVCODEC_VERSION_MINOR, \
@@ -46,141 +46,118 @@
  * FF_API_* defines may be placed below to indicate public API that will be
  * dropped at a future version bump. The defines themselves are not part of
  * the public API and may change, break or disappear at any time.
+ *
+ * @note When bumping the major version it is recommended to manually
+ * disable each FF_API_* in its own commit instead of disabling them all
+ * at once through the bump. This improves the git bisect-ability of the change.
  */
 
 #ifndef FF_API_VIMA_DECODER
-#define FF_API_VIMA_DECODER     (LIBAVCODEC_VERSION_MAJOR < 57)
-#endif
-#ifndef FF_API_REQUEST_CHANNELS
-#define FF_API_REQUEST_CHANNELS (LIBAVCODEC_VERSION_MAJOR < 57)
-#endif
-#ifndef FF_API_OLD_DECODE_AUDIO
-#define FF_API_OLD_DECODE_AUDIO (LIBAVCODEC_VERSION_MAJOR < 57)
-#endif
-#ifndef FF_API_OLD_ENCODE_AUDIO
-#define FF_API_OLD_ENCODE_AUDIO (LIBAVCODEC_VERSION_MAJOR < 57)
-#endif
-#ifndef FF_API_OLD_ENCODE_VIDEO
-#define FF_API_OLD_ENCODE_VIDEO (LIBAVCODEC_VERSION_MAJOR < 57)
-#endif
-#ifndef FF_API_CODEC_ID
-#define FF_API_CODEC_ID          (LIBAVCODEC_VERSION_MAJOR < 57)
+#define FF_API_VIMA_DECODER     (LIBAVCODEC_VERSION_MAJOR < 58)
 #endif
 #ifndef FF_API_AUDIO_CONVERT
-#define FF_API_AUDIO_CONVERT     (LIBAVCODEC_VERSION_MAJOR < 57)
+#define FF_API_AUDIO_CONVERT     (LIBAVCODEC_VERSION_MAJOR < 58)
 #endif
 #ifndef FF_API_AVCODEC_RESAMPLE
 #define FF_API_AVCODEC_RESAMPLE  FF_API_AUDIO_CONVERT
 #endif
-#ifndef FF_API_DEINTERLACE
-#define FF_API_DEINTERLACE       (LIBAVCODEC_VERSION_MAJOR < 57)
-#endif
-#ifndef FF_API_DESTRUCT_PACKET
-#define FF_API_DESTRUCT_PACKET   (LIBAVCODEC_VERSION_MAJOR < 57)
-#endif
-#ifndef FF_API_GET_BUFFER
-#define FF_API_GET_BUFFER        (LIBAVCODEC_VERSION_MAJOR < 57)
+#ifndef FF_API_GETCHROMA
+#define FF_API_GETCHROMA         (LIBAVCODEC_VERSION_MAJOR < 58)
 #endif
 #ifndef FF_API_MISSING_SAMPLE
-#define FF_API_MISSING_SAMPLE    (LIBAVCODEC_VERSION_MAJOR < 57)
+#define FF_API_MISSING_SAMPLE    (LIBAVCODEC_VERSION_MAJOR < 58)
 #endif
 #ifndef FF_API_LOWRES
-#define FF_API_LOWRES            (LIBAVCODEC_VERSION_MAJOR < 57)
+#define FF_API_LOWRES            (LIBAVCODEC_VERSION_MAJOR < 58)
 #endif
 #ifndef FF_API_CAP_VDPAU
-#define FF_API_CAP_VDPAU         (LIBAVCODEC_VERSION_MAJOR < 57)
+#define FF_API_CAP_VDPAU         (LIBAVCODEC_VERSION_MAJOR < 58)
 #endif
 #ifndef FF_API_BUFS_VDPAU
-#define FF_API_BUFS_VDPAU        (LIBAVCODEC_VERSION_MAJOR < 57)
+#define FF_API_BUFS_VDPAU        (LIBAVCODEC_VERSION_MAJOR < 58)
 #endif
 #ifndef FF_API_VOXWARE
-#define FF_API_VOXWARE           (LIBAVCODEC_VERSION_MAJOR < 57)
+#define FF_API_VOXWARE           (LIBAVCODEC_VERSION_MAJOR < 58)
 #endif
 #ifndef FF_API_SET_DIMENSIONS
-#define FF_API_SET_DIMENSIONS    (LIBAVCODEC_VERSION_MAJOR < 57)
+#define FF_API_SET_DIMENSIONS    (LIBAVCODEC_VERSION_MAJOR < 58)
 #endif
 #ifndef FF_API_DEBUG_MV
-#define FF_API_DEBUG_MV          (LIBAVCODEC_VERSION_MAJOR < 57)
+#define FF_API_DEBUG_MV          (LIBAVCODEC_VERSION_MAJOR < 58)
 #endif
 #ifndef FF_API_AC_VLC
-#define FF_API_AC_VLC            (LIBAVCODEC_VERSION_MAJOR < 57)
+#define FF_API_AC_VLC            (LIBAVCODEC_VERSION_MAJOR < 58)
 #endif
 #ifndef FF_API_OLD_MSMPEG4
-#define FF_API_OLD_MSMPEG4       (LIBAVCODEC_VERSION_MAJOR < 57)
+#define FF_API_OLD_MSMPEG4       (LIBAVCODEC_VERSION_MAJOR < 58)
 #endif
 #ifndef FF_API_ASPECT_EXTENDED
-#define FF_API_ASPECT_EXTENDED   (LIBAVCODEC_VERSION_MAJOR < 57)
-#endif
-#ifndef FF_API_THREAD_OPAQUE
-#define FF_API_THREAD_OPAQUE     (LIBAVCODEC_VERSION_MAJOR < 57)
-#endif
-#ifndef FF_API_CODEC_PKT
-#define FF_API_CODEC_PKT         (LIBAVCODEC_VERSION_MAJOR < 57)
+#define FF_API_ASPECT_EXTENDED   (LIBAVCODEC_VERSION_MAJOR < 58)
 #endif
 #ifndef FF_API_ARCH_ALPHA
-#define FF_API_ARCH_ALPHA        (LIBAVCODEC_VERSION_MAJOR < 57)
+#define FF_API_ARCH_ALPHA        (LIBAVCODEC_VERSION_MAJOR < 58)
 #endif
 #ifndef FF_API_XVMC
-#define FF_API_XVMC              (LIBAVCODEC_VERSION_MAJOR < 57)
+#define FF_API_XVMC              (LIBAVCODEC_VERSION_MAJOR < 58)
 #endif
 #ifndef FF_API_ERROR_RATE
-#define FF_API_ERROR_RATE        (LIBAVCODEC_VERSION_MAJOR < 57)
+#define FF_API_ERROR_RATE        (LIBAVCODEC_VERSION_MAJOR < 58)
 #endif
 #ifndef FF_API_QSCALE_TYPE
-#define FF_API_QSCALE_TYPE       (LIBAVCODEC_VERSION_MAJOR < 57)
+#define FF_API_QSCALE_TYPE       (LIBAVCODEC_VERSION_MAJOR < 58)
 #endif
 #ifndef FF_API_MB_TYPE
-#define FF_API_MB_TYPE           (LIBAVCODEC_VERSION_MAJOR < 57)
+#define FF_API_MB_TYPE           (LIBAVCODEC_VERSION_MAJOR < 58)
 #endif
 #ifndef FF_API_MAX_BFRAMES
-#define FF_API_MAX_BFRAMES       (LIBAVCODEC_VERSION_MAJOR < 57)
+#define FF_API_MAX_BFRAMES       (LIBAVCODEC_VERSION_MAJOR < 58)
 #endif
 #ifndef FF_API_NEG_LINESIZES
-#define FF_API_NEG_LINESIZES     (LIBAVCODEC_VERSION_MAJOR < 57)
+#define FF_API_NEG_LINESIZES     (LIBAVCODEC_VERSION_MAJOR < 58)
 #endif
 #ifndef FF_API_EMU_EDGE
-#define FF_API_EMU_EDGE          (LIBAVCODEC_VERSION_MAJOR < 57)
+#define FF_API_EMU_EDGE          (LIBAVCODEC_VERSION_MAJOR < 58)
 #endif
 #ifndef FF_API_ARCH_SH4
-#define FF_API_ARCH_SH4          (LIBAVCODEC_VERSION_MAJOR < 57)
+#define FF_API_ARCH_SH4          (LIBAVCODEC_VERSION_MAJOR < 58)
 #endif
 #ifndef FF_API_ARCH_SPARC
-#define FF_API_ARCH_SPARC        (LIBAVCODEC_VERSION_MAJOR < 57)
+#define FF_API_ARCH_SPARC        (LIBAVCODEC_VERSION_MAJOR < 58)
 #endif
 #ifndef FF_API_UNUSED_MEMBERS
-#define FF_API_UNUSED_MEMBERS    (LIBAVCODEC_VERSION_MAJOR < 57)
+#define FF_API_UNUSED_MEMBERS    (LIBAVCODEC_VERSION_MAJOR < 58)
 #endif
 #ifndef FF_API_IDCT_XVIDMMX
-#define FF_API_IDCT_XVIDMMX      (LIBAVCODEC_VERSION_MAJOR < 57)
+#define FF_API_IDCT_XVIDMMX      (LIBAVCODEC_VERSION_MAJOR < 58)
 #endif
 #ifndef FF_API_INPUT_PRESERVED
-#define FF_API_INPUT_PRESERVED   (LIBAVCODEC_VERSION_MAJOR < 57)
+#define FF_API_INPUT_PRESERVED   (LIBAVCODEC_VERSION_MAJOR < 58)
 #endif
 #ifndef FF_API_NORMALIZE_AQP
-#define FF_API_NORMALIZE_AQP     (LIBAVCODEC_VERSION_MAJOR < 57)
+#define FF_API_NORMALIZE_AQP     (LIBAVCODEC_VERSION_MAJOR < 58)
 #endif
 #ifndef FF_API_GMC
-#define FF_API_GMC               (LIBAVCODEC_VERSION_MAJOR < 57)
+#define FF_API_GMC               (LIBAVCODEC_VERSION_MAJOR < 58)
 #endif
 #ifndef FF_API_MV0
-#define FF_API_MV0               (LIBAVCODEC_VERSION_MAJOR < 57)
+#define FF_API_MV0               (LIBAVCODEC_VERSION_MAJOR < 58)
 #endif
 #ifndef FF_API_CODEC_NAME
-#define FF_API_CODEC_NAME        (LIBAVCODEC_VERSION_MAJOR < 57)
+#define FF_API_CODEC_NAME        (LIBAVCODEC_VERSION_MAJOR < 58)
 #endif
 #ifndef FF_API_AFD
-#define FF_API_AFD               (LIBAVCODEC_VERSION_MAJOR < 57)
+#define FF_API_AFD               (LIBAVCODEC_VERSION_MAJOR < 58)
 #endif
 #ifndef FF_API_VISMV
 /* XXX: don't forget to drop the -vismv documentation */
-#define FF_API_VISMV             (LIBAVCODEC_VERSION_MAJOR < 57)
-#endif
-#ifndef FF_API_DV_FRAME_PROFILE
-#define FF_API_DV_FRAME_PROFILE  (LIBAVCODEC_VERSION_MAJOR < 57)
+#define FF_API_VISMV             (LIBAVCODEC_VERSION_MAJOR < 58)
 #endif
 #ifndef FF_API_AUDIOENC_DELAY
 #define FF_API_AUDIOENC_DELAY    (LIBAVCODEC_VERSION_MAJOR < 58)
 #endif
+#ifndef FF_API_VAAPI_CONTEXT
+#define FF_API_VAAPI_CONTEXT     (LIBAVCODEC_VERSION_MAJOR < 58)
+#endif
 #ifndef FF_API_AVCTX_TIMEBASE
 #define FF_API_AVCTX_TIMEBASE    (LIBAVCODEC_VERSION_MAJOR < 59)
 #endif
@@ -190,5 +167,50 @@
 #ifndef FF_API_STREAM_CODEC_TAG
 #define FF_API_STREAM_CODEC_TAG  (LIBAVCODEC_VERSION_MAJOR < 59)
 #endif
+#ifndef FF_API_QUANT_BIAS
+#define FF_API_QUANT_BIAS        (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_RC_STRATEGY
+#define FF_API_RC_STRATEGY       (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_CODED_FRAME
+#define FF_API_CODED_FRAME       (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_MOTION_EST
+#define FF_API_MOTION_EST        (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_WITHOUT_PREFIX
+#define FF_API_WITHOUT_PREFIX    (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_SIDEDATA_ONLY_PKT
+#define FF_API_SIDEDATA_ONLY_PKT (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_VDPAU_PROFILE
+#define FF_API_VDPAU_PROFILE     (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_CONVERGENCE_DURATION
+#define FF_API_CONVERGENCE_DURATION (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_AVPICTURE
+#define FF_API_AVPICTURE         (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_AVPACKET_OLD_API
+#define FF_API_AVPACKET_OLD_API (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_RTP_CALLBACK
+#define FF_API_RTP_CALLBACK      (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_VBV_DELAY
+#define FF_API_VBV_DELAY         (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_CODER_TYPE
+#define FF_API_CODER_TYPE        (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_STAT_BITS
+#define FF_API_STAT_BITS         (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_PRIVATE_OPT
+#define FF_API_PRIVATE_OPT      (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
 
 #endif /* AVCODEC_VERSION_H */
diff --git a/libavcodec/videodsp_template.c b/libavcodec/videodsp_template.c
index c569c30d..94c1b718 100644
--- a/libavcodec/videodsp_template.c
+++ b/libavcodec/videodsp_template.c
@@ -32,6 +32,8 @@ void FUNC(ff_emulated_edge_mc)(uint8_t *buf, const uint8_t *src,
     if (!w || !h)
         return;
 
+    av_assert2(block_w * sizeof(pixel) <= FFABS(buf_linesize));
+
     if (src_y >= h) {
         src -= src_y * src_linesize;
         src += (h - 1) * src_linesize;
diff --git a/libavcodec/videotoolbox.c b/libavcodec/videotoolbox.c
new file mode 100644
index 00000000..2f4d5316
--- /dev/null
+++ b/libavcodec/videotoolbox.c
@@ -0,0 +1,701 @@
+/*
+ * Videotoolbox hardware acceleration
+ *
+ * copyright (c) 2012 Sebastien Zwickert
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#if CONFIG_VIDEOTOOLBOX
+#  include "videotoolbox.h"
+#else
+#  include "vda.h"
+#endif
+#include "vda_vt_internal.h"
+#include "libavutil/avutil.h"
+#include "bytestream.h"
+#include "h264.h"
+#include "mpegvideo.h"
+
+#ifndef kVTVideoDecoderSpecification_RequireHardwareAcceleratedVideoDecoder
+#  define kVTVideoDecoderSpecification_RequireHardwareAcceleratedVideoDecoder CFSTR("RequireHardwareAcceleratedVideoDecoder")
+#endif
+
+#define VIDEOTOOLBOX_ESDS_EXTRADATA_PADDING  12
+
+static void videotoolbox_buffer_release(void *opaque, uint8_t *data)
+{
+    /* AVBuffer free callback: data is the CVPixelBuffer reference itself. */
+    CVPixelBufferRelease((CVPixelBufferRef)data);
+}
+
+static int videotoolbox_buffer_copy(VTContext *vtctx,
+                                    const uint8_t *buffer,
+                                    uint32_t size)
+{
+    /* Replace the pending bitstream with a private copy of buffer[0..size). */
+    void *grown = av_fast_realloc(vtctx->bitstream,
+                                  &vtctx->allocated_size,
+                                  size);
+
+    /* vtctx->bitstream is not overwritten when the reallocation fails. */
+    if (!grown)
+        return AVERROR(ENOMEM);
+
+    memcpy(grown, buffer, size);
+    vtctx->bitstream      = grown;
+    vtctx->bitstream_size = size;
+
+    return 0;
+}
+
+int ff_videotoolbox_alloc_frame(AVCodecContext *avctx, AVFrame *frame)
+{
+    /* Mirror the codec geometry and pixel format onto the frame. */
+    frame->format = avctx->pix_fmt;
+    frame->height = avctx->height;
+    frame->width  = avctx->width;
+
+    /* One-byte placeholder buffer; ff_videotoolbox_buffer_create() unrefs
+     * buf[0] and installs the real reference. */
+    frame->buf[0] = av_buffer_alloc(1);
+    return frame->buf[0] ? 0 : AVERROR(ENOMEM);
+}
+
+#define AV_W8(p, v) (*(p) = (v)) /* byte store; parenthesised so it expands as one expression */
+
+CFDataRef ff_videotoolbox_avcc_extradata_create(AVCodecContext *avctx)
+{
+    H264Context *h     = avctx->priv_data;
+    CFDataRef data = NULL;
+    uint8_t *p;
+    int vt_extradata_size = 6 + 3 + h->sps.data_size + 4 + h->pps.data_size;
+    uint8_t *vt_extradata = av_malloc(vt_extradata_size);
+    if (!vt_extradata)
+        return NULL;
+    /* Lay out an avcC record: 6-byte header, one SPS entry, one PPS entry. */
+    p = vt_extradata;
+
+    AV_W8(p + 0, 1); /* version */
+    AV_W8(p + 1, h->sps.data[0]); /* profile */
+    AV_W8(p + 2, h->sps.data[1]); /* profile compat */
+    AV_W8(p + 3, h->sps.data[2]); /* level */
+    AV_W8(p + 4, 0xff); /* 6 bits reserved (111111) + 2 bits NAL length size minus one (11 = 4-byte lengths) */
+    AV_W8(p + 5, 0xe1); /* 3 bits reserved (111) + 5 bits number of sps (00001) */
+    AV_WB16(p + 6, h->sps.data_size + 1); /* SPS size including the NAL header byte below */
+    AV_W8(p + 8, NAL_SPS | (3 << 5)); // NAL unit header
+    memcpy(p + 9, h->sps.data, h->sps.data_size);
+    p += 9 + h->sps.data_size;
+    AV_W8(p + 0, 1); /* number of pps */
+    AV_WB16(p + 1, h->pps.data_size + 1); /* PPS size including the NAL header byte below */
+    AV_W8(p + 3, NAL_PPS | (3 << 5)); // NAL unit header
+    memcpy(p + 4, h->pps.data, h->pps.data_size);
+    /* Cross-check the write cursor against the size computed up front. */
+    p += 4 + h->pps.data_size;
+    av_assert0(p - vt_extradata == vt_extradata_size);
+    /* The scratch buffer is freed right after the CFData has been created. */
+    data = CFDataCreate(kCFAllocatorDefault, vt_extradata, vt_extradata_size);
+    av_free(vt_extradata);
+    return data;
+}
+
+int ff_videotoolbox_buffer_create(VTContext *vtctx, AVFrame *frame)
+{
+    uint8_t *pixbuf = (uint8_t*)vtctx->frame;
+
+    /* Drop whatever buf[0] held and wrap the decoded CVPixelBuffer instead;
+     * videotoolbox_buffer_release() is registered as its free callback. */
+    av_buffer_unref(&frame->buf[0]);
+    frame->buf[0] = av_buffer_create(pixbuf, sizeof(vtctx->frame),
+                                     videotoolbox_buffer_release,
+                                     NULL, AV_BUFFER_FLAG_READONLY);
+    if (!frame->buf[0])
+        return AVERROR(ENOMEM);
+
+    /* The pixel buffer now belongs to the frame and is exposed via data[3]. */
+    frame->data[3] = pixbuf;
+    vtctx->frame   = NULL;
+    return 0;
+}
+
+int ff_videotoolbox_h264_start_frame(AVCodecContext *avctx,
+                                     const uint8_t *buffer,
+                                     uint32_t size)
+{
+    const H264Context *h264 = avctx->priv_data;
+    VTContext *vtctx        = avctx->internal->hwaccel_priv_data;
+
+    vtctx->bitstream_size = 0; /* every frame starts from an empty bitstream */
+
+    /* is_avc == 1: the whole input is copied here and decode_slice is a
+     * no-op; otherwise decode_slice appends the slices one by one. */
+    if (h264->is_avc != 1)
+        return 0;
+    return videotoolbox_buffer_copy(vtctx, buffer, size);
+}
+
+int ff_videotoolbox_h264_decode_slice(AVCodecContext *avctx,
+                                      const uint8_t *buffer,
+                                      uint32_t size)
+{
+    const H264Context *h264 = avctx->priv_data;
+    VTContext *vtctx        = avctx->internal->hwaccel_priv_data;
+    void *grown;
+
+    /* For is_avc == 1 the start_frame hook already copied the whole input. */
+    if (h264->is_avc == 1)
+        return 0;
+
+    /* Make room for a 4-byte length prefix plus the slice payload. */
+    grown = av_fast_realloc(vtctx->bitstream,
+                            &vtctx->allocated_size,
+                            vtctx->bitstream_size+size+4);
+    if (!grown)
+        return AVERROR(ENOMEM);
+    vtctx->bitstream = grown;
+
+    /* Append the slice as <32-bit size written by AV_WB32><data>. */
+    AV_WB32(vtctx->bitstream + vtctx->bitstream_size, size);
+    memcpy(vtctx->bitstream + vtctx->bitstream_size + 4, buffer, size);
+    vtctx->bitstream_size += size + 4;
+    return 0;
+}
+
+int ff_videotoolbox_uninit(AVCodecContext *avctx)
+{
+    VTContext *vtctx = avctx->internal->hwaccel_priv_data;
+
+    if (!vtctx)
+        return 0; /* no private context, nothing to release */
+    av_freep(&vtctx->bitstream);
+    if (vtctx->frame)
+        CVPixelBufferRelease(vtctx->frame); /* output not yet moved into an AVFrame */
+    return 0;
+}
+
+#if CONFIG_VIDEOTOOLBOX
+static void videotoolbox_write_mp4_descr_length(PutByteContext *pb, int length)
+{
+    int shift;
+
+    /* Always emit four bytes, most significant 7-bit group first; bit 7 is
+     * set on every byte except the last one. */
+    for (shift = 21; shift >= 0; shift -= 7) {
+        uint8_t septet = (length >> shift) & 0x7F;
+        if (shift)
+            septet |= 0x80;
+        bytestream2_put_byteu(pb, septet);
+    }
+}
+
+static CFDataRef videotoolbox_esds_extradata_create(AVCodecContext *avctx)
+{
+    CFDataRef data;
+    uint8_t *rw_extradata;
+    PutByteContext pb;
+    int full_size = 3 + 5 + 13 + 5 + avctx->extradata_size + 3;
+    // ES_DescrTag data + DecoderConfigDescrTag + data + DecSpecificInfoTag + size + SLConfigDescriptor
+    int config_size = 13 + 5 + avctx->extradata_size;
+    int s;
+    // Build the "esds" payload wrapping avctx->extradata; returns NULL if allocation fails.
+    if (!(rw_extradata = av_mallocz(full_size + VIDEOTOOLBOX_ESDS_EXTRADATA_PADDING)))
+        return NULL;
+
+    bytestream2_init_writer(&pb, rw_extradata, full_size + VIDEOTOOLBOX_ESDS_EXTRADATA_PADDING);
+    bytestream2_put_byteu(&pb, 0);        // version
+    bytestream2_put_ne24(&pb, 0);         // flags
+
+    // elementary stream descriptor
+    bytestream2_put_byteu(&pb, 0x03);     // ES_DescrTag
+    videotoolbox_write_mp4_descr_length(&pb, full_size);
+    bytestream2_put_ne16(&pb, 0);         // esid
+    bytestream2_put_byteu(&pb, 0);        // stream priority (0-32)
+
+    // decoder configuration descriptor
+    bytestream2_put_byteu(&pb, 0x04);     // DecoderConfigDescrTag
+    videotoolbox_write_mp4_descr_length(&pb, config_size);
+    bytestream2_put_byteu(&pb, 32);       // object type indication. 32 = AV_CODEC_ID_MPEG4
+    bytestream2_put_byteu(&pb, 0x11);     // stream type
+    bytestream2_put_ne24(&pb, 0);         // buffer size
+    bytestream2_put_ne32(&pb, 0);         // max bitrate
+    bytestream2_put_ne32(&pb, 0);         // avg bitrate
+
+    // decoder specific descriptor
+    bytestream2_put_byteu(&pb, 0x05);     // DecSpecificInfoTag
+    videotoolbox_write_mp4_descr_length(&pb, avctx->extradata_size);
+
+    bytestream2_put_buffer(&pb, avctx->extradata, avctx->extradata_size);
+
+    // SLConfigDescriptor
+    bytestream2_put_byteu(&pb, 0x06);     // SLConfigDescrTag
+    bytestream2_put_byteu(&pb, 0x01);     // length
+    bytestream2_put_byteu(&pb, 0x02);     // payload byte (NOTE(review): presumably the "predefined" field - confirm)
+
+    s = bytestream2_size_p(&pb);
+
+    data = CFDataCreate(kCFAllocatorDefault, rw_extradata, s);
+
+    av_freep(&rw_extradata);
+    return data;
+}
+
+static CMSampleBufferRef videotoolbox_sample_buffer_create(CMFormatDescriptionRef fmt_desc,
+                                                           void *buffer,
+                                                           int size)
+{
+    CMBlockBufferRef  blk    = NULL;
+    CMSampleBufferRef sample = NULL;
+    OSStatus err;
+
+    /* Wrap the caller's memory in place; kCFAllocatorNull is passed as the
+     * block allocator, so `buffer` stays owned by the caller. */
+    err = CMBlockBufferCreateWithMemoryBlock(kCFAllocatorDefault,// structureAllocator
+                                             buffer,             // memoryBlock
+                                             size,               // blockLength
+                                             kCFAllocatorNull,   // blockAllocator
+                                             NULL,               // customBlockSource
+                                             0,                  // offsetToData
+                                             size,               // dataLength
+                                             0,                  // flags
+                                             &blk);
+
+    /* The sample buffer is only attempted once the block buffer exists. */
+    if (err == noErr)
+        err = CMSampleBufferCreate(kCFAllocatorDefault,  // allocator
+                                   blk,                  // dataBuffer
+                                   TRUE,                 // dataReady
+                                   0,                    // makeDataReadyCallback
+                                   0,                    // makeDataReadyRefcon
+                                   fmt_desc,             // formatDescription
+                                   1,                    // numSamples
+                                   0,                    // numSampleTimingEntries
+                                   NULL,                 // sampleTimingArray
+                                   0,                    // numSampleSizeEntries
+                                   NULL,                 // sampleSizeArray
+                                   &sample);
+
+    /* Drop the local block reference; `sample` was initialised to NULL. */
+    if (blk)
+        CFRelease(blk);
+
+    return sample;
+}
+
+static void videotoolbox_decoder_callback(void *opaque,
+                                          void *sourceFrameRefCon,
+                                          OSStatus status,
+                                          VTDecodeInfoFlags flags,
+                                          CVImageBufferRef image_buffer,
+                                          CMTime pts,
+                                          CMTime duration)
+{
+    AVCodecContext *avctx = opaque;
+    VTContext *vtctx = avctx->internal->hwaccel_priv_data;
+
+    /* Drop a previous output that was never moved into an AVFrame. */
+    if (vtctx->frame) {
+        CVPixelBufferRelease(vtctx->frame);
+        vtctx->frame = NULL;
+    }
+    /* No image: frame stays NULL; log against avctx so the message has context. */
+    if (!image_buffer) {
+        av_log(avctx, AV_LOG_DEBUG, "vt decoder cb: output image buffer is null\n");
+        return;
+    }
+    vtctx->frame = CVPixelBufferRetain(image_buffer); /* retained for end_frame */
+}
+
+static OSStatus videotoolbox_session_decode_frame(AVCodecContext *avctx)
+{
+    AVVideotoolboxContext *vt = avctx->hwaccel_context;
+    VTContext *vtctx          = avctx->internal->hwaccel_priv_data;
+    OSStatus err;
+    CMSampleBufferRef sample;
+
+    /* Wrap the accumulated bitstream in a single-sample CMSampleBuffer. */
+    sample = videotoolbox_sample_buffer_create(vt->cm_fmt_desc,
+                                               vtctx->bitstream,
+                                               vtctx->bitstream_size);
+    if (!sample)
+        return -1;
+
+    /* Submit the sample, then wait for any asynchronous frames. */
+    err = VTDecompressionSessionDecodeFrame(vt->session,
+                                            sample,
+                                            0,       // decodeFlags
+                                            NULL,    // sourceFrameRefCon
+                                            0);      // infoFlagsOut
+    if (!err)
+        err = VTDecompressionSessionWaitForAsynchronousFrames(vt->session);
+
+    CFRelease(sample);
+    return err;
+}
+
+static int videotoolbox_common_end_frame(AVCodecContext *avctx, AVFrame *frame)
+{
+    AVVideotoolboxContext *vt = avctx->hwaccel_context;
+    VTContext *vtctx          = avctx->internal->hwaccel_priv_data;
+    int err;
+
+    /* The buffer currently in buf[0] is dropped whatever happens next. */
+    av_buffer_unref(&frame->buf[0]);
+
+    /* A session and some buffered bitstream are both required to decode. */
+    if (!(vt->session && vtctx->bitstream))
+        return AVERROR_INVALIDDATA;
+
+    err = videotoolbox_session_decode_frame(avctx);
+    if (err) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to decode frame (%d)\n", err);
+        return AVERROR_UNKNOWN;
+    }
+
+    /* Success without an output frame is reported as an error as well. */
+    return vtctx->frame ? ff_videotoolbox_buffer_create(vtctx, frame)
+                        : AVERROR_UNKNOWN;
+}
+
+static int videotoolbox_h264_end_frame(AVCodecContext *avctx)
+{
+    H264Context *h264 = avctx->priv_data;
+
+    /* Decode into the frame behind the H.264 decoder's current picture. */
+    return videotoolbox_common_end_frame(avctx, h264->cur_pic_ptr->f);
+}
+
+static int videotoolbox_mpeg_start_frame(AVCodecContext *avctx,
+                                         const uint8_t *buffer,
+                                         uint32_t size)
+{
+    /* The complete frame data is copied up front; decode_slice is a no-op. */
+    return videotoolbox_buffer_copy(avctx->internal->hwaccel_priv_data,
+                                    buffer, size);
+}
+
+static int videotoolbox_mpeg_decode_slice(AVCodecContext *avctx,
+                                          const uint8_t *buffer,
+                                          uint32_t size)
+{
+    return 0; /* nothing to do: videotoolbox_mpeg_start_frame() already copied the data */
+}
+
+static int videotoolbox_mpeg_end_frame(AVCodecContext *avctx)
+{
+    MpegEncContext *mpeg = avctx->priv_data;
+
+    /* Decode into the frame behind the MPEG decoder's current picture. */
+    return videotoolbox_common_end_frame(avctx, mpeg->current_picture_ptr->f);
+}
+
+static CFDictionaryRef videotoolbox_decoder_config_create(CMVideoCodecType codec_type,
+                                                          AVCodecContext *avctx)
+{
+    CFMutableDictionaryRef atoms;
+    CFStringRef atom_name = NULL;
+    CFDataRef atom        = NULL;
+    CFMutableDictionaryRef spec = CFDictionaryCreateMutable(kCFAllocatorDefault,
+                                                            1,
+                                                            &kCFTypeDictionaryKeyCallBacks,
+                                                            &kCFTypeDictionaryValueCallBacks);
+
+    /* Hardware decoding is always requested as a requirement. */
+    CFDictionarySetValue(spec,
+                         kVTVideoDecoderSpecification_RequireHardwareAcceleratedVideoDecoder,
+                         kCFBooleanTrue);
+    if (!avctx->extradata_size)
+        return spec;
+
+    /* With extradata present, attach it as a sample description atom. */
+    atoms = CFDictionaryCreateMutable(kCFAllocatorDefault,
+                                      1,
+                                      &kCFTypeDictionaryKeyCallBacks,
+                                      &kCFTypeDictionaryValueCallBacks);
+    switch (codec_type) {
+    case kCMVideoCodecType_MPEG4Video :
+        atom_name = CFSTR("esds");
+        atom      = videotoolbox_esds_extradata_create(avctx);
+        break;
+    case kCMVideoCodecType_H264 :
+        atom_name = CFSTR("avcC");
+        atom      = ff_videotoolbox_avcc_extradata_create(avctx);
+        break;
+    default:
+        break;
+    }
+    if (atom)
+        CFDictionarySetValue(atoms, atom_name, atom);
+    CFDictionarySetValue(spec,
+            kCMFormatDescriptionExtension_SampleDescriptionExtensionAtoms,
+            atoms);
+
+    /* The values now live in the dictionaries; drop the local references. */
+    if (atom)
+        CFRelease(atom);
+    CFRelease(atoms);
+    return spec;
+}
+
+static CFDictionaryRef videotoolbox_buffer_attributes_create(int width,
+                                                             int height,
+                                                             OSType pix_fmt)
+{
+    /* Box the scalar arguments as CFNumbers so they can be dictionary values. */
+    CFNumberRef num_w   = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &width);
+    CFNumberRef num_h   = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &height);
+    CFNumberRef num_fmt = CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &pix_fmt);
+    CFMutableDictionaryRef attrs;
+    CFMutableDictionaryRef surface_props;
+
+    attrs = CFDictionaryCreateMutable(kCFAllocatorDefault,
+                                      4,
+                                      &kCFTypeDictionaryKeyCallBacks,
+                                      &kCFTypeDictionaryValueCallBacks);
+
+    /* The IOSurface properties dictionary is attached empty. */
+    surface_props = CFDictionaryCreateMutable(kCFAllocatorDefault,
+                                              0,
+                                              &kCFTypeDictionaryKeyCallBacks,
+                                              &kCFTypeDictionaryValueCallBacks);
+
+    CFDictionarySetValue(attrs, kCVPixelBufferWidthKey, num_w);
+    CFDictionarySetValue(attrs, kCVPixelBufferHeightKey, num_h);
+    CFDictionarySetValue(attrs, kCVPixelBufferPixelFormatTypeKey, num_fmt);
+    CFDictionarySetValue(attrs, kCVPixelBufferIOSurfacePropertiesKey, surface_props);
+
+    /* Everything is stored in attrs now; release the local references. */
+    CFRelease(num_w);
+    CFRelease(num_h);
+    CFRelease(num_fmt);
+    CFRelease(surface_props);
+
+    return attrs;
+}
+
+static CMVideoFormatDescriptionRef videotoolbox_format_desc_create(CMVideoCodecType codec_type,
+                                                                   CFDictionaryRef decoder_spec,
+                                                                   int width,
+                                                                   int height)
+{
+    CMFormatDescriptionRef desc;
+
+    /* Any non-zero OSStatus is treated as failure and reported as NULL;
+     * the status value itself is not propagated. */
+    if (CMVideoFormatDescriptionCreate(kCFAllocatorDefault,
+                                       codec_type,
+                                       width,
+                                       height,
+                                       decoder_spec, // Dictionary of extension
+                                       &desc))
+        return NULL;
+
+    /* The caller receives the reference and is responsible for releasing it. */
+    return desc;
+}
+
+static int videotoolbox_default_init(AVCodecContext *avctx)
+{
+    AVVideotoolboxContext *videotoolbox = avctx->hwaccel_context;
+    OSStatus status;
+    VTDecompressionOutputCallbackRecord decoder_cb;
+    CFDictionaryRef decoder_spec;
+    CFDictionaryRef buf_attr;
+    /* Fill in avctx->hwaccel_context: codec type, format description, session. */
+    if (!videotoolbox) {
+        av_log(avctx, AV_LOG_ERROR, "hwaccel context is not set\n");
+        return -1;
+    }
+    /* Map the libavcodec codec id onto the CoreMedia codec type. */
+    switch( avctx->codec_id ) {
+    case AV_CODEC_ID_H263 :
+        videotoolbox->cm_codec_type = kCMVideoCodecType_H263;
+        break;
+    case AV_CODEC_ID_H264 :
+        videotoolbox->cm_codec_type = kCMVideoCodecType_H264;
+        break;
+    case AV_CODEC_ID_MPEG1VIDEO :
+        videotoolbox->cm_codec_type = kCMVideoCodecType_MPEG1Video;
+        break;
+    case AV_CODEC_ID_MPEG2VIDEO :
+        videotoolbox->cm_codec_type = kCMVideoCodecType_MPEG2Video;
+        break;
+    case AV_CODEC_ID_MPEG4 :
+        videotoolbox->cm_codec_type = kCMVideoCodecType_MPEG4Video;
+        break;
+    default :
+        break;
+    }
+    /* Codec ids not listed above leave cm_codec_type as it was on entry. */
+    decoder_spec = videotoolbox_decoder_config_create(videotoolbox->cm_codec_type, avctx);
+
+    videotoolbox->cm_fmt_desc = videotoolbox_format_desc_create(videotoolbox->cm_codec_type,
+                                                                decoder_spec,
+                                                                avctx->width,
+                                                                avctx->height);
+    if (!videotoolbox->cm_fmt_desc) {
+        if (decoder_spec)
+            CFRelease(decoder_spec);
+
+        av_log(avctx, AV_LOG_ERROR, "format description creation failed\n");
+        return -1;
+    }
+
+    buf_attr = videotoolbox_buffer_attributes_create(avctx->width,
+                                                     avctx->height,
+                                                     videotoolbox->cv_pix_fmt_type);
+    /* The output callback receives avctx back as its first (opaque) argument. */
+    decoder_cb.decompressionOutputCallback = videotoolbox_decoder_callback;
+    decoder_cb.decompressionOutputRefCon   = avctx;
+
+    status = VTDecompressionSessionCreate(NULL,                      // allocator
+                                          videotoolbox->cm_fmt_desc, // videoFormatDescription
+                                          decoder_spec,              // videoDecoderSpecification
+                                          buf_attr,                  // destinationImageBufferAttributes
+                                          &decoder_cb,               // outputCallback
+                                          &videotoolbox->session);   // decompressionSessionOut
+    /* Both dictionaries are released here whatever the creation status is. */
+    if (decoder_spec)
+        CFRelease(decoder_spec);
+    if (buf_attr)
+        CFRelease(buf_attr);
+    /* Translate the VideoToolbox status into an AVERROR code. */
+    switch (status) {
+    case kVTVideoDecoderNotAvailableNowErr:
+    case kVTVideoDecoderUnsupportedDataFormatErr:
+        return AVERROR(ENOSYS);
+    case kVTVideoDecoderMalfunctionErr:
+        return AVERROR(EINVAL);
+    case kVTVideoDecoderBadDataErr :
+        return AVERROR_INVALIDDATA;
+    case 0:
+        return 0;
+    default:
+        return AVERROR_UNKNOWN;
+    }
+}
+
+static void videotoolbox_default_free(AVCodecContext *avctx)
+{
+    AVVideotoolboxContext *videotoolbox = avctx->hwaccel_context;
+    if (videotoolbox) { /* tear down what videotoolbox_default_init() created */
+        if (videotoolbox->cm_fmt_desc)
+            CFRelease(videotoolbox->cm_fmt_desc);
+        if (videotoolbox->session) {
+            VTDecompressionSessionInvalidate(videotoolbox->session);
+            CFRelease(videotoolbox->session); /* invalidating alone leaks the reference */
+        }
+    }
+}
+
+AVHWAccel ff_h263_videotoolbox_hwaccel = {
+    .name           = "h263_videotoolbox",
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_H263,
+    .pix_fmt        = AV_PIX_FMT_VIDEOTOOLBOX,
+    .alloc_frame    = ff_videotoolbox_alloc_frame,
+    .start_frame    = videotoolbox_mpeg_start_frame,  /* copies the whole frame data */
+    .decode_slice   = videotoolbox_mpeg_decode_slice, /* no-op */
+    .end_frame      = videotoolbox_mpeg_end_frame,    /* submits the copy to the session */
+    .uninit         = ff_videotoolbox_uninit,
+    .priv_data_size = sizeof(VTContext),
+};
+
+AVHWAccel ff_h264_videotoolbox_hwaccel = {
+    .name           = "h264_videotoolbox",
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_H264,
+    .pix_fmt        = AV_PIX_FMT_VIDEOTOOLBOX,
+    .alloc_frame    = ff_videotoolbox_alloc_frame,
+    .start_frame    = ff_videotoolbox_h264_start_frame,  /* copies the input only when is_avc == 1 */
+    .decode_slice   = ff_videotoolbox_h264_decode_slice, /* otherwise appends size-prefixed slices */
+    .end_frame      = videotoolbox_h264_end_frame,       /* submits the bitstream to the session */
+    .uninit         = ff_videotoolbox_uninit,
+    .priv_data_size = sizeof(VTContext),
+};
+
+AVHWAccel ff_mpeg1_videotoolbox_hwaccel = {
+    .name           = "mpeg1_videotoolbox",
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_MPEG1VIDEO,
+    .pix_fmt        = AV_PIX_FMT_VIDEOTOOLBOX,
+    .alloc_frame    = ff_videotoolbox_alloc_frame,
+    .start_frame    = videotoolbox_mpeg_start_frame,  /* copies the whole frame data */
+    .decode_slice   = videotoolbox_mpeg_decode_slice, /* no-op */
+    .end_frame      = videotoolbox_mpeg_end_frame,    /* submits the copy to the session */
+    .uninit         = ff_videotoolbox_uninit,
+    .priv_data_size = sizeof(VTContext),
+};
+
+AVHWAccel ff_mpeg2_videotoolbox_hwaccel = {
+    .name           = "mpeg2_videotoolbox",
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_MPEG2VIDEO,
+    .pix_fmt        = AV_PIX_FMT_VIDEOTOOLBOX,
+    .alloc_frame    = ff_videotoolbox_alloc_frame,
+    .start_frame    = videotoolbox_mpeg_start_frame,  /* copies the whole frame data */
+    .decode_slice   = videotoolbox_mpeg_decode_slice, /* no-op */
+    .end_frame      = videotoolbox_mpeg_end_frame,    /* submits the copy to the session */
+    .uninit         = ff_videotoolbox_uninit,
+    .priv_data_size = sizeof(VTContext),
+};
+
+AVHWAccel ff_mpeg4_videotoolbox_hwaccel = {
+    .name           = "mpeg4_videotoolbox",
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_MPEG4,
+    .pix_fmt        = AV_PIX_FMT_VIDEOTOOLBOX,
+    .alloc_frame    = ff_videotoolbox_alloc_frame,
+    .start_frame    = videotoolbox_mpeg_start_frame,  /* copies the whole frame data */
+    .decode_slice   = videotoolbox_mpeg_decode_slice, /* no-op */
+    .end_frame      = videotoolbox_mpeg_end_frame,    /* submits the copy to the session */
+    .uninit         = ff_videotoolbox_uninit,
+    .priv_data_size = sizeof(VTContext),
+};
+
+AVVideotoolboxContext *av_videotoolbox_alloc_context(void)
+{
+    AVVideotoolboxContext *ctx = av_mallocz(sizeof(*ctx));
+
+    if (!ctx)
+        return NULL;
+    /* Defaults: libavcodec's output callback and a bi-planar 4:2:0 format. */
+    ctx->cv_pix_fmt_type = kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange;
+    ctx->output_callback = videotoolbox_decoder_callback;
+    return ctx;
+}
+
+int av_videotoolbox_default_init(AVCodecContext *avctx)
+{
+    return av_videotoolbox_default_init2(avctx, NULL); /* NULL: a default context is allocated */
+}
+
+int av_videotoolbox_default_init2(AVCodecContext *avctx, AVVideotoolboxContext *vtctx)
+{
+    /* Use the caller's context when given, otherwise allocate a default one
+     * (standard conditional; the two-operand "?:" form is a GNU extension). */
+    avctx->hwaccel_context = vtctx ? vtctx : av_videotoolbox_alloc_context();
+    return avctx->hwaccel_context ? videotoolbox_default_init(avctx)
+                                  : AVERROR(ENOMEM);
+}
+
+void av_videotoolbox_default_free(AVCodecContext *avctx)
+{
+    /* Release the session state first, then the context struct itself. */
+    videotoolbox_default_free(avctx);
+    av_freep(&avctx->hwaccel_context);
+}
+#endif /* CONFIG_VIDEOTOOLBOX */
diff --git a/libavcodec/videotoolbox.h b/libavcodec/videotoolbox.h
new file mode 100644
index 00000000..a48638e2
--- /dev/null
+++ b/libavcodec/videotoolbox.h
@@ -0,0 +1,126 @@
+/*
+ * Videotoolbox hardware acceleration
+ *
+ * copyright (c) 2012 Sebastien Zwickert
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VIDEOTOOLBOX_H
+#define AVCODEC_VIDEOTOOLBOX_H
+
+/**
+ * @file
+ * @ingroup lavc_codec_hwaccel_videotoolbox
+ * Public libavcodec Videotoolbox header.
+ */
+
+#include <stdint.h>
+
+#define Picture QuickdrawPicture
+#include <VideoToolbox/VideoToolbox.h>
+#undef Picture
+
+#include "libavcodec/avcodec.h"
+
+/**
+ * This struct holds all the information that needs to be passed
+ * between the caller and libavcodec for initializing Videotoolbox decoding.
+ * Its size is not a part of the public ABI, it must be allocated with
+ * av_videotoolbox_alloc_context() and freed with av_free().
+ */
+typedef struct AVVideotoolboxContext {
+    /**
+     * Videotoolbox decompression session object.
+     * Created and freed by the caller.
+     */
+    VTDecompressionSessionRef session;
+
+    /**
+     * The output callback that must be passed to the session.
+     * Set by av_videotoolbox_default_init()
+     */
+    VTDecompressionOutputCallback output_callback;
+
+    /**
+     * CVPixelBuffer Format Type that Videotoolbox will use for decoded frames.
+     * Set by the caller.
+     */
+    OSType cv_pix_fmt_type;
+
+    /**
+     * CoreMedia Format Description that Videotoolbox will use to create the decompression session.
+     * Set by the caller.
+     */
+    CMVideoFormatDescriptionRef cm_fmt_desc;
+
+    /**
+     * CoreMedia codec type that Videotoolbox will use to create the decompression session.
+     * Set by the caller.
+     */
+    int cm_codec_type;
+} AVVideotoolboxContext;
+
+/**
+ * Allocate and initialize a Videotoolbox context.
+ *
+ * This function should be called from the get_format() callback when the caller
+ * selects the AV_PIX_FMT_VIDEOTOOLBOX format. The caller must then create
+ * the decoder object (using the output callback provided by libavcodec) that
+ * will be used for Videotoolbox-accelerated decoding.
+ *
+ * When decoding with Videotoolbox is finished, the caller must destroy the decoder
+ * object and free the Videotoolbox context using av_free().
+ *
+ * @return the newly allocated context or NULL on failure
+ */
+AVVideotoolboxContext *av_videotoolbox_alloc_context(void);
+
+/**
+ * This is a convenience function that creates and sets up the Videotoolbox context using
+ * an internal implementation.
+ *
+ * @param avctx the corresponding codec context
+ *
+ * @return >= 0 on success, a negative AVERROR code on failure
+ */
+int av_videotoolbox_default_init(AVCodecContext *avctx);
+
+/**
+ * This is a convenience function that creates and sets up the Videotoolbox context using
+ * an internal implementation.
+ *
+ * @param avctx the corresponding codec context
+ * @param vtctx the Videotoolbox context to use
+ *
+ * @return >= 0 on success, a negative AVERROR code on failure
+ */
+int av_videotoolbox_default_init2(AVCodecContext *avctx, AVVideotoolboxContext *vtctx);
+
+/**
+ * This function must be called to free the Videotoolbox context initialized with
+ * av_videotoolbox_default_init() or av_videotoolbox_default_init2().
+ *
+ * @param avctx the corresponding codec context
+ */
+void av_videotoolbox_default_free(AVCodecContext *avctx);
+
+/**
+ * @}
+ */
+
+#endif /* AVCODEC_VIDEOTOOLBOX_H */
diff --git a/libavcodec/vima.c b/libavcodec/vima.c
index 74d6a9a1..b4620acf 100644
--- a/libavcodec/vima.c
+++ b/libavcodec/vima.c
@@ -214,17 +214,5 @@ AVCodec ff_adpcm_vima_decoder = {
     .id           = AV_CODEC_ID_ADPCM_VIMA,
     .init         = decode_init,
     .decode       = decode_frame,
-    .capabilities = CODEC_CAP_DR1,
+    .capabilities = AV_CODEC_CAP_DR1,
 };
-
-#if FF_API_VIMA_DECODER
-AVCodec ff_vima_decoder = {
-    .name         = "vima",
-    .long_name    = NULL_IF_CONFIG_SMALL("LucasArts VIMA audio"),
-    .type         = AVMEDIA_TYPE_AUDIO,
-    .id           = AV_CODEC_ID_ADPCM_VIMA,
-    .init         = decode_init,
-    .decode       = decode_frame,
-    .capabilities = CODEC_CAP_DR1,
-};
-#endif
diff --git a/libavcodec/vmdaudio.c b/libavcodec/vmdaudio.c
index 3be0ff85..e8c8a064 100644
--- a/libavcodec/vmdaudio.c
+++ b/libavcodec/vmdaudio.c
@@ -231,5 +231,5 @@ AVCodec ff_vmdaudio_decoder = {
     .priv_data_size = sizeof(VmdAudioContext),
     .init           = vmdaudio_decode_init,
     .decode         = vmdaudio_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/vmdvideo.c b/libavcodec/vmdvideo.c
index a2ba1c95..b97032ff 100644
--- a/libavcodec/vmdvideo.c
+++ b/libavcodec/vmdvideo.c
@@ -471,5 +471,5 @@ AVCodec ff_vmdvideo_decoder = {
     .init           = vmdvideo_decode_init,
     .close          = vmdvideo_decode_end,
     .decode         = vmdvideo_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/vmnc.c b/libavcodec/vmnc.c
index 58bd0e26..49abb776 100644
--- a/libavcodec/vmnc.c
+++ b/libavcodec/vmnc.c
@@ -577,5 +577,5 @@ AVCodec ff_vmnc_decoder = {
     .init           = decode_init,
     .close          = decode_end,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/vorbis_parser.c b/libavcodec/vorbis_parser.c
index 8fa6d995..0b2c97cd 100644
--- a/libavcodec/vorbis_parser.c
+++ b/libavcodec/vorbis_parser.c
@@ -296,27 +296,6 @@ AVVorbisParseContext *av_vorbis_parse_init(const uint8_t *extradata,
     return s;
 }
 
-#if LIBAVCODEC_VERSION_MAJOR < 57
-int avpriv_vorbis_parse_extradata(AVCodecContext *avctx, AVVorbisParseContext *s)
-{
-    return vorbis_parse_init(s, avctx->extradata, avctx->extradata_size);
-}
-void avpriv_vorbis_parse_reset(AVVorbisParseContext *s)
-{
-    av_vorbis_parse_reset(s);
-}
-int avpriv_vorbis_parse_frame(AVVorbisParseContext *s, const uint8_t *buf,
-                              int buf_size)
-{
-    return av_vorbis_parse_frame(s, buf, buf_size);
-}
-int avpriv_vorbis_parse_frame_flags(AVVorbisParseContext *s, const uint8_t *buf,
-                                    int buf_size, int *flags)
-{
-    return av_vorbis_parse_frame_flags(s, buf, buf_size, flags);
-}
-#endif
-
 #if CONFIG_VORBIS_PARSER
 
 typedef struct VorbisParseContext {
diff --git a/libavcodec/vorbis_parser.h b/libavcodec/vorbis_parser.h
index 06e48bd3..81fda3b0 100644
--- a/libavcodec/vorbis_parser.h
+++ b/libavcodec/vorbis_parser.h
@@ -24,8 +24,8 @@
  * Determines the duration for each packet.
  */
 
-#ifndef AVCODEC_VORBIS_PARSE_H
-#define AVCODEC_VORBIS_PARSE_H
+#ifndef AVCODEC_VORBIS_PARSER_H
+#define AVCODEC_VORBIS_PARSER_H
 
 #include <stdint.h>
 
@@ -75,4 +75,4 @@ int av_vorbis_parse_frame(AVVorbisParseContext *s, const uint8_t *buf,
 
 void av_vorbis_parse_reset(AVVorbisParseContext *s);
 
-#endif /* AVCODEC_VORBIS_PARSE_H */
+#endif /* AVCODEC_VORBIS_PARSER_H */
diff --git a/libavcodec/vorbis_parser_internal.h b/libavcodec/vorbis_parser_internal.h
index 49481eea..691a8423 100644
--- a/libavcodec/vorbis_parser_internal.h
+++ b/libavcodec/vorbis_parser_internal.h
@@ -25,8 +25,8 @@
  * Determines the duration for each packet.
  */
 
-#ifndef AVCODEC_VORBIS_PARSER_H
-#define AVCODEC_VORBIS_PARSER_H
+#ifndef AVCODEC_VORBIS_PARSER_INTERNAL_H
+#define AVCODEC_VORBIS_PARSER_INTERNAL_H
 
 #include "avcodec.h"
 #include "vorbis_parser.h"
@@ -43,44 +43,4 @@ struct AVVorbisParseContext {
     int prev_mask;              ///< bitmask used to get the previous mode flag in each packet
 };
 
-#if LIBAVCODEC_VERSION_MAJOR < 57
-/**
- * Initialize the Vorbis parser using headers in the extradata.
- *
- * @param avctx codec context
- * @param s     Vorbis parser context
- */
-int avpriv_vorbis_parse_extradata(AVCodecContext *avctx, AVVorbisParseContext *s);
-
-/**
- * Get the duration for a Vorbis packet.
- *
- * avpriv_vorbis_parse_extradata() must have been successfully called prior to
- * this in order for a correct duration to be returned. If @p flags is @c NULL,
- * special frames are considered invalid.
- *
- * @param s        Vorbis parser context
- * @param buf      buffer containing a Vorbis frame
- * @param buf_size size of the buffer
- * @param flags    flags for special frames
- */
-int avpriv_vorbis_parse_frame_flags(AVVorbisParseContext *s, const uint8_t *buf,
-                                    int buf_size, int *flags);
-
-/**
- * Get the duration for a Vorbis packet.
- *
- * avpriv_vorbis_parse_extradata() must have been successfully called prior to
- * this in order for a correct duration to be returned.
- *
- * @param s        Vorbis parser context
- * @param buf      buffer containing a Vorbis frame
- * @param buf_size size of the buffer
- */
-int avpriv_vorbis_parse_frame(AVVorbisParseContext *s, const uint8_t *buf,
-                              int buf_size);
-
-void avpriv_vorbis_parse_reset(AVVorbisParseContext *s);
-#endif
-
-#endif /* AVCODEC_VORBIS_PARSER_H */
+#endif /* AVCODEC_VORBIS_PARSER_INTERNAL_H */
diff --git a/libavcodec/vorbisdec.c b/libavcodec/vorbisdec.c
index 2926e8e5..225f1e94 100644
--- a/libavcodec/vorbisdec.c
+++ b/libavcodec/vorbisdec.c
@@ -573,6 +573,11 @@ static int vorbis_parse_setup_hdr_floors(vorbis_context *vc)
                 return AVERROR(ENOMEM);
 
             rangebits = get_bits(gb, 4);
+            if (!rangebits && floor_setup->data.t1.partitions) {
+                av_log(vc->avctx, AV_LOG_ERROR,
+                       "A rangebits value of 0 is not compliant with the Vorbis I specification.\n");
+                return AVERROR_INVALIDDATA;
+            }
             rangemax = (1 << rangebits);
             if (rangemax > vc->blocksize[1] / 2) {
                 av_log(vc->avctx, AV_LOG_ERROR,
@@ -789,6 +794,11 @@ static int vorbis_parse_setup_hdr_mappings(vorbis_context *vc)
 
         if (get_bits1(gb)) {
             mapping_setup->coupling_steps = get_bits(gb, 8) + 1;
+            if (vc->audio_channels < 2) {
+                av_log(vc->avctx, AV_LOG_ERROR,
+                       "Square polar channel mapping with less than two channels is not compliant with the Vorbis I specification.\n");
+                return AVERROR_INVALIDDATA;
+            }
             mapping_setup->magnitude      = av_mallocz(mapping_setup->coupling_steps *
                                                        sizeof(*mapping_setup->magnitude));
             mapping_setup->angle          = av_mallocz(mapping_setup->coupling_steps *
@@ -998,7 +1008,7 @@ static int vorbis_parse_id_hdr(vorbis_context *vc)
 
     ff_mdct_init(&vc->mdct[0], bl0, 1, -1.0);
     ff_mdct_init(&vc->mdct[1], bl1, 1, -1.0);
-    vc->fdsp = avpriv_float_dsp_alloc(vc->avctx->flags & CODEC_FLAG_BITEXACT);
+    vc->fdsp = avpriv_float_dsp_alloc(vc->avctx->flags & AV_CODEC_FLAG_BITEXACT);
     if (!vc->fdsp)
         return AVERROR(ENOMEM);
 
@@ -1739,7 +1749,9 @@ static int vorbis_decode_frame(AVCodecContext *avctx, void *data,
     ff_dlog(NULL, "packet length %d \n", buf_size);
 
     if (*buf == 1 && buf_size > 7) {
-        init_get_bits(gb, buf+1, buf_size*8 - 8);
+        if ((ret = init_get_bits8(gb, buf + 1, buf_size - 1)) < 0)
+            return ret;
+
         vorbis_free(vc);
         if ((ret = vorbis_parse_id_hdr(vc))) {
             av_log(avctx, AV_LOG_ERROR, "Id header corrupt.\n");
@@ -1763,7 +1775,9 @@ static int vorbis_decode_frame(AVCodecContext *avctx, void *data,
     }
 
     if (*buf == 5 && buf_size > 7 && vc->channel_residues && !vc->modes) {
-        init_get_bits(gb, buf+1, buf_size*8 - 8);
+        if ((ret = init_get_bits8(gb, buf + 1, buf_size - 1)) < 0)
+            return ret;
+
         if ((ret = vorbis_parse_setup_hdr(vc))) {
             av_log(avctx, AV_LOG_ERROR, "Setup header corrupt.\n");
             vorbis_free(vc);
@@ -1792,7 +1806,8 @@ static int vorbis_decode_frame(AVCodecContext *avctx, void *data,
         }
     }
 
-    init_get_bits(gb, buf, buf_size*8);
+    if ((ret = init_get_bits8(gb, buf, buf_size)) < 0)
+        return ret;
 
     if ((len = vorbis_parse_audio_packet(vc, channel_ptrs)) <= 0)
         return len;
@@ -1846,7 +1861,7 @@ AVCodec ff_vorbis_decoder = {
     .close           = vorbis_decode_close,
     .decode          = vorbis_decode_frame,
     .flush           = vorbis_decode_flush,
-    .capabilities    = CODEC_CAP_DR1,
+    .capabilities    = AV_CODEC_CAP_DR1,
     .channel_layouts = ff_vorbis_channel_layouts,
     .sample_fmts     = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                        AV_SAMPLE_FMT_NONE },
diff --git a/libavcodec/vorbisenc.c b/libavcodec/vorbisenc.c
index dcb2a6e5..2974ca2c 100644
--- a/libavcodec/vorbisenc.c
+++ b/libavcodec/vorbisenc.c
@@ -1033,7 +1033,7 @@ static int vorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
         return 0;
     samples = 1 << (venc->log2_blocksize[0] - 1);
 
-    if ((ret = ff_alloc_packet2(avctx, avpkt, 8192)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, avpkt, 8192, 0)) < 0)
         return ret;
 
     init_put_bits(&pb, avpkt->data, avpkt->size);
@@ -1178,7 +1178,7 @@ static av_cold int vorbis_encode_init(AVCodecContext *avctx)
         goto error;
 
     avctx->bit_rate = 0;
-    if (avctx->flags & CODEC_FLAG_QSCALE)
+    if (avctx->flags & AV_CODEC_FLAG_QSCALE)
         venc->quality = avctx->global_quality / (float)FF_QP2LAMBDA;
     else
         venc->quality = 8;
@@ -1205,7 +1205,7 @@ AVCodec ff_vorbis_encoder = {
     .init           = vorbis_encode_init,
     .encode2        = vorbis_encode_frame,
     .close          = vorbis_encode_close,
-    .capabilities   = CODEC_CAP_DELAY | CODEC_CAP_EXPERIMENTAL,
+    .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_EXPERIMENTAL,
     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
                                                      AV_SAMPLE_FMT_NONE },
 };
diff --git a/libavcodec/vp3.c b/libavcodec/vp3.c
index 005f043b..5bbf47b5 100644
--- a/libavcodec/vp3.c
+++ b/libavcodec/vp3.c
@@ -131,7 +131,7 @@ static const uint8_t hilbert_offset[16][2] = {
 
 typedef struct Vp3DecodeContext {
     AVCodecContext *avctx;
-    int theora, theora_tables;
+    int theora, theora_tables, theora_header;
     int version;
     int width, height;
     int chroma_x_shift, chroma_y_shift;
@@ -209,8 +209,8 @@ typedef struct Vp3DecodeContext {
     int16_t *dct_tokens[3][64];
     int16_t *dct_tokens_base;
 #define TOKEN_EOB(eob_run)              ((eob_run) << 2)
-#define TOKEN_ZERO_RUN(coeff, zero_run) (((coeff) << 9) + ((zero_run) << 2) + 1)
-#define TOKEN_COEFF(coeff)              (((coeff) << 2) + 2)
+#define TOKEN_ZERO_RUN(coeff, zero_run) (((coeff) * 512) + ((zero_run) << 2) + 1)
+#define TOKEN_COEFF(coeff)              (((coeff) * 4) + 2)
 
     /**
      * number of blocks that contain DCT coefficients at
@@ -1095,7 +1095,7 @@ static int unpack_dct_coeffs(Vp3DecodeContext *s, GetBitContext *gb)
         return residual_eob_run;
 
     /* reverse prediction of the C-plane DC coefficients */
-    if (!(s->avctx->flags & CODEC_FLAG_GRAY)) {
+    if (!(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
         reverse_dc_prediction(s, s->fragment_start[1],
                               s->fragment_width[1], s->fragment_height[1]);
         reverse_dc_prediction(s, s->fragment_start[2],
@@ -1518,7 +1518,7 @@ static void render_slice(Vp3DecodeContext *s, int slice)
 
         if (!s->flipped_image)
             stride = -stride;
-        if (CONFIG_GRAY && plane && (s->avctx->flags & CODEC_FLAG_GRAY))
+        if (CONFIG_GRAY && plane && (s->avctx->flags & AV_CODEC_FLAG_GRAY))
             continue;
 
         /* for each superblock row in the slice (both of them)... */
@@ -1738,7 +1738,7 @@ static av_cold int vp3_decode_init(AVCodecContext *avctx)
     if (avctx->codec_id != AV_CODEC_ID_THEORA)
         avctx->pix_fmt = AV_PIX_FMT_YUV420P;
     avctx->chroma_sample_location = AVCHROMA_LOC_CENTER;
-    ff_hpeldsp_init(&s->hdsp, avctx->flags | CODEC_FLAG_BITEXACT);
+    ff_hpeldsp_init(&s->hdsp, avctx->flags | AV_CODEC_FLAG_BITEXACT);
     ff_videodsp_init(&s->vdsp, 8);
     ff_vp3dsp_init(&s->vp3dsp, avctx->flags);
 
@@ -1930,6 +1930,7 @@ static int ref_frames(Vp3DecodeContext *dst, Vp3DecodeContext *src)
     return 0;
 }
 
+#if HAVE_THREADS
 static int vp3_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
 {
     Vp3DecodeContext *s = dst->priv_data, *s1 = src->priv_data;
@@ -1947,6 +1948,8 @@ static int vp3_update_thread_context(AVCodecContext *dst, const AVCodecContext *
     }
 
     if (s != s1) {
+        if (!s->current_frame.f)
+            return AVERROR(ENOMEM);
         // init tables if the first frame hasn't been decoded
         if (!s->current_frame.f->data[0]) {
             int y_fragment_count, c_fragment_count;
@@ -1987,6 +1990,7 @@ static int vp3_update_thread_context(AVCodecContext *dst, const AVCodecContext *
 
     return update_frames(dst);
 }
+#endif
 
 static int vp3_decode_frame(AVCodecContext *avctx,
                             void *data, int *got_frame,
@@ -2014,17 +2018,20 @@ static int vp3_decode_frame(AVCodecContext *avctx,
             vp3_decode_end(avctx);
             ret = theora_decode_header(avctx, &gb);
 
+            if (ret >= 0)
+                ret = vp3_decode_init(avctx);
             if (ret < 0) {
                 vp3_decode_end(avctx);
-            } else
-                ret = vp3_decode_init(avctx);
+            }
             return ret;
         } else if (type == 2) {
+            vp3_decode_end(avctx);
             ret = theora_decode_tables(avctx, &gb);
+            if (ret >= 0)
+                ret = vp3_decode_init(avctx);
             if (ret < 0) {
                 vp3_decode_end(avctx);
-            } else
-                ret = vp3_decode_init(avctx);
+            }
             return ret;
         }
 
@@ -2219,6 +2226,7 @@ static int read_huffman_tree(AVCodecContext *avctx, GetBitContext *gb)
     return 0;
 }
 
+#if HAVE_THREADS
 static int vp3_init_thread_copy(AVCodecContext *avctx)
 {
     Vp3DecodeContext *s = avctx->priv_data;
@@ -2235,6 +2243,7 @@ static int vp3_init_thread_copy(AVCodecContext *avctx)
 
     return init_frames(s);
 }
+#endif
 
 #if CONFIG_THEORA_DECODER
 static const enum AVPixelFormat theora_pix_fmts[4] = {
@@ -2249,6 +2258,7 @@ static int theora_decode_header(AVCodecContext *avctx, GetBitContext *gb)
     int ret;
     AVRational fps, aspect;
 
+    s->theora_header = 0;
     s->theora = get_bits_long(gb, 24);
     av_log(avctx, AV_LOG_DEBUG, "Theora bitstream version %X\n", s->theora);
 
@@ -2319,12 +2329,13 @@ static int theora_decode_header(AVCodecContext *avctx, GetBitContext *gb)
             return AVERROR_INVALIDDATA;
         }
         skip_bits(gb, 3); /* reserved */
-    }
+    } else
+        avctx->pix_fmt = AV_PIX_FMT_YUV420P;
 
     ret = ff_set_dimensions(avctx, s->width, s->height);
     if (ret < 0)
         return ret;
-    if (!(avctx->flags2 & CODEC_FLAG2_IGNORE_CROP)) {
+    if (!(avctx->flags2 & AV_CODEC_FLAG2_IGNORE_CROP)) {
         avctx->width  = visible_width;
         avctx->height = visible_height;
         // translate offsets from theora axis ([0,0] lower left)
@@ -2332,7 +2343,7 @@ static int theora_decode_header(AVCodecContext *avctx, GetBitContext *gb)
         s->offset_x = offset_x;
         s->offset_y = s->height - visible_height - offset_y;
 
-        if ((s->offset_x & 0x1F) && !(avctx->flags & CODEC_FLAG_UNALIGNED)) {
+        if ((s->offset_x & 0x1F) && !(avctx->flags & AV_CODEC_FLAG_UNALIGNED)) {
             s->offset_x &= ~0x1F;
             if (!s->offset_x_warned) {
                 s->offset_x_warned = 1;
@@ -2353,6 +2364,7 @@ static int theora_decode_header(AVCodecContext *avctx, GetBitContext *gb)
         avctx->color_trc  = AVCOL_TRC_BT709;
     }
 
+    s->theora_header = 1;
     return 0;
 }
 
@@ -2361,6 +2373,9 @@ static int theora_decode_tables(AVCodecContext *avctx, GetBitContext *gb)
     Vp3DecodeContext *s = avctx->priv_data;
     int i, n, matrices, inter, plane;
 
+    if (!s->theora_header)
+        return AVERROR_INVALIDDATA;
+
     if (s->theora >= 0x030200) {
         n = get_bits(gb, 3);
         /* loop filter limit values table */
@@ -2473,6 +2488,7 @@ static av_cold int theora_decode_init(AVCodecContext *avctx)
     const uint8_t *header_start[3];
     int header_len[3];
     int i;
+    int ret;
 
     avctx->pix_fmt = AV_PIX_FMT_YUV420P;
 
@@ -2492,7 +2508,9 @@ static av_cold int theora_decode_init(AVCodecContext *avctx)
     for (i = 0; i < 3; i++) {
         if (header_len[i] <= 0)
             continue;
-        init_get_bits8(&gb, header_start[i], header_len[i]);
+        ret = init_get_bits8(&gb, header_start[i], header_len[i]);
+        if (ret < 0)
+            return ret;
 
         ptype = get_bits(&gb, 8);
 
@@ -2542,8 +2560,8 @@ AVCodec ff_theora_decoder = {
     .init                  = theora_decode_init,
     .close                 = vp3_decode_end,
     .decode                = vp3_decode_frame,
-    .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_DRAW_HORIZ_BAND |
-                             CODEC_CAP_FRAME_THREADS,
+    .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DRAW_HORIZ_BAND |
+                             AV_CODEC_CAP_FRAME_THREADS,
     .flush                 = vp3_decode_flush,
     .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp3_init_thread_copy),
     .update_thread_context = ONLY_IF_THREADS_ENABLED(vp3_update_thread_context)
@@ -2559,8 +2577,8 @@ AVCodec ff_vp3_decoder = {
     .init                  = vp3_decode_init,
     .close                 = vp3_decode_end,
     .decode                = vp3_decode_frame,
-    .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_DRAW_HORIZ_BAND |
-                             CODEC_CAP_FRAME_THREADS,
+    .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DRAW_HORIZ_BAND |
+                             AV_CODEC_CAP_FRAME_THREADS,
     .flush                 = vp3_decode_flush,
     .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp3_init_thread_copy),
     .update_thread_context = ONLY_IF_THREADS_ENABLED(vp3_update_thread_context),
diff --git a/libavcodec/vp5.c b/libavcodec/vp5.c
index 1923d633..5bcf9b62 100644
--- a/libavcodec/vp5.c
+++ b/libavcodec/vp5.c
@@ -290,5 +290,5 @@ AVCodec ff_vp5_decoder = {
     .init           = vp5_decode_init,
     .close          = ff_vp56_free,
     .decode         = ff_vp56_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/vp6.c b/libavcodec/vp6.c
index e97ef76d..a2bb4578 100644
--- a/libavcodec/vp6.c
+++ b/libavcodec/vp6.c
@@ -679,7 +679,7 @@ AVCodec ff_vp6_decoder = {
     .init           = vp6_decode_init,
     .close          = vp6_decode_free,
     .decode         = ff_vp56_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
 
 /* flash version, not flipped upside-down */
@@ -692,7 +692,7 @@ AVCodec ff_vp6f_decoder = {
     .init           = vp6_decode_init,
     .close          = vp6_decode_free,
     .decode         = ff_vp56_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
 
 /* flash version, not flipped upside-down, with alpha channel */
@@ -705,5 +705,5 @@ AVCodec ff_vp6a_decoder = {
     .init           = vp6_decode_init,
     .close          = vp6_decode_free,
     .decode         = ff_vp56_decode_frame,
-    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_SLICE_THREADS,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_SLICE_THREADS,
 };
diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
index 25fe70ae..64037fc0 100644
--- a/libavcodec/vp8.c
+++ b/libavcodec/vp8.c
@@ -164,7 +164,7 @@ int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
     s->mb_height = (s->avctx->coded_height + 15) / 16;
 
     s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE &&
-                   FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1;
+                   avctx->thread_count > 1;
     if (!s->mb_layout) { // Frame threading and one thread
         s->macroblocks_base       = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
                                                sizeof(*s->macroblocks));
@@ -493,6 +493,10 @@ static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_si
     int width  = s->avctx->width;
     int height = s->avctx->height;
 
+    if (buf_size < 4) {
+        return AVERROR_INVALIDDATA;
+    }
+
     s->profile = (buf[0] >> 1) & 7;
     if (s->profile > 1) {
         avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
@@ -2765,6 +2769,7 @@ av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
 }
 
 #if CONFIG_VP8_DECODER
+#if HAVE_THREADS
 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
 {
     VP8Context *s = avctx->priv_data;
@@ -2815,6 +2820,7 @@ static int vp8_decode_update_thread_context(AVCodecContext *dst,
 
     return 0;
 }
+#endif /* HAVE_THREADS */
 #endif /* CONFIG_VP8_DECODER */
 
 #if CONFIG_VP7_DECODER
@@ -2827,7 +2833,7 @@ AVCodec ff_vp7_decoder = {
     .init                  = vp7_decode_init,
     .close                 = ff_vp8_decode_free,
     .decode                = vp7_decode_frame,
-    .capabilities          = CODEC_CAP_DR1,
+    .capabilities          = AV_CODEC_CAP_DR1,
     .flush                 = vp8_decode_flush,
 };
 #endif /* CONFIG_VP7_DECODER */
@@ -2842,7 +2848,8 @@ AVCodec ff_vp8_decoder = {
     .init                  = ff_vp8_decode_init,
     .close                 = ff_vp8_decode_free,
     .decode                = ff_vp8_decode_frame,
-    .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
+    .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS |
+                             AV_CODEC_CAP_SLICE_THREADS,
     .flush                 = vp8_decode_flush,
     .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
     .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
diff --git a/libavcodec/vp8.h b/libavcodec/vp8.h
index 2135bd9d..cfd82aaa 100644
--- a/libavcodec/vp8.h
+++ b/libavcodec/vp8.h
@@ -27,20 +27,13 @@
 #define AVCODEC_VP8_H
 
 #include "libavutil/buffer.h"
+#include "libavutil/thread.h"
 
 #include "h264pred.h"
 #include "thread.h"
 #include "vp56.h"
 #include "vp8dsp.h"
 
-#if HAVE_PTHREADS
-#   include <pthread.h>
-#elif HAVE_OS2THREADS
-#   include "compat/os2threads.h"
-#elif HAVE_W32THREADS
-#   include "compat/w32pthreads.h"
-#endif
-
 #define VP8_MAX_QUANT 127
 
 enum dct_token {
diff --git a/libavcodec/vp8dsp.c b/libavcodec/vp8dsp.c
index e1a91bb8..07bea69c 100644
--- a/libavcodec/vp8dsp.c
+++ b/libavcodec/vp8dsp.c
@@ -735,5 +735,7 @@ av_cold void ff_vp8dsp_init(VP8DSPContext *dsp)
         ff_vp8dsp_init_arm(dsp);
     if (ARCH_X86)
         ff_vp8dsp_init_x86(dsp);
+    if (ARCH_MIPS)
+        ff_vp8dsp_init_mips(dsp);
 }
 #endif /* CONFIG_VP8_DECODER */
diff --git a/libavcodec/vp8dsp.h b/libavcodec/vp8dsp.h
index 5fdd3af7..0401c922 100644
--- a/libavcodec/vp8dsp.h
+++ b/libavcodec/vp8dsp.h
@@ -98,6 +98,7 @@ void ff_vp78dsp_init_x86(VP8DSPContext *c);
 void ff_vp8dsp_init(VP8DSPContext *c);
 void ff_vp8dsp_init_arm(VP8DSPContext *c);
 void ff_vp8dsp_init_x86(VP8DSPContext *c);
+void ff_vp8dsp_init_mips(VP8DSPContext *c);
 
 #define IS_VP7 1
 #define IS_VP8 0
diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
index a7954c3f..5d8ad12b 100644
--- a/libavcodec/vp9.c
+++ b/libavcodec/vp9.c
@@ -24,6 +24,7 @@
 #include "avcodec.h"
 #include "get_bits.h"
 #include "internal.h"
+#include "profiles.h"
 #include "thread.h"
 #include "videodsp.h"
 #include "vp56.h"
@@ -35,49 +36,6 @@
 
 #define VP9_SYNCCODE 0x498342
 
-enum CompPredMode {
-    PRED_SINGLEREF,
-    PRED_COMPREF,
-    PRED_SWITCHABLE,
-};
-
-enum BlockLevel {
-    BL_64X64,
-    BL_32X32,
-    BL_16X16,
-    BL_8X8,
-};
-
-enum BlockSize {
-    BS_64x64,
-    BS_64x32,
-    BS_32x64,
-    BS_32x32,
-    BS_32x16,
-    BS_16x32,
-    BS_16x16,
-    BS_16x8,
-    BS_8x16,
-    BS_8x8,
-    BS_8x4,
-    BS_4x8,
-    BS_4x4,
-    N_BS_SIZES,
-};
-
-struct VP9mvrefPair {
-    VP56mv mv[2];
-    int8_t ref[2];
-};
-
-typedef struct VP9Frame {
-    ThreadFrame tf;
-    AVBufferRef *extradata;
-    uint8_t *segmentation_map;
-    struct VP9mvrefPair *mv;
-    int uses_2pass;
-} VP9Frame;
-
 struct VP9Filter {
     uint8_t level[8 * 8];
     uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
@@ -95,6 +53,8 @@ typedef struct VP9Block {
 } VP9Block;
 
 typedef struct VP9Context {
+    VP9SharedContext s;
+
     VP9DSPContext dsp;
     VideoDSPContext vdsp;
     GetBitContext gb;
@@ -107,71 +67,23 @@ typedef struct VP9Context {
     uint8_t *dst[3];
     ptrdiff_t y_stride, uv_stride;
 
-    // bitstream header
-    uint8_t keyframe, last_keyframe;
-    uint8_t last_bpp, bpp, bpp_index, bytesperpixel;
-    uint8_t invisible;
-    uint8_t use_last_frame_mvs;
-    uint8_t errorres;
     uint8_t ss_h, ss_v;
-    uint8_t intraonly;
-    uint8_t resetctx;
-    uint8_t refreshrefmask;
-    uint8_t highprecisionmvs;
-    enum FilterMode filtermode;
-    uint8_t allowcompinter;
-    uint8_t fixcompref;
-    uint8_t refreshctx;
-    uint8_t parallelmode;
-    uint8_t framectxid;
-    uint8_t refidx[3];
-    uint8_t signbias[3];
-    uint8_t varcompref[2];
-    ThreadFrame refs[8], next_refs[8];
-#define CUR_FRAME 0
-#define REF_FRAME_MVPAIR 1
-#define REF_FRAME_SEGMAP 2
-    VP9Frame frames[3];
+    uint8_t last_bpp, bpp, bpp_index, bytesperpixel;
+    uint8_t last_keyframe;
+    // sb_cols/rows, rows/cols and last_fmt are used for allocating all internal
+    // arrays, and are thus per-thread. w/h and gf_fmt are synced between threads
+    // and are therefore per-stream. pix_fmt represents the value in the header
+    // of the currently processed frame.
+    int w, h;
+    enum AVPixelFormat pix_fmt, last_fmt, gf_fmt;
+    unsigned sb_cols, sb_rows, rows, cols;
+    ThreadFrame next_refs[8];
 
     struct {
-        uint8_t level;
-        int8_t sharpness;
         uint8_t lim_lut[64];
         uint8_t mblim_lut[64];
-    } filter;
-    struct {
-        uint8_t enabled;
-        int8_t mode[2];
-        int8_t ref[4];
-    } lf_delta;
-    uint8_t yac_qi;
-    int8_t ydc_qdelta, uvdc_qdelta, uvac_qdelta;
-    uint8_t lossless;
-#define MAX_SEGMENT 8
-    struct {
-        uint8_t enabled;
-        uint8_t temporal;
-        uint8_t absolute_vals;
-        uint8_t update_map;
-        uint8_t ignore_refmap;
-        struct {
-            uint8_t q_enabled;
-            uint8_t lf_enabled;
-            uint8_t ref_enabled;
-            uint8_t skip_enabled;
-            uint8_t ref_val;
-            int16_t q_val;
-            int8_t lf_val;
-            int16_t qmul[2][2];
-            uint8_t lflvl[4][2];
-        } feat[MAX_SEGMENT];
-    } segmentation;
-    struct {
-        unsigned log2_tile_cols, log2_tile_rows;
-        unsigned tile_cols, tile_rows;
-        unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
-    } tiling;
-    unsigned sb_cols, sb_rows, rows, cols;
+    } filter_lut;
+    unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
     struct {
         prob_context p;
         uint8_t coef[4][2][2][6][6][3];
@@ -179,8 +91,6 @@ typedef struct VP9Context {
     struct {
         prob_context p;
         uint8_t coef[4][2][2][6][6][11];
-        uint8_t seg[7];
-        uint8_t segpred[3];
     } prob;
     struct {
         unsigned y_mode[4][10];
@@ -210,8 +120,6 @@ typedef struct VP9Context {
         unsigned coef[4][2][2][6][6][3];
         unsigned eob[4][2][2][6][6][2];
     } counts;
-    enum TxfmMode txfmmode;
-    enum CompPredMode comppredmode;
 
     // contextual (left/above) cache
     DECLARE_ALIGNED(16, uint8_t, left_y_nnz_ctx)[16];
@@ -266,6 +174,15 @@ static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
     }
 };
 
+static void vp9_unref_frame(AVCodecContext *ctx, VP9Frame *f) /* release f's frame buffer, extradata and hwaccel private buffer */
+{
+    ff_thread_release_buffer(ctx, &f->tf);
+    av_buffer_unref(&f->extradata);        /* NOTE(review): f->mv also points into this buffer and is not reset here - confirm intended */
+    av_buffer_unref(&f->hwaccel_priv_buf);
+    f->segmentation_map = NULL;            /* was set to extradata->data in vp9_alloc_frame() */
+    f->hwaccel_picture_private = NULL;     /* was set to hwaccel_priv_buf->data in vp9_alloc_frame() */
+}
+
 static int vp9_alloc_frame(AVCodecContext *ctx, VP9Frame *f)
 {
     VP9Context *s = ctx->priv_data;
@@ -275,20 +192,28 @@ static int vp9_alloc_frame(AVCodecContext *ctx, VP9Frame *f)
         return ret;
     sz = 64 * s->sb_cols * s->sb_rows;
     if (!(f->extradata = av_buffer_allocz(sz * (1 + sizeof(struct VP9mvrefPair))))) {
-        ff_thread_release_buffer(ctx, &f->tf);
-        return AVERROR(ENOMEM);
+        goto fail;
     }
 
     f->segmentation_map = f->extradata->data;
     f->mv = (struct VP9mvrefPair *) (f->extradata->data + sz);
 
+    if (ctx->hwaccel) {
+        const AVHWAccel *hwaccel = ctx->hwaccel;
+        av_assert0(!f->hwaccel_picture_private);
+        if (hwaccel->frame_priv_data_size) {
+            f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
+            if (!f->hwaccel_priv_buf)
+                goto fail;
+            f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
+        }
+    }
+
     return 0;
-}
 
-static void vp9_unref_frame(AVCodecContext *ctx, VP9Frame *f)
-{
-    ff_thread_release_buffer(ctx, &f->tf);
-    av_buffer_unref(&f->extradata);
+fail:
+    vp9_unref_frame(ctx, f);
+    return AVERROR(ENOMEM);
 }
 
 static int vp9_ref_frame(AVCodecContext *ctx, VP9Frame *dst, VP9Frame *src)
@@ -298,31 +223,73 @@ static int vp9_ref_frame(AVCodecContext *ctx, VP9Frame *dst, VP9Frame *src)
     if ((res = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0) {
         return res;
     } else if (!(dst->extradata = av_buffer_ref(src->extradata))) {
-        vp9_unref_frame(ctx, dst);
-        return AVERROR(ENOMEM);
+        goto fail;
     }
 
     dst->segmentation_map = src->segmentation_map;
     dst->mv = src->mv;
     dst->uses_2pass = src->uses_2pass;
 
+    if (src->hwaccel_picture_private) {
+        dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
+        if (!dst->hwaccel_priv_buf)
+            goto fail;
+        dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
+    }
+
     return 0;
+
+fail:
+    vp9_unref_frame(ctx, dst);
+    return AVERROR(ENOMEM);
 }
 
-static int update_size(AVCodecContext *ctx, int w, int h, enum AVPixelFormat fmt)
+static int update_size(AVCodecContext *ctx, int w, int h)
 {
+#define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + CONFIG_VP9_D3D11VA_HWACCEL + CONFIG_VP9_VAAPI_HWACCEL)
+    enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
     VP9Context *s = ctx->priv_data;
     uint8_t *p;
-    int bytesperpixel = s->bytesperpixel;
+    int bytesperpixel = s->bytesperpixel, res, cols, rows;
 
     av_assert0(w > 0 && h > 0);
 
-    if (s->intra_pred_data[0] && w == ctx->width && h == ctx->height && ctx->pix_fmt == fmt)
+    if (!(s->pix_fmt == s->gf_fmt && w == s->w && h == s->h)) {
+        if ((res = ff_set_dimensions(ctx, w, h)) < 0)
+            return res;
+
+        if (s->pix_fmt == AV_PIX_FMT_YUV420P) {
+#if CONFIG_VP9_DXVA2_HWACCEL
+            *fmtp++ = AV_PIX_FMT_DXVA2_VLD;
+#endif
+#if CONFIG_VP9_D3D11VA_HWACCEL
+            *fmtp++ = AV_PIX_FMT_D3D11VA_VLD;
+#endif
+#if CONFIG_VP9_VAAPI_HWACCEL
+            *fmtp++ = AV_PIX_FMT_VAAPI;
+#endif
+        }
+
+        *fmtp++ = s->pix_fmt;
+        *fmtp = AV_PIX_FMT_NONE;
+
+        res = ff_thread_get_format(ctx, pix_fmts);
+        if (res < 0)
+            return res;
+
+        ctx->pix_fmt = res;
+        s->gf_fmt  = s->pix_fmt;
+        s->w = w;
+        s->h = h;
+    }
+
+    cols = (w + 7) >> 3;
+    rows = (h + 7) >> 3;
+
+    if (s->intra_pred_data[0] && cols == s->cols && rows == s->rows && s->pix_fmt == s->last_fmt)
         return 0;
 
-    ctx->width   = w;
-    ctx->height  = h;
-    ctx->pix_fmt = fmt;
+    s->last_fmt  = s->pix_fmt;
     s->sb_cols   = (w + 63) >> 6;
     s->sb_rows   = (h + 63) >> 6;
     s->cols      = (w + 7) >> 3;
@@ -360,7 +327,7 @@ static int update_size(AVCodecContext *ctx, int w, int h, enum AVPixelFormat fmt
     av_freep(&s->block_base);
 
     if (s->bpp != s->last_bpp) {
-        ff_vp9dsp_init(&s->dsp, s->bpp);
+        ff_vp9dsp_init(&s->dsp, s->bpp, ctx->flags & AV_CODEC_FLAG_BITEXACT);
         ff_videodsp_init(&s->vdsp, s->bpp);
         s->last_bpp = s->bpp;
     }
@@ -373,14 +340,14 @@ static int update_block_buffers(AVCodecContext *ctx)
     VP9Context *s = ctx->priv_data;
     int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;
 
-    if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->frames[CUR_FRAME].uses_2pass)
+    if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->s.frames[CUR_FRAME].uses_2pass)
         return 0;
 
     av_free(s->b_base);
     av_free(s->block_base);
     chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
     chroma_eobs   = 16 * 16 >> (s->ss_h + s->ss_v);
-    if (s->frames[CUR_FRAME].uses_2pass) {
+    if (s->s.frames[CUR_FRAME].uses_2pass) {
         int sbs = s->sb_cols * s->sb_rows;
 
         s->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
@@ -405,7 +372,7 @@ static int update_block_buffers(AVCodecContext *ctx)
         s->uveob_base[0] = s->eob_base + 16 * 16;
         s->uveob_base[1] = s->uveob_base[0] + chroma_eobs;
     }
-    s->block_alloc_using_2pass = s->frames[CUR_FRAME].uses_2pass;
+    s->block_alloc_using_2pass = s->s.frames[CUR_FRAME].uses_2pass;
 
     return 0;
 }
@@ -481,14 +448,13 @@ static int update_prob(VP56RangeCoder *c, int p)
                     255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
 }
 
-static enum AVPixelFormat read_colorspace_details(AVCodecContext *ctx)
+static int read_colorspace_details(AVCodecContext *ctx)
 {
     static const enum AVColorSpace colorspaces[8] = {
         AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
         AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
     };
     VP9Context *s = ctx->priv_data;
-    enum AVPixelFormat res;
     int bits = ctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12
 
     s->bpp_index = bits;
@@ -499,10 +465,14 @@ static enum AVPixelFormat read_colorspace_details(AVCodecContext *ctx)
         static const enum AVPixelFormat pix_fmt_rgb[3] = {
             AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
         };
+        s->ss_h = s->ss_v = 0;
+        ctx->color_range = AVCOL_RANGE_JPEG;
+        s->pix_fmt = pix_fmt_rgb[bits];
         if (ctx->profile & 1) {
-            s->ss_h = s->ss_v = 1;
-            res = pix_fmt_rgb[bits];
-            ctx->color_range = AVCOL_RANGE_JPEG;
+            if (get_bits1(&s->gb)) {
+                av_log(ctx, AV_LOG_ERROR, "Reserved bit set in RGB\n");
+                return AVERROR_INVALIDDATA;
+            }
         } else {
             av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
                    ctx->profile);
@@ -521,7 +491,8 @@ static enum AVPixelFormat read_colorspace_details(AVCodecContext *ctx)
         if (ctx->profile & 1) {
             s->ss_h = get_bits1(&s->gb);
             s->ss_v = get_bits1(&s->gb);
-            if ((res = pix_fmt_for_ss[bits][s->ss_v][s->ss_h]) == AV_PIX_FMT_YUV420P) {
+            s->pix_fmt = pix_fmt_for_ss[bits][s->ss_v][s->ss_h];
+            if (s->pix_fmt == AV_PIX_FMT_YUV420P) {
                 av_log(ctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
                        ctx->profile);
                 return AVERROR_INVALIDDATA;
@@ -532,11 +503,11 @@ static enum AVPixelFormat read_colorspace_details(AVCodecContext *ctx)
             }
         } else {
             s->ss_h = s->ss_v = 1;
-            res = pix_fmt_for_ss[bits][1][1];
+            s->pix_fmt = pix_fmt_for_ss[bits][1][1];
         }
     }
 
-    return res;
+    return 0;
 }
 
 static int decode_frame_header(AVCodecContext *ctx,
@@ -544,7 +515,6 @@ static int decode_frame_header(AVCodecContext *ctx,
 {
     VP9Context *s = ctx->priv_data;
     int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
-    enum AVPixelFormat fmt = ctx->pix_fmt;
     int last_invisible;
     const uint8_t *data2;
 
@@ -564,77 +534,78 @@ static int decode_frame_header(AVCodecContext *ctx,
         av_log(ctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", ctx->profile);
         return AVERROR_INVALIDDATA;
     }
+    s->s.h.profile = ctx->profile;
     if (get_bits1(&s->gb)) {
         *ref = get_bits(&s->gb, 3);
         return 0;
     }
-    s->last_keyframe  = s->keyframe;
-    s->keyframe       = !get_bits1(&s->gb);
-    last_invisible    = s->invisible;
-    s->invisible      = !get_bits1(&s->gb);
-    s->errorres       = get_bits1(&s->gb);
-    s->use_last_frame_mvs = !s->errorres && !last_invisible;
-    if (s->keyframe) {
+    s->last_keyframe  = s->s.h.keyframe;
+    s->s.h.keyframe     = !get_bits1(&s->gb);
+    last_invisible    = s->s.h.invisible;
+    s->s.h.invisible    = !get_bits1(&s->gb);
+    s->s.h.errorres     = get_bits1(&s->gb);
+    s->s.h.use_last_frame_mvs = !s->s.h.errorres && !last_invisible;
+    if (s->s.h.keyframe) {
         if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
             av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
             return AVERROR_INVALIDDATA;
         }
-        if ((fmt = read_colorspace_details(ctx)) < 0)
-            return fmt;
+        if ((res = read_colorspace_details(ctx)) < 0)
+            return res;
         // for profile 1, here follows the subsampling bits
-        s->refreshrefmask = 0xff;
+        s->s.h.refreshrefmask = 0xff;
         w = get_bits(&s->gb, 16) + 1;
         h = get_bits(&s->gb, 16) + 1;
         if (get_bits1(&s->gb)) // display size
             skip_bits(&s->gb, 32);
     } else {
-        s->intraonly  = s->invisible ? get_bits1(&s->gb) : 0;
-        s->resetctx   = s->errorres ? 0 : get_bits(&s->gb, 2);
-        if (s->intraonly) {
+        s->s.h.intraonly  = s->s.h.invisible ? get_bits1(&s->gb) : 0;
+        s->s.h.resetctx   = s->s.h.errorres ? 0 : get_bits(&s->gb, 2);
+        if (s->s.h.intraonly) {
             if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
                 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
                 return AVERROR_INVALIDDATA;
             }
-            if (ctx->profile == 1) {
-                if ((fmt = read_colorspace_details(ctx)) < 0)
-                    return fmt;
+            if (ctx->profile >= 1) {
+                if ((res = read_colorspace_details(ctx)) < 0)
+                    return res;
             } else {
                 s->ss_h = s->ss_v = 1;
                 s->bpp = 8;
                 s->bpp_index = 0;
                 s->bytesperpixel = 1;
-                fmt = AV_PIX_FMT_YUV420P;
+                s->pix_fmt = AV_PIX_FMT_YUV420P;
                 ctx->colorspace = AVCOL_SPC_BT470BG;
                 ctx->color_range = AVCOL_RANGE_JPEG;
             }
-            s->refreshrefmask = get_bits(&s->gb, 8);
+            s->s.h.refreshrefmask = get_bits(&s->gb, 8);
             w = get_bits(&s->gb, 16) + 1;
             h = get_bits(&s->gb, 16) + 1;
             if (get_bits1(&s->gb)) // display size
                 skip_bits(&s->gb, 32);
         } else {
-            s->refreshrefmask = get_bits(&s->gb, 8);
-            s->refidx[0]      = get_bits(&s->gb, 3);
-            s->signbias[0]    = get_bits1(&s->gb) && !s->errorres;
-            s->refidx[1]      = get_bits(&s->gb, 3);
-            s->signbias[1]    = get_bits1(&s->gb) && !s->errorres;
-            s->refidx[2]      = get_bits(&s->gb, 3);
-            s->signbias[2]    = get_bits1(&s->gb) && !s->errorres;
-            if (!s->refs[s->refidx[0]].f->data[0] ||
-                !s->refs[s->refidx[1]].f->data[0] ||
-                !s->refs[s->refidx[2]].f->data[0]) {
+            s->s.h.refreshrefmask = get_bits(&s->gb, 8);
+            s->s.h.refidx[0]      = get_bits(&s->gb, 3);
+            s->s.h.signbias[0]    = get_bits1(&s->gb) && !s->s.h.errorres;
+            s->s.h.refidx[1]      = get_bits(&s->gb, 3);
+            s->s.h.signbias[1]    = get_bits1(&s->gb) && !s->s.h.errorres;
+            s->s.h.refidx[2]      = get_bits(&s->gb, 3);
+            s->s.h.signbias[2]    = get_bits1(&s->gb) && !s->s.h.errorres;
+            if (!s->s.refs[s->s.h.refidx[0]].f->buf[0] ||
+                !s->s.refs[s->s.h.refidx[1]].f->buf[0] ||
+                !s->s.refs[s->s.h.refidx[2]].f->buf[0]) {
                 av_log(ctx, AV_LOG_ERROR, "Not all references are available\n");
                 return AVERROR_INVALIDDATA;
             }
             if (get_bits1(&s->gb)) {
-                w = s->refs[s->refidx[0]].f->width;
-                h = s->refs[s->refidx[0]].f->height;
+                w = s->s.refs[s->s.h.refidx[0]].f->width;
+                h = s->s.refs[s->s.h.refidx[0]].f->height;
             } else if (get_bits1(&s->gb)) {
-                w = s->refs[s->refidx[1]].f->width;
-                h = s->refs[s->refidx[1]].f->height;
+                w = s->s.refs[s->s.h.refidx[1]].f->width;
+                h = s->s.refs[s->s.h.refidx[1]].f->height;
             } else if (get_bits1(&s->gb)) {
-                w = s->refs[s->refidx[2]].f->width;
-                h = s->refs[s->refidx[2]].f->height;
+                w = s->s.refs[s->s.h.refidx[2]].f->width;
+                h = s->s.refs[s->s.h.refidx[2]].f->height;
             } else {
                 w = get_bits(&s->gb, 16) + 1;
                 h = get_bits(&s->gb, 16) + 1;
@@ -642,218 +613,210 @@ static int decode_frame_header(AVCodecContext *ctx,
             // Note that in this code, "CUR_FRAME" is actually before we
             // have formally allocated a frame, and thus actually represents
             // the _last_ frame
-            s->use_last_frame_mvs &= s->frames[CUR_FRAME].tf.f->width == w &&
-                                     s->frames[CUR_FRAME].tf.f->height == h;
+            s->s.h.use_last_frame_mvs &= s->s.frames[CUR_FRAME].tf.f->width == w &&
+                                       s->s.frames[CUR_FRAME].tf.f->height == h;
             if (get_bits1(&s->gb)) // display size
                 skip_bits(&s->gb, 32);
-            s->highprecisionmvs = get_bits1(&s->gb);
-            s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
-                                                get_bits(&s->gb, 2);
-            s->allowcompinter = (s->signbias[0] != s->signbias[1] ||
-                                 s->signbias[0] != s->signbias[2]);
-            if (s->allowcompinter) {
-                if (s->signbias[0] == s->signbias[1]) {
-                    s->fixcompref    = 2;
-                    s->varcompref[0] = 0;
-                    s->varcompref[1] = 1;
-                } else if (s->signbias[0] == s->signbias[2]) {
-                    s->fixcompref    = 1;
-                    s->varcompref[0] = 0;
-                    s->varcompref[1] = 2;
-                } else {
-                    s->fixcompref    = 0;
-                    s->varcompref[0] = 1;
-                    s->varcompref[1] = 2;
-                }
-            }
-
-            for (i = 0; i < 3; i++) {
-                AVFrame *ref = s->refs[s->refidx[i]].f;
-                int refw = ref->width, refh = ref->height;
-
-                if (ref->format != fmt) {
-                    av_log(ctx, AV_LOG_ERROR,
-                           "Ref pixfmt (%s) did not match current frame (%s)",
-                           av_get_pix_fmt_name(ref->format),
-                           av_get_pix_fmt_name(fmt));
-                    return AVERROR_INVALIDDATA;
-                } else if (refw == w && refh == h) {
-                    s->mvscale[i][0] = s->mvscale[i][1] = 0;
+            s->s.h.highprecisionmvs = get_bits1(&s->gb);
+            s->s.h.filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
+                                                  get_bits(&s->gb, 2);
+            s->s.h.allowcompinter = s->s.h.signbias[0] != s->s.h.signbias[1] ||
+                                  s->s.h.signbias[0] != s->s.h.signbias[2];
+            if (s->s.h.allowcompinter) {
+                if (s->s.h.signbias[0] == s->s.h.signbias[1]) {
+                    s->s.h.fixcompref    = 2;
+                    s->s.h.varcompref[0] = 0;
+                    s->s.h.varcompref[1] = 1;
+                } else if (s->s.h.signbias[0] == s->s.h.signbias[2]) {
+                    s->s.h.fixcompref    = 1;
+                    s->s.h.varcompref[0] = 0;
+                    s->s.h.varcompref[1] = 2;
                 } else {
-                    if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
-                        av_log(ctx, AV_LOG_ERROR,
-                               "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
-                               refw, refh, w, h);
-                        return AVERROR_INVALIDDATA;
-                    }
-                    s->mvscale[i][0] = (refw << 14) / w;
-                    s->mvscale[i][1] = (refh << 14) / h;
-                    s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
-                    s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
+                    s->s.h.fixcompref    = 0;
+                    s->s.h.varcompref[0] = 1;
+                    s->s.h.varcompref[1] = 2;
                 }
             }
         }
     }
-    s->refreshctx   = s->errorres ? 0 : get_bits1(&s->gb);
-    s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
-    s->framectxid   = c = get_bits(&s->gb, 2);
+    s->s.h.refreshctx   = s->s.h.errorres ? 0 : get_bits1(&s->gb);
+    s->s.h.parallelmode = s->s.h.errorres ? 1 : get_bits1(&s->gb);
+    s->s.h.framectxid   = c = get_bits(&s->gb, 2);
 
     /* loopfilter header data */
-    if (s->keyframe || s->errorres || s->intraonly) {
+    if (s->s.h.keyframe || s->s.h.errorres || s->s.h.intraonly) {
         // reset loopfilter defaults
-        s->lf_delta.ref[0] = 1;
-        s->lf_delta.ref[1] = 0;
-        s->lf_delta.ref[2] = -1;
-        s->lf_delta.ref[3] = -1;
-        s->lf_delta.mode[0] = 0;
-        s->lf_delta.mode[1] = 0;
+        s->s.h.lf_delta.ref[0] = 1;
+        s->s.h.lf_delta.ref[1] = 0;
+        s->s.h.lf_delta.ref[2] = -1;
+        s->s.h.lf_delta.ref[3] = -1;
+        s->s.h.lf_delta.mode[0] = 0;
+        s->s.h.lf_delta.mode[1] = 0;
+        memset(s->s.h.segmentation.feat, 0, sizeof(s->s.h.segmentation.feat));
     }
-    s->filter.level = get_bits(&s->gb, 6);
+    s->s.h.filter.level = get_bits(&s->gb, 6);
     sharp = get_bits(&s->gb, 3);
     // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
     // the old cache values since they are still valid
-    if (s->filter.sharpness != sharp)
-        memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
-    s->filter.sharpness = sharp;
-    if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
-        if (get_bits1(&s->gb)) {
+    if (s->s.h.filter.sharpness != sharp)
+        memset(s->filter_lut.lim_lut, 0, sizeof(s->filter_lut.lim_lut));
+    s->s.h.filter.sharpness = sharp;
+    if ((s->s.h.lf_delta.enabled = get_bits1(&s->gb))) {
+        if ((s->s.h.lf_delta.updated = get_bits1(&s->gb))) {
             for (i = 0; i < 4; i++)
                 if (get_bits1(&s->gb))
-                    s->lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
+                    s->s.h.lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
             for (i = 0; i < 2; i++)
                 if (get_bits1(&s->gb))
-                    s->lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
+                    s->s.h.lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
         }
     }
 
     /* quantization header data */
-    s->yac_qi      = get_bits(&s->gb, 8);
-    s->ydc_qdelta  = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
-    s->uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
-    s->uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
-    s->lossless    = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
-                     s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;
+    s->s.h.yac_qi      = get_bits(&s->gb, 8);
+    s->s.h.ydc_qdelta  = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
+    s->s.h.uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
+    s->s.h.uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
+    s->s.h.lossless    = s->s.h.yac_qi == 0 && s->s.h.ydc_qdelta == 0 &&
+                       s->s.h.uvdc_qdelta == 0 && s->s.h.uvac_qdelta == 0;
+    if (s->s.h.lossless)
+        ctx->properties |= FF_CODEC_PROPERTY_LOSSLESS;
 
     /* segmentation header info */
-    s->segmentation.ignore_refmap = 0;
-    if ((s->segmentation.enabled = get_bits1(&s->gb))) {
-        if ((s->segmentation.update_map = get_bits1(&s->gb))) {
+    if ((s->s.h.segmentation.enabled = get_bits1(&s->gb))) {
+        if ((s->s.h.segmentation.update_map = get_bits1(&s->gb))) {
             for (i = 0; i < 7; i++)
-                s->prob.seg[i] = get_bits1(&s->gb) ?
+                s->s.h.segmentation.prob[i] = get_bits1(&s->gb) ?
                                  get_bits(&s->gb, 8) : 255;
-            if ((s->segmentation.temporal = get_bits1(&s->gb))) {
+            if ((s->s.h.segmentation.temporal = get_bits1(&s->gb))) {
                 for (i = 0; i < 3; i++)
-                    s->prob.segpred[i] = get_bits1(&s->gb) ?
+                    s->s.h.segmentation.pred_prob[i] = get_bits1(&s->gb) ?
                                          get_bits(&s->gb, 8) : 255;
             }
         }
-        if ((!s->segmentation.update_map || s->segmentation.temporal) &&
-            (w != s->frames[CUR_FRAME].tf.f->width ||
-             h != s->frames[CUR_FRAME].tf.f->height)) {
-            av_log(ctx, AV_LOG_WARNING,
-                   "Reference segmap (temp=%d,update=%d) enabled on size-change!\n",
-                   s->segmentation.temporal, s->segmentation.update_map);
-                s->segmentation.ignore_refmap = 1;
-            //return AVERROR_INVALIDDATA;
-        }
 
         if (get_bits1(&s->gb)) {
-            s->segmentation.absolute_vals = get_bits1(&s->gb);
+            s->s.h.segmentation.absolute_vals = get_bits1(&s->gb);
             for (i = 0; i < 8; i++) {
-                if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
-                    s->segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
-                if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
-                    s->segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
-                if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
-                    s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
-                s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
+                if ((s->s.h.segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
+                    s->s.h.segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
+                if ((s->s.h.segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
+                    s->s.h.segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
+                if ((s->s.h.segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
+                    s->s.h.segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
+                s->s.h.segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
             }
         }
-    } else {
-        s->segmentation.feat[0].q_enabled    = 0;
-        s->segmentation.feat[0].lf_enabled   = 0;
-        s->segmentation.feat[0].skip_enabled = 0;
-        s->segmentation.feat[0].ref_enabled  = 0;
     }
 
     // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
-    for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
+    for (i = 0; i < (s->s.h.segmentation.enabled ? 8 : 1); i++) {
         int qyac, qydc, quvac, quvdc, lflvl, sh;
 
-        if (s->segmentation.feat[i].q_enabled) {
-            if (s->segmentation.absolute_vals)
-                qyac = s->segmentation.feat[i].q_val;
+        if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].q_enabled) {
+            if (s->s.h.segmentation.absolute_vals)
+                qyac = av_clip_uintp2(s->s.h.segmentation.feat[i].q_val, 8);
             else
-                qyac = s->yac_qi + s->segmentation.feat[i].q_val;
+                qyac = av_clip_uintp2(s->s.h.yac_qi + s->s.h.segmentation.feat[i].q_val, 8);
         } else {
-            qyac  = s->yac_qi;
+            qyac  = s->s.h.yac_qi;
         }
-        qydc  = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
-        quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
-        quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
+        qydc  = av_clip_uintp2(qyac + s->s.h.ydc_qdelta, 8);
+        quvdc = av_clip_uintp2(qyac + s->s.h.uvdc_qdelta, 8);
+        quvac = av_clip_uintp2(qyac + s->s.h.uvac_qdelta, 8);
         qyac  = av_clip_uintp2(qyac, 8);
 
-        s->segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[s->bpp_index][qydc];
-        s->segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[s->bpp_index][qyac];
-        s->segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[s->bpp_index][quvdc];
-        s->segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[s->bpp_index][quvac];
+        s->s.h.segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[s->bpp_index][qydc];
+        s->s.h.segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[s->bpp_index][qyac];
+        s->s.h.segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[s->bpp_index][quvdc];
+        s->s.h.segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[s->bpp_index][quvac];
 
-        sh = s->filter.level >= 32;
-        if (s->segmentation.feat[i].lf_enabled) {
-            if (s->segmentation.absolute_vals)
-                lflvl = av_clip_uintp2(s->segmentation.feat[i].lf_val, 6);
+        sh = s->s.h.filter.level >= 32;
+        if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].lf_enabled) {
+            if (s->s.h.segmentation.absolute_vals)
+                lflvl = av_clip_uintp2(s->s.h.segmentation.feat[i].lf_val, 6);
             else
-                lflvl = av_clip_uintp2(s->filter.level + s->segmentation.feat[i].lf_val, 6);
+                lflvl = av_clip_uintp2(s->s.h.filter.level + s->s.h.segmentation.feat[i].lf_val, 6);
         } else {
-            lflvl  = s->filter.level;
+            lflvl  = s->s.h.filter.level;
         }
-        if (s->lf_delta.enabled) {
-            s->segmentation.feat[i].lflvl[0][0] =
-            s->segmentation.feat[i].lflvl[0][1] =
-                av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
+        if (s->s.h.lf_delta.enabled) {
+            s->s.h.segmentation.feat[i].lflvl[0][0] =
+            s->s.h.segmentation.feat[i].lflvl[0][1] =
+                av_clip_uintp2(lflvl + (s->s.h.lf_delta.ref[0] << sh), 6);
             for (j = 1; j < 4; j++) {
-                s->segmentation.feat[i].lflvl[j][0] =
-                    av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
-                                             s->lf_delta.mode[0]) * (1 << sh)), 6);
-                s->segmentation.feat[i].lflvl[j][1] =
-                    av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
-                                             s->lf_delta.mode[1]) * (1 << sh)), 6);
+                s->s.h.segmentation.feat[i].lflvl[j][0] =
+                    av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
+                                             s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
+                s->s.h.segmentation.feat[i].lflvl[j][1] =
+                    av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
+                                             s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
             }
         } else {
-            memset(s->segmentation.feat[i].lflvl, lflvl,
-                   sizeof(s->segmentation.feat[i].lflvl));
+            memset(s->s.h.segmentation.feat[i].lflvl, lflvl,
+                   sizeof(s->s.h.segmentation.feat[i].lflvl));
         }
     }
 
     /* tiling info */
-    if ((res = update_size(ctx, w, h, fmt)) < 0) {
-        av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n", w, h, fmt);
+    if ((res = update_size(ctx, w, h)) < 0) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n",
+               w, h, s->pix_fmt);
         return res;
     }
-    for (s->tiling.log2_tile_cols = 0;
-         (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
-         s->tiling.log2_tile_cols++) ;
+    for (s->s.h.tiling.log2_tile_cols = 0;
+         s->sb_cols > (64 << s->s.h.tiling.log2_tile_cols);
+         s->s.h.tiling.log2_tile_cols++) ;
     for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
     max = FFMAX(0, max - 1);
-    while (max > s->tiling.log2_tile_cols) {
+    while (max > s->s.h.tiling.log2_tile_cols) {
         if (get_bits1(&s->gb))
-            s->tiling.log2_tile_cols++;
+            s->s.h.tiling.log2_tile_cols++;
         else
             break;
     }
-    s->tiling.log2_tile_rows = decode012(&s->gb);
-    s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
-    if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
-        s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
+    s->s.h.tiling.log2_tile_rows = decode012(&s->gb);
+    s->s.h.tiling.tile_rows = 1 << s->s.h.tiling.log2_tile_rows;
+    if (s->s.h.tiling.tile_cols != (1 << s->s.h.tiling.log2_tile_cols)) {
+        s->s.h.tiling.tile_cols = 1 << s->s.h.tiling.log2_tile_cols;
         s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
-                                 sizeof(VP56RangeCoder) * s->tiling.tile_cols);
+                                 sizeof(VP56RangeCoder) * s->s.h.tiling.tile_cols);
         if (!s->c_b) {
             av_log(ctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
             return AVERROR(ENOMEM);
         }
     }
 
-    if (s->keyframe || s->errorres || s->intraonly) {
+    /* check reference frames */
+    if (!s->s.h.keyframe && !s->s.h.intraonly) {
+        for (i = 0; i < 3; i++) {
+            AVFrame *ref = s->s.refs[s->s.h.refidx[i]].f;
+            int refw = ref->width, refh = ref->height;
+
+            if (ref->format != ctx->pix_fmt) {
+                av_log(ctx, AV_LOG_ERROR,
+                       "Ref pixfmt (%s) did not match current frame (%s)",
+                       av_get_pix_fmt_name(ref->format),
+                       av_get_pix_fmt_name(ctx->pix_fmt));
+                return AVERROR_INVALIDDATA;
+            } else if (refw == w && refh == h) {
+                s->mvscale[i][0] = s->mvscale[i][1] = 0;
+            } else {
+                if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
+                    av_log(ctx, AV_LOG_ERROR,
+                           "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
+                           refw, refh, w, h);
+                    return AVERROR_INVALIDDATA;
+                }
+                s->mvscale[i][0] = (refw << 14) / w;
+                s->mvscale[i][1] = (refh << 14) / h;
+                s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
+                s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
+            }
+        }
+    }
+
+    if (s->s.h.keyframe || s->s.h.errorres || (s->s.h.intraonly && s->s.h.resetctx == 3)) {
         s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
                            s->prob_ctx[3].p = vp9_default_probs;
         memcpy(s->prob_ctx[0].coef, vp9_default_coef_probs,
@@ -864,10 +827,16 @@ static int decode_frame_header(AVCodecContext *ctx,
                sizeof(vp9_default_coef_probs));
         memcpy(s->prob_ctx[3].coef, vp9_default_coef_probs,
                sizeof(vp9_default_coef_probs));
+    } else if (s->s.h.intraonly && s->s.h.resetctx == 2) {
+        s->prob_ctx[c].p = vp9_default_probs;
+        memcpy(s->prob_ctx[c].coef, vp9_default_coef_probs,
+               sizeof(vp9_default_coef_probs));
     }
 
     // next 16 bits is size of the rest of the header (arith-coded)
-    size2 = get_bits(&s->gb, 16);
+    s->s.h.compressed_header_size = size2 = get_bits(&s->gb, 16);
+    s->s.h.uncompressed_header_size = (get_bits_count(&s->gb) + 7) / 8;
+
     data2 = align_get_bits(&s->gb);
     if (size2 > size - (data2 - data)) {
         av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n");
@@ -879,7 +848,7 @@ static int decode_frame_header(AVCodecContext *ctx,
         return AVERROR_INVALIDDATA;
     }
 
-    if (s->keyframe || s->intraonly) {
+    if (s->s.h.keyframe || s->s.h.intraonly) {
         memset(s->counts.coef, 0, sizeof(s->counts.coef));
         memset(s->counts.eob,  0, sizeof(s->counts.eob));
     } else {
@@ -891,14 +860,14 @@ static int decode_frame_header(AVCodecContext *ctx,
     s->prob.p = s->prob_ctx[c].p;
 
     // txfm updates
-    if (s->lossless) {
-        s->txfmmode = TX_4X4;
+    if (s->s.h.lossless) {
+        s->s.h.txfmmode = TX_4X4;
     } else {
-        s->txfmmode = vp8_rac_get_uint(&s->c, 2);
-        if (s->txfmmode == 3)
-            s->txfmmode += vp8_rac_get(&s->c);
+        s->s.h.txfmmode = vp8_rac_get_uint(&s->c, 2);
+        if (s->s.h.txfmmode == 3)
+            s->s.h.txfmmode += vp8_rac_get(&s->c);
 
-        if (s->txfmmode == TX_SWITCHABLE) {
+        if (s->s.h.txfmmode == TX_SWITCHABLE) {
             for (i = 0; i < 2; i++)
                 if (vp56_rac_get_prob_branchy(&s->c, 252))
                     s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
@@ -949,7 +918,7 @@ static int decode_frame_header(AVCodecContext *ctx,
                             p[3] = 0;
                         }
         }
-        if (s->txfmmode == i)
+        if (s->s.h.txfmmode == i)
             break;
     }
 
@@ -957,14 +926,14 @@ static int decode_frame_header(AVCodecContext *ctx,
     for (i = 0; i < 3; i++)
         if (vp56_rac_get_prob_branchy(&s->c, 252))
             s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
-    if (!s->keyframe && !s->intraonly) {
+    if (!s->s.h.keyframe && !s->s.h.intraonly) {
         for (i = 0; i < 7; i++)
             for (j = 0; j < 3; j++)
                 if (vp56_rac_get_prob_branchy(&s->c, 252))
                     s->prob.p.mv_mode[i][j] =
                         update_prob(&s->c, s->prob.p.mv_mode[i][j]);
 
-        if (s->filtermode == FILTER_SWITCHABLE)
+        if (s->s.h.filtermode == FILTER_SWITCHABLE)
             for (i = 0; i < 4; i++)
                 for (j = 0; j < 2; j++)
                     if (vp56_rac_get_prob_branchy(&s->c, 252))
@@ -975,20 +944,20 @@ static int decode_frame_header(AVCodecContext *ctx,
             if (vp56_rac_get_prob_branchy(&s->c, 252))
                 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
 
-        if (s->allowcompinter) {
-            s->comppredmode = vp8_rac_get(&s->c);
-            if (s->comppredmode)
-                s->comppredmode += vp8_rac_get(&s->c);
-            if (s->comppredmode == PRED_SWITCHABLE)
+        if (s->s.h.allowcompinter) {
+            s->s.h.comppredmode = vp8_rac_get(&s->c);
+            if (s->s.h.comppredmode)
+                s->s.h.comppredmode += vp8_rac_get(&s->c);
+            if (s->s.h.comppredmode == PRED_SWITCHABLE)
                 for (i = 0; i < 5; i++)
                     if (vp56_rac_get_prob_branchy(&s->c, 252))
                         s->prob.p.comp[i] =
                             update_prob(&s->c, s->prob.p.comp[i]);
         } else {
-            s->comppredmode = PRED_SINGLEREF;
+            s->s.h.comppredmode = PRED_SINGLEREF;
         }
 
-        if (s->comppredmode != PRED_COMPREF) {
+        if (s->s.h.comppredmode != PRED_COMPREF) {
             for (i = 0; i < 5; i++) {
                 if (vp56_rac_get_prob_branchy(&s->c, 252))
                     s->prob.p.single_ref[i][0] =
@@ -999,7 +968,7 @@ static int decode_frame_header(AVCodecContext *ctx,
             }
         }
 
-        if (s->comppredmode != PRED_SINGLEREF) {
+        if (s->s.h.comppredmode != PRED_SINGLEREF) {
             for (i = 0; i < 5; i++)
                 if (vp56_rac_get_prob_branchy(&s->c, 252))
                     s->prob.p.comp_ref[i] =
@@ -1055,7 +1024,7 @@ static int decode_frame_header(AVCodecContext *ctx,
                         (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
         }
 
-        if (s->highprecisionmvs) {
+        if (s->s.h.highprecisionmvs) {
             for (i = 0; i < 2; i++) {
                 if (vp56_rac_get_prob_branchy(&s->c, 252))
                     s->prob.p.mv_comp[i].class0_hp =
@@ -1180,15 +1149,15 @@ static void find_ref_mvs(VP9Context *s,
     } while (0)
 
         if (row > 0) {
-            struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[(row - 1) * s->sb_cols * 8 + col];
+            struct VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[(row - 1) * s->sb_cols * 8 + col];
             if (mv->ref[0] == ref) {
                 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
             } else if (mv->ref[1] == ref) {
                 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
             }
         }
-        if (col > s->tiling.tile_col_start) {
-            struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[row * s->sb_cols * 8 + col - 1];
+        if (col > s->tile_col_start) {
+            struct VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[row * s->sb_cols * 8 + col - 1];
             if (mv->ref[0] == ref) {
                 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
             } else if (mv->ref[1] == ref) {
@@ -1204,8 +1173,8 @@ static void find_ref_mvs(VP9Context *s,
     for (; i < 8; i++) {
         int c = p[i][0] + col, r = p[i][1] + row;
 
-        if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
-            struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
+        if (c >= s->tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
+            struct VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
 
             if (mv->ref[0] == ref) {
                 RETURN_MV(mv->mv[0]);
@@ -1216,11 +1185,11 @@ static void find_ref_mvs(VP9Context *s,
     }
 
     // MV at this position in previous frame, using same reference frame
-    if (s->use_last_frame_mvs) {
-        struct VP9mvrefPair *mv = &s->frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
+    if (s->s.h.use_last_frame_mvs) {
+        struct VP9mvrefPair *mv = &s->s.frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
 
-        if (!s->frames[REF_FRAME_MVPAIR].uses_2pass)
-            ff_thread_await_progress(&s->frames[REF_FRAME_MVPAIR].tf, row >> 3, 0);
+        if (!s->s.frames[REF_FRAME_MVPAIR].uses_2pass)
+            ff_thread_await_progress(&s->s.frames[REF_FRAME_MVPAIR].tf, row >> 3, 0);
         if (mv->ref[0] == ref) {
             RETURN_MV(mv->mv[0]);
         } else if (mv->ref[1] == ref) {
@@ -1242,34 +1211,34 @@ static void find_ref_mvs(VP9Context *s,
     for (i = 0; i < 8; i++) {
         int c = p[i][0] + col, r = p[i][1] + row;
 
-        if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
-            struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
+        if (c >= s->tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
+            struct VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
 
             if (mv->ref[0] != ref && mv->ref[0] >= 0) {
-                RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
+                RETURN_SCALE_MV(mv->mv[0], s->s.h.signbias[mv->ref[0]] != s->s.h.signbias[ref]);
             }
             if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
                 // BUG - libvpx has this condition regardless of whether
                 // we used the first ref MV and pre-scaling
                 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
-                RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
+                RETURN_SCALE_MV(mv->mv[1], s->s.h.signbias[mv->ref[1]] != s->s.h.signbias[ref]);
             }
         }
     }
 
     // MV at this position in previous frame, using different reference frame
-    if (s->use_last_frame_mvs) {
-        struct VP9mvrefPair *mv = &s->frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
+    if (s->s.h.use_last_frame_mvs) {
+        struct VP9mvrefPair *mv = &s->s.frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
 
         // no need to await_progress, because we already did that above
         if (mv->ref[0] != ref && mv->ref[0] >= 0) {
-            RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
+            RETURN_SCALE_MV(mv->mv[0], s->s.h.signbias[mv->ref[0]] != s->s.h.signbias[ref]);
         }
         if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
             // BUG - libvpx has this condition regardless of whether
             // we used the first ref MV and pre-scaling
             AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
-            RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
+            RETURN_SCALE_MV(mv->mv[1], s->s.h.signbias[mv->ref[1]] != s->s.h.signbias[ref]);
         }
     }
 
@@ -1348,7 +1317,7 @@ static void fill_mv(VP9Context *s,
                      mode == NEWMV ? -1 : sb);
         // FIXME maybe move this code into find_ref_mvs()
         if ((mode == NEWMV || sb == -1) &&
-            !(hp = s->highprecisionmvs && abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
+            !(hp = s->s.h.highprecisionmvs && abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
             if (mv[0].y & 1) {
                 if (mv[0].y < 0)
                     mv[0].y++;
@@ -1378,7 +1347,7 @@ static void fill_mv(VP9Context *s,
             find_ref_mvs(s, &mv[1], b->ref[1], 1, mode == NEARMV,
                          mode == NEWMV ? -1 : sb);
             if ((mode == NEWMV || sb == -1) &&
-                !(hp = s->highprecisionmvs && abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
+                !(hp = s->s.h.highprecisionmvs && abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
                 if (mv[1].y & 1) {
                     if (mv[1].y < 0)
                         mv[1].y++;
@@ -1470,24 +1439,25 @@ static void decode_mode(AVCodecContext *ctx)
     enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
     int bw4 = bwh_tab[1][b->bs][0], w4 = FFMIN(s->cols - col, bw4);
     int bh4 = bwh_tab[1][b->bs][1], h4 = FFMIN(s->rows - row, bh4), y;
-    int have_a = row > 0, have_l = col > s->tiling.tile_col_start;
+    int have_a = row > 0, have_l = col > s->tile_col_start;
     int vref, filter_id;
 
-    if (!s->segmentation.enabled) {
+    if (!s->s.h.segmentation.enabled) {
         b->seg_id = 0;
-    } else if (s->keyframe || s->intraonly) {
-        b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree, s->prob.seg);
-    } else if (!s->segmentation.update_map ||
-               (s->segmentation.temporal &&
+    } else if (s->s.h.keyframe || s->s.h.intraonly) {
+        b->seg_id = !s->s.h.segmentation.update_map ? 0 :
+                    vp8_rac_get_tree(&s->c, vp9_segmentation_tree, s->s.h.segmentation.prob);
+    } else if (!s->s.h.segmentation.update_map ||
+               (s->s.h.segmentation.temporal &&
                 vp56_rac_get_prob_branchy(&s->c,
-                    s->prob.segpred[s->above_segpred_ctx[col] +
+                    s->s.h.segmentation.pred_prob[s->above_segpred_ctx[col] +
                                     s->left_segpred_ctx[row7]]))) {
-        if (!s->errorres && !s->segmentation.ignore_refmap) {
+        if (!s->s.h.errorres && s->s.frames[REF_FRAME_SEGMAP].segmentation_map) {
             int pred = 8, x;
-            uint8_t *refsegmap = s->frames[REF_FRAME_SEGMAP].segmentation_map;
+            uint8_t *refsegmap = s->s.frames[REF_FRAME_SEGMAP].segmentation_map;
 
-            if (!s->frames[REF_FRAME_SEGMAP].uses_2pass)
-                ff_thread_await_progress(&s->frames[REF_FRAME_SEGMAP].tf, row >> 3, 0);
+            if (!s->s.frames[REF_FRAME_SEGMAP].uses_2pass)
+                ff_thread_await_progress(&s->s.frames[REF_FRAME_SEGMAP].tf, row >> 3, 0);
             for (y = 0; y < h4; y++) {
                 int idx_base = (y + row) * 8 * s->sb_cols + col;
                 for (x = 0; x < w4; x++)
@@ -1503,29 +1473,29 @@ static void decode_mode(AVCodecContext *ctx)
         memset(&s->left_segpred_ctx[row7], 1, h4);
     } else {
         b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree,
-                                     s->prob.seg);
+                                     s->s.h.segmentation.prob);
 
         memset(&s->above_segpred_ctx[col], 0, w4);
         memset(&s->left_segpred_ctx[row7], 0, h4);
     }
-    if (s->segmentation.enabled &&
-        (s->segmentation.update_map || s->keyframe || s->intraonly)) {
-        setctx_2d(&s->frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
+    if (s->s.h.segmentation.enabled &&
+        (s->s.h.segmentation.update_map || s->s.h.keyframe || s->s.h.intraonly)) {
+        setctx_2d(&s->s.frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
                   bw4, bh4, 8 * s->sb_cols, b->seg_id);
     }
 
-    b->skip = s->segmentation.enabled &&
-        s->segmentation.feat[b->seg_id].skip_enabled;
+    b->skip = s->s.h.segmentation.enabled &&
+        s->s.h.segmentation.feat[b->seg_id].skip_enabled;
     if (!b->skip) {
         int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
         b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
         s->counts.skip[c][b->skip]++;
     }
 
-    if (s->keyframe || s->intraonly) {
+    if (s->s.h.keyframe || s->s.h.intraonly) {
         b->intra = 1;
-    } else if (s->segmentation.feat[b->seg_id].ref_enabled) {
-        b->intra = !s->segmentation.feat[b->seg_id].ref_val;
+    } else if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].ref_enabled) {
+        b->intra = !s->s.h.segmentation.feat[b->seg_id].ref_val;
     } else {
         int c, bit;
 
@@ -1541,7 +1511,7 @@ static void decode_mode(AVCodecContext *ctx)
         b->intra = !bit;
     }
 
-    if ((b->intra || !b->skip) && s->txfmmode == TX_SWITCHABLE) {
+    if ((b->intra || !b->skip) && s->s.h.txfmmode == TX_SWITCHABLE) {
         int c;
         if (have_a) {
             if (have_l) {
@@ -1584,10 +1554,10 @@ static void decode_mode(AVCodecContext *ctx)
             break;
         }
     } else {
-        b->tx = FFMIN(max_tx, s->txfmmode);
+        b->tx = FFMIN(max_tx, s->s.h.txfmmode);
     }
 
-    if (s->keyframe || s->intraonly) {
+    if (s->s.h.keyframe || s->s.h.intraonly) {
         uint8_t *a = &s->above_mode_ctx[col * 2];
         uint8_t *l = &s->left_mode_ctx[(row7) << 1];
 
@@ -1689,14 +1659,14 @@ static void decode_mode(AVCodecContext *ctx)
             { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
         };
 
-        if (s->segmentation.feat[b->seg_id].ref_enabled) {
-            av_assert2(s->segmentation.feat[b->seg_id].ref_val != 0);
+        if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].ref_enabled) {
+            av_assert2(s->s.h.segmentation.feat[b->seg_id].ref_val != 0);
             b->comp = 0;
-            b->ref[0] = s->segmentation.feat[b->seg_id].ref_val - 1;
+            b->ref[0] = s->s.h.segmentation.feat[b->seg_id].ref_val - 1;
         } else {
             // read comp_pred flag
-            if (s->comppredmode != PRED_SWITCHABLE) {
-                b->comp = s->comppredmode == PRED_COMPREF;
+            if (s->s.h.comppredmode != PRED_SWITCHABLE) {
+                b->comp = s->s.h.comppredmode == PRED_COMPREF;
             } else {
                 int c;
 
@@ -1707,23 +1677,23 @@ static void decode_mode(AVCodecContext *ctx)
                             c = 4;
                         } else if (s->above_comp_ctx[col]) {
                             c = 2 + (s->left_intra_ctx[row7] ||
-                                     s->left_ref_ctx[row7] == s->fixcompref);
+                                     s->left_ref_ctx[row7] == s->s.h.fixcompref);
                         } else if (s->left_comp_ctx[row7]) {
                             c = 2 + (s->above_intra_ctx[col] ||
-                                     s->above_ref_ctx[col] == s->fixcompref);
+                                     s->above_ref_ctx[col] == s->s.h.fixcompref);
                         } else {
                             c = (!s->above_intra_ctx[col] &&
-                                 s->above_ref_ctx[col] == s->fixcompref) ^
+                                 s->above_ref_ctx[col] == s->s.h.fixcompref) ^
                             (!s->left_intra_ctx[row7] &&
-                             s->left_ref_ctx[row & 7] == s->fixcompref);
+                             s->left_ref_ctx[row & 7] == s->s.h.fixcompref);
                         }
                     } else {
                         c = s->above_comp_ctx[col] ? 3 :
-                        (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->fixcompref);
+                        (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->s.h.fixcompref);
                     }
                 } else if (have_l) {
                     c = s->left_comp_ctx[row7] ? 3 :
-                    (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->fixcompref);
+                    (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->s.h.fixcompref);
                 } else {
                     c = 1;
                 }
@@ -1735,9 +1705,9 @@ static void decode_mode(AVCodecContext *ctx)
             // FIXME probably cache a few variables here to prevent repetitive
             // memory accesses below
             if (b->comp) /* two references */ {
-                int fix_idx = s->signbias[s->fixcompref], var_idx = !fix_idx, c, bit;
+                int fix_idx = s->s.h.signbias[s->s.h.fixcompref], var_idx = !fix_idx, c, bit;
 
-                b->ref[fix_idx] = s->fixcompref;
+                b->ref[fix_idx] = s->s.h.fixcompref;
                 // FIXME can this codeblob be replaced by some sort of LUT?
                 if (have_a) {
                     if (have_l) {
@@ -1745,35 +1715,35 @@ static void decode_mode(AVCodecContext *ctx)
                             if (s->left_intra_ctx[row7]) {
                                 c = 2;
                             } else {
-                                c = 1 + 2 * (s->left_ref_ctx[row7] != s->varcompref[1]);
+                                c = 1 + 2 * (s->left_ref_ctx[row7] != s->s.h.varcompref[1]);
                             }
                         } else if (s->left_intra_ctx[row7]) {
-                            c = 1 + 2 * (s->above_ref_ctx[col] != s->varcompref[1]);
+                            c = 1 + 2 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]);
                         } else {
                             int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
 
-                            if (refl == refa && refa == s->varcompref[1]) {
+                            if (refl == refa && refa == s->s.h.varcompref[1]) {
                                 c = 0;
                             } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
-                                if ((refa == s->fixcompref && refl == s->varcompref[0]) ||
-                                    (refl == s->fixcompref && refa == s->varcompref[0])) {
+                                if ((refa == s->s.h.fixcompref && refl == s->s.h.varcompref[0]) ||
+                                    (refl == s->s.h.fixcompref && refa == s->s.h.varcompref[0])) {
                                     c = 4;
                                 } else {
                                     c = (refa == refl) ? 3 : 1;
                                 }
                             } else if (!s->left_comp_ctx[row7]) {
-                                if (refa == s->varcompref[1] && refl != s->varcompref[1]) {
+                                if (refa == s->s.h.varcompref[1] && refl != s->s.h.varcompref[1]) {
                                     c = 1;
                                 } else {
-                                    c = (refl == s->varcompref[1] &&
-                                         refa != s->varcompref[1]) ? 2 : 4;
+                                    c = (refl == s->s.h.varcompref[1] &&
+                                         refa != s->s.h.varcompref[1]) ? 2 : 4;
                                 }
                             } else if (!s->above_comp_ctx[col]) {
-                                if (refl == s->varcompref[1] && refa != s->varcompref[1]) {
+                                if (refl == s->s.h.varcompref[1] && refa != s->s.h.varcompref[1]) {
                                     c = 1;
                                 } else {
-                                    c = (refa == s->varcompref[1] &&
-                                         refl != s->varcompref[1]) ? 2 : 4;
+                                    c = (refa == s->s.h.varcompref[1] &&
+                                         refl != s->s.h.varcompref[1]) ? 2 : 4;
                                 }
                             } else {
                                 c = (refl == refa) ? 4 : 2;
@@ -1783,24 +1753,24 @@ static void decode_mode(AVCodecContext *ctx)
                         if (s->above_intra_ctx[col]) {
                             c = 2;
                         } else if (s->above_comp_ctx[col]) {
-                            c = 4 * (s->above_ref_ctx[col] != s->varcompref[1]);
+                            c = 4 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]);
                         } else {
-                            c = 3 * (s->above_ref_ctx[col] != s->varcompref[1]);
+                            c = 3 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]);
                         }
                     }
                 } else if (have_l) {
                     if (s->left_intra_ctx[row7]) {
                         c = 2;
                     } else if (s->left_comp_ctx[row7]) {
-                        c = 4 * (s->left_ref_ctx[row7] != s->varcompref[1]);
+                        c = 4 * (s->left_ref_ctx[row7] != s->s.h.varcompref[1]);
                     } else {
-                        c = 3 * (s->left_ref_ctx[row7] != s->varcompref[1]);
+                        c = 3 * (s->left_ref_ctx[row7] != s->s.h.varcompref[1]);
                     }
                 } else {
                     c = 2;
                 }
                 bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
-                b->ref[var_idx] = s->varcompref[bit];
+                b->ref[var_idx] = s->s.h.varcompref[bit];
                 s->counts.comp_ref[c][bit]++;
             } else /* single reference */ {
                 int bit, c;
@@ -1809,22 +1779,22 @@ static void decode_mode(AVCodecContext *ctx)
                     if (have_l && !s->left_intra_ctx[row7]) {
                         if (s->left_comp_ctx[row7]) {
                             if (s->above_comp_ctx[col]) {
-                                c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7] ||
+                                c = 1 + (!s->s.h.fixcompref || !s->left_ref_ctx[row7] ||
                                          !s->above_ref_ctx[col]);
                             } else {
                                 c = (3 * !s->above_ref_ctx[col]) +
-                                    (!s->fixcompref || !s->left_ref_ctx[row7]);
+                                    (!s->s.h.fixcompref || !s->left_ref_ctx[row7]);
                             }
                         } else if (s->above_comp_ctx[col]) {
                             c = (3 * !s->left_ref_ctx[row7]) +
-                                (!s->fixcompref || !s->above_ref_ctx[col]);
+                                (!s->s.h.fixcompref || !s->above_ref_ctx[col]);
                         } else {
                             c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
                         }
                     } else if (s->above_intra_ctx[col]) {
                         c = 2;
                     } else if (s->above_comp_ctx[col]) {
-                        c = 1 + (!s->fixcompref || !s->above_ref_ctx[col]);
+                        c = 1 + (!s->s.h.fixcompref || !s->above_ref_ctx[col]);
                     } else {
                         c = 4 * (!s->above_ref_ctx[col]);
                     }
@@ -1832,7 +1802,7 @@ static void decode_mode(AVCodecContext *ctx)
                     if (s->left_intra_ctx[row7]) {
                         c = 2;
                     } else if (s->left_comp_ctx[row7]) {
-                        c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7]);
+                        c = 1 + (!s->s.h.fixcompref || !s->left_ref_ctx[row7]);
                     } else {
                         c = 4 * (!s->left_ref_ctx[row7]);
                     }
@@ -1851,7 +1821,7 @@ static void decode_mode(AVCodecContext *ctx)
                                 if (s->above_intra_ctx[col]) {
                                     c = 2;
                                 } else if (s->above_comp_ctx[col]) {
-                                    c = 1 + 2 * (s->fixcompref == 1 ||
+                                    c = 1 + 2 * (s->s.h.fixcompref == 1 ||
                                                  s->above_ref_ctx[col] == 1);
                                 } else if (!s->above_ref_ctx[col]) {
                                     c = 3;
@@ -1862,7 +1832,7 @@ static void decode_mode(AVCodecContext *ctx)
                                 if (s->left_intra_ctx[row7]) {
                                     c = 2;
                                 } else if (s->left_comp_ctx[row7]) {
-                                    c = 1 + 2 * (s->fixcompref == 1 ||
+                                    c = 1 + 2 * (s->s.h.fixcompref == 1 ||
                                                  s->left_ref_ctx[row7] == 1);
                                 } else if (!s->left_ref_ctx[row7]) {
                                     c = 3;
@@ -1872,25 +1842,25 @@ static void decode_mode(AVCodecContext *ctx)
                             } else if (s->above_comp_ctx[col]) {
                                 if (s->left_comp_ctx[row7]) {
                                     if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
-                                        c = 3 * (s->fixcompref == 1 ||
+                                        c = 3 * (s->s.h.fixcompref == 1 ||
                                                  s->left_ref_ctx[row7] == 1);
                                     } else {
                                         c = 2;
                                     }
                                 } else if (!s->left_ref_ctx[row7]) {
-                                    c = 1 + 2 * (s->fixcompref == 1 ||
+                                    c = 1 + 2 * (s->s.h.fixcompref == 1 ||
                                                  s->above_ref_ctx[col] == 1);
                                 } else {
                                     c = 3 * (s->left_ref_ctx[row7] == 1) +
-                                    (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
+                                    (s->s.h.fixcompref == 1 || s->above_ref_ctx[col] == 1);
                                 }
                             } else if (s->left_comp_ctx[row7]) {
                                 if (!s->above_ref_ctx[col]) {
-                                    c = 1 + 2 * (s->fixcompref == 1 ||
+                                    c = 1 + 2 * (s->s.h.fixcompref == 1 ||
                                                  s->left_ref_ctx[row7] == 1);
                                 } else {
                                     c = 3 * (s->above_ref_ctx[col] == 1) +
-                                    (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
+                                    (s->s.h.fixcompref == 1 || s->left_ref_ctx[row7] == 1);
                                 }
                             } else if (!s->above_ref_ctx[col]) {
                                 if (!s->left_ref_ctx[row7]) {
@@ -1909,7 +1879,7 @@ static void decode_mode(AVCodecContext *ctx)
                                 (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
                                 c = 2;
                             } else if (s->above_comp_ctx[col]) {
-                                c = 3 * (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
+                                c = 3 * (s->s.h.fixcompref == 1 || s->above_ref_ctx[col] == 1);
                             } else {
                                 c = 4 * (s->above_ref_ctx[col] == 1);
                             }
@@ -1919,7 +1889,7 @@ static void decode_mode(AVCodecContext *ctx)
                             (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
                             c = 2;
                         } else if (s->left_comp_ctx[row7]) {
-                            c = 3 * (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
+                            c = 3 * (s->s.h.fixcompref == 1 || s->left_ref_ctx[row7] == 1);
                         } else {
                             c = 4 * (s->left_ref_ctx[row7] == 1);
                         }
@@ -1934,7 +1904,7 @@ static void decode_mode(AVCodecContext *ctx)
         }
 
         if (b->bs <= BS_8x8) {
-            if (s->segmentation.feat[b->seg_id].skip_enabled) {
+            if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].skip_enabled) {
                 b->mode[0] = b->mode[1] = b->mode[2] = b->mode[3] = ZEROMV;
             } else {
                 static const uint8_t off[10] = {
@@ -1953,7 +1923,7 @@ static void decode_mode(AVCodecContext *ctx)
             }
         }
 
-        if (s->filtermode == FILTER_SWITCHABLE) {
+        if (s->s.h.filtermode == FILTER_SWITCHABLE) {
             int c;
 
             if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
@@ -1974,7 +1944,7 @@ static void decode_mode(AVCodecContext *ctx)
             s->counts.filter[c][filter_id]++;
             b->filter = vp9_filter_lut[filter_id];
         } else {
-            b->filter = s->filtermode;
+            b->filter = s->s.h.filtermode;
         }
 
         if (b->bs > BS_8x8) {
@@ -2030,7 +2000,7 @@ static void decode_mode(AVCodecContext *ctx)
             AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
         }
 
-        vref = b->ref[b->comp ? s->signbias[s->varcompref[0]] : 0];
+        vref = b->ref[b->comp ? s->s.h.signbias[s->s.h.varcompref[0]] : 0];
     }
 
 #if HAVE_FAST_64BIT
@@ -2076,13 +2046,13 @@ static void decode_mode(AVCodecContext *ctx)
         SPLAT_CTX(s->dir##_skip_ctx[off],      b->skip,          n); \
         SPLAT_CTX(s->dir##_txfm_ctx[off],      b->tx,            n); \
         SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
-        if (!s->keyframe && !s->intraonly) { \
+        if (!s->s.h.keyframe && !s->s.h.intraonly) { \
             SPLAT_CTX(s->dir##_intra_ctx[off], b->intra,   n); \
             SPLAT_CTX(s->dir##_comp_ctx[off],  b->comp,    n); \
             SPLAT_CTX(s->dir##_mode_ctx[off],  b->mode[3], n); \
             if (!b->intra) { \
                 SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
-                if (s->filtermode == FILTER_SWITCHABLE) { \
+                if (s->s.h.filtermode == FILTER_SWITCHABLE) { \
                     SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
                 } \
             } \
@@ -2102,7 +2072,7 @@ static void decode_mode(AVCodecContext *ctx)
 #undef SPLAT_CTX
 #undef SET_CTXS
 
-    if (!s->keyframe && !s->intraonly) {
+    if (!s->s.h.keyframe && !s->s.h.intraonly) {
         if (b->bs > BS_8x8) {
             int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
 
@@ -2131,7 +2101,7 @@ static void decode_mode(AVCodecContext *ctx)
     // FIXME kinda ugly
     for (y = 0; y < h4; y++) {
         int x, o = (row + y) * s->sb_cols * 8 + col;
-        struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[o];
+        struct VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[o];
 
         if (b->intra) {
             for (x = 0; x < w4; x++) {
@@ -2332,8 +2302,8 @@ static av_always_inline int decode_coeffs(AVCodecContext *ctx, int is8bitsperpix
     int end_x = FFMIN(2 * (s->cols - col), w4);
     int end_y = FFMIN(2 * (s->rows - row), h4);
     int n, pl, x, y, res;
-    int16_t (*qmul)[2] = s->segmentation.feat[b->seg_id].qmul;
-    int tx = 4 * s->lossless + b->tx;
+    int16_t (*qmul)[2] = s->s.h.segmentation.feat[b->seg_id].qmul;
+    int tx = 4 * s->s.h.lossless + b->tx;
     const int16_t * const *yscans = vp9_scans[tx];
     const int16_t (* const *ynbs)[2] = vp9_scans_nb[tx];
     const int16_t *uvscan = vp9_scans[b->uvtx][DCT_DCT];
@@ -2505,7 +2475,7 @@ static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **
                                              int p, int ss_h, int ss_v, int bytesperpixel)
 {
     int have_top = row > 0 || y > 0;
-    int have_left = col > s->tiling.tile_col_start || x > 0;
+    int have_left = col > s->tile_col_start || x > 0;
     int have_right = x < w - 1;
     int bpp = s->bpp;
     static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
@@ -2688,9 +2658,9 @@ static av_always_inline void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off,
     int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
     int end_x = FFMIN(2 * (s->cols - col), w4);
     int end_y = FFMIN(2 * (s->rows - row), h4);
-    int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
+    int tx = 4 * s->s.h.lossless + b->tx, uvtx = b->uvtx + 4 * s->s.h.lossless;
     int uvstep1d = 1 << b->uvtx, p;
-    uint8_t *dst = s->dst[0], *dst_r = s->frames[CUR_FRAME].tf.f->data[0] + y_off;
+    uint8_t *dst = s->dst[0], *dst_r = s->s.frames[CUR_FRAME].tf.f->data[0] + y_off;
     LOCAL_ALIGNED_32(uint8_t, a_buf, [96]);
     LOCAL_ALIGNED_32(uint8_t, l, [64]);
 
@@ -2705,7 +2675,7 @@ static av_always_inline void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off,
             int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
 
             mode = check_intra_mode(s, mode, &a, ptr_r,
-                                    s->frames[CUR_FRAME].tf.f->linesize[0],
+                                    s->s.frames[CUR_FRAME].tf.f->linesize[0],
                                     ptr, s->y_stride, l,
                                     col, x, w4, row, y, b->tx, 0, 0, 0, bytesperpixel);
             s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
@@ -2713,7 +2683,7 @@ static av_always_inline void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off,
                 s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
                                            s->block + 16 * n * bytesperpixel, eob);
         }
-        dst_r += 4 * step1d * s->frames[CUR_FRAME].tf.f->linesize[0];
+        dst_r += 4 * step1d * s->s.frames[CUR_FRAME].tf.f->linesize[0];
         dst   += 4 * step1d * s->y_stride;
     }
 
@@ -2724,7 +2694,7 @@ static av_always_inline void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off,
     step = 1 << (b->uvtx * 2);
     for (p = 0; p < 2; p++) {
         dst   = s->dst[1 + p];
-        dst_r = s->frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
+        dst_r = s->s.frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
         for (n = 0, y = 0; y < end_y; y += uvstep1d) {
             uint8_t *ptr = dst, *ptr_r = dst_r;
             for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d * bytesperpixel,
@@ -2734,7 +2704,7 @@ static av_always_inline void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off,
                 int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
 
                 mode = check_intra_mode(s, mode, &a, ptr_r,
-                                        s->frames[CUR_FRAME].tf.f->linesize[1],
+                                        s->s.frames[CUR_FRAME].tf.f->linesize[1],
                                         ptr, s->uv_stride, l, col, x, w4, row, y,
                                         b->uvtx, p + 1, s->ss_h, s->ss_v, bytesperpixel);
                 s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
@@ -2742,7 +2712,7 @@ static av_always_inline void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off,
                     s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
                                                     s->uvblock[p] + 16 * n * bytesperpixel, eob);
             }
-            dst_r += 4 * uvstep1d * s->frames[CUR_FRAME].tf.f->linesize[1];
+            dst_r += 4 * uvstep1d * s->s.frames[CUR_FRAME].tf.f->linesize[1];
             dst   += 4 * uvstep1d * s->uv_stride;
         }
     }
@@ -2758,7 +2728,108 @@ static void intra_recon_16bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv
     intra_recon(ctx, y_off, uv_off, 2);
 }
 
+static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
+                                              uint8_t *dst, ptrdiff_t dst_stride,
+                                              const uint8_t *ref, ptrdiff_t ref_stride,
+                                              ThreadFrame *ref_frame,
+                                              ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
+                                              int bw, int bh, int w, int h, int bytesperpixel)
+{
+    int mx = mv->x, my = mv->y, th;
+
+    y += my >> 3;
+    x += mx >> 3;
+    ref += y * ref_stride + x * bytesperpixel;
+    mx &= 7;
+    my &= 7;
+    // FIXME bilinear filter only needs 0/1 pixels, not 3/4
+    // we use +7 because the last 7 pixels of each sbrow can be changed in
+    // the longest loopfilter of the next sbrow
+    th = (y + bh + 4 * !!my + 7) >> 6;
+    ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
+    if (x < !!mx * 3 || y < !!my * 3 ||
+        x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
+        s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
+                                 ref - !!my * 3 * ref_stride - !!mx * 3 * bytesperpixel,
+                                 160, ref_stride,
+                                 bw + !!mx * 7, bh + !!my * 7,
+                                 x - !!mx * 3, y - !!my * 3, w, h);
+        ref = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
+        ref_stride = 160;
+    }
+    mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
+}
+
+static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
+                                                uint8_t *dst_u, uint8_t *dst_v,
+                                                ptrdiff_t dst_stride,
+                                                const uint8_t *ref_u, ptrdiff_t src_stride_u,
+                                                const uint8_t *ref_v, ptrdiff_t src_stride_v,
+                                                ThreadFrame *ref_frame,
+                                                ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
+                                                int bw, int bh, int w, int h, int bytesperpixel)
+{
+    int mx = mv->x << !s->ss_h, my = mv->y << !s->ss_v, th;
+
+    y += my >> 4;
+    x += mx >> 4;
+    ref_u += y * src_stride_u + x * bytesperpixel;
+    ref_v += y * src_stride_v + x * bytesperpixel;
+    mx &= 15;
+    my &= 15;
+    // FIXME bilinear filter only needs 0/1 pixels, not 3/4
+    // we use +7 because the last 7 pixels of each sbrow can be changed in
+    // the longest loopfilter of the next sbrow
+    th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
+    ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
+    if (x < !!mx * 3 || y < !!my * 3 ||
+        x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
+        s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
+                                 ref_u - !!my * 3 * src_stride_u - !!mx * 3 * bytesperpixel,
+                                 160, src_stride_u,
+                                 bw + !!mx * 7, bh + !!my * 7,
+                                 x - !!mx * 3, y - !!my * 3, w, h);
+        ref_u = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
+        mc[!!mx][!!my](dst_u, dst_stride, ref_u, 160, bh, mx, my);
+
+        s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
+                                 ref_v - !!my * 3 * src_stride_v - !!mx * 3 * bytesperpixel,
+                                 160, src_stride_v,
+                                 bw + !!mx * 7, bh + !!my * 7,
+                                 x - !!mx * 3, y - !!my * 3, w, h);
+        ref_v = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
+        mc[!!mx][!!my](dst_v, dst_stride, ref_v, 160, bh, mx, my);
+    } else {
+        mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
+        mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
+    }
+}
+
+#define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
+                    px, py, pw, ph, bw, bh, w, h, i) \
+    mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
+                     mv, bw, bh, w, h, bytesperpixel)
+#define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
+                      row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
+    mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
+                       row, col, mv, bw, bh, w, h, bytesperpixel)
+#define SCALED 0
+#define FN(x) x##_8bpp
+#define BYTES_PER_PIXEL 1
+#include "vp9_mc_template.c"
+#undef FN
+#undef BYTES_PER_PIXEL
+#define FN(x) x##_16bpp
+#define BYTES_PER_PIXEL 2
+#include "vp9_mc_template.c"
+#undef mc_luma_dir
+#undef mc_chroma_dir
+#undef FN
+#undef BYTES_PER_PIXEL
+#undef SCALED
+
 static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
+                                            vp9_mc_func (*mc)[2],
                                             uint8_t *dst, ptrdiff_t dst_stride,
                                             const uint8_t *ref, ptrdiff_t ref_stride,
                                             ThreadFrame *ref_frame,
@@ -2767,6 +2838,11 @@ static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func sm
                                             int bw, int bh, int w, int h, int bytesperpixel,
                                             const uint16_t *scale, const uint8_t *step)
 {
+    if (s->s.frames[CUR_FRAME].tf.f->width == ref_frame->f->width &&
+        s->s.frames[CUR_FRAME].tf.f->height == ref_frame->f->height) {
+        mc_luma_unscaled(s, mc, dst, dst_stride, ref, ref_stride, ref_frame,
+                         y, x, in_mv, bw, bh, w, h, bytesperpixel);
+    } else {
 #define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
     int mx, my;
     int refbw_m1, refbh_m1;
@@ -2803,9 +2879,11 @@ static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func sm
         ref_stride = 288;
     }
     smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
+    }
 }
 
 static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
+                                              vp9_mc_func (*mc)[2],
                                               uint8_t *dst_u, uint8_t *dst_v,
                                               ptrdiff_t dst_stride,
                                               const uint8_t *ref_u, ptrdiff_t src_stride_u,
@@ -2816,6 +2894,12 @@ static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func
                                               int bw, int bh, int w, int h, int bytesperpixel,
                                               const uint16_t *scale, const uint8_t *step)
 {
+    if (s->s.frames[CUR_FRAME].tf.f->width == ref_frame->f->width &&
+        s->s.frames[CUR_FRAME].tf.f->height == ref_frame->f->height) {
+        mc_chroma_unscaled(s, mc, dst_u, dst_v, dst_stride, ref_u, src_stride_u,
+                           ref_v, src_stride_v, ref_frame,
+                           y, x, in_mv, bw, bh, w, h, bytesperpixel);
+    } else {
     int mx, my;
     int refbw_m1, refbh_m1;
     int th;
@@ -2871,16 +2955,17 @@ static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func
         smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
         smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
     }
+    }
 }
 
 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
                     px, py, pw, ph, bw, bh, w, h, i) \
-    mc_luma_scaled(s, s->dsp.s##mc, dst, dst_ls, src, src_ls, tref, row, col, \
+    mc_luma_scaled(s, s->dsp.s##mc, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
                    mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
                    s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
                       row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
-    mc_chroma_scaled(s, s->dsp.s##mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
+    mc_chroma_scaled(s, s->dsp.s##mc, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
                      row, col, mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
                      s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
 #define SCALED 1
@@ -2898,106 +2983,6 @@ static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func
 #undef BYTES_PER_PIXEL
 #undef SCALED
 
-static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
-                                              uint8_t *dst, ptrdiff_t dst_stride,
-                                              const uint8_t *ref, ptrdiff_t ref_stride,
-                                              ThreadFrame *ref_frame,
-                                              ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
-                                              int bw, int bh, int w, int h, int bytesperpixel)
-{
-    int mx = mv->x, my = mv->y, th;
-
-    y += my >> 3;
-    x += mx >> 3;
-    ref += y * ref_stride + x * bytesperpixel;
-    mx &= 7;
-    my &= 7;
-    // FIXME bilinear filter only needs 0/1 pixels, not 3/4
-    // we use +7 because the last 7 pixels of each sbrow can be changed in
-    // the longest loopfilter of the next sbrow
-    th = (y + bh + 4 * !!my + 7) >> 6;
-    ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
-    if (x < !!mx * 3 || y < !!my * 3 ||
-        x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
-        s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
-                                 ref - !!my * 3 * ref_stride - !!mx * 3 * bytesperpixel,
-                                 160, ref_stride,
-                                 bw + !!mx * 7, bh + !!my * 7,
-                                 x - !!mx * 3, y - !!my * 3, w, h);
-        ref = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
-        ref_stride = 160;
-    }
-    mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
-}
-
-static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
-                                                uint8_t *dst_u, uint8_t *dst_v,
-                                                ptrdiff_t dst_stride,
-                                                const uint8_t *ref_u, ptrdiff_t src_stride_u,
-                                                const uint8_t *ref_v, ptrdiff_t src_stride_v,
-                                                ThreadFrame *ref_frame,
-                                                ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
-                                                int bw, int bh, int w, int h, int bytesperpixel)
-{
-    int mx = mv->x << !s->ss_h, my = mv->y << !s->ss_v, th;
-
-    y += my >> 4;
-    x += mx >> 4;
-    ref_u += y * src_stride_u + x * bytesperpixel;
-    ref_v += y * src_stride_v + x * bytesperpixel;
-    mx &= 15;
-    my &= 15;
-    // FIXME bilinear filter only needs 0/1 pixels, not 3/4
-    // we use +7 because the last 7 pixels of each sbrow can be changed in
-    // the longest loopfilter of the next sbrow
-    th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
-    ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
-    if (x < !!mx * 3 || y < !!my * 3 ||
-        x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
-        s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
-                                 ref_u - !!my * 3 * src_stride_u - !!mx * 3 * bytesperpixel,
-                                 160, src_stride_u,
-                                 bw + !!mx * 7, bh + !!my * 7,
-                                 x - !!mx * 3, y - !!my * 3, w, h);
-        ref_u = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
-        mc[!!mx][!!my](dst_u, dst_stride, ref_u, 160, bh, mx, my);
-
-        s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
-                                 ref_v - !!my * 3 * src_stride_v - !!mx * 3 * bytesperpixel,
-                                 160, src_stride_v,
-                                 bw + !!mx * 7, bh + !!my * 7,
-                                 x - !!mx * 3, y - !!my * 3, w, h);
-        ref_v = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
-        mc[!!mx][!!my](dst_v, dst_stride, ref_v, 160, bh, mx, my);
-    } else {
-        mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
-        mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
-    }
-}
-
-#define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
-                    px, py, pw, ph, bw, bh, w, h, i) \
-    mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
-                     mv, bw, bh, w, h, bytesperpixel)
-#define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
-                      row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
-    mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
-                       row, col, mv, bw, bh, w, h, bytesperpixel)
-#define SCALED 0
-#define FN(x) x##_8bpp
-#define BYTES_PER_PIXEL 1
-#include "vp9_mc_template.c"
-#undef FN
-#undef BYTES_PER_PIXEL
-#define FN(x) x##_16bpp
-#define BYTES_PER_PIXEL 2
-#include "vp9_mc_template.c"
-#undef mc_luma_dir_dir
-#undef mc_chroma_dir_dir
-#undef FN
-#undef BYTES_PER_PIXEL
-#undef SCALED
-
 static av_always_inline void inter_recon(AVCodecContext *ctx, int bytesperpixel)
 {
     VP9Context *s = ctx->priv_data;
@@ -3024,7 +3009,7 @@ static av_always_inline void inter_recon(AVCodecContext *ctx, int bytesperpixel)
         int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
         int end_x = FFMIN(2 * (s->cols - col), w4);
         int end_y = FFMIN(2 * (s->rows - row), h4);
-        int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
+        int tx = 4 * s->s.h.lossless + b->tx, uvtx = b->uvtx + 4 * s->s.h.lossless;
         int uvstep1d = 1 << b->uvtx, p;
         uint8_t *dst = s->dst[0];
 
@@ -3206,7 +3191,7 @@ static void decode_b(AVCodecContext *ctx, int row, int col,
     int bytesperpixel = s->bytesperpixel;
     int w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
     int emu[2];
-    AVFrame *f = s->frames[CUR_FRAME].tf.f;
+    AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
 
     s->row = row;
     s->row7 = row & 7;
@@ -3273,6 +3258,7 @@ static void decode_b(AVCodecContext *ctx, int row, int col,
             case 8: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 8, v); break;
             }
         }
+
         if (s->pass == 1) {
             s->b++;
             s->block += w4 * h4 * 64 * bytesperpixel;
@@ -3289,9 +3275,9 @@ static void decode_b(AVCodecContext *ctx, int row, int col,
     // emulated overhangs if the stride of the target buffer can't hold. This
     // makes it possible to support emu-edge and so on even if we have large block
     // overhangs
-    emu[0] = (col + w4) * 8 > f->linesize[0] ||
+    emu[0] = (col + w4) * 8 * bytesperpixel > f->linesize[0] ||
              (row + h4) > s->rows;
-    emu[1] = (col + w4) * 4 > f->linesize[1] ||
+    emu[1] = ((col + w4) * 8 >> s->ss_h) * bytesperpixel > f->linesize[1] ||
              (row + h4) > s->rows;
     if (emu[0]) {
         s->dst[0] = s->tmp_y;
@@ -3330,9 +3316,9 @@ static void decode_b(AVCodecContext *ctx, int row, int col,
 
             av_assert2(n <= 4);
             if (w & bw) {
-                s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o, f->linesize[0],
-                                         s->tmp_y + o, 128, h, 0, 0);
-                o += bw * bytesperpixel;
+                s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o * bytesperpixel, f->linesize[0],
+                                         s->tmp_y + o * bytesperpixel, 128, h, 0, 0);
+                o += bw;
             }
         }
     }
@@ -3345,19 +3331,19 @@ static void decode_b(AVCodecContext *ctx, int row, int col,
 
             av_assert2(n <= 4);
             if (w & bw) {
-                s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o, f->linesize[1],
-                                         s->tmp_uv[0] + o, 128, h, 0, 0);
-                s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o, f->linesize[2],
-                                         s->tmp_uv[1] + o, 128, h, 0, 0);
-                o += bw * bytesperpixel;
+                s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o * bytesperpixel, f->linesize[1],
+                                         s->tmp_uv[0] + o * bytesperpixel, 128, h, 0, 0);
+                s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o * bytesperpixel, f->linesize[2],
+                                         s->tmp_uv[1] + o * bytesperpixel, 128, h, 0, 0);
+                o += bw;
             }
         }
     }
 
     // pick filter level and find edges to apply filter to
-    if (s->filter.level &&
-        (lvl = s->segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
-                                                    [b->mode[3] != ZEROMV]) > 0) {
+    if (s->s.h.filter.level &&
+        (lvl = s->s.h.segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
+                                                      [b->mode[3] != ZEROMV]) > 0) {
         int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
         int skip_inter = !b->intra && b->skip, col7 = s->col7, row7 = s->row7;
 
@@ -3369,8 +3355,8 @@ static void decode_b(AVCodecContext *ctx, int row, int col,
                        s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
                        b->uvtx, skip_inter);
 
-        if (!s->filter.lim_lut[lvl]) {
-            int sharp = s->filter.sharpness;
+        if (!s->filter_lut.lim_lut[lvl]) {
+            int sharp = s->s.h.filter.sharpness;
             int limit = lvl;
 
             if (sharp > 0) {
@@ -3379,8 +3365,8 @@ static void decode_b(AVCodecContext *ctx, int row, int col,
             }
             limit = FFMAX(limit, 1);
 
-            s->filter.lim_lut[lvl] = limit;
-            s->filter.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
+            s->filter_lut.lim_lut[lvl] = limit;
+            s->filter_lut.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
         }
     }
 
@@ -3401,11 +3387,11 @@ static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *l
     VP9Context *s = ctx->priv_data;
     int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
             (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
-    const uint8_t *p = s->keyframe || s->intraonly ? vp9_default_kf_partition_probs[bl][c] :
+    const uint8_t *p = s->s.h.keyframe || s->s.h.intraonly ? vp9_default_kf_partition_probs[bl][c] :
                                                      s->prob.p.partition[bl][c];
     enum BlockPartition bp;
     ptrdiff_t hbs = 4 >> bl;
-    AVFrame *f = s->frames[CUR_FRAME].tf.f;
+    AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
     ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
     int bytesperpixel = s->bytesperpixel;
 
@@ -3480,7 +3466,7 @@ static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filte
     VP9Context *s = ctx->priv_data;
     VP9Block *b = s->b;
     ptrdiff_t hbs = 4 >> bl;
-    AVFrame *f = s->frames[CUR_FRAME].tf.f;
+    AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
     ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
     int bytesperpixel = s->bytesperpixel;
 
@@ -3540,7 +3526,7 @@ static av_always_inline void filter_plane_cols(VP9Context *s, int col, int ss_h,
             if (col || x > 1) {
                 if (hm1 & x) {
                     int L = *l, H = L >> 4;
-                    int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
+                    int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];
 
                     if (hmask1[0] & x) {
                         if (hmask2[0] & x) {
@@ -3552,8 +3538,8 @@ static av_always_inline void filter_plane_cols(VP9Context *s, int col, int ss_h,
                     } else if (hm2 & x) {
                         L = l[8 << ss_v];
                         H |= (L >> 4) << 8;
-                        E |= s->filter.mblim_lut[L] << 8;
-                        I |= s->filter.lim_lut[L] << 8;
+                        E |= s->filter_lut.mblim_lut[L] << 8;
+                        I |= s->filter_lut.lim_lut[L] << 8;
                         s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
                                                [!!(hmask2[1] & x)]
                                                [0](ptr, ls, E, I, H);
@@ -3563,7 +3549,7 @@ static av_always_inline void filter_plane_cols(VP9Context *s, int col, int ss_h,
                     }
                 } else if (hm2 & x) {
                     int L = l[8 << ss_v], H = L >> 4;
-                    int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
+                    int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];
 
                     s->dsp.loop_filter_8[!!(hmask2[1] & x)]
                                         [0](ptr + 8 * ls, ls, E, I, H);
@@ -3575,20 +3561,20 @@ static av_always_inline void filter_plane_cols(VP9Context *s, int col, int ss_h,
             } else {
                 if (hm13 & x) {
                     int L = *l, H = L >> 4;
-                    int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
+                    int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];
 
                     if (hm23 & x) {
                         L = l[8 << ss_v];
                         H |= (L >> 4) << 8;
-                        E |= s->filter.mblim_lut[L] << 8;
-                        I |= s->filter.lim_lut[L] << 8;
+                        E |= s->filter_lut.mblim_lut[L] << 8;
+                        I |= s->filter_lut.lim_lut[L] << 8;
                         s->dsp.loop_filter_mix2[0][0][0](ptr + 4 * bytesperpixel, ls, E, I, H);
                     } else {
                         s->dsp.loop_filter_8[0][0](ptr + 4 * bytesperpixel, ls, E, I, H);
                     }
                 } else if (hm23 & x) {
                     int L = l[8 << ss_v], H = L >> 4;
-                    int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
+                    int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];
 
                     s->dsp.loop_filter_8[0][0](ptr + 8 * ls + 4 * bytesperpixel, ls, E, I, H);
                 }
@@ -3615,7 +3601,7 @@ static av_always_inline void filter_plane_rows(VP9Context *s, int row, int ss_h,
             if (row || y) {
                 if (vm & x) {
                     int L = *l, H = L >> 4;
-                    int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
+                    int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];
 
                     if (vmask[0] & x) {
                         if (vmask[0] & (x << (1 + ss_h))) {
@@ -3627,8 +3613,8 @@ static av_always_inline void filter_plane_rows(VP9Context *s, int row, int ss_h,
                     } else if (vm & (x << (1 + ss_h))) {
                         L = l[1 + ss_h];
                         H |= (L >> 4) << 8;
-                        E |= s->filter.mblim_lut[L] << 8;
-                        I |= s->filter.lim_lut[L] << 8;
+                        E |= s->filter_lut.mblim_lut[L] << 8;
+                        I |= s->filter_lut.lim_lut[L] << 8;
                         s->dsp.loop_filter_mix2[!!(vmask[1] &  x)]
                                                [!!(vmask[1] & (x << (1 + ss_h)))]
                                                [1](ptr, ls, E, I, H);
@@ -3638,7 +3624,7 @@ static av_always_inline void filter_plane_rows(VP9Context *s, int row, int ss_h,
                     }
                 } else if (vm & (x << (1 + ss_h))) {
                     int L = l[1 + ss_h], H = L >> 4;
-                    int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
+                    int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];
 
                     s->dsp.loop_filter_8[!!(vmask[1] & (x << (1 + ss_h)))]
                                         [1](ptr + 8 * bytesperpixel, ls, E, I, H);
@@ -3647,20 +3633,20 @@ static av_always_inline void filter_plane_rows(VP9Context *s, int row, int ss_h,
             if (!ss_v) {
                 if (vm3 & x) {
                     int L = *l, H = L >> 4;
-                    int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
+                    int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];
 
                     if (vm3 & (x << (1 + ss_h))) {
                         L = l[1 + ss_h];
                         H |= (L >> 4) << 8;
-                        E |= s->filter.mblim_lut[L] << 8;
-                        I |= s->filter.lim_lut[L] << 8;
+                        E |= s->filter_lut.mblim_lut[L] << 8;
+                        I |= s->filter_lut.lim_lut[L] << 8;
                         s->dsp.loop_filter_mix2[0][0][1](ptr + ls * 4, ls, E, I, H);
                     } else {
                         s->dsp.loop_filter_8[0][1](ptr + ls * 4, ls, E, I, H);
                     }
                 } else if (vm3 & (x << (1 + ss_h))) {
                     int L = l[1 + ss_h], H = L >> 4;
-                    int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
+                    int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];
 
                     s->dsp.loop_filter_8[0][1](ptr + ls * 4 + 8 * bytesperpixel, ls, E, I, H);
                 }
@@ -3679,7 +3665,7 @@ static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
                           int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
 {
     VP9Context *s = ctx->priv_data;
-    AVFrame *f = s->frames[CUR_FRAME].tf.f;
+    AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
     uint8_t *dst = f->data[0] + yoff;
     ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
     uint8_t (*uv_masks)[8][4] = lflvl->mask[s->ss_h | s->ss_v];
@@ -3730,8 +3716,8 @@ static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
 static void adapt_probs(VP9Context *s)
 {
     int i, j, k, l, m;
-    prob_context *p = &s->prob_ctx[s->framectxid].p;
-    int uf = (s->keyframe || s->intraonly || !s->last_keyframe) ? 112 : 128;
+    prob_context *p = &s->prob_ctx[s->s.h.framectxid].p;
+    int uf = (s->s.h.keyframe || s->s.h.intraonly || !s->last_keyframe) ? 112 : 128;
 
     // coefficients
     for (i = 0; i < 4; i++)
@@ -3739,7 +3725,7 @@ static void adapt_probs(VP9Context *s)
             for (k = 0; k < 2; k++)
                 for (l = 0; l < 6; l++)
                     for (m = 0; m < 6; m++) {
-                        uint8_t *pp = s->prob_ctx[s->framectxid].coef[i][j][k][l][m];
+                        uint8_t *pp = s->prob_ctx[s->s.h.framectxid].coef[i][j][k][l][m];
                         unsigned *e = s->counts.eob[i][j][k][l][m];
                         unsigned *c = s->counts.coef[i][j][k][l][m];
 
@@ -3751,7 +3737,7 @@ static void adapt_probs(VP9Context *s)
                         adapt_prob(&pp[2], c[1], c[2], 24, uf);
                     }
 
-    if (s->keyframe || s->intraonly) {
+    if (s->s.h.keyframe || s->s.h.intraonly) {
         memcpy(p->skip,  s->prob.p.skip,  sizeof(p->skip));
         memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
         memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
@@ -3768,19 +3754,19 @@ static void adapt_probs(VP9Context *s)
         adapt_prob(&p->intra[i], s->counts.intra[i][0], s->counts.intra[i][1], 20, 128);
 
     // comppred flag
-    if (s->comppredmode == PRED_SWITCHABLE) {
+    if (s->s.h.comppredmode == PRED_SWITCHABLE) {
       for (i = 0; i < 5; i++)
           adapt_prob(&p->comp[i], s->counts.comp[i][0], s->counts.comp[i][1], 20, 128);
     }
 
     // reference frames
-    if (s->comppredmode != PRED_SINGLEREF) {
+    if (s->s.h.comppredmode != PRED_SINGLEREF) {
       for (i = 0; i < 5; i++)
           adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
                      s->counts.comp_ref[i][1], 20, 128);
     }
 
-    if (s->comppredmode != PRED_COMPREF) {
+    if (s->s.h.comppredmode != PRED_COMPREF) {
       for (i = 0; i < 5; i++) {
           uint8_t *pp = p->single_ref[i];
           unsigned (*c)[2] = s->counts.single_ref[i];
@@ -3802,7 +3788,7 @@ static void adapt_probs(VP9Context *s)
         }
 
     // tx size
-    if (s->txfmmode == TX_SWITCHABLE) {
+    if (s->s.h.txfmmode == TX_SWITCHABLE) {
       for (i = 0; i < 2; i++) {
           unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];
 
@@ -3816,7 +3802,7 @@ static void adapt_probs(VP9Context *s)
     }
 
     // interpolation filter
-    if (s->filtermode == FILTER_SWITCHABLE) {
+    if (s->s.h.filtermode == FILTER_SWITCHABLE) {
         for (i = 0; i < 4; i++) {
             uint8_t *pp = p->filter[i];
             unsigned *c = s->counts.filter[i];
@@ -3892,7 +3878,7 @@ static void adapt_probs(VP9Context *s)
         adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
         adapt_prob(&pp[2], c[2], c[3], 20, 128);
 
-        if (s->highprecisionmvs) {
+        if (s->s.h.highprecisionmvs) {
             adapt_prob(&p->mv_comp[i].class0_hp, s->counts.mv_comp[i].class0_hp[0],
                        s->counts.mv_comp[i].class0_hp[1], 20, 128);
             adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
@@ -3962,15 +3948,15 @@ static av_cold int vp9_decode_free(AVCodecContext *ctx)
     int i;
 
     for (i = 0; i < 3; i++) {
-        if (s->frames[i].tf.f->data[0])
-            vp9_unref_frame(ctx, &s->frames[i]);
-        av_frame_free(&s->frames[i].tf.f);
+        if (s->s.frames[i].tf.f->buf[0])
+            vp9_unref_frame(ctx, &s->s.frames[i]);
+        av_frame_free(&s->s.frames[i].tf.f);
     }
     for (i = 0; i < 8; i++) {
-        if (s->refs[i].f->data[0])
-            ff_thread_release_buffer(ctx, &s->refs[i]);
-        av_frame_free(&s->refs[i].f);
-        if (s->next_refs[i].f->data[0])
+        if (s->s.refs[i].f->buf[0])
+            ff_thread_release_buffer(ctx, &s->s.refs[i]);
+        av_frame_free(&s->s.refs[i].f);
+        if (s->next_refs[i].f->buf[0])
             ff_thread_release_buffer(ctx, &s->next_refs[i]);
         av_frame_free(&s->next_refs[i].f);
     }
@@ -3989,8 +3975,8 @@ static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
     int size = pkt->size;
     VP9Context *s = ctx->priv_data;
     int res, tile_row, tile_col, i, ref, row, col;
-    int retain_segmap_ref = s->segmentation.enabled && !s->segmentation.update_map
-                            && s->frames[REF_FRAME_SEGMAP].segmentation_map;
+    int retain_segmap_ref = s->s.frames[REF_FRAME_SEGMAP].segmentation_map &&
+                            (!s->s.h.segmentation.enabled || !s->s.h.segmentation.update_map);
     ptrdiff_t yoff, uvoff, ls_y, ls_uv;
     AVFrame *f;
     int bytesperpixel;
@@ -3998,19 +3984,19 @@ static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
     if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) {
         return res;
     } else if (res == 0) {
-        if (!s->refs[ref].f->data[0]) {
+        if (!s->s.refs[ref].f->buf[0]) {
             av_log(ctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
             return AVERROR_INVALIDDATA;
         }
-        if ((res = av_frame_ref(frame, s->refs[ref].f)) < 0)
+        if ((res = av_frame_ref(frame, s->s.refs[ref].f)) < 0)
             return res;
         ((AVFrame *)frame)->pkt_pts = pkt->pts;
         ((AVFrame *)frame)->pkt_dts = pkt->dts;
         for (i = 0; i < 8; i++) {
-            if (s->next_refs[i].f->data[0])
+            if (s->next_refs[i].f->buf[0])
                 ff_thread_release_buffer(ctx, &s->next_refs[i]);
-            if (s->refs[i].f->data[0] &&
-                (res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i])) < 0)
+            if (s->s.refs[i].f->buf[0] &&
+                (res = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i])) < 0)
                 return res;
         }
         *got_frame = 1;
@@ -4019,46 +4005,65 @@ static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
     data += res;
     size -= res;
 
-    if (!retain_segmap_ref) {
-        if (s->frames[REF_FRAME_SEGMAP].tf.f->data[0])
-            vp9_unref_frame(ctx, &s->frames[REF_FRAME_SEGMAP]);
-        if (!s->keyframe && !s->intraonly && !s->errorres && s->frames[CUR_FRAME].tf.f->data[0] &&
-            (res = vp9_ref_frame(ctx, &s->frames[REF_FRAME_SEGMAP], &s->frames[CUR_FRAME])) < 0)
+    if (!retain_segmap_ref || s->s.h.keyframe || s->s.h.intraonly) {
+        if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0])
+            vp9_unref_frame(ctx, &s->s.frames[REF_FRAME_SEGMAP]);
+        if (!s->s.h.keyframe && !s->s.h.intraonly && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
+            (res = vp9_ref_frame(ctx, &s->s.frames[REF_FRAME_SEGMAP], &s->s.frames[CUR_FRAME])) < 0)
             return res;
     }
-    if (s->frames[REF_FRAME_MVPAIR].tf.f->data[0])
-        vp9_unref_frame(ctx, &s->frames[REF_FRAME_MVPAIR]);
-    if (!s->intraonly && !s->keyframe && !s->errorres && s->frames[CUR_FRAME].tf.f->data[0] &&
-        (res = vp9_ref_frame(ctx, &s->frames[REF_FRAME_MVPAIR], &s->frames[CUR_FRAME])) < 0)
+    if (s->s.frames[REF_FRAME_MVPAIR].tf.f->buf[0])
+        vp9_unref_frame(ctx, &s->s.frames[REF_FRAME_MVPAIR]);
+    if (!s->s.h.intraonly && !s->s.h.keyframe && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
+        (res = vp9_ref_frame(ctx, &s->s.frames[REF_FRAME_MVPAIR], &s->s.frames[CUR_FRAME])) < 0)
         return res;
-    if (s->frames[CUR_FRAME].tf.f->data[0])
-        vp9_unref_frame(ctx, &s->frames[CUR_FRAME]);
-    if ((res = vp9_alloc_frame(ctx, &s->frames[CUR_FRAME])) < 0)
+    if (s->s.frames[CUR_FRAME].tf.f->buf[0])
+        vp9_unref_frame(ctx, &s->s.frames[CUR_FRAME]);
+    if ((res = vp9_alloc_frame(ctx, &s->s.frames[CUR_FRAME])) < 0)
         return res;
-    f = s->frames[CUR_FRAME].tf.f;
-    f->key_frame = s->keyframe;
-    f->pict_type = (s->keyframe || s->intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
+    f = s->s.frames[CUR_FRAME].tf.f;
+    f->key_frame = s->s.h.keyframe;
+    f->pict_type = (s->s.h.keyframe || s->s.h.intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
     ls_y = f->linesize[0];
     ls_uv =f->linesize[1];
 
+    if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0] &&
+        (s->s.frames[REF_FRAME_MVPAIR].tf.f->width  != s->s.frames[CUR_FRAME].tf.f->width ||
+         s->s.frames[REF_FRAME_MVPAIR].tf.f->height != s->s.frames[CUR_FRAME].tf.f->height)) {
+        vp9_unref_frame(ctx, &s->s.frames[REF_FRAME_SEGMAP]);
+    }
+
     // ref frame setup
     for (i = 0; i < 8; i++) {
-        if (s->next_refs[i].f->data[0])
+        if (s->next_refs[i].f->buf[0])
             ff_thread_release_buffer(ctx, &s->next_refs[i]);
-        if (s->refreshrefmask & (1 << i)) {
-            res = ff_thread_ref_frame(&s->next_refs[i], &s->frames[CUR_FRAME].tf);
-        } else if (s->refs[i].f->data[0]) {
-            res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i]);
+        if (s->s.h.refreshrefmask & (1 << i)) {
+            res = ff_thread_ref_frame(&s->next_refs[i], &s->s.frames[CUR_FRAME].tf);
+        } else if (s->s.refs[i].f->buf[0]) {
+            res = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i]);
         }
         if (res < 0)
             return res;
     }
 
+    if (ctx->hwaccel) {
+        res = ctx->hwaccel->start_frame(ctx, NULL, 0);
+        if (res < 0)
+            return res;
+        res = ctx->hwaccel->decode_slice(ctx, pkt->data, pkt->size);
+        if (res < 0)
+            return res;
+        res = ctx->hwaccel->end_frame(ctx);
+        if (res < 0)
+            return res;
+        goto finish;
+    }
+
     // main tile decode loop
     bytesperpixel = s->bytesperpixel;
     memset(s->above_partition_ctx, 0, s->cols);
     memset(s->above_skip_ctx, 0, s->cols);
-    if (s->keyframe || s->intraonly) {
+    if (s->s.h.keyframe || s->s.h.intraonly) {
         memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
     } else {
         memset(s->above_mode_ctx, NEARESTMV, s->cols);
@@ -4067,14 +4072,14 @@ static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
     memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
     memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
     memset(s->above_segpred_ctx, 0, s->cols);
-    s->pass = s->frames[CUR_FRAME].uses_2pass =
-        ctx->active_thread_type == FF_THREAD_FRAME && s->refreshctx && !s->parallelmode;
+    s->pass = s->s.frames[CUR_FRAME].uses_2pass =
+        ctx->active_thread_type == FF_THREAD_FRAME && s->s.h.refreshctx && !s->s.h.parallelmode;
     if ((res = update_block_buffers(ctx)) < 0) {
         av_log(ctx, AV_LOG_ERROR,
                "Failed to allocate block buffers\n");
         return res;
     }
-    if (s->refreshctx && s->parallelmode) {
+    if (s->s.h.refreshctx && s->s.h.parallelmode) {
         int j, k, l, m;
 
         for (i = 0; i < 4; i++) {
@@ -4082,14 +4087,14 @@ static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
                 for (k = 0; k < 2; k++)
                     for (l = 0; l < 6; l++)
                         for (m = 0; m < 6; m++)
-                            memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
+                            memcpy(s->prob_ctx[s->s.h.framectxid].coef[i][j][k][l][m],
                                    s->prob.coef[i][j][k][l][m], 3);
-            if (s->txfmmode == i)
+            if (s->s.h.txfmmode == i)
                 break;
         }
-        s->prob_ctx[s->framectxid].p = s->prob.p;
+        s->prob_ctx[s->s.h.framectxid].p = s->prob.p;
         ff_thread_finish_setup(ctx);
-    } else if (!s->refreshctx) {
+    } else if (!s->s.h.refreshctx) {
         ff_thread_finish_setup(ctx);
     }
 
@@ -4103,15 +4108,15 @@ static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
         s->uveob[0] = s->uveob_base[0];
         s->uveob[1] = s->uveob_base[1];
 
-        for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
-            set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
-                            tile_row, s->tiling.log2_tile_rows, s->sb_rows);
+        for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
+            set_tile_offset(&s->tile_row_start, &s->tile_row_end,
+                            tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);
             if (s->pass != 2) {
-                for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
+                for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
                     int64_t tile_size;
 
-                    if (tile_col == s->tiling.tile_cols - 1 &&
-                        tile_row == s->tiling.tile_rows - 1) {
+                    if (tile_col == s->s.h.tiling.tile_cols - 1 &&
+                        tile_row == s->s.h.tiling.tile_rows - 1) {
                         tile_size = size;
                     } else {
                         tile_size = AV_RB32(data);
@@ -4119,12 +4124,12 @@ static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
                         size -= 4;
                     }
                     if (tile_size > size) {
-                        ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
+                        ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
                         return AVERROR_INVALIDDATA;
                     }
                     ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
                     if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
-                        ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
+                        ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
                         return AVERROR_INVALIDDATA;
                     }
                     data += tile_size;
@@ -4132,19 +4137,19 @@ static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
                 }
             }
 
-            for (row = s->tiling.tile_row_start; row < s->tiling.tile_row_end;
+            for (row = s->tile_row_start; row < s->tile_row_end;
                  row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
                 struct VP9Filter *lflvl_ptr = s->lflvl;
                 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
 
-                for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
-                    set_tile_offset(&s->tiling.tile_col_start, &s->tiling.tile_col_end,
-                                    tile_col, s->tiling.log2_tile_cols, s->sb_cols);
+                for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
+                    set_tile_offset(&s->tile_col_start, &s->tile_col_end,
+                                    tile_col, s->s.h.tiling.log2_tile_cols, s->sb_cols);
 
                     if (s->pass != 2) {
                         memset(s->left_partition_ctx, 0, 8);
                         memset(s->left_skip_ctx, 0, 8);
-                        if (s->keyframe || s->intraonly) {
+                        if (s->s.h.keyframe || s->s.h.intraonly) {
                             memset(s->left_mode_ctx, DC_PRED, 16);
                         } else {
                             memset(s->left_mode_ctx, NEARESTMV, 8);
@@ -4156,8 +4161,8 @@ static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
                         memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
                     }
 
-                    for (col = s->tiling.tile_col_start;
-                         col < s->tiling.tile_col_end;
+                    for (col = s->tile_col_start;
+                         col < s->tile_col_end;
                          col += 8, yoff2 += 64 * bytesperpixel,
                          uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
                         // FIXME integrate with lf code (i.e. zero after each
@@ -4198,7 +4203,7 @@ static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
                 }
 
                 // loopfilter one row
-                if (s->filter.level) {
+                if (s->s.h.filter.level) {
                     yoff2 = yoff;
                     uvoff2 = uvoff;
                     lflvl_ptr = s->lflvl;
@@ -4212,26 +4217,29 @@ static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
                 // FIXME maybe we can make this more finegrained by running the
                 // loopfilter per-block instead of after each sbrow
                 // In fact that would also make intra pred left preparation easier?
-                ff_thread_report_progress(&s->frames[CUR_FRAME].tf, row >> 3, 0);
+                ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, row >> 3, 0);
             }
         }
 
-        if (s->pass < 2 && s->refreshctx && !s->parallelmode) {
+        if (s->pass < 2 && s->s.h.refreshctx && !s->s.h.parallelmode) {
             adapt_probs(s);
             ff_thread_finish_setup(ctx);
         }
     } while (s->pass++ == 1);
-    ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
+    ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
 
+finish:
     // ref frame setup
     for (i = 0; i < 8; i++) {
-        if (s->refs[i].f->data[0])
-            ff_thread_release_buffer(ctx, &s->refs[i]);
-        ff_thread_ref_frame(&s->refs[i], &s->next_refs[i]);
+        if (s->s.refs[i].f->buf[0])
+            ff_thread_release_buffer(ctx, &s->s.refs[i]);
+        if (s->next_refs[i].f->buf[0] &&
+            (res = ff_thread_ref_frame(&s->s.refs[i], &s->next_refs[i])) < 0)
+            return res;
     }
 
-    if (!s->invisible) {
-        if ((res = av_frame_ref(frame, s->frames[CUR_FRAME].tf.f)) < 0)
+    if (!s->s.h.invisible) {
+        if ((res = av_frame_ref(frame, s->s.frames[CUR_FRAME].tf.f)) < 0)
             return res;
         *got_frame = 1;
     }
@@ -4245,9 +4253,9 @@ static void vp9_decode_flush(AVCodecContext *ctx)
     int i;
 
     for (i = 0; i < 3; i++)
-        vp9_unref_frame(ctx, &s->frames[i]);
+        vp9_unref_frame(ctx, &s->s.frames[i]);
     for (i = 0; i < 8; i++)
-        ff_thread_release_buffer(ctx, &s->refs[i]);
+        ff_thread_release_buffer(ctx, &s->s.refs[i]);
 }
 
 static int init_frames(AVCodecContext *ctx)
@@ -4256,17 +4264,17 @@ static int init_frames(AVCodecContext *ctx)
     int i;
 
     for (i = 0; i < 3; i++) {
-        s->frames[i].tf.f = av_frame_alloc();
-        if (!s->frames[i].tf.f) {
+        s->s.frames[i].tf.f = av_frame_alloc();
+        if (!s->s.frames[i].tf.f) {
             vp9_decode_free(ctx);
             av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
             return AVERROR(ENOMEM);
         }
     }
     for (i = 0; i < 8; i++) {
-        s->refs[i].f = av_frame_alloc();
+        s->s.refs[i].f = av_frame_alloc();
         s->next_refs[i].f = av_frame_alloc();
-        if (!s->refs[i].f || !s->next_refs[i].f) {
+        if (!s->s.refs[i].f || !s->next_refs[i].f) {
             vp9_decode_free(ctx);
             av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
             return AVERROR(ENOMEM);
@@ -4282,11 +4290,12 @@ static av_cold int vp9_decode_init(AVCodecContext *ctx)
 
     ctx->internal->allocate_progress = 1;
     s->last_bpp = 0;
-    s->filter.sharpness = -1;
+    s->s.h.filter.sharpness = -1;
 
     return init_frames(ctx);
 }
 
+#if HAVE_THREADS
 static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
 {
     return init_frames(avctx);
@@ -4297,55 +4306,46 @@ static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecCo
     int i, res;
     VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
 
-    // detect size changes in other threads
-    if (s->intra_pred_data[0] &&
-        (!ssrc->intra_pred_data[0] || s->cols != ssrc->cols || s->rows != ssrc->rows)) {
-        free_buffers(s);
-    }
-
     for (i = 0; i < 3; i++) {
-        if (s->frames[i].tf.f->data[0])
-            vp9_unref_frame(dst, &s->frames[i]);
-        if (ssrc->frames[i].tf.f->data[0]) {
-            if ((res = vp9_ref_frame(dst, &s->frames[i], &ssrc->frames[i])) < 0)
+        if (s->s.frames[i].tf.f->buf[0])
+            vp9_unref_frame(dst, &s->s.frames[i]);
+        if (ssrc->s.frames[i].tf.f->buf[0]) {
+            if ((res = vp9_ref_frame(dst, &s->s.frames[i], &ssrc->s.frames[i])) < 0)
                 return res;
         }
     }
     for (i = 0; i < 8; i++) {
-        if (s->refs[i].f->data[0])
-            ff_thread_release_buffer(dst, &s->refs[i]);
-        if (ssrc->next_refs[i].f->data[0]) {
-            if ((res = ff_thread_ref_frame(&s->refs[i], &ssrc->next_refs[i])) < 0)
+        if (s->s.refs[i].f->buf[0])
+            ff_thread_release_buffer(dst, &s->s.refs[i]);
+        if (ssrc->next_refs[i].f->buf[0]) {
+            if ((res = ff_thread_ref_frame(&s->s.refs[i], &ssrc->next_refs[i])) < 0)
                 return res;
         }
     }
 
-    s->invisible = ssrc->invisible;
-    s->keyframe = ssrc->keyframe;
+    s->s.h.invisible = ssrc->s.h.invisible;
+    s->s.h.keyframe = ssrc->s.h.keyframe;
+    s->s.h.intraonly = ssrc->s.h.intraonly;
     s->ss_v = ssrc->ss_v;
     s->ss_h = ssrc->ss_h;
-    s->segmentation.enabled = ssrc->segmentation.enabled;
-    s->segmentation.update_map = ssrc->segmentation.update_map;
+    s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
+    s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
+    s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
     s->bytesperpixel = ssrc->bytesperpixel;
+    s->gf_fmt = ssrc->gf_fmt;
+    s->w = ssrc->w;
+    s->h = ssrc->h;
     s->bpp = ssrc->bpp;
     s->bpp_index = ssrc->bpp_index;
+    s->pix_fmt = ssrc->pix_fmt;
     memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
-    memcpy(&s->lf_delta, &ssrc->lf_delta, sizeof(s->lf_delta));
-    if (ssrc->segmentation.enabled) {
-        memcpy(&s->segmentation.feat, &ssrc->segmentation.feat,
-               sizeof(s->segmentation.feat));
-    }
+    memcpy(&s->s.h.lf_delta, &ssrc->s.h.lf_delta, sizeof(s->s.h.lf_delta));
+    memcpy(&s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
+           sizeof(s->s.h.segmentation.feat));
 
     return 0;
 }
-
-static const AVProfile profiles[] = {
-    { FF_PROFILE_VP9_0, "Profile 0" },
-    { FF_PROFILE_VP9_1, "Profile 1" },
-    { FF_PROFILE_VP9_2, "Profile 2" },
-    { FF_PROFILE_VP9_3, "Profile 3" },
-    { FF_PROFILE_UNKNOWN },
-};
+#endif
 
 AVCodec ff_vp9_decoder = {
     .name                  = "vp9",
@@ -4356,9 +4356,9 @@ AVCodec ff_vp9_decoder = {
     .init                  = vp9_decode_init,
     .close                 = vp9_decode_free,
     .decode                = vp9_decode_frame,
-    .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
+    .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
     .flush                 = vp9_decode_flush,
     .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
     .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
-    .profiles              = NULL_IF_CONFIG_SMALL(profiles),
+    .profiles              = NULL_IF_CONFIG_SMALL(ff_vp9_profiles),
 };
diff --git a/libavcodec/vp9.h b/libavcodec/vp9.h
index 9a29416e..df5bd4d8 100644
--- a/libavcodec/vp9.h
+++ b/libavcodec/vp9.h
@@ -24,6 +24,42 @@
 #ifndef AVCODEC_VP9_H
 #define AVCODEC_VP9_H
 
+#include <stdint.h>
+
+#include "thread.h"
+#include "vp56.h"
+
+enum BlockLevel {
+    BL_64X64,
+    BL_32X32,
+    BL_16X16,
+    BL_8X8,
+};
+
+enum BlockPartition {
+    PARTITION_NONE,    // [ ] <-.
+    PARTITION_H,       // [-]   |
+    PARTITION_V,       // [|]   |
+    PARTITION_SPLIT,   // [+] --'
+};
+
+enum BlockSize {
+    BS_64x64,
+    BS_64x32,
+    BS_32x64,
+    BS_32x32,
+    BS_32x16,
+    BS_16x32,
+    BS_16x16,
+    BS_16x8,
+    BS_8x16,
+    BS_8x8,
+    BS_8x4,
+    BS_4x8,
+    BS_4x4,
+    N_BS_SIZES,
+};
+
 enum TxfmMode {
     TX_4X4,
     TX_8X8,
@@ -61,6 +97,13 @@ enum IntraPredMode {
     N_INTRA_PRED_MODES
 };
 
+enum InterPredMode {
+    NEARESTMV = 10,
+    NEARMV = 11,
+    ZEROMV = 12,
+    NEWMV = 13,
+};
+
 enum FilterMode {
     FILTER_8TAP_SMOOTH,
     FILTER_8TAP_REGULAR,
@@ -69,4 +112,100 @@ enum FilterMode {
     FILTER_SWITCHABLE,
 };
 
+enum CompPredMode {
+    PRED_SINGLEREF,
+    PRED_COMPREF,
+    PRED_SWITCHABLE,
+};
+
+struct VP9mvrefPair {
+    VP56mv mv[2];
+    int8_t ref[2];
+};
+
+typedef struct VP9Frame {
+    ThreadFrame tf;
+    AVBufferRef *extradata;
+    uint8_t *segmentation_map;
+    struct VP9mvrefPair *mv;
+    int uses_2pass;
+
+    AVBufferRef *hwaccel_priv_buf;
+    void *hwaccel_picture_private;
+} VP9Frame;
+
+typedef struct VP9BitstreamHeader {
+    // bitstream header
+    uint8_t profile;
+    uint8_t keyframe;
+    uint8_t invisible;
+    uint8_t errorres;
+    uint8_t intraonly;
+    uint8_t resetctx;
+    uint8_t refreshrefmask;
+    uint8_t highprecisionmvs;
+    enum FilterMode filtermode;
+    uint8_t allowcompinter;
+    uint8_t refreshctx;
+    uint8_t parallelmode;
+    uint8_t framectxid;
+    uint8_t use_last_frame_mvs;
+    uint8_t refidx[3];
+    uint8_t signbias[3];
+    uint8_t fixcompref;
+    uint8_t varcompref[2];
+    struct {
+        uint8_t level;
+        int8_t sharpness;
+    } filter;
+    struct {
+        uint8_t enabled;
+        uint8_t updated;
+        int8_t mode[2];
+        int8_t ref[4];
+    } lf_delta;
+    uint8_t yac_qi;
+    int8_t ydc_qdelta, uvdc_qdelta, uvac_qdelta;
+    uint8_t lossless;
+#define MAX_SEGMENT 8
+    struct {
+        uint8_t enabled;
+        uint8_t temporal;
+        uint8_t absolute_vals;
+        uint8_t update_map;
+        uint8_t prob[7];
+        uint8_t pred_prob[3];
+        struct {
+            uint8_t q_enabled;
+            uint8_t lf_enabled;
+            uint8_t ref_enabled;
+            uint8_t skip_enabled;
+            uint8_t ref_val;
+            int16_t q_val;
+            int8_t lf_val;
+            int16_t qmul[2][2];
+            uint8_t lflvl[4][2];
+        } feat[MAX_SEGMENT];
+    } segmentation;
+    enum TxfmMode txfmmode;
+    enum CompPredMode comppredmode;
+    struct {
+        unsigned log2_tile_cols, log2_tile_rows;
+        unsigned tile_cols, tile_rows;
+    } tiling;
+
+    int uncompressed_header_size;
+    int compressed_header_size;
+} VP9BitstreamHeader;
+
+typedef struct VP9SharedContext {
+    VP9BitstreamHeader h;
+
+    ThreadFrame refs[8];
+#define CUR_FRAME 0
+#define REF_FRAME_MVPAIR 1
+#define REF_FRAME_SEGMAP 2
+    VP9Frame frames[3];
+} VP9SharedContext;
+
 #endif /* AVCODEC_VP9_H */
diff --git a/libavcodec/vp9_mc_template.c b/libavcodec/vp9_mc_template.c
index f4eb4e56..38d9a6da 100644
--- a/libavcodec/vp9_mc_template.c
+++ b/libavcodec/vp9_mc_template.c
@@ -36,14 +36,14 @@ static void FN(inter_pred)(AVCodecContext *ctx)
     VP9Context *s = ctx->priv_data;
     VP9Block *b = s->b;
     int row = s->row, col = s->col;
-    ThreadFrame *tref1 = &s->refs[s->refidx[b->ref[0]]], *tref2;
+    ThreadFrame *tref1 = &s->s.refs[s->s.h.refidx[b->ref[0]]], *tref2;
     AVFrame *ref1 = tref1->f, *ref2;
     int w1 = ref1->width, h1 = ref1->height, w2, h2;
     ptrdiff_t ls_y = s->y_stride, ls_uv = s->uv_stride;
     int bytesperpixel = BYTES_PER_PIXEL;
 
     if (b->comp) {
-        tref2 = &s->refs[s->refidx[b->ref[1]]];
+        tref2 = &s->s.refs[s->s.h.refidx[b->ref[1]]];
         ref2 = tref2->f;
         w2 = ref2->width;
         h2 = ref2->height;
diff --git a/libavcodec/vp9_parser.c b/libavcodec/vp9_parser.c
index ab33c334..2e9235e6 100644
--- a/libavcodec/vp9_parser.c
+++ b/libavcodec/vp9_parser.c
@@ -22,6 +22,7 @@
  */
 
 #include "libavutil/intreadwrite.h"
+#include "libavcodec/get_bits.h"
 #include "parser.h"
 
 typedef struct VP9ParseContext {
@@ -30,11 +31,28 @@ typedef struct VP9ParseContext {
     int64_t pts;
 } VP9ParseContext;
 
-static void parse_frame(AVCodecParserContext *ctx, const uint8_t *buf, int size)
+static int parse_frame(AVCodecParserContext *ctx, const uint8_t *buf, int size)
 {
     VP9ParseContext *s = ctx->priv_data;
+    GetBitContext gb;
+    int res, profile, keyframe, invisible;
+
+    if ((res = init_get_bits8(&gb, buf, size)) < 0)
+        return res;
+    get_bits(&gb, 2); // frame marker
+    profile  = get_bits1(&gb);
+    profile |= get_bits1(&gb) << 1;
+    if (profile == 3) profile += get_bits1(&gb);
+
+    if (get_bits1(&gb)) {
+        keyframe = 0;
+        invisible = 0;
+    } else {
+        keyframe  = !get_bits1(&gb);
+        invisible = !get_bits1(&gb);
+    }
 
-    if (buf[0] & 0x4) {
+    if (!keyframe) {
         ctx->pict_type = AV_PICTURE_TYPE_P;
         ctx->key_frame = 0;
     } else {
@@ -42,14 +60,16 @@ static void parse_frame(AVCodecParserContext *ctx, const uint8_t *buf, int size)
         ctx->key_frame = 1;
     }
 
-    if (buf[0] & 0x2) {
+    if (!invisible) {
         if (ctx->pts == AV_NOPTS_VALUE)
             ctx->pts = s->pts;
         s->pts = AV_NOPTS_VALUE;
-    } else {
+    } else if (ctx->pts != AV_NOPTS_VALUE) {
         s->pts = ctx->pts;
         ctx->pts = AV_NOPTS_VALUE;
     }
+
+    return 0;
 }
 
 static int parse(AVCodecParserContext *ctx,
@@ -91,12 +111,12 @@ static int parse(AVCodecParserContext *ctx,
                 while (n_frames--) { \
                     unsigned sz = rd; \
                     idx += a; \
-                    if (sz > size) { \
+                    if (sz == 0 || sz > size) { \
                         s->n_frames = 0; \
                         *out_size = size; \
                         *out_data = data; \
                         av_log(avctx, AV_LOG_ERROR, \
-                               "Superframe packet size too big: %u > %d\n", \
+                               "Invalid superframe packet size: %u frame size: %d\n", \
                                sz, size); \
                         return full_size; \
                     } \
@@ -112,7 +132,7 @@ static int parse(AVCodecParserContext *ctx,
                     size -= sz; \
                 } \
                 parse_frame(ctx, *out_data, *out_size); \
-                return *out_size
+                return s->n_frames > 0 ? *out_size : full_size
 
                 case_n(1, *idx);
                 case_n(2, AV_RL16(idx));
diff --git a/libavcodec/vp9data.h b/libavcodec/vp9data.h
index 4142cea5..cb12e7e9 100644
--- a/libavcodec/vp9data.h
+++ b/libavcodec/vp9data.h
@@ -26,13 +26,6 @@
 
 #include "vp9.h"
 
-enum BlockPartition {
-    PARTITION_NONE,    // [ ] <-.
-    PARTITION_H,       // [-]   |
-    PARTITION_V,       // [|]   |
-    PARTITION_SPLIT,   // [+] --'
-};
-
 static const int8_t vp9_partition_tree[3][2] = {
     { -PARTITION_NONE, 1 },               // '0'
      { -PARTITION_H, 2 },                 // '10'
@@ -212,13 +205,6 @@ static const uint8_t vp9_default_kf_uvmode_probs[10][9] = {
     { 102,  19,  66, 162, 182, 122,  35,  59, 128 } /* y = tm */
 };
 
-enum InterPredMode {
-    NEARESTMV = 10,
-    NEARMV = 11,
-    ZEROMV = 12,
-    NEWMV = 13,
-};
-
 static const int8_t vp9_inter_mode_tree[3][2] = {
     { -ZEROMV, 1 },        // '0'
      { -NEARESTMV, 2 },    // '10'
diff --git a/libavcodec/vp9dsp.c b/libavcodec/vp9dsp.c
index 5ff18b47..54e77e26 100644
--- a/libavcodec/vp9dsp.c
+++ b/libavcodec/vp9dsp.c
@@ -25,7 +25,7 @@
 #include "libavutil/common.h"
 #include "vp9dsp.h"
 
-av_cold void ff_vp9dsp_init(VP9DSPContext *dsp, int bpp)
+av_cold void ff_vp9dsp_init(VP9DSPContext *dsp, int bpp, int bitexact)
 {
     if (bpp == 8) {
         ff_vp9dsp_init_8(dsp);
@@ -36,5 +36,6 @@ av_cold void ff_vp9dsp_init(VP9DSPContext *dsp, int bpp)
         ff_vp9dsp_init_12(dsp);
     }
 
-    if (ARCH_X86) ff_vp9dsp_init_x86(dsp, bpp);
+    if (ARCH_X86) ff_vp9dsp_init_x86(dsp, bpp, bitexact);
+    if (ARCH_MIPS) ff_vp9dsp_init_mips(dsp, bpp);
 }
diff --git a/libavcodec/vp9dsp.h b/libavcodec/vp9dsp.h
index beb89263..016a9bb2 100644
--- a/libavcodec/vp9dsp.h
+++ b/libavcodec/vp9dsp.h
@@ -120,12 +120,13 @@ typedef struct VP9DSPContext {
     vp9_scaled_mc_func smc[5][4][2];
 } VP9DSPContext;
 
-void ff_vp9dsp_init(VP9DSPContext *dsp, int bpp);
+void ff_vp9dsp_init(VP9DSPContext *dsp, int bpp, int bitexact);
 
 void ff_vp9dsp_init_8(VP9DSPContext *dsp);
 void ff_vp9dsp_init_10(VP9DSPContext *dsp);
 void ff_vp9dsp_init_12(VP9DSPContext *dsp);
 
-void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp);
+void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact);
+void ff_vp9dsp_init_mips(VP9DSPContext *dsp, int bpp);
 
 #endif /* AVCODEC_VP9DSP_H */
diff --git a/libavcodec/vp9dsp_template.c b/libavcodec/vp9dsp_template.c
index 8f10ccf8..4d810fec 100644
--- a/libavcodec/vp9dsp_template.c
+++ b/libavcodec/vp9dsp_template.c
@@ -1078,12 +1078,12 @@ def_hor_up(32)
 #endif /* BIT_DEPTH != 12 */
 
 #if BIT_DEPTH != 8
-void vp9dsp_intrapred_init_10(VP9DSPContext *dsp);
+void ff_vp9dsp_intrapred_init_10(VP9DSPContext *dsp);
 #endif
 #if BIT_DEPTH != 10
 static
 #endif
-av_cold void FUNC(vp9dsp_intrapred_init)(VP9DSPContext *dsp)
+av_cold void FUNC(ff_vp9dsp_intrapred_init)(VP9DSPContext *dsp)
 {
 #define init_intra_pred_bd_aware(tx, sz) \
     dsp->intra_pred[tx][TM_VP8_PRED]          = tm_##sz##_c; \
@@ -1092,7 +1092,7 @@ av_cold void FUNC(vp9dsp_intrapred_init)(VP9DSPContext *dsp)
     dsp->intra_pred[tx][DC_129_PRED]          = dc_129_##sz##_c
 
 #if BIT_DEPTH == 12
-    vp9dsp_intrapred_init_10(dsp);
+    ff_vp9dsp_intrapred_init_10(dsp);
 #define init_intra_pred(tx, sz) \
     init_intra_pred_bd_aware(tx, sz)
 #else
@@ -1131,8 +1131,8 @@ static void type_a##_##type_b##_##sz##x##sz##_add_c(uint8_t *_dst, \
 \
     stride /= sizeof(pixel); \
     if (has_dconly && eob == 1) { \
-        const int t  = (((block[0] * 11585 + (1 << 13)) >> 14) \
-                                   * 11585 + (1 << 13)) >> 14; \
+        const int t  = ((((dctint) block[0] * 11585 + (1 << 13)) >> 14) \
+                                            * 11585 + (1 << 13)) >> 14; \
         block[0] = 0; \
         for (i = 0; i < sz; i++) { \
             for (j = 0; j < sz; j++) \
@@ -1186,7 +1186,7 @@ static av_always_inline void idct4_1d(const dctcoef *in, ptrdiff_t stride,
 static av_always_inline void iadst4_1d(const dctcoef *in, ptrdiff_t stride,
                                        dctcoef *out, int pass)
 {
-    int t0, t1, t2, t3;
+    dctint t0, t1, t2, t3;
 
     t0 =  5283 * IN(0) + 15212 * IN(2) +  9929 * IN(3);
     t1 =  9929 * IN(0) -  5283 * IN(2) - 15212 * IN(3);
@@ -2317,15 +2317,15 @@ filter_fn_set(avg)
 #undef bilinf_fn_2d
 
 #if BIT_DEPTH != 8
-void vp9dsp_mc_init_10(VP9DSPContext *dsp);
+void ff_vp9dsp_mc_init_10(VP9DSPContext *dsp);
 #endif
 #if BIT_DEPTH != 10
 static
 #endif
-av_cold void FUNC(vp9dsp_mc_init)(VP9DSPContext *dsp)
+av_cold void FUNC(ff_vp9dsp_mc_init)(VP9DSPContext *dsp)
 {
 #if BIT_DEPTH == 12
-    vp9dsp_mc_init_10(dsp);
+    ff_vp9dsp_mc_init_10(dsp);
 #else /* BIT_DEPTH == 12 */
 
 #define init_fpel(idx1, idx2, sz, type) \
@@ -2555,12 +2555,12 @@ scaled_filter_fn_set(avg)
 #undef scaled_bilinf_fn
 
 #if BIT_DEPTH != 8
-void vp9dsp_scaled_mc_init_10(VP9DSPContext *dsp);
+void ff_vp9dsp_scaled_mc_init_10(VP9DSPContext *dsp);
 #endif
 #if BIT_DEPTH != 10
 static
 #endif
-av_cold void FUNC(vp9dsp_scaled_mc_init)(VP9DSPContext *dsp)
+av_cold void FUNC(ff_vp9dsp_scaled_mc_init)(VP9DSPContext *dsp)
 {
 #define init_scaled_bd_aware(idx1, idx2, sz, type) \
     dsp->smc[idx1][FILTER_8TAP_SMOOTH ][idx2] = type##_scaled_smooth_##sz##_c; \
@@ -2568,7 +2568,7 @@ av_cold void FUNC(vp9dsp_scaled_mc_init)(VP9DSPContext *dsp)
     dsp->smc[idx1][FILTER_8TAP_SHARP  ][idx2] = type##_scaled_sharp_##sz##_c
 
 #if BIT_DEPTH == 12
-    vp9dsp_scaled_mc_init_10(dsp);
+    ff_vp9dsp_scaled_mc_init_10(dsp);
 #define init_scaled(a,b,c,d) init_scaled_bd_aware(a,b,c,d)
 #else
 #define init_scaled(idx1, idx2, sz, type) \
@@ -2593,9 +2593,9 @@ av_cold void FUNC(vp9dsp_scaled_mc_init)(VP9DSPContext *dsp)
 
 av_cold void FUNC(ff_vp9dsp_init)(VP9DSPContext *dsp)
 {
-    FUNC(vp9dsp_intrapred_init)(dsp);
+    FUNC(ff_vp9dsp_intrapred_init)(dsp);
     vp9dsp_itxfm_init(dsp);
     vp9dsp_loopfilter_init(dsp);
-    FUNC(vp9dsp_mc_init)(dsp);
-    FUNC(vp9dsp_scaled_mc_init)(dsp);
+    FUNC(ff_vp9dsp_mc_init)(dsp);
+    FUNC(ff_vp9dsp_scaled_mc_init)(dsp);
 }
diff --git a/libavcodec/vqavideo.c b/libavcodec/vqavideo.c
index 45eb1574..3ed9652d 100644
--- a/libavcodec/vqavideo.c
+++ b/libavcodec/vqavideo.c
@@ -650,5 +650,5 @@ AVCodec ff_vqa_decoder = {
     .init           = vqa_decode_init,
     .close          = vqa_decode_end,
     .decode         = vqa_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/wavpack.c b/libavcodec/wavpack.c
index 554367b3..b6022f0f 100644
--- a/libavcodec/wavpack.c
+++ b/libavcodec/wavpack.c
@@ -93,7 +93,7 @@ static av_always_inline int get_tail(GetBitContext *gb, int k)
         return 0;
     p   = av_log2(k);
     e   = (1 << (p + 1)) - k - 1;
-    res = p ? get_bits(gb, p) : 0;
+    res = get_bitsz(gb, p);
     if (res >= e)
         res = (res << 1) - e + get_bits1(gb);
     return res;
@@ -299,7 +299,7 @@ static float wv_get_value_float(WavpackFrameContext *s, uint32_t *crc, int S)
         const int max_bits  = 1 + 23 + 8 + 1;
         const int left_bits = get_bits_left(&s->gb_extra_bits);
 
-        if (left_bits + 8 * FF_INPUT_BUFFER_PADDING_SIZE < max_bits)
+        if (left_bits + 8 * AV_INPUT_BUFFER_PADDING_SIZE < max_bits)
             return 0.0;
     }
 
@@ -597,12 +597,14 @@ static av_cold int wv_alloc_frame_context(WavpackContext *c)
     return 0;
 }
 
+#if HAVE_THREADS /* the codec table only references this via ONLY_IF_THREADS_ENABLED() */
 static int init_thread_copy(AVCodecContext *avctx)
 {
     WavpackContext *s = avctx->priv_data;
     s->avctx = avctx;
     return 0;
 }
+#endif /* HAVE_THREADS */
 
 static av_cold int wavpack_decode_init(AVCodecContext *avctx)
 {
@@ -1125,5 +1127,5 @@ AVCodec ff_wavpack_decoder = {
     .decode         = wavpack_decode_frame,
     .flush          = wavpack_decode_flush,
     .init_thread_copy = ONLY_IF_THREADS_ENABLED(init_thread_copy),
-    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
 };
diff --git a/libavcodec/wavpackenc.c b/libavcodec/wavpackenc.c
index 87f14457..979b9216 100644
--- a/libavcodec/wavpackenc.c
+++ b/libavcodec/wavpackenc.c
@@ -128,6 +128,11 @@ static av_cold int wavpack_encode_init(AVCodecContext *avctx)
 
     s->avctx = avctx;
 
+    if (avctx->channels > 255) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid channel count: %d\n", avctx->channels);
+        return AVERROR(EINVAL);
+    }
+
     if (!avctx->frame_size) {
         int block_samples;
         if (!(avctx->sample_rate & 1))
@@ -1829,9 +1834,9 @@ static int wv_stereo(WavPackEncodeContext *s,
     log_limit = (((s->flags & MAG_MASK) >> MAG_LSB) + 4) * 256;
     log_limit = FFMIN(6912, log_limit);
 
-    if (s->joint) {
-        force_js = s->joint > 0;
-        force_ts = s->joint < 0;
+    if (s->joint != -1) {
+        force_js =  s->joint;
+        force_ts = !s->joint;
     }
 
     if ((ret = allocate_buffers(s)) < 0)
@@ -2211,8 +2216,7 @@ static void pack_float_sample(WavPackEncodeContext *s, int32_t *sample)
         }
     } else if (shift_count) {
         if (s->float_flags & FLOAT_SHIFT_SENT) {
-            int32_t data = get_mantissa(*sample) & ((1 << shift_count) - 1);
-            put_bits(pb, shift_count, data);
+            put_sbits(pb, shift_count, get_mantissa(*sample));
         } else if (s->float_flags & FLOAT_SHIFT_SAME) {
             put_bits(pb, 1, get_mantissa(*sample) & 1);
         }
@@ -2878,8 +2882,8 @@ static int wavpack_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     }
 
     buf_size = s->block_samples * avctx->channels * 8
-             + 200 /* for headers */;
-    if ((ret = ff_alloc_packet2(avctx, avpkt, buf_size)) < 0)
+             + 200 * avctx->channels /* for headers */;
+    if ((ret = ff_alloc_packet2(avctx, avpkt, buf_size, 0)) < 0)
         return ret;
     buf = avpkt->data;
 
@@ -2955,13 +2959,8 @@ static av_cold int wavpack_encode_close(AVCodecContext *avctx)
 #define OFFSET(x) offsetof(WavPackEncodeContext, x)
 #define FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
 static const AVOption options[] = {
-    { "joint_stereo",  "", OFFSET(joint), AV_OPT_TYPE_INT, {.i64=0},-1, 1, FLAGS, "joint" },
-    { "on",   "mid/side",   0, AV_OPT_TYPE_CONST, {.i64= 1}, 0, 0, FLAGS, "joint"},
-    { "off",  "left/right", 0, AV_OPT_TYPE_CONST, {.i64=-1}, 0, 0, FLAGS, "joint"},
-    { "auto", NULL, 0, AV_OPT_TYPE_CONST, {.i64= 0}, 0, 0, FLAGS, "joint"},
-    { "optimize_mono",        "", OFFSET(optimize_mono), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS, "opt_mono" },
-    { "on",   NULL, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "opt_mono"},
-    { "off",  NULL, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "opt_mono"},
+    { "joint_stereo",  "", OFFSET(joint), AV_OPT_TYPE_BOOL, {.i64=-1}, -1, 1, FLAGS }, /* -1 (default): wv_stereo() leaves force_js/force_ts untouched */
+    { "optimize_mono", "", OFFSET(optimize_mono), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
     { NULL },
 };
 
@@ -2982,7 +2981,7 @@ AVCodec ff_wavpack_encoder = {
     .init           = wavpack_encode_init,
     .encode2        = wavpack_encode_frame,
     .close          = wavpack_encode_close,
-    .capabilities   = CODEC_CAP_SMALL_LAST_FRAME,
+    .capabilities   = AV_CODEC_CAP_SMALL_LAST_FRAME,
     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_U8P,
                                                      AV_SAMPLE_FMT_S16P,
                                                      AV_SAMPLE_FMT_S32P,
diff --git a/libavcodec/webp.c b/libavcodec/webp.c
index 723a8476..5c2961ff 100644
--- a/libavcodec/webp.c
+++ b/libavcodec/webp.c
@@ -1417,6 +1417,7 @@ static int webp_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                                                 chunk_size, 0);
                 if (ret < 0)
                     return ret;
+                avctx->properties |= FF_CODEC_PROPERTY_LOSSLESS;
             }
             bytestream2_skip(&gb, chunk_size);
             break;
@@ -1539,5 +1540,5 @@ AVCodec ff_webp_decoder = {
     .priv_data_size = sizeof(WebPContext),
     .decode         = webp_decode_frame,
     .close          = webp_decode_close,
-    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
 };
diff --git a/libavcodec/webvttdec.c b/libavcodec/webvttdec.c
index 1284a172..73545887 100644
--- a/libavcodec/webvttdec.c
+++ b/libavcodec/webvttdec.c
@@ -37,11 +37,14 @@ static const struct {
     {"<b>", "{\\b1}"}, {"</b>", "{\\b0}"},
     {"<u>", "{\\u1}"}, {"</u>", "{\\u0}"},
     {"{", "\\{"}, {"}", "\\}"}, // escape to avoid ASS markup conflicts
+    {"&gt;", ">"}, {"&lt;", "<"},
+    {"&lrm;", ""}, {"&rlm;", ""}, // FIXME: properly honor bidi marks
+    {"&amp;", "&"}, {"&nbsp;", "\\h"},
 };
 
 static int webvtt_event_to_ass(AVBPrint *buf, const char *p)
 {
-    int i, skip = 0;
+    int i, again = 0, skip = 0;
 
     while (*p) {
 
@@ -51,12 +54,18 @@ static int webvtt_event_to_ass(AVBPrint *buf, const char *p)
             if (!strncmp(p, from, len)) {
                 av_bprintf(buf, "%s", webvtt_tag_replace[i].to);
                 p += len;
+                again = 1;
                 break;
             }
         }
         if (!*p)
             break;
 
+        if (again) {
+            again = 0;
+            skip = 0;
+            continue;
+        }
         if (*p == '<')
             skip = 1;
         else if (*p == '>')
diff --git a/libavcodec/wma.c b/libavcodec/wma.c
index 4c1bf004..6d1c7e5c 100644
--- a/libavcodec/wma.c
+++ b/libavcodec/wma.c
@@ -185,8 +185,8 @@ av_cold int ff_wma_init(AVCodecContext *avctx, int flags2)
             high_freq = high_freq * 0.5;
     }
     ff_dlog(s->avctx, "flags2=0x%x\n", flags2);
-    ff_dlog(s->avctx, "version=%d channels=%d sample_rate=%d bitrate=%d block_align=%d\n",
-            s->version, avctx->channels, avctx->sample_rate, avctx->bit_rate,
+    ff_dlog(s->avctx, "version=%d channels=%d sample_rate=%d bitrate=%"PRId64" block_align=%d\n",
+            s->version, avctx->channels, avctx->sample_rate, (int64_t)avctx->bit_rate,
             avctx->block_align);
     ff_dlog(s->avctx, "bps=%f bps1=%f high_freq=%f bitoffset=%d\n",
             bps, bps1, high_freq, s->byte_offset_bits);
@@ -338,7 +338,7 @@ av_cold int ff_wma_init(AVCodecContext *avctx, int flags2)
 #endif /* TRACE */
     }
 
-    s->fdsp = avpriv_float_dsp_alloc(avctx->flags & CODEC_FLAG_BITEXACT);
+    s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
     if (!s->fdsp)
         return AVERROR(ENOMEM);
 
diff --git a/libavcodec/wma.h b/libavcodec/wma.h
index cc9d6f9f..325f03c4 100644
--- a/libavcodec/wma.h
+++ b/libavcodec/wma.h
@@ -120,7 +120,7 @@ typedef struct WMACodecContext {
     /* output buffer for one frame and the last for IMDCT windowing */
     DECLARE_ALIGNED(32, float, frame_out)[MAX_CHANNELS][BLOCK_MAX_SIZE * 2];
     /* last frame info */
-    uint8_t last_superframe[MAX_CODED_SUPERFRAME_SIZE + FF_INPUT_BUFFER_PADDING_SIZE]; /* padding added */
+    uint8_t last_superframe[MAX_CODED_SUPERFRAME_SIZE + AV_INPUT_BUFFER_PADDING_SIZE]; /* padding added */
     int last_bitoffset;
     int last_superframe_len;
     float noise_table[NOISE_TAB_SIZE];
@@ -144,7 +144,9 @@ extern const float ff_wma_lsp_codebook[NB_LSP_COEFS][16];
 extern const uint32_t ff_aac_scalefactor_code[121];
 extern const uint8_t  ff_aac_scalefactor_bits[121];
 
+av_warn_unused_result
 int ff_wma_init(AVCodecContext *avctx, int flags2);
+
 int ff_wma_total_gain_to_bits(int total_gain);
 int ff_wma_end(AVCodecContext *avctx);
 unsigned int ff_wma_get_large_val(GetBitContext *gb);
diff --git a/libavcodec/wma_freqs.h b/libavcodec/wma_freqs.h
index 85c5f697..6fd93e4e 100644
--- a/libavcodec/wma_freqs.h
+++ b/libavcodec/wma_freqs.h
@@ -23,4 +23,4 @@
 
 extern const uint16_t ff_wma_critical_freqs[25];
 
-#endif /* AVCODEC_WMA_FREQS */
+#endif /* AVCODEC_WMA_FREQS_H */
diff --git a/libavcodec/wmadec.c b/libavcodec/wmadec.c
index d60cf1fe..1a843237 100644
--- a/libavcodec/wmadec.c
+++ b/libavcodec/wmadec.c
@@ -34,6 +34,8 @@
  */
 
 #include "libavutil/attributes.h"
+#include "libavutil/internal.h"
+#include "libavutil/libm.h"
 
 #include "avcodec.h"
 #include "internal.h"
@@ -163,7 +165,7 @@ static av_cold void wma_lsp_to_curve_init(WMACodecContext *s, int frame_len)
     /* tables for x^-0.25 computation */
     for (i = 0; i < 256; i++) {
         e                     = i - 126;
-        s->lsp_pow_e_table[i] = pow(2.0, e * -0.25);
+        s->lsp_pow_e_table[i] = exp2f(e * -0.25);
     }
 
     /* NOTE: these two tables are needed to avoid two operations in
@@ -172,7 +174,7 @@ static av_cold void wma_lsp_to_curve_init(WMACodecContext *s, int frame_len)
     for (i = (1 << LSP_POW_BITS) - 1; i >= 0; i--) {
         m                      = (1 << LSP_POW_BITS) + i;
         a                      = (float) m * (0.5 / (1 << LSP_POW_BITS));
-        a                      = pow(a, -0.25);
+        a                      = 1/sqrt(sqrt(a));
         s->lsp_pow_m_table1[i] = 2 * a - b;
         s->lsp_pow_m_table2[i] = b - a;
         b                      = a;
@@ -626,7 +628,7 @@ static int wma_decode_block(WMACodecContext *s)
             coefs1    = s->coefs1[ch];
             exponents = s->exponents[ch];
             esize     = s->exponents_bsize[ch];
-            mult      = pow(10, total_gain * 0.05) / s->max_exponent[ch];
+            mult      = ff_exp10(total_gain * 0.05) / s->max_exponent[ch];
             mult     *= mdct_norm;
             coefs     = s->coefs[ch];
             if (s->use_noise_coding) {
@@ -674,7 +676,7 @@ static int wma_decode_block(WMACodecContext *s)
                         /* use noise with specified power */
                         mult1 = sqrt(exp_power[j] / exp_power[last_high_band]);
                         /* XXX: use a table */
-                        mult1  = mult1 * pow(10, s->high_band_values[ch][j] * 0.05);
+                        mult1  = mult1 * ff_exp10(s->high_band_values[ch][j] * 0.05);
                         mult1  = mult1 / (s->max_exponent[ch] * s->noise_mult);
                         mult1 *= mdct_norm;
                         for (i = 0; i < n; i++) {
@@ -854,7 +856,7 @@ static int wma_decode_superframe(AVCodecContext *avctx, void *data,
                 *q++ = get_bits (&s->gb, 8);
                 len --;
             }
-            memset(q, 0, FF_INPUT_BUFFER_PADDING_SIZE);
+            memset(q, 0, AV_INPUT_BUFFER_PADDING_SIZE);
 
             s->last_superframe_len += 8*buf_size - 8;
 //             s->reset_block_lengths = 1; //XXX is this needed ?
@@ -893,7 +895,7 @@ static int wma_decode_superframe(AVCodecContext *avctx, void *data,
             }
             if (len > 0)
                 *q++ = (get_bits) (&s->gb, len) << (8 - len);
-            memset(q, 0, FF_INPUT_BUFFER_PADDING_SIZE);
+            memset(q, 0, AV_INPUT_BUFFER_PADDING_SIZE);
 
             /* XXX: bit_offset bits into last frame */
             init_get_bits(&s->gb, s->last_superframe,
@@ -977,7 +979,7 @@ AVCodec ff_wmav1_decoder = {
     .close          = ff_wma_end,
     .decode         = wma_decode_superframe,
     .flush          = flush,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                       AV_SAMPLE_FMT_NONE },
 };
@@ -993,7 +995,7 @@ AVCodec ff_wmav2_decoder = {
     .close          = ff_wma_end,
     .decode         = wma_decode_superframe,
     .flush          = flush,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                       AV_SAMPLE_FMT_NONE },
 };
diff --git a/libavcodec/wmaenc.c b/libavcodec/wmaenc.c
index b922acde..d0727e69 100644
--- a/libavcodec/wmaenc.c
+++ b/libavcodec/wmaenc.c
@@ -20,6 +20,7 @@
  */
 
 #include "libavutil/attributes.h"
+#include "libavutil/internal.h"
 
 #include "avcodec.h"
 #include "internal.h"
@@ -32,6 +33,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
     WMACodecContext *s = avctx->priv_data;
     int i, flags1, flags2, block_align;
     uint8_t *extradata;
+    int ret;
 
     s->avctx = avctx;
 
@@ -50,8 +52,8 @@ static av_cold int encode_init(AVCodecContext *avctx)
 
     if (avctx->bit_rate < 24 * 1000) {
         av_log(avctx, AV_LOG_ERROR,
-               "bitrate too low: got %i, need 24000 or higher\n",
-               avctx->bit_rate);
+               "bitrate too low: got %"PRId64", need 24000 or higher\n",
+               (int64_t)avctx->bit_rate);
         return AVERROR(EINVAL);
     }
 
@@ -82,7 +84,8 @@ static av_cold int encode_init(AVCodecContext *avctx)
     if (avctx->channels == 2)
         s->ms_stereo = 1;
 
-    ff_wma_init(avctx, flags2);
+    if ((ret = ff_wma_init(avctx, flags2)) < 0)
+        return ret;
 
     /* init MDCT */
     for (i = 0; i < s->nb_block_sizes; i++)
@@ -97,7 +100,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
     return 0;
 }
 
-static void apply_window_and_mdct(AVCodecContext *avctx, const AVFrame *frame)
+static int apply_window_and_mdct(AVCodecContext *avctx, const AVFrame *frame)
 {
     WMACodecContext *s = avctx->priv_data;
     float **audio      = (float **) frame->extended_data;
@@ -116,7 +119,13 @@ static void apply_window_and_mdct(AVCodecContext *avctx, const AVFrame *frame)
                                     win, len);
         s->fdsp->vector_fmul(s->frame_out[ch], s->frame_out[ch], win, len);
         mdct->mdct_calc(mdct, s->coefs[ch], s->output);
+        if (!isfinite(s->coefs[ch][0])) {
+            av_log(avctx, AV_LOG_ERROR, "Input contains NaN/+-Inf\n");
+            return AVERROR(EINVAL);
+        }
     }
+
+    return 0;
 }
 
 // FIXME use for decoding too
@@ -132,7 +141,7 @@ static void init_exp(WMACodecContext *s, int ch, const int *exp_param)
     max_scale = 0;
     while (q < q_end) {
         /* XXX: use a table */
-        v         = pow(10, *exp_param++ *(1.0 / 16.0));
+        v         = ff_exp10(*exp_param++ *(1.0 / 16.0));
         max_scale = FFMAX(max_scale, v);
         n         = *ptr++;
         do {
@@ -227,7 +236,7 @@ static int encode_block(WMACodecContext *s, float (*src_coefs)[BLOCK_MAX_SIZE],
 
             coefs1    = s->coefs1[ch];
             exponents = s->exponents[ch];
-            mult      = pow(10, total_gain * 0.05) / s->max_exponent[ch];
+            mult      = ff_exp10(total_gain * 0.05) / s->max_exponent[ch];
             mult     *= mdct_norm;
             coefs     = src_coefs[ch];
             if (s->use_noise_coding && 0) {
@@ -363,7 +372,10 @@ static int encode_superframe(AVCodecContext *avctx, AVPacket *avpkt,
     s->block_len_bits = s->frame_len_bits; // required by non variable block len
     s->block_len      = 1 << s->block_len_bits;
 
-    apply_window_and_mdct(avctx, frame);
+    ret = apply_window_and_mdct(avctx, frame);
+
+    if (ret < 0)
+        return ret;
 
     if (s->ms_stereo) {
         float a, b;
@@ -377,7 +389,7 @@ static int encode_superframe(AVCodecContext *avctx, AVPacket *avpkt,
         }
     }
 
-    if ((ret = ff_alloc_packet2(avctx, avpkt, 2 * MAX_CODED_SUPERFRAME_SIZE)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, avpkt, 2 * MAX_CODED_SUPERFRAME_SIZE, 0)) < 0)
         return ret;
 
     total_gain = 128;
diff --git a/libavcodec/wmalosslessdec.c b/libavcodec/wmalosslessdec.c
index 0c85c839..8a5ffb86 100644
--- a/libavcodec/wmalosslessdec.c
+++ b/libavcodec/wmalosslessdec.c
@@ -72,7 +72,7 @@ typedef struct WmallDecodeCtx {
     AVCodecContext  *avctx;
     AVFrame         *frame;
     LLAudDSPContext dsp;                           ///< accelerated DSP functions
-    uint8_t         frame_data[MAX_FRAMESIZE + FF_INPUT_BUFFER_PADDING_SIZE];  ///< compressed frame data
+    uint8_t         frame_data[MAX_FRAMESIZE + AV_INPUT_BUFFER_PADDING_SIZE];  ///< compressed frame data
     PutBitContext   pb;                             ///< context for filling the frame_data buffer
 
     /* frame size dependent frame information (set during initialization) */
@@ -201,6 +201,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
         if (s->bits_per_sample == 16)
             avctx->sample_fmt = AV_SAMPLE_FMT_S16P;
         else if (s->bits_per_sample == 24) {
+            av_log(avctx, AV_LOG_WARNING, "Decoding audio at 24 bit-depth\n");
             avctx->sample_fmt = AV_SAMPLE_FMT_S32P;
             avctx->bits_per_raw_sample = 24;
         } else {
@@ -421,8 +422,7 @@ static void decode_ac_filter(WmallDecodeCtx *s)
     s->acfilter_scaling = get_bits(&s->gb, 4);
 
     for (i = 0; i < s->acfilter_order; i++)
-        s->acfilter_coeffs[i] = (s->acfilter_scaling ?
-                                 get_bits(&s->gb, s->acfilter_scaling) : 0) + 1;
+        s->acfilter_coeffs[i] = get_bitsz(&s->gb, s->acfilter_scaling) + 1;
 }
 
 static void decode_mclms(WmallDecodeCtx *s)
@@ -435,7 +435,7 @@ static void decode_mclms(WmallDecodeCtx *s)
         if (1 << cbits < s->mclms_scaling + 1)
             cbits++;
 
-        send_coef_bits = (cbits ? get_bits(&s->gb, cbits) : 0) + 2;
+        send_coef_bits = get_bitsz(&s->gb, cbits) + 2;
 
         for (i = 0; i < s->mclms_order * s->num_channels * s->num_channels; i++)
             s->mclms_coeffs[i] = get_bits(&s->gb, send_coef_bits);
@@ -488,7 +488,7 @@ static int decode_cdlms(WmallDecodeCtx *s)
                 if ((1 << cbits) < s->cdlms[c][i].scaling + 1)
                     cbits++;
 
-                s->cdlms[c][i].bitsend = (cbits ? get_bits(&s->gb, cbits) : 0) + 2;
+                s->cdlms[c][i].bitsend = get_bitsz(&s->gb, cbits) + 2;
                 shift_l = 32 - s->cdlms[c][i].bitsend;
                 shift_r = 32 - s->cdlms[c][i].scaling - 2;
                 for (j = 0; j < s->cdlms[c][i].coefsend; j++)
@@ -1299,7 +1299,7 @@ AVCodec ff_wmalossless_decoder = {
     .close          = decode_close,
     .decode         = decode_packet,
     .flush          = flush,
-    .capabilities   = CODEC_CAP_SUBFRAMES | CODEC_CAP_DR1 | CODEC_CAP_DELAY,
+    .capabilities   = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P,
                                                       AV_SAMPLE_FMT_S32P,
                                                       AV_SAMPLE_FMT_NONE },
diff --git a/libavcodec/wmaprodec.c b/libavcodec/wmaprodec.c
index c319b398..2ea59e92 100644
--- a/libavcodec/wmaprodec.c
+++ b/libavcodec/wmaprodec.c
@@ -89,6 +89,7 @@
 #include <inttypes.h>
 
 #include "libavutil/float_dsp.h"
+#include "libavutil/internal.h"
 #include "libavutil/intfloat.h"
 #include "libavutil/intreadwrite.h"
 #include "avcodec.h"
@@ -173,7 +174,7 @@ typedef struct WMAProDecodeCtx {
     AVCodecContext*  avctx;                         ///< codec context for av_log
     AVFloatDSPContext *fdsp;
     uint8_t          frame_data[MAX_FRAMESIZE +
-                      FF_INPUT_BUFFER_PADDING_SIZE];///< compressed frame data
+                      AV_INPUT_BUFFER_PADDING_SIZE];///< compressed frame data
     PutBitContext    pb;                            ///< context for filling the frame_data buffer
     FFTContext       mdct_ctx[WMAPRO_BLOCK_SIZES];  ///< MDCT context per block size
     DECLARE_ALIGNED(32, float, tmp)[WMAPRO_BLOCK_MAX_SIZE]; ///< IMDCT output buffer
@@ -206,9 +207,11 @@ typedef struct WMAProDecodeCtx {
     int              subframe_offset;               ///< subframe offset in the bit reservoir
     uint8_t          packet_loss;                   ///< set in case of bitstream error
     uint8_t          packet_done;                   ///< set when a packet is fully decoded
+    uint8_t          skip_packets;                  ///< packets decode_packet() still has to skip; read from the packet header of non-WMAPro streams
 
     /* frame decode state */
     uint32_t         frame_num;                     ///< current frame number (not used for decoding)
+    int              num_frames;                    ///< 6-bit field read from the XMA2 packet header (presumably frames per packet -- TODO confirm)
     GetBitContext    gb;                            ///< bitstream reader context
     int              buf_bit_size;                  ///< buffer size in bits
     uint8_t          drc_gain;                      ///< gain for the DRC tool
@@ -268,6 +271,21 @@ static av_cold int decode_end(AVCodecContext *avctx)
     return 0;
 }
 
+static av_cold int get_rate(AVCodecContext *avctx) /* rate used for the sfb offset computation in decode_init() */
+{
+    if (avctx->codec_id != AV_CODEC_ID_WMAPRO) { // XXX: is this really only for XMA?
+        if (avctx->sample_rate > 44100) /* map the stream rate onto one of 24000/32000/44100/48000 */
+            return 48000;
+        else if (avctx->sample_rate > 32000)
+            return 44100;
+        else if (avctx->sample_rate > 24000)
+            return 32000;
+        return 24000; /* every rate <= 24000 ends up here */
+    }
+
+    return avctx->sample_rate; /* WMAPro: stream rate is used unchanged */
+}
+
 /**
  *@brief Initialize the decoder.
  *@param avctx codec context
@@ -282,13 +300,16 @@ static av_cold int decode_init(AVCodecContext *avctx)
     int log2_max_num_subframes;
     int num_possible_block_sizes;
 
+    if (avctx->codec_id == AV_CODEC_ID_XMA1 || avctx->codec_id == AV_CODEC_ID_XMA2)
+        avctx->block_align = 2048;
+
     if (!avctx->block_align) {
         av_log(avctx, AV_LOG_ERROR, "block_align is not set\n");
         return AVERROR(EINVAL);
     }
 
     s->avctx = avctx;
-    s->fdsp = avpriv_float_dsp_alloc(avctx->flags & CODEC_FLAG_BITEXACT);
+    s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
     if (!s->fdsp)
         return AVERROR(ENOMEM);
 
@@ -296,10 +317,34 @@ static av_cold int decode_init(AVCodecContext *avctx)
 
     avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
 
-    if (avctx->extradata_size >= 18) {
+    if (avctx->codec_id == AV_CODEC_ID_XMA2 && avctx->extradata_size >= 34) {
+        s->decode_flags    = 0x10d6;
+        channel_mask       = AV_RL32(edata_ptr+2);
+        s->bits_per_sample = 16;
+        /** dump the extradata */
+        for (i = 0; i < avctx->extradata_size; i++)
+            ff_dlog(avctx, "[%x] ", avctx->extradata[i]);
+        ff_dlog(avctx, "\n");
+
+     } else if (avctx->codec_id == AV_CODEC_ID_XMA1 && avctx->extradata_size >= 28) {
+        s->decode_flags    = 0x10d6;
+        s->bits_per_sample = 16;
+        channel_mask       = 0;
+        /** dump the extradata */
+        for (i = 0; i < avctx->extradata_size; i++)
+            ff_dlog(avctx, "[%x] ", avctx->extradata[i]);
+        ff_dlog(avctx, "\n");
+
+     } else if (avctx->extradata_size >= 18) {
         s->decode_flags    = AV_RL16(edata_ptr+14);
         channel_mask       = AV_RL32(edata_ptr+2);
         s->bits_per_sample = AV_RL16(edata_ptr);
+
+        if (s->bits_per_sample > 32 || s->bits_per_sample < 1) {
+            avpriv_request_sample(avctx, "bits per sample is %d", s->bits_per_sample);
+            return AVERROR_PATCHWELCOME;
+        }
+
         /** dump the extradata */
         for (i = 0; i < avctx->extradata_size; i++)
             ff_dlog(avctx, "[%x] ", avctx->extradata[i]);
@@ -310,6 +355,11 @@ static av_cold int decode_init(AVCodecContext *avctx)
         return AVERROR_PATCHWELCOME;
     }
 
+    if (avctx->codec_id != AV_CODEC_ID_WMAPRO && avctx->channels > 2) {
+        avpriv_report_missing_feature(avctx, ">2 channels support");
+        return AVERROR_PATCHWELCOME;
+    }
+
     /** generic init */
     s->log2_frame_size = av_log2(avctx->block_align) + 4;
     if (s->log2_frame_size > 25) {
@@ -318,17 +368,25 @@ static av_cold int decode_init(AVCodecContext *avctx)
     }
 
     /** frame info */
-    s->skip_frame  = 1; /* skip first frame */
+    if (avctx->codec_id != AV_CODEC_ID_WMAPRO)
+        s->skip_frame = 0;
+    else
+        s->skip_frame = 1; /* skip first frame */
+
     s->packet_loss = 1;
     s->len_prefix  = (s->decode_flags & 0x40);
 
     /** get frame len */
-    bits = ff_wma_get_frame_len_bits(avctx->sample_rate, 3, s->decode_flags);
-    if (bits > WMAPRO_BLOCK_MAX_BITS) {
-        avpriv_request_sample(avctx, "14-bit block sizes");
-        return AVERROR_PATCHWELCOME;
+    if (avctx->codec_id == AV_CODEC_ID_WMAPRO) {
+        bits = ff_wma_get_frame_len_bits(avctx->sample_rate, 3, s->decode_flags);
+        if (bits > WMAPRO_BLOCK_MAX_BITS) {
+            avpriv_request_sample(avctx, "14-bit block sizes");
+            return AVERROR_PATCHWELCOME;
+        }
+        s->samples_per_frame = 1 << bits;
+    } else {
+        s->samples_per_frame = 512;
     }
-    s->samples_per_frame = 1 << bits;
 
     /** subframe info */
     log2_max_num_subframes       = ((s->decode_flags & 0x38) >> 3);
@@ -417,12 +475,12 @@ static av_cold int decode_init(AVCodecContext *avctx)
         int subframe_len = s->samples_per_frame >> i;
         int x;
         int band = 1;
+        int rate = get_rate(avctx);
 
         s->sfb_offsets[i][0] = 0;
 
         for (x = 0; x < MAX_BANDS-1 && s->sfb_offsets[i][band - 1] < subframe_len; x++) {
-            int offset = (subframe_len * 2 * critical_freq[x])
-                          / s->avctx->sample_rate + 2;
+            int offset = (subframe_len * 2 * critical_freq[x]) / rate + 2;
             offset &= ~3;
             if (offset > s->sfb_offsets[i][band - 1])
                 s->sfb_offsets[i][band++] = offset;
@@ -477,7 +535,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
     /** calculate subwoofer cutoff values */
     for (i = 0; i < num_possible_block_sizes; i++) {
         int block_size = s->samples_per_frame >> i;
-        int cutoff = (440*block_size + 3 * (s->avctx->sample_rate >> 1) - 1)
+        int cutoff = (440*block_size + 3LL * (s->avctx->sample_rate >> 1) - 1)
                      / s->avctx->sample_rate;
         s->subwoofer_cutoffs[i] = av_clip(cutoff, 4, block_size);
     }
@@ -1164,7 +1222,7 @@ static int decode_subframe(WMAProDecodeCtx *s)
         int num_fill_bits;
         if (!(num_fill_bits = get_bits(&s->gb, 2))) {
             int len = get_bits(&s->gb, 4);
-            num_fill_bits = (len ? get_bits(&s->gb, len) : 0) + 1;
+            num_fill_bits = get_bitsz(&s->gb, len) + 1;
         }
 
         if (num_fill_bits >= 0) {
@@ -1293,7 +1351,7 @@ static int decode_subframe(WMAProDecodeCtx *s)
                 const int exp = s->channel[c].quant_step -
                             (s->channel[c].max_scale_factor - *sf++) *
                             s->channel[c].scale_factor_step;
-                const float quant = pow(10.0, exp / 20.0);
+                const float quant = ff_exp10(exp / 20.0);
                 int start = s->cur_sfb_offsets[b];
                 s->fdsp->vector_fmul_scalar(s->tmp + start,
                                            s->channel[c].coeffs + start,
@@ -1535,32 +1593,52 @@ static int decode_packet(AVCodecContext *avctx, void *data,
 
     *got_frame_ptr = 0;
 
+    if (s->skip_packets > 0) {
+        s->skip_packets--;
+        return FFMIN(avpkt->size, avctx->block_align);
+    }
+
     if (s->packet_done || s->packet_loss) {
         s->packet_done = 0;
 
         /** sanity check for the buffer length */
-        if (buf_size < avctx->block_align) {
+        if (avctx->codec_id == AV_CODEC_ID_WMAPRO && buf_size < avctx->block_align) {
             av_log(avctx, AV_LOG_ERROR, "Input packet too small (%d < %d)\n",
                    buf_size, avctx->block_align);
             return AVERROR_INVALIDDATA;
         }
 
-        s->next_packet_start = buf_size - avctx->block_align;
-        buf_size = avctx->block_align;
+        if (avctx->codec_id == AV_CODEC_ID_WMAPRO) {
+            s->next_packet_start = buf_size - avctx->block_align;
+            buf_size = avctx->block_align;
+        } else {
+            s->next_packet_start = buf_size - FFMIN(buf_size, avctx->block_align);
+            buf_size = FFMIN(buf_size, avctx->block_align);
+        }
         s->buf_bit_size = buf_size << 3;
 
         /** parse packet header */
         init_get_bits(gb, buf, s->buf_bit_size);
-        packet_sequence_number = get_bits(gb, 4);
-        skip_bits(gb, 2);
+        if (avctx->codec_id != AV_CODEC_ID_XMA2) {
+            packet_sequence_number = get_bits(gb, 4);
+            skip_bits(gb, 2);
+        } else {
+            s->num_frames = get_bits(gb, 6);
+            packet_sequence_number = 0;
+        }
 
         /** get number of bits that need to be added to the previous frame */
         num_bits_prev_frame = get_bits(gb, s->log2_frame_size);
+        if (avctx->codec_id != AV_CODEC_ID_WMAPRO) {
+            skip_bits(gb, 3);
+            s->skip_packets = get_bits(gb, 8);
+        }
+
         ff_dlog(avctx, "packet[%d]: nbpf %x\n", avctx->frame_number,
                 num_bits_prev_frame);
 
         /** check for packet loss */
-        if (!s->packet_loss &&
+        if (avctx->codec_id != AV_CODEC_ID_XMA2 && !s->packet_loss &&
             ((s->packet_sequence_number + 1) & 0xF) != packet_sequence_number) {
             s->packet_loss = 1;
             av_log(avctx, AV_LOG_ERROR,
@@ -1623,6 +1701,11 @@ static int decode_packet(AVCodecContext *avctx, void *data,
             s->packet_done = 1;
     }
 
+    if (remaining_bits(s, gb) < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Overread %d\n", -remaining_bits(s, gb));
+        s->packet_loss = 1;
+    }
+
     if (s->packet_done && !s->packet_loss &&
         remaining_bits(s, gb) > 0) {
         /** save the rest of the data so that it can be decoded
@@ -1666,7 +1749,37 @@ AVCodec ff_wmapro_decoder = {
     .init           = decode_init,
     .close          = decode_end,
     .decode         = decode_packet,
-    .capabilities   = CODEC_CAP_SUBFRAMES | CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1,
+    .flush          = flush,
+    .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
+                                                      AV_SAMPLE_FMT_NONE },
+};
+
+AVCodec ff_xma1_decoder = { /* codec table entry for AV_CODEC_ID_XMA1 */
+    .name           = "xma1",
+    .long_name      = NULL_IF_CONFIG_SMALL("Xbox Media Audio 1"),
+    .type           = AVMEDIA_TYPE_AUDIO,
+    .id             = AV_CODEC_ID_XMA1,
+    .priv_data_size = sizeof(WMAProDecodeCtx), /* private context type is WMAProDecodeCtx */
+    .init           = decode_init,   /* same callbacks as the neighbouring ff_wmapro_decoder / ff_xma2_decoder entries */
+    .close          = decode_end,
+    .decode         = decode_packet,
+    .capabilities   = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1,
+    .flush          = flush,
+    .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP, /* the only format listed */
+                                                      AV_SAMPLE_FMT_NONE }, /* list terminator */
+};
+
+AVCodec ff_xma2_decoder = {
+    .name           = "xma2",
+    .long_name      = NULL_IF_CONFIG_SMALL("Xbox Media Audio 2"),
+    .type           = AVMEDIA_TYPE_AUDIO,
+    .id             = AV_CODEC_ID_XMA2,
+    .priv_data_size = sizeof(WMAProDecodeCtx),
+    .init           = decode_init,
+    .close          = decode_end,
+    .decode         = decode_packet,
+    .capabilities   = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1,
     .flush          = flush,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                       AV_SAMPLE_FMT_NONE },
diff --git a/libavcodec/wmavoice.c b/libavcodec/wmavoice.c
index fff1aa87..029dfdd8 100644
--- a/libavcodec/wmavoice.c
+++ b/libavcodec/wmavoice.c
@@ -203,7 +203,7 @@ typedef struct WMAVoiceContext {
                                   ///< to #wmavoice_decode_packet() (since
                                   ///< they're part of the previous superframe)
 
-    uint8_t sframe_cache[SFRAME_CACHE_MAXSIZE + FF_INPUT_BUFFER_PADDING_SIZE];
+    uint8_t sframe_cache[SFRAME_CACHE_MAXSIZE + AV_INPUT_BUFFER_PADDING_SIZE];
                                   ///< cache for superframe data split over
                                   ///< multiple packets
     int sframe_cache_size;        ///< set to >0 if we have data from an
@@ -2084,6 +2084,6 @@ AVCodec ff_wmavoice_decoder = {
     .init_static_data = wmavoice_init_static_data,
     .close            = wmavoice_decode_end,
     .decode           = wmavoice_decode_packet,
-    .capabilities     = CODEC_CAP_SUBFRAMES | CODEC_CAP_DR1,
+    .capabilities     = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1,
     .flush            = wmavoice_flush,
 };
diff --git a/libavcodec/wmv2.c b/libavcodec/wmv2.c
index 0ebe02db..9c3acbcd 100644
--- a/libavcodec/wmv2.c
+++ b/libavcodec/wmv2.c
@@ -89,7 +89,7 @@ void ff_wmv2_add_mb(MpegEncContext *s, int16_t block1[6][64],
     wmv2_add_block(w, block1[2], dest_y + 8 * s->linesize,     s->linesize, 2);
     wmv2_add_block(w, block1[3], dest_y + 8 + 8 * s->linesize, s->linesize, 3);
 
-    if (s->avctx->flags & CODEC_FLAG_GRAY)
+    if (s->avctx->flags & AV_CODEC_FLAG_GRAY)
         return;
 
     wmv2_add_block(w, block1[4], dest_cb, s->uvlinesize, 4);
@@ -141,7 +141,7 @@ void ff_mspel_motion(MpegEncContext *s, uint8_t *dest_y,
     w->wdsp.put_mspel_pixels_tab[dxy](dest_y     + 8 * linesize, ptr     + 8 * linesize, linesize);
     w->wdsp.put_mspel_pixels_tab[dxy](dest_y + 8 + 8 * linesize, ptr + 8 + 8 * linesize, linesize);
 
-    if (s->avctx->flags & CODEC_FLAG_GRAY)
+    if (s->avctx->flags & AV_CODEC_FLAG_GRAY)
         return;
 
     dxy = 0;
diff --git a/libavcodec/wmv2dec.c b/libavcodec/wmv2dec.c
index d2b9b130..99c95d39 100644
--- a/libavcodec/wmv2dec.c
+++ b/libavcodec/wmv2dec.c
@@ -108,7 +108,7 @@ static int decode_ext_header(Wmv2Context *w)
 
     if (s->avctx->debug & FF_DEBUG_PICT_INFO)
         av_log(s->avctx, AV_LOG_DEBUG,
-               "fps:%d, br:%d, qpbit:%d, abt_flag:%d, j_type_bit:%d, "
+               "fps:%d, br:%"PRId64", qpbit:%d, abt_flag:%d, j_type_bit:%d, "
                "tl_mv_flag:%d, mbrl_bit:%d, code:%d, loop_filter:%d, "
                "slices:%d\n",
                fps, s->bit_rate, w->mspel_bit, w->abt_flag, w->j_type_bit,
@@ -453,7 +453,9 @@ static av_cold int wmv2_decode_init(AVCodecContext *avctx)
     Wmv2Context *const w = avctx->priv_data;
     int ret;
 
+#if FF_API_EMU_EDGE
     avctx->flags |= CODEC_FLAG_EMU_EDGE;
+#endif
 
     if ((ret = ff_msmpeg4_decode_init(avctx)) < 0)
         return ret;
@@ -482,7 +484,7 @@ AVCodec ff_wmv2_decoder = {
     .init           = wmv2_decode_init,
     .close          = wmv2_decode_end,
     .decode         = ff_h263_decode_frame,
-    .capabilities   = CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DRAW_HORIZ_BAND | AV_CODEC_CAP_DR1,
     .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUV420P,
                                                      AV_PIX_FMT_NONE },
 };
diff --git a/libavcodec/wmv2enc.c b/libavcodec/wmv2enc.c
index 55ee089e..3ed8b5fb 100644
--- a/libavcodec/wmv2enc.c
+++ b/libavcodec/wmv2enc.c
@@ -62,9 +62,10 @@ static av_cold int wmv2_encode_init(AVCodecContext *avctx)
     ff_wmv2_common_init(w);
 
     avctx->extradata_size = 4;
-    avctx->extradata      = av_mallocz(avctx->extradata_size + FF_INPUT_BUFFER_PADDING_SIZE);
+    avctx->extradata      = av_mallocz(avctx->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
     if (!avctx->extradata)
         return AVERROR(ENOMEM);
+
     encode_ext_header(w);
 
     return 0;
@@ -213,7 +214,12 @@ void ff_wmv2_encode_mb(MpegEncContext *s, int16_t block[6][64],
         s->p_tex_bits += get_bits_diff(s);
 }
 
-FF_MPV_GENERIC_CLASS(wmv2)
+static const AVClass wmv2_class = { /* AVClass that ff_wmv2_encoder points to through .priv_class */
+    .class_name = "wmv2 encoder",
+    .item_name  = av_default_item_name,
+    .option     = ff_mpv_generic_options, /* option table is ff_mpv_generic_options; no wmv2-specific table is defined here */
+    .version    = LIBAVUTIL_VERSION_INT,
+};
 
 AVCodec ff_wmv2_encoder = {
     .name           = "wmv2",
@@ -221,10 +227,10 @@ AVCodec ff_wmv2_encoder = {
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_WMV2,
     .priv_data_size = sizeof(Wmv2Context),
+    .priv_class     = &wmv2_class,
     .init           = wmv2_encode_init,
     .encode2        = ff_mpv_encode_picture,
     .close          = ff_mpv_encode_end,
     .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUV420P,
                                                      AV_PIX_FMT_NONE },
-    .priv_class     = &wmv2_class,
 };
diff --git a/libavcodec/wnv1.c b/libavcodec/wnv1.c
index fb9e9cfe..9ff99b2f 100644
--- a/libavcodec/wnv1.c
+++ b/libavcodec/wnv1.c
@@ -73,12 +73,12 @@ static int decode_frame(AVCodecContext *avctx,
         return AVERROR_INVALIDDATA;
     }
 
-    rbuf = av_malloc(buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
+    rbuf = av_malloc(buf_size + AV_INPUT_BUFFER_PADDING_SIZE);
     if (!rbuf) {
         av_log(avctx, AV_LOG_ERROR, "Cannot allocate temporary buffer\n");
         return AVERROR(ENOMEM);
     }
-    memset(rbuf + buf_size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
+    memset(rbuf + buf_size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
 
     if ((ret = ff_get_buffer(avctx, p, 0)) < 0) {
         av_free(rbuf);
@@ -155,5 +155,5 @@ AVCodec ff_wnv1_decoder = {
     .priv_data_size = sizeof(WNV1Context),
     .init           = decode_init,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/wrapped_avframe.c b/libavcodec/wrapped_avframe.c
new file mode 100644
index 00000000..13c8d8a2
--- /dev/null
+++ b/libavcodec/wrapped_avframe.c
@@ -0,0 +1,73 @@
+/*
+ * AVFrame wrapper
+ * Copyright (c) 2015 Luca Barbato
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Simple wrapper to store an AVFrame and forward it as AVPacket.
+ */
+
+#include "avcodec.h"
+#include "internal.h"
+
+#include "libavutil/internal.h"
+#include "libavutil/frame.h"
+#include "libavutil/buffer.h"
+#include "libavutil/pixdesc.h"
+
+static void wrapped_avframe_release_buffer(void *unused, uint8_t *data) /* free callback handed to av_buffer_create() by wrapped_avframe_encode() */
+{
+    AVFrame *frame = (AVFrame *)data; /* the buffer "data" pointer is really the AVFrame that was wrapped */
+
+    av_frame_free(&frame); /* the first argument (named unused) is never read */
+}
+
+static int wrapped_avframe_encode(AVCodecContext *avctx, AVPacket *pkt, /* stores a clone of frame in pkt; returns 0, or AVERROR(ENOMEM) if the clone or the buffer cannot be created */
+                     const AVFrame *frame, int *got_packet)
+{
+    AVFrame *wrapped = av_frame_clone(frame); /* the packet carries its own clone rather than the caller's frame */
+
+    if (!wrapped)
+        return AVERROR(ENOMEM);
+
+    pkt->buf = av_buffer_create((uint8_t *)wrapped, sizeof(*wrapped), /* the buffer's data is the AVFrame struct itself */
+                                wrapped_avframe_release_buffer, NULL, /* free callback releases the clone; opaque pointer is NULL */
+                                AV_BUFFER_FLAG_READONLY);
+    if (!pkt->buf) {
+        av_frame_free(&wrapped); /* no buffer was created, so the free callback cannot release the clone: do it here */
+        return AVERROR(ENOMEM);
+    }
+
+    pkt->data = (uint8_t *)wrapped; /* payload is the address of the AVFrame, not a bitstream */
+    pkt->size = sizeof(*wrapped);
+
+    pkt->flags |= AV_PKT_FLAG_KEY; /* every packet produced here is flagged as a key frame */
+    *got_packet = 1; /* each successful call yields exactly one packet */
+    return 0;
+}
+
+AVCodec ff_wrapped_avframe_encoder = { /* codec table entry for AV_CODEC_ID_WRAPPED_AVFRAME */
+    .name           = "wrapped_avframe",
+    .long_name      = NULL_IF_CONFIG_SMALL("AVFrame to AVPacket passthrough"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_WRAPPED_AVFRAME,
+    .encode2        = wrapped_avframe_encode, /* only encode2 is set: no init/close callbacks and no priv_data_size */
+    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE,
+};
diff --git a/libavcodec/ws-snd1.c b/libavcodec/ws-snd1.c
index 6929cbf5..0f005807 100644
--- a/libavcodec/ws-snd1.c
+++ b/libavcodec/ws-snd1.c
@@ -177,5 +177,5 @@ AVCodec ff_ws_snd1_decoder = {
     .id             = AV_CODEC_ID_WESTWOOD_SND1,
     .init           = ws_snd_decode_init,
     .decode         = ws_snd_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 87985f27..668a9bef 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -6,10 +6,11 @@ OBJS-$(CONFIG_AUDIODSP)                += x86/audiodsp_init.o
 OBJS-$(CONFIG_BLOCKDSP)                += x86/blockdsp_init.o
 OBJS-$(CONFIG_BSWAPDSP)                += x86/bswapdsp_init.o
 OBJS-$(CONFIG_DCT)                     += x86/dct_init.o
+OBJS-$(CONFIG_DIRAC_DECODER)           += x86/diracdsp_init.o           \
+                                          x86/dirac_dwt_init.o
 OBJS-$(CONFIG_FDCTDSP)                 += x86/fdctdsp_init.o
 OBJS-$(CONFIG_FFT)                     += x86/fft_init.o
-OBJS-$(CONFIG_FLAC_DECODER)            += x86/flacdsp_init.o
-OBJS-$(CONFIG_FLAC_ENCODER)            += x86/flacdsp_init.o
+OBJS-$(CONFIG_FLACDSP)                 += x86/flacdsp_init.o
 OBJS-$(CONFIG_FMTCONVERT)              += x86/fmtconvert_init.o
 OBJS-$(CONFIG_H263DSP)                 += x86/h263dsp_init.o
 OBJS-$(CONFIG_H264CHROMA)              += x86/h264chroma_init.o
@@ -31,28 +32,32 @@ OBJS-$(CONFIG_MPEGVIDEOENC)            += x86/mpegvideoenc.o           \
                                           x86/mpegvideoencdsp_init.o
 OBJS-$(CONFIG_PIXBLOCKDSP)             += x86/pixblockdsp_init.o
 OBJS-$(CONFIG_QPELDSP)                 += x86/qpeldsp_init.o
+OBJS-$(CONFIG_RV34DSP)                 += x86/rv34dsp_init.o
 OBJS-$(CONFIG_VIDEODSP)                += x86/videodsp_init.o
 OBJS-$(CONFIG_VP3DSP)                  += x86/vp3dsp_init.o
+OBJS-$(CONFIG_VP8DSP)                  += x86/vp8dsp_init.o
 OBJS-$(CONFIG_XMM_CLOBBER_TEST)        += x86/w64xmmtest.o
 
 # decoders/encoders
-OBJS-$(CONFIG_AAC_DECODER)             += x86/sbrdsp_init.o
+OBJS-$(CONFIG_AAC_DECODER)             += x86/aacpsdsp_init.o          \
+                                          x86/sbrdsp_init.o
 OBJS-$(CONFIG_ADPCM_G722_DECODER)      += x86/g722dsp_init.o
 OBJS-$(CONFIG_ADPCM_G722_ENCODER)      += x86/g722dsp_init.o
+OBJS-$(CONFIG_ALAC_DECODER)            += x86/alacdsp_init.o
 OBJS-$(CONFIG_APNG_DECODER)            += x86/pngdsp_init.o
 OBJS-$(CONFIG_CAVS_DECODER)            += x86/cavsdsp.o
-OBJS-$(CONFIG_DCA_DECODER)             += x86/dcadsp_init.o
+OBJS-$(CONFIG_DCA_DECODER)             += x86/dcadsp_init.o x86/synth_filter_init.o
 OBJS-$(CONFIG_DNXHD_ENCODER)           += x86/dnxhdenc_init.o
 OBJS-$(CONFIG_HEVC_DECODER)            += x86/hevcdsp_init.o
+OBJS-$(CONFIG_JPEG2000_DECODER)        += x86/jpeg2000dsp_init.o
 OBJS-$(CONFIG_MLP_DECODER)             += x86/mlpdsp_init.o
 OBJS-$(CONFIG_MPEG4_DECODER)           += x86/xvididct_init.o
 OBJS-$(CONFIG_PNG_DECODER)             += x86/pngdsp_init.o
 OBJS-$(CONFIG_PRORES_DECODER)          += x86/proresdsp_init.o
 OBJS-$(CONFIG_PRORES_LGPL_DECODER)     += x86/proresdsp_init.o
-OBJS-$(CONFIG_RV30_DECODER)            += x86/rv34dsp_init.o
-OBJS-$(CONFIG_RV40_DECODER)            += x86/rv34dsp_init.o            \
-                                          x86/rv40dsp_init.o
+OBJS-$(CONFIG_RV40_DECODER)            += x86/rv40dsp_init.o
 OBJS-$(CONFIG_SVQ1_ENCODER)            += x86/svq1enc_init.o
+OBJS-$(CONFIG_TAK_DECODER)             += x86/takdsp_init.o
 OBJS-$(CONFIG_TRUEHD_DECODER)          += x86/mlpdsp_init.o
 OBJS-$(CONFIG_TTA_DECODER)             += x86/ttadsp_init.o
 OBJS-$(CONFIG_V210_DECODER)            += x86/v210-init.o
@@ -60,15 +65,15 @@ OBJS-$(CONFIG_V210_ENCODER)            += x86/v210enc_init.o
 OBJS-$(CONFIG_VC1_DECODER)             += x86/vc1dsp_init.o
 OBJS-$(CONFIG_VORBIS_DECODER)          += x86/vorbisdsp_init.o
 OBJS-$(CONFIG_VP6_DECODER)             += x86/vp6dsp_init.o
-OBJS-$(CONFIG_VP7_DECODER)             += x86/vp8dsp_init.o
-OBJS-$(CONFIG_VP8_DECODER)             += x86/vp8dsp_init.o
-OBJS-$(CONFIG_VP9_DECODER)             += x86/vp9dsp_init.o
+OBJS-$(CONFIG_VP9_DECODER)             += x86/vp9dsp_init.o            \
+                                          x86/vp9dsp_init_10bpp.o      \
+                                          x86/vp9dsp_init_12bpp.o      \
+                                          x86/vp9dsp_init_16bpp.o
 OBJS-$(CONFIG_WEBP_DECODER)            += x86/vp8dsp_init.o
 
 
 # GCC inline assembly optimizations
 # subsystems
-MMX-OBJS-$(CONFIG_DIRAC_DECODER)       += x86/dirac_dwt.o
 MMX-OBJS-$(CONFIG_FDCTDSP)             += x86/fdct.o
 MMX-OBJS-$(CONFIG_IDCTDSP)             += x86/simple_idct.o
 
@@ -77,24 +82,13 @@ MMX-OBJS-$(CONFIG_SNOW_DECODER)        += x86/snowdsp.o
 MMX-OBJS-$(CONFIG_SNOW_ENCODER)        += x86/snowdsp.o
 MMX-OBJS-$(CONFIG_VC1_DECODER)         += x86/vc1dsp_mmx.o
 
-
-# YASM optimizations
-YASM-OBJS                              += x86/deinterlace.o             \
-
 # subsystems
 YASM-OBJS-$(CONFIG_AC3DSP)             += x86/ac3dsp.o
 YASM-OBJS-$(CONFIG_AUDIODSP)           += x86/audiodsp.o
 YASM-OBJS-$(CONFIG_BLOCKDSP)           += x86/blockdsp.o
 YASM-OBJS-$(CONFIG_BSWAPDSP)           += x86/bswapdsp.o
 YASM-OBJS-$(CONFIG_DCT)                += x86/dct32.o
-YASM-OBJS-$(CONFIG_DIRAC_DECODER)      += x86/diracdsp_mmx.o x86/diracdsp_yasm.o\
-                                          x86/dwt_yasm.o
-YASM-OBJS-$(CONFIG_DNXHD_ENCODER)      += x86/dnxhdenc.o
 YASM-OBJS-$(CONFIG_FFT)                += x86/fft.o
-YASM-OBJS-$(CONFIG_FLAC_DECODER)       += x86/flacdsp.o
-ifdef CONFIG_GPL
-YASM-OBJS-$(CONFIG_FLAC_ENCODER)       += x86/flac_dsp_gpl.o
-endif
 YASM-OBJS-$(CONFIG_FMTCONVERT)         += x86/fmtconvert.o
 YASM-OBJS-$(CONFIG_H263DSP)            += x86/h263_loopfilter.o
 YASM-OBJS-$(CONFIG_H264CHROMA)         += x86/h264_chromamc.o           \
@@ -114,6 +108,7 @@ YASM-OBJS-$(CONFIG_H264QPEL)           += x86/h264_qpel_8bit.o          \
 YASM-OBJS-$(CONFIG_HPELDSP)            += x86/fpel.o                    \
                                           x86/hpeldsp.o
 YASM-OBJS-$(CONFIG_HUFFYUVDSP)         += x86/huffyuvdsp.o
+YASM-OBJS-$(CONFIG_HUFFYUVENCDSP)      += x86/huffyuvencdsp.o
 YASM-OBJS-$(CONFIG_IDCTDSP)            += x86/idctdsp.o
 YASM-OBJS-$(CONFIG_LLAUDDSP)           += x86/lossless_audiodsp.o
 YASM-OBJS-$(CONFIG_LLVIDDSP)           += x86/lossless_videodsp.o
@@ -124,29 +119,43 @@ YASM-OBJS-$(CONFIG_PIXBLOCKDSP)        += x86/pixblockdsp.o
 YASM-OBJS-$(CONFIG_QPELDSP)            += x86/qpeldsp.o                 \
                                           x86/fpel.o                    \
                                           x86/qpel.o
+YASM-OBJS-$(CONFIG_RV34DSP)            += x86/rv34dsp.o
+YASM-OBJS-$(CONFIG_IDCTDSP)            += x86/simple_idct10.o
 YASM-OBJS-$(CONFIG_VIDEODSP)           += x86/videodsp.o
 YASM-OBJS-$(CONFIG_VP3DSP)             += x86/vp3dsp.o
+YASM-OBJS-$(CONFIG_VP8DSP)             += x86/vp8dsp.o                  \
+                                          x86/vp8dsp_loopfilter.o
 
 # decoders/encoders
-YASM-OBJS-$(CONFIG_AAC_DECODER)        += x86/sbrdsp.o
+YASM-OBJS-$(CONFIG_AAC_DECODER)        += x86/aacpsdsp.o                \
+                                          x86/sbrdsp.o
 YASM-OBJS-$(CONFIG_ADPCM_G722_DECODER) += x86/g722dsp.o
 YASM-OBJS-$(CONFIG_ADPCM_G722_ENCODER) += x86/g722dsp.o
+YASM-OBJS-$(CONFIG_ALAC_DECODER)       += x86/alacdsp.o
 YASM-OBJS-$(CONFIG_APNG_DECODER)       += x86/pngdsp.o
-YASM-OBJS-$(CONFIG_DCA_DECODER)        += x86/dcadsp.o
+YASM-OBJS-$(CONFIG_DCA_DECODER)        += x86/dcadsp.o x86/synth_filter.o
+YASM-OBJS-$(CONFIG_DIRAC_DECODER)      += x86/diracdsp.o                \
+                                          x86/dirac_dwt.o
+YASM-OBJS-$(CONFIG_DNXHD_ENCODER)      += x86/dnxhdenc.o
+YASM-OBJS-$(CONFIG_FLAC_DECODER)       += x86/flacdsp.o
+ifdef CONFIG_GPL
+YASM-OBJS-$(CONFIG_FLAC_ENCODER)       += x86/flac_dsp_gpl.o
+endif
 YASM-OBJS-$(CONFIG_HEVC_DECODER)       += x86/hevc_mc.o                 \
                                           x86/hevc_deblock.o            \
                                           x86/hevc_idct.o               \
                                           x86/hevc_res_add.o            \
-                                          x86/hevc_sao.o
+                                          x86/hevc_sao.o                \
+                                          x86/hevc_sao_10bit.o
+YASM-OBJS-$(CONFIG_JPEG2000_DECODER)   += x86/jpeg2000dsp.o
 YASM-OBJS-$(CONFIG_MLP_DECODER)        += x86/mlpdsp.o
 YASM-OBJS-$(CONFIG_MPEG4_DECODER)      += x86/xvididct.o
 YASM-OBJS-$(CONFIG_PNG_DECODER)        += x86/pngdsp.o
 YASM-OBJS-$(CONFIG_PRORES_DECODER)     += x86/proresdsp.o
 YASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp.o
-YASM-OBJS-$(CONFIG_RV30_DECODER)       += x86/rv34dsp.o
-YASM-OBJS-$(CONFIG_RV40_DECODER)       += x86/rv34dsp.o                 \
-                                          x86/rv40dsp.o
+YASM-OBJS-$(CONFIG_RV40_DECODER)       += x86/rv40dsp.o
 YASM-OBJS-$(CONFIG_SVQ1_ENCODER)       += x86/svq1enc.o
+YASM-OBJS-$(CONFIG_TAK_DECODER)        += x86/takdsp.o
 YASM-OBJS-$(CONFIG_TRUEHD_DECODER)     += x86/mlpdsp.o
 YASM-OBJS-$(CONFIG_TTA_DECODER)        += x86/ttadsp.o
 YASM-OBJS-$(CONFIG_V210_ENCODER)       += x86/v210enc.o
@@ -154,12 +163,12 @@ YASM-OBJS-$(CONFIG_V210_DECODER)       += x86/v210.o
 YASM-OBJS-$(CONFIG_VC1_DECODER)        += x86/vc1dsp.o
 YASM-OBJS-$(CONFIG_VORBIS_DECODER)     += x86/vorbisdsp.o
 YASM-OBJS-$(CONFIG_VP6_DECODER)        += x86/vp6dsp.o
-YASM-OBJS-$(CONFIG_VP7_DECODER)        += x86/vp8dsp.o                  \
-                                          x86/vp8dsp_loopfilter.o
-YASM-OBJS-$(CONFIG_VP8_DECODER)        += x86/vp8dsp.o                  \
-                                          x86/vp8dsp_loopfilter.o
 YASM-OBJS-$(CONFIG_VP9_DECODER)        += x86/vp9intrapred.o            \
+                                          x86/vp9intrapred_16bpp.o      \
                                           x86/vp9itxfm.o                \
+                                          x86/vp9itxfm_16bpp.o          \
                                           x86/vp9lpf.o                  \
-                                          x86/vp9mc.o
+                                          x86/vp9lpf_16bpp.o            \
+                                          x86/vp9mc.o                   \
+                                          x86/vp9mc_16bpp.o
 YASM-OBJS-$(CONFIG_WEBP_DECODER)       += x86/vp8dsp.o
diff --git a/libavcodec/x86/aacpsdsp.asm b/libavcodec/x86/aacpsdsp.asm
new file mode 100644
index 00000000..d1187df4
--- /dev/null
+++ b/libavcodec/x86/aacpsdsp.asm
@@ -0,0 +1,215 @@
+;******************************************************************************
+;* SIMD optimized MPEG-4 Parametric Stereo decoding functions
+;*
+;* Copyright (C) 2015 James Almer
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+ps_p1m1p1m1: dd 0, 0x80000000, 0, 0x80000000
+
+SECTION .text
+
+;*************************************************************************
+;void ff_ps_add_squares_<opt>(float *dst, const float (*src)[2], int n);
+;*************************************************************************
+%macro PS_ADD_SQUARES 1
+cglobal ps_add_squares, 3, 3, %1, dst, src, n
+.loop:  ; per iteration: dst[0..3] += src[i][0]^2 + src[i][1]^2 for four consecutive float pairs
+    movaps m0, [srcq]         ; aligned load of pairs 0 and 1
+    movaps m1, [srcq+mmsize]  ; aligned load of pairs 2 and 3
+    mulps  m0, m0             ; square every element
+    mulps  m1, m1
+%if cpuflag(sse3)
+    haddps m0, m1             ; adjacent-lane sums: one sum of squares per pair, in order
+%else
+    movaps m3, m0
+    movaps m4, m1
+    shufps m3, m3, q0301      ; lanes become [1, 0, 3, 0] of the source
+    shufps m4, m4, q0301
+    addps  m0, m3             ; lanes 0 and 2 now hold the per-pair sums
+    addps  m1, m4
+    shufps m0, m1, q2020      ; gather lanes 0 and 2 of m0 then m1: same layout as the haddps result
+%endif
+    addps  m0, [dstq]         ; accumulate into the existing dst values
+    movaps [dstq], m0
+    add  dstq, mmsize         ; 4 floats written
+    add  srcq, mmsize*2       ; 8 floats read
+    sub    nd, mmsize/4       ; n is consumed 4 at a time
+    jg .loop                  ; no check before the first pass: the body always runs at least once
+    REP_RET
+%endmacro
+
+INIT_XMM sse
+PS_ADD_SQUARES 3  ; NOTE(review): %1 is passed to cglobal (presumably the XMM register count); this path touches m0, m1, m3, m4 - confirm 3 is intended
+INIT_XMM sse3
+PS_ADD_SQUARES 5  ; NOTE(review): the sse3 path only touches m0 and m1 - the two counts look swapped, confirm
+
+;*******************************************************************
+;void ff_ps_mul_pair_single_sse(float (*dst)[2], float (*src0)[2],
+;                                   float *src1, int n);
+;*******************************************************************
+INIT_XMM sse
+cglobal ps_mul_pair_single, 4, 5, 4, dst, src1, src2, n
+    xor r4q, r4q                   ; r4 = byte offset shared by dst and src1
+
+.loop:  ; per iteration: four float pairs from src1, each scaled by one float from src2
+    movu     m0, [src1q+r4q]         ; pairs 0 and 1 (unaligned load)
+    movu     m1, [src1q+r4q+mmsize]  ; pairs 2 and 3
+    mova     m2, [src2q]             ; four scalars a, b, c, d (aligned load)
+    mova     m3, m2
+    unpcklps m2, m2                  ; a, a, b, b
+    unpckhps m3, m3                  ; c, c, d, d
+    mulps    m0, m2                  ; both halves of a pair get the same scalar
+    mulps    m1, m3
+    mova [dstq+r4q], m0              ; aligned stores to dst
+    mova [dstq+r4q+mmsize], m1
+    add   src2q, mmsize              ; 4 scalars consumed
+    add     r4q, mmsize*2            ; 4 pairs (8 floats) consumed and produced
+    sub      nd, mmsize/4            ; n is consumed 4 at a time
+    jg .loop                         ; no check before the first pass: the body always runs at least once
+    REP_RET
+
+;***********************************************************************
+;void ff_ps_stereo_interpolate_sse3(float (*l)[2], float (*r)[2],
+;                                   float h[2][4], float h_step[2][4],
+;                                   int len);
+;***********************************************************************
+INIT_XMM sse3
+cglobal ps_stereo_interpolate, 5, 5, 6, l, r, h, h_step, n
+    movaps   m0, [hq]        ; first four floats at h: h0..h3 (aligned load)
+    movaps   m1, [h_stepq]   ; first four floats at h_step
+    cmp      nd, 0
+    jle .ret                 ; nothing to do for n <= 0
+    shl      nd, 3           ; n *= 8: bytes per float[2] element
+    add      lq, nq          ; point l and r past their last element...
+    add      rq, nq
+    neg      nq              ; ...and index with a negative offset that counts up to zero
+
+align 16
+.loop:
+    addps    m0, m1          ; h += h_step, applied before the first element is processed
+    movddup  m2, [lq+nq]     ; l.re, l.im, l.re, l.im
+    movddup  m3, [rq+nq]     ; r.re, r.im, r.re, r.im
+    movaps   m4, m0
+    movaps   m5, m0
+    unpcklps m4, m4          ; h0, h0, h1, h1
+    unpckhps m5, m5          ; h2, h2, h3, h3
+    mulps    m2, m4          ; l*h0 in the low half, l*h1 in the high half
+    mulps    m3, m5          ; r*h2 in the low half, r*h3 in the high half
+    addps    m2, m3
+    movsd  [lq+nq], m2       ; new l = l*h0 + r*h2
+    movhps [rq+nq], m2       ; new r = l*h1 + r*h3
+    add      nq, 8           ; next float[2] element
+    jl .loop
+.ret:
+    REP_RET
+
+;*******************************************************************
+;void ff_ps_hybrid_analysis_<opt>(float (*out)[2], float (*in)[2],
+;                                 const float (*filter)[8][2],
+;                                 int stride, int n);
+;*******************************************************************
+%macro PS_HYBRID_ANALYSIS_LOOP 3
+    movu     %1, [inq+mmsize*%3]          ; in[2*%3] and in[2*%3+1], counting float[2] elements
+    movu     m1, [inq+mmsize*(5-%3)+8]    ; in[11-2*%3] and in[12-2*%3]: the elements mirrored around in[6]
+%if cpuflag(sse3)
+    pshufd   %2, %1, q2301                ; copy of %1 with adjacent lanes swapped
+    pshufd   m4, m1, q0123                ; m1 with all four lanes reversed
+    pshufd   m1, m1, q1032                ; m1 with its two halves swapped
+    pshufd   m2, [filterq+nq+mmsize*%3], q2301 ; filter taps 2*%3 and 2*%3+1 of the current row, adjacent lanes swapped
+    addsubps %2, m4                       ; subtract in even lanes, add in odd lanes
+    addsubps %1, m1
+%else
+    mova     m2, [filterq+nq+mmsize*%3]   ; same data flow as the sse3 path, using shufps and an explicit sign flip
+    mova     %2, %1
+    mova     m4, m1
+    shufps   %2, %2, q2301
+    shufps   m4, m4, q0123
+    shufps   m1, m1, q1032
+    shufps   m2, m2, q2301
+    xorps    m4, m7                       ; m7 flips the sign of the odd lanes...
+    xorps    m1, m7
+    subps    %2, m4                       ; ...so subps yields subtract-even / add-odd, like addsubps
+    subps    %1, m1
+%endif
+    mulps    %2, m2                       ; weight both combinations by the filter taps
+    mulps    %1, m2
+%if %3
+    addps    m3, %2                       ; for %3 != 0, accumulate into m3/m0; the %3 == 0 call passes m0/m3 as %1/%2 and so initialises them
+    addps    m0, %1
+%endif
+%endmacro
+
+%macro PS_HYBRID_ANALYSIS 0
+cglobal ps_hybrid_analysis, 5, 5, 8, out, in, filter, stride, n
+%if cpuflag(sse3)
+%define MOVH movsd
+%else
+%define MOVH movlps
+%endif
+    shl strided, 3           ; stride *= 8: bytes per float[2] output element
+    shl nd, 6                ; n *= 64: bytes per filter row (float[8][2] per the prototype comment)
+    add filterq, nq          ; point filter past its last row...
+    neg nq                   ; ...and index with a negative offset that counts up to zero
+    mova m7, [ps_p1m1p1m1]   ; sign-bit mask for lanes 1 and 3
+
+align 16
+.loop:  ; one filter row, and one float[2] written to out, per iteration
+    PS_HYBRID_ANALYSIS_LOOP m0, m3, 0
+    PS_HYBRID_ANALYSIS_LOOP m5, m6, 1
+    PS_HYBRID_ANALYSIS_LOOP m5, m6, 2
+
+%if cpuflag(sse3)
+    pshufd   m3, m3, q2301   ; this branch folds the accumulators m0/m3 into m1
+    xorps    m0, m7
+    hsubps   m3, m0
+    pshufd   m1, m3, q0020
+    pshufd   m3, m3, q0031
+    addps    m1, m3
+    movsd    m2, [inq+6*8]   ; in[6], the element not covered by the three macro calls above
+%else
+    mova     m1, m3          ; same fold of m0/m3 into m1 without SSE3 horizontal ops
+    mova     m2, m0
+    shufps   m1, m1, q2301
+    shufps   m2, m2, q2301
+    subps    m1, m3
+    addps    m2, m0
+    unpcklps m3, m1, m2
+    unpckhps m1, m2
+    addps    m1, m3
+    movu     m2, [inq+6*8] ; faster than movlps and no risk of overread
+%endif
+    movss    m3, [filterq+nq+8*6] ; first float of tap 6 in the current filter row
+    SPLATD   m3                   ; broadcast it to all lanes
+    mulps    m2, m3               ; in[6] scaled by that tap
+    addps    m1, m2
+    MOVH [outq], m1               ; store the low 8 bytes: one float[2] result
+    add    outq, strideq          ; advance out by stride elements
+    add      nq, 64               ; next filter row
+    jl .loop                      ; no check before the first pass: the body always runs at least once
+    REP_RET
+%endmacro
+
+INIT_XMM sse
+PS_HYBRID_ANALYSIS
+INIT_XMM sse3
+PS_HYBRID_ANALYSIS
diff --git a/libavcodec/x86/aacpsdsp_init.c b/libavcodec/x86/aacpsdsp_init.c
new file mode 100644
index 00000000..f6d6c039
--- /dev/null
+++ b/libavcodec/x86/aacpsdsp_init.c
@@ -0,0 +1,55 @@
+/*
+ * SIMD optimized MPEG-4 Parametric Stereo decoding functions
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include "libavutil/x86/cpu.h"
+#include "libavutil/attributes.h"
+#include "libavcodec/aacpsdsp.h"
+
+void ff_ps_add_squares_sse  (float *dst, const float (*src)[2], int n);
+void ff_ps_add_squares_sse3 (float *dst, const float (*src)[2], int n);
+void ff_ps_mul_pair_single_sse (float (*dst)[2], float (*src0)[2],
+                                float *src1, int n);
+void ff_ps_hybrid_analysis_sse (float (*out)[2], float (*in)[2],
+                                const float (*filter)[8][2],
+                                int stride, int n);
+void ff_ps_hybrid_analysis_sse3(float (*out)[2], float (*in)[2],
+                                const float (*filter)[8][2],
+                                int stride, int n);
+void ff_ps_stereo_interpolate_sse3(float (*l)[2], float (*r)[2],
+                                   float h[2][4], float h_step[2][4],
+                                   int len);
+
+av_cold void ff_psdsp_init_x86(PSDSPContext *s) /* installs the x86 SIMD callbacks in s according to av_get_cpu_flags() */
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (EXTERNAL_SSE(cpu_flags)) {
+        s->add_squares            = ff_ps_add_squares_sse;
+        s->mul_pair_single        = ff_ps_mul_pair_single_sse; /* only an SSE version is declared for this callback */
+        s->hybrid_analysis        = ff_ps_hybrid_analysis_sse;
+    }
+    if (EXTERNAL_SSE3(cpu_flags)) { /* tested after SSE, so these assignments replace the SSE ones made above */
+        s->add_squares            = ff_ps_add_squares_sse3;
+        s->stereo_interpolate[0]  = ff_ps_stereo_interpolate_sse3; /* only slot [0] is set here */
+        s->hybrid_analysis        = ff_ps_hybrid_analysis_sse3;
+    }
+}
diff --git a/libavcodec/x86/ac3dsp_init.c b/libavcodec/x86/ac3dsp_init.c
index eea2736b..07f0d256 100644
--- a/libavcodec/x86/ac3dsp_init.c
+++ b/libavcodec/x86/ac3dsp_init.c
@@ -47,7 +47,6 @@ void ff_float_to_fixed24_sse2 (int32_t *dst, const float *src, unsigned int len)
 
 int ff_ac3_compute_mantissa_size_sse2(uint16_t mant_cnt[6][16]);
 
-void ff_ac3_extract_exponents_3dnow(uint8_t *exp, int32_t *coef, int nb_coefs);
 void ff_ac3_extract_exponents_sse2 (uint8_t *exp, int32_t *coef, int nb_coefs);
 void ff_ac3_extract_exponents_ssse3(uint8_t *exp, int32_t *coef, int nb_coefs);
 
diff --git a/libavcodec/x86/alacdsp.asm b/libavcodec/x86/alacdsp.asm
new file mode 100644
index 00000000..bb2069f7
--- /dev/null
+++ b/libavcodec/x86/alacdsp.asm
@@ -0,0 +1,133 @@
+;******************************************************************************
+;* ALAC DSP SIMD optimizations
+;*
+;* Copyright (C) 2015 James Almer
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION .text
+
+INIT_XMM sse4 ; the loop below uses pmulld, an SSE4.1 instruction
+%if ARCH_X86_64
+cglobal alac_decorrelate_stereo, 2, 5, 8, buf0, len, shift, weight, buf1
+%else
+cglobal alac_decorrelate_stereo, 2, 3, 8, buf0, len, shift, weight
+%define  buf1q  r2q
+%endif
+    movd    m6, shiftm      ; shift count used by psrad in the loop
+    movd    m7, weightm
+    SPLATD  m7              ; weight replicated into all four dword lanes
+    shl   lend, 2           ; sample count -> byte count (4-byte samples)
+    mov  buf1q, [buf0q + gprsize] ; second pointer of the array passed as the first argument
+    mov  buf0q, [buf0q]           ; first pointer of that array
+    add  buf1q, lenq        ; both pointers now address the end of their buffers ...
+    add  buf0q, lenq
+    neg  lenq               ; ... and lenq is a negative byte offset that counts up towards 0
+
+align 16
+.loop: ; NOTE(review): mova + 32-byte steps; presumably buffers are 16-byte aligned and padded to 8 samples - confirm
+    mova    m0, [buf0q + lenq]          ; m0/m1: eight samples from buf0
+    mova    m1, [buf0q + lenq + mmsize]
+    mova    m2, [buf1q + lenq]          ; m2/m3: eight samples from buf1
+    mova    m3, [buf1q + lenq + mmsize]
+    pmulld  m4, m2, m7                  ; buf1 * weight
+    pmulld  m5, m3, m7
+    psrad   m4, m6                      ; (buf1 * weight) >> shift, arithmetic shift
+    psrad   m5, m6
+    psubd   m0, m4                      ; a = buf0 - ((buf1 * weight) >> shift)
+    psubd   m1, m5
+    paddd   m2, m0                      ; b = buf1 + a
+    paddd   m3, m1
+    mova [buf1q + lenq], m0             ; buf1 <- a
+    mova [buf1q + lenq + mmsize], m1
+    mova [buf0q + lenq], m2             ; buf0 <- b
+    mova [buf0q + lenq + mmsize], m3
+
+    add   lenq, mmsize*2                ; two vectors (eight samples) per iteration
+    jl .loop                            ; tested after the body, so the body runs at least once
+    RET
+
+INIT_XMM sse2
+cglobal alac_append_extra_bits_stereo, 2, 5, 5, buf0, exbuf0, buf1, exbuf1, len
+    movifnidn lend, lenm ; sample count is the fifth argument
+    movd      m4, r2m ; exbits (third argument), read before r2 is reused as buf1q below
+    shl     lend, 2   ; sample count -> byte count (4-byte samples)
+    mov    buf1q, [buf0q + gprsize]   ; second pointer of the buffer array
+    mov    buf0q, [buf0q]             ; first pointer of the buffer array
+    mov  exbuf1q, [exbuf0q + gprsize] ; second pointer of the extra-bits array
+    mov  exbuf0q, [exbuf0q]           ; first pointer of the extra-bits array
+    add    buf1q, lenq ; all four pointers now address the end of their buffers ...
+    add    buf0q, lenq
+    add  exbuf1q, lenq
+    add  exbuf0q, lenq
+    neg lenq           ; ... and lenq is a negative byte offset that counts up towards 0
+
+align 16
+.loop: ; each iteration handles eight samples per channel: buf = (buf << exbits) | exbuf
+    mova      m0, [buf0q + lenq]
+    mova      m1, [buf0q + lenq + mmsize]
+    pslld     m0, m4                        ; make room for the extra bits
+    pslld     m1, m4
+    mova      m2, [buf1q + lenq]
+    mova      m3, [buf1q + lenq + mmsize]
+    pslld     m2, m4
+    pslld     m3, m4
+    por       m0, [exbuf0q + lenq]          ; OR in the extra bits of the first channel
+    por       m1, [exbuf0q + lenq + mmsize]
+    por       m2, [exbuf1q + lenq]          ; OR in the extra bits of the second channel
+    por       m3, [exbuf1q + lenq + mmsize]
+    mova [buf0q + lenq         ], m0
+    mova [buf0q + lenq + mmsize], m1
+    mova [buf1q + lenq         ], m2
+    mova [buf1q + lenq + mmsize], m3
+
+    add     lenq, mmsize*2 ; two vectors (eight samples) per iteration
+    jl .loop               ; tested after the body, so the body runs at least once
+    REP_RET
+
+%if ARCH_X86_64
+cglobal alac_append_extra_bits_mono, 2, 5, 3, buf, exbuf, exbits, ch, len
+%else
+cglobal alac_append_extra_bits_mono, 2, 3, 3, buf, exbuf, len
+%define exbitsm r2m
+%endif
+    movifnidn lend, r4m ; sample count is the fifth argument (on x86-32 it is loaded into r2)
+    movd     m2, exbitsm ; shift count: third argument (r2m in both builds)
+    shl    lend, 2       ; sample count -> byte count (4-byte samples)
+    mov    bufq, [bufq]     ; only the first pointer of the buffer array is used
+    mov  exbufq, [exbufq]   ; only the first pointer of the extra-bits array is used
+    add    bufq, lenq    ; both pointers now address the end of their buffers ...
+    add  exbufq, lenq
+    neg lenq             ; ... and lenq is a negative byte offset that counts up towards 0
+
+align 16
+.loop: ; each iteration handles eight samples: buf = (buf << exbits) | exbuf
+    mova      m0, [bufq + lenq]
+    mova      m1, [bufq + lenq + mmsize]
+    pslld     m0, m2                      ; make room for the extra bits
+    pslld     m1, m2
+    por       m0, [exbufq + lenq]         ; OR in the extra bits
+    por       m1, [exbufq + lenq + mmsize]
+    mova [bufq + lenq], m0
+    mova [bufq + lenq + mmsize], m1
+
+    add     lenq, mmsize*2 ; two vectors (eight samples) per iteration
+    jl .loop               ; tested after the body, so the body runs at least once
+    REP_RET
diff --git a/libavcodec/x86/alacdsp_init.c b/libavcodec/x86/alacdsp_init.c
new file mode 100644
index 00000000..de5dae6c
--- /dev/null
+++ b/libavcodec/x86/alacdsp_init.c
@@ -0,0 +1,44 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/alacdsp.h"
+#include "config.h"
+
+void ff_alac_decorrelate_stereo_sse4(int32_t *buffer[2], int nb_samples,
+                                     int decorr_shift, int decorr_left_weight);
+void ff_alac_append_extra_bits_stereo_sse2(int32_t *buffer[2], int32_t *extra_bits_buffer[2],
+                                           int extra_bits, int channels, int nb_samples);
+void ff_alac_append_extra_bits_mono_sse2(int32_t *buffer[2], int32_t *extra_bits_buffer[2],
+                                         int extra_bits, int channels, int nb_samples);
+
+av_cold void ff_alacdsp_init_x86(ALACDSPContext *c)
+{
+#if HAVE_YASM /* when this is 0 the body is empty and c is left untouched */
+    int cpu_flags = av_get_cpu_flags();
+    /* Install the external-assembly ALAC routines that the detected CPU flags allow. */
+    if (EXTERNAL_SSE2(cpu_flags)) {
+        c->append_extra_bits[0] = ff_alac_append_extra_bits_mono_sse2;   /* slot 0: mono routine */
+        c->append_extra_bits[1] = ff_alac_append_extra_bits_stereo_sse2; /* slot 1: stereo routine */
+    }
+    if (EXTERNAL_SSE4(cpu_flags)) { /* independent of the SSE2 check: sets a different pointer */
+        c->decorrelate_stereo   = ff_alac_decorrelate_stereo_sse4;
+    }
+#endif /* HAVE_YASM */
+}
diff --git a/libavcodec/x86/audiodsp.asm b/libavcodec/x86/audiodsp.asm
index 273b9ef6..3ffb27fc 100644
--- a/libavcodec/x86/audiodsp.asm
+++ b/libavcodec/x86/audiodsp.asm
@@ -21,7 +21,7 @@
 
 %include "libavutil/x86/x86util.asm"
 
-SECTION_TEXT
+SECTION .text
 
 %macro SCALARPRODUCT 0
 ; int ff_scalarproduct_int16(int16_t *v1, int16_t *v2, int order)
diff --git a/libavcodec/x86/blockdsp.asm b/libavcodec/x86/blockdsp.asm
index c7938588..7cbfa3a8 100644
--- a/libavcodec/x86/blockdsp.asm
+++ b/libavcodec/x86/blockdsp.asm
@@ -23,7 +23,7 @@
 
 %include "libavutil/x86/x86util.asm"
 
-SECTION_TEXT
+SECTION .text
 
 ;----------------------------------------
 ; void ff_clear_block(int16_t *blocks);
@@ -64,7 +64,7 @@ cglobal clear_blocks, 1, 2, %1, blocks, len
     add   blocksq, 768
     mov      lenq, -768
     ZERO       m0, m0
-.loop
+.loop:
     mova  [blocksq+lenq+mmsize*0], m0
     mova  [blocksq+lenq+mmsize*1], m0
     mova  [blocksq+lenq+mmsize*2], m0
diff --git a/libavcodec/x86/blockdsp_init.c b/libavcodec/x86/blockdsp_init.c
index 7780184a..21599934 100644
--- a/libavcodec/x86/blockdsp_init.c
+++ b/libavcodec/x86/blockdsp_init.c
@@ -31,30 +31,24 @@ void ff_clear_block_sse(int16_t *block);
 void ff_clear_blocks_mmx(int16_t *blocks);
 void ff_clear_blocks_sse(int16_t *blocks);
 
-#if FF_API_XVMC
-av_cold void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned high_bit_depth,
+av_cold void ff_blockdsp_init_x86(BlockDSPContext *c,
                                   AVCodecContext *avctx)
-#else
-av_cold void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned high_bit_depth)
-#endif /* FF_API_XVMC */
 {
 #if HAVE_YASM
     int cpu_flags = av_get_cpu_flags();
 
-    if (!high_bit_depth) {
-        if (EXTERNAL_MMX(cpu_flags)) {
-            c->clear_block  = ff_clear_block_mmx;
-            c->clear_blocks = ff_clear_blocks_mmx;
-        }
+    if (EXTERNAL_MMX(cpu_flags)) {
+        c->clear_block  = ff_clear_block_mmx;
+        c->clear_blocks = ff_clear_blocks_mmx;
+    }
 
     /* XvMCCreateBlocks() may not allocate 16-byte aligned blocks */
     if (CONFIG_XVMC && avctx->hwaccel && avctx->hwaccel->decode_mb)
         return;
 
-        if (EXTERNAL_SSE(cpu_flags)) {
-            c->clear_block  = ff_clear_block_sse;
-            c->clear_blocks = ff_clear_blocks_sse;
-        }
+    if (EXTERNAL_SSE(cpu_flags)) {
+        c->clear_block  = ff_clear_block_sse;
+        c->clear_blocks = ff_clear_blocks_sse;
     }
 #endif /* HAVE_YASM */
 }
diff --git a/libavcodec/x86/bswapdsp.asm b/libavcodec/x86/bswapdsp.asm
index ec060c93..56d80836 100644
--- a/libavcodec/x86/bswapdsp.asm
+++ b/libavcodec/x86/bswapdsp.asm
@@ -28,12 +28,12 @@ pb_bswap32: db 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
 
 cextern pb_80
 
-SECTION_TEXT
+SECTION .text
 
 ; %1 = aligned/unaligned
 %macro BSWAP_LOOPS  1
-    mov      r3, r2
-    sar      r2, 3
+    mov      r3d, r2d
+    sar      r2d, 3
     jz       .left4_%1
 .loop8_%1:
     mov%1    m0, [r1 +  0]
@@ -61,11 +61,11 @@ SECTION_TEXT
 %endif
     add      r0, 32
     add      r1, 32
-    dec      r2
+    dec      r2d
     jnz      .loop8_%1
 .left4_%1:
-    mov      r2, r3
-    and      r3, 4
+    mov      r2d, r3d
+    test     r3d, 4
     jz       .left
     mov%1    m0, [r1]
 %if cpuflag(ssse3)
@@ -95,7 +95,7 @@ cglobal bswap32_buf, 3,4,5
     mov      r3, r1
 %endif
     or       r3, r0
-    and      r3, 15
+    test     r3, 15
     jz       .start_align
     BSWAP_LOOPS  u
     jmp      .left
@@ -103,8 +103,7 @@ cglobal bswap32_buf, 3,4,5
     BSWAP_LOOPS  a
 .left:
 %if cpuflag(ssse3)
-    mov      r3, r2
-    and      r2, 2
+    test     r2d, 2
     jz       .left1
     movq     m0, [r1]
     pshufb   m0, m2
@@ -112,13 +111,13 @@ cglobal bswap32_buf, 3,4,5
     add      r1, 8
     add      r0, 8
 .left1:
-    and      r3, 1
+    test     r2d, 1
     jz       .end
     mov      r2d, [r1]
     bswap    r2d
     mov      [r0], r2d
 %else
-    and      r2, 3
+    and      r2d, 3
     jz       .end
 .loop2:
     mov      r3d, [r1]
@@ -126,7 +125,7 @@ cglobal bswap32_buf, 3,4,5
     mov      [r0], r3d
     add      r1, 4
     add      r0, 4
-    dec      r2
+    dec      r2d
     jnz      .loop2
 %endif
 .end:
diff --git a/libavcodec/x86/cavsdsp.c b/libavcodec/x86/cavsdsp.c
index b5711638..4b20e655 100644
--- a/libavcodec/x86/cavsdsp.c
+++ b/libavcodec/x86/cavsdsp.c
@@ -563,7 +563,7 @@ static av_cold void cavsdsp_init_3dnow(CAVSDSPContext *c,
 
 av_cold void ff_cavsdsp_init_x86(CAVSDSPContext *c, AVCodecContext *avctx)
 {
-    int cpu_flags = av_get_cpu_flags();
+    av_unused int cpu_flags = av_get_cpu_flags();
 
     cavsdsp_init_mmx(c, avctx);
 #if HAVE_AMD3DNOW_INLINE
diff --git a/libavcodec/x86/constants.c b/libavcodec/x86/constants.c
index 553dd49d..11002ee6 100644
--- a/libavcodec/x86/constants.c
+++ b/libavcodec/x86/constants.c
@@ -27,7 +27,8 @@ DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_1)    = { 0x0001000100010001ULL, 0x000
 DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_2)    = { 0x0002000200020002ULL, 0x0002000200020002ULL,
                                                     0x0002000200020002ULL, 0x0002000200020002ULL };
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_3)    = { 0x0003000300030003ULL, 0x0003000300030003ULL };
-DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_4)    = { 0x0004000400040004ULL, 0x0004000400040004ULL };
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_4)    = { 0x0004000400040004ULL, 0x0004000400040004ULL,
+                                                    0x0004000400040004ULL, 0x0004000400040004ULL };
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_5)    = { 0x0005000500050005ULL, 0x0005000500050005ULL };
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_8)    = { 0x0008000800080008ULL, 0x0008000800080008ULL };
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_9)    = { 0x0009000900090009ULL, 0x0009000900090009ULL };
@@ -55,6 +56,8 @@ DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_1024) = { 0x0400040004000400ULL, 0x040
                                                     0x0400040004000400ULL, 0x0400040004000400ULL};
 DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_2048) = { 0x0800080008000800ULL, 0x0800080008000800ULL,
                                                     0x0800080008000800ULL, 0x0800080008000800ULL };
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_4095) = { 0x0fff0fff0fff0fffULL, 0x0fff0fff0fff0fffULL,
+                                                    0x0fff0fff0fff0fffULL, 0x0fff0fff0fff0fffULL };
 DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_4096) = { 0x1000100010001000ULL, 0x1000100010001000ULL,
                                                     0x1000100010001000ULL, 0x1000100010001000ULL };
 DECLARE_ALIGNED(32, const ymm_reg,  ff_pw_8192) = { 0x2000200020002000ULL, 0x2000200020002000ULL,
@@ -72,10 +75,19 @@ DECLARE_ALIGNED(32, const ymm_reg,  ff_pb_3)    = { 0x0303030303030303ULL, 0x030
                                                     0x0303030303030303ULL, 0x0303030303030303ULL };
 DECLARE_ALIGNED(32, const xmm_reg,  ff_pb_15)   = { 0x0F0F0F0F0F0F0F0FULL, 0x0F0F0F0F0F0F0F0FULL };
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pb_80)   = { 0x8080808080808080ULL, 0x8080808080808080ULL };
-DECLARE_ALIGNED(16, const xmm_reg,  ff_pb_FE)   = { 0xFEFEFEFEFEFEFEFEULL, 0xFEFEFEFEFEFEFEFEULL };
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pb_FE)   = { 0xFEFEFEFEFEFEFEFEULL, 0xFEFEFEFEFEFEFEFEULL,
+                                                    0xFEFEFEFEFEFEFEFEULL, 0xFEFEFEFEFEFEFEFEULL };
 DECLARE_ALIGNED(8,  const uint64_t, ff_pb_FC)   =   0xFCFCFCFCFCFCFCFCULL;
 
 DECLARE_ALIGNED(16, const xmm_reg,  ff_ps_neg)  = { 0x8000000080000000ULL, 0x8000000080000000ULL };
 
 DECLARE_ALIGNED(32, const ymm_reg,  ff_pd_1)    = { 0x0000000100000001ULL, 0x0000000100000001ULL,
                                                     0x0000000100000001ULL, 0x0000000100000001ULL };
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pd_16)   = { 0x0000001000000010ULL, 0x0000001000000010ULL,
+                                                    0x0000001000000010ULL, 0x0000001000000010ULL };
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pd_32)   = { 0x0000002000000020ULL, 0x0000002000000020ULL,
+                                                    0x0000002000000020ULL, 0x0000002000000020ULL };
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pd_8192) = { 0x0000200000002000ULL, 0x0000200000002000ULL,
+                                                    0x0000200000002000ULL, 0x0000200000002000ULL };
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pd_65535)= { 0x0000ffff0000ffffULL, 0x0000ffff0000ffffULL,
+                                                    0x0000ffff0000ffffULL, 0x0000ffff0000ffffULL };
diff --git a/libavcodec/x86/constants.h b/libavcodec/x86/constants.h
index 33dbb650..b82aef9a 100644
--- a/libavcodec/x86/constants.h
+++ b/libavcodec/x86/constants.h
@@ -28,7 +28,7 @@
 extern const ymm_reg  ff_pw_1;
 extern const ymm_reg  ff_pw_2;
 extern const xmm_reg  ff_pw_3;
-extern const xmm_reg  ff_pw_4;
+extern const ymm_reg  ff_pw_4;
 extern const xmm_reg  ff_pw_5;
 extern const xmm_reg  ff_pw_8;
 extern const xmm_reg  ff_pw_9;
@@ -47,6 +47,7 @@ extern const ymm_reg  ff_pw_512;
 extern const ymm_reg  ff_pw_1023;
 extern const ymm_reg  ff_pw_1024;
 extern const ymm_reg  ff_pw_2048;
+extern const ymm_reg  ff_pw_4095;
 extern const ymm_reg  ff_pw_4096;
 extern const ymm_reg  ff_pw_8192;
 extern const ymm_reg  ff_pw_m1;
@@ -56,11 +57,15 @@ extern const ymm_reg  ff_pb_1;
 extern const ymm_reg  ff_pb_2;
 extern const ymm_reg  ff_pb_3;
 extern const xmm_reg  ff_pb_80;
-extern const xmm_reg  ff_pb_FE;
+extern const ymm_reg  ff_pb_FE;
 extern const uint64_t ff_pb_FC;
 
 extern const xmm_reg  ff_ps_neg;
 
 extern const ymm_reg  ff_pd_1;
+extern const ymm_reg  ff_pd_16;
+extern const ymm_reg  ff_pd_32;
+extern const ymm_reg  ff_pd_8192;
+extern const ymm_reg  ff_pd_65535;
 
 #endif /* AVCODEC_X86_CONSTANTS_H */
diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/dcadsp.asm
index 1ac23788..fb139570 100644
--- a/libavcodec/x86/dcadsp.asm
+++ b/libavcodec/x86/dcadsp.asm
@@ -1,6 +1,6 @@
 ;******************************************************************************
-;* SSE-optimized functions for the DCA decoder
-;* Copyright (C) 2012-2014 Christophe Gisquet <christophe.gisquet@gmail.com>
+;* SIMD-optimized functions for the DCA decoder
+;* Copyright (C) 2016 James Almer
 ;*
 ;* This file is part of FFmpeg.
 ;*
@@ -21,411 +21,183 @@
 
 %include "libavutil/x86/x86util.asm"
 
-SECTION_RODATA
-pf_inv16:  times 4 dd 0x3D800000 ; 1/16
+SECTION .text
 
-SECTION_TEXT
+%define sizeof_float 4
+%define FMA3_OFFSET (8 * cpuflag(fma3) * ARCH_X86_64)
 
-; void decode_hf(float dst[DCA_SUBBANDS][8], const int32_t vq_num[DCA_SUBBANDS],
-;                const int8_t hf_vq[1024][32], intptr_t vq_offset,
-;                int32_t scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end)
+%macro LFE_FIR0_FLOAT 0
+cglobal lfe_fir0_float, 4, 6, 12 + cpuflag(fma3)*4, samples, lfe, coeff, nblocks, cnt1, cnt2
+    shr nblocksd, 1
+    sub     lfeq, 7*sizeof_float
+    mov    cnt1d, 32*sizeof_float
+    mov    cnt2d, 32*sizeof_float-8-FMA3_OFFSET
+    lea   coeffq, [coeffq+cnt1q*8]
+    add samplesq, cnt1q
+    neg    cnt1q
 
-%macro DECODE_HF 0
-cglobal decode_hf, 6,6,5, dst, num, src, offset, scale, start, end
-    lea       srcq, [srcq + offsetq]
-    shl     startq, 2
-    mov    offsetd, endm
-%define DICT offsetq
-    shl    offsetq, 2
-    mov       endm, offsetq
 .loop:
-%if ARCH_X86_64
-    mov    offsetd, [scaleq + 2 * startq]
-    cvtsi2ss    m0, offsetd
-%else
-    cvtsi2ss    m0, [scaleq + 2 * startq]
-%endif
-    mov    offsetd, [numq + startq]
-    mulss       m0, [pf_inv16]
-    shl       DICT, 5
-    shufps      m0, m0, 0
-%if cpuflag(sse2)
-%if cpuflag(sse4)
-    pmovsxbd    m1, [srcq + DICT + 0]
-    pmovsxbd    m2, [srcq + DICT + 4]
-%else
-    movq        m1, [srcq + DICT]
-    punpcklbw   m1, m1
-    mova        m2, m1
-    punpcklwd   m1, m1
-    punpckhwd   m2, m2
-    psrad       m1, 24
-    psrad       m2, 24
-%endif
-    cvtdq2ps    m1, m1
-    cvtdq2ps    m2, m2
-%else
-    movd       mm0, [srcq + DICT + 0]
-    movd       mm1, [srcq + DICT + 4]
-    punpcklbw  mm0, mm0
-    punpcklbw  mm1, mm1
-    movq       mm2, mm0
-    movq       mm3, mm1
-    punpcklwd  mm0, mm0
-    punpcklwd  mm1, mm1
-    punpckhwd  mm2, mm2
-    punpckhwd  mm3, mm3
-    psrad      mm0, 24
-    psrad      mm1, 24
-    psrad      mm2, 24
-    psrad      mm3, 24
-    cvtpi2ps    m1, mm0
-    cvtpi2ps    m2, mm1
-    cvtpi2ps    m3, mm2
-    cvtpi2ps    m4, mm3
-    shufps      m0, m0, 0
-    shufps      m1, m3, q1010
-    shufps      m2, m4, q1010
-%endif
-    mulps       m1, m0
-    mulps       m2, m0
-    mova [dstq + 8 * startq +  0], m1
-    mova [dstq + 8 * startq + 16], m2
-    add     startq, 4
-    cmp     startq, endm
-    jl       .loop
-.end:
-%if notcpuflag(sse2)
-    emms
-%endif
-    REP_RET
-%endmacro
-
-%if ARCH_X86_32
-INIT_XMM sse
-DECODE_HF
-%endif
-
-INIT_XMM sse2
-DECODE_HF
-
-INIT_XMM sse4
-DECODE_HF
-
-; %1=v0/v1  %2=in1  %3=in2
-%macro FIR_LOOP 2-3
-.loop%1:
-%define va          m1
-%define vb          m2
-%if %1
-%define OFFSET      0
-%else
-%define OFFSET      NUM_COEF*count
-%endif
-; for v0, incrementing and for v1, decrementing
-    mova        va, [cf0q + OFFSET]
-    mova        vb, [cf0q + OFFSET + 4*NUM_COEF]
-%if %0 == 3
-    mova        m4, [cf0q + OFFSET + mmsize]
-    mova        m0, [cf0q + OFFSET + 4*NUM_COEF + mmsize]
-%endif
-    mulps       va, %2
-    mulps       vb, %2
-%if %0 == 3
-%if cpuflag(fma3)
-    fmaddps     va, m4, %3, va
-    fmaddps     vb, m0, %3, vb
-%else
-    mulps       m4, %3
-    mulps       m0, %3
-    addps       va, m4
-    addps       vb, m0
-%endif
-%endif
-    ; va = va1 va2 va3 va4
-    ; vb = vb1 vb2 vb3 vb4
-%if %1
-    SWAP        va, vb
-%endif
-    mova        m4, va
-    unpcklps    va, vb ; va3 vb3 va4 vb4
-    unpckhps    m4, vb ; va1 vb1 va2 vb2
-    addps       m4, va ; va1+3 vb1+3 va2+4 vb2+4
-    movhlps     vb, m4 ; va1+3  vb1+3
-    addps       vb, m4 ; va0..4 vb0..4
-    movlps  [outq + count], vb
-%if %1
-    sub       cf0q, 8*NUM_COEF
-%endif
-    add      count, 8
-    jl   .loop%1
-%endmacro
-
-; void dca_lfe_fir(float *out, float *in, float *coefs)
-%macro DCA_LFE_FIR 1
-cglobal dca_lfe_fir%1, 3,3,6-%1, out, in, cf0
-%define IN1       m3
-%define IN2       m5
-%define count     inq
-%define NUM_COEF  4*(2-%1)
-%define NUM_OUT   32*(%1+1)
-
-    movu     IN1, [inq + 4 - 1*mmsize]
-    shufps   IN1, IN1, q0123
-%if %1 == 0
-    movu     IN2, [inq + 4 - 2*mmsize]
-    shufps   IN2, IN2, q0123
-%endif
-
-    mov    count, -4*NUM_OUT
-    add     cf0q, 4*NUM_COEF*NUM_OUT
-    add     outq, 4*NUM_OUT
-    ; compute v0 first
-%if %1 == 0
-    FIR_LOOP   0, IN1, IN2
-%else
-    FIR_LOOP   0, IN1
-%endif
-    shufps   IN1, IN1, q0123
-    mov    count, -4*NUM_OUT
-    ; cf1 already correctly positioned
-    add     outq, 4*NUM_OUT          ; outq now at out2
-    sub     cf0q, 8*NUM_COEF
-%if %1 == 0
-    shufps   IN2, IN2, q0123
-    FIR_LOOP   1, IN2, IN1
-%else
-    FIR_LOOP   1, IN1
-%endif
-    RET
-%endmacro
-
-INIT_XMM sse
-DCA_LFE_FIR 0
-DCA_LFE_FIR 1
-%if HAVE_FMA3_EXTERNAL
-INIT_XMM fma3
-DCA_LFE_FIR 0
-%endif
-
-%macro SETZERO 1
-%if cpuflag(sse2) && notcpuflag(avx)
-    pxor          %1, %1
-%else
-    xorps         %1, %1, %1
-%endif
-%endmacro
-
-%macro SHUF 3
 %if cpuflag(avx)
-    mova          %3, [%2 - 16]
-    vperm2f128    %1, %3, %3, 1
-    vshufps       %1, %1, %1, q0123
+    cvtdq2ps  m4, [lfeq+16]
+    cvtdq2ps  m5, [lfeq   ]
+    shufps    m7, m4, m4, q0123
+    shufps    m6, m5, m5, q0123
 %elif cpuflag(sse2)
-    pshufd        %1, [%2], q0123
+    movu      m4, [lfeq+16]
+    movu      m5, [lfeq   ]
+    cvtdq2ps  m4, m4
+    cvtdq2ps  m5, m5
+    pshufd    m7, m4, q0123
+    pshufd    m6, m5, q0123
 %else
-    mova          %1, [%2]
-    shufps        %1, %1, q0123
+    cvtpi2ps  m4, [lfeq+16]
+    cvtpi2ps  m0, [lfeq+24]
+    cvtpi2ps  m5, [lfeq   ]
+    cvtpi2ps  m1, [lfeq+8 ]
+    shufps    m4, m0, q1010
+    shufps    m5, m1, q1010
+    shufps    m7, m4, m4, q0123
+    shufps    m6, m5, m5, q0123
 %endif
-%endmacro
 
-%macro INNER_LOOP   1
-    ; reading backwards:  ptr1 = synth_buf + j + i; ptr2 = synth_buf + j - i
-    ;~ a += window[i + j]      * (-synth_buf[15 - i + j])
-    ;~ b += window[i + j + 16] * (synth_buf[i + j])
-    SHUF          m5,  ptr2 + j + (15 - 3) * 4, m6
-    mova          m6, [ptr1 + j]
+.inner_loop:
 %if ARCH_X86_64
-    SHUF         m11,  ptr2 + j + (15 - 3) * 4 - mmsize, m12
-    mova         m12, [ptr1 + j + mmsize]
-%endif
+    movaps    m8, [coeffq+cnt1q*8   ]
+    movaps    m9, [coeffq+cnt1q*8+16]
+    movaps   m10, [coeffq+cnt1q*8+32]
+    movaps   m11, [coeffq+cnt1q*8+48]
 %if cpuflag(fma3)
-    fmaddps       m2, m6,  [win + %1 + j + 16 * 4], m2
-    fnmaddps      m1, m5,  [win + %1 + j], m1
-%if ARCH_X86_64
-    fmaddps       m8, m12, [win + %1 + j + mmsize + 16 * 4], m8
-    fnmaddps      m7, m11, [win + %1 + j + mmsize], m7
-%endif
-%else ; non-FMA
-    mulps         m6, m6,  [win + %1 + j + 16 * 4]
-    mulps         m5, m5,  [win + %1 + j]
-%if ARCH_X86_64
-    mulps        m12, m12, [win + %1 + j + mmsize + 16 * 4]
-    mulps        m11, m11, [win + %1 + j + mmsize]
-%endif
-    addps         m2, m2, m6
-    subps         m1, m1, m5
-%if ARCH_X86_64
-    addps         m8, m8, m12
-    subps         m7, m7, m11
-%endif
-%endif ; cpuflag(fma3)
-    ;~ c += window[i + j + 32] * (synth_buf[16 + i + j])
-    ;~ d += window[i + j + 48] * (synth_buf[31 - i + j])
-    SHUF          m6,  ptr2 + j + (31 - 3) * 4, m5
-    mova          m5, [ptr1 + j + 16 * 4]
-%if ARCH_X86_64
-    SHUF         m12,  ptr2 + j + (31 - 3) * 4 - mmsize, m11
-    mova         m11, [ptr1 + j + mmsize + 16 * 4]
-%endif
-%if cpuflag(fma3)
-    fmaddps       m3, m5,  [win + %1 + j + 32 * 4], m3
-    fmaddps       m4, m6,  [win + %1 + j + 48 * 4], m4
-%if ARCH_X86_64
-    fmaddps       m9, m11, [win + %1 + j + mmsize + 32 * 4], m9
-    fmaddps      m10, m12, [win + %1 + j + mmsize + 48 * 4], m10
-%endif
-%else ; non-FMA
-    mulps         m5, m5,  [win + %1 + j + 32 * 4]
-    mulps         m6, m6,  [win + %1 + j + 48 * 4]
-%if ARCH_X86_64
-    mulps        m11, m11, [win + %1 + j + mmsize + 32 * 4]
-    mulps        m12, m12, [win + %1 + j + mmsize + 48 * 4]
-%endif
-    addps         m3, m3, m5
-    addps         m4, m4, m6
-%if ARCH_X86_64
-    addps         m9, m9, m11
-    addps        m10, m10, m12
-%endif
-%endif ; cpuflag(fma3)
-    sub            j, 64 * 4
-%endmacro
-
-; void ff_synth_filter_inner_<opt>(float *synth_buf, float synth_buf2[32],
-;                                  const float window[512], float out[32],
-;                                  intptr_t offset, float scale)
-%macro SYNTH_FILTER 0
-cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \
-                              synth_buf, synth_buf2, window, out, off, scale
-%define scale m0
-%if ARCH_X86_32 || WIN64
-%if cpuflag(sse2) && notcpuflag(avx)
-    movd       scale, scalem
-    SPLATD        m0
-%else
-    VBROADCASTSS  m0, scalem
-%endif
-; Make sure offset is in a register and not on the stack
-%define OFFQ  r4q
+    movaps   m12, [coeffq+cnt1q*8+64]
+    movaps   m13, [coeffq+cnt1q*8+80]
+    movaps   m14, [coeffq+cnt1q*8+96]
+    movaps   m15, [coeffq+cnt1q*8+112]
+    mulps     m0, m7, m8
+    mulps     m1, m7, m10
+    mulps     m2, m7, m12
+    mulps     m3, m7, m14
+    fmaddps   m0, m6, m9, m0
+    fmaddps   m1, m6, m11, m1
+    fmaddps   m2, m6, m13, m2
+    fmaddps   m3, m6, m15, m3
+
+    haddps    m0, m1
+    haddps    m2, m3
+    haddps    m0, m2
+    movaps [samplesq+cnt1q], m0
 %else
-    SPLATD      xmm0
-%if cpuflag(avx)
-    vinsertf128   m0, m0, xmm0, 1
-%endif
-%define OFFQ  offq
-%endif
-    ; prepare inner counter limit 1
-    mov          r5q, 480
-    sub          r5q, offmp
-    and          r5q, -64
-    shl          r5q, 2
-%if ARCH_X86_32 || notcpuflag(avx)
-    mov         OFFQ, r5q
-%define i        r5q
-    mov            i, 16 * 4 - (ARCH_X86_64 + 1) * mmsize  ; main loop counter
+    mulps     m0, m7, m8
+    mulps     m1, m6, m9
+    mulps     m2, m7, m10
+    mulps     m3, m6, m11
+    addps     m0, m1
+    addps     m2, m3
+
+    unpckhps  m3, m0, m2
+    unpcklps  m0, m2
+    addps     m3, m0
+    movhlps   m2, m3
+    addps     m2, m3
+    movlps [samplesq+cnt1q], m2
+%endif
+%else ; ARCH_X86_32
+%if cpuflag(fma3)
+    mulps     m0, m7, [coeffq+cnt1q*8   ]
+    movaps        m1, [coeffq+cnt1q*8+16]
+    mulps     m2, m7, [coeffq+cnt1q*8+32]
+    fmaddps   m0, m6, m1, m0
+    fmaddps   m2, m6, [coeffq+cnt1q*8+48], m2
 %else
-%define i 0
-%define OFFQ  r5q
-%endif
+    mulps     m0, m7, [coeffq+cnt1q*8   ]
+    mulps     m1, m6, [coeffq+cnt1q*8+16]
+    mulps     m2, m7, [coeffq+cnt1q*8+32]
+    mulps     m3, m6, [coeffq+cnt1q*8+48]
+    addps     m0, m1
+    addps     m2, m3
+%endif
+    unpckhps  m3, m0, m2
+    unpcklps  m0, m2
+    addps     m3, m0
+    movhlps   m2, m3
+    addps     m2, m3
+    movlps [samplesq+cnt1q], m2
+%endif; ARCH
 
-%define buf2     synth_buf2q
-%if ARCH_X86_32
-    mov         buf2, synth_buf2mp
-%endif
-.mainloop
-    ; m1 = a  m2 = b  m3 = c  m4 = d
-    SETZERO       m3
-    SETZERO       m4
-    mova          m1, [buf2 + i]
-    mova          m2, [buf2 + i + 16 * 4]
-%if ARCH_X86_32
-%define ptr1     r0q
-%define ptr2     r1q
-%define win      r2q
-%define j        r3q
-    mov          win, windowm
-    mov         ptr1, synth_bufm
-%if ARCH_X86_32 || notcpuflag(avx)
-    add          win, i
-    add         ptr1, i
-%endif
-%else ; ARCH_X86_64
-%define ptr1     r6q
-%define ptr2     r7q ; must be loaded
-%define win      r8q
-%define j        r9q
-    SETZERO       m9
-    SETZERO      m10
-    mova          m7, [buf2 + i + mmsize]
-    mova          m8, [buf2 + i + mmsize + 16 * 4]
-    lea          win, [windowq + i]
-    lea         ptr1, [synth_bufq + i]
-%endif
-    mov         ptr2, synth_bufmp
-    ; prepare the inner loop counter
-    mov            j, OFFQ
-%if ARCH_X86_32 || notcpuflag(avx)
-    sub         ptr2, i
-%endif
-.loop1:
-    INNER_LOOP  0
-    jge       .loop1
-
-    mov            j, 448 * 4
-    sub            j, OFFQ
-    jz          .end
-    sub         ptr1, j
-    sub         ptr2, j
-    add          win, OFFQ ; now at j-64, so define OFFSET
-    sub            j, 64 * 4
-.loop2:
-    INNER_LOOP  64 * 4
-    jge       .loop2
-
-.end:
-%if ARCH_X86_32
-    mov         buf2, synth_buf2m ; needed for next iteration anyway
-    mov         outq, outmp       ; j, which will be set again during it
-%endif
-    ;~ out[i]      = a * scale;
-    ;~ out[i + 16] = b * scale;
-    mulps         m1, m1, scale
-    mulps         m2, m2, scale
-%if ARCH_X86_64
-    mulps         m7, m7, scale
-    mulps         m8, m8, scale
-%endif
-    ;~ synth_buf2[i]      = c;
-    ;~ synth_buf2[i + 16] = d;
-    mova   [buf2 + i +  0 * 4], m3
-    mova   [buf2 + i + 16 * 4], m4
 %if ARCH_X86_64
-    mova   [buf2 + i +  0 * 4 + mmsize], m9
-    mova   [buf2 + i + 16 * 4 + mmsize], m10
-%endif
-    ;~ out[i]      = a;
-    ;~ out[i + 16] = a;
-    mova   [outq + i +  0 * 4], m1
-    mova   [outq + i + 16 * 4], m2
-%if ARCH_X86_64
-    mova   [outq + i +  0 * 4 + mmsize], m7
-    mova   [outq + i + 16 * 4 + mmsize], m8
-%endif
-%if ARCH_X86_32 || notcpuflag(avx)
-    sub            i, (ARCH_X86_64 + 1) * mmsize
-    jge    .mainloop
-%endif
+%if cpuflag(fma3)
+    mulps     m8, m5
+    mulps    m10, m5
+    mulps    m12, m5
+    mulps    m14, m5
+    fmaddps   m8, m4, m9, m8
+    fmaddps  m10, m4, m11, m10
+    fmaddps  m12, m4, m13, m12
+    fmaddps  m14, m4, m15, m14
+
+    haddps   m10, m8
+    haddps   m14, m12
+    haddps   m14, m10
+    movaps [samplesq+cnt2q], m14
+%else
+    mulps     m8, m5
+    mulps     m9, m4
+    mulps    m10, m5
+    mulps    m11, m4
+    addps     m8, m9
+    addps    m10, m11
+
+    unpckhps m11, m10, m8
+    unpcklps m10, m8
+    addps    m11, m10
+    movhlps   m8, m11
+    addps     m8, m11
+    movlps [samplesq+cnt2q], m8
+%endif
+%else ; ARCH_X86_32
+%if cpuflag(fma3)
+    mulps     m0, m5, [coeffq+cnt1q*8   ]
+    mulps     m2, m5, [coeffq+cnt1q*8+32]
+    fmaddps   m0, m4, m1, m0
+    fmaddps   m2, m4, [coeffq+cnt1q*8+48], m2
+%else
+    mulps     m0, m5, [coeffq+cnt1q*8   ]
+    mulps     m1, m4, [coeffq+cnt1q*8+16]
+    mulps     m2, m5, [coeffq+cnt1q*8+32]
+    mulps     m3, m4, [coeffq+cnt1q*8+48]
+    addps     m0, m1
+    addps     m2, m3
+%endif
+    unpckhps  m3, m2, m0
+    unpcklps  m2, m0
+    addps     m3, m2
+    movhlps   m0, m3
+    addps     m0, m3
+    movlps [samplesq+cnt2q], m0
+%endif; ARCH
+
+    sub    cnt2d, 8 + FMA3_OFFSET
+    add    cnt1q, 8 + FMA3_OFFSET
+    jl .inner_loop
+
+    add     lfeq, 4
+    add samplesq,  64*sizeof_float
+    mov    cnt1q, -32*sizeof_float
+    mov    cnt2d,  32*sizeof_float-8-FMA3_OFFSET
+    sub nblocksd, 1
+    jg .loop
     RET
 %endmacro
 
 %if ARCH_X86_32
 INIT_XMM sse
-SYNTH_FILTER
+LFE_FIR0_FLOAT
 %endif
 INIT_XMM sse2
-SYNTH_FILTER
-INIT_YMM avx
-SYNTH_FILTER
-INIT_YMM fma3
-SYNTH_FILTER
+LFE_FIR0_FLOAT
+%if HAVE_AVX_EXTERNAL
+INIT_XMM avx
+LFE_FIR0_FLOAT
+%endif
+%if HAVE_FMA3_EXTERNAL
+INIT_XMM fma3
+LFE_FIR0_FLOAT
+%endif
diff --git a/libavcodec/x86/dcadsp_init.c b/libavcodec/x86/dcadsp_init.c
index 1a19f6b8..bfe13e5a 100644
--- a/libavcodec/x86/dcadsp_init.c
+++ b/libavcodec/x86/dcadsp_init.c
@@ -1,6 +1,4 @@
 /*
- * Copyright (c) 2012-2014 Christophe Gisquet <christophe.gisquet@gmail.com>
- *
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
@@ -23,91 +21,25 @@
 #include "libavutil/x86/cpu.h"
 #include "libavcodec/dcadsp.h"
 
-void ff_decode_hf_sse(float dst[DCA_SUBBANDS][8], const int vq_num[DCA_SUBBANDS],
-                      const int8_t hf_vq[1024][32], intptr_t vq_offset,
-                      int scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end);
-void ff_decode_hf_sse2(float dst[DCA_SUBBANDS][8], const int vq_num[DCA_SUBBANDS],
-                       const int8_t hf_vq[1024][32], intptr_t vq_offset,
-                       int scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end);
-void ff_decode_hf_sse4(float dst[DCA_SUBBANDS][8], const int vq_num[DCA_SUBBANDS],
-                       const int8_t hf_vq[1024][32], intptr_t vq_offset,
-                       int scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end);
-void ff_dca_lfe_fir0_sse(float *out, const float *in, const float *coefs);
-void ff_dca_lfe_fir1_sse(float *out, const float *in, const float *coefs);
-void ff_dca_lfe_fir0_fma3(float *out, const float *in, const float *coefs);
-
-av_cold void ff_dcadsp_init_x86(DCADSPContext *s)
-{
-    int cpu_flags = av_get_cpu_flags();
-
-    if (EXTERNAL_SSE(cpu_flags)) {
-#if ARCH_X86_32
-        s->decode_hf = ff_decode_hf_sse;
-#endif
-        s->lfe_fir[0]        = ff_dca_lfe_fir0_sse;
-        s->lfe_fir[1]        = ff_dca_lfe_fir1_sse;
-    }
-
-    if (EXTERNAL_SSE2(cpu_flags)) {
-        s->decode_hf = ff_decode_hf_sse2;
-    }
+#define LFE_FIR_FLOAT_FUNC(opt)                                               \
+void ff_lfe_fir0_float_##opt(float *pcm_samples, int32_t *lfe_samples,         \
+                             const float *filter_coeff, ptrdiff_t npcmblocks);
 
-    if (EXTERNAL_SSE4(cpu_flags)) {
-        s->decode_hf = ff_decode_hf_sse4;
-    }
+LFE_FIR_FLOAT_FUNC(sse)
+LFE_FIR_FLOAT_FUNC(sse2)
+LFE_FIR_FLOAT_FUNC(avx)
+LFE_FIR_FLOAT_FUNC(fma3)
 
-    if (EXTERNAL_FMA3(cpu_flags)) {
-        s->lfe_fir[0]        = ff_dca_lfe_fir0_fma3;
-    }
-}
-
-
-#define SYNTH_FILTER_FUNC(opt)                                                 \
-void ff_synth_filter_inner_##opt(float *synth_buf_ptr, float synth_buf2[32],   \
-                                 const float window[512],                      \
-                                 float out[32], intptr_t offset, float scale); \
-static void synth_filter_##opt(FFTContext *imdct,                              \
-                               float *synth_buf_ptr, int *synth_buf_offset,    \
-                               float synth_buf2[32], const float window[512],  \
-                               float out[32], const float in[32], float scale) \
-{                                                                              \
-    float *synth_buf= synth_buf_ptr + *synth_buf_offset;                       \
-                                                                               \
-    imdct->imdct_half(imdct, synth_buf, in);                                   \
-                                                                               \
-    ff_synth_filter_inner_##opt(synth_buf, synth_buf2, window,                 \
-                                out, *synth_buf_offset, scale);                \
-                                                                               \
-    *synth_buf_offset = (*synth_buf_offset - 32) & 511;                        \
-}                                                                              \
-
-#if HAVE_YASM
-#if ARCH_X86_32
-SYNTH_FILTER_FUNC(sse)
-#endif
-SYNTH_FILTER_FUNC(sse2)
-SYNTH_FILTER_FUNC(avx)
-SYNTH_FILTER_FUNC(fma3)
-#endif /* HAVE_YASM */
-
-av_cold void ff_synth_filter_init_x86(SynthFilterContext *s)
+av_cold void ff_dcadsp_init_x86(DCADSPContext *s)
 {
-#if HAVE_YASM
     int cpu_flags = av_get_cpu_flags();
 
-#if ARCH_X86_32
-    if (EXTERNAL_SSE(cpu_flags)) {
-        s->synth_filter_float = synth_filter_sse;
-    }
-#endif
-    if (EXTERNAL_SSE2(cpu_flags)) {
-        s->synth_filter_float = synth_filter_sse2;
-    }
-    if (EXTERNAL_AVX_FAST(cpu_flags)) {
-        s->synth_filter_float = synth_filter_avx;
-    }
-    if (EXTERNAL_FMA3(cpu_flags) && !(cpu_flags & AV_CPU_FLAG_AVXSLOW)) {
-        s->synth_filter_float = synth_filter_fma3;
-    }
-#endif /* HAVE_YASM */
+    if (ARCH_X86_32 && EXTERNAL_SSE(cpu_flags))
+        s->lfe_fir_float[0] = ff_lfe_fir0_float_sse;
+    if (EXTERNAL_SSE2(cpu_flags))
+        s->lfe_fir_float[0] = ff_lfe_fir0_float_sse2;
+    if (EXTERNAL_AVX(cpu_flags))
+        s->lfe_fir_float[0] = ff_lfe_fir0_float_avx;
+    if (EXTERNAL_FMA3(cpu_flags))
+        s->lfe_fir_float[0] = ff_lfe_fir0_float_fma3;
 }
diff --git a/libavcodec/x86/dct-test.c b/libavcodec/x86/dct-test.c
index 0414381e..28ede166 100644
--- a/libavcodec/x86/dct-test.c
+++ b/libavcodec/x86/dct-test.c
@@ -84,6 +84,18 @@ static const struct algo idct_tab_arch[] = {
 # if HAVE_AVX_EXTERNAL
     { "PR-AVX",      ff_prores_idct_put_10_avx_wrap, FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_AVX, 1 },
 # endif
+#endif
+#if HAVE_YASM
+#if ARCH_X86_64
+#if HAVE_SSE2_EXTERNAL
+    { "SIMPLE10-SSE2",  ff_simple_idct10_sse2, FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_SSE2},
+    { "SIMPLE12-SSE2",  ff_simple_idct12_sse2, FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_SSE2, 1 },
+#endif
+#if HAVE_AVX_EXTERNAL
+    { "SIMPLE10-AVX",   ff_simple_idct10_avx,  FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_AVX},
+    { "SIMPLE12-AVX",   ff_simple_idct12_avx,  FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_AVX,  1 },
+#endif
+#endif
 #endif
     { 0 }
 };
diff --git a/libavcodec/x86/dct32.asm b/libavcodec/x86/dct32.asm
index c70f6c9c..4e657b54 100644
--- a/libavcodec/x86/dct32.asm
+++ b/libavcodec/x86/dct32.asm
@@ -191,7 +191,7 @@ ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000, 0, 0, 0x80000000, 0x80000000
 %endmacro
 
 INIT_YMM avx
-SECTION_TEXT
+SECTION .text
 %if HAVE_AVX_EXTERNAL
 ; void ff_dct32_float_avx(FFTSample *out, const FFTSample *in)
 cglobal dct32_float, 2,3,8, out, in, tmp
@@ -488,5 +488,6 @@ cglobal dct32_float, 2, 3, 16, out, in, tmp
 INIT_XMM sse
 DCT32_FUNC
 %endif
+
 INIT_XMM sse2
 DCT32_FUNC
diff --git a/libavcodec/x86/dct_init.c b/libavcodec/x86/dct_init.c
index daf2bb4e..c31ef922 100644
--- a/libavcodec/x86/dct_init.c
+++ b/libavcodec/x86/dct_init.c
@@ -30,8 +30,10 @@ av_cold void ff_dct_init_x86(DCTContext *s)
 {
     int cpu_flags = av_get_cpu_flags();
 
-    if (ARCH_X86_32 && EXTERNAL_SSE(cpu_flags))
+#if ARCH_X86_32
+    if (EXTERNAL_SSE(cpu_flags))
         s->dct32 = ff_dct32_float_sse;
+#endif
     if (EXTERNAL_SSE2(cpu_flags))
         s->dct32 = ff_dct32_float_sse2;
     if (EXTERNAL_AVX_FAST(cpu_flags))
diff --git a/libavcodec/x86/deinterlace.asm b/libavcodec/x86/deinterlace.asm
deleted file mode 100644
index c421385f..00000000
--- a/libavcodec/x86/deinterlace.asm
+++ /dev/null
@@ -1,84 +0,0 @@
-;******************************************************************************
-;* SIMD-optimized deinterlacing functions
-;* Copyright (c) 2010 Vitor Sessak
-;* Copyright (c) 2002 Michael Niedermayer
-;*
-;* This file is part of FFmpeg.
-;*
-;* FFmpeg is free software; you can redistribute it and/or
-;* modify it under the terms of the GNU Lesser General Public
-;* License as published by the Free Software Foundation; either
-;* version 2.1 of the License, or (at your option) any later version.
-;*
-;* FFmpeg is distributed in the hope that it will be useful,
-;* but WITHOUT ANY WARRANTY; without even the implied warranty of
-;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-;* Lesser General Public License for more details.
-;*
-;* You should have received a copy of the GNU Lesser General Public
-;* License along with FFmpeg; if not, write to the Free Software
-;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-;******************************************************************************
-
-%include "libavutil/x86/x86util.asm"
-
-SECTION_RODATA
-
-cextern pw_4
-
-SECTION .text
-
-%macro DEINTERLACE 1
-%ifidn %1, inplace
-;void ff_deinterlace_line_inplace_mmx(const uint8_t *lum_m4, const uint8_t *lum_m3, const uint8_t *lum_m2, const uint8_t *lum_m1, const uint8_t *lum,  int size)
-cglobal deinterlace_line_inplace, 6,6,7,      lum_m4, lum_m3, lum_m2, lum_m1, lum, size
-%else
-;void ff_deinterlace_line_mmx(uint8_t *dst, const uint8_t *lum_m4, const uint8_t *lum_m3, const uint8_t *lum_m2, const uint8_t *lum_m1, const uint8_t *lum,  int size)
-cglobal deinterlace_line,         7,7,7, dst, lum_m4, lum_m3, lum_m2, lum_m1, lum, size
-%endif
-    pxor  mm7, mm7
-    movq  mm6, [pw_4]
-.nextrow:
-    movd  mm0, [lum_m4q]
-    movd  mm1, [lum_m3q]
-    movd  mm2, [lum_m2q]
-%ifidn %1, inplace
-    movd [lum_m4q], mm2
-%endif
-    movd  mm3, [lum_m1q]
-    movd  mm4, [lumq]
-    punpcklbw mm0, mm7
-    punpcklbw mm1, mm7
-    punpcklbw mm2, mm7
-    punpcklbw mm3, mm7
-    punpcklbw mm4, mm7
-    paddw     mm1, mm3
-    psllw     mm2, 1
-    paddw     mm0, mm4
-    psllw     mm1, 2
-    paddw     mm2, mm6
-    paddw     mm1, mm2
-    psubusw   mm1, mm0
-    psrlw     mm1, 3
-    packuswb  mm1, mm7
-%ifidn %1, inplace
-    movd [lum_m2q], mm1
-%else
-    movd   [dstq], mm1
-    add       dstq, 4
-%endif
-    add    lum_m4q, 4
-    add    lum_m3q, 4
-    add    lum_m2q, 4
-    add    lum_m1q, 4
-    add       lumq, 4
-    sub      sized, 4
-    jg .nextrow
-    REP_RET
-%endmacro
-
-INIT_MMX mmx
-
-DEINTERLACE ""
-
-DEINTERLACE inplace
diff --git a/libavcodec/x86/dwt_yasm.asm b/libavcodec/x86/dirac_dwt.asm
similarity index 99%
rename from libavcodec/x86/dwt_yasm.asm
rename to libavcodec/x86/dirac_dwt.asm
index 658acc13..89806899 100644
--- a/libavcodec/x86/dwt_yasm.asm
+++ b/libavcodec/x86/dirac_dwt.asm
@@ -1,5 +1,5 @@
 ;******************************************************************************
-;* MMX optimized discrete wavelet trasnform
+;* x86 optimized discrete wavelet transform
 ;* Copyright (c) 2010 David Conrad
 ;*
 ;* This file is part of FFmpeg.
diff --git a/libavcodec/x86/dirac_dwt.c b/libavcodec/x86/dirac_dwt_init.c
similarity index 67%
rename from libavcodec/x86/dirac_dwt.c
rename to libavcodec/x86/dirac_dwt_init.c
index 3c51ea6f..afdf0a14 100644
--- a/libavcodec/x86/dirac_dwt.c
+++ b/libavcodec/x86/dirac_dwt_init.c
@@ -1,5 +1,5 @@
 /*
- * MMX optimized discrete wavelet transform
+ * x86 optimized discrete wavelet transform
  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  * Copyright (c) 2010 David Conrad
  *
@@ -22,20 +22,23 @@
 
 #include "libavutil/x86/asm.h"
 #include "libavutil/x86/cpu.h"
-#include "dirac_dwt.h"
+#include "libavcodec/dirac_dwt.h"
 
 #define COMPOSE_VERTICAL(ext, align) \
-void ff_vertical_compose53iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width); \
-void ff_vertical_compose_dirac53iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width); \
-void ff_vertical_compose_dd137iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, int width); \
-void ff_vertical_compose_dd97iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, int width); \
-void ff_vertical_compose_haar##ext(IDWTELEM *b0, IDWTELEM *b1, int width); \
-void ff_horizontal_compose_haar0i##ext(IDWTELEM *b, IDWTELEM *tmp, int w);\
-void ff_horizontal_compose_haar1i##ext(IDWTELEM *b, IDWTELEM *tmp, int w);\
-\
-static void vertical_compose53iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width) \
+void ff_vertical_compose53iL0##ext(int16_t *b0, int16_t *b1, int16_t *b2, int width); \
+void ff_vertical_compose_dirac53iH0##ext(int16_t *b0, int16_t *b1, int16_t *b2, int width); \
+void ff_vertical_compose_dd137iL0##ext(int16_t *b0, int16_t *b1, int16_t *b2, int16_t *b3, int16_t *b4, int width); \
+void ff_vertical_compose_dd97iH0##ext(int16_t *b0, int16_t *b1, int16_t *b2, int16_t *b3, int16_t *b4, int width); \
+void ff_vertical_compose_haar##ext(int16_t *b0, int16_t *b1, int width); \
+void ff_horizontal_compose_haar0i##ext(int16_t *b, int16_t *tmp, int w);\
+void ff_horizontal_compose_haar1i##ext(int16_t *b, int16_t *tmp, int w);\
+\
+static void vertical_compose53iL0##ext(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2, int width) \
 { \
     int i, width_align = width&~(align-1); \
+    int16_t *b0 = (int16_t *)_b0; \
+    int16_t *b1 = (int16_t *)_b1; \
+    int16_t *b2 = (int16_t *)_b2; \
 \
     for(i=width_align; i<width; i++) \
         b1[i] = COMPOSE_53iL0(b0[i], b1[i], b2[i]); \
@@ -43,9 +46,12 @@ static void vertical_compose53iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
     ff_vertical_compose53iL0##ext(b0, b1, b2, width_align); \
 } \
 \
-static void vertical_compose_dirac53iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width) \
+static void vertical_compose_dirac53iH0##ext(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2, int width) \
 { \
     int i, width_align = width&~(align-1); \
+    int16_t *b0 = (int16_t *)_b0; \
+    int16_t *b1 = (int16_t *)_b1; \
+    int16_t *b2 = (int16_t *)_b2; \
 \
     for(i=width_align; i<width; i++) \
         b1[i] = COMPOSE_DIRAC53iH0(b0[i], b1[i], b2[i]); \
@@ -53,10 +59,15 @@ static void vertical_compose_dirac53iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELE
     ff_vertical_compose_dirac53iH0##ext(b0, b1, b2, width_align); \
 } \
 \
-static void vertical_compose_dd137iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, \
-                                           IDWTELEM *b3, IDWTELEM *b4, int width) \
+static void vertical_compose_dd137iL0##ext(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2, \
+                                           uint8_t *_b3, uint8_t *_b4, int width) \
 { \
     int i, width_align = width&~(align-1); \
+    int16_t *b0 = (int16_t *)_b0; \
+    int16_t *b1 = (int16_t *)_b1; \
+    int16_t *b2 = (int16_t *)_b2; \
+    int16_t *b3 = (int16_t *)_b3; \
+    int16_t *b4 = (int16_t *)_b4; \
 \
     for(i=width_align; i<width; i++) \
         b2[i] = COMPOSE_DD137iL0(b0[i], b1[i], b2[i], b3[i], b4[i]); \
@@ -64,19 +75,26 @@ static void vertical_compose_dd137iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM
     ff_vertical_compose_dd137iL0##ext(b0, b1, b2, b3, b4, width_align); \
 } \
 \
-static void vertical_compose_dd97iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, \
-                                          IDWTELEM *b3, IDWTELEM *b4, int width) \
+static void vertical_compose_dd97iH0##ext(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2, \
+                                          uint8_t *_b3, uint8_t *_b4, int width) \
 { \
     int i, width_align = width&~(align-1); \
+    int16_t *b0 = (int16_t *)_b0; \
+    int16_t *b1 = (int16_t *)_b1; \
+    int16_t *b2 = (int16_t *)_b2; \
+    int16_t *b3 = (int16_t *)_b3; \
+    int16_t *b4 = (int16_t *)_b4; \
 \
     for(i=width_align; i<width; i++) \
         b2[i] = COMPOSE_DD97iH0(b0[i], b1[i], b2[i], b3[i], b4[i]); \
 \
     ff_vertical_compose_dd97iH0##ext(b0, b1, b2, b3, b4, width_align); \
 } \
-static void vertical_compose_haar##ext(IDWTELEM *b0, IDWTELEM *b1, int width) \
+static void vertical_compose_haar##ext(uint8_t *_b0, uint8_t *_b1, int width) \
 { \
     int i, width_align = width&~(align-1); \
+    int16_t *b0 = (int16_t *)_b0; \
+    int16_t *b1 = (int16_t *)_b1; \
 \
     for(i=width_align; i<width; i++) { \
         b0[i] = COMPOSE_HAARiL0(b0[i], b1[i]); \
@@ -85,10 +103,13 @@ static void vertical_compose_haar##ext(IDWTELEM *b0, IDWTELEM *b1, int width) \
 \
     ff_vertical_compose_haar##ext(b0, b1, width_align); \
 } \
-static void horizontal_compose_haar0i##ext(IDWTELEM *b, IDWTELEM *tmp, int w)\
+static void horizontal_compose_haar0i##ext(uint8_t *_b, uint8_t *_tmp, int w)\
 {\
     int w2= w>>1;\
     int x= w2 - (w2&(align-1));\
+    int16_t *b = (int16_t *)_b; \
+    int16_t *tmp = (int16_t *)_tmp; \
+\
     ff_horizontal_compose_haar0i##ext(b, tmp, w);\
 \
     for (; x < w2; x++) {\
@@ -96,10 +117,13 @@ static void horizontal_compose_haar0i##ext(IDWTELEM *b, IDWTELEM *tmp, int w)\
         b[2*x+1] = COMPOSE_HAARiH0(b[x+w2], tmp[x]);\
     }\
 }\
-static void horizontal_compose_haar1i##ext(IDWTELEM *b, IDWTELEM *tmp, int w)\
+static void horizontal_compose_haar1i##ext(uint8_t *_b, uint8_t *_tmp, int w)\
 {\
     int w2= w>>1;\
     int x= w2 - (w2&(align-1));\
+    int16_t *b = (int16_t *)_b; \
+    int16_t *tmp = (int16_t *)_tmp; \
+\
     ff_horizontal_compose_haar1i##ext(b, tmp, w);\
 \
     for (; x < w2; x++) {\
@@ -116,12 +140,15 @@ COMPOSE_VERTICAL(_mmx, 4)
 COMPOSE_VERTICAL(_sse2, 8)
 
 
-void ff_horizontal_compose_dd97i_ssse3(IDWTELEM *b, IDWTELEM *tmp, int w);
+void ff_horizontal_compose_dd97i_ssse3(int16_t *_b, int16_t *_tmp, int w);
 
-static void horizontal_compose_dd97i_ssse3(IDWTELEM *b, IDWTELEM *tmp, int w)
+static void horizontal_compose_dd97i_ssse3(uint8_t *_b, uint8_t *_tmp, int w)
 {
     int w2= w>>1;
     int x= w2 - (w2&7);
+    int16_t *b = (int16_t *)_b;
+    int16_t *tmp = (int16_t *)_tmp;
+
     ff_horizontal_compose_dd97i_ssse3(b, tmp, w);
 
     for (; x < w2; x++) {
@@ -131,7 +158,7 @@ static void horizontal_compose_dd97i_ssse3(IDWTELEM *b, IDWTELEM *tmp, int w)
 }
 #endif
 
-void ff_spatial_idwt_init_mmx(DWTContext *d, enum dwt_type type)
+void ff_spatial_idwt_init_x86(DWTContext *d, enum dwt_type type)
 {
 #if HAVE_YASM
   int mm_flags = av_get_cpu_flags();
diff --git a/libavcodec/x86/diracdsp_yasm.asm b/libavcodec/x86/diracdsp.asm
similarity index 98%
rename from libavcodec/x86/diracdsp_yasm.asm
rename to libavcodec/x86/diracdsp.asm
index d3cf9f19..a042413c 100644
--- a/libavcodec/x86/diracdsp_yasm.asm
+++ b/libavcodec/x86/diracdsp.asm
@@ -149,8 +149,8 @@ cglobal put_signed_rect_clamped_%1, 5,9,3, dst, dst_stride, src, src_stride, w,
     %define hd r5mp
 %endif
 
-.loopy
-    lea     src2q, [srcq+src_strideq*2]
+.loopy:
+    lea     src2q, [srcq+src_strideq]
     lea     dst2q, [dstq+dst_strideq]
 .loopx:
     sub      wd, mmsize
@@ -164,7 +164,7 @@ cglobal put_signed_rect_clamped_%1, 5,9,3, dst, dst_stride, src, src_stride, w,
     mova    [dst2q+wq], m2
     jg      .loopx
 
-    lea   srcq, [srcq+src_strideq*4]
+    lea   srcq, [srcq+src_strideq*2]
     lea   dstq, [dstq+dst_strideq*2]
     sub     hd, 2
     mov     wd, wspill
diff --git a/libavcodec/x86/diracdsp_mmx.c b/libavcodec/x86/diracdsp_init.c
similarity index 70%
rename from libavcodec/x86/diracdsp_mmx.c
rename to libavcodec/x86/diracdsp_init.c
index 11df5e39..5fae7989 100644
--- a/libavcodec/x86/diracdsp_mmx.c
+++ b/libavcodec/x86/diracdsp_init.c
@@ -19,14 +19,35 @@
  */
 
 #include "libavutil/x86/cpu.h"
-#include "diracdsp_mmx.h"
+#include "libavcodec/diracdsp.h"
 #include "fpel.h"
 
+DECL_DIRAC_PIXOP(put, mmx);
+DECL_DIRAC_PIXOP(avg, mmx);
+DECL_DIRAC_PIXOP(avg, mmxext);
+
+void ff_put_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h);
+void ff_avg_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h);
+void ff_put_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h);
+void ff_avg_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h);
+
+void ff_add_rect_clamped_mmx(uint8_t *, const uint16_t *, int, const int16_t *, int, int, int);
+void ff_add_rect_clamped_sse2(uint8_t *, const uint16_t *, int, const int16_t *, int, int, int);
+
+void ff_add_dirac_obmc8_mmx(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
+void ff_add_dirac_obmc16_mmx(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
+void ff_add_dirac_obmc32_mmx(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
+
+void ff_add_dirac_obmc16_sse2(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
+void ff_add_dirac_obmc32_sse2(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
+
 void ff_put_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
 void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
 void ff_put_signed_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
 void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
 
+#if HAVE_YASM
+
 #define HPEL_FILTER(MMSIZE, EXT)                                                             \
     void ff_dirac_hpel_filter_v_ ## EXT(uint8_t *, const uint8_t *, int, int);               \
     void ff_dirac_hpel_filter_h_ ## EXT(uint8_t *, const uint8_t *, int);                    \
@@ -47,11 +68,6 @@ void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t
         }                                                                                    \
     }
 
-#if !ARCH_X86_64
-HPEL_FILTER(8, mmx)
-#endif
-HPEL_FILTER(16, sse2)
-
 #define PIXFUNC(PFX, IDX, EXT)                                                   \
     /*MMXDISABLEDc->PFX ## _dirac_pixels_tab[0][IDX] = ff_ ## PFX ## _dirac_pixels8_ ## EXT;*/  \
     c->PFX ## _dirac_pixels_tab[1][IDX] = ff_ ## PFX ## _dirac_pixels16_ ## EXT; \
@@ -91,22 +107,22 @@ void ff_put_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride,
     if (h&3)
         ff_put_dirac_pixels16_c(dst, src, stride, h);
     else
-    ff_put_pixels16_sse2(dst, src[0], stride, h);
+        ff_put_pixels16_sse2(dst, src[0], stride, h);
 }
 void ff_avg_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
 {
     if (h&3)
         ff_avg_dirac_pixels16_c(dst, src, stride, h);
     else
-    ff_avg_pixels16_sse2(dst, src[0], stride, h);
+        ff_avg_pixels16_sse2(dst, src[0], stride, h);
 }
 void ff_put_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
 {
     if (h&3) {
         ff_put_dirac_pixels32_c(dst, src, stride, h);
     } else {
-    ff_put_pixels16_sse2(dst   , src[0]   , stride, h);
-    ff_put_pixels16_sse2(dst+16, src[0]+16, stride, h);
+        ff_put_pixels16_sse2(dst   , src[0]   , stride, h);
+        ff_put_pixels16_sse2(dst+16, src[0]+16, stride, h);
     }
 }
 void ff_avg_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
@@ -114,26 +130,41 @@ void ff_avg_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride,
     if (h&3) {
         ff_avg_dirac_pixels32_c(dst, src, stride, h);
     } else {
-    ff_avg_pixels16_sse2(dst   , src[0]   , stride, h);
-    ff_avg_pixels16_sse2(dst+16, src[0]+16, stride, h);
+        ff_avg_pixels16_sse2(dst   , src[0]   , stride, h);
+        ff_avg_pixels16_sse2(dst+16, src[0]+16, stride, h);
     }
 }
 
-void ff_diracdsp_init_mmx(DiracDSPContext* c)
+#else // HAVE_YASM
+
+#define HPEL_FILTER(MMSIZE, EXT)                                                     \
+    void dirac_hpel_filter_ ## EXT(uint8_t *dsth, uint8_t *dstv, uint8_t *dstc,              \
+                                   const uint8_t *src, int stride, int width, int height);
+
+#define PIXFUNC(PFX, IDX, EXT) do {} while (0)
+
+#endif // HAVE_YASM
+
+#if !ARCH_X86_64
+HPEL_FILTER(8, mmx)
+#endif
+HPEL_FILTER(16, sse2)
+
+void ff_diracdsp_init_x86(DiracDSPContext* c)
 {
     int mm_flags = av_get_cpu_flags();
 
     if (EXTERNAL_MMX(mm_flags)) {
-    c->add_dirac_obmc[0] = ff_add_dirac_obmc8_mmx;
+        c->add_dirac_obmc[0] = ff_add_dirac_obmc8_mmx;
 #if !ARCH_X86_64
-    c->add_dirac_obmc[1] = ff_add_dirac_obmc16_mmx;
-    c->add_dirac_obmc[2] = ff_add_dirac_obmc32_mmx;
-    c->dirac_hpel_filter = dirac_hpel_filter_mmx;
-    c->add_rect_clamped = ff_add_rect_clamped_mmx;
-    c->put_signed_rect_clamped = ff_put_signed_rect_clamped_mmx;
+        c->add_dirac_obmc[1] = ff_add_dirac_obmc16_mmx;
+        c->add_dirac_obmc[2] = ff_add_dirac_obmc32_mmx;
+        c->dirac_hpel_filter = dirac_hpel_filter_mmx;
+        c->add_rect_clamped = ff_add_rect_clamped_mmx;
+        c->put_signed_rect_clamped[0] = (void *)ff_put_signed_rect_clamped_mmx;
 #endif
-    PIXFUNC(put, 0, mmx);
-    PIXFUNC(avg, 0, mmx);
+        PIXFUNC(put, 0, mmx);
+        PIXFUNC(avg, 0, mmx);
     }
 
     if (EXTERNAL_MMXEXT(mm_flags)) {
@@ -143,7 +174,7 @@ void ff_diracdsp_init_mmx(DiracDSPContext* c)
     if (EXTERNAL_SSE2(mm_flags)) {
         c->dirac_hpel_filter = dirac_hpel_filter_sse2;
         c->add_rect_clamped = ff_add_rect_clamped_sse2;
-        c->put_signed_rect_clamped = ff_put_signed_rect_clamped_sse2;
+        c->put_signed_rect_clamped[0] = (void *)ff_put_signed_rect_clamped_sse2;
 
         c->add_dirac_obmc[1] = ff_add_dirac_obmc16_sse2;
         c->add_dirac_obmc[2] = ff_add_dirac_obmc32_sse2;
diff --git a/libavcodec/x86/diracdsp_mmx.h b/libavcodec/x86/diracdsp_mmx.h
deleted file mode 100644
index 89858544..00000000
--- a/libavcodec/x86/diracdsp_mmx.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2010 David Conrad
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVCODEC_X86_DIRACDSP_H
-#define AVCODEC_X86_DIRACDSP_H
-
-#include "libavcodec/diracdsp.h"
-
-void ff_diracdsp_init_mmx(DiracDSPContext* c);
-
-DECL_DIRAC_PIXOP(put, mmx);
-DECL_DIRAC_PIXOP(avg, mmx);
-DECL_DIRAC_PIXOP(avg, mmxext);
-
-void ff_put_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h);
-void ff_avg_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h);
-void ff_put_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h);
-void ff_avg_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h);
-
-void ff_add_rect_clamped_mmx(uint8_t *, const uint16_t *, int, const int16_t *, int, int, int);
-void ff_add_rect_clamped_sse2(uint8_t *, const uint16_t *, int, const int16_t *, int, int, int);
-
-void ff_add_dirac_obmc8_mmx(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
-void ff_add_dirac_obmc16_mmx(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
-void ff_add_dirac_obmc32_mmx(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
-
-void ff_add_dirac_obmc16_sse2(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
-void ff_add_dirac_obmc32_sse2(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
-
-#endif
diff --git a/libavcodec/x86/fft.asm b/libavcodec/x86/fft.asm
index f233774f..22d98666 100644
--- a/libavcodec/x86/fft.asm
+++ b/libavcodec/x86/fft.asm
@@ -90,7 +90,7 @@ cextern cos_ %+ i
     %1
 %endmacro
 
-SECTION_TEXT
+SECTION .text
 
 %macro T2_3DNOW 4 ; z0, z1, mem0, mem1
     mova     %1, %3
diff --git a/libavcodec/x86/flac_dsp_gpl.asm b/libavcodec/x86/flac_dsp_gpl.asm
index cedf0837..e2851581 100644
--- a/libavcodec/x86/flac_dsp_gpl.asm
+++ b/libavcodec/x86/flac_dsp_gpl.asm
@@ -22,7 +22,7 @@
 
 %include "libavutil/x86/x86util.asm"
 
-SECTION_TEXT
+SECTION .text
 
 INIT_XMM sse4
 %if ARCH_X86_64
diff --git a/libavcodec/x86/flacdsp.asm b/libavcodec/x86/flacdsp.asm
index 901c440c..71386115 100644
--- a/libavcodec/x86/flacdsp.asm
+++ b/libavcodec/x86/flacdsp.asm
@@ -25,6 +25,15 @@
 
 SECTION .text
 
+%macro PMACSDQL 5
+%if cpuflag(xop)
+    pmacsdql %1, %2, %3, %1
+%else
+    pmuldq   %2, %3
+    paddq    %1, %2
+%endif
+%endmacro
+
 %macro LPC_32 1
 INIT_XMM %1
 cglobal flac_lpc_32, 5,6,5, decoded, coeffs, pred_order, qlevel, len, j
diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm
index f4fc0c20..8f62a0a0 100644
--- a/libavcodec/x86/fmtconvert.asm
+++ b/libavcodec/x86/fmtconvert.asm
@@ -21,7 +21,7 @@
 
 %include "libavutil/x86/x86util.asm"
 
-SECTION_TEXT
+SECTION .text
 
 ;------------------------------------------------------------------------------
 ; void ff_int32_to_float_fmul_scalar(float *dst, const int32_t *src, float mul,
@@ -39,7 +39,7 @@ cglobal int32_to_float_fmul_scalar, 4, 4, %1, dst, src, mul, len
     movss   m0, mulm
 %endif
     SPLATD  m0
-    shl     lenq, 2
+    shl     lend, 2
     add     srcq, lenq
     add     dstq, lenq
     neg     lenq
@@ -61,7 +61,14 @@ cglobal int32_to_float_fmul_scalar, 4, 4, %1, dst, src, mul, len
     mova  [dstq+lenq+16], m2
     add     lenq, 32
     jl .loop
-    REP_RET
+%if notcpuflag(sse2)
+    ;; cvtpi2ps switches to MMX even if the source is a memory location;
+    ;; possibly an error in documentation since every tested CPU disagrees with
+    ;; that. Use emms anyway since the vast majority of machines will use the
+    ;; SSE2 variant.
+    emms
+%endif
+    RET
 %endmacro
 
 INIT_XMM sse
@@ -100,7 +107,14 @@ cglobal int32_to_float_fmul_array8, 5, 5, 5, c, dst, src, mul, len
     add     mulq, 4
     add     lenq, 32
     jl .loop
-    REP_RET
+%if notcpuflag(sse2)
+    ;; cvtpi2ps switches to MMX even if the source is a memory location;
+    ;; possibly an error in documentation since every tested CPU disagrees with
+    ;; that. Use emms anyway since the vast majority of machines will use the
+    ;; SSE2 variant.
+    emms
+%endif
+    RET
 %endmacro
 
 INIT_XMM sse
diff --git a/libavcodec/x86/fpel.h b/libavcodec/x86/fpel.h
index 4d93959a..4e83cf71 100644
--- a/libavcodec/x86/fpel.h
+++ b/libavcodec/x86/fpel.h
@@ -22,6 +22,10 @@
 #include <stddef.h>
 #include <stdint.h>
 
+void ff_avg_pixels4_mmx(uint8_t *block, const uint8_t *pixels,
+                        ptrdiff_t line_size, int h);
+void ff_avg_pixels4_mmxext(uint8_t *block, const uint8_t *pixels,
+                           ptrdiff_t line_size, int h);
 void ff_avg_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
                         ptrdiff_t line_size, int h);
 void ff_avg_pixels8_mmxext(uint8_t *block, const uint8_t *pixels,
@@ -32,10 +36,10 @@ void ff_avg_pixels16_mmxext(uint8_t *block, const uint8_t *pixels,
                             ptrdiff_t line_size, int h);
 void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
                           ptrdiff_t line_size, int h);
+void ff_put_pixels4_mmx(uint8_t *block, const uint8_t *pixels,
+                        ptrdiff_t line_size, int h);
 void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
                         ptrdiff_t line_size, int h);
-void ff_put_pixels8_mmxext(uint8_t *block, const uint8_t *pixels,
-                           ptrdiff_t line_size, int h);
 void ff_put_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
                          ptrdiff_t line_size, int h);
 void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
diff --git a/libavcodec/x86/g722dsp.asm b/libavcodec/x86/g722dsp.asm
index 807a1bdd..a5294222 100644
--- a/libavcodec/x86/g722dsp.asm
+++ b/libavcodec/x86/g722dsp.asm
@@ -29,7 +29,7 @@ pw_qmf_coeffs2: dw  12, 3876, -156,  951,   32, -805, 362, -210
 pw_qmf_coeffs3: dw 362,    0 ,  32,    0, -156,    0,  12,    0
 pw_qmf_coeffs4: dw  53,    0,  -11,    0,  -11,    0,   3,    0
 
-SECTION_TEXT
+SECTION .text
 
 INIT_XMM sse2
 cglobal g722_apply_qmf, 2, 2, 5, prev, out
diff --git a/libavcodec/x86/h263_loopfilter.asm b/libavcodec/x86/h263_loopfilter.asm
index 2fcd1a26..77c8cf15 100644
--- a/libavcodec/x86/h263_loopfilter.asm
+++ b/libavcodec/x86/h263_loopfilter.asm
@@ -26,7 +26,7 @@ SECTION_RODATA
 cextern pb_FC
 cextern h263_loop_filter_strength
 
-SECTION_TEXT
+SECTION .text
 
 %macro H263_LOOP_FILTER 5
     pxor         m7, m7
diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
index 14c8205b..4aabbc08 100644
--- a/libavcodec/x86/h264_deblock.asm
+++ b/libavcodec/x86/h264_deblock.asm
@@ -446,13 +446,13 @@ cglobal deblock_%1_luma_8, 5,5,8,2*%2
 ;                        int8_t *tc0)
 ;-----------------------------------------------------------------------------
 INIT_MMX cpuname
-cglobal deblock_h_luma_8, 0,5,8,0x60+HAVE_ALIGNED_STACK*12
+cglobal deblock_h_luma_8, 0,5,8,0x60+12
     mov    r0, r0mp
     mov    r3, r1m
     lea    r4, [r3*3]
     sub    r0, 4
     lea    r1, [r0+r4]
-%define pix_tmp esp+12*HAVE_ALIGNED_STACK
+%define pix_tmp esp+12
 
     ; transpose 6x16 -> tmp space
     TRANSPOSE6x8_MEM  PASS8ROWS(r0, r1, r3, r4), pix_tmp
@@ -864,7 +864,52 @@ ff_chroma_inter_body_mmxext:
     DEBLOCK_P0_Q0
     ret
 
+%define t5 r4 ; t5/t6 are aliases for r4/r5, used as PASS8ROWS operands below
+%define t6 r5
+; deblock_h_chroma422_8: chroma deblock done as two 8-row halves; C prototype args are (pix, stride, alpha, beta, tc0)
+cglobal deblock_h_chroma422_8, 5, 6
+    SUB rsp, (1+ARCH_X86_64*2)*mmsize ; scratch: 1 slot for the expanded tc0, plus 2 spill slots on x86-64
+    %if ARCH_X86_64
+        %define buf0 [rsp+16]
+        %define buf1 [rsp+8]
+    %else ; x86-32: buf0/buf1 reuse the r0m/r2m argument memory slots instead of extra scratch
+        %define buf0 r0m
+        %define buf1 r2m
+    %endif
+    ; double each of the 4 tc0 bytes (a,b,c,d -> a,a,b,b,c,c,d,d) and park the 8 bytes at [rsp]
+    movd m6, [r4] ; r4 = tc0 (5th argument), 4 bytes; read here because t5 aliases r4
+    punpcklbw m6, m6
+    movq [rsp], m6
+    CHROMA_H_START ; NOTE(review): defined outside this hunk; presumably sets up the r0/t5/t6 row pointers - confirm
+    ; ---- first 8 rows: uses the low 4 bytes of the expanded tc0 ----
+    TRANSPOSE4x8B_LOAD PASS8ROWS(t5, r0, r1, t6)
+    movq buf0, m0 ; m0 and m3 are saved here and reloaded just before the store-back
+    movq buf1, m3
+    LOAD_MASK r2d, r3d
+    movd m6, [rsp] ; a,a,b,b
+    punpcklwd m6, m6 ; a,a,a,a,b,b,b,b: four copies of each tc0 byte
+    pand m7, m6 ; NOTE(review): m7 is presumably the filter mask left by LOAD_MASK - confirm
+    DEBLOCK_P0_Q0
+    movq m0, buf0
+    movq m3, buf1
+    TRANSPOSE8x4B_STORE PASS8ROWS(t5, r0, r1, t6)
 
+    lea r0, [r0+r1*8] ; advance both row pointers by 8 lines (r1 = stride, 2nd argument)
+    lea t5, [t5+r1*8]
+    ; ---- next 8 rows: same sequence with the high 4 bytes of the expanded tc0 ----
+    TRANSPOSE4x8B_LOAD PASS8ROWS(t5, r0, r1, t6)
+    movq buf0, m0
+    movq buf1, m3
+    LOAD_MASK r2d, r3d
+    movd m6, [rsp+4] ; c,c,d,d
+    punpcklwd m6, m6 ; c,c,c,c,d,d,d,d
+    pand m7, m6
+    DEBLOCK_P0_Q0
+    movq m0, buf0
+    movq m3, buf1
+    TRANSPOSE8x4B_STORE PASS8ROWS(t5, r0, r1, t6)
+    ADD rsp, (1+ARCH_X86_64*2)*mmsize ; release the scratch area reserved at entry
+RET
 
 ; in: %1=p0 %2=p1 %3=q1
 ; out: p0 = (p0 + q1 + 2*p1 + 2) >> 2
@@ -877,9 +922,6 @@ ff_chroma_inter_body_mmxext:
     pavgb   %1, %2             ; dst = avg(p1, avg(p0,q1) - ((p0^q1)&1))
 %endmacro
 
-%define t5 r4
-%define t6 r5
-
 ;------------------------------------------------------------------------------
 ; void ff_deblock_v_chroma_intra(uint8_t *pix, int stride, int alpha, int beta)
 ;------------------------------------------------------------------------------
diff --git a/libavcodec/x86/h264_idct_10bit.asm b/libavcodec/x86/h264_idct_10bit.asm
index cc115b0f..f1c2c81e 100644
--- a/libavcodec/x86/h264_idct_10bit.asm
+++ b/libavcodec/x86/h264_idct_10bit.asm
@@ -24,14 +24,11 @@
 
 %include "libavutil/x86/x86util.asm"
 
-SECTION_RODATA
-
-pd_32:        times 4 dd 32
-
 SECTION .text
 
 cextern pw_1023
 %define pw_pixel_max pw_1023
+cextern pd_32
 
 ;-----------------------------------------------------------------------------
 ; void ff_h264_idct_add_10(pixel *dst, int16_t *block, int stride)
diff --git a/libavcodec/x86/h264_intrapred_10bit.asm b/libavcodec/x86/h264_intrapred_10bit.asm
index 9aeb7024..9e40cfe2 100644
--- a/libavcodec/x86/h264_intrapred_10bit.asm
+++ b/libavcodec/x86/h264_intrapred_10bit.asm
@@ -34,11 +34,11 @@ cextern pw_8
 cextern pw_4
 cextern pw_2
 cextern pw_1
+cextern pd_16
 
 pw_m32101234: dw -3, -2, -1, 0, 1, 2, 3, 4
 pw_m3:        times 8 dw -3
 pd_17:        times 4 dd 17
-pd_16:        times 4 dd 16
 
 SECTION .text
 
diff --git a/libavcodec/x86/h264_qpel.c b/libavcodec/x86/h264_qpel.c
index 33a7fb03..d759e888 100644
--- a/libavcodec/x86/h264_qpel.c
+++ b/libavcodec/x86/h264_qpel.c
@@ -23,16 +23,12 @@
 #include "libavutil/cpu.h"
 #include "libavutil/x86/asm.h"
 #include "libavutil/x86/cpu.h"
+#include "libavcodec/h264.h"
 #include "libavcodec/h264qpel.h"
-#include "libavcodec/mpegvideo.h"
 #include "libavcodec/pixels.h"
 #include "fpel.h"
 
 #if HAVE_YASM
-void ff_put_pixels4_mmx(uint8_t *block, const uint8_t *pixels,
-                        ptrdiff_t line_size, int h);
-void ff_avg_pixels4_mmxext(uint8_t *block, const uint8_t *pixels,
-                           ptrdiff_t line_size, int h);
 void ff_put_pixels4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
                               int dstStride, int src1Stride, int h);
 void ff_avg_pixels4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
diff --git a/libavcodec/x86/h264_qpel_10bit.asm b/libavcodec/x86/h264_qpel_10bit.asm
index 757c4258..87226830 100644
--- a/libavcodec/x86/h264_qpel_10bit.asm
+++ b/libavcodec/x86/h264_qpel_10bit.asm
@@ -26,6 +26,7 @@
 
 SECTION_RODATA 32
 
+cextern pd_65535
 cextern pw_1023
 %define pw_pixel_max pw_1023
 cextern pw_16
@@ -42,7 +43,6 @@ unpad: times 8 dw 16*1022/32 ; needs to be mod 16
 tap1: times 4 dw  1, -5
 tap2: times 4 dw 20, 20
 tap3: times 4 dw -5,  1
-pd_0f: times 4 dd 0xffff
 
 SECTION .text
 
@@ -386,7 +386,7 @@ MC_CACHE MC10
 ; void ff_h264_qpel_mc02(uint8_t *dst, uint8_t *src, int stride)
 ;-----------------------------------------------------------------------------
 %macro V_FILT 10
-v_filt%9_%10_10
+v_filt%9_%10_10:
     add    r4, r2
 .no_addr4:
     FILT_V m0, m1, m2, m3, m4, m5, m6, m7
@@ -708,7 +708,7 @@ h%1_loop_op:
     psrad      m1, 10
     psrad      m2, 10
     pslld      m2, 16
-    pand       m1, [pd_0f]
+    pand       m1, [pd_65535]
     por        m1, m2
 %if num_mmregs <= 8
     pxor       m0, m0
diff --git a/libavcodec/x86/h264_weight.asm b/libavcodec/x86/h264_weight.asm
index 1e1219dd..6c57d57b 100644
--- a/libavcodec/x86/h264_weight.asm
+++ b/libavcodec/x86/h264_weight.asm
@@ -136,15 +136,15 @@ WEIGHT_FUNC_HALF_MM 8, 8
     or   off_regd, 1
     add        r4, 1
     cmp        r6d, 128
-     je .nonnormal
+    je .nonnormal
     cmp        r5, 128
-     jne .normal
-.nonnormal
+    jne .normal
+.nonnormal:
     sar        r5, 1
     sar        r6, 1
     sar  off_regd, 1
     sub        r4, 1
-.normal
+.normal:
 %if cpuflag(ssse3)
     movd       m4, r5d
     movd       m0, r6d
diff --git a/libavcodec/x86/h264dsp_init.c b/libavcodec/x86/h264dsp_init.c
index 35db2001..c8cd0650 100644
--- a/libavcodec/x86/h264dsp_init.c
+++ b/libavcodec/x86/h264dsp_init.c
@@ -129,6 +129,8 @@ LF_IFUNC(v, chroma_intra, depth, avx)
 LF_FUNCS(uint8_t,   8)
 LF_FUNCS(uint16_t, 10)
 
+void ff_deblock_h_chroma422_8_mmxext(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0);
+
 #if ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL
 LF_FUNC(v8, luma, 8, mmxext)
 static void deblock_v_luma_8_mmxext(uint8_t *pix, int stride, int alpha,
@@ -245,6 +247,8 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
             if (chroma_format_idc <= 1) {
                 c->h264_h_loop_filter_chroma       = ff_deblock_h_chroma_8_mmxext;
                 c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma_intra_8_mmxext;
+            } else {
+                c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_8_mmxext;
             }
 #if ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL
             c->h264_v_loop_filter_luma       = deblock_v_luma_8_mmxext;
diff --git a/libavcodec/x86/hevc_idct.asm b/libavcodec/x86/hevc_idct.asm
index 481726d2..2edaf9ae 100644
--- a/libavcodec/x86/hevc_idct.asm
+++ b/libavcodec/x86/hevc_idct.asm
@@ -21,7 +21,7 @@
 ; */
 %include "libavutil/x86/x86util.asm"
 
-SECTION_TEXT 32
+SECTION .text
 
 ; void ff_hevc_idctHxW_dc_{8,10}_<opt>(int16_t *coeffs)
 ; %1 = HxW
diff --git a/libavcodec/x86/hevc_mc.asm b/libavcodec/x86/hevc_mc.asm
index 986493f2..ff6ed071 100644
--- a/libavcodec/x86/hevc_mc.asm
+++ b/libavcodec/x86/hevc_mc.asm
@@ -40,7 +40,6 @@ max_pixels_12:          times 16 dw ((1 << 12)-1)
 cextern pd_1
 cextern pb_0
 
-SECTION_TEXT 32
 %macro EPEL_TABLE 4
 hevc_epel_filters_%4_%1 times %2 d%3 -2, 58
                         times %2 d%3 10, -2
@@ -88,6 +87,8 @@ QPEL_TABLE 12, 4, w, sse4
 QPEL_TABLE  8,16, b, avx2
 QPEL_TABLE 10, 8, w, avx2
 
+SECTION .text
+
 %define MAX_PB_SIZE  64
 
 %define hevc_qpel_filters_sse4_14 hevc_qpel_filters_sse4_10
@@ -683,7 +684,7 @@ HEVC_BI_PEL_PIXELS  %1, %2
 %macro HEVC_PEL_PIXELS 2
 cglobal hevc_put_hevc_pel_pixels%1_%2, 4, 4, 3, dst, src, srcstride,height
     pxor               m2, m2
-.loop
+.loop:
     SIMPLE_LOAD       %1, %2, srcq, m0
     MC_PIXEL_COMPUTE  %1, %2, 1
     PEL_10STORE%1     dstq, m0, m1
@@ -693,7 +694,7 @@ cglobal hevc_put_hevc_pel_pixels%1_%2, 4, 4, 3, dst, src, srcstride,height
 
 %macro HEVC_UNI_PEL_PIXELS 2
 cglobal hevc_put_hevc_uni_pel_pixels%1_%2, 5, 5, 2, dst, dststride, src, srcstride,height
-.loop
+.loop:
     SIMPLE_LOAD       %1, %2, srcq, m0
     PEL_%2STORE%1   dstq, m0, m1
     add             dstq, dststrideq             ; dst += dststride
@@ -707,7 +708,7 @@ cglobal hevc_put_hevc_uni_pel_pixels%1_%2, 5, 5, 2, dst, dststride, src, srcstri
 cglobal hevc_put_hevc_bi_pel_pixels%1_%2, 6, 6, 6, dst, dststride, src, srcstride, src2, height
     pxor              m2, m2
     movdqa            m5, [pw_bi_%2]
-.loop
+.loop:
     SIMPLE_LOAD       %1, %2, srcq, m0
     SIMPLE_BILOAD     %1, src2q, m3, m4
     MC_PIXEL_COMPUTE  %1, %2, 1
@@ -739,7 +740,7 @@ cglobal hevc_put_hevc_bi_pel_pixels%1_%2, 6, 6, 6, dst, dststride, src, srcstrid
 cglobal hevc_put_hevc_epel_h%1_%2, 5, 6, XMM_REGS, dst, src, srcstride, height, mx, rfilter
 %assign %%stride ((%2 + 7)/8)
     EPEL_FILTER       %2, mx, m4, m5, rfilter
-.loop
+.loop:
     EPEL_LOAD         %2, srcq-%%stride, %%stride, %1
     EPEL_COMPUTE      %2, %1, m4, m5, 1
     PEL_10STORE%1      dstq, m0, m1
@@ -750,7 +751,7 @@ cglobal hevc_put_hevc_uni_epel_h%1_%2, 6, 7, XMM_REGS, dst, dststride, src, srcs
 %assign %%stride ((%2 + 7)/8)
     movdqa            m6, [pw_%2]
     EPEL_FILTER       %2, mx, m4, m5, rfilter
-.loop
+.loop:
     EPEL_LOAD         %2, srcq-%%stride, %%stride, %1
     EPEL_COMPUTE      %2, %1, m4, m5
     UNI_COMPUTE       %1, %2, m0, m1, m6
@@ -764,7 +765,7 @@ cglobal hevc_put_hevc_uni_epel_h%1_%2, 6, 7, XMM_REGS, dst, dststride, src, srcs
 cglobal hevc_put_hevc_bi_epel_h%1_%2, 7, 8, XMM_REGS, dst, dststride, src, srcstride, src2, height, mx, rfilter
     movdqa            m6, [pw_bi_%2]
     EPEL_FILTER       %2, mx, m4, m5, rfilter
-.loop
+.loop:
     EPEL_LOAD         %2, srcq-%%stride, %%stride, %1
     EPEL_COMPUTE      %2, %1, m4, m5, 1
     SIMPLE_BILOAD     %1, src2q, m2, m3
@@ -788,7 +789,7 @@ cglobal hevc_put_hevc_epel_v%1_%2, 4, 6, XMM_REGS, dst, src, srcstride, height,
     sub             srcq, srcstrideq
     EPEL_FILTER       %2, my, m4, m5, r3src
     lea           r3srcq, [srcstrideq*3]
-.loop
+.loop:
     EPEL_LOAD         %2, srcq, srcstride, %1
     EPEL_COMPUTE      %2, %1, m4, m5, 1
     PEL_10STORE%1     dstq, m0, m1
@@ -801,7 +802,7 @@ cglobal hevc_put_hevc_uni_epel_v%1_%2, 5, 7, XMM_REGS, dst, dststride, src, srcs
     sub             srcq, srcstrideq
     EPEL_FILTER       %2, my, m4, m5, r3src
     lea           r3srcq, [srcstrideq*3]
-.loop
+.loop:
     EPEL_LOAD         %2, srcq, srcstride, %1
     EPEL_COMPUTE      %2, %1, m4, m5
     UNI_COMPUTE       %1, %2, m0, m1, m6
@@ -819,7 +820,7 @@ cglobal hevc_put_hevc_bi_epel_v%1_%2, 6, 8, XMM_REGS, dst, dststride, src, srcst
     sub             srcq, srcstrideq
     EPEL_FILTER       %2, my, m4, m5, r3src
     lea           r3srcq, [srcstrideq*3]
-.loop
+.loop:
     EPEL_LOAD         %2, srcq, srcstride, %1
     EPEL_COMPUTE      %2, %1, m4, m5, 1
     SIMPLE_BILOAD     %1, src2q, m2, m3
@@ -866,7 +867,7 @@ cglobal hevc_put_hevc_epel_hv%1_%2, 6, 7, 16 , dst, src, srcstride, height, mx,
 %endif
     SWAP              m6, m0
     add             srcq, srcstrideq
-.loop
+.loop:
     EPEL_LOAD         %2, srcq-%%stride, %%stride, %1
     EPEL_COMPUTE      %2, %1, m14, m15
 %if (%1 > 8 && (%2 == 8))
@@ -932,7 +933,7 @@ cglobal hevc_put_hevc_uni_epel_hv%1_%2, 7, 8, 16 , dst, dststride, src, srcstrid
 %endif
     SWAP              m6, m0
     add             srcq, srcstrideq
-.loop
+.loop:
     EPEL_LOAD         %2, srcq-%%stride, %%stride, %1
     EPEL_COMPUTE      %2, %1, m14, m15
 %if (%1 > 8 && (%2 == 8))
@@ -996,7 +997,7 @@ cglobal hevc_put_hevc_bi_epel_hv%1_%2, 8, 9, 16, dst, dststride, src, srcstride,
 %endif
     SWAP              m6, m0
     add             srcq, srcstrideq
-.loop
+.loop:
     EPEL_LOAD         %2, srcq-%%stride, %%stride, %1
     EPEL_COMPUTE      %2, %1, m14, m15
 %if (%1 > 8 && (%2 == 8))
@@ -1054,7 +1055,7 @@ cglobal hevc_put_hevc_bi_epel_hv%1_%2, 8, 9, 16, dst, dststride, src, srcstride,
 %macro HEVC_PUT_HEVC_QPEL 2
 cglobal hevc_put_hevc_qpel_h%1_%2, 5, 6, 16, dst, src, srcstride, height, mx, rfilter
     QPEL_FILTER       %2, mx
-.loop
+.loop:
     QPEL_H_LOAD       %2, srcq, %1, 10
     QPEL_COMPUTE      %1, %2, 1
 %if %2 > 8
@@ -1067,7 +1068,7 @@ cglobal hevc_put_hevc_qpel_h%1_%2, 5, 6, 16, dst, src, srcstride, height, mx, rf
 cglobal hevc_put_hevc_uni_qpel_h%1_%2, 6, 7, 16 , dst, dststride, src, srcstride, height, mx, rfilter
     mova              m9, [pw_%2]
     QPEL_FILTER       %2, mx
-.loop
+.loop:
     QPEL_H_LOAD       %2, srcq, %1, 10
     QPEL_COMPUTE      %1, %2
 %if %2 > 8
@@ -1084,7 +1085,7 @@ cglobal hevc_put_hevc_uni_qpel_h%1_%2, 6, 7, 16 , dst, dststride, src, srcstride
 cglobal hevc_put_hevc_bi_qpel_h%1_%2, 7, 8, 16 , dst, dststride, src, srcstride, src2, height, mx, rfilter
     movdqa            m9, [pw_bi_%2]
     QPEL_FILTER       %2, mx
-.loop
+.loop:
     QPEL_H_LOAD       %2, srcq, %1, 10
     QPEL_COMPUTE      %1, %2, 1
 %if %2 > 8
@@ -1111,7 +1112,7 @@ cglobal hevc_put_hevc_qpel_v%1_%2, 4, 8, 16, dst, src, srcstride, height, r3src,
     movifnidn        myd, mym
     lea           r3srcq, [srcstrideq*3]
     QPEL_FILTER       %2, my
-.loop
+.loop:
     QPEL_V_LOAD       %2, srcq, srcstride, %1, r7
     QPEL_COMPUTE      %1, %2, 1
 %if %2 > 8
@@ -1126,7 +1127,7 @@ cglobal hevc_put_hevc_uni_qpel_v%1_%2, 5, 9, 16, dst, dststride, src, srcstride,
     movdqa            m9, [pw_%2]
     lea           r3srcq, [srcstrideq*3]
     QPEL_FILTER       %2, my
-.loop
+.loop:
     QPEL_V_LOAD       %2, srcq, srcstride, %1, r8
     QPEL_COMPUTE      %1, %2
 %if %2 > 8
@@ -1145,7 +1146,7 @@ cglobal hevc_put_hevc_bi_qpel_v%1_%2, 6, 10, 16, dst, dststride, src, srcstride,
     movdqa            m9, [pw_bi_%2]
     lea           r3srcq, [srcstrideq*3]
     QPEL_FILTER       %2, my
-.loop
+.loop:
     QPEL_V_LOAD       %2, srcq, srcstride, %1, r9
     QPEL_COMPUTE      %1, %2, 1
 %if %2 > 8
@@ -1209,7 +1210,7 @@ cglobal hevc_put_hevc_qpel_hv%1_%2, 6, 8, 16, dst, src, srcstride, height, mx, m
     QPEL_HV_COMPUTE   %1, %2, mx, ackssdw
     SWAP             m14, m0
     add             srcq, srcstrideq
-.loop
+.loop:
     QPEL_H_LOAD       %2, srcq, %1, 15
     QPEL_HV_COMPUTE   %1, %2, mx, ackssdw
     SWAP             m15, m0
@@ -1285,7 +1286,7 @@ cglobal hevc_put_hevc_uni_qpel_hv%1_%2, 7, 9, 16 , dst, dststride, src, srcstrid
     QPEL_HV_COMPUTE   %1, %2, mx, ackssdw
     SWAP             m14, m0
     add             srcq, srcstrideq
-.loop
+.loop:
     QPEL_H_LOAD       %2, srcq, %1, 15
     QPEL_HV_COMPUTE   %1, %2, mx, ackssdw
     SWAP             m15, m0
@@ -1366,7 +1367,7 @@ cglobal hevc_put_hevc_bi_qpel_hv%1_%2, 8, 10, 16, dst, dststride, src, srcstride
     QPEL_HV_COMPUTE   %1, %2, mx, ackssdw
     SWAP             m14, m0
     add             srcq, srcstrideq
-.loop
+.loop:
     QPEL_H_LOAD       %2, srcq, %1, 15
     QPEL_HV_COMPUTE   %1, %2, mx, ackssdw
     SWAP             m15, m0
@@ -1444,7 +1445,7 @@ cglobal hevc_put_hevc_uni_w%1_%2, 6, 6, 7, dst, dststride, src, height, denom, w
 %if WIN64 || ARCH_X86_32
     mov           SHIFT, heightm
 %endif
-.loop
+.loop:
    SIMPLE_LOAD        %1, 10, srcq, m0
 %if %1 <= 4
     punpcklwd         m0, m1
@@ -1513,7 +1514,7 @@ cglobal hevc_put_hevc_bi_w%1_%2, 4, 6, 10, dst, dststride, src, src2, height, de
 %endif
     pslld             m4, m0
 
-.loop
+.loop:
    SIMPLE_LOAD        %1, 10, srcq,  m0
    SIMPLE_LOAD        %1, 10, src2q, m8
 %if %1 <= 4
diff --git a/libavcodec/x86/hevc_sao.asm b/libavcodec/x86/hevc_sao.asm
index 86ef847b..888a28af 100644
--- a/libavcodec/x86/hevc_sao.asm
+++ b/libavcodec/x86/hevc_sao.asm
@@ -1,5 +1,5 @@
 ;******************************************************************************
-;* SIMD optimized SAO functions for HEVC decoding
+;* SIMD optimized SAO functions for HEVC 8bit decoding
 ;*
 ;* Copyright (c) 2013 Pierre-Edouard LEPERE
 ;* Copyright (c) 2014 James Almer
@@ -25,27 +25,18 @@
 
 SECTION_RODATA 32
 
-pw_mask10: times 16 dw 0x03FF
-pw_mask12: times 16 dw 0x0FFF
-pw_m2:     times 16 dw -2
 pb_edge_shuffle: times 2 db 1, 2, 0, 3, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
 pb_eo:                   db -1, 0, 1, 0, 0, -1, 0, 1, -1, -1, 1, 1, 1, -1, -1, 1
-cextern pw_m1
-cextern pw_1
-cextern pw_2
 cextern pb_1
 cextern pb_2
 
-SECTION_TEXT
-
-%define MAX_PB_SIZE  64
-%define PADDING_SIZE 32 ; FF_INPUT_BUFFER_PADDING_SIZE
+SECTION .text
 
 ;******************************************************************************
 ;SAO Band Filter
 ;******************************************************************************
 
-%macro HEVC_SAO_BAND_FILTER_INIT 1
+%macro HEVC_SAO_BAND_FILTER_INIT 0
     and            leftq, 31
     movd             xm0, leftd
     add            leftq, 1
@@ -76,9 +67,6 @@ SECTION_TEXT
 %endif
 
 %if ARCH_X86_64
-%if %1 > 8
-    mova             m13, [pw_mask %+ %1]
-%endif
     pxor             m14, m14
 
 %else ; ARCH_X86_32
@@ -90,9 +78,6 @@ SECTION_TEXT
     mova  [rsp+mmsize*5], m5
     mova  [rsp+mmsize*6], m6
     pxor              m0, m0
-%if %1 > 8
-    mova              m1, [pw_mask %+ %1]
-%endif
     %assign MMSIZE mmsize
     %define m14 m0
     %define m13 m1
@@ -103,49 +88,49 @@ DEFINE_ARGS dst, src, dststride, srcstride, offset, height
     mov          heightd, r7m
 %endmacro
 
-%macro HEVC_SAO_BAND_FILTER_COMPUTE 3
-    psraw             %2, %3, %1-5
+%macro HEVC_SAO_BAND_FILTER_COMPUTE 2 ; %1 = scratch register, %2 = 16-bit pixels, updated in place
+    psraw             %1, %2, 3 ; pixel >> 3: the former "%1-5" shift with the bit depth fixed at 8
 %if ARCH_X86_64
-    pcmpeqw          m10, %2, m0
-    pcmpeqw          m11, %2, m1
-    pcmpeqw          m12, %2, m2
-    pcmpeqw           %2, m3
+    pcmpeqw          m10, %1, m0 ; per-lane mask: shifted pixel equals the band position held in m0
+    pcmpeqw          m11, %1, m1
+    pcmpeqw          m12, %1, m2
+    pcmpeqw           %1, m3
     pand             m10, m4
     pand             m11, m5
     pand             m12, m6
-    pand              %2, m7
+    pand              %1, m7 ; NOTE(review): m4..m7 presumably hold the splatted offsets (loaded outside this hunk) - confirm
     por              m10, m11
-    por              m12, %2
+    por              m12, %1
     por              m10, m12
-    paddw             %3, m10
+    paddw             %2, m10 ; add the selected value (zero in lanes where no compare matched) to the pixels
 %else ; ARCH_X86_32
-    pcmpeqw           m4, %2, [rsp+MMSIZE*0]
-    pcmpeqw           m5, %2, [rsp+MMSIZE*1]
-    pcmpeqw           m6, %2, [rsp+MMSIZE*2]
-    pcmpeqw           %2, [rsp+MMSIZE*3]
+    pcmpeqw           m4, %1, [rsp+MMSIZE*0] ; same compare/select sequence, with operands read from stack slots
+    pcmpeqw           m5, %1, [rsp+MMSIZE*1]
+    pcmpeqw           m6, %1, [rsp+MMSIZE*2]
+    pcmpeqw           %1, [rsp+MMSIZE*3]
     pand              m4, [rsp+MMSIZE*4]
     pand              m5, [rsp+MMSIZE*5]
     pand              m6, [rsp+MMSIZE*6]
-    pand              %2, m7
+    pand              %1, m7
     por               m4, m5
-    por               m6, %2
+    por               m6, %1
     por               m4, m6
-    paddw             %3, m4
+    paddw             %2, m4 ; add the selected value to the pixels in place
 %endif ; ARCH
 %endmacro
 
 ;void ff_hevc_sao_band_filter_<width>_8_<opt>(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src,
 ;                                             int16_t *sao_offset_val, int sao_left_class, int width, int height);
-%macro HEVC_SAO_BAND_FILTER_8 2
+%macro HEVC_SAO_BAND_FILTER 2
 cglobal hevc_sao_band_filter_%1_8, 6, 6, 15, 7*mmsize*ARCH_X86_32, dst, src, dststride, srcstride, offset, left
-    HEVC_SAO_BAND_FILTER_INIT 8
+    HEVC_SAO_BAND_FILTER_INIT
 
 align 16
-.loop
+.loop:
 %if %1 == 8
     movq              m8, [srcq]
     punpcklbw         m8, m14
-    HEVC_SAO_BAND_FILTER_COMPUTE 8, m9, m8
+    HEVC_SAO_BAND_FILTER_COMPUTE m9, m8
     packuswb          m8, m14
     movq          [dstq], m8
 %endif ; %1 == 8
@@ -154,9 +139,9 @@ align 16
 %rep %2
     mova             m13, [srcq + i]
     punpcklbw         m8, m13, m14
-    HEVC_SAO_BAND_FILTER_COMPUTE 8, m9,  m8
+    HEVC_SAO_BAND_FILTER_COMPUTE m9,  m8
     punpckhbw        m13, m14
-    HEVC_SAO_BAND_FILTER_COMPUTE 8, m9, m13
+    HEVC_SAO_BAND_FILTER_COMPUTE m9, m13
     packuswb          m8, m13
     mova      [dstq + i], m8
 %assign i i+mmsize
@@ -167,9 +152,9 @@ INIT_XMM cpuname
 
     mova             m13, [srcq + i]
     punpcklbw         m8, m13, m14
-    HEVC_SAO_BAND_FILTER_COMPUTE 8, m9,  m8
+    HEVC_SAO_BAND_FILTER_COMPUTE m9,  m8
     punpckhbw        m13, m14
-    HEVC_SAO_BAND_FILTER_COMPUTE 8, m9, m13
+    HEVC_SAO_BAND_FILTER_COMPUTE m9, m13
     packuswb          m8, m13
     mova      [dstq + i], m8
 %if cpuflag(avx2)
@@ -184,76 +169,13 @@ INIT_YMM cpuname
     REP_RET
 %endmacro
 
-;void ff_hevc_sao_band_filter_<width>_<depth>_<opt>(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src,
-;                                                   int16_t *sao_offset_val, int sao_left_class, int width, int height);
-%macro HEVC_SAO_BAND_FILTER_16 3
-cglobal hevc_sao_band_filter_%2_%1, 6, 6, 15, 7*mmsize*ARCH_X86_32, dst, src, dststride, srcstride, offset, left
-    HEVC_SAO_BAND_FILTER_INIT %1
-
-align 16
-.loop
-%if %2 == 8
-    movu              m8, [srcq]
-    HEVC_SAO_BAND_FILTER_COMPUTE %1, m9, m8
-    CLIPW             m8, m14, m13
-    movu          [dstq], m8
-%endif
-
-%assign i 0
-%rep %3
-    mova              m8, [srcq + i]
-    HEVC_SAO_BAND_FILTER_COMPUTE %1, m9, m8
-    CLIPW             m8, m14, m13
-    mova      [dstq + i], m8
-
-    mova              m9, [srcq + i + mmsize]
-    HEVC_SAO_BAND_FILTER_COMPUTE %1, m8, m9
-    CLIPW             m9, m14, m13
-    mova      [dstq + i + mmsize], m9
-%assign i i+mmsize*2
-%endrep
-
-%if %2 == 48
-INIT_XMM cpuname
-    mova              m8, [srcq + i]
-    HEVC_SAO_BAND_FILTER_COMPUTE %1, m9, m8
-    CLIPW             m8, m14, m13
-    mova      [dstq + i], m8
-
-    mova              m9, [srcq + i + mmsize]
-    HEVC_SAO_BAND_FILTER_COMPUTE %1, m8, m9
-    CLIPW             m9, m14, m13
-    mova      [dstq + i + mmsize], m9
-%if cpuflag(avx2)
-INIT_YMM cpuname
-%endif
-%endif ; %1 == 48
-
-    add             dstq, dststrideq
-    add             srcq, srcstrideq
-    dec          heightd
-    jg .loop
-    REP_RET
-%endmacro
 
 %macro HEVC_SAO_BAND_FILTER_FUNCS 0
-HEVC_SAO_BAND_FILTER_8       8, 0
-HEVC_SAO_BAND_FILTER_8      16, 1
-HEVC_SAO_BAND_FILTER_8      32, 2
-HEVC_SAO_BAND_FILTER_8      48, 2
-HEVC_SAO_BAND_FILTER_8      64, 4
-
-HEVC_SAO_BAND_FILTER_16 10,  8, 0
-HEVC_SAO_BAND_FILTER_16 10, 16, 1
-HEVC_SAO_BAND_FILTER_16 10, 32, 2
-HEVC_SAO_BAND_FILTER_16 10, 48, 2
-HEVC_SAO_BAND_FILTER_16 10, 64, 4
-
-HEVC_SAO_BAND_FILTER_16 12,  8, 0
-HEVC_SAO_BAND_FILTER_16 12, 16, 1
-HEVC_SAO_BAND_FILTER_16 12, 32, 2
-HEVC_SAO_BAND_FILTER_16 12, 48, 2
-HEVC_SAO_BAND_FILTER_16 12, 64, 4
+HEVC_SAO_BAND_FILTER  8, 0 ; args: %1 = width in the function name, %2 = %rep count per row; width 8 uses the movq path only
+HEVC_SAO_BAND_FILTER 16, 1
+HEVC_SAO_BAND_FILTER 32, 2
+HEVC_SAO_BAND_FILTER 48, 2 ; NOTE(review): the last 16 pixels are presumably handled by the macro's INIT_XMM tail - confirm
+HEVC_SAO_BAND_FILTER 64, 4
 %endmacro
 
 INIT_XMM sse2
@@ -263,37 +185,23 @@ HEVC_SAO_BAND_FILTER_FUNCS
 
 %if HAVE_AVX2_EXTERNAL
 INIT_XMM avx2
-HEVC_SAO_BAND_FILTER_8       8, 0
-HEVC_SAO_BAND_FILTER_8      16, 1
-INIT_YMM avx2
-HEVC_SAO_BAND_FILTER_8      32, 1
-HEVC_SAO_BAND_FILTER_8      48, 1
-HEVC_SAO_BAND_FILTER_8      64, 2
-
-INIT_XMM avx2
-HEVC_SAO_BAND_FILTER_16 10,  8, 0
-HEVC_SAO_BAND_FILTER_16 10, 16, 1
+HEVC_SAO_BAND_FILTER  8, 0
+HEVC_SAO_BAND_FILTER 16, 1
 INIT_YMM avx2
-HEVC_SAO_BAND_FILTER_16 10, 32, 1
-HEVC_SAO_BAND_FILTER_16 10, 48, 1
-HEVC_SAO_BAND_FILTER_16 10, 64, 2
-
-INIT_XMM avx2
-HEVC_SAO_BAND_FILTER_16 12,  8, 0
-HEVC_SAO_BAND_FILTER_16 12, 16, 1
-INIT_YMM avx2
-HEVC_SAO_BAND_FILTER_16 12, 32, 1
-HEVC_SAO_BAND_FILTER_16 12, 48, 1
-HEVC_SAO_BAND_FILTER_16 12, 64, 2
+HEVC_SAO_BAND_FILTER 32, 1
+HEVC_SAO_BAND_FILTER 48, 1
+HEVC_SAO_BAND_FILTER 64, 2
 %endif
 
 ;******************************************************************************
 ;SAO Edge Filter
 ;******************************************************************************
 
+%define MAX_PB_SIZE  64
+%define PADDING_SIZE 32 ; AV_INPUT_BUFFER_PADDING_SIZE
 %define EDGE_SRCSTRIDE 2 * MAX_PB_SIZE + PADDING_SIZE
 
-%macro HEVC_SAO_EDGE_FILTER_INIT 1
+%macro HEVC_SAO_EDGE_FILTER_INIT 0
 %if WIN64
     movsxd           eoq, dword eom
 %elif ARCH_X86_64
@@ -304,15 +212,15 @@ HEVC_SAO_BAND_FILTER_16 12, 64, 2
     lea            tmp2q, [pb_eo]
     movsx      a_strideq, byte [tmp2q+eoq*4+1]
     movsx      b_strideq, byte [tmp2q+eoq*4+3]
-    imul       a_strideq, EDGE_SRCSTRIDE>>%1
-    imul       b_strideq, EDGE_SRCSTRIDE>>%1
+    imul       a_strideq, EDGE_SRCSTRIDE
+    imul       b_strideq, EDGE_SRCSTRIDE
     movsx           tmpq, byte [tmp2q+eoq*4]
     add        a_strideq, tmpq
     movsx           tmpq, byte [tmp2q+eoq*4+2]
     add        b_strideq, tmpq
 %endmacro
 
-%macro HEVC_SAO_EDGE_FILTER_COMPUTE_8 1
+%macro HEVC_SAO_EDGE_FILTER_COMPUTE 1
     pminub            m4, m1, m2
     pminub            m5, m1, m3
     pcmpeqb           m2, m4
@@ -343,11 +251,11 @@ HEVC_SAO_BAND_FILTER_16 12, 64, 2
 
 ;void ff_hevc_sao_edge_filter_<width>_8_<opt>(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val,
 ;                                             int eo, int width, int height);
-%macro HEVC_SAO_EDGE_FILTER_8 2-3
+%macro HEVC_SAO_EDGE_FILTER 2-3
 %if ARCH_X86_64
 cglobal hevc_sao_edge_filter_%1_8, 4, 9, 8, dst, src, dststride, offset, eo, a_stride, b_stride, height, tmp
 %define tmp2q heightq
-    HEVC_SAO_EDGE_FILTER_INIT 0
+    HEVC_SAO_EDGE_FILTER_INIT
     mov          heightd, r6m
 
 %else ; ARCH_X86_32
@@ -356,7 +264,7 @@ cglobal hevc_sao_edge_filter_%1_8, 1, 6, 8, dst, src, dststride, a_stride, b_str
 %define tmpq  heightq
 %define tmp2q dststrideq
 %define offsetq heightq
-    HEVC_SAO_EDGE_FILTER_INIT 0
+    HEVC_SAO_EDGE_FILTER_INIT
     mov             srcq, srcm
     mov          offsetq, r3m
     mov       dststrideq, dststridem
@@ -383,7 +291,7 @@ align 16
     movq              m1, [srcq]
     movq              m2, [srcq + a_strideq]
     movq              m3, [srcq + b_strideq]
-    HEVC_SAO_EDGE_FILTER_COMPUTE_8 %1
+    HEVC_SAO_EDGE_FILTER_COMPUTE %1
     movq          [dstq], m3
 %endif
 
@@ -392,7 +300,7 @@ align 16
     mova              m1, [srcq + i]
     movu              m2, [srcq + a_strideq + i]
     movu              m3, [srcq + b_strideq + i]
-    HEVC_SAO_EDGE_FILTER_COMPUTE_8 %1
+    HEVC_SAO_EDGE_FILTER_COMPUTE %1
     mov%3     [dstq + i], m3
 %assign i i+mmsize
 %endrep
@@ -403,7 +311,7 @@ INIT_XMM cpuname
     mova              m1, [srcq + i]
     movu              m2, [srcq + a_strideq + i]
     movu              m3, [srcq + b_strideq + i]
-    HEVC_SAO_EDGE_FILTER_COMPUTE_8 %1
+    HEVC_SAO_EDGE_FILTER_COMPUTE %1
     mova      [dstq + i], m3
 %if cpuflag(avx2)
 INIT_YMM cpuname
@@ -417,208 +325,16 @@ INIT_YMM cpuname
     RET
 %endmacro
 
-%macro PMINUW 4
-%if cpuflag(sse4)
-    pminuw            %1, %2, %3
-%else
-    psubusw           %4, %2, %3
-    psubw             %1, %2, %4
-%endif
-%endmacro
-
-%macro HEVC_SAO_EDGE_FILTER_COMPUTE_10 0
-    PMINUW            m4, m1, m2, m6
-    PMINUW            m5, m1, m3, m7
-    pcmpeqw           m2, m4
-    pcmpeqw           m3, m5
-    pcmpeqw           m4, m1
-    pcmpeqw           m5, m1
-    psubw             m4, m2
-    psubw             m5, m3
-
-    paddw             m4, m5
-    pcmpeqw           m2, m4, [pw_m2]
-%if ARCH_X86_64
-    pcmpeqw           m3, m4, m13
-    pcmpeqw           m5, m4, m0
-    pcmpeqw           m6, m4, m14
-    pcmpeqw           m7, m4, m15
-    pand              m2, m8
-    pand              m3, m9
-    pand              m5, m10
-    pand              m6, m11
-    pand              m7, m12
-%else
-    pcmpeqw           m3, m4, [pw_m1]
-    pcmpeqw           m5, m4, m0
-    pcmpeqw           m6, m4, [pw_1]
-    pcmpeqw           m7, m4, [pw_2]
-    pand              m2, [rsp+MMSIZE*0]
-    pand              m3, [rsp+MMSIZE*1]
-    pand              m5, [rsp+MMSIZE*2]
-    pand              m6, [rsp+MMSIZE*3]
-    pand              m7, [rsp+MMSIZE*4]
-%endif
-    paddw             m2, m3
-    paddw             m5, m6
-    paddw             m2, m7
-    paddw             m2, m1
-    paddw             m2, m5
-%endmacro
-
-;void ff_hevc_sao_edge_filter_<width>_<depth>_<opt>(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val,
-;                                                   int eo, int width, int height);
-%macro HEVC_SAO_EDGE_FILTER_16 3
-%if ARCH_X86_64
-cglobal hevc_sao_edge_filter_%2_%1, 4, 9, 16, dst, src, dststride, offset, eo, a_stride, b_stride, height, tmp
-%define tmp2q heightq
-    HEVC_SAO_EDGE_FILTER_INIT 1
-    mov          heightd, r6m
-    add        a_strideq, a_strideq
-    add        b_strideq, b_strideq
-
-%else ; ARCH_X86_32
-cglobal hevc_sao_edge_filter_%2_%1, 1, 6, 8, 5*mmsize, dst, src, dststride, a_stride, b_stride, height
-%assign MMSIZE mmsize
-%define eoq   srcq
-%define tmpq  heightq
-%define tmp2q dststrideq
-%define offsetq heightq
-%define m8 m1
-%define m9 m2
-%define m10 m3
-%define m11 m4
-%define m12 m5
-    HEVC_SAO_EDGE_FILTER_INIT 1
-    mov             srcq, srcm
-    mov          offsetq, r3m
-    mov       dststrideq, dststridem
-    add        a_strideq, a_strideq
-    add        b_strideq, b_strideq
-
-%endif ; ARCH
-
-%if cpuflag(avx2)
-    SPLATW            m8, [offsetq+2]
-    SPLATW            m9, [offsetq+4]
-    SPLATW           m10, [offsetq+0]
-    SPLATW           m11, [offsetq+6]
-    SPLATW           m12, [offsetq+8]
-%else
-    movq             m10, [offsetq+0]
-    movd             m12, [offsetq+6]
-    SPLATW            m8, xm10, 1
-    SPLATW            m9, xm10, 2
-    SPLATW           m10, xm10, 0
-    SPLATW           m11, xm12, 0
-    SPLATW           m12, xm12, 1
-%endif
-    pxor              m0, m0
-%if ARCH_X86_64
-    mova             m13, [pw_m1]
-    mova             m14, [pw_1]
-    mova             m15, [pw_2]
-%else
-    mov          heightd, r6m
-    mova  [rsp+mmsize*0], m8
-    mova  [rsp+mmsize*1], m9
-    mova  [rsp+mmsize*2], m10
-    mova  [rsp+mmsize*3], m11
-    mova  [rsp+mmsize*4], m12
-%endif
-
-align 16
-.loop
-
-%if %2 == 8
-    mova              m1, [srcq]
-    movu              m2, [srcq+a_strideq]
-    movu              m3, [srcq+b_strideq]
-
-    HEVC_SAO_EDGE_FILTER_COMPUTE_10
-    CLIPW             m2, m0, [pw_mask %+ %1]
-    movu          [dstq], m2
-%endif
-
-%assign i 0
-%rep %3
-    mova              m1, [srcq + i]
-    movu              m2, [srcq+a_strideq + i]
-    movu              m3, [srcq+b_strideq + i]
-    HEVC_SAO_EDGE_FILTER_COMPUTE_10
-    CLIPW             m2, m0, [pw_mask %+ %1]
-    mova      [dstq + i], m2
-
-    mova              m1, [srcq + i + mmsize]
-    movu              m2, [srcq+a_strideq + i + mmsize]
-    movu              m3, [srcq+b_strideq + i + mmsize]
-    HEVC_SAO_EDGE_FILTER_COMPUTE_10
-    CLIPW             m2, m0, [pw_mask %+ %1]
-    mova [dstq + i + mmsize], m2
-%assign i i+mmsize*2
-%endrep
-
-%if %2 == 48
-INIT_XMM cpuname
-    mova              m1, [srcq + i]
-    movu              m2, [srcq+a_strideq + i]
-    movu              m3, [srcq+b_strideq + i]
-    HEVC_SAO_EDGE_FILTER_COMPUTE_10
-    CLIPW             m2, m0, [pw_mask %+ %1]
-    mova              [dstq + i], m2
-
-    mova              m1, [srcq + i + mmsize]
-    movu              m2, [srcq+a_strideq + i + mmsize]
-    movu              m3, [srcq+b_strideq + i + mmsize]
-    HEVC_SAO_EDGE_FILTER_COMPUTE_10
-    CLIPW             m2, m0, [pw_mask %+ %1]
-    mova [dstq + i + mmsize], m2
-%if cpuflag(avx2)
-INIT_YMM cpuname
-%endif
-%endif
-
-    add             dstq, dststrideq
-    add             srcq, EDGE_SRCSTRIDE
-    dec          heightd
-    jg .loop
-    RET
-%endmacro
-
 INIT_XMM ssse3
-HEVC_SAO_EDGE_FILTER_8       8, 0
-HEVC_SAO_EDGE_FILTER_8      16, 1, a
-HEVC_SAO_EDGE_FILTER_8      32, 2, a
-HEVC_SAO_EDGE_FILTER_8      48, 2, a
-HEVC_SAO_EDGE_FILTER_8      64, 4, a
-
-%if HAVE_AVX2_EXTERNAL
-INIT_YMM avx2
-HEVC_SAO_EDGE_FILTER_8      32, 1, a
-HEVC_SAO_EDGE_FILTER_8      48, 1, u
-HEVC_SAO_EDGE_FILTER_8      64, 2, a
-%endif
-
-INIT_XMM sse2
-HEVC_SAO_EDGE_FILTER_16 10,  8, 0
-HEVC_SAO_EDGE_FILTER_16 10, 16, 1
-HEVC_SAO_EDGE_FILTER_16 10, 32, 2
-HEVC_SAO_EDGE_FILTER_16 10, 48, 2
-HEVC_SAO_EDGE_FILTER_16 10, 64, 4
-
-HEVC_SAO_EDGE_FILTER_16 12,  8, 0
-HEVC_SAO_EDGE_FILTER_16 12, 16, 1
-HEVC_SAO_EDGE_FILTER_16 12, 32, 2
-HEVC_SAO_EDGE_FILTER_16 12, 48, 2
-HEVC_SAO_EDGE_FILTER_16 12, 64, 4
+HEVC_SAO_EDGE_FILTER  8, 0
+HEVC_SAO_EDGE_FILTER 16, 1, a
+HEVC_SAO_EDGE_FILTER 32, 2, a
+HEVC_SAO_EDGE_FILTER 48, 2, a
+HEVC_SAO_EDGE_FILTER 64, 4, a
 
 %if HAVE_AVX2_EXTERNAL
 INIT_YMM avx2
-HEVC_SAO_EDGE_FILTER_16 10, 32, 1
-HEVC_SAO_EDGE_FILTER_16 10, 48, 1
-HEVC_SAO_EDGE_FILTER_16 10, 64, 2
-
-HEVC_SAO_EDGE_FILTER_16 12, 32, 1
-HEVC_SAO_EDGE_FILTER_16 12, 48, 1
-HEVC_SAO_EDGE_FILTER_16 12, 64, 2
+HEVC_SAO_EDGE_FILTER 32, 1, a
+HEVC_SAO_EDGE_FILTER 48, 1, u
+HEVC_SAO_EDGE_FILTER 64, 2, a
 %endif
diff --git a/libavcodec/x86/hevc_sao_10bit.asm b/libavcodec/x86/hevc_sao_10bit.asm
new file mode 100644
index 00000000..f81e2d50
--- /dev/null
+++ b/libavcodec/x86/hevc_sao_10bit.asm
@@ -0,0 +1,370 @@
+;******************************************************************************
+;* SIMD optimized SAO functions for HEVC 10/12bit decoding
+;*
+;* Copyright (c) 2013 Pierre-Edouard LEPERE
+;* Copyright (c) 2014 James Almer
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA 32
+
+pw_m2:     times 16 dw -2
+pw_mask10: times 16 dw 0x03FF
+pw_mask12: times 16 dw 0x0FFF
+pb_eo:              db -1, 0, 1, 0, 0, -1, 0, 1, -1, -1, 1, 1, 1, -1, -1, 1
+cextern pw_m1
+cextern pw_1
+cextern pw_2
+
+SECTION .text
+
+;******************************************************************************
+;SAO Band Filter
+;******************************************************************************
+
+%macro HEVC_SAO_BAND_FILTER_INIT 1 ; %1 = bit depth (10 or 12), selects pw_mask10/pw_mask12
+    and            leftq, 31          ; band indices are taken modulo 32
+    movd             xm0, leftd       ; 1st band index
+    add            leftq, 1
+    and            leftq, 31
+    movd             xm1, leftd       ; 2nd band index (previous + 1, wrapped)
+    add            leftq, 1
+    and            leftq, 31
+    movd             xm2, leftd       ; 3rd band index
+    add            leftq, 1
+    and            leftq, 31
+    movd             xm3, leftd       ; 4th band index
+
+    SPLATW            m0, xm0         ; m0-m3: each band index broadcast to every word lane
+    SPLATW            m1, xm1
+    SPLATW            m2, xm2
+    SPLATW            m3, xm3
+%if mmsize > 16
+    SPLATW            m4, [offsetq + 2] ; m4-m7: 16-bit offsets at byte offsets 2, 4, 6, 8 (words 1..4)
+    SPLATW            m5, [offsetq + 4]
+    SPLATW            m6, [offsetq + 6]
+    SPLATW            m7, [offsetq + 8]
+%else ; XMM: load words 1..4 with one 8-byte read, then broadcast each word
+    movq              m7, [offsetq + 2]
+    SPLATW            m4, m7, 0
+    SPLATW            m5, m7, 1
+    SPLATW            m6, m7, 2
+    SPLATW            m7, m7, 3       ; m7 is overwritten last because it is also the source
+%endif
+
+%if ARCH_X86_64
+    mova             m13, [pw_mask %+ %1] ; upper clip bound (0x03FF or 0x0FFF per word)
+    pxor             m14, m14         ; lower clip bound: zero
+
+%else ; ARCH_X86_32
+    mova  [rsp+mmsize*0], m0          ; x86-32 has only eight vector registers: spill the four
+    mova  [rsp+mmsize*1], m1          ; band indices (m0-m3) and the first three offsets (m4-m6)
+    mova  [rsp+mmsize*2], m2          ; to the stack area reserved by cglobal
+    mova  [rsp+mmsize*3], m3
+    mova  [rsp+mmsize*4], m4
+    mova  [rsp+mmsize*5], m5
+    mova  [rsp+mmsize*6], m6          ; m7 (4th offset) is not spilled; the loop uses it directly
+    mova              m1, [pw_mask %+ %1] ; m1 reused as the upper clip bound
+    pxor              m0, m0          ; m0 reused as the lower clip bound (zero)
+    %define m14 m0                    ; remap the register names used by the loop onto m0-m3
+    %define m13 m1
+    %define  m9 m2
+    %define  m8 m3
+%endif ; ARCH
+DEFINE_ARGS dst, src, dststride, srcstride, offset, height ; sixth register renamed: left -> height
+    mov          heightd, r7m         ; height is prototype argument index 7
+%endmacro
+
+;void ff_hevc_sao_band_filter_<width>_<depth>_<opt>(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src,
+;                                                   int16_t *sao_offset_val, int sao_left_class, int width, int height);
+%macro HEVC_SAO_BAND_FILTER 3 ; %1 = bit depth, %2 = width (used in the name), %3 = vectors processed per row
+cglobal hevc_sao_band_filter_%2_%1, 6, 6, 15, 7*mmsize*ARCH_X86_32, dst, src, dststride, srcstride, offset, left
+    HEVC_SAO_BAND_FILTER_INIT %1
+
+align 16
+.loop:                                ; one iteration per row
+
+%assign i 0
+%assign j 0
+%rep %3
+%assign k 8+(j&1)
+%assign l 9-(j&1)
+    mova          m %+ k, [srcq + i]  ; samples; k/l swap between 8 and 9 on successive vectors
+    psraw         m %+ l, m %+ k, %1-5 ; band of each sample = sample >> (depth - 5)
+%if ARCH_X86_64
+    pcmpeqw          m10, m %+ l, m0  ; lane masks: band equals 1st..4th band index
+    pcmpeqw          m11, m %+ l, m1
+    pcmpeqw          m12, m %+ l, m2
+    pcmpeqw       m %+ l, m3
+    pand             m10, m4          ; keep the offset that belongs to the matching band
+    pand             m11, m5
+    pand             m12, m6
+    pand          m %+ l, m7
+    por              m10, m11         ; merge the four masked offsets (zero where no band matched)
+    por              m12, m %+ l
+    por              m10, m12
+    paddw         m %+ k, m10         ; sample += selected offset
+%else ; ARCH_X86_32
+    pcmpeqw           m4, m %+ l, [rsp+mmsize*0] ; same computation against the spilled band indices
+    pcmpeqw           m5, m %+ l, [rsp+mmsize*1]
+    pcmpeqw           m6, m %+ l, [rsp+mmsize*2]
+    pcmpeqw       m %+ l, [rsp+mmsize*3]
+    pand              m4, [rsp+mmsize*4] ; spilled offsets 1..3
+    pand              m5, [rsp+mmsize*5]
+    pand              m6, [rsp+mmsize*6]
+    pand          m %+ l, m7          ; 4th offset is still held in m7
+    por               m4, m5
+    por               m6, m %+ l
+    por               m4, m6
+    paddw         m %+ k, m4          ; sample += selected offset
+%endif ; ARCH
+    CLIPW             m %+ k, m14, m13 ; bounds are zero (m14) and pw_mask (m13), set up by INIT
+    mova      [dstq + i], m %+ k
+%assign i i+mmsize
+%assign j j+1
+%endrep
+
+    add             dstq, dststrideq  ; advance both pointers to the next row
+    add             srcq, srcstrideq
+    dec          heightd
+    jg .loop                          ; repeat while rows remain
+    REP_RET
+%endmacro
+
+%macro HEVC_SAO_BAND_FILTER_FUNCS 0 ; emits every width for 10- and 12-bit; vector counts assume 16-byte registers
+HEVC_SAO_BAND_FILTER 10,  8, 1        ; arguments: bit depth, width, vectors per row
+HEVC_SAO_BAND_FILTER 10, 16, 2
+HEVC_SAO_BAND_FILTER 10, 32, 4
+HEVC_SAO_BAND_FILTER 10, 48, 6
+HEVC_SAO_BAND_FILTER 10, 64, 8
+
+HEVC_SAO_BAND_FILTER 12,  8, 1        ; same widths, 12-bit clip mask
+HEVC_SAO_BAND_FILTER 12, 16, 2
+HEVC_SAO_BAND_FILTER 12, 32, 4
+HEVC_SAO_BAND_FILTER 12, 48, 6
+HEVC_SAO_BAND_FILTER 12, 64, 8
+%endmacro
+
+INIT_XMM sse2
+HEVC_SAO_BAND_FILTER_FUNCS            ; full set of widths/depths for SSE2
+INIT_XMM avx
+HEVC_SAO_BAND_FILTER_FUNCS            ; same set again for AVX
+
+%if HAVE_AVX2_EXTERNAL
+INIT_XMM avx2
+HEVC_SAO_BAND_FILTER 10,  8, 1        ; width 8 is instantiated at XMM size even for avx2
+INIT_YMM avx2
+HEVC_SAO_BAND_FILTER 10, 16, 1        ; YMM vectors are twice as wide, so half the vector counts
+HEVC_SAO_BAND_FILTER 10, 32, 2
+HEVC_SAO_BAND_FILTER 10, 48, 3
+HEVC_SAO_BAND_FILTER 10, 64, 4
+
+INIT_XMM avx2
+HEVC_SAO_BAND_FILTER 12,  8, 1        ; 12-bit: same layout as the 10-bit group above
+INIT_YMM avx2
+HEVC_SAO_BAND_FILTER 12, 16, 1
+HEVC_SAO_BAND_FILTER 12, 32, 2
+HEVC_SAO_BAND_FILTER 12, 48, 3
+HEVC_SAO_BAND_FILTER 12, 64, 4
+%endif
+
+;******************************************************************************
+;SAO Edge Filter
+;******************************************************************************
+
+%define MAX_PB_SIZE  64 ; NOTE(review): presumably mirrors the decoder's C-side MAX_PB_SIZE - confirm
+%define PADDING_SIZE 32 ; AV_INPUT_BUFFER_PADDING_SIZE
+%define EDGE_SRCSTRIDE 2 * MAX_PB_SIZE + PADDING_SIZE ; = 160, added to srcq per row; expands unparenthesized
+
+%macro PMINUW 4 ; %1 = unsigned word minimum of %2 and %3; %4 = scratch, written only without SSE4
+%if cpuflag(sse4)
+    pminuw            %1, %2, %3      ; native unsigned word minimum
+%else
+    psubusw           %4, %2, %3      ; %4 = max(%2 - %3, 0) via unsigned saturating subtract
+    psubw             %1, %2, %4      ; %2 - max(%2 - %3, 0) = min(%2, %3)
+%endif
+%endmacro
+
+%macro HEVC_SAO_EDGE_FILTER_INIT 0 ; computes a_stride/b_stride from eo using the pb_eo table
+%if WIN64
+    movsxd           eoq, dword eom   ; eo read from its memory slot, sign-extended to 64 bits
+%elif ARCH_X86_64
+    movsxd           eoq, eod         ; eo taken from its register, sign-extended to 64 bits
+%else
+    mov              eoq, r4m         ; eo is prototype argument index 4
+%endif
+    lea            tmp2q, [pb_eo]     ; table base; each eo value owns 4 signed bytes
+    movsx      a_strideq, byte [tmp2q+eoq*4+1] ; entry byte 1: row component of neighbour a
+    movsx      b_strideq, byte [tmp2q+eoq*4+3] ; entry byte 3: row component of neighbour b
+    imul       a_strideq, EDGE_SRCSTRIDE >> 1  ; scale by half the row stride; callers double the sum afterwards
+    imul       b_strideq, EDGE_SRCSTRIDE >> 1
+    movsx           tmpq, byte [tmp2q+eoq*4]   ; entry byte 0: in-row component of neighbour a
+    add        a_strideq, tmpq
+    movsx           tmpq, byte [tmp2q+eoq*4+2] ; entry byte 2: in-row component of neighbour b
+    add        b_strideq, tmpq
+%endmacro
+
+;void ff_hevc_sao_edge_filter_<width>_<depth>_<opt>(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val,
+;                                                   int eo, int width, int height);
+%macro HEVC_SAO_EDGE_FILTER 3 ; %1 = bit depth, %2 = width (used in the name), %3 = vectors processed per row
+%if ARCH_X86_64
+cglobal hevc_sao_edge_filter_%2_%1, 4, 9, 16, dst, src, dststride, offset, eo, a_stride, b_stride, height, tmp
+%define tmp2q heightq                 ; INIT's table pointer borrows the height register
+    HEVC_SAO_EDGE_FILTER_INIT
+    mov          heightd, r6m         ; loaded after INIT because INIT clobbers heightq via tmp2q
+    add        a_strideq, a_strideq   ; double both neighbour offsets (2 bytes per word sample)
+    add        b_strideq, b_strideq
+
+%else ; ARCH_X86_32
+cglobal hevc_sao_edge_filter_%2_%1, 1, 6, 8, 5*mmsize, dst, src, dststride, a_stride, b_stride, height
+%define eoq   srcq                    ; eo is held in the src register during INIT
+%define tmpq  heightq
+%define tmp2q dststrideq              ; table pointer is held in the dststride register during INIT
+%define offsetq heightq               ; offset pointer shares the height register
+%define m8 m1                         ; m8-m12 map onto m1-m5; their values are spilled to the stack below
+%define m9 m2
+%define m10 m3
+%define m11 m4
+%define m12 m5
+    HEVC_SAO_EDGE_FILTER_INIT
+    mov             srcq, srcm        ; load the real src now that eo is no longer needed
+    mov          offsetq, r3m         ; offset pointer is prototype argument index 3
+    mov       dststrideq, dststridem
+    add        a_strideq, a_strideq   ; double both neighbour offsets (2 bytes per word sample)
+    add        b_strideq, b_strideq
+
+%endif ; ARCH
+
+%if mmsize > 16
+    SPLATW            m8, [offsetq+2] ; m8  = offset word 1
+    SPLATW            m9, [offsetq+4] ; m9  = offset word 2
+    SPLATW           m10, [offsetq+0] ; m10 = offset word 0
+    SPLATW           m11, [offsetq+6] ; m11 = offset word 3
+    SPLATW           m12, [offsetq+8] ; m12 = offset word 4
+%else
+    movq             m10, [offsetq+0] ; offset words 0..3
+    movd             m12, [offsetq+6] ; offset words 3..4
+    SPLATW            m8, xm10, 1     ; m8  = offset word 1
+    SPLATW            m9, xm10, 2     ; m9  = offset word 2
+    SPLATW           m10, xm10, 0     ; m10 = offset word 0 (source overwritten last)
+    SPLATW           m11, xm12, 0     ; m11 = offset word 3
+    SPLATW           m12, xm12, 1     ; m12 = offset word 4
+%endif
+    pxor              m0, m0          ; zero: used for the "sum == 0" test and as lower clip bound
+%if ARCH_X86_64
+    mova             m13, [pw_m1]     ; keep the comparison constants -1, 1, 2 in registers
+    mova             m14, [pw_1]
+    mova             m15, [pw_2]
+%else
+    mov          heightd, r6m         ; safe only now: heightq doubled as offsetq above
+    mova  [rsp+mmsize*0], m8          ; spill the five offset vectors; m1-m5 are reused in the loop
+    mova  [rsp+mmsize*1], m9
+    mova  [rsp+mmsize*2], m10
+    mova  [rsp+mmsize*3], m11
+    mova  [rsp+mmsize*4], m12
+%endif
+
+align 16
+.loop:                                ; one iteration per row
+
+%assign i 0
+%rep %3
+    mova              m1, [srcq + i]  ; current samples (cur)
+    movu              m2, [srcq+a_strideq + i] ; neighbour a (unaligned load)
+    movu              m3, [srcq+b_strideq + i] ; neighbour b (unaligned load)
+    PMINUW            m4, m1, m2, m6  ; m4 = min(cur, a), unsigned
+    PMINUW            m5, m1, m3, m7  ; m5 = min(cur, b), unsigned
+    pcmpeqw           m2, m4          ; m2 = -1 where a <= cur
+    pcmpeqw           m3, m5          ; m3 = -1 where b <= cur
+    pcmpeqw           m4, m1          ; m4 = -1 where cur <= a
+    pcmpeqw           m5, m1          ; m5 = -1 where cur <= b
+    psubw             m4, m2          ; m4 = sign(cur - a): -1, 0 or +1
+    psubw             m5, m3          ; m5 = sign(cur - b)
+
+    paddw             m4, m5          ; sum of both signs, in -2..2
+    pcmpeqw           m2, m4, [pw_m2] ; lane mask: sum == -2
+%if ARCH_X86_64
+    pcmpeqw           m3, m4, m13     ; sum == -1
+    pcmpeqw           m5, m4, m0      ; sum ==  0
+    pcmpeqw           m6, m4, m14     ; sum ==  1
+    pcmpeqw           m7, m4, m15     ; sum ==  2
+    pand              m2, m8          ; -2 -> offset word 1
+    pand              m3, m9          ; -1 -> offset word 2
+    pand              m5, m10         ;  0 -> offset word 0
+    pand              m6, m11         ;  1 -> offset word 3
+    pand              m7, m12         ;  2 -> offset word 4
+%else
+    pcmpeqw           m3, m4, [pw_m1] ; same tests with constants read from memory
+    pcmpeqw           m5, m4, m0
+    pcmpeqw           m6, m4, [pw_1]
+    pcmpeqw           m7, m4, [pw_2]
+    pand              m2, [rsp+mmsize*0] ; same sum-to-offset mapping, using the spilled offsets
+    pand              m3, [rsp+mmsize*1]
+    pand              m5, [rsp+mmsize*2]
+    pand              m6, [rsp+mmsize*3]
+    pand              m7, [rsp+mmsize*4]
+%endif
+    paddw             m2, m3          ; exactly one of the five masks is set per lane,
+    paddw             m5, m6          ; so these adds collect the single selected offset
+    paddw             m2, m7
+    paddw             m2, m1          ; add the current sample
+    paddw             m2, m5
+    CLIPW             m2, m0, [pw_mask %+ %1] ; bounds are zero and pw_mask10/pw_mask12
+    mova      [dstq + i], m2
+%assign i i+mmsize
+%endrep
+
+    add             dstq, dststrideq
+    add             srcq, EDGE_SRCSTRIDE ; source rows are a fixed EDGE_SRCSTRIDE bytes apart
+    dec          heightd
+    jg .loop                          ; repeat while rows remain
+    RET
+%endmacro
+
+INIT_XMM sse2
+HEVC_SAO_EDGE_FILTER 10,  8, 1        ; arguments: bit depth, width, 16-byte vectors per row
+HEVC_SAO_EDGE_FILTER 10, 16, 2
+HEVC_SAO_EDGE_FILTER 10, 32, 4
+HEVC_SAO_EDGE_FILTER 10, 48, 6
+HEVC_SAO_EDGE_FILTER 10, 64, 8
+
+HEVC_SAO_EDGE_FILTER 12,  8, 1        ; same widths, 12-bit clip mask
+HEVC_SAO_EDGE_FILTER 12, 16, 2
+HEVC_SAO_EDGE_FILTER 12, 32, 4
+HEVC_SAO_EDGE_FILTER 12, 48, 6
+HEVC_SAO_EDGE_FILTER 12, 64, 8
+
+%if HAVE_AVX2_EXTERNAL
+INIT_XMM avx2
+HEVC_SAO_EDGE_FILTER 10,  8, 1        ; width 8 is instantiated at XMM size even for avx2
+INIT_YMM avx2
+HEVC_SAO_EDGE_FILTER 10, 16, 1        ; YMM vectors are twice as wide, so half the vector counts
+HEVC_SAO_EDGE_FILTER 10, 32, 2
+HEVC_SAO_EDGE_FILTER 10, 48, 3
+HEVC_SAO_EDGE_FILTER 10, 64, 4
+
+INIT_XMM avx2
+HEVC_SAO_EDGE_FILTER 12,  8, 1        ; 12-bit: same layout as the 10-bit group above
+INIT_YMM avx2
+HEVC_SAO_EDGE_FILTER 12, 16, 1
+HEVC_SAO_EDGE_FILTER 12, 32, 2
+HEVC_SAO_EDGE_FILTER 12, 48, 3
+HEVC_SAO_EDGE_FILTER 12, 64, 4
+%endif
diff --git a/libavcodec/x86/hevcdsp_init.c b/libavcodec/x86/hevcdsp_init.c
index ddc876df..09eb06d0 100644
--- a/libavcodec/x86/hevcdsp_init.c
+++ b/libavcodec/x86/hevcdsp_init.c
@@ -119,8 +119,8 @@ void ff_hevc_put_hevc_bi_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dst
 }
 
 #define mc_rep_funcs(name, bitd, step, W, opt)        \
-    mc_rep_func(name, bitd, step, W, opt);            \
-    mc_rep_uni_func(name, bitd, step, W, opt);        \
+    mc_rep_func(name, bitd, step, W, opt)            \
+    mc_rep_uni_func(name, bitd, step, W, opt)        \
     mc_rep_bi_func(name, bitd, step, W, opt)
 
 #define mc_rep_func2(name, bitd, step1, step2, W, opt) \
@@ -153,14 +153,9 @@ void ff_hevc_put_hevc_bi_##name##W##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dsts
                                                        src2 + step1, height, mx, my, width);                    \
 }
 
-#define mc_rep_funcs(name, bitd, step, W, opt)        \
-    mc_rep_func(name, bitd, step, W, opt);            \
-    mc_rep_uni_func(name, bitd, step, W, opt);        \
-    mc_rep_bi_func(name, bitd, step, W, opt)
-
 #define mc_rep_funcs2(name, bitd, step1, step2, W, opt) \
-    mc_rep_func2(name, bitd, step1, step2, W, opt);     \
-    mc_rep_uni_func2(name, bitd, step1, step2, W, opt); \
+    mc_rep_func2(name, bitd, step1, step2, W, opt)      \
+    mc_rep_uni_func2(name, bitd, step1, step2, W, opt)  \
     mc_rep_bi_func2(name, bitd, step1, step2, W, opt)
 
 #if ARCH_X86_64 && HAVE_SSE4_EXTERNAL
@@ -196,9 +191,9 @@ void ff_hevc_put_hevc_uni_##name##width1##_10_##opt1(uint8_t *dst, ptrdiff_t dst
                                                       height, mx, my, width);                                 \
 }
 
-#define mc_rep_mixs_10(name, width1, width2, width3, opt1, opt2, width4)    \
-mc_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4);            \
-mc_bi_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4);         \
+#define mc_rep_mixs_10(name, width1, width2, width3, opt1, opt2, width4)   \
+mc_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4)            \
+mc_bi_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4)         \
 mc_uni_rep_mix_10(name, width1, width2, width3, opt1, opt2, width4)
 
 #define mc_rep_mix_8(name, width1, width2, width3, opt1, opt2)                                                \
@@ -232,199 +227,199 @@ void ff_hevc_put_hevc_uni_##name##width1##_8_##opt1(uint8_t *dst, ptrdiff_t dsts
                                                    height, mx, my, width);                                    \
 }
 
-#define mc_rep_mixs_8(name, width1, width2, width3, opt1, opt2)    \
-mc_rep_mix_8(name, width1, width2, width3, opt1, opt2);            \
-mc_bi_rep_mix_8(name, width1, width2, width3, opt1, opt2);         \
+#define mc_rep_mixs_8(name, width1, width2, width3, opt1, opt2)   \
+mc_rep_mix_8(name, width1, width2, width3, opt1, opt2)            \
+mc_bi_rep_mix_8(name, width1, width2, width3, opt1, opt2)         \
 mc_uni_rep_mix_8(name, width1, width2, width3, opt1, opt2)
 
 #if HAVE_AVX2_EXTERNAL
 
-mc_rep_mixs_8(pel_pixels, 48, 32, 16, avx2, sse4);
-mc_rep_mixs_8(epel_hv,    48, 32, 16, avx2, sse4);
-mc_rep_mixs_8(epel_h ,    48, 32, 16, avx2, sse4);
-mc_rep_mixs_8(epel_v ,    48, 32, 16, avx2, sse4);
+mc_rep_mixs_8(pel_pixels, 48, 32, 16, avx2, sse4)
+mc_rep_mixs_8(epel_hv,    48, 32, 16, avx2, sse4)
+mc_rep_mixs_8(epel_h ,    48, 32, 16, avx2, sse4)
+mc_rep_mixs_8(epel_v ,    48, 32, 16, avx2, sse4)
 
-mc_rep_mix_10(pel_pixels, 24, 16, 8, avx2, sse4, 32);
-mc_bi_rep_mix_10(pel_pixels,24, 16, 8, avx2, sse4, 32);
-mc_rep_mixs_10(epel_hv,   24, 16, 8, avx2, sse4, 32);
-mc_rep_mixs_10(epel_h ,   24, 16, 8, avx2, sse4, 32);
-mc_rep_mixs_10(epel_v ,   24, 16, 8, avx2, sse4, 32);
+mc_rep_mix_10(pel_pixels, 24, 16, 8, avx2, sse4, 32)
+mc_bi_rep_mix_10(pel_pixels,24, 16, 8, avx2, sse4, 32)
+mc_rep_mixs_10(epel_hv,   24, 16, 8, avx2, sse4, 32)
+mc_rep_mixs_10(epel_h ,   24, 16, 8, avx2, sse4, 32)
+mc_rep_mixs_10(epel_v ,   24, 16, 8, avx2, sse4, 32)
 
 
-mc_rep_mixs_10(qpel_h ,   24, 16, 8, avx2, sse4, 32);
-mc_rep_mixs_10(qpel_v ,   24, 16, 8, avx2, sse4, 32);
-mc_rep_mixs_10(qpel_hv,   24, 16, 8, avx2, sse4, 32);
+mc_rep_mixs_10(qpel_h ,   24, 16, 8, avx2, sse4, 32)
+mc_rep_mixs_10(qpel_v ,   24, 16, 8, avx2, sse4, 32)
+mc_rep_mixs_10(qpel_hv,   24, 16, 8, avx2, sse4, 32)
 
 
-mc_rep_uni_func(pel_pixels, 8, 64, 128, avx2);//used for 10bit
-mc_rep_uni_func(pel_pixels, 8, 32, 96, avx2); //used for 10bit
+mc_rep_uni_func(pel_pixels, 8, 64, 128, avx2)//used for 10bit
+mc_rep_uni_func(pel_pixels, 8, 32, 96, avx2) //used for 10bit
 
-mc_rep_funcs(pel_pixels, 8, 32, 64, avx2);
+mc_rep_funcs(pel_pixels, 8, 32, 64, avx2)
 
-mc_rep_func(pel_pixels, 10, 16, 32, avx2);
-mc_rep_func(pel_pixels, 10, 16, 48, avx2);
-mc_rep_func(pel_pixels, 10, 32, 64, avx2);
+mc_rep_func(pel_pixels, 10, 16, 32, avx2)
+mc_rep_func(pel_pixels, 10, 16, 48, avx2)
+mc_rep_func(pel_pixels, 10, 32, 64, avx2)
 
-mc_rep_bi_func(pel_pixels, 10, 16, 32, avx2);
-mc_rep_bi_func(pel_pixels, 10, 16, 48, avx2);
-mc_rep_bi_func(pel_pixels, 10, 32, 64, avx2);
+mc_rep_bi_func(pel_pixels, 10, 16, 32, avx2)
+mc_rep_bi_func(pel_pixels, 10, 16, 48, avx2)
+mc_rep_bi_func(pel_pixels, 10, 32, 64, avx2)
 
-mc_rep_funcs(epel_h, 8, 32, 64, avx2);
+mc_rep_funcs(epel_h, 8, 32, 64, avx2)
 
-mc_rep_funcs(epel_v, 8, 32, 64, avx2);
+mc_rep_funcs(epel_v, 8, 32, 64, avx2)
 
-mc_rep_funcs(epel_h, 10, 16, 32, avx2);
-mc_rep_funcs(epel_h, 10, 16, 48, avx2);
-mc_rep_funcs(epel_h, 10, 32, 64, avx2);
+mc_rep_funcs(epel_h, 10, 16, 32, avx2)
+mc_rep_funcs(epel_h, 10, 16, 48, avx2)
+mc_rep_funcs(epel_h, 10, 32, 64, avx2)
 
-mc_rep_funcs(epel_v, 10, 16, 32, avx2);
-mc_rep_funcs(epel_v, 10, 16, 48, avx2);
-mc_rep_funcs(epel_v, 10, 32, 64, avx2);
+mc_rep_funcs(epel_v, 10, 16, 32, avx2)
+mc_rep_funcs(epel_v, 10, 16, 48, avx2)
+mc_rep_funcs(epel_v, 10, 32, 64, avx2)
 
 
-mc_rep_funcs(epel_hv,  8, 32, 64, avx2);
+mc_rep_funcs(epel_hv,  8, 32, 64, avx2)
 
-mc_rep_funcs(epel_hv, 10, 16, 32, avx2);
-mc_rep_funcs(epel_hv, 10, 16, 48, avx2);
-mc_rep_funcs(epel_hv, 10, 32, 64, avx2);
+mc_rep_funcs(epel_hv, 10, 16, 32, avx2)
+mc_rep_funcs(epel_hv, 10, 16, 48, avx2)
+mc_rep_funcs(epel_hv, 10, 32, 64, avx2)
 
-mc_rep_funcs(qpel_h, 8, 32, 64, avx2);
-mc_rep_mixs_8(qpel_h ,  48, 32, 16, avx2, sse4);
+mc_rep_funcs(qpel_h, 8, 32, 64, avx2)
+mc_rep_mixs_8(qpel_h ,  48, 32, 16, avx2, sse4)
 
-mc_rep_funcs(qpel_v, 8, 32, 64, avx2);
-mc_rep_mixs_8(qpel_v,  48, 32, 16, avx2, sse4);
+mc_rep_funcs(qpel_v, 8, 32, 64, avx2)
+mc_rep_mixs_8(qpel_v,  48, 32, 16, avx2, sse4)
 
-mc_rep_funcs(qpel_h, 10, 16, 32, avx2);
-mc_rep_funcs(qpel_h, 10, 16, 48, avx2);
-mc_rep_funcs(qpel_h, 10, 32, 64, avx2);
+mc_rep_funcs(qpel_h, 10, 16, 32, avx2)
+mc_rep_funcs(qpel_h, 10, 16, 48, avx2)
+mc_rep_funcs(qpel_h, 10, 32, 64, avx2)
 
-mc_rep_funcs(qpel_v, 10, 16, 32, avx2);
-mc_rep_funcs(qpel_v, 10, 16, 48, avx2);
-mc_rep_funcs(qpel_v, 10, 32, 64, avx2);
+mc_rep_funcs(qpel_v, 10, 16, 32, avx2)
+mc_rep_funcs(qpel_v, 10, 16, 48, avx2)
+mc_rep_funcs(qpel_v, 10, 32, 64, avx2)
 
-mc_rep_funcs(qpel_hv, 10, 16, 32, avx2);
-mc_rep_funcs(qpel_hv, 10, 16, 48, avx2);
-mc_rep_funcs(qpel_hv, 10, 32, 64, avx2);
+mc_rep_funcs(qpel_hv, 10, 16, 32, avx2)
+mc_rep_funcs(qpel_hv, 10, 16, 48, avx2)
+mc_rep_funcs(qpel_hv, 10, 32, 64, avx2)
 
 #endif //AVX2
 
-mc_rep_funcs(pel_pixels, 8, 16, 64, sse4);
-mc_rep_funcs(pel_pixels, 8, 16, 48, sse4);
-mc_rep_funcs(pel_pixels, 8, 16, 32, sse4);
-mc_rep_funcs(pel_pixels, 8,  8, 24, sse4);
-mc_rep_funcs(pel_pixels,10,  8, 64, sse4);
-mc_rep_funcs(pel_pixels,10,  8, 48, sse4);
-mc_rep_funcs(pel_pixels,10,  8, 32, sse4);
-mc_rep_funcs(pel_pixels,10,  8, 24, sse4);
-mc_rep_funcs(pel_pixels,10,  8, 16, sse4);
-mc_rep_funcs(pel_pixels,10,  4, 12, sse4);
-mc_rep_funcs(pel_pixels,12,  8, 64, sse4);
-mc_rep_funcs(pel_pixels,12,  8, 48, sse4);
-mc_rep_funcs(pel_pixels,12,  8, 32, sse4);
-mc_rep_funcs(pel_pixels,12,  8, 24, sse4);
-mc_rep_funcs(pel_pixels,12,  8, 16, sse4);
-mc_rep_funcs(pel_pixels,12,  4, 12, sse4);
-
-mc_rep_funcs(epel_h, 8, 16, 64, sse4);
-mc_rep_funcs(epel_h, 8, 16, 48, sse4);
-mc_rep_funcs(epel_h, 8, 16, 32, sse4);
-mc_rep_funcs(epel_h, 8,  8, 24, sse4);
-mc_rep_funcs(epel_h,10,  8, 64, sse4);
-mc_rep_funcs(epel_h,10,  8, 48, sse4);
-mc_rep_funcs(epel_h,10,  8, 32, sse4);
-mc_rep_funcs(epel_h,10,  8, 24, sse4);
-mc_rep_funcs(epel_h,10,  8, 16, sse4);
-mc_rep_funcs(epel_h,10,  4, 12, sse4);
-mc_rep_funcs(epel_h,12,  8, 64, sse4);
-mc_rep_funcs(epel_h,12,  8, 48, sse4);
-mc_rep_funcs(epel_h,12,  8, 32, sse4);
-mc_rep_funcs(epel_h,12,  8, 24, sse4);
-mc_rep_funcs(epel_h,12,  8, 16, sse4);
-mc_rep_funcs(epel_h,12,  4, 12, sse4);
-mc_rep_funcs(epel_v, 8, 16, 64, sse4);
-mc_rep_funcs(epel_v, 8, 16, 48, sse4);
-mc_rep_funcs(epel_v, 8, 16, 32, sse4);
-mc_rep_funcs(epel_v, 8,  8, 24, sse4);
-mc_rep_funcs(epel_v,10,  8, 64, sse4);
-mc_rep_funcs(epel_v,10,  8, 48, sse4);
-mc_rep_funcs(epel_v,10,  8, 32, sse4);
-mc_rep_funcs(epel_v,10,  8, 24, sse4);
-mc_rep_funcs(epel_v,10,  8, 16, sse4);
-mc_rep_funcs(epel_v,10,  4, 12, sse4);
-mc_rep_funcs(epel_v,12,  8, 64, sse4);
-mc_rep_funcs(epel_v,12,  8, 48, sse4);
-mc_rep_funcs(epel_v,12,  8, 32, sse4);
-mc_rep_funcs(epel_v,12,  8, 24, sse4);
-mc_rep_funcs(epel_v,12,  8, 16, sse4);
-mc_rep_funcs(epel_v,12,  4, 12, sse4);
-mc_rep_funcs(epel_hv, 8, 16, 64, sse4);
-mc_rep_funcs(epel_hv, 8, 16, 48, sse4);
-mc_rep_funcs(epel_hv, 8, 16, 32, sse4);
-mc_rep_funcs(epel_hv, 8,  8, 24, sse4);
-mc_rep_funcs2(epel_hv,8,  8,  4, 12, sse4);
-mc_rep_funcs(epel_hv,10,  8, 64, sse4);
-mc_rep_funcs(epel_hv,10,  8, 48, sse4);
-mc_rep_funcs(epel_hv,10,  8, 32, sse4);
-mc_rep_funcs(epel_hv,10,  8, 24, sse4);
-mc_rep_funcs(epel_hv,10,  8, 16, sse4);
-mc_rep_funcs(epel_hv,10,  4, 12, sse4);
-mc_rep_funcs(epel_hv,12,  8, 64, sse4);
-mc_rep_funcs(epel_hv,12,  8, 48, sse4);
-mc_rep_funcs(epel_hv,12,  8, 32, sse4);
-mc_rep_funcs(epel_hv,12,  8, 24, sse4);
-mc_rep_funcs(epel_hv,12,  8, 16, sse4);
-mc_rep_funcs(epel_hv,12,  4, 12, sse4);
-
-mc_rep_funcs(qpel_h, 8, 16, 64, sse4);
-mc_rep_funcs(qpel_h, 8, 16, 48, sse4);
-mc_rep_funcs(qpel_h, 8, 16, 32, sse4);
-mc_rep_funcs(qpel_h, 8,  8, 24, sse4);
-mc_rep_funcs(qpel_h,10,  8, 64, sse4);
-mc_rep_funcs(qpel_h,10,  8, 48, sse4);
-mc_rep_funcs(qpel_h,10,  8, 32, sse4);
-mc_rep_funcs(qpel_h,10,  8, 24, sse4);
-mc_rep_funcs(qpel_h,10,  8, 16, sse4);
-mc_rep_funcs(qpel_h,10,  4, 12, sse4);
-mc_rep_funcs(qpel_h,12,  8, 64, sse4);
-mc_rep_funcs(qpel_h,12,  8, 48, sse4);
-mc_rep_funcs(qpel_h,12,  8, 32, sse4);
-mc_rep_funcs(qpel_h,12,  8, 24, sse4);
-mc_rep_funcs(qpel_h,12,  8, 16, sse4);
-mc_rep_funcs(qpel_h,12,  4, 12, sse4);
-mc_rep_funcs(qpel_v, 8, 16, 64, sse4);
-mc_rep_funcs(qpel_v, 8, 16, 48, sse4);
-mc_rep_funcs(qpel_v, 8, 16, 32, sse4);
-mc_rep_funcs(qpel_v, 8,  8, 24, sse4);
-mc_rep_funcs(qpel_v,10,  8, 64, sse4);
-mc_rep_funcs(qpel_v,10,  8, 48, sse4);
-mc_rep_funcs(qpel_v,10,  8, 32, sse4);
-mc_rep_funcs(qpel_v,10,  8, 24, sse4);
-mc_rep_funcs(qpel_v,10,  8, 16, sse4);
-mc_rep_funcs(qpel_v,10,  4, 12, sse4);
-mc_rep_funcs(qpel_v,12,  8, 64, sse4);
-mc_rep_funcs(qpel_v,12,  8, 48, sse4);
-mc_rep_funcs(qpel_v,12,  8, 32, sse4);
-mc_rep_funcs(qpel_v,12,  8, 24, sse4);
-mc_rep_funcs(qpel_v,12,  8, 16, sse4);
-mc_rep_funcs(qpel_v,12,  4, 12, sse4);
-mc_rep_funcs(qpel_hv, 8,  8, 64, sse4);
-mc_rep_funcs(qpel_hv, 8,  8, 48, sse4);
-mc_rep_funcs(qpel_hv, 8,  8, 32, sse4);
-mc_rep_funcs(qpel_hv, 8,  8, 24, sse4);
-mc_rep_funcs(qpel_hv, 8,  8, 16, sse4);
-mc_rep_funcs2(qpel_hv,8,  8,  4, 12, sse4);
-mc_rep_funcs(qpel_hv,10,  8, 64, sse4);
-mc_rep_funcs(qpel_hv,10,  8, 48, sse4);
-mc_rep_funcs(qpel_hv,10,  8, 32, sse4);
-mc_rep_funcs(qpel_hv,10,  8, 24, sse4);
-mc_rep_funcs(qpel_hv,10,  8, 16, sse4);
-mc_rep_funcs(qpel_hv,10,  4, 12, sse4);
-mc_rep_funcs(qpel_hv,12,  8, 64, sse4);
-mc_rep_funcs(qpel_hv,12,  8, 48, sse4);
-mc_rep_funcs(qpel_hv,12,  8, 32, sse4);
-mc_rep_funcs(qpel_hv,12,  8, 24, sse4);
-mc_rep_funcs(qpel_hv,12,  8, 16, sse4);
-mc_rep_funcs(qpel_hv,12,  4, 12, sse4);
+mc_rep_funcs(pel_pixels, 8, 16, 64, sse4)
+mc_rep_funcs(pel_pixels, 8, 16, 48, sse4)
+mc_rep_funcs(pel_pixels, 8, 16, 32, sse4)
+mc_rep_funcs(pel_pixels, 8,  8, 24, sse4)
+mc_rep_funcs(pel_pixels,10,  8, 64, sse4)
+mc_rep_funcs(pel_pixels,10,  8, 48, sse4)
+mc_rep_funcs(pel_pixels,10,  8, 32, sse4)
+mc_rep_funcs(pel_pixels,10,  8, 24, sse4)
+mc_rep_funcs(pel_pixels,10,  8, 16, sse4)
+mc_rep_funcs(pel_pixels,10,  4, 12, sse4)
+mc_rep_funcs(pel_pixels,12,  8, 64, sse4)
+mc_rep_funcs(pel_pixels,12,  8, 48, sse4)
+mc_rep_funcs(pel_pixels,12,  8, 32, sse4)
+mc_rep_funcs(pel_pixels,12,  8, 24, sse4)
+mc_rep_funcs(pel_pixels,12,  8, 16, sse4)
+mc_rep_funcs(pel_pixels,12,  4, 12, sse4)
+
+mc_rep_funcs(epel_h, 8, 16, 64, sse4)
+mc_rep_funcs(epel_h, 8, 16, 48, sse4)
+mc_rep_funcs(epel_h, 8, 16, 32, sse4)
+mc_rep_funcs(epel_h, 8,  8, 24, sse4)
+mc_rep_funcs(epel_h,10,  8, 64, sse4)
+mc_rep_funcs(epel_h,10,  8, 48, sse4)
+mc_rep_funcs(epel_h,10,  8, 32, sse4)
+mc_rep_funcs(epel_h,10,  8, 24, sse4)
+mc_rep_funcs(epel_h,10,  8, 16, sse4)
+mc_rep_funcs(epel_h,10,  4, 12, sse4)
+mc_rep_funcs(epel_h,12,  8, 64, sse4)
+mc_rep_funcs(epel_h,12,  8, 48, sse4)
+mc_rep_funcs(epel_h,12,  8, 32, sse4)
+mc_rep_funcs(epel_h,12,  8, 24, sse4)
+mc_rep_funcs(epel_h,12,  8, 16, sse4)
+mc_rep_funcs(epel_h,12,  4, 12, sse4)
+mc_rep_funcs(epel_v, 8, 16, 64, sse4)
+mc_rep_funcs(epel_v, 8, 16, 48, sse4)
+mc_rep_funcs(epel_v, 8, 16, 32, sse4)
+mc_rep_funcs(epel_v, 8,  8, 24, sse4)
+mc_rep_funcs(epel_v,10,  8, 64, sse4)
+mc_rep_funcs(epel_v,10,  8, 48, sse4)
+mc_rep_funcs(epel_v,10,  8, 32, sse4)
+mc_rep_funcs(epel_v,10,  8, 24, sse4)
+mc_rep_funcs(epel_v,10,  8, 16, sse4)
+mc_rep_funcs(epel_v,10,  4, 12, sse4)
+mc_rep_funcs(epel_v,12,  8, 64, sse4)
+mc_rep_funcs(epel_v,12,  8, 48, sse4)
+mc_rep_funcs(epel_v,12,  8, 32, sse4)
+mc_rep_funcs(epel_v,12,  8, 24, sse4)
+mc_rep_funcs(epel_v,12,  8, 16, sse4)
+mc_rep_funcs(epel_v,12,  4, 12, sse4)
+mc_rep_funcs(epel_hv, 8, 16, 64, sse4)
+mc_rep_funcs(epel_hv, 8, 16, 48, sse4)
+mc_rep_funcs(epel_hv, 8, 16, 32, sse4)
+mc_rep_funcs(epel_hv, 8,  8, 24, sse4)
+mc_rep_funcs2(epel_hv,8,  8,  4, 12, sse4)
+mc_rep_funcs(epel_hv,10,  8, 64, sse4)
+mc_rep_funcs(epel_hv,10,  8, 48, sse4)
+mc_rep_funcs(epel_hv,10,  8, 32, sse4)
+mc_rep_funcs(epel_hv,10,  8, 24, sse4)
+mc_rep_funcs(epel_hv,10,  8, 16, sse4)
+mc_rep_funcs(epel_hv,10,  4, 12, sse4)
+mc_rep_funcs(epel_hv,12,  8, 64, sse4)
+mc_rep_funcs(epel_hv,12,  8, 48, sse4)
+mc_rep_funcs(epel_hv,12,  8, 32, sse4)
+mc_rep_funcs(epel_hv,12,  8, 24, sse4)
+mc_rep_funcs(epel_hv,12,  8, 16, sse4)
+mc_rep_funcs(epel_hv,12,  4, 12, sse4)
+
+mc_rep_funcs(qpel_h, 8, 16, 64, sse4)
+mc_rep_funcs(qpel_h, 8, 16, 48, sse4)
+mc_rep_funcs(qpel_h, 8, 16, 32, sse4)
+mc_rep_funcs(qpel_h, 8,  8, 24, sse4)
+mc_rep_funcs(qpel_h,10,  8, 64, sse4)
+mc_rep_funcs(qpel_h,10,  8, 48, sse4)
+mc_rep_funcs(qpel_h,10,  8, 32, sse4)
+mc_rep_funcs(qpel_h,10,  8, 24, sse4)
+mc_rep_funcs(qpel_h,10,  8, 16, sse4)
+mc_rep_funcs(qpel_h,10,  4, 12, sse4)
+mc_rep_funcs(qpel_h,12,  8, 64, sse4)
+mc_rep_funcs(qpel_h,12,  8, 48, sse4)
+mc_rep_funcs(qpel_h,12,  8, 32, sse4)
+mc_rep_funcs(qpel_h,12,  8, 24, sse4)
+mc_rep_funcs(qpel_h,12,  8, 16, sse4)
+mc_rep_funcs(qpel_h,12,  4, 12, sse4)
+mc_rep_funcs(qpel_v, 8, 16, 64, sse4)
+mc_rep_funcs(qpel_v, 8, 16, 48, sse4)
+mc_rep_funcs(qpel_v, 8, 16, 32, sse4)
+mc_rep_funcs(qpel_v, 8,  8, 24, sse4)
+mc_rep_funcs(qpel_v,10,  8, 64, sse4)
+mc_rep_funcs(qpel_v,10,  8, 48, sse4)
+mc_rep_funcs(qpel_v,10,  8, 32, sse4)
+mc_rep_funcs(qpel_v,10,  8, 24, sse4)
+mc_rep_funcs(qpel_v,10,  8, 16, sse4)
+mc_rep_funcs(qpel_v,10,  4, 12, sse4)
+mc_rep_funcs(qpel_v,12,  8, 64, sse4)
+mc_rep_funcs(qpel_v,12,  8, 48, sse4)
+mc_rep_funcs(qpel_v,12,  8, 32, sse4)
+mc_rep_funcs(qpel_v,12,  8, 24, sse4)
+mc_rep_funcs(qpel_v,12,  8, 16, sse4)
+mc_rep_funcs(qpel_v,12,  4, 12, sse4)
+mc_rep_funcs(qpel_hv, 8,  8, 64, sse4)
+mc_rep_funcs(qpel_hv, 8,  8, 48, sse4)
+mc_rep_funcs(qpel_hv, 8,  8, 32, sse4)
+mc_rep_funcs(qpel_hv, 8,  8, 24, sse4)
+mc_rep_funcs(qpel_hv, 8,  8, 16, sse4)
+mc_rep_funcs2(qpel_hv,8,  8,  4, 12, sse4)
+mc_rep_funcs(qpel_hv,10,  8, 64, sse4)
+mc_rep_funcs(qpel_hv,10,  8, 48, sse4)
+mc_rep_funcs(qpel_hv,10,  8, 32, sse4)
+mc_rep_funcs(qpel_hv,10,  8, 24, sse4)
+mc_rep_funcs(qpel_hv,10,  8, 16, sse4)
+mc_rep_funcs(qpel_hv,10,  4, 12, sse4)
+mc_rep_funcs(qpel_hv,12,  8, 64, sse4)
+mc_rep_funcs(qpel_hv,12,  8, 48, sse4)
+mc_rep_funcs(qpel_hv,12,  8, 32, sse4)
+mc_rep_funcs(qpel_hv,12,  8, 24, sse4)
+mc_rep_funcs(qpel_hv,12,  8, 16, sse4)
+mc_rep_funcs(qpel_hv,12,  4, 12, sse4)
 
 #define mc_rep_uni_w(bitd, step, W, opt) \
 void ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, int16_t *_src, \
@@ -441,26 +436,26 @@ void ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststri
     }                                                                                                                   \
 }
 
-mc_rep_uni_w(8, 6, 12, sse4);
-mc_rep_uni_w(8, 8, 16, sse4);
-mc_rep_uni_w(8, 8, 24, sse4);
-mc_rep_uni_w(8, 8, 32, sse4);
-mc_rep_uni_w(8, 8, 48, sse4);
-mc_rep_uni_w(8, 8, 64, sse4);
-
-mc_rep_uni_w(10, 6, 12, sse4);
-mc_rep_uni_w(10, 8, 16, sse4);
-mc_rep_uni_w(10, 8, 24, sse4);
-mc_rep_uni_w(10, 8, 32, sse4);
-mc_rep_uni_w(10, 8, 48, sse4);
-mc_rep_uni_w(10, 8, 64, sse4);
-
-mc_rep_uni_w(12, 6, 12, sse4);
-mc_rep_uni_w(12, 8, 16, sse4);
-mc_rep_uni_w(12, 8, 24, sse4);
-mc_rep_uni_w(12, 8, 32, sse4);
-mc_rep_uni_w(12, 8, 48, sse4);
-mc_rep_uni_w(12, 8, 64, sse4);
+mc_rep_uni_w(8, 6, 12, sse4)
+mc_rep_uni_w(8, 8, 16, sse4)
+mc_rep_uni_w(8, 8, 24, sse4)
+mc_rep_uni_w(8, 8, 32, sse4)
+mc_rep_uni_w(8, 8, 48, sse4)
+mc_rep_uni_w(8, 8, 64, sse4)
+
+mc_rep_uni_w(10, 6, 12, sse4)
+mc_rep_uni_w(10, 8, 16, sse4)
+mc_rep_uni_w(10, 8, 24, sse4)
+mc_rep_uni_w(10, 8, 32, sse4)
+mc_rep_uni_w(10, 8, 48, sse4)
+mc_rep_uni_w(10, 8, 64, sse4)
+
+mc_rep_uni_w(12, 6, 12, sse4)
+mc_rep_uni_w(12, 8, 16, sse4)
+mc_rep_uni_w(12, 8, 24, sse4)
+mc_rep_uni_w(12, 8, 32, sse4)
+mc_rep_uni_w(12, 8, 48, sse4)
+mc_rep_uni_w(12, 8, 64, sse4)
 
 #define mc_rep_bi_w(bitd, step, W, opt) \
 void ff_hevc_put_hevc_bi_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, int16_t *_src, \
@@ -480,26 +475,26 @@ void ff_hevc_put_hevc_bi_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststrid
     }                                                                                                                   \
 }
 
-mc_rep_bi_w(8, 6, 12, sse4);
-mc_rep_bi_w(8, 8, 16, sse4);
-mc_rep_bi_w(8, 8, 24, sse4);
-mc_rep_bi_w(8, 8, 32, sse4);
-mc_rep_bi_w(8, 8, 48, sse4);
-mc_rep_bi_w(8, 8, 64, sse4);
-
-mc_rep_bi_w(10, 6, 12, sse4);
-mc_rep_bi_w(10, 8, 16, sse4);
-mc_rep_bi_w(10, 8, 24, sse4);
-mc_rep_bi_w(10, 8, 32, sse4);
-mc_rep_bi_w(10, 8, 48, sse4);
-mc_rep_bi_w(10, 8, 64, sse4);
-
-mc_rep_bi_w(12, 6, 12, sse4);
-mc_rep_bi_w(12, 8, 16, sse4);
-mc_rep_bi_w(12, 8, 24, sse4);
-mc_rep_bi_w(12, 8, 32, sse4);
-mc_rep_bi_w(12, 8, 48, sse4);
-mc_rep_bi_w(12, 8, 64, sse4);
+mc_rep_bi_w(8, 6, 12, sse4)
+mc_rep_bi_w(8, 8, 16, sse4)
+mc_rep_bi_w(8, 8, 24, sse4)
+mc_rep_bi_w(8, 8, 32, sse4)
+mc_rep_bi_w(8, 8, 48, sse4)
+mc_rep_bi_w(8, 8, 64, sse4)
+
+mc_rep_bi_w(10, 6, 12, sse4)
+mc_rep_bi_w(10, 8, 16, sse4)
+mc_rep_bi_w(10, 8, 24, sse4)
+mc_rep_bi_w(10, 8, 32, sse4)
+mc_rep_bi_w(10, 8, 48, sse4)
+mc_rep_bi_w(10, 8, 64, sse4)
+
+mc_rep_bi_w(12, 6, 12, sse4)
+mc_rep_bi_w(12, 8, 16, sse4)
+mc_rep_bi_w(12, 8, 24, sse4)
+mc_rep_bi_w(12, 8, 32, sse4)
+mc_rep_bi_w(12, 8, 48, sse4)
+mc_rep_bi_w(12, 8, 64, sse4)
 
 #define mc_uni_w_func(name, bitd, W, opt) \
 void ff_hevc_put_hevc_uni_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride,         \
@@ -513,51 +508,51 @@ void ff_hevc_put_hevc_uni_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t
     ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(_dst, _dststride, temp, height, denom, _wx, _ox);\
 }
 
-#define mc_uni_w_funcs(name, bitd, opt)       \
-        mc_uni_w_func(name, bitd, 4, opt);    \
-        mc_uni_w_func(name, bitd, 8, opt);    \
-        mc_uni_w_func(name, bitd, 12, opt);   \
-        mc_uni_w_func(name, bitd, 16, opt);   \
-        mc_uni_w_func(name, bitd, 24, opt);   \
-        mc_uni_w_func(name, bitd, 32, opt);   \
-        mc_uni_w_func(name, bitd, 48, opt);   \
+#define mc_uni_w_funcs(name, bitd, opt)      \
+        mc_uni_w_func(name, bitd, 4, opt)    \
+        mc_uni_w_func(name, bitd, 8, opt)    \
+        mc_uni_w_func(name, bitd, 12, opt)   \
+        mc_uni_w_func(name, bitd, 16, opt)   \
+        mc_uni_w_func(name, bitd, 24, opt)   \
+        mc_uni_w_func(name, bitd, 32, opt)   \
+        mc_uni_w_func(name, bitd, 48, opt)   \
         mc_uni_w_func(name, bitd, 64, opt)
 
-mc_uni_w_funcs(pel_pixels, 8, sse4);
-mc_uni_w_func(pel_pixels, 8, 6, sse4);
-mc_uni_w_funcs(epel_h, 8, sse4);
-mc_uni_w_func(epel_h, 8, 6, sse4);
-mc_uni_w_funcs(epel_v, 8, sse4);
-mc_uni_w_func(epel_v, 8, 6, sse4);
-mc_uni_w_funcs(epel_hv, 8, sse4);
-mc_uni_w_func(epel_hv, 8, 6, sse4);
-mc_uni_w_funcs(qpel_h, 8, sse4);
-mc_uni_w_funcs(qpel_v, 8, sse4);
-mc_uni_w_funcs(qpel_hv, 8, sse4);
-
-mc_uni_w_funcs(pel_pixels, 10, sse4);
-mc_uni_w_func(pel_pixels, 10, 6, sse4);
-mc_uni_w_funcs(epel_h, 10, sse4);
-mc_uni_w_func(epel_h, 10, 6, sse4);
-mc_uni_w_funcs(epel_v, 10, sse4);
-mc_uni_w_func(epel_v, 10, 6, sse4);
-mc_uni_w_funcs(epel_hv, 10, sse4);
-mc_uni_w_func(epel_hv, 10, 6, sse4);
-mc_uni_w_funcs(qpel_h, 10, sse4);
-mc_uni_w_funcs(qpel_v, 10, sse4);
-mc_uni_w_funcs(qpel_hv, 10, sse4);
-
-mc_uni_w_funcs(pel_pixels, 12, sse4);
-mc_uni_w_func(pel_pixels, 12, 6, sse4);
-mc_uni_w_funcs(epel_h, 12, sse4);
-mc_uni_w_func(epel_h, 12, 6, sse4);
-mc_uni_w_funcs(epel_v, 12, sse4);
-mc_uni_w_func(epel_v, 12, 6, sse4);
-mc_uni_w_funcs(epel_hv, 12, sse4);
-mc_uni_w_func(epel_hv, 12, 6, sse4);
-mc_uni_w_funcs(qpel_h, 12, sse4);
-mc_uni_w_funcs(qpel_v, 12, sse4);
-mc_uni_w_funcs(qpel_hv, 12, sse4);
+mc_uni_w_funcs(pel_pixels, 8, sse4)
+mc_uni_w_func(pel_pixels, 8, 6, sse4)
+mc_uni_w_funcs(epel_h, 8, sse4)
+mc_uni_w_func(epel_h, 8, 6, sse4)
+mc_uni_w_funcs(epel_v, 8, sse4)
+mc_uni_w_func(epel_v, 8, 6, sse4)
+mc_uni_w_funcs(epel_hv, 8, sse4)
+mc_uni_w_func(epel_hv, 8, 6, sse4)
+mc_uni_w_funcs(qpel_h, 8, sse4)
+mc_uni_w_funcs(qpel_v, 8, sse4)
+mc_uni_w_funcs(qpel_hv, 8, sse4)
+
+mc_uni_w_funcs(pel_pixels, 10, sse4)
+mc_uni_w_func(pel_pixels, 10, 6, sse4)
+mc_uni_w_funcs(epel_h, 10, sse4)
+mc_uni_w_func(epel_h, 10, 6, sse4)
+mc_uni_w_funcs(epel_v, 10, sse4)
+mc_uni_w_func(epel_v, 10, 6, sse4)
+mc_uni_w_funcs(epel_hv, 10, sse4)
+mc_uni_w_func(epel_hv, 10, 6, sse4)
+mc_uni_w_funcs(qpel_h, 10, sse4)
+mc_uni_w_funcs(qpel_v, 10, sse4)
+mc_uni_w_funcs(qpel_hv, 10, sse4)
+
+mc_uni_w_funcs(pel_pixels, 12, sse4)
+mc_uni_w_func(pel_pixels, 12, 6, sse4)
+mc_uni_w_funcs(epel_h, 12, sse4)
+mc_uni_w_func(epel_h, 12, 6, sse4)
+mc_uni_w_funcs(epel_v, 12, sse4)
+mc_uni_w_func(epel_v, 12, 6, sse4)
+mc_uni_w_funcs(epel_hv, 12, sse4)
+mc_uni_w_func(epel_hv, 12, 6, sse4)
+mc_uni_w_funcs(qpel_h, 12, sse4)
+mc_uni_w_funcs(qpel_v, 12, sse4)
+mc_uni_w_funcs(qpel_hv, 12, sse4)
 
 #define mc_bi_w_func(name, bitd, W, opt) \
 void ff_hevc_put_hevc_bi_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride,           \
@@ -573,51 +568,51 @@ void ff_hevc_put_hevc_bi_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _
                                               height, denom, _wx0, _wx1, _ox0, _ox1);                \
 }
 
-#define mc_bi_w_funcs(name, bitd, opt)       \
-        mc_bi_w_func(name, bitd, 4, opt);    \
-        mc_bi_w_func(name, bitd, 8, opt);    \
-        mc_bi_w_func(name, bitd, 12, opt);   \
-        mc_bi_w_func(name, bitd, 16, opt);   \
-        mc_bi_w_func(name, bitd, 24, opt);   \
-        mc_bi_w_func(name, bitd, 32, opt);   \
-        mc_bi_w_func(name, bitd, 48, opt);   \
+#define mc_bi_w_funcs(name, bitd, opt)      \
+        mc_bi_w_func(name, bitd, 4, opt)    \
+        mc_bi_w_func(name, bitd, 8, opt)    \
+        mc_bi_w_func(name, bitd, 12, opt)   \
+        mc_bi_w_func(name, bitd, 16, opt)   \
+        mc_bi_w_func(name, bitd, 24, opt)   \
+        mc_bi_w_func(name, bitd, 32, opt)   \
+        mc_bi_w_func(name, bitd, 48, opt)   \
         mc_bi_w_func(name, bitd, 64, opt)
 
-mc_bi_w_funcs(pel_pixels, 8, sse4);
-mc_bi_w_func(pel_pixels, 8, 6, sse4);
-mc_bi_w_funcs(epel_h, 8, sse4);
-mc_bi_w_func(epel_h, 8, 6, sse4);
-mc_bi_w_funcs(epel_v, 8, sse4);
-mc_bi_w_func(epel_v, 8, 6, sse4);
-mc_bi_w_funcs(epel_hv, 8, sse4);
-mc_bi_w_func(epel_hv, 8, 6, sse4);
-mc_bi_w_funcs(qpel_h, 8, sse4);
-mc_bi_w_funcs(qpel_v, 8, sse4);
-mc_bi_w_funcs(qpel_hv, 8, sse4);
-
-mc_bi_w_funcs(pel_pixels, 10, sse4);
-mc_bi_w_func(pel_pixels, 10, 6, sse4);
-mc_bi_w_funcs(epel_h, 10, sse4);
-mc_bi_w_func(epel_h, 10, 6, sse4);
-mc_bi_w_funcs(epel_v, 10, sse4);
-mc_bi_w_func(epel_v, 10, 6, sse4);
-mc_bi_w_funcs(epel_hv, 10, sse4);
-mc_bi_w_func(epel_hv, 10, 6, sse4);
-mc_bi_w_funcs(qpel_h, 10, sse4);
-mc_bi_w_funcs(qpel_v, 10, sse4);
-mc_bi_w_funcs(qpel_hv, 10, sse4);
-
-mc_bi_w_funcs(pel_pixels, 12, sse4);
-mc_bi_w_func(pel_pixels, 12, 6, sse4);
-mc_bi_w_funcs(epel_h, 12, sse4);
-mc_bi_w_func(epel_h, 12, 6, sse4);
-mc_bi_w_funcs(epel_v, 12, sse4);
-mc_bi_w_func(epel_v, 12, 6, sse4);
-mc_bi_w_funcs(epel_hv, 12, sse4);
-mc_bi_w_func(epel_hv, 12, 6, sse4);
-mc_bi_w_funcs(qpel_h, 12, sse4);
-mc_bi_w_funcs(qpel_v, 12, sse4);
-mc_bi_w_funcs(qpel_hv, 12, sse4);
+mc_bi_w_funcs(pel_pixels, 8, sse4)
+mc_bi_w_func(pel_pixels, 8, 6, sse4)
+mc_bi_w_funcs(epel_h, 8, sse4)
+mc_bi_w_func(epel_h, 8, 6, sse4)
+mc_bi_w_funcs(epel_v, 8, sse4)
+mc_bi_w_func(epel_v, 8, 6, sse4)
+mc_bi_w_funcs(epel_hv, 8, sse4)
+mc_bi_w_func(epel_hv, 8, 6, sse4)
+mc_bi_w_funcs(qpel_h, 8, sse4)
+mc_bi_w_funcs(qpel_v, 8, sse4)
+mc_bi_w_funcs(qpel_hv, 8, sse4)
+
+mc_bi_w_funcs(pel_pixels, 10, sse4)
+mc_bi_w_func(pel_pixels, 10, 6, sse4)
+mc_bi_w_funcs(epel_h, 10, sse4)
+mc_bi_w_func(epel_h, 10, 6, sse4)
+mc_bi_w_funcs(epel_v, 10, sse4)
+mc_bi_w_func(epel_v, 10, 6, sse4)
+mc_bi_w_funcs(epel_hv, 10, sse4)
+mc_bi_w_func(epel_hv, 10, 6, sse4)
+mc_bi_w_funcs(qpel_h, 10, sse4)
+mc_bi_w_funcs(qpel_v, 10, sse4)
+mc_bi_w_funcs(qpel_hv, 10, sse4)
+
+mc_bi_w_funcs(pel_pixels, 12, sse4)
+mc_bi_w_func(pel_pixels, 12, 6, sse4)
+mc_bi_w_funcs(epel_h, 12, sse4)
+mc_bi_w_func(epel_h, 12, 6, sse4)
+mc_bi_w_funcs(epel_v, 12, sse4)
+mc_bi_w_func(epel_v, 12, 6, sse4)
+mc_bi_w_funcs(epel_hv, 12, sse4)
+mc_bi_w_func(epel_hv, 12, 6, sse4)
+mc_bi_w_funcs(qpel_h, 12, sse4)
+mc_bi_w_funcs(qpel_v, 12, sse4)
+mc_bi_w_funcs(qpel_hv, 12, sse4)
 #endif //ARCH_X86_64 && HAVE_SSE4_EXTERNAL
 
 #define SAO_BAND_FILTER_FUNCS(bitd, opt)                                                                                   \
@@ -630,17 +625,17 @@ void ff_hevc_sao_band_filter_32_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptr
 void ff_hevc_sao_band_filter_48_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
                                             int16_t *sao_offset_val, int sao_left_class, int width, int height);           \
 void ff_hevc_sao_band_filter_64_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src, \
-                                             int16_t *sao_offset_val, int sao_left_class, int width, int height)
-
-SAO_BAND_FILTER_FUNCS(8,  sse2);
-SAO_BAND_FILTER_FUNCS(10, sse2);
-SAO_BAND_FILTER_FUNCS(12, sse2);
-SAO_BAND_FILTER_FUNCS(8,   avx);
-SAO_BAND_FILTER_FUNCS(10,  avx);
-SAO_BAND_FILTER_FUNCS(12,  avx);
-SAO_BAND_FILTER_FUNCS(8,  avx2);
-SAO_BAND_FILTER_FUNCS(10, avx2);
-SAO_BAND_FILTER_FUNCS(12, avx2);
+                                             int16_t *sao_offset_val, int sao_left_class, int width, int height);
+
+SAO_BAND_FILTER_FUNCS(8,  sse2)
+SAO_BAND_FILTER_FUNCS(10, sse2)
+SAO_BAND_FILTER_FUNCS(12, sse2)
+SAO_BAND_FILTER_FUNCS(8,   avx)
+SAO_BAND_FILTER_FUNCS(10,  avx)
+SAO_BAND_FILTER_FUNCS(12,  avx)
+SAO_BAND_FILTER_FUNCS(8,  avx2)
+SAO_BAND_FILTER_FUNCS(10, avx2)
+SAO_BAND_FILTER_FUNCS(12, avx2)
 
 #define SAO_BAND_INIT(bitd, opt) do {                                       \
     c->sao_band_filter[0]      = ff_hevc_sao_band_filter_8_##bitd##_##opt;  \
@@ -662,12 +657,12 @@ void ff_hevc_sao_edge_filter_48_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptr
 void ff_hevc_sao_edge_filter_64_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val, \
                                                int eo, int width, int height);                                              \
 
-SAO_EDGE_FILTER_FUNCS(8, ssse3);
-SAO_EDGE_FILTER_FUNCS(8, avx2);
-SAO_EDGE_FILTER_FUNCS(10, sse2);
-SAO_EDGE_FILTER_FUNCS(10, avx2);
-SAO_EDGE_FILTER_FUNCS(12, sse2);
-SAO_EDGE_FILTER_FUNCS(12, avx2);
+SAO_EDGE_FILTER_FUNCS(8, ssse3)
+SAO_EDGE_FILTER_FUNCS(8, avx2)
+SAO_EDGE_FILTER_FUNCS(10, sse2)
+SAO_EDGE_FILTER_FUNCS(10, avx2)
+SAO_EDGE_FILTER_FUNCS(12, sse2)
+SAO_EDGE_FILTER_FUNCS(12, avx2)
 
 #define SAO_EDGE_INIT(bitd, opt) do {                                       \
     c->sao_edge_filter[0]      = ff_hevc_sao_edge_filter_8_##bitd##_##opt;  \
@@ -758,6 +753,10 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
             c->transform_add[3]    = ff_hevc_transform_add32_8_avx;
         }
         if (EXTERNAL_AVX2(cpu_flags)) {
+            c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_8_avx2;
+            c->sao_band_filter[1] = ff_hevc_sao_band_filter_16_8_avx2;
+        }
+        if (EXTERNAL_AVX2_FAST(cpu_flags)) {
             c->idct_dc[2] = ff_hevc_idct16x16_dc_8_avx2;
             c->idct_dc[3] = ff_hevc_idct32x32_dc_8_avx2;
             if (ARCH_X86_64) {
@@ -902,7 +901,9 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
             SAO_BAND_INIT(10, avx);
         }
         if (EXTERNAL_AVX2(cpu_flags)) {
-
+            c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_10_avx2;
+        }
+        if (EXTERNAL_AVX2_FAST(cpu_flags)) {
             c->idct_dc[2] = ff_hevc_idct16x16_dc_10_avx2;
             c->idct_dc[3] = ff_hevc_idct32x32_dc_10_avx2;
             if (ARCH_X86_64) {
@@ -1050,9 +1051,7 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
                 c->put_hevc_qpel_bi[9][1][1] = ff_hevc_put_hevc_bi_qpel_hv64_10_avx2;
             }
             SAO_BAND_INIT(10, avx2);
-            c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_10_avx2;
-            c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_10_avx2;
-            c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_10_avx2;
+            SAO_EDGE_INIT(10, avx2);
 
             c->transform_add[2] = ff_hevc_transform_add16_10_avx2;
             c->transform_add[3] = ff_hevc_transform_add32_10_avx2;
@@ -1102,13 +1101,14 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
             SAO_BAND_INIT(12, avx);
         }
         if (EXTERNAL_AVX2(cpu_flags)) {
+            c->sao_band_filter[0] = ff_hevc_sao_band_filter_8_12_avx2;
+        }
+        if (EXTERNAL_AVX2_FAST(cpu_flags)) {
             c->idct_dc[2] = ff_hevc_idct16x16_dc_12_avx2;
             c->idct_dc[3] = ff_hevc_idct32x32_dc_12_avx2;
 
             SAO_BAND_INIT(12, avx2);
-            c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_12_avx2;
-            c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_12_avx2;
-            c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_12_avx2;
+            SAO_EDGE_INIT(12, avx2);
         }
     }
 }
diff --git a/libavcodec/x86/hpeldsp.asm b/libavcodec/x86/hpeldsp.asm
index 2cef8e69..82fb8934 100644
--- a/libavcodec/x86/hpeldsp.asm
+++ b/libavcodec/x86/hpeldsp.asm
@@ -35,7 +35,7 @@ pb_interleave8:  db 0, 4, 1, 5, 2, 6, 3, 7
 
 cextern pw_8192
 
-SECTION_TEXT
+SECTION .text
 
 ; void ff_put_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 %macro PUT_PIXELS8_X2 0
diff --git a/libavcodec/x86/hpeldsp_init.c b/libavcodec/x86/hpeldsp_init.c
index 8c0a0e9a..5c5da283 100644
--- a/libavcodec/x86/hpeldsp_init.c
+++ b/libavcodec/x86/hpeldsp_init.c
@@ -121,11 +121,13 @@ void ff_avg_approx_pixels8_xy2_3dnow(uint8_t *block, const uint8_t *pixels,
 #undef PAVGB
 #undef STATIC
 
+#if HAVE_MMX
 CALL_2X_PIXELS(avg_no_rnd_pixels16_y2_mmx, avg_no_rnd_pixels8_y2_mmx, 8)
 CALL_2X_PIXELS(put_no_rnd_pixels16_y2_mmx, put_no_rnd_pixels8_y2_mmx, 8)
 
 CALL_2X_PIXELS(avg_no_rnd_pixels16_xy2_mmx, avg_no_rnd_pixels8_xy2_mmx, 8)
 CALL_2X_PIXELS(put_no_rnd_pixels16_xy2_mmx, put_no_rnd_pixels8_xy2_mmx, 8)
+#endif
 
 /***********************************/
 /* MMX rounding */
@@ -148,11 +150,13 @@ CALL_2X_PIXELS(put_no_rnd_pixels16_xy2_mmx, put_no_rnd_pixels8_xy2_mmx, 8)
 #undef PAVGBP
 #undef PAVGB
 
+#if HAVE_MMX
 CALL_2X_PIXELS(avg_pixels16_y2_mmx, avg_pixels8_y2_mmx, 8)
 CALL_2X_PIXELS(put_pixels16_y2_mmx, put_pixels8_y2_mmx, 8)
 
 CALL_2X_PIXELS_EXPORT(ff_avg_pixels16_xy2_mmx, ff_avg_pixels8_xy2_mmx, 8)
 CALL_2X_PIXELS_EXPORT(ff_put_pixels16_xy2_mmx, ff_put_pixels8_xy2_mmx, 8)
+#endif
 
 #endif /* HAVE_INLINE_ASM */
 
@@ -230,7 +234,7 @@ static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags, int cpu_flags)
     c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_mmxext;
     c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_mmxext;
 
-    if (!(flags & CODEC_FLAG_BITEXACT)) {
+    if (!(flags & AV_CODEC_FLAG_BITEXACT)) {
         c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmxext;
         c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmxext;
         c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_mmxext;
@@ -240,7 +244,7 @@ static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags, int cpu_flags)
         c->avg_pixels_tab[1][3] = ff_avg_approx_pixels8_xy2_mmxext;
     }
 
-    if (CONFIG_VP3_DECODER && flags & CODEC_FLAG_BITEXACT) {
+    if (CONFIG_VP3_DECODER && flags & AV_CODEC_FLAG_BITEXACT) {
         c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext;
         c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext;
     }
@@ -266,7 +270,7 @@ static void hpeldsp_init_3dnow(HpelDSPContext *c, int flags, int cpu_flags)
     c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_3dnow;
     c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_3dnow;
 
-    if (!(flags & CODEC_FLAG_BITEXACT)){
+    if (!(flags & AV_CODEC_FLAG_BITEXACT)){
         c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow;
         c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_3dnow;
         c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_3dnow;
@@ -276,7 +280,7 @@ static void hpeldsp_init_3dnow(HpelDSPContext *c, int flags, int cpu_flags)
         c->avg_pixels_tab[1][3] = ff_avg_approx_pixels8_xy2_3dnow;
     }
 
-    if (CONFIG_VP3_DECODER && flags & CODEC_FLAG_BITEXACT) {
+    if (CONFIG_VP3_DECODER && flags & AV_CODEC_FLAG_BITEXACT) {
         c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_3dnow;
         c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_3dnow;
     }
diff --git a/libavcodec/x86/hpeldsp_rnd_template.c b/libavcodec/x86/hpeldsp_rnd_template.c
index 8cbc412e..e20d0658 100644
--- a/libavcodec/x86/hpeldsp_rnd_template.c
+++ b/libavcodec/x86/hpeldsp_rnd_template.c
@@ -28,7 +28,7 @@
 #include <stdint.h>
 
 // put_pixels
-static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
+av_unused static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 {
     MOVQ_BFE(mm6);
     __asm__ volatile(
@@ -60,7 +60,7 @@ static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_
         :REG_a, "memory");
 }
 
-static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
+av_unused static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 {
     MOVQ_BFE(mm6);
     __asm__ volatile(
@@ -106,7 +106,7 @@ static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff
         :REG_a, "memory");
 }
 
-static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
+av_unused static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 {
     MOVQ_BFE(mm6);
     __asm__ volatile(
@@ -135,7 +135,7 @@ static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_
         :REG_a, "memory");
 }
 
-static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
+av_unused static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 {
     MOVQ_BFE(mm6);
         __asm__ volatile(
@@ -162,7 +162,7 @@ static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff
             :"memory");
 }
 
-static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
+av_unused static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 {
     MOVQ_BFE(mm6);
     __asm__ volatile(
diff --git a/libavcodec/x86/huffyuvdsp.asm b/libavcodec/x86/huffyuvdsp.asm
index 85ee56df..0dbe5984 100644
--- a/libavcodec/x86/huffyuvdsp.asm
+++ b/libavcodec/x86/huffyuvdsp.asm
@@ -29,7 +29,7 @@ pb_7: times 8 db 7
 pb_zzzz3333zzzzbbbb: db -1,-1,-1,-1,3,3,3,3,-1,-1,-1,-1,11,11,11,11
 pb_zz11zz55zz99zzdd: db -1,-1,1,1,-1,-1,5,5,-1,-1,9,9,-1,-1,13,13
 
-SECTION_TEXT
+SECTION .text
 
 ; void ff_add_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *top,
 ;                                     const uint8_t *diff, int w,
@@ -196,7 +196,7 @@ cglobal add_bytes, 3,4,2, dst, src, w, size
     add   dstq, wq
     add   srcq, wq
     neg     wq
-.3
+.3:
     mov  sizeb, [srcq + wq]
     add [dstq + wq], sizeb
     inc     wq
diff --git a/libavcodec/x86/huffyuvencdsp.asm b/libavcodec/x86/huffyuvencdsp.asm
new file mode 100644
index 00000000..a55a1de6
--- /dev/null
+++ b/libavcodec/x86/huffyuvencdsp.asm
@@ -0,0 +1,150 @@
+;************************************************************************
+;* SIMD-optimized HuffYUV encoding functions
+;* Copyright (c) 2000, 2001 Fabrice Bellard
+;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+;*
+;* MMX optimization by Nick Kurshev <nickols_k@mail.ru>
+;* Conversion to NASM format by Tiancheng "Timothy" Gu <timothygu99@gmail.com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+section .text
+
+; void ff_diff_bytes(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
+;                    intptr_t w);
+%macro DIFF_BYTES_PROLOGUE 0 ; function entry shared by every diff_bytes variant; also defines wq and i
+%if ARCH_X86_32
+cglobal diff_bytes, 3,5,2, dst, src1, src2
+%define wq r4q
+    DECLARE_REG_TMP 3 ; t0 = r3; w is kept in r4 on this path (see wq above)
+    mov               wq, r3mp ; presumably r3mp is the fourth argument (w) - it is not a named cglobal arg here
+%else
+cglobal diff_bytes, 4,5,2, dst, src1, src2, w
+    DECLARE_REG_TMP 4 ; t0 = r4, the fifth register requested from cglobal
+%endif ; ARCH_X86_32
+%define i t0q
+%endmacro
+
+; label to jump to if w < 2 * regsize (the vector loop would not run even once)
+%macro DIFF_BYTES_LOOP_PREP 1
+    mov                i, wq
+    and                i, -2 * regsize ; round w down to a multiple of 2 * regsize
+        jz            %1 ; rounded count is zero: skip the vector loop
+    add             dstq, i ; advance all three pointers past the part the vector loop covers
+    add            src1q, i
+    add            src2q, i
+    neg                i ; i becomes a negative offset that the loop counts up towards 0
+%endmacro
+
+; mov type used for src1q, dstq, first reg, second reg
+%macro DIFF_BYTES_LOOP_CORE 4 ; one iteration: dst = src1 - src2 (bytewise) over 2 * regsize bytes at offset i
+%if mmsize != 16
+    mov%1             %3, [src1q + i]
+    mov%1             %4, [src1q + i + regsize]
+    psubb             %3, [src2q + i] ; src2 is subtracted straight from memory
+    psubb             %4, [src2q + i + regsize]
+    mov%2           [dstq + i], %3
+    mov%2 [regsize + dstq + i], %4
+%else
+    ; SSE enforces alignment of psubb operand
+    mov%1             %3, [src1q + i]
+    movu              %4, [src2q + i] ; so src2 is fetched with an unaligned load into a register first
+    psubb             %3, %4
+    mov%2     [dstq + i], %3
+    mov%1             %3, [src1q + i + regsize] ; second half reuses the same two registers
+    movu              %4, [src2q + i + regsize]
+    psubb             %3, %4
+    mov%2 [regsize + dstq + i], %3
+%endif
+%endmacro
+
+%macro DIFF_BYTES_BODY 2 ; mov type used for src1q, for dstq
+    %define regsize mmsize
+.loop_%1%2: ; main vector loop, 2 * regsize bytes per pass
+    DIFF_BYTES_LOOP_CORE %1, %2, m0, m1
+    add                i, 2 * regsize ; i is negative here and climbs towards 0
+        jl    .loop_%1%2
+.skip_main_%1%2:
+    and               wq, 2 * regsize - 1 ; bytes left over after the main loop
+        jz     .end_%1%2
+%if mmsize > 16
+    ; fall back to narrower xmm
+    %define regsize mmsize / 2
+    DIFF_BYTES_LOOP_PREP .setup_loop_gpr_aa ; note: always targets the "aa" label, whatever %1%2 are
+.loop2_%1%2:
+    DIFF_BYTES_LOOP_CORE %1, %2, xm0, xm1
+    add                i, 2 * regsize
+        jl   .loop2_%1%2
+.setup_loop_gpr_%1%2:
+    and               wq, 2 * regsize - 1 ; bytes left over after the xmm loop
+        jz     .end_%1%2
+%endif
+    add             dstq, wq ; same end-pointer / negative-offset scheme, now one byte at a time
+    add            src1q, wq
+    add            src2q, wq
+    neg               wq
+.loop_gpr_%1%2:
+    mov              t0b, [src1q + wq] ; t0 is the register behind i, which is no longer needed here
+    sub              t0b, [src2q + wq]
+    mov      [dstq + wq], t0b
+    inc               wq
+        jl .loop_gpr_%1%2
+.end_%1%2:
+    REP_RET
+%endmacro
+
+%if ARCH_X86_32
+INIT_MMX mmx ; MMX variant, assembled only when ARCH_X86_32 is set
+DIFF_BYTES_PROLOGUE
+    %define regsize mmsize
+    DIFF_BYTES_LOOP_PREP .skip_main_aa ; w too small for the vector loop: go to the tail handling
+    DIFF_BYTES_BODY    a, a ; single body, no alignment dispatch in this variant
+%undef i
+%endif
+
+INIT_XMM sse2
+DIFF_BYTES_PROLOGUE
+    %define regsize mmsize
+    DIFF_BYTES_LOOP_PREP .skip_main_aa ; w too small for the vector loop: go to the tail handling
+    test            dstq, regsize - 1
+        jnz     .loop_uu ; dst not regsize-aligned: movu for both src1 loads and dst stores
+    test           src1q, regsize - 1
+        jnz     .loop_ua ; only src1 misaligned: movu loads, mova stores
+    DIFF_BYTES_BODY    a, a ; fall-through case: src1 and dst both aligned
+    DIFF_BYTES_BODY    u, a
+    DIFF_BYTES_BODY    u, u
+%undef i
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+DIFF_BYTES_PROLOGUE
+    %define regsize mmsize
+    ; Directly using unaligned SSE2 version is marginally faster than
+    ; branching based on arguments.
+    DIFF_BYTES_LOOP_PREP .skip_main_uu ; small w: enter the "uu" tail (xmm fallback loop, then bytes)
+    test            dstq, regsize - 1
+        jnz     .loop_uu ; dst not regsize-aligned: movu for both src1 loads and dst stores
+    test           src1q, regsize - 1
+        jnz     .loop_ua ; only src1 misaligned: movu loads, mova stores
+    DIFF_BYTES_BODY    a, a ; fall-through case: src1 and dst both aligned
+    DIFF_BYTES_BODY    u, a
+    DIFF_BYTES_BODY    u, u
+%undef i
+%endif
diff --git a/libavcodec/x86/huffyuvencdsp_mmx.c b/libavcodec/x86/huffyuvencdsp_mmx.c
index 63d8e3cc..9767b212 100644
--- a/libavcodec/x86/huffyuvencdsp_mmx.c
+++ b/libavcodec/x86/huffyuvencdsp_mmx.c
@@ -29,35 +29,17 @@
 #include "libavcodec/huffyuvencdsp.h"
 #include "libavcodec/mathops.h"
 
-#if HAVE_INLINE_ASM
-
-static void diff_bytes_mmx(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w)
-{
-    x86_reg i = 0;
-
-    if (w >= 16)
-    __asm__ volatile (
-        "1:                             \n\t"
-        "movq  (%2, %0), %%mm0          \n\t"
-        "movq  (%1, %0), %%mm1          \n\t"
-        "psubb %%mm0, %%mm1             \n\t"
-        "movq %%mm1, (%3, %0)           \n\t"
-        "movq 8(%2, %0), %%mm0          \n\t"
-        "movq 8(%1, %0), %%mm1          \n\t"
-        "psubb %%mm0, %%mm1             \n\t"
-        "movq %%mm1, 8(%3, %0)          \n\t"
-        "add $16, %0                    \n\t"
-        "cmp %4, %0                     \n\t"
-        " jb 1b                         \n\t"
-        : "+r" (i)
-        : "r" (src1), "r" (src2), "r" (dst), "r" ((x86_reg) w - 15));
+void ff_diff_bytes_mmx(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
+                       intptr_t w);
+void ff_diff_bytes_sse2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
+                        intptr_t w);
+void ff_diff_bytes_avx2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
+                        intptr_t w);
 
-    for (; i < w; i++)
-        dst[i + 0] = src1[i + 0] - src2[i + 0];
-}
+#if HAVE_INLINE_ASM
 
 static void sub_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *src1,
-                                        const uint8_t *src2, int w,
+                                        const uint8_t *src2, intptr_t w,
                                         int *left, int *left_top)
 {
     x86_reg i = 0;
@@ -100,15 +82,23 @@ static void sub_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *src1,
 
 av_cold void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c)
 {
-#if HAVE_INLINE_ASM
-    int cpu_flags = av_get_cpu_flags();
+    av_unused int cpu_flags = av_get_cpu_flags();
 
-    if (INLINE_MMX(cpu_flags)) {
-        c->diff_bytes = diff_bytes_mmx;
+    if (ARCH_X86_32 && EXTERNAL_MMX(cpu_flags)) {
+        c->diff_bytes = ff_diff_bytes_mmx;
     }
 
+#if HAVE_INLINE_ASM
     if (INLINE_MMXEXT(cpu_flags)) {
         c->sub_hfyu_median_pred = sub_hfyu_median_pred_mmxext;
     }
 #endif /* HAVE_INLINE_ASM */
+
+    if (EXTERNAL_SSE2(cpu_flags)) {
+        c->diff_bytes = ff_diff_bytes_sse2;
+    }
+
+    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+        c->diff_bytes = ff_diff_bytes_avx2;
+    }
 }
diff --git a/libavcodec/x86/idctdsp.asm b/libavcodec/x86/idctdsp.asm
index 0aa73459..089425a9 100644
--- a/libavcodec/x86/idctdsp.asm
+++ b/libavcodec/x86/idctdsp.asm
@@ -27,7 +27,7 @@ SECTION_RODATA
 
 cextern pb_80
 
-SECTION_TEXT
+SECTION .text
 
 ;--------------------------------------------------------------------------
 ;void ff_put_signed_pixels_clamped(const int16_t *block, uint8_t *pixels,
diff --git a/libavcodec/x86/idctdsp_init.c b/libavcodec/x86/idctdsp_init.c
index 2c26a988..bcf7e5be 100644
--- a/libavcodec/x86/idctdsp_init.c
+++ b/libavcodec/x86/idctdsp_init.c
@@ -85,4 +85,42 @@ av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
         c->put_pixels_clamped        = ff_put_pixels_clamped_sse2;
         c->add_pixels_clamped        = ff_add_pixels_clamped_sse2;
     }
+
+    if (ARCH_X86_64 && avctx->lowres == 0) {
+        if (avctx->bits_per_raw_sample == 10 &&
+        (avctx->idct_algo == FF_IDCT_AUTO ||
+         avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
+         avctx->idct_algo == FF_IDCT_SIMPLE)) {
+        if (EXTERNAL_SSE2(cpu_flags)) {
+            c->idct_put  = ff_simple_idct10_put_sse2;
+            c->idct_add  = NULL;
+            c->idct      = ff_simple_idct10_sse2;
+            c->perm_type = FF_IDCT_PERM_TRANSPOSE;
+
+        }
+        if (EXTERNAL_AVX(cpu_flags)) {
+            c->idct_put  = ff_simple_idct10_put_avx;
+            c->idct_add  = NULL;
+            c->idct      = ff_simple_idct10_avx;
+            c->perm_type = FF_IDCT_PERM_TRANSPOSE;
+        }
+        }
+
+        if (avctx->bits_per_raw_sample == 12 &&
+            (avctx->idct_algo == FF_IDCT_AUTO ||
+             avctx->idct_algo == FF_IDCT_SIMPLEMMX)) {
+            if (EXTERNAL_SSE2(cpu_flags)) {
+                c->idct_put  = ff_simple_idct12_put_sse2;
+                c->idct_add  = NULL;
+                c->idct      = ff_simple_idct12_sse2;
+                c->perm_type = FF_IDCT_PERM_TRANSPOSE;
+            }
+            if (EXTERNAL_AVX(cpu_flags)) {
+                c->idct_put  = ff_simple_idct12_put_avx;
+                c->idct_add  = NULL;
+                c->idct      = ff_simple_idct12_avx;
+                c->perm_type = FF_IDCT_PERM_TRANSPOSE;
+            }
+        }
+    }
 }
diff --git a/libavcodec/x86/imdct36.asm b/libavcodec/x86/imdct36.asm
index ce30b421..409b2c57 100644
--- a/libavcodec/x86/imdct36.asm
+++ b/libavcodec/x86/imdct36.asm
@@ -72,7 +72,7 @@ costabs:  times 4 dd  0.98480773
           times 4 dd  5.73685646
 
 %define SBLIMIT 32
-SECTION_TEXT
+SECTION .text
 
 %macro PSHUFD 3
 %if cpuflag(sse2) && notcpuflag(avx)
@@ -143,6 +143,12 @@ SECTION_TEXT
 %endmacro
 
 %macro STORE 4
+%if cpuflag(sse4)
+    movss     [%3       ], %1
+    extractps [%3 +   %4], %1, 1
+    extractps [%3 + 2*%4], %1, 2
+    extractps [%3 + 3*%4], %1, 3
+%else
     movhlps %2, %1
     movss   [%3       ], %1
     movss   [%3 + 2*%4], %2
@@ -150,6 +156,7 @@ SECTION_TEXT
     movss   [%3 +   %4], %1
     movhlps %2, %1
     movss   [%3 + 3*%4], %2
+%endif
 %endmacro
 
 %macro LOAD 4
diff --git a/libavcodec/x86/jpeg2000dsp.asm b/libavcodec/x86/jpeg2000dsp.asm
new file mode 100644
index 00000000..56b5fbd6
--- /dev/null
+++ b/libavcodec/x86/jpeg2000dsp.asm
@@ -0,0 +1,144 @@
+;******************************************************************************
+;* SIMD-optimized JPEG2000 DSP functions
+;* Copyright (c) 2014 Nicolas Bertrand
+;* Copyright (c) 2015 James Almer
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA 32
+
+pf_ict0: times 8 dd 1.402
+pf_ict1: times 8 dd 0.34413
+pf_ict2: times 8 dd 0.71414
+pf_ict3: times 8 dd 1.772
+
+SECTION .text
+
+;***********************************************************************
+; ff_ict_float_<opt>(float *src0, float *src1, float *src2, int csize)
+;***********************************************************************
+%macro ICT_FLOAT 1 ; in-place float transform of three buffers; %1 is passed on to cglobal (vector register count)
+cglobal ict_float, 4, 4, %1, src0, src1, src2, csize
+    shl  csized, 2 ; element count -> byte count (4-byte floats)
+    add   src0q, csizeq ; move each pointer to the end of its buffer ...
+    add   src1q, csizeq
+    add   src2q, csizeq
+    neg  csizeq ; ... and index it with a negative offset that counts up to 0
+    movaps   m6, [pf_ict0]
+    movaps   m7, [pf_ict1]
+    %define ICT0 m6
+    %define ICT1 m7
+
+%if ARCH_X86_64
+    movaps   m8, [pf_ict2] ; 64-bit builds keep ICT2 in a register (m8)
+    %define ICT2 m8
+%if cpuflag(avx)
+    movaps   m3, [pf_ict3] ; the AVX loop never uses m3 as a temporary, so it can hold ICT3
+    %define ICT3 m3
+%else
+    movaps   m9, [pf_ict3]
+    %define ICT3 m9
+%endif
+
+%else ; ARCH_X86_32
+    %define ICT2 [pf_ict2]
+%if cpuflag(avx)
+    movaps   m3, [pf_ict3]
+    %define ICT3 m3
+%else
+    %define ICT3 [pf_ict3]
+%endif
+
+%endif ; ARCH
+
+align 16
+.loop:
+    movaps   m0, [src0q+csizeq]
+    movaps   m1, [src1q+csizeq]
+    movaps   m2, [src2q+csizeq]
+
+%if cpuflag(avx)
+    mulps    m5, m1, ICT1 ; m5 = src1 * ICT1
+    mulps    m4, m2, ICT0 ; m4 = src2 * ICT0
+    mulps    m1, m1, ICT3 ; m1 = src1 * ICT3
+    mulps    m2, m2, ICT2 ; m2 = src2 * ICT2
+    subps    m5, m0, m5 ; m5 = src0 - src1 * ICT1
+%else ; sse
+    movaps   m3, m1 ; two-operand form: copy the inputs first, then compute the same values as the AVX branch
+    movaps   m4, m2
+    movaps   m5, m0
+    mulps    m3, ICT1
+    mulps    m4, ICT0
+    mulps    m1, ICT3
+    mulps    m2, ICT2
+    subps    m5, m3
+%endif
+    addps    m4, m4, m0 ; m4 = src0 + src2 * ICT0
+    addps    m0, m0, m1 ; m0 = src0 + src1 * ICT3
+    subps    m5, m5, m2 ; m5 = src0 - src1 * ICT1 - src2 * ICT2
+
+    movaps   [src0q+csizeq], m4 ; results are written back over the inputs
+    movaps   [src2q+csizeq], m0
+    movaps   [src1q+csizeq], m5
+    add  csizeq, mmsize
+    jl .loop ; keep going while the offset is still negative
+    REP_RET
+%endmacro
+
+INIT_XMM sse
+ICT_FLOAT 10
+INIT_YMM avx
+ICT_FLOAT 9
+
+;***************************************************************************
+; ff_rct_int_<opt>(int32_t *src0, int32_t *src1, int32_t *src2, int csize)
+;***************************************************************************
+%macro RCT_INT 0 ; in-place integer transform of three int32 buffers
+cglobal rct_int, 4, 4, 4, src0, src1, src2, csize
+    shl  csized, 2 ; element count -> byte count (4-byte ints)
+    add   src0q, csizeq ; move each pointer to the end of its buffer ...
+    add   src1q, csizeq
+    add   src2q, csizeq
+    neg  csizeq ; ... and index it with a negative offset that counts up to 0
+
+align 16
+.loop:
+    mova   m1, [src1q+csizeq]
+    mova   m2, [src2q+csizeq]
+    mova   m0, [src0q+csizeq]
+    paddd  m3, m1, m2
+    psrad  m3, 2 ; m3 = (src1 + src2) >> 2, arithmetic shift
+    psubd  m0, m3 ; m0 = src0 - m3
+    paddd  m1, m0 ; m1 = src1 + m0
+    paddd  m2, m0 ; m2 = src2 + m0
+    mova   [src1q+csizeq], m0 ; results are stored rotated: m0 -> src1, m1 -> src2, m2 -> src0
+    mova   [src2q+csizeq], m1
+    mova   [src0q+csizeq], m2
+    add  csizeq, mmsize
+    jl .loop ; keep going while the offset is still negative
+    REP_RET
+%endmacro
+
+INIT_XMM sse2
+RCT_INT
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+RCT_INT
+%endif
diff --git a/libavcodec/x86/jpeg2000dsp_init.c b/libavcodec/x86/jpeg2000dsp_init.c
new file mode 100644
index 00000000..baa81383
--- /dev/null
+++ b/libavcodec/x86/jpeg2000dsp_init.c
@@ -0,0 +1,50 @@
+/*
+ * SIMD optimized JPEG 2000 DSP functions
+ * Copyright (c) 2015 James Almer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/jpeg2000dsp.h"
+
+void ff_ict_float_sse(void *src0, void *src1, void *src2, int csize);
+void ff_ict_float_avx(void *src0, void *src1, void *src2, int csize);
+void ff_rct_int_sse2 (void *src0, void *src1, void *src2, int csize);
+void ff_rct_int_avx2 (void *src0, void *src1, void *src2, int csize);
+
+av_cold void ff_jpeg2000dsp_init_x86(Jpeg2000DSPContext *c) /* Point c->mct_decode[] at the x86 SIMD routines the running CPU supports. */
+{
+    int cpu_flags = av_get_cpu_flags(); /* queried once; every check below tests this value */
+    if (EXTERNAL_SSE(cpu_flags)) {
+        c->mct_decode[FF_DWT97] = ff_ict_float_sse; /* FF_DWT97 slot: SSE version */
+    }
+
+    if (EXTERNAL_SSE2(cpu_flags)) {
+        c->mct_decode[FF_DWT53] = ff_rct_int_sse2; /* FF_DWT53 slot: SSE2 version */
+    }
+
+    if (EXTERNAL_AVX_FAST(cpu_flags)) { /* checked after SSE, so this assignment replaces the SSE one */
+        c->mct_decode[FF_DWT97] = ff_ict_float_avx;
+    }
+
+    if (EXTERNAL_AVX2_FAST(cpu_flags)) { /* checked after SSE2, so this assignment replaces the SSE2 one */
+        c->mct_decode[FF_DWT53] = ff_rct_int_avx2; /* NOTE(review): the asm builds this symbol only under HAVE_AVX2_EXTERNAL; presumably the macro folds to 0 otherwise - confirm */
+    }
+}
diff --git a/libavcodec/x86/lossless_audiodsp.asm b/libavcodec/x86/lossless_audiodsp.asm
index 64b769f7..5597dada 100644
--- a/libavcodec/x86/lossless_audiodsp.asm
+++ b/libavcodec/x86/lossless_audiodsp.asm
@@ -20,7 +20,7 @@
 
 %include "libavutil/x86/x86util.asm"
 
-SECTION_TEXT
+SECTION .text
 
 %macro SCALARPRODUCT 0
 ; int ff_scalarproduct_and_madd_int16(int16_t *v1, int16_t *v2, int16_t *v3,
diff --git a/libavcodec/x86/lossless_videodsp.asm b/libavcodec/x86/lossless_videodsp.asm
index e6c23e79..f06fcdf7 100644
--- a/libavcodec/x86/lossless_videodsp.asm
+++ b/libavcodec/x86/lossless_videodsp.asm
@@ -29,7 +29,7 @@ pb_67: times 8 db  6, 7
 pb_zzzz2323zzzzabab: db -1,-1,-1,-1, 2, 3, 2, 3,-1,-1,-1,-1,10,11,10,11
 pb_zzzzzzzz67676767: db -1,-1,-1,-1,-1,-1,-1,-1, 6, 7, 6, 7, 6, 7, 6, 7
 
-SECTION_TEXT
+SECTION .text
 
 %macro INT16_LOOP 2 ; %1 = a/u (aligned/unaligned), %2 = add/sub
     movd    m4, maskd
diff --git a/libavcodec/x86/lossless_videodsp_init.c b/libavcodec/x86/lossless_videodsp_init.c
index 6589024a..b0fbcfef 100644
--- a/libavcodec/x86/lossless_videodsp_init.c
+++ b/libavcodec/x86/lossless_videodsp_init.c
@@ -42,7 +42,7 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c, AVCodecContext *avctx)
         c->diff_int16 = ff_diff_int16_mmx;
     }
 
-    if (EXTERNAL_MMXEXT(cpu_flags) && pix_desc->comp[0].depth_minus1<15) {
+    if (EXTERNAL_MMXEXT(cpu_flags) && pix_desc->comp[0].depth<16) {
         c->add_hfyu_median_pred_int16 = ff_add_hfyu_median_pred_int16_mmxext;
         c->sub_hfyu_median_pred_int16 = ff_sub_hfyu_median_pred_int16_mmxext;
     }
diff --git a/libavcodec/x86/me_cmp.asm b/libavcodec/x86/me_cmp.asm
index 0160dc34..ad06d485 100644
--- a/libavcodec/x86/me_cmp.asm
+++ b/libavcodec/x86/me_cmp.asm
@@ -794,7 +794,7 @@ cglobal vsad_intra%1, 5, 5, 3, v, pix1, pix2, lsize, h
 %endif
     sub       hd, 2
 
-.loop
+.loop:
     lea    pix1q, [pix1q + 2*lsizeq]
 %if %1 == mmsize
     mova      m1, [pix1q]
@@ -875,7 +875,7 @@ cglobal vsad%1_approx, 5, 5, 5, v, pix1, pix2, lsize, h
 %endif
     sub    hd, 2
 
-.loop
+.loop:
     lea pix1q, [pix1q + 2*lsizeq]
     lea pix2q, [pix2q + 2*lsizeq]
     mova   m2, [pix1q]
diff --git a/libavcodec/x86/me_cmp_init.c b/libavcodec/x86/me_cmp_init.c
index 255df506..49f50d0e 100644
--- a/libavcodec/x86/me_cmp_init.c
+++ b/libavcodec/x86/me_cmp_init.c
@@ -573,7 +573,7 @@ av_cold void ff_me_cmp_init_x86(MECmpContext *c, AVCodecContext *avctx)
 
         c->vsad[4] = vsad_intra16_mmx;
 
-        if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
+        if (!(avctx->flags & AV_CODEC_FLAG_BITEXACT)) {
             c->vsad[0] = vsad16_mmx;
         }
     }
@@ -610,7 +610,7 @@ av_cold void ff_me_cmp_init_x86(MECmpContext *c, AVCodecContext *avctx)
         c->vsad[4] = ff_vsad_intra16_mmxext;
         c->vsad[5] = ff_vsad_intra8_mmxext;
 
-        if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
+        if (!(avctx->flags & AV_CODEC_FLAG_BITEXACT)) {
             c->pix_abs[0][3] = ff_sad16_approx_xy2_mmxext;
             c->pix_abs[1][3] = ff_sad8_approx_xy2_mmxext;
 
@@ -634,7 +634,7 @@ av_cold void ff_me_cmp_init_x86(MECmpContext *c, AVCodecContext *avctx)
             c->pix_abs[0][2] = ff_sad16_y2_sse2;
 
             c->vsad[4]       = ff_vsad_intra16_sse2;
-            if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
+            if (!(avctx->flags & AV_CODEC_FLAG_BITEXACT)) {
                 c->pix_abs[0][3] = ff_sad16_approx_xy2_sse2;
                 c->vsad[0]       = ff_vsad16_approx_sse2;
             }
diff --git a/libavcodec/x86/mlpdsp.asm b/libavcodec/x86/mlpdsp.asm
index ce656af1..3dc641e8 100644
--- a/libavcodec/x86/mlpdsp.asm
+++ b/libavcodec/x86/mlpdsp.asm
@@ -21,7 +21,7 @@
 
 %include "libavutil/x86/x86util.asm"
 
-SECTION_TEXT
+SECTION .text
 
 %if ARCH_X86_64
 
diff --git a/libavcodec/x86/mlpdsp_init.c b/libavcodec/x86/mlpdsp_init.c
index e9d9b1bf..7f5e6b11 100644
--- a/libavcodec/x86/mlpdsp_init.c
+++ b/libavcodec/x86/mlpdsp_init.c
@@ -199,6 +199,6 @@ av_cold void ff_mlpdsp_init_x86(MLPDSPContext *c)
 #endif
     if (ARCH_X86_64 && EXTERNAL_SSE4(cpu_flags))
         c->mlp_rematrix_channel = ff_mlp_rematrix_channel_sse4;
-    if (ARCH_X86_64 && EXTERNAL_AVX2(cpu_flags) && cpu_flags & AV_CPU_FLAG_BMI2)
+    if (ARCH_X86_64 && EXTERNAL_AVX2_FAST(cpu_flags) && cpu_flags & AV_CPU_FLAG_BMI2)
         c->mlp_rematrix_channel = ff_mlp_rematrix_channel_avx2_bmi2;
 }
diff --git a/libavcodec/x86/mpegaudiodsp.c b/libavcodec/x86/mpegaudiodsp.c
index 27231674..d969f1df 100644
--- a/libavcodec/x86/mpegaudiodsp.c
+++ b/libavcodec/x86/mpegaudiodsp.c
@@ -30,6 +30,7 @@
 static void imdct36_blocks_ ## CPU(float *out, float *buf, float *in, int count, int switch_point, int block_type);\
 void ff_imdct36_float_ ## CPU(float *out, float *buf, float *in, float *win);
 
+#if HAVE_YASM
 #if ARCH_X86_32
 DECL(sse)
 #endif
@@ -37,6 +38,7 @@ DECL(sse2)
 DECL(sse3)
 DECL(ssse3)
 DECL(avx)
+#endif /* HAVE_YASM */
 
 void ff_four_imdct36_float_sse(float *out, float *buf, float *in, float *win,
                                float *tmpbuf);
@@ -239,7 +241,7 @@ DECL_IMDCT_BLOCKS(avx,avx)
 
 av_cold void ff_mpadsp_init_x86(MPADSPContext *s)
 {
-    int cpu_flags = av_get_cpu_flags();
+    av_unused int cpu_flags = av_get_cpu_flags();
 
     int i, j;
     for (j = 0; j < 4; j++) {
diff --git a/libavcodec/x86/mpegvideo.c b/libavcodec/x86/mpegvideo.c
index 133ae80a..af47422c 100644
--- a/libavcodec/x86/mpegvideo.c
+++ b/libavcodec/x86/mpegvideo.c
@@ -25,6 +25,7 @@
 #include "libavutil/x86/cpu.h"
 #include "libavcodec/avcodec.h"
 #include "libavcodec/mpegvideo.h"
+#include "libavcodec/mpegvideodata.h"
 
 #if HAVE_MMX_INLINE
 
@@ -308,6 +309,9 @@ static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s,
 
     av_assert2(s->block_last_index[n]>=0);
 
+    if (s->q_scale_type) qscale = ff_mpeg2_non_linear_qscale[qscale];
+    else                 qscale <<= 1;
+
     if(s->alternate_scan) nCoeffs= 63; //FIXME
     else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
 
@@ -345,8 +349,8 @@ __asm__ volatile(
                 "pxor %%mm5, %%mm5              \n\t" // FIXME slow
                 "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
                 "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
-                "psraw $3, %%mm0                \n\t"
-                "psraw $3, %%mm1                \n\t"
+                "psraw $4, %%mm0                \n\t"
+                "psraw $4, %%mm1                \n\t"
                 "pxor %%mm2, %%mm0              \n\t"
                 "pxor %%mm3, %%mm1              \n\t"
                 "psubw %%mm2, %%mm0             \n\t"
@@ -373,6 +377,9 @@ static void dct_unquantize_mpeg2_inter_mmx(MpegEncContext *s,
 
     av_assert2(s->block_last_index[n]>=0);
 
+    if (s->q_scale_type) qscale = ff_mpeg2_non_linear_qscale[qscale];
+    else                 qscale <<= 1;
+
     if(s->alternate_scan) nCoeffs= 63; //FIXME
     else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
 
@@ -410,8 +417,8 @@ __asm__ volatile(
                 "pxor %%mm5, %%mm5              \n\t" // FIXME slow
                 "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
                 "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
-                "psrlw $4, %%mm0                \n\t"
-                "psrlw $4, %%mm1                \n\t"
+                "psrlw $5, %%mm0                \n\t"
+                "psrlw $5, %%mm1                \n\t"
                 "pxor %%mm2, %%mm0              \n\t"
                 "pxor %%mm3, %%mm1              \n\t"
                 "psubw %%mm2, %%mm0             \n\t"
@@ -454,7 +461,7 @@ av_cold void ff_mpv_common_init_x86(MpegEncContext *s)
         s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_mmx;
         s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_mmx;
         s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_mmx;
-        if (!(s->avctx->flags & CODEC_FLAG_BITEXACT))
+        if (!(s->avctx->flags & AV_CODEC_FLAG_BITEXACT))
             s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_mmx;
         s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_mmx;
     }
diff --git a/libavcodec/x86/mpegvideoenc.c b/libavcodec/x86/mpegvideoenc.c
index b410511c..67b26178 100644
--- a/libavcodec/x86/mpegvideoenc.c
+++ b/libavcodec/x86/mpegvideoenc.c
@@ -86,6 +86,7 @@ DECLARE_ALIGNED(16, static uint16_t, inv_zigzag_direct16)[64];
 #endif /* HAVE_6REGS */
 
 #if HAVE_INLINE_ASM
+#if HAVE_MMX_INLINE
 static void  denoise_dct_mmx(MpegEncContext *s, int16_t *block){
     const int intra= s->mb_intra;
     int *sum= s->dct_error_sum[intra];
@@ -139,7 +140,9 @@ static void  denoise_dct_mmx(MpegEncContext *s, int16_t *block){
         : "r"(block+64)
     );
 }
+#endif /* HAVE_MMX_INLINE */
 
+#if HAVE_SSE2_INLINE
 static void  denoise_dct_sse2(MpegEncContext *s, int16_t *block){
     const int intra= s->mb_intra;
     int *sum= s->dct_error_sum[intra];
@@ -195,6 +198,7 @@ static void  denoise_dct_sse2(MpegEncContext *s, int16_t *block){
                             "%xmm4", "%xmm5", "%xmm6", "%xmm7")
     );
 }
+#endif /* HAVE_SSE2_INLINE */
 #endif /* HAVE_INLINE_ASM */
 
 av_cold void ff_dct_encode_init_x86(MpegEncContext *s)
diff --git a/libavcodec/x86/mpegvideoenc_template.c b/libavcodec/x86/mpegvideoenc_template.c
index 1899ba23..da76459c 100644
--- a/libavcodec/x86/mpegvideoenc_template.c
+++ b/libavcodec/x86/mpegvideoenc_template.c
@@ -24,6 +24,7 @@
 
 #include "libavutil/internal.h"
 #include "libavutil/x86/asm.h"
+#include "libavcodec/mpegutils.h"
 #include "libavcodec/mpegvideo.h"
 #include "fdct.h"
 
diff --git a/libavcodec/x86/mpegvideoencdsp_init.c b/libavcodec/x86/mpegvideoencdsp_init.c
index 2a4db615..532836ce 100644
--- a/libavcodec/x86/mpegvideoencdsp_init.c
+++ b/libavcodec/x86/mpegvideoencdsp_init.c
@@ -242,7 +242,7 @@ av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
 #if HAVE_INLINE_ASM
 
     if (INLINE_MMX(cpu_flags)) {
-        if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
+        if (!(avctx->flags & AV_CODEC_FLAG_BITEXACT)) {
             c->try_8x8basis = try_8x8basis_mmx;
         }
         c->add_8x8basis = add_8x8basis_mmx;
@@ -253,7 +253,7 @@ av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
     }
 
     if (INLINE_AMD3DNOW(cpu_flags)) {
-        if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
+        if (!(avctx->flags & AV_CODEC_FLAG_BITEXACT)) {
             c->try_8x8basis = try_8x8basis_3dnow;
         }
         c->add_8x8basis = add_8x8basis_3dnow;
@@ -261,7 +261,7 @@ av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
 
 #if HAVE_SSSE3_INLINE
     if (INLINE_SSSE3(cpu_flags)) {
-        if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
+        if (!(avctx->flags & AV_CODEC_FLAG_BITEXACT)) {
             c->try_8x8basis = try_8x8basis_ssse3;
         }
         c->add_8x8basis = add_8x8basis_ssse3;
diff --git a/libavcodec/x86/pixblockdsp.asm b/libavcodec/x86/pixblockdsp.asm
index 7c5377b2..2864d0c9 100644
--- a/libavcodec/x86/pixblockdsp.asm
+++ b/libavcodec/x86/pixblockdsp.asm
@@ -80,54 +80,50 @@ cglobal get_pixels, 3, 4, 5
     mova  [r0+0x70], m3
     RET
 
-INIT_MMX mmx
 ; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
 ;                         int stride);
-cglobal diff_pixels, 4,5
-    movsxdifnidn r3, r3d
-    pxor         m7, m7
-    add          r0,  128
-    mov          r4, -128
-.loop:
-    mova         m0, [r1]
-    mova         m2, [r2]
-    mova         m1, m0
-    mova         m3, m2
-    punpcklbw    m0, m7
-    punpckhbw    m1, m7
-    punpcklbw    m2, m7
-    punpckhbw    m3, m7
-    psubw        m0, m2
-    psubw        m1, m3
-    mova  [r0+r4+0], m0
-    mova  [r0+r4+8], m1
-    add          r1, r3
-    add          r2, r3
-    add          r4, 16
-    jne .loop
-    REP_RET
-
-INIT_XMM sse2
-cglobal diff_pixels, 4, 5, 5
+%macro DIFF_PIXELS 0
+cglobal diff_pixels, 4,5,5
     movsxdifnidn r3, r3d
     pxor         m4, m4
     add          r0,  128
     mov          r4, -128
 .loop:
-    movh         m0, [r1]
-    movh         m2, [r2]
-    movh         m1, [r1+r3]
-    movh         m3, [r2+r3]
+    movq         m0, [r1]
+    movq         m2, [r2]
+%if mmsize == 8
+    movq         m1, m0
+    movq         m3, m2
+    punpcklbw    m0, m4
+    punpckhbw    m1, m4
+    punpcklbw    m2, m4
+    punpckhbw    m3, m4
+%else
+    movq         m1, [r1+r3]
+    movq         m3, [r2+r3]
     punpcklbw    m0, m4
     punpcklbw    m1, m4
     punpcklbw    m2, m4
     punpcklbw    m3, m4
+%endif
     psubw        m0, m2
     psubw        m1, m3
-    mova [r0+r4+0 ], m0
-    mova [r0+r4+16], m1
+    mova  [r0+r4+0], m0
+    mova  [r0+r4+mmsize], m1
+%if mmsize == 8
+    add          r1, r3
+    add          r2, r3
+%else
     lea          r1, [r1+r3*2]
     lea          r2, [r2+r3*2]
-    add          r4, 32
+%endif
+    add          r4, 2 * mmsize
     jne .loop
     RET
+%endmacro
+
+INIT_MMX mmx
+DIFF_PIXELS
+
+INIT_XMM sse2
+DIFF_PIXELS
diff --git a/libavcodec/x86/pngdsp.asm b/libavcodec/x86/pngdsp.asm
index 7bd1ab5c..50e4255d 100644
--- a/libavcodec/x86/pngdsp.asm
+++ b/libavcodec/x86/pngdsp.asm
@@ -27,7 +27,7 @@ SECTION_RODATA
 
 cextern pw_255
 
-SECTION_TEXT
+SECTION .text
 
 ; %1 = nr. of xmm registers used
 %macro ADD_BYTES_FN 1
diff --git a/libavcodec/x86/proresdsp.asm b/libavcodec/x86/proresdsp.asm
index 632ece6e..16fc262a 100644
--- a/libavcodec/x86/proresdsp.asm
+++ b/libavcodec/x86/proresdsp.asm
@@ -24,287 +24,43 @@
 
 %include "libavutil/x86/x86util.asm"
 
-%define W1sh2 22725 ; W1 = 90901 = 22725<<2 + 1
-%define W2sh2 21407 ; W2 = 85627 = 21407<<2 - 1
-%define W3sh2 19265 ; W3 = 77062 = 19265<<2 + 2
-%define W4sh2 16384 ; W4 = 65535 = 16384<<2 - 1
-%define W5sh2 12873 ; W5 = 51491 = 12873<<2 - 1
-%define W6sh2  8867 ; W6 = 35468 =  8867<<2
-%define W7sh2  4520 ; W7 = 18081 =  4520<<2 + 1
-
 %if ARCH_X86_64
 
 SECTION_RODATA
 
-w4_plus_w2: times 4 dw W4sh2, +W2sh2
-w4_min_w2:  times 4 dw W4sh2, -W2sh2
-w4_plus_w6: times 4 dw W4sh2, +W6sh2
-w4_min_w6:  times 4 dw W4sh2, -W6sh2
-w1_plus_w3: times 4 dw W1sh2, +W3sh2
-w3_min_w1:  times 4 dw W3sh2, -W1sh2
-w7_plus_w3: times 4 dw W7sh2, +W3sh2
-w3_min_w7:  times 4 dw W3sh2, -W7sh2
-w1_plus_w5: times 4 dw W1sh2, +W5sh2
-w5_min_w1:  times 4 dw W5sh2, -W1sh2
-w5_plus_w7: times 4 dw W5sh2, +W7sh2
-w7_min_w5:  times 4 dw W7sh2, -W5sh2
 pw_88:      times 8 dw 0x2008
-
 cextern pw_1
 cextern pw_4
-cextern pw_512
 cextern pw_1019
-
-section .text align=16
-
-; interleave data while maintaining source
-; %1=type, %2=dstlo, %3=dsthi, %4=src, %5=interleave
-%macro SBUTTERFLY3 5
-    punpckl%1   m%2, m%4, m%5
-    punpckh%1   m%3, m%4, m%5
-%endmacro
-
-; %1/%2=src1/dst1, %3/%4=dst2, %5/%6=src2, %7=shift
-; action: %3/%4 = %1/%2 - %5/%6; %1/%2 += %5/%6
-;         %1/%2/%3/%4 >>= %7; dword -> word (in %1/%3)
-%macro SUMSUB_SHPK 7
-    psubd       %3,  %1,  %5       ; { a0 - b0 }[0-3]
-    psubd       %4,  %2,  %6       ; { a0 - b0 }[4-7]
-    paddd       %1,  %5            ; { a0 + b0 }[0-3]
-    paddd       %2,  %6            ; { a0 + b0 }[4-7]
-    psrad       %1,  %7
-    psrad       %2,  %7
-    psrad       %3,  %7
-    psrad       %4,  %7
-    packssdw    %1,  %2            ; row[0]
-    packssdw    %3,  %4            ; row[7]
-%endmacro
-
-; %1 = row or col (for rounding variable)
-; %2 = number of bits to shift at the end
-%macro IDCT_1D 2
-    ; a0 = (W4 * row[0]) + (1 << (15 - 1));
-    ; a1 = a0;
-    ; a2 = a0;
-    ; a3 = a0;
-    ; a0 += W2 * row[2];
-    ; a1 += W6 * row[2];
-    ; a2 -= W6 * row[2];
-    ; a3 -= W2 * row[2];
-%ifidn %1, col
-    paddw       m10,[pw_88]
-%endif
-%ifidn %1, row
-    paddw       m10,[pw_1]
-%endif
-    SBUTTERFLY3 wd,  0,  1, 10,  8 ; { row[0], row[2] }[0-3]/[4-7]
-    pmaddwd     m2,  m0, [w4_plus_w6]
-    pmaddwd     m3,  m1, [w4_plus_w6]
-    pmaddwd     m4,  m0, [w4_min_w6]
-    pmaddwd     m5,  m1, [w4_min_w6]
-    pmaddwd     m6,  m0, [w4_min_w2]
-    pmaddwd     m7,  m1, [w4_min_w2]
-    pmaddwd     m0, [w4_plus_w2]
-    pmaddwd     m1, [w4_plus_w2]
-
-    ; a0: -1*row[0]-1*row[2]
-    ; a1: -1*row[0]
-    ; a2: -1*row[0]
-    ; a3: -1*row[0]+1*row[2]
-
-    ; a0 +=   W4*row[4] + W6*row[6]; i.e. -1*row[4]
-    ; a1 -=   W4*row[4] + W2*row[6]; i.e. -1*row[4]-1*row[6]
-    ; a2 -=   W4*row[4] - W2*row[6]; i.e. -1*row[4]+1*row[6]
-    ; a3 +=   W4*row[4] - W6*row[6]; i.e. -1*row[4]
-    SBUTTERFLY3 wd,  8,  9, 13, 12 ; { row[4], row[6] }[0-3]/[4-7]
-    pmaddwd     m10, m8, [w4_plus_w6]
-    pmaddwd     m11, m9, [w4_plus_w6]
-    paddd       m0,  m10            ; a0[0-3]
-    paddd       m1,  m11            ; a0[4-7]
-    pmaddwd     m10, m8, [w4_min_w6]
-    pmaddwd     m11, m9, [w4_min_w6]
-    paddd       m6,  m10           ; a3[0-3]
-    paddd       m7,  m11           ; a3[4-7]
-    pmaddwd     m10, m8, [w4_min_w2]
-    pmaddwd     m11, m9, [w4_min_w2]
-    pmaddwd     m8, [w4_plus_w2]
-    pmaddwd     m9, [w4_plus_w2]
-    psubd       m4,  m10           ; a2[0-3] intermediate
-    psubd       m5,  m11           ; a2[4-7] intermediate
-    psubd       m2,  m8            ; a1[0-3] intermediate
-    psubd       m3,  m9            ; a1[4-7] intermediate
-
-    ; load/store
-    mova   [r2+  0], m0
-    mova   [r2+ 32], m2
-    mova   [r2+ 64], m4
-    mova   [r2+ 96], m6
-    mova        m10,[r2+ 16]       ; { row[1] }[0-7]
-    mova        m8, [r2+ 48]       ; { row[3] }[0-7]
-    mova        m13,[r2+ 80]       ; { row[5] }[0-7]
-    mova        m14,[r2+112]       ; { row[7] }[0-7]
-    mova   [r2+ 16], m1
-    mova   [r2+ 48], m3
-    mova   [r2+ 80], m5
-    mova   [r2+112], m7
-%ifidn %1, row
-    pmullw      m10,[r3+ 16]
-    pmullw      m8, [r3+ 48]
-    pmullw      m13,[r3+ 80]
-    pmullw      m14,[r3+112]
-%endif
-
-    ; b0 = MUL(W1, row[1]);
-    ; MAC(b0, W3, row[3]);
-    ; b1 = MUL(W3, row[1]);
-    ; MAC(b1, -W7, row[3]);
-    ; b2 = MUL(W5, row[1]);
-    ; MAC(b2, -W1, row[3]);
-    ; b3 = MUL(W7, row[1]);
-    ; MAC(b3, -W5, row[3]);
-    SBUTTERFLY3 wd,  0,  1, 10, 8  ; { row[1], row[3] }[0-3]/[4-7]
-    pmaddwd     m2,  m0, [w3_min_w7]
-    pmaddwd     m3,  m1, [w3_min_w7]
-    pmaddwd     m4,  m0, [w5_min_w1]
-    pmaddwd     m5,  m1, [w5_min_w1]
-    pmaddwd     m6,  m0, [w7_min_w5]
-    pmaddwd     m7,  m1, [w7_min_w5]
-    pmaddwd     m0, [w1_plus_w3]
-    pmaddwd     m1, [w1_plus_w3]
-
-    ; b0: +1*row[1]+2*row[3]
-    ; b1: +2*row[1]-1*row[3]
-    ; b2: -1*row[1]-1*row[3]
-    ; b3: +1*row[1]+1*row[3]
-
-    ; MAC(b0,  W5, row[5]);
-    ; MAC(b0,  W7, row[7]);
-    ; MAC(b1, -W1, row[5]);
-    ; MAC(b1, -W5, row[7]);
-    ; MAC(b2,  W7, row[5]);
-    ; MAC(b2,  W3, row[7]);
-    ; MAC(b3,  W3, row[5]);
-    ; MAC(b3, -W1, row[7]);
-    SBUTTERFLY3 wd,  8,  9, 13, 14 ; { row[5], row[7] }[0-3]/[4-7]
-
-    ; b0: -1*row[5]+1*row[7]
-    ; b1: -1*row[5]+1*row[7]
-    ; b2: +1*row[5]+2*row[7]
-    ; b3: +2*row[5]-1*row[7]
-
-    pmaddwd     m10, m8, [w1_plus_w5]
-    pmaddwd     m11, m9, [w1_plus_w5]
-    pmaddwd     m12, m8, [w5_plus_w7]
-    pmaddwd     m13, m9, [w5_plus_w7]
-    psubd       m2,  m10           ; b1[0-3]
-    psubd       m3,  m11           ; b1[4-7]
-    paddd       m0,  m12            ; b0[0-3]
-    paddd       m1,  m13            ; b0[4-7]
-    pmaddwd     m12, m8, [w7_plus_w3]
-    pmaddwd     m13, m9, [w7_plus_w3]
-    pmaddwd     m8, [w3_min_w1]
-    pmaddwd     m9, [w3_min_w1]
-    paddd       m4,  m12           ; b2[0-3]
-    paddd       m5,  m13           ; b2[4-7]
-    paddd       m6,  m8            ; b3[0-3]
-    paddd       m7,  m9            ; b3[4-7]
-
-    ; row[0] = (a0 + b0) >> 15;
-    ; row[7] = (a0 - b0) >> 15;
-    ; row[1] = (a1 + b1) >> 15;
-    ; row[6] = (a1 - b1) >> 15;
-    ; row[2] = (a2 + b2) >> 15;
-    ; row[5] = (a2 - b2) >> 15;
-    ; row[3] = (a3 + b3) >> 15;
-    ; row[4] = (a3 - b3) >> 15;
-    mova        m8, [r2+ 0]        ; a0[0-3]
-    mova        m9, [r2+16]        ; a0[4-7]
-    SUMSUB_SHPK m8,  m9,  m10, m11, m0,  m1,  %2
-    mova        m0, [r2+32]        ; a1[0-3]
-    mova        m1, [r2+48]        ; a1[4-7]
-    SUMSUB_SHPK m0,  m1,  m9,  m11, m2,  m3,  %2
-    mova        m1, [r2+64]        ; a2[0-3]
-    mova        m2, [r2+80]        ; a2[4-7]
-    SUMSUB_SHPK m1,  m2,  m11, m3,  m4,  m5,  %2
-    mova        m2, [r2+96]        ; a3[0-3]
-    mova        m3, [r2+112]       ; a3[4-7]
-    SUMSUB_SHPK m2,  m3,  m4,  m5,  m6,  m7,  %2
-%endmacro
-
-; void ff_prores_idct_put_10_<opt>(uint8_t *pixels, int stride,
-;                                  int16_t *block, const int16_t *qmat);
-%macro idct_put_fn 1
-cglobal prores_idct_put_10, 4, 4, %1
-    movsxd      r1,  r1d
-    pxor        m15, m15           ; zero
-
-    ; for (i = 0; i < 8; i++)
-    ;     idctRowCondDC(block + i*8);
-    mova        m10,[r2+ 0]        ; { row[0] }[0-7]
-    mova        m8, [r2+32]        ; { row[2] }[0-7]
-    mova        m13,[r2+64]        ; { row[4] }[0-7]
-    mova        m12,[r2+96]        ; { row[6] }[0-7]
-
-    pmullw      m10,[r3+ 0]
-    pmullw      m8, [r3+32]
-    pmullw      m13,[r3+64]
-    pmullw      m12,[r3+96]
-
-    IDCT_1D     row, 15
-
-    ; transpose for second part of IDCT
-    TRANSPOSE8x8W 8, 0, 1, 2, 4, 11, 9, 10, 3
-    mova   [r2+ 16], m0
-    mova   [r2+ 48], m2
-    mova   [r2+ 80], m11
-    mova   [r2+112], m10
-    SWAP         8,  10
-    SWAP         1,   8
-    SWAP         4,  13
-    SWAP         9,  12
-
-    ; for (i = 0; i < 8; i++)
-    ;     idctSparseColAdd(dest + i, line_size, block + i);
-    IDCT_1D     col, 18
-
-    ; clip/store
-    mova        m3, [pw_4]
-    mova        m5, [pw_1019]
-    pmaxsw      m8,  m3
-    pmaxsw      m0,  m3
-    pmaxsw      m1,  m3
-    pmaxsw      m2,  m3
-    pmaxsw      m4,  m3
-    pmaxsw      m11, m3
-    pmaxsw      m9,  m3
-    pmaxsw      m10, m3
-    pminsw      m8,  m5
-    pminsw      m0,  m5
-    pminsw      m1,  m5
-    pminsw      m2,  m5
-    pminsw      m4,  m5
-    pminsw      m11, m5
-    pminsw      m9,  m5
-    pminsw      m10, m5
-
-    lea         r2, [r1*3]
-    mova  [r0     ], m8
-    mova  [r0+r1  ], m0
-    mova  [r0+r1*2], m1
-    mova  [r0+r2  ], m2
-    lea         r0, [r0+r1*4]
-    mova  [r0     ], m4
-    mova  [r0+r1  ], m11
-    mova  [r0+r1*2], m9
-    mova  [r0+r2  ], m10
+; Below are defined in simple_idct10.asm built from selecting idctdsp
+cextern w4_plus_w2
+cextern w4_min_w2
+cextern w4_plus_w6
+cextern w4_min_w6
+cextern w1_plus_w3
+cextern w3_min_w1
+cextern w7_plus_w3
+cextern w3_min_w7
+cextern w1_plus_w5
+cextern w5_min_w1
+cextern w5_plus_w7
+cextern w7_min_w5
+
+%include "libavcodec/x86/simple_idct10_template.asm"
+
+SECTION .text
+
+%macro idct_fn 0
+cglobal prores_idct_put_10, 4, 4, 15
+    IDCT_FN    pw_1, 15, pw_88, 18, pw_4, pw_1019, r3
     RET
 %endmacro
 
 INIT_XMM sse2
-idct_put_fn 16
+idct_fn
 %if HAVE_AVX_EXTERNAL
 INIT_XMM avx
-idct_put_fn 16
+idct_fn
 %endif
 
 %endif
diff --git a/libavcodec/x86/qpeldsp.asm b/libavcodec/x86/qpeldsp.asm
index dc0f900c..282faed1 100644
--- a/libavcodec/x86/qpeldsp.asm
+++ b/libavcodec/x86/qpeldsp.asm
@@ -31,7 +31,7 @@ cextern pw_16
 cextern pw_20
 
 
-SECTION_TEXT
+SECTION .text
 
 ; void ff_put_no_rnd_pixels8_l2(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
 %macro PUT_NO_RND_PIXELS8_L2 0
diff --git a/libavcodec/x86/rnd_template.c b/libavcodec/x86/rnd_template.c
index c9fd71ee..ddca4eb5 100644
--- a/libavcodec/x86/rnd_template.c
+++ b/libavcodec/x86/rnd_template.c
@@ -30,7 +30,7 @@
 #include "inline_asm.h"
 
 // put_pixels
-STATIC void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels,
+av_unused STATIC void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels,
                                   ptrdiff_t line_size, int h)
 {
     MOVQ_ZERO(mm7);
@@ -99,7 +99,7 @@ STATIC void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels,
 
 // avg_pixels
 // this routine is 'slightly' suboptimal but mostly unused
-STATIC void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels,
+av_unused STATIC void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels,
                                   ptrdiff_t line_size, int h)
 {
     MOVQ_ZERO(mm7);
diff --git a/libavcodec/x86/rv40dsp_init.c b/libavcodec/x86/rv40dsp_init.c
index bbf9c785..218deb82 100644
--- a/libavcodec/x86/rv40dsp_init.c
+++ b/libavcodec/x86/rv40dsp_init.c
@@ -101,7 +101,7 @@ static void OP ## rv40_qpel ##SIZE ##_mc ##PH ##PV ##OPT(uint8_t *dst,  \
             ff_ ##OP ##rv40_qpel_h ## OPT(dst + i, stride, src + i,     \
                                           stride, SIZE, HCOFF(PH));     \
     }                                                                   \
-};
+}
 
 /** Declare functions for sizes 8 and 16 and given operations
  *  and qpel position. */
@@ -214,7 +214,7 @@ DEFINE_FN(avg, 16, mmx)
 
 av_cold void ff_rv40dsp_init_x86(RV34DSPContext *c)
 {
-    int cpu_flags = av_get_cpu_flags();
+    av_unused int cpu_flags = av_get_cpu_flags();
 
 #if HAVE_MMX_INLINE
     if (INLINE_MMX(cpu_flags)) {
diff --git a/libavcodec/x86/sbrdsp.asm b/libavcodec/x86/sbrdsp.asm
index 083461a1..b6fa5351 100644
--- a/libavcodec/x86/sbrdsp.asm
+++ b/libavcodec/x86/sbrdsp.asm
@@ -34,7 +34,7 @@ ps_noise13      dd  0.0,  1.0, 0.0, -1.0
 cextern         sbr_noise_table
 cextern         ps_neg
 
-SECTION_TEXT
+SECTION .text
 
 INIT_XMM sse
 cglobal sbr_sum_square, 2, 3, 6
@@ -382,6 +382,7 @@ apply_noise_main:
 %else
 %define count m_maxq
 %endif
+    movsxdifnidn    noiseq, noised
     dec    noiseq
     shl    count, 2
 %ifdef PIC
@@ -515,42 +516,42 @@ align 16
     jl .loop
 
     movlhps m1, m1
-    mulps   m4, m1, m2
+    mulps   m2, m1
     mulps   m1, m1
-    addps   m4, m6       ; real_sum1 + x[38][0] * x[39][0], x[38][1] * x[39][1]; imag_sum1 + x[38][0] * x[39][1], x[38][1] * x[39][0];
+    addps   m2, m6       ; real_sum1 + x[38][0] * x[39][0], x[38][1] * x[39][1]; imag_sum1 + x[38][0] * x[39][1], x[38][1] * x[39][0];
     addps   m1, m7       ; real_sum0 + x[38][0] * x[38][0], x[38][1] * x[38][1];
     addps   m6, [rsp   ] ; real_sum1 + x[ 0][0] * x[ 1][0], x[ 0][1] * x[ 1][1]; imag_sum1 + x[ 0][0] * x[ 1][1], x[ 0][1] * x[ 1][0];
     addps   m7, [rsp+16] ; real_sum0 + x[ 0][0] * x[ 0][0], x[ 0][1] * x[ 0][1];
 
-    xorps   m4, [ps_mask3]
+    xorps   m2, [ps_mask3]
     xorps   m5, [ps_mask3]
     xorps   m6, [ps_mask3]
 %if cpuflag(sse3)
-    movshdup m2, m1
-    haddps  m4, m5
+    movshdup m0, m1
+    haddps  m2, m5
     haddps  m7, m6
-    addss   m1, m2
+    addss   m1, m0
 %else
-    movaps  m3, m4
-    movaps  m2, m5
-    movaps  m0, m6
+    movaps  m3, m2
+    movaps  m0, m5
+    movaps  m4, m6
     shufps  m3, m3, q0301
-    shufps  m2, m2, q0301
     shufps  m0, m0, q0301
-    addps   m4, m3
-    addps   m5, m2
-    addps   m6, m0
+    shufps  m4, m4, q0301
+    addps   m2, m3
+    addps   m5, m0
+    addps   m6, m4
 
-    movss   m2, m7
+    movss   m0, m7
     movss   m3, m1
     shufps  m7, m7, q0001
     shufps  m1, m1, q0001
-    addss   m7, m2
+    addss   m7, m0
     addss   m1, m3
-    shufps  m4, m5, q2020
+    shufps  m2, m5, q2020
     shufps  m7, m6, q2020
 %endif
-    movaps  [phiq     ], m4
+    movaps  [phiq     ], m2
     movhps  [phiq+0x18], m7
     movss   [phiq+0x28], m7
     movss   [phiq+0x10], m1
diff --git a/libavcodec/x86/simple_idct.h b/libavcodec/x86/simple_idct.h
index 4a987325..8eeb31e2 100644
--- a/libavcodec/x86/simple_idct.h
+++ b/libavcodec/x86/simple_idct.h
@@ -25,4 +25,16 @@ void ff_simple_idct_mmx(int16_t *block);
 void ff_simple_idct_add_mmx(uint8_t *dest, int line_size, int16_t *block);
 void ff_simple_idct_put_mmx(uint8_t *dest, int line_size, int16_t *block);
 
+void ff_simple_idct10_sse2(int16_t *block);
+void ff_simple_idct10_avx(int16_t *block);
+
+void ff_simple_idct10_put_sse2(uint8_t *dest, int line_size, int16_t *block);
+void ff_simple_idct10_put_avx(uint8_t *dest, int line_size, int16_t *block);
+
+void ff_simple_idct12_sse2(int16_t *block);
+void ff_simple_idct12_avx(int16_t *block);
+
+void ff_simple_idct12_put_sse2(uint8_t *dest, int line_size, int16_t *block);
+void ff_simple_idct12_put_avx(uint8_t *dest, int line_size, int16_t *block);
+
 #endif /* AVCODEC_X86_SIMPLE_IDCT_H */
diff --git a/libavcodec/x86/simple_idct10.asm b/libavcodec/x86/simple_idct10.asm
new file mode 100644
index 00000000..5dee533d
--- /dev/null
+++ b/libavcodec/x86/simple_idct10.asm
@@ -0,0 +1,100 @@
+;******************************************************************************
+;* x86-SIMD-optimized IDCT for prores
+;* this is identical to "simple" IDCT written by Michael Niedermayer
+;* except for the clip range
+;*
+;* Copyright (c) 2011 Ronald S. Bultje <rsbultje@gmail.com>
+;* Copyright (c) 2015 Christophe Gisquet
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+%if ARCH_X86_64
+
+SECTION_RODATA
+
+cextern pw_2
+cextern pw_16
+cextern pw_1023
+cextern pw_4095
+pd_round_12: times 4 dd 1<<(12-1)
+pd_round_15: times 4 dd 1<<(15-1)
+pd_round_19: times 4 dd 1<<(19-1)
+
+%macro CONST_DEC  3
+const %1
+times 4 dw %2, %3
+%endmacro
+
+%define W1sh2 22725 ; W1 = 90901 = 22725<<2 + 1
+%define W2sh2 21407 ; W2 = 85627 = 21407<<2 - 1
+%define W3sh2 19265 ; W3 = 77062 = 19265<<2 + 2
+%define W4sh2 16384 ; W4 = 65535 = 16384<<2 - 1
+%define W5sh2 12873 ; W5 = 51491 = 12873<<2 - 1
+%define W6sh2  8867 ; W6 = 35468 =  8867<<2
+%define W7sh2  4520 ; W7 = 18081 =  4520<<2 + 1
+
+CONST_DEC  w4_plus_w2,   W4sh2, +W2sh2
+CONST_DEC  w4_min_w2,    W4sh2, -W2sh2
+CONST_DEC  w4_plus_w6,   W4sh2, +W6sh2
+CONST_DEC  w4_min_w6,    W4sh2, -W6sh2
+CONST_DEC  w1_plus_w3,   W1sh2, +W3sh2
+CONST_DEC  w3_min_w1,    W3sh2, -W1sh2
+CONST_DEC  w7_plus_w3,   W7sh2, +W3sh2
+CONST_DEC  w3_min_w7,    W3sh2, -W7sh2
+CONST_DEC  w1_plus_w5,   W1sh2, +W5sh2
+CONST_DEC  w5_min_w1,    W5sh2, -W1sh2
+CONST_DEC  w5_plus_w7,   W5sh2, +W7sh2
+CONST_DEC  w7_min_w5,    W7sh2, -W5sh2
+
+%include "libavcodec/x86/simple_idct10_template.asm"
+
+SECTION .text
+
+%macro idct_fn 0
+cglobal simple_idct10, 1, 1, 16
+    IDCT_FN    "", 12, "", 19
+    RET
+
+cglobal simple_idct10_put, 3, 3, 16
+    IDCT_FN    "", 12, "", 19, 0, pw_1023
+    RET
+
+cglobal simple_idct12, 1, 1, 16
+    ; coeffs are already 15bits, adding the offset would cause
+    ; overflow in the input
+    IDCT_FN    "", 15, pw_2, 16
+    RET
+
+cglobal simple_idct12_put, 3, 3, 16
+    ; range isn't known, so the C simple_idct range is used
+    ; Also, using a bias on input overflows, so use the bias
+    ; on output of the first butterfly instead
+    IDCT_FN    "", 15, pw_2, 16, 0, pw_4095
+    RET
+%endmacro
+
+INIT_XMM sse2
+idct_fn
+%if HAVE_AVX_EXTERNAL
+INIT_XMM avx
+idct_fn
+%endif
+
+%endif
diff --git a/libavcodec/x86/simple_idct10_template.asm b/libavcodec/x86/simple_idct10_template.asm
new file mode 100644
index 00000000..e5deb0f2
--- /dev/null
+++ b/libavcodec/x86/simple_idct10_template.asm
@@ -0,0 +1,315 @@
+;******************************************************************************
+;* x86-SIMD-optimized IDCT for prores
+;* this is identical to "simple" IDCT written by Michael Niedermayer
+;* except for the clip range
+;*
+;* Copyright (c) 2011 Ronald S. Bultje <rsbultje@gmail.com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+; add SECTION_RODATA and proper include before including this file!
+
+%if ARCH_X86_64
+
+; interleave data while maintaining source
+; %1=type, %2=dstlo, %3=dsthi, %4=src, %5=interleave
+%macro SBUTTERFLY3 5
+    punpckl%1   m%2, m%4, m%5
+    punpckh%1   m%3, m%4, m%5
+%endmacro
+
+; %1/%2=src1/dst1, %3/%4=dst2, %5/%6=src2, %7=shift
+; action: %3/%4 = %1/%2 - %5/%6; %1/%2 += %5/%6
+;         %1/%2/%3/%4 >>= %7; dword -> word (in %1/%3)
+%macro SUMSUB_SHPK 7
+    psubd       %3,  %1,  %5       ; { a0 - b0 }[0-3]
+    psubd       %4,  %2,  %6       ; { a0 - b0 }[4-7]
+    paddd       %1,  %5            ; { a0 + b0 }[0-3]
+    paddd       %2,  %6            ; { a0 + b0 }[4-7]
+    psrad       %1,  %7
+    psrad       %2,  %7
+    psrad       %3,  %7
+    psrad       %4,  %7
+    packssdw    %1,  %2            ; row[0]
+    packssdw    %3,  %4            ; row[7]
+%endmacro
+
+; %1 = initial bias ("" if nop)
+; %2 = number of bits to shift at the end
+; %3 = qmat (for prores)
+%macro IDCT_1D 2-3
+    ; a0 = (W4 * row[0]) + (1 << (15 - 1));
+    ; a1 = a0;
+    ; a2 = a0;
+    ; a3 = a0;
+    ; a0 += W2 * row[2];
+    ; a1 += W6 * row[2];
+    ; a2 -= W6 * row[2];
+    ; a3 -= W2 * row[2];
+%ifstr %1
+    mova        m15, [pd_round_ %+ %2]
+%else
+    paddw       m10, [%1]
+%endif
+    SBUTTERFLY3 wd,  0,  1, 10,  8 ; { row[0], row[2] }[0-3]/[4-7]
+    pmaddwd     m2,  m0, [w4_plus_w6]
+    pmaddwd     m3,  m1, [w4_plus_w6]
+    pmaddwd     m4,  m0, [w4_min_w6]
+    pmaddwd     m5,  m1, [w4_min_w6]
+    pmaddwd     m6,  m0, [w4_min_w2]
+    pmaddwd     m7,  m1, [w4_min_w2]
+    pmaddwd     m0, [w4_plus_w2]
+    pmaddwd     m1, [w4_plus_w2]
+%ifstr %1
+    ; Adding 1<<(%2-1) for >=15 bits values
+    paddd       m2, m15
+    paddd       m3, m15
+    paddd       m4, m15
+    paddd       m5, m15
+    paddd       m6, m15
+    paddd       m7, m15
+    paddd       m0, m15
+    paddd       m1, m15
+%endif
+
+    ; a0: -1*row[0]-1*row[2]
+    ; a1: -1*row[0]
+    ; a2: -1*row[0]
+    ; a3: -1*row[0]+1*row[2]
+
+    ; a0 +=   W4*row[4] + W6*row[6]; i.e. -1*row[4]
+    ; a1 -=   W4*row[4] + W2*row[6]; i.e. -1*row[4]-1*row[6]
+    ; a2 -=   W4*row[4] - W2*row[6]; i.e. -1*row[4]+1*row[6]
+    ; a3 +=   W4*row[4] - W6*row[6]; i.e. -1*row[4]
+    SBUTTERFLY3 wd,  8,  9, 13, 12 ; { row[4], row[6] }[0-3]/[4-7]
+    pmaddwd     m10, m8, [w4_plus_w6]
+    pmaddwd     m11, m9, [w4_plus_w6]
+    paddd       m0,  m10            ; a0[0-3]
+    paddd       m1,  m11            ; a0[4-7]
+    pmaddwd     m10, m8, [w4_min_w6]
+    pmaddwd     m11, m9, [w4_min_w6]
+    paddd       m6,  m10           ; a3[0-3]
+    paddd       m7,  m11           ; a3[4-7]
+    pmaddwd     m10, m8, [w4_min_w2]
+    pmaddwd     m11, m9, [w4_min_w2]
+    pmaddwd     m8, [w4_plus_w2]
+    pmaddwd     m9, [w4_plus_w2]
+    psubd       m4,  m10           ; a2[0-3] intermediate
+    psubd       m5,  m11           ; a2[4-7] intermediate
+    psubd       m2,  m8            ; a1[0-3] intermediate
+    psubd       m3,  m9            ; a1[4-7] intermediate
+
+    ; load/store
+    mova   [COEFFS+  0], m0
+    mova   [COEFFS+ 32], m2
+    mova   [COEFFS+ 64], m4
+    mova   [COEFFS+ 96], m6
+    mova        m10,[COEFFS+ 16]       ; { row[1] }[0-7]
+    mova        m8, [COEFFS+ 48]       ; { row[3] }[0-7]
+    mova        m13,[COEFFS+ 80]       ; { row[5] }[0-7]
+    mova        m14,[COEFFS+112]       ; { row[7] }[0-7]
+    mova   [COEFFS+ 16], m1
+    mova   [COEFFS+ 48], m3
+    mova   [COEFFS+ 80], m5
+    mova   [COEFFS+112], m7
+%if %0 == 3
+    pmullw      m10,[%3+ 16]
+    pmullw      m8, [%3+ 48]
+    pmullw      m13,[%3+ 80]
+    pmullw      m14,[%3+112]
+%endif
+
+    ; b0 = MUL(W1, row[1]);
+    ; MAC(b0, W3, row[3]);
+    ; b1 = MUL(W3, row[1]);
+    ; MAC(b1, -W7, row[3]);
+    ; b2 = MUL(W5, row[1]);
+    ; MAC(b2, -W1, row[3]);
+    ; b3 = MUL(W7, row[1]);
+    ; MAC(b3, -W5, row[3]);
+    SBUTTERFLY3 wd,  0,  1, 10, 8  ; { row[1], row[3] }[0-3]/[4-7]
+    pmaddwd     m2,  m0, [w3_min_w7]
+    pmaddwd     m3,  m1, [w3_min_w7]
+    pmaddwd     m4,  m0, [w5_min_w1]
+    pmaddwd     m5,  m1, [w5_min_w1]
+    pmaddwd     m6,  m0, [w7_min_w5]
+    pmaddwd     m7,  m1, [w7_min_w5]
+    pmaddwd     m0, [w1_plus_w3]
+    pmaddwd     m1, [w1_plus_w3]
+
+    ; b0: +1*row[1]+2*row[3]
+    ; b1: +2*row[1]-1*row[3]
+    ; b2: -1*row[1]-1*row[3]
+    ; b3: +1*row[1]+1*row[3]
+
+    ; MAC(b0,  W5, row[5]);
+    ; MAC(b0,  W7, row[7]);
+    ; MAC(b1, -W1, row[5]);
+    ; MAC(b1, -W5, row[7]);
+    ; MAC(b2,  W7, row[5]);
+    ; MAC(b2,  W3, row[7]);
+    ; MAC(b3,  W3, row[5]);
+    ; MAC(b3, -W1, row[7]);
+    SBUTTERFLY3 wd,  8,  9, 13, 14 ; { row[5], row[7] }[0-3]/[4-7]
+
+    ; b0: -1*row[5]+1*row[7]
+    ; b1: -1*row[5]+1*row[7]
+    ; b2: +1*row[5]+2*row[7]
+    ; b3: +2*row[5]-1*row[7]
+
+    pmaddwd     m10, m8, [w1_plus_w5]
+    pmaddwd     m11, m9, [w1_plus_w5]
+    pmaddwd     m12, m8, [w5_plus_w7]
+    pmaddwd     m13, m9, [w5_plus_w7]
+    psubd       m2,  m10           ; b1[0-3]
+    psubd       m3,  m11           ; b1[4-7]
+    paddd       m0,  m12            ; b0[0-3]
+    paddd       m1,  m13            ; b0[4-7]
+    pmaddwd     m12, m8, [w7_plus_w3]
+    pmaddwd     m13, m9, [w7_plus_w3]
+    pmaddwd     m8, [w3_min_w1]
+    pmaddwd     m9, [w3_min_w1]
+    paddd       m4,  m12           ; b2[0-3]
+    paddd       m5,  m13           ; b2[4-7]
+    paddd       m6,  m8            ; b3[0-3]
+    paddd       m7,  m9            ; b3[4-7]
+
+    ; row[0] = (a0 + b0) >> 15;
+    ; row[7] = (a0 - b0) >> 15;
+    ; row[1] = (a1 + b1) >> 15;
+    ; row[6] = (a1 - b1) >> 15;
+    ; row[2] = (a2 + b2) >> 15;
+    ; row[5] = (a2 - b2) >> 15;
+    ; row[3] = (a3 + b3) >> 15;
+    ; row[4] = (a3 - b3) >> 15;
+    mova        m8, [COEFFS+ 0]        ; a0[0-3]
+    mova        m9, [COEFFS+16]        ; a0[4-7]
+    SUMSUB_SHPK m8,  m9,  m10, m11, m0,  m1,  %2
+    mova        m0, [COEFFS+32]        ; a1[0-3]
+    mova        m1, [COEFFS+48]        ; a1[4-7]
+    SUMSUB_SHPK m0,  m1,  m9,  m11, m2,  m3,  %2
+    mova        m1, [COEFFS+64]        ; a2[0-3]
+    mova        m2, [COEFFS+80]        ; a2[4-7]
+    SUMSUB_SHPK m1,  m2,  m11, m3,  m4,  m5,  %2
+    mova        m2, [COEFFS+96]        ; a3[0-3]
+    mova        m3, [COEFFS+112]       ; a3[4-7]
+    SUMSUB_SHPK m2,  m3,  m4,  m5,  m6,  m7,  %2
+%endmacro
+
+; void ff_prores_idct_put_10_<opt>(uint8_t *pixels, int stride,
+;                                  int16_t *block, const int16_t *qmat);
+
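+; %1 = row bias (word constant label, or "" to use pd_round_<row shift>)
+; %2 = row shift
+; %3 = column bias (word constant label, or "" to use pd_round_<column shift>)
+; %4 = column shift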
+; %5 = min pixel value
+; %6 = max pixel value
+; %7 = qmat (for prores)
+
+%macro IDCT_FN 4-7
+%if %0 == 4
+    ; No clamping, means pure idct
+%xdefine COEFFS r0
+%else
+    movsxd      r1,  r1d
+%xdefine COEFFS r2
+%endif
+
+    ; for (i = 0; i < 8; i++)
+    ;     idctRowCondDC(block + i*8);
+    mova        m10,[COEFFS+ 0]        ; { row[0] }[0-7]
+    mova        m8, [COEFFS+32]        ; { row[2] }[0-7]
+    mova        m13,[COEFFS+64]        ; { row[4] }[0-7]
+    mova        m12,[COEFFS+96]        ; { row[6] }[0-7]
+
+%if %0 == 7
+    pmullw      m10,[%7+ 0]
+    pmullw      m8, [%7+32]
+    pmullw      m13,[%7+64]
+    pmullw      m12,[%7+96]
+
+    IDCT_1D     %1, %2, %7
+%else
+    IDCT_1D     %1, %2
+%endif
+
+    ; transpose for second part of IDCT
+    TRANSPOSE8x8W 8, 0, 1, 2, 4, 11, 9, 10, 3
+    mova   [COEFFS+ 16], m0
+    mova   [COEFFS+ 48], m2
+    mova   [COEFFS+ 80], m11
+    mova   [COEFFS+112], m10
+    SWAP         8,  10
+    SWAP         1,   8
+    SWAP         4,  13
+    SWAP         9,  12
+
+    ; for (i = 0; i < 8; i++)
+    ;     idctSparseColAdd(dest + i, line_size, block + i);
+    IDCT_1D     %3, %4
+
+    ; clip/store
+%if %0 == 4
+    ; No clamping, means pure idct
+    mova  [r0+  0], m8
+    mova  [r0+ 16], m0
+    mova  [r0+ 32], m1
+    mova  [r0+ 48], m2
+    mova  [r0+ 64], m4
+    mova  [r0+ 80], m11
+    mova  [r0+ 96], m9
+    mova  [r0+112], m10
+%else
+%ifidn %5, 0
+    pxor        m3, m3
+%else
+    mova        m3, [%5]
+%endif
+    mova        m5, [%6]
+    pmaxsw      m8,  m3
+    pmaxsw      m0,  m3
+    pmaxsw      m1,  m3
+    pmaxsw      m2,  m3
+    pmaxsw      m4,  m3
+    pmaxsw      m11, m3
+    pmaxsw      m9,  m3
+    pmaxsw      m10, m3
+    pminsw      m8,  m5
+    pminsw      m0,  m5
+    pminsw      m1,  m5
+    pminsw      m2,  m5
+    pminsw      m4,  m5
+    pminsw      m11, m5
+    pminsw      m9,  m5
+    pminsw      m10, m5
+
+    lea         r2, [r1*3]
+    mova  [r0     ], m8
+    mova  [r0+r1  ], m0
+    mova  [r0+r1*2], m1
+    mova  [r0+r2  ], m2
+    lea         r0, [r0+r1*4]
+    mova  [r0     ], m4
+    mova  [r0+r1  ], m11
+    mova  [r0+r1*2], m9
+    mova  [r0+r2  ], m10
+%endif
+%endmacro
+
+%endif
diff --git a/libavcodec/x86/svq1enc.asm b/libavcodec/x86/svq1enc.asm
index 24ee70f1..a8763283 100644
--- a/libavcodec/x86/svq1enc.asm
+++ b/libavcodec/x86/svq1enc.asm
@@ -21,12 +21,12 @@
 
 %include "libavutil/x86/x86util.asm"
 
-SECTION_TEXT
+SECTION .text
 
 %macro SSD_INT8_VS_INT16 0
 cglobal ssd_int8_vs_int16, 3, 3, 3, pix1, pix2, size
     pxor m0, m0
-.loop
+.loop:
     sub       sizeq, 8
     movq      m1, [pix1q + sizeq]
     mova      m2, [pix2q + sizeq*2]
diff --git a/libavcodec/x86/synth_filter.asm b/libavcodec/x86/synth_filter.asm
new file mode 100644
index 00000000..bc1a48f4
--- /dev/null
+++ b/libavcodec/x86/synth_filter.asm
@@ -0,0 +1,246 @@
+;******************************************************************************
+;* SSE-optimized functions for the DCA decoder
+;* Copyright (C) 2012-2014 Christophe Gisquet <christophe.gisquet@gmail.com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION .text
+
+%macro SETZERO 1
+%if cpuflag(sse2) && notcpuflag(avx)
+    pxor          %1, %1
+%else
+    xorps         %1, %1, %1
+%endif
+%endmacro
+
+%macro SHUF 3
+%if cpuflag(avx)
+    mova          %3, [%2 - 16]
+    vperm2f128    %1, %3, %3, 1
+    vshufps       %1, %1, %1, q0123
+%elif cpuflag(sse2)
+    pshufd        %1, [%2], q0123
+%else
+    mova          %1, [%2]
+    shufps        %1, %1, q0123
+%endif
+%endmacro
+
+%macro INNER_LOOP   1
+    ; reading backwards:  ptr1 = synth_buf + j + i; ptr2 = synth_buf + j - i
+    ;~ a += window[i + j]      * (-synth_buf[15 - i + j])
+    ;~ b += window[i + j + 16] * (synth_buf[i + j])
+    SHUF          m5,  ptr2 + j + (15 - 3) * 4, m6
+    mova          m6, [ptr1 + j]
+%if ARCH_X86_64
+    SHUF         m11,  ptr2 + j + (15 - 3) * 4 - mmsize, m12
+    mova         m12, [ptr1 + j + mmsize]
+%endif
+%if cpuflag(fma3)
+    fmaddps       m2, m6,  [win + %1 + j + 16 * 4], m2
+    fnmaddps      m1, m5,  [win + %1 + j], m1
+%if ARCH_X86_64
+    fmaddps       m8, m12, [win + %1 + j + mmsize + 16 * 4], m8
+    fnmaddps      m7, m11, [win + %1 + j + mmsize], m7
+%endif
+%else ; non-FMA
+    mulps         m6, m6,  [win + %1 + j + 16 * 4]
+    mulps         m5, m5,  [win + %1 + j]
+%if ARCH_X86_64
+    mulps        m12, m12, [win + %1 + j + mmsize + 16 * 4]
+    mulps        m11, m11, [win + %1 + j + mmsize]
+%endif
+    addps         m2, m2, m6
+    subps         m1, m1, m5
+%if ARCH_X86_64
+    addps         m8, m8, m12
+    subps         m7, m7, m11
+%endif
+%endif ; cpuflag(fma3)
+    ;~ c += window[i + j + 32] * (synth_buf[16 + i + j])
+    ;~ d += window[i + j + 48] * (synth_buf[31 - i + j])
+    SHUF          m6,  ptr2 + j + (31 - 3) * 4, m5
+    mova          m5, [ptr1 + j + 16 * 4]
+%if ARCH_X86_64
+    SHUF         m12,  ptr2 + j + (31 - 3) * 4 - mmsize, m11
+    mova         m11, [ptr1 + j + mmsize + 16 * 4]
+%endif
+%if cpuflag(fma3)
+    fmaddps       m3, m5,  [win + %1 + j + 32 * 4], m3
+    fmaddps       m4, m6,  [win + %1 + j + 48 * 4], m4
+%if ARCH_X86_64
+    fmaddps       m9, m11, [win + %1 + j + mmsize + 32 * 4], m9
+    fmaddps      m10, m12, [win + %1 + j + mmsize + 48 * 4], m10
+%endif
+%else ; non-FMA
+    mulps         m5, m5,  [win + %1 + j + 32 * 4]
+    mulps         m6, m6,  [win + %1 + j + 48 * 4]
+%if ARCH_X86_64
+    mulps        m11, m11, [win + %1 + j + mmsize + 32 * 4]
+    mulps        m12, m12, [win + %1 + j + mmsize + 48 * 4]
+%endif
+    addps         m3, m3, m5
+    addps         m4, m4, m6
+%if ARCH_X86_64
+    addps         m9, m9, m11
+    addps        m10, m10, m12
+%endif
+%endif ; cpuflag(fma3)
+    sub            j, 64 * 4
+%endmacro
+
+; void ff_synth_filter_inner_<opt>(float *synth_buf, float synth_buf2[32],
+;                                  const float window[512], float out[32],
+;                                  intptr_t offset, float scale)
+%macro SYNTH_FILTER 0
+cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \
+                              synth_buf, synth_buf2, window, out, off, scale
+%define scale m0
+%if ARCH_X86_32 || WIN64
+%if cpuflag(sse2) && notcpuflag(avx)
+    movd       scale, scalem
+    SPLATD        m0
+%else
+    VBROADCASTSS  m0, scalem
+%endif
+; Make sure offset is in a register and not on the stack
+%define OFFQ  r4q
+%else
+    SPLATD      xmm0
+%if cpuflag(avx)
+    vinsertf128   m0, m0, xmm0, 1
+%endif
+%define OFFQ  offq
+%endif
+    ; prepare inner counter limit 1
+    mov          r5q, 480
+    sub          r5q, offmp
+    and          r5q, -64
+    shl          r5q, 2
+%if ARCH_X86_32 || notcpuflag(avx)
+    mov         OFFQ, r5q
+%define i        r5q
+    mov            i, 16 * 4 - (ARCH_X86_64 + 1) * mmsize  ; main loop counter
+%else
+%define i 0
+%define OFFQ  r5q
+%endif
+
+%define buf2     synth_buf2q
+%if ARCH_X86_32
+    mov         buf2, synth_buf2mp
+%endif
+.mainloop:
+    ; m1 = a  m2 = b  m3 = c  m4 = d
+    SETZERO       m3
+    SETZERO       m4
+    mova          m1, [buf2 + i]
+    mova          m2, [buf2 + i + 16 * 4]
+%if ARCH_X86_32
+%define ptr1     r0q
+%define ptr2     r1q
+%define win      r2q
+%define j        r3q
+    mov          win, windowm
+    mov         ptr1, synth_bufm
+%if ARCH_X86_32 || notcpuflag(avx)
+    add          win, i
+    add         ptr1, i
+%endif
+%else ; ARCH_X86_64
+%define ptr1     r6q
+%define ptr2     r7q ; must be loaded
+%define win      r8q
+%define j        r9q
+    SETZERO       m9
+    SETZERO      m10
+    mova          m7, [buf2 + i + mmsize]
+    mova          m8, [buf2 + i + mmsize + 16 * 4]
+    lea          win, [windowq + i]
+    lea         ptr1, [synth_bufq + i]
+%endif
+    mov         ptr2, synth_bufmp
+    ; prepare the inner loop counter
+    mov            j, OFFQ
+%if ARCH_X86_32 || notcpuflag(avx)
+    sub         ptr2, i
+%endif
+.loop1:
+    INNER_LOOP  0
+    jge       .loop1
+
+    mov            j, 448 * 4
+    sub            j, OFFQ
+    jz          .end
+    sub         ptr1, j
+    sub         ptr2, j
+    add          win, OFFQ ; now at j-64, so define OFFSET
+    sub            j, 64 * 4
+.loop2:
+    INNER_LOOP  64 * 4
+    jge       .loop2
+
+.end:
+%if ARCH_X86_32
+    mov         buf2, synth_buf2m ; needed for next iteration anyway
+    mov         outq, outmp       ; j, which will be set again during it
+%endif
+    ;~ out[i]      = a * scale;
+    ;~ out[i + 16] = b * scale;
+    mulps         m1, m1, scale
+    mulps         m2, m2, scale
+%if ARCH_X86_64
+    mulps         m7, m7, scale
+    mulps         m8, m8, scale
+%endif
+    ;~ synth_buf2[i]      = c;
+    ;~ synth_buf2[i + 16] = d;
+    mova   [buf2 + i +  0 * 4], m3
+    mova   [buf2 + i + 16 * 4], m4
+%if ARCH_X86_64
+    mova   [buf2 + i +  0 * 4 + mmsize], m9
+    mova   [buf2 + i + 16 * 4 + mmsize], m10
+%endif
+    ;~ out[i]      = a;
+    ;~ out[i + 16] = b;
+    mova   [outq + i +  0 * 4], m1
+    mova   [outq + i + 16 * 4], m2
+%if ARCH_X86_64
+    mova   [outq + i +  0 * 4 + mmsize], m7
+    mova   [outq + i + 16 * 4 + mmsize], m8
+%endif
+%if ARCH_X86_32 || notcpuflag(avx)
+    sub            i, (ARCH_X86_64 + 1) * mmsize
+    jge    .mainloop
+%endif
+    RET
+%endmacro
+
+%if ARCH_X86_32
+INIT_XMM sse
+SYNTH_FILTER
+%endif
+INIT_XMM sse2
+SYNTH_FILTER
+INIT_YMM avx
+SYNTH_FILTER
+INIT_YMM fma3
+SYNTH_FILTER
diff --git a/libavcodec/x86/synth_filter_init.c b/libavcodec/x86/synth_filter_init.c
new file mode 100644
index 00000000..9ef00cdb
--- /dev/null
+++ b/libavcodec/x86/synth_filter_init.c
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2012-2014 Christophe Gisquet <christophe.gisquet@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/synth_filter.h"
+
+#define SYNTH_FILTER_FUNC(opt)                                                 \
+void ff_synth_filter_inner_##opt(float *synth_buf_ptr, float synth_buf2[32],   \
+                                 const float window[512],                      \
+                                 float out[32], intptr_t offset, float scale); \
+static void synth_filter_##opt(FFTContext *imdct,                              \
+                               float *synth_buf_ptr, int *synth_buf_offset,    \
+                               float synth_buf2[32], const float window[512],  \
+                               float out[32], const float in[32], float scale) \
+{                                                                              \
+    float *synth_buf= synth_buf_ptr + *synth_buf_offset;                       \
+                                                                               \
+    imdct->imdct_half(imdct, synth_buf, in);                                   \
+                                                                               \
+    ff_synth_filter_inner_##opt(synth_buf, synth_buf2, window,                 \
+                                out, *synth_buf_offset, scale);                \
+                                                                               \
+    *synth_buf_offset = (*synth_buf_offset - 32) & 511;                        \
+}                                                                              \
+
+#if HAVE_YASM
+#if ARCH_X86_32
+SYNTH_FILTER_FUNC(sse)
+#endif
+SYNTH_FILTER_FUNC(sse2)
+SYNTH_FILTER_FUNC(avx)
+SYNTH_FILTER_FUNC(fma3)
+#endif /* HAVE_YASM */
+
+av_cold void ff_synth_filter_init_x86(SynthFilterContext *s)
+{
+#if HAVE_YASM
+    int cpu_flags = av_get_cpu_flags();
+
+#if ARCH_X86_32
+    if (EXTERNAL_SSE(cpu_flags)) {
+        s->synth_filter_float = synth_filter_sse;
+    }
+#endif
+    if (EXTERNAL_SSE2(cpu_flags)) {
+        s->synth_filter_float = synth_filter_sse2;
+    }
+    if (EXTERNAL_AVX_FAST(cpu_flags)) {
+        s->synth_filter_float = synth_filter_avx;
+    }
+    if (EXTERNAL_FMA3_FAST(cpu_flags)) {
+        s->synth_filter_float = synth_filter_fma3;
+    }
+#endif /* HAVE_YASM */
+}
diff --git a/libavcodec/x86/takdsp.asm b/libavcodec/x86/takdsp.asm
new file mode 100644
index 00000000..5f3ded3e
--- /dev/null
+++ b/libavcodec/x86/takdsp.asm
@@ -0,0 +1,116 @@
+;******************************************************************************
+;* TAK DSP SIMD optimizations
+;*
+;* Copyright (C) 2015 Paul B Mahol
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+pd_128: times 4 dd 128
+
+SECTION .text
+
+INIT_XMM sse2
+cglobal tak_decorrelate_ls, 3, 3, 2, p1, p2, length
+    shl                     lengthd, 2
+    add                         p1q, lengthq
+    add                         p2q, lengthq
+    neg                     lengthq
+.loop:
+    mova                         m0, [p1q+lengthq+mmsize*0]
+    mova                         m1, [p1q+lengthq+mmsize*1]
+    paddd                        m0, [p2q+lengthq+mmsize*0]
+    paddd                        m1, [p2q+lengthq+mmsize*1]
+    mova     [p2q+lengthq+mmsize*0], m0
+    mova     [p2q+lengthq+mmsize*1], m1
+    add                     lengthq, mmsize*2
+    jl .loop
+    REP_RET
+
+cglobal tak_decorrelate_sr, 3, 3, 2, p1, p2, length
+    shl                     lengthd, 2
+    add                         p1q, lengthq
+    add                         p2q, lengthq
+    neg                     lengthq
+
+.loop:
+    mova                         m0, [p2q+lengthq+mmsize*0]
+    mova                         m1, [p2q+lengthq+mmsize*1]
+    psubd                        m0, [p1q+lengthq+mmsize*0]
+    psubd                        m1, [p1q+lengthq+mmsize*1]
+    mova     [p1q+lengthq+mmsize*0], m0
+    mova     [p1q+lengthq+mmsize*1], m1
+    add                     lengthq, mmsize*2
+    jl .loop
+    REP_RET
+
+cglobal tak_decorrelate_sm, 3, 3, 6, p1, p2, length
+    shl                     lengthd, 2
+    add                         p1q, lengthq
+    add                         p2q, lengthq
+    neg                     lengthq
+
+.loop:
+    mova                         m0, [p1q+lengthq]
+    mova                         m1, [p2q+lengthq]
+    mova                         m3, [p1q+lengthq+mmsize]
+    mova                         m4, [p2q+lengthq+mmsize]
+    mova                         m2, m1
+    mova                         m5, m4
+    psrad                        m2, 1
+    psrad                        m5, 1
+    psubd                        m0, m2
+    psubd                        m3, m5
+    paddd                        m1, m0
+    paddd                        m4, m3
+    mova              [p1q+lengthq], m0
+    mova              [p2q+lengthq], m1
+    mova       [p1q+lengthq+mmsize], m3
+    mova       [p2q+lengthq+mmsize], m4
+    add                     lengthq, mmsize*2
+    jl .loop
+    REP_RET
+
+INIT_XMM sse4
+cglobal tak_decorrelate_sf, 3, 3, 5, p1, p2, length, dshift, dfactor
+    shl             lengthd, 2
+    add                 p1q, lengthq
+    add                 p2q, lengthq
+    neg             lengthq
+
+    movd                 m2, dshiftm
+    movd                 m3, dfactorm
+    pshufd               m3, m3, 0
+    mova                 m4, [pd_128]
+
+.loop:
+    mova                 m0, [p1q+lengthq]
+    mova                 m1, [p2q+lengthq]
+    psrad                m1, m2
+    pmulld               m1, m3
+    paddd                m1, m4
+    psrad                m1, 8
+    pslld                m1, m2
+    psubd                m1, m0
+    mova      [p1q+lengthq], m1
+    add             lengthq, mmsize
+    jl .loop
+    REP_RET
diff --git a/libavcodec/x86/takdsp_init.c b/libavcodec/x86/takdsp_init.c
new file mode 100644
index 00000000..555d0649
--- /dev/null
+++ b/libavcodec/x86/takdsp_init.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/takdsp.h"
+#include "libavutil/x86/cpu.h"
+#include "config.h"
+
+void ff_tak_decorrelate_ls_sse2(int32_t *p1, int32_t *p2, int length);
+void ff_tak_decorrelate_sr_sse2(int32_t *p1, int32_t *p2, int length);
+void ff_tak_decorrelate_sm_sse2(int32_t *p1, int32_t *p2, int length);
+void ff_tak_decorrelate_sf_sse4(int32_t *p1, int32_t *p2, int length, int dshift, int dfactor);
+
+av_cold void ff_takdsp_init_x86(TAKDSPContext *c)
+{
+#if HAVE_YASM
+    int cpu_flags = av_get_cpu_flags();
+
+    if (EXTERNAL_SSE2(cpu_flags)) {
+        c->decorrelate_ls = ff_tak_decorrelate_ls_sse2;
+        c->decorrelate_sr = ff_tak_decorrelate_sr_sse2;
+        c->decorrelate_sm = ff_tak_decorrelate_sm_sse2;
+    }
+
+    if (EXTERNAL_SSE4(cpu_flags)) {
+        c->decorrelate_sf = ff_tak_decorrelate_sf_sse4;
+    }
+#endif
+}
diff --git a/libavcodec/x86/v210-init.c b/libavcodec/x86/v210-init.c
index dfdfd263..f579307a 100644
--- a/libavcodec/x86/v210-init.c
+++ b/libavcodec/x86/v210-init.c
@@ -27,9 +27,9 @@ extern void ff_v210_planar_unpack_aligned_avx(const uint32_t *src, uint16_t *y,
 
 av_cold void ff_v210_x86_init(V210DecContext *s)
 {
+#if HAVE_YASM
     int cpu_flags = av_get_cpu_flags();
 
-#if HAVE_YASM
     if (s->aligned_input) {
         if (cpu_flags & AV_CPU_FLAG_SSSE3)
             s->unpack_frame = ff_v210_planar_unpack_aligned_ssse3;
diff --git a/libavcodec/x86/v210.asm b/libavcodec/x86/v210.asm
index 400a1f3f..c24c765e 100644
--- a/libavcodec/x86/v210.asm
+++ b/libavcodec/x86/v210.asm
@@ -45,7 +45,7 @@ cglobal v210_planar_unpack_%1, 5, 5, 7
     mova   m4, [v210_mask]
     mova   m5, [v210_luma_shuf]
     mova   m6, [v210_chroma_shuf]
-.loop
+.loop:
 %ifidn %1, unaligned
     movu   m0, [r0]
 %else
diff --git a/libavcodec/x86/v210enc.asm b/libavcodec/x86/v210enc.asm
index 751675fc..0545454c 100644
--- a/libavcodec/x86/v210enc.asm
+++ b/libavcodec/x86/v210enc.asm
@@ -21,37 +21,37 @@
 
 %include "libavutil/x86/x86util.asm"
 
-SECTION_RODATA
+SECTION_RODATA 32
 
 cextern pw_4
 %define v210_enc_min_10 pw_4
-v210_enc_max_10: times 8 dw 0x3fb
+v210_enc_max_10: times 16 dw 0x3fb
 
-v210_enc_luma_mult_10: dw 4,1,16,4,1,16,0,0
-v210_enc_luma_shuf_10: db -1,0,1,-1,2,3,4,5,-1,6,7,-1,8,9,10,11
+v210_enc_luma_mult_10: times 2 dw 4,1,16,4,1,16,0,0
+v210_enc_luma_shuf_10: times 2 db -1,0,1,-1,2,3,4,5,-1,6,7,-1,8,9,10,11
 
-v210_enc_chroma_mult_10: dw 1,4,16,0,16,1,4,0
-v210_enc_chroma_shuf_10: db 0,1,8,9,-1,2,3,-1,10,11,4,5,-1,12,13,-1
+v210_enc_chroma_mult_10: times 2 dw 1,4,16,0,16,1,4,0
+v210_enc_chroma_shuf_10: times 2 db 0,1,8,9,-1,2,3,-1,10,11,4,5,-1,12,13,-1
 
 cextern pb_1
 %define v210_enc_min_8 pb_1
 cextern pb_FE
 %define v210_enc_max_8 pb_FE
 
-v210_enc_luma_shuf_8: db 6,-1,7,-1,8,-1,9,-1,10,-1,11,-1,-1,-1,-1,-1
-v210_enc_luma_mult_8: dw 16,4,64,16,4,64,0,0
+v210_enc_luma_shuf_8: times 2 db 6,-1,7,-1,8,-1,9,-1,10,-1,11,-1,-1,-1,-1,-1
+v210_enc_luma_mult_8: times 2 dw 16,4,64,16,4,64,0,0
 
-v210_enc_chroma_shuf1_8: db 0,-1,1,-1,2,-1,3,-1,8,-1,9,-1,10,-1,11,-1
-v210_enc_chroma_shuf2_8: db 3,-1,4,-1,5,-1,7,-1,11,-1,12,-1,13,-1,15,-1
+v210_enc_chroma_shuf1_8: times 2 db 0,-1,1,-1,2,-1,3,-1,8,-1,9,-1,10,-1,11,-1
+v210_enc_chroma_shuf2_8: times 2 db 3,-1,4,-1,5,-1,7,-1,11,-1,12,-1,13,-1,15,-1
 
-v210_enc_chroma_mult_8: dw 4,16,64,0,64,4,16,0
+v210_enc_chroma_mult_8: times 2 dw 4,16,64,0,64,4,16,0
 
 SECTION .text
 
 %macro v210_planar_pack_10 0
 
 ; v210_planar_pack_10(const uint16_t *y, const uint16_t *u, const uint16_t *v, uint8_t *dst, ptrdiff_t width)
-cglobal v210_planar_pack_10, 5, 5, 4, y, u, v, dst, width
+cglobal v210_planar_pack_10, 5, 5, 4+cpuflag(avx2), y, u, v, dst, width
     lea     r0, [yq+2*widthq]
     add     uq, widthq
     add     vq, widthq
@@ -60,12 +60,20 @@ cglobal v210_planar_pack_10, 5, 5, 4, y, u, v, dst, width
     mova    m2, [v210_enc_min_10]
     mova    m3, [v210_enc_max_10]
 
-.loop
-    movu    m0, [yq+2*widthq]
+.loop:
+    movu        xm0, [yq+2*widthq]
+%if cpuflag(avx2)
+    vinserti128 m0,   m0, [yq+widthq*2+12], 1
+%endif
     CLIPW   m0, m2, m3
 
-    movq    m1, [uq+widthq]
-    movhps  m1, [vq+widthq]
+    movq         xm1, [uq+widthq]
+    movhps       xm1, [vq+widthq]
+%if cpuflag(avx2)
+    movq         xm4, [uq+widthq+6]
+    movhps       xm4, [vq+widthq+6]
+    vinserti128  m1,   m1, xm4, 1
+%endif
     CLIPW   m1, m2, m3
 
     pmullw  m0, [v210_enc_luma_mult_10]
@@ -79,7 +87,7 @@ cglobal v210_planar_pack_10, 5, 5, 4, y, u, v, dst, width
     movu    [dstq], m0
 
     add     dstq, mmsize
-    add     widthq, 6
+    add     widthq, (mmsize*3)/8
     jl .loop
 
     RET
@@ -88,6 +96,11 @@ cglobal v210_planar_pack_10, 5, 5, 4, y, u, v, dst, width
 INIT_XMM ssse3
 v210_planar_pack_10
 
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+v210_planar_pack_10
+%endif
+
 %macro v210_planar_pack_8 0
 
 ; v210_planar_pack_8(const uint8_t *y, const uint8_t *u, const uint8_t *v, uint8_t *dst, ptrdiff_t width)
@@ -102,8 +115,11 @@ cglobal v210_planar_pack_8, 5, 5, 7, y, u, v, dst, width
     mova    m5, [v210_enc_max_8]
     pxor    m6, m6
 
-.loop
-    movu    m1, [yq+2*widthq]
+.loop:
+    movu        xm1, [yq+widthq*2]
+%if cpuflag(avx2)
+    vinserti128 m1,   m1, [yq+widthq*2+12], 1
+%endif
     CLIPUB  m1, m4, m5
 
     punpcklbw m0, m1, m6
@@ -116,8 +132,13 @@ cglobal v210_planar_pack_8, 5, 5, 7, y, u, v, dst, width
     pshufb  m0, [v210_enc_luma_shuf_10]
     pshufb  m1, [v210_enc_luma_shuf_10]
 
-    movq    m3, [uq+widthq]
-    movhps  m3, [vq+widthq]
+    movq         xm3, [uq+widthq]
+    movhps       xm3, [vq+widthq]
+%if cpuflag(avx2)
+    movq         xm2, [uq+widthq+6]
+    movhps       xm2, [vq+widthq+6]
+    vinserti128  m3,   m3, xm2, 1
+%endif
     CLIPUB  m3, m4, m5
 
     ; shuffle and multiply to get the same packing as in 10-bit
@@ -132,11 +153,15 @@ cglobal v210_planar_pack_8, 5, 5, 7, y, u, v, dst, width
     por     m0, m2
     por     m1, m3
 
-    movu    [dstq], m0
-    movu    [dstq+mmsize], m1
+    movu         [dstq],    xm0
+    movu         [dstq+16], xm1
+%if cpuflag(avx2)
+    vextracti128 [dstq+32], m0, 1
+    vextracti128 [dstq+48], m1, 1
+%endif
 
     add     dstq, 2*mmsize
-    add     widthq, 6
+    add     widthq, (mmsize*3)/8
     jl .loop
 
     RET
@@ -146,3 +171,8 @@ INIT_XMM ssse3
 v210_planar_pack_8
 INIT_XMM avx
 v210_planar_pack_8
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+v210_planar_pack_8
+%endif
diff --git a/libavcodec/x86/v210enc_init.c b/libavcodec/x86/v210enc_init.c
index 2afb1b2d..8abb152c 100644
--- a/libavcodec/x86/v210enc_init.c
+++ b/libavcodec/x86/v210enc_init.c
@@ -24,9 +24,14 @@ void ff_v210_planar_pack_8_ssse3(const uint8_t *y, const uint8_t *u,
                                  ptrdiff_t width);
 void ff_v210_planar_pack_8_avx(const uint8_t *y, const uint8_t *u,
                                const uint8_t *v, uint8_t *dst, ptrdiff_t width);
+void ff_v210_planar_pack_8_avx2(const uint8_t *y, const uint8_t *u,
+                                const uint8_t *v, uint8_t *dst, ptrdiff_t width);
 void ff_v210_planar_pack_10_ssse3(const uint16_t *y, const uint16_t *u,
                                   const uint16_t *v, uint8_t *dst,
                                   ptrdiff_t width);
+void ff_v210_planar_pack_10_avx2(const uint16_t *y, const uint16_t *u,
+                                 const uint16_t *v, uint8_t *dst,
+                                 ptrdiff_t width);
 
 av_cold void ff_v210enc_init_x86(V210EncContext *s)
 {
@@ -39,4 +44,10 @@ av_cold void ff_v210enc_init_x86(V210EncContext *s)
 
     if (EXTERNAL_AVX(cpu_flags))
         s->pack_line_8 = ff_v210_planar_pack_8_avx;
+
+    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+        s->pack_line_8 = ff_v210_planar_pack_8_avx2;
+        s->pack_line_10 = ff_v210_planar_pack_10_avx2;
+        s->sample_factor = 2;
+    }
 }
diff --git a/libavcodec/x86/vc1dsp.asm b/libavcodec/x86/vc1dsp.asm
index 546688cf..eee42c27 100644
--- a/libavcodec/x86/vc1dsp.asm
+++ b/libavcodec/x86/vc1dsp.asm
@@ -1,5 +1,6 @@
 ;******************************************************************************
-;* VC1 deblocking optimizations
+;* VC1 DSP optimizations
+;* Copyright (c) 2007 Christophe GISQUET <christophe.gisquet@free.fr>
 ;* Copyright (c) 2009 David Conrad
 ;*
 ;* This file is part of FFmpeg.
@@ -23,6 +24,8 @@
 
 cextern pw_4
 cextern pw_5
+cextern pw_9
+cextern pw_128
 
 section .text
 
@@ -315,3 +318,268 @@ cglobal vc1_h_loop_filter8, 3,5,8
     START_H_FILTER 8
     VC1_H_LOOP_FILTER 8
     RET
+
+%if HAVE_MMX_INLINE
+
+; XXX some of these macros are not used right now, but they will be in the
+;     future, when more functions are ported.
+
+%macro OP_PUT 2 ; dst, src -- expands to nothing, dst is left as computed
+%endmacro
+
+%macro OP_AVG 2 ; dst, src -- dst becomes the rounded byte average of dst and src
+    pavgb           %1, %2
+%endmacro
+
+%macro NORMALIZE_MMX 1 ; shift -- add the rounder held in m7 to m3/m4, then shift both right
+    paddw           m3, m7 ; +bias-r
+    paddw           m4, m7 ; +bias-r
+    psraw           m3, %1 ; arithmetic shift, so each word keeps its sign
+    psraw           m4, %1
+%endmacro
+
+%macro TRANSFER_DO_PACK 2 ; op, dst -- narrow m3/m4 to bytes, apply op against dst, store
+    packuswb        m3, m4 ; words of m3 and m4 -> unsigned-saturated bytes, all in m3
+    %1              m3, [%2] ; op is one of the OP_* macros (OP_PUT emits nothing here)
+    mova          [%2], m3
+%endmacro
+
+%macro TRANSFER_DONT_PACK 2 ; op, dst -- apply op to m3 and m4 against dst, store both unpacked
+    %1              m3, [%2]          ; first register pairs with [dst]
+    %1              m4, [%2 + mmsize] ; second register pairs with [dst + mmsize]; it is the one stored there
+    mova          [%2], m3
+    mova [mmsize + %2], m4
+%endmacro
+
+; see MSPEL_FILTER13_CORE for use as UNPACK macro
+%macro DO_UNPACK 1 ; reg
+    punpcklbw       %1, m0 ; interleave low bytes of reg with m0 (bytes -> words when m0 is zero)
+%endmacro
+%macro DONT_UNPACK 1 ; reg -- expands to nothing
+%endmacro
+
+; Load the rounder (32-r or 8-r) and replicate its low word into every word of m7
+%macro LOAD_ROUNDER_MMX 1 ; round
+    movd      m7, %1
+    punpcklwd m7, m7 ; low word duplicated into the low dword
+    punpckldq m7, m7 ; low dword duplicated into both halves of m7
+%endmacro
+
+%macro SHIFT2_LINE 5 ; off, r0, r1, r2, r3 -- emit one filtered row at dst+off, then advance src one row
+    paddw          m%3, m%4 ; sum of the two rows already held in r1 and r2
+    movh           m%2, [srcq + stride_neg2] ; row at src + stride_neg2 (set to -2*stride by the caller)
+    pmullw         m%3, m6 ; scale the sum by m6 (the caller loads pw_9 there)
+    punpcklbw      m%2, m0 ; bytes -> words (m0 is cleared by the caller)
+    movh           m%5, [srcq + strideq] ; row at src + stride, kept in r3 after the macro
+    psubw          m%3, m%2
+    punpcklbw      m%5, m0
+    paddw          m%3, m7 ; add the rounder loaded by LOAD_ROUNDER_MMX
+    psubw          m%3, m%5
+    psraw          m%3, shift
+    movu   [dstq + %1], m%3
+    add           srcq, strideq
+%endmacro
+
+INIT_MMX mmx
+; void ff_vc1_put_ver_16b_shift2_mmx(int16_t *dst, const uint8_t *src,
+;                                    x86_reg stride, int rnd, int64_t shift)
+; Sacrificing m6 makes it possible to pipeline loads from src
+%if ARCH_X86_32
+cglobal vc1_put_ver_16b_shift2, 3,6,0, dst, src, stride
+    DECLARE_REG_TMP     3, 4, 5
+    %define rnd r3mp
+    %define shift qword r4m
+%else ; X86_64
+cglobal vc1_put_ver_16b_shift2, 4,7,0, dst, src, stride
+    DECLARE_REG_TMP     4, 5, 6
+    %define   rnd r3d
+    ; We need shift either in memory or in a mm reg as it's used in psraw
+    ; On WIN64, the arg is already on the stack
+    ; On UNIX64, m5 doesn't seem to be used
+%if WIN64
+    %define shift r4mp
+%else ; UNIX64
+    %define shift m5
+    mova shift, r4q
+%endif ; WIN64
+%endif ; X86_32
+%define stride_neg2 t0q
+%define stride_9minus4 t1q
+%define i t2q
+    mov       stride_neg2, strideq
+    neg       stride_neg2
+    add       stride_neg2, stride_neg2 ; stride_neg2 = -2 * stride
+    lea    stride_9minus4, [strideq * 9 - 4]
+    mov                 i, 3 ; the loop below runs three times
+    LOAD_ROUNDER_MMX  rnd
+    mova               m6, [pw_9]
+    pxor               m0, m0 ; zero register used for byte -> word unpacking
+.loop:
+    movh               m2, [srcq]
+    add              srcq, strideq
+    movh               m3, [srcq]
+    punpcklbw          m2, m0
+    punpcklbw          m3, m0
+    SHIFT2_LINE         0, 1, 2, 3, 4 ; eight rows, 24 bytes apart in dst; registers rotate each call
+    SHIFT2_LINE        24, 2, 3, 4, 1
+    SHIFT2_LINE        48, 3, 4, 1, 2
+    SHIFT2_LINE        72, 4, 1, 2, 3
+    SHIFT2_LINE        96, 1, 2, 3, 4
+    SHIFT2_LINE       120, 2, 3, 4, 1
+    SHIFT2_LINE       144, 3, 4, 1, 2
+    SHIFT2_LINE       168, 4, 1, 2, 3
+    sub              srcq, stride_9minus4 ; undo the 9 row steps taken above, net effect src += 4
+    add              dstq, 8 ; dst advances 8 bytes per iteration
+    dec                 i
+        jnz         .loop
+    REP_RET
+%undef rnd
+%undef shift
+%undef stride_neg2
+%undef stride_9minus4
+%undef i
+
+; void ff_vc1_*_hor_16b_shift2_mmx(uint8_t *dst, x86_reg stride,
+;                                  const int16_t *src, int rnd);
+; Data is already unpacked, so some operations can directly be made from
+; memory.
+%macro HOR_16B_SHIFT2 2 ; op, opname
+cglobal vc1_%2_hor_16b_shift2, 4, 5, 0, dst, stride, src, rnd, h
+    mov                hq, 8 ; row counter, the loop runs eight times
+    sub              srcq, 2 ; step back 2 bytes, i.e. one int16_t sample
+    sub              rndd, (-1+9+9-1) * 1024 ; add -1024 bias
+    LOAD_ROUNDER_MMX rndq
+    mova               m5, [pw_9]
+    mova               m6, [pw_128]
+    pxor               m0, m0
+
+.loop:
+    mova               m1, [srcq + 2 * 0] ; outer taps (samples 0 and 3) accumulate in m1/m2
+    mova               m2, [srcq + 2 * 0 + mmsize]
+    mova               m3, [srcq + 2 * 1] ; inner taps (samples 1 and 2) accumulate in m3/m4
+    mova               m4, [srcq + 2 * 1 + mmsize]
+    paddw              m3, [srcq + 2 * 2]
+    paddw              m4, [srcq + 2 * 2 + mmsize]
+    paddw              m1, [srcq + 2 * 3]
+    paddw              m2, [srcq + 2 * 3 + mmsize]
+    pmullw             m3, m5 ; inner sum * 9
+    pmullw             m4, m5
+    psubw              m3, m1 ; minus the outer sum
+    psubw              m4, m2
+    NORMALIZE_MMX      7
+    ; remove bias
+    paddw              m3, m6
+    paddw              m4, m6
+    TRANSFER_DO_PACK   %1, dstq
+    add              srcq, 24 ; src advances 24 bytes per row
+    add              dstq, strideq
+    dec                hq
+        jnz         .loop
+
+    RET
+%endmacro
+
+INIT_MMX mmx
+HOR_16B_SHIFT2 OP_PUT, put
+
+INIT_MMX mmxext
+HOR_16B_SHIFT2 OP_AVG, avg
+%endif ; HAVE_MMX_INLINE
+
+%macro INV_TRANS_INIT 0 ; expects the final dc value in blockd
+    movsxdifnidn linesizeq, linesized
+    movd       m0, blockd
+    SPLATW     m0, m0 ; dc replicated into every word of m0
+    pxor       m1, m1
+    psubw      m1, m0 ; m1 = -dc in every word
+    packuswb   m0, m0 ; unsigned saturation: m0 bytes are dc when dc > 0, else 0
+    packuswb   m1, m1 ; m1 bytes are -dc when dc < 0, else 0
+
+    DEFINE_ARGS dest, linesize, linesize3 ; third register is renamed from block to linesize3
+    lea    linesize3q, [linesizeq*3]
+%endmacro
+
+%macro INV_TRANS_PROCESS 1 ; mov suffix (callers pass h or a) -- update four rows starting at dest
+    mov%1                  m2, [destq+linesizeq*0]
+    mov%1                  m3, [destq+linesizeq*1]
+    mov%1                  m4, [destq+linesizeq*2]
+    mov%1                  m5, [destq+linesize3q]
+    paddusb                m2, m0 ; saturating add of m0 (positive part of dc per INV_TRANS_INIT)
+    paddusb                m3, m0
+    paddusb                m4, m0
+    paddusb                m5, m0
+    psubusb                m2, m1 ; saturating subtract of m1 (negated negative part of dc)
+    psubusb                m3, m1
+    psubusb                m4, m1
+    psubusb                m5, m1
+    mov%1 [linesizeq*0+destq], m2
+    mov%1 [linesizeq*1+destq], m3
+    mov%1 [linesizeq*2+destq], m4
+    mov%1 [linesize3q +destq], m5
+%endmacro
+
+; ff_vc1_inv_trans_?x?_dc_mmxext(uint8_t *dest, int linesize, int16_t *block)
+INIT_MMX mmxext
+cglobal vc1_inv_trans_4x4_dc, 3,4,0, dest, linesize, block
+    movsx         r3d, WORD [blockq]   ; sign-extended block[0]
+    mov        blockd, r3d             ; dc
+    shl        blockd, 4               ; 16 * dc
+    lea        blockd, [blockq+r3+4]   ; 17 * dc + 4
+    sar        blockd, 3               ; >> 3
+    mov           r3d, blockd          ; dc
+    shl        blockd, 4               ; 16 * dc
+    lea        blockd, [blockq+r3+64]  ; 17 * dc + 64
+    sar        blockd, 7               ; >> 7
+
+    INV_TRANS_INIT
+
+    INV_TRANS_PROCESS h ; four rows, moved with movh
+    RET
+
+INIT_MMX mmxext
+cglobal vc1_inv_trans_4x8_dc, 3,4,0, dest, linesize, block
+    movsx         r3d, WORD [blockq]   ; sign-extended block[0]
+    mov        blockd, r3d             ; dc
+    shl        blockd, 4               ; 16 * dc
+    lea        blockd, [blockq+r3+4]   ; 17 * dc + 4
+    sar        blockd, 3               ; >> 3
+    shl        blockd, 2               ;  4 * dc
+    lea        blockd, [blockq*3+64]   ; 12 * dc + 64
+    sar        blockd, 7               ; >> 7
+
+    INV_TRANS_INIT
+
+    INV_TRANS_PROCESS h ; rows 0-3
+    lea         destq, [destq+linesizeq*4]
+    INV_TRANS_PROCESS h ; rows 4-7
+    RET
+
+INIT_MMX mmxext
+cglobal vc1_inv_trans_8x4_dc, 3,4,0, dest, linesize, block
+    movsx      blockd, WORD [blockq]   ; dc
+    lea        blockd, [blockq*3+1]    ;  3 * dc + 1
+    sar        blockd, 1               ; >> 1
+    mov           r3d, blockd          ; dc
+    shl        blockd, 4               ; 16 * dc
+    lea        blockd, [blockq+r3+64]  ; 17 * dc + 64
+    sar        blockd, 7               ; >> 7
+
+    INV_TRANS_INIT
+
+    INV_TRANS_PROCESS a ; four rows, moved with mova
+    RET
+
+INIT_MMX mmxext
+cglobal vc1_inv_trans_8x8_dc, 3,3,0, dest, linesize, block
+    movsx      blockd, WORD [blockq]   ; dc
+    lea        blockd, [blockq*3+1]    ;  3 * dc + 1
+    sar        blockd, 1               ; >> 1
+    lea        blockd, [blockq*3+16]   ;  3 * dc + 16
+    sar        blockd, 5               ; >> 5
+
+    INV_TRANS_INIT
+
+    INV_TRANS_PROCESS a ; rows 0-3
+    lea         destq, [destq+linesizeq*4]
+    INV_TRANS_PROCESS a ; rows 4-7
+    RET
diff --git a/libavcodec/x86/vc1dsp_init.c b/libavcodec/x86/vc1dsp_init.c
index 2bef5f5f..c8943fa2 100644
--- a/libavcodec/x86/vc1dsp_init.c
+++ b/libavcodec/x86/vc1dsp_init.c
@@ -63,16 +63,22 @@ static void vc1_h_loop_filter16_sse4(uint8_t *src, int stride, int pq)
     ff_vc1_h_loop_filter8_sse4(src,          stride, pq);
     ff_vc1_h_loop_filter8_sse4(src+8*stride, stride, pq);
 }
-static void avg_vc1_mspel_mc00_mmxext(uint8_t *dst, const uint8_t *src,
-                                      ptrdiff_t stride, int rnd)
-{
-    ff_avg_pixels8_mmxext(dst, src, stride, 8);
-}
-static void avg_vc1_mspel_mc00_16_sse2(uint8_t *dst, const uint8_t *src,
-                                       ptrdiff_t stride, int rnd)
-{
-    ff_avg_pixels16_sse2(dst, src, stride, 16);
-}
+
+#define DECLARE_FUNCTION(OP, DEPTH, INSN)                       \
+    static void OP##vc1_mspel_mc00_##DEPTH##INSN(uint8_t *dst,          \
+                             const uint8_t *src, ptrdiff_t stride, int rnd) \
+    {                                                                       \
+        ff_ ## OP ## pixels ## DEPTH ## INSN(dst, src, stride, DEPTH);     \
+    }
+
+DECLARE_FUNCTION(put_,  8, _mmx)
+DECLARE_FUNCTION(put_, 16, _mmx)
+DECLARE_FUNCTION(avg_,  8, _mmx)
+DECLARE_FUNCTION(avg_, 16, _mmx)
+DECLARE_FUNCTION(avg_,  8, _mmxext)
+DECLARE_FUNCTION(avg_, 16, _mmxext)
+DECLARE_FUNCTION(put_, 16, _sse2)
+DECLARE_FUNCTION(avg_, 16, _sse2)
 
 #endif /* HAVE_YASM */
 
@@ -86,16 +92,24 @@ void ff_put_vc1_chroma_mc8_nornd_ssse3(uint8_t *dst, uint8_t *src,
                                        int stride, int h, int x, int y);
 void ff_avg_vc1_chroma_mc8_nornd_ssse3(uint8_t *dst, uint8_t *src,
                                        int stride, int h, int x, int y);
+void ff_vc1_inv_trans_4x4_dc_mmxext(uint8_t *dest, int linesize,
+                                    int16_t *block);
+void ff_vc1_inv_trans_4x8_dc_mmxext(uint8_t *dest, int linesize,
+                                    int16_t *block);
+void ff_vc1_inv_trans_8x4_dc_mmxext(uint8_t *dest, int linesize,
+                                    int16_t *block);
+void ff_vc1_inv_trans_8x8_dc_mmxext(uint8_t *dest, int linesize,
+                                    int16_t *block);
 
 
 av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
 {
     int cpu_flags = av_get_cpu_flags();
 
-    if (HAVE_6REGS && INLINE_MMX(cpu_flags))
+    if (HAVE_6REGS && INLINE_MMX(cpu_flags) && EXTERNAL_MMX(cpu_flags))
         ff_vc1dsp_init_mmx(dsp);
 
-    if (HAVE_6REGS && INLINE_MMXEXT(cpu_flags))
+    if (HAVE_6REGS && INLINE_MMXEXT(cpu_flags) && EXTERNAL_MMXEXT(cpu_flags))
         ff_vc1dsp_init_mmxext(dsp);
 
 #define ASSIGN_LF(EXT) \
@@ -109,6 +123,11 @@ av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
 #if HAVE_YASM
     if (EXTERNAL_MMX(cpu_flags)) {
         dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_nornd_mmx;
+
+        dsp->put_vc1_mspel_pixels_tab[1][0]      = put_vc1_mspel_mc00_8_mmx;
+        dsp->put_vc1_mspel_pixels_tab[0][0]      = put_vc1_mspel_mc00_16_mmx;
+        dsp->avg_vc1_mspel_pixels_tab[1][0]      = avg_vc1_mspel_mc00_8_mmx;
+        dsp->avg_vc1_mspel_pixels_tab[0][0]      = avg_vc1_mspel_mc00_16_mmx;
     }
     if (EXTERNAL_AMD3DNOW(cpu_flags)) {
         dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_3dnow;
@@ -117,13 +136,21 @@ av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
         ASSIGN_LF(mmxext);
         dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_mmxext;
 
-        dsp->avg_vc1_mspel_pixels_tab[1][0]      = avg_vc1_mspel_mc00_mmxext;
+        dsp->avg_vc1_mspel_pixels_tab[1][0]      = avg_vc1_mspel_mc00_8_mmxext;
+        dsp->avg_vc1_mspel_pixels_tab[0][0]      = avg_vc1_mspel_mc00_16_mmxext;
+
+        dsp->vc1_inv_trans_8x8_dc                = ff_vc1_inv_trans_8x8_dc_mmxext;
+        dsp->vc1_inv_trans_4x8_dc                = ff_vc1_inv_trans_4x8_dc_mmxext;
+        dsp->vc1_inv_trans_8x4_dc                = ff_vc1_inv_trans_8x4_dc_mmxext;
+        dsp->vc1_inv_trans_4x4_dc                = ff_vc1_inv_trans_4x4_dc_mmxext;
     }
     if (EXTERNAL_SSE2(cpu_flags)) {
         dsp->vc1_v_loop_filter8  = ff_vc1_v_loop_filter8_sse2;
         dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_sse2;
         dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_sse2;
         dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_sse2;
+
+        dsp->put_vc1_mspel_pixels_tab[0][0]      = put_vc1_mspel_mc00_16_sse2;
         dsp->avg_vc1_mspel_pixels_tab[0][0]      = avg_vc1_mspel_mc00_16_sse2;
     }
     if (EXTERNAL_SSSE3(cpu_flags)) {
diff --git a/libavcodec/x86/vc1dsp_mmx.c b/libavcodec/x86/vc1dsp_mmx.c
index a7eb59df..83256483 100644
--- a/libavcodec/x86/vc1dsp_mmx.c
+++ b/libavcodec/x86/vc1dsp_mmx.c
@@ -33,7 +33,15 @@
 #include "fpel.h"
 #include "vc1dsp.h"
 
-#if HAVE_6REGS && HAVE_INLINE_ASM
+#if HAVE_6REGS && HAVE_INLINE_ASM && HAVE_MMX_EXTERNAL
+
+void ff_vc1_put_ver_16b_shift2_mmx(int16_t *dst,
+                                   const uint8_t *src, x86_reg stride,
+                                   int rnd, int64_t shift);
+void ff_vc1_put_hor_16b_shift2_mmx(uint8_t *dst, x86_reg stride,
+                                   const int16_t *src, int rnd);
+void ff_vc1_avg_hor_16b_shift2_mmxext(uint8_t *dst, x86_reg stride,
+                                      const int16_t *src, int rnd);
 
 #define OP_PUT(S,D)
 #define OP_AVG(S,D) "pavgb " #S ", " #D " \n\t"
@@ -66,104 +74,6 @@
      "punpcklwd %%mm7, %%mm7           \n\t"    \
      "punpckldq %%mm7, %%mm7           \n\t"
 
-#define SHIFT2_LINE(OFF, R0,R1,R2,R3)           \
-    "paddw     %%mm"#R2", %%mm"#R1"    \n\t"    \
-    "movd      (%0,%3), %%mm"#R0"      \n\t"    \
-    "pmullw    %%mm6, %%mm"#R1"        \n\t"    \
-    "punpcklbw %%mm0, %%mm"#R0"        \n\t"    \
-    "movd      (%0,%2), %%mm"#R3"      \n\t"    \
-    "psubw     %%mm"#R0", %%mm"#R1"    \n\t"    \
-    "punpcklbw %%mm0, %%mm"#R3"        \n\t"    \
-    "paddw     %%mm7, %%mm"#R1"        \n\t"    \
-    "psubw     %%mm"#R3", %%mm"#R1"    \n\t"    \
-    "psraw     %4, %%mm"#R1"           \n\t"    \
-    "movq      %%mm"#R1", "#OFF"(%1)   \n\t"    \
-    "add       %2, %0                  \n\t"
-
-/** Sacrificing mm6 makes it possible to pipeline loads from src */
-static void vc1_put_ver_16b_shift2_mmx(int16_t *dst,
-                                       const uint8_t *src, x86_reg stride,
-                                       int rnd, int64_t shift)
-{
-    __asm__ volatile(
-        "mov       $3, %%"REG_c"           \n\t"
-        LOAD_ROUNDER_MMX("%5")
-        "movq      "MANGLE(ff_pw_9)", %%mm6 \n\t"
-        "1:                                \n\t"
-        "movd      (%0), %%mm2             \n\t"
-        "add       %2, %0                  \n\t"
-        "movd      (%0), %%mm3             \n\t"
-        "punpcklbw %%mm0, %%mm2            \n\t"
-        "punpcklbw %%mm0, %%mm3            \n\t"
-        SHIFT2_LINE(  0, 1, 2, 3, 4)
-        SHIFT2_LINE( 24, 2, 3, 4, 1)
-        SHIFT2_LINE( 48, 3, 4, 1, 2)
-        SHIFT2_LINE( 72, 4, 1, 2, 3)
-        SHIFT2_LINE( 96, 1, 2, 3, 4)
-        SHIFT2_LINE(120, 2, 3, 4, 1)
-        SHIFT2_LINE(144, 3, 4, 1, 2)
-        SHIFT2_LINE(168, 4, 1, 2, 3)
-        "sub       %6, %0                  \n\t"
-        "add       $8, %1                  \n\t"
-        "dec       %%"REG_c"               \n\t"
-        "jnz 1b                            \n\t"
-        : "+r"(src), "+r"(dst)
-        : "r"(stride), "r"(-2*stride),
-          "m"(shift), "m"(rnd), "r"(9*stride-4)
-          NAMED_CONSTRAINTS_ADD(ff_pw_9)
-        : "%"REG_c, "memory"
-    );
-}
-
-/**
- * Data is already unpacked, so some operations can directly be made from
- * memory.
- */
-#define VC1_HOR_16b_SHIFT2(OP, OPNAME)\
-static void OPNAME ## vc1_hor_16b_shift2_mmx(uint8_t *dst, x86_reg stride,\
-                                             const int16_t *src, int rnd)\
-{\
-    int h = 8;\
-\
-    src -= 1;\
-    rnd -= (-1+9+9-1)*1024; /* Add -1024 bias */\
-    __asm__ volatile(\
-        LOAD_ROUNDER_MMX("%4")\
-        "movq      "MANGLE(ff_pw_128)", %%mm6\n\t"\
-        "movq      "MANGLE(ff_pw_9)", %%mm5 \n\t"\
-        "1:                                \n\t"\
-        "movq      2*0+0(%1), %%mm1        \n\t"\
-        "movq      2*0+8(%1), %%mm2        \n\t"\
-        "movq      2*1+0(%1), %%mm3        \n\t"\
-        "movq      2*1+8(%1), %%mm4        \n\t"\
-        "paddw     2*3+0(%1), %%mm1        \n\t"\
-        "paddw     2*3+8(%1), %%mm2        \n\t"\
-        "paddw     2*2+0(%1), %%mm3        \n\t"\
-        "paddw     2*2+8(%1), %%mm4        \n\t"\
-        "pmullw    %%mm5, %%mm3            \n\t"\
-        "pmullw    %%mm5, %%mm4            \n\t"\
-        "psubw     %%mm1, %%mm3            \n\t"\
-        "psubw     %%mm2, %%mm4            \n\t"\
-        NORMALIZE_MMX("$7")\
-        /* Remove bias */\
-        "paddw     %%mm6, %%mm3            \n\t"\
-        "paddw     %%mm6, %%mm4            \n\t"\
-        TRANSFER_DO_PACK(OP)\
-        "add       $24, %1                 \n\t"\
-        "add       %3, %2                  \n\t"\
-        "decl      %0                      \n\t"\
-        "jnz 1b                            \n\t"\
-        : "+r"(h), "+r" (src),  "+r" (dst)\
-        : "r"(stride), "m"(rnd)\
-          NAMED_CONSTRAINTS_ADD(ff_pw_128,ff_pw_9)\
-        : "memory"\
-    );\
-}
-
-VC1_HOR_16b_SHIFT2(OP_PUT, put_)
-VC1_HOR_16b_SHIFT2(OP_AVG, avg_)
-
-
 /**
  * Purely vertical or horizontal 1/2 shift interpolation.
  * Sacrify mm6 for *9 factor.
@@ -425,14 +335,14 @@ typedef void (*vc1_mspel_mc_filter_8bits)(uint8_t *dst, const uint8_t *src, x86_
  * @param  hmode   Vertical filter.
  * @param  rnd     Rounding bias.
  */
-#define VC1_MSPEL_MC(OP)\
+#define VC1_MSPEL_MC(OP, INSTR)\
 static void OP ## vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride,\
                                int hmode, int vmode, int rnd)\
 {\
     static const vc1_mspel_mc_filter_ver_16bits vc1_put_shift_ver_16bits[] =\
-         { NULL, vc1_put_ver_16b_shift1_mmx, vc1_put_ver_16b_shift2_mmx, vc1_put_ver_16b_shift3_mmx };\
+         { NULL, vc1_put_ver_16b_shift1_mmx, ff_vc1_put_ver_16b_shift2_mmx, vc1_put_ver_16b_shift3_mmx };\
     static const vc1_mspel_mc_filter_hor_16bits vc1_put_shift_hor_16bits[] =\
-         { NULL, OP ## vc1_hor_16b_shift1_mmx, OP ## vc1_hor_16b_shift2_mmx, OP ## vc1_hor_16b_shift3_mmx };\
+         { NULL, OP ## vc1_hor_16b_shift1_mmx, ff_vc1_ ## OP ## hor_16b_shift2_ ## INSTR, OP ## vc1_hor_16b_shift3_mmx };\
     static const vc1_mspel_mc_filter_8bits vc1_put_shift_8bits[] =\
          { NULL, OP ## vc1_shift1_mmx, OP ## vc1_shift2_mmx, OP ## vc1_shift3_mmx };\
 \
@@ -473,8 +383,8 @@ static void OP ## vc1_mspel_mc_16(uint8_t *dst, const uint8_t *src, \
     OP ## vc1_mspel_mc(dst + 8, src + 8, stride, hmode, vmode, rnd); \
 }
 
-VC1_MSPEL_MC(put_)
-VC1_MSPEL_MC(avg_)
+VC1_MSPEL_MC(put_, mmx)
+VC1_MSPEL_MC(avg_, mmxext)
 
 /** Macro to ease bicubic filter interpolation functions declarations */
 #define DECLARE_FUNCTION(a, b)                                          \
@@ -526,241 +436,12 @@ DECLARE_FUNCTION(3, 1)
 DECLARE_FUNCTION(3, 2)
 DECLARE_FUNCTION(3, 3)
 
-static void vc1_inv_trans_4x4_dc_mmxext(uint8_t *dest, int linesize,
-                                        int16_t *block)
-{
-    int dc = block[0];
-    dc = (17 * dc +  4) >> 3;
-    dc = (17 * dc + 64) >> 7;
-    __asm__ volatile(
-        "movd          %0, %%mm0 \n\t"
-        "pshufw $0, %%mm0, %%mm0 \n\t"
-        "pxor       %%mm1, %%mm1 \n\t"
-        "psubw      %%mm0, %%mm1 \n\t"
-        "packuswb   %%mm0, %%mm0 \n\t"
-        "packuswb   %%mm1, %%mm1 \n\t"
-        ::"r"(dc)
-    );
-    __asm__ volatile(
-        "movd          %0, %%mm2 \n\t"
-        "movd          %1, %%mm3 \n\t"
-        "movd          %2, %%mm4 \n\t"
-        "movd          %3, %%mm5 \n\t"
-        "paddusb    %%mm0, %%mm2 \n\t"
-        "paddusb    %%mm0, %%mm3 \n\t"
-        "paddusb    %%mm0, %%mm4 \n\t"
-        "paddusb    %%mm0, %%mm5 \n\t"
-        "psubusb    %%mm1, %%mm2 \n\t"
-        "psubusb    %%mm1, %%mm3 \n\t"
-        "psubusb    %%mm1, %%mm4 \n\t"
-        "psubusb    %%mm1, %%mm5 \n\t"
-        "movd       %%mm2, %0    \n\t"
-        "movd       %%mm3, %1    \n\t"
-        "movd       %%mm4, %2    \n\t"
-        "movd       %%mm5, %3    \n\t"
-        :"+m"(*(uint32_t*)(dest+0*linesize)),
-         "+m"(*(uint32_t*)(dest+1*linesize)),
-         "+m"(*(uint32_t*)(dest+2*linesize)),
-         "+m"(*(uint32_t*)(dest+3*linesize))
-    );
-}
-
-static void vc1_inv_trans_4x8_dc_mmxext(uint8_t *dest, int linesize,
-                                        int16_t *block)
-{
-    int dc = block[0];
-    dc = (17 * dc +  4) >> 3;
-    dc = (12 * dc + 64) >> 7;
-    __asm__ volatile(
-        "movd          %0, %%mm0 \n\t"
-        "pshufw $0, %%mm0, %%mm0 \n\t"
-        "pxor       %%mm1, %%mm1 \n\t"
-        "psubw      %%mm0, %%mm1 \n\t"
-        "packuswb   %%mm0, %%mm0 \n\t"
-        "packuswb   %%mm1, %%mm1 \n\t"
-        ::"r"(dc)
-    );
-    __asm__ volatile(
-        "movd          %0, %%mm2 \n\t"
-        "movd          %1, %%mm3 \n\t"
-        "movd          %2, %%mm4 \n\t"
-        "movd          %3, %%mm5 \n\t"
-        "paddusb    %%mm0, %%mm2 \n\t"
-        "paddusb    %%mm0, %%mm3 \n\t"
-        "paddusb    %%mm0, %%mm4 \n\t"
-        "paddusb    %%mm0, %%mm5 \n\t"
-        "psubusb    %%mm1, %%mm2 \n\t"
-        "psubusb    %%mm1, %%mm3 \n\t"
-        "psubusb    %%mm1, %%mm4 \n\t"
-        "psubusb    %%mm1, %%mm5 \n\t"
-        "movd       %%mm2, %0    \n\t"
-        "movd       %%mm3, %1    \n\t"
-        "movd       %%mm4, %2    \n\t"
-        "movd       %%mm5, %3    \n\t"
-        :"+m"(*(uint32_t*)(dest+0*linesize)),
-         "+m"(*(uint32_t*)(dest+1*linesize)),
-         "+m"(*(uint32_t*)(dest+2*linesize)),
-         "+m"(*(uint32_t*)(dest+3*linesize))
-    );
-    dest += 4*linesize;
-    __asm__ volatile(
-        "movd          %0, %%mm2 \n\t"
-        "movd          %1, %%mm3 \n\t"
-        "movd          %2, %%mm4 \n\t"
-        "movd          %3, %%mm5 \n\t"
-        "paddusb    %%mm0, %%mm2 \n\t"
-        "paddusb    %%mm0, %%mm3 \n\t"
-        "paddusb    %%mm0, %%mm4 \n\t"
-        "paddusb    %%mm0, %%mm5 \n\t"
-        "psubusb    %%mm1, %%mm2 \n\t"
-        "psubusb    %%mm1, %%mm3 \n\t"
-        "psubusb    %%mm1, %%mm4 \n\t"
-        "psubusb    %%mm1, %%mm5 \n\t"
-        "movd       %%mm2, %0    \n\t"
-        "movd       %%mm3, %1    \n\t"
-        "movd       %%mm4, %2    \n\t"
-        "movd       %%mm5, %3    \n\t"
-        :"+m"(*(uint32_t*)(dest+0*linesize)),
-         "+m"(*(uint32_t*)(dest+1*linesize)),
-         "+m"(*(uint32_t*)(dest+2*linesize)),
-         "+m"(*(uint32_t*)(dest+3*linesize))
-    );
-}
-
-static void vc1_inv_trans_8x4_dc_mmxext(uint8_t *dest, int linesize,
-                                        int16_t *block)
-{
-    int dc = block[0];
-    dc = ( 3 * dc +  1) >> 1;
-    dc = (17 * dc + 64) >> 7;
-    __asm__ volatile(
-        "movd          %0, %%mm0 \n\t"
-        "pshufw $0, %%mm0, %%mm0 \n\t"
-        "pxor       %%mm1, %%mm1 \n\t"
-        "psubw      %%mm0, %%mm1 \n\t"
-        "packuswb   %%mm0, %%mm0 \n\t"
-        "packuswb   %%mm1, %%mm1 \n\t"
-        ::"r"(dc)
-    );
-    __asm__ volatile(
-        "movq          %0, %%mm2 \n\t"
-        "movq          %1, %%mm3 \n\t"
-        "movq          %2, %%mm4 \n\t"
-        "movq          %3, %%mm5 \n\t"
-        "paddusb    %%mm0, %%mm2 \n\t"
-        "paddusb    %%mm0, %%mm3 \n\t"
-        "paddusb    %%mm0, %%mm4 \n\t"
-        "paddusb    %%mm0, %%mm5 \n\t"
-        "psubusb    %%mm1, %%mm2 \n\t"
-        "psubusb    %%mm1, %%mm3 \n\t"
-        "psubusb    %%mm1, %%mm4 \n\t"
-        "psubusb    %%mm1, %%mm5 \n\t"
-        "movq       %%mm2, %0    \n\t"
-        "movq       %%mm3, %1    \n\t"
-        "movq       %%mm4, %2    \n\t"
-        "movq       %%mm5, %3    \n\t"
-        :"+m"(*(uint32_t*)(dest+0*linesize)),
-         "+m"(*(uint32_t*)(dest+1*linesize)),
-         "+m"(*(uint32_t*)(dest+2*linesize)),
-         "+m"(*(uint32_t*)(dest+3*linesize))
-    );
-}
-
-static void vc1_inv_trans_8x8_dc_mmxext(uint8_t *dest, int linesize,
-                                        int16_t *block)
-{
-    int dc = block[0];
-    dc = (3 * dc +  1) >> 1;
-    dc = (3 * dc + 16) >> 5;
-    __asm__ volatile(
-        "movd          %0, %%mm0 \n\t"
-        "pshufw $0, %%mm0, %%mm0 \n\t"
-        "pxor       %%mm1, %%mm1 \n\t"
-        "psubw      %%mm0, %%mm1 \n\t"
-        "packuswb   %%mm0, %%mm0 \n\t"
-        "packuswb   %%mm1, %%mm1 \n\t"
-        ::"r"(dc)
-    );
-    __asm__ volatile(
-        "movq          %0, %%mm2 \n\t"
-        "movq          %1, %%mm3 \n\t"
-        "movq          %2, %%mm4 \n\t"
-        "movq          %3, %%mm5 \n\t"
-        "paddusb    %%mm0, %%mm2 \n\t"
-        "paddusb    %%mm0, %%mm3 \n\t"
-        "paddusb    %%mm0, %%mm4 \n\t"
-        "paddusb    %%mm0, %%mm5 \n\t"
-        "psubusb    %%mm1, %%mm2 \n\t"
-        "psubusb    %%mm1, %%mm3 \n\t"
-        "psubusb    %%mm1, %%mm4 \n\t"
-        "psubusb    %%mm1, %%mm5 \n\t"
-        "movq       %%mm2, %0    \n\t"
-        "movq       %%mm3, %1    \n\t"
-        "movq       %%mm4, %2    \n\t"
-        "movq       %%mm5, %3    \n\t"
-        :"+m"(*(uint32_t*)(dest+0*linesize)),
-         "+m"(*(uint32_t*)(dest+1*linesize)),
-         "+m"(*(uint32_t*)(dest+2*linesize)),
-         "+m"(*(uint32_t*)(dest+3*linesize))
-    );
-    dest += 4*linesize;
-    __asm__ volatile(
-        "movq          %0, %%mm2 \n\t"
-        "movq          %1, %%mm3 \n\t"
-        "movq          %2, %%mm4 \n\t"
-        "movq          %3, %%mm5 \n\t"
-        "paddusb    %%mm0, %%mm2 \n\t"
-        "paddusb    %%mm0, %%mm3 \n\t"
-        "paddusb    %%mm0, %%mm4 \n\t"
-        "paddusb    %%mm0, %%mm5 \n\t"
-        "psubusb    %%mm1, %%mm2 \n\t"
-        "psubusb    %%mm1, %%mm3 \n\t"
-        "psubusb    %%mm1, %%mm4 \n\t"
-        "psubusb    %%mm1, %%mm5 \n\t"
-        "movq       %%mm2, %0    \n\t"
-        "movq       %%mm3, %1    \n\t"
-        "movq       %%mm4, %2    \n\t"
-        "movq       %%mm5, %3    \n\t"
-        :"+m"(*(uint32_t*)(dest+0*linesize)),
-         "+m"(*(uint32_t*)(dest+1*linesize)),
-         "+m"(*(uint32_t*)(dest+2*linesize)),
-         "+m"(*(uint32_t*)(dest+3*linesize))
-    );
-}
-
-#if HAVE_MMX_EXTERNAL
-static void put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src,
-                                   ptrdiff_t stride, int rnd)
-{
-    ff_put_pixels8_mmx(dst, src, stride, 8);
-}
-static void put_vc1_mspel_mc00_16_mmx(uint8_t *dst, const uint8_t *src,
-                                      ptrdiff_t stride, int rnd)
-{
-    ff_put_pixels16_mmx(dst, src, stride, 16);
-}
-static void avg_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src,
-                                   ptrdiff_t stride, int rnd)
-{
-    ff_avg_pixels8_mmx(dst, src, stride, 8);
-}
-static void avg_vc1_mspel_mc00_16_mmx(uint8_t *dst, const uint8_t *src,
-                                      ptrdiff_t stride, int rnd)
-{
-    ff_avg_pixels16_mmx(dst, src, stride, 16);
-}
-#endif
-
 #define FN_ASSIGN(OP, X, Y, INSN) \
     dsp->OP##vc1_mspel_pixels_tab[1][X+4*Y] = OP##vc1_mspel_mc##X##Y##INSN; \
     dsp->OP##vc1_mspel_pixels_tab[0][X+4*Y] = OP##vc1_mspel_mc##X##Y##_16##INSN
 
 av_cold void ff_vc1dsp_init_mmx(VC1DSPContext *dsp)
 {
-#if HAVE_MMX_EXTERNAL
-    FN_ASSIGN(put_, 0, 0, _mmx);
-    FN_ASSIGN(avg_, 0, 0, _mmx);
-#endif
     FN_ASSIGN(put_, 0, 1, _mmx);
     FN_ASSIGN(put_, 0, 2, _mmx);
     FN_ASSIGN(put_, 0, 3, _mmx);
@@ -801,10 +482,5 @@ av_cold void ff_vc1dsp_init_mmxext(VC1DSPContext *dsp)
     FN_ASSIGN(avg_, 3, 1, _mmxext);
     FN_ASSIGN(avg_, 3, 2, _mmxext);
     FN_ASSIGN(avg_, 3, 3, _mmxext);
-
-    dsp->vc1_inv_trans_8x8_dc = vc1_inv_trans_8x8_dc_mmxext;
-    dsp->vc1_inv_trans_4x8_dc = vc1_inv_trans_4x8_dc_mmxext;
-    dsp->vc1_inv_trans_8x4_dc = vc1_inv_trans_8x4_dc_mmxext;
-    dsp->vc1_inv_trans_4x4_dc = vc1_inv_trans_4x4_dc_mmxext;
 }
-#endif /* HAVE_6REGS && HAVE_INLINE_ASM */
+#endif /* HAVE_6REGS && HAVE_INLINE_ASM && HAVE_MMX_EXTERNAL */
diff --git a/libavcodec/x86/videodsp.asm b/libavcodec/x86/videodsp.asm
index 25d43640..a807d3b8 100644
--- a/libavcodec/x86/videodsp.asm
+++ b/libavcodec/x86/videodsp.asm
@@ -193,10 +193,10 @@ hvar_fn
     mov            valb, [srcq+%2-1]
 %elif (%2-%%off) == 2
     mov            valw, [srcq+%2-2]
-%elifidn %1, body
-    mov            vald, [srcq+%2-3]
 %else
-    movd mm %+ %%mmx_idx, [srcq+%2-3]
+    mov            valb, [srcq+%2-1]
+    ror            vald, 16
+    mov            valw, [srcq+%2-3]
 %endif
 %endif ; (%2-%%off) >= 1
 %endmacro ; READ_NUM_BYTES
@@ -249,15 +249,13 @@ hvar_fn
     mov     [dstq+%2-1], valb
 %elif (%2-%%off) == 2
     mov     [dstq+%2-2], valw
-%elifidn %1, body
-    mov     [dstq+%2-3], valw
-    shr            vald, 16
-    mov     [dstq+%2-1], valb
 %else
-    movd           vald, mm %+ %%mmx_idx
     mov     [dstq+%2-3], valw
-    shr            vald, 16
+    ror            vald, 16
     mov     [dstq+%2-1], valb
+%ifnidn %1, body
+    ror            vald, 16
+%endif
 %endif
 %endif ; (%2-%%off) >= 1
 %endmacro ; WRITE_NUM_BYTES
diff --git a/libavcodec/x86/videodsp_init.c b/libavcodec/x86/videodsp_init.c
index 885cdf1d..26e072bb 100644
--- a/libavcodec/x86/videodsp_init.c
+++ b/libavcodec/x86/videodsp_init.c
@@ -162,6 +162,8 @@ static av_always_inline void emulated_edge_mc(uint8_t *dst, const uint8_t *src,
     if (!w || !h)
         return;
 
+    av_assert2(block_w <= FFABS(dst_stride));
+
     if (src_y >= h) {
         src -= src_y*src_stride;
         src_y_add = h - 1;
diff --git a/libavcodec/x86/vp3dsp.asm b/libavcodec/x86/vp3dsp.asm
index ee5a6bf6..d457cd7d 100644
--- a/libavcodec/x86/vp3dsp.asm
+++ b/libavcodec/x86/vp3dsp.asm
@@ -167,7 +167,7 @@ INIT_MMX mmx
 cglobal put_vp_no_rnd_pixels8_l2, 5, 6, 0, dst, src1, src2, stride, h, stride3
     mova   m6, [pb_FE]
     lea    stride3q,[strideq+strideq*2]
-.loop
+.loop:
     mova   m0, [src1q]
     mova   m1, [src2q]
     mova   m2, [src1q+strideq]
diff --git a/libavcodec/x86/vp3dsp_init.c b/libavcodec/x86/vp3dsp_init.c
index 354e1a19..2ece9ab7 100644
--- a/libavcodec/x86/vp3dsp_init.c
+++ b/libavcodec/x86/vp3dsp_init.c
@@ -59,7 +59,7 @@ av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags)
     if (EXTERNAL_MMXEXT(cpu_flags)) {
         c->idct_dc_add = ff_vp3_idct_dc_add_mmxext;
 
-        if (!(flags & CODEC_FLAG_BITEXACT)) {
+        if (!(flags & AV_CODEC_FLAG_BITEXACT)) {
             c->v_loop_filter = ff_vp3_v_loop_filter_mmxext;
             c->h_loop_filter = ff_vp3_h_loop_filter_mmxext;
         }
diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c
index 00e7125a..469a6617 100644
--- a/libavcodec/x86/vp9dsp_init.c
+++ b/libavcodec/x86/vp9dsp_init.c
@@ -23,170 +23,73 @@
 #include "libavutil/attributes.h"
 #include "libavutil/cpu.h"
 #include "libavutil/mem.h"
-#include "libavutil/x86/asm.h"
 #include "libavutil/x86/cpu.h"
 #include "libavcodec/vp9dsp.h"
+#include "libavcodec/x86/vp9dsp_init.h"
 
 #if HAVE_YASM
 
-#define fpel_func(avg, sz, opt) \
-void ff_vp9_##avg##sz##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
-                              const uint8_t *src, ptrdiff_t src_stride, \
-                              int h, int mx, int my)
-fpel_func(put,  4, mmx);
-fpel_func(put,  8, mmx);
-fpel_func(put, 16, sse);
-fpel_func(put, 32, sse);
-fpel_func(put, 64, sse);
-fpel_func(avg,  4, mmxext);
-fpel_func(avg,  8, mmxext);
-fpel_func(avg, 16, sse2);
-fpel_func(avg, 32, sse2);
-fpel_func(avg, 64, sse2);
-fpel_func(put, 32, avx);
-fpel_func(put, 64, avx);
-fpel_func(avg, 32, avx2);
-fpel_func(avg, 64, avx2);
-#undef fpel_func
-
-#define mc_func(avg, sz, dir, opt, type, f_sz) \
-void ff_vp9_##avg##_8tap_1d_##dir##_##sz##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
-                                                 const uint8_t *src, ptrdiff_t src_stride, \
-                                                 int h, const type (*filter)[f_sz])
-#define mc_funcs(sz, opt, type, fsz) \
-mc_func(put, sz, h, opt, type, fsz); \
-mc_func(avg, sz, h, opt, type, fsz); \
-mc_func(put, sz, v, opt, type, fsz); \
-mc_func(avg, sz, v, opt, type, fsz)
-
-mc_funcs(4, mmxext, int16_t, 8);
-mc_funcs(8, sse2, int16_t, 8);
-mc_funcs(4, ssse3, int8_t, 32);
-mc_funcs(8, ssse3, int8_t, 32);
+decl_fpel_func(put,  4,   , mmx);
+decl_fpel_func(put,  8,   , mmx);
+decl_fpel_func(put, 16,   , sse);
+decl_fpel_func(put, 32,   , sse);
+decl_fpel_func(put, 64,   , sse);
+decl_fpel_func(avg,  4, _8, mmxext);
+decl_fpel_func(avg,  8, _8, mmxext);
+decl_fpel_func(avg, 16, _8, sse2);
+decl_fpel_func(avg, 32, _8, sse2);
+decl_fpel_func(avg, 64, _8, sse2);
+decl_fpel_func(put, 32,   , avx);
+decl_fpel_func(put, 64,   , avx);
+decl_fpel_func(avg, 32, _8, avx2);
+decl_fpel_func(avg, 64, _8, avx2);
+
+decl_mc_funcs(4, mmxext, int16_t, 8, 8);
+decl_mc_funcs(8, sse2, int16_t,  8, 8);
+decl_mc_funcs(4, ssse3, int8_t, 32, 8);
+decl_mc_funcs(8, ssse3, int8_t, 32, 8);
 #if ARCH_X86_64
-mc_funcs(16, ssse3, int8_t, 32);
-mc_funcs(32, avx2, int8_t, 32);
+decl_mc_funcs(16, ssse3, int8_t, 32, 8);
+decl_mc_funcs(32, avx2, int8_t, 32, 8);
 #endif
 
-#undef mc_funcs
-#undef mc_func
-
-#define mc_rep_func(avg, sz, hsz, dir, opt, type, f_sz) \
-static av_always_inline void \
-ff_vp9_##avg##_8tap_1d_##dir##_##sz##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
-                                            const uint8_t *src, ptrdiff_t src_stride, \
-                                            int h, const type (*filter)[f_sz]) \
-{ \
-    ff_vp9_##avg##_8tap_1d_##dir##_##hsz##_##opt(dst,       dst_stride, src, \
-                                                 src_stride, h, filter); \
-    ff_vp9_##avg##_8tap_1d_##dir##_##hsz##_##opt(dst + hsz, dst_stride, src + hsz, \
-                                                 src_stride, h, filter); \
-}
-
-#define mc_rep_funcs(sz, hsz, opt, type, fsz) \
-mc_rep_func(put, sz, hsz, h, opt, type, fsz); \
-mc_rep_func(avg, sz, hsz, h, opt, type, fsz); \
-mc_rep_func(put, sz, hsz, v, opt, type, fsz); \
-mc_rep_func(avg, sz, hsz, v, opt, type, fsz)
-
-mc_rep_funcs(16, 8, sse2, int16_t, 8);
+mc_rep_funcs(16,  8,  8,  sse2, int16_t,  8, 8)
 #if ARCH_X86_32
-mc_rep_funcs(16, 8, ssse3, int8_t, 32);
+mc_rep_funcs(16,  8,  8, ssse3, int8_t,  32, 8)
 #endif
-mc_rep_funcs(32, 16, sse2, int16_t, 8);
-mc_rep_funcs(32, 16, ssse3, int8_t, 32);
-mc_rep_funcs(64, 32, sse2, int16_t, 8);
-mc_rep_funcs(64, 32, ssse3, int8_t, 32);
+mc_rep_funcs(32, 16, 16, sse2,  int16_t,  8, 8)
+mc_rep_funcs(32, 16, 16, ssse3, int8_t,  32, 8)
+mc_rep_funcs(64, 32, 32, sse2,  int16_t,  8, 8)
+mc_rep_funcs(64, 32, 32, ssse3, int8_t,  32, 8)
 #if ARCH_X86_64 && HAVE_AVX2_EXTERNAL
-mc_rep_funcs(64, 32, avx2, int8_t, 32);
+mc_rep_funcs(64, 32, 32, avx2,  int8_t,  32, 8)
 #endif
 
-#undef mc_rep_funcs
-#undef mc_rep_func
-
 extern const int8_t ff_filters_ssse3[3][15][4][32];
 extern const int16_t ff_filters_sse2[3][15][8][8];
 
-#define filter_8tap_2d_fn(op, sz, f, f_opt, fname, align, opt) \
-static void op##_8tap_##fname##_##sz##hv_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
-                                               const uint8_t *src, ptrdiff_t src_stride, \
-                                               int h, int mx, int my) \
-{ \
-    LOCAL_ALIGNED_##align(uint8_t, temp, [71 * 64]); \
-    ff_vp9_put_8tap_1d_h_##sz##_##opt(temp, 64, src - 3 * src_stride, src_stride, \
-                                      h + 7, ff_filters_##f_opt[f][mx - 1]); \
-    ff_vp9_##op##_8tap_1d_v_##sz##_##opt(dst, dst_stride, temp + 3 * 64, 64, \
-                                         h, ff_filters_##f_opt[f][my - 1]); \
-}
-
-#define filters_8tap_2d_fn(op, sz, align, opt, f_opt) \
-filter_8tap_2d_fn(op, sz, FILTER_8TAP_REGULAR, f_opt, regular, align, opt) \
-filter_8tap_2d_fn(op, sz, FILTER_8TAP_SHARP,   f_opt, sharp, align, opt) \
-filter_8tap_2d_fn(op, sz, FILTER_8TAP_SMOOTH,  f_opt, smooth, align, opt)
-
-#define filters_8tap_2d_fn2(op, align, opt4, opt8, f_opt) \
-filters_8tap_2d_fn(op, 64, align, opt8, f_opt) \
-filters_8tap_2d_fn(op, 32, align, opt8, f_opt) \
-filters_8tap_2d_fn(op, 16, align, opt8, f_opt) \
-filters_8tap_2d_fn(op, 8, align, opt8, f_opt) \
-filters_8tap_2d_fn(op, 4, align, opt4, f_opt)
-
-filters_8tap_2d_fn2(put, 16, mmxext, sse2, sse2)
-filters_8tap_2d_fn2(avg, 16, mmxext, sse2, sse2)
-filters_8tap_2d_fn2(put, 16, ssse3, ssse3, ssse3)
-filters_8tap_2d_fn2(avg, 16, ssse3, ssse3, ssse3)
+filters_8tap_2d_fn2(put, 16, 8, 1, mmxext, sse2, sse2)
+filters_8tap_2d_fn2(avg, 16, 8, 1, mmxext, sse2, sse2)
+filters_8tap_2d_fn2(put, 16, 8, 1, ssse3, ssse3, ssse3)
+filters_8tap_2d_fn2(avg, 16, 8, 1, ssse3, ssse3, ssse3)
 #if ARCH_X86_64 && HAVE_AVX2_EXTERNAL
-filters_8tap_2d_fn(put, 64, 32, avx2, ssse3)
-filters_8tap_2d_fn(put, 32, 32, avx2, ssse3)
-filters_8tap_2d_fn(avg, 64, 32, avx2, ssse3)
-filters_8tap_2d_fn(avg, 32, 32, avx2, ssse3)
+filters_8tap_2d_fn(put, 64, 32, 8, 1, avx2, ssse3)
+filters_8tap_2d_fn(put, 32, 32, 8, 1, avx2, ssse3)
+filters_8tap_2d_fn(avg, 64, 32, 8, 1, avx2, ssse3)
+filters_8tap_2d_fn(avg, 32, 32, 8, 1, avx2, ssse3)
 #endif
 
-#undef filters_8tap_2d_fn2
-#undef filters_8tap_2d_fn
-#undef filter_8tap_2d_fn
-
-#define filter_8tap_1d_fn(op, sz, f, f_opt, fname, dir, dvar, opt) \
-static void op##_8tap_##fname##_##sz##dir##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
-                                                  const uint8_t *src, ptrdiff_t src_stride, \
-                                                  int h, int mx, int my) \
-{ \
-    ff_vp9_##op##_8tap_1d_##dir##_##sz##_##opt(dst, dst_stride, src, src_stride, \
-                                               h, ff_filters_##f_opt[f][dvar - 1]); \
-}
-
-#define filters_8tap_1d_fn(op, sz, dir, dvar, opt, f_opt) \
-filter_8tap_1d_fn(op, sz, FILTER_8TAP_REGULAR, f_opt, regular, dir, dvar, opt) \
-filter_8tap_1d_fn(op, sz, FILTER_8TAP_SHARP,   f_opt, sharp,   dir, dvar, opt) \
-filter_8tap_1d_fn(op, sz, FILTER_8TAP_SMOOTH,  f_opt, smooth,  dir, dvar, opt)
-
-#define filters_8tap_1d_fn2(op, sz, opt, f_opt) \
-filters_8tap_1d_fn(op, sz, h, mx, opt, f_opt) \
-filters_8tap_1d_fn(op, sz, v, my, opt, f_opt)
-
-#define filters_8tap_1d_fn3(op, opt4, opt8, f_opt) \
-filters_8tap_1d_fn2(op, 64, opt8, f_opt) \
-filters_8tap_1d_fn2(op, 32, opt8, f_opt) \
-filters_8tap_1d_fn2(op, 16, opt8, f_opt) \
-filters_8tap_1d_fn2(op, 8, opt8, f_opt) \
-filters_8tap_1d_fn2(op, 4, opt4, f_opt)
-
-filters_8tap_1d_fn3(put, mmxext, sse2, sse2)
-filters_8tap_1d_fn3(avg, mmxext, sse2, sse2)
-filters_8tap_1d_fn3(put, ssse3, ssse3, ssse3)
-filters_8tap_1d_fn3(avg, ssse3, ssse3, ssse3)
+filters_8tap_1d_fn3(put, 8, mmxext, sse2, sse2)
+filters_8tap_1d_fn3(avg, 8, mmxext, sse2, sse2)
+filters_8tap_1d_fn3(put, 8, ssse3, ssse3, ssse3)
+filters_8tap_1d_fn3(avg, 8, ssse3, ssse3, ssse3)
 #if ARCH_X86_64 && HAVE_AVX2_EXTERNAL
-filters_8tap_1d_fn2(put, 64, avx2, ssse3)
-filters_8tap_1d_fn2(put, 32, avx2, ssse3)
-filters_8tap_1d_fn2(avg, 64, avx2, ssse3)
-filters_8tap_1d_fn2(avg, 32, avx2, ssse3)
+filters_8tap_1d_fn2(put, 64, 8, avx2, ssse3)
+filters_8tap_1d_fn2(put, 32, 8, avx2, ssse3)
+filters_8tap_1d_fn2(avg, 64, 8, avx2, ssse3)
+filters_8tap_1d_fn2(avg, 32, 8, avx2, ssse3)
 #endif
 
-#undef filters_8tap_1d_fn
-#undef filters_8tap_1d_fn2
-#undef filters_8tap_1d_fn3
-#undef filter_8tap_1d_fn
-
 #define itxfm_func(typea, typeb, size, opt) \
 void ff_vp9_##typea##_##typeb##_##size##x##size##_add_##opt(uint8_t *dst, ptrdiff_t stride, \
                                                             int16_t *block, int eob)
@@ -307,42 +210,20 @@ ipred_func(32, tm, avx2);
 
 #endif /* HAVE_YASM */
 
-av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp)
+av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact)
 {
 #if HAVE_YASM
     int cpu_flags;
-    if (bpp != 8) return;
 
-    cpu_flags = av_get_cpu_flags();
-
-#define init_fpel(idx1, idx2, sz, type, opt) \
-    dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] = \
-    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] = \
-    dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][0][0] = \
-    dsp->mc[idx1][FILTER_BILINEAR    ][idx2][0][0] = ff_vp9_##type##sz##_##opt
-
-#define init_subpel1(idx1, idx2, idxh, idxv, sz, dir, type, opt) \
-    dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][idxh][idxv] = type##_8tap_smooth_##sz##dir##_##opt; \
-    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][idxh][idxv] = type##_8tap_regular_##sz##dir##_##opt; \
-    dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][idxh][idxv] = type##_8tap_sharp_##sz##dir##_##opt
-
-#define init_subpel2(idx1, idx2, sz, type, opt) \
-    init_subpel1(idx1, idx2, 1, 1, sz, hv, type, opt); \
-    init_subpel1(idx1, idx2, 0, 1, sz, v,  type, opt); \
-    init_subpel1(idx1, idx2, 1, 0, sz, h,  type, opt)
-
-#define init_subpel3_32_64(idx, type, opt) \
-    init_subpel2(0, idx, 64, type, opt); \
-    init_subpel2(1, idx, 32, type, opt)
-
-#define init_subpel3_8to64(idx, type, opt) \
-    init_subpel3_32_64(idx, type, opt); \
-    init_subpel2(2, idx, 16, type, opt); \
-    init_subpel2(3, idx,  8, type, opt)
+    if (bpp == 10) {
+        ff_vp9dsp_init_10bpp_x86(dsp, bitexact);
+        return;
+    } else if (bpp == 12) {
+        ff_vp9dsp_init_12bpp_x86(dsp, bitexact);
+        return;
+    }
 
-#define init_subpel3(idx, type, opt) \
-    init_subpel3_8to64(idx, type, opt); \
-    init_subpel2(4, idx,  4, type, opt)
+    cpu_flags = av_get_cpu_flags();
 
 #define init_lpf(opt) do { \
     dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_##opt; \
@@ -386,20 +267,22 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp)
 } while (0)
 
     if (EXTERNAL_MMX(cpu_flags)) {
-        init_fpel(4, 0,  4, put, mmx);
-        init_fpel(3, 0,  8, put, mmx);
-        dsp->itxfm_add[4 /* lossless */][DCT_DCT] =
-        dsp->itxfm_add[4 /* lossless */][ADST_DCT] =
-        dsp->itxfm_add[4 /* lossless */][DCT_ADST] =
-        dsp->itxfm_add[4 /* lossless */][ADST_ADST] = ff_vp9_iwht_iwht_4x4_add_mmx;
+        init_fpel_func(4, 0,  4, put, , mmx);
+        init_fpel_func(3, 0,  8, put, , mmx);
+        if (!bitexact) {
+            dsp->itxfm_add[4 /* lossless */][DCT_DCT] =
+            dsp->itxfm_add[4 /* lossless */][ADST_DCT] =
+            dsp->itxfm_add[4 /* lossless */][DCT_ADST] =
+            dsp->itxfm_add[4 /* lossless */][ADST_ADST] = ff_vp9_iwht_iwht_4x4_add_mmx;
+        }
         init_ipred(8, mmx, v, VERT);
     }
 
     if (EXTERNAL_MMXEXT(cpu_flags)) {
-        init_subpel2(4, 0, 4, put, mmxext);
-        init_subpel2(4, 1, 4, avg, mmxext);
-        init_fpel(4, 1,  4, avg, mmxext);
-        init_fpel(3, 1,  8, avg, mmxext);
+        init_subpel2(4, 0, 4, put, 8, mmxext);
+        init_subpel2(4, 1, 4, avg, 8, mmxext);
+        init_fpel_func(4, 1,  4, avg, _8, mmxext);
+        init_fpel_func(3, 1,  8, avg, _8, mmxext);
         dsp->itxfm_add[TX_4X4][DCT_DCT] = ff_vp9_idct_idct_4x4_add_mmxext;
         init_dc_ipred(4, mmxext);
         init_dc_ipred(8, mmxext);
@@ -407,19 +290,19 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp)
     }
 
     if (EXTERNAL_SSE(cpu_flags)) {
-        init_fpel(2, 0, 16, put, sse);
-        init_fpel(1, 0, 32, put, sse);
-        init_fpel(0, 0, 64, put, sse);
+        init_fpel_func(2, 0, 16, put, , sse);
+        init_fpel_func(1, 0, 32, put, , sse);
+        init_fpel_func(0, 0, 64, put, , sse);
         init_ipred(16, sse, v, VERT);
         init_ipred(32, sse, v, VERT);
     }
 
     if (EXTERNAL_SSE2(cpu_flags)) {
-        init_subpel3_8to64(0, put, sse2);
-        init_subpel3_8to64(1, avg, sse2);
-        init_fpel(2, 1, 16, avg, sse2);
-        init_fpel(1, 1, 32, avg, sse2);
-        init_fpel(0, 1, 64, avg, sse2);
+        init_subpel3_8to64(0, put, 8, sse2);
+        init_subpel3_8to64(1, avg, 8, sse2);
+        init_fpel_func(2, 1, 16, avg,  _8, sse2);
+        init_fpel_func(1, 1, 32, avg,  _8, sse2);
+        init_fpel_func(0, 1, 64, avg,  _8, sse2);
         init_lpf(sse2);
         dsp->itxfm_add[TX_4X4][ADST_DCT]  = ff_vp9_idct_iadst_4x4_add_sse2;
         dsp->itxfm_add[TX_4X4][DCT_ADST]  = ff_vp9_iadst_idct_4x4_add_sse2;
@@ -445,8 +328,8 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp)
     }
 
     if (EXTERNAL_SSSE3(cpu_flags)) {
-        init_subpel3(0, put, ssse3);
-        init_subpel3(1, avg, ssse3);
+        init_subpel3(0, put, 8, ssse3);
+        init_subpel3(1, avg, 8, ssse3);
         dsp->itxfm_add[TX_4X4][DCT_DCT] = ff_vp9_idct_idct_4x4_add_ssse3;
         dsp->itxfm_add[TX_4X4][ADST_DCT]  = ff_vp9_idct_iadst_4x4_add_ssse3;
         dsp->itxfm_add[TX_4X4][DCT_ADST]  = ff_vp9_iadst_idct_4x4_add_ssse3;
@@ -489,18 +372,18 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp)
         init_dir_tm_h_ipred(32, avx);
     }
     if (EXTERNAL_AVX_FAST(cpu_flags)) {
-        init_fpel(1, 0, 32, put, avx);
-        init_fpel(0, 0, 64, put, avx);
+        init_fpel_func(1, 0, 32, put, , avx);
+        init_fpel_func(0, 0, 64, put, , avx);
         init_ipred(32, avx, v, VERT);
     }
 
-    if (EXTERNAL_AVX2(cpu_flags)) {
-        init_fpel(1, 1, 32, avg, avx2);
-        init_fpel(0, 1, 64, avg, avx2);
+    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+        init_fpel_func(1, 1, 32, avg, _8, avx2);
+        init_fpel_func(0, 1, 64, avg, _8, avx2);
         if (ARCH_X86_64) {
 #if ARCH_X86_64 && HAVE_AVX2_EXTERNAL
-            init_subpel3_32_64(0, put, avx2);
-            init_subpel3_32_64(1, avg, avx2);
+            init_subpel3_32_64(0, put, 8, avx2);
+            init_subpel3_32_64(1, avg, 8, avx2);
 #endif
         }
         init_dc_ipred(32, avx2);
diff --git a/libavcodec/x86/vp9dsp_init.h b/libavcodec/x86/vp9dsp_init.h
new file mode 100644
index 00000000..e410cab3
--- /dev/null
+++ b/libavcodec/x86/vp9dsp_init.h
@@ -0,0 +1,189 @@
+/*
+ * VP9 SIMD optimizations
+ *
+ * Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_X86_VP9DSP_INIT_H
+#define AVCODEC_X86_VP9DSP_INIT_H
+
+#include "libavcodec/vp9dsp.h"
+
+// hack to force-expand BPC: bpp is a plain (non-##) argument at the call site, so it expands before pasting
+#define cat(a, bpp, b) a##bpp##b
+
+#define decl_fpel_func(avg, sz, bpp, opt) \
+void ff_vp9_##avg##sz##bpp##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
+                                   const uint8_t *src, ptrdiff_t src_stride, \
+                                   int h, int mx, int my)
+
+#define decl_mc_func(avg, sz, dir, opt, type, f_sz, bpp) \
+void ff_vp9_##avg##_8tap_1d_##dir##_##sz##_##bpp##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
+                                                         const uint8_t *src, ptrdiff_t src_stride, \
+                                                         int h, const type (*filter)[f_sz])
+
+#define decl_mc_funcs(sz, opt, type, fsz, bpp) \
+decl_mc_func(put, sz, h, opt, type, fsz, bpp); \
+decl_mc_func(avg, sz, h, opt, type, fsz, bpp); \
+decl_mc_func(put, sz, v, opt, type, fsz, bpp); \
+decl_mc_func(avg, sz, v, opt, type, fsz, bpp)
+
+#define decl_ipred_fn(type, sz, bpp, opt) \
+void ff_vp9_ipred_##type##_##sz##x##sz##_##bpp##_##opt(uint8_t *dst, \
+                                                       ptrdiff_t stride, \
+                                                       const uint8_t *l, \
+                                                       const uint8_t *a)
+
+#define decl_ipred_fns(type, bpp, opt4, opt8_16_32) \
+decl_ipred_fn(type,  4, bpp, opt4); \
+decl_ipred_fn(type,  8, bpp, opt8_16_32); \
+decl_ipred_fn(type, 16, bpp, opt8_16_32); \
+decl_ipred_fn(type, 32, bpp, opt8_16_32)
+
+#define decl_itxfm_func(typea, typeb, size, bpp, opt) \
+void cat(ff_vp9_##typea##_##typeb##_##size##x##size##_add_, bpp, _##opt)(uint8_t *dst, \
+                                                                         ptrdiff_t stride, \
+                                                                         int16_t *block, \
+                                                                         int eob)
+
+#define decl_itxfm_funcs(size, bpp, opt) \
+decl_itxfm_func(idct,  idct,  size, bpp, opt); \
+decl_itxfm_func(iadst, idct,  size, bpp, opt); \
+decl_itxfm_func(idct,  iadst, size, bpp, opt); \
+decl_itxfm_func(iadst, iadst, size, bpp, opt)
+
+#define mc_rep_func(avg, sz, hsz, hszb, dir, opt, type, f_sz, bpp) \
+static av_always_inline void \
+ff_vp9_##avg##_8tap_1d_##dir##_##sz##_##bpp##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
+                                                    const uint8_t *src, ptrdiff_t src_stride, \
+                                                    int h, const type (*filter)[f_sz]) \
+{ /* the sz variant is two calls to the hsz variant; hszb is the byte offset added to dst/src for the second */ \
+    ff_vp9_##avg##_8tap_1d_##dir##_##hsz##_##bpp##_##opt(dst,        dst_stride, src, \
+                                                         src_stride, h, filter); \
+    ff_vp9_##avg##_8tap_1d_##dir##_##hsz##_##bpp##_##opt(dst + hszb, dst_stride, src + hszb, \
+                                                         src_stride, h, filter); \
+}
+
+#define mc_rep_funcs(sz, hsz, hszb, opt, type, fsz, bpp) \
+mc_rep_func(put, sz, hsz, hszb, h, opt, type, fsz, bpp) \
+mc_rep_func(avg, sz, hsz, hszb, h, opt, type, fsz, bpp) \
+mc_rep_func(put, sz, hsz, hszb, v, opt, type, fsz, bpp) \
+mc_rep_func(avg, sz, hsz, hszb, v, opt, type, fsz, bpp)
+
+#define filter_8tap_1d_fn(op, sz, f, f_opt, fname, dir, dvar, bpp, opt) \
+static void op##_8tap_##fname##_##sz##dir##_##bpp##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
+                                                          const uint8_t *src, ptrdiff_t src_stride, \
+                                                          int h, int mx, int my) \
+{ /* dvar names the mx or my parameter; dvar - 1 indexes filter set f of the ff_filters_<f_opt> table */ \
+    ff_vp9_##op##_8tap_1d_##dir##_##sz##_##bpp##_##opt(dst, dst_stride, src, src_stride, \
+                                                       h, ff_filters_##f_opt[f][dvar - 1]); \
+}
+
+#define filters_8tap_1d_fn(op, sz, dir, dvar, bpp, opt, f_opt) \
+filter_8tap_1d_fn(op, sz, FILTER_8TAP_REGULAR, f_opt, regular, dir, dvar, bpp, opt) \
+filter_8tap_1d_fn(op, sz, FILTER_8TAP_SHARP,   f_opt, sharp,   dir, dvar, bpp, opt) \
+filter_8tap_1d_fn(op, sz, FILTER_8TAP_SMOOTH,  f_opt, smooth,  dir, dvar, bpp, opt)
+
+#define filters_8tap_1d_fn2(op, sz, bpp, opt, f_opt) \
+filters_8tap_1d_fn(op, sz, h, mx, bpp, opt, f_opt) \
+filters_8tap_1d_fn(op, sz, v, my, bpp, opt, f_opt)
+
+#define filters_8tap_1d_fn3(op, bpp, opt4, opt8, f_opt) \
+filters_8tap_1d_fn2(op, 64, bpp, opt8, f_opt) \
+filters_8tap_1d_fn2(op, 32, bpp, opt8, f_opt) \
+filters_8tap_1d_fn2(op, 16, bpp, opt8, f_opt) \
+filters_8tap_1d_fn2(op, 8, bpp, opt8, f_opt) \
+filters_8tap_1d_fn2(op, 4, bpp, opt4, f_opt)
+
+#define filter_8tap_2d_fn(op, sz, f, f_opt, fname, align, bpp, bytes, opt) \
+static void op##_8tap_##fname##_##sz##hv_##bpp##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
+                                                       const uint8_t *src, ptrdiff_t src_stride, \
+                                                       int h, int mx, int my) \
+{ /* two passes: h filter (mx) puts h + 7 rows, from 3 rows above src, into temp; v filter (my) reads from temp row 3 */ \
+    LOCAL_ALIGNED_##align(uint8_t, temp, [71 * 64 * bytes]); \
+    ff_vp9_put_8tap_1d_h_##sz##_##bpp##_##opt(temp, 64 * bytes, src - 3 * src_stride, \
+                                              src_stride,  h + 7, \
+                                              ff_filters_##f_opt[f][mx - 1]); \
+    ff_vp9_##op##_8tap_1d_v_##sz##_##bpp##_##opt(dst, dst_stride, temp + 3 * bytes * 64, \
+                                                 64 * bytes, h, \
+                                                 ff_filters_##f_opt[f][my - 1]); \
+}
+
+#define filters_8tap_2d_fn(op, sz, align, bpp, bytes, opt, f_opt) \
+filter_8tap_2d_fn(op, sz, FILTER_8TAP_REGULAR, f_opt, regular, align, bpp, bytes, opt) \
+filter_8tap_2d_fn(op, sz, FILTER_8TAP_SHARP,   f_opt, sharp, align, bpp, bytes, opt) \
+filter_8tap_2d_fn(op, sz, FILTER_8TAP_SMOOTH,  f_opt, smooth, align, bpp, bytes, opt)
+
+#define filters_8tap_2d_fn2(op, align, bpp, bytes, opt4, opt8, f_opt) \
+filters_8tap_2d_fn(op, 64, align, bpp, bytes, opt8, f_opt) \
+filters_8tap_2d_fn(op, 32, align, bpp, bytes, opt8, f_opt) \
+filters_8tap_2d_fn(op, 16, align, bpp, bytes, opt8, f_opt) \
+filters_8tap_2d_fn(op, 8, align, bpp, bytes, opt8, f_opt) \
+filters_8tap_2d_fn(op, 4, align, bpp, bytes, opt4, f_opt)
+
+#define init_fpel_func(idx1, idx2, sz, type, bpp, opt) \
+    dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] = \
+    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] = \
+    dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][0][0] = \
+    dsp->mc[idx1][FILTER_BILINEAR    ][idx2][0][0] = ff_vp9_##type##sz##bpp##_##opt
+
+#define init_subpel1(idx1, idx2, idxh, idxv, sz, dir, type, bpp, opt) \
+    dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][idxh][idxv] = \
+        type##_8tap_smooth_##sz##dir##_##bpp##_##opt; \
+    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][idxh][idxv] = \
+        type##_8tap_regular_##sz##dir##_##bpp##_##opt; \
+    dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][idxh][idxv] = \
+        type##_8tap_sharp_##sz##dir##_##bpp##_##opt
+
+#define init_subpel2(idx1, idx2, sz, type, bpp, opt) \
+    init_subpel1(idx1, idx2, 1, 1, sz, hv, type, bpp, opt); \
+    init_subpel1(idx1, idx2, 0, 1, sz, v,  type, bpp, opt); \
+    init_subpel1(idx1, idx2, 1, 0, sz, h,  type, bpp, opt)
+
+#define init_subpel3_32_64(idx, type, bpp, opt) \
+    init_subpel2(0, idx, 64, type, bpp, opt); \
+    init_subpel2(1, idx, 32, type, bpp, opt)
+
+#define init_subpel3_8to64(idx, type, bpp, opt) \
+    init_subpel3_32_64(idx, type, bpp, opt); \
+    init_subpel2(2, idx, 16, type, bpp, opt); \
+    init_subpel2(3, idx,  8, type, bpp, opt)
+
+#define init_subpel3(idx, type, bpp, opt) \
+    init_subpel3_8to64(idx, type, bpp, opt); \
+    init_subpel2(4, idx,  4, type, bpp, opt)
+
+#define init_ipred_func(type, enum, sz, bpp, opt) \
+    dsp->intra_pred[TX_##sz##X##sz][enum##_PRED] = \
+        cat(ff_vp9_ipred_##type##_##sz##x##sz##_, bpp, _##opt)
+
+#define init_8_16_32_ipred_funcs(type, enum, bpp, opt) \
+    init_ipred_func(type, enum,  8, bpp, opt); \
+    init_ipred_func(type, enum, 16, bpp, opt); \
+    init_ipred_func(type, enum, 32, bpp, opt)
+
+#define init_ipred_funcs(type, enum, bpp, opt) \
+    init_ipred_func(type, enum,  4, bpp, opt); \
+    init_8_16_32_ipred_funcs(type, enum, bpp, opt)
+
+void ff_vp9dsp_init_10bpp_x86(VP9DSPContext *dsp, int bitexact);
+void ff_vp9dsp_init_12bpp_x86(VP9DSPContext *dsp, int bitexact);
+void ff_vp9dsp_init_16bpp_x86(VP9DSPContext *dsp);
+
+#endif /* AVCODEC_X86_VP9DSP_INIT_H */
diff --git a/libavcodec/x86/vp9dsp_init_10bpp.c b/libavcodec/x86/vp9dsp_init_10bpp.c
new file mode 100644
index 00000000..2694c06c
--- /dev/null
+++ b/libavcodec/x86/vp9dsp_init_10bpp.c
@@ -0,0 +1,25 @@
+/*
+ * VP9 SIMD optimizations
+ *
+ * Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define BPC 10
+#define INIT_FUNC ff_vp9dsp_init_10bpp_x86
+#include "vp9dsp_init_16bpp_template.c"
diff --git a/libavcodec/x86/vp9dsp_init_12bpp.c b/libavcodec/x86/vp9dsp_init_12bpp.c
new file mode 100644
index 00000000..5da3bc18
--- /dev/null
+++ b/libavcodec/x86/vp9dsp_init_12bpp.c
@@ -0,0 +1,25 @@
+/*
+ * VP9 SIMD optimizations
+ *
+ * Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define BPC 12
+#define INIT_FUNC ff_vp9dsp_init_12bpp_x86
+#include "vp9dsp_init_16bpp_template.c"
diff --git a/libavcodec/x86/vp9dsp_init_16bpp.c b/libavcodec/x86/vp9dsp_init_16bpp.c
new file mode 100644
index 00000000..eb67499c
--- /dev/null
+++ b/libavcodec/x86/vp9dsp_init_16bpp.c
@@ -0,0 +1,139 @@
+/*
+ * VP9 SIMD optimizations
+ *
+ * Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/mem.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/vp9dsp.h"
+#include "libavcodec/x86/vp9dsp_init.h"
+
+#if HAVE_YASM /* the prototypes below exist only when HAVE_YASM is non-zero */
+
+decl_fpel_func(put,   8,    , mmx); /* decl_fpel_func and decl_ipred_fns are not defined in this file; presumably from vp9dsp_init.h -- confirm */
+decl_fpel_func(avg,   8, _16, mmxext);
+decl_fpel_func(put,  16,    , sse);
+decl_fpel_func(put,  32,    , sse);
+decl_fpel_func(put,  64,    , sse);
+decl_fpel_func(put, 128,    , sse);
+decl_fpel_func(avg,  16, _16, sse2);
+decl_fpel_func(avg,  32, _16, sse2);
+decl_fpel_func(avg,  64, _16, sse2);
+decl_fpel_func(avg, 128, _16, sse2);
+decl_fpel_func(put,  32,    , avx);
+decl_fpel_func(put,  64,    , avx);
+decl_fpel_func(put, 128,    , avx);
+decl_fpel_func(avg,  32, _16, avx2);
+decl_fpel_func(avg,  64, _16, avx2);
+decl_fpel_func(avg, 128, _16, avx2);
+
+decl_ipred_fns(v,       16, mmx,    sse); /* two instruction-set arguments per predictor; the init code below uses the first for the 4x4 size and the second for 8/16/32 */
+decl_ipred_fns(h,       16, mmxext, sse2);
+decl_ipred_fns(dc,      16, mmxext, sse2);
+decl_ipred_fns(dc_top,  16, mmxext, sse2);
+decl_ipred_fns(dc_left, 16, mmxext, sse2);
+
+#define decl_ipred_dir_funcs(type) /* declare one directional predictor for sse2, ssse3 and avx */ \
+decl_ipred_fns(type, 16, sse2,  sse2); \
+decl_ipred_fns(type, 16, ssse3, ssse3); \
+decl_ipred_fns(type, 16, avx,   avx)
+
+decl_ipred_dir_funcs(dl); /* the six directional predictors that the init function below installs */
+decl_ipred_dir_funcs(dr);
+decl_ipred_dir_funcs(vl);
+decl_ipred_dir_funcs(vr);
+decl_ipred_dir_funcs(hu);
+decl_ipred_dir_funcs(hd);
+#endif /* HAVE_YASM */
+
+av_cold void ff_vp9dsp_init_16bpp_x86(VP9DSPContext *dsp) /* fills dsp through the init_* macros according to the runtime CPU flags */
+{
+#if HAVE_YASM /* without HAVE_YASM the function body is empty */
+    int cpu_flags = av_get_cpu_flags();
+
+    if (EXTERNAL_MMX(cpu_flags)) { /* blocks are ordered from oldest to newest instruction set */
+        init_fpel_func(4, 0,   8, put, , mmx); /* init_fpel_func / init_ipred_func are macros from outside this file */
+        init_ipred_func(v, VERT, 4, 16, mmx);
+    }
+
+    if (EXTERNAL_MMXEXT(cpu_flags)) { /* avg for size 8 plus the 4x4 h/dc/dc_top/dc_left predictors */
+        init_fpel_func(4, 1,   8, avg, _16, mmxext);
+        init_ipred_func(h, HOR, 4, 16, mmxext);
+        init_ipred_func(dc, DC, 4, 16, mmxext);
+        init_ipred_func(dc_top,  TOP_DC,  4, 16, mmxext);
+        init_ipred_func(dc_left, LEFT_DC, 4, 16, mmxext);
+    }
+
+    if (EXTERNAL_SSE(cpu_flags)) { /* put for sizes 16..128 and the 8/16/32 vertical predictors */
+        init_fpel_func(3, 0,  16, put, , sse);
+        init_fpel_func(2, 0,  32, put, , sse);
+        init_fpel_func(1, 0,  64, put, , sse);
+        init_fpel_func(0, 0, 128, put, , sse);
+        init_8_16_32_ipred_funcs(v, VERT, 16, sse);
+    }
+
+    if (EXTERNAL_SSE2(cpu_flags)) { /* avg for sizes 16..128, the 8/16/32 h/dc predictors and all directional predictors */
+        init_fpel_func(3, 1,  16, avg, _16, sse2);
+        init_fpel_func(2, 1,  32, avg, _16, sse2);
+        init_fpel_func(1, 1,  64, avg, _16, sse2);
+        init_fpel_func(0, 1, 128, avg, _16, sse2);
+        init_8_16_32_ipred_funcs(h, HOR, 16, sse2);
+        init_8_16_32_ipred_funcs(dc, DC, 16, sse2);
+        init_8_16_32_ipred_funcs(dc_top,  TOP_DC,  16, sse2);
+        init_8_16_32_ipred_funcs(dc_left, LEFT_DC, 16, sse2);
+        init_ipred_funcs(dl, DIAG_DOWN_LEFT, 16, sse2);
+        init_ipred_funcs(dr, DIAG_DOWN_RIGHT, 16, sse2);
+        init_ipred_funcs(vl, VERT_LEFT, 16, sse2);
+        init_ipred_funcs(vr, VERT_RIGHT, 16, sse2);
+        init_ipred_funcs(hu, HOR_UP, 16, sse2);
+        init_ipred_funcs(hd, HOR_DOWN, 16, sse2);
+    }
+
+    if (EXTERNAL_SSSE3(cpu_flags)) { /* same six directional predictors as the sse2 block; presumably these replace the sse2 entries -- confirm against init_ipred_funcs */
+        init_ipred_funcs(dl, DIAG_DOWN_LEFT, 16, ssse3);
+        init_ipred_funcs(dr, DIAG_DOWN_RIGHT, 16, ssse3);
+        init_ipred_funcs(vl, VERT_LEFT, 16, ssse3);
+        init_ipred_funcs(vr, VERT_RIGHT, 16, ssse3);
+        init_ipred_funcs(hu, HOR_UP, 16, ssse3);
+        init_ipred_funcs(hd, HOR_DOWN, 16, ssse3);
+    }
+
+    if (EXTERNAL_AVX_FAST(cpu_flags)) { /* put for 32/64/128 uses the same index arguments as the sse block; presumably overrides it -- confirm against init_fpel_func */
+        init_fpel_func(2, 0,  32, put, , avx);
+        init_fpel_func(1, 0,  64, put, , avx);
+        init_fpel_func(0, 0, 128, put, , avx);
+        init_ipred_funcs(dl, DIAG_DOWN_LEFT, 16, avx);
+        init_ipred_funcs(dr, DIAG_DOWN_RIGHT, 16, avx);
+        init_ipred_funcs(vl, VERT_LEFT, 16, avx);
+        init_ipred_funcs(vr, VERT_RIGHT, 16, avx);
+        init_ipred_funcs(hu, HOR_UP, 16, avx);
+        init_ipred_funcs(hd, HOR_DOWN, 16, avx);
+    }
+
+    if (EXTERNAL_AVX2_FAST(cpu_flags)) { /* avg for 32/64/128 uses the same index arguments as the sse2 block */
+        init_fpel_func(2, 1,  32, avg, _16, avx2);
+        init_fpel_func(1, 1,  64, avg, _16, avx2);
+        init_fpel_func(0, 1, 128, avg, _16, avx2);
+    }
+
+#endif /* HAVE_YASM */
+}
diff --git a/libavcodec/x86/vp9dsp_init_16bpp_template.c b/libavcodec/x86/vp9dsp_init_16bpp_template.c
new file mode 100644
index 00000000..4840b284
--- /dev/null
+++ b/libavcodec/x86/vp9dsp_init_16bpp_template.c
@@ -0,0 +1,240 @@
+/*
+ * VP9 SIMD optimizations
+ *
+ * Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/mem.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/vp9dsp.h"
+#include "libavcodec/x86/vp9dsp_init.h"
+
+#if HAVE_YASM
+
+extern const int16_t ff_filters_16bpp[3][15][4][16]; /* table declared here, defined in another translation unit */
+
+decl_mc_funcs(4, sse2, int16_t, 16, BPC); /* decl_mc_funcs, mc_rep_funcs and filters_8tap_* are macros from outside this file (presumably vp9dsp_init.h -- confirm) */
+decl_mc_funcs(8, sse2, int16_t, 16, BPC);
+decl_mc_funcs(16, avx2, int16_t, 16, BPC);
+
+mc_rep_funcs(16,  8, 16, sse2, int16_t, 16, BPC) /* no trailing semicolons here, unlike the decl_* uses above */
+mc_rep_funcs(32, 16, 32, sse2, int16_t, 16, BPC)
+mc_rep_funcs(64, 32, 64, sse2, int16_t, 16, BPC)
+#if HAVE_AVX2_EXTERNAL /* avx2 variants are only generated when HAVE_AVX2_EXTERNAL is set */
+mc_rep_funcs(32, 16, 32, avx2, int16_t, 16, BPC)
+mc_rep_funcs(64, 32, 64, avx2, int16_t, 16, BPC)
+#endif
+
+filters_8tap_2d_fn2(put, 16, BPC, 2, sse2, sse2, 16bpp) /* 2-D 8-tap put/avg, sse2 */
+filters_8tap_2d_fn2(avg, 16, BPC, 2, sse2, sse2, 16bpp)
+#if HAVE_AVX2_EXTERNAL /* avx2: instantiated individually for 64, 32 and 16 */
+filters_8tap_2d_fn(put, 64, 32, BPC, 2, avx2, 16bpp)
+filters_8tap_2d_fn(avg, 64, 32, BPC, 2, avx2, 16bpp)
+filters_8tap_2d_fn(put, 32, 32, BPC, 2, avx2, 16bpp)
+filters_8tap_2d_fn(avg, 32, 32, BPC, 2, avx2, 16bpp)
+filters_8tap_2d_fn(put, 16, 32, BPC, 2, avx2, 16bpp)
+filters_8tap_2d_fn(avg, 16, 32, BPC, 2, avx2, 16bpp)
+#endif
+
+filters_8tap_1d_fn3(put, BPC, sse2, sse2, 16bpp) /* 1-D 8-tap put/avg, sse2 */
+filters_8tap_1d_fn3(avg, BPC, sse2, sse2, 16bpp)
+#if HAVE_AVX2_EXTERNAL /* avx2: instantiated individually for 64, 32 and 16 */
+filters_8tap_1d_fn2(put, 64, BPC, avx2, 16bpp)
+filters_8tap_1d_fn2(avg, 64, BPC, avx2, 16bpp)
+filters_8tap_1d_fn2(put, 32, BPC, avx2, 16bpp)
+filters_8tap_1d_fn2(avg, 32, BPC, avx2, 16bpp)
+filters_8tap_1d_fn2(put, 16, BPC, avx2, 16bpp)
+filters_8tap_1d_fn2(avg, 16, BPC, avx2, 16bpp)
+#endif
+
+#define decl_lpf_func(dir, wd, bpp, opt) /* prototype of one loop-filter routine; its name is pasted from dir, wd, bpp and opt */ \
+void ff_vp9_loop_filter_##dir##_##wd##_##bpp##_##opt(uint8_t *dst, ptrdiff_t stride, \
+                                                     int E, int I, int H)
+
+#define decl_lpf_funcs(dir, wd, bpp) /* the same prototype for sse2, ssse3 and avx */ \
+decl_lpf_func(dir, wd, bpp, sse2); \
+decl_lpf_func(dir, wd, bpp, ssse3); \
+decl_lpf_func(dir, wd, bpp, avx)
+
+#define decl_lpf_funcs_wd(dir) /* wd = 4, 8 and 16 at bit depth BPC */ \
+decl_lpf_funcs(dir,  4, BPC); \
+decl_lpf_funcs(dir,  8, BPC); \
+decl_lpf_funcs(dir, 16, BPC)
+
+decl_lpf_funcs_wd(h); /* 2 directions x 3 widths x 3 instruction sets = 18 prototypes */
+decl_lpf_funcs_wd(v);
+
+#define lpf_16_wrapper(dir, off, bpp, opt) /* static wrapper: runs the wd=16 routine twice, at dst and at dst + off, with identical E/I/H */ \
+static void loop_filter_##dir##_16_##bpp##_##opt(uint8_t *dst, ptrdiff_t stride, \
+                                                 int E, int I, int H) \
+{ \
+    ff_vp9_loop_filter_##dir##_16_##bpp##_##opt(dst,       stride, E, I, H); \
+    ff_vp9_loop_filter_##dir##_16_##bpp##_##opt(dst + off, stride, E, I, H); \
+}
+
+#define lpf_16_wrappers(bpp, opt) /* off is 8 * stride for h and 16 bytes for v (dst is a uint8_t pointer) */ \
+lpf_16_wrapper(h, 8 * stride, bpp, opt) \
+lpf_16_wrapper(v, 16,         bpp, opt)
+
+lpf_16_wrappers(BPC, sse2) /* each line defines one h and one v wrapper */
+lpf_16_wrappers(BPC, ssse3)
+lpf_16_wrappers(BPC, avx)
+
+#define lpf_mix2_wrapper(dir, off, wd1, wd2, bpp, opt) /* static wrapper: wd1 routine at dst, wd2 routine at dst + off */ \
+static void loop_filter_##dir##_##wd1##wd2##_##bpp##_##opt(uint8_t *dst, ptrdiff_t stride, \
+                                                           int E, int I, int H) \
+{ \
+    ff_vp9_loop_filter_##dir##_##wd1##_##bpp##_##opt(dst,       stride, \
+                                                     E & 0xff, I & 0xff, H & 0xff); /* first call: low 8 bits of E, I, H */ \
+    ff_vp9_loop_filter_##dir##_##wd2##_##bpp##_##opt(dst + off, stride, \
+                                                     E >> 8,   I >> 8,   H >> 8); /* second call: E, I, H shifted right by 8 */ \
+}
+
+#define lpf_mix2_wrappers(wd1, wd2, bpp, opt) /* same offsets as lpf_16_wrappers: 8 * stride for h, 16 for v */ \
+lpf_mix2_wrapper(h, 8 * stride, wd1, wd2, bpp, opt) \
+lpf_mix2_wrapper(v, 16,         wd1, wd2, bpp, opt)
+
+#define lpf_mix2_wrappers_set(bpp, opt) /* all four combinations of widths 4 and 8 */ \
+lpf_mix2_wrappers(4, 4, bpp, opt) \
+lpf_mix2_wrappers(4, 8, bpp, opt) \
+lpf_mix2_wrappers(8, 4, bpp, opt) \
+lpf_mix2_wrappers(8, 8, bpp, opt) \
+
+lpf_mix2_wrappers_set(BPC, sse2) /* the trailing backslash above continues the macro onto the blank line only */
+lpf_mix2_wrappers_set(BPC, ssse3)
+lpf_mix2_wrappers_set(BPC, avx)
+
+decl_ipred_fns(tm, BPC, mmxext, sse2); /* the tm predictor is declared per bit depth (BPC), unlike the shared "16" predictors */
+
+decl_itxfm_func(iwht, iwht, 4, BPC, mmxext); /* decl_itxfm_func(s) are macros from outside this file */
+#if BPC == 10 /* 10-bit only: mmxext idct/idct 4x4 plus a full ssse3 4x4 set */
+decl_itxfm_func(idct,  idct,  4, BPC, mmxext);
+decl_itxfm_funcs(4, BPC, ssse3);
+#else /* other bit depths: idct/idct 4x4 is declared as sse2 instead */
+decl_itxfm_func(idct,  idct,  4, BPC, sse2);
+#endif
+decl_itxfm_func(idct,  iadst, 4, BPC, sse2);
+decl_itxfm_func(iadst, idct,  4, BPC, sse2);
+decl_itxfm_func(iadst, iadst, 4, BPC, sse2);
+decl_itxfm_funcs(8, BPC, sse2);
+decl_itxfm_funcs(16, BPC, sse2);
+decl_itxfm_func(idct,  idct, 32, BPC, sse2); /* only idct/idct is declared for size 32 */
+#endif /* HAVE_YASM */
+
+av_cold void INIT_FUNC(VP9DSPContext *dsp, int bitexact) /* INIT_FUNC and BPC must be #defined by the file that includes this template */
+{
+#if HAVE_YASM
+    int cpu_flags = av_get_cpu_flags();
+
+#define init_lpf_8_func(idx1, idx2, dir, wd, bpp, opt) /* stores the external ff_vp9_loop_filter_* routine directly */ \
+    dsp->loop_filter_8[idx1][idx2] = ff_vp9_loop_filter_##dir##_##wd##_##bpp##_##opt
+#define init_lpf_16_func(idx, dir, bpp, opt) /* stores the static wrapper generated by lpf_16_wrapper */ \
+    dsp->loop_filter_16[idx] = loop_filter_##dir##_16_##bpp##_##opt
+#define init_lpf_mix2_func(idx1, idx2, idx3, dir, wd1, wd2, bpp, opt) /* stores the static wrapper generated by lpf_mix2_wrapper */ \
+    dsp->loop_filter_mix2[idx1][idx2][idx3] = loop_filter_##dir##_##wd1##wd2##_##bpp##_##opt
+
+#define init_lpf_funcs(bpp, opt) /* fills every loop_filter_8 / _16 / _mix2 entry for one instruction set */ \
+    init_lpf_8_func(0, 0, h,  4, bpp, opt); \
+    init_lpf_8_func(0, 1, v,  4, bpp, opt); \
+    init_lpf_8_func(1, 0, h,  8, bpp, opt); \
+    init_lpf_8_func(1, 1, v,  8, bpp, opt); \
+    init_lpf_8_func(2, 0, h, 16, bpp, opt); \
+    init_lpf_8_func(2, 1, v, 16, bpp, opt); \
+    init_lpf_16_func(0, h, bpp, opt); \
+    init_lpf_16_func(1, v, bpp, opt); \
+    init_lpf_mix2_func(0, 0, 0, h, 4, 4, bpp, opt); \
+    init_lpf_mix2_func(0, 1, 0, h, 4, 8, bpp, opt); \
+    init_lpf_mix2_func(1, 0, 0, h, 8, 4, bpp, opt); \
+    init_lpf_mix2_func(1, 1, 0, h, 8, 8, bpp, opt); \
+    init_lpf_mix2_func(0, 0, 1, v, 4, 4, bpp, opt); \
+    init_lpf_mix2_func(0, 1, 1, v, 4, 8, bpp, opt); \
+    init_lpf_mix2_func(1, 0, 1, v, 8, 4, bpp, opt); \
+    init_lpf_mix2_func(1, 1, 1, v, 8, 8, bpp, opt)
+
+#define init_itx_func(idxa, idxb, typea, typeb, size, bpp, opt) /* one itxfm_add entry; the definition itself ends with a semicolon */ \
+    dsp->itxfm_add[idxa][idxb] = \
+        cat(ff_vp9_##typea##_##typeb##_##size##x##size##_add_, bpp, _##opt);
+#define init_itx_func_one(idx, typea, typeb, size, bpp, opt) /* the same routine in all four transform-type entries of row idx */ \
+    init_itx_func(idx, DCT_DCT,   typea, typeb, size, bpp, opt); \
+    init_itx_func(idx, ADST_DCT,  typea, typeb, size, bpp, opt); \
+    init_itx_func(idx, DCT_ADST,  typea, typeb, size, bpp, opt); \
+    init_itx_func(idx, ADST_ADST, typea, typeb, size, bpp, opt)
+#define init_itx_funcs(idx, size, bpp, opt) /* a distinct idct/iadst combination per transform-type entry */ \
+    init_itx_func(idx, DCT_DCT,   idct,  idct,  size, bpp, opt); \
+    init_itx_func(idx, ADST_DCT,  idct,  iadst, size, bpp, opt); \
+    init_itx_func(idx, DCT_ADST,  iadst, idct,  size, bpp, opt); \
+    init_itx_func(idx, ADST_ADST, iadst, iadst, size, bpp, opt); \
+
+    if (EXTERNAL_MMXEXT(cpu_flags)) {
+        init_ipred_func(tm, TM_VP8, 4, BPC, mmxext);
+        if (!bitexact) { /* these transforms are skipped when the caller asks for bitexact output */
+            init_itx_func_one(4 /* lossless */, iwht, iwht, 4, BPC, mmxext);
+#if BPC == 10
+            init_itx_func(TX_4X4, DCT_DCT, idct, idct, 4, 10, mmxext);
+#endif
+        }
+    }
+
+    if (EXTERNAL_SSE2(cpu_flags)) {
+        init_subpel3(0, put, BPC, sse2); /* init_subpel*, init_ipred_func and init_8_16_32_ipred_funcs are macros from outside this file */
+        init_subpel3(1, avg, BPC, sse2);
+        init_lpf_funcs(BPC, sse2);
+        init_8_16_32_ipred_funcs(tm, TM_VP8, BPC, sse2);
+#if BPC == 10
+        if (!bitexact) { /* 10-bit: the 4x4 adst combinations are only installed when not bitexact */
+            init_itx_func(TX_4X4, ADST_DCT,  idct,  iadst, 4, 10, sse2);
+            init_itx_func(TX_4X4, DCT_ADST,  iadst, idct,  4, 10, sse2);
+            init_itx_func(TX_4X4, ADST_ADST, iadst, iadst, 4, 10, sse2);
+        }
+#else /* other bit depths: the full 4x4 set is installed regardless of bitexact */
+        init_itx_funcs(TX_4X4, 4, 12, sse2);
+#endif
+        init_itx_funcs(TX_8X8, 8, BPC, sse2); /* 8x8, 16x16 and 32x32 are installed regardless of bitexact */
+        init_itx_funcs(TX_16X16, 16, BPC, sse2);
+        init_itx_func_one(TX_32X32, idct, idct, 32, BPC, sse2);
+    }
+
+    if (EXTERNAL_SSSE3(cpu_flags)) {
+        init_lpf_funcs(BPC, ssse3); /* overwrites every loop-filter entry written by the sse2 block */
+#if BPC == 10
+        if (!bitexact) {
+            init_itx_funcs(TX_4X4, 4, BPC, ssse3);
+        }
+#endif
+    }
+
+    if (EXTERNAL_AVX(cpu_flags)) {
+        init_lpf_funcs(BPC, avx); /* overwrites every loop-filter entry again with the avx versions */
+    }
+
+    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+#if HAVE_AVX2_EXTERNAL /* matches the guard around the avx2 function definitions earlier in this file */
+        init_subpel3_32_64(0,  put, BPC, avx2);
+        init_subpel3_32_64(1,  avg, BPC, avx2);
+        init_subpel2(2, 0, 16, put, BPC, avx2);
+        init_subpel2(2, 1, 16, avg, BPC, avx2);
+#endif
+    }
+
+#endif /* HAVE_YASM */
+
+    ff_vp9dsp_init_16bpp_x86(dsp); /* called unconditionally, outside the HAVE_YASM guard; note it does not receive bitexact */
+}
diff --git a/libavcodec/x86/vp9intrapred_16bpp.asm b/libavcodec/x86/vp9intrapred_16bpp.asm
new file mode 100644
index 00000000..c0ac16d3
--- /dev/null
+++ b/libavcodec/x86/vp9intrapred_16bpp.asm
@@ -0,0 +1,2135 @@
+;******************************************************************************
+;* VP9 Intra prediction SIMD optimizations
+;*
+;* Copyright (c) 2015 Ronald S. Bultje <rsbultje gmail com>
+;* Copyright (c) 2015 Henrik Gramner <henrik gramner com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA 32 ; read-only constants; the argument 32 is presumably the section alignment -- confirm in x86inc
+
+pd_2: times 8 dd 2 ; eight dwords of 2; added before the >>2 in the 4x4 dc_top/dc_left code
+pd_4: times 8 dd 4 ; eight dwords of 4; added before a >>3
+pd_8: times 8 dd 8 ; eight dwords of 8; added before a >>4
+
+pb_2to15_14_15: db 2,3,4,5,6,7,8,9,10,11,12,13,14,15,14,15 ; byte-index tables; not referenced in the part of the file shown here
+pb_4_5_8to13_8x0: db 4,5,8,9,10,11,12,13,0,0,0,0,0,0,0,0
+pb_0to7_67x4: db 0,1,2,3,4,5,6,7,6,7,6,7,6,7,6,7
+
+cextern pw_1 ; constants below are declared external, i.e. defined in another object
+cextern pw_1023
+cextern pw_4095
+cextern pd_16
+cextern pd_32
+cextern pd_65535;
+
+; FIXME most top-only functions (dl, vl, v, dc_top) can be modified to take
+; only 3 registers on x86-32, which would make it one cycle faster, but that
+; would make the code quite a bit uglier...
+
+SECTION .text
+
+%macro SCRATCH 3-4 ; args: register number, second register number, memory location, optional alias name
+%if ARCH_X86_64 ; x86-64: no memory traffic
+    SWAP                %1, %2 ; exchange the two register numbers (SWAP comes from the included x86 macros)
+%if %0 == 4 ; alias requested: reg_<name> refers to the second register
+%define reg_%4 m%2
+%endif
+%else ; x86-32: spill to memory instead
+    mova              [%3], m%1 ; store the register at the given memory location
+%if %0 == 4 ; alias requested: reg_<name> refers to that memory location
+%define reg_%4 [%3]
+%endif
+%endif
+%endmacro
+
+%macro UNSCRATCH 3-4 ; counterpart of SCRATCH with the same argument order
+%if ARCH_X86_64 ; x86-64: swap the two register numbers again
+    SWAP                %1, %2
+%else ; x86-32: reload the register from the memory location
+    mova               m%1, [%3]
+%endif
+%if %0 == 4 ; alias given: remove the reg_<name> definition on both architectures
+%undef reg_%4
+%endif
+%endmacro
+
+%macro PRELOAD 2-3 ; args: register number, memory location, optional alias name
+%if ARCH_X86_64 ; x86-64: load the value into the register now
+    mova               m%1, [%2]
+%if %0 == 3 ; alias requested: reg_<name> refers to the loaded register
+%define reg_%3 m%1
+%endif
+%elif %0 == 3 ; x86-32 with alias: no load, reg_<name> is the memory operand; with two args nothing is emitted
+%define reg_%3 [%2]
+%endif
+%endmacro
+
+INIT_MMX mmx ; vertical prediction 4x4: one register loaded from a is stored to 4 rows of dst
+cglobal vp9_ipred_v_4x4_16, 2, 4, 1, dst, stride, l, a
+    movifnidn               aq, amp ; fetch a from its argument slot unless it is already in its register
+    mova                    m0, [aq] ; m0 = one register-width of data at a
+    DEFINE_ARGS dst, stride, stride3 ; l is never read; the third register is renamed stride3
+    lea               stride3q, [strideq*3] ; stride3 = 3 * stride
+    mova      [dstq+strideq*0], m0 ; rows 0..3 all receive the same data
+    mova      [dstq+strideq*1], m0
+    mova      [dstq+strideq*2], m0
+    mova      [dstq+stride3q ], m0
+    RET
+
+INIT_XMM sse ; vertical prediction 8x8: one register loaded from a is stored to 8 rows of dst
+cglobal vp9_ipred_v_8x8_16, 2, 4, 1, dst, stride, l, a
+    movifnidn               aq, amp ; fetch a from its argument slot unless it is already in its register
+    mova                    m0, [aq] ; m0 = one register-width of data at a
+    DEFINE_ARGS dst, stride, stride3 ; l is never read; the third register is renamed stride3
+    lea               stride3q, [strideq*3] ; stride3 = 3 * stride
+    mova      [dstq+strideq*0], m0 ; rows 0..3
+    mova      [dstq+strideq*1], m0
+    mova      [dstq+strideq*2], m0
+    mova      [dstq+stride3q ], m0
+    lea                   dstq, [dstq+strideq*4] ; advance dst by four rows
+    mova      [dstq+strideq*0], m0 ; rows 4..7
+    mova      [dstq+strideq*1], m0
+    mova      [dstq+strideq*2], m0
+    mova      [dstq+stride3q ], m0
+    RET
+
+INIT_XMM sse ; vertical prediction 16x16: two registers loaded from a are stored to 16 rows
+cglobal vp9_ipred_v_16x16_16, 2, 4, 2, dst, stride, l, a
+    movifnidn               aq, amp ; fetch a from its argument slot unless it is already in its register
+    mova                    m0, [aq] ; first register-width of a
+    mova                    m1, [aq+mmsize] ; second register-width of a
+    DEFINE_ARGS dst, stride, stride3, cnt ; a has been consumed; the registers are renamed stride3 and cnt
+    lea               stride3q, [strideq*3] ; stride3 = 3 * stride
+    mov                   cntd, 4 ; 4 iterations of 4 rows each
+.loop:
+    mova   [dstq+strideq*0+ 0], m0 ; each row is m0 followed by m1
+    mova   [dstq+strideq*0+16], m1
+    mova   [dstq+strideq*1+ 0], m0
+    mova   [dstq+strideq*1+16], m1
+    mova   [dstq+strideq*2+ 0], m0
+    mova   [dstq+strideq*2+16], m1
+    mova   [dstq+stride3q + 0], m0
+    mova   [dstq+stride3q +16], m1
+    lea                   dstq, [dstq+strideq*4] ; advance dst by four rows
+    dec               cntd
+    jg .loop ; repeat while cnt > 0
+    RET
+
+INIT_XMM sse ; vertical prediction 32x32: four registers loaded from a are stored to 32 rows
+cglobal vp9_ipred_v_32x32_16, 2, 4, 4, dst, stride, l, a
+    movifnidn               aq, amp ; fetch a from its argument slot unless it is already in its register
+    mova                    m0, [aq+mmsize*0] ; m0..m3 = four consecutive register-widths of a
+    mova                    m1, [aq+mmsize*1]
+    mova                    m2, [aq+mmsize*2]
+    mova                    m3, [aq+mmsize*3]
+    DEFINE_ARGS dst, stride, cnt ; no stride3 here: the third register is used as the loop counter
+    mov                   cntd, 16 ; 16 iterations of 2 rows each
+.loop:
+    mova   [dstq+strideq*0+ 0], m0 ; each row is m0, m1, m2, m3 at byte offsets 0, 16, 32, 48
+    mova   [dstq+strideq*0+16], m1
+    mova   [dstq+strideq*0+32], m2
+    mova   [dstq+strideq*0+48], m3
+    mova   [dstq+strideq*1+ 0], m0
+    mova   [dstq+strideq*1+16], m1
+    mova   [dstq+strideq*1+32], m2
+    mova   [dstq+strideq*1+48], m3
+    lea                   dstq, [dstq+strideq*2] ; advance dst by two rows
+    dec               cntd
+    jg .loop ; repeat while cnt > 0
+    RET
+
+INIT_MMX mmxext ; horizontal prediction 4x4: each row is one word of l replicated across the row
+cglobal vp9_ipred_h_4x4_16, 3, 3, 4, dst, stride, l, a
+    mova                    m3, [lq] ; m3 = four words read from l
+    DEFINE_ARGS dst, stride, stride3 ; l has been consumed; its register is renamed stride3
+    lea               stride3q, [strideq*3] ; stride3 = 3 * stride
+    pshufw                  m0, m3, q3333 ; word 3 of l in every lane -> row 0
+    pshufw                  m1, m3, q2222 ; word 2 -> row 1
+    pshufw                  m2, m3, q1111 ; word 1 -> row 2
+    pshufw                  m3, m3, q0000 ; word 0 -> row 3
+    mova      [dstq+strideq*0], m0
+    mova      [dstq+strideq*1], m1
+    mova      [dstq+strideq*2], m2
+    mova      [dstq+stride3q ], m3
+    RET
+
+INIT_XMM sse2 ; horizontal prediction 8x8: each row is one word of l replicated across the row
+cglobal vp9_ipred_h_8x8_16, 3, 3, 4, dst, stride, l, a
+    mova                    m2, [lq] ; m2 = eight words read from l
+    DEFINE_ARGS dst, stride, stride3 ; l has been consumed; its register is renamed stride3
+    lea               stride3q, [strideq*3] ; stride3 = 3 * stride
+    punpckhwd               m3, m2, m2 ; m3 = words 4..7 of l, each duplicated into a dword
+    pshufd                  m0, m3, q3333 ; word 7 broadcast -> row 0
+    pshufd                  m1, m3, q2222 ; word 6 broadcast -> row 1
+    mova      [dstq+strideq*0], m0
+    mova      [dstq+strideq*1], m1
+    pshufd                  m0, m3, q1111 ; word 5 -> row 2
+    pshufd                  m1, m3, q0000 ; word 4 -> row 3
+    mova      [dstq+strideq*2], m0
+    mova      [dstq+stride3q ], m1
+    lea                   dstq, [dstq+strideq*4] ; advance dst by four rows
+    punpcklwd               m2, m2 ; m2 = words 0..3 of l, each duplicated into a dword
+    pshufd                  m0, m2, q3333 ; word 3 -> row 4
+    pshufd                  m1, m2, q2222 ; word 2 -> row 5
+    mova      [dstq+strideq*0], m0
+    mova      [dstq+strideq*1], m1
+    pshufd                  m0, m2, q1111 ; word 1 -> row 6
+    pshufd                  m1, m2, q0000 ; word 0 -> row 7
+    mova      [dstq+strideq*2], m0
+    mova      [dstq+stride3q ], m1
+    RET
+
+INIT_XMM sse2 ; horizontal prediction 16x16: each row is one word of l replicated across the row
+cglobal vp9_ipred_h_16x16_16, 3, 5, 4, dst, stride, l, stride3, cnt
+    mov                   cntd, 3 ; cnt runs 3, 2, 1, 0: l is consumed from its highest group of four words downwards
+    lea               stride3q, [strideq*3] ; stride3 = 3 * stride
+.loop:
+    movh                    m3, [lq+cntq*8] ; four words of l at byte offset cnt * 8
+    punpcklwd               m3, m3 ; duplicate each word into a dword
+    pshufd                  m0, m3, q3333 ; highest word of the group -> first row of this iteration
+    pshufd                  m1, m3, q2222
+    pshufd                  m2, m3, q1111
+    pshufd                  m3, m3, q0000 ; lowest word of the group -> fourth row
+    mova    [dstq+strideq*0+ 0], m0 ; every row is written as two identical registers
+    mova    [dstq+strideq*0+16], m0
+    mova    [dstq+strideq*1+ 0], m1
+    mova    [dstq+strideq*1+16], m1
+    mova    [dstq+strideq*2+ 0], m2
+    mova    [dstq+strideq*2+16], m2
+    mova    [dstq+stride3q + 0], m3
+    mova    [dstq+stride3q +16], m3
+    lea                   dstq, [dstq+strideq*4] ; advance dst by four rows
+    dec                   cntd
+    jge .loop ; repeat while cnt >= 0, i.e. 4 iterations in total
+    RET
+
+INIT_XMM sse2 ; horizontal prediction 32x32: each row is one word of l replicated across the row
+cglobal vp9_ipred_h_32x32_16, 3, 5, 4, dst, stride, l, stride3, cnt
+    mov                   cntd, 7 ; cnt runs 7..0: l is consumed from its highest group of four words downwards
+    lea               stride3q, [strideq*3] ; stride3 = 3 * stride
+.loop:
+    movh                    m3, [lq+cntq*8] ; four words of l at byte offset cnt * 8
+    punpcklwd               m3, m3 ; duplicate each word into a dword
+    pshufd                  m0, m3, q3333 ; highest word of the group -> first row of this iteration
+    pshufd                  m1, m3, q2222
+    pshufd                  m2, m3, q1111
+    pshufd                  m3, m3, q0000 ; lowest word of the group -> fourth row
+    mova   [dstq+strideq*0+ 0], m0 ; every row is written as four identical registers
+    mova   [dstq+strideq*0+16], m0
+    mova   [dstq+strideq*0+32], m0
+    mova   [dstq+strideq*0+48], m0
+    mova   [dstq+strideq*1+ 0], m1
+    mova   [dstq+strideq*1+16], m1
+    mova   [dstq+strideq*1+32], m1
+    mova   [dstq+strideq*1+48], m1
+    mova   [dstq+strideq*2+ 0], m2
+    mova   [dstq+strideq*2+16], m2
+    mova   [dstq+strideq*2+32], m2
+    mova   [dstq+strideq*2+48], m2
+    mova   [dstq+stride3q + 0], m3
+    mova   [dstq+stride3q +16], m3
+    mova   [dstq+stride3q +32], m3
+    mova   [dstq+stride3q +48], m3
+    lea                   dstq, [dstq+strideq*4] ; advance dst by four rows
+    dec                   cntd
+    jge .loop ; repeat while cnt >= 0, i.e. 8 iterations in total
+    RET
+
+INIT_MMX mmxext ; DC prediction 4x4: fill the block with (sum of 4 words of l and 4 words of a + 4) >> 3
+cglobal vp9_ipred_dc_4x4_16, 4, 4, 2, dst, stride, l, a
+    mova                    m0, [lq] ; four words of l
+    paddw                   m0, [aq] ; per-lane l + a
+    DEFINE_ARGS dst, stride, stride3 ; l and a have been consumed
+    lea               stride3q, [strideq*3] ; stride3 = 3 * stride
+    pmaddwd                 m0, [pw_1] ; adjacent word pairs summed into dwords (assuming pw_1 holds words of 1 -- external constant)
+    pshufw                  m1, m0, q3232 ; m1 low dword = high dword of m0
+    paddd                   m0, [pd_4] ; rounding bias
+    paddd                   m0, m1 ; low dword = total + 4
+    psrad                   m0, 3 ; divide by 8
+    pshufw                  m0, m0, q0000 ; broadcast the low word to all lanes
+    mova      [dstq+strideq*0], m0 ; all four rows receive the same value
+    mova      [dstq+strideq*1], m0
+    mova      [dstq+strideq*2], m0
+    mova      [dstq+stride3q ], m0
+    RET
+
+INIT_XMM sse2 ; DC prediction 8x8: fill the block with (sum of 8 words of l and 8 words of a + 8) >> 4
+cglobal vp9_ipred_dc_8x8_16, 4, 4, 2, dst, stride, l, a
+    mova                    m0, [lq] ; eight words of l
+    paddw                   m0, [aq] ; per-lane l + a
+    DEFINE_ARGS dst, stride, stride3 ; l and a have been consumed
+    lea               stride3q, [strideq*3] ; stride3 = 3 * stride
+    pmaddwd                 m0, [pw_1] ; adjacent word pairs summed into four dwords (assuming pw_1 holds words of 1)
+    pshufd                  m1, m0, q3232 ; bring the upper two dwords down
+    paddd                   m0, m1 ; two partial sums in the low two dwords
+    pshufd                  m1, m0, q1111 ; broadcast dword 1
+    paddd                   m0, [pd_8] ; rounding bias
+    paddd                   m0, m1 ; low dword = total + 8
+    psrad                   m0, 4 ; divide by 16
+    pshuflw                 m0, m0, q0000 ; broadcast the low word across the low four words
+    punpcklqdq              m0, m0 ; and into the upper half
+    mova      [dstq+strideq*0], m0 ; rows 0..3
+    mova      [dstq+strideq*1], m0
+    mova      [dstq+strideq*2], m0
+    mova      [dstq+stride3q ], m0
+    lea                   dstq, [dstq+strideq*4] ; advance dst by four rows
+    mova      [dstq+strideq*0], m0 ; rows 4..7
+    mova      [dstq+strideq*1], m0
+    mova      [dstq+strideq*2], m0
+    mova      [dstq+stride3q ], m0
+    RET
+
+INIT_XMM sse2 ; DC prediction 16x16: fill the block with (sum of 16 words of l and 16 words of a + 16) >> 5
+cglobal vp9_ipred_dc_16x16_16, 4, 4, 2, dst, stride, l, a
+    mova                    m0, [lq] ; accumulate both registers of l and both of a per word lane
+    paddw                   m0, [lq+mmsize]
+    paddw                   m0, [aq]
+    paddw                   m0, [aq+mmsize]
+    DEFINE_ARGS dst, stride, stride3, cnt ; l and a have been consumed; their registers become stride3 and cnt
+    lea               stride3q, [strideq*3] ; stride3 = 3 * stride
+    mov                   cntd, 4 ; 4 iterations of 4 rows each
+    pmaddwd                 m0, [pw_1] ; adjacent word pairs summed into four dwords (assuming pw_1 holds words of 1)
+    pshufd                  m1, m0, q3232 ; bring the upper two dwords down
+    paddd                   m0, m1
+    pshufd                  m1, m0, q1111 ; broadcast dword 1
+    paddd                   m0, [pd_16] ; rounding bias (external constant)
+    paddd                   m0, m1 ; low dword = total + bias
+    psrad                   m0, 5 ; divide by 32
+    pshuflw                 m0, m0, q0000 ; broadcast the low word to all eight words
+    punpcklqdq              m0, m0
+.loop:
+    mova   [dstq+strideq*0+ 0], m0 ; two registers per row
+    mova   [dstq+strideq*0+16], m0
+    mova   [dstq+strideq*1+ 0], m0
+    mova   [dstq+strideq*1+16], m0
+    mova   [dstq+strideq*2+ 0], m0
+    mova   [dstq+strideq*2+16], m0
+    mova   [dstq+stride3q + 0], m0
+    mova   [dstq+stride3q +16], m0
+    lea                   dstq, [dstq+strideq*4] ; advance dst by four rows
+    dec                   cntd
+    jg .loop ; repeat while cnt > 0
+    RET
+
+INIT_XMM sse2 ; DC prediction 32x32: fill the block with (sum of 32 words of l and 32 words of a + 32) >> 6
+cglobal vp9_ipred_dc_32x32_16, 4, 4, 2, dst, stride, l, a
+    mova                    m0, [lq+mmsize*0] ; accumulate four registers of l and four of a per word lane
+    paddw                   m0, [lq+mmsize*1]
+    paddw                   m0, [lq+mmsize*2]
+    paddw                   m0, [lq+mmsize*3]
+    paddw                   m0, [aq+mmsize*0]
+    paddw                   m0, [aq+mmsize*1]
+    paddw                   m0, [aq+mmsize*2]
+    paddw                   m0, [aq+mmsize*3]
+    DEFINE_ARGS dst, stride, stride3, cnt ; stride3 is computed below but the loop only addresses rows 0 and 1
+    lea               stride3q, [strideq*3]
+    mov                   cntd, 16 ; 16 iterations of 2 rows each
+    pmaddwd                 m0, [pw_1] ; adjacent word pairs summed into four dwords (assuming pw_1 holds words of 1)
+    pshufd                  m1, m0, q3232 ; bring the upper two dwords down
+    paddd                   m0, m1
+    pshufd                  m1, m0, q1111 ; broadcast dword 1
+    paddd                   m0, [pd_32] ; rounding bias (external constant)
+    paddd                   m0, m1 ; low dword = total + bias
+    psrad                   m0, 6 ; divide by 64
+    pshuflw                 m0, m0, q0000 ; broadcast the low word to all eight words
+    punpcklqdq              m0, m0
+.loop:
+    mova   [dstq+strideq*0+ 0], m0 ; four registers per row
+    mova   [dstq+strideq*0+16], m0
+    mova   [dstq+strideq*0+32], m0
+    mova   [dstq+strideq*0+48], m0
+    mova   [dstq+strideq*1+ 0], m0
+    mova   [dstq+strideq*1+16], m0
+    mova   [dstq+strideq*1+32], m0
+    mova   [dstq+strideq*1+48], m0
+    lea                   dstq, [dstq+strideq*2] ; advance dst by two rows
+    dec                   cntd
+    jg .loop ; repeat while cnt > 0
+    RET
+
+%macro DC_1D_FNS 2 ; args: name suffix (top/left) and the pointer register to average (aq/lq); emits 4x4, 8x8, 16x16 and 32x32 functions
+INIT_MMX mmxext ; 4x4: (sum of 4 words + 2) >> 2
+cglobal vp9_ipred_dc_%1_4x4_16, 4, 4, 2, dst, stride, l, a
+    mova                    m0, [%2] ; four words from the selected edge
+    DEFINE_ARGS dst, stride, stride3
+    lea               stride3q, [strideq*3] ; stride3 = 3 * stride
+    pmaddwd                 m0, [pw_1] ; adjacent word pairs summed into dwords (assuming pw_1 holds words of 1)
+    pshufw                  m1, m0, q3232 ; m1 low dword = high dword of m0
+    paddd                   m0, [pd_2] ; rounding bias
+    paddd                   m0, m1 ; low dword = total + 2
+    psrad                   m0, 2 ; divide by 4
+    pshufw                  m0, m0, q0000 ; broadcast the low word to all lanes
+    mova      [dstq+strideq*0], m0
+    mova      [dstq+strideq*1], m0
+    mova      [dstq+strideq*2], m0
+    mova      [dstq+stride3q ], m0
+    RET
+
+INIT_XMM sse2 ; 8x8: (sum of 8 words + 4) >> 3
+cglobal vp9_ipred_dc_%1_8x8_16, 4, 4, 2, dst, stride, l, a
+    mova                    m0, [%2] ; eight words from the selected edge
+    DEFINE_ARGS dst, stride, stride3
+    lea               stride3q, [strideq*3] ; stride3 = 3 * stride
+    pmaddwd                 m0, [pw_1] ; adjacent word pairs summed into four dwords
+    pshufd                  m1, m0, q3232 ; bring the upper two dwords down
+    paddd                   m0, m1
+    pshufd                  m1, m0, q1111 ; broadcast dword 1
+    paddd                   m0, [pd_4] ; rounding bias
+    paddd                   m0, m1 ; low dword = total + 4
+    psrad                   m0, 3 ; divide by 8
+    pshuflw                 m0, m0, q0000 ; broadcast the low word to all eight words
+    punpcklqdq              m0, m0
+    mova      [dstq+strideq*0], m0 ; rows 0..3
+    mova      [dstq+strideq*1], m0
+    mova      [dstq+strideq*2], m0
+    mova      [dstq+stride3q ], m0
+    lea                   dstq, [dstq+strideq*4] ; advance dst by four rows
+    mova      [dstq+strideq*0], m0 ; rows 4..7
+    mova      [dstq+strideq*1], m0
+    mova      [dstq+strideq*2], m0
+    mova      [dstq+stride3q ], m0
+    RET
+
+INIT_XMM sse2 ; 16x16: (sum of 16 words + 8) >> 4
+cglobal vp9_ipred_dc_%1_16x16_16, 4, 4, 2, dst, stride, l, a
+    mova                    m0, [%2] ; accumulate two registers of the selected edge per word lane
+    paddw                   m0, [%2+mmsize]
+    DEFINE_ARGS dst, stride, stride3, cnt ; the edge pointers are no longer needed after the loads above
+    lea               stride3q, [strideq*3] ; stride3 = 3 * stride
+    mov                   cntd, 4 ; 4 iterations of 4 rows each
+    pmaddwd                 m0, [pw_1] ; adjacent word pairs summed into four dwords
+    pshufd                  m1, m0, q3232 ; bring the upper two dwords down
+    paddd                   m0, m1
+    pshufd                  m1, m0, q1111 ; broadcast dword 1
+    paddd                   m0, [pd_8] ; rounding bias
+    paddd                   m0, m1 ; low dword = total + 8
+    psrad                   m0, 4 ; divide by 16
+    pshuflw                 m0, m0, q0000 ; broadcast the low word to all eight words
+    punpcklqdq              m0, m0
+.loop:
+    mova   [dstq+strideq*0+ 0], m0 ; two registers per row
+    mova   [dstq+strideq*0+16], m0
+    mova   [dstq+strideq*1+ 0], m0
+    mova   [dstq+strideq*1+16], m0
+    mova   [dstq+strideq*2+ 0], m0
+    mova   [dstq+strideq*2+16], m0
+    mova   [dstq+stride3q + 0], m0
+    mova   [dstq+stride3q +16], m0
+    lea                   dstq, [dstq+strideq*4] ; advance dst by four rows
+    dec                   cntd
+    jg .loop ; repeat while cnt > 0
+    RET
+
+INIT_XMM sse2 ; 32x32: (sum of 32 words + 16) >> 5
+cglobal vp9_ipred_dc_%1_32x32_16, 4, 4, 2, dst, stride, l, a
+    mova                    m0, [%2+mmsize*0] ; accumulate four registers of the selected edge per word lane
+    paddw                   m0, [%2+mmsize*1]
+    paddw                   m0, [%2+mmsize*2]
+    paddw                   m0, [%2+mmsize*3]
+    DEFINE_ARGS dst, stride, cnt ; the third register becomes the loop counter; no stride3 is needed
+    mov                   cntd, 16 ; 16 iterations of 2 rows each
+    pmaddwd                 m0, [pw_1] ; adjacent word pairs summed into four dwords
+    pshufd                  m1, m0, q3232 ; bring the upper two dwords down
+    paddd                   m0, m1
+    pshufd                  m1, m0, q1111 ; broadcast dword 1
+    paddd                   m0, [pd_16] ; rounding bias (external constant)
+    paddd                   m0, m1 ; low dword = total + bias
+    psrad                   m0, 5 ; divide by 32
+    pshuflw                 m0, m0, q0000 ; broadcast the low word to all eight words
+    punpcklqdq              m0, m0
+.loop:
+    mova   [dstq+strideq*0+ 0], m0 ; four registers per row
+    mova   [dstq+strideq*0+16], m0
+    mova   [dstq+strideq*0+32], m0
+    mova   [dstq+strideq*0+48], m0
+    mova   [dstq+strideq*1+ 0], m0
+    mova   [dstq+strideq*1+16], m0
+    mova   [dstq+strideq*1+32], m0
+    mova   [dstq+strideq*1+48], m0
+    lea                   dstq, [dstq+strideq*2] ; advance dst by two rows
+    dec                   cntd
+    jg .loop ; repeat while cnt > 0
+    RET
+%endmacro
+
+DC_1D_FNS top,  aq ; dc_top_* functions average the data at a
+DC_1D_FNS left, lq ; dc_left_* functions average the data at l
+
+INIT_MMX mmxext ; TM prediction 4x4: row i = clamp(l word + (a - word before a), 0, max)
+cglobal vp9_ipred_tm_4x4_10, 4, 4, 6, dst, stride, l, a
+    mova                    m5, [pw_1023] ; upper clamp bound for this entry point (external constant)
+.body: ; shared body: the _12 entry point jumps here with a different bound in m5
+    mova                    m4, [aq] ; four words of a
+    mova                    m3, [lq] ; four words of l
+    movd                    m0, [aq-4] ; the dword ending just before a
+    pshufw                  m0, m0, q1111 ; broadcast its upper word, i.e. the word immediately before a
+    psubw                   m4, m0 ; m4 = a - (word before a), per lane
+    DEFINE_ARGS dst, stride, stride3 ; l and a have been consumed
+    lea               stride3q, [strideq*3] ; stride3 = 3 * stride
+    pshufw                  m0, m3, q3333 ; word 3 of l -> row 0
+    pshufw                  m1, m3, q2222 ; word 2 -> row 1
+    pshufw                  m2, m3, q1111 ; word 1 -> row 2
+    pshufw                  m3, m3, q0000 ; word 0 -> row 3
+    paddw                   m0, m4 ; add the a-delta to every row
+    paddw                   m1, m4
+    paddw                   m2, m4
+    paddw                   m3, m4
+    pxor                    m4, m4 ; m4 = 0, the lower clamp bound
+    pmaxsw                  m0, m4 ; clamp below at 0
+    pmaxsw                  m1, m4
+    pmaxsw                  m2, m4
+    pmaxsw                  m3, m4
+    pminsw                  m0, m5 ; clamp above at the bound in m5
+    pminsw                  m1, m5
+    pminsw                  m2, m5
+    pminsw                  m3, m5
+    mova      [dstq+strideq*0], m0
+    mova      [dstq+strideq*1], m1
+    mova      [dstq+strideq*2], m2
+    mova      [dstq+stride3q ], m3
+    RET
+
+cglobal vp9_ipred_tm_4x4_12, 4, 4, 6, dst, stride, l, a
+    mova                    m5, [pw_4095] ; upper clamp bound for this entry point (external constant)
+    jmp mangle(private_prefix %+ _ %+ vp9_ipred_tm_4x4_10 %+ SUFFIX).body ; reuse the body of the _10 function with this bound
+
+INIT_XMM sse2 ; TM prediction 8x8: row i = clamp(l word + (a - word before a), 0, max)
+cglobal vp9_ipred_tm_8x8_10, 4, 5, 7, dst, stride, l, a
+    mova                    m4, [pw_1023] ; upper clamp bound for this entry point (external constant)
+.body: ; shared body: the _12 entry point jumps here with a different bound in m4
+    pxor                    m6, m6 ; m6 = 0, the lower clamp bound
+    mova                    m5, [aq] ; eight words of a
+    movd                    m0, [aq-4] ; the dword ending just before a
+    pshuflw                 m0, m0, q1111 ; broadcast its upper word, i.e. the word immediately before a
+    punpcklqdq              m0, m0 ; ... to all eight words
+    psubw                   m5, m0 ; m5 = a - (word before a), per lane
+    DEFINE_ARGS dst, stride, l, stride3, cnt ; a has been consumed; its register is renamed stride3
+    lea               stride3q, [strideq*3] ; stride3 = 3 * stride
+    mov                   cntd, 1 ; cnt runs 1, 0: l is consumed from its upper four words downwards
+.loop:
+    movh                    m3, [lq+cntq*8] ; four words of l at byte offset cnt * 8
+    punpcklwd               m3, m3 ; duplicate each word into a dword
+    pshufd                  m0, m3, q3333 ; highest word of the group -> first row of this iteration
+    pshufd                  m1, m3, q2222
+    pshufd                  m2, m3, q1111
+    pshufd                  m3, m3, q0000 ; lowest word of the group -> fourth row
+    paddw                   m0, m5 ; add the a-delta to every row
+    paddw                   m1, m5
+    paddw                   m2, m5
+    paddw                   m3, m5
+    pmaxsw                  m0, m6 ; clamp below at 0
+    pmaxsw                  m1, m6
+    pmaxsw                  m2, m6
+    pmaxsw                  m3, m6
+    pminsw                  m0, m4 ; clamp above at the bound in m4
+    pminsw                  m1, m4
+    pminsw                  m2, m4
+    pminsw                  m3, m4
+    mova      [dstq+strideq*0], m0
+    mova      [dstq+strideq*1], m1
+    mova      [dstq+strideq*2], m2
+    mova      [dstq+stride3q ], m3
+    lea                   dstq, [dstq+strideq*4] ; advance dst by four rows
+    dec                   cntd
+    jge .loop ; repeat while cnt >= 0, i.e. 2 iterations in total
+    RET
+
+cglobal vp9_ipred_tm_8x8_12, 4, 5, 7, dst, stride, l, a ; same 8x8 TM predictor, upper clamp bound taken from [pw_4095]
+    mova                    m4, [pw_4095]           ; m4 is the register the shared body clamps against
+    jmp mangle(private_prefix %+ _ %+ vp9_ipred_tm_8x8_10 %+ SUFFIX).body ; continue in the _10 function's .body
+
+INIT_XMM sse2
+cglobal vp9_ipred_tm_16x16_10, 4, 4, 8, dst, stride, l, a ; 16x16 TM predictor on 16-bit samples: clamp(left + above[x] - topleft, 0, [pw_1023])
+    mova                    m7, [pw_1023]           ; upper clamp bound (constant defined elsewhere)
+.body:                                              ; shared body: the _12 variant jumps here with its own bound in m7
+    pxor                    m6, m6                  ; m6 = 0, the lower clamp bound
+    mova                    m4, [aq]                ; above samples 0-7
+    mova                    m5, [aq+mmsize]         ; above samples 8-15
+    movd                    m0, [aq-4]              ; 4 bytes ending right before a[0]; word 1 is a[-1] (top-left)
+    pshuflw                 m0, m0, q1111           ; broadcast top-left across the low 4 words
+    punpcklqdq              m0, m0                  ; ... and copy them into the high half
+    psubw                   m4, m0                  ; m4/m5 = above - topleft
+    psubw                   m5, m0
+    DEFINE_ARGS dst, stride, l, cnt                 ; a is consumed; its register becomes the loop counter
+    mov                   cntd, 7                   ; 8 passes (cnt = 7..0) of 2 rows each
+.loop:
+    movd                    m3, [lq+cntq*4]         ; 2 left samples starting at word index 2*cnt
+    punpcklwd               m3, m3                  ; duplicate each word into a full dword
+    pshufd                  m2, m3, q1111           ; second sample of the pair broadcast -> first row of this pass
+    pshufd                  m3, m3, q0000           ; first sample of the pair broadcast -> second row
+    paddw                   m0, m2, m4              ; first row, columns 0-7
+    paddw                   m2, m5                  ; first row, columns 8-15
+    paddw                   m1, m3, m4              ; second row, columns 0-7
+    paddw                   m3, m5                  ; second row, columns 8-15
+    pmaxsw                  m0, m6                  ; clamp negative results to 0
+    pmaxsw                  m2, m6
+    pmaxsw                  m1, m6
+    pmaxsw                  m3, m6
+    pminsw                  m0, m7                  ; clamp to the upper bound
+    pminsw                  m2, m7
+    pminsw                  m1, m7
+    pminsw                  m3, m7
+    mova   [dstq+strideq*0+ 0], m0
+    mova   [dstq+strideq*0+16], m2
+    mova   [dstq+strideq*1+ 0], m1
+    mova   [dstq+strideq*1+16], m3
+    lea                   dstq, [dstq+strideq*2]    ; advance 2 rows
+    dec                   cntd
+    jge .loop                                       ; runs while cnt >= 0 after the decrement
+    RET
+
+cglobal vp9_ipred_tm_16x16_12, 4, 4, 8, dst, stride, l, a ; same 16x16 TM predictor, upper clamp bound taken from [pw_4095]
+    mova                    m7, [pw_4095]           ; m7 is the register the shared body clamps against
+    jmp mangle(private_prefix %+ _ %+ vp9_ipred_tm_16x16_10 %+ SUFFIX).body ; continue in the _10 function's .body
+
+INIT_XMM sse2
+cglobal vp9_ipred_tm_32x32_10, 4, 4, 10, 32 * -ARCH_X86_32, dst, stride, l, a ; 32x32 TM predictor; asks for 32 bytes of stack on x86-32 only (sign convention: see x86inc)
+    mova                    m0, [pw_1023]           ; upper clamp bound (constant defined elsewhere)
+.body:                                              ; shared body: the _12 variant jumps here with its own bound in m0
+    pxor                    m1, m1                  ; m1 = 0, the lower clamp bound
+%if ARCH_X86_64                                     ; 64-bit: keep both bounds in m8/m9 so m0/m1 are free for the loop
+    SWAP                     0, 8
+    SWAP                     1, 9
+%define reg_min m9
+%define reg_max m8
+%else                                               ; 32-bit: spill both bounds to the stack and clamp from memory
+    mova              [rsp+ 0], m0
+    mova              [rsp+16], m1
+%define reg_min [rsp+16]
+%define reg_max [rsp+ 0]
+%endif
+
+    mova                    m4, [aq+mmsize*0]       ; above samples 0-7
+    mova                    m5, [aq+mmsize*1]       ; above samples 8-15
+    mova                    m6, [aq+mmsize*2]       ; above samples 16-23
+    mova                    m7, [aq+mmsize*3]       ; above samples 24-31
+    movd                    m0, [aq-4]              ; 4 bytes ending right before a[0]; word 1 is a[-1] (top-left)
+    pshuflw                 m0, m0, q1111           ; broadcast top-left across the low 4 words
+    punpcklqdq              m0, m0                  ; ... and copy them into the high half
+    psubw                   m4, m0                  ; m4..m7 = above - topleft
+    psubw                   m5, m0
+    psubw                   m6, m0
+    psubw                   m7, m0
+    DEFINE_ARGS dst, stride, l, cnt                 ; a is consumed; its register becomes the loop counter
+    mov                   cntd, 31                  ; 32 passes (cnt = 31..0), one row each
+.loop:
+    pinsrw                  m3, [lq+cntq*2], 0      ; left sample at word index cnt into word 0 (other words are overwritten below)
+    punpcklwd               m3, m3                  ; dword 0 = that sample twice
+    pshufd                  m3, m3, q0000           ; broadcast it to all 8 words
+    paddw                   m0, m3, m4              ; left + (above - topleft), columns 0-7
+    paddw                   m1, m3, m5              ; columns 8-15
+    paddw                   m2, m3, m6              ; columns 16-23
+    paddw                   m3, m7                  ; columns 24-31
+    pmaxsw                  m0, reg_min             ; clamp negative results to 0
+    pmaxsw                  m1, reg_min
+    pmaxsw                  m2, reg_min
+    pmaxsw                  m3, reg_min
+    pminsw                  m0, reg_max             ; clamp to the upper bound
+    pminsw                  m1, reg_max
+    pminsw                  m2, reg_max
+    pminsw                  m3, reg_max
+    mova   [dstq+strideq*0+ 0], m0
+    mova   [dstq+strideq*0+16], m1
+    mova   [dstq+strideq*0+32], m2
+    mova   [dstq+strideq*0+48], m3
+    add                   dstq, strideq             ; next row
+    dec                   cntd
+    jge .loop                                       ; runs while cnt >= 0 after the decrement
+    RET
+
+cglobal vp9_ipred_tm_32x32_12, 4, 4, 10, 32 * -ARCH_X86_32, dst, stride, l, a ; same 32x32 TM predictor, upper clamp bound taken from [pw_4095]
+    mova                    m0, [pw_4095]           ; the shared body expects the bound in m0 on entry
+    jmp mangle(private_prefix %+ _ %+ vp9_ipred_tm_32x32_10 %+ SUFFIX).body ; continue in the _10 function's .body
+
+; Directional intra prediction functions
+;
+; in the functions below, 'abcdefgh' refers to above data (sometimes simply
+; abbreviated as a[N-M]). 'stuvwxyz' refers to left data (sometimes simply
+; abbreviated as l[N-M]). * is top-left data. ABCDEFG or A[N-M] is filtered
+; above data, STUVWXYZ or L[N-M] is filtered left data, and # is filtered
+; top-left data.
+
+; 3-tap [1 2 1] smoothing of packed words, result overwrites the first operand: left=(left+2*center+right+2)>>2
+%macro LOWPASS 3 ; left [dst], center, right (register numbers, used as m%N)
+    paddw                  m%1, m%3                 ; left + right
+    psraw                  m%1, 1                   ; (left + right) >> 1 (assumes the sum does not overflow a signed word)
+    pavgw                  m%1, m%2                 ; (previous + center + 1) >> 1, which gives the formula above
+%endmacro
+
+; abcdefgh (src) -> bcdefghh (dst): drop the first word and repeat the last one
+; dst/src can be the same register
+%macro SHIFT_RIGHT 2-3 [pb_2to15_14_15] ; dst, src, [ssse3_shift_reg]
+%if cpuflag(ssse3)                                  ; one byte shuffle; %3 is the mask (a register or the default memory constant)
+    pshufb                  %1, %2, %3              ; abcdefgh -> bcdefghh
+%else                                               ; SSE2 fallback; %3 is not used on this path
+    psrldq                  %1, %2, 2               ; abcdefgh -> bcdefgh.
+    pshufhw                 %1, %1, q2210           ; bcdefgh. -> bcdefghh
+%endif
+%endmacro
+
+; abcdefgh (src) -> bcdefghh (dst1) and cdefghhh (dst2)
+%macro SHIFT_RIGHTx2 3-4 [pb_2to15_14_15] ; dst1, dst2, src, [ssse3_shift_reg]
+%if cpuflag(ssse3)                                  ; apply the one-word shuffle twice; %4 is the mask
+    pshufb                  %1, %3, %4              ; abcdefgh -> bcdefghh
+    pshufb                  %2, %1, %4              ; bcdefghh -> cdefghhh
+%else                                               ; SSE2 fallback; both results are derived from src, %4 is not used
+    psrldq                  %1, %3, 2               ; abcdefgh -> bcdefgh.
+    psrldq                  %2, %3, 4               ; abcdefgh -> cdefgh..
+    pshufhw                 %1, %1, q2210           ; bcdefgh. -> bcdefghh
+    pshufhw                 %2, %2, q1110           ; cdefgh.. -> cdefghhh
+%endif
+%endmacro
+
+%macro DL_FUNCS 0 ; emits the dl predictors (4x4 to 32x32) for the instruction set chosen by the preceding INIT_XMM
+cglobal vp9_ipred_dl_4x4_16, 2, 4, 3, dst, stride, l, a ; 4x4: every row is the low-pass filtered above edge, advanced one sample per row; l is not read
+    movifnidn               aq, amp                 ; fetch the a argument unless it already lives in aq (only 2 args are requested from cglobal)
+    movu                    m1, [aq]                ; abcdefgh
+    pshufhw                 m0, m1, q3310           ; abcdefhh
+    SHIFT_RIGHT             m1, m1                  ; bcdefghh
+    psrldq                  m2, m1, 2               ; cdefghh.
+    LOWPASS                  0,  1,  2              ; BCDEFGh.
+    pshufd                  m1, m0, q3321           ; DEFGh...
+    movh      [dstq+strideq*0], m0                  ; row 0 = BCDE
+    movh      [dstq+strideq*2], m1                  ; row 2 = DEFG
+    add                   dstq, strideq             ; dst now points at row 1
+    psrldq                  m0, 2                   ; CDEFGh..
+    psrldq                  m1, 2                   ; EFGh....
+    movh      [dstq+strideq*0], m0                  ; row 1 = CDEF
+    movh      [dstq+strideq*2], m1                  ; row 3 = EFGh
+    RET
+
+cglobal vp9_ipred_dl_8x8_16, 2, 4, 5, dst, stride, l, a ; 8x8: rows are the filtered above edge advanced one sample per row, padded with its last sample
+    movifnidn               aq, amp                 ; fetch the a argument unless it already lives in aq
+    mova                    m0, [aq]                ; abcdefgh
+%if cpuflag(ssse3)
+    mova                    m4, [pb_2to15_14_15]    ; keep the SHIFT_RIGHT shuffle mask in a register
+%endif
+    SHIFT_RIGHTx2           m1, m2, m0, m4          ; bcdefghh/cdefghhh
+    LOWPASS                  0,  1,  2              ; BCDEFGHh
+    shufps                  m1, m0, m2, q3332       ; FGHhhhhh
+    shufps                  m3, m0, m1, q2121       ; DEFGHhhh
+    DEFINE_ARGS dst, stride, stride5                ; l/a registers are free now; one is reused as stride5
+    lea               stride5q, [strideq*5]
+
+    mova      [dstq+strideq*0], m0                  ; row 0
+    mova      [dstq+strideq*4], m1                  ; row 4
+    SHIFT_RIGHT             m0, m0, m4              ; CDEFGHhh
+    pshuflw                 m1, m1, q3321           ; GHhhhhhh
+    pshufd                  m2, m0, q3321           ; EFGHhhhh
+    mova      [dstq+strideq*1], m0                  ; row 1
+    mova      [dstq+stride5q ], m1                  ; row 5
+    lea                   dstq, [dstq+strideq*2]    ; dst now points at row 2
+    pshuflw                 m1, m1, q3321           ; Hhhhhhhh
+    mova      [dstq+strideq*0], m3                  ; row 2
+    mova      [dstq+strideq*4], m1                  ; row 6
+    pshuflw                 m1, m1, q3321           ; hhhhhhhh
+    mova      [dstq+strideq*1], m2                  ; row 3
+    mova      [dstq+stride5q ], m1                  ; row 7
+    RET
+
+cglobal vp9_ipred_dl_16x16_16, 2, 4, 5, dst, stride, l, a ; 16x16: rows are the filtered above edge advanced one sample per row, padded with its last sample
+    movifnidn               aq, amp                 ; fetch the a argument unless it already lives in aq
+    mova                    m0, [aq]                ; abcdefgh
+    mova                    m3, [aq+mmsize]         ; ijklmnop
+    PALIGNR                 m1, m3, m0, 2, m4       ; bcdefghi
+    PALIGNR                 m2, m3, m0, 4, m4       ; cdefghij
+    LOWPASS                  0,  1,  2              ; BCDEFGHI
+%if cpuflag(ssse3)
+    mova                    m4, [pb_2to15_14_15]    ; SHIFT_RIGHT shuffle mask, kept in m4 for the loop
+%endif
+    SHIFT_RIGHTx2           m2, m1, m3, m4          ; jklmnopp/klmnoppp
+    LOWPASS                  1,  2,  3              ; JKLMNOPp
+    pshufd                  m2, m2, q3333           ; pppppppp
+    DEFINE_ARGS dst, stride, cnt
+    mov                   cntd, 8                   ; 8 passes; pass i writes rows i and i+8
+
+.loop:
+    mova   [dstq+strideq*0+ 0], m0                  ; row i, columns 0-7
+    mova   [dstq+strideq*0+16], m1                  ; row i, columns 8-15
+    mova   [dstq+strideq*8+ 0], m1                  ; row i+8 is row i advanced by 8 samples, i.e. one register
+    mova   [dstq+strideq*8+16], m2                  ; ... followed by the replicated last sample
+    add                   dstq, strideq
+%if cpuflag(avx)
+    vpalignr                m0, m1, m0, 2           ; advance the m1:m0 pair by one sample
+%else
+    PALIGNR                 m3, m1, m0, 2, m4       ; same step through a temporary (m3)
+    mova                    m0, m3
+%endif
+    SHIFT_RIGHT             m1, m1, m4              ; advance m1, repeating its last sample
+    dec                   cntd
+    jg .loop
+    RET
+
+cglobal vp9_ipred_dl_32x32_16, 2, 5, 7, dst, stride, l, a ; 32x32: rows are the filtered above edge advanced one sample per row, padded with its last sample
+    movifnidn               aq, amp                 ; fetch the a argument unless it already lives in aq
+    mova                    m0, [aq+mmsize*0]       ; abcdefgh
+    mova                    m1, [aq+mmsize*1]       ; ijklmnop
+    mova                    m2, [aq+mmsize*2]       ; qrstuvwx
+    mova                    m3, [aq+mmsize*3]       ; yz012345
+    PALIGNR                 m4, m1, m0, 2, m6       ; bcdefghi (m6 is only a temporary for the SSE2 emulation)
+    PALIGNR                 m5, m1, m0, 4, m6       ; cdefghij
+    LOWPASS                  0,  4,  5              ; BCDEFGHI
+    PALIGNR                 m4, m2, m1, 2, m6       ; jklmnopq
+    PALIGNR                 m5, m2, m1, 4, m6       ; klmnopqr
+    LOWPASS                  1,  4,  5              ; JKLMNOPQ
+    PALIGNR                 m4, m3, m2, 2, m6       ; rstuvwxy
+    PALIGNR                 m5, m3, m2, 4, m6       ; stuvwxyz
+    LOWPASS                  2,  4,  5              ; RSTUVWXY
+%if cpuflag(ssse3)
+    mova                    m6, [pb_2to15_14_15]    ; SHIFT_RIGHT shuffle mask, kept in m6 for the loop
+%endif
+    SHIFT_RIGHTx2           m4, m5, m3, m6          ; z0123455/01234555
+    LOWPASS                  3,  4,  5              ; Z0123455
+    pshufd                  m4, m4, q3333           ; 55555555
+    DEFINE_ARGS dst, stride, stride8, stride24, cnt
+    mov                   cntd, 8                   ; 8 passes; pass i writes rows i, i+8, i+16 and i+24
+    lea               stride8q, [strideq*8]
+    lea              stride24q, [stride8q*3]
+
+.loop:
+    mova  [dstq+stride8q*0+ 0], m0                  ; row i
+    mova  [dstq+stride8q*0+16], m1
+    mova  [dstq+stride8q*0+32], m2
+    mova  [dstq+stride8q*0+48], m3
+    mova  [dstq+stride8q*1+ 0], m1                  ; row i+8: same data advanced by one register, padded with m4
+    mova  [dstq+stride8q*1+16], m2
+    mova  [dstq+stride8q*1+32], m3
+    mova  [dstq+stride8q*1+48], m4
+    mova  [dstq+stride8q*2+ 0], m2                  ; row i+16: advanced by two registers
+    mova  [dstq+stride8q*2+16], m3
+    mova  [dstq+stride8q*2+32], m4
+    mova  [dstq+stride8q*2+48], m4
+    mova  [dstq+stride24q + 0], m3                  ; row i+24: advanced by three registers
+    mova  [dstq+stride24q +16], m4
+    mova  [dstq+stride24q +32], m4
+    mova  [dstq+stride24q +48], m4
+    add                   dstq, strideq
+%if cpuflag(avx)
+    vpalignr                m0, m1, m0, 2           ; advance the m3:m2:m1:m0 chain by one sample
+    vpalignr                m1, m2, m1, 2
+    vpalignr                m2, m3, m2, 2
+%else
+    PALIGNR                 m5, m1, m0, 2, m6       ; same step, going through m5 as a temporary
+    mova                    m0, m5
+    PALIGNR                 m5, m2, m1, 2, m6
+    mova                    m1, m5
+    PALIGNR                 m5, m3, m2, 2, m6
+    mova                    m2, m5
+%endif
+    SHIFT_RIGHT             m3, m3, m6              ; advance m3, repeating its last sample
+    dec                   cntd
+    jg .loop
+    RET
+%endmacro
+
+INIT_XMM sse2
+DL_FUNCS                                            ; SSE2 build: SHIFT_RIGHT takes its psrldq/pshufhw fallback
+INIT_XMM ssse3
+DL_FUNCS                                            ; SSSE3 build: SHIFT_RIGHT uses pshufb
+INIT_XMM avx
+DL_FUNCS                                            ; AVX build: additionally takes the vpalignr branches in the loops
+
+%macro DR_FUNCS 1 ; stack_mem_for_32x32_32bit_function (in units of mmsize, used only by the 32x32 function)
+cglobal vp9_ipred_dr_4x4_16, 4, 4, 3, dst, stride, l, a ; 4x4: one filtered left/top-left/above sequence; each row up starts one sample later
+    movh                    m0, [lq]                ; wxyz....
+    movhps                  m0, [aq-2]              ; wxyz*abc
+    movd                    m1, [aq+6]              ; d.......
+    PALIGNR                 m1, m0, 2, m2           ; xyz*abcd
+    psrldq                  m2, m1, 2               ; yz*abcd.
+    LOWPASS                  0, 1, 2                ; XYZ#ABC.
+    DEFINE_ARGS dst, stride, stride3
+    lea               stride3q, [strideq*3]
+
+    movh      [dstq+stride3q ], m0                  ; row 3 = XYZ#
+    psrldq                  m0, 2                   ; YZ#ABC..
+    movh      [dstq+strideq*2], m0                  ; row 2 = YZ#A
+    psrldq                  m0, 2                   ; Z#ABC...
+    movh      [dstq+strideq*1], m0                  ; row 1 = Z#AB
+    psrldq                  m0, 2                   ; #ABC....
+    movh      [dstq+strideq*0], m0                  ; row 0 = #ABC
+    RET
+
+cglobal vp9_ipred_dr_8x8_16, 4, 4, 5, dst, stride, l, a ; 8x8: rows are windows into the filtered left/top-left/above sequence, written bottom-up
+    mova                    m0, [lq]                ; stuvwxyz
+    movu                    m1, [aq-2]              ; *abcdefg
+    mova                    m2, [aq]                ; abcdefgh
+    psrldq                  m3, m2, 2               ; bcdefgh.
+    LOWPASS                  3,  2, 1               ; ABCDEFG.
+    PALIGNR                 m1, m0, 2, m4           ; tuvwxyz*
+    PALIGNR                 m2, m1, 2, m4           ; uvwxyz*a
+    LOWPASS                  2,  1, 0               ; TUVWXYZ#
+    DEFINE_ARGS dst, stride, dst4, stride3
+    lea               stride3q, [strideq*3]
+    lea                  dst4q, [dstq+strideq*4]    ; dst4 addresses rows 4-7
+
+    movhps [dstq +stride3q +0], m2                  ; row 3, columns 0-3: XYZ#
+    movh   [dstq+ stride3q +8], m3                  ; row 3, columns 4-7: ABCD
+    mova   [dst4q+stride3q +0], m2                  ; row 7: TUVWXYZ#
+    PALIGNR                 m1, m3, m2, 2, m0       ; advance the m3:m2 pair by one sample into m1
+    psrldq                  m3, 2
+    movhps [dstq +strideq*2+0], m1                  ; row 2
+    movh   [dstq+ strideq*2+8], m3
+    mova   [dst4q+strideq*2+0], m1                  ; row 6
+    PALIGNR                 m2, m3, m1, 2, m0       ; next step, results ping-pong between m1 and m2
+    psrldq                  m3, 2
+    movhps [dstq +strideq*1+0], m2                  ; row 1
+    movh   [dstq+ strideq*1+8], m3
+    mova   [dst4q+strideq*1+0], m2                  ; row 5
+    PALIGNR                 m1, m3, m2, 2, m0
+    psrldq                  m3, 2
+    movhps [dstq +strideq*0+0], m1                  ; row 0
+    movh   [dstq+ strideq*0+8], m3
+    mova   [dst4q+strideq*0+0], m1                  ; row 4
+    RET
+
+cglobal vp9_ipred_dr_16x16_16, 4, 4, 7, dst, stride, l, a ; 16x16: rows are windows into the filtered left/top-left/above sequence, written bottom-up
+    mova                    m0, [lq]                ; klmnopqr
+    mova                    m1, [lq+mmsize]         ; stuvwxyz
+    movu                    m2, [aq-2]              ; *abcdefg
+    movu                    m3, [aq+mmsize-2]       ; hijklmno
+    mova                    m4, [aq]                ; abcdefgh
+    mova                    m5, [aq+mmsize]         ; ijklmnop
+    psrldq                  m6, m5, 2               ; jklmnop.
+    LOWPASS                  6,  5, 3               ; IJKLMNO.
+    PALIGNR                 m5, m4, 2, m3           ; bcdefghi
+    LOWPASS                  5,  4, 2               ; ABCDEFGH
+    PALIGNR                 m2, m1, 2, m3           ; tuvwxyz*
+    PALIGNR                 m4, m2, 2, m3           ; uvwxyz*a
+    LOWPASS                  4,  2, 1               ; TUVWXYZ#
+    PALIGNR                 m1, m0, 2, m3           ; lmnopqrs
+    PALIGNR                 m2, m1, 2, m3           ; mnopqrst
+    LOWPASS                  2, 1, 0                ; LMNOPQRS
+    DEFINE_ARGS dst, stride, dst8, cnt
+    lea                  dst8q, [dstq+strideq*8]    ; start one past row 7; the loop steps upwards
+    mov                   cntd, 8                   ; 8 passes; pass i writes rows 7-i and 15-i
+
+.loop:
+    sub                  dst8q, strideq
+    mova  [dst8q+strideq*0+ 0], m4                  ; upper row, columns 0-7
+    mova  [dst8q+strideq*0+16], m5                  ; upper row, columns 8-15
+    mova  [dst8q+strideq*8+ 0], m2                  ; row 8 lines below: same sequence, one register earlier
+    mova  [dst8q+strideq*8+16], m4
+%if cpuflag(avx)
+    vpalignr                m2, m4, m2, 2           ; advance the m6:m5:m4:m2 chain by one sample
+    vpalignr                m4, m5, m4, 2
+    vpalignr                m5, m6, m5, 2
+%else
+    PALIGNR                 m0, m4, m2, 2, m1       ; same step, going through m0 (m1 is scratch)
+    mova                    m2, m0
+    PALIGNR                 m0, m5, m4, 2, m1
+    mova                    m4, m0
+    PALIGNR                 m0, m6, m5, 2, m1
+    mova                    m5, m0
+%endif
+    psrldq                  m6, 2                   ; drop the sample just consumed from the tail register
+    dec                   cntd
+    jg .loop
+    RET
+
+cglobal vp9_ipred_dr_32x32_16, 4, 5, 10 + notcpuflag(ssse3), \
+                               %1 * ARCH_X86_32 * -mmsize, dst, stride, l, a ; 32x32 dr; %1 stack slots of mmsize on x86-32 only, one more vector reg without ssse3
+    mova                    m0, [aq+mmsize*3]       ; a[24-31]
+    movu                    m1, [aq+mmsize*3-2]     ; a[23-30]
+    psrldq                  m2, m0, 2               ; a[25-31].
+    LOWPASS                  2,  0, 1               ; A[24-30].
+    mova                    m1, [aq+mmsize*2]       ; a[16-23]
+    movu                    m3, [aq+mmsize*2-2]     ; a[15-22]
+    PALIGNR                 m0, m1, 2, m4           ; a[17-24]
+    LOWPASS                  0,  1, 3               ; A[16-23]
+    mova                    m3, [aq+mmsize*1]       ; a[8-15]
+    movu                    m4, [aq+mmsize*1-2]     ; a[7-14]
+    PALIGNR                 m1, m3, 2, m5           ; a[9-16]
+    LOWPASS                  1,  3, 4               ; A[8-15]
+    mova                    m4, [aq+mmsize*0]       ; a[0-7]
+    movu                    m5, [aq+mmsize*0-2]     ; *a[0-6]
+    PALIGNR                 m3, m4, 2, m6           ; a[1-8]
+    LOWPASS                  3,  4, 5               ; A[0-7]
+    SCRATCH                  1,  8, rsp+0*mmsize    ; park A[8-15] (SCRATCH is defined elsewhere; presumably m8 on x86-64, the stack slot on x86-32)
+    SCRATCH                  3,  9, rsp+1*mmsize    ; park A[0-7]
+%if notcpuflag(ssse3)
+    SCRATCH                  0, 10, rsp+2*mmsize    ; without ssse3 m0 serves as the PALIGNR temporary below, so A[16-23] is parked too
+%endif
+    mova                    m6, [lq+mmsize*3]       ; l[24-31]
+    PALIGNR                 m5, m6, 2, m0           ; l[25-31]*
+    PALIGNR                 m4, m5, 2, m0           ; l[26-31]*a
+    LOWPASS                  4,  5, 6               ; L[25-31]#
+    mova                    m7, [lq+mmsize*2]       ; l[16-23]
+    PALIGNR                 m6, m7, 2, m0           ; l[17-24]
+    PALIGNR                 m5, m6, 2, m0           ; l[18-25]
+    LOWPASS                  5,  6, 7               ; L[17-24]
+    mova                    m1, [lq+mmsize*1]       ; l[8-15]
+    PALIGNR                 m7, m1, 2, m0           ; l[9-16]
+    PALIGNR                 m6, m7, 2, m0           ; l[10-17]
+    LOWPASS                  6,  7, 1               ; L[9-16]
+    mova                    m3, [lq+mmsize*0]       ; l[0-7]
+    PALIGNR                 m1, m3, 2, m0           ; l[1-8]
+    PALIGNR                 m7, m1, 2, m0           ; l[2-9]
+    LOWPASS                  7,  1, 3               ; L[1-8]
+%if cpuflag(ssse3)
+%if cpuflag(avx)
+    UNSCRATCH                1,  8, rsp+0*mmsize    ; avx: restore A[8-15] once here; the loop never parks it again
+%endif
+    UNSCRATCH                3,  9, rsp+1*mmsize    ; restore A[0-7]
+%else
+    UNSCRATCH                0, 10, rsp+2*mmsize    ; sse2: restore A[16-23]; m1/m3 are restored at the top of each pass
+%endif
+    DEFINE_ARGS dst8, stride, stride8, stride24, cnt
+    lea               stride8q, [strideq*8]
+    lea              stride24q, [stride8q*3]
+    lea                  dst8q, [dst8q+strideq*8]   ; start one past row 7; the loop steps upwards
+    mov                   cntd, 8                   ; 8 passes; pass i writes rows 7-i, 15-i, 23-i and 31-i
+
+.loop:
+    sub                  dst8q, strideq
+%if notcpuflag(avx)
+    UNSCRATCH                1,  8, rsp+0*mmsize    ; bring back the registers parked at the end of the previous pass (or before the loop)
+%if notcpuflag(ssse3)
+    UNSCRATCH                3,  9, rsp+1*mmsize
+%endif
+%endif
+    mova [dst8q+stride8q*0+ 0], m4                  ; row 7-i: m4, m3, m1, m0
+    mova [dst8q+stride8q*0+16], m3
+    mova [dst8q+stride8q*0+32], m1
+    mova [dst8q+stride8q*0+48], m0
+    mova [dst8q+stride8q*1+ 0], m5                  ; row 15-i: same sequence, one register earlier
+    mova [dst8q+stride8q*1+16], m4
+    mova [dst8q+stride8q*1+32], m3
+    mova [dst8q+stride8q*1+48], m1
+    mova [dst8q+stride8q*2+ 0], m6                  ; row 23-i: two registers earlier
+    mova [dst8q+stride8q*2+16], m5
+    mova [dst8q+stride8q*2+32], m4
+    mova [dst8q+stride8q*2+48], m3
+    mova [dst8q+stride24q + 0], m7                  ; row 31-i: three registers earlier
+    mova [dst8q+stride24q +16], m6
+    mova [dst8q+stride24q +32], m5
+    mova [dst8q+stride24q +48], m4
+%if cpuflag(avx)
+    vpalignr                m7, m6, m7, 2           ; advance the whole chain m7,m6,m5,m4,m3,m1,m0,m2 by one sample
+    vpalignr                m6, m5, m6, 2
+    vpalignr                m5, m4, m5, 2
+    vpalignr                m4, m3, m4, 2
+    vpalignr                m3, m1, m3, 2
+    vpalignr                m1, m0, m1, 2
+    vpalignr                m0, m2, m0, 2
+%else
+    SCRATCH                  2,  8, rsp+0*mmsize    ; non-avx: same one-sample step, but m2 is needed as the result temporary
+%if notcpuflag(ssse3)
+    SCRATCH                  0,  9, rsp+1*mmsize    ; sse2 also needs m0 as the PALIGNR scratch operand
+%endif
+    PALIGNR                 m2, m6, m7, 2, m0
+    mova                    m7, m2
+    PALIGNR                 m2, m5, m6, 2, m0
+    mova                    m6, m2
+    PALIGNR                 m2, m4, m5, 2, m0
+    mova                    m5, m2
+    PALIGNR                 m2, m3, m4, 2, m0
+    mova                    m4, m2
+    PALIGNR                 m2, m1, m3, 2, m0
+    mova                    m3, m2
+%if notcpuflag(ssse3)
+    UNSCRATCH                0,  9, rsp+1*mmsize    ; swap roles: m0 comes back, the updated m3 is parked and m3 becomes the scratch
+    SCRATCH                  3,  9, rsp+1*mmsize
+%endif
+    PALIGNR                 m2, m0, m1, 2, m3
+    mova                    m1, m2
+    UNSCRATCH                2,  8, rsp+0*mmsize    ; m2 comes back, the updated m1 is parked until the next pass
+    SCRATCH                  1,  8, rsp+0*mmsize
+    PALIGNR                 m1, m2, m0, 2, m3
+    mova                    m0, m1
+%endif
+    psrldq                  m2, 2                   ; drop the sample just consumed from the tail register
+    dec                   cntd
+    jg .loop
+    RET
+%endmacro
+
+INIT_XMM sse2
+DR_FUNCS 3                                          ; SSE2 build: 3 stack slots for the 32x32 function on x86-32
+INIT_XMM ssse3
+DR_FUNCS 2                                          ; SSSE3 build: 2 stack slots
+INIT_XMM avx
+DR_FUNCS 2                                          ; AVX build: 2 stack slots, vpalignr branches in the loops
+
+%macro VL_FUNCS 1 ; stack_mem_for_32x32_32bit_function (in units of mmsize, used only by the 32x32 function)
+cglobal vp9_ipred_vl_4x4_16, 2, 4, 3, dst, stride, l, a ; 4x4: even rows use 2-tap averages of the above edge, odd rows the 3-tap filter; l is not read
+    movifnidn               aq, amp                 ; fetch the a argument unless it already lives in aq
+    movu                    m0, [aq]                ; abcdefgh
+    psrldq                  m1, m0, 2               ; bcdefgh.
+    psrldq                  m2, m0, 4               ; cdefgh..
+    LOWPASS                  2,  1, 0               ; BCDEFGH.
+    pavgw                   m1, m0                  ; ABCDEFG.
+    DEFINE_ARGS dst, stride, stride3
+    lea               stride3q, [strideq*3]
+
+    movh      [dstq+strideq*0], m1                  ; row 0: averages
+    movh      [dstq+strideq*1], m2                  ; row 1: 3-tap filtered
+    psrldq                  m1, 2                   ; advance both by one sample
+    psrldq                  m2, 2
+    movh      [dstq+strideq*2], m1                  ; row 2
+    movh      [dstq+stride3q ], m2                  ; row 3
+    RET
+
+cglobal vp9_ipred_vl_8x8_16, 2, 4, 4, dst, stride, l, a ; 8x8: even rows = averaged above edge, odd rows = 3-tap filtered; one sample advance per row pair
+    movifnidn               aq, amp                 ; fetch the a argument unless it already lives in aq
+    mova                    m0, [aq]                ; abcdefgh
+%if cpuflag(ssse3)
+    mova                    m3, [pb_2to15_14_15]    ; keep the SHIFT_RIGHT shuffle mask in a register
+%endif
+    SHIFT_RIGHTx2           m1, m2, m0, m3          ; bcdefghh/cdefghhh
+    LOWPASS                  2,  1, 0               ; BCDEFGHh
+    pavgw                   m1, m0                  ; ABCDEFGh
+    DEFINE_ARGS dst, stride, stride3
+    lea               stride3q, [strideq*3]
+
+    mova      [dstq+strideq*0], m1                  ; row 0
+    mova      [dstq+strideq*1], m2                  ; row 1
+    SHIFT_RIGHT             m1, m1, m3              ; advance one sample, repeating the last
+    SHIFT_RIGHT             m2, m2, m3
+    mova      [dstq+strideq*2], m1                  ; row 2
+    mova      [dstq+stride3q ], m2                  ; row 3
+    lea                   dstq, [dstq+strideq*4]    ; dst now points at row 4
+    SHIFT_RIGHT             m1, m1, m3
+    SHIFT_RIGHT             m2, m2, m3
+    mova      [dstq+strideq*0], m1                  ; row 4
+    mova      [dstq+strideq*1], m2                  ; row 5
+    SHIFT_RIGHT             m1, m1, m3
+    SHIFT_RIGHT             m2, m2, m3
+    mova      [dstq+strideq*2], m1                  ; row 6
+    mova      [dstq+stride3q ], m2                  ; row 7
+    RET
+
+cglobal vp9_ipred_vl_16x16_16, 2, 4, 6, dst, stride, l, a ; 16x16: even rows = averaged above edge, odd rows = 3-tap filtered; one sample advance per row pair
+    movifnidn               aq, amp                 ; fetch the a argument unless it already lives in aq
+    mova                    m0, [aq]                ; above samples 0-7
+    mova                    m1, [aq+mmsize]         ; above samples 8-15
+    PALIGNR                 m2, m1, m0, 2, m3       ; above samples 1-8
+    PALIGNR                 m3, m1, m0, 4, m4       ; above samples 2-9
+    LOWPASS                  3,  2,  0              ; m3 = 3-tap filtered, columns 0-7
+    pavgw                   m2, m0                  ; m2 = 2-tap averages, columns 0-7
+%if cpuflag(ssse3)
+    mova                    m4, [pb_2to15_14_15]    ; SHIFT_RIGHT shuffle mask, kept in m4 for the loop
+%endif
+    SHIFT_RIGHTx2           m5, m0, m1, m4          ; samples 9-15 and 10-15, padded with sample 15
+    LOWPASS                  0,  5,  1              ; m0 = 3-tap filtered, columns 8-15
+    pavgw                   m1, m5                  ; m1 = 2-tap averages, columns 8-15
+    DEFINE_ARGS dst, stride, cnt
+    mov                   cntd, 8                   ; 8 passes of 2 rows each
+
+.loop:
+    mova   [dstq+strideq*0+ 0], m2                  ; even row: averages
+    mova   [dstq+strideq*0+16], m1
+    mova   [dstq+strideq*1+ 0], m3                  ; odd row: 3-tap filtered
+    mova   [dstq+strideq*1+16], m0
+    lea                   dstq, [dstq+strideq*2]
+%if cpuflag(avx)
+    vpalignr                m2, m1, m2, 2           ; advance the left halves by one sample, pulling from the right halves
+    vpalignr                m3, m0, m3, 2
+%else
+    PALIGNR                 m5, m1, m2, 2, m4       ; same step, going through m5 as a temporary
+    mova                    m2, m5
+    PALIGNR                 m5, m0, m3, 2, m4
+    mova                    m3, m5
+%endif
+    SHIFT_RIGHT             m1, m1, m4              ; advance the right halves, repeating their last sample
+    SHIFT_RIGHT             m0, m0, m4
+    dec                   cntd
+    jg .loop
+    RET
+
+cglobal vp9_ipred_vl_32x32_16, 2, 5, 11, %1 * mmsize * ARCH_X86_32, dst, stride, l, a ; 32x32 vl; %1 stack slots of mmsize on x86-32 only
+    movifnidn               aq, amp                 ; fetch the a argument unless it already lives in aq
+    mova                    m0, [aq+mmsize*0]       ; above samples 0-7
+    mova                    m1, [aq+mmsize*1]       ; above samples 8-15
+    mova                    m2, [aq+mmsize*2]       ; above samples 16-23
+    PALIGNR                 m6, m1, m0, 2, m5       ; samples 1-8
+    PALIGNR                 m7, m1, m0, 4, m5       ; samples 2-9
+    LOWPASS                  7,  6,  0              ; m7 = 3-tap filtered, columns 0-7
+    pavgw                   m6, m0                  ; m6 = 2-tap averages, columns 0-7
+    SCRATCH                  6,  8, rsp+0*mmsize    ; park the averages (SCRATCH is defined elsewhere; presumably m8 on x86-64, the stack slot on x86-32)
+    PALIGNR                 m4, m2, m1, 2, m0       ; samples 9-16
+    PALIGNR                 m5, m2, m1, 4, m0       ; samples 10-17
+    LOWPASS                  5,  4,  1              ; m5 = 3-tap filtered, columns 8-15
+    pavgw                   m4, m1                  ; m4 = 2-tap averages, columns 8-15
+    mova                    m0, [aq+mmsize*3]       ; above samples 24-31
+    PALIGNR                 m1, m0, m2, 2, m6       ; samples 17-24
+    PALIGNR                 m3, m0, m2, 4, m6       ; samples 18-25
+    LOWPASS                  3,  1,  2              ; m3 = 3-tap filtered, columns 16-23
+    pavgw                   m2, m1                  ; m2 = 2-tap averages, columns 16-23
+%if cpuflag(ssse3)
+    PRELOAD                 10, pb_2to15_14_15, shuf ; defines reg_shuf for SHIFT_RIGHT (PRELOAD is defined elsewhere)
+%endif
+    SHIFT_RIGHTx2           m6, m1, m0, reg_shuf    ; samples 25-31 and 26-31, padded with sample 31
+    LOWPASS                  1,  6,  0              ; m1 = 3-tap filtered, columns 24-31
+    pavgw                   m0, m6                  ; m0 = 2-tap averages, columns 24-31
+%if ARCH_X86_64
+    pshufd                  m9, m6, q3333           ; m9 = last above sample broadcast, used to pad rows 16-31
+%endif
+%if cpuflag(avx)
+    UNSCRATCH                6,  8, rsp+0*mmsize    ; avx: restore the column 0-7 averages once; the loop never parks them again
+%endif
+    DEFINE_ARGS dst, stride, cnt, stride16, stride17
+    mov              stride16q, strideq
+    mov                   cntd, 8                   ; 8 passes; pass i writes rows 2i, 2i+1, 2i+16 and 2i+17
+    shl              stride16q, 4                   ; stride16 = 16*stride
+    lea              stride17q, [stride16q+strideq]
+
+    ; FIXME m8 is unused for avx, so we could save one register here for win64
+.loop:
+%if notcpuflag(avx)
+    UNSCRATCH                6,  8, rsp+0*mmsize    ; bring back the column 0-7 averages parked before the loop or by the previous pass
+%endif
+    mova   [dstq+strideq*0+ 0], m6                  ; row 2i: averages
+    mova   [dstq+strideq*0+16], m4
+    mova   [dstq+strideq*0+32], m2
+    mova   [dstq+strideq*0+48], m0
+    mova   [dstq+strideq*1+ 0], m7                  ; row 2i+1: 3-tap filtered
+    mova   [dstq+strideq*1+16], m5
+    mova   [dstq+strideq*1+32], m3
+    mova   [dstq+strideq*1+48], m1
+    mova   [dstq+stride16q+ 0], m4                  ; row 2i+16: row 2i advanced by one register (8 samples)
+    mova   [dstq+stride16q+16], m2
+    mova   [dstq+stride16q+32], m0
+%if ARCH_X86_64
+    mova   [dstq+stride16q+48], m9                  ; padding; on x86-32 these columns are filled after the loop
+%endif
+    mova   [dstq+stride17q+ 0], m5                  ; row 2i+17: row 2i+1 advanced by one register
+    mova   [dstq+stride17q+16], m3
+    mova   [dstq+stride17q+32], m1
+%if ARCH_X86_64
+    mova   [dstq+stride17q+48], m9
+%endif
+    lea                   dstq, [dstq+strideq*2]
+%if cpuflag(avx)
+    vpalignr                m6, m4, m6, 2           ; advance both register chains by one sample
+    vpalignr                m4, m2, m4, 2
+    vpalignr                m2, m0, m2, 2
+    vpalignr                m7, m5, m7, 2
+    vpalignr                m5, m3, m5, 2
+    vpalignr                m3, m1, m3, 2
+%else
+    SCRATCH                  3,  8, rsp+0*mmsize    ; non-avx: same step, but m3 is needed as the result temporary
+%if notcpuflag(ssse3)
+    SCRATCH                  1, 10, rsp+1*mmsize    ; sse2 also needs m1 as the PALIGNR scratch operand
+%endif
+    PALIGNR                 m3, m4, m6, 2, m1
+    mova                    m6, m3
+    PALIGNR                 m3, m2, m4, 2, m1
+    mova                    m4, m3
+    PALIGNR                 m3, m0, m2, 2, m1
+    mova                    m2, m3
+    PALIGNR                 m3, m5, m7, 2, m1
+    mova                    m7, m3
+    UNSCRATCH                3,  8, rsp+0*mmsize    ; m3 comes back; the updated m6 is parked until the next pass
+    SCRATCH                  6,  8, rsp+0*mmsize
+%if notcpuflag(ssse3)
+    UNSCRATCH                1, 10, rsp+1*mmsize    ; swap roles: m1 comes back, m7 is parked and becomes the scratch
+    SCRATCH                  7, 10, rsp+1*mmsize
+%endif
+    PALIGNR                 m6, m3, m5, 2, m7
+    mova                    m5, m6
+    PALIGNR                 m6, m1, m3, 2, m7
+    mova                    m3, m6
+%if notcpuflag(ssse3)
+    UNSCRATCH                7, 10, rsp+1*mmsize
+%endif
+%endif
+    SHIFT_RIGHT             m1, m1, reg_shuf        ; advance the last column group, repeating its last sample
+    SHIFT_RIGHT             m0, m0, reg_shuf
+    dec                   cntd
+    jg .loop
+
+%if ARCH_X86_32                                     ; x86-32: fill columns 24-31 of the last 16 rows (dst now points at row 16)
+    DEFINE_ARGS dst, stride, stride3
+    lea               stride3q, [strideq*3]
+%assign %%n 0
+%rep 4
+    mova   [dstq+strideq*0+48], m0                  ; m0 has been shifted 8 times, so every word is its replicated last value
+    mova   [dstq+strideq*1+48], m0
+    mova   [dstq+strideq*2+48], m0
+    mova   [dstq+stride3q +48], m0
+%if %%n < 3
+    lea                   dstq, [dstq+strideq*4]
+%endif
+%assign %%n (%%n+1)
+%endrep
+%endif
+    RET
+%endmacro
+
+INIT_XMM sse2
+VL_FUNCS 2                                          ; SSE2 build: 2 stack slots for the 32x32 function on x86-32
+INIT_XMM ssse3
+VL_FUNCS 1                                          ; SSSE3 build: 1 stack slot
+INIT_XMM avx
+VL_FUNCS 1                                          ; AVX build: 1 stack slot, vpalignr branches in the loops
+
+%macro VR_FUNCS 0 ; emits the vr predictors for the instruction set chosen by the preceding INIT_XMM
+cglobal vp9_ipred_vr_4x4_16, 4, 4, 3, dst, stride, l, a ; 4x4: row 0 = 2-tap averages, row 1 = 3-tap filtered; rows 2/3 repeat them moved right with a filtered left sample in front
+    movu                    m0, [aq-2]              ; *abcdefg
+    movhps                  m1, [lq]                ; 4 left samples into the high half of m1 (low half is don't-care)
+    PALIGNR                 m0, m1, 10, m2          ; xyz*abcd
+    pslldq                  m1, m0, 2               ; .xyz*abc
+    pslldq                  m2, m0, 4               ; ..xyz*ab
+    LOWPASS                  2,  1, 0               ; ..YZ#ABC
+    pavgw                   m1, m0                  ; ....#ABC
+    DEFINE_ARGS dst, stride, stride3
+    lea               stride3q, [strideq*3]
+
+    movhps    [dstq+strideq*0], m1                  ; row 0: high half of the averages
+    movhps    [dstq+strideq*1], m2                  ; row 1: high half of the 3-tap result
+    shufps                  m0, m2, m1, q3210       ; low half from m2 (3-tap), high half from m1 (averages)
+%if cpuflag(ssse3)
+    pshufb                  m2, [pb_4_5_8to13_8x0]  ; by the constant's name: bytes 4-5 then 8-13, i.e. word 2 followed by words 4-6
+%else
+    pshuflw                 m2, m2, q2222           ; copy word 2 over the low half ...
+    psrldq                  m2, 6                   ; ... then shift so it sits right before words 4-6
+%endif
+    psrldq                  m0, 6                   ; word 3 of the 3-tap result followed by the first three averages
+    movh      [dstq+strideq*2], m0                  ; row 2
+    movh      [dstq+stride3q ], m2                  ; row 3
+    RET
+
+cglobal vp9_ipred_vr_8x8_16, 4, 4, 5, dst, stride, l, a ; 8x8: rows 0/1 = averaged / 3-tap filtered above edge; each later row pair moves right, taking filtered left samples
+    movu                    m1, [aq-2]              ; *abcdefg
+    movu                    m2, [lq]                ; stuvwxyz
+    mova                    m0, [aq]                ; abcdefgh
+    PALIGNR                 m3, m1, m2, 14, m4      ; z*abcdef
+    LOWPASS                  3,  1,  0              ; m3 = 3-tap filtered top-left/above (row 1)
+    pavgw                   m0, m1                  ; m0 = 2-tap averages (row 0)
+    PALIGNR                 m1, m2,  2, m4          ; tuvwxyz*
+    pslldq                  m4, m2,  2              ; .stuvwxy
+    LOWPASS                  4,  2,  1              ; m4 = 3-tap filtered left edge
+    DEFINE_ARGS dst, stride, stride3
+    lea               stride3q, [strideq*3]
+
+    mova      [dstq+strideq*0], m0                  ; row 0
+    mova      [dstq+strideq*1], m3                  ; row 1
+    PALIGNR                 m0, m4, 14, m1          ; move the row right by one sample, inserting the top word of m4
+    pslldq                  m4, 2                   ; expose the next filtered left sample
+    PALIGNR                 m3, m4, 14, m1
+    pslldq                  m4, 2
+    mova      [dstq+strideq*2], m0                  ; row 2
+    mova      [dstq+stride3q ], m3                  ; row 3
+    lea                   dstq, [dstq+strideq*4]    ; dst now points at row 4
+    PALIGNR                 m0, m4, 14, m1
+    pslldq                  m4, 2
+    PALIGNR                 m3, m4, 14, m1
+    pslldq                  m4, 2
+    mova      [dstq+strideq*0], m0                  ; row 4
+    mova      [dstq+strideq*1], m3                  ; row 5
+    PALIGNR                 m0, m4, 14, m1
+    pslldq                  m4, 2
+    PALIGNR                 m3, m4, 14, m4          ; last use of m4, so it also serves as the scratch operand
+    mova      [dstq+strideq*2], m0                  ; row 6
+    mova      [dstq+stride3q ], m3                  ; row 7
+    RET
+
+cglobal vp9_ipred_vr_16x16_16, 4, 4, 8, dst, stride, l, a ; 16x16: even rows start from averages, odd rows from 3-tap results; each row pair moves right by one sample
+    movu                    m1, [aq-2]              ; *abcdefg
+    movu                    m2, [aq+mmsize-2]       ; hijklmno
+    mova                    m3, [aq]                ; abcdefgh
+    mova                    m4, [aq+mmsize]         ; ijklmnop
+    mova                    m5, [lq+mmsize]         ; stuvwxyz
+    PALIGNR                 m0, m1, m5, 14, m6      ; z*abcdef
+    movu                    m6, [aq+mmsize-4]       ; ghijklmn
+    LOWPASS                  6,  2,  4              ; m6 = 3-tap filtered, columns 8-15 (odd rows)
+    pavgw                   m2, m4                  ; m2 = 2-tap averages, columns 8-15 (even rows)
+    LOWPASS                  0,  1,  3              ; m0 = 3-tap filtered, columns 0-7 (odd rows)
+    pavgw                   m3, m1                  ; m3 = 2-tap averages, columns 0-7 (even rows)
+    PALIGNR                 m1, m5,  2, m7          ; tuvwxyz*
+    movu                    m7, [lq+mmsize-2]       ; rstuvwxy
+    LOWPASS                  1,  5,  7              ; m1 = 3-tap filtered left edge, upper 8 samples
+    movu                    m5, [lq+2]              ; lmnopqrs
+    pslldq                  m4, m5,  2              ; .lmnopqr
+    pslldq                  m7, m5,  4              ; ..lmnopq
+    LOWPASS                  5,  4,  7              ; m5 = 3-tap filtered left edge, lower samples
+    psrld                   m4, m1, 16              ; odd-indexed words of m1, one per dword
+    psrld                   m7, m5, 16              ; odd-indexed words of m5
+    pand                    m1, [pd_65535]          ; even-indexed words of m1 (pd_65535 presumably masks the low word of each dword)
+    pand                    m5, [pd_65535]          ; even-indexed words of m5
+    packssdw                m7, m4                  ; m7 = odd-indexed filtered left samples, feeding the even rows below
+    packssdw                m5, m1                  ; m5 = even-indexed filtered left samples, feeding the odd rows below
+    DEFINE_ARGS dst, stride, cnt
+    mov                   cntd, 8                   ; 8 passes of 2 rows each
+
+.loop:
+    mova   [dstq+strideq*0+ 0], m3                  ; even row
+    mova   [dstq+strideq*0+16], m2
+    mova   [dstq+strideq*1+ 0], m0                  ; odd row
+    mova   [dstq+strideq*1+16], m6
+    lea                   dstq, [dstq+strideq*2]
+    PALIGNR                 m2, m3, 14, m4          ; move the even row right by one sample across both halves ...
+    PALIGNR                 m3, m7, 14, m4          ; ... inserting the top word of m7 at the front
+    pslldq                  m7, 2                   ; expose the next left sample
+    PALIGNR                 m6, m0, 14, m4          ; same for the odd row, fed from m5
+    PALIGNR                 m0, m5, 14, m4
+    pslldq                  m5, 2
+    dec                   cntd
+    jg .loop
+    RET
+
+cglobal vp9_ipred_vr_32x32_16, 4, 5, 14, 6 * mmsize * ARCH_X86_32, dst, stride, l, a ; 32 rows of 32 words; 6 mmsize-byte stack slots are requested on x86-32 only
+    movu                    m0, [aq+mmsize*0-2]     ; *a[0-6]
+    movu                    m1, [aq+mmsize*1-2]     ; a[7-14]
+    movu                    m2, [aq+mmsize*2-2]     ; a[15-22]
+    movu                    m3, [aq+mmsize*3-2]     ; a[23-30]
+    mova                    m4, [aq+mmsize*3+0]     ; a[24-31]
+    movu                    m5, [aq+mmsize*3-4]     ; a[22-29]
+    LOWPASS                  5,  3,  4              ; A[23-30]
+    SCRATCH                  5,  8, rsp+0*mmsize    ; park A[23-30]: used as m8 on x86-64, reloaded from this stack slot on x86-32
+    pavgw                   m3, m4                  ; m3 = rounded average of a[23-30] and a[24-31]
+    mova                    m4, [aq+mmsize*2+0]     ; a[16-23]
+    movu                    m6, [aq+mmsize*2-4]     ; a[14-21]
+    LOWPASS                  6,  2,  4              ; A[15-22]
+    SCRATCH                  6,  9, rsp+1*mmsize    ; park A[15-22]
+    pavgw                   m2, m4                  ; m2 = rounded average of a[15-22] and a[16-23]
+    mova                    m4, [aq+mmsize*1+0]     ; a[8-15]
+    movu                    m7, [aq+mmsize*1-4]     ; a[6-13]
+    LOWPASS                  7,  1,  4              ; A[7-14]
+    SCRATCH                  7, 10, rsp+2*mmsize    ; park A[7-14]
+    pavgw                   m1, m4                  ; m1 = rounded average of a[7-14] and a[8-15]
+    mova                    m4, [aq+mmsize*0+0]     ; a[0-7]
+    mova                    m5, [lq+mmsize*3+0]     ; l[24-31]
+    PALIGNR                 m6, m0, m5, 14, m7      ; l[31]*a[0-5]
+    LOWPASS                  6,  0,  4              ; #A[0-6]
+    SCRATCH                  6, 11, rsp+3*mmsize    ; park #A[0-6]
+    pavgw                   m4, m0                  ; m4 = rounded average of a[0-7] and *a[0-6]
+    PALIGNR                 m0, m5,  2, m7          ; l[25-31]*
+    movu                    m7, [lq+mmsize*3-2]     ; l[23-30]
+    LOWPASS                  0,  5,  7              ; L[24-31]
+    movu                    m5, [lq+mmsize*2-2]     ; l[15-22]
+    mova                    m7, [lq+mmsize*2+0]     ; l[16-23]
+    movu                    m6, [lq+mmsize*2+2]     ; l[17-24]
+    LOWPASS                  5,  7,  6              ; L[16-23]
+    psrld                   m7, m0, 16              ; odd-indexed words of L[24-31], one per dword
+    psrld                   m6, m5, 16              ; odd-indexed words of L[16-23], one per dword
+    pand                    m0, [pd_65535]          ; presumably keeps the even-indexed words (pd_65535 is defined outside this chunk)
+    pand                    m5, [pd_65535]          ; same for L[16-23]
+    packssdw                m6, m7                  ; m6 = odd words of L[16-23] (low half) and of L[24-31] (high half)
+    packssdw                m5, m0                  ; m5 = even words, same layout
+    SCRATCH                  5, 12, rsp+4*mmsize    ; park the even-word vector
+    SCRATCH                  6, 13, rsp+5*mmsize    ; park the odd-word vector while m6 is reused below
+    movu                    m6, [lq+mmsize*1-2]     ; l[7-14]
+    mova                    m0, [lq+mmsize*1+0]     ; l[8-15]
+    movu                    m5, [lq+mmsize*1+2]     ; l[9-16]
+    LOWPASS                  6,  0,  5              ; L[8-15]
+    movu                    m0, [lq+mmsize*0+2]     ; l[1-8]
+    pslldq                  m5, m0,  2              ; .l[1-7]
+    pslldq                  m7, m0,  4              ; ..l[1-6]
+    LOWPASS                  0,  5,  7              ; m0 = LOWPASS(l[1-8], .l[1-7], ..l[1-6])
+    psrld                   m5, m6, 16              ; odd-indexed words of L[8-15]
+    psrld                   m7, m0, 16              ; odd-indexed words of the filtered low-l vector in m0
+    pand                    m6, [pd_65535]          ; presumably the even-indexed words of L[8-15]
+    pand                    m0, [pd_65535]          ; presumably the even-indexed words of m0
+    packssdw                m7, m5                  ; m7 = odd words of m0 (low half) and of L[8-15] (high half)
+    packssdw                m0, m6                  ; m0 = even words, same layout
+    UNSCRATCH                6, 13, rsp+5*mmsize    ; odd-word vector of L[16-31] back into m6 for the loop
+    DEFINE_ARGS dst, stride, stride16, cnt, stride17 ; l and a are not read below; their registers are renamed
+    mov              stride16q, strideq
+    mov                   cntd, 8                   ; 8 iterations per loop
+    shl              stride16q, 4                   ; stride16 = 16 * stride
+%if ARCH_X86_64
+    lea              stride17q, [stride16q+strideq] ; stride17 = 17 * stride
+%endif
+
+.loop:
+    mova   [dstq+strideq*0+ 0], m4                  ; row +0: m4, m1, m2, m3
+    mova   [dstq+strideq*0+16], m1
+    mova   [dstq+strideq*0+32], m2
+    mova   [dstq+strideq*0+48], m3
+%if ARCH_X86_64
+    mova   [dstq+strideq*1+ 0], m11                 ; row +1 (x86-64 only; x86-32 writes these rows in .loop2)
+    mova   [dstq+strideq*1+16], m10
+    mova   [dstq+strideq*1+32], m9
+    mova   [dstq+strideq*1+48], m8
+%endif
+    mova   [dstq+stride16q+ 0], m6                  ; row +16: the row +0 data moved right by one register, m6 first
+    mova   [dstq+stride16q+16], m4
+    mova   [dstq+stride16q+32], m1
+    mova   [dstq+stride16q+48], m2
+%if ARCH_X86_64
+    mova   [dstq+stride17q+ 0], m12                 ; row +17 (x86-64 only)
+    mova   [dstq+stride17q+16], m11
+    mova   [dstq+stride17q+32], m10
+    mova   [dstq+stride17q+48], m9
+%endif
+    lea                   dstq, [dstq+strideq*2]    ; advance two rows
+    PALIGNR                 m3, m2,  14, m5         ; shift the m7->m6->m4->m1->m2->m3 chain right by one word (m5 is a temporary)
+    PALIGNR                 m2, m1,  14, m5
+    PALIGNR                 m1, m4,  14, m5
+    PALIGNR                 m4, m6,  14, m5
+    PALIGNR                 m6, m7,  14, m5
+    pslldq                  m7, 2                   ; move the next word of m7 into the top position
+%if ARCH_X86_64
+    PALIGNR                 m8, m9,  14, m5         ; same one-word shift for the m0->m12->m11->m10->m9->m8 chain
+    PALIGNR                 m9, m10, 14, m5
+    PALIGNR                m10, m11, 14, m5
+    PALIGNR                m11, m12, 14, m5
+    PALIGNR                m12, m0,  14, m5
+    pslldq                  m0, 2                   ; move the next word of m0 into the top position
+%endif
+    dec                   cntd
+    jg .loop
+
+%if ARCH_X86_32                                     ; x86-32 only: second pass for the rows x86-64 wrote from m8-m12
+    UNSCRATCH                5, 12, rsp+4*mmsize    ; reload the parked vectors into m1-m5 (m0 is untouched by the first loop here)
+    UNSCRATCH                4, 11, rsp+3*mmsize
+    UNSCRATCH                3, 10, rsp+2*mmsize
+    UNSCRATCH                2,  9, rsp+1*mmsize
+    UNSCRATCH                1,  8, rsp+0*mmsize
+    mov                   dstq, dstm                ; reload the original dst argument
+    mov                   cntd, 8
+    add                   dstq, strideq             ; start one row below the first pass
+.loop2:
+    mova   [dstq+strideq*0+ 0], m4                  ; same columns as the x86-64 m11, m10, m9, m8 stores
+    mova   [dstq+strideq*0+16], m3
+    mova   [dstq+strideq*0+32], m2
+    mova   [dstq+strideq*0+48], m1
+    mova   [dstq+stride16q+ 0], m5                  ; same columns as the x86-64 m12, m11, m10, m9 stores
+    mova   [dstq+stride16q+16], m4
+    mova   [dstq+stride16q+32], m3
+    mova   [dstq+stride16q+48], m2
+    lea                   dstq, [dstq+strideq*2]    ; advance two rows
+    PALIGNR                 m1, m2,  14, m6         ; one-word shift of the m0->m5->m4->m3->m2->m1 chain (m6 is a temporary)
+    PALIGNR                 m2, m3,  14, m6
+    PALIGNR                 m3, m4,  14, m6
+    PALIGNR                 m4, m5,  14, m6
+    PALIGNR                 m5, m0,  14, m6
+    pslldq                  m0, 2                   ; move the next word of m0 into the top position
+    dec                   cntd
+    jg .loop2
+%endif
+    RET
+%endmacro
+
+INIT_XMM sse2                                      ; expand VR_FUNCS once per instruction set: SSE2 ...
+VR_FUNCS
+INIT_XMM ssse3                                     ; ... SSSE3 ...
+VR_FUNCS
+INIT_XMM avx                                       ; ... and AVX
+VR_FUNCS
+
+%macro HU_FUNCS 1 ; stack_mem_for_32x32_32bit_function
+cglobal vp9_ipred_hu_4x4_16, 3, 3, 3, dst, stride, l, a ; writes 4 rows of 4 words to dst from l only (a is named but never read here)
+    movh                    m0, [lq]                ; abcd
+%if cpuflag(ssse3)
+    pshufb                  m0, [pb_0to7_67x4]      ; abcddddd
+%else
+    punpcklqdq              m0, m0                  ; abcdabcd
+    pshufhw                 m0, m0, q3333           ; abcddddd
+%endif
+    psrldq                  m1, m0,  2              ; bcddddd.
+    psrldq                  m2, m0,  4              ; cddddd..
+    LOWPASS                  2,  1,  0              ; BCDddd..
+    pavgw                   m1, m0                  ; abcddddd
+    SBUTTERFLY          wd,  1,  2,  0              ; aBbCcDdd, dddddddd
+    PALIGNR                 m2, m1,  4, m0          ; bCcDdddd
+    DEFINE_ARGS dst, stride, stride3                ; the third register (previously l) is renamed to stride3
+    lea               stride3q, [strideq*3]         ; stride3 = 3 * stride
+
+    movh      [dstq+strideq*0], m1                  ; aBbC
+    movh      [dstq+strideq*1], m2                  ; bCcD
+    movhps    [dstq+strideq*2], m1                  ; cDdd
+    movhps    [dstq+stride3q ], m2                  ; dddd
+    RET
+
+cglobal vp9_ipred_hu_8x8_16, 3, 3, 4, dst, stride, l, a ; writes 8 rows of 8 words to dst from l only (a is named but never read here)
+    mova                    m0, [lq]                ; 8 words from l
+%if cpuflag(ssse3)
+    mova                    m3, [pb_2to15_14_15]    ; constant handed to SHIFT_RIGHTx2 on SSSE3 and newer
+%endif
+    SHIFT_RIGHTx2           m1, m2, m0, m3          ; defined outside this chunk; presumably m1/m2 = m0 shifted by one/two words with the last word repeated - confirm
+    LOWPASS                  2,  1,  0              ; m2 = LOWPASS(m2, m1, m0); LOWPASS is defined outside this chunk
+    pavgw                   m1, m0                  ; m1 = rounded average of m1 and m0
+    SBUTTERFLY          wd,  1,  2,  0              ; interleave the words of m1 (averaged) and m2 (filtered); m0 is a temporary
+    shufps                  m0, m1, m2, q1032       ; m0 = high qword of m1, then low qword of m2
+    pshufd                  m3, m2, q3332           ; m3 = high qword of m2, then its top dword repeated twice
+    DEFINE_ARGS dst, stride, stride3                ; the third register (previously l) is renamed to stride3
+    lea               stride3q, [strideq*3]         ; stride3 = 3 * stride
+
+    mova     [dstq+strideq *0], m1                  ; row 0
+    mova     [dstq+strideq *2], m0                  ; row 2
+    mova     [dstq+strideq *4], m2                  ; row 4
+    mova     [dstq+stride3q*2], m3                  ; row 6
+    add                   dstq, strideq             ; switch to the odd rows
+%if cpuflag(avx)
+    vpalignr                m1, m2, m1, 4           ; m1 = (m2:m1) shifted right by 4 bytes (two words)
+%else
+    PALIGNR                 m0, m2, m1, 4, m3       ; same shift without the 3-operand form: build it in m0 ...
+    mova                    m1, m0                  ; ... then copy back to m1
+%endif
+    pshufd                  m2, m2, q3321           ; m2 shifted right by one dword, top dword repeated
+    shufps                  m0, m1, m2, q1032       ; m0 = high qword of m1, then low qword of m2
+    pshufd                  m3, m2, q3332           ; m3 = high qword of m2, then its top dword repeated twice
+    mova     [dstq+strideq *0], m1                  ; row 1
+    mova     [dstq+strideq *2], m0                  ; row 3
+    mova     [dstq+strideq *4], m2                  ; row 5
+    mova     [dstq+stride3q*2], m3                  ; row 7
+    RET
+
+cglobal vp9_ipred_hu_16x16_16, 3, 4, 6 + notcpuflag(ssse3), dst, stride, l, a ; 16 rows of 16 words from l only; one extra vector register when SSSE3 is unavailable
+    mova                    m0, [lq]                ; first 8 words of l
+    mova                    m3, [lq+mmsize]         ; last 8 words of l
+    movu                    m1, [lq+2]              ; l advanced by one word
+    movu                    m2, [lq+4]              ; l advanced by two words
+    LOWPASS                  2,  1,  0              ; m2 = LOWPASS(m2, m1, m0); LOWPASS is defined outside this chunk
+    pavgw                   m1, m0                  ; m1 = rounded average of m1 and m0
+    SBUTTERFLY           wd, 1,  2,  0              ; interleave the averaged (m1) and filtered (m2) words; m0 is a temporary
+%if cpuflag(ssse3)
+    mova                    m5, [pb_2to15_14_15]    ; constant handed to SHIFT_RIGHTx2 on SSSE3 and newer
+%endif
+    SHIFT_RIGHTx2           m0, m4, m3, m5          ; defined outside this chunk; presumably m0/m4 = m3 shifted by one/two words with the last word repeated - confirm
+    LOWPASS                  4,  0,  3              ; m4 = LOWPASS(m4, m0, m3)
+    pavgw                   m3, m0                  ; m3 = rounded average of m3 and m0
+    SBUTTERFLY           wd, 3,  4,  5              ; interleave the averaged (m3) and filtered (m4) words; m5 is a temporary
+    pshufd                  m0, m0, q3333           ; broadcast the top dword of m0 to all four dwords
+    DEFINE_ARGS dst, stride, stride3, cnt           ; the third register (previously l) is renamed to stride3; cnt is the fourth
+    lea               stride3q, [strideq*3]         ; stride3 = 3 * stride
+    mov                   cntd, 4                   ; 4 iterations, each writing rows +0, +4, +8 and +12
+
+.loop:
+    mova  [dstq+strideq *0+ 0], m1                  ; row +0: m1, m2
+    mova  [dstq+strideq *0+16], m2
+    mova  [dstq+strideq *4+ 0], m2                  ; row +4: m2, m3
+    mova  [dstq+strideq *4+16], m3
+    mova  [dstq+strideq *8+ 0], m3                  ; row +8: m3, m4
+    mova  [dstq+strideq *8+16], m4
+    mova  [dstq+stride3q*4+ 0], m4                  ; row +12: m4, m0
+    mova  [dstq+stride3q*4+16], m0
+    add                   dstq, strideq             ; next row within each group of four
+%if cpuflag(avx)
+    vpalignr                m1, m2, m1, 4           ; shift the m1<-m2<-m3<-m4<-m0 chain by 4 bytes (two words)
+    vpalignr                m2, m3, m2, 4
+    vpalignr                m3, m4, m3, 4
+    vpalignr                m4, m0, m4, 4
+%else
+    PALIGNR                 m5, m2, m1, 4, m6       ; same chain shift without the 3-operand form: result in m5, m6 as temporary
+    mova                    m1, m5
+    PALIGNR                 m5, m3, m2, 4, m6
+    mova                    m2, m5
+    PALIGNR                 m5, m4, m3, 4, m6
+    mova                    m3, m5
+    PALIGNR                 m5, m0, m4, 4, m6
+    mova                    m4, m5
+%endif
+    dec                   cntd
+    jg .loop
+    RET
+
+cglobal vp9_ipred_hu_32x32_16, 3, 7, 10 + notcpuflag(ssse3), \
+                               %1 * -mmsize * ARCH_X86_32, dst, stride, l, a ; 32 rows of 32 words from l only; %1 mmsize-byte stack slots (passed negated) on x86-32 only
+    mova                    m2, [lq+mmsize*0+0]     ; l words 0-7
+    movu                    m1, [lq+mmsize*0+2]     ; l words 1-8
+    movu                    m0, [lq+mmsize*0+4]     ; l words 2-9
+    LOWPASS                  0,  1,  2              ; m0 = LOWPASS(m0, m1, m2); LOWPASS is defined outside this chunk
+    pavgw                   m1, m2                  ; m1 = rounded average of m1 and m2
+    SBUTTERFLY           wd, 1,  0,  2              ; interleave the averaged (m1) and filtered (m0) words; m2 is a temporary
+    SCRATCH                  1,  8, rsp+0*mmsize    ; park m1: swapped back in as m8 (x86-64) or from this stack slot (x86-32) inside the loop
+    mova                    m4, [lq+mmsize*1+0]     ; l words 8-15
+    movu                    m3, [lq+mmsize*1+2]     ; l words 9-16
+    movu                    m2, [lq+mmsize*1+4]     ; l words 10-17
+    LOWPASS                  2,  3,  4              ; m2 = LOWPASS(m2, m3, m4)
+    pavgw                   m3, m4                  ; m3 = rounded average of m3 and m4
+    SBUTTERFLY           wd, 3,  2,  4              ; interleave m3 and m2; m4 is a temporary
+    mova                    m6, [lq+mmsize*2+0]     ; l words 16-23
+    movu                    m5, [lq+mmsize*2+2]     ; l words 17-24
+    movu                    m4, [lq+mmsize*2+4]     ; l words 18-25
+    LOWPASS                  4,  5,  6              ; m4 = LOWPASS(m4, m5, m6)
+    pavgw                   m5, m6                  ; m5 = rounded average of m5 and m6
+    SBUTTERFLY           wd, 5,  4,  6              ; interleave m5 and m4; m6 is a temporary
+    mova                    m7, [lq+mmsize*3+0]     ; l words 24-31
+    SCRATCH                  0,  9, rsp+1*mmsize    ; park m0 so it can hold the shuffle constant / serve as temporary below
+%if cpuflag(ssse3)
+    mova                    m0, [pb_2to15_14_15]    ; constant handed to SHIFT_RIGHTx2 on SSSE3 and newer
+%endif
+    SHIFT_RIGHTx2           m1, m6, m7, m0          ; defined outside this chunk; presumably m1/m6 = m7 shifted by one/two words with the last word repeated - confirm
+    LOWPASS                  6,  1,  7              ; m6 = LOWPASS(m6, m1, m7)
+    pavgw                   m7, m1                  ; m7 = rounded average of m7 and m1
+    SBUTTERFLY           wd, 7,  6,  0              ; interleave m7 and m6; m0 is a temporary
+    pshufd                  m1, m1, q3333           ; broadcast the top dword of m1 to all four dwords
+    UNSCRATCH                0,  9, rsp+1*mmsize    ; restore the parked m0
+    DEFINE_ARGS dst, stride, cnt, stride3, stride4, stride20, stride28 ; the third register (previously l) becomes cnt
+    lea               stride3q, [strideq*3]         ; stride3 = 3 * stride
+    lea               stride4q, [strideq*4]         ; stride4 = 4 * stride
+    lea              stride28q, [stride4q*8]        ; 32 * stride for now, corrected by the sub below
+    lea              stride20q, [stride4q*5]        ; stride20 = 20 * stride
+    sub              stride28q, stride4q            ; stride28 = 28 * stride
+    mov                   cntd, 4                   ; 4 iterations, each writing rows +0, +4, ..., +28
+
+.loop:
+%if ARCH_X86_64
+    SWAP                     1,  8                  ; exchange the names m1 and m8: the parked first vector becomes m1
+%else
+    mova        [rsp+1*mmsize], m1                  ; x86-32: save the broadcast vector ...
+    mova                    m1, [rsp+0*mmsize]      ; ... and load the parked first vector into m1
+%endif
+    mova  [dstq+strideq *0+ 0], m1                  ; row +0: m1, m0, m3, m2
+    mova  [dstq+strideq *0+16], m0
+    mova  [dstq+strideq *0+32], m3
+    mova  [dstq+strideq *0+48], m2
+    mova  [dstq+stride4q*1+ 0], m0                  ; row +4: m0, m3, m2, m5
+    mova  [dstq+stride4q*1+16], m3
+    mova  [dstq+stride4q*1+32], m2
+    mova  [dstq+stride4q*1+48], m5
+    mova  [dstq+stride4q*2+ 0], m3                  ; row +8: m3, m2, m5, m4
+    mova  [dstq+stride4q*2+16], m2
+    mova  [dstq+stride4q*2+32], m5
+    mova  [dstq+stride4q*2+48], m4
+%if cpuflag(avx)
+    vpalignr                m1, m0, m1, 4           ; shift the first three vectors of the chain by 4 bytes (two words)
+    vpalignr                m0, m3, m0, 4
+    vpalignr                m3, m2, m3, 4
+%else
+    SCRATCH                  6,  9, rsp+2*mmsize    ; free m6 to receive the PALIGNR results
+%if notcpuflag(ssse3)
+    SCRATCH                  7, 10, rsp+3*mmsize    ; SSE2 only: also free m7, used as the PALIGNR temporary
+%endif
+    PALIGNR                 m6, m0, m1, 4, m7       ; same three shifts as the AVX branch, built in m6 and copied back
+    mova                    m1, m6
+    PALIGNR                 m6, m3, m0, 4, m7
+    mova                    m0, m6
+    PALIGNR                 m6, m2, m3, 4, m7
+    mova                    m3, m6
+    UNSCRATCH                6,  9, rsp+2*mmsize    ; restore m6 ...
+    SCRATCH                  0,  9, rsp+2*mmsize    ; ... and park m0 instead: it receives the results of the second shift group
+%if notcpuflag(ssse3)
+    UNSCRATCH                7, 10, rsp+3*mmsize    ; SSE2 only: restore m7 ...
+    SCRATCH                  3, 10, rsp+3*mmsize    ; ... and park m3, the temporary of the second shift group
+%endif
+%endif
+%if ARCH_X86_64
+    SWAP                     1,  8                  ; swap the names back: m1 is the broadcast vector again
+%else
+    mova        [rsp+0*mmsize], m1                  ; x86-32: store the shifted first vector back ...
+    mova                    m1, [rsp+1*mmsize]      ; ... and reload the broadcast vector
+%endif
+    mova  [dstq+stride3q*4+ 0], m2                  ; row +12: m2, m5, m4, m7
+    mova  [dstq+stride3q*4+16], m5
+    mova  [dstq+stride3q*4+32], m4
+    mova  [dstq+stride3q*4+48], m7
+    mova  [dstq+stride4q*4+ 0], m5                  ; row +16: m5, m4, m7, m6
+    mova  [dstq+stride4q*4+16], m4
+    mova  [dstq+stride4q*4+32], m7
+    mova  [dstq+stride4q*4+48], m6
+    mova  [dstq+stride20q + 0], m4                  ; row +20: m4, m7, m6, m1
+    mova  [dstq+stride20q +16], m7
+    mova  [dstq+stride20q +32], m6
+    mova  [dstq+stride20q +48], m1
+    mova  [dstq+stride3q*8+ 0], m7                  ; row +24: m7, m6, m1, m1
+    mova  [dstq+stride3q*8+16], m6
+    mova  [dstq+stride3q*8+32], m1
+    mova  [dstq+stride3q*8+48], m1
+    mova  [dstq+stride28q + 0], m6                  ; row +28: m6, m1, m1, m1
+    mova  [dstq+stride28q +16], m1
+    mova  [dstq+stride28q +32], m1
+    mova  [dstq+stride28q +48], m1
+%if cpuflag(avx)
+    vpalignr                m2, m5, m2, 4           ; shift the remaining vectors of the chain by 4 bytes (two words)
+    vpalignr                m5, m4, m5, 4
+    vpalignr                m4, m7, m4, 4
+    vpalignr                m7, m6, m7, 4
+    vpalignr                m6, m1, m6, 4
+%else
+    PALIGNR                 m0, m5, m2, 4, m3       ; same shifts, built in m0 with m3 as temporary (both were parked above where needed)
+    mova                    m2, m0
+    PALIGNR                 m0, m4, m5, 4, m3
+    mova                    m5, m0
+    PALIGNR                 m0, m7, m4, 4, m3
+    mova                    m4, m0
+    PALIGNR                 m0, m6, m7, 4, m3
+    mova                    m7, m0
+    PALIGNR                 m0, m1, m6, 4, m3
+    mova                    m6, m0
+    UNSCRATCH                0,  9, rsp+2*mmsize    ; restore the parked m0
+%if notcpuflag(ssse3)
+    UNSCRATCH                3, 10, rsp+3*mmsize    ; SSE2 only: restore the parked m3
+%endif
+%endif
+    add                   dstq, strideq             ; next row within each group of four
+    dec                   cntd
+    jg .loop
+    RET
+%endmacro
+
+INIT_XMM sse2
+HU_FUNCS 4                                         ; SSE2: the 32x32 function uses stack slots 0-3 on x86-32
+INIT_XMM ssse3
+HU_FUNCS 3                                         ; SSSE3: slot 3 is only touched under notcpuflag(ssse3), so 3 slots
+INIT_XMM avx
+HU_FUNCS 2                                         ; AVX: only slots 0 and 1 are touched
+
+%macro HD_FUNCS 0
+cglobal vp9_ipred_hd_4x4_16, 4, 4, 4, dst, stride, l, a ; writes 4 rows of 4 words to dst, built from l and a
+    movh                    m0, [lq]                ; low half: 4 words from l
+    movhps                  m0, [aq-2]              ; high half: 4 words starting one word before a
+    psrldq                  m1, m0, 2               ; m0 shifted right by one word
+    psrldq                  m2, m0, 4               ; m0 shifted right by two words
+    LOWPASS                  2,  1,  0              ; m2 = LOWPASS(m2, m1, m0); LOWPASS is defined outside this chunk
+    pavgw                   m1, m0                  ; m1 = rounded average of m1 and m0
+    punpcklwd               m1, m2                  ; interleave the low 4 averaged words with the low 4 filtered words
+    DEFINE_ARGS dst, stride, stride3                ; l and a are not read below; the third register is renamed to stride3
+    lea               stride3q, [strideq*3]         ; stride3 = 3 * stride
+
+    movh      [dstq+stride3q ], m1                  ; row 3 = low qword of m1
+    movhps    [dstq+strideq*1], m1                  ; row 1 = high qword of m1
+    movhlps                 m2, m2                  ; copy the high qword of m2 into its low qword
+    PALIGNR                 m2, m1, 4, m0           ; m2 = (m2:m1) shifted right by 4 bytes (two words); m0 is a temporary
+    movh      [dstq+strideq*2], m2                  ; row 2 = low qword of m2
+    movhps    [dstq+strideq*0], m2                  ; row 0 = high qword of m2
+    RET
+
+cglobal vp9_ipred_hd_8x8_16, 4, 4, 5, dst, stride, l, a ; writes 8 rows of 8 words to dst, built from l and a, bottom row first
+    mova                    m0, [lq]                ; 8 words from l
+    movu                    m1, [aq-2]              ; 8 words starting one word before a
+    PALIGNR                 m2, m1, m0, 2, m3       ; m2 = (m1:m0) shifted right by one word
+    PALIGNR                 m3, m1, m0, 4, m4       ; m3 = (m1:m0) shifted right by two words
+    LOWPASS                  3,  2,  0              ; m3 = LOWPASS(m3, m2, m0); LOWPASS is defined outside this chunk
+    pavgw                   m2, m0                  ; m2 = rounded average of m2 and m0
+    SBUTTERFLY           wd, 2,  3,  0              ; interleave the averaged (m2) and filtered (m3) words; m0 is a temporary
+    psrldq                  m0, m1,  2              ; m1 shifted right by one word
+    psrldq                  m4, m1,  4              ; m1 shifted right by two words
+    LOWPASS                  1,  0,  4              ; m1 = LOWPASS(m1, m0, m4)
+    DEFINE_ARGS dst8, mstride, cnt                  ; registers renamed: dst -> dst8, stride -> mstride, l -> cnt
+    lea                  dst8q, [dst8q+mstrideq*8]  ; dst8 = dst + 8 * stride (mstride is still positive here)
+    neg               mstrideq                      ; mstride = -stride
+    mov                   cntd, 4                   ; 4 iterations, two rows each
+
+.loop:
+    add                  dst8q, mstrideq            ; step one row up
+    mova    [dst8q+mstrideq*0], m2                  ; rows 7, 6, 5, 4 over the four iterations
+    mova    [dst8q+mstrideq*4], m3                  ; rows 3, 2, 1, 0 over the four iterations
+%if cpuflag(avx)
+    vpalignr                m2, m3, m2, 4           ; shift the m2<-m3<-m1 chain by 4 bytes (two words)
+    vpalignr                m3, m1, m3, 4
+%else
+    PALIGNR                 m0, m3, m2, 4, m4       ; same shifts without the 3-operand form: built in m0, m4 as temporary
+    mova                    m2, m0
+    PALIGNR                 m0, m1, m3, 4, m4
+    mova                    m3, m0
+%endif
+    psrldq                  m1, 4                   ; drop the two words of m1 that were just consumed
+    dec                   cntd
+    jg .loop
+    RET
+
+cglobal vp9_ipred_hd_16x16_16, 4, 4, 8, dst, stride, l, a ; writes 16 rows of 16 words to dst, built from l and a, bottom row of each group first
+    mova                    m2, [lq]                ; l words 0-7
+    movu                    m1, [lq+2]              ; l words 1-8
+    movu                    m0, [lq+4]              ; l words 2-9
+    LOWPASS                  0,  1,  2              ; m0 = LOWPASS(m0, m1, m2); LOWPASS is defined outside this chunk
+    pavgw                   m1, m2                  ; m1 = rounded average of m1 and m2
+    mova                    m4, [lq+mmsize]         ; l words 8-15
+    movu                    m5, [aq-2]              ; 8 words starting one word before a
+    PALIGNR                 m3, m5, m4, 2, m6       ; m3 = (m5:m4) shifted right by one word
+    PALIGNR                 m2, m5, m4, 4, m6       ; m2 = (m5:m4) shifted right by two words
+    LOWPASS                  2,  3,  4              ; m2 = LOWPASS(m2, m3, m4)
+    pavgw                   m3, m4                  ; m3 = rounded average of m3 and m4
+    SBUTTERFLY           wd, 1,  0,  4              ; interleave the averaged (m1) and filtered (m0) words; m4 is a temporary
+    SBUTTERFLY           wd, 3,  2,  4              ; interleave the averaged (m3) and filtered (m2) words
+    mova                    m6, [aq]                ; a words 0-7
+    movu                    m4, [aq+2]              ; a words 1-8
+    LOWPASS                  4,  6,  5              ; m4 = LOWPASS(m4, m6, m5)
+    movu                    m5, [aq+mmsize-2]       ; a words 7-14
+    psrldq                  m6, m5,  2              ; m5 shifted right by one word
+    psrldq                  m7, m5,  4              ; m5 shifted right by two words
+    LOWPASS                  5,  6,  7              ; m5 = LOWPASS(m5, m6, m7)
+    DEFINE_ARGS dst, mstride, mstride3, cnt         ; registers renamed: stride -> mstride, l -> mstride3, a -> cnt
+    lea                   dstq, [dstq+mstrideq*8]   ; two steps of 8 rows: dst += 16 * stride (mstride still positive)
+    lea                   dstq, [dstq+mstrideq*8]
+    neg               mstrideq                      ; mstride = -stride
+    lea              mstride3q, [mstrideq*3]        ; mstride3 = -3 * stride
+    mov                   cntd, 4                   ; 4 iterations, four rows each
+
+.loop:
+    add                  dstq, mstrideq             ; step one row up
+    mova [dstq+mstride3q*4+ 0], m2                  ; 12 rows above dstq: m2, m4
+    mova [dstq+mstride3q*4+16], m4
+    mova [dstq+mstrideq *8+ 0], m3                  ; 8 rows above dstq: m3, m2
+    mova [dstq+mstrideq *8+16], m2
+    mova [dstq+mstrideq *4+ 0], m0                  ; 4 rows above dstq: m0, m3
+    mova [dstq+mstrideq *4+16], m3
+    mova [dstq+mstrideq *0+ 0], m1                  ; row at dstq: m1, m0
+    mova [dstq+mstrideq *0+16], m0
+%if cpuflag(avx)
+    vpalignr                m1, m0, m1, 4           ; shift the m1<-m0<-m3<-m2<-m4<-m5 chain by 4 bytes (two words)
+    vpalignr                m0, m3, m0, 4
+    vpalignr                m3, m2, m3, 4
+    vpalignr                m2, m4, m2, 4
+    vpalignr                m4, m5, m4, 4
+%else
+    PALIGNR                 m6, m0, m1, 4, m7       ; same shifts without the 3-operand form: built in m6, m7 as temporary
+    mova                    m1, m6
+    PALIGNR                 m6, m3, m0, 4, m7
+    mova                    m0, m6
+    PALIGNR                 m6, m2, m3, 4, m7
+    mova                    m3, m6
+    PALIGNR                 m6, m4, m2, 4, m7
+    mova                    m2, m6
+    PALIGNR                 m6, m5, m4, 4, m7
+    mova                    m4, m6
+%endif
+    psrldq                  m5, 4                   ; drop the two words of m5 that were just consumed
+    dec                   cntd
+    jg .loop
+    RET
+
+cglobal vp9_ipred_hd_32x32_16, 4, 4 + 3 * ARCH_X86_64, 14, \
+                               10 * -mmsize * ARCH_X86_32, dst, stride, l, a ; 32 rows of 32 words from l and a; 3 extra GPRs on x86-64, 10 mmsize-byte stack slots (passed negated) on x86-32
+    mova                    m2, [lq+mmsize*0+0]     ; l words 0-7
+    movu                    m1, [lq+mmsize*0+2]     ; l words 1-8
+    movu                    m0, [lq+mmsize*0+4]     ; l words 2-9
+    LOWPASS                  0,  1,  2              ; m0 = LOWPASS(m0, m1, m2); LOWPASS is defined outside this chunk
+    pavgw                   m1, m2                  ; m1 = rounded average of m1 and m2
+    SBUTTERFLY           wd, 1,  0,  2              ; interleave the averaged (m1) and filtered (m0) words; m2 is a temporary
+    mova                    m4, [lq+mmsize*1+0]     ; l words 8-15
+    movu                    m3, [lq+mmsize*1+2]     ; l words 9-16
+    movu                    m2, [lq+mmsize*1+4]     ; l words 10-17
+    LOWPASS                  2,  3,  4              ; m2 = LOWPASS(m2, m3, m4)
+    pavgw                   m3, m4                  ; m3 = rounded average of m3 and m4
+    SBUTTERFLY           wd, 3,  2,  4              ; interleave m3 and m2; m4 is a temporary
+    SCRATCH                  0,  8, rsp+0*mmsize    ; park the four interleaved vectors: m8-m11 on x86-64, stack slots 0-3 on x86-32
+    SCRATCH                  1,  9, rsp+1*mmsize
+    SCRATCH                  2, 10, rsp+2*mmsize
+    SCRATCH                  3, 11, rsp+3*mmsize
+    mova                    m6, [lq+mmsize*2+0]     ; l words 16-23
+    movu                    m5, [lq+mmsize*2+2]     ; l words 17-24
+    movu                    m4, [lq+mmsize*2+4]     ; l words 18-25
+    LOWPASS                  4,  5,  6              ; m4 = LOWPASS(m4, m5, m6)
+    pavgw                   m5, m6                  ; m5 = rounded average of m5 and m6
+    SBUTTERFLY           wd, 5,  4,  6              ; interleave m5 and m4; m6 is a temporary
+    mova                    m0, [lq+mmsize*3+0]     ; l words 24-31
+    movu                    m1, [aq+mmsize*0-2]     ; 8 words starting one word before a
+    PALIGNR                 m7, m1, m0, 2, m2       ; m7 = (m1:m0) shifted right by one word
+    PALIGNR                 m6, m1, m0, 4, m2       ; m6 = (m1:m0) shifted right by two words
+    LOWPASS                  6,  7,  0              ; m6 = LOWPASS(m6, m7, m0)
+    pavgw                   m7, m0                  ; m7 = rounded average of m7 and m0
+    SBUTTERFLY           wd, 7,  6,  0              ; interleave m7 and m6; m0 is a temporary
+    mova                    m2, [aq+mmsize*0+0]     ; a words 0-7
+    movu                    m0, [aq+mmsize*0+2]     ; a words 1-8
+    LOWPASS                  0,  2,  1              ; m0 = LOWPASS(m0, m2, m1)
+    movu                    m1, [aq+mmsize*1-2]     ; a words 7-14
+    mova                    m2, [aq+mmsize*1+0]     ; a words 8-15
+    movu                    m3, [aq+mmsize*1+2]     ; a words 9-16
+    LOWPASS                  1,  2,  3              ; m1 = LOWPASS(m1, m2, m3)
+    SCRATCH                  6, 12, rsp+6*mmsize    ; park m6/m7 while they serve as load registers/temporaries below
+    SCRATCH                  7, 13, rsp+7*mmsize
+    movu                    m2, [aq+mmsize*2-2]     ; a words 15-22
+    mova                    m3, [aq+mmsize*2+0]     ; a words 16-23
+    movu                    m6, [aq+mmsize*2+2]     ; a words 17-24
+    LOWPASS                  2,  3,  6              ; m2 = LOWPASS(m2, m3, m6)
+    movu                    m3, [aq+mmsize*3-2]     ; a words 23-30
+    psrldq                  m6, m3,  2              ; m3 shifted right by one word
+    psrldq                  m7, m3,  4              ; m3 shifted right by two words
+    LOWPASS                  3,  6,  7              ; m3 = LOWPASS(m3, m6, m7)
+    UNSCRATCH                6, 12, rsp+6*mmsize    ; restore the parked m6/m7
+    UNSCRATCH                7, 13, rsp+7*mmsize
+%if ARCH_X86_32
+    mova        [rsp+4*mmsize], m4                  ; x86-32: keep copies of m4/m5 for the second loop
+    mova        [rsp+5*mmsize], m5
+    ; we already backed up m6/m7 earlier on x86-32 in SCRATCH, so we don't need
+    ; to do it again here
+%endif
+    DEFINE_ARGS dst, stride, cnt, stride3, stride4, stride20, stride28 ; l -> cnt, a -> stride3; the last three names are only used on x86-64
+    mov                   cntd, 4                   ; 4 iterations per loop
+    lea               stride3q, [strideq*3]         ; stride3 = 3 * stride
+%if ARCH_X86_64
+    lea               stride4q, [strideq*4]         ; stride4 = 4 * stride
+    lea              stride28q, [stride4q*8]        ; 32 * stride for now, corrected by the sub below
+    lea              stride20q, [stride4q*5]        ; stride20 = 20 * stride
+    sub              stride28q, stride4q            ; stride28 = 28 * stride
+%endif
+    add                   dstq, stride3q            ; start at row 3; the loop then walks upwards one row per iteration
+
+    ; x86-32 doesn't have enough registers, so on that platform, we split
+    ; the loop in 2... Otherwise you spend most of the loop (un)scratching
+.loop:
+%if ARCH_X86_64
+    mova  [dstq+stride28q + 0], m9                  ; row +28: m9, m8, m11, m10
+    mova  [dstq+stride28q +16], m8
+    mova  [dstq+stride28q +32], m11
+    mova  [dstq+stride28q +48], m10
+    mova  [dstq+stride3q*8+ 0], m8                  ; row +24: m8, m11, m10, m5
+    mova  [dstq+stride3q*8+16], m11
+    mova  [dstq+stride3q*8+32], m10
+    mova  [dstq+stride3q*8+48], m5
+    mova  [dstq+stride20q + 0], m11                 ; row +20: m11, m10, m5, m4
+    mova  [dstq+stride20q +16], m10
+    mova  [dstq+stride20q +32], m5
+    mova  [dstq+stride20q +48], m4
+    mova  [dstq+stride4q*4+ 0], m10                 ; row +16: m10, m5, m4, m7
+    mova  [dstq+stride4q*4+16], m5
+    mova  [dstq+stride4q*4+32], m4
+    mova  [dstq+stride4q*4+48], m7
+%endif
+    mova  [dstq+stride3q*4+ 0], m5                  ; row +12: m5, m4, m7, m6
+    mova  [dstq+stride3q*4+16], m4
+    mova  [dstq+stride3q*4+32], m7
+    mova  [dstq+stride3q*4+48], m6
+    mova  [dstq+strideq* 8+ 0], m4                  ; row +8: m4, m7, m6, m0
+    mova  [dstq+strideq* 8+16], m7
+    mova  [dstq+strideq* 8+32], m6
+    mova  [dstq+strideq* 8+48], m0
+    mova  [dstq+strideq* 4+ 0], m7                  ; row +4: m7, m6, m0, m1
+    mova  [dstq+strideq* 4+16], m6
+    mova  [dstq+strideq* 4+32], m0
+    mova  [dstq+strideq* 4+48], m1
+    mova  [dstq+strideq* 0+ 0], m6                  ; row +0: m6, m0, m1, m2
+    mova  [dstq+strideq* 0+16], m0
+    mova  [dstq+strideq* 0+32], m1
+    mova  [dstq+strideq* 0+48], m2
+    sub                   dstq, strideq             ; step one row up
+%if cpuflag(avx)
+%if ARCH_X86_64
+    vpalignr                m9, m8,  m9,  4         ; shift the whole vector chain by 4 bytes (two words), m9 first
+    vpalignr                m8, m11, m8,  4
+    vpalignr               m11, m10, m11, 4
+    vpalignr               m10, m5,  m10, 4
+%endif
+    vpalignr                m5, m4,  m5,  4
+    vpalignr                m4, m7,  m4,  4
+    vpalignr                m7, m6,  m7,  4
+    vpalignr                m6, m0,  m6,  4
+    vpalignr                m0, m1,  m0,  4
+    vpalignr                m1, m2,  m1,  4
+    vpalignr                m2, m3,  m2,  4
+%else
+%if ARCH_X86_64
+    PALIGNR                m12, m8,  m9,  4, m13    ; same shifts without the 3-operand form: built in m12, m13 as temporary
+    mova                    m9, m12
+    PALIGNR                m12, m11, m8,  4, m13
+    mova                    m8, m12
+    PALIGNR                m12, m10, m11, 4, m13
+    mova                   m11, m12
+    PALIGNR                m12, m5,  m10, 4, m13
+    mova                   m10, m12
+%endif
+    SCRATCH                  3, 12, rsp+8*mmsize, sh ; park m3 so it can receive the results; the 4th argument names reg_sh, used below as a source operand
+%if notcpuflag(ssse3)
+    SCRATCH                  2, 13, rsp+9*mmsize    ; SSE2 only: also park m2, used as the PALIGNR temporary
+%endif
+    PALIGNR                 m3, m4,  m5,  4, m2
+    mova                    m5, m3
+    PALIGNR                 m3, m7,  m4,  4, m2
+    mova                    m4, m3
+    PALIGNR                 m3, m6,  m7,  4, m2
+    mova                    m7, m3
+    PALIGNR                 m3, m0,  m6,  4, m2
+    mova                    m6, m3
+    PALIGNR                 m3, m1,  m0,  4, m2
+    mova                    m0, m3
+%if notcpuflag(ssse3)
+    UNSCRATCH                2, 13, rsp+9*mmsize    ; SSE2 only: m2 is needed as data now ...
+    SCRATCH                  0, 13, rsp+9*mmsize    ; ... so park m0 and use it as the temporary instead
+%endif
+    PALIGNR                 m3, m2,  m1,  4, m0
+    mova                    m1, m3
+    PALIGNR                 m3, reg_sh,  m2,  4, m0 ; the parked m3 value is read through reg_sh
+    mova                    m2, m3
+%if notcpuflag(ssse3)
+    UNSCRATCH                0, 13, rsp+9*mmsize    ; SSE2 only: restore m0
+%endif
+    UNSCRATCH                3, 12, rsp+8*mmsize, sh ; restore m3
+%endif
+    psrldq                  m3, 4                   ; drop the two words of m3 that were just consumed
+    dec                   cntd
+    jg .loop
+
+%if ARCH_X86_32                                     ; x86-32 only: second loop for rows 16-31, which x86-64 wrote in .loop
+    UNSCRATCH                0,  8, rsp+0*mmsize    ; reload the vectors saved before the first loop
+    UNSCRATCH                1,  9, rsp+1*mmsize
+    UNSCRATCH                2, 10, rsp+2*mmsize
+    UNSCRATCH                3, 11, rsp+3*mmsize
+    mova                    m4, [rsp+4*mmsize]
+    mova                    m5, [rsp+5*mmsize]
+    mova                    m6, [rsp+6*mmsize]
+    mova                    m7, [rsp+7*mmsize]
+    DEFINE_ARGS dst, stride, stride5, stride3       ; temporarily reuse the cnt register as stride5
+    lea               stride5q, [strideq*5]         ; stride5 = 5 * stride
+    lea                   dstq, [dstq+stride5q*4]   ; dst += 20 * stride
+    DEFINE_ARGS dst, stride, cnt, stride3           ; name the register cnt again
+    mov                   cntd, 4
+.loop_2:
+    mova  [dstq+stride3q*4+ 0], m1                  ; row +12: m1, m0, m3, m2 (same columns as the x86-64 m9, m8, m11, m10 stores)
+    mova  [dstq+stride3q*4+16], m0
+    mova  [dstq+stride3q*4+32], m3
+    mova  [dstq+stride3q*4+48], m2
+    mova  [dstq+strideq* 8+ 0], m0                  ; row +8: m0, m3, m2, m5
+    mova  [dstq+strideq* 8+16], m3
+    mova  [dstq+strideq* 8+32], m2
+    mova  [dstq+strideq* 8+48], m5
+    mova  [dstq+strideq* 4+ 0], m3                  ; row +4: m3, m2, m5, m4
+    mova  [dstq+strideq* 4+16], m2
+    mova  [dstq+strideq* 4+32], m5
+    mova  [dstq+strideq* 4+48], m4
+    mova  [dstq+strideq* 0+ 0], m2                  ; row +0: m2, m5, m4, m7
+    mova  [dstq+strideq* 0+16], m5
+    mova  [dstq+strideq* 0+32], m4
+    mova  [dstq+strideq* 0+48], m7
+    sub                   dstq, strideq             ; step one row up
+%if cpuflag(avx)
+    vpalignr                m1, m0,  m1,  4         ; shift the m1<-m0<-m3<-m2<-m5<-m4<-m7<-m6 chain by 4 bytes (two words)
+    vpalignr                m0, m3,  m0,  4
+    vpalignr                m3, m2,  m3,  4
+    vpalignr                m2, m5,  m2,  4
+    vpalignr                m5, m4,  m5,  4
+    vpalignr                m4, m7,  m4,  4
+    vpalignr                m7, m6,  m7,  4
+%else
+    SCRATCH                  6, 12, rsp+8*mmsize, sh ; park m6 so it can receive the results; reg_sh refers to the parked value
+%if notcpuflag(ssse3)
+    SCRATCH                  7, 13, rsp+9*mmsize    ; SSE2 only: also park m7, used as the PALIGNR temporary
+%endif
+    PALIGNR                 m6, m0,  m1,  4, m7
+    mova                    m1, m6
+    PALIGNR                 m6, m3,  m0,  4, m7
+    mova                    m0, m6
+    PALIGNR                 m6, m2,  m3,  4, m7
+    mova                    m3, m6
+    PALIGNR                 m6, m5,  m2,  4, m7
+    mova                    m2, m6
+    PALIGNR                 m6, m4,  m5,  4, m7
+    mova                    m5, m6
+%if notcpuflag(ssse3)
+    UNSCRATCH                7, 13, rsp+9*mmsize    ; SSE2 only: m7 is needed as data now ...
+    SCRATCH                  5, 13, rsp+9*mmsize    ; ... so park m5 and use it as the temporary instead
+%endif
+    PALIGNR                 m6, m7,  m4,  4, m5
+    mova                    m4, m6
+    PALIGNR                 m6, reg_sh,  m7,  4, m5 ; the parked m6 value is read through reg_sh
+    mova                    m7, m6
+%if notcpuflag(ssse3)
+    UNSCRATCH                5, 13, rsp+9*mmsize    ; SSE2 only: restore m5
+%endif
+    UNSCRATCH                6, 12, rsp+8*mmsize, sh ; restore m6
+%endif
+    psrldq                  m6, 4                   ; drop the two words of m6 that were just consumed
+    dec                   cntd
+    jg .loop_2
+%endif
+    RET
+%endmacro
+
+INIT_XMM sse2                                      ; expand HD_FUNCS once per instruction set: SSE2 ...
+HD_FUNCS
+INIT_XMM ssse3                                     ; ... SSSE3 ...
+HD_FUNCS
+INIT_XMM avx                                       ; ... and AVX
+HD_FUNCS
diff --git a/libavcodec/x86/vp9itxfm.asm b/libavcodec/x86/vp9itxfm.asm
index d9fb36f7..6d5008e3 100644
--- a/libavcodec/x86/vp9itxfm.asm
+++ b/libavcodec/x86/vp9itxfm.asm
@@ -22,53 +22,74 @@
 ;******************************************************************************
 
 %include "libavutil/x86/x86util.asm"
+%include "vp9itxfm_template.asm"
 
 SECTION_RODATA
 
-pw_11585x2:  times 8 dw 23170
-pw_m11585x2: times 8 dw -23170
-pw_m11585_11585: times 4 dw -11585, 11585
-pw_11585_11585: times 8 dw 11585
-
 %macro VP9_IDCT_COEFFS 2-3 0
-pw_%1x2:    times 8 dw  %1*2
+const pw_m%1_%2 ; word pair (-%1, %2) repeated 4 times; const is defined outside this chunk (presumably it makes the symbol visible to other files - confirm)
+times 4 dw -%1,  %2
+const pw_%2_%1 ; word pair (%2, %1) repeated 4 times
+times 4 dw  %2,  %1
+
+%if %3 == 1 ; optional third argument (default 0): also emit the extra sign/order variants below
+const pw_m%2_m%1
+times 4 dw -%2, -%1
+%if %1 != %2 ; with equal coefficients these two names would repeat pw_m%1_%2 and pw_%2_%1 defined above
+const pw_m%2_%1
+times 4 dw -%2,  %1
+const pw_%1_%2
+times 4 dw  %1,  %2
+%endif
+%endif
+
+%if %1 < 11585 ; below 11585 only the negated doubled constant is emitted
 pw_m%1x2:   times 8 dw -%1*2
+%elif %1 > 11585 ; above 11585 only the positive doubled constant is emitted
+pw_%1x2:    times 8 dw  %1*2
+%else ; %1 == 11585: positive doubled constant, declared through const instead of a plain label
+const pw_%1x2
+times 8 dw %1*2
+%endif
+
+%if %2 != %1 ; with equal coefficients pw_%2x2 would repeat the pw_%1x2 name
 pw_%2x2:    times 8 dw  %2*2
-pw_m%2x2:   times 8 dw -%2*2
-pw_m%1_%2:  times 4 dw -%1,  %2
-pw_%2_%1:   times 4 dw  %2,  %1
-pw_m%2_m%1: times 4 dw -%2, -%1
-%if %3 == 1
-pw_m%2_%1:  times 4 dw -%2,  %1
-pw_%1_%2:   times 4 dw  %1,  %2
 %endif
 %endmacro
 
-VP9_IDCT_COEFFS 15137,  6270, 1
-VP9_IDCT_COEFFS 16069,  3196, 1
-VP9_IDCT_COEFFS  9102, 13623, 1
+VP9_IDCT_COEFFS 16364,   804
 VP9_IDCT_COEFFS 16305,  1606
-VP9_IDCT_COEFFS 10394, 12665
+VP9_IDCT_COEFFS 16069,  3196, 1
+VP9_IDCT_COEFFS 15893,  3981
+VP9_IDCT_COEFFS 15137,  6270, 1
+VP9_IDCT_COEFFS 14811,  7005
 VP9_IDCT_COEFFS 14449,  7723
-VP9_IDCT_COEFFS  4756, 15679
-VP9_IDCT_COEFFS 16364,   804
+VP9_IDCT_COEFFS 13160,  9760
+VP9_IDCT_COEFFS 11585, 11585, 1
 VP9_IDCT_COEFFS 11003, 12140
-VP9_IDCT_COEFFS 14811,  7005
-VP9_IDCT_COEFFS  5520, 15426
-VP9_IDCT_COEFFS 15893,  3981
+VP9_IDCT_COEFFS 10394, 12665
+VP9_IDCT_COEFFS  9102, 13623, 1
 VP9_IDCT_COEFFS  8423, 14053
-VP9_IDCT_COEFFS 13160,  9760
+VP9_IDCT_COEFFS  5520, 15426
+VP9_IDCT_COEFFS  4756, 15679
 VP9_IDCT_COEFFS  2404, 16207
 
-pw_5283_13377: times 4 dw 5283, 13377
-pw_9929_13377: times 4 dw 9929, 13377
-pw_15212_m13377: times 4 dw 15212, -13377
-pw_15212_9929: times 4 dw 15212, 9929
-pw_m5283_m15212: times 4 dw -5283, -15212
-pw_13377x2: times 8 dw 13377*2
-pw_13377_m13377: times 4 dw 13377, -13377
-
-pd_8192: times 4 dd 8192
+const pw_5283_13377
+times 4 dw 5283, 13377
+const pw_9929_13377
+times 4 dw 9929, 13377
+const pw_15212_m13377
+times 4 dw 15212, -13377
+const pw_15212_9929
+times 4 dw 15212, 9929
+const pw_m5283_m15212
+times 4 dw -5283, -15212
+const pw_13377x2
+times 8 dw 13377*2
+const pw_m13377_13377
+times 4 dw -13377, 13377
+const pw_13377_0
+times 4 dw 13377, 0
 
 cextern pw_8
 cextern pw_16
@@ -77,38 +98,10 @@ cextern pw_512
 cextern pw_1024
 cextern pw_2048
 cextern pw_m1
+cextern pd_8192
 
 SECTION .text
 
-; (a*x + b*y + round) >> shift
-%macro VP9_MULSUB_2W_2X 5 ; dst1, dst2/src, round, coefs1, coefs2
-    pmaddwd            m%1, m%2, %4
-    pmaddwd            m%2,  %5
-    paddd              m%1,  %3
-    paddd              m%2,  %3
-    psrad              m%1,  14
-    psrad              m%2,  14
-%endmacro
-
-%macro VP9_MULSUB_2W_4X 7 ; dst1, dst2, coef1, coef2, rnd, tmp1/src, tmp2
-    VP9_MULSUB_2W_2X    %7,  %6,  %5, [pw_m%3_%4], [pw_%4_%3]
-    VP9_MULSUB_2W_2X    %1,  %2,  %5, [pw_m%3_%4], [pw_%4_%3]
-    packssdw           m%1, m%7
-    packssdw           m%2, m%6
-%endmacro
-
-%macro VP9_UNPACK_MULSUB_2W_4X 7-9 ; dst1, dst2, (src1, src2,) coef1, coef2, rnd, tmp1, tmp2
-%if %0 == 7
-    punpckhwd          m%6, m%2, m%1
-    punpcklwd          m%2, m%1
-    VP9_MULSUB_2W_4X   %1, %2, %3, %4, %5, %6, %7
-%else
-    punpckhwd          m%8, m%4, m%3
-    punpcklwd          m%2, m%4, m%3
-    VP9_MULSUB_2W_4X   %1, %2, %5, %6, %7, %8, %9
-%endif
-%endmacro
-
 %macro VP9_UNPACK_MULSUB_2D_4X 6 ; dst1 [src1], dst2 [src2], dst3, dst4, mul1, mul2
     punpckhwd          m%4, m%2, m%1
     punpcklwd          m%2, m%1
@@ -162,21 +155,6 @@ SECTION .text
 ; void vp9_iwht_iwht_4x4_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
 ;-------------------------------------------------------------------------------------------
 
-%macro VP9_IWHT4_1D 0
-    SWAP                 1, 2, 3
-    paddw               m0, m2
-    psubw               m3, m1
-    psubw               m4, m0, m3
-    psraw               m4, 1
-    psubw               m5, m4, m1
-    SWAP                 5, 1
-    psubw               m4, m2
-    SWAP                 4, 2
-    psubw               m0, m1
-    paddw               m3, m2
-    SWAP                 3, 2, 1
-%endmacro
-
 INIT_MMX mmx
 cglobal vp9_iwht_iwht_4x4_add, 3, 3, 0, dst, stride, block, eob
     mova                m0, [blockq+0*8]
@@ -203,24 +181,6 @@ cglobal vp9_iwht_iwht_4x4_add, 3, 3, 0, dst, stride, block, eob
 ; void vp9_idct_idct_4x4_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
 ;-------------------------------------------------------------------------------------------
 
-%macro VP9_IDCT4_1D_FINALIZE 0
-    SUMSUB_BA            w, 3, 2, 4                         ; m3=t3+t0, m2=-t3+t0
-    SUMSUB_BA            w, 1, 0, 4                         ; m1=t2+t1, m0=-t2+t1
-    SWAP                 0, 3, 2                            ; 3102 -> 0123
-%endmacro
-
-%macro VP9_IDCT4_1D 0
-%if cpuflag(ssse3)
-    SUMSUB_BA            w, 2, 0, 4                         ; m2=IN(0)+IN(2) m0=IN(0)-IN(2)
-    pmulhrsw            m2, m6                              ; m2=t0
-    pmulhrsw            m0, m6                              ; m0=t1
-%else ; <= sse2
-    VP9_UNPACK_MULSUB_2W_4X 0, 2, 11585, 11585, m7, 4, 5    ; m0=t1, m1=t0
-%endif
-    VP9_UNPACK_MULSUB_2W_4X 1, 3, 15137, 6270, m7, 4, 5     ; m1=t2, m3=t3
-    VP9_IDCT4_1D_FINALIZE
-%endmacro
-
 ; 2x2 top left corner
 %macro VP9_IDCT4_2x2_1D 0
     pmulhrsw            m0, m5                              ; m0=t1
@@ -349,66 +309,11 @@ IDCT_4x4_FN ssse3
 ; void vp9_iadst_iadst_4x4_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
 ;-------------------------------------------------------------------------------------------
 
-%macro VP9_IADST4_1D 0
-    movq2dq           xmm0, m0
-    movq2dq           xmm1, m1
-    movq2dq           xmm2, m2
-    movq2dq           xmm3, m3
-%if cpuflag(ssse3)
-    paddw               m3, m0
-%else
-    paddw             xmm6, xmm3, xmm0
-    punpcklwd         xmm6, xmm2
-%endif
-    punpcklwd         xmm0, xmm1
-    punpcklwd         xmm2, xmm3
-    pmaddwd           xmm1, xmm0, [pw_5283_13377]
-    pmaddwd           xmm4, xmm0, [pw_9929_13377]
-    pmaddwd           xmm0, [pw_15212_m13377]
-    pmaddwd           xmm3, xmm2, [pw_15212_9929]
-    pmaddwd           xmm2, [pw_m5283_m15212]
-%if cpuflag(ssse3)
-    psubw               m3, m2
-%else
-    pmaddwd           xmm6, [pw_13377_m13377]
-%endif
-    paddd             xmm0, xmm2
-    paddd             xmm3, xmm5
-    paddd             xmm2, xmm5
-%if notcpuflag(ssse3)
-    paddd             xmm6, xmm5
-%endif
-    paddd             xmm1, xmm3
-    paddd             xmm0, xmm3
-    paddd             xmm4, xmm2
-    psrad             xmm1, 14
-    psrad             xmm0, 14
-    psrad             xmm4, 14
-%if cpuflag(ssse3)
-    pmulhrsw            m3, [pw_13377x2]        ; out2
-%else
-    psrad             xmm6, 14
-%endif
-    packssdw          xmm0, xmm0
-    packssdw          xmm1, xmm1
-    packssdw          xmm4, xmm4
-%if notcpuflag(ssse3)
-    packssdw          xmm6, xmm6
-%endif
-    movdq2q             m0, xmm0                ; out3
-    movdq2q             m1, xmm1                ; out0
-    movdq2q             m2, xmm4                ; out1
-%if notcpuflag(ssse3)
-    movdq2q             m3, xmm6                ; out2
-%endif
-    SWAP                 0, 1, 2, 3
-%endmacro
-
 %macro IADST4_FN 5
 INIT_MMX %5
-cglobal vp9_%1_%3_4x4_add, 3, 3, 6 + notcpuflag(ssse3), dst, stride, block, eob
+cglobal vp9_%1_%3_4x4_add, 3, 3, 0, dst, stride, block, eob
 %if WIN64 && notcpuflag(ssse3)
-WIN64_SPILL_XMM 7
+    WIN64_SPILL_XMM 8
 %endif
     movdqa            xmm5, [pd_8192]
     mova                m0, [blockq+ 0]
@@ -527,10 +432,9 @@ IADST4_FN iadst, IADST4, iadst, IADST4, ssse3
     pmulhrsw            m2, [pw_6270x2]                     ; m2=t2a
     pmulhrsw            m7, m1, [pw_16069x2]                ; m7=t7a
     pmulhrsw            m1, [pw_3196x2]                     ; m1=t4a
-    pmulhrsw            m5, m3, [pw_9102x2]                 ; m5=-t5a
+    pmulhrsw            m5, m3, [pw_m9102x2]                ; m5=t5a
     pmulhrsw            m3, [pw_13623x2]                    ; m3=t6a
     SUMSUB_BA            w,  5,  1, 4                       ; m1=t4a+t5a (t4), m5=t4a-t5a (t5a)
-    SWAP                 1,  5
     SUMSUB_BA            w,  3,  7, 4                       ; m3=t7a+t6a (t7), m7=t7a-t6a (t6a)
     SUMSUB_BA            w,  1,  7, 4                       ; m1=t6a+t5a (t6), m7=t6a-t5a (t5)
     pmulhrsw            m1, W_11585x2_REG                   ; m1=t6
@@ -969,18 +873,15 @@ cglobal vp9_%1_%3_8x8_add, 3, 3, %6, dst, stride, block, eob
 
 %endmacro
 
-%define PSIGNW PSIGNW_MMX
 IADST8_FN idct,  IDCT8,  iadst, IADST8, sse2, 15
 IADST8_FN iadst, IADST8, idct,  IDCT8,  sse2, 15
 IADST8_FN iadst, IADST8, iadst, IADST8, sse2, 15
-%define PSIGNW PSIGNW_SSSE3
 IADST8_FN idct,  IDCT8,  iadst, IADST8, ssse3, 16
 IADST8_FN idct,  IDCT8,  iadst, IADST8, avx, 16
 IADST8_FN iadst, IADST8, idct,  IDCT8,  ssse3, 16
 IADST8_FN iadst, IADST8, idct,  IDCT8,  avx, 16
 IADST8_FN iadst, IADST8, iadst, IADST8, ssse3, 16
 IADST8_FN iadst, IADST8, iadst, IADST8, avx, 16
-%undef PSIGNW
 
 ;---------------------------------------------------------------------------------------------
 ; void vp9_idct_idct_16x16_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
@@ -1124,10 +1025,14 @@ IADST8_FN iadst, IADST8, iadst, IADST8, avx, 16
     pmulhrsw            m7, m4, [pw_16069x2]        ; t6-7
     pmulhrsw            m4, [pw_3196x2]             ; t4-5
 
+%if 0 ; overflows :(
     paddw               m6, m7, m4
     psubw               m5, m7, m4
     pmulhrsw            m5, [pw_11585x2]            ; t5
     pmulhrsw            m6, [pw_11585x2]            ; t6
+%else
+    VP9_UNPACK_MULSUB_2W_4X  5, 6, 7, 4, 11585, 11585, [pd_8192], 0, 1 ; t5,  t6
+%endif
 
     psubw               m0, m3, m7
     paddw               m7, m3
@@ -1709,13 +1614,13 @@ VP9_IDCT_IDCT_16x16_ADD_XMM avx
     SUMSUB_BA               w,   7,  6,  4
     pmulhrsw                m7, [pw_m11585x2]               ; m8=out7[w]
     pmulhrsw                m6, [pw_11585x2]                ; m1=out8[w]
+    SWAP                     6,  7
     SUMSUB_BA                w,  3,  2,  4
     pmulhrsw                m3, [pw_11585x2]                ; m3=out4[w]
     pmulhrsw                m2, [pw_11585x2]                ; m2=out11[w]
 %else
     SCRATCH                  5,  8, tmpq+10*%%str
-    PSIGNW                  m7, [pw_m1]
-    VP9_UNPACK_MULSUB_2W_4X  7,  6, 11585, 11585, [pd_8192],  5,  4
+    VP9_UNPACK_MULSUB_2W_4X  6,  7, 11585, m11585, [pd_8192],  5,  4
     VP9_UNPACK_MULSUB_2W_4X  2,  3, 11585, 11585, [pd_8192],  5,  4
     UNSCRATCH                5,  8, tmpq+10*%%str
 %endif
@@ -1726,7 +1631,7 @@ VP9_IDCT_IDCT_16x16_ADD_XMM avx
 %if %2 == 1
 %if ARCH_X86_64
     mova                   m13, [tmpq+ 6*%%str]
-    TRANSPOSE8x8W            1, 11, 14, 0, 3, 15, 13, 7, 10
+    TRANSPOSE8x8W            1, 11, 14, 0, 3, 15, 13, 6, 10
     mova          [tmpq+ 0*16], m1
     mova          [tmpq+ 2*16], m11
     mova          [tmpq+ 4*16], m14
@@ -1738,10 +1643,10 @@ VP9_IDCT_IDCT_16x16_ADD_XMM avx
     mova          [tmpq+ 8*16], m3
     mova          [tmpq+10*16], m15
     mova          [tmpq+12*16], m13
-    mova          [tmpq+14*16], m7
+    mova          [tmpq+14*16], m6
 
-    TRANSPOSE8x8W            6, 1, 11, 2, 9, 14, 0, 5, 10
-    mova          [tmpq+ 1*16], m6
+    TRANSPOSE8x8W            7, 1, 11, 2, 9, 14, 0, 5, 10
+    mova          [tmpq+ 1*16], m7
     mova          [tmpq+ 3*16], m1
     mova          [tmpq+ 5*16], m11
     mova          [tmpq+ 7*16], m2
@@ -1752,20 +1657,20 @@ VP9_IDCT_IDCT_16x16_ADD_XMM avx
 %else
     mova       [tmpq+12*%%str], m2
     mova       [tmpq+ 1*%%str], m5
-    mova       [tmpq+15*%%str], m6
+    mova       [tmpq+15*%%str], m7
     mova                    m2, [tmpq+ 9*%%str]
     mova                    m5, [tmpq+ 5*%%str]
-    mova                    m6, [tmpq+ 8*%%str]
-    TRANSPOSE8x8W            1, 2, 5, 0, 3, 6, 4, 7, [tmpq+ 6*%%str], [tmpq+ 8*%%str], 1
+    mova                    m7, [tmpq+ 8*%%str]
+    TRANSPOSE8x8W            1, 2, 5, 0, 3, 7, 4, 6, [tmpq+ 6*%%str], [tmpq+ 8*%%str], 1
     mova          [tmpq+ 0*16], m1
     mova          [tmpq+ 2*16], m2
     mova          [tmpq+ 4*16], m5
     mova          [tmpq+ 6*16], m0
-    mova          [tmpq+10*16], m6
+    mova          [tmpq+10*16], m7
     mova                    m3, [tmpq+12*%%str]
     mova          [tmpq+12*16], m4
     mova                    m4, [tmpq+14*%%str]
-    mova          [tmpq+14*16], m7
+    mova          [tmpq+14*16], m6
 
     mova                    m0, [tmpq+15*%%str]
     mova                    m1, [tmpq+ 3*%%str]
@@ -1798,7 +1703,7 @@ VP9_IDCT_IDCT_16x16_ADD_XMM avx
     lea                   dstq, [dstq+strideq*2]
     VP9_IDCT8_WRITEx2        3, 15, 10,  8,  4, ROUND_REG, 6
     lea                   dstq, [dstq+strideq*2]
-    VP9_IDCT8_WRITEx2       12,  7, 10,  8,  4, ROUND_REG, 6
+    VP9_IDCT8_WRITEx2       12,  6, 10,  8,  4, ROUND_REG, 6
     lea                   dstq, [dstq+strideq*2]
 
     mova                    m1, [tmpq+ 3*%%str]
@@ -1806,7 +1711,7 @@ VP9_IDCT_IDCT_16x16_ADD_XMM avx
     mova                   m14, [tmpq+11*%%str]
     mova                    m0, [tmpq+13*%%str]
 
-    VP9_IDCT8_WRITEx2        6,  1, 10,  8,  4, ROUND_REG, 6
+    VP9_IDCT8_WRITEx2        7,  1, 10,  8,  4, ROUND_REG, 6
     lea                   dstq, [dstq+strideq*2]
     VP9_IDCT8_WRITEx2       11,  2, 10,  8,  4, ROUND_REG, 6
     lea                   dstq, [dstq+strideq*2]
@@ -1816,9 +1721,9 @@ VP9_IDCT_IDCT_16x16_ADD_XMM avx
 %else
     mova       [tmpq+ 0*%%str], m2
     mova       [tmpq+ 1*%%str], m5
-    mova       [tmpq+ 2*%%str], m6
+    mova       [tmpq+ 2*%%str], m7
     mova                    m2, [tmpq+ 9*%%str]
-    VP9_IDCT8_WRITEx2        1,  2,  5,  6,  4, ROUND_REG, 6
+    VP9_IDCT8_WRITEx2        1,  2,  5,  7,  4, ROUND_REG, 6
     lea                   dstq, [dstq+strideq*2]
     mova                    m5, [tmpq+ 5*%%str]
     VP9_IDCT8_WRITEx2        5,  0,  1,  2,  4, ROUND_REG, 6
@@ -1827,7 +1732,7 @@ VP9_IDCT_IDCT_16x16_ADD_XMM avx
     VP9_IDCT8_WRITEx2        3,  5,  1,  2,  4, ROUND_REG, 6
     lea                   dstq, [dstq+strideq*2]
     mova                    m5, [tmpq+ 6*%%str]
-    VP9_IDCT8_WRITEx2        5,  7,  1,  2,  4, ROUND_REG, 6
+    VP9_IDCT8_WRITEx2        5,  6,  1,  2,  4, ROUND_REG, 6
     lea                   dstq, [dstq+strideq*2]
 
     mova                    m0, [tmpq+ 2*%%str]
@@ -1881,18 +1786,15 @@ cglobal vp9_%1_%3_16x16_add, 3, 6, 16, 512, dst, stride, block, cnt, dst_bak, tm
     RET
 %endmacro
 
-%define PSIGNW PSIGNW_MMX
 IADST16_FN idct,  IDCT16,  iadst, IADST16, sse2
 IADST16_FN iadst, IADST16, idct,  IDCT16,  sse2
 IADST16_FN iadst, IADST16, iadst, IADST16, sse2
-%define PSIGNW PSIGNW_SSSE3
 IADST16_FN idct,  IDCT16,  iadst, IADST16, ssse3
 IADST16_FN iadst, IADST16, idct,  IDCT16,  ssse3
 IADST16_FN iadst, IADST16, iadst, IADST16, ssse3
 IADST16_FN idct,  IDCT16,  iadst, IADST16, avx
 IADST16_FN iadst, IADST16, idct,  IDCT16,  avx
 IADST16_FN iadst, IADST16, iadst, IADST16, avx
-%undef PSIGNW
 
 ;---------------------------------------------------------------------------------------------
 ; void vp9_idct_idct_32x32_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
diff --git a/libavcodec/x86/vp9itxfm_16bpp.asm b/libavcodec/x86/vp9itxfm_16bpp.asm
new file mode 100644
index 00000000..902685ed
--- /dev/null
+++ b/libavcodec/x86/vp9itxfm_16bpp.asm
@@ -0,0 +1,2044 @@
+;******************************************************************************
+;* VP9 inverse transform x86 SIMD optimizations
+;*
+;* Copyright (C) 2015 Ronald S. Bultje <rsbultje gmail com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+%include "vp9itxfm_template.asm"
+
+SECTION_RODATA
+
+cextern pw_8
+cextern pw_1023
+cextern pw_2048
+cextern pw_4095
+cextern pw_m1
+cextern pd_1
+cextern pd_16
+cextern pd_32
+cextern pd_8192
+
+pd_8: times 4 dd 8
+pd_3fff: times 4 dd 0x3fff
+
+cextern pw_11585x2
+
+cextern pw_5283_13377
+cextern pw_9929_13377
+cextern pw_15212_m13377
+cextern pw_15212_9929
+cextern pw_m5283_m15212
+cextern pw_13377x2
+cextern pw_m13377_13377
+cextern pw_13377_0
+
+pw_9929_m5283: times 4 dw 9929, -5283
+
+%macro COEF_PAIR 2-3 ; coef1, coef2[, flag]: declare the external pw_* word-pair constants for this coefficient pair
+cextern pw_m%1_%2
+cextern pw_%2_%1
+%if %0 == 3 ; a third argument (any value) also declares the extra sign/order variants below
+cextern pw_m%1_m%2
+%if %1 != %2 ; with equal coefficients these two names coincide with the ones declared above
+cextern pw_m%2_%1
+cextern pw_%1_%2
+%endif
+%endif
+%endmacro
+
+COEF_PAIR  2404, 16207
+COEF_PAIR  3196, 16069, 1
+COEF_PAIR  4756, 15679
+COEF_PAIR  5520, 15426
+COEF_PAIR  6270, 15137, 1
+COEF_PAIR  8423, 14053
+COEF_PAIR 10394, 12665
+COEF_PAIR 11003, 12140
+COEF_PAIR 11585, 11585, 1
+COEF_PAIR 13160,  9760
+COEF_PAIR 13623,  9102, 1
+COEF_PAIR 14449,  7723
+COEF_PAIR 14811,  7005
+COEF_PAIR 15893,  3981
+COEF_PAIR 16305,  1606
+COEF_PAIR 16364,   804
+
+default_8x8:
+times 12 db 1
+times 52 db 2
+row_8x8:
+times 18 db 1
+times 46 db 2
+col_8x8:
+times 6 db 1
+times 58 db 2
+default_16x16:
+times 10 db 1
+times 28 db 2
+times 51 db 3
+times 167 db 4
+row_16x16:
+times 21 db 1
+times 45 db 2
+times 60 db 3
+times 130 db 4
+col_16x16:
+times 5 db 1
+times 12 db 2
+times 25 db 3
+times 214 db 4
+default_32x32:
+times 9 db 1
+times 25 db 2
+times 36 db 3
+times 65 db 4
+times 105 db 5
+times 96 db 6
+times 112 db 7
+times 576 db 8
+
+SECTION .text
+
+%macro VP9_STORE_2X 6-7 dstq ; reg1, reg2, tmp1, tmp2, min, max, dst (add two residual rows to two pixel rows, clamp, store)
+    mova               m%3, [%7]                ; row 0 pixels
+    mova               m%4, [%7+strideq]        ; row 1 pixels
+    paddw              m%3, m%1                 ; add residual words
+    paddw              m%4, m%2
+    pmaxsw             m%3, m%5                 ; clamp from below to min
+    pmaxsw             m%4, m%5
+    pminsw             m%3, m%6                 ; clamp from above to max
+    pminsw             m%4, m%6
+    mova              [%7], m%3
+    mova      [%7+strideq], m%4
+%endmacro
+
+%macro ZERO_BLOCK 4 ; mem, stride, nnzcpl, zero_reg (store zero_reg over nnzcpl rows of nnzcpl*4 bytes each)
+%assign %%y 0
+%rep %3 ; one iteration per row, rows are stride bytes apart
+%assign %%x 0
+%rep %3*4/mmsize ; number of mmsize-wide stores needed to cover nnzcpl*4 bytes
+    mova      [%1+%%y+%%x], %4
+%assign %%x (%%x+mmsize)
+%endrep
+%assign %%y (%%y+%2)
+%endrep
+%endmacro
+
+; the input coefficients are scaled up by 2 bits (which we downscale immediately
+; in the iwht), and are otherwise orthonormally increased by 1 bit per iwht_1d.
+; therefore, a diff of 10-12+sign bit will fit in 12-14+sign bit after scaling,
+; i.e. everything can be done in 15+1bpp words. Since the quant fractional bits
+; add 2 bits, we need to scale before converting to word in 12bpp, since the
+; input will be 16+sign bit which doesn't fit in 15+sign words, but in 10bpp
+; we can scale after converting to words (which is half the instructions),
+; since the input is only 14+sign bit, which fits in 15+sign words directly.
+
+%macro IWHT4_FN 2 ; bpp, max (bpp selects the function suffix and scaling order, max names the pw_<max> pixel clamp constant)
+cglobal vp9_iwht_iwht_4x4_add_%1, 3, 3, 8, dst, stride, block, eob
+    mova                m7, [pw_%2]             ; upper pixel clamp used at writeout
+    mova                m0, [blockq+0*16+0]
+    mova                m1, [blockq+1*16+0]
+%if %1 >= 12 ; 12bpp: shift the dword coefficients down by 2 before packing to words
+    mova                m4, [blockq+0*16+8]
+    mova                m5, [blockq+1*16+8]
+    psrad               m0, 2
+    psrad               m1, 2
+    psrad               m4, 2
+    psrad               m5, 2
+    packssdw            m0, m4
+    packssdw            m1, m5
+%else ; 10bpp: pack dwords to words first, then shift the words down by 2
+    packssdw            m0, [blockq+0*16+8]
+    packssdw            m1, [blockq+1*16+8]
+    psraw               m0, 2
+    psraw               m1, 2
+%endif
+    mova                m2, [blockq+2*16+0]
+    mova                m3, [blockq+3*16+0]
+%if %1 >= 12 ; same treatment for rows 2 and 3
+    mova                m4, [blockq+2*16+8]
+    mova                m5, [blockq+3*16+8]
+    psrad               m2, 2
+    psrad               m3, 2
+    psrad               m4, 2
+    psrad               m5, 2
+    packssdw            m2, m4
+    packssdw            m3, m5
+%else
+    packssdw            m2, [blockq+2*16+8]
+    packssdw            m3, [blockq+3*16+8]
+    psraw               m2, 2
+    psraw               m3, 2
+%endif
+
+    VP9_IWHT4_1D                                ; first 1-D pass on m0-m3
+    TRANSPOSE4x4W        0, 1, 2, 3, 4
+    VP9_IWHT4_1D                                ; second 1-D pass on the transposed data
+
+    pxor                m6, m6                  ; zero: lower pixel clamp and fill value for the block
+    VP9_STORE_2X         0, 1, 4, 5, 6, 7
+    lea               dstq, [dstq+strideq*2]
+    VP9_STORE_2X         2, 3, 4, 5, 6, 7
+    ZERO_BLOCK      blockq, 16, 4, m6           ; clear the 4x4 dword coefficients
+    RET
+%endmacro
+
+INIT_MMX mmxext
+IWHT4_FN 10, 1023
+INIT_MMX mmxext
+IWHT4_FN 12, 4095
+
+%macro VP9_IDCT4_WRITEOUT 0 ; round m0-m3 as (x+8)>>4, add to dst, clamp to [m4, 1023]; callers zero m4 beforehand
+%if cpuflag(ssse3)
+    mova                m5, [pw_2048]           ; pmulhrsw by 2048 is (x*2048 + (1<<14))>>15, i.e. (x+8)>>4
+    pmulhrsw            m0, m5
+    pmulhrsw            m1, m5
+    pmulhrsw            m2, m5
+    pmulhrsw            m3, m5
+%else ; no pmulhrsw: explicit add-8 then arithmetic shift by 4
+    mova                m5, [pw_8]
+    paddw               m0, m5
+    paddw               m1, m5
+    paddw               m2, m5
+    paddw               m3, m5
+    psraw               m0, 4
+    psraw               m1, 4
+    psraw               m2, 4
+    psraw               m3, 4
+%endif
+    mova                m5, [pw_1023]           ; upper pixel clamp (this writeout is hard-wired to 1023)
+    VP9_STORE_2X         0,  1,  6,  7,  4,  5
+    lea               dstq, [dstq+2*strideq]
+    VP9_STORE_2X         2,  3,  6,  7,  4,  5
+%endmacro
+
+%macro DC_ONLY 2 ; shift, zero (scalar DC-only transform: result left in coefd, DC coefficient in the block cleared)
+    mov              coefd, dword [blockq]      ; load the DC coefficient
+    movd          [blockq], %2                  ; overwrite it with the zero register
+    imul             coefd, 11585               ; first pass: (x * 11585 + 8192) >> 14
+    add              coefd, 8192
+    sar              coefd, 14
+    imul             coefd, 11585               ; second pass, with the final output shift folded in
+    add              coefd, ((1 << (%1 - 1)) << 14) + 8192 ; rounding for >>14 plus rounding for >>shift
+    sar              coefd, 14 + %1
+%endmacro
+
+; 4x4 coefficients are 5+depth+sign bits, so for 10bpp, everything still fits
+; in 15+1 words without additional effort, since the coefficients are 15bpp.
+
+%macro IDCT4_10_FN 0 ; emits vp9_idct_idct_4x4_add_10 for the currently selected instruction set
+cglobal vp9_idct_idct_4x4_add_10, 4, 4, 8, dst, stride, block, eob
+    cmp               eobd, 1
+    jg .idctfull                                ; eob > 1: run the full 2-D transform
+
+    ; dc-only
+    pxor                m4, m4                  ; zero: clears the coefficient and is the lower pixel clamp
+%if cpuflag(ssse3)
+    movd                m0, [blockq]
+    movd          [blockq], m4
+    mova                m5, [pw_11585x2]
+    pmulhrsw            m0, m5                  ; one multiply per 1-D pass
+    pmulhrsw            m0, m5
+%else
+    DEFINE_ARGS dst, stride, block, coef
+    DC_ONLY              4, m4                  ; scalar path, already includes the final >>4
+    movd                m0, coefd
+%endif
+    pshufw              m0, m0, 0               ; broadcast word 0 to all four words
+    mova                m5, [pw_1023]           ; upper pixel clamp
+%if cpuflag(ssse3)
+    pmulhrsw            m0, [pw_2048]       ; (x*2048 + (1<<14))>>15 <=> (x+8)>>4
+%endif
+    VP9_STORE_2X         0,  0,  6,  7,  4,  5
+    lea               dstq, [dstq+2*strideq]
+    VP9_STORE_2X         0,  0,  6,  7,  4,  5
+    RET
+
+.idctfull:
+    mova                m0, [blockq+0*16+0]
+    mova                m1, [blockq+1*16+0]
+    packssdw            m0, [blockq+0*16+8]     ; pack each row of four dwords into four words
+    packssdw            m1, [blockq+1*16+8]
+    mova                m2, [blockq+2*16+0]
+    mova                m3, [blockq+3*16+0]
+    packssdw            m2, [blockq+2*16+8]
+    packssdw            m3, [blockq+3*16+8]
+
+%if cpuflag(ssse3)
+    mova                m6, [pw_11585x2]
+%endif
+    mova                m7, [pd_8192]       ; rounding
+    VP9_IDCT4_1D                                ; first 1-D pass
+    TRANSPOSE4x4W  0, 1, 2, 3, 4
+    VP9_IDCT4_1D                                ; second 1-D pass
+
+    pxor                m4, m4                  ; zero for ZERO_BLOCK and the lower clamp in the writeout
+    ZERO_BLOCK      blockq, 16, 4, m4
+    VP9_IDCT4_WRITEOUT
+    RET
+%endmacro
+
+INIT_MMX mmxext
+IDCT4_10_FN
+INIT_MMX ssse3
+IDCT4_10_FN
+
+%macro IADST4_FN 4 ; name1, 1-D macro for pass 1, name2, 1-D macro for pass 2 (emits vp9_<name1>_<name2>_4x4_add_10)
+cglobal vp9_%1_%3_4x4_add_10, 3, 3, 0, dst, stride, block, eob
+%if WIN64 && notcpuflag(ssse3)
+    WIN64_SPILL_XMM 8
+%endif
+    movdqa            xmm5, [pd_8192]           ; rounding constant kept in an XMM register
+    mova                m0, [blockq+0*16+0]
+    mova                m1, [blockq+1*16+0]
+    packssdw            m0, [blockq+0*16+8]     ; pack each row of four dwords into four words
+    packssdw            m1, [blockq+1*16+8]
+    mova                m2, [blockq+2*16+0]
+    mova                m3, [blockq+3*16+0]
+    packssdw            m2, [blockq+2*16+8]
+    packssdw            m3, [blockq+3*16+8]
+
+%if cpuflag(ssse3)
+    mova                m6, [pw_11585x2]
+%endif
+%ifnidn %1%3, iadstiadst ; only when at least one pass is an idct
+    movdq2q             m7, xmm5                ; copy the low half of the rounding constant into m7
+%endif
+    VP9_%2_1D                                   ; first 1-D pass
+    TRANSPOSE4x4W  0, 1, 2, 3, 4
+    VP9_%4_1D                                   ; second 1-D pass
+
+    pxor                m4, m4                  ; zero for ZERO_BLOCK and the lower clamp in the writeout
+    ZERO_BLOCK      blockq, 16, 4, m4
+    VP9_IDCT4_WRITEOUT
+    RET
+%endmacro
+
+INIT_MMX sse2
+IADST4_FN idct,  IDCT4,  iadst, IADST4
+IADST4_FN iadst, IADST4, idct,  IDCT4
+IADST4_FN iadst, IADST4, iadst, IADST4
+
+INIT_MMX ssse3
+IADST4_FN idct,  IDCT4,  iadst, IADST4
+IADST4_FN iadst, IADST4, idct,  IDCT4
+IADST4_FN iadst, IADST4, iadst, IADST4
+
+; inputs and outputs are dwords, coefficients are words
+;
+; dst1 = src1 * coef1 + src2 * coef2 + rnd >> 14
+; dst2 = src1 * coef2 - src2 * coef1 + rnd >> 14
+%macro SUMSUB_MUL 6-8 [pd_8192], [pd_3fff] ; src/dst 1-2, tmp1-2, coef1-2, rnd, mask
+    pand               m%3, m%1, %8             ; low bits of src1 selected by mask
+    pand               m%4, m%2, %8             ; low bits of src2 selected by mask
+    psrad              m%1, 14                  ; remaining high bits of src1
+    psrad              m%2, 14                  ; remaining high bits of src2
+    packssdw           m%4, m%2                 ; src2 as words: low parts in the low half, high parts in the high half
+    packssdw           m%3, m%1                 ; same layout for src1
+    punpckhwd          m%2, m%4, m%3            ; interleaved (src2, src1) high parts
+    punpcklwd          m%4, m%3                 ; interleaved (src2, src1) low parts
+    pmaddwd            m%3, m%4, [pw_%6_%5]     ; dst1 contribution of the low parts
+    pmaddwd            m%1, m%2, [pw_%6_%5]     ; dst1 contribution of the high parts
+    pmaddwd            m%4, [pw_m%5_%6]         ; dst2 contribution of the low parts
+    pmaddwd            m%2, [pw_m%5_%6]         ; dst2 contribution of the high parts
+    paddd              m%3, %7                  ; only the low-part products are rounded and shifted
+    paddd              m%4, %7
+    psrad              m%3, 14
+    psrad              m%4, 14
+    paddd              m%1, m%3                 ; high-part products are added unshifted
+    paddd              m%2, m%4
+%endmacro
+
+%macro IDCT4_12BPP_1D 0-8 [pd_8192], [pd_3fff], 0, 1, 2, 3, 4, 5 ; rnd, mask, in/out0-3, tmp0-1
+    SUMSUB_MUL          %3, %5, %7, %8, 11585, 11585, %1, %2 ; in0/in2 pair with coefficients 11585/11585
+    SUMSUB_MUL          %4, %6, %7, %8, 15137,  6270, %1, %2 ; in1/in3 pair with coefficients 15137/6270
+    SUMSUB_BA        d, %4, %3, %7              ; dword butterflies combining the two pairs
+    SUMSUB_BA        d, %6, %5, %7
+    SWAP                %4, %6, %3              ; rename registers so results end up in in/out0-3 order
+%endmacro
+
+%macro STORE_4x4 6 ; tmp1-2, reg1-2, min, max (add four residual rows held in two registers to dst, clamp, store)
+    movh               m%1, [dstq+strideq*0]    ; rows 0 and 1 share tmp1 (low/high half)
+    movh               m%2, [dstq+strideq*2]    ; rows 2 and 3 share tmp2 (low/high half)
+    movhps             m%1, [dstq+strideq*1]
+    movhps             m%2, [dstq+stride3q ]
+    paddw              m%1, m%3
+    paddw              m%2, m%4
+    pmaxsw             m%1, %5                  ; clamp from below to min
+    pmaxsw             m%2, %5
+    pminsw             m%1, %6                  ; clamp from above to max
+    pminsw             m%2, %6
+    movh   [dstq+strideq*0], m%1
+    movhps [dstq+strideq*1], m%1
+    movh   [dstq+strideq*2], m%2
+    movhps [dstq+stride3q ], m%2
+%endmacro
+
+%macro ROUND_AND_STORE_4x4 8 ; reg1-4, min, max, rnd, shift (four dword rows: (x+rnd)>>shift, pack to words, add to dst)
+    paddd              m%1, %7
+    paddd              m%2, %7
+    paddd              m%3, %7
+    paddd              m%4, %7
+    psrad              m%1, %8
+    psrad              m%2, %8
+    psrad              m%3, %8
+    psrad              m%4, %8
+    packssdw           m%1, m%2                 ; rows 0+1 as words in reg1
+    packssdw           m%3, m%4                 ; rows 2+3 as words in reg3
+    STORE_4x4           %2, %4, %1, %3, %5, %6  ; reg2 and reg4 are free now and serve as temporaries
+%endmacro
+
+INIT_XMM sse2
+cglobal vp9_idct_idct_4x4_add_12, 4, 4, 8, dst, stride, block, eob
+    cmp               eobd, 1
+    jg .idctfull                                ; eob > 1: run the full 2-D transform
+
+    ; dc-only - this is special, since for 4x4 12bpp, the max coef size is
+    ; 17+sign bpp. Since the multiply is with 11585, which is 14bpp, the
+    ; result of each multiply is 31+sign bit, i.e. it _exactly_ fits in a
+    ; dword. After the final shift (4), the result is 13+sign bits, so we
+    ; don't need any additional processing to fit it in a word
+    DEFINE_ARGS dst, stride, block, coef
+    pxor                m4, m4                  ; zero: clears the coefficient and is the lower pixel clamp
+    DC_ONLY              4, m4
+    movd                m0, coefd
+    pshuflw             m0, m0, q0000           ; broadcast word 0 across the low four words
+    punpcklqdq          m0, m0                  ; ...and duplicate them into the high half
+    mova                m5, [pw_4095]           ; upper pixel clamp
+    DEFINE_ARGS dst, stride, stride3
+    lea           stride3q, [strideq*3]
+    STORE_4x4            1, 3, 0, 0, m4, m5
+    RET
+
+.idctfull:
+    DEFINE_ARGS dst, stride, block, eob
+    mova                m0, [blockq+0*16]       ; one row of four dword coefficients per register
+    mova                m1, [blockq+1*16]
+    mova                m2, [blockq+2*16]
+    mova                m3, [blockq+3*16]
+    mova                m6, [pd_8192]           ; rounding constant passed to the 1-D passes
+    mova                m7, [pd_3fff]           ; low-bits mask passed to the 1-D passes
+
+    IDCT4_12BPP_1D      m6, m7
+    TRANSPOSE4x4D        0, 1, 2, 3, 4
+    IDCT4_12BPP_1D      m6, m7
+
+    pxor                m4, m4
+    ZERO_BLOCK      blockq, 16, 4, m4
+
+    ; writeout
+    DEFINE_ARGS dst, stride, stride3
+    lea           stride3q, [strideq*3]
+    mova                m5, [pw_4095]           ; upper pixel clamp
+    mova                m6, [pd_8]              ; rounding for the final >>4
+    ROUND_AND_STORE_4x4  0, 1, 2, 3, m4, m5, m6, 4
+    RET
+
+%macro SCRATCH 3-4 ; reg, spare_reg, mem[, name]: park m<reg> and optionally define reg_<name> as the parked operand
+%if ARCH_X86_64 ; x86-64: swap the value into the spare register
+    SWAP                %1, %2
+%if %0 == 4
+%define reg_%4 m%2
+%endif
+%else ; x86-32: store the value to memory instead
+    mova              [%3], m%1
+%if %0 == 4
+%define reg_%4 [%3]
+%endif
+%endif
+%endmacro
+
+%macro UNSCRATCH 3-4 ; reg, spare_reg, mem[, name]: bring a value parked by SCRATCH back into m<reg>
+%if ARCH_X86_64 ; x86-64: swap it back from the spare register
+    SWAP                %1, %2
+%else ; x86-32: reload it from memory
+    mova               m%1, [%3]
+%endif
+%if %0 == 4 ; drop the reg_<name> alias if one was given
+%undef reg_%4
+%endif
+%endmacro
+
+%macro PRELOAD 2-3 ; reg, mem[, name]: make a constant available as reg_<name>
+%if ARCH_X86_64 ; x86-64: load it into m<reg> once
+    mova               m%1, [%2]
+%if %0 == 3
+%define reg_%3 m%1
+%endif
+%elif %0 == 3 ; x86-32: no load, reg_<name> is the memory operand itself
+%define reg_%3 [%2]
+%endif
+%endmacro
+
+; out0 =  5283 * in0 + 13377 * in1 + 15212 * in2 +  9929 * in3 + rnd >> 14
+; out1 =  9929 * in0 + 13377 * in1 -  5283 * in2 - 15212 * in3 + rnd >> 14
+; out2 = 13377 * in0               - 13377 * in2 + 13377 * in3 + rnd >> 14
+; out3 = 15212 * in0 - 13377 * in1 +  9929 * in2 -  5283 * in3 + rnd >> 14
+%macro IADST4_12BPP_1D 0-2 [pd_8192], [pd_3fff] ; rnd, mask
+    pand                m4, m0, %2              ; low bits of in0
+    pand                m5, m1, %2              ; low bits of in1
+    psrad               m0, 14                  ; high bits of in0
+    psrad               m1, 14                  ; high bits of in1
+    packssdw            m5, m1
+    packssdw            m4, m0
+    punpckhwd           m1, m4, m5              ; interleaved (in0, in1) high parts
+    punpcklwd           m4, m5                  ; interleaved (in0, in1) low parts
+    pand                m5, m2, %2              ; same split for in2 and in3
+    pand                m6, m3, %2
+    psrad               m2, 14
+    psrad               m3, 14
+    packssdw            m6, m3
+    packssdw            m5, m2
+    punpckhwd           m3, m5, m6              ; interleaved (in2, in3) high parts
+    punpcklwd           m5, m6                  ; interleaved (in2, in3) low parts
+    SCRATCH              1,  8, rsp+0*mmsize, a ; reg_a = high parts of in0/in1
+    SCRATCH              5,  9, rsp+1*mmsize, b ; reg_b = low parts of in2/in3
+
+    ; m1/3 have the high bits of 0,1,2,3
+    ; m4/5 have the low bits of 0,1,2,3
+    ; m0/2/6/7 are free
+
+    mova                m2, [pw_15212_9929]
+    mova                m0, [pw_5283_13377]
+    pmaddwd             m7, m2, reg_b
+    pmaddwd             m6, m4, m0
+    pmaddwd             m2, m3
+    pmaddwd             m0, reg_a
+    paddd               m6, m7                  ; low-part sum for t0
+    paddd               m0, m2                  ; high-part sum for t0
+    mova                m1, [pw_m13377_13377]
+    mova                m5, [pw_13377_0]
+    pmaddwd             m7, m1, reg_b
+    pmaddwd             m2, m4, m5
+    pmaddwd             m1, m3
+    pmaddwd             m5, reg_a
+    paddd               m2, m7                  ; low-part sum for t2
+    paddd               m1, m5                  ; high-part sum for t2
+    paddd               m6, %1                  ; round and shift the low-part sums only
+    paddd               m2, %1
+    psrad               m6, 14
+    psrad               m2, 14
+    paddd               m0, m6                      ; t0
+    paddd               m2, m1                      ; t2
+
+    mova                m7, [pw_m5283_m15212]
+    mova                m5, [pw_9929_13377]
+    pmaddwd             m1, m7, reg_b
+    pmaddwd             m6, m4, m5
+    pmaddwd             m7, m3
+    pmaddwd             m5, reg_a
+    paddd               m6, m1                  ; low-part sum for t1
+    paddd               m7, m5                  ; high-part sum for t1
+    UNSCRATCH            5,  9, rsp+1*mmsize, b ; last uses below consume the parked values directly
+    pmaddwd             m5, [pw_9929_m5283]
+    pmaddwd             m4, [pw_15212_m13377]
+    pmaddwd             m3, [pw_9929_m5283]
+    UNSCRATCH            1,  8, rsp+0*mmsize, a
+    pmaddwd             m1, [pw_15212_m13377]
+    paddd               m4, m5                  ; low-part sum for t3
+    paddd               m3, m1                  ; high-part sum for t3
+    paddd               m6, %1                  ; round and shift the low-part sums only
+    paddd               m4, %1
+    psrad               m6, 14
+    psrad               m4, 14
+    paddd               m7, m6                      ; t1
+    paddd               m3, m4                      ; t3
+
+    SWAP                 1, 7                   ; move t1 into m1 so results sit in m0-m3
+%endmacro
+
+%macro IADST4_12BPP_FN 4 ; name1, 1-D macro prefix for pass 1, name2, 1-D macro prefix for pass 2
+cglobal vp9_%1_%3_4x4_add_12, 3, 3, 12, 2 * ARCH_X86_32 * mmsize, dst, stride, block, eob
+    mova                m0, [blockq+0*16]       ; one row of four dword coefficients per register
+    mova                m1, [blockq+1*16]
+    mova                m2, [blockq+2*16]
+    mova                m3, [blockq+3*16]
+
+    PRELOAD             10, pd_8192, rnd        ; reg_rnd: register on x86-64, memory operand on x86-32
+    PRELOAD             11, pd_3fff, mask       ; reg_mask: likewise
+    %2_12BPP_1D    reg_rnd, reg_mask
+    TRANSPOSE4x4D        0, 1, 2, 3, 4
+    %4_12BPP_1D    reg_rnd, reg_mask
+
+    pxor                m4, m4                  ; zero for ZERO_BLOCK and the lower pixel clamp
+    ZERO_BLOCK      blockq, 16, 4, m4
+
+    ; writeout
+    DEFINE_ARGS dst, stride, stride3
+    lea           stride3q, [strideq*3]
+    mova                m5, [pw_4095]           ; upper pixel clamp
+    mova                m6, [pd_8]              ; rounding for the final >>4
+    ROUND_AND_STORE_4x4  0, 1, 2, 3, m4, m5, m6, 4
+    RET
+%endmacro
+
+INIT_XMM sse2
+IADST4_12BPP_FN idct,  IDCT4,  iadst, IADST4
+IADST4_12BPP_FN iadst, IADST4, idct,  IDCT4
+IADST4_12BPP_FN iadst, IADST4, iadst, IADST4
+
+; the following line has not been executed at the end of this macro:
+; UNSCRATCH            6, 8, rsp+(%5+0)*mmsize
+%macro IDCT8_1D 1-5 [pd_8192], [pd_3fff], 2 * mmsize, 17 ; src, rnd, mask, src_stride, stack_offset
+    mova                m0, [%1+0*%4]           ; even-numbered inputs 0/2/4/6
+    mova                m2, [%1+2*%4]
+    mova                m4, [%1+4*%4]
+    mova                m6, [%1+6*%4]
+    IDCT4_12BPP_1D      %2, %3, 0, 2, 4, 6, 1, 3            ; m0/2/4/6 have t0/1/2/3
+    SCRATCH              4, 8, rsp+(%5+0)*mmsize ; park t2 and t3 to free m4/m6 as temporaries
+    SCRATCH              6, 9, rsp+(%5+1)*mmsize
+    mova                m1, [%1+1*%4]           ; odd-numbered inputs 1/3/5/7
+    mova                m3, [%1+3*%4]
+    mova                m5, [%1+5*%4]
+    mova                m7, [%1+7*%4]
+    SUMSUB_MUL           1, 7, 4, 6, 16069,  3196, %2, %3   ; m1=t7a, m7=t4a
+    SUMSUB_MUL           5, 3, 4, 6,  9102, 13623, %2, %3   ; m5=t6a, m3=t5a
+    SUMSUB_BA         d, 3, 7, 4                            ; m3=t4, m7=t5a
+    SUMSUB_BA         d, 5, 1, 4                            ; m5=t7, m1=t6a
+    SUMSUB_MUL           1, 7, 4, 6, 11585, 11585, %2, %3   ; m1=t6, m7=t5
+    SUMSUB_BA         d, 5, 0, 4                            ; m5=out0, m0=out7
+    SUMSUB_BA         d, 1, 2, 4                            ; m1=out1, m2=out6
+    UNSCRATCH            4, 8, rsp+(%5+0)*mmsize
+    UNSCRATCH            6, 9, rsp+(%5+1)*mmsize
+    SCRATCH              2, 8, rsp+(%5+0)*mmsize ; park out6; the macro ends without restoring it
+    SUMSUB_BA         d, 7, 4, 2                            ; m7=out2, m4=out5
+    SUMSUB_BA         d, 3, 6, 2                            ; m3=out3, m6=out4
+    SWAP                 0, 5, 4, 6, 2, 7       ; rename so out0-5 and out7 sit in m0-m5 and m7
+%endmacro
+
+%macro STORE_2x8 5-7 dstq, strideq ; tmp1-2, reg, min, max[, dst=dstq, offset=strideq]
+    mova               m%1, [%6+%7*0]           ; load two vectors of existing pixels: [dst] and [dst+offset]
+    mova               m%2, [%6+%7*1]
+    paddw              m%1, m%3                 ; add the word values in m%3 to both
+    paddw              m%2, m%3
+    pmaxsw             m%1, %4                  ; clamp from below to min (signed words)
+    pmaxsw             m%2, %4
+    pminsw             m%1, %5                  ; clamp from above to max (signed words)
+    pminsw             m%2, %5
+    mova         [%6+%7*0], m%1                 ; write both vectors back in place
+    mova         [%6+%7*1], m%2
+%endmacro
+
+; FIXME we can use the intermediate storage (rsp[0-15]) on x86-32 for temp
+; storage also instead of allocating two more stack spaces. This doesn't
+; matter much but it's something...
+INIT_XMM sse2
+cglobal vp9_idct_idct_8x8_add_10, 4, 6 + ARCH_X86_64, 14, \
+                                  16 * mmsize + 3 * ARCH_X86_32 * mmsize, \
+                                  dst, stride, block, eob
+    mova                m0, [pw_1023]               ; 1023 = 2^10-1, used below as the clip max
+    cmp               eobd, 1
+    jg .idctfull                                    ; eob > 1: run the full two-pass transform
+
+    ; dc-only - the 10bit version can be done entirely in 32bit, since the max
+    ; coef values are 16+sign bit, and the coef is 14bit, so 30+sign easily
+    ; fits in 32bit
+    DEFINE_ARGS dst, stride, block, coef            ; coef reuses the eob argument register
+    pxor                m2, m2                      ; m2 = 0 (clip min, also passed to DC_ONLY)
+    DC_ONLY              5, m2                      ; NOTE(review): presumably leaves the scaled DC in coefd and clears block[0], like DC_ONLY_64BIT - confirm
+    movd                m1, coefd
+    pshuflw             m1, m1, q0000               ; replicate word 0 across the low four words
+    punpcklqdq          m1, m1                      ; ... and into the high half: all 8 words = DC
+    DEFINE_ARGS dst, stride, cnt
+    mov               cntd, 4                       ; 4 iterations x 2 rows = 8 rows
+.loop_dc:
+    STORE_2x8            3, 4, 1, m2, m0            ; add DC to two rows, clamp to [0, pw_1023]
+    lea               dstq, [dstq+strideq*2]
+    dec               cntd
+    jg .loop_dc
+    RET
+
+.idctfull:                                          ; also entered from the _12 variant with m0 = pw_4095
+    SCRATCH              0, 12, rsp+16*mmsize, max  ; park the clip max; referenced later as reg_max
+    DEFINE_ARGS dst, stride, block, cnt, ptr, skip, dstbak ; cnt reuses the eob argument register
+%if ARCH_X86_64
+    mov            dstbakq, dstq                    ; keep the original dst for the second column group
+    movsxd            cntq, cntd                    ; sign-extend eob for use as a 64-bit index
+%endif
+%ifdef PIC
+    lea               ptrq, [default_8x8]
+    movzx             cntd, byte [ptrq+cntq-1]      ; cnt = default_8x8[eob-1] = first-pass iteration count
+%else
+    movzx             cntd, byte [default_8x8+cntq-1] ; cnt = default_8x8[eob-1] = first-pass iteration count
+%endif
+    mov              skipd, 2
+    sub              skipd, cntd                    ; skip = 2 - cnt = first-pass iterations not executed
+    mov               ptrq, rsp                     ; intermediate buffer starts at rsp
+    PRELOAD             10, pd_8192, rnd
+    PRELOAD             11, pd_3fff, mask
+    PRELOAD             13, pd_16, srnd
+.loop_1:
+    IDCT8_1D        blockq, reg_rnd, reg_mask       ; first pass, reading the coefficient block
+
+    TRANSPOSE4x4D        0, 1, 2, 3, 6              ; m6 is free here (out6 is still parked)
+    mova  [ptrq+ 0*mmsize], m0                      ; transposed out0-3 go to the even slots
+    mova  [ptrq+ 2*mmsize], m1
+    mova  [ptrq+ 4*mmsize], m2
+    mova  [ptrq+ 6*mmsize], m3
+    UNSCRATCH            6, 8, rsp+17*mmsize        ; fetch the out6 that IDCT8_1D left parked
+    TRANSPOSE4x4D        4, 5, 6, 7, 0
+    mova  [ptrq+ 1*mmsize], m4                      ; transposed out4-7 go to the odd slots
+    mova  [ptrq+ 3*mmsize], m5
+    mova  [ptrq+ 5*mmsize], m6
+    mova  [ptrq+ 7*mmsize], m7
+    add               ptrq, 8 * mmsize
+    add             blockq, mmsize                  ; advance input by one vector (4 dwords)
+    dec               cntd
+    jg .loop_1
+
+    ; zero-pad the remainder (skipped cols)
+    test             skipd, skipd
+    jz .end
+    add              skipd, skipd                   ; skip *= 2: .loop_z clears 4 vectors per iteration, .loop_1 wrote 8
+    lea             blockq, [blockq+skipq*(mmsize/2)] ; = old skip * mmsize, so blockq ends at block+2*mmsize either way
+    pxor                m0, m0
+.loop_z:
+    mova   [ptrq+mmsize*0], m0
+    mova   [ptrq+mmsize*1], m0
+    mova   [ptrq+mmsize*2], m0
+    mova   [ptrq+mmsize*3], m0
+    add               ptrq, 4 * mmsize
+    dec              skipd
+    jg .loop_z
+.end:
+
+    DEFINE_ARGS dst, stride, block, cnt, ptr, stride3, dstbak
+    lea           stride3q, [strideq*3]
+    mov               cntd, 2                       ; second pass: two groups of 4 output columns
+    mov               ptrq, rsp
+.loop_2:
+    IDCT8_1D          ptrq, reg_rnd, reg_mask       ; second pass, reading the intermediate buffer
+
+    pxor                m6, m6                      ; m6 = 0 (out6 is still parked, so m6 is free)
+    ROUND_AND_STORE_4x4  0, 1, 2, 3, m6, reg_max, reg_srnd, 5 ; write out0-3
+    lea               dstq, [dstq+strideq*4]        ; move down four rows
+    UNSCRATCH            0, 8, rsp+17*mmsize        ; m0 = parked out6
+    UNSCRATCH            1, 12, rsp+16*mmsize, max  ; m1 = clip max
+    UNSCRATCH            2, 13, pd_16, srnd         ; m2 = rounding constant
+    ROUND_AND_STORE_4x4  4, 5, 0, 7, m6, m1, m2, 5  ; write out4, out5, out6 (m0), out7
+    add               ptrq, 16                      ; next vector of the intermediate buffer
+%if ARCH_X86_64
+    lea               dstq, [dstbakq+8]             ; back to the top row, 8 bytes to the right
+%else
+    mov               dstq, dstm                    ; x86-32: reload the original dst argument
+    add               dstq, 8
+%endif
+    dec               cntd
+    jg .loop_2
+
+    ; m6 is still zero
+    ZERO_BLOCK blockq-2*mmsize, 32, 8, m6           ; blockq was advanced by 2*mmsize in total above; rewind and clear
+    RET
+
+%macro DC_ONLY_64BIT 2 ; shift, zero
+%if ARCH_X86_64
+    movsxd           coefq, dword [blockq]      ; sign-extend the DC coefficient to 64 bits
+    movd          [blockq], %2                  ; clear the DC coefficient in the block
+    imul             coefq, 11585               ; first stage: (coef * 11585 + 8192) >> 14
+    add              coefq, 8192
+    sar              coefq, 14
+    imul             coefq, 11585               ; second stage, with the final rounding and shift folded in
+    add              coefq, ((1 << (%1 - 1)) << 14) + 8192
+    sar              coefq, 14 + %1
+%else
+    mov              coefd, dword [blockq]
+    movd          [blockq], %2                  ; clear the DC coefficient in the block
+    DEFINE_ARGS dst, stride, cnt, coef, coefl   ; cnt takes over the register that held block
+    mov               cntd, 2                   ; two multiply stages
+.loop_dc_calc:
+    mov             coefld, coefd
+    sar              coefd, 14                  ; high part: coef >> 14
+    and             coefld, 0x3fff              ; low part: coef & 0x3fff
+    imul             coefd, 11585               ; multiply the parts separately so each product fits in 32 bits
+    imul            coefld, 11585
+    add             coefld, 8192                ; round and scale the low product back down
+    sar             coefld, 14
+    add              coefd, coefld              ; recombine high and low parts
+    dec               cntd
+    jg .loop_dc_calc
+    add              coefd, 1 << (%1 - 1)       ; final rounding shift by %1
+    sar              coefd, %1
+%endif
+%endmacro
+
+INIT_XMM sse2
+cglobal vp9_idct_idct_8x8_add_12, 4, 6 + ARCH_X86_64, 14, \
+                                  16 * mmsize + 3 * ARCH_X86_32 * mmsize, \
+                                  dst, stride, block, eob
+    mova                m0, [pw_4095]               ; 4095 = 2^12-1, used below as the clip max
+    cmp               eobd, 1
+    jg mangle(private_prefix %+ _ %+ vp9_idct_idct_8x8_add_10 %+ SUFFIX).idctfull ; eob > 1: reuse the 10-bit function's full path with m0 = pw_4095
+
+    ; dc-only - unfortunately, this one can overflow, since coefs are 18+sign
+    ; bpp, and 18+14+sign does not fit in 32bit, so we do 2-stage multiplies
+    DEFINE_ARGS dst, stride, block, coef, coefl
+    pxor                m2, m2                      ; m2 = 0 (clip min, also used to clear block[0])
+    DC_ONLY_64BIT        5, m2                      ; scaled DC ends up in coefd
+    movd                m1, coefd
+    pshuflw             m1, m1, q0000               ; replicate word 0 across the low four words
+    punpcklqdq          m1, m1                      ; ... and into the high half: all 8 words = DC
+    DEFINE_ARGS dst, stride, cnt
+    mov               cntd, 4                       ; 4 iterations x 2 rows = 8 rows
+.loop_dc:
+    STORE_2x8            3, 4, 1, m2, m0            ; add DC to two rows, clamp to [0, pw_4095]
+    lea               dstq, [dstq+strideq*2]
+    dec               cntd
+    jg .loop_dc
+    RET
+
+; inputs and outputs are dwords, coefficients are words
+;
+; dst1[hi]:dst3[lo] = src1 * coef1 + src2 * coef2
+; dst2[hi]:dst4[lo] = src1 * coef2 - src2 * coef1
+%macro SUMSUB_MUL_D 6-7 [pd_3fff] ; src/dst 1-2, dst3-4, coef1-2, mask
+    pand               m%3, m%1, %7             ; m%3 = src1 & mask (low part)
+    pand               m%4, m%2, %7             ; m%4 = src2 & mask (low part)
+    psrad              m%1, 14                  ; m%1 = src1 >> 14 (high part)
+    psrad              m%2, 14                  ; m%2 = src2 >> 14 (high part)
+    packssdw           m%4, m%2                 ; m%4 = words: src2 low parts | src2 high parts
+    packssdw           m%3, m%1                 ; m%3 = words: src1 low parts | src1 high parts
+    punpckhwd          m%2, m%4, m%3            ; m%2 = interleaved (src2, src1) high-part word pairs
+    punpcklwd          m%4, m%3                 ; m%4 = interleaved (src2, src1) low-part word pairs
+    pmaddwd            m%3, m%4, [pw_%6_%5]     ; dst3 = low-part sum term (presumably constant = coef2,coef1 word pairs)
+    pmaddwd            m%1, m%2, [pw_%6_%5]     ; dst1 = high-part sum term
+    pmaddwd            m%4, [pw_m%5_%6]         ; dst4 = low-part difference term (presumably constant = -coef1,coef2 word pairs)
+    pmaddwd            m%2, [pw_m%5_%6]         ; dst2 = high-part difference term
+%endmacro
+
+; dst1 = src2[hi]:src4[lo] + src1[hi]:src3[lo] + rnd >> 14
+; dst2 = src2[hi]:src4[lo] - src1[hi]:src3[lo] + rnd >> 14
+%macro SUMSUB_PACK_D 5-6 [pd_8192] ; src/dst 1-2, src3-4, tmp, rnd
+    SUMSUB_BA        d, %1, %2, %5              ; butterfly on the high parts
+    SUMSUB_BA        d, %3, %4, %5              ; butterfly on the low parts
+    paddd              m%3, %6                  ; round the low parts ...
+    paddd              m%4, %6
+    psrad              m%3, 14                  ; ... and scale them down by 14 bits
+    psrad              m%4, 14
+    paddd              m%1, m%3                 ; recombine: high part + scaled low part
+    paddd              m%2, m%4
+%endmacro
+
+%macro NEGD 1 ; reg: negate each dword in place
+%if cpuflag(ssse3)
+    psignd              %1, [pw_m1]             ; presumably pw_m1 is all-ones, i.e. -1 per dword, so psignd negates
+%else
+    pxor                %1, [pw_m1]             ; two's complement by hand: invert the bits (assuming pw_m1 is all-ones) ...
+    paddd               %1, [pd_1]              ; ... then add 1
+%endif
+%endmacro
+
+; the following line has not been executed at the end of this macro:
+; UNSCRATCH            6, 8, rsp+17*mmsize
+%macro IADST8_1D 1-3 [pd_8192], [pd_3fff] ; src, rnd, mask
+    mova                m0, [%1+ 0*mmsize]              ; input rows 0, 3, 4, 7 (row stride is 2*mmsize)
+    mova                m3, [%1+ 6*mmsize]
+    mova                m4, [%1+ 8*mmsize]
+    mova                m7, [%1+14*mmsize]
+    SUMSUB_MUL_D         7, 0, 1, 2, 16305,  1606, %3   ; m7/1=t0a, m0/2=t1a
+    SUMSUB_MUL_D         3, 4, 5, 6, 10394, 12665, %3   ; m3/5=t4a, m4/6=t5a
+    SCRATCH              0, 8, rsp+17*mmsize            ; free m0 for use as tmp below
+    SUMSUB_PACK_D        3, 7, 5, 1, 0, %2              ; m3=t0, m7=t4
+    UNSCRATCH            0, 8, rsp+17*mmsize
+    SUMSUB_PACK_D        4, 0, 6, 2, 1, %2              ; m4=t1, m0=t5
+
+    SCRATCH              3, 8, rsp+17*mmsize            ; park t0
+    SCRATCH              4, 9, rsp+18*mmsize            ; park t1
+    SCRATCH              7, 10, rsp+19*mmsize           ; park t4
+    SCRATCH              0, 11, rsp+20*mmsize           ; park t5
+
+    mova                m1, [%1+ 2*mmsize]              ; input rows 1, 2, 5, 6
+    mova                m2, [%1+ 4*mmsize]
+    mova                m5, [%1+10*mmsize]
+    mova                m6, [%1+12*mmsize]
+    SUMSUB_MUL_D         5, 2, 3, 4, 14449,  7723, %3   ; m5/8=t2a, m2/9=t3a
+    SUMSUB_MUL_D         1, 6, 7, 0,  4756, 15679, %3   ; m1/10=t6a, m6/11=t7a
+    SCRATCH              2, 12, rsp+21*mmsize           ; free m2 for use as tmp below
+    SUMSUB_PACK_D        1, 5, 7, 3, 2, %2              ; m1=t2, m5=t6
+    UNSCRATCH            2, 12, rsp+21*mmsize
+    SUMSUB_PACK_D        6, 2, 0, 4, 3, %2              ; m6=t3, m2=t7
+
+    UNSCRATCH            7, 10, rsp+19*mmsize           ; restore t4
+    UNSCRATCH            0, 11, rsp+20*mmsize           ; restore t5
+    SCRATCH              1, 10, rsp+19*mmsize           ; park t2
+    SCRATCH              6, 11, rsp+20*mmsize           ; park t3
+
+    SUMSUB_MUL_D         7, 0, 3, 4, 15137,  6270, %3   ; m7/8=t4a, m0/9=t5a
+    SUMSUB_MUL_D         2, 5, 1, 6,  6270, 15137, %3   ; m2/10=t7a, m5/11=t6a
+    SCRATCH              2, 12, rsp+21*mmsize           ; free m2 for use as tmp below
+    SUMSUB_PACK_D        5, 7, 6, 3, 2, %2              ; m5=-out1, m7=t6
+    UNSCRATCH            2, 12, rsp+21*mmsize
+    NEGD                m5                              ; m5=out1
+    SUMSUB_PACK_D        2, 0, 1, 4, 3, %2              ; m2=out6, m0=t7
+    SUMSUB_MUL           7, 0, 3, 4, 11585, 11585, %2, %3   ; m7=out2, m0=-out5
+    NEGD                m0                              ; m0=out5
+
+    UNSCRATCH            3, 8, rsp+17*mmsize            ; restore t0
+    UNSCRATCH            4, 9, rsp+18*mmsize            ; restore t1
+    UNSCRATCH            1, 10, rsp+19*mmsize           ; restore t2
+    UNSCRATCH            6, 11, rsp+20*mmsize           ; restore t3
+    SCRATCH              2, 8, rsp+17*mmsize            ; park out6; the caller must UNSCRATCH it
+    SCRATCH              0, 9, rsp+18*mmsize            ; park out5 while m0 is used as tmp
+
+    SUMSUB_BA         d, 1, 3,  2                       ; m1=out0, m3=t2
+    SUMSUB_BA         d, 6, 4,  2                       ; m6=-out7, m4=t3
+    NEGD                m6                              ; m6=out7
+    SUMSUB_MUL           3, 4,  2,  0, 11585, 11585, %2, %3 ; m3=-out3, m4=out4
+    NEGD                m3                              ; m3=out3
+
+    UNSCRATCH            0, 9, rsp+18*mmsize            ; restore out5
+
+    SWAP                 0, 1, 5                        ; rename so m0=out0, m1=out1, m5=out5 (assuming x86inc chained SWAP)
+    SWAP                 2, 7, 6                        ; rename so m2=out2, m7=out7; m6 is free (out6 stays parked)
+%endmacro
+
+%macro IADST8_FN 5 ; name1, 1D macro for pass 1, name2, 1D macro for pass 2, prefix of the <prefix>_8x8 eob lookup table
+cglobal vp9_%1_%3_8x8_add_10, 4, 6 + ARCH_X86_64, 16, \
+                              16 * mmsize + ARCH_X86_32 * 6 * mmsize, \
+                              dst, stride, block, eob
+    mova                m0, [pw_1023]               ; 1023 = 2^10-1, parked below as the clip max
+
+.body:                                              ; also entered from the _12 variant with m0 = pw_4095
+    SCRATCH              0, 13, rsp+16*mmsize, max  ; park the clip max; referenced later as reg_max
+    DEFINE_ARGS dst, stride, block, cnt, ptr, skip, dstbak ; cnt reuses the eob argument register
+%if ARCH_X86_64
+    mov            dstbakq, dstq                    ; keep the original dst for the second column group
+    movsxd            cntq, cntd                    ; sign-extend eob for use as a 64-bit index
+%endif
+%ifdef PIC
+    lea               ptrq, [%5_8x8]
+    movzx             cntd, byte [ptrq+cntq-1]      ; cnt = <%5>_8x8[eob-1] = first-pass iteration count
+%else
+    movzx             cntd, byte [%5_8x8+cntq-1]    ; cnt = <%5>_8x8[eob-1] = first-pass iteration count
+%endif
+    mov              skipd, 2
+    sub              skipd, cntd                    ; skip = 2 - cnt = first-pass iterations not executed
+    mov               ptrq, rsp                     ; intermediate buffer starts at rsp
+    PRELOAD             14, pd_8192, rnd
+    PRELOAD             15, pd_3fff, mask
+.loop_1:
+    %2_1D           blockq, reg_rnd, reg_mask       ; first pass, reading the coefficient block
+
+    TRANSPOSE4x4D        0, 1, 2, 3, 6              ; m6 is free here (out6 is still parked)
+    mova  [ptrq+ 0*mmsize], m0                      ; transposed out0-3 go to the even slots
+    mova  [ptrq+ 2*mmsize], m1
+    mova  [ptrq+ 4*mmsize], m2
+    mova  [ptrq+ 6*mmsize], m3
+    UNSCRATCH            6, 8, rsp+17*mmsize        ; fetch the out6 that the 1D macro left parked
+    TRANSPOSE4x4D        4, 5, 6, 7, 0
+    mova  [ptrq+ 1*mmsize], m4                      ; transposed out4-7 go to the odd slots
+    mova  [ptrq+ 3*mmsize], m5
+    mova  [ptrq+ 5*mmsize], m6
+    mova  [ptrq+ 7*mmsize], m7
+    add               ptrq, 8 * mmsize
+    add             blockq, mmsize                  ; advance input by one vector (4 dwords)
+    dec               cntd
+    jg .loop_1
+
+    ; zero-pad the remainder (skipped cols)
+    test             skipd, skipd
+    jz .end
+    add              skipd, skipd                   ; skip *= 2: .loop_z clears 4 vectors per iteration, .loop_1 wrote 8
+    lea             blockq, [blockq+skipq*(mmsize/2)] ; = old skip * mmsize, so blockq ends at block+2*mmsize either way
+    pxor                m0, m0
+.loop_z:
+    mova   [ptrq+mmsize*0], m0
+    mova   [ptrq+mmsize*1], m0
+    mova   [ptrq+mmsize*2], m0
+    mova   [ptrq+mmsize*3], m0
+    add               ptrq, 4 * mmsize
+    dec              skipd
+    jg .loop_z
+.end:
+
+    DEFINE_ARGS dst, stride, block, cnt, ptr, stride3, dstbak
+    lea           stride3q, [strideq*3]
+    mov               cntd, 2                       ; second pass: two groups of 4 output columns
+    mov               ptrq, rsp
+.loop_2:
+    %4_1D             ptrq, reg_rnd, reg_mask       ; second pass, reading the intermediate buffer
+
+    pxor                m6, m6                      ; m6 = 0 (out6 is still parked, so m6 is free)
+    PRELOAD              9, pd_16, srnd             ; loaded inside the loop: both 1D macros use scratch slot 9
+    ROUND_AND_STORE_4x4  0, 1, 2, 3, m6, reg_max, reg_srnd, 5 ; write out0-3
+    lea               dstq, [dstq+strideq*4]        ; move down four rows
+    UNSCRATCH            0, 8, rsp+17*mmsize        ; m0 = parked out6
+    UNSCRATCH            1, 13, rsp+16*mmsize, max  ; m1 = clip max
+    UNSCRATCH            2, 9, pd_16, srnd          ; m2 = rounding constant
+    ROUND_AND_STORE_4x4  4, 5, 0, 7, m6, m1, m2, 5  ; write out4, out5, out6 (m0), out7
+    add               ptrq, 16                      ; next vector of the intermediate buffer
+%if ARCH_X86_64
+    lea               dstq, [dstbakq+8]             ; back to the top row, 8 bytes to the right
+%else
+    mov               dstq, dstm                    ; x86-32: reload the original dst argument
+    add               dstq, 8
+%endif
+    dec               cntd
+    jg .loop_2
+
+    ; m6 is still zero
+    ZERO_BLOCK blockq-2*mmsize, 32, 8, m6           ; blockq was advanced by 2*mmsize in total above; rewind and clear
+    RET
+
+cglobal vp9_%1_%3_8x8_add_12, 4, 6 + ARCH_X86_64, 16, \
+                              16 * mmsize + ARCH_X86_32 * 6 * mmsize, \
+                              dst, stride, block, eob
+    mova                m0, [pw_4095]               ; 4095 = 2^12-1; the only difference from the 10-bit entry
+    jmp mangle(private_prefix %+ _ %+ vp9_%1_%3_8x8_add_10 %+ SUFFIX).body ; share the 10-bit body
+%endmacro
+
+INIT_XMM sse2
+IADST8_FN idct,  IDCT8,  iadst, IADST8, row        ; vp9_idct_iadst_8x8_add_10/_12, uses row_8x8
+IADST8_FN iadst, IADST8, idct,  IDCT8,  col        ; vp9_iadst_idct_8x8_add_10/_12, uses col_8x8
+IADST8_FN iadst, IADST8, iadst, IADST8, default    ; vp9_iadst_iadst_8x8_add_10/_12, uses default_8x8
+
+%macro IDCT16_1D 1-4 4 * mmsize, 65, 67 ; src, src_stride, stack_offset, mm32bit_stack_offset
+    IDCT8_1D            %1, [pd_8192], [pd_3fff], %2 * 2, %4    ; m0-3=t0-3a, m4-5/m8|r67/m7=t4-7
+    ; SCRATCH            6, 8, rsp+(%4+0)*mmsize    ; t6
+    SCRATCH              0, 15, rsp+(%4+7)*mmsize   ; t0a
+    SCRATCH              1, 14, rsp+(%4+6)*mmsize   ; t1a
+    SCRATCH              2, 13, rsp+(%4+5)*mmsize   ; t2a
+    SCRATCH              3, 12, rsp+(%4+4)*mmsize   ; t3a
+    SCRATCH              4, 11, rsp+(%4+3)*mmsize   ; t4
+    mova [rsp+(%3+0)*mmsize], m5                    ; t5
+    mova [rsp+(%3+1)*mmsize], m7                    ; t7
+
+    mova                m0, [%1+ 1*%2]              ; in1
+    mova                m3, [%1+ 7*%2]              ; in7
+    mova                m4, [%1+ 9*%2]              ; in9
+    mova                m7, [%1+15*%2]              ; in15
+
+    SUMSUB_MUL           0, 7, 1, 2, 16305,  1606   ; m0=t15a, m7=t8a
+    SUMSUB_MUL           4, 3, 1, 2, 10394, 12665   ; m4=t14a, m3=t9a
+    SUMSUB_BA         d, 3, 7, 1                    ; m3=t8, m7=t9
+    SUMSUB_BA         d, 4, 0, 1                    ; m4=t15,m0=t14
+    SUMSUB_MUL           0, 7, 1, 2, 15137,  6270   ; m0=t14a, m7=t9a
+
+    mova                m1, [%1+ 3*%2]              ; in3
+    mova                m2, [%1+ 5*%2]              ; in5
+    mova                m5, [%1+11*%2]              ; in11
+    mova                m6, [%1+13*%2]              ; in13
+
+    SCRATCH              0,  9, rsp+(%4+1)*mmsize   ; park t14a
+    SCRATCH              7, 10, rsp+(%4+2)*mmsize   ; park t9a
+
+    SUMSUB_MUL           2, 5, 0, 7, 14449,  7723   ; m2=t13a, m5=t10a
+    SUMSUB_MUL           6, 1, 0, 7,  4756, 15679   ; m6=t12a, m1=t11a
+    SUMSUB_BA         d, 5, 1, 0                    ; m5=t11,m1=t10
+    SUMSUB_BA         d, 2, 6, 0                    ; m2=t12,m6=t13
+    NEGD                m1                          ; m1=-t10
+    SUMSUB_MUL           1, 6, 0, 7, 15137,  6270   ; m1=t13a, m6=t10a
+
+    UNSCRATCH            7, 10, rsp+(%4+2)*mmsize   ; restore t9a
+    SUMSUB_BA         d, 5, 3, 0                    ; m5=t8a, m3=t11a
+    SUMSUB_BA         d, 6, 7, 0                    ; m6=t9,  m7=t10
+    SUMSUB_BA         d, 2, 4, 0                    ; m2=t15a,m4=t12a
+    SCRATCH              5, 10, rsp+(%4+2)*mmsize   ; park t8a
+    SUMSUB_MUL           4, 3, 0, 5, 11585, 11585   ; m4=t12, m3=t11
+    UNSCRATCH            0, 9, rsp+(%4+1)*mmsize    ; restore t14a
+    SUMSUB_BA         d, 1, 0, 5                    ; m1=t14, m0=t13
+    SCRATCH              6, 9, rsp+(%4+1)*mmsize    ; park t9
+    SUMSUB_MUL           0, 7, 6, 5, 11585, 11585   ; m0=t13a,m7=t10a
+
+    ; order: 15|r74,14|r73,13|r72,12|r71,11|r70,r65,8|r67,r66,10|r69,9|r68,7,3,4,0,1,2
+    ; free: 6,5
+
+    UNSCRATCH            5, 15, rsp+(%4+7)*mmsize   ; fetch t0a
+    SUMSUB_BA         d, 2, 5, 6                    ; m2=out0, m5=out15
+    SCRATCH              5, 15, rsp+(%4+7)*mmsize   ; park out15
+    UNSCRATCH            5, 14, rsp+(%4+6)*mmsize   ; fetch t1a
+    SUMSUB_BA         d, 1, 5, 6                    ; m1=out1, m5=out14
+    SCRATCH              5, 14, rsp+(%4+6)*mmsize   ; park out14
+    UNSCRATCH            5, 13, rsp+(%4+5)*mmsize   ; fetch t2a
+    SUMSUB_BA         d, 0, 5, 6                    ; m0=out2, m5=out13
+    SCRATCH              5, 13, rsp+(%4+5)*mmsize   ; park out13
+    UNSCRATCH            5, 12, rsp+(%4+4)*mmsize   ; fetch t3a
+    SUMSUB_BA         d, 4, 5, 6                    ; m4=out3, m5=out12
+    SCRATCH              5, 12, rsp+(%4+4)*mmsize   ; park out12
+    UNSCRATCH            5, 11, rsp+(%4+3)*mmsize   ; fetch t4
+    SUMSUB_BA         d, 3, 5, 6                    ; m3=out4, m5=out11
+    SCRATCH              4, 11, rsp+(%4+3)*mmsize   ; park out3
+    mova                m4, [rsp+(%3+0)*mmsize]     ; fetch t5
+    SUMSUB_BA         d, 7, 4, 6                    ; m7=out5, m4=out10
+    mova [rsp+(%3+0)*mmsize], m5                    ; spill out11 into the slot that held t5
+    UNSCRATCH            5, 8, rsp+(%4+0)*mmsize    ; fetch t6 (left parked by IDCT8_1D)
+    UNSCRATCH            6, 9, rsp+(%4+1)*mmsize    ; fetch t9
+    SCRATCH              2, 8, rsp+(%4+0)*mmsize    ; park out0
+    SCRATCH              1, 9, rsp+(%4+1)*mmsize    ; park out1
+    UNSCRATCH            1, 10, rsp+(%4+2)*mmsize   ; fetch t8a
+    SCRATCH              0, 10, rsp+(%4+2)*mmsize   ; park out2
+    mova                m0, [rsp+(%3+1)*mmsize]     ; fetch t7
+    SUMSUB_BA         d, 6, 5, 2                    ; m6=out6, m5=out9
+    SUMSUB_BA         d, 1, 0, 2                    ; m1=out7, m0=out8
+
+    SWAP                 0, 3, 1, 7, 2, 6, 4        ; rename registers into the output order listed below
+
+    ; output order: 8-11|r67-70=out0-3
+    ;               0-6,r65=out4-11
+    ;               12-15|r71-74=out12-15
+%endmacro
+
+INIT_XMM sse2
+cglobal vp9_idct_idct_16x16_add_10, 4, 6 + ARCH_X86_64, 16, \
+                                    67 * mmsize + ARCH_X86_32 * 8 * mmsize, \
+                                    dst, stride, block, eob
+    mova                m0, [pw_1023]               ; 1023 = 2^10-1, used below as the clip max
+    cmp               eobd, 1
+    jg .idctfull                                    ; eob > 1: run the full two-pass transform
+
+    ; dc-only - the 10bit version can be done entirely in 32bit, since the max
+    ; coef values are 17+sign bit, and the coef is 14bit, so 31+sign easily
+    ; fits in 32bit
+    DEFINE_ARGS dst, stride, block, coef            ; coef reuses the eob argument register
+    pxor                m2, m2                      ; m2 = 0 (clip min, also passed to DC_ONLY)
+    DC_ONLY              6, m2                      ; NOTE(review): presumably leaves the scaled DC in coefd and clears block[0], like DC_ONLY_64BIT - confirm
+    movd                m1, coefd
+    pshuflw             m1, m1, q0000               ; replicate word 0 across the low four words
+    punpcklqdq          m1, m1                      ; ... and into the high half: all 8 words = DC
+    DEFINE_ARGS dst, stride, cnt
+    mov               cntd, 8                       ; 8 iterations x 2 rows = 16 rows
+.loop_dc:
+    STORE_2x8            3, 4, 1, m2, m0, dstq,         mmsize ; current row: two adjacent vectors
+    STORE_2x8            3, 4, 1, m2, m0, dstq+strideq, mmsize ; next row: two adjacent vectors
+    lea               dstq, [dstq+strideq*2]
+    dec               cntd
+    jg .loop_dc
+    RET
+
+.idctfull:                                          ; also entered from the _12 variant with m0 = pw_4095
+    mova   [rsp+64*mmsize], m0                      ; keep the clip max in stack slot 64
+    DEFINE_ARGS dst, stride, block, cnt, ptr, skip, dstbak ; cnt reuses the eob argument register
+%if ARCH_X86_64
+    mov            dstbakq, dstq                    ; keep the original dst
+    movsxd            cntq, cntd                    ; sign-extend eob for use as a 64-bit index
+%endif
+%ifdef PIC
+    lea               ptrq, [default_16x16]
+    movzx             cntd, byte [ptrq+cntq-1]      ; cnt = default_16x16[eob-1] = first-pass iteration count
+%else
+    movzx             cntd, byte [default_16x16+cntq-1] ; cnt = default_16x16[eob-1] = first-pass iteration count
+%endif
+    mov              skipd, 4
+    sub              skipd, cntd                    ; skip = 4 - cnt = first-pass iterations not executed
+    mov               ptrq, rsp                     ; intermediate buffer starts at rsp
+.loop_1:
+    IDCT16_1D       blockq                          ; first pass, reading the coefficient block
+
+    TRANSPOSE4x4D        0, 1, 2, 3, 7              ; m0-3 = out4-7
+    mova  [ptrq+ 1*mmsize], m0
+    mova  [ptrq+ 5*mmsize], m1
+    mova  [ptrq+ 9*mmsize], m2
+    mova  [ptrq+13*mmsize], m3
+    mova                m7, [rsp+65*mmsize]         ; out11 was left in stack slot 65
+    TRANSPOSE4x4D        4, 5, 6, 7, 0              ; m4-7 = out8-11
+    mova  [ptrq+ 2*mmsize], m4
+    mova  [ptrq+ 6*mmsize], m5
+    mova  [ptrq+10*mmsize], m6
+    mova  [ptrq+14*mmsize], m7
+    UNSCRATCH               0, 8, rsp+67*mmsize     ; fetch out0-3
+    UNSCRATCH               1, 9, rsp+68*mmsize
+    UNSCRATCH               2, 10, rsp+69*mmsize
+    UNSCRATCH               3, 11, rsp+70*mmsize
+    TRANSPOSE4x4D        0, 1, 2, 3, 7
+    mova  [ptrq+ 0*mmsize], m0
+    mova  [ptrq+ 4*mmsize], m1
+    mova  [ptrq+ 8*mmsize], m2
+    mova  [ptrq+12*mmsize], m3
+    UNSCRATCH               4, 12, rsp+71*mmsize    ; fetch out12-15
+    UNSCRATCH               5, 13, rsp+72*mmsize
+    UNSCRATCH               6, 14, rsp+73*mmsize
+    UNSCRATCH               7, 15, rsp+74*mmsize
+    TRANSPOSE4x4D        4, 5, 6, 7, 0
+    mova  [ptrq+ 3*mmsize], m4
+    mova  [ptrq+ 7*mmsize], m5
+    mova  [ptrq+11*mmsize], m6
+    mova  [ptrq+15*mmsize], m7
+    add               ptrq, 16 * mmsize
+    add             blockq, mmsize                  ; advance input by one vector (4 dwords)
+    dec               cntd
+    jg .loop_1
+
+    ; zero-pad the remainder (skipped cols)
+    test             skipd, skipd
+    jz .end
+    add              skipd, skipd                   ; skip *= 2: .loop_z clears 8 vectors per iteration, .loop_1 wrote 16
+    lea             blockq, [blockq+skipq*(mmsize/2)] ; = old skip * mmsize, so blockq ends at block+4*mmsize either way
+    pxor                m0, m0
+.loop_z:
+    mova   [ptrq+mmsize*0], m0
+    mova   [ptrq+mmsize*1], m0
+    mova   [ptrq+mmsize*2], m0
+    mova   [ptrq+mmsize*3], m0
+    mova   [ptrq+mmsize*4], m0
+    mova   [ptrq+mmsize*5], m0
+    mova   [ptrq+mmsize*6], m0
+    mova   [ptrq+mmsize*7], m0
+    add               ptrq, 8 * mmsize
+    dec              skipd
+    jg .loop_z
+.end:
+
+    DEFINE_ARGS dst, stride, block, cnt, ptr, stride3, dstbak
+    lea           stride3q, [strideq*3]
+    mov               cntd, 4                       ; second pass: four groups of 4 output columns
+    mov               ptrq, rsp
+.loop_2:
+    IDCT16_1D         ptrq                          ; second pass, reading the intermediate buffer
+
+    pxor               m7, m7                       ; m7 = 0
+    lea               dstq, [dstq+strideq*4]        ; rows 4-7 are written first: m0-3 hold out4-7
+    ROUND_AND_STORE_4x4  0, 1, 2, 3, m7, [rsp+64*mmsize], [pd_32], 6
+    lea               dstq, [dstq+strideq*4]        ; rows 8-11
+    mova                m0, [rsp+65*mmsize]         ; out11 was left in stack slot 65
+    mova                m1, [rsp+64*mmsize]         ; clip max
+    mova                m2, [pd_32]                 ; rounding constant
+    ROUND_AND_STORE_4x4  4, 5, 6, 0, m7, m1, m2, 6  ; out8-11
+
+%if ARCH_X86_64
+    DEFINE_ARGS dstbak, stride, block, cnt, ptr, stride3, dst ; swap names so "dst" now names the backup register (rows 0-3)
+%else
+    mov               dstq, dstm                    ; x86-32: reload dst from its argument slot
+%endif
+    UNSCRATCH               0, 8, rsp+67*mmsize     ; fetch out0-3
+    UNSCRATCH               4, 9, rsp+68*mmsize
+    UNSCRATCH               5, 10, rsp+69*mmsize
+    UNSCRATCH               3, 11, rsp+70*mmsize
+    ROUND_AND_STORE_4x4  0, 4, 5, 3, m7, m1, m2, 6  ; out0-3
+%if ARCH_X86_64
+    DEFINE_ARGS dst, stride, block, cnt, ptr, stride3, dstbak ; restore the normal names
+    lea               dstq, [dstbakq+stride3q*4]    ; rows 12-15 (3*stride*4 = 12 rows down)
+%else
+    lea               dstq, [dstq+stride3q*4]       ; rows 12-15 (3*stride*4 = 12 rows down)
+%endif
+    UNSCRATCH               4, 12, rsp+71*mmsize    ; fetch out12-15
+    UNSCRATCH               5, 13, rsp+72*mmsize
+    UNSCRATCH               6, 14, rsp+73*mmsize
+    UNSCRATCH               0, 15, rsp+74*mmsize
+    ROUND_AND_STORE_4x4  4, 5, 6, 0, m7, m1, m2, 6  ; out12-15
+
+    add               ptrq, mmsize                  ; next vector of the intermediate buffer
+%if ARCH_X86_64
+    add            dstbakq, 8                       ; step the column base 8 bytes to the right
+    mov               dstq, dstbakq
+%else
+    add         dword dstm, 8                       ; x86-32: the column base lives in the dst argument slot
+    mov               dstq, dstm
+%endif
+    dec               cntd
+    jg .loop_2
+
+    ; m7 is still zero
+    ZERO_BLOCK blockq-4*mmsize, 64, 16, m7          ; blockq was advanced by 4*mmsize in total above; rewind and clear
+    RET
+
+INIT_XMM sse2
+cglobal vp9_idct_idct_16x16_add_12, 4, 6 + ARCH_X86_64, 16, \
+                                    67 * mmsize + ARCH_X86_32 * 8 * mmsize, \
+                                    dst, stride, block, eob
+    mova                m0, [pw_4095]               ; 4095 = 2^12-1, used below as the clip max
+    cmp               eobd, 1
+    jg mangle(private_prefix %+ _ %+ vp9_idct_idct_16x16_add_10 %+ SUFFIX).idctfull ; eob > 1: reuse the 10-bit function's full path with m0 = pw_4095
+
+    ; dc-only - unfortunately, this one can overflow, since coefs are 19+sign
+    ; bpp, and 19+14+sign does not fit in 32bit, so we do 2-stage multiplies
+    DEFINE_ARGS dst, stride, block, coef, coefl
+    pxor                m2, m2                      ; m2 = 0 (clip min, also used to clear block[0])
+    DC_ONLY_64BIT        6, m2                      ; scaled DC ends up in coefd
+    movd                m1, coefd
+    pshuflw             m1, m1, q0000               ; replicate word 0 across the low four words
+    punpcklqdq          m1, m1                      ; ... and into the high half: all 8 words = DC
+    DEFINE_ARGS dst, stride, cnt
+    mov               cntd, 8                       ; 8 iterations x 2 rows = 16 rows
+.loop_dc:
+    STORE_2x8            3, 4, 1, m2, m0, dstq,         mmsize ; current row: two adjacent vectors
+    STORE_2x8            3, 4, 1, m2, m0, dstq+strideq, mmsize ; next row: two adjacent vectors
+    lea               dstq, [dstq+strideq*2]
+    dec               cntd
+    jg .loop_dc
+    RET
+
+; r65-69 are available for spills
+; r70-77 are available on x86-32 only (x86-64 should use m8-15)
+; output should be in m8-11|r70-73, m0-6,r65 and m12-15|r74-77
+%macro IADST16_1D 1 ; src (rows are 4*mmsize apart)
+    mova                m0, [%1+ 0*4*mmsize]        ; in0
+    mova                m1, [%1+ 7*4*mmsize]        ; in7
+    mova                m2, [%1+ 8*4*mmsize]        ; in8
+    mova                m3, [%1+15*4*mmsize]        ; in15
+    SUMSUB_MUL_D         3, 0, 4, 5, 16364,  804    ; m3/4=t0, m0/5=t1
+    SUMSUB_MUL_D         1, 2, 6, 7, 11003, 12140   ; m1/6=t8, m2/7=t9
+    SCRATCH              0, 8, rsp+70*mmsize        ; free m0 for use as tmp below
+    SUMSUB_PACK_D        1, 3, 6, 4, 0              ; m1=t0a, m3=t8a
+    UNSCRATCH            0, 8, rsp+70*mmsize
+    SUMSUB_PACK_D        2, 0, 7, 5, 4              ; m2=t1a, m0=t9a
+    mova   [rsp+67*mmsize], m1                      ; spill t0a
+    SCRATCH              2, 9, rsp+71*mmsize        ; park t1a
+    SCRATCH              3, 12, rsp+74*mmsize       ; park t8a
+    SCRATCH              0, 13, rsp+75*mmsize       ; park t9a
+
+    mova                m0, [%1+ 3*4*mmsize]        ; in3
+    mova                m1, [%1+ 4*4*mmsize]        ; in4
+    mova                m2, [%1+11*4*mmsize]        ; in11
+    mova                m3, [%1+12*4*mmsize]        ; in12
+    SUMSUB_MUL_D         2, 1, 4, 5, 14811,  7005   ; m2/4=t4, m1/5=t5
+    SUMSUB_MUL_D         0, 3, 6, 7,  5520, 15426   ; m0/6=t12, m3/7=t13
+    SCRATCH              1, 10, rsp+72*mmsize       ; free m1 for use as tmp below
+    SUMSUB_PACK_D        0, 2, 6, 4, 1              ; m0=t4a, m2=t12a
+    UNSCRATCH            1, 10, rsp+72*mmsize
+    SUMSUB_PACK_D        3, 1, 7, 5, 4              ; m3=t5a, m1=t13a
+    SCRATCH              0, 15, rsp+77*mmsize       ; park t4a
+    SCRATCH              3, 11, rsp+73*mmsize       ; park t5a
+
+    UNSCRATCH            0, 12, rsp+74*mmsize       ; t8a
+    UNSCRATCH            3, 13, rsp+75*mmsize       ; t9a
+    SUMSUB_MUL_D         0, 3, 4, 5, 16069,  3196   ; m0/4=t8, m3/5=t9
+    SUMSUB_MUL_D         1, 2, 6, 7,  3196, 16069   ; m1/6=t13, m2/7=t12
+    SCRATCH              1, 12, rsp+74*mmsize       ; free m1 for use as tmp below
+    SUMSUB_PACK_D        2, 0, 7, 4, 1              ; m2=t8a, m0=t12a
+    UNSCRATCH            1, 12, rsp+74*mmsize
+    SUMSUB_PACK_D        1, 3, 6, 5, 4              ; m1=t9a, m3=t13a
+    mova   [rsp+65*mmsize], m2                      ; spill t8a
+    mova   [rsp+66*mmsize], m1                      ; spill t9a
+    SCRATCH              0, 8, rsp+70*mmsize        ; park t12a
+    SCRATCH              3, 12, rsp+74*mmsize       ; park t13a
+
+    mova                m0, [%1+ 2*4*mmsize]        ; in2
+    mova                m1, [%1+ 5*4*mmsize]        ; in5
+    mova                m2, [%1+10*4*mmsize]        ; in10
+    mova                m3, [%1+13*4*mmsize]        ; in13
+    SUMSUB_MUL_D         3, 0, 4, 5, 15893,  3981   ; m3/4=t2, m0/5=t3
+    SUMSUB_MUL_D         1, 2, 6, 7,  8423, 14053   ; m1/6=t10, m2/7=t11
+    SCRATCH              0, 10, rsp+72*mmsize       ; free m0 for use as tmp below
+    SUMSUB_PACK_D        1, 3, 6, 4, 0              ; m1=t2a, m3=t10a
+    UNSCRATCH            0, 10, rsp+72*mmsize
+    SUMSUB_PACK_D        2, 0, 7, 5, 4              ; m2=t3a, m0=t11a
+    mova   [rsp+68*mmsize], m1                      ; spill t2a
+    mova   [rsp+69*mmsize], m2                      ; spill t3a
+    SCRATCH              3, 13, rsp+75*mmsize       ; park t10a
+    SCRATCH              0, 14, rsp+76*mmsize       ; park t11a
+
+    mova                m0, [%1+ 1*4*mmsize]        ; in1
+    mova                m1, [%1+ 6*4*mmsize]        ; in6
+    mova                m2, [%1+ 9*4*mmsize]        ; in9
+    mova                m3, [%1+14*4*mmsize]        ; in14
+    SUMSUB_MUL_D         2, 1, 4, 5, 13160,  9760   ; m2/4=t6, m1/5=t7
+    SUMSUB_MUL_D         0, 3, 6, 7,  2404, 16207   ; m0/6=t14, m3/7=t15
+    SCRATCH              1, 10, rsp+72*mmsize       ; free m1 for use as tmp below
+    SUMSUB_PACK_D        0, 2, 6, 4, 1              ; m0=t6a, m2=t14a
+    UNSCRATCH            1, 10, rsp+72*mmsize
+    SUMSUB_PACK_D        3, 1, 7, 5, 4              ; m3=t7a, m1=t15a
+
+    UNSCRATCH            4, 13, rsp+75*mmsize       ; t10a
+    UNSCRATCH            5, 14, rsp+76*mmsize       ; t11a
+    SCRATCH              0, 13, rsp+75*mmsize       ; park t6a
+    SCRATCH              3, 14, rsp+76*mmsize       ; park t7a
+    SUMSUB_MUL_D         4, 5, 6, 7,  9102, 13623   ; m4/6=t10, m5/7=t11
+    SUMSUB_MUL_D         1, 2, 0, 3, 13623,  9102   ; m1/0=t15, m2/3=t14
+    SCRATCH              0, 10, rsp+72*mmsize       ; free m0 for use as tmp below
+    SUMSUB_PACK_D        2, 4, 3, 6, 0              ; m2=t10a, m4=t14a
+    UNSCRATCH            0, 10, rsp+72*mmsize
+    SUMSUB_PACK_D        1, 5, 0, 7, 6              ; m1=t11a, m5=t15a
+
+    UNSCRATCH            0, 8, rsp+70*mmsize        ; t12a
+    UNSCRATCH            3, 12, rsp+74*mmsize       ; t13a
+    SCRATCH              2, 8, rsp+70*mmsize        ; park t10a
+    SCRATCH              1, 12, rsp+74*mmsize       ; park t11a
+    SUMSUB_MUL_D         0, 3, 1, 2, 15137,  6270   ; m0/1=t12, m3/2=t13
+    SUMSUB_MUL_D         5, 4, 7, 6,  6270, 15137   ; m5/7=t15, m4/6=t14
+    SCRATCH              2, 10, rsp+72*mmsize       ; free m2 for use as tmp below
+    SUMSUB_PACK_D        4, 0, 6, 1, 2              ; m4=out2, m0=t14a
+    UNSCRATCH            2, 10, rsp+72*mmsize
+    SUMSUB_PACK_D        5, 3, 7, 2, 1              ; m5=-out13, m3=t15a
+    NEGD                m5                          ; m5=out13
+
+    UNSCRATCH            1, 9, rsp+71*mmsize        ; t1a
+    mova                m2, [rsp+68*mmsize]         ; t2a
+    UNSCRATCH            6, 13, rsp+75*mmsize       ; t6a
+    UNSCRATCH            7, 14, rsp+76*mmsize       ; t7a
+    SCRATCH              4, 10, rsp+72*mmsize       ; park out2 in its final slot
+    SCRATCH              5, 13, rsp+75*mmsize       ; park out13 in its final slot
+    UNSCRATCH            4, 15, rsp+77*mmsize       ; t4a
+    UNSCRATCH            5, 11, rsp+73*mmsize       ; t5a
+    SCRATCH              0, 14, rsp+76*mmsize       ; park t14a
+    SCRATCH              3, 15, rsp+77*mmsize       ; park t15a
+    mova                m0, [rsp+67*mmsize]         ; t0a
+    SUMSUB_BA         d, 4, 0, 3                    ; m4=t0, m0=t4
+    SUMSUB_BA         d, 5, 1, 3                    ; m5=t1, m1=t5
+    SUMSUB_BA         d, 6, 2, 3                    ; m6=t2, m2=t6
+    SCRATCH              4, 9, rsp+71*mmsize        ; park t0
+    mova                m3, [rsp+69*mmsize]         ; t3a
+    SUMSUB_BA         d, 7, 3, 4                    ; m7=t3, m3=t7
+
+    mova   [rsp+67*mmsize], m5                      ; spill t1
+    mova   [rsp+68*mmsize], m6                      ; spill t2
+    mova   [rsp+69*mmsize], m7                      ; spill t3
+    SUMSUB_MUL_D         0, 1, 4, 5, 15137,  6270   ; m0/4=t4a, m1/5=t5a
+    SUMSUB_MUL_D         3, 2, 7, 6,  6270, 15137   ; m3/7=t7a, m2/6=t6a
+    SCRATCH              1, 11, rsp+73*mmsize       ; free m1 for use as tmp below
+    SUMSUB_PACK_D        2, 0, 6, 4, 1              ; m2=-out3, m0=t6
+    NEGD                m2                          ; m2=out3
+    UNSCRATCH            1, 11, rsp+73*mmsize
+    SUMSUB_PACK_D        3, 1, 7, 5, 4              ; m3=out12, m1=t7
+    SCRATCH              2, 11, rsp+73*mmsize       ; park out3 in its final slot
+    UNSCRATCH            2, 12, rsp+74*mmsize       ; t11a
+    SCRATCH              3, 12, rsp+74*mmsize       ; park out12 in its final slot
+
+    UNSCRATCH            3, 8, rsp+70*mmsize        ; t10a
+    mova                m4, [rsp+65*mmsize]         ; t8a
+    mova                m5, [rsp+66*mmsize]         ; t9a
+    SUMSUB_BA         d, 3, 4, 6                    ; m3=-out1, m4=t10
+    NEGD                m3                          ; m3=out1
+    SUMSUB_BA         d, 2, 5, 6                    ; m2=out14, m5=t11
+    UNSCRATCH            6, 9, rsp+71*mmsize        ; t0
+    UNSCRATCH            7, 14, rsp+76*mmsize       ; t14a
+    SCRATCH              3, 9, rsp+71*mmsize        ; park out1 in its final slot
+    SCRATCH              2, 14, rsp+76*mmsize       ; park out14 in its final slot
+
+    SUMSUB_MUL           1, 0, 2, 3, 11585, 11585   ; m1=out4, m0=out11
+    mova   [rsp+65*mmsize], m0                      ; out11 is returned in stack slot 65
+    SUMSUB_MUL           5, 4, 2, 3, 11585, 11585   ; m5=out6, m4=out9
+    UNSCRATCH            0, 15, rsp+77*mmsize       ; t15a
+    SUMSUB_MUL           7, 0, 2, 3, 11585, m11585  ; m7=out10, m0=out5
+
+    mova                m2, [rsp+68*mmsize]         ; t2
+    SUMSUB_BA         d, 2, 6, 3                    ; m2=out0, m6=t2a
+    SCRATCH              2, 8, rsp+70*mmsize        ; park out0 in its final slot
+    mova                m2, [rsp+67*mmsize]         ; t1
+    mova                m3, [rsp+69*mmsize]         ; t3
+    mova   [rsp+67*mmsize], m7                      ; temporarily spill out10: m7 is the tmp below
+    SUMSUB_BA         d, 3, 2, 7                    ; m3=-out15, m2=t3a
+    NEGD                m3                          ; m3=out15
+    SCRATCH              3, 15, rsp+77*mmsize       ; park out15 in its final slot
+    SUMSUB_MUL           6, 2, 7, 3, 11585, m11585  ; m6=out8, m2=out7
+    mova                m7, [rsp+67*mmsize]         ; reload out10
+
+    SWAP                 0, 1                       ; rename registers into the m0-6 = out4-10 order given in the header comment
+    SWAP                 2, 5, 4, 6, 7, 3
+%endmacro
+
+%macro IADST16_FN 7 ; %1/%4: name parts, %2/%5: 1-D macro prefix for pass 1/2, %3/%6: scratch slot base matching that 1-D macro, %7: prefix of the eob->count byte table
+cglobal vp9_%1_%4_16x16_add_10, 4, 6 + ARCH_X86_64, 16, \
+                                70 * mmsize + ARCH_X86_32 * 8 * mmsize, \
+                                dst, stride, block, eob
+    mova                m0, [pw_1023]               ; presumably the 10-bit pixel maximum; it is handed to ROUND_AND_STORE_4x4 below -- confirm there
+
+.body:                                              ; the _12 entry point below jumps here with m0=[pw_4095]
+    mova   [rsp+64*mmsize], m0                      ; keep m0 in stack slot 64 for pass 2
+    DEFINE_ARGS dst, stride, block, cnt, ptr, skip, dstbak ; the eob register is called cnt from here on
+%if ARCH_X86_64
+    mov            dstbakq, dstq                    ; keep the original dst in a spare register
+    movsxd            cntq, cntd                    ; sign-extend eob for use as a 64-bit index
+%endif
+%ifdef PIC
+    lea               ptrq, [%7_16x16]
+    movzx             cntd, byte [ptrq+cntq-1]      ; cnt = table[eob-1]
+%else
+    movzx             cntd, byte [%7_16x16+cntq-1]  ; cnt = table[eob-1]
+%endif
+    mov              skipd, 4
+    sub              skipd, cntd                    ; skip = 4 - cnt pass-1 iterations that are not run
+    mov               ptrq, rsp                     ; pass-1 output starts at the bottom of the stack buffer
+.loop_1:
+    %2_1D           blockq                          ; pass 1: 1-D transform reading from block
+
+    TRANSPOSE4x4D        0, 1, 2, 3, 7
+    mova  [ptrq+ 1*mmsize], m0
+    mova  [ptrq+ 5*mmsize], m1
+    mova  [ptrq+ 9*mmsize], m2
+    mova  [ptrq+13*mmsize], m3
+    mova                m7, [rsp+65*mmsize]         ; one output was left in stack slot 65 by the 1-D macro (IADST16_1D stores out11 there)
+    TRANSPOSE4x4D        4, 5, 6, 7, 0
+    mova  [ptrq+ 2*mmsize], m4
+    mova  [ptrq+ 6*mmsize], m5
+    mova  [ptrq+10*mmsize], m6
+    mova  [ptrq+14*mmsize], m7
+    UNSCRATCH               0, 8, rsp+(%3+0)*mmsize ; remaining outputs are fetched through UNSCRATCH (scratch regs 8-15 / slots %3+0..7)
+    UNSCRATCH               1, 9, rsp+(%3+1)*mmsize
+    UNSCRATCH               2, 10, rsp+(%3+2)*mmsize
+    UNSCRATCH               3, 11, rsp+(%3+3)*mmsize
+    TRANSPOSE4x4D        0, 1, 2, 3, 7
+    mova  [ptrq+ 0*mmsize], m0
+    mova  [ptrq+ 4*mmsize], m1
+    mova  [ptrq+ 8*mmsize], m2
+    mova  [ptrq+12*mmsize], m3
+    UNSCRATCH               4, 12, rsp+(%3+4)*mmsize
+    UNSCRATCH               5, 13, rsp+(%3+5)*mmsize
+    UNSCRATCH               6, 14, rsp+(%3+6)*mmsize
+    UNSCRATCH               7, 15, rsp+(%3+7)*mmsize
+    TRANSPOSE4x4D        4, 5, 6, 7, 0
+    mova  [ptrq+ 3*mmsize], m4
+    mova  [ptrq+ 7*mmsize], m5
+    mova  [ptrq+11*mmsize], m6
+    mova  [ptrq+15*mmsize], m7
+    add               ptrq, 16 * mmsize             ; each pass-1 iteration writes 16 vectors
+    add             blockq, mmsize
+    dec               cntd
+    jg .loop_1
+
+    ; zero-pad the remainder (skipped cols)
+    test             skipd, skipd
+    jz .end
+    add              skipd, skipd                   ; loop_z clears 8 vectors per iteration, so it runs 2*skip times
+    lea             blockq, [blockq+skipq*(mmsize/2)] ; advance block by skip*mmsize, as the skipped iterations would have
+    pxor                m0, m0
+.loop_z:
+    mova   [ptrq+mmsize*0], m0
+    mova   [ptrq+mmsize*1], m0
+    mova   [ptrq+mmsize*2], m0
+    mova   [ptrq+mmsize*3], m0
+    mova   [ptrq+mmsize*4], m0
+    mova   [ptrq+mmsize*5], m0
+    mova   [ptrq+mmsize*6], m0
+    mova   [ptrq+mmsize*7], m0
+    add               ptrq, 8 * mmsize
+    dec              skipd
+    jg .loop_z
+.end:
+
+    DEFINE_ARGS dst, stride, block, cnt, ptr, stride3, dstbak ; the skip register is called stride3 from here on
+    lea           stride3q, [strideq*3]
+    mov               cntd, 4                       ; pass 2 always runs 4 iterations
+    mov               ptrq, rsp                     ; pass 2 reads what pass 1 wrote
+.loop_2:
+    %5_1D             ptrq                          ; pass 2: 1-D transform reading the stack buffer
+
+    pxor                m7, m7                      ; zero operand for ROUND_AND_STORE_4x4
+    lea               dstq, [dstq+strideq*4]        ; dst -> row 4 (rows counted from dst at loop entry)
+    ROUND_AND_STORE_4x4  0, 1, 2, 3, m7, [rsp+64*mmsize], [pd_32], 6
+    lea               dstq, [dstq+strideq*4]        ; dst -> row 8
+    mova                m0, [rsp+65*mmsize]         ; output left in stack slot 65 by the 1-D macro
+    mova                m1, [rsp+64*mmsize]         ; value saved at .body
+    mova                m2, [pd_32]                 ; m1/m2 stay in registers for the remaining stores
+    ROUND_AND_STORE_4x4  4, 5, 6, 0, m7, m1, m2, 6
+
+%if ARCH_X86_64
+    DEFINE_ARGS dstbak, stride, block, cnt, ptr, stride3, dst ; swap names: "dstq" now refers to the saved base pointer (row 0)
+%else
+    mov               dstq, dstm                    ; x86-32: reload the base pointer from its argument slot (row 0)
+%endif
+    UNSCRATCH               0, 8, rsp+(%6+0)*mmsize
+    UNSCRATCH               4, 9, rsp+(%6+1)*mmsize
+    UNSCRATCH               5, 10, rsp+(%6+2)*mmsize
+    UNSCRATCH               3, 11, rsp+(%6+3)*mmsize
+    ROUND_AND_STORE_4x4  0, 4, 5, 3, m7, m1, m2, 6
+%if ARCH_X86_64
+    DEFINE_ARGS dst, stride, block, cnt, ptr, stride3, dstbak ; back to the normal names
+    lea               dstq, [dstbakq+stride3q*4]    ; dst -> row 12
+%else
+    lea               dstq, [dstq+stride3q*4]       ; dst -> row 12
+%endif
+    UNSCRATCH               4, 12, rsp+(%6+4)*mmsize
+    UNSCRATCH               5, 13, rsp+(%6+5)*mmsize
+    UNSCRATCH               6, 14, rsp+(%6+6)*mmsize
+    UNSCRATCH               0, 15, rsp+(%6+7)*mmsize
+    ROUND_AND_STORE_4x4  4, 5, 6, 0, m7, m1, m2, 6
+
+    add               ptrq, mmsize                  ; next vector of pass-1 output
+%if ARCH_X86_64
+    add            dstbakq, 8                       ; move the base pointer 8 bytes along the row
+    mov               dstq, dstbakq
+%else
+    add         dword dstm, 8                       ; x86-32 keeps the base pointer in the argument slot
+    mov               dstq, dstm
+%endif
+    dec               cntd
+    jg .loop_2
+
+    ; m7 is still zero
+    ZERO_BLOCK blockq-4*mmsize, 64, 16, m7          ; blockq moved 4*mmsize in total (cnt + skip iterations), so this is the original block pointer
+    RET
+
+cglobal vp9_%1_%4_16x16_add_12, 4, 6 + ARCH_X86_64, 16, \
+                                70 * mmsize + ARCH_X86_32 * 8 * mmsize, \
+                                dst, stride, block, eob
+    mova                m0, [pw_4095]               ; the only difference from the _10 entry point
+    jmp mangle(private_prefix %+ _ %+ vp9_%1_%4_16x16_add_10 %+ SUFFIX).body
+%endmacro
+
+INIT_XMM sse2
+IADST16_FN idct,  IDCT16,  67, iadst, IADST16, 70, row ; vp9_idct_iadst_16x16_add_10/_12: IDCT16 pass 1, IADST16 pass 2, row_16x16 table
+IADST16_FN iadst, IADST16, 70, idct,  IDCT16,  67, col ; vp9_iadst_idct_16x16_add_10/_12: IADST16 pass 1, IDCT16 pass 2, col_16x16 table
+IADST16_FN iadst, IADST16, 70, iadst, IADST16, 70, default ; vp9_iadst_iadst_16x16_add_10/_12: IADST16 in both passes, default_16x16 table
+
+%macro IDCT32_1D 2-3 8 * mmsize; pass[1/2], src, src_stride; pass 1 transposes the outputs into [ptrq], pass 2 hands them to ROUND_AND_STORE_4x4 at dst
+    IDCT16_1D %2, 2 * %3, 272, 257                  ; 16-point stage on the same source at twice the stride; its results are saved below
+%if ARCH_X86_64
+    mova  [rsp+257*mmsize], m8                      ; x86-64: copy m8-15 to slots 257-264 so they can be reloaded from memory later
+    mova  [rsp+258*mmsize], m9
+    mova  [rsp+259*mmsize], m10
+    mova  [rsp+260*mmsize], m11
+    mova  [rsp+261*mmsize], m12
+    mova  [rsp+262*mmsize], m13
+    mova  [rsp+263*mmsize], m14
+    mova  [rsp+264*mmsize], m15
+%endif
+    mova  [rsp+265*mmsize], m0                      ; m0-6 go to slots 265-271 (slot map follows)
+    mova  [rsp+266*mmsize], m1
+    mova  [rsp+267*mmsize], m2
+    mova  [rsp+268*mmsize], m3
+    mova  [rsp+269*mmsize], m4
+    mova  [rsp+270*mmsize], m5
+    mova  [rsp+271*mmsize], m6
+
+    ; r257-260: t0-3
+    ; r265-272: t4/5a/6a/7/8/9a/10/11a
+    ; r261-264: t12a/13/14a/15
+    ; r273-274 is free as scratch space, and 275-282 mirrors m8-15 on 32bit
+
+    mova                m0, [%2+ 1*%3]              ; in1
+    mova                m1, [%2+15*%3]              ; in15
+    mova                m2, [%2+17*%3]              ; in17
+    mova                m3, [%2+31*%3]              ; in31
+    SUMSUB_MUL           0, 3, 4, 5, 16364,  804    ; m0=t31a, m3=t16a
+    SUMSUB_MUL           2, 1, 4, 5, 11003, 12140   ; m2=t30a, m1=t17a
+    SUMSUB_BA         d, 1, 3, 4                    ; m1=t16, m3=t17
+    SUMSUB_BA         d, 2, 0, 4                    ; m2=t31, m0=t30
+    SUMSUB_MUL           0, 3, 4, 5, 16069,  3196   ; m0=t30a, m3=t17a
+    SCRATCH              0, 8, rsp+275*mmsize
+    SCRATCH              2, 9, rsp+276*mmsize
+
+    ; end of stage 1-3 first quart
+
+    mova                m0, [%2+ 7*%3]              ; in7
+    mova                m2, [%2+ 9*%3]              ; in9
+    mova                m4, [%2+23*%3]              ; in23
+    mova                m5, [%2+25*%3]              ; in25
+    SUMSUB_MUL           2, 4, 6, 7, 14811,  7005   ; m2=t29a, m4=t18a
+    SUMSUB_MUL           5, 0, 6, 7,  5520, 15426   ; m5=t28a, m0=t19a
+    SUMSUB_BA         d, 4, 0, 6                    ; m4=t19, m0=t18
+    SUMSUB_BA         d, 2, 5, 6                    ; m2=t28, m5=t29
+    SUMSUB_MUL           5, 0, 6, 7,  3196, m16069  ; m5=t29a, m0=t18a
+
+    ; end of stage 1-3 second quart
+
+    SUMSUB_BA         d, 4, 1, 6                    ; m4=t16a, m1=t19a
+    SUMSUB_BA         d, 0, 3, 6                    ; m0=t17, m3=t18
+    UNSCRATCH            6, 8, rsp+275*mmsize       ; t30a
+    UNSCRATCH            7, 9, rsp+276*mmsize       ; t31
+    mova  [rsp+273*mmsize], m4                      ; t16a -> scratch slot 273
+    mova  [rsp+274*mmsize], m0                      ; t17 -> scratch slot 274
+    SUMSUB_BA         d, 2, 7, 0                    ; m2=t31a, m7=t28a
+    SUMSUB_BA         d, 5, 6, 0                    ; m5=t30, m6=t29
+    SUMSUB_MUL           6, 3, 0, 4, 15137,  6270   ; m6=t29a, m3=t18a
+    SUMSUB_MUL           7, 1, 0, 4, 15137,  6270   ; m7=t28, m1=t19
+    SCRATCH              3, 10, rsp+277*mmsize
+    SCRATCH              1, 11, rsp+278*mmsize
+    SCRATCH              7, 12, rsp+279*mmsize
+    SCRATCH              6, 13, rsp+280*mmsize
+    SCRATCH              5, 14, rsp+281*mmsize
+    SCRATCH              2, 15, rsp+282*mmsize
+
+    ; end of stage 4-5 first half
+
+    mova                m0, [%2+ 5*%3]              ; in5
+    mova                m1, [%2+11*%3]              ; in11
+    mova                m2, [%2+21*%3]              ; in21
+    mova                m3, [%2+27*%3]              ; in27
+    SUMSUB_MUL           0, 3, 4, 5, 15893,  3981   ; m0=t27a, m3=t20a
+    SUMSUB_MUL           2, 1, 4, 5,  8423, 14053   ; m2=t26a, m1=t21a
+    SUMSUB_BA         d, 1, 3, 4                    ; m1=t20, m3=t21
+    SUMSUB_BA         d, 2, 0, 4                    ; m2=t27, m0=t26
+    SUMSUB_MUL           0, 3, 4, 5,  9102, 13623   ; m0=t26a, m3=t21a
+    SCRATCH              0, 8, rsp+275*mmsize
+    SCRATCH              2, 9, rsp+276*mmsize
+
+    ; end of stage 1-3 third quart
+
+    mova                m0, [%2+ 3*%3]              ; in3
+    mova                m2, [%2+13*%3]              ; in13
+    mova                m4, [%2+19*%3]              ; in19
+    mova                m5, [%2+29*%3]              ; in29
+    SUMSUB_MUL           2, 4, 6, 7, 13160,  9760   ; m2=t25a, m4=t22a
+    SUMSUB_MUL           5, 0, 6, 7,  2404, 16207   ; m5=t24a, m0=t23a
+    SUMSUB_BA         d, 4, 0, 6                    ; m4=t23, m0=t22
+    SUMSUB_BA         d, 2, 5, 6                    ; m2=t24, m5=t25
+    SUMSUB_MUL           5, 0, 6, 7, 13623, m9102   ; m5=t25a, m0=t22a
+
+    ; end of stage 1-3 fourth quart
+
+    SUMSUB_BA         d, 1, 4, 6                    ; m1=t23a, m4=t20a
+    SUMSUB_BA         d, 3, 0, 6                    ; m3=t22, m0=t21
+    UNSCRATCH            6, 8, rsp+275*mmsize       ; t26a
+    UNSCRATCH            7, 9, rsp+276*mmsize       ; t27
+    SCRATCH              3, 8, rsp+275*mmsize
+    SCRATCH              1, 9, rsp+276*mmsize
+    SUMSUB_BA         d, 7, 2, 1                    ; m7=t24a, m2=t27a
+    SUMSUB_BA         d, 6, 5, 1                    ; m6=t25, m5=t26
+    SUMSUB_MUL           2, 4, 1, 3,  6270, m15137  ; m2=t27, m4=t20
+    SUMSUB_MUL           5, 0, 1, 3,  6270, m15137  ; m5=t26a, m0=t21a
+
+    ; end of stage 4-5 second half
+
+    UNSCRATCH            1, 12, rsp+279*mmsize      ; t28
+    UNSCRATCH            3, 13, rsp+280*mmsize      ; t29a
+    SCRATCH              4, 12, rsp+279*mmsize
+    SCRATCH              0, 13, rsp+280*mmsize
+    SUMSUB_BA         d, 5, 3, 0                    ; m5=t29, m3=t26
+    SUMSUB_BA         d, 2, 1, 0                    ; m2=t28a, m1=t27a
+    UNSCRATCH            0, 14, rsp+281*mmsize      ; t30
+    UNSCRATCH            4, 15, rsp+282*mmsize      ; t31a
+    SCRATCH              2, 14, rsp+281*mmsize
+    SCRATCH              5, 15, rsp+282*mmsize
+    SUMSUB_BA         d, 6, 0, 2                    ; m6=t30a, m0=t25a
+    SUMSUB_BA         d, 7, 4, 2                    ; m7=t31, m4=t24
+
+    mova                m2, [rsp+273*mmsize]        ; t16a
+    mova                m5, [rsp+274*mmsize]        ; t17
+    mova  [rsp+273*mmsize], m6                      ; t30a takes over slot 273
+    mova  [rsp+274*mmsize], m7                      ; t31 takes over slot 274
+    UNSCRATCH            6, 10, rsp+277*mmsize      ; t18a
+    UNSCRATCH            7, 11, rsp+278*mmsize      ; t19
+    SCRATCH              4, 10, rsp+277*mmsize
+    SCRATCH              0, 11, rsp+278*mmsize
+    UNSCRATCH            4, 12, rsp+279*mmsize      ; t20
+    UNSCRATCH            0, 13, rsp+280*mmsize      ; t21a
+    SCRATCH              3, 12, rsp+279*mmsize
+    SCRATCH              1, 13, rsp+280*mmsize
+    SUMSUB_BA         d, 0, 6, 1                    ; m0=t18, m6=t21
+    SUMSUB_BA         d, 4, 7, 1                    ; m4=t19a, m7=t20a
+    UNSCRATCH            3, 8, rsp+275*mmsize       ; t22
+    UNSCRATCH            1, 9, rsp+276*mmsize       ; t23a
+    SCRATCH              0, 8, rsp+275*mmsize
+    SCRATCH              4, 9, rsp+276*mmsize
+    SUMSUB_BA         d, 3, 5, 0                    ; m3=t17a, m5=t22a
+    SUMSUB_BA         d, 1, 2, 0                    ; m1=t16, m2=t23
+
+    ; end of stage 6
+
+    UNSCRATCH            0, 10, rsp+277*mmsize      ; t24
+    UNSCRATCH            4, 11, rsp+278*mmsize      ; t25a
+    SCRATCH              1, 10, rsp+277*mmsize
+    SCRATCH              3, 11, rsp+278*mmsize
+    SUMSUB_MUL           0, 2, 1, 3, 11585, 11585   ; m0=t24a, m2=t23a
+    SUMSUB_MUL           4, 5, 1, 3, 11585, 11585   ; m4=t25, m5=t22
+    UNSCRATCH            1, 12, rsp+279*mmsize      ; t26
+    UNSCRATCH            3, 13, rsp+280*mmsize      ; t27a
+    SCRATCH              0, 12, rsp+279*mmsize
+    SCRATCH              4, 13, rsp+280*mmsize
+    SUMSUB_MUL           3, 7, 0, 4, 11585, 11585   ; m3=t27, m7=t20
+    SUMSUB_MUL           1, 6, 0, 4, 11585, 11585   ; m1=t26a, m6=t21a
+
+    ; end of stage 7
+
+    mova                m0, [rsp+269*mmsize]        ; t8
+    mova                m4, [rsp+270*mmsize]        ; t9a
+    mova  [rsp+269*mmsize], m1                      ; t26a
+    mova  [rsp+270*mmsize], m3                      ; t27
+    mova                m3, [rsp+271*mmsize]        ; t10
+    SUMSUB_BA         d, 2, 0, 1                    ; m2=out8, m0=out23
+    SUMSUB_BA         d, 5, 4, 1                    ; m5=out9, m4=out22
+    SUMSUB_BA         d, 6, 3, 1                    ; m6=out10, m3=out21
+    mova                m1, [rsp+272*mmsize]        ; t11a
+    mova  [rsp+271*mmsize], m0                      ; park out23 in slot 271 so m0 can serve as a temporary
+    SUMSUB_BA         d, 7, 1, 0                    ; m7=out11, m1=out20
+
+%if %1 == 1
+    TRANSPOSE4x4D        2, 5, 6, 7, 0
+    mova  [ptrq+ 2*mmsize], m2
+    mova  [ptrq+10*mmsize], m5
+    mova  [ptrq+18*mmsize], m6
+    mova  [ptrq+26*mmsize], m7
+%else ; %1 == 2
+    pxor                m0, m0                      ; zero operand for ROUND_AND_STORE_4x4
+    lea               dstq, [dstq+strideq*8]        ; dst -> row 8 (rows counted from dst at macro entry): out8-11
+    ROUND_AND_STORE_4x4  2, 5, 6, 7, m0, [rsp+256*mmsize], [pd_32], 6
+%endif
+    mova                m2, [rsp+271*mmsize]        ; out23, parked above
+%if %1 == 1
+    TRANSPOSE4x4D        1, 3, 4, 2, 0
+    mova  [ptrq+ 5*mmsize], m1
+    mova  [ptrq+13*mmsize], m3
+    mova  [ptrq+21*mmsize], m4
+    mova  [ptrq+29*mmsize], m2
+%else ; %1 == 2
+    lea               dstq, [dstq+stride3q*4]       ; dst -> row 20: out20-23
+    ROUND_AND_STORE_4x4  1, 3, 4, 2, m0, [rsp+256*mmsize], [pd_32], 6
+%endif
+
+    ; end of last stage + store for out8-11 and out20-23
+
+    UNSCRATCH            0, 9, rsp+276*mmsize       ; t19a
+    UNSCRATCH            1, 8, rsp+275*mmsize       ; t18
+    UNSCRATCH            2, 11, rsp+278*mmsize      ; t17a
+    UNSCRATCH            3, 10, rsp+277*mmsize      ; t16
+    mova                m7, [rsp+261*mmsize]        ; t12a
+    mova                m6, [rsp+262*mmsize]        ; t13
+    mova                m5, [rsp+263*mmsize]        ; t14a
+    SUMSUB_BA         d, 0, 7, 4                    ; m0=out12, m7=out19
+    SUMSUB_BA         d, 1, 6, 4                    ; m1=out13, m6=out18
+    SUMSUB_BA         d, 2, 5, 4                    ; m2=out14, m5=out17
+    mova                m4, [rsp+264*mmsize]        ; t15
+    SCRATCH              7, 8, rsp+275*mmsize
+    SUMSUB_BA         d, 3, 4, 7                    ; m3=out15, m4=out16
+
+%if %1 == 1
+    TRANSPOSE4x4D        0, 1, 2, 3, 7
+    mova  [ptrq+ 3*mmsize], m0
+    mova  [ptrq+11*mmsize], m1
+    mova  [ptrq+19*mmsize], m2
+    mova  [ptrq+27*mmsize], m3
+%else ; %1 == 2
+%if ARCH_X86_64
+    SWAP                 7, 9                       ; the register zeroed by pxor above was renamed m9 by "UNSCRATCH 0, 9"; name it m7
+    lea               dstq, [dstbakq+stride3q*4]    ; dst -> row 12: out12-15
+%else ; x86-32
+    pxor                m7, m7                      ; re-create the zero operand
+    mov               dstq, dstm                    ; reload the base pointer from its argument slot
+    lea               dstq, [dstq+stride3q*4]       ; dst -> row 12: out12-15
+%endif
+    ROUND_AND_STORE_4x4  0, 1, 2, 3, m7, [rsp+256*mmsize], [pd_32], 6
+%endif
+    UNSCRATCH            0, 8, rsp+275*mmsize       ; out19
+%if %1 == 1
+    TRANSPOSE4x4D        4, 5, 6, 0, 7
+    mova  [ptrq+ 4*mmsize], m4
+    mova  [ptrq+12*mmsize], m5
+    mova  [ptrq+20*mmsize], m6
+    mova  [ptrq+28*mmsize], m0
+%else ; %1 == 2
+    lea               dstq, [dstq+strideq*4]        ; dst -> row 16: out16-19
+    ROUND_AND_STORE_4x4  4, 5, 6, 0, m7, [rsp+256*mmsize], [pd_32], 6
+%endif
+
+    ; end of last stage + store for out12-19
+
+%if ARCH_X86_64
+    SWAP                 7, 8                       ; pass 2: park the zero register under the name m8 while m7 is reloaded (plain rename in pass 1)
+%endif
+    mova                m7, [rsp+257*mmsize]        ; t0
+    mova                m6, [rsp+258*mmsize]        ; t1
+    mova                m5, [rsp+259*mmsize]        ; t2
+    mova                m4, [rsp+260*mmsize]        ; t3
+    mova                m0, [rsp+274*mmsize]        ; t31
+    mova                m1, [rsp+273*mmsize]        ; t30a
+    UNSCRATCH            2, 15, rsp+282*mmsize      ; t29
+    SUMSUB_BA         d, 0, 7, 3                    ; m0=out0, m7=out31
+    SUMSUB_BA         d, 1, 6, 3                    ; m1=out1, m6=out30
+    SUMSUB_BA         d, 2, 5, 3                    ; m2=out2, m5=out29
+    SCRATCH              0, 9, rsp+276*mmsize
+    UNSCRATCH            3, 14, rsp+281*mmsize      ; t28a
+    SUMSUB_BA         d, 3, 4, 0                    ; m3=out3, m4=out28
+
+%if %1 == 1
+    TRANSPOSE4x4D        4, 5, 6, 7, 0
+    mova  [ptrq+ 7*mmsize], m4
+    mova  [ptrq+15*mmsize], m5
+    mova  [ptrq+23*mmsize], m6
+    mova  [ptrq+31*mmsize], m7
+%else ; %1 == 2
+%if ARCH_X86_64
+    SWAP                 0, 8                       ; zero register (parked in m8) back as m0
+%else ; x86-32
+    pxor                m0, m0                      ; re-create the zero operand
+%endif
+    lea               dstq, [dstq+stride3q*4]       ; dst -> row 28: out28-31
+    ROUND_AND_STORE_4x4  4, 5, 6, 7, m0, [rsp+256*mmsize], [pd_32], 6
+%endif
+    UNSCRATCH            7, 9, rsp+276*mmsize       ; out0
+%if %1 == 1
+    TRANSPOSE4x4D        7, 1, 2, 3, 0
+    mova  [ptrq+ 0*mmsize], m7
+    mova  [ptrq+ 8*mmsize], m1
+    mova  [ptrq+16*mmsize], m2
+    mova  [ptrq+24*mmsize], m3
+%else ; %1 == 2
+%if ARCH_X86_64
+    DEFINE_ARGS dstbak, stride, block, cnt, ptr, stride3, dst ; swap names so "dstq" is the saved base pointer (row 0: out0-3)
+%else ; x86-32
+    mov               dstq, dstm                    ; reload the base pointer (row 0: out0-3)
+%endif
+    ROUND_AND_STORE_4x4  7, 1, 2, 3, m0, [rsp+256*mmsize], [pd_32], 6
+%if ARCH_X86_64
+    DEFINE_ARGS dst, stride, block, cnt, ptr, stride3, dstbak ; back to the normal names
+%endif
+%endif
+
+    ; end of last stage + store for out0-3 and out28-31
+
+%if ARCH_X86_64
+    SWAP                 0, 8                       ; pass 2: park the zero register in m8 again (plain rename in pass 1)
+%endif
+    mova                m7, [rsp+265*mmsize]        ; t4
+    mova                m6, [rsp+266*mmsize]        ; t5a
+    mova                m5, [rsp+267*mmsize]        ; t6a
+    mova                m4, [rsp+268*mmsize]        ; t7
+    mova                m0, [rsp+270*mmsize]        ; t27
+    mova                m1, [rsp+269*mmsize]        ; t26a
+    UNSCRATCH            2, 13, rsp+280*mmsize      ; t25
+    SUMSUB_BA         d, 0, 7, 3                    ; m0=out4, m7=out27
+    SUMSUB_BA         d, 1, 6, 3                    ; m1=out5, m6=out26
+    SUMSUB_BA         d, 2, 5, 3                    ; m2=out6, m5=out25
+    UNSCRATCH            3, 12, rsp+279*mmsize      ; t24a
+    SCRATCH              7, 9, rsp+276*mmsize
+    SUMSUB_BA         d, 3, 4, 7                    ; m3=out7, m4=out24
+
+%if %1 == 1
+    TRANSPOSE4x4D        0, 1, 2, 3, 7
+    mova  [ptrq+ 1*mmsize], m0
+    mova  [ptrq+ 9*mmsize], m1
+    mova  [ptrq+17*mmsize], m2
+    mova  [ptrq+25*mmsize], m3
+%else ; %1 == 2
+%if ARCH_X86_64
+    SWAP                 7, 8                       ; zero register (parked in m8) back as m7; it is still m7 when the macro ends
+    lea               dstq, [dstbakq+strideq*4]     ; dst -> row 4: out4-7
+%else ; x86-32
+    pxor                m7, m7                      ; re-create the zero operand
+    lea               dstq, [dstq+strideq*4]        ; dst -> row 4: out4-7 (dstq was reloaded to row 0 above)
+%endif
+    ROUND_AND_STORE_4x4  0, 1, 2, 3, m7, [rsp+256*mmsize], [pd_32], 6
+%endif
+    UNSCRATCH            0, 9, rsp+276*mmsize       ; out27
+%if %1 == 1
+    TRANSPOSE4x4D        4, 5, 6, 0, 7
+    mova  [ptrq+ 6*mmsize], m4
+    mova  [ptrq+14*mmsize], m5
+    mova  [ptrq+22*mmsize], m6
+    mova  [ptrq+30*mmsize], m0
+%else ; %1 == 2
+%if ARCH_X86_64
+    lea               dstq, [dstbakq+stride3q*8]    ; dst -> row 24: out24-27
+%else
+    mov               dstq, dstm                    ; reload the base pointer
+    lea               dstq, [dstq+stride3q*8]       ; dst -> row 24: out24-27
+%endif
+    ROUND_AND_STORE_4x4  4, 5, 6, 0, m7, [rsp+256*mmsize], [pd_32], 6
+%endif
+
+    ; end of last stage + store for out4-7 and out24-27
+%endmacro
+
+INIT_XMM sse2
+cglobal vp9_idct_idct_32x32_add_10, 4, 6 + ARCH_X86_64, 16, \
+                                    275 * mmsize + ARCH_X86_32 * 8 * mmsize, \
+                                    dst, stride, block, eob
+    mova                m0, [pw_1023]               ; presumably the 10-bit pixel maximum; used by both the dc-only and the full path
+    cmp               eobd, 1
+    jg .idctfull                                    ; eob > 1: run the full two-pass transform
+
+    ; dc-only - the 10bit version can be done entirely in 32bit, since the max
+    ; coef values are 17+sign bit, and the coef is 14bit, so 31+sign easily
+    ; fits in 32bit
+    DEFINE_ARGS dst, stride, block, coef
+    pxor                m2, m2
+    DC_ONLY              6, m2                      ; defined elsewhere; its result is read from coefd below
+    movd                m1, coefd
+    pshuflw             m1, m1, q0000               ; replicate the low word of coef ...
+    punpcklqdq          m1, m1                      ; ... into all 8 words of m1
+    DEFINE_ARGS dst, stride, cnt
+    mov               cntd, 32                      ; one iteration per row
+.loop_dc:
+    STORE_2x8            3, 4, 1, m2, m0, dstq,          mmsize
+    STORE_2x8            3, 4, 1, m2, m0, dstq+mmsize*2, mmsize ; second call starts 2*mmsize bytes further along the same row
+    add               dstq, strideq
+    dec               cntd
+    jg .loop_dc
+    RET
+
+.idctfull:                                          ; the _12 entry point jumps here with m0=[pw_4095]
+    mova  [rsp+256*mmsize], m0                      ; keep m0 in stack slot 256; IDCT32_1D pass 2 passes it to ROUND_AND_STORE_4x4
+    DEFINE_ARGS dst, stride, block, cnt, ptr, skip, dstbak ; the eob register is called cnt from here on
+%if ARCH_X86_64
+    mov            dstbakq, dstq                    ; keep the original dst in a spare register
+    movsxd            cntq, cntd                    ; sign-extend eob for use as a 64-bit index
+%endif
+%ifdef PIC
+    lea               ptrq, [default_32x32]
+    movzx             cntd, byte [ptrq+cntq-1]      ; cnt = default_32x32[eob-1]
+%else
+    movzx             cntd, byte [default_32x32+cntq-1] ; cnt = default_32x32[eob-1]
+%endif
+    mov              skipd, 8
+    sub              skipd, cntd                    ; skip = 8 - cnt pass-1 iterations that are not run
+    mov               ptrq, rsp                     ; pass-1 output starts at the bottom of the stack buffer
+.loop_1:
+    IDCT32_1D            1, blockq                  ; pass 1, default source stride (8*mmsize)
+
+    add               ptrq, 32 * mmsize             ; each pass-1 iteration writes 32 vectors
+    add             blockq, mmsize
+    dec               cntd
+    jg .loop_1
+
+    ; zero-pad the remainder (skipped cols)
+    test             skipd, skipd
+    jz .end
+    shl              skipd, 2                       ; loop_z clears 8 vectors per iteration, so it runs 4*skip times
+    lea             blockq, [blockq+skipq*(mmsize/4)] ; advance block by skip*mmsize, as the skipped iterations would have
+    pxor                m0, m0
+.loop_z:
+    mova   [ptrq+mmsize*0], m0
+    mova   [ptrq+mmsize*1], m0
+    mova   [ptrq+mmsize*2], m0
+    mova   [ptrq+mmsize*3], m0
+    mova   [ptrq+mmsize*4], m0
+    mova   [ptrq+mmsize*5], m0
+    mova   [ptrq+mmsize*6], m0
+    mova   [ptrq+mmsize*7], m0
+    add               ptrq, 8 * mmsize
+    dec              skipd
+    jg .loop_z
+.end:
+
+    DEFINE_ARGS dst, stride, block, cnt, ptr, stride3, dstbak ; the skip register is called stride3 from here on
+    lea           stride3q, [strideq*3]
+    mov               cntd, 8                       ; pass 2 always runs 8 iterations
+    mov               ptrq, rsp                     ; pass 2 reads what pass 1 wrote
+.loop_2:
+    IDCT32_1D            2, ptrq                    ; pass 2: results go to dst via ROUND_AND_STORE_4x4
+
+    add               ptrq, mmsize                  ; next vector of pass-1 output
+%if ARCH_X86_64
+    add            dstbakq, 8                       ; move the base pointer 8 bytes along the row
+    mov               dstq, dstbakq
+%else
+    add         dword dstm, 8                       ; x86-32 keeps the base pointer in the argument slot
+    mov               dstq, dstm
+%endif
+    dec               cntd
+    jg .loop_2
+
+    ; m7 is still zero
+    ZERO_BLOCK blockq-8*mmsize, 128, 32, m7         ; blockq moved 8*mmsize in total (cnt + skip iterations), so this is the original block pointer
+    RET
+
+INIT_XMM sse2
+cglobal vp9_idct_idct_32x32_add_12, 4, 6 + ARCH_X86_64, 16, \
+                                    275 * mmsize + ARCH_X86_32 * 8 * mmsize, \
+                                    dst, stride, block, eob
+    mova                m0, [pw_4095]               ; presumably the 12-bit pixel maximum (the _10 version loads pw_1023 here)
+    cmp               eobd, 1
+    jg mangle(private_prefix %+ _ %+ vp9_idct_idct_32x32_add_10 %+ SUFFIX).idctfull ; full transform reuses the _10 code; m0 carries the only difference
+
+    ; dc-only - unfortunately, this one can overflow, since coefs are 19+sign
+    ; bpp, and 19+14+sign does not fit in 32bit, so we do 2-stage multiplies
+    DEFINE_ARGS dst, stride, block, coef, coefl
+    pxor                m2, m2
+    DC_ONLY_64BIT        6, m2                      ; defined elsewhere; its result is read from coefd below
+    movd                m1, coefd
+    pshuflw             m1, m1, q0000               ; replicate the low word of coef ...
+    punpcklqdq          m1, m1                      ; ... into all 8 words of m1
+    DEFINE_ARGS dst, stride, cnt
+    mov               cntd, 32                      ; one iteration per row
+.loop_dc:
+    STORE_2x8            3, 4, 1, m2, m0, dstq,          mmsize
+    STORE_2x8            3, 4, 1, m2, m0, dstq+mmsize*2, mmsize ; second call starts 2*mmsize bytes further along the same row
+    add               dstq, strideq
+    dec               cntd
+    jg .loop_dc
+    RET
diff --git a/libavcodec/x86/vp9itxfm_template.asm b/libavcodec/x86/vp9itxfm_template.asm
new file mode 100644
index 00000000..d2f2257d
--- /dev/null
+++ b/libavcodec/x86/vp9itxfm_template.asm
@@ -0,0 +1,142 @@
+;******************************************************************************
+;* VP9 IDCT SIMD optimizations
+;*
+;* Copyright (C) 2013 Clément Bœsch <u pkh me>
+;* Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%macro VP9_IWHT4_1D 0 ; one 1-D pass of the 4-point inverse WHT (per the macro name) on word vectors m0-m3; m4 and m5 are overwritten as temporaries
+    SWAP                 1, 2, 3                    ; assemble-time register renaming only, no instruction emitted
+    paddw               m0, m2
+    psubw               m3, m1
+    psubw               m4, m0, m3                  ; m4 = m0 - m3 (three-operand form)
+    psraw               m4, 1                       ; arithmetic shift: m4 = (m0 - m3) >> 1
+    psubw               m5, m4, m1
+    SWAP                 5, 1                       ; the value just computed becomes the new m1
+    psubw               m4, m2
+    SWAP                 4, 2                       ; the value just computed becomes the new m2
+    psubw               m0, m1
+    paddw               m3, m2
+    SWAP                 3, 2, 1                    ; final renaming of the outputs
+%endmacro
+
+; (a*x + b*y + round) >> shift
+%macro VP9_MULSUB_2W_2X 5 ; dst1, dst2/src, round, coefs1, coefs2
+    pmaddwd            m%1, m%2, %4                 ; dst1 = per-dword sum of src word pairs times coefs1
+    pmaddwd            m%2,  %5                     ; dst2 = the same with coefs2, overwriting src
+    paddd              m%1,  %3                     ; add the rounding term to both results
+    paddd              m%2,  %3
+    psrad              m%1,  14                     ; arithmetic >> 14 on each dword
+    psrad              m%2,  14
+%endmacro
+
+%macro VP9_MULSUB_2W_4X 7 ; dst1, dst2, coef1, coef2, rnd, tmp1/src, tmp2
+    VP9_MULSUB_2W_2X    %7,  %6,  %5, [pw_m%3_%4], [pw_%4_%3] ; second source vector: m%6 in, results in m%7 and m%6
+    VP9_MULSUB_2W_2X    %1,  %2,  %5, [pw_m%3_%4], [pw_%4_%3] ; first source vector: m%2 in, results in m%1 and m%2
+    packssdw           m%1, m%7                     ; pack both dword results to words with signed saturation
+    packssdw           m%2, m%6
+%endmacro
+
+%macro VP9_UNPACK_MULSUB_2W_4X 7-9 ; dst1, dst2, (src1, src2,) coef1, coef2, rnd, tmp1, tmp2
+%if %0 == 7
+    punpckhwd          m%6, m%2, m%1                ; 7-arg form: dst1/dst2 are also the sources; high-half interleave goes to tmp1
+    punpcklwd          m%2, m%1                     ; low-half interleave replaces dst2
+    VP9_MULSUB_2W_4X   %1, %2, %3, %4, %5, %6, %7
+%else
+    punpckhwd          m%8, m%4, m%3                ; 9-arg form: separate sources src1/src2; high-half interleave goes to tmp1
+    punpcklwd          m%2, m%4, m%3                ; low-half interleave goes to dst2
+    VP9_MULSUB_2W_4X   %1, %2, %5, %6, %7, %8, %9
+%endif
+%endmacro
+
+%macro VP9_IDCT4_1D_FINALIZE 0 ; final butterflies of the 4-point IDCT; expects t1/t2/t0/t3 in m0/m1/m2/m3 and uses m4 as the SUMSUB_BA temporary
+    SUMSUB_BA            w, 3, 2, 4                         ; m3=t3+t0, m2=-t3+t0
+    SUMSUB_BA            w, 1, 0, 4                         ; m1=t2+t1, m0=-t2+t1
+    SWAP                 0, 3, 2                            ; 3102 -> 0123
+%endmacro
+
+%macro VP9_IDCT4_1D 0 ; 1-D 4-point IDCT on word vectors m0-m3; m7 is passed as the rounding operand, m4/m5 are temporaries, and the ssse3 path also multiplies by m6 (not set here)
+%if cpuflag(ssse3)
+    SUMSUB_BA            w, 2, 0, 4                         ; m2=IN(0)+IN(2) m0=IN(0)-IN(2)
+    pmulhrsw            m2, m6                              ; m2=t0
+    pmulhrsw            m0, m6                              ; m0=t1
+%else ; <= sse2
+    VP9_UNPACK_MULSUB_2W_4X 0, 2, 11585, 11585, m7, 4, 5    ; m0=t1, m2=t0
+%endif
+    VP9_UNPACK_MULSUB_2W_4X 1, 3, 15137, 6270, m7, 4, 5     ; m1=t2, m3=t3
+    VP9_IDCT4_1D_FINALIZE
+%endmacro
+
+%macro VP9_IADST4_1D 0 ; 1-D 4-point inverse ADST (per the macro name): inputs/outputs in MMX m0-m3, arithmetic done in xmm0-xmm7; xmm5 is read but never set here
+    movq2dq           xmm0, m0                  ; copy each 64-bit MMX input into the low half of an xmm register
+    movq2dq           xmm1, m1
+    movq2dq           xmm2, m2
+    movq2dq           xmm3, m3
+%if cpuflag(ssse3)
+    paddw               m3, m0                  ; ssse3: out2 is built in MMX, starting with in3 + in0
+%endif
+    punpcklwd         xmm0, xmm1                ; interleave in0/in1 words for pmaddwd
+    punpcklwd         xmm2, xmm3                ; interleave in2/in3 words for pmaddwd
+    pmaddwd           xmm1, xmm0, [pw_5283_13377]
+    pmaddwd           xmm4, xmm0, [pw_9929_13377]
+%if notcpuflag(ssse3)
+    pmaddwd           xmm6, xmm0, [pw_13377_0]  ; non-ssse3: out2 is accumulated in xmm6/xmm7 instead
+%endif
+    pmaddwd           xmm0, [pw_15212_m13377]
+    pmaddwd           xmm3, xmm2, [pw_15212_9929]
+%if notcpuflag(ssse3)
+    pmaddwd           xmm7, xmm2, [pw_m13377_13377]
+%endif
+    pmaddwd           xmm2, [pw_m5283_m15212]
+%if cpuflag(ssse3)
+    psubw               m3, m2                  ; m3 = in0 - in2 + in3
+%else
+    paddd             xmm6, xmm7
+%endif
+    paddd             xmm0, xmm2
+    paddd             xmm3, xmm5                ; xmm5 is added before the >>14 below: presumably the rounding constant, preloaded by the caller
+    paddd             xmm2, xmm5
+%if notcpuflag(ssse3)
+    paddd             xmm6, xmm5
+%endif
+    paddd             xmm1, xmm3
+    paddd             xmm0, xmm3
+    paddd             xmm4, xmm2
+    psrad             xmm1, 14
+    psrad             xmm0, 14
+    psrad             xmm4, 14
+%if cpuflag(ssse3)
+    pmulhrsw            m3, [pw_13377x2]        ; out2
+%else
+    psrad             xmm6, 14
+%endif
+    packssdw          xmm0, xmm0                ; dwords back to words with signed saturation
+    packssdw          xmm1, xmm1
+    packssdw          xmm4, xmm4
+%if notcpuflag(ssse3)
+    packssdw          xmm6, xmm6
+%endif
+    movdq2q             m0, xmm0                ; out3
+    movdq2q             m1, xmm1                ; out0
+    movdq2q             m2, xmm4                ; out1
+%if notcpuflag(ssse3)
+    movdq2q             m3, xmm6                ; out2
+%endif
+    SWAP                 0, 1, 2, 3             ; rename so that m0-m3 hold out0-out3 in order
+%endmacro
diff --git a/libavcodec/x86/vp9lpf_16bpp.asm b/libavcodec/x86/vp9lpf_16bpp.asm
new file mode 100644
index 00000000..c15437b8
--- /dev/null
+++ b/libavcodec/x86/vp9lpf_16bpp.asm
@@ -0,0 +1,823 @@
+;******************************************************************************
+;* VP9 loop filter SIMD optimizations
+;*
+;* Copyright (C) 2015 Ronald S. Bultje <rsbultje@gmail.com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+pw_511: times 16 dw 511 ; signed clip maximum for 10 bpp (pw_ %+ %%maxsgn)
+pw_2047: times 16 dw 2047 ; signed clip maximum for 12 bpp (pw_ %+ %%maxsgn)
+pw_16384: times 16 dw 16384 ; pmulhrsw multiplier used for (f1+1)>>1 in filter_4
+pw_m512: times 16 dw -512 ; signed clip minimum for 10 bpp (pw_ %+ %%minsgn)
+pw_m2048: times 16 dw -2048 ; signed clip minimum for 12 bpp (pw_ %+ %%minsgn)
+
+cextern pw_1
+cextern pw_3
+cextern pw_4
+cextern pw_8
+cextern pw_16
+cextern pw_256
+cextern pw_1023
+cextern pw_4095
+cextern pw_m1
+
+SECTION .text
+
+%macro SCRATCH 3-4 ; reg, x86-64 spare reg, x86-32 spill address[, alias name -> reg_<name>]
+%if ARCH_X86_64
+    SWAP                %1, %2                      ; x86-64: park the value by exchanging register names
+%if %0 == 4
+%define reg_%4 m%2
+%endif
+%else
+    mova              [%3], m%1                     ; x86-32: spill the value to memory
+%if %0 == 4
+%define reg_%4 [%3]
+%endif
+%endif
+%endmacro
+
+%macro UNSCRATCH 3-4 ; reg, x86-64 spare reg, x86-32 spill address[, alias name to undefine]
+%if ARCH_X86_64
+    SWAP                %1, %2                      ; x86-64: exchange the register names back
+%else
+    mova               m%1, [%3]                    ; x86-32: reload the value from memory
+%endif
+%if %0 == 4 ; drop the reg_<name> alias if one was given
+%undef reg_%4
+%endif
+%endmacro
+
+%macro PRELOAD 2-3 ; reg, address[, alias name -> reg_<name>]
+%if ARCH_X86_64
+    mova               m%1, [%2]                    ; x86-64: load the value into the given register
+%if %0 == 3
+%define reg_%3 m%1
+%endif
+%elif %0 == 3 ; x86-32: nothing is loaded, the alias refers to the memory operand itself
+%define reg_%3 [%2]
+%endif
+%endmacro
+
+; calculate p or q portion of flat8out (comments below use the q naming for both halves)
+%macro FLAT8OUT_HALF 0 ; in: m0=q0, m4-7=q4-7, reg_F; out: m7; modifies m4-6; m2-3 are handed to ABS2 as temporaries
+    psubw               m4, m0                      ; q4-q0
+    psubw               m5, m0                      ; q5-q0
+    psubw               m6, m0                      ; q6-q0
+    psubw               m7, m0                      ; q7-q0
+    ABS2                m4, m5, m2, m3              ; abs(q4-q0) | abs(q5-q0)
+    ABS2                m6, m7, m2, m3              ; abs(q6-q0) | abs(q7-q0)
+    pcmpgtw             m4, reg_F                   ; abs(q4-q0) > F
+    pcmpgtw             m5, reg_F                   ; abs(q5-q0) > F
+    pcmpgtw             m6, reg_F                   ; abs(q6-q0) > F
+    pcmpgtw             m7, reg_F                   ; abs(q7-q0) > F
+    por                 m5, m4
+    por                 m7, m6
+    por                 m7, m5                      ; !flat8out, p or q portion
+%endmacro
+
+; calculate p or q portion of flat8in/hev/fm (excluding mb_edge condition)
+%macro FLAT8IN_HALF 1 ; %1=wd; in: m0-3=q0-3 (or p0-3), reg_I, reg_H (reg_F if wd > 4); out: m2, m7 (m4 if wd > 4)
+%if %1 > 4
+    psubw               m4, m3, m0                  ; q3-q0
+    psubw               m5, m2, m0                  ; q2-q0
+    ABS2                m4, m5, m6, m7              ; abs(q3-q0) | abs(q2-q0)
+    pcmpgtw             m4, reg_F                   ; abs(q3-q0) > F
+    pcmpgtw             m5, reg_F                   ; abs(q2-q0) > F
+%endif
+    psubw               m3, m2                      ; q3-q2
+    psubw               m2, m1                      ; q2-q1
+    ABS2                m3, m2, m6, m7              ; abs(q3-q2) | abs(q2-q1)
+    pcmpgtw             m3, reg_I                   ; abs(q3-q2) > I
+    pcmpgtw             m2, reg_I                   ; abs(q2-q1) > I
+%if %1 > 4
+    por                 m4, m5
+%endif
+    por                 m2, m3
+    psubw               m3, m1, m0                  ; q1-q0
+    ABS1                m3, m5                      ; abs(q1-q0)
+%if %1 > 4
+    pcmpgtw             m6, m3, reg_F               ; abs(q1-q0) > F
+%endif
+    pcmpgtw             m7, m3, reg_H               ; abs(q1-q0) > H
+    pcmpgtw             m3, reg_I                   ; abs(q1-q0) > I
+%if %1 > 4
+    por                 m4, m6                      ; m4 = all "> F" tests ORed together
+%endif
+    por                 m2, m3                      ; m2 = all "> I" tests ORed together; m7 keeps the "> H" test
+%endmacro
+
+; one step in filter_14/filter_6
+;
+; take sum $reg, downshift, apply mask and write into dst
+;
+; if sub2/add1-2 are present, add/sub as appropriate to prepare for the next
+; step's sum $reg. This is omitted for the last row in each filter.
+;
+; if dont_store is set, don't write the result into memory, instead keep the
+; values in register so we can write it out later
+%macro FILTER_STEP 6-10 "", "", "", 0 ; tmp, reg, mask, shift, dst, \
+                                      ; src/sub1, sub2, add1, add2, dont_store
+    psrlw               %1, %2, %4                  ; tmp = sum >> shift
+    psubw               %1, %6                      ; abs->delta
+%ifnidn %7, ""
+    psubw               %2, %6                      ; sum -= src/sub1
+    psubw               %2, %7                      ; sum -= sub2
+    paddw               %2, %8                      ; sum += add1
+    paddw               %2, %9                      ; sum += add2
+%endif
+    pand                %1, reg_%3                  ; apply mask
+%if %10 == 1 ; dont_store: fold the masked delta into the src operand instead of writing dst
+    paddw               %6, %1                      ; delta->abs
+%else
+    paddw               %1, %6                      ; delta->abs
+    mova              [%5], %1
+%endif
+%endmacro
+
+; FIXME avx2 versions for 16_16 and mix2_{4,8}{4,8}
+
+%macro LOOP_FILTER 3 ; dir[h/v], wd[4/8/16], bpp[10/12]
+; emits vp9_loop_filter_<dir>_<wd>_<bpp> taking dst, stride, E, I, H
+%if ARCH_X86_64
+%if %2 == 16
+%assign %%num_xmm_regs 16
+%elif %2 == 8
+%assign %%num_xmm_regs 15
+%else ; %2 == 4
+%assign %%num_xmm_regs 14
+%endif ; %2
+%assign %%bak_mem 0
+%else ; ARCH_X86_32
+%assign %%num_xmm_regs 8
+%if %2 == 16
+%assign %%bak_mem 7
+%elif %2 == 8
+%assign %%bak_mem 6
+%else ; %2 == 4
+%assign %%bak_mem 5
+%endif ; %2
+%endif ; ARCH_X86_64/32
+
+%if %2 == 16 ; filter_14 backs up 6 pre-filter rows in stack slots r0-5
+%ifidn %1, v
+%assign %%num_gpr_regs 6
+%else ; %1 == h
+%assign %%num_gpr_regs 5
+%endif ; %1
+%assign %%wd_mem 6
+%else ; %2 == 8/4
+%assign %%num_gpr_regs 5
+%if ARCH_X86_32 && %2 == 8 ; filter_6 backs up p2/p1 in stack slots r0-1 on x86-32
+%assign %%wd_mem 2
+%else ; ARCH_X86_64 || %2 == 4
+%assign %%wd_mem 0
+%endif ; ARCH_X86_64/32 etc.
+%endif ; %2
+
+%ifidn %1, v
+%assign %%tsp_mem 0
+%elif %2 == 16 ; && %1 == h
+%assign %%tsp_mem 16
+%else ; %1 == h && %2 == 8/4
+%assign %%tsp_mem 8
+%endif ; %1/%2
+; stack layout in mmsize slots: wd_mem pixel backups, then bak_mem mask spills, then tsp_mem transposed rows
+%assign %%off %%wd_mem
+%assign %%tspoff %%bak_mem+%%wd_mem
+%assign %%stack_mem ((%%bak_mem+%%wd_mem+%%tsp_mem)*mmsize)
+
+%if %3 == 10
+%define %%maxsgn 511
+%define %%minsgn m512
+%define %%maxusgn 1023
+%define %%maxf 4
+%else ; %3 == 12
+%define %%maxsgn 2047
+%define %%minsgn m2048
+%define %%maxusgn 4095
+%define %%maxf 16
+%endif ; %3
+
+cglobal vp9_loop_filter_%1_%2_%3, 5, %%num_gpr_regs, %%num_xmm_regs, %%stack_mem, dst, stride, E, I, H
+    ; prepare E, I and H masks
+    shl                 Ed, %3-8                    ; scale the E/I/H thresholds by bit_depth - 8
+    shl                 Id, %3-8
+    shl                 Hd, %3-8
+%if cpuflag(ssse3)
+    mova                m0, [pw_256]
+%endif
+    movd                m1, Ed
+    movd                m2, Id
+    movd                m3, Hd
+%if cpuflag(ssse3)
+    pshufb              m1, m0                      ; E << (bit_depth - 8)
+    pshufb              m2, m0                      ; I << (bit_depth - 8)
+    pshufb              m3, m0                      ; H << (bit_depth - 8)
+%else
+    punpcklwd           m1, m1
+    punpcklwd           m2, m2
+    punpcklwd           m3, m3
+    pshufd              m1, m1, q0000
+    pshufd              m2, m2, q0000
+    pshufd              m3, m3, q0000
+%endif
+    SCRATCH              1,  8, rsp+(%%off+0)*mmsize,  E
+    SCRATCH              2,  9, rsp+(%%off+1)*mmsize,  I
+    SCRATCH              3, 10, rsp+(%%off+2)*mmsize,  H
+%if %2 > 4
+    PRELOAD                 11, pw_ %+ %%maxf, F
+%endif
+
+    ; set up variables to load data
+%ifidn %1, v
+    DEFINE_ARGS dst8, stride, stride3, dst0, dst4, dst12
+    lea           stride3q, [strideq*3]
+    neg            strideq                          ; negative stride to reach the rows above dst8
+%if %2 == 16
+    lea              dst0q, [dst8q+strideq*8]
+%else
+    lea              dst4q, [dst8q+strideq*4]
+%endif
+    neg            strideq                          ; back to the positive stride
+%if %2 == 16
+    lea             dst12q, [dst8q+strideq*4]
+    lea              dst4q, [dst0q+strideq*4]
+%endif
+
+%if %2 == 16
+%define %%p7 dst0q
+%define %%p6 dst0q+strideq
+%define %%p5 dst0q+strideq*2
+%define %%p4 dst0q+stride3q
+%endif
+%define %%p3 dst4q
+%define %%p2 dst4q+strideq
+%define %%p1 dst4q+strideq*2
+%define %%p0 dst4q+stride3q
+%define %%q0 dst8q
+%define %%q1 dst8q+strideq
+%define %%q2 dst8q+strideq*2
+%define %%q3 dst8q+stride3q
+%if %2 == 16
+%define %%q4 dst12q
+%define %%q5 dst12q+strideq
+%define %%q6 dst12q+strideq*2
+%define %%q7 dst12q+stride3q
+%endif
+%else ; %1 == h
+    DEFINE_ARGS dst0, stride, stride3, dst4
+    lea           stride3q, [strideq*3]
+    lea              dst4q, [dst0q+strideq*4]
+
+%define %%p3 rsp+(%%tspoff+0)*mmsize
+%define %%p2 rsp+(%%tspoff+1)*mmsize
+%define %%p1 rsp+(%%tspoff+2)*mmsize
+%define %%p0 rsp+(%%tspoff+3)*mmsize
+%define %%q0 rsp+(%%tspoff+4)*mmsize
+%define %%q1 rsp+(%%tspoff+5)*mmsize
+%define %%q2 rsp+(%%tspoff+6)*mmsize
+%define %%q3 rsp+(%%tspoff+7)*mmsize
+
+%if %2 < 16
+    movu                m0, [dst0q+strideq*0-8]
+    movu                m1, [dst0q+strideq*1-8]
+    movu                m2, [dst0q+strideq*2-8]
+    movu                m3, [dst0q+stride3q -8]
+    movu                m4, [dst4q+strideq*0-8]
+    movu                m5, [dst4q+strideq*1-8]
+    movu                m6, [dst4q+strideq*2-8]
+    movu                m7, [dst4q+stride3q -8]
+
+%if ARCH_X86_64
+    TRANSPOSE8x8W        0, 1, 2, 3, 4, 5, 6, 7, 12
+%else
+    TRANSPOSE8x8W        0, 1, 2, 3, 4, 5, 6, 7, [%%p0], [%%q0]
+%endif
+
+    mova            [%%p3], m0
+    mova            [%%p2], m1
+    mova            [%%p1], m2
+    mova            [%%p0], m3
+%if ARCH_X86_64
+    mova            [%%q0], m4
+%endif
+    mova            [%%q1], m5
+    mova            [%%q2], m6
+    mova            [%%q3], m7
+
+    ; FIXME investigate if we can _not_ load q0-3 below if h, and adjust register
+    ; order here accordingly
+%else ; %2 == 16
+
+%define %%p7 rsp+(%%tspoff+ 8)*mmsize
+%define %%p6 rsp+(%%tspoff+ 9)*mmsize
+%define %%p5 rsp+(%%tspoff+10)*mmsize
+%define %%p4 rsp+(%%tspoff+11)*mmsize
+%define %%q4 rsp+(%%tspoff+12)*mmsize
+%define %%q5 rsp+(%%tspoff+13)*mmsize
+%define %%q6 rsp+(%%tspoff+14)*mmsize
+%define %%q7 rsp+(%%tspoff+15)*mmsize
+
+    mova                m0, [dst0q+strideq*0-16]
+    mova                m1, [dst0q+strideq*1-16]
+    mova                m2, [dst0q+strideq*2-16]
+    mova                m3, [dst0q+stride3q -16]
+    mova                m4, [dst4q+strideq*0-16]
+    mova                m5, [dst4q+strideq*1-16]
+%if ARCH_X86_64
+    mova                m6, [dst4q+strideq*2-16]
+%endif
+    mova                m7, [dst4q+stride3q -16]
+
+%if ARCH_X86_64
+    TRANSPOSE8x8W        0, 1, 2, 3, 4, 5, 6, 7, 12
+%else
+    TRANSPOSE8x8W        0, 1, 2, 3, 4, 5, 6, 7, [dst4q+strideq*2-16], [%%p3], 1
+%endif
+
+    mova            [%%p7], m0
+    mova            [%%p6], m1
+    mova            [%%p5], m2
+    mova            [%%p4], m3
+%if ARCH_X86_64
+    mova            [%%p3], m4
+%endif
+    mova            [%%p2], m5
+    mova            [%%p1], m6
+    mova            [%%p0], m7
+
+    mova                m0, [dst0q+strideq*0]
+    mova                m1, [dst0q+strideq*1]
+    mova                m2, [dst0q+strideq*2]
+    mova                m3, [dst0q+stride3q ]
+    mova                m4, [dst4q+strideq*0]
+    mova                m5, [dst4q+strideq*1]
+%if ARCH_X86_64
+    mova                m6, [dst4q+strideq*2]
+%endif
+    mova                m7, [dst4q+stride3q ]
+
+%if ARCH_X86_64
+    TRANSPOSE8x8W        0, 1, 2, 3, 4, 5, 6, 7, 12
+%else
+    TRANSPOSE8x8W        0, 1, 2, 3, 4, 5, 6, 7, [dst4q+strideq*2], [%%q4], 1
+%endif
+
+    mova            [%%q0], m0
+    mova            [%%q1], m1
+    mova            [%%q2], m2
+    mova            [%%q3], m3
+%if ARCH_X86_64
+    mova            [%%q4], m4
+%endif
+    mova            [%%q5], m5
+    mova            [%%q6], m6
+    mova            [%%q7], m7
+
+    ; FIXME investigate if we can _not_ load q0|q4-7 below if h, and adjust register
+    ; order here accordingly
+%endif ; %2
+%endif ; %1
+
+    ; load q0|q4-7 data
+    mova                m0, [%%q0]
+%if %2 == 16
+    mova                m4, [%%q4]
+    mova                m5, [%%q5]
+    mova                m6, [%%q6]
+    mova                m7, [%%q7]
+
+    ; flat8out q portion
+    FLAT8OUT_HALF
+    SCRATCH              7, 15, rsp+(%%off+6)*mmsize, F8O
+%endif
+
+    ; load q1-3 data
+    mova                m1, [%%q1]
+    mova                m2, [%%q2]
+    mova                m3, [%%q3]
+
+    ; r6-8|pw_4[m8-11]=reg_E/I/H/F
+    ; r9[m15]=!flatout[q]
+    ; m12-14=free
+    ; m0-3=q0-q3
+    ; m4-7=free
+
+    ; flat8in|fm|hev q portion
+    FLAT8IN_HALF        %2
+    SCRATCH              7, 13, rsp+(%%off+4)*mmsize, HEV
+%if %2 > 4
+    SCRATCH              4, 14, rsp+(%%off+5)*mmsize, F8I
+%endif
+
+    ; r6-8|pw_4[m8-11]=reg_E/I/H/F
+    ; r9[m15]=!flat8out[q]
+    ; r10[m13]=hev[q]
+    ; r11[m14]=!flat8in[q]
+    ; m2=!fm[q]
+    ; m0,1=q0-q1
+    ; m3-7=free
+    ; m12=free
+
+    ; load p0-1
+    mova                m3, [%%p0]
+    mova                m4, [%%p1]
+
+    ; fm mb_edge portion
+    psubw               m5, m3, m0                  ; p0-q0
+    psubw               m6, m4, m1                  ; p1-q1
+%if ARCH_X86_64
+    ABS2                m5, m6, m7, m12             ; abs(q0-p0) | abs(q1-p1)
+%else
+    ABS1                m5, m7                      ; abs(q0-p0)
+    ABS1                m6, m7                      ; abs(q1-p1)
+%endif
+    paddw               m5, m5
+    psraw               m6, 1
+    paddw               m6, m5                      ; abs(q0-p0)*2+(abs(q1-p1)>>1)
+    pcmpgtw             m6, reg_E
+    por                 m2, m6
+    SCRATCH              2, 12, rsp+(%%off+3)*mmsize, FM
+
+    ; r6-8|pw_4[m8-11]=reg_E/I/H/F
+    ; r9[m15]=!flat8out[q]
+    ; r10[m13]=hev[q]
+    ; r11[m14]=!flat8in[q]
+    ; r12[m12]=!fm[q]
+    ; m3-4=p0-1
+    ; m0-2/5-7=free
+
+    ; load p4-7 data
+    SWAP                 3, 0                       ; p0
+    SWAP                 4, 1                       ; p1
+%if %2 == 16
+    mova                m7, [%%p7]
+    mova                m6, [%%p6]
+    mova                m5, [%%p5]
+    mova                m4, [%%p4]
+
+    ; flat8out p portion
+    FLAT8OUT_HALF
+    por                 m7, reg_F8O
+    SCRATCH              7, 15, rsp+(%%off+6)*mmsize, F8O
+%endif
+
+    ; r6-8|pw_4[m8-11]=reg_E/I/H/F
+    ; r9[m15]=!flat8out
+    ; r10[m13]=hev[q]
+    ; r11[m14]=!flat8in[q]
+    ; r12[m12]=!fm[q]
+    ; m0-1=p0-1
+    ; m2-7=free
+
+    ; load p2-3 data
+    mova                m2, [%%p2]
+    mova                m3, [%%p3]
+
+    ; flat8in|fm|hev p portion
+    FLAT8IN_HALF        %2
+    por                 m7, reg_HEV
+%if %2 > 4
+    por                 m4, reg_F8I
+%endif
+    por                 m2, reg_FM
+%if %2 > 4
+    por                 m4, m2                      ; !flat8|!fm
+%if %2 == 16
+    por                 m5, m4, reg_F8O             ; !flat16|!fm
+    pandn               m2, m4                      ; filter4_mask
+    pandn               m4, m5                      ; filter8_mask
+    pxor                m5, [pw_m1]                 ; filter16_mask
+    SCRATCH              5, 15, rsp+(%%off+6)*mmsize, F16M
+%else
+    pandn               m2, m4                      ; filter4_mask
+    pxor                m4, [pw_m1]                 ; filter8_mask
+%endif
+    SCRATCH              4, 14, rsp+(%%off+5)*mmsize, F8M
+%else
+    pxor                m2, [pw_m1]                 ; filter4_mask
+%endif
+    SCRATCH              7, 13, rsp+(%%off+4)*mmsize, HEV
+    SCRATCH              2, 12, rsp+(%%off+3)*mmsize, F4M
+
+    ; r9[m15]=filter16_mask
+    ; r10[m13]=hev
+    ; r11[m14]=filter8_mask
+    ; r12[m12]=filter4_mask
+    ; m0,1=p0-p1
+    ; m2-7=free
+    ; m8-11=free
+
+%if %2 > 4
+%if %2 == 16
+    ; filter_14
+    mova                m2, [%%p7]
+    mova                m3, [%%p6]
+    mova                m6, [%%p5]
+    mova                m7, [%%p4]
+    PRELOAD              8, %%p3, P3
+    PRELOAD              9, %%p2, P2
+%endif
+    PRELOAD             10, %%q0, Q0
+    PRELOAD             11, %%q1, Q1
+%if %2 == 16
+    psllw               m4, m2, 3
+    paddw               m5, m3, m3
+    paddw               m4, m6
+    paddw               m5, m7
+    paddw               m4, reg_P3
+    paddw               m5, reg_P2
+    paddw               m4, m1
+    paddw               m5, m0
+    paddw               m4, reg_Q0                  ; q0+p1+p3+p5+p7*8
+    psubw               m5, m2                      ; p0+p2+p4+p6*2-p7
+    paddw               m4, [pw_8]
+    paddw               m5, m4                      ; q0+p0+p1+p2+p3+p4+p5+p6*2+p7*7+8
+
+    ; below, we use r0-5 for storing pre-filter pixels for subsequent subtraction
+    ; at the end of the filter
+
+    mova    [rsp+0*mmsize], m3
+    FILTER_STEP         m4, m5, F16M, 4, %%p6, m3,     m2,             m6,     reg_Q1
+%endif
+    mova                m3, [%%q2]
+%if %2 == 16
+    mova    [rsp+1*mmsize], m6
+    FILTER_STEP         m4, m5, F16M, 4, %%p5, m6,     m2,             m7,     m3
+%endif
+    mova                m6, [%%q3]
+%if %2 == 16
+    mova    [rsp+2*mmsize], m7
+    FILTER_STEP         m4, m5, F16M, 4, %%p4, m7,     m2,             reg_P3, m6
+    mova                m7, [%%q4]
+%if ARCH_X86_64
+    mova    [rsp+3*mmsize], reg_P3
+%else
+    mova                m4, reg_P3
+    mova    [rsp+3*mmsize], m4
+%endif
+    FILTER_STEP         m4, m5, F16M, 4, %%p3, reg_P3, m2,             reg_P2, m7
+    PRELOAD              8, %%q5, Q5
+%if ARCH_X86_64
+    mova    [rsp+4*mmsize], reg_P2
+%else
+    mova                m4, reg_P2
+    mova    [rsp+4*mmsize], m4
+%endif
+    FILTER_STEP         m4, m5, F16M, 4, %%p2, reg_P2, m2,             m1,     reg_Q5
+    PRELOAD              9, %%q6, Q6
+    mova    [rsp+5*mmsize], m1
+    FILTER_STEP         m4, m5, F16M, 4, %%p1, m1,     m2,             m0,     reg_Q6
+    mova                m1, [%%q7]
+    FILTER_STEP         m4, m5, F16M, 4, %%p0, m0,     m2,             reg_Q0, m1,     1
+    FILTER_STEP         m4, m5, F16M, 4, %%q0, reg_Q0, [rsp+0*mmsize], reg_Q1, m1,     ARCH_X86_64
+    FILTER_STEP         m4, m5, F16M, 4, %%q1, reg_Q1, [rsp+1*mmsize], m3,     m1,     ARCH_X86_64
+    FILTER_STEP         m4, m5, F16M, 4, %%q2, m3,     [rsp+2*mmsize], m6,     m1,     1
+    FILTER_STEP         m4, m5, F16M, 4, %%q3, m6,     [rsp+3*mmsize], m7,     m1
+    FILTER_STEP         m4, m5, F16M, 4, %%q4, m7,     [rsp+4*mmsize], reg_Q5, m1
+    FILTER_STEP         m4, m5, F16M, 4, %%q5, reg_Q5, [rsp+5*mmsize], reg_Q6, m1
+    FILTER_STEP         m4, m5, F16M, 4, %%q6, reg_Q6
+
+    mova                m7, [%%p1]
+%else
+    SWAP                 1, 7
+%endif
+
+    mova                m2, [%%p3]
+    mova                m1, [%%p2]
+
+    ; reg_Q0-1 (m10-m11)
+    ; m0=p0
+    ; m1=p2
+    ; m2=p3
+    ; m3=q2
+    ; m4-5=free
+    ; m6=q3
+    ; m7=p1
+    ; m8-9 unused
+
+    ; filter_6
+    psllw               m4, m2, 2
+    paddw               m5, m1, m1
+    paddw               m4, m7
+    psubw               m5, m2
+    paddw               m4, m0
+    paddw               m5, reg_Q0
+    paddw               m4, [pw_4]
+    paddw               m5, m4
+
+%if ARCH_X86_64
+    mova                m8, m1
+    mova                m9, m7
+%else
+    mova    [rsp+0*mmsize], m1
+    mova    [rsp+1*mmsize], m7
+%endif
+%ifidn %1, v
+    FILTER_STEP         m4, m5, F8M, 3, %%p2, m1,     m2,             m7,     reg_Q1
+%else
+    FILTER_STEP         m4, m5, F8M, 3, %%p2, m1,     m2,             m7,     reg_Q1, 1
+%endif
+    FILTER_STEP         m4, m5, F8M, 3, %%p1, m7,     m2,             m0,     m3, 1
+    FILTER_STEP         m4, m5, F8M, 3, %%p0, m0,     m2,             reg_Q0, m6, 1
+%if ARCH_X86_64
+    FILTER_STEP         m4, m5, F8M, 3, %%q0, reg_Q0, m8,             reg_Q1, m6, ARCH_X86_64
+    FILTER_STEP         m4, m5, F8M, 3, %%q1, reg_Q1, m9,             m3,     m6, ARCH_X86_64
+%else
+    FILTER_STEP         m4, m5, F8M, 3, %%q0, reg_Q0, [rsp+0*mmsize], reg_Q1, m6, ARCH_X86_64
+    FILTER_STEP         m4, m5, F8M, 3, %%q1, reg_Q1, [rsp+1*mmsize], m3,     m6, ARCH_X86_64
+%endif
+    FILTER_STEP         m4, m5, F8M, 3, %%q2, m3
+
+    UNSCRATCH            2, 10, %%q0
+    UNSCRATCH            6, 11, %%q1
+%else
+    SWAP                 1, 7
+    mova                m2, [%%q0]
+    mova                m6, [%%q1]
+%endif
+    UNSCRATCH            3, 13, rsp+(%%off+4)*mmsize, HEV
+
+    ; m0=p0
+    ; m1=p2
+    ; m2=q0
+    ; m3=hev_mask
+    ; m4-5=free
+    ; m6=q1
+    ; m7=p1
+
+    ; filter_4
+    psubw               m4, m7, m6              ; p1-q1
+    psubw               m5, m2, m0              ; q0-p0
+    pand                m4, m3
+    pminsw              m4, [pw_ %+ %%maxsgn]
+    pmaxsw              m4, [pw_ %+ %%minsgn]   ; clip_intp2(p1-q1, bpp-1) -> f
+    paddw               m4, m5
+    paddw               m5, m5
+    paddw               m4, m5                  ; 3*(q0-p0)+f
+    pminsw              m4, [pw_ %+ %%maxsgn]
+    pmaxsw              m4, [pw_ %+ %%minsgn]   ; clip_intp2(3*(q0-p0)+f, bpp-1) -> f
+    pand                m4, reg_F4M
+    paddw               m5, m4, [pw_4]
+    paddw               m4, [pw_3]
+    pminsw              m5, [pw_ %+ %%maxsgn]
+    pminsw              m4, [pw_ %+ %%maxsgn]
+    psraw               m5, 3                   ; min_intp2(f+4, bpp-1)>>3 -> f1
+    psraw               m4, 3                   ; min_intp2(f+3, bpp-1)>>3 -> f2
+    psubw               m2, m5                  ; q0-f1
+    paddw               m0, m4                  ; p0+f2
+    pandn               m3, m5                  ; f1 & !hev (for p1/q1 adj)
+    pxor                m4, m4
+    mova                m5, [pw_ %+ %%maxusgn]
+    pmaxsw              m2, m4
+    pmaxsw              m0, m4
+    pminsw              m2, m5
+    pminsw              m0, m5
+%if cpuflag(ssse3)
+    pmulhrsw            m3, [pw_16384]          ; (f1+1)>>1
+%else
+    paddw               m3, [pw_1]
+    psraw               m3, 1
+%endif
+    paddw               m7, m3                  ; p1+f
+    psubw               m6, m3                  ; q1-f
+    pmaxsw              m7, m4
+    pmaxsw              m6, m4
+    pminsw              m7, m5
+    pminsw              m6, m5
+
+    ; store
+%ifidn %1, v
+    mova            [%%p1], m7
+    mova            [%%p0], m0
+    mova            [%%q0], m2
+    mova            [%%q1], m6
+%else ; %1 == h
+%if %2 == 4
+    TRANSPOSE4x4W        7, 0, 2, 6, 1
+    movh   [dst0q+strideq*0-4], m7
+    movhps [dst0q+strideq*1-4], m7
+    movh   [dst0q+strideq*2-4], m0
+    movhps [dst0q+stride3q -4], m0
+    movh   [dst4q+strideq*0-4], m2
+    movhps [dst4q+strideq*1-4], m2
+    movh   [dst4q+strideq*2-4], m6
+    movhps [dst4q+stride3q -4], m6
+%elif %2 == 8
+    mova                m3, [%%p3]
+    mova                m4, [%%q2]
+    mova                m5, [%%q3]
+
+%if ARCH_X86_64
+    TRANSPOSE8x8W        3, 1, 7, 0, 2, 6, 4, 5, 8
+%else
+    TRANSPOSE8x8W        3, 1, 7, 0, 2, 6, 4, 5, [%%q2], [%%q0], 1
+    mova                m2, [%%q0]
+%endif
+
+    movu [dst0q+strideq*0-8], m3
+    movu [dst0q+strideq*1-8], m1
+    movu [dst0q+strideq*2-8], m7
+    movu [dst0q+stride3q -8], m0
+    movu [dst4q+strideq*0-8], m2
+    movu [dst4q+strideq*1-8], m6
+    movu [dst4q+strideq*2-8], m4
+    movu [dst4q+stride3q -8], m5
+%else ; %2 == 16
+    SCRATCH              2, 8, %%q0
+    SCRATCH              6, 9, %%q1
+    mova                m2, [%%p7]
+    mova                m3, [%%p6]
+    mova                m4, [%%p5]
+    mova                m5, [%%p4]
+    mova                m6, [%%p3]
+
+%if ARCH_X86_64
+    TRANSPOSE8x8W        2, 3, 4, 5, 6, 1, 7, 0, 10
+%else
+    mova            [%%p1], m7
+    TRANSPOSE8x8W        2, 3, 4, 5, 6, 1, 7, 0, [%%p1], [dst4q+strideq*0-16], 1
+%endif
+
+    mova [dst0q+strideq*0-16], m2
+    mova [dst0q+strideq*1-16], m3
+    mova [dst0q+strideq*2-16], m4
+    mova [dst0q+stride3q -16], m5
+%if ARCH_X86_64
+    mova [dst4q+strideq*0-16], m6
+%endif
+    mova [dst4q+strideq*1-16], m1
+    mova [dst4q+strideq*2-16], m7
+    mova [dst4q+stride3q -16], m0
+
+    UNSCRATCH            2, 8, %%q0
+    UNSCRATCH            6, 9, %%q1
+    mova                m0, [%%q2]
+    mova                m1, [%%q3]
+    mova                m3, [%%q4]
+    mova                m4, [%%q5]
+%if ARCH_X86_64
+    mova                m5, [%%q6]
+%endif
+    mova                m7, [%%q7]
+
+%if ARCH_X86_64
+    TRANSPOSE8x8W        2, 6, 0, 1, 3, 4, 5, 7, 8
+%else
+    TRANSPOSE8x8W        2, 6, 0, 1, 3, 4, 5, 7, [%%q6], [dst4q+strideq*0], 1
+%endif
+
+    mova [dst0q+strideq*0], m2
+    mova [dst0q+strideq*1], m6
+    mova [dst0q+strideq*2], m0
+    mova [dst0q+stride3q ], m1
+%if ARCH_X86_64
+    mova [dst4q+strideq*0], m3
+%endif
+    mova [dst4q+strideq*1], m4
+    mova [dst4q+strideq*2], m5
+    mova [dst4q+stride3q ], m7
+%endif ; %2
+%endif ; %1
+    RET
+%endmacro
+
+%macro LOOP_FILTER_CPUSETS 3 ; dir, wd, bpp: instantiate LOOP_FILTER once each for sse2, ssse3 and avx
+INIT_XMM sse2
+LOOP_FILTER %1, %2, %3
+INIT_XMM ssse3
+LOOP_FILTER %1, %2, %3
+INIT_XMM avx
+LOOP_FILTER %1, %2, %3
+%endmacro
+
+%macro LOOP_FILTER_WDSETS 2 ; dir, bpp: instantiate every cpu set for wd = 4, 8 and 16
+LOOP_FILTER_CPUSETS %1,  4, %2
+LOOP_FILTER_CPUSETS %1,  8, %2
+LOOP_FILTER_CPUSETS %1, 16, %2
+%endmacro
+
+LOOP_FILTER_WDSETS h, 10 ; each line expands to 3 widths x 3 cpu sets = 9 LOOP_FILTER instances
+LOOP_FILTER_WDSETS v, 10
+LOOP_FILTER_WDSETS h, 12
+LOOP_FILTER_WDSETS v, 12
diff --git a/libavcodec/x86/vp9mc.asm b/libavcodec/x86/vp9mc.asm
index 53939579..9152ba54 100644
--- a/libavcodec/x86/vp9mc.asm
+++ b/libavcodec/x86/vp9mc.asm
@@ -45,6 +45,13 @@ times 8 dw %7
 times 8 dw %8
 %endmacro
 
+%macro F8_16BPP_TAPS 8 ; 8 taps -> 4 rows, each holding one adjacent tap pair repeated 8 times (16 words)
+times 8 dw %1, %2
+times 8 dw %3, %4
+times 8 dw %5, %6
+times 8 dw %7, %8
+%endmacro
+
 %macro FILTER 1
 const filters_%1 ; smooth
                     F8_TAPS -3, -1,  32,  64,  38,   1, -3,  0
@@ -102,12 +109,15 @@ FILTER ssse3
 %define F8_TAPS F8_SSE2_TAPS
 ; int16_t ff_filters_sse2[3][15][8][8]
 FILTER sse2
+%define F8_TAPS F8_16BPP_TAPS
+; int16_t ff_filters_16bpp[3][15][4][16]
+FILTER 16bpp
 
 SECTION .text
 
 %macro filter_sse2_h_fn 1
 %assign %%px mmsize/2
-cglobal vp9_%1_8tap_1d_h_ %+ %%px, 6, 6, 15, dst, dstride, src, sstride, h, filtery
+cglobal vp9_%1_8tap_1d_h_ %+ %%px %+ _8, 6, 6, 15, dst, dstride, src, sstride, h, filtery
     pxor        m5, m5
     mova        m6, [pw_64]
     mova        m7, [filteryq+  0]
@@ -192,7 +202,7 @@ filter_sse2_h_fn avg
 
 %macro filter_h_fn 1
 %assign %%px mmsize/2
-cglobal vp9_%1_8tap_1d_h_ %+ %%px, 6, 6, 11, dst, dstride, src, sstride, h, filtery
+cglobal vp9_%1_8tap_1d_h_ %+ %%px %+ _8, 6, 6, 11, dst, dstride, src, sstride, h, filtery
     mova        m6, [pw_256]
     mova        m7, [filteryq+ 0]
 %if ARCH_X86_64 && mmsize > 8
@@ -253,7 +263,7 @@ filter_h_fn avg
 %if ARCH_X86_64
 %macro filter_hx2_fn 1
 %assign %%px mmsize
-cglobal vp9_%1_8tap_1d_h_ %+ %%px, 6, 6, 14, dst, dstride, src, sstride, h, filtery
+cglobal vp9_%1_8tap_1d_h_ %+ %%px %+ _8, 6, 6, 14, dst, dstride, src, sstride, h, filtery
     mova       m13, [pw_256]
     mova        m8, [filteryq+ 0]
     mova        m9, [filteryq+32]
@@ -315,9 +325,9 @@ filter_hx2_fn avg
 %macro filter_sse2_v_fn 1
 %assign %%px mmsize/2
 %if ARCH_X86_64
-cglobal vp9_%1_8tap_1d_v_ %+ %%px, 6, 8, 15, dst, dstride, src, sstride, h, filtery, src4, sstride3
+cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _8, 6, 8, 15, dst, dstride, src, sstride, h, filtery, src4, sstride3
 %else
-cglobal vp9_%1_8tap_1d_v_ %+ %%px, 4, 7, 15, dst, dstride, src, sstride, filtery, src4, sstride3
+cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _8, 4, 7, 15, dst, dstride, src, sstride, filtery, src4, sstride3
     mov   filteryq, r5mp
 %define hd r4mp
 %endif
@@ -413,9 +423,9 @@ filter_sse2_v_fn avg
 %macro filter_v_fn 1
 %assign %%px mmsize/2
 %if ARCH_X86_64
-cglobal vp9_%1_8tap_1d_v_ %+ %%px, 6, 8, 11, dst, dstride, src, sstride, h, filtery, src4, sstride3
+cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _8, 6, 8, 11, dst, dstride, src, sstride, h, filtery, src4, sstride3
 %else
-cglobal vp9_%1_8tap_1d_v_ %+ %%px, 4, 7, 11, dst, dstride, src, sstride, filtery, src4, sstride3
+cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _8, 4, 7, 11, dst, dstride, src, sstride, filtery, src4, sstride3
     mov   filteryq, r5mp
 %define hd r4mp
 %endif
@@ -487,7 +497,7 @@ filter_v_fn avg
 
 %macro filter_vx2_fn 1
 %assign %%px mmsize
-cglobal vp9_%1_8tap_1d_v_ %+ %%px, 6, 8, 14, dst, dstride, src, sstride, h, filtery, src4, sstride3
+cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _8, 6, 8, 14, dst, dstride, src, sstride, h, filtery, src4, sstride3
     mova       m13, [pw_256]
     lea  sstride3q, [sstrideq*3]
     lea      src4q, [srcq+sstrideq]
@@ -553,7 +563,7 @@ filter_vx2_fn avg
 
 %endif ; ARCH_X86_64
 
-%macro fpel_fn 6
+%macro fpel_fn 6-8 0, 4
 %if %2 == 4
 %define %%srcfn movh
 %define %%dstfn movh
@@ -562,29 +572,57 @@ filter_vx2_fn avg
 %define %%dstfn mova
 %endif
 
+%if %7 == 8
+%define %%pavg pavgb
+%define %%szsuf _8
+%elif %7 == 16
+%define %%pavg pavgw
+%define %%szsuf _16
+%else
+%define %%szsuf
+%endif
+
 %if %2 <= mmsize
-cglobal vp9_%1%2, 5, 7, 4, dst, dstride, src, sstride, h, dstride3, sstride3
+cglobal vp9_%1%2 %+ %%szsuf, 5, 7, 4, dst, dstride, src, sstride, h, dstride3, sstride3
     lea  sstride3q, [sstrideq*3]
     lea  dstride3q, [dstrideq*3]
 %else
-cglobal vp9_%1%2, 5, 5, 4, dst, dstride, src, sstride, h
+cglobal vp9_%1%2 %+ %%szsuf, 5, 5, %8, dst, dstride, src, sstride, h
 %endif
 .loop:
     %%srcfn     m0, [srcq]
     %%srcfn     m1, [srcq+s%3]
     %%srcfn     m2, [srcq+s%4]
     %%srcfn     m3, [srcq+s%5]
+%if %2/mmsize == 8
+    %%srcfn     m4, [srcq+mmsize*4]
+    %%srcfn     m5, [srcq+mmsize*5]
+    %%srcfn     m6, [srcq+mmsize*6]
+    %%srcfn     m7, [srcq+mmsize*7]
+%endif
     lea       srcq, [srcq+sstrideq*%6]
 %ifidn %1, avg
-    pavgb       m0, [dstq]
-    pavgb       m1, [dstq+d%3]
-    pavgb       m2, [dstq+d%4]
-    pavgb       m3, [dstq+d%5]
+    %%pavg      m0, [dstq]
+    %%pavg      m1, [dstq+d%3]
+    %%pavg      m2, [dstq+d%4]
+    %%pavg      m3, [dstq+d%5]
+%if %2/mmsize == 8
+    %%pavg      m4, [dstq+mmsize*4]
+    %%pavg      m5, [dstq+mmsize*5]
+    %%pavg      m6, [dstq+mmsize*6]
+    %%pavg      m7, [dstq+mmsize*7]
+%endif
 %endif
     %%dstfn [dstq], m0
     %%dstfn [dstq+d%3], m1
     %%dstfn [dstq+d%4], m2
     %%dstfn [dstq+d%5], m3
+%if %2/mmsize == 8
+    %%dstfn [dstq+mmsize*4], m4
+    %%dstfn [dstq+mmsize*5], m5
+    %%dstfn [dstq+mmsize*6], m6
+    %%dstfn [dstq+mmsize*7], m7
+%endif
     lea       dstq, [dstq+dstrideq*%6]
     sub         hd, %6
     jnz .loop
@@ -599,23 +637,38 @@ INIT_MMX mmx
 fpel_fn put, 4,  strideq, strideq*2, stride3q, 4
 fpel_fn put, 8,  strideq, strideq*2, stride3q, 4
 INIT_MMX mmxext
-fpel_fn avg, 4,  strideq, strideq*2, stride3q, 4
-fpel_fn avg, 8,  strideq, strideq*2, stride3q, 4
+fpel_fn avg, 4,  strideq, strideq*2, stride3q, 4, 8
+fpel_fn avg, 8,  strideq, strideq*2, stride3q, 4, 8
 INIT_XMM sse
 fpel_fn put, 16, strideq, strideq*2, stride3q, 4
 fpel_fn put, 32, mmsize,  strideq,   strideq+mmsize, 2
 fpel_fn put, 64, mmsize,  mmsize*2,  mmsize*3, 1
+fpel_fn put, 128, mmsize, mmsize*2,  mmsize*3, 1, 0, 8
 INIT_XMM sse2
-fpel_fn avg, 16, strideq, strideq*2, stride3q, 4
-fpel_fn avg, 32, mmsize,  strideq,   strideq+mmsize, 2
-fpel_fn avg, 64, mmsize,  mmsize*2,  mmsize*3, 1
+fpel_fn avg, 16, strideq, strideq*2, stride3q, 4, 8
+fpel_fn avg, 32, mmsize,  strideq,   strideq+mmsize, 2, 8
+fpel_fn avg, 64, mmsize,  mmsize*2,  mmsize*3, 1, 8
 INIT_YMM avx
 fpel_fn put, 32, strideq, strideq*2, stride3q, 4
 fpel_fn put, 64, mmsize,  strideq,   strideq+mmsize, 2
+fpel_fn put, 128, mmsize, mmsize*2,     mmsize*3, 1
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+fpel_fn avg, 32, strideq, strideq*2, stride3q, 4, 8
+fpel_fn avg, 64, mmsize,  strideq,   strideq+mmsize, 2, 8
+%endif
+INIT_MMX mmxext
+fpel_fn avg,  8,  strideq, strideq*2, stride3q, 4, 16
+INIT_XMM sse2
+fpel_fn avg,  16, strideq, strideq*2, stride3q, 4, 16
+fpel_fn avg,  32, mmsize,  strideq,   strideq+mmsize, 2, 16
+fpel_fn avg,  64, mmsize,  mmsize*2,  mmsize*3, 1, 16
+fpel_fn avg, 128, mmsize,  mmsize*2,  mmsize*3, 1, 16, 8
 %if HAVE_AVX2_EXTERNAL
 INIT_YMM avx2
-fpel_fn avg, 32, strideq, strideq*2, stride3q, 4
-fpel_fn avg, 64, mmsize,  strideq,   strideq+mmsize, 2
+fpel_fn avg,  32, strideq, strideq*2, stride3q, 4, 16
+fpel_fn avg,  64, mmsize,  strideq,   strideq+mmsize, 2, 16
+fpel_fn avg, 128, mmsize,  mmsize*2,  mmsize*3, 1, 16
 %endif
 %undef s16
 %undef d16
diff --git a/libavcodec/x86/vp9mc_16bpp.asm b/libavcodec/x86/vp9mc_16bpp.asm
new file mode 100644
index 00000000..9a462eaf
--- /dev/null
+++ b/libavcodec/x86/vp9mc_16bpp.asm
@@ -0,0 +1,431 @@
+;******************************************************************************
+;* VP9 MC SIMD optimizations
+;*
+;* Copyright (c) 2015 Ronald S. Bultje <rsbultje gmail com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA 32
+
+pd_64: times 8 dd 64
+
+cextern pw_1023
+cextern pw_4095
+
+SECTION .text
+
+%macro filter_h4_fn 1-2 12 ; %1 = put|avg (selects the dst blend), %2 = vector register count handed to cglobal (default 12)
+cglobal vp9_%1_8tap_1d_h_4_10, 6, 6, %2, dst, dstride, src, sstride, h, filtery
+    mova        m5, [pw_1023] ; upper clamp used by pminsw below (presumably 1023 = 2^10-1 in every word)
+.body: ; shared tail: the _12 entry point jumps here with its own clamp already in m5
+%if notcpuflag(sse4) && ARCH_X86_64
+    pxor       m11, m11 ; zero, lower clamp for pmaxsw in the non-SSE4 path
+%endif
+    mova        m6, [pd_64] ; rounding term (64) added before the arithmetic shift by 7
+    mova        m7, [filteryq+ 0] ; first of four coefficient vectors, spaced 32 bytes apart
+%if ARCH_X86_64 && mmsize > 8
+    mova        m8, [filteryq+32] ; x86-64: cache the other three coefficient vectors in m8-m10
+    mova        m9, [filteryq+64]
+    mova       m10, [filteryq+96]
+%endif
+.loop: ; one output row (4 pixels, 8 bytes) per iteration
+    movh        m0, [srcq-6] ; pixels at x-3.. (2 bytes per pixel, so -6 bytes = 3 pixels left)
+    movh        m1, [srcq-4] ; x-2..
+    movh        m2, [srcq-2] ; x-1..
+    movh        m3, [srcq+0] ; x+0..
+    movh        m4, [srcq+2] ; x+1..
+    punpcklwd   m0, m1 ; interleave (x-3, x-2) word pairs for pmaddwd
+    punpcklwd   m2, m3 ; interleave (x-1, x+0) word pairs
+    pmaddwd     m0, m7 ; per-pixel dword sum of the first two products
+%if ARCH_X86_64 && mmsize > 8
+    pmaddwd     m2, m8
+%else
+    pmaddwd     m2, [filteryq+32]
+%endif
+    movu        m1, [srcq+4] ; x+2..; NOTE(review): full-width load, only the low 4 words are consumed by punpcklwd
+    movu        m3, [srcq+6] ; x+3.. (same note)
+    paddd       m0, m2 ; accumulate products 0-3
+    movu        m2, [srcq+8] ; x+4.. (same note)
+    add       srcq, sstrideq ; advance to the next source row; all loads for this row are done
+    punpcklwd   m4, m1 ; interleave (x+1, x+2)
+    punpcklwd   m3, m2 ; interleave (x+3, x+4)
+%if ARCH_X86_64 && mmsize > 8
+    pmaddwd     m4, m9
+    pmaddwd     m3, m10
+%else
+    pmaddwd     m4, [filteryq+64]
+    pmaddwd     m3, [filteryq+96]
+%endif
+    paddd       m0, m4 ; accumulate products 4-5
+    paddd       m0, m3 ; accumulate products 6-7
+    paddd       m0, m6 ; + 64 for rounding
+    psrad       m0, 7 ; scale the 8-tap sum back down
+%if cpuflag(sse4)
+    packusdw    m0, m0 ; dwords -> words with unsigned saturation (negatives become 0)
+%else
+    packssdw    m0, m0 ; dwords -> words with signed saturation; negatives are clamped by pmaxsw below
+%endif
+%ifidn %1, avg
+    movh        m1, [dstq] ; avg only: fetch the 4 existing destination pixels
+%endif
+    pminsw      m0, m5 ; clamp to the bit-depth maximum held in m5
+%if notcpuflag(sse4)
+%if ARCH_X86_64
+    pmaxsw      m0, m11 ; clamp negatives to 0 using the pre-zeroed m11
+%else
+    pxor        m2, m2 ; x86-32: no spare register, re-create zero each row
+    pmaxsw      m0, m2
+%endif
+%endif
+%ifidn %1, avg
+    pavgw       m0, m1 ; avg only: rounded average with the existing destination pixels
+%endif
+    movh    [dstq], m0 ; store 4 pixels
+    add       dstq, dstrideq ; next destination row
+    dec         hd ; one row done
+    jg .loop ; repeat while rows remain
+    RET
+
+cglobal vp9_%1_8tap_1d_h_4_12, 6, 6, %2, dst, dstride, src, sstride, h, filtery
+    mova        m5, [pw_4095] ; _12 entry point: only the clamp maximum differs (presumably 4095 = 2^12-1)
+    jmp mangle(private_prefix %+ _ %+ vp9_%1_8tap_1d_h_4_10 %+ SUFFIX).body
+%endmacro
+
+INIT_XMM sse2
+filter_h4_fn put
+filter_h4_fn avg
+
+%macro filter_h_fn 1-2 12 ; %1 = put|avg, %2 = vector register count handed to cglobal (default 12)
+%assign %%px mmsize/2
+cglobal vp9_%1_8tap_1d_h_ %+ %%px %+ _10, 6, 6, %2, dst, dstride, src, sstride, h, filtery
+    mova        m5, [pw_1023] ; upper clamp used by pminsw below (presumably 1023 = 2^10-1 in every word)
+.body: ; shared tail: the _12 entry point jumps here with its own clamp already in m5
+%if notcpuflag(sse4) && ARCH_X86_64
+    pxor       m11, m11 ; zero, lower clamp for pmaxsw in the non-SSE4 path
+%endif
+    mova        m6, [pd_64] ; rounding term (64) added before the arithmetic shift by 7
+    mova        m7, [filteryq+ 0] ; first of four coefficient vectors, spaced 32 bytes apart
+%if ARCH_X86_64 && mmsize > 8
+    mova        m8, [filteryq+32] ; x86-64: cache the other three coefficient vectors in m8-m10
+    mova        m9, [filteryq+64]
+    mova       m10, [filteryq+96]
+%endif
+.loop: ; one output row of %%px (= mmsize/2) pixels per iteration
+    movu        m0, [srcq-6] ; x-3..: pmaddwd pairs adjacent words, so this feeds the even output pixels
+    movu        m1, [srcq-4] ; x-2..: same coefficients, shifted by one pixel -> odd output pixels
+    movu        m2, [srcq-2] ; x-1.. (even outputs, second coefficient vector)
+    movu        m3, [srcq+0] ; x+0.. (odd outputs, second coefficient vector)
+    movu        m4, [srcq+2] ; x+1.. (even outputs, third coefficient vector)
+    pmaddwd     m0, m7 ; m0 accumulates the even output pixels
+    pmaddwd     m1, m7 ; m1 accumulates the odd output pixels
+%if ARCH_X86_64 && mmsize > 8
+    pmaddwd     m2, m8
+    pmaddwd     m3, m8
+    pmaddwd     m4, m9
+%else
+    pmaddwd     m2, [filteryq+32]
+    pmaddwd     m3, [filteryq+32]
+    pmaddwd     m4, [filteryq+64]
+%endif
+    paddd       m0, m2 ; even += second pair of products
+    paddd       m1, m3 ; odd  += second pair of products
+    paddd       m0, m4 ; even += third pair of products
+    movu        m2, [srcq+4] ; x+2.. (odd outputs, third coefficient vector)
+    movu        m3, [srcq+6] ; x+3.. (even outputs, fourth coefficient vector)
+    movu        m4, [srcq+8] ; x+4.. (odd outputs, fourth coefficient vector)
+    add       srcq, sstrideq ; advance to the next source row; all loads for this row are done
+%if ARCH_X86_64 && mmsize > 8
+    pmaddwd     m2, m9
+    pmaddwd     m3, m10
+    pmaddwd     m4, m10
+%else
+    pmaddwd     m2, [filteryq+64]
+    pmaddwd     m3, [filteryq+96]
+    pmaddwd     m4, [filteryq+96]
+%endif
+    paddd       m1, m2 ; odd  += third pair of products
+    paddd       m0, m3 ; even += fourth pair of products
+    paddd       m1, m4 ; odd  += fourth pair of products
+    paddd       m0, m6 ; + 64 for rounding
+    paddd       m1, m6
+    psrad       m0, 7 ; scale the 8-tap sums back down
+    psrad       m1, 7
+%if cpuflag(sse4)
+    packusdw    m0, m0 ; dwords -> words with unsigned saturation (negatives become 0)
+    packusdw    m1, m1
+%else
+    packssdw    m0, m0 ; dwords -> words with signed saturation; negatives are clamped by pmaxsw below
+    packssdw    m1, m1
+%endif
+    punpcklwd   m0, m1 ; re-interleave even/odd results into pixel order
+    pminsw      m0, m5 ; clamp to the bit-depth maximum held in m5
+%if notcpuflag(sse4)
+%if ARCH_X86_64
+    pmaxsw      m0, m11 ; clamp negatives to 0 using the pre-zeroed m11
+%else
+    pxor        m2, m2 ; x86-32: no spare register, re-create zero each row
+    pmaxsw      m0, m2
+%endif
+%endif
+%ifidn %1, avg
+    pavgw       m0, [dstq] ; avg only: rounded average with the existing destination pixels
+%endif
+    mova    [dstq], m0 ; aligned store of one full register of pixels
+    add       dstq, dstrideq ; next destination row
+    dec         hd ; one row done
+    jg .loop ; repeat while rows remain
+    RET
+
+cglobal vp9_%1_8tap_1d_h_ %+ %%px %+ _12, 6, 6, %2, dst, dstride, src, sstride, h, filtery
+    mova        m5, [pw_4095] ; _12 entry point: only the clamp maximum differs (presumably 4095 = 2^12-1)
+    jmp mangle(private_prefix %+ _ %+ vp9_%1_8tap_1d_h_ %+ %%px %+ _10 %+ SUFFIX).body
+%endmacro
+
+INIT_XMM sse2
+filter_h_fn put
+filter_h_fn avg
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+filter_h_fn put
+filter_h_fn avg
+%endif
+
+%macro filter_v4_fn 1-2 12 ; %1 = put|avg, %2 = vector register count handed to cglobal (default 12)
+%if ARCH_X86_64
+cglobal vp9_%1_8tap_1d_v_4_10, 6, 8, %2, dst, dstride, src, sstride, h, filtery, src4, sstride3
+%else
+cglobal vp9_%1_8tap_1d_v_4_10, 4, 7, %2, dst, dstride, src, sstride, filtery, src4, sstride3
+    mov   filteryq, r5mp ; x86-32: only 4 args are auto-loaded; fetch filtery from its argument slot, h stays in memory (hd below)
+%define hd r4mp
+%endif
+    mova        m5, [pw_1023] ; upper clamp used by pminsw below (presumably 1023 = 2^10-1 in every word)
+.body: ; shared tail: the _12 entry point jumps here with its own clamp already in m5
+%if notcpuflag(sse4) && ARCH_X86_64
+    pxor       m11, m11 ; zero, lower clamp for pmaxsw in the non-SSE4 path
+%endif
+    mova        m6, [pd_64] ; rounding term (64) added before the arithmetic shift by 7
+    lea  sstride3q, [sstrideq*3] ; 3 * source stride
+    lea      src4q, [srcq+sstrideq] ; src4 -> row +1 (base for rows +1..+4)
+    sub       srcq, sstride3q ; src -> row -3 (base for rows -3..0)
+    mova        m7, [filteryq+  0] ; first of four coefficient vectors, spaced 32 bytes apart
+%if ARCH_X86_64 && mmsize > 8
+    mova        m8, [filteryq+ 32] ; x86-64: cache the other three coefficient vectors in m8-m10
+    mova        m9, [filteryq+ 64]
+    mova       m10, [filteryq+ 96]
+%endif
+.loop: ; one output row (4 pixels, 8 bytes) per iteration
+    ; FIXME maybe reuse loads from previous rows, or just
+    ; more generally unroll this to prevent multiple loads of
+    ; the same data?
+    movh        m0, [srcq] ; row -3
+    movh        m1, [srcq+sstrideq] ; row -2
+    movh        m2, [srcq+sstrideq*2] ; row -1
+    movh        m3, [srcq+sstride3q] ; row  0
+    add       srcq, sstrideq ; slide the upper window down one row for the next iteration
+    movh        m4, [src4q] ; row +1
+    punpcklwd   m0, m1 ; interleave rows (-3, -2) word-wise for pmaddwd
+    punpcklwd   m2, m3 ; interleave rows (-1, 0)
+    pmaddwd     m0, m7 ; per-pixel dword sum of the first two products
+%if ARCH_X86_64 && mmsize > 8
+    pmaddwd     m2, m8
+%else
+    pmaddwd     m2, [filteryq+ 32]
+%endif
+    movh        m1, [src4q+sstrideq] ; row +2
+    movh        m3, [src4q+sstrideq*2] ; row +3
+    paddd       m0, m2 ; accumulate products 0-3
+    movh        m2, [src4q+sstride3q] ; row +4
+    add      src4q, sstrideq ; slide the lower window down one row for the next iteration
+    punpcklwd   m4, m1 ; interleave rows (+1, +2)
+    punpcklwd   m3, m2 ; interleave rows (+3, +4)
+%if ARCH_X86_64 && mmsize > 8
+    pmaddwd     m4, m9
+    pmaddwd     m3, m10
+%else
+    pmaddwd     m4, [filteryq+ 64]
+    pmaddwd     m3, [filteryq+ 96]
+%endif
+    paddd       m0, m4 ; accumulate products 4-5
+    paddd       m0, m3 ; accumulate products 6-7
+    paddd       m0, m6 ; + 64 for rounding
+    psrad       m0, 7 ; scale the 8-tap sum back down
+%if cpuflag(sse4)
+    packusdw    m0, m0 ; dwords -> words with unsigned saturation (negatives become 0)
+%else
+    packssdw    m0, m0 ; dwords -> words with signed saturation; negatives are clamped by pmaxsw below
+%endif
+%ifidn %1, avg
+    movh        m1, [dstq] ; avg only: fetch the 4 existing destination pixels
+%endif
+    pminsw      m0, m5 ; clamp to the bit-depth maximum held in m5
+%if notcpuflag(sse4)
+%if ARCH_X86_64
+    pmaxsw      m0, m11 ; clamp negatives to 0 using the pre-zeroed m11
+%else
+    pxor        m2, m2 ; x86-32: no spare register, re-create zero each row
+    pmaxsw      m0, m2
+%endif
+%endif
+%ifidn %1, avg
+    pavgw       m0, m1 ; avg only: rounded average with the existing destination pixels
+%endif
+    movh    [dstq], m0 ; store 4 pixels
+    add       dstq, dstrideq ; next destination row
+    dec         hd ; one row done (on x86-32 this decrements the in-memory argument)
+    jg .loop ; repeat while rows remain
+    RET
+
+%if ARCH_X86_64
+cglobal vp9_%1_8tap_1d_v_4_12, 6, 8, %2, dst, dstride, src, sstride, h, filtery, src4, sstride3
+%else
+cglobal vp9_%1_8tap_1d_v_4_12, 4, 7, %2, dst, dstride, src, sstride, filtery, src4, sstride3
+    mov   filteryq, r5mp ; x86-32: same manual load of filtery as in the _10 entry point
+%endif
+    mova        m5, [pw_4095] ; _12 entry point: only the clamp maximum differs (presumably 4095 = 2^12-1)
+    jmp mangle(private_prefix %+ _ %+ vp9_%1_8tap_1d_v_4_10 %+ SUFFIX).body
+%endmacro
+
+INIT_XMM sse2
+filter_v4_fn put
+filter_v4_fn avg
+
+%macro filter_v_fn 1-2 13 ; %1 = put|avg, %2 = vector register count handed to cglobal (default 13: m0-m12)
+%assign %%px mmsize/2
+%if ARCH_X86_64
+cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _10, 6, 8, %2, dst, dstride, src, sstride, h, filtery, src4, sstride3
+%else
+cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _10, 4, 7, %2, dst, dstride, src, sstride, filtery, src4, sstride3
+    mov   filteryq, r5mp ; x86-32: only 4 args are auto-loaded; fetch filtery from its argument slot, h stays in memory (hd below)
+%define hd r4mp
+%endif
+    mova        m5, [pw_1023] ; upper clamp used by pminsw below (presumably 1023 = 2^10-1 in every word)
+.body: ; shared tail: the _12 entry point jumps here with its own clamp already in m5
+%if notcpuflag(sse4) && ARCH_X86_64
+    pxor       m12, m12 ; zero, lower clamp for pmaxsw in the non-SSE4 path
+%endif
+%if ARCH_X86_64
+    mova       m11, [pd_64] ; rounding term kept in a register; m6 is needed as SBUTTERFLY scratch here
+%endif
+    lea  sstride3q, [sstrideq*3] ; 3 * source stride
+    lea      src4q, [srcq+sstrideq] ; src4 -> row +1 (base for rows +1..+4)
+    sub       srcq, sstride3q ; src -> row -3 (base for rows -3..0)
+    mova        m7, [filteryq+  0] ; first of four coefficient vectors, spaced 32 bytes apart
+%if ARCH_X86_64 && mmsize > 8
+    mova        m8, [filteryq+ 32] ; x86-64: cache the other three coefficient vectors in m8-m10
+    mova        m9, [filteryq+ 64]
+    mova       m10, [filteryq+ 96]
+%endif
+.loop: ; one output row of %%px (= mmsize/2) pixels per iteration
+    ; FIXME maybe reuse loads from previous rows, or just
+    ; more generally unroll this to prevent multiple loads of
+    ; the same data?
+    movu        m0, [srcq] ; row -3
+    movu        m1, [srcq+sstrideq] ; row -2
+    movu        m2, [srcq+sstrideq*2] ; row -1
+    movu        m3, [srcq+sstride3q] ; row  0
+    add       srcq, sstrideq ; slide the upper window down one row for the next iteration
+    movu        m4, [src4q] ; row +1; the two SBUTTERFLYs below presumably word-interleave rows (-3,-2) and (-1,0) into low/high halves - see x86util.asm
+    SBUTTERFLY  wd, 0, 1, 6
+    SBUTTERFLY  wd, 2, 3, 6
+    pmaddwd     m0, m7 ; m0 accumulates one half of the output pixels
+    pmaddwd     m1, m7 ; m1 accumulates the other half
+%if ARCH_X86_64 && mmsize > 8
+    pmaddwd     m2, m8
+    pmaddwd     m3, m8
+%else
+    pmaddwd     m2, [filteryq+ 32]
+    pmaddwd     m3, [filteryq+ 32]
+%endif
+    paddd       m0, m2 ; accumulate products 0-3
+    paddd       m1, m3
+    movu        m2, [src4q+sstrideq] ; row +2
+    movu        m3, [src4q+sstrideq*2] ; row +3 (held until the last SBUTTERFLY); next line interleaves rows (+1,+2)
+    SBUTTERFLY  wd, 4, 2, 6
+%if ARCH_X86_64 && mmsize > 8
+    pmaddwd     m4, m9
+    pmaddwd     m2, m9
+%else
+    pmaddwd     m4, [filteryq+ 64]
+    pmaddwd     m2, [filteryq+ 64]
+%endif
+    paddd       m0, m4 ; accumulate products 4-5
+    paddd       m1, m2
+    movu        m4, [src4q+sstride3q] ; row +4
+    add      src4q, sstrideq ; slide the lower window down one row; next line interleaves rows (+3,+4)
+    SBUTTERFLY  wd, 3, 4, 6
+%if ARCH_X86_64 && mmsize > 8
+    pmaddwd     m3, m10
+    pmaddwd     m4, m10
+%else
+    pmaddwd     m3, [filteryq+ 96]
+    pmaddwd     m4, [filteryq+ 96]
+%endif
+    paddd       m0, m3 ; accumulate products 6-7
+    paddd       m1, m4
+%if ARCH_X86_64
+    paddd       m0, m11 ; + 64 for rounding (register copy)
+    paddd       m1, m11
+%else
+    paddd       m0, [pd_64] ; + 64 for rounding (x86-32: straight from memory, no spare register)
+    paddd       m1, [pd_64]
+%endif
+    psrad       m0, 7 ; scale the 8-tap sums back down
+    psrad       m1, 7
+%if cpuflag(sse4)
+    packusdw    m0, m1 ; merge both halves to words with unsigned saturation (negatives become 0)
+%else
+    packssdw    m0, m1 ; merge both halves to words with signed saturation; negatives are clamped by pmaxsw below
+%endif
+    pminsw      m0, m5 ; clamp to the bit-depth maximum held in m5
+%if notcpuflag(sse4)
+%if ARCH_X86_64
+    pmaxsw      m0, m12 ; clamp negatives to 0 using the pre-zeroed m12
+%else
+    pxor        m2, m2 ; x86-32: no spare register, re-create zero each row
+    pmaxsw      m0, m2
+%endif
+%endif
+%ifidn %1, avg
+    pavgw       m0, [dstq] ; avg only: rounded average with the existing destination pixels
+%endif
+    mova    [dstq], m0 ; aligned store of one full register of pixels
+    add       dstq, dstrideq ; next destination row
+    dec         hd ; one row done (on x86-32 this decrements the in-memory argument)
+    jg .loop ; repeat while rows remain
+    RET
+
+%if ARCH_X86_64
+cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _12, 6, 8, %2, dst, dstride, src, sstride, h, filtery, src4, sstride3
+%else
+cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _12, 4, 7, %2, dst, dstride, src, sstride, filtery, src4, sstride3
+    mov   filteryq, r5mp ; x86-32: same manual load of filtery as in the _10 entry point
+%endif
+    mova        m5, [pw_4095] ; _12 entry point: only the clamp maximum differs (presumably 4095 = 2^12-1)
+    jmp mangle(private_prefix %+ _ %+ vp9_%1_8tap_1d_v_ %+ %%px %+ _10 %+ SUFFIX).body
+%endmacro
+
+INIT_XMM sse2
+filter_v_fn put
+filter_v_fn avg
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+filter_v_fn put
+filter_v_fn avg
+%endif
diff --git a/libavcodec/x86/w64xmmtest.c b/libavcodec/x86/w64xmmtest.c
index 25e833fe..94b3049a 100644
--- a/libavcodec/x86/w64xmmtest.c
+++ b/libavcodec/x86/w64xmmtest.c
@@ -65,13 +65,6 @@ wrap(avcodec_encode_audio2(AVCodecContext *avctx,
                     got_packet_ptr);
 }
 
-wrap(avcodec_encode_video(AVCodecContext *avctx,
-                          uint8_t *buf, int buf_size,
-                          const AVFrame *pict))
-{
-    testxmmclobbers(avcodec_encode_video, avctx, buf, buf_size, pict);
-}
-
 wrap(avcodec_encode_subtitle(AVCodecContext *avctx,
                              uint8_t *buf, int buf_size,
                              const AVSubtitle *sub))
diff --git a/libavcodec/xan.c b/libavcodec/xan.c
index 2c565eed..662386af 100644
--- a/libavcodec/xan.c
+++ b/libavcodec/xan.c
@@ -263,7 +263,7 @@ static inline void xan_wc3_copy_pixel_run(XanContext *s, AVFrame *frame,
     prevframe_x = x + motion_x;
 
     if (prev_palette_plane == palette_plane && FFABS(curframe_index - prevframe_index) < pixel_count) {
-         avpriv_request_sample(s->avctx, "Overlapping copy\n");
+         avpriv_request_sample(s->avctx, "Overlapping copy");
          return ;
     }
 
@@ -644,5 +644,5 @@ AVCodec ff_xan_wc3_decoder = {
     .init           = xan_decode_init,
     .close          = xan_decode_end,
     .decode         = xan_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/xbmdec.c b/libavcodec/xbmdec.c
index 143e3a28..d19bdaee 100644
--- a/libavcodec/xbmdec.c
+++ b/libavcodec/xbmdec.c
@@ -133,5 +133,5 @@ AVCodec ff_xbm_decoder = {
     .type         = AVMEDIA_TYPE_VIDEO,
     .id           = AV_CODEC_ID_XBM,
     .decode       = xbm_decode_frame,
-    .capabilities = CODEC_CAP_DR1,
+    .capabilities = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/xbmenc.c b/libavcodec/xbmenc.c
index a752bdf2..b25615f2 100644
--- a/libavcodec/xbmenc.c
+++ b/libavcodec/xbmenc.c
@@ -32,7 +32,7 @@ static int xbm_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 
     linesize = (avctx->width + 7) / 8;
     size     = avctx->height * (linesize * 7 + 2) + 110;
-    if ((ret = ff_alloc_packet2(avctx, pkt, size)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, pkt, size, 0)) < 0)
         return ret;
 
     buf = pkt->data;
diff --git a/libavcodec/xface.h b/libavcodec/xface.h
index 0236d713..d366fdb1 100644
--- a/libavcodec/xface.h
+++ b/libavcodec/xface.h
@@ -24,6 +24,9 @@
  * X-Face common definitions.
  */
 
+#ifndef AVCODEC_XFACE_H
+#define AVCODEC_XFACE_H
+
 #include <stdint.h>
 
 /* define the face size - 48x48x1 */
@@ -94,3 +97,5 @@ extern const ProbRange ff_xface_probranges_per_level[4][3];
 extern const ProbRange ff_xface_probranges_2x2[16];
 
 void ff_xface_generate_face(uint8_t *dst, uint8_t * const src);
+
+#endif /* AVCODEC_XFACE_H */
diff --git a/libavcodec/xfaceenc.c b/libavcodec/xfaceenc.c
index fa6f2273..bfb9fb9e 100644
--- a/libavcodec/xfaceenc.c
+++ b/libavcodec/xfaceenc.c
@@ -124,16 +124,6 @@ static void encode_block(char *bitmap, int w, int h, int level, ProbRangesQueue
     }
 }
 
-static av_cold int xface_encode_init(AVCodecContext *avctx)
-{
-    avctx->coded_frame = av_frame_alloc();
-    if (!avctx->coded_frame)
-        return AVERROR(ENOMEM);
-    avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
-
-    return 0;
-}
-
 static void push_integer(BigInt *b, const ProbRange *prange)
 {
     uint8_t r;
@@ -205,7 +195,7 @@ static int xface_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         intbuf[i++] = r + XFACE_FIRST_PRINT;
     }
 
-    if ((ret = ff_alloc_packet2(avctx, pkt, i+2)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, pkt, i+2, 0)) < 0)
         return ret;
 
     /* revert the number, and close the buffer */
@@ -221,21 +211,13 @@ static int xface_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     return 0;
 }
 
-static av_cold int xface_encode_close(AVCodecContext *avctx)
-{
-    av_frame_free(&avctx->coded_frame);
-
-    return 0;
-}
-
 AVCodec ff_xface_encoder = {
     .name           = "xface",
     .long_name      = NULL_IF_CONFIG_SMALL("X-face image"),
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_XFACE,
     .priv_data_size = sizeof(XFaceContext),
-    .init           = xface_encode_init,
-    .close          = xface_encode_close,
     .encode2        = xface_encode_frame,
     .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_MONOWHITE, AV_PIX_FMT_NONE },
+    .capabilities   = AV_CODEC_CAP_INTRA_ONLY,
 };
diff --git a/libavcodec/xl.c b/libavcodec/xl.c
index 2d1da1d2..37ab46e4 100644
--- a/libavcodec/xl.c
+++ b/libavcodec/xl.c
@@ -134,5 +134,5 @@ AVCodec ff_xl_decoder = {
     .id           = AV_CODEC_ID_VIXL,
     .init         = decode_init,
     .decode       = decode_frame,
-    .capabilities = CODEC_CAP_DR1,
+    .capabilities = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/xsubdec.c b/libavcodec/xsubdec.c
index 2db263ba..540607aa 100644
--- a/libavcodec/xsubdec.c
+++ b/libavcodec/xsubdec.c
@@ -57,6 +57,8 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size,
     int64_t packet_time = 0;
     GetBitContext gb;
     int has_alpha = avctx->codec_tag == MKTAG('D','X','S','A');
+    AVSubtitleRect *rect;
+    int j;
 
     // check that at least header fits
     if (buf_size < 27 + 7 * 2 + 4 * (3 + has_alpha)) {
@@ -104,13 +106,13 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size,
     sub->rects[0]->x = x; sub->rects[0]->y = y;
     sub->rects[0]->w = w; sub->rects[0]->h = h;
     sub->rects[0]->type = SUBTITLE_BITMAP;
-    sub->rects[0]->pict.linesize[0] = w;
-    sub->rects[0]->pict.data[0] = av_malloc(w * h);
+    sub->rects[0]->linesize[0] = w;
+    sub->rects[0]->data[0] = av_malloc(w * h);
     sub->rects[0]->nb_colors = 4;
-    sub->rects[0]->pict.data[1] = av_mallocz(AVPALETTE_SIZE);
-    if (!sub->rects[0]->pict.data[0] || !sub->rects[0]->pict.data[1]) {
-        av_freep(&sub->rects[0]->pict.data[1]);
-        av_freep(&sub->rects[0]->pict.data[0]);
+    sub->rects[0]->data[1] = av_mallocz(AVPALETTE_SIZE);
+    if (!sub->rects[0]->data[0] || !sub->rects[0]->data[1]) {
+        av_freep(&sub->rects[0]->data[1]);
+        av_freep(&sub->rects[0]->data[0]);
         av_freep(&sub->rects[0]);
         av_freep(&sub->rects);
         return AVERROR(ENOMEM);
@@ -119,23 +121,33 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size,
 
     // read palette
     for (i = 0; i < sub->rects[0]->nb_colors; i++)
-        ((uint32_t*)sub->rects[0]->pict.data[1])[i] = bytestream_get_be24(&buf);
+        ((uint32_t*)sub->rects[0]->data[1])[i] = bytestream_get_be24(&buf);
 
     if (!has_alpha) {
         // make all except background (first entry) non-transparent
         for (i = 1; i < sub->rects[0]->nb_colors; i++)
-            ((uint32_t *)sub->rects[0]->pict.data[1])[i] |= 0xff000000;
+            ((uint32_t *)sub->rects[0]->data[1])[i] |= 0xff000000;
     } else {
         for (i = 0; i < sub->rects[0]->nb_colors; i++)
-            ((uint32_t *)sub->rects[0]->pict.data[1])[i] |= *buf++ << 24;
+            ((uint32_t *)sub->rects[0]->data[1])[i] |= *buf++ << 24;
     }
 
+#if FF_API_AVPICTURE
+FF_DISABLE_DEPRECATION_WARNINGS
+    rect = sub->rects[0];
+    for (j = 0; j < 4; j++) {
+        rect->pict.data[j] = rect->data[j];
+        rect->pict.linesize[j] = rect->linesize[j];
+    }
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
     // process RLE-compressed data
     init_get_bits(&gb, buf, (buf_end - buf) * 8);
-    bitmap = sub->rects[0]->pict.data[0];
+    bitmap = sub->rects[0]->data[0];
     for (y = 0; y < h; y++) {
         // interlaced: do odd lines
-        if (y == (h + 1) / 2) bitmap = sub->rects[0]->pict.data[0] + w;
+        if (y == (h + 1) / 2) bitmap = sub->rects[0]->data[0] + w;
         for (x = 0; x < w; ) {
             int log2 = ff_log2_tab[show_bits(&gb, 8)];
             int run = get_bits(&gb, 14 - 4 * (log2 >> 1));
diff --git a/libavcodec/xsubenc.c b/libavcodec/xsubenc.c
index 70708548..b3da9096 100644
--- a/libavcodec/xsubenc.c
+++ b/libavcodec/xsubenc.c
@@ -131,8 +131,21 @@ static int xsub_encode(AVCodecContext *avctx, unsigned char *buf,
     if (h->num_rects != 1)
         av_log(avctx, AV_LOG_WARNING, "Only single rects supported (%d in subtitle.)\n", h->num_rects);
 
+#if FF_API_AVPICTURE
+FF_DISABLE_DEPRECATION_WARNINGS
+    if (!h->rects[0]->data[0]) {
+        AVSubtitleRect *rect = h->rects[0];
+        int j;
+        for (j = 0; j < 4; j++) {
+            rect->data[j] = rect->pict.data[j];
+            rect->linesize[j] = rect->pict.linesize[j];
+        }
+    }
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
     // TODO: render text-based subtitles into bitmaps
-    if (!h->rects[0]->pict.data[0] || !h->rects[0]->pict.data[1]) {
+    if (!h->rects[0]->data[0] || !h->rects[0]->data[1]) {
         av_log(avctx, AV_LOG_WARNING, "No subtitle bitmap available.\n");
         return -1;
     }
@@ -142,7 +155,7 @@ static int xsub_encode(AVCodecContext *avctx, unsigned char *buf,
         av_log(avctx, AV_LOG_WARNING, "No more than 4 subtitle colors supported (%d found.)\n", h->rects[0]->nb_colors);
 
     // TODO: Palette swapping if color zero is not transparent
-    if (((uint32_t *)h->rects[0]->pict.data[1])[0] & 0xff000000)
+    if (((uint32_t *)h->rects[0]->data[1])[0] & 0xff000000)
         av_log(avctx, AV_LOG_WARNING, "Color index 0 is not transparent. Transparency will be messed up.\n");
 
     if (make_tc(startTime, start_tc) || make_tc(endTime, end_tc)) {
@@ -174,19 +187,19 @@ static int xsub_encode(AVCodecContext *avctx, unsigned char *buf,
 
     // Palette
     for (i=0; i<4; i++)
-        bytestream_put_be24(&hdr, ((uint32_t *)h->rects[0]->pict.data[1])[i]);
+        bytestream_put_be24(&hdr, ((uint32_t *)h->rects[0]->data[1])[i]);
 
     // Bitmap
     // RLE buffer. Reserve 2 bytes for possible padding after the last row.
     init_put_bits(&pb, hdr, bufsize - (hdr - buf) - 2);
-    if (xsub_encode_rle(&pb, h->rects[0]->pict.data[0],
-                        h->rects[0]->pict.linesize[0]*2,
+    if (xsub_encode_rle(&pb, h->rects[0]->data[0],
+                        h->rects[0]->linesize[0] * 2,
                         h->rects[0]->w, (h->rects[0]->h + 1) >> 1))
         return -1;
     bytestream_put_le16(&rlelenptr, put_bits_count(&pb) >> 3); // Length of first field
 
-    if (xsub_encode_rle(&pb, h->rects[0]->pict.data[0] + h->rects[0]->pict.linesize[0],
-                        h->rects[0]->pict.linesize[0]*2,
+    if (xsub_encode_rle(&pb, h->rects[0]->data[0] + h->rects[0]->linesize[0],
+                        h->rects[0]->linesize[0] * 2,
                         h->rects[0]->w, h->rects[0]->h >> 1))
         return -1;
 
diff --git a/libavcodec/xvididct.c b/libavcodec/xvididct.c
index e9fab70c..1f96ccc3 100644
--- a/libavcodec/xvididct.c
+++ b/libavcodec/xvididct.c
@@ -348,6 +348,8 @@ av_cold void ff_xvid_idct_init(IDCTDSPContext *c, AVCodecContext *avctx)
 
     if (ARCH_X86)
         ff_xvid_idct_init_x86(c, avctx, high_bit_depth);
+    if (ARCH_MIPS)
+        ff_xvid_idct_init_mips(c, avctx, high_bit_depth);
 
     ff_init_scantable_permutation(c->idct_permutation, c->perm_type);
 }
diff --git a/libavcodec/xvididct.h b/libavcodec/xvididct.h
index f7dfba48..e0bc1a2b 100644
--- a/libavcodec/xvididct.h
+++ b/libavcodec/xvididct.h
@@ -30,5 +30,7 @@ void ff_xvid_idct_init(IDCTDSPContext *c, AVCodecContext *avctx);
 
 void ff_xvid_idct_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
                            unsigned high_bit_depth);
+void ff_xvid_idct_init_mips(IDCTDSPContext *c, AVCodecContext *avctx,
+                            unsigned high_bit_depth);
 
 #endif /* AVCODEC_XVIDIDCT_H */
diff --git a/libavcodec/xvmc.h b/libavcodec/xvmc.h
index c2e187cc..465ee78d 100644
--- a/libavcodec/xvmc.h
+++ b/libavcodec/xvmc.h
@@ -43,7 +43,7 @@
 #define AV_XVMC_ID                    0x1DC711C0  /**< special value to ensure that regular pixel routines haven't corrupted the struct
                                                        the number is 1337 speak for the letters IDCT MCo (motion compensation) */
 
-attribute_deprecated struct xvmc_pix_fmt {
+struct attribute_deprecated xvmc_pix_fmt {
     /** The field contains the special constant value AV_XVMC_ID.
         It is used as a test that the application correctly uses the API,
         and that there is no corruption caused by pixel routines.
diff --git a/libavcodec/xwddec.c b/libavcodec/xwddec.c
index 62dfdace..64cd8418 100644
--- a/libavcodec/xwddec.c
+++ b/libavcodec/xwddec.c
@@ -141,7 +141,7 @@ static int xwd_decode_frame(AVCodecContext *avctx, void *data,
         return AVERROR_INVALIDDATA;
     }
 
-    if (bytestream2_get_bytes_left(&gb) < ncolors * XWD_CMAP_SIZE + avctx->height * lsize) {
+    if (bytestream2_get_bytes_left(&gb) < ncolors * XWD_CMAP_SIZE + (uint64_t)avctx->height * lsize) {
         av_log(avctx, AV_LOG_ERROR, "input buffer too small\n");
         return AVERROR_INVALIDDATA;
     }
@@ -249,5 +249,5 @@ AVCodec ff_xwd_decoder = {
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_XWD,
     .decode         = xwd_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/xwdenc.c b/libavcodec/xwdenc.c
index 06fa4a0a..43bca890 100644
--- a/libavcodec/xwdenc.c
+++ b/libavcodec/xwdenc.c
@@ -146,7 +146,7 @@ static int xwd_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     header_size = XWD_HEADER_SIZE + WINDOW_NAME_SIZE;
     out_size    = header_size + ncolors * XWD_CMAP_SIZE + avctx->height * lsize;
 
-    if ((ret = ff_alloc_packet2(avctx, pkt, out_size)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, pkt, out_size, 0)) < 0)
         return ret;
     buf = pkt->data;
 
diff --git a/libavcodec/xxan.c b/libavcodec/xxan.c
index b261cdfd..54852962 100644
--- a/libavcodec/xxan.c
+++ b/libavcodec/xxan.c
@@ -447,5 +447,5 @@ AVCodec ff_xan_wc4_decoder = {
     .init           = xan_decode_init,
     .close          = xan_decode_end,
     .decode         = xan_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/y41pdec.c b/libavcodec/y41pdec.c
index 22c7d1fd..1b177d42 100644
--- a/libavcodec/y41pdec.c
+++ b/libavcodec/y41pdec.c
@@ -88,5 +88,5 @@ AVCodec ff_y41p_decoder = {
     .id           = AV_CODEC_ID_Y41P,
     .init         = y41p_decode_init,
     .decode       = y41p_decode_frame,
-    .capabilities = CODEC_CAP_DR1,
+    .capabilities = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/y41penc.c b/libavcodec/y41penc.c
index 47786188..94acc343 100644
--- a/libavcodec/y41penc.c
+++ b/libavcodec/y41penc.c
@@ -30,14 +30,8 @@ static av_cold int y41p_encode_init(AVCodecContext *avctx)
         return AVERROR_INVALIDDATA;
     }
 
-    avctx->coded_frame = av_frame_alloc();
     avctx->bits_per_coded_sample = 12;
 
-    if (!avctx->coded_frame) {
-        av_log(avctx, AV_LOG_ERROR, "Could not allocate frame.\n");
-        return AVERROR(ENOMEM);
-    }
-
     return 0;
 }
 
@@ -48,11 +42,9 @@ static int y41p_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     uint8_t *y, *u, *v;
     int i, j, ret;
 
-    if ((ret = ff_alloc_packet2(avctx, pkt, avctx->width * avctx->height * 1.5)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, pkt, avctx->width * avctx->height * 1.5, 0)) < 0)
         return ret;
 
-    avctx->coded_frame->key_frame = 1;
-    avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
     dst = pkt->data;
 
     for (i = avctx->height - 1; i >= 0; i--) {
@@ -84,8 +76,6 @@ static int y41p_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 
 static av_cold int y41p_encode_close(AVCodecContext *avctx)
 {
-    av_frame_free(&avctx->coded_frame);
-
     return 0;
 }
 
@@ -99,4 +89,5 @@ AVCodec ff_y41p_encoder = {
     .close        = y41p_encode_close,
     .pix_fmts     = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUV411P,
                                                  AV_PIX_FMT_NONE },
+    .capabilities = AV_CODEC_CAP_INTRA_ONLY,
 };
diff --git a/libavcodec/yuv4dec.c b/libavcodec/yuv4dec.c
index 00ccf581..f89f62de 100644
--- a/libavcodec/yuv4dec.c
+++ b/libavcodec/yuv4dec.c
@@ -80,5 +80,5 @@ AVCodec ff_yuv4_decoder = {
     .id           = AV_CODEC_ID_YUV4,
     .init         = yuv4_decode_init,
     .decode       = yuv4_decode_frame,
-    .capabilities = CODEC_CAP_DR1,
+    .capabilities = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/yuv4enc.c b/libavcodec/yuv4enc.c
index 5ce48467..cc8846d7 100644
--- a/libavcodec/yuv4enc.c
+++ b/libavcodec/yuv4enc.c
@@ -25,13 +25,6 @@
 
 static av_cold int yuv4_encode_init(AVCodecContext *avctx)
 {
-    avctx->coded_frame = av_frame_alloc();
-
-    if (!avctx->coded_frame) {
-        av_log(avctx, AV_LOG_ERROR, "Could not allocate frame.\n");
-        return AVERROR(ENOMEM);
-    }
-
     return 0;
 }
 
@@ -42,13 +35,10 @@ static int yuv4_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     uint8_t *y, *u, *v;
     int i, j, ret;
 
-    if ((ret = ff_alloc_packet2(avctx, pkt, 6 * (avctx->width + 1 >> 1) * (avctx->height + 1 >> 1))) < 0)
+    if ((ret = ff_alloc_packet2(avctx, pkt, 6 * (avctx->width + 1 >> 1) * (avctx->height + 1 >> 1), 0)) < 0)
         return ret;
     dst = pkt->data;
 
-    avctx->coded_frame->key_frame = 1;
-    avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
-
     y = pic->data[0];
     u = pic->data[1];
     v = pic->data[2];
@@ -74,8 +64,6 @@ static int yuv4_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 
 static av_cold int yuv4_encode_close(AVCodecContext *avctx)
 {
-    av_frame_free(&avctx->coded_frame);
-
     return 0;
 }
 
@@ -88,4 +76,5 @@ AVCodec ff_yuv4_encoder = {
     .encode2      = yuv4_encode_frame,
     .close        = yuv4_encode_close,
     .pix_fmts     = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
+    .capabilities = AV_CODEC_CAP_INTRA_ONLY,
 };
diff --git a/libavcodec/zerocodec.c b/libavcodec/zerocodec.c
index 12d80241..55a9a917 100644
--- a/libavcodec/zerocodec.c
+++ b/libavcodec/zerocodec.c
@@ -148,5 +148,5 @@ AVCodec ff_zerocodec_decoder = {
     .init           = zerocodec_decode_init,
     .decode         = zerocodec_decode_frame,
     .close          = zerocodec_decode_close,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/zmbv.c b/libavcodec/zmbv.c
index 82ae169e..25a1cd21 100644
--- a/libavcodec/zmbv.c
+++ b/libavcodec/zmbv.c
@@ -634,5 +634,5 @@ AVCodec ff_zmbv_decoder = {
     .init           = decode_init,
     .close          = decode_end,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1,
 };
diff --git a/libavcodec/zmbvenc.c b/libavcodec/zmbvenc.c
index 2d56a13e..e832bedb 100644
--- a/libavcodec/zmbvenc.c
+++ b/libavcodec/zmbvenc.c
@@ -133,8 +133,12 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     c->curfrm++;
     if(c->curfrm == c->keyint)
         c->curfrm = 0;
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
     avctx->coded_frame->pict_type = keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
     avctx->coded_frame->key_frame = keyframe;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
     chpal = !keyframe && memcmp(p->data[1], c->pal2, 1024);
 
     palptr = (uint32_t*)p->data[1];
@@ -227,7 +231,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     }
 
     pkt_size = c->zstream.total_out + 1 + 6*keyframe;
-    if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size, 0)) < 0)
         return ret;
     buf = pkt->data;
 
@@ -259,8 +263,6 @@ static av_cold int encode_end(AVCodecContext *avctx)
     deflateEnd(&c->zstream);
     av_freep(&c->prev);
 
-    av_frame_free(&avctx->coded_frame);
-
     return 0;
 }
 
@@ -275,7 +277,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
     int lvl = 9;
 
     for(i=1; i<256; i++)
-        score_tab[i]= -i * log(i/(double)(ZMBV_BLOCK*ZMBV_BLOCK)) * (256/M_LN2);
+        score_tab[i]= -i * log2(i/(double)(ZMBV_BLOCK*ZMBV_BLOCK)) * 256;
 
     c->avctx = avctx;
 
@@ -324,12 +326,6 @@ static av_cold int encode_init(AVCodecContext *avctx)
         return -1;
     }
 
-    avctx->coded_frame = av_frame_alloc();
-    if (!avctx->coded_frame) {
-        encode_end(avctx);
-        return AVERROR(ENOMEM);
-    }
-
     return 0;
 }
 
diff --git a/libavdevice/Makefile b/libavdevice/Makefile
index f57ec0bc..8394e871 100644
--- a/libavdevice/Makefile
+++ b/libavdevice/Makefile
@@ -58,8 +58,9 @@ OBJS-$(HAVE_LIBC_MSVCRT)                 += file_open.o
 # Windows resource file
 SLIBOBJS-$(HAVE_GNU_WINDRES)             += avdeviceres.o
 
+SKIPHEADERS                              += decklink_common.h
 SKIPHEADERS-$(CONFIG_DECKLINK)           += decklink_enc.h decklink_dec.h \
-                                            decklink_common.h decklink_common_c.h
+                                            decklink_common_c.h
 SKIPHEADERS-$(CONFIG_DSHOW_INDEV)        += dshow_capture.h
 SKIPHEADERS-$(CONFIG_FBDEV_INDEV)        += fbdev_common.h
 SKIPHEADERS-$(CONFIG_FBDEV_OUTDEV)       += fbdev_common.h
diff --git a/libavdevice/alsa.c b/libavdevice/alsa.c
index 27a1655a..75ac4449 100644
--- a/libavdevice/alsa.c
+++ b/libavdevice/alsa.c
@@ -89,7 +89,7 @@ MAKE_REORDER_FUNCS(5, out_50, \
         out[2] = in[3]; \
         out[3] = in[4]; \
         out[4] = in[2]; \
-        );
+        )
 
 MAKE_REORDER_FUNCS(6, out_51, \
         out[0] = in[0]; \
@@ -98,7 +98,7 @@ MAKE_REORDER_FUNCS(6, out_51, \
         out[3] = in[5]; \
         out[4] = in[2]; \
         out[5] = in[3]; \
-        );
+        )
 
 MAKE_REORDER_FUNCS(8, out_71, \
         out[0] = in[0]; \
@@ -109,7 +109,7 @@ MAKE_REORDER_FUNCS(8, out_71, \
         out[5] = in[3]; \
         out[6] = in[6]; \
         out[7] = in[7]; \
-        );
+        )
 
 #define FORMAT_I8  0
 #define FORMAT_I16 1
diff --git a/libavdevice/alsa.h b/libavdevice/alsa.h
index f8b7449c..cd41d965 100644
--- a/libavdevice/alsa.h
+++ b/libavdevice/alsa.h
@@ -74,6 +74,7 @@ typedef struct AlsaData {
  *
  * @return 0 if OK, AVERROR_xxx on error
  */
+av_warn_unused_result
 int ff_alsa_open(AVFormatContext *s, snd_pcm_stream_t mode,
                  unsigned int *sample_rate,
                  int channels, enum AVCodecID *codec_id);
@@ -95,10 +96,13 @@ int ff_alsa_close(AVFormatContext *s1);
  *
  * @return 0 if OK, AVERROR_xxx on error
  */
+av_warn_unused_result
 int ff_alsa_xrun_recover(AVFormatContext *s1, int err);
 
+av_warn_unused_result
 int ff_alsa_extend_reorder_buf(AlsaData *s, int size);
 
+av_warn_unused_result
 int ff_alsa_get_device_list(AVDeviceInfoList *device_list, snd_pcm_stream_t stream_type);
 
 #endif /* AVDEVICE_ALSA_H */
diff --git a/libavdevice/alsa_dec.c b/libavdevice/alsa_dec.c
index b8e957a5..71a6ef4f 100644
--- a/libavdevice/alsa_dec.c
+++ b/libavdevice/alsa_dec.c
@@ -111,14 +111,14 @@ static int audio_read_packet(AVFormatContext *s1, AVPacket *pkt)
 
     while ((res = snd_pcm_readi(s->h, pkt->data, s->period_size)) < 0) {
         if (res == -EAGAIN) {
-            av_free_packet(pkt);
+            av_packet_unref(pkt);
 
             return AVERROR(EAGAIN);
         }
         if (ff_alsa_xrun_recover(s1, res) < 0) {
             av_log(s1, AV_LOG_ERROR, "ALSA read error: %s\n",
                    snd_strerror(res));
-            av_free_packet(pkt);
+            av_packet_unref(pkt);
 
             return AVERROR(EIO);
         }
diff --git a/libavdevice/caca.c b/libavdevice/caca.c
index ff54940d..75adc354 100644
--- a/libavdevice/caca.c
+++ b/libavdevice/caca.c
@@ -69,18 +69,18 @@ static void list_drivers(CACAContext *c)
 
     av_log(c->ctx, AV_LOG_INFO, "Available drivers:\n");
     for (i = 0; drivers[i]; i += 2)
-        av_log(c->ctx, AV_LOG_INFO, "%s : %s\n", drivers[i], drivers[i + 1]);
+        av_log(c->ctx, AV_LOG_INFO, "%s: %s\n", drivers[i], drivers[i + 1]);
 }
 
 #define DEFINE_LIST_DITHER(thing, thing_str)                                 \
-static void list_dither_## thing(CACAContext *c)                         \
+static void list_dither_## thing(CACAContext *c)                             \
 {                                                                            \
     const char *const *thing = caca_get_dither_## thing ##_list(c->dither);  \
     int i;                                                                   \
                                                                              \
     av_log(c->ctx, AV_LOG_INFO, "Available %s:\n", thing_str);               \
     for (i = 0; thing[i]; i += 2)                                            \
-        av_log(c->ctx, AV_LOG_INFO, "%s : %s\n", thing[i], thing[i + 1]);    \
+        av_log(c->ctx, AV_LOG_INFO, "%s: %s\n", thing[i], thing[i + 1]);     \
 }
 
 DEFINE_LIST_DITHER(color, "colors");
@@ -150,13 +150,15 @@ static int caca_write_header(AVFormatContext *s)
         goto fail;
     }
 
-#define CHECK_DITHER_OPT(opt)                                           \
-    if (caca_set_dither_##opt(c->dither, c->opt) < 0)  {                \
-        ret = AVERROR(errno);                                           \
-        av_log(s, AV_LOG_ERROR, "Failed to set value '%s' for option '%s'\n", \
-               c->opt, #opt);                                           \
-        goto fail;                                                      \
-    }
+#define CHECK_DITHER_OPT(opt) do { /* set c->opt on c->dither or goto fail */   \
+    if (caca_set_dither_##opt(c->dither, c->opt) < 0)  {                        \
+        ret = AVERROR(errno); /* error code is taken from errno */              \
+        av_log(s, AV_LOG_ERROR, "Failed to set value '%s' for option '%s'\n",   \
+               c->opt, #opt);                                                   \
+        goto fail;                                                              \
+    }                                                                           \
+} while (0)
+
     CHECK_DITHER_OPT(algorithm);
     CHECK_DITHER_OPT(antialias);
     CHECK_DITHER_OPT(charset);
@@ -208,10 +210,8 @@ static const AVOption options[] = {
     { "antialias",    "set antialias method",    OFFSET(antialias), AV_OPT_TYPE_STRING, {.str = "default" }, 0, 0, ENC },
     { "charset",      "set charset used to render output", OFFSET(charset), AV_OPT_TYPE_STRING, {.str = "default" }, 0, 0, ENC },
     { "color",        "set color used to render output",   OFFSET(color),   AV_OPT_TYPE_STRING, {.str = "default" }, 0, 0, ENC },
-    { "list_drivers", "list available drivers",  OFFSET(list_drivers), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, ENC, "list_drivers" },
-    { "true",         NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 1}, 0, 0, ENC, "list_drivers" },
-    { "false",        NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 0}, 0, 0, ENC, "list_drivers" },
-    { "list_dither", "list available dither options", OFFSET(list_dither), AV_OPT_TYPE_STRING, {.dbl=0}, 0, 1, ENC, "list_dither" },
+    { "list_drivers", "list available drivers",  OFFSET(list_drivers), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, ENC },
+    { "list_dither", "list available dither options", OFFSET(list_dither), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, ENC, "list_dither" },
     { "algorithms",   NULL, 0, AV_OPT_TYPE_CONST, {.str = "algorithms"}, 0, 0, ENC, "list_dither" },
     { "antialiases",  NULL, 0, AV_OPT_TYPE_CONST, {.str = "antialiases"},0, 0, ENC, "list_dither" },
     { "charsets",     NULL, 0, AV_OPT_TYPE_CONST, {.str = "charsets"},   0, 0, ENC, "list_dither" },
diff --git a/libavdevice/decklink_common.h b/libavdevice/decklink_common.h
index 96912a7c..dff4fc1c 100644
--- a/libavdevice/decklink_common.h
+++ b/libavdevice/decklink_common.h
@@ -19,6 +19,11 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#ifndef AVDEVICE_DECKLINK_COMMON_H
+#define AVDEVICE_DECKLINK_COMMON_H
+
+#include <DeckLinkAPIVersion.h>
+
 #include "decklink_common_c.h"
 
 class decklink_output_callback;
@@ -65,10 +70,12 @@ struct decklink_ctx {
     unsigned int dropped;
     AVStream *audio_st;
     AVStream *video_st;
+    AVStream *teletext_st;
 
     /* Options */
     int list_devices;
     int list_formats;
+    int64_t teletext_lines;
     double preroll;
 
     int frames_preroll;
@@ -82,7 +89,11 @@ struct decklink_ctx {
 typedef enum { DIRECTION_IN, DIRECTION_OUT} decklink_direction_t;
 
 #ifdef _WIN32
+#if BLACKMAGIC_DECKLINK_API_VERSION < 0x0a040000
 typedef unsigned long buffercount_type;
+#else
+typedef unsigned int buffercount_type;
+#endif
 IDeckLinkIterator *CreateDeckLinkIteratorInstance(void);
 #else
 typedef uint32_t buffercount_type;
@@ -95,3 +106,4 @@ int ff_decklink_set_format(AVFormatContext *avctx, decklink_direction_t directio
 int ff_decklink_list_devices(AVFormatContext *avctx);
 int ff_decklink_list_formats(AVFormatContext *avctx, decklink_direction_t direction = DIRECTION_OUT);
 
+#endif /* AVDEVICE_DECKLINK_COMMON_H */
diff --git a/libavdevice/decklink_common_c.h b/libavdevice/decklink_common_c.h
index fb2b7886..2b5d92f2 100644
--- a/libavdevice/decklink_common_c.h
+++ b/libavdevice/decklink_common_c.h
@@ -19,6 +19,9 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#ifndef AVDEVICE_DECKLINK_COMMON_C_H
+#define AVDEVICE_DECKLINK_COMMON_C_H
+
 struct decklink_cctx {
     const AVClass *cclass;
 
@@ -27,7 +30,10 @@ struct decklink_cctx {
     /* Options */
     int list_devices;
     int list_formats;
+    int64_t teletext_lines;
     double preroll;
     int v210;
+    int audio_channels;
 };
 
+#endif /* AVDEVICE_DECKLINK_COMMON_C_H */
diff --git a/libavdevice/decklink_dec.cpp b/libavdevice/decklink_dec.cpp
index 747f47e5..9d7dc97f 100644
--- a/libavdevice/decklink_dec.cpp
+++ b/libavdevice/decklink_dec.cpp
@@ -25,14 +25,47 @@
 #include <semaphore.h>
 
 extern "C" {
+#include "config.h"
 #include "libavformat/avformat.h"
 #include "libavformat/internal.h"
 #include "libavutil/imgutils.h"
+#if CONFIG_LIBZVBI
+#include <libzvbi.h>
+#endif
 }
 
 #include "decklink_common.h"
 #include "decklink_dec.h"
 
+#if CONFIG_LIBZVBI
+static uint8_t calc_parity_and_line_offset(int line) // builds the field_parity/line_offset byte stored in a data unit
+{
+    uint8_t ret = (line < 313) << 5; // bit 5 (field_parity) is set for lines below 313
+    if (line >= 7 && line <= 22)
+        ret += line; // lines 7..22 use their own number as line_offset
+    if (line >= 320 && line <= 335)
+        ret += (line - 313); // lines 320..335 map onto offsets 7..22
+    return ret; // lines outside both ranges keep the offset bits at 0
+}
+
+static int teletext_data_unit_from_vbi_data(int line, uint8_t *src, uint8_t *tgt) // returns 0 on success, -1 if slicing fails
+{
+    vbi_bit_slicer slicer;
+
+    vbi_bit_slicer_init(&slicer, 720, 13500000, 6937500, 6937500, 0x00aaaae4, 0xffff, 18, 6, 42 * 8, VBI_MODULATION_NRZ_MSB, VBI_PIXFMT_UYVY);
+
+    if (vbi_bit_slice(&slicer, src, tgt + 4) == FALSE) // sliced payload is written right after the 4 header bytes
+        return -1; // slicer reported failure; the header bytes below are not written
+
+    tgt[0] = 0x02; // data_unit_id
+    tgt[1] = 0x2c; // data_unit_length
+    tgt[2] = calc_parity_and_line_offset(line); // field_parity, line_offset
+    tgt[3] = 0xe4; // framing code
+
+    return 0;
+}
+#endif
+
 static void avpacket_queue_init(AVFormatContext *avctx, AVPacketQueue *q)
 {
     memset(q, 0, sizeof(AVPacketQueue));
@@ -48,7 +81,7 @@ static void avpacket_queue_flush(AVPacketQueue *q)
     pthread_mutex_lock(&q->mutex);
     for (pkt = q->first_pkt; pkt != NULL; pkt = pkt1) {
         pkt1 = pkt->next;
-        av_free_packet(&pkt->pkt);
+        av_packet_unref(&pkt->pkt);
         av_freep(&pkt);
     }
     q->last_pkt   = NULL;
@@ -277,6 +310,50 @@ HRESULT decklink_input_callback::VideoInputFrameArrived(
         pkt.size         = videoFrame->GetRowBytes() *
                            videoFrame->GetHeight();
         //fprintf(stderr,"Video Frame size %d ts %d\n", pkt.size, pkt.pts);
+
+#if CONFIG_LIBZVBI
+        if (!no_video && ctx->teletext_lines && videoFrame->GetPixelFormat() == bmdFormat8BitYUV && videoFrame->GetWidth() == 720) {
+            IDeckLinkVideoFrameAncillary *vanc;
+            AVPacket txt_pkt;
+            uint8_t txt_buf0[1611]; // max 35 * 46 bytes decoded teletext lines + 1 byte data_identifier
+            uint8_t *txt_buf = txt_buf0;
+
+            if (videoFrame->GetAncillaryData(&vanc) == S_OK) {
+                int i;
+                int64_t line_mask = 1;
+                txt_buf[0] = 0x10;    // data_identifier - EBU_data
+                txt_buf++;
+                for (i = 6; i < 336; i++, line_mask <<= 1) {
+                    uint8_t *buf;
+                    if ((ctx->teletext_lines & line_mask) && vanc->GetBufferForVerticalBlankingLine(i, (void**)&buf) == S_OK) {
+                        if (teletext_data_unit_from_vbi_data(i, buf, txt_buf) >= 0)
+                            txt_buf += 46;
+                    }
+                    if (i == 22)
+                        i = 317;
+                }
+                vanc->Release();
+                if (txt_buf - txt_buf0 > 1) {
+                    int stuffing_units = (4 - ((45 + txt_buf - txt_buf0) / 46) % 4) % 4;
+                    while (stuffing_units--) {
+                        memset(txt_buf, 0xff, 46);
+                        txt_buf[1] = 0x2c; // data_unit_length
+                        txt_buf += 46;
+                    }
+                    av_init_packet(&txt_pkt);
+                    txt_pkt.pts = pkt.pts;
+                    txt_pkt.dts = pkt.dts;
+                    txt_pkt.stream_index = ctx->teletext_st->index;
+                    txt_pkt.data = txt_buf0;
+                    txt_pkt.size = txt_buf - txt_buf0;
+                    if (avpacket_queue_put(&ctx->queue, &txt_pkt) < 0) {
+                        ++ctx->dropped;
+                    }
+                }
+            }
+        }
+#endif
+
         c->frame_number++;
         if (avpacket_queue_put(&ctx->queue, &pkt) < 0) {
             ++ctx->dropped;
@@ -378,9 +455,28 @@ av_cold int ff_decklink_read_header(AVFormatContext *avctx)
         return AVERROR(ENOMEM);
     ctx->list_devices = cctx->list_devices;
     ctx->list_formats = cctx->list_formats;
+    ctx->teletext_lines = cctx->teletext_lines;
     ctx->preroll      = cctx->preroll;
     cctx->ctx = ctx;
 
+#if !CONFIG_LIBZVBI
+    if (ctx->teletext_lines) {
+        av_log(avctx, AV_LOG_ERROR, "Libzvbi support is needed for capturing teletext, please recompile FFmpeg.\n");
+        return AVERROR(ENOSYS);
+    }
+#endif
+
+    /* Check audio channel option for valid values: 2, 8 or 16 */
+    switch (cctx->audio_channels) {
+        case 2:
+        case 8:
+        case 16:
+            break;
+        default:
+            av_log(avctx, AV_LOG_ERROR, "Value of channels option must be one of 2, 8 or 16\n");
+            return AVERROR(EINVAL);
+    }
+
     iter = CreateDeckLinkIteratorInstance();
     if (!iter) {
         av_log(avctx, AV_LOG_ERROR, "Could not create DeckLink iterator\n");
@@ -458,7 +554,7 @@ av_cold int ff_decklink_read_header(AVFormatContext *avctx)
     st->codec->codec_type  = AVMEDIA_TYPE_AUDIO;
     st->codec->codec_id    = AV_CODEC_ID_PCM_S16LE;
     st->codec->sample_rate = bmdAudioSampleRate48kHz;
-    st->codec->channels    = 2;
+    st->codec->channels    = cctx->audio_channels;
     avpriv_set_pts_info(st, 64, 1, 1000000);  /* 64 bits pts in us */
     ctx->audio_st=st;
 
@@ -473,7 +569,7 @@ av_cold int ff_decklink_read_header(AVFormatContext *avctx)
 
     st->codec->time_base.den      = ctx->bmd_tb_den;
     st->codec->time_base.num      = ctx->bmd_tb_num;
-    st->codec->bit_rate    = avpicture_get_size(st->codec->pix_fmt, ctx->bmd_width, ctx->bmd_height) * 1/av_q2d(st->codec->time_base) * 8;
+    st->codec->bit_rate    = av_image_get_buffer_size(st->codec->pix_fmt, ctx->bmd_width, ctx->bmd_height, 1) * 1/av_q2d(st->codec->time_base) * 8;
 
     if (cctx->v210) {
         st->codec->codec_id    = AV_CODEC_ID_V210;
@@ -488,7 +584,22 @@ av_cold int ff_decklink_read_header(AVFormatContext *avctx)
 
     ctx->video_st=st;
 
-    result = ctx->dli->EnableAudioInput(bmdAudioSampleRate48kHz, bmdAudioSampleType16bitInteger, 2);
+    if (ctx->teletext_lines) {
+        st = avformat_new_stream(avctx, NULL);
+        if (!st) {
+            av_log(avctx, AV_LOG_ERROR, "Cannot add stream\n");
+            goto error;
+        }
+        st->codec->codec_type  = AVMEDIA_TYPE_SUBTITLE;
+        st->codec->time_base.den      = ctx->bmd_tb_den;
+        st->codec->time_base.num      = ctx->bmd_tb_num;
+        st->codec->codec_id    = AV_CODEC_ID_DVB_TELETEXT;
+        avpriv_set_pts_info(st, 64, 1, 1000000);  /* 64 bits pts in us */
+        ctx->teletext_st = st;
+    }
+
+    av_log(avctx, AV_LOG_VERBOSE, "Using %d input audio channels\n", ctx->audio_st->codec->channels);
+    result = ctx->dli->EnableAudioInput(bmdAudioSampleRate48kHz, bmdAudioSampleType16bitInteger, ctx->audio_st->codec->channels);
 
     if (result != S_OK) {
         av_log(avctx, AV_LOG_ERROR, "Cannot enable audio input\n");
diff --git a/libavdevice/decklink_dec.h b/libavdevice/decklink_dec.h
index 6bd9226c..c02344ef 100644
--- a/libavdevice/decklink_dec.h
+++ b/libavdevice/decklink_dec.h
@@ -19,6 +19,9 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#ifndef AVDEVICE_DECKLINK_DEC_H
+#define AVDEVICE_DECKLINK_DEC_H
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -30,3 +33,5 @@ int ff_decklink_read_close(AVFormatContext *avctx);
 #ifdef __cplusplus
 } /* extern "C" */
 #endif
+
+#endif /* AVDEVICE_DECKLINK_DEC_H */
diff --git a/libavdevice/decklink_dec_c.c b/libavdevice/decklink_dec_c.c
index b1a65e68..40c21a75 100644
--- a/libavdevice/decklink_dec_c.c
+++ b/libavdevice/decklink_dec_c.c
@@ -32,6 +32,10 @@ static const AVOption options[] = {
     { "list_devices", "list available devices"  , OFFSET(list_devices), AV_OPT_TYPE_INT   , { .i64 = 0   }, 0, 1, DEC },
     { "list_formats", "list supported formats"  , OFFSET(list_formats), AV_OPT_TYPE_INT   , { .i64 = 0   }, 0, 1, DEC },
     { "bm_v210",      "v210 10 bit per channel" , OFFSET(v210),         AV_OPT_TYPE_INT   , { .i64 = 0   }, 0, 1, DEC },
+    { "teletext_lines", "teletext lines bitmask", OFFSET(teletext_lines), AV_OPT_TYPE_INT64, { .i64 = 0   }, 0, 0x7ffffffffLL, DEC, "teletext_lines"},
+    { "standard",     NULL,                                           0,  AV_OPT_TYPE_CONST, { .i64 = 0x7fff9fffeLL}, 0, 0,    DEC, "teletext_lines"},
+    { "all",          NULL,                                           0,  AV_OPT_TYPE_CONST, { .i64 = 0x7ffffffffLL}, 0, 0,    DEC, "teletext_lines"},
+    { "channels",     "number of audio channels", OFFSET(audio_channels), AV_OPT_TYPE_INT , { .i64 = 2   }, 2, 16, DEC },
     { NULL },
 };
 
diff --git a/libavdevice/decklink_enc.h b/libavdevice/decklink_enc.h
index 6086947e..5ffc05cd 100644
--- a/libavdevice/decklink_enc.h
+++ b/libavdevice/decklink_enc.h
@@ -19,6 +19,9 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#ifndef AVDEVICE_DECKLINK_ENC_H
+#define AVDEVICE_DECKLINK_ENC_H
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -30,3 +33,5 @@ int ff_decklink_write_trailer(AVFormatContext *avctx);
 #ifdef __cplusplus
 } /* extern "C" */
 #endif
+
+#endif /* AVDEVICE_DECKLINK_ENC_H */
diff --git a/libavdevice/dshow.c b/libavdevice/dshow.c
index 62249785..f56c1655 100644
--- a/libavdevice/dshow.c
+++ b/libavdevice/dshow.c
@@ -119,7 +119,7 @@ dshow_read_close(AVFormatContext *s)
     pktl = ctx->pktl;
     while (pktl) {
         AVPacketList *next = pktl->next;
-        av_free_packet(&pktl->pkt);
+        av_packet_unref(&pktl->pkt);
         av_free(pktl);
         pktl = next;
     }
@@ -240,7 +240,7 @@ dshow_cycle_devices(AVFormatContext *avctx, ICreateDevEnum *devenum,
         int i;
 
         r = CoGetMalloc(1, &co_malloc);
-        if (r = S_OK)
+        if (r != S_OK)
             goto fail1;
         r = CreateBindCtx(0, &bind_ctx);
         if (r != S_OK)
@@ -975,7 +975,7 @@ dshow_add_device(AVFormatContext *avctx,
             codec->codec_id = AV_CODEC_ID_RAWVIDEO;
             if (bih->biCompression == BI_RGB || bih->biCompression == BI_BITFIELDS) {
                 codec->bits_per_coded_sample = bih->biBitCount;
-                codec->extradata = av_malloc(9 + FF_INPUT_BUFFER_PADDING_SIZE);
+                codec->extradata = av_malloc(9 + AV_INPUT_BUFFER_PADDING_SIZE);
                 if (codec->extradata) {
                     codec->extradata_size = 9;
                     memcpy(codec->extradata, "BottomUp", 9);
@@ -1268,36 +1268,20 @@ static const AVOption options[] = {
     { "sample_size", "set audio sample size", OFFSET(sample_size), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 16, DEC },
     { "channels", "set number of audio channels, such as 1 or 2", OFFSET(channels), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, DEC },
     { "audio_buffer_size", "set audio device buffer latency size in milliseconds (default is the device's default)", OFFSET(audio_buffer_size), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, DEC },
-    { "list_devices", "list available devices", OFFSET(list_devices), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, DEC, "list_devices" },
-    { "true", "", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, DEC, "list_devices" },
-    { "false", "", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, DEC, "list_devices" },
-    { "list_options", "list available options for specified device", OFFSET(list_options), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, DEC, "list_options" },
-    { "true", "", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, DEC, "list_options" },
-    { "false", "", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, DEC, "list_options" },
+    { "list_devices", "list available devices",                      OFFSET(list_devices), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, DEC },
+    { "list_options", "list available options for specified device", OFFSET(list_options), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, DEC },
     { "video_device_number", "set video device number for devices with same name (starts at 0)", OFFSET(video_device_number), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, DEC },
     { "audio_device_number", "set audio device number for devices with same name (starts at 0)", OFFSET(audio_device_number), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, DEC },
     { "video_pin_name", "select video capture pin by name", OFFSET(video_pin_name),AV_OPT_TYPE_STRING, {.str = NULL},  0, 0, AV_OPT_FLAG_ENCODING_PARAM },
     { "audio_pin_name", "select audio capture pin by name", OFFSET(audio_pin_name),AV_OPT_TYPE_STRING, {.str = NULL},  0, 0, AV_OPT_FLAG_ENCODING_PARAM },
     { "crossbar_video_input_pin_number", "set video input pin number for crossbar device", OFFSET(crossbar_video_input_pin_number), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, DEC },
     { "crossbar_audio_input_pin_number", "set audio input pin number for crossbar device", OFFSET(crossbar_audio_input_pin_number), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, DEC },
-    { "show_video_device_dialog", "display property dialog for video capture device", OFFSET(show_video_device_dialog), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, DEC, "show_video_device_dialog" },
-    { "true", "", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, DEC, "show_video_device_dialog" },
-    { "false", "", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, DEC, "show_video_device_dialog" },
-    { "show_audio_device_dialog", "display property dialog for audio capture device", OFFSET(show_audio_device_dialog), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, DEC, "show_audio_device_dialog" },
-    { "true", "", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, DEC, "show_audio_device_dialog" },
-    { "false", "", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, DEC, "show_audio_device_dialog" },
-    { "show_video_crossbar_connection_dialog", "display property dialog for crossbar connecting pins filter on video device", OFFSET(show_video_crossbar_connection_dialog), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, DEC, "show_video_crossbar_connection_dialog" },
-    { "true", "", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, DEC, "show_video_crossbar_connection_dialog" },
-    { "false", "", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, DEC, "show_video_crossbar_connection_dialog" },
-    { "show_audio_crossbar_connection_dialog", "display property dialog for crossbar connecting pins filter on audio device", OFFSET(show_audio_crossbar_connection_dialog), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, DEC, "show_audio_crossbar_connection_dialog" },
-    { "true", "", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, DEC, "show_audio_crossbar_connection_dialog" },
-    { "false", "", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, DEC, "show_audio_crossbar_connection_dialog" },
-    { "show_analog_tv_tuner_dialog", "display property dialog for analog tuner filter", OFFSET(show_analog_tv_tuner_dialog), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, DEC, "show_analog_tv_tuner_dialog" },
-    { "true", "", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, DEC, "show_analog_tv_tuner_dialog" },
-    { "false", "", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, DEC, "show_analog_tv_tuner_dialog" },
-    { "show_analog_tv_tuner_audio_dialog", "display property dialog for analog tuner audio filter", OFFSET(show_analog_tv_tuner_audio_dialog), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, DEC, "show_analog_tv_tuner_dialog" },
-    { "true", "", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, DEC, "show_analog_tv_tuner_audio_dialog" },
-    { "false", "", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, DEC, "show_analog_tv_tuner_audio_dialog" },
+    { "show_video_device_dialog",              "display property dialog for video capture device",                            OFFSET(show_video_device_dialog),              AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, DEC },
+    { "show_audio_device_dialog",              "display property dialog for audio capture device",                            OFFSET(show_audio_device_dialog),              AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, DEC },
+    { "show_video_crossbar_connection_dialog", "display property dialog for crossbar connecting pins filter on video device", OFFSET(show_video_crossbar_connection_dialog), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, DEC },
+    { "show_audio_crossbar_connection_dialog", "display property dialog for crossbar connecting pins filter on audio device", OFFSET(show_audio_crossbar_connection_dialog), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, DEC },
+    { "show_analog_tv_tuner_dialog",           "display property dialog for analog tuner filter",                             OFFSET(show_analog_tv_tuner_dialog),           AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, DEC },
+    { "show_analog_tv_tuner_audio_dialog",     "display property dialog for analog tuner audio filter",                       OFFSET(show_analog_tv_tuner_audio_dialog),     AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, DEC },
     { "audio_device_load", "load audio capture filter device (and properties) from file", OFFSET(audio_filter_load_file), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC },
     { "audio_device_save", "save audio capture filter device (and properties) to file", OFFSET(audio_filter_save_file), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC },
     { "video_device_load", "load video capture filter device (and properties) from file", OFFSET(video_filter_load_file), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, DEC },
diff --git a/libavdevice/dshow_capture.h b/libavdevice/dshow_capture.h
index b17da109..f26eaf9a 100644
--- a/libavdevice/dshow_capture.h
+++ b/libavdevice/dshow_capture.h
@@ -19,8 +19,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef AVDEVICE_DSHOW_H
-#define AVDEVICE_DSHOW_H
+#ifndef AVDEVICE_DSHOW_CAPTURE_H
+#define AVDEVICE_DSHOW_CAPTURE_H
 
 #define DSHOWDEBUG 0
 
@@ -349,4 +349,4 @@ HRESULT dshow_try_setup_crossbar_options(ICaptureGraphBuilder2 *graph_builder2,
 
 void dshow_show_filter_properties(IBaseFilter *pFilter, AVFormatContext *avctx);
 
-#endif /* AVDEVICE_DSHOW_H */
+#endif /* AVDEVICE_DSHOW_CAPTURE_H */
diff --git a/libavdevice/dshow_enummediatypes.c b/libavdevice/dshow_enummediatypes.c
index 5b69a5b7..3a66a4de 100644
--- a/libavdevice/dshow_enummediatypes.c
+++ b/libavdevice/dshow_enummediatypes.c
@@ -37,6 +37,8 @@ libAVEnumMediaTypes_Next(libAVEnumMediaTypes *this, unsigned long n,
     if (!this->pos && n == 1) {
         if (!IsEqualGUID(&this->type.majortype, &GUID_NULL)) {
             AM_MEDIA_TYPE *type = av_malloc(sizeof(AM_MEDIA_TYPE));
+            if (!type)
+                return E_OUTOFMEMORY;
             ff_copy_dshow_media_type(type, &this->type);
             *types = type;
             count = 1;
diff --git a/libavdevice/dshow_pin.c b/libavdevice/dshow_pin.c
index 4f719a66..664246da 100644
--- a/libavdevice/dshow_pin.c
+++ b/libavdevice/dshow_pin.c
@@ -303,14 +303,18 @@ libAVMemInputPin_Receive(libAVMemInputPin *this, IMediaSample *sample)
     libAVPin *pin = (libAVPin *) ((uint8_t *) this - imemoffset);
     enum dshowDeviceType devtype = pin->filter->type;
     void *priv_data;
+    AVFormatContext *s;
     uint8_t *buf;
     int buf_size; /* todo should be a long? */
     int index;
     int64_t curtime;
     int64_t orig_curtime;
+    int64_t graphtime;
     const char *devtypename = (devtype == VideoDevice) ? "video" : "audio";
     IReferenceClock *clock = pin->filter->clock;
     int64_t dummy;
+    struct dshow_ctx *ctx;
+
 
     dshowdebug("libAVMemInputPin_Receive(%p)\n", this);
 
@@ -319,6 +323,7 @@ libAVMemInputPin_Receive(libAVMemInputPin *this, IMediaSample *sample)
 
     IMediaSample_GetTime(sample, &orig_curtime, &dummy);
     orig_curtime += pin->filter->start_time;
+    IReferenceClock_GetTime(clock, &graphtime);
     if (devtype == VideoDevice) {
         /* PTS from video devices is unreliable. */
         IReferenceClock_GetTime(clock, &curtime);
@@ -338,10 +343,13 @@ libAVMemInputPin_Receive(libAVMemInputPin *this, IMediaSample *sample)
     buf_size = IMediaSample_GetActualDataLength(sample);
     IMediaSample_GetPointer(sample, &buf);
     priv_data = pin->filter->priv_data;
+    s = priv_data;
+    ctx = s->priv_data;
     index = pin->filter->stream_index;
 
-    av_log(NULL, AV_LOG_VERBOSE, "dshow passing through packet of type %s size %6d timestamp %"PRId64" orig timestamp %"PRId64"\n",
-           devtypename, buf_size, curtime, orig_curtime);
+    av_log(NULL, AV_LOG_VERBOSE, "dshow passing through packet of type %s size %8d "
+        "timestamp %"PRId64" orig timestamp %"PRId64" graph timestamp %"PRId64" diff %"PRId64" %s\n",
+        devtypename, buf_size, curtime, orig_curtime, graphtime, graphtime - orig_curtime, ctx->device_name[devtype]);
     pin->filter->callback(priv_data, index, buf, buf_size, curtime, devtype);
 
     return S_OK;
diff --git a/libavdevice/fbdev_common.c b/libavdevice/fbdev_common.c
index 98f96de2..91bd8e1a 100644
--- a/libavdevice/fbdev_common.c
+++ b/libavdevice/fbdev_common.c
@@ -42,7 +42,7 @@ static const struct rgb_pixfmt_map_entry rgb_pixfmt_map[] = {
     {  32,       3,           2,           8,            0,   AV_PIX_FMT_ABGR  },
     {  24,       0,           8,          16,            0,   AV_PIX_FMT_RGB24 },
     {  24,      16,           8,           0,            0,   AV_PIX_FMT_BGR24 },
-    {  16,      11,           5,           0,           16,   AV_PIX_FMT_RGB565 },
+    {  16,      11,           5,           0,            0,   AV_PIX_FMT_RGB565 },
 };
 
 enum AVPixelFormat ff_get_pixfmt_from_fb_varinfo(struct fb_var_screeninfo *varinfo)
diff --git a/libavdevice/fbdev_dec.c b/libavdevice/fbdev_dec.c
index c1e946a5..e9a36393 100644
--- a/libavdevice/fbdev_dec.c
+++ b/libavdevice/fbdev_dec.c
@@ -136,11 +136,11 @@ static av_cold int fbdev_read_header(AVFormatContext *avctx)
         fbdev->width * fbdev->height * fbdev->bytes_per_pixel * av_q2d(fbdev->framerate_q) * 8;
 
     av_log(avctx, AV_LOG_INFO,
-           "w:%d h:%d bpp:%d pixfmt:%s fps:%d/%d bit_rate:%d\n",
+           "w:%d h:%d bpp:%d pixfmt:%s fps:%d/%d bit_rate:%"PRId64"\n",
            fbdev->width, fbdev->height, fbdev->varinfo.bits_per_pixel,
            av_get_pix_fmt_name(pix_fmt),
            fbdev->framerate_q.num, fbdev->framerate_q.den,
-           st->codec->bit_rate);
+           (int64_t)st->codec->bit_rate);
     return 0;
 
 fail:
diff --git a/libavdevice/gdigrab.c b/libavdevice/gdigrab.c
index 9a185d4c..4428a34e 100644
--- a/libavdevice/gdigrab.c
+++ b/libavdevice/gdigrab.c
@@ -235,6 +235,8 @@ gdigrab_read_header(AVFormatContext *s1)
     AVStream   *st       = NULL;
 
     int bpp;
+    int vertres;
+    int desktopvertres;
     RECT virtual_rect;
     RECT clip_rect;
     BITMAP bmp;
@@ -263,13 +265,26 @@ gdigrab_read_header(AVFormatContext *s1)
         goto error;
     }
 
+    /* This will get the device context for the selected window, or if
+     * none, the primary screen */
+    source_hdc = GetDC(hwnd);
+    if (!source_hdc) {
+        WIN32_API_ERROR("Couldn't get window device context");
+        ret = AVERROR(EIO);
+        goto error;
+    }
+    bpp = GetDeviceCaps(source_hdc, BITSPIXEL);
+
     if (hwnd) {
         GetClientRect(hwnd, &virtual_rect);
     } else {
+        /* desktop -- get the right height and width for scaling DPI */
+        vertres = GetDeviceCaps(source_hdc, VERTRES);
+        desktopvertres = GetDeviceCaps(source_hdc, DESKTOPVERTRES);
         virtual_rect.left = GetSystemMetrics(SM_XVIRTUALSCREEN);
         virtual_rect.top = GetSystemMetrics(SM_YVIRTUALSCREEN);
-        virtual_rect.right = virtual_rect.left + GetSystemMetrics(SM_CXVIRTUALSCREEN);
-        virtual_rect.bottom = virtual_rect.top + GetSystemMetrics(SM_CYVIRTUALSCREEN);
+        virtual_rect.right = (virtual_rect.left + GetSystemMetrics(SM_CXVIRTUALSCREEN)) * desktopvertres / vertres;
+        virtual_rect.bottom = (virtual_rect.top + GetSystemMetrics(SM_CYVIRTUALSCREEN)) * desktopvertres / vertres;
     }
 
     /* If no width or height set, use full screen/window area */
@@ -299,15 +314,6 @@ gdigrab_read_header(AVFormatContext *s1)
             goto error;
     }
 
-    /* This will get the device context for the selected window, or if
-     * none, the primary screen */
-    source_hdc = GetDC(hwnd);
-    if (!source_hdc) {
-        WIN32_API_ERROR("Couldn't get window device context");
-        ret = AVERROR(EIO);
-        goto error;
-    }
-    bpp = GetDeviceCaps(source_hdc, BITSPIXEL);
 
     if (name) {
         av_log(s1, AV_LOG_INFO,
diff --git a/libavdevice/iec61883.c b/libavdevice/iec61883.c
index 01e52899..c45ae9ae 100644
--- a/libavdevice/iec61883.c
+++ b/libavdevice/iec61883.c
@@ -198,7 +198,6 @@ static int iec61883_parse_queue_dv(struct iec61883_data *dv, AVPacket *pkt)
 
     size = avpriv_dv_produce_packet(dv->dv_demux, pkt,
                                     packet->buf, packet->len, -1);
-    pkt->destruct = av_destruct_packet;
     dv->queue_first = packet->next;
     av_free(packet);
     dv->packets--;
@@ -271,7 +270,7 @@ static int iec61883_read_header(AVFormatContext *context)
     }
 
     if (dv->device_guid) {
-        if (sscanf(dv->device_guid, "%llx", (long long unsigned int *)&guid) != 1) {
+        if (sscanf(dv->device_guid, "%"SCNx64, &guid) != 1) { /* hex, like the old %llx */
             av_log(context, AV_LOG_INFO, "Invalid dvguid parameter: %s\n",
                    dv->device_guid);
             goto fail;
diff --git a/libavdevice/internal.h b/libavdevice/internal.h
index 3cd1b068..e222cf20 100644
--- a/libavdevice/internal.h
+++ b/libavdevice/internal.h
@@ -21,6 +21,7 @@
 
 #include "libavformat/avformat.h"
 
+av_warn_unused_result
 int ff_alloc_input_device_context(struct AVFormatContext **avctx, struct AVInputFormat *iformat,
                                   const char *format);
 
diff --git a/libavdevice/jack.c b/libavdevice/jack.c
index 917534d9..9ecbf9e3 100644
--- a/libavdevice/jack.c
+++ b/libavdevice/jack.c
@@ -218,7 +218,7 @@ static void free_pkt_fifo(AVFifoBuffer **fifo)
     AVPacket pkt;
     while (av_fifo_size(*fifo)) {
         av_fifo_generic_read(*fifo, &pkt, sizeof(pkt), NULL);
-        av_free_packet(&pkt);
+        av_packet_unref(&pkt);
     }
     av_fifo_freep(fifo);
 }
diff --git a/libavdevice/lavfi.c b/libavdevice/lavfi.c
index 126e5f62..8e9e67d3 100644
--- a/libavdevice/lavfi.c
+++ b/libavdevice/lavfi.c
@@ -30,6 +30,8 @@
 #include "libavutil/bprint.h"
 #include "libavutil/channel_layout.h"
 #include "libavutil/file.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/internal.h"
 #include "libavutil/log.h"
 #include "libavutil/mem.h"
 #include "libavutil/opt.h"
@@ -38,6 +40,7 @@
 #include "libavfilter/avfilter.h"
 #include "libavfilter/avfiltergraph.h"
 #include "libavfilter/buffersink.h"
+#include "libavformat/avio_internal.h"
 #include "libavformat/internal.h"
 #include "avdevice.h"
 
@@ -142,7 +145,11 @@ av_cold static int lavfi_read_header(AVFormatContext *avctx)
     if (lavfi->graph_filename) {
         AVBPrint graph_file_pb;
         AVIOContext *avio = NULL;
-        ret = avio_open(&avio, lavfi->graph_filename, AVIO_FLAG_READ);
+        AVDictionary *options = NULL;
+        if (avctx->protocol_whitelist && (ret = av_dict_set(&options, "protocol_whitelist", avctx->protocol_whitelist, 0)) < 0)
+            goto end;
+        ret = avio_open2(&avio, lavfi->graph_filename, AVIO_FLAG_READ, &avctx->interrupt_callback, &options);
+        av_dict_set(&options, "protocol_whitelist", NULL, 0);
         if (ret < 0)
             goto end;
         av_bprint_init(&graph_file_pb, 0, AV_BPRINT_SIZE_UNLIMITED);
@@ -247,7 +254,7 @@ av_cold static int lavfi_read_header(AVFormatContext *avctx)
     for (i = 0, inout = output_links; inout; i++, inout = inout->next) {
         AVFilterContext *sink;
 
-        type = inout->filter_ctx->output_pads[inout->pad_idx].type;
+        type = avfilter_pad_get_type(inout->filter_ctx->output_pads, inout->pad_idx);
 
         if (type == AVMEDIA_TYPE_VIDEO && ! buffersink ||
             type == AVMEDIA_TYPE_AUDIO && ! abuffersink) {
@@ -336,7 +343,7 @@ av_cold static int lavfi_read_header(AVFormatContext *avctx)
     }
 
     if ((ret = create_subcc_streams(avctx)) < 0)
-        FAIL(ret);
+        goto end;
 
     if (!(lavfi->decoded_frame = av_frame_alloc()))
         FAIL(AVERROR(ENOMEM));
@@ -380,7 +387,6 @@ static int lavfi_read_packet(AVFormatContext *avctx, AVPacket *pkt)
     double min_pts = DBL_MAX;
     int stream_idx, min_pts_sink_idx = 0;
     AVFrame *frame = lavfi->decoded_frame;
-    AVPicture pict;
     AVDictionary *frame_metadata;
     int ret, i;
     int size = 0;
@@ -406,13 +412,13 @@ static int lavfi_read_packet(AVFormatContext *avctx, AVPacket *pkt)
         ret = av_buffersink_get_frame_flags(lavfi->sinks[i], frame,
                                             AV_BUFFERSINK_FLAG_PEEK);
         if (ret == AVERROR_EOF) {
-            av_dlog(avctx, "EOF sink_idx:%d\n", i);
+            ff_dlog(avctx, "EOF sink_idx:%d\n", i);
             lavfi->sink_eof[i] = 1;
             continue;
         } else if (ret < 0)
             return ret;
         d = av_rescale_q_rnd(frame->pts, tb, AV_TIME_BASE_Q, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
-        av_dlog(avctx, "sink_idx:%d time:%f\n", i, d);
+        ff_dlog(avctx, "sink_idx:%d time:%f\n", i, d);
         av_frame_unref(frame);
 
         if (d < min_pts) {
@@ -423,21 +429,18 @@ static int lavfi_read_packet(AVFormatContext *avctx, AVPacket *pkt)
     if (min_pts == DBL_MAX)
         return AVERROR_EOF;
 
-    av_dlog(avctx, "min_pts_sink_idx:%i\n", min_pts_sink_idx);
+    ff_dlog(avctx, "min_pts_sink_idx:%i\n", min_pts_sink_idx);
 
     av_buffersink_get_frame_flags(lavfi->sinks[min_pts_sink_idx], frame, 0);
     stream_idx = lavfi->sink_stream_map[min_pts_sink_idx];
 
     if (frame->width /* FIXME best way of testing a video */) {
-        size = avpicture_get_size(frame->format, frame->width, frame->height);
+        size = av_image_get_buffer_size(frame->format, frame->width, frame->height, 1);
         if ((ret = av_new_packet(pkt, size)) < 0)
             return ret;
 
-        memcpy(pict.data,     frame->data,     4*sizeof(frame->data[0]));
-        memcpy(pict.linesize, frame->linesize, 4*sizeof(frame->linesize[0]));
-
-        avpicture_layout(&pict, frame->format, frame->width, frame->height,
-                         pkt->data, size);
+        av_image_copy_to_buffer(pkt->data, size, (const uint8_t **)frame->data, frame->linesize,
+                                frame->format, frame->width, frame->height, 1);
     } else if (av_frame_get_channels(frame) /* FIXME test audio */) {
         size = frame->nb_samples * av_get_bytes_per_sample(frame->format) *
                                    av_frame_get_channels(frame);
diff --git a/libavdevice/libdc1394.c b/libavdevice/libdc1394.c
index 5f49c5ed..dcdca606 100644
--- a/libavdevice/libdc1394.c
+++ b/libavdevice/libdc1394.c
@@ -41,6 +41,7 @@
 #define DC1394_FRAMERATE_240   FRAMERATE_240
 #endif
 
+#include "libavutil/imgutils.h"
 #include "libavutil/internal.h"
 #include "libavutil/log.h"
 #include "libavutil/mathematics.h"
@@ -71,7 +72,7 @@ typedef struct dc1394_data {
     AVPacket packet;
 } dc1394_data;
 
-struct dc1394_frame_format {
+static const struct dc1394_frame_format {
     int width;
     int height;
     enum AVPixelFormat pix_fmt;
@@ -84,7 +85,7 @@ struct dc1394_frame_format {
     { 0, 0, 0, 0 } /* gotta be the last one */
 };
 
-struct dc1394_frame_rate {
+static const struct dc1394_frame_rate {
     int frame_rate;
     int frame_rate_id;
 } dc1394_frame_rates[] = {
@@ -121,12 +122,12 @@ static const AVClass libdc1394_class = {
 
 
 static inline int dc1394_read_common(AVFormatContext *c,
-                                     struct dc1394_frame_format **select_fmt, struct dc1394_frame_rate **select_fps)
+                                     const struct dc1394_frame_format **select_fmt, const struct dc1394_frame_rate **select_fps)
 {
     dc1394_data* dc1394 = c->priv_data;
     AVStream* vst;
-    struct dc1394_frame_format *fmt;
-    struct dc1394_frame_rate *fps;
+    const struct dc1394_frame_format *fmt;
+    const struct dc1394_frame_rate *fps;
     enum AVPixelFormat pix_fmt;
     int width, height;
     AVRational framerate;
@@ -180,7 +181,8 @@ static inline int dc1394_read_common(AVFormatContext *c,
 
     /* packet init */
     av_init_packet(&dc1394->packet);
-    dc1394->packet.size = avpicture_get_size(fmt->pix_fmt, fmt->width, fmt->height);
+    dc1394->packet.size = av_image_get_buffer_size(fmt->pix_fmt,
+                                                   fmt->width, fmt->height, 1);
     dc1394->packet.stream_index = vst->index;
     dc1394->packet.flags |= AV_PKT_FLAG_KEY;
 
@@ -293,8 +295,8 @@ static int dc1394_v2_read_header(AVFormatContext *c)
     dc1394_data* dc1394 = c->priv_data;
     dc1394camera_list_t *list;
     int res, i;
-    struct dc1394_frame_format *fmt = NULL;
-    struct dc1394_frame_rate *fps = NULL;
+    const struct dc1394_frame_format *fmt = NULL;
+    const struct dc1394_frame_rate *fps = NULL;
 
     if (dc1394_read_common(c, &fmt, &fps) != 0)
        return -1;
diff --git a/libavdevice/openal-dec.c b/libavdevice/openal-dec.c
index 37d321a3..e4daf532 100644
--- a/libavdevice/openal-dec.c
+++ b/libavdevice/openal-dec.c
@@ -60,9 +60,9 @@ typedef struct {
  * @param al_fmt the AL_FORMAT value to find information about.
  * @return A pointer to a structure containing information about the AL_FORMAT value.
  */
-static inline al_format_info* get_al_format_info(ALCenum al_fmt)
+static const inline al_format_info* get_al_format_info(ALCenum al_fmt)
 {
-    static al_format_info info_table[] = {
+    static const al_format_info info_table[] = {
         [AL_FORMAT_MONO8-LOWEST_AL_FORMAT]    = {AL_FORMAT_MONO8, AV_CODEC_ID_PCM_U8, 1},
         [AL_FORMAT_MONO16-LOWEST_AL_FORMAT]   = {AL_FORMAT_MONO16, AV_NE (AV_CODEC_ID_PCM_S16BE, AV_CODEC_ID_PCM_S16LE), 1},
         [AL_FORMAT_STEREO8-LOWEST_AL_FORMAT]  = {AL_FORMAT_STEREO8, AV_CODEC_ID_PCM_U8, 2},
@@ -204,7 +204,7 @@ static int read_packet(AVFormatContext* ctx, AVPacket *pkt)
 fail:
     /* Handle failure */
     if (pkt->data)
-        av_destruct_packet(pkt);
+        av_packet_unref(pkt);
     if (error_msg)
         av_log(ctx, AV_LOG_ERROR, "Error: %s\n", error_msg);
     return error;
diff --git a/libavdevice/opengl_enc.c b/libavdevice/opengl_enc.c
index 434ae97a..ba8d36ab 100644
--- a/libavdevice/opengl_enc.c
+++ b/libavdevice/opengl_enc.c
@@ -733,8 +733,8 @@ static av_cold void opengl_fill_color_map(OpenGLContext *opengl)
         return;
 
 #define FILL_COMPONENT(i) { \
-        shift = desc->comp[i].depth_minus1 >> 3; \
-        opengl->color_map[(i << 2) + ((desc->comp[i].offset_plus1 - 1) >> shift)] = 1.0; \
+        shift = (desc->comp[i].depth - 1) >> 3; \
+        opengl->color_map[(i << 2) + (desc->comp[i].offset >> shift)] = 1.0; \
     }
 
     memset(opengl->color_map, 0, sizeof(opengl->color_map));
@@ -1032,8 +1032,8 @@ static av_cold int opengl_init_context(OpenGLContext *opengl)
         for (i = 1; i < num_planes; i++)
             if (opengl->non_pow_2_textures)
                 opengl_configure_texture(opengl, opengl->texture_name[i],
-                        FF_CEIL_RSHIFT(opengl->width, desc->log2_chroma_w),
-                        FF_CEIL_RSHIFT(opengl->height, desc->log2_chroma_h));
+                        AV_CEIL_RSHIFT(opengl->width, desc->log2_chroma_w),
+                        AV_CEIL_RSHIFT(opengl->height, desc->log2_chroma_h));
             else
                 opengl_configure_texture(opengl, opengl->texture_name[i], opengl->width, opengl->height);
         if (has_alpha)
@@ -1135,8 +1135,8 @@ static uint8_t* opengl_get_plane_pointer(OpenGLContext *opengl, AVPacket *pkt, i
 {
     uint8_t *data = pkt->data;
     int wordsize = opengl_type_size(opengl->type);
-    int width_chroma = FF_CEIL_RSHIFT(opengl->width, desc->log2_chroma_w);
-    int height_chroma = FF_CEIL_RSHIFT(opengl->height, desc->log2_chroma_h);
+    int width_chroma = AV_CEIL_RSHIFT(opengl->width, desc->log2_chroma_w);
+    int height_chroma = AV_CEIL_RSHIFT(opengl->height, desc->log2_chroma_h);
     int plane = desc->comp[comp_index].plane;
 
     switch(plane) {
@@ -1161,8 +1161,8 @@ static uint8_t* opengl_get_plane_pointer(OpenGLContext *opengl, AVPacket *pkt, i
 
 #define LOAD_TEXTURE_DATA(comp_index, sub)                                                  \
 {                                                                                           \
-    int width = sub ? FF_CEIL_RSHIFT(opengl->width, desc->log2_chroma_w) : opengl->width;   \
-    int height = sub ? FF_CEIL_RSHIFT(opengl->height, desc->log2_chroma_h): opengl->height; \
+    int width = sub ? AV_CEIL_RSHIFT(opengl->width, desc->log2_chroma_w) : opengl->width;   \
+    int height = sub ? AV_CEIL_RSHIFT(opengl->height, desc->log2_chroma_h): opengl->height; \
     uint8_t *data;                                                                          \
     int plane = desc->comp[comp_index].plane;                                               \
                                                                                             \
diff --git a/libavdevice/opengl_enc_shaders.h b/libavdevice/opengl_enc_shaders.h
index ed8b3d30..67ee0ae7 100644
--- a/libavdevice/opengl_enc_shaders.h
+++ b/libavdevice/opengl_enc_shaders.h
@@ -18,8 +18,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef AVDEVICE_OPENGL_SHADERS_H
-#define AVDEVICE_OPENGL_SHADERS_H
+#ifndef AVDEVICE_OPENGL_ENC_SHADERS_H
+#define AVDEVICE_OPENGL_ENC_SHADERS_H
 
 #include "libavutil/pixfmt.h"
 
@@ -185,4 +185,4 @@ static const char * const FF_OPENGL_FRAGMENT_SHADER_GRAY =
         "gl_FragColor = vec4(c, c, c, 1.0);"
     "}";
 
-#endif /* AVDEVICE_OPENGL_SHADERS_H */
+#endif /* AVDEVICE_OPENGL_ENC_SHADERS_H */
diff --git a/libavdevice/oss_dec.c b/libavdevice/oss_dec.c
index 156d6ae9..3a9a20a2 100644
--- a/libavdevice/oss_dec.c
+++ b/libavdevice/oss_dec.c
@@ -84,7 +84,7 @@ static int audio_read_packet(AVFormatContext *s1, AVPacket *pkt)
 
     ret = read(s->fd, pkt->data, pkt->size);
     if (ret <= 0){
-        av_free_packet(pkt);
+        av_packet_unref(pkt);
         pkt->size = 0;
         if (ret<0)  return AVERROR(errno);
         else        return AVERROR_EOF;
diff --git a/libavdevice/pulse_audio_common.h b/libavdevice/pulse_audio_common.h
index 02534f79..902795e4 100644
--- a/libavdevice/pulse_audio_common.h
+++ b/libavdevice/pulse_audio_common.h
@@ -28,8 +28,10 @@
 
 pa_sample_format_t ff_codec_id_to_pulse_format(enum AVCodecID codec_id);
 
+av_warn_unused_result
 int ff_pulse_audio_get_devices(AVDeviceInfoList *devices, const char *server, int output);
 
+av_warn_unused_result
 int ff_pulse_audio_connect_context(pa_mainloop **pa_ml, pa_context **pa_ctx,
                                    const char *server, const char *description);
 
diff --git a/libavdevice/pulse_audio_enc.c b/libavdevice/pulse_audio_enc.c
index bc4d1f05..b419a38d 100644
--- a/libavdevice/pulse_audio_enc.c
+++ b/libavdevice/pulse_audio_enc.c
@@ -23,6 +23,7 @@
 #include <pulse/error.h>
 #include "libavformat/avformat.h"
 #include "libavformat/internal.h"
+#include "libavutil/internal.h"
 #include "libavutil/opt.h"
 #include "libavutil/time.h"
 #include "libavutil/log.h"
@@ -333,7 +334,7 @@ static int pulse_set_volume(PulseData *s, double volume)
     pa_volume_t vol;
     const pa_sample_spec *ss = pa_stream_get_sample_spec(s->stream);
 
-    vol = pa_sw_volume_multiply(lround(volume * PA_VOLUME_NORM), s->base_volume);
+    vol = pa_sw_volume_multiply(lrint(volume * PA_VOLUME_NORM), s->base_volume);
     pa_cvolume_set(&cvol, ss->channels, PA_VOLUME_NORM);
     pa_sw_cvolume_multiply_scalar(&cvol, &cvol, vol);
     pa_threaded_mainloop_lock(s->mainloop);
diff --git a/libavdevice/sdl.c b/libavdevice/sdl.c
index b98aae5e..4cccfe52 100644
--- a/libavdevice/sdl.c
+++ b/libavdevice/sdl.c
@@ -27,6 +27,7 @@
 #include <SDL_thread.h>
 
 #include "libavutil/avstring.h"
+#include "libavutil/imgutils.h"
 #include "libavutil/opt.h"
 #include "libavutil/parseutils.h"
 #include "libavutil/pixdesc.h"
@@ -315,22 +316,23 @@ static int sdl_write_packet(AVFormatContext *s, AVPacket *pkt)
 {
     SDLContext *sdl = s->priv_data;
     AVCodecContext *encctx = s->streams[0]->codec;
-    AVPicture pict;
+    uint8_t *data[4];
+    int linesize[4];
     int i;
 
     if (sdl->quit) {
         sdl_write_trailer(s);
         return AVERROR(EIO);
     }
-    avpicture_fill(&pict, pkt->data, encctx->pix_fmt, encctx->width, encctx->height);
+    av_image_fill_arrays(data, linesize, pkt->data, encctx->pix_fmt, encctx->width, encctx->height, 1);
 
     SDL_LockMutex(sdl->mutex);
     SDL_FillRect(sdl->surface, &sdl->surface->clip_rect,
                  SDL_MapRGB(sdl->surface->format, 0, 0, 0));
     SDL_LockYUVOverlay(sdl->overlay);
     for (i = 0; i < 3; i++) {
-        sdl->overlay->pixels [i] = pict.data    [i];
-        sdl->overlay->pitches[i] = pict.linesize[i];
+        sdl->overlay->pixels [i] = data    [i];
+        sdl->overlay->pitches[i] = linesize[i];
     }
     SDL_DisplayYUVOverlay(sdl->overlay, &sdl->overlay_rect);
     SDL_UnlockYUVOverlay(sdl->overlay);
diff --git a/libavdevice/sndio_dec.c b/libavdevice/sndio_dec.c
index f815b377..6f1160e0 100644
--- a/libavdevice/sndio_dec.c
+++ b/libavdevice/sndio_dec.c
@@ -67,7 +67,7 @@ static int audio_read_packet(AVFormatContext *s1, AVPacket *pkt)
 
     ret = sio_read(s->hdl, pkt->data, pkt->size);
     if (ret == 0 || sio_eof(s->hdl)) {
-        av_free_packet(pkt);
+        av_packet_unref(pkt);
         return AVERROR_EOF;
     }
 
diff --git a/libavdevice/v4l.c b/libavdevice/v4l.c
index d33f7142..81653e02 100644
--- a/libavdevice/v4l.c
+++ b/libavdevice/v4l.c
@@ -25,6 +25,7 @@
 #include "config.h"
 #include "libavutil/rational.h"
 #include "libavutil/imgutils.h"
+#include "libavutil/internal.h"
 #include "libavutil/log.h"
 #include "libavutil/opt.h"
 #include "libavformat/internal.h"
@@ -151,7 +152,7 @@ static int grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
     ioctl(video_fd, VIDIOCSAUDIO, &audio);
 
     ioctl(video_fd, VIDIOCGPICT, &pict);
-    av_dlog(s1, "v4l: colour=%d hue=%d brightness=%d constrast=%d whiteness=%d\n",
+    ff_dlog(s1, "v4l: colour=%d hue=%d brightness=%d constrast=%d whiteness=%d\n",
             pict.colour, pict.hue, pict.brightness, pict.contrast, pict.whiteness);
     /* try to choose a suitable video format */
     pict.palette = desired_palette;
diff --git a/libavdevice/v4l2.c b/libavdevice/v4l2.c
index 64ac09c1..383033e7 100644
--- a/libavdevice/v4l2.c
+++ b/libavdevice/v4l2.c
@@ -126,7 +126,7 @@ static int device_open(AVFormatContext *ctx)
 #if CONFIG_LIBV4L2
         SET_WRAPPERS(v4l2_);
 #else
-        av_log(ctx, AV_LOG_ERROR, "libavdevice is not build with libv4l2 support.\n");
+        av_log(ctx, AV_LOG_ERROR, "libavdevice is not built with libv4l2 support.\n");
         return AVERROR(EINVAL);
 #endif
     } else {
@@ -394,13 +394,6 @@ static int mmap_init(AVFormatContext *ctx)
     return 0;
 }
 
-#if FF_API_DESTRUCT_PACKET
-static void dummy_release_buffer(AVPacket *pkt)
-{
-    av_assert0(0);
-}
-#endif
-
 static int enqueue_buffer(struct video_data *s, struct v4l2_buffer *buf)
 {
     int res = 0;
@@ -557,7 +550,7 @@ static int mmap_read_frame(AVFormatContext *ctx, AVPacket *pkt)
 
         res = enqueue_buffer(s, &buf);
         if (res) {
-            av_free_packet(pkt);
+            av_packet_unref(pkt);
             return res;
         }
     } else {
@@ -565,11 +558,6 @@ static int mmap_read_frame(AVFormatContext *ctx, AVPacket *pkt)
 
         pkt->data     = s->buf_start[buf.index];
         pkt->size     = buf.bytesused;
-#if FF_API_DESTRUCT_PACKET
-FF_DISABLE_DEPRECATION_WARNINGS
-        pkt->destruct = dummy_release_buffer;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
 
         buf_descriptor = av_malloc(sizeof(struct buff_data));
         if (!buf_descriptor) {
@@ -951,8 +939,8 @@ static int v4l2_read_header(AVFormatContext *ctx)
         goto fail;
 
     st->codec->pix_fmt = ff_fmt_v4l2ff(desired_format, codec_id);
-    s->frame_size =
-        avpicture_get_size(st->codec->pix_fmt, s->width, s->height);
+    s->frame_size = av_image_get_buffer_size(st->codec->pix_fmt,
+                                             s->width, s->height, 1);
 
     if ((res = mmap_init(ctx)) ||
         (res = mmap_start(ctx)) < 0)
@@ -966,7 +954,7 @@ static int v4l2_read_header(AVFormatContext *ctx)
         st->codec->codec_tag =
             avcodec_pix_fmt_to_codec_tag(st->codec->pix_fmt);
     else if (codec_id == AV_CODEC_ID_H264) {
-        st->need_parsing = AVSTREAM_PARSE_HEADERS;
+        st->need_parsing = AVSTREAM_PARSE_FULL_ONCE;
     }
     if (desired_format == V4L2_PIX_FMT_YVU420)
         st->codec->codec_tag = MKTAG('Y', 'V', '1', '2');
@@ -987,7 +975,11 @@ static int v4l2_read_header(AVFormatContext *ctx)
 static int v4l2_read_packet(AVFormatContext *ctx, AVPacket *pkt)
 {
     struct video_data *s = ctx->priv_data;
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
     AVFrame *frame = ctx->streams[0]->codec->coded_frame;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
     int res;
 
     av_init_packet(pkt);
@@ -995,10 +987,14 @@ static int v4l2_read_packet(AVFormatContext *ctx, AVPacket *pkt)
         return res;
     }
 
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
     if (frame && s->interlaced) {
         frame->interlaced_frame = 1;
         frame->top_field_first = s->top_field_first;
     }
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
     return pkt->size;
 }
@@ -1116,7 +1112,7 @@ static const AVOption options[] = {
     { "default",      "use timestamps from the kernel",                           OFFSET(ts_mode),      AV_OPT_TYPE_CONST,  {.i64 = V4L_TS_DEFAULT  }, 0, 2, DEC, "timestamps" },
     { "abs",          "use absolute timestamps (wall clock)",                     OFFSET(ts_mode),      AV_OPT_TYPE_CONST,  {.i64 = V4L_TS_ABS      }, 0, 2, DEC, "timestamps" },
     { "mono2abs",     "force conversion from monotonic to absolute timestamps",   OFFSET(ts_mode),      AV_OPT_TYPE_CONST,  {.i64 = V4L_TS_MONO2ABS }, 0, 2, DEC, "timestamps" },
-    { "use_libv4l2",  "use libv4l2 (v4l-utils) conversion functions",             OFFSET(use_libv4l2),  AV_OPT_TYPE_INT,    {.i64 = 0}, 0, 1, DEC },
+    { "use_libv4l2",  "use libv4l2 (v4l-utils) conversion functions",             OFFSET(use_libv4l2),  AV_OPT_TYPE_BOOL,   {.i64 = 0}, 0, 1, DEC },
     { NULL },
 };
 
diff --git a/libavdevice/version.h b/libavdevice/version.h
index 8de07f08..b226a761 100644
--- a/libavdevice/version.h
+++ b/libavdevice/version.h
@@ -27,9 +27,9 @@
 
 #include "libavutil/version.h"
 
-#define LIBAVDEVICE_VERSION_MAJOR 56
-#define LIBAVDEVICE_VERSION_MINOR  4
-#define LIBAVDEVICE_VERSION_MICRO 100
+#define LIBAVDEVICE_VERSION_MAJOR  57
+#define LIBAVDEVICE_VERSION_MINOR   0
+#define LIBAVDEVICE_VERSION_MICRO 101
 
 #define LIBAVDEVICE_VERSION_INT AV_VERSION_INT(LIBAVDEVICE_VERSION_MAJOR, \
                                                LIBAVDEVICE_VERSION_MINOR, \
diff --git a/libavdevice/vfwcap.c b/libavdevice/vfwcap.c
index 10e6c69d..e1f8b866 100644
--- a/libavdevice/vfwcap.c
+++ b/libavdevice/vfwcap.c
@@ -234,7 +234,7 @@ static int vfw_read_close(AVFormatContext *s)
     pktl = ctx->pktl;
     while (pktl) {
         AVPacketList *next = pktl->next;
-        av_free_packet(&pktl->pkt);
+        av_packet_unref(&pktl->pkt);
         av_free(pktl);
         pktl = next;
     }
@@ -396,7 +396,7 @@ static int vfw_read_header(AVFormatContext *s)
         codec->codec_id = AV_CODEC_ID_RAWVIDEO;
         if(biCompression == BI_RGB) {
             codec->bits_per_coded_sample = biBitCount;
-            codec->extradata = av_malloc(9 + FF_INPUT_BUFFER_PADDING_SIZE);
+            codec->extradata = av_malloc(9 + AV_INPUT_BUFFER_PADDING_SIZE);
             if (codec->extradata) {
                 codec->extradata_size = 9;
                 memcpy(codec->extradata, "BottomUp", 9);
diff --git a/libavdevice/x11grab.c b/libavdevice/x11grab.c
index bdfaa66d..9dc34721 100644
--- a/libavdevice/x11grab.c
+++ b/libavdevice/x11grab.c
@@ -526,16 +526,11 @@ static int x11grab_read_packet(AVFormatContext *s1, AVPacket *pkt)
     int64_t curtime, delay;
     struct timespec ts;
 
-    /* Calculate the time of the next frame */
-    s->time_frame += INT64_C(1000000);
-
     /* wait based on the frame rate */
     for (;;) {
         curtime = av_gettime();
         delay   = s->time_frame * av_q2d(s->time_base) - curtime;
         if (delay <= 0) {
-            if (delay < INT64_C(-1000000) * av_q2d(s->time_base))
-                s->time_frame += INT64_C(1000000);
             break;
         }
         ts.tv_sec  = delay / 1000000;
@@ -543,6 +538,11 @@ static int x11grab_read_packet(AVFormatContext *s1, AVPacket *pkt)
         nanosleep(&ts, NULL);
     }
 
+    /* Calculate the time of the next frame */
+    do {
+      s->time_frame += INT64_C(1000000);
+    } while ((s->time_frame * av_q2d(s->time_base) - curtime) <= 0);
+
     av_init_packet(pkt);
     pkt->data = image->data;
     pkt->size = s->frame_size;
diff --git a/libavdevice/xcbgrab.c b/libavdevice/xcbgrab.c
index 166575c9..2da7ec7d 100644
--- a/libavdevice/xcbgrab.c
+++ b/libavdevice/xcbgrab.c
@@ -231,7 +231,7 @@ static int xcbgrab_frame_shm(AVFormatContext *s, AVPacket *pkt)
     xcb_shm_get_image_reply_t *img;
     xcb_drawable_t drawable = c->screen->root;
     uint8_t *data;
-    int size = c->frame_size + FF_INPUT_BUFFER_PADDING_SIZE;
+    int size = c->frame_size + AV_INPUT_BUFFER_PADDING_SIZE;
     int id   = shmget(IPC_PRIVATE, size, IPC_CREAT | 0777);
     xcb_generic_error_t *e = NULL;
 
@@ -591,7 +591,7 @@ static void setup_window(AVFormatContext *s)
     uint32_t values[] = { 1,
                           XCB_EVENT_MASK_EXPOSURE |
                           XCB_EVENT_MASK_STRUCTURE_NOTIFY };
-    xcb_rectangle_t rect = { 0, 0, c->width, c->height };
+    av_unused xcb_rectangle_t rect = { 0, 0, c->width, c->height };
 
     c->window = xcb_generate_id(c->conn);
 
diff --git a/libavdevice/xv.c b/libavdevice/xv.c
index c19c15c2..64cddeb0 100644
--- a/libavdevice/xv.c
+++ b/libavdevice/xv.c
@@ -291,7 +291,8 @@ static int xv_repaint(AVFormatContext *s)
     return 0;
 }
 
-static int write_picture(AVFormatContext *s, AVPicture *pict)
+static int write_picture(AVFormatContext *s, uint8_t *input_data[4],
+                         int linesize[4])
 {
     XVContext *xv = s->priv_data;
     XvImage *img = xv->yuv_image;
@@ -313,18 +314,20 @@ static int write_picture(AVFormatContext *s, AVPicture *pict)
         }
     }
 
-    av_image_copy(data, img->pitches, (const uint8_t **)pict->data, pict->linesize,
+    av_image_copy(data, img->pitches, (const uint8_t **)input_data, linesize,
                   xv->image_format, img->width, img->height);
     return xv_repaint(s);
 }
 
 static int xv_write_packet(AVFormatContext *s, AVPacket *pkt)
 {
-    AVPicture pict;
     AVCodecContext *ctx = s->streams[0]->codec;
+    uint8_t *data[4];
+    int linesize[4];
 
-    avpicture_fill(&pict, pkt->data, ctx->pix_fmt, ctx->width, ctx->height);
-    return write_picture(s, &pict);
+    av_image_fill_arrays(data, linesize, pkt->data, ctx->pix_fmt,
+                         ctx->width, ctx->height, 1);
+    return write_picture(s, data, linesize);
 }
 
 static int xv_write_frame(AVFormatContext *s, int stream_index, AVFrame **frame,
@@ -333,7 +336,7 @@ static int xv_write_frame(AVFormatContext *s, int stream_index, AVFrame **frame,
     /* xv_write_header() should have accepted only supported formats */
     if ((flags & AV_WRITE_UNCODED_FRAME_QUERY))
         return 0;
-    return write_picture(s, (AVPicture *)*frame);
+    return write_picture(s, (*frame)->data, (*frame)->linesize);
 }
 
 static int xv_control_message(AVFormatContext *s, int type, void *data, size_t data_size)
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index bf5a5498..89165881 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -2,9 +2,7 @@ include $(SUBDIR)../config.mak
 
 NAME = avfilter
 
-HEADERS = asrc_abuffer.h                                                \
-          avcodec.h                                                     \
-          avfilter.h                                                    \
+HEADERS = avfilter.h                                                    \
           avfiltergraph.h                                               \
           buffersink.h                                                  \
           buffersrc.h                                                   \
@@ -14,35 +12,43 @@ OBJS = allfilters.o                                                     \
        audio.o                                                          \
        avfilter.o                                                       \
        avfiltergraph.o                                                  \
-       buffer.o                                                         \
        buffersink.o                                                     \
        buffersrc.o                                                      \
        drawutils.o                                                      \
        fifo.o                                                           \
        formats.o                                                        \
+       framepool.o                                                      \
        graphdump.o                                                      \
        graphparser.o                                                    \
        opencl_allkernels.o                                              \
        transform.o                                                      \
        video.o                                                          \
 
-
-OBJS-$(CONFIG_AVCODEC)                       += avcodec.o
-
+OBJS-$(CONFIG_ACOMPRESSOR_FILTER)            += af_sidechaincompress.o
+OBJS-$(CONFIG_ACROSSFADE_FILTER)             += af_afade.o
 OBJS-$(CONFIG_ADELAY_FILTER)                 += af_adelay.o
 OBJS-$(CONFIG_AECHO_FILTER)                  += af_aecho.o
+OBJS-$(CONFIG_AEMPHASIS_FILTER)              += af_aemphasis.o
+OBJS-$(CONFIG_AFFTFILT_FILTER)               += af_afftfilt.o window_func.o
+OBJS-$(CONFIG_ANEQUALIZER_FILTER)            += af_anequalizer.o
 OBJS-$(CONFIG_AEVAL_FILTER)                  += aeval.o
 OBJS-$(CONFIG_AFADE_FILTER)                  += af_afade.o
 OBJS-$(CONFIG_AFORMAT_FILTER)                += af_aformat.o
+OBJS-$(CONFIG_AGATE_FILTER)                  += af_agate.o
 OBJS-$(CONFIG_AINTERLEAVE_FILTER)            += f_interleave.o
+OBJS-$(CONFIG_ALIMITER_FILTER)               += af_alimiter.o
 OBJS-$(CONFIG_ALLPASS_FILTER)                += af_biquads.o
 OBJS-$(CONFIG_AMERGE_FILTER)                 += af_amerge.o
+OBJS-$(CONFIG_AMETADATA_FILTER)              += f_metadata.o
 OBJS-$(CONFIG_AMIX_FILTER)                   += af_amix.o
 OBJS-$(CONFIG_ANULL_FILTER)                  += af_anull.o
 OBJS-$(CONFIG_APAD_FILTER)                   += af_apad.o
 OBJS-$(CONFIG_APERMS_FILTER)                 += f_perms.o
 OBJS-$(CONFIG_APHASER_FILTER)                += af_aphaser.o generate_wave_table.o
+OBJS-$(CONFIG_APULSATOR_FILTER)              += af_apulsator.o
+OBJS-$(CONFIG_AREALTIME_FILTER)              += f_realtime.o
 OBJS-$(CONFIG_ARESAMPLE_FILTER)              += af_aresample.o
+OBJS-$(CONFIG_AREVERSE_FILTER)               += f_reverse.o
 OBJS-$(CONFIG_ASELECT_FILTER)                += f_select.o
 OBJS-$(CONFIG_ASENDCMD_FILTER)               += f_sendcmd.o
 OBJS-$(CONFIG_ASETNSAMPLES_FILTER)           += af_asetnsamples.o
@@ -52,7 +58,7 @@ OBJS-$(CONFIG_ASETTB_FILTER)                 += settb.o
 OBJS-$(CONFIG_ASHOWINFO_FILTER)              += af_ashowinfo.o
 OBJS-$(CONFIG_ASPLIT_FILTER)                 += split.o
 OBJS-$(CONFIG_ASTATS_FILTER)                 += af_astats.o
-OBJS-$(CONFIG_ASTREAMSYNC_FILTER)            += af_astreamsync.o
+OBJS-$(CONFIG_ASTREAMSELECT_FILTER)          += f_streamselect.o
 OBJS-$(CONFIG_ASYNCTS_FILTER)                += af_asyncts.o
 OBJS-$(CONFIG_ATEMPO_FILTER)                 += af_atempo.o
 OBJS-$(CONFIG_ATRIM_FILTER)                  += trim.o
@@ -66,10 +72,13 @@ OBJS-$(CONFIG_CHANNELMAP_FILTER)             += af_channelmap.o
 OBJS-$(CONFIG_CHANNELSPLIT_FILTER)           += af_channelsplit.o
 OBJS-$(CONFIG_CHORUS_FILTER)                 += af_chorus.o generate_wave_table.o
 OBJS-$(CONFIG_COMPAND_FILTER)                += af_compand.o
+OBJS-$(CONFIG_COMPENSATIONDELAY_FILTER)      += af_compensationdelay.o
 OBJS-$(CONFIG_DCSHIFT_FILTER)                += af_dcshift.o
+OBJS-$(CONFIG_DYNAUDNORM_FILTER)             += af_dynaudnorm.o
 OBJS-$(CONFIG_EARWAX_FILTER)                 += af_earwax.o
 OBJS-$(CONFIG_EBUR128_FILTER)                += f_ebur128.o
 OBJS-$(CONFIG_EQUALIZER_FILTER)              += af_biquads.o
+OBJS-$(CONFIG_EXTRASTEREO_FILTER)            += af_extrastereo.o
 OBJS-$(CONFIG_FLANGER_FILTER)                += af_flanger.o generate_wave_table.o
 OBJS-$(CONFIG_HIGHPASS_FILTER)               += af_biquads.o
 OBJS-$(CONFIG_JOIN_FILTER)                   += af_join.o
@@ -78,13 +87,22 @@ OBJS-$(CONFIG_LOWPASS_FILTER)                += af_biquads.o
 OBJS-$(CONFIG_PAN_FILTER)                    += af_pan.o
 OBJS-$(CONFIG_REPLAYGAIN_FILTER)             += af_replaygain.o
 OBJS-$(CONFIG_RESAMPLE_FILTER)               += af_resample.o
+OBJS-$(CONFIG_RUBBERBAND_FILTER)             += af_rubberband.o
+OBJS-$(CONFIG_SIDECHAINCOMPRESS_FILTER)      += af_sidechaincompress.o
+OBJS-$(CONFIG_SIDECHAINGATE_FILTER)          += af_agate.o
 OBJS-$(CONFIG_SILENCEDETECT_FILTER)          += af_silencedetect.o
 OBJS-$(CONFIG_SILENCEREMOVE_FILTER)          += af_silenceremove.o
+OBJS-$(CONFIG_SOFALIZER_FILTER)              += af_sofalizer.o
+OBJS-$(CONFIG_STEREOTOOLS_FILTER)            += af_stereotools.o
+OBJS-$(CONFIG_STEREOWIDEN_FILTER)            += af_stereowiden.o
 OBJS-$(CONFIG_TREBLE_FILTER)                 += af_biquads.o
+OBJS-$(CONFIG_TREMOLO_FILTER)                += af_tremolo.o
+OBJS-$(CONFIG_VIBRATO_FILTER)                += af_vibrato.o generate_wave_table.o
 OBJS-$(CONFIG_VOLUME_FILTER)                 += af_volume.o
 OBJS-$(CONFIG_VOLUMEDETECT_FILTER)           += af_volumedetect.o
 
 OBJS-$(CONFIG_AEVALSRC_FILTER)               += aeval.o
+OBJS-$(CONFIG_ANOISESRC_FILTER)              += asrc_anoisesrc.o
 OBJS-$(CONFIG_ANULLSRC_FILTER)               += asrc_anullsrc.o
 OBJS-$(CONFIG_FLITE_FILTER)                  += asrc_flite.o
 OBJS-$(CONFIG_SINE_FILTER)                   += asrc_sine.o
@@ -94,33 +112,43 @@ OBJS-$(CONFIG_ANULLSINK_FILTER)              += asink_anullsink.o
 OBJS-$(CONFIG_ASS_FILTER)                    += vf_subtitles.o
 OBJS-$(CONFIG_ALPHAEXTRACT_FILTER)           += vf_extractplanes.o
 OBJS-$(CONFIG_ALPHAMERGE_FILTER)             += vf_alphamerge.o
+OBJS-$(CONFIG_ATADENOISE_FILTER)             += vf_atadenoise.o
 OBJS-$(CONFIG_BBOX_FILTER)                   += bbox.o vf_bbox.o
 OBJS-$(CONFIG_BLACKDETECT_FILTER)            += vf_blackdetect.o
 OBJS-$(CONFIG_BLACKFRAME_FILTER)             += vf_blackframe.o
 OBJS-$(CONFIG_BLEND_FILTER)                  += vf_blend.o dualinput.o framesync.o
 OBJS-$(CONFIG_BOXBLUR_FILTER)                += vf_boxblur.o
+OBJS-$(CONFIG_CHROMAKEY_FILTER)              += vf_chromakey.o
 OBJS-$(CONFIG_CODECVIEW_FILTER)              += vf_codecview.o
 OBJS-$(CONFIG_COLORBALANCE_FILTER)           += vf_colorbalance.o
 OBJS-$(CONFIG_COLORCHANNELMIXER_FILTER)      += vf_colorchannelmixer.o
+OBJS-$(CONFIG_COLORKEY_FILTER)               += vf_colorkey.o
 OBJS-$(CONFIG_COLORLEVELS_FILTER)            += vf_colorlevels.o
 OBJS-$(CONFIG_COLORMATRIX_FILTER)            += vf_colormatrix.o
+OBJS-$(CONFIG_CONVOLUTION_FILTER)            += vf_convolution.o
 OBJS-$(CONFIG_COPY_FILTER)                   += vf_copy.o
 OBJS-$(CONFIG_COVER_RECT_FILTER)             += vf_cover_rect.o lavfutils.o
 OBJS-$(CONFIG_CROP_FILTER)                   += vf_crop.o
 OBJS-$(CONFIG_CROPDETECT_FILTER)             += vf_cropdetect.o
 OBJS-$(CONFIG_CURVES_FILTER)                 += vf_curves.o
 OBJS-$(CONFIG_DCTDNOIZ_FILTER)               += vf_dctdnoiz.o
+OBJS-$(CONFIG_DEBAND_FILTER)                 += vf_deband.o
 OBJS-$(CONFIG_DECIMATE_FILTER)               += vf_decimate.o
+OBJS-$(CONFIG_DEFLATE_FILTER)                += vf_neighbor.o
 OBJS-$(CONFIG_DEJUDDER_FILTER)               += vf_dejudder.o
 OBJS-$(CONFIG_DELOGO_FILTER)                 += vf_delogo.o
 OBJS-$(CONFIG_DESHAKE_FILTER)                += vf_deshake.o
 OBJS-$(CONFIG_DETELECINE_FILTER)             += vf_detelecine.o
+OBJS-$(CONFIG_DILATION_FILTER)               += vf_neighbor.o
+OBJS-$(CONFIG_DISPLACE_FILTER)               += vf_displace.o framesync.o
 OBJS-$(CONFIG_DRAWBOX_FILTER)                += vf_drawbox.o
+OBJS-$(CONFIG_DRAWGRAPH_FILTER)              += f_drawgraph.o
 OBJS-$(CONFIG_DRAWGRID_FILTER)               += vf_drawbox.o
 OBJS-$(CONFIG_DRAWTEXT_FILTER)               += vf_drawtext.o
 OBJS-$(CONFIG_ELBG_FILTER)                   += vf_elbg.o
 OBJS-$(CONFIG_EDGEDETECT_FILTER)             += vf_edgedetect.o
 OBJS-$(CONFIG_EQ_FILTER)                     += vf_eq.o
+OBJS-$(CONFIG_EROSION_FILTER)                += vf_neighbor.o
 OBJS-$(CONFIG_EXTRACTPLANES_FILTER)          += vf_extractplanes.o
 OBJS-$(CONFIG_FADE_FILTER)                   += vf_fade.o
 OBJS-$(CONFIG_FFTFILT_FILTER)                += vf_fftfilt.o
@@ -129,9 +157,10 @@ OBJS-$(CONFIG_FIELDMATCH_FILTER)             += vf_fieldmatch.o
 OBJS-$(CONFIG_FIELDORDER_FILTER)             += vf_fieldorder.o
 OBJS-$(CONFIG_FIND_RECT_FILTER)              += vf_find_rect.o lavfutils.o
 OBJS-$(CONFIG_FORMAT_FILTER)                 += vf_format.o
-OBJS-$(CONFIG_FRAMESTEP_FILTER)              += vf_framestep.o
 OBJS-$(CONFIG_FPS_FILTER)                    += vf_fps.o
 OBJS-$(CONFIG_FRAMEPACK_FILTER)              += vf_framepack.o
+OBJS-$(CONFIG_FRAMERATE_FILTER)              += vf_framerate.o
+OBJS-$(CONFIG_FRAMESTEP_FILTER)              += vf_framestep.o
 OBJS-$(CONFIG_FREI0R_FILTER)                 += vf_frei0r.o
 OBJS-$(CONFIG_FSPP_FILTER)                   += vf_fspp.o
 OBJS-$(CONFIG_GEQ_FILTER)                    += vf_geq.o
@@ -142,9 +171,11 @@ OBJS-$(CONFIG_HISTEQ_FILTER)                 += vf_histeq.o
 OBJS-$(CONFIG_HISTOGRAM_FILTER)              += vf_histogram.o
 OBJS-$(CONFIG_HQDN3D_FILTER)                 += vf_hqdn3d.o
 OBJS-$(CONFIG_HQX_FILTER)                    += vf_hqx.o
+OBJS-$(CONFIG_HSTACK_FILTER)                 += vf_stack.o framesync.o
 OBJS-$(CONFIG_HUE_FILTER)                    += vf_hue.o
 OBJS-$(CONFIG_IDET_FILTER)                   += vf_idet.o
 OBJS-$(CONFIG_IL_FILTER)                     += vf_il.o
+OBJS-$(CONFIG_INFLATE_FILTER)                += vf_neighbor.o
 OBJS-$(CONFIG_INTERLACE_FILTER)              += vf_interlace.o
 OBJS-$(CONFIG_INTERLEAVE_FILTER)             += f_interleave.o
 OBJS-$(CONFIG_KERNDEINT_FILTER)              += vf_kerndeint.o
@@ -153,13 +184,17 @@ OBJS-$(CONFIG_LUT3D_FILTER)                  += vf_lut3d.o
 OBJS-$(CONFIG_LUT_FILTER)                    += vf_lut.o
 OBJS-$(CONFIG_LUTRGB_FILTER)                 += vf_lut.o
 OBJS-$(CONFIG_LUTYUV_FILTER)                 += vf_lut.o
+OBJS-$(CONFIG_MASKEDMERGE_FILTER)            += vf_maskedmerge.o framesync.o
 OBJS-$(CONFIG_MCDEINT_FILTER)                += vf_mcdeint.o
 OBJS-$(CONFIG_MERGEPLANES_FILTER)            += vf_mergeplanes.o framesync.o
+OBJS-$(CONFIG_METADATA_FILTER)               += f_metadata.o
 OBJS-$(CONFIG_MPDECIMATE_FILTER)             += vf_mpdecimate.o
 OBJS-$(CONFIG_NEGATE_FILTER)                 += vf_lut.o
+OBJS-$(CONFIG_NNEDI_FILTER)                  += vf_nnedi.o
 OBJS-$(CONFIG_NOFORMAT_FILTER)               += vf_format.o
 OBJS-$(CONFIG_NOISE_FILTER)                  += vf_noise.o
 OBJS-$(CONFIG_NULL_FILTER)                   += vf_null.o
+OBJS-$(CONFIG_OCR_FILTER)                    += vf_ocr.o
 OBJS-$(CONFIG_OCV_FILTER)                    += vf_libopencv.o
 OBJS-$(CONFIG_OPENCL)                        += deshake_opencl.o unsharp_opencl.o
 OBJS-$(CONFIG_OVERLAY_FILTER)                += vf_overlay.o dualinput.o framesync.o
@@ -176,13 +211,19 @@ OBJS-$(CONFIG_PP7_FILTER)                    += vf_pp7.o
 OBJS-$(CONFIG_PSNR_FILTER)                   += vf_psnr.o dualinput.o framesync.o
 OBJS-$(CONFIG_PULLUP_FILTER)                 += vf_pullup.o
 OBJS-$(CONFIG_QP_FILTER)                     += vf_qp.o
+OBJS-$(CONFIG_RANDOM_FILTER)                 += vf_random.o
+OBJS-$(CONFIG_REALTIME_FILTER)               += f_realtime.o
+OBJS-$(CONFIG_REMOVEGRAIN_FILTER)            += vf_removegrain.o
 OBJS-$(CONFIG_REMOVELOGO_FILTER)             += bbox.o lswsutils.o lavfutils.o vf_removelogo.o
 OBJS-$(CONFIG_REPEATFIELDS_FILTER)           += vf_repeatfields.o
+OBJS-$(CONFIG_REVERSE_FILTER)                += f_reverse.o
 OBJS-$(CONFIG_ROTATE_FILTER)                 += vf_rotate.o
 OBJS-$(CONFIG_SEPARATEFIELDS_FILTER)         += vf_separatefields.o
 OBJS-$(CONFIG_SAB_FILTER)                    += vf_sab.o
 OBJS-$(CONFIG_SCALE_FILTER)                  += vf_scale.o
+OBJS-$(CONFIG_SCALE2REF_FILTER)              += vf_scale.o
 OBJS-$(CONFIG_SELECT_FILTER)                 += f_select.o
+OBJS-$(CONFIG_SELECTIVECOLOR_FILTER)         += vf_selectivecolor.o
 OBJS-$(CONFIG_SENDCMD_FILTER)                += f_sendcmd.o
 OBJS-$(CONFIG_SETDAR_FILTER)                 += vf_aspect.o
 OBJS-$(CONFIG_SETFIELD_FILTER)               += vf_setfield.o
@@ -191,14 +232,18 @@ OBJS-$(CONFIG_SETSAR_FILTER)                 += vf_aspect.o
 OBJS-$(CONFIG_SETTB_FILTER)                  += settb.o
 OBJS-$(CONFIG_SHOWINFO_FILTER)               += vf_showinfo.o
 OBJS-$(CONFIG_SHOWPALETTE_FILTER)            += vf_showpalette.o
+OBJS-$(CONFIG_SHUFFLEFRAMES_FILTER)          += vf_shuffleframes.o
 OBJS-$(CONFIG_SHUFFLEPLANES_FILTER)          += vf_shuffleplanes.o
 OBJS-$(CONFIG_SIGNALSTATS_FILTER)            += vf_signalstats.o
 OBJS-$(CONFIG_SMARTBLUR_FILTER)              += vf_smartblur.o
 OBJS-$(CONFIG_SPLIT_FILTER)                  += split.o
 OBJS-$(CONFIG_SPP_FILTER)                    += vf_spp.o
+OBJS-$(CONFIG_SSIM_FILTER)                   += vf_ssim.o dualinput.o framesync.o
 OBJS-$(CONFIG_STEREO3D_FILTER)               += vf_stereo3d.o
+OBJS-$(CONFIG_STREAMSELECT_FILTER)           += f_streamselect.o
 OBJS-$(CONFIG_SUBTITLES_FILTER)              += vf_subtitles.o
 OBJS-$(CONFIG_SUPER2XSAI_FILTER)             += vf_super2xsai.o
+OBJS-$(CONFIG_SWAPRECT_FILTER)               += vf_swaprect.o
 OBJS-$(CONFIG_SWAPUV_FILTER)                 += vf_swapuv.o
 OBJS-$(CONFIG_TBLEND_FILTER)                 += vf_blend.o dualinput.o framesync.o
 OBJS-$(CONFIG_TELECINE_FILTER)               += vf_telecine.o
@@ -209,16 +254,22 @@ OBJS-$(CONFIG_TRANSPOSE_FILTER)              += vf_transpose.o
 OBJS-$(CONFIG_TRIM_FILTER)                   += trim.o
 OBJS-$(CONFIG_UNSHARP_FILTER)                += vf_unsharp.o
 OBJS-$(CONFIG_USPP_FILTER)                   += vf_uspp.o
+OBJS-$(CONFIG_VECTORSCOPE_FILTER)            += vf_vectorscope.o
 OBJS-$(CONFIG_VFLIP_FILTER)                  += vf_vflip.o
 OBJS-$(CONFIG_VIDSTABDETECT_FILTER)          += vidstabutils.o vf_vidstabdetect.o
 OBJS-$(CONFIG_VIDSTABTRANSFORM_FILTER)       += vidstabutils.o vf_vidstabtransform.o
 OBJS-$(CONFIG_VIGNETTE_FILTER)               += vf_vignette.o
+OBJS-$(CONFIG_VSTACK_FILTER)                 += vf_stack.o framesync.o
 OBJS-$(CONFIG_W3FDIF_FILTER)                 += vf_w3fdif.o
+OBJS-$(CONFIG_WAVEFORM_FILTER)               += vf_waveform.o
 OBJS-$(CONFIG_XBR_FILTER)                    += vf_xbr.o
 OBJS-$(CONFIG_YADIF_FILTER)                  += vf_yadif.o
 OBJS-$(CONFIG_ZMQ_FILTER)                    += f_zmq.o
 OBJS-$(CONFIG_ZOOMPAN_FILTER)                += vf_zoompan.o
+OBJS-$(CONFIG_ZSCALE_FILTER)                 += vf_zscale.o
 
+OBJS-$(CONFIG_ALLRGB_FILTER)                 += vsrc_testsrc.o
+OBJS-$(CONFIG_ALLYUV_FILTER)                 += vsrc_testsrc.o
 OBJS-$(CONFIG_CELLAUTO_FILTER)               += vsrc_cellauto.o
 OBJS-$(CONFIG_COLOR_FILTER)                  += vsrc_testsrc.o
 OBJS-$(CONFIG_FREI0R_SRC_FILTER)             += vf_frei0r.o
@@ -231,16 +282,24 @@ OBJS-$(CONFIG_RGBTESTSRC_FILTER)             += vsrc_testsrc.o
 OBJS-$(CONFIG_SMPTEBARS_FILTER)              += vsrc_testsrc.o
 OBJS-$(CONFIG_SMPTEHDBARS_FILTER)            += vsrc_testsrc.o
 OBJS-$(CONFIG_TESTSRC_FILTER)                += vsrc_testsrc.o
+OBJS-$(CONFIG_TESTSRC2_FILTER)               += vsrc_testsrc.o
 
 OBJS-$(CONFIG_NULLSINK_FILTER)               += vsink_nullsink.o
 
 # multimedia filters
+OBJS-$(CONFIG_ADRAWGRAPH_FILTER)             += f_drawgraph.o
+OBJS-$(CONFIG_AHISTOGRAM_FILTER)             += avf_ahistogram.o
+OBJS-$(CONFIG_APHASEMETER_FILTER)            += avf_aphasemeter.o
 OBJS-$(CONFIG_AVECTORSCOPE_FILTER)           += avf_avectorscope.o
 OBJS-$(CONFIG_CONCAT_FILTER)                 += avf_concat.o
-OBJS-$(CONFIG_SHOWCQT_FILTER)                += avf_showcqt.o
-OBJS-$(CONFIG_SHOWSPECTRUM_FILTER)           += avf_showspectrum.o
+OBJS-$(CONFIG_SHOWCQT_FILTER)                += avf_showcqt.o lswsutils.o lavfutils.o
+OBJS-$(CONFIG_SHOWFREQS_FILTER)              += avf_showfreqs.o window_func.o
+OBJS-$(CONFIG_SHOWSPECTRUM_FILTER)           += avf_showspectrum.o window_func.o
+OBJS-$(CONFIG_SHOWSPECTRUMPIC_FILTER)        += avf_showspectrum.o window_func.o
+OBJS-$(CONFIG_SHOWVOLUME_FILTER)             += avf_showvolume.o
 OBJS-$(CONFIG_SHOWWAVES_FILTER)              += avf_showwaves.o
 OBJS-$(CONFIG_SHOWWAVESPIC_FILTER)           += avf_showwaves.o
+OBJS-$(CONFIG_SPECTRUMSYNTH_FILTER)          += vaf_spectrumsynth.o window_func.o
 
 # multimedia sources
 OBJS-$(CONFIG_AMOVIE_FILTER)                 += src_movie.o
diff --git a/libavfilter/aeval.c b/libavfilter/aeval.c
index b6c420a4..ac9dea8f 100644
--- a/libavfilter/aeval.c
+++ b/libavfilter/aeval.c
@@ -350,36 +350,34 @@ static int aeval_query_formats(AVFilterContext *ctx)
     static const enum AVSampleFormat sample_fmts[] = {
         AV_SAMPLE_FMT_DBLP, AV_SAMPLE_FMT_NONE
     };
+    int ret;
 
     // inlink supports any channel layout
     layouts = ff_all_channel_counts();
-    ff_channel_layouts_ref(layouts, &inlink->out_channel_layouts);
+    if ((ret = ff_channel_layouts_ref(layouts, &inlink->out_channel_layouts)) < 0)
+        return ret;
 
     if (eval->same_chlayout) {
         layouts = ff_all_channel_counts();
-        if (!layouts)
-            return AVERROR(ENOMEM);
-            ff_set_common_channel_layouts(ctx, layouts);
+        if ((ret = ff_set_common_channel_layouts(ctx, layouts)) < 0)
+            return ret;
     } else {
         // outlink supports only requested output channel layout
         layouts = NULL;
-        ff_add_channel_layout(&layouts,
+        if ((ret = ff_add_channel_layout(&layouts,
                               eval->out_channel_layout ? eval->out_channel_layout :
-                              FF_COUNT2LAYOUT(eval->nb_channels));
-        ff_channel_layouts_ref(layouts, &outlink->in_channel_layouts);
+                              FF_COUNT2LAYOUT(eval->nb_channels))) < 0)
+            return ret;
+        if ((ret = ff_channel_layouts_ref(layouts, &outlink->in_channel_layouts)) < 0)
+            return ret;
     }
 
     formats = ff_make_format_list(sample_fmts);
-    if (!formats)
-        return AVERROR(ENOMEM);
-    ff_set_common_formats(ctx, formats);
+    if ((ret = ff_set_common_formats(ctx, formats)) < 0)
+        return ret;
 
     formats = ff_all_samplerates();
-    if (!formats)
-        return AVERROR(ENOMEM);
-    ff_set_common_samplerates(ctx, formats);
-
-    return 0;
+    return ff_set_common_samplerates(ctx, formats);
 }
 
 static int aeval_config_output(AVFilterLink *outlink)
diff --git a/libavfilter/af_adelay.c b/libavfilter/af_adelay.c
index ca141ce3..09bf3c77 100644
--- a/libavfilter/af_adelay.c
+++ b/libavfilter/af_adelay.c
@@ -66,7 +66,7 @@ static int query_formats(AVFilterContext *ctx)
     };
     int ret;
 
-    layouts = ff_all_channel_layouts();
+    layouts = ff_all_channel_counts();
     if (!layouts)
         return AVERROR(ENOMEM);
     ret = ff_set_common_channel_layouts(ctx, layouts);
@@ -192,8 +192,10 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
         return ff_filter_frame(ctx->outputs[0], frame);
 
     out_frame = ff_get_audio_buffer(inlink, frame->nb_samples);
-    if (!out_frame)
+    if (!out_frame) {
+        av_frame_free(&frame);
         return AVERROR(ENOMEM);
+    }
     av_frame_copy_props(out_frame, frame);
 
     for (i = 0; i < s->nb_delays; i++) {
diff --git a/libavfilter/af_aecho.c b/libavfilter/af_aecho.c
index 8e7a39ec..82049e95 100644
--- a/libavfilter/af_aecho.c
+++ b/libavfilter/af_aecho.c
@@ -160,7 +160,7 @@ static int query_formats(AVFilterContext *ctx)
     };
     int ret;
 
-    layouts = ff_all_channel_layouts();
+    layouts = ff_all_channel_counts();
     if (!layouts)
         return AVERROR(ENOMEM);
     ret = ff_set_common_channel_layouts(ctx, layouts);
@@ -279,8 +279,10 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
         out_frame = frame;
     } else {
         out_frame = ff_get_audio_buffer(inlink, frame->nb_samples);
-        if (!out_frame)
+        if (!out_frame) {
+            av_frame_free(&frame);
             return AVERROR(ENOMEM);
+        }
         av_frame_copy_props(out_frame, frame);
     }
 
diff --git a/libavfilter/af_aemphasis.c b/libavfilter/af_aemphasis.c
new file mode 100644
index 00000000..a5b8e305
--- /dev/null
+++ b/libavfilter/af_aemphasis.c
@@ -0,0 +1,369 @@
+/*
+ * Copyright (c) 2001-2010 Krzysztof Foltman, Markus Schmidt, Thor Harald Johansen, Damien Zammit and others
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/opt.h"
+#include "avfilter.h"
+#include "internal.h"
+#include "audio.h"
+
+typedef struct BiquadCoeffs { /* bare biquad taps; filled in config_input() and evaluated by freq_gain() */
+    double a0, a1, a2, b1, b2; /* a*: numerator taps, b*: denominator taps (the leading denominator term is 1) */
+} BiquadCoeffs;
+
+typedef struct BiquadD2 { /* biquad taps together with the running state that biquad() updates */
+    double a0, a1, a2, b1, b2, w1, w2; /* w1, w2: the two most recent internal states, w1 being the newer one */
+} BiquadD2;
+
+typedef struct RIAACurve { /* filter pair kept once per channel */
+    BiquadD2 r1;     /* main curve stage; its taps are computed in config_input() */
+    BiquadD2 brickw; /* low-pass stage; its taps come from set_lp_rbj() */
+    int use_brickw;  /* nonzero: filter_frame() passes samples through brickw before r1 */
+} RIAACurve;
+
+typedef struct AudioEmphasisContext { /* private context of the aemphasis filter */
+    const AVClass *class;
+    int mode, type;             /* option values: mode 0 = reproduction, 1 = production; type = curve index 0..8 */
+    double level_in, level_out; /* gains applied to each sample before and after filtering */
+
+    RIAACurve *rc;              /* array indexed by channel; allocated in config_input(), freed in uninit() */
+} AudioEmphasisContext;
+
+#define OFFSET(x) offsetof(AudioEmphasisContext, x) /* byte offset of a context field, used by the option table */
+#define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption aemphasis_options[] = { /* user options; the CONST rows name the values of the "mode" and "type" units */
+    { "level_in",      "set input gain", OFFSET(level_in),  AV_OPT_TYPE_DOUBLE, {.dbl=1}, 0, 64, FLAGS },
+    { "level_out",    "set output gain", OFFSET(level_out), AV_OPT_TYPE_DOUBLE, {.dbl=1}, 0, 64, FLAGS },
+    { "mode",         "set filter mode", OFFSET(mode), AV_OPT_TYPE_INT,   {.i64=0}, 0, 1, FLAGS, "mode" }, /* default 0 = "reproduction" */
+    { "reproduction",              NULL,            0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "mode" },
+    { "production",                NULL,            0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "mode" },
+    { "type",         "set filter type", OFFSET(type), AV_OPT_TYPE_INT,   {.i64=4}, 0, 8, FLAGS, "type" }, /* default 4 = "cd"; values match the switch in config_input() */
+    { "col",                 "Columbia",            0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "type" },
+    { "emi",                      "EMI",            0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "type" },
+    { "bsi",              "BSI (78RPM)",            0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, FLAGS, "type" },
+    { "riaa",                    "RIAA",            0, AV_OPT_TYPE_CONST, {.i64=3}, 0, 0, FLAGS, "type" },
+    { "cd",         "Compact Disc (CD)",            0, AV_OPT_TYPE_CONST, {.i64=4}, 0, 0, FLAGS, "type" },
+    { "50fm",               "50µs (FM)",            0, AV_OPT_TYPE_CONST, {.i64=5}, 0, 0, FLAGS, "type" },
+    { "75fm",               "75µs (FM)",            0, AV_OPT_TYPE_CONST, {.i64=6}, 0, 0, FLAGS, "type" },
+    { "50kf",            "50µs (FM-KF)",            0, AV_OPT_TYPE_CONST, {.i64=7}, 0, 0, FLAGS, "type" },
+    { "75kf",            "75µs (FM-KF)",            0, AV_OPT_TYPE_CONST, {.i64=8}, 0, 0, FLAGS, "type" },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(aemphasis);
+
+static inline double biquad(BiquadD2 *bq, double in)
+{   /* One direct-form-II step: consumes sample `in`, shifts the two-entry state, returns the filtered sample. */
+    const double prev1 = bq->w1, prev2 = bq->w2;
+    const double state = in - prev1 * bq->b1 - prev2 * bq->b2;               /* feedback part */
+    const double result = state * bq->a0 + prev1 * bq->a1 + prev2 * bq->a2;  /* feed-forward part */
+
+    bq->w1 = state;  /* newest state */
+    bq->w2 = prev1;  /* former newest state ages by one sample */
+
+    return result;
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{   /* Filters one frame of interleaved doubles (in place when `in` is writable) and sends it to output 0. */
+    AVFilterContext *ctx = inlink->dst;
+    AudioEmphasisContext *s = ctx->priv;
+    const double gain_in = s->level_in, gain_out = s->level_out;
+    const double *src = (const double *)in->data[0];
+    AVFrame *out = in;
+    double *dst;
+    int n, c;
+
+    if (!av_frame_is_writable(in)) {
+        /* Input cannot be modified: write into a new buffer that carries the same frame properties. */
+        out = ff_get_audio_buffer(inlink, in->nb_samples);
+        if (!out) {
+            av_frame_free(&in);
+            return AVERROR(ENOMEM);
+        }
+        av_frame_copy_props(out, in);
+    }
+    dst = (double *)out->data[0];
+
+    for (n = 0; n < in->nb_samples; n++, src += inlink->channels, dst += inlink->channels) {
+        for (c = 0; c < inlink->channels; c++) {
+            RIAACurve *curve = &s->rc[c];
+            double sample = src[c] * gain_in;
+            if (curve->use_brickw) /* optional low-pass stage ahead of the curve stage */
+                sample = biquad(&curve->brickw, sample);
+            dst[c] = gain_out * biquad(&curve->r1, sample);
+        }
+    }
+
+    if (out != in)
+        av_frame_free(&in);
+    return ff_filter_frame(ctx->outputs[0], out);
+}
+
+static int query_formats(AVFilterContext *ctx)
+{   /* Registers ff_all_channel_counts(), AV_SAMPLE_FMT_DBL only, and ff_all_samplerates() as the common formats. */
+    static const enum AVSampleFormat sample_fmts[] = {
+        AV_SAMPLE_FMT_DBL,
+        AV_SAMPLE_FMT_NONE
+    };
+    AVFilterChannelLayouts *channel_counts;
+    AVFilterFormats *list;
+    int err;
+
+    /* Channel layouts / counts. */
+    if (!(channel_counts = ff_all_channel_counts()))
+        return AVERROR(ENOMEM);
+    if ((err = ff_set_common_channel_layouts(ctx, channel_counts)) < 0)
+        return err;
+
+    /* Sample formats. */
+    if (!(list = ff_make_format_list(sample_fmts)))
+        return AVERROR(ENOMEM);
+    if ((err = ff_set_common_formats(ctx, list)) < 0)
+        return err;
+
+    /* Sample rates; the setter's result is the function's result. */
+    if (!(list = ff_all_samplerates()))
+        return AVERROR(ENOMEM);
+    err = ff_set_common_samplerates(ctx, list);
+
+    return err;
+}
+
+static inline void set_highshelf_rbj(BiquadD2 *bq, double freq, double q, double peak, double sr)
+{   /* High-shelf taps for shelf frequency freq, quality q and gain peak at sample rate sr; w1/w2 are left untouched. */
+    double amp = sqrt(peak);
+    double omega = freq * 2 * M_PI / sr;
+    double cs = cos(omega);
+    double alpha = sin(omega) / (2 * q);
+    double beta = 2 * sqrt(amp) * alpha;
+    double lead, scale;
+
+    /* Unnormalized terms: a* feed-forward, lead/b* feedback. */
+    lead   =            (amp+1) - (amp-1)*cs + beta;
+    bq->a0 =      amp*( (amp+1) + (amp-1)*cs + beta);
+    bq->a1 =   -2*amp*( (amp-1) + (amp+1)*cs);
+    bq->a2 =      amp*( (amp+1) + (amp-1)*cs - beta);
+    bq->b1 =        2*( (amp-1) - (amp+1)*cs);
+    bq->b2 =            (amp+1) - (amp-1)*cs - beta;
+    scale  = 1 / lead; /* divide everything by the leading feedback term so it becomes 1 */
+    bq->a0 *= scale;
+    bq->a1 *= scale;
+    bq->a2 *= scale;
+    bq->b1 *= scale;
+    bq->b2 *= scale;
+}
+
+static inline void set_lp_rbj(BiquadD2 *bq, double fc, double q, double sr, double gain)
+{   /* Second-order low-pass taps for cutoff fc and quality q at sample rate sr, scaled by gain; w1/w2 are left untouched. */
+    const double w = 2.0 * M_PI * fc / sr;
+    const double cos_w = cos(w);
+    const double alpha = sin(w) / (2 * q);
+    const double norm = 1.0 / (1.0 + alpha); /* reciprocal of the leading feedback term */
+    const double edge = gain * norm * (1.0 - cos_w) * 0.5;
+
+    bq->a0 = bq->a2 = edge;
+    bq->a1 = edge + edge;
+    bq->b1 = -2.0 * cos_w * norm;
+    bq->b2 = (1.0 - alpha) * norm;
+}
+
+static double freq_gain(BiquadCoeffs *c, double freq, double sr)
+{   /* Returns |(a0 + a1*z + a2*z^2)/(1 + b1*z + b2*z^2)| at z = cos(w) - i*sin(w), w = 2*pi*freq/sr. */
+    const double w = freq * (2.0 * M_PI / sr);
+    const double zr = cos(w), zi = -sin(w);
+    const double z2r = zr*zr-zi*zi; /* real part of z^2 */
+    double num, den;
+
+    num = hypot(c->a0 + c->a1*zr + c->a2*z2r, c->a1*zi + 2*c->a2*zr*zi);
+    den = hypot(1 + c->b1*zr + c->b2*z2r, c->b1*zi + 2*c->b2*zr*zi);
+
+    return num / den;
+}
+
+static int config_input(AVFilterLink *inlink)
+{ /* Designs the filters of channel 0 from type, mode and the link's sample rate, then copies them to every other channel. */
+    double i, j, k, g, t, a0, a1, a2, b1, b2, tau1, tau2, tau3;
+    double cutfreq, gain1kHz, gc, sr = inlink->sample_rate;
+    AVFilterContext *ctx = inlink->dst;
+    AudioEmphasisContext *s = ctx->priv;
+    BiquadCoeffs coeffs;
+    int ch;
+
+    s->rc = av_calloc(inlink->channels, sizeof(*s->rc)); /* one RIAACurve per channel; w1/w2 are never assigned below, so they presumably rely on zeroed memory */
+    if (!s->rc)
+        return AVERROR(ENOMEM);
+
+    switch (s->type) { /* choose i, j, k: literal for types 0-2, 1/(2*pi*tau) for 3-6; types 7 and 8 have no case and fall into default */
+    case 0: //"Columbia"
+        i = 100.;
+        j = 500.;
+        k = 1590.;
+        break;
+    case 1: //"EMI"
+        i = 70.;
+        j = 500.;
+        k = 2500.;
+        break;
+    case 2: //"BSI(78rpm)"
+        i = 50.;
+        j = 353.;
+        k = 3180.;
+        break;
+    case 3: //"RIAA"
+    default:
+        tau1 = 0.003180;
+        tau2 = 0.000318;
+        tau3 = 0.000075;
+        i = 1. / (2. * M_PI * tau1);
+        j = 1. / (2. * M_PI * tau2);
+        k = 1. / (2. * M_PI * tau3);
+        break;
+    case 4: //"CD Mastering"
+        tau1 = 0.000050;
+        tau2 = 0.000015;
+        tau3 = 0.0000001;// 1.6MHz out of audible range for null impact
+        i = 1. / (2. * M_PI * tau1);
+        j = 1. / (2. * M_PI * tau2);
+        k = 1. / (2. * M_PI * tau3);
+        break;
+    case 5: //"50µs FM (Europe)"
+        tau1 = 0.000050;
+        tau2 = tau1 / 20;// not used
+        tau3 = tau1 / 50;//
+        i = 1. / (2. * M_PI * tau1);
+        j = 1. / (2. * M_PI * tau2);
+        k = 1. / (2. * M_PI * tau3);
+        break;
+    case 6: //"75µs FM (US)"
+        tau1 = 0.000075;
+        tau2 = tau1 / 20;// not used
+        tau3 = tau1 / 50;//
+        i = 1. / (2. * M_PI * tau1);
+        j = 1. / (2. * M_PI * tau2);
+        k = 1. / (2. * M_PI * tau3);
+        break;
+    }
+
+    i *= 2 * M_PI; /* all three values are scaled by 2*pi before entering the tap formulas */
+    j *= 2 * M_PI;
+    k *= 2 * M_PI;
+
+    t = 1. / sr; /* sample period */
+
+    //swap a1 b1, a2 b2
+    if (s->type == 7 || s->type == 8) { /* "50kf"/"75kf": r1 becomes a single high shelf, brickw is bypassed; i, j, k are not read on this path */
+        double tau = (s->type == 7 ? 0.000050 : 0.000075);
+        double f = 1.0 / (2 * M_PI * tau);
+        double nyq = sr * 0.5;
+        double gain = sqrt(1.0 + nyq * nyq / (f * f)); // gain at Nyquist
+        double cfreq = sqrt((gain - 1.0) * f * f); // frequency
+        double q = 1.0;
+
+        if (s->type == 8)
+            q = pow((sr / 3269.0) + 19.5, -0.25); // somewhat poor curve-fit
+        if (s->type == 7)
+            q = pow((sr / 4750.0) + 19.5, -0.25);
+        if (s->mode == 0) /* reproduction uses the reciprocal shelf gain, production the gain itself */
+            set_highshelf_rbj(&s->rc[0].r1, cfreq, q, 1. / gain, sr);
+        else
+            set_highshelf_rbj(&s->rc[0].r1, cfreq, q, gain, sr);
+        s->rc[0].use_brickw = 0;
+    } else { /* types 0-6: r1 taps computed from i, j, k and t, with the low-pass stage enabled */
+        s->rc[0].use_brickw = 1;
+        if (s->mode == 0) { // Reproduction
+            g  = 1. / (4.+2.*i*t+2.*k*t+i*k*t*t);
+            a0 = (2.*t+j*t*t)*g;
+            a1 = (2.*j*t*t)*g;
+            a2 = (-2.*t+j*t*t)*g;
+            b1 = (-8.+2.*i*k*t*t)*g;
+            b2 = (4.-2.*i*t-2.*k*t+i*k*t*t)*g;
+        } else {  // Production
+            g  = 1. / (2.*t+j*t*t); /* same polynomials as above with the numerator and denominator roles exchanged */
+            a0 = (4.+2.*i*t+2.*k*t+i*k*t*t)*g;
+            a1 = (-8.+2.*i*k*t*t)*g;
+            a2 = (4.-2.*i*t-2.*k*t+i*k*t*t)*g;
+            b1 = (2.*j*t*t)*g;
+            b2 = (-2.*t+j*t*t)*g;
+        }
+
+        coeffs.a0 = a0;
+        coeffs.a1 = a1;
+        coeffs.a2 = a2;
+        coeffs.b1 = b1;
+        coeffs.b2 = b2;
+
+        // the coeffs above give non-normalized value, so it should be normalized to produce 0dB at 1 kHz
+        // find actual gain
+        // Note: for FM emphasis, use 100 Hz for normalization instead
+        gain1kHz = freq_gain(&coeffs, 1000.0, sr); /* NOTE(review): 1000 Hz is used for every type on this path, FM types 5/6 included - confirm against the note above */
+        // divide one filter's x[n-m] coefficients by that value
+        gc = 1.0 / gain1kHz;
+        s->rc[0].r1.a0 = coeffs.a0 * gc;
+        s->rc[0].r1.a1 = coeffs.a1 * gc;
+        s->rc[0].r1.a2 = coeffs.a2 * gc;
+        s->rc[0].r1.b1 = coeffs.b1;
+        s->rc[0].r1.b2 = coeffs.b2;
+    }
+
+    cutfreq = FFMIN(0.45 * sr, 21000.); /* low-pass cutoff: 0.45 * sample rate, capped at 21000 */
+    set_lp_rbj(&s->rc[0].brickw, cutfreq, 0.707, sr, 1.); /* set up for every type, although filter_frame() only runs it when use_brickw is nonzero */
+
+    for (ch = 1; ch < inlink->channels; ch++) { /* replicate channel 0's whole RIAACurve (taps, state, flag) */
+        memcpy(&s->rc[ch], &s->rc[0], sizeof(RIAACurve));
+    }
+
+    return 0;
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{   /* Releases the per-channel filter array allocated in config_input() and clears the pointer. */
+    AudioEmphasisContext *priv = ctx->priv;
+    av_freep(&priv->rc);
+}
+
+static const AVFilterPad avfilter_af_aemphasis_inputs[] = { /* single audio input pad */
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_AUDIO,
+        .config_props = config_input, /* computes the per-channel filter taps */
+        .filter_frame = filter_frame, /* applies the filters to each incoming frame */
+    },
+    { NULL }
+};
+
+static const AVFilterPad avfilter_af_aemphasis_outputs[] = { /* single audio output pad, no callbacks */
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_AUDIO,
+    },
+    { NULL }
+};
+
+AVFilter ff_af_aemphasis = { /* filter definition tying together the context, options, callbacks and pads above */
+    .name          = "aemphasis",
+    .description   = NULL_IF_CONFIG_SMALL("Audio emphasis."),
+    .priv_size     = sizeof(AudioEmphasisContext),
+    .priv_class    = &aemphasis_class, /* class generated by AVFILTER_DEFINE_CLASS(aemphasis) */
+    .uninit        = uninit,
+    .query_formats = query_formats,
+    .inputs        = avfilter_af_aemphasis_inputs,
+    .outputs       = avfilter_af_aemphasis_outputs,
+};
diff --git a/libavfilter/af_afade.c b/libavfilter/af_afade.c
index 8b69cefb..9acadc51 100644
--- a/libavfilter/af_afade.c
+++ b/libavfilter/af_afade.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013 Paul B Mahol
+ * Copyright (c) 2013-2015 Paul B Mahol
  *
  * This file is part of FFmpeg.
  *
@@ -23,6 +23,7 @@
  * fade audio filter
  */
 
+#include "libavutil/audio_fifo.h"
 #include "libavutil/opt.h"
 #include "audio.h"
 #include "avfilter.h"
@@ -31,62 +32,31 @@
 typedef struct {
     const AVClass *class;
     int type;
-    int curve;
+    int curve, curve2;
     int nb_samples;
     int64_t start_sample;
     int64_t duration;
     int64_t start_time;
+    int overlap;
+    int cf0_eof;
+    int crossfade_is_over;
+    AVAudioFifo *fifo[2];
+    int64_t pts;
 
     void (*fade_samples)(uint8_t **dst, uint8_t * const *src,
                          int nb_samples, int channels, int direction,
                          int64_t start, int range, int curve);
+    void (*crossfade_samples)(uint8_t **dst, uint8_t * const *cf0,
+                              uint8_t * const *cf1,
+                              int nb_samples, int channels,
+                              int curve0, int curve1);
 } AudioFadeContext;
 
-enum CurveType { TRI, QSIN, ESIN, HSIN, LOG, PAR, QUA, CUB, SQU, CBR };
+enum CurveType { TRI, QSIN, ESIN, HSIN, LOG, IPAR, QUA, CUB, SQU, CBR, PAR, EXP, IQSIN, IHSIN, DESE, DESI, NB_CURVES };
 
 #define OFFSET(x) offsetof(AudioFadeContext, x)
 #define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
 
-static const AVOption afade_options[] = {
-    { "type",         "set the fade direction",                      OFFSET(type),         AV_OPT_TYPE_INT,    {.i64 = 0    }, 0, 1, FLAGS, "type" },
-    { "t",            "set the fade direction",                      OFFSET(type),         AV_OPT_TYPE_INT,    {.i64 = 0    }, 0, 1, FLAGS, "type" },
-    { "in",           "fade-in",                                     0,                    AV_OPT_TYPE_CONST,  {.i64 = 0    }, 0, 0, FLAGS, "type" },
-    { "out",          "fade-out",                                    0,                    AV_OPT_TYPE_CONST,  {.i64 = 1    }, 0, 0, FLAGS, "type" },
-    { "start_sample", "set number of first sample to start fading",  OFFSET(start_sample), AV_OPT_TYPE_INT64,  {.i64 = 0    }, 0, INT64_MAX, FLAGS },
-    { "ss",           "set number of first sample to start fading",  OFFSET(start_sample), AV_OPT_TYPE_INT64,  {.i64 = 0    }, 0, INT64_MAX, FLAGS },
-    { "nb_samples",   "set number of samples for fade duration",     OFFSET(nb_samples),   AV_OPT_TYPE_INT,    {.i64 = 44100}, 1, INT32_MAX, FLAGS },
-    { "ns",           "set number of samples for fade duration",     OFFSET(nb_samples),   AV_OPT_TYPE_INT,    {.i64 = 44100}, 1, INT32_MAX, FLAGS },
-    { "start_time",   "set time to start fading",                    OFFSET(start_time),   AV_OPT_TYPE_DURATION, {.i64 = 0. }, 0, INT32_MAX, FLAGS },
-    { "st",           "set time to start fading",                    OFFSET(start_time),   AV_OPT_TYPE_DURATION, {.i64 = 0. }, 0, INT32_MAX, FLAGS },
-    { "duration",     "set fade duration",                           OFFSET(duration),     AV_OPT_TYPE_DURATION, {.i64 = 0. }, 0, INT32_MAX, FLAGS },
-    { "d",            "set fade duration",                           OFFSET(duration),     AV_OPT_TYPE_DURATION, {.i64 = 0. }, 0, INT32_MAX, FLAGS },
-    { "curve",        "set fade curve type",                         OFFSET(curve),        AV_OPT_TYPE_INT,    {.i64 = TRI  }, TRI, CBR, FLAGS, "curve" },
-    { "c",            "set fade curve type",                         OFFSET(curve),        AV_OPT_TYPE_INT,    {.i64 = TRI  }, TRI, CBR, FLAGS, "curve" },
-    { "tri",          "linear slope",                                0,                    AV_OPT_TYPE_CONST,  {.i64 = TRI  }, 0, 0, FLAGS, "curve" },
-    { "qsin",         "quarter of sine wave",                        0,                    AV_OPT_TYPE_CONST,  {.i64 = QSIN }, 0, 0, FLAGS, "curve" },
-    { "esin",         "exponential sine wave",                       0,                    AV_OPT_TYPE_CONST,  {.i64 = ESIN }, 0, 0, FLAGS, "curve" },
-    { "hsin",         "half of sine wave",                           0,                    AV_OPT_TYPE_CONST,  {.i64 = HSIN }, 0, 0, FLAGS, "curve" },
-    { "log",          "logarithmic",                                 0,                    AV_OPT_TYPE_CONST,  {.i64 = LOG  }, 0, 0, FLAGS, "curve" },
-    { "par",          "inverted parabola",                           0,                    AV_OPT_TYPE_CONST,  {.i64 = PAR  }, 0, 0, FLAGS, "curve" },
-    { "qua",          "quadratic",                                   0,                    AV_OPT_TYPE_CONST,  {.i64 = QUA  }, 0, 0, FLAGS, "curve" },
-    { "cub",          "cubic",                                       0,                    AV_OPT_TYPE_CONST,  {.i64 = CUB  }, 0, 0, FLAGS, "curve" },
-    { "squ",          "square root",                                 0,                    AV_OPT_TYPE_CONST,  {.i64 = SQU  }, 0, 0, FLAGS, "curve" },
-    { "cbr",          "cubic root",                                  0,                    AV_OPT_TYPE_CONST,  {.i64 = CBR  }, 0, 0, FLAGS, "curve" },
-    { NULL }
-};
-
-AVFILTER_DEFINE_CLASS(afade);
-
-static av_cold int init(AVFilterContext *ctx)
-{
-    AudioFadeContext *s = ctx->priv;
-
-    if (INT64_MAX - s->nb_samples < s->start_sample)
-        return AVERROR(EINVAL);
-
-    return 0;
-}
-
 static int query_formats(AVFilterContext *ctx)
 {
     AVFilterFormats *formats;
@@ -100,7 +70,7 @@ static int query_formats(AVFilterContext *ctx)
     };
     int ret;
 
-    layouts = ff_all_channel_layouts();
+    layouts = ff_all_channel_counts();
     if (!layouts)
         return AVERROR(ENOMEM);
     ret = ff_set_common_channel_layouts(ctx, layouts);
@@ -122,31 +92,47 @@ static int query_formats(AVFilterContext *ctx)
 
 static double fade_gain(int curve, int64_t index, int range)
 {
+#define CUBE(a) ((a)*(a)*(a))
     double gain;
 
-    gain = FFMAX(0.0, FFMIN(1.0, 1.0 * index / range));
+    gain = av_clipd(1.0 * index / range, 0, 1.0);
 
     switch (curve) {
     case QSIN:
         gain = sin(gain * M_PI / 2.0);
         break;
+    case IQSIN:
+        /* 0.6... = 2 / M_PI */
+        gain = 0.6366197723675814 * asin(gain);
+        break;
     case ESIN:
-        gain = 1.0 - cos(M_PI / 4.0 * (pow(2.0*gain - 1, 3) + 1));
+        gain = 1.0 - cos(M_PI / 4.0 * (CUBE(2.0*gain - 1) + 1));
         break;
     case HSIN:
         gain = (1.0 - cos(gain * M_PI)) / 2.0;
         break;
+    case IHSIN:
+        /* 0.3... = 1 / M_PI */
+        gain = 0.3183098861837907 * acos(1 - 2 * gain);
+        break;
+    case EXP:
+        /* -11.5... = 5*ln(0.1) */
+        gain = exp(-11.512925464970227 * (1 - gain));
+        break;
     case LOG:
-        gain = pow(0.1, (1 - gain) * 5.0);
+        gain = av_clipd(1 + 0.2 * log10(gain), 0, 1.0);
         break;
     case PAR:
+        gain = 1 - sqrt(1 - gain);
+        break;
+    case IPAR:
         gain = (1 - (1 - gain) * (1 - gain));
         break;
     case QUA:
         gain *= gain;
         break;
     case CUB:
-        gain = gain * gain * gain;
+        gain = CUBE(gain);
         break;
     case SQU:
         gain = sqrt(gain);
@@ -154,6 +140,12 @@ static double fade_gain(int curve, int64_t index, int range)
     case CBR:
         gain = cbrt(gain);
         break;
+    case DESE:
+        gain = gain <= 0.5 ? cbrt(2 * gain) / 2: 1 - cbrt(2 * (1 - gain)) / 2;
+        break;
+    case DESI:
+        gain = gain <= 0.5 ? CUBE(2 * gain) / 2: 1 - CUBE(2 * (1 - gain)) / 2;
+        break;
     }
 
     return gain;
@@ -203,12 +195,12 @@ FADE(flt, float)
 FADE(s16, int16_t)
 FADE(s32, int32_t)
 
-static int config_input(AVFilterLink *inlink)
+static int config_output(AVFilterLink *outlink)
 {
-    AVFilterContext *ctx = inlink->dst;
+    AVFilterContext *ctx = outlink->src;
     AudioFadeContext *s  = ctx->priv;
 
-    switch (inlink->format) {
+    switch (outlink->format) {
     case AV_SAMPLE_FMT_DBL:  s->fade_samples = fade_samples_dbl;  break;
     case AV_SAMPLE_FMT_DBLP: s->fade_samples = fade_samples_dblp; break;
     case AV_SAMPLE_FMT_FLT:  s->fade_samples = fade_samples_flt;  break;
@@ -220,9 +212,57 @@ static int config_input(AVFilterLink *inlink)
     }
 
     if (s->duration)
-        s->nb_samples = av_rescale(s->duration, inlink->sample_rate, AV_TIME_BASE);
+        s->nb_samples = av_rescale(s->duration, outlink->sample_rate, AV_TIME_BASE);
     if (s->start_time)
-        s->start_sample = av_rescale(s->start_time, inlink->sample_rate, AV_TIME_BASE);
+        s->start_sample = av_rescale(s->start_time, outlink->sample_rate, AV_TIME_BASE);
+
+    return 0;
+}
+
+#if CONFIG_AFADE_FILTER
+
+static const AVOption afade_options[] = { /* afade options: each has a long and a short name bound to the same field */
+    { "type",         "set the fade direction",                      OFFSET(type),         AV_OPT_TYPE_INT,    {.i64 = 0    }, 0, 1, FLAGS, "type" },
+    { "t",            "set the fade direction",                      OFFSET(type),         AV_OPT_TYPE_INT,    {.i64 = 0    }, 0, 1, FLAGS, "type" },
+    { "in",           "fade-in",                                     0,                    AV_OPT_TYPE_CONST,  {.i64 = 0    }, 0, 0, FLAGS, "type" },
+    { "out",          "fade-out",                                    0,                    AV_OPT_TYPE_CONST,  {.i64 = 1    }, 0, 0, FLAGS, "type" },
+    { "start_sample", "set number of first sample to start fading",  OFFSET(start_sample), AV_OPT_TYPE_INT64,  {.i64 = 0    }, 0, INT64_MAX, FLAGS },
+    { "ss",           "set number of first sample to start fading",  OFFSET(start_sample), AV_OPT_TYPE_INT64,  {.i64 = 0    }, 0, INT64_MAX, FLAGS },
+    { "nb_samples",   "set number of samples for fade duration",     OFFSET(nb_samples),   AV_OPT_TYPE_INT,    {.i64 = 44100}, 1, INT32_MAX, FLAGS },
+    { "ns",           "set number of samples for fade duration",     OFFSET(nb_samples),   AV_OPT_TYPE_INT,    {.i64 = 44100}, 1, INT32_MAX, FLAGS },
+    { "start_time",   "set time to start fading",                    OFFSET(start_time),   AV_OPT_TYPE_DURATION, {.i64 = 0. }, 0, INT32_MAX, FLAGS },
+    { "st",           "set time to start fading",                    OFFSET(start_time),   AV_OPT_TYPE_DURATION, {.i64 = 0. }, 0, INT32_MAX, FLAGS },
+    { "duration",     "set fade duration",                           OFFSET(duration),     AV_OPT_TYPE_DURATION, {.i64 = 0. }, 0, INT32_MAX, FLAGS },
+    { "d",            "set fade duration",                           OFFSET(duration),     AV_OPT_TYPE_DURATION, {.i64 = 0. }, 0, INT32_MAX, FLAGS },
+    { "curve",        "set fade curve type",                         OFFSET(curve),        AV_OPT_TYPE_INT,    {.i64 = TRI  }, 0, NB_CURVES - 1, FLAGS, "curve" },
+    { "c",            "set fade curve type",                         OFFSET(curve),        AV_OPT_TYPE_INT,    {.i64 = TRI  }, 0, NB_CURVES - 1, FLAGS, "curve" },
+    { "tri",          "linear slope",                                0,                    AV_OPT_TYPE_CONST,  {.i64 = TRI  }, 0, 0, FLAGS, "curve" },
+    { "qsin",         "quarter of sine wave",                        0,                    AV_OPT_TYPE_CONST,  {.i64 = QSIN }, 0, 0, FLAGS, "curve" },
+    { "esin",         "exponential sine wave",                       0,                    AV_OPT_TYPE_CONST,  {.i64 = ESIN }, 0, 0, FLAGS, "curve" },
+    { "hsin",         "half of sine wave",                           0,                    AV_OPT_TYPE_CONST,  {.i64 = HSIN }, 0, 0, FLAGS, "curve" },
+    { "log",          "logarithmic",                                 0,                    AV_OPT_TYPE_CONST,  {.i64 = LOG  }, 0, 0, FLAGS, "curve" },
+    { "ipar",         "inverted parabola",                           0,                    AV_OPT_TYPE_CONST,  {.i64 = IPAR }, 0, 0, FLAGS, "curve" },
+    { "qua",          "quadratic",                                   0,                    AV_OPT_TYPE_CONST,  {.i64 = QUA  }, 0, 0, FLAGS, "curve" },
+    { "cub",          "cubic",                                       0,                    AV_OPT_TYPE_CONST,  {.i64 = CUB  }, 0, 0, FLAGS, "curve" },
+    { "squ",          "square root",                                 0,                    AV_OPT_TYPE_CONST,  {.i64 = SQU  }, 0, 0, FLAGS, "curve" },
+    { "cbr",          "cubic root",                                  0,                    AV_OPT_TYPE_CONST,  {.i64 = CBR  }, 0, 0, FLAGS, "curve" },
+    { "par",          "parabola",                                    0,                    AV_OPT_TYPE_CONST,  {.i64 = PAR  }, 0, 0, FLAGS, "curve" },
+    { "exp",          "exponential",                                 0,                    AV_OPT_TYPE_CONST,  {.i64 = EXP  }, 0, 0, FLAGS, "curve" },
+    { "iqsin",        "inverted quarter of sine wave",               0,                    AV_OPT_TYPE_CONST,  {.i64 = IQSIN}, 0, 0, FLAGS, "curve" },
+    { "ihsin",        "inverted half of sine wave",                  0,                    AV_OPT_TYPE_CONST,  {.i64 = IHSIN}, 0, 0, FLAGS, "curve" },
+    { "dese",         "double-exponential seat",                     0,                    AV_OPT_TYPE_CONST,  {.i64 = DESE }, 0, 0, FLAGS, "curve" },
+    { "desi",         "double-exponential sigmoid",                  0,                    AV_OPT_TYPE_CONST,  {.i64 = DESI }, 0, 0, FLAGS, "curve" },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(afade);
+
+static av_cold int init(AVFilterContext *ctx)
+{
+    AudioFadeContext *s = ctx->priv;
+
+    if (INT64_MAX - s->nb_samples < s->start_sample)
+        return AVERROR(EINVAL);
 
     return 0;
 }
@@ -236,7 +276,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *buf)
     int64_t cur_sample = av_rescale_q(buf->pts, inlink->time_base, (AVRational){1, inlink->sample_rate});
 
     if ((!s->type && (s->start_sample + s->nb_samples < cur_sample)) ||
-        ( s->type && (cur_sample + s->nb_samples < s->start_sample)))
+        ( s->type && (cur_sample + nb_samples < s->start_sample)))
         return ff_filter_frame(outlink, buf);
 
     if (av_frame_is_writable(buf)) {
@@ -277,15 +317,15 @@ static const AVFilterPad avfilter_af_afade_inputs[] = {
         .name         = "default",
         .type         = AVMEDIA_TYPE_AUDIO,
         .filter_frame = filter_frame,
-        .config_props = config_input,
     },
     { NULL }
 };
 
 static const AVFilterPad avfilter_af_afade_outputs[] = {
     {
-        .name = "default",
-        .type = AVMEDIA_TYPE_AUDIO,
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_AUDIO,
+        .config_props = config_output,
     },
     { NULL }
 };
@@ -301,3 +341,329 @@ AVFilter ff_af_afade = {
     .priv_class    = &afade_class,
     .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC,
 };
+
+#endif /* CONFIG_AFADE_FILTER */
+
+#if CONFIG_ACROSSFADE_FILTER
+
+static const AVOption acrossfade_options[] = { /* AV_OPT_TYPE_DURATION bounds are microseconds: 60000000 == 60 s */
+    { "nb_samples",   "set number of samples for cross fade duration", OFFSET(nb_samples),   AV_OPT_TYPE_INT,    {.i64 = 44100}, 1, INT32_MAX/10, FLAGS },
+    { "ns",           "set number of samples for cross fade duration", OFFSET(nb_samples),   AV_OPT_TYPE_INT,    {.i64 = 44100}, 1, INT32_MAX/10, FLAGS },
+    { "duration",     "set cross fade duration",                       OFFSET(duration),     AV_OPT_TYPE_DURATION, {.i64 = 0. }, 0, 60000000, FLAGS },
+    { "d",            "set cross fade duration",                       OFFSET(duration),     AV_OPT_TYPE_DURATION, {.i64 = 0. }, 0, 60000000, FLAGS },
+    { "overlap",      "overlap 1st stream end with 2nd stream start",  OFFSET(overlap),      AV_OPT_TYPE_BOOL,   {.i64 = 1    }, 0,  1, FLAGS },
+    { "o",            "overlap 1st stream end with 2nd stream start",  OFFSET(overlap),      AV_OPT_TYPE_BOOL,   {.i64 = 1    }, 0,  1, FLAGS },
+    { "curve1",       "set fade curve type for 1st stream",            OFFSET(curve),        AV_OPT_TYPE_INT,    {.i64 = TRI  }, 0, NB_CURVES - 1, FLAGS, "curve" },
+    { "c1",           "set fade curve type for 1st stream",            OFFSET(curve),        AV_OPT_TYPE_INT,    {.i64 = TRI  }, 0, NB_CURVES - 1, FLAGS, "curve" },
+    {     "tri",      "linear slope",                                  0,                    AV_OPT_TYPE_CONST,  {.i64 = TRI  }, 0, 0, FLAGS, "curve" },
+    {     "qsin",     "quarter of sine wave",                          0,                    AV_OPT_TYPE_CONST,  {.i64 = QSIN }, 0, 0, FLAGS, "curve" },
+    {     "esin",     "exponential sine wave",                         0,                    AV_OPT_TYPE_CONST,  {.i64 = ESIN }, 0, 0, FLAGS, "curve" },
+    {     "hsin",     "half of sine wave",                             0,                    AV_OPT_TYPE_CONST,  {.i64 = HSIN }, 0, 0, FLAGS, "curve" },
+    {     "log",      "logarithmic",                                   0,                    AV_OPT_TYPE_CONST,  {.i64 = LOG  }, 0, 0, FLAGS, "curve" },
+    {     "ipar",     "inverted parabola",                             0,                    AV_OPT_TYPE_CONST,  {.i64 = IPAR }, 0, 0, FLAGS, "curve" },
+    {     "qua",      "quadratic",                                     0,                    AV_OPT_TYPE_CONST,  {.i64 = QUA  }, 0, 0, FLAGS, "curve" },
+    {     "cub",      "cubic",                                         0,                    AV_OPT_TYPE_CONST,  {.i64 = CUB  }, 0, 0, FLAGS, "curve" },
+    {     "squ",      "square root",                                   0,                    AV_OPT_TYPE_CONST,  {.i64 = SQU  }, 0, 0, FLAGS, "curve" },
+    {     "cbr",      "cubic root",                                    0,                    AV_OPT_TYPE_CONST,  {.i64 = CBR  }, 0, 0, FLAGS, "curve" },
+    {     "par",      "parabola",                                      0,                    AV_OPT_TYPE_CONST,  {.i64 = PAR  }, 0, 0, FLAGS, "curve" },
+    {     "exp",      "exponential",                                   0,                    AV_OPT_TYPE_CONST,  {.i64 = EXP  }, 0, 0, FLAGS, "curve" },
+    {     "iqsin",    "inverted quarter of sine wave",                 0,                    AV_OPT_TYPE_CONST,  {.i64 = IQSIN}, 0, 0, FLAGS, "curve" },
+    {     "ihsin",    "inverted half of sine wave",                    0,                    AV_OPT_TYPE_CONST,  {.i64 = IHSIN}, 0, 0, FLAGS, "curve" },
+    {     "dese",     "double-exponential seat",                       0,                    AV_OPT_TYPE_CONST,  {.i64 = DESE }, 0, 0, FLAGS, "curve" },
+    {     "desi",     "double-exponential sigmoid",                    0,                    AV_OPT_TYPE_CONST,  {.i64 = DESI }, 0, 0, FLAGS, "curve" },
+    { "curve2",       "set fade curve type for 2nd stream",            OFFSET(curve2),       AV_OPT_TYPE_INT,    {.i64 = TRI  }, 0, NB_CURVES - 1, FLAGS, "curve" },
+    { "c2",           "set fade curve type for 2nd stream",            OFFSET(curve2),       AV_OPT_TYPE_INT,    {.i64 = TRI  }, 0, NB_CURVES - 1, FLAGS, "curve" },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(acrossfade);
+
+/* Planar crossfade kernel: every plane of cf0 fades out while cf1 fades in. */
+#define CROSSFADE_PLANAR(name, type)                                           \
+static void crossfade_samples_## name ##p(uint8_t **dst, uint8_t * const *cf0, \
+                                          uint8_t * const *cf1,                \
+                                          int nb_samples, int channels,        \
+                                          int curve0, int curve1)              \
+{                                                                              \
+    int n, ch;                                                                 \
+                                                                               \
+    for (n = 0; n < nb_samples; n++) {                                         \
+        double g_out = fade_gain(curve0, nb_samples - 1 - n, nb_samples);      \
+        double g_in  = fade_gain(curve1, n, nb_samples);                       \
+        for (ch = 0; ch < channels; ch++) {                                    \
+            const type *a = (type *)cf0[ch];                                   \
+            const type *b = (type *)cf1[ch];                                   \
+            type *o = (type *)dst[ch];                                         \
+            o[n] = a[n] * g_out + b[n] * g_in;                                 \
+        }                                                                      \
+    }                                                                          \
+}
+
+/* Interleaved crossfade kernel: all channels of one sample share the same gains. */
+#define CROSSFADE(name, type)                                               \
+static void crossfade_samples_## name (uint8_t **dst, uint8_t * const *cf0, \
+                                       uint8_t * const *cf1,                \
+                                       int nb_samples, int channels,        \
+                                       int curve0, int curve1)              \
+{                                                                           \
+    const type *a = (type *)cf0[0];                                         \
+    const type *b = (type *)cf1[0];                                         \
+    type *o = (type *)dst[0];                                               \
+    int n, ch;                                                              \
+    for (n = 0; n < nb_samples; n++) {                                      \
+        double g_out = fade_gain(curve0, nb_samples - 1 - n, nb_samples);   \
+        double g_in  = fade_gain(curve1, n, nb_samples);                    \
+        for (ch = 0; ch < channels; ch++, a++, b++, o++)                    \
+            *o = *a * g_out + *b * g_in;                                    \
+    }                                                                       \
+}
+
+CROSSFADE_PLANAR(dbl, double)
+CROSSFADE_PLANAR(flt, float)
+CROSSFADE_PLANAR(s16, int16_t)
+CROSSFADE_PLANAR(s32, int32_t)
+
+CROSSFADE(dbl, double)
+CROSSFADE(flt, float)
+CROSSFADE(s16, int16_t)
+CROSSFADE(s32, int32_t)
+
+/**
+ * Frame callback for both inputs: input 0 is delayed through fifo[0] so its last
+ * nb_samples stay queued, input 1 is gathered in fifo[1]; once fifo[1] holds
+ * nb_samples the fade is emitted and every later frame is passed through.
+ */
+static int acrossfade_filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx  = inlink->dst;
+    AudioFadeContext *s   = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AVFrame *out = NULL, *cf[2] = { NULL };
+    int ret = 0, nb_samples;
+
+    if (s->crossfade_is_over) {
+        in->pts = s->pts;
+        s->pts += av_rescale_q(in->nb_samples, (AVRational){ 1, outlink->sample_rate }, outlink->time_base);
+        return ff_filter_frame(outlink, in);
+    } else if (inlink == ctx->inputs[0]) {
+        av_audio_fifo_write(s->fifo[0], (void **)in->extended_data, in->nb_samples);
+
+        nb_samples = av_audio_fifo_size(s->fifo[0]) - s->nb_samples;
+        if (nb_samples > 0) {
+            out = ff_get_audio_buffer(outlink, nb_samples);
+            if (!out) {
+                ret = AVERROR(ENOMEM);
+                goto fail;
+            }
+            av_audio_fifo_read(s->fifo[0], (void **)out->extended_data, nb_samples);
+            out->pts = s->pts;
+            s->pts += av_rescale_q(nb_samples, (AVRational){ 1, outlink->sample_rate }, outlink->time_base);
+            ret = ff_filter_frame(outlink, out);
+        }
+    } else if (av_audio_fifo_size(s->fifo[1]) < s->nb_samples) {
+        if (!s->overlap && av_audio_fifo_size(s->fifo[0]) > 0) {
+            nb_samples = av_audio_fifo_size(s->fifo[0]);
+
+            cf[0] = ff_get_audio_buffer(outlink, nb_samples);
+            out = ff_get_audio_buffer(outlink, nb_samples);
+            if (!out || !cf[0]) {
+                av_frame_free(&out); /* cf[0] is released at fail:, out is not */
+                ret = AVERROR(ENOMEM);
+                goto fail;
+            }
+            av_audio_fifo_read(s->fifo[0], (void **)cf[0]->extended_data, nb_samples);
+
+            s->fade_samples(out->extended_data, cf[0]->extended_data, nb_samples,
+                            outlink->channels, -1, nb_samples - 1, nb_samples, s->curve);
+            out->pts = s->pts;
+            s->pts += av_rescale_q(nb_samples, (AVRational){ 1, outlink->sample_rate }, outlink->time_base);
+            ret = ff_filter_frame(outlink, out);
+            if (ret < 0)
+                goto fail;
+        }
+
+        av_audio_fifo_write(s->fifo[1], (void **)in->extended_data, in->nb_samples);
+    } else if (av_audio_fifo_size(s->fifo[1]) >= s->nb_samples) {
+        av_audio_fifo_write(s->fifo[1], (void **)in->extended_data, in->nb_samples);
+
+        if (s->overlap) {
+            cf[0] = ff_get_audio_buffer(outlink, s->nb_samples);
+            cf[1] = ff_get_audio_buffer(outlink, s->nb_samples);
+            out = ff_get_audio_buffer(outlink, s->nb_samples);
+            if (!out || !cf[0] || !cf[1]) {
+                av_frame_free(&out);
+                ret = AVERROR(ENOMEM);
+                goto fail;
+            }
+
+            av_audio_fifo_read(s->fifo[0], (void **)cf[0]->extended_data, s->nb_samples);
+            av_audio_fifo_read(s->fifo[1], (void **)cf[1]->extended_data, s->nb_samples);
+
+            s->crossfade_samples(out->extended_data, cf[0]->extended_data,
+                                 cf[1]->extended_data,
+                                 s->nb_samples, av_frame_get_channels(in),
+                                 s->curve, s->curve2);
+            out->pts = s->pts;
+            s->pts += av_rescale_q(s->nb_samples, (AVRational){ 1, outlink->sample_rate }, outlink->time_base);
+            ret = ff_filter_frame(outlink, out);
+            if (ret < 0)
+                goto fail;
+        } else {
+            out = ff_get_audio_buffer(outlink, s->nb_samples);
+            cf[1] = ff_get_audio_buffer(outlink, s->nb_samples);
+            if (!out || !cf[1]) {
+                ret = AVERROR(ENOMEM);
+                av_frame_free(&out);
+                goto fail;
+            }
+
+            av_audio_fifo_read(s->fifo[1], (void **)cf[1]->extended_data, s->nb_samples);
+
+            s->fade_samples(out->extended_data, cf[1]->extended_data, s->nb_samples,
+                            outlink->channels, 1, 0, s->nb_samples, s->curve2);
+            out->pts = s->pts;
+            s->pts += av_rescale_q(s->nb_samples, (AVRational){ 1, outlink->sample_rate }, outlink->time_base);
+            ret = ff_filter_frame(outlink, out);
+            if (ret < 0)
+                goto fail;
+        }
+
+        nb_samples = av_audio_fifo_size(s->fifo[1]);
+        if (nb_samples > 0) {
+            out = ff_get_audio_buffer(outlink, nb_samples);
+            if (!out) {
+                ret = AVERROR(ENOMEM);
+                goto fail;
+            }
+
+            av_audio_fifo_read(s->fifo[1], (void **)out->extended_data, nb_samples);
+            out->pts = s->pts;
+            s->pts += av_rescale_q(nb_samples, (AVRational){ 1, outlink->sample_rate }, outlink->time_base);
+            ret = ff_filter_frame(outlink, out);
+        }
+        s->crossfade_is_over = 1;
+    }
+
+fail:
+    av_frame_free(&in);
+    av_frame_free(&cf[0]);
+    av_frame_free(&cf[1]);
+    return ret;
+}
+
+/* Pull from input 0 until it reports EOF, then from input 1; samples still
+ * queued in fifo[1] when input 1 ends are flushed as one final frame. */
+static int acrossfade_request_frame(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    AudioFadeContext *s = ctx->priv;
+    int ret = 0;
+
+    if (!s->cf0_eof) {
+        ret = ff_request_frame(ctx->inputs[0]);
+        if (ret == AVERROR_EOF) { /* any other error falls through to the return */
+            s->cf0_eof = 1;
+            ret = 0;
+        }
+    } else {
+        int nb_samples = av_audio_fifo_size(s->fifo[1]);
+
+        ret = ff_request_frame(ctx->inputs[1]);
+        if (ret == AVERROR_EOF && nb_samples > 0) {
+            AVFrame *out = ff_get_audio_buffer(outlink, nb_samples);
+            if (!out)
+                return AVERROR(ENOMEM);
+
+            av_audio_fifo_read(s->fifo[1], (void **)out->extended_data, nb_samples);
+            out->pts = s->pts; /* keep the flushed tail on the output timeline */
+            s->pts += av_rescale_q(nb_samples, (AVRational){ 1, outlink->sample_rate }, outlink->time_base);
+            ret = ff_filter_frame(outlink, out);
+        }
+    }
+
+    return ret;
+}
+
+/* Output link setup: reject inputs whose sample rates differ, copy the link
+ * parameters of input 0, pick the crossfade kernel for the sample format and
+ * allocate one FIFO per input. */
+static int acrossfade_config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    AudioFadeContext *s  = ctx->priv;
+    const AVFilterLink *in0 = ctx->inputs[0], *in1 = ctx->inputs[1];
+
+    if (in0->sample_rate != in1->sample_rate) {
+        av_log(ctx, AV_LOG_ERROR, "Inputs must have the same sample rate %d for in0 vs %d for in1\n",
+               in0->sample_rate, in1->sample_rate);
+        return AVERROR(EINVAL);
+    }
+
+    outlink->sample_rate    = in0->sample_rate;
+    outlink->time_base      = in0->time_base;
+    outlink->channel_layout = in0->channel_layout;
+    outlink->channels       = in0->channels;
+
+    switch (outlink->format) {
+    case AV_SAMPLE_FMT_DBL:  s->crossfade_samples = crossfade_samples_dbl;  break;
+    case AV_SAMPLE_FMT_DBLP: s->crossfade_samples = crossfade_samples_dblp; break;
+    case AV_SAMPLE_FMT_FLT:  s->crossfade_samples = crossfade_samples_flt;  break;
+    case AV_SAMPLE_FMT_FLTP: s->crossfade_samples = crossfade_samples_fltp; break;
+    case AV_SAMPLE_FMT_S16:  s->crossfade_samples = crossfade_samples_s16;  break;
+    case AV_SAMPLE_FMT_S16P: s->crossfade_samples = crossfade_samples_s16p; break;
+    case AV_SAMPLE_FMT_S32:  s->crossfade_samples = crossfade_samples_s32;  break;
+    case AV_SAMPLE_FMT_S32P: s->crossfade_samples = crossfade_samples_s32p; break;
+    }
+
+    config_output(outlink);
+
+    s->fifo[0] = av_audio_fifo_alloc(outlink->format, outlink->channels, s->nb_samples);
+    s->fifo[1] = av_audio_fifo_alloc(outlink->format, outlink->channels, s->nb_samples);
+
+    return s->fifo[0] && s->fifo[1] ? 0 : AVERROR(ENOMEM);
+}
+
+/* Release the sample FIFOs of both inputs. */
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    AVAudioFifo **fifo = ((AudioFadeContext *)ctx->priv)->fifo;
+    av_audio_fifo_free(fifo[0]);
+    av_audio_fifo_free(fifo[1]);
+}
+
+static const AVFilterPad avfilter_af_acrossfade_inputs[] = { /* two audio inputs sharing one frame callback */
+    {
+        .name         = "crossfade0", /* ctx->inputs[0]: stream that fades out */
+        .type         = AVMEDIA_TYPE_AUDIO,
+        .filter_frame = acrossfade_filter_frame,
+    },
+    {
+        .name         = "crossfade1", /* ctx->inputs[1]: stream that fades in */
+        .type         = AVMEDIA_TYPE_AUDIO,
+        .filter_frame = acrossfade_filter_frame,
+    },
+    { NULL }
+};
+
+static const AVFilterPad avfilter_af_acrossfade_outputs[] = { /* single audio output pad */
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_AUDIO,
+        .request_frame = acrossfade_request_frame, /* pulls input 0 first, then input 1 */
+        .config_props  = acrossfade_config_output, /* validates sample rates, allocates the FIFOs */
+    },
+    { NULL }
+};
+
+AVFilter ff_af_acrossfade = { /* definition of the "acrossfade" audio filter */
+    .name          = "acrossfade",
+    .description   = NULL_IF_CONFIG_SMALL("Cross fade two input audio streams."),
+    .query_formats = query_formats,
+    .priv_size     = sizeof(AudioFadeContext),
+    .uninit        = uninit, /* frees both sample FIFOs */
+    .priv_class    = &acrossfade_class,
+    .inputs        = avfilter_af_acrossfade_inputs,
+    .outputs       = avfilter_af_acrossfade_outputs,
+};
+
+#endif /* CONFIG_ACROSSFADE_FILTER */
diff --git a/libavfilter/af_afftfilt.c b/libavfilter/af_afftfilt.c
new file mode 100644
index 00000000..8e41f52c
--- /dev/null
+++ b/libavfilter/af_afftfilt.c
@@ -0,0 +1,402 @@
+/*
+ * Copyright (c) 2016 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 2.1 of the License,
+ * or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/audio_fifo.h"
+#include "libavutil/avstring.h"
+#include "libavfilter/internal.h"
+#include "libavutil/common.h"
+#include "libavutil/opt.h"
+#include "libavcodec/avfft.h"
+#include "libavutil/eval.h"
+#include "audio.h"
+#include "window_func.h"
+/* Private context of the afftfilt filter. */
+typedef struct AFFTFiltContext {
+    const AVClass *class;
+    char *real_str;              /* "real" option: '|'-separated per-channel expressions */
+    char *img_str;               /* "imag" option; config_input() falls back to real_str when NULL */
+    int fft_bits;                /* "win_size" option: log2 of the window size */
+
+    FFTContext *fft, *ifft;      /* created with av_fft_init(fft_bits, 0) and av_fft_init(fft_bits, 1) */
+    FFTComplex **fft_data;       /* one array of window_size bins per channel */
+    int nb_exprs;                /* number of real expressions parsed by config_input() */
+    int window_size;             /* 1 << fft_bits */
+    AVExpr **real;               /* per-channel parsed expressions multiplied into .re */
+    AVExpr **imag;               /* per-channel parsed expressions multiplied into .im */
+    AVAudioFifo *fifo;           /* queues input samples until a full window is available */
+    int64_t pts;                 /* pts of the next output frame, advanced by window_size */
+    int hop_size;                /* window_size * (1 - overlap); samples drained per window */
+    float overlap;               /* "overlap" option */
+    AVFrame *buffer;             /* overlap-add accumulator, 2 * window_size samples */
+    int start, end;              /* write position and filled end inside buffer */
+    int win_func;                /* "win_func" option */
+    float win_scale;             /* sum of squared window coefficients */
+    float *window_func_lut;      /* window coefficients, window_size entries */
+} AFFTFiltContext;
+/* Variable names passed to av_expr_parse(); the enum indexes values[] in filter_frame() in the same order. */
+static const char *const var_names[] = {            "sr",     "b",       "nb",        "ch",        "chs",   "pts",        NULL };
+enum                                   { VAR_SAMPLE_RATE, VAR_BIN, VAR_NBBINS, VAR_CHANNEL, VAR_CHANNELS, VAR_PTS, VAR_VARS_NB };
+
+#define OFFSET(x) offsetof(AFFTFiltContext, x)
+#define A AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+/* User options. The w* constants are log2 window sizes stored in fft_bits. */
+static const AVOption afftfilt_options[] = {
+    { "real", "set channels real expressions",       OFFSET(real_str), AV_OPT_TYPE_STRING, {.str = "1" }, 0, 0, A },
+    { "imag",  "set channels imaginary expressions", OFFSET(img_str),  AV_OPT_TYPE_STRING, {.str = NULL }, 0, 0, A },
+    { "win_size", "set window size", OFFSET(fft_bits), AV_OPT_TYPE_INT, {.i64=12}, 4, 16, A, "fft" },
+        { "w16",    0, 0, AV_OPT_TYPE_CONST, {.i64=4},  0, 0, A, "fft" },
+        { "w32",    0, 0, AV_OPT_TYPE_CONST, {.i64=5},  0, 0, A, "fft" },
+        { "w64",    0, 0, AV_OPT_TYPE_CONST, {.i64=6},  0, 0, A, "fft" },
+        { "w128",   0, 0, AV_OPT_TYPE_CONST, {.i64=7},  0, 0, A, "fft" },
+        { "w256",   0, 0, AV_OPT_TYPE_CONST, {.i64=8},  0, 0, A, "fft" },
+        { "w512",   0, 0, AV_OPT_TYPE_CONST, {.i64=9},  0, 0, A, "fft" },
+        { "w1024",  0, 0, AV_OPT_TYPE_CONST, {.i64=10}, 0, 0, A, "fft" },
+        { "w2048",  0, 0, AV_OPT_TYPE_CONST, {.i64=11}, 0, 0, A, "fft" },
+        { "w4096",  0, 0, AV_OPT_TYPE_CONST, {.i64=12}, 0, 0, A, "fft" },
+        { "w8192",  0, 0, AV_OPT_TYPE_CONST, {.i64=13}, 0, 0, A, "fft" },
+        { "w16384", 0, 0, AV_OPT_TYPE_CONST, {.i64=14}, 0, 0, A, "fft" },
+        { "w32768", 0, 0, AV_OPT_TYPE_CONST, {.i64=15}, 0, 0, A, "fft" },
+        { "w65536", 0, 0, AV_OPT_TYPE_CONST, {.i64=16}, 0, 0, A, "fft" },
+    { "win_func", "set window function", OFFSET(win_func), AV_OPT_TYPE_INT, {.i64 = WFUNC_HANNING}, 0, NB_WFUNC-1, A, "win_func" },
+        { "rect",     "Rectangular",      0, AV_OPT_TYPE_CONST, {.i64=WFUNC_RECT},     0, 0, A, "win_func" },
+        { "bartlett", "Bartlett",         0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BARTLETT}, 0, 0, A, "win_func" },
+        { "hann",     "Hann",             0, AV_OPT_TYPE_CONST, {.i64=WFUNC_HANNING},  0, 0, A, "win_func" },
+        { "hanning",  "Hanning",          0, AV_OPT_TYPE_CONST, {.i64=WFUNC_HANNING},  0, 0, A, "win_func" },
+        { "hamming",  "Hamming",          0, AV_OPT_TYPE_CONST, {.i64=WFUNC_HAMMING},  0, 0, A, "win_func" },
+        { "sine",     "Sine",             0, AV_OPT_TYPE_CONST, {.i64=WFUNC_SINE},     0, 0, A, "win_func" },
+    { "overlap", "set window overlap", OFFSET(overlap), AV_OPT_TYPE_FLOAT, {.dbl=0.75}, 0,  1, A }, /* 1 is replaced in config_input() by the value ff_generate_window_func() reports */
+    { NULL },
+};
+
+AVFILTER_DEFINE_CLASS(afftfilt);
+
+/**
+ * Configure the filter for the negotiated input: create the FFT contexts and
+ * per-channel buffers, parse the real/imaginary expressions, and set up the
+ * FIFO, window table and overlap-add buffer.
+ * @return 0 on success, a negative AVERROR code otherwise
+ */
+static int config_input(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AFFTFiltContext *s = ctx->priv;
+    char *saveptr = NULL, *args;
+    int ret = 0, ch, i;
+    float overlap;
+    const char *last_expr = "1";
+
+    s->fft  = av_fft_init(s->fft_bits, 0);
+    s->ifft = av_fft_init(s->fft_bits, 1);
+    if (!s->fft || !s->ifft)
+        return AVERROR(ENOMEM);
+    s->window_size = 1 << s->fft_bits;
+
+    s->fft_data = av_calloc(inlink->channels, sizeof(*s->fft_data));
+    if (!s->fft_data)
+        return AVERROR(ENOMEM);
+    for (ch = 0; ch < inlink->channels; ch++) {
+        s->fft_data[ch] = av_calloc(s->window_size, sizeof(**s->fft_data));
+        if (!s->fft_data[ch])
+            return AVERROR(ENOMEM);
+    }
+
+    s->real = av_calloc(inlink->channels, sizeof(*s->real));
+    s->imag = av_calloc(inlink->channels, sizeof(*s->imag));
+    if (!s->real || !s->imag)
+        return AVERROR(ENOMEM);
+
+    /* One '|'-separated expression per channel; missing ones repeat the last. */
+    args = av_strdup(s->real_str);
+    if (!args)
+        return AVERROR(ENOMEM);
+    for (ch = 0; ch < inlink->channels; ch++) {
+        char *arg = av_strtok(ch == 0 ? args : NULL, "|", &saveptr);
+        ret = av_expr_parse(&s->real[ch], arg ? arg : last_expr, var_names,
+                            NULL, NULL, NULL, NULL, 0, ctx);
+        if (ret < 0)
+            break;
+        if (arg)
+            last_expr = arg;
+        s->nb_exprs++;
+    }
+    av_free(args);
+    /* Bail out here: the imaginary pass below would overwrite ret. */
+    if (ret < 0)
+        return ret;
+
+    /* last_expr may point into the buffer just freed; restore the default. */
+    last_expr = "1";
+    args = av_strdup(s->img_str ? s->img_str : s->real_str);
+    if (!args)
+        return AVERROR(ENOMEM);
+    for (ch = 0; ch < inlink->channels; ch++) {
+        char *arg = av_strtok(ch == 0 ? args : NULL, "|", &saveptr);
+        ret = av_expr_parse(&s->imag[ch], arg ? arg : last_expr, var_names,
+                            NULL, NULL, NULL, NULL, 0, ctx);
+        if (ret < 0)
+            break;
+        if (arg)
+            last_expr = arg;
+    }
+    av_free(args);
+    if (ret < 0)
+        return ret;
+
+    s->fifo = av_audio_fifo_alloc(inlink->format, inlink->channels, s->window_size);
+    if (!s->fifo)
+        return AVERROR(ENOMEM);
+
+    s->window_func_lut = av_realloc_f(s->window_func_lut, s->window_size,
+                                      sizeof(*s->window_func_lut));
+    if (!s->window_func_lut)
+        return AVERROR(ENOMEM);
+    ff_generate_window_func(s->window_func_lut, s->window_size, s->win_func, &overlap);
+    if (s->overlap == 1) /* 1 selects the overlap reported for this window */
+        s->overlap = overlap;
+
+    for (s->win_scale = 0, i = 0; i < s->window_size; i++)
+        s->win_scale += s->window_func_lut[i] * s->window_func_lut[i];
+
+    s->hop_size = s->window_size * (1 - s->overlap);
+    if (s->hop_size <= 0)
+        return AVERROR(EINVAL);
+
+    s->buffer = ff_get_audio_buffer(inlink, s->window_size * 2);
+    if (!s->buffer)
+        return AVERROR(ENOMEM);
+    return 0;
+}
+
+/**
+ * Queue the frame, then for every full window in the FIFO: apply the window
+ * and forward FFT, scale the bins by the expressions, inverse FFT and
+ * overlap-add into s->buffer, emitting window_size samples when available.
+ */
+static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AFFTFiltContext *s = ctx->priv;
+    const int window_size = s->window_size;
+    const float f = 1. / s->win_scale;
+    double values[VAR_VARS_NB];
+    AVFrame *out, *in = NULL;
+    int ch, n, ret, i, j, k;
+    int start = s->start, end = s->end;
+
+    ret = av_audio_fifo_write(s->fifo, (void **)frame->extended_data, frame->nb_samples);
+    av_frame_free(&frame);
+    if (ret < 0)
+        return ret;
+
+    while (av_audio_fifo_size(s->fifo) >= window_size) {
+        if (!in) {
+            in = ff_get_audio_buffer(outlink, window_size);
+            if (!in)
+                return AVERROR(ENOMEM);
+        }
+
+        ret = av_audio_fifo_peek(s->fifo, (void **)in->extended_data, window_size);
+        if (ret < 0)
+            break;
+
+        /* Apply the window function and load the complex FFT input. */
+        for (ch = 0; ch < inlink->channels; ch++) {
+            const float *src = (float *)in->extended_data[ch];
+            FFTComplex *fft_data = s->fft_data[ch];
+            for (n = 0; n < in->nb_samples; n++) {
+                fft_data[n].re = src[n] * s->window_func_lut[n];
+                fft_data[n].im = 0;
+            }
+            for (; n < window_size; n++) {
+                fft_data[n].re = 0;
+                fft_data[n].im = 0;
+            }
+        }
+
+        values[VAR_PTS]         = s->pts;
+        values[VAR_SAMPLE_RATE] = inlink->sample_rate;
+        values[VAR_NBBINS]      = window_size / 2;
+        values[VAR_CHANNELS]    = inlink->channels;
+
+        for (ch = 0; ch < inlink->channels; ch++) {
+            FFTComplex *fft_data = s->fft_data[ch];
+            float *buf = (float *)s->buffer->extended_data[ch];
+            int x;
+
+            values[VAR_CHANNEL] = ch;
+            av_fft_permute(s->fft, fft_data);
+            av_fft_calc(s->fft, fft_data);
+
+            /* Scale the lower-half bins by the evaluated expressions. */
+            for (n = 0; n < window_size / 2; n++) {
+                float fr, fi;
+                values[VAR_BIN] = n;
+                fr = av_expr_eval(s->real[ch], values, s);
+                fi = av_expr_eval(s->imag[ch], values, s);
+                fft_data[n].re *= fr;
+                fft_data[n].im *= fi;
+            }
+
+            /* Rebuild the upper half as the conjugate mirror of the lower half. */
+            for (n = window_size / 2 + 1, x = window_size / 2 - 1; n < window_size; n++, x--) {
+                fft_data[n].re =  fft_data[x].re;
+                fft_data[n].im = -fft_data[x].im;
+            }
+
+            av_fft_permute(s->ifft, fft_data);
+            av_fft_calc(s->ifft, fft_data);
+
+            /* Overlap-add: accumulate over [start, end), plain-store the rest. */
+            start = s->start;
+            end = s->end;
+            k = end;
+            for (i = 0, j = start; j < k && i < window_size; i++, j++)
+                buf[j] += s->fft_data[ch][i].re * f;
+            for (; i < window_size; i++, j++)
+                buf[j] = s->fft_data[ch][i].re * f;
+
+            start += s->hop_size;
+            end = j;
+        }
+
+        s->start = start;
+        s->end = end;
+
+        /* Once start passes a full window, emit window_size samples and shift. */
+        if (start >= window_size) {
+            float *dst, *buf;
+
+            start -= window_size;
+            end   -= window_size;
+            s->start = start;
+            s->end = end;
+
+            out = ff_get_audio_buffer(outlink, window_size);
+            if (!out) {
+                ret = AVERROR(ENOMEM);
+                break;
+            }
+            out->pts = s->pts;
+            s->pts += window_size;
+
+            for (ch = 0; ch < inlink->channels; ch++) {
+                dst = (float *)out->extended_data[ch];
+                buf = (float *)s->buffer->extended_data[ch];
+                for (n = 0; n < window_size; n++)
+                    dst[n] = buf[n] * (1 - s->overlap);
+                memmove(buf, buf + window_size, window_size * sizeof(*buf));
+            }
+
+            ret = ff_filter_frame(outlink, out);
+            if (ret < 0)
+                break;
+        }
+
+        av_audio_fifo_drain(s->fifo, s->hop_size);
+    }
+
+    av_frame_free(&in);
+    /* On success ret may hold a FIFO sample count (write or peek); report plain 0. */
+    return ret < 0 ? ret : 0;
+}
+
+/**
+ * Advertise planar float samples together with the channel counts and
+ * sample rates returned by ff_all_channel_counts() / ff_all_samplerates().
+ */
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVSampleFormat planar_float[] = {
+        AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE
+    };
+    AVFilterChannelLayouts *counts = ff_all_channel_counts();
+    AVFilterFormats *list;
+    int err;
+
+    if (!counts)
+        return AVERROR(ENOMEM);
+    if ((err = ff_set_common_channel_layouts(ctx, counts)) < 0)
+        return err;
+
+    list = ff_make_format_list(planar_float);
+    if (!list)
+        return AVERROR(ENOMEM);
+    if ((err = ff_set_common_formats(ctx, list)) < 0)
+        return err;
+
+    list = ff_all_samplerates();
+    if (!list)
+        return AVERROR(ENOMEM);
+    return ff_set_common_samplerates(ctx, list);
+}
+
+/** Free the FFT contexts, buffers, expressions, FIFO and window table. */
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    AFFTFiltContext *s = ctx->priv;
+    int i;
+
+    av_fft_end(s->fft);
+    av_fft_end(s->ifft);
+
+    /* nb_exprs counts the channels whose real expression was parsed. */
+    for (i = 0; i < s->nb_exprs; i++) {
+        if (s->fft_data)
+            av_freep(&s->fft_data[i]);
+        av_expr_free(s->real[i]);
+        av_expr_free(s->imag[i]);
+    }
+    av_freep(&s->fft_data);
+    av_freep(&s->real);
+    av_freep(&s->imag);
+    av_frame_free(&s->buffer);
+    av_freep(&s->window_func_lut); /* allocated in config_input() */
+    av_audio_fifo_free(s->fifo);   /* allocated in config_input() */
+}
+/* Pads and filter definition for afftfilt: one audio input (configured and filtered here), one audio output. */
+static const AVFilterPad inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_AUDIO,
+        .config_props = config_input,
+        .filter_frame = filter_frame,
+    },
+    { NULL }
+};
+
+static const AVFilterPad outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_AUDIO,
+    },
+    { NULL }
+};
+
+AVFilter ff_af_afftfilt = {
+    .name            = "afftfilt",
+    .description     = NULL_IF_CONFIG_SMALL("Apply arbitrary expressions to samples in frequency domain."),
+    .priv_size       = sizeof(AFFTFiltContext),
+    .priv_class      = &afftfilt_class,
+    .inputs          = inputs,
+    .outputs         = outputs,
+    .query_formats   = query_formats,
+    .uninit          = uninit,
+};
diff --git a/libavfilter/af_aformat.c b/libavfilter/af_aformat.c
index 4fdcb09d..e4314956 100644
--- a/libavfilter/af_aformat.c
+++ b/libavfilter/af_aformat.c
@@ -49,17 +49,18 @@ typedef struct AFormatContext {
 #define A AV_OPT_FLAG_AUDIO_PARAM
 #define F AV_OPT_FLAG_FILTERING_PARAM
 static const AVOption aformat_options[] = {
-    { "sample_fmts",     "A comma-separated list of sample formats.",  OFFSET(formats_str),         AV_OPT_TYPE_STRING, .flags = A|F },
-    { "sample_rates",    "A comma-separated list of sample rates.",    OFFSET(sample_rates_str),    AV_OPT_TYPE_STRING, .flags = A|F },
-    { "channel_layouts", "A comma-separated list of channel layouts.", OFFSET(channel_layouts_str), AV_OPT_TYPE_STRING, .flags = A|F },
+    { "sample_fmts",     "A '|'-separated list of sample formats.",  OFFSET(formats_str),         AV_OPT_TYPE_STRING, .flags = A|F },
+    { "sample_rates",    "A '|'-separated list of sample rates.",    OFFSET(sample_rates_str),    AV_OPT_TYPE_STRING, .flags = A|F },
+    { "channel_layouts", "A '|'-separated list of channel layouts.", OFFSET(channel_layouts_str), AV_OPT_TYPE_STRING, .flags = A|F },
     { NULL }
 };
 
 AVFILTER_DEFINE_CLASS(aformat);
 
-#define PARSE_FORMATS(str, type, list, add_to_list, get_fmt, none, desc)    \
+#define PARSE_FORMATS(str, type, list, add_to_list, unref_fn, get_fmt, none, desc)    \
 do {                                                                        \
     char *next, *cur = str, sep;                                            \
+    int ret;                                                                \
                                                                             \
     if (str && strchr(str, ',')) {                                          \
         av_log(ctx, AV_LOG_WARNING, "This syntax is deprecated, use '|' to "\
@@ -78,7 +79,10 @@ do {                                                                        \
             av_log(ctx, AV_LOG_ERROR, "Error parsing " desc ": %s.\n", cur);\
             return AVERROR(EINVAL);                                         \
         }                                                                   \
-        add_to_list(&list, fmt);                                            \
+        if ((ret = add_to_list(&list, fmt)) < 0) {                          \
+            unref_fn(&list);                                                \
+            return ret;                                                     \
+        }                                                                   \
                                                                             \
         cur = next;                                                         \
     }                                                                       \
@@ -95,11 +99,11 @@ static av_cold int init(AVFilterContext *ctx)
     AFormatContext *s = ctx->priv;
 
     PARSE_FORMATS(s->formats_str, enum AVSampleFormat, s->formats,
-                  ff_add_format, av_get_sample_fmt, AV_SAMPLE_FMT_NONE, "sample format");
-    PARSE_FORMATS(s->sample_rates_str, int, s->sample_rates, ff_add_format,
+                  ff_add_format, ff_formats_unref, av_get_sample_fmt, AV_SAMPLE_FMT_NONE, "sample format");
+    PARSE_FORMATS(s->sample_rates_str, int, s->sample_rates, ff_add_format, ff_formats_unref,
                   get_sample_rate, 0, "sample rate");
     PARSE_FORMATS(s->channel_layouts_str, uint64_t, s->channel_layouts,
-                  ff_add_channel_layout, av_get_channel_layout, 0,
+                  ff_add_channel_layout, ff_channel_layouts_unref, av_get_channel_layout, 0,
                   "channel layout");
 
     return 0;
diff --git a/libavfilter/af_agate.c b/libavfilter/af_agate.c
new file mode 100644
index 00000000..328e25ba
--- /dev/null
+++ b/libavfilter/af_agate.c
@@ -0,0 +1,440 @@
+/*
+ * Copyright (C) 2001-2010 Krzysztof Foltman, Markus Schmidt, Thor Harald Johansen, Damien Zammit
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Audio (Sidechain) Gate filter
+ */
+
+#include "libavutil/audio_fifo.h"
+#include "libavutil/avassert.h"
+#include "libavutil/channel_layout.h"
+#include "libavutil/opt.h"
+#include "avfilter.h"
+#include "audio.h"
+#include "formats.h"
+#include "hermite.h"
+/* Private context shared by the agate and sidechaingate filters. */
+typedef struct AudioGateContext {
+    const AVClass *class;
+
+    double level_in;      /* "level_in" option: gain applied to the gated signal */
+    double level_sc;      /* "level_sc" option: gain applied to the sidechain before detection */
+    double attack;        /* "attack" option; converted to attack_coeff */
+    double release;       /* "release" option; converted to release_coeff */
+    double threshold;     /* "threshold" option (linear) */
+    double ratio;         /* "ratio" option */
+    double knee;          /* "knee" option */
+    double makeup;        /* "makeup" option: extra output gain */
+    double range;         /* "range" option: lower bound of the computed gain */
+    int link;             /* "link" option: 1 = maximum of the channels, otherwise average */
+    int detection;        /* "detection" option: non-zero squares the detected level */
+
+    double thres;         /* log of the (possibly squared) threshold */
+    double knee_start;    /* log of the level where the knee begins */
+    double lin_knee_stop; /* linear level where the knee ends */
+    double knee_stop;     /* log(lin_knee_stop) */
+    double lin_slope;     /* envelope follower state updated by gate() */
+    double attack_coeff;  /* per-sample coefficient used while the level rises */
+    double release_coeff; /* per-sample coefficient used while the level falls */
+
+    AVAudioFifo *fifo[2]; /* sidechaingate only: queues for input 0 and input 1 */
+    int64_t pts;          /* sidechaingate only: pts of the next output frame */
+} AudioGateContext;
+
+#define OFFSET(x) offsetof(AudioGateContext, x)
+#define A AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+/* Option table shared by agate and sidechaingate (each aliases it through a #define below). */
+static const AVOption options[] = {
+    { "level_in",  "set input level",        OFFSET(level_in),  AV_OPT_TYPE_DOUBLE, {.dbl=1},           0.015625,   64, A },
+    { "range",     "set max gain reduction", OFFSET(range),     AV_OPT_TYPE_DOUBLE, {.dbl=0.06125},     0, 1, A },
+    { "threshold", "set threshold",          OFFSET(threshold), AV_OPT_TYPE_DOUBLE, {.dbl=0.125},       0, 1, A },
+    { "ratio",     "set ratio",              OFFSET(ratio),     AV_OPT_TYPE_DOUBLE, {.dbl=2},           1,  9000, A },
+    { "attack",    "set attack",             OFFSET(attack),    AV_OPT_TYPE_DOUBLE, {.dbl=20},          0.01, 9000, A },
+    { "release",   "set release",            OFFSET(release),   AV_OPT_TYPE_DOUBLE, {.dbl=250},         0.01, 9000, A },
+    { "makeup",    "set makeup gain",        OFFSET(makeup),    AV_OPT_TYPE_DOUBLE, {.dbl=1},           1,   64, A },
+    { "knee",      "set knee",               OFFSET(knee),      AV_OPT_TYPE_DOUBLE, {.dbl=2.828427125}, 1,    8, A },
+    { "detection", "set detection",          OFFSET(detection), AV_OPT_TYPE_INT,    {.i64=1},           0,    1, A, "detection" },
+    {   "peak",    0,                        0,                 AV_OPT_TYPE_CONST,  {.i64=0},           0,    0, A, "detection" },
+    {   "rms",     0,                        0,                 AV_OPT_TYPE_CONST,  {.i64=1},           0,    0, A, "detection" },
+    { "link",      "set link",               OFFSET(link),      AV_OPT_TYPE_INT,    {.i64=0},           0,    1, A, "link" },
+    {   "average", 0,                        0,                 AV_OPT_TYPE_CONST,  {.i64=0},           0,    0, A, "link" },
+    {   "maximum", 0,                        0,                 AV_OPT_TYPE_CONST,  {.i64=1},           0,    0, A, "link" },
+    { "level_sc",  "set sidechain gain",     OFFSET(level_sc),  AV_OPT_TYPE_DOUBLE, {.dbl=1},           0.015625,   64, A },
+    { NULL }
+};
+/**
+ * Derive the attack/release coefficients and the log-domain threshold and
+ * knee bounds from the user options and the link's sample rate.
+ * @return always 0
+ */
+static int agate_config_input(AVFilterLink *inlink)
+{
+    AudioGateContext *s = inlink->dst->priv;
+    const double knee_root = sqrt(s->knee);
+    /* With detection enabled the detector works on squared levels. */
+    const double level = s->detection ? s->threshold * s->threshold : s->threshold;
+
+    s->thres         = log(level);
+    s->knee_start    = log(level / knee_root);
+    s->lin_knee_stop = level * knee_root;
+    s->knee_stop     = log(s->lin_knee_stop);
+
+    s->attack_coeff  = FFMIN(1., 1. / (s->attack * inlink->sample_rate / 4000.));
+    s->release_coeff = FFMIN(1., 1. / (s->release * inlink->sample_rate / 4000.));
+
+    return 0;
+}
+
+// A fake infinity value (because real infinity may break some hosts)
+#define FAKE_INFINITY (65536.0 * 65536.0)
+
+// True when value is within 1.0 of FAKE_INFINITY; the argument is parenthesized so any expression is safe
+#define IS_FAKE_INFINITY(value) (fabs((value) - FAKE_INFINITY) < 1.0)
+/**
+ * Compute the gate's linear gain for a detector level.
+ * Levels that are not below lin_knee_stop pass with gain 1.
+ */
+static double output_gain(double lin_slope, double ratio, double thres,
+                          double knee, double knee_start, double knee_stop,
+                          double lin_knee_stop, double range)
+{
+    double log_level, eff_ratio, log_gain;
+
+    if (!(lin_slope < lin_knee_stop))
+        return 1.;
+
+    log_level = log(lin_slope);
+    eff_ratio = IS_FAKE_INFINITY(ratio) ? 1000. : ratio;
+    log_gain  = (log_level - thres) * eff_ratio + thres;
+
+    /* Inside the knee the straight line is replaced by a Hermite curve. */
+    if (knee > 1. && log_level > knee_start)
+        log_gain = hermite_interpolation(log_level, knee_start, knee_stop,
+                                         ((knee_start - thres) * eff_ratio  + thres),
+                                         knee_stop, eff_ratio, 1.);
+    return FFMAX(range, exp(log_gain - log_level));
+}
+/**
+ * Gate nb_samples interleaved frames from src into dst, using the level
+ * detected on scsrc; the envelope follower state is kept in s->lin_slope.
+ */
+static void gate(AudioGateContext *s,
+                 const double *src, double *dst, const double *scsrc,
+                 int nb_samples, double level_in, double level_sc,
+                 AVFilterLink *inlink, AVFilterLink *sclink)
+{
+    const int in_ch = inlink->channels, sc_ch = sclink->channels;
+    int frame, c;
+
+    for (frame = 0; frame < nb_samples; frame++, src += in_ch, dst += in_ch, scsrc += sc_ch) {
+        double level = fabs(scsrc[0] * level_sc), gain = 1.0;
+
+        /* Combine the sidechain channels: maximum if link == 1, else average. */
+        for (c = 1; c < sc_ch; c++) {
+            const double v = fabs(scsrc[c] * level_sc);
+            if (s->link == 1)
+                level = FFMAX(v, level);
+            else
+                level += v;
+        }
+        if (s->link != 1)
+            level /= sc_ch;
+        if (s->detection)
+            level *= level;
+
+        s->lin_slope += (level - s->lin_slope) * (level > s->lin_slope ? s->attack_coeff : s->release_coeff);
+        if (s->lin_slope > 0.0)
+            gain = output_gain(s->lin_slope, s->ratio, s->thres, s->knee,
+                               s->knee_start, s->knee_stop, s->lin_knee_stop, s->range);
+
+        for (c = 0; c < in_ch; c++)
+            dst[c] = src[c] * level_in * gain * s->makeup;
+    }
+}
+
+#if CONFIG_AGATE_FILTER
+
+#define agate_options options
+AVFILTER_DEFINE_CLASS(agate);
+
+/**
+ * Negotiate AV_SAMPLE_FMT_DBL samples, with the channel counts and sample
+ * rates returned by ff_all_channel_counts() and ff_all_samplerates().
+ */
+static int query_formats(AVFilterContext *ctx)
+{
+    AVFilterChannelLayouts *counts;
+    AVFilterFormats *rates, *fmts = NULL;
+    int err = ff_add_format(&fmts, AV_SAMPLE_FMT_DBL);
+
+    if (err >= 0)
+        err = ff_set_common_formats(ctx, fmts);
+    if (err < 0)
+        return err;
+
+    counts = ff_all_channel_counts();
+    if (!counts)
+        return AVERROR(ENOMEM);
+    if ((err = ff_set_common_channel_layouts(ctx, counts)) < 0)
+        return err;
+    rates = ff_all_samplerates();
+    if (!rates)
+        return AVERROR(ENOMEM);
+    return ff_set_common_samplerates(ctx, rates);
+}
+
+/**
+ * Gate one frame, using the input itself as the detector signal. Writable
+ * frames are processed in place; otherwise a new output buffer is used.
+ */
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AudioGateContext *s = ctx->priv;
+    AVFrame *out = in;
+
+    if (!av_frame_is_writable(in)) {
+        out = ff_get_audio_buffer(inlink, in->nb_samples);
+        if (!out) {
+            av_frame_free(&in);
+            return AVERROR(ENOMEM);
+        }
+        av_frame_copy_props(out, in);
+    }
+
+    gate(s, (const double *)in->data[0], (double *)out->data[0],
+         (const double *)in->data[0], in->nb_samples,
+         s->level_in, s->level_in, inlink, inlink);
+
+    if (out != in)
+        av_frame_free(&in);
+
+    return ff_filter_frame(ctx->outputs[0], out);
+}
+/* Pads and filter definition for agate: one audio input (configured by agate_config_input), one audio output. */
+static const AVFilterPad inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_AUDIO,
+        .filter_frame = filter_frame,
+        .config_props = agate_config_input,
+    },
+    { NULL }
+};
+
+static const AVFilterPad outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_AUDIO,
+    },
+    { NULL }
+};
+
+AVFilter ff_af_agate = {
+    .name           = "agate",
+    .description    = NULL_IF_CONFIG_SMALL("Audio gate."),
+    .query_formats  = query_formats,
+    .priv_size      = sizeof(AudioGateContext),
+    .priv_class     = &agate_class,
+    .inputs         = inputs,
+    .outputs        = outputs,
+};
+
+#endif /* CONFIG_AGATE_FILTER */
+
+#if CONFIG_SIDECHAINGATE_FILTER
+
+#define sidechaingate_options options
+AVFILTER_DEFINE_CLASS(sidechaingate);
+
+/**
+ * Queue the frame on the FIFO of the input it arrived on, then gate as many
+ * samples as both FIFOs hold, using input 1 as the detector for input 0.
+ * @return the ff_filter_frame() result, 0 if nothing can be output yet, or a negative AVERROR code
+ */
+static int scfilter_frame(AVFilterLink *link, AVFrame *frame)
+{
+    AVFilterContext *ctx = link->dst;
+    AudioGateContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AVFrame *out, *in[2] = { NULL };
+    int nb_samples, ret, i;
+
+    for (i = 0; i < 2; i++)
+        if (link == ctx->inputs[i])
+            break;
+    av_assert0(i < 2);
+    ret = av_audio_fifo_write(s->fifo[i], (void **)frame->extended_data,
+                              frame->nb_samples);
+    av_frame_free(&frame);
+    if (ret < 0) /* the samples could not be queued */
+        return ret;
+
+    nb_samples = FFMIN(av_audio_fifo_size(s->fifo[0]), av_audio_fifo_size(s->fifo[1]));
+    if (!nb_samples)
+        return 0;
+
+    out = ff_get_audio_buffer(outlink, nb_samples);
+    if (!out)
+        return AVERROR(ENOMEM);
+    for (i = 0; i < 2; i++) {
+        in[i] = ff_get_audio_buffer(ctx->inputs[i], nb_samples);
+        if (!in[i]) {
+            av_frame_free(&in[0]);
+            av_frame_free(&in[1]);
+            av_frame_free(&out);
+            return AVERROR(ENOMEM);
+        }
+        av_audio_fifo_read(s->fifo[i], (void **)in[i]->data, nb_samples);
+    }
+
+    out->pts = s->pts;
+    s->pts += nb_samples;
+    gate(s, (double *)in[0]->data[0], (double *)out->data[0],
+         (double *)in[1]->data[0], nb_samples,
+         s->level_in, s->level_sc, ctx->inputs[0], ctx->inputs[1]);
+    av_frame_free(&in[0]);
+    av_frame_free(&in[1]);
+    return ff_filter_frame(outlink, out);
+}
+
+/* Request a frame from the first input whose FIFO is empty; 0 if neither is. */
+static int screquest_frame(AVFilterLink *outlink)
+{
+    AVFilterContext *filter = outlink->src;
+    AudioGateContext *gate_ctx = filter->priv;
+    int idx = 0;
+
+    while (idx < 2 && av_audio_fifo_size(gate_ctx->fifo[idx]))
+        idx++;
+    if (idx == 2)
+        return 0;
+
+    /* Only one input is polled per call. */
+    return ff_request_frame(filter->inputs[idx]);
+}
+
+/**
+ * Negotiate formats for sidechaingate: the output takes the first channel
+ * layout offered on input 0, both inputs accept the list returned by
+ * ff_all_channel_counts(), and samples use AV_SAMPLE_FMT_DBL.
+ */
+static int scquery_formats(AVFilterContext *ctx)
+{
+    static const enum AVSampleFormat sample_fmts[] = { AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_NONE };
+    AVFilterChannelLayouts *main_in = ctx->inputs[0]->in_channel_layouts;
+    AVFilterChannelLayouts *layouts = NULL;
+    int ret, i;
+
+    if (!main_in || !main_in->nb_channel_layouts) {
+        av_log(ctx, AV_LOG_WARNING,
+               "No channel layout for input 1\n");
+        return AVERROR(EAGAIN);
+    }
+
+    ret = ff_add_channel_layout(&layouts, main_in->channel_layouts[0]);
+    if (ret >= 0)
+        ret = ff_channel_layouts_ref(layouts, &ctx->outputs[0]->in_channel_layouts);
+    if (ret < 0)
+        return ret;
+
+    for (i = 0; i < 2; i++) {
+        ret = ff_channel_layouts_ref(ff_all_channel_counts(), &ctx->inputs[i]->out_channel_layouts);
+        if (ret < 0)
+            return ret;
+    }
+
+    if ((ret = ff_set_common_formats(ctx, ff_make_format_list(sample_fmts))) < 0)
+        return ret;
+    return ff_set_common_samplerates(ctx, ff_all_samplerates());
+}
+
+/* Configure the output from input 0, allocate one FIFO per input and derive
+ * the gate coefficients; both inputs must share one sample rate. */
+static int scconfig_output(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    AudioGateContext *s = ctx->priv;
+    AVFilterLink *main_in = ctx->inputs[0], *side_in = ctx->inputs[1];
+
+    if (main_in->sample_rate != side_in->sample_rate) {
+        av_log(ctx, AV_LOG_ERROR,
+               "Inputs must have the same sample rate "
+               "%d for in0 vs %d for in1\n",
+               main_in->sample_rate, side_in->sample_rate);
+        return AVERROR(EINVAL);
+    }
+
+    outlink->sample_rate    = main_in->sample_rate;
+    outlink->time_base      = main_in->time_base;
+    outlink->channel_layout = main_in->channel_layout;
+    outlink->channels       = main_in->channels;
+
+    s->fifo[0] = av_audio_fifo_alloc(main_in->format, main_in->channels, 1024);
+    s->fifo[1] = av_audio_fifo_alloc(side_in->format, side_in->channels, 1024);
+    if (!s->fifo[0] || !s->fifo[1])
+        return AVERROR(ENOMEM);
+    agate_config_input(main_in);
+    return 0;
+}
+/* Release the main and sidechain FIFOs owned by the private context. */
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    AudioGateContext *priv = ctx->priv;
+    int idx;
+    for (idx = 0; idx < 2; idx++)
+        av_audio_fifo_free(priv->fifo[idx]);
+}
+/* Pads and filter definition for sidechaingate: "main" and "sidechain" inputs share scfilter_frame. */
+static const AVFilterPad sidechaingate_inputs[] = {
+    {
+        .name           = "main",
+        .type           = AVMEDIA_TYPE_AUDIO,
+        .filter_frame   = scfilter_frame,
+    },{
+        .name           = "sidechain",
+        .type           = AVMEDIA_TYPE_AUDIO,
+        .filter_frame   = scfilter_frame,
+    },
+    { NULL }
+};
+
+static const AVFilterPad sidechaingate_outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_AUDIO,
+        .config_props  = scconfig_output,
+        .request_frame = screquest_frame,
+    },
+    { NULL }
+};
+
+AVFilter ff_af_sidechaingate = {
+    .name           = "sidechaingate",
+    .description    = NULL_IF_CONFIG_SMALL("Audio sidechain gate."),
+    .priv_size      = sizeof(AudioGateContext),
+    .priv_class     = &sidechaingate_class,
+    .query_formats  = scquery_formats,
+    .uninit         = uninit, /* frees both FIFOs */
+    .inputs         = sidechaingate_inputs,
+    .outputs        = sidechaingate_outputs,
+};
+#endif  /* CONFIG_SIDECHAINGATE_FILTER */
diff --git a/libavfilter/af_alimiter.c b/libavfilter/af_alimiter.c
new file mode 100644
index 00000000..46211a71
--- /dev/null
+++ b/libavfilter/af_alimiter.c
@@ -0,0 +1,370 @@
+/*
+ * Copyright (C) 2001-2010 Krzysztof Foltman, Markus Schmidt, Thor Harald Johansen and others
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Lookahead limiter filter
+ */
+
+#include "libavutil/avassert.h"
+#include "libavutil/channel_layout.h"
+#include "libavutil/common.h"
+#include "libavutil/opt.h"
+
+#include "audio.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+
+typedef struct AudioLimiterContext {
+    const AVClass *class;
+
+    double limit;        /* "limit" option; peaks above it trigger gain reduction */
+    double attack;       /* "attack" option, divided by 1000 in init() */
+    double release;      /* "release" option, divided by 1000 in init() */
+    double att;          /* running gain multiplied into the emitted samples */
+    double level_in;     /* factor applied to every input sample */
+    double level_out;    /* factor applied to every output sample */
+    int auto_release;    /* "asc" option */
+    int auto_level;      /* "level" option: when set the output is also scaled by 1 / limit */
+    double asc;          /* running sum: over-limit input peaks added, emitted ones subtracted */
+    int asc_c;           /* number of peaks currently summed in asc */
+    int asc_pos;         /* set to -1 in init(); compared against pos in filter_frame() */
+    double asc_coeff;    /* "asc_level" option, remapped in init() */
+
+    double *buffer;      /* interleaved sample delay line */
+    int buffer_size;     /* part of buffer in use, in samples (a multiple of the channel count) */
+    int pos;             /* index in buffer where the next input frame is written */
+    int *nextpos;        /* queue of buffer positions of pending peaks; -1 marks an empty slot */
+    double *nextdelta;   /* gain step stored alongside each nextpos entry */
+
+    double delta;        /* amount added to att for every processed frame */
+    int nextiter;        /* index of the head of the nextpos / nextdelta queue */
+    int nextlen;         /* number of queued entries */
+    int asc_changed;     /* read in filter_frame(); no code in this file assigns it */
+} AudioLimiterContext;
+
+#define OFFSET(x) offsetof(AudioLimiterContext, x)
+#define A AV_OPT_FLAG_AUDIO_PARAM
+#define F AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption alimiter_options[] = { /* init() divides attack and release by 1000 */
+    { "level_in",  "set input level",  OFFSET(level_in),     AV_OPT_TYPE_DOUBLE, { .dbl = 1 },   0.015625, 64,   A | F },
+    { "level_out", "set output level", OFFSET(level_out),    AV_OPT_TYPE_DOUBLE, { .dbl = 1 },   0.015625, 64,   A | F },
+    { "limit",     "set limit",        OFFSET(limit),        AV_OPT_TYPE_DOUBLE, { .dbl = 1 },   0.0625,   1,    A | F },
+    { "attack",    "set attack",       OFFSET(attack),       AV_OPT_TYPE_DOUBLE, { .dbl = 5 },   0.1,      80,   A | F },
+    { "release",   "set release",      OFFSET(release),      AV_OPT_TYPE_DOUBLE, { .dbl = 50 },  1,        8000, A | F },
+    { "asc",       "enable asc",       OFFSET(auto_release), AV_OPT_TYPE_BOOL,   { .i64 = 0 },   0,        1,    A | F },
+    { "asc_level", "set asc level",    OFFSET(asc_coeff),    AV_OPT_TYPE_DOUBLE, { .dbl = 0.5 }, 0,        1,    A | F },
+    { "level",     "auto level",       OFFSET(auto_level),   AV_OPT_TYPE_BOOL,   { .i64 = 1 },   0,        1,    A | F },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(alimiter);
+
+static av_cold int init(AVFilterContext *ctx)
+{
+    AudioLimiterContext *lim = ctx->priv;
+    /* Start at unity gain with asc_pos set to -1. */
+    lim->att       = 1.;
+    lim->asc_pos   = -1;
+    /* Rescale the options: time constants by 1 / 1000, asc_level x to -2 * 0.5^(x - 0.5). */
+    lim->attack    = lim->attack  / 1000.;
+    lim->release   = lim->release / 1000.;
+    lim->asc_coeff = -2. * pow(0.5, lim->asc_coeff - 0.5);
+    return 0;
+}
+
+/* Release step (1 - patt) / (sample_rate * release); with asc it may be replaced by a smaller one. */
+static double get_rdelta(AudioLimiterContext *s, double release, int sample_rate,
+                         double peak, double limit, double patt, int asc)
+{
+    const double span   = sample_rate * release;
+    const double rdelta = (1.0 - patt) / span;
+    double a_att, slow;
+
+    if (!asc || !s->auto_release || s->asc_c <= 0)
+        return rdelta;
+
+    a_att = limit / (s->asc_coeff * s->asc) * (double)s->asc_c;
+    if (!(a_att > patt)) /* negated form keeps the original outcome for NaN */
+        return rdelta;
+
+    slow = FFMAX((a_att - patt) / span, rdelta / 10);
+    return slow < rdelta ? slow : rdelta;
+}
+
+/* Limit one frame of interleaved doubles. Every input frame is scaled by level_in and written
+ * to the delay line at s->pos, while the frame in the following slot (the next one to be
+ * overwritten) is emitted: multiplied by the running gain s->att, clipped to +-limit and scaled
+ * by level * level_out. Consumes in; returns AVERROR(ENOMEM) or the ff_filter_frame() result. */
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AudioLimiterContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+    const double *src = (const double *)in->data[0];
+    const int channels = inlink->channels;
+    const int buffer_size = s->buffer_size;
+    double *dst, *buf, *buffer = s->buffer;
+    const double release = s->release;
+    const double limit = s->limit;
+    double *nextdelta = s->nextdelta;
+    double level = s->auto_level ? 1 / limit : 1;
+    const double level_out = s->level_out;
+    const double level_in = s->level_in;
+    int *nextpos = s->nextpos;
+    AVFrame *out;
+    int n, c, i;
+
+    if (av_frame_is_writable(in)) {
+        out = in;
+    } else {
+        out = ff_get_audio_buffer(inlink, in->nb_samples);
+        if (!out) {
+            av_frame_free(&in);
+            return AVERROR(ENOMEM);
+        }
+        av_frame_copy_props(out, in);
+    }
+    dst = (double *)out->data[0];
+
+    for (n = 0; n < in->nb_samples; n++) {
+        double peak = 0;
+        /* store the scaled input frame in the delay line and take its peak */
+        for (c = 0; c < channels; c++) {
+            double sample = src[c] * level_in;
+
+            buffer[s->pos + c] = sample;
+            peak = FFMAX(peak, fabs(sample));
+        }
+        /* asc / asc_c: sum and count of the over-limit peaks that entered the delay line */
+        if (s->auto_release && peak > limit) {
+            s->asc += peak;
+            s->asc_c++;
+        }
+        /* delta: per-frame step taking att to limit / peak in buffer_size / channels frames */
+        if (peak > limit) {
+            double patt = FFMIN(limit / peak, 1.);
+            double rdelta = get_rdelta(s, release, inlink->sample_rate,
+                                       peak, limit, patt, 0);
+            double delta = (limit / peak - s->att) / buffer_size * channels;
+            int found = 0;
+            /* a step below the current one restarts the queue with this peak alone */
+            if (delta < s->delta) {
+                s->delta = delta;
+                nextpos[0] = s->pos;
+                nextpos[1] = -1;
+                nextdelta[0] = rdelta;
+                s->nextlen = 1;
+                s->nextiter= 0;
+            } else { /* otherwise look for a queued peak to chain this one after */
+                for (i = s->nextiter; i < s->nextiter + s->nextlen; i++) {
+                    int j = i % buffer_size;
+                    double ppeak = 0, pdelta;
+
+                    for (c = 0; c < channels; c++) /* peak over every channel of the queued frame */
+                        ppeak = FFMAX(ppeak, fabs(buffer[nextpos[j] + c]));
+                    pdelta = (limit / peak - limit / ppeak) / (((buffer_size - nextpos[j] + s->pos) % buffer_size) / channels);
+                    if (pdelta < nextdelta[j]) {
+                        nextdelta[j] = pdelta;
+                        found = 1;
+                        break;
+                    }
+                }
+                if (found) { /* cut the queue after entry i and append this peak */
+                    s->nextlen = i - s->nextiter + 1;
+                    nextpos[(s->nextiter + s->nextlen) % buffer_size] = s->pos;
+                    nextdelta[(s->nextiter + s->nextlen) % buffer_size] = rdelta;
+                    nextpos[(s->nextiter + s->nextlen + 1) % buffer_size] = -1;
+                    s->nextlen++;
+                }
+            }
+        }
+        /* frame leaving the delay line: the slot following the write position */
+        buf = &s->buffer[(s->pos + channels) % buffer_size];
+        peak = 0;
+        for (c = 0; c < channels; c++)
+            peak = FFMAX(peak, fabs(buf[c]));
+
+        if (s->pos == s->asc_pos && !s->asc_changed)
+            s->asc_pos = -1;
+
+        if (s->auto_release && s->asc_pos == -1 && peak > limit) {
+            s->asc -= peak;
+            s->asc_c--;
+        }
+
+        s->att += s->delta;
+
+        for (c = 0; c < channels; c++)
+            dst[c] = buf[c] * s->att;
+        /* the frame just emitted is the peak at the head of the queue: pick the next step */
+        if ((s->pos + channels) % buffer_size == nextpos[s->nextiter]) {
+            if (s->auto_release) {
+                s->delta = get_rdelta(s, release, inlink->sample_rate,
+                                      peak, limit, s->att, 1);
+                if (s->nextlen > 1) {
+                    int pnextpos = nextpos[(s->nextiter + 1) % buffer_size];
+                    double ppeak = 0, pdelta;
+                    for (c = 0; c < channels; c++) /* peak over every channel of the next queued frame */
+                        ppeak = FFMAX(ppeak, fabs(buffer[pnextpos + c]));
+                    pdelta = (limit / ppeak - s->att) /
+                                    (((buffer_size + pnextpos -
+                                    ((s->pos + channels) % buffer_size)) %
+                                    buffer_size) / channels);
+                    if (pdelta < s->delta)
+                        s->delta = pdelta;
+                }
+            } else {
+                s->delta = nextdelta[s->nextiter];
+                s->att = limit / peak;
+            }
+
+            s->nextlen -= 1;
+            nextpos[s->nextiter] = -1;
+            s->nextiter = (s->nextiter + 1) % buffer_size;
+        }
+        /* gain above unity: reset it to unity and empty the queue */
+        if (s->att > 1.) {
+            s->att = 1.;
+            s->delta = 0.;
+            s->nextiter = 0;
+            s->nextlen = 0;
+            nextpos[0] = -1;
+        }
+        /* keep the gain strictly positive and switch to the plain release step */
+        if (s->att <= 0.) {
+            s->att = 0.0000000000001;
+            s->delta = (1.0 - s->att) / (inlink->sample_rate * release);
+        }
+        /* snap a gain within 1e-13 of 1, and a step within 1e-14 of 0, to the exact value */
+        if (s->att != 1. && (1. - s->att) < 0.0000000000001)
+            s->att = 1.;
+
+        if (s->delta != 0. && fabs(s->delta) < 0.00000000000001)
+            s->delta = 0.;
+        /* hard clip to +-limit, then apply the auto-level and output factors */
+        for (c = 0; c < channels; c++)
+            dst[c] = av_clipd(dst[c], -limit, limit) * level * level_out;
+
+        s->pos = (s->pos + channels) % buffer_size;
+        src += channels;
+        dst += channels;
+    }
+
+    if (in != out)
+        av_frame_free(&in);
+
+    return ff_filter_frame(outlink, out);
+}
+
+/* Format negotiation: any channel count, AV_SAMPLE_FMT_DBL samples only, any sample rate. */
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVSampleFormat sample_fmts[] = {
+        AV_SAMPLE_FMT_DBL,
+        AV_SAMPLE_FMT_NONE
+    };
+    AVFilterChannelLayouts *layouts = ff_all_channel_counts();
+    AVFilterFormats *list;
+    int ret;
+
+    /* channel layouts / counts */
+    if (!layouts)
+        return AVERROR(ENOMEM);
+    if ((ret = ff_set_common_channel_layouts(ctx, layouts)) < 0)
+        return ret;
+
+    /* sample formats */
+    if (!(list = ff_make_format_list(sample_fmts)))
+        return AVERROR(ENOMEM);
+    if ((ret = ff_set_common_formats(ctx, list)) < 0)
+        return ret;
+
+    /* sample rates: the result of this last call is the function's result */
+    list = ff_all_samplerates();
+    if (!list)
+        return AVERROR(ENOMEM);
+    return ff_set_common_samplerates(ctx, list);
+}
+
+/* Size the buffers (100 ms of samples plus one frame) and derive the lookahead length from attack. */
+static int config_input(AVFilterLink *inlink)
+{
+    AudioLimiterContext *s = inlink->dst->priv;
+    int obuffer_size = inlink->sample_rate * inlink->channels * 100 / 1000. + inlink->channels;
+
+    if (obuffer_size < inlink->channels)
+        return AVERROR(EINVAL);
+    s->buffer_size  = inlink->sample_rate * s->attack * inlink->channels;
+    s->buffer_size -= s->buffer_size % inlink->channels; /* whole frames only */
+    if (s->buffer_size <= 0) { /* filter_frame() computes "% buffer_size": zero must not get there */
+        av_log(inlink->dst, AV_LOG_ERROR, "Attack is too small.\n");
+        return AVERROR(EINVAL);
+    }
+    s->buffer    = av_calloc(obuffer_size, sizeof(*s->buffer));
+    s->nextdelta = av_calloc(obuffer_size, sizeof(*s->nextdelta));
+    s->nextpos   = av_malloc_array(obuffer_size, sizeof(*s->nextpos));
+    if (!s->buffer || !s->nextdelta || !s->nextpos)
+        return AVERROR(ENOMEM); /* partial allocations are released by uninit() */
+    memset(s->nextpos, -1, obuffer_size * sizeof(*s->nextpos)); /* -1 marks an unused queue slot */
+    return 0;
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    AudioLimiterContext *lim = ctx->priv;
+    /* Free the config_input() allocations; av_freep() also resets each pointer to NULL. */
+    av_freep(&lim->nextpos);
+    av_freep(&lim->nextdelta);
+    av_freep(&lim->buffer);
+}
+
+/* Single audio input pad; the { NULL } entry ends the list. */
+static const AVFilterPad alimiter_inputs[] = {
+    {
+        .type         = AVMEDIA_TYPE_AUDIO,
+        .name         = "main",
+        .config_props = config_input,
+        .filter_frame = filter_frame,
+    }, { NULL }
+};
+
+/* Single audio output pad that sets no callbacks. */
+static const AVFilterPad alimiter_outputs[] = {
+    {
+        .type = AVMEDIA_TYPE_AUDIO,
+        .name = "default",
+    }, { NULL }
+};
+
+AVFilter ff_af_alimiter = { /* public descriptor of the "alimiter" filter */
+    .name          = "alimiter",
+    .description   = NULL_IF_CONFIG_SMALL("Audio lookahead limiter."),
+    .priv_class    = &alimiter_class,
+    .priv_size     = sizeof(AudioLimiterContext),
+    .inputs        = alimiter_inputs,
+    .outputs       = alimiter_outputs,
+    .query_formats = query_formats,
+    .init          = init,
+    .uninit        = uninit,
+};
diff --git a/libavfilter/af_amerge.c b/libavfilter/af_amerge.c
index 0a0a79fd..2b4edb09 100644
--- a/libavfilter/af_amerge.c
+++ b/libavfilter/af_amerge.c
@@ -32,7 +32,7 @@
 #include "bufferqueue.h"
 #include "internal.h"
 
-#define SWR_CH_MAX 32
+#define SWR_CH_MAX 64
 
 typedef struct {
     const AVClass *class;
@@ -60,27 +60,27 @@ AVFILTER_DEFINE_CLASS(amerge);
 
 static av_cold void uninit(AVFilterContext *ctx)
 {
-    AMergeContext *am = ctx->priv;
+    AMergeContext *s = ctx->priv;
     int i;
 
-    for (i = 0; i < am->nb_inputs; i++) {
-        if (am->in)
-            ff_bufqueue_discard_all(&am->in[i].queue);
+    for (i = 0; i < s->nb_inputs; i++) {
+        if (s->in)
+            ff_bufqueue_discard_all(&s->in[i].queue);
         if (ctx->input_pads)
             av_freep(&ctx->input_pads[i].name);
     }
-    av_freep(&am->in);
+    av_freep(&s->in);
 }
 
 static int query_formats(AVFilterContext *ctx)
 {
-    AMergeContext *am = ctx->priv;
+    AMergeContext *s = ctx->priv;
     int64_t inlayout[SWR_CH_MAX], outlayout = 0;
     AVFilterFormats *formats;
     AVFilterChannelLayouts *layouts;
-    int i, overlap = 0, nb_ch = 0;
+    int i, ret, overlap = 0, nb_ch = 0;
 
-    for (i = 0; i < am->nb_inputs; i++) {
+    for (i = 0; i < s->nb_inputs; i++) {
         if (!ctx->inputs[i]->in_channel_layouts ||
             !ctx->inputs[i]->in_channel_layouts->nb_channel_layouts) {
             av_log(ctx, AV_LOG_WARNING,
@@ -93,11 +93,11 @@ static int query_formats(AVFilterContext *ctx)
             av_get_channel_layout_string(buf, sizeof(buf), 0, inlayout[i]);
             av_log(ctx, AV_LOG_INFO, "Using \"%s\" for input %d\n", buf, i + 1);
         }
-        am->in[i].nb_ch = av_get_channel_layout_nb_channels(inlayout[i]);
+        s->in[i].nb_ch = av_get_channel_layout_nb_channels(inlayout[i]);
         if (outlayout & inlayout[i])
             overlap++;
         outlayout |= inlayout[i];
-        nb_ch += am->in[i].nb_ch;
+        nb_ch += s->in[i].nb_ch;
     }
     if (nb_ch > SWR_CH_MAX) {
         av_log(ctx, AV_LOG_ERROR, "Too many channels (max %d)\n", SWR_CH_MAX);
@@ -108,44 +108,49 @@ static int query_formats(AVFilterContext *ctx)
                "Input channel layouts overlap: "
                "output layout will be determined by the number of distinct input channels\n");
         for (i = 0; i < nb_ch; i++)
-            am->route[i] = i;
+            s->route[i] = i;
         outlayout = av_get_default_channel_layout(nb_ch);
-        if (!outlayout)
-            outlayout = ((int64_t)1 << nb_ch) - 1;
+        if (!outlayout && nb_ch)
+            outlayout = 0xFFFFFFFFFFFFFFFFULL >> (64 - nb_ch);
     } else {
         int *route[SWR_CH_MAX];
         int c, out_ch_number = 0;
 
-        route[0] = am->route;
-        for (i = 1; i < am->nb_inputs; i++)
-            route[i] = route[i - 1] + am->in[i - 1].nb_ch;
+        route[0] = s->route;
+        for (i = 1; i < s->nb_inputs; i++)
+            route[i] = route[i - 1] + s->in[i - 1].nb_ch;
         for (c = 0; c < 64; c++)
-            for (i = 0; i < am->nb_inputs; i++)
+            for (i = 0; i < s->nb_inputs; i++)
                 if ((inlayout[i] >> c) & 1)
                     *(route[i]++) = out_ch_number++;
     }
     formats = ff_make_format_list(ff_packed_sample_fmts_array);
-    ff_set_common_formats(ctx, formats);
-    for (i = 0; i < am->nb_inputs; i++) {
+    if ((ret = ff_set_common_formats(ctx, formats)) < 0)
+        return ret;
+    for (i = 0; i < s->nb_inputs; i++) {
         layouts = NULL;
-        ff_add_channel_layout(&layouts, inlayout[i]);
-        ff_channel_layouts_ref(layouts, &ctx->inputs[i]->out_channel_layouts);
+        if ((ret = ff_add_channel_layout(&layouts, inlayout[i])) < 0)
+            return ret;
+        if ((ret = ff_channel_layouts_ref(layouts, &ctx->inputs[i]->out_channel_layouts)) < 0)
+            return ret;
     }
     layouts = NULL;
-    ff_add_channel_layout(&layouts, outlayout);
-    ff_channel_layouts_ref(layouts, &ctx->outputs[0]->in_channel_layouts);
-    ff_set_common_samplerates(ctx, ff_all_samplerates());
-    return 0;
+    if ((ret = ff_add_channel_layout(&layouts, outlayout)) < 0)
+        return ret;
+    if ((ret = ff_channel_layouts_ref(layouts, &ctx->outputs[0]->in_channel_layouts)) < 0)
+        return ret;
+
+    return ff_set_common_samplerates(ctx, ff_all_samplerates());
 }
 
 static int config_output(AVFilterLink *outlink)
 {
     AVFilterContext *ctx = outlink->src;
-    AMergeContext *am = ctx->priv;
+    AMergeContext *s = ctx->priv;
     AVBPrint bp;
     int i;
 
-    for (i = 1; i < am->nb_inputs; i++) {
+    for (i = 1; i < s->nb_inputs; i++) {
         if (ctx->inputs[i]->sample_rate != ctx->inputs[0]->sample_rate) {
             av_log(ctx, AV_LOG_ERROR,
                    "Inputs must have the same sample rate "
@@ -154,12 +159,12 @@ static int config_output(AVFilterLink *outlink)
             return AVERROR(EINVAL);
         }
     }
-    am->bps = av_get_bytes_per_sample(ctx->outputs[0]->format);
+    s->bps = av_get_bytes_per_sample(ctx->outputs[0]->format);
     outlink->sample_rate = ctx->inputs[0]->sample_rate;
     outlink->time_base   = ctx->inputs[0]->time_base;
 
     av_bprint_init(&bp, 0, 1);
-    for (i = 0; i < am->nb_inputs; i++) {
+    for (i = 0; i < s->nb_inputs; i++) {
         av_bprintf(&bp, "%sin%d:", i ? " + " : "", i);
         av_bprint_channel_layout(&bp, -1, ctx->inputs[i]->channel_layout);
     }
@@ -173,11 +178,11 @@ static int config_output(AVFilterLink *outlink)
 static int request_frame(AVFilterLink *outlink)
 {
     AVFilterContext *ctx = outlink->src;
-    AMergeContext *am = ctx->priv;
+    AMergeContext *s = ctx->priv;
     int i, ret;
 
-    for (i = 0; i < am->nb_inputs; i++)
-        if (!am->in[i].nb_samples)
+    for (i = 0; i < s->nb_inputs; i++)
+        if (!s->in[i].nb_samples)
             if ((ret = ff_request_frame(ctx->inputs[i])) < 0)
                 return ret;
     return 0;
@@ -223,27 +228,27 @@ static inline void copy_samples(int nb_inputs, struct amerge_input in[],
 static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
 {
     AVFilterContext *ctx = inlink->dst;
-    AMergeContext *am = ctx->priv;
+    AMergeContext *s = ctx->priv;
     AVFilterLink *const outlink = ctx->outputs[0];
     int input_number;
     int nb_samples, ns, i;
     AVFrame *outbuf, *inbuf[SWR_CH_MAX];
     uint8_t *ins[SWR_CH_MAX], *outs;
 
-    for (input_number = 0; input_number < am->nb_inputs; input_number++)
+    for (input_number = 0; input_number < s->nb_inputs; input_number++)
         if (inlink == ctx->inputs[input_number])
             break;
-    av_assert1(input_number < am->nb_inputs);
-    if (ff_bufqueue_is_full(&am->in[input_number].queue)) {
+    av_assert1(input_number < s->nb_inputs);
+    if (ff_bufqueue_is_full(&s->in[input_number].queue)) {
         av_frame_free(&insamples);
         return AVERROR(ENOMEM);
     }
-    ff_bufqueue_add(ctx, &am->in[input_number].queue, av_frame_clone(insamples));
-    am->in[input_number].nb_samples += insamples->nb_samples;
+    ff_bufqueue_add(ctx, &s->in[input_number].queue, av_frame_clone(insamples));
+    s->in[input_number].nb_samples += insamples->nb_samples;
     av_frame_free(&insamples);
-    nb_samples = am->in[0].nb_samples;
-    for (i = 1; i < am->nb_inputs; i++)
-        nb_samples = FFMIN(nb_samples, am->in[i].nb_samples);
+    nb_samples = s->in[0].nb_samples;
+    for (i = 1; i < s->nb_inputs; i++)
+        nb_samples = FFMIN(nb_samples, s->in[i].nb_samples);
     if (!nb_samples)
         return 0;
 
@@ -251,15 +256,15 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
     if (!outbuf)
         return AVERROR(ENOMEM);
     outs = outbuf->data[0];
-    for (i = 0; i < am->nb_inputs; i++) {
-        inbuf[i] = ff_bufqueue_peek(&am->in[i].queue, 0);
+    for (i = 0; i < s->nb_inputs; i++) {
+        inbuf[i] = ff_bufqueue_peek(&s->in[i].queue, 0);
         ins[i] = inbuf[i]->data[0] +
-                 am->in[i].pos * am->in[i].nb_ch * am->bps;
+                 s->in[i].pos * s->in[i].nb_ch * s->bps;
     }
     av_frame_copy_props(outbuf, inbuf[0]);
     outbuf->pts = inbuf[0]->pts == AV_NOPTS_VALUE ? AV_NOPTS_VALUE :
                   inbuf[0]->pts +
-                  av_rescale_q(am->in[0].pos,
+                  av_rescale_q(s->in[0].pos,
                                av_make_q(1, ctx->inputs[0]->sample_rate),
                                ctx->outputs[0]->time_base);
 
@@ -269,34 +274,34 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
 
     while (nb_samples) {
         ns = nb_samples;
-        for (i = 0; i < am->nb_inputs; i++)
-            ns = FFMIN(ns, inbuf[i]->nb_samples - am->in[i].pos);
+        for (i = 0; i < s->nb_inputs; i++)
+            ns = FFMIN(ns, inbuf[i]->nb_samples - s->in[i].pos);
         /* Unroll the most common sample formats: speed +~350% for the loop,
            +~13% overall (including two common decoders) */
-        switch (am->bps) {
+        switch (s->bps) {
             case 1:
-                copy_samples(am->nb_inputs, am->in, am->route, ins, &outs, ns, 1);
+                copy_samples(s->nb_inputs, s->in, s->route, ins, &outs, ns, 1);
                 break;
             case 2:
-                copy_samples(am->nb_inputs, am->in, am->route, ins, &outs, ns, 2);
+                copy_samples(s->nb_inputs, s->in, s->route, ins, &outs, ns, 2);
                 break;
             case 4:
-                copy_samples(am->nb_inputs, am->in, am->route, ins, &outs, ns, 4);
+                copy_samples(s->nb_inputs, s->in, s->route, ins, &outs, ns, 4);
                 break;
             default:
-                copy_samples(am->nb_inputs, am->in, am->route, ins, &outs, ns, am->bps);
+                copy_samples(s->nb_inputs, s->in, s->route, ins, &outs, ns, s->bps);
                 break;
         }
 
         nb_samples -= ns;
-        for (i = 0; i < am->nb_inputs; i++) {
-            am->in[i].nb_samples -= ns;
-            am->in[i].pos += ns;
-            if (am->in[i].pos == inbuf[i]->nb_samples) {
-                am->in[i].pos = 0;
+        for (i = 0; i < s->nb_inputs; i++) {
+            s->in[i].nb_samples -= ns;
+            s->in[i].pos += ns;
+            if (s->in[i].pos == inbuf[i]->nb_samples) {
+                s->in[i].pos = 0;
                 av_frame_free(&inbuf[i]);
-                ff_bufqueue_get(&am->in[i].queue);
-                inbuf[i] = ff_bufqueue_peek(&am->in[i].queue, 0);
+                ff_bufqueue_get(&s->in[i].queue);
+                inbuf[i] = ff_bufqueue_peek(&s->in[i].queue, 0);
                 ins[i] = inbuf[i] ? inbuf[i]->data[0] : NULL;
             }
         }
@@ -306,13 +311,13 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
 
 static av_cold int init(AVFilterContext *ctx)
 {
-    AMergeContext *am = ctx->priv;
+    AMergeContext *s = ctx->priv;
     int i;
 
-    am->in = av_calloc(am->nb_inputs, sizeof(*am->in));
-    if (!am->in)
+    s->in = av_calloc(s->nb_inputs, sizeof(*s->in));
+    if (!s->in)
         return AVERROR(ENOMEM);
-    for (i = 0; i < am->nb_inputs; i++) {
+    for (i = 0; i < s->nb_inputs; i++) {
         char *name = av_asprintf("in%d", i);
         AVFilterPad pad = {
             .name             = name,
diff --git a/libavfilter/af_amix.c b/libavfilter/af_amix.c
index 9a3cbd4d..e64e2891 100644
--- a/libavfilter/af_amix.c
+++ b/libavfilter/af_amix.c
@@ -44,9 +44,8 @@
 #include "formats.h"
 #include "internal.h"
 
-#define INPUT_OFF      0    /**< input has reached EOF */
 #define INPUT_ON       1    /**< input is active */
-#define INPUT_INACTIVE 2    /**< input is on, but is currently inactive */
+#define INPUT_EOF      2    /**< input has reached EOF (may still be active) */
 
 #define DURATION_LONGEST  0
 #define DURATION_SHORTEST 1
@@ -209,7 +208,7 @@ static void calculate_scales(MixContext *s, int nb_samples)
     }
 
     for (i = 0; i < s->nb_inputs; i++) {
-        if (s->input_state[i] == INPUT_ON)
+        if (s->input_state[i] & INPUT_ON)
             s->input_scale[i] = 1.0f / s->scale_norm;
         else
             s->input_scale[i] = 0.0f;
@@ -264,15 +263,52 @@ static int config_output(AVFilterLink *outlink)
     return 0;
 }
 
+static int calc_active_inputs(MixContext *s);
+
 /**
  * Read samples from the input FIFOs, mix, and write to the output link.
  */
-static int output_frame(AVFilterLink *outlink, int nb_samples)
+static int output_frame(AVFilterLink *outlink)
 {
     AVFilterContext *ctx = outlink->src;
     MixContext      *s = ctx->priv;
     AVFrame *out_buf, *in_buf;
-    int i;
+    int nb_samples, ns, ret, i;
+
+    ret = calc_active_inputs(s);
+    if (ret < 0)
+        return ret;
+
+    if (s->input_state[0] & INPUT_ON) {
+        /* first input live: use the corresponding frame size */
+        nb_samples = frame_list_next_frame_size(s->frame_list);
+        for (i = 1; i < s->nb_inputs; i++) {
+            if (s->input_state[i] & INPUT_ON) {
+                ns = av_audio_fifo_size(s->fifos[i]);
+                if (ns < nb_samples) {
+                    if (!(s->input_state[i] & INPUT_EOF))
+                        /* unclosed input with not enough samples */
+                        return 0;
+                    /* closed input to drain */
+                    nb_samples = ns;
+                }
+            }
+        }
+    } else {
+        /* first input closed: use the available samples */
+        nb_samples = INT_MAX;
+        for (i = 1; i < s->nb_inputs; i++) {
+            if (s->input_state[i] & INPUT_ON) {
+                ns = av_audio_fifo_size(s->fifos[i]);
+                nb_samples = FFMIN(nb_samples, ns);
+            }
+        }
+        if (nb_samples == INT_MAX)
+            return AVERROR_EOF;
+    }
+
+    s->next_pts = frame_list_next_pts(s->frame_list);
+    frame_list_remove_samples(s->frame_list, nb_samples);
 
     calculate_scales(s, nb_samples);
 
@@ -287,7 +323,7 @@ static int output_frame(AVFilterLink *outlink, int nb_samples)
     }
 
     for (i = 0; i < s->nb_inputs; i++) {
-        if (s->input_state[i] == INPUT_ON) {
+        if (s->input_state[i] & INPUT_ON) {
             int planes, plane_size, p;
 
             av_audio_fifo_read(s->fifos[i], (void **)in_buf->extended_data,
@@ -313,29 +349,6 @@ static int output_frame(AVFilterLink *outlink, int nb_samples)
     return ff_filter_frame(outlink, out_buf);
 }
 
-/**
- * Returns the smallest number of samples available in the input FIFOs other
- * than that of the first input.
- */
-static int get_available_samples(MixContext *s)
-{
-    int i;
-    int available_samples = INT_MAX;
-
-    av_assert0(s->nb_inputs > 1);
-
-    for (i = 1; i < s->nb_inputs; i++) {
-        int nb_samples;
-        if (s->input_state[i] == INPUT_OFF)
-            continue;
-        nb_samples = av_audio_fifo_size(s->fifos[i]);
-        available_samples = FFMIN(available_samples, nb_samples);
-    }
-    if (available_samples == INT_MAX)
-        return 0;
-    return available_samples;
-}
-
 /**
  * Requests a frame, if needed, from each input link other than the first.
  */
@@ -348,19 +361,21 @@ static int request_samples(AVFilterContext *ctx, int min_samples)
 
     for (i = 1; i < s->nb_inputs; i++) {
         ret = 0;
-        if (s->input_state[i] == INPUT_OFF)
+        if (!(s->input_state[i] & INPUT_ON))
             continue;
-        while (!ret && av_audio_fifo_size(s->fifos[i]) < min_samples)
-            ret = ff_request_frame(ctx->inputs[i]);
+        if (av_audio_fifo_size(s->fifos[i]) >= min_samples)
+            continue;
+        ret = ff_request_frame(ctx->inputs[i]);
         if (ret == AVERROR_EOF) {
+            s->input_state[i] |= INPUT_EOF;
             if (av_audio_fifo_size(s->fifos[i]) == 0) {
-                s->input_state[i] = INPUT_OFF;
+                s->input_state[i] = 0;
                 continue;
             }
         } else if (ret < 0)
             return ret;
     }
-    return 0;
+    return output_frame(ctx->outputs[0]);
 }
 
 /**
@@ -374,11 +389,11 @@ static int calc_active_inputs(MixContext *s)
     int i;
     int active_inputs = 0;
     for (i = 0; i < s->nb_inputs; i++)
-        active_inputs += !!(s->input_state[i] != INPUT_OFF);
+        active_inputs += !!(s->input_state[i] & INPUT_ON);
     s->active_inputs = active_inputs;
 
     if (!active_inputs ||
-        (s->duration_mode == DURATION_FIRST && s->input_state[0] == INPUT_OFF) ||
+        (s->duration_mode == DURATION_FIRST && !(s->input_state[0] & INPUT_ON)) ||
         (s->duration_mode == DURATION_SHORTEST && active_inputs != s->nb_inputs))
         return AVERROR_EOF;
     return 0;
@@ -389,66 +404,30 @@ static int request_frame(AVFilterLink *outlink)
     AVFilterContext *ctx = outlink->src;
     MixContext      *s = ctx->priv;
     int ret;
-    int wanted_samples, available_samples;
+    int wanted_samples;
 
     ret = calc_active_inputs(s);
     if (ret < 0)
         return ret;
 
-    if (s->input_state[0] == INPUT_OFF) {
-        ret = request_samples(ctx, 1);
-        if (ret < 0)
-            return ret;
-
-        ret = calc_active_inputs(s);
-        if (ret < 0)
-            return ret;
-
-        available_samples = get_available_samples(s);
-        if (!available_samples)
-            return AVERROR(EAGAIN);
-
-        return output_frame(outlink, available_samples);
-    }
+    if (!(s->input_state[0] & INPUT_ON))
+        return request_samples(ctx, 1);
 
     if (s->frame_list->nb_frames == 0) {
         ret = ff_request_frame(ctx->inputs[0]);
         if (ret == AVERROR_EOF) {
-            s->input_state[0] = INPUT_OFF;
+            s->input_state[0] = 0;
             if (s->nb_inputs == 1)
                 return AVERROR_EOF;
-            else
-                return AVERROR(EAGAIN);
-        } else if (ret < 0)
-            return ret;
+            return output_frame(ctx->outputs[0]);
+        }
+        return ret;
     }
     av_assert0(s->frame_list->nb_frames > 0);
 
     wanted_samples = frame_list_next_frame_size(s->frame_list);
 
-    if (s->active_inputs > 1) {
-        ret = request_samples(ctx, wanted_samples);
-        if (ret < 0)
-            return ret;
-
-        ret = calc_active_inputs(s);
-        if (ret < 0)
-            return ret;
-    }
-
-    if (s->active_inputs > 1) {
-        available_samples = get_available_samples(s);
-        if (!available_samples)
-            return AVERROR(EAGAIN);
-        available_samples = FFMIN(available_samples, wanted_samples);
-    } else {
-        available_samples = wanted_samples;
-    }
-
-    s->next_pts = frame_list_next_pts(s->frame_list);
-    frame_list_remove_samples(s->frame_list, available_samples);
-
-    return output_frame(outlink, available_samples);
+    return request_samples(ctx, wanted_samples);
 }
 
 static int filter_frame(AVFilterLink *inlink, AVFrame *buf)
@@ -478,6 +457,9 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *buf)
     ret = av_audio_fifo_write(s->fifos[i], (void **)buf->extended_data,
                               buf->nb_samples);
 
+    av_frame_free(&buf);
+    return output_frame(outlink);
+
 fail:
     av_frame_free(&buf);
 
@@ -537,19 +519,23 @@ static int query_formats(AVFilterContext *ctx)
     int ret;
 
     layouts = ff_all_channel_layouts();
+    if (!layouts) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
 
-    if (!layouts)
-        return AVERROR(ENOMEM);
-
-    ff_add_format(&formats, AV_SAMPLE_FMT_FLT);
-    ff_add_format(&formats, AV_SAMPLE_FMT_FLTP);
-    ret = ff_set_common_formats(ctx, formats);
-    if (ret < 0)
-        return ret;
-    ret = ff_set_common_channel_layouts(ctx, layouts);
-    if (ret < 0)
-        return ret;
-    return ff_set_common_samplerates(ctx, ff_all_samplerates());
+    if ((ret = ff_add_format(&formats, AV_SAMPLE_FMT_FLT ))          < 0 ||
+        (ret = ff_add_format(&formats, AV_SAMPLE_FMT_FLTP))          < 0 ||
+        (ret = ff_set_common_formats        (ctx, formats))          < 0 ||
+        (ret = ff_set_common_channel_layouts(ctx, layouts))          < 0 ||
+        (ret = ff_set_common_samplerates(ctx, ff_all_samplerates())) < 0)
+        goto fail;
+    return 0;
+fail:
+    if (layouts)
+        av_freep(&layouts->channel_layouts);
+    av_freep(&layouts);
+    return ret;
 }
 
 static const AVFilterPad avfilter_af_amix_outputs[] = {
diff --git a/libavfilter/af_anequalizer.c b/libavfilter/af_anequalizer.c
new file mode 100644
index 00000000..a344c775
--- /dev/null
+++ b/libavfilter/af_anequalizer.c
@@ -0,0 +1,761 @@
+/*
+ * Copyright (c) 2001-2010 Krzysztof Foltman, Markus Schmidt, Thor Harald Johansen and others
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/intreadwrite.h"
+#include "libavutil/avstring.h"
+#include "libavutil/opt.h"
+#include "libavutil/parseutils.h"
+#include "avfilter.h"
+#include "internal.h"
+#include "audio.h"
+
+#define FILTER_ORDER 4
+
+enum FilterType {                /* band filter design, chosen with "t=" in the params string */
+    BUTTERWORTH,                 /* value 0: what config_input() presets when "t=" is absent */
+    CHEBYSHEV1,
+    CHEBYSHEV2,
+    NB_TYPES                     /* count; config_input() clips type to [0, NB_TYPES - 1] */
+};
+
+typedef struct FoSection {        /* one fourth-order IIR section */
+    double a0, a1, a2, a3, a4;    /* feedback coefficients; a0 is not read by section_process() */
+    double b0, b1, b2, b3, b4;    /* feed-forward coefficients */
+    /* Histories shifted by section_process(); index 0 holds the newest sample. */
+    double num[4];                /* previous inputs */
+    double denum[4];              /* previous outputs */
+} FoSection;
+
+typedef struct EqualizatorFilter {   /* one band entry parsed from the "params" string */
+    int ignore;                      /* set by config_input() when freq or channel is out of range */
+    int channel;                     /* index into the frame's extended_data[] this band acts on */
+    int type;                        /* enum FilterType, clipped to a valid value by config_input() */
+    /* Band parameters; equalizer() converts freq and width with hz_2_rad(). */
+    double freq;                     /* "f=": frequency in Hz */
+    double gain;                     /* "g=": gain in dB (made linear with ff_exp10(G/20)) */
+    double width;                    /* "w=": band width in Hz */
+    /* FILTER_ORDER / 2 cascaded sections, run in order by process_sample(). */
+    FoSection section[2];
+} EqualizatorFilter;
+
+typedef struct AudioNEqualizerContext {
+    const AVClass *class;
+    char *args;                 /* "params" option: '|'-separated band descriptions */
+    char *colors;               /* "colors" option: per-channel curve colors, split on ' ' and '|' */
+    int draw_curves;            /* "curves" option: when set, init() adds the video pad "out1" */
+    int w, h;                   /* picture dimensions; the "size" option is declared at w */
+    /* mag and fscale are only read by draw_curves(). */
+    double mag;                 /* "mgain" option: gain in dB that maps to the top row of the picture */
+    int fscale;                 /* "fscale" option: 0 = linear, 1 = logarithmic frequency axis */
+    int nb_filters;             /* number of entries of filters[] in use */
+    int nb_allocated;           /* capacity of filters[], in entries */
+    EqualizatorFilter *filters; /* allocated by config_input(), grown by add_filter() */
+    AVFrame *video;             /* curves picture; filter_frame() sends out clones of it */
+} AudioNEqualizerContext;
+
+#define OFFSET(x) offsetof(AudioNEqualizerContext, x)
+#define A AV_OPT_FLAG_AUDIO_PARAM
+#define V AV_OPT_FLAG_VIDEO_PARAM
+#define F AV_OPT_FLAG_FILTERING_PARAM
+/* "params" is the band list parsed by config_input(); every other option configures the curves video. */
+static const AVOption anequalizer_options[] = {
+    { "params", NULL,                             OFFSET(args),        AV_OPT_TYPE_STRING,     {.str=""}, 0, 0, A|F },
+    { "curves", "draw frequency response curves", OFFSET(draw_curves), AV_OPT_TYPE_BOOL,       {.i64=0}, 0, 1, V|F },
+    { "size",   "set video size",                 OFFSET(w),           AV_OPT_TYPE_IMAGE_SIZE, {.str = "hd720"}, 0, 0, V|F },
+    { "mgain",  "set max gain",                   OFFSET(mag),         AV_OPT_TYPE_DOUBLE,     {.dbl=60}, -900, 900, V|F },
+    { "fscale", "set frequency scale",            OFFSET(fscale),      AV_OPT_TYPE_INT,        {.i64=1}, 0, 1, V|F, "fscale" },
+        { "lin",  "linear",                       0,                   AV_OPT_TYPE_CONST,      {.i64=0}, 0, 0, V|F, "fscale" },
+        { "log",  "logarithmic",                  0,                   AV_OPT_TYPE_CONST,      {.i64=1}, 0, 0, V|F, "fscale" },
+    { "colors", "set channels curves colors",     OFFSET(colors),      AV_OPT_TYPE_STRING,     {.str = "red|green|blue|yellow|orange|lime|pink|magenta|brown" }, 0, 0, V|F },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(anequalizer);
+
+static void draw_curves(AVFilterContext *ctx, AVFilterLink *inlink, AVFrame *out)
+{
+    AudioNEqualizerContext *s = ctx->priv;
+    char *colors, *color, *saveptr = NULL;
+    int ch, i, n;
+    /* Redraw out from scratch: one magnitude-response curve per input channel, 4 bytes per pixel. */
+    colors = av_strdup(s->colors);
+    if (!colors)
+        return; /* allocation failed: the picture is left as it was */
+
+    memset(out->data[0], 0, s->h * out->linesize[0]);
+
+    for (ch = 0; ch < inlink->channels; ch++) {
+        uint8_t fg[4] = { 0xff, 0xff, 0xff, 0xff };
+        int prev_v = -1;
+        double f;
+        /* Take the next color of the list; fg keeps its all-0xff default once the list runs out. */
+        color = av_strtok(ch == 0 ? colors : NULL, " |", &saveptr);
+        if (color)
+            av_parse_color(fg, color, -1, ctx);
+
+        for (f = 0; f < s->w; f++) {
+            double zr, zi, zr2, zi2;
+            double Hr, Hi;
+            double Hmag = 1;
+            double w;
+            int v, y, x;
+            /* Column f -> angle w (linear or logarithmic axis), then z = cos(w) - j*sin(w). */
+            w = M_PI * (s->fscale ? pow(s->w - 1, f / s->w) : f) / (s->w - 1);
+            zr = cos(w);
+            zr2 = zr * zr;
+            zi = -sin(w);
+            zi2 = zi * zi;
+
+            for (n = 0; n < s->nb_filters; n++) {
+                if (s->filters[n].channel != ch ||
+                    s->filters[n].ignore)
+                    continue;
+
+                for (i = 0; i < FILTER_ORDER / 2; i++) {
+                    FoSection *S = &s->filters[n].section[i];
+
+                    /* H *= (((((S->b4 * z + S->b3) * z + S->b2) * z + S->b1) * z + S->b0) /
+                          ((((S->a4 * z + S->a3) * z + S->a2) * z + S->a1) * z + S->a0)); */
+                    /* Real and imaginary parts of each polynomial, expanded with zr2 + zi2 = 1. */
+                    Hr = S->b4*(1-8*zr2*zi2) + S->b2*(zr2-zi2) + zr*(S->b1+S->b3*(zr2-3*zi2))+ S->b0;
+                    Hi = zi*(S->b3*(3*zr2-zi2) + S->b1 + 2*zr*(2*S->b4*(zr2-zi2) + S->b2));
+                    Hmag *= hypot(Hr, Hi);
+                    Hr = S->a4*(1-8*zr2*zi2) + S->a2*(zr2-zi2) + zr*(S->a1+S->a3*(zr2-3*zi2))+ S->a0;
+                    Hi = zi*(S->a3*(3*zr2-zi2) + S->a1 + 2*zr*(2*S->a4*(zr2-zi2) + S->a2));
+                    Hmag /= hypot(Hr, Hi);
+                }
+            }
+            /* dB -> row: +mag dB lands on row 0, 0 dB on mid-height; clipped to the picture. */
+            v = av_clip((1. + -20 * log10(Hmag) / s->mag) * s->h / 2, 0, s->h - 1);
+            x = lrint(f);
+            if (prev_v == -1) /* first column: no previous row to connect to */
+                prev_v = v;
+            if (v <= prev_v) { /* fill the vertical run between the previous row and this one */
+                for (y = v; y <= prev_v; y++)
+                    AV_WL32(out->data[0] + y * out->linesize[0] + x * 4, AV_RL32(fg));
+            } else {
+                for (y = prev_v; y <= v; y++)
+                    AV_WL32(out->data[0] + y * out->linesize[0] + x * 4, AV_RL32(fg));
+            }
+
+            prev_v = v;
+        }
+    }
+
+    av_free(colors);
+}
+
+/* Configure the video output: size it from the options and render the initial curves. */
+static int config_video(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx      = outlink->src;
+    AudioNEqualizerContext *s = ctx->priv;
+
+    outlink->w = s->w;
+    outlink->h = s->h;
+
+    /* Drop a picture left over from an earlier configuration before allocating. */
+    av_frame_free(&s->video);
+    s->video = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!s->video)
+        return AVERROR(ENOMEM);
+
+    outlink->sample_aspect_ratio = (AVRational){1,1};
+    draw_curves(ctx, ctx->inputs[0], s->video);
+
+    return 0;
+}
+
+/* Create the dynamic output pads: audio "out0", plus video "out1" when curves are drawn. */
+static av_cold int init(AVFilterContext *ctx)
+{
+    AudioNEqualizerContext *s = ctx->priv;
+    AVFilterPad pad  = { .type = AVMEDIA_TYPE_AUDIO };
+    AVFilterPad vpad = { .type = AVMEDIA_TYPE_VIDEO, .config_props = config_video };
+    int ret;
+
+    pad.name = av_strdup("out0");
+    if (s->draw_curves)
+        vpad.name = av_strdup("out1");
+    if (!pad.name || (s->draw_curves && !vpad.name)) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    /* Once a pad is inserted its name is reachable through ctx->output_pads, where uninit() frees it. */
+    if ((ret = ff_insert_outpad(ctx, 0, &pad)) < 0)
+        goto fail;
+    pad.name = NULL;
+    if (s->draw_curves && (ret = ff_insert_outpad(ctx, 1, &vpad)) < 0)
+        goto fail;
+
+    return 0;
+
+fail:
+    av_freep(&pad.name);
+    av_freep(&vpad.name);
+    return ret;
+}
+
+static int query_formats(AVFilterContext *ctx)
+{
+    AVFilterLink *inlink = ctx->inputs[0];
+    AVFilterLink *outlink = ctx->outputs[0];
+    AudioNEqualizerContext *s = ctx->priv;
+    AVFilterFormats *formats;
+    AVFilterChannelLayouts *layouts;
+    static const enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_RGBA, AV_PIX_FMT_NONE };
+    static const enum AVSampleFormat sample_fmts[] = {
+        AV_SAMPLE_FMT_DBLP,
+        AV_SAMPLE_FMT_NONE
+    };
+    int ret;
+    /* The video output exists only with curves enabled; RGBA is the single format offered. */
+    if (s->draw_curves) {
+        AVFilterLink *videolink = ctx->outputs[1];
+        formats = ff_make_format_list(pix_fmts);
+        if ((ret = ff_formats_ref(formats, &videolink->in_formats)) < 0)
+            return ret;
+    }
+    /* Audio is handled as planar doubles; one list is referenced by both the input and the output. */
+    formats = ff_make_format_list(sample_fmts);
+    if ((ret = ff_formats_ref(formats, &inlink->out_formats)) < 0 ||
+        (ret = ff_formats_ref(formats, &outlink->in_formats)) < 0)
+        return ret;
+    /* Channel and sample-rate lists are likewise shared by both audio links. */
+    layouts = ff_all_channel_counts();
+    if ((ret = ff_channel_layouts_ref(layouts, &inlink->out_channel_layouts)) < 0 ||
+        (ret = ff_channel_layouts_ref(layouts, &outlink->in_channel_layouts)) < 0)
+        return ret;
+
+    formats = ff_all_samplerates();
+    if ((ret = ff_formats_ref(formats, &inlink->out_samplerates)) < 0 ||
+        (ret = ff_formats_ref(formats, &outlink->in_samplerates)) < 0)
+        return ret;
+
+    return 0;
+}
+
+/* Free the pad names allocated by init() and all per-instance state. */
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    AudioNEqualizerContext *s = ctx->priv;
+    unsigned i;
+
+    for (i = 0; i < ctx->nb_outputs; i++) /* only pads that init() really inserted */
+        av_freep(&ctx->output_pads[i].name);
+    av_frame_free(&s->video);
+    av_freep(&s->filters);
+    s->nb_filters = s->nb_allocated = 0;
+}
+
+static void butterworth_fo_section(FoSection *S, double beta,
+                                   double si, double g, double g0,
+                                   double D, double c0)
+{   /* Fill the coefficients of one Butterworth section; all but a0 are divided by D. */
+    if (c0 == 1 || c0 == -1) { /* callers pass c0 = cos(w0); at +/-1 the 3rd/4th-order terms are zero */
+        S->b0 = (g*g*beta*beta + 2*g*g0*si*beta + g0*g0)/D;
+        S->b1 = 2*c0*(g*g*beta*beta - g0*g0)/D;
+        S->b2 = (g*g*beta*beta - 2*g0*g*beta*si + g0*g0)/D;
+        S->b3 = 0;
+        S->b4 = 0;
+
+        S->a0 = 1;
+        S->a1 = 2*c0*(beta*beta - 1)/D;
+        S->a2 = (beta*beta - 2*beta*si + 1)/D;
+        S->a3 = 0;
+        S->a4 = 0;
+    } else { /* general case: all five coefficients of each polynomial are set */
+        S->b0 = (g*g*beta*beta + 2*g*g0*si*beta + g0*g0)/D;
+        S->b1 = -4*c0*(g0*g0 + g*g0*si*beta)/D;
+        S->b2 = 2*(g0*g0*(1 + 2*c0*c0) - g*g*beta*beta)/D;
+        S->b3 = -4*c0*(g0*g0 - g*g0*si*beta)/D;
+        S->b4 = (g*g*beta*beta - 2*g*g0*si*beta + g0*g0)/D;
+
+        S->a0 = 1;
+        S->a1 = -4*c0*(1 + si*beta)/D;
+        S->a2 = 2*(1 + 2*c0*c0 - beta*beta)/D;
+        S->a3 = -4*c0*(1 - si*beta)/D;
+        S->a4 = (beta*beta - 2*si*beta + 1)/D;
+    }
+}
+
+static void butterworth_bp_filter(EqualizatorFilter *f,
+                                  int N, double w0, double wb,
+                                  double G, double Gb, double G0)
+{   /* Design the sections of f: order N, angles w0/wb in radians, gains G/Gb/G0 in dB. */
+    double g, c0, g0, beta;
+    double epsilon;
+    int r =  N % 2;
+    int L = (N - r) / 2; /* sections to fill; equalizer() passes FILTER_ORDER, giving L == 2 */
+    int i;
+    /* Flat request: only a0 and b0 are written, other coefficients keep their current values. */
+    if (G == 0 && G0 == 0) {
+        f->section[0].a0 = 1;
+        f->section[0].b0 = 1;
+        f->section[1].a0 = 1;
+        f->section[1].b0 = 1;
+        return;
+    }
+    /* dB -> linear amplitude */
+    G  = ff_exp10(G/20);
+    Gb = ff_exp10(Gb/20);
+    G0 = ff_exp10(G0/20);
+
+    epsilon = sqrt((G * G - Gb * Gb) / (Gb * Gb - G0 * G0));
+    g  = pow(G,  1.0 / N);
+    g0 = pow(G0, 1.0 / N);
+    beta = pow(epsilon, -1.0 / N) * tan(wb/2);
+    c0 = cos(w0);
+
+    for (i = 1; i <= L; i++) {
+        double ui = (2.0 * i - 1) / N;
+        double si = sin(M_PI * ui / 2.0);
+        double Di = beta * beta + 2 * si * beta + 1; /* common divisor of section i */
+
+        butterworth_fo_section(&f->section[i - 1], beta, si, g, g0, Di, c0);
+    }
+}
+
+static void chebyshev1_fo_section(FoSection *S, double a,
+                                  double c, double tetta_b,
+                                  double g0, double si, double b,
+                                  double D, double c0)
+{   /* Fill the coefficients of one Chebyshev type 1 section; all but a0 are divided by D. */
+    if (c0 == 1 || c0 == -1) { /* callers pass c0 = cos(w0); at +/-1 the 3rd/4th-order terms are zero */
+        S->b0 = (tetta_b*tetta_b*(b*b+g0*g0*c*c) + 2*g0*b*si*tetta_b + g0*g0)/D;
+        S->b1 = 2*c0*(tetta_b*tetta_b*(b*b+g0*g0*c*c) - g0*g0)/D;
+        S->b2 = (tetta_b*tetta_b*(b*b+g0*g0*c*c) - 2*g0*b*si*tetta_b + g0*g0)/D;
+        S->b3 = 0;
+        S->b4 = 0;
+        /* b0 and b2 differ only in the sign of their middle term, which is linear in tetta_b. */
+        S->a0 = 1;
+        S->a1 = 2*c0*(tetta_b*tetta_b*(a*a+c*c) - 1)/D;
+        S->a2 = (tetta_b*tetta_b*(a*a+c*c) - 2*a*si*tetta_b + 1)/D;
+        S->a3 = 0;
+        S->a4 = 0;
+    } else { /* general case: all five coefficients of each polynomial are set */
+        S->b0 = ((b*b + g0*g0*c*c)*tetta_b*tetta_b + 2*g0*b*si*tetta_b + g0*g0)/D;
+        S->b1 = -4*c0*(g0*g0 + g0*b*si*tetta_b)/D;
+        S->b2 = 2*(g0*g0*(1 + 2*c0*c0) - (b*b + g0*g0*c*c)*tetta_b*tetta_b)/D;
+        S->b3 = -4*c0*(g0*g0 - g0*b*si*tetta_b)/D;
+        S->b4 = ((b*b + g0*g0*c*c)*tetta_b*tetta_b - 2*g0*b*si*tetta_b + g0*g0)/D;
+
+        S->a0 = 1;
+        S->a1 = -4*c0*(1 + a*si*tetta_b)/D;
+        S->a2 = 2*(1 + 2*c0*c0 - (a*a + c*c)*tetta_b*tetta_b)/D;
+        S->a3 = -4*c0*(1 - a*si*tetta_b)/D;
+        S->a4 = ((a*a + c*c)*tetta_b*tetta_b - 2*a*si*tetta_b + 1)/D;
+    }
+}
+
+static void chebyshev1_bp_filter(EqualizatorFilter *f,
+                                 int N, double w0, double wb,
+                                 double G, double Gb, double G0)
+{   /* Design the sections of f: order N, angles w0/wb in radians, gains G/Gb/G0 in dB. */
+    double a, b, c0, g0, alfa, beta, tetta_b;
+    double epsilon;
+    int r =  N % 2;
+    int L = (N - r) / 2; /* sections to fill; equalizer() passes FILTER_ORDER, giving L == 2 */
+    int i;
+    /* Flat request: only a0 and b0 are written, other coefficients keep their current values. */
+    if (G == 0 && G0 == 0) {
+        f->section[0].a0 = 1;
+        f->section[0].b0 = 1;
+        f->section[1].a0 = 1;
+        f->section[1].b0 = 1;
+        return;
+    }
+    /* dB -> linear amplitude */
+    G  = ff_exp10(G/20);
+    Gb = ff_exp10(Gb/20);
+    G0 = ff_exp10(G0/20);
+
+    epsilon = sqrt((G*G - Gb*Gb) / (Gb*Gb - G0*G0));
+    g0 = pow(G0,1.0/N);
+    alfa = pow(1.0/epsilon    + sqrt(1 + 1/(epsilon*epsilon)), 1.0/N);
+    beta = pow(G/epsilon + Gb * sqrt(1 + 1/(epsilon*epsilon)), 1.0/N);
+    a = 0.5 * (alfa - 1.0/alfa);
+    b = 0.5 * (beta - g0*g0*(1/beta));
+    tetta_b = tan(wb/2);
+    c0 = cos(w0);
+
+    for (i = 1; i <= L; i++) {
+        double ui = (2.0*i-1.0)/N;
+        double ci = cos(M_PI*ui/2.0);
+        double si = sin(M_PI*ui/2.0);
+        double Di = (a*a + ci*ci)*tetta_b*tetta_b + 2.0*a*si*tetta_b + 1; /* common divisor of section i */
+
+        chebyshev1_fo_section(&f->section[i - 1], a, ci, tetta_b, g0, si, b, Di, c0);
+    }
+}
+
+static void chebyshev2_fo_section(FoSection *S, double a,
+                                  double c, double tetta_b,
+                                  double g, double si, double b,
+                                  double D, double c0)
+{   /* Fill the coefficients of one Chebyshev type 2 section; all but a0 are divided by D. */
+    if (c0 == 1 || c0 == -1) { /* callers pass c0 = cos(w0); at +/-1 the 3rd/4th-order terms are zero */
+        S->b0 = (g*g*tetta_b*tetta_b + 2*tetta_b*g*b*si + b*b + g*g*c*c)/D;
+        S->b1 = 2*c0*(g*g*tetta_b*tetta_b - b*b - g*g*c*c)/D;
+        S->b2 = (g*g*tetta_b*tetta_b - 2*tetta_b*g*b*si + b*b + g*g*c*c)/D;
+        S->b3 = 0;
+        S->b4 = 0;
+
+        S->a0 = 1;
+        S->a1 = 2*c0*(tetta_b*tetta_b - a*a - c*c)/D;
+        S->a2 = (tetta_b*tetta_b - 2*tetta_b*a*si + a*a + c*c)/D;
+        S->a3 = 0;
+        S->a4 = 0;
+    } else { /* general case: all five coefficients of each polynomial are set */
+        S->b0 = (g*g*tetta_b*tetta_b + 2*g*b*si*tetta_b + b*b + g*g*c*c)/D;
+        S->b1 = -4*c0*(b*b + g*g*c*c + g*b*si*tetta_b)/D;
+        S->b2 = 2*((b*b + g*g*c*c)*(1 + 2*c0*c0) - g*g*tetta_b*tetta_b)/D;
+        S->b3 = -4*c0*(b*b + g*g*c*c - g*b*si*tetta_b)/D;
+        S->b4 = (g*g*tetta_b*tetta_b - 2*g*b*si*tetta_b + b*b + g*g*c*c)/D;
+
+        S->a0 = 1;
+        S->a1 = -4*c0*(a*a + c*c + a*si*tetta_b)/D;
+        S->a2 = 2*((a*a + c*c)*(1 + 2*c0*c0) - tetta_b*tetta_b)/D;
+        S->a3 = -4*c0*(a*a + c*c - a*si*tetta_b)/D;
+        S->a4 = (tetta_b*tetta_b - 2*a*si*tetta_b + a*a + c*c)/D;
+    }
+}
+
+static void chebyshev2_bp_filter(EqualizatorFilter *f,
+                                 int N, double w0, double wb,
+                                 double G, double Gb, double G0)
+{   /* Design the sections of f: order N, angles w0/wb in radians, gains G/Gb/G0 in dB. */
+    double a, b, c0, tetta_b;
+    double epsilon, g, eu, ew;
+    int r =  N % 2;
+    int L = (N - r) / 2; /* sections to fill; equalizer() passes FILTER_ORDER, giving L == 2 */
+    int i;
+    /* Flat request: only a0 and b0 are written, other coefficients keep their current values. */
+    if (G == 0 && G0 == 0) {
+        f->section[0].a0 = 1;
+        f->section[0].b0 = 1;
+        f->section[1].a0 = 1;
+        f->section[1].b0 = 1;
+        return;
+    }
+    /* dB -> linear amplitude */
+    G  = ff_exp10(G/20);
+    Gb = ff_exp10(Gb/20);
+    G0 = ff_exp10(G0/20);
+
+    epsilon = sqrt((G*G - Gb*Gb) / (Gb*Gb - G0*G0));
+    g  = pow(G, 1.0 / N);
+    eu = pow(epsilon + sqrt(1 + epsilon*epsilon), 1.0/N);
+    ew = pow(G0*epsilon + Gb*sqrt(1 + epsilon*epsilon), 1.0/N);
+    a = (eu - 1.0/eu)/2.0;
+    b = (ew - g*g/ew)/2.0;
+    tetta_b = tan(wb/2);
+    c0 = cos(w0);
+
+    for (i = 1; i <= L; i++) {
+        double ui = (2.0 * i - 1.0)/N;
+        double ci = cos(M_PI * ui / 2.0);
+        double si = sin(M_PI * ui / 2.0);
+        double Di = tetta_b*tetta_b + 2*a*si*tetta_b + a*a + ci*ci; /* common divisor of section i */
+
+        chebyshev2_fo_section(&f->section[i - 1], a, ci, tetta_b, g, si, b, Di, c0);
+    }
+}
+
+/* Band-edge gain (dB) for the Butterworth design: 3 dB inside the peak gain
+ * for peaks of 6 dB or more, half the gain for smaller ones. */
+static double butterworth_compute_bw_gain_db(double gain)
+{
+    if (gain <= -6)
+        return gain + 3;
+    if (gain >= 6)
+        return gain - 3;
+    if (gain > -6 && gain < 6)
+        return gain * 0.5;
+
+    return 0; /* only a NaN gain gets here */
+}
+
+/* Band-edge gain (dB) for the Chebyshev type 1 design: 1 dB inside the peak
+ * gain for peaks of 6 dB or more, 90% of the gain for smaller ones. */
+static double chebyshev1_compute_bw_gain_db(double gain)
+{
+    if (gain <= -6)
+        return gain + 1;
+    if (gain >= 6)
+        return gain - 1;
+    if (gain > -6 && gain < 6)
+        return gain * 0.9;
+
+    return 0; /* only a NaN gain gets here */
+}
+
+/* Band-edge gain (dB) for the Chebyshev type 2 design: pinned to -3 or +3 dB
+ * for peaks of 6 dB or more, 30% of the gain for smaller ones. */
+static double chebyshev2_compute_bw_gain_db(double gain)
+{
+    if (gain <= -6)
+        return -3;
+    if (gain >= 6)
+        return 3;
+    if (gain > -6 && gain < 6)
+        return gain * 0.3;
+
+    return 0; /* only a NaN gain gets here */
+}
+
+static inline double hz_2_rad(double x, double fs)
+{   /* x in Hz at sample rate fs -> radians per sample (fs / 2 maps to pi) */
+    return 2 * M_PI * x / fs;
+}
+
+/* Recompute the section coefficients of f from its freq, width and gain at sample_rate. */
+static void equalizer(EqualizatorFilter *f, double sample_rate)
+{
+    const double w0 = hz_2_rad(f->freq,  sample_rate);
+    const double wb = hz_2_rad(f->width, sample_rate);
+
+    /* The G0 argument is always 0 dB; a type outside the enum leaves f untouched. */
+    switch (f->type) {
+    case BUTTERWORTH:
+        butterworth_bp_filter(f, FILTER_ORDER, w0, wb, f->gain,
+                              butterworth_compute_bw_gain_db(f->gain), 0);
+        break;
+    case CHEBYSHEV1:
+        chebyshev1_bp_filter(f, FILTER_ORDER, w0, wb, f->gain,
+                             chebyshev1_compute_bw_gain_db(f->gain), 0);
+        break;
+    case CHEBYSHEV2:
+        chebyshev2_bp_filter(f, FILTER_ORDER, w0, wb, f->gain,
+                             chebyshev2_compute_bw_gain_db(f->gain), 0);
+        break;
+    }
+}
+
+/* Finish the entry config_input() parsed into filters[nb_filters] and make room for the next. */
+static int add_filter(AudioNEqualizerContext *s, AVFilterLink *inlink)
+{
+    equalizer(&s->filters[s->nb_filters], inlink->sample_rate);
+    /* config_input() writes slot nb_filters before calling us, so the slot that follows
+     * the increment below must already be inside the array: grow one entry early. */
+    if (s->nb_filters + 1 >= s->nb_allocated) {
+        EqualizatorFilter *filters = av_calloc(s->nb_allocated, 2 * sizeof(*s->filters));
+        if (!filters)
+            return AVERROR(ENOMEM);
+        memcpy(filters, s->filters, sizeof(*s->filters) * s->nb_allocated);
+        av_free(s->filters);
+        s->filters = filters;
+        s->nb_allocated *= 2;
+    }
+    s->nb_filters++;
+    return 0;
+}
+
+/* Parse the "params" option, a '|'-separated list of "c<ch> f=<freq> w=<width> g=<gain>"
+ * entries with an optional trailing " t=<type>", into s->filters. */
+static int config_input(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AudioNEqualizerContext *s = ctx->priv;
+    char *saveptr = NULL;
+    char *args;
+    int ret = 0;
+
+    args = av_strdup(s->args); /* the list is tokenized on a private copy */
+    if (!args)
+        return AVERROR(ENOMEM);
+
+    /* Start with room for 32 filters per channel. */
+    s->nb_allocated = 32 * inlink->channels;
+    s->filters = av_calloc(inlink->channels, 32 * sizeof(*s->filters));
+    if (!s->filters) {
+        s->nb_allocated = 0;
+        av_free(args);
+        return AVERROR(ENOMEM);
+    }
+
+    for (;;) {
+        EqualizatorFilter *f;
+        char *arg = av_strtok(s->nb_filters == 0 ? args : NULL, "|", &saveptr);
+
+        if (!arg)
+            break;
+
+        /* Try the five-field form first, then fall back to the one without "t=". */
+        f = &s->filters[s->nb_filters];
+        f->type = 0;
+        if (sscanf(arg, "c%d f=%lf w=%lf g=%lf t=%d",
+                   &f->channel, &f->freq, &f->width, &f->gain, &f->type) != 5 &&
+            sscanf(arg, "c%d f=%lf w=%lf g=%lf",
+                   &f->channel, &f->freq, &f->width, &f->gain) != 4) {
+            ret = AVERROR(EINVAL);
+            break;
+        }
+
+        /* Entries outside the stream's range keep their slot but are flagged as ignored. */
+        if (f->freq < 0 || f->freq > inlink->sample_rate / 2.0)
+            f->ignore = 1;
+        if (f->channel < 0 || f->channel >= inlink->channels)
+            f->ignore = 1;
+
+        f->type = av_clip(f->type, 0, NB_TYPES - 1);
+        ret = add_filter(s, inlink);
+        if (ret < 0)
+            break;
+    }
+
+    av_free(args);
+
+    return ret;
+}
+
+/* Handle the "change" command; args is "<index>|f=<freq>|w=<width>|g=<gain>". */
+static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
+                           char *res, int res_len, int flags)
+{
+    AudioNEqualizerContext *s = ctx->priv;
+    AVFilterLink *inlink = ctx->inputs[0];
+    EqualizatorFilter *target;
+    double freq, width, gain;
+    int filter;
+
+    /* No other command is implemented by this filter. */
+    if (strcmp(cmd, "change"))
+        return AVERROR(ENOSYS);
+
+    if (sscanf(args, "%d|f=%lf|w=%lf|g=%lf", &filter, &freq, &width, &gain) != 4)
+        return AVERROR(EINVAL);
+    if (filter < 0 || filter >= s->nb_filters)
+        return AVERROR(EINVAL);
+    if (freq < 0 || freq > inlink->sample_rate / 2.0)
+        return AVERROR(EINVAL);
+
+    target        = &s->filters[filter];
+    target->freq  = freq;
+    target->width = width;
+    target->gain  = gain;
+    equalizer(target, inlink->sample_rate);
+
+    /* Redraw the cached picture; filter_frame() sends out clones of it. */
+    if (s->draw_curves)
+        draw_curves(ctx, inlink, s->video);
+    return 0;
+}
+
+/* Run one input sample through a section: a difference equation over the last
+ * four inputs (num) and outputs (denum). a0 is not used here. */
+static inline double section_process(FoSection *S, double in)
+{
+    double *x = S->num, *y = S->denum;
+    double out = S->b0 * in;
+    int k;
+
+    out += S->b1 * x[0] - y[0] * S->a1;
+    out += S->b2 * x[1] - y[1] * S->a2;
+    out += S->b3 * x[2] - y[2] * S->a3;
+    out += S->b4 * x[3] - y[3] * S->a4;
+
+    /* Age both histories by one sample; index 0 always holds the newest value. */
+    for (k = 3; k > 0; k--) {
+        x[k] = x[k - 1];
+        y[k] = y[k - 1];
+    }
+    x[0] = in;
+    y[0] = out;
+    return out;
+}
+
+/* Feed one sample through the FILTER_ORDER / 2 cascaded sections at s1, in order. */
+static double process_sample(FoSection *s1, double in)
+{
+    double acc = in;
+    int k;
+
+    /* Each section consumes the output of the one before it. */
+    for (k = 0; k < FILTER_ORDER / 2; k++)
+        acc = section_process(&s1[k], acc);
+
+    return acc;
+}
+
+/* Filter buf in place and pass it on; with curves enabled a clone of the picture goes out first. */
+static int filter_frame(AVFilterLink *inlink, AVFrame *buf)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AudioNEqualizerContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+    int i, n;
+
+    for (i = 0; i < s->nb_filters; i++) {
+        EqualizatorFilter *f = &s->filters[i];
+        double *bptr;
+
+        if (f->gain == 0. || f->ignore) /* flat or out-of-range band: nothing to apply */
+            continue;
+        bptr = (double *)buf->extended_data[f->channel];
+        for (n = 0; n < buf->nb_samples; n++)
+            bptr[n] = process_sample(f->section, bptr[n]);
+    }
+
+    if (s->draw_curves) {
+        AVFrame *clone;
+        int ret;
+
+        s->video->pts = buf->pts +
+            av_rescale_q(buf->nb_samples, (AVRational){ 1, inlink->sample_rate },
+                         outlink->time_base);
+        /* A failed clone must not reach ff_filter_frame(); buf is still ours on any error. */
+        clone = av_frame_clone(s->video);
+        ret   = clone ? ff_filter_frame(ctx->outputs[1], clone) : AVERROR(ENOMEM);
+        if (ret < 0) {
+            av_frame_free(&buf);
+            return ret;
+        }
+    }
+
+    return ff_filter_frame(outlink, buf);
+}
+
+static const AVFilterPad inputs[] = {
+    {
+        .name           = "default",
+        .type           = AVMEDIA_TYPE_AUDIO,
+        .config_props   = config_input,   /* parses "params" and builds the filter list */
+        .filter_frame   = filter_frame,
+        .needs_writable = 1,              /* filter_frame() overwrites the samples in place */
+    },
+    { NULL }
+};
+
+AVFilter ff_af_anequalizer = {
+    .name          = "anequalizer",
+    .description   = NULL_IF_CONFIG_SMALL("Apply high-order audio parametric multi band equalizer."),
+    .priv_size     = sizeof(AudioNEqualizerContext),
+    .priv_class    = &anequalizer_class,
+    .init          = init,
+    .uninit        = uninit,
+    .query_formats = query_formats,
+    .inputs        = inputs,
+    .outputs       = NULL,                          /* no static outputs: init() inserts "out0" and optionally "out1" */
+    .flags         = AVFILTER_FLAG_DYNAMIC_OUTPUTS,
+    .process_command = process_command,             /* implements the "change" command */
+};
diff --git a/libavfilter/af_apad.c b/libavfilter/af_apad.c
index eafc7050..0a2d4206 100644
--- a/libavfilter/af_apad.c
+++ b/libavfilter/af_apad.c
@@ -57,15 +57,15 @@ AVFILTER_DEFINE_CLASS(apad);
 
 static av_cold int init(AVFilterContext *ctx)
 {
-    APadContext *apad = ctx->priv;
+    APadContext *s = ctx->priv;
 
-    apad->next_pts = AV_NOPTS_VALUE;
-    if (apad->whole_len >= 0 && apad->pad_len >= 0) {
+    s->next_pts = AV_NOPTS_VALUE;
+    if (s->whole_len >= 0 && s->pad_len >= 0) {
         av_log(ctx, AV_LOG_ERROR, "Both whole and pad length are set, this is not possible\n");
         return AVERROR(EINVAL);
     }
-    apad->pad_len_left   = apad->pad_len;
-    apad->whole_len_left = apad->whole_len;
+    s->pad_len_left   = s->pad_len;
+    s->whole_len_left = s->whole_len;
 
     return 0;
 }
@@ -73,38 +73,38 @@ static av_cold int init(AVFilterContext *ctx)
 static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
 {
     AVFilterContext *ctx = inlink->dst;
-    APadContext *apad = ctx->priv;
+    APadContext *s = ctx->priv;
 
-    if (apad->whole_len >= 0) {
-        apad->whole_len_left = FFMAX(apad->whole_len_left - frame->nb_samples, 0);
+    if (s->whole_len >= 0) {
+        s->whole_len_left = FFMAX(s->whole_len_left - frame->nb_samples, 0);
         av_log(ctx, AV_LOG_DEBUG,
-               "n_out:%d whole_len_left:%"PRId64"\n", frame->nb_samples, apad->whole_len_left);
+               "n_out:%d whole_len_left:%"PRId64"\n", frame->nb_samples, s->whole_len_left);
     }
 
-    apad->next_pts = frame->pts + av_rescale_q(frame->nb_samples, (AVRational){1, inlink->sample_rate}, inlink->time_base);
+    s->next_pts = frame->pts + av_rescale_q(frame->nb_samples, (AVRational){1, inlink->sample_rate}, inlink->time_base);
     return ff_filter_frame(ctx->outputs[0], frame);
 }
 
 static int request_frame(AVFilterLink *outlink)
 {
     AVFilterContext *ctx = outlink->src;
-    APadContext *apad = ctx->priv;
+    APadContext *s = ctx->priv;
     int ret;
 
     ret = ff_request_frame(ctx->inputs[0]);
 
     if (ret == AVERROR_EOF && !ctx->is_disabled) {
-        int n_out = apad->packet_size;
+        int n_out = s->packet_size;
         AVFrame *outsamplesref;
 
-        if (apad->whole_len >= 0 && apad->pad_len < 0) {
-            apad->pad_len = apad->pad_len_left = apad->whole_len_left;
+        if (s->whole_len >= 0 && s->pad_len < 0) {
+            s->pad_len = s->pad_len_left = s->whole_len_left;
         }
-        if (apad->pad_len >=0 || apad->whole_len >= 0) {
-            n_out = FFMIN(n_out, apad->pad_len_left);
-            apad->pad_len_left -= n_out;
+        if (s->pad_len >=0 || s->whole_len >= 0) {
+            n_out = FFMIN(n_out, s->pad_len_left);
+            s->pad_len_left -= n_out;
             av_log(ctx, AV_LOG_DEBUG,
-                   "padding n_out:%d pad_len_left:%"PRId64"\n", n_out, apad->pad_len_left);
+                   "padding n_out:%d pad_len_left:%"PRId64"\n", n_out, s->pad_len_left);
         }
 
         if (!n_out)
@@ -122,9 +122,9 @@ static int request_frame(AVFilterLink *outlink)
                                av_frame_get_channels(outsamplesref),
                                outsamplesref->format);
 
-        outsamplesref->pts = apad->next_pts;
-        if (apad->next_pts != AV_NOPTS_VALUE)
-            apad->next_pts += av_rescale_q(n_out, (AVRational){1, outlink->sample_rate}, outlink->time_base);
+        outsamplesref->pts = s->next_pts;
+        if (s->next_pts != AV_NOPTS_VALUE)
+            s->next_pts += av_rescale_q(n_out, (AVRational){1, outlink->sample_rate}, outlink->time_base);
 
         return ff_filter_frame(outlink, outsamplesref);
     }
diff --git a/libavfilter/af_aphaser.c b/libavfilter/af_aphaser.c
index 582f6e79..33ecb1a7 100644
--- a/libavfilter/af_aphaser.c
+++ b/libavfilter/af_aphaser.c
@@ -47,7 +47,7 @@ typedef struct AudioPhaserContext {
 
     int delay_pos, modulation_pos;
 
-    void (*phaser)(struct AudioPhaserContext *p,
+    void (*phaser)(struct AudioPhaserContext *s,
                    uint8_t * const *src, uint8_t **dst,
                    int nb_samples, int channels);
 } AudioPhaserContext;
@@ -73,11 +73,11 @@ AVFILTER_DEFINE_CLASS(aphaser);
 
 static av_cold int init(AVFilterContext *ctx)
 {
-    AudioPhaserContext *p = ctx->priv;
+    AudioPhaserContext *s = ctx->priv;
 
-    if (p->in_gain > (1 - p->decay * p->decay))
+    if (s->in_gain > (1 - s->decay * s->decay))
         av_log(ctx, AV_LOG_WARNING, "in_gain may cause clipping\n");
-    if (p->in_gain / (1 - p->decay) > 1 / p->out_gain)
+    if (s->in_gain / (1 - s->decay) > 1 / s->out_gain)
         av_log(ctx, AV_LOG_WARNING, "out_gain may cause clipping\n");
 
     return 0;
@@ -96,7 +96,7 @@ static int query_formats(AVFilterContext *ctx)
     };
     int ret;
 
-    layouts = ff_all_channel_layouts();
+    layouts = ff_all_channel_counts();
     if (!layouts)
         return AVERROR(ENOMEM);
     ret = ff_set_common_channel_layouts(ctx, layouts);
@@ -119,75 +119,75 @@ static int query_formats(AVFilterContext *ctx)
 #define MOD(a, b) (((a) >= (b)) ? (a) - (b) : (a))
 
 #define PHASER_PLANAR(name, type)                                      \
-static void phaser_## name ##p(AudioPhaserContext *p,                  \
-                               uint8_t * const *src, uint8_t **dst,    \
+static void phaser_## name ##p(AudioPhaserContext *s,                  \
+                               uint8_t * const *ssrc, uint8_t **ddst,  \
                                int nb_samples, int channels)           \
 {                                                                      \
     int i, c, delay_pos, modulation_pos;                               \
                                                                        \
     av_assert0(channels > 0);                                          \
     for (c = 0; c < channels; c++) {                                   \
-        type *s = (type *)src[c];                                      \
-        type *d = (type *)dst[c];                                      \
-        double *buffer = p->delay_buffer +                             \
-                         c * p->delay_buffer_length;                   \
+        type *src = (type *)ssrc[c];                                   \
+        type *dst = (type *)ddst[c];                                   \
+        double *buffer = s->delay_buffer +                             \
+                         c * s->delay_buffer_length;                   \
                                                                        \
-        delay_pos      = p->delay_pos;                                 \
-        modulation_pos = p->modulation_pos;                            \
+        delay_pos      = s->delay_pos;                                 \
+        modulation_pos = s->modulation_pos;                            \
                                                                        \
-        for (i = 0; i < nb_samples; i++, s++, d++) {                   \
-            double v = *s * p->in_gain + buffer[                       \
-                       MOD(delay_pos + p->modulation_buffer[           \
+        for (i = 0; i < nb_samples; i++, src++, dst++) {               \
+            double v = *src * s->in_gain + buffer[                     \
+                       MOD(delay_pos + s->modulation_buffer[           \
                        modulation_pos],                                \
-                       p->delay_buffer_length)] * p->decay;            \
+                       s->delay_buffer_length)] * s->decay;            \
                                                                        \
             modulation_pos = MOD(modulation_pos + 1,                   \
-                             p->modulation_buffer_length);             \
-            delay_pos = MOD(delay_pos + 1, p->delay_buffer_length);    \
+                             s->modulation_buffer_length);             \
+            delay_pos = MOD(delay_pos + 1, s->delay_buffer_length);    \
             buffer[delay_pos] = v;                                     \
                                                                        \
-            *d = v * p->out_gain;                                      \
+            *dst = v * s->out_gain;                                    \
         }                                                              \
     }                                                                  \
                                                                        \
-    p->delay_pos      = delay_pos;                                     \
-    p->modulation_pos = modulation_pos;                                \
+    s->delay_pos      = delay_pos;                                     \
+    s->modulation_pos = modulation_pos;                                \
 }
 
 #define PHASER(name, type)                                              \
-static void phaser_## name (AudioPhaserContext *p,                      \
-                            uint8_t * const *src, uint8_t **dst,        \
+static void phaser_## name (AudioPhaserContext *s,                      \
+                            uint8_t * const *ssrc, uint8_t **ddst,      \
                             int nb_samples, int channels)               \
 {                                                                       \
     int i, c, delay_pos, modulation_pos;                                \
-    type *s = (type *)src[0];                                           \
-    type *d = (type *)dst[0];                                           \
-    double *buffer = p->delay_buffer;                                   \
+    type *src = (type *)ssrc[0];                                        \
+    type *dst = (type *)ddst[0];                                        \
+    double *buffer = s->delay_buffer;                                   \
                                                                         \
-    delay_pos      = p->delay_pos;                                      \
-    modulation_pos = p->modulation_pos;                                 \
+    delay_pos      = s->delay_pos;                                      \
+    modulation_pos = s->modulation_pos;                                 \
                                                                         \
     for (i = 0; i < nb_samples; i++) {                                  \
-        int pos = MOD(delay_pos + p->modulation_buffer[modulation_pos], \
-                   p->delay_buffer_length) * channels;                  \
+        int pos = MOD(delay_pos + s->modulation_buffer[modulation_pos], \
+                      s->delay_buffer_length) * channels;               \
         int npos;                                                       \
                                                                         \
-        delay_pos = MOD(delay_pos + 1, p->delay_buffer_length);         \
+        delay_pos = MOD(delay_pos + 1, s->delay_buffer_length);         \
         npos = delay_pos * channels;                                    \
-        for (c = 0; c < channels; c++, s++, d++) {                      \
-            double v = *s * p->in_gain + buffer[pos + c] * p->decay;    \
+        for (c = 0; c < channels; c++, src++, dst++) {                  \
+            double v = *src * s->in_gain + buffer[pos + c] * s->decay;  \
                                                                         \
             buffer[npos + c] = v;                                       \
                                                                         \
-            *d = v * p->out_gain;                                       \
+            *dst = v * s->out_gain;                                     \
         }                                                               \
                                                                         \
         modulation_pos = MOD(modulation_pos + 1,                        \
-                         p->modulation_buffer_length);                  \
+                         s->modulation_buffer_length);                  \
     }                                                                   \
                                                                         \
-    p->delay_pos      = delay_pos;                                      \
-    p->modulation_pos = modulation_pos;                                 \
+    s->delay_pos      = delay_pos;                                      \
+    s->modulation_pos = modulation_pos;                                 \
 }
 
 PHASER_PLANAR(dbl, double)
@@ -202,36 +202,36 @@ PHASER(s32, int32_t)
 
 static int config_output(AVFilterLink *outlink)
 {
-    AudioPhaserContext *p = outlink->src->priv;
+    AudioPhaserContext *s = outlink->src->priv;
     AVFilterLink *inlink = outlink->src->inputs[0];
 
-    p->delay_buffer_length = p->delay * 0.001 * inlink->sample_rate + 0.5;
-    if (p->delay_buffer_length <= 0) {
+    s->delay_buffer_length = s->delay * 0.001 * inlink->sample_rate + 0.5;
+    if (s->delay_buffer_length <= 0) {
         av_log(outlink->src, AV_LOG_ERROR, "delay is too small\n");
         return AVERROR(EINVAL);
     }
-    p->delay_buffer = av_calloc(p->delay_buffer_length, sizeof(*p->delay_buffer) * inlink->channels);
-    p->modulation_buffer_length = inlink->sample_rate / p->speed + 0.5;
-    p->modulation_buffer = av_malloc_array(p->modulation_buffer_length, sizeof(*p->modulation_buffer));
+    s->delay_buffer = av_calloc(s->delay_buffer_length, sizeof(*s->delay_buffer) * inlink->channels);
+    s->modulation_buffer_length = inlink->sample_rate / s->speed + 0.5;
+    s->modulation_buffer = av_malloc_array(s->modulation_buffer_length, sizeof(*s->modulation_buffer));
 
-    if (!p->modulation_buffer || !p->delay_buffer)
+    if (!s->modulation_buffer || !s->delay_buffer)
         return AVERROR(ENOMEM);
 
-    ff_generate_wave_table(p->type, AV_SAMPLE_FMT_S32,
-                           p->modulation_buffer, p->modulation_buffer_length,
-                           1., p->delay_buffer_length, M_PI / 2.0);
+    ff_generate_wave_table(s->type, AV_SAMPLE_FMT_S32,
+                           s->modulation_buffer, s->modulation_buffer_length,
+                           1., s->delay_buffer_length, M_PI / 2.0);
 
-    p->delay_pos = p->modulation_pos = 0;
+    s->delay_pos = s->modulation_pos = 0;
 
     switch (inlink->format) {
-    case AV_SAMPLE_FMT_DBL:  p->phaser = phaser_dbl;  break;
-    case AV_SAMPLE_FMT_DBLP: p->phaser = phaser_dblp; break;
-    case AV_SAMPLE_FMT_FLT:  p->phaser = phaser_flt;  break;
-    case AV_SAMPLE_FMT_FLTP: p->phaser = phaser_fltp; break;
-    case AV_SAMPLE_FMT_S16:  p->phaser = phaser_s16;  break;
-    case AV_SAMPLE_FMT_S16P: p->phaser = phaser_s16p; break;
-    case AV_SAMPLE_FMT_S32:  p->phaser = phaser_s32;  break;
-    case AV_SAMPLE_FMT_S32P: p->phaser = phaser_s32p; break;
+    case AV_SAMPLE_FMT_DBL:  s->phaser = phaser_dbl;  break;
+    case AV_SAMPLE_FMT_DBLP: s->phaser = phaser_dblp; break;
+    case AV_SAMPLE_FMT_FLT:  s->phaser = phaser_flt;  break;
+    case AV_SAMPLE_FMT_FLTP: s->phaser = phaser_fltp; break;
+    case AV_SAMPLE_FMT_S16:  s->phaser = phaser_s16;  break;
+    case AV_SAMPLE_FMT_S16P: s->phaser = phaser_s16p; break;
+    case AV_SAMPLE_FMT_S32:  s->phaser = phaser_s32;  break;
+    case AV_SAMPLE_FMT_S32P: s->phaser = phaser_s32p; break;
     default: av_assert0(0);
     }
 
@@ -240,7 +240,7 @@ static int config_output(AVFilterLink *outlink)
 
 static int filter_frame(AVFilterLink *inlink, AVFrame *inbuf)
 {
-    AudioPhaserContext *p = inlink->dst->priv;
+    AudioPhaserContext *s = inlink->dst->priv;
     AVFilterLink *outlink = inlink->dst->outputs[0];
     AVFrame *outbuf;
 
@@ -253,7 +253,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *inbuf)
         av_frame_copy_props(outbuf, inbuf);
     }
 
-    p->phaser(p, inbuf->extended_data, outbuf->extended_data,
+    s->phaser(s, inbuf->extended_data, outbuf->extended_data,
               outbuf->nb_samples, av_frame_get_channels(outbuf));
 
     if (inbuf != outbuf)
@@ -264,10 +264,10 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *inbuf)
 
 static av_cold void uninit(AVFilterContext *ctx)
 {
-    AudioPhaserContext *p = ctx->priv;
+    AudioPhaserContext *s = ctx->priv;
 
-    av_freep(&p->delay_buffer);
-    av_freep(&p->modulation_buffer);
+    av_freep(&s->delay_buffer);
+    av_freep(&s->modulation_buffer);
 }
 
 static const AVFilterPad aphaser_inputs[] = {
diff --git a/libavfilter/af_apulsator.c b/libavfilter/af_apulsator.c
new file mode 100644
index 00000000..802b8d02
--- /dev/null
+++ b/libavfilter/af_apulsator.c
@@ -0,0 +1,257 @@
+/*
+ * Copyright (c) 2001-2010 Krzysztof Foltman, Markus Schmidt, Thor Harald Johansen and others
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avassert.h"
+#include "libavutil/opt.h"
+#include "avfilter.h"
+#include "internal.h"
+#include "audio.h"
+
+enum PulsatorModes { SINE, TRIANGLE, SQUARE, SAWUP, SAWDOWN, NB_MODES }; /* LFO waveforms */
+enum PulsatorTimings { UNIT_BPM, UNIT_MS, UNIT_HZ, NB_TIMINGS }; /* units of the LFO rate */
+
+typedef struct SimpleLFO { /* state of one low-frequency oscillator */
+    double phase;   /* current position, kept below 1 by lfo_advance() */
+    double freq;    /* oscillation rate; divided by srate for each sample step */
+    double offset;  /* added to the width-scaled phase in lfo_get_value() */
+    double amount;  /* multiplier applied to the waveform value */
+    double pwidth;  /* pulse width; limited to [0.01, 1.99] when it is used */
+    int mode;       /* waveform selector, one of enum PulsatorModes */
+    int srate;      /* sample rate copied from the input link */
+} SimpleLFO;
+
+typedef struct AudioPulsatorContext { /* private context of the apulsator filter */
+    const AVClass *class;
+    int mode;           /* "mode" option: waveform, enum PulsatorModes */
+    double level_in;    /* "level_in" option: input gain */
+    double level_out;   /* "level_out" option: output gain */
+    double amount;      /* "amount" option: modulation */
+    double offset_l;    /* "offset_l" option: offset handed to lfoL */
+    double offset_r;    /* "offset_r" option: offset handed to lfoR */
+    double pwidth;      /* "width" option: pulse width */
+    double bpm;         /* rate in beats per minute, used when timing is UNIT_BPM */
+    double hz;          /* rate in Hz, used when timing is UNIT_HZ */
+    int ms;             /* period in milliseconds, used when timing is UNIT_MS */
+    int timing;         /* selects bpm, ms or hz; enum PulsatorTimings */
+
+    SimpleLFO lfoL, lfoR; /* one oscillator per channel of the stereo pair */
+} AudioPulsatorContext;
+
+#define OFFSET(x) offsetof(AudioPulsatorContext, x)
+#define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+/* Filter options; "timing" selects which of bpm, ms or hz sets the LFO rate. */
+static const AVOption apulsator_options[] = {
+    { "level_in",   "set input gain", OFFSET(level_in),  AV_OPT_TYPE_DOUBLE, {.dbl=1}, 0.015625, 64, FLAGS, },
+    { "level_out", "set output gain", OFFSET(level_out), AV_OPT_TYPE_DOUBLE, {.dbl=1}, 0.015625, 64, FLAGS, },
+    { "mode",             "set mode", OFFSET(mode),      AV_OPT_TYPE_INT,    {.i64=SINE}, SINE,   NB_MODES-1, FLAGS, "mode" },
+    {   "sine",                 NULL, 0,                 AV_OPT_TYPE_CONST,  {.i64=SINE},    0,            0, FLAGS, "mode" },
+    {   "triangle",             NULL, 0,                 AV_OPT_TYPE_CONST,  {.i64=TRIANGLE},0,            0, FLAGS, "mode" },
+    {   "square",               NULL, 0,                 AV_OPT_TYPE_CONST,  {.i64=SQUARE},  0,            0, FLAGS, "mode" },
+    {   "sawup",                NULL, 0,                 AV_OPT_TYPE_CONST,  {.i64=SAWUP},   0,            0, FLAGS, "mode" },
+    {   "sawdown",              NULL, 0,                 AV_OPT_TYPE_CONST,  {.i64=SAWDOWN}, 0,            0, FLAGS, "mode" },
+    { "amount",     "set modulation", OFFSET(amount),    AV_OPT_TYPE_DOUBLE, {.dbl=1},       0,            1, FLAGS },
+    { "offset_l",     "set offset L", OFFSET(offset_l),  AV_OPT_TYPE_DOUBLE, {.dbl=0},       0,            1, FLAGS },
+    { "offset_r",     "set offset R", OFFSET(offset_r),  AV_OPT_TYPE_DOUBLE, {.dbl=.5},      0,            1, FLAGS },
+    { "width",     "set pulse width", OFFSET(pwidth),    AV_OPT_TYPE_DOUBLE, {.dbl=1},       0,            2, FLAGS },
+    { "timing",         "set timing", OFFSET(timing),    AV_OPT_TYPE_INT,    {.i64=UNIT_HZ}, 0, NB_TIMINGS-1, FLAGS, "timing" },
+    {   "bpm",                  NULL, 0,                 AV_OPT_TYPE_CONST,  {.i64=UNIT_BPM},  0,          0, FLAGS, "timing" },
+    {   "ms",                   NULL, 0,                 AV_OPT_TYPE_CONST,  {.i64=UNIT_MS},   0,          0, FLAGS, "timing" },
+    {   "hz",                   NULL, 0,                 AV_OPT_TYPE_CONST,  {.i64=UNIT_HZ},   0,          0, FLAGS, "timing" },
+    { "bpm",               "set BPM", OFFSET(bpm),       AV_OPT_TYPE_DOUBLE, {.dbl=120},    30,          300, FLAGS },
+    { "ms",                 "set ms", OFFSET(ms),        AV_OPT_TYPE_INT,    {.i64=500},    10,         2000, FLAGS },
+    { "hz",          "set frequency", OFFSET(hz),        AV_OPT_TYPE_DOUBLE, {.dbl=2},    0.01,          100, FLAGS },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(apulsator);
+
+static void lfo_advance(SimpleLFO *lfo, unsigned count)
+{
+    /* Step the phase by count samples, then bring it back below 1. */
+    const double stepped = fabs(lfo->phase + count * lfo->freq / lfo->srate);
+    lfo->phase = stepped >= 1 ? fmod(stepped, 1) : stepped;
+}
+
+static double lfo_get_value(SimpleLFO *lfo)
+{
+    /* the pulse width is limited to [0.01, 1.99] before it scales the phase */
+    const double width = FFMIN(1.99, FFMAX(0.01, lfo->pwidth));
+    double pos = FFMIN(100, lfo->phase / width + lfo->offset);
+    double wave;
+
+    /* positions beyond 1 are folded back with fmod() */
+    if (pos > 1)
+        pos = fmod(pos, 1.);
+
+    /* evaluate the selected waveform at pos */
+    switch (lfo->mode) {
+    case SINE:
+        wave = sin(pos * 2 * M_PI);
+        break;
+    case TRIANGLE:
+        wave = pos > 0.75 ? (pos - 0.75) * 4 - 1 :
+               pos > 0.25 ? -4 * pos + 2         : pos * 4;
+        break;
+    case SQUARE:
+        wave = pos < 0.5 ? -1 : +1;
+        break;
+    case SAWUP:
+        wave = pos * 2 - 1;
+        break;
+    case SAWDOWN:
+        wave = 1 - pos * 2;
+        break;
+    default: av_assert0(0);
+    }
+
+    return wave * lfo->amount;
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AudioPulsatorContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+    const double gain_in  = s->level_in;
+    const double gain_out = s->level_out;
+    const double depth    = s->amount;
+    const double *src = (const double *)in->data[0];
+    const int count = in->nb_samples;
+    AVFrame *out = in;
+    double *dst;
+    int i;
+
+    /*
+     * Process in place when the input frame is writable; otherwise fetch a
+     * new buffer of the same length and carry the frame properties over.
+     */
+    if (!av_frame_is_writable(in)) {
+        out = ff_get_audio_buffer(inlink, count);
+        if (!out) {
+            av_frame_free(&in);
+            return AVERROR(ENOMEM);
+        }
+        av_frame_copy_props(out, in);
+    }
+    dst = (double *)out->data[0];
+
+    /* walk the buffer two samples (one left/right pair) at a time */
+    for (i = 0; i < count; i++, src += 2, dst += 2) {
+        /* read both inputs before storing: dst aliases src when in place */
+        const double left  = src[0] * gain_in;
+        const double right = src[1] * gain_in;
+        double wet_l = left;
+        double wet_r = right;
+
+        /* modulate each channel with its own LFO */
+        wet_l *= lfo_get_value(&s->lfoL) * 0.5 + depth / 2;
+        wet_r *= lfo_get_value(&s->lfoR) * 0.5 + depth / 2;
+
+        /* add back the unmodulated share, then apply the output gain */
+        wet_l = (wet_l + left  * (1 - depth)) * gain_out;
+        wet_r = (wet_r + right * (1 - depth)) * gain_out;
+
+        dst[0] = wet_l;
+        dst[1] = wet_r;
+
+        /* move both oscillators on by one sample */
+        lfo_advance(&s->lfoL, 1);
+        lfo_advance(&s->lfoR, 1);
+    }
+
+    /* the input is only released here when a separate output was used */
+    if (out != in)
+        av_frame_free(&in);
+
+    return ff_filter_frame(outlink, out);
+}
+
+static int query_formats(AVFilterContext *ctx)
+{
+    AVFilterChannelLayouts *stereo = NULL;
+    AVFilterFormats *list = NULL;
+    int err = ff_add_format(&list, AV_SAMPLE_FMT_DBL);
+
+    /* each later step runs only while every earlier one has succeeded */
+    if (err >= 0)
+        err = ff_set_common_formats(ctx, list);
+    if (err >= 0)
+        err = ff_add_channel_layout(&stereo, AV_CH_LAYOUT_STEREO);
+    if (err >= 0)
+        err = ff_set_common_channel_layouts(ctx, stereo);
+    return err < 0 ? err : ff_set_common_samplerates(ctx, ff_all_samplerates());
+}
+
+static int config_input(AVFilterLink *inlink)
+{
+    AudioPulsatorContext *s = inlink->dst->priv;
+    SimpleLFO *const lfo[2] = { &s->lfoL, &s->lfoR };
+    double rate;
+    int i;
+
+    /* turn the selected timing unit into LFO cycles per second */
+    switch (s->timing) {
+    case UNIT_BPM:  rate = s->bpm / 60;         break;
+    case UNIT_MS:   rate = 1 / (s->ms / 1000.); break;
+    case UNIT_HZ:   rate = s->hz;               break;
+    default: av_assert0(0);
+    }
+
+    /* both oscillators share every setting except the phase offset */
+    for (i = 0; i < 2; i++) {
+        lfo[i]->freq   = rate;
+        lfo[i]->mode   = s->mode;
+        lfo[i]->srate  = inlink->sample_rate;
+        lfo[i]->amount = s->amount;
+        lfo[i]->pwidth = s->pwidth;
+    }
+    s->lfoL.offset = s->offset_l;
+    s->lfoR.offset = s->offset_r;
+
+    return 0;
+}
+
+static const AVFilterPad inputs[] = { /* one audio input pad plus the NULL entry */
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_AUDIO,
+        .config_props = config_input, /* fills both LFOs from the options */
+        .filter_frame = filter_frame, /* applies the modulation to each frame */
+    },
+    { NULL }
+};
+
+static const AVFilterPad outputs[] = { /* one audio output pad plus the NULL entry */
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_AUDIO,
+    },
+    { NULL }
+};
+
+AVFilter ff_af_apulsator = { /* definition of the "apulsator" audio filter */
+    .name          = "apulsator",
+    .description   = NULL_IF_CONFIG_SMALL("Audio pulsator."),
+    .priv_size     = sizeof(AudioPulsatorContext),
+    .priv_class    = &apulsator_class, /* presumably from AVFILTER_DEFINE_CLASS(apulsator) - confirm */
+    .query_formats = query_formats,
+    .inputs        = inputs,
+    .outputs       = outputs,
+};
diff --git a/libavfilter/af_aresample.c b/libavfilter/af_aresample.c
index 6cb765df..028e1053 100644
--- a/libavfilter/af_aresample.c
+++ b/libavfilter/af_aresample.c
@@ -40,7 +40,6 @@ typedef struct {
     double ratio;
     struct SwrContext *swr;
     int64_t next_pts;
-    int req_fullfilled;
     int more_data;
 } AResampleContext;
 
@@ -80,9 +79,8 @@ static av_cold void uninit(AVFilterContext *ctx)
 static int query_formats(AVFilterContext *ctx)
 {
     AResampleContext *aresample = ctx->priv;
-    int out_rate                   = av_get_int(aresample->swr, "osr", NULL);
-    uint64_t out_layout            = av_get_int(aresample->swr, "ocl", NULL);
-    enum AVSampleFormat out_format = av_get_int(aresample->swr, "osf", NULL);
+    enum AVSampleFormat out_format;
+    int64_t out_rate, out_layout;
 
     AVFilterLink *inlink  = ctx->inputs[0];
     AVFilterLink *outlink = ctx->outputs[0];
@@ -90,22 +88,23 @@ static int query_formats(AVFilterContext *ctx)
     AVFilterFormats        *in_formats, *out_formats;
     AVFilterFormats        *in_samplerates, *out_samplerates;
     AVFilterChannelLayouts *in_layouts, *out_layouts;
+    int ret;
 
+    av_opt_get_sample_fmt(aresample->swr, "osf", 0, &out_format);
+    av_opt_get_int(aresample->swr, "osr", 0, &out_rate);
+    av_opt_get_int(aresample->swr, "ocl", 0, &out_layout);
 
     in_formats      = ff_all_formats(AVMEDIA_TYPE_AUDIO);
-    if (!in_formats)
-        return AVERROR(ENOMEM);
-    ff_formats_ref  (in_formats,      &inlink->out_formats);
+    if ((ret = ff_formats_ref(in_formats, &inlink->out_formats)) < 0)
+        return ret;
 
     in_samplerates  = ff_all_samplerates();
-    if (!in_samplerates)
-        return AVERROR(ENOMEM);
-    ff_formats_ref  (in_samplerates,  &inlink->out_samplerates);
+    if ((ret = ff_formats_ref(in_samplerates, &inlink->out_samplerates)) < 0)
+        return ret;
 
     in_layouts      = ff_all_channel_counts();
-    if (!in_layouts)
-         return AVERROR(ENOMEM);
-    ff_channel_layouts_ref(in_layouts,      &inlink->out_channel_layouts);
+    if ((ret = ff_channel_layouts_ref(in_layouts, &inlink->out_channel_layouts)) < 0)
+        return ret;
 
     if(out_rate > 0) {
         int ratelist[] = { out_rate, -1 };
@@ -113,28 +112,25 @@ static int query_formats(AVFilterContext *ctx)
     } else {
         out_samplerates = ff_all_samplerates();
     }
-    if (!out_samplerates) {
-        av_log(ctx, AV_LOG_ERROR, "Cannot allocate output samplerates.\n");
-        return AVERROR(ENOMEM);
-    }
 
-    ff_formats_ref(out_samplerates, &outlink->in_samplerates);
+    if ((ret = ff_formats_ref(out_samplerates, &outlink->in_samplerates)) < 0)
+        return ret;
 
     if(out_format != AV_SAMPLE_FMT_NONE) {
         int formatlist[] = { out_format, -1 };
         out_formats = ff_make_format_list(formatlist);
     } else
         out_formats = ff_all_formats(AVMEDIA_TYPE_AUDIO);
-    ff_formats_ref(out_formats, &outlink->in_formats);
+    if ((ret = ff_formats_ref(out_formats, &outlink->in_formats)) < 0)
+        return ret;
 
     if(out_layout) {
         int64_t layout_list[] = { out_layout, -1 };
         out_layouts = avfilter_make_format64_list(layout_list);
     } else
         out_layouts = ff_all_channel_counts();
-    ff_channel_layouts_ref(out_layouts, &outlink->in_channel_layouts);
 
-    return 0;
+    return ff_channel_layouts_ref(out_layouts, &outlink->in_channel_layouts);
 }
 
 
@@ -144,8 +140,7 @@ static int config_output(AVFilterLink *outlink)
     AVFilterContext *ctx = outlink->src;
     AVFilterLink *inlink = ctx->inputs[0];
     AResampleContext *aresample = ctx->priv;
-    int out_rate;
-    uint64_t out_layout;
+    int64_t out_rate, out_layout;
     enum AVSampleFormat out_format;
     char inchl_buf[128], outchl_buf[128];
 
@@ -164,9 +159,9 @@ static int config_output(AVFilterLink *outlink)
     if (ret < 0)
         return ret;
 
-    out_rate   = av_get_int(aresample->swr, "osr", NULL);
-    out_layout = av_get_int(aresample->swr, "ocl", NULL);
-    out_format = av_get_int(aresample->swr, "osf", NULL);
+    av_opt_get_int(aresample->swr, "osr", 0, &out_rate);
+    av_opt_get_int(aresample->swr, "ocl", 0, &out_layout);
+    av_opt_get_sample_fmt(aresample->swr, "osf", 0, &out_format);
     outlink->time_base = (AVRational) {1, out_rate};
 
     av_assert0(outlink->sample_rate == out_rate);
@@ -230,7 +225,6 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamplesref)
     outsamplesref->nb_samples  = n_out;
 
     ret = ff_filter_frame(outlink, outsamplesref);
-    aresample->req_fullfilled= 1;
     av_frame_free(&insamplesref);
     return ret;
 }
@@ -283,10 +277,7 @@ static int request_frame(AVFilterLink *outlink)
     aresample->more_data = 0;
 
     // Second request more data from the input
-    aresample->req_fullfilled = 0;
-    do{
-        ret = ff_request_frame(ctx->inputs[0]);
-    }while(!aresample->req_fullfilled && ret>=0);
+    ret = ff_request_frame(ctx->inputs[0]);
 
     // Third if we hit the end flush
     if (ret == AVERROR_EOF) {
diff --git a/libavfilter/af_asetnsamples.c b/libavfilter/af_asetnsamples.c
index e8306431..b5aa193c 100644
--- a/libavfilter/af_asetnsamples.c
+++ b/libavfilter/af_asetnsamples.c
@@ -47,8 +47,8 @@ typedef struct {
 static const AVOption asetnsamples_options[] = {
     { "nb_out_samples", "set the number of per-frame output samples", OFFSET(nb_out_samples), AV_OPT_TYPE_INT, {.i64=1024}, 1, INT_MAX, FLAGS },
     { "n",              "set the number of per-frame output samples", OFFSET(nb_out_samples), AV_OPT_TYPE_INT, {.i64=1024}, 1, INT_MAX, FLAGS },
-    { "pad", "pad last frame with zeros", OFFSET(pad), AV_OPT_TYPE_INT, {.i64=1}, 0, 1, FLAGS },
-    { "p",   "pad last frame with zeros", OFFSET(pad), AV_OPT_TYPE_INT, {.i64=1}, 0, 1, FLAGS },
+    { "pad", "pad last frame with zeros", OFFSET(pad), AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1, FLAGS },
+    { "p",   "pad last frame with zeros", OFFSET(pad), AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1, FLAGS },
     { NULL }
 };
 
@@ -77,7 +77,6 @@ static int config_props_output(AVFilterLink *outlink)
     asns->fifo = av_audio_fifo_alloc(outlink->format, outlink->channels, asns->nb_out_samples);
     if (!asns->fifo)
         return AVERROR(ENOMEM);
-    outlink->flags |= FF_LINK_FLAG_REQUEST_LOOP;
 
     return 0;
 }
diff --git a/libavfilter/af_asetrate.c b/libavfilter/af_asetrate.c
index 409c48fd..66febd71 100644
--- a/libavfilter/af_asetrate.c
+++ b/libavfilter/af_asetrate.c
@@ -39,8 +39,8 @@ typedef struct {
     OPT_GENERIC(name, field, def, min, max, descr, INT, i64, __VA_ARGS__)
 
 static const AVOption asetrate_options[] = {
-    OPT_INT("sample_rate", sample_rate, 44100, 1, INT_MAX, "set the sample rate"),
-    OPT_INT("r",           sample_rate, 44100, 1, INT_MAX, "set the sample rate"),
+    OPT_INT("sample_rate", sample_rate, 44100, 1, INT_MAX, "set the sample rate",),
+    OPT_INT("r",           sample_rate, 44100, 1, INT_MAX, "set the sample rate",),
     {NULL},
 };
 
diff --git a/libavfilter/af_astats.c b/libavfilter/af_astats.c
index 5780fb90..b3b8f281 100644
--- a/libavfilter/af_astats.c
+++ b/libavfilter/af_astats.c
@@ -33,6 +33,9 @@ typedef struct ChannelStats {
     double min, max;
     double min_run, max_run;
     double min_runs, max_runs;
+    double min_diff, max_diff;
+    double diff1_sum;
+    uint64_t mask;
     uint64_t min_count, max_count;
     uint64_t nb_samples;
 } ChannelStats;
@@ -44,6 +47,9 @@ typedef struct {
     uint64_t tc_samples;
     double time_constant;
     double mult;
+    int metadata;
+    int reset_count;
+    int nb_frames;
 } AudioStatsContext;
 
 #define OFFSET(x) offsetof(AudioStatsContext, x)
@@ -51,6 +57,8 @@ typedef struct {
 
 static const AVOption astats_options[] = {
     { "length", "set the window length", OFFSET(time_constant), AV_OPT_TYPE_DOUBLE, {.dbl=.05}, .01, 10, FLAGS },
+    { "metadata", "inject metadata in the filtergraph", OFFSET(metadata), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
+    { "reset", "recalculate stats after this many frames", OFFSET(reset_count), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS },
     { NULL }
 };
 
@@ -66,7 +74,7 @@ static int query_formats(AVFilterContext *ctx)
     };
     int ret;
 
-    layouts = ff_all_channel_layouts();
+    layouts = ff_all_channel_counts();
     if (!layouts)
         return AVERROR(ENOMEM);
     ret = ff_set_common_channel_layouts(ctx, layouts);
@@ -86,10 +94,24 @@ static int query_formats(AVFilterContext *ctx)
     return ff_set_common_samplerates(ctx, formats);
 }
 
+static void reset_stats(AudioStatsContext *s)
+{
+    int c;
+
+    /* Restart the statistics: zero all nb_channels entries, not only the first. */
+    memset(s->chstats, 0, sizeof(*s->chstats) * s->nb_channels);
+    for (c = 0; c < s->nb_channels; c++) {
+        ChannelStats *p = &s->chstats[c];
+        /* NOTE(review): DBL_MIN is the smallest positive double; confirm -DBL_MAX was not intended */
+        p->min = p->min_sigma_x2 = DBL_MAX;
+        p->max = p->max_sigma_x2 = DBL_MIN;
+        p->min_diff = p->max_diff = -1; /* -1 is tested in update_stat() as "no value yet" */
+    }
+}
+
 static int config_output(AVFilterLink *outlink)
 {
     AudioStatsContext *s = outlink->src->priv;
-    int c;
 
     s->chstats = av_calloc(sizeof(*s->chstats), outlink->channels);
     if (!s->chstats)
@@ -98,16 +120,20 @@ static int config_output(AVFilterLink *outlink)
     s->mult = exp((-1 / s->time_constant / outlink->sample_rate));
     s->tc_samples = 5 * s->time_constant * outlink->sample_rate + .5;
 
-    for (c = 0; c < s->nb_channels; c++) {
-        ChannelStats *p = &s->chstats[c];
-
-        p->min = p->min_sigma_x2 = DBL_MAX;
-        p->max = p->max_sigma_x2 = DBL_MIN;
-    }
+    reset_stats(s);
 
     return 0;
 }
 
+static unsigned bit_depth(uint64_t mask)
+{
+    unsigned depth;
+    /* 64 minus the number of trailing zero bits; an all-zero mask gives 0 */
+    for (depth = 64; depth > 0 && (mask & 1) == 0; mask >>= 1)
+        depth--;
+    return depth;
+}
+
 static inline void update_stat(AudioStatsContext *s, ChannelStats *p, double d)
 {
     if (d < p->min) {
@@ -137,7 +163,11 @@ static inline void update_stat(AudioStatsContext *s, ChannelStats *p, double d)
     p->sigma_x += d;
     p->sigma_x2 += d * d;
     p->avg_sigma_x2 = p->avg_sigma_x2 * s->mult + (1.0 - s->mult) * d * d;
+    p->min_diff = FFMIN(p->min_diff == -1 ? DBL_MAX : p->min_diff, fabs(d - (p->min_diff == -1 ? DBL_MAX : p->last)));
+    p->max_diff = FFMAX(p->max_diff, fabs(d - (p->max_diff == -1 ? d : p->last)));
+    p->diff1_sum += fabs(d - p->last);
     p->last = d;
+    p->mask |= llrint(d * (UINT64_C(1) << 63));
 
     if (p->nb_samples >= s->tc_samples) {
         p->max_sigma_x2 = FFMAX(p->max_sigma_x2, p->avg_sigma_x2);
@@ -146,9 +176,95 @@ static inline void update_stat(AudioStatsContext *s, ChannelStats *p, double d)
     p->nb_samples++;
 }
 
+/* Format val with fmt and store it under "lavfi.astats[.<chan>].<key>"; chan 0 omits the channel part. */
+static void set_meta(AVDictionary **metadata, int chan, const char *key,
+                     const char *fmt, double val)
+{
+    char value[128]; /* char rather than uint8_t: snprintf() and av_dict_set() take char pointers */
+    char key2[128];
+    snprintf(value, sizeof(value), fmt, val);
+    if (chan)
+        snprintf(key2, sizeof(key2), "lavfi.astats.%d.%s", chan, key);
+    else
+        snprintf(key2, sizeof(key2), "lavfi.astats.%s", key);
+    av_dict_set(metadata, key2, value, 0);
+}
+
+#define LINEAR_TO_DB(x) (log10(x) * 20) /* 20 * log10(x): a linear value expressed in dB */
+
+/* Write per-channel ("lavfi.astats.<n>.*") and overall ("lavfi.astats.Overall.*") statistics into *metadata. */
+static void set_metadata(AudioStatsContext *s, AVDictionary **metadata)
+{
+    uint64_t mask = 0, min_count = 0, max_count = 0, nb_samples = 0;
+    double min_runs = 0, max_runs = 0,
+           min = DBL_MAX, max = -DBL_MAX, min_diff = DBL_MAX, max_diff = 0,
+           max_sigma_x = 0,
+           diff1_sum = 0,
+           sigma_x2 = 0,
+           min_sigma_x2 = DBL_MAX,
+           max_sigma_x2 = -DBL_MAX; /* -DBL_MAX, not DBL_MIN: DBL_MIN is the smallest positive double */
+    int c;
+
+    for (c = 0; c < s->nb_channels; c++) {
+        ChannelStats *p = &s->chstats[c];
+
+        /* Fewer than tc_samples samples seen: use the whole-run mean square for both RMS extremes. */
+        if (p->nb_samples < s->tc_samples)
+            p->min_sigma_x2 = p->max_sigma_x2 = p->sigma_x2 / p->nb_samples;
+
+        min = FFMIN(min, p->min);
+        max = FFMAX(max, p->max);
+        min_diff = FFMIN(min_diff, p->min_diff);
+        max_diff = FFMAX(max_diff, p->max_diff);
+        diff1_sum += p->diff1_sum;
+        min_sigma_x2 = FFMIN(min_sigma_x2, p->min_sigma_x2);
+        max_sigma_x2 = FFMAX(max_sigma_x2, p->max_sigma_x2);
+        sigma_x2 += p->sigma_x2;
+        min_count += p->min_count;
+        max_count += p->max_count;
+        min_runs += p->min_runs;
+        max_runs += p->max_runs;
+        mask |= p->mask;
+        nb_samples += p->nb_samples;
+        if (fabs(p->sigma_x) > fabs(max_sigma_x))
+            max_sigma_x = p->sigma_x;
+
+        set_meta(metadata, c + 1, "DC_offset", "%f", p->sigma_x / p->nb_samples);
+        set_meta(metadata, c + 1, "Min_level", "%f", p->min);
+        set_meta(metadata, c + 1, "Max_level", "%f", p->max);
+        set_meta(metadata, c + 1, "Min_difference", "%f", p->min_diff);
+        set_meta(metadata, c + 1, "Max_difference", "%f", p->max_diff);
+        set_meta(metadata, c + 1, "Mean_difference", "%f", p->diff1_sum / (p->nb_samples - 1));
+        set_meta(metadata, c + 1, "Peak_level", "%f", LINEAR_TO_DB(FFMAX(-p->min, p->max)));
+        set_meta(metadata, c + 1, "RMS_level", "%f", LINEAR_TO_DB(sqrt(p->sigma_x2 / p->nb_samples)));
+        set_meta(metadata, c + 1, "RMS_peak", "%f", LINEAR_TO_DB(sqrt(p->max_sigma_x2)));
+        set_meta(metadata, c + 1, "RMS_trough", "%f", LINEAR_TO_DB(sqrt(p->min_sigma_x2)));
+        set_meta(metadata, c + 1, "Crest_factor", "%f", p->sigma_x2 ? FFMAX(-p->min, p->max) / sqrt(p->sigma_x2 / p->nb_samples) : 1);
+        set_meta(metadata, c + 1, "Flat_factor", "%f", LINEAR_TO_DB((p->min_runs + p->max_runs) / (p->min_count + p->max_count)));
+        set_meta(metadata, c + 1, "Peak_count", "%f", (float)(p->min_count + p->max_count));
+        set_meta(metadata, c + 1, "Bit_depth", "%f", bit_depth(p->mask));
+    }
+
+    set_meta(metadata, 0, "Overall.DC_offset", "%f", max_sigma_x / (nb_samples / s->nb_channels));
+    set_meta(metadata, 0, "Overall.Min_level", "%f", min);
+    set_meta(metadata, 0, "Overall.Max_level", "%f", max);
+    set_meta(metadata, 0, "Overall.Min_difference", "%f", min_diff);
+    set_meta(metadata, 0, "Overall.Max_difference", "%f", max_diff);
+    set_meta(metadata, 0, "Overall.Mean_difference", "%f", diff1_sum / (nb_samples - s->nb_channels));
+    set_meta(metadata, 0, "Overall.Peak_level", "%f", LINEAR_TO_DB(FFMAX(-min, max)));
+    set_meta(metadata, 0, "Overall.RMS_level", "%f", LINEAR_TO_DB(sqrt(sigma_x2 / nb_samples)));
+    set_meta(metadata, 0, "Overall.RMS_peak", "%f", LINEAR_TO_DB(sqrt(max_sigma_x2)));
+    set_meta(metadata, 0, "Overall.RMS_trough", "%f", LINEAR_TO_DB(sqrt(min_sigma_x2)));
+    set_meta(metadata, 0, "Overall.Flat_factor", "%f", LINEAR_TO_DB((min_runs + max_runs) / (min_count + max_count)));
+    set_meta(metadata, 0, "Overall.Peak_count", "%f", (float)(min_count + max_count) / (double)s->nb_channels);
+    set_meta(metadata, 0, "Overall.Bit_depth", "%f", bit_depth(mask));
+    set_meta(metadata, 0, "Overall.Number_of_samples", "%f", nb_samples / s->nb_channels);
+}
+
 static int filter_frame(AVFilterLink *inlink, AVFrame *buf)
 {
     AudioStatsContext *s = inlink->dst->priv;
+    AVDictionary **metadata = avpriv_frame_get_metadatap(buf);
     const int channels = s->nb_channels;
     const double *src;
     int i, c;
@@ -173,18 +289,28 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *buf)
         break;
     }
 
+    if (s->metadata)
+        set_metadata(s, metadata);
+
+    if (s->reset_count > 0) {
+        s->nb_frames++;
+        if (s->nb_frames >= s->reset_count) {
+            reset_stats(s);
+            s->nb_frames = 0;
+        }
+    }
+
     return ff_filter_frame(inlink->dst->outputs[0], buf);
 }
 
-#define LINEAR_TO_DB(x) (log10(x) * 20)
-
 static void print_stats(AVFilterContext *ctx)
 {
     AudioStatsContext *s = ctx->priv;
-    uint64_t min_count = 0, max_count = 0, nb_samples = 0;
+    uint64_t mask = 0, min_count = 0, max_count = 0, nb_samples = 0;
     double min_runs = 0, max_runs = 0,
-           min = DBL_MAX, max = DBL_MIN,
+           min = DBL_MAX, max = DBL_MIN, min_diff = DBL_MAX, max_diff = 0,
            max_sigma_x = 0,
+           diff1_sum = 0,
            sigma_x = 0,
            sigma_x2 = 0,
            min_sigma_x2 = DBL_MAX,
@@ -199,6 +325,9 @@ static void print_stats(AVFilterContext *ctx)
 
         min = FFMIN(min, p->min);
         max = FFMAX(max, p->max);
+        min_diff = FFMIN(min_diff, p->min_diff);
+        max_diff = FFMAX(max_diff, p->max_diff);
+        diff1_sum += p->diff1_sum,
         min_sigma_x2 = FFMIN(min_sigma_x2, p->min_sigma_x2);
         max_sigma_x2 = FFMAX(max_sigma_x2, p->max_sigma_x2);
         sigma_x += p->sigma_x;
@@ -207,6 +336,7 @@ static void print_stats(AVFilterContext *ctx)
         max_count += p->max_count;
         min_runs += p->min_runs;
         max_runs += p->max_runs;
+        mask |= p->mask;
         nb_samples += p->nb_samples;
         if (fabs(p->sigma_x) > fabs(max_sigma_x))
             max_sigma_x = p->sigma_x;
@@ -215,6 +345,9 @@ static void print_stats(AVFilterContext *ctx)
         av_log(ctx, AV_LOG_INFO, "DC offset: %f\n", p->sigma_x / p->nb_samples);
         av_log(ctx, AV_LOG_INFO, "Min level: %f\n", p->min);
         av_log(ctx, AV_LOG_INFO, "Max level: %f\n", p->max);
+        av_log(ctx, AV_LOG_INFO, "Min difference: %f\n", p->min_diff);
+        av_log(ctx, AV_LOG_INFO, "Max difference: %f\n", p->max_diff);
+        av_log(ctx, AV_LOG_INFO, "Mean difference: %f\n", p->diff1_sum / (p->nb_samples - 1));
         av_log(ctx, AV_LOG_INFO, "Peak level dB: %f\n", LINEAR_TO_DB(FFMAX(-p->min, p->max)));
         av_log(ctx, AV_LOG_INFO, "RMS level dB: %f\n", LINEAR_TO_DB(sqrt(p->sigma_x2 / p->nb_samples)));
         av_log(ctx, AV_LOG_INFO, "RMS peak dB: %f\n", LINEAR_TO_DB(sqrt(p->max_sigma_x2)));
@@ -223,12 +356,16 @@ static void print_stats(AVFilterContext *ctx)
         av_log(ctx, AV_LOG_INFO, "Crest factor: %f\n", p->sigma_x2 ? FFMAX(-p->min, p->max) / sqrt(p->sigma_x2 / p->nb_samples) : 1);
         av_log(ctx, AV_LOG_INFO, "Flat factor: %f\n", LINEAR_TO_DB((p->min_runs + p->max_runs) / (p->min_count + p->max_count)));
         av_log(ctx, AV_LOG_INFO, "Peak count: %"PRId64"\n", p->min_count + p->max_count);
+        av_log(ctx, AV_LOG_INFO, "Bit depth: %u\n", bit_depth(p->mask));
     }
 
     av_log(ctx, AV_LOG_INFO, "Overall\n");
     av_log(ctx, AV_LOG_INFO, "DC offset: %f\n", max_sigma_x / (nb_samples / s->nb_channels));
     av_log(ctx, AV_LOG_INFO, "Min level: %f\n", min);
     av_log(ctx, AV_LOG_INFO, "Max level: %f\n", max);
+    av_log(ctx, AV_LOG_INFO, "Min difference: %f\n", min_diff);
+    av_log(ctx, AV_LOG_INFO, "Max difference: %f\n", max_diff);
+    av_log(ctx, AV_LOG_INFO, "Mean difference: %f\n", diff1_sum / (nb_samples - s->nb_channels));
     av_log(ctx, AV_LOG_INFO, "Peak level dB: %f\n", LINEAR_TO_DB(FFMAX(-min, max)));
     av_log(ctx, AV_LOG_INFO, "RMS level dB: %f\n", LINEAR_TO_DB(sqrt(sigma_x2 / nb_samples)));
     av_log(ctx, AV_LOG_INFO, "RMS peak dB: %f\n", LINEAR_TO_DB(sqrt(max_sigma_x2)));
@@ -236,6 +373,7 @@ static void print_stats(AVFilterContext *ctx)
         av_log(ctx, AV_LOG_INFO, "RMS trough dB: %f\n", LINEAR_TO_DB(sqrt(min_sigma_x2)));
     av_log(ctx, AV_LOG_INFO, "Flat factor: %f\n", LINEAR_TO_DB((min_runs + max_runs) / (min_count + max_count)));
     av_log(ctx, AV_LOG_INFO, "Peak count: %f\n", (min_count + max_count) / (double)s->nb_channels);
+    av_log(ctx, AV_LOG_INFO, "Bit depth: %u\n", bit_depth(mask));
     av_log(ctx, AV_LOG_INFO, "Number of samples: %"PRId64"\n", nb_samples / s->nb_channels);
 }
 
diff --git a/libavfilter/af_astreamsync.c b/libavfilter/af_astreamsync.c
deleted file mode 100644
index becfe340..00000000
--- a/libavfilter/af_astreamsync.c
+++ /dev/null
@@ -1,240 +0,0 @@
-/*
- * Copyright (c) 2011 Nicolas George <nicolas.george@normalesup.org>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/**
- * @file
- * Stream (de)synchronization filter
- */
-
-#include "libavutil/eval.h"
-#include "libavutil/opt.h"
-#include "avfilter.h"
-#include "audio.h"
-#include "internal.h"
-
-#define QUEUE_SIZE 16
-
-static const char * const var_names[] = {
-    "b1", "b2",
-    "s1", "s2",
-    "t1", "t2",
-    NULL
-};
-
-enum var_name {
-    VAR_B1, VAR_B2,
-    VAR_S1, VAR_S2,
-    VAR_T1, VAR_T2,
-    VAR_NB
-};
-
-typedef struct {
-    const AVClass *class;
-    AVExpr *expr;
-    char *expr_str;
-    double var_values[VAR_NB];
-    struct buf_queue {
-        AVFrame *buf[QUEUE_SIZE];
-        unsigned tail, nb;
-        /* buf[tail] is the oldest,
-           buf[(tail + nb) % QUEUE_SIZE] is where the next is added */
-    } queue[2];
-    int req[2];
-    int next_out;
-    int eof; /* bitmask, one bit for each stream */
-} AStreamSyncContext;
-
-#define OFFSET(x) offsetof(AStreamSyncContext, x)
-#define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
-static const AVOption astreamsync_options[] = {
-    { "expr", "set stream selection expression", OFFSET(expr_str), AV_OPT_TYPE_STRING, { .str = "t1-t2" }, .flags = FLAGS },
-    { "e",    "set stream selection expression", OFFSET(expr_str), AV_OPT_TYPE_STRING, { .str = "t1-t2" }, .flags = FLAGS },
-    { NULL }
-};
-
-AVFILTER_DEFINE_CLASS(astreamsync);
-
-static av_cold int init(AVFilterContext *ctx)
-{
-    AStreamSyncContext *as = ctx->priv;
-    int r, i;
-
-    r = av_expr_parse(&as->expr, as->expr_str, var_names,
-                      NULL, NULL, NULL, NULL, 0, ctx);
-    if (r < 0) {
-        av_log(ctx, AV_LOG_ERROR, "Error in expression \"%s\"\n", as->expr_str);
-        return r;
-    }
-    for (i = 0; i < 42; i++)
-        av_expr_eval(as->expr, as->var_values, NULL); /* exercize prng */
-    return 0;
-}
-
-static int query_formats(AVFilterContext *ctx)
-{
-    int i;
-    AVFilterFormats *formats, *rates;
-    AVFilterChannelLayouts *layouts;
-
-    for (i = 0; i < 2; i++) {
-        formats = ctx->inputs[i]->in_formats;
-        ff_formats_ref(formats, &ctx->inputs[i]->out_formats);
-        ff_formats_ref(formats, &ctx->outputs[i]->in_formats);
-        rates = ff_all_samplerates();
-        ff_formats_ref(rates, &ctx->inputs[i]->out_samplerates);
-        ff_formats_ref(rates, &ctx->outputs[i]->in_samplerates);
-        layouts = ctx->inputs[i]->in_channel_layouts;
-        ff_channel_layouts_ref(layouts, &ctx->inputs[i]->out_channel_layouts);
-        ff_channel_layouts_ref(layouts, &ctx->outputs[i]->in_channel_layouts);
-    }
-    return 0;
-}
-
-static int config_output(AVFilterLink *outlink)
-{
-    AVFilterContext *ctx = outlink->src;
-    int id = outlink == ctx->outputs[1];
-
-    outlink->sample_rate = ctx->inputs[id]->sample_rate;
-    outlink->time_base   = ctx->inputs[id]->time_base;
-    return 0;
-}
-
-static int send_out(AVFilterContext *ctx, int out_id)
-{
-    AStreamSyncContext *as = ctx->priv;
-    struct buf_queue *queue = &as->queue[out_id];
-    AVFrame *buf = queue->buf[queue->tail];
-    int ret;
-
-    queue->buf[queue->tail] = NULL;
-    as->var_values[VAR_B1 + out_id]++;
-    as->var_values[VAR_S1 + out_id] += buf->nb_samples;
-    if (buf->pts != AV_NOPTS_VALUE)
-        as->var_values[VAR_T1 + out_id] =
-            av_q2d(ctx->outputs[out_id]->time_base) * buf->pts;
-    as->var_values[VAR_T1 + out_id] += buf->nb_samples /
-                                   (double)ctx->inputs[out_id]->sample_rate;
-    ret = ff_filter_frame(ctx->outputs[out_id], buf);
-    queue->nb--;
-    queue->tail = (queue->tail + 1) % QUEUE_SIZE;
-    if (as->req[out_id])
-        as->req[out_id]--;
-    return ret;
-}
-
-static void send_next(AVFilterContext *ctx)
-{
-    AStreamSyncContext *as = ctx->priv;
-    int i;
-
-    while (1) {
-        if (!as->queue[as->next_out].nb)
-            break;
-        send_out(ctx, as->next_out);
-        if (!as->eof)
-            as->next_out = av_expr_eval(as->expr, as->var_values, NULL) >= 0;
-    }
-    for (i = 0; i < 2; i++)
-        if (as->queue[i].nb == QUEUE_SIZE)
-            send_out(ctx, i);
-}
-
-static int request_frame(AVFilterLink *outlink)
-{
-    AVFilterContext *ctx = outlink->src;
-    AStreamSyncContext *as = ctx->priv;
-    int id = outlink == ctx->outputs[1];
-
-    as->req[id]++;
-    while (as->req[id] && !(as->eof & (1 << id))) {
-        if (as->queue[as->next_out].nb) {
-            send_next(ctx);
-        } else {
-            as->eof |= 1 << as->next_out;
-            ff_request_frame(ctx->inputs[as->next_out]);
-            if (as->eof & (1 << as->next_out))
-                as->next_out = !as->next_out;
-        }
-    }
-    return 0;
-}
-
-static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
-{
-    AVFilterContext *ctx = inlink->dst;
-    AStreamSyncContext *as = ctx->priv;
-    int id = inlink == ctx->inputs[1];
-
-    as->queue[id].buf[(as->queue[id].tail + as->queue[id].nb++) % QUEUE_SIZE] =
-        insamples;
-    as->eof &= ~(1 << id);
-    send_next(ctx);
-    return 0;
-}
-
-static av_cold void uninit(AVFilterContext *ctx)
-{
-    AStreamSyncContext *as = ctx->priv;
-
-    av_expr_free(as->expr);
-    as->expr = NULL;
-}
-
-static const AVFilterPad astreamsync_inputs[] = {
-    {
-        .name         = "in1",
-        .type         = AVMEDIA_TYPE_AUDIO,
-        .filter_frame = filter_frame,
-    },{
-        .name         = "in2",
-        .type         = AVMEDIA_TYPE_AUDIO,
-        .filter_frame = filter_frame,
-    },
-    { NULL }
-};
-
-static const AVFilterPad astreamsync_outputs[] = {
-    {
-        .name          = "out1",
-        .type          = AVMEDIA_TYPE_AUDIO,
-        .config_props  = config_output,
-        .request_frame = request_frame,
-    },{
-        .name          = "out2",
-        .type          = AVMEDIA_TYPE_AUDIO,
-        .config_props  = config_output,
-        .request_frame = request_frame,
-    },
-    { NULL }
-};
-
-AVFilter ff_af_astreamsync = {
-    .name          = "astreamsync",
-    .description   = NULL_IF_CONFIG_SMALL("Copy two streams of audio data "
-                                          "in a configurable order."),
-    .priv_size     = sizeof(AStreamSyncContext),
-    .init          = init,
-    .uninit        = uninit,
-    .query_formats = query_formats,
-    .inputs        = astreamsync_inputs,
-    .outputs       = astreamsync_outputs,
-    .priv_class    = &astreamsync_class,
-};
diff --git a/libavfilter/af_asyncts.c b/libavfilter/af_asyncts.c
index 5f8e1f61..22559a1e 100644
--- a/libavfilter/af_asyncts.c
+++ b/libavfilter/af_asyncts.c
@@ -53,7 +53,7 @@ typedef struct ASyncContext {
 #define A AV_OPT_FLAG_AUDIO_PARAM
 #define F AV_OPT_FLAG_FILTERING_PARAM
 static const AVOption asyncts_options[] = {
-    { "compensate", "Stretch/squeeze the data to make it match the timestamps", OFFSET(resample),      AV_OPT_TYPE_INT,   { .i64 = 0 },   0, 1,       A|F },
+    { "compensate", "Stretch/squeeze the data to make it match the timestamps", OFFSET(resample),      AV_OPT_TYPE_BOOL,  { .i64 = 0 },   0, 1,       A|F },
     { "min_delta",  "Minimum difference between timestamps and audio data "
                     "(in seconds) to trigger padding/trimmin the data.",        OFFSET(min_delta_sec), AV_OPT_TYPE_FLOAT, { .dbl = 0.1 }, 0, INT_MAX, A|F },
     { "max_comp",   "Maximum compensation in samples per second.",              OFFSET(max_comp),      AV_OPT_TYPE_INT,   { .i64 = 500 }, 0, INT_MAX, A|F },
@@ -139,8 +139,7 @@ static int request_frame(AVFilterLink *link)
     int nb_samples;
 
     s->got_output = 0;
-    while (ret >= 0 && !s->got_output)
-        ret = ff_request_frame(ctx->inputs[0]);
+    ret = ff_request_frame(ctx->inputs[0]);
 
     /* flush the fifo */
     if (ret == AVERROR_EOF) {
@@ -205,7 +204,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *buf)
     delta    = pts - s->pts - get_delay(s);
     out_size = avresample_available(s->avr);
 
-    if (labs(delta) > s->min_delta ||
+    if (llabs(delta) > s->min_delta ||
         (s->first_frame && delta && s->first_pts != AV_NOPTS_VALUE)) {
         av_log(ctx, AV_LOG_VERBOSE, "Discontinuity - %"PRId64" samples.\n", delta);
         out_size = av_clipl_int32((int64_t)out_size + delta);
@@ -313,7 +312,7 @@ static const AVFilterPad avfilter_af_asyncts_outputs[] = {
 
 AVFilter ff_af_asyncts = {
     .name        = "asyncts",
-    .description = NULL_IF_CONFIG_SMALL("Sync audio data to timestamps"),
+    .description = NULL_IF_CONFIG_SMALL("Sync audio data to timestamps."),
     .init        = init,
     .uninit      = uninit,
     .priv_size   = sizeof(ASyncContext),
diff --git a/libavfilter/af_atempo.c b/libavfilter/af_atempo.c
index 49d49ee2..7b3d57cc 100644
--- a/libavfilter/af_atempo.c
+++ b/libavfilter/af_atempo.c
@@ -1046,8 +1046,6 @@ static int config_props(AVFilterLink *inlink)
     int sample_rate = (int)inlink->sample_rate;
     int channels = av_get_channel_layout_nb_channels(inlink->channel_layout);
 
-    ctx->outputs[0]->flags |= FF_LINK_FLAG_REQUEST_LOOP;
-
     return yae_reset(atempo, format, sample_rate, channels);
 }
 
diff --git a/libavfilter/af_biquads.c b/libavfilter/af_biquads.c
index 118a0c0b..4953202d 100644
--- a/libavfilter/af_biquads.c
+++ b/libavfilter/af_biquads.c
@@ -94,7 +94,7 @@ typedef struct ChanCache {
     double o1, o2;
 } ChanCache;
 
-typedef struct {
+typedef struct BiquadsContext {
     const AVClass *class;
 
     enum FilterType filter_type;
@@ -110,8 +110,9 @@ typedef struct {
     double b0, b1, b2;
 
     ChanCache *cache;
+    int clippings;
 
-    void (*filter)(const void *ibuf, void *obuf, int len,
+    void (*filter)(struct BiquadsContext *s, const void *ibuf, void *obuf, int len,
                    double *i1, double *i2, double *o1, double *o2,
                    double b0, double b1, double b2, double a1, double a2);
 } BiquadsContext;
@@ -144,7 +145,7 @@ static int query_formats(AVFilterContext *ctx)
     };
     int ret;
 
-    layouts = ff_all_channel_layouts();
+    layouts = ff_all_channel_counts();
     if (!layouts)
         return AVERROR(ENOMEM);
     ret = ff_set_common_channel_layouts(ctx, layouts);
@@ -165,7 +166,8 @@ static int query_formats(AVFilterContext *ctx)
 }
 
 #define BIQUAD_FILTER(name, type, min, max, need_clipping)                    \
-static void biquad_## name (const void *input, void *output, int len,         \
+static void biquad_## name (BiquadsContext *s,                                \
+                            const void *input, void *output, int len,         \
                             double *in1, double *in2,                         \
                             double *out1, double *out2,                       \
                             double b0, double b1, double b2,                  \
@@ -185,10 +187,10 @@ static void biquad_## name (const void *input, void *output, int len,         \
         o2 = i2 * b2 + i1 * b1 + ibuf[i] * b0 + o2 * a2 + o1 * a1;            \
         i2 = ibuf[i];                                                         \
         if (need_clipping && o2 < min) {                                      \
-            av_log(NULL, AV_LOG_WARNING, "clipping\n");                       \
+            s->clippings++;                                                   \
             obuf[i] = min;                                                    \
         } else if (need_clipping && o2 > max) {                               \
-            av_log(NULL, AV_LOG_WARNING, "clipping\n");                       \
+            s->clippings++;                                                   \
             obuf[i] = max;                                                    \
         } else {                                                              \
             obuf[i] = o2;                                                     \
@@ -197,10 +199,10 @@ static void biquad_## name (const void *input, void *output, int len,         \
         o1 = i1 * b2 + i2 * b1 + ibuf[i] * b0 + o1 * a2 + o2 * a1;            \
         i1 = ibuf[i];                                                         \
         if (need_clipping && o1 < min) {                                      \
-            av_log(NULL, AV_LOG_WARNING, "clipping\n");                       \
+            s->clippings++;                                                   \
             obuf[i] = min;                                                    \
         } else if (need_clipping && o1 > max) {                               \
-            av_log(NULL, AV_LOG_WARNING, "clipping\n");                       \
+            s->clippings++;                                                   \
             obuf[i] = max;                                                    \
         } else {                                                              \
             obuf[i] = o1;                                                     \
@@ -213,10 +215,10 @@ static void biquad_## name (const void *input, void *output, int len,         \
         o2 = o1;                                                              \
         o1 = o0;                                                              \
         if (need_clipping && o0 < min) {                                      \
-            av_log(NULL, AV_LOG_WARNING, "clipping\n");                       \
+            s->clippings++;                                                   \
             obuf[i] = min;                                                    \
         } else if (need_clipping && o0 > max) {                               \
-            av_log(NULL, AV_LOG_WARNING, "clipping\n");                       \
+            s->clippings++;                                                   \
             obuf[i] = max;                                                    \
         } else {                                                              \
             obuf[i] = o0;                                                     \
@@ -391,8 +393,9 @@ static int config_output(AVFilterLink *outlink)
 
 static int filter_frame(AVFilterLink *inlink, AVFrame *buf)
 {
-    BiquadsContext *s       = inlink->dst->priv;
-    AVFilterLink *outlink   = inlink->dst->outputs[0];
+    AVFilterContext  *ctx = inlink->dst;
+    BiquadsContext *s     = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
     AVFrame *out_buf;
     int nb_samples = buf->nb_samples;
     int ch;
@@ -401,18 +404,23 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *buf)
         out_buf = buf;
     } else {
         out_buf = ff_get_audio_buffer(inlink, nb_samples);
-        if (!out_buf)
+        if (!out_buf) {
+            av_frame_free(&buf);
             return AVERROR(ENOMEM);
+        }
         av_frame_copy_props(out_buf, buf);
     }
 
     for (ch = 0; ch < av_frame_get_channels(buf); ch++)
-        s->filter(buf->extended_data[ch],
+        s->filter(s, buf->extended_data[ch],
                   out_buf->extended_data[ch], nb_samples,
                   &s->cache[ch].i1, &s->cache[ch].i2,
                   &s->cache[ch].o1, &s->cache[ch].o2,
                   s->b0, s->b1, s->b2, s->a1, s->a2);
 
+    if (s->clippings > 0)
+        av_log(ctx, AV_LOG_WARNING, "clipping %d times. Please reduce gain.\n", s->clippings);
+
     if (buf != out_buf)
         av_frame_free(&buf);
 
@@ -534,7 +542,7 @@ static const AVOption bandpass_options[] = {
     {"s", "slope", 0, AV_OPT_TYPE_CONST, {.i64=SLOPE}, 0, 0, FLAGS, "width_type"},
     {"width", "set band-width", OFFSET(width), AV_OPT_TYPE_DOUBLE, {.dbl=0.5}, 0, 999, FLAGS},
     {"w",     "set band-width", OFFSET(width), AV_OPT_TYPE_DOUBLE, {.dbl=0.5}, 0, 999, FLAGS},
-    {"csg",   "use constant skirt gain", OFFSET(csg), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS},
+    {"csg",   "use constant skirt gain", OFFSET(csg), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
     {NULL}
 };
 
diff --git a/libavfilter/af_bs2b.c b/libavfilter/af_bs2b.c
index 592fdec3..54d52c5c 100644
--- a/libavfilter/af_bs2b.c
+++ b/libavfilter/af_bs2b.c
@@ -47,7 +47,7 @@ typedef struct Bs2bContext {
 #define OFFSET(x) offsetof(Bs2bContext, x)
 #define A AV_OPT_FLAG_AUDIO_PARAM
 
-static const AVOption options[] = {
+static const AVOption bs2b_options[] = {
     { "profile", "Apply a pre-defined crossfeed level",
             OFFSET(profile), AV_OPT_TYPE_INT, { .i64 = BS2B_DEFAULT_CLEVEL }, 0, INT_MAX, A, "profile" },
         { "default", "default profile", 0, AV_OPT_TYPE_CONST, { .i64 = BS2B_DEFAULT_CLEVEL }, 0, 0, A, "profile" },
@@ -60,12 +60,7 @@ static const AVOption options[] = {
     { NULL },
 };
 
-static const AVClass bs2b_class = {
-    .class_name = "bs2b filter",
-    .item_name  = av_default_item_name,
-    .option     = options,
-    .version    = LIBAVUTIL_VERSION_INT,
-};
+AVFILTER_DEFINE_CLASS(bs2b);
 
 static av_cold int init(AVFilterContext *ctx)
 {
diff --git a/libavfilter/af_channelmap.c b/libavfilter/af_channelmap.c
index f8289ccf..dcae2a21 100644
--- a/libavfilter/af_channelmap.c
+++ b/libavfilter/af_channelmap.c
@@ -57,7 +57,6 @@ enum MappingMode {
 #define MAX_CH 64
 typedef struct ChannelMapContext {
     const AVClass *class;
-    AVFilterChannelLayouts *channel_layouts;
     char *mapping_str;
     char *channel_layout_str;
     uint64_t output_layout;
@@ -275,8 +274,6 @@ static av_cold int channelmap_init(AVFilterContext *ctx)
         return AVERROR(EINVAL);
     }
 
-    ff_add_channel_layout(&s->channel_layouts, s->output_layout);
-
     if (mode == MAP_PAIR_INT_STR || mode == MAP_PAIR_STR_STR) {
         for (i = 0; i < s->nch; i++) {
             s->map[i].out_channel_idx = av_get_channel_layout_channel_index(
@@ -291,18 +288,27 @@ static int channelmap_query_formats(AVFilterContext *ctx)
 {
     ChannelMapContext *s = ctx->priv;
     AVFilterChannelLayouts *layouts;
-
-    ff_set_common_formats(ctx, ff_planar_sample_fmts());
-    ff_set_common_samplerates(ctx, ff_all_samplerates());
+    AVFilterChannelLayouts *channel_layouts = NULL;
+    int ret;
 
     layouts = ff_all_channel_layouts();
-    if (!layouts)
-        return AVERROR(ENOMEM);
-
-    ff_channel_layouts_ref(layouts, &ctx->inputs[0]->out_channel_layouts);
-    ff_channel_layouts_ref(s->channel_layouts,       &ctx->outputs[0]->in_channel_layouts);
+    if (!layouts) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+    if ((ret = ff_add_channel_layout     (&channel_layouts, s->output_layout                    )) < 0 ||
+        (ret = ff_set_common_formats     (ctx             , ff_planar_sample_fmts()             )) < 0 ||
+        (ret = ff_set_common_samplerates (ctx             , ff_all_samplerates()                )) < 0 ||
+        (ret = ff_channel_layouts_ref    (layouts         , &ctx->inputs[0]->out_channel_layouts)) < 0 ||
+        (ret = ff_channel_layouts_ref    (channel_layouts , &ctx->outputs[0]->in_channel_layouts)) < 0)
+            goto fail;
 
     return 0;
+fail:
+    if (layouts)
+        av_freep(&layouts->channel_layouts);
+    av_freep(&layouts);
+    return ret;
 }
 
 static int channelmap_filter_frame(AVFilterLink *inlink, AVFrame *buf)
diff --git a/libavfilter/af_channelsplit.c b/libavfilter/af_channelsplit.c
index b3756e2b..f5041498 100644
--- a/libavfilter/af_channelsplit.c
+++ b/libavfilter/af_channelsplit.c
@@ -82,20 +82,23 @@ static int query_formats(AVFilterContext *ctx)
 {
     ChannelSplitContext *s = ctx->priv;
     AVFilterChannelLayouts *in_layouts = NULL;
-    int i;
+    int i, ret;
 
-    ff_set_common_formats    (ctx, ff_planar_sample_fmts());
-    ff_set_common_samplerates(ctx, ff_all_samplerates());
+    if ((ret = ff_set_common_formats(ctx, ff_planar_sample_fmts())) < 0 ||
+        (ret = ff_set_common_samplerates(ctx, ff_all_samplerates())) < 0)
+        return ret;
 
-    ff_add_channel_layout(&in_layouts, s->channel_layout);
-    ff_channel_layouts_ref(in_layouts, &ctx->inputs[0]->out_channel_layouts);
+    if ((ret = ff_add_channel_layout(&in_layouts, s->channel_layout)) < 0 ||
+        (ret = ff_channel_layouts_ref(in_layouts, &ctx->inputs[0]->out_channel_layouts)) < 0)
+        return ret;
 
     for (i = 0; i < ctx->nb_outputs; i++) {
         AVFilterChannelLayouts *out_layouts = NULL;
         uint64_t channel = av_channel_layout_extract_channel(s->channel_layout, i);
 
-        ff_add_channel_layout(&out_layouts, channel);
-        ff_channel_layouts_ref(out_layouts, &ctx->outputs[i]->in_channel_layouts);
+        if ((ret = ff_add_channel_layout(&out_layouts, channel)) < 0 ||
+            (ret = ff_channel_layouts_ref(out_layouts, &ctx->outputs[i]->in_channel_layouts)) < 0)
+            return ret;
     }
 
     return 0;
diff --git a/libavfilter/af_chorus.c b/libavfilter/af_chorus.c
index 93fb36b6..c5961643 100644
--- a/libavfilter/af_chorus.c
+++ b/libavfilter/af_chorus.c
@@ -162,7 +162,7 @@ static int query_formats(AVFilterContext *ctx)
     };
     int ret;
 
-    layouts = ff_all_channel_layouts();
+    layouts = ff_all_channel_counts();
     if (!layouts)
         return AVERROR(ENOMEM);
     ret = ff_set_common_channel_layouts(ctx, layouts);
@@ -247,8 +247,10 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
         out_frame = frame;
     } else {
         out_frame = ff_get_audio_buffer(inlink, frame->nb_samples);
-        if (!out_frame)
+        if (!out_frame) {
+            av_frame_free(&frame);
             return AVERROR(ENOMEM);
+        }
         av_frame_copy_props(out_frame, frame);
     }
 
diff --git a/libavfilter/af_compand.c b/libavfilter/af_compand.c
index 55ad98a1..b7463640 100644
--- a/libavfilter/af_compand.c
+++ b/libavfilter/af_compand.c
@@ -109,7 +109,7 @@ static int query_formats(AVFilterContext *ctx)
     };
     int ret;
 
-    layouts = ff_all_channel_layouts();
+    layouts = ff_all_channel_counts();
     if (!layouts)
         return AVERROR(ENOMEM);
     ret = ff_set_common_channel_layouts(ctx, layouts);
@@ -205,7 +205,7 @@ static int compand_nodelay(AVFilterContext *ctx, AVFrame *frame)
         for (i = 0; i < nb_samples; i++) {
             update_volume(cp, fabs(src[i]));
 
-            dst[i] = av_clipd(src[i] * get_volume(s, cp->volume), -1, 1);
+            dst[i] = src[i] * get_volume(s, cp->volume);
         }
     }
 
@@ -266,8 +266,7 @@ static int compand_delay(AVFilterContext *ctx, AVFrame *frame)
                 }
 
                 dst = (double *)out_frame->extended_data[chan];
-                dst[oindex++] = av_clipd(dbuf[dindex] *
-                        get_volume(s, cp->volume), -1, 1);
+                dst[oindex++] = dbuf[dindex] * get_volume(s, cp->volume);
             } else {
                 count++;
             }
@@ -315,8 +314,7 @@ static int compand_drain(AVFilterLink *outlink)
 
         dindex = s->delay_index;
         for (i = 0; i < frame->nb_samples; i++) {
-            dst[i] = av_clipd(dbuf[dindex] * get_volume(s, cp->volume),
-                    -1, 1);
+            dst[i] = dbuf[dindex] * get_volume(s, cp->volume);
             dindex = MOD(dindex + 1, s->delay_samples);
         }
     }
@@ -398,6 +396,11 @@ static int config_output(AVFilterLink *outlink)
         return AVERROR(EINVAL);
     }
 
+    for (i = nb_decays; i < channels; i++) {
+        s->channels[i].attack = s->channels[nb_decays - 1].attack;
+        s->channels[i].decay = s->channels[nb_decays - 1].decay;
+    }
+
 #define S(x) s->segments[2 * ((x) + 1)]
     p = s->points;
     for (i = 0, new_nb_items = 0; i < nb_points; i++) {
@@ -445,14 +448,14 @@ static int config_output(AVFilterLink *outlink)
             S(j) = S(j + 1);
     }
 
-    for (i = 0; !i || s->segments[i - 2].x; i += 2) {
+    for (i = 0; i < s->nb_segments; i += 2) {
         s->segments[i].y += s->gain_dB;
         s->segments[i].x *= M_LN10 / 20;
         s->segments[i].y *= M_LN10 / 20;
     }
 
 #define L(x) s->segments[i - (x)]
-    for (i = 4; s->segments[i - 2].x; i += 2) {
+    for (i = 4; i < s->nb_segments; i += 2) {
         double x, y, cx, cy, in1, in2, out1, out2, theta, len, r;
 
         L(4).a = 0;
@@ -462,13 +465,13 @@ static int config_output(AVFilterLink *outlink)
         L(2).b = (L(0).y - L(2).y) / (L(0).x - L(2).x);
 
         theta = atan2(L(2).y - L(4).y, L(2).x - L(4).x);
-        len = sqrt(pow(L(2).x - L(4).x, 2.) + pow(L(2).y - L(4).y, 2.));
+        len = hypot(L(2).x - L(4).x, L(2).y - L(4).y);
         r = FFMIN(radius, len);
         L(3).x = L(2).x - r * cos(theta);
         L(3).y = L(2).y - r * sin(theta);
 
         theta = atan2(L(0).y - L(2).y, L(0).x - L(2).x);
-        len = sqrt(pow(L(0).x - L(2).x, 2.) + pow(L(0).y - L(2).y, 2.));
+        len = hypot(L(0).x - L(2).x, L(0).y - L(2).y);
         r = FFMIN(radius, len / 2);
         x = L(2).x + r * cos(theta);
         y = L(2).y + r * sin(theta);
@@ -503,7 +506,7 @@ static int config_output(AVFilterLink *outlink)
             cp->decay = 1.0 - exp(-1.0 / (sample_rate * cp->decay));
         else
             cp->decay = 1.0;
-        cp->volume = pow(10.0, s->initial_volume / 20);
+        cp->volume = ff_exp10(s->initial_volume / 20);
     }
 
     s->delay_samples = s->delay * sample_rate;
@@ -526,7 +529,6 @@ static int config_output(AVFilterLink *outlink)
     if (err)
         return err;
 
-    outlink->flags |= FF_LINK_FLAG_REQUEST_LOOP;
     s->compand = compand_delay;
     return 0;
 }
diff --git a/libavfilter/af_compensationdelay.c b/libavfilter/af_compensationdelay.c
new file mode 100644
index 00000000..d5a34843
--- /dev/null
+++ b/libavfilter/af_compensationdelay.c
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2001-2010 Krzysztof Foltman, Markus Schmidt, Thor Harald Johansen, Vladimir Sadovnikov and others
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/opt.h"
+#include "libavutil/samplefmt.h"
+#include "avfilter.h"
+#include "audio.h"
+#include "internal.h"
+
+typedef struct CompensationDelayContext {
+    const AVClass *class;
+    int distance_mm;      /* "mm" option */
+    int distance_cm;      /* "cm" option */
+    int distance_m;       /* "m" option */
+    double dry, wet;      /* "dry"/"wet" options: weights of the input sample and of the delayed sample in filter_frame() */
+    int temp;             /* "temp" option, degrees Celsius */
+
+    unsigned delay;       /* delay in samples, derived from the options in config_input() */
+    unsigned w_ptr;       /* ring-buffer write index carried over between frames */
+    unsigned buf_size;    /* ring-buffer length in samples; config_input() makes it a power of two */
+    AVFrame *delay_frame; /* holds one ring buffer per channel */
+} CompensationDelayContext;
+
+#define OFFSET(x) offsetof(CompensationDelayContext, x)
+#define A AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+/* Each row: name, help text, field offset, type, default, min, max, flags. config_input() adds the mm, cm and m distances together. */
+static const AVOption compensationdelay_options[] = {
+    { "mm",   "set mm distance",    OFFSET(distance_mm), AV_OPT_TYPE_INT,    {.i64=0},    0,  10, A },
+    { "cm",   "set cm distance",    OFFSET(distance_cm), AV_OPT_TYPE_INT,    {.i64=0},    0, 100, A },
+    { "m",    "set meter distance", OFFSET(distance_m),  AV_OPT_TYPE_INT,    {.i64=0},    0, 100, A },
+    { "dry",  "set dry amount",     OFFSET(dry),         AV_OPT_TYPE_DOUBLE, {.dbl=0},    0,   1, A },
+    { "wet",  "set wet amount",     OFFSET(wet),         AV_OPT_TYPE_DOUBLE, {.dbl=1},    0,   1, A },
+    { "temp", "set temperature °C", OFFSET(temp),        AV_OPT_TYPE_INT,    {.i64=20}, -50,  50, A },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(compensationdelay);
+
+// The maximum distance the options can express, in cm (100 m + 100 cm + 10 mm)
+#define COMP_DELAY_MAX_DISTANCE            (100.0 * 100.0 + 100.0 * 1.0 + 1.0)
+// The speed of sound at the given temperature (degrees Celsius); it grows with the square root of the absolute temperature
+#define COMP_DELAY_SOUND_SPEED_KM_H(temp)  (1.85325 * (643.95 * sqrt((((temp) + 273.15) / 273.15))))
+#define COMP_DELAY_SOUND_SPEED_CM_S(temp)  (COMP_DELAY_SOUND_SPEED_KM_H(temp) * (1000.0 * 100.0) /* cm/km */ / (60.0 * 60.0) /* s/h */)
+#define COMP_DELAY_SOUND_FRONT_DELAY(temp) (1.0 / COMP_DELAY_SOUND_SPEED_CM_S(temp))
+// The maximum delay this filter may reach: sound is slowest, hence the delay longest, at the lowest allowed temperature (-50)
+#define COMP_DELAY_MAX_DELAY               (COMP_DELAY_MAX_DISTANCE * COMP_DELAY_SOUND_FRONT_DELAY(-50))
+
+/* Format negotiation: planar doubles, any channel count, any sample rate. */
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVSampleFormat sample_fmts[] = {
+        AV_SAMPLE_FMT_DBLP,
+        AV_SAMPLE_FMT_NONE
+    };
+    AVFilterChannelLayouts *chan_list;
+    AVFilterFormats *fmt_list;
+    int err;
+
+    /* channel counts first */
+    if (!(chan_list = ff_all_channel_counts()))
+        return AVERROR(ENOMEM);
+    if ((err = ff_set_common_channel_layouts(ctx, chan_list)) < 0)
+        return err;
+
+    /* filter_frame() reads every plane as an array of double */
+    if (!(fmt_list = ff_make_format_list(sample_fmts)))
+        return AVERROR(ENOMEM);
+    if ((err = ff_set_common_formats(ctx, fmt_list)) < 0)
+        return err;
+
+    /* no restriction on the sample rate */
+    if (!(fmt_list = ff_all_samplerates()))
+        return AVERROR(ENOMEM);
+
+    return ff_set_common_samplerates(ctx, fmt_list);
+}
+
+/* Convert the distance options to a delay in samples and allocate a silent power-of-two ring buffer. */
+static int config_input(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    CompensationDelayContext *s = ctx->priv;
+    unsigned min_size, new_size = 1;
+
+    s->delay = (s->distance_m * 100. + s->distance_cm * 1. + s->distance_mm * .1) *
+               COMP_DELAY_SOUND_FRONT_DELAY(s->temp) * inlink->sample_rate;
+    min_size = inlink->sample_rate * COMP_DELAY_MAX_DELAY;
+
+    while (new_size < min_size)
+        new_size <<= 1;
+
+    /* a reconfiguration must neither leak the old buffer nor keep a write index sized for it */
+    av_frame_free(&s->delay_frame);
+    s->w_ptr    = 0;
+    s->buf_size = new_size;
+    /* unlike a bare av_frame_get_buffer(), this takes format and channel count from the link and returns silence */
+    s->delay_frame = ff_get_audio_buffer(inlink, new_size);
+    if (!s->delay_frame)
+        return AVERROR(ENOMEM);
+    return 0;
+}
+
+/* Run one frame through the per-channel delay line and output the dry/wet mix. */
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    CompensationDelayContext *s = ctx->priv;
+    const unsigned size = s->buf_size;
+    const unsigned mask = size - 1;
+    const double dry_gain = s->dry, wet_gain = s->wet;
+    unsigned rd, wr;
+    AVFrame *out;
+    int c, i;
+
+    out = ff_get_audio_buffer(inlink, in->nb_samples);
+    if (!out) {
+        av_frame_free(&in);
+        return AVERROR(ENOMEM);
+    }
+    av_frame_copy_props(out, in);
+
+    for (c = 0; c < inlink->channels; c++) {
+        const double *input  = (const double *)in->extended_data[c];
+        double       *output = (double *)out->extended_data[c];
+        double       *ring   = (double *)s->delay_frame->extended_data[c];
+
+        /* every channel restarts from the saved write index; the read index trails it by s->delay samples */
+        wr = s->w_ptr;
+        rd = (wr + size - s->delay) & mask;
+
+        for (i = 0; i < in->nb_samples; i++) {
+            const double x = input[i];
+            ring[wr]  = x;      /* store before reading: rd equals wr when the delay is zero */
+            output[i] = dry_gain * x + wet_gain * ring[rd];
+            wr = (wr + 1) & mask;
+            rd = (rd + 1) & mask;
+        }
+    }
+    /* the index reached by the last channel is where the next frame starts */
+    s->w_ptr = wr;
+
+    av_frame_free(&in);
+    return ff_filter_frame(ctx->outputs[0], out);
+}
+
+/* Release the delay ring buffer allocated by config_input(). */
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    CompensationDelayContext *priv = ctx->priv;
+    av_frame_free(&priv->delay_frame);
+}
+
+static const AVFilterPad compensationdelay_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_AUDIO,
+        .config_props = config_input, /* computes the delay and allocates the ring buffer */
+        .filter_frame = filter_frame, /* per-frame delay and dry/wet mixing */
+    },
+    { NULL }
+};
+
+static const AVFilterPad compensationdelay_outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_AUDIO,
+    },
+    { NULL } /* list terminator */
+};
+
+AVFilter ff_af_compensationdelay = {
+    .name          = "compensationdelay",
+    .description   = NULL_IF_CONFIG_SMALL("Audio Compensation Delay Line."),
+    .query_formats = query_formats,
+    .priv_size     = sizeof(CompensationDelayContext),
+    .priv_class    = &compensationdelay_class, /* generated by AVFILTER_DEFINE_CLASS(compensationdelay) */
+    .uninit        = uninit,                   /* frees the delay ring buffer */
+    .inputs        = compensationdelay_inputs,
+    .outputs       = compensationdelay_outputs,
+};
diff --git a/libavfilter/af_dcshift.c b/libavfilter/af_dcshift.c
index eb981568..7332c12b 100644
--- a/libavfilter/af_dcshift.c
+++ b/libavfilter/af_dcshift.c
@@ -61,7 +61,7 @@ static int query_formats(AVFilterContext *ctx)
     };
     int ret;
 
-    layouts = ff_all_channel_layouts();
+    layouts = ff_all_channel_counts();
     if (!layouts)
         return AVERROR(ENOMEM);
     ret = ff_set_common_channel_layouts(ctx, layouts);
diff --git a/libavfilter/af_dynaudnorm.c b/libavfilter/af_dynaudnorm.c
new file mode 100644
index 00000000..1dd221ce
--- /dev/null
+++ b/libavfilter/af_dynaudnorm.c
@@ -0,0 +1,737 @@
+/*
+ * Dynamic Audio Normalizer
+ * Copyright (c) 2015 LoRd_MuldeR <mulder2@gmx.de>. Some rights reserved.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Dynamic Audio Normalizer
+ */
+
+#include <float.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/opt.h"
+
+#define FF_BUFQUEUE_SIZE 302
+#include "libavfilter/bufferqueue.h"
+
+#include "audio.h"
+#include "avfilter.h"
+#include "internal.h"
+
+typedef struct cqueue {
+    double *elements;   /* ring storage with room for `size` values */
+    int size;           /* capacity */
+    int nb_elements;    /* number of values currently stored */
+    int first;          /* index of the oldest stored value */
+} cqueue;
+
+typedef struct DynamicAudioNormalizerContext {
+    const AVClass *class;
+
+    struct FFBufQueue queue;            /* input frames waiting to be amplified */
+
+    int frame_len;                      /* frame length in samples, set in config_input() */
+    int frame_len_msec;                 /* option "f" */
+    int filter_size;                    /* option "g"; init() rejects even values */
+    int dc_correction;                  /* option "c" */
+    int channels_coupled;               /* option "n" */
+    int alt_boundary_mode;              /* option "b" */
+
+    double peak_value;                  /* option "p" */
+    double max_amplification;           /* option "m" */
+    double target_rms;                  /* option "r"; values <= DBL_EPSILON disable the RMS target */
+    double compress_factor;             /* option "s"; values <= DBL_EPSILON disable compression */
+    double *prev_amplification_factor;  /* per channel: gain reached at the end of the previous frame */
+    double *dc_correction_value;        /* per channel: current DC offset estimate */
+    double *compress_threshold;         /* per channel (only [0] when coupled): smoothed compression threshold */
+    double *fade_factors[2];            /* frame_len crossfade weights: [0] for the old value, [1] for the new one */
+    double *weights;                    /* Gaussian kernel, filter_size taps */
+
+    int channels;                       /* channel count the per-channel arrays are sized for */
+    int delay;                          /* flush frames request_frame() may still generate at EOF */
+
+    cqueue **gain_history_original;     /* per channel: raw per-frame gains */
+    cqueue **gain_history_minimum;      /* per channel: output of the minimum filter */
+    cqueue **gain_history_smoothed;     /* per channel: output of the Gaussian filter, consumed by amplify_frame() */
+} DynamicAudioNormalizerContext;
+
+#define OFFSET(x) offsetof(DynamicAudioNormalizerContext, x)
+#define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+/* Each row: name, help, field offset, type, default, min, max, flags. NOTE(review): "g" tops out at 301, one below FF_BUFQUEUE_SIZE (302) - presumably so the frame queue can hold a full window; confirm. */
+static const AVOption dynaudnorm_options[] = {
+    { "f", "set the frame length in msec",     OFFSET(frame_len_msec),    AV_OPT_TYPE_INT,    {.i64 = 500},   10,  8000, FLAGS },
+    { "g", "set the filter size",              OFFSET(filter_size),       AV_OPT_TYPE_INT,    {.i64 = 31},     3,   301, FLAGS },
+    { "p", "set the peak value",               OFFSET(peak_value),        AV_OPT_TYPE_DOUBLE, {.dbl = 0.95}, 0.0,   1.0, FLAGS },
+    { "m", "set the max amplification",        OFFSET(max_amplification), AV_OPT_TYPE_DOUBLE, {.dbl = 10.0}, 1.0, 100.0, FLAGS },
+    { "r", "set the target RMS",               OFFSET(target_rms),        AV_OPT_TYPE_DOUBLE, {.dbl = 0.0},  0.0,   1.0, FLAGS },
+    { "n", "set channel coupling",             OFFSET(channels_coupled),  AV_OPT_TYPE_BOOL,   {.i64 = 1},      0,     1, FLAGS },
+    { "c", "set DC correction",                OFFSET(dc_correction),     AV_OPT_TYPE_BOOL,   {.i64 = 0},      0,     1, FLAGS },
+    { "b", "set alternative boundary mode",    OFFSET(alt_boundary_mode), AV_OPT_TYPE_BOOL,   {.i64 = 0},      0,     1, FLAGS },
+    { "s", "set the compress factor",          OFFSET(compress_factor),   AV_OPT_TYPE_DOUBLE, {.dbl = 0.0},  0.0,  30.0, FLAGS },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(dynaudnorm);
+
+/* Validate the options: only odd filter sizes are accepted. */
+static av_cold int init(AVFilterContext *ctx)
+{
+    DynamicAudioNormalizerContext *s = ctx->priv;
+    const int is_odd = s->filter_size & 1;
+
+    if (is_odd)
+        return 0;
+    av_log(ctx, AV_LOG_ERROR, "filter size %d is invalid. Must be an odd value.\n", s->filter_size);
+    return AVERROR(EINVAL);
+}
+
+/* Format negotiation: planar doubles, any channel count, any sample rate. */
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVSampleFormat sample_fmts[] = {
+        AV_SAMPLE_FMT_DBLP,
+        AV_SAMPLE_FMT_NONE
+    };
+    AVFilterChannelLayouts *chan_list;
+    AVFilterFormats *fmt_list;
+    int err;
+
+    /* channel counts first */
+    if (!(chan_list = ff_all_channel_counts()))
+        return AVERROR(ENOMEM);
+    if ((err = ff_set_common_channel_layouts(ctx, chan_list)) < 0)
+        return err;
+
+    /* the analysis and gain code reads every plane as an array of double */
+    if (!(fmt_list = ff_make_format_list(sample_fmts)))
+        return AVERROR(ENOMEM);
+    if ((err = ff_set_common_formats(ctx, fmt_list)) < 0)
+        return err;
+
+    /* no restriction on the sample rate */
+    if (!(fmt_list = ff_all_samplerates()))
+        return AVERROR(ENOMEM);
+
+    return ff_set_common_samplerates(ctx, fmt_list);
+}
+
+static inline int frame_size(int sample_rate, int frame_len_msec) /* frame length in samples; an odd count is bumped to the next even one */
+{
+    const int nb_samples = lrint(sample_rate * (frame_len_msec / 1000.0));
+    return nb_samples + nb_samples % 2;
+}
+
+/* Fill the two linear crossfade ramps: [0] falls in equal steps down to 0, [1] is its complement. */
+static void precalculate_fade_factors(double *fade_factors[2], int frame_len)
+{
+    const double per_sample = 1.0 / frame_len;
+    int k;
+    for (k = 0; k < frame_len; k++) {
+        fade_factors[0][k] = 1.0 - (per_sample * (k + 1.0));
+        fade_factors[1][k] = 1.0 - fade_factors[0][k];
+    }
+}
+
+/* Allocate an empty queue with room for `size` values; returns NULL if either allocation fails. */
+static cqueue *cqueue_create(int size)
+{
+    cqueue *queue = av_malloc(sizeof(*queue));
+    double *storage;
+
+    if (!queue)
+        return NULL;
+    storage = av_malloc_array(size, sizeof(*storage));
+    if (!storage) {
+        av_free(queue);
+        return NULL;
+    }
+
+    queue->elements    = storage;
+    queue->size        = size;
+    queue->nb_elements = 0;
+    queue->first       = 0;
+    return queue;
+}
+
+/* Free a queue together with its storage; a NULL queue is accepted. */
+static void cqueue_free(cqueue *q)
+{
+    av_free(q ? q->elements : NULL);
+    av_free(q);
+}
+
+static int cqueue_size(cqueue *q) /* number of values currently stored */
+{
+    return q->nb_elements;
+}
+
+static int cqueue_empty(cqueue *q) /* non-zero when no value is stored */
+{
+    return !q->nb_elements;
+}
+
+/* Append `element` behind the newest value; av_assert2() checks that the queue is not full. Always returns 0. */
+static int cqueue_enqueue(cqueue *q, double element)
+{
+    int tail;
+
+    av_assert2(q->nb_elements != q->size);
+
+    tail = (q->first + q->nb_elements) % q->size;
+    q->nb_elements += 1;
+    q->elements[tail] = element;
+    return 0;
+}
+
+static double cqueue_peek(cqueue *q, int index) /* value `index` positions after the oldest one; nothing is removed */
+{
+    av_assert2(index < q->nb_elements);
+    return q->elements[(q->first + index) % q->size]; /* wrap around the end of the storage */
+}
+
+/* Remove the oldest value and store it in *element; av_assert2() checks that the queue is not empty. Always returns 0. */
+static int cqueue_dequeue(cqueue *q, double *element)
+{
+    av_assert2(!cqueue_empty(q));
+
+    *element = q->elements[q->first];
+    q->nb_elements -= 1;
+    q->first = (q->first + 1) % q->size;
+    return 0;
+}
+
+/* Discard the oldest value; av_assert2() checks that the queue is not empty. Always returns 0. */
+static int cqueue_pop(cqueue *q)
+{
+    av_assert2(!cqueue_empty(q));
+
+    q->nb_elements -= 1;
+    q->first = (q->first + 1) % q->size;
+    return 0;
+}
+
+/* Build the Gaussian smoothing kernel in s->weights (filter_size taps around
+ * the middle tap) and rescale it by the reciprocal of the tap sum. */
+static void init_gaussian_filter(DynamicAudioNormalizerContext *s)
+{
+    const int    taps   = s->filter_size;
+    const int    centre = taps / 2;
+    const double sigma  = (((taps / 2.0) - 1.0) / 3.0) + (1.0 / 3.0);
+    const double scale  = 1.0 / (sigma * sqrt(2.0 * M_PI));
+    const double denom  = 2.0 * sigma * sigma;
+    double sum = 0.0, norm;
+    int k;
+
+    /* unnormalised Gaussian sampled at integer distances from the centre */
+    for (k = 0; k < taps; k++) {
+        const int dist = k - centre;
+        const double w = scale * exp(-dist * dist / denom);
+
+        s->weights[k] = w;
+        sum += w;
+    }
+
+    /* rescale so the taps add up to (approximately) one */
+    norm = 1.0 / sum;
+    for (k = 0; k < taps; k++)
+        s->weights[k] *= norm;
+}
+
+/* Free all per-configuration state and drop the queued frames; config_input()
+ * also calls this before it (re)allocates. */
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    DynamicAudioNormalizerContext *s = ctx->priv;
+    cqueue ***const histories[] = {
+        &s->gain_history_original,
+        &s->gain_history_minimum,
+        &s->gain_history_smoothed,
+    };
+    int h, c;
+
+    /* each history is an array of s->channels queues; arrays that were never allocated are skipped */
+    for (h = 0; h < (int)(sizeof(histories) / sizeof(histories[0])); h++) {
+        for (c = 0; *histories[h] && c < s->channels; c++)
+            cqueue_free((*histories[h])[c]);
+        av_freep(histories[h]);
+    }
+
+    av_freep(&s->fade_factors[0]);
+    av_freep(&s->fade_factors[1]);
+    av_freep(&s->prev_amplification_factor);
+    av_freep(&s->dc_correction_value);
+    av_freep(&s->compress_threshold);
+    av_freep(&s->weights);
+
+    ff_bufqueue_discard_all(&s->queue);
+}
+
+/* Size all per-channel state for the configured input link; returns 0 or AVERROR(ENOMEM). */
+static int config_input(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    DynamicAudioNormalizerContext *s = ctx->priv;
+    int c;
+
+    uninit(ctx);
+    /* Publish the channel count before allocating anything: uninit() frees the
+     * history queues of s->channels channels, so a failure below must not leave a
+     * stale (or zero) count behind. The arrays are calloc'ed, unfilled slots are NULL. */
+    s->channels = inlink->channels;
+
+    s->frame_len =
+    inlink->min_samples =
+    inlink->max_samples =
+    inlink->partial_buf_size = frame_size(inlink->sample_rate, s->frame_len_msec);
+    av_log(ctx, AV_LOG_DEBUG, "frame len %d\n", s->frame_len);
+
+    s->fade_factors[0] = av_malloc_array(s->frame_len, sizeof(*s->fade_factors[0]));
+    s->fade_factors[1] = av_malloc_array(s->frame_len, sizeof(*s->fade_factors[1]));
+    s->prev_amplification_factor = av_malloc_array(inlink->channels, sizeof(*s->prev_amplification_factor));
+    s->dc_correction_value = av_calloc(inlink->channels, sizeof(*s->dc_correction_value));
+    s->compress_threshold = av_calloc(inlink->channels, sizeof(*s->compress_threshold));
+    s->gain_history_original = av_calloc(inlink->channels, sizeof(*s->gain_history_original));
+    s->gain_history_minimum = av_calloc(inlink->channels, sizeof(*s->gain_history_minimum));
+    s->gain_history_smoothed = av_calloc(inlink->channels, sizeof(*s->gain_history_smoothed));
+    s->weights = av_malloc_array(s->filter_size, sizeof(*s->weights));
+    if (!s->prev_amplification_factor || !s->dc_correction_value ||
+        !s->compress_threshold || !s->fade_factors[0] || !s->fade_factors[1] ||
+        !s->gain_history_original || !s->gain_history_minimum ||
+        !s->gain_history_smoothed || !s->weights)
+        return AVERROR(ENOMEM);
+
+    for (c = 0; c < inlink->channels; c++) {
+        s->prev_amplification_factor[c] = 1.0;
+        s->gain_history_original[c] = cqueue_create(s->filter_size);
+        s->gain_history_minimum[c]  = cqueue_create(s->filter_size);
+        s->gain_history_smoothed[c] = cqueue_create(s->filter_size);
+        if (!s->gain_history_original[c] || !s->gain_history_minimum[c] ||
+            !s->gain_history_smoothed[c])
+            return AVERROR(ENOMEM);
+    }
+
+    precalculate_fade_factors(s->fade_factors, s->frame_len);
+    init_gaussian_filter(s);
+    s->delay = s->filter_size;
+
+    return 0;
+}
+
+/* Blend prev and next with the precomputed crossfade weights of sample position pos. */
+static inline double fade(double prev, double next, int pos, double *fade_factors[2])
+{
+    return fade_factors[0][pos] * prev + fade_factors[1][pos] * next;
+}
+
+static inline double pow2(const double value) /* value squared */
+{
+    return value * value;
+}
+
+static inline double bound(const double threshold, const double val) /* erf-shaped soft limiter: close to val for small val, saturating at threshold */
+{
+    const double half_sqrt_pi = 0.8862269254527580136490837416705725913987747280611935; // sqrt(PI) / 2.0
+    return threshold * erf(half_sqrt_pi * (val / threshold));
+}
+
+/* Largest absolute sample value of one channel, or of all channels when
+ * channel is -1. The result is never smaller than DBL_EPSILON. */
+static double find_peak_magnitude(AVFrame *frame, int channel)
+{
+    const int all   = channel == -1;
+    const int first = all ? 0 : channel;
+    const int last  = all ? av_frame_get_channels(frame) : channel + 1;
+    /* the DBL_EPSILON floor keeps get_max_local_gain() from dividing by zero on silence */
+    double peak = DBL_EPSILON;
+    int c, i;
+
+    /* [first, last) is either the single requested channel or every channel */
+    for (c = first; c < last; c++) {
+        const double *samples = (const double *)frame->extended_data[c];
+
+        for (i = 0; i < frame->nb_samples; i++)
+            peak = FFMAX(peak, fabs(samples[i]));
+    }
+
+    return peak;
+}
+
+/* Root mean square of one channel, or of all channels taken together when
+ * channel is -1.
+ * The result is never smaller than DBL_EPSILON. */
+static double compute_frame_rms(AVFrame *frame, int channel)
+{
+    const int all   = channel == -1;
+    const int first = all ? 0 : channel;
+    const int last  = all ? av_frame_get_channels(frame) : channel + 1;
+    double sum_sq = 0.0;
+    int c, i;
+
+    /* [first, last) is either the single requested channel or every channel */
+    for (c = first; c < last; c++) {
+        const double *samples = (const double *)frame->extended_data[c];
+
+        for (i = 0; i < frame->nb_samples; i++)
+            sum_sq += pow2(samples[i]);
+    }
+
+    /* turn the sum into a mean over every sample that was accumulated:
+     * nb_samples values per visited channel */
+    sum_sq /= frame->nb_samples * (last - first);
+
+    /* floor the result so callers can divide by it */
+    return FFMAX(sqrt(sum_sq), DBL_EPSILON);
+}
+
+/* Gain that brings the frame to the peak target (and to the RMS target, if one is set), soft-limited by max_amplification. */
+static double get_max_local_gain(DynamicAudioNormalizerContext *s, AVFrame *frame, int channel)
+{
+    const double peak_gain = s->peak_value / find_peak_magnitude(frame, channel);
+    const double rms_gain  = s->target_rms > DBL_EPSILON ? s->target_rms / compute_frame_rms(frame, channel) : DBL_MAX;
+    return bound(s->max_amplification, FFMIN(peak_gain, rms_gain));
+}
+
+/* Smallest value currently stored in the queue (DBL_MAX when it is empty). */
+static double minimum_filter(cqueue *q)
+{
+    const int count = cqueue_size(q);
+    double lowest = DBL_MAX;
+    int k;
+
+    for (k = 0; k < count; k++)
+        lowest = FFMIN(lowest, cqueue_peek(q, k));
+    return lowest;
+}
+
+/* Sum of the queued values, each weighted by the kernel tap at the same position. */
+static double gaussian_filter(DynamicAudioNormalizerContext *s, cqueue *q)
+{
+    const int count = cqueue_size(q);
+    double acc = 0.0;
+    int k;
+
+    for (k = 0; k < count; k++)
+        acc += cqueue_peek(q, k) * s->weights[k];
+    return acc;
+}
+
+static void update_gain_history(DynamicAudioNormalizerContext *s, int channel,
+                                double current_gain_factor) /* push one frame's gain through the minimum filter and then the Gaussian filter */
+{
+    if (cqueue_empty(s->gain_history_original[channel]) ||
+        cqueue_empty(s->gain_history_minimum[channel])) {
+        const int pre_fill_size = s->filter_size / 2;
+        /* First use: prime both histories with half a window of 1.0, or of the first gain in alternative boundary mode. */
+        s->prev_amplification_factor[channel] = s->alt_boundary_mode ? current_gain_factor : 1.0;
+
+        while (cqueue_size(s->gain_history_original[channel]) < pre_fill_size) {
+            cqueue_enqueue(s->gain_history_original[channel], s->alt_boundary_mode ? current_gain_factor : 1.0);
+        }
+
+        while (cqueue_size(s->gain_history_minimum[channel]) < pre_fill_size) {
+            cqueue_enqueue(s->gain_history_minimum[channel], s->alt_boundary_mode ? current_gain_factor : 1.0);
+        }
+    }
+    /* Stage 1: record the raw gain of this frame. */
+    cqueue_enqueue(s->gain_history_original[channel], current_gain_factor);
+
+    while (cqueue_size(s->gain_history_original[channel]) >= s->filter_size) { /* a full window is available */
+        double minimum;
+        av_assert0(cqueue_size(s->gain_history_original[channel]) == s->filter_size);
+        minimum = minimum_filter(s->gain_history_original[channel]);
+
+        cqueue_enqueue(s->gain_history_minimum[channel], minimum);
+
+        cqueue_pop(s->gain_history_original[channel]); /* slide the window by one frame */
+    }
+    /* Stage 2: smooth the window minima; each result is later dequeued by amplify_frame(). */
+    while (cqueue_size(s->gain_history_minimum[channel]) >= s->filter_size) {
+        double smoothed;
+        av_assert0(cqueue_size(s->gain_history_minimum[channel]) == s->filter_size);
+        smoothed = gaussian_filter(s, s->gain_history_minimum[channel]);
+
+        cqueue_enqueue(s->gain_history_smoothed[channel], smoothed);
+
+        cqueue_pop(s->gain_history_minimum[channel]); /* slide the window by one frame */
+    }
+}
+
+static inline double update_value(double new, double old, double aggressiveness) /* weighted average moving `old` towards `new` */
+{
+    av_assert0((aggressiveness >= 0.0) && (aggressiveness <= 1.0));
+    return aggressiveness * new + (1.0 - aggressiveness) * old; /* 0 keeps old, 1 takes new */
+}
+
+/* Subtract a slowly adapting per-channel DC estimate, crossfading from the previous estimate to the updated one. */
+static void perform_dc_correction(DynamicAudioNormalizerContext *s, AVFrame *frame)
+{
+    const double inv_len = 1.0 / frame->nb_samples;
+    const int first = cqueue_empty(s->gain_history_original[0]);
+    int c, i;
+
+    for (c = 0; c < s->channels; c++) {
+        double *samples = (double *)frame->extended_data[c];
+        double mean = 0.0, before, after;
+
+        for (i = 0; i < frame->nb_samples; i++)
+            mean += samples[i] * inv_len;
+
+        /* the very first frame seeds the estimate; later frames move it a tenth of the way towards the new mean */
+        before = first ? mean : s->dc_correction_value[c];
+        after  = first ? mean : update_value(mean, before, 0.1);
+        s->dc_correction_value[c] = after;
+        for (i = 0; i < frame->nb_samples; i++)
+            samples[i] -= fade(before, after, i, s->fade_factors);
+    }
+}
+
+static double setup_compress_thresh(double threshold) /* limiter threshold T for which bound(T, 1.0) comes out just at or below `threshold` */
+{
+    if ((threshold > DBL_EPSILON) && (threshold < (1.0 - DBL_EPSILON))) {
+        double current_threshold = threshold;
+        double step_size = 1.0;
+        /* Search upwards with steps that halve until they drop to DBL_EPSILON. */
+        while (step_size > DBL_EPSILON) {
+            while ((current_threshold + step_size > current_threshold) && /* stop once the step no longer changes the sum */
+                   (bound(current_threshold + step_size, 1.0) <= threshold)) {
+                current_threshold += step_size;
+            }
+            /* the next step would overshoot: refine */
+            step_size /= 2.0;
+        }
+
+        return current_threshold;
+    } else {
+        return threshold; /* values at or beyond either end of (0, 1) are returned unchanged */
+    }
+}
+
+/* Standard deviation of one channel, or of all channels taken together when
+ * channel is -1, under the assumption that the mean is zero.
+ * The result is never smaller than DBL_EPSILON. */
+static double compute_frame_std_dev(DynamicAudioNormalizerContext *s,
+                                    AVFrame *frame, int channel)
+{
+    const int all   = channel == -1;
+    const int first = all ? 0 : channel;
+    const int last  = all ? s->channels : channel + 1;
+    double sum_sq = 0.0;
+    int c, i;
+
+    /* [first, last) is either the single requested channel or every channel */
+    for (c = first; c < last; c++) {
+        const double *samples = (const double *)frame->extended_data[c];
+
+        for (i = 0; i < frame->nb_samples; i++)
+            sum_sq += pow2(samples[i]);  // Assume that MEAN is *zero*
+    }
+
+    /* sample variance: divide by N - 1, N being the number of values summed */
+    sum_sq /= ((last - first) * frame->nb_samples) - 1;
+
+    /* floor the result at DBL_EPSILON */
+    return FFMAX(sqrt(sum_sq), DBL_EPSILON);
+}
+
+static void perform_compression(DynamicAudioNormalizerContext *s, AVFrame *frame) /* soft-limit the samples in place against a threshold derived from the frame's standard deviation */
+{
+    int is_first_frame = cqueue_empty(s->gain_history_original[0]);
+    int c, i;
+    /* Coupled: one threshold from all channels, kept in compress_threshold[0]. Otherwise: one threshold per channel. */
+    if (s->channels_coupled) {
+        const double standard_deviation = compute_frame_std_dev(s, frame, -1);
+        const double current_threshold  = FFMIN(1.0, s->compress_factor * standard_deviation);
+        /* the first frame seeds the stored threshold; later frames move it a third of the way towards the new value */
+        const double prev_value = is_first_frame ? current_threshold : s->compress_threshold[0];
+        double prev_actual_thresh, curr_actual_thresh;
+        s->compress_threshold[0] = is_first_frame ? current_threshold : update_value(current_threshold, s->compress_threshold[0], (1.0/3.0));
+        /* convert both ends of the crossfade into limiter thresholds */
+        prev_actual_thresh = setup_compress_thresh(prev_value);
+        curr_actual_thresh = setup_compress_thresh(s->compress_threshold[0]);
+
+        for (c = 0; c < s->channels; c++) {
+            double *const dst_ptr = (double *)frame->extended_data[c];
+            for (i = 0; i < frame->nb_samples; i++) {
+                const double localThresh = fade(prev_actual_thresh, curr_actual_thresh, i, s->fade_factors);
+                dst_ptr[i] = copysign(bound(localThresh, fabs(dst_ptr[i])), dst_ptr[i]); /* limit the magnitude, keep the sign */
+            }
+        }
+    } else {
+        for (c = 0; c < s->channels; c++) {
+            const double standard_deviation = compute_frame_std_dev(s, frame, c);
+            const double current_threshold  = setup_compress_thresh(FFMIN(1.0, s->compress_factor * standard_deviation));
+            /* NOTE(review): unlike the coupled branch, the value is passed through setup_compress_thresh() here and again below - confirm this is intended */
+            const double prev_value = is_first_frame ? current_threshold : s->compress_threshold[c];
+            double prev_actual_thresh, curr_actual_thresh;
+            double *dst_ptr;
+            s->compress_threshold[c] = is_first_frame ? current_threshold : update_value(current_threshold, s->compress_threshold[c], 1.0/3.0);
+
+            prev_actual_thresh = setup_compress_thresh(prev_value);
+            curr_actual_thresh = setup_compress_thresh(s->compress_threshold[c]);
+
+            dst_ptr = (double *)frame->extended_data[c];
+            for (i = 0; i < frame->nb_samples; i++) {
+                const double localThresh = fade(prev_actual_thresh, curr_actual_thresh, i, s->fade_factors);
+                dst_ptr[i] = copysign(bound(localThresh, fabs(dst_ptr[i])), dst_ptr[i]); /* limit the magnitude, keep the sign */
+            }
+        }
+    }
+}
+
+/* Pre-process the frame in place (optional DC removal and compression) and
+ * feed its target gain into the gain history of every channel. */
+static void analyze_frame(DynamicAudioNormalizerContext *s, AVFrame *frame)
+{
+    double coupled_gain = 0.0;
+    int c;
+
+    if (s->dc_correction)
+        perform_dc_correction(s, frame);
+    if (s->compress_factor > DBL_EPSILON)
+        perform_compression(s, frame);
+
+    /* coupled channels share one gain derived from the whole frame */
+    if (s->channels_coupled)
+        coupled_gain = get_max_local_gain(s, frame, -1);
+
+    for (c = 0; c < s->channels; c++) {
+        const double gain = s->channels_coupled ? coupled_gain
+                                                : get_max_local_gain(s, frame, c);
+
+        update_gain_history(s, c, gain);
+    }
+}
+
+/* Apply the next smoothed gain of every channel to the frame, crossfading
+ * from the previous frame's gain, and hard-limit the result to peak_value. */
+static void amplify_frame(DynamicAudioNormalizerContext *s, AVFrame *frame)
+{
+    int c, i;
+
+    for (c = 0; c < s->channels; c++) {
+        double *samples = (double *)frame->extended_data[c];
+        const double from = s->prev_amplification_factor[c];
+        double to;
+
+        cqueue_dequeue(s->gain_history_smoothed[c], &to);
+
+        for (i = 0; i < frame->nb_samples; i++) {
+            const double scaled = samples[i] * fade(from, to, i, s->fade_factors);
+
+            /* clip overshoots while keeping the sign */
+            samples[i] = fabs(scaled) > s->peak_value ? copysign(s->peak_value, scaled)
+                                                      : scaled;
+        }
+
+        s->prev_amplification_factor[c] = to;
+    }
+}
+
+/* Output the oldest queued frame once a smoothed gain is available for it, then analyse and queue the new frame. */
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    DynamicAudioNormalizerContext *s = ctx->priv;
+    const int gain_ready = !cqueue_empty(s->gain_history_smoothed[0]);
+    int ret = 0;
+
+    if (gain_ready) {
+        AVFrame *out = ff_bufqueue_get(&s->queue);
+        amplify_frame(s, out);
+        ret = ff_filter_frame(ctx->outputs[0], out);
+    }
+
+    /* the input only joins the queue here; a later call outputs it */
+    analyze_frame(s, in);
+    ff_bufqueue_add(ctx, &s->queue, in);
+    return ret;
+}
+
+static int flush_buffer(DynamicAudioNormalizerContext *s, AVFilterLink *inlink,
+                        AVFilterLink *outlink) /* at EOF, feed one synthetic frame through filter_frame() so a queued frame can leave */
+{
+    AVFrame *out = ff_get_audio_buffer(outlink, s->frame_len);
+    int c, i;
+    /* Despite its name, `out` is handed to filter_frame() as input below. */
+    if (!out)
+        return AVERROR(ENOMEM);
+    /* Fill every channel with a constant magnitude chosen from the options. */
+    for (c = 0; c < s->channels; c++) {
+        double *dst_ptr = (double *)out->extended_data[c];
+        /* DBL_EPSILON in alternative boundary mode, else the peak value (capped by the RMS target if one is set) */
+        for (i = 0; i < out->nb_samples; i++) {
+            dst_ptr[i] = s->alt_boundary_mode ? DBL_EPSILON : ((s->target_rms > DBL_EPSILON) ? FFMIN(s->peak_value, s->target_rms) : s->peak_value);
+            if (s->dc_correction) {
+                dst_ptr[i] *= ((i % 2) == 1) ? -1 : 1; /* alternate the sign from sample to sample */
+                dst_ptr[i] += s->dc_correction_value[c]; /* then offset by the current DC estimate */
+            }
+        }
+    }
+    /* One flush frame fewer remains for request_frame() to generate. */
+    s->delay--;
+    return filter_frame(inlink, out);
+}
+
+/* Request a frame from upstream; at EOF generate flush frames instead for as long as s->delay is non-zero. */
+static int request_frame(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    DynamicAudioNormalizerContext *s = ctx->priv;
+    AVFilterLink *inlink = ctx->inputs[0];
+    const int ret = ff_request_frame(inlink);
+
+    if (ret != AVERROR_EOF || ctx->is_disabled || !s->delay)
+        return ret;
+
+    return flush_buffer(s, inlink, outlink);
+}
+
+static const AVFilterPad avfilter_af_dynaudnorm_inputs[] = {
+    {
+        .name           = "default",
+        .type           = AVMEDIA_TYPE_AUDIO,
+        .filter_frame   = filter_frame,
+        .config_props   = config_input,
+        .needs_writable = 1, /* analyze_frame() and amplify_frame() modify the samples in place */
+    },
+    { NULL }
+};
+
+static const AVFilterPad avfilter_af_dynaudnorm_outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_AUDIO,
+        .request_frame = request_frame, /* generates the flush frames at EOF */
+    },
+    { NULL }
+};
+
+AVFilter ff_af_dynaudnorm = {
+    .name          = "dynaudnorm",
+    .description   = NULL_IF_CONFIG_SMALL("Dynamic Audio Normalizer."),
+    .query_formats = query_formats,
+    .priv_size     = sizeof(DynamicAudioNormalizerContext),
+    .init          = init,   /* rejects even filter sizes */
+    .uninit        = uninit, /* frees the per-channel state and the frame queue */
+    .inputs        = avfilter_af_dynaudnorm_inputs,
+    .outputs       = avfilter_af_dynaudnorm_outputs,
+    .priv_class    = &dynaudnorm_class, /* generated by AVFILTER_DEFINE_CLASS(dynaudnorm) */
+};
diff --git a/libavfilter/af_earwax.c b/libavfilter/af_earwax.c
index c3109976..b0ba4cff 100644
--- a/libavfilter/af_earwax.c
+++ b/libavfilter/af_earwax.c
@@ -78,15 +78,17 @@ typedef struct {
 static int query_formats(AVFilterContext *ctx)
 {
     static const int sample_rates[] = { 44100, -1 };
+    int ret;
 
     AVFilterFormats *formats = NULL;
     AVFilterChannelLayouts *layout = NULL;
 
-    ff_add_format(&formats, AV_SAMPLE_FMT_S16);
-    ff_set_common_formats(ctx, formats);
-    ff_add_channel_layout(&layout, AV_CH_LAYOUT_STEREO);
-    ff_set_common_channel_layouts(ctx, layout);
-    ff_set_common_samplerates(ctx, ff_make_format_list(sample_rates));
+    if ((ret = ff_add_format                 (&formats, AV_SAMPLE_FMT_S16                 )) < 0 ||
+        (ret = ff_set_common_formats         (ctx     , formats                           )) < 0 ||
+        (ret = ff_add_channel_layout         (&layout , AV_CH_LAYOUT_STEREO               )) < 0 ||
+        (ret = ff_set_common_channel_layouts (ctx     , layout                            )) < 0 ||
+        (ret = ff_set_common_samplerates     (ctx     , ff_make_format_list(sample_rates) )) < 0)
+        return ret;
 
     return 0;
 }
diff --git a/libavfilter/af_extrastereo.c b/libavfilter/af_extrastereo.c
new file mode 100644
index 00000000..a746006f
--- /dev/null
+++ b/libavfilter/af_extrastereo.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2015 The FFmpeg Project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/channel_layout.h"
+#include "libavutil/opt.h"
+#include "avfilter.h"
+#include "audio.h"
+#include "formats.h"
+
+typedef struct ExtraStereoContext { /* private state of the extrastereo filter */
+    const AVClass *class;
+    float mult;   /* "m" option: difference coefficient applied in filter_frame() */
+    int clip;     /* "c" option: non-zero enables clipping of the result to [-1, 1] */
+} ExtraStereoContext;
+
+#define OFFSET(x) offsetof(ExtraStereoContext, x) /* byte offset of field x inside the private context */
+#define A AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM /* flags shared by all options; unparenthesized, use only as a whole initializer */
+
+static const AVOption extrastereo_options[] = { /* user options, stored into ExtraStereoContext via OFFSET() */
+    { "m", "set the difference coefficient", OFFSET(mult), AV_OPT_TYPE_FLOAT, {.dbl=2.5}, -10, 10, A }, /* default 2.5, range [-10, 10] */
+    { "c", "enable clipping",                OFFSET(clip), AV_OPT_TYPE_BOOL,  {.i64=1},     0,  1, A }, /* default 1 (enabled) */
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(extrastereo); /* NOTE(review): presumably emits extrastereo_class from extrastereo_options - confirm */
+
+static int query_formats(AVFilterContext *ctx)
+{
+    AVFilterChannelLayouts *stereo = NULL;
+    AVFilterFormats *fmts = NULL;
+    int err;
+
+    /* FLT samples in a stereo layout only: filter_frame() reads data[0] as L/R float pairs */
+    if ((err = ff_add_format(&fmts, AV_SAMPLE_FMT_FLT)) < 0 ||
+        (err = ff_set_common_formats(ctx, fmts)) < 0)
+        return err;
+    if ((err = ff_add_channel_layout(&stereo, AV_CH_LAYOUT_STEREO)) < 0 ||
+        (err = ff_set_common_channel_layouts(ctx, stereo)) < 0)
+        return err;
+    return ff_set_common_samplerates(ctx, ff_all_samplerates());
+}
+
+/* Push each channel away from the L/R average by s->mult (optionally clipping
+ * to [-1, 1]) and forward the frame. Consumes 'in'; returns AVERROR(ENOMEM)
+ * if no output buffer can be allocated, else ff_filter_frame()'s result. */
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    ExtraStereoContext *s = ctx->priv;
+    const float *src = (const float *)in->data[0];
+    const float mult = s->mult;
+    AVFrame *out = in;
+    float *dst;
+    int n;
+
+    if (!av_frame_is_writable(in)) {
+        out = ff_get_audio_buffer(inlink, in->nb_samples);
+        if (!out) {
+            av_frame_free(&in);
+            return AVERROR(ENOMEM);
+        }
+        av_frame_copy_props(out, in);
+    }
+    dst = (float *)out->data[0];
+
+    for (n = 0; n < in->nb_samples; n++) {
+        float left    = src[n * 2    ];
+        float right   = src[n * 2 + 1];
+        float average = (left + right) / 2.;
+        left  = average + mult * (left  - average);
+        right = average + mult * (right - average);
+        if (s->clip) {
+            left  = av_clipf(left,  -1, 1);
+            right = av_clipf(right, -1, 1);
+        }
+        /* store unconditionally: with clip=0 the result must still reach dst */
+        dst[n * 2    ] = left;
+        dst[n * 2 + 1] = right;
+    }
+    if (out != in)
+        av_frame_free(&in);
+    return ff_filter_frame(outlink, out);
+}
+
+static const AVFilterPad inputs[] = { /* input pads of the extrastereo filter */
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_AUDIO,
+        .filter_frame = filter_frame, /* per-frame processing callback */
+    },
+    { NULL }                          /* list terminator */
+};
+
+static const AVFilterPad outputs[] = { /* output pads of the extrastereo filter */
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_AUDIO,
+    },
+    { NULL }                  /* list terminator */
+};
+
+AVFilter ff_af_extrastereo = { /* filter definition for "extrastereo" */
+    .name           = "extrastereo",
+    .description    = NULL_IF_CONFIG_SMALL("Increase difference between stereo audio channels."),
+    .query_formats  = query_formats,              /* restricts links to FLT samples / stereo layout */
+    .priv_size      = sizeof(ExtraStereoContext), /* ctx->priv is used as an ExtraStereoContext */
+    .priv_class     = &extrastereo_class,
+    .inputs         = inputs,
+    .outputs        = outputs,
+};
diff --git a/libavfilter/af_flanger.c b/libavfilter/af_flanger.c
index 106e6f73..a92367c9 100644
--- a/libavfilter/af_flanger.c
+++ b/libavfilter/af_flanger.c
@@ -97,7 +97,7 @@ static int query_formats(AVFilterContext *ctx)
     };
     int ret;
 
-    layouts = ff_all_channel_layouts();
+    layouts = ff_all_channel_counts();
     if (!layouts)
         return AVERROR(ENOMEM);
     ret = ff_set_common_channel_layouts(ctx, layouts);
@@ -130,7 +130,7 @@ static int config_input(AVFilterLink *inlink)
         return AVERROR(ENOMEM);
 
     ff_generate_wave_table(s->wave_shape, AV_SAMPLE_FMT_FLT, s->lfo, s->lfo_length,
-                           floor(s->delay_min * inlink->sample_rate + 0.5),
+                           rint(s->delay_min * inlink->sample_rate),
                            s->max_samples - 2., 3 * M_PI_2);
 
     return av_samples_alloc_array_and_samples(&s->delay_buffer, NULL,
@@ -149,8 +149,10 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
         out_frame = frame;
     } else {
         out_frame = ff_get_audio_buffer(inlink, frame->nb_samples);
-        if (!out_frame)
+        if (!out_frame) {
+            av_frame_free(&frame);
             return AVERROR(ENOMEM);
+        }
         av_frame_copy_props(out_frame, frame);
     }
 
diff --git a/libavfilter/af_join.c b/libavfilter/af_join.c
index 4c1f6a07..bd780cc3 100644
--- a/libavfilter/af_join.c
+++ b/libavfilter/af_join.c
@@ -78,11 +78,13 @@ static const AVOption join_options[] = {
 
 AVFILTER_DEFINE_CLASS(join);
 
+static int try_push_frame(AVFilterContext *ctx);
+
 static int filter_frame(AVFilterLink *link, AVFrame *frame)
 {
     AVFilterContext *ctx = link->dst;
     JoinContext       *s = ctx->priv;
-    int i;
+    int i, j;
 
     for (i = 0; i < ctx->nb_inputs; i++)
         if (link == ctx->inputs[i])
@@ -91,7 +93,17 @@ static int filter_frame(AVFilterLink *link, AVFrame *frame)
     av_assert0(!s->input_frames[i]);
     s->input_frames[i] = frame;
 
-    return 0;
+    /* request the same number of samples on all inputs */
+    /* FIXME that means a frame arriving asynchronously on a different input
+       will not have the requested number of samples */
+    if (i == 0) {
+        int nb_samples = s->input_frames[0]->nb_samples;
+
+        for (j = 1; !i && j < ctx->nb_inputs; j++)
+            ctx->inputs[j]->request_samples = nb_samples;
+    }
+
+    return try_push_frame(ctx);
 }
 
 static int parse_maps(AVFilterContext *ctx)
@@ -245,20 +257,21 @@ static int join_query_formats(AVFilterContext *ctx)
 {
     JoinContext *s = ctx->priv;
     AVFilterChannelLayouts *layouts = NULL;
-    int i;
+    int i, ret;
 
-    ff_add_channel_layout(&layouts, s->channel_layout);
-    ff_channel_layouts_ref(layouts, &ctx->outputs[0]->in_channel_layouts);
+    if ((ret = ff_add_channel_layout(&layouts, s->channel_layout)) < 0 ||
+        (ret = ff_channel_layouts_ref(layouts, &ctx->outputs[0]->in_channel_layouts)) < 0)
+        return ret;
 
     for (i = 0; i < ctx->nb_inputs; i++) {
         layouts = ff_all_channel_layouts();
-        if (!layouts)
-            return AVERROR(ENOMEM);
-        ff_channel_layouts_ref(layouts, &ctx->inputs[i]->out_channel_layouts);
+        if ((ret = ff_channel_layouts_ref(layouts, &ctx->inputs[i]->out_channel_layouts)) < 0)
+            return ret;
     }
 
-    ff_set_common_formats    (ctx, ff_planar_sample_fmts());
-    ff_set_common_samplerates(ctx, ff_all_samplerates());
+    if ((ret = ff_set_common_formats(ctx, ff_planar_sample_fmts())) < 0 ||
+        (ret = ff_set_common_samplerates(ctx, ff_all_samplerates())) < 0)
+        return ret;
 
     return 0;
 }
@@ -386,27 +399,31 @@ static int join_request_frame(AVFilterLink *outlink)
 {
     AVFilterContext *ctx = outlink->src;
     JoinContext *s       = ctx->priv;
-    AVFrame *frame;
-    int linesize   = INT_MAX;
-    int nb_samples = 0;
-    int nb_buffers = 0;
-    int i, j, ret;
+    int i;
 
     /* get a frame on each input */
     for (i = 0; i < ctx->nb_inputs; i++) {
         AVFilterLink *inlink = ctx->inputs[i];
+        if (!s->input_frames[i])
+            return ff_request_frame(inlink);
+    }
+    return 0;
+}
 
-        if (!s->input_frames[i] &&
-            (ret = ff_request_frame(inlink)) < 0)
-            return ret;
-
-        /* request the same number of samples on all inputs */
-        if (i == 0) {
-            nb_samples = s->input_frames[0]->nb_samples;
+static int try_push_frame(AVFilterContext *ctx)
+{
+    AVFilterLink *outlink = ctx->outputs[0];
+    JoinContext *s       = ctx->priv;
+    AVFrame *frame;
+    int linesize   = INT_MAX;
+    int nb_samples = INT_MAX;
+    int nb_buffers = 0;
+    int i, j, ret;
 
-            for (j = 1; !i && j < ctx->nb_inputs; j++)
-                ctx->inputs[j]->request_samples = nb_samples;
-        }
+    for (i = 0; i < ctx->nb_inputs; i++) {
+        if (!s->input_frames[i])
+            return 0;
+        nb_samples = FFMIN(nb_samples, s->input_frames[i]->nb_samples);
     }
 
     /* setup the output frame */
diff --git a/libavfilter/af_ladspa.c b/libavfilter/af_ladspa.c
index 2057e6d0..5532dacd 100644
--- a/libavfilter/af_ladspa.c
+++ b/libavfilter/af_ladspa.c
@@ -26,6 +26,7 @@
 
 #include <dlfcn.h>
 #include <ladspa.h>
+#include "libavutil/avassert.h"
 #include "libavutil/avstring.h"
 #include "libavutil/channel_layout.h"
 #include "libavutil/opt.h"
@@ -142,7 +143,9 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
     AVFilterContext *ctx = inlink->dst;
     LADSPAContext *s = ctx->priv;
     AVFrame *out;
-    int i, h;
+    int i, h, p;
+
+    av_assert0(in->channels == (s->nb_inputs * s->nb_handles));
 
     if (!s->nb_outputs ||
         (av_frame_is_writable(in) && s->nb_inputs == s->nb_outputs &&
@@ -157,15 +160,19 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
         av_frame_copy_props(out, in);
     }
 
+    av_assert0(!s->nb_outputs || out->channels == (s->nb_outputs * s->nb_handles));
+
     for (h = 0; h < s->nb_handles; h++) {
         for (i = 0; i < s->nb_inputs; i++) {
+            p = s->nb_handles > 1 ? h : i;
             s->desc->connect_port(s->handles[h], s->ipmap[i],
-                                  (LADSPA_Data*)in->extended_data[i]);
+                                  (LADSPA_Data*)in->extended_data[p]);
         }
 
         for (i = 0; i < s->nb_outputs; i++) {
+            p = s->nb_handles > 1 ? h : i;
             s->desc->connect_port(s->handles[h], s->opmap[i],
-                                  (LADSPA_Data*)out->extended_data[i]);
+                                  (LADSPA_Data*)out->extended_data[p]);
         }
 
         s->desc->run(s->handles[h], in->nb_samples);
@@ -296,6 +303,7 @@ static int config_input(AVFilterLink *inlink)
 static int config_output(AVFilterLink *outlink)
 {
     AVFilterContext *ctx = outlink->src;
+    LADSPAContext *s = ctx->priv;
     int ret;
 
     if (ctx->nb_inputs) {
@@ -303,6 +311,10 @@ static int config_output(AVFilterLink *outlink)
 
         outlink->format      = inlink->format;
         outlink->sample_rate = inlink->sample_rate;
+        if (s->nb_inputs == s->nb_outputs) {
+            outlink->channel_layout = inlink->channel_layout;
+            outlink->channels = inlink->channels;
+        }
 
         ret = 0;
     } else {
@@ -392,7 +404,7 @@ static av_cold int init(AVFilterContext *ctx)
     AVFilterPad pad = { NULL };
     char *p, *arg, *saveptr = NULL;
     unsigned long nb_ports;
-    int i;
+    int i, j = 0;
 
     if (!s->dl_name) {
         av_log(ctx, AV_LOG_ERROR, "No plugin name provided\n");
@@ -534,13 +546,16 @@ static av_cold int init(AVFilterContext *ctx)
         LADSPA_Data val;
         int ret;
 
-        if (!(arg = av_strtok(p, "|", &saveptr)))
+        if (!(arg = av_strtok(p, " |", &saveptr)))
             break;
         p = NULL;
 
         if (sscanf(arg, "c%d=%f", &i, &val) != 2) {
-            av_log(ctx, AV_LOG_ERROR, "Invalid syntax.\n");
-            return AVERROR(EINVAL);
+            if (sscanf(arg, "%f", &val) != 1) {
+                av_log(ctx, AV_LOG_ERROR, "Invalid syntax.\n");
+                return AVERROR(EINVAL);
+            }
+            i = j++;
         }
 
         if ((ret = set_control(ctx, i, val)) < 0)
@@ -588,52 +603,80 @@ static int query_formats(AVFilterContext *ctx)
     AVFilterChannelLayouts *layouts;
     static const enum AVSampleFormat sample_fmts[] = {
         AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE };
+    int ret;
 
     formats = ff_make_format_list(sample_fmts);
     if (!formats)
         return AVERROR(ENOMEM);
-    ff_set_common_formats(ctx, formats);
+    ret = ff_set_common_formats(ctx, formats);
+    if (ret < 0)
+        return ret;
 
     if (s->nb_inputs) {
         formats = ff_all_samplerates();
         if (!formats)
             return AVERROR(ENOMEM);
 
-        ff_set_common_samplerates(ctx, formats);
+        ret = ff_set_common_samplerates(ctx, formats);
+        if (ret < 0)
+            return ret;
     } else {
         int sample_rates[] = { s->sample_rate, -1 };
 
-        ff_set_common_samplerates(ctx, ff_make_format_list(sample_rates));
+        ret = ff_set_common_samplerates(ctx, ff_make_format_list(sample_rates));
+        if (ret < 0)
+            return ret;
     }
 
     if (s->nb_inputs == 1 && s->nb_outputs == 1) {
         // We will instantiate multiple LADSPA_Handle, one over each channel
-        layouts = ff_all_channel_layouts();
+        layouts = ff_all_channel_counts();
         if (!layouts)
             return AVERROR(ENOMEM);
 
-        ff_set_common_channel_layouts(ctx, layouts);
+        ret = ff_set_common_channel_layouts(ctx, layouts);
+        if (ret < 0)
+            return ret;
+    } else if (s->nb_inputs == 2 && s->nb_outputs == 2) {
+        layouts = NULL;
+        ret = ff_add_channel_layout(&layouts, AV_CH_LAYOUT_STEREO);
+        if (ret < 0)
+            return ret;
+        ret = ff_set_common_channel_layouts(ctx, layouts);
+        if (ret < 0)
+            return ret;
     } else {
         AVFilterLink *outlink = ctx->outputs[0];
 
         if (s->nb_inputs >= 1) {
             AVFilterLink *inlink = ctx->inputs[0];
-            int64_t inlayout = FF_COUNT2LAYOUT(s->nb_inputs);
+            uint64_t inlayout = FF_COUNT2LAYOUT(s->nb_inputs);
 
             layouts = NULL;
-            ff_add_channel_layout(&layouts, inlayout);
-            ff_channel_layouts_ref(layouts, &inlink->out_channel_layouts);
-
-            if (!s->nb_outputs)
-                ff_channel_layouts_ref(layouts, &outlink->in_channel_layouts);
+            ret = ff_add_channel_layout(&layouts, inlayout);
+            if (ret < 0)
+                return ret;
+            ret = ff_channel_layouts_ref(layouts, &inlink->out_channel_layouts);
+            if (ret < 0)
+                return ret;
+
+            if (!s->nb_outputs) {
+                ret = ff_channel_layouts_ref(layouts, &outlink->in_channel_layouts);
+                if (ret < 0)
+                    return ret;
+            }
         }
 
         if (s->nb_outputs >= 1) {
-            int64_t outlayout = FF_COUNT2LAYOUT(s->nb_outputs);
+            uint64_t outlayout = FF_COUNT2LAYOUT(s->nb_outputs);
 
             layouts = NULL;
-            ff_add_channel_layout(&layouts, outlayout);
-            ff_channel_layouts_ref(layouts, &outlink->in_channel_layouts);
+            ret = ff_add_channel_layout(&layouts, outlayout);
+            if (ret < 0)
+                return ret;
+            ret = ff_channel_layouts_ref(layouts, &outlink->in_channel_layouts);
+            if (ret < 0)
+                return ret;
         }
     }
 
diff --git a/libavfilter/af_pan.c b/libavfilter/af_pan.c
index 4ba77a73..1eb102c1 100644
--- a/libavfilter/af_pan.c
+++ b/libavfilter/af_pan.c
@@ -37,7 +37,7 @@
 #include "formats.h"
 #include "internal.h"
 
-#define MAX_CHANNELS 63
+#define MAX_CHANNELS 64
 
 typedef struct PanContext {
     const AVClass *class;
@@ -227,27 +227,29 @@ static int query_formats(AVFilterContext *ctx)
     AVFilterLink *outlink = ctx->outputs[0];
     AVFilterFormats *formats = NULL;
     AVFilterChannelLayouts *layouts;
+    int ret;
 
     pan->pure_gains = are_gains_pure(pan);
     /* libswr supports any sample and packing formats */
-    ff_set_common_formats(ctx, ff_all_formats(AVMEDIA_TYPE_AUDIO));
+    if ((ret = ff_set_common_formats(ctx, ff_all_formats(AVMEDIA_TYPE_AUDIO))) < 0)
+        return ret;
 
     formats = ff_all_samplerates();
-    if (!formats)
-        return AVERROR(ENOMEM);
-    ff_set_common_samplerates(ctx, formats);
+    if ((ret = ff_set_common_samplerates(ctx, formats)) < 0)
+        return ret;
 
     // inlink supports any channel layout
     layouts = ff_all_channel_counts();
-    ff_channel_layouts_ref(layouts, &inlink->out_channel_layouts);
+    if ((ret = ff_channel_layouts_ref(layouts, &inlink->out_channel_layouts)) < 0)
+        return ret;
 
     // outlink supports only requested output channel layout
     layouts = NULL;
-    ff_add_channel_layout(&layouts,
+    if ((ret = ff_add_channel_layout(&layouts,
                           pan->out_channel_layout ? pan->out_channel_layout :
-                          FF_COUNT2LAYOUT(pan->nb_output_channels));
-    ff_channel_layouts_ref(layouts, &outlink->in_channel_layouts);
-    return 0;
+                          FF_COUNT2LAYOUT(pan->nb_output_channels))) < 0)
+        return ret;
+    return ff_channel_layouts_ref(layouts, &outlink->in_channel_layouts);
 }
 
 static int config_props(AVFilterLink *link)
diff --git a/libavfilter/af_replaygain.c b/libavfilter/af_replaygain.c
index c4198578..c8f6f966 100644
--- a/libavfilter/af_replaygain.c
+++ b/libavfilter/af_replaygain.c
@@ -323,19 +323,21 @@ static int query_formats(AVFilterContext *ctx)
 {
     AVFilterFormats *formats = NULL;
     AVFilterChannelLayouts *layout = NULL;
-    int i;
+    int i, ret;
 
-    ff_add_format(&formats, AV_SAMPLE_FMT_FLT);
-    ff_set_common_formats(ctx, formats);
-    ff_add_channel_layout(&layout, AV_CH_LAYOUT_STEREO);
-    ff_set_common_channel_layouts(ctx, layout);
+    if ((ret = ff_add_format                 (&formats, AV_SAMPLE_FMT_FLT  )) < 0 ||
+        (ret = ff_set_common_formats         (ctx     , formats            )) < 0 ||
+        (ret = ff_add_channel_layout         (&layout , AV_CH_LAYOUT_STEREO)) < 0 ||
+        (ret = ff_set_common_channel_layouts (ctx     , layout             )) < 0)
+        return ret;
 
     formats = NULL;
-    for (i = 0; i < FF_ARRAY_ELEMS(freqinfos); i++)
-        ff_add_format(&formats, freqinfos[i].sample_rate);
-    ff_set_common_samplerates(ctx, formats);
+    for (i = 0; i < FF_ARRAY_ELEMS(freqinfos); i++) {
+        if ((ret = ff_add_format(&formats, freqinfos[i].sample_rate)) < 0)
+            return ret;
+    }
 
-    return 0;
+    return ff_set_common_samplerates(ctx, formats);
 }
 
 static int config_input(AVFilterLink *inlink)
diff --git a/libavfilter/af_resample.c b/libavfilter/af_resample.c
index d65d4bc6..e3c6a206 100644
--- a/libavfilter/af_resample.c
+++ b/libavfilter/af_resample.c
@@ -40,6 +40,7 @@ typedef struct ResampleContext {
     AVAudioResampleContext *avr;
     AVDictionary *options;
 
+    int resampling;
     int64_t next_pts;
     int64_t next_in_pts;
 
@@ -89,22 +90,25 @@ static int query_formats(AVFilterContext *ctx)
 {
     AVFilterLink *inlink  = ctx->inputs[0];
     AVFilterLink *outlink = ctx->outputs[0];
+    AVFilterFormats *in_formats, *out_formats, *in_samplerates, *out_samplerates;
+    AVFilterChannelLayouts *in_layouts, *out_layouts;
+    int ret;
 
-    AVFilterFormats        *in_formats      = ff_all_formats(AVMEDIA_TYPE_AUDIO);
-    AVFilterFormats        *out_formats     = ff_all_formats(AVMEDIA_TYPE_AUDIO);
-    AVFilterFormats        *in_samplerates  = ff_all_samplerates();
-    AVFilterFormats        *out_samplerates = ff_all_samplerates();
-    AVFilterChannelLayouts *in_layouts      = ff_all_channel_layouts();
-    AVFilterChannelLayouts *out_layouts     = ff_all_channel_layouts();
-
-    ff_formats_ref(in_formats,  &inlink->out_formats);
-    ff_formats_ref(out_formats, &outlink->in_formats);
-
-    ff_formats_ref(in_samplerates,  &inlink->out_samplerates);
-    ff_formats_ref(out_samplerates, &outlink->in_samplerates);
+    if (!(in_formats      = ff_all_formats         (AVMEDIA_TYPE_AUDIO)) ||
+        !(out_formats     = ff_all_formats         (AVMEDIA_TYPE_AUDIO)) ||
+        !(in_samplerates  = ff_all_samplerates     (                  )) ||
+        !(out_samplerates = ff_all_samplerates     (                  )) ||
+        !(in_layouts      = ff_all_channel_layouts (                  )) ||
+        !(out_layouts     = ff_all_channel_layouts (                  )))
+        return AVERROR(ENOMEM);
 
-    ff_channel_layouts_ref(in_layouts,  &inlink->out_channel_layouts);
-    ff_channel_layouts_ref(out_layouts, &outlink->in_channel_layouts);
+    if ((ret = ff_formats_ref         (in_formats,      &inlink->out_formats        )) < 0 ||
+        (ret = ff_formats_ref         (out_formats,     &outlink->in_formats        )) < 0 ||
+        (ret = ff_formats_ref         (in_samplerates,  &inlink->out_samplerates    )) < 0 ||
+        (ret = ff_formats_ref         (out_samplerates, &outlink->in_samplerates    )) < 0 ||
+        (ret = ff_channel_layouts_ref (in_layouts,      &inlink->out_channel_layouts)) < 0 ||
+        (ret = ff_channel_layouts_ref (out_layouts,     &outlink->in_channel_layouts)) < 0)
+        return ret;
 
     return 0;
 }
@@ -117,6 +121,8 @@ static int config_output(AVFilterLink *outlink)
     char buf1[64], buf2[64];
     int ret;
 
+    int64_t resampling_forced;
+
     if (s->avr) {
         avresample_close(s->avr);
         avresample_free(&s->avr);
@@ -155,9 +161,15 @@ static int config_output(AVFilterLink *outlink)
     if ((ret = avresample_open(s->avr)) < 0)
         return ret;
 
-    outlink->time_base = (AVRational){ 1, outlink->sample_rate };
-    s->next_pts        = AV_NOPTS_VALUE;
-    s->next_in_pts     = AV_NOPTS_VALUE;
+    av_opt_get_int(s->avr, "force_resampling", 0, &resampling_forced);
+    s->resampling = resampling_forced || (inlink->sample_rate != outlink->sample_rate);
+
+    if (s->resampling) {
+        outlink->time_base = (AVRational){ 1, outlink->sample_rate };
+        s->next_pts        = AV_NOPTS_VALUE;
+        s->next_in_pts     = AV_NOPTS_VALUE;
+    } else
+        outlink->time_base = inlink->time_base;
 
     av_get_channel_layout_string(buf1, sizeof(buf1),
                                  -1, inlink ->channel_layout);
@@ -201,6 +213,7 @@ static int request_frame(AVFilterLink *outlink)
             return (ret == 0) ? AVERROR_EOF : ret;
         }
 
+        frame->nb_samples = ret;
         frame->pts = s->next_pts;
         return ff_filter_frame(outlink, frame);
     }
@@ -239,7 +252,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 
         av_assert0(!avresample_available(s->avr));
 
-        if (s->next_pts == AV_NOPTS_VALUE) {
+        if (s->resampling && s->next_pts == AV_NOPTS_VALUE) {
             if (in->pts == AV_NOPTS_VALUE) {
                 av_log(ctx, AV_LOG_WARNING, "First timestamp is missing, "
                        "assuming 0.\n");
@@ -258,22 +271,25 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
                 goto fail;
             }
 
-            out->sample_rate = outlink->sample_rate;
-            /* Only convert in->pts if there is a discontinuous jump.
-               This ensures that out->pts tracks the number of samples actually
-               output by the resampler in the absence of such a jump.
-               Otherwise, the rounding in av_rescale_q() and av_rescale()
-               causes off-by-1 errors. */
-            if (in->pts != AV_NOPTS_VALUE && in->pts != s->next_in_pts) {
-                out->pts = av_rescale_q(in->pts, inlink->time_base,
-                                            outlink->time_base) -
-                               av_rescale(delay, outlink->sample_rate,
-                                          inlink->sample_rate);
+            if (s->resampling) {
+                out->sample_rate = outlink->sample_rate;
+                /* Only convert in->pts if there is a discontinuous jump.
+                   This ensures that out->pts tracks the number of samples actually
+                   output by the resampler in the absence of such a jump.
+                   Otherwise, the rounding in av_rescale_q() and av_rescale()
+                   causes off-by-1 errors. */
+                if (in->pts != AV_NOPTS_VALUE && in->pts != s->next_in_pts) {
+                    out->pts = av_rescale_q(in->pts, inlink->time_base,
+                                                outlink->time_base) -
+                                   av_rescale(delay, outlink->sample_rate,
+                                              inlink->sample_rate);
+                } else
+                    out->pts = s->next_pts;
+
+                s->next_pts = out->pts + out->nb_samples;
+                s->next_in_pts = in->pts + in->nb_samples;
             } else
-                out->pts = s->next_pts;
-
-            s->next_pts = out->pts + out->nb_samples;
-            s->next_in_pts = in->pts + in->nb_samples;
+                out->pts = in->pts;
 
             ret = ff_filter_frame(outlink, out);
             s->got_output = 1;
diff --git a/libavfilter/af_rubberband.c b/libavfilter/af_rubberband.c
new file mode 100644
index 00000000..ded25449
--- /dev/null
+++ b/libavfilter/af_rubberband.c
@@ -0,0 +1,271 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <rubberband/rubberband-c.h>
+
+#include "libavutil/channel_layout.h"
+#include "libavutil/common.h"
+#include "libavutil/opt.h"
+
+#include "audio.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+
+typedef struct RubberBandContext {
+    const AVClass *class;
+    RubberBandState rbs; /* stretcher handle: created in config_input(), deleted in uninit() */
+
+    double tempo, pitch; /* option values; rubberband_new() receives 1/tempo and pitch */
+    int transients, detector, phase, window,
+        smoothing, formant, opitch, channels; /* option flags, OR-ed into one word in config_input() */
+    int64_t nb_samples_out; /* samples retrieved from the stretcher so far; basis of the output pts */
+    int64_t nb_samples_in; /* samples fed to the stretcher so far; only reported in a debug log */
+    int flushed; /* set once request_frame() has handled upstream EOF */
+} RubberBandContext;
+
+#define OFFSET(x) offsetof(RubberBandContext, x)
+#define A AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+/* tempo and pitch are plain scale factors; the named values of every other option are RubberBandOption* constants. */
+static const AVOption rubberband_options[] = {
+    { "tempo",      "set tempo scale factor", OFFSET(tempo), AV_OPT_TYPE_DOUBLE, {.dbl=1}, 0.01, 100, A },
+    { "pitch",      "set pitch scale factor", OFFSET(pitch), AV_OPT_TYPE_DOUBLE, {.dbl=1}, 0.01, 100, A },
+    { "transients", "set transients", OFFSET(transients), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, A, "transients" },
+        { "crisp",  0,                0,                  AV_OPT_TYPE_CONST, {.i64=RubberBandOptionTransientsCrisp},  0, 0, A, "transients" },
+        { "mixed",  0,                0,                  AV_OPT_TYPE_CONST, {.i64=RubberBandOptionTransientsMixed},  0, 0, A, "transients" },
+        { "smooth", 0,                0,                  AV_OPT_TYPE_CONST, {.i64=RubberBandOptionTransientsSmooth}, 0, 0, A, "transients" },
+    { "detector",   "set detector",   OFFSET(detector),   AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, A, "detector" },
+        { "compound",   0,            0,                  AV_OPT_TYPE_CONST, {.i64=RubberBandOptionDetectorCompound},   0, 0, A, "detector" },
+        { "percussive", 0,            0,                  AV_OPT_TYPE_CONST, {.i64=RubberBandOptionDetectorPercussive}, 0, 0, A, "detector" },
+        { "soft",       0,            0,                  AV_OPT_TYPE_CONST, {.i64=RubberBandOptionDetectorSoft},       0, 0, A, "detector" },
+    { "phase",      "set phase",      OFFSET(phase),      AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, A, "phase" },
+        { "laminar",     0,           0,                  AV_OPT_TYPE_CONST, {.i64=RubberBandOptionPhaseLaminar},     0, 0, A, "phase" },
+        { "independent", 0,           0,                  AV_OPT_TYPE_CONST, {.i64=RubberBandOptionPhaseIndependent}, 0, 0, A, "phase" },
+    { "window",     "set window",     OFFSET(window),     AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, A, "window" },
+        { "standard", 0,              0,                  AV_OPT_TYPE_CONST, {.i64=RubberBandOptionWindowStandard}, 0, 0, A, "window" },
+        { "short",    0,              0,                  AV_OPT_TYPE_CONST, {.i64=RubberBandOptionWindowShort},    0, 0, A, "window" },
+        { "long",     0,              0,                  AV_OPT_TYPE_CONST, {.i64=RubberBandOptionWindowLong},     0, 0, A, "window" },
+    { "smoothing",  "set smoothing",  OFFSET(smoothing),  AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, A, "smoothing" },
+        { "off",    0,                0,                  AV_OPT_TYPE_CONST, {.i64=RubberBandOptionSmoothingOff}, 0, 0, A, "smoothing" },
+        { "on",     0,                0,                  AV_OPT_TYPE_CONST, {.i64=RubberBandOptionSmoothingOn},  0, 0, A, "smoothing" },
+    { "formant",    "set formant",    OFFSET(formant),    AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, A, "formant" },
+        { "shifted",    0,            0,                  AV_OPT_TYPE_CONST, {.i64=RubberBandOptionFormantShifted},   0, 0, A, "formant" },
+        { "preserved",  0,            0,                  AV_OPT_TYPE_CONST, {.i64=RubberBandOptionFormantPreserved}, 0, 0, A, "formant" },
+    { "pitchq",     "set pitch quality", OFFSET(opitch),  AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, A, "pitch" },
+        { "quality",     0,           0,                  AV_OPT_TYPE_CONST, {.i64=RubberBandOptionPitchHighQuality},     0, 0, A, "pitch" },
+        { "speed",       0,           0,                  AV_OPT_TYPE_CONST, {.i64=RubberBandOptionPitchHighSpeed},       0, 0, A, "pitch" },
+        { "consistency", 0,           0,                  AV_OPT_TYPE_CONST, {.i64=RubberBandOptionPitchHighConsistency}, 0, 0, A, "pitch" },
+    { "channels",   "set channels",   OFFSET(channels),   AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, A, "channels" },
+        { "apart",    0,              0,                  AV_OPT_TYPE_CONST, {.i64=RubberBandOptionChannelsApart},    0, 0, A, "channels" },
+        { "together", 0,              0,                  AV_OPT_TYPE_CONST, {.i64=RubberBandOptionChannelsTogether}, 0, 0, A, "channels" },
+    { NULL },
+};
+
+AVFILTER_DEFINE_CLASS(rubberband);
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    RubberBandContext *rb = ctx->priv;
+    /* A stretcher only exists once config_input() has stored one in the context. */
+    if (rb->rbs != NULL)
+        rubberband_delete(rb->rbs);
+}
+
+static int query_formats(AVFilterContext *ctx)
+{
+    /* filter_frame() hands frame data to librubberband as per-channel float arrays. */
+    static const enum AVSampleFormat planar_float[] = {
+        AV_SAMPLE_FMT_FLTP,
+        AV_SAMPLE_FMT_NONE,
+    };
+    AVFilterChannelLayouts *counts;
+    AVFilterFormats *list;
+    int err;
+
+    /* Channels: any count or layout. */
+    if (!(counts = ff_all_channel_counts()))
+        return AVERROR(ENOMEM);
+    if ((err = ff_set_common_channel_layouts(ctx, counts)) < 0)
+        return err;
+
+    /* Sample format: only the entry listed above. */
+    if (!(list = ff_make_format_list(planar_float)))
+        return AVERROR(ENOMEM);
+    if ((err = ff_set_common_formats(ctx, list)) < 0)
+        return err;
+
+    /* Sample rate: unrestricted. */
+    if (!(list = ff_all_samplerates()))
+        return AVERROR(ENOMEM);
+
+    return ff_set_common_samplerates(ctx, list);
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    RubberBandContext *s = inlink->dst->priv;
+    AVFilterLink *outlink = inlink->dst->outputs[0];
+    AVFrame *out;
+    int avail, got, ret = 0;
+
+    /* Hand the whole frame to the stretcher; the trailing 0 is its "final" flag. */
+    rubberband_process(s->rbs, (const float *const *)in->data, in->nb_samples, 0);
+    s->nb_samples_in += in->nb_samples;
+
+    avail = rubberband_available(s->rbs);
+    if (avail <= 0)
+        goto end;
+    out = ff_get_audio_buffer(inlink, avail);
+    if (!out) {
+        ret = AVERROR(ENOMEM);
+        goto end;
+    }
+    /* Output pts is derived from the count of samples produced so far. */
+    out->pts = av_rescale_q(s->nb_samples_out, (AVRational){ 1, outlink->sample_rate },
+                            outlink->time_base);
+    out->nb_samples = got = rubberband_retrieve(s->rbs, (float *const *)out->data, avail);
+    ret = ff_filter_frame(outlink, out);
+    s->nb_samples_out += got;
+end:
+    av_frame_free(&in);
+    return ret;
+}
+
+static int config_input(AVFilterLink *inlink)
+{
+    RubberBandContext *s = inlink->dst->priv;
+    int opts = s->transients|s->detector|s->phase|s->window|
+               s->smoothing|s->formant|s->opitch|s->channels|
+               RubberBandOptionProcessRealTime;
+
+    /* (Re)create the stretcher for this link's sample rate and channel count. */
+    if (s->rbs)
+        rubberband_delete(s->rbs);
+    s->rbs = rubberband_new(inlink->sample_rate, inlink->channels, opts, 1. / s->tempo, s->pitch);
+    if (!s->rbs) /* never hand a NULL handle to the query below */
+        return AVERROR(ENOMEM);
+    /* All three frame-size fields of the link take the stretcher's required block size. */
+    inlink->partial_buf_size = inlink->min_samples =
+    inlink->max_samples      = rubberband_get_samples_required(s->rbs);
+    return 0;
+}
+
+static int request_frame(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    RubberBandContext *s = ctx->priv;
+    AVFilterLink *inlink = ctx->inputs[0];
+    AVFrame *frame;
+    int pending, ret;
+
+    ret = ff_request_frame(ctx->inputs[0]);
+    /* Only the first EOF from upstream triggers the drain below. */
+    if (ret != AVERROR_EOF || s->flushed)
+        return ret;
+
+    if (rubberband_available(s->rbs) > 0) {
+        /* Feed one placeholder sample with the "final" flag set to 1. */
+        frame = ff_get_audio_buffer(inlink, 1);
+        if (!frame)
+            return AVERROR(ENOMEM);
+        rubberband_process(s->rbs, (const float *const *)frame->data, 1, 1);
+        av_frame_free(&frame);
+
+        pending = rubberband_available(s->rbs);
+        if (pending > 0) {
+            frame = ff_get_audio_buffer(inlink, pending);
+            if (!frame)
+                return AVERROR(ENOMEM);
+            frame->pts = av_rescale_q(s->nb_samples_out,
+                                      (AVRational){ 1, outlink->sample_rate },
+                                      outlink->time_base);
+            pending = rubberband_retrieve(s->rbs, (float *const *)frame->data, pending);
+            frame->nb_samples = pending;
+            ret = ff_filter_frame(outlink, frame);
+            s->nb_samples_out += pending;
+        }
+    }
+
+    s->flushed = 1;
+    av_log(ctx, AV_LOG_DEBUG, "nb_samples_in %"PRId64" nb_samples_out %"PRId64"\n",
+           s->nb_samples_in, s->nb_samples_out);
+    return ret;
+}
+
+static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
+                           char *res, int res_len, int flags)
+{
+    RubberBandContext *s = ctx->priv;
+    /* "tempo" and "pitch" take one number in [0.01, 100]; other commands fall through to return 0. */
+    if (!strcmp(cmd, "tempo")) {
+        double arg = 0;
+
+        /* Unparsable input and NaN are rejected along with out-of-range values. */
+        if (sscanf(args, "%lf", &arg) != 1 || !(arg >= 0.01 && arg <= 100)) {
+            av_log(ctx, AV_LOG_ERROR,
+                   "Tempo scale factor '%f' out of range\n", arg);
+            return AVERROR(EINVAL);
+        }
+        rubberband_set_time_ratio(s->rbs, 1. / arg);
+    }
+
+    if (!strcmp(cmd, "pitch")) {
+        double arg = 0;
+
+        /* Same validation as for "tempo". */
+        if (sscanf(args, "%lf", &arg) != 1 || !(arg >= 0.01 && arg <= 100)) {
+            av_log(ctx, AV_LOG_ERROR,
+                   "Pitch scale factor '%f' out of range\n", arg);
+            return AVERROR(EINVAL);
+        }
+        rubberband_set_pitch_scale(s->rbs, arg);
+    }
+
+    return 0;
+}
+
+static const AVFilterPad rubberband_inputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_AUDIO,
+        .config_props  = config_input, /* creates the stretcher for the link's rate and channel count */
+        .filter_frame  = filter_frame, /* feeds each frame to the stretcher and forwards what is available */
+    },
+    { NULL }
+};
+
+static const AVFilterPad rubberband_outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_AUDIO,
+        .request_frame = request_frame, /* also drains the stretcher on the first upstream EOF */
+    },
+    { NULL }
+};
+
+AVFilter ff_af_rubberband = {
+    .name          = "rubberband",
+    .description   = NULL_IF_CONFIG_SMALL("Apply time-stretching and pitch-shifting."),
+    .query_formats = query_formats, /* planar float, any channel count, any sample rate */
+    .priv_size     = sizeof(RubberBandContext),
+    .priv_class    = &rubberband_class,
+    .uninit        = uninit, /* deletes the stretcher */
+    .inputs        = rubberband_inputs,
+    .outputs       = rubberband_outputs,
+    .process_command = process_command, /* handles the "tempo" and "pitch" commands */
+};
diff --git a/libavfilter/af_sidechaincompress.c b/libavfilter/af_sidechaincompress.c
new file mode 100644
index 00000000..3f540e2d
--- /dev/null
+++ b/libavfilter/af_sidechaincompress.c
@@ -0,0 +1,450 @@
+/*
+ * Copyright (C) 2001-2010 Krzysztof Foltman, Markus Schmidt, Thor Harald Johansen and others
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Audio (Sidechain) Compressor filter
+ */
+
+#include "libavutil/audio_fifo.h"
+#include "libavutil/avassert.h"
+#include "libavutil/channel_layout.h"
+#include "libavutil/common.h"
+#include "libavutil/opt.h"
+
+#include "audio.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "hermite.h"
+#include "internal.h"
+
+typedef struct SidechainCompressContext {
+    const AVClass *class;
+
+    double level_in; /* option: gain applied to the main input */
+    double level_sc; /* option: gain applied to the sidechain before level detection */
+    double attack, attack_coeff; /* option and the per-sample coefficient derived from it */
+    double release, release_coeff; /* option and the per-sample coefficient derived from it */
+    double lin_slope; /* running smoothed detector level, updated per sample in compressor() */
+    double ratio;
+    double threshold;
+    double makeup;
+    double mix;
+    double thres; /* log(threshold) */
+    double knee;
+    double knee_start; /* log(lin_knee_start) */
+    double knee_stop; /* log(threshold * sqrt(knee)) */
+    double lin_knee_start; /* threshold / sqrt(knee) */
+    double adj_knee_start; /* lin_knee_start squared; compared against when detection is set */
+    double compressed_knee_stop; /* (knee_stop - thres) / ratio + thres */
+    int link; /* 0 = average the sidechain channels, 1 = take their maximum */
+    int detection; /* 0 = peak, 1 = rms */
+
+    AVAudioFifo *fifo[2]; /* queued samples per input: [0] main, [1] sidechain */
+    int64_t pts; /* running count of output samples, used as the output pts */
+} SidechainCompressContext;
+
+#define OFFSET(x) offsetof(SidechainCompressContext, x)
+#define A AV_OPT_FLAG_AUDIO_PARAM
+#define F AV_OPT_FLAG_FILTERING_PARAM
+/* One option table shared by sidechaincompress and acompressor; each aliases it with a #define. */
+static const AVOption options[] = {
+    { "level_in",  "set input gain",     OFFSET(level_in),  AV_OPT_TYPE_DOUBLE, {.dbl=1},        0.015625,   64, A|F },
+    { "threshold", "set threshold",      OFFSET(threshold), AV_OPT_TYPE_DOUBLE, {.dbl=0.125}, 0.000976563,    1, A|F },
+    { "ratio",     "set ratio",          OFFSET(ratio),     AV_OPT_TYPE_DOUBLE, {.dbl=2},               1,   20, A|F },
+    { "attack",    "set attack",         OFFSET(attack),    AV_OPT_TYPE_DOUBLE, {.dbl=20},           0.01, 2000, A|F },
+    { "release",   "set release",        OFFSET(release),   AV_OPT_TYPE_DOUBLE, {.dbl=250},          0.01, 9000, A|F },
+    { "makeup",    "set make up gain",   OFFSET(makeup),    AV_OPT_TYPE_DOUBLE, {.dbl=2},               1,   64, A|F },
+    { "knee",      "set knee",           OFFSET(knee),      AV_OPT_TYPE_DOUBLE, {.dbl=2.82843},         1,    8, A|F },
+    { "link",      "set link type",      OFFSET(link),      AV_OPT_TYPE_INT,    {.i64=0},               0,    1, A|F, "link" },
+    {   "average", 0,                    0,                 AV_OPT_TYPE_CONST,  {.i64=0},               0,    0, A|F, "link" },
+    {   "maximum", 0,                    0,                 AV_OPT_TYPE_CONST,  {.i64=1},               0,    0, A|F, "link" },
+    { "detection", "set detection",      OFFSET(detection), AV_OPT_TYPE_INT,    {.i64=1},               0,    1, A|F, "detection" },
+    {   "peak",    0,                    0,                 AV_OPT_TYPE_CONST,  {.i64=0},               0,    0, A|F, "detection" },
+    {   "rms",     0,                    0,                 AV_OPT_TYPE_CONST,  {.i64=1},               0,    0, A|F, "detection" },
+    { "level_sc",  "set sidechain gain", OFFSET(level_sc),  AV_OPT_TYPE_DOUBLE, {.dbl=1},        0.015625,   64, A|F },
+    { "mix",       "set mix",            OFFSET(mix),       AV_OPT_TYPE_DOUBLE, {.dbl=1},               0,    1, A|F },
+    { NULL }
+};
+
+#define sidechaincompress_options options
+AVFILTER_DEFINE_CLASS(sidechaincompress);
+
+// A fake infinity value (because real infinity may break some hosts)
+#define FAKE_INFINITY (65536.0 * 65536.0)
+
+// True when value is within 1.0 of FAKE_INFINITY; the argument is parenthesized so any expression is safe
+#define IS_FAKE_INFINITY(value) (fabs((value) - FAKE_INFINITY) < 1.0)
+
+/* Gain factor for one smoothed detector level, computed in the log domain. */
+static double output_gain(double lin_slope, double ratio, double thres,
+                          double knee, double knee_start, double knee_stop,
+                          double compressed_knee_stop, int detection)
+{
+    /* detection != 0 halves the log, because the caller squared the level. */
+    const double level = detection ? log(lin_slope) * 0.5 : log(lin_slope);
+    const int limiter = IS_FAKE_INFINITY(ratio);
+    /* Slope of the curve past the knee: 0 for an "infinite" ratio, else 1/ratio. */
+    const double tail_slope = limiter ? 0.0 : 1.0 / ratio;
+    double target;
+
+    if (knee > 1.0 && level < knee_stop) {
+        /* Inside the knee the target level comes from hermite_interpolation(). */
+        target = hermite_interpolation(level, knee_start, knee_stop,
+                                       knee_start, compressed_knee_stop,
+                                       1.0, tail_slope);
+    } else if (limiter) {
+        target = thres;
+    } else {
+        target = (level - thres) / ratio + thres;
+    }
+
+    /* Back from the log domain to a linear factor. */
+    return exp(target - level);
+}
+
+/* Derive the log-domain knee points and the per-sample envelope coefficients from the options. */
+static int compressor_config_output(AVFilterLink *outlink)
+{
+    SidechainCompressContext *s = outlink->src->priv;
+    const double knee_root = sqrt(s->knee);
+
+    s->thres          = log(s->threshold);
+    s->lin_knee_start = s->threshold / knee_root;
+    s->adj_knee_start = s->lin_knee_start * s->lin_knee_start;
+    s->knee_start     = log(s->lin_knee_start);
+    s->knee_stop      = log(s->threshold * knee_root);
+    s->compressed_knee_stop = (s->knee_stop - s->thres) / s->ratio + s->thres;
+    /* Both coefficients are capped at 1. */
+    s->attack_coeff  = FFMIN(1., 1. / (s->attack  * outlink->sample_rate / 4000.));
+    s->release_coeff = FFMIN(1., 1. / (s->release * outlink->sample_rate / 4000.));
+    return 0;
+}
+
+static void compressor(SidechainCompressContext *s,
+                       const double *src, double *dst, const double *scsrc, int nb_samples,
+                       double level_in, double level_sc,
+                       AVFilterLink *inlink, AVFilterLink *sclink)
+{
+    const int in_ch = inlink->channels, sc_ch = sclink->channels;
+    int n, ch;
+
+    /* Interleaved data: each iteration handles one sample of every channel. */
+    for (n = 0; n < nb_samples; n++, src += in_ch, dst += in_ch, scsrc += sc_ch) {
+        double level = fabs(scsrc[0] * level_sc);
+        double gain = 1.0, coeff, factor;
+
+        /* Combine the sidechain channels: maximum when link == 1, else average. */
+        for (ch = 1; ch < sc_ch; ch++) {
+            const double v = fabs(scsrc[ch] * level_sc);
+            if (s->link == 1)
+                level = FFMAX(v, level);
+            else
+                level += v;
+        }
+        if (s->link != 1)
+            level /= sc_ch;
+        /* With detection set (rms) the detector follows the squared level. */
+        if (s->detection)
+            level *= level;
+
+        /* Smooth the level: attack coefficient while rising, release otherwise. */
+        coeff = level > s->lin_slope ? s->attack_coeff : s->release_coeff;
+        s->lin_slope += (level - s->lin_slope) * coeff;
+
+        if (s->lin_slope > 0.0 && s->lin_slope > (s->detection ? s->adj_knee_start : s->lin_knee_start))
+            gain = output_gain(s->lin_slope, s->ratio, s->thres, s->knee,
+                               s->knee_start, s->knee_stop,
+                               s->compressed_knee_stop, s->detection);
+
+        /* mix blends the gain-reduced, made-up signal with the unprocessed one. */
+        factor = gain * s->makeup * s->mix + (1. - s->mix);
+        for (ch = 0; ch < in_ch; ch++)
+            dst[ch] = src[ch] * level_in * factor;
+    }
+}
+
+#if CONFIG_SIDECHAINCOMPRESS_FILTER
+static int filter_frame(AVFilterLink *link, AVFrame *frame)
+{
+    AVFilterContext *ctx = link->dst;
+    SidechainCompressContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AVFrame *out = NULL, *in[2] = { NULL };
+    int nb_samples, ret, i;
+
+    /* Queue the frame in the FIFO of the input it arrived on (0 = main, 1 = sidechain). */
+    for (i = 0; i < 2; i++)
+        if (link == ctx->inputs[i])
+            break;
+    av_assert0(i < 2);
+    ret = av_audio_fifo_write(s->fifo[i], (void **)frame->extended_data,
+                              frame->nb_samples);
+    av_frame_free(&frame);
+    if (ret < 0) /* report a failed write instead of silently losing the samples */
+        return ret;
+
+    /* Process only as many samples as both FIFOs currently hold. */
+    nb_samples = FFMIN(av_audio_fifo_size(s->fifo[0]), av_audio_fifo_size(s->fifo[1]));
+    if (!nb_samples)
+        return 0;
+
+    out = ff_get_audio_buffer(outlink, nb_samples);
+    if (!out)
+        return AVERROR(ENOMEM);
+    for (i = 0; i < 2; i++) {
+        in[i] = ff_get_audio_buffer(ctx->inputs[i], nb_samples);
+        if (!in[i]) {
+            av_frame_free(&in[0]);
+            av_frame_free(&in[1]);
+            av_frame_free(&out);
+            return AVERROR(ENOMEM);
+        }
+        av_audio_fifo_read(s->fifo[i], (void **)in[i]->data, nb_samples);
+    }
+
+    /* Output timestamps are a running sample count kept in the context. */
+    out->pts = s->pts;
+    s->pts  += nb_samples;
+
+    compressor(s, (double *)in[0]->data[0], (double *)out->data[0],
+               (double *)in[1]->data[0], nb_samples,
+               s->level_in, s->level_sc, ctx->inputs[0], ctx->inputs[1]);
+    av_frame_free(&in[0]);
+    av_frame_free(&in[1]);
+
+    return ff_filter_frame(outlink, out);
+}
+
+static int request_frame(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    SidechainCompressContext *s = ctx->priv;
+    int idx = 0;
+
+    /* Request a frame from the first input whose FIFO is empty, if there is one. */
+    while (idx < 2) {
+        if (av_audio_fifo_size(s->fifo[idx]) == 0)
+            return ff_request_frame(ctx->inputs[idx]);
+        idx++;
+    }
+
+    return 0;
+}
+
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVSampleFormat sample_fmts[] = { AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_NONE };
+    AVFilterFormats *formats;
+    AVFilterChannelLayouts *layouts = NULL;
+    int ret, i;
+
+    /* The output takes the first layout offered on the main input; EAGAIN is returned until one exists. */
+    if (!ctx->inputs[0]->in_channel_layouts ||
+        !ctx->inputs[0]->in_channel_layouts->nb_channel_layouts) {
+        av_log(ctx, AV_LOG_WARNING, "No channel layout for input 1\n");
+        return AVERROR(EAGAIN);
+    }
+
+    if ((ret = ff_add_channel_layout(&layouts, ctx->inputs[0]->in_channel_layouts->channel_layouts[0])) < 0 ||
+        (ret = ff_channel_layouts_ref(layouts, &ctx->outputs[0]->in_channel_layouts)) < 0)
+        return ret;
+
+    /* Both inputs accept any channel count; every list allocation is checked before it is used. */
+    for (i = 0; i < 2; i++) {
+        if (!(layouts = ff_all_channel_counts()))
+            return AVERROR(ENOMEM);
+        if ((ret = ff_channel_layouts_ref(layouts, &ctx->inputs[i]->out_channel_layouts)) < 0)
+            return ret;
+    }
+
+    if (!(formats = ff_make_format_list(sample_fmts)))
+        return AVERROR(ENOMEM);
+    if ((ret = ff_set_common_formats(ctx, formats)) < 0)
+        return ret;
+    if (!(formats = ff_all_samplerates()))
+        return AVERROR(ENOMEM);
+    return ff_set_common_samplerates(ctx, formats);
+}
+
+static int config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    SidechainCompressContext *s = ctx->priv;
+    AVFilterLink *main_in = ctx->inputs[0];
+    AVFilterLink *side_in = ctx->inputs[1];
+
+    /* Samples from the two inputs are processed pairwise, so the rates must agree. */
+    if (main_in->sample_rate != side_in->sample_rate) {
+        av_log(ctx, AV_LOG_ERROR, "Inputs must have the same sample rate "
+               "%d for in0 vs %d for in1\n",
+               main_in->sample_rate, side_in->sample_rate);
+        return AVERROR(EINVAL);
+    }
+    /* The output link copies its parameters from the main input. */
+    outlink->channels       = main_in->channels;
+    outlink->channel_layout = main_in->channel_layout;
+    outlink->time_base      = main_in->time_base;
+    outlink->sample_rate    = main_in->sample_rate;
+    /* One sample FIFO per input; both are freed in uninit(). */
+    s->fifo[0] = av_audio_fifo_alloc(main_in->format, main_in->channels, 1024);
+    s->fifo[1] = av_audio_fifo_alloc(side_in->format, side_in->channels, 1024);
+    if (!s->fifo[0] || !s->fifo[1])
+        return AVERROR(ENOMEM);
+    compressor_config_output(outlink);
+    return 0;
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    SidechainCompressContext *s = ctx->priv;
+    int i; /* index over the two per-input FIFOs allocated in config_output() */
+    for (i = 0; i < 2; i++)
+        av_audio_fifo_free(s->fifo[i]);
+}
+
+static const AVFilterPad sidechaincompress_inputs[] = {
+    {
+        .name           = "main", /* input 0: the signal that gets compressed */
+        .type           = AVMEDIA_TYPE_AUDIO,
+        .filter_frame   = filter_frame,
+    },{
+        .name           = "sidechain", /* input 1: the signal whose level drives the gain */
+        .type           = AVMEDIA_TYPE_AUDIO,
+        .filter_frame   = filter_frame, /* same callback; it works out which input the frame came from */
+    },
+    { NULL }
+};
+
+static const AVFilterPad sidechaincompress_outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_AUDIO,
+        .config_props  = config_output, /* checks the sample rates match and allocates the FIFOs */
+        .request_frame = request_frame, /* requests from the first input whose FIFO is empty */
+    },
+    { NULL }
+};
+
+AVFilter ff_af_sidechaincompress = {
+    .name           = "sidechaincompress",
+    .description    = NULL_IF_CONFIG_SMALL("Sidechain compressor."),
+    .priv_size      = sizeof(SidechainCompressContext),
+    .priv_class     = &sidechaincompress_class,
+    .query_formats  = query_formats, /* interleaved double samples on every link */
+    .uninit         = uninit, /* frees both FIFOs */
+    .inputs         = sidechaincompress_inputs,
+    .outputs        = sidechaincompress_outputs,
+};
+#endif  /* CONFIG_SIDECHAINCOMPRESS_FILTER */
+
+#if CONFIG_ACOMPRESSOR_FILTER
+static int acompressor_filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    SidechainCompressContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+    const int in_place = av_frame_is_writable(in);
+    AVFrame *out = in;
+
+    /* Process in place when the frame is writable, otherwise into a new buffer. */
+    if (!in_place) {
+        out = ff_get_audio_buffer(inlink, in->nb_samples);
+        if (!out) {
+            av_frame_free(&in);
+            return AVERROR(ENOMEM);
+        }
+        av_frame_copy_props(out, in);
+    }
+
+    /* The input doubles as the detector signal, with level_in as its gain too. */
+    compressor(s, (const double *)in->data[0], (double *)out->data[0],
+               (const double *)in->data[0], in->nb_samples,
+               s->level_in, s->level_in,
+               inlink, inlink);
+
+    /* The input frame is only released when it is not the one being sent on. */
+    if (!in_place)
+        av_frame_free(&in);
+    return ff_filter_frame(outlink, out);
+}
+
+static int acompressor_query_formats(AVFilterContext *ctx)
+{
+    /* compressor() reads and writes interleaved doubles only. */
+    static const enum AVSampleFormat dbl_only[] = {
+        AV_SAMPLE_FMT_DBL,
+        AV_SAMPLE_FMT_NONE
+    };
+    AVFilterChannelLayouts *any_layout = ff_all_channel_counts();
+    AVFilterFormats *list;
+    int err;
+
+    if (!any_layout)
+        return AVERROR(ENOMEM);
+    err = ff_set_common_channel_layouts(ctx, any_layout);
+    if (err < 0)
+        return err;
+
+    /* Sample format: the single entry above. */
+    list = ff_make_format_list(dbl_only);
+    if (!list)
+        return AVERROR(ENOMEM);
+    err = ff_set_common_formats(ctx, list);
+    if (err < 0)
+        return err;
+
+    /* Sample rate: unrestricted. */
+    list = ff_all_samplerates();
+    return list ? ff_set_common_samplerates(ctx, list) : AVERROR(ENOMEM);
+}
+
+#define acompressor_options options /* acompressor reuses the option table named options */
+AVFILTER_DEFINE_CLASS(acompressor);
+
+static const AVFilterPad acompressor_inputs[] = {
+    {
+        .name           = "default",
+        .type           = AVMEDIA_TYPE_AUDIO,
+        .filter_frame   = acompressor_filter_frame, /* compresses each frame using itself as the detector signal */
+    },
+    { NULL }
+};
+
+static const AVFilterPad acompressor_outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_AUDIO,
+        .config_props  = compressor_config_output, /* precomputes knee points and envelope coefficients */
+    },
+    { NULL }
+};
+
+AVFilter ff_af_acompressor = {
+    .name           = "acompressor",
+    .description    = NULL_IF_CONFIG_SMALL("Audio compressor."),
+    .priv_size      = sizeof(SidechainCompressContext), /* same context struct as sidechaincompress */
+    .priv_class     = &acompressor_class,
+    .query_formats  = acompressor_query_formats, /* interleaved double, any channel count, any rate */
+    .inputs         = acompressor_inputs,
+    .outputs        = acompressor_outputs,
+};
+#endif  /* CONFIG_ACOMPRESSOR_FILTER */
diff --git a/libavfilter/af_silenceremove.c b/libavfilter/af_silenceremove.c
index cd1e0384..f156d188 100644
--- a/libavfilter/af_silenceremove.c
+++ b/libavfilter/af_silenceremove.c
@@ -61,15 +61,20 @@ typedef struct SilenceRemoveContext {
     size_t stop_holdoff_end;
     int    stop_found_periods;
 
+    double window_ratio;
     double *window;
     double *window_current;
     double *window_end;
     int window_size;
-    double rms_sum;
+    double sum;
 
     int leave_silence;
     int restart;
     int64_t next_pts;
+
+    int detection;
+    void (*update)(struct SilenceRemoveContext *s, double sample);
+    double(*compute)(struct SilenceRemoveContext *s, double sample);
 } SilenceRemoveContext;
 
 #define OFFSET(x) offsetof(SilenceRemoveContext, x)
@@ -81,12 +86,60 @@ static const AVOption silenceremove_options[] = {
     { "stop_periods",    NULL, OFFSET(stop_periods),    AV_OPT_TYPE_INT,      {.i64=0}, -9000,    9000, FLAGS },
     { "stop_duration",   NULL, OFFSET(stop_duration),   AV_OPT_TYPE_DURATION, {.i64=0},     0,    9000, FLAGS },
     { "stop_threshold",  NULL, OFFSET(stop_threshold),  AV_OPT_TYPE_DOUBLE,   {.dbl=0},     0, DBL_MAX, FLAGS },
-    { "leave_silence",   NULL, OFFSET(leave_silence),   AV_OPT_TYPE_INT,      {.i64=0},     0,       1, FLAGS },
+    { "leave_silence",   NULL, OFFSET(leave_silence),   AV_OPT_TYPE_BOOL,     {.i64=0},     0,       1, FLAGS },
+    { "detection",       NULL, OFFSET(detection),       AV_OPT_TYPE_INT,      {.i64=1},     0,       1, FLAGS, "detection" },
+    {   "peak",          0,    0,                       AV_OPT_TYPE_CONST,    {.i64=0},     0,       0, FLAGS, "detection" },
+    {   "rms",           0,    0,                       AV_OPT_TYPE_CONST,    {.i64=1},     0,       0, FLAGS, "detection" },
+    { "window",          NULL, OFFSET(window_ratio),    AV_OPT_TYPE_DOUBLE,   {.dbl=0.02},  0,      10, FLAGS },
     { NULL }
 };
 
 AVFILTER_DEFINE_CLASS(silenceremove);
 
+static double compute_peak(SilenceRemoveContext *s, double sample)
+{
+    /* Mean of the absolute values in the window as it would be if 'sample'
+     * replaced the entry at the current window position. Nothing in the
+     * context is modified. */
+    const double evicted = *s->window_current;
+    const double total   = s->sum - evicted + fabs(sample);
+
+    return total / s->window_size;
+}
+
+static void update_peak(SilenceRemoveContext *s, double sample)
+{
+    double *slot = s->window_current;
+    /* Swap the slot's old value out of the running sum and |sample| in. */
+    s->sum -= *slot;
+    *slot   = fabs(sample);
+    s->sum += *slot;
+    /* Step to the next slot, wrapping back to the start of the window. */
+    s->window_current = (slot + 1 >= s->window_end) ? s->window : slot + 1;
+}
+
+static double compute_rms(SilenceRemoveContext *s, double sample)
+{
+    /* Root of the mean of the squares in the window as it would be if
+     * 'sample' replaced the entry at the current window position. Nothing
+     * in the context is modified. */
+    const double evicted = *s->window_current;
+    const double total   = s->sum - evicted + sample * sample;
+
+    return sqrt(total / s->window_size);
+}
+
+static void update_rms(SilenceRemoveContext *s, double sample)
+{
+    double *slot = s->window_current;
+    /* Swap the slot's old square out of the running sum and the new one in. */
+    s->sum -= *slot;
+    *slot   = sample * sample;
+    s->sum += *slot;
+    /* Step to the next slot, wrapping back to the start of the window. */
+    s->window_current = (slot + 1 >= s->window_end) ? s->window : slot + 1;
+}
+
 static av_cold int init(AVFilterContext *ctx)
 {
     SilenceRemoveContext *s = ctx->priv;
@@ -96,16 +149,27 @@ static av_cold int init(AVFilterContext *ctx)
         s->restart = 1;
     }
 
+    switch (s->detection) {
+    case 0:
+        s->update = update_peak;
+        s->compute = compute_peak;
+        break;
+    case 1:
+        s->update = update_rms;
+        s->compute = compute_rms;
+        break;
+    };
+
     return 0;
 }
 
-static void clear_rms(SilenceRemoveContext *s)
+static void clear_window(SilenceRemoveContext *s)
 {
     memset(s->window, 0, s->window_size * sizeof(*s->window));
 
     s->window_current = s->window;
     s->window_end = s->window + s->window_size;
-    s->rms_sum = 0;
+    s->sum = 0;
 }
 
 static int config_input(AVFilterLink *inlink)
@@ -113,12 +177,12 @@ static int config_input(AVFilterLink *inlink)
     AVFilterContext *ctx = inlink->dst;
     SilenceRemoveContext *s = ctx->priv;
 
-    s->window_size = (inlink->sample_rate / 50) * inlink->channels;
+    s->window_size = FFMAX((inlink->sample_rate * s->window_ratio), 1) * inlink->channels;
     s->window = av_malloc_array(s->window_size, sizeof(*s->window));
     if (!s->window)
         return AVERROR(ENOMEM);
 
-    clear_rms(s);
+    clear_window(s);
 
     s->start_duration = av_rescale(s->start_duration, inlink->sample_rate,
                                    AV_TIME_BASE);
@@ -153,35 +217,6 @@ static int config_input(AVFilterLink *inlink)
     return 0;
 }
 
-static int config_output(AVFilterLink *outlink)
-{
-    outlink->flags |= FF_LINK_FLAG_REQUEST_LOOP;
-
-    return 0;
-}
-
-static double compute_rms(SilenceRemoveContext *s, double sample)
-{
-    double new_sum;
-
-    new_sum  = s->rms_sum;
-    new_sum -= *s->window_current;
-    new_sum += sample * sample;
-
-    return sqrt(new_sum / s->window_size);
-}
-
-static void update_rms(SilenceRemoveContext *s, double sample)
-{
-    s->rms_sum -= *s->window_current;
-    *s->window_current = sample * sample;
-    s->rms_sum += *s->window_current;
-
-    s->window_current++;
-    if (s->window_current >= s->window_end)
-        s->window_current = s->window;
-}
-
 static void flush(AVFrame *out, AVFilterLink *outlink,
                   int *nb_samples_written, int *ret)
 {
@@ -216,15 +251,15 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
         for (i = 0; i < nbs; i++) {
             threshold = 0;
             for (j = 0; j < inlink->channels; j++) {
-                threshold |= compute_rms(s, ibuf[j]) > s->start_threshold;
+                threshold |= s->compute(s, ibuf[j]) > s->start_threshold;
             }
 
             if (threshold) {
                 for (j = 0; j < inlink->channels; j++) {
-                    update_rms(s, *ibuf);
+                    s->update(s, *ibuf);
                     s->start_holdoff[s->start_holdoff_end++] = *ibuf++;
-                    nb_samples_read++;
                 }
+                nb_samples_read += inlink->channels;
 
                 if (s->start_holdoff_end >= s->start_duration * inlink->channels) {
                     if (++s->start_found_periods >= s->start_periods) {
@@ -239,7 +274,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
                 s->start_holdoff_end = 0;
 
                 for (j = 0; j < inlink->channels; j++)
-                    update_rms(s, ibuf[j]);
+                    s->update(s, ibuf[j]);
 
                 ibuf += inlink->channels;
                 nb_samples_read += inlink->channels;
@@ -291,7 +326,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
             for (i = 0; i < nbs; i++) {
                 threshold = 1;
                 for (j = 0; j < inlink->channels; j++)
-                    threshold &= compute_rms(s, ibuf[j]) > s->stop_threshold;
+                    threshold &= s->compute(s, ibuf[j]) > s->stop_threshold;
 
                 if (threshold && s->stop_holdoff_end && !s->leave_silence) {
                     s->mode = SILENCE_COPY_FLUSH;
@@ -299,22 +334,22 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
                     goto silence_copy_flush;
                 } else if (threshold) {
                     for (j = 0; j < inlink->channels; j++) {
-                        update_rms(s, *ibuf);
+                        s->update(s, *ibuf);
                         *obuf++ = *ibuf++;
-                        nb_samples_read++;
-                        nb_samples_written++;
                     }
+                    nb_samples_read    += inlink->channels;
+                    nb_samples_written += inlink->channels;
                 } else if (!threshold) {
                     for (j = 0; j < inlink->channels; j++) {
-                        update_rms(s, *ibuf);
+                        s->update(s, *ibuf);
                         if (s->leave_silence) {
                             *obuf++ = *ibuf;
                             nb_samples_written++;
                         }
 
                         s->stop_holdoff[s->stop_holdoff_end++] = *ibuf++;
-                        nb_samples_read++;
                     }
+                    nb_samples_read += inlink->channels;
 
                     if (s->stop_holdoff_end >= s->stop_duration * inlink->channels) {
                         if (++s->stop_found_periods >= s->stop_periods) {
@@ -330,7 +365,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
                                 s->start_found_periods = 0;
                                 s->start_holdoff_offset = 0;
                                 s->start_holdoff_end = 0;
-                                clear_rms(s);
+                                clear_window(s);
                                 s->mode = SILENCE_TRIM;
                                 flush(out, outlink, &nb_samples_written, &ret);
                                 goto silence_trim;
@@ -420,7 +455,7 @@ static int query_formats(AVFilterContext *ctx)
     };
     int ret;
 
-    layouts = ff_all_channel_layouts();
+    layouts = ff_all_channel_counts();
     if (!layouts)
         return AVERROR(ENOMEM);
     ret = ff_set_common_channel_layouts(ctx, layouts);
@@ -463,7 +498,6 @@ static const AVFilterPad silenceremove_outputs[] = {
     {
         .name          = "default",
         .type          = AVMEDIA_TYPE_AUDIO,
-        .config_props  = config_output,
         .request_frame = request_frame,
     },
     { NULL }
diff --git a/libavfilter/af_sofalizer.c b/libavfilter/af_sofalizer.c
new file mode 100644
index 00000000..0f1231f8
--- /dev/null
+++ b/libavfilter/af_sofalizer.c
@@ -0,0 +1,1136 @@
+/*****************************************************************************
+ * sofalizer.c : SOFAlizer filter for virtual binaural acoustics
+ *****************************************************************************
+ * Copyright (C) 2013-2015 Andreas Fuchs, Wolfgang Hrauda,
+ *                         Acoustics Research Institute (ARI), Vienna, Austria
+ *
+ * Authors: Andreas Fuchs <andi.fuchs.mail@gmail.com>
+ *          Wolfgang Hrauda <wolfgang.hrauda@gmx.at>
+ *
+ * SOFAlizer project coordinator at ARI, main developer of SOFA:
+ *          Piotr Majdak <piotr@majdak.at>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ *****************************************************************************/
+
+#include <math.h>
+#include <netcdf.h>
+
+#include "libavcodec/avfft.h"
+#include "libavutil/float_dsp.h"
+#include "libavutil/intmath.h"
+#include "libavutil/opt.h"
+#include "avfilter.h"
+#include "internal.h"
+#include "audio.h"
+
+#define TIME_DOMAIN      0
+#define FREQUENCY_DOMAIN 1
+
+typedef struct NCSofa {  /* contains data of one SOFA file */
+    int ncid;            /* netCDF ID from nc_open(); set to 0 by close_sofa() and on entry to load_sofa() */
+    int n_samples;       /* length of one impulse response (IR) */
+    int m_dim;           /* number of measurement positions */
+    int *data_delay;     /* broadband delay of each IR (2 * m_dim entries) */
+                         /* one value per measurement position (m_dim entries each): */
+    float *sp_a;         /* azimuth angles */
+    float *sp_e;         /* elevation angles */
+    float *sp_r;         /* radii */
+                         /* data at each measurement position for each receiver: */
+    float *data_ir;      /* IRs (time-domain), 2 * m_dim * n_samples floats */
+} NCSofa;
+
+typedef struct SOFAlizerContext {
+    const AVClass *class;
+
+    char *filename;             /* name of SOFA file */
+    NCSofa sofa;                /* contains data of the SOFA file */
+
+    int sample_rate;            /* sample rate from SOFA file */
+    float *speaker_azim;        /* azimuth of the virtual loudspeakers */
+    float *speaker_elev;        /* elevation of the virtual loudspeakers */
+    float gain_lfe;             /* gain applied to LFE channel */
+    int lfe_channel;            /* index of the LFE channel among the input channels, -1 if none */
+
+    int n_conv;                 /* number of channels to convolute */
+
+                                /* buffer variables (for convolution); [2]: one per job (jobnr 0/1) */
+    float *ringbuffer[2];       /* buffers input samples, length of one buffer: */
+                                /* no. input ch. (incl. LFE) x buffer_length */
+    int write[2];               /* current write position to ringbuffer */
+    int buffer_length;          /* is: longest IR plus max. delay in all SOFA files */
+                                /* then choose next power of 2 */
+    int n_fft;                  /* number of samples in one FFT block */
+
+                                /* netCDF variables */
+    int *delay[2];              /* broadband delay for each channel/IR to be convolved */
+
+    float *data_ir[2];          /* IRs for all channels to be convolved */
+                                /* NOTE(review): sofalizer_convolute() also skips one IR slot for the LFE - confirm layout */
+    float *temp_src[2];         /* per-job scratch: ring buffer window fed to the scalar product */
+    FFTComplex *temp_fft[2];    /* per-job scratch: FFT input/output block */
+
+                         /* control variables */
+    float gain;          /* filter gain (in dB) */
+    float rotation;      /* rotation of virtual loudspeakers (in degrees)  */
+    float elevation;     /* elevation of virtual loudspeakers (in deg.) */
+    float radius;        /* distance virtual loudspeakers to listener (in metres) */
+    int type;            /* processing type: TIME_DOMAIN or FREQUENCY_DOMAIN */
+
+    FFTContext *fft[2], *ifft[2]; /* per-job forward and inverse FFT contexts */
+    FFTComplex *data_hrtf[2];     /* per-job HRTFs, read at index channel * n_fft + bin */
+
+    AVFloatDSPContext *fdsp;      /* provides scalarproduct_float / vector_fmul_scalar */
+} SOFAlizerContext;
+
+static int close_sofa(struct NCSofa *sofa) /* free all buffers of sofa; always returns 0 */
+{
+    av_freep(&sofa->data_delay); /* av_freep() also resets each pointer to NULL */
+    av_freep(&sofa->sp_a);
+    av_freep(&sofa->sp_e);
+    av_freep(&sofa->sp_r);
+    av_freep(&sofa->data_ir);
+    nc_close(sofa->ncid); /* NOTE(review): load_sofa() leaves this ID at 0 or already closed - confirm */
+    sofa->ncid = 0;
+
+    return 0;
+}
+
+/**
+ * Read the HRIR data set of a SOFA (netCDF) file into s->sofa.
+ * @param ctx          filter context; its private data receives the result
+ * @param filename     path handed to nc_open()
+ * @param samplingrate receives the value of Data.SamplingRate
+ * @return 0 on success, a negative AVERROR code otherwise; on failure the file
+ *         is closed again and all buffers allocated here are released
+ */
+static int load_sofa(AVFilterContext *ctx, char *filename, int *samplingrate)
+{
+    struct SOFAlizerContext *s = ctx->priv;
+    int ncid, n_dims, n_vars, n_gatts, n_unlim_dim_id, status;
+    char data_delay_dim_name[NC_MAX_NAME];
+    float *sp_a, *sp_e, *sp_r, *data_ir;
+    char *sofa_conventions;
+    char dim_name[NC_MAX_NAME];   /* names of netCDF dimensions */
+    size_t *dim_length;           /* lengths of netCDF dimensions */
+    char *text;
+    unsigned int sample_rate;
+    int data_delay_dim_id[2];
+    int samplingrate_id;
+    int data_delay_id;
+    int n_samples;
+    int m_dim_id = -1;
+    int n_dim_id = -1;
+    int data_ir_id;
+    size_t att_len;
+    int m_dim;
+    int *data_delay;
+    int sp_id;
+    int i, ret;
+
+    s->sofa.ncid = 0;
+    status = nc_open(filename, NC_NOWRITE, &ncid); /* open SOFA file read-only */
+    if (status != NC_NOERR) {
+        av_log(ctx, AV_LOG_ERROR, "Can't find SOFA-file '%s'\n", filename);
+        return AVERROR(EINVAL);
+    }
+
+    /* get number of dimensions, vars, global attributes and Id of unlimited dimensions: */
+    nc_inq(ncid, &n_dims, &n_vars, &n_gatts, &n_unlim_dim_id);
+
+    /* -- get number of measurements ("M") and length of one IR ("N") -- */
+    dim_length = av_malloc_array(n_dims, sizeof(*dim_length));
+    if (!dim_length) {
+        nc_close(ncid);
+        return AVERROR(ENOMEM);
+    }
+
+    for (i = 0; i < n_dims; i++) { /* go through all dimensions of file */
+        nc_inq_dim(ncid, i, (char *)&dim_name, &dim_length[i]); /* get dimensions */
+        if (!strncmp("M", (const char *)&dim_name, 1)) /* get ID of dimension "M" */
+            m_dim_id = i;
+        if (!strncmp("N", (const char *)&dim_name, 1)) /* get ID of dimension "N" */
+            n_dim_id = i;
+    }
+
+    if ((m_dim_id == -1) || (n_dim_id == -1)) { /* dimension "M" or "N" couldn't be found */
+        av_log(ctx, AV_LOG_ERROR, "Can't find required dimensions in SOFA file.\n");
+        av_freep(&dim_length);
+        nc_close(ncid);
+        return AVERROR(EINVAL);
+    }
+
+    n_samples = dim_length[n_dim_id]; /* get length of one IR */
+    m_dim     = dim_length[m_dim_id]; /* get number of measurements */
+
+    av_freep(&dim_length);
+
+    /* -- check file type: get length of attribute "Conventions" -- */
+    status = nc_inq_attlen(ncid, NC_GLOBAL, "Conventions", &att_len);
+    if (status != NC_NOERR) {
+        av_log(ctx, AV_LOG_ERROR, "Can't get length of attribute \"Conventions\".\n");
+        nc_close(ncid);
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* check whether file is SOFA file */
+    text = av_malloc(att_len + 1);
+    if (!text) {
+        nc_close(ncid);
+        return AVERROR(ENOMEM);
+    }
+
+    nc_get_att_text(ncid, NC_GLOBAL, "Conventions", text);
+    *(text + att_len) = 0;
+    if (strncmp("SOFA", text, 4)) {
+        av_log(ctx, AV_LOG_ERROR, "Not a SOFA file!\n");
+        av_freep(&text);
+        nc_close(ncid);
+        return AVERROR(EINVAL);
+    }
+    av_freep(&text);
+
+    status = nc_inq_attlen(ncid, NC_GLOBAL, "License", &att_len);
+    if (status == NC_NOERR) {
+        text = av_malloc(att_len + 1);
+        if (text) {
+            nc_get_att_text(ncid, NC_GLOBAL, "License", text);
+            *(text + att_len) = 0;
+            av_log(ctx, AV_LOG_INFO, "SOFA file License: %s\n", text);
+            av_freep(&text);
+        }
+    }
+
+    status = nc_inq_attlen(ncid, NC_GLOBAL, "SourceDescription", &att_len);
+    if (status == NC_NOERR) {
+        text = av_malloc(att_len + 1);
+        if (text) {
+            nc_get_att_text(ncid, NC_GLOBAL, "SourceDescription", text);
+            *(text + att_len) = 0;
+            av_log(ctx, AV_LOG_INFO, "SOFA file SourceDescription: %s\n", text);
+            av_freep(&text);
+        }
+    }
+
+    status = nc_inq_attlen(ncid, NC_GLOBAL, "Comment", &att_len);
+    if (status == NC_NOERR) {
+        text = av_malloc(att_len + 1);
+        if (text) {
+            nc_get_att_text(ncid, NC_GLOBAL, "Comment", text);
+            *(text + att_len) = 0;
+            av_log(ctx, AV_LOG_INFO, "SOFA file Comment: %s\n", text);
+            av_freep(&text);
+        }
+    }
+
+    status = nc_inq_attlen(ncid, NC_GLOBAL, "SOFAConventions", &att_len);
+    if (status != NC_NOERR) {
+        av_log(ctx, AV_LOG_ERROR, "Can't get length of attribute \"SOFAConventions\".\n");
+        nc_close(ncid);
+        return AVERROR_INVALIDDATA;
+    }
+
+    sofa_conventions = av_malloc(att_len + 1);
+    if (!sofa_conventions) {
+        nc_close(ncid);
+        return AVERROR(ENOMEM);
+    }
+
+    nc_get_att_text(ncid, NC_GLOBAL, "SOFAConventions", sofa_conventions);
+    *(sofa_conventions + att_len) = 0;
+    if (strncmp("SimpleFreeFieldHRIR", sofa_conventions, att_len)) {
+        av_log(ctx, AV_LOG_ERROR, "Not a SimpleFreeFieldHRIR file!\n");
+        av_freep(&sofa_conventions);
+        nc_close(ncid);
+        return AVERROR(EINVAL);
+    }
+    av_freep(&sofa_conventions);
+
+    /* -- get sampling rate of HRTFs: read ID, then value -- */
+    status  = nc_inq_varid(ncid, "Data.SamplingRate", &samplingrate_id);
+    status += nc_get_var_uint(ncid, samplingrate_id, &sample_rate);
+    if (status != NC_NOERR) {
+        av_log(ctx, AV_LOG_ERROR, "Couldn't read Data.SamplingRate.\n");
+        nc_close(ncid);
+        return AVERROR(EINVAL);
+    }
+    *samplingrate = sample_rate; /* remember sampling rate */
+
+    /* -- allocate memory for one value for each measurement position: -- */
+    sp_a = s->sofa.sp_a = av_malloc_array(m_dim, sizeof(float));
+    sp_e = s->sofa.sp_e = av_malloc_array(m_dim, sizeof(float));
+    sp_r = s->sofa.sp_r = av_malloc_array(m_dim, sizeof(float));
+    /* delay and IR values required for each ear and measurement position: */
+    data_delay = s->sofa.data_delay = av_calloc(m_dim, 2 * sizeof(int));
+    data_ir = s->sofa.data_ir = av_malloc_array(m_dim * n_samples, sizeof(float) * 2);
+
+    if (!data_delay || !sp_a || !sp_e || !sp_r || !data_ir) {
+        nc_close(ncid); /* close_sofa() only knows s->sofa.ncid, which is still 0 */
+        close_sofa(&s->sofa);
+        return AVERROR(ENOMEM);
+    }
+
+    /* get impulse responses (HRTFs): look up the variable ID, then read */
+    status = nc_inq_varid(ncid, "Data.IR", &data_ir_id);
+    status += nc_get_var_float(ncid, data_ir_id, data_ir); /* read and store IRs */
+    if (status != NC_NOERR) {
+        av_log(ctx, AV_LOG_ERROR, "Couldn't read Data.IR!\n");
+        ret = AVERROR(EINVAL);
+        goto error;
+    }
+
+    /* get source positions of the HRTFs in the SOFA file: */
+    status  = nc_inq_varid(ncid, "SourcePosition", &sp_id); /* get corresponding ID */
+    status += nc_get_vara_float(ncid, sp_id, (size_t[2]){ 0, 0 } ,
+                (size_t[2]){ m_dim, 1}, sp_a); /* read & store azimuth angles */
+    status += nc_get_vara_float(ncid, sp_id, (size_t[2]){ 0, 1 } ,
+                (size_t[2]){ m_dim, 1}, sp_e); /* read & store elevation angles */
+    status += nc_get_vara_float(ncid, sp_id, (size_t[2]){ 0, 2 } ,
+                (size_t[2]){ m_dim, 1}, sp_r); /* read & store radii */
+    if (status != NC_NOERR) { /* if any source position variable couldn't be read */
+        av_log(ctx, AV_LOG_ERROR, "Couldn't read SourcePosition.\n");
+        ret = AVERROR(EINVAL);
+        goto error;
+    }
+
+    /* read Data.Delay, check for errors and fit it to data_delay */
+    status  = nc_inq_varid(ncid, "Data.Delay", &data_delay_id);
+    status += nc_inq_vardimid(ncid, data_delay_id, &data_delay_dim_id[0]);
+    status += nc_inq_dimname(ncid, data_delay_dim_id[0], data_delay_dim_name);
+    if (status != NC_NOERR) {
+        av_log(ctx, AV_LOG_ERROR, "Couldn't read Data.Delay.\n");
+        ret = AVERROR(EINVAL);
+        goto error;
+    }
+
+    /* Data.Delay dimension check; dimension of Data.Delay is [I R]: */
+    if (!strncmp(data_delay_dim_name, "I", 2)) { /* 2 chars: name must end right after "I" */
+        int delay[2]; /* delays get from SOFA file: */
+        int *data_delay_r = data_delay + m_dim; /* second half of data_delay */
+
+        av_log(ctx, AV_LOG_DEBUG, "Data.Delay has dimension [I R]\n");
+        status = nc_get_var_int(ncid, data_delay_id, &delay[0]);
+        if (status != NC_NOERR) {
+            av_log(ctx, AV_LOG_ERROR, "Couldn't read Data.Delay\n");
+            ret = AVERROR(EINVAL);
+            goto error;
+        }
+        for (i = 0; i < m_dim; i++) { /* extend [I R] to [M R]: same delay for every measurement */
+            data_delay[i]   = delay[0];
+            data_delay_r[i] = delay[1];
+        }
+    } else if (!strncmp(data_delay_dim_name, "M", 2)) { /* dimension of Data.Delay is [M R] */
+        av_log(ctx, AV_LOG_DEBUG, "Data.Delay in dimension [M R]\n");
+        status = nc_get_var_int(ncid, data_delay_id, data_delay); /* get delays from SOFA file */
+        if (status != NC_NOERR) {
+            av_log(ctx, AV_LOG_ERROR, "Couldn't read Data.Delay\n");
+            ret = AVERROR(EINVAL);
+            goto error;
+        }
+    } else { /* dimension of Data.Delay is neither [I R] nor [M R] */
+        av_log(ctx, AV_LOG_ERROR, "Data.Delay does not have the required dimensions [I R] or [M R].\n");
+        ret = AVERROR(EINVAL);
+        goto error;
+    }
+
+    /* save information in SOFA struct: */
+    s->sofa.m_dim = m_dim; /* no. measurement positions */
+    s->sofa.n_samples = n_samples; /* length on one IR */
+    s->sofa.ncid = ncid; /* netCDF ID of SOFA file */
+    nc_close(ncid); /* close SOFA file */
+
+    return 0;
+
+error:
+    nc_close(ncid); /* s->sofa.ncid is still 0 here, so close_sofa() cannot close the file */
+    close_sofa(&s->sofa);
+    return ret;
+}
+
+/* Store a default azimuth/elevation (degrees) for each input channel, in layout
+ * order, and record the LFE channel index in s->lfe_channel (-1 if absent).
+ * Returns AVERROR(EINVAL) for more than 16 channels or an unknown channel. */
+static int get_speaker_pos(AVFilterContext *ctx,
+                           float *speaker_azim, float *speaker_elev)
+{
+    struct SOFAlizerContext *s = ctx->priv;
+    uint64_t channels_layout = ctx->inputs[0]->channel_layout;
+    float azim[16] = { 0 };
+    float elev[16] = { 0 };
+    int m, ch, n_conv = ctx->inputs[0]->channels; /* get no. input channels */
+
+    if (n_conv > 16)
+        return AVERROR(EINVAL);
+
+    s->lfe_channel = -1;
+
+    for (m = 0, ch = 0; ch < n_conv && m < 64; m++) {
+        uint64_t mask = channels_layout & ((uint64_t)1 << m); /* 64-bit shift: m goes up to 63 */
+        switch (mask) {
+        case AV_CH_FRONT_LEFT:            azim[ch] =  30;      break;
+        case AV_CH_FRONT_RIGHT:           azim[ch] = 330;      break;
+        case AV_CH_FRONT_CENTER:          azim[ch] =   0;      break;
+        case AV_CH_LOW_FREQUENCY:
+        case AV_CH_LOW_FREQUENCY_2:       s->lfe_channel = ch; break;
+        case AV_CH_BACK_LEFT:             azim[ch] = 150;      break;
+        case AV_CH_BACK_RIGHT:            azim[ch] = 210;      break;
+        case AV_CH_BACK_CENTER:           azim[ch] = 180;      break;
+        case AV_CH_SIDE_LEFT:             azim[ch] =  90;      break;
+        case AV_CH_SIDE_RIGHT:            azim[ch] = 270;      break;
+        case AV_CH_FRONT_LEFT_OF_CENTER:  azim[ch] =  15;      break;
+        case AV_CH_FRONT_RIGHT_OF_CENTER: azim[ch] = 345;      break;
+        case AV_CH_TOP_CENTER:            azim[ch] =   0;
+                                          elev[ch] =  90;      break;
+        case AV_CH_TOP_FRONT_LEFT:        azim[ch] =  30;
+                                          elev[ch] =  45;      break;
+        case AV_CH_TOP_FRONT_CENTER:      azim[ch] =   0;
+                                          elev[ch] =  45;      break;
+        case AV_CH_TOP_FRONT_RIGHT:       azim[ch] = 330;
+                                          elev[ch] =  45;      break;
+        case AV_CH_TOP_BACK_LEFT:         azim[ch] = 150;
+                                          elev[ch] =  45;      break;
+        case AV_CH_TOP_BACK_RIGHT:        azim[ch] = 210;
+                                          elev[ch] =  45;      break;
+        case AV_CH_TOP_BACK_CENTER:       azim[ch] = 180;
+                                          elev[ch] =  45;      break;
+        case AV_CH_WIDE_LEFT:             azim[ch] =  90;      break;
+        case AV_CH_WIDE_RIGHT:            azim[ch] = 270;      break;
+        case AV_CH_SURROUND_DIRECT_LEFT:  azim[ch] =  90;      break;
+        case AV_CH_SURROUND_DIRECT_RIGHT: azim[ch] = 270;      break;
+        case AV_CH_STEREO_LEFT:           azim[ch] =  90;      break;
+        case AV_CH_STEREO_RIGHT:          azim[ch] = 270;      break;
+        case 0:                                                break;
+        default:
+            return AVERROR(EINVAL);
+        }
+        if (mask)
+            ch++;
+    }
+
+    memcpy(speaker_azim, azim, n_conv * sizeof(float));
+    memcpy(speaker_elev, elev, n_conv * sizeof(float));
+
+    return 0;
+}
+
+/* Return the largest broadband delay stored in sofa (never less than 0). */
+static int max_delay(struct NCSofa *sofa)
+{
+    const int n_delays = 2 * sofa->m_dim; /* one delay per ear and measurement */
+    int largest = 0, k;
+
+    for (k = 0; k < n_delays; k++)
+        largest = FFMAX(largest, sofa->data_delay[k]);
+
+    return largest;
+}
+
+/* Return the index of the measurement whose azimuth, elevation and radius have
+ * the smallest summed absolute difference to the requested position.
+ * Ties go to the later measurement; 0 is returned if no distance is <= 1000. */
+static int find_m(SOFAlizerContext *s, int azim, int elev, float radius)
+{
+    const struct NCSofa *sofa = &s->sofa;
+    const int n_meas = sofa->m_dim; /* no. measurements */
+    float best_dist = 1000;         /* smallest distance seen so far */
+    int best_id = 0;
+    int m;
+
+    for (m = 0; m < n_meas; m++) {
+        /* distance of measurement m to the desired source position, */
+        /* stored as float like the running minimum it is compared with */
+        const float dist = fabs(sofa->sp_a[m] - azim) +
+                           fabs(sofa->sp_e[m] - elev) +
+                           fabs(sofa->sp_r[m] - radius);
+
+        if (dist <= best_dist) {
+            best_dist = dist;
+            best_id   = m;
+        }
+    }
+
+    return best_id;
+}
+
+/* Scale all IRs of the loaded SOFA file by one factor derived from the energy of
+ * the IR find_m() picks for (0, 0, 1). No-op while s->sofa.ncid is 0; returns 0. */
+static int compensate_volume(AVFilterContext *ctx)
+{
+    struct SOFAlizerContext *s = ctx->priv;
+    struct NCSofa *sofa = &s->sofa;
+    float *ref_ir, factor, energy;
+    int n_total;
+
+    if (!sofa->ncid)
+        return 0;
+
+    /* reference IR: the first n_samples values of the selected measurement */
+    ref_ir = sofa->data_ir + 2 * find_m(s, 0, 0, 1) * sofa->n_samples;
+    if (sofa->n_samples & 31) /* length is not a multiple of 32 */
+        energy = avpriv_scalarproduct_float_c(ref_ir, ref_ir, sofa->n_samples);
+    else
+        energy = s->fdsp->scalarproduct_float(ref_ir, ref_ir, sofa->n_samples);
+
+    factor = 256 / (sofa->n_samples * sqrt(energy));
+    av_log(ctx, AV_LOG_DEBUG, "Compensate-factor: %f\n", factor);
+
+    /* apply the factor to every IR value in place */
+    n_total = sofa->n_samples * sofa->m_dim * 2;
+    s->fdsp->vector_fmul_scalar(sofa->data_ir, sofa->data_ir, factor, n_total);
+    emms_c();
+
+    return 0;
+}
+
+typedef struct ThreadData { /* arguments filter_frame() hands to the two convolution jobs */
+    AVFrame *in, *out;      /* input frame and the output frame to fill */
+    int *write;             /* ring buffer write positions, indexed by jobnr */
+    int **delay;            /* per-job delay arrays (one delay per input channel) */
+    float **ir;             /* per-job IR arrays */
+    int *n_clippings;       /* clipping counters, indexed by jobnr */
+    float **ringbuffer;     /* per-job ring buffers */
+    float **temp_src;       /* per-job scratch for the time-domain scalar product */
+    FFTComplex **temp_fft;  /* set by filter_frame(); the workers here use s->temp_fft instead */
+} ThreadData;
+
+/* Time-domain worker for output channel jobnr (0 or 1): for every input sample,
+ * store it in the per-channel ring buffers, then add up the scalar products of
+ * each channel's delayed history with its IR (the LFE channel is only scaled).
+ * Counts output samples with magnitude above 1 in td->n_clippings[jobnr]. */
+static int sofalizer_convolute(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    SOFAlizerContext *s = ctx->priv;
+    ThreadData *td = arg;
+    AVFrame *in = td->in, *out = td->out;
+    int offset = jobnr;
+    int *write = &td->write[jobnr];
+    const int *const delay = td->delay[jobnr];
+    const float *const ir = td->ir[jobnr];
+    int *n_clippings = &td->n_clippings[jobnr];
+    float *ringbuffer = td->ringbuffer[jobnr];
+    float *temp_src = td->temp_src[jobnr];
+    const int n_samples = s->sofa.n_samples; /* length of one IR */
+    const float *src = (const float *)in->data[0]; /* get pointer to audio input buffer */
+    float *dst = (float *)out->data[0]; /* get pointer to audio output buffer */
+    const int in_channels = s->n_conv; /* number of input channels */
+    const int buffer_length = s->buffer_length; /* longest IR plus max. delay -> next power of 2 */
+    const uint32_t modulo = (uint32_t)buffer_length - 1; /* AND mask instead of MODULO */
+    float *buffer[16]; /* holds ringbuffer for each input channel */
+    int wr = *write;
+    int read;
+    int i, l;
+
+    dst += offset;
+    for (l = 0; l < in_channels; l++) { /* starting address of each channel's ringbuffer */
+        buffer[l] = ringbuffer + l * buffer_length;
+    }
+
+    for (i = 0; i < in->nb_samples; i++) {
+        const float *temp_ir = ir; /* using same set of IRs for each sample */
+
+        *dst = 0;
+        for (l = 0; l < in_channels; l++) {
+            *(buffer[l] + wr) = src[l]; /* write current input sample to ringbuffer */
+        }
+
+        /* loop goes through all channels to be convolved */
+        for (l = 0; l < in_channels; l++) {
+            const float *const bptr = buffer[l];
+
+            if (l == s->lfe_channel) {
+                /* LFE needs no convolution: apply gain and add to output buffer */
+                *dst += *(buffer[s->lfe_channel] + wr) * s->gain_lfe;
+                temp_ir += n_samples;
+                continue;
+            }
+
+            /* current read position in ringbuffer: input sample write position
+             * - delay for l-th ch. + diff. betw. IR length and buffer length
+             * (mod buffer length) */
+            read = (wr - *(delay + l) - (n_samples - 1) + buffer_length) & modulo;
+
+            if (read + n_samples < buffer_length) {
+                memcpy(temp_src, bptr + read, n_samples * sizeof(*temp_src));
+            } else {
+                /* window wraps: copy up to the buffer end, then the rest from the start */
+                const int len = buffer_length - read;
+
+                memcpy(temp_src, bptr + read, len * sizeof(*temp_src));
+                memcpy(temp_src + len, bptr, (n_samples - len) * sizeof(*temp_src));
+            }
+
+            /* multiply signal and IR, and add up the results */
+            dst[0] += s->fdsp->scalarproduct_float(temp_ir, temp_src, n_samples);
+            temp_ir += n_samples;
+        }
+
+        /* clippings counter */
+        if (fabs(*dst) > 1)
+            *n_clippings += 1;
+
+        /* move output buffer pointer by +2 to get to next sample of processed channel: */
+        dst += 2;
+        src += in_channels;
+        wr   = (wr + 1) & modulo; /* update ringbuffer write position */
+    }
+
+    *write = wr; /* remember write position in ringbuffer for next call */
+
+    return 0;
+}
+
+/* Frequency-domain worker for output channel jobnr (0 or 1), overlap-add style:
+ * the output starts from the tail saved in the ring buffer by earlier calls,
+ * each non-LFE input channel is zero-padded, transformed, multiplied with its
+ * HRTF and transformed back, and samples beyond the frame are added to the ring
+ * buffer. NOTE(review): assumes n_fft >= nb_samples + n_samples - 1 - confirm. */
+static int sofalizer_fast_convolute(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    SOFAlizerContext *s = ctx->priv;
+    ThreadData *td = arg;
+    AVFrame *in = td->in, *out = td->out;
+    int offset = jobnr;
+    int *write = &td->write[jobnr];
+    FFTComplex *hrtf = s->data_hrtf[jobnr]; /* get pointers to current HRTF data */
+    int *n_clippings = &td->n_clippings[jobnr];
+    float *ringbuffer = td->ringbuffer[jobnr];
+    const int n_samples = s->sofa.n_samples; /* length of one IR */
+    const float *src = (const float *)in->data[0]; /* get pointer to audio input buffer */
+    float *dst = (float *)out->data[0]; /* get pointer to audio output buffer */
+    const int in_channels = s->n_conv; /* number of input channels */
+    const int buffer_length = s->buffer_length; /* longest IR plus max. delay -> next power of 2 */
+    const uint32_t modulo = (uint32_t)buffer_length - 1; /* AND mask instead of MODULO */
+    FFTComplex *fft_in = s->temp_fft[jobnr]; /* temporary array for FFT input/output data */
+    FFTContext *ifft = s->ifft[jobnr];
+    FFTContext *fft = s->fft[jobnr];
+    const int n_conv = s->n_conv;
+    const int n_fft = s->n_fft;
+    int wr = *write;
+    int n_read;
+    int i, j;
+
+    dst += offset;
+
+    /* read at most one IR length from the overflow buffer, and no more than the frame holds */
+    n_read = FFMIN(s->sofa.n_samples, in->nb_samples);
+    for (j = 0; j < n_read; j++) {
+        /* initialize output buf with saved signal from overflow buf */
+        dst[2 * j]     = ringbuffer[wr];
+        ringbuffer[wr] = 0.0; /* re-set read samples to zero */
+        /* update ringbuffer read/write position */
+        wr  = (wr + 1) & modulo;
+    }
+
+    /* initialize rest of output buffer with 0 */
+    for (j = n_read; j < in->nb_samples; j++) {
+        dst[2 * j] = 0;
+    }
+
+    for (i = 0; i < n_conv; i++) {
+        if (i == s->lfe_channel) { /* LFE */
+            for (j = 0; j < in->nb_samples; j++) {
+                /* apply gain to LFE signal and add to output buffer */
+                dst[2 * j] += src[i + j * in_channels] * s->gain_lfe;
+            }
+            continue;
+        }
+
+        /* outer loop: go through all input channels to be convolved */
+        offset = i * n_fft; /* start of channel i's block of n_fft bins in hrtf */
+
+        /* fill FFT input with 0 (we want to zero-pad) */
+        memset(fft_in, 0, sizeof(FFTComplex) * n_fft);
+
+        for (j = 0; j < in->nb_samples; j++) {
+            /* prepare FFT input: real part = samples of current input channel, imag stays 0 */
+            fft_in[j].re = src[j * in_channels + i];
+        }
+
+        /* transform input signal of current channel to frequency domain */
+        av_fft_permute(fft, fft_in);
+        av_fft_calc(fft, fft_in);
+        for (j = 0; j < n_fft; j++) {
+            const float re = fft_in[j].re;
+            const float im = fft_in[j].im;
+
+            /* complex multiplication of input signal and HRTFs; output channel (real): */
+            fft_in[j].re = re * (hrtf + offset + j)->re - im * (hrtf + offset + j)->im;
+            /* output channel (imag): */
+            fft_in[j].im = re * (hrtf + offset + j)->im + im * (hrtf + offset + j)->re;
+        }
+
+        /* transform output signal of current channel back to time domain */
+        av_fft_permute(ifft, fft_in);
+        av_fft_calc(ifft, fft_in);
+
+        for (j = 0; j < in->nb_samples; j++) {
+            /* write output signal of current channel to output buffer, scaled by 1 / n_fft */
+            dst[2 * j] += fft_in[j].re / (float)n_fft;
+        }
+
+        for (j = 0; j < n_samples - 1; j++) { /* overflow length is IR length - 1 */
+            /* write the rest of output signal to overflow buffer */
+            int write_pos = (wr + j) & modulo;
+
+            *(ringbuffer + write_pos) += fft_in[in->nb_samples + j].re / (float)n_fft;
+        }
+    }
+
+    /* go through all samples of current output buffer: count clippings */
+    for (i = 0; i < out->nb_samples; i++) {
+        /* clippings counter */
+        if (fabs(*dst) > 1) { /* if magnitude of current output sample > 1 */
+            *n_clippings = *n_clippings + 1;
+        }
+
+        /* move output buffer pointer by +2 to get to next sample of processed channel: */
+        dst += 2;
+    }
+
+    /* remember read/write position in ringbuffer for next call */
+    *write = wr;
+
+    return 0;
+}
+/* Filter one frame: get an output frame of the same length, run the selected convolution on it and send it downstream. */
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    SOFAlizerContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+    int n_clippings[2] = { 0 }; /* clipping counters, reachable by the convolution jobs through td */
+    ThreadData td;
+    AVFrame *out;
+
+    out = ff_get_audio_buffer(outlink, in->nb_samples);
+    if (!out) {
+        av_frame_free(&in); /* the input frame is released on this path as well */
+        return AVERROR(ENOMEM);
+    }
+    av_frame_copy_props(out, in);
+
+    td.in = in; td.out = out; td.write = s->write;
+    td.delay = s->delay; td.ir = s->data_ir; td.n_clippings = n_clippings;
+    td.ringbuffer = s->ringbuffer; td.temp_src = s->temp_src;
+    td.temp_fft = s->temp_fft;
+    /* dispatch on s->type; the trailing 2 is presumably the job count (confirm); execute()'s result is not checked */
+    if (s->type == TIME_DOMAIN) {
+        ctx->internal->execute(ctx, sofalizer_convolute, &td, NULL, 2);
+    } else {
+        ctx->internal->execute(ctx, sofalizer_fast_convolute, &td, NULL, 2);
+    }
+    emms_c();
+
+    /* warn when the two counters together report at least one clipped sample */
+    if (n_clippings[0] + n_clippings[1] > 0) {
+        av_log(ctx, AV_LOG_WARNING, "%d of %d samples clipped. Please reduce gain.\n",
+               n_clippings[0] + n_clippings[1], out->nb_samples * 2);
+    }
+
+    av_frame_free(&in);
+    return ff_filter_frame(outlink, out);
+}
+/* Format negotiation: AV_SAMPLE_FMT_FLT samples, all layouts on the input, stereo on the output, one fixed sample rate. */
+static int query_formats(AVFilterContext *ctx)
+{
+    struct SOFAlizerContext *s = ctx->priv;
+    AVFilterFormats *formats = NULL;
+    AVFilterChannelLayouts *layouts = NULL;
+    int ret, sample_rates[] = { 48000, -1 }; /* entry 0 is overwritten with s->sample_rate below */
+
+    ret = ff_add_format(&formats, AV_SAMPLE_FMT_FLT);
+    if (ret)
+        return ret;
+    ret = ff_set_common_formats(ctx, formats);
+    if (ret)
+        return ret;
+
+    layouts = ff_all_channel_layouts(); /* list attached to the input link below */
+    if (!layouts)
+        return AVERROR(ENOMEM);
+
+    ret = ff_channel_layouts_ref(layouts, &ctx->inputs[0]->out_channel_layouts);
+    if (ret)
+        return ret;
+
+    layouts = NULL; /* start a separate list for the output link: stereo only */
+    ret = ff_add_channel_layout(&layouts, AV_CH_LAYOUT_STEREO);
+    if (ret)
+        return ret;
+
+    ret = ff_channel_layouts_ref(layouts, &ctx->outputs[0]->in_channel_layouts);
+    if (ret)
+        return ret;
+
+    sample_rates[0] = s->sample_rate; /* init() hands &s->sample_rate to load_sofa() */
+    formats = ff_make_format_list(sample_rates);
+    if (!formats)
+        return AVERROR(ENOMEM);
+    return ff_set_common_samplerates(ctx, formats);
+}
+/* Load delays plus IRs (time domain) or HRTFs (frequency domain) of all channels; returns 0 or a negative AVERROR. */
+static int load_data(AVFilterContext *ctx, int azim, int elev, float radius)
+{
+    struct SOFAlizerContext *s = ctx->priv;
+    const int n_samples = s->sofa.n_samples;
+    int n_conv = s->n_conv; /* no. channels to convolve */
+    int n_fft = s->n_fft;
+    int delay_l[16]; /* broadband delay for each IR */
+    int delay_r[16];
+    int nb_input_channels = ctx->inputs[0]->channels; /* no. input channels */
+    float gain_lin = expf((s->gain - 3 * nb_input_channels) / 20 * M_LN10); /* gain - 3dB/channel */
+    FFTComplex *data_hrtf_l = NULL;
+    FFTComplex *data_hrtf_r = NULL;
+    FFTComplex *fft_in_l = NULL;
+    FFTComplex *fft_in_r = NULL;
+    float *data_ir_l = NULL;
+    float *data_ir_r = NULL;
+    int offset = 0; /* used for faster pointer arithmetics in for-loop */
+    int m[16]; /* measurement index m of IR closest to required source positions */
+    int i, j, azim_orig = azim, elev_orig = elev;
+
+    if (!s->sofa.ncid) { /* if an invalid SOFA file has been selected */
+        av_log(ctx, AV_LOG_ERROR, "Selected SOFA file is invalid. Please select valid SOFA file.\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (n_conv > 16) { /* delay_l[], delay_r[] and m[] above only hold 16 entries */
+        av_log(ctx, AV_LOG_ERROR, "Too many input channels: %d (at most 16 are supported).\n", n_conv);
+        return AVERROR(EINVAL);
+    }
+
+    if (s->type == TIME_DOMAIN) {
+        s->temp_src[0] = av_calloc(FFALIGN(n_samples, 16), sizeof(float));
+        s->temp_src[1] = av_calloc(FFALIGN(n_samples, 16), sizeof(float));
+        /* get temporary IR for L and R channel (temp_src is released by uninit()) */
+        data_ir_l = av_malloc_array(n_conv * n_samples, sizeof(*data_ir_l));
+        data_ir_r = av_malloc_array(n_conv * n_samples, sizeof(*data_ir_r));
+        if (!data_ir_r || !data_ir_l || !s->temp_src[0] || !s->temp_src[1]) {
+            av_free(data_ir_l);
+            av_free(data_ir_r);
+            return AVERROR(ENOMEM);
+        }
+    } else {
+        /* get temporary HRTF memory for L and R channel */
+        data_hrtf_l = av_malloc_array(n_fft, sizeof(*data_hrtf_l) * n_conv);
+        data_hrtf_r = av_malloc_array(n_fft, sizeof(*data_hrtf_r) * n_conv);
+        if (!data_hrtf_r || !data_hrtf_l) {
+            av_free(data_hrtf_l);
+            av_free(data_hrtf_r);
+            return AVERROR(ENOMEM);
+        }
+    }
+
+    for (i = 0; i < s->n_conv; i++) {
+        /* load and store IRs and corresponding delays */
+        azim = (int)(s->speaker_azim[i] + azim_orig) % 360;
+        elev = (int)(s->speaker_elev[i] + elev_orig) % 90;
+        /* get id of IR closest to desired position */
+        m[i] = find_m(s, azim, elev, radius);
+
+        /* load the delays associated with the current IRs */
+        delay_l[i] = *(s->sofa.data_delay + 2 * m[i]);
+        delay_r[i] = *(s->sofa.data_delay + 2 * m[i] + 1);
+
+        if (s->type == TIME_DOMAIN) {
+            offset = i * n_samples; /* no. samples already written */
+            for (j = 0; j < n_samples; j++) {
+                /* load reversed IRs of the specified source position
+                 * sample-by-sample for left and right ear; and apply gain */
+                *(data_ir_l + offset + j) = /* left channel */
+                *(s->sofa.data_ir + 2 * m[i] * n_samples + n_samples - 1 - j) * gain_lin;
+                *(data_ir_r + offset + j) = /* right channel */
+                *(s->sofa.data_ir + 2 * m[i] * n_samples + n_samples - 1 - j  + n_samples) * gain_lin;
+            }
+        } else {
+            fft_in_l = av_calloc(n_fft, sizeof(*fft_in_l));
+            fft_in_r = av_calloc(n_fft, sizeof(*fft_in_r));
+            if (!fft_in_l || !fft_in_r) {
+                av_free(data_hrtf_l);
+                av_free(data_hrtf_r);
+                av_free(fft_in_l);
+                av_free(fft_in_r);
+                return AVERROR(ENOMEM);
+            }
+
+            offset = i * n_fft; /* no. samples already written */
+            for (j = 0; j < n_samples; j++) {
+                /* load non-reversed IRs of the specified source position
+                 * sample-by-sample and apply gain,
+                 * L and R IRs go to the real parts of separate FFT buffers,
+                 * IRs are shifted by L and R delay */
+                fft_in_l[delay_l[i] + j].re = /* left channel */
+                *(s->sofa.data_ir + 2 * m[i] * n_samples + j) * gain_lin;
+                fft_in_r[delay_r[i] + j].re = /* right channel */
+                *(s->sofa.data_ir + (2 * m[i] + 1) * n_samples + j) * gain_lin;
+            }
+
+            /* actually transform to frequency domain (IRs -> HRTFs) */
+            av_fft_permute(s->fft[0], fft_in_l);
+            av_fft_calc(s->fft[0], fft_in_l);
+            memcpy(data_hrtf_l + offset, fft_in_l, n_fft * sizeof(*fft_in_l));
+            av_fft_permute(s->fft[0], fft_in_r);
+            av_fft_calc(s->fft[0], fft_in_r);
+            memcpy(data_hrtf_r + offset, fft_in_r, n_fft * sizeof(*fft_in_r));
+            av_freep(&fft_in_l); /* scratch buffers are allocated anew for every channel, */
+            av_freep(&fft_in_r); /* so release them here instead of leaking all but the last pair */
+        }
+
+        av_log(ctx, AV_LOG_DEBUG, "Index: %d, Azimuth: %f, Elevation: %f, Radius: %f of SOFA file.\n",
+               m[i], *(s->sofa.sp_a + m[i]), *(s->sofa.sp_e + m[i]), *(s->sofa.sp_r + m[i]));
+    }
+
+    if (s->type == TIME_DOMAIN) {
+        /* copy IRs and delays to allocated memory in the SOFAlizerContext struct: */
+        memcpy(s->data_ir[0], data_ir_l, sizeof(float) * n_conv * n_samples);
+        memcpy(s->data_ir[1], data_ir_r, sizeof(float) * n_conv * n_samples);
+        av_freep(&data_ir_l); /* free temporary IR memory */
+        av_freep(&data_ir_r);
+    } else {
+        s->data_hrtf[0] = av_malloc_array(n_fft * s->n_conv, sizeof(FFTComplex));
+        s->data_hrtf[1] = av_malloc_array(n_fft * s->n_conv, sizeof(FFTComplex));
+        if (!s->data_hrtf[0] || !s->data_hrtf[1]) {
+            av_freep(&data_hrtf_l);
+            av_freep(&data_hrtf_r);
+            return AVERROR(ENOMEM); /* memory allocation failed */
+        }
+
+        memcpy(s->data_hrtf[0], data_hrtf_l, /* copy HRTF data to */
+            sizeof(FFTComplex) * n_conv * n_fft); /* filter struct */
+        memcpy(s->data_hrtf[1], data_hrtf_r,
+            sizeof(FFTComplex) * n_conv * n_fft);
+
+        av_freep(&data_hrtf_l); /* free temporary HRTF memory */
+        av_freep(&data_hrtf_r);
+    }
+
+    memcpy(s->delay[0], &delay_l[0], sizeof(int) * s->n_conv);
+    memcpy(s->delay[1], &delay_r[0], sizeof(int) * s->n_conv);
+
+    return 0;
+}
+/* Filter init callback: load the SOFA file named by s->filename and allocate the float DSP context. */
+static av_cold int init(AVFilterContext *ctx)
+{
+    SOFAlizerContext *s = ctx->priv;
+    int err;
+
+    /* Clear the SOFA file ID before trying to load anything: should
+     * loading fail, the cleanup code then only releases the data of
+     * a file that was really loaded. */
+    s->sofa.ncid = 0;
+
+    err = load_sofa(ctx, s->filename, &s->sample_rate);
+    if (err) {
+        /* loading failed: name the offending file, add the generic
+         * hint and hand the error code back to the caller */
+        av_log(ctx, AV_LOG_ERROR, "Error while loading SOFA file: '%s'\n", s->filename);
+        av_log(ctx, AV_LOG_ERROR, "No valid SOFA file could be loaded. Please specify valid SOFA file.\n");
+        return err;
+    }
+
+    /* no file loading error, resampling not required */
+    av_log(ctx, AV_LOG_DEBUG, "File '%s' loaded.\n", s->filename);
+
+    /* float DSP context, released again in uninit() */
+    s->fdsp = avpriv_float_dsp_alloc(0);
+    if (!s->fdsp)
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+/* Input link setup: derive ring buffer and FFT sizes from the SOFA data, allocate all working buffers, then load the IR data. */
+static int config_input(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    SOFAlizerContext *s = ctx->priv;
+    int nb_input_channels = inlink->channels; /* no. input channels */
+    int n_max_ir = 0;
+    int n_current;
+    int n_max = 0;
+    int ret;
+
+    if (s->type == FREQUENCY_DOMAIN) { /* pin all three frame-size fields of the link to sample_rate samples */
+        inlink->partial_buf_size =
+        inlink->min_samples =
+        inlink->max_samples = inlink->sample_rate;
+    }
+
+    /* gain -3 dB per channel, -6 dB to get LFE on a similar level */
+    s->gain_lfe = expf((s->gain - 3 * inlink->channels - 6) / 20 * M_LN10);
+
+    s->n_conv = nb_input_channels;
+
+    /* get size of ringbuffer (longest IR plus max. delay) */
+    /* then choose next power of 2 for performance optimization */
+    n_current = s->sofa.n_samples + max_delay(&s->sofa);
+    if (n_current > n_max) {
+        /* length of longest IR plus max. delay (in all SOFA files) */
+        n_max = n_current;
+        /* length of longest IR (without delay, in all SOFA files) */
+        n_max_ir = s->sofa.n_samples;
+    }
+    /* buffer length is longest IR plus max. delay -> next power of 2
+       (32 - count leading zeros gives required exponent)  */
+    s->buffer_length = 1 << (32 - ff_clz(n_max));
+    s->n_fft         = 1 << (32 - ff_clz(n_max + inlink->sample_rate));
+
+    if (s->type == FREQUENCY_DOMAIN) {
+        av_fft_end(s->fft[0]);
+        av_fft_end(s->fft[1]);
+        s->fft[0] = av_fft_init(log2(s->n_fft), 0);
+        s->fft[1] = av_fft_init(log2(s->n_fft), 0);
+        av_fft_end(s->ifft[0]);
+        av_fft_end(s->ifft[1]);
+        s->ifft[0] = av_fft_init(log2(s->n_fft), 1);
+        s->ifft[1] = av_fft_init(log2(s->n_fft), 1);
+
+        if (!s->fft[0] || !s->fft[1] || !s->ifft[0] || !s->ifft[1]) {
+            av_log(ctx, AV_LOG_ERROR, "Unable to create FFT contexts.\n");
+            return AVERROR(ENOMEM);
+        }
+    }
+
+    /* Allocate memory for the impulse responses, delays and the ringbuffers */
+    /* size: (longest IR) * (number of channels to convolute) */
+    s->data_ir[0] = av_malloc_array(n_max_ir, sizeof(float) * s->n_conv);
+    s->data_ir[1] = av_malloc_array(n_max_ir, sizeof(float) * s->n_conv);
+    /* length: number of channels to convolute; sized from the element type (load_data() copies ints in) */
+    s->delay[0] = av_malloc_array(s->n_conv, sizeof(*s->delay[0]));
+    s->delay[1] = av_malloc_array(s->n_conv, sizeof(*s->delay[1]));
+    /* length: (buffer length) * (number of input channels),
+     * OR: buffer length (if frequency domain processing)
+     * calloc zero-initializes the buffer */
+
+    if (s->type == TIME_DOMAIN) {
+        s->ringbuffer[0] = av_calloc(s->buffer_length, sizeof(float) * nb_input_channels);
+        s->ringbuffer[1] = av_calloc(s->buffer_length, sizeof(float) * nb_input_channels);
+    } else {
+        s->ringbuffer[0] = av_calloc(s->buffer_length, sizeof(float));
+        s->ringbuffer[1] = av_calloc(s->buffer_length, sizeof(float));
+        s->temp_fft[0] = av_malloc_array(s->n_fft, sizeof(FFTComplex));
+        s->temp_fft[1] = av_malloc_array(s->n_fft, sizeof(FFTComplex));
+        if (!s->temp_fft[0] || !s->temp_fft[1])
+            return AVERROR(ENOMEM);
+    }
+
+    /* length: number of channels to convolute */
+    s->speaker_azim = av_calloc(s->n_conv, sizeof(*s->speaker_azim));
+    s->speaker_elev = av_calloc(s->n_conv, sizeof(*s->speaker_elev));
+
+    /* memory allocation failed (everything allocated so far is released by uninit()): */
+    if (!s->data_ir[0] || !s->data_ir[1] || !s->delay[1] ||
+        !s->delay[0] || !s->ringbuffer[0] || !s->ringbuffer[1] ||
+        !s->speaker_azim || !s->speaker_elev)
+        return AVERROR(ENOMEM);
+
+    compensate_volume(ctx);
+
+    /* get speaker positions */
+    if ((ret = get_speaker_pos(ctx, s->speaker_azim, s->speaker_elev)) < 0) {
+        av_log(ctx, AV_LOG_ERROR, "Couldn't get speaker positions. Input channel configuration not supported.\n");
+        return ret;
+    }
+
+    /* load IRs to data_ir[0] and data_ir[1] for required directions */
+    if ((ret = load_data(ctx, s->rotation, s->elevation, s->radius)) < 0)
+        return ret;
+
+    av_log(ctx, AV_LOG_DEBUG, "Samplerate: %d Channels to convolute: %d, Length of ringbuffer: %d x %d\n",
+        inlink->sample_rate, s->n_conv, nb_input_channels, s->buffer_length);
+
+    return 0;
+}
+/* Filter uninit callback: release the SOFA tables, the FFT contexts and every buffer owned by the context. */
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    SOFAlizerContext *s = ctx->priv;
+    int ch;
+
+    /* the SOFA tables are only released when s->sofa.ncid is set */
+    if (s->sofa.ncid) {
+        av_freep(&s->sofa.sp_a);
+        av_freep(&s->sofa.sp_e);
+        av_freep(&s->sofa.sp_r);
+        av_freep(&s->sofa.data_delay);
+        av_freep(&s->sofa.data_ir);
+    }
+
+    /* everything that is kept twice, at index 0 and at index 1 */
+    for (ch = 0; ch < 2; ch++) {
+        av_fft_end(s->ifft[ch]);
+        av_fft_end(s->fft[ch]);
+        av_freep(&s->delay[ch]);
+        av_freep(&s->data_ir[ch]);
+        av_freep(&s->ringbuffer[ch]);
+        av_freep(&s->temp_src[ch]);
+        av_freep(&s->temp_fft[ch]);
+        av_freep(&s->data_hrtf[ch]);
+    }
+
+    /* buffers that exist only once */
+    av_freep(&s->speaker_azim);
+    av_freep(&s->speaker_elev);
+    av_freep(&s->fdsp);
+}
+
+#define OFFSET(x) offsetof(SOFAlizerContext, x)
+#define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+/* User options; "time" (0) and "freq" (1) are named constants of the "type" option, whose default is 1. */
+static const AVOption sofalizer_options[] = {
+    { "sofa",      "sofa filename",  OFFSET(filename),  AV_OPT_TYPE_STRING, {.str=NULL},            .flags = FLAGS },
+    { "gain",      "set gain in dB", OFFSET(gain),      AV_OPT_TYPE_FLOAT,  {.dbl=0},     -20,  40, .flags = FLAGS },
+    { "rotation",  "set rotation"  , OFFSET(rotation),  AV_OPT_TYPE_FLOAT,  {.dbl=0},    -360, 360, .flags = FLAGS },
+    { "elevation", "set elevation",  OFFSET(elevation), AV_OPT_TYPE_FLOAT,  {.dbl=0},     -90,  90, .flags = FLAGS },
+    { "radius",    "set radius",     OFFSET(radius),    AV_OPT_TYPE_FLOAT,  {.dbl=1},       0,   3, .flags = FLAGS },
+    { "type",      "set processing", OFFSET(type),      AV_OPT_TYPE_INT,    {.i64=1},       0,   1, .flags = FLAGS, "type" },
+    { "time",      "time domain",      0,               AV_OPT_TYPE_CONST,  {.i64=0},       0,   0, .flags = FLAGS, "type" },
+    { "freq",      "frequency domain", 0,               AV_OPT_TYPE_CONST,  {.i64=1},       0,   0, .flags = FLAGS, "type" },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(sofalizer);
+/* Single audio input pad: config_input is its config_props callback, filter_frame its frame callback. */
+static const AVFilterPad inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_AUDIO,
+        .config_props = config_input,
+        .filter_frame = filter_frame,
+    },
+    { NULL }
+};
+/* Single audio output pad without callbacks; query_formats() restricts its layout to stereo. */
+static const AVFilterPad outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_AUDIO,
+    },
+    { NULL }
+};
+/* Filter definition tying the callbacks, option class and pads above together under the name "sofalizer". */
+AVFilter ff_af_sofalizer = {
+    .name          = "sofalizer",
+    .description   = NULL_IF_CONFIG_SMALL("SOFAlizer (Spatially Oriented Format for Acoustics)."),
+    .priv_size     = sizeof(SOFAlizerContext),
+    .priv_class    = &sofalizer_class,
+    .init          = init,
+    .uninit        = uninit,
+    .query_formats = query_formats,
+    .inputs        = inputs,
+    .outputs       = outputs,
+    .flags         = AVFILTER_FLAG_SLICE_THREADS, /* filter_frame() runs its work through ctx->internal->execute() */
+};
diff --git a/libavfilter/af_stereotools.c b/libavfilter/af_stereotools.c
new file mode 100644
index 00000000..8ab184df
--- /dev/null
+++ b/libavfilter/af_stereotools.c
@@ -0,0 +1,305 @@
+/*
+ * Copyright (C) 2001-2010 Krzysztof Foltman, Markus Schmidt, Thor Harald Johansen
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/channel_layout.h"
+#include "libavutil/opt.h"
+#include "avfilter.h"
+#include "audio.h"
+#include "formats.h"
+/* Private state of the stereotools filter: option values, coefficients derived in config_input(), and the delay buffer. */
+typedef struct StereoToolsContext {
+    const AVClass *class;
+
+    int softclip;          /* "softclip" option */
+    int mute_l;            /* "mutel" option */
+    int mute_r;            /* "muter" option */
+    int phase_l;           /* "phasel" option: inverts the sign of L in filter_frame() */
+    int phase_r;           /* "phaser" option: inverts the sign of R in filter_frame() */
+    int mode;              /* "mode" option, 0..6, see the named constants in the option table */
+    double slev;           /* "slev" option */
+    double sbal;           /* "sbal" option */
+    double mlev;           /* "mlev" option */
+    double mpan;           /* "mpan" option */
+    double phase;          /* "phase" option; converted with /180*M_PI in config_input() */
+    double base;           /* "base" option */
+    double delay;          /* "delay" option; divided by 1000 and scaled by the sample rate in filter_frame() */
+    double balance_in;     /* "balance_in" option */
+    double balance_out;    /* "balance_out" option */
+    double phase_sin_coef; /* sin of the phase angle, set in config_input() */
+    double phase_cos_coef; /* cos of the phase angle, set in config_input() */
+    double sc_level;       /* "sclevel" option */
+    double inv_atan_shape; /* 1 / atan(sc_level), set in config_input() */
+    double level_in;       /* "level_in" option */
+    double level_out;      /* "level_out" option */
+
+    double *buffer;        /* delay buffer of 'length' doubles; L stored at pos, R at pos+1 */
+    int length;            /* number of doubles in buffer */
+    int pos;               /* current write index into buffer, advanced by 2 per sample frame */
+} StereoToolsContext;
+
+#define OFFSET(x) offsetof(StereoToolsContext, x)
+#define A AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+/* User options; the indented "lr>..."/"ms>lr" rows are named constants for "mode" matching the switch in filter_frame(). */
+static const AVOption stereotools_options[] = {
+    { "level_in",    "set level in",     OFFSET(level_in),    AV_OPT_TYPE_DOUBLE, {.dbl=1},   0.015625,  64, A },
+    { "level_out",   "set level out",    OFFSET(level_out),   AV_OPT_TYPE_DOUBLE, {.dbl=1},   0.015625,  64, A },
+    { "balance_in",  "set balance in",   OFFSET(balance_in),  AV_OPT_TYPE_DOUBLE, {.dbl=0},  -1,          1, A },
+    { "balance_out", "set balance out",  OFFSET(balance_out), AV_OPT_TYPE_DOUBLE, {.dbl=0},  -1,          1, A },
+    { "softclip",    "enable softclip",  OFFSET(softclip),    AV_OPT_TYPE_BOOL,   {.i64=0},   0,          1, A },
+    { "mutel",       "mute L",           OFFSET(mute_l),      AV_OPT_TYPE_BOOL,   {.i64=0},   0,          1, A },
+    { "muter",       "mute R",           OFFSET(mute_r),      AV_OPT_TYPE_BOOL,   {.i64=0},   0,          1, A },
+    { "phasel",      "phase L",          OFFSET(phase_l),     AV_OPT_TYPE_BOOL,   {.i64=0},   0,          1, A },
+    { "phaser",      "phase R",          OFFSET(phase_r),     AV_OPT_TYPE_BOOL,   {.i64=0},   0,          1, A },
+    { "mode",        "set stereo mode",  OFFSET(mode),        AV_OPT_TYPE_INT,    {.i64=0},   0,          6, A, "mode" },
+    {     "lr>lr",   0,                  0,                   AV_OPT_TYPE_CONST,  {.i64=0},   0,          0, A, "mode" },
+    {     "lr>ms",   0,                  0,                   AV_OPT_TYPE_CONST,  {.i64=1},   0,          0, A, "mode" },
+    {     "ms>lr",   0,                  0,                   AV_OPT_TYPE_CONST,  {.i64=2},   0,          0, A, "mode" },
+    {     "lr>ll",   0,                  0,                   AV_OPT_TYPE_CONST,  {.i64=3},   0,          0, A, "mode" },
+    {     "lr>rr",   0,                  0,                   AV_OPT_TYPE_CONST,  {.i64=4},   0,          0, A, "mode" },
+    {     "lr>l+r",  0,                  0,                   AV_OPT_TYPE_CONST,  {.i64=5},   0,          0, A, "mode" },
+    {     "lr>rl",   0,                  0,                   AV_OPT_TYPE_CONST,  {.i64=6},   0,          0, A, "mode" },
+    { "slev",        "set side level",   OFFSET(slev),        AV_OPT_TYPE_DOUBLE, {.dbl=1},   0.015625,  64, A },
+    { "sbal",        "set side balance", OFFSET(sbal),        AV_OPT_TYPE_DOUBLE, {.dbl=0},  -1,          1, A },
+    { "mlev",        "set middle level", OFFSET(mlev),        AV_OPT_TYPE_DOUBLE, {.dbl=1},   0.015625,  64, A },
+    { "mpan",        "set middle pan",   OFFSET(mpan),        AV_OPT_TYPE_DOUBLE, {.dbl=0},  -1,          1, A },
+    { "base",        "set stereo base",  OFFSET(base),        AV_OPT_TYPE_DOUBLE, {.dbl=0},  -1,          1, A },
+    { "delay",       "set delay",        OFFSET(delay),       AV_OPT_TYPE_DOUBLE, {.dbl=0}, -20,         20, A },
+    { "sclevel",     "set S/C level",    OFFSET(sc_level),    AV_OPT_TYPE_DOUBLE, {.dbl=1},   1,        100, A },
+    { "phase",       "set stereo phase", OFFSET(phase),       AV_OPT_TYPE_DOUBLE, {.dbl=0},   0,        360, A },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(stereotools);
+/* Format negotiation: AV_SAMPLE_FMT_DBL samples, stereo layout and all sample rates on every link. */
+static int query_formats(AVFilterContext *ctx)
+{
+    AVFilterFormats *fmts = NULL;
+    AVFilterChannelLayouts *layouts = NULL;
+    int err = ff_add_format(&fmts, AV_SAMPLE_FMT_DBL);
+    /* each step only runs while all previous ones succeeded */
+    if (err >= 0)
+        err = ff_set_common_formats(ctx, fmts);
+    if (err >= 0)
+        err = ff_add_channel_layout(&layouts, AV_CH_LAYOUT_STEREO);
+    if (err >= 0)
+        err = ff_set_common_channel_layouts(ctx, layouts);
+    if (err < 0)
+        return err;
+    return ff_set_common_samplerates(ctx, ff_all_samplerates());
+}
+/* Input link setup: size and allocate the delay buffer, then precompute the softclip and phase coefficients. */
+static int config_input(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    StereoToolsContext *s = ctx->priv;
+
+    s->length = 2 * inlink->sample_rate * 0.05; /* two doubles (L, R) per sample frame, 0.05 s worth of frames */
+    if (s->length <= 1 || s->length & 1) { /* filter_frame() writes pos and pos+1: an even length >= 2 is required */
+        av_log(ctx, AV_LOG_ERROR, "sample rate is too small\n");
+        return AVERROR(EINVAL);
+    }
+    s->buffer = av_calloc(s->length, sizeof(*s->buffer));
+    if (!s->buffer)
+        return AVERROR(ENOMEM);
+
+    s->inv_atan_shape = 1.0 / atan(s->sc_level); /* scale factor applied to the atan() softclip in filter_frame() */
+    s->phase_cos_coef = cos(s->phase / 180 * M_PI); /* s->phase is treated as degrees here */
+    s->phase_sin_coef = sin(s->phase / 180 * M_PI);
+
+    return 0;
+}
+/* Process one interleaved stereo frame of doubles, in place when the input is writable, and pass it downstream. */
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    StereoToolsContext *s = ctx->priv;
+    const double *src = (const double *)in->data[0];
+    const double sb = s->base < 0 ? s->base * 0.5 : s->base; /* negative "base" values are halved */
+    const double sbal = 1 + s->sbal; /* shifted from -1..1 to 0..2 for the FFMIN() terms below */
+    const double mpan = 1 + s->mpan; /* same shift as sbal */
+    const double slev = s->slev;
+    const double mlev = s->mlev;
+    const double balance_in = s->balance_in;
+    const double balance_out = s->balance_out;
+    const double level_in = s->level_in;
+    const double level_out = s->level_out;
+    const double sc_level = s->sc_level;
+    const double delay = s->delay;
+    const int length = s->length;
+    const int mute_l = s->mute_l;
+    const int mute_r = s->mute_r;
+    const int phase_l = s->phase_l;
+    const int phase_r = s->phase_r;
+    double *buffer = s->buffer;
+    AVFrame *out;
+    double *dst;
+    int nbuf = inlink->sample_rate * (fabs(delay) / 1000.); /* |delay| / 1000 scaled by the sample rate */
+    int n;
+
+    nbuf -= nbuf % 2; /* forced even: buffer holds L at even and R at odd indices */
+    if (av_frame_is_writable(in)) {
+        out = in; /* process in place */
+    } else {
+        out = ff_get_audio_buffer(inlink, in->nb_samples);
+        if (!out) {
+            av_frame_free(&in);
+            return AVERROR(ENOMEM);
+        }
+        av_frame_copy_props(out, in);
+    }
+    dst = (double *)out->data[0];
+
+    for (n = 0; n < in->nb_samples; n++, src += 2, dst += 2) {
+        double L = src[0], R = src[1], l, r, m, S;
+
+        L *= level_in;
+        R *= level_in;
+
+        L *= 1. - FFMAX(0., balance_in); /* positive balance attenuates L */
+        R *= 1. + FFMIN(0., balance_in); /* negative balance attenuates R */
+
+        if (s->softclip) {
+            R = s->inv_atan_shape * atan(R * sc_level);
+            L = s->inv_atan_shape * atan(L * sc_level);
+        }
+
+        switch (s->mode) { /* values outside 0..6 leave L and R untouched */
+        case 0: /* "lr>lr": split into mid/side, apply levels and pans, recombine */
+            m = (L + R) * 0.5;
+            S = (L - R) * 0.5;
+            l = m * mlev * FFMIN(1., 2. - mpan) + S * slev * FFMIN(1., 2. - sbal);
+            r = m * mlev * FFMIN(1., mpan)      - S * slev * FFMIN(1., sbal);
+            L = l;
+            R = r;
+            break;
+        case 1: /* "lr>ms": output mid in L and side in R */
+            l = L * FFMIN(1., 2. - sbal);
+            r = R * FFMIN(1., sbal);
+            L = 0.5 * (l + r) * mlev;
+            R = 0.5 * (l - r) * slev;
+            break;
+        case 2: /* "ms>lr": input L is treated as mid, input R as side */
+            l = L * mlev * FFMIN(1., 2. - mpan) + R * slev * FFMIN(1., 2. - sbal);
+            r = L * mlev * FFMIN(1., mpan)      - R * slev * FFMIN(1., sbal);
+            L = l;
+            R = r;
+            break;
+        case 3: /* "lr>ll" */
+            R = L;
+            break;
+        case 4: /* "lr>rr" */
+            L = R;
+            break;
+        case 5: /* "lr>l+r": both outputs carry the average */
+            L = (L + R) / 2;
+            R = L;
+            break;
+        case 6: /* "lr>rl": swap the channels, then the same processing as case 0 */
+            l = L;
+            L = R;
+            R = l;
+            m = (L + R) * 0.5;
+            S = (L - R) * 0.5;
+            l = m * mlev * FFMIN(1., 2. - mpan) + S * slev * FFMIN(1., 2. - sbal);
+            r = m * mlev * FFMIN(1., mpan)      - S * slev * FFMIN(1., sbal);
+            L = l;
+            R = r;
+            break;
+        }
+
+        L *= 1. - mute_l; /* the mute flags are 0 or 1 */
+        R *= 1. - mute_r;
+
+        L *= (2. * (1. - phase_l)) - 1.; /* factor is +1 when the flag is 0 and -1 when it is 1 */
+        R *= (2. * (1. - phase_r)) - 1.;
+
+        buffer[s->pos  ] = L; /* store the current pair in the delay buffer */
+        buffer[s->pos+1] = R;
+
+        if (delay > 0.) { /* positive delay: take R from nbuf entries back */
+            R = buffer[(s->pos - (int)nbuf + 1 + length) % length];
+        } else if (delay < 0.) { /* negative delay: take L from nbuf entries back */
+            L = buffer[(s->pos - (int)nbuf + length)     % length];
+        }
+
+        l = L + sb * L - sb * R; /* stereo base: add or remove the L/R difference scaled by sb */
+        r = R + sb * R - sb * L;
+
+        L = l;
+        R = r;
+
+        l = L * s->phase_cos_coef - R * s->phase_sin_coef; /* rotate (L, R) by the phase angle */
+        r = L * s->phase_sin_coef + R * s->phase_cos_coef;
+
+        L = l;
+        R = r;
+
+        s->pos = (s->pos + 2) % s->length; /* advance the write index with wrap-around */
+
+        L *= 1. - FFMAX(0., balance_out);
+        R *= 1. + FFMIN(0., balance_out);
+
+        L *= level_out;
+        R *= level_out;
+
+        dst[0] = L;
+        dst[1] = R;
+    }
+
+    if (out != in)
+        av_frame_free(&in);
+    return ff_filter_frame(outlink, out);
+}
+/* Filter uninit callback: release the delay buffer allocated in config_input(). */
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    StereoToolsContext *priv = ctx->priv;
+
+    av_freep(&priv->buffer);
+}
+/* Single audio input pad: filter_frame is its frame callback, config_input its config_props callback. */
+static const AVFilterPad inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_AUDIO,
+        .filter_frame = filter_frame,
+        .config_props = config_input,
+    },
+    { NULL }
+};
+/* Single audio output pad without callbacks. */
+static const AVFilterPad outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_AUDIO,
+    },
+    { NULL }
+};
+/* Filter definition for "stereotools"; no init callback is set, state is prepared in config_input(). */
+AVFilter ff_af_stereotools = {
+    .name           = "stereotools",
+    .description    = NULL_IF_CONFIG_SMALL("Apply various stereo tools."),
+    .query_formats  = query_formats,
+    .priv_size      = sizeof(StereoToolsContext),
+    .priv_class     = &stereotools_class,
+    .uninit         = uninit,
+    .inputs         = inputs,
+    .outputs        = outputs,
+};
diff --git a/libavfilter/af_stereowiden.c b/libavfilter/af_stereowiden.c
new file mode 100644
index 00000000..154a8b15
--- /dev/null
+++ b/libavfilter/af_stereowiden.c
@@ -0,0 +1,167 @@
+/*
+ * Copyright (C) 2012 VLC authors and VideoLAN
+ * Author : Sukrit Sangwan < sukritsangwan at gmail dot com >
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/channel_layout.h"
+#include "libavutil/opt.h"
+#include "avfilter.h"
+#include "audio.h"
+#include "formats.h"
+/* Private state of the stereowiden filter: option values plus the interleaved L/R delay buffer. */
+typedef struct StereoWidenContext {
+    const AVClass *class;
+
+    float delay;     /* "delay" option; scaled by sample_rate / 1000 in config_input() */
+    float feedback;  /* "feedback" option: weight of the delayed samples in filter_frame() */
+    float crossfeed; /* "crossfeed" option: weight of the opposite input channel */
+    float drymix;    /* "drymix" option: weight of the same-side input channel */
+
+    float *buffer;   /* delay buffer of 'length' floats, allocated in config_input() */
+    float *write;    /* current write position inside buffer, advanced by 2 per sample frame */
+    int length;      /* number of floats in buffer */
+} StereoWidenContext;
+
+#define OFFSET(x) offsetof(StereoWidenContext, x)
+#define A AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+/* User options: delay (1..100, default 20) and the three mix weights used in filter_frame(). */
+static const AVOption stereowiden_options[] = {
+    { "delay",     "set delay time",    OFFSET(delay),     AV_OPT_TYPE_FLOAT, {.dbl=20}, 1, 100, A },
+    { "feedback",  "set feedback gain", OFFSET(feedback),  AV_OPT_TYPE_FLOAT, {.dbl=.3}, 0, 0.9, A },
+    { "crossfeed", "set cross feed",    OFFSET(crossfeed), AV_OPT_TYPE_FLOAT, {.dbl=.3}, 0, 0.8, A },
+    { "drymix",    "set dry-mix",       OFFSET(drymix),    AV_OPT_TYPE_FLOAT, {.dbl=.8}, 0, 1.0, A },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(stereowiden);
+/* Format negotiation: AV_SAMPLE_FMT_FLT samples, stereo layout and all sample rates on every link. */
+static int query_formats(AVFilterContext *ctx)
+{
+    AVFilterFormats *fmts = NULL;
+    AVFilterChannelLayouts *layouts = NULL;
+    int err = ff_add_format(&fmts, AV_SAMPLE_FMT_FLT);
+    /* each step only runs while all previous ones succeeded */
+    if (err >= 0)
+        err = ff_set_common_formats(ctx, fmts);
+    if (err >= 0)
+        err = ff_add_channel_layout(&layouts, AV_CH_LAYOUT_STEREO);
+    if (err >= 0)
+        err = ff_set_common_channel_layouts(ctx, layouts);
+    if (err < 0)
+        return err;
+    return ff_set_common_samplerates(ctx, ff_all_samplerates());
+}
+/* Input link setup: size the interleaved L/R delay buffer from s->delay (scaled by rate/1000) and allocate it zeroed. */
+static int config_input(AVFilterLink *inlink)
+{
+    StereoWidenContext *s = inlink->dst->priv;
+    /* count whole sample frames first, then double: the interleaved length is always even */
+    s->length = 2 * (int)(s->delay * inlink->sample_rate / 1000);
+    if (s->length < 2) /* filter_frame() needs room for at least one L/R pair */
+        return AVERROR(EINVAL);
+    s->buffer = av_calloc(s->length, sizeof(*s->buffer));
+    if (!s->buffer)
+        return AVERROR(ENOMEM);
+    s->write = s->buffer;
+    return 0;
+}
+/* Process one interleaved stereo float frame, in place when writable, mixing in delayed samples from the ring buffer. */
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    StereoWidenContext *s = ctx->priv;
+    const float *src = (const float *)in->data[0];
+    const float drymix = s->drymix;
+    const float crossfeed = s->crossfeed;
+    const float feedback = s->feedback;
+    AVFrame *out;
+    float *dst;
+    int n;
+
+    if (av_frame_is_writable(in)) {
+        out = in;
+    } else {
+        out = ff_get_audio_buffer(inlink, in->nb_samples);
+        if (!out) {
+            av_frame_free(&in);
+            return AVERROR(ENOMEM);
+        }
+        av_frame_copy_props(out, in);
+    }
+    dst = (float *)out->data[0];
+
+    for (n = 0; n < in->nb_samples; n++, src += 2, dst += 2) {
+        const float left = src[0], right = src[1];
+        float *read = s->write + 2;
+        /* wrap as soon as the pair read[0], read[1] would no longer fit inside the buffer */
+        if (read - s->buffer > s->length - 2)
+            read = s->buffer;
+
+        dst[0] = drymix * left - crossfeed * right - feedback * read[1];
+        dst[1] = drymix * right - crossfeed * left - feedback * read[0];
+
+        s->write[0] = left;
+        s->write[1] = right;
+
+        /* the slot just read holds the oldest pair and is the next one to
+         * overwrite; reusing the already wrapped pointer keeps s->write
+         * inside the buffer (it used to run up to buffer + length) */
+        s->write = read;
+    }
+
+    if (out != in)
+        av_frame_free(&in);
+    return ff_filter_frame(outlink, out);
+}
+/* Filter uninit callback: release the delay buffer allocated in config_input(). */
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    StereoWidenContext *priv = ctx->priv;
+
+    av_freep(&priv->buffer);
+}
+/* Single audio input pad: filter_frame is its frame callback, config_input its config_props callback. */
+static const AVFilterPad inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_AUDIO,
+        .filter_frame = filter_frame,
+        .config_props = config_input,
+    },
+    { NULL }
+};
+/* Single audio output pad without callbacks. */
+static const AVFilterPad outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_AUDIO,
+    },
+    { NULL }
+};
+/* Filter definition for "stereowiden"; no init callback is set, state is prepared in config_input(). */
+AVFilter ff_af_stereowiden = {
+    .name           = "stereowiden",
+    .description    = NULL_IF_CONFIG_SMALL("Apply stereo widening effect."),
+    .query_formats  = query_formats,
+    .priv_size      = sizeof(StereoWidenContext),
+    .priv_class     = &stereowiden_class,
+    .uninit         = uninit,
+    .inputs         = inputs,
+    .outputs        = outputs,
+};
diff --git a/libavfilter/af_tremolo.c b/libavfilter/af_tremolo.c
new file mode 100644
index 00000000..572e9e3b
--- /dev/null
+++ b/libavfilter/af_tremolo.c
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2015 Kyle Swanson <k@ylo.ph>.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/opt.h"
+#include "avfilter.h"
+#include "internal.h"
+#include "audio.h"
+
+typedef struct TremoloContext { /* private state of the tremolo filter */
+    const AVClass *class;
+    double freq;   /* "f" option: frequency in Hz */
+    double depth;  /* "d" option: depth, range 0..1 */
+    double *table; /* gain values computed in config_input() */
+    int index;     /* current read position in table */
+} TremoloContext;
+
+#define OFFSET(x) offsetof(TremoloContext, x) /* byte offset of field x in the private context */
+#define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption tremolo_options[] = { /* columns: name, help, offset, type, default, min, max, flags */
+    { "f", "set frequency in hertz",    OFFSET(freq),    AV_OPT_TYPE_DOUBLE,   {.dbl = 5.0},   0.1,   20000.0, FLAGS },
+    { "d", "set depth as percentage",   OFFSET(depth),   AV_OPT_TYPE_DOUBLE,   {.dbl = 0.5},   0.0,   1.0,     FLAGS },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(tremolo); /* provides tremolo_class, referenced by ff_af_tremolo */
+
+/* Multiply each interleaved sample by the gain table entry for the current position. */
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    TremoloContext *s = ctx->priv;
+    const int channels = inlink->channels;
+    const int nb_samples = in->nb_samples;
+    const double *src = (const double *)in->data[0];
+    AVFrame *out = in;
+    double *dst;
+    int n, c;
+
+    /* Process in place when the frame is writable, else into a new buffer. */
+    if (!av_frame_is_writable(in)) {
+        out = ff_get_audio_buffer(inlink, nb_samples);
+        if (!out) {
+            av_frame_free(&in);
+            return AVERROR(ENOMEM);
+        }
+        av_frame_copy_props(out, in);
+    }
+    dst = (double *)out->data[0];
+
+    for (n = 0; n < nb_samples; n++, src += channels, dst += channels) {
+        /* All channels of one sample period share the same gain. */
+        for (c = 0; c < channels; c++)
+            dst[c] = src[c] * s->table[s->index];
+
+        /* Step through the table, restarting after one modulation period. */
+        if (++s->index >= inlink->sample_rate / s->freq)
+            s->index = 0;
+    }
+
+    if (out != in)
+        av_frame_free(&in);
+
+    return ff_filter_frame(outlink, out);
+}
+
+/* Advertise packed double samples with any channel count and any sample rate. */
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVSampleFormat sample_fmts[] = {
+        AV_SAMPLE_FMT_DBL,
+        AV_SAMPLE_FMT_NONE
+    };
+    AVFilterChannelLayouts *layouts = ff_all_channel_counts();
+    AVFilterFormats *formats;
+    int ret;
+
+    /* Channel layouts / counts. */
+    if (!layouts)
+        return AVERROR(ENOMEM);
+    if ((ret = ff_set_common_channel_layouts(ctx, layouts)) < 0)
+        return ret;
+
+    /* Sample format. */
+    if (!(formats = ff_make_format_list(sample_fmts)))
+        return AVERROR(ENOMEM);
+    if ((ret = ff_set_common_formats(ctx, formats)) < 0)
+        return ret;
+
+    /* Sample rates. */
+    if (!(formats = ff_all_samplerates()))
+        return AVERROR(ENOMEM);
+
+    return ff_set_common_samplerates(ctx, formats);
+}
+
+/* Filter teardown: release the gain table allocated in config_input(). */
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    av_freep(&((TremoloContext *)ctx->priv)->table);
+}
+
+/* Build one period of gain values; returns 0 or AVERROR(ENOMEM). */
+static int config_input(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    TremoloContext *s = ctx->priv;
+    const double offset = 1. - s->depth / 2.;
+    /* Round up: filter_frame() uses every index i < sample_rate / freq, so a truncated count is one short. */
+    const int table_size = ceil(inlink->sample_rate / s->freq);
+    int i;
+
+    s->table = av_malloc_array(table_size, sizeof(*s->table));
+    if (!s->table)
+        return AVERROR(ENOMEM);
+    for (i = 0; i < table_size; i++) {
+        double env = s->freq * i / inlink->sample_rate;
+        env = sin(2 * M_PI * fmod(env + 0.25, 1.0));
+        s->table[i] = env * (1 - fabs(offset)) + offset;
+    }
+    s->index = 0;
+    return 0;
+}
+
+static const AVFilterPad avfilter_af_tremolo_inputs[] = { /* single audio input pad */
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_AUDIO,
+        .config_props = config_input, /* builds the gain table */
+        .filter_frame = filter_frame, /* called with each incoming frame */
+    },
+    { NULL } /* zeroed entry ends the list */
+};
+
+static const AVFilterPad avfilter_af_tremolo_outputs[] = { /* single audio output pad, no callbacks set */
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_AUDIO,
+    },
+    { NULL } /* zeroed entry ends the list */
+};
+
+AVFilter ff_af_tremolo = { /* definition of the "tremolo" audio filter */
+    .name          = "tremolo",
+    .description   = NULL_IF_CONFIG_SMALL("Apply tremolo effect."),
+    .priv_size     = sizeof(TremoloContext), /* size of ctx->priv */
+    .priv_class    = &tremolo_class,
+    .uninit        = uninit,                 /* frees the gain table */
+    .query_formats = query_formats,          /* restricts input to AV_SAMPLE_FMT_DBL */
+    .inputs        = avfilter_af_tremolo_inputs,
+    .outputs       = avfilter_af_tremolo_outputs,
+};
diff --git a/libavfilter/af_vibrato.c b/libavfilter/af_vibrato.c
new file mode 100644
index 00000000..c7691f2f
--- /dev/null
+++ b/libavfilter/af_vibrato.c
@@ -0,0 +1,210 @@
+/*
+ * Copyright (c) 2015 Kyle Swanson <k@ylo.ph>.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/opt.h"
+#include "avfilter.h"
+#include "internal.h"
+#include "audio.h"
+#include "generate_wave_table.h"
+
+typedef struct VibratoContext { /* private state of the vibrato filter */
+    const AVClass *class;
+    double freq;          /* "f" option: frequency in Hz */
+    double depth;         /* "d" option: depth, range 0..1 */
+    int channels;         /* channel count copied from the input link */
+
+    double **buf;         /* one sample ring buffer per channel */
+    int buf_index;        /* current write position, shared by all channels */
+    int buf_size;         /* number of samples in each channel buffer */
+
+    double *wave_table;   /* values filled by ff_generate_wave_table() */
+    int wave_table_index; /* current read position in wave_table */
+    int wave_table_size;  /* number of entries in wave_table */
+} VibratoContext;
+
+#define OFFSET(x) offsetof(VibratoContext, x) /* byte offset of field x in the private context */
+#define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption vibrato_options[] = { /* columns: name, help, offset, type, default, min, max, flags */
+    { "f", "set frequency in hertz",    OFFSET(freq),    AV_OPT_TYPE_DOUBLE,   {.dbl = 5.0},   0.1,   20000.0, FLAGS },
+    { "d", "set depth as percentage",   OFFSET(depth),   AV_OPT_TYPE_DOUBLE,   {.dbl = 0.5},   0.00,  1.0,     FLAGS },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(vibrato); /* provides vibrato_class, referenced by ff_af_vibrato */
+
+/* Output a linear interpolation of two adjacent ring-buffer entries at a wave-table-driven offset, then buffer the input sample. */
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    VibratoContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AVFrame *out;
+    int n, c;
+    const double *src;
+    double *dst;
+    /* Filter in place when the input frame is writable, otherwise into a new buffer. */
+    if (av_frame_is_writable(in)) {
+        out = in;
+    } else {
+        out = ff_get_audio_buffer(inlink, in->nb_samples);
+        if (!out) {
+            av_frame_free(&in);
+            return AVERROR(ENOMEM);
+        }
+        av_frame_copy_props(out, in);
+    }
+
+    for (n = 0; n < in->nb_samples; n++) {
+        double integer, decimal; /* whole and fractional parts of the read offset */
+        decimal = modf(s->depth * s->wave_table[s->wave_table_index], &integer);
+        /* Advance the wave table position, wrapping at the end. */
+        s->wave_table_index++;
+        if (s->wave_table_index >= s->wave_table_size)
+            s->wave_table_index -= s->wave_table_size;
+
+        for (c = 0; c < inlink->channels; c++) {
+            int samp1_index, samp2_index;
+            double *buf;
+            double this_samp;
+
+            src = (const double *)in->extended_data[c];
+            dst = (double *)out->extended_data[c];
+            buf = s->buf[c];
+            /* Two consecutive read slots, each wrapped back into the buffer. */
+            samp1_index = s->buf_index + integer;
+            if (samp1_index >= s->buf_size)
+                samp1_index -= s->buf_size;
+            samp2_index = samp1_index + 1;
+            if (samp2_index >= s->buf_size)
+                samp2_index -= s->buf_size;
+            /* Read the input before writing dst: both alias when filtering in place. */
+            this_samp = src[n];
+            dst[n] = buf[samp1_index] + (decimal * (buf[samp2_index] - buf[samp1_index]));
+            buf[s->buf_index] = this_samp;
+        }
+        s->buf_index++;
+        if (s->buf_index >= s->buf_size)
+            s->buf_index -= s->buf_size;
+    }
+
+    if (in != out)
+        av_frame_free(&in);
+
+    return ff_filter_frame(outlink, out);
+}
+
+/* Advertise planar double samples with any channel count and any sample rate. */
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVSampleFormat sample_fmts[] = {
+        AV_SAMPLE_FMT_DBLP,
+        AV_SAMPLE_FMT_NONE
+    };
+    AVFilterChannelLayouts *layouts = ff_all_channel_counts();
+    AVFilterFormats *formats;
+    int ret;
+
+    /* Channel layouts / counts. */
+    if (!layouts)
+        return AVERROR(ENOMEM);
+    if ((ret = ff_set_common_channel_layouts(ctx, layouts)) < 0)
+        return ret;
+
+    /* Sample format. */
+    if (!(formats = ff_make_format_list(sample_fmts)))
+        return AVERROR(ENOMEM);
+    if ((ret = ff_set_common_formats(ctx, formats)) < 0)
+        return ret;
+
+    /* Sample rates. */
+    if (!(formats = ff_all_samplerates()))
+        return AVERROR(ENOMEM);
+
+    return ff_set_common_samplerates(ctx, formats);
+}
+
+/* Free the wave table and the per-channel buffers; safe after a failed config_input(). */
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    VibratoContext *s = ctx->priv;
+    int c;
+    av_freep(&s->wave_table);
+    for (c = 0; s->buf && c < s->channels; c++) /* buf is NULL if its allocation failed */
+        av_freep(&s->buf[c]);
+    av_freep(&s->buf);
+}
+
+/* Allocate the per-channel sample buffers and the wave table; returns 0 or AVERROR(ENOMEM). */
+static int config_input(AVFilterLink *inlink)
+{
+    int c;
+    AVFilterContext *ctx = inlink->dst;
+    VibratoContext *s = ctx->priv;
+    s->channels = inlink->channels;
+    s->buf = av_calloc(inlink->channels, sizeof(*s->buf));
+    if (!s->buf)
+        return AVERROR(ENOMEM);
+    /* 5 ms of samples, but at least one so filter_frame() never indexes an empty buffer. */
+    s->buf_size = FFMAX(1, (int)(inlink->sample_rate * 0.005));
+    for (c = 0; c < s->channels; c++) {
+        s->buf[c] = av_calloc(s->buf_size, sizeof(*s->buf[c])); /* zeroed: read before first write */
+        if (!s->buf[c])
+            return AVERROR(ENOMEM);
+    }
+    s->buf_index = 0;
+    /* One modulation period, but at least one entry since "f" may exceed the sample rate. */
+    s->wave_table_size = FFMAX(1, (int)(inlink->sample_rate / s->freq));
+    s->wave_table = av_malloc_array(s->wave_table_size, sizeof(*s->wave_table));
+    if (!s->wave_table)
+        return AVERROR(ENOMEM);
+    ff_generate_wave_table(WAVE_SIN, AV_SAMPLE_FMT_DBL, s->wave_table, s->wave_table_size, 0.0, s->buf_size - 1, 3.0 * M_PI_2);
+    s->wave_table_index = 0;
+    return 0;
+}
+
+static const AVFilterPad avfilter_af_vibrato_inputs[] = { /* single audio input pad */
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_AUDIO,
+        .config_props = config_input, /* allocates buffers and the wave table */
+        .filter_frame = filter_frame, /* called with each incoming frame */
+    },
+    { NULL } /* zeroed entry ends the list */
+};
+
+static const AVFilterPad avfilter_af_vibrato_outputs[] = { /* single audio output pad, no callbacks set */
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_AUDIO,
+    },
+    { NULL } /* zeroed entry ends the list */
+};
+
+AVFilter ff_af_vibrato = { /* definition of the "vibrato" audio filter */
+    .name          = "vibrato",
+    .description   = NULL_IF_CONFIG_SMALL("Apply vibrato effect."),
+    .priv_size     = sizeof(VibratoContext), /* size of ctx->priv */
+    .priv_class    = &vibrato_class,
+    .uninit        = uninit,                 /* frees buffers and the wave table */
+    .query_formats = query_formats,          /* restricts input to AV_SAMPLE_FMT_DBLP */
+    .inputs        = avfilter_af_vibrato_inputs,
+    .outputs       = avfilter_af_vibrato_outputs,
+};
diff --git a/libavfilter/af_volume.c b/libavfilter/af_volume.c
index 54bffa60..3913e7b2 100644
--- a/libavfilter/af_volume.c
+++ b/libavfilter/af_volume.c
@@ -74,16 +74,16 @@ static const AVOption volume_options[] = {
          { "once",  "eval volume expression once", 0, AV_OPT_TYPE_CONST, {.i64=EVAL_MODE_ONCE},  .flags = A|F, .unit = "eval" },
          { "frame", "eval volume expression per-frame",                  0, AV_OPT_TYPE_CONST, {.i64=EVAL_MODE_FRAME}, .flags = A|F, .unit = "eval" },
     { "replaygain", "Apply replaygain side data when present",
-            OFFSET(replaygain), AV_OPT_TYPE_INT, { .i64 = REPLAYGAIN_DROP }, REPLAYGAIN_DROP, REPLAYGAIN_ALBUM, A, "replaygain" },
-        { "drop",   "replaygain side data is dropped", 0, AV_OPT_TYPE_CONST, { .i64 = REPLAYGAIN_DROP   }, 0, 0, A, "replaygain" },
-        { "ignore", "replaygain side data is ignored", 0, AV_OPT_TYPE_CONST, { .i64 = REPLAYGAIN_IGNORE }, 0, 0, A, "replaygain" },
-        { "track",  "track gain is preferred",         0, AV_OPT_TYPE_CONST, { .i64 = REPLAYGAIN_TRACK  }, 0, 0, A, "replaygain" },
-        { "album",  "album gain is preferred",         0, AV_OPT_TYPE_CONST, { .i64 = REPLAYGAIN_ALBUM  }, 0, 0, A, "replaygain" },
+            OFFSET(replaygain), AV_OPT_TYPE_INT, { .i64 = REPLAYGAIN_DROP }, REPLAYGAIN_DROP, REPLAYGAIN_ALBUM, A|F, "replaygain" },
+        { "drop",   "replaygain side data is dropped", 0, AV_OPT_TYPE_CONST, { .i64 = REPLAYGAIN_DROP   }, 0, 0, A|F, "replaygain" },
+        { "ignore", "replaygain side data is ignored", 0, AV_OPT_TYPE_CONST, { .i64 = REPLAYGAIN_IGNORE }, 0, 0, A|F, "replaygain" },
+        { "track",  "track gain is preferred",         0, AV_OPT_TYPE_CONST, { .i64 = REPLAYGAIN_TRACK  }, 0, 0, A|F, "replaygain" },
+        { "album",  "album gain is preferred",         0, AV_OPT_TYPE_CONST, { .i64 = REPLAYGAIN_ALBUM  }, 0, 0, A|F, "replaygain" },
     { "replaygain_preamp", "Apply replaygain pre-amplification",
-            OFFSET(replaygain_preamp), AV_OPT_TYPE_DOUBLE, { .dbl = 0.0 }, -15.0, 15.0, A },
+            OFFSET(replaygain_preamp), AV_OPT_TYPE_DOUBLE, { .dbl = 0.0 }, -15.0, 15.0, A|F },
     { "replaygain_noclip", "Apply replaygain clipping prevention",
-            OFFSET(replaygain_noclip), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, A },
-    { NULL },
+            OFFSET(replaygain_noclip), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, A|F },
+    { NULL }
 };
 
 AVFILTER_DEFINE_CLASS(volume);
@@ -279,7 +279,7 @@ static int set_volume(AVFilterContext *ctx)
         av_log(ctx, AV_LOG_VERBOSE, "volume_i:%d/255 ", vol->volume_i);
     }
     av_log(ctx, AV_LOG_VERBOSE, "volume:%f volume_dB:%f\n",
-           vol->volume, 20.0*log(vol->volume)/M_LN10);
+           vol->volume, 20.0*log10(vol->volume));
 
     volume_init(vol);
     return 0;
@@ -376,7 +376,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *buf)
             av_log(inlink->dst, AV_LOG_VERBOSE,
                    "Using gain %f dB from replaygain side data.\n", g);
 
-            vol->volume   = pow(10, (g + vol->replaygain_preamp) / 20);
+            vol->volume   = ff_exp10((g + vol->replaygain_preamp) / 20);
             if (vol->replaygain_noclip)
                 vol->volume = FFMIN(vol->volume, 1.0 / p);
             vol->volume_i = (int)(vol->volume * 256 + 0.5);
diff --git a/libavfilter/af_volume.h b/libavfilter/af_volume.h
index aff75267..af46e34f 100644
--- a/libavfilter/af_volume.h
+++ b/libavfilter/af_volume.h
@@ -21,8 +21,8 @@
  * audio volume filter
  */
 
-#ifndef AVFILTER_AF_VOLUME_H
-#define AVFILTER_AF_VOLUME_H
+#ifndef AVFILTER_VOLUME_H
+#define AVFILTER_VOLUME_H
 
 #include "libavutil/common.h"
 #include "libavutil/eval.h"
@@ -90,4 +90,4 @@ typedef struct VolumeContext {
 
 void ff_volume_init_x86(VolumeContext *vol);
 
-#endif /* AVFILTER_AF_VOLUME_H */
+#endif /* AVFILTER_VOLUME_H */
diff --git a/libavfilter/af_volumedetect.c b/libavfilter/af_volumedetect.c
index 01f24bae..4815bccf 100644
--- a/libavfilter/af_volumedetect.c
+++ b/libavfilter/af_volumedetect.c
@@ -78,7 +78,7 @@ static inline double logdb(uint64_t v)
     double d = v / (double)(0x8000 * 0x8000);
     if (!v)
         return MAX_DB;
-    return log(d) * -4.3429448190325182765112891891660508229; /* -10/log(10) */
+    return -log10(d) * 10;
 }
 
 static void print_stats(AVFilterContext *ctx)
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 0244585a..fa7d3049 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -45,20 +45,31 @@ void avfilter_register_all(void)
         return;
     initialized = 1;
 
+    REGISTER_FILTER(ACOMPRESSOR,    acompressor,    af);
+    REGISTER_FILTER(ACROSSFADE,     acrossfade,     af);
     REGISTER_FILTER(ADELAY,         adelay,         af);
     REGISTER_FILTER(AECHO,          aecho,          af);
+    REGISTER_FILTER(AEMPHASIS,      aemphasis,      af);
     REGISTER_FILTER(AEVAL,          aeval,          af);
     REGISTER_FILTER(AFADE,          afade,          af);
+    REGISTER_FILTER(AFFTFILT,       afftfilt,       af);
     REGISTER_FILTER(AFORMAT,        aformat,        af);
+    REGISTER_FILTER(AGATE,          agate,          af);
     REGISTER_FILTER(AINTERLEAVE,    ainterleave,    af);
+    REGISTER_FILTER(ALIMITER,       alimiter,       af);
     REGISTER_FILTER(ALLPASS,        allpass,        af);
     REGISTER_FILTER(AMERGE,         amerge,         af);
+    REGISTER_FILTER(AMETADATA,      ametadata,      af);
     REGISTER_FILTER(AMIX,           amix,           af);
+    REGISTER_FILTER(ANEQUALIZER,    anequalizer,    af);
     REGISTER_FILTER(ANULL,          anull,          af);
     REGISTER_FILTER(APAD,           apad,           af);
     REGISTER_FILTER(APERMS,         aperms,         af);
     REGISTER_FILTER(APHASER,        aphaser,        af);
+    REGISTER_FILTER(APULSATOR,      apulsator,      af);
+    REGISTER_FILTER(AREALTIME,      arealtime,      af);
     REGISTER_FILTER(ARESAMPLE,      aresample,      af);
+    REGISTER_FILTER(AREVERSE,       areverse,       af);
     REGISTER_FILTER(ASELECT,        aselect,        af);
     REGISTER_FILTER(ASENDCMD,       asendcmd,       af);
     REGISTER_FILTER(ASETNSAMPLES,   asetnsamples,   af);
@@ -68,7 +79,7 @@ void avfilter_register_all(void)
     REGISTER_FILTER(ASHOWINFO,      ashowinfo,      af);
     REGISTER_FILTER(ASPLIT,         asplit,         af);
     REGISTER_FILTER(ASTATS,         astats,         af);
-    REGISTER_FILTER(ASTREAMSYNC,    astreamsync,    af);
+    REGISTER_FILTER(ASTREAMSELECT,  astreamselect,  af);
     REGISTER_FILTER(ASYNCTS,        asyncts,        af);
     REGISTER_FILTER(ATEMPO,         atempo,         af);
     REGISTER_FILTER(ATRIM,          atrim,          af);
@@ -82,10 +93,13 @@ void avfilter_register_all(void)
     REGISTER_FILTER(CHANNELSPLIT,   channelsplit,   af);
     REGISTER_FILTER(CHORUS,         chorus,         af);
     REGISTER_FILTER(COMPAND,        compand,        af);
+    REGISTER_FILTER(COMPENSATIONDELAY, compensationdelay, af);
     REGISTER_FILTER(DCSHIFT,        dcshift,        af);
+    REGISTER_FILTER(DYNAUDNORM,     dynaudnorm,     af);
     REGISTER_FILTER(EARWAX,         earwax,         af);
     REGISTER_FILTER(EBUR128,        ebur128,        af);
     REGISTER_FILTER(EQUALIZER,      equalizer,      af);
+    REGISTER_FILTER(EXTRASTEREO,    extrastereo,    af);
     REGISTER_FILTER(FLANGER,        flanger,        af);
     REGISTER_FILTER(HIGHPASS,       highpass,       af);
     REGISTER_FILTER(JOIN,           join,           af);
@@ -94,13 +108,22 @@ void avfilter_register_all(void)
     REGISTER_FILTER(PAN,            pan,            af);
     REGISTER_FILTER(REPLAYGAIN,     replaygain,     af);
     REGISTER_FILTER(RESAMPLE,       resample,       af);
+    REGISTER_FILTER(RUBBERBAND,     rubberband,     af);
+    REGISTER_FILTER(SIDECHAINCOMPRESS, sidechaincompress, af);
+    REGISTER_FILTER(SIDECHAINGATE,  sidechaingate,  af);
     REGISTER_FILTER(SILENCEDETECT,  silencedetect,  af);
     REGISTER_FILTER(SILENCEREMOVE,  silenceremove,  af);
+    REGISTER_FILTER(SOFALIZER,      sofalizer,      af);
+    REGISTER_FILTER(STEREOTOOLS,    stereotools,    af);
+    REGISTER_FILTER(STEREOWIDEN,    stereowiden,    af);
     REGISTER_FILTER(TREBLE,         treble,         af);
+    REGISTER_FILTER(TREMOLO,        tremolo,        af);
+    REGISTER_FILTER(VIBRATO,        vibrato,        af);
     REGISTER_FILTER(VOLUME,         volume,         af);
     REGISTER_FILTER(VOLUMEDETECT,   volumedetect,   af);
 
     REGISTER_FILTER(AEVALSRC,       aevalsrc,       asrc);
+    REGISTER_FILTER(ANOISESRC,      anoisesrc,      asrc);
     REGISTER_FILTER(ANULLSRC,       anullsrc,       asrc);
     REGISTER_FILTER(FLITE,          flite,          asrc);
     REGISTER_FILTER(SINE,           sine,           asrc);
@@ -109,34 +132,44 @@ void avfilter_register_all(void)
 
     REGISTER_FILTER(ALPHAEXTRACT,   alphaextract,   vf);
     REGISTER_FILTER(ALPHAMERGE,     alphamerge,     vf);
+    REGISTER_FILTER(ATADENOISE,     atadenoise,     vf);
     REGISTER_FILTER(ASS,            ass,            vf);
     REGISTER_FILTER(BBOX,           bbox,           vf);
     REGISTER_FILTER(BLACKDETECT,    blackdetect,    vf);
     REGISTER_FILTER(BLACKFRAME,     blackframe,     vf);
     REGISTER_FILTER(BLEND,          blend,          vf);
     REGISTER_FILTER(BOXBLUR,        boxblur,        vf);
+    REGISTER_FILTER(CHROMAKEY,      chromakey,      vf);
     REGISTER_FILTER(CODECVIEW,      codecview,      vf);
     REGISTER_FILTER(COLORBALANCE,   colorbalance,   vf);
     REGISTER_FILTER(COLORCHANNELMIXER, colorchannelmixer, vf);
+    REGISTER_FILTER(COLORKEY,       colorkey,       vf);
     REGISTER_FILTER(COLORLEVELS,    colorlevels,    vf);
     REGISTER_FILTER(COLORMATRIX,    colormatrix,    vf);
+    REGISTER_FILTER(CONVOLUTION,    convolution,    vf);
     REGISTER_FILTER(COPY,           copy,           vf);
     REGISTER_FILTER(COVER_RECT,     cover_rect,     vf);
     REGISTER_FILTER(CROP,           crop,           vf);
     REGISTER_FILTER(CROPDETECT,     cropdetect,     vf);
     REGISTER_FILTER(CURVES,         curves,         vf);
     REGISTER_FILTER(DCTDNOIZ,       dctdnoiz,       vf);
+    REGISTER_FILTER(DEBAND,         deband,         vf);
     REGISTER_FILTER(DECIMATE,       decimate,       vf);
+    REGISTER_FILTER(DEFLATE,        deflate,        vf);
     REGISTER_FILTER(DEJUDDER,       dejudder,       vf);
     REGISTER_FILTER(DELOGO,         delogo,         vf);
     REGISTER_FILTER(DESHAKE,        deshake,        vf);
     REGISTER_FILTER(DETELECINE,     detelecine,     vf);
+    REGISTER_FILTER(DILATION,       dilation,       vf);
+    REGISTER_FILTER(DISPLACE,       displace,       vf);
     REGISTER_FILTER(DRAWBOX,        drawbox,        vf);
+    REGISTER_FILTER(DRAWGRAPH,      drawgraph,      vf);
     REGISTER_FILTER(DRAWGRID,       drawgrid,       vf);
     REGISTER_FILTER(DRAWTEXT,       drawtext,       vf);
     REGISTER_FILTER(EDGEDETECT,     edgedetect,     vf);
     REGISTER_FILTER(ELBG,           elbg,           vf);
     REGISTER_FILTER(EQ,             eq,             vf);
+    REGISTER_FILTER(EROSION,        erosion,        vf);
     REGISTER_FILTER(EXTRACTPLANES,  extractplanes,  vf);
     REGISTER_FILTER(FADE,           fade,           vf);
     REGISTER_FILTER(FFTFILT,        fftfilt,        vf);
@@ -147,6 +180,7 @@ void avfilter_register_all(void)
     REGISTER_FILTER(FORMAT,         format,         vf);
     REGISTER_FILTER(FPS,            fps,            vf);
     REGISTER_FILTER(FRAMEPACK,      framepack,      vf);
+    REGISTER_FILTER(FRAMERATE,      framerate,      vf);
     REGISTER_FILTER(FRAMESTEP,      framestep,      vf);
     REGISTER_FILTER(FREI0R,         frei0r,         vf);
     REGISTER_FILTER(FSPP,           fspp,           vf);
@@ -158,9 +192,11 @@ void avfilter_register_all(void)
     REGISTER_FILTER(HISTOGRAM,      histogram,      vf);
     REGISTER_FILTER(HQDN3D,         hqdn3d,         vf);
     REGISTER_FILTER(HQX,            hqx,            vf);
+    REGISTER_FILTER(HSTACK,         hstack,         vf);
     REGISTER_FILTER(HUE,            hue,            vf);
     REGISTER_FILTER(IDET,           idet,           vf);
     REGISTER_FILTER(IL,             il,             vf);
+    REGISTER_FILTER(INFLATE,        inflate,        vf);
     REGISTER_FILTER(INTERLACE,      interlace,      vf);
     REGISTER_FILTER(INTERLEAVE,     interleave,     vf);
     REGISTER_FILTER(KERNDEINT,      kerndeint,      vf);
@@ -169,13 +205,17 @@ void avfilter_register_all(void)
     REGISTER_FILTER(LUT,            lut,            vf);
     REGISTER_FILTER(LUTRGB,         lutrgb,         vf);
     REGISTER_FILTER(LUTYUV,         lutyuv,         vf);
+    REGISTER_FILTER(MASKEDMERGE,    maskedmerge,    vf);
     REGISTER_FILTER(MCDEINT,        mcdeint,        vf);
     REGISTER_FILTER(MERGEPLANES,    mergeplanes,    vf);
+    REGISTER_FILTER(METADATA,       metadata,       vf);
     REGISTER_FILTER(MPDECIMATE,     mpdecimate,     vf);
     REGISTER_FILTER(NEGATE,         negate,         vf);
+    REGISTER_FILTER(NNEDI,          nnedi,          vf);
     REGISTER_FILTER(NOFORMAT,       noformat,       vf);
     REGISTER_FILTER(NOISE,          noise,          vf);
     REGISTER_FILTER(NULL,           null,           vf);
+    REGISTER_FILTER(OCR,            ocr,            vf);
     REGISTER_FILTER(OCV,            ocv,            vf);
     REGISTER_FILTER(OVERLAY,        overlay,        vf);
     REGISTER_FILTER(OWDENOISE,      owdenoise,      vf);
@@ -191,12 +231,18 @@ void avfilter_register_all(void)
     REGISTER_FILTER(PSNR,           psnr,           vf);
     REGISTER_FILTER(PULLUP,         pullup,         vf);
     REGISTER_FILTER(QP,             qp,             vf);
+    REGISTER_FILTER(RANDOM,         random,         vf);
+    REGISTER_FILTER(REALTIME,       realtime,       vf);
+    REGISTER_FILTER(REMOVEGRAIN,    removegrain,    vf);
     REGISTER_FILTER(REMOVELOGO,     removelogo,     vf);
     REGISTER_FILTER(REPEATFIELDS,   repeatfields,   vf);
+    REGISTER_FILTER(REVERSE,        reverse,        vf);
     REGISTER_FILTER(ROTATE,         rotate,         vf);
     REGISTER_FILTER(SAB,            sab,            vf);
     REGISTER_FILTER(SCALE,          scale,          vf);
+    REGISTER_FILTER(SCALE2REF,      scale2ref,      vf);
     REGISTER_FILTER(SELECT,         select,         vf);
+    REGISTER_FILTER(SELECTIVECOLOR, selectivecolor, vf);
     REGISTER_FILTER(SENDCMD,        sendcmd,        vf);
     REGISTER_FILTER(SEPARATEFIELDS, separatefields, vf);
     REGISTER_FILTER(SETDAR,         setdar,         vf);
@@ -206,14 +252,18 @@ void avfilter_register_all(void)
     REGISTER_FILTER(SETTB,          settb,          vf);
     REGISTER_FILTER(SHOWINFO,       showinfo,       vf);
     REGISTER_FILTER(SHOWPALETTE,    showpalette,    vf);
+    REGISTER_FILTER(SHUFFLEFRAMES,  shuffleframes,  vf);
     REGISTER_FILTER(SHUFFLEPLANES,  shuffleplanes,  vf);
     REGISTER_FILTER(SIGNALSTATS,    signalstats,    vf);
     REGISTER_FILTER(SMARTBLUR,      smartblur,      vf);
     REGISTER_FILTER(SPLIT,          split,          vf);
     REGISTER_FILTER(SPP,            spp,            vf);
+    REGISTER_FILTER(SSIM,           ssim,           vf);
     REGISTER_FILTER(STEREO3D,       stereo3d,       vf);
+    REGISTER_FILTER(STREAMSELECT,   streamselect,   vf);
     REGISTER_FILTER(SUBTITLES,      subtitles,      vf);
     REGISTER_FILTER(SUPER2XSAI,     super2xsai,     vf);
+    REGISTER_FILTER(SWAPRECT,       swaprect,       vf);
     REGISTER_FILTER(SWAPUV,         swapuv,         vf);
     REGISTER_FILTER(TBLEND,         tblend,         vf);
     REGISTER_FILTER(TELECINE,       telecine,       vf);
@@ -224,16 +274,22 @@ void avfilter_register_all(void)
     REGISTER_FILTER(TRIM,           trim,           vf);
     REGISTER_FILTER(UNSHARP,        unsharp,        vf);
     REGISTER_FILTER(USPP,           uspp,           vf);
+    REGISTER_FILTER(VECTORSCOPE,    vectorscope,    vf);
     REGISTER_FILTER(VFLIP,          vflip,          vf);
     REGISTER_FILTER(VIDSTABDETECT,  vidstabdetect,  vf);
     REGISTER_FILTER(VIDSTABTRANSFORM, vidstabtransform, vf);
     REGISTER_FILTER(VIGNETTE,       vignette,       vf);
+    REGISTER_FILTER(VSTACK,         vstack,         vf);
     REGISTER_FILTER(W3FDIF,         w3fdif,         vf);
+    REGISTER_FILTER(WAVEFORM,       waveform,       vf);
     REGISTER_FILTER(XBR,            xbr,            vf);
     REGISTER_FILTER(YADIF,          yadif,          vf);
     REGISTER_FILTER(ZMQ,            zmq,            vf);
     REGISTER_FILTER(ZOOMPAN,        zoompan,        vf);
+    REGISTER_FILTER(ZSCALE,         zscale,         vf);
 
+    REGISTER_FILTER(ALLRGB,         allrgb,         vsrc);
+    REGISTER_FILTER(ALLYUV,         allyuv,         vsrc);
     REGISTER_FILTER(CELLAUTO,       cellauto,       vsrc);
     REGISTER_FILTER(COLOR,          color,          vsrc);
     REGISTER_FILTER(FREI0R,         frei0r_src,     vsrc);
@@ -246,26 +302,29 @@ void avfilter_register_all(void)
     REGISTER_FILTER(SMPTEBARS,      smptebars,      vsrc);
     REGISTER_FILTER(SMPTEHDBARS,    smptehdbars,    vsrc);
     REGISTER_FILTER(TESTSRC,        testsrc,        vsrc);
+    REGISTER_FILTER(TESTSRC2,       testsrc2,       vsrc);
 
     REGISTER_FILTER(NULLSINK,       nullsink,       vsink);
 
     /* multimedia filters */
+    REGISTER_FILTER(ADRAWGRAPH,     adrawgraph,     avf);
+    REGISTER_FILTER(AHISTOGRAM,     ahistogram,     avf);
+    REGISTER_FILTER(APHASEMETER,    aphasemeter,    avf);
     REGISTER_FILTER(AVECTORSCOPE,   avectorscope,   avf);
     REGISTER_FILTER(CONCAT,         concat,         avf);
     REGISTER_FILTER(SHOWCQT,        showcqt,        avf);
+    REGISTER_FILTER(SHOWFREQS,      showfreqs,      avf);
     REGISTER_FILTER(SHOWSPECTRUM,   showspectrum,   avf);
+    REGISTER_FILTER(SHOWSPECTRUMPIC, showspectrumpic, avf);
+    REGISTER_FILTER(SHOWVOLUME,     showvolume,     avf);
     REGISTER_FILTER(SHOWWAVES,      showwaves,      avf);
     REGISTER_FILTER(SHOWWAVESPIC,   showwavespic,   avf);
+    REGISTER_FILTER(SPECTRUMSYNTH,  spectrumsynth,  vaf);
 
     /* multimedia sources */
     REGISTER_FILTER(AMOVIE,         amovie,         avsrc);
     REGISTER_FILTER(MOVIE,          movie,          avsrc);
 
-#if FF_API_AVFILTERBUFFER
-    REGISTER_FILTER_UNCONDITIONAL(vsink_ffbuffersink);
-    REGISTER_FILTER_UNCONDITIONAL(asink_ffabuffersink);
-#endif
-
     /* those filters are part of public or internal API => registered
      * unconditionally */
     REGISTER_FILTER_UNCONDITIONAL(asrc_abuffer);
diff --git a/libavfilter/asrc_abuffer.h b/libavfilter/asrc_abuffer.h
deleted file mode 100644
index aa344616..00000000
--- a/libavfilter/asrc_abuffer.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVFILTER_ASRC_ABUFFER_H
-#define AVFILTER_ASRC_ABUFFER_H
-
-#include "avfilter.h"
-
-/**
- * @file
- * memory buffer source for audio
- *
- * @deprecated use buffersrc.h instead.
- */
-
-/**
- * Queue an audio buffer to the audio buffer source.
- *
- * @param abuffersrc audio source buffer context
- * @param data pointers to the samples planes
- * @param linesize linesizes of each audio buffer plane
- * @param nb_samples number of samples per channel
- * @param sample_fmt sample format of the audio data
- * @param ch_layout channel layout of the audio data
- * @param planar flag to indicate if audio data is planar or packed
- * @param pts presentation timestamp of the audio buffer
- * @param flags unused
- *
- * @deprecated use av_buffersrc_add_ref() instead.
- */
-attribute_deprecated
-int av_asrc_buffer_add_samples(AVFilterContext *abuffersrc,
-                               uint8_t *data[8], int linesize[8],
-                               int nb_samples, int sample_rate,
-                               int sample_fmt, int64_t ch_layout, int planar,
-                               int64_t pts, int av_unused flags);
-
-/**
- * Queue an audio buffer to the audio buffer source.
- *
- * This is similar to av_asrc_buffer_add_samples(), but the samples
- * are stored in a buffer with known size.
- *
- * @param abuffersrc audio source buffer context
- * @param buf pointer to the samples data, packed is assumed
- * @param size the size in bytes of the buffer, it must contain an
- * integer number of samples
- * @param sample_fmt sample format of the audio data
- * @param ch_layout channel layout of the audio data
- * @param pts presentation timestamp of the audio buffer
- * @param flags unused
- *
- * @deprecated use av_buffersrc_add_ref() instead.
- */
-attribute_deprecated
-int av_asrc_buffer_add_buffer(AVFilterContext *abuffersrc,
-                              uint8_t *buf, int buf_size,
-                              int sample_rate,
-                              int sample_fmt, int64_t ch_layout, int planar,
-                              int64_t pts, int av_unused flags);
-
-/**
- * Queue an audio buffer to the audio buffer source.
- *
- * @param abuffersrc audio source buffer context
- * @param samplesref buffer ref to queue
- * @param flags unused
- *
- * @deprecated use av_buffersrc_add_ref() instead.
- */
-attribute_deprecated
-int av_asrc_buffer_add_audio_buffer_ref(AVFilterContext *abuffersrc,
-                                        AVFilterBufferRef *samplesref,
-                                        int av_unused flags);
-
-#endif /* AVFILTER_ASRC_ABUFFER_H */
diff --git a/libavfilter/asrc_anoisesrc.c b/libavfilter/asrc_anoisesrc.c
new file mode 100644
index 00000000..e4d40137
--- /dev/null
+++ b/libavfilter/asrc_anoisesrc.c
@@ -0,0 +1,207 @@
+/*
+ * Copyright (c) 2015 Kyle Swanson <k@ylo.ph>.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/opt.h"
+#include "audio.h"
+#include "avfilter.h"
+#include "internal.h"
+#include "libavutil/lfg.h"
+#include "libavutil/random_seed.h"
+
+typedef struct {
+    const AVClass *class;
+    int sample_rate;            /* "sample_rate"/"r" option */
+    double amplitude;           /* "amplitude"/"a" option, 0..1 */
+    int64_t duration;           /* option value; config_props() rescales it to a sample count */
+    int color;                  /* set through AV_OPT_TYPE_INT, so it must be a plain int */
+    int64_t seed;               /* "seed"/"s" option; -1 requests a random seed */
+    int nb_samples;             /* samples per requested frame */
+    /* the fields below are runtime state, not options */
+    int64_t pts;                /* timestamp of the next frame, advanced by samples emitted */
+    int infinite;               /* set when duration was 0: request_frame() never returns EOF */
+    double (*filter)(double white, double *buf); /* shaping callback selected from color */
+    double buf[7];              /* state array handed to filter on every sample */
+    AVLFG c;                    /* generator initialised in config_props() */
+} ANoiseSrcContext;
+
+#define OFFSET(x) offsetof(ANoiseSrcContext, x)
+#define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+/* Aliases of one field carry the same default: each entry's default is applied in turn, so the last alias wins. */
+static const AVOption anoisesrc_options[] = {
+    { "sample_rate",  "set sample rate",  OFFSET(sample_rate),  AV_OPT_TYPE_INT,       {.i64 = 48000},  15,  INT_MAX,    FLAGS },
+    { "r",            "set sample rate",  OFFSET(sample_rate),  AV_OPT_TYPE_INT,       {.i64 = 48000},  15,  INT_MAX,    FLAGS },
+    { "amplitude",    "set amplitude",    OFFSET(amplitude),    AV_OPT_TYPE_DOUBLE,    {.dbl = 1.},     0.,  1.,         FLAGS },
+    { "a",            "set amplitude",    OFFSET(amplitude),    AV_OPT_TYPE_DOUBLE,    {.dbl = 1.},     0.,  1.,         FLAGS },
+    { "duration",     "set duration",     OFFSET(duration),     AV_OPT_TYPE_DURATION,  {.i64 =  0},      0,  INT64_MAX,  FLAGS },
+    { "d",            "set duration",     OFFSET(duration),     AV_OPT_TYPE_DURATION,  {.i64 =  0},      0,  INT64_MAX,  FLAGS },
+    { "color",        "set noise color",  OFFSET(color),        AV_OPT_TYPE_INT,       {.i64 =  0},      0,  2,          FLAGS, "color" },
+    { "colour",       "set noise color",  OFFSET(color),        AV_OPT_TYPE_INT,       {.i64 =  0},      0,  2,          FLAGS, "color" },
+    { "c",            "set noise color",  OFFSET(color),        AV_OPT_TYPE_INT,       {.i64 =  0},      0,  2,          FLAGS, "color" },
+    {     "white",    0,                  0,                    AV_OPT_TYPE_CONST,     {.i64 =  0},      0,  0,          FLAGS, "color" },
+    {     "pink",     0,                  0,                    AV_OPT_TYPE_CONST,     {.i64 =  1},      0,  0,          FLAGS, "color" },
+    {     "brown",    0,                  0,                    AV_OPT_TYPE_CONST,     {.i64 =  2},      0,  0,          FLAGS, "color" },
+    { "seed",         "set random seed",  OFFSET(seed),         AV_OPT_TYPE_INT64,     {.i64 = -1},     -1,  UINT_MAX,   FLAGS },
+    { "s",            "set random seed",  OFFSET(seed),         AV_OPT_TYPE_INT64,     {.i64 = -1},     -1,  UINT_MAX,   FLAGS },
+    { "nb_samples",   "set the number of samples per requested frame", OFFSET(nb_samples), AV_OPT_TYPE_INT, {.i64 = 1024}, 1, INT_MAX, FLAGS },
+    { "n",            "set the number of samples per requested frame", OFFSET(nb_samples), AV_OPT_TYPE_INT, {.i64 = 1024}, 1, INT_MAX, FLAGS },
+    {NULL}
+};
+
+AVFILTER_DEFINE_CLASS(anoisesrc);
+
+static av_cold int query_formats(AVFilterContext *ctx)
+{
+    /* The source advertises exactly one configuration: mono, double-precision
+     * samples, at the sample rate chosen through the options. */
+    static const enum AVSampleFormat fmt_list[] = {
+        AV_SAMPLE_FMT_DBL,
+        AV_SAMPLE_FMT_NONE
+    };
+    static const int64_t layout_list[] = { AV_CH_LAYOUT_MONO, -1 };
+    ANoiseSrcContext *s = ctx->priv;
+    int rate_list[] = { s->sample_rate, -1 };
+
+    AVFilterChannelLayouts *ch_layouts;
+    AVFilterFormats *list;
+    int err;
+
+    /* sample format */
+    if (!(list = ff_make_format_list(fmt_list)))
+        return AVERROR(ENOMEM);
+    if ((err = ff_set_common_formats(ctx, list)) < 0)
+        return err;
+
+    /* channel layout */
+    if (!(ch_layouts = avfilter_make_format64_list(layout_list)))
+        return AVERROR(ENOMEM);
+    if ((err = ff_set_common_channel_layouts(ctx, ch_layouts)) < 0)
+        return err;
+
+    /* sample rate */
+    if (!(list = ff_make_format_list(rate_list)))
+        return AVERROR(ENOMEM);
+    return ff_set_common_samplerates(ctx, list);
+}
+
+static double white_filter(double white, double *buf)
+{
+    return white; /* white noise passes through unshaped; buf is not touched */
+}
+
+static double pink_filter(double white, double *buf)
+{
+    double pink;
+    /* Shape one white sample into pink noise; buf[0..6] carry the filter state between calls. */
+    /* http://www.musicdsp.org/files/pink.txt */
+    buf[0] = 0.99886 * buf[0] + white * 0.0555179;
+    buf[1] = 0.99332 * buf[1] + white * 0.0750759;
+    buf[2] = 0.96900 * buf[2] + white * 0.1538520;
+    buf[3] = 0.86650 * buf[3] + white * 0.3104856;
+    buf[4] = 0.55000 * buf[4] + white * 0.5329522;
+    buf[5] = -0.7616 * buf[5] - white * 0.0168980;
+    pink = buf[0] + buf[1] + buf[2] + buf[3] + buf[4] + buf[5] + buf[6] + white * 0.5362; /* buf[6] still holds the previous call's term */
+    buf[6] = white * 0.115926; /* refreshed only after the sum, for use by the next call */
+    return pink * 0.11; /* output gain */
+}
+
+static double brown_filter(double white, double *buf)
+{
+    /* buf[0] keeps the previous smoothed value; blend 0.02 of the new sample into it */
+    const double blended = (0.02 * white) + buf[0];
+
+    buf[0] = blended / 1.02;
+    return buf[0] * 3.5;
+}
+
+static av_cold int config_props(AVFilterLink *outlink)
+{
+    ANoiseSrcContext *s = outlink->src->priv;
+
+    /* a seed of -1 is replaced by one from av_get_random_seed() */
+    if (s->seed == -1)
+        s->seed = av_get_random_seed();
+    av_lfg_init(&s->c, s->seed);
+    /* zero duration means endless output; rescale AV_TIME_BASE units to samples */
+    if (!s->duration)
+        s->infinite = 1;
+    s->duration = av_rescale(s->duration, s->sample_rate, AV_TIME_BASE);
+    /* pick the shaping callback for the numeric "color" option */
+    if (s->color == 0)
+        s->filter = white_filter;
+    else if (s->color == 1)
+        s->filter = pink_filter;
+    else if (s->color == 2)
+        s->filter = brown_filter;
+    return 0;
+}
+
+static int request_frame(AVFilterLink *outlink)
+{
+    ANoiseSrcContext *s = outlink->src->priv;
+    int count = s->nb_samples;      /* samples to emit in this frame */
+    AVFrame *out;
+    double *samples;
+    int n;
+
+    if (!s->infinite) {
+        /* finite run: stop when nothing is left, trim the last frame to the remainder */
+        if (s->duration <= 0)
+            return AVERROR_EOF;
+        if (s->duration < count)
+            count = s->duration;
+    }
+
+    out = ff_get_audio_buffer(outlink, count);
+    if (!out)
+        return AVERROR(ENOMEM);
+
+    /* scale each generator value by amplitude, then run it through the colour filter */
+    samples = (double *)out->data[0];
+    for (n = 0; n < count; n++) {
+        const double white = s->amplitude * ((2 * ((double) av_lfg_get(&s->c) / 0xffffffff)) - 1);
+        samples[n] = s->filter(white, s->buf);
+    }
+
+    if (!s->infinite)
+        s->duration -= count;
+    out->pts = s->pts;
+    s->pts  += count;
+    return ff_filter_frame(outlink, out);
+}
+
+static const AVFilterPad anoisesrc_outputs[] = {
+    {   /* the single audio output pad */
+        .type          = AVMEDIA_TYPE_AUDIO,
+        .name          = "default",
+        .config_props  = config_props,   /* seeds the generator and selects the filter */
+        .request_frame = request_frame,  /* emits one frame of noise per call */
+    },
+    { NULL }                             /* terminating entry */
+};
+
+AVFilter ff_asrc_anoisesrc = {
+    .name          = "anoisesrc",
+    .description   = NULL_IF_CONFIG_SMALL("Generate a noise audio signal."),
+    .priv_class    = &anoisesrc_class,
+    .priv_size     = sizeof(ANoiseSrcContext),
+    .query_formats = query_formats,
+    .inputs        = NULL,                /* no input pads are declared */
+    .outputs       = anoisesrc_outputs,
+};
diff --git a/libavfilter/asrc_anullsrc.c b/libavfilter/asrc_anullsrc.c
index 28d4500a..94bd0cab 100644
--- a/libavfilter/asrc_anullsrc.c
+++ b/libavfilter/asrc_anullsrc.c
@@ -80,10 +80,12 @@ static int query_formats(AVFilterContext *ctx)
     ANullContext *null = ctx->priv;
     int64_t chlayouts[] = { null->channel_layout, -1 };
     int sample_rates[] = { null->sample_rate, -1 };
+    int ret;
 
-    ff_set_common_formats        (ctx, ff_all_formats(AVMEDIA_TYPE_AUDIO));
-    ff_set_common_channel_layouts(ctx, avfilter_make_format64_list(chlayouts));
-    ff_set_common_samplerates    (ctx, ff_make_format_list(sample_rates));
+    if ((ret = ff_set_common_formats         (ctx, ff_all_formats              (AVMEDIA_TYPE_AUDIO))) < 0 ||
+        (ret = ff_set_common_channel_layouts (ctx, avfilter_make_format64_list (chlayouts         ))) < 0 ||
+        (ret = ff_set_common_samplerates     (ctx, ff_make_format_list         (sample_rates      ))) < 0)
+        return ret;
 
     return 0;
 }
diff --git a/libavfilter/asrc_flite.c b/libavfilter/asrc_flite.c
index 098a1dd1..b3f83abd 100644
--- a/libavfilter/asrc_flite.c
+++ b/libavfilter/asrc_flite.c
@@ -51,7 +51,7 @@ typedef struct {
 #define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
 
 static const AVOption flite_options[] = {
-    { "list_voices", "list voices and exit",              OFFSET(list_voices), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS },
+    { "list_voices", "list voices and exit",              OFFSET(list_voices), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
     { "nb_samples",  "set number of samples per frame",   OFFSET(frame_nb_samples), AV_OPT_TYPE_INT, {.i64=512}, 0, INT_MAX, FLAGS },
     { "n",           "set number of samples per frame",   OFFSET(frame_nb_samples), AV_OPT_TYPE_INT, {.i64=512}, 0, INT_MAX, FLAGS },
     { "text",        "set text to speak",                 OFFSET(text),      AV_OPT_TYPE_STRING, {.str=NULL}, CHAR_MIN, CHAR_MAX, FLAGS },
@@ -205,18 +205,20 @@ static av_cold void uninit(AVFilterContext *ctx)
 static int query_formats(AVFilterContext *ctx)
 {
     FliteContext *flite = ctx->priv;
+    int ret;
 
     AVFilterChannelLayouts *chlayouts = NULL;
     int64_t chlayout = av_get_default_channel_layout(flite->wave->num_channels);
     AVFilterFormats *sample_formats = NULL;
     AVFilterFormats *sample_rates = NULL;
 
-    ff_add_channel_layout(&chlayouts, chlayout);
-    ff_set_common_channel_layouts(ctx, chlayouts);
-    ff_add_format(&sample_formats, AV_SAMPLE_FMT_S16);
-    ff_set_common_formats(ctx, sample_formats);
-    ff_add_format(&sample_rates, flite->wave->sample_rate);
-    ff_set_common_samplerates (ctx, sample_rates);
+    if ((ret = ff_add_channel_layout         (&chlayouts     , chlayout                )) < 0 ||
+        (ret = ff_set_common_channel_layouts (ctx            , chlayouts               )) < 0 ||
+        (ret = ff_add_format                 (&sample_formats, AV_SAMPLE_FMT_S16       )) < 0 ||
+        (ret = ff_set_common_formats         (ctx            , sample_formats          )) < 0 ||
+        (ret = ff_add_format                 (&sample_rates  , flite->wave->sample_rate)) < 0 ||
+        (ret = ff_set_common_samplerates     (ctx            , sample_rates            )) < 0)
+        return ret;
 
     return 0;
 }
diff --git a/libavfilter/asrc_sine.c b/libavfilter/asrc_sine.c
index 6aa01d5e..2a2f3c37 100644
--- a/libavfilter/asrc_sine.c
+++ b/libavfilter/asrc_sine.c
@@ -22,6 +22,7 @@
 
 #include "libavutil/avassert.h"
 #include "libavutil/channel_layout.h"
+#include "libavutil/eval.h"
 #include "libavutil/opt.h"
 #include "audio.h"
 #include "avfilter.h"
@@ -31,7 +32,8 @@ typedef struct {
     const AVClass *class;
     double frequency;
     double beep_factor;
-    int samples_per_frame;
+    char *samples_per_frame;
+    AVExpr *samples_per_frame_expr;
     int sample_rate;
     int64_t duration;
     int16_t *sin;
@@ -61,16 +63,19 @@ typedef struct {
 #define OPT_DUR(name, field, def, min, max, descr, ...) \
     OPT_GENERIC(name, field, def, min, max, descr, DURATION, str, __VA_ARGS__)
 
+#define OPT_STR(name, field, def, min, max, descr, ...) \
+    OPT_GENERIC(name, field, def, min, max, descr, STRING, str, __VA_ARGS__)
+
 static const AVOption sine_options[] = {
-    OPT_DBL("frequency",         frequency,            440, 0, DBL_MAX,   "set the sine frequency"),
-    OPT_DBL("f",                 frequency,            440, 0, DBL_MAX,   "set the sine frequency"),
-    OPT_DBL("beep_factor",       beep_factor,            0, 0, DBL_MAX,   "set the beep fequency factor"),
-    OPT_DBL("b",                 beep_factor,            0, 0, DBL_MAX,   "set the beep fequency factor"),
-    OPT_INT("sample_rate",       sample_rate,        44100, 1, INT_MAX,   "set the sample rate"),
-    OPT_INT("r",                 sample_rate,        44100, 1, INT_MAX,   "set the sample rate"),
-    OPT_DUR("duration",          duration,               0, 0, INT64_MAX, "set the audio duration"),
-    OPT_DUR("d",                 duration,               0, 0, INT64_MAX, "set the audio duration"),
-    OPT_INT("samples_per_frame", samples_per_frame,   1024, 0, INT_MAX,   "set the number of samples per frame"),
+    OPT_DBL("frequency",         frequency,            440, 0, DBL_MAX,   "set the sine frequency",),
+    OPT_DBL("f",                 frequency,            440, 0, DBL_MAX,   "set the sine frequency",),
+    OPT_DBL("beep_factor",       beep_factor,            0, 0, DBL_MAX,   "set the beep frequency factor",),
+    OPT_DBL("b",                 beep_factor,            0, 0, DBL_MAX,   "set the beep frequency factor",),
+    OPT_INT("sample_rate",       sample_rate,        44100, 1, INT_MAX,   "set the sample rate",),
+    OPT_INT("r",                 sample_rate,        44100, 1, INT_MAX,   "set the sample rate",),
+    OPT_DUR("duration",          duration,               0, 0, INT64_MAX, "set the audio duration",),
+    OPT_DUR("d",                 duration,               0, 0, INT64_MAX, "set the audio duration",),
+    OPT_STR("samples_per_frame", samples_per_frame, "1024", 0, 0,         "set the number of samples per frame",),
     {NULL}
 };
 
@@ -120,8 +125,25 @@ static void make_sin_table(int16_t *sin)
         sin[i + 2 * half_pi] = -sin[i];
 }
 
+static const char *const var_names[] = { /* variables of the samples_per_frame expression, in VAR_* enum order */
+    "n",    /* outlink->frame_count */
+    "pts",  /* sine->pts */
+    "t",    /* sine->pts multiplied by the output time base */
+    "TB",   /* av_q2d(outlink->time_base) */
+    NULL
+};
+
+enum { /* indices into the values[] array given to av_expr_eval(), one per var_names entry */
+    VAR_N,
+    VAR_PTS,
+    VAR_T,
+    VAR_TB,
+    VAR_VARS_NB /* number of variables; used as the size of values[] */
+};
+
 static av_cold int init(AVFilterContext *ctx)
 {
+    int ret;
     SineContext *sine = ctx->priv;
 
     if (!(sine->sin = av_malloc(sizeof(*sine->sin) << LOG_PERIOD)))
@@ -136,6 +158,12 @@ static av_cold int init(AVFilterContext *ctx)
                           sine->sample_rate + 0.5;
     }
 
+    ret = av_expr_parse(&sine->samples_per_frame_expr,
+                        sine->samples_per_frame, var_names,
+                        NULL, NULL, NULL, NULL, 0, sine);
+    if (ret < 0)
+        return ret;
+
     return 0;
 }
 
@@ -143,6 +171,8 @@ static av_cold void uninit(AVFilterContext *ctx)
 {
     SineContext *sine = ctx->priv;
 
+    av_expr_free(sine->samples_per_frame_expr);
+    sine->samples_per_frame_expr = NULL;
     av_freep(&sine->sin);
 }
 
@@ -188,9 +218,21 @@ static int request_frame(AVFilterLink *outlink)
 {
     SineContext *sine = outlink->src->priv;
     AVFrame *frame;
-    int i, nb_samples = sine->samples_per_frame;
+    double values[VAR_VARS_NB] = {
+        [VAR_N]   = outlink->frame_count,
+        [VAR_PTS] = sine->pts,
+        [VAR_T]   = sine->pts * av_q2d(outlink->time_base),
+        [VAR_TB]  = av_q2d(outlink->time_base),
+    };
+    int i, nb_samples = lrint(av_expr_eval(sine->samples_per_frame_expr, values, sine));
     int16_t *samples;
 
+    if (nb_samples <= 0) {
+        av_log(sine, AV_LOG_WARNING, "nb samples expression evaluated to %d, "
+               "defaulting to 1024\n", nb_samples);
+        nb_samples = 1024;
+    }
+
     if (sine->duration) {
         nb_samples = FFMIN(nb_samples, sine->duration - sine->pts);
         av_assert1(nb_samples >= 0);
diff --git a/libavfilter/audio.c b/libavfilter/audio.c
index 1e1d8e04..51fef03f 100644
--- a/libavfilter/audio.c
+++ b/libavfilter/audio.c
@@ -28,11 +28,6 @@
 #include "avfilter.h"
 #include "internal.h"
 
-int avfilter_ref_get_channels(AVFilterBufferRef *ref)
-{
-    return ref->audio ? ref->audio->channels : 0;
-}
-
 AVFrame *ff_null_get_audio_buffer(AVFilterLink *link, int nb_samples)
 {
     return ff_get_audio_buffer(link->dst->outputs[0], nb_samples);
@@ -79,92 +74,3 @@ AVFrame *ff_get_audio_buffer(AVFilterLink *link, int nb_samples)
 
     return ret;
 }
-
-#if FF_API_AVFILTERBUFFER
-AVFilterBufferRef* avfilter_get_audio_buffer_ref_from_arrays_channels(uint8_t **data,
-                                                                      int linesize,int perms,
-                                                                      int nb_samples,
-                                                                      enum AVSampleFormat sample_fmt,
-                                                                      int channels,
-                                                                      uint64_t channel_layout)
-{
-    int planes;
-    AVFilterBuffer    *samples    = av_mallocz(sizeof(*samples));
-    AVFilterBufferRef *samplesref = av_mallocz(sizeof(*samplesref));
-
-    if (!samples || !samplesref)
-        goto fail;
-
-    av_assert0(channels);
-    av_assert0(channel_layout == 0 ||
-               channels == av_get_channel_layout_nb_channels(channel_layout));
-
-    samplesref->buf         = samples;
-    samplesref->buf->free   = ff_avfilter_default_free_buffer;
-    if (!(samplesref->audio = av_mallocz(sizeof(*samplesref->audio))))
-        goto fail;
-
-    samplesref->audio->nb_samples     = nb_samples;
-    samplesref->audio->channel_layout = channel_layout;
-    samplesref->audio->channels       = channels;
-
-    planes = av_sample_fmt_is_planar(sample_fmt) ? channels : 1;
-
-    /* make sure the buffer gets read permission or it's useless for output */
-    samplesref->perms = perms | AV_PERM_READ;
-
-    samples->refcount  = 1;
-    samplesref->type   = AVMEDIA_TYPE_AUDIO;
-    samplesref->format = sample_fmt;
-
-    memcpy(samples->data, data,
-           FFMIN(FF_ARRAY_ELEMS(samples->data), planes)*sizeof(samples->data[0]));
-    memcpy(samplesref->data, samples->data, sizeof(samples->data));
-
-    samples->linesize[0] = samplesref->linesize[0] = linesize;
-
-    if (planes > FF_ARRAY_ELEMS(samples->data)) {
-        samples->   extended_data = av_mallocz_array(sizeof(*samples->extended_data),
-                                               planes);
-        samplesref->extended_data = av_mallocz_array(sizeof(*samplesref->extended_data),
-                                               planes);
-
-        if (!samples->extended_data || !samplesref->extended_data)
-            goto fail;
-
-        memcpy(samples->   extended_data, data, sizeof(*data)*planes);
-        memcpy(samplesref->extended_data, data, sizeof(*data)*planes);
-    } else {
-        samples->extended_data    = samples->data;
-        samplesref->extended_data = samplesref->data;
-    }
-
-    samplesref->pts = AV_NOPTS_VALUE;
-
-    return samplesref;
-
-fail:
-    if (samples && samples->extended_data != samples->data)
-        av_freep(&samples->extended_data);
-    if (samplesref) {
-        av_freep(&samplesref->audio);
-        if (samplesref->extended_data != samplesref->data)
-            av_freep(&samplesref->extended_data);
-    }
-    av_freep(&samplesref);
-    av_freep(&samples);
-    return NULL;
-}
-
-AVFilterBufferRef* avfilter_get_audio_buffer_ref_from_arrays(uint8_t **data,
-                                                             int linesize,int perms,
-                                                             int nb_samples,
-                                                             enum AVSampleFormat sample_fmt,
-                                                             uint64_t channel_layout)
-{
-    int channels = av_get_channel_layout_nb_channels(channel_layout);
-    return avfilter_get_audio_buffer_ref_from_arrays_channels(data, linesize, perms,
-                                                              nb_samples, sample_fmt,
-                                                              channels, channel_layout);
-}
-#endif
diff --git a/libavfilter/audio.h b/libavfilter/audio.h
index 3335c96e..6adc82dc 100644
--- a/libavfilter/audio.h
+++ b/libavfilter/audio.h
@@ -60,24 +60,4 @@ AVFrame *ff_null_get_audio_buffer(AVFilterLink *link, int nb_samples);
  */
 AVFrame *ff_get_audio_buffer(AVFilterLink *link, int nb_samples);
 
-/**
- * Send a buffer of audio samples to the next filter.
- *
- * @param link       the output link over which the audio samples are being sent
- * @param samplesref a reference to the buffer of audio samples being sent. The
- *                   receiving filter will free this reference when it no longer
- *                   needs it or pass it on to the next filter.
- *
- * @return >= 0 on success, a negative AVERROR on error. The receiving filter
- * is responsible for unreferencing samplesref in case of error.
- */
-int ff_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref);
-
-/**
- * Send a buffer of audio samples to the next link, without checking
- * min_samples.
- */
-int ff_filter_samples_framed(AVFilterLink *link,
-                             AVFilterBufferRef *samplesref);
-
 #endif /* AVFILTER_AUDIO_H */
diff --git a/libavfilter/avcodec.c b/libavfilter/avcodec.c
deleted file mode 100644
index def735f8..00000000
--- a/libavfilter/avcodec.c
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Copyright 2011 Stefano Sabatini | stefasab at gmail.com
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/**
- * @file
- * libavcodec/libavfilter gluing utilities
- */
-
-#include "avcodec.h"
-#include "libavutil/avassert.h"
-#include "libavutil/channel_layout.h"
-#include "libavutil/opt.h"
-
-#if FF_API_AVFILTERBUFFER
-AVFilterBufferRef *avfilter_get_video_buffer_ref_from_frame(const AVFrame *frame,
-                                                            int perms)
-{
-    AVFilterBufferRef *picref =
-        avfilter_get_video_buffer_ref_from_arrays(frame->data, frame->linesize, perms,
-                                                  frame->width, frame->height,
-                                                  frame->format);
-    if (!picref)
-        return NULL;
-    if (avfilter_copy_frame_props(picref, frame) < 0) {
-        picref->buf->data[0] = NULL;
-        avfilter_unref_bufferp(&picref);
-    }
-    return picref;
-}
-
-AVFilterBufferRef *avfilter_get_audio_buffer_ref_from_frame(const AVFrame *frame,
-                                                            int perms)
-{
-    AVFilterBufferRef *samplesref;
-    int channels = av_frame_get_channels(frame);
-    int64_t layout = av_frame_get_channel_layout(frame);
-
-    if (layout && av_get_channel_layout_nb_channels(layout) != av_frame_get_channels(frame)) {
-        av_log(NULL, AV_LOG_ERROR, "Layout indicates a different number of channels than actually present\n");
-        return NULL;
-    }
-
-    samplesref = avfilter_get_audio_buffer_ref_from_arrays_channels(
-        (uint8_t **)frame->extended_data, frame->linesize[0], perms,
-        frame->nb_samples, frame->format, channels, layout);
-    if (!samplesref)
-        return NULL;
-    if (avfilter_copy_frame_props(samplesref, frame) < 0) {
-        samplesref->buf->data[0] = NULL;
-        avfilter_unref_bufferp(&samplesref);
-    }
-    return samplesref;
-}
-
-AVFilterBufferRef *avfilter_get_buffer_ref_from_frame(enum AVMediaType type,
-                                                      const AVFrame *frame,
-                                                      int perms)
-{
-    switch (type) {
-    case AVMEDIA_TYPE_VIDEO:
-        return avfilter_get_video_buffer_ref_from_frame(frame, perms);
-    case AVMEDIA_TYPE_AUDIO:
-        return avfilter_get_audio_buffer_ref_from_frame(frame, perms);
-    default:
-        return NULL;
-    }
-}
-
-int avfilter_copy_buf_props(AVFrame *dst, const AVFilterBufferRef *src)
-{
-    int planes, nb_channels;
-
-    if (!dst)
-        return AVERROR(EINVAL);
-    /* abort in case the src is NULL and dst is not, avoid inconsistent state in dst */
-    av_assert0(src);
-
-    memcpy(dst->data, src->data, sizeof(dst->data));
-    memcpy(dst->linesize, src->linesize, sizeof(dst->linesize));
-
-    dst->pts     = src->pts;
-    dst->format  = src->format;
-    av_frame_set_pkt_pos(dst, src->pos);
-
-    switch (src->type) {
-    case AVMEDIA_TYPE_VIDEO:
-        av_assert0(src->video);
-        dst->width               = src->video->w;
-        dst->height              = src->video->h;
-        dst->sample_aspect_ratio = src->video->sample_aspect_ratio;
-        dst->interlaced_frame    = src->video->interlaced;
-        dst->top_field_first     = src->video->top_field_first;
-        dst->key_frame           = src->video->key_frame;
-        dst->pict_type           = src->video->pict_type;
-        break;
-    case AVMEDIA_TYPE_AUDIO:
-        av_assert0(src->audio);
-        nb_channels = av_get_channel_layout_nb_channels(src->audio->channel_layout);
-        planes      = av_sample_fmt_is_planar(src->format) ? nb_channels : 1;
-
-        if (planes > FF_ARRAY_ELEMS(dst->data)) {
-            dst->extended_data = av_mallocz_array(planes, sizeof(*dst->extended_data));
-            if (!dst->extended_data)
-                return AVERROR(ENOMEM);
-            memcpy(dst->extended_data, src->extended_data,
-                   planes * sizeof(*dst->extended_data));
-        } else
-            dst->extended_data = dst->data;
-        dst->nb_samples          = src->audio->nb_samples;
-        av_frame_set_sample_rate   (dst, src->audio->sample_rate);
-        av_frame_set_channel_layout(dst, src->audio->channel_layout);
-        av_frame_set_channels      (dst, src->audio->channels);
-        break;
-    default:
-        return AVERROR(EINVAL);
-    }
-
-    return 0;
-}
-#endif
diff --git a/libavfilter/avcodec.h b/libavfilter/avcodec.h
deleted file mode 100644
index d3d0e20e..00000000
--- a/libavfilter/avcodec.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVFILTER_AVCODEC_H
-#define AVFILTER_AVCODEC_H
-
-/**
- * @file
- * libavcodec/libavfilter gluing utilities
- *
- * This should be included in an application ONLY if the installed
- * libavfilter has been compiled with libavcodec support, otherwise
- * symbols defined below will not be available.
- */
-
-#include "avfilter.h"
-
-#if FF_API_AVFILTERBUFFER
-/**
- * Create and return a picref reference from the data and properties
- * contained in frame.
- *
- * @param perms permissions to assign to the new buffer reference
- * @deprecated avfilter APIs work natively with AVFrame instead.
- */
-attribute_deprecated
-AVFilterBufferRef *avfilter_get_video_buffer_ref_from_frame(const AVFrame *frame, int perms);
-
-
-/**
- * Create and return a picref reference from the data and properties
- * contained in frame.
- *
- * @param perms permissions to assign to the new buffer reference
- * @deprecated avfilter APIs work natively with AVFrame instead.
- */
-attribute_deprecated
-AVFilterBufferRef *avfilter_get_audio_buffer_ref_from_frame(const AVFrame *frame,
-                                                            int perms);
-
-/**
- * Create and return a buffer reference from the data and properties
- * contained in frame.
- *
- * @param perms permissions to assign to the new buffer reference
- * @deprecated avfilter APIs work natively with AVFrame instead.
- */
-attribute_deprecated
-AVFilterBufferRef *avfilter_get_buffer_ref_from_frame(enum AVMediaType type,
-                                                      const AVFrame *frame,
-                                                      int perms);
-#endif
-
-#endif /* AVFILTER_AVCODEC_H */
diff --git a/libavfilter/avf_ahistogram.c b/libavfilter/avf_ahistogram.c
new file mode 100644
index 00000000..a716a96f
--- /dev/null
+++ b/libavfilter/avf_ahistogram.c
@@ -0,0 +1,413 @@
+/*
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avassert.h"
+#include "libavutil/opt.h"
+#include "libavutil/parseutils.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "audio.h"
+#include "video.h"
+#include "internal.h"
+
+enum DisplayScale   { LINEAR, SQRT, CBRT, LOG, RLOG, NB_SCALES }; /* "scale" option: how bin counts map to bar height */
+enum AmplitudeScale { ALINEAR, ALOG, NB_ASCALES }; /* "ascale" option: how |sample| maps to a bin index */
+enum SlideMode      { REPLACE, SCROLL, NB_SLIDES }; /* "slide" option: how sonogram rows are updated */
+enum DisplayMode    { SINGLE, SEPARATE, NB_DMODES }; /* "dmode" option: one shared histogram or one per channel */
+enum HistogramMode  { ACCUMULATE, CURRENT, NB_HMODES }; /* NOTE(review): not referenced by the code in this file */
+
+typedef struct AudioHistogramContext {
+    const AVClass *class;      /* NOTE(review): presumably for the AVOption machinery; not touched here */
+    AVFrame *out;              /* persistent output picture; a clone is sent per input frame */
+    int w, h;                  /* output size ("size"/"s" option targets w) */
+    AVRational frame_rate;     /* output frame rate ("rate"/"r" option) */
+    uint64_t *achistogram;     /* w bins per drawn histogram: counts of every sample seen */
+    uint64_t *shistogram;      /* same layout: counts of samples from frames that left the window */
+    int ascale;                /* enum AmplitudeScale ("ascale" option) */
+    int scale;                 /* enum DisplayScale ("scale" option) */
+    float phisto;              /* fraction of the height used by the histogram ("rheight" option) */
+    int histogram_h;           /* h * phisto: number of histogram rows */
+    int apos;                  /* NOTE(review): not referenced by the code in this file */
+    int ypos;                  /* row that receives the next sonogram line */
+    int slide;                 /* enum SlideMode ("slide" option) */
+    int dmode;                 /* enum DisplayMode ("dmode" option) */
+    int dchannels;             /* histograms drawn: 1 in SINGLE mode, else the input channel count */
+    int count;                 /* "acount" option, range -1..100 */
+    int frame_count;           /* index in in[] where the next input frame is stored */
+    float *combine_buffer;     /* SEPARATE mode only: 3 floats (Y, U, V) per output column */
+    AVFrame *in[101];          /* retained input frames, indexed by frame_count */
+    int first;                 /* slot being replaced during the current filter_frame() call */
+} AudioHistogramContext;
+
+#define OFFSET(x) offsetof(AudioHistogramContext, x)
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
+/* User options. Range bounds use the enum that belongs to each option (ALINEAR for "ascale"). */
+static const AVOption ahistogram_options[] = {
+    { "dmode", "set method to display channels", OFFSET(dmode), AV_OPT_TYPE_INT, {.i64=SINGLE}, 0, NB_DMODES-1, FLAGS, "dmode" },
+        { "single", "all channels use single histogram", 0, AV_OPT_TYPE_CONST, {.i64=SINGLE},   0, 0, FLAGS, "dmode" },
+        { "separate", "each channel have own histogram", 0, AV_OPT_TYPE_CONST, {.i64=SEPARATE}, 0, 0, FLAGS, "dmode" },
+    { "rate", "set video rate", OFFSET(frame_rate), AV_OPT_TYPE_VIDEO_RATE, {.str="25"}, 0, 0, FLAGS },
+    { "r",    "set video rate", OFFSET(frame_rate), AV_OPT_TYPE_VIDEO_RATE, {.str="25"}, 0, 0, FLAGS },
+    { "size", "set video size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, {.str="hd720"}, 0, 0, FLAGS },
+    { "s",    "set video size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, {.str="hd720"}, 0, 0, FLAGS },
+    { "scale", "set display scale", OFFSET(scale), AV_OPT_TYPE_INT, {.i64=LOG}, LINEAR, NB_SCALES-1, FLAGS, "scale" },
+        { "log",  "logarithmic",         0, AV_OPT_TYPE_CONST, {.i64=LOG},    0, 0, FLAGS, "scale" },
+        { "sqrt", "square root",         0, AV_OPT_TYPE_CONST, {.i64=SQRT},   0, 0, FLAGS, "scale" },
+        { "cbrt", "cubic root",          0, AV_OPT_TYPE_CONST, {.i64=CBRT},   0, 0, FLAGS, "scale" },
+        { "lin",  "linear",              0, AV_OPT_TYPE_CONST, {.i64=LINEAR}, 0, 0, FLAGS, "scale" },
+        { "rlog", "reverse logarithmic", 0, AV_OPT_TYPE_CONST, {.i64=RLOG},   0, 0, FLAGS, "scale" },
+    { "ascale", "set amplitude scale", OFFSET(ascale), AV_OPT_TYPE_INT, {.i64=ALOG}, ALINEAR, NB_ASCALES-1, FLAGS, "ascale" },
+        { "log",  "logarithmic", 0, AV_OPT_TYPE_CONST, {.i64=ALOG},    0, 0, FLAGS, "ascale" },
+        { "lin",  "linear",      0, AV_OPT_TYPE_CONST, {.i64=ALINEAR}, 0, 0, FLAGS, "ascale" },
+    { "acount", "how much frames to accumulate", OFFSET(count), AV_OPT_TYPE_INT, {.i64=1}, -1, 100, FLAGS },
+    { "rheight", "set histogram ratio of window height", OFFSET(phisto), AV_OPT_TYPE_FLOAT, {.dbl=0.10}, 0, 1, FLAGS },
+    { "slide", "set sonogram sliding", OFFSET(slide), AV_OPT_TYPE_INT, {.i64=REPLACE}, 0, NB_SLIDES-1, FLAGS, "slide" },
+        { "replace", "replace old rows with new", 0, AV_OPT_TYPE_CONST, {.i64=REPLACE},    0, 0, FLAGS, "slide" },
+        { "scroll",  "scroll from top to bottom", 0, AV_OPT_TYPE_CONST, {.i64=SCROLL}, 0, 0, FLAGS, "slide" },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(ahistogram);
+
+static int query_formats(AVFilterContext *ctx)
+{
+    AVFilterFormats *formats = NULL;
+    AVFilterChannelLayouts *layouts = NULL;
+    AVFilterLink *inlink = ctx->inputs[0];
+    AVFilterLink *outlink = ctx->outputs[0];
+    static const enum AVSampleFormat sample_fmts[] = { AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE };
+    static const enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_YUVA444P, AV_PIX_FMT_NONE };
+    int ret;
+    /* Input: planar float samples, every channel count, every sample rate. */
+    formats = ff_make_format_list(sample_fmts);
+    if ((ret = ff_formats_ref(formats, &inlink->out_formats)) < 0)
+        return ret;
+    /* ret is non-negative here, so a NULL list must be reported explicitly, not via ret. */
+    if (!(layouts = ff_all_channel_counts()))
+        return AVERROR(ENOMEM);
+    if ((ret = ff_channel_layouts_ref(layouts, &inlink->out_channel_layouts)) < 0)
+        return ret;
+    formats = ff_all_samplerates();
+    if ((ret = ff_formats_ref(formats, &inlink->out_samplerates)) < 0)
+        return ret;
+    /* Output: YUVA444P only. */
+    formats = ff_make_format_list(pix_fmts);
+    ret = ff_formats_ref(formats, &outlink->in_formats);
+    return ret < 0 ? ret : 0;
+}
+
+/* Size input frames to one video frame of audio (1024 samples minimum) and allocate the bins. */
+static int config_input(AVFilterLink *inlink)
+{
+    AudioHistogramContext *hist = inlink->dst->priv;
+    const double per_frame = (double)inlink->sample_rate / av_q2d(hist->frame_rate) + 0.5;
+    const int nb_samples = FFMAX(1024, per_frame);
+
+    inlink->max_samples      = nb_samples;
+    inlink->min_samples      = nb_samples;
+    inlink->partial_buf_size = nb_samples;
+    /* One histogram of w bins in SINGLE mode, one per input channel otherwise. */
+    if (hist->dmode == SINGLE)
+        hist->dchannels = 1;
+    else
+        hist->dchannels = inlink->channels;
+    hist->shistogram = av_calloc(hist->w, hist->dchannels * sizeof(*hist->shistogram));
+    if (hist->shistogram)
+        hist->achistogram = av_calloc(hist->w, hist->dchannels * sizeof(*hist->achistogram));
+    if (!hist->shistogram || !hist->achistogram)
+        return AVERROR(ENOMEM);
+    return 0;
+}
+
+/* Publish the output geometry and rate, and derive the histogram/sonogram row split. */
+static int config_output(AVFilterLink *outlink)
+{
+    AudioHistogramContext *hist = outlink->src->priv;
+
+    outlink->frame_rate          = hist->frame_rate;
+    outlink->sample_aspect_ratio = (AVRational){1,1};
+    outlink->w                   = hist->w;
+    outlink->h                   = hist->h;
+
+    /* Rows [0, histogram_h) hold the bars; the sonogram starts at row histogram_h. */
+    hist->histogram_h = hist->h * hist->phisto;
+    hist->ypos        = hist->histogram_h;
+
+    if (hist->dmode != SEPARATE)
+        return 0;
+    /* SEPARATE mode accumulates a float Y/U/V triple per output column. */
+    hist->combine_buffer = av_malloc_array(outlink->w * 3, sizeof(*hist->combine_buffer));
+    return hist->combine_buffer ? 0 : AVERROR(ENOMEM);
+}
+
+/*
+ * Consume one audio frame: update the bin counters, redraw the histogram
+ * bars and (when the picture is taller than the histogram) one sonogram row,
+ * then send a clone of the picture downstream. Takes ownership of in.
+ * Returns ff_filter_frame()'s result or AVERROR(ENOMEM).
+ */
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AudioHistogramContext *s = ctx->priv;
+    const int H = s->histogram_h;
+    const int w = s->w;
+    int c, y, n, p, bin;
+    uint64_t acmax = 0;
+    AVFrame *clone;
+
+    if (!s->out || s->out->width  != outlink->w ||
+                   s->out->height != outlink->h) {
+        av_frame_free(&s->out);
+        s->out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+        if (!s->out) {
+            av_frame_free(&in);
+            return AVERROR(ENOMEM);
+        }
+        for (n = H; n < s->h; n++) {
+            memset(s->out->data[0] + n * s->out->linesize[0], 0, w);
+            memset(s->out->data[1] + n * s->out->linesize[1], 127, w);
+            memset(s->out->data[2] + n * s->out->linesize[2], 127, w);
+            memset(s->out->data[3] + n * s->out->linesize[3], 0, w);
+        }
+    }
+
+    if (s->dmode == SEPARATE) {
+        for (y = 0; y < w; y++) {
+            s->combine_buffer[3 * y    ] = 0;
+            s->combine_buffer[3 * y + 1] = 127.5;
+            s->combine_buffer[3 * y + 2] = 127.5;
+        }
+    }
+    /* The histogram rows are redrawn from scratch on every frame. */
+    for (n = 0; n < H; n++) {
+        memset(s->out->data[0] + n * s->out->linesize[0], 0, w);
+        memset(s->out->data[1] + n * s->out->linesize[1], 127, w);
+        memset(s->out->data[2] + n * s->out->linesize[2], 127, w);
+        memset(s->out->data[3] + n * s->out->linesize[3], 0, w);
+    }
+    s->out->pts = in->pts;
+    /* Slot about to be reused: it holds the frame leaving the window, if any. */
+    s->first = s->frame_count;
+
+    switch (s->ascale) {
+    case ALINEAR:
+        for (c = 0; c < inlink->channels; c++) {
+            const float *src = (const float *)in->extended_data[c];
+            uint64_t *achistogram = &s->achistogram[(s->dmode == SINGLE ? 0: c) * w];
+
+            for (n = 0; n < in->nb_samples; n++) {
+                bin = lrint(av_clipf(fabsf(src[n]), 0, 1) * (w - 1));
+                achistogram[bin]++;
+            }
+            /* Count the departing frame over its own length, not the new frame's. */
+            if (s->in[s->first] && s->count >= 0) {
+                uint64_t *shistogram = &s->shistogram[(s->dmode == SINGLE ? 0: c) * w];
+                const float *src2 = (const float *)s->in[s->first]->extended_data[c];
+
+                for (n = 0; n < s->in[s->first]->nb_samples; n++) {
+                    bin = lrint(av_clipf(fabsf(src2[n]), 0, 1) * (w - 1));
+                    shistogram[bin]++;
+                }
+            }
+        }
+        break;
+    case ALOG:
+        for (c = 0; c < inlink->channels; c++) {
+            const float *src = (const float *)in->extended_data[c];
+            uint64_t *achistogram = &s->achistogram[(s->dmode == SINGLE ? 0: c) * w];
+
+            for (n = 0; n < in->nb_samples; n++) {
+                bin = lrint(av_clipf(1 + log10(fabsf(src[n])) / 6, 0, 1) * (w - 1));
+                achistogram[bin]++;
+            }
+            /* Count the departing frame over its own length, not the new frame's. */
+            if (s->in[s->first] && s->count >= 0) {
+                uint64_t *shistogram = &s->shistogram[(s->dmode == SINGLE ? 0: c) * w];
+                const float *src2 = (const float *)s->in[s->first]->extended_data[c];
+
+                for (n = 0; n < s->in[s->first]->nb_samples; n++) {
+                    bin = lrint(av_clipf(1 + log10(fabsf(src2[n])) / 6, 0, 1) * (w - 1));
+                    shistogram[bin]++;
+                }
+            }
+        }
+        break;
+    }
+    /* Store the frame in its slot (freeing the previous occupant) and advance. */
+    av_frame_free(&s->in[s->frame_count]);
+    s->in[s->frame_count] = in;
+    s->frame_count++;
+    if (s->frame_count > s->count)
+        s->frame_count = 0;
+    /* Largest windowed count (seen minus departed) over all bins. */
+    for (n = 0; n < w * s->dchannels; n++) {
+        acmax = FFMAX(s->achistogram[n] - s->shistogram[n], acmax);
+    }
+
+    for (c = 0; c < s->dchannels; c++) {
+        uint64_t *shistogram  = &s->shistogram[c * w];
+        uint64_t *achistogram = &s->achistogram[c * w];
+        float yf, uf, vf;
+
+        if (s->dmode == SEPARATE) {
+            yf = 256.0f / s->dchannels;
+            uf = yf * M_PI;
+            vf = yf * M_PI;
+            uf *= 0.5 * sin((2 * M_PI * c) / s->dchannels);
+            vf *= 0.5 * cos((2 * M_PI * c) / s->dchannels);
+        }
+
+        for (n = 0; n < w; n++) {
+            double a, aa;
+            int h;
+
+            a = achistogram[n] - shistogram[n];
+            switch (s->scale) {
+            case LINEAR:
+                aa = a / (double)acmax;
+                break;
+            case SQRT:
+                aa = sqrt(a) / sqrt(acmax);
+                break;
+            case CBRT:
+                aa = cbrt(a) / cbrt(acmax);
+                break;
+            case LOG:
+                aa = log2(a + 1) / log2(acmax + 1);
+                break;
+            case RLOG:
+                aa = 1. - log2(a + 1) / log2(acmax + 1);
+                if (aa == 1.)
+                    aa = 0;
+                break;
+            default:
+                av_assert0(0);
+            }
+            h = aa * (H - 1);
+            if (s->dmode == SINGLE) {
+                for (y = H - h; y < H; y++) {
+                    s->out->data[0][y * s->out->linesize[0] + n] = 255;
+                    s->out->data[3][y * s->out->linesize[3] + n] = 255;
+                }
+                if (s->h - H > 0) {
+                    h = aa * 255;
+                    s->out->data[0][s->ypos * s->out->linesize[0] + n] = h;
+                    s->out->data[1][s->ypos * s->out->linesize[1] + n] = 127;
+                    s->out->data[2][s->ypos * s->out->linesize[2] + n] = 127;
+                    s->out->data[3][s->ypos * s->out->linesize[3] + n] = 255;
+                }
+            } else if (s->dmode == SEPARATE) {
+                float *out = &s->combine_buffer[3 * n];
+                int old = 0;
+                /* Row H - h is only part of the bar when h > 0; do not read it otherwise. */
+                if (h > 0)
+                    old = s->out->data[0][(H - h) * s->out->linesize[0] + n];
+                for (y = H - h; y < H; y++) {
+                    if (s->out->data[0][y * s->out->linesize[0] + n] != old)
+                        break;
+                    old = s->out->data[0][y * s->out->linesize[0] + n];
+                    s->out->data[0][y * s->out->linesize[0] + n] = yf;
+                    s->out->data[1][y * s->out->linesize[1] + n] = 128+uf;
+                    s->out->data[2][y * s->out->linesize[2] + n] = 128+vf;
+                    s->out->data[3][y * s->out->linesize[3] + n] = 255;
+                }
+
+                out[0] += aa * yf;
+                out[1] += aa * uf;
+                out[2] += aa * vf;
+            }
+        }
+    }
+
+    if (s->h - H > 0) {
+        if (s->dmode == SEPARATE) {
+            for (n = 0; n < w; n++) {
+                float *cb = &s->combine_buffer[3 * n];
+                s->out->data[0][s->ypos * s->out->linesize[0] + n] = cb[0];
+                s->out->data[1][s->ypos * s->out->linesize[1] + n] = cb[1];
+                s->out->data[2][s->ypos * s->out->linesize[2] + n] = cb[2];
+                s->out->data[3][s->ypos * s->out->linesize[3] + n] = 255;
+            }
+        }
+        /* Shift the sonogram down one row; the last valid destination row is s->h - 1. */
+        if (s->slide == SCROLL) {
+            for (p = 0; p < 4; p++) {
+                for (y = s->h - 1; y >= H + 1; y--) {
+                    memmove(s->out->data[p] + (y  ) * s->out->linesize[p],
+                            s->out->data[p] + (y-1) * s->out->linesize[p], w);
+                }
+            }
+        }
+
+        s->ypos++;
+        if (s->slide == SCROLL || s->ypos >= s->h)
+            s->ypos = H;
+    }
+
+    clone = av_frame_clone(s->out);
+    if (!clone)
+        return AVERROR(ENOMEM);
+    return ff_filter_frame(outlink, clone);
+}
+
+/* Release every retained input frame, the counters and the output picture. */
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    AudioHistogramContext *hist = ctx->priv;
+    int idx = sizeof(hist->in) / sizeof(hist->in[0]);
+    while (idx-- > 0)
+        av_frame_free(&hist->in[idx]);
+    av_freep(&hist->combine_buffer);
+    av_freep(&hist->achistogram);
+    av_freep(&hist->shistogram);
+    av_frame_free(&hist->out);
+}
+
+static const AVFilterPad ahistogram_inputs[] = { /* single audio input; frames go to filter_frame() */
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_AUDIO,
+        .config_props = config_input,
+        .filter_frame = filter_frame,
+    },
+    { NULL }
+};
+/* Single video output; its properties are set by config_output(). */
+static const AVFilterPad ahistogram_outputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_output,
+    },
+    { NULL }
+};
+/* Filter definition; the pad tables are named after this filter, not avectorscope. */
+AVFilter ff_avf_ahistogram = {
+    .name          = "ahistogram",
+    .description   = NULL_IF_CONFIG_SMALL("Convert input audio to histogram video output."),
+    .uninit        = uninit,
+    .query_formats = query_formats,
+    .priv_size     = sizeof(AudioHistogramContext),
+    .inputs        = ahistogram_inputs,
+    .outputs       = ahistogram_outputs,
+    .priv_class    = &ahistogram_class,
+};
diff --git a/libavfilter/avf_aphasemeter.c b/libavfilter/avf_aphasemeter.c
new file mode 100644
index 00000000..7c1e9932
--- /dev/null
+++ b/libavfilter/avf_aphasemeter.c
@@ -0,0 +1,238 @@
+/*
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * audio to video multimedia aphasemeter filter
+ */
+
+#include "libavutil/avassert.h"
+#include "libavutil/channel_layout.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/opt.h"
+#include "libavutil/parseutils.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "audio.h"
+#include "video.h"
+#include "internal.h"
+
+typedef struct AudioPhaseMeterContext {
+    const AVClass *class;       /* NOTE(review): presumably for the AVOption machinery; not touched here */
+    AVFrame *out;               /* persistent RGBA picture; a clone is sent per input frame */
+    int w, h;                   /* output size ("size"/"s" option targets w) */
+    AVRational frame_rate;      /* output frame rate ("rate"/"r" option) */
+    int contrast[4];            /* per-sample increments for R, G, B ("rc"/"gc"/"bc"); [3] is not used here */
+    uint8_t *mpc_str;           /* "mpc" option text: "none" or a colour specification */
+    uint8_t mpc[4];             /* colour parsed from mpc_str by av_parse_color() */
+    int draw_median_phase;      /* non-zero when the mpc-coloured marker is drawn */
+} AudioPhaseMeterContext;
+
+#define OFFSET(x) offsetof(AudioPhaseMeterContext, x)
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
+/* User-visible options; "r" and "s" are short aliases of "rate" and "size". */
+static const AVOption aphasemeter_options[] = {
+    { "rate", "set video rate", OFFSET(frame_rate), AV_OPT_TYPE_VIDEO_RATE, {.str="25"}, 0, 0, FLAGS },
+    { "r",    "set video rate", OFFSET(frame_rate), AV_OPT_TYPE_VIDEO_RATE, {.str="25"}, 0, 0, FLAGS },
+    { "size", "set video size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, {.str="800x400"}, 0, 0, FLAGS },
+    { "s",    "set video size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, {.str="800x400"}, 0, 0, FLAGS },
+    { "rc", "set red contrast",   OFFSET(contrast[0]), AV_OPT_TYPE_INT, {.i64=2}, 0, 255, FLAGS },
+    { "gc", "set green contrast", OFFSET(contrast[1]), AV_OPT_TYPE_INT, {.i64=7}, 0, 255, FLAGS },
+    { "bc", "set blue contrast",  OFFSET(contrast[2]), AV_OPT_TYPE_INT, {.i64=1}, 0, 255, FLAGS },
+    { "mpc", "set median phase color", OFFSET(mpc_str), AV_OPT_TYPE_STRING, {.str = "none"}, 0, 0, FLAGS }, /* parsed in config_output() */
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(aphasemeter);
+
+/* Accept packed-float stereo at any sample rate on the input; produce RGBA video. */
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_RGBA, AV_PIX_FMT_NONE };
+    static const enum AVSampleFormat sample_fmts[] = { AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_NONE };
+    AVFilterLink *const audio_in  = ctx->inputs[0];
+    AVFilterLink *const video_out = ctx->outputs[0];
+    AVFilterChannelLayouts *layout = NULL;
+    AVFilterFormats *list = ff_make_format_list(sample_fmts);
+    int err = ff_formats_ref(list, &audio_in->out_formats);
+
+    if (err >= 0)
+        err = ff_add_channel_layout(&layout, AV_CH_LAYOUT_STEREO);
+    if (err >= 0)
+        err = ff_channel_layouts_ref(layout, &audio_in->out_channel_layouts);
+    if (err >= 0) {
+        list = ff_all_samplerates();
+        err  = ff_formats_ref(list, &audio_in->out_samplerates);
+    }
+    if (err >= 0) {
+        list = ff_make_format_list(pix_fmts);
+        err  = ff_formats_ref(list, &video_out->in_formats);
+    }
+
+    return err < 0 ? err : 0;
+}
+
+/* Request input frames that each span one output video frame (1024 samples minimum). */
+static int config_input(AVFilterLink *inlink)
+{
+    const AudioPhaseMeterContext *meter = inlink->dst->priv;
+    const double per_frame = (double)inlink->sample_rate / av_q2d(meter->frame_rate) + 0.5;
+    const int nb_samples = FFMAX(1024, per_frame);
+
+    inlink->max_samples      = nb_samples;
+    inlink->min_samples      = nb_samples;
+    inlink->partial_buf_size = nb_samples;
+
+    return 0;
+}
+
+/* Publish output geometry and rate, then parse the "mpc" colour ("none" disables the marker). */
+static int config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    AudioPhaseMeterContext *meter = ctx->priv;
+
+    outlink->frame_rate          = meter->frame_rate;
+    outlink->sample_aspect_ratio = (AVRational){1,1};
+    outlink->w                   = meter->w;
+    outlink->h                   = meter->h;
+    if (strcmp(meter->mpc_str, "none") == 0) {
+        meter->draw_median_phase = 0;
+        return 0;
+    }
+    if (av_parse_color(meter->mpc, meter->mpc_str, -1, ctx) < 0)
+        return AVERROR(EINVAL);
+    meter->draw_median_phase = 1;
+    return 0;
+}
+
+static inline int get_x(float phase, int w) /* maps phase in [-1, 1] to a column in [0, w - 1], truncating */
+{
+    return (w - 1) * ((phase + 1.) / 2.);
+}
+
+/* Scroll the rows below the top ten down by one, plot each sample's phase on the top rows, export
+ * the mean phase as frame metadata and send a clone downstream. Frees in; may return ENOMEM. */
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AudioPhaseMeterContext *s = ctx->priv;
+    AVDictionary **metadata;
+    const int rc = s->contrast[0];
+    const int gc = s->contrast[1];
+    const int bc = s->contrast[2];
+    float fphase = 0;
+    AVFrame *out, *clone;
+    uint8_t *dst;
+    int i;
+
+    if (!s->out || s->out->width  != outlink->w ||
+                   s->out->height != outlink->h) {
+        av_frame_free(&s->out);
+        s->out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+        if (!s->out) {
+            av_frame_free(&in);
+            return AVERROR(ENOMEM);
+        }
+        out = s->out;
+        for (i = 0; i < outlink->h; i++)
+            memset(out->data[0] + i * out->linesize[0], 0, outlink->w * 4);
+    } else {
+        out = s->out;
+        for (i = outlink->h - 1; i >= 10; i--)
+            memmove(out->data[0] + (i  ) * out->linesize[0],
+                    out->data[0] + (i-1) * out->linesize[0],
+                    outlink->w * 4);
+        for (i = 0; i < outlink->w; i++)
+            AV_WL32(out->data[0] + i * 4, 0);
+    }
+    s->out->pts = in->pts;
+    /* f = 2*a*b / (a*a + b*b) for each sample pair; a NaN result (e.g. 0/0) counts as +1. */
+    for (i = 0; i < in->nb_samples; i++) {
+        const float *src = (float *)in->data[0] + i * 2;
+        const float f = src[0] * src[1] / (src[0]*src[0] + src[1] * src[1]) * 2;
+        const float phase = isnan(f) ? 1 : f;
+        const int x = get_x(phase, s->w);
+        dst = out->data[0] + x * 4;
+        dst[0] = FFMIN(255, dst[0] + rc);
+        dst[1] = FFMIN(255, dst[1] + gc);
+        dst[2] = FFMIN(255, dst[2] + bc);
+        dst[3] = 255;
+        fphase += phase;
+    }
+    fphase /= in->nb_samples;
+    if (s->draw_median_phase) {
+        dst = out->data[0] + get_x(fphase, s->w) * 4;
+        AV_WL32(dst, AV_RL32(s->mpc));
+    }
+    /* Replicate the freshly drawn top row into rows 1..9. */
+    for (i = 1; i < 10 && i < outlink->h; i++)
+        memcpy(out->data[0] + i * out->linesize[0], out->data[0], outlink->w * 4);
+
+    metadata = avpriv_frame_get_metadatap(out);
+    if (metadata) {
+        char value[128];
+        snprintf(value, sizeof(value), "%f", fphase);
+        av_dict_set(metadata, "lavfi.aphasemeter.phase", value, 0);
+    }
+    av_frame_free(&in);
+    clone = av_frame_clone(s->out);
+    if (!clone)
+        return AVERROR(ENOMEM);
+    return ff_filter_frame(outlink, clone);
+}
+
+/* Drop the persistent output picture. */
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    AudioPhaseMeterContext *meter = ctx->priv;
+    av_frame_free(&meter->out);
+}
+
+static const AVFilterPad inputs[] = { /* single audio input; frames go to filter_frame() */
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_AUDIO,
+        .config_props = config_input,
+        .filter_frame = filter_frame,
+    },
+    { NULL }
+};
+/* Single video output; its properties are set by config_output(). */
+static const AVFilterPad outputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_output,
+    },
+    { NULL }
+};
+/* Filter definition tying the callbacks, pads and option class together. */
+AVFilter ff_avf_aphasemeter = {
+    .name          = "aphasemeter",
+    .description   = NULL_IF_CONFIG_SMALL("Convert input audio to phase meter video output."),
+    .uninit        = uninit,
+    .query_formats = query_formats,
+    .priv_size     = sizeof(AudioPhaseMeterContext),
+    .inputs        = inputs,
+    .outputs       = outputs,
+    .priv_class    = &aphasemeter_class,
+};
diff --git a/libavfilter/avf_avectorscope.c b/libavfilter/avf_avectorscope.c
index 3027de37..29d02877 100644
--- a/libavfilter/avf_avectorscope.c
+++ b/libavfilter/avf_avectorscope.c
@@ -36,18 +36,27 @@
 enum VectorScopeMode {
     LISSAJOUS,
     LISSAJOUS_XY,
+    POLAR,
     MODE_NB,
 };
 
+enum VectorScopeDraw {
+    DOT,
+    LINE,
+    DRAW_NB,
+};
+
 typedef struct AudioVectorScopeContext {
     const AVClass *class;
     AVFrame *outpicref;
     int w, h;
     int hw, hh;
     int mode;
-    int contrast[3];
-    int fade[3];
+    int draw;
+    int contrast[4];
+    int fade[4];
     double zoom;
+    unsigned prev_x, prev_y;
     AVRational frame_rate;
 } AudioVectorScopeContext;
 
@@ -59,17 +68,23 @@ static const AVOption avectorscope_options[] = {
     { "m",    "set mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64=LISSAJOUS}, 0, MODE_NB-1, FLAGS, "mode" },
     { "lissajous",    "", 0, AV_OPT_TYPE_CONST, {.i64=LISSAJOUS},    0, 0, FLAGS, "mode" },
     { "lissajous_xy", "", 0, AV_OPT_TYPE_CONST, {.i64=LISSAJOUS_XY}, 0, 0, FLAGS, "mode" },
+    { "polar",        "", 0, AV_OPT_TYPE_CONST, {.i64=POLAR},        0, 0, FLAGS, "mode" },
     { "rate", "set video rate", OFFSET(frame_rate), AV_OPT_TYPE_VIDEO_RATE, {.str="25"}, 0, 0, FLAGS },
     { "r",    "set video rate", OFFSET(frame_rate), AV_OPT_TYPE_VIDEO_RATE, {.str="25"}, 0, 0, FLAGS },
     { "size", "set video size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, {.str="400x400"}, 0, 0, FLAGS },
     { "s",    "set video size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, {.str="400x400"}, 0, 0, FLAGS },
-    { "rc", "set red contrast",   OFFSET(contrast[0]), AV_OPT_TYPE_INT, {.i64=40}, 0, 255, FLAGS },
+    { "rc", "set red contrast",   OFFSET(contrast[0]), AV_OPT_TYPE_INT, {.i64=40},  0, 255, FLAGS },
     { "gc", "set green contrast", OFFSET(contrast[1]), AV_OPT_TYPE_INT, {.i64=160}, 0, 255, FLAGS },
-    { "bc", "set blue contrast",  OFFSET(contrast[2]), AV_OPT_TYPE_INT, {.i64=80}, 0, 255, FLAGS },
+    { "bc", "set blue contrast",  OFFSET(contrast[2]), AV_OPT_TYPE_INT, {.i64=80},  0, 255, FLAGS },
+    { "ac", "set alpha contrast", OFFSET(contrast[3]), AV_OPT_TYPE_INT, {.i64=255}, 0, 255, FLAGS },
     { "rf", "set red fade",       OFFSET(fade[0]), AV_OPT_TYPE_INT, {.i64=15}, 0, 255, FLAGS },
     { "gf", "set green fade",     OFFSET(fade[1]), AV_OPT_TYPE_INT, {.i64=10}, 0, 255, FLAGS },
-    { "bf", "set blue fade",      OFFSET(fade[2]), AV_OPT_TYPE_INT, {.i64=5}, 0, 255, FLAGS },
-    { "zoom", "set zoom factor",  OFFSET(zoom), AV_OPT_TYPE_DOUBLE, {.dbl=1}, 1, 10, FLAGS },
+    { "bf", "set blue fade",      OFFSET(fade[2]), AV_OPT_TYPE_INT, {.i64=5},  0, 255, FLAGS },
+    { "af", "set alpha fade",     OFFSET(fade[3]), AV_OPT_TYPE_INT, {.i64=5},  0, 255, FLAGS },
+    { "zoom", "set zoom factor",  OFFSET(zoom), AV_OPT_TYPE_DOUBLE, {.dbl=1},  1, 10, FLAGS },
+    { "draw", "set draw mode", OFFSET(draw), AV_OPT_TYPE_INT, {.i64=DOT}, 0, DRAW_NB-1, FLAGS, "draw" },
+    { "dot",   "", 0, AV_OPT_TYPE_CONST, {.i64=DOT} , 0, 0, FLAGS, "draw" },
+    { "line",  "", 0, AV_OPT_TYPE_CONST, {.i64=LINE}, 0, 0, FLAGS, "draw" },
     { NULL }
 };
 
@@ -92,6 +107,33 @@ static void draw_dot(AudioVectorScopeContext *s, unsigned x, unsigned y)
     dst[0] = FFMIN(dst[0] + s->contrast[0], 255);
     dst[1] = FFMIN(dst[1] + s->contrast[1], 255);
     dst[2] = FFMIN(dst[2] + s->contrast[2], 255);
+    dst[3] = FFMIN(dst[3] + s->contrast[3], 255);
+}
+
+static void draw_line(AudioVectorScopeContext *s, int x0, int y0, int x1, int y1)
+{
+    int dx = FFABS(x1-x0), sx = x0 < x1 ? 1 : -1;
+    int dy = FFABS(y1-y0), sy = y0 < y1 ? 1 : -1;
+    int err = (dx>dy ? dx : -dy) / 2, e2;
+    /* Integer error-accumulation walk (Bresenham-style) from (x0,y0) to (x1,y1); both ends are drawn. */
+    for (;;) {
+        draw_dot(s, x0, y0);
+
+        if (x0 == x1 && y0 == y1)
+            break;
+        /* Both step decisions below use the same snapshot of err. */
+        e2 = err;
+        /* Step along x. */
+        if (e2 >-dx) {
+            err -= dy;
+            x0 += sx;
+        }
+        /* Step along y. */
+        if (e2 < dy) {
+            err += dx;
+            y0 += sy;
+        }
+    }
 }
 
 static void fade(AudioVectorScopeContext *s)
@@ -106,6 +148,7 @@ static void fade(AudioVectorScopeContext *s)
                 d[j+0] = FFMAX(d[j+0] - s->fade[0], 0);
                 d[j+1] = FFMAX(d[j+1] - s->fade[1], 0);
                 d[j+2] = FFMAX(d[j+2] - s->fade[2], 0);
+                d[j+3] = FFMAX(d[j+3] - s->fade[3], 0);
             }
             d += linesize;
         }
@@ -120,24 +163,21 @@ static int query_formats(AVFilterContext *ctx)
     AVFilterLink *outlink = ctx->outputs[0];
     static const enum AVSampleFormat sample_fmts[] = { AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_NONE };
     static const enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_RGBA, AV_PIX_FMT_NONE };
+    int ret;
 
     formats = ff_make_format_list(sample_fmts);
-    if (!formats)
-        return AVERROR(ENOMEM);
-    ff_formats_ref(formats, &inlink->out_formats);
-
-    ff_add_channel_layout(&layout, AV_CH_LAYOUT_STEREO);
-    ff_channel_layouts_ref(layout, &inlink->out_channel_layouts);
+    if ((ret = ff_formats_ref         (formats, &inlink->out_formats        )) < 0 ||
+        (ret = ff_add_channel_layout  (&layout, AV_CH_LAYOUT_STEREO         )) < 0 ||
+        (ret = ff_channel_layouts_ref (layout , &inlink->out_channel_layouts)) < 0)
+        return ret;
 
     formats = ff_all_samplerates();
-    if (!formats)
-        return AVERROR(ENOMEM);
-    ff_formats_ref(formats, &inlink->out_samplerates);
+    if ((ret = ff_formats_ref(formats, &inlink->out_samplerates)) < 0)
+        return ret;
 
     formats = ff_make_format_list(pix_fmts);
-    if (!formats)
-        return AVERROR(ENOMEM);
-    ff_formats_ref(formats, &outlink->in_formats);
+    if ((ret = ff_formats_ref(formats, &outlink->in_formats)) < 0)
+        return ret;
 
     return 0;
 }
@@ -165,8 +205,8 @@ static int config_output(AVFilterLink *outlink)
     outlink->sample_aspect_ratio = (AVRational){1,1};
     outlink->frame_rate = s->frame_rate;
 
-    s->hw = s->w / 2;
-    s->hh = s->h / 2;
+    s->prev_x = s->hw = s->w / 2;
+    s->prev_y = s->hh = s->h / 2;
 
     return 0;
 }
@@ -179,6 +219,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
     const int hw = s->hw;
     const int hh = s->hh;
     unsigned x, y;
+    unsigned prev_x = s->prev_x, prev_y = s->prev_y;
     const double zoom = s->zoom;
     int i;
 
@@ -206,12 +247,27 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
             if (s->mode == LISSAJOUS) {
                 x = ((src[1] - src[0]) * zoom / (float)(UINT16_MAX) + 1) * hw;
                 y = (1.0 - (src[0] + src[1]) * zoom / (float)UINT16_MAX) * hh;
-            } else {
+            } else if (s->mode == LISSAJOUS_XY) {
                 x = (src[1] * zoom / (float)INT16_MAX + 1) * hw;
                 y = (src[0] * zoom / (float)INT16_MAX + 1) * hh;
+            } else {
+                float sx, sy, cx, cy;
+
+                sx = src[1] * zoom / (float)INT16_MAX;
+                sy = src[0] * zoom / (float)INT16_MAX;
+                cx = sx * sqrtf(1 - 0.5*sy*sy);
+                cy = sy * sqrtf(1 - 0.5*sx*sx);
+                x = hw + hw * FFSIGN(cx + cy) * (cx - cy) * .7;
+                y = s->h - s->h * fabsf(cx + cy) * .7;
             }
 
-            draw_dot(s, x, y);
+            if (s->draw == DOT) {
+                draw_dot(s, x, y);
+            } else {
+                draw_line(s, x, y, prev_x, prev_y);
+            }
+            prev_x = x;
+            prev_y = y;
         }
         break;
     case AV_SAMPLE_FMT_FLT:
@@ -221,16 +277,34 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
             if (s->mode == LISSAJOUS) {
                 x = ((src[1] - src[0]) * zoom / 2 + 1) * hw;
                 y = (1.0 - (src[0] + src[1]) * zoom / 2) * hh;
-            } else {
+            } else if (s->mode == LISSAJOUS_XY){
                 x = (src[1] * zoom + 1) * hw;
                 y = (src[0] * zoom + 1) * hh;
+            } else {
+                float sx, sy, cx, cy;
+
+                sx = src[1] * zoom;
+                sy = src[0] * zoom;
+                cx = sx * sqrtf(1 - 0.5 * sy * sy);
+                cy = sy * sqrtf(1 - 0.5 * sx * sx);
+                x = hw + hw * FFSIGN(cx + cy) * (cx - cy) * .7;
+                y = s->h - s->h * fabsf(cx + cy) * .7;
             }
 
-            draw_dot(s, x, y);
+            if (s->draw == DOT) {
+                draw_dot(s, x, y);
+            } else {
+                draw_line(s, x, y, prev_x, prev_y);
+            }
+            prev_x = x;
+            prev_y = y;
         }
         break;
+    default:
+        av_assert0(0);
     }
 
+    s->prev_x = x, s->prev_y = y;
     av_frame_free(&insamples);
 
     return ff_filter_frame(outlink, av_frame_clone(s->outpicref));
diff --git a/libavfilter/avf_concat.c b/libavfilter/avf_concat.c
index 088d782d..4fa94476 100644
--- a/libavfilter/avf_concat.c
+++ b/libavfilter/avf_concat.c
@@ -68,7 +68,7 @@ static const AVOption concat_options[] = {
       AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, A|F},
     { "unsafe", "enable unsafe mode",
       OFFSET(unsafe),
-      AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, V|A|F},
+      AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, V|A|F},
     { NULL }
 };
 
@@ -80,6 +80,7 @@ static int query_formats(AVFilterContext *ctx)
     unsigned type, nb_str, idx0 = 0, idx, str, seg;
     AVFilterFormats *formats, *rates = NULL;
     AVFilterChannelLayouts *layouts = NULL;
+    int ret;
 
     for (type = 0; type < TYPE_ALL; type++) {
         nb_str = cat->nb_streams[type];
@@ -88,26 +89,26 @@ static int query_formats(AVFilterContext *ctx)
 
             /* Set the output formats */
             formats = ff_all_formats(type);
-            if (!formats)
-                return AVERROR(ENOMEM);
-            ff_formats_ref(formats, &ctx->outputs[idx]->in_formats);
+            if ((ret = ff_formats_ref(formats, &ctx->outputs[idx]->in_formats)) < 0)
+                return ret;
+
             if (type == AVMEDIA_TYPE_AUDIO) {
                 rates = ff_all_samplerates();
-                if (!rates)
-                    return AVERROR(ENOMEM);
-                ff_formats_ref(rates, &ctx->outputs[idx]->in_samplerates);
+                if ((ret = ff_formats_ref(rates, &ctx->outputs[idx]->in_samplerates)) < 0)
+                    return ret;
                 layouts = ff_all_channel_layouts();
-                if (!layouts)
-                    return AVERROR(ENOMEM);
-                ff_channel_layouts_ref(layouts, &ctx->outputs[idx]->in_channel_layouts);
+                if ((ret = ff_channel_layouts_ref(layouts, &ctx->outputs[idx]->in_channel_layouts)) < 0)
+                    return ret;
             }
 
             /* Set the same formats for each corresponding input */
             for (seg = 0; seg < cat->nb_segments; seg++) {
-                ff_formats_ref(formats, &ctx->inputs[idx]->out_formats);
+                if ((ret = ff_formats_ref(formats, &ctx->inputs[idx]->out_formats)) < 0)
+                    return ret;
                 if (type == AVMEDIA_TYPE_AUDIO) {
-                    ff_formats_ref(rates, &ctx->inputs[idx]->out_samplerates);
-                    ff_channel_layouts_ref(layouts, &ctx->inputs[idx]->out_channel_layouts);
+                    if ((ret = ff_formats_ref(rates, &ctx->inputs[idx]->out_samplerates)) < 0 ||
+                        (ret = ff_channel_layouts_ref(layouts, &ctx->inputs[idx]->out_channel_layouts)) < 0)
+                        return ret;
                 }
                 idx += ctx->nb_outputs;
             }
@@ -346,10 +347,9 @@ static int request_frame(AVFilterLink *outlink)
             if (cat->in[str].eof)
                 continue;
             ret = ff_request_frame(ctx->inputs[str]);
-            if (ret == AVERROR_EOF)
-                close_input(ctx, str);
-            else if (ret < 0)
+            if (ret != AVERROR_EOF)
                 return ret;
+            close_input(ctx, str);
         }
         ret = flush_segment(ctx);
         if (ret < 0)
diff --git a/libavfilter/avf_showcqt.c b/libavfilter/avf_showcqt.c
index 85f9ea97..712a9997 100644
--- a/libavfilter/avf_showcqt.c
+++ b/libavfilter/avf_showcqt.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014 Muhammad Faiz <mfcc64@gmail.com>
+ * Copyright (c) 2014-2015 Muhammad Faiz <mfcc64@gmail.com>
  *
  * This file is part of FFmpeg.
  *
@@ -18,177 +18,507 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include <math.h>
+#include <stdlib.h>
+
 #include "config.h"
 #include "libavcodec/avfft.h"
 #include "libavutil/avassert.h"
-#include "libavutil/channel_layout.h"
 #include "libavutil/opt.h"
 #include "libavutil/xga_font_data.h"
-#include "libavutil/qsort.h"
-#include "libavutil/time.h"
 #include "libavutil/eval.h"
+#include "libavutil/pixdesc.h"
 #include "avfilter.h"
 #include "internal.h"
-
-#include <math.h>
-#include <stdlib.h>
+#include "lavfutils.h"
+#include "lswsutils.h"
 
 #if CONFIG_LIBFREETYPE
 #include <ft2build.h>
 #include FT_FREETYPE_H
 #endif
 
-/* this filter is designed to do 16 bins/semitones constant Q transform with Brown-Puckette algorithm
- * start from E0 to D#10 (10 octaves)
- * so there are 16 bins/semitones * 12 semitones/octaves * 10 octaves = 1920 bins
- * match with full HD resolution */
-
-#define VIDEO_WIDTH 1920
-#define VIDEO_HEIGHT 1080
-#define FONT_HEIGHT 32
-#define SPECTOGRAM_HEIGHT ((VIDEO_HEIGHT-FONT_HEIGHT)/2)
-#define SPECTOGRAM_START (VIDEO_HEIGHT-SPECTOGRAM_HEIGHT)
-#define BASE_FREQ 20.051392800492
-#define COEFF_CLAMP 1.0e-4
-#define TLENGTH_MIN 0.001
-#define TLENGTH_DEFAULT "384/f*tc/(384/f+tc)"
-#define VOLUME_MIN 1e-10
-#define VOLUME_MAX 100.0
-#define FONTCOLOR_DEFAULT "st(0, (midi(f)-59.5)/12);" \
+#include "avf_showcqt.h"
+
+#define BASEFREQ        20.01523126408007475
+#define ENDFREQ         20495.59681441799654
+#define TLENGTH         "384*tc/(384+tc*f)"
+#define TLENGTH_MIN     0.001
+#define VOLUME_MAX      100.0
+#define FONTCOLOR       "st(0, (midi(f)-59.5)/12);" \
     "st(1, if(between(ld(0),0,1), 0.5-0.5*cos(2*PI*ld(0)), 0));" \
     "r(1-ld(1)) + b(ld(1))"
 
-typedef struct {
-    FFTSample value;
-    int index;
-} SparseCoeff;
-
-typedef struct {
-    const AVClass *class;
-    AVFrame *outpicref;
-    FFTContext *fft_context;
-    FFTComplex *fft_data;
-    FFTComplex *fft_result_left;
-    FFTComplex *fft_result_right;
-    uint8_t *spectogram;
-    SparseCoeff *coeff_sort;
-    SparseCoeff *coeffs[VIDEO_WIDTH];
-    uint8_t *font_alpha;
-    char *fontfile;     /* using freetype */
-    int coeffs_len[VIDEO_WIDTH];
-    uint8_t fontcolor_value[VIDEO_WIDTH*3];  /* result of fontcolor option */
-    int64_t frame_count;
-    int spectogram_count;
-    int spectogram_index;
-    int fft_bits;
-    int req_fullfilled;
-    int remaining_fill;
-    char *tlength;
-    char *volume;
-    char *fontcolor;
-    double timeclamp;   /* lower timeclamp, time-accurate, higher timeclamp, freq-accurate (at low freq)*/
-    float coeffclamp;   /* lower coeffclamp, more precise, higher coeffclamp, faster */
-    int fullhd;         /* if true, output video is at full HD resolution, otherwise it will be halved */
-    float gamma;        /* lower gamma, more contrast, higher gamma, more range */
-    float gamma2;       /* gamma of bargraph */
-    int fps;            /* the required fps is so strict, so it's enough to be int, but 24000/1001 etc cannot be encoded */
-    int count;          /* fps * count = transform rate */
-} ShowCQTContext;
-
 #define OFFSET(x) offsetof(ShowCQTContext, x)
-#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
+#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM)
 
 static const AVOption showcqt_options[] = {
-    { "volume", "set volume", OFFSET(volume), AV_OPT_TYPE_STRING, { .str = "16" }, CHAR_MIN, CHAR_MAX, FLAGS },
-    { "tlength", "set transform length", OFFSET(tlength), AV_OPT_TYPE_STRING, { .str = TLENGTH_DEFAULT }, CHAR_MIN, CHAR_MAX, FLAGS },
-    { "timeclamp", "set timeclamp", OFFSET(timeclamp), AV_OPT_TYPE_DOUBLE, { .dbl = 0.17 }, 0.1, 1.0, FLAGS },
-    { "coeffclamp", "set coeffclamp", OFFSET(coeffclamp), AV_OPT_TYPE_FLOAT, { .dbl = 1 }, 0.1, 10, FLAGS },
-    { "gamma", "set gamma", OFFSET(gamma), AV_OPT_TYPE_FLOAT, { .dbl = 3 }, 1, 7, FLAGS },
-    { "gamma2", "set gamma of bargraph", OFFSET(gamma2), AV_OPT_TYPE_FLOAT, { .dbl = 1 }, 1, 7, FLAGS },
-    { "fullhd", "set full HD resolution", OFFSET(fullhd), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, FLAGS },
-    { "fps", "set video fps", OFFSET(fps), AV_OPT_TYPE_INT, { .i64 = 25 }, 10, 100, FLAGS },
-    { "count", "set number of transform per frame", OFFSET(count), AV_OPT_TYPE_INT, { .i64 = 6 }, 1, 30, FLAGS },
-    { "fontfile", "set font file", OFFSET(fontfile), AV_OPT_TYPE_STRING, { .str = NULL }, CHAR_MIN, CHAR_MAX, FLAGS },
-    { "fontcolor", "set font color", OFFSET(fontcolor), AV_OPT_TYPE_STRING, { .str = FONTCOLOR_DEFAULT }, CHAR_MIN, CHAR_MAX, FLAGS },
+    { "size",         "set video size", OFFSET(width), AV_OPT_TYPE_IMAGE_SIZE, { .str = "1920x1080" },      0, 0,        FLAGS },
+    { "s",            "set video size", OFFSET(width), AV_OPT_TYPE_IMAGE_SIZE, { .str = "1920x1080" },      0, 0,        FLAGS },
+    { "fps",          "set video rate", OFFSET(rate),  AV_OPT_TYPE_VIDEO_RATE, { .str = "25" },             0, 0,        FLAGS },
+    { "rate",         "set video rate", OFFSET(rate),  AV_OPT_TYPE_VIDEO_RATE, { .str = "25" },             0, 0,        FLAGS },
+    { "r",            "set video rate", OFFSET(rate),  AV_OPT_TYPE_VIDEO_RATE, { .str = "25" },             0, 0,        FLAGS },
+    { "bar_h",   "set bargraph height", OFFSET(bar_h),        AV_OPT_TYPE_INT, { .i64 = -1 },              -1, INT_MAX,  FLAGS },
+    { "axis_h",      "set axis height", OFFSET(axis_h),       AV_OPT_TYPE_INT, { .i64 = -1 },              -1, INT_MAX,  FLAGS },
+    { "sono_h",  "set sonogram height", OFFSET(sono_h),       AV_OPT_TYPE_INT, { .i64 = -1 },              -1, INT_MAX,  FLAGS },
+    { "fullhd",      "set fullhd size", OFFSET(fullhd),      AV_OPT_TYPE_BOOL, { .i64 = 1 },                0, 1,        FLAGS },
+    { "sono_v",  "set sonogram volume", OFFSET(sono_v),    AV_OPT_TYPE_STRING, { .str = "16" },      CHAR_MIN, CHAR_MAX, FLAGS },
+    { "volume",  "set sonogram volume", OFFSET(sono_v),    AV_OPT_TYPE_STRING, { .str = "16" },      CHAR_MIN, CHAR_MAX, FLAGS },
+    { "bar_v",   "set bargraph volume", OFFSET(bar_v),     AV_OPT_TYPE_STRING, { .str = "sono_v" },  CHAR_MIN, CHAR_MAX, FLAGS },
+    { "volume2", "set bargraph volume", OFFSET(bar_v),     AV_OPT_TYPE_STRING, { .str = "sono_v" },  CHAR_MIN, CHAR_MAX, FLAGS },
+    { "sono_g",   "set sonogram gamma", OFFSET(sono_g),     AV_OPT_TYPE_FLOAT, { .dbl = 3.0 },            1.0, 7.0,      FLAGS },
+    { "gamma",    "set sonogram gamma", OFFSET(sono_g),     AV_OPT_TYPE_FLOAT, { .dbl = 3.0 },            1.0, 7.0,      FLAGS },
+    { "bar_g",    "set bargraph gamma", OFFSET(bar_g),      AV_OPT_TYPE_FLOAT, { .dbl = 1.0 },            1.0, 7.0,      FLAGS },
+    { "gamma2",   "set bargraph gamma", OFFSET(bar_g),      AV_OPT_TYPE_FLOAT, { .dbl = 1.0 },            1.0, 7.0,      FLAGS },
+    { "timeclamp",     "set timeclamp", OFFSET(timeclamp), AV_OPT_TYPE_DOUBLE, { .dbl = 0.17 },           0.1, 1.0,      FLAGS },
+    { "tc",            "set timeclamp", OFFSET(timeclamp), AV_OPT_TYPE_DOUBLE, { .dbl = 0.17 },           0.1, 1.0,      FLAGS },
+    { "basefreq", "set base frequency", OFFSET(basefreq),  AV_OPT_TYPE_DOUBLE, { .dbl = BASEFREQ },      10.0, 100000.0, FLAGS },
+    { "endfreq",   "set end frequency", OFFSET(endfreq),   AV_OPT_TYPE_DOUBLE, { .dbl = ENDFREQ },       10.0, 100000.0, FLAGS },
+    { "coeffclamp",   "set coeffclamp", OFFSET(coeffclamp), AV_OPT_TYPE_FLOAT, { .dbl = 1.0 },            0.1, 10.0,     FLAGS },
+    { "tlength",         "set tlength", OFFSET(tlength),   AV_OPT_TYPE_STRING, { .str = TLENGTH },   CHAR_MIN, CHAR_MAX, FLAGS },
+    { "count",   "set transform count", OFFSET(count),        AV_OPT_TYPE_INT, { .i64 = 6 },                1, 30,       FLAGS },
+    { "fcount",  "set frequency count", OFFSET(fcount),       AV_OPT_TYPE_INT, { .i64 = 0 },                0, 10,       FLAGS },
+    { "fontfile",      "set axis font", OFFSET(fontfile),  AV_OPT_TYPE_STRING, { .str = NULL },      CHAR_MIN, CHAR_MAX, FLAGS },
+    { "fontcolor",    "set font color", OFFSET(fontcolor), AV_OPT_TYPE_STRING, { .str = FONTCOLOR }, CHAR_MIN, CHAR_MAX, FLAGS },
+    { "axisfile",     "set axis image", OFFSET(axisfile),  AV_OPT_TYPE_STRING, { .str = NULL },      CHAR_MIN, CHAR_MAX, FLAGS },
+    { "axis",              "draw axis", OFFSET(axis),        AV_OPT_TYPE_BOOL, { .i64 = 1 },                0, 1,        FLAGS },
+    { "text",              "draw axis", OFFSET(axis),        AV_OPT_TYPE_BOOL, { .i64 = 1 },                0, 1,        FLAGS },
     { NULL }
 };
 
 AVFILTER_DEFINE_CLASS(showcqt);
 
-static av_cold void uninit(AVFilterContext *ctx)
+static void common_uninit(ShowCQTContext *s)
 {
     int k;
 
-    ShowCQTContext *s = ctx->priv;
-    av_fft_end(s->fft_context);
-    s->fft_context = NULL;
-    for (k = 0; k < VIDEO_WIDTH; k++)
-        av_freep(&s->coeffs[k]);
+    /* axis_frame may be a non-reference-counted frame */
+    if (s->axis_frame && !s->axis_frame->buf[0]) {
+        av_freep(s->axis_frame->data);
+        for (k = 0; k < 4; k++)
+            s->axis_frame->data[k] = NULL;
+    }
+
+    av_frame_free(&s->axis_frame);
+    av_frame_free(&s->sono_frame);
+    av_fft_end(s->fft_ctx);
+    s->fft_ctx = NULL;
+    if (s->coeffs)
+        for (k = 0; k < s->cqt_len * 2; k++)
+            av_freep(&s->coeffs[k].val);
+    av_freep(&s->coeffs);
     av_freep(&s->fft_data);
-    av_freep(&s->fft_result_left);
-    av_freep(&s->fft_result_right);
-    av_freep(&s->coeff_sort);
-    av_freep(&s->spectogram);
-    av_freep(&s->font_alpha);
-    av_frame_free(&s->outpicref);
+    av_freep(&s->fft_result);
+    av_freep(&s->cqt_result);
+    av_freep(&s->c_buf);
+    av_freep(&s->h_buf);
+    av_freep(&s->rcp_h_buf);
+    av_freep(&s->freq);
+    av_freep(&s->sono_v_buf);
+    av_freep(&s->bar_v_buf);
 }
 
-static int query_formats(AVFilterContext *ctx)
+static double *create_freq_table(double base, double end, int n)
 {
-    AVFilterFormats *formats = NULL;
-    AVFilterChannelLayouts *layouts = NULL;
-    AVFilterLink *inlink = ctx->inputs[0];
-    AVFilterLink *outlink = ctx->outputs[0];
-    static const enum AVSampleFormat sample_fmts[] = { AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_NONE };
-    static const enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_RGB24, AV_PIX_FMT_NONE };
-    static const int64_t channel_layouts[] = { AV_CH_LAYOUT_STEREO, AV_CH_LAYOUT_STEREO_DOWNMIX, -1 };
-    static const int samplerates[] = { 44100, 48000, -1 };
+    double log_base, log_end;
+    double rcp_n = 1.0 / n;
+    double *freq;
+    int x;
 
-    /* set input audio formats */
-    formats = ff_make_format_list(sample_fmts);
-    if (!formats)
-        return AVERROR(ENOMEM);
-    ff_formats_ref(formats, &inlink->out_formats);
+    freq = av_malloc_array(n, sizeof(*freq));
+    if (!freq)
+        return NULL;
 
-    layouts = avfilter_make_format64_list(channel_layouts);
-    if (!layouts)
-        return AVERROR(ENOMEM);
-    ff_channel_layouts_ref(layouts, &inlink->out_channel_layouts);
+    log_base = log(base);
+    log_end  = log(end);
+    for (x = 0; x < n; x++) {
+        double log_freq = log_base + (x + 0.5) * (log_end - log_base) * rcp_n;
+        freq[x] = exp(log_freq);
+    }
+    return freq;
+}
+
+/* Clamp val to [min, max]; NaN becomes nan_replace. Each change is logged. */
+static double clip_with_log(void *log_ctx, const char *name, double val,
+                            double min, double max, double nan_replace, int idx)
+{
+    int level = AV_LOG_WARNING;
+    if (isnan(val)) {
+        av_log(log_ctx, level, "[%d] %s is nan, setting it to %g.\n",
+               idx, name, nan_replace);
+        val = nan_replace;
+    } else if (val < min) {
+        av_log(log_ctx, level, "[%d] %s is too low (%g), setting it to %g.\n",
+               idx, name, val, min);
+        val = min;
+    } else if (val > max) {
+        av_log(log_ctx, level, "[%d] %s is too high (%g), setting it to %g.\n",
+               idx, name, val, max);
+        val = max;
+    }
+    return val;
+}
+
+static double a_weighting(void *p, double f)
+{ /* Gain factor for frequency f; p is unused. Presumably the A-weighting response -- TODO confirm constants. */
+    double ret = 12200.0*12200.0 * (f*f*f*f);
+    ret /= (f*f + 20.6*20.6) * (f*f + 12200.0*12200.0) *
+           sqrt((f*f + 107.7*107.7) * (f*f + 737.9*737.9));
+    return ret; /* evaluates to 0 for f == 0 (numerator vanishes, denominator does not) */
+}
+
+static double b_weighting(void *p, double f)
+{ /* Gain factor for frequency f; p is unused. Presumably the B-weighting response -- TODO confirm constants. */
+    double ret = 12200.0*12200.0 * (f*f*f);
+    ret /= (f*f + 20.6*20.6) * (f*f + 12200.0*12200.0) * sqrt(f*f + 158.5*158.5);
+    return ret;
+}
+
+static double c_weighting(void *p, double f)
+{ /* Gain factor for frequency f; p is unused. Presumably the C-weighting response -- TODO confirm constants. */
+    double ret = 12200.0*12200.0 * (f*f);
+    ret /= (f*f + 20.6*20.6) * (f*f + 12200.0*12200.0);
+    return ret;
+}
+
+static int init_volume(ShowCQTContext *s)
+{ /* Fill s->sono_v_buf and s->bar_v_buf with the squared, clipped volume of each of the cqt_len bins. */
+    const char *func_names[] = { "a_weighting", "b_weighting", "c_weighting", NULL };
+    const char *sono_names[] = { "timeclamp", "tc", "frequency", "freq", "f", "bar_v", NULL };
+    const char *bar_names[] = { "timeclamp", "tc", "frequency", "freq", "f", "sono_v", NULL };
+    double (*funcs[])(void *, double) = { a_weighting, b_weighting, c_weighting };
+    AVExpr *sono = NULL, *bar = NULL;
+    int x, ret = AVERROR(ENOMEM); /* ret is preset for the allocation-failure path */
+
+    s->sono_v_buf = av_malloc_array(s->cqt_len, sizeof(*s->sono_v_buf));
+    s->bar_v_buf = av_malloc_array(s->cqt_len, sizeof(*s->bar_v_buf));
+    if (!s->sono_v_buf || !s->bar_v_buf)
+        goto error;
+
+    if ((ret = av_expr_parse(&sono, s->sono_v, sono_names, func_names, funcs, NULL, NULL, 0, s->ctx)) < 0)
+        goto error;
+
+    if ((ret = av_expr_parse(&bar, s->bar_v, bar_names, func_names, funcs, NULL, NULL, 0, s->ctx)) < 0)
+        goto error;
+
+    for (x = 0; x < s->cqt_len; x++) {
+        double vars[] = { s->timeclamp, s->timeclamp, s->freq[x], s->freq[x], s->freq[x], 0.0 }; /* slot 5: "bar_v" for sono, "sono_v" for bar */
+        double vol = clip_with_log(s->ctx, "sono_v", av_expr_eval(sono, vars, NULL), 0.0, VOLUME_MAX, 0.0, x); /* provisional sono_v, with bar_v = 0 */
+        vars[5] = vol; /* the bar expression sees the provisional sono_v */
+        vol = clip_with_log(s->ctx, "bar_v", av_expr_eval(bar, vars, NULL), 0.0, VOLUME_MAX, 0.0, x);
+        s->bar_v_buf[x] = vol * vol;
+        vars[5] = vol; /* the sono expression is re-evaluated with the final bar_v */
+        vol = clip_with_log(s->ctx, "sono_v", av_expr_eval(sono, vars, NULL), 0.0, VOLUME_MAX, 0.0, x);
+        s->sono_v_buf[x] = vol * vol;
+    }
+    av_expr_free(sono);
+    av_expr_free(bar);
+    return 0;
+
+error: /* releases both buffers and both expressions, whichever of them exist */
+    av_freep(&s->sono_v_buf);
+    av_freep(&s->bar_v_buf);
+    av_expr_free(sono);
+    av_expr_free(bar);
+    return ret;
+}
+
+static void cqt_calc(FFTComplex *dst, const FFTComplex *src, const Coeffs *coeffs,
+                     int len, int fft_len)
+{ /* For each of the len outputs, sum weighted src bins and their mirrored bins, then store two squared magnitudes. */
+    int k, x, i, j;
+    for (k = 0; k < len; k++) {
+        FFTComplex l, r, a = {0,0}, b = {0,0}; /* a sums bins i, b sums mirrored bins j */
+
+        for (x = 0; x < coeffs[k].len; x++) {
+            FFTSample u = coeffs[k].val[x];
+            i = coeffs[k].start + x;
+            j = fft_len - i; /* NOTE(review): j == fft_len when i == 0, so src[fft_len] is read -- confirm src has that element */
+            a.re += u * src[i].re;
+            a.im += u * src[i].im;
+            b.re += u * src[j].re;
+            b.im += u * src[j].im;
+        }
+
+        /* separate left and right, (and multiply by 2.0) */
+        l.re = a.re + b.re;
+        l.im = a.im - b.im;
+        r.re = b.im + a.im;
+        r.im = b.re - a.re;
+        dst[k].re = l.re * l.re + l.im * l.im; /* .re carries |l|^2 */
+        dst[k].im = r.re * r.re + r.im * r.im; /* .im carries |r|^2, so dst[k] is a pair of powers, not a complex number */
+    }
+}
+
+#if 0
+static void cqt_calc_interleave(FFTComplex *dst, const FFTComplex *src, const Coeffs *coeffs,
+                                int len, int fft_len)
+{
+    int k, x, i, m;
+
+    for (k = 0; k < len; k++) {
+        FFTComplex l, r, a = {0,0}, b = {0,0};
+
+        m = 2 * k;
+        for (x = 0; x < coeffs[m].len; x++) {
+            FFTSample u = coeffs[m].val[x];
+            i = coeffs[m].start + x;
+            a.re += u * src[i].re;
+            a.im += u * src[i].im;
+        }
+
+        m++;
+        for (x = 0; x < coeffs[m].len; x++) {
+            FFTSample u = coeffs[m].val[x];
+            i = coeffs[m].start + x;
+            b.re += u * src[i].re;
+            b.im += u * src[i].im;
+        }
+
+        /* separate left and right, (and multiply by 2.0) */
+        l.re = a.re + b.re;
+        l.im = a.im - b.im;
+        r.re = b.im + a.im;
+        r.im = b.re - a.re;
+        dst[k].re = l.re * l.re + l.im * l.im;
+        dst[k].im = r.re * r.re + r.im * r.im;
+    }
+}
+#endif
+
+static int init_cqt(ShowCQTContext *s)
+{ /* Build the per-bin coefficient windows in s->coeffs from the tlength expression; returns 0 or a negative error. */
+    const char *var_names[] = { "timeclamp", "tc", "frequency", "freq", "f", NULL };
+    AVExpr *expr = NULL;
+    int rate = s->ctx->inputs[0]->sample_rate;
+    int nb_cqt_coeffs = 0, nb_cqt_coeffs_r = 0; /* totals reported in the log message at the end */
+    int k, x, ret;
+
+    if ((ret = av_expr_parse(&expr, s->tlength, var_names, NULL, NULL, NULL, NULL, 0, s->ctx)) < 0)
+        goto error;
+
+    ret = AVERROR(ENOMEM); /* every remaining failure below is an allocation failure */
+    if (!(s->coeffs = av_calloc(s->cqt_len * 2, sizeof(*s->coeffs))))
+        goto error;
+
+    for (k = 0; k < s->cqt_len; k++) {
+        double vars[] = { s->timeclamp, s->timeclamp, s->freq[k], s->freq[k], s->freq[k] };
+        double flen, center, tlength;
+        int start, end, m = (s->cqt_coeffs_type == COEFFS_TYPE_INTERLEAVE) ? (2 * k) : k; /* interleaved layout uses entries 2k and 2k+1 */
+
+        if (s->freq[k] > 0.5 * rate)
+            continue; /* bins above half the sample rate get no coefficients */
+        tlength = clip_with_log(s->ctx, "tlength", av_expr_eval(expr, vars, NULL),
+                                TLENGTH_MIN, s->timeclamp, s->timeclamp, k);
+
+        flen = 8.0 * s->fft_len / (tlength * rate); /* window width, in FFT bins */
+        center = s->freq[k] * s->fft_len / rate; /* window center, in FFT bins */
+        start = FFMAX(0, ceil(center - 0.5 * flen));
+        end = FFMIN(s->fft_len, floor(center + 0.5 * flen));
+
+        s->coeffs[m].start = start & ~(s->cqt_align - 1); /* rounds start down; assumes cqt_align is a power of two -- TODO confirm */
+        s->coeffs[m].len = (end | (s->cqt_align - 1)) + 1 - s->coeffs[m].start; /* covers end, padded to the same alignment */
+        nb_cqt_coeffs += s->coeffs[m].len;
+        if (!(s->coeffs[m].val = av_calloc(s->coeffs[m].len, sizeof(*s->coeffs[m].val))))
+            goto error;
+
+        if (s->cqt_coeffs_type == COEFFS_TYPE_INTERLEAVE) {
+            s->coeffs[m+1].start = (s->fft_len - end) & ~(s->cqt_align - 1); /* mirrored window: fft_len - end .. fft_len - start */
+            s->coeffs[m+1].len = ((s->fft_len - start) | (s->cqt_align - 1)) + 1 - s->coeffs[m+1].start;
+            nb_cqt_coeffs_r += s->coeffs[m+1].len;
+            if (!(s->coeffs[m+1].val = av_calloc(s->coeffs[m+1].len, sizeof(*s->coeffs[m+1].val))))
+                goto error;
+        }
+
+        for (x = start; x <= end; x++) {
+            int sign = (x & 1) ? (-1) : 1; /* negative on odd bins */
+            double y = 2.0 * M_PI * (x - center) * (1.0 / flen);
+            /* nuttall window */
+            double w = 0.355768 + 0.487396 * cos(y) + 0.144232 * cos(2*y) + 0.012604 * cos(3*y);
+            w *= sign * (1.0 / s->fft_len);
+            s->coeffs[m].val[x - s->coeffs[m].start] = w;
+            if (s->cqt_coeffs_type == COEFFS_TYPE_INTERLEAVE)
+                s->coeffs[m+1].val[(s->fft_len - x) - s->coeffs[m+1].start] = w; /* same weight stored at the mirrored bin */
+        }
+    }
+
+    av_expr_free(expr);
+    if (s->cqt_coeffs_type == COEFFS_TYPE_DEFAULT)
+        av_log(s->ctx, AV_LOG_INFO, "nb_cqt_coeffs = %d.\n", nb_cqt_coeffs);
+    else
+        av_log(s->ctx, AV_LOG_INFO, "nb_cqt_coeffs = {%d,%d}.\n", nb_cqt_coeffs, nb_cqt_coeffs_r);
+    return 0;
+
+error: /* frees the expression and every coefficient array allocated so far */
+    av_expr_free(expr);
+    if (s->coeffs)
+        for (k = 0; k < s->cqt_len * 2; k++)
+            av_freep(&s->coeffs[k].val);
+    av_freep(&s->coeffs);
+    return ret;
+}
+
+static AVFrame *alloc_frame_empty(enum AVPixelFormat format, int w, int h)
+{ /* Allocate a w x h frame of the given format and fill it with constant bytes; returns NULL on failure. */
+    AVFrame *out;
+    out = av_frame_alloc();
+    if (!out)
+        return NULL;
+    out->format = format;
+    out->width = w;
+    out->height = h;
+    if (av_frame_get_buffer(out, 32) < 0) {
+        av_frame_free(&out);
+        return NULL;
+    }
+    if (format == AV_PIX_FMT_RGB24 || format == AV_PIX_FMT_RGBA) {
+        memset(out->data[0], 0, out->linesize[0] * h); /* single plane, all bytes zero */
+    } else {
+        int hh = (format == AV_PIX_FMT_YUV420P || format == AV_PIX_FMT_YUVA420P) ? h / 2 : h; /* planes 1 and 2 span h/2 rows for the 420 formats */
+        memset(out->data[0], 16, out->linesize[0] * h); /* 16/128/128: presumably limited-range YUV black -- TODO confirm */
+        memset(out->data[1], 128, out->linesize[1] * hh);
+        memset(out->data[2], 128, out->linesize[2] * hh);
+        if (out->data[3]) /* a fourth plane is zeroed only when present */
+            memset(out->data[3], 0, out->linesize[3] * h);
+    }
+    return out;
+}
+
+static enum AVPixelFormat convert_axis_pixel_format(enum AVPixelFormat format)
+{ /* Map each listed format to its RGBA/YUVA counterpart; any other format is returned unchanged. */
+    switch (format) {
+        case AV_PIX_FMT_RGB24:   format = AV_PIX_FMT_RGBA; break;
+        case AV_PIX_FMT_YUV444P: format = AV_PIX_FMT_YUVA444P; break;
+        case AV_PIX_FMT_YUV422P: format = AV_PIX_FMT_YUVA422P; break;
+        case AV_PIX_FMT_YUV420P: format = AV_PIX_FMT_YUVA420P; break;
+    } /* no default case: unlisted formats fall through untouched */
+    return format;
+}
 
-    formats = ff_make_format_list(samplerates);
-    if (!formats)
+static int init_axis_empty(ShowCQTContext *s)
+{
+    if (!(s->axis_frame = alloc_frame_empty(convert_axis_pixel_format(s->format), s->width, s->axis_h)))
         return AVERROR(ENOMEM);
-    ff_formats_ref(formats, &inlink->out_samplerates);
+    return 0;
+}
 
-    /* set output video format */
-    formats = ff_make_format_list(pix_fmts);
-    if (!formats)
+static int init_axis_from_file(ShowCQTContext *s)
+{ /* Load s->axisfile and scale it to s->width x s->axis_h into s->axis_frame; returns 0 or a negative error. */
+    uint8_t *tmp_data[4] = { NULL }; /* NULL-initialised so the error path can free it before anything was loaded */
+    int tmp_linesize[4];
+    enum AVPixelFormat tmp_format;
+    int tmp_w, tmp_h, ret;
+
+    if ((ret = ff_load_image(tmp_data, tmp_linesize, &tmp_w, &tmp_h, &tmp_format,
+                             s->axisfile, s->ctx)) < 0)
+        goto error;
+
+    ret = AVERROR(ENOMEM); /* only reported if av_frame_alloc() fails; overwritten by ff_scale_image() below */
+    if (!(s->axis_frame = av_frame_alloc()))
+        goto error;
+
+    if ((ret = ff_scale_image(s->axis_frame->data, s->axis_frame->linesize, s->width, s->axis_h,
+                              convert_axis_pixel_format(s->format), tmp_data, tmp_linesize, tmp_w, tmp_h,
+                              tmp_format, s->ctx)) < 0)
+        goto error;
+
+    s->axis_frame->width = s->width; /* frame fields are filled in by hand; the pixel data was written directly into data[] */
+    s->axis_frame->height = s->axis_h;
+    s->axis_frame->format = convert_axis_pixel_format(s->format);
+    av_freep(tmp_data); /* passes &tmp_data[0]: the loaded source image is no longer needed */
+    return 0;
+
+error:
+    av_frame_free(&s->axis_frame);
+    av_freep(tmp_data);
+    return ret;
+}
+
+static double midi(void *p, double f)
+{ /* Map frequency f to a note number on a log2 scale; p is unused. */
+    return log2(f/440.0) * 12.0 + 69.0; /* 12 steps per doubling of f; f == 440.0 gives 69 */
+}
+
+static double r_func(void *p, double x)
+{ /* Pack x into bits 16-23 of an integer colour value; p is unused. */
+    x = av_clipd(x, 0.0, 1.0);
+    return (int)(x*255.0+0.5) << 16; /* scale to 0..255 with rounding, then shift into place */
+}
+
+static double g_func(void *p, double x)
+{ /* Pack x into bits 8-15 of an integer colour value; p is unused. */
+    x = av_clipd(x, 0.0, 1.0);
+    return (int)(x*255.0+0.5) << 8; /* scale to 0..255 with rounding, then shift into place */
+}
+
+static double b_func(void *p, double x)
+{ /* Pack x into bits 0-7 of an integer colour value; p is unused. */
+    x = av_clipd(x, 0.0, 1.0);
+    return (int)(x*255.0+0.5); /* scale to 0..255 with rounding */
+}
+
+static int init_axis_color(ShowCQTContext *s, AVFrame *tmp)
+{
+    const char *var_names[] = { "timeclamp", "tc", "frequency", "freq", "f", NULL };
+    const char *func_names[] = { "midi", "r", "g", "b", NULL };
+    double (*funcs[])(void *, double) = { midi, r_func, g_func, b_func };
+    AVExpr *expr = NULL;
+    double *freq = NULL;
+    int x, y, ret;
+
+    if (s->basefreq != (double) BASEFREQ || s->endfreq != (double) ENDFREQ) {
+        av_log(s->ctx, AV_LOG_WARNING, "font axis rendering is not implemented in non-default frequency range,"
+               " please use axisfile option instead.\n");
+        return AVERROR(EINVAL);
+    }
+
+    if (s->cqt_len == 1920)
+        freq = s->freq;
+    else if (!(freq = create_freq_table(s->basefreq, s->endfreq, 1920)))
         return AVERROR(ENOMEM);
-    ff_formats_ref(formats, &outlink->in_formats);
 
+    if ((ret = av_expr_parse(&expr, s->fontcolor, var_names, func_names, funcs, NULL, NULL, 0, s->ctx)) < 0) {
+        if (freq != s->freq)
+            av_freep(&freq);
+        return ret;
+    }
+
+    for (x = 0; x < 1920; x++) {
+        double vars[] = { s->timeclamp, s->timeclamp, freq[x], freq[x], freq[x] };
+        int color = (int) av_expr_eval(expr, vars, NULL);
+        uint8_t r = (color >> 16) & 0xFF, g = (color >> 8) & 0xFF, b = color & 0xFF;
+        uint8_t *data = tmp->data[0];
+        int linesize = tmp->linesize[0];
+        for (y = 0; y < 32; y++) {
+            data[linesize * y + 4 * x] = r;
+            data[linesize * y + 4 * x + 1] = g;
+            data[linesize * y + 4 * x + 2] = b;
+            data[linesize * y + 4 * x + 3] = 0;
+        }
+    }
+
+    av_expr_free(expr);
+    if (freq != s->freq)
+        av_freep(&freq);
     return 0;
 }
 
-#if CONFIG_LIBFREETYPE
-static void load_freetype_font(AVFilterContext *ctx)
+static int render_freetype(ShowCQTContext *s, AVFrame *tmp)
 {
-    static const char str[] = "EF G A BC D ";
-    ShowCQTContext *s = ctx->priv;
+#if CONFIG_LIBFREETYPE
+    const char *str = "EF G A BC D ";
+    uint8_t *data = tmp->data[0];
+    int linesize = tmp->linesize[0];
     FT_Library lib = NULL;
     FT_Face face = NULL;
-    int video_scale = s->fullhd ? 2 : 1;
-    int video_width = (VIDEO_WIDTH/2) * video_scale;
-    int font_height = (FONT_HEIGHT/2) * video_scale;
-    int font_width = 8 * video_scale;
+    int font_width = 16, font_height = 32;
     int font_repeat = font_width * 12;
     int linear_hori_advance = font_width * 65536;
     int non_monospace_warning = 0;
     int x;
 
-    s->font_alpha = NULL;
-
     if (!s->fontfile)
-        return;
+        return AVERROR(EINVAL);
 
     if (FT_Init_FreeType(&lib))
         goto fail;
@@ -205,12 +535,6 @@ static void load_freetype_font(AVFilterContext *ctx)
     if (FT_Set_Char_Size(face, 16*64 * linear_hori_advance / face->glyph->linearHoriAdvance, 0, 0, 0))
         goto fail;
 
-    s->font_alpha = av_malloc_array(font_height, video_width);
-    if (!s->font_alpha)
-        goto fail;
-
-    memset(s->font_alpha, 0, font_height * video_width);
-
     for (x = 0; x < 12; x++) {
         int sx, sy, rx, bx, by, dx, dy;
 
@@ -221,11 +545,11 @@ static void load_freetype_font(AVFilterContext *ctx)
             goto fail;
 
         if (face->glyph->advance.x != font_width*64 && !non_monospace_warning) {
-            av_log(ctx, AV_LOG_WARNING, "Font is not monospace\n");
+            av_log(s->ctx, AV_LOG_WARNING, "font is not monospace.\n");
             non_monospace_warning = 1;
         }
 
-        sy = font_height - 4*video_scale - face->glyph->bitmap_top;
+        sy = font_height - 8 - face->glyph->bitmap_top;
         for (rx = 0; rx < 10; rx++) {
             sx = rx * font_repeat + x * font_width + face->glyph->bitmap_left;
             for (by = 0; by < face->glyph->bitmap.rows; by++) {
@@ -239,9 +563,9 @@ static void load_freetype_font(AVFilterContext *ctx)
                     dx = bx + sx;
                     if (dx < 0)
                         continue;
-                    if (dx >= video_width)
+                    if (dx >= 1920)
                         break;
-                    s->font_alpha[dy*video_width+dx] = face->glyph->bitmap.buffer[by*face->glyph->bitmap.width+bx];
+                    data[dy*linesize+4*dx+3] = face->glyph->bitmap.buffer[by*face->glyph->bitmap.width+bx];
                 }
             }
         }
@@ -249,507 +573,679 @@ static void load_freetype_font(AVFilterContext *ctx)
 
     FT_Done_Face(face);
     FT_Done_FreeType(lib);
-    return;
+    return 0;
 
-    fail:
-    av_log(ctx, AV_LOG_WARNING, "Error while loading freetype font, using default font instead\n");
+fail:
+    av_log(s->ctx, AV_LOG_WARNING, "error while loading freetype font, using default font instead.\n");
     FT_Done_Face(face);
     FT_Done_FreeType(lib);
-    av_freep(&s->font_alpha);
-    return;
-}
+    return AVERROR(EINVAL);
+#else
+    if (s->fontfile)
+        av_log(s->ctx, AV_LOG_WARNING, "freetype is not available, ignoring fontfile option.\n");
+    return AVERROR(EINVAL);
 #endif
+}
 
-static double a_weighting(void *p, double f)
+static int render_default_font(AVFrame *tmp)
 {
-    double ret = 12200.0*12200.0 * (f*f*f*f);
-    ret /= (f*f + 20.6*20.6) * (f*f + 12200.0*12200.0) *
-           sqrt((f*f + 107.7*107.7) * (f*f + 737.9*737.9));
-    return ret;
+    const char *str = "EF G A BC D ";
+    int x, u, v, mask;
+    uint8_t *data = tmp->data[0];
+    int linesize = tmp->linesize[0];
+
+    for (x = 0; x < 1920; x += 192) {
+        uint8_t *startptr = data + 4 * x;
+        for (u = 0; u < 12; u++) {
+            for (v = 0; v < 16; v++) {
+                uint8_t *p = startptr + 2 * v * linesize + 16 * 4 * u;
+                for (mask = 0x80; mask; mask >>= 1, p += 8) {
+                    if (mask & avpriv_vga16_font[str[u] * 16 + v]) {
+                        p[3] = 255;
+                        p[7] = 255;
+                        p[linesize+3] = 255;
+                        p[linesize+7] = 255;
+                    }
+                }
+            }
+        }
+    }
+
+    return 0;
 }
 
-static double b_weighting(void *p, double f)
+static int init_axis_from_font(ShowCQTContext *s)
 {
-    double ret = 12200.0*12200.0 * (f*f*f);
-    ret /= (f*f + 20.6*20.6) * (f*f + 12200.0*12200.0) * sqrt(f*f + 158.5*158.5);
+    AVFrame *tmp = NULL;
+    int ret = AVERROR(ENOMEM);
+
+    if (!(tmp = alloc_frame_empty(AV_PIX_FMT_RGBA, 1920, 32)))
+        goto fail;
+
+    if (!(s->axis_frame = av_frame_alloc()))
+        goto fail;
+
+    if ((ret = init_axis_color(s, tmp)) < 0)
+        goto fail;
+
+    if (render_freetype(s, tmp) < 0 && (ret = render_default_font(tmp)) < 0)
+        goto fail;
+
+    if ((ret = ff_scale_image(s->axis_frame->data, s->axis_frame->linesize, s->width, s->axis_h,
+                              convert_axis_pixel_format(s->format), tmp->data, tmp->linesize,
+                              1920, 32, AV_PIX_FMT_RGBA, s->ctx)) < 0)
+        goto fail;
+
+    av_frame_free(&tmp);
+    s->axis_frame->width = s->width;
+    s->axis_frame->height = s->axis_h;
+    s->axis_frame->format = convert_axis_pixel_format(s->format);
+    return 0;
+
+fail:
+    av_frame_free(&tmp);
+    av_frame_free(&s->axis_frame);
     return ret;
 }
 
-static double c_weighting(void *p, double f)
+static float calculate_gamma(float v, float g)
 {
-    double ret = 12200.0*12200.0 * (f*f);
-    ret /= (f*f + 20.6*20.6) * (f*f + 12200.0*12200.0);
-    return ret;
+    if (g == 1.0f)
+        return v;
+    if (g == 2.0f)
+        return sqrtf(v);
+    if (g == 3.0f)
+        return cbrtf(v);
+    if (g == 4.0f)
+        return sqrtf(sqrtf(v));
+    return expf(logf(v) / g);
 }
 
-static double midi(void *p, double f)
+static void rgb_from_cqt(ColorFloat *c, const FFTComplex *v, float g, int len)
 {
-    return log2(f/440.0) * 12.0 + 69.0;
+    int x;
+    for (x = 0; x < len; x++) {
+        c[x].rgb.r = 255.0f * calculate_gamma(FFMIN(1.0f, v[x].re), g);
+        c[x].rgb.g = 255.0f * calculate_gamma(FFMIN(1.0f, 0.5f * (v[x].re + v[x].im)), g);
+        c[x].rgb.b = 255.0f * calculate_gamma(FFMIN(1.0f, v[x].im), g);
+    }
 }
 
-static double r_func(void *p, double x)
+static void yuv_from_cqt(ColorFloat *c, const FFTComplex *v, float gamma, int len)
 {
-    x = av_clipd(x, 0.0, 1.0);
-    return (int)(x*255.0+0.5) << 16;
+    int x;
+    for (x = 0; x < len; x++) {
+        float r, g, b;
+        r = calculate_gamma(FFMIN(1.0f, v[x].re), gamma);
+        g = calculate_gamma(FFMIN(1.0f, 0.5f * (v[x].re + v[x].im)), gamma);
+        b = calculate_gamma(FFMIN(1.0f, v[x].im), gamma);
+        c[x].yuv.y = 65.481f * r + 128.553f * g + 24.966f * b;
+        c[x].yuv.u = -37.797f * r - 74.203f * g + 112.0f * b;
+        c[x].yuv.v = 112.0f * r - 93.786f * g - 18.214 * b;
+    }
 }
 
-static double g_func(void *p, double x)
+static void draw_bar_rgb(AVFrame *out, const float *h, const float *rcp_h,
+                         const ColorFloat *c, int bar_h)
 {
-    x = av_clipd(x, 0.0, 1.0);
-    return (int)(x*255.0+0.5) << 8;
+    int x, y, w = out->width;
+    float mul, ht, rcp_bar_h = 1.0f / bar_h;
+    uint8_t *v = out->data[0], *lp;
+    int ls = out->linesize[0];
+
+    for (y = 0; y < bar_h; y++) {
+        ht = (bar_h - y) * rcp_bar_h;
+        lp = v + y * ls;
+        for (x = 0; x < w; x++) {
+            if (h[x] <= ht) {
+                *lp++ = 0;
+                *lp++ = 0;
+                *lp++ = 0;
+            } else {
+                mul = (h[x] - ht) * rcp_h[x];
+                *lp++ = mul * c[x].rgb.r + 0.5f;
+                *lp++ = mul * c[x].rgb.g + 0.5f;
+                *lp++ = mul * c[x].rgb.b + 0.5f;
+            }
+        }
+    }
 }
 
-static double b_func(void *p, double x)
+static void draw_bar_yuv(AVFrame *out, const float *h, const float *rcp_h,
+                         const ColorFloat *c, int bar_h)
 {
-    x = av_clipd(x, 0.0, 1.0);
-    return (int)(x*255.0+0.5);
+    int x, y, yh, w = out->width;
+    float mul, ht, rcp_bar_h = 1.0f / bar_h;
+    uint8_t *vy = out->data[0], *vu = out->data[1], *vv = out->data[2];
+    uint8_t *lpy, *lpu, *lpv;
+    int lsy = out->linesize[0], lsu = out->linesize[1], lsv = out->linesize[2];
+    int fmt = out->format;
+
+    for (y = 0; y < bar_h; y += 2) {
+        yh = (fmt == AV_PIX_FMT_YUV420P) ? y / 2 : y;
+        ht = (bar_h - y) * rcp_bar_h;
+        lpy = vy + y * lsy;
+        lpu = vu + yh * lsu;
+        lpv = vv + yh * lsv;
+        for (x = 0; x < w; x += 2) {
+            if (h[x] <= ht) {
+                *lpy++ = 16;
+                *lpu++ = 128;
+                *lpv++ = 128;
+            } else {
+                mul = (h[x] - ht) * rcp_h[x];
+                *lpy++ = mul * c[x].yuv.y + 16.5f;
+                *lpu++ = mul * c[x].yuv.u + 128.5f;
+                *lpv++ = mul * c[x].yuv.v + 128.5f;
+            }
+            /* u and v are skipped on yuv422p and yuv420p */
+            if (fmt == AV_PIX_FMT_YUV444P) {
+                if (h[x+1] <= ht) {
+                    *lpy++ = 16;
+                    *lpu++ = 128;
+                    *lpv++ = 128;
+                } else {
+                    mul = (h[x+1] - ht) * rcp_h[x+1];
+                    *lpy++ = mul * c[x+1].yuv.y + 16.5f;
+                    *lpu++ = mul * c[x+1].yuv.u + 128.5f;
+                    *lpv++ = mul * c[x+1].yuv.v + 128.5f;
+                }
+            } else {
+                if (h[x+1] <= ht) {
+                    *lpy++ = 16;
+                } else {
+                    mul = (h[x+1] - ht) * rcp_h[x+1];
+                    *lpy++ = mul * c[x+1].yuv.y + 16.5f;
+                }
+            }
+        }
+
+        ht = (bar_h - (y+1)) * rcp_bar_h;
+        lpy = vy + (y+1) * lsy;
+        lpu = vu + (y+1) * lsu;
+        lpv = vv + (y+1) * lsv;
+        for (x = 0; x < w; x += 2) {
+            /* u and v are skipped on yuv420p */
+            if (fmt != AV_PIX_FMT_YUV420P) {
+                if (h[x] <= ht) {
+                    *lpy++ = 16;
+                    *lpu++ = 128;
+                    *lpv++ = 128;
+                } else {
+                    mul = (h[x] - ht) * rcp_h[x];
+                    *lpy++ = mul * c[x].yuv.y + 16.5f;
+                    *lpu++ = mul * c[x].yuv.u + 128.5f;
+                    *lpv++ = mul * c[x].yuv.v + 128.5f;
+                }
+            } else {
+                if (h[x] <= ht) {
+                    *lpy++ = 16;
+                } else {
+                    mul = (h[x] - ht) * rcp_h[x];
+                    *lpy++ = mul * c[x].yuv.y + 16.5f;
+                }
+            }
+            /* u and v are skipped on yuv422p and yuv420p */
+            if (out->format == AV_PIX_FMT_YUV444P) {
+                if (h[x+1] <= ht) {
+                    *lpy++ = 16;
+                    *lpu++ = 128;
+                    *lpv++ = 128;
+                } else {
+                    mul = (h[x+1] - ht) * rcp_h[x+1];
+                    *lpy++ = mul * c[x+1].yuv.y + 16.5f;
+                    *lpu++ = mul * c[x+1].yuv.u + 128.5f;
+                    *lpv++ = mul * c[x+1].yuv.v + 128.5f;
+                }
+            } else {
+                if (h[x+1] <= ht) {
+                    *lpy++ = 16;
+                } else {
+                    mul = (h[x+1] - ht) * rcp_h[x+1];
+                    *lpy++ = mul * c[x+1].yuv.y + 16.5f;
+                }
+            }
+        }
+    }
 }
 
-static inline int qsort_sparsecoeff(const SparseCoeff *a, const SparseCoeff *b)
+static void draw_axis_rgb(AVFrame *out, AVFrame *axis, const ColorFloat *c, int off)
 {
-    if (fabsf(a->value) >= fabsf(b->value))
-        return 1;
-    else
-        return -1;
+    int x, y, w = axis->width, h = axis->height;
+    float a, rcp_255 = 1.0f / 255.0f;
+    uint8_t *lp, *lpa;
+
+    for (y = 0; y < h; y++) {
+        lp = out->data[0] + (off + y) * out->linesize[0];
+        lpa = axis->data[0] + y * axis->linesize[0];
+        for (x = 0; x < w; x++) {
+            a = rcp_255 * lpa[3];
+            *lp++ = a * lpa[0] + (1.0f - a) * c[x].rgb.r + 0.5f;
+            *lp++ = a * lpa[1] + (1.0f - a) * c[x].rgb.g + 0.5f;
+            *lp++ = a * lpa[2] + (1.0f - a) * c[x].rgb.b + 0.5f;
+            lpa += 4;
+        }
+    }
 }
 
-static int config_output(AVFilterLink *outlink)
+static void draw_axis_yuv(AVFrame *out, AVFrame *axis, const ColorFloat *c, int off)
 {
-    AVFilterContext *ctx = outlink->src;
-    AVFilterLink *inlink = ctx->inputs[0];
-    ShowCQTContext *s = ctx->priv;
-    AVExpr *tlength_expr = NULL, *volume_expr = NULL, *fontcolor_expr = NULL;
-    uint8_t *fontcolor_value = s->fontcolor_value;
-    static const char * const expr_vars[] = { "timeclamp", "tc", "frequency", "freq", "f", NULL };
-    static const char * const expr_func_names[] = { "a_weighting", "b_weighting", "c_weighting", NULL };
-    static const char * const expr_fontcolor_func_names[] = { "midi", "r", "g", "b", NULL };
-    static double (* const expr_funcs[])(void *, double) = { a_weighting, b_weighting, c_weighting, NULL };
-    static double (* const expr_fontcolor_funcs[])(void *, double) = { midi, r_func, g_func, b_func, NULL };
-    int fft_len, k, x, y, ret;
-    int num_coeffs = 0;
-    int rate = inlink->sample_rate;
-    double max_len = rate * (double) s->timeclamp;
-    int64_t start_time, end_time;
-    int video_scale = s->fullhd ? 2 : 1;
-    int video_width = (VIDEO_WIDTH/2) * video_scale;
-    int video_height = (VIDEO_HEIGHT/2) * video_scale;
-    int spectogram_height = (SPECTOGRAM_HEIGHT/2) * video_scale;
-
-    s->fft_bits = ceil(log2(max_len));
-    fft_len = 1 << s->fft_bits;
-
-    if (rate % (s->fps * s->count)) {
-        av_log(ctx, AV_LOG_ERROR, "Rate (%u) is not divisible by fps*count (%u*%u)\n", rate, s->fps, s->count);
-        return AVERROR(EINVAL);
+    int fmt = out->format, x, y, yh, w = axis->width, h = axis->height;
+    int offh = (fmt == AV_PIX_FMT_YUV420P) ? off / 2 : off;
+    float a, rcp_255 = 1.0f / 255.0f;
+    uint8_t *vy = out->data[0], *vu = out->data[1], *vv = out->data[2];
+    uint8_t *vay = axis->data[0], *vau = axis->data[1], *vav = axis->data[2], *vaa = axis->data[3];
+    int lsy = out->linesize[0], lsu = out->linesize[1], lsv = out->linesize[2];
+    int lsay = axis->linesize[0], lsau = axis->linesize[1], lsav = axis->linesize[2], lsaa = axis->linesize[3];
+    uint8_t *lpy, *lpu, *lpv, *lpay, *lpau, *lpav, *lpaa;
+
+    for (y = 0; y < h; y += 2) {
+        yh = (fmt == AV_PIX_FMT_YUV420P) ? y / 2 : y;
+        lpy = vy + (off + y) * lsy;
+        lpu = vu + (offh + yh) * lsu;
+        lpv = vv + (offh + yh) * lsv;
+        lpay = vay + y * lsay;
+        lpau = vau + yh * lsau;
+        lpav = vav + yh * lsav;
+        lpaa = vaa + y * lsaa;
+        for (x = 0; x < w; x += 2) {
+            a = rcp_255 * (*lpaa++);
+            *lpy++ = a * (*lpay++) + (1.0f - a) * (c[x].yuv.y + 16.0f) + 0.5f;
+            *lpu++ = a * (*lpau++) + (1.0f - a) * (c[x].yuv.u + 128.0f) + 0.5f;
+            *lpv++ = a * (*lpav++) + (1.0f - a) * (c[x].yuv.v + 128.0f) + 0.5f;
+            /* u and v are skipped on yuv422p and yuv420p */
+            a = rcp_255 * (*lpaa++);
+            *lpy++ = a * (*lpay++) + (1.0f - a) * (c[x+1].yuv.y + 16.0f) + 0.5f;
+            if (fmt == AV_PIX_FMT_YUV444P) {
+                *lpu++ = a * (*lpau++) + (1.0f - a) * (c[x+1].yuv.u + 128.0f) + 0.5f;
+                *lpv++ = a * (*lpav++) + (1.0f - a) * (c[x+1].yuv.v + 128.0f) + 0.5f;
+            }
+        }
+
+        lpy = vy + (off + y + 1) * lsy;
+        lpu = vu + (off + y + 1) * lsu;
+        lpv = vv + (off + y + 1) * lsv;
+        lpay = vay + (y + 1) * lsay;
+        lpau = vau + (y + 1) * lsau;
+        lpav = vav + (y + 1) * lsav;
+        lpaa = vaa + (y + 1) * lsaa;
+        for (x = 0; x < out->width; x += 2) {
+            /* u and v are skipped on yuv420p */
+            a = rcp_255 * (*lpaa++);
+            *lpy++ = a * (*lpay++) + (1.0f - a) * (c[x].yuv.y + 16.0f) + 0.5f;
+            if (fmt != AV_PIX_FMT_YUV420P) {
+                *lpu++ = a * (*lpau++) + (1.0f - a) * (c[x].yuv.u + 128.0f) + 0.5f;
+                *lpv++ = a * (*lpav++) + (1.0f - a) * (c[x].yuv.v + 128.0f) + 0.5f;
+            }
+            /* u and v are skipped on yuv422p and yuv420p */
+            a = rcp_255 * (*lpaa++);
+            *lpy++ = a * (*lpay++) + (1.0f - a) * (c[x+1].yuv.y + 16.0f) + 0.5f;
+            if (fmt == AV_PIX_FMT_YUV444P) {
+                *lpu++ = a * (*lpau++) + (1.0f - a) * (c[x+1].yuv.u + 128.0f) + 0.5f;
+                *lpv++ = a * (*lpav++) + (1.0f - a) * (c[x+1].yuv.v + 128.0f) + 0.5f;
+            }
+        }
     }
+}
 
-    s->fft_data         = av_malloc_array(fft_len, sizeof(*s->fft_data));
-    s->coeff_sort       = av_malloc_array(fft_len, sizeof(*s->coeff_sort));
-    s->fft_result_left  = av_malloc_array(fft_len, sizeof(*s->fft_result_left));
-    s->fft_result_right = av_malloc_array(fft_len, sizeof(*s->fft_result_right));
-    s->fft_context      = av_fft_init(s->fft_bits, 0);
+static void draw_sono(AVFrame *out, AVFrame *sono, int off, int idx)
+{
+    int fmt = out->format, h = sono->height;
+    int nb_planes = (fmt == AV_PIX_FMT_RGB24) ? 1 : 3;
+    int offh = (fmt == AV_PIX_FMT_YUV420P) ? off / 2 : off;
+    int inc = (fmt == AV_PIX_FMT_YUV420P) ? 2 : 1;
+    int ls, i, y, yh;
+
+    ls = FFMIN(out->linesize[0], sono->linesize[0]);
+    for (y = 0; y < h; y++) {
+        memcpy(out->data[0] + (off + y) * out->linesize[0],
+               sono->data[0] + (idx + y) % h * sono->linesize[0], ls);
+    }
 
-    if (!s->fft_data || !s->coeff_sort || !s->fft_result_left || !s->fft_result_right || !s->fft_context)
-        return AVERROR(ENOMEM);
+    for (i = 1; i < nb_planes; i++) {
+        ls = FFMIN(out->linesize[i], sono->linesize[i]);
+        for (y = 0; y < h; y += inc) {
+            yh = (fmt == AV_PIX_FMT_YUV420P) ? y / 2 : y;
+            memcpy(out->data[i] + (offh + yh) * out->linesize[i],
+                   sono->data[i] + (idx + y) % h * sono->linesize[i], ls);
+        }
+    }
+}
 
-#if CONFIG_LIBFREETYPE
-    load_freetype_font(ctx);
-#else
-    if (s->fontfile)
-        av_log(ctx, AV_LOG_WARNING, "Freetype is not available, ignoring fontfile option\n");
-    s->font_alpha = NULL;
-#endif
+static void update_sono_rgb(AVFrame *sono, const ColorFloat *c, int idx)
+{
+    int x, w = sono->width;
+    uint8_t *lp = sono->data[0] + idx * sono->linesize[0];
 
-    av_log(ctx, AV_LOG_INFO, "Calculating spectral kernel, please wait\n");
-    start_time = av_gettime_relative();
-    ret = av_expr_parse(&tlength_expr, s->tlength, expr_vars, NULL, NULL, NULL, NULL, 0, ctx);
-    if (ret < 0)
-        goto eval_error;
-
-    ret = av_expr_parse(&volume_expr, s->volume, expr_vars, expr_func_names,
-                        expr_funcs, NULL, NULL, 0, ctx);
-    if (ret < 0)
-        goto eval_error;
-
-    ret = av_expr_parse(&fontcolor_expr, s->fontcolor, expr_vars, expr_fontcolor_func_names,
-                        expr_fontcolor_funcs, NULL, NULL, 0, ctx);
-    if (ret < 0)
-        goto eval_error;
-
-    for (k = 0; k < VIDEO_WIDTH; k++) {
-        int hlen = fft_len >> 1;
-        float total = 0;
-        float partial = 0;
-        double freq = BASE_FREQ * exp2(k * (1.0/192.0));
-        double tlen, tlength, volume;
-        double expr_vars_val[] = { s->timeclamp, s->timeclamp, freq, freq, freq, 0 };
-        /* a window function from Albert H. Nuttall,
-         * "Some Windows with Very Good Sidelobe Behavior"
-         * -93.32 dB peak sidelobe and 18 dB/octave asymptotic decay
-         * coefficient normalized to a0 = 1 */
-        double a0 = 0.355768;
-        double a1 = 0.487396/a0;
-        double a2 = 0.144232/a0;
-        double a3 = 0.012604/a0;
-        double sv_step, cv_step, sv, cv;
-        double sw_step, cw_step, sw, cw, w;
-
-        tlength = av_expr_eval(tlength_expr, expr_vars_val, NULL);
-        if (isnan(tlength)) {
-            av_log(ctx, AV_LOG_WARNING, "at freq %g: tlength is nan, setting it to %g\n", freq, s->timeclamp);
-            tlength = s->timeclamp;
-        } else if (tlength < TLENGTH_MIN) {
-            av_log(ctx, AV_LOG_WARNING, "at freq %g: tlength is %g, setting it to %g\n", freq, tlength, TLENGTH_MIN);
-            tlength = TLENGTH_MIN;
-        } else if (tlength > s->timeclamp) {
-            av_log(ctx, AV_LOG_WARNING, "at freq %g: tlength is %g, setting it to %g\n", freq, tlength, s->timeclamp);
-            tlength = s->timeclamp;
-        }
+    for (x = 0; x < w; x++) {
+        *lp++ = c[x].rgb.r + 0.5f;
+        *lp++ = c[x].rgb.g + 0.5f;
+        *lp++ = c[x].rgb.b + 0.5f;
+    }
+}
 
-        volume = FFABS(av_expr_eval(volume_expr, expr_vars_val, NULL));
-        if (isnan(volume)) {
-            av_log(ctx, AV_LOG_WARNING, "at freq %g: volume is nan, setting it to 0\n", freq);
-            volume = VOLUME_MIN;
-        } else if (volume < VOLUME_MIN) {
-            volume = VOLUME_MIN;
-        } else if (volume > VOLUME_MAX) {
-            av_log(ctx, AV_LOG_WARNING, "at freq %g: volume is %g, setting it to %g\n", freq, volume, VOLUME_MAX);
-            volume = VOLUME_MAX;
+static void update_sono_yuv(AVFrame *sono, const ColorFloat *c, int idx)
+{
+    int x, fmt = sono->format, w = sono->width;
+    uint8_t *lpy = sono->data[0] + idx * sono->linesize[0];
+    uint8_t *lpu = sono->data[1] + idx * sono->linesize[1];
+    uint8_t *lpv = sono->data[2] + idx * sono->linesize[2];
+
+    for (x = 0; x < w; x += 2) {
+        *lpy++ = c[x].yuv.y + 16.5f;
+        *lpu++ = c[x].yuv.u + 128.5f;
+        *lpv++ = c[x].yuv.v + 128.5f;
+        *lpy++ = c[x+1].yuv.y + 16.5f;
+        if (fmt == AV_PIX_FMT_YUV444P) {
+            *lpu++ = c[x+1].yuv.u + 128.5f;
+            *lpv++ = c[x+1].yuv.v + 128.5f;
         }
+    }
+}
 
-        if (s->fullhd || !(k & 1)) {
-            int fontcolor = av_expr_eval(fontcolor_expr, expr_vars_val, NULL);
-            fontcolor_value[0] = (fontcolor >> 16) & 0xFF;
-            fontcolor_value[1] = (fontcolor >> 8) & 0xFF;
-            fontcolor_value[2] = fontcolor & 0xFF;
-            fontcolor_value += 3;
+static void process_cqt(ShowCQTContext *s)
+{
+    int x, i;
+    if (!s->sono_count) {
+        for (x = 0; x < s->cqt_len; x++) {
+            s->h_buf[x] = s->bar_v_buf[x] * 0.5f * (s->cqt_result[x].re + s->cqt_result[x].im);
         }
-
-        tlen = tlength * rate;
-        s->fft_data[0].re = 0;
-        s->fft_data[0].im = 0;
-        s->fft_data[hlen].re = (1.0 + a1 + a2 + a3) * (1.0/tlen) * volume * (1.0/fft_len);
-        s->fft_data[hlen].im = 0;
-        sv_step = sv = sin(2.0*M_PI*freq*(1.0/rate));
-        cv_step = cv = cos(2.0*M_PI*freq*(1.0/rate));
-        /* also optimizing window func */
-        sw_step = sw = sin(2.0*M_PI*(1.0/tlen));
-        cw_step = cw = cos(2.0*M_PI*(1.0/tlen));
-        for (x = 1; x < 0.5 * tlen; x++) {
-            double cv_tmp, cw_tmp;
-            double cw2, cw3, sw2;
-
-            cw2 = cw * cw - sw * sw;
-            sw2 = cw * sw + sw * cw;
-            cw3 = cw * cw2 - sw * sw2;
-            w = (1.0 + a1 * cw + a2 * cw2 + a3 * cw3) * (1.0/tlen) * volume * (1.0/fft_len);
-            s->fft_data[hlen + x].re = w * cv;
-            s->fft_data[hlen + x].im = w * sv;
-            s->fft_data[hlen - x].re = s->fft_data[hlen + x].re;
-            s->fft_data[hlen - x].im = -s->fft_data[hlen + x].im;
-
-            cv_tmp = cv * cv_step - sv * sv_step;
-            sv = sv * cv_step + cv * sv_step;
-            cv = cv_tmp;
-            cw_tmp = cw * cw_step - sw * sw_step;
-            sw = sw * cw_step + cw * sw_step;
-            cw = cw_tmp;
+        if (s->fcount > 1) {
+            float rcp_fcount = 1.0f / s->fcount;
+            for (x = 0; x < s->width; x++) {
+                float h = 0.0f;
+                for (i = 0; i < s->fcount; i++)
+                    h += s->h_buf[s->fcount * x + i];
+                s->h_buf[x] = rcp_fcount * h;
+            }
         }
-        for (; x < hlen; x++) {
-            s->fft_data[hlen + x].re = 0;
-            s->fft_data[hlen + x].im = 0;
-            s->fft_data[hlen - x].re = 0;
-            s->fft_data[hlen - x].im = 0;
+        for (x = 0; x < s->width; x++) {
+            s->h_buf[x] = calculate_gamma(s->h_buf[x], s->bar_g);
+            s->rcp_h_buf[x] = 1.0f / (s->h_buf[x] + 0.0001f);
         }
-        av_fft_permute(s->fft_context, s->fft_data);
-        av_fft_calc(s->fft_context, s->fft_data);
+    }
 
-        for (x = 0; x < fft_len; x++) {
-            s->coeff_sort[x].index = x;
-            s->coeff_sort[x].value = s->fft_data[x].re;
-        }
+    for (x = 0; x < s->cqt_len; x++) {
+        s->cqt_result[x].re *= s->sono_v_buf[x];
+        s->cqt_result[x].im *= s->sono_v_buf[x];
+    }
 
-        AV_QSORT(s->coeff_sort, fft_len, SparseCoeff, qsort_sparsecoeff);
-        for (x = 0; x < fft_len; x++)
-            total += fabsf(s->coeff_sort[x].value);
-
-        for (x = 0; x < fft_len; x++) {
-            partial += fabsf(s->coeff_sort[x].value);
-            if (partial > total * s->coeffclamp * COEFF_CLAMP) {
-                s->coeffs_len[k] = fft_len - x;
-                num_coeffs += s->coeffs_len[k];
-                s->coeffs[k] = av_malloc_array(s->coeffs_len[k], sizeof(*s->coeffs[k]));
-                if (!s->coeffs[k]) {
-                    ret = AVERROR(ENOMEM);
-                    goto eval_error;
-                }
-                for (y = 0; y < s->coeffs_len[k]; y++)
-                    s->coeffs[k][y] = s->coeff_sort[x+y];
-                break;
+    if (s->fcount > 1) {
+        float rcp_fcount = 1.0f / s->fcount;
+        for (x = 0; x < s->width; x++) {
+            FFTComplex result = {0.0f, 0.0f};
+            for (i = 0; i < s->fcount; i++) {
+                result.re += s->cqt_result[s->fcount * x + i].re;
+                result.im += s->cqt_result[s->fcount * x + i].im;
             }
+            s->cqt_result[x].re = rcp_fcount * result.re;
+            s->cqt_result[x].im = rcp_fcount * result.im;
         }
     }
-    av_expr_free(fontcolor_expr);
-    av_expr_free(volume_expr);
-    av_expr_free(tlength_expr);
-    end_time = av_gettime_relative();
-    av_log(ctx, AV_LOG_INFO, "Elapsed time %.6f s (fft_len=%u, num_coeffs=%u)\n", 1e-6 * (end_time-start_time), fft_len, num_coeffs);
 
-    outlink->w = video_width;
-    outlink->h = video_height;
+    if (s->format == AV_PIX_FMT_RGB24)
+        rgb_from_cqt(s->c_buf, s->cqt_result, s->sono_g, s->width);
+    else
+        yuv_from_cqt(s->c_buf, s->cqt_result, s->sono_g, s->width);
+}
 
-    s->req_fullfilled = 0;
-    s->spectogram_index = 0;
-    s->frame_count = 0;
-    s->spectogram_count = 0;
-    s->remaining_fill = fft_len >> 1;
-    memset(s->fft_data, 0, fft_len * sizeof(*s->fft_data));
+static int plot_cqt(AVFilterContext *ctx)
+{
+    AVFilterLink *outlink = ctx->outputs[0];
+    ShowCQTContext *s = ctx->priv;
+    int ret = 0;
+
+    memcpy(s->fft_result, s->fft_data, s->fft_len * sizeof(*s->fft_data));
+    av_fft_permute(s->fft_ctx, s->fft_result);
+    av_fft_calc(s->fft_ctx, s->fft_result);
+    s->fft_result[s->fft_len] = s->fft_result[0];
+    s->cqt_calc(s->cqt_result, s->fft_result, s->coeffs, s->cqt_len, s->fft_len);
+    process_cqt(s);
+    if (s->sono_h)
+        s->update_sono(s->sono_frame, s->c_buf, s->sono_idx);
+    if (!s->sono_count) {
+        AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+        if (!out)
+            return AVERROR(ENOMEM);
+        if (s->bar_h)
+            s->draw_bar(out, s->h_buf, s->rcp_h_buf, s->c_buf, s->bar_h);
+        if (s->axis_h)
+            s->draw_axis(out, s->axis_frame, s->c_buf, s->bar_h);
+        if (s->sono_h)
+            s->draw_sono(out, s->sono_frame, s->bar_h + s->axis_h, s->sono_idx);
+        out->pts = s->frame_count;
+        ret = ff_filter_frame(outlink, out);
+        s->frame_count++;
+    }
+    s->sono_count = (s->sono_count + 1) % s->count;
+    if (s->sono_h)
+        s->sono_idx = (s->sono_idx + s->sono_h - 1) % s->sono_h;
+    return ret;
+}
 
-    s->outpicref = ff_get_video_buffer(outlink, outlink->w, outlink->h);
-    if (!s->outpicref)
-        return AVERROR(ENOMEM);
+/* main filter control */
+static av_cold int init(AVFilterContext *ctx)
+{
+    ShowCQTContext *s = ctx->priv;
+    s->ctx = ctx;
 
-    s->spectogram = av_calloc(spectogram_height, s->outpicref->linesize[0]);
-    if (!s->spectogram)
-        return AVERROR(ENOMEM);
+    if (!s->fullhd) {
+        av_log(ctx, AV_LOG_WARNING, "fullhd option is deprecated, use size/s option instead.\n");
+        if (s->width != 1920 || s->height != 1080) {
+            av_log(ctx, AV_LOG_ERROR, "fullhd set to 0 but with custom dimension.\n");
+            return AVERROR(EINVAL);
+        }
+        s->width /= 2;
+        s->height /= 2;
+        s->fullhd = 1;
+    }
+
+    if (s->axis_h < 0) {
+        s->axis_h = s->width / 60;
+        if (s->axis_h & 1)
+            s->axis_h++;
+        if (s->bar_h >= 0 && s->sono_h >= 0)
+            s->axis_h = s->height - s->bar_h - s->sono_h;
+        if (s->bar_h >= 0 && s->sono_h < 0)
+            s->axis_h = FFMIN(s->axis_h, s->height - s->bar_h);
+        if (s->bar_h < 0 && s->sono_h >= 0)
+            s->axis_h = FFMIN(s->axis_h, s->height - s->sono_h);
+    }
+
+    if (s->bar_h < 0) {
+        s->bar_h = (s->height - s->axis_h) / 2;
+        if (s->bar_h & 1)
+            s->bar_h--;
+        if (s->sono_h >= 0)
+            s->bar_h = s->height - s->sono_h - s->axis_h;
+    }
+
+    if (s->sono_h < 0)
+        s->sono_h = s->height - s->axis_h - s->bar_h;
+
+    if ((s->width & 1) || (s->height & 1) || (s->bar_h & 1) || (s->axis_h & 1) || (s->sono_h & 1) ||
+        (s->bar_h < 0) || (s->axis_h < 0) || (s->sono_h < 0) || (s->bar_h > s->height) ||
+        (s->axis_h > s->height) || (s->sono_h > s->height) || (s->bar_h + s->axis_h + s->sono_h != s->height)) {
+        av_log(ctx, AV_LOG_ERROR, "invalid dimension.\n");
+        return AVERROR(EINVAL);
+    }
+
+    if (!s->fcount) {
+        do {
+            s->fcount++;
+        } while(s->fcount * s->width < 1920 && s->fcount < 10);
+    }
 
-    outlink->sample_aspect_ratio = av_make_q(1, 1);
-    outlink->time_base = av_make_q(1, s->fps);
-    outlink->frame_rate = av_make_q(s->fps, 1);
     return 0;
+}
 
-eval_error:
-    av_expr_free(fontcolor_expr);
-    av_expr_free(volume_expr);
-    av_expr_free(tlength_expr);
-    return ret;
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    common_uninit(ctx->priv);
 }
 
-static int plot_cqt(AVFilterLink *inlink)
+static int query_formats(AVFilterContext *ctx)
 {
-    AVFilterContext *ctx = inlink->dst;
-    ShowCQTContext *s = ctx->priv;
+    AVFilterFormats *formats = NULL;
+    AVFilterChannelLayouts *layouts = NULL;
+    AVFilterLink *inlink = ctx->inputs[0];
     AVFilterLink *outlink = ctx->outputs[0];
-    int fft_len = 1 << s->fft_bits;
-    FFTSample result[VIDEO_WIDTH][4];
-    int x, y, ret = 0;
-    int linesize = s->outpicref->linesize[0];
-    int video_scale = s->fullhd ? 2 : 1;
-    int video_width = (VIDEO_WIDTH/2) * video_scale;
-    int spectogram_height = (SPECTOGRAM_HEIGHT/2) * video_scale;
-    int spectogram_start = (SPECTOGRAM_START/2) * video_scale;
-    int font_height = (FONT_HEIGHT/2) * video_scale;
-
-    /* real part contains left samples, imaginary part contains right samples */
-    memcpy(s->fft_result_left, s->fft_data, fft_len * sizeof(*s->fft_data));
-    av_fft_permute(s->fft_context, s->fft_result_left);
-    av_fft_calc(s->fft_context, s->fft_result_left);
-
-    /* separate left and right, (and multiply by 2.0) */
-    s->fft_result_right[0].re = 2.0f * s->fft_result_left[0].im;
-    s->fft_result_right[0].im = 0;
-    s->fft_result_left[0].re = 2.0f * s->fft_result_left[0].re;
-    s->fft_result_left[0].im = 0;
-    for (x = 1; x <= fft_len >> 1; x++) {
-        FFTSample tmpy = s->fft_result_left[fft_len-x].im - s->fft_result_left[x].im;
-
-        s->fft_result_right[x].re = s->fft_result_left[x].im + s->fft_result_left[fft_len-x].im;
-        s->fft_result_right[x].im = s->fft_result_left[x].re - s->fft_result_left[fft_len-x].re;
-        s->fft_result_right[fft_len-x].re = s->fft_result_right[x].re;
-        s->fft_result_right[fft_len-x].im = -s->fft_result_right[x].im;
-
-        s->fft_result_left[x].re = s->fft_result_left[x].re + s->fft_result_left[fft_len-x].re;
-        s->fft_result_left[x].im = tmpy;
-        s->fft_result_left[fft_len-x].re = s->fft_result_left[x].re;
-        s->fft_result_left[fft_len-x].im = -s->fft_result_left[x].im;
-    }
-
-    /* calculating cqt */
-    for (x = 0; x < VIDEO_WIDTH; x++) {
-        int u;
-        FFTComplex l = {0,0};
-        FFTComplex r = {0,0};
-
-        for (u = 0; u < s->coeffs_len[x]; u++) {
-            FFTSample value = s->coeffs[x][u].value;
-            int index = s->coeffs[x][u].index;
-            l.re += value * s->fft_result_left[index].re;
-            l.im += value * s->fft_result_left[index].im;
-            r.re += value * s->fft_result_right[index].re;
-            r.im += value * s->fft_result_right[index].im;
-        }
-        /* result is power, not amplitude */
-        result[x][0] = l.re * l.re + l.im * l.im;
-        result[x][2] = r.re * r.re + r.im * r.im;
-        result[x][1] = 0.5f * (result[x][0] + result[x][2]);
-
-        if (s->gamma2 == 1.0f)
-            result[x][3] = result[x][1];
-        else if (s->gamma2 == 2.0f)
-            result[x][3] = sqrtf(result[x][1]);
-        else if (s->gamma2 == 3.0f)
-            result[x][3] = cbrtf(result[x][1]);
-        else if (s->gamma2 == 4.0f)
-            result[x][3] = sqrtf(sqrtf(result[x][1]));
-        else
-            result[x][3] = expf(logf(result[x][1]) * (1.0f / s->gamma2));
-
-        result[x][0] = FFMIN(1.0f, result[x][0]);
-        result[x][1] = FFMIN(1.0f, result[x][1]);
-        result[x][2] = FFMIN(1.0f, result[x][2]);
-        if (s->gamma == 1.0f) {
-            result[x][0] = 255.0f * result[x][0];
-            result[x][1] = 255.0f * result[x][1];
-            result[x][2] = 255.0f * result[x][2];
-        } else if (s->gamma == 2.0f) {
-            result[x][0] = 255.0f * sqrtf(result[x][0]);
-            result[x][1] = 255.0f * sqrtf(result[x][1]);
-            result[x][2] = 255.0f * sqrtf(result[x][2]);
-        } else if (s->gamma == 3.0f) {
-            result[x][0] = 255.0f * cbrtf(result[x][0]);
-            result[x][1] = 255.0f * cbrtf(result[x][1]);
-            result[x][2] = 255.0f * cbrtf(result[x][2]);
-        } else if (s->gamma == 4.0f) {
-            result[x][0] = 255.0f * sqrtf(sqrtf(result[x][0]));
-            result[x][1] = 255.0f * sqrtf(sqrtf(result[x][1]));
-            result[x][2] = 255.0f * sqrtf(sqrtf(result[x][2]));
-        } else {
-            result[x][0] = 255.0f * expf(logf(result[x][0]) * (1.0f / s->gamma));
-            result[x][1] = 255.0f * expf(logf(result[x][1]) * (1.0f / s->gamma));
-            result[x][2] = 255.0f * expf(logf(result[x][2]) * (1.0f / s->gamma));
-        }
-    }
+    enum AVSampleFormat sample_fmts[] = { AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_NONE };
+    enum AVPixelFormat pix_fmts[] = {
+        AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
+        AV_PIX_FMT_YUV444P, AV_PIX_FMT_RGB24, AV_PIX_FMT_NONE
+    };
+    int64_t channel_layouts[] = { AV_CH_LAYOUT_STEREO, AV_CH_LAYOUT_STEREO_DOWNMIX, -1 };
+    int ret;
 
-    if (!s->fullhd) {
-        for (x = 0; x < video_width; x++) {
-            result[x][0] = 0.5f * (result[2*x][0] + result[2*x+1][0]);
-            result[x][1] = 0.5f * (result[2*x][1] + result[2*x+1][1]);
-            result[x][2] = 0.5f * (result[2*x][2] + result[2*x+1][2]);
-            result[x][3] = 0.5f * (result[2*x][3] + result[2*x+1][3]);
-        }
+    /* set input audio formats */
+    formats = ff_make_format_list(sample_fmts);
+    if ((ret = ff_formats_ref(formats, &inlink->out_formats)) < 0)
+        return ret;
+
+    layouts = avfilter_make_format64_list(channel_layouts);
+    if ((ret = ff_channel_layouts_ref(layouts, &inlink->out_channel_layouts)) < 0)
+        return ret;
+
+    formats = ff_all_samplerates();
+    if ((ret = ff_formats_ref(formats, &inlink->out_samplerates)) < 0)
+        return ret;
+
+    /* set output video format */
+    formats = ff_make_format_list(pix_fmts);
+    if ((ret = ff_formats_ref(formats, &outlink->in_formats)) < 0)
+        return ret;
+
+    return 0;
+}
+
+static int config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    AVFilterLink *inlink = ctx->inputs[0];
+    ShowCQTContext *s = ctx->priv;
+    int ret;
+
+    common_uninit(s);
+
+    outlink->w = s->width;
+    outlink->h = s->height;
+    s->format = outlink->format;
+    outlink->sample_aspect_ratio = av_make_q(1, 1);
+    outlink->frame_rate = s->rate;
+    outlink->time_base = av_inv_q(s->rate);
+    av_log(ctx, AV_LOG_INFO, "video: %dx%d %s %d/%d fps, bar_h = %d, axis_h = %d, sono_h = %d.\n",
+           s->width, s->height, av_get_pix_fmt_name(s->format), s->rate.num, s->rate.den,
+           s->bar_h, s->axis_h, s->sono_h);
+
+    s->cqt_len = s->width * s->fcount;
+    if (!(s->freq = create_freq_table(s->basefreq, s->endfreq, s->cqt_len)))
+        return AVERROR(ENOMEM);
+
+    if ((ret = init_volume(s)) < 0)
+        return ret;
+
+    s->fft_bits = ceil(log2(inlink->sample_rate * s->timeclamp));
+    s->fft_len = 1 << s->fft_bits;
+    av_log(ctx, AV_LOG_INFO, "fft_len = %d, cqt_len = %d.\n", s->fft_len, s->cqt_len);
+
+    s->fft_ctx = av_fft_init(s->fft_bits, 0);
+    s->fft_data = av_calloc(s->fft_len, sizeof(*s->fft_data));
+    s->fft_result = av_calloc(s->fft_len + 64, sizeof(*s->fft_result));
+    s->cqt_result = av_malloc_array(s->cqt_len, sizeof(*s->cqt_result));
+    if (!s->fft_ctx || !s->fft_data || !s->fft_result || !s->cqt_result)
+        return AVERROR(ENOMEM);
+
+    s->cqt_align = 1;
+    s->cqt_coeffs_type = COEFFS_TYPE_DEFAULT;
+    s->cqt_calc = cqt_calc;
+    s->draw_sono = draw_sono;
+    if (s->format == AV_PIX_FMT_RGB24) {
+        s->draw_bar = draw_bar_rgb;
+        s->draw_axis = draw_axis_rgb;
+        s->update_sono = update_sono_rgb;
+    } else {
+        s->draw_bar = draw_bar_yuv;
+        s->draw_axis = draw_axis_yuv;
+        s->update_sono = update_sono_yuv;
     }
 
-    for (x = 0; x < video_width; x++) {
-        s->spectogram[s->spectogram_index*linesize + 3*x] = result[x][0] + 0.5f;
-        s->spectogram[s->spectogram_index*linesize + 3*x + 1] = result[x][1] + 0.5f;
-        s->spectogram[s->spectogram_index*linesize + 3*x + 2] = result[x][2] + 0.5f;
-    }
-
-    /* drawing */
-    if (!s->spectogram_count) {
-        uint8_t *data = (uint8_t*) s->outpicref->data[0];
-        float rcp_result[VIDEO_WIDTH];
-        int total_length = linesize * spectogram_height;
-        int back_length = linesize * s->spectogram_index;
-
-        for (x = 0; x < video_width; x++)
-            rcp_result[x] = 1.0f / (result[x][3]+0.0001f);
-
-        /* drawing bar */
-        for (y = 0; y < spectogram_height; y++) {
-            float height = (spectogram_height - y) * (1.0f/spectogram_height);
-            uint8_t *lineptr = data + y * linesize;
-            for (x = 0; x < video_width; x++) {
-                float mul;
-                if (result[x][3] <= height) {
-                    *lineptr++ = 0;
-                    *lineptr++ = 0;
-                    *lineptr++ = 0;
-                } else {
-                    mul = (result[x][3] - height) * rcp_result[x];
-                    *lineptr++ = mul * result[x][0] + 0.5f;
-                    *lineptr++ = mul * result[x][1] + 0.5f;
-                    *lineptr++ = mul * result[x][2] + 0.5f;
-                }
-            }
-        }
+    if ((ret = init_cqt(s)) < 0)
+        return ret;
 
-        /* drawing font */
-        if (s->font_alpha) {
-            for (y = 0; y < font_height; y++) {
-                uint8_t *lineptr = data + (spectogram_height + y) * linesize;
-                uint8_t *spectogram_src = s->spectogram + s->spectogram_index * linesize;
-                uint8_t *fontcolor_value = s->fontcolor_value;
-                for (x = 0; x < video_width; x++) {
-                    uint8_t alpha = s->font_alpha[y*video_width+x];
-                    lineptr[3*x] = (spectogram_src[3*x] * (255-alpha) + fontcolor_value[0] * alpha + 255) >> 8;
-                    lineptr[3*x+1] = (spectogram_src[3*x+1] * (255-alpha) + fontcolor_value[1] * alpha + 255) >> 8;
-                    lineptr[3*x+2] = (spectogram_src[3*x+2] * (255-alpha) + fontcolor_value[2] * alpha + 255) >> 8;
-                    fontcolor_value += 3;
+    if (s->axis_h) {
+        if (!s->axis) {
+            if ((ret = init_axis_empty(s)) < 0)
+                return ret;
+        } else if (s->axisfile) {
+            if (init_axis_from_file(s) < 0) {
+                av_log(ctx, AV_LOG_WARNING, "loading axis image failed, fallback to font rendering.\n");
+                if (init_axis_from_font(s) < 0) {
+                    av_log(ctx, AV_LOG_WARNING, "loading axis font failed, disable text drawing.\n");
+                    if ((ret = init_axis_empty(s)) < 0)
+                        return ret;
                 }
             }
         } else {
-            for (y = 0; y < font_height; y++) {
-                uint8_t *lineptr = data + (spectogram_height + y) * linesize;
-                memcpy(lineptr, s->spectogram + s->spectogram_index * linesize, video_width*3);
-            }
-            for (x = 0; x < video_width; x += video_width/10) {
-                int u;
-                static const char str[] = "EF G A BC D ";
-                uint8_t *startptr = data + spectogram_height * linesize + x * 3;
-                for (u = 0; str[u]; u++) {
-                    int v;
-                    for (v = 0; v < 16; v++) {
-                        uint8_t *p = startptr + v * linesize * video_scale + 8 * 3 * u * video_scale;
-                        int ux = x + 8 * u * video_scale;
-                        int mask;
-                        for (mask = 0x80; mask; mask >>= 1) {
-                            if (mask & avpriv_vga16_font[str[u] * 16 + v]) {
-                                p[0] = s->fontcolor_value[3*ux];
-                                p[1] = s->fontcolor_value[3*ux+1];
-                                p[2] = s->fontcolor_value[3*ux+2];
-                                if (video_scale == 2) {
-                                    p[linesize] = p[0];
-                                    p[linesize+1] = p[1];
-                                    p[linesize+2] = p[2];
-                                    p[3] = p[linesize+3] = s->fontcolor_value[3*ux+3];
-                                    p[4] = p[linesize+4] = s->fontcolor_value[3*ux+4];
-                                    p[5] = p[linesize+5] = s->fontcolor_value[3*ux+5];
-                                }
-                            }
-                            p  += 3 * video_scale;
-                            ux += video_scale;
-                        }
-                    }
-                }
+            if (init_axis_from_font(s) < 0) {
+                av_log(ctx, AV_LOG_WARNING, "loading axis font failed, disable text drawing.\n");
+                if ((ret = init_axis_empty(s)) < 0)
+                    return ret;
             }
         }
+    }
 
-        /* drawing spectogram/sonogram */
-        data += spectogram_start * linesize;
-        memcpy(data, s->spectogram + s->spectogram_index*linesize, total_length - back_length);
+    if (s->sono_h) {
+        s->sono_frame = alloc_frame_empty((outlink->format == AV_PIX_FMT_YUV420P) ?
+                        AV_PIX_FMT_YUV422P : outlink->format, s->width, s->sono_h);
+        if (!s->sono_frame)
+            return AVERROR(ENOMEM);
+    }
 
-        data += total_length - back_length;
-        if (back_length)
-            memcpy(data, s->spectogram, back_length);
+    s->h_buf = av_malloc_array(s->cqt_len, sizeof (*s->h_buf));
+    s->rcp_h_buf = av_malloc_array(s->width, sizeof(*s->rcp_h_buf));
+    s->c_buf = av_malloc_array(s->width, sizeof(*s->c_buf));
+    if (!s->h_buf || !s->rcp_h_buf || !s->c_buf)
+        return AVERROR(ENOMEM);
 
-        s->outpicref->pts = s->frame_count;
-        ret = ff_filter_frame(outlink, av_frame_clone(s->outpicref));
-        s->req_fullfilled = 1;
-        s->frame_count++;
+    s->sono_count = 0;
+    s->frame_count = 0;
+    s->sono_idx = 0;
+    s->remaining_fill = s->fft_len / 2;
+    s->remaining_frac = 0;
+    s->step_frac = av_div_q(av_make_q(inlink->sample_rate, s->count) , s->rate);
+    s->step = (int)(s->step_frac.num / s->step_frac.den);
+    s->step_frac.num %= s->step_frac.den;
+    if (s->step_frac.num) {
+        av_log(ctx, AV_LOG_INFO, "audio: %d Hz, step = %d + %d/%d.\n",
+               inlink->sample_rate, s->step, s->step_frac.num, s->step_frac.den);
+        av_log(ctx, AV_LOG_WARNING, "fractional step.\n");
+    } else {
+        av_log(ctx, AV_LOG_INFO, "audio: %d Hz, step = %d.\n",
+               inlink->sample_rate, s->step);
     }
-    s->spectogram_count = (s->spectogram_count + 1) % s->count;
-    s->spectogram_index = (s->spectogram_index + spectogram_height - 1) % spectogram_height;
-    return ret;
+
+    return 0;
 }
 
+
 static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
 {
     AVFilterContext *ctx = inlink->dst;
     ShowCQTContext *s = ctx->priv;
-    int step = inlink->sample_rate / (s->fps * s->count);
-    int fft_len = 1 << s->fft_bits;
-    int remaining;
+    int remaining, step, ret, x, i, j, m;
     float *audio_data;
 
     if (!insamples) {
-        while (s->remaining_fill < (fft_len >> 1)) {
-            int ret, x;
-            memset(&s->fft_data[fft_len - s->remaining_fill], 0, sizeof(*s->fft_data) * s->remaining_fill);
-            ret = plot_cqt(inlink);
+        while (s->remaining_fill < s->fft_len / 2) {
+            memset(&s->fft_data[s->fft_len - s->remaining_fill], 0, sizeof(*s->fft_data) * s->remaining_fill);
+            ret = plot_cqt(ctx);
             if (ret < 0)
                 return ret;
-            for (x = 0; x < (fft_len-step); x++)
+
+            step = s->step + (s->step_frac.num + s->remaining_frac) / s->step_frac.den;
+            s->remaining_frac = (s->step_frac.num + s->remaining_frac) % s->step_frac.den;
+            for (x = 0; x < (s->fft_len-step); x++)
                 s->fft_data[x] = s->fft_data[x+step];
             s->remaining_fill += step;
         }
@@ -760,30 +1256,28 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
     audio_data = (float*) insamples->data[0];
 
     while (remaining) {
+        i = insamples->nb_samples - remaining;
+        j = s->fft_len - s->remaining_fill;
         if (remaining >= s->remaining_fill) {
-            int i = insamples->nb_samples - remaining;
-            int j = fft_len - s->remaining_fill;
-            int m, ret;
             for (m = 0; m < s->remaining_fill; m++) {
                 s->fft_data[j+m].re = audio_data[2*(i+m)];
                 s->fft_data[j+m].im = audio_data[2*(i+m)+1];
             }
-            ret = plot_cqt(inlink);
+            ret = plot_cqt(ctx);
             if (ret < 0) {
                 av_frame_free(&insamples);
                 return ret;
             }
             remaining -= s->remaining_fill;
-            for (m = 0; m < fft_len-step; m++)
+            step = s->step + (s->step_frac.num + s->remaining_frac) / s->step_frac.den;
+            s->remaining_frac = (s->step_frac.num + s->remaining_frac) % s->step_frac.den;
+            for (m = 0; m < s->fft_len-step; m++)
                 s->fft_data[m] = s->fft_data[m+step];
             s->remaining_fill = step;
         } else {
-            int i = insamples->nb_samples - remaining;
-            int j = fft_len - s->remaining_fill;
-            int m;
             for (m = 0; m < remaining; m++) {
-                s->fft_data[m+j].re = audio_data[2*(i+m)];
-                s->fft_data[m+j].im = audio_data[2*(i+m)+1];
+                s->fft_data[j+m].re = audio_data[2*(i+m)];
+                s->fft_data[j+m].im = audio_data[2*(i+m)+1];
             }
             s->remaining_fill -= remaining;
             remaining = 0;
@@ -795,16 +1289,11 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
 
 static int request_frame(AVFilterLink *outlink)
 {
-    ShowCQTContext *s = outlink->src->priv;
     AVFilterLink *inlink = outlink->src->inputs[0];
     int ret;
 
-    s->req_fullfilled = 0;
-    do {
-        ret = ff_request_frame(inlink);
-    } while (!s->req_fullfilled && ret >= 0);
-
-    if (ret == AVERROR_EOF && s->outpicref)
+    ret = ff_request_frame(inlink);
+    if (ret == AVERROR_EOF)
         filter_frame(inlink, NULL);
     return ret;
 }
@@ -830,7 +1319,8 @@ static const AVFilterPad showcqt_outputs[] = {
 
 AVFilter ff_avf_showcqt = {
     .name          = "showcqt",
-    .description   = NULL_IF_CONFIG_SMALL("Convert input audio to a CQT (Constant Q Transform) spectrum video output."),
+    .description   = NULL_IF_CONFIG_SMALL("Convert input audio to a CQT (Constant/Clamped Q Transform) spectrum video output."),
+    .init          = init,
     .uninit        = uninit,
     .query_formats = query_formats,
     .priv_size     = sizeof(ShowCQTContext),
diff --git a/libavfilter/avf_showcqt.h b/libavfilter/avf_showcqt.h
new file mode 100644
index 00000000..a48b2b04
--- /dev/null
+++ b/libavfilter/avf_showcqt.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2015 Muhammad Faiz <mfcc64@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_SHOWCQT_H
+#define AVFILTER_SHOWCQT_H
+
+#include "libavcodec/avfft.h"
+#include "avfilter.h"
+#include "internal.h"
+
+typedef struct { /* one set of transform coefficients, as passed to the cqt_calc callback */
+    FFTSample *val; /* NOTE(review): presumably the coefficient values - confirm against init_cqt() */
+    int start, len; /* NOTE(review): presumably first index and count covered by val - confirm */
+} Coeffs;
+
+enum CoeffsType { /* stored in ShowCQTContext.cqt_coeffs_type */
+    COEFFS_TYPE_DEFAULT, /* the value config_output() in avf_showcqt.c selects */
+    COEFFS_TYPE_INTERLEAVE /* NOTE(review): not selected in the visible code; presumably for alternative cqt_calc implementations */
+};
+
+typedef struct { /* one colour as three floats; RGB member of ColorFloat */
+    float r, g, b; /* red, green, blue components */
+} RGBFloat;
+
+typedef struct { /* one colour as three floats; YUV member of ColorFloat */
+    float y, u, v; /* luma and the two chroma components */
+} YUVFloat;
+
+typedef union { /* a colour sample stored either as RGB or as YUV in the same storage */
+    RGBFloat rgb; /* NOTE(review): presumably the view used by the *_rgb callbacks - confirm */
+    YUVFloat yuv; /* NOTE(review): presumably the view used by the *_yuv callbacks - confirm */
+} ColorFloat;
+
+typedef struct { /* private context of the showcqt filter */
+    const AVClass       *class;
+    AVFilterContext     *ctx;
+    AVFrame             *axis_frame;
+    AVFrame             *sono_frame; /* allocated in config_output() when sono_h != 0; YUV422P is used when the output is YUV420P */
+    enum AVPixelFormat  format; /* copy of outlink->format, taken in config_output() */
+    int                 sono_idx; /* reset to 0 by config_output() */
+    int                 sono_count; /* reset to 0 by config_output() */
+    int                 step; /* integer part of sample_rate / (count * rate) */
+    AVRational          step_frac; /* fractional remainder of that quotient (num reduced modulo den) */
+    int                 remaining_frac; /* fraction carried from one step to the next in filter_frame() */
+    int                 remaining_fill; /* samples still to be written at the tail of fft_data; starts at fft_len / 2 */
+    int64_t             frame_count; /* reset to 0 by config_output() */
+    double              *freq; /* result of create_freq_table(basefreq, endfreq, cqt_len) */
+    FFTContext          *fft_ctx; /* av_fft_init(fft_bits, 0) */
+    Coeffs              *coeffs;
+    FFTComplex          *fft_data; /* fft_len elements; filter_frame() stores channel 0 in .re and channel 1 in .im */
+    FFTComplex          *fft_result; /* fft_len + 64 elements */
+    FFTComplex          *cqt_result; /* cqt_len elements */
+    int                 fft_bits; /* ceil(log2(sample_rate * timeclamp)) */
+    int                 fft_len; /* 1 << fft_bits */
+    int                 cqt_len; /* width * fcount */
+    int                 cqt_align; /* set to 1 by config_output() */
+    enum CoeffsType     cqt_coeffs_type; /* set to COEFFS_TYPE_DEFAULT by config_output() */
+    ColorFloat          *c_buf; /* width elements */
+    float               *h_buf; /* cqt_len elements */
+    float               *rcp_h_buf; /* width elements */
+    float               *sono_v_buf;
+    float               *bar_v_buf;
+    /* callbacks, assigned in config_output(): RGB24 output gets the *_rgb bar/axis/update functions, any other format the *_yuv ones */
+    void                (*cqt_calc)(FFTComplex *dst, const FFTComplex *src, const Coeffs *coeffs,
+                                    int len, int fft_len);
+    void                (*draw_bar)(AVFrame *out, const float *h, const float *rcp_h,
+                                    const ColorFloat *c, int bar_h);
+    void                (*draw_axis)(AVFrame *out, AVFrame *axis, const ColorFloat *c, int off);
+    void                (*draw_sono)(AVFrame *out, AVFrame *sono, int off, int idx);
+    void                (*update_sono)(AVFrame *sono, const ColorFloat *c, int idx);
+    /* options */
+    int                 width, height; /* copied to outlink->w / outlink->h */
+    AVRational          rate; /* output frame rate; the output time base is its inverse */
+    int                 bar_h;
+    int                 axis_h; /* when 0, config_output() skips axis initialisation */
+    int                 sono_h; /* when 0, no sono_frame is allocated */
+    int                 fullhd; /* deprecated */
+    char                *sono_v;
+    char                *bar_v;
+    float               sono_g;
+    float               bar_g;
+    double              timeclamp; /* multiplied by the sample rate to derive fft_bits */
+    double              basefreq; /* first argument of create_freq_table() */
+    double              endfreq; /* second argument of create_freq_table() */
+    float               coeffclamp; /* deprecated - ignored */
+    char                *tlength;
+    int                 count; /* divides the per-frame sample step: sample_rate / (count * rate) */
+    int                 fcount; /* multiplies width to give cqt_len */
+    char                *fontfile;
+    char                *fontcolor;
+    char                *axisfile; /* when set (and axis is non-zero) the axis image is loaded from file first, with font rendering as fallback */
+    int                 axis; /* when 0 (and axis_h != 0) an empty axis is used */
+} ShowCQTContext;
+
+#endif
diff --git a/libavfilter/avf_showfreqs.c b/libavfilter/avf_showfreqs.c
new file mode 100644
index 00000000..b33587b3
--- /dev/null
+++ b/libavfilter/avf_showfreqs.c
@@ -0,0 +1,517 @@
+/*
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <math.h>
+
+#include "libavcodec/avfft.h"
+#include "libavutil/audio_fifo.h"
+#include "libavutil/avassert.h"
+#include "libavutil/avstring.h"
+#include "libavutil/channel_layout.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/opt.h"
+#include "libavutil/parseutils.h"
+#include "audio.h"
+#include "video.h"
+#include "avfilter.h"
+#include "internal.h"
+#include "window_func.h"
+
+enum DisplayMode    { LINE, BAR, DOT, NB_MODES }; /* values of the "mode" option; NB_MODES-1 is its upper bound */
+enum ChannelMode    { COMBINED, SEPARATE, NB_CMODES }; /* values of the "cmode" option; NB_CMODES-1 is its upper bound */
+enum FrequencyScale { FS_LINEAR, FS_LOG, FS_RLOG, NB_FSCALES }; /* values of the "fscale" option; NB_FSCALES-1 is its upper bound */
+enum AmplitudeScale { AS_LINEAR, AS_SQRT, AS_CBRT, AS_LOG, NB_ASCALES }; /* values of the "ascale" option; NB_ASCALES-1 is its upper bound */
+
+typedef struct ShowFreqsContext { /* private context of the showfreqs filter */
+    const AVClass *class;
+    int w, h; /* "size" option; copied to outlink->w / outlink->h */
+    int mode; /* "mode" option, an enum DisplayMode value */
+    int cmode; /* "cmode" option, an enum ChannelMode value */
+    int fft_bits; /* "win_size" option: log2 of the window length */
+    int ascale, fscale; /* "ascale" / "fscale" options */
+    int avg; /* "averaging" option: 0, 1 and >1 select different branches in plot_freq() */
+    int win_func; /* "win_func" option, passed to ff_generate_window_func() */
+    FFTContext *fft; /* av_fft_init(fft_bits, 0) */
+    FFTComplex **fft_data; /* nb_channels arrays of win_size elements */
+    float **avg_data; /* nb_channels arrays of nb_freq elements; per-bin state kept by plot_freq() */
+    float *window_func_lut; /* win_size window coefficients filled by ff_generate_window_func() */
+    float overlap; /* "overlap" option; the value 1 is replaced in config_output() by the window's default */
+    int hop_size; /* (1 - overlap) * win_size; config_output() rejects values below 1 */
+    int nb_channels; /* inlink->channels at configuration time */
+    int nb_freq; /* 1 << (fft_bits - 1) */
+    int win_size; /* nb_freq << 1 */
+    float scale; /* sum of the squared window coefficients */
+    char *colors; /* "colors" option string */
+    AVAudioFifo *fifo; /* allocated in config_output() with room for win_size samples */
+    int64_t pts; /* set to AV_NOPTS_VALUE by init() */
+} ShowFreqsContext;
+
+#define OFFSET(x) offsetof(ShowFreqsContext, x)
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
+
+static const AVOption showfreqs_options[] = { /* user options; each entry stores into the ShowFreqsContext field named by OFFSET() */
+    { "size", "set video size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, {.str = "1024x512"}, 0, 0, FLAGS },
+    { "s",    "set video size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, {.str = "1024x512"}, 0, 0, FLAGS }, /* same target and default as "size" */
+    { "mode", "set display mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64=BAR}, 0, NB_MODES-1, FLAGS, "mode" },
+        { "line", "show lines",  0, AV_OPT_TYPE_CONST, {.i64=LINE},   0, 0, FLAGS, "mode" },
+        { "bar",  "show bars",   0, AV_OPT_TYPE_CONST, {.i64=BAR},    0, 0, FLAGS, "mode" },
+        { "dot",  "show dots",   0, AV_OPT_TYPE_CONST, {.i64=DOT},    0, 0, FLAGS, "mode" },
+    { "ascale", "set amplitude scale", OFFSET(ascale), AV_OPT_TYPE_INT, {.i64=AS_LOG}, 0, NB_ASCALES-1, FLAGS, "ascale" },
+        { "lin",  "linear",      0, AV_OPT_TYPE_CONST, {.i64=AS_LINEAR}, 0, 0, FLAGS, "ascale" },
+        { "sqrt", "square root", 0, AV_OPT_TYPE_CONST, {.i64=AS_SQRT},   0, 0, FLAGS, "ascale" },
+        { "cbrt", "cubic root",  0, AV_OPT_TYPE_CONST, {.i64=AS_CBRT},   0, 0, FLAGS, "ascale" },
+        { "log",  "logarithmic", 0, AV_OPT_TYPE_CONST, {.i64=AS_LOG},    0, 0, FLAGS, "ascale" },
+    { "fscale", "set frequency scale", OFFSET(fscale), AV_OPT_TYPE_INT, {.i64=FS_LINEAR}, 0, NB_FSCALES-1, FLAGS, "fscale" },
+        { "lin",  "linear",              0, AV_OPT_TYPE_CONST, {.i64=FS_LINEAR}, 0, 0, FLAGS, "fscale" },
+        { "log",  "logarithmic",         0, AV_OPT_TYPE_CONST, {.i64=FS_LOG},    0, 0, FLAGS, "fscale" },
+        { "rlog", "reverse logarithmic", 0, AV_OPT_TYPE_CONST, {.i64=FS_RLOG},   0, 0, FLAGS, "fscale" },
+    { "win_size", "set window size", OFFSET(fft_bits), AV_OPT_TYPE_INT, {.i64=11}, 4, 16, FLAGS, "fft" }, /* stored as log2 of the window length; the wN constants below map N to log2(N) */
+        { "w16",    0, 0, AV_OPT_TYPE_CONST, {.i64=4},  0, 0, FLAGS, "fft" },
+        { "w32",    0, 0, AV_OPT_TYPE_CONST, {.i64=5},  0, 0, FLAGS, "fft" },
+        { "w64",    0, 0, AV_OPT_TYPE_CONST, {.i64=6},  0, 0, FLAGS, "fft" },
+        { "w128",   0, 0, AV_OPT_TYPE_CONST, {.i64=7},  0, 0, FLAGS, "fft" },
+        { "w256",   0, 0, AV_OPT_TYPE_CONST, {.i64=8},  0, 0, FLAGS, "fft" },
+        { "w512",   0, 0, AV_OPT_TYPE_CONST, {.i64=9},  0, 0, FLAGS, "fft" },
+        { "w1024",  0, 0, AV_OPT_TYPE_CONST, {.i64=10}, 0, 0, FLAGS, "fft" },
+        { "w2048",  0, 0, AV_OPT_TYPE_CONST, {.i64=11}, 0, 0, FLAGS, "fft" },
+        { "w4096",  0, 0, AV_OPT_TYPE_CONST, {.i64=12}, 0, 0, FLAGS, "fft" },
+        { "w8192",  0, 0, AV_OPT_TYPE_CONST, {.i64=13}, 0, 0, FLAGS, "fft" },
+        { "w16384", 0, 0, AV_OPT_TYPE_CONST, {.i64=14}, 0, 0, FLAGS, "fft" },
+        { "w32768", 0, 0, AV_OPT_TYPE_CONST, {.i64=15}, 0, 0, FLAGS, "fft" },
+        { "w65536", 0, 0, AV_OPT_TYPE_CONST, {.i64=16}, 0, 0, FLAGS, "fft" },
+    { "win_func", "set window function", OFFSET(win_func), AV_OPT_TYPE_INT, {.i64=WFUNC_HANNING}, 0, NB_WFUNC-1, FLAGS, "win_func" },
+        { "rect",     "Rectangular",      0, AV_OPT_TYPE_CONST, {.i64=WFUNC_RECT},     0, 0, FLAGS, "win_func" },
+        { "bartlett", "Bartlett",         0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BARTLETT}, 0, 0, FLAGS, "win_func" },
+        { "hanning",  "Hanning",          0, AV_OPT_TYPE_CONST, {.i64=WFUNC_HANNING},  0, 0, FLAGS, "win_func" },
+        { "hamming",  "Hamming",          0, AV_OPT_TYPE_CONST, {.i64=WFUNC_HAMMING},  0, 0, FLAGS, "win_func" },
+        { "blackman", "Blackman",         0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BLACKMAN}, 0, 0, FLAGS, "win_func" },
+        { "welch",    "Welch",            0, AV_OPT_TYPE_CONST, {.i64=WFUNC_WELCH},    0, 0, FLAGS, "win_func" },
+        { "flattop",  "Flat-top",         0, AV_OPT_TYPE_CONST, {.i64=WFUNC_FLATTOP},  0, 0, FLAGS, "win_func" },
+        { "bharris",  "Blackman-Harris",  0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BHARRIS},  0, 0, FLAGS, "win_func" },
+        { "bnuttall", "Blackman-Nuttall", 0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BNUTTALL}, 0, 0, FLAGS, "win_func" },
+        { "bhann",    "Bartlett-Hann",    0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BHANN},    0, 0, FLAGS, "win_func" },
+        { "sine",     "Sine",             0, AV_OPT_TYPE_CONST, {.i64=WFUNC_SINE},     0, 0, FLAGS, "win_func" },
+        { "nuttall",  "Nuttall",          0, AV_OPT_TYPE_CONST, {.i64=WFUNC_NUTTALL},  0, 0, FLAGS, "win_func" },
+        { "lanczos",  "Lanczos",          0, AV_OPT_TYPE_CONST, {.i64=WFUNC_LANCZOS},  0, 0, FLAGS, "win_func" },
+        { "gauss",    "Gauss",            0, AV_OPT_TYPE_CONST, {.i64=WFUNC_GAUSS},    0, 0, FLAGS, "win_func" },
+        { "tukey",    "Tukey",            0, AV_OPT_TYPE_CONST, {.i64=WFUNC_TUKEY},    0, 0, FLAGS, "win_func" },
+    { "overlap",  "set window overlap", OFFSET(overlap), AV_OPT_TYPE_FLOAT, {.dbl=1.}, 0., 1., FLAGS }, /* the default 1 makes config_output() use the overlap reported by ff_generate_window_func() */
+    { "averaging", "set time averaging", OFFSET(avg), AV_OPT_TYPE_INT, {.i64=1}, 0, INT32_MAX, FLAGS }, /* plot_freq(): 0 keeps the smallest row seen per bin, 1 leaves the row untouched, >1 averages over time */
+    { "colors", "set channels colors", OFFSET(colors), AV_OPT_TYPE_STRING, {.str = "red|green|blue|yellow|orange|lime|pink|magenta|brown" }, 0, 0, FLAGS },
+    { "cmode", "set channel mode", OFFSET(cmode), AV_OPT_TYPE_INT, {.i64=COMBINED}, 0, NB_CMODES-1, FLAGS, "cmode" },
+        { "combined", "show all channels in same window",  0, AV_OPT_TYPE_CONST, {.i64=COMBINED}, 0, 0, FLAGS, "cmode" },
+        { "separate", "show each channel in own window",   0, AV_OPT_TYPE_CONST, {.i64=SEPARATE}, 0, 0, FLAGS, "cmode" },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(showfreqs);
+
+/* Negotiate formats: planar float audio in (layouts from ff_all_channel_layouts(),
+ * rates from ff_all_samplerates()), RGBA video out. */
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVSampleFormat audio_fmts[] = { AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE };
+    static const enum AVPixelFormat video_fmts[] = { AV_PIX_FMT_RGBA, AV_PIX_FMT_NONE };
+
+    AVFilterLink *const audio_in  = ctx->inputs[0];
+    AVFilterLink *const video_out = ctx->outputs[0];
+    int err;
+
+    /* input: sample format */
+    err = ff_formats_ref(ff_make_format_list(audio_fmts), &audio_in->out_formats);
+    if (err < 0)
+        return err;
+
+    /* input: channel layouts */
+    err = ff_channel_layouts_ref(ff_all_channel_layouts(), &audio_in->out_channel_layouts);
+    if (err < 0)
+        return err;
+
+    /* input: sample rates */
+    err = ff_formats_ref(ff_all_samplerates(), &audio_in->out_samplerates);
+    if (err < 0)
+        return err;
+
+    /* output: pixel format */
+    err = ff_formats_ref(ff_make_format_list(video_fmts), &video_out->in_formats);
+    return err < 0 ? err : 0;
+}
+
+/* Filter init callback: start with the stored timestamp marked as unset. */
+static av_cold int init(AVFilterContext *ctx)
+{
+    ShowFreqsContext *priv = ctx->priv;
+
+    priv->pts = AV_NOPTS_VALUE;
+    return 0;
+}
+
+/* Configure the output link: (re)create the FFT context, per-channel buffers, window
+ * table and audio FIFO for the current input, then set the output size and frame
+ * rate. Returns 0 on success or a negative AVERROR code. */
+static int config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    AVFilterLink *inlink = ctx->inputs[0];
+    ShowFreqsContext *s = ctx->priv;
+    float overlap;
+    int i;
+
+    s->nb_freq = 1 << (s->fft_bits - 1);
+    s->win_size = s->nb_freq << 1;
+    av_audio_fifo_free(s->fifo);
+    s->fifo = NULL; /* no dangling pointer if an early return below skips the re-allocation */
+    av_fft_end(s->fft);
+    s->fft = av_fft_init(s->fft_bits, 0);
+    if (!s->fft) {
+        av_log(ctx, AV_LOG_ERROR, "Unable to create FFT context. "
+               "The window size might be too high.\n");
+        return AVERROR(ENOMEM);
+    }
+
+    /* FFT buffers: x2 for each (display) channel buffer.
+     * Note: we use free and malloc instead of a realloc-like function to
+     * make sure the buffer is aligned in memory for the FFT functions. */
+    for (i = 0; i < s->nb_channels; i++) {
+        av_freep(&s->fft_data[i]);
+        av_freep(&s->avg_data[i]);
+    }
+    av_freep(&s->fft_data);
+    av_freep(&s->avg_data);
+    s->nb_channels = inlink->channels;
+
+    s->fft_data = av_calloc(s->nb_channels, sizeof(*s->fft_data));
+    if (!s->fft_data)
+        return AVERROR(ENOMEM);
+    s->avg_data = av_calloc(s->nb_channels, sizeof(*s->avg_data));
+    if (!s->avg_data) /* check the table just allocated (fft_data was tested twice before) */
+        return AVERROR(ENOMEM);
+    for (i = 0; i < s->nb_channels; i++) {
+        s->fft_data[i] = av_calloc(s->win_size, sizeof(**s->fft_data));
+        s->avg_data[i] = av_calloc(s->nb_freq, sizeof(**s->avg_data));
+        if (!s->fft_data[i] || !s->avg_data[i])
+            return AVERROR(ENOMEM);
+    }
+
+    /* pre-calc windowing function */
+    s->window_func_lut = av_realloc_f(s->window_func_lut, s->win_size, sizeof(*s->window_func_lut));
+    if (!s->window_func_lut)
+        return AVERROR(ENOMEM);
+    ff_generate_window_func(s->window_func_lut, s->win_size, s->win_func, &overlap);
+    if (s->overlap == 1.) /* 1 means: use the overlap reported for this window function */
+        s->overlap = overlap;
+    s->hop_size = (1. - s->overlap) * s->win_size;
+    if (s->hop_size < 1) {
+        av_log(ctx, AV_LOG_ERROR, "overlap %f too big\n", s->overlap);
+        return AVERROR(EINVAL);
+    }
+
+    for (s->scale = 0, i = 0; i < s->win_size; i++) /* sum of squared window coefficients */
+        s->scale += s->window_func_lut[i] * s->window_func_lut[i];
+
+    outlink->frame_rate = av_make_q(inlink->sample_rate, s->win_size * (1.-s->overlap));
+    outlink->sample_aspect_ratio = (AVRational){1,1};
+    outlink->w = s->w;
+    outlink->h = s->h;
+
+    s->fifo = av_audio_fifo_alloc(inlink->format, inlink->channels, s->win_size);
+    return s->fifo ? 0 : AVERROR(ENOMEM);
+}
+
+/* Plot one 4-byte pixel at (x, y): OR fg into a pixel whose low 24 bits are set, else overwrite it. */
+static inline void draw_dot(AVFrame *out, int x, int y, uint8_t fg[4])
+{
+    uint8_t *pixel = out->data[0] + y * out->linesize[0] + x * 4;
+    const uint32_t old = AV_RL32(pixel);
+    if (old & 0xffffff)
+        AV_WL32(pixel, AV_RL32(fg) | old);
+    else
+        AV_WL32(pixel, AV_RL32(fg));
+}
+
+/* Left x coordinate of frequency bin f on the selected frequency scale; 0 for an unknown scale. */
+static int get_sx(ShowFreqsContext *s, int f)
+{
+    const double span = s->nb_freq - 1.;
+
+    if (s->fscale == FS_LINEAR)
+        return (s->w/(float)s->nb_freq)*f;
+    if (s->fscale == FS_LOG)
+        return s->w-pow(s->w, (s->nb_freq-f-1)/span);
+    if (s->fscale == FS_RLOG)
+        return pow(s->w, f/span);
+    return 0;
+}
+
+/* Horizontal extent of frequency bin f on the selected frequency scale;
+ * 1 for an unknown scale. */
+static float get_bsize(ShowFreqsContext *s, int f)
+{
+    const double span = s->nb_freq - 1.;
+
+    if (s->fscale == FS_LINEAR)
+        return s->w/(float)s->nb_freq;
+    if (s->fscale == FS_LOG)
+        return pow(s->w, (s->nb_freq-f-1)/span) - pow(s->w, (s->nb_freq-f-2)/span);
+    if (s->fscale == FS_RLOG)
+        return pow(s->w, (f+1)/span) - pow(s->w, f/span);
+
+    return 1.;
+}
+
+/* Draw frequency bin f of channel ch, with magnitude a and colour fg, into out; *prev_y carries the previous row for LINE mode. */
+static inline void plot_freq(ShowFreqsContext *s, int ch,
+                             double a, int f, uint8_t fg[4], int *prev_y,
+                             AVFrame *out, AVFilterLink *outlink)
+{
+    const int w = s->w;
+    const float avg = s->avg_data[ch][f];
+    const float bsize = get_bsize(s, f);
+    const int sx = get_sx(s, f);
+    int end = outlink->h;
+    int x, y, i;
+
+    switch(s->ascale) {
+    case AS_SQRT:
+        a = 1.0 - sqrt(a);
+        break;
+    case AS_CBRT:
+        a = 1.0 - cbrt(a);
+        break;
+    case AS_LOG:
+        a = log(av_clipd(a, 1e-6, 1)) / log(1e-6);
+        break;
+    case AS_LINEAR:
+        a = 1.0 - a;
+        break;
+    }
+    /* a is now inverted (larger magnitude -> smaller a); turn it into a row inside the channel's area */
+    switch (s->cmode) {
+    case COMBINED:
+        y = a * outlink->h - 1;
+        break;
+    case SEPARATE:
+        end = (outlink->h / s->nb_channels) * (ch + 1);
+        y = (outlink->h / s->nb_channels) * ch + a * (outlink->h / s->nb_channels) - 1;
+        break;
+    default:
+        av_assert0(0);
+    }
+    if (y < 0)
+        return;
+    /* temporal processing of the row, per channel and bin ("averaging" option) */
+    switch (s->avg) {
+    case 0:
+        y = s->avg_data[ch][f] = !outlink->frame_count ? y : FFMIN(avg, y);
+        break;
+    case 1:
+        break;
+    default: /* running average; the former y * (y - avg) / (n * y) form yielded NaN when y == 0 */
+        s->avg_data[ch][f] = avg + (y - avg) / FFMIN(outlink->frame_count + 1, s->avg);
+        y = s->avg_data[ch][f];
+        break;
+    }
+    /* rasterise the bin according to the display mode */
+    switch(s->mode) {
+    case LINE:
+        if (*prev_y == -1)
+            *prev_y = y;
+        if (y <= *prev_y) {
+            for (x = sx + 1; x < sx + bsize && x < w; x++)
+                draw_dot(out, x, y, fg);
+            for (i = y; i <= *prev_y; i++)
+                draw_dot(out, sx, i, fg);
+        } else {
+            for (i = *prev_y; i <= y; i++)
+                draw_dot(out, sx, i, fg);
+            for (x = sx + 1; x < sx + bsize && x < w; x++)
+                draw_dot(out, x, i - 1, fg);
+        }
+        *prev_y = y;
+        break;
+    case BAR:
+        for (x = sx; x < sx + bsize && x < w; x++)
+            for (i = y; i < end; i++)
+                draw_dot(out, x, i, fg);
+        break;
+    case DOT:
+        for (x = sx; x < sx + bsize && x < w; x++)
+            draw_dot(out, x, y, fg);
+        break;
+    }
+}
+
+static int plot_freqs(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    ShowFreqsContext *s = ctx->priv;
+    const int win_size = s->win_size;
+    char *colors, *color, *saveptr = NULL;
+    AVFrame *out;
+    int ch, n;
+    /* Turn the samples of in into one video frame and pass it to outlink; in is not freed here. */
+    out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!out)
+        return AVERROR(ENOMEM);
+    /* clear the visible part of every row (4 bytes per pixel) */
+    for (n = 0; n < outlink->h; n++)
+        memset(out->data[0] + out->linesize[0] * n, 0, outlink->w * 4);
+
+    /* fill FFT input with the number of samples available */
+    for (ch = 0; ch < s->nb_channels; ch++) {
+        const float *p = (float *)in->extended_data[ch];
+
+        for (n = 0; n < in->nb_samples; n++) {
+            s->fft_data[ch][n].re = p[n] * s->window_func_lut[n];
+            s->fft_data[ch][n].im = 0;
+        }
+        for (; n < win_size; n++) { /* zero-pad up to the window size */
+            s->fft_data[ch][n].re = 0;
+            s->fft_data[ch][n].im = 0;
+        }
+    }
+
+    /* run FFT on each samples set */
+    for (ch = 0; ch < s->nb_channels; ch++) {
+        av_fft_permute(s->fft, s->fft_data[ch]);
+        av_fft_calc(s->fft, s->fft_data[ch]);
+    }
+
+#define RE(x, ch) s->fft_data[ch][x].re
+#define IM(x, ch) s->fft_data[ch][x].im
+#define M(a, b) (sqrt((a) * (a) + (b) * (b))) /* magnitude of the pair (a, b) */
+
+    colors = av_strdup(s->colors);
+    if (!colors) {
+        av_frame_free(&out);
+        return AVERROR(ENOMEM);
+    }
+    /* colours are taken in channel order from the " |"-separated s->colors; white once it runs out */
+    for (ch = 0; ch < s->nb_channels; ch++) {
+        uint8_t fg[4] = { 0xff, 0xff, 0xff, 0xff };
+        int prev_y = -1, f;
+        double a;
+
+        color = av_strtok(ch == 0 ? colors : NULL, " |", &saveptr);
+        if (color)
+            av_parse_color(fg, color, -1, ctx);
+        /* bin 0 is plotted from its real part alone */
+        a = av_clipd(M(RE(0, ch), 0) / s->scale, 0, 1);
+        plot_freq(s, ch, a, 0, fg, &prev_y, out, outlink);
+
+        for (f = 1; f < s->nb_freq; f++) {
+            a = av_clipd(M(RE(f, ch), IM(f, ch)) / s->scale, 0, 1);
+
+            plot_freq(s, ch, a, f, fg, &prev_y, out, outlink);
+        }
+    }
+
+    av_free(colors);
+    out->pts = in->pts;
+    return ff_filter_frame(outlink, out);
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    ShowFreqsContext *s = ctx->priv;
+    AVFrame *fin = NULL;
+    int consumed = 0, ret = 0;
+
+    /* Queue in (failing if the FIFO cannot take it), then plot each full window, hop_size apart. */
+    if (s->pts == AV_NOPTS_VALUE)
+        s->pts = in->pts - av_audio_fifo_size(s->fifo);
+    ret = av_audio_fifo_write(s->fifo, (void **)in->extended_data, in->nb_samples);
+    if (ret < 0)
+        goto fail;
+    ret = 0; /* a successful write returns a sample count; report 0 unless plotting fails */
+    while (av_audio_fifo_size(s->fifo) >= s->win_size) {
+        fin = ff_get_audio_buffer(inlink, s->win_size);
+        if (!fin) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+        fin->pts = s->pts + consumed;
+        consumed += s->hop_size;
+        ret = av_audio_fifo_peek(s->fifo, (void **)fin->extended_data, s->win_size);
+        if (ret < 0)
+            goto fail;
+        ret = plot_freqs(inlink, fin);
+        av_frame_free(&fin);
+        av_audio_fifo_drain(s->fifo, s->hop_size);
+        if (ret < 0)
+            goto fail;
+    }
+
+fail:
+    s->pts = AV_NOPTS_VALUE; /* reached on success too: pts is re-derived from the next frame */
+    av_frame_free(&fin);
+    av_frame_free(&in);
+    return ret;
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    ShowFreqsContext *s = ctx->priv;
+    int ch;
+
+    /* Free each channel's rows before the tables that hold the row pointers. */
+    av_fft_end(s->fft);
+    for (ch = 0; s->fft_data && ch < s->nb_channels; ch++)
+        av_freep(&s->fft_data[ch]);
+    for (ch = 0; s->avg_data && ch < s->nb_channels; ch++)
+        av_freep(&s->avg_data[ch]);
+
+    av_freep(&s->fft_data);
+    av_freep(&s->avg_data);
+    av_freep(&s->window_func_lut);
+    av_audio_fifo_free(s->fifo);
+}
+
+static const AVFilterPad showfreqs_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_AUDIO,
+        .filter_frame = filter_frame, /* receives the audio frames arriving on this pad */
+    },
+    { NULL } /* end of list */
+};
+
+static const AVFilterPad showfreqs_outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .config_props  = config_output, /* sets output size and frame rate, allocates the FIFO */
+    },
+    { NULL } /* end of list */
+};
+
+AVFilter ff_avf_showfreqs = {
+    .name          = "showfreqs",
+    .description   = NULL_IF_CONFIG_SMALL("Convert input audio to a frequencies video output."),
+    .init          = init,
+    .uninit        = uninit,
+    .query_formats = query_formats,
+    .priv_size     = sizeof(ShowFreqsContext), /* size of the state reached through ctx->priv */
+    .inputs        = showfreqs_inputs,         /* one audio pad */
+    .outputs       = showfreqs_outputs,        /* one video pad */
+    .priv_class    = &showfreqs_class,
+};
diff --git a/libavfilter/avf_showspectrum.c b/libavfilter/avf_showspectrum.c
index 49491b66..6be97af5 100644
--- a/libavfilter/avf_showspectrum.c
+++ b/libavfilter/avf_showspectrum.c
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2012-2013 Clément Bœsch
  * Copyright (c) 2013 Rudolf Polzer <divverent@xonotic.org>
+ * Copyright (c) 2015 Paul B Mahol
  *
  * This file is part of FFmpeg.
  *
@@ -28,37 +29,58 @@
 #include <math.h>
 
 #include "libavcodec/avfft.h"
+#include "libavutil/audio_fifo.h"
 #include "libavutil/avassert.h"
+#include "libavutil/avstring.h"
 #include "libavutil/channel_layout.h"
 #include "libavutil/opt.h"
+#include "libavutil/xga_font_data.h"
+#include "audio.h"
+#include "video.h"
 #include "avfilter.h"
 #include "internal.h"
+#include "window_func.h"
 
 enum DisplayMode  { COMBINED, SEPARATE, NB_MODES };
-enum DisplayScale { LINEAR, SQRT, CBRT, LOG, NB_SCALES };
-enum ColorMode    { CHANNEL, INTENSITY, NB_CLMODES };
-enum WindowFunc   { WFUNC_NONE, WFUNC_HANN, WFUNC_HAMMING, WFUNC_BLACKMAN, NB_WFUNC };
-enum SlideMode    { REPLACE, SCROLL, FULLFRAME, NB_SLIDES };
+enum DataMode     { D_MAGNITUDE, D_PHASE, NB_DMODES };
+enum DisplayScale { LINEAR, SQRT, CBRT, LOG, FOURTHRT, FIFTHRT, NB_SCALES };
+enum ColorMode    { CHANNEL, INTENSITY, RAINBOW, MORELAND, NEBULAE, FIRE, FIERY, FRUIT, COOL, NB_CLMODES };
+enum SlideMode    { REPLACE, SCROLL, FULLFRAME, RSCROLL, NB_SLIDES };
+enum Orientation  { VERTICAL, HORIZONTAL, NB_ORIENTATIONS };
 
 typedef struct {
     const AVClass *class;
     int w, h;
     AVFrame *outpicref;
-    int req_fullfilled;
     int nb_display_channels;
+    int orientation;            ///< VERTICAL or HORIZONTAL (enum Orientation)
+    int channel_width;          ///< width available to one channel
     int channel_height;
     int sliding;                ///< 1 if sliding mode, 0 otherwise
     int mode;                   ///< channel display mode
     int color_mode;             ///< display color scheme
     int scale;
     float saturation;           ///< color saturation multiplier
+    int data;                   ///< data mode: D_MAGNITUDE or D_PHASE
     int xpos;                   ///< x position (current column)
-    RDFTContext *rdft;          ///< Real Discrete Fourier Transform context
-    int rdft_bits;              ///< number of bits (RDFT window size = 1<<rdft_bits)
-    FFTSample **rdft_data;      ///< bins holder for each (displayed) channels
+    FFTContext *fft;            ///< Fast Fourier Transform context
+    int fft_bits;               ///< number of bits (FFT window size = 1<<fft_bits)
+    FFTComplex **fft_data;      ///< bins holder for each (displayed) channels
     float *window_func_lut;     ///< Window function LUT
+    float **magnitudes;         ///< per-channel float arrays, h (vertical) or w (horizontal) items each
+    float **phases;             ///< per-channel float arrays, same item count as magnitudes
     int win_func;
+    int win_size;               ///< FFT window size in samples (1<<fft_bits)
+    double win_scale;           ///< 1 / sqrt(sum of squared window coefficients)
+    float overlap;              ///< window overlap in [0, 1]; 1 picks the value reported by ff_generate_window_func()
+    float gain;                 ///< scale gain option
+    int hop_size;               ///< (1 - overlap) * win_size, in samples
     float *combine_buffer;      ///< color combining buffer (3 * h items)
+    AVAudioFifo *fifo;          ///< audio FIFO, allocated for win_size samples
+    int64_t pts;
+    int single_pic;             ///< set when the filter is named "showspectrumpic"
+    int legend;                 ///< when non-zero, the output grows by the start_x/start_y margins
+    int start_x, start_y;       ///< margins added on each side when legend is set
 } ShowSpectrumContext;
 
 #define OFFSET(x) offsetof(ShowSpectrumContext, x)
@@ -67,9 +89,10 @@ typedef struct {
 static const AVOption showspectrum_options[] = {
     { "size", "set video size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, {.str = "640x512"}, 0, 0, FLAGS },
     { "s",    "set video size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, {.str = "640x512"}, 0, 0, FLAGS },
-    { "slide", "set sliding mode", OFFSET(sliding), AV_OPT_TYPE_INT, {.i64 = 0}, 0, NB_SLIDES, FLAGS, "slide" },
+    { "slide", "set sliding mode", OFFSET(sliding), AV_OPT_TYPE_INT, {.i64 = 0}, 0, NB_SLIDES-1, FLAGS, "slide" },
         { "replace", "replace old columns with new", 0, AV_OPT_TYPE_CONST, {.i64=REPLACE}, 0, 0, FLAGS, "slide" },
         { "scroll", "scroll from right to left", 0, AV_OPT_TYPE_CONST, {.i64=SCROLL}, 0, 0, FLAGS, "slide" },
+        { "rscroll", "scroll from left to right", 0, AV_OPT_TYPE_CONST, {.i64=RSCROLL}, 0, 0, FLAGS, "slide" },
         { "fullframe", "return full frames", 0, AV_OPT_TYPE_CONST, {.i64=FULLFRAME}, 0, 0, FLAGS, "slide" },
     { "mode", "set channel display mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64=COMBINED}, COMBINED, NB_MODES-1, FLAGS, "mode" },
         { "combined", "combined mode", 0, AV_OPT_TYPE_CONST, {.i64=COMBINED}, 0, 0, FLAGS, "mode" },
@@ -77,24 +100,55 @@ static const AVOption showspectrum_options[] = {
     { "color", "set channel coloring", OFFSET(color_mode), AV_OPT_TYPE_INT, {.i64=CHANNEL}, CHANNEL, NB_CLMODES-1, FLAGS, "color" },
         { "channel",   "separate color for each channel", 0, AV_OPT_TYPE_CONST, {.i64=CHANNEL},   0, 0, FLAGS, "color" },
         { "intensity", "intensity based coloring",        0, AV_OPT_TYPE_CONST, {.i64=INTENSITY}, 0, 0, FLAGS, "color" },
+        { "rainbow",   "rainbow based coloring",          0, AV_OPT_TYPE_CONST, {.i64=RAINBOW},   0, 0, FLAGS, "color" },
+        { "moreland",  "moreland based coloring",         0, AV_OPT_TYPE_CONST, {.i64=MORELAND},  0, 0, FLAGS, "color" },
+        { "nebulae",   "nebulae based coloring",          0, AV_OPT_TYPE_CONST, {.i64=NEBULAE},   0, 0, FLAGS, "color" },
+        { "fire",      "fire based coloring",             0, AV_OPT_TYPE_CONST, {.i64=FIRE},      0, 0, FLAGS, "color" },
+        { "fiery",     "fiery based coloring",            0, AV_OPT_TYPE_CONST, {.i64=FIERY},     0, 0, FLAGS, "color" },
+        { "fruit",     "fruit based coloring",            0, AV_OPT_TYPE_CONST, {.i64=FRUIT},     0, 0, FLAGS, "color" },
+        { "cool",      "cool based coloring",             0, AV_OPT_TYPE_CONST, {.i64=COOL},      0, 0, FLAGS, "color" },
     { "scale", "set display scale", OFFSET(scale), AV_OPT_TYPE_INT, {.i64=SQRT}, LINEAR, NB_SCALES-1, FLAGS, "scale" },
         { "sqrt", "square root", 0, AV_OPT_TYPE_CONST, {.i64=SQRT},   0, 0, FLAGS, "scale" },
         { "cbrt", "cubic root",  0, AV_OPT_TYPE_CONST, {.i64=CBRT},   0, 0, FLAGS, "scale" },
+        { "4thrt","4th root",    0, AV_OPT_TYPE_CONST, {.i64=FOURTHRT}, 0, 0, FLAGS, "scale" },
+        { "5thrt","5th root",    0, AV_OPT_TYPE_CONST, {.i64=FIFTHRT},  0, 0, FLAGS, "scale" },
         { "log",  "logarithmic", 0, AV_OPT_TYPE_CONST, {.i64=LOG},    0, 0, FLAGS, "scale" },
         { "lin",  "linear",      0, AV_OPT_TYPE_CONST, {.i64=LINEAR}, 0, 0, FLAGS, "scale" },
     { "saturation", "color saturation multiplier", OFFSET(saturation), AV_OPT_TYPE_FLOAT, {.dbl = 1}, -10, 10, FLAGS },
-    { "win_func", "set window function", OFFSET(win_func), AV_OPT_TYPE_INT, {.i64 = WFUNC_HANN}, 0, NB_WFUNC-1, FLAGS, "win_func" },
-        { "hann",     "Hann window",     0, AV_OPT_TYPE_CONST, {.i64 = WFUNC_HANN},     0, 0, FLAGS, "win_func" },
-        { "hamming",  "Hamming window",  0, AV_OPT_TYPE_CONST, {.i64 = WFUNC_HAMMING},  0, 0, FLAGS, "win_func" },
-        { "blackman", "Blackman window", 0, AV_OPT_TYPE_CONST, {.i64 = WFUNC_BLACKMAN}, 0, 0, FLAGS, "win_func" },
+    { "win_func", "set window function", OFFSET(win_func), AV_OPT_TYPE_INT, {.i64 = WFUNC_HANNING}, 0, NB_WFUNC-1, FLAGS, "win_func" },
+        { "rect",     "Rectangular",      0, AV_OPT_TYPE_CONST, {.i64=WFUNC_RECT},     0, 0, FLAGS, "win_func" },
+        { "bartlett", "Bartlett",         0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BARTLETT}, 0, 0, FLAGS, "win_func" },
+        { "hann",     "Hann",             0, AV_OPT_TYPE_CONST, {.i64=WFUNC_HANNING},  0, 0, FLAGS, "win_func" },
+        { "hanning",  "Hanning",          0, AV_OPT_TYPE_CONST, {.i64=WFUNC_HANNING},  0, 0, FLAGS, "win_func" },
+        { "hamming",  "Hamming",          0, AV_OPT_TYPE_CONST, {.i64=WFUNC_HAMMING},  0, 0, FLAGS, "win_func" },
+        { "blackman", "Blackman",         0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BLACKMAN}, 0, 0, FLAGS, "win_func" },
+        { "welch",    "Welch",            0, AV_OPT_TYPE_CONST, {.i64=WFUNC_WELCH},    0, 0, FLAGS, "win_func" },
+        { "flattop",  "Flat-top",         0, AV_OPT_TYPE_CONST, {.i64=WFUNC_FLATTOP},  0, 0, FLAGS, "win_func" },
+        { "bharris",  "Blackman-Harris",  0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BHARRIS},  0, 0, FLAGS, "win_func" },
+        { "bnuttall", "Blackman-Nuttall", 0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BNUTTALL}, 0, 0, FLAGS, "win_func" },
+        { "bhann",    "Bartlett-Hann",    0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BHANN},    0, 0, FLAGS, "win_func" },
+        { "sine",     "Sine",             0, AV_OPT_TYPE_CONST, {.i64=WFUNC_SINE},     0, 0, FLAGS, "win_func" },
+        { "nuttall",  "Nuttall",          0, AV_OPT_TYPE_CONST, {.i64=WFUNC_NUTTALL},  0, 0, FLAGS, "win_func" },
+        { "lanczos",  "Lanczos",          0, AV_OPT_TYPE_CONST, {.i64=WFUNC_LANCZOS},  0, 0, FLAGS, "win_func" },
+        { "gauss",    "Gauss",            0, AV_OPT_TYPE_CONST, {.i64=WFUNC_GAUSS},    0, 0, FLAGS, "win_func" },
+        { "tukey",    "Tukey",            0, AV_OPT_TYPE_CONST, {.i64=WFUNC_TUKEY},    0, 0, FLAGS, "win_func" },
+    { "orientation", "set orientation", OFFSET(orientation), AV_OPT_TYPE_INT, {.i64=VERTICAL}, 0, NB_ORIENTATIONS-1, FLAGS, "orientation" },
+        { "vertical",   NULL, 0, AV_OPT_TYPE_CONST, {.i64=VERTICAL},   0, 0, FLAGS, "orientation" },
+        { "horizontal", NULL, 0, AV_OPT_TYPE_CONST, {.i64=HORIZONTAL}, 0, 0, FLAGS, "orientation" },
+    { "overlap", "set window overlap", OFFSET(overlap), AV_OPT_TYPE_FLOAT, {.dbl = 0}, 0, 1, FLAGS },
+    { "gain", "set scale gain", OFFSET(gain), AV_OPT_TYPE_FLOAT, {.dbl = 1}, 0, 128, FLAGS },
+    { "data", "set data mode", OFFSET(data), AV_OPT_TYPE_INT, {.i64 = 0}, 0, NB_DMODES-1, FLAGS, "data" },
+        { "magnitude", NULL, 0, AV_OPT_TYPE_CONST, {.i64=D_MAGNITUDE}, 0, 0, FLAGS, "data" },
+        { "phase",     NULL, 0, AV_OPT_TYPE_CONST, {.i64=D_PHASE},     0, 0, FLAGS, "data" },
     { NULL }
 };
 
 AVFILTER_DEFINE_CLASS(showspectrum);
 
-static const struct {
+static const struct ColorTable {
     float a, y, u, v;
-} intensity_color_table[] = {
+} color_table[][8] = {
+    [INTENSITY] = {
     {    0,                  0,                  0,                   0 },
     { 0.13, .03587126228984074,  .1573300977624594, -.02548747583751842 },
     { 0.30, .18572281794568020,  .1772436246393981,  .17475554840414750 },
@@ -102,7 +156,65 @@ static const struct {
     { 0.73, .65830621175547810, -.3716070802232764,  .24352759331252930 },
     { 0.78, .76318535758242900, -.4307467689263783,  .16866496622310430 },
     { 0.91, .95336363636363640, -.2045454545454546,  .03313636363636363 },
-    {    1,                  1,                  0,                   0 }
+    {    1,                  1,                  0,                   0 }},
+    [RAINBOW] = {
+    {    0,                  0,                  0,                   0 },
+    { 0.13,            44/256.,     (189-128)/256.,      (138-128)/256. },
+    { 0.25,            29/256.,     (186-128)/256.,      (119-128)/256. },
+    { 0.38,           119/256.,     (194-128)/256.,       (53-128)/256. },
+    { 0.60,           111/256.,      (73-128)/256.,       (59-128)/256. },
+    { 0.73,           205/256.,      (19-128)/256.,      (149-128)/256. },
+    { 0.86,           135/256.,      (83-128)/256.,      (200-128)/256. },
+    {    1,            73/256.,      (95-128)/256.,      (225-128)/256. }},
+    [MORELAND] = {
+    {    0,            44/256.,     (181-128)/256.,      (112-128)/256. },
+    { 0.13,           126/256.,     (177-128)/256.,      (106-128)/256. },
+    { 0.25,           164/256.,     (163-128)/256.,      (109-128)/256. },
+    { 0.38,           200/256.,     (140-128)/256.,      (120-128)/256. },
+    { 0.60,           201/256.,     (117-128)/256.,      (141-128)/256. },
+    { 0.73,           177/256.,     (103-128)/256.,      (165-128)/256. },
+    { 0.86,           136/256.,     (100-128)/256.,      (183-128)/256. },
+    {    1,            68/256.,     (117-128)/256.,      (203-128)/256. }},
+    [NEBULAE] = {
+    {    0,            10/256.,     (134-128)/256.,      (132-128)/256. },
+    { 0.23,            21/256.,     (137-128)/256.,      (130-128)/256. },
+    { 0.45,            35/256.,     (134-128)/256.,      (134-128)/256. },
+    { 0.57,            51/256.,     (130-128)/256.,      (139-128)/256. },
+    { 0.67,           104/256.,     (116-128)/256.,      (162-128)/256. },
+    { 0.77,           120/256.,     (105-128)/256.,      (188-128)/256. },
+    { 0.87,           140/256.,     (105-128)/256.,      (188-128)/256. },
+    {    1,                  1,                  0,                   0 }},
+    [FIRE] = {
+    {    0,                  0,                  0,                   0 },
+    { 0.23,            44/256.,     (132-128)/256.,      (127-128)/256. },
+    { 0.45,            62/256.,     (116-128)/256.,      (140-128)/256. },
+    { 0.57,            75/256.,     (105-128)/256.,      (152-128)/256. },
+    { 0.67,            95/256.,      (91-128)/256.,      (166-128)/256. },
+    { 0.77,           126/256.,      (74-128)/256.,      (172-128)/256. },
+    { 0.87,           164/256.,      (73-128)/256.,      (162-128)/256. },
+    {    1,                  1,                  0,                   0 }},
+    [FIERY] = {
+    {    0,                  0,                  0,                   0 },
+    { 0.23,            36/256.,     (116-128)/256.,      (163-128)/256. },
+    { 0.45,            52/256.,     (102-128)/256.,      (200-128)/256. },
+    { 0.57,           116/256.,      (84-128)/256.,      (196-128)/256. },
+    { 0.67,           157/256.,      (67-128)/256.,      (181-128)/256. },
+    { 0.77,           193/256.,      (40-128)/256.,      (155-128)/256. },
+    { 0.87,           221/256.,     (101-128)/256.,      (134-128)/256. },
+    {    1,                  1,                  0,                   0 }},
+    [FRUIT] = {
+    {    0,                  0,                  0,                   0 },
+    { 0.20,            29/256.,     (136-128)/256.,      (119-128)/256. },
+    { 0.30,            60/256.,     (119-128)/256.,       (90-128)/256. },
+    { 0.40,            85/256.,      (91-128)/256.,       (85-128)/256. },
+    { 0.50,           116/256.,      (70-128)/256.,      (105-128)/256. },
+    { 0.60,           151/256.,      (50-128)/256.,      (146-128)/256. },
+    { 0.70,           191/256.,      (63-128)/256.,      (178-128)/256. },
+    {    1,            98/256.,      (80-128)/256.,      (221-128)/256. }},
+    [COOL] = {
+    {    0,                  0,                  0,                   0 },
+    {  .15,                  0,                 .5,                 -.5 },
+    {    1,                  1,                -.5,                  .5 }},
 };
 
 static av_cold void uninit(AVFilterContext *ctx)
@@ -111,12 +223,25 @@ static av_cold void uninit(AVFilterContext *ctx)
     int i;
 
     av_freep(&s->combine_buffer);
-    av_rdft_end(s->rdft);
-    for (i = 0; i < s->nb_display_channels; i++)
-        av_freep(&s->rdft_data[i]);
-    av_freep(&s->rdft_data);
+    av_fft_end(s->fft);
+    if (s->fft_data) {
+        for (i = 0; i < s->nb_display_channels; i++)
+            av_freep(&s->fft_data[i]);
+    }
+    av_freep(&s->fft_data);
     av_freep(&s->window_func_lut);
+    if (s->magnitudes) {
+        for (i = 0; i < s->nb_display_channels; i++)
+            av_freep(&s->magnitudes[i]);
+    }
+    av_freep(&s->magnitudes);
     av_frame_free(&s->outpicref);
+    av_audio_fifo_free(s->fifo);
+    if (s->phases) {
+        for (i = 0; i < s->nb_display_channels; i++)
+            av_freep(&s->phases[i]);
+    }
+    av_freep(&s->phases);
 }
 
 static int query_formats(AVFilterContext *ctx)
@@ -125,30 +250,27 @@ static int query_formats(AVFilterContext *ctx)
     AVFilterChannelLayouts *layouts = NULL;
     AVFilterLink *inlink = ctx->inputs[0];
     AVFilterLink *outlink = ctx->outputs[0];
-    static const enum AVSampleFormat sample_fmts[] = { AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_NONE };
-    static const enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_NONE };
+    static const enum AVSampleFormat sample_fmts[] = { AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE };
+    static const enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_NONE };
+    int ret;
 
     /* set input audio formats */
     formats = ff_make_format_list(sample_fmts);
-    if (!formats)
-        return AVERROR(ENOMEM);
-    ff_formats_ref(formats, &inlink->out_formats);
+    if ((ret = ff_formats_ref(formats, &inlink->out_formats)) < 0)
+        return ret;
 
     layouts = ff_all_channel_layouts();
-    if (!layouts)
-        return AVERROR(ENOMEM);
-    ff_channel_layouts_ref(layouts, &inlink->out_channel_layouts);
+    if ((ret = ff_channel_layouts_ref(layouts, &inlink->out_channel_layouts)) < 0)
+        return ret;
 
     formats = ff_all_samplerates();
-    if (!formats)
-        return AVERROR(ENOMEM);
-    ff_formats_ref(formats, &inlink->out_samplerates);
+    if ((ret = ff_formats_ref(formats, &inlink->out_samplerates)) < 0)
+        return ret;
 
     /* set output video format */
     formats = ff_make_format_list(pix_fmts);
-    if (!formats)
-        return AVERROR(ENOMEM);
-    ff_formats_ref(formats, &outlink->in_formats);
+    if ((ret = ff_formats_ref(formats, &outlink->in_formats)) < 0)
+        return ret;
 
     return 0;
 }
@@ -158,82 +280,103 @@ static int config_output(AVFilterLink *outlink)
     AVFilterContext *ctx = outlink->src;
     AVFilterLink *inlink = ctx->inputs[0];
     ShowSpectrumContext *s = ctx->priv;
-    int i, rdft_bits, win_size, h;
+    int i, fft_bits, h, w;
+    float overlap;
+
+    if (!strcmp(ctx->filter->name, "showspectrumpic"))
+        s->single_pic = 1;
 
     outlink->w = s->w;
     outlink->h = s->h;
 
-    h = (s->mode == COMBINED) ? outlink->h : outlink->h / inlink->channels;
-    s->channel_height = h;
+    if (s->legend) {
+        s->start_x = log10(inlink->sample_rate) * 25;
+        s->start_y = 64;
+        outlink->w += s->start_x * 2;
+        outlink->h += s->start_y * 2;
+    }
 
-    /* RDFT window size (precision) according to the requested output frame height */
-    for (rdft_bits = 1; 1 << rdft_bits < 2 * h; rdft_bits++);
-    win_size = 1 << rdft_bits;
+    h = (s->mode == COMBINED || s->orientation == HORIZONTAL) ? s->h : s->h / inlink->channels;
+    w = (s->mode == COMBINED || s->orientation == VERTICAL)   ? s->w : s->w / inlink->channels;
+    s->channel_height = h;
+    s->channel_width  = w;
+
+    if (s->orientation == VERTICAL) {
+        /* FFT window size (precision) according to the requested output frame height */
+        for (fft_bits = 1; 1 << fft_bits < 2 * h; fft_bits++);
+    } else {
+        /* FFT window size (precision) according to the requested output frame width */
+        for (fft_bits = 1; 1 << fft_bits < 2 * w; fft_bits++);
+    }
+    s->win_size = 1 << fft_bits;
 
     /* (re-)configuration if the video output changed (or first init) */
-    if (rdft_bits != s->rdft_bits) {
-        size_t rdft_size, rdft_listsize;
+    if (fft_bits != s->fft_bits) {
         AVFrame *outpicref;
 
-        av_rdft_end(s->rdft);
-        s->rdft = av_rdft_init(rdft_bits, DFT_R2C);
-        if (!s->rdft) {
-            av_log(ctx, AV_LOG_ERROR, "Unable to create RDFT context. "
+        av_fft_end(s->fft);
+        s->fft = av_fft_init(fft_bits, 0);
+        if (!s->fft) {
+            av_log(ctx, AV_LOG_ERROR, "Unable to create FFT context. "
                    "The window size might be too high.\n");
             return AVERROR(EINVAL);
         }
-        s->rdft_bits = rdft_bits;
+        s->fft_bits = fft_bits;
 
-        /* RDFT buffers: x2 for each (display) channel buffer.
+        /* FFT buffers: x2 for each (display) channel buffer.
          * Note: we use free and malloc instead of a realloc-like function to
          * make sure the buffer is aligned in memory for the FFT functions. */
         for (i = 0; i < s->nb_display_channels; i++)
-            av_freep(&s->rdft_data[i]);
-        av_freep(&s->rdft_data);
+            av_freep(&s->fft_data[i]);
+        av_freep(&s->fft_data);
         s->nb_display_channels = inlink->channels;
 
-        if (av_size_mult(sizeof(*s->rdft_data),
-                         s->nb_display_channels, &rdft_listsize) < 0)
-            return AVERROR(EINVAL);
-        if (av_size_mult(sizeof(**s->rdft_data),
-                         win_size, &rdft_size) < 0)
-            return AVERROR(EINVAL);
-        s->rdft_data = av_malloc(rdft_listsize);
-        if (!s->rdft_data)
+        s->magnitudes = av_calloc(s->nb_display_channels, sizeof(*s->magnitudes));
+        if (!s->magnitudes)
+            return AVERROR(ENOMEM);
+        for (i = 0; i < s->nb_display_channels; i++) {
+            s->magnitudes[i] = av_calloc(s->orientation == VERTICAL ? s->h : s->w, sizeof(**s->magnitudes));
+            if (!s->magnitudes[i])
+                return AVERROR(ENOMEM);
+        }
+
+        s->phases = av_calloc(s->nb_display_channels, sizeof(*s->phases)); /* one row pointer per channel */
+        if (!s->phases)
             return AVERROR(ENOMEM);
         for (i = 0; i < s->nb_display_channels; i++) {
-            s->rdft_data[i] = av_malloc(rdft_size);
-            if (!s->rdft_data[i])
+            s->phases[i] = av_calloc(s->orientation == VERTICAL ? s->h : s->w, sizeof(**s->phases));
+            if (!s->phases[i])
+                return AVERROR(ENOMEM);
+        }
+
+        s->fft_data = av_calloc(s->nb_display_channels, sizeof(*s->fft_data));
+        if (!s->fft_data)
+            return AVERROR(ENOMEM);
+        for (i = 0; i < s->nb_display_channels; i++) {
+            s->fft_data[i] = av_calloc(s->win_size, sizeof(**s->fft_data));
+            if (!s->fft_data[i])
                 return AVERROR(ENOMEM);
         }
 
         /* pre-calc windowing function */
         s->window_func_lut =
-            av_realloc_f(s->window_func_lut, win_size,
+            av_realloc_f(s->window_func_lut, s->win_size,
                          sizeof(*s->window_func_lut));
         if (!s->window_func_lut)
             return AVERROR(ENOMEM);
-        switch (s->win_func) {
-        case WFUNC_NONE:
-            for (i = 0; i < win_size; i++)
-                s->window_func_lut[i] = 1.;
-            break;
-        case WFUNC_HANN:
-            for (i = 0; i < win_size; i++)
-                s->window_func_lut[i] = .5f * (1 - cos(2*M_PI*i / (win_size-1)));
-            break;
-        case WFUNC_HAMMING:
-            for (i = 0; i < win_size; i++)
-                s->window_func_lut[i] = .54f - .46f * cos(2*M_PI*i / (win_size-1));
-            break;
-        case WFUNC_BLACKMAN: {
-            for (i = 0; i < win_size; i++)
-                s->window_func_lut[i] = .42f - .5f*cos(2*M_PI*i / (win_size-1)) + .08f*cos(4*M_PI*i / (win_size-1));
-            break;
+        ff_generate_window_func(s->window_func_lut, s->win_size, s->win_func, &overlap);
+        if (s->overlap == 1)
+            s->overlap = overlap;
+        s->hop_size = (1. - s->overlap) * s->win_size;
+        if (s->hop_size < 1) {
+            av_log(ctx, AV_LOG_ERROR, "overlap %f too big\n", s->overlap);
+            return AVERROR(EINVAL);
         }
-        default:
-            av_assert0(0);
+
+        for (s->win_scale = 0, i = 0; i < s->win_size; i++) {
+            s->win_scale += s->window_func_lut[i] * s->window_func_lut[i];
         }
+        s->win_scale = 1. / sqrt(s->win_scale);
 
         /* prepare the initial picref buffer (black frame) */
         av_frame_free(&s->outpicref);
@@ -247,257 +390,447 @@ static int config_output(AVFilterLink *outlink)
             memset(outpicref->data[1] + i * outpicref->linesize[1], 128, outlink->w);
             memset(outpicref->data[2] + i * outpicref->linesize[2], 128, outlink->w);
         }
+        av_frame_set_color_range(outpicref, AVCOL_RANGE_JPEG);
     }
 
-    if (s->xpos >= outlink->w)
+    if ((s->orientation == VERTICAL   && s->xpos >= s->w) ||
+        (s->orientation == HORIZONTAL && s->xpos >= s->h))
         s->xpos = 0;
 
-    outlink->frame_rate = av_make_q(inlink->sample_rate, win_size);
-    if (s->sliding == FULLFRAME)
-        outlink->frame_rate.den *= outlink->w;
-
-    inlink->min_samples = inlink->max_samples = inlink->partial_buf_size =
-        win_size;
+    outlink->frame_rate = av_make_q(inlink->sample_rate, s->win_size * (1.-s->overlap));
+    if (s->orientation == VERTICAL && s->sliding == FULLFRAME)
+        outlink->frame_rate.den *= s->w;
+    if (s->orientation == HORIZONTAL && s->sliding == FULLFRAME)
+        outlink->frame_rate.den *= s->h;
+
+    if (s->orientation == VERTICAL) {
+        s->combine_buffer =
+            av_realloc_f(s->combine_buffer, s->h * 3,
+                         sizeof(*s->combine_buffer));
+    } else {
+        s->combine_buffer =
+            av_realloc_f(s->combine_buffer, s->w * 3,
+                         sizeof(*s->combine_buffer));
+    }
 
-    s->combine_buffer =
-        av_realloc_f(s->combine_buffer, outlink->h * 3,
-                     sizeof(*s->combine_buffer));
+    av_log(ctx, AV_LOG_VERBOSE, "s:%dx%d FFT window size:%d\n",
+           s->w, s->h, s->win_size);
 
-    av_log(ctx, AV_LOG_VERBOSE, "s:%dx%d RDFT window size:%d\n",
-           s->w, s->h, win_size);
+    av_audio_fifo_free(s->fifo);
+    s->fifo = av_audio_fifo_alloc(inlink->format, inlink->channels, s->win_size);
+    if (!s->fifo)
+        return AVERROR(ENOMEM);
     return 0;
 }
 
-static int request_frame(AVFilterLink *outlink)
+static void run_fft(ShowSpectrumContext *s, AVFrame *fin) /* window fin's samples and FFT every displayed channel into s->fft_data */
 {
-    ShowSpectrumContext *s = outlink->src->priv;
-    AVFilterLink *inlink = outlink->src->inputs[0];
-    unsigned i;
-    int ret;
+    int ch, n;
 
-    s->req_fullfilled = 0;
-    do {
-        ret = ff_request_frame(inlink);
-        if (ret == AVERROR_EOF && s->sliding == FULLFRAME && s->xpos > 0 &&
-            s->outpicref) {
-            for (i = 0; i < outlink->h; i++) {
-                memset(s->outpicref->data[0] + i * s->outpicref->linesize[0] + s->xpos,   0, outlink->w - s->xpos);
-                memset(s->outpicref->data[1] + i * s->outpicref->linesize[1] + s->xpos, 128, outlink->w - s->xpos);
-                memset(s->outpicref->data[2] + i * s->outpicref->linesize[2] + s->xpos, 128, outlink->w - s->xpos);
-            }
-            ret = ff_filter_frame(outlink, s->outpicref);
-            s->outpicref = NULL;
-            s->req_fullfilled = 1;
+    /* load win_size windowed samples per channel into the complex FFT input */
+    for (ch = 0; ch < s->nb_display_channels; ch++) {
+        const float *p = (float *)fin->extended_data[ch]; /* samples are read as float */
+
+        for (n = 0; n < s->win_size; n++) {
+            s->fft_data[ch][n].re = p[n] * s->window_func_lut[n]; /* apply the precomputed window */
+            s->fft_data[ch][n].im = 0; /* real input: imaginary part is zero */
         }
-    } while (!s->req_fullfilled && ret >= 0);
+    }
 
-    return ret;
+    /* transform each channel's buffer: permute, then calc, on the same array */
+    for (ch = 0; ch < s->nb_display_channels; ch++) {
+        av_fft_permute(s->fft, s->fft_data[ch]);
+        av_fft_calc(s->fft, s->fft_data[ch]);
+    }
 }
 
-static int plot_spectrum_column(AVFilterLink *inlink, AVFrame *insamples)
+#define RE(y, ch) s->fft_data[ch][y].re
+#define IM(y, ch) s->fft_data[ch][y].im
+#define MAGNITUDE(y, ch) hypot(RE(y, ch), IM(y, ch))
+#define PHASE(y, ch) atan2(IM(y, ch), RE(y, ch))
+
+static void calc_magnitudes(ShowSpectrumContext *s) /* overwrite s->magnitudes[ch][] with scaled FFT bin magnitudes */
 {
-    int ret;
-    AVFilterContext *ctx = inlink->dst;
-    AVFilterLink *outlink = ctx->outputs[0];
-    ShowSpectrumContext *s = ctx->priv;
-    AVFrame *outpicref = s->outpicref;
+    const double w = s->win_scale * (s->scale == LOG ? s->win_scale : 1); /* win_scale is applied twice when the LOG scale is selected */
+    int ch, y, h = s->orientation == VERTICAL ? s->h : s->w; /* bin count: s->h when VERTICAL, otherwise s->w */
+    const float f = s->gain * w; /* gain folded into the normalisation factor */
+
+    for (ch = 0; ch < s->nb_display_channels; ch++) {
+        float *magnitudes = s->magnitudes[ch];
+
+        for (y = 0; y < h; y++)
+            magnitudes[y] = MAGNITUDE(y, ch) * f; /* hypot(re, im) of bin y, scaled */
+    }
+}
+
+static void calc_phases(ShowSpectrumContext *s) /* fill s->phases[ch][] with the normalised phase of each FFT bin */
+{
+    int ch, y, h = s->orientation == VERTICAL ? s->h : s->w; /* bin count: s->h when VERTICAL, otherwise s->w */
 
-    /* nb_freq contains the power of two superior or equal to the output image
-     * height (or half the RDFT window size) */
-    const int nb_freq = 1 << (s->rdft_bits - 1);
-    const int win_size = nb_freq << 1;
-    const double w = 1. / (sqrt(nb_freq) * 32768.);
-    int h = s->channel_height;
+    for (ch = 0; ch < s->nb_display_channels; ch++) {
+        float *phases = s->phases[ch];
 
-    int ch, plane, n, y;
+        for (y = 0; y < h; y++)
+            phases[y] = (PHASE(y, ch) / M_PI + 1) / 2; /* atan2 result in [-pi, pi] mapped to [0, 1] */
+    }
+}
 
-    av_assert0(insamples->nb_samples == win_size);
+static void acalc_magnitudes(ShowSpectrumContext *s) /* like calc_magnitudes(), but adds to s->magnitudes[ch][] instead of overwriting */
+{
+    const double w = s->win_scale * (s->scale == LOG ? s->win_scale : 1); /* win_scale is applied twice when the LOG scale is selected */
+    int ch, y, h = s->orientation == VERTICAL ? s->h : s->w; /* bin count: s->h when VERTICAL, otherwise s->w */
+    const float f = s->gain * w; /* gain folded into the normalisation factor */
 
-    /* fill RDFT input with the number of samples available */
     for (ch = 0; ch < s->nb_display_channels; ch++) {
-        const int16_t *p = (int16_t *)insamples->extended_data[ch];
+        float *magnitudes = s->magnitudes[ch];
 
-        for (n = 0; n < win_size; n++)
-            s->rdft_data[ch][n] = p[n] * s->window_func_lut[n];
+        for (y = 0; y < h; y++)
+            magnitudes[y] += MAGNITUDE(y, ch) * f; /* accumulate; existing contents are kept */
     }
+}
 
-    /* TODO reindent */
+static void scale_magnitudes(ShowSpectrumContext *s, float scale) /* multiply every stored magnitude by scale, in place */
+{
+    int ch, y, h = s->orientation == VERTICAL ? s->h : s->w; /* bin count: s->h when VERTICAL, otherwise s->w */
 
-        /* run RDFT on each samples set */
-        for (ch = 0; ch < s->nb_display_channels; ch++)
-            av_rdft_calc(s->rdft, s->rdft_data[ch]);
+    for (ch = 0; ch < s->nb_display_channels; ch++) {
+        float *magnitudes = s->magnitudes[ch];
 
-        /* fill a new spectrum column */
-#define RE(y, ch) s->rdft_data[ch][2 * (y) + 0]
-#define IM(y, ch) s->rdft_data[ch][2 * (y) + 1]
-#define MAGNITUDE(y, ch) hypot(RE(y, ch), IM(y, ch))
+        for (y = 0; y < h; y++)
+            magnitudes[y] *= scale;
+    }
+}
 
-        /* initialize buffer for combining to black */
-        for (y = 0; y < outlink->h; y++) {
-            s->combine_buffer[3 * y    ] = 0;
-            s->combine_buffer[3 * y + 1] = 127.5;
-            s->combine_buffer[3 * y + 2] = 127.5;
+static void color_range(ShowSpectrumContext *s, int ch,
+                        float *yf, float *uf, float *vf) /* outputs: per-channel Y/U/V scale factors for channel ch */
+{
+    switch (s->mode) {
+    case COMBINED:
+        // reduce range by channel count
+        *yf = 256.0f / s->nb_display_channels;
+        switch (s->color_mode) {
+        case RAINBOW:
+        case MORELAND:
+        case NEBULAE:
+        case FIRE:
+        case FIERY:
+        case FRUIT:
+        case COOL:
+        case INTENSITY: /* every mode listed above uses the luma range for both chroma factors */
+            *uf = *yf;
+            *vf = *yf;
+            break;
+        case CHANNEL:
+            /* adjust saturation for mixed UV coloring */
+            /* this factor is correct for infinite channels, an approximation otherwise */
+            *uf = *yf * M_PI;
+            *vf = *yf * M_PI;
+            break;
+        default:
+            av_assert0(0);
         }
+        break;
+    case SEPARATE:
+        // full range
+        *yf = 256.0f;
+        *uf = 256.0f;
+        *vf = 256.0f;
+        break;
+    default:
+        av_assert0(0);
+    }
 
-        for (ch = 0; ch < s->nb_display_channels; ch++) {
-            float yf, uf, vf;
-
-            /* decide color range */
-            switch (s->mode) {
-            case COMBINED:
-                // reduce range by channel count
-                yf = 256.0f / s->nb_display_channels;
-                switch (s->color_mode) {
-                case INTENSITY:
-                    uf = yf;
-                    vf = yf;
-                    break;
-                case CHANNEL:
-                    /* adjust saturation for mixed UV coloring */
-                    /* this factor is correct for infinite channels, an approximation otherwise */
-                    uf = yf * M_PI;
-                    vf = yf * M_PI;
-                    break;
-                default:
-                    av_assert0(0);
-                }
+    if (s->color_mode == CHANNEL) { /* per-channel hue: U/V follow sin/cos of the channel's angle */
+        if (s->nb_display_channels > 1) {
+            *uf *= 0.5 * sin((2 * M_PI * ch) / s->nb_display_channels);
+            *vf *= 0.5 * cos((2 * M_PI * ch) / s->nb_display_channels);
+        } else { /* a single channel gets no chroma at all */
+            *uf = 0.0f;
+            *vf = 0.0f;
+        }
+    }
+    *uf *= s->saturation; /* user saturation applies in every mode */
+    *vf *= s->saturation;
+}
+
+static void pick_color(ShowSpectrumContext *s,
+                       float yf, float uf, float vf,
+                       float a, float *out) /* adds the color for intensity a, scaled by yf/uf/vf, into out[0..2] */
+{
+    if (s->color_mode > CHANNEL) { /* modes above CHANNEL index color_table[] directly */
+        const int cm = s->color_mode;
+        float y, u, v;
+        int i;
+
+        for (i = 1; i < FF_ARRAY_ELEMS(color_table[cm]) - 1; i++)
+            if (color_table[cm][i].a >= a)
                 break;
-            case SEPARATE:
-                // full range
-                yf = 256.0f;
-                uf = 256.0f;
-                vf = 256.0f;
+        // i is now the first entry (from index 1) whose .a is >= a, or the last entry if none is
+        // interpolate between entries i - 1 and i, clamping outside that span
+        if (a <= color_table[cm][i - 1].a) { /* at or below the lower entry: use it as is */
+            y = color_table[cm][i - 1].y;
+            u = color_table[cm][i - 1].u;
+            v = color_table[cm][i - 1].v;
+        } else if (a >= color_table[cm][i].a) { /* at or above the upper entry: use it as is */
+            y = color_table[cm][i].y;
+            u = color_table[cm][i].u;
+            v = color_table[cm][i].v;
+        } else { /* strictly between the two entries: linear interpolation */
+            float start = color_table[cm][i - 1].a;
+            float end = color_table[cm][i].a;
+            float lerpfrac = (a - start) / (end - start);
+            y = color_table[cm][i - 1].y * (1.0f - lerpfrac)
+              + color_table[cm][i].y * lerpfrac;
+            u = color_table[cm][i - 1].u * (1.0f - lerpfrac)
+              + color_table[cm][i].u * lerpfrac;
+            v = color_table[cm][i - 1].v * (1.0f - lerpfrac)
+              + color_table[cm][i].v * lerpfrac;
+        }
+
+        out[0] += y * yf; /* accumulate, so several channels can share one row */
+        out[1] += u * uf;
+        out[2] += v * vf;
+    } else { /* no table lookup: intensity scales the factors directly */
+        out[0] += a * yf;
+        out[1] += a * uf;
+        out[2] += a * vf;
+    }
+}
+
+static void clear_combine_buffer(ShowSpectrumContext *s, int size) /* reset the first size triplets of s->combine_buffer to black */
+{
+    int y;
+
+    for (y = 0; y < size; y++) { /* three floats per entry, one per output plane */
+        s->combine_buffer[3 * y    ] = 0;     /* plane 0 */
+        s->combine_buffer[3 * y + 1] = 127.5; /* plane 1, mid value */
+        s->combine_buffer[3 * y + 2] = 127.5; /* plane 2, mid value */
+    }
+}
+
+static int plot_spectrum_column(AVFilterLink *inlink, AVFrame *insamples) /* draw one spectrum line into s->outpicref; returns s->win_size or a negative error */
+{
+    int ret;
+    AVFilterContext *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    ShowSpectrumContext *s = ctx->priv;
+    AVFrame *outpicref = s->outpicref;
+    int h = s->orientation == VERTICAL ? s->channel_height : s->channel_width; /* entries drawn per channel */
+
+    int ch, plane, x, y;
+
+    /* fill a new spectrum column */
+    /* initialize buffer for combining to black */
+    clear_combine_buffer(s, s->orientation == VERTICAL ? s->h : s->w);
+
+    for (ch = 0; ch < s->nb_display_channels; ch++) {
+        float *magnitudes = s->magnitudes[ch];
+        float *phases = s->phases[ch];
+        float yf, uf, vf;
+
+        /* decide color range */
+        color_range(s, ch, &yf, &uf, &vf);
+
+        /* draw the channel */
+        for (y = 0; y < h; y++) {
+            int row = (s->mode == COMBINED) ? y : ch * h + y; /* SEPARATE gives each channel its own run of h entries */
+            float *out = &s->combine_buffer[3 * row];
+            float a;
+
+            switch (s->data) {
+            case D_MAGNITUDE:
+                /* get magnitude */
+                a = magnitudes[y];
+                break;
+            case D_PHASE:
+                /* get phase */
+                a = phases[y];
                 break;
             default:
                 av_assert0(0);
             }
 
-            if (s->color_mode == CHANNEL) {
-                if (s->nb_display_channels > 1) {
-                    uf *= 0.5 * sin((2 * M_PI * ch) / s->nb_display_channels);
-                    vf *= 0.5 * cos((2 * M_PI * ch) / s->nb_display_channels);
-                } else {
-                    uf = 0.0f;
-                    vf = 0.0f;
-                }
+            /* apply scale; every branch leaves a clipped to [0, 1] */
+            switch (s->scale) {
+            case LINEAR:
+                a = av_clipf(a, 0, 1);
+                break;
+            case SQRT:
+                a = av_clipf(sqrt(a), 0, 1);
+                break;
+            case CBRT:
+                a = av_clipf(cbrt(a), 0, 1);
+                break;
+            case FOURTHRT:
+                a = av_clipf(sqrt(sqrt(a)), 0, 1);
+                break;
+            case FIFTHRT:
+                a = av_clipf(pow(a, 0.20), 0, 1);
+                break;
+            case LOG:
+                a = 1 + log10(av_clipd(a, 1e-6, 1)) / 6; // zero = -120dBFS
+                break;
+            default:
+                av_assert0(0);
             }
-            uf *= s->saturation;
-            vf *= s->saturation;
-
-            /* draw the channel */
-            for (y = 0; y < h; y++) {
-                int row = (s->mode == COMBINED) ? y : ch * h + y;
-                float *out = &s->combine_buffer[3 * row];
-
-                /* get magnitude */
-                float a = w * MAGNITUDE(y, ch);
-
-                /* apply scale */
-                switch (s->scale) {
-                case LINEAR:
-                    break;
-                case SQRT:
-                    a = sqrt(a);
-                    break;
-                case CBRT:
-                    a = cbrt(a);
-                    break;
-                case LOG:
-                    a = 1 - log(FFMAX(FFMIN(1, a), 1e-6)) / log(1e-6); // zero = -120dBFS
-                    break;
-                default:
-                    av_assert0(0);
-                }
 
-                if (s->color_mode == INTENSITY) {
-                    float y, u, v;
-                    int i;
-
-                    for (i = 1; i < sizeof(intensity_color_table) / sizeof(*intensity_color_table) - 1; i++)
-                        if (intensity_color_table[i].a >= a)
-                            break;
-                    // i now is the first item >= the color
-                    // now we know to interpolate between item i - 1 and i
-                    if (a <= intensity_color_table[i - 1].a) {
-                        y = intensity_color_table[i - 1].y;
-                        u = intensity_color_table[i - 1].u;
-                        v = intensity_color_table[i - 1].v;
-                    } else if (a >= intensity_color_table[i].a) {
-                        y = intensity_color_table[i].y;
-                        u = intensity_color_table[i].u;
-                        v = intensity_color_table[i].v;
-                    } else {
-                        float start = intensity_color_table[i - 1].a;
-                        float end = intensity_color_table[i].a;
-                        float lerpfrac = (a - start) / (end - start);
-                        y = intensity_color_table[i - 1].y * (1.0f - lerpfrac)
-                          + intensity_color_table[i].y * lerpfrac;
-                        u = intensity_color_table[i - 1].u * (1.0f - lerpfrac)
-                          + intensity_color_table[i].u * lerpfrac;
-                        v = intensity_color_table[i - 1].v * (1.0f - lerpfrac)
-                          + intensity_color_table[i].v * lerpfrac;
-                    }
-
-                    out[0] += y * yf;
-                    out[1] += u * uf;
-                    out[2] += v * vf;
-                } else {
-                    out[0] += a * yf;
-                    out[1] += a * uf;
-                    out[2] += a * vf;
-                }
-            }
+            pick_color(s, yf, uf, vf, a, out);
         }
+    }
 
-        /* copy to output */
+    av_frame_make_writable(s->outpicref); /* NOTE(review): return value is not checked */
+    /* copy to output */
+    if (s->orientation == VERTICAL) { /* time advances along x; one column is written per call */
         if (s->sliding == SCROLL) {
             for (plane = 0; plane < 3; plane++) {
-                for (y = 0; y < outlink->h; y++) {
+                for (y = 0; y < s->h; y++) {
                     uint8_t *p = outpicref->data[plane] +
                                  y * outpicref->linesize[plane];
-                    memmove(p, p + 1, outlink->w - 1);
+                    memmove(p, p + 1, s->w - 1); /* shift the row one pixel to the left */
                 }
             }
-            s->xpos = outlink->w - 1;
+            s->xpos = s->w - 1; /* new column goes at the right edge */
+        } else if (s->sliding == RSCROLL) {
+            for (plane = 0; plane < 3; plane++) {
+                for (y = 0; y < s->h; y++) {
+                    uint8_t *p = outpicref->data[plane] +
+                                 y * outpicref->linesize[plane];
+                    memmove(p + 1, p, s->w - 1); /* shift the row one pixel to the right */
+                }
+            }
+            s->xpos = 0; /* new column goes at the left edge */
         }
         for (plane = 0; plane < 3; plane++) {
-            uint8_t *p = outpicref->data[plane] +
-                         (outlink->h - 1) * outpicref->linesize[plane] +
+            uint8_t *p = outpicref->data[plane] + s->start_x +
+                         (outlink->h - 1 - s->start_y) * outpicref->linesize[plane] + /* begin at the lowest row; the loop below walks upward */
                          s->xpos;
-            for (y = 0; y < outlink->h; y++) {
-                *p = rint(FFMAX(0, FFMIN(s->combine_buffer[3 * y + plane], 255)));
+            for (y = 0; y < s->h; y++) {
+                *p = lrintf(av_clipf(s->combine_buffer[3 * y + plane], 0, 255)); /* clip to 0..255, then round */
                 p -= outpicref->linesize[plane];
             }
         }
+    } else { /* HORIZONTAL: time advances along y; one row is written per call */
+        if (s->sliding == SCROLL) {
+            for (plane = 0; plane < 3; plane++) {
+                for (y = 1; y < s->h; y++) { /* copy each row onto the one above it */
+                    memmove(outpicref->data[plane] + (y-1) * outpicref->linesize[plane],
+                            outpicref->data[plane] + (y  ) * outpicref->linesize[plane],
+                            s->w);
+                }
+            }
+            s->xpos = s->h - 1; /* new row goes at the bottom */
+        } else if (s->sliding == RSCROLL) {
+            for (plane = 0; plane < 3; plane++) {
+                for (y = s->h - 1; y >= 1; y--) { /* copy each row onto the one below it, bottom first */
+                    memmove(outpicref->data[plane] + (y  ) * outpicref->linesize[plane],
+                            outpicref->data[plane] + (y-1) * outpicref->linesize[plane],
+                            s->w);
+                }
+            }
+            s->xpos = 0; /* new row goes at the top */
+        }
+        for (plane = 0; plane < 3; plane++) {
+            uint8_t *p = outpicref->data[plane] + s->start_x +
+                         (s->xpos + s->start_y) * outpicref->linesize[plane]; /* xpos is used as a row index in this orientation */
+            for (x = 0; x < s->w; x++) {
+                *p = lrintf(av_clipf(s->combine_buffer[3 * x + plane], 0, 255)); /* clip to 0..255, then round */
+                p++;
+            }
+        }
+    }
 
-        if (s->sliding != FULLFRAME || s->xpos == 0)
-            outpicref->pts = insamples->pts;
+    if (s->sliding != FULLFRAME || s->xpos == 0) /* FULLFRAME keeps the pts of the first line of the picture */
+        outpicref->pts = insamples->pts;
 
-        s->xpos++;
-        if (s->xpos >= outlink->w)
-            s->xpos = 0;
-        if (s->sliding != FULLFRAME || s->xpos == 0) {
-            s->req_fullfilled = 1;
-            ret = ff_filter_frame(outlink, av_frame_clone(s->outpicref));
-            if (ret < 0)
-                return ret;
+    s->xpos++;
+    if (s->orientation == VERTICAL && s->xpos >= s->w) /* wrap after the last column */
+        s->xpos = 0;
+    if (s->orientation == HORIZONTAL && s->xpos >= s->h) /* wrap after the last row */
+        s->xpos = 0;
+    if (!s->single_pic && (s->sliding != FULLFRAME || s->xpos == 0)) { /* FULLFRAME outputs only once xpos has wrapped */
+        ret = ff_filter_frame(outlink, av_frame_clone(s->outpicref)); /* NOTE(review): av_frame_clone() result is not checked for NULL */
+        if (ret < 0)
+            return ret;
+    }
+
+    return s->win_size; /* positive on success */
+}
+
+#if CONFIG_SHOWSPECTRUM_FILTER
+
+static int request_frame(AVFilterLink *outlink) /* forward the request upstream; at EOF, output a partly filled FULLFRAME picture */
+{
+    ShowSpectrumContext *s = outlink->src->priv;
+    AVFilterLink *inlink = outlink->src->inputs[0];
+    unsigned i;
+    int ret;
+
+    ret = ff_request_frame(inlink);
+    if (ret == AVERROR_EOF && s->sliding == FULLFRAME && s->xpos > 0 &&
+        s->outpicref) { /* input ended with an unfinished picture still held */
+        if (s->orientation == VERTICAL) { /* blank every row from column xpos to the right edge */
+            for (i = 0; i < outlink->h; i++) {
+                memset(s->outpicref->data[0] + i * s->outpicref->linesize[0] + s->xpos,   0, outlink->w - s->xpos);
+                memset(s->outpicref->data[1] + i * s->outpicref->linesize[1] + s->xpos, 128, outlink->w - s->xpos);
+                memset(s->outpicref->data[2] + i * s->outpicref->linesize[2] + s->xpos, 128, outlink->w - s->xpos);
+            }
+        } else { /* HORIZONTAL: blank whole rows from row xpos downward */
+            for (i = s->xpos; i < outlink->h; i++) {
+                memset(s->outpicref->data[0] + i * s->outpicref->linesize[0],   0, outlink->w);
+                memset(s->outpicref->data[1] + i * s->outpicref->linesize[1], 128, outlink->w);
+                memset(s->outpicref->data[2] + i * s->outpicref->linesize[2], 128, outlink->w);
+            }
         }
+        ret = ff_filter_frame(outlink, s->outpicref); /* the held frame itself is passed on, not a clone */
+        s->outpicref = NULL; /* drop our pointer to the frame just passed on */
+    }
 
-    return win_size;
+    return ret; /* ff_request_frame() result, or ff_filter_frame() result after the final output */
 }
 
 static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
 {
     AVFilterContext *ctx = inlink->dst;
     ShowSpectrumContext *s = ctx->priv;
-    unsigned win_size = 1 << s->rdft_bits;
-    int ret = 0;
+    AVFrame *fin = NULL; /* scratch frame holding one analysis window */
+    int ret = 0, consumed = 0; /* consumed: samples drained so far in this call */
 
-    av_assert0(insamples->nb_samples <= win_size);
-    if (insamples->nb_samples == win_size)
-        ret = plot_spectrum_column(inlink, insamples);
+    if (s->pts == AV_NOPTS_VALUE)
+        s->pts = insamples->pts - av_audio_fifo_size(s->fifo); /* back-date by the samples already queued */
 
+    av_audio_fifo_write(s->fifo, (void **)insamples->extended_data, insamples->nb_samples); /* NOTE(review): return value is not checked */
     av_frame_free(&insamples);
+    while (av_audio_fifo_size(s->fifo) >= s->win_size) { /* one output line per full window available */
+        fin = ff_get_audio_buffer(inlink, s->win_size);
+        if (!fin) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+
+        fin->pts = s->pts + consumed;
+        consumed += s->hop_size;
+        ret = av_audio_fifo_peek(s->fifo, (void **)fin->extended_data, s->win_size); /* peek only: samples stay queued */
+        if (ret < 0)
+            goto fail;
+
+        av_assert0(fin->nb_samples == s->win_size);
+
+        run_fft(s, fin);
+        if (s->data == D_MAGNITUDE)
+            calc_magnitudes(s);
+        if (s->data == D_PHASE)
+            calc_phases(s);
+
+        ret = plot_spectrum_column(inlink, fin);
+        av_frame_free(&fin);
+        av_audio_fifo_drain(s->fifo, s->hop_size); /* drop hop_size samples; the remainder is reused by the next window */
+        if (ret < 0)
+            goto fail;
+    }
+
+fail: /* also reached on success: pts is reset on every call, and ret then holds the last plot_spectrum_column() result */
+    s->pts = AV_NOPTS_VALUE;
+    av_frame_free(&fin);
     return ret;
 }
 
@@ -530,3 +863,366 @@ AVFilter ff_avf_showspectrum = {
     .outputs       = showspectrum_outputs,
     .priv_class    = &showspectrum_class,
 };
+#endif // CONFIG_SHOWSPECTRUM_FILTER
+
+#if CONFIG_SHOWSPECTRUMPIC_FILTER
+
+static const AVOption showspectrumpic_options[] = { /* showspectrumpic options; indented rows are named constants of the option above them */
+    { "size", "set video size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, {.str = "4096x2048"}, 0, 0, FLAGS },
+    { "s",    "set video size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, {.str = "4096x2048"}, 0, 0, FLAGS }, /* second name for "size": same field, same default */
+    { "mode", "set channel display mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64=COMBINED}, 0, NB_MODES-1, FLAGS, "mode" },
+        { "combined", "combined mode", 0, AV_OPT_TYPE_CONST, {.i64=COMBINED}, 0, 0, FLAGS, "mode" },
+        { "separate", "separate mode", 0, AV_OPT_TYPE_CONST, {.i64=SEPARATE}, 0, 0, FLAGS, "mode" },
+    { "color", "set channel coloring", OFFSET(color_mode), AV_OPT_TYPE_INT, {.i64=INTENSITY}, 0, NB_CLMODES-1, FLAGS, "color" },
+        { "channel",   "separate color for each channel", 0, AV_OPT_TYPE_CONST, {.i64=CHANNEL},   0, 0, FLAGS, "color" },
+        { "intensity", "intensity based coloring",        0, AV_OPT_TYPE_CONST, {.i64=INTENSITY}, 0, 0, FLAGS, "color" },
+        { "rainbow",   "rainbow based coloring",          0, AV_OPT_TYPE_CONST, {.i64=RAINBOW},   0, 0, FLAGS, "color" },
+        { "moreland",  "moreland based coloring",         0, AV_OPT_TYPE_CONST, {.i64=MORELAND},  0, 0, FLAGS, "color" },
+        { "nebulae",   "nebulae based coloring",          0, AV_OPT_TYPE_CONST, {.i64=NEBULAE},   0, 0, FLAGS, "color" },
+        { "fire",      "fire based coloring",             0, AV_OPT_TYPE_CONST, {.i64=FIRE},      0, 0, FLAGS, "color" },
+        { "fiery",     "fiery based coloring",            0, AV_OPT_TYPE_CONST, {.i64=FIERY},     0, 0, FLAGS, "color" },
+        { "fruit",     "fruit based coloring",            0, AV_OPT_TYPE_CONST, {.i64=FRUIT},     0, 0, FLAGS, "color" },
+        { "cool",      "cool based coloring",             0, AV_OPT_TYPE_CONST, {.i64=COOL},      0, 0, FLAGS, "color" },
+    { "scale", "set display scale", OFFSET(scale), AV_OPT_TYPE_INT, {.i64=LOG}, 0, NB_SCALES-1, FLAGS, "scale" },
+        { "sqrt", "square root", 0, AV_OPT_TYPE_CONST, {.i64=SQRT},   0, 0, FLAGS, "scale" },
+        { "cbrt", "cubic root",  0, AV_OPT_TYPE_CONST, {.i64=CBRT},   0, 0, FLAGS, "scale" },
+        { "4thrt","4th root",    0, AV_OPT_TYPE_CONST, {.i64=FOURTHRT}, 0, 0, FLAGS, "scale" },
+        { "5thrt","5th root",    0, AV_OPT_TYPE_CONST, {.i64=FIFTHRT},  0, 0, FLAGS, "scale" },
+        { "log",  "logarithmic", 0, AV_OPT_TYPE_CONST, {.i64=LOG},    0, 0, FLAGS, "scale" },
+        { "lin",  "linear",      0, AV_OPT_TYPE_CONST, {.i64=LINEAR}, 0, 0, FLAGS, "scale" },
+    { "saturation", "color saturation multiplier", OFFSET(saturation), AV_OPT_TYPE_FLOAT, {.dbl = 1}, -10, 10, FLAGS },
+    { "win_func", "set window function", OFFSET(win_func), AV_OPT_TYPE_INT, {.i64 = WFUNC_HANNING}, 0, NB_WFUNC-1, FLAGS, "win_func" },
+        { "rect",     "Rectangular",      0, AV_OPT_TYPE_CONST, {.i64=WFUNC_RECT},     0, 0, FLAGS, "win_func" },
+        { "bartlett", "Bartlett",         0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BARTLETT}, 0, 0, FLAGS, "win_func" },
+        { "hann",     "Hann",             0, AV_OPT_TYPE_CONST, {.i64=WFUNC_HANNING},  0, 0, FLAGS, "win_func" },
+        { "hanning",  "Hanning",          0, AV_OPT_TYPE_CONST, {.i64=WFUNC_HANNING},  0, 0, FLAGS, "win_func" }, /* same value as "hann" */
+        { "hamming",  "Hamming",          0, AV_OPT_TYPE_CONST, {.i64=WFUNC_HAMMING},  0, 0, FLAGS, "win_func" },
+        { "blackman", "Blackman",         0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BLACKMAN}, 0, 0, FLAGS, "win_func" },
+        { "welch",    "Welch",            0, AV_OPT_TYPE_CONST, {.i64=WFUNC_WELCH},    0, 0, FLAGS, "win_func" },
+        { "flattop",  "Flat-top",         0, AV_OPT_TYPE_CONST, {.i64=WFUNC_FLATTOP},  0, 0, FLAGS, "win_func" },
+        { "bharris",  "Blackman-Harris",  0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BHARRIS},  0, 0, FLAGS, "win_func" },
+        { "bnuttall", "Blackman-Nuttall", 0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BNUTTALL}, 0, 0, FLAGS, "win_func" },
+        { "bhann",    "Bartlett-Hann",    0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BHANN},    0, 0, FLAGS, "win_func" },
+        { "sine",     "Sine",             0, AV_OPT_TYPE_CONST, {.i64=WFUNC_SINE},     0, 0, FLAGS, "win_func" },
+        { "nuttall",  "Nuttall",          0, AV_OPT_TYPE_CONST, {.i64=WFUNC_NUTTALL},  0, 0, FLAGS, "win_func" },
+        { "lanczos",  "Lanczos",          0, AV_OPT_TYPE_CONST, {.i64=WFUNC_LANCZOS},  0, 0, FLAGS, "win_func" },
+        { "gauss",    "Gauss",            0, AV_OPT_TYPE_CONST, {.i64=WFUNC_GAUSS},    0, 0, FLAGS, "win_func" },
+        { "tukey",    "Tukey",            0, AV_OPT_TYPE_CONST, {.i64=WFUNC_TUKEY},    0, 0, FLAGS, "win_func" },
+    { "orientation", "set orientation", OFFSET(orientation), AV_OPT_TYPE_INT, {.i64=VERTICAL}, 0, NB_ORIENTATIONS-1, FLAGS, "orientation" },
+        { "vertical",   NULL, 0, AV_OPT_TYPE_CONST, {.i64=VERTICAL},   0, 0, FLAGS, "orientation" },
+        { "horizontal", NULL, 0, AV_OPT_TYPE_CONST, {.i64=HORIZONTAL}, 0, 0, FLAGS, "orientation" },
+    { "gain", "set scale gain", OFFSET(gain), AV_OPT_TYPE_FLOAT, {.dbl = 1}, 0, 128, FLAGS },
+    { "legend", "draw legend", OFFSET(legend), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS },
+    { NULL } /* terminator entry */
+};
+
+AVFILTER_DEFINE_CLASS(showspectrumpic); /* presumably provides showspectrumpic_class, which ff_avf_showspectrumpic references -- confirm in the macro */
+
+/** Draw the NUL-terminated txt into plane 0 of pic with the 8x8 CGA font by
+ *  inverting the set font pixels. o == 0: left to right from (x, y); o != 0: glyphs
+ *  drawn rotated (font rows become columns), stacked downwards 10 rows apart. No clipping. */
+static void drawtext(AVFrame *pic, int x, int y, const char *txt, int o)
+{
+    const int font_height = 8;
+    int i, char_y, mask;
+
+    for (i = 0; txt[i]; i++) {
+        /* Cast to uint8_t: plain char may be signed, and a byte >= 0x80 must not index before the table. */
+        const uint8_t *glyph = avpriv_cga_font + (uint8_t)txt[i] * font_height;
+        if (o) {
+            for (char_y = font_height - 1; char_y >= 0; char_y--) {
+                uint8_t *p = pic->data[0] + (y + i * 10) * pic->linesize[0] + x;
+                for (mask = 0x80; mask; mask >>= 1) {
+                    if (glyph[font_height - 1 - char_y] & mask)
+                        p[char_y] = ~p[char_y];
+                    p += pic->linesize[0];
+                }
+            }
+        } else {
+            uint8_t *p = pic->data[0] + y*pic->linesize[0] + (x + i*8);
+            for (char_y = 0; char_y < font_height; char_y++) {
+                for (mask = 0x80; mask; mask >>= 1) {
+                    if (glyph[char_y] & mask)
+                        *p = ~(*p);
+                    p++;
+                }
+                p += pic->linesize[0] - 8;
+            }
+        }
+    }
+}
+
+/** Pull more audio from the input; once it reports EOF, render everything queued
+ *  in s->fifo into s->outpicref (sz columns plus the optional legend) and send
+ *  that single picture. Returns the request/filter result or a negative AVERROR. */
+static int showspectrumpic_request_frame(AVFilterLink *outlink)
+{
+    ShowSpectrumContext *s = outlink->src->priv;
+    AVFilterLink *inlink = outlink->src->inputs[0];
+    int ret;
+
+    ret = ff_request_frame(inlink);
+    if (ret == AVERROR_EOF && s->outpicref) {
+        int samples = av_audio_fifo_size(s->fifo);
+        int consumed = 0;
+        int y, x = 0, sz = s->orientation == VERTICAL ? s->w : s->h;
+        int ch, spf, spb;
+        AVFrame *fin;
+        /* spf: samples the FIFO advances per FFT; spb: samples gathered per plotted column. */
+        spf = samples > 0 ? s->win_size * (samples / ((s->win_size * sz) * ceil(samples / (float)(s->win_size * sz)))) : 0;
+        spf = FFMAX(1, spf); /* short or empty input truncates to 0, which would divide by zero below */
+        spb = (samples / (spf * sz)) * spf;
+
+        fin = ff_get_audio_buffer(inlink, s->win_size);
+        if (!fin)
+            return AVERROR(ENOMEM);
+
+        while (x < sz) {
+            ret = av_audio_fifo_peek(s->fifo, (void **)fin->extended_data, s->win_size);
+            if (ret < 0) {
+                av_frame_free(&fin);
+                return ret;
+            }
+
+            av_audio_fifo_drain(s->fifo, spf);
+
+            if (ret < s->win_size) {
+                for (ch = 0; ch < s->nb_display_channels; ch++) {
+                    memset(fin->extended_data[ch] + ret * sizeof(float), 0,
+                           (s->win_size - ret) * sizeof(float));
+                }
+            }
+
+            run_fft(s, fin);
+            acalc_magnitudes(s);
+
+            consumed += spf;
+            if (consumed >= spb) {
+                int h = s->orientation == VERTICAL ? s->h : s->w;
+
+                scale_magnitudes(s, 1. / (consumed / spf));
+                plot_spectrum_column(inlink, fin);
+                consumed = 0;
+                x++;
+                for (ch = 0; ch < s->nb_display_channels; ch++)
+                    memset(s->magnitudes[ch], 0, h * sizeof(float));
+            }
+        }
+
+        av_frame_free(&fin);
+        s->outpicref->pts = 0;
+
+        if (s->legend) {
+            int multi = (s->mode == SEPARATE && s->color_mode == CHANNEL);
+            float spp = samples / (float)sz;
+            uint8_t *dst;
+
+            drawtext(s->outpicref, 2, outlink->h - 10, "CREATED BY LIBAVFILTER", 0);
+            /* Frame around the plot area: top and bottom edges first, then left and right edges. */
+            dst = s->outpicref->data[0] + (s->start_y - 1) * s->outpicref->linesize[0] + s->start_x - 1;
+            for (x = 0; x < s->w + 1; x++)
+                dst[x] = 200;
+            dst = s->outpicref->data[0] + (s->start_y + s->h) * s->outpicref->linesize[0] + s->start_x - 1;
+            for (x = 0; x < s->w + 1; x++)
+                dst[x] = 200;
+            for (y = 0; y < s->h + 2; y++) {
+                dst = s->outpicref->data[0] + (y + s->start_y - 1) * s->outpicref->linesize[0];
+                dst[s->start_x - 1] = 200;
+                dst[s->start_x + s->w] = 200;
+            }
+            if (s->orientation == VERTICAL) {
+                int h = s->mode == SEPARATE ? s->h / s->nb_display_channels : s->h;
+                for (ch = 0; ch < (s->mode == SEPARATE ? s->nb_display_channels : 1); ch++) {
+                    for (y = 0; y < h; y += 20) {
+                        dst = s->outpicref->data[0] + (s->start_y + h * (ch + 1) - y - 1) * s->outpicref->linesize[0];
+                        dst[s->start_x - 2] = 200;
+                        dst[s->start_x + s->w + 1] = 200;
+                    }
+                    for (y = 0; y < h; y += 40) {
+                        dst = s->outpicref->data[0] + (s->start_y + h * (ch + 1) - y - 1) * s->outpicref->linesize[0];
+                        dst[s->start_x - 3] = 200;
+                        dst[s->start_x + s->w + 2] = 200;
+                    }
+                    dst = s->outpicref->data[0] + (s->start_y - 2) * s->outpicref->linesize[0] + s->start_x;
+                    for (x = 0; x < s->w; x+=40)
+                        dst[x] = 200;
+                    dst = s->outpicref->data[0] + (s->start_y - 3) * s->outpicref->linesize[0] + s->start_x;
+                    for (x = 0; x < s->w; x+=80)
+                        dst[x] = 200;
+                    dst = s->outpicref->data[0] + (s->h + s->start_y + 1) * s->outpicref->linesize[0] + s->start_x;
+                    for (x = 0; x < s->w; x+=40)
+                        dst[x] = 200;
+                    dst = s->outpicref->data[0] + (s->h + s->start_y + 2) * s->outpicref->linesize[0] + s->start_x;
+                    for (x = 0; x < s->w; x+=80)
+                        dst[x] = 200;
+                    for (y = 0; y < h; y += 40) {
+                        float hz = y * (inlink->sample_rate / 2) / (float)(1 << (int)ceil(log2(h)));
+                        char *units;
+
+                        if (hz == 0)
+                            units = av_asprintf("DC");
+                        else
+                            units = av_asprintf("%.2f", hz);
+                        if (!units)
+                            return AVERROR(ENOMEM);
+
+                        drawtext(s->outpicref, s->start_x - 8 * strlen(units) - 4, h * (ch + 1) + s->start_y - y - 4, units, 0);
+                        av_free(units);
+                    }
+                }
+                /* Time labels every 80 columns, drawn both below and above the plot. */
+                for (x = 0; x < s->w; x+=80) {
+                    float seconds = x * spp / inlink->sample_rate;
+                    char *units;
+
+                    if (x == 0)
+                        units = av_asprintf("0");
+                    else if (log10(seconds) > 6)
+                        units = av_asprintf("%.2fh", seconds / (60 * 60));
+                    else if (log10(seconds) > 3)
+                        units = av_asprintf("%.2fm", seconds / 60);
+                    else
+                        units = av_asprintf("%.2fs", seconds);
+                    if (!units)
+                        return AVERROR(ENOMEM);
+
+                    drawtext(s->outpicref, s->start_x + x - 4 * strlen(units), s->h + s->start_y + 6, units, 0);
+                    drawtext(s->outpicref, s->start_x + x - 4 * strlen(units), s->start_y - 12, units, 0);
+                    av_free(units);
+                }
+
+                drawtext(s->outpicref, outlink->w / 2 - 4 * 4, outlink->h - s->start_y / 2, "TIME", 0);
+                drawtext(s->outpicref, s->start_x / 7, outlink->h / 2 - 14 * 4, "FREQUENCY (Hz)", 1);
+            } else {
+                int w = s->mode == SEPARATE ? s->w / s->nb_display_channels : s->w;
+                for (y = 0; y < s->h; y += 20) {
+                    dst = s->outpicref->data[0] + (s->start_y + y) * s->outpicref->linesize[0];
+                    dst[s->start_x - 2] = 200;
+                    dst[s->start_x + s->w + 1] = 200;
+                }
+                for (y = 0; y < s->h; y += 40) {
+                    dst = s->outpicref->data[0] + (s->start_y + y) * s->outpicref->linesize[0];
+                    dst[s->start_x - 3] = 200;
+                    dst[s->start_x + s->w + 2] = 200;
+                }
+                for (ch = 0; ch < (s->mode == SEPARATE ? s->nb_display_channels : 1); ch++) {
+                    dst = s->outpicref->data[0] + (s->start_y - 2) * s->outpicref->linesize[0] + s->start_x + w * ch;
+                    for (x = 0; x < w; x+=40)
+                        dst[x] = 200;
+                    dst = s->outpicref->data[0] + (s->start_y - 3) * s->outpicref->linesize[0] + s->start_x + w * ch;
+                    for (x = 0; x < w; x+=80)
+                        dst[x] = 200;
+                    dst = s->outpicref->data[0] + (s->h + s->start_y + 1) * s->outpicref->linesize[0] + s->start_x + w * ch;
+                    for (x = 0; x < w; x+=40)
+                        dst[x] = 200;
+                    dst = s->outpicref->data[0] + (s->h + s->start_y + 2) * s->outpicref->linesize[0] + s->start_x + w * ch;
+                    for (x = 0; x < w; x+=80)
+                        dst[x] = 200;
+                    for (x = 0; x < w; x += 80) {
+                        float hz = x * (inlink->sample_rate / 2) / (float)(1 << (int)ceil(log2(w)));
+                        char *units;
+
+                        if (hz == 0)
+                            units = av_asprintf("DC");
+                        else
+                            units = av_asprintf("%.2f", hz);
+                        if (!units)
+                            return AVERROR(ENOMEM);
+
+                        drawtext(s->outpicref, s->start_x - 4 * strlen(units) + x + w * ch, s->start_y - 12, units, 0);
+                        drawtext(s->outpicref, s->start_x - 4 * strlen(units) + x + w * ch, s->h + s->start_y + 6, units, 0);
+                        av_free(units);
+                    }
+                }
+                for (y = 0; y < s->h; y+=40) {
+                    float seconds = y * spp / inlink->sample_rate;
+                    char *units;
+
+                    if (y == 0) /* origin label: this loop walks y, so x must not be tested here */
+                        units = av_asprintf("0");
+                    else if (log10(seconds) > 6)
+                        units = av_asprintf("%.2fh", seconds / (60 * 60));
+                    else if (log10(seconds) > 3)
+                        units = av_asprintf("%.2fm", seconds / 60);
+                    else
+                        units = av_asprintf("%.2fs", seconds);
+                    if (!units)
+                        return AVERROR(ENOMEM);
+
+                    drawtext(s->outpicref, s->start_x - 8 * strlen(units) - 4, s->start_y + y - 4, units, 0);
+                    av_free(units);
+                }
+                drawtext(s->outpicref, s->start_x / 7, outlink->h / 2 - 4 * 4, "TIME", 1);
+                drawtext(s->outpicref, outlink->w / 2 - 14 * 4, outlink->h - s->start_y / 2, "FREQUENCY (Hz)", 0);
+            }
+            /* Colour scale strip: 10 pixels wide, starting 20 pixels right of the plot area. */
+            for (ch = 0; ch < (multi ? s->nb_display_channels : 1); ch++) {
+                int h = multi ? s->h / s->nb_display_channels : s->h;
+
+                for (y = 0; y < h; y++) {
+                    float out[3] = { 0., 127.5, 127.5};
+                    int chn;
+
+                    for (chn = 0; chn < (s->mode == SEPARATE ? 1 : s->nb_display_channels); chn++) {
+                        float yf, uf, vf;
+                        int channel = (multi) ? s->nb_display_channels - ch - 1 : chn;
+
+                        color_range(s, channel, &yf, &uf, &vf);
+                        pick_color(s, yf, uf, vf, y / (float)h, out);
+                    }
+                    memset(s->outpicref->data[0]+(s->start_y + h * (ch + 1) - y - 1) * s->outpicref->linesize[0] + s->w + s->start_x + 20, av_clip_uint8(out[0]), 10);
+                    memset(s->outpicref->data[1]+(s->start_y + h * (ch + 1) - y - 1) * s->outpicref->linesize[1] + s->w + s->start_x + 20, av_clip_uint8(out[1]), 10);
+                    memset(s->outpicref->data[2]+(s->start_y + h * (ch + 1) - y - 1) * s->outpicref->linesize[2] + s->w + s->start_x + 20, av_clip_uint8(out[2]), 10);
+                }
+            }
+        }
+
+        ret = ff_filter_frame(outlink, s->outpicref);
+        s->outpicref = NULL;
+    }
+
+    return ret;
+}
+
+/* Queue the incoming samples in s->fifo; drawing is left to showspectrumpic_request_frame(). */
+static int showspectrumpic_filter_frame(AVFilterLink *inlink, AVFrame *insamples)
+{
+    ShowSpectrumContext *spec = inlink->dst->priv;
+    void **planes = (void **)insamples->extended_data;
+    const int written = av_audio_fifo_write(spec->fifo, planes, insamples->nb_samples);
+
+    av_frame_free(&insamples);   /* released whether or not the write succeeded */
+    return written;
+}
+
+static const AVFilterPad showspectrumpic_inputs[] = { /* the filter's single audio input pad */
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_AUDIO,
+        .filter_frame = showspectrumpic_filter_frame, /* only queues the samples in the FIFO */
+    },
+    { NULL } /* terminator entry */
+};
+
+static const AVFilterPad showspectrumpic_outputs[] = { /* the filter's single video output pad */
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .config_props  = config_output,
+        .request_frame = showspectrumpic_request_frame, /* renders and sends the picture once the input reports EOF */
+    },
+    { NULL } /* terminator entry */
+};
+
+AVFilter ff_avf_showspectrumpic = { /* definition record of the "showspectrumpic" filter */
+    .name          = "showspectrumpic",
+    .description   = NULL_IF_CONFIG_SMALL("Convert input audio to a spectrum video output single picture."),
+    .uninit        = uninit,
+    .query_formats = query_formats,
+    .priv_size     = sizeof(ShowSpectrumContext), /* private context is a ShowSpectrumContext, as read via ->priv in the callbacks */
+    .inputs        = showspectrumpic_inputs,
+    .outputs       = showspectrumpic_outputs,
+    .priv_class    = &showspectrumpic_class,
+};
+
+#endif // CONFIG_SHOWSPECTRUMPIC_FILTER
diff --git a/libavfilter/avf_showvolume.c b/libavfilter/avf_showvolume.c
new file mode 100644
index 00000000..f7ccdf73
--- /dev/null
+++ b/libavfilter/avf_showvolume.c
@@ -0,0 +1,279 @@
+/*
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avstring.h"
+#include "libavutil/channel_layout.h"
+#include "libavutil/eval.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/opt.h"
+#include "libavutil/parseutils.h"
+#include "libavutil/xga_font_data.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "audio.h"
+#include "video.h"
+#include "internal.h"
+
+static const char *const var_names[] = {   "VOLUME",   "CHANNEL",        NULL }; /* variable names handed to av_expr_parse() for the "c" expression */
+enum                                   { VAR_VOLUME, VAR_CHANNEL, VAR_VARS_NB }; /* slot indices into s->values, in the same order as var_names */
+
+typedef struct ShowVolumeContext { /* private state of the showvolume filter */
+    const AVClass *class;
+    int w, h;               /* "w"/"h" options: width and height of one channel bar */
+    int b;                  /* "b" option: border rows between consecutive channel bars */
+    double f;               /* "f" option: factor the previous picture is multiplied by each frame */
+    AVRational frame_rate;  /* "rate"/"r" option */
+    char *color;            /* "c" option: source text of the colour expression */
+
+    AVFrame *out;           /* persistent picture; filter_frame() updates it and sends a clone */
+    AVExpr *c_expr;         /* color parsed by init() */
+    int draw_text;          /* "t" option: draw channel names */
+    int draw_volume;        /* "v" option: draw the numeric volume value */
+    double *values;         /* VAR_VARS_NB expression variables per input channel */
+} ShowVolumeContext;
+
+#define OFFSET(x) offsetof(ShowVolumeContext, x) /* byte offset of a context field, used by the option table */
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
+
+static const AVOption showvolume_options[] = { /* user options of the showvolume filter */
+    { "rate", "set video rate",  OFFSET(frame_rate), AV_OPT_TYPE_VIDEO_RATE, {.str="25"}, 0, 0, FLAGS },
+    { "r",    "set video rate",  OFFSET(frame_rate), AV_OPT_TYPE_VIDEO_RATE, {.str="25"}, 0, 0, FLAGS }, /* second name for "rate": same field, same default */
+    { "b", "set border width",   OFFSET(b), AV_OPT_TYPE_INT, {.i64=1}, 0, 5, FLAGS },
+    { "w", "set channel width",  OFFSET(w), AV_OPT_TYPE_INT, {.i64=400}, 80, 1080, FLAGS },
+    { "h", "set channel height", OFFSET(h), AV_OPT_TYPE_INT, {.i64=20}, 1, 100, FLAGS },
+    { "f", "set fade",           OFFSET(f), AV_OPT_TYPE_DOUBLE, {.dbl=0.95}, 0.001, 1, FLAGS },
+    { "c", "set volume color expression", OFFSET(color), AV_OPT_TYPE_STRING, {.str="if(gte(VOLUME,-6), if(gte(VOLUME,-2), if(gte(VOLUME,-1), if(gt(VOLUME,0), 0xff0000ff, 0xff0066ff), 0xff00ffff),0xff00ff00),0xffff0000)"}, 0, 0, FLAGS }, /* default picks a 32-bit colour by VOLUME thresholds at -6, -2, -1 and 0 */
+    { "t", "display channel names", OFFSET(draw_text), AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1, FLAGS },
+    { "v", "display volume value", OFFSET(draw_volume), AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1, FLAGS },
+    { NULL } /* terminator entry */
+};
+
+AVFILTER_DEFINE_CLASS(showvolume); /* presumably provides showvolume_class, which ff_avf_showvolume references -- confirm in the macro */
+
+static av_cold int init(AVFilterContext *ctx)
+{
+    /* Parse the "c" colour expression once; filter_frame() evaluates it per channel. */
+    ShowVolumeContext *vol = ctx->priv;
+    int err;
+
+    if (!vol->color)
+        return 0;
+
+    err = av_expr_parse(&vol->c_expr, vol->color, var_names,
+                        NULL, NULL, NULL, NULL, 0, ctx);
+    /* Only negative results are errors; any other value is reported as success. */
+    return err < 0 ? err : 0;
+}
+
+/* Restrict the input to planar float audio (any layout, any rate) and the output to RGBA. */
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVSampleFormat sample_fmts[] = { AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE };
+    static const enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_RGBA, AV_PIX_FMT_NONE };
+    AVFilterLink *in  = ctx->inputs[0];
+    AVFilterLink *out = ctx->outputs[0];
+    int err;
+
+    /* Each *_ref() call is checked in turn; the first failure is returned unchanged. */
+    err = ff_formats_ref(ff_make_format_list(sample_fmts), &in->out_formats);
+    if (err < 0)
+        return err;
+
+    err = ff_channel_layouts_ref(ff_all_channel_layouts(), &in->out_channel_layouts);
+    if (err < 0)
+        return err;
+
+    err = ff_formats_ref(ff_all_samplerates(), &in->out_samplerates);
+    if (err < 0)
+        return err;
+
+    err = ff_formats_ref(ff_make_format_list(pix_fmts), &out->in_formats);
+    if (err < 0)
+        return err;
+
+    return 0;
+}
+
+/* Pin the input frame size to one video frame of audio (1024 samples at least) and allocate s->values. */
+static int config_input(AVFilterLink *inlink)
+{
+    ShowVolumeContext *vol = inlink->dst->priv;
+    const double per_frame = (double)inlink->sample_rate / av_q2d(vol->frame_rate) + 0.5;
+    const int nb_samples = FFMAX(1024, per_frame);
+
+    inlink->max_samples      = nb_samples;
+    inlink->min_samples      = nb_samples;
+    inlink->partial_buf_size = nb_samples;
+
+    /* VAR_VARS_NB slots per channel, zero-initialised by av_calloc(). */
+    vol->values = av_calloc(inlink->channels * VAR_VARS_NB, sizeof(double));
+    return vol->values ? 0 : AVERROR(ENOMEM);
+}
+
+/* Output geometry: one s->h high bar per channel with s->b rows between consecutive bars. */
+static int config_output(AVFilterLink *outlink)
+{
+    const ShowVolumeContext *vol = outlink->src->priv;
+    const int channels = outlink->src->inputs[0]->channels;
+
+    outlink->frame_rate          = vol->frame_rate;
+    outlink->sample_aspect_ratio = (AVRational){1,1};
+    outlink->w                   = vol->w;
+    outlink->h                   = vol->h * channels + (channels - 1) * vol->b;
+    return 0;
+}
+
+/** Draw txt at (x, y) on plane 0 of pic, addressed as 4 bytes per pixel, with the
+ *  8x8 CGA font; each set font pixel inverts the 32-bit pixel under it. No clipping. */
+static void drawtext(AVFrame *pic, int x, int y, const char *txt)
+{
+    const int font_height = 8;
+    int i, char_y, mask;
+
+    for (i = 0; txt[i]; i++) {
+        /* Cast to uint8_t: plain char may be signed, and a byte >= 0x80 must not index before the table. */
+        const uint8_t *glyph = avpriv_cga_font + (uint8_t)txt[i] * font_height;
+        uint8_t *p = pic->data[0] + y*pic->linesize[0] + (x + i*8)*4;
+
+        for (char_y = 0; char_y < font_height; char_y++) {
+            for (mask = 0x80; mask; mask >>= 1) {
+                if (glyph[char_y] & mask)
+                    AV_WN32(p, ~AV_RN32(p));
+                p += 4;
+            }
+            p += pic->linesize[0] - 8*4;
+        }
+    }
+}
+
+/** Fade s->out, draw every channel's peak bar (and optional name) on it, then send a
+ *  copy with the optional volume values drawn on the copy only. Consumes insamples. */
+static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    ShowVolumeContext *s = ctx->priv;
+    int c, i, j, k, ret;
+    AVFrame *out;
+
+    if (!s->out || s->out->width  != outlink->w ||
+                   s->out->height != outlink->h) {
+        av_frame_free(&s->out);
+        s->out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+        if (!s->out) {
+            av_frame_free(&insamples);
+            return AVERROR(ENOMEM);
+        }
+        for (i = 0; i < outlink->h; i++)
+            memset(s->out->data[0] + i * s->out->linesize[0], 0, outlink->w * 4);
+    }
+    s->out->pts = insamples->pts;
+
+    /* Fade what is already drawn: scale every byte of every 4-byte pixel by s->f. */
+    for (j = 0; j < outlink->h; j++) {
+        uint8_t *dst = s->out->data[0] + j * s->out->linesize[0];
+        for (k = 0; k < s->w * 4; k++)
+            dst[k] = FFMAX(dst[k] * s->f, 0);
+    }
+
+    for (c = 0; c < inlink->channels; c++) {
+        float *src = (float *)insamples->extended_data[c];
+        const char *name;
+        float max = 0;
+        uint32_t color;
+
+        /* Peak of the absolute value, so negative excursions count as well. */
+        for (i = 0; i < insamples->nb_samples; i++)
+            max = FFMAX(max, FFABS(src[i]));
+        s->values[c * VAR_VARS_NB + VAR_VOLUME] = 20.0 * log10(max);
+        max = av_clipf(max, 0, 1);
+        s->values[c * VAR_VARS_NB + VAR_CHANNEL] = c;
+        color = av_expr_eval(s->c_expr, &s->values[c * VAR_VARS_NB], NULL);
+        for (j = 0; j < s->h; j++) {
+            uint8_t *dst = s->out->data[0] + (c * s->h + c * s->b + j) * s->out->linesize[0];
+            for (k = 0; k < s->w * max; k++)
+                AV_WN32A(dst + k * 4, color);
+        }
+
+        /* av_get_channel_name() returns NULL for a channel it cannot name; skip the label then. */
+        name = av_get_channel_name(av_channel_layout_extract_channel(insamples->channel_layout, c));
+        if (s->h >= 8 && s->draw_text && name)
+            drawtext(s->out, 2, c * (s->h + s->b) + (s->h - 8) / 2, name);
+    }
+
+    av_frame_free(&insamples);
+    out = av_frame_clone(s->out);
+    if (!out)
+        return AVERROR(ENOMEM);
+    /* The volume text must land on a private copy, never on the buffer shared with s->out. */
+    ret = av_frame_make_writable(out);
+    if (ret < 0) {
+        av_frame_free(&out);
+        return ret;
+    }
+
+    for (c = 0; c < inlink->channels && s->draw_volume && s->h >= 8; c++) {
+        char buf[16];
+        snprintf(buf, sizeof(buf), "%.2f", s->values[c * VAR_VARS_NB + VAR_VOLUME]);
+        drawtext(out, FFMAX(0, s->w - 8 * (int)strlen(buf)), c * (s->h + s->b) + (s->h - 8) / 2, buf);
+    }
+
+    return ff_filter_frame(outlink, out);
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    ShowVolumeContext *vol = ctx->priv;
+    /* Release what init(), config_input() and filter_frame() may have allocated. */
+    av_freep(&vol->values);
+    av_expr_free(vol->c_expr);
+    av_frame_free(&vol->out);
+}
+
+static const AVFilterPad showvolume_inputs[] = { /* the filter's single audio input pad */
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_AUDIO,
+        .config_props = config_input, /* sets the input frame size and allocates s->values */
+        .filter_frame = filter_frame, /* draws and sends one picture per audio frame */
+    },
+    { NULL } /* terminator entry */
+};
+
+static const AVFilterPad showvolume_outputs[] = { /* the filter's single video output pad */
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_output, /* sets width, height, aspect ratio and frame rate */
+    },
+    { NULL } /* terminator entry */
+};
+
+AVFilter ff_avf_showvolume = { /* definition record of the "showvolume" filter */
+    .name          = "showvolume",
+    .description   = NULL_IF_CONFIG_SMALL("Convert input audio volume to video output."),
+    .init          = init,   /* parses the colour expression */
+    .uninit        = uninit, /* frees the picture, the expression and the value array */
+    .query_formats = query_formats,
+    .priv_size     = sizeof(ShowVolumeContext),
+    .inputs        = showvolume_inputs,
+    .outputs       = showvolume_outputs,
+    .priv_class    = &showvolume_class,
+};
diff --git a/libavfilter/avf_showwaves.c b/libavfilter/avf_showwaves.c
index 57a6b2e1..269dc11f 100644
--- a/libavfilter/avf_showwaves.c
+++ b/libavfilter/avf_showwaves.c
@@ -24,6 +24,7 @@
  */
 
 #include "libavutil/avassert.h"
+#include "libavutil/avstring.h"
 #include "libavutil/channel_layout.h"
 #include "libavutil/opt.h"
 #include "libavutil/parseutils.h"
@@ -41,6 +42,12 @@ enum ShowWavesMode {
     MODE_NB,
 };
 
+enum ShowWavesScale { /* values of the "scale" option */
+    SCALE_LIN, /* "lin": linear amplitude */
+    SCALE_LOG, /* "log": logarithmic amplitude */
+    SCALE_NB,  /* count of scales; the option's upper bound is SCALE_NB-1 */
+};
+
 struct frame_node {
     AVFrame *frame;
     struct frame_node *next;
@@ -50,16 +57,21 @@ typedef struct {
     const AVClass *class;
     int w, h;
     AVRational rate;
+    char *colors;
     int buf_idx;
     int16_t *buf_idy;    /* y coordinate of previous sample for each channel */
     AVFrame *outpicref;
-    int req_fullfilled;
     int n;
+    int pixstep;
     int sample_count_mod;
     int mode;                   ///< ShowWavesMode
+    int scale;                  ///< ShowWavesScale
     int split_channels;
+    uint8_t *fg;
+
+    int (*get_h)(int16_t sample, int height);
     void (*draw_sample)(uint8_t *buf, int height, int linesize,
-                        int16_t sample, int16_t *prev_y, int intensity);
+                        int16_t *prev_y, const uint8_t color[4], int h);
 
     /* single picture */
     int single_pic;
@@ -83,7 +95,11 @@ static const AVOption showwaves_options[] = {
     { "n",    "set how many samples to show in the same point", OFFSET(n), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, FLAGS },
     { "rate", "set video rate", OFFSET(rate), AV_OPT_TYPE_VIDEO_RATE, {.str = "25"}, 0, 0, FLAGS },
     { "r",    "set video rate", OFFSET(rate), AV_OPT_TYPE_VIDEO_RATE, {.str = "25"}, 0, 0, FLAGS },
-    { "split_channels", "draw channels separately", OFFSET(split_channels), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, FLAGS },
+    { "split_channels", "draw channels separately", OFFSET(split_channels), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS },
+    { "colors", "set channels colors", OFFSET(colors), AV_OPT_TYPE_STRING, {.str = "red|green|blue|yellow|orange|lime|pink|magenta|brown" }, 0, 0, FLAGS },
+    { "scale", "set amplitude scale", OFFSET(scale), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, SCALE_NB-1, FLAGS, .unit="scale" },
+        { "lin", "linear",         0, AV_OPT_TYPE_CONST, {.i64=SCALE_LIN}, .flags=FLAGS, .unit="scale"},
+        { "log", "logarithmic",    0, AV_OPT_TYPE_CONST, {.i64=SCALE_LOG}, .flags=FLAGS, .unit="scale"},
     { NULL }
 };
 
@@ -95,6 +111,7 @@ static av_cold void uninit(AVFilterContext *ctx)
 
     av_frame_free(&showwaves->outpicref);
     av_freep(&showwaves->buf_idy);
+    av_freep(&showwaves->fg);
 
     if (showwaves->single_pic) {
         struct frame_node *node = showwaves->audio_frames;
@@ -117,39 +134,183 @@ static int query_formats(AVFilterContext *ctx)
     AVFilterLink *inlink = ctx->inputs[0];
     AVFilterLink *outlink = ctx->outputs[0];
     static const enum AVSampleFormat sample_fmts[] = { AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE };
-    static const enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE };
+    static const enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_RGBA, AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE };
+    int ret;
 
     /* set input audio formats */
     formats = ff_make_format_list(sample_fmts);
-    if (!formats)
-        return AVERROR(ENOMEM);
-    ff_formats_ref(formats, &inlink->out_formats);
+    if ((ret = ff_formats_ref(formats, &inlink->out_formats)) < 0)
+        return ret;
 
     layouts = ff_all_channel_layouts();
-    if (!layouts)
-        return AVERROR(ENOMEM);
-    ff_channel_layouts_ref(layouts, &inlink->out_channel_layouts);
+    if ((ret = ff_channel_layouts_ref(layouts, &inlink->out_channel_layouts)) < 0)
+        return ret;
 
     formats = ff_all_samplerates();
-    if (!formats)
-        return AVERROR(ENOMEM);
-    ff_formats_ref(formats, &inlink->out_samplerates);
+    if ((ret = ff_formats_ref(formats, &inlink->out_samplerates)) < 0)
+        return ret;
 
     /* set output video format */
     formats = ff_make_format_list(pix_fmts);
-    if (!formats)
-        return AVERROR(ENOMEM);
-    ff_formats_ref(formats, &outlink->in_formats);
+    if ((ret = ff_formats_ref(formats, &outlink->in_formats)) < 0)
+        return ret;
 
     return 0;
 }
 
+static int get_lin_h(int16_t sample, int height)
+{   /* Linear scale: map a signed sample to a row index centred on height/2. */
+    return height/2 - av_rescale(sample, height/2, INT16_MAX); /* subtracting puts positive samples at smaller row indices */
+}
+
+static int get_lin_h2(int16_t sample, int height)
+{   /* Linear scale for centered-line mode: bar length in rows, proportional to |sample|. */
+    return av_rescale(FFMIN(FFABS(sample), INT16_MAX), height, INT16_MAX); /* clamp |INT16_MIN| = 32768 so the length never exceeds height */
+}
+
+static int get_log_h(int16_t sample, int height)
+{   /* Logarithmic scale: log10(1 + |sample|) normalised by log10(1 + INT16_MAX), offset from height/2 according to the sample's sign. */
+    return height/2 - FFSIGN(sample) * (log10(1 + FFABS(sample)) * (height/2) / log10(1 + INT16_MAX)); /* floating-point value is converted to int on return */
+}
+
+static int get_log_h2(int16_t sample, int height)
+{   /* Logarithmic scale for centered-line mode: bar length in rows from log10(1 + |sample|). */
+    return log10(1 + FFABS(sample)) * height / log10(1 + INT16_MAX); /* floating-point value is converted to int on return */
+}
+
+static void draw_sample_point_rgba(uint8_t *buf, int height, int linesize,
+                                   int16_t *prev_y, /* not referenced in this mode */
+                                   const uint8_t color[4], int h)
+{   /* Point mode, 4 bytes per pixel: add color[] to the pixel at row h of the column starting at buf. */
+    if (h >= 0 && h < height) { /* rows outside [0, height) are skipped */
+        buf[h * linesize + 0] += color[0]; /* additive: repeated hits on the same pixel accumulate */
+        buf[h * linesize + 1] += color[1];
+        buf[h * linesize + 2] += color[2];
+        buf[h * linesize + 3] += color[3];
+    }
+}
+
+static void draw_sample_line_rgba(uint8_t *buf, int height, int linesize,
+                                  int16_t *prev_y, /* not referenced in this mode */
+                                  const uint8_t color[4], int h)
+{   /* Line mode, 4 bytes per pixel: add color[] to every row between the centre row and row h. */
+    int k;
+    int start   = height/2;
+    int end     = av_clip(h, 0, height-1); /* keep the target row inside the column */
+    if (start > end)
+        FFSWAP(int, start, end); /* swap with the variables' own type; an int16_t temporary would narrow the value */
+    for (k = start; k < end; k++) { /* half-open range: row `end` itself is not drawn */
+        buf[k * linesize + 0] += color[0];
+        buf[k * linesize + 1] += color[1];
+        buf[k * linesize + 2] += color[2];
+        buf[k * linesize + 3] += color[3];
+    }
+}
+
+static void draw_sample_p2p_rgba(uint8_t *buf, int height, int linesize,
+                                 int16_t *prev_y, /* in/out: row of the previous sample for this channel */
+                                 const uint8_t color[4], int h)
+{   /* Point-to-point mode, 4 bytes per pixel: draw the sample at row h and join it to the previous sample's row. */
+    int k;
+    if (h >= 0 && h < height) { /* nothing is drawn for rows outside [0, height) */
+        buf[h * linesize + 0] += color[0];
+        buf[h * linesize + 1] += color[1];
+        buf[h * linesize + 2] += color[2];
+        buf[h * linesize + 3] += color[3];
+        if (*prev_y && h != *prev_y) { /* a stored row of 0 is treated like "no previous sample" */
+            int start = *prev_y;
+            int end = av_clip(h, 0, height-1);
+            if (start > end)
+                FFSWAP(int, start, end); /* swap with the variables' own type; an int16_t temporary would narrow the value */
+            for (k = start + 1; k < end; k++) { /* strictly between the two rows; both end points are excluded */
+                buf[k * linesize + 0] += color[0];
+                buf[k * linesize + 1] += color[1];
+                buf[k * linesize + 2] += color[2];
+                buf[k * linesize + 3] += color[3];
+            }
+        }
+    }
+    *prev_y = h; /* remembered even when h was out of range and nothing was drawn */
+}
+
+static void draw_sample_cline_rgba(uint8_t *buf, int height, int linesize,
+                                   int16_t *prev_y, /* not referenced in this mode */
+                                   const uint8_t color[4], int h)
+{   /* Centered-line mode, 4 bytes per pixel: here h is a bar length in rows, drawn vertically centred. */
+    int k;
+    const int start = (height - h) / 2; /* equal margin above and below the bar */
+    const int end   = start + h;
+    for (k = start; k < end; k++) { /* no bounds check is done here on start or end */
+        buf[k * linesize + 0] += color[0];
+        buf[k * linesize + 1] += color[1];
+        buf[k * linesize + 2] += color[2];
+        buf[k * linesize + 3] += color[3];
+    }
+}
+
+static void draw_sample_point_gray(uint8_t *buf, int height, int linesize,
+                                   int16_t *prev_y, /* not referenced in this mode */
+                                   const uint8_t color[4], int h)
+{   /* Point mode, 1 byte per pixel: add color[0] to the pixel at row h of the column starting at buf. */
+    if (h >= 0 && h < height) /* rows outside [0, height) are skipped */
+        buf[h * linesize] += color[0]; /* only the first colour component is used */
+}
+
+static void draw_sample_line_gray(uint8_t *buf, int height, int linesize,
+                                  int16_t *prev_y, /* not referenced in this mode */
+                                  const uint8_t color[4], int h)
+{   /* Line mode, 1 byte per pixel: add color[0] to every row between the centre row and row h. */
+    int k;
+    int start   = height/2;
+    int end     = av_clip(h, 0, height-1); /* keep the target row inside the column */
+    if (start > end)
+        FFSWAP(int, start, end); /* swap with the variables' own type; an int16_t temporary would narrow the value */
+    for (k = start; k < end; k++) /* half-open range: row `end` itself is not drawn */
+        buf[k * linesize] += color[0];
+}
+
+static void draw_sample_p2p_gray(uint8_t *buf, int height, int linesize,
+                                 int16_t *prev_y, /* in/out: row of the previous sample for this channel */
+                                 const uint8_t color[4], int h)
+{   /* Point-to-point mode, 1 byte per pixel: draw the sample at row h and join it to the previous sample's row. */
+    int k;
+    if (h >= 0 && h < height) { /* nothing is drawn for rows outside [0, height) */
+        buf[h * linesize] += color[0];
+        if (*prev_y && h != *prev_y) { /* a stored row of 0 is treated like "no previous sample" */
+            int start = *prev_y;
+            int end = av_clip(h, 0, height-1);
+            if (start > end)
+                FFSWAP(int, start, end); /* swap with the variables' own type; an int16_t temporary would narrow the value */
+            for (k = start + 1; k < end; k++) /* strictly between the two rows; both end points are excluded */
+                buf[k * linesize] += color[0];
+        }
+    }
+    *prev_y = h; /* remembered even when h was out of range and nothing was drawn */
+}
+
+static void draw_sample_cline_gray(uint8_t *buf, int height, int linesize,
+                                   int16_t *prev_y, /* not referenced in this mode */
+                                   const uint8_t color[4], int h)
+{   /* Centered-line mode, 1 byte per pixel: here h is a bar length in rows, drawn vertically centred. */
+    int k;
+    const int start = (height - h) / 2; /* equal margin above and below the bar */
+    const int end   = start + h;
+    for (k = start; k < end; k++) /* no bounds check is done here on start or end */
+        buf[k * linesize] += color[0];
+}
+
 static int config_output(AVFilterLink *outlink)
 {
     AVFilterContext *ctx = outlink->src;
     AVFilterLink *inlink = ctx->inputs[0];
     ShowWavesContext *showwaves = ctx->priv;
     int nb_channels = inlink->channels;
+    char *colors, *saveptr = NULL;
+    uint8_t x;
+    int ch;
+
+    if (showwaves->single_pic)
+        showwaves->n = 1;
 
     if (!showwaves->n)
         showwaves->n = FFMAX(1, ((double)inlink->sample_rate / (showwaves->w * av_q2d(showwaves->rate))) + 0.5);
@@ -168,6 +329,85 @@ static int config_output(AVFilterLink *outlink)
 
     av_log(ctx, AV_LOG_VERBOSE, "s:%dx%d r:%f n:%d\n",
            showwaves->w, showwaves->h, av_q2d(outlink->frame_rate), showwaves->n);
+
+    switch (outlink->format) {
+    case AV_PIX_FMT_GRAY8:
+        switch (showwaves->mode) {
+        case MODE_POINT:         showwaves->draw_sample = draw_sample_point_gray; break;
+        case MODE_LINE:          showwaves->draw_sample = draw_sample_line_gray;  break;
+        case MODE_P2P:           showwaves->draw_sample = draw_sample_p2p_gray;   break;
+        case MODE_CENTERED_LINE: showwaves->draw_sample = draw_sample_cline_gray; break;
+        default:
+            return AVERROR_BUG;
+        }
+        showwaves->pixstep = 1;
+        break;
+    case AV_PIX_FMT_RGBA:
+        switch (showwaves->mode) {
+        case MODE_POINT:         showwaves->draw_sample = draw_sample_point_rgba; break;
+        case MODE_LINE:          showwaves->draw_sample = draw_sample_line_rgba;  break;
+        case MODE_P2P:           showwaves->draw_sample = draw_sample_p2p_rgba;   break;
+        case MODE_CENTERED_LINE: showwaves->draw_sample = draw_sample_cline_rgba; break;
+        default:
+            return AVERROR_BUG;
+        }
+        showwaves->pixstep = 4;
+        break;
+    }
+
+    switch (showwaves->scale) {
+    case SCALE_LIN:
+        switch (showwaves->mode) {
+        case MODE_POINT:
+        case MODE_LINE:
+        case MODE_P2P:           showwaves->get_h = get_lin_h;  break;
+        case MODE_CENTERED_LINE: showwaves->get_h = get_lin_h2; break;
+        default:
+            return AVERROR_BUG;
+        }
+        break;
+    case SCALE_LOG:
+        switch (showwaves->mode) {
+        case MODE_POINT:
+        case MODE_LINE:
+        case MODE_P2P:           showwaves->get_h = get_log_h;  break;
+        case MODE_CENTERED_LINE: showwaves->get_h = get_log_h2; break;
+        default:
+            return AVERROR_BUG;
+        }
+        break;
+    }
+
+    showwaves->fg = av_malloc_array(nb_channels, 4 * sizeof(*showwaves->fg));
+    if (!showwaves->fg)
+        return AVERROR(ENOMEM);
+
+    colors = av_strdup(showwaves->colors);
+    if (!colors)
+        return AVERROR(ENOMEM);
+
+    /* multiplication factor, pre-computed to avoid in-loop divisions */
+    x = 255 / ((showwaves->split_channels ? 1 : nb_channels) * showwaves->n);
+    if (outlink->format == AV_PIX_FMT_RGBA) {
+        uint8_t fg[4] = { 0xff, 0xff, 0xff, 0xff };
+
+        for (ch = 0; ch < nb_channels; ch++) {
+            char *color;
+
+            color = av_strtok(ch == 0 ? colors : NULL, " |", &saveptr);
+            if (color)
+                av_parse_color(fg, color, -1, ctx);
+            showwaves->fg[4*ch + 0] = fg[0] * x / 255.;
+            showwaves->fg[4*ch + 1] = fg[1] * x / 255.;
+            showwaves->fg[4*ch + 2] = fg[2] * x / 255.;
+            showwaves->fg[4*ch + 3] = fg[3] * x / 255.;
+        }
+    } else {
+        for (ch = 0; ch < nb_channels; ch++)
+            showwaves->fg[4 * ch + 0] = x;
+    }
+    av_free(colors);
+
     return 0;
 }
 
@@ -179,8 +419,7 @@ inline static int push_frame(AVFilterLink *outlink)
     int nb_channels = inlink->channels;
     int ret, i;
 
-    if ((ret = ff_filter_frame(outlink, showwaves->outpicref)) >= 0)
-        showwaves->req_fullfilled = 1;
+    ret = ff_filter_frame(outlink, showwaves->outpicref);
     showwaves->outpicref = NULL;
     showwaves->buf_idx = 0;
     for (i = 0; i < nb_channels; i++)
@@ -197,12 +436,17 @@ static int push_single_pic(AVFilterLink *outlink)
     AVFrame *out = showwaves->outpicref;
     struct frame_node *node;
     const int nb_channels = inlink->channels;
-    const int x = 255 / (showwaves->split_channels ? 1 : nb_channels);
     const int ch_height = showwaves->split_channels ? outlink->h / nb_channels : outlink->h;
     const int linesize = out->linesize[0];
+    const int pixstep = showwaves->pixstep;
     int col = 0;
     int64_t *sum = showwaves->sum;
 
+    if (max_samples == 0) {
+        av_log(ctx, AV_LOG_ERROR, "Too few samples\n");
+        return AVERROR(EINVAL);
+    }
+
     av_log(ctx, AV_LOG_DEBUG, "Create frame averaging %"PRId64" samples per column\n", max_samples);
 
     memset(sum, 0, nb_channels);
@@ -220,11 +464,14 @@ static int push_single_pic(AVFilterLink *outlink)
             if (n++ == max_samples) {
                 for (ch = 0; ch < nb_channels; ch++) {
                     int16_t sample = sum[ch] / max_samples;
-                    uint8_t *buf = out->data[0] + col;
+                    uint8_t *buf = out->data[0] + col * pixstep;
+                    int h;
+
                     if (showwaves->split_channels)
                         buf += ch*ch_height*linesize;
                     av_assert0(col < outlink->w);
-                    showwaves->draw_sample(buf, ch_height, linesize, sample, &showwaves->buf_idy[ch], x);
+                    h = showwaves->get_h(sample, ch_height);
+                    showwaves->draw_sample(buf, ch_height, linesize, &showwaves->buf_idy[ch], &showwaves->fg[ch * 4], h);
                     sum[ch] = 0;
                 }
                 col++;
@@ -243,11 +490,7 @@ static int request_frame(AVFilterLink *outlink)
     AVFilterLink *inlink = outlink->src->inputs[0];
     int ret;
 
-    showwaves->req_fullfilled = 0;
-    do {
-        ret = ff_request_frame(inlink);
-    } while (!showwaves->req_fullfilled && ret >= 0);
-
+    ret = ff_request_frame(inlink);
     if (ret == AVERROR_EOF && showwaves->outpicref) {
         if (showwaves->single_pic)
             push_single_pic(outlink);
@@ -258,57 +501,6 @@ static int request_frame(AVFilterLink *outlink)
     return ret;
 }
 
-static void draw_sample_point(uint8_t *buf, int height, int linesize,
-                              int16_t sample, int16_t *prev_y, int intensity)
-{
-    const int h = height/2 - av_rescale(sample, height/2, INT16_MAX);
-    if (h >= 0 && h < height)
-        buf[h * linesize] += intensity;
-}
-
-static void draw_sample_line(uint8_t *buf, int height, int linesize,
-                             int16_t sample, int16_t *prev_y, int intensity)
-{
-    int k;
-    const int h = height/2 - av_rescale(sample, height/2, INT16_MAX);
-    int start   = height/2;
-    int end     = av_clip(h, 0, height-1);
-    if (start > end)
-        FFSWAP(int16_t, start, end);
-    for (k = start; k < end; k++)
-        buf[k * linesize] += intensity;
-}
-
-static void draw_sample_p2p(uint8_t *buf, int height, int linesize,
-                            int16_t sample, int16_t *prev_y, int intensity)
-{
-    int k;
-    const int h = height/2 - av_rescale(sample, height/2, INT16_MAX);
-    if (h >= 0 && h < height) {
-        buf[h * linesize] += intensity;
-        if (*prev_y && h != *prev_y) {
-            int start = *prev_y;
-            int end = av_clip(h, 0, height-1);
-            if (start > end)
-                FFSWAP(int16_t, start, end);
-            for (k = start + 1; k < end; k++)
-                buf[k * linesize] += intensity;
-        }
-    }
-    *prev_y = h;
-}
-
-static void draw_sample_cline(uint8_t *buf, int height, int linesize,
-                              int16_t sample, int16_t *prev_y, int intensity)
-{
-    int k;
-    const int h     = av_rescale(abs(sample), height, INT16_MAX);
-    const int start = (height - h) / 2;
-    const int end   = start + h;
-    for (k = start; k < end; k++)
-        buf[k * linesize] += intensity;
-}
-
 static int alloc_out_frame(ShowWavesContext *showwaves, const int16_t *p,
                            const AVFilterLink *inlink, AVFilterLink *outlink,
                            const AVFrame *in)
@@ -325,7 +517,7 @@ static int alloc_out_frame(ShowWavesContext *showwaves, const int16_t *p,
                                           av_make_q(1, inlink->sample_rate),
                                           outlink->time_base);
         for (j = 0; j < outlink->h; j++)
-            memset(out->data[0] + j*out->linesize[0], 0, outlink->w);
+            memset(out->data[0] + j*out->linesize[0], 0, outlink->w * showwaves->pixstep);
     }
     return 0;
 }
@@ -339,14 +531,6 @@ static av_cold int init(AVFilterContext *ctx)
         showwaves->mode = MODE_CENTERED_LINE;
     }
 
-    switch (showwaves->mode) {
-    case MODE_POINT:         showwaves->draw_sample = draw_sample_point; break;
-    case MODE_LINE:          showwaves->draw_sample = draw_sample_line;  break;
-    case MODE_P2P:           showwaves->draw_sample = draw_sample_p2p;   break;
-    case MODE_CENTERED_LINE: showwaves->draw_sample = draw_sample_cline; break;
-    default:
-        return AVERROR_BUG;
-    }
     return 0;
 }
 
@@ -362,8 +546,8 @@ static int showwaves_filter_frame(AVFilterLink *inlink, AVFrame *insamples)
     int16_t *p = (int16_t *)insamples->data[0];
     int nb_channels = inlink->channels;
     int i, j, ret = 0;
+    const int pixstep = showwaves->pixstep;
     const int n = showwaves->n;
-    const int x = 255 / ((showwaves->split_channels ? 1 : nb_channels) * n); /* multiplication factor, pre-computed to avoid in-loop divisions */
     const int ch_height = showwaves->split_channels ? outlink->h / nb_channels : outlink->h;
 
     /* draw data in the buffer */
@@ -375,12 +559,15 @@ static int showwaves_filter_frame(AVFilterLink *inlink, AVFrame *insamples)
         outpicref = showwaves->outpicref;
 
         for (j = 0; j < nb_channels; j++) {
-            uint8_t *buf = outpicref->data[0] + showwaves->buf_idx;
+            uint8_t *buf = outpicref->data[0] + showwaves->buf_idx * pixstep;
             const int linesize = outpicref->linesize[0];
+            int h;
+
             if (showwaves->split_channels)
                 buf += j*ch_height*linesize;
-            showwaves->draw_sample(buf, ch_height, linesize, *p++,
-                                   &showwaves->buf_idy[j], x);
+            h = showwaves->get_h(*p++, ch_height);
+            showwaves->draw_sample(buf, ch_height, linesize,
+                                   &showwaves->buf_idy[j], &showwaves->fg[j * 4], h);
         }
 
         showwaves->sample_count_mod++;
@@ -440,7 +627,11 @@ AVFilter ff_avf_showwaves = {
 static const AVOption showwavespic_options[] = {
     { "size", "set video size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, {.str = "600x240"}, 0, 0, FLAGS },
     { "s",    "set video size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, {.str = "600x240"}, 0, 0, FLAGS },
-    { "split_channels", "draw channels separately", OFFSET(split_channels), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, FLAGS },
+    { "split_channels", "draw channels separately", OFFSET(split_channels), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS },
+    { "colors", "set channels colors", OFFSET(colors), AV_OPT_TYPE_STRING, {.str = "red|green|blue|yellow|orange|lime|pink|magenta|brown" }, 0, 0, FLAGS },
+    { "scale", "set amplitude scale", OFFSET(scale), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, SCALE_NB-1, FLAGS, .unit="scale" },
+        { "lin", "linear",         0, AV_OPT_TYPE_CONST, {.i64=SCALE_LIN}, .flags=FLAGS, .unit="scale"},
+        { "log", "logarithmic",    0, AV_OPT_TYPE_CONST, {.i64=SCALE_LOG}, .flags=FLAGS, .unit="scale"},
     { NULL }
 };
 
diff --git a/libavfilter/avfilter.c b/libavfilter/avfilter.c
index bcf8d3f9..2f4d59f3 100644
--- a/libavfilter/avfilter.c
+++ b/libavfilter/avfilter.c
@@ -168,6 +168,7 @@ void avfilter_link_free(AVFilterLink **link)
         return;
 
     av_frame_free(&(*link)->partial_buf);
+    ff_video_frame_pool_uninit((FFVideoFramePool**)&(*link)->video_frame_pool);
 
     av_freep(link);
 }
@@ -177,9 +178,21 @@ int avfilter_link_get_channels(AVFilterLink *link)
     return link->channels;
 }
 
+void ff_avfilter_link_set_in_status(AVFilterLink *link, int status, int64_t pts)
+{   /* Forwards its arguments unchanged to ff_avfilter_link_set_out_status(). */
+    ff_avfilter_link_set_out_status(link, status, pts);
+}
+
+void ff_avfilter_link_set_out_status(AVFilterLink *link, int status, int64_t pts)
+{   /* Store status on the link, clear both frame-wanted flags, then update the link's current pts. */
+    link->status = status;
+    link->frame_wanted_in = link->frame_wanted_out = 0; /* any pending request is dropped */
+    ff_update_link_current_pts(link, pts); /* returns immediately when pts is AV_NOPTS_VALUE */
+}
+
 void avfilter_link_set_closed(AVFilterLink *link, int closed)
 {
-    link->closed = closed;
+    ff_avfilter_link_set_out_status(link, closed ? AVERROR_EOF : 0, AV_NOPTS_VALUE); /* nonzero closed becomes status AVERROR_EOF, zero becomes status 0 */
 }
 
 int avfilter_insert_filter(AVFilterLink *link, AVFilterContext *filt,
@@ -237,7 +250,8 @@ int avfilter_config_links(AVFilterContext *filter)
         }
 
         inlink = link->src->nb_inputs ? link->src->inputs[0] : NULL;
-        link->current_pts = AV_NOPTS_VALUE;
+        link->current_pts =
+        link->current_pts_us = AV_NOPTS_VALUE;
 
         switch (link->init_state) {
         case AVLINK_INIT:
@@ -275,10 +289,9 @@ int avfilter_config_links(AVFilterContext *filter)
                     link->sample_aspect_ratio = inlink ?
                         inlink->sample_aspect_ratio : (AVRational){1,1};
 
-                if (inlink && !link->frame_rate.num && !link->frame_rate.den)
-                    link->frame_rate = inlink->frame_rate;
-
                 if (inlink) {
+                    if (!link->frame_rate.num && !link->frame_rate.den)
+                        link->frame_rate = inlink->frame_rate;
                     if (!link->w)
                         link->w = inlink->w;
                     if (!link->h)
@@ -342,31 +355,36 @@ void ff_tlog_link(void *ctx, AVFilterLink *link, int end)
 
 int ff_request_frame(AVFilterLink *link)
 {
-    int ret = -1;
     FF_TPRINTF_START(NULL, request_frame); ff_tlog_link(NULL, link, 1);
 
-    if (link->closed)
-        return AVERROR_EOF;
-    av_assert0(!link->frame_requested);
-    link->frame_requested = 1;
-    while (link->frame_requested) {
-        if (link->srcpad->request_frame)
-            ret = link->srcpad->request_frame(link);
-        else if (link->src->inputs[0])
-            ret = ff_request_frame(link->src->inputs[0]);
-        if (ret == AVERROR_EOF && link->partial_buf) {
-            AVFrame *pbuf = link->partial_buf;
-            link->partial_buf = NULL;
-            ret = ff_filter_frame_framed(link, pbuf);
-        }
-        if (ret < 0) {
-            link->frame_requested = 0;
-            if (ret == AVERROR_EOF)
-                link->closed = 1;
-        } else {
-            av_assert0(!link->frame_requested ||
-                       link->flags & FF_LINK_FLAG_REQUEST_LOOP);
-        }
+    if (link->status) /* a nonzero status already stored on the link is returned as-is */
+        return link->status;
+    link->frame_wanted_in = 1; /* the request is only recorded; no request_frame callback is invoked here */
+    link->frame_wanted_out = 1;
+    return 0;
+}
+
+int ff_request_frame_to_filter(AVFilterLink *link)
+{   /* Call the source pad's request_frame() for this link, or forward the request to the source filter's first input. */
+    int ret = -1; /* stays -1 when there is neither a request_frame callback nor a first input */
+
+    FF_TPRINTF_START(NULL, request_frame_to_filter); ff_tlog_link(NULL, link, 1);
+    link->frame_wanted_in = 0; /* cleared before the request is dispatched */
+    if (link->srcpad->request_frame)
+        ret = link->srcpad->request_frame(link);
+    else if (link->src->inputs[0])
+        ret = ff_request_frame(link->src->inputs[0]);
+    if (ret == AVERROR_EOF && link->partial_buf) { /* EOF with a buffered partial frame: send it, then mark the link EOF */
+        AVFrame *pbuf = link->partial_buf;
+        link->partial_buf = NULL; /* detach before passing pbuf on */
+        ret = ff_filter_frame_framed(link, pbuf);
+        ff_avfilter_link_set_in_status(link, AVERROR_EOF, AV_NOPTS_VALUE);
+        link->frame_wanted_out = 0;
+        return ret; /* result of sending the partial frame, not the EOF code */
+    }
+    if (ret < 0) {
+        if (ret != AVERROR(EAGAIN) && ret != link->status) /* EAGAIN, or an error equal to the current status, is not stored again */
+            ff_avfilter_link_set_in_status(link, ret, AV_NOPTS_VALUE);
     }
     return ret;
 }
@@ -451,7 +469,8 @@ void ff_update_link_current_pts(AVFilterLink *link, int64_t pts)
 {
     if (pts == AV_NOPTS_VALUE)
         return;
-    link->current_pts = av_rescale_q(pts, link->time_base, AV_TIME_BASE_Q);
+    link->current_pts = pts;
+    link->current_pts_us = av_rescale_q(pts, link->time_base, AV_TIME_BASE_Q);
     /* TODO use duration */
     if (link->graph && link->age_index >= 0)
         ff_avfilter_graph_update_heap(link->graph, link);
@@ -501,17 +520,10 @@ AVFilter *avfilter_get_by_name(const char *name)
 int avfilter_register(AVFilter *filter)
 {
     AVFilter **f = last_filter;
-    int i;
 
     /* the filter must select generic or internal exclusively */
     av_assert0((filter->flags & AVFILTER_FLAG_SUPPORT_TIMELINE) != AVFILTER_FLAG_SUPPORT_TIMELINE);
 
-    for(i=0; filter->inputs && filter->inputs[i].name; i++) {
-        const AVFilterPad *input = &filter->inputs[i];
-        av_assert0(     !input->filter_frame
-                    || (!input->start_frame && !input->end_frame));
-    }
-
     filter->next = NULL;
 
     while(*f || avpriv_atomic_ptr_cas((void * volatile *)f, NULL, filter))
@@ -669,12 +681,6 @@ AVFilterContext *ff_filter_alloc(const AVFilter *filter, const char *inst_name)
         if (!ret->outputs)
             goto err;
     }
-#if FF_API_FOO_COUNT
-FF_DISABLE_DEPRECATION_WARNINGS
-    ret->output_count = ret->nb_outputs;
-    ret->input_count  = ret->nb_inputs;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
 
     return ret;
 
@@ -893,7 +899,7 @@ int avfilter_init_str(AVFilterContext *filter, const char *args)
             return AVERROR(EINVAL);
         }
 
-#if FF_API_OLD_FILTER_OPTS
+#if FF_API_OLD_FILTER_OPTS || FF_API_OLD_FILTER_OPTS_ERROR
             if (   !strcmp(filter->filter->name, "format")     ||
                    !strcmp(filter->filter->name, "noformat")   ||
                    !strcmp(filter->filter->name, "frei0r")     ||
@@ -953,18 +959,30 @@ int avfilter_init_str(AVFilterContext *filter, const char *args)
             while ((p = strchr(p, ':')))
                 *p++ = '|';
 
+#if FF_API_OLD_FILTER_OPTS
             if (deprecated)
                 av_log(filter, AV_LOG_WARNING, "This syntax is deprecated. Use "
                        "'|' to separate the list items.\n");
 
             av_log(filter, AV_LOG_DEBUG, "compat: called with args=[%s]\n", copy);
             ret = process_options(filter, &options, copy);
+#else
+            if (deprecated) {
+                av_log(filter, AV_LOG_ERROR, "This syntax is deprecated. Use "
+                       "'|' to separate the list items ('%s' instead of '%s')\n",
+                       copy, args);
+                ret = AVERROR(EINVAL);
+            } else {
+                ret = process_options(filter, &options, copy);
+            }
+#endif
             av_freep(&copy);
 
             if (ret < 0)
                 goto fail;
+        } else
 #endif
-        } else {
+        {
             ret = process_options(filter, &options, args);
             if (ret < 0)
                 goto fail;
@@ -1012,9 +1030,9 @@ static int ff_filter_frame_framed(AVFilterLink *link, AVFrame *frame)
     AVFilterCommand *cmd= link->dst->command_queue;
     int64_t pts;
 
-    if (link->closed) {
+    if (link->status) {
         av_frame_free(&frame);
-        return AVERROR_EOF;
+        return link->status;
     }
 
     if (!(filter_frame = dst->filter_frame))
@@ -1024,7 +1042,6 @@ static int ff_filter_frame_framed(AVFilterLink *link, AVFrame *frame)
     if (dst->needs_writable && !av_frame_is_writable(frame)) {
         av_log(link->dst, AV_LOG_DEBUG, "Copying data in avfilter.\n");
 
-        /* Maybe use ff_copy_buffer_ref instead? */
         switch (link->type) {
         case AVMEDIA_TYPE_VIDEO:
             out = ff_get_video_buffer(link, link->w, link->h);
@@ -1090,7 +1107,6 @@ static int ff_filter_frame_framed(AVFilterLink *link, AVFrame *frame)
     }
     ret = filter_frame(link, out);
     link->frame_count++;
-    link->frame_requested = 0;
     ff_update_link_current_pts(link, pts);
     return ret;
 
@@ -1107,7 +1123,6 @@ static int ff_filter_frame_needs_framing(AVFilterLink *link, AVFrame *frame)
     int nb_channels = av_frame_get_channels(frame);
     int ret = 0;
 
-    link->flags |= FF_LINK_FLAG_REQUEST_LOOP;
     /* Handle framing (min_samples, max_samples) */
     while (insamples) {
         if (!pbuf) {
@@ -1135,6 +1150,9 @@ static int ff_filter_frame_needs_framing(AVFilterLink *link, AVFrame *frame)
         if (pbuf->nb_samples >= link->min_samples) {
             ret = ff_filter_frame_framed(link, pbuf);
             pbuf = NULL;
+        } else {
+            if (link->frame_wanted_out)
+                link->frame_wanted_in = 1;
         }
     }
     av_frame_free(&frame);
@@ -1148,19 +1166,35 @@ int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
 
     /* Consistency checks */
     if (link->type == AVMEDIA_TYPE_VIDEO) {
-        if (strcmp(link->dst->filter->name, "scale") &&
-            strcmp(link->dst->filter->name, "idet")) {
+        if (strcmp(link->dst->filter->name, "buffersink") &&
+            strcmp(link->dst->filter->name, "format") &&
+            strcmp(link->dst->filter->name, "idet") &&
+            strcmp(link->dst->filter->name, "null") &&
+            strcmp(link->dst->filter->name, "scale")) {
             av_assert1(frame->format                 == link->format);
             av_assert1(frame->width               == link->w);
             av_assert1(frame->height               == link->h);
         }
     } else {
-        av_assert1(frame->format                == link->format);
-        av_assert1(av_frame_get_channels(frame) == link->channels);
-        av_assert1(frame->channel_layout        == link->channel_layout);
-        av_assert1(frame->sample_rate           == link->sample_rate);
+        if (frame->format != link->format) {
+            av_log(link->dst, AV_LOG_ERROR, "Format change is not supported\n");
+            goto error;
+        }
+        if (av_frame_get_channels(frame) != link->channels) {
+            av_log(link->dst, AV_LOG_ERROR, "Channel count change is not supported\n");
+            goto error;
+        }
+        if (frame->channel_layout != link->channel_layout) {
+            av_log(link->dst, AV_LOG_ERROR, "Channel layout change is not supported\n");
+            goto error;
+        }
+        if (frame->sample_rate != link->sample_rate) {
+            av_log(link->dst, AV_LOG_ERROR, "Sample rate change is not supported\n");
+            goto error;
+        }
     }
 
+    link->frame_wanted_out = 0;
     /* Go directly to actual filtering if possible */
     if (link->type == AVMEDIA_TYPE_AUDIO &&
         link->min_samples &&
@@ -1171,6 +1205,9 @@ int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
     } else {
         return ff_filter_frame_framed(link, frame);
     }
+error:
+    av_frame_free(&frame);
+    return AVERROR_PATCHWELCOME;
 }
 
 const AVClass *avfilter_get_class(void)
diff --git a/libavfilter/avfilter.h b/libavfilter/avfilter.h
index 296f2d3c..04bf5855 100644
--- a/libavfilter/avfilter.h
+++ b/libavfilter/avfilter.h
@@ -66,335 +66,6 @@ typedef struct AVFilterLink    AVFilterLink;
 typedef struct AVFilterPad     AVFilterPad;
 typedef struct AVFilterFormats AVFilterFormats;
 
-#if FF_API_AVFILTERBUFFER
-/**
- * A reference-counted buffer data type used by the filter system. Filters
- * should not store pointers to this structure directly, but instead use the
- * AVFilterBufferRef structure below.
- */
-typedef struct AVFilterBuffer {
-    uint8_t *data[8];           ///< buffer data for each plane/channel
-
-    /**
-     * pointers to the data planes/channels.
-     *
-     * For video, this should simply point to data[].
-     *
-     * For planar audio, each channel has a separate data pointer, and
-     * linesize[0] contains the size of each channel buffer.
-     * For packed audio, there is just one data pointer, and linesize[0]
-     * contains the total size of the buffer for all channels.
-     *
-     * Note: Both data and extended_data will always be set, but for planar
-     * audio with more channels that can fit in data, extended_data must be used
-     * in order to access all channels.
-     */
-    uint8_t **extended_data;
-    int linesize[8];            ///< number of bytes per line
-
-    /** private data to be used by a custom free function */
-    void *priv;
-    /**
-     * A pointer to the function to deallocate this buffer if the default
-     * function is not sufficient. This could, for example, add the memory
-     * back into a memory pool to be reused later without the overhead of
-     * reallocating it from scratch.
-     */
-    void (*free)(struct AVFilterBuffer *buf);
-
-    int format;                 ///< media format
-    int w, h;                   ///< width and height of the allocated buffer
-    unsigned refcount;          ///< number of references to this buffer
-} AVFilterBuffer;
-
-#define AV_PERM_READ     0x01   ///< can read from the buffer
-#define AV_PERM_WRITE    0x02   ///< can write to the buffer
-#define AV_PERM_PRESERVE 0x04   ///< nobody else can overwrite the buffer
-#define AV_PERM_REUSE    0x08   ///< can output the buffer multiple times, with the same contents each time
-#define AV_PERM_REUSE2   0x10   ///< can output the buffer multiple times, modified each time
-#define AV_PERM_NEG_LINESIZES 0x20  ///< the buffer requested can have negative linesizes
-#define AV_PERM_ALIGN    0x40   ///< the buffer must be aligned
-
-#define AVFILTER_ALIGN 16 //not part of ABI
-
-/**
- * Audio specific properties in a reference to an AVFilterBuffer. Since
- * AVFilterBufferRef is common to different media formats, audio specific
- * per reference properties must be separated out.
- */
-typedef struct AVFilterBufferRefAudioProps {
-    uint64_t channel_layout;    ///< channel layout of audio buffer
-    int nb_samples;             ///< number of audio samples per channel
-    int sample_rate;            ///< audio buffer sample rate
-    int channels;               ///< number of channels (do not access directly)
-} AVFilterBufferRefAudioProps;
-
-/**
- * Video specific properties in a reference to an AVFilterBuffer. Since
- * AVFilterBufferRef is common to different media formats, video specific
- * per reference properties must be separated out.
- */
-typedef struct AVFilterBufferRefVideoProps {
-    int w;                      ///< image width
-    int h;                      ///< image height
-    AVRational sample_aspect_ratio; ///< sample aspect ratio
-    int interlaced;             ///< is frame interlaced
-    int top_field_first;        ///< field order
-    enum AVPictureType pict_type; ///< picture type of the frame
-    int key_frame;              ///< 1 -> keyframe, 0-> not
-    int qp_table_linesize;                ///< qp_table stride
-    int qp_table_size;            ///< qp_table size
-    int8_t *qp_table;             ///< array of Quantization Parameters
-} AVFilterBufferRefVideoProps;
-
-/**
- * A reference to an AVFilterBuffer. Since filters can manipulate the origin of
- * a buffer to, for example, crop image without any memcpy, the buffer origin
- * and dimensions are per-reference properties. Linesize is also useful for
- * image flipping, frame to field filters, etc, and so is also per-reference.
- *
- * TODO: add anything necessary for frame reordering
- */
-typedef struct AVFilterBufferRef {
-    AVFilterBuffer *buf;        ///< the buffer that this is a reference to
-    uint8_t *data[8];           ///< picture/audio data for each plane
-    /**
-     * pointers to the data planes/channels.
-     *
-     * For video, this should simply point to data[].
-     *
-     * For planar audio, each channel has a separate data pointer, and
-     * linesize[0] contains the size of each channel buffer.
-     * For packed audio, there is just one data pointer, and linesize[0]
-     * contains the total size of the buffer for all channels.
-     *
-     * Note: Both data and extended_data will always be set, but for planar
-     * audio with more channels that can fit in data, extended_data must be used
-     * in order to access all channels.
-     */
-    uint8_t **extended_data;
-    int linesize[8];            ///< number of bytes per line
-
-    AVFilterBufferRefVideoProps *video; ///< video buffer specific properties
-    AVFilterBufferRefAudioProps *audio; ///< audio buffer specific properties
-
-    /**
-     * presentation timestamp. The time unit may change during
-     * filtering, as it is specified in the link and the filter code
-     * may need to rescale the PTS accordingly.
-     */
-    int64_t pts;
-    int64_t pos;                ///< byte position in stream, -1 if unknown
-
-    int format;                 ///< media format
-
-    int perms;                  ///< permissions, see the AV_PERM_* flags
-
-    enum AVMediaType type;      ///< media type of buffer data
-
-    AVDictionary *metadata;     ///< dictionary containing metadata key=value tags
-} AVFilterBufferRef;
-
-/**
- * Copy properties of src to dst, without copying the actual data
- */
-attribute_deprecated
-void avfilter_copy_buffer_ref_props(AVFilterBufferRef *dst, const AVFilterBufferRef *src);
-
-/**
- * Add a new reference to a buffer.
- *
- * @param ref   an existing reference to the buffer
- * @param pmask a bitmask containing the allowable permissions in the new
- *              reference
- * @return      a new reference to the buffer with the same properties as the
- *              old, excluding any permissions denied by pmask
- */
-attribute_deprecated
-AVFilterBufferRef *avfilter_ref_buffer(AVFilterBufferRef *ref, int pmask);
-
-/**
- * Remove a reference to a buffer. If this is the last reference to the
- * buffer, the buffer itself is also automatically freed.
- *
- * @param ref reference to the buffer, may be NULL
- *
- * @note it is recommended to use avfilter_unref_bufferp() instead of this
- * function
- */
-attribute_deprecated
-void avfilter_unref_buffer(AVFilterBufferRef *ref);
-
-/**
- * Remove a reference to a buffer and set the pointer to NULL.
- * If this is the last reference to the buffer, the buffer itself
- * is also automatically freed.
- *
- * @param ref pointer to the buffer reference
- */
-attribute_deprecated
-void avfilter_unref_bufferp(AVFilterBufferRef **ref);
-#endif
-
-/**
- * Get the number of channels of a buffer reference.
- */
-attribute_deprecated
-int avfilter_ref_get_channels(AVFilterBufferRef *ref);
-
-#if FF_API_AVFILTERPAD_PUBLIC
-/**
- * A filter pad used for either input or output.
- *
- * See doc/filter_design.txt for details on how to implement the methods.
- *
- * @warning this struct might be removed from public API.
- * users should call avfilter_pad_get_name() and avfilter_pad_get_type()
- * to access the name and type fields; there should be no need to access
- * any other fields from outside of libavfilter.
- */
-struct AVFilterPad {
-    /**
-     * Pad name. The name is unique among inputs and among outputs, but an
-     * input may have the same name as an output. This may be NULL if this
-     * pad has no need to ever be referenced by name.
-     */
-    const char *name;
-
-    /**
-     * AVFilterPad type.
-     */
-    enum AVMediaType type;
-
-    /**
-     * Input pads:
-     * Minimum required permissions on incoming buffers. Any buffer with
-     * insufficient permissions will be automatically copied by the filter
-     * system to a new buffer which provides the needed access permissions.
-     *
-     * Output pads:
-     * Guaranteed permissions on outgoing buffers. Any buffer pushed on the
-     * link must have at least these permissions; this fact is checked by
-     * asserts. It can be used to optimize buffer allocation.
-     */
-    attribute_deprecated int min_perms;
-
-    /**
-     * Input pads:
-     * Permissions which are not accepted on incoming buffers. Any buffer
-     * which has any of these permissions set will be automatically copied
-     * by the filter system to a new buffer which does not have those
-     * permissions. This can be used to easily disallow buffers with
-     * AV_PERM_REUSE.
-     *
-     * Output pads:
-     * Permissions which are automatically removed on outgoing buffers. It
-     * can be used to optimize buffer allocation.
-     */
-    attribute_deprecated int rej_perms;
-
-    /**
-     * @deprecated unused
-     */
-    int (*start_frame)(AVFilterLink *link, AVFilterBufferRef *picref);
-
-    /**
-     * Callback function to get a video buffer. If NULL, the filter system will
-     * use ff_default_get_video_buffer().
-     *
-     * Input video pads only.
-     */
-    AVFrame *(*get_video_buffer)(AVFilterLink *link, int w, int h);
-
-    /**
-     * Callback function to get an audio buffer. If NULL, the filter system will
-     * use ff_default_get_audio_buffer().
-     *
-     * Input audio pads only.
-     */
-    AVFrame *(*get_audio_buffer)(AVFilterLink *link, int nb_samples);
-
-    /**
-     * @deprecated unused
-     */
-    int (*end_frame)(AVFilterLink *link);
-
-    /**
-     * @deprecated unused
-     */
-    int (*draw_slice)(AVFilterLink *link, int y, int height, int slice_dir);
-
-    /**
-     * Filtering callback. This is where a filter receives a frame with
-     * audio/video data and should do its processing.
-     *
-     * Input pads only.
-     *
-     * @return >= 0 on success, a negative AVERROR on error. This function
-     * must ensure that frame is properly unreferenced on error if it
-     * hasn't been passed on to another filter.
-     */
-    int (*filter_frame)(AVFilterLink *link, AVFrame *frame);
-
-    /**
-     * Frame poll callback. This returns the number of immediately available
-     * samples. It should return a positive value if the next request_frame()
-     * is guaranteed to return one frame (with no delay).
-     *
-     * Defaults to just calling the source poll_frame() method.
-     *
-     * Output pads only.
-     */
-    int (*poll_frame)(AVFilterLink *link);
-
-    /**
-     * Frame request callback. A call to this should result in at least one
-     * frame being output over the given link. This should return zero on
-     * success, and another value on error.
-     * See ff_request_frame() for the error codes with a specific
-     * meaning.
-     *
-     * Output pads only.
-     */
-    int (*request_frame)(AVFilterLink *link);
-
-    /**
-     * Link configuration callback.
-     *
-     * For output pads, this should set the following link properties:
-     * video: width, height, sample_aspect_ratio, time_base
-     * audio: sample_rate.
-     *
-     * This should NOT set properties such as format, channel_layout, etc which
-     * are negotiated between filters by the filter system using the
-     * query_formats() callback before this function is called.
-     *
-     * For input pads, this should check the properties of the link, and update
-     * the filter's internal state as necessary.
-     *
-     * For both input and output pads, this should return zero on success,
-     * and another value on error.
-     */
-    int (*config_props)(AVFilterLink *link);
-
-    /**
-     * The filter expects a fifo to be inserted on its input link,
-     * typically because it has a delay.
-     *
-     * input pads only.
-     */
-    int needs_fifo;
-
-    /**
-     * The filter expects writable frames from its input link,
-     * duplicating data buffers if needed.
-     *
-     * input pads only.
-     */
-    int needs_writable;
-};
-#endif
-
 /**
  * Get the number of elements in a NULL-terminated array of AVFilterPads (e.g.
  * AVFilter.inputs/outputs).
@@ -639,16 +310,10 @@ struct AVFilterContext {
 
     AVFilterPad   *input_pads;      ///< array of input pads
     AVFilterLink **inputs;          ///< array of pointers to input links
-#if FF_API_FOO_COUNT
-    attribute_deprecated unsigned input_count; ///< @deprecated use nb_inputs
-#endif
     unsigned    nb_inputs;          ///< number of input pads
 
     AVFilterPad   *output_pads;     ///< array of output pads
     AVFilterLink **outputs;         ///< array of pointers to output links
-#if FF_API_FOO_COUNT
-    attribute_deprecated unsigned output_count; ///< @deprecated use nb_outputs
-#endif
     unsigned    nb_outputs;         ///< number of output pads
 
     void *priv;                     ///< private data for use by the filter
@@ -763,8 +428,6 @@ struct AVFilterLink {
         AVLINK_INIT             ///< complete
     } init_state;
 
-    struct AVFilterPool *pool;
-
     /**
      * Graph the filter belongs to.
      */
@@ -772,21 +435,28 @@ struct AVFilterLink {
 
     /**
      * Current timestamp of the link, as defined by the most recent
-     * frame(s), in AV_TIME_BASE units.
+     * frame(s), in link time_base units.
      */
     int64_t current_pts;
 
+    /**
+     * Current timestamp of the link, as defined by the most recent
+     * frame(s), in AV_TIME_BASE units.
+     */
+    int64_t current_pts_us;
+
     /**
      * Index in the age array.
      */
     int age_index;
 
     /**
-     * Frame rate of the stream on the link, or 1/0 if unknown;
-     * if left to 0/0, will be automatically be copied from the first input
+     * Frame rate of the stream on the link, or 1/0 if unknown or variable;
+     * if left to 0/0, will be automatically copied from the first input
      * of the source filter if it exists.
      *
      * Sources should set it to the best estimation of the real frame rate.
+     * If the source frame rate is unknown or variable, set this to 1/0.
      * Filters should update it if necessary depending on their function.
      * Sinks can use it to set a default output frame rate.
      * It is similar to the r_frame_rate field in AVStream.
@@ -820,37 +490,22 @@ struct AVFilterLink {
     int max_samples;
 
     /**
-     * The buffer reference currently being received across the link by the
-     * destination filter. This is used internally by the filter system to
-     * allow automatic copying of buffers which do not have sufficient
-     * permissions for the destination. This should not be accessed directly
-     * by the filters.
-     */
-    AVFilterBufferRef *cur_buf_copy;
-
-    /**
-     * True if the link is closed.
-     * If set, all attempts of start_frame, filter_frame or request_frame
-     * will fail with AVERROR_EOF, and if necessary the reference will be
-     * destroyed.
-     * If request_frame returns AVERROR_EOF, this flag is set on the
+     * Link status.
+     * If not zero, all attempts of filter_frame or request_frame
+     * will fail with the corresponding code, and if necessary the reference
+     * will be destroyed.
+     * If request_frame returns an error, the status is set on the
      * corresponding link.
      * It can be set also be set by either the source or the destination
      * filter.
      */
-    int closed;
+    int status;
 
     /**
      * Number of channels.
      */
     int channels;
 
-    /**
-     * True if a frame is being requested on the link.
-     * Used internally by the framework.
-     */
-    unsigned frame_requested;
-
     /**
      * Link processing flags.
      */
@@ -860,6 +515,25 @@ struct AVFilterLink {
      * Number of past frames sent through the link.
      */
     int64_t frame_count;
+
+    /**
+     * A pointer to an FFVideoFramePool struct.
+     */
+    void *video_frame_pool;
+
+    /**
+     * True if a frame is currently wanted on the input of this filter.
+     * Set when ff_request_frame() is called by the output,
+     * cleared when the request is handled or forwarded.
+     */
+    int frame_wanted_in;
+
+    /**
+     * True if a frame is currently wanted on the output of this filter.
+     * Set when ff_request_frame() is called by the output,
+     * cleared when a frame is filtered.
+     */
+    int frame_wanted_out;
 };
 
 /**
@@ -886,7 +560,10 @@ int avfilter_link_get_channels(AVFilterLink *link);
 
 /**
  * Set the closed field of a link.
+ * @deprecated Applications are not supposed to manipulate links directly;
+ * they should close the sinks instead.
  */
+attribute_deprecated
 void avfilter_link_set_closed(AVFilterLink *link, int closed);
 
 /**
@@ -897,69 +574,6 @@ void avfilter_link_set_closed(AVFilterLink *link, int closed);
  */
 int avfilter_config_links(AVFilterContext *filter);
 
-#if FF_API_AVFILTERBUFFER
-/**
- * Create a buffer reference wrapped around an already allocated image
- * buffer.
- *
- * @param data pointers to the planes of the image to reference
- * @param linesize linesizes for the planes of the image to reference
- * @param perms the required access permissions
- * @param w the width of the image specified by the data and linesize arrays
- * @param h the height of the image specified by the data and linesize arrays
- * @param format the pixel format of the image specified by the data and linesize arrays
- */
-attribute_deprecated
-AVFilterBufferRef *
-avfilter_get_video_buffer_ref_from_arrays(uint8_t * const data[4], const int linesize[4], int perms,
-                                          int w, int h, enum AVPixelFormat format);
-
-/**
- * Create an audio buffer reference wrapped around an already
- * allocated samples buffer.
- *
- * See avfilter_get_audio_buffer_ref_from_arrays_channels() for a version
- * that can handle unknown channel layouts.
- *
- * @param data           pointers to the samples plane buffers
- * @param linesize       linesize for the samples plane buffers
- * @param perms          the required access permissions
- * @param nb_samples     number of samples per channel
- * @param sample_fmt     the format of each sample in the buffer to allocate
- * @param channel_layout the channel layout of the buffer
- */
-attribute_deprecated
-AVFilterBufferRef *avfilter_get_audio_buffer_ref_from_arrays(uint8_t **data,
-                                                             int linesize,
-                                                             int perms,
-                                                             int nb_samples,
-                                                             enum AVSampleFormat sample_fmt,
-                                                             uint64_t channel_layout);
-/**
- * Create an audio buffer reference wrapped around an already
- * allocated samples buffer.
- *
- * @param data           pointers to the samples plane buffers
- * @param linesize       linesize for the samples plane buffers
- * @param perms          the required access permissions
- * @param nb_samples     number of samples per channel
- * @param sample_fmt     the format of each sample in the buffer to allocate
- * @param channels       the number of channels of the buffer
- * @param channel_layout the channel layout of the buffer,
- *                       must be either 0 or consistent with channels
- */
-attribute_deprecated
-AVFilterBufferRef *avfilter_get_audio_buffer_ref_from_arrays_channels(uint8_t **data,
-                                                                      int linesize,
-                                                                      int perms,
-                                                                      int nb_samples,
-                                                                      enum AVSampleFormat sample_fmt,
-                                                                      int channels,
-                                                                      uint64_t channel_layout);
-
-#endif
-
-
 #define AVFILTER_CMD_FLAG_ONE   1 ///< Stop once a filter understood the command (for target=all for example), fast filters are favored automatically
 #define AVFILTER_CMD_FLAG_FAST  2 ///< Only execute command when its fast (like a video out that supports contrast adjustment in hw)
 
@@ -1106,26 +720,6 @@ void avfilter_free(AVFilterContext *filter);
 int avfilter_insert_filter(AVFilterLink *link, AVFilterContext *filt,
                            unsigned filt_srcpad_idx, unsigned filt_dstpad_idx);
 
-#if FF_API_AVFILTERBUFFER
-/**
- * Copy the frame properties of src to dst, without copying the actual
- * image data.
- *
- * @return 0 on success, a negative number on error.
- */
-attribute_deprecated
-int avfilter_copy_frame_props(AVFilterBufferRef *dst, const AVFrame *src);
-
-/**
- * Copy the frame properties and data pointers of src to dst, without copying
- * the actual data.
- *
- * @return 0 on success, a negative number on error.
- */
-attribute_deprecated
-int avfilter_copy_buf_props(AVFrame *dst, const AVFilterBufferRef *src);
-#endif
-
 /**
  * @return AVClass for AVFilterContext.
  *
@@ -1166,20 +760,11 @@ typedef int (avfilter_execute_func)(AVFilterContext *ctx, avfilter_action_func *
 
 typedef struct AVFilterGraph {
     const AVClass *av_class;
-#if FF_API_FOO_COUNT
-    attribute_deprecated
-    unsigned filter_count_unused;
-#endif
     AVFilterContext **filters;
-#if !FF_API_FOO_COUNT
     unsigned nb_filters;
-#endif
 
     char *scale_sws_opts; ///< sws options to use for the auto-inserted scale filters
     char *resample_lavr_opts;   ///< libavresample options to use for the auto-inserted resample filters
-#if FF_API_FOO_COUNT
-    unsigned nb_filters;
-#endif
 
     /**
      * Type of multithreading allowed for filters in this graph. A combination
@@ -1375,7 +960,6 @@ AVFilterInOut *avfilter_inout_alloc(void);
  */
 void avfilter_inout_free(AVFilterInOut **inout);
 
-#if AV_HAVE_INCOMPATIBLE_LIBAV_ABI || !FF_API_OLD_GRAPH_PARSE
 /**
  * Add a graph described by a string to a graph.
  *
@@ -1397,26 +981,6 @@ void avfilter_inout_free(AVFilterInOut **inout);
 int avfilter_graph_parse(AVFilterGraph *graph, const char *filters,
                          AVFilterInOut *inputs, AVFilterInOut *outputs,
                          void *log_ctx);
-#else
-/**
- * Add a graph described by a string to a graph.
- *
- * @param graph   the filter graph where to link the parsed graph context
- * @param filters string to be parsed
- * @param inputs  pointer to a linked list to the inputs of the graph, may be NULL.
- *                If non-NULL, *inputs is updated to contain the list of open inputs
- *                after the parsing, should be freed with avfilter_inout_free().
- * @param outputs pointer to a linked list to the outputs of the graph, may be NULL.
- *                If non-NULL, *outputs is updated to contain the list of open outputs
- *                after the parsing, should be freed with avfilter_inout_free().
- * @return non negative on success, a negative AVERROR code on error
- * @deprecated Use avfilter_graph_parse_ptr() instead.
- */
-attribute_deprecated
-int avfilter_graph_parse(AVFilterGraph *graph, const char *filters,
-                         AVFilterInOut **inputs, AVFilterInOut **outputs,
-                         void *log_ctx);
-#endif
 
 /**
  * Add a graph described by a string to a graph.
diff --git a/libavfilter/avfiltergraph.c b/libavfilter/avfiltergraph.c
index bac0da18..9f50b412 100644
--- a/libavfilter/avfiltergraph.c
+++ b/libavfilter/avfiltergraph.c
@@ -135,12 +135,6 @@ int avfilter_graph_add_filter(AVFilterGraph *graph, AVFilterContext *filter)
     graph->filters = filters;
     graph->filters[graph->nb_filters++] = filter;
 
-#if FF_API_FOO_COUNT
-FF_DISABLE_DEPRECATION_WARNINGS
-    graph->filter_count_unused = graph->nb_filters;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
-
     filter->graph = graph;
 
     return 0;
@@ -206,12 +200,6 @@ AVFilterContext *avfilter_graph_alloc_filter(AVFilterGraph *graph,
     graph->filters = filters;
     graph->filters[graph->nb_filters++] = s;
 
-#if FF_API_FOO_COUNT
-FF_DISABLE_DEPRECATION_WARNINGS
-    graph->filter_count_unused = graph->nb_filters;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
-
     s->graph = graph;
 
     return s;
@@ -329,18 +317,15 @@ static int filter_query_formats(AVFilterContext *ctx)
         sanitize_channel_layouts(ctx, ctx->outputs[i]->in_channel_layouts);
 
     formats = ff_all_formats(type);
-    if (!formats)
-        return AVERROR(ENOMEM);
-    ff_set_common_formats(ctx, formats);
+    if ((ret = ff_set_common_formats(ctx, formats)) < 0)
+        return ret;
     if (type == AVMEDIA_TYPE_AUDIO) {
         samplerates = ff_all_samplerates();
-        if (!samplerates)
-            return AVERROR(ENOMEM);
-        ff_set_common_samplerates(ctx, samplerates);
+        if ((ret = ff_set_common_samplerates(ctx, samplerates)) < 0)
+            return ret;
         chlayouts = ff_all_channel_layouts();
-        if (!chlayouts)
-            return AVERROR(ENOMEM);
-        ff_set_common_channel_layouts(ctx, chlayouts);
+        if ((ret = ff_set_common_channel_layouts(ctx, chlayouts)) < 0)
+            return ret;
     }
     return 0;
 }
@@ -631,6 +616,40 @@ static int query_formats(AVFilterGraph *graph, AVClass *log_ctx)
     return 0;
 }
 
+static int get_fmt_score(enum AVSampleFormat dst_fmt, enum AVSampleFormat src_fmt)
+{
+    int score = 0; /* penalty for converting src_fmt to dst_fmt; lower is better */
+
+    if (av_sample_fmt_is_planar(dst_fmt) != av_sample_fmt_is_planar(src_fmt))
+        score ++; /* planar-ness differs: smallest penalty */
+
+    if (av_get_bytes_per_sample(dst_fmt) < av_get_bytes_per_sample(src_fmt)) {
+        score += 100 * (av_get_bytes_per_sample(src_fmt) - av_get_bytes_per_sample(dst_fmt)); /* narrower dst */
+    }else /* same or wider dst: 10 per extra byte, versus 100 per byte lost above */
+        score += 10  * (av_get_bytes_per_sample(dst_fmt) - av_get_bytes_per_sample(src_fmt));
+
+    if (av_get_packed_sample_fmt(dst_fmt) == AV_SAMPLE_FMT_S32 &&
+        av_get_packed_sample_fmt(src_fmt) == AV_SAMPLE_FMT_FLT)
+        score += 20; /* FLT source to S32 destination, compared via packed equivalents */
+
+    if (av_get_packed_sample_fmt(dst_fmt) == AV_SAMPLE_FMT_FLT &&
+        av_get_packed_sample_fmt(src_fmt) == AV_SAMPLE_FMT_S32)
+        score += 2; /* S32 source to FLT destination: much smaller penalty than the reverse */
+
+    return score;
+}
+
+static enum AVSampleFormat find_best_sample_fmt_of_2(enum AVSampleFormat dst_fmt1, enum AVSampleFormat dst_fmt2,
+                                                     enum AVSampleFormat src_fmt)
+{
+    int score1, score2; /* get_fmt_score() penalties of each candidate against src_fmt */
+
+    score1 = get_fmt_score(dst_fmt1, src_fmt);
+    score2 = get_fmt_score(dst_fmt2, src_fmt);
+
+    return score1 < score2 ? dst_fmt1 : dst_fmt2; /* lowest penalty wins; a tie selects dst_fmt2 */
+}
+
 static int pick_format(AVFilterLink *link, AVFilterLink *ref)
 {
     if (!link || !link->in_formats)
@@ -650,6 +669,19 @@ static int pick_format(AVFilterLink *link, AVFilterLink *ref)
                    av_get_pix_fmt_name(ref->format), has_alpha);
             link->in_formats->formats[0] = best;
         }
+    } else if (link->type == AVMEDIA_TYPE_AUDIO) {
+        if(ref && ref->type == AVMEDIA_TYPE_AUDIO){
+            enum AVSampleFormat best= AV_SAMPLE_FMT_NONE;
+            int i;
+            for (i=0; i<link->in_formats->nb_formats; i++) {
+                enum AVSampleFormat p = link->in_formats->formats[i];
+                best = find_best_sample_fmt_of_2(best, p, ref->format);
+            }
+            av_log(link->src,AV_LOG_DEBUG, "picking %s out of %d ref:%s\n",
+                   av_get_sample_fmt_name(best), link->in_formats->nb_formats,
+                   av_get_sample_fmt_name(ref->format));
+            link->in_formats->formats[0] = best;
+        }
     }
 
     link->in_formats->nb_formats = 1;
@@ -693,7 +725,7 @@ static int pick_format(AVFilterLink *link, AVFilterLink *ref)
     return 0;
 }
 
-#define REDUCE_FORMATS(fmt_type, list_type, list, var, nb, add_format) \
+#define REDUCE_FORMATS(fmt_type, list_type, list, var, nb, add_format, unref_format) \
 do {                                                                   \
     for (i = 0; i < filter->nb_inputs; i++) {                          \
         AVFilterLink *link = filter->inputs[i];                        \
@@ -713,7 +745,8 @@ do {                                                                   \
             fmts = out_link->in_ ## list;                              \
                                                                        \
             if (!out_link->in_ ## list->nb) {                          \
-                add_format(&out_link->in_ ##list, fmt);                \
+                if ((ret = add_format(&out_link->in_ ##list, fmt)) < 0)\
+                    return ret;                                        \
                 ret = 1;                                               \
                 break;                                                 \
             }                                                          \
@@ -734,9 +767,9 @@ static int reduce_formats_on_filter(AVFilterContext *filter)
     int i, j, k, ret = 0;
 
     REDUCE_FORMATS(int,      AVFilterFormats,        formats,         formats,
-                   nb_formats, ff_add_format);
+                   nb_formats, ff_add_format, ff_formats_unref);
     REDUCE_FORMATS(int,      AVFilterFormats,        samplerates,     formats,
-                   nb_formats, ff_add_format);
+                   nb_formats, ff_add_format, ff_formats_unref);
 
     /* reduce channel layouts */
     for (i = 0; i < filter->nb_inputs; i++) {
@@ -760,7 +793,8 @@ static int reduce_formats_on_filter(AVFilterContext *filter)
                 (!FF_LAYOUT2COUNT(fmt) || fmts->all_counts)) {
                 /* Turn the infinite list into a singleton */
                 fmts->all_layouts = fmts->all_counts  = 0;
-                ff_add_channel_layout(&outlink->in_channel_layouts, fmt);
+                if (ff_add_channel_layout(&outlink->in_channel_layouts, fmt) < 0)
+                    ret = 1;
                 break;
             }
 
@@ -778,16 +812,21 @@ static int reduce_formats_on_filter(AVFilterContext *filter)
     return ret;
 }
 
-static void reduce_formats(AVFilterGraph *graph)
+static int reduce_formats(AVFilterGraph *graph)
 {
-    int i, reduced;
+    int i, reduced, ret;
 
     do {
         reduced = 0;
 
-        for (i = 0; i < graph->nb_filters; i++)
-            reduced |= reduce_formats_on_filter(graph->filters[i]);
+        for (i = 0; i < graph->nb_filters; i++) {
+            if ((ret = reduce_formats_on_filter(graph->filters[i])) < 0)
+                return ret;
+            reduced |= ret;
+        }
     } while (reduced);
+
+    return 0;
 }
 
 static void swap_samplerates_on_filter(AVFilterContext *filter)
@@ -1105,7 +1144,8 @@ static int graph_config_formats(AVFilterGraph *graph, AVClass *log_ctx)
     /* Once everything is merged, it's possible that we'll still have
      * multiple valid media format choices. We try to minimize the amount
      * of format conversion inside filters */
-    reduce_formats(graph);
+    if ((ret = reduce_formats(graph)) < 0)
+        return ret;
 
     /* for audio filters, ensure the best format, sample rate and channel layout
      * is selected */
@@ -1284,7 +1324,7 @@ static void heap_bubble_up(AVFilterGraph *graph,
 
     while (index) {
         int parent = (index - 1) >> 1;
-        if (links[parent]->current_pts >= link->current_pts)
+        if (links[parent]->current_pts_us >= link->current_pts_us)
             break;
         links[index] = links[parent];
         links[index]->age_index = index;
@@ -1306,9 +1346,9 @@ static void heap_bubble_down(AVFilterGraph *graph,
         if (child >= graph->sink_links_count)
             break;
         if (child + 1 < graph->sink_links_count &&
-            links[child + 1]->current_pts < links[child]->current_pts)
+            links[child + 1]->current_pts_us < links[child]->current_pts_us)
             child++;
-        if (link->current_pts < links[child]->current_pts)
+        if (link->current_pts_us < links[child]->current_pts_us)
             break;
         links[index] = links[child];
         links[index]->age_index = index;
@@ -1327,11 +1367,14 @@ void ff_avfilter_graph_update_heap(AVFilterGraph *graph, AVFilterLink *link)
 
 int avfilter_graph_request_oldest(AVFilterGraph *graph)
 {
+    AVFilterLink *oldest = graph->sink_links[0];
+    int r;
+
     while (graph->sink_links_count) {
-        AVFilterLink *oldest = graph->sink_links[0];
-        int r = ff_request_frame(oldest);
+        oldest = graph->sink_links[0];
+        r = ff_request_frame(oldest);
         if (r != AVERROR_EOF)
-            return r;
+            break;
         av_log(oldest->dst, AV_LOG_DEBUG, "EOF on sink link %s:%s.\n",
                oldest->dst ? oldest->dst->name : "unknown",
                oldest->dstpad ? oldest->dstpad->name : "unknown");
@@ -1341,5 +1384,52 @@ int avfilter_graph_request_oldest(AVFilterGraph *graph)
                              oldest->age_index);
         oldest->age_index = -1;
     }
-    return AVERROR_EOF;
+    if (!graph->sink_links_count)
+        return AVERROR_EOF;
+    av_assert1(oldest->age_index >= 0);
+    while (oldest->frame_wanted_out) {
+        r = ff_filter_graph_run_once(graph);
+        if (r < 0)
+            return r;
+    }
+    return 0;
+}
+
+static AVFilterLink *graph_run_once_find_filter(AVFilterGraph *graph)
+{
+    unsigned i, j;
+    AVFilterContext *f;
+
+    /* TODO: replace scanning the graph with a priority list */
+    for (i = 0; i < graph->nb_filters; i++) {
+        f = graph->filters[i];
+        for (j = 0; j < f->nb_outputs; j++)
+            if (f->outputs[j]->frame_wanted_in)
+                return f->outputs[j];
+    }
+    for (i = 0; i < graph->nb_filters; i++) {
+        f = graph->filters[i];
+        for (j = 0; j < f->nb_outputs; j++)
+            if (f->outputs[j]->frame_wanted_out)
+                return f->outputs[j];
+    }
+    return NULL;
+}
+
+int ff_filter_graph_run_once(AVFilterGraph *graph)
+{
+    AVFilterLink *link;
+    int ret;
+
+    link = graph_run_once_find_filter(graph);
+    if (!link) {
+        av_log(NULL, AV_LOG_WARNING, "Useless run of a filter graph\n");
+        return AVERROR(EAGAIN);
+    }
+    ret = ff_request_frame_to_filter(link);
+    if (ret == AVERROR_EOF)
+        /* local EOF will be forwarded through request_frame() /
+           set_status() until it reaches the sink */
+        ret = 0;
+    return ret < 0 ? ret : 1;
 }
diff --git a/libavfilter/blend.h b/libavfilter/blend.h
new file mode 100644
index 00000000..ccef1361
--- /dev/null
+++ b/libavfilter/blend.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2013 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_BLEND_H
+#define AVFILTER_BLEND_H
+
+#include "libavutil/eval.h"
+#include "avfilter.h"
+
+enum BlendMode {
+    BLEND_UNSET = -1,
+    BLEND_NORMAL,
+    BLEND_ADDITION,
+    BLEND_AND,
+    BLEND_AVERAGE,
+    BLEND_BURN,
+    BLEND_DARKEN,
+    BLEND_DIFFERENCE,
+    BLEND_DIFFERENCE128,
+    BLEND_DIVIDE,
+    BLEND_DODGE,
+    BLEND_EXCLUSION,
+    BLEND_HARDLIGHT,
+    BLEND_LIGHTEN,
+    BLEND_MULTIPLY,
+    BLEND_NEGATION,
+    BLEND_OR,
+    BLEND_OVERLAY,
+    BLEND_PHOENIX,
+    BLEND_PINLIGHT,
+    BLEND_REFLECT,
+    BLEND_SCREEN,
+    BLEND_SOFTLIGHT,
+    BLEND_SUBTRACT,
+    BLEND_VIVIDLIGHT,
+    BLEND_XOR,
+    BLEND_HARDMIX,
+    BLEND_LINEARLIGHT,
+    BLEND_GLOW,
+    BLEND_ADDITION128,
+    BLEND_MULTIPLY128,
+    BLEND_NB
+};
+
+typedef struct FilterParams {
+    enum BlendMode mode;
+    double opacity;
+    AVExpr *e;
+    char *expr_str;
+    void (*blend)(const uint8_t *top, ptrdiff_t top_linesize,
+                  const uint8_t *bottom, ptrdiff_t bottom_linesize,
+                  uint8_t *dst, ptrdiff_t dst_linesize,
+                  ptrdiff_t width, ptrdiff_t height,
+                  struct FilterParams *param, double *values);
+} FilterParams;
+
+void ff_blend_init(FilterParams *param, int is_16bit);
+void ff_blend_init_x86(FilterParams *param, int is_16bit);
+
+#endif /* AVFILTER_BLEND_H */
diff --git a/libavfilter/buffer.c b/libavfilter/buffer.c
deleted file mode 100644
index a5b3b1db..00000000
--- a/libavfilter/buffer.c
+++ /dev/null
@@ -1,179 +0,0 @@
-/*
- * Copyright Stefano Sabatini <stefasab gmail com>
- * Copyright Anton Khirnov <anton khirnov net>
- * Copyright Michael Niedermayer <michaelni gmx at>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavutil/channel_layout.h"
-#include "libavutil/avassert.h"
-#include "libavutil/common.h"
-#include "libavutil/imgutils.h"
-#include "libavutil/internal.h"
-#include "libavcodec/avcodec.h"
-
-#include "avfilter.h"
-#include "internal.h"
-#include "audio.h"
-#include "avcodec.h"
-#include "version.h"
-
-#if FF_API_AVFILTERBUFFER
-void ff_avfilter_default_free_buffer(AVFilterBuffer *ptr)
-{
-    if (ptr->extended_data != ptr->data)
-        av_freep(&ptr->extended_data);
-    av_freep(&ptr->data[0]);
-    av_free(ptr);
-}
-
-static int copy_video_props(AVFilterBufferRefVideoProps *dst, AVFilterBufferRefVideoProps *src) {
-    *dst = *src;
-    if (src->qp_table) {
-        int qsize = src->qp_table_size;
-        dst->qp_table = av_malloc(qsize);
-        if (!dst->qp_table) {
-            av_log(NULL, AV_LOG_ERROR, "Failed to allocate qp_table\n");
-            dst->qp_table_size = 0;
-            return AVERROR(ENOMEM);
-        }
-        memcpy(dst->qp_table, src->qp_table, qsize);
-    }
-    return 0;
-}
-
-AVFilterBufferRef *avfilter_ref_buffer(AVFilterBufferRef *ref, int pmask)
-{
-    AVFilterBufferRef *ret = av_malloc(sizeof(AVFilterBufferRef));
-    if (!ret)
-        return NULL;
-    *ret = *ref;
-
-    ret->metadata = NULL;
-    av_dict_copy(&ret->metadata, ref->metadata, 0);
-
-    if (ref->type == AVMEDIA_TYPE_VIDEO) {
-        ret->video = av_malloc(sizeof(AVFilterBufferRefVideoProps));
-        if (!ret->video) {
-            av_free(ret);
-            return NULL;
-        }
-        copy_video_props(ret->video, ref->video);
-        ret->extended_data = ret->data;
-    } else if (ref->type == AVMEDIA_TYPE_AUDIO) {
-        ret->audio = av_malloc(sizeof(AVFilterBufferRefAudioProps));
-        if (!ret->audio) {
-            av_free(ret);
-            return NULL;
-        }
-        *ret->audio = *ref->audio;
-
-        if (ref->extended_data && ref->extended_data != ref->data) {
-            int nb_channels = av_get_channel_layout_nb_channels(ref->audio->channel_layout);
-            if (!(ret->extended_data = av_malloc_array(sizeof(*ret->extended_data),
-                                                 nb_channels))) {
-                av_freep(&ret->audio);
-                av_freep(&ret);
-                return NULL;
-            }
-            memcpy(ret->extended_data, ref->extended_data,
-                   sizeof(*ret->extended_data) * nb_channels);
-        } else
-            ret->extended_data = ret->data;
-    }
-    ret->perms &= pmask;
-    ret->buf->refcount ++;
-    return ret;
-}
-
-void avfilter_unref_buffer(AVFilterBufferRef *ref)
-{
-    if (!ref)
-        return;
-    av_assert0(ref->buf->refcount > 0);
-    if (!(--ref->buf->refcount))
-        ref->buf->free(ref->buf);
-    if (ref->extended_data != ref->data)
-        av_freep(&ref->extended_data);
-    if (ref->video)
-        av_freep(&ref->video->qp_table);
-    av_freep(&ref->video);
-    av_freep(&ref->audio);
-    av_dict_free(&ref->metadata);
-    av_free(ref);
-}
-
-void avfilter_unref_bufferp(AVFilterBufferRef **ref)
-{
-FF_DISABLE_DEPRECATION_WARNINGS
-    avfilter_unref_buffer(*ref);
-FF_ENABLE_DEPRECATION_WARNINGS
-    *ref = NULL;
-}
-
-int avfilter_copy_frame_props(AVFilterBufferRef *dst, const AVFrame *src)
-{
-    dst->pts    = src->pts;
-    dst->pos    = av_frame_get_pkt_pos(src);
-    dst->format = src->format;
-
-    av_dict_free(&dst->metadata);
-    av_dict_copy(&dst->metadata, av_frame_get_metadata(src), 0);
-
-    switch (dst->type) {
-    case AVMEDIA_TYPE_VIDEO:
-        dst->video->w                   = src->width;
-        dst->video->h                   = src->height;
-        dst->video->sample_aspect_ratio = src->sample_aspect_ratio;
-        dst->video->interlaced          = src->interlaced_frame;
-        dst->video->top_field_first     = src->top_field_first;
-        dst->video->key_frame           = src->key_frame;
-        dst->video->pict_type           = src->pict_type;
-        break;
-    case AVMEDIA_TYPE_AUDIO:
-        dst->audio->sample_rate         = src->sample_rate;
-        dst->audio->channel_layout      = src->channel_layout;
-        break;
-    default:
-        return AVERROR(EINVAL);
-    }
-
-    return 0;
-}
-
-void avfilter_copy_buffer_ref_props(AVFilterBufferRef *dst, const AVFilterBufferRef *src)
-{
-    // copy common properties
-    dst->pts             = src->pts;
-    dst->pos             = src->pos;
-
-    switch (src->type) {
-    case AVMEDIA_TYPE_VIDEO: {
-        if (dst->video->qp_table)
-            av_freep(&dst->video->qp_table);
-        copy_video_props(dst->video, src->video);
-        break;
-    }
-    case AVMEDIA_TYPE_AUDIO: *dst->audio = *src->audio; break;
-    default: break;
-    }
-
-    av_dict_free(&dst->metadata);
-    av_dict_copy(&dst->metadata, src->metadata, 0);
-}
-#endif /* FF_API_AVFILTERBUFFER */
diff --git a/libavfilter/buffersink.c b/libavfilter/buffersink.c
index b145e35c..2feb56de 100644
--- a/libavfilter/buffersink.c
+++ b/libavfilter/buffersink.c
@@ -62,6 +62,8 @@ typedef struct BufferSinkContext {
 } BufferSinkContext;
 
 #define NB_ITEMS(list) (list ## _size / sizeof(*list))
+#define FIFO_INIT_SIZE 8
+#define FIFO_INIT_ELEMENT_SIZE sizeof(void *)
 
 static av_cold void uninit(AVFilterContext *ctx)
 {
@@ -72,7 +74,7 @@ static av_cold void uninit(AVFilterContext *ctx)
         av_audio_fifo_free(sink->audio_fifo);
 
     if (sink->fifo) {
-        while (av_fifo_size(sink->fifo) >= sizeof(AVFilterBufferRef *)) {
+        while (av_fifo_size(sink->fifo) >= FIFO_INIT_ELEMENT_SIZE) {
             av_fifo_generic_read(sink->fifo, &frame, sizeof(frame), NULL);
             av_frame_free(&frame);
         }
@@ -84,7 +86,7 @@ static int add_buffer_ref(AVFilterContext *ctx, AVFrame *ref)
 {
     BufferSinkContext *buf = ctx->priv;
 
-    if (av_fifo_space(buf->fifo) < sizeof(AVFilterBufferRef *)) {
+    if (av_fifo_space(buf->fifo) < FIFO_INIT_ELEMENT_SIZE) {
         /* realloc fifo size */
         if (av_fifo_realloc2(buf->fifo, av_fifo_size(buf->fifo) * 2) < 0) {
             av_log(ctx, AV_LOG_ERROR,
@@ -95,7 +97,7 @@ static int add_buffer_ref(AVFilterContext *ctx, AVFrame *ref)
     }
 
     /* cache frame */
-    av_fifo_generic_write(buf->fifo, &ref, sizeof(AVFilterBufferRef *), NULL);
+    av_fifo_generic_write(buf->fifo, &ref, FIFO_INIT_ELEMENT_SIZE, NULL);
     return 0;
 }
 
@@ -108,7 +110,7 @@ static int filter_frame(AVFilterLink *link, AVFrame *frame)
     if ((ret = add_buffer_ref(ctx, frame)) < 0)
         return ret;
     if (buf->warning_limit &&
-        av_fifo_size(buf->fifo) / sizeof(AVFilterBufferRef *) >= buf->warning_limit) {
+        av_fifo_size(buf->fifo) / FIFO_INIT_ELEMENT_SIZE >= buf->warning_limit) {
         av_log(ctx, AV_LOG_WARNING,
                "%d buffers queued in %s, something may be wrong.\n",
                buf->warning_limit,
@@ -131,18 +133,20 @@ int attribute_align_arg av_buffersink_get_frame_flags(AVFilterContext *ctx, AVFr
     AVFrame *cur_frame;
 
     /* no picref available, fetch it from the filterchain */
-    if (!av_fifo_size(buf->fifo)) {
-        if (inlink->closed)
-            return AVERROR_EOF;
+    while (!av_fifo_size(buf->fifo)) {
+        if (inlink->status)
+            return inlink->status;
         if (flags & AV_BUFFERSINK_FLAG_NO_REQUEST)
             return AVERROR(EAGAIN);
         if ((ret = ff_request_frame(inlink)) < 0)
             return ret;
+        while (inlink->frame_wanted_out) {
+            ret = ff_filter_graph_run_once(ctx->graph);
+            if (ret < 0)
+                return ret;
+        }
     }
 
-    if (!av_fifo_size(buf->fifo))
-        return AVERROR(EINVAL);
-
     if (flags & AV_BUFFERSINK_FLAG_PEEK) {
         cur_frame = *((AVFrame **)av_fifo_peek2(buf->fifo, 0));
         if ((ret = av_frame_ref(frame, cur_frame)) < 0)
@@ -242,13 +246,11 @@ AVABufferSinkParams *av_abuffersink_params_alloc(void)
     return params;
 }
 
-#define FIFO_INIT_SIZE 8
-
 static av_cold int common_init(AVFilterContext *ctx)
 {
     BufferSinkContext *buf = ctx->priv;
 
-    buf->fifo = av_fifo_alloc_array(FIFO_INIT_SIZE, sizeof(AVFilterBufferRef *));
+    buf->fifo = av_fifo_alloc_array(FIFO_INIT_SIZE, FIFO_INIT_ELEMENT_SIZE);
     if (!buf->fifo) {
         av_log(ctx, AV_LOG_ERROR, "Failed to allocate fifo\n");
         return AVERROR(ENOMEM);
@@ -266,95 +268,6 @@ void av_buffersink_set_frame_size(AVFilterContext *ctx, unsigned frame_size)
     inlink->partial_buf_size = frame_size;
 }
 
-#if FF_API_AVFILTERBUFFER
-FF_DISABLE_DEPRECATION_WARNINGS
-static void compat_free_buffer(AVFilterBuffer *buf)
-{
-    AVFrame *frame = buf->priv;
-    av_frame_free(&frame);
-    av_free(buf);
-}
-
-static int compat_read(AVFilterContext *ctx,
-                       AVFilterBufferRef **pbuf, int nb_samples, int flags)
-{
-    AVFilterBufferRef *buf;
-    AVFrame *frame;
-    int ret;
-
-    if (!pbuf)
-        return ff_poll_frame(ctx->inputs[0]);
-
-    frame = av_frame_alloc();
-    if (!frame)
-        return AVERROR(ENOMEM);
-
-    if (!nb_samples)
-        ret = av_buffersink_get_frame_flags(ctx, frame, flags);
-    else
-        ret = av_buffersink_get_samples(ctx, frame, nb_samples);
-
-    if (ret < 0)
-        goto fail;
-
-    AV_NOWARN_DEPRECATED(
-    if (ctx->inputs[0]->type == AVMEDIA_TYPE_VIDEO) {
-        buf = avfilter_get_video_buffer_ref_from_arrays(frame->data, frame->linesize,
-                                                        AV_PERM_READ,
-                                                        frame->width, frame->height,
-                                                        frame->format);
-    } else {
-        buf = avfilter_get_audio_buffer_ref_from_arrays(frame->extended_data,
-                                                        frame->linesize[0], AV_PERM_READ,
-                                                        frame->nb_samples,
-                                                        frame->format,
-                                                        frame->channel_layout);
-    }
-    if (!buf) {
-        ret = AVERROR(ENOMEM);
-        goto fail;
-    }
-
-    avfilter_copy_frame_props(buf, frame);
-    )
-
-    buf->buf->priv = frame;
-    buf->buf->free = compat_free_buffer;
-
-    *pbuf = buf;
-
-    return 0;
-fail:
-    av_frame_free(&frame);
-    return ret;
-}
-
-int attribute_align_arg av_buffersink_read(AVFilterContext *ctx, AVFilterBufferRef **buf)
-{
-    return compat_read(ctx, buf, 0, 0);
-}
-
-int attribute_align_arg av_buffersink_read_samples(AVFilterContext *ctx, AVFilterBufferRef **buf,
-                                                   int nb_samples)
-{
-    return compat_read(ctx, buf, nb_samples, 0);
-}
-
-int attribute_align_arg av_buffersink_get_buffer_ref(AVFilterContext *ctx,
-                                                     AVFilterBufferRef **bufref, int flags)
-{
-    *bufref = NULL;
-
-    av_assert0(    !strcmp(ctx->filter->name, "buffersink")
-                || !strcmp(ctx->filter->name, "abuffersink")
-                || !strcmp(ctx->filter->name, "ffbuffersink")
-                || !strcmp(ctx->filter->name, "ffabuffersink"));
-
-    return compat_read(ctx, bufref, 0, flags);
-}
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
-
 AVRational av_buffersink_get_frame_rate(AVFilterContext *ctx)
 {
     av_assert0(   !strcmp(ctx->filter->name, "buffersink")
@@ -363,19 +276,6 @@ AVRational av_buffersink_get_frame_rate(AVFilterContext *ctx)
     return ctx->inputs[0]->frame_rate;
 }
 
-int attribute_align_arg av_buffersink_poll_frame(AVFilterContext *ctx)
-{
-    BufferSinkContext *buf = ctx->priv;
-    AVFilterLink *inlink = ctx->inputs[0];
-
-    av_assert0(   !strcmp(ctx->filter->name, "buffersink")
-               || !strcmp(ctx->filter->name, "abuffersink")
-               || !strcmp(ctx->filter->name, "ffbuffersink")
-               || !strcmp(ctx->filter->name, "ffabuffersink"));
-
-    return av_fifo_size(buf->fifo)/sizeof(AVFilterBufferRef *) + ff_poll_frame(inlink);
-}
-
 static av_cold int vsink_init(AVFilterContext *ctx, void *opaque)
 {
     BufferSinkContext *buf = ctx->priv;
@@ -407,13 +307,13 @@ static int vsink_query_formats(AVFilterContext *ctx)
     CHECK_LIST_SIZE(pixel_fmts)
     if (buf->pixel_fmts_size) {
         for (i = 0; i < NB_ITEMS(buf->pixel_fmts); i++)
-            if ((ret = ff_add_format(&formats, buf->pixel_fmts[i])) < 0) {
-                ff_formats_unref(&formats);
+            if ((ret = ff_add_format(&formats, buf->pixel_fmts[i])) < 0)
                 return ret;
-            }
-        ff_set_common_formats(ctx, formats);
+        if ((ret = ff_set_common_formats(ctx, formats)) < 0)
+            return ret;
     } else {
-        ff_default_query_formats(ctx);
+        if ((ret = ff_default_query_formats(ctx)) < 0)
+            return ret;
     }
 
     return 0;
@@ -451,25 +351,20 @@ static int asink_query_formats(AVFilterContext *ctx)
 
     if (buf->sample_fmts_size) {
         for (i = 0; i < NB_ITEMS(buf->sample_fmts); i++)
-            if ((ret = ff_add_format(&formats, buf->sample_fmts[i])) < 0) {
-                ff_formats_unref(&formats);
+            if ((ret = ff_add_format(&formats, buf->sample_fmts[i])) < 0)
                 return ret;
-            }
-        ff_set_common_formats(ctx, formats);
+        if ((ret = ff_set_common_formats(ctx, formats)) < 0)
+            return ret;
     }
 
     if (buf->channel_layouts_size || buf->channel_counts_size ||
         buf->all_channel_counts) {
         for (i = 0; i < NB_ITEMS(buf->channel_layouts); i++)
-            if ((ret = ff_add_channel_layout(&layouts, buf->channel_layouts[i])) < 0) {
-                ff_channel_layouts_unref(&layouts);
+            if ((ret = ff_add_channel_layout(&layouts, buf->channel_layouts[i])) < 0)
                 return ret;
-            }
         for (i = 0; i < NB_ITEMS(buf->channel_counts); i++)
-            if ((ret = ff_add_channel_layout(&layouts, FF_COUNT2LAYOUT(buf->channel_counts[i]))) < 0) {
-                ff_channel_layouts_unref(&layouts);
+            if ((ret = ff_add_channel_layout(&layouts, FF_COUNT2LAYOUT(buf->channel_counts[i]))) < 0)
                 return ret;
-            }
         if (buf->all_channel_counts) {
             if (layouts)
                 av_log(ctx, AV_LOG_WARNING,
@@ -477,17 +372,17 @@ static int asink_query_formats(AVFilterContext *ctx)
             else if (!(layouts = ff_all_channel_counts()))
                 return AVERROR(ENOMEM);
         }
-        ff_set_common_channel_layouts(ctx, layouts);
+        if ((ret = ff_set_common_channel_layouts(ctx, layouts)) < 0)
+            return ret;
     }
 
     if (buf->sample_rates_size) {
         formats = NULL;
         for (i = 0; i < NB_ITEMS(buf->sample_rates); i++)
-            if ((ret = ff_add_format(&formats, buf->sample_rates[i])) < 0) {
-                ff_formats_unref(&formats);
+            if ((ret = ff_add_format(&formats, buf->sample_rates[i])) < 0)
                 return ret;
-            }
-        ff_set_common_samplerates(ctx, formats);
+        if ((ret = ff_set_common_samplerates(ctx, formats)) < 0)
+            return ret;
     }
 
     return 0;
@@ -506,7 +401,7 @@ static const AVOption abuffersink_options[] = {
     { "sample_rates",    "set the supported sample rates",    OFFSET(sample_rates),    AV_OPT_TYPE_BINARY, .flags = FLAGS },
     { "channel_layouts", "set the supported channel layouts", OFFSET(channel_layouts), AV_OPT_TYPE_BINARY, .flags = FLAGS },
     { "channel_counts",  "set the supported channel counts",  OFFSET(channel_counts),  AV_OPT_TYPE_BINARY, .flags = FLAGS },
-    { "all_channel_counts", "accept all channel counts", OFFSET(all_channel_counts), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, FLAGS },
+    { "all_channel_counts", "accept all channel counts", OFFSET(all_channel_counts), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS },
     { NULL },
 };
 #undef FLAGS
@@ -514,57 +409,6 @@ static const AVOption abuffersink_options[] = {
 AVFILTER_DEFINE_CLASS(buffersink);
 AVFILTER_DEFINE_CLASS(abuffersink);
 
-#if FF_API_AVFILTERBUFFER
-
-#define ffbuffersink_options buffersink_options
-#define ffabuffersink_options abuffersink_options
-AVFILTER_DEFINE_CLASS(ffbuffersink);
-AVFILTER_DEFINE_CLASS(ffabuffersink);
-
-static const AVFilterPad ffbuffersink_inputs[] = {
-    {
-        .name      = "default",
-        .type      = AVMEDIA_TYPE_VIDEO,
-        .filter_frame = filter_frame,
-    },
-    { NULL },
-};
-
-AVFilter ff_vsink_ffbuffersink = {
-    .name      = "ffbuffersink",
-    .description = NULL_IF_CONFIG_SMALL("Buffer video frames, and make them available to the end of the filter graph."),
-    .priv_size = sizeof(BufferSinkContext),
-    .priv_class = &ffbuffersink_class,
-    .init_opaque = vsink_init,
-    .uninit    = uninit,
-
-    .query_formats = vsink_query_formats,
-    .inputs        = ffbuffersink_inputs,
-    .outputs       = NULL,
-};
-
-static const AVFilterPad ffabuffersink_inputs[] = {
-    {
-        .name           = "default",
-        .type           = AVMEDIA_TYPE_AUDIO,
-        .filter_frame   = filter_frame,
-    },
-    { NULL },
-};
-
-AVFilter ff_asink_ffabuffersink = {
-    .name      = "ffabuffersink",
-    .description = NULL_IF_CONFIG_SMALL("Buffer audio frames, and make them available to the end of the filter graph."),
-    .init_opaque = asink_init,
-    .uninit    = uninit,
-    .priv_size = sizeof(BufferSinkContext),
-    .priv_class = &ffabuffersink_class,
-    .query_formats = asink_query_formats,
-    .inputs        = ffabuffersink_inputs,
-    .outputs       = NULL,
-};
-#endif /* FF_API_AVFILTERBUFFER */
-
 static const AVFilterPad avfilter_vsink_buffer_inputs[] = {
     {
         .name         = "default",
diff --git a/libavfilter/buffersink.h b/libavfilter/buffersink.h
index 24cd2fea..e399b911 100644
--- a/libavfilter/buffersink.h
+++ b/libavfilter/buffersink.h
@@ -33,63 +33,6 @@
  * @{
  */
 
-#if FF_API_AVFILTERBUFFER
-/**
- * Get an audio/video buffer data from buffer_sink and put it in bufref.
- *
- * This function works with both audio and video buffer sinks.
- *
- * @param buffer_sink pointer to a buffersink or abuffersink context
- * @param flags a combination of AV_BUFFERSINK_FLAG_* flags
- * @return >= 0 in case of success, a negative AVERROR code in case of
- * failure
- */
-attribute_deprecated
-int av_buffersink_get_buffer_ref(AVFilterContext *buffer_sink,
-                                 AVFilterBufferRef **bufref, int flags);
-
-/**
- * Get the number of immediately available frames.
- */
-attribute_deprecated
-int av_buffersink_poll_frame(AVFilterContext *ctx);
-
-/**
- * Get a buffer with filtered data from sink and put it in buf.
- *
- * @param ctx pointer to a context of a buffersink or abuffersink AVFilter.
- * @param buf pointer to the buffer will be written here if buf is non-NULL. buf
- *            must be freed by the caller using avfilter_unref_buffer().
- *            Buf may also be NULL to query whether a buffer is ready to be
- *            output.
- *
- * @return >= 0 in case of success, a negative AVERROR code in case of
- *         failure.
- */
-attribute_deprecated
-int av_buffersink_read(AVFilterContext *ctx, AVFilterBufferRef **buf);
-
-/**
- * Same as av_buffersink_read, but with the ability to specify the number of
- * samples read. This function is less efficient than av_buffersink_read(),
- * because it copies the data around.
- *
- * @param ctx pointer to a context of the abuffersink AVFilter.
- * @param buf pointer to the buffer will be written here if buf is non-NULL. buf
- *            must be freed by the caller using avfilter_unref_buffer(). buf
- *            will contain exactly nb_samples audio samples, except at the end
- *            of stream, when it can contain less than nb_samples.
- *            Buf may also be NULL to query whether a buffer is ready to be
- *            output.
- *
- * @warning do not mix this function with av_buffersink_read(). Use only one or
- * the other with a single sink, not both.
- */
-attribute_deprecated
-int av_buffersink_read_samples(AVFilterContext *ctx, AVFilterBufferRef **buf,
-                               int nb_samples);
-#endif
-
 /**
  * Get a frame with filtered data from sink and put it in frame.
  *
diff --git a/libavfilter/buffersrc.c b/libavfilter/buffersrc.c
index bf77b88f..3920b048 100644
--- a/libavfilter/buffersrc.c
+++ b/libavfilter/buffersrc.c
@@ -39,7 +39,6 @@
 #include "formats.h"
 #include "internal.h"
 #include "video.h"
-#include "avcodec.h"
 
 typedef struct BufferSourceContext {
     const AVClass    *class;
@@ -184,127 +183,6 @@ static int av_buffersrc_add_frame_internal(AVFilterContext *ctx,
     return 0;
 }
 
-#if FF_API_AVFILTERBUFFER
-FF_DISABLE_DEPRECATION_WARNINGS
-static void compat_free_buffer(void *opaque, uint8_t *data)
-{
-    AVFilterBufferRef *buf = opaque;
-    AV_NOWARN_DEPRECATED(
-    avfilter_unref_buffer(buf);
-    )
-}
-
-static void compat_unref_buffer(void *opaque, uint8_t *data)
-{
-    AVBufferRef *buf = opaque;
-    AV_NOWARN_DEPRECATED(
-    av_buffer_unref(&buf);
-    )
-}
-
-int av_buffersrc_add_ref(AVFilterContext *ctx, AVFilterBufferRef *buf,
-                         int flags)
-{
-    BufferSourceContext *s = ctx->priv;
-    AVFrame *frame = NULL;
-    AVBufferRef *dummy_buf = NULL;
-    int ret = 0, planes, i;
-
-    if (!buf) {
-        s->eof = 1;
-        return 0;
-    } else if (s->eof)
-        return AVERROR(EINVAL);
-
-    frame = av_frame_alloc();
-    if (!frame)
-        return AVERROR(ENOMEM);
-
-    dummy_buf = av_buffer_create(NULL, 0, compat_free_buffer, buf,
-                                 (buf->perms & AV_PERM_WRITE) ? 0 : AV_BUFFER_FLAG_READONLY);
-    if (!dummy_buf) {
-        ret = AVERROR(ENOMEM);
-        goto fail;
-    }
-
-    AV_NOWARN_DEPRECATED(
-    if ((ret = avfilter_copy_buf_props(frame, buf)) < 0)
-        goto fail;
-    )
-
-#define WRAP_PLANE(ref_out, data, data_size)                            \
-do {                                                                    \
-    AVBufferRef *dummy_ref = av_buffer_ref(dummy_buf);                  \
-    if (!dummy_ref) {                                                   \
-        ret = AVERROR(ENOMEM);                                          \
-        goto fail;                                                      \
-    }                                                                   \
-    ref_out = av_buffer_create(data, data_size, compat_unref_buffer,    \
-                               dummy_ref, (buf->perms & AV_PERM_WRITE) ? 0 : AV_BUFFER_FLAG_READONLY);                           \
-    if (!ref_out) {                                                     \
-        av_buffer_unref(&dummy_ref);                                    \
-        av_frame_unref(frame);                                          \
-        ret = AVERROR(ENOMEM);                                          \
-        goto fail;                                                      \
-    }                                                                   \
-} while (0)
-
-    if (ctx->outputs[0]->type  == AVMEDIA_TYPE_VIDEO) {
-        const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
-
-        planes = av_pix_fmt_count_planes(frame->format);
-        if (!desc || planes <= 0) {
-            ret = AVERROR(EINVAL);
-            goto fail;
-        }
-
-        for (i = 0; i < planes; i++) {
-            int v_shift    = (i == 1 || i == 2) ? desc->log2_chroma_h : 0;
-            int plane_size = (frame->height >> v_shift) * frame->linesize[i];
-
-            WRAP_PLANE(frame->buf[i], frame->data[i], plane_size);
-        }
-    } else {
-        int planar = av_sample_fmt_is_planar(frame->format);
-        int channels = av_get_channel_layout_nb_channels(frame->channel_layout);
-
-        planes = planar ? channels : 1;
-
-        if (planes > FF_ARRAY_ELEMS(frame->buf)) {
-            frame->nb_extended_buf = planes - FF_ARRAY_ELEMS(frame->buf);
-            frame->extended_buf = av_mallocz_array(sizeof(*frame->extended_buf),
-                                             frame->nb_extended_buf);
-            if (!frame->extended_buf) {
-                ret = AVERROR(ENOMEM);
-                goto fail;
-            }
-        }
-
-        for (i = 0; i < FFMIN(planes, FF_ARRAY_ELEMS(frame->buf)); i++)
-            WRAP_PLANE(frame->buf[i], frame->extended_data[i], frame->linesize[0]);
-
-        for (i = 0; i < planes - FF_ARRAY_ELEMS(frame->buf); i++)
-            WRAP_PLANE(frame->extended_buf[i],
-                       frame->extended_data[i + FF_ARRAY_ELEMS(frame->buf)],
-                       frame->linesize[0]);
-    }
-
-    ret = av_buffersrc_add_frame_flags(ctx, frame, flags);
-
-fail:
-    av_buffer_unref(&dummy_buf);
-    av_frame_free(&frame);
-
-    return ret;
-}
-FF_ENABLE_DEPRECATION_WARNINGS
-
-int av_buffersrc_buffer(AVFilterContext *ctx, AVFilterBufferRef *buf)
-{
-    return av_buffersrc_add_ref(ctx, buf, 0);
-}
-#endif
-
 static av_cold int init_video(AVFilterContext *ctx)
 {
     BufferSourceContext *c = ctx->priv;
@@ -436,23 +314,27 @@ static int query_formats(AVFilterContext *ctx)
     AVFilterChannelLayouts *channel_layouts = NULL;
     AVFilterFormats *formats = NULL;
     AVFilterFormats *samplerates = NULL;
+    int ret;
 
     switch (ctx->outputs[0]->type) {
     case AVMEDIA_TYPE_VIDEO:
-        ff_add_format(&formats, c->pix_fmt);
-        ff_set_common_formats(ctx, formats);
+        if ((ret = ff_add_format         (&formats, c->pix_fmt)) < 0 ||
+            (ret = ff_set_common_formats (ctx     , formats   )) < 0)
+            return ret;
         break;
     case AVMEDIA_TYPE_AUDIO:
-        ff_add_format(&formats,           c->sample_fmt);
-        ff_set_common_formats(ctx, formats);
-
-        ff_add_format(&samplerates,       c->sample_rate);
-        ff_set_common_samplerates(ctx, samplerates);
+        if ((ret = ff_add_format             (&formats    , c->sample_fmt )) < 0 ||
+            (ret = ff_set_common_formats     (ctx         , formats       )) < 0 ||
+            (ret = ff_add_format             (&samplerates, c->sample_rate)) < 0 ||
+            (ret = ff_set_common_samplerates (ctx         , samplerates   )) < 0)
+            return ret;
 
-        ff_add_channel_layout(&channel_layouts,
+        if ((ret = ff_add_channel_layout(&channel_layouts,
                               c->channel_layout ? c->channel_layout :
-                              FF_COUNT2LAYOUT(c->channels));
-        ff_set_common_channel_layouts(ctx, channel_layouts);
+                              FF_COUNT2LAYOUT(c->channels))) < 0)
+            return ret;
+        if ((ret = ff_set_common_channel_layouts(ctx, channel_layouts)) < 0)
+            return ret;
         break;
     default:
         return AVERROR(EINVAL);
diff --git a/libavfilter/buffersrc.h b/libavfilter/buffersrc.h
index ea34c04e..847c093c 100644
--- a/libavfilter/buffersrc.h
+++ b/libavfilter/buffersrc.h
@@ -42,13 +42,6 @@ enum {
      */
     AV_BUFFERSRC_FLAG_NO_CHECK_FORMAT = 1,
 
-#if FF_API_AVFILTERBUFFER
-    /**
-     * Ignored
-     */
-    AV_BUFFERSRC_FLAG_NO_COPY = 2,
-#endif
-
     /**
      * Immediately push the frame to the output.
      */
@@ -63,18 +56,6 @@ enum {
 
 };
 
-/**
- * Add buffer data in picref to buffer_src.
- *
- * @param buffer_src  pointer to a buffer source context
- * @param picref      a buffer reference, or NULL to mark EOF
- * @param flags       a combination of AV_BUFFERSRC_FLAG_*
- * @return            >= 0 in case of success, a negative AVERROR code
- *                    in case of failure
- */
-int av_buffersrc_add_ref(AVFilterContext *buffer_src,
-                         AVFilterBufferRef *picref, int flags);
-
 /**
  * Get the number of failed requests.
  *
@@ -84,21 +65,6 @@ int av_buffersrc_add_ref(AVFilterContext *buffer_src,
  */
 unsigned av_buffersrc_get_nb_failed_requests(AVFilterContext *buffer_src);
 
-#if FF_API_AVFILTERBUFFER
-/**
- * Add a buffer to a filtergraph.
- *
- * @param ctx an instance of the buffersrc filter
- * @param buf buffer containing frame data to be passed down the filtergraph.
- * This function will take ownership of buf, the user must not free it.
- * A NULL buf signals EOF -- i.e. no more frames will be sent to this filter.
- *
- * @deprecated use av_buffersrc_write_frame() or av_buffersrc_add_frame()
- */
-attribute_deprecated
-int av_buffersrc_buffer(AVFilterContext *ctx, AVFilterBufferRef *buf);
-#endif
-
 /**
  * Add a frame to the buffer source.
  *
@@ -112,6 +78,7 @@ int av_buffersrc_buffer(AVFilterContext *ctx, AVFilterBufferRef *buf);
  * This function is equivalent to av_buffersrc_add_frame_flags() with the
  * AV_BUFFERSRC_FLAG_KEEP_REF flag.
  */
+av_warn_unused_result
 int av_buffersrc_write_frame(AVFilterContext *ctx, const AVFrame *frame);
 
 /**
@@ -132,6 +99,7 @@ int av_buffersrc_write_frame(AVFilterContext *ctx, const AVFrame *frame);
  * This function is equivalent to av_buffersrc_add_frame_flags() without the
  * AV_BUFFERSRC_FLAG_KEEP_REF flag.
  */
+av_warn_unused_result
 int av_buffersrc_add_frame(AVFilterContext *ctx, AVFrame *frame);
 
 /**
@@ -149,6 +117,7 @@ int av_buffersrc_add_frame(AVFilterContext *ctx, AVFrame *frame);
  * @return            >= 0 in case of success, a negative AVERROR code
  *                    in case of failure
  */
+av_warn_unused_result
 int av_buffersrc_add_frame_flags(AVFilterContext *buffer_src,
                                  AVFrame *frame, int flags);
 
diff --git a/libavfilter/deshake_opencl.c b/libavfilter/deshake_opencl.c
index 28212482..91ae7d58 100644
--- a/libavfilter/deshake_opencl.c
+++ b/libavfilter/deshake_opencl.c
@@ -160,7 +160,7 @@ int ff_opencl_deshake_process_inout_buf(AVFilterContext *ctx, AVFrame *in, AVFra
     AVFilterLink *link = ctx->inputs[0];
     DeshakeContext *deshake = ctx->priv;
     const int hshift = av_pix_fmt_desc_get(link->format)->log2_chroma_h;
-    int chroma_height = FF_CEIL_RSHIFT(link->h, hshift);
+    int chroma_height = AV_CEIL_RSHIFT(link->h, hshift);
 
     if ((!deshake->opencl_ctx.cl_inbuf) || (!deshake->opencl_ctx.cl_outbuf)) {
         deshake->opencl_ctx.in_plane_size[0]  = (in->linesize[0] * in->height);
diff --git a/libavfilter/drawutils.c b/libavfilter/drawutils.c
index 0b2f17e5..23a0eb5d 100644
--- a/libavfilter/drawutils.c
+++ b/libavfilter/drawutils.c
@@ -21,6 +21,7 @@
 
 #include <string.h>
 
+#include "libavutil/avassert.h"
 #include "libavutil/avutil.h"
 #include "libavutil/colorspace.h"
 #include "libavutil/mem.h"
@@ -51,6 +52,10 @@ int ff_fill_rgba_map(uint8_t *rgba_map, enum AVPixelFormat pix_fmt)
     case AV_PIX_FMT_BGRA:
     case AV_PIX_FMT_BGR0:
     case AV_PIX_FMT_BGR24: rgba_map[BLUE ] = 0; rgba_map[GREEN] = 1; rgba_map[RED  ] = 2; rgba_map[ALPHA] = 3; break;
+    case AV_PIX_FMT_GBRP9:
+    case AV_PIX_FMT_GBRP10:
+    case AV_PIX_FMT_GBRP12:
+    case AV_PIX_FMT_GBRP14:
     case AV_PIX_FMT_GBRAP:
     case AV_PIX_FMT_GBRP:  rgba_map[GREEN] = 0; rgba_map[BLUE ] = 1; rgba_map[RED  ] = 2; rgba_map[ALPHA] = 3; break;
     default:                    /* unsupported */
@@ -66,7 +71,11 @@ int ff_fill_line_with_color(uint8_t *line[4], int pixel_step[4], int w, uint8_t
     uint8_t rgba_map[4] = {0};
     int i;
     const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(pix_fmt);
-    int hsub = pix_desc->log2_chroma_w;
+    int hsub;
+
+    av_assert0(pix_desc);
+
+    hsub = pix_desc->log2_chroma_w;
 
     *is_packed_rgba = ff_fill_rgba_map(rgba_map, pix_fmt) >= 0;
 
@@ -95,7 +104,7 @@ int ff_fill_line_with_color(uint8_t *line[4], int pixel_step[4], int w, uint8_t
             int hsub1 = (plane == 1 || plane == 2) ? hsub : 0;
 
             pixel_step[plane] = 1;
-            line_size = FF_CEIL_RSHIFT(w, hsub1) * pixel_step[plane];
+            line_size = AV_CEIL_RSHIFT(w, hsub1) * pixel_step[plane];
             line[plane] = av_malloc(line_size);
             if (!line[plane]) {
                 while(plane && line[plane-1])
@@ -119,8 +128,8 @@ void ff_draw_rectangle(uint8_t *dst[4], int dst_linesize[4],
     for (plane = 0; plane < 4 && dst[plane]; plane++) {
         int hsub1 = plane == 1 || plane == 2 ? hsub : 0;
         int vsub1 = plane == 1 || plane == 2 ? vsub : 0;
-        int width  = FF_CEIL_RSHIFT(w, hsub1);
-        int height = FF_CEIL_RSHIFT(h, vsub1);
+        int width  = AV_CEIL_RSHIFT(w, hsub1);
+        int height = AV_CEIL_RSHIFT(h, vsub1);
 
         p = dst[plane] + (y >> vsub1) * dst_linesize[plane];
         for (i = 0; i < height; i++) {
@@ -141,8 +150,8 @@ void ff_copy_rectangle(uint8_t *dst[4], int dst_linesize[4],
     for (plane = 0; plane < 4 && dst[plane]; plane++) {
         int hsub1 = plane == 1 || plane == 2 ? hsub : 0;
         int vsub1 = plane == 1 || plane == 2 ? vsub : 0;
-        int width  = FF_CEIL_RSHIFT(w, hsub1);
-        int height = FF_CEIL_RSHIFT(h, vsub1);
+        int width  = AV_CEIL_RSHIFT(w, hsub1);
+        int height = AV_CEIL_RSHIFT(h, vsub1);
 
         p = dst[plane] + (y >> vsub1) * dst_linesize[plane];
         for (i = 0; i < height; i++) {
@@ -160,22 +169,22 @@ int ff_draw_init(FFDrawContext *draw, enum AVPixelFormat format, unsigned flags)
     unsigned i, nb_planes = 0;
     int pixelstep[MAX_PLANES] = { 0 };
 
-    if (!desc->name)
+    if (!desc || !desc->name)
         return AVERROR(EINVAL);
     if (desc->flags & ~(AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_PSEUDOPAL | AV_PIX_FMT_FLAG_ALPHA))
         return AVERROR(ENOSYS);
     for (i = 0; i < desc->nb_components; i++) {
         c = &desc->comp[i];
         /* for now, only 8-bits formats */
-        if (c->depth_minus1 != 8 - 1)
+        if (c->depth != 8)
             return AVERROR(ENOSYS);
         if (c->plane >= MAX_PLANES)
             return AVERROR(ENOSYS);
         /* strange interleaving */
         if (pixelstep[c->plane] != 0 &&
-            pixelstep[c->plane] != c->step_minus1 + 1)
+            pixelstep[c->plane] != c->step)
             return AVERROR(ENOSYS);
-        pixelstep[c->plane] = c->step_minus1 + 1;
+        pixelstep[c->plane] = c->step;
         if (pixelstep[c->plane] >= 8)
             return AVERROR(ENOSYS);
         nb_planes = FFMAX(nb_planes, c->plane + 1);
@@ -191,7 +200,7 @@ int ff_draw_init(FFDrawContext *draw, enum AVPixelFormat format, unsigned flags)
     draw->vsub[1] = draw->vsub[2] = draw->vsub_max = desc->log2_chroma_h;
     for (i = 0; i < ((desc->nb_components - 1) | 1); i++)
         draw->comp_mask[desc->comp[i].plane] |=
-            1 << (desc->comp[i].offset_plus1 - 1);
+            1 << desc->comp[i].offset;
     return 0;
 }
 
@@ -205,8 +214,8 @@ void ff_draw_color(FFDrawContext *draw, FFDrawColor *color, const uint8_t rgba[4
     if ((draw->desc->flags & AV_PIX_FMT_FLAG_RGB) &&
         ff_fill_rgba_map(rgba_map, draw->format) >= 0) {
         if (draw->nb_planes == 1) {
-        for (i = 0; i < 4; i++)
-            color->comp[0].u8[rgba_map[i]] = rgba[i];
+            for (i = 0; i < 4; i++)
+                color->comp[0].u8[rgba_map[i]] = rgba[i];
         } else {
             for (i = 0; i < 4; i++)
                 color->comp[rgba_map[i]].u8[0] = rgba[i];
@@ -247,8 +256,8 @@ void ff_copy_rectangle2(FFDrawContext *draw,
     for (plane = 0; plane < draw->nb_planes; plane++) {
         p = pointer_at(draw, src, src_linesize, plane, src_x, src_y);
         q = pointer_at(draw, dst, dst_linesize, plane, dst_x, dst_y);
-        wp = FF_CEIL_RSHIFT(w, draw->hsub[plane]) * draw->pixelstep[plane];
-        hp = FF_CEIL_RSHIFT(h, draw->vsub[plane]);
+        wp = AV_CEIL_RSHIFT(w, draw->hsub[plane]) * draw->pixelstep[plane];
+        hp = AV_CEIL_RSHIFT(h, draw->vsub[plane]);
         for (y = 0; y < hp; y++) {
             memcpy(q, p, wp);
             p += src_linesize[plane];
@@ -266,8 +275,8 @@ void ff_fill_rectangle(FFDrawContext *draw, FFDrawColor *color,
 
     for (plane = 0; plane < draw->nb_planes; plane++) {
         p0 = pointer_at(draw, dst, dst_linesize, plane, dst_x, dst_y);
-        wp = FF_CEIL_RSHIFT(w, draw->hsub[plane]);
-        hp = FF_CEIL_RSHIFT(h, draw->vsub[plane]);
+        wp = AV_CEIL_RSHIFT(w, draw->hsub[plane]);
+        hp = AV_CEIL_RSHIFT(h, draw->vsub[plane]);
         if (!hp)
             return;
         p = p0;
@@ -403,7 +412,7 @@ void ff_blend_rectangle(FFDrawContext *draw, FFDrawColor *color,
 }
 
 static void blend_pixel(uint8_t *dst, unsigned src, unsigned alpha,
-                        uint8_t *mask, int mask_linesize, int l2depth,
+                        const uint8_t *mask, int mask_linesize, int l2depth,
                         unsigned w, unsigned h, unsigned shift, unsigned xm0)
 {
     unsigned xm, x, y, t = 0;
@@ -427,7 +436,7 @@ static void blend_pixel(uint8_t *dst, unsigned src, unsigned alpha,
 
 static void blend_line_hv(uint8_t *dst, int dst_delta,
                           unsigned src, unsigned alpha,
-                          uint8_t *mask, int mask_linesize, int l2depth, int w,
+                          const uint8_t *mask, int mask_linesize, int l2depth, int w,
                           unsigned hsub, unsigned vsub,
                           int xm, int left, int right, int hband)
 {
@@ -452,12 +461,13 @@ static void blend_line_hv(uint8_t *dst, int dst_delta,
 
 void ff_blend_mask(FFDrawContext *draw, FFDrawColor *color,
                    uint8_t *dst[], int dst_linesize[], int dst_w, int dst_h,
-                   uint8_t *mask,  int mask_linesize, int mask_w, int mask_h,
+                   const uint8_t *mask,  int mask_linesize, int mask_w, int mask_h,
                    int l2depth, unsigned endianness, int x0, int y0)
 {
     unsigned alpha, nb_planes, nb_comp, plane, comp;
     int xm0, ym0, w_sub, h_sub, x_sub, y_sub, left, right, top, bottom, y;
-    uint8_t *p0, *p, *m;
+    uint8_t *p0, *p;
+    const uint8_t *m;
 
     clip_interval(dst_w, &x0, &mask_w, &xm0);
     clip_interval(dst_h, &y0, &mask_h, &ym0);
@@ -527,10 +537,12 @@ AVFilterFormats *ff_draw_supported_pixel_formats(unsigned flags)
     enum AVPixelFormat i;
     FFDrawContext draw;
     AVFilterFormats *fmts = NULL;
+    int ret;
 
     for (i = 0; av_pix_fmt_desc_get(i); i++)
-        if (ff_draw_init(&draw, i, flags) >= 0)
-            ff_add_format(&fmts, i);
+        if (ff_draw_init(&draw, i, flags) >= 0 &&
+            (ret = ff_add_format(&fmts, i)) < 0)
+            return NULL;
     return fmts;
 }
 
diff --git a/libavfilter/drawutils.h b/libavfilter/drawutils.h
index 5ffffe77..e247dd64 100644
--- a/libavfilter/drawutils.h
+++ b/libavfilter/drawutils.h
@@ -130,7 +130,7 @@ void ff_blend_rectangle(FFDrawContext *draw, FFDrawColor *color,
  */
 void ff_blend_mask(FFDrawContext *draw, FFDrawColor *color,
                    uint8_t *dst[], int dst_linesize[], int dst_w, int dst_h,
-                   uint8_t *mask, int mask_linesize, int mask_w, int mask_h,
+                   const uint8_t *mask, int mask_linesize, int mask_w, int mask_h,
                    int l2depth, unsigned endianness, int x0, int y0);
 
 /**
diff --git a/libavfilter/dualinput.c b/libavfilter/dualinput.c
index 45f68107..1a078a25 100644
--- a/libavfilter/dualinput.c
+++ b/libavfilter/dualinput.c
@@ -32,7 +32,7 @@ static int process_frame(FFFrameSync *fs)
         return ret;
     }
     av_assert0(mainpic);
-    mainpic->pts = av_rescale_q(mainpic->pts, s->fs.time_base, ctx->outputs[0]->time_base);
+    mainpic->pts = av_rescale_q(s->fs.pts, s->fs.time_base, ctx->outputs[0]->time_base);
     if (secondpic && !ctx->is_disabled)
         mainpic = s->process(ctx, mainpic, secondpic);
     ret = ff_filter_frame(ctx->outputs[0], mainpic);
@@ -42,9 +42,13 @@ static int process_frame(FFFrameSync *fs)
 
 int ff_dualinput_init(AVFilterContext *ctx, FFDualInputContext *s)
 {
-    FFFrameSyncIn *in = s->fs.in;
+    FFFrameSyncIn *in;
+    int ret;
 
-    ff_framesync_init(&s->fs, ctx, 2);
+    if ((ret = ff_framesync_init(&s->fs, ctx, 2)) < 0)
+        return ret;
+
+    in = s->fs.in;
     s->fs.opaque = s;
     s->fs.on_event = process_frame;
     in[0].time_base = ctx->inputs[0]->time_base;
diff --git a/libavfilter/dualinput.h b/libavfilter/dualinput.h
index 0ec0ea73..5ff23e62 100644
--- a/libavfilter/dualinput.h
+++ b/libavfilter/dualinput.h
@@ -31,7 +31,6 @@
 
 typedef struct {
     FFFrameSync fs;
-    FFFrameSyncIn second_input; /* must be immediately after fs */
 
     AVFrame *(*process)(AVFilterContext *ctx, AVFrame *main, const AVFrame *second);
     int shortest;               ///< terminate stream when the second input terminates
diff --git a/libavfilter/f_drawgraph.c b/libavfilter/f_drawgraph.c
new file mode 100644
index 00000000..0ca0d229
--- /dev/null
+++ b/libavfilter/f_drawgraph.c
@@ -0,0 +1,355 @@
+/*
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "float.h"
+
+#include "libavutil/eval.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/opt.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+typedef struct DrawGraphContext {
+    const AVClass *class;
+
+    char          *key[4];       ///< metadata keys to plot (options m1..m4)
+    float         min, max;      ///< value range; filter_frame() clips samples to it
+    char          *fg_str[4];    ///< foreground color expression strings (options fg1..fg4)
+    AVExpr        *fg_expr[4];   ///< fg_str[] as parsed by init()
+    uint8_t       bg[4];         ///< background color (option bg)
+    int           mode;          ///< 0 = bar, 1 = dot, 2 = line
+    int           slide;         ///< 0 = frame, 1 = replace, 2 = scroll, 3 = rscroll
+    int           w, h;          ///< frame size that config_output() copies to the output link
+
+    AVFrame       *out;          ///< graph frame kept between filter_frame() calls
+    int           x;             ///< column the current sample is drawn into
+    int           prev_y[4];     ///< row of the previous sample per key (used by line mode)
+    int           first;         ///< set by init(), cleared when line mode first seeds prev_y
+} DrawGraphContext;
+
+#define OFFSET(x) offsetof(DrawGraphContext, x)
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+/* Option table shared by drawgraph and adrawgraph (adrawgraph_options is #defined to this name). */
+static const AVOption drawgraph_options[] = {
+    { "m1", "set 1st metadata key", OFFSET(key[0]), AV_OPT_TYPE_STRING, {.str=""}, CHAR_MIN, CHAR_MAX, FLAGS },
+    { "fg1", "set 1st foreground color expression", OFFSET(fg_str[0]), AV_OPT_TYPE_STRING, {.str="0xffff0000"}, CHAR_MIN, CHAR_MAX, FLAGS },
+    { "m2", "set 2nd metadata key", OFFSET(key[1]), AV_OPT_TYPE_STRING, {.str=""}, CHAR_MIN, CHAR_MAX, FLAGS },
+    { "fg2", "set 2nd foreground color expression", OFFSET(fg_str[1]), AV_OPT_TYPE_STRING, {.str="0xff00ff00"}, CHAR_MIN, CHAR_MAX, FLAGS },
+    { "m3", "set 3rd metadata key", OFFSET(key[2]), AV_OPT_TYPE_STRING, {.str=""}, CHAR_MIN, CHAR_MAX, FLAGS },
+    { "fg3", "set 3rd foreground color expression", OFFSET(fg_str[2]), AV_OPT_TYPE_STRING, {.str="0xffff00ff"}, CHAR_MIN, CHAR_MAX, FLAGS },
+    { "m4", "set 4th metadata key", OFFSET(key[3]), AV_OPT_TYPE_STRING, {.str=""}, CHAR_MIN, CHAR_MAX, FLAGS },
+    { "fg4", "set 4th foreground color expression", OFFSET(fg_str[3]), AV_OPT_TYPE_STRING, {.str="0xffffff00"}, CHAR_MIN, CHAR_MAX, FLAGS },
+    { "bg", "set background color", OFFSET(bg), AV_OPT_TYPE_COLOR, {.str="white"}, CHAR_MIN, CHAR_MAX, FLAGS },
+    { "min", "set minimal value", OFFSET(min), AV_OPT_TYPE_FLOAT, {.dbl=-1.}, INT_MIN, INT_MAX, FLAGS },
+    { "max", "set maximal value", OFFSET(max), AV_OPT_TYPE_FLOAT, {.dbl=1.}, INT_MIN, INT_MAX, FLAGS },
+    { "mode", "set graph mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64=2}, 0, 2, FLAGS, "mode" },
+        {"bar", "draw bars", OFFSET(mode), AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "mode"},
+        {"dot", "draw dots", OFFSET(mode), AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "mode"},
+        {"line", "draw lines", OFFSET(mode), AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, FLAGS, "mode"},
+    { "slide", "set slide mode", OFFSET(slide), AV_OPT_TYPE_INT, {.i64=0}, 0, 3, FLAGS, "slide" },
+        {"frame", "draw new frames", OFFSET(slide), AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "slide"},
+        {"replace", "replace old columns with new", OFFSET(slide), AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "slide"},
+        {"scroll", "scroll from right to left", OFFSET(slide), AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, FLAGS, "slide"},
+        {"rscroll", "scroll from left to right", OFFSET(slide), AV_OPT_TYPE_CONST, {.i64=3}, 0, 0, FLAGS, "slide"},
+    { "size", "set graph size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, {.str="900x256"}, 0, 0, FLAGS },
+    { "s", "set graph size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, {.str="900x256"}, 0, 0, FLAGS },
+    { NULL }
+};
+/* Names handed to av_expr_parse() for the fgN expressions; filter_frame() fills values[] using the VAR_* indices. */
+static const char *const var_names[] = {   "MAX",   "MIN",   "VAL", NULL };
+enum                                   { VAR_MAX, VAR_MIN, VAR_VAL, VAR_VARS_NB };
+/* Reject an empty min/max range and parse the fgN expressions; returns 0 or a negative error code. */
+static av_cold int init(AVFilterContext *ctx)
+{
+    DrawGraphContext *s = ctx->priv;
+    int ret, i;
+
+    if (s->max <= s->min) {
+        av_log(ctx, AV_LOG_ERROR, "max is same or lower than min\n");
+        return AVERROR(EINVAL);
+    }
+    /* parse every foreground color expression string that is set */
+    for (i = 0; i < 4; i++) {
+        if (s->fg_str[i]) {
+            ret = av_expr_parse(&s->fg_expr[i], s->fg_str[i], var_names,
+                                NULL, NULL, NULL, NULL, 0, ctx);
+
+            if (ret < 0)
+                return ret;
+        }
+    }
+
+    s->first = 1; /* lets line mode in filter_frame() seed prev_y on its first sample */
+
+    return 0;
+}
+/* Offer RGBA as the only pixel format on the output link; returns 0 or the ff_formats_ref() error. */
+static int query_formats(AVFilterContext *ctx)
+{
+    AVFilterLink *outlink = ctx->outputs[0];
+    static const enum AVPixelFormat pix_fmts[] = {
+        AV_PIX_FMT_RGBA,
+        AV_PIX_FMT_NONE
+    };
+    int ret;
+
+    AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
+    if ((ret = ff_formats_ref(fmts_list, &outlink->in_formats)) < 0)
+        return ret;
+
+    return 0;
+}
+/* Fill plane 0 of out with the background color s->bg, 4 bytes per pixel; outlink is not used. */
+static void clear_image(DrawGraphContext *s, AVFrame *out, AVFilterLink *outlink)
+{
+    int i, j;
+    int bg = AV_RN32(s->bg);
+
+    for (i = 0; i < out->height; i++)
+        for (j = 0; j < out->width; j++)
+            AV_WN32(out->data[0] + i * out->linesize[0] + j * 4, bg);
+}
+/* Write the 4-byte color fg at pixel (x, y) of plane 0 of out; x and y are not range-checked here. */
+static inline void draw_dot(int fg, int x, int y, AVFrame *out)
+{
+    AV_WN32(out->data[0] + y * out->linesize[0] + x * 4, fg);
+}
+
+/* Plot the metadata values carried by in (always freed here) into column s->x of the kept graph
+ * frame and send a clone downstream; returns ff_filter_frame()'s result or AVERROR(ENOMEM). */
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    DrawGraphContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AVDictionary *metadata;
+    AVDictionaryEntry *e;
+    AVFrame *out = s->out, *clone;
+    int i;
+    /* (re)allocate and clear the graph frame when it is missing or the output size changed */
+    if (!s->out || s->out->width  != outlink->w ||
+                   s->out->height != outlink->h) {
+        av_frame_free(&s->out);
+        s->out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+        out = s->out;
+        if (!s->out) {
+            av_frame_free(&in);
+            return AVERROR(ENOMEM);
+        }
+        clear_image(s, out, outlink);
+    }
+    av_frame_copy_props(out, in);
+    metadata = av_frame_get_metadata(in);
+
+    for (i = 0; i < 4; i++) {
+        double values[VAR_VARS_NB];
+        int j, y, x, old;
+        uint32_t fg, bg;
+        float vf;
+
+        e = av_dict_get(metadata, s->key[i], NULL, 0);
+        if (!e || !e->value)
+            continue;
+        if (sscanf(e->value, "%f", &vf) != 1)
+            continue;
+
+        vf = av_clipf(vf, s->min, s->max);
+        values[VAR_MIN] = s->min;
+        values[VAR_MAX] = s->max;
+        values[VAR_VAL] = vf;
+        fg = av_expr_eval(s->fg_expr[i], values, NULL);
+        bg = AV_RN32(s->bg);
+        /* key 0 only: once the column is past the width (or always in rscroll), wrap, scroll or clear */
+        if (i == 0 && (s->x >= outlink->w || s->slide == 3)) {
+            if (s->slide == 0 || s->slide == 1)
+                s->x = 0;
+
+            if (s->slide == 2) {
+                s->x = outlink->w - 1;
+                for (j = 0; j < outlink->h; j++) {
+                    memmove(out->data[0] + j * out->linesize[0] ,
+                            out->data[0] + j * out->linesize[0] + 4,
+                            (outlink->w - 1) * 4);
+                }
+            } else if (s->slide == 3) {
+                s->x = 0;
+                for (j = 0; j < outlink->h; j++) {
+                    memmove(out->data[0] + j * out->linesize[0] + 4,
+                            out->data[0] + j * out->linesize[0],
+                            (outlink->w - 1) * 4);
+                }
+            } else if (s->slide == 0) {
+                clear_image(s, out, outlink);
+            }
+        }
+        /* current column, and row: [min, max] maps linearly onto rows [h - 1, 0] */
+        x = s->x;
+        y = (outlink->h - 1) * (1 - ((vf - s->min) / (s->max - s->min)));
+        /* NOTE(review): s->x is only reset in the key-0 block above, which the continues skip - confirm x < w */
+        switch (s->mode) {
+        case 0: /* bar */
+            if (i == 0 && (s->slide > 0))
+                for (j = 0; j < outlink->h; j++)
+                    draw_dot(bg, x, j, out);
+            /* NOTE(review): && binds tighter than ||, so the j+1 test below applies even if old == bg */
+            old = AV_RN32(out->data[0] + y * out->linesize[0] + x * 4);
+            for (j = y; j < outlink->h; j++) {
+                if (old != bg &&
+                    (AV_RN32(out->data[0] + j * out->linesize[0] + x * 4) != old) ||
+                    AV_RN32(out->data[0] + FFMIN(j+1, outlink->h - 1) * out->linesize[0] + x * 4) != old) {
+                    draw_dot(fg, x, j, out);
+                    break;
+                }
+                draw_dot(fg, x, j, out);
+            }
+            break;
+        case 1: /* dot */
+            if (i == 0 && (s->slide > 0))
+                for (j = 0; j < outlink->h; j++)
+                    draw_dot(bg, x, j, out);
+            draw_dot(fg, x, y, out);
+            break;
+        case 2: /* line */
+            if (s->first) {
+                s->first = 0;
+                s->prev_y[i] = y;
+            }
+            /* NOTE(review): the flag above is shared by all keys, so only the first key plotted seeds prev_y */
+            if (i == 0 && (s->slide > 0)) {
+                for (j = 0; j < y; j++)
+                    draw_dot(bg, x, j, out);
+                for (j = outlink->h - 1; j > y; j--)
+                    draw_dot(bg, x, j, out);
+            }
+            if (y <= s->prev_y[i]) {
+                for (j = y; j <= s->prev_y[i]; j++)
+                    draw_dot(fg, x, j, out);
+            } else {
+                for (j = s->prev_y[i]; j <= y; j++)
+                    draw_dot(fg, x, j, out);
+            }
+            s->prev_y[i] = y;
+            break;
+        }
+    }
+
+    s->x++;
+    av_frame_free(&in);
+    /* av_frame_clone() can fail; do not hand a NULL frame to ff_filter_frame() */
+    clone = av_frame_clone(s->out);
+    if (!clone)
+        return AVERROR(ENOMEM);
+    return ff_filter_frame(outlink, clone);
+}
+/* Copy the configured graph size to the output link and set a 1:1 sample aspect ratio; returns 0. */
+static int config_output(AVFilterLink *outlink)
+{
+    DrawGraphContext *s = outlink->src->priv;
+
+    outlink->w = s->w;
+    outlink->h = s->h;
+    outlink->sample_aspect_ratio = (AVRational){1,1};
+
+    return 0;
+}
+/* Free the four parsed color expressions and the kept graph frame. */
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    DrawGraphContext *s = ctx->priv;
+    int i;
+
+    for (i = 0; i < 4; i++)
+        av_expr_free(s->fg_expr[i]);
+    av_frame_free(&s->out);
+}
+
+#if CONFIG_DRAWGRAPH_FILTER
+/* drawgraph: one video input pad and one video output pad. */
+AVFILTER_DEFINE_CLASS(drawgraph);
+
+static const AVFilterPad drawgraph_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame,
+    },
+    { NULL }
+};
+
+static const AVFilterPad drawgraph_outputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_output,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_drawgraph = {
+    .name          = "drawgraph",
+    .description   = NULL_IF_CONFIG_SMALL("Draw a graph using input video metadata."),
+    .priv_size     = sizeof(DrawGraphContext),
+    .priv_class    = &drawgraph_class,
+    .query_formats = query_formats,
+    .init          = init,
+    .uninit        = uninit,
+    .inputs        = drawgraph_inputs,
+    .outputs       = drawgraph_outputs,
+};
+
+#endif // CONFIG_DRAWGRAPH_FILTER
+
+#if CONFIG_ADRAWGRAPH_FILTER
+/* adrawgraph: audio input pad, video output pad; reuses the drawgraph option table and callbacks. */
+#define adrawgraph_options drawgraph_options
+AVFILTER_DEFINE_CLASS(adrawgraph);
+
+static const AVFilterPad adrawgraph_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_AUDIO,
+        .filter_frame = filter_frame,
+    },
+    { NULL }
+};
+
+static const AVFilterPad adrawgraph_outputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_output,
+    },
+    { NULL }
+};
+
+AVFilter ff_avf_adrawgraph = {
+    .name          = "adrawgraph",
+    .description   = NULL_IF_CONFIG_SMALL("Draw a graph using input audio metadata."),
+    .priv_size     = sizeof(DrawGraphContext),
+    .priv_class    = &adrawgraph_class,
+    .query_formats = query_formats,
+    .init          = init,
+    .uninit        = uninit,
+    .inputs        = adrawgraph_inputs,
+    .outputs       = adrawgraph_outputs,
+};
+#endif // CONFIG_ADRAWGRAPH_FILTER
diff --git a/libavfilter/f_ebur128.c b/libavfilter/f_ebur128.c
index c18ae799..c4b30b49 100644
--- a/libavfilter/f_ebur128.c
+++ b/libavfilter/f_ebur128.c
@@ -139,6 +139,8 @@ typedef struct {
     /* misc */
     int loglevel;                   ///< log level for frame logging
     int metadata;                   ///< whether or not to inject loudness results in frames
+    int dual_mono;                  ///< whether or not to treat single channel input files as dual-mono
+    double pan_law;                 ///< pan law value used to calculate dual-mono measurements
 } EBUR128Context;
 
 enum {
@@ -152,17 +154,19 @@ enum {
 #define V AV_OPT_FLAG_VIDEO_PARAM
 #define F AV_OPT_FLAG_FILTERING_PARAM
 static const AVOption ebur128_options[] = {
-    { "video", "set video output", OFFSET(do_video), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, V|F },
+    { "video", "set video output", OFFSET(do_video), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, V|F },
     { "size",  "set video size",   OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, {.str = "640x480"}, 0, 0, V|F },
     { "meter", "set scale meter (+9 to +18)",  OFFSET(meter), AV_OPT_TYPE_INT, {.i64 = 9}, 9, 18, V|F },
     { "framelog", "force frame logging level", OFFSET(loglevel), AV_OPT_TYPE_INT, {.i64 = -1},   INT_MIN, INT_MAX, A|V|F, "level" },
         { "info",    "information logging level", 0, AV_OPT_TYPE_CONST, {.i64 = AV_LOG_INFO},    INT_MIN, INT_MAX, A|V|F, "level" },
         { "verbose", "verbose logging level",     0, AV_OPT_TYPE_CONST, {.i64 = AV_LOG_VERBOSE}, INT_MIN, INT_MAX, A|V|F, "level" },
-    { "metadata", "inject metadata in the filtergraph", OFFSET(metadata), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, A|V|F },
+    { "metadata", "inject metadata in the filtergraph", OFFSET(metadata), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, A|V|F },
     { "peak", "set peak mode", OFFSET(peak_mode), AV_OPT_TYPE_FLAGS, {.i64 = PEAK_MODE_NONE}, 0, INT_MAX, A|F, "mode" },
         { "none",   "disable any peak mode",   0, AV_OPT_TYPE_CONST, {.i64 = PEAK_MODE_NONE},          INT_MIN, INT_MAX, A|F, "mode" },
         { "sample", "enable peak-sample mode", 0, AV_OPT_TYPE_CONST, {.i64 = PEAK_MODE_SAMPLES_PEAKS}, INT_MIN, INT_MAX, A|F, "mode" },
         { "true",   "enable true-peak mode",   0, AV_OPT_TYPE_CONST, {.i64 = PEAK_MODE_TRUE_PEAKS},    INT_MIN, INT_MAX, A|F, "mode" },
+    { "dualmono", "treat mono input files as dual-mono", OFFSET(dual_mono), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, A|F },
+    { "panlaw", "set a specific pan law for dual-mono files", OFFSET(pan_law), AV_OPT_TYPE_DOUBLE, {.dbl = -3.01029995663978}, -10.0, 0.0, A|F },
     { NULL },
 };
 
@@ -337,8 +341,6 @@ static int config_video_output(AVFilterLink *outlink)
     DRAW_RECT(ebur128->graph);
     DRAW_RECT(ebur128->gauge);
 
-    outlink->flags |= FF_LINK_FLAG_REQUEST_LOOP;
-
     return 0;
 }
 
@@ -398,8 +400,6 @@ static int config_audio_output(AVFilterLink *outlink)
             return AVERROR(ENOMEM);
     }
 
-    outlink->flags |= FF_LINK_FLAG_REQUEST_LOOP;
-
 #if CONFIG_SWRESAMPLE
     if (ebur128->peak_mode & PEAK_MODE_TRUE_PEAKS) {
         int ret;
@@ -435,7 +435,7 @@ static int config_audio_output(AVFilterLink *outlink)
     return 0;
 }
 
-#define ENERGY(loudness) (pow(10, ((loudness) + 0.691) / 10.))
+#define ENERGY(loudness) (ff_exp10(((loudness) + 0.691) / 10.))
 #define LOUDNESS(energy) (-0.691 + 10 * log10(energy))
 #define DBFS(energy) (20 * log10(energy))
 
@@ -558,9 +558,9 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
             ebur128->true_peaks_per_frame[ch] = 0.0;
         for (idx_insample = 0; idx_insample < ret; idx_insample++) {
             for (ch = 0; ch < nb_channels; ch++) {
-                ebur128->true_peaks[ch] = FFMAX(ebur128->true_peaks[ch], FFABS(*swr_samples));
+                ebur128->true_peaks[ch] = FFMAX(ebur128->true_peaks[ch], fabs(*swr_samples));
                 ebur128->true_peaks_per_frame[ch] = FFMAX(ebur128->true_peaks_per_frame[ch],
-                                                          FFABS(*swr_samples));
+                                                          fabs(*swr_samples));
                 swr_samples++;
             }
         }
@@ -586,7 +586,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
             double bin;
 
             if (ebur128->peak_mode & PEAK_MODE_SAMPLES_PEAKS)
-                ebur128->sample_peaks[ch] = FFMAX(ebur128->sample_peaks[ch], FFABS(*samples));
+                ebur128->sample_peaks[ch] = FFMAX(ebur128->sample_peaks[ch], fabs(*samples));
 
             ebur128->x[ch * 3] = *samples++; // set X[i]
 
@@ -663,8 +663,13 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
                     nb_integrated  += nb_v;
                     integrated_sum += nb_v * ebur128->i400.histogram[i].energy;
                 }
-                if (nb_integrated)
+                if (nb_integrated) {
                     ebur128->integrated_loudness = LOUDNESS(integrated_sum / nb_integrated);
+                    /* dual-mono correction */
+                    if (nb_channels == 1 && ebur128->dual_mono) {
+                        ebur128->integrated_loudness -= ebur128->pan_law;
+                    }
+                }
             }
 
             /* LRA */
@@ -711,6 +716,12 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
                 }
             }
 
+            /* dual-mono correction */
+            if (nb_channels == 1 && ebur128->dual_mono) {
+                loudness_400 -= ebur128->pan_law;
+                loudness_3000 -= ebur128->pan_law;
+            }
+
 #define LOG_FMT "M:%6.1f S:%6.1f     I:%6.1f LUFS     LRA:%6.1f LU"
 
             /* push one video frame */
@@ -816,6 +827,7 @@ static int query_formats(AVFilterContext *ctx)
     AVFilterChannelLayouts *layouts;
     AVFilterLink *inlink = ctx->inputs[0];
     AVFilterLink *outlink = ctx->outputs[0];
+    int ret;
 
     static const enum AVSampleFormat sample_fmts[] = { AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_NONE };
     static const int input_srate[] = {48000, -1}; // ITU-R BS.1770 provides coeff only for 48kHz
@@ -824,9 +836,8 @@ static int query_formats(AVFilterContext *ctx)
     /* set optional output video format */
     if (ebur128->do_video) {
         formats = ff_make_format_list(pix_fmts);
-        if (!formats)
-            return AVERROR(ENOMEM);
-        ff_formats_ref(formats, &outlink->in_formats);
+        if ((ret = ff_formats_ref(formats, &outlink->in_formats)) < 0)
+            return ret;
         outlink = ctx->outputs[1];
     }
 
@@ -834,22 +845,19 @@ static int query_formats(AVFilterContext *ctx)
      * Note: ff_set_common_* functions are not used because they affect all the
      * links, and thus break the video format negotiation */
     formats = ff_make_format_list(sample_fmts);
-    if (!formats)
-        return AVERROR(ENOMEM);
-    ff_formats_ref(formats, &inlink->out_formats);
-    ff_formats_ref(formats, &outlink->in_formats);
+    if ((ret = ff_formats_ref(formats, &inlink->out_formats)) < 0 ||
+        (ret = ff_formats_ref(formats, &outlink->in_formats)) < 0)
+        return ret;
 
     layouts = ff_all_channel_layouts();
-    if (!layouts)
-        return AVERROR(ENOMEM);
-    ff_channel_layouts_ref(layouts, &inlink->out_channel_layouts);
-    ff_channel_layouts_ref(layouts, &outlink->in_channel_layouts);
+    if ((ret = ff_channel_layouts_ref(layouts, &inlink->out_channel_layouts)) < 0 ||
+        (ret = ff_channel_layouts_ref(layouts, &outlink->in_channel_layouts)) < 0)
+        return ret;
 
     formats = ff_make_format_list(input_srate);
-    if (!formats)
-        return AVERROR(ENOMEM);
-    ff_formats_ref(formats, &inlink->out_samplerates);
-    ff_formats_ref(formats, &outlink->in_samplerates);
+    if ((ret = ff_formats_ref(formats, &inlink->out_samplerates)) < 0 ||
+        (ret = ff_formats_ref(formats, &outlink->in_samplerates)) < 0)
+        return ret;
 
     return 0;
 }
@@ -859,6 +867,14 @@ static av_cold void uninit(AVFilterContext *ctx)
     int i;
     EBUR128Context *ebur128 = ctx->priv;
 
+    /* dual-mono correction */
+    if (ebur128->nb_channels == 1 && ebur128->dual_mono) {
+        ebur128->i400.rel_threshold -= ebur128->pan_law;
+        ebur128->i3000.rel_threshold -= ebur128->pan_law;
+        ebur128->lra_low -= ebur128->pan_law;
+        ebur128->lra_high -= ebur128->pan_law;
+    }
+
     av_log(ctx, AV_LOG_INFO, "Summary:\n\n"
            "  Integrated loudness:\n"
            "    I:         %5.1f LUFS\n"
diff --git a/libavfilter/f_interleave.c b/libavfilter/f_interleave.c
index 95401cfb..422f2bfb 100644
--- a/libavfilter/f_interleave.c
+++ b/libavfilter/f_interleave.c
@@ -59,7 +59,7 @@ inline static int push_frame(AVFilterContext *ctx)
     for (i = 0; i < ctx->nb_inputs; i++) {
         struct FFBufQueue *q = &s->queues[i];
 
-        if (!q->available && !ctx->inputs[i]->closed)
+        if (!q->available && !ctx->inputs[i]->status)
             return 0;
         if (q->available) {
             frame = ff_bufqueue_peek(q, 0);
@@ -180,8 +180,6 @@ static int config_output(AVFilterLink *outlink)
             }
         }
     }
-
-    outlink->flags |= FF_LINK_FLAG_REQUEST_LOOP;
     return 0;
 }
 
@@ -192,7 +190,7 @@ static int request_frame(AVFilterLink *outlink)
     int i, ret;
 
     for (i = 0; i < ctx->nb_inputs; i++) {
-        if (!s->queues[i].available && !ctx->inputs[i]->closed) {
+        if (!s->queues[i].available && !ctx->inputs[i]->status) {
             ret = ff_request_frame(ctx->inputs[i]);
             if (ret != AVERROR_EOF)
                 return ret;
diff --git a/libavfilter/f_metadata.c b/libavfilter/f_metadata.c
new file mode 100644
index 00000000..fa6b9d35
--- /dev/null
+++ b/libavfilter/f_metadata.c
@@ -0,0 +1,405 @@
+/*
+ * Copyright (c) 2016 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * filter for manipulating frame metadata
+ */
+
+#include <float.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/avstring.h"
+#include "libavutil/eval.h"
+#include "libavutil/internal.h"
+#include "libavutil/opt.h"
+#include "avfilter.h"
+#include "audio.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+enum MetadataMode {
+    METADATA_SELECT, /* mode=select: forward only frames whose metadata matches */
+    METADATA_ADD,    /* mode=add: add key/value unless the key already has a value */
+    METADATA_MODIFY, /* mode=modify: overwrite the value of an existing key */
+    METADATA_DELETE, /* mode=delete: remove a key */
+    METADATA_PRINT,  /* mode=print: print metadata and forward the frame */
+    METADATA_NB      /* number of modes; bounds the "mode" option range */
+};
+
+enum MetadataFunction {
+    METADATAF_SAME_STR,    /* same_str(): exact string equality */
+    METADATAF_STARTS_WITH, /* starts_with(): option value is a prefix of the frame value */
+    METADATAF_LESS,        /* less(): numeric comparison of the values parsed as floats */
+    METADATAF_EQUAL,       /* equal(): numeric equality within FLT_EPSILON */
+    METADATAF_GREATER,     /* greater(): numeric comparison of the values parsed as floats */
+    METADATAF_EXPR,        /* parse_expr(): user expression over VALUE1 and VALUE2 */
+    METADATAF_NB           /* number of functions; bounds the "function" option range */
+};
+
+static const char *const var_names[] = {
+    "VALUE1",
+    "VALUE2",
+    NULL
+};
+
+enum var_name {
+    VAR_VALUE1,
+    VAR_VALUE2,
+    VAR_VARS_NB
+};
+
+typedef struct MetadataContext {
+    const AVClass *class;
+
+    int mode;       /* "mode" option, one of enum MetadataMode */
+    char *key;      /* "key" option; init() accepts NULL only in print mode */
+    char *value;    /* "value" option; init() requires it for add and modify */
+    int function;   /* "function" option, one of enum MetadataFunction */
+
+    char *expr_str; /* "expr" option: source text of the expression */
+    AVExpr *expr;   /* expr_str parsed by init() when function is expr */
+    double var_values[VAR_VARS_NB]; /* VALUE1/VALUE2 inputs filled by parse_expr() */
+
+    FILE *file;     /* stdout for "-", an fopen()ed stream, or NULL when no file is set */
+    char *file_str; /* "file" option */
+
+    int (*compare)(struct MetadataContext *s,
+                   const char *value1, const char *value2); /* chosen from function in init() */
+    void (*print)(AVFilterContext *ctx, const char *msg, ...) av_printf_format(2, 3); /* print_file or print_log */
+} MetadataContext;
+
+#define OFFSET(x) offsetof(MetadataContext, x)
+#define DEFINE_OPTIONS(filt_name, FLAGS) \
+static const AVOption filt_name##_options[] = { \
+    { "mode", "set a mode of operation", OFFSET(mode),   AV_OPT_TYPE_INT,    {.i64 = 0 }, 0, METADATA_NB-1, FLAGS, "mode" }, \
+    {   "select", "select frame",        0,              AV_OPT_TYPE_CONST,  {.i64 = METADATA_SELECT }, 0, 0, FLAGS, "mode" }, \
+    {   "add",    "add new metadata",    0,              AV_OPT_TYPE_CONST,  {.i64 = METADATA_ADD },    0, 0, FLAGS, "mode" }, \
+    {   "modify", "modify metadata",     0,              AV_OPT_TYPE_CONST,  {.i64 = METADATA_MODIFY }, 0, 0, FLAGS, "mode" }, \
+    {   "delete", "delete metadata",     0,              AV_OPT_TYPE_CONST,  {.i64 = METADATA_DELETE }, 0, 0, FLAGS, "mode" }, \
+    {   "print",  "print metadata",      0,              AV_OPT_TYPE_CONST,  {.i64 = METADATA_PRINT },  0, 0, FLAGS, "mode" }, \
+    { "key",   "set metadata key",       OFFSET(key),    AV_OPT_TYPE_STRING, {.str = NULL }, 0, 0, FLAGS }, \
+    { "value", "set metadata value",     OFFSET(value),  AV_OPT_TYPE_STRING, {.str = NULL }, 0, 0, FLAGS }, \
+    { "function", "function for comparing values", OFFSET(function), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, METADATAF_NB-1, FLAGS, "function" }, \
+    {   "same_str",    NULL, 0, AV_OPT_TYPE_CONST, {.i64 = METADATAF_SAME_STR },    0, 3, FLAGS, "function" }, \
+    {   "starts_with", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = METADATAF_STARTS_WITH }, 0, 0, FLAGS, "function" }, \
+    {   "less",        NULL, 0, AV_OPT_TYPE_CONST, {.i64 = METADATAF_LESS    },     0, 3, FLAGS, "function" }, \
+    {   "equal",       NULL, 0, AV_OPT_TYPE_CONST, {.i64 = METADATAF_EQUAL   },     0, 3, FLAGS, "function" }, \
+    {   "greater",     NULL, 0, AV_OPT_TYPE_CONST, {.i64 = METADATAF_GREATER },     0, 3, FLAGS, "function" }, \
+    {   "expr",        NULL, 0, AV_OPT_TYPE_CONST, {.i64 = METADATAF_EXPR    },     0, 3, FLAGS, "function" }, \
+    { "expr", "set expression for expr function", OFFSET(expr_str), AV_OPT_TYPE_STRING, {.str = NULL }, 0, 0, FLAGS }, \
+    { "file", "set file where to print metadata information", OFFSET(file_str), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, \
+    { NULL } \
+}
+
+static int same_str(MetadataContext *s, const char *value1, const char *value2)
+{
+    return strcmp(value1, value2) == 0; /* 1 only when both strings are identical */
+}
+
+static int starts_with(MetadataContext *s, const char *value1, const char *value2)
+{
+    return strncmp(value1, value2, strlen(value2)) == 0; /* 1 when value2 is a prefix of value1 */
+}
+
+static int equal(MetadataContext *s, const char *value1, const char *value2)
+{
+    float lhs, rhs; /* value1 and value2 parsed as floats */
+
+    if (sscanf(value1, "%f", &lhs) + sscanf(value2, "%f", &rhs) != 2)
+        return 0; /* at least one value did not parse as a number */
+
+    return fabsf(lhs - rhs) < FLT_EPSILON; /* equal within FLT_EPSILON */
+}
+
+static int less(MetadataContext *s, const char *value1, const char *value2)
+{
+    float lhs, rhs; /* value1 and value2 parsed as floats */
+
+    if (sscanf(value1, "%f", &lhs) + sscanf(value2, "%f", &rhs) != 2)
+        return 0; /* at least one value did not parse as a number */
+
+    return (lhs - rhs) < FLT_EPSILON; /* note: also true when both values are equal */
+}
+
+static int greater(MetadataContext *s, const char *value1, const char *value2)
+{
+    float lhs, rhs; /* value1 and value2 parsed as floats */
+
+    if (sscanf(value1, "%f", &lhs) + sscanf(value2, "%f", &rhs) != 2)
+        return 0; /* at least one value did not parse as a number */
+
+    return (rhs - lhs) < FLT_EPSILON; /* note: also true when both values are equal */
+}
+
+static int parse_expr(MetadataContext *s, const char *value1, const char *value2)
+{
+    double v1, v2; /* value1 and value2 parsed as doubles */
+
+    if (sscanf(value1, "%lf", &v1) + sscanf(value2, "%lf", &v2) != 2)
+        return 0; /* at least one value did not parse as a number */
+
+    s->var_values[VAR_VALUE1] = v1; /* exposed to the expression as VALUE1 */
+    s->var_values[VAR_VALUE2] = v2; /* exposed to the expression as VALUE2 */
+
+    return av_expr_eval(s->expr, s->var_values, NULL); /* result is converted to int */
+}
+
+static void print_log(AVFilterContext *ctx, const char *msg, ...)
+{
+    va_list ap;
+
+    va_start(ap, msg);
+    if (msg != NULL) /* a NULL format prints nothing */
+        av_vlog(ctx, AV_LOG_INFO, msg, ap);
+    va_end(ap);
+}
+
+static void print_file(AVFilterContext *ctx, const char *msg, ...)
+{
+    MetadataContext *meta = ctx->priv;
+    va_list ap;
+
+    va_start(ap, msg);
+    if (msg != NULL) /* a NULL format prints nothing */
+        vfprintf(meta->file, msg, ap);
+    va_end(ap);
+}
+
+static av_cold int init(AVFilterContext *ctx) /* validate options, pick compare/print callbacks */
+{
+    MetadataContext *s = ctx->priv;
+    int ret;
+
+    if (!s->key && s->mode != METADATA_PRINT) { /* only print mode works without a key */
+        av_log(ctx, AV_LOG_WARNING, "Metadata key must be set\n");
+        return AVERROR(EINVAL);
+    }
+
+    if ((s->mode == METADATA_MODIFY ||
+        s->mode == METADATA_ADD) && !s->value) { /* these modes write s->value */
+        av_log(ctx, AV_LOG_WARNING, "Missing metadata value\n");
+        return AVERROR(EINVAL);
+    }
+
+    switch (s->function) { /* map the "function" option to its comparison callback */
+    case METADATAF_SAME_STR:
+        s->compare = same_str;
+        break;
+    case METADATAF_STARTS_WITH:
+        s->compare = starts_with;
+        break;
+    case METADATAF_LESS:
+        s->compare = less;
+        break;
+    case METADATAF_EQUAL:
+        s->compare = equal;
+        break;
+    case METADATAF_GREATER:
+        s->compare = greater;
+        break;
+    case METADATAF_EXPR:
+        s->compare = parse_expr;
+        break;
+    default:
+        av_assert0(0); /* the option's declared range is 0..METADATAF_NB-1 */
+    };
+
+    if (s->function == METADATAF_EXPR) { /* parse once here; parse_expr() evaluates per frame */
+        if (!s->expr_str) {
+            av_log(ctx, AV_LOG_WARNING, "expr option not set\n");
+            return AVERROR(EINVAL);
+        }
+        if ((ret = av_expr_parse(&s->expr, s->expr_str,
+                                 var_names, NULL, NULL, NULL, NULL, 0, ctx)) < 0) {
+            av_log(ctx, AV_LOG_ERROR, "Error while parsing expression '%s'\n", s->expr_str);
+            return ret;
+        }
+    }
+
+    if (s->file_str) {
+        if (!strcmp(s->file_str, "-")) { /* "-" selects stdout */
+            s->file = stdout;
+        } else {
+            s->file = fopen(s->file_str, "w");
+            if (!s->file) {
+                int err = AVERROR(errno); /* capture errno before any further call */
+                char buf[128];
+                av_strerror(err, buf, sizeof(buf));
+                av_log(ctx, AV_LOG_ERROR, "Could not open file %s: %s\n",
+                       s->file_str, buf);
+                return err;
+            }
+        }
+        s->print = print_file;
+    } else {
+        s->print = print_log; /* no file given: print through av_vlog at INFO level */
+    }
+
+    return 0;
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    MetadataContext *s = ctx->priv;
+    av_expr_free(s->expr); /* release the expression parsed in init(); NULL is accepted */
+    if (s->file && s->file != stdout) /* stdout was not opened by this filter */
+        fclose(s->file);
+    s->file = NULL;
+}
+
+/**
+ * Apply the configured metadata operation to one frame.
+ *
+ * select mode forwards the frame only when the key (and, if a value is set,
+ * the comparison) matches and frees it otherwise; every other mode edits or
+ * prints the metadata and always forwards the frame.
+ */
+static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    MetadataContext *s = ctx->priv;
+    /* Edit through the frame's own pointer: av_dict_set() may allocate the
+     * dictionary or free it (last entry removed) and then updates *metadata. */
+    AVDictionary **metadata = avpriv_frame_get_metadatap(frame);
+    AVDictionaryEntry *e;
+
+    /* A frame without any metadata simply yields e == NULL. */
+    e = av_dict_get(*metadata, !s->key ? "" : s->key, NULL,
+                    !s->key ? AV_DICT_IGNORE_SUFFIX: 0);
+
+    switch (s->mode) {
+    case METADATA_SELECT:
+        if (!s->value && e && e->value) {
+            return ff_filter_frame(outlink, frame);
+        } else if (s->value && e && e->value &&
+                   s->compare(s, e->value, s->value)) {
+            return ff_filter_frame(outlink, frame);
+        }
+        break;
+    case METADATA_ADD:
+        /* never overwrite an existing value */
+        if (!(e && e->value))
+            av_dict_set(metadata, s->key, s->value, 0);
+        return ff_filter_frame(outlink, frame);
+    case METADATA_MODIFY:
+        if (e && e->value)
+            av_dict_set(metadata, s->key, s->value, 0);
+        return ff_filter_frame(outlink, frame);
+    case METADATA_PRINT:
+        /* without a key dump every entry, otherwise only the matching key */
+        if (!s->key && e) {
+            s->print(ctx, "frame %"PRId64" pts %"PRId64"\n", inlink->frame_count, frame->pts);
+            s->print(ctx, "%s=%s\n", e->key, e->value);
+            while ((e = av_dict_get(*metadata, "", e, AV_DICT_IGNORE_SUFFIX)) != NULL) {
+                s->print(ctx, "%s=%s\n", e->key, e->value);
+            }
+        } else if (e && e->value && (!s->value || s->compare(s, e->value, s->value))) {
+            s->print(ctx, "frame %"PRId64" pts %"PRId64"\n", inlink->frame_count, frame->pts);
+            s->print(ctx, "%s=%s\n", s->key, e->value);
+        }
+        return ff_filter_frame(outlink, frame);
+    case METADATA_DELETE:
+        /* with a value set, delete only when the comparison matches */
+        if (e && e->value && (!s->value || s->compare(s, e->value, s->value)))
+            av_dict_set(metadata, s->key, NULL, 0);
+        return ff_filter_frame(outlink, frame);
+    default:
+        av_assert0(0);
+    };
+
+    /* only reached in select mode when the frame did not match */
+    av_frame_free(&frame);
+
+    return 0;
+}
+
+#if CONFIG_AMETADATA_FILTER
+
+DEFINE_OPTIONS(ametadata, AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM); /* defines ametadata_options[] */
+AVFILTER_DEFINE_CLASS(ametadata);
+
+static const AVFilterPad ainputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_AUDIO,
+        .filter_frame = filter_frame, /* same callback as the video filter */
+    },
+    { NULL }
+};
+
+static const AVFilterPad aoutputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_AUDIO,
+    },
+    { NULL }
+};
+
+/* Audio variant of the metadata filter. */
+AVFilter ff_af_ametadata = {
+    .name          = "ametadata",
+    .description   = NULL_IF_CONFIG_SMALL("Manipulate audio frame metadata."),
+    .priv_size     = sizeof(MetadataContext),
+    .priv_class    = &ametadata_class,
+    .init          = init,
+    .uninit        = uninit,
+    .query_formats = ff_query_formats_all,
+    .inputs        = ainputs,
+    .outputs       = aoutputs,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC,
+};
+#endif /* CONFIG_AMETADATA_FILTER */
+
+#if CONFIG_METADATA_FILTER
+
+DEFINE_OPTIONS(metadata, AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM); /* defines metadata_options[] */
+AVFILTER_DEFINE_CLASS(metadata);
+
+static const AVFilterPad inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame, /* same callback as the audio filter */
+    },
+    { NULL }
+};
+
+static const AVFilterPad outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO,
+    },
+    { NULL }
+};
+
+/* Video variant of the metadata filter; unlike ametadata it sets no query_formats. */
+AVFilter ff_vf_metadata = {
+    .name        = "metadata",
+    .description = NULL_IF_CONFIG_SMALL("Manipulate video frame metadata."),
+    .priv_size   = sizeof(MetadataContext),
+    .priv_class  = &metadata_class,
+    .init        = init,
+    .uninit      = uninit,
+    .inputs      = inputs,
+    .outputs     = outputs,
+    .flags       = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC,
+};
+#endif /* CONFIG_METADATA_FILTER */
diff --git a/libavfilter/f_perms.c b/libavfilter/f_perms.c
index abe2e4f4..40b88111 100644
--- a/libavfilter/f_perms.c
+++ b/libavfilter/f_perms.c
@@ -56,16 +56,16 @@ static const AVOption options[] = {
 
 static av_cold int init(AVFilterContext *ctx)
 {
-    PermsContext *perms = ctx->priv;
+    PermsContext *s = ctx->priv;
 
-    if (perms->mode == MODE_RANDOM) {
+    if (s->mode == MODE_RANDOM) {
         uint32_t seed;
 
-        if (perms->random_seed == -1)
-            perms->random_seed = av_get_random_seed();
-        seed = perms->random_seed;
+        if (s->random_seed == -1)
+            s->random_seed = av_get_random_seed();
+        seed = s->random_seed;
         av_log(ctx, AV_LOG_INFO, "random seed: 0x%08x\n", seed);
-        av_lfg_init(&perms->lfg, seed);
+        av_lfg_init(&s->lfg, seed);
     }
 
     return 0;
@@ -78,14 +78,14 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
 {
     int ret;
     AVFilterContext *ctx = inlink->dst;
-    PermsContext *perms = ctx->priv;
+    PermsContext *s = ctx->priv;
     AVFrame *out = frame;
     enum perm in_perm = av_frame_is_writable(frame) ? RW : RO;
     enum perm out_perm;
 
-    switch (perms->mode) {
+    switch (s->mode) {
     case MODE_TOGGLE:   out_perm = in_perm == RO ? RW : RO;                 break;
-    case MODE_RANDOM:   out_perm = av_lfg_get(&perms->lfg) & 1 ? RW : RO;   break;
+    case MODE_RANDOM:   out_perm = av_lfg_get(&s->lfg) & 1 ? RW : RO;       break;
     case MODE_RO:       out_perm = RO;                                      break;
     case MODE_RW:       out_perm = RW;                                      break;
     default:            out_perm = in_perm;                                 break;
diff --git a/libavfilter/f_realtime.c b/libavfilter/f_realtime.c
new file mode 100644
index 00000000..171c16aa
--- /dev/null
+++ b/libavfilter/f_realtime.c
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2015 Nicolas George
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/opt.h"
+#include "libavutil/time.h"
+#include "avfilter.h"
+#include "internal.h"
+
+typedef struct RealtimeContext {
+    const AVClass *class;
+    int64_t delta;   /* av_gettime_relative() minus the rescaled pts, as last (re)set in filter_frame() */
+    int64_t limit;   /* "limit" option: largest |sleep| accepted before delta is reset */
+    unsigned inited; /* nonzero once delta was set from a first timestamped frame */
+} RealtimeContext;
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
+{
+    AVFilterContext *ctx = inlink->dst;
+    RealtimeContext *s   = ctx->priv;
+    int64_t pts_us, now_us, wait_us;
+
+    if (frame->pts == AV_NOPTS_VALUE)   /* no timestamp: forward without pacing */
+        return ff_filter_frame(ctx->outputs[0], frame);
+    pts_us  = av_rescale_q(frame->pts, inlink->time_base, AV_TIME_BASE_Q);
+    now_us  = av_gettime_relative();
+    wait_us = pts_us - now_us + s->delta;
+    if (!s->inited) {                   /* first timestamped frame sets the reference */
+        s->inited = 1;
+        wait_us   = 0;
+        s->delta  = now_us - pts_us;
+    }
+    if (wait_us > s->limit || wait_us < -s->limit) {
+        av_log(ctx, AV_LOG_WARNING,
+               "time discontinuity detected: %"PRIi64" us, resetting\n", wait_us);
+        wait_us  = 0;
+        s->delta = now_us - pts_us;
+    }
+    if (wait_us > 0) {
+        av_log(ctx, AV_LOG_DEBUG, "sleeping %"PRIi64" us\n", wait_us);
+        for (; wait_us > 600000000; wait_us -= 600000000) /* sleep in bounded slices */
+            av_usleep(600000000);
+        av_usleep(wait_us);
+    }
+    return ff_filter_frame(ctx->outputs[0], frame);
+}
+
+#define OFFSET(x) offsetof(RealtimeContext, x)
+#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_FILTERING_PARAM) /* parenthesized so it expands safely in any expression */
+static const AVOption options[] = { /* shared by realtime and arealtime */
+    { "limit", "sleep time limit", OFFSET(limit), AV_OPT_TYPE_DURATION, { .i64 = 2000000 }, 0, INT64_MAX, FLAGS },
+    { NULL }
+};
+
+#if CONFIG_REALTIME_FILTER
+#define realtime_options options /* presumably the name AVFILTER_DEFINE_CLASS(realtime) looks up - TODO confirm */
+AVFILTER_DEFINE_CLASS(realtime);
+
+static const AVFilterPad avfilter_vf_realtime_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame, /* same callback as arealtime */
+    },
+    { NULL }
+};
+
+static const AVFilterPad avfilter_vf_realtime_outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_realtime = { /* video variant; no init/uninit callbacks are set */
+    .name        = "realtime",
+    .description = NULL_IF_CONFIG_SMALL("Slow down filtering to match realtime."),
+    .priv_size   = sizeof(RealtimeContext),
+    .priv_class  = &realtime_class,
+    .inputs      = avfilter_vf_realtime_inputs,
+    .outputs     = avfilter_vf_realtime_outputs,
+};
+#endif /* CONFIG_REALTIME_FILTER */
+
+#if CONFIG_AREALTIME_FILTER
+
+#define arealtime_options options /* presumably the name AVFILTER_DEFINE_CLASS(arealtime) looks up - TODO confirm */
+AVFILTER_DEFINE_CLASS(arealtime);
+
+static const AVFilterPad arealtime_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_AUDIO,
+        .filter_frame = filter_frame, /* same callback as realtime */
+    },
+    { NULL }
+};
+
+static const AVFilterPad arealtime_outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_AUDIO,
+    },
+    { NULL }
+};
+
+AVFilter ff_af_arealtime = { /* audio variant; no init/uninit callbacks are set */
+    .name        = "arealtime",
+    .description = NULL_IF_CONFIG_SMALL("Slow down filtering to match realtime."),
+    .priv_size   = sizeof(RealtimeContext),
+    .priv_class  = &arealtime_class,
+    .inputs      = arealtime_inputs,
+    .outputs     = arealtime_outputs,
+};
+#endif /* CONFIG_AREALTIME_FILTER */
diff --git a/libavfilter/f_reverse.c b/libavfilter/f_reverse.c
new file mode 100644
index 00000000..5bf71b38
--- /dev/null
+++ b/libavfilter/f_reverse.c
@@ -0,0 +1,251 @@
+/*
+ * Copyright (c) 2015 Derek Buitenhuis
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/opt.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+#define DEFAULT_LENGTH 300
+
+typedef struct ReverseContext {
+    int nb_frames;            /* number of frames currently queued */
+    AVFrame **frames;         /* queued frames, in arrival order */
+    unsigned int frames_size; /* allocated size of frames, in bytes */
+    unsigned int pts_size;    /* allocated size of pts, in bytes */
+    int64_t *pts;             /* pts of every queued frame, in arrival order */
+    int flush_idx;            /* index into pts of the next timestamp to hand out */
+} ReverseContext;
+
+/* Pre-allocate room for DEFAULT_LENGTH timestamps and frame pointers. */
+static av_cold int init(AVFilterContext *ctx)
+{
+    ReverseContext *rev = ctx->priv;
+    const size_t pts_bytes    = DEFAULT_LENGTH * sizeof(*rev->pts);
+    const size_t frames_bytes = DEFAULT_LENGTH * sizeof(*rev->frames);
+
+    rev->pts = av_fast_realloc(NULL, &rev->pts_size, pts_bytes);
+    if (rev->pts == NULL)
+        return AVERROR(ENOMEM);
+
+    rev->frames = av_fast_realloc(NULL, &rev->frames_size, frames_bytes);
+    if (rev->frames != NULL)
+        return 0;
+
+    av_freep(&rev->pts); /* undo the first allocation on failure */
+    return AVERROR(ENOMEM);
+}
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    ReverseContext *s = ctx->priv;
+    while (s->nb_frames > 0) /* frames queued but never output would leak otherwise */
+        av_frame_free(&s->frames[--s->nb_frames]);
+    av_freep(&s->pts);
+    av_freep(&s->frames);
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    ReverseContext *s    = ctx->priv;
+    void *ptr;
+
+    if (s->nb_frames + 1 > s->pts_size / sizeof(*(s->pts))) { /* pts array full: double it */
+        ptr = av_fast_realloc(s->pts, &s->pts_size, s->pts_size * 2);
+        if (!ptr)
+            goto fail;
+        s->pts = ptr;
+    }
+    if (s->nb_frames + 1 > s->frames_size / sizeof(*(s->frames))) { /* same for frames */
+        ptr = av_fast_realloc(s->frames, &s->frames_size, s->frames_size * 2);
+        if (!ptr)
+            goto fail;
+        s->frames = ptr;
+    }
+    s->frames[s->nb_frames] = in; /* queued until the request callbacks flush it */
+    s->pts[s->nb_frames]    = in->pts;
+    s->nb_frames++;
+    return 0;
+fail:
+    av_frame_free(&in); /* the frame was handed to us: do not leak it on error */
+    return AVERROR(ENOMEM);
+}
+
+#if CONFIG_REVERSE_FILTER
+
+/* After input EOF, output the queued frames last-to-first, one per call. */
+static int request_frame(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    ReverseContext *rev  = ctx->priv;
+    AVFrame *last;
+    int ret = ff_request_frame(ctx->inputs[0]);
+
+    if (ret != AVERROR_EOF || rev->nb_frames <= 0)
+        return ret;
+
+    last      = rev->frames[rev->nb_frames - 1];
+    last->pts = rev->pts[rev->flush_idx++]; /* timestamps are reused in arrival order */
+    ret       = ff_filter_frame(outlink, last);
+    rev->nb_frames--;
+    return ret;
+}
+
+static const AVFilterPad reverse_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame, /* queues every incoming frame */
+    },
+    { NULL }
+};
+
+static const AVFilterPad reverse_outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .request_frame = request_frame, /* flushes the queue once the input reports EOF */
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_reverse = { /* video variant; declares no options */
+    .name        = "reverse",
+    .description = NULL_IF_CONFIG_SMALL("Reverse a clip."),
+    .priv_size   = sizeof(ReverseContext),
+    .init        = init,
+    .uninit      = uninit,
+    .inputs      = reverse_inputs,
+    .outputs     = reverse_outputs,
+};
+
+#endif /* CONFIG_REVERSE_FILTER */
+
+#if CONFIG_AREVERSE_FILTER
+
+/* Set ff_all_channel_counts(), ff_planar_sample_fmts() and ff_all_samplerates()
+ * as the common lists; areverse_request_frame() reverses data plane by plane. */
+static int query_formats(AVFilterContext *ctx)
+{
+    AVFilterChannelLayouts *counts = ff_all_channel_counts();
+    AVFilterFormats *rates;
+    int err;
+
+    if (counts == NULL)
+        return AVERROR(ENOMEM);
+    if ((err = ff_set_common_channel_layouts(ctx, counts)) < 0)
+        return err;
+
+    if ((err = ff_set_common_formats(ctx, ff_planar_sample_fmts())) < 0)
+        return err;
+
+    rates = ff_all_samplerates();
+    if (rates == NULL)
+        return AVERROR(ENOMEM);
+
+    return ff_set_common_samplerates(ctx, rates);
+}
+
+/* Reverse, in place, the nb samples of one plane viewed as an array of type. */
+#define REVERSE_PLANE(type, data, nb) do {                    \
+    type *buf_ = (type *)(data);                              \
+    int lo_, hi_;                                             \
+    for (lo_ = 0, hi_ = (nb) - 1; lo_ < hi_; lo_++, hi_--)    \
+        FFSWAP(type, buf_[lo_], buf_[hi_]);                   \
+} while (0)
+
+/* After input EOF, output the queued audio frames last-to-first, one per
+ * call, with the samples of every channel plane reversed as well. */
+static int areverse_request_frame(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    ReverseContext *s    = ctx->priv;
+    AVFrame *out;
+    int ret, ch;
+
+    ret = ff_request_frame(ctx->inputs[0]);
+    if (ret != AVERROR_EOF || s->nb_frames <= 0)
+        return ret;
+
+    out      = s->frames[s->nb_frames - 1];
+    out->pts = s->pts[s->flush_idx++];
+
+    /* pick the element type matching the negotiated sample format */
+    for (ch = 0; ch < outlink->channels; ch++) {
+        switch (outlink->format) {
+        case AV_SAMPLE_FMT_U8P:
+            REVERSE_PLANE(uint8_t, out->extended_data[ch], out->nb_samples);
+            break;
+        case AV_SAMPLE_FMT_S16P:
+            REVERSE_PLANE(int16_t, out->extended_data[ch], out->nb_samples);
+            break;
+        case AV_SAMPLE_FMT_S32P:
+            REVERSE_PLANE(int32_t, out->extended_data[ch], out->nb_samples);
+            break;
+        case AV_SAMPLE_FMT_FLTP:
+            REVERSE_PLANE(float, out->extended_data[ch], out->nb_samples);
+            break;
+        case AV_SAMPLE_FMT_DBLP:
+            REVERSE_PLANE(double, out->extended_data[ch], out->nb_samples);
+            break;
+        default: /* any other format is left untouched */
+            break;
+        }
+    }
+
+    ret = ff_filter_frame(outlink, out);
+    s->nb_frames--;
+    return ret;
+}
+
+#undef REVERSE_PLANE
+
+static const AVFilterPad areverse_inputs[] = {
+    {
+        .name           = "default",
+        .type           = AVMEDIA_TYPE_AUDIO,
+        .filter_frame   = filter_frame, /* queues every incoming frame */
+        .needs_writable = 1, /* areverse_request_frame() swaps samples in place */
+    },
+    { NULL }
+};
+
+static const AVFilterPad areverse_outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_AUDIO,
+        .request_frame = areverse_request_frame, /* flushes the queue once the input reports EOF */
+    },
+    { NULL }
+};
+
+AVFilter ff_af_areverse = { /* audio variant; declares no options */
+    .name          = "areverse",
+    .description   = NULL_IF_CONFIG_SMALL("Reverse an audio clip."),
+    .query_formats = query_formats,
+    .priv_size     = sizeof(ReverseContext),
+    .init          = init,
+    .uninit        = uninit,
+    .inputs        = areverse_inputs,
+    .outputs       = areverse_outputs,
+};
+
+#endif /* CONFIG_AREVERSE_FILTER */
diff --git a/libavfilter/f_select.c b/libavfilter/f_select.c
index 3e7cf782..52f474eb 100644
--- a/libavfilter/f_select.c
+++ b/libavfilter/f_select.c
@@ -82,6 +82,8 @@ static const char *const var_names[] = {
 
     "scene",
 
+    "concatdec_select",  ///< frame is within the interval set by the concat demuxer
+
     NULL
 };
 
@@ -132,6 +134,8 @@ enum var_name {
 
     VAR_SCENE,
 
+    VAR_CONCATDEC_SELECT,
+
     VAR_VARS_NB
 };
 
@@ -278,6 +282,28 @@ static double get_scene_score(AVFilterContext *ctx, AVFrame *frame)
     return ret;
 }
 
+static double get_concatdec_select(AVFrame *frame, int64_t pts)
+{
+    /* -1: pts inside the tagged interval, 0: outside, NAN: no start_time entry. */
+    AVDictionary *meta = av_frame_get_metadata(frame);
+    AVDictionaryEntry *start_tag = av_dict_get(meta, "lavf.concatdec.start_time", NULL, 0);
+    AVDictionaryEntry *len_tag   = av_dict_get(meta, "lavf.concatdec.duration", NULL, 0);
+    int64_t first, span;
+
+    if (!start_tag)
+        return NAN;
+
+    first = strtoll(start_tag->value, NULL, 10);
+    if (pts < first)
+        return 0;
+    if (!len_tag)
+        return -1; /* no duration entry: only the lower bound is checked */
+
+    /* The end of the interval (start + duration) is exclusive. */
+    span = strtoll(len_tag->value, NULL, 10);
+    return pts < first + span ? -1 : 0;
+}
+
 #define D2TS(d)  (isnan(d) ? AV_NOPTS_VALUE : (int64_t)(d))
 #define TS2D(ts) ((ts) == AV_NOPTS_VALUE ? NAN : (double)(ts))
 
@@ -297,6 +323,7 @@ static void select_frame(AVFilterContext *ctx, AVFrame *frame)
     select->var_values[VAR_T  ] = TS2D(frame->pts) * av_q2d(inlink->time_base);
     select->var_values[VAR_POS] = av_frame_get_pkt_pos(frame) == -1 ? NAN : av_frame_get_pkt_pos(frame);
     select->var_values[VAR_KEY] = frame->key_frame;
+    select->var_values[VAR_CONCATDEC_SELECT] = get_concatdec_select(frame, av_rescale_q(frame->pts, inlink->time_base, AV_TIME_BASE_Q));
 
     switch (inlink->type) {
     case AVMEDIA_TYPE_AUDIO:
@@ -379,18 +406,9 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
 
 static int request_frame(AVFilterLink *outlink)
 {
-    AVFilterContext *ctx = outlink->src;
-    SelectContext *select = ctx->priv;
     AVFilterLink *inlink = outlink->src->inputs[0];
-    int out_no = FF_OUTLINK_IDX(outlink);
-
-    do {
-        int ret = ff_request_frame(inlink);
-        if (ret < 0)
-            return ret;
-    } while (select->select_out != out_no);
-
-    return 0;
+    int ret = ff_request_frame(inlink); /* a single request on input 0 per call, no retry loop */
+    return ret;
 }
 
 static av_cold void uninit(AVFilterContext *ctx)
diff --git a/libavfilter/f_sendcmd.c b/libavfilter/f_sendcmd.c
index 7cb958b7..fb30220e 100644
--- a/libavfilter/f_sendcmd.c
+++ b/libavfilter/f_sendcmd.c
@@ -364,28 +364,24 @@ static int cmp_intervals(const void *a, const void *b)
 {
     const Interval *i1 = a;
     const Interval *i2 = b;
-    int64_t ts_diff = i1->start_ts - i2->start_ts;
-    int ret;
-
-    ret = ts_diff > 0 ? 1 : ts_diff < 0 ? -1 : 0;
-    return ret == 0 ? i1->index - i2->index : ret;
+    return 2 * FFDIFFSIGN(i1->start_ts, i2->start_ts) + FFDIFFSIGN(i1->index, i2->index);
 }
 
 static av_cold int init(AVFilterContext *ctx)
 {
-    SendCmdContext *sendcmd = ctx->priv;
+    SendCmdContext *s = ctx->priv;
     int ret, i, j;
 
-    if ((!!sendcmd->commands_filename + !!sendcmd->commands_str) != 1) {
+    if ((!!s->commands_filename + !!s->commands_str) != 1) {
         av_log(ctx, AV_LOG_ERROR,
                "One and only one of the filename or commands options must be specified\n");
         return AVERROR(EINVAL);
     }
 
-    if (sendcmd->commands_filename) {
+    if (s->commands_filename) {
         uint8_t *file_buf, *buf;
         size_t file_bufsize;
-        ret = av_file_map(sendcmd->commands_filename,
+        ret = av_file_map(s->commands_filename,
                           &file_buf, &file_bufsize, 0, ctx);
         if (ret < 0)
             return ret;
@@ -399,24 +395,24 @@ static av_cold int init(AVFilterContext *ctx)
         memcpy(buf, file_buf, file_bufsize);
         buf[file_bufsize] = 0;
         av_file_unmap(file_buf, file_bufsize);
-        sendcmd->commands_str = buf;
+        s->commands_str = buf;
     }
 
-    if ((ret = parse_intervals(&sendcmd->intervals, &sendcmd->nb_intervals,
-                               sendcmd->commands_str, ctx)) < 0)
+    if ((ret = parse_intervals(&s->intervals, &s->nb_intervals,
+                               s->commands_str, ctx)) < 0)
         return ret;
 
-    if (sendcmd->nb_intervals == 0) {
+    if (s->nb_intervals == 0) {
         av_log(ctx, AV_LOG_ERROR, "No commands were specified\n");
         return AVERROR(EINVAL);
     }
 
-    qsort(sendcmd->intervals, sendcmd->nb_intervals, sizeof(Interval), cmp_intervals);
+    qsort(s->intervals, s->nb_intervals, sizeof(Interval), cmp_intervals);
 
     av_log(ctx, AV_LOG_DEBUG, "Parsed commands:\n");
-    for (i = 0; i < sendcmd->nb_intervals; i++) {
+    for (i = 0; i < s->nb_intervals; i++) {
         AVBPrint pbuf;
-        Interval *interval = &sendcmd->intervals[i];
+        Interval *interval = &s->intervals[i];
         av_log(ctx, AV_LOG_VERBOSE, "start_time:%f end_time:%f index:%d\n",
                (double)interval->start_ts/1000000, (double)interval->end_ts/1000000, interval->index);
         for (j = 0; j < interval->nb_commands; j++) {
@@ -432,11 +428,11 @@ static av_cold int init(AVFilterContext *ctx)
 
 static av_cold void uninit(AVFilterContext *ctx)
 {
-    SendCmdContext *sendcmd = ctx->priv;
+    SendCmdContext *s = ctx->priv;
     int i, j;
 
-    for (i = 0; i < sendcmd->nb_intervals; i++) {
-        Interval *interval = &sendcmd->intervals[i];
+    for (i = 0; i < s->nb_intervals; i++) {
+        Interval *interval = &s->intervals[i];
         for (j = 0; j < interval->nb_commands; j++) {
             Command *cmd = &interval->commands[j];
             av_freep(&cmd->target);
@@ -445,13 +441,13 @@ static av_cold void uninit(AVFilterContext *ctx)
         }
         av_freep(&interval->commands);
     }
-    av_freep(&sendcmd->intervals);
+    av_freep(&s->intervals);
 }
 
 static int filter_frame(AVFilterLink *inlink, AVFrame *ref)
 {
     AVFilterContext *ctx = inlink->dst;
-    SendCmdContext *sendcmd = ctx->priv;
+    SendCmdContext *s = ctx->priv;
     int64_t ts;
     int i, j, ret;
 
@@ -462,8 +458,8 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *ref)
 
 #define WITHIN_INTERVAL(ts, start_ts, end_ts) ((ts) >= (start_ts) && (ts) < (end_ts))
 
-    for (i = 0; i < sendcmd->nb_intervals; i++) {
-        Interval *interval = &sendcmd->intervals[i];
+    for (i = 0; i < s->nb_intervals; i++) {
+        Interval *interval = &s->intervals[i];
         int flags = 0;
 
         if (!interval->enabled && WITHIN_INTERVAL(ts, interval->start_ts, interval->end_ts)) {
diff --git a/libavfilter/f_streamselect.c b/libavfilter/f_streamselect.c
new file mode 100644
index 00000000..db46c997
--- /dev/null
+++ b/libavfilter/f_streamselect.c
@@ -0,0 +1,353 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avstring.h"
+#include "libavutil/internal.h"
+#include "libavutil/opt.h"
+#include "avfilter.h"
+#include "audio.h"
+#include "formats.h"
+#include "framesync.h"
+#include "internal.h"
+#include "video.h"
+
+typedef struct StreamSelectContext { /* private context of streamselect / astreamselect */
+    const AVClass *class;
+    int nb_inputs;      /* "inputs" option: number of input streams */
+    char *map_str;      /* "map" option exactly as given by the user */
+    int *map;           /* parsed map: map[i] is the input index feeding output i */
+    int nb_map;         /* number of entries parsed into map */
+    int is_audio;       /* set to 1 when the filter runs as "astreamselect" */
+    int64_t *last_pts;  /* per input: pts of the frame most recently forwarded */
+    AVFrame **frames;   /* per input: frame fetched from fs for the current event */
+    FFFrameSync fs;     /* frame synchronizer driving process_frame() */
+} StreamSelectContext;
+
+#define OFFSET(x) offsetof(StreamSelectContext, x)
+#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_FILTERING_PARAM) /* parenthesized: safe in any expression */
+static const AVOption streamselect_options[] = { /* option table; default of "map" is NULL */
+    { "inputs",  "number of input streams",           OFFSET(nb_inputs),  AV_OPT_TYPE_INT,    {.i64=2},    2, INT_MAX,  .flags=FLAGS },
+    { "map",     "input indexes to remap to outputs", OFFSET(map_str),    AV_OPT_TYPE_STRING, {.str=NULL},              .flags=FLAGS },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(streamselect);
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *in) /* filter_frame callback of every input pad */
+{
+    StreamSelectContext *s = inlink->dst->priv;
+    return ff_framesync_filter_frame(&s->fs, inlink, in); /* the frame is handed to the synchronizer */
+}
+
+static int process_frame(FFFrameSync *fs)
+{
+    AVFilterContext *ctx = fs->parent;
+    StreamSelectContext *s = fs->opaque;
+    AVFrame **synced = s->frames;
+    int idx, src, dst, ret = 0;
+
+    /* Fetch the current frame of every input from the synchronizer. */
+    for (idx = 0; idx < ctx->nb_inputs; idx++) {
+        ret = ff_framesync_get_frame(&s->fs, idx, &synced[idx], 0);
+        if (ret < 0)
+            return ret;
+    }
+
+    /* Walk the inputs in order; each one feeds every output mapped to it. */
+    for (src = 0; src < ctx->nb_inputs; src++) {
+        for (dst = 0; dst < s->nb_map; dst++) {
+            AVFrame *out;
+
+            if (s->map[dst] != src)
+                continue;
+            /* Audio only: skip a pts already forwarded once this output has seen frames. */
+            if (s->is_audio && s->last_pts[src] == synced[src]->pts &&
+                ctx->outputs[dst]->frame_count > 0)
+                continue;
+            if (!(out = av_frame_clone(synced[src])))
+                return AVERROR(ENOMEM);
+            out->pts = av_rescale_q(s->fs.pts, s->fs.time_base, ctx->outputs[dst]->time_base);
+            s->last_pts[src] = synced[src]->pts;
+            if ((ret = ff_filter_frame(ctx->outputs[dst], out)) < 0)
+                return ret;
+        }
+    }
+    return ret;
+}
+
+static int request_frame(AVFilterLink *outlink) /* request_frame callback of every output pad */
+{
+    StreamSelectContext *s = outlink->src->priv;
+    return ff_framesync_request_frame(&s->fs, outlink); /* the request is delegated to the synchronizer */
+}
+
+static int config_output(AVFilterLink *outlink) /* config_props callback of every output pad */
+{
+    AVFilterContext *ctx = outlink->src;
+    StreamSelectContext *s = ctx->priv;
+    const int outlink_idx = FF_OUTLINK_IDX(outlink);
+    const int inlink_idx  = s->map[outlink_idx]; /* input currently mapped to this output */
+    AVFilterLink *inlink = ctx->inputs[inlink_idx];
+    FFFrameSyncIn *in;
+    int i, ret;
+
+    av_log(ctx, AV_LOG_VERBOSE, "config output link %d "
+           "with settings from input link %d\n",
+           outlink_idx, inlink_idx);
+
+    switch (outlink->type) { /* the output takes over the properties of its mapped input */
+    case AVMEDIA_TYPE_VIDEO:
+        outlink->w = inlink->w;
+        outlink->h = inlink->h;
+        outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
+        outlink->frame_rate = inlink->frame_rate;
+        break;
+    case AVMEDIA_TYPE_AUDIO:
+        outlink->sample_rate    = inlink->sample_rate;
+        outlink->channels       = inlink->channels;
+        outlink->channel_layout = inlink->channel_layout;
+        break;
+    }
+
+    outlink->time_base = inlink->time_base;
+    outlink->format = inlink->format;
+
+    if (s->fs.opaque == s) /* opaque is assigned below, so the synchronizer is already set up */
+        return 0;
+
+    if ((ret = ff_framesync_init(&s->fs, ctx, ctx->nb_inputs)) < 0)
+        return ret;
+
+    in = s->fs.in;
+    s->fs.opaque = s;
+    s->fs.on_event = process_frame;
+
+    for (i = 0; i < ctx->nb_inputs; i++) { /* identical sync settings for every input */
+        in[i].time_base = ctx->inputs[i]->time_base;
+        in[i].sync      = 1;
+        in[i].before    = EXT_STOP;
+        in[i].after     = EXT_STOP;
+    }
+
+    s->frames = av_calloc(ctx->nb_inputs, sizeof(*s->frames)); /* one slot per input, filled by process_frame() */
+    if (!s->frames)
+        return AVERROR(ENOMEM);
+
+    return ff_framesync_configure(&s->fs);
+}
+
+static int parse_definition(AVFilterContext *ctx, int nb_pads, void *filter_frame, int is_audio)
+{
+    const int is_input = !!filter_frame; /* a callback means input pads, NULL means output pads */
+    const char *padtype = is_input ? "in" : "out";
+    const enum AVMediaType type = is_audio ? AVMEDIA_TYPE_AUDIO : AVMEDIA_TYPE_VIDEO;
+    int idx;
+
+    for (idx = 0; idx < nb_pads; idx++) {
+        AVFilterPad pad = { 0 };
+        int err;
+
+        pad.type = type;
+        pad.name = av_asprintf("%sput%d", padtype, idx);
+        if (!pad.name)
+            return AVERROR(ENOMEM);
+
+        av_log(ctx, AV_LOG_DEBUG, "Add %s pad %s\n", padtype, pad.name);
+
+        if (!is_input) {
+            pad.config_props  = config_output;
+            pad.request_frame = request_frame;
+            err = ff_insert_outpad(ctx, idx, &pad);
+        } else {
+            pad.filter_frame = filter_frame;
+            err = ff_insert_inpad(ctx, idx, &pad);
+        }
+        if (err < 0) { /* the name is released here when the insertion fails */
+            av_freep(&pad.name);
+            return err;
+        }
+    }
+
+    return 0;
+}
+
+static int parse_mapping(AVFilterContext *ctx, const char *map)
+{
+    StreamSelectContext *s = ctx->priv;
+    int *parsed;
+    int nb_parsed = 0;
+
+    if (!map) {
+        av_log(ctx, AV_LOG_ERROR, "mapping definition is not set\n");
+        return AVERROR(EINVAL);
+    }
+
+    /* The new table is built aside, so a failed parse leaves s->map untouched. */
+    parsed = av_calloc(s->nb_inputs, sizeof(*parsed));
+    if (!parsed)
+        return AVERROR(ENOMEM);
+
+    for (;;) {
+        char *end;
+        const int n = strtol(map, &end, 0);
+
+        av_log(ctx, AV_LOG_DEBUG, "n=%d map=%p p=%p\n", n, map, end);
+        /* Nothing consumed: the list of numbers is over. */
+        if (end == map)
+            break;
+        map = end;
+
+        if (nb_parsed >= s->nb_inputs) {
+            av_log(ctx, AV_LOG_ERROR, "Unable to map more than the %d "
+                   "input pads available\n", s->nb_inputs);
+            goto fail;
+        }
+
+        if (n < 0 || n >= ctx->nb_inputs) {
+            av_log(ctx, AV_LOG_ERROR, "Input stream index %d doesn't exist "
+                   "(there is only %d input streams defined)\n",
+                   n, s->nb_inputs);
+            goto fail;
+        }
+
+        av_log(ctx, AV_LOG_VERBOSE, "Map input stream %d to output stream %d\n", n, nb_parsed);
+        parsed[nb_parsed++] = n;
+    }
+
+    if (!nb_parsed) {
+        av_log(ctx, AV_LOG_ERROR, "invalid mapping\n");
+        goto fail;
+    }
+
+    av_freep(&s->map);
+    s->map    = parsed;
+    s->nb_map = nb_parsed;
+    av_log(ctx, AV_LOG_VERBOSE, "%d map set\n", s->nb_map);
+    return 0;
+
+fail:
+    av_free(parsed);
+    return AVERROR(EINVAL);
+}
+
+static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
+                           char *res, int res_len, int flags)
+{
+    int ret;
+
+    /* "map" is the only command handled here. */
+    if (strcmp(cmd, "map"))
+        return AVERROR(ENOSYS);
+
+    ret = parse_mapping(ctx, args); /* install the new mapping before reconfiguring */
+    return ret < 0 ? ret : avfilter_config_links(ctx);
+}
+
+/* Create the input pads and one output pad for every number listed in "map". */
+static av_cold int init(AVFilterContext *ctx)
+{
+    StreamSelectContext *s = ctx->priv;
+    int ret, nb_outputs = 0;
+    char *map = s->map_str, *p;
+
+    if (!strcmp(ctx->filter->name, "astreamselect"))
+        s->is_audio = 1;
+
+    /* The "map" option defaults to NULL, which strtol() must never be given. */
+    if (!map) {
+        av_log(ctx, AV_LOG_ERROR, "mapping definition is not set\n");
+        return AVERROR(EINVAL);
+    }
+
+    /* Each number strtol() manages to consume stands for one output pad. */
+    for (strtol(map, &p, 0); map != p; strtol(map, &p, 0)) {
+        nb_outputs++;
+        map = p;
+    }
+
+    s->last_pts = av_calloc(s->nb_inputs, sizeof(*s->last_pts));
+    if (!s->last_pts)
+        return AVERROR(ENOMEM);
+    if ((ret = parse_definition(ctx, s->nb_inputs, filter_frame, s->is_audio)) < 0 ||
+        (ret = parse_definition(ctx, nb_outputs, NULL, s->is_audio)) < 0)
+        return ret;
+    av_log(ctx, AV_LOG_DEBUG, "Configured with %d inpad and %d outpad\n",
+           ctx->nb_inputs, ctx->nb_outputs);
+    return parse_mapping(ctx, s->map_str);
+}
+
+static av_cold void uninit(AVFilterContext *ctx) /* releases everything init/config_output allocated */
+{
+    StreamSelectContext *s = ctx->priv;
+
+    av_freep(&s->last_pts); /* allocated in init() */
+    av_freep(&s->map);      /* allocated in parse_mapping() */
+    av_freep(&s->frames);   /* allocated in config_output() */
+    ff_framesync_uninit(&s->fs);
+}
+
+static int query_formats(AVFilterContext *ctx)
+{
+    int idx, err;
+
+    /* Nothing is restricted: every list offered below is an "all" list. */
+    for (idx = 0; idx < ctx->nb_inputs; idx++) {
+        const enum AVMediaType type = ctx->inputs[idx]->type;
+
+        err = ff_set_common_formats(ctx, ff_all_formats(type));
+        if (err < 0)
+            return err;
+        if (type != AVMEDIA_TYPE_AUDIO)
+            continue;
+        /* Audio inputs additionally get sample rates and channel counts. */
+        err = ff_set_common_samplerates(ctx, ff_all_samplerates());
+        if (err < 0)
+            return err;
+        err = ff_set_common_channel_layouts(ctx, ff_all_channel_counts());
+        if (err < 0)
+            return err;
+    }
+    return 0;
+}
+
+AVFilter ff_vf_streamselect = { /* definition of the "streamselect" video filter */
+    .name            = "streamselect",
+    .description     = NULL_IF_CONFIG_SMALL("Select video streams"),
+    .init            = init,
+    .query_formats   = query_formats,
+    .process_command = process_command, /* accepts the "map" command */
+    .uninit          = uninit,
+    .priv_size       = sizeof(StreamSelectContext),
+    .priv_class      = &streamselect_class,
+    .flags           = AVFILTER_FLAG_DYNAMIC_INPUTS | AVFILTER_FLAG_DYNAMIC_OUTPUTS, /* pads are inserted by init() */
+};
+
+#define astreamselect_options streamselect_options /* the audio variant reuses the same option table */
+AVFILTER_DEFINE_CLASS(astreamselect);
+
+AVFilter ff_af_astreamselect = { /* definition of the "astreamselect" audio filter */
+    .name            = "astreamselect", /* init() compares against this name to set is_audio */
+    .description     = NULL_IF_CONFIG_SMALL("Select audio streams"),
+    .init            = init,
+    .query_formats   = query_formats,
+    .process_command = process_command, /* accepts the "map" command */
+    .uninit          = uninit,
+    .priv_size       = sizeof(StreamSelectContext),
+    .priv_class      = &astreamselect_class,
+    .flags           = AVFILTER_FLAG_DYNAMIC_INPUTS | AVFILTER_FLAG_DYNAMIC_OUTPUTS, /* pads are inserted by init() */
+};
diff --git a/libavfilter/fifo.c b/libavfilter/fifo.c
index e477cff4..f0b77ffb 100644
--- a/libavfilter/fifo.c
+++ b/libavfilter/fifo.c
@@ -201,7 +201,8 @@ static int return_audio_frame(AVFilterContext *ctx)
                     break;
                 } else if (ret < 0)
                     return ret;
-                av_assert0(s->root.next); // If ff_request_frame() succeeded then we should have a frame
+                if (!s->root.next)
+                    return 0;
             }
             head = s->root.next->frame;
 
@@ -237,7 +238,8 @@ static int request_frame(AVFilterLink *outlink)
                 return return_audio_frame(outlink->src);
             return ret;
         }
-        av_assert0(fifo->root.next);
+        if (!fifo->root.next)
+            return 0;
     }
 
     if (outlink->request_samples) {
diff --git a/libavfilter/formats.c b/libavfilter/formats.c
index 4f9773bd..f12dcf47 100644
--- a/libavfilter/formats.c
+++ b/libavfilter/formats.c
@@ -289,6 +289,17 @@ AVFilterFormats *ff_make_format_list(const int *fmts)
     return formats;
 }
 
+AVFilterChannelLayouts *ff_make_formatu64_list(const uint64_t *fmts) /* uint64_t counterpart of avfilter_make_format64_list() */
+{
+    MAKE_FORMAT_LIST(AVFilterChannelLayouts,
+                     channel_layouts, nb_channel_layouts); /* NOTE(review): presumably declares formats and count - confirm */
+    if (count)
+        memcpy(formats->channel_layouts, fmts,
+               sizeof(*formats->channel_layouts) * count); /* copies count entries from fmts */
+
+    return formats;
+}
+
 AVFilterChannelLayouts *avfilter_make_format64_list(const int64_t *fmts)
 {
     MAKE_FORMAT_LIST(AVFilterChannelLayouts,
@@ -300,17 +311,20 @@ AVFilterChannelLayouts *avfilter_make_format64_list(const int64_t *fmts)
     return formats;
 }
 
-#define ADD_FORMAT(f, fmt, type, list, nb)                  \
+#define ADD_FORMAT(f, fmt, unref_fn, type, list, nb)        \
 do {                                                        \
     type *fmts;                                             \
     void *oldf = *f;                                        \
                                                             \
-    if (!(*f) && !(*f = av_mallocz(sizeof(**f))))           \
+    if (!(*f) && !(*f = av_mallocz(sizeof(**f)))) {         \
+        unref_fn(f);                                        \
         return AVERROR(ENOMEM);                             \
+    }                                                       \
                                                             \
     fmts = av_realloc_array((*f)->list, (*f)->nb + 1,       \
                             sizeof(*(*f)->list));           \
     if (!fmts) {                                            \
+        unref_fn(f);                                        \
         if (!oldf)                                          \
             av_freep(f);                                    \
         return AVERROR(ENOMEM);                             \
@@ -322,14 +336,14 @@ do {                                                        \
 
 int ff_add_format(AVFilterFormats **avff, int64_t fmt)
 {
-    ADD_FORMAT(avff, fmt, int, formats, nb_formats);
+    ADD_FORMAT(avff, fmt, ff_formats_unref, int, formats, nb_formats);
     return 0;
 }
 
 int ff_add_channel_layout(AVFilterChannelLayouts **l, uint64_t channel_layout)
 {
     av_assert1(!(*l && (*l)->all_layouts));
-    ADD_FORMAT(l, channel_layout, uint64_t, channel_layouts, nb_channel_layouts);
+    ADD_FORMAT(l, channel_layout, ff_channel_layouts_unref, uint64_t, channel_layouts, nb_channel_layouts);
     return 0;
 }
 
@@ -340,13 +354,14 @@ AVFilterFormats *ff_all_formats(enum AVMediaType type)
     if (type == AVMEDIA_TYPE_VIDEO) {
         const AVPixFmtDescriptor *desc = NULL;
         while ((desc = av_pix_fmt_desc_next(desc))) {
-            if (!(desc->flags & AV_PIX_FMT_FLAG_HWACCEL))
-                ff_add_format(&ret, av_pix_fmt_desc_get_id(desc));
+            if (ff_add_format(&ret, av_pix_fmt_desc_get_id(desc)) < 0)
+                return NULL;
         }
     } else if (type == AVMEDIA_TYPE_AUDIO) {
         enum AVSampleFormat fmt = 0;
         while (av_get_sample_fmt_name(fmt)) {
-            ff_add_format(&ret, fmt);
+            if (ff_add_format(&ret, fmt) < 0)
+                return NULL;
             fmt++;
         }
     }
@@ -371,7 +386,8 @@ AVFilterFormats *ff_planar_sample_fmts(void)
 
     for (fmt = 0; av_get_bytes_per_sample(fmt)>0; fmt++)
         if (av_sample_fmt_is_planar(fmt))
-            ff_add_format(&ret, fmt);
+            if (ff_add_format(&ret, fmt) < 0)
+                return NULL;
 
     return ret;
 }
@@ -400,15 +416,17 @@ AVFilterChannelLayouts *ff_all_channel_counts(void)
     return ret;
 }
 
-#define FORMATS_REF(f, ref)                                                     \
+#define FORMATS_REF(f, ref, unref_fn)                                           \
     void *tmp;                                                                  \
                                                                                 \
-    if (!ref)                                                                   \
-        return AVERROR_BUG;                                                     \
+    if (!f || !ref)                                                             \
+        return AVERROR(ENOMEM);                                                 \
                                                                                 \
     tmp = av_realloc_array(f->refs, sizeof(*f->refs), f->refcount + 1);         \
-    if (!tmp)                                                                   \
+    if (!tmp) {                                                                 \
+        unref_fn(&f);                                                           \
         return AVERROR(ENOMEM);                                                 \
+    }                                                                           \
     f->refs = tmp;                                                              \
     f->refs[f->refcount++] = ref;                                               \
     *ref = f;                                                                   \
@@ -416,12 +434,12 @@ AVFilterChannelLayouts *ff_all_channel_counts(void)
 
 int ff_channel_layouts_ref(AVFilterChannelLayouts *f, AVFilterChannelLayouts **ref)
 {
-    FORMATS_REF(f, ref);
+    FORMATS_REF(f, ref, ff_channel_layouts_unref);
 }
 
 int ff_formats_ref(AVFilterFormats *f, AVFilterFormats **ref)
 {
-    FORMATS_REF(f, ref);
+    FORMATS_REF(f, ref, ff_formats_unref);
 }
 
 #define FIND_REF_INDEX(ref, idx)            \
@@ -438,7 +456,7 @@ do {                                        \
 do {                                                               \
     int idx = -1;                                                  \
                                                                    \
-    if (!*ref)                                                     \
+    if (!*ref || !(*ref)->refs)                                    \
         return;                                                    \
                                                                    \
     FIND_REF_INDEX(ref, idx);                                      \
@@ -489,25 +507,33 @@ void ff_formats_changeref(AVFilterFormats **oldref, AVFilterFormats **newref)
     FORMATS_CHANGEREF(oldref, newref);
 }
 
-#define SET_COMMON_FORMATS(ctx, fmts, in_fmts, out_fmts, ref, list) \
+#define SET_COMMON_FORMATS(ctx, fmts, in_fmts, out_fmts, ref_fn, unref_fn, list) \
     int count = 0, i;                                               \
                                                                     \
     if (!fmts)                                                      \
-        return AVERROR_BUG;                                         \
+        return AVERROR(ENOMEM);                                     \
                                                                     \
     for (i = 0; i < ctx->nb_inputs; i++) {                          \
         if (ctx->inputs[i] && !ctx->inputs[i]->out_fmts) {          \
-            int ret = ref(fmts, &ctx->inputs[i]->out_fmts);         \
-            if (ret < 0)                                            \
+            int ret = ref_fn(fmts, &ctx->inputs[i]->out_fmts);      \
+            if (ret < 0) {                                          \
+                unref_fn(&fmts);                                    \
+                av_freep(&fmts->list);                              \
+                av_freep(&fmts);                                    \
                 return ret;                                         \
+            }                                                       \
             count++;                                                \
         }                                                           \
     }                                                               \
     for (i = 0; i < ctx->nb_outputs; i++) {                         \
         if (ctx->outputs[i] && !ctx->outputs[i]->in_fmts) {         \
-            int ret = ref(fmts, &ctx->outputs[i]->in_fmts);         \
-            if (ret < 0)                                            \
+            int ret = ref_fn(fmts, &ctx->outputs[i]->in_fmts);      \
+            if (ret < 0) {                                          \
+                unref_fn(&fmts);                                    \
+                av_freep(&fmts->list);                              \
+                av_freep(&fmts);                                    \
                 return ret;                                         \
+            }                                                       \
             count++;                                                \
         }                                                           \
     }                                                               \
@@ -524,14 +550,14 @@ int ff_set_common_channel_layouts(AVFilterContext *ctx,
                                   AVFilterChannelLayouts *layouts)
 {
     SET_COMMON_FORMATS(ctx, layouts, in_channel_layouts, out_channel_layouts,
-                       ff_channel_layouts_ref, channel_layouts);
+                       ff_channel_layouts_ref, ff_channel_layouts_unref, channel_layouts);
 }
 
 int ff_set_common_samplerates(AVFilterContext *ctx,
                               AVFilterFormats *samplerates)
 {
     SET_COMMON_FORMATS(ctx, samplerates, in_samplerates, out_samplerates,
-                       ff_formats_ref, formats);
+                       ff_formats_ref, ff_formats_unref, formats);
 }
 
 /**
@@ -542,7 +568,7 @@ int ff_set_common_samplerates(AVFilterContext *ctx,
 int ff_set_common_formats(AVFilterContext *ctx, AVFilterFormats *formats)
 {
     SET_COMMON_FORMATS(ctx, formats, in_formats, out_formats,
-                       ff_formats_ref, formats);
+                       ff_formats_ref, ff_formats_unref, formats);
 }
 
 static int default_query_formats_common(AVFilterContext *ctx,
@@ -637,23 +663,20 @@ int ff_parse_channel_layout(int64_t *ret, int *nret, const char *arg,
                             void *log_ctx)
 {
     char *tail;
-    int64_t chlayout, count;
+    int64_t chlayout;
 
-    if (nret) {
-        count = strtol(arg, &tail, 10);
-        if (*tail == 'c' && !tail[1] && count > 0 && count < 63) {
-            *nret = count;
-            *ret = 0;
-            return 0;
-        }
-    }
     chlayout = av_get_channel_layout(arg);
     if (chlayout == 0) {
         chlayout = strtol(arg, &tail, 10);
-        if (*tail || chlayout == 0) {
+        if (!(*tail == '\0' || *tail == 'c' && *(tail + 1) == '\0') || chlayout <= 0 || chlayout > 63) {
             av_log(log_ctx, AV_LOG_ERROR, "Invalid channel layout '%s'\n", arg);
             return AVERROR(EINVAL);
         }
+        if (nret) {
+            *nret = chlayout;
+            *ret = 0;
+            return 0;
+        }
     }
     *ret = chlayout;
     if (nret)
@@ -669,12 +692,41 @@ int main(void)
 {
     const int64_t *cl;
     char buf[512];
+    int i;
+    const char *teststrings[] ={
+        "blah",
+        "1",
+        "2",
+        "-1",
+        "60",
+        "65",
+        "1c",
+        "2c",
+        "-1c",
+        "60c",
+        "65c",
+        "5.1",
+        "stereo",
+        "1+1+1+1",
+        "1c+1c+1c+1c",
+        "2c+1c",
+        "0x3",
+    };
 
     for (cl = avfilter_all_channel_layouts; *cl != -1; cl++) {
         av_get_channel_layout_string(buf, sizeof(buf), -1, *cl);
         printf("%s\n", buf);
     }
 
+    for ( i = 0; i<FF_ARRAY_ELEMS(teststrings); i++) {
+        int64_t layout = -1;
+        int count = -1;
+        int ret;
+        ret = ff_parse_channel_layout(&layout, &count, teststrings[i], NULL);
+
+        printf ("%d = ff_parse_channel_layout(%016"PRIX64", %2d, %s);\n", ret ? -1 : 0, layout, count, teststrings[i]);
+    }
+
     return 0;
 }
 
diff --git a/libavfilter/formats.h b/libavfilter/formats.h
index 5a8ee5ed..ce09d3ce 100644
--- a/libavfilter/formats.h
+++ b/libavfilter/formats.h
@@ -125,25 +125,35 @@ AVFilterFormats *ff_merge_samplerates(AVFilterFormats *a,
  * Construct an empty AVFilterChannelLayouts/AVFilterFormats struct --
  * representing any channel layout (with known disposition)/sample rate.
  */
+av_warn_unused_result
 AVFilterChannelLayouts *ff_all_channel_layouts(void);
+
+av_warn_unused_result
 AVFilterFormats *ff_all_samplerates(void);
 
 /**
  * Construct an AVFilterChannelLayouts coding for any channel layout, with
  * known or unknown disposition.
  */
+av_warn_unused_result
 AVFilterChannelLayouts *ff_all_channel_counts(void);
 
+av_warn_unused_result
 AVFilterChannelLayouts *avfilter_make_format64_list(const int64_t *fmts);
 
+av_warn_unused_result
+AVFilterChannelLayouts *ff_make_formatu64_list(const uint64_t *fmts);
+
 
 /**
  * A helper for query_formats() which sets all links to the same list of channel
  * layouts/sample rates. If there are no links hooked to this filter, the list
  * is freed.
  */
+av_warn_unused_result
 int ff_set_common_channel_layouts(AVFilterContext *ctx,
                                   AVFilterChannelLayouts *layouts);
+av_warn_unused_result
 int ff_set_common_samplerates(AVFilterContext *ctx,
                               AVFilterFormats *samplerates);
 
@@ -152,13 +162,16 @@ int ff_set_common_samplerates(AVFilterContext *ctx,
  * formats. If there are no links hooked to this filter, the list of formats is
  * freed.
  */
+av_warn_unused_result
 int ff_set_common_formats(AVFilterContext *ctx, AVFilterFormats *formats);
 
+av_warn_unused_result
 int ff_add_channel_layout(AVFilterChannelLayouts **l, uint64_t channel_layout);
 
 /**
  * Add *ref as a new reference to f.
  */
+av_warn_unused_result
 int ff_channel_layouts_ref(AVFilterChannelLayouts *f,
                            AVFilterChannelLayouts **ref);
 
@@ -170,6 +183,7 @@ void ff_channel_layouts_unref(AVFilterChannelLayouts **ref);
 void ff_channel_layouts_changeref(AVFilterChannelLayouts **oldref,
                                   AVFilterChannelLayouts **newref);
 
+av_warn_unused_result
 int ff_default_query_formats(AVFilterContext *ctx);
 
 /**
@@ -178,6 +192,7 @@ int ff_default_query_formats(AVFilterContext *ctx);
  * accepts channel layouts with unknown disposition. It should only be used
  * with audio filters.
  */
+av_warn_unused_result
 int ff_query_formats_all(AVFilterContext *ctx);
 
 
@@ -188,6 +203,7 @@ int ff_query_formats_all(AVFilterContext *ctx);
  * @param fmts list of media formats, terminated by -1
  * @return the format list, with no existing references
  */
+av_warn_unused_result
 AVFilterFormats *ff_make_format_list(const int *fmts);
 
 /**
@@ -198,16 +214,19 @@ AVFilterFormats *ff_make_format_list(const int *fmts);
  * @return a non negative value in case of success, or a negative
  * value corresponding to an AVERROR code in case of error
  */
+av_warn_unused_result
 int ff_add_format(AVFilterFormats **avff, int64_t fmt);
 
 /**
  * Return a list of all formats supported by FFmpeg for the given media type.
  */
+av_warn_unused_result
 AVFilterFormats *ff_all_formats(enum AVMediaType type);
 
 /**
  * Construct a formats list containing all planar sample formats.
  */
+av_warn_unused_result
 AVFilterFormats *ff_planar_sample_fmts(void);
 
 /**
@@ -233,6 +252,7 @@ AVFilterFormats *ff_merge_formats(AVFilterFormats *a, AVFilterFormats *b,
  *  | |____| |    | |____|
  *  |________|    |________________________
  */
+av_warn_unused_result
 int ff_formats_ref(AVFilterFormats *formats, AVFilterFormats **ref);
 
 /**
diff --git a/libavfilter/framepool.c b/libavfilter/framepool.c
new file mode 100644
index 00000000..6df574ea
--- /dev/null
+++ b/libavfilter/framepool.c
@@ -0,0 +1,189 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * Copyright (c) 2015 Matthieu Bouron <matthieu.bouron stupeflix.com>
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "framepool.h"
+#include "libavutil/avassert.h"
+#include "libavutil/buffer.h"
+#include "libavutil/frame.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/mem.h"
+#include "libavutil/pixfmt.h"
+
+struct FFVideoFramePool {
+
+    int width;
+    int height;
+    int format;
+    int align;
+    int linesize[4];
+    AVBufferPool *pools[4];
+
+};
+
+FFVideoFramePool *ff_video_frame_pool_init(AVBufferRef* (*alloc)(int size),
+                                           int width,
+                                           int height,
+                                           enum AVPixelFormat format,
+                                           int align)
+{
+    int i, ret;
+    FFVideoFramePool *pool;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format);
+
+    if (!desc)
+        return NULL;
+
+    pool = av_mallocz(sizeof(FFVideoFramePool));
+    if (!pool)
+        return NULL;
+
+    pool->width = width;
+    pool->height = height;
+    pool->format = format;
+    pool->align = align;
+
+    if ((ret = av_image_check_size(width, height, 0, NULL)) < 0) {
+        goto fail;
+    }
+
+    if (!pool->linesize[0]) {
+        for(i = 1; i <= align; i += i) {
+            ret = av_image_fill_linesizes(pool->linesize, pool->format,
+                                          FFALIGN(pool->width, i));
+            if (ret < 0) {
+                goto fail;
+            }
+            if (!(pool->linesize[0] & (pool->align - 1)))
+                break;
+        }
+
+        for (i = 0; i < 4 && pool->linesize[i]; i++) {
+            pool->linesize[i] = FFALIGN(pool->linesize[i], pool->align);
+        }
+    }
+
+    for (i = 0; i < 4 && pool->linesize[i]; i++) {
+        int h = FFALIGN(pool->height, 32);
+        if (i == 1 || i == 2)
+            h = AV_CEIL_RSHIFT(h, desc->log2_chroma_h);
+
+        pool->pools[i] = av_buffer_pool_init(pool->linesize[i] * h + 16 + 16 - 1,
+                                             alloc);
+        if (!pool->pools[i])
+            goto fail;
+    }
+
+    if (desc->flags & AV_PIX_FMT_FLAG_PAL ||
+        desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL) {
+        pool->pools[1] = av_buffer_pool_init(AVPALETTE_SIZE, alloc);
+        if (!pool->pools[1])
+            goto fail;
+    }
+
+    return pool;
+
+fail:
+    ff_video_frame_pool_uninit(&pool);
+    return NULL;
+}
+
+int ff_video_frame_pool_get_config(FFVideoFramePool *pool,
+                                   int *width,
+                                   int *height,
+                                   enum AVPixelFormat *format,
+                                   int *align)
+{
+    if (!pool)
+        return AVERROR(EINVAL);
+
+    *width = pool->width;
+    *height = pool->height;
+    *format = pool->format;
+    *align = pool->align;
+
+    return 0;
+}
+
+
+AVFrame *ff_video_frame_pool_get(FFVideoFramePool *pool)
+{
+    int i;
+    AVFrame *frame;
+    const AVPixFmtDescriptor *desc;
+
+    frame = av_frame_alloc();
+    if (!frame) {
+        return NULL;
+    }
+
+    desc = av_pix_fmt_desc_get(pool->format);
+    if (!desc) {
+        goto fail;
+    }
+
+    frame->width = pool->width;
+    frame->height = pool->height;
+    frame->format = pool->format;
+
+    for (i = 0; i < 4; i++) {
+        frame->linesize[i] = pool->linesize[i];
+        if (!pool->pools[i])
+            break;
+
+        frame->buf[i] = av_buffer_pool_get(pool->pools[i]);
+        if (!frame->buf[i]) {
+            goto fail;
+        }
+
+        frame->data[i] = frame->buf[i]->data;
+    }
+
+    if (desc->flags & AV_PIX_FMT_FLAG_PAL ||
+        desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL) {
+        enum AVPixelFormat format =
+            pool->format == AV_PIX_FMT_PAL8 ? AV_PIX_FMT_BGR8 : pool->format;
+
+        av_assert0(frame->data[1] != NULL);
+        if (avpriv_set_systematic_pal2((uint32_t *)frame->data[1], format) < 0) {
+            goto fail;
+        }
+    }
+
+    frame->extended_data = frame->data;
+
+    return frame;
+fail:
+    av_frame_free(&frame);
+    return NULL;
+}
+
+void ff_video_frame_pool_uninit(FFVideoFramePool **pool)
+{
+    int i;
+
+    if (!pool || !*pool)
+        return;
+
+    for (i = 0; i < 4; i++) {
+        av_buffer_pool_uninit(&(*pool)->pools[i]);
+    }
+
+    av_freep(pool);
+}
diff --git a/libavfilter/framepool.h b/libavfilter/framepool.h
new file mode 100644
index 00000000..2a6c9e86
--- /dev/null
+++ b/libavfilter/framepool.h
@@ -0,0 +1,84 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * Copyright (c) 2015 Matthieu Bouron <matthieu.bouron stupeflix.com>
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_FRAMEPOOL_H
+#define AVFILTER_FRAMEPOOL_H
+
+#include "libavutil/buffer.h"
+#include "libavutil/frame.h"
+
+/**
+ * Video frame pool. This structure is opaque and not meant to be accessed
+ * directly. It is allocated with ff_video_frame_pool_init() and freed with
+ * ff_video_frame_pool_uninit().
+ */
+typedef struct FFVideoFramePool FFVideoFramePool;
+
+/**
+ * Allocate and initialize a video frame pool.
+ *
+ * @param alloc a function that will be used to allocate new frame buffers when
+ * the pool is empty. May be NULL, then the default allocator will be used
+ * (av_buffer_alloc()).
+ * @param width width of each frame in this pool
+ * @param height height of each frame in this pool
+ * @param format format of each frame in this pool
+ * @param align buffer alignment of each frame in this pool
+ * @return newly created video frame pool on success, NULL on error.
+ */
+FFVideoFramePool *ff_video_frame_pool_init(AVBufferRef* (*alloc)(int size),
+                                           int width,
+                                           int height,
+                                           enum AVPixelFormat format,
+                                           int align);
+
+/**
+ * Deallocate the video frame pool. It is safe to call this function while
+ * some of the allocated video frames are still in use.
+ *
+ * @param pool pointer to the video frame pool to be freed. It will be set to NULL.
+ */
+void ff_video_frame_pool_uninit(FFVideoFramePool **pool);
+
+/**
+ * Get the video frame pool configuration.
+ *
+ * @param width width of each frame in this pool
+ * @param height height of each frame in this pool
+ * @param format format of each frame in this pool
+ * @param align buffer alignment of each frame in this pool
+ * @return 0 on success, a negative AVERROR otherwise.
+ */
+int ff_video_frame_pool_get_config(FFVideoFramePool *pool,
+                                   int *width,
+                                   int *height,
+                                   enum AVPixelFormat *format,
+                                   int *align);
+
+/**
+ * Allocate a new AVFrame, reusing old buffers from the pool when available.
+ * This function may be called simultaneously from multiple threads.
+ *
+ * @return a new AVFrame on success, NULL on error.
+ */
+AVFrame *ff_video_frame_pool_get(FFVideoFramePool *pool);
+
+
+#endif /* AVFILTER_FRAMEPOOL_H */
diff --git a/libavfilter/framesync.c b/libavfilter/framesync.c
index 12db50cb..7920cdb3 100644
--- a/libavfilter/framesync.c
+++ b/libavfilter/framesync.c
@@ -46,11 +46,16 @@ enum {
     STATE_EOF,
 };
 
-void ff_framesync_init(FFFrameSync *fs, void *parent, unsigned nb_in)
+int ff_framesync_init(FFFrameSync *fs, void *parent, unsigned nb_in)
 {
     fs->class  = &framesync_class;
     fs->parent = parent;
     fs->nb_in  = nb_in;
+
+    fs->in = av_calloc(nb_in, sizeof(*fs->in));
+    if (!fs->in)
+        return AVERROR(ENOMEM);
+    return 0;
 }
 
 static void framesync_sync_level_update(FFFrameSync *fs)
@@ -267,6 +272,8 @@ void ff_framesync_uninit(FFFrameSync *fs)
         av_frame_free(&fs->in[i].frame_next);
         ff_bufqueue_discard_all(&fs->in[i].queue);
     }
+
+    av_freep(&fs->in);
 }
 
 int ff_framesync_process_frame(FFFrameSync *fs, unsigned all)
@@ -315,7 +322,6 @@ int ff_framesync_request_frame(FFFrameSync *fs, AVFilterLink *outlink)
         return 0;
     if (fs->eof)
         return AVERROR_EOF;
-    outlink->flags |= FF_LINK_FLAG_REQUEST_LOOP;
     input = fs->in_request;
     ret = ff_request_frame(ctx->inputs[input]);
     if (ret == AVERROR_EOF) {
diff --git a/libavfilter/framesync.h b/libavfilter/framesync.h
index 20727810..7ba99d5d 100644
--- a/libavfilter/framesync.h
+++ b/libavfilter/framesync.h
@@ -201,9 +201,9 @@ typedef struct FFFrameSync {
     uint8_t eof;
 
     /**
-     * Array of inputs; all inputs must be in consecutive memory
+     * Pointer to array of inputs.
      */
-    FFFrameSyncIn in[1]; /* must be the last field */
+    FFFrameSyncIn *in;
 
 } FFFrameSync;
 
@@ -215,8 +215,9 @@ typedef struct FFFrameSync {
  * @param  fs      frame sync structure to initialize
  * @param  parent  parent object, used for logging
  * @param  nb_in   number of inputs
+ * @return  >= 0 for success or a negative error code
  */
-void ff_framesync_init(FFFrameSync *fs, void *parent, unsigned nb_in);
+int ff_framesync_init(FFFrameSync *fs, void *parent, unsigned nb_in);
 
 /**
  * Configure a frame sync structure.
diff --git a/libavfilter/generate_wave_table.c b/libavfilter/generate_wave_table.c
index bee9c009..6cd80228 100644
--- a/libavfilter/generate_wave_table.c
+++ b/libavfilter/generate_wave_table.c
@@ -80,5 +80,3 @@ void ff_generate_wave_table(enum WaveType wave_type,
         }
     }
 }
-
-
diff --git a/libavfilter/graphdump.c b/libavfilter/graphdump.c
index 3d702c6a..531bb571 100644
--- a/libavfilter/graphdump.c
+++ b/libavfilter/graphdump.c
@@ -26,6 +26,7 @@
 #include "libavutil/pixdesc.h"
 #include "avfilter.h"
 #include "avfiltergraph.h"
+#include "internal.h"
 
 static int print_link_prop(AVBPrint *buf, AVFilterLink *link)
 {
diff --git a/libavfilter/graphparser.c b/libavfilter/graphparser.c
index dd331d12..d9f40d69 100644
--- a/libavfilter/graphparser.c
+++ b/libavfilter/graphparser.c
@@ -118,13 +118,16 @@ static int create_filter(AVFilterContext **filt_ctx, AVFilterGraph *ctx, int ind
         return AVERROR(ENOMEM);
     }
 
-    if (!strcmp(filt_name, "scale") && args && !strstr(args, "flags") &&
+    if (!strcmp(filt_name, "scale") && (!args || !strstr(args, "flags")) &&
         ctx->scale_sws_opts) {
-        tmp_args = av_asprintf("%s:%s",
-                 args, ctx->scale_sws_opts);
-        if (!tmp_args)
-            return AVERROR(ENOMEM);
-        args = tmp_args;
+        if (args) {
+            tmp_args = av_asprintf("%s:%s",
+                    args, ctx->scale_sws_opts);
+            if (!tmp_args)
+                return AVERROR(ENOMEM);
+            args = tmp_args;
+        } else
+            args = ctx->scale_sws_opts;
     }
 
     ret = avfilter_init_str(*filt_ctx, args);
@@ -453,7 +456,6 @@ int avfilter_graph_parse2(AVFilterGraph *graph, const char *filters,
     return ret;
 }
 
-#if HAVE_INCOMPATIBLE_LIBAV_ABI || !FF_API_OLD_GRAPH_PARSE
 int avfilter_graph_parse(AVFilterGraph *graph, const char *filters,
                          AVFilterInOut *open_inputs,
                          AVFilterInOut *open_outputs, void *log_ctx)
@@ -515,13 +517,6 @@ int avfilter_graph_parse(AVFilterGraph *graph, const char *filters,
     avfilter_inout_free(&open_inputs);
     avfilter_inout_free(&open_outputs);
     return ret;
-#else
-int avfilter_graph_parse(AVFilterGraph *graph, const char *filters,
-                         AVFilterInOut **inputs, AVFilterInOut **outputs,
-                         void *log_ctx)
-{
-    return avfilter_graph_parse_ptr(graph, filters, inputs, outputs, log_ctx);
-#endif
 }
 
 int avfilter_graph_parse_ptr(AVFilterGraph *graph, const char *filters,
diff --git a/libavfilter/hermite.h b/libavfilter/hermite.h
new file mode 100644
index 00000000..fc1c0c61
--- /dev/null
+++ b/libavfilter/hermite.h
@@ -0,0 +1,45 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_HERMITE_H
+#define AVFILTER_HERMITE_H
+
+static inline double hermite_interpolation(double x, double x0, double x1,
+                                    double p0, double p1,
+                                    double m0, double m1)
+{
+    double width = x1 - x0;
+    double t = (x - x0) / width;
+    double t2, t3;
+    double ct0, ct1, ct2, ct3;
+
+    m0 *= width;
+    m1 *= width;
+
+    t2 = t*t;
+    t3 = t2*t;
+    ct0 = p0;
+    ct1 = m0;
+
+    ct2 = -3 * p0 - 2 * m0 + 3 * p1 - m1;
+    ct3 = 2 * p0 + m0  - 2 * p1 + m1;
+
+    return ct3 * t3 + ct2 * t2 + ct1 * t + ct0;
+}
+
+#endif /* AVFILTER_HERMITE_H */
diff --git a/libavfilter/internal.h b/libavfilter/internal.h
index a7ec751a..769e65b2 100644
--- a/libavfilter/internal.h
+++ b/libavfilter/internal.h
@@ -28,19 +28,12 @@
 #include "avfilter.h"
 #include "avfiltergraph.h"
 #include "formats.h"
+#include "framepool.h"
 #include "thread.h"
 #include "version.h"
 #include "video.h"
 #include "libavcodec/avcodec.h"
 
-#define POOL_SIZE 32
-typedef struct AVFilterPool {
-    AVFilterBufferRef *pic[POOL_SIZE];
-    int count;
-    int refcount;
-    int draining;
-} AVFilterPool;
-
 typedef struct AVFilterCommand {
     double time;                ///< time expressed in seconds
     char *command;              ///< command
@@ -54,7 +47,6 @@ typedef struct AVFilterCommand {
  */
 void ff_avfilter_graph_update_heap(AVFilterGraph *graph, AVFilterLink *link);
 
-#if !FF_API_AVFILTERPAD_PUBLIC
 /**
  * A filter pad used for either input or output.
  */
@@ -94,7 +86,7 @@ struct AVFilterPad {
      * Input pads only.
      *
      * @return >= 0 on success, a negative AVERROR on error. This function
-     * must ensure that samplesref is properly unreferenced on error if it
+     * must ensure that frame is properly unreferenced on error if it
      * hasn't been passed on to another filter.
      */
     int (*filter_frame)(AVFilterLink *link, AVFrame *frame);
@@ -111,9 +103,9 @@ struct AVFilterPad {
     int (*poll_frame)(AVFilterLink *link);
 
     /**
-     * Frame request callback. A call to this should result in at least one
-     * frame being output over the given link. This should return zero on
-     * success, and another value on error.
+     * Frame request callback. A call to this should result in some progress
+     * towards producing output over the given link. This should return zero
+     * on success, and another value on error.
      *
      * Output pads only.
      */
@@ -151,7 +143,6 @@ struct AVFilterPad {
      */
     int needs_writable;
 };
-#endif
 
 struct AVFilterGraphInternal {
     void *thread;
@@ -162,12 +153,15 @@ struct AVFilterInternal {
     avfilter_execute_func *execute;
 };
 
-#if FF_API_AVFILTERBUFFER
-/** default handler for freeing audio/video buffer when there are no references left */
-void ff_avfilter_default_free_buffer(AVFilterBuffer *buf);
-#endif
-
-/** Tell is a format is contained in the provided list terminated by -1. */
+/**
+ * Tell if an integer is contained in the provided -1-terminated list of integers.
+ * This is useful for determining (for instance) if an AVPixelFormat is in an
+ * array of supported formats.
+ *
+ * @param fmt provided format
+ * @param fmts -1-terminated list of formats
+ * @return 1 if present, 0 if absent
+ */
 int ff_fmt_is_in(int fmt, const int *fmts);
 
 /* Functions to parse audio format arguments */
@@ -180,6 +174,7 @@ int ff_fmt_is_in(int fmt, const int *fmts);
  * @param log_ctx log context
  * @return >= 0 in case of success, a negative AVERROR code on error
  */
+av_warn_unused_result
 int ff_parse_pixel_format(enum AVPixelFormat *ret, const char *arg, void *log_ctx);
 
 /**
@@ -190,6 +185,7 @@ int ff_parse_pixel_format(enum AVPixelFormat *ret, const char *arg, void *log_ct
  * @param log_ctx log context
  * @return >= 0 in case of success, a negative AVERROR code on error
  */
+av_warn_unused_result
 int ff_parse_sample_rate(int *ret, const char *arg, void *log_ctx);
 
 /**
@@ -200,6 +196,7 @@ int ff_parse_sample_rate(int *ret, const char *arg, void *log_ctx);
  * @param log_ctx log context
  * @return >= 0 in case of success, a negative AVERROR code on error
  */
+av_warn_unused_result
 int ff_parse_time_base(AVRational *ret, const char *arg, void *log_ctx);
 
 /**
@@ -210,6 +207,7 @@ int ff_parse_time_base(AVRational *ret, const char *arg, void *log_ctx);
  * @param log_ctx log context
  * @return >= 0 in case of success, a negative AVERROR code on error
  */
+av_warn_unused_result
 int ff_parse_sample_format(int *ret, const char *arg, void *log_ctx);
 
 /**
@@ -222,11 +220,27 @@ int ff_parse_sample_format(int *ret, const char *arg, void *log_ctx);
  * @param log_ctx log context
  * @return >= 0 in case of success, a negative AVERROR code on error
  */
+av_warn_unused_result
 int ff_parse_channel_layout(int64_t *ret, int *nret, const char *arg,
                             void *log_ctx);
 
 void ff_update_link_current_pts(AVFilterLink *link, int64_t pts);
 
+/**
+ * Set the status field of a link from the source filter.
+ * The pts should reflect the timestamp of the status change,
+ * in link time base and relative to the frames timeline.
+ * In particular, for AVERROR_EOF, it should reflect the
+ * end time of the last frame.
+ */
+void ff_avfilter_link_set_in_status(AVFilterLink *link, int status, int64_t pts);
+
+/**
+ * Set the status field of a link from the destination filter.
+ * The pts should probably be left unset (AV_NOPTS_VALUE).
+ */
+void ff_avfilter_link_set_out_status(AVFilterLink *link, int status, int64_t pts);
+
 void ff_command_queue_pop(AVFilterContext *filter);
 
 /* misc trace functions */
@@ -269,28 +283,16 @@ int ff_insert_pad(unsigned idx, unsigned *count, size_t padidx_off,
 static inline int ff_insert_inpad(AVFilterContext *f, unsigned index,
                                    AVFilterPad *p)
 {
-    int ret = ff_insert_pad(index, &f->nb_inputs, offsetof(AVFilterLink, dstpad),
+    return ff_insert_pad(index, &f->nb_inputs, offsetof(AVFilterLink, dstpad),
                   &f->input_pads, &f->inputs, p);
-#if FF_API_FOO_COUNT
-FF_DISABLE_DEPRECATION_WARNINGS
-    f->input_count = f->nb_inputs;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
-    return ret;
 }
 
 /** Insert a new output pad for the filter. */
 static inline int ff_insert_outpad(AVFilterContext *f, unsigned index,
                                     AVFilterPad *p)
 {
-    int ret = ff_insert_pad(index, &f->nb_outputs, offsetof(AVFilterLink, srcpad),
+    return ff_insert_pad(index, &f->nb_outputs, offsetof(AVFilterLink, srcpad),
                   &f->output_pads, &f->outputs, p);
-#if FF_API_FOO_COUNT
-FF_DISABLE_DEPRECATION_WARNINGS
-    f->output_count = f->nb_outputs;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
-    return ret;
 }
 
 /**
@@ -305,11 +307,35 @@ int ff_poll_frame(AVFilterLink *link);
 /**
  * Request an input frame from the filter at the other end of the link.
  *
+ * The input filter may pass the request on to its inputs, fulfill the
+ * request from an internal buffer or any other means specific to its function.
+ *
+ * When the end of a stream is reached AVERROR_EOF is returned and no further
+ * frames are returned after that.
+ *
+ * When a filter is unable to output a frame for example due to its sources
+ * being unable to do so or because it depends on external means pushing data
+ * into it then AVERROR(EAGAIN) is returned.
+ * It is important that an AVERROR(EAGAIN) return is propagated all the way to the
+ * caller (generally eventually a user application) as this step may be (but does
+ * not have to be) necessary to provide the input with the next frame.
+ *
+ * If a request is successful then some progress has been made towards
+ * providing a frame on the link (through ff_filter_frame()). A filter that
+ * needs several frames to produce one is allowed to return success if one
+ * more frame has been processed but no output has been produced yet. A
+ * filter is also allowed to simply forward a success return value.
+ *
  * @param link the input link
  * @return     zero on success
+ *             AVERROR_EOF on end of file
+ *             AVERROR(EAGAIN) if the previous filter cannot output a frame
+ *             currently and cannot guarantee that EOF has been reached either.
  */
 int ff_request_frame(AVFilterLink *link);
 
+int ff_request_frame_to_filter(AVFilterLink *link);
+
 #define AVFILTER_DEFINE_CLASS(fname)            \
     static const AVClass fname##_class = {      \
         .class_name = #fname,                   \
@@ -319,9 +345,6 @@ int ff_request_frame(AVFilterLink *link);
         .category   = AV_CLASS_CATEGORY_FILTER, \
     }
 
-AVFilterBufferRef *ff_copy_buffer_ref(AVFilterLink *outlink,
-                                      AVFilterBufferRef *ref);
-
 /**
  * Find the index of a link.
  *
@@ -330,9 +353,6 @@ AVFilterBufferRef *ff_copy_buffer_ref(AVFilterLink *outlink,
 #define FF_INLINK_IDX(link)  ((int)((link)->dstpad - (link)->dst->input_pads))
 #define FF_OUTLINK_IDX(link) ((int)((link)->srcpad - (link)->src->output_pads))
 
-int ff_buffersink_read_compat(AVFilterContext *ctx, AVFilterBufferRef **buf);
-int ff_buffersink_read_samples_compat(AVFilterContext *ctx, AVFilterBufferRef **pbuf,
-                                      int nb_samples);
 /**
  * Send a frame of data to the next filter.
  *
@@ -346,20 +366,6 @@ int ff_buffersink_read_samples_compat(AVFilterContext *ctx, AVFilterBufferRef **
  */
 int ff_filter_frame(AVFilterLink *link, AVFrame *frame);
 
-/**
- * Flags for AVFilterLink.flags.
- */
-enum {
-
-    /**
-     * Frame requests may need to loop in order to be fulfilled.
-     * A filter must set this flags on an output link if it may return 0 in
-     * request_frame() without filtering a frame.
-     */
-    FF_LINK_FLAG_REQUEST_LOOP = 1,
-
-};
-
 /**
  * Allocate a new filter context and return it.
  *
@@ -375,6 +381,11 @@ AVFilterContext *ff_filter_alloc(const AVFilter *filter, const char *inst_name);
  */
 void ff_filter_graph_remove_filter(AVFilterGraph *graph, AVFilterContext *filter);
 
+/**
+ * Run one round of processing on a filter graph.
+ */
+int ff_filter_graph_run_once(AVFilterGraph *graph);
+
 /**
  * Normalize the qscale factor
  * FIXME the H264 qscale is a log based scale, mpeg1/2 is not, the code below
diff --git a/libavfilter/lavfutils.c b/libavfilter/lavfutils.c
index 9952e6ea..706badf6 100644
--- a/libavfilter/lavfutils.c
+++ b/libavfilter/lavfutils.c
@@ -95,7 +95,7 @@ int ff_load_image(uint8_t *data[4], int linesize[4],
     av_image_copy(data, linesize, (const uint8_t **)frame->data, frame->linesize, *pix_fmt, *w, *h);
 
 end:
-    av_free_packet(&pkt);
+    av_packet_unref(&pkt);
     avcodec_close(codec_ctx);
     avformat_close_input(&format_ctx);
     av_frame_free(&frame);
diff --git a/libavfilter/maskedmerge.h b/libavfilter/maskedmerge.h
new file mode 100644
index 00000000..a8c7551b
--- /dev/null
+++ b/libavfilter/maskedmerge.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_MASKEDMERGE_H
+#define AVFILTER_MASKEDMERGE_H
+
+#include "avfilter.h"
+#include "framesync.h"
+
+typedef struct MaskedMergeContext {
+    const AVClass *class;
+    int width[4], height[4];
+    int nb_planes;
+    int planes;
+    int half, depth;
+    FFFrameSync fs;
+
+    void (*maskedmerge)(const uint8_t *bsrc, const uint8_t *osrc,
+                        const uint8_t *msrc, uint8_t *dst,
+                        ptrdiff_t blinesize, ptrdiff_t olinesize,
+                        ptrdiff_t mlinesize, ptrdiff_t dlinesize,
+                        int w, int h,
+                        int half, int shift);
+} MaskedMergeContext;
+
+void ff_maskedmerge_init_x86(MaskedMergeContext *s);
+
+#endif /* AVFILTER_MASKEDMERGE_H */
diff --git a/libavfilter/opencl_allkernels.h b/libavfilter/opencl_allkernels.h
index aca02e04..57b650d2 100644
--- a/libavfilter/opencl_allkernels.h
+++ b/libavfilter/opencl_allkernels.h
@@ -18,12 +18,12 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef AVFILTER_OPENCL_ALLKERNEL_H
-#define AVFILTER_OPENCL_ALLKERNEL_H
+#ifndef AVFILTER_OPENCL_ALLKERNELS_H
+#define AVFILTER_OPENCL_ALLKERNELS_H
 
 #include "avfilter.h"
 #include "config.h"
 
 void ff_opencl_register_filter_kernel_code_all(void);
 
-#endif /* AVFILTER_OPENCL_ALLKERNEL_H */
+#endif /* AVFILTER_OPENCL_ALLKERNELS_H */
diff --git a/libavfilter/psnr.h b/libavfilter/psnr.h
new file mode 100644
index 00000000..bbc45411
--- /dev/null
+++ b/libavfilter/psnr.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2015 Ronald S. Bultje <rsbultje@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_PSNR_H
+#define AVFILTER_PSNR_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+typedef struct PSNRDSPContext {
+    uint64_t (*sse_line)(const uint8_t *buf, const uint8_t *ref, int w);
+} PSNRDSPContext;
+
+void ff_psnr_init_x86(PSNRDSPContext *dsp, int bpp);
+
+#endif /* AVFILTER_PSNR_H */
diff --git a/libavfilter/pthread.c b/libavfilter/pthread.c
index 070b3bde..37ca73f9 100644
--- a/libavfilter/pthread.c
+++ b/libavfilter/pthread.c
@@ -27,19 +27,12 @@
 #include "libavutil/common.h"
 #include "libavutil/cpu.h"
 #include "libavutil/mem.h"
+#include "libavutil/thread.h"
 
 #include "avfilter.h"
 #include "internal.h"
 #include "thread.h"
 
-#if HAVE_PTHREADS
-#include <pthread.h>
-#elif HAVE_OS2THREADS
-#include "compat/os2threads.h"
-#elif HAVE_W32THREADS
-#include "compat/w32pthreads.h"
-#endif
-
 typedef struct ThreadContext {
     AVFilterGraph *graph;
 
@@ -47,7 +40,7 @@ typedef struct ThreadContext {
     pthread_t *workers;
     avfilter_action_func *func;
 
-    /* per-execute perameters */
+    /* per-execute parameters */
     AVFilterContext *ctx;
     void *arg;
     int   *rets;
diff --git a/libavfilter/removegrain.h b/libavfilter/removegrain.h
new file mode 100644
index 00000000..f3f10288
--- /dev/null
+++ b/libavfilter/removegrain.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2015 Paul B Mahol
+ * Copyright (c) 2015 James Darnley
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_REMOVEGRAIN_H
+#define AVFILTER_REMOVEGRAIN_H
+
+#include "avfilter.h"
+
+typedef struct RemoveGrainContext {
+    const AVClass *class;
+    /* NOTE(review): the [4] arrays below presumably hold one entry per plane -- confirm. */
+    int mode[4];
+
+    int nb_planes;
+    int planewidth[4];
+    int planeheight[4];
+    int skip_even;
+    int skip_odd;
+    /* Scalar kernels: each takes a value c plus eight further values a1..a8. */
+    int (*rg[4])(int c, int a1, int a2, int a3, int a4, int a5, int a6, int a7, int a8);
+    /* Buffer kernels: each works from src to dst with a stride and a pixel count. */
+    void (*fl[4])(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int pixels);
+} RemoveGrainContext;
+
+void ff_removegrain_init_x86(RemoveGrainContext *rg);
+
+#endif /* AVFILTER_REMOVEGRAIN_H */
diff --git a/libavfilter/setpts.c b/libavfilter/setpts.c
index dbfd88d2..2ccca28e 100644
--- a/libavfilter/setpts.c
+++ b/libavfilter/setpts.c
@@ -127,8 +127,9 @@ static int config_input(AVFilterLink *inlink)
     setpts->var_values[VAR_SAMPLE_RATE] =
         setpts->type == AVMEDIA_TYPE_AUDIO ? inlink->sample_rate : NAN;
 
-    setpts->var_values[VAR_FRAME_RATE] = inlink->frame_rate.num && inlink->frame_rate.den ?
-        av_q2d(inlink->frame_rate) : NAN;
+    setpts->var_values[VAR_FRAME_RATE] = inlink->frame_rate.num &&
+                                         inlink->frame_rate.den ?
+                                            av_q2d(inlink->frame_rate) : NAN;
 
     av_log(inlink->src, AV_LOG_VERBOSE, "TB:%f FRAME_RATE:%f SAMPLE_RATE:%f\n",
            setpts->var_values[VAR_TB],
diff --git a/libavfilter/split.c b/libavfilter/split.c
index 73538106..c545fd6d 100644
--- a/libavfilter/split.c
+++ b/libavfilter/split.c
@@ -32,6 +32,7 @@
 
 #include "avfilter.h"
 #include "audio.h"
+#include "formats.h"
 #include "internal.h"
 #include "video.h"
 
@@ -77,7 +78,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
     for (i = 0; i < ctx->nb_outputs; i++) {
         AVFrame *buf_out;
 
-        if (ctx->outputs[i]->closed)
+        if (ctx->outputs[i]->status)
             continue;
         buf_out = av_frame_clone(frame);
         if (!buf_out) {
@@ -143,6 +144,7 @@ AVFilter ff_af_asplit = {
     .priv_class  = &asplit_class,
     .init        = split_init,
     .uninit      = split_uninit,
+    .query_formats = ff_query_formats_all,
     .inputs      = avfilter_af_asplit_inputs,
     .outputs     = NULL,
     .flags       = AVFILTER_FLAG_DYNAMIC_OUTPUTS,
diff --git a/libavfilter/src_movie.c b/libavfilter/src_movie.c
index 908c03e1..eab24589 100644
--- a/libavfilter/src_movie.c
+++ b/libavfilter/src_movie.c
@@ -35,10 +35,10 @@
 #include "libavutil/avassert.h"
 #include "libavutil/opt.h"
 #include "libavutil/imgutils.h"
+#include "libavutil/internal.h"
 #include "libavutil/timestamp.h"
 #include "libavformat/avformat.h"
 #include "audio.h"
-#include "avcodec.h"
 #include "avfilter.h"
 #include "formats.h"
 #include "internal.h"
@@ -240,7 +240,7 @@ static av_cold int movie_common_init(AVFilterContext *ctx)
         timestamp = movie->seek_point;
         // add the stream start time, should it exist
         if (movie->format_ctx->start_time != AV_NOPTS_VALUE) {
-            if (timestamp > INT64_MAX - movie->format_ctx->start_time) {
+            if (timestamp > 0 && movie->format_ctx->start_time > INT64_MAX - timestamp) {
                 av_log(ctx, AV_LOG_ERROR,
                        "%s: seek value overflow with start_time:%"PRId64" seek_point:%"PRId64"\n",
                        movie->file_name, movie->format_ctx->start_time, movie->seek_point);
@@ -333,7 +333,7 @@ static int movie_query_formats(AVFilterContext *ctx)
     MovieContext *movie = ctx->priv;
     int list[] = { 0, -1 };
     int64_t list64[] = { 0, -1 };
-    int i;
+    int i, ret;
 
     for (i = 0; i < ctx->nb_outputs; i++) {
         MovieStream *st = &movie->st[i];
@@ -343,16 +343,20 @@ static int movie_query_formats(AVFilterContext *ctx)
         switch (c->codec_type) {
         case AVMEDIA_TYPE_VIDEO:
             list[0] = c->pix_fmt;
-            ff_formats_ref(ff_make_format_list(list), &outlink->in_formats);
+            if ((ret = ff_formats_ref(ff_make_format_list(list), &outlink->in_formats)) < 0)
+                return ret;
             break;
         case AVMEDIA_TYPE_AUDIO:
             list[0] = c->sample_fmt;
-            ff_formats_ref(ff_make_format_list(list), &outlink->in_formats);
+            if ((ret = ff_formats_ref(ff_make_format_list(list), &outlink->in_formats)) < 0)
+                return ret;
             list[0] = c->sample_rate;
-            ff_formats_ref(ff_make_format_list(list), &outlink->in_samplerates);
+            if ((ret = ff_formats_ref(ff_make_format_list(list), &outlink->in_samplerates)) < 0)
+                return ret;
             list64[0] = c->channel_layout;
-            ff_channel_layouts_ref(avfilter_make_format64_list(list64),
-                                   &outlink->in_channel_layouts);
+            if ((ret = ff_channel_layouts_ref(avfilter_make_format64_list(list64),
+                                   &outlink->in_channel_layouts)) < 0)
+                return ret;
             break;
         }
     }
@@ -486,7 +490,7 @@ static int movie_push_frame(AVFilterContext *ctx, unsigned out_id)
     pkt_out_id = pkt->stream_index > movie->max_stream_index ? -1 :
                  movie->out_index[pkt->stream_index];
     if (pkt_out_id < 0) {
-        av_free_packet(&movie->pkt0);
+        av_packet_unref(&movie->pkt0);
         pkt->size = 0; /* ready for next run */
         pkt->data = NULL;
         return 0;
@@ -513,7 +517,7 @@ static int movie_push_frame(AVFilterContext *ctx, unsigned out_id)
     if (ret < 0) {
         av_log(ctx, AV_LOG_WARNING, "Decode error: %s\n", av_err2str(ret));
         av_frame_free(&frame);
-        av_free_packet(&movie->pkt0);
+        av_packet_unref(&movie->pkt0);
         movie->pkt.size = 0;
         movie->pkt.data = NULL;
         return 0;
@@ -524,7 +528,7 @@ static int movie_push_frame(AVFilterContext *ctx, unsigned out_id)
     pkt->data += ret;
     pkt->size -= ret;
     if (pkt->size <= 0) {
-        av_free_packet(&movie->pkt0);
+        av_packet_unref(&movie->pkt0);
         pkt->size = 0; /* ready for next run */
         pkt->data = NULL;
     }
@@ -536,7 +540,7 @@ static int movie_push_frame(AVFilterContext *ctx, unsigned out_id)
     }
 
     frame->pts = av_frame_get_best_effort_timestamp(frame);
-    av_dlog(ctx, "movie_push_frame(): file:'%s' %s\n", movie->file_name,
+    ff_dlog(ctx, "movie_push_frame(): file:'%s' %s\n", movie->file_name,
             describe_frame_to_str((char[1024]){0}, 1024, frame, frame_type, outlink));
 
     if (st->st->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
diff --git a/libavfilter/ssim.h b/libavfilter/ssim.h
new file mode 100644
index 00000000..ac0395a2
--- /dev/null
+++ b/libavfilter/ssim.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2015 Ronald S. Bultje <rsbultje@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_SSIM_H
+#define AVFILTER_SSIM_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+typedef struct SSIMDSPContext { /* DSP function table passed to ff_ssim_init_x86() */
+    void (*ssim_4x4_line)(const uint8_t *buf, ptrdiff_t buf_stride,
+                          const uint8_t *ref, ptrdiff_t ref_stride,
+                          int (*sums)[4], int w); /* writes into sums, an array of int[4] entries */
+    float (*ssim_end_line)(const int (*sum0)[4], const int (*sum1)[4], int w); /* reads two such arrays and yields one float */
+} SSIMDSPContext;
+
+void ff_ssim_init_x86(SSIMDSPContext *dsp);
+
+#endif /* AVFILTER_SSIM_H */
diff --git a/libavfilter/stereo3d.h b/libavfilter/stereo3d.h
new file mode 100644
index 00000000..54611d12
--- /dev/null
+++ b/libavfilter/stereo3d.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_STEREO3D_H
+#define AVFILTER_STEREO3D_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+typedef struct Stereo3DDSPContext { /* DSP function table passed to ff_stereo3d_init_x86() */
+    void (*anaglyph)(uint8_t *dst, uint8_t *lsrc, uint8_t *rsrc,
+                     ptrdiff_t dst_linesize, ptrdiff_t l_linesize, ptrdiff_t r_linesize,
+                     int width, int height,
+                     const int *ana_matrix_r, const int *ana_matrix_g, const int *ana_matrix_b); /* one coefficient array per output colour component (r, g, b) */
+} Stereo3DDSPContext;
+
+void ff_stereo3d_init_x86(Stereo3DDSPContext *dsp);
+
+#endif /* AVFILTER_STEREO3D_H */
diff --git a/libavfilter/tinterlace.h b/libavfilter/tinterlace.h
index d80a6e21..3b703e7b 100644
--- a/libavfilter/tinterlace.h
+++ b/libavfilter/tinterlace.h
@@ -38,6 +38,7 @@ enum TInterlaceMode {
     MODE_INTERLEAVE_TOP,
     MODE_INTERLEAVE_BOTTOM,
     MODE_INTERLACEX2,
+    MODE_MERGEX2,
     MODE_NB,
 };
 
diff --git a/libavfilter/trim.c b/libavfilter/trim.c
index 468dc03c..9141ac5e 100644
--- a/libavfilter/trim.c
+++ b/libavfilter/trim.c
@@ -114,12 +114,6 @@ static int config_input(AVFilterLink *inlink)
     return 0;
 }
 
-static int config_output(AVFilterLink *outlink)
-{
-    outlink->flags |= FF_LINK_FLAG_REQUEST_LOOP;
-    return 0;
-}
-
 #define OFFSET(x) offsetof(TrimContext, x)
 #define COMMON_OPTS                                                                                                                                                         \
     { "starti",      "Timestamp of the first frame that "                                                                                                        \
@@ -180,7 +174,8 @@ static int trim_filter_frame(AVFilterLink *inlink, AVFrame *frame)
             drop = 0;
 
         if (drop) {
-            s->eof = inlink->closed = 1;
+            s->eof = 1;
+            ff_avfilter_link_set_out_status(inlink, AVERROR_EOF, AV_NOPTS_VALUE);
             goto drop;
         }
     }
@@ -223,7 +218,6 @@ static const AVFilterPad trim_outputs[] = {
     {
         .name         = "default",
         .type         = AVMEDIA_TYPE_VIDEO,
-        .config_props = config_output,
     },
     { NULL }
 };
@@ -312,7 +306,8 @@ static int atrim_filter_frame(AVFilterLink *inlink, AVFrame *frame)
         }
 
         if (drop) {
-            s->eof = inlink->closed = 1;
+            s->eof = 1;
+            ff_avfilter_link_set_out_status(inlink, AVERROR_EOF, AV_NOPTS_VALUE);
             goto drop;
         }
     }
@@ -378,7 +373,6 @@ static const AVFilterPad atrim_outputs[] = {
     {
         .name         = "default",
         .type         = AVMEDIA_TYPE_AUDIO,
-        .config_props = config_output,
     },
     { NULL }
 };
@@ -387,6 +381,7 @@ AVFilter ff_af_atrim = {
     .name        = "atrim",
     .description = NULL_IF_CONFIG_SMALL("Pick one continuous section from the input, drop the rest."),
     .init        = init,
+    .query_formats = ff_query_formats_all,
     .priv_size   = sizeof(TrimContext),
     .priv_class  = &atrim_class,
     .inputs      = atrim_inputs,
diff --git a/libavfilter/unsharp_opencl.c b/libavfilter/unsharp_opencl.c
index 2cc0704a..d84920c5 100644
--- a/libavfilter/unsharp_opencl.c
+++ b/libavfilter/unsharp_opencl.c
@@ -170,8 +170,8 @@ int ff_opencl_apply_unsharp(AVFilterContext *ctx, AVFrame *in, AVFrame *out)
     FFOpenclParam kernel2 = {0};
     int width = link->w;
     int height = link->h;
-    int cw = FF_CEIL_RSHIFT(link->w, unsharp->hsub);
-    int ch = FF_CEIL_RSHIFT(link->h, unsharp->vsub);
+    int cw = AV_CEIL_RSHIFT(link->w, unsharp->hsub);
+    int ch = AV_CEIL_RSHIFT(link->h, unsharp->vsub);
     size_t globalWorkSize1d = width * height + 2 * ch * cw;
     size_t globalWorkSize2dLuma[2];
     size_t globalWorkSize2dChroma[2];
@@ -385,7 +385,7 @@ int ff_opencl_unsharp_process_inout_buf(AVFilterContext *ctx, AVFrame *in, AVFra
     int ret = 0;
     AVFilterLink *link = ctx->inputs[0];
     UnsharpContext *unsharp = ctx->priv;
-    int ch = FF_CEIL_RSHIFT(link->h, unsharp->vsub);
+    int ch = AV_CEIL_RSHIFT(link->h, unsharp->vsub);
 
     if ((!unsharp->opencl_ctx.cl_inbuf) || (!unsharp->opencl_ctx.cl_outbuf)) {
         unsharp->opencl_ctx.in_plane_size[0]  = (in->linesize[0] * in->height);
diff --git a/libavfilter/vaf_spectrumsynth.c b/libavfilter/vaf_spectrumsynth.c
new file mode 100644
index 00000000..8d4014ea
--- /dev/null
+++ b/libavfilter/vaf_spectrumsynth.c
@@ -0,0 +1,540 @@
+/*
+ * Copyright (c) 2016 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SpectrumSynth filter
+ * @todo support float pixel format
+ */
+
+#include "libavcodec/avfft.h"
+#include "libavutil/avassert.h"
+#include "libavutil/channel_layout.h"
+#include "libavutil/opt.h"
+#include "libavutil/parseutils.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "audio.h"
+#include "video.h"
+#include "internal.h"
+#include "window_func.h"
+
+enum MagnitudeScale { LINEAR, LOG, NB_SCALES };                            /* values of the "scale" option */
+enum SlideMode      { REPLACE, SCROLL, FULLFRAME, RSCROLL, NB_SLIDES };   /* values of the "slide" option */
+enum Orientation    { VERTICAL, HORIZONTAL, NB_ORIENTATIONS };             /* values of the "orientation" option */
+
+typedef struct SpectrumSynthContext {
+    const AVClass *class;
+    int sample_rate;            ///< "sample_rate" option, copied to the output link
+    int channels;               ///< "channels" option: number of output channels
+    int scale;                  ///< "scale" option, an enum MagnitudeScale value
+    int sliding;                ///< "slide" option, an enum SlideMode value
+    int win_func;               ///< "win_func" option, passed to ff_generate_window_func()
+    float overlap;              ///< "overlap" option; config_output() replaces 1 by the window's own overlap
+    int orientation;            ///< "orientation" option, an enum Orientation value
+    /* Runtime state, set up by config_output() and the frame callbacks. */
+    AVFrame *magnitude, *phase; ///< pending input pictures; released by try_push_frames() once both are present
+    FFTContext *fft;            ///< Fast Fourier Transform context
+    int fft_bits;               ///< number of bits (FFT window size = 1<<fft_bits); never assigned in this file, config_output() uses a local
+    FFTComplex **fft_data;      ///< bins holder for each (displayed) channels
+    int win_size;               ///< FFT size in samples (1 << bits chosen by config_output())
+    int size;                   ///< picture rows (VERTICAL) or columns (HORIZONTAL) per channel
+    int nb_freq;                ///< win_size / 2
+    int hop_size;               ///< (1 - overlap) * win_size: how far start advances per synthesized position
+    int start, end;             ///< where the next window is added into buffer, and where accumulated data ends
+    int xpos;                   ///< position used by the replace, scroll and rscroll modes
+    int xend;                   ///< positions per picture: width if VERTICAL, height otherwise
+    int64_t pts;                ///< pts of the next output frame; grows by win_size per frame
+    float factor;               ///< gain multiplied into every output sample
+    AVFrame *buffer;            ///< accumulation buffer of 2 * win_size samples per channel
+    float *window_func_lut;     ///< Window function LUT
+} SpectrumSynthContext;
+
+#define OFFSET(x) offsetof(SpectrumSynthContext, x)
+#define A AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_AUDIO_PARAM
+#define V AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
+/* User options; A and V tag an entry with the audio or the video parameter flag. */
+static const AVOption spectrumsynth_options[] = {
+    { "sample_rate", "set sample rate",  OFFSET(sample_rate), AV_OPT_TYPE_INT, {.i64 = 44100}, 15,  INT_MAX, A },
+    { "channels",    "set channels",     OFFSET(channels), AV_OPT_TYPE_INT, {.i64 = 1}, 1, 8, A },
+    { "scale",       "set input amplitude scale", OFFSET(scale), AV_OPT_TYPE_INT, {.i64 = LOG}, 0, NB_SCALES-1, V, "scale" },
+        { "lin",  "linear",      0, AV_OPT_TYPE_CONST, {.i64=LINEAR}, 0, 0, V, "scale" },
+        { "log",  "logarithmic", 0, AV_OPT_TYPE_CONST, {.i64=LOG},    0, 0, V, "scale" },
+    { "slide", "set input sliding mode", OFFSET(sliding), AV_OPT_TYPE_INT, {.i64 = FULLFRAME}, 0, NB_SLIDES-1, V, "slide" },
+        { "replace",   "consume old columns with new",   0, AV_OPT_TYPE_CONST, {.i64=REPLACE},   0, 0, V, "slide" },
+        { "scroll",    "consume only most right column", 0, AV_OPT_TYPE_CONST, {.i64=SCROLL},    0, 0, V, "slide" },
+        { "fullframe", "consume full frames",            0, AV_OPT_TYPE_CONST, {.i64=FULLFRAME}, 0, 0, V, "slide" },
+        { "rscroll",   "consume only most left column",  0, AV_OPT_TYPE_CONST, {.i64=RSCROLL},   0, 0, V, "slide" },
+    { "win_func", "set window function", OFFSET(win_func), AV_OPT_TYPE_INT, {.i64 = 0}, 0, NB_WFUNC-1, A, "win_func" },
+        { "rect",     "Rectangular",      0, AV_OPT_TYPE_CONST, {.i64=WFUNC_RECT},     0, 0, A, "win_func" },
+        { "bartlett", "Bartlett",         0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BARTLETT}, 0, 0, A, "win_func" },
+        { "hann",     "Hann",             0, AV_OPT_TYPE_CONST, {.i64=WFUNC_HANNING},  0, 0, A, "win_func" }, /* same value as "hanning" */
+        { "hanning",  "Hanning",          0, AV_OPT_TYPE_CONST, {.i64=WFUNC_HANNING},  0, 0, A, "win_func" },
+        { "hamming",  "Hamming",          0, AV_OPT_TYPE_CONST, {.i64=WFUNC_HAMMING},  0, 0, A, "win_func" },
+        { "sine",     "Sine",             0, AV_OPT_TYPE_CONST, {.i64=WFUNC_SINE},     0, 0, A, "win_func" },
+    { "overlap", "set window overlap",  OFFSET(overlap), AV_OPT_TYPE_FLOAT, {.dbl=1}, 0,  1, A }, /* 1 makes config_output() use the overlap reported by the window function */
+    { "orientation", "set orientation", OFFSET(orientation), AV_OPT_TYPE_INT, {.i64=VERTICAL}, 0, NB_ORIENTATIONS-1, V, "orientation" },
+        { "vertical",   NULL, 0, AV_OPT_TYPE_CONST, {.i64=VERTICAL},   0, 0, V, "orientation" },
+        { "horizontal", NULL, 0, AV_OPT_TYPE_CONST, {.i64=HORIZONTAL}, 0, 0, V, "orientation" },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(spectrumsynth);
+
+/* Negotiate FLTP audio on the output and gray/4:4:4 video on both inputs. */
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY16,
+                                                   AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUVJ444P,
+                                                   AV_PIX_FMT_YUV444P16, AV_PIX_FMT_NONE };
+    static const enum AVSampleFormat sample_fmts[] = { AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE };
+    SpectrumSynthContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AVFilterChannelLayouts *layout = NULL;
+    AVFilterFormats *fmts;
+    int in, ret, sample_rates[] = { 48000, -1 };
+
+    /* Output: sample format, then a layout built from the channel count. */
+    fmts = ff_make_format_list(sample_fmts);
+    ret  = ff_formats_ref(fmts, &outlink->in_formats);
+    if (ret >= 0)
+        ret = ff_add_channel_layout(&layout, FF_COUNT2LAYOUT(s->channels));
+    if (ret >= 0)
+        ret = ff_channel_layouts_ref(layout, &outlink->in_channel_layouts);
+    if (ret < 0)
+        return ret;
+
+    sample_rates[0] = s->sample_rate;
+    fmts = ff_make_format_list(sample_rates);
+    if (!fmts)
+        return AVERROR(ENOMEM);
+    if ((ret = ff_formats_ref(fmts, &outlink->in_samplerates)) < 0)
+        return ret;
+
+    /* inputs[0] is magnitude, inputs[1] is phase; same pixel formats. */
+    for (in = 0; in < 2; in++) {
+        fmts = ff_make_format_list(pix_fmts);
+        if (!fmts)
+            return AVERROR(ENOMEM);
+        if ((ret = ff_formats_ref(fmts, &ctx->inputs[in]->out_formats)) < 0)
+            return ret;
+    }
+
+    return 0;
+}
+
+/* Configure the audio output: reject mismatched magnitude/phase inputs, then
+ * allocate the FFT, the per-channel bins, the overlap buffer and the window. */
+static int config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    SpectrumSynthContext *s = ctx->priv;
+    const AVFilterLink *mag = ctx->inputs[0];
+    const AVFilterLink *pha = ctx->inputs[1];
+    const int vertical = s->orientation == VERTICAL;
+    float energy = 0, win_overlap;
+    int n, c, bits = 1;
+
+    outlink->sample_rate = s->sample_rate;
+    outlink->time_base   = (AVRational){1, s->sample_rate};
+
+    if (mag->w != pha->w || mag->h != pha->h) {
+        av_log(ctx, AV_LOG_ERROR,
+               "Magnitude and Phase sizes differ (%dx%d vs %dx%d).\n",
+               mag->w, mag->h, pha->w, pha->h);
+        return AVERROR_INVALIDDATA;
+    }
+    if (av_cmp_q(mag->time_base, pha->time_base) != 0) {
+        av_log(ctx, AV_LOG_ERROR,
+               "Magnitude and Phase time bases differ (%d/%d vs %d/%d).\n",
+               mag->time_base.num, mag->time_base.den,
+               pha->time_base.num, pha->time_base.den);
+        return AVERROR_INVALIDDATA;
+    }
+    if (av_cmp_q(mag->frame_rate, pha->frame_rate) != 0) {
+        av_log(ctx, AV_LOG_ERROR,
+               "Magnitude and Phase framerates differ (%d/%d vs %d/%d).\n",
+               mag->frame_rate.num, mag->frame_rate.den,
+               pha->frame_rate.num, pha->frame_rate.den);
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* size: picture extent per channel; xend: positions per picture. */
+    s->size = (vertical ? mag->h : mag->w) / s->channels;
+    s->xend =  vertical ? mag->w : mag->h;
+
+    /* FFT size: smallest power of two >= 2 * size, and never below 2. */
+    while (1 << bits < 2 * s->size)
+        bits++;
+    s->win_size = 1 << bits;
+    s->nb_freq  = 1 << (bits - 1);
+
+    s->fft = av_fft_init(bits, 1);
+    if (!s->fft) {
+        av_log(ctx, AV_LOG_ERROR, "Unable to create FFT context. "
+               "The window size might be too high.\n");
+        return AVERROR(EINVAL);
+    }
+    s->fft_data = av_calloc(s->channels, sizeof(*s->fft_data));
+    if (!s->fft_data)
+        return AVERROR(ENOMEM);
+    for (c = 0; c < s->channels; c++) {
+        s->fft_data[c] = av_calloc(s->win_size, sizeof(**s->fft_data));
+        if (!s->fft_data[c])
+            return AVERROR(ENOMEM);
+    }
+
+    s->buffer = ff_get_audio_buffer(outlink, s->win_size * 2);
+    if (!s->buffer)
+        return AVERROR(ENOMEM);
+
+    /* pre-calc windowing function */
+    s->window_func_lut = av_realloc_f(s->window_func_lut, s->win_size,
+                                      sizeof(*s->window_func_lut));
+    if (!s->window_func_lut)
+        return AVERROR(ENOMEM);
+    ff_generate_window_func(s->window_func_lut, s->win_size, s->win_func, &win_overlap);
+    if (s->overlap == 1)
+        s->overlap = win_overlap;
+    s->hop_size = (1 - s->overlap) * s->win_size;
+    for (n = 0; n < s->win_size; n++)
+        energy += s->window_func_lut[n] * s->window_func_lut[n];
+    s->factor = (energy / s->win_size) / FFMAX(1 / (1 - s->overlap) - 1, 1);
+
+    return 0;
+}
+
+/* Pull a frame from every input that has none pending; stop at first error. */
+static int request_frame(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    SpectrumSynthContext *s = ctx->priv;
+    int err = 0;
+
+    if (!s->magnitude)
+        err = ff_request_frame(ctx->inputs[0]);
+    if (err < 0)
+        return err;
+
+    /* s->phase is tested only now, after the first request has returned. */
+    if (!s->phase)
+        err = ff_request_frame(ctx->inputs[1]);
+
+    return err < 0 ? err : 0;
+}
+
+/* Convert the 16-bit magnitude/phase samples at column x, row y of plane 0 of
+ * the pending pictures into the complex value of bin f of channel ch. */
+static void read16_fft_bin(SpectrumSynthContext *s,
+                           int x, int y, int f, int ch)
+{
+    const uint16_t *mrow = (uint16_t *)(s->magnitude->data[0] + y * s->magnitude->linesize[0]);
+    const uint16_t *prow = (uint16_t *)(s->phase->data[0] + y * s->phase->linesize[0]);
+    FFTComplex *bin = &s->fft_data[ch][f];
+    float magnitude, phase;
+
+    /* The raw sample is first scaled to [0, 1]. */
+    if (s->scale == LINEAR)
+        magnitude = mrow[x] / (double)UINT16_MAX;
+    else if (s->scale == LOG)
+        magnitude = ff_exp10(((mrow[x] / (double)UINT16_MAX) - 1.) * 6.);
+    else
+        av_assert0(0);
+
+    /* Phase sample scaled from [0, 1] to [-M_PI, M_PI]. */
+    phase = ((prow[x] / (double)UINT16_MAX) * 2. - 1.) * M_PI;
+
+    bin->re = magnitude * cos(phase);
+    bin->im = magnitude * sin(phase);
+}
+
+/* Convert the 8-bit magnitude/phase samples at column x, row y of plane 0 of
+ * the pending pictures into the complex value of bin f of channel ch. */
+static void read8_fft_bin(SpectrumSynthContext *s,
+                          int x, int y, int f, int ch)
+{
+    const uint8_t *mrow = (uint8_t *)(s->magnitude->data[0] + y * s->magnitude->linesize[0]);
+    const uint8_t *prow = (uint8_t *)(s->phase->data[0] + y * s->phase->linesize[0]);
+    FFTComplex *bin = &s->fft_data[ch][f];
+    float magnitude, phase;
+
+    /* The raw sample is first scaled to [0, 1]. */
+    if (s->scale == LINEAR)
+        magnitude = mrow[x] / (double)UINT8_MAX;
+    else if (s->scale == LOG)
+        magnitude = ff_exp10(((mrow[x] / (double)UINT8_MAX) - 1.) * 6.);
+    else
+        av_assert0(0);
+
+    /* Phase sample scaled from [0, 1] to [-M_PI, M_PI]. */
+    phase = ((prow[x] / (double)UINT8_MAX) * 2. - 1.) * M_PI;
+
+    bin->re = magnitude * cos(phase);
+    bin->im = magnitude * sin(phase);
+}
+
+/*
+ * Fill bins 0..h-1 of channel ch from position x of the pending pictures.
+ *
+ * Channel ch owns the band [h * (channels - ch - 1), h * (channels - ch) - 1]
+ * of rows (VERTICAL) or columns (HORIZONTAL); bin 0 comes from the last row of
+ * the band when VERTICAL and from its first column when HORIZONTAL. Pixel
+ * formats other than the ones listed below leave the bins untouched.
+ */
+static void read_fft_data(AVFilterContext *ctx, int x, int h, int ch)
+{
+    void (*read_bin)(SpectrumSynthContext *, int, int, int, int);
+    SpectrumSynthContext *s = ctx->priv;
+
+    /* Inclusive bounds of the band belonging to channel ch. */
+    const int first = h * (s->channels - ch - 1);
+    const int last  = h * (s->channels - ch) - 1;
+    int pos, f;
+
+    /* Pick the sample reader matching the input bit depth. */
+    switch (ctx->inputs[0]->format) {
+    case AV_PIX_FMT_YUV444P16:
+    case AV_PIX_FMT_GRAY16:
+        read_bin = read16_fft_bin;
+        break;
+    case AV_PIX_FMT_YUVJ444P:
+    case AV_PIX_FMT_YUV444P:
+    case AV_PIX_FMT_GRAY8:
+        read_bin = read8_fft_bin;
+        break;
+    default:
+        return;
+    }
+
+    if (s->orientation == VERTICAL) {
+        /* x is a column; the band is walked from its last row to its first. */
+        for (pos = last, f = 0; pos >= first; pos--, f++) {
+            read_bin(s, x, pos, f, ch);
+        }
+    } else if (s->orientation == HORIZONTAL) {
+        /* x is a row; the band is walked from its first column to its last. */
+        for (pos = first, f = 0; pos <= last; pos++, f++) {
+            read_bin(s, pos, x, f, ch);
+        }
+    }
+}
+
+/* Build the complete spectrum of each channel for position x and transform it. */
+static void synth_window(AVFilterContext *ctx, int x)
+{
+    SpectrumSynthContext *s = ctx->priv;
+    int ch, k;
+
+    for (ch = 0; ch < s->channels; ch++) {
+        FFTComplex *bins = s->fft_data[ch];
+
+        read_fft_data(ctx, x, s->size, ch);
+        /* Bins from size up to and including nb_freq carry no picture data. */
+        for (k = s->size; k <= s->nb_freq; k++)
+            bins[k].re = bins[k].im = 0;
+
+        /* Upper half mirrors the lower half with the imaginary part negated. */
+        for (k = 1; s->nb_freq + k < s->win_size; k++) {
+            bins[s->nb_freq + k].re =  bins[s->nb_freq - k].re;
+            bins[s->nb_freq + k].im = -bins[s->nb_freq - k].im;
+        }
+
+        av_fft_permute(s->fft, bins);
+        av_fft_calc(s->fft, bins);
+    }
+}
+
+/* Synthesize position x, overlap-add it into s->buffer and, once the write
+ * position reaches win_size, send win_size scaled samples per channel
+ * downstream. Returns 0, AVERROR(ENOMEM), or the ff_filter_frame() result. */
+static int try_push_frame(AVFilterContext *ctx, int x)
+{
+    SpectrumSynthContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+    const float factor = s->factor;
+    int ch, n, i, start, end, ret = 0;
+    AVFrame *out;
+
+    synth_window(ctx, x);
+
+    for (ch = 0; ch < s->channels; ch++) {
+        float *buf = (float *)s->buffer->extended_data[ch];
+        int j, k;
+
+        start = s->start;
+        end = s->end;
+        k = end;
+        for (i = 0, j = start; j < k && i < s->win_size; i++, j++)
+            buf[j] += s->fft_data[ch][i].re;
+        for (; i < s->win_size; i++, j++)
+            buf[j] = s->fft_data[ch][i].re;
+
+        start += s->hop_size;
+        end = j;
+
+        if (start >= s->win_size) {
+            start -= s->win_size;
+            end -= s->win_size;
+
+            /* Emit once, after the last channel has been accumulated. */
+            if (ch == s->channels - 1) {
+                float *dst;
+                int c;
+
+                out = ff_get_audio_buffer(outlink, s->win_size);
+                if (!out) {
+                    av_frame_free(&s->magnitude);
+                    av_frame_free(&s->phase);
+                    return AVERROR(ENOMEM);
+                }
+
+                out->pts = s->pts;
+                s->pts += s->win_size;
+                for (c = 0; c < s->channels; c++) {
+                    dst = (float *)out->extended_data[c];
+                    buf = (float *)s->buffer->extended_data[c];
+
+                    for (n = 0; n < s->win_size; n++)
+                        dst[n] = buf[n] * factor;
+                    memmove(buf, buf + s->win_size, s->win_size * sizeof(*buf));
+                }
+
+                /* Keep the result; it is returned after the state is stored. */
+                ret = ff_filter_frame(outlink, out);
+            }
+        }
+    }
+
+    s->start = start;
+    s->end = end;
+
+    return ret;
+}
+
+/* Once both a magnitude and a phase picture are pending, synthesize the
+ * position(s) selected by the "slide" mode, then release both pictures. */
+static int try_push_frames(AVFilterContext *ctx)
+{
+    SpectrumSynthContext *s = ctx->priv;
+    int ret, x;
+
+    if (!s->magnitude || !s->phase)
+        return 0;
+
+    switch (s->sliding) {
+    case REPLACE:
+        /* One position per picture pair, cycling through 0..xend-1. */
+        ret = try_push_frame(ctx, s->xpos);
+        if (++s->xpos >= s->xend)
+            s->xpos = 0;
+        break;
+    case SCROLL:
+    case RSCROLL:
+        /* Always the last (SCROLL) or the first (RSCROLL) position. */
+        s->xpos = s->sliding == SCROLL ? s->xend - 1 : 0;
+        ret = try_push_frame(ctx, s->xpos);
+        break;
+    case FULLFRAME:
+        /* Every position in order, stopping at the first error. */
+        for (x = 0; x < s->xend; x++)
+            if ((ret = try_push_frame(ctx, x)) < 0)
+                break;
+        break;
+    default:
+        av_assert0(0);
+    }
+
+    /* Both pictures are consumed, whatever the outcome. */
+    av_frame_free(&s->magnitude);
+    av_frame_free(&s->phase);
+    return ret;
+}
+
+/* Magnitude input callback: keep the picture, then try to synthesize. */
+static int filter_frame_magnitude(AVFilterLink *inlink, AVFrame *magnitude)
+{
+    AVFilterContext *ctx = inlink->dst;
+
+    ((SpectrumSynthContext *)ctx->priv)->magnitude = magnitude;
+    return try_push_frames(ctx);
+}
+
+/* Phase input callback: keep the picture, then try to synthesize. */
+static int filter_frame_phase(AVFilterLink *inlink, AVFrame *phase)
+{
+    AVFilterContext *ctx = inlink->dst;
+
+    ((SpectrumSynthContext *)ctx->priv)->phase = phase;
+    return try_push_frames(ctx);
+}
+
+/* Release the pending pictures, the overlap buffer, the FFT and all tables. */
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    SpectrumSynthContext *s = ctx->priv;
+    int ch;
+
+    av_frame_free(&s->magnitude);
+    av_frame_free(&s->phase);
+    av_frame_free(&s->buffer);
+    av_fft_end(s->fft);
+    /* fft_data may still be NULL here, hence the check in the loop. */
+    for (ch = 0; s->fft_data && ch < s->channels; ch++)
+        av_freep(&s->fft_data[ch]);
+    av_freep(&s->fft_data);
+    av_freep(&s->window_func_lut);
+}
+
+static const AVFilterPad spectrumsynth_inputs[] = { /* order matters: query_formats() and request_frame() treat index 0 as magnitude and index 1 as phase */
+    {
+        .name         = "magnitude",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame_magnitude,
+        .needs_fifo   = 1,
+    },
+    {
+        .name         = "phase",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame_phase,
+        .needs_fifo   = 1,
+    },
+    { NULL }
+};
+
+static const AVFilterPad spectrumsynth_outputs[] = { /* single audio output pad */
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_AUDIO,
+        .config_props  = config_output, /* sizes the FFT and allocates all buffers */
+        .request_frame = request_frame, /* pulls from whichever input has no pending picture */
+    },
+    { NULL }
+};
+
+AVFilter ff_vaf_spectrumsynth = { /* filter definition; there is no .init callback, all setup happens in config_output() */
+    .name          = "spectrumsynth",
+    .description   = NULL_IF_CONFIG_SMALL("Convert input spectrum videos to audio output."),
+    .uninit        = uninit,
+    .query_formats = query_formats,
+    .priv_size     = sizeof(SpectrumSynthContext),
+    .inputs        = spectrumsynth_inputs,
+    .outputs       = spectrumsynth_outputs,
+    .priv_class    = &spectrumsynth_class,
+};
diff --git a/libavfilter/version.h b/libavfilter/version.h
index bf7275f7..1fe7757b 100644
--- a/libavfilter/version.h
+++ b/libavfilter/version.h
@@ -29,9 +29,9 @@
 
 #include "libavutil/version.h"
 
-#define LIBAVFILTER_VERSION_MAJOR  5
-#define LIBAVFILTER_VERSION_MINOR  16
-#define LIBAVFILTER_VERSION_MICRO 101
+#define LIBAVFILTER_VERSION_MAJOR   6
+#define LIBAVFILTER_VERSION_MINOR  31
+#define LIBAVFILTER_VERSION_MICRO 100
 
 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
                                                LIBAVFILTER_VERSION_MINOR, \
@@ -49,32 +49,23 @@
  * the public API and may change, break or disappear at any time.
  */
 
-#ifndef FF_API_AVFILTERPAD_PUBLIC
-#define FF_API_AVFILTERPAD_PUBLIC           (LIBAVFILTER_VERSION_MAJOR < 6)
-#endif
-#ifndef FF_API_FOO_COUNT
-#define FF_API_FOO_COUNT                    (LIBAVFILTER_VERSION_MAJOR < 6)
-#endif
-#ifndef FF_API_AVFILTERBUFFER
-#define FF_API_AVFILTERBUFFER               (LIBAVFILTER_VERSION_MAJOR < 6)
-#endif
 #ifndef FF_API_OLD_FILTER_OPTS
-#define FF_API_OLD_FILTER_OPTS              (LIBAVFILTER_VERSION_MAJOR < 6)
+#define FF_API_OLD_FILTER_OPTS              (LIBAVFILTER_VERSION_MAJOR < 7)
+#endif
+#ifndef FF_API_OLD_FILTER_OPTS_ERROR
+#define FF_API_OLD_FILTER_OPTS_ERROR        (LIBAVFILTER_VERSION_MAJOR < 7)
 #endif
 #ifndef FF_API_AVFILTER_OPEN
-#define FF_API_AVFILTER_OPEN                (LIBAVFILTER_VERSION_MAJOR < 6)
+#define FF_API_AVFILTER_OPEN                (LIBAVFILTER_VERSION_MAJOR < 7)
 #endif
 #ifndef FF_API_AVFILTER_INIT_FILTER
-#define FF_API_AVFILTER_INIT_FILTER         (LIBAVFILTER_VERSION_MAJOR < 6)
+#define FF_API_AVFILTER_INIT_FILTER         (LIBAVFILTER_VERSION_MAJOR < 7)
 #endif
 #ifndef FF_API_OLD_FILTER_REGISTER
-#define FF_API_OLD_FILTER_REGISTER          (LIBAVFILTER_VERSION_MAJOR < 6)
-#endif
-#ifndef FF_API_OLD_GRAPH_PARSE
-#define FF_API_OLD_GRAPH_PARSE              (LIBAVFILTER_VERSION_MAJOR < 5)
+#define FF_API_OLD_FILTER_REGISTER          (LIBAVFILTER_VERSION_MAJOR < 7)
 #endif
 #ifndef FF_API_NOCONST_GET_NAME
-#define FF_API_NOCONST_GET_NAME             (LIBAVFILTER_VERSION_MAJOR < 6)
+#define FF_API_NOCONST_GET_NAME             (LIBAVFILTER_VERSION_MAJOR < 7)
 #endif
 
 #endif /* AVFILTER_VERSION_H */
diff --git a/libavfilter/vf_alphamerge.c b/libavfilter/vf_alphamerge.c
index 5f0da35e..a8a8d568 100644
--- a/libavfilter/vf_alphamerge.c
+++ b/libavfilter/vf_alphamerge.c
@@ -36,7 +36,6 @@
 enum { Y, U, V, A };
 
 typedef struct {
-    int frame_requested;
     int is_packed_rgb;
     uint8_t rgba_map[4];
     struct FFBufQueue queue_main;
@@ -58,12 +57,27 @@ static int query_formats(AVFilterContext *ctx)
         AV_PIX_FMT_NONE
     };
     static const enum AVPixelFormat alpha_fmts[] = { AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE };
-    AVFilterFormats *main_formats = ff_make_format_list(main_fmts);
-    AVFilterFormats *alpha_formats = ff_make_format_list(alpha_fmts);
-    ff_formats_ref(main_formats, &ctx->inputs[0]->out_formats);
-    ff_formats_ref(alpha_formats, &ctx->inputs[1]->out_formats);
-    ff_formats_ref(main_formats, &ctx->outputs[0]->in_formats);
+    AVFilterFormats *main_formats = NULL, *alpha_formats = NULL;
+    int ret;
+
+    if (!(main_formats = ff_make_format_list(main_fmts)) ||
+        !(alpha_formats = ff_make_format_list(alpha_fmts))) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+    if ((ret = ff_formats_ref(main_formats , &ctx->inputs[0]->out_formats)) < 0 ||
+        (ret = ff_formats_ref(alpha_formats, &ctx->inputs[1]->out_formats)) < 0 ||
+        (ret = ff_formats_ref(main_formats , &ctx->outputs[0]->in_formats)) < 0)
+            goto fail;
     return 0;
+fail:
+    if (main_formats)
+        av_freep(&main_formats->formats);
+    av_freep(&main_formats);
+    if (alpha_formats)
+        av_freep(&alpha_formats->formats);
+    av_freep(&alpha_formats);
+    return ret;
 }
 
 static int config_input_main(AVFilterLink *inlink)
@@ -146,7 +160,6 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *buf)
         main_buf = ff_bufqueue_get(&merge->queue_main);
         alpha_buf = ff_bufqueue_get(&merge->queue_alpha);
 
-        merge->frame_requested = 0;
         draw_frame(ctx, main_buf, alpha_buf);
         ret = ff_filter_frame(ctx->outputs[0], main_buf);
         av_frame_free(&alpha_buf);
@@ -160,13 +173,10 @@ static int request_frame(AVFilterLink *outlink)
     AlphaMergeContext *merge = ctx->priv;
     int in, ret;
 
-    merge->frame_requested = 1;
-    while (merge->frame_requested) {
-        in = ff_bufqueue_peek(&merge->queue_main, 0) ? 1 : 0;
-        ret = ff_request_frame(ctx->inputs[in]);
-        if (ret < 0)
-            return ret;
-    }
+    in = ff_bufqueue_peek(&merge->queue_main, 0) ? 1 : 0;
+    ret = ff_request_frame(ctx->inputs[in]);
+    if (ret < 0)
+        return ret;
     return 0;
 }
 
diff --git a/libavfilter/vf_aspect.c b/libavfilter/vf_aspect.c
index 84dbee95..bf308248 100644
--- a/libavfilter/vf_aspect.c
+++ b/libavfilter/vf_aspect.c
@@ -69,10 +69,10 @@ typedef struct AspectContext {
 
 static av_cold int init(AVFilterContext *ctx)
 {
+#if FF_API_OLD_FILTER_OPTS
     AspectContext *s = ctx->priv;
     int ret;
 
-#if FF_API_OLD_FILTER_OPTS
     if (s->ratio_expr && s->aspect_den > 0) {
         double num;
         av_log(ctx, AV_LOG_WARNING,
diff --git a/libavfilter/vf_atadenoise.c b/libavfilter/vf_atadenoise.c
new file mode 100644
index 00000000..aa09021c
--- /dev/null
+++ b/libavfilter/vf_atadenoise.c
@@ -0,0 +1,414 @@
+/*
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Adaptive Temporal Averaging Denoiser,
+ * based on paper "Video Denoising Based on Adaptive Temporal Averaging" by
+ * David Bartovčak and Miroslav Vrankić
+ */
+
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+#include "avfilter.h"
+
+#define FF_BUFQUEUE_SIZE 129
+#include "bufferqueue.h"
+
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+#define SIZE FF_BUFQUEUE_SIZE
+
+typedef struct ATADenoiseContext { /* private state of the atadenoise filter */
+    const AVClass *class;
+
+    float fthra[4], fthrb[4]; /* thresholds A/B as set through the options (entries 0-2) */
+    int thra[4], thrb[4];     /* the same thresholds scaled by the sample depth in config_input() */
+
+    int nb_planes;            /* nb_components of the input pixel format */
+    int planewidth[4];        /* per-plane width, set in config_input() */
+    int planeheight[4];       /* per-plane height, set in config_input() */
+
+    struct FFBufQueue q;      /* queued input frames */
+    void *data[4][SIZE];      /* [plane][queue index] data pointers, refreshed in filter_frame() */
+    int linesize[4][SIZE];    /* [plane][queue index] line sizes, refreshed in filter_frame() */
+    int size, mid;            /* size: option "s"; mid: size / 2 + 1, queue index of the frame being filtered */
+    int available;            /* ++ when filter_frame() queues an input while filling, -- per EOF flush in request_frame() */
+
+    int (*filter_slice)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs); /* 8- or 16-bit slice worker */
+} ATADenoiseContext;
+
+#define OFFSET(x) offsetof(ATADenoiseContext, x)
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption atadenoise_options[] = { /* thresholds A/B for planes 0-2 and the frame count */
+    { "0a", "set threshold A for 1st plane", OFFSET(fthra[0]), AV_OPT_TYPE_FLOAT, {.dbl=0.02}, 0, 0.3, FLAGS },
+    { "0b", "set threshold B for 1st plane", OFFSET(fthrb[0]), AV_OPT_TYPE_FLOAT, {.dbl=0.04}, 0, 5.0, FLAGS },
+    { "1a", "set threshold A for 2nd plane", OFFSET(fthra[1]), AV_OPT_TYPE_FLOAT, {.dbl=0.02}, 0, 0.3, FLAGS },
+    { "1b", "set threshold B for 2nd plane", OFFSET(fthrb[1]), AV_OPT_TYPE_FLOAT, {.dbl=0.04}, 0, 5.0, FLAGS },
+    { "2a", "set threshold A for 3rd plane", OFFSET(fthra[2]), AV_OPT_TYPE_FLOAT, {.dbl=0.02}, 0, 0.3, FLAGS },
+    { "2b", "set threshold B for 3rd plane", OFFSET(fthrb[2]), AV_OPT_TYPE_FLOAT, {.dbl=0.04}, 0, 5.0, FLAGS },
+    { "s",  "set how many frames to use",    OFFSET(size),     AV_OPT_TYPE_INT,   {.i64=33},   5, SIZE, FLAGS }, /* even values are rejected in init() */
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(atadenoise);
+
+/* Build the list of supported pixel formats and pass it to ff_set_common_formats(). */
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat pixel_fmts[] = {
+        AV_PIX_FMT_GRAY8,
+        AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,
+        AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
+        AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV444P,
+        AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
+        AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_YUVJ444P,
+        AV_PIX_FMT_YUVJ411P,
+        AV_PIX_FMT_YUV420P9, AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV444P9,
+        AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
+        AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16,
+        AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10,
+        AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
+        AV_PIX_FMT_NONE
+    };
+    AVFilterFormats *list = ff_make_format_list(pixel_fmts);
+
+    return list ? ff_set_common_formats(ctx, list) : AVERROR(ENOMEM);
+}
+
+/* Reject even frame counts and derive the queue index of the frame to filter. */
+static av_cold int init(AVFilterContext *ctx)
+{
+    ATADenoiseContext *priv = ctx->priv;
+    const int nb_frames = priv->size;
+    if (nb_frames % 2 == 0) {
+        av_log(ctx, AV_LOG_ERROR, "size %d is invalid. Must be an odd value.\n", nb_frames);
+        return AVERROR(EINVAL);
+    }
+    priv->mid = nb_frames / 2 + 1;
+    return 0;
+}
+
+typedef struct ThreadData { /* argument handed to the slice workers through execute() */
+    AVFrame *in, *out;     /* frame being filtered and the frame receiving the result */
+} ThreadData;
+
+static int filter_slice8(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{   /* Slice worker for 8-bit samples; arg is a ThreadData, always returns 0. */
+    ATADenoiseContext *s = ctx->priv;
+    ThreadData *td = arg;
+    AVFrame *in = td->in;
+    AVFrame *out = td->out;
+    const int size = s->size;
+    const int mid = s->mid;
+    int p, x, y, i, j;
+    /* job jobnr of nb_jobs handles rows [slice_start, slice_end) of every plane */
+    for (p = 0; p < s->nb_planes; p++) {
+        const int h = s->planeheight[p];
+        const int w = s->planewidth[p];
+        const int slice_start = (h * jobnr) / nb_jobs;
+        const int slice_end = (h * (jobnr+1)) / nb_jobs;
+        const uint8_t *src = in->data[p] + slice_start * in->linesize[p];
+        uint8_t *dst = out->data[p] + slice_start * out->linesize[p];
+        const int thra = s->thra[p];
+        const int thrb = s->thrb[p];
+        const uint8_t **data = (const uint8_t **)s->data[p];
+        const int *linesize = (const int *)s->linesize[p];
+        const uint8_t *srcf[SIZE];
+        /* srcf[i]: current row of this plane in queued frame i */
+        for (i = 0; i < size; i++)
+            srcf[i] = data[i] + slice_start * linesize[i];
+
+        for (y = slice_start; y < slice_end; y++) {
+            for (x = 0; x < w; x++) {
+                const int srcx = src[x];
+                unsigned lsumdiff = 0, rsumdiff = 0;
+                unsigned ldiff, rdiff;
+                unsigned sum = srcx;
+                int l = 0, r = 0;
+                int srcjx, srcix;
+                /* walk outwards from queue index mid: j towards 0, i towards size - 1 */
+                for (j = mid - 1, i = mid + 1; j >= 0 && i < size; j--, i++) {
+                    srcjx = srcf[j][x];
+                    /* thra bounds a single difference, thrb the running sum on this side */
+                    ldiff = FFABS(srcx - srcjx);
+                    lsumdiff += ldiff;
+                    if (ldiff > thra ||   /* NOTE(review): unsigned vs int compare, a negative */
+                        lsumdiff > thrb)  /* threshold converts to a huge unsigned value here */
+                        break;
+                    l++;
+                    sum += srcjx;
+                    /* same test on the i side; the j-side sample above is already counted */
+                    srcix = srcf[i][x];
+
+                    rdiff = FFABS(srcx - srcix);
+                    rsumdiff += rdiff;
+                    if (rdiff > thra ||
+                        rsumdiff > thrb)
+                        break;
+                    r++;
+                    sum += srcix;
+                }
+                /* mean of the centre sample and the l + r accepted samples */
+                dst[x] = sum / (r + l + 1);
+            }
+            /* advance every row pointer to the next line */
+            dst += out->linesize[p];
+            src += in->linesize[p];
+
+            for (i = 0; i < size; i++)
+                srcf[i] += linesize[i];
+        }
+    }
+
+    return 0;
+}
+
+static int filter_slice16(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{   /* Slice worker for samples read as uint16_t; arg is a ThreadData, always returns 0. */
+    ATADenoiseContext *s = ctx->priv;
+    ThreadData *td = arg;
+    AVFrame *in = td->in;
+    AVFrame *out = td->out;
+    const int size = s->size;
+    const int mid = s->mid;
+    int p, x, y, i, j;
+    /* job jobnr of nb_jobs handles rows [slice_start, slice_end) of every plane */
+    for (p = 0; p < s->nb_planes; p++) {
+        const int h = s->planeheight[p];
+        const int w = s->planewidth[p];
+        const int slice_start = (h * jobnr) / nb_jobs;
+        const int slice_end = (h * (jobnr+1)) / nb_jobs;
+        const uint16_t *src = (uint16_t *)(in->data[p] + slice_start * in->linesize[p]);
+        uint16_t *dst = (uint16_t *)(out->data[p] + slice_start * out->linesize[p]);
+        const int thra = s->thra[p];
+        const int thrb = s->thrb[p];
+        const uint8_t **data = (const uint8_t **)s->data[p];
+        const int *linesize = (const int *)s->linesize[p];
+        const uint16_t *srcf[SIZE];
+        /* srcf[i]: current row of this plane in queued frame i (offset applied in bytes, then cast) */
+        for (i = 0; i < s->size; i++)
+            srcf[i] = (const uint16_t *)(data[i] + slice_start * linesize[i]);
+
+        for (y = slice_start; y < slice_end; y++) {
+            for (x = 0; x < w; x++) {
+                const int srcx = src[x];
+                unsigned lsumdiff = 0, rsumdiff = 0;
+                unsigned ldiff, rdiff;
+                unsigned sum = srcx;
+                int l = 0, r = 0;
+                int srcjx, srcix;
+                /* walk outwards from queue index mid: j towards 0, i towards size - 1 */
+                for (j = mid - 1, i = mid + 1; j >= 0 && i < size; j--, i++) {
+                    srcjx = srcf[j][x];
+                    /* thra bounds a single difference, thrb the running sum on this side */
+                    ldiff = FFABS(srcx - srcjx);
+                    lsumdiff += ldiff;
+                    if (ldiff > thra ||   /* NOTE(review): unsigned vs int compare, a negative */
+                        lsumdiff > thrb)  /* threshold converts to a huge unsigned value here */
+                        break;
+                    l++;
+                    sum += srcjx;
+                    /* same test on the i side; the j-side sample above is already counted */
+                    srcix = srcf[i][x];
+
+                    rdiff = FFABS(srcx - srcix);
+                    rsumdiff += rdiff;
+                    if (rdiff > thra ||
+                        rsumdiff > thrb)
+                        break;
+                    r++;
+                    sum += srcix;
+                }
+                /* mean of the centre sample and the l + r accepted samples */
+                dst[x] = sum / (r + l + 1);
+            }
+            /* next line: line sizes are byte counts, the pointers step in uint16_t units */
+            dst += out->linesize[p] / 2;
+            src += in->linesize[p] / 2;
+
+            for (i = 0; i < size; i++)
+                srcf[i] += linesize[i] / 2;
+        }
+    }
+
+    return 0;
+}
+
+/* Cache per-plane sizes, pick the 8/16-bit slice worker and scale the thresholds. */
+static int config_input(AVFilterLink *inlink)
+{
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    AVFilterContext *ctx = inlink->dst;
+    ATADenoiseContext *s = ctx->priv;
+    int depth, p;
+
+    s->nb_planes = desc->nb_components;
+    s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
+    s->planeheight[0] = s->planeheight[3] = inlink->h;
+    s->planewidth[1]  = s->planewidth[2]  = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
+    s->planewidth[0]  = s->planewidth[3]  = inlink->w;
+
+    depth = desc->comp[0].depth;
+    if (depth == 8)
+        s->filter_slice = filter_slice8;
+    else
+        s->filter_slice = filter_slice16;
+
+    /* An option value of 0 would scale to -1; the slice workers compare the
+     * thresholds against unsigned differences, where -1 acts as UINT_MAX and
+     * never triggers.  Clamp to 0 so a zero threshold rejects any difference. */
+    for (p = 0; p < 3; p++) {
+        s->thra[p] = FFMAX(0, (int)(s->fthra[p] * (1 << depth) - 1));
+        s->thrb[p] = FFMAX(0, (int)(s->fthrb[p] * (1 << depth) - 1));
+    }
+    return 0;
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *buf)
+{   /* Queue buf; once s->size frames are queued, also output the frame at queue index s->mid. */
+    AVFilterContext *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    ATADenoiseContext *s = ctx->priv;
+    AVFrame *out, *in;
+    int i;
+    /* filling phase: nothing is output until the queue holds s->size frames */
+    if (s->q.available != s->size) {
+        if (s->q.available < s->mid) { /* seed the queue with s->mid clones of buf */
+            for (i = 0; i < s->mid; i++) {
+                out = av_frame_clone(buf);
+                if (!out) {
+                    av_frame_free(&buf);
+                    return AVERROR(ENOMEM);
+                }
+                ff_bufqueue_add(ctx, &s->q, out);
+            }
+        }
+        if (s->q.available < s->size) {
+            ff_bufqueue_add(ctx, &s->q, buf);
+            s->available++; /* counts inputs queued during filling; request_frame() consumes it at EOF */
+        }
+        return 0;
+    }
+    /* queue is full: the frame at index s->mid is the one being filtered */
+    in = ff_bufqueue_peek(&s->q, s->mid);
+
+    if (!ctx->is_disabled) {
+        ThreadData td;
+
+        out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+        if (!out) {
+            av_frame_free(&buf);
+            return AVERROR(ENOMEM);
+        }
+        /* publish plane pointers and line sizes of every queued frame for the slice workers */
+        for (i = 0; i < s->size; i++) {
+            AVFrame *frame = ff_bufqueue_peek(&s->q, i);
+
+            s->data[0][i] = frame->data[0];
+            s->data[1][i] = frame->data[1];
+            s->data[2][i] = frame->data[2];
+            s->linesize[0][i] = frame->linesize[0];
+            s->linesize[1][i] = frame->linesize[1];
+            s->linesize[2][i] = frame->linesize[2];
+        }
+        /* job count: the smallest of the two chroma plane heights and the graph's thread count */
+        td.in = in; td.out = out;
+        ctx->internal->execute(ctx, s->filter_slice, &td, NULL,
+                               FFMIN3(s->planeheight[1],
+                                      s->planeheight[2],
+                                      ctx->graph->nb_threads));
+        av_frame_copy_props(out, in);
+    } else { /* filter disabled: output an unfiltered clone of the same frame */
+        out = av_frame_clone(in);
+        if (!out) {
+            av_frame_free(&buf);
+            return AVERROR(ENOMEM);
+        }
+    }
+    /* rotate the queue: free the frame returned by ff_bufqueue_get(), then queue buf */
+    in = ff_bufqueue_get(&s->q);
+    av_frame_free(&in);
+    ff_bufqueue_add(ctx, &s->q, buf);
+
+    return ff_filter_frame(outlink, out);
+}
+
+/* Request a frame from the input; on EOF, while s->available is non-zero and the
+ * filter is enabled, push one clone of a queued frame through filter_frame(). */
+static int request_frame(AVFilterLink *outlink)
+{
+    AVFilterContext *filter = outlink->src;
+    ATADenoiseContext *priv = filter->priv;
+    AVFilterLink *inlink = filter->inputs[0];
+    int status = ff_request_frame(inlink);
+    AVFrame *pad;
+
+    if (status != AVERROR_EOF || filter->is_disabled || !priv->available)
+        return status;
+    pad = av_frame_clone(ff_bufqueue_peek(&priv->q, priv->available));
+    if (!pad)
+        return AVERROR(ENOMEM);
+    status = filter_frame(inlink, pad);
+    priv->available--;
+    return status;
+}
+
+/* Filter teardown: discard whatever is still held in the frame queue. */
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    ATADenoiseContext *priv = ctx->priv;
+    ff_bufqueue_discard_all(&priv->q);
+}
+
+static const AVFilterPad inputs[] = { /* single video input pad */
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame, /* queues the frame; outputs once the queue is full */
+        .config_props = config_input, /* plane sizes, slice worker, scaled thresholds */
+    },
+    { NULL } /* all-zero entry closing the list */
+};
+
+static const AVFilterPad outputs[] = { /* single video output pad */
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .request_frame = request_frame, /* also pushes clones through filter_frame() at EOF */
+    },
+    { NULL } /* all-zero entry closing the list */
+};
+
+AVFilter ff_vf_atadenoise = { /* filter definition: one video input, one video output */
+    .name          = "atadenoise",
+    .description   = NULL_IF_CONFIG_SMALL("Apply an Adaptive Temporal Averaging Denoiser."),
+    .priv_size     = sizeof(ATADenoiseContext), /* size of the ctx->priv state */
+    .priv_class    = &atadenoise_class,
+    .init          = init,   /* rejects even "s" values, computes s->mid */
+    .uninit        = uninit, /* discards the frame queue */
+    .query_formats = query_formats,
+    .inputs        = inputs,
+    .outputs       = outputs,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL | AVFILTER_FLAG_SLICE_THREADS,
+};
diff --git a/libavfilter/vf_blend.c b/libavfilter/vf_blend.c
index 9c93baf3..ea63a200 100644
--- a/libavfilter/vf_blend.c
+++ b/libavfilter/vf_blend.c
@@ -28,68 +28,12 @@
 #include "internal.h"
 #include "dualinput.h"
 #include "video.h"
+#include "blend.h"
 
 #define TOP    0
 #define BOTTOM 1
 
-enum BlendMode {
-    BLEND_UNSET = -1,
-    BLEND_NORMAL,
-    BLEND_ADDITION,
-    BLEND_AND,
-    BLEND_AVERAGE,
-    BLEND_BURN,
-    BLEND_DARKEN,
-    BLEND_DIFFERENCE,
-    BLEND_DIFFERENCE128,
-    BLEND_DIVIDE,
-    BLEND_DODGE,
-    BLEND_EXCLUSION,
-    BLEND_HARDLIGHT,
-    BLEND_LIGHTEN,
-    BLEND_MULTIPLY,
-    BLEND_NEGATION,
-    BLEND_OR,
-    BLEND_OVERLAY,
-    BLEND_PHOENIX,
-    BLEND_PINLIGHT,
-    BLEND_REFLECT,
-    BLEND_SCREEN,
-    BLEND_SOFTLIGHT,
-    BLEND_SUBTRACT,
-    BLEND_VIVIDLIGHT,
-    BLEND_XOR,
-    BLEND_HARDMIX,
-    BLEND_LINEARLIGHT,
-    BLEND_GLOW,
-    BLEND_NB
-};
-
-static const char *const var_names[] = {   "X",   "Y",   "W",   "H",   "SW",   "SH",   "T",   "N",   "A",   "B",   "TOP",   "BOTTOM",        NULL };
-enum                                   { VAR_X, VAR_Y, VAR_W, VAR_H, VAR_SW, VAR_SH, VAR_T, VAR_N, VAR_A, VAR_B, VAR_TOP, VAR_BOTTOM, VAR_VARS_NB };
-
-typedef struct FilterParams {
-    enum BlendMode mode;
-    double opacity;
-    AVExpr *e;
-    char *expr_str;
-    void (*blend)(const uint8_t *top, int top_linesize,
-                  const uint8_t *bottom, int bottom_linesize,
-                  uint8_t *dst, int dst_linesize,
-                  int width, int start, int end,
-                  struct FilterParams *param, double *values);
-} FilterParams;
-
-typedef struct ThreadData {
-    const AVFrame *top, *bottom;
-    AVFrame *dst;
-    AVFilterLink *inlink;
-    int plane;
-    int w, h;
-    FilterParams *param;
-} ThreadData;
-
-typedef struct {
+typedef struct BlendContext {
     const AVClass *class;
     FFDualInputContext dinput;
     int hsub, vsub;             ///< chroma subsampling values
@@ -103,6 +47,18 @@ typedef struct {
     AVFrame *prev_frame;        /* only used with tblend */
 } BlendContext;
 
+static const char *const var_names[] = {   "X",   "Y",   "W",   "H",   "SW",   "SH",   "T",   "N",   "A",   "B",   "TOP",   "BOTTOM",        NULL };
+enum                                   { VAR_X, VAR_Y, VAR_W, VAR_H, VAR_SW, VAR_SH, VAR_T, VAR_N, VAR_A, VAR_B, VAR_TOP, VAR_BOTTOM, VAR_VARS_NB };
+
+typedef struct ThreadData {
+    const AVFrame *top, *bottom;
+    AVFrame *dst;
+    AVFilterLink *inlink;
+    int plane;
+    int w, h;
+    FilterParams *param;
+} ThreadData;
+
 #define COMMON_OPTIONS \
     { "c0_mode", "set component #0 blend mode", OFFSET(params[0].mode), AV_OPT_TYPE_INT, {.i64=0}, 0, BLEND_NB-1, FLAGS, "mode"},\
     { "c1_mode", "set component #1 blend mode", OFFSET(params[1].mode), AV_OPT_TYPE_INT, {.i64=0}, 0, BLEND_NB-1, FLAGS, "mode"},\
@@ -110,6 +66,7 @@ typedef struct {
     { "c3_mode", "set component #3 blend mode", OFFSET(params[3].mode), AV_OPT_TYPE_INT, {.i64=0}, 0, BLEND_NB-1, FLAGS, "mode"},\
     { "all_mode", "set blend mode for all components", OFFSET(all_mode), AV_OPT_TYPE_INT, {.i64=-1},-1, BLEND_NB-1, FLAGS, "mode"},\
     { "addition",   "", 0, AV_OPT_TYPE_CONST, {.i64=BLEND_ADDITION},   0, 0, FLAGS, "mode" },\
+    { "addition128", "", 0, AV_OPT_TYPE_CONST, {.i64=BLEND_ADDITION128}, 0, 0, FLAGS, "mode" },\
     { "and",        "", 0, AV_OPT_TYPE_CONST, {.i64=BLEND_AND},        0, 0, FLAGS, "mode" },\
     { "average",    "", 0, AV_OPT_TYPE_CONST, {.i64=BLEND_AVERAGE},    0, 0, FLAGS, "mode" },\
     { "burn",       "", 0, AV_OPT_TYPE_CONST, {.i64=BLEND_BURN},       0, 0, FLAGS, "mode" },\
@@ -125,6 +82,7 @@ typedef struct {
     { "lighten",    "", 0, AV_OPT_TYPE_CONST, {.i64=BLEND_LIGHTEN},    0, 0, FLAGS, "mode" },\
     { "linearlight","", 0, AV_OPT_TYPE_CONST, {.i64=BLEND_LINEARLIGHT},0, 0, FLAGS, "mode" },\
     { "multiply",   "", 0, AV_OPT_TYPE_CONST, {.i64=BLEND_MULTIPLY},   0, 0, FLAGS, "mode" },\
+    { "multiply128","", 0, AV_OPT_TYPE_CONST, {.i64=BLEND_MULTIPLY128},0, 0, FLAGS, "mode" },\
     { "negation",   "", 0, AV_OPT_TYPE_CONST, {.i64=BLEND_NEGATION},   0, 0, FLAGS, "mode" },\
     { "normal",     "", 0, AV_OPT_TYPE_CONST, {.i64=BLEND_NORMAL},     0, 0, FLAGS, "mode" },\
     { "or",         "", 0, AV_OPT_TYPE_CONST, {.i64=BLEND_OR},         0, 0, FLAGS, "mode" },\
@@ -153,40 +111,117 @@ typedef struct {
 
 static const AVOption blend_options[] = {
     COMMON_OPTIONS,
-    { "shortest",    "force termination when the shortest input terminates", OFFSET(dinput.shortest), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS },
-    { "repeatlast",  "repeat last bottom frame", OFFSET(dinput.repeatlast), AV_OPT_TYPE_INT, {.i64=1}, 0, 1, FLAGS },
+    { "shortest",    "force termination when the shortest input terminates", OFFSET(dinput.shortest), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
+    { "repeatlast",  "repeat last bottom frame", OFFSET(dinput.repeatlast), AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1, FLAGS },
     { NULL }
 };
 
 AVFILTER_DEFINE_CLASS(blend);
 
-static void blend_normal(const uint8_t *top, int top_linesize,
-                         const uint8_t *bottom, int bottom_linesize,
-                         uint8_t *dst, int dst_linesize,
-                         int width, int start, int end,
-                         FilterParams *param, double *values)
+#define COPY(src)                                                            \
+static void blend_copy ## src(const uint8_t *top, ptrdiff_t top_linesize,    \
+                            const uint8_t *bottom, ptrdiff_t bottom_linesize,\
+                            uint8_t *dst, ptrdiff_t dst_linesize,            \
+                            ptrdiff_t width, ptrdiff_t height,               \
+                            FilterParams *param, double *values)             \
+{                                                                            \
+    av_image_copy_plane(dst, dst_linesize, src, src ## _linesize,            \
+                        width, height);                                 \
+}
+
+COPY(top)
+COPY(bottom)
+
+#undef COPY
+
+static void blend_normal_8bit(const uint8_t *top, ptrdiff_t top_linesize,
+                              const uint8_t *bottom, ptrdiff_t bottom_linesize,
+                              uint8_t *dst, ptrdiff_t dst_linesize,
+                              ptrdiff_t width, ptrdiff_t height,
+                              FilterParams *param, double *values)
 {
-    av_image_copy_plane(dst, dst_linesize, top, top_linesize, width, end - start);
+    const double opacity = param->opacity;
+    int i, j;
+
+    for (i = 0; i < height; i++) {
+        for (j = 0; j < width; j++) {
+            dst[j] = top[j] * opacity + bottom[j] * (1. - opacity);
+        }
+        dst    += dst_linesize;
+        top    += top_linesize;
+        bottom += bottom_linesize;
+    }
 }
 
-#define DEFINE_BLEND(name, expr)                                      \
-static void blend_## name(const uint8_t *top, int top_linesize,       \
-                          const uint8_t *bottom, int bottom_linesize, \
-                          uint8_t *dst, int dst_linesize,             \
-                          int width, int start, int end,              \
-                          FilterParams *param, double *values)        \
-{                                                                     \
-    double opacity = param->opacity;                                  \
-    int i, j;                                                         \
-                                                                      \
-    for (i = start; i < end; i++) {                                   \
-        for (j = 0; j < width; j++) {                                 \
-            dst[j] = top[j] + ((expr) - top[j]) * opacity;            \
-        }                                                             \
-        dst    += dst_linesize;                                       \
-        top    += top_linesize;                                       \
-        bottom += bottom_linesize;                                    \
-    }                                                                 \
+static void blend_normal_16bit(const uint8_t *_top, ptrdiff_t top_linesize,
+                                  const uint8_t *_bottom, ptrdiff_t bottom_linesize,
+                                  uint8_t *_dst, ptrdiff_t dst_linesize,
+                                  ptrdiff_t width, ptrdiff_t height,
+                                  FilterParams *param, double *values)
+{
+    const uint16_t *top = (uint16_t*)_top;
+    const uint16_t *bottom = (uint16_t*)_bottom;
+    uint16_t *dst = (uint16_t*)_dst;
+    const double opacity = param->opacity;
+    int i, j;
+    dst_linesize /= 2;
+    top_linesize /= 2;
+    bottom_linesize /= 2;
+
+    for (i = 0; i < height; i++) {
+        for (j = 0; j < width; j++) {
+            dst[j] = top[j] * opacity + bottom[j] * (1. - opacity);
+        }
+        dst    += dst_linesize;
+        top    += top_linesize;
+        bottom += bottom_linesize;
+    }
+}
+
+#define DEFINE_BLEND8(name, expr)                                              \
+static void blend_## name##_8bit(const uint8_t *top, ptrdiff_t top_linesize,         \
+                                 const uint8_t *bottom, ptrdiff_t bottom_linesize,   \
+                                 uint8_t *dst, ptrdiff_t dst_linesize,               \
+                                 ptrdiff_t width, ptrdiff_t height,                \
+                                 FilterParams *param, double *values)          \
+{                                                                              \
+    double opacity = param->opacity;                                           \
+    int i, j;                                                                  \
+                                                                               \
+    for (i = 0; i < height; i++) {                                             \
+        for (j = 0; j < width; j++) {                                          \
+            dst[j] = top[j] + ((expr) - top[j]) * opacity;                     \
+        }                                                                      \
+        dst    += dst_linesize;                                                \
+        top    += top_linesize;                                                \
+        bottom += bottom_linesize;                                             \
+    }                                                                          \
+}
+
+#define DEFINE_BLEND16(name, expr)                                             \
+static void blend_## name##_16bit(const uint8_t *_top, ptrdiff_t top_linesize,       \
+                                  const uint8_t *_bottom, ptrdiff_t bottom_linesize, \
+                                  uint8_t *_dst, ptrdiff_t dst_linesize,             \
+                                  ptrdiff_t width, ptrdiff_t height,           \
+                                  FilterParams *param, double *values)         \
+{                                                                              \
+    const uint16_t *top = (uint16_t*)_top;                                     \
+    const uint16_t *bottom = (uint16_t*)_bottom;                               \
+    uint16_t *dst = (uint16_t*)_dst;                                           \
+    double opacity = param->opacity;                                           \
+    int i, j;                                                                  \
+    dst_linesize /= 2;                                                         \
+    top_linesize /= 2;                                                         \
+    bottom_linesize /= 2;                                                      \
+                                                                               \
+    for (i = 0; i < height; i++) {                                             \
+        for (j = 0; j < width; j++) {                                          \
+            dst[j] = top[j] + ((expr) - top[j]) * opacity;                     \
+        }                                                                      \
+        dst    += dst_linesize;                                                \
+        top    += top_linesize;                                                \
+        bottom += bottom_linesize;                                             \
+    }                                                                          \
 }
 
 #define A top[j]
@@ -197,62 +232,115 @@ static void blend_## name(const uint8_t *top, int top_linesize,       \
 #define BURN(a, b)        (((a) == 0) ? (a) : FFMAX(0, 255 - ((255 - (b)) << 8) / (a)))
 #define DODGE(a, b)       (((a) == 255) ? (a) : FFMIN(255, (((b) << 8) / (255 - (a)))))
 
-DEFINE_BLEND(addition,   FFMIN(255, A + B))
-DEFINE_BLEND(average,    (A + B) / 2)
-DEFINE_BLEND(subtract,   FFMAX(0, A - B))
-DEFINE_BLEND(multiply,   MULTIPLY(1, A, B))
-DEFINE_BLEND(negation,   255 - FFABS(255 - A - B))
-DEFINE_BLEND(difference, FFABS(A - B))
-DEFINE_BLEND(difference128, av_clip_uint8(128 + A - B))
-DEFINE_BLEND(screen,     SCREEN(1, A, B))
-DEFINE_BLEND(overlay,    (A < 128) ? MULTIPLY(2, A, B) : SCREEN(2, A, B))
-DEFINE_BLEND(hardlight,  (B < 128) ? MULTIPLY(2, B, A) : SCREEN(2, B, A))
-DEFINE_BLEND(hardmix,    (A < (255 - B)) ? 0: 255)
-DEFINE_BLEND(darken,     FFMIN(A, B))
-DEFINE_BLEND(lighten,    FFMAX(A, B))
-DEFINE_BLEND(divide,     av_clip_uint8(((float)A / ((float)B) * 255)))
-DEFINE_BLEND(dodge,      DODGE(A, B))
-DEFINE_BLEND(burn,       BURN(A, B))
-DEFINE_BLEND(softlight,  (A > 127) ? B + (255 - B) * (A - 127.5) / 127.5 * (0.5 - FFABS(B - 127.5) / 255): B - B * ((127.5 - A) / 127.5) * (0.5 - FFABS(B - 127.5)/255))
-DEFINE_BLEND(exclusion,  A + B - 2 * A * B / 255)
-DEFINE_BLEND(pinlight,   (B < 128) ? FFMIN(A, 2 * B) : FFMAX(A, 2 * (B - 128)))
-DEFINE_BLEND(phoenix,    FFMIN(A, B) - FFMAX(A, B) + 255)
-DEFINE_BLEND(reflect,    (B == 255) ? B : FFMIN(255, (A * A / (255 - B))))
-DEFINE_BLEND(glow,       (A == 255) ? A : FFMIN(255, (B * B / (255 - A))))
-DEFINE_BLEND(and,        A & B)
-DEFINE_BLEND(or,         A | B)
-DEFINE_BLEND(xor,        A ^ B)
-DEFINE_BLEND(vividlight, (A < 128) ? BURN(2 * A, B) : DODGE(2 * (A - 128), B))
-DEFINE_BLEND(linearlight,av_clip_uint8((B < 128) ? B + 2 * A - 255 : B + 2 * (A - 128)))
-
-static void blend_expr(const uint8_t *top, int top_linesize,
-                       const uint8_t *bottom, int bottom_linesize,
-                       uint8_t *dst, int dst_linesize,
-                       int width, int start, int end,
-                       FilterParams *param, double *values)
-{
-    AVExpr *e = param->e;
-    int y, x;
-
-    for (y = start; y < end; y++) {
-        values[VAR_Y] = y;
-        for (x = 0; x < width; x++) {
-            values[VAR_X]      = x;
-            values[VAR_TOP]    = values[VAR_A] = top[x];
-            values[VAR_BOTTOM] = values[VAR_B] = bottom[x];
-            dst[x] = av_expr_eval(e, values, NULL);
-        }
-        dst    += dst_linesize;
-        top    += top_linesize;
-        bottom += bottom_linesize;
-    }
+DEFINE_BLEND8(addition,   FFMIN(255, A + B))
+DEFINE_BLEND8(addition128, av_clip_uint8(A + B - 128))
+DEFINE_BLEND8(average,    (A + B) / 2)
+DEFINE_BLEND8(subtract,   FFMAX(0, A - B))
+DEFINE_BLEND8(multiply,   MULTIPLY(1, A, B))
+DEFINE_BLEND8(multiply128,av_clip_uint8((A - 128) * B / 32. + 128))
+DEFINE_BLEND8(negation,   255 - FFABS(255 - A - B))
+DEFINE_BLEND8(difference, FFABS(A - B))
+DEFINE_BLEND8(difference128, av_clip_uint8(128 + A - B))
+DEFINE_BLEND8(screen,     SCREEN(1, A, B))
+DEFINE_BLEND8(overlay,    (A < 128) ? MULTIPLY(2, A, B) : SCREEN(2, A, B))
+DEFINE_BLEND8(hardlight,  (B < 128) ? MULTIPLY(2, B, A) : SCREEN(2, B, A))
+DEFINE_BLEND8(hardmix,    (A < (255 - B)) ? 0: 255)
+DEFINE_BLEND8(darken,     FFMIN(A, B))
+DEFINE_BLEND8(lighten,    FFMAX(A, B))
+DEFINE_BLEND8(divide,     av_clip_uint8(B == 0 ? 255 : 255 * A / B))
+DEFINE_BLEND8(dodge,      DODGE(A, B))
+DEFINE_BLEND8(burn,       BURN(A, B))
+DEFINE_BLEND8(softlight,  (A > 127) ? B + (255 - B) * (A - 127.5) / 127.5 * (0.5 - fabs(B - 127.5) / 255): B - B * ((127.5 - A) / 127.5) * (0.5 - fabs(B - 127.5)/255))
+DEFINE_BLEND8(exclusion,  A + B - 2 * A * B / 255)
+DEFINE_BLEND8(pinlight,   (B < 128) ? FFMIN(A, 2 * B) : FFMAX(A, 2 * (B - 128)))
+DEFINE_BLEND8(phoenix,    FFMIN(A, B) - FFMAX(A, B) + 255)
+DEFINE_BLEND8(reflect,    (B == 255) ? B : FFMIN(255, (A * A / (255 - B))))
+DEFINE_BLEND8(glow,       (A == 255) ? A : FFMIN(255, (B * B / (255 - A))))
+DEFINE_BLEND8(and,        A & B)
+DEFINE_BLEND8(or,         A | B)
+DEFINE_BLEND8(xor,        A ^ B)
+DEFINE_BLEND8(vividlight, (A < 128) ? BURN(2 * A, B) : DODGE(2 * (A - 128), B))
+DEFINE_BLEND8(linearlight,av_clip_uint8((B < 128) ? B + 2 * A - 255 : B + 2 * (A - 128)))
+
+#undef MULTIPLY
+#undef SCREEN
+#undef BURN
+#undef DODGE
+
+#define MULTIPLY(x, a, b) ((x) * (((int64_t)(a) * (b)) / 65535))
+#define SCREEN(x, a, b)   (65535 - (x) * ((int64_t)(65535 - (a)) * (65535 - (b)) / 65535))
+#define BURN(a, b)        (((a) == 0) ? (a) : FFMAX(0, 65535 - ((int64_t)(65535 - (b)) << 16) / (a)))
+#define DODGE(a, b)       (((a) == 65535) ? (a) : FFMIN(65535, (((int64_t)(b) << 16) / (65535 - (a)))))
+/* Two 16-bit samples multiplied (or one shifted left by 16) exceed INT_MAX, so such intermediates are widened to int64_t. */
+DEFINE_BLEND16(addition,   FFMIN(65535, A + B))
+DEFINE_BLEND16(addition128, av_clip_uint16(A + B - 32768))
+DEFINE_BLEND16(average,    (A + B) / 2)
+DEFINE_BLEND16(subtract,   FFMAX(0, A - B))
+DEFINE_BLEND16(multiply,   MULTIPLY(1, A, B))
+DEFINE_BLEND16(multiply128, av_clip_uint16((A - 32768) * B / 8192. + 32768))
+DEFINE_BLEND16(negation,   65535 - FFABS(65535 - A - B))
+DEFINE_BLEND16(difference, FFABS(A - B))
+DEFINE_BLEND16(difference128, av_clip_uint16(32768 + A - B))
+DEFINE_BLEND16(screen,     SCREEN(1, A, B))
+DEFINE_BLEND16(overlay,    (A < 32768) ? MULTIPLY(2, A, B) : SCREEN(2, A, B))
+DEFINE_BLEND16(hardlight,  (B < 32768) ? MULTIPLY(2, B, A) : SCREEN(2, B, A))
+DEFINE_BLEND16(hardmix,    (A < (65535 - B)) ? 0: 65535)
+DEFINE_BLEND16(darken,     FFMIN(A, B))
+DEFINE_BLEND16(lighten,    FFMAX(A, B))
+DEFINE_BLEND16(divide,     av_clip_uint16(B == 0 ? 65535 : FFMIN(65535, (int64_t)65535 * A / B)))
+DEFINE_BLEND16(dodge,      DODGE(A, B))
+DEFINE_BLEND16(burn,       BURN(A, B))
+DEFINE_BLEND16(softlight,  (A > 32767) ? B + (65535 - B) * (A - 32767.5) / 32767.5 * (0.5 - fabs(B - 32767.5) / 65535): B - B * ((32767.5 - A) / 32767.5) * (0.5 - fabs(B - 32767.5)/65535))
+DEFINE_BLEND16(exclusion,  A + B - 2 * (int64_t)A * B / 65535)
+DEFINE_BLEND16(pinlight,   (B < 32768) ? FFMIN(A, 2 * B) : FFMAX(A, 2 * (B - 32768)))
+DEFINE_BLEND16(phoenix,    FFMIN(A, B) - FFMAX(A, B) + 65535)
+DEFINE_BLEND16(reflect,    (B == 65535) ? B : FFMIN(65535, ((int64_t)A * A / (65535 - B))))
+DEFINE_BLEND16(glow,       (A == 65535) ? A : FFMIN(65535, ((int64_t)B * B / (65535 - A))))
+DEFINE_BLEND16(and,        A & B)
+DEFINE_BLEND16(or,         A | B)
+DEFINE_BLEND16(xor,        A ^ B)
+DEFINE_BLEND16(vividlight, (A < 32768) ? BURN(2 * A, B) : DODGE(2 * (A - 32768), B))
+DEFINE_BLEND16(linearlight,av_clip_uint16((B < 32768) ? B + 2 * A - 65535 : B + 2 * (A - 32768)))
+
+#define DEFINE_BLEND_EXPR(type, name, div)                                     \
+static void blend_expr_## name(const uint8_t *_top, ptrdiff_t top_linesize,          \
+                               const uint8_t *_bottom, ptrdiff_t bottom_linesize,    \
+                               uint8_t *_dst, ptrdiff_t dst_linesize,                \
+                               ptrdiff_t width, ptrdiff_t height,              \
+                               FilterParams *param, double *values)            \
+{                                                                              \
+    const type *top = (const type*)_top;                                       \
+    const type *bottom = (const type*)_bottom;                                 \
+    type *dst = (type*)_dst;                                                   \
+    AVExpr *e = param->e;                                                      \
+    int y, x;                                                                  \
+    dst_linesize /= div;                                                       \
+    top_linesize /= div;                                                       \
+    bottom_linesize /= div;                                                    \
+    /* NOTE(review): y restarts at 0 on every call; when the caller passes one slice of the frame (as filter_slice appears to), VAR_Y is slice-relative rather than the frame row -- confirm and pass the start row if so */ \
+    for (y = 0; y < height; y++) {                                             \
+        values[VAR_Y] = y;                                                     \
+        for (x = 0; x < width; x++) {                                          \
+            values[VAR_X]      = x;                                            \
+            values[VAR_TOP]    = values[VAR_A] = top[x];                       \
+            values[VAR_BOTTOM] = values[VAR_B] = bottom[x];                    \
+            dst[x] = av_expr_eval(e, values, NULL);                            \
+        }                                                                      \
+        dst    += dst_linesize;                                                \
+        top    += top_linesize;                                                \
+        bottom += bottom_linesize;                                             \
+    }                                                                          \
 }
 
+DEFINE_BLEND_EXPR(uint8_t, 8bit, 1)
+DEFINE_BLEND_EXPR(uint16_t, 16bit, 2)
+
 static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
 {
     ThreadData *td = arg;
     int slice_start = (td->h *  jobnr   ) / nb_jobs;
     int slice_end   = (td->h * (jobnr+1)) / nb_jobs;
+    int height      = slice_end - slice_start;
     const uint8_t *top    = td->top->data[td->plane];
     const uint8_t *bottom = td->bottom->data[td->plane];
     uint8_t *dst    = td->dst->data[td->plane];
@@ -271,14 +359,14 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
                      td->bottom->linesize[td->plane],
                      dst + slice_start * td->dst->linesize[td->plane],
                      td->dst->linesize[td->plane],
-                     td->w, slice_start, slice_end, td->param, &values[0]);
+                     td->w, height, td->param, &values[0]);
     return 0;
 }
 
 static AVFrame *blend_frame(AVFilterContext *ctx, AVFrame *top_buf,
                             const AVFrame *bottom_buf)
 {
-    BlendContext *b = ctx->priv;
+    BlendContext *s = ctx->priv;
     AVFilterLink *inlink = ctx->inputs[0];
     AVFilterLink *outlink = ctx->outputs[0];
     AVFrame *dst_buf;
@@ -289,12 +377,12 @@ static AVFrame *blend_frame(AVFilterContext *ctx, AVFrame *top_buf,
         return top_buf;
     av_frame_copy_props(dst_buf, top_buf);
 
-    for (plane = 0; plane < b->nb_planes; plane++) {
-        int hsub = plane == 1 || plane == 2 ? b->hsub : 0;
-        int vsub = plane == 1 || plane == 2 ? b->vsub : 0;
-        int outw = FF_CEIL_RSHIFT(dst_buf->width,  hsub);
-        int outh = FF_CEIL_RSHIFT(dst_buf->height, vsub);
-        FilterParams *param = &b->params[plane];
+    for (plane = 0; plane < s->nb_planes; plane++) {
+        int hsub = plane == 1 || plane == 2 ? s->hsub : 0;
+        int vsub = plane == 1 || plane == 2 ? s->vsub : 0;
+        int outw = AV_CEIL_RSHIFT(dst_buf->width,  hsub);
+        int outh = AV_CEIL_RSHIFT(dst_buf->height, vsub);
+        FilterParams *param = &s->params[plane];
         ThreadData td = { .top = top_buf, .bottom = bottom_buf, .dst = dst_buf,
                           .w = outw, .h = outh, .param = param, .plane = plane,
                           .inlink = inlink };
@@ -302,7 +390,7 @@ static AVFrame *blend_frame(AVFilterContext *ctx, AVFrame *top_buf,
         ctx->internal->execute(ctx, filter_slice, &td, NULL, FFMIN(outh, ctx->graph->nb_threads));
     }
 
-    if (!b->tblend)
+    if (!s->tblend)
         av_frame_free(&top_buf);
 
     return dst_buf;
@@ -310,65 +398,11 @@ static AVFrame *blend_frame(AVFilterContext *ctx, AVFrame *top_buf,
 
 static av_cold int init(AVFilterContext *ctx)
 {
-    BlendContext *b = ctx->priv;
-    int ret, plane;
-
-    b->tblend = !strcmp(ctx->filter->name, "tblend");
-
-    for (plane = 0; plane < FF_ARRAY_ELEMS(b->params); plane++) {
-        FilterParams *param = &b->params[plane];
-
-        if (b->all_mode >= 0)
-            param->mode = b->all_mode;
-        if (b->all_opacity < 1)
-            param->opacity = b->all_opacity;
-
-        switch (param->mode) {
-        case BLEND_ADDITION:   param->blend = blend_addition;   break;
-        case BLEND_AND:        param->blend = blend_and;        break;
-        case BLEND_AVERAGE:    param->blend = blend_average;    break;
-        case BLEND_BURN:       param->blend = blend_burn;       break;
-        case BLEND_DARKEN:     param->blend = blend_darken;     break;
-        case BLEND_DIFFERENCE: param->blend = blend_difference; break;
-        case BLEND_DIFFERENCE128: param->blend = blend_difference128; break;
-        case BLEND_DIVIDE:     param->blend = blend_divide;     break;
-        case BLEND_DODGE:      param->blend = blend_dodge;      break;
-        case BLEND_EXCLUSION:  param->blend = blend_exclusion;  break;
-        case BLEND_GLOW:       param->blend = blend_glow;       break;
-        case BLEND_HARDLIGHT:  param->blend = blend_hardlight;  break;
-        case BLEND_HARDMIX:    param->blend = blend_hardmix;    break;
-        case BLEND_LIGHTEN:    param->blend = blend_lighten;    break;
-        case BLEND_LINEARLIGHT:param->blend = blend_linearlight;break;
-        case BLEND_MULTIPLY:   param->blend = blend_multiply;   break;
-        case BLEND_NEGATION:   param->blend = blend_negation;   break;
-        case BLEND_NORMAL:     param->blend = blend_normal;     break;
-        case BLEND_OR:         param->blend = blend_or;         break;
-        case BLEND_OVERLAY:    param->blend = blend_overlay;    break;
-        case BLEND_PHOENIX:    param->blend = blend_phoenix;    break;
-        case BLEND_PINLIGHT:   param->blend = blend_pinlight;   break;
-        case BLEND_REFLECT:    param->blend = blend_reflect;    break;
-        case BLEND_SCREEN:     param->blend = blend_screen;     break;
-        case BLEND_SOFTLIGHT:  param->blend = blend_softlight;  break;
-        case BLEND_SUBTRACT:   param->blend = blend_subtract;   break;
-        case BLEND_VIVIDLIGHT: param->blend = blend_vividlight; break;
-        case BLEND_XOR:        param->blend = blend_xor;        break;
-        }
+    BlendContext *s = ctx->priv;
 
-        if (b->all_expr && !param->expr_str) {
-            param->expr_str = av_strdup(b->all_expr);
-            if (!param->expr_str)
-                return AVERROR(ENOMEM);
-        }
-        if (param->expr_str) {
-            ret = av_expr_parse(&param->e, param->expr_str, var_names,
-                                NULL, NULL, NULL, NULL, 0, ctx);
-            if (ret < 0)
-                return ret;
-            param->blend = blend_expr;
-        }
-    }
+    s->tblend = !strcmp(ctx->filter->name, "tblend");
 
-    b->dinput.process = blend_frame;
+    s->dinput.process = blend_frame;
     return 0;
 }
 
@@ -378,7 +412,11 @@ static int query_formats(AVFilterContext *ctx)
         AV_PIX_FMT_YUVA444P, AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUVA420P,
         AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_YUVJ422P,AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ411P,
         AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV410P,
-        AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRAP, AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE
+        AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRAP, AV_PIX_FMT_GRAY8,
+        AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16,
+        AV_PIX_FMT_YUVA420P16, AV_PIX_FMT_YUVA422P16, AV_PIX_FMT_YUVA444P16,
+        AV_PIX_FMT_GBRP16, AV_PIX_FMT_GRAY16,
+        AV_PIX_FMT_NONE
     };
 
     AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
@@ -389,45 +427,91 @@ static int query_formats(AVFilterContext *ctx)
 
 static av_cold void uninit(AVFilterContext *ctx)
 {
-    BlendContext *b = ctx->priv;
+    BlendContext *s = ctx->priv;
     int i;
 
-    ff_dualinput_uninit(&b->dinput);
-    av_frame_free(&b->prev_frame);
+    ff_dualinput_uninit(&s->dinput);
+    av_frame_free(&s->prev_frame);
 
-    for (i = 0; i < FF_ARRAY_ELEMS(b->params); i++)
-        av_expr_free(b->params[i].e);
+    for (i = 0; i < FF_ARRAY_ELEMS(s->params); i++)
+        av_expr_free(s->params[i].e);
 }
 
-#if CONFIG_BLEND_FILTER
+void ff_blend_init(FilterParams *param, int is_16bit)
+{
+    switch (param->mode) {
+    case BLEND_ADDITION:   param->blend = is_16bit ? blend_addition_16bit   : blend_addition_8bit;   break;
+    case BLEND_ADDITION128: param->blend = is_16bit ? blend_addition128_16bit : blend_addition128_8bit; break;
+    case BLEND_AND:        param->blend = is_16bit ? blend_and_16bit        : blend_and_8bit;        break;
+    case BLEND_AVERAGE:    param->blend = is_16bit ? blend_average_16bit    : blend_average_8bit;    break;
+    case BLEND_BURN:       param->blend = is_16bit ? blend_burn_16bit       : blend_burn_8bit;       break;
+    case BLEND_DARKEN:     param->blend = is_16bit ? blend_darken_16bit     : blend_darken_8bit;     break;
+    case BLEND_DIFFERENCE: param->blend = is_16bit ? blend_difference_16bit : blend_difference_8bit; break;
+    case BLEND_DIFFERENCE128: param->blend = is_16bit ? blend_difference128_16bit: blend_difference128_8bit; break;
+    case BLEND_DIVIDE:     param->blend = is_16bit ? blend_divide_16bit     : blend_divide_8bit;     break;
+    case BLEND_DODGE:      param->blend = is_16bit ? blend_dodge_16bit      : blend_dodge_8bit;      break;
+    case BLEND_EXCLUSION:  param->blend = is_16bit ? blend_exclusion_16bit  : blend_exclusion_8bit;  break;
+    case BLEND_GLOW:       param->blend = is_16bit ? blend_glow_16bit       : blend_glow_8bit;       break;
+    case BLEND_HARDLIGHT:  param->blend = is_16bit ? blend_hardlight_16bit  : blend_hardlight_8bit;  break;
+    case BLEND_HARDMIX:    param->blend = is_16bit ? blend_hardmix_16bit    : blend_hardmix_8bit;    break;
+    case BLEND_LIGHTEN:    param->blend = is_16bit ? blend_lighten_16bit    : blend_lighten_8bit;    break;
+    case BLEND_LINEARLIGHT:param->blend = is_16bit ? blend_linearlight_16bit: blend_linearlight_8bit;break;
+    case BLEND_MULTIPLY:   param->blend = is_16bit ? blend_multiply_16bit   : blend_multiply_8bit;   break;
+    case BLEND_MULTIPLY128:param->blend = is_16bit ? blend_multiply128_16bit: blend_multiply128_8bit;break;
+    case BLEND_NEGATION:   param->blend = is_16bit ? blend_negation_16bit   : blend_negation_8bit;   break;
+    case BLEND_NORMAL:     param->blend = param->opacity == 1 ? blend_copytop :
+                                          param->opacity == 0 ? blend_copybottom :
+                                          is_16bit ? blend_normal_16bit     : blend_normal_8bit;     break;
+    case BLEND_OR:         param->blend = is_16bit ? blend_or_16bit         : blend_or_8bit;         break;
+    case BLEND_OVERLAY:    param->blend = is_16bit ? blend_overlay_16bit    : blend_overlay_8bit;    break;
+    case BLEND_PHOENIX:    param->blend = is_16bit ? blend_phoenix_16bit    : blend_phoenix_8bit;    break;
+    case BLEND_PINLIGHT:   param->blend = is_16bit ? blend_pinlight_16bit   : blend_pinlight_8bit;   break;
+    case BLEND_REFLECT:    param->blend = is_16bit ? blend_reflect_16bit    : blend_reflect_8bit;    break;
+    case BLEND_SCREEN:     param->blend = is_16bit ? blend_screen_16bit     : blend_screen_8bit;     break;
+    case BLEND_SOFTLIGHT:  param->blend = is_16bit ? blend_softlight_16bit  : blend_softlight_8bit;  break;
+    case BLEND_SUBTRACT:   param->blend = is_16bit ? blend_subtract_16bit   : blend_subtract_8bit;   break;
+    case BLEND_VIVIDLIGHT: param->blend = is_16bit ? blend_vividlight_16bit : blend_vividlight_8bit; break;
+    case BLEND_XOR:        param->blend = is_16bit ? blend_xor_16bit        : blend_xor_8bit;        break;
+    }
+    /* Opacity 0: DEFINE_BLEND16's top + (expr - top) * opacity reduces to top, so blend_copytop replaces the per-mode function (8-bit path presumably analogous). */
+    if (param->opacity == 0 && param->mode != BLEND_NORMAL) {
+        param->blend = blend_copytop;
+    }
+    /* x86 builds additionally run ff_blend_init_x86() on param; presumably it swaps in optimized versions -- TODO confirm. */
+    if (ARCH_X86)
+        ff_blend_init_x86(param, is_16bit);
+}
 
 static int config_output(AVFilterLink *outlink)
 {
     AVFilterContext *ctx = outlink->src;
     AVFilterLink *toplink = ctx->inputs[TOP];
-    AVFilterLink *bottomlink = ctx->inputs[BOTTOM];
-    BlendContext *b = ctx->priv;
+    BlendContext *s = ctx->priv;
     const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(toplink->format);
-    int ret;
+    int ret, plane, is_16bit;
 
-    if (toplink->format != bottomlink->format) {
-        av_log(ctx, AV_LOG_ERROR, "inputs must be of same pixel format\n");
-        return AVERROR(EINVAL);
-    }
-    if (toplink->w                       != bottomlink->w ||
-        toplink->h                       != bottomlink->h ||
-        toplink->sample_aspect_ratio.num != bottomlink->sample_aspect_ratio.num ||
-        toplink->sample_aspect_ratio.den != bottomlink->sample_aspect_ratio.den) {
-        av_log(ctx, AV_LOG_ERROR, "First input link %s parameters "
-               "(size %dx%d, SAR %d:%d) do not match the corresponding "
-               "second input link %s parameters (%dx%d, SAR %d:%d)\n",
-               ctx->input_pads[TOP].name, toplink->w, toplink->h,
-               toplink->sample_aspect_ratio.num,
-               toplink->sample_aspect_ratio.den,
-               ctx->input_pads[BOTTOM].name, bottomlink->w, bottomlink->h,
-               bottomlink->sample_aspect_ratio.num,
-               bottomlink->sample_aspect_ratio.den);
-        return AVERROR(EINVAL);
+    if (!s->tblend) {
+        AVFilterLink *bottomlink = ctx->inputs[BOTTOM];
+
+        if (toplink->format != bottomlink->format) {
+            av_log(ctx, AV_LOG_ERROR, "inputs must be of same pixel format\n");
+            return AVERROR(EINVAL);
+        }
+        if (toplink->w                       != bottomlink->w ||
+            toplink->h                       != bottomlink->h ||
+            toplink->sample_aspect_ratio.num != bottomlink->sample_aspect_ratio.num ||
+            toplink->sample_aspect_ratio.den != bottomlink->sample_aspect_ratio.den) {
+            av_log(ctx, AV_LOG_ERROR, "First input link %s parameters "
+                   "(size %dx%d, SAR %d:%d) do not match the corresponding "
+                   "second input link %s parameters (%dx%d, SAR %d:%d)\n",
+                   ctx->input_pads[TOP].name, toplink->w, toplink->h,
+                   toplink->sample_aspect_ratio.num,
+                   toplink->sample_aspect_ratio.den,
+                   ctx->input_pads[BOTTOM].name, bottomlink->w, bottomlink->h,
+                   bottomlink->sample_aspect_ratio.num,
+                   bottomlink->sample_aspect_ratio.den);
+            return AVERROR(EINVAL);
+        }
     }
 
     outlink->w = toplink->w;
@@ -436,26 +520,55 @@ static int config_output(AVFilterLink *outlink)
     outlink->sample_aspect_ratio = toplink->sample_aspect_ratio;
     outlink->frame_rate = toplink->frame_rate;
 
-    b->hsub = pix_desc->log2_chroma_w;
-    b->vsub = pix_desc->log2_chroma_h;
-    b->nb_planes = av_pix_fmt_count_planes(toplink->format);
+    s->hsub = pix_desc->log2_chroma_w;
+    s->vsub = pix_desc->log2_chroma_h;
+
+    is_16bit = pix_desc->comp[0].depth == 16;
+    s->nb_planes = av_pix_fmt_count_planes(toplink->format);
 
-    if ((ret = ff_dualinput_init(ctx, &b->dinput)) < 0)
-        return ret;
+    if (!s->tblend)
+        if ((ret = ff_dualinput_init(ctx, &s->dinput)) < 0)
+            return ret;
+
+    for (plane = 0; plane < FF_ARRAY_ELEMS(s->params); plane++) {
+        FilterParams *param = &s->params[plane];
+
+        if (s->all_mode >= 0)
+            param->mode = s->all_mode;
+        if (s->all_opacity < 1)
+            param->opacity = s->all_opacity;
+
+        ff_blend_init(param, is_16bit);
+
+        if (s->all_expr && !param->expr_str) {
+            param->expr_str = av_strdup(s->all_expr);
+            if (!param->expr_str)
+                return AVERROR(ENOMEM);
+        }
+        if (param->expr_str) {
+            ret = av_expr_parse(&param->e, param->expr_str, var_names,
+                                NULL, NULL, NULL, NULL, 0, ctx);
+            if (ret < 0)
+                return ret;
+            param->blend = is_16bit? blend_expr_16bit : blend_expr_8bit;
+        }
+    }
 
     return 0;
 }
 
+#if CONFIG_BLEND_FILTER
+
 static int request_frame(AVFilterLink *outlink)
 {
-    BlendContext *b = outlink->src->priv;
-    return ff_dualinput_request_frame(&b->dinput, outlink);
+    BlendContext *s = outlink->src->priv;
+    return ff_dualinput_request_frame(&s->dinput, outlink);
 }
 
 static int filter_frame(AVFilterLink *inlink, AVFrame *buf)
 {
-    BlendContext *b = inlink->dst->priv;
-    return ff_dualinput_filter_frame(&b->dinput, inlink, buf);
+    BlendContext *s = inlink->dst->priv;
+    return ff_dualinput_filter_frame(&s->dinput, inlink, buf);
 }
 
 static const AVFilterPad blend_inputs[] = {
@@ -498,33 +611,18 @@ AVFilter ff_vf_blend = {
 
 #if CONFIG_TBLEND_FILTER
 
-static int tblend_config_output(AVFilterLink *outlink)
-{
-    AVFilterContext *ctx = outlink->src;
-    AVFilterLink *inlink = ctx->inputs[0];
-    BlendContext *b = ctx->priv;
-    const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(inlink->format);
-
-    b->hsub = pix_desc->log2_chroma_w;
-    b->vsub = pix_desc->log2_chroma_h;
-    b->nb_planes = av_pix_fmt_count_planes(inlink->format);
-    outlink->flags |= FF_LINK_FLAG_REQUEST_LOOP;
-
-    return 0;
-}
-
 static int tblend_filter_frame(AVFilterLink *inlink, AVFrame *frame)
 {
-    BlendContext *b = inlink->dst->priv;
+    BlendContext *s = inlink->dst->priv;
     AVFilterLink *outlink = inlink->dst->outputs[0];
 
-    if (b->prev_frame) {
-        AVFrame *out = blend_frame(inlink->dst, frame, b->prev_frame);
-        av_frame_free(&b->prev_frame);
-        b->prev_frame = frame;
+    if (s->prev_frame) {
+        AVFrame *out = blend_frame(inlink->dst, frame, s->prev_frame);
+        av_frame_free(&s->prev_frame);
+        s->prev_frame = frame;
         return ff_filter_frame(outlink, out);
     }
-    b->prev_frame = frame;
+    s->prev_frame = frame;
     return 0;
 }
 
@@ -548,7 +646,7 @@ static const AVFilterPad tblend_outputs[] = {
     {
         .name          = "default",
         .type          = AVMEDIA_TYPE_VIDEO,
-        .config_props  = tblend_config_output,
+        .config_props  = config_output,
     },
     { NULL }
 };
diff --git a/libavfilter/vf_boxblur.c b/libavfilter/vf_boxblur.c
index 17db949b..8e439868 100644
--- a/libavfilter/vf_boxblur.c
+++ b/libavfilter/vf_boxblur.c
@@ -118,14 +118,15 @@ static av_cold void uninit(AVFilterContext *ctx)
 static int query_formats(AVFilterContext *ctx)
 {
     AVFilterFormats *formats = NULL;
-    int fmt;
+    int fmt, ret;
 
     for (fmt = 0; av_pix_fmt_desc_get(fmt); fmt++) {
         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);
         if (!(desc->flags & (AV_PIX_FMT_FLAG_HWACCEL | AV_PIX_FMT_FLAG_BITSTREAM | AV_PIX_FMT_FLAG_PAL)) &&
             (desc->flags & AV_PIX_FMT_FLAG_PLANAR || desc->nb_components == 1) &&
-            (!(desc->flags & AV_PIX_FMT_FLAG_BE) == !HAVE_BIGENDIAN || desc->comp[0].depth_minus1 == 7))
-            ff_add_format(&formats, fmt);
+            (!(desc->flags & AV_PIX_FMT_FLAG_BE) == !HAVE_BIGENDIAN || desc->comp[0].depth == 8) &&
+            (ret = ff_add_format(&formats, fmt)) < 0)
+            return ret;
     }
 
     return ff_set_common_formats(ctx, formats);
@@ -203,75 +204,53 @@ static int config_input(AVFilterLink *inlink)
     return 0;
 }
 
-static inline void blur8(uint8_t *dst, int dst_step, const uint8_t *src, int src_step,
-                        int len, int radius)
-{
-    /* Naive boxblur would sum source pixels from x-radius .. x+radius
-     * for destination pixel x. That would be O(radius*width).
-     * If you now look at what source pixels represent 2 consecutive
-     * output pixels, then you see they are almost identical and only
-     * differ by 2 pixels, like:
-     * src0       111111111
-     * dst0           1
-     * src1        111111111
-     * dst1            1
-     * src0-src1  1       -1
-     * so when you know one output pixel you can find the next by just adding
-     * and subtracting 1 input pixel.
-     * The following code adopts this faster variant.
-     */
-    const int length = radius*2 + 1;
-    const int inv = ((1<<16) + length/2)/length;
-    int x, sum = src[radius*src_step];
-
-    for (x = 0; x < radius; x++)
-        sum += src[x*src_step]<<1;
-
-    sum = sum*inv + (1<<15);
-
-    for (x = 0; x <= radius; x++) {
-        sum += (src[(radius+x)*src_step] - src[(radius-x)*src_step])*inv;
-        dst[x*dst_step] = sum>>16;
-    }
-
-    for (; x < len-radius; x++) {
-        sum += (src[(radius+x)*src_step] - src[(x-radius-1)*src_step])*inv;
-        dst[x*dst_step] = sum >>16;
-    }
-
-    for (; x < len; x++) {
-        sum += (src[(2*len-radius-x-1)*src_step] - src[(x-radius-1)*src_step])*inv;
-        dst[x*dst_step] = sum>>16;
-    }
+/* Running-sum box blur, instantiated per sample type by BLUR(type, depth).
+ * A naive box blur sums source pixels x-radius .. x+radius for every
+ * destination pixel x, which costs O(radius*width). The windows of two
+ * consecutive output pixels overlap except for one pixel at each end:
+ * src0       111111111
+ * dst0           1
+ * src1        111111111
+ * dst1            1
+ * src0-src1  1       -1
+ * so each output is derived from the previous one by adding the pixel that
+ * enters the window and subtracting the one that leaves it. inv is the
+ * reciprocal of the window length (radius*2 + 1) scaled by 1<<16; sum carries
+ * a 1<<15 rounding bias, so sum>>16 yields the rounded window average.
+ */
+#define BLUR(type, depth)                                                   \
+static inline void blur ## depth(type *dst, int dst_step, const type *src,  \
+                                 int src_step, int len, int radius)         \
+{                                                                           \
+    const int length = radius*2 + 1;                                        \
+    const int inv = ((1<<16) + length/2)/length;                            \
+    int x, sum = src[radius*src_step];                                      \
+                                                                            \
+    for (x = 0; x < radius; x++)                                            \
+        sum += src[x*src_step]<<1;                                          \
+                                                                            \
+    sum = sum*inv + (1<<15);                                                \
+                                                                            \
+    for (x = 0; x <= radius; x++) {                                         \
+        sum += (src[(radius+x)*src_step] - src[(radius-x)*src_step])*inv;   \
+        dst[x*dst_step] = sum>>16;                                          \
+    }                                                                       \
+                                                                            \
+    for (; x < len-radius; x++) {                                           \
+        sum += (src[(radius+x)*src_step] - src[(x-radius-1)*src_step])*inv; \
+        dst[x*dst_step] = sum >>16;                                         \
+    }                                                                       \
+                                                                            \
+    for (; x < len; x++) {                                                  \
+        sum += (src[(2*len-radius-x-1)*src_step] - src[(x-radius-1)*src_step])*inv; \
+        dst[x*dst_step] = sum>>16;                                          \
+    }                                                                       \
 }
 
-static inline void blur16(uint16_t *dst, int dst_step, const uint16_t *src, int src_step,
-                          int len, int radius)
-{
-    const int length = radius*2 + 1;
-    const int inv = ((1<<16) + length/2)/length;
-    int x, sum = src[radius*src_step];
-
-    for (x = 0; x < radius; x++)
-        sum += src[x*src_step]<<1;
-
-    sum = sum*inv + (1<<15);
-
-    for (x = 0; x <= radius; x++) {
-        sum += (src[(radius+x)*src_step] - src[(radius-x)*src_step])*inv;
-        dst[x*dst_step] = sum>>16;
-    }
-
-    for (; x < len-radius; x++) {
-        sum += (src[(radius+x)*src_step] - src[(x-radius-1)*src_step])*inv;
-        dst[x*dst_step] = sum >>16;
-    }
+BLUR(uint8_t,   8)
+BLUR(uint16_t, 16)
 
-    for (; x < len; x++) {
-        sum += (src[(2*len-radius-x-1)*src_step] - src[(x-radius-1)*src_step])*inv;
-        dst[x*dst_step] = sum>>16;
-    }
-}
+#undef BLUR
 
 static inline void blur(uint8_t *dst, int dst_step, const uint8_t *src, int src_step,
                         int len, int radius, int pixsize)
@@ -347,11 +326,11 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
     AVFilterLink *outlink = inlink->dst->outputs[0];
     AVFrame *out;
     int plane;
-    int cw = FF_CEIL_RSHIFT(inlink->w, s->hsub), ch = FF_CEIL_RSHIFT(in->height, s->vsub);
+    int cw = AV_CEIL_RSHIFT(inlink->w, s->hsub), ch = AV_CEIL_RSHIFT(in->height, s->vsub);
     int w[4] = { inlink->w, cw, cw, inlink->w };
     int h[4] = { in->height, ch, ch, in->height };
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
-    const int depth = desc->comp[0].depth_minus1 + 1;
+    const int depth = desc->comp[0].depth;
     const int pixsize = (depth+7)/8;
 
     out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
diff --git a/libavfilter/vf_chromakey.c b/libavfilter/vf_chromakey.c
new file mode 100644
index 00000000..33097480
--- /dev/null
+++ b/libavfilter/vf_chromakey.c
@@ -0,0 +1,208 @@
+/*
+ * Copyright (c) 2015 Timo Rothenpieler <timo@rothenpieler.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/opt.h"
+#include "libavutil/imgutils.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+typedef struct ChromakeyContext {
+    const AVClass *class;
+
+    uint8_t chromakey_rgba[4];  /* "color" option (AV_OPT_TYPE_COLOR) */
+    uint8_t chromakey_uv[2];    /* key colour as U, V; filled in by initialize_chromakey() */
+
+    float similarity;           /* "similarity" option: distance at or below which alpha becomes 0 */
+    float blend;                /* "blend" option: width of the alpha ramp above similarity */
+
+    int is_yuv;                 /* "yuv" option: colour bytes 1 and 2 are used as U and V directly */
+
+    int hsub_log2;              /* chroma subsampling shifts, copied from the pixel format in config_input() */
+    int vsub_log2;
+} ChromakeyContext;
+
+static uint8_t do_chromakey_pixel(ChromakeyContext *ctx, uint8_t u[9], uint8_t v[9])
+{
+    /* Alpha for one pixel from the mean UV distance of its 3x3 samples to the key. */
+    double diff = 0.0;
+    int i;
+
+    for (i = 0; i < 9; ++i) {
+        const int du = (int)u[i] - ctx->chromakey_uv[0];
+        const int dv = (int)v[i] - ctx->chromakey_uv[1];
+        diff += sqrt((du * du + dv * dv) / (255.0 * 255.0));
+    }
+
+    diff /= 9.0;
+
+    /* Without a usable blend width the key is a hard threshold. */
+    if (!(ctx->blend > 0.0001))
+        return (diff > ctx->similarity) ? 255 : 0;
+
+    return av_clipd((diff - ctx->similarity) / ctx->blend, 0.0, 1.0) * 255.0;
+}
+
+static av_always_inline void get_pixel_uv(AVFrame *frame, int hsub_log2, int vsub_log2, int x, int y, uint8_t *u, uint8_t *v)
+{
+    /* x and y are frame coordinates; positions outside the frame leave *u and *v untouched. */
+    const int inside = x >= 0 && x < frame->width && y >= 0 && y < frame->height;
+    if (inside) {
+        const int cx = x >> hsub_log2;
+        const int cy = y >> vsub_log2;
+        *u = frame->data[1][frame->linesize[1] * cy + cx];
+        *v = frame->data[2][frame->linesize[2] * cy + cx];
+    }
+}
+
+static int do_chromakey_slice(AVFilterContext *avctx, void *arg, int jobnr, int nb_jobs)
+{
+    /* Slice worker: writes the alpha plane (data[3]) for the rows owned by job jobnr. */
+    AVFrame *frame = arg;
+    ChromakeyContext *ctx = avctx->priv;
+
+    const int slice_start = (frame->height * jobnr) / nb_jobs;
+    const int slice_end = (frame->height * (jobnr + 1)) / nb_jobs;
+
+    int x, y, xo, yo;
+    uint8_t u[9], v[9];
+
+    for (y = slice_start; y < slice_end; ++y) {
+        for (x = 0; x < frame->width; ++x) {
+            /* get_pixel_uv() skips samples outside the frame, so preload the
+             * key colour for every pixel; otherwise border pixels would reuse
+             * the previous pixel's samples and depend on the slice layout. */
+            memset(u, ctx->chromakey_uv[0], sizeof(u));
+            memset(v, ctx->chromakey_uv[1], sizeof(v));
+            for (yo = 0; yo < 3; ++yo)
+                for (xo = 0; xo < 3; ++xo)
+                    get_pixel_uv(frame, ctx->hsub_log2, ctx->vsub_log2, x + xo - 1, y + yo - 1, &u[yo * 3 + xo], &v[yo * 3 + xo]);
+
+            frame->data[3][frame->linesize[3] * y + x] = do_chromakey_pixel(ctx, u, v);
+        }
+    }
+
+    return 0;
+}
+
+static int filter_frame(AVFilterLink *link, AVFrame *frame)
+{
+    AVFilterContext *avctx = link->dst; /* the pad sets needs_writable, so the frame is keyed in place */
+    int res = avctx->internal->execute(avctx, do_chromakey_slice, frame, NULL, FFMIN(frame->height, avctx->graph->nb_threads));
+    if (res) {
+        av_frame_free(&frame); /* this callback owns the frame: release it instead of leaking on failure */
+        return res;
+    }
+    return ff_filter_frame(avctx->outputs[0], frame); /* ownership moves to the next filter */
+}
+
+#define FIXNUM(x) lrint((x) * (1 << 10)) /* real coefficient -> integer scaled by 1 << 10 */
+#define RGB_TO_U(rgb) (((- FIXNUM(0.16874) * rgb[0] - FIXNUM(0.33126) * rgb[1] + FIXNUM(0.50000) * rgb[2] + (1 << 9) - 1) >> 10) + 128) /* rgb is indexed [0..2]; pass a plain array or pointer name */
+#define RGB_TO_V(rgb) (((  FIXNUM(0.50000) * rgb[0] - FIXNUM(0.41869) * rgb[1] - FIXNUM(0.08131) * rgb[2] + (1 << 9) - 1) >> 10) + 128) /* same scaling and +128 offset as RGB_TO_U */
+
+static av_cold int initialize_chromakey(AVFilterContext *avctx)
+{
+    ChromakeyContext *ctx = avctx->priv;
+    const uint8_t *rgba = ctx->chromakey_rgba;
+
+    /*
+     * With the "yuv" option set, bytes 1 and 2 of the colour are used as U
+     * and V directly; otherwise U and V are derived from the RGB key colour.
+     */
+    ctx->chromakey_uv[0] = ctx->is_yuv ? rgba[1] : RGB_TO_U(rgba);
+    ctx->chromakey_uv[1] = ctx->is_yuv ? rgba[2] : RGB_TO_V(rgba);
+
+    return 0;
+}
+
+static av_cold int query_formats(AVFilterContext *avctx)
+{
+    /* The only formats offered: the three YUVA variants below. */
+    static const enum AVPixelFormat pixel_fmts[] = {
+        AV_PIX_FMT_YUVA420P,
+        AV_PIX_FMT_YUVA422P,
+        AV_PIX_FMT_YUVA444P,
+        AV_PIX_FMT_NONE
+    };
+    AVFilterFormats *formats = ff_make_format_list(pixel_fmts);
+
+    if (formats)
+        return ff_set_common_formats(avctx, formats);
+
+    /* a NULL list is reported as out of memory */
+    return AVERROR(ENOMEM);
+}
+
+static av_cold int config_input(AVFilterLink *inlink)
+{
+    /* Cache the chroma subsampling shifts of the input link's pixel format. */
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    ChromakeyContext *ctx = inlink->dst->priv;
+
+    ctx->vsub_log2 = desc->log2_chroma_h;
+    ctx->hsub_log2 = desc->log2_chroma_w;
+
+    return 0;
+}
+
+static const AVFilterPad chromakey_inputs[] = { /* single video input pad */
+    {
+        .name           = "default",
+        .type           = AVMEDIA_TYPE_VIDEO,
+        .needs_writable = 1, /* do_chromakey_slice() writes data[3] of the incoming frame */
+        .filter_frame   = filter_frame,
+        .config_props   = config_input, /* caches the chroma subsampling shifts */
+    },
+    { NULL } /* terminator */
+};
+
+static const AVFilterPad chromakey_outputs[] = { /* single video output pad; filter_frame() forwards frames to outputs[0] */
+    {
+        .name           = "default",
+        .type           = AVMEDIA_TYPE_VIDEO,
+    },
+    { NULL } /* terminator */
+};
+
+#define OFFSET(x) offsetof(ChromakeyContext, x)
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
+/* User options; each entry stores into the ChromakeyContext field named by OFFSET(). */
+static const AVOption chromakey_options[] = {
+    { "color", "set the chromakey key color", OFFSET(chromakey_rgba), AV_OPT_TYPE_COLOR, { .str = "black" }, CHAR_MIN, CHAR_MAX, FLAGS },
+    { "similarity", "set the chromakey similarity value", OFFSET(similarity), AV_OPT_TYPE_FLOAT, { .dbl = 0.01 }, 0.01, 1.0, FLAGS },
+    { "blend", "set the chromakey key blend value", OFFSET(blend), AV_OPT_TYPE_FLOAT, { .dbl = 0.0 }, 0.0, 1.0, FLAGS },
+    { "yuv", "color parameter is in yuv instead of rgb", OFFSET(is_yuv), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
+    { NULL } /* terminator */
+};
+
+AVFILTER_DEFINE_CLASS(chromakey);
+
+AVFilter ff_vf_chromakey = { /* filter definition tying together the callbacks and tables of this file */
+    .name          = "chromakey",
+    .description   = NULL_IF_CONFIG_SMALL("Turns a certain color into transparency. Operates on YUV colors."),
+    .priv_size     = sizeof(ChromakeyContext),
+    .priv_class    = &chromakey_class,
+    .init          = initialize_chromakey, /* derives chromakey_uv from the "color" option */
+    .query_formats = query_formats,
+    .inputs        = chromakey_inputs,
+    .outputs       = chromakey_outputs,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS, /* filter_frame() runs do_chromakey_slice() as slice jobs */
+};
diff --git a/libavfilter/vf_codecview.c b/libavfilter/vf_codecview.c
index df45f551..e70b397e 100644
--- a/libavfilter/vf_codecview.c
+++ b/libavfilter/vf_codecview.c
@@ -27,7 +27,6 @@
  * libavcodec/mpegvideo.c.
  *
  * TODO: segmentation
- * TODO: quantization
  */
 
 #include "libavutil/imgutils.h"
@@ -43,6 +42,8 @@
 typedef struct {
     const AVClass *class;
     unsigned mv;
+    int hsub, vsub;
+    int qp;
 } CodecViewContext;
 
 #define OFFSET(x) offsetof(CodecViewContext, x)
@@ -52,6 +53,7 @@ static const AVOption codecview_options[] = {
         {"pf", "forward predicted MVs of P-frames",  0, AV_OPT_TYPE_CONST, {.i64 = MV_P_FOR },  INT_MIN, INT_MAX, FLAGS, "mv"},
         {"bf", "forward predicted MVs of B-frames",  0, AV_OPT_TYPE_CONST, {.i64 = MV_B_FOR },  INT_MIN, INT_MAX, FLAGS, "mv"},
         {"bb", "backward predicted MVs of B-frames", 0, AV_OPT_TYPE_CONST, {.i64 = MV_B_BACK }, INT_MIN, INT_MAX, FLAGS, "mv"},
+    { "qp", NULL, OFFSET(qp), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, .flags = FLAGS },
     { NULL }
 };
 
@@ -198,29 +200,68 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
     CodecViewContext *s = ctx->priv;
     AVFilterLink *outlink = ctx->outputs[0];
 
-    AVFrameSideData *sd = av_frame_get_side_data(frame, AV_FRAME_DATA_MOTION_VECTORS);
-    if (sd) {
-        int i;
-        const AVMotionVector *mvs = (const AVMotionVector *)sd->data;
-        for (i = 0; i < sd->size / sizeof(*mvs); i++) {
-            const AVMotionVector *mv = &mvs[i];
-            const int direction = mv->source > 0;
-            if ((direction == 0 && (s->mv & MV_P_FOR)  && frame->pict_type == AV_PICTURE_TYPE_P) ||
-                (direction == 0 && (s->mv & MV_B_FOR)  && frame->pict_type == AV_PICTURE_TYPE_B) ||
-                (direction == 1 && (s->mv & MV_B_BACK) && frame->pict_type == AV_PICTURE_TYPE_B))
-                draw_arrow(frame->data[0], mv->dst_x, mv->dst_y, mv->src_x, mv->src_y,
-                           frame->width, frame->height, frame->linesize[0],
-                           100, 0, mv->source > 0);
+    if (s->qp) {
+        int qstride, qp_type;
+        int8_t *qp_table = av_frame_get_qp_table(frame, &qstride, &qp_type);
+
+        if (qp_table) {
+            int x, y;
+            const int w = AV_CEIL_RSHIFT(frame->width,  s->hsub);
+            const int h = AV_CEIL_RSHIFT(frame->height, s->vsub);
+            uint8_t *pu = frame->data[1];
+            uint8_t *pv = frame->data[2];
+            const int lzu = frame->linesize[1];
+            const int lzv = frame->linesize[2];
+
+            for (y = 0; y < h; y++) {
+                for (x = 0; x < w; x++) {
+                    const int qp = ff_norm_qscale(qp_table[(y >> 3) * qstride + (x >> 3)], qp_type) * 128/31;
+                    pu[x] = pv[x] = qp;
+                }
+                pu += lzu;
+                pv += lzv;
+            }
         }
     }
+
+    if (s->mv) {
+        AVFrameSideData *sd = av_frame_get_side_data(frame, AV_FRAME_DATA_MOTION_VECTORS);
+        if (sd) {
+            int i;
+            const AVMotionVector *mvs = (const AVMotionVector *)sd->data;
+            for (i = 0; i < sd->size / sizeof(*mvs); i++) {
+                const AVMotionVector *mv = &mvs[i];
+                const int direction = mv->source > 0;
+                if ((direction == 0 && (s->mv & MV_P_FOR)  && frame->pict_type == AV_PICTURE_TYPE_P) ||
+                    (direction == 0 && (s->mv & MV_B_FOR)  && frame->pict_type == AV_PICTURE_TYPE_B) ||
+                    (direction == 1 && (s->mv & MV_B_BACK) && frame->pict_type == AV_PICTURE_TYPE_B))
+                    draw_arrow(frame->data[0], mv->dst_x, mv->dst_y, mv->src_x, mv->src_y,
+                               frame->width, frame->height, frame->linesize[0],
+                               100, 0, mv->source > 0);
+            }
+        }
+    }
+
     return ff_filter_frame(outlink, frame);
 }
 
+static int config_input(AVFilterLink *inlink)
+{
+    /* Cache the chroma shifts that filter_frame() uses to size the planes it paints for "qp". */
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    CodecViewContext *s = inlink->dst->priv;
+
+    s->vsub = desc->log2_chroma_h;
+    s->hsub = desc->log2_chroma_w;
+    return 0;
+}
+
 static const AVFilterPad codecview_inputs[] = {
     {
         .name           = "default",
         .type           = AVMEDIA_TYPE_VIDEO,
         .filter_frame   = filter_frame,
+        .config_props   = config_input,
         .needs_writable = 1,
     },
     { NULL }
@@ -236,7 +277,7 @@ static const AVFilterPad codecview_outputs[] = {
 
 AVFilter ff_vf_codecview = {
     .name          = "codecview",
-    .description   = NULL_IF_CONFIG_SMALL("Visualize information about some codecs"),
+    .description   = NULL_IF_CONFIG_SMALL("Visualize information about some codecs."),
     .priv_size     = sizeof(CodecViewContext),
     .query_formats = query_formats,
     .inputs        = codecview_inputs,
diff --git a/libavfilter/vf_colorchannelmixer.c b/libavfilter/vf_colorchannelmixer.c
index 0fffd341..cda972dd 100644
--- a/libavfilter/vf_colorchannelmixer.c
+++ b/libavfilter/vf_colorchannelmixer.c
@@ -115,25 +115,25 @@ static int config_output(AVFilterLink *outlink)
             s->lut[i][j] = buffer;
 
     for (i = 0; i < size; i++) {
-        s->lut[R][R][i] = round(i * s->rr);
-        s->lut[R][G][i] = round(i * s->rg);
-        s->lut[R][B][i] = round(i * s->rb);
-        s->lut[R][A][i] = round(i * s->ra);
-
-        s->lut[G][R][i] = round(i * s->gr);
-        s->lut[G][G][i] = round(i * s->gg);
-        s->lut[G][B][i] = round(i * s->gb);
-        s->lut[G][A][i] = round(i * s->ga);
-
-        s->lut[B][R][i] = round(i * s->br);
-        s->lut[B][G][i] = round(i * s->bg);
-        s->lut[B][B][i] = round(i * s->bb);
-        s->lut[B][A][i] = round(i * s->ba);
-
-        s->lut[A][R][i] = round(i * s->ar);
-        s->lut[A][G][i] = round(i * s->ag);
-        s->lut[A][B][i] = round(i * s->ab);
-        s->lut[A][A][i] = round(i * s->aa);
+        s->lut[R][R][i] = lrint(i * s->rr);
+        s->lut[R][G][i] = lrint(i * s->rg);
+        s->lut[R][B][i] = lrint(i * s->rb);
+        s->lut[R][A][i] = lrint(i * s->ra);
+
+        s->lut[G][R][i] = lrint(i * s->gr);
+        s->lut[G][G][i] = lrint(i * s->gg);
+        s->lut[G][B][i] = lrint(i * s->gb);
+        s->lut[G][A][i] = lrint(i * s->ga);
+
+        s->lut[B][R][i] = lrint(i * s->br);
+        s->lut[B][G][i] = lrint(i * s->bg);
+        s->lut[B][B][i] = lrint(i * s->bb);
+        s->lut[B][A][i] = lrint(i * s->ba);
+
+        s->lut[A][R][i] = lrint(i * s->ar);
+        s->lut[A][G][i] = lrint(i * s->ag);
+        s->lut[A][B][i] = lrint(i * s->ab);
+        s->lut[A][A][i] = lrint(i * s->aa);
     }
 
     return 0;
diff --git a/libavfilter/vf_colorkey.c b/libavfilter/vf_colorkey.c
new file mode 100644
index 00000000..2f670d97
--- /dev/null
+++ b/libavfilter/vf_colorkey.c
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2015 Timo Rothenpieler <timo@rothenpieler.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/opt.h"
+#include "libavutil/imgutils.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+typedef struct ColorkeyContext {
+    const AVClass *class;
+
+    /* offsets of r, g, b, a within a packed pixel; copied from the pixel format in config_output() */
+    int co[4];
+
+    uint8_t colorkey_rgba[4];  /* "color" option (AV_OPT_TYPE_COLOR) */
+    float similarity;          /* "similarity" option: distance at or below which alpha becomes 0 */
+    float blend;               /* "blend" option: width of the alpha ramp; values <= 0.0001 give a hard threshold */
+} ColorkeyContext;
+
+static uint8_t do_colorkey_pixel(ColorkeyContext *ctx, uint8_t r, uint8_t g, uint8_t b)
+{
+    /* Alpha for one pixel from its RGB distance to the key colour, scaled by 1/255. */
+    const int dr = (int)r - ctx->colorkey_rgba[0];
+    const int dg = (int)g - ctx->colorkey_rgba[1];
+    const int db = (int)b - ctx->colorkey_rgba[2];
+    const double diff = sqrt((dr * dr + dg * dg + db * db) / (255.0 * 255.0));
+
+    /* Without a usable blend width the key is a hard threshold. */
+    if (!(ctx->blend > 0.0001))
+        return (diff > ctx->similarity) ? 255 : 0;
+
+    return av_clipd((diff - ctx->similarity) / ctx->blend, 0.0, 1.0) * 255.0;
+}
+
+static int do_colorkey_slice(AVFilterContext *avctx, void *arg, int jobnr, int nb_jobs)
+{
+    /* Slice worker: rewrites the alpha byte of every pixel in this job's rows. */
+    ColorkeyContext *ctx = avctx->priv;
+    AVFrame *frame = arg;
+
+    const int slice_start = (frame->height * jobnr) / nb_jobs;
+    const int slice_end = (frame->height * (jobnr + 1)) / nb_jobs;
+
+    int col, row;
+
+    for (row = slice_start; row < slice_end; ++row) {
+        uint8_t *line = frame->data[0] + frame->linesize[0] * row;
+        for (col = 0; col < frame->width; ++col) {
+            /* Pixels are 4 bytes wide; co[] locates r, g, b and alpha inside one. */
+            uint8_t *px = line + col * 4;
+            const uint8_t r = px[ctx->co[0]];
+            const uint8_t g = px[ctx->co[1]];
+            const uint8_t b = px[ctx->co[2]];
+            px[ctx->co[3]] = do_colorkey_pixel(ctx, r, g, b);
+        }
+    }
+
+    return 0;
+}
+
+static int filter_frame(AVFilterLink *link, AVFrame *frame)
+{
+    AVFilterContext *avctx = link->dst;
+    int res = av_frame_make_writable(frame); /* the alpha bytes are rewritten in place */
+
+    if (!res)
+        res = avctx->internal->execute(avctx, do_colorkey_slice, frame, NULL, FFMIN(frame->height, avctx->graph->nb_threads));
+    if (res) {
+        av_frame_free(&frame); /* this callback owns the frame: release it instead of leaking on failure */
+        return res;
+    }
+    return ff_filter_frame(avctx->outputs[0], frame); /* ownership moves to the next filter */
+}
+
+static av_cold int config_output(AVFilterLink *outlink)
+{
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(outlink->format);
+    AVFilterLink *inlink = outlink->src->inputs[0];
+    ColorkeyContext *ctx = outlink->src->priv;
+    int c;
+
+    for (c = 0; c < 4; ++c) /* per-component offset, later added to each pixel's byte index */
+        ctx->co[c] = desc->comp[c].offset;
+
+    outlink->w = inlink->w; /* the output mirrors the input link */
+    outlink->h = inlink->h;
+    outlink->time_base = inlink->time_base;
+
+    return 0;
+}
+
+static av_cold int query_formats(AVFilterContext *avctx)
+{
+    /* The only formats offered: the four RGB-with-alpha byte orders below. */
+    static const enum AVPixelFormat pixel_fmts[] = {
+        AV_PIX_FMT_ARGB,
+        AV_PIX_FMT_RGBA,
+        AV_PIX_FMT_ABGR,
+        AV_PIX_FMT_BGRA,
+        AV_PIX_FMT_NONE
+    };
+    AVFilterFormats *formats = ff_make_format_list(pixel_fmts);
+
+    if (formats)
+        return ff_set_common_formats(avctx, formats);
+
+    /* a NULL list is reported as out of memory */
+    return AVERROR(ENOMEM);
+}
+
+static const AVFilterPad colorkey_inputs[] = { /* single video input pad */
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame, /* makes the frame writable itself before keying */
+    },
+    { NULL } /* terminator */
+};
+
+static const AVFilterPad colorkey_outputs[] = { /* single video output pad */
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO,
+        .config_props  = config_output, /* copies link geometry and records the component offsets */
+    },
+    { NULL } /* terminator */
+};
+
+#define OFFSET(x) offsetof(ColorkeyContext, x)
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
+/* User options; each entry stores into the ColorkeyContext field named by OFFSET(). */
+static const AVOption colorkey_options[] = {
+    { "color", "set the colorkey key color", OFFSET(colorkey_rgba), AV_OPT_TYPE_COLOR, { .str = "black" }, CHAR_MIN, CHAR_MAX, FLAGS },
+    { "similarity", "set the colorkey similarity value", OFFSET(similarity), AV_OPT_TYPE_FLOAT, { .dbl = 0.01 }, 0.01, 1.0, FLAGS },
+    { "blend", "set the colorkey key blend value", OFFSET(blend), AV_OPT_TYPE_FLOAT, { .dbl = 0.0 }, 0.0, 1.0, FLAGS },
+    { NULL } /* terminator */
+};
+
+AVFILTER_DEFINE_CLASS(colorkey);
+
+AVFilter ff_vf_colorkey = { /* filter definition tying together the callbacks and tables of this file */
+    .name          = "colorkey",
+    .description   = NULL_IF_CONFIG_SMALL("Turns a certain color into transparency. Operates on RGB colors."),
+    .priv_size     = sizeof(ColorkeyContext),
+    .priv_class    = &colorkey_class,
+    .query_formats = query_formats,
+    .inputs        = colorkey_inputs,
+    .outputs       = colorkey_outputs,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS, /* filter_frame() runs do_colorkey_slice() as slice jobs */
+};
diff --git a/libavfilter/vf_colorlevels.c b/libavfilter/vf_colorlevels.c
index 7157c913..dedbe30d 100644
--- a/libavfilter/vf_colorlevels.c
+++ b/libavfilter/vf_colorlevels.c
@@ -97,7 +97,7 @@ static int config_input(AVFilterLink *inlink)
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
 
     s->nb_comp = desc->nb_components;
-    s->bpp = (desc->comp[0].depth_minus1 + 1) >> 3;
+    s->bpp = desc->comp[0].depth >> 3;
     s->step = (av_get_padded_bits_per_pixel(desc) >> 3) / s->bpp;
     s->linesize = inlink->w * s->step;
     ff_fill_rgba_map(s->rgba_map, inlink->format);
@@ -132,10 +132,10 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
             const uint8_t offset = s->rgba_map[i];
             const uint8_t *srcrow = in->data[0];
             uint8_t *dstrow = out->data[0];
-            int imin = round(r->in_min  * UINT8_MAX);
-            int imax = round(r->in_max  * UINT8_MAX);
-            int omin = round(r->out_min * UINT8_MAX);
-            int omax = round(r->out_max * UINT8_MAX);
+            int imin = lrint(r->in_min  * UINT8_MAX);
+            int imax = lrint(r->in_max  * UINT8_MAX);
+            int omin = lrint(r->out_min * UINT8_MAX);
+            int omax = lrint(r->out_max * UINT8_MAX);
             double coeff;
 
             if (imin < 0) {
@@ -179,10 +179,10 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
             const uint8_t offset = s->rgba_map[i];
             const uint8_t *srcrow = in->data[0];
             uint8_t *dstrow = out->data[0];
-            int imin = round(r->in_min  * UINT16_MAX);
-            int imax = round(r->in_max  * UINT16_MAX);
-            int omin = round(r->out_min * UINT16_MAX);
-            int omax = round(r->out_max * UINT16_MAX);
+            int imin = lrint(r->in_min  * UINT16_MAX);
+            int imax = lrint(r->in_max  * UINT16_MAX);
+            int omin = lrint(r->out_min * UINT16_MAX);
+            int omax = lrint(r->out_max * UINT16_MAX);
             double coeff;
 
             if (imin < 0) {
diff --git a/libavfilter/vf_colormatrix.c b/libavfilter/vf_colormatrix.c
index 4971cac3..4a57fe0b 100644
--- a/libavfilter/vf_colormatrix.c
+++ b/libavfilter/vf_colormatrix.c
@@ -94,6 +94,7 @@ static const AVOption colormatrix_options[] = {
     { "fcc",       "set FCC colorspace   ",      0, AV_OPT_TYPE_CONST, {.i64=COLOR_MODE_FCC},         .flags=FLAGS, .unit="color_mode" },
     { "bt601",     "set BT.601 colorspace",      0, AV_OPT_TYPE_CONST, {.i64=COLOR_MODE_BT601},       .flags=FLAGS, .unit="color_mode" },
     { "bt470",     "set BT.470 colorspace",      0, AV_OPT_TYPE_CONST, {.i64=COLOR_MODE_BT601},       .flags=FLAGS, .unit="color_mode" },
+    { "bt470bg",   "set BT.470 colorspace",      0, AV_OPT_TYPE_CONST, {.i64=COLOR_MODE_BT601},       .flags=FLAGS, .unit="color_mode" },
     { "smpte170m", "set SMTPE-170M colorspace",  0, AV_OPT_TYPE_CONST, {.i64=COLOR_MODE_BT601},       .flags=FLAGS, .unit="color_mode" },
     { "smpte240m", "set SMPTE-240M colorspace",  0, AV_OPT_TYPE_CONST, {.i64=COLOR_MODE_SMPTE240M},   .flags=FLAGS, .unit="color_mode" },
     { NULL }
@@ -230,6 +231,53 @@ static int process_slice_uyvy422(AVFilterContext *ctx, void *arg, int jobnr, int
     return 0;
 }
 
+static int process_slice_yuv444p(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    /* Slice worker for AV_PIX_FMT_YUV444P: converts rows [slice_start, slice_end). */
+    const ThreadData *td = arg;
+    const AVFrame *src = td->src;
+    AVFrame *dst = td->dst;
+    const int width = src->width;
+    const int slice_start = (src->height *  jobnr   ) / nb_jobs;
+    const int slice_end   = (src->height * (jobnr+1)) / nb_jobs;
+    /* Strides: U and V both use linesize[1], for source and destination alike. */
+    const int src_pitchY  = src->linesize[0];
+    const int src_pitchUV = src->linesize[1];
+    const int dst_pitchY  = dst->linesize[0];
+    const int dst_pitchUV = dst->linesize[1];
+    /* Conversion coefficients handed over by the caller through ThreadData. */
+    const int c2 = td->c2;
+    const int c3 = td->c3;
+    const int c4 = td->c4;
+    const int c5 = td->c5;
+    const int c6 = td->c6;
+    const int c7 = td->c7;
+    int x, y;
+
+    for (y = slice_start; y < slice_end; y++) {
+        const unsigned char *srcpY = src->data[0] + y * src_pitchY;
+        const unsigned char *srcpU = src->data[1] + y * src_pitchUV;
+        const unsigned char *srcpV = src->data[2] + y * src_pitchUV;
+        unsigned char *dstpY = dst->data[0] + y * dst_pitchY;
+        unsigned char *dstpU = dst->data[1] + y * dst_pitchUV;
+        unsigned char *dstpV = dst->data[2] + y * dst_pitchUV;
+
+        for (x = 0; x < width; x++) {
+            const int u = srcpU[x] - 128;
+            const int v = srcpV[x] - 128;
+            /* 1081344 == (16 << 16) + (1 << 15): re-adds the 16 taken off luma, plus rounding. */
+            const int uvval = c2 * u + c3 * v + 1081344;
+
+            dstpY[x] = CB((65536 * (srcpY[x] - 16) + uvval) >> 16);
+            /* 8421376 == (128 << 16) + (1 << 15): re-adds the 128 taken off chroma, plus rounding. */
+            dstpU[x] = CB((c4 * u + c5 * v + 8421376) >> 16);
+            dstpV[x] = CB((c6 * u + c7 * v + 8421376) >> 16);
+        }
+    }
+
+    return 0;
+}
+
 static int process_slice_yuv422p(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
 {
     const ThreadData *td = arg;
@@ -350,6 +398,7 @@ static int config_input(AVFilterLink *inlink)
 static int query_formats(AVFilterContext *ctx)
 {
     static const enum AVPixelFormat pix_fmts[] = {
+        AV_PIX_FMT_YUV444P,
         AV_PIX_FMT_YUV422P,
         AV_PIX_FMT_YUV420P,
         AV_PIX_FMT_UYVY422,
@@ -411,7 +460,10 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
     td.c6 = color->yuv_convert[color->mode][2][1];
     td.c7 = color->yuv_convert[color->mode][2][2];
 
-    if (in->format == AV_PIX_FMT_YUV422P)
+    if (in->format == AV_PIX_FMT_YUV444P)
+        ctx->internal->execute(ctx, process_slice_yuv444p, &td, NULL,
+                               FFMIN(in->height, ctx->graph->nb_threads));
+    else if (in->format == AV_PIX_FMT_YUV422P)
         ctx->internal->execute(ctx, process_slice_yuv422p, &td, NULL,
                                FFMIN(in->height, ctx->graph->nb_threads));
     else if (in->format == AV_PIX_FMT_YUV420P)
diff --git a/libavfilter/vf_convolution.c b/libavfilter/vf_convolution.c
new file mode 100644
index 00000000..2380cdca
--- /dev/null
+++ b/libavfilter/vf_convolution.c
@@ -0,0 +1,336 @@
+/*
+ * Copyright (c) 2012-2013 Oka Motofumi (chikuzen.mo at gmail dot com)
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avstring.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+typedef struct ConvolutionContext {
+    const AVClass *class;
+
+    char *matrix_str[4];    ///< per-plane matrix option strings ("0m".."3m"), space-separated integers
+    float rdiv[4];          ///< per-plane multiplier applied to the convolution sum
+    float bias[4];          ///< per-plane offset added after the rdiv multiplication
+
+    int bstride;            ///< size in bytes of one line slot in buffer (planewidth[0] + 32)
+    uint8_t *buffer;        ///< scratch storage for 5 line slots, freed in uninit()
+    int nb_planes;          ///< plane count of the input pixel format
+    int planewidth[4];      ///< per-plane values filled by av_image_fill_linesizes(), used as line widths
+    int planeheight[4];     ///< per-plane heights in lines
+    int matrix[4][25];      ///< parsed coefficients, row by row; 9 or 25 entries are used
+    int matrix_length[4];   ///< number of coefficients parsed for each plane
+    int copy[4];            ///< set when the plane's matrix equals the identity kernel, so the plane is copied
+
+    void (*filter[4])(struct ConvolutionContext *s, AVFrame *in, AVFrame *out, int plane); ///< per-plane kernel (filter_3x3 or filter_5x5)
+} ConvolutionContext;
+
+#define OFFSET(x) offsetof(ConvolutionContext, x)
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption convolution_options[] = {
+    { "0m", "set matrix for 1st plane", OFFSET(matrix_str[0]), AV_OPT_TYPE_STRING, {.str="0 0 0 0 1 0 0 0 0"}, 0, 0, FLAGS },
+    { "1m", "set matrix for 2nd plane", OFFSET(matrix_str[1]), AV_OPT_TYPE_STRING, {.str="0 0 0 0 1 0 0 0 0"}, 0, 0, FLAGS },
+    { "2m", "set matrix for 3rd plane", OFFSET(matrix_str[2]), AV_OPT_TYPE_STRING, {.str="0 0 0 0 1 0 0 0 0"}, 0, 0, FLAGS },
+    { "3m", "set matrix for 4th plane", OFFSET(matrix_str[3]), AV_OPT_TYPE_STRING, {.str="0 0 0 0 1 0 0 0 0"}, 0, 0, FLAGS },
+    { "0rdiv", "set rdiv for 1st plane", OFFSET(rdiv[0]), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, INT_MAX, FLAGS},
+    { "1rdiv", "set rdiv for 2nd plane", OFFSET(rdiv[1]), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, INT_MAX, FLAGS},
+    { "2rdiv", "set rdiv for 3rd plane", OFFSET(rdiv[2]), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, INT_MAX, FLAGS},
+    { "3rdiv", "set rdiv for 4th plane", OFFSET(rdiv[3]), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, INT_MAX, FLAGS},
+    { "0bias", "set bias for 1st plane", OFFSET(bias[0]), AV_OPT_TYPE_FLOAT, {.dbl=0.0}, 0.0, INT_MAX, FLAGS},
+    { "1bias", "set bias for 2nd plane", OFFSET(bias[1]), AV_OPT_TYPE_FLOAT, {.dbl=0.0}, 0.0, INT_MAX, FLAGS},
+    { "2bias", "set bias for 3rd plane", OFFSET(bias[2]), AV_OPT_TYPE_FLOAT, {.dbl=0.0}, 0.0, INT_MAX, FLAGS},
+    { "3bias", "set bias for 4th plane", OFFSET(bias[3]), AV_OPT_TYPE_FLOAT, {.dbl=0.0}, 0.0, INT_MAX, FLAGS},
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(convolution);
+
+static const int same3x3[9] = {0, 0, 0,
+                               0, 1, 0,
+                               0, 0, 0};
+
+static const int same5x5[25] = {0, 0, 0, 0, 0,
+                                0, 0, 0, 0, 0,
+                                0, 0, 1, 0, 0,
+                                0, 0, 0, 0, 0,
+                                0, 0, 0, 0, 0};
+
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat pix_fmts[] = { /* pixel formats this filter declares */
+        AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV411P,
+        AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P,
+        AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ411P,
+        AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV440P, AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRAP,
+        AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE
+    };
+    AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
+    /* Report a failed list allocation as ENOMEM instead of handing NULL on. */
+    if (!fmts_list)
+        return AVERROR(ENOMEM);
+    return ff_set_common_formats(ctx, fmts_list);
+}
+
+static int config_input(AVFilterLink *inlink)
+{
+    ConvolutionContext *s = inlink->dst->priv;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    int ret;
+    /* Cache per-plane dimensions and allocate the scratch line buffer for the input link. */
+    if ((ret = av_image_fill_linesizes(s->planewidth, inlink->format, inlink->w)) < 0)
+        return ret;
+    /* Planes 1 and 2 use the chroma-subsampled height; planes 0 and 3 use the full height. */
+    s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
+    s->planeheight[0] = s->planeheight[3] = inlink->h;
+
+    s->nb_planes = av_pix_fmt_count_planes(inlink->format);
+    /* Five line slots (as many as filter_5x5 uses), each 32 bytes longer than planewidth[0]. */
+    s->bstride = s->planewidth[0] + 32;
+    s->buffer = av_malloc(5 * s->bstride);
+    if (!s->buffer)
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
+static inline void line_copy8(uint8_t *line, const uint8_t *srcp, int width, int mergin)
+{
+    int i;
+    /* Copy one line of width bytes from srcp into the scratch slot. */
+    memcpy(line, srcp, width);
+    /* Fill mergin bytes before and after the line by mirroring around its first and last byte. */
+    for (i = mergin; i > 0; i--) {
+        line[-i] = line[i];
+        line[width - 1 + i] = line[width - 1 - i];
+    }
+}
+
+static void filter_3x3(ConvolutionContext *s, AVFrame *in, AVFrame *out, int plane)
+{
+    const uint8_t *src = in->data[plane];
+    uint8_t *dst = out->data[plane];
+    const int stride = in->linesize[plane];
+    const int bstride = s->bstride;
+    const int height = s->planeheight[plane];
+    const int width  = s->planewidth[plane];
+    uint8_t *p0 = s->buffer + 16; /* slots start 16 bytes in, leaving room for the left border written by line_copy8() */
+    uint8_t *p1 = p0 + bstride;
+    uint8_t *p2 = p1 + bstride;
+    uint8_t *orig = p0, *end = p2;
+    const int *matrix = s->matrix[plane];
+    const float rdiv = s->rdiv[plane];
+    const float bias = s->bias[plane];
+    int y, x;
+    /* Apply the plane's 3x3 matrix. The line above row 0 is taken from row 1 (mirrored edge). */
+    line_copy8(p0, src + stride, width, 1);
+    line_copy8(p1, src, width, 1);
+    /* p0, p1 and p2 hold the lines above, at and below output row y. */
+    for (y = 0; y < height; y++) {
+        src += stride * (y < height - 1 ? 1 : -1); /* on the last row step back one line instead of forward */
+        line_copy8(p2, src, width, 1);
+
+        for (x = 0; x < width; x++) {
+            int sum = p0[x - 1] * matrix[0] +
+                      p0[x] *     matrix[1] +
+                      p0[x + 1] * matrix[2] +
+                      p1[x - 1] * matrix[3] +
+                      p1[x] *     matrix[4] +
+                      p1[x + 1] * matrix[5] +
+                      p2[x - 1] * matrix[6] +
+                      p2[x] *     matrix[7] +
+                      p2[x + 1] * matrix[8];
+            sum = (int)(sum * rdiv + bias + 0.5f);
+            dst[x] = av_clip_uint8(sum);
+        }
+        /* Rotate the slots: the slot after p2 (wrapping from end to orig) receives the next line. */
+        p0 = p1;
+        p1 = p2;
+        p2 = (p2 == end) ? orig: p2 + bstride;
+        dst += out->linesize[plane];
+    }
+}
+
+static void filter_5x5(ConvolutionContext *s, AVFrame *in, AVFrame *out, int plane)
+{
+    const uint8_t *src = in->data[plane];
+    uint8_t *dst = out->data[plane];
+    const int stride = in->linesize[plane];
+    const int bstride = s->bstride;
+    const int height = s->planeheight[plane];
+    const int width  = s->planewidth[plane];
+    uint8_t *p0 = s->buffer + 16;
+    uint8_t *p1 = p0 + bstride;
+    uint8_t *p2 = p1 + bstride;
+    uint8_t *p3 = p2 + bstride;
+    uint8_t *p4 = p3 + bstride;
+    uint8_t *orig = p0, *end = p4;
+    const int *matrix = s->matrix[plane];
+    float rdiv = s->rdiv[plane];
+    float bias = s->bias[plane];
+    int y, x, i;
+    /* Apply the plane's 5x5 matrix. The two lines above row 0 are taken from rows 2 and 1. */
+    line_copy8(p0, src + 2 * stride, width, 2);
+    line_copy8(p1, src + stride, width, 2);
+    line_copy8(p2, src, width, 2);
+    src += stride;
+    line_copy8(p3, src, width, 2);
+    /* p0..p4 hold the five lines centred on output row y (p2 is row y itself). */
+    /* array[] is rebuilt per row because the slot pointers rotate at the end of each iteration. */
+    for (y = 0; y < height; y++) {
+        uint8_t *array[] = {
+            p0 - 2, p0 - 1, p0, p0 + 1, p0 + 2,
+            p1 - 2, p1 - 1, p1, p1 + 1, p1 + 2,
+            p2 - 2, p2 - 1, p2, p2 + 1, p2 + 2,
+            p3 - 2, p3 - 1, p3, p3 + 1, p3 + 2,
+            p4 - 2, p4 - 1, p4, p4 + 1, p4 + 2
+        };
+
+        src += stride * (y < height - 2 ? 1 : -1); /* for the last two rows step back one line instead of forward */
+        line_copy8(p4, src, width, 2);
+
+        for (x = 0; x < width; x++) {
+            int sum = 0;
+
+            for (i = 0; i < 25; i++) {
+                sum += *(array[i] + x) * matrix[i]; /* array[i] + x is the tap that pairs with coefficient i */
+            }
+            sum = (int)(sum * rdiv + bias + 0.5f);
+            dst[x] = av_clip_uint8(sum);
+        }
+
+        p0 = p1;
+        p1 = p2;
+        p2 = p3;
+        p3 = p4;
+        p4 = (p4 == end) ? orig: p4 + bstride;
+        dst += out->linesize[plane];
+    }
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    ConvolutionContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    int p;
+
+    /* Without an output frame the input is released and ENOMEM is reported. */
+    if (!out) {
+        av_frame_free(&in);
+        return AVERROR(ENOMEM);
+    }
+    av_frame_copy_props(out, in);
+
+    /* Planes flagged in s->copy are duplicated; the others go through their kernel. */
+    for (p = 0; p < s->nb_planes; p++) {
+        if (!s->copy[p]) {
+            s->filter[p](s, in, out, p);
+        } else {
+            av_image_copy_plane(out->data[p], out->linesize[p],
+                                in->data[p], in->linesize[p],
+                                s->planewidth[p], s->planeheight[p]);
+        }
+    }
+
+    av_frame_free(&in);
+    return ff_filter_frame(outlink, out);
+}
+
+static av_cold int init(AVFilterContext *ctx)
+{
+    ConvolutionContext *s = ctx->priv;
+    int i;
+    /* Parse the four matrix option strings and pick, per plane, either a plain copy or a kernel. */
+    for (i = 0; i < 4; i++) {
+        int *matrix = (int *)s->matrix[i];
+        char *p, *arg, *saveptr = NULL;
+        /* Split on spaces, storing at most 25 integers; assumes matrix_length[i] starts at 0 - TODO confirm. */
+        p = s->matrix_str[i];
+        while (s->matrix_length[i] < 25) {
+            if (!(arg = av_strtok(p, " ", &saveptr)))
+                break;
+            /* After the first call av_strtok() continues from saveptr, so the string argument is NULL. */
+            p = NULL;
+            sscanf(arg, "%d", &matrix[s->matrix_length[i]]); /* NOTE(review): result unchecked; a non-numeric token leaves the entry as it was */
+            s->matrix_length[i]++;
+        }
+        /* Exactly 9 or 25 coefficients are accepted; an identity kernel turns the plane into a copy. */
+        if (s->matrix_length[i] == 9) {
+            if (!memcmp(matrix, same3x3, sizeof(same3x3)))
+                s->copy[i] = 1;
+            else
+                s->filter[i] = filter_3x3;
+        } else if (s->matrix_length[i] == 25) {
+            if (!memcmp(matrix, same5x5, sizeof(same5x5)))
+                s->copy[i] = 1;
+            else
+                s->filter[i] = filter_5x5;
+        } else {
+            return AVERROR(EINVAL);
+        }
+    }
+
+    return 0;
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    ConvolutionContext *s = ctx->priv;
+    /* Release the scratch line buffer allocated in config_input(); av_freep() also clears the pointer. */
+    av_freep(&s->buffer);
+}
+
+static const AVFilterPad convolution_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_input,
+        .filter_frame = filter_frame,
+    },
+    { NULL }
+};
+
+static const AVFilterPad convolution_outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_convolution = {
+    .name          = "convolution",
+    .description   = NULL_IF_CONFIG_SMALL("Apply convolution filter."),
+    .priv_size     = sizeof(ConvolutionContext),
+    .priv_class    = &convolution_class,
+    .init          = init,
+    .uninit        = uninit,
+    .query_formats = query_formats,
+    .inputs        = convolution_inputs,
+    .outputs       = convolution_outputs,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC,
+};
diff --git a/libavfilter/vf_cover_rect.c b/libavfilter/vf_cover_rect.c
index c2020bb3..f7f61038 100644
--- a/libavfilter/vf_cover_rect.c
+++ b/libavfilter/vf_cover_rect.c
@@ -53,13 +53,7 @@ static const AVOption cover_rect_options[] = {
     { NULL }
 };
 
-static const AVClass cover_rect_class = {
-    .class_name       = "cover_rect",
-    .item_name        = av_default_item_name,
-    .option           = cover_rect_options,
-    .version          = LIBAVUTIL_VERSION_INT,
-    .category         = AV_CLASS_CATEGORY_FILTER,
-};
+AVFILTER_DEFINE_CLASS(cover_rect);
 
 static int query_formats(AVFilterContext *ctx)
 {
@@ -84,8 +78,8 @@ static void cover_rect(CoverContext *cover, AVFrame *in, int offx, int offy)
     for (p = 0; p < 3; p++) {
         uint8_t *data = in->data[p] + (offx>>!!p) + (offy>>!!p) * in->linesize[p];
         const uint8_t *src = cover->cover_frame->data[p];
-        int w = FF_CEIL_RSHIFT(cover->cover_frame->width , !!p);
-        int h = FF_CEIL_RSHIFT(cover->cover_frame->height, !!p);
+        int w = AV_CEIL_RSHIFT(cover->cover_frame->width , !!p);
+        int h = AV_CEIL_RSHIFT(cover->cover_frame->height, !!p);
         for (y = 0; y < h; y++) {
             for (x = 0; x < w; x++) {
                 data[x] = src[x];
@@ -104,10 +98,10 @@ static void blur(CoverContext *cover, AVFrame *in, int offx, int offy)
         int oy = offy>>!!p;
         int stride = in->linesize[p];
         uint8_t *data = in->data[p] + ox + oy * stride;
-        int w = FF_CEIL_RSHIFT(cover->width , !!p);
-        int h = FF_CEIL_RSHIFT(cover->height, !!p);
-        int iw = FF_CEIL_RSHIFT(in->width , !!p);
-        int ih = FF_CEIL_RSHIFT(in->height, !!p);
+        int w = AV_CEIL_RSHIFT(cover->width , !!p);
+        int h = AV_CEIL_RSHIFT(cover->height, !!p);
+        int iw = AV_CEIL_RSHIFT(in->width , !!p);
+        int ih = AV_CEIL_RSHIFT(in->height, !!p);
         for (y = 0; y < h; y++) {
             for (x = 0; x < w; x++) {
                 int c = 0;
@@ -255,7 +249,7 @@ static const AVFilterPad cover_rect_outputs[] = {
 
 AVFilter ff_vf_cover_rect = {
     .name            = "cover_rect",
-    .description     = NULL_IF_CONFIG_SMALL("Find and cover a user specified object"),
+    .description     = NULL_IF_CONFIG_SMALL("Find and cover a user specified object."),
     .priv_size       = sizeof(CoverContext),
     .init            = init,
     .uninit          = uninit,
diff --git a/libavfilter/vf_crop.c b/libavfilter/vf_crop.c
index f58a7ae8..01773fa3 100644
--- a/libavfilter/vf_crop.c
+++ b/libavfilter/vf_crop.c
@@ -93,13 +93,14 @@ typedef struct CropContext {
 static int query_formats(AVFilterContext *ctx)
 {
     AVFilterFormats *formats = NULL;
-    int fmt;
+    int fmt, ret;
 
     for (fmt = 0; av_pix_fmt_desc_get(fmt); fmt++) {
         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);
         if (!(desc->flags & (AV_PIX_FMT_FLAG_HWACCEL | AV_PIX_FMT_FLAG_BITSTREAM)) &&
-            !((desc->log2_chroma_w || desc->log2_chroma_h) && !(desc->flags & AV_PIX_FMT_FLAG_PLANAR)))
-            ff_add_format(&formats, fmt);
+            !((desc->log2_chroma_w || desc->log2_chroma_h) && !(desc->flags & AV_PIX_FMT_FLAG_PLANAR)) &&
+            (ret = ff_add_format(&formats, fmt)) < 0)
+            return ret;
     }
 
     return ff_set_common_formats(ctx, formats);
@@ -125,7 +126,7 @@ static inline int normalize_double(int *n, double d)
         *n = d > INT_MAX ? INT_MAX : INT_MIN;
         ret = AVERROR(EINVAL);
     } else
-        *n = round(d);
+        *n = lrint(d);
 
     return ret;
 }
@@ -296,6 +297,42 @@ static int filter_frame(AVFilterLink *link, AVFrame *frame)
     return ff_filter_frame(link->dst->outputs[0], frame);
 }
 
+/* Handle the runtime commands "w"/"out_w", "h"/"out_h", "x" and "y";
+ * any other command yields AVERROR(ENOSYS). */
+static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
+                           char *res, int res_len, int flags)
+{
+    CropContext *s = ctx->priv;
+    AVFilterLink *inlink  = ctx->inputs[0];
+    AVFilterLink *outlink = ctx->outputs[0];
+    const int saved_x = s->x, saved_y = s->y;
+    const int saved_w = s->w, saved_h = s->h;
+    int ret;
+
+    /* Only the crop geometry options can be changed through a command. */
+    if (strcmp(cmd, "out_w") && strcmp(cmd, "w") &&
+        strcmp(cmd, "out_h") && strcmp(cmd, "h") &&
+        strcmp(cmd, "x")     && strcmp(cmd, "y"))
+        return AVERROR(ENOSYS);
+
+    /* Set the option named by cmd to args, then configure the input
+     * link again so the crop area is re-evaluated. */
+    av_opt_set(s, cmd, args, 0);
+
+    ret = config_input(inlink);
+    if (ret < 0) {
+        /* Reconfiguration failed: put back the previous crop rectangle. */
+        s->x = saved_x;
+        s->y = saved_y;
+        s->w = saved_w;
+        s->h = saved_h;
+        return ret;
+    }
+
+    /* The result of configuring the output link is the command's result. */
+    return config_output(outlink);
+}
+
 #define OFFSET(x) offsetof(CropContext, x)
 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
 
@@ -306,7 +343,7 @@ static const AVOption crop_options[] = {
     { "h",           "set the height crop area expression",  OFFSET(h_expr), AV_OPT_TYPE_STRING, {.str = "ih"}, CHAR_MIN, CHAR_MAX, FLAGS },
     { "x",           "set the x crop area expression",       OFFSET(x_expr), AV_OPT_TYPE_STRING, {.str = "(in_w-out_w)/2"}, CHAR_MIN, CHAR_MAX, FLAGS },
     { "y",           "set the y crop area expression",       OFFSET(y_expr), AV_OPT_TYPE_STRING, {.str = "(in_h-out_h)/2"}, CHAR_MIN, CHAR_MAX, FLAGS },
-    { "keep_aspect", "keep aspect ratio",                    OFFSET(keep_aspect), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS },
+    { "keep_aspect", "keep aspect ratio",                    OFFSET(keep_aspect), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
     { NULL }
 };
 
@@ -332,12 +369,13 @@ static const AVFilterPad avfilter_vf_crop_outputs[] = {
 };
 
 AVFilter ff_vf_crop = {
-    .name          = "crop",
-    .description   = NULL_IF_CONFIG_SMALL("Crop the input video."),
-    .priv_size     = sizeof(CropContext),
-    .priv_class    = &crop_class,
-    .query_formats = query_formats,
-    .uninit        = uninit,
-    .inputs        = avfilter_vf_crop_inputs,
-    .outputs       = avfilter_vf_crop_outputs,
+    .name            = "crop",
+    .description     = NULL_IF_CONFIG_SMALL("Crop the input video."),
+    .priv_size       = sizeof(CropContext),
+    .priv_class      = &crop_class,
+    .query_formats   = query_formats,
+    .uninit          = uninit,
+    .inputs          = avfilter_vf_crop_inputs,
+    .outputs         = avfilter_vf_crop_outputs,
+    .process_command = process_command,
 };
diff --git a/libavfilter/vf_cropdetect.c b/libavfilter/vf_cropdetect.c
index 7fa96516..4a898755 100644
--- a/libavfilter/vf_cropdetect.c
+++ b/libavfilter/vf_cropdetect.c
@@ -144,7 +144,7 @@ static int config_input(AVFilterLink *inlink)
     av_image_fill_max_pixsteps(s->max_pixsteps, NULL, desc);
 
     if (s->limit < 1.0)
-        s->limit *= (1 << (desc->comp[0].depth_minus1 + 1)) - 1;
+        s->limit *= (1 << desc->comp[0].depth) - 1;
 
     s->x1 = inlink->w - 1;
     s->y1 = inlink->h - 1;
@@ -165,7 +165,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
     int w, h, x, y, shrink_by;
     AVDictionary **metadata;
     int outliers, last_y;
-    int limit = round(s->limit);
+    int limit = lrint(s->limit);
 
     // ignore first 2 frames - they may be empty
     if (++s->frame_nb > 0) {
diff --git a/libavfilter/vf_dctdnoiz.c b/libavfilter/vf_dctdnoiz.c
index 37306bb5..6957f19a 100644
--- a/libavfilter/vf_dctdnoiz.c
+++ b/libavfilter/vf_dctdnoiz.c
@@ -367,10 +367,10 @@ static av_always_inline void filter_freq_##bsize(const float *src, int src_lines
         float *b = &tmp_block2[i];                                                          \
         /* frequency filtering */                                                           \
         if (expr) {                                                                         \
-            var_values[VAR_C] = FFABS(*b);                                                  \
+            var_values[VAR_C] = fabsf(*b);                                                  \
             *b *= av_expr_eval(expr, var_values, NULL);                                     \
         } else {                                                                            \
-            if (FFABS(*b) < sigma_th)                                                       \
+            if (fabsf(*b) < sigma_th)                                                       \
                 *b = 0;                                                                     \
         }                                                                                   \
     }                                                                                       \
diff --git a/libavfilter/vf_deband.c b/libavfilter/vf_deband.c
new file mode 100644
index 00000000..01728b19
--- /dev/null
+++ b/libavfilter/vf_deband.c
@@ -0,0 +1,312 @@
+/*
+ * Copyright (c) 2015 Niklas Haas
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+#include "avfilter.h"
+#include "internal.h"
+#include "video.h"
+
+typedef struct DebandContext {
+    const AVClass *class;
+
+    float threshold[4];     ///< per-plane threshold options ("1thr".."4thr"), relative to the maximum sample value
+    int range;              ///< "range" option; a negative value is used as a fixed distance (see config_input)
+    int blur;               ///< "blur" option: compare against the average of the references instead of each one
+    float direction;        ///< "direction" option in radians; a negative value is used as a fixed angle
+
+    int nb_components;      ///< component count taken from the pixel format descriptor
+    int planewidth[4];      ///< per-plane widths in samples
+    int planeheight[4];     ///< per-plane heights in lines
+    int thr[4];             ///< thresholds scaled to the component bit depth
+
+    int *x_pos;             ///< horizontal sampling offsets, planewidth[0] * planeheight[0] entries
+    int *y_pos;             ///< vertical sampling offsets, planewidth[0] * planeheight[0] entries
+
+    int (*deband)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs); ///< slice worker (deband_8_c or deband_16_c)
+} DebandContext;
+
+#define OFFSET(x) offsetof(DebandContext, x)
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
+
+static const AVOption deband_options[] = {
+    { "1thr",      "set 1st plane threshold", OFFSET(threshold[0]), AV_OPT_TYPE_FLOAT, {.dbl=0.02},  0.00003,     0.5, FLAGS },
+    { "2thr",      "set 2nd plane threshold", OFFSET(threshold[1]), AV_OPT_TYPE_FLOAT, {.dbl=0.02},  0.00003,     0.5, FLAGS },
+    { "3thr",      "set 3rd plane threshold", OFFSET(threshold[2]), AV_OPT_TYPE_FLOAT, {.dbl=0.02},  0.00003,     0.5, FLAGS },
+    { "4thr",      "set 4th plane threshold", OFFSET(threshold[3]), AV_OPT_TYPE_FLOAT, {.dbl=0.02},  0.00003,     0.5, FLAGS },
+    { "range",     "set range",               OFFSET(range),        AV_OPT_TYPE_INT,   {.i64=16},    INT_MIN, INT_MAX, FLAGS },
+    { "r",         "set range",               OFFSET(range),        AV_OPT_TYPE_INT,   {.i64=16},    INT_MIN, INT_MAX, FLAGS },
+    { "direction", "set direction",           OFFSET(direction),    AV_OPT_TYPE_FLOAT, {.dbl=2*M_PI},-2*M_PI,  2*M_PI, FLAGS },
+    { "d",         "set direction",           OFFSET(direction),    AV_OPT_TYPE_FLOAT, {.dbl=2*M_PI},-2*M_PI,  2*M_PI, FLAGS },
+    { "blur",      "set blur",                OFFSET(blur),         AV_OPT_TYPE_BOOL,  {.i64=1},           0,       1, FLAGS },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(deband);
+
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat pix_fmts[] = { /* gray, planar YUV(A) and planar GBR(A) formats, 8 to 16 bits */
+        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY16,
+        AV_PIX_FMT_YUV444P,  AV_PIX_FMT_YUV422P,  AV_PIX_FMT_YUV420P,
+        AV_PIX_FMT_YUV411P,  AV_PIX_FMT_YUV410P,  AV_PIX_FMT_YUV440P,
+        AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ420P,
+        AV_PIX_FMT_YUVJ411P, AV_PIX_FMT_YUVJ440P,
+        AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUVA444P,
+        AV_PIX_FMT_YUV420P9, AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV444P9,
+        AV_PIX_FMT_YUVA420P9, AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA444P9,
+        AV_PIX_FMT_YUVA420P10, AV_PIX_FMT_YUVA422P10, AV_PIX_FMT_YUVA444P10,
+        AV_PIX_FMT_YUV420P12, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV444P12,
+        AV_PIX_FMT_YUV420P14, AV_PIX_FMT_YUV422P14, AV_PIX_FMT_YUV444P14,
+        AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRAP,
+        AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10,
+        AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14,
+        AV_PIX_FMT_GBRP16, AV_PIX_FMT_GBRAP16,
+        AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16,
+        AV_PIX_FMT_YUVA420P16, AV_PIX_FMT_YUVA422P16, AV_PIX_FMT_YUVA444P16,
+        AV_PIX_FMT_NONE
+    };
+    AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
+    if (!fmts_list) /* a NULL list is reported as an allocation failure */
+        return AVERROR(ENOMEM);
+    /* Declare the same format list on all links of the filter. */
+    return ff_set_common_formats(ctx, fmts_list);
+}
+
+static float frand(int x, int y)
+{
+    const float r = sinf(x * 12.9898 + y * 78.233) * 43758.545;
+    /* Deterministic value derived only from (x, y): the fractional part of r. */
+    return r - floorf(r);
+}
+
+static int inline get_avg(int ref0, int ref1, int ref2, int ref3)
+{
+    return (ref0 + ref1 + ref2 + ref3) / 4; /* integer mean of the four reference samples */
+}
+
+typedef struct ThreadData {
+    AVFrame *in, *out; ///< input and output frames passed to the slice workers
+} ThreadData;
+
+static int deband_8_c(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    DebandContext *s = ctx->priv;
+    ThreadData *td = arg;
+    AVFrame *in = td->in;
+    AVFrame *out = td->out;
+    int x, y, p;
+    /* Slice worker for 8-bit samples: job jobnr of nb_jobs handles its share of rows in every plane. */
+    for (p = 0; p < s->nb_components; p++) {
+        const uint8_t *src_ptr = (const uint8_t *)in->data[p];
+        uint8_t *dst_ptr = (uint8_t *)out->data[p];
+        const int dst_linesize = out->linesize[p];
+        const int src_linesize = in->linesize[p];
+        const int thr = s->thr[p];
+        const int start = (s->planeheight[p] *  jobnr   ) / nb_jobs;
+        const int end   = (s->planeheight[p] * (jobnr+1)) / nb_jobs;
+        const int w = s->planewidth[p] - 1;
+        const int h = s->planeheight[p] - 1;
+        /* w and h are the last valid column and row, used to clamp the reference coordinates. */
+        for (y = start; y < end; y++) {
+            const int pos = y * s->planewidth[0]; /* offset tables hold planewidth[0] entries per row */
+
+            for (x = 0; x < s->planewidth[p]; x++) {
+                const int x_pos = s->x_pos[pos + x];
+                const int y_pos = s->y_pos[pos + x];
+                const int ref0 = src_ptr[av_clip(y +  y_pos, 0, h) * src_linesize + av_clip(x +  x_pos, 0, w)];
+                const int ref1 = src_ptr[av_clip(y + -y_pos, 0, h) * src_linesize + av_clip(x +  x_pos, 0, w)];
+                const int ref2 = src_ptr[av_clip(y + -y_pos, 0, h) * src_linesize + av_clip(x + -x_pos, 0, w)];
+                const int ref3 = src_ptr[av_clip(y +  y_pos, 0, h) * src_linesize + av_clip(x + -x_pos, 0, w)];
+                const int src0 = src_ptr[y * src_linesize + x];
+                /* blur: test the average of the references; otherwise every reference must be within thr. */
+                if (s->blur) {
+                    const int avg = get_avg(ref0, ref1, ref2, ref3);
+                    const int diff = FFABS(src0 - avg);
+
+                    dst_ptr[y * dst_linesize + x] = diff < thr ? avg : src0;
+                } else {
+                    dst_ptr[y * dst_linesize + x] = (FFABS(src0 - ref0) < thr) &&
+                                                    (FFABS(src0 - ref1) < thr) &&
+                                                    (FFABS(src0 - ref2) < thr) &&
+                                                    (FFABS(src0 - ref3) < thr) ? get_avg(ref0, ref1, ref2, ref3) : src0;
+                }
+            }
+        }
+    }
+
+    return 0;
+}
+
+static int deband_16_c(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    DebandContext *s = ctx->priv;
+    ThreadData *td = arg;
+    AVFrame *in = td->in;
+    AVFrame *out = td->out;
+    int x, y, p;
+    /* Slice worker for samples stored as uint16_t; linesizes are converted from bytes to samples. */
+    for (p = 0; p < s->nb_components; p++) {
+        const uint16_t *src_ptr = (const uint16_t *)in->data[p];
+        uint16_t *dst_ptr = (uint16_t *)out->data[p];
+        const int dst_linesize = out->linesize[p] / 2;
+        const int src_linesize = in->linesize[p] / 2;
+        const int thr = s->thr[p];
+        const int start = (s->planeheight[p] *  jobnr   ) / nb_jobs;
+        const int end   = (s->planeheight[p] * (jobnr+1)) / nb_jobs;
+        const int w = s->planewidth[p] - 1;
+        const int h = s->planeheight[p] - 1;
+        /* w and h are the last valid column and row, used to clamp the reference coordinates. */
+        for (y = start; y < end; y++) {
+            const int pos = y * s->planewidth[0]; /* offset tables hold planewidth[0] entries per row */
+
+            for (x = 0; x < s->planewidth[p]; x++) {
+                const int x_pos = s->x_pos[pos + x];
+                const int y_pos = s->y_pos[pos + x];
+                const int ref0 = src_ptr[av_clip(y +  y_pos, 0, h) * src_linesize + av_clip(x +  x_pos, 0, w)];
+                const int ref1 = src_ptr[av_clip(y + -y_pos, 0, h) * src_linesize + av_clip(x +  x_pos, 0, w)];
+                const int ref2 = src_ptr[av_clip(y + -y_pos, 0, h) * src_linesize + av_clip(x + -x_pos, 0, w)];
+                const int ref3 = src_ptr[av_clip(y +  y_pos, 0, h) * src_linesize + av_clip(x + -x_pos, 0, w)];
+                const int src0 = src_ptr[y * src_linesize + x];
+                /* blur: test the average of the references; otherwise every reference must be within thr. */
+                if (s->blur) {
+                    const int avg = get_avg(ref0, ref1, ref2, ref3);
+                    const int diff = FFABS(src0 - avg);
+
+                    dst_ptr[y * dst_linesize + x] = diff < thr ? avg : src0;
+                } else {
+                    dst_ptr[y * dst_linesize + x] = (FFABS(src0 - ref0) < thr) &&
+                                                    (FFABS(src0 - ref1) < thr) &&
+                                                    (FFABS(src0 - ref2) < thr) &&
+                                                    (FFABS(src0 - ref3) < thr) ? get_avg(ref0, ref1, ref2, ref3) : src0;
+                }
+            }
+        }
+    }
+
+    return 0;
+}
+
+static int config_input(AVFilterLink *inlink)
+{
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    AVFilterContext *ctx = inlink->dst;
+    DebandContext *s = ctx->priv;
+    const float direction = s->direction;
+    const int range = s->range;
+    int x, y;
+    /* Derive per-plane geometry and thresholds, then precompute the sampling offsets. */
+    s->nb_components = desc->nb_components;
+
+    s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
+    s->planeheight[0] = s->planeheight[3] = inlink->h;
+    s->planewidth[1]  = s->planewidth[2]  = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
+    s->planewidth[0]  = s->planewidth[3]  = inlink->w;
+
+    s->deband = desc->comp[0].depth > 8 ? deband_16_c : deband_8_c;
+    /* Scale the relative thresholds to each component's maximum sample value. */
+    s->thr[0] = ((1 << desc->comp[0].depth) - 1) * s->threshold[0];
+    s->thr[1] = ((1 << desc->comp[1].depth) - 1) * s->threshold[1];
+    s->thr[2] = ((1 << desc->comp[2].depth) - 1) * s->threshold[2];
+    s->thr[3] = ((1 << desc->comp[3].depth) - 1) * s->threshold[3];
+    /* One offset pair per plane-0 sample, stored row by row with a stride of planewidth[0]. */
+    s->x_pos = av_malloc(s->planewidth[0] * s->planeheight[0] * sizeof(*s->x_pos));
+    s->y_pos = av_malloc(s->planewidth[0] * s->planeheight[0] * sizeof(*s->y_pos));
+    if (!s->x_pos || !s->y_pos)
+        return AVERROR(ENOMEM);
+
+    for (y = 0; y < s->planeheight[0]; y++) {
+        for (x = 0; x < s->planewidth[0]; x++) {
+            const float r = frand(x, y);
+            const float dir = direction < 0 ? -direction : r * direction; /* negative option: fixed angle */
+            const int dist = range < 0 ? -range : r * range;              /* negative option: fixed distance */
+            /* The row stride must be the width; using the height overruns the tables when height > width. */
+            s->x_pos[y * s->planewidth[0] + x] = cosf(dir) * dist;
+            s->y_pos[y * s->planewidth[0] + x] = sinf(dir) * dist;
+        }
+    }
+
+    return 0;
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    DebandContext *s = ctx->priv;
+    AVFrame *out;
+    ThreadData td;
+    /* Process one frame: get an output buffer, run the slice workers, pass the result on. */
+    out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!out) {
+        av_frame_free(&in);
+        return AVERROR(ENOMEM);
+    }
+    av_frame_copy_props(out, in);
+    /* The job count is the smallest of planeheight[1], planeheight[2] and the graph's thread count. */
+    td.in = in; td.out = out;
+    ctx->internal->execute(ctx, s->deband, &td, NULL, FFMIN3(s->planeheight[1],
+                                                             s->planeheight[2],
+                                                             ctx->graph->nb_threads));
+
+    av_frame_free(&in);
+    return ff_filter_frame(outlink, out);
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    DebandContext *s = ctx->priv;
+    /* Release the offset tables allocated in config_input(); av_freep() also clears the pointers. */
+    av_freep(&s->x_pos);
+    av_freep(&s->y_pos);
+}
+
+static const AVFilterPad avfilter_vf_deband_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_input,
+        .filter_frame = filter_frame,
+    },
+    { NULL }
+};
+
+static const AVFilterPad avfilter_vf_deband_outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_deband = {
+    .name          = "deband",
+    .description   = NULL_IF_CONFIG_SMALL("Debands video."),
+    .priv_size     = sizeof(DebandContext),
+    .priv_class    = &deband_class,
+    .uninit        = uninit,
+    .query_formats = query_formats,
+    .inputs        = avfilter_vf_deband_inputs,
+    .outputs       = avfilter_vf_deband_outputs,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS,
+};
diff --git a/libavfilter/vf_decimate.c b/libavfilter/vf_decimate.c
index 70357ea8..e2c0a32d 100644
--- a/libavfilter/vf_decimate.c
+++ b/libavfilter/vf_decimate.c
@@ -42,7 +42,7 @@ typedef struct {
     AVFrame *last;          ///< last frame from the previous queue
     AVFrame **clean_src;    ///< frame queue for the clean source
     int got_frame[2];       ///< frame request flag for each input stream
-    double ts_unit;         ///< timestamp units for the output frames
+    AVRational ts_unit;     ///< timestamp units for the output frames
     int64_t start_pts;      ///< base for output timestamps
     uint32_t eof;           ///< bitmask for end of stream
     int hsub, vsub;         ///< chroma subsampling values
@@ -71,8 +71,8 @@ static const AVOption decimate_options[] = {
     { "scthresh",  "set scene change threshold", OFFSET(scthresh_flt),  AV_OPT_TYPE_DOUBLE, {.dbl = 15.0}, 0, 100, FLAGS },
     { "blockx",    "set the size of the x-axis blocks used during metric calculations", OFFSET(blockx), AV_OPT_TYPE_INT, {.i64 = 32}, 4, 1<<9, FLAGS },
     { "blocky",    "set the size of the y-axis blocks used during metric calculations", OFFSET(blocky), AV_OPT_TYPE_INT, {.i64 = 32}, 4, 1<<9, FLAGS },
-    { "ppsrc",     "mark main input as a pre-processed input and activate clean source input stream", OFFSET(ppsrc), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS },
-    { "chroma",    "set whether or not chroma is considered in the metric calculations", OFFSET(chroma), AV_OPT_TYPE_INT, {.i64=1}, 0, 1, FLAGS },
+    { "ppsrc",     "mark main input as a pre-processed input and activate clean source input stream", OFFSET(ppsrc), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
+    { "chroma",    "set whether or not chroma is considered in the metric calculations", OFFSET(chroma), AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1, FLAGS },
     { NULL }
 };
 
@@ -93,8 +93,8 @@ static void calc_diffs(const DecimateContext *dm, struct qitem *q,
         const int linesize2 = f2->linesize[plane];
         const uint8_t *f1p = f1->data[plane];
         const uint8_t *f2p = f2->data[plane];
-        int width    = plane ? FF_CEIL_RSHIFT(f1->width,  dm->hsub) : f1->width;
-        int height   = plane ? FF_CEIL_RSHIFT(f1->height, dm->vsub) : f1->height;
+        int width    = plane ? AV_CEIL_RSHIFT(f1->width,  dm->hsub) : f1->width;
+        int height   = plane ? AV_CEIL_RSHIFT(f1->height, dm->vsub) : f1->height;
         int hblockx  = dm->blockx / 2;
         int hblocky  = dm->blocky / 2;
 
@@ -167,9 +167,12 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
     if (in) {
         /* update frame metrics */
         prv = dm->fid ? dm->queue[dm->fid - 1].frame : dm->last;
-        if (!prv)
-            prv = in;
-        calc_diffs(dm, &dm->queue[dm->fid], prv, in);
+        if (!prv) {
+            dm->queue[dm->fid].maxbdiff = INT64_MAX;
+            dm->queue[dm->fid].totdiff  = INT64_MAX;
+        } else {
+            calc_diffs(dm, &dm->queue[dm->fid], prv, in);
+        }
         if (++dm->fid != dm->cycle)
             return 0;
         av_frame_free(&dm->last);
@@ -217,7 +220,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
                 av_frame_free(&frame);
                 frame = dm->clean_src[i];
             }
-            frame->pts = outlink->frame_count * dm->ts_unit +
+            frame->pts = av_rescale_q(outlink->frame_count, dm->ts_unit, (AVRational){1,1}) +
                          (dm->start_pts == AV_NOPTS_VALUE ? 0 : dm->start_pts);
             ret = ff_filter_frame(outlink, frame);
             if (ret < 0)
@@ -239,7 +242,7 @@ static int config_input(AVFilterLink *inlink)
 
     dm->hsub      = pix_desc->log2_chroma_w;
     dm->vsub      = pix_desc->log2_chroma_h;
-    dm->depth     = pix_desc->comp[0].depth_minus1 + 1;
+    dm->depth     = pix_desc->comp[0].depth;
     max_value     = (1 << dm->depth) - 1;
     dm->scthresh  = (int64_t)(((int64_t)max_value *          w * h          * dm->scthresh_flt)  / 100);
     dm->dupthresh = (int64_t)(((int64_t)max_value * dm->blockx * dm->blocky * dm->dupthresh_flt) / 100);
@@ -362,6 +365,8 @@ static int config_output(AVFilterLink *outlink)
     DecimateContext *dm = ctx->priv;
     const AVFilterLink *inlink =
         ctx->inputs[dm->ppsrc ? INPUT_CLEANSRC : INPUT_MAIN];
+    const AVFilterLink *inlink_main =
+        ctx->inputs[INPUT_MAIN];
     AVRational fps = inlink->frame_rate;
 
     if (!fps.num || !fps.den) {
@@ -369,16 +374,22 @@ static int config_output(AVFilterLink *outlink)
                "current rate of %d/%d is invalid\n", fps.num, fps.den);
         return AVERROR(EINVAL);
     }
+
+    if (inlink->w != inlink_main->w ||
+        inlink->h != inlink_main->h ||
+        inlink->format != inlink_main->format) {
+        av_log(ctx, AV_LOG_ERROR, "frame parameters differ between inputs\n");
+        return AVERROR_PATCHWELCOME;
+    }
     fps = av_mul_q(fps, (AVRational){dm->cycle - 1, dm->cycle});
     av_log(ctx, AV_LOG_VERBOSE, "FPS: %d/%d -> %d/%d\n",
            inlink->frame_rate.num, inlink->frame_rate.den, fps.num, fps.den);
-    outlink->flags |= FF_LINK_FLAG_REQUEST_LOOP;
     outlink->time_base  = inlink->time_base;
     outlink->frame_rate = fps;
     outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
     outlink->w = inlink->w;
     outlink->h = inlink->h;
-    dm->ts_unit = av_q2d(av_inv_q(av_mul_q(fps, outlink->time_base)));
+    dm->ts_unit = av_inv_q(av_mul_q(fps, outlink->time_base));
     return 0;
 }
 
diff --git a/libavfilter/vf_dejudder.c b/libavfilter/vf_dejudder.c
index ab525b66..c4d7b6bb 100644
--- a/libavfilter/vf_dejudder.c
+++ b/libavfilter/vf_dejudder.c
@@ -80,40 +80,40 @@ AVFILTER_DEFINE_CLASS(dejudder);
 static int config_out_props(AVFilterLink *outlink)
 {
     AVFilterContext *ctx = outlink->src;
-    DejudderContext *dj = ctx->priv;
+    DejudderContext *s = ctx->priv;
     AVFilterLink *inlink = outlink->src->inputs[0];
 
-    outlink->time_base = av_mul_q(inlink->time_base, av_make_q(1, 2 * dj->cycle));
-    outlink->frame_rate = av_mul_q(inlink->frame_rate, av_make_q(2 * dj->cycle, 1));
+    outlink->time_base = av_mul_q(inlink->time_base, av_make_q(1, 2 * s->cycle));
+    outlink->frame_rate = av_mul_q(inlink->frame_rate, av_make_q(2 * s->cycle, 1));
 
-    av_log(ctx, AV_LOG_VERBOSE, "cycle:%d\n", dj->cycle);
+    av_log(ctx, AV_LOG_VERBOSE, "cycle:%d\n", s->cycle);
 
     return 0;
 }
 
 static av_cold int dejudder_init(AVFilterContext *ctx)
 {
-    DejudderContext *dj = ctx->priv;
+    DejudderContext *s = ctx->priv;
 
-    dj->ringbuff = av_mallocz_array(dj->cycle+2, sizeof(*dj->ringbuff));
-    if (!dj->ringbuff)
+    s->ringbuff = av_mallocz_array(s->cycle+2, sizeof(*s->ringbuff));
+    if (!s->ringbuff)
         return AVERROR(ENOMEM);
 
-    dj->new_pts = 0;
-    dj->i1 = 0;
-    dj->i2 = 1;
-    dj->i3 = 2;
-    dj->i4 = 3;
-    dj->start_count = dj->cycle + 2;
+    s->new_pts = 0;
+    s->i1 = 0;
+    s->i2 = 1;
+    s->i3 = 2;
+    s->i4 = 3;
+    s->start_count = s->cycle + 2;
 
     return 0;
 }
 
 static av_cold void dejudder_uninit(AVFilterContext *ctx)
 {
-    DejudderContext *dj = ctx->priv;
+    DejudderContext *s = ctx->priv;
 
-    av_freep(&(dj->ringbuff));
+    av_freep(&(s->ringbuff));
 }
 
 static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
@@ -121,36 +121,36 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
     int k;
     AVFilterContext *ctx  = inlink->dst;
     AVFilterLink *outlink = ctx->outputs[0];
-    DejudderContext *dj   = ctx->priv;
-    int64_t *judbuff      = dj->ringbuff;
+    DejudderContext *s   = ctx->priv;
+    int64_t *judbuff      = s->ringbuff;
     int64_t next_pts      = frame->pts;
     int64_t offset;
 
     if (next_pts == AV_NOPTS_VALUE)
         return ff_filter_frame(outlink, frame);
 
-    if (dj->start_count) {
-        dj->start_count--;
-        dj->new_pts = next_pts * 2 * dj->cycle;
+    if (s->start_count) {
+        s->start_count--;
+        s->new_pts = next_pts * 2 * s->cycle;
     } else {
-        if (next_pts < judbuff[dj->i2]) {
-            offset = next_pts + judbuff[dj->i3] - judbuff[dj->i4] - judbuff[dj->i1];
-            for (k = 0; k < dj->cycle + 2; k++)
+        if (next_pts < judbuff[s->i2]) {
+            offset = next_pts + judbuff[s->i3] - judbuff[s->i4] - judbuff[s->i1];
+            for (k = 0; k < s->cycle + 2; k++)
                 judbuff[k] += offset;
         }
-        dj->new_pts += (dj->cycle - 1) * (judbuff[dj->i3] - judbuff[dj->i1])
-                    + (dj->cycle + 1) * (next_pts - judbuff[dj->i4]);
+        s->new_pts += (s->cycle - 1) * (judbuff[s->i3] - judbuff[s->i1])
+                    + (s->cycle + 1) * (next_pts - judbuff[s->i4]);
     }
 
-    judbuff[dj->i2] = next_pts;
-    dj->i1 = dj->i2;
-    dj->i2 = dj->i3;
-    dj->i3 = dj->i4;
-    dj->i4 = (dj->i4 + 1) % (dj->cycle + 2);
+    judbuff[s->i2] = next_pts;
+    s->i1 = s->i2;
+    s->i2 = s->i3;
+    s->i3 = s->i4;
+    s->i4 = (s->i4 + 1) % (s->cycle + 2);
 
-    frame->pts = dj->new_pts;
+    frame->pts = s->new_pts;
 
-    for (k = 0; k < dj->cycle + 2; k++)
+    for (k = 0; k < s->cycle + 2; k++)
         av_log(ctx, AV_LOG_DEBUG, "%"PRId64"\t", judbuff[k]);
     av_log(ctx, AV_LOG_DEBUG, "next=%"PRId64", new=%"PRId64"\n", next_pts, frame->pts);
 
diff --git a/libavfilter/vf_delogo.c b/libavfilter/vf_delogo.c
index 50a548b8..63c35394 100644
--- a/libavfilter/vf_delogo.c
+++ b/libavfilter/vf_delogo.c
@@ -1,7 +1,7 @@
 /*
  * Copyright (c) 2002 Jindrich Makovicka <makovick@gmail.com>
  * Copyright (c) 2011 Stefano Sabatini
- * Copyright (c) 2013 Jean Delvare <khali@linux-fr.org>
+ * Copyright (c) 2013, 2015 Jean Delvare <jdelvare@suse.com>
  *
  * This file is part of FFmpeg.
  *
@@ -61,7 +61,7 @@ static void apply_delogo(uint8_t *dst, int dst_linesize,
                          unsigned int band, int show, int direct)
 {
     int x, y;
-    uint64_t interp, weightl, weightr, weightt, weightb;
+    uint64_t interp, weightl, weightr, weightt, weightb, weight;
     uint8_t *xdst, *xsrc;
 
     uint8_t *topleft, *botleft, *topright;
@@ -75,13 +75,13 @@ static void apply_delogo(uint8_t *dst, int dst_linesize,
     yclipb = FFMAX(logo_y+logo_h-h, 0);
 
     logo_x1 = logo_x + xclipl;
-    logo_x2 = logo_x + logo_w - xclipr;
+    logo_x2 = logo_x + logo_w - xclipr - 1;
     logo_y1 = logo_y + yclipt;
-    logo_y2 = logo_y + logo_h - yclipb;
+    logo_y2 = logo_y + logo_h - yclipb - 1;
 
-    topleft  = src+logo_y1     * src_linesize+logo_x1;
-    topright = src+logo_y1     * src_linesize+logo_x2-1;
-    botleft  = src+(logo_y2-1) * src_linesize+logo_x1;
+    topleft  = src+logo_y1 * src_linesize+logo_x1;
+    topright = src+logo_y1 * src_linesize+logo_x2;
+    botleft  = src+logo_y2 * src_linesize+logo_x1;
 
     if (!direct)
         av_image_copy_plane(dst, dst_linesize, src, src_linesize, w, h);
@@ -89,7 +89,7 @@ static void apply_delogo(uint8_t *dst, int dst_linesize,
     dst += (logo_y1 + 1) * dst_linesize;
     src += (logo_y1 + 1) * src_linesize;
 
-    for (y = logo_y1+1; y < logo_y2-1; y++) {
+    for (y = logo_y1+1; y < logo_y2; y++) {
         left_sample = topleft[src_linesize*(y-logo_y1)]   +
                       topleft[src_linesize*(y-logo_y1-1)] +
                       topleft[src_linesize*(y-logo_y1+1)];
@@ -99,13 +99,19 @@ static void apply_delogo(uint8_t *dst, int dst_linesize,
 
         for (x = logo_x1+1,
              xdst = dst+logo_x1+1,
-             xsrc = src+logo_x1+1; x < logo_x2-1; x++, xdst++, xsrc++) {
+             xsrc = src+logo_x1+1; x < logo_x2; x++, xdst++, xsrc++) {
+
+            if (show && (y == logo_y1+1 || y == logo_y2-1 ||
+                         x == logo_x1+1 || x == logo_x2-1)) {
+                *xdst = 0;
+                continue;
+            }
 
             /* Weighted interpolation based on relative distances, taking SAR into account */
-            weightl = (uint64_t)              (logo_x2-1-x) * (y-logo_y1) * (logo_y2-1-y) * sar.den;
-            weightr = (uint64_t)(x-logo_x1)                 * (y-logo_y1) * (logo_y2-1-y) * sar.den;
-            weightt = (uint64_t)(x-logo_x1) * (logo_x2-1-x)               * (logo_y2-1-y) * sar.num;
-            weightb = (uint64_t)(x-logo_x1) * (logo_x2-1-x) * (y-logo_y1)                 * sar.num;
+            weightl = (uint64_t)              (logo_x2-x) * (y-logo_y1) * (logo_y2-y) * sar.den;
+            weightr = (uint64_t)(x-logo_x1)               * (y-logo_y1) * (logo_y2-y) * sar.den;
+            weightt = (uint64_t)(x-logo_x1) * (logo_x2-x)               * (logo_y2-y) * sar.num;
+            weightb = (uint64_t)(x-logo_x1) * (logo_x2-x) * (y-logo_y1)               * sar.num;
 
             interp =
                 left_sample * weightl
@@ -119,7 +125,8 @@ static void apply_delogo(uint8_t *dst, int dst_linesize,
                 (botleft[x-logo_x1]    +
                  botleft[x-logo_x1-1]  +
                  botleft[x-logo_x1+1]) * weightb;
-            interp /= (weightl + weightr + weightt + weightb) * 3U;
+            weight = (weightl + weightr + weightt + weightb) * 3U;
+            interp = ROUNDED_DIV(interp, weight);
 
             if (y >= logo_y+band && y < logo_y+logo_h-band &&
                 x >= logo_x+band && x < logo_x+logo_w-band) {
@@ -138,8 +145,6 @@ static void apply_delogo(uint8_t *dst, int dst_linesize,
                     dist = FFMAX(dist, y-(logo_y+logo_h-1-band));
 
                 *xdst = (*xsrc*dist + interp*(band-dist))/band;
-                if (show && (dist == band-1))
-                    *xdst = 0;
             }
         }
 
@@ -161,9 +166,12 @@ static const AVOption delogo_options[]= {
     { "y",    "set logo y position",       OFFSET(y),    AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, FLAGS },
     { "w",    "set logo width",            OFFSET(w),    AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, FLAGS },
     { "h",    "set logo height",           OFFSET(h),    AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, FLAGS },
-    { "band", "set delogo area band size", OFFSET(band), AV_OPT_TYPE_INT, { .i64 =  4 },  1, INT_MAX, FLAGS },
-    { "t",    "set delogo area band size", OFFSET(band), AV_OPT_TYPE_INT, { .i64 =  4 },  1, INT_MAX, FLAGS },
-    { "show", "show delogo area",          OFFSET(show), AV_OPT_TYPE_INT, { .i64 =  0 },  0, 1,       FLAGS },
+#if LIBAVFILTER_VERSION_MAJOR < 7
+    /* Actual default value for band/t is 1, set in init */
+    { "band", "set delogo area band size", OFFSET(band), AV_OPT_TYPE_INT, { .i64 =  0 },  0, INT_MAX, FLAGS },
+    { "t",    "set delogo area band size", OFFSET(band), AV_OPT_TYPE_INT, { .i64 =  0 },  0, INT_MAX, FLAGS },
+#endif
+    { "show", "show delogo area",          OFFSET(show), AV_OPT_TYPE_BOOL,{ .i64 =  0 },  0, 1,       FLAGS },
     { NULL }
 };
 
@@ -197,6 +205,16 @@ static av_cold int init(AVFilterContext *ctx)
     CHECK_UNSET_OPT(w);
     CHECK_UNSET_OPT(h);
 
+#if LIBAVFILTER_VERSION_MAJOR < 7
+    if (s->band == 0) { /* Unset, use default */
+        av_log(ctx, AV_LOG_WARNING, "Note: default band value was changed from 4 to 1.\n");
+        s->band = 1;
+    } else if (s->band != 1) {
+        av_log(ctx, AV_LOG_WARNING, "Option band is deprecated.\n");
+    }
+#else
+    s->band = 1;
+#endif
     av_log(ctx, AV_LOG_VERBOSE, "x:%d y:%d, w:%d h:%d band:%d show:%d\n",
            s->x, s->y, s->w, s->h, s->band, s->show);
 
@@ -238,19 +256,19 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
     if (!sar.num)
         sar.num = sar.den = 1;
 
-    for (plane = 0; plane < 4 && in->data[plane] && in->linesize[plane]; plane++) {
+    for (plane = 0; plane < desc->nb_components; plane++) {
         int hsub = plane == 1 || plane == 2 ? hsub0 : 0;
         int vsub = plane == 1 || plane == 2 ? vsub0 : 0;
 
         apply_delogo(out->data[plane], out->linesize[plane],
                      in ->data[plane], in ->linesize[plane],
-                     FF_CEIL_RSHIFT(inlink->w, hsub),
-                     FF_CEIL_RSHIFT(inlink->h, vsub),
+                     AV_CEIL_RSHIFT(inlink->w, hsub),
+                     AV_CEIL_RSHIFT(inlink->h, vsub),
                      sar, s->x>>hsub, s->y>>vsub,
                      /* Up and left borders were rounded down, inject lost bits
                       * into width and height to avoid error accumulation */
-                     FF_CEIL_RSHIFT(s->w + (s->x & ((1<<hsub)-1)), hsub),
-                     FF_CEIL_RSHIFT(s->h + (s->y & ((1<<vsub)-1)), vsub),
+                     AV_CEIL_RSHIFT(s->w + (s->x & ((1<<hsub)-1)), hsub),
+                     AV_CEIL_RSHIFT(s->h + (s->y & ((1<<vsub)-1)), vsub),
                      s->band>>FFMIN(hsub, vsub),
                      s->show, direct);
     }
diff --git a/libavfilter/vf_deshake.c b/libavfilter/vf_deshake.c
index cd06ee7d..4eae9885 100644
--- a/libavfilter/vf_deshake.c
+++ b/libavfilter/vf_deshake.c
@@ -57,13 +57,11 @@
 #include "libavutil/mem.h"
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
+#include "libavutil/qsort.h"
 
 #include "deshake.h"
 #include "deshake_opencl.h"
 
-#define CHROMA_WIDTH(link)  (-((-(link)->w) >> av_pix_fmt_desc_get((link)->format)->log2_chroma_w))
-#define CHROMA_HEIGHT(link) (-((-(link)->h) >> av_pix_fmt_desc_get((link)->format)->log2_chroma_h))
-
 #define OFFSET(x) offsetof(DeshakeContext, x)
 #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
 
@@ -85,15 +83,15 @@ static const AVOption deshake_options[] = {
         { "exhaustive", "exhaustive search",      0, AV_OPT_TYPE_CONST, {.i64=EXHAUSTIVE},       INT_MIN, INT_MAX, FLAGS, "smode" },
         { "less",       "less exhaustive search", 0, AV_OPT_TYPE_CONST, {.i64=SMART_EXHAUSTIVE}, INT_MIN, INT_MAX, FLAGS, "smode" },
     { "filename", "set motion search detailed log file name", OFFSET(filename), AV_OPT_TYPE_STRING, {.str=NULL}, .flags = FLAGS },
-    { "opencl", "use OpenCL filtering capabilities", OFFSET(opencl), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, .flags = FLAGS },
+    { "opencl", "use OpenCL filtering capabilities", OFFSET(opencl), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, .flags = FLAGS },
     { NULL }
 };
 
 AVFILTER_DEFINE_CLASS(deshake);
 
-static int cmp(const double *a, const double *b)
+static int cmp(const void *a, const void *b)
 {
-    return *a < *b ? -1 : ( *a > *b ? 1 : 0 );
+    return FFDIFFSIGN(*(const double *)a, *(const double *)b);
 }
 
 /**
@@ -105,7 +103,7 @@ static double clean_mean(double *values, int count)
     int cut = count / 5;
     int x;
 
-    qsort(values, count, sizeof(double), (void*)cmp);
+    AV_QSORT(values, count, double, cmp);
 
     for (x = cut; x < count - cut; x++) {
         mean += values[x];
@@ -438,6 +436,9 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
     float alpha = 2.0 / deshake->refcount;
     char tmp[256];
     int ret = 0;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(link->format);
+    const int chroma_width  = AV_CEIL_RSHIFT(link->w, desc->log2_chroma_w);
+    const int chroma_height = AV_CEIL_RSHIFT(link->h, desc->log2_chroma_h);
 
     out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
     if (!out) {
@@ -526,9 +527,9 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
     // Generate a luma transformation matrix
     avfilter_get_matrix(t.vec.x, t.vec.y, t.angle, 1.0 + t.zoom / 100.0, matrix_y);
     // Generate a chroma transformation matrix
-    avfilter_get_matrix(t.vec.x / (link->w / CHROMA_WIDTH(link)), t.vec.y / (link->h / CHROMA_HEIGHT(link)), t.angle, 1.0 + t.zoom / 100.0, matrix_uv);
+    avfilter_get_matrix(t.vec.x / (link->w / chroma_width), t.vec.y / (link->h / chroma_height), t.angle, 1.0 + t.zoom / 100.0, matrix_uv);
     // Transform the luma and chroma planes
-    ret = deshake->transform(link->dst, link->w, link->h, CHROMA_WIDTH(link), CHROMA_HEIGHT(link),
+    ret = deshake->transform(link->dst, link->w, link->h, chroma_width, chroma_height,
                              matrix_y, matrix_uv, INTERPOLATE_BILINEAR, deshake->edge, in, out);
 
     // Cleanup the old reference frame
diff --git a/libavfilter/vf_detelecine.c b/libavfilter/vf_detelecine.c
index 44379a3e..6fd9aade 100644
--- a/libavfilter/vf_detelecine.c
+++ b/libavfilter/vf_detelecine.c
@@ -116,14 +116,15 @@ static av_cold int init(AVFilterContext *ctx)
 static int query_formats(AVFilterContext *ctx)
 {
     AVFilterFormats *pix_fmts = NULL;
-    int fmt;
+    int fmt, ret;
 
     for (fmt = 0; av_pix_fmt_desc_get(fmt); fmt++) {
         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);
         if (!(desc->flags & AV_PIX_FMT_FLAG_HWACCEL ||
               desc->flags & AV_PIX_FMT_FLAG_PAL     ||
-              desc->flags & AV_PIX_FMT_FLAG_BITSTREAM))
-            ff_add_format(&pix_fmts, fmt);
+              desc->flags & AV_PIX_FMT_FLAG_BITSTREAM) &&
+             (ret = ff_add_format(&pix_fmts, fmt)) < 0)
+            return ret;
     }
 
     return ff_set_common_formats(ctx, pix_fmts);
@@ -146,7 +147,7 @@ static int config_input(AVFilterLink *inlink)
     if ((ret = av_image_fill_linesizes(s->stride, inlink->format, inlink->w)) < 0)
         return ret;
 
-    s->planeheight[1] = s->planeheight[2] = FF_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
+    s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
     s->planeheight[0] = s->planeheight[3] = inlink->h;
 
     s->nb_planes = av_pix_fmt_count_planes(inlink->format);
@@ -170,7 +171,6 @@ static int config_output(AVFilterLink *outlink)
     av_log(ctx, AV_LOG_VERBOSE, "FPS: %d/%d -> %d/%d\n",
            inlink->frame_rate.num, inlink->frame_rate.den, fps.num, fps.den);
 
-    outlink->flags |= FF_LINK_FLAG_REQUEST_LOOP;
     outlink->frame_rate = fps;
     outlink->time_base = av_mul_q(inlink->time_base, s->pts);
     av_log(ctx, AV_LOG_VERBOSE, "TB: %d/%d -> %d/%d\n",
diff --git a/libavfilter/vf_displace.c b/libavfilter/vf_displace.c
new file mode 100644
index 00000000..9daa0c9d
--- /dev/null
+++ b/libavfilter/vf_displace.c
@@ -0,0 +1,395 @@
+/*
+ * Copyright (c) 2013 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/imgutils.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/opt.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "framesync.h"
+#include "internal.h"
+#include "video.h"
+
+enum EdgeMode {
+    EDGE_BLANK,   /* source positions outside the plane yield the blank value */
+    EDGE_SMEAR,   /* source positions are clamped to the plane borders */
+    EDGE_WRAP,    /* source positions wrap around modulo the plane size */
+    EDGE_NB       /* number of modes; EDGE_NB-1 is the upper bound of the "edge" option */
+};
+
+typedef struct DisplaceContext {
+    const AVClass *class;
+    int width[4], height[4];   /* per-plane dimensions, filled in config_input */
+    enum EdgeMode edge;        /* value of the "edge" option */
+    int nb_planes;             /* av_pix_fmt_count_planes() of the input format */
+    int nb_components;         /* nb_components of the input pixel format descriptor */
+    int step;                  /* av_get_padded_bits_per_pixel() >> 3: bytes per packed pixel */
+    uint8_t blank[4];          /* value written by EDGE_BLANK, indexed by plane (planar) or byte (packed) */
+    FFFrameSync fs;            /* synchronizes the source, xmap and ymap inputs */
+    /* worker selected in config_input: displace_planar or displace_packed */
+    void (*displace)(struct DisplaceContext *s, const AVFrame *in,
+                     const AVFrame *xpic, const AVFrame *ypic, AVFrame *out);
+} DisplaceContext;
+
+#define OFFSET(x) offsetof(DisplaceContext, x)
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
+
+static const AVOption displace_options[] = { /* single option "edge" (default: smear) with three named values */
+    { "edge", "set edge mode", OFFSET(edge), AV_OPT_TYPE_INT, {.i64=EDGE_SMEAR}, 0, EDGE_NB-1, FLAGS, "edge" },
+    {   "blank", "", 0, AV_OPT_TYPE_CONST, {.i64=EDGE_BLANK}, 0, 0, FLAGS, "edge" },
+    {   "smear", "", 0, AV_OPT_TYPE_CONST, {.i64=EDGE_SMEAR}, 0, 0, FLAGS, "edge" },
+    {   "wrap" , "", 0, AV_OPT_TYPE_CONST, {.i64=EDGE_WRAP},  0, 0, FLAGS, "edge" },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(displace);
+
+/* Pass the list of supported pixel formats to ff_set_common_formats(). */
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat supported_fmts[] = {
+        AV_PIX_FMT_YUVA444P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P,
+        AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUV420P,
+        AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ411P,
+        AV_PIX_FMT_YUV411P,  AV_PIX_FMT_YUV410P,
+        AV_PIX_FMT_RGB24,    AV_PIX_FMT_BGR24,
+        AV_PIX_FMT_ARGB,     AV_PIX_FMT_ABGR, AV_PIX_FMT_RGBA, AV_PIX_FMT_BGRA,
+        AV_PIX_FMT_0RGB,     AV_PIX_FMT_0BGR, AV_PIX_FMT_RGB0, AV_PIX_FMT_BGR0,
+        AV_PIX_FMT_GBRP,     AV_PIX_FMT_GBRAP, AV_PIX_FMT_GRAY8,
+        AV_PIX_FMT_NONE      /* list terminator */
+    };
+    AVFilterFormats *formats = ff_make_format_list(supported_fmts);
+    return ff_set_common_formats(ctx, formats);
+}
+
+/* Planar path: every plane is handled on its own, reading one 128-biased
+ * offset byte per sample from each of the two maps. */
+static void displace_planar(DisplaceContext *s, const AVFrame *in,
+                            const AVFrame *xpic, const AVFrame *ypic,
+                            AVFrame *out)
+{
+    int p;
+
+    for (p = 0; p < s->nb_planes; p++) {
+        const uint8_t *src   = in->data[p];
+        const uint8_t *xmap  = xpic->data[p];
+        const uint8_t *ymap  = ypic->data[p];
+        uint8_t *dst         = out->data[p];
+        const int src_stride = in->linesize[p];
+        const int rows       = s->height[p];
+        const int cols       = s->width[p];
+        const uint8_t fill   = s->blank[p];
+        int row, col;
+
+        for (row = 0; row < rows; row++) {
+            for (col = 0; col < cols; col++) {
+                /* source position = current position + (map byte - 128) */
+                int sy = row + ymap[col] - 128;
+                int sx = col + xmap[col] - 128;
+
+                switch (s->edge) {
+                case EDGE_BLANK:
+                    /* outside the plane: emit the blank value instead */
+                    if (sy < 0 || sy >= rows || sx < 0 || sx >= cols) {
+                        dst[col] = fill;
+                        continue;
+                    }
+                    break;
+                case EDGE_SMEAR:
+                    /* clamp onto the nearest sample inside the plane */
+                    sy = av_clip(sy, 0, rows - 1);
+                    sx = av_clip(sx, 0, cols - 1);
+                    break;
+                case EDGE_WRAP:
+                    /* % can yield a negative remainder, so fix it up */
+                    sy %= rows;
+                    sx %= cols;
+                    if (sy < 0)
+                        sy += rows;
+                    if (sx < 0)
+                        sx += cols;
+                    break;
+                default:
+                    continue;
+                }
+                dst[col] = src[sy * src_stride + sx];
+            }
+
+            ymap += ypic->linesize[p];
+            xmap += xpic->linesize[p];
+            dst  += out->linesize[p];
+        }
+    }
+}
+
+/* Packed path: the single plane holds 'step' bytes per pixel and every byte
+ * is displaced on its own, using the byte at the same position in the maps. */
+static void displace_packed(DisplaceContext *s, const AVFrame *in,
+                            const AVFrame *xpic, const AVFrame *ypic,
+                            AVFrame *out)
+{
+    const int step = s->step;
+    const int h = s->height[0];
+    const int w = s->width[0];
+    const int dlinesize = out->linesize[0];
+    const int slinesize = in->linesize[0];
+    const int xlinesize = xpic->linesize[0];
+    const int ylinesize = ypic->linesize[0];
+    const uint8_t *src = in->data[0];
+    const uint8_t *ysrc = ypic->data[0];
+    const uint8_t *xsrc = xpic->data[0];
+    const uint8_t *blank = s->blank;
+    uint8_t *dst = out->data[0];
+    int c, x, y;
+
+    for (y = 0; y < h; y++) {
+        for (x = 0; x < w; x++) {
+            /*
+             * Visit all 'step' bytes of the pixel, not just nb_components:
+             * padded formats (0RGB, RGB0, ...) keep three components in
+             * four bytes, and stopping early would leave the fourth byte
+             * (a colour channel for 0RGB/0BGR) unwritten in the output.
+             */
+            for (c = 0; c < step; c++) {
+                const int i = x * step + c;
+                int Y = y + (ysrc[i] - 128);
+                int X = x + (xsrc[i] - 128);
+
+                switch (s->edge) {
+                case EDGE_BLANK:
+                    if (Y < 0 || Y >= h || X < 0 || X >= w) {
+                        dst[i] = blank[c];
+                        continue;
+                    }
+                    break;
+                case EDGE_SMEAR:
+                    Y = av_clip(Y, 0, h - 1);
+                    X = av_clip(X, 0, w - 1);
+                    break;
+                case EDGE_WRAP:
+                    Y %= h;
+                    X %= w;
+                    if (Y < 0)
+                        Y += h;
+                    if (X < 0)
+                        X += w;
+                    break;
+                default:
+                    continue; /* unknown mode: leave the byte untouched */
+                }
+                dst[i] = src[Y * slinesize + X * step + c];
+            }
+        }
+
+        ysrc += ylinesize;
+        xsrc += xlinesize;
+        dst  += dlinesize;
+    }
+}
+
+/* Frame-sync event callback (installed in config_output): fetch the current
+ * source, xmap and ymap frames and send one displaced frame downstream. */
+static int process_frame(FFFrameSync *fs)
+{
+    AVFilterContext *ctx = fs->parent;
+    DisplaceContext *s = fs->opaque;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AVFrame *pic[3], *dst;   /* pic[]: source, xmap, ymap */
+    int i, err;
+
+    for (i = 0; i < 3; i++) {
+        err = ff_framesync_get_frame(&s->fs, i, &pic[i], 0);
+        if (err < 0)
+            return err;
+    }
+
+    if (!ctx->is_disabled) {
+        dst = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+        if (!dst)
+            return AVERROR(ENOMEM);
+        av_frame_copy_props(dst, pic[0]);
+        s->displace(s, pic[0], pic[1], pic[2], dst);
+    } else if (!(dst = av_frame_clone(pic[0]))) { /* filter disabled: pass the source on */
+        return AVERROR(ENOMEM);
+    }
+    dst->pts = av_rescale_q(pic[0]->pts, s->fs.time_base, outlink->time_base);
+
+    return ff_filter_frame(outlink, dst);
+}
+
+/* Source-link setup: record plane geometry, blank values and the worker to use. */
+static int config_input(AVFilterLink *inlink)
+{
+    DisplaceContext *s = inlink->dst->priv;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    const int sub_w = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
+    const int sub_h = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
+
+    s->nb_components = desc->nb_components;
+    s->nb_planes     = av_pix_fmt_count_planes(inlink->format);
+    s->step          = av_get_padded_bits_per_pixel(desc) >> 3;
+
+    /* only a single plane carrying several components takes the packed path */
+    if (s->nb_planes <= 1 && s->nb_components != 1)
+        s->displace = displace_packed;
+    else
+        s->displace = displace_planar;
+
+    /* non-RGB formats blank to 16 in plane 0 and 128 in planes 1 and 2 */
+    if (!(desc->flags & AV_PIX_FMT_FLAG_RGB)) {
+        s->blank[0] = 16;
+        s->blank[1] = s->blank[2] = 128;
+    }
+
+    s->width[0]  = s->width[3]  = inlink->w;
+    s->width[1]  = s->width[2]  = sub_w;
+    s->height[0] = s->height[3] = inlink->h;
+    s->height[1] = s->height[2] = sub_h;
+    return 0;
+}
+
+/* Output-link setup: check that both maps match the source link, copy the
+ * source link's properties to the output and configure the frame sync. */
+static int config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    DisplaceContext *s = ctx->priv;
+    AVFilterLink *srclink = ctx->inputs[0];
+    AVFilterLink *xlink = ctx->inputs[1];
+    AVFilterLink *ylink = ctx->inputs[2];
+    const AVRational sar = srclink->sample_aspect_ratio;
+    FFFrameSyncIn *in;
+    int i, ret;
+
+    if (xlink->format != srclink->format ||
+        ylink->format != srclink->format) {
+        av_log(ctx, AV_LOG_ERROR, "inputs must be of same pixel format\n");
+        return AVERROR(EINVAL);
+    }
+    if (xlink->w != srclink->w || xlink->h != srclink->h ||
+        xlink->sample_aspect_ratio.num != sar.num ||
+        xlink->sample_aspect_ratio.den != sar.den ||
+        ylink->w != srclink->w || ylink->h != srclink->h ||
+        ylink->sample_aspect_ratio.num != sar.num ||
+        ylink->sample_aspect_ratio.den != sar.den) {
+        av_log(ctx, AV_LOG_ERROR, "First input link %s parameters "
+               "(size %dx%d, SAR %d:%d) do not match the corresponding "
+               "second input link %s parameters (%dx%d, SAR %d:%d) "
+               "and/or third input link %s parameters (%dx%d, SAR %d:%d)\n",
+               ctx->input_pads[0].name, srclink->w, srclink->h,
+               sar.num, sar.den,
+               ctx->input_pads[1].name, xlink->w, xlink->h,
+               xlink->sample_aspect_ratio.num, xlink->sample_aspect_ratio.den,
+               ctx->input_pads[2].name, ylink->w, ylink->h,
+               ylink->sample_aspect_ratio.num, ylink->sample_aspect_ratio.den);
+        return AVERROR(EINVAL);
+    }
+
+    outlink->w                   = srclink->w;
+    outlink->h                   = srclink->h;
+    outlink->time_base           = srclink->time_base;
+    outlink->frame_rate          = srclink->frame_rate;
+    outlink->sample_aspect_ratio = sar;
+
+    ret = ff_framesync_init(&s->fs, ctx, 3);
+    if (ret < 0)
+        return ret;
+    in = s->fs.in;
+
+    /* source input: sync level 2, EXT_STOP on both sides */
+    in[0].time_base = srclink->time_base;
+    in[0].sync      = 2;
+    in[0].before    = EXT_STOP;
+    in[0].after     = EXT_STOP;
+
+    /* xmap and ymap: sync level 1, EXT_NULL before, EXT_INFINITY after */
+    for (i = 1; i <= 2; i++) {
+        in[i].time_base = ctx->inputs[i]->time_base;
+        in[i].sync      = 1;
+        in[i].before    = EXT_NULL;
+        in[i].after     = EXT_INFINITY;
+    }
+
+    s->fs.on_event = process_frame;
+    s->fs.opaque   = s;
+
+    return ff_framesync_configure(&s->fs);
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *buf) /* shared by all three input pads */
+{
+    DisplaceContext *s = inlink->dst->priv;
+    return ff_framesync_filter_frame(&s->fs, inlink, buf); /* hand the frame to the frame sync */
+}
+
+static int request_frame(AVFilterLink *outlink) /* request_frame callback of the output pad */
+{
+    DisplaceContext *s = outlink->src->priv;
+    return ff_framesync_request_frame(&s->fs, outlink); /* delegate the request to the frame sync */
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    DisplaceContext *s = ctx->priv;
+    /* tear down the frame sync that config_output initializes */
+    ff_framesync_uninit(&s->fs);
+}
+
+static const AVFilterPad displace_inputs[] = {
+    {
+        .name         = "source", /* input 0: the video whose pixels are displaced */
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame,
+        .config_props = config_input, /* only this pad caches geometry in the context */
+    },
+    {
+        .name         = "xmap", /* input 1: horizontal offsets (read as xpic) */
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame,
+    },
+    {
+        .name         = "ymap", /* input 2: vertical offsets (read as ypic) */
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame,
+    },
+    { NULL } /* zeroed entry closing the list */
+};
+
+static const AVFilterPad displace_outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .config_props  = config_output, /* validates the inputs and sets up the frame sync */
+        .request_frame = request_frame,
+    },
+    { NULL } /* zeroed entry closing the list */
+};
+
+AVFilter ff_vf_displace = { /* filter definition registered under the name "displace" */
+    .name          = "displace",
+    .description   = NULL_IF_CONFIG_SMALL("Displace pixels."),
+    .priv_size     = sizeof(DisplaceContext),
+    .uninit        = uninit,
+    .query_formats = query_formats,
+    .inputs        = displace_inputs,
+    .outputs       = displace_outputs,
+    .priv_class    = &displace_class,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL, /* process_frame checks ctx->is_disabled itself */
+};
diff --git a/libavfilter/vf_drawtext.c b/libavfilter/vf_drawtext.c
index 16e3383d..65c2c58b 100644
--- a/libavfilter/vf_drawtext.c
+++ b/libavfilter/vf_drawtext.c
@@ -207,13 +207,13 @@ static const AVOption drawtext_options[]= {
     {"boxcolor",    "set box color",        OFFSET(boxcolor.rgba),      AV_OPT_TYPE_COLOR,  {.str="white"}, CHAR_MIN, CHAR_MAX, FLAGS},
     {"bordercolor", "set border color",     OFFSET(bordercolor.rgba),   AV_OPT_TYPE_COLOR,  {.str="black"}, CHAR_MIN, CHAR_MAX, FLAGS},
     {"shadowcolor", "set shadow color",     OFFSET(shadowcolor.rgba),   AV_OPT_TYPE_COLOR,  {.str="black"}, CHAR_MIN, CHAR_MAX, FLAGS},
-    {"box",         "set box",              OFFSET(draw_box),           AV_OPT_TYPE_INT,    {.i64=0},     0,        1       , FLAGS},
+    {"box",         "set box",              OFFSET(draw_box),           AV_OPT_TYPE_BOOL,   {.i64=0},     0,        1       , FLAGS},
     {"boxborderw",  "set box border width", OFFSET(boxborderw),         AV_OPT_TYPE_INT,    {.i64=0},     INT_MIN,  INT_MAX , FLAGS},
     {"fontsize",    "set font size",        OFFSET(fontsize),           AV_OPT_TYPE_INT,    {.i64=0},     0,        INT_MAX , FLAGS},
     {"x",           "set x expression",     OFFSET(x_expr),             AV_OPT_TYPE_STRING, {.str="0"},   CHAR_MIN, CHAR_MAX, FLAGS},
     {"y",           "set y expression",     OFFSET(y_expr),             AV_OPT_TYPE_STRING, {.str="0"},   CHAR_MIN, CHAR_MAX, FLAGS},
-    {"shadowx",     "set x",                OFFSET(shadowx),            AV_OPT_TYPE_INT,    {.i64=0},     INT_MIN,  INT_MAX , FLAGS},
-    {"shadowy",     "set y",                OFFSET(shadowy),            AV_OPT_TYPE_INT,    {.i64=0},     INT_MIN,  INT_MAX , FLAGS},
+    {"shadowx",     "set shadow x offset",  OFFSET(shadowx),            AV_OPT_TYPE_INT,    {.i64=0},     INT_MIN,  INT_MAX , FLAGS},
+    {"shadowy",     "set shadow y offset",  OFFSET(shadowy),            AV_OPT_TYPE_INT,    {.i64=0},     INT_MIN,  INT_MAX , FLAGS},
     {"borderw",     "set border width",     OFFSET(borderw),            AV_OPT_TYPE_INT,    {.i64=0},     INT_MIN,  INT_MAX , FLAGS},
     {"tabsize",     "set tab size",         OFFSET(tabsize),            AV_OPT_TYPE_INT,    {.i64=4},     0,        INT_MAX , FLAGS},
     {"basetime",    "set base time",        OFFSET(basetime),           AV_OPT_TYPE_INT64,  {.i64=AV_NOPTS_VALUE}, INT64_MIN, INT64_MAX , FLAGS},
@@ -227,17 +227,17 @@ static const AVOption drawtext_options[]= {
         {"strftime", "set strftime expansion (deprecated)", OFFSET(exp_mode), AV_OPT_TYPE_CONST, {.i64=EXP_STRFTIME}, 0, 0, FLAGS, "expansion"},
 
     {"timecode",        "set initial timecode",             OFFSET(tc_opt_string), AV_OPT_TYPE_STRING,   {.str=NULL}, CHAR_MIN, CHAR_MAX, FLAGS},
-    {"tc24hmax",        "set 24 hours max (timecode only)", OFFSET(tc24hmax),      AV_OPT_TYPE_INT,      {.i64=0},           0,        1, FLAGS},
+    {"tc24hmax",        "set 24 hours max (timecode only)", OFFSET(tc24hmax),      AV_OPT_TYPE_BOOL,     {.i64=0},           0,        1, FLAGS},
     {"timecode_rate",   "set rate (timecode only)",         OFFSET(tc_rate),       AV_OPT_TYPE_RATIONAL, {.dbl=0},           0,  INT_MAX, FLAGS},
     {"r",               "set rate (timecode only)",         OFFSET(tc_rate),       AV_OPT_TYPE_RATIONAL, {.dbl=0},           0,  INT_MAX, FLAGS},
     {"rate",            "set rate (timecode only)",         OFFSET(tc_rate),       AV_OPT_TYPE_RATIONAL, {.dbl=0},           0,  INT_MAX, FLAGS},
-    {"reload",     "reload text file for each frame",                       OFFSET(reload),     AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS},
+    {"reload",     "reload text file for each frame",                       OFFSET(reload),     AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
     { "alpha",       "apply alpha while rendering", OFFSET(a_expr),      AV_OPT_TYPE_STRING, { .str = "1"     },          .flags = FLAGS },
-    {"fix_bounds", "if true, check and fix text coords to avoid clipping",  OFFSET(fix_bounds), AV_OPT_TYPE_INT, {.i64=1}, 0, 1, FLAGS},
+    {"fix_bounds", "check and fix text coords to avoid clipping", OFFSET(fix_bounds), AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1, FLAGS},
     {"start_number", "start frame number for n/frame_num variable", OFFSET(start_number), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS},
 
 #if CONFIG_LIBFRIBIDI
-    {"text_shaping", "attempt to shape text before drawing", OFFSET(text_shaping), AV_OPT_TYPE_INT, {.i64=1}, 0, 1, FLAGS},
+    {"text_shaping", "attempt to shape text before drawing", OFFSET(text_shaping), AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1, FLAGS},
 #endif
 
     /* FT_LOAD_* flags */
@@ -288,7 +288,7 @@ typedef struct Glyph {
     int bitmap_top;
 } Glyph;
 
-static int glyph_cmp(void *key, const void *b)
+static int glyph_cmp(const void *key, const void *b)
 {
     const Glyph *a = key, *bb = b;
     int64_t diff = (int64_t)a->code - (int64_t)bb->code;
@@ -372,8 +372,10 @@ static int load_font_file(AVFilterContext *ctx, const char *path, int index)
 
     err = FT_New_Face(s->library, path, index, &s->face);
     if (err) {
+#if !CONFIG_LIBFONTCONFIG
         av_log(ctx, AV_LOG_ERROR, "Could not load font \"%s\": %s\n",
                s->fontfile, FT_ERRMSG(err));
+#endif
         return AVERROR(EINVAL);
     }
     return 0;
@@ -610,12 +612,6 @@ static av_cold int init(AVFilterContext *ctx)
             return err;
     }
 
-#if CONFIG_LIBFRIBIDI
-    if (s->text_shaping)
-        if ((err = shape_text(ctx)) < 0)
-            return err;
-#endif
-
     if (s->reload && !s->textfile)
         av_log(ctx, AV_LOG_WARNING, "No file to reload\n");
 
@@ -636,6 +632,12 @@ static av_cold int init(AVFilterContext *ctx)
         return AVERROR(EINVAL);
     }
 
+#if CONFIG_LIBFRIBIDI
+    if (s->text_shaping)
+        if ((err = shape_text(ctx)) < 0)
+            return err;
+#endif
+
     if ((err = FT_Init_FreeType(&(s->library)))) {
         av_log(ctx, AV_LOG_ERROR,
                "Could not load FreeType: %s\n", FT_ERRMSG(err));
@@ -812,7 +814,7 @@ static int func_pts(AVFilterContext *ctx, AVBPrint *bp,
         if (isnan(pts)) {
             av_bprintf(bp, " ??:??:??.???");
         } else {
-            int64_t ms = round(pts * 1000);
+            int64_t ms = llrint(pts * 1000);
             char sign = ' ';
             if (ms < 0) {
                 sign = '-';
@@ -822,8 +824,18 @@ static int func_pts(AVFilterContext *ctx, AVBPrint *bp,
                        (int)(ms / (60 * 60 * 1000)),
                        (int)(ms / (60 * 1000)) % 60,
                        (int)(ms / 1000) % 60,
-                       (int)ms % 1000);
+                       (int)(ms % 1000));
         }
+    } else if (!strcmp(fmt, "localtime") ||
+               !strcmp(fmt, "gmtime")) {
+        struct tm tm;
+        time_t ms = (time_t)pts;
+        const char *timefmt = argc >= 3 ? argv[2] : "%Y-%m-%d %H:%M:%S";
+        if (!strcmp(fmt, "localtime"))
+            localtime_r(&ms, &tm);
+        else
+            gmtime_r(&ms, &tm);
+        av_bprint_strftime(bp, timefmt, &tm);
     } else {
         av_log(ctx, AV_LOG_ERROR, "Invalid format '%s'\n", fmt);
         return AVERROR(EINVAL);
@@ -958,7 +970,7 @@ static const struct drawtext_function {
     { "expr_int_format", 2, 3, 0, func_eval_expr_int_format },
     { "eif",       2, 3, 0,   func_eval_expr_int_format },
     { "pict_type", 0, 0, 0,   func_pict_type },
-    { "pts",       0, 2, 0,   func_pts      },
+    { "pts",       0, 3, 0,   func_pts      },
     { "gmtime",    0, 1, 'G', func_strftime },
     { "localtime", 0, 1, 'L', func_strftime },
     { "frame_num", 0, 0, 0,   func_frame_num },
@@ -1077,7 +1089,7 @@ static int draw_glyphs(DrawTextContext *s, AVFrame *frame,
             continue;
 
         dummy.code = code;
-        glyph = av_tree_find(s->glyphs, &dummy, (void *)glyph_cmp, NULL);
+        glyph = av_tree_find(s->glyphs, &dummy, glyph_cmp, NULL);
 
         bitmap = borderw ? glyph->border_bitmap : glyph->bitmap;
 
@@ -1210,7 +1222,9 @@ static int draw_text(AVFilterContext *ctx, AVFrame *frame,
         dummy.code = code;
         glyph = av_tree_find(s->glyphs, &dummy, glyph_cmp, NULL);
         if (!glyph) {
-            load_glyph(ctx, &glyph, code);
+            ret = load_glyph(ctx, &glyph, code);
+            if (ret < 0)
+                return ret;
         }
 
         y_min = FFMIN(glyph->bbox.yMin, y_min);
diff --git a/libavfilter/vf_elbg.c b/libavfilter/vf_elbg.c
index 86560708..b34136bd 100644
--- a/libavfilter/vf_elbg.c
+++ b/libavfilter/vf_elbg.c
@@ -33,7 +33,7 @@
 #include "internal.h"
 #include "video.h"
 
-typedef struct ColorContext {
+typedef struct ELBGContext {
     const AVClass *class;
     AVLFG lfg;
     unsigned int lfg_seed;
@@ -45,6 +45,7 @@ typedef struct ColorContext {
     int codebook_length;
     const AVPixFmtDescriptor *pix_desc;
     uint8_t rgba_map[4];
+    int pal8;
 } ELBGContext;
 
 #define OFFSET(x) offsetof(ELBGContext, x)
@@ -57,6 +58,7 @@ static const AVOption elbg_options[] = {
     { "n",        "set max number of steps used to compute the mapping", OFFSET(max_steps_nb), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, INT_MAX, FLAGS },
     { "seed", "set the random seed", OFFSET(lfg_seed), AV_OPT_TYPE_INT, {.i64 = -1}, -1, UINT32_MAX, FLAGS },
     { "s",    "set the random seed", OFFSET(lfg_seed), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, UINT32_MAX, FLAGS },
+    { "pal8", "set the pal8 output", OFFSET(pal8), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
     { NULL }
 };
 
@@ -66,6 +68,11 @@ static av_cold int init(AVFilterContext *ctx)
 {
     ELBGContext *elbg = ctx->priv;
 
+    if (elbg->pal8 && elbg->codebook_length > 256) {
+        av_log(ctx, AV_LOG_ERROR, "pal8 output allows max 256 codebook length.\n");
+        return AVERROR(EINVAL);
+    }
+
     if (elbg->lfg_seed == -1)
         elbg->lfg_seed = av_get_random_seed();
 
@@ -75,15 +82,29 @@ static av_cold int init(AVFilterContext *ctx)
 
 static int query_formats(AVFilterContext *ctx)
 {
+    ELBGContext *elbg = ctx->priv;
+    int ret;
+
     static const enum AVPixelFormat pix_fmts[] = {
         AV_PIX_FMT_ARGB, AV_PIX_FMT_RGBA, AV_PIX_FMT_ABGR, AV_PIX_FMT_BGRA,
         AV_PIX_FMT_RGB24, AV_PIX_FMT_BGR24,
         AV_PIX_FMT_NONE
     };
-    AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
-    if (!fmts_list)
-        return AVERROR(ENOMEM);
-    return ff_set_common_formats(ctx, fmts_list);
+    if (!elbg->pal8) {
+        AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
+        if (!fmts_list)
+            return AVERROR(ENOMEM);
+        return ff_set_common_formats(ctx, fmts_list);
+    } else {
+        static const enum AVPixelFormat pal8_fmt[] = {
+            AV_PIX_FMT_PAL8,
+            AV_PIX_FMT_NONE
+        };
+        if ((ret = ff_formats_ref(ff_make_format_list(pix_fmts), &ctx->inputs[0]->out_formats)) < 0 ||
+            (ret = ff_formats_ref(ff_make_format_list(pal8_fmt), &ctx->outputs[0]->in_formats)) < 0)
+            return ret;
+    }
+    return 0;
 }
 
 #define NB_COMPONENTS 3
@@ -152,6 +173,36 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
                    elbg->codebook, elbg->codebook_length, elbg->max_steps_nb,
                    elbg->codeword_closest_codebook_idxs, &elbg->lfg);
 
+    if (elbg->pal8) {
+        /* PAL8 output: plane 0 receives codebook indices, plane 1 the palette built from the codebook. */
+        AVFilterLink *outlink = inlink->dst->outputs[0];
+        AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+        uint32_t *pal;
+
+        if (!out) {
+            av_frame_free(&frame); /* the input frame is ours to release; do not leak it on failure */
+            return AVERROR(ENOMEM);
+        }
+        out->pts = frame->pts;
+        av_frame_free(&frame);
+        pal = (uint32_t *)out->data[1];
+        p0 = (uint8_t *)out->data[0];
+
+        for (i = 0; i < elbg->codebook_length; i++) /* 0xFF top byte: palette entries are ARGB, keep them opaque */
+            pal[i] = 0xFFU << 24 | (elbg->codebook[i*3  ] << 16) |
+                     (elbg->codebook[i*3+1] <<  8) | elbg->codebook[i*3+2];
+
+        k = 0;
+        for (i = 0; i < inlink->h; i++) {
+            p = p0;
+            for (j = 0; j < inlink->w; j++, p++)
+                p[0] = elbg->codeword_closest_codebook_idxs[k++]; /* one index byte per pixel */
+            p0 += out->linesize[0];
+        }
+
+        return ff_filter_frame(outlink, out);
+    }
+
     /* fill the output with the codebook values */
     p0 = frame->data[0];
 
diff --git a/libavfilter/vf_eq.c b/libavfilter/vf_eq.c
index f8b81603..0b929f33 100644
--- a/libavfilter/vf_eq.c
+++ b/libavfilter/vf_eq.c
@@ -98,7 +98,7 @@ static void check_values(EQParameters *param, EQContext *eq)
 {
     if (param->contrast == 1.0 && param->brightness == 0.0 && param->gamma == 1.0)
         param->adjust = NULL;
-    else if (param->gamma == 1.0)
+    else if (param->gamma == 1.0 && fabs(param->contrast) < 7.9)
         param->adjust = eq->process;
     else
         param->adjust = apply_lut;
@@ -106,7 +106,7 @@ static void check_values(EQParameters *param, EQContext *eq)
 
 static void set_contrast(EQContext *eq)
 {
-    eq->contrast = av_clipf(av_expr_eval(eq->contrast_pexpr, eq->var_values, eq), -2.0, 2.0);
+    eq->contrast = av_clipf(av_expr_eval(eq->contrast_pexpr, eq->var_values, eq), -1000.0, 1000.0);
     eq->param[0].contrast = eq->contrast;
     eq->param[0].lut_clean = 0;
     check_values(&eq->param[0], eq);
@@ -281,8 +281,8 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
         int h = inlink->h;
 
         if (i == 1 || i == 2) {
-            w = FF_CEIL_RSHIFT(w, desc->log2_chroma_w);
-            h = FF_CEIL_RSHIFT(h, desc->log2_chroma_h);
+            w = AV_CEIL_RSHIFT(w, desc->log2_chroma_w);
+            h = AV_CEIL_RSHIFT(h, desc->log2_chroma_h);
         }
 
         if (eq->param[i].adjust)
diff --git a/libavfilter/vf_extractplanes.c b/libavfilter/vf_extractplanes.c
index b0fa1bb2..099c00f2 100644
--- a/libavfilter/vf_extractplanes.c
+++ b/libavfilter/vf_extractplanes.c
@@ -39,7 +39,7 @@ typedef struct {
     int requested_planes;
     int map[4];
     int linesize[4];
-    int is_packed_rgb;
+    int is_packed;
     int depth;
     int step;
 } ExtractPlanesContext;
@@ -70,6 +70,7 @@ static int query_formats(AVFilterContext *ctx)
         AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUVA422P,
         AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
         AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_YUVJ444P,
+        AV_PIX_FMT_YUVJ411P,
         AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUVA444P,
         AV_PIX_FMT_YUV420P16LE, AV_PIX_FMT_YUVA420P16LE,
         AV_PIX_FMT_YUV420P16BE, AV_PIX_FMT_YUVA420P16BE,
@@ -78,6 +79,7 @@ static int query_formats(AVFilterContext *ctx)
         AV_PIX_FMT_YUV444P16LE, AV_PIX_FMT_YUVA444P16LE,
         AV_PIX_FMT_YUV444P16BE, AV_PIX_FMT_YUVA444P16BE,
         AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY8A,
+        AV_PIX_FMT_YA16LE, AV_PIX_FMT_YA16BE,
         AV_PIX_FMT_GRAY16LE, AV_PIX_FMT_GRAY16BE,
         AV_PIX_FMT_RGB24, AV_PIX_FMT_BGR24,
         AV_PIX_FMT_RGBA, AV_PIX_FMT_BGRA,
@@ -97,7 +99,7 @@ static int query_formats(AVFilterContext *ctx)
     const enum AVPixelFormat *out_pixfmts;
     const AVPixFmtDescriptor *desc;
     AVFilterFormats *avff;
-    int i, depth = 0, be = 0;
+    int i, ret, depth = 0, be = 0;
 
     if (!ctx->inputs[0]->in_formats ||
         !ctx->inputs[0]->in_formats->nb_formats) {
@@ -105,21 +107,22 @@ static int query_formats(AVFilterContext *ctx)
     }
 
     if (!ctx->inputs[0]->out_formats)
-        ff_formats_ref(ff_make_format_list(in_pixfmts), &ctx->inputs[0]->out_formats);
+        if ((ret = ff_formats_ref(ff_make_format_list(in_pixfmts), &ctx->inputs[0]->out_formats)) < 0)
+            return ret;
 
     avff = ctx->inputs[0]->in_formats;
     desc = av_pix_fmt_desc_get(avff->formats[0]);
-    depth = desc->comp[0].depth_minus1;
+    depth = desc->comp[0].depth;
     be = desc->flags & AV_PIX_FMT_FLAG_BE;
     for (i = 1; i < avff->nb_formats; i++) {
         desc = av_pix_fmt_desc_get(avff->formats[i]);
-        if (depth != desc->comp[0].depth_minus1 ||
+        if (depth != desc->comp[0].depth ||
             be    != (desc->flags & AV_PIX_FMT_FLAG_BE)) {
             return AVERROR(EAGAIN);
         }
     }
 
-    if (depth == 7)
+    if (depth == 8)
         out_pixfmts = out8_pixfmts;
     else if (be)
         out_pixfmts = out16be_pixfmts;
@@ -127,7 +130,8 @@ static int query_formats(AVFilterContext *ctx)
         out_pixfmts = out16le_pixfmts;
 
     for (i = 0; i < ctx->nb_outputs; i++)
-        ff_formats_ref(ff_make_format_list(out_pixfmts), &ctx->outputs[i]->in_formats);
+        if ((ret = ff_formats_ref(ff_make_format_list(out_pixfmts), &ctx->outputs[i]->in_formats)) < 0)
+            return ret;
     return 0;
 }
 
@@ -150,9 +154,10 @@ static int config_input(AVFilterLink *inlink)
     if ((ret = av_image_fill_linesizes(s->linesize, inlink->format, inlink->w)) < 0)
         return ret;
 
-    s->depth = (desc->comp[0].depth_minus1 + 1) >> 3;
+    s->depth = desc->comp[0].depth >> 3;
     s->step = av_get_padded_bits_per_pixel(desc) >> 3;
-    s->is_packed_rgb = !(desc->flags & AV_PIX_FMT_FLAG_PLANAR);
+    s->is_packed = !(desc->flags & AV_PIX_FMT_FLAG_PLANAR) &&
+                    (desc->nb_components > 1);
     if (desc->flags & AV_PIX_FMT_FLAG_RGB) {
         ff_fill_rgba_map(rgba_map, inlink->format);
         for (i = 0; i < 4; i++)
@@ -171,8 +176,8 @@ static int config_output(AVFilterLink *outlink)
     const int output = outlink->srcpad - ctx->output_pads;
 
     if (s->map[output] == 1 || s->map[output] == 2) {
-        outlink->h = FF_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
-        outlink->w = FF_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
+        outlink->h = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
+        outlink->w = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
     }
 
     return 0;
@@ -214,7 +219,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
         const int idx = s->map[i];
         AVFrame *out;
 
-        if (outlink->closed)
+        if (outlink->status)
             continue;
 
         out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
@@ -224,7 +229,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
         }
         av_frame_copy_props(out, frame);
 
-        if (s->is_packed_rgb) {
+        if (s->is_packed) {
             extract_from_packed(out->data[0], out->linesize[0],
                                 frame->data[0], frame->linesize[0],
                                 outlink->w, outlink->h,
diff --git a/libavfilter/vf_fade.c b/libavfilter/vf_fade.c
index 3e6aa7fe..f7b2b6a2 100644
--- a/libavfilter/vf_fade.c
+++ b/libavfilter/vf_fade.c
@@ -244,8 +244,8 @@ static int filter_slice_chroma(AVFilterContext *ctx, void *arg, int jobnr,
     FadeContext *s = ctx->priv;
     AVFrame *frame = arg;
     int i, j, plane;
-    const int width = FF_CEIL_RSHIFT(frame->width, s->hsub);
-    const int height= FF_CEIL_RSHIFT(frame->height, s->vsub);
+    const int width = AV_CEIL_RSHIFT(frame->width, s->hsub);
+    const int height= AV_CEIL_RSHIFT(frame->height, s->vsub);
     int slice_start = (height *  jobnr   ) / nb_jobs;
     int slice_end   = (height * (jobnr+1)) / nb_jobs;
 
@@ -384,7 +384,7 @@ static const AVOption fade_options[] = {
                                                     OFFSET(nb_frames),   AV_OPT_TYPE_INT, { .i64 = 25 }, 0, INT_MAX, FLAGS },
     { "n",           "Number of frames to which the effect should be applied.",
                                                     OFFSET(nb_frames),   AV_OPT_TYPE_INT, { .i64 = 25 }, 0, INT_MAX, FLAGS },
-    { "alpha",       "fade alpha if it is available on the input", OFFSET(alpha),       AV_OPT_TYPE_INT, {.i64 = 0    }, 0,       1, FLAGS },
+    { "alpha",       "fade alpha if it is available on the input", OFFSET(alpha),       AV_OPT_TYPE_BOOL, {.i64 = 0    }, 0,       1, FLAGS },
     { "start_time",  "Number of seconds of the beginning of the effect.",
                                                     OFFSET(start_time),  AV_OPT_TYPE_DURATION, {.i64 = 0. }, 0, INT32_MAX, FLAGS },
     { "st",          "Number of seconds of the beginning of the effect.",
diff --git a/libavfilter/vf_fftfilt.c b/libavfilter/vf_fftfilt.c
index c914ed01..307b41a7 100644
--- a/libavfilter/vf_fftfilt.c
+++ b/libavfilter/vf_fftfilt.c
@@ -73,8 +73,8 @@ AVFILTER_DEFINE_CLASS(fftfilt);
 
 static inline double lum(void *priv, double x, double y, int plane)
 {
-    FFTFILTContext *fftfilt = priv;
-    return fftfilt->rdft_vdata[plane][(int)x * fftfilt->rdft_vlen[plane] + (int)y];
+    FFTFILTContext *s = priv;
+    return s->rdft_vdata[plane][(int)x * s->rdft_vlen[plane] + (int)y];
 }
 
 static double weight_Y(void *priv, double x, double y) { return lum(priv, x, y, Y); }
@@ -93,95 +93,95 @@ static void copy_rev (FFTSample *dest, int w, int w2)
 }
 
 /*Horizontal pass - RDFT*/
-static void rdft_horizontal(FFTFILTContext *fftfilt, AVFrame *in, int w, int h, int plane)
+static void rdft_horizontal(FFTFILTContext *s, AVFrame *in, int w, int h, int plane)
 {
     int i, j;
-    fftfilt->rdft = av_rdft_init(fftfilt->rdft_hbits[plane], DFT_R2C);
+    s->rdft = av_rdft_init(s->rdft_hbits[plane], DFT_R2C);
 
     for (i = 0; i < h; i++) {
         for (j = 0; j < w; j++)
-            fftfilt->rdft_hdata[plane][i * fftfilt->rdft_hlen[plane] + j] = *(in->data[plane] + in->linesize[plane] * i + j);
+            s->rdft_hdata[plane][i * s->rdft_hlen[plane] + j] = *(in->data[plane] + in->linesize[plane] * i + j);
 
-        copy_rev(fftfilt->rdft_hdata[plane] + i * fftfilt->rdft_hlen[plane], w, fftfilt->rdft_hlen[plane]);
+        copy_rev(s->rdft_hdata[plane] + i * s->rdft_hlen[plane], w, s->rdft_hlen[plane]);
     }
 
     for (i = 0; i < h; i++)
-        av_rdft_calc(fftfilt->rdft, fftfilt->rdft_hdata[plane] + i * fftfilt->rdft_hlen[plane]);
+        av_rdft_calc(s->rdft, s->rdft_hdata[plane] + i * s->rdft_hlen[plane]);
 
-    av_rdft_end(fftfilt->rdft);
+    av_rdft_end(s->rdft);
 }
 
 /*Vertical pass - RDFT*/
-static void rdft_vertical(FFTFILTContext *fftfilt, int h, int plane)
+static void rdft_vertical(FFTFILTContext *s, int h, int plane)
 {
     int i, j;
-    fftfilt->rdft = av_rdft_init(fftfilt->rdft_vbits[plane], DFT_R2C);
+    s->rdft = av_rdft_init(s->rdft_vbits[plane], DFT_R2C);
 
-    for (i = 0; i < fftfilt->rdft_hlen[plane]; i++) {
+    for (i = 0; i < s->rdft_hlen[plane]; i++) {
         for (j = 0; j < h; j++)
-            fftfilt->rdft_vdata[plane][i * fftfilt->rdft_vlen[plane] + j] =
-            fftfilt->rdft_hdata[plane][j * fftfilt->rdft_hlen[plane] + i];
-        copy_rev(fftfilt->rdft_vdata[plane] + i * fftfilt->rdft_vlen[plane], h, fftfilt->rdft_vlen[plane]);
+            s->rdft_vdata[plane][i * s->rdft_vlen[plane] + j] =
+            s->rdft_hdata[plane][j * s->rdft_hlen[plane] + i];
+        copy_rev(s->rdft_vdata[plane] + i * s->rdft_vlen[plane], h, s->rdft_vlen[plane]);
     }
 
-    for (i = 0; i < fftfilt->rdft_hlen[plane]; i++)
-        av_rdft_calc(fftfilt->rdft, fftfilt->rdft_vdata[plane] + i * fftfilt->rdft_vlen[plane]);
+    for (i = 0; i < s->rdft_hlen[plane]; i++)
+        av_rdft_calc(s->rdft, s->rdft_vdata[plane] + i * s->rdft_vlen[plane]);
 
-    av_rdft_end(fftfilt->rdft);
+    av_rdft_end(s->rdft);
 }
 /*Vertical pass - IRDFT*/
-static void irdft_vertical(FFTFILTContext *fftfilt, int h, int plane)
+static void irdft_vertical(FFTFILTContext *s, int h, int plane)
 {
     int i, j;
-    fftfilt->rdft = av_rdft_init(fftfilt->rdft_vbits[plane], IDFT_C2R);
-    for (i = 0; i < fftfilt->rdft_hlen[plane]; i++)
-        av_rdft_calc(fftfilt->rdft, fftfilt->rdft_vdata[plane] + i * fftfilt->rdft_vlen[plane]);
+    s->rdft = av_rdft_init(s->rdft_vbits[plane], IDFT_C2R);
+    for (i = 0; i < s->rdft_hlen[plane]; i++)
+        av_rdft_calc(s->rdft, s->rdft_vdata[plane] + i * s->rdft_vlen[plane]);
 
-    for (i = 0; i < fftfilt->rdft_hlen[plane]; i++)
+    for (i = 0; i < s->rdft_hlen[plane]; i++)
         for (j = 0; j < h; j++)
-            fftfilt->rdft_hdata[plane][j * fftfilt->rdft_hlen[plane] + i] =
-            fftfilt->rdft_vdata[plane][i * fftfilt->rdft_vlen[plane] + j];
+            s->rdft_hdata[plane][j * s->rdft_hlen[plane] + i] =
+            s->rdft_vdata[plane][i * s->rdft_vlen[plane] + j];
 
-    av_rdft_end(fftfilt->rdft);
+    av_rdft_end(s->rdft);
 }
 
 /*Horizontal pass - IRDFT*/
-static void irdft_horizontal(FFTFILTContext *fftfilt, AVFrame *out, int w, int h, int plane)
+static void irdft_horizontal(FFTFILTContext *s, AVFrame *out, int w, int h, int plane)
 {
     int i, j;
-    fftfilt->rdft = av_rdft_init(fftfilt->rdft_hbits[plane], IDFT_C2R);
+    s->rdft = av_rdft_init(s->rdft_hbits[plane], IDFT_C2R);
     for (i = 0; i < h; i++)
-        av_rdft_calc(fftfilt->rdft, fftfilt->rdft_hdata[plane] + i * fftfilt->rdft_hlen[plane]);
+        av_rdft_calc(s->rdft, s->rdft_hdata[plane] + i * s->rdft_hlen[plane]);
 
     for (i = 0; i < h; i++)
         for (j = 0; j < w; j++)
-            *(out->data[plane] + out->linesize[plane] * i + j) = av_clip(fftfilt->rdft_hdata[plane][i
-                                                                         *fftfilt->rdft_hlen[plane] + j] * 4 /
-                                                                         (fftfilt->rdft_hlen[plane] *
-                                                                          fftfilt->rdft_vlen[plane]), 0, 255);
+            *(out->data[plane] + out->linesize[plane] * i + j) = av_clip(s->rdft_hdata[plane][i
+                                                                         *s->rdft_hlen[plane] + j] * 4 /
+                                                                         (s->rdft_hlen[plane] *
+                                                                          s->rdft_vlen[plane]), 0, 255);
 
-    av_rdft_end(fftfilt->rdft);
+    av_rdft_end(s->rdft);
 }
 
 static av_cold int initialize(AVFilterContext *ctx)
 {
-    FFTFILTContext *fftfilt = ctx->priv;
+    FFTFILTContext *s = ctx->priv;
     int ret = 0, plane;
 
-    if (!fftfilt->dc[U] && !fftfilt->dc[V]) {
-        fftfilt->dc[U] = fftfilt->dc[Y];
-        fftfilt->dc[V] = fftfilt->dc[Y];
+    if (!s->dc[U] && !s->dc[V]) {
+        s->dc[U] = s->dc[Y];
+        s->dc[V] = s->dc[Y];
     } else {
-        if (!fftfilt->dc[U]) fftfilt->dc[U] = fftfilt->dc[V];
-        if (!fftfilt->dc[V]) fftfilt->dc[V] = fftfilt->dc[U];
+        if (!s->dc[U]) s->dc[U] = s->dc[V];
+        if (!s->dc[V]) s->dc[V] = s->dc[U];
     }
 
-    if (!fftfilt->weight_str[U] && !fftfilt->weight_str[V]) {
-        fftfilt->weight_str[U] = av_strdup(fftfilt->weight_str[Y]);
-        fftfilt->weight_str[V] = av_strdup(fftfilt->weight_str[Y]);
+    if (!s->weight_str[U] && !s->weight_str[V]) {
+        s->weight_str[U] = av_strdup(s->weight_str[Y]);
+        s->weight_str[V] = av_strdup(s->weight_str[Y]);
     } else {
-        if (!fftfilt->weight_str[U]) fftfilt->weight_str[U] = av_strdup(fftfilt->weight_str[V]);
-        if (!fftfilt->weight_str[V]) fftfilt->weight_str[V] = av_strdup(fftfilt->weight_str[U]);
+        if (!s->weight_str[U]) s->weight_str[U] = av_strdup(s->weight_str[V]);
+        if (!s->weight_str[V]) s->weight_str[V] = av_strdup(s->weight_str[U]);
     }
 
     for (plane = 0; plane < 3; plane++) {
@@ -189,7 +189,7 @@ static av_cold int initialize(AVFilterContext *ctx)
         const char *const func2_names[] = {"weight_Y", "weight_U", "weight_V", NULL };
         double (*func2[])(void *, double, double) = { weight_Y, weight_U, weight_V, p[plane], NULL };
 
-        ret = av_expr_parse(&fftfilt->weight_expr[plane], fftfilt->weight_str[plane], var_names,
+        ret = av_expr_parse(&s->weight_expr[plane], s->weight_str[plane], var_names,
                             NULL, NULL, func2_names, func2, 0, ctx);
         if (ret < 0)
             break;
@@ -199,7 +199,7 @@ static av_cold int initialize(AVFilterContext *ctx)
 
 static int config_props(AVFilterLink *inlink)
 {
-    FFTFILTContext *fftfilt = inlink->dst->priv;
+    FFTFILTContext *s = inlink->dst->priv;
     const AVPixFmtDescriptor *desc;
     int rdft_hbits, rdft_vbits, i, j, plane;
     double values[VAR_VARS_NB];
@@ -211,16 +211,16 @@ static int config_props(AVFilterLink *inlink)
 
         /* RDFT - Array initialization for Horizontal pass*/
         for (rdft_hbits = 1; 1 << rdft_hbits < w*10/9; rdft_hbits++);
-        fftfilt->rdft_hbits[i] = rdft_hbits;
-        fftfilt->rdft_hlen[i] = 1 << rdft_hbits;
-        if (!(fftfilt->rdft_hdata[i] = av_malloc_array(h, fftfilt->rdft_hlen[i] * sizeof(FFTSample))))
+        s->rdft_hbits[i] = rdft_hbits;
+        s->rdft_hlen[i] = 1 << rdft_hbits;
+        if (!(s->rdft_hdata[i] = av_malloc_array(h, s->rdft_hlen[i] * sizeof(FFTSample))))
             return AVERROR(ENOMEM);
 
         /* RDFT - Array initialization for Vertical pass*/
         for (rdft_vbits = 1; 1 << rdft_vbits < h*10/9; rdft_vbits++);
-        fftfilt->rdft_vbits[i] = rdft_vbits;
-        fftfilt->rdft_vlen[i] = 1 << rdft_vbits;
-        if (!(fftfilt->rdft_vdata[i] = av_malloc_array(fftfilt->rdft_hlen[i], fftfilt->rdft_vlen[i] * sizeof(FFTSample))))
+        s->rdft_vbits[i] = rdft_vbits;
+        s->rdft_vlen[i] = 1 << rdft_vbits;
+        if (!(s->rdft_vdata[i] = av_malloc_array(s->rdft_hlen[i], s->rdft_vlen[i] * sizeof(FFTSample))))
             return AVERROR(ENOMEM);
     }
 
@@ -229,16 +229,16 @@ static int config_props(AVFilterLink *inlink)
     values[VAR_H] = inlink->h;
     for (plane = 0; plane < 3; plane++)
     {
-        if(!(fftfilt->weight[plane] = av_malloc_array(fftfilt->rdft_hlen[plane], fftfilt->rdft_vlen[plane] * sizeof(double))))
+        if(!(s->weight[plane] = av_malloc_array(s->rdft_hlen[plane], s->rdft_vlen[plane] * sizeof(double))))
             return AVERROR(ENOMEM);
-        for (i = 0; i < fftfilt->rdft_hlen[plane]; i++)
+        for (i = 0; i < s->rdft_hlen[plane]; i++)
         {
             values[VAR_X] = i;
-            for (j = 0; j < fftfilt->rdft_vlen[plane]; j++)
+            for (j = 0; j < s->rdft_vlen[plane]; j++)
             {
                 values[VAR_Y] = j;
-                fftfilt->weight[plane][i * fftfilt->rdft_vlen[plane] + j] =
-                av_expr_eval(fftfilt->weight_expr[plane], values, fftfilt);
+                s->weight[plane][i * s->rdft_vlen[plane] + j] =
+                av_expr_eval(s->weight_expr[plane], values, s);
             }
         }
     }
@@ -250,7 +250,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
     AVFilterContext *ctx = inlink->dst;
     AVFilterLink *outlink = inlink->dst->outputs[0];
     const AVPixFmtDescriptor *desc;
-    FFTFILTContext *fftfilt = ctx->priv;
+    FFTFILTContext *s = ctx->priv;
     AVFrame *out;
     int i, j, plane;
 
@@ -266,23 +266,23 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
         int h = inlink->h;
 
         if (plane == 1 || plane == 2) {
-            w = FF_CEIL_RSHIFT(w, desc->log2_chroma_w);
-            h = FF_CEIL_RSHIFT(h, desc->log2_chroma_h);
+            w = AV_CEIL_RSHIFT(w, desc->log2_chroma_w);
+            h = AV_CEIL_RSHIFT(h, desc->log2_chroma_h);
         }
 
-        rdft_horizontal(fftfilt, in, w, h, plane);
-        rdft_vertical(fftfilt, h, plane);
+        rdft_horizontal(s, in, w, h, plane);
+        rdft_vertical(s, h, plane);
 
         /*Change user defined parameters*/
-        for (i = 0; i < fftfilt->rdft_hlen[plane]; i++)
-            for (j = 0; j < fftfilt->rdft_vlen[plane]; j++)
-                fftfilt->rdft_vdata[plane][i * fftfilt->rdft_vlen[plane] + j] *=
-                  fftfilt->weight[plane][i * fftfilt->rdft_vlen[plane] + j];
+        for (i = 0; i < s->rdft_hlen[plane]; i++)
+            for (j = 0; j < s->rdft_vlen[plane]; j++)
+                s->rdft_vdata[plane][i * s->rdft_vlen[plane] + j] *=
+                  s->weight[plane][i * s->rdft_vlen[plane] + j];
 
-        fftfilt->rdft_vdata[plane][0] += fftfilt->rdft_hlen[plane] * fftfilt->rdft_vlen[plane] * fftfilt->dc[plane];
+        s->rdft_vdata[plane][0] += s->rdft_hlen[plane] * s->rdft_vlen[plane] * s->dc[plane];
 
-        irdft_vertical(fftfilt, h, plane);
-        irdft_horizontal(fftfilt, out, w, h, plane);
+        irdft_vertical(s, h, plane);
+        irdft_horizontal(s, out, w, h, plane);
     }
 
     av_frame_free(&in);
@@ -291,13 +291,13 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 
 static av_cold void uninit(AVFilterContext *ctx)
 {
-    FFTFILTContext *fftfilt = ctx->priv;
+    FFTFILTContext *s = ctx->priv;
     int i;
     for (i = 0; i < MAX_PLANES; i++) {
-        av_free(fftfilt->rdft_hdata[i]);
-        av_free(fftfilt->rdft_vdata[i]);
-        av_expr_free(fftfilt->weight_expr[i]);
-        av_free(fftfilt->weight[i]);
+        av_free(s->rdft_hdata[i]);
+        av_free(s->rdft_vdata[i]);
+        av_expr_free(s->weight_expr[i]);
+        av_free(s->weight[i]);
     }
 }
 
@@ -335,7 +335,7 @@ static const AVFilterPad fftfilt_outputs[] = {
 
 AVFilter ff_vf_fftfilt = {
     .name            = "fftfilt",
-    .description     = NULL_IF_CONFIG_SMALL("Apply arbitrary expressions to samples in frequency domain"),
+    .description     = NULL_IF_CONFIG_SMALL("Apply arbitrary expressions to pixels in frequency domain."),
     .priv_size       = sizeof(FFTFILTContext),
     .priv_class      = &fftfilt_class,
     .inputs          = fftfilt_inputs,
diff --git a/libavfilter/vf_fieldmatch.c b/libavfilter/vf_fieldmatch.c
index c01af668..e155712c 100644
--- a/libavfilter/vf_fieldmatch.c
+++ b/libavfilter/vf_fieldmatch.c
@@ -124,12 +124,12 @@ static const AVOption fieldmatch_options[] = {
         { "pc_n_ub", "2-way match + 3rd match on combed + 4th/5th matches if still combed (p/c + u + u/b)",  0, AV_OPT_TYPE_CONST, {.i64=MODE_PC_N_UB}, INT_MIN, INT_MAX, FLAGS, "mode" },
         { "pcn",     "3-way match (p/c/n)",                                                                  0, AV_OPT_TYPE_CONST, {.i64=MODE_PCN},     INT_MIN, INT_MAX, FLAGS, "mode" },
         { "pcn_ub",  "3-way match + 4th/5th matches on combed (p/c/n + u/b)",                                0, AV_OPT_TYPE_CONST, {.i64=MODE_PCN_UB},  INT_MIN, INT_MAX, FLAGS, "mode" },
-    { "ppsrc", "mark main input as a pre-processed input and activate clean source input stream", OFFSET(ppsrc), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS },
+    { "ppsrc", "mark main input as a pre-processed input and activate clean source input stream", OFFSET(ppsrc), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
     { "field", "set the field to match from", OFFSET(field), AV_OPT_TYPE_INT, {.i64=FM_PARITY_AUTO}, -1, 1, FLAGS, "field" },
         { "auto",   "automatic (same value as 'order')",    0, AV_OPT_TYPE_CONST, {.i64=FM_PARITY_AUTO},    INT_MIN, INT_MAX, FLAGS, "field" },
         { "bottom", "bottom field",                         0, AV_OPT_TYPE_CONST, {.i64=FM_PARITY_BOTTOM},  INT_MIN, INT_MAX, FLAGS, "field" },
         { "top",    "top field",                            0, AV_OPT_TYPE_CONST, {.i64=FM_PARITY_TOP},     INT_MIN, INT_MAX, FLAGS, "field" },
-    { "mchroma", "set whether or not chroma is included during the match comparisons", OFFSET(mchroma), AV_OPT_TYPE_INT, {.i64=1}, 0, 1,  FLAGS },
+    { "mchroma", "set whether or not chroma is included during the match comparisons", OFFSET(mchroma), AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1,  FLAGS },
     { "y0", "define an exclusion band which excludes the lines between y0 and y1 from the field matching decision", OFFSET(y0), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS },
     { "y1", "define an exclusion band which excludes the lines between y0 and y1 from the field matching decision", OFFSET(y1), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS },
     { "scthresh", "set scene change detection threshold", OFFSET(scthresh_flt), AV_OPT_TYPE_DOUBLE, {.dbl=12}, 0, 100, FLAGS },
@@ -142,7 +142,7 @@ static const AVOption fieldmatch_options[] = {
         { "pcn",   "calculate p/c/n",       0, AV_OPT_TYPE_CONST, {.i64=COMBDBG_PCN},   INT_MIN, INT_MAX, FLAGS, "dbglvl" },
         { "pcnub", "calculate p/c/n/u/b",   0, AV_OPT_TYPE_CONST, {.i64=COMBDBG_PCNUB}, INT_MIN, INT_MAX, FLAGS, "dbglvl" },
     { "cthresh", "set the area combing threshold used for combed frame detection",       OFFSET(cthresh), AV_OPT_TYPE_INT, {.i64= 9}, -1, 0xff, FLAGS },
-    { "chroma",  "set whether or not chroma is considered in the combed frame decision", OFFSET(chroma),  AV_OPT_TYPE_INT, {.i64= 0},  0,    1, FLAGS },
+    { "chroma",  "set whether or not chroma is considered in the combed frame decision", OFFSET(chroma),  AV_OPT_TYPE_BOOL,{.i64= 0},  0,    1, FLAGS },
     { "blockx",  "set the x-axis size of the window used during combed frame detection", OFFSET(blockx),  AV_OPT_TYPE_INT, {.i64=16},  4, 1<<9, FLAGS },
     { "blocky",  "set the y-axis size of the window used during combed frame detection", OFFSET(blocky),  AV_OPT_TYPE_INT, {.i64=16},  4, 1<<9, FLAGS },
     { "combpel", "set the number of combed pixels inside any of the blocky by blockx size blocks on the frame for the frame to be detected as combed", OFFSET(combpel), AV_OPT_TYPE_INT, {.i64=80}, 0, INT_MAX, FLAGS },
@@ -153,12 +153,12 @@ AVFILTER_DEFINE_CLASS(fieldmatch);
 
 static int get_width(const FieldMatchContext *fm, const AVFrame *f, int plane)
 {
-    return plane ? FF_CEIL_RSHIFT(f->width, fm->hsub) : f->width;
+    return plane ? AV_CEIL_RSHIFT(f->width, fm->hsub) : f->width;
 }
 
 static int get_height(const FieldMatchContext *fm, const AVFrame *f, int plane)
 {
-    return plane ? FF_CEIL_RSHIFT(f->height, fm->vsub) : f->height;
+    return plane ? AV_CEIL_RSHIFT(f->height, fm->vsub) : f->height;
 }
 
 static int64_t luma_abs_diff(const AVFrame *f1, const AVFrame *f2)
@@ -270,8 +270,8 @@ static int calc_combed_score(const FieldMatchContext *fm, const AVFrame *src)
         uint8_t *cmkp  = fm->cmask_data[0];
         uint8_t *cmkpU = fm->cmask_data[1];
         uint8_t *cmkpV = fm->cmask_data[2];
-        const int width  = FF_CEIL_RSHIFT(src->width,  fm->hsub);
-        const int height = FF_CEIL_RSHIFT(src->height, fm->vsub);
+        const int width  = AV_CEIL_RSHIFT(src->width,  fm->hsub);
+        const int height = AV_CEIL_RSHIFT(src->height, fm->vsub);
         const int cmk_linesize   = fm->cmask_linesize[0] << 1;
         const int cmk_linesizeUV = fm->cmask_linesize[2];
         uint8_t *cmkpp  = cmkp - (cmk_linesize>>1);
@@ -608,10 +608,13 @@ static void copy_fields(const FieldMatchContext *fm, AVFrame *dst,
                         const AVFrame *src, int field)
 {
     int plane;
-    for (plane = 0; plane < 4 && src->data[plane] && src->linesize[plane]; plane++)
+    for (plane = 0; plane < 4 && src->data[plane] && src->linesize[plane]; plane++) {
+        const int plane_h = get_height(fm, src, plane);
+        const int nb_copy_fields = (plane_h >> 1) + (field ? 0 : (plane_h & 1));
         av_image_copy_plane(dst->data[plane] + field*dst->linesize[plane], dst->linesize[plane] << 1,
                             src->data[plane] + field*src->linesize[plane], src->linesize[plane] << 1,
-                            get_width(fm, src, plane), get_height(fm, src, plane) / 2);
+                            get_width(fm, src, plane), nb_copy_fields);
+    }
 }
 
 static AVFrame *create_weave_frame(AVFilterContext *ctx, int match, int field,
@@ -953,7 +956,6 @@ static int config_output(AVFilterLink *outlink)
     const AVFilterLink *inlink =
         ctx->inputs[fm->ppsrc ? INPUT_CLEANSRC : INPUT_MAIN];
 
-    outlink->flags |= FF_LINK_FLAG_REQUEST_LOOP;
     outlink->time_base = inlink->time_base;
     outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
     outlink->frame_rate = inlink->frame_rate;
diff --git a/libavfilter/vf_fieldorder.c b/libavfilter/vf_fieldorder.c
index d0d68071..ca55ff1f 100644
--- a/libavfilter/vf_fieldorder.c
+++ b/libavfilter/vf_fieldorder.c
@@ -55,13 +55,12 @@ static int query_formats(AVFilterContext *ctx)
                   desc->flags & AV_PIX_FMT_FLAG_PAL     ||
                   desc->flags & AV_PIX_FMT_FLAG_BITSTREAM) &&
                 desc->nb_components && !desc->log2_chroma_h &&
-                (ret = ff_add_format(&formats, pix_fmt)) < 0) {
-                ff_formats_unref(&formats);
+                (ret = ff_add_format(&formats, pix_fmt)) < 0)
                 return ret;
-            }
         }
-        ff_formats_ref(formats, &ctx->inputs[0]->out_formats);
-        ff_formats_ref(formats, &ctx->outputs[0]->in_formats);
+        if ((ret = ff_formats_ref(formats, &ctx->inputs[0]->out_formats)) < 0 ||
+            (ret = ff_formats_ref(formats, &ctx->outputs[0]->in_formats)) < 0)
+            return ret;
     }
 
     return 0;
diff --git a/libavfilter/vf_find_rect.c b/libavfilter/vf_find_rect.c
index a4631dee..d7e6579a 100644
--- a/libavfilter/vf_find_rect.c
+++ b/libavfilter/vf_find_rect.c
@@ -45,7 +45,7 @@ typedef struct FOCContext {
 
 #define OFFSET(x) offsetof(FOCContext, x)
 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
-static const AVOption foc_options[] = {
+static const AVOption find_rect_options[] = {
     { "object", "object bitmap filename", OFFSET(obj_filename), AV_OPT_TYPE_STRING, {.str = NULL}, .flags = FLAGS },
     { "threshold", "set threshold", OFFSET(threshold), AV_OPT_TYPE_FLOAT, {.dbl = 0.5}, 0, 1.0, FLAGS },
     { "mipmaps", "set mipmaps", OFFSET(mipmaps), AV_OPT_TYPE_INT, {.i64 = 3}, 1, MAX_MIPMAPS, FLAGS },
@@ -56,13 +56,7 @@ static const AVOption foc_options[] = {
     { NULL }
 };
 
-static const AVClass foc_class = {
-    .class_name       = "find_rect",
-    .item_name        = av_default_item_name,
-    .option           = foc_options,
-    .version          = LIBAVUTIL_VERSION_INT,
-    .category         = AV_CLASS_CATEGORY_FILTER,
-};
+AVFILTER_DEFINE_CLASS(find_rect);
 
 static int query_formats(AVFilterContext *ctx)
 {
@@ -300,12 +294,12 @@ static const AVFilterPad foc_outputs[] = {
 
 AVFilter ff_vf_find_rect = {
     .name            = "find_rect",
-    .description     = NULL_IF_CONFIG_SMALL("Find a user specified object"),
+    .description     = NULL_IF_CONFIG_SMALL("Find a user specified object."),
     .priv_size       = sizeof(FOCContext),
     .init            = init,
     .uninit          = uninit,
     .query_formats   = query_formats,
     .inputs          = foc_inputs,
     .outputs         = foc_outputs,
-    .priv_class      = &foc_class,
+    .priv_class      = &find_rect_class,
 };
diff --git a/libavfilter/vf_fps.c b/libavfilter/vf_fps.c
index 6154f6d4..0500e975 100644
--- a/libavfilter/vf_fps.c
+++ b/libavfilter/vf_fps.c
@@ -126,11 +126,9 @@ static int request_frame(AVFilterLink *outlink)
 {
     AVFilterContext *ctx = outlink->src;
     FPSContext        *s = ctx->priv;
-    int frames_out = s->frames_out;
-    int ret = 0;
+    int ret;
 
-    while (ret >= 0 && s->frames_out == frames_out)
-        ret = ff_request_frame(ctx->inputs[0]);
+    ret = ff_request_frame(ctx->inputs[0]);
 
     /* flush the fifo */
     if (ret == AVERROR_EOF && av_fifo_size(s->fifo)) {
diff --git a/libavfilter/vf_framepack.c b/libavfilter/vf_framepack.c
index e9b9ed13..a5cd9540 100644
--- a/libavfilter/vf_framepack.c
+++ b/libavfilter/vf_framepack.c
@@ -25,6 +25,7 @@
 
 #include <string.h>
 
+#include "libavutil/common.h"
 #include "libavutil/imgutils.h"
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
@@ -78,12 +79,12 @@ static av_cold void framepack_uninit(AVFilterContext *ctx)
 
 static int config_output(AVFilterLink *outlink)
 {
-    AVFilterContext *ctx = outlink->src;
-    FramepackContext *s  = outlink->src->priv;
+    AVFilterContext *ctx  = outlink->src;
+    FramepackContext *s   = outlink->src->priv;
 
-    int width            = ctx->inputs[LEFT]->w;
-    int height           = ctx->inputs[LEFT]->h;
-    AVRational time_base = ctx->inputs[LEFT]->time_base;
+    int width             = ctx->inputs[LEFT]->w;
+    int height            = ctx->inputs[LEFT]->h;
+    AVRational time_base  = ctx->inputs[LEFT]->time_base;
     AVRational frame_rate = ctx->inputs[LEFT]->frame_rate;
 
     // check size and fps match on the other input
@@ -117,7 +118,7 @@ static int config_output(AVFilterLink *outlink)
     // modify output properties as needed
     switch (s->format) {
     case AV_STEREO3D_FRAMESEQUENCE:
-        time_base.den *= 2;
+        time_base.den  *= 2;
         frame_rate.num *= 2;
 
         s->double_pts = AV_NOPTS_VALUE;
@@ -135,123 +136,159 @@ static int config_output(AVFilterLink *outlink)
         return AVERROR_INVALIDDATA;
     }
 
-    outlink->w         = width;
-    outlink->h         = height;
-    outlink->time_base = time_base;
-    outlink->frame_rate= frame_rate;
+    outlink->w          = width;
+    outlink->h          = height;
+    outlink->time_base  = time_base;
+    outlink->frame_rate = frame_rate;
 
     return 0;
 }
 
-static void horizontal_frame_pack(FramepackContext *s,
-                                  AVFrame *dst,
+static void horizontal_frame_pack(AVFilterLink *outlink,
+                                  AVFrame *out,
                                   int interleaved)
 {
-    int plane, i;
-    int length = dst->width / 2;
-    int lines  = dst->height;
-
-    for (plane = 0; plane < s->pix_desc->nb_components; plane++) {
-        const uint8_t *leftp  = s->input_views[LEFT]->data[plane];
-        const uint8_t *rightp = s->input_views[RIGHT]->data[plane];
-        uint8_t *dstp         = dst->data[plane];
-
-        if (plane == 1 || plane == 2) {
-            length = FF_CEIL_RSHIFT(dst->width / 2, s->pix_desc->log2_chroma_w);
-            lines  = FF_CEIL_RSHIFT(dst->height,    s->pix_desc->log2_chroma_h);
-        }
-
-        if (interleaved) {
+    AVFilterContext *ctx = outlink->src;
+    FramepackContext *s = ctx->priv;
+    int i, plane;
+
+    if (interleaved) {
+        const uint8_t *leftp  = s->input_views[LEFT]->data[0];
+        const uint8_t *rightp = s->input_views[RIGHT]->data[0];
+        uint8_t *dstp         = out->data[0];
+        int length = out->width / 2;
+        int lines  = out->height;
+
+        for (plane = 0; plane < s->pix_desc->nb_components; plane++) {
+            if (plane == 1 || plane == 2) {
+                length = AV_CEIL_RSHIFT(out->width / 2, s->pix_desc->log2_chroma_w);
+                lines  = AV_CEIL_RSHIFT(out->height,    s->pix_desc->log2_chroma_h);
+            }
             for (i = 0; i < lines; i++) {
                 int j;
-                int k = 0;
-
+                leftp  = s->input_views[LEFT]->data[plane] +
+                         s->input_views[LEFT]->linesize[plane] * i;
+                rightp = s->input_views[RIGHT]->data[plane] +
+                         s->input_views[RIGHT]->linesize[plane] * i;
+                dstp   = out->data[plane] + out->linesize[plane] * i;
                 for (j = 0; j < length; j++) {
-                    dstp[k++] = leftp[j];
-                    dstp[k++] = rightp[j];
+                    // interpolate chroma as necessary
+                    if ((s->pix_desc->log2_chroma_w ||
+                         s->pix_desc->log2_chroma_h) &&
+                        (plane == 1 || plane == 2)) {
+                        *dstp++ = (*leftp + *rightp) / 2;
+                        *dstp++ = (*leftp + *rightp) / 2;
+                    } else {
+                        *dstp++ = *leftp;
+                        *dstp++ = *rightp;
+                    }
+                    leftp += 1;
+                    rightp += 1;
                 }
-
-                dstp   += dst->linesize[plane];
-                leftp  += s->input_views[LEFT]->linesize[plane];
-                rightp += s->input_views[RIGHT]->linesize[plane];
             }
-        } else {
-            av_image_copy_plane(dst->data[plane], dst->linesize[plane],
-                                leftp, s->input_views[LEFT]->linesize[plane],
-                                length, lines);
-            av_image_copy_plane(dst->data[plane] + length, dst->linesize[plane],
-                                rightp, s->input_views[RIGHT]->linesize[plane],
-                                length, lines);
+        }
+    } else {
+        for (i = 0; i < 2; i++) {
+            const uint8_t *src[4];
+            uint8_t *dst[4];
+            int sub_w = s->input_views[i]->width >> s->pix_desc->log2_chroma_w;
+
+            src[0] = s->input_views[i]->data[0];
+            src[1] = s->input_views[i]->data[1];
+            src[2] = s->input_views[i]->data[2];
+
+            dst[0] = out->data[0] + i * s->input_views[i]->width;
+            dst[1] = out->data[1] + i * sub_w;
+            dst[2] = out->data[2] + i * sub_w;
+
+            av_image_copy(dst, out->linesize, src, s->input_views[i]->linesize,
+                          s->input_views[i]->format,
+                          s->input_views[i]->width,
+                          s->input_views[i]->height);
         }
     }
 }
 
-static void vertical_frame_pack(FramepackContext *s,
-                                AVFrame *dst,
+static void vertical_frame_pack(AVFilterLink *outlink,
+                                AVFrame *out,
                                 int interleaved)
 {
-    int plane, offset;
-    int length = dst->width;
-    int lines  = dst->height / 2;
-
-    for (plane = 0; plane < s->pix_desc->nb_components; plane++) {
-        if (plane == 1 || plane == 2) {
-            length = -(-(dst->width)      >> s->pix_desc->log2_chroma_w);
-            lines  = -(-(dst->height / 2) >> s->pix_desc->log2_chroma_h);
-        }
+    AVFilterContext *ctx = outlink->src;
+    FramepackContext *s = ctx->priv;
+    int i;
 
-        offset = interleaved ? dst->linesize[plane] : dst->linesize[plane] * lines;
-
-        av_image_copy_plane(dst->data[plane],
-                            dst->linesize[plane] << interleaved,
-                            s->input_views[LEFT]->data[plane],
-                            s->input_views[LEFT]->linesize[plane],
-                            length, lines);
-        av_image_copy_plane(dst->data[plane] + offset,
-                            dst->linesize[plane] << interleaved,
-                            s->input_views[RIGHT]->data[plane],
-                            s->input_views[RIGHT]->linesize[plane],
-                            length, lines);
+    for (i = 0; i < 2; i++) {
+        const uint8_t *src[4];
+        uint8_t *dst[4];
+        int linesizes[4];
+        int sub_h = s->input_views[i]->height >> s->pix_desc->log2_chroma_h;
+
+        src[0] = s->input_views[i]->data[0];
+        src[1] = s->input_views[i]->data[1];
+        src[2] = s->input_views[i]->data[2];
+
+        dst[0] = out->data[0] + i * out->linesize[0] *
+                 (interleaved + s->input_views[i]->height * (1 - interleaved));
+        dst[1] = out->data[1] + i * out->linesize[1] *
+                 (interleaved + sub_h * (1 - interleaved));
+        dst[2] = out->data[2] + i * out->linesize[2] *
+                 (interleaved + sub_h * (1 - interleaved));
+
+        linesizes[0] = out->linesize[0] +
+                       interleaved * out->linesize[0];
+        linesizes[1] = out->linesize[1] +
+                       interleaved * out->linesize[1];
+        linesizes[2] = out->linesize[2] +
+                       interleaved * out->linesize[2];
+
+        av_image_copy(dst, linesizes, src, s->input_views[i]->linesize,
+                      s->input_views[i]->format,
+                      s->input_views[i]->width,
+                      s->input_views[i]->height);
     }
 }
 
-static av_always_inline void spatial_frame_pack(FramepackContext *s, AVFrame *dst)
+static av_always_inline void spatial_frame_pack(AVFilterLink *outlink,
+                                                AVFrame *dst)
 {
+    AVFilterContext *ctx = outlink->src;
+    FramepackContext *s = ctx->priv;
     switch (s->format) {
     case AV_STEREO3D_SIDEBYSIDE:
-        horizontal_frame_pack(s, dst, 0);
+        horizontal_frame_pack(outlink, dst, 0);
         break;
     case AV_STEREO3D_COLUMNS:
-        horizontal_frame_pack(s, dst, 1);
+        horizontal_frame_pack(outlink, dst, 1);
         break;
     case AV_STEREO3D_TOPBOTTOM:
-        vertical_frame_pack(s, dst, 0);
+        vertical_frame_pack(outlink, dst, 0);
         break;
     case AV_STEREO3D_LINES:
-        vertical_frame_pack(s, dst, 1);
+        vertical_frame_pack(outlink, dst, 1);
         break;
     }
 }
 
+static int try_push_frame(AVFilterContext *ctx);
+
 static int filter_frame_left(AVFilterLink *inlink, AVFrame *frame)
 {
     FramepackContext *s = inlink->dst->priv;
     s->input_views[LEFT] = frame;
-    return 0;
+    return try_push_frame(inlink->dst);
 }
 
 static int filter_frame_right(AVFilterLink *inlink, AVFrame *frame)
 {
     FramepackContext *s = inlink->dst->priv;
     s->input_views[RIGHT] = frame;
-    return 0;
+    return try_push_frame(inlink->dst);
 }
 
 static int request_frame(AVFilterLink *outlink)
 {
     AVFilterContext *ctx = outlink->src;
     FramepackContext *s = ctx->priv;
-    AVStereo3D *stereo;
     int ret, i;
 
     /* get a frame on the either input, stop as soon as a video ends */
@@ -262,7 +299,18 @@ static int request_frame(AVFilterLink *outlink)
                 return ret;
         }
     }
+    return 0;
+}
 
+static int try_push_frame(AVFilterContext *ctx)
+{
+    FramepackContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AVStereo3D *stereo;
+    int ret, i;
+
+    if (!(s->input_views[0] && s->input_views[1]))
+        return 0;
     if (s->format == AV_STEREO3D_FRAMESEQUENCE) {
         if (s->double_pts == AV_NOPTS_VALUE)
             s->double_pts = s->input_views[LEFT]->pts;
@@ -289,7 +337,7 @@ static int request_frame(AVFilterLink *outlink)
         if (!dst)
             return AVERROR(ENOMEM);
 
-        spatial_frame_pack(s, dst);
+        spatial_frame_pack(outlink, dst);
 
         // get any property from the original frame
         ret = av_frame_copy_props(dst, s->input_views[LEFT]);
@@ -315,7 +363,7 @@ static int request_frame(AVFilterLink *outlink)
 
 #define OFFSET(x) offsetof(FramepackContext, x)
 #define V AV_OPT_FLAG_VIDEO_PARAM
-static const AVOption options[] = {
+static const AVOption framepack_options[] = {
     { "format", "Frame pack output format", OFFSET(format), AV_OPT_TYPE_INT,
         { .i64 = AV_STEREO3D_SIDEBYSIDE }, 0, INT_MAX, .flags = V, .unit = "format" },
     { "sbs", "Views are packed next to each other", 0, AV_OPT_TYPE_CONST,
@@ -331,12 +379,7 @@ static const AVOption options[] = {
     { NULL },
 };
 
-static const AVClass framepack_class = {
-    .class_name = "framepack",
-    .item_name  = av_default_item_name,
-    .option     = options,
-    .version    = LIBAVUTIL_VERSION_INT,
-};
+AVFILTER_DEFINE_CLASS(framepack);
 
 static const AVFilterPad framepack_inputs[] = {
     {
diff --git a/libavfilter/vf_framerate.c b/libavfilter/vf_framerate.c
new file mode 100644
index 00000000..237a4873
--- /dev/null
+++ b/libavfilter/vf_framerate.c
@@ -0,0 +1,731 @@
+/*
+ * Copyright (C) 2012 Mark Himsley
+ *
+ * get_scene_score() Copyright (c) 2011 Stefano Sabatini
+ * taken from libavfilter/vf_select.c
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * filter for upsampling or downsampling a progressive source
+ */
+
+#define DEBUG
+
+#include "libavutil/avassert.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/internal.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/pixelutils.h"
+
+#include "avfilter.h"
+#include "internal.h"
+#include "video.h"
+
+#define N_SRCE 3
+
+typedef struct FrameRateContext {      // private state of the framerate filter (ctx->priv)
+    const AVClass *class;
+    // parameters
+    AVRational dest_frame_rate;         ///< output frames per second
+    int flags;                          ///< flags affecting frame rate conversion algorithm
+    double scene_score;                 ///< score that denotes a scene change has happened
+    int interp_start;                   ///< start of range to apply linear interpolation
+    int interp_end;                     ///< end of range to apply linear interpolation
+
+    int line_size[4];                   ///< bytes of pixel data per line for each plane
+    int vsub;                           ///< right-shift applied to the frame height for planes 1 and 2
+
+    int frst, next, prev, crnt, last;   ///< slot indices into srce[] (next/prev presumably too - confirm)
+    int pending_srce_frames;            ///< how many input frames are still waiting to be processed
+    int flush;                          ///< are we flushing final frames
+    int pending_end_frame;              ///< flag indicating we are waiting to call filter_frame()
+
+    AVRational srce_time_base;          ///< timebase of source
+
+    AVRational dest_time_base;          ///< timebase of destination
+    int32_t dest_frame_num;             ///< presumably a running output frame counter - TODO confirm
+    int64_t last_dest_frame_pts;        ///< pts of the last frame output
+    int64_t average_srce_pts_dest_delta;///< average input pts delta converted from input rate to output rate
+    int64_t average_dest_pts_delta;     ///< calculated average output pts delta
+
+    av_pixelutils_sad_fn sad;           ///< Sum of the absolute difference function (scene detect only)
+    double prev_mafd;                   ///< previous MAFD                           (scene detect only)
+
+    AVFrame *srce[N_SRCE];              ///< buffered source frames
+    int64_t srce_pts_dest[N_SRCE];      ///< pts for source frames scaled to output timebase
+    int64_t pts;                        ///< pts of frame we are working on
+
+    int (*blend_frames)(AVFilterContext *ctx, float interpolate,
+                        AVFrame *copy_src1, AVFrame *copy_src2); ///< blend routine; blend_frames16() has this signature
+    int max;                            ///< total blend weight: the two source factors sum to this
+    int bitdepth;                       ///< used as the final right-shift when blending 16-bit samples
+    AVFrame *work;                      ///< work-space frame allocated by the blend routine for its output
+} FrameRateContext;
+
+#define OFFSET(x) offsetof(FrameRateContext, x)
+#define V AV_OPT_FLAG_VIDEO_PARAM
+#define F AV_OPT_FLAG_FILTERING_PARAM
+#define FRAMERATE_FLAG_SCD 01
+
+static const AVOption framerate_options[] = { // user options; default flags value 1 equals FRAMERATE_FLAG_SCD (01)
+    {"fps",                 "required output frames per second rate", OFFSET(dest_frame_rate), AV_OPT_TYPE_VIDEO_RATE, {.str="50"},             0,       INT_MAX, V|F },
+    // linear-interpolation range (0..255) and scene-change threshold
+    {"interp_start",        "point to start linear interpolation",    OFFSET(interp_start),    AV_OPT_TYPE_INT,      {.i64=15},                 0,       255,     V|F },
+    {"interp_end",          "point to end linear interpolation",      OFFSET(interp_end),      AV_OPT_TYPE_INT,      {.i64=240},                0,       255,     V|F },
+    {"scene",               "scene change level",                     OFFSET(scene_score),     AV_OPT_TYPE_DOUBLE,   {.dbl=7.0},                0,       INT_MAX, V|F },
+    // algorithm flags; both constant names below map to FRAMERATE_FLAG_SCD
+    {"flags",               "set flags",                              OFFSET(flags),           AV_OPT_TYPE_FLAGS,    {.i64=1},                  0,       INT_MAX, V|F, "flags" },
+    {"scene_change_detect", "enable scene change detection",          0,                       AV_OPT_TYPE_CONST,    {.i64=FRAMERATE_FLAG_SCD}, INT_MIN, INT_MAX, V|F, "flags" },
+    {"scd",                 "enable scene change detection",          0,                       AV_OPT_TYPE_CONST,    {.i64=FRAMERATE_FLAG_SCD}, INT_MIN, INT_MAX, V|F, "flags" },
+    // end-of-table sentinel
+    {NULL}
+};
+
+AVFILTER_DEFINE_CLASS(framerate);
+
+/* Free slot `last` unless slot `last-1` holds the same frame, shift entries one slot toward `last`, clear slot `frst`. */
+static void next_source(AVFilterContext *ctx)
+{
+    FrameRateContext *s = ctx->priv;
+    int slot = s->last;
+    ff_dlog(ctx,  "next_source()\n");
+    if (s->srce[slot] && s->srce[slot] != s->srce[slot - 1]) {
+        ff_dlog(ctx, "next_source() unlink %d\n", slot);
+        av_frame_free(&s->srce[slot]);
+    }
+    while (slot > s->frst) {
+        ff_dlog(ctx, "next_source() copy %d to %d\n", slot - 1, slot);
+        s->srce[slot] = s->srce[slot - 1];
+        slot--;
+    }
+    ff_dlog(ctx, "next_source() make %d null\n", s->frst);
+    s->srce[s->frst] = NULL;
+}
+
+/*
+ * Sum of absolute differences over an 8x8 block of 16-bit samples.
+ * stride1/stride2 are counted in uint16_t elements, not bytes.
+ */
+static av_always_inline int64_t sad_8x8_16(const uint16_t *src1, ptrdiff_t stride1,
+                                           const uint16_t *src2, ptrdiff_t stride2)
+{
+    int sum = 0;
+    int row, col;
+    for (row = 0; row < 8; row++, src1 += stride1, src2 += stride2)
+        for (col = 0; col < 8; col++)
+            sum += FFABS(src1[col] - src2[col]);
+    return sum;
+}
+
+/* Scene-change score (0..100) between two frames with 16-bit luma samples; updates s->prev_mafd. */
+static double get_scene_score16(AVFilterContext *ctx, AVFrame *crnt, AVFrame *next)
+{
+    FrameRateContext *s = ctx->priv;
+    double ret = 0;
+
+    ff_dlog(ctx, "get_scene_score16()\n");
+
+    // a missing current frame or frames of different size score 0
+    if (crnt && crnt->height == next->height && crnt->width == next->width) {
+        int x, y;
+        int64_t sad;
+        double mafd, diff;
+        const uint16_t *p1 = (const uint16_t *)crnt->data[0];
+        const uint16_t *p2 = (const uint16_t *)next->data[0];
+        const int p1_linesize = crnt->linesize[0] / 2;   // bytes -> samples
+        const int p2_linesize = next->linesize[0] / 2;   // bytes -> samples
+
+        ff_dlog(ctx, "get_scene_score16() process\n");
+        // only visit 8x8 blocks lying fully inside the picture, so the SAD
+        // never reads row padding or rows below the last one
+        for (sad = y = 0; y < crnt->height - 7; y += 8) {
+            for (x = 0; x < crnt->width - 7; x += 8) {
+                sad += sad_8x8_16(p1 + y * p1_linesize + x, p1_linesize,
+                                  p2 + y * p2_linesize + x, p2_linesize);
+            }
+        }
+        // divide in floating point: the integer division truncated the mean
+        mafd = (double)sad / ((int64_t)crnt->height * crnt->width * 3);
+        diff = fabs(mafd - s->prev_mafd);
+        ret  = av_clipf(FFMIN(mafd, diff), 0, 100.0);
+        s->prev_mafd = mafd;
+    }
+    ff_dlog(ctx, "get_scene_score16() result is:%f\n", ret);
+    return ret;
+}
+
+/* Scene-change score (0..100) between two frames with 8-bit luma samples; updates s->prev_mafd. */
+static double get_scene_score(AVFilterContext *ctx, AVFrame *crnt, AVFrame *next)
+{
+    FrameRateContext *s = ctx->priv;
+    double ret = 0;
+
+    ff_dlog(ctx, "get_scene_score()\n");
+
+    // a missing current frame or frames of different size score 0
+    if (crnt && crnt->height == next->height && crnt->width == next->width) {
+        int x, y;
+        int64_t sad;
+        double mafd, diff;
+        uint8_t *p1 = crnt->data[0];
+        uint8_t *p2 = next->data[0];
+        const int p1_linesize = crnt->linesize[0];
+        const int p2_linesize = next->linesize[0];
+
+        ff_dlog(ctx, "get_scene_score() process\n");
+        // s->sad is presumably an 8x8 SAD (the loops step by 8) - confirm where it
+        // is set up; only visit blocks lying fully inside the picture
+        for (sad = y = 0; y < crnt->height - 7; y += 8) {
+            for (x = 0; x < crnt->width - 7; x += 8) {
+                sad += s->sad(p1 + y * p1_linesize + x, p1_linesize,
+                              p2 + y * p2_linesize + x, p2_linesize);
+            }
+        }
+        emms_c();
+        // divide in floating point: the integer division truncated the mean
+        mafd = (double)sad / ((int64_t)crnt->height * crnt->width * 3);
+        diff = fabs(mafd - s->prev_mafd);
+        ret  = av_clipf(FFMIN(mafd, diff), 0, 100.0);
+        s->prev_mafd = mafd;
+    }
+    ff_dlog(ctx, "get_scene_score() result is:%f\n", ret);
+    return ret;
+}
+
+static int blend_frames16(AVFilterContext *ctx, float interpolate,
+                          AVFrame *copy_src1, AVFrame *copy_src2)
+{
+    FrameRateContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+    double interpolate_scene_score = 0;
+
+    if ((s->flags & FRAMERATE_FLAG_SCD) && copy_src2) {
+        interpolate_scene_score = get_scene_score16(ctx, copy_src1, copy_src2);
+        ff_dlog(ctx, "blend_frames16() interpolate scene score:%f\n", interpolate_scene_score);
+    }
+    // decide if the shot-change detection allows us to blend two frames
+    if (interpolate_scene_score < s->scene_score && copy_src2) {
+        uint16_t src2_factor = fabsf(interpolate) * (1 << (s->bitdepth - 8));
+        uint16_t src1_factor = s->max - src2_factor;
+        const int half = s->max / 2;
+        const int uv = (s->max + 1) * half;
+        const int shift = s->bitdepth;
+        int plane, line, pixel;
+
+        // get work-space for output frame
+        s->work = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+        if (!s->work)
+            return AVERROR(ENOMEM);
+
+        av_frame_copy_props(s->work, s->srce[s->crnt]);
+
+        ff_dlog(ctx, "blend_frames16() INTERPOLATE to create work frame\n");
+        for (plane = 0; plane < 4 && copy_src1->data[plane] && copy_src2->data[plane]; plane++) {
+            int cpy_line_width = s->line_size[plane];
+            const uint16_t *cpy_src1_data = (const uint16_t *)copy_src1->data[plane];
+            int cpy_src1_line_size = copy_src1->linesize[plane] / 2;
+            const uint16_t *cpy_src2_data = (const uint16_t *)copy_src2->data[plane];
+            int cpy_src2_line_size = copy_src2->linesize[plane] / 2;
+            int cpy_src_h = (plane > 0 && plane < 3) ? (copy_src1->height >> s->vsub) : (copy_src1->height);
+            uint16_t *cpy_dst_data = (uint16_t *)s->work->data[plane];
+            int cpy_dst_line_size = s->work->linesize[plane] / 2;
+
+            if (plane <1 || plane >2) {
+                // luma or alpha
+                for (line = 0; line < cpy_src_h; line++) {
+                    for (pixel = 0; pixel < cpy_line_width; pixel++)
+                        cpy_dst_data[pixel] = ((cpy_src1_data[pixel] * src1_factor) + (cpy_src2_data[pixel] * src2_factor) + half) >> shift;
+                    cpy_src1_data += cpy_src1_line_size;
+                    cpy_src2_data += cpy_src2_line_size;
+                    cpy_dst_data += cpy_dst_line_size;
+                }
+            } else {
+                // chroma
+                for (line = 0; line < cpy_src_h; line++) {
+                    for (pixel = 0; pixel < cpy_line_width; pixel++) {
+                        cpy_dst_data[pixel] = (((cpy_src1_data[pixel] - half) * src1_factor) + ((cpy_src2_data[pixel] - half) * src2_factor) + uv) >> shift;
+                    }
+                    cpy_src1_data += cpy_src1_line_size;
+                    cpy_src2_data += cpy_src2_line_size;
+                    cpy_dst_data += cpy_dst_line_size;
+                }
+            }
+        }
+        return 1;
+    }
+    return 0;
+}
+
+static int blend_frames8(AVFilterContext *ctx, float interpolate,
+                         AVFrame *copy_src1, AVFrame *copy_src2)
+{
+    FrameRateContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+    double interpolate_scene_score = 0;
+
+    if ((s->flags & FRAMERATE_FLAG_SCD) && copy_src2) {
+        interpolate_scene_score = get_scene_score(ctx, copy_src1, copy_src2);
+        ff_dlog(ctx, "blend_frames8() interpolate scene score:%f\n", interpolate_scene_score);
+    }
+    // decide if the shot-change detection allows us to blend two frames
+    if (interpolate_scene_score < s->scene_score && copy_src2) {
+        uint16_t src2_factor = fabsf(interpolate);
+        uint16_t src1_factor = 256 - src2_factor;
+        int plane, line, pixel;
+
+        // get work-space for output frame
+        s->work = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+        if (!s->work)
+            return AVERROR(ENOMEM);
+
+        av_frame_copy_props(s->work, s->srce[s->crnt]);
+
+        ff_dlog(ctx, "blend_frames8() INTERPOLATE to create work frame\n");
+        for (plane = 0; plane < 4 && copy_src1->data[plane] && copy_src2->data[plane]; plane++) {
+            int cpy_line_width = s->line_size[plane];
+            uint8_t *cpy_src1_data = copy_src1->data[plane];
+            int cpy_src1_line_size = copy_src1->linesize[plane];
+            uint8_t *cpy_src2_data = copy_src2->data[plane];
+            int cpy_src2_line_size = copy_src2->linesize[plane];
+            int cpy_src_h = (plane > 0 && plane < 3) ? (copy_src1->height >> s->vsub) : (copy_src1->height);
+            uint8_t *cpy_dst_data = s->work->data[plane];
+            int cpy_dst_line_size = s->work->linesize[plane];
+            if (plane <1 || plane >2) {
+                // luma or alpha
+                for (line = 0; line < cpy_src_h; line++) {
+                    for (pixel = 0; pixel < cpy_line_width; pixel++) {
+                        // integer version of (src1 * src1_factor) + (src2 + src2_factor) + 0.5
+                        // 0.5 is for rounding
+                        // 128 is the integer representation of 0.5 << 8
+                        cpy_dst_data[pixel] = ((cpy_src1_data[pixel] * src1_factor) + (cpy_src2_data[pixel] * src2_factor) + 128) >> 8;
+                    }
+                    cpy_src1_data += cpy_src1_line_size;
+                    cpy_src2_data += cpy_src2_line_size;
+                    cpy_dst_data += cpy_dst_line_size;
+                }
+            } else {
+                // chroma
+                for (line = 0; line < cpy_src_h; line++) {
+                    for (pixel = 0; pixel < cpy_line_width; pixel++) {
+                        // as above
+                        // because U and V are based around 128 we have to subtract 128 from the components.
+                        // 32896 is the integer representation of 128.5 << 8
+                        cpy_dst_data[pixel] = (((cpy_src1_data[pixel] - 128) * src1_factor) + ((cpy_src2_data[pixel] - 128) * src2_factor) + 32896) >> 8;
+                    }
+                    cpy_src1_data += cpy_src1_line_size;
+                    cpy_src2_data += cpy_src2_line_size;
+                    cpy_dst_data += cpy_dst_line_size;
+                }
+            }
+        }
+        return 1;
+    }
+    return 0;
+}
+
+static int process_work_frame(AVFilterContext *ctx, int stop)
+{
+    FrameRateContext *s = ctx->priv;
+    int64_t work_next_pts;
+    AVFrame *copy_src1;
+    float interpolate;
+
+    ff_dlog(ctx, "process_work_frame()\n");
+
+    ff_dlog(ctx, "process_work_frame() pending_input_frames %d\n", s->pending_srce_frames);
+
+    if (s->srce[s->prev]) ff_dlog(ctx, "process_work_frame() srce prev pts:%"PRId64"\n", s->srce[s->prev]->pts);
+    if (s->srce[s->crnt]) ff_dlog(ctx, "process_work_frame() srce crnt pts:%"PRId64"\n", s->srce[s->crnt]->pts);
+    if (s->srce[s->next]) ff_dlog(ctx, "process_work_frame() srce next pts:%"PRId64"\n", s->srce[s->next]->pts);
+
+    if (!s->srce[s->crnt]) {
+        // the filter cannot do anything
+        ff_dlog(ctx, "process_work_frame() no current frame cached: move on to next frame, do not output a frame\n");
+        next_source(ctx);
+        return 0;
+    }
+
+    work_next_pts = s->pts + s->average_dest_pts_delta;
+
+    ff_dlog(ctx, "process_work_frame() work crnt pts:%"PRId64"\n", s->pts);
+    ff_dlog(ctx, "process_work_frame() work next pts:%"PRId64"\n", work_next_pts);
+    if (s->srce[s->prev])
+        ff_dlog(ctx, "process_work_frame() srce prev pts:%"PRId64" at dest time base:%u/%u\n",
+            s->srce_pts_dest[s->prev], s->dest_time_base.num, s->dest_time_base.den);
+    if (s->srce[s->crnt])
+        ff_dlog(ctx, "process_work_frame() srce crnt pts:%"PRId64" at dest time base:%u/%u\n",
+            s->srce_pts_dest[s->crnt], s->dest_time_base.num, s->dest_time_base.den);
+    if (s->srce[s->next])
+        ff_dlog(ctx, "process_work_frame() srce next pts:%"PRId64" at dest time base:%u/%u\n",
+            s->srce_pts_dest[s->next], s->dest_time_base.num, s->dest_time_base.den);
+
+    av_assert0(s->srce[s->next]);
+
+    // should filter be skipping input frame (output frame rate is lower than input frame rate)
+    if (!s->flush && s->pts >= s->srce_pts_dest[s->next]) {
+        ff_dlog(ctx, "process_work_frame() work crnt pts >= srce next pts: SKIP FRAME, move on to next frame, do not output a frame\n");
+        next_source(ctx);
+        s->pending_srce_frames--;
+        return 0;
+    }
+
+    // calculate interpolation
+    interpolate = ((s->pts - s->srce_pts_dest[s->crnt]) * 256.0 / s->average_srce_pts_dest_delta);
+    ff_dlog(ctx, "process_work_frame() interpolate:%f/256\n", interpolate);
+    copy_src1 = s->srce[s->crnt];
+    if (interpolate > s->interp_end) {
+        ff_dlog(ctx, "process_work_frame() source is:NEXT\n");
+        copy_src1 = s->srce[s->next];
+    }
+    if (s->srce[s->prev] && interpolate < -s->interp_end) {
+        ff_dlog(ctx, "process_work_frame() source is:PREV\n");
+        copy_src1 = s->srce[s->prev];
+    }
+
+    // decide whether to blend two frames
+    if ((interpolate >= s->interp_start && interpolate <= s->interp_end) || (interpolate <= -s->interp_start && interpolate >= -s->interp_end)) {
+        AVFrame *copy_src2;
+
+        if (interpolate > 0) {
+            ff_dlog(ctx, "process_work_frame() interpolate source is:NEXT\n");
+            copy_src2 = s->srce[s->next];
+        } else {
+            ff_dlog(ctx, "process_work_frame() interpolate source is:PREV\n");
+            copy_src2 = s->srce[s->prev];
+        }
+        if (s->blend_frames(ctx, interpolate, copy_src1, copy_src2))
+            goto copy_done;
+        else
+            ff_dlog(ctx, "process_work_frame() CUT - DON'T INTERPOLATE\n");
+    }
+
+    ff_dlog(ctx, "process_work_frame() COPY to the work frame\n");
+    // copy the frame we decided is our base source
+    s->work = av_frame_clone(copy_src1);
+    if (!s->work)
+        return AVERROR(ENOMEM);
+
+copy_done:
+    s->work->pts = s->pts;
+
+    // should filter be re-using input frame (output frame rate is higher than input frame rate)
+    if (!s->flush && (work_next_pts + s->average_dest_pts_delta) < (s->srce_pts_dest[s->crnt] + s->average_srce_pts_dest_delta)) {
+        ff_dlog(ctx, "process_work_frame() REPEAT FRAME\n");
+    } else {
+        ff_dlog(ctx, "process_work_frame() CONSUME FRAME, move to next frame\n");
+        s->pending_srce_frames--;
+        next_source(ctx);
+    }
+    ff_dlog(ctx, "process_work_frame() output a frame\n");
+    s->dest_frame_num++;
+    if (stop)
+        s->pending_end_frame = 0;
+    s->last_dest_frame_pts = s->work->pts;
+
+    return ff_filter_frame(ctx->outputs[0], s->work);
+}
+
+/* Convert the pts of each cached source frame (prev, crnt, next) into the output time base. */
+static void set_srce_frame_dest_pts(AVFilterContext *ctx)
+{
+    FrameRateContext *s = ctx->priv;
+    const int slots[3] = { s->prev, s->crnt, s->next };
+    int n;
+
+    ff_dlog(ctx, "set_srce_frame_output_pts()\n");
+    for (n = 0; n < 3; n++) {
+        // an empty slot keeps whatever srce_pts_dest value it already had
+        if (s->srce[slots[n]])
+            s->srce_pts_dest[slots[n]] = av_rescale_q(s->srce[slots[n]]->pts, s->srce_time_base, s->dest_time_base);
+    }
+}
+
+/* Refresh the averaged source frame spacing and pick s->pts, the pts of the next output frame. */
+static void set_work_frame_pts(AVFilterContext *ctx)
+{
+    FrameRateContext *s = ctx->priv;
+    int64_t avg_delta = s->average_srce_pts_dest_delta; // running average left by earlier calls
+    int64_t step;
+
+    ff_dlog(ctx, "set_work_frame_pts()\n");
+
+    av_assert0(s->srce[s->next]);
+    av_assert0(s->srce[s->crnt]);
+
+    ff_dlog(ctx, "set_work_frame_pts() srce crnt pts:%"PRId64"\n", s->srce[s->crnt]->pts);
+    ff_dlog(ctx, "set_work_frame_pts() srce next pts:%"PRId64"\n", s->srce[s->next]->pts);
+    if (s->srce[s->prev])
+        ff_dlog(ctx, "set_work_frame_pts() srce prev pts:%"PRId64"\n", s->srce[s->prev]->pts);
+    ff_dlog(ctx, "set_work_frame_pts() initial average srce pts:%"PRId64"\n", avg_delta);
+    set_srce_frame_dest_pts(ctx);
+
+    // latest spacing is next - crnt, or crnt - prev when that is zero; zero spacing leaves the average alone
+    step = s->srce_pts_dest[s->next] - s->srce_pts_dest[s->crnt];
+    if (!step && s->srce[s->prev])
+        step = s->srce_pts_dest[s->crnt] - s->srce_pts_dest[s->prev];
+    if (step)
+        avg_delta = avg_delta ? ((avg_delta + step) >> 1) : step;
+
+    s->average_srce_pts_dest_delta = avg_delta;
+    ff_dlog(ctx, "set_work_frame_pts() average srce pts:%"PRId64"\n", avg_delta);
+    ff_dlog(ctx, "set_work_frame_pts() average srce pts:%"PRId64" at dest time base:%u/%u\n",
+            s->average_srce_pts_dest_delta, s->dest_time_base.num, s->dest_time_base.den);
+
+    // the output frame spacing is computed only while it is still zero
+    if (ctx->inputs[0] && !s->average_dest_pts_delta) {
+        int64_t d = av_q2d(av_inv_q(av_mul_q(s->dest_time_base, s->dest_frame_rate)));
+        s->average_dest_pts_delta = d;
+        ff_dlog(ctx, "set_work_frame_pts() average dest pts delta:%"PRId64"\n", s->average_dest_pts_delta);
+    }
+
+    // the first output frame takes the current source pts, later ones advance by one output step
+    if (s->dest_frame_num)
+        s->pts = s->last_dest_frame_pts + s->average_dest_pts_delta;
+    else
+        s->pts = s->last_dest_frame_pts = s->srce_pts_dest[s->crnt];
+
+    ff_dlog(ctx, "set_work_frame_pts() calculated pts:%"PRId64" at dest time base:%u/%u\n",
+            s->pts, s->dest_time_base.num, s->dest_time_base.den);
+}
+
+/* Reset the output frame counter and set up the indices into the source frame cache. */
+static av_cold int init(AVFilterContext *ctx)
+{
+    FrameRateContext *s = ctx->priv;
+
+    // "next" is the slot just below the current frame, "prev" the slot just above it
+    s->last = N_SRCE - 1;
+    s->crnt = (N_SRCE) >> 1;
+    s->prev = s->crnt + 1;
+    s->next = s->crnt - 1;
+
+    s->dest_frame_num = 0;
+    return 0;
+}
+
+/* Free every cached source frame; a slot that aliases its successor is skipped to avoid a double free. */
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    FrameRateContext *s = ctx->priv;
+    int i;
+
+    for (i = s->frst; i < s->last; i++) // frst included: filter_frame() stores an owned frame there
+        if (s->srce[i] && (s->srce[i] != s->srce[i + 1]))
+            av_frame_free(&s->srce[i]);
+    av_frame_free(&s->srce[s->last]);
+}
+
+/* Advertise the supported formats: planar YUV at 8 bits (plus YUVJ variants) and at 9, 10 and 12 bits. */
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat pix_fmts[] = {
+        AV_PIX_FMT_YUV410P,
+        AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUVJ411P,
+        AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUVJ420P,
+        AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUVJ422P,
+        AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUVJ440P,
+        AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUVJ444P,
+        AV_PIX_FMT_YUV420P9, AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV420P12,
+        AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV422P12,
+        AV_PIX_FMT_YUV444P9, AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV444P12,
+        AV_PIX_FMT_NONE
+    };
+    AVFilterFormats *supported = ff_make_format_list(pix_fmts);
+
+    // a NULL list is reported as an allocation failure
+    return supported ? ff_set_common_formats(ctx, supported) : AVERROR(ENOMEM);
+}
+
+/* Input link setup: cache per-plane line sizes, bit depth and chroma shift, then pick the SAD and blend helpers. */
+static int config_input(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    FrameRateContext *s = ctx->priv;
+    const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(inlink->format);
+    int p;
+
+    // av_image_get_linesize() reports bytes per line, not samples
+    for (p = 0; p < 4; p++)
+        s->line_size[p] = av_image_get_linesize(inlink->format, inlink->w, p);
+
+    // depth of the first component and the vertical chroma shift, as used by the blenders
+    s->vsub = pix_desc->log2_chroma_h;
+    s->bitdepth = pix_desc->comp[0].depth;
+
+    s->sad = av_pixelutils_get_sad_fn(3, 3, 2, s); // 8x8 both sources aligned
+    if (!s->sad)
+        return AVERROR(EINVAL);
+
+    // 8-bit input gets the byte blender, anything deeper the 16-bit one
+    s->blend_frames = s->bitdepth == 8 ? blend_frames8 : blend_frames16;
+
+    // 1 << bitdepth is the weight total blend_frames16() works with
+    s->max = 1 << (s->bitdepth);
+    s->srce_time_base = inlink->time_base;
+
+    return 0;
+}
+
+/* Input pad callback: cache the incoming frame in the first slot, refresh timestamps, then try to output. */
+static int filter_frame(AVFilterLink *inlink, AVFrame *inpicref)
+{
+    AVFilterContext *ctx = inlink->dst;
+    FrameRateContext *s = ctx->priv;
+
+    // one more source frame is waiting to be consumed
+    s->pending_srce_frames++;
+    if (inpicref->interlaced_frame)
+        av_log(ctx, AV_LOG_WARNING, "Interlaced frame found - the output will not be correct.\n");
+
+    // whatever the first slot still holds is released before it takes the new frame
+    av_frame_free(&s->srce[s->frst]);
+    s->srce[s->frst] = inpicref;
+
+    if (s->pending_end_frame || !s->srce[s->crnt]) {
+        set_srce_frame_dest_pts(ctx);
+    } else {
+        set_work_frame_pts(ctx);
+        s->pending_end_frame = 1;
+    }
+
+    return process_work_frame(ctx, 1);
+}
+
+/* Output link setup: derive an output time base fine enough for the target frame rate and publish it. */
+static int config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    FrameRateContext *s = ctx->priv;
+    int64_t tb_num, tb_den;
+    int exact;
+
+    ff_dlog(ctx, "config_output()\n");
+
+    ff_dlog(ctx,
+           "config_output() input time base:%u/%u (%f)\n",
+           ctx->inputs[0]->time_base.num,ctx->inputs[0]->time_base.den,
+           av_q2d(ctx->inputs[0]->time_base));
+
+    // make sure timebase is small enough to hold the framerate
+    tb_num = av_gcd((int64_t)s->srce_time_base.num * s->dest_frame_rate.num,
+                    (int64_t)s->srce_time_base.den * s->dest_frame_rate.den);
+    tb_den = (int64_t)s->srce_time_base.den * s->dest_frame_rate.num;
+    // a zero result from av_reduce() is what triggers the inexact-conversion warning below
+    exact  = av_reduce(&s->dest_time_base.num, &s->dest_time_base.den, tb_num, tb_den, INT_MAX);
+
+    av_log(ctx, AV_LOG_INFO,
+           "time base:%u/%u -> %u/%u exact:%d\n",
+           s->srce_time_base.num, s->srce_time_base.den,
+           s->dest_time_base.num, s->dest_time_base.den, exact);
+    if (!exact)
+        av_log(ctx, AV_LOG_WARNING, "Timebase conversion is not exact\n");
+
+    outlink->time_base  = s->dest_time_base;
+    outlink->frame_rate = s->dest_frame_rate;
+
+    ff_dlog(ctx,
+           "config_output() output time base:%u/%u (%f) w:%d h:%d\n",
+           outlink->time_base.num, outlink->time_base.den,
+           av_q2d(outlink->time_base),
+           outlink->w, outlink->h);
+
+    av_log(ctx, AV_LOG_INFO, "fps -> fps:%u/%u scene score:%f interpolate start:%d end:%d\n",
+            s->dest_frame_rate.num, s->dest_frame_rate.den,
+            s->scene_score, s->interp_start, s->interp_end);
+
+    return 0;
+}
+
+/* Output pad callback: pull a frame from upstream, or repeat/flush from the frames already cached. */
+static int request_frame(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    FrameRateContext *s = ctx->priv;
+    int val, i;
+
+    ff_dlog(ctx, "request_frame()\n");
+
+    // if there is no "next" frame AND we are not in flush then get one from our input filter
+    if (!(s->srce[s->frst] || s->flush)) {
+        ff_dlog(ctx, "request_frame() call source's request_frame()\n");
+        val = ff_request_frame(ctx->inputs[0]);
+        if (val == AVERROR_EOF) {
+            s->flush = 1; // from now on this function takes the flush path below
+        } else if (val < 0) {
+            ff_dlog(ctx, "request_frame() source's request_frame() returned error:%d\n", val);
+            return val;
+        }
+        ff_dlog(ctx, "request_frame() source's request_frame() returned:%d\n", val);
+        return 0;
+    }
+
+    ff_dlog(ctx, "request_frame() REPEAT or FLUSH\n");
+    if (s->pending_srce_frames <= 0) {
+        ff_dlog(ctx, "request_frame() nothing else to do, return:EOF\n");
+        return AVERROR_EOF;
+    }
+
+    // otherwise, make brand-new frame and pass to our output filter
+    ff_dlog(ctx, "request_frame() FLUSH\n");
+
+    // back fill at end of file when source has no more frames
+    for (i = s->last; i > s->frst; i--) {
+        if (s->srce[i - 1] || !s->srce[i])
+            continue;
+        ff_dlog(ctx, "request_frame() copy:%d to:%d\n", i, i - 1);
+        s->srce[i - 1] = s->srce[i];
+    }
+
+    set_work_frame_pts(ctx);
+    return process_work_frame(ctx, 0);
+}
+
+static const AVFilterPad framerate_inputs[] = { // the single video input pad
+    {
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .name         = "default",
+        .filter_frame = filter_frame, // receives each source frame
+        .config_props = config_input, // input link setup
+    },
+    { NULL } // end of list
+};
+
+static const AVFilterPad framerate_outputs[] = { // the single video output pad
+    {
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .name          = "default",
+        .config_props  = config_output, // output link setup
+        .request_frame = request_frame, // pulls from upstream or flushes cached frames
+    },
+    { NULL } // end of list
+};
+
+AVFilter ff_vf_framerate = { // definition of the "framerate" video filter
+    .name          = "framerate",
+    .description   = NULL_IF_CONFIG_SMALL("Upsamples or downsamples progressive source between specified frame rates."),
+    .priv_class    = &framerate_class,
+    .priv_size     = sizeof(FrameRateContext),
+    .query_formats = query_formats,
+    .init          = init,
+    .uninit        = uninit,
+    .outputs       = framerate_outputs,
+    .inputs        = framerate_inputs,
+};
diff --git a/libavfilter/vf_framestep.c b/libavfilter/vf_framestep.c
index 09945e17..6f198b8f 100644
--- a/libavfilter/vf_framestep.c
+++ b/libavfilter/vf_framestep.c
@@ -49,7 +49,6 @@ static int config_output_props(AVFilterLink *outlink)
     FrameStepContext *framestep = ctx->priv;
     AVFilterLink *inlink = ctx->inputs[0];
 
-    outlink->flags |= FF_LINK_FLAG_REQUEST_LOOP;
     outlink->frame_rate =
         av_div_q(inlink->frame_rate, (AVRational){framestep->frame_step, 1});
 
diff --git a/libavfilter/vf_frei0r.c b/libavfilter/vf_frei0r.c
index bbefe51c..9aa3edcf 100644
--- a/libavfilter/vf_frei0r.c
+++ b/libavfilter/vf_frei0r.c
@@ -30,6 +30,7 @@
 #include "config.h"
 #include "libavutil/avstring.h"
 #include "libavutil/common.h"
+#include "libavutil/eval.h"
 #include "libavutil/imgutils.h"
 #include "libavutil/internal.h"
 #include "libavutil/mathematics.h"
@@ -104,7 +105,7 @@ static int set_param(AVFilterContext *ctx, f0r_param_info_t info, int index, cha
         break;
 
     case F0R_PARAM_DOUBLE:
-        val.d = strtod(param, &tail);
+        val.d = av_strtod(param, &tail);
         if (*tail || val.d == HUGE_VAL)
             goto fail;
         break;
@@ -370,11 +371,14 @@ static int query_formats(AVFilterContext *ctx)
 {
     Frei0rContext *s = ctx->priv;
     AVFilterFormats *formats = NULL;
+    int ret;
 
     if        (s->plugin_info.color_model == F0R_COLOR_MODEL_BGRA8888) {
-        ff_add_format(&formats, AV_PIX_FMT_BGRA);
+        if ((ret = ff_add_format(&formats, AV_PIX_FMT_BGRA)) < 0)
+            return ret;
     } else if (s->plugin_info.color_model == F0R_COLOR_MODEL_RGBA8888) {
-        ff_add_format(&formats, AV_PIX_FMT_RGBA);
+        if ((ret = ff_add_format(&formats, AV_PIX_FMT_RGBA)) < 0)
+            return ret;
     } else {                                   /* F0R_COLOR_MODEL_PACKED32 */
         static const enum AVPixelFormat pix_fmts[] = {
             AV_PIX_FMT_BGRA, AV_PIX_FMT_ARGB, AV_PIX_FMT_ABGR, AV_PIX_FMT_ARGB, AV_PIX_FMT_NONE
diff --git a/libavfilter/vf_fspp.c b/libavfilter/vf_fspp.c
index 7bdaa91b..c6989046 100644
--- a/libavfilter/vf_fspp.c
+++ b/libavfilter/vf_fspp.c
@@ -48,7 +48,7 @@ static const AVOption fspp_options[] = {
     { "quality",       "set quality",                          OFFSET(log2_count),    AV_OPT_TYPE_INT, {.i64 = 4},   4, MAX_LEVEL, FLAGS },
     { "qp",            "force a constant quantizer parameter", OFFSET(qp),            AV_OPT_TYPE_INT, {.i64 = 0},   0, 64,        FLAGS },
     { "strength",      "set filter strength",                  OFFSET(strength),      AV_OPT_TYPE_INT, {.i64 = 0}, -15, 32,        FLAGS },
-    { "use_bframe_qp", "use B-frames' QP",                     OFFSET(use_bframe_qp), AV_OPT_TYPE_INT, {.i64 = 0},   0, 1,         FLAGS },
+    { "use_bframe_qp", "use B-frames' QP",                     OFFSET(use_bframe_qp), AV_OPT_TYPE_BOOL,{.i64 = 0},   0, 1,         FLAGS },
     { NULL }
 };
 
@@ -526,7 +526,7 @@ static int config_input(AVFilterLink *inlink)
         return AVERROR(ENOMEM);
 
     if (!fspp->use_bframe_qp && !fspp->qp) {
-        fspp->non_b_qp_alloc_size = FF_CEIL_RSHIFT(inlink->w, 4) * FF_CEIL_RSHIFT(inlink->h, 4);
+        fspp->non_b_qp_alloc_size = AV_CEIL_RSHIFT(inlink->w, 4) * AV_CEIL_RSHIFT(inlink->h, 4);
         fspp->non_b_qp_table = av_calloc(fspp->non_b_qp_alloc_size, sizeof(*fspp->non_b_qp_table));
         if (!fspp->non_b_qp_table)
             return AVERROR(ENOMEM);
@@ -590,11 +590,11 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
             /* if the qp stride is not set, it means the QP are only defined on
              * a line basis */
            if (!qp_stride) {
-                w = FF_CEIL_RSHIFT(inlink->w, 4);
+                w = AV_CEIL_RSHIFT(inlink->w, 4);
                 h = 1;
             } else {
                 w = qp_stride;
-                h = FF_CEIL_RSHIFT(inlink->h, 4);
+                h = AV_CEIL_RSHIFT(inlink->h, 4);
             }
             if (w * h > fspp->non_b_qp_alloc_size) {
                 int ret = av_reallocp_array(&fspp->non_b_qp_table, w, h);
@@ -615,8 +615,8 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
             qp_table = fspp->non_b_qp_table;
 
         if (qp_table || fspp->qp) {
-            const int cw = FF_CEIL_RSHIFT(inlink->w, fspp->hsub);
-            const int ch = FF_CEIL_RSHIFT(inlink->h, fspp->vsub);
+            const int cw = AV_CEIL_RSHIFT(inlink->w, fspp->hsub);
+            const int ch = AV_CEIL_RSHIFT(inlink->h, fspp->vsub);
 
             /* get a new frame if in-place is not possible or if the dimensions
              * are not multiple of 8 */
diff --git a/libavfilter/vf_fspp.h b/libavfilter/vf_fspp.h
index 237ffb1d..74a34473 100644
--- a/libavfilter/vf_fspp.h
+++ b/libavfilter/vf_fspp.h
@@ -44,12 +44,12 @@
 typedef int32_t int_simd16_t;
 static const int16_t FIX_0_382683433   = FIX(0.382683433, 14);
 static const int16_t FIX_0_541196100   = FIX(0.541196100, 14);
-static const int16_t FIX_0_707106781   = FIX(0.707106781, 14);
+static const int16_t FIX_0_707106781   = FIX(M_SQRT1_2  , 14);
 static const int16_t FIX_1_306562965   = FIX(1.306562965, 14);
-static const int16_t FIX_1_414213562_A = FIX(1.414213562, 14);
+static const int16_t FIX_1_414213562_A = FIX(M_SQRT2    , 14);
 static const int16_t FIX_1_847759065   = FIX(1.847759065, 13);
 static const int16_t FIX_2_613125930   = FIX(-2.613125930, 13);
-static const int16_t FIX_1_414213562   = FIX(1.414213562, 13);
+static const int16_t FIX_1_414213562   = FIX(M_SQRT2    , 13);
 static const int16_t FIX_1_082392200   = FIX(1.082392200, 13);
 
 typedef struct FSPPContext {
diff --git a/libavfilter/vf_geq.c b/libavfilter/vf_geq.c
index 887594f8..88d3b75a 100644
--- a/libavfilter/vf_geq.c
+++ b/libavfilter/vf_geq.c
@@ -26,6 +26,7 @@
  * ported by Clément Bœsch for FFmpeg.
  */
 
+#include "libavutil/avassert.h"
 #include "libavutil/avstring.h"
 #include "libavutil/eval.h"
 #include "libavutil/opt.h"
@@ -74,8 +75,8 @@ static inline double getpix(void *priv, double x, double y, int plane)
     AVFrame *picref = geq->picref;
     const uint8_t *src = picref->data[plane];
     const int linesize = picref->linesize[plane];
-    const int w = (plane == 1 || plane == 2) ? FF_CEIL_RSHIFT(picref->width,  geq->hsub) : picref->width;
-    const int h = (plane == 1 || plane == 2) ? FF_CEIL_RSHIFT(picref->height, geq->vsub) : picref->height;
+    const int w = (plane == 1 || plane == 2) ? AV_CEIL_RSHIFT(picref->width,  geq->hsub) : picref->width;
+    const int h = (plane == 1 || plane == 2) ? AV_CEIL_RSHIFT(picref->height, geq->vsub) : picref->height;
 
     if (!src)
         return 0;
@@ -192,6 +193,8 @@ static int geq_config_props(AVFilterLink *inlink)
     GEQContext *geq = inlink->dst->priv;
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
 
+    av_assert0(desc);
+
     geq->hsub = desc->log2_chroma_w;
     geq->vsub = desc->log2_chroma_h;
     geq->planes = desc->nb_components;
@@ -221,8 +224,8 @@ static int geq_filter_frame(AVFilterLink *inlink, AVFrame *in)
         int x, y;
         uint8_t *dst = out->data[plane];
         const int linesize = out->linesize[plane];
-        const int w = (plane == 1 || plane == 2) ? FF_CEIL_RSHIFT(inlink->w, geq->hsub) : inlink->w;
-        const int h = (plane == 1 || plane == 2) ? FF_CEIL_RSHIFT(inlink->h, geq->vsub) : inlink->h;
+        const int w = (plane == 1 || plane == 2) ? AV_CEIL_RSHIFT(inlink->w, geq->hsub) : inlink->w;
+        const int h = (plane == 1 || plane == 2) ? AV_CEIL_RSHIFT(inlink->h, geq->vsub) : inlink->h;
 
         values[VAR_W]  = w;
         values[VAR_H]  = h;
diff --git a/libavfilter/vf_gradfun.c b/libavfilter/vf_gradfun.c
index 8c11bda4..e9af24fa 100644
--- a/libavfilter/vf_gradfun.c
+++ b/libavfilter/vf_gradfun.c
@@ -173,8 +173,8 @@ static int config_input(AVFilterLink *inlink)
     if (!s->buf)
         return AVERROR(ENOMEM);
 
-    s->chroma_w = FF_CEIL_RSHIFT(inlink->w, hsub);
-    s->chroma_h = FF_CEIL_RSHIFT(inlink->h, vsub);
+    s->chroma_w = AV_CEIL_RSHIFT(inlink->w, hsub);
+    s->chroma_h = AV_CEIL_RSHIFT(inlink->h, vsub);
     s->chroma_r = av_clip(((((s->radius >> hsub) + (s->radius >> vsub)) / 2 ) + 1) & ~1, 4, 32);
 
     return 0;
diff --git a/libavfilter/vf_hflip.c b/libavfilter/vf_hflip.c
index 362bd013..6f922847 100644
--- a/libavfilter/vf_hflip.c
+++ b/libavfilter/vf_hflip.c
@@ -44,15 +44,16 @@ typedef struct FlipContext {
 static int query_formats(AVFilterContext *ctx)
 {
     AVFilterFormats *pix_fmts = NULL;
-    int fmt;
+    int fmt, ret;
 
     for (fmt = 0; av_pix_fmt_desc_get(fmt); fmt++) {
         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);
         if (!(desc->flags & AV_PIX_FMT_FLAG_HWACCEL ||
               desc->flags & AV_PIX_FMT_FLAG_BITSTREAM ||
               (desc->log2_chroma_w != desc->log2_chroma_h &&
-               desc->comp[0].plane == desc->comp[1].plane)))
-            ff_add_format(&pix_fmts, fmt);
+               desc->comp[0].plane == desc->comp[1].plane)) &&
+            (ret = ff_add_format(&pix_fmts, fmt)) < 0)
+            return ret;
     }
 
     return ff_set_common_formats(ctx, pix_fmts);
@@ -67,9 +68,9 @@ static int config_props(AVFilterLink *inlink)
 
     av_image_fill_max_pixsteps(s->max_step, NULL, pix_desc);
     s->planewidth[0]  = s->planewidth[3]  = inlink->w;
-    s->planewidth[1]  = s->planewidth[2]  = FF_CEIL_RSHIFT(inlink->w, hsub);
+    s->planewidth[1]  = s->planewidth[2]  = AV_CEIL_RSHIFT(inlink->w, hsub);
     s->planeheight[0] = s->planeheight[3] = inlink->h;
-    s->planeheight[1] = s->planeheight[2] = FF_CEIL_RSHIFT(inlink->h, vsub);
+    s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, vsub);
 
     return 0;
 }
diff --git a/libavfilter/vf_histeq.c b/libavfilter/vf_histeq.c
index ce28afda..b3d2545b 100644
--- a/libavfilter/vf_histeq.c
+++ b/libavfilter/vf_histeq.c
@@ -28,6 +28,7 @@
  */
 
 #include "libavutil/common.h"
+#include "libavutil/internal.h"
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
 
@@ -168,7 +169,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *inpic)
 
 #ifdef DEBUG
     for (x = 0; x < 256; x++)
-        av_dlog(ctx, "in[%d]: %u\n", x, histeq->in_histogram[x]);
+        ff_dlog(ctx, "in[%d]: %u\n", x, histeq->in_histogram[x]);
 #endif
 
     /* Calculate the lookup table. */
@@ -244,7 +245,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *inpic)
     }
 #ifdef DEBUG
     for (x = 0; x < 256; x++)
-        av_dlog(ctx, "out[%d]: %u\n", x, histeq->out_histogram[x]);
+        ff_dlog(ctx, "out[%d]: %u\n", x, histeq->out_histogram[x]);
 #endif
 
     av_frame_free(&inpic);
diff --git a/libavfilter/vf_histogram.c b/libavfilter/vf_histogram.c
index 31004b71..3a4725be 100644
--- a/libavfilter/vf_histogram.c
+++ b/libavfilter/vf_histogram.c
@@ -22,110 +22,138 @@
 #include "libavutil/opt.h"
 #include "libavutil/parseutils.h"
 #include "libavutil/pixdesc.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/intreadwrite.h"
 #include "avfilter.h"
 #include "formats.h"
 #include "internal.h"
 #include "video.h"
 
-enum HistogramMode {
-    MODE_LEVELS,
-    MODE_WAVEFORM,
-    MODE_COLOR,
-    MODE_COLOR2,
-    MODE_NB
-};
-
 typedef struct HistogramContext {
     const AVClass *class;               ///< AVClass context for log and options purpose
-    int mode;                           ///< HistogramMode
-    unsigned       histogram[256];
+    unsigned       histogram[256*256];
+    int            histogram_size;
+    int            mult;
     int            ncomp;
     const uint8_t  *bg_color;
     const uint8_t  *fg_color;
     int            level_height;
     int            scale_height;
-    int            step;
-    int            waveform_mode;
-    int            waveform_mirror;
     int            display_mode;
     int            levels_mode;
-    const AVPixFmtDescriptor *desc;
+    const AVPixFmtDescriptor *desc, *odesc;
+    int            components;
+    int            planewidth[4];
+    int            planeheight[4];
 } HistogramContext;
 
 #define OFFSET(x) offsetof(HistogramContext, x)
 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
 
 static const AVOption histogram_options[] = {
-    { "mode", "set histogram mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64=MODE_LEVELS}, 0, MODE_NB-1, FLAGS, "mode"},
-    { "levels", "standard histogram", 0, AV_OPT_TYPE_CONST, {.i64=MODE_LEVELS}, 0, 0, FLAGS, "mode" },
-    { "waveform", "per row/column luminance graph", 0, AV_OPT_TYPE_CONST, {.i64=MODE_WAVEFORM}, 0, 0, FLAGS, "mode" },
-    { "color", "chroma values in vectorscope", 0, AV_OPT_TYPE_CONST, {.i64=MODE_COLOR}, 0, 0, FLAGS, "mode" },
-    { "color2", "chroma values in vectorscope", 0, AV_OPT_TYPE_CONST, {.i64=MODE_COLOR2}, 0, 0, FLAGS, "mode" },
     { "level_height", "set level height", OFFSET(level_height), AV_OPT_TYPE_INT, {.i64=200}, 50, 2048, FLAGS},
     { "scale_height", "set scale height", OFFSET(scale_height), AV_OPT_TYPE_INT, {.i64=12}, 0, 40, FLAGS},
-    { "step", "set waveform step value", OFFSET(step), AV_OPT_TYPE_INT, {.i64=10}, 1, 255, FLAGS},
-    { "waveform_mode", "set waveform mode", OFFSET(waveform_mode), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS, "waveform_mode"},
-    { "row",   NULL, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "waveform_mode" },
-    { "column", NULL, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "waveform_mode" },
-    { "waveform_mirror", "set waveform mirroring", OFFSET(waveform_mirror), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS, "waveform_mirror"},
     { "display_mode", "set display mode", OFFSET(display_mode), AV_OPT_TYPE_INT, {.i64=1}, 0, 1, FLAGS, "display_mode"},
     { "parade",  NULL, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "display_mode" },
     { "overlay", NULL, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "display_mode" },
     { "levels_mode", "set levels mode", OFFSET(levels_mode), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS, "levels_mode"},
     { "linear",      NULL, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "levels_mode" },
     { "logarithmic", NULL, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "levels_mode" },
+    { "components", "set color components to display", OFFSET(components), AV_OPT_TYPE_INT, {.i64=7}, 1, 15, FLAGS},
     { NULL }
 };
 
 AVFILTER_DEFINE_CLASS(histogram);
 
-static const enum AVPixelFormat color_pix_fmts[] = {
-    AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUVA444P, AV_PIX_FMT_YUVJ444P,
+static const enum AVPixelFormat levels_in_pix_fmts[] = {
+    AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUVJ420P,
+    AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUVJ422P,
+    AV_PIX_FMT_YUV411P,  AV_PIX_FMT_YUVJ411P,
+    AV_PIX_FMT_YUV440P,  AV_PIX_FMT_YUV410P,
+    AV_PIX_FMT_YUVA444P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUVJ444P,
+    AV_PIX_FMT_YUV420P9, AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV444P9,
+    AV_PIX_FMT_YUVA420P9, AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA444P9,
+    AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
+    AV_PIX_FMT_YUVA420P10, AV_PIX_FMT_YUVA422P10, AV_PIX_FMT_YUVA444P10,
+    AV_PIX_FMT_GBRAP,    AV_PIX_FMT_GBRP,
+    AV_PIX_FMT_GBRP9,    AV_PIX_FMT_GBRP10,
+    AV_PIX_FMT_GRAY8,
     AV_PIX_FMT_NONE
 };
 
-static const enum AVPixelFormat levels_pix_fmts[] = {
-    AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUVA444P, AV_PIX_FMT_YUVJ444P,
-    AV_PIX_FMT_GRAY8, AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRAP, AV_PIX_FMT_NONE
+static const enum AVPixelFormat levels_out_yuv8_pix_fmts[] = {
+    AV_PIX_FMT_YUVA444P, AV_PIX_FMT_YUV444P,
+    AV_PIX_FMT_NONE
 };
 
-static const enum AVPixelFormat waveform_pix_fmts[] = {
-     AV_PIX_FMT_GBRP,     AV_PIX_FMT_GBRAP,
-     AV_PIX_FMT_YUV422P,  AV_PIX_FMT_YUV420P,
-     AV_PIX_FMT_YUV444P,  AV_PIX_FMT_YUV440P,
-     AV_PIX_FMT_YUV411P,  AV_PIX_FMT_YUV410P,
-     AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_YUVJ411P, AV_PIX_FMT_YUVJ420P,
-     AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ444P,
-     AV_PIX_FMT_YUVA444P, AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUVA420P,
-     AV_PIX_FMT_GRAY8,
-     AV_PIX_FMT_NONE
+static const enum AVPixelFormat levels_out_yuv9_pix_fmts[] = {
+    AV_PIX_FMT_YUVA444P9, AV_PIX_FMT_YUV444P9,
+    AV_PIX_FMT_NONE
+};
+
+static const enum AVPixelFormat levels_out_yuv10_pix_fmts[] = {
+    AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_YUV444P10,
+    AV_PIX_FMT_NONE
+};
+
+static const enum AVPixelFormat levels_out_rgb8_pix_fmts[] = {
+    AV_PIX_FMT_GBRAP,    AV_PIX_FMT_GBRP,
+    AV_PIX_FMT_NONE
+};
+
+static const enum AVPixelFormat levels_out_rgb9_pix_fmts[] = {
+    AV_PIX_FMT_GBRP9,
+    AV_PIX_FMT_NONE
+};
+
+static const enum AVPixelFormat levels_out_rgb10_pix_fmts[] = {
+    AV_PIX_FMT_GBRP10,
+    AV_PIX_FMT_NONE
 };
 
 static int query_formats(AVFilterContext *ctx)
 {
-    HistogramContext *h = ctx->priv;
-    const enum AVPixelFormat *pix_fmts;
-    AVFilterFormats *fmts_list;
+    AVFilterFormats *avff;
+    const AVPixFmtDescriptor *desc;
+    const enum AVPixelFormat *out_pix_fmts;
+    int rgb, i, bits;
+    int ret;
 
-    switch (h->mode) {
-    case MODE_WAVEFORM:
-        pix_fmts = waveform_pix_fmts;
-        break;
-    case MODE_LEVELS:
-        pix_fmts = levels_pix_fmts;
-        break;
-    case MODE_COLOR:
-    case MODE_COLOR2:
-        pix_fmts = color_pix_fmts;
-        break;
-    default:
-        av_assert0(0);
+    if (!ctx->inputs[0]->in_formats ||
+        !ctx->inputs[0]->in_formats->nb_formats) {
+        return AVERROR(EAGAIN);
     }
 
-    fmts_list = ff_make_format_list(pix_fmts);
-    if (!fmts_list)
-        return AVERROR(ENOMEM);
-    return ff_set_common_formats(ctx, fmts_list);
+    if (!ctx->inputs[0]->out_formats)
+        if ((ret = ff_formats_ref(ff_make_format_list(levels_in_pix_fmts), &ctx->inputs[0]->out_formats)) < 0)
+            return ret;
+    avff = ctx->inputs[0]->in_formats;
+    desc = av_pix_fmt_desc_get(avff->formats[0]);
+    rgb = desc->flags & AV_PIX_FMT_FLAG_RGB;
+    bits = desc->comp[0].depth;
+    for (i = 1; i < avff->nb_formats; i++) {
+        desc = av_pix_fmt_desc_get(avff->formats[i]);
+        if ((rgb != (desc->flags & AV_PIX_FMT_FLAG_RGB)) ||
+            (bits != desc->comp[0].depth))
+            return AVERROR(EAGAIN);
+    }
+
+    if (rgb && bits == 8)
+        out_pix_fmts = levels_out_rgb8_pix_fmts;
+    else if (rgb && bits == 9)
+        out_pix_fmts = levels_out_rgb9_pix_fmts;
+    else if (rgb && bits == 10)
+        out_pix_fmts = levels_out_rgb10_pix_fmts;
+    else if (bits == 8)
+        out_pix_fmts = levels_out_yuv8_pix_fmts;
+    else if (bits == 9)
+        out_pix_fmts = levels_out_yuv9_pix_fmts;
+    else // if (bits == 10)
+        out_pix_fmts = levels_out_yuv10_pix_fmts;
+    if ((ret = ff_formats_ref(ff_make_format_list(out_pix_fmts), &ctx->outputs[0]->in_formats)) < 0)
+        return ret;
+
+    return 0;
 }
 
 static const uint8_t black_yuva_color[4] = { 0, 127, 127, 255 };
@@ -139,8 +167,12 @@ static int config_input(AVFilterLink *inlink)
 
     h->desc  = av_pix_fmt_desc_get(inlink->format);
     h->ncomp = h->desc->nb_components;
+    h->histogram_size = 1 << h->desc->comp[0].depth;
+    h->mult = h->histogram_size / 256;
 
     switch (inlink->format) {
+    case AV_PIX_FMT_GBRP10:
+    case AV_PIX_FMT_GBRP9:
     case AV_PIX_FMT_GBRAP:
     case AV_PIX_FMT_GBRP:
         h->bg_color = black_gbrp_color;
@@ -151,6 +183,11 @@ static int config_input(AVFilterLink *inlink)
         h->fg_color = white_yuva_color;
     }
 
+    h->planeheight[1] = h->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, h->desc->log2_chroma_h);
+    h->planeheight[0] = h->planeheight[3] = inlink->h;
+    h->planewidth[1]  = h->planewidth[2]  = AV_CEIL_RSHIFT(inlink->w, h->desc->log2_chroma_w);
+    h->planewidth[0]  = h->planewidth[3]  = inlink->w;
+
     return 0;
 }
 
@@ -158,87 +195,28 @@ static int config_output(AVFilterLink *outlink)
 {
     AVFilterContext *ctx = outlink->src;
     HistogramContext *h = ctx->priv;
+    int ncomp = 0, i;
 
-    switch (h->mode) {
-    case MODE_LEVELS:
-        outlink->w = 256;
-        outlink->h = (h->level_height + h->scale_height) * FFMAX(h->ncomp * h->display_mode, 1);
-        break;
-    case MODE_WAVEFORM:
-        if (h->waveform_mode)
-            outlink->h = 256 * FFMAX(h->ncomp * h->display_mode, 1);
-        else
-            outlink->w = 256 * FFMAX(h->ncomp * h->display_mode, 1);
-        break;
-    case MODE_COLOR:
-    case MODE_COLOR2:
-        outlink->h = outlink->w = 256;
-        break;
-    default:
-        av_assert0(0);
+    for (i = 0; i < h->ncomp; i++) {
+        if ((1 << i) & h->components)
+            ncomp++;
     }
+    outlink->w = h->histogram_size;
+    outlink->h = (h->level_height + h->scale_height) * FFMAX(ncomp * h->display_mode, 1);
 
+    h->odesc = av_pix_fmt_desc_get(outlink->format);
     outlink->sample_aspect_ratio = (AVRational){1,1};
 
     return 0;
 }
 
-static void gen_waveform(HistogramContext *h, AVFrame *inpicref, AVFrame *outpicref,
-                         int component, int intensity, int offset, int col_mode)
-{
-    const int plane = h->desc->comp[component].plane;
-    const int mirror = h->waveform_mirror;
-    const int is_chroma = (component == 1 || component == 2);
-    const int shift_w = (is_chroma ? h->desc->log2_chroma_w : 0);
-    const int shift_h = (is_chroma ? h->desc->log2_chroma_h : 0);
-    const int src_linesize = inpicref->linesize[plane];
-    const int dst_linesize = outpicref->linesize[plane];
-    const int dst_signed_linesize = dst_linesize * (mirror == 1 ? -1 : 1);
-    uint8_t *src_data = inpicref->data[plane];
-    uint8_t *dst_data = outpicref->data[plane] + (col_mode ? (offset >> shift_h) * dst_linesize : offset >> shift_w);
-    uint8_t * const dst_bottom_line = dst_data + dst_linesize * ((256 >> shift_h) - 1);
-    uint8_t * const dst_line = (mirror ? dst_bottom_line : dst_data);
-    const uint8_t max = 255 - intensity;
-    const int src_h = FF_CEIL_RSHIFT(inpicref->height, shift_h);
-    const int src_w = FF_CEIL_RSHIFT(inpicref->width, shift_w);
-    uint8_t *dst, *p;
-    int y;
-
-    if (!col_mode && mirror)
-        dst_data += 256 >> shift_w;
-    for (y = 0; y < src_h; y++) {
-        const uint8_t *src_data_end = src_data + src_w;
-        dst = dst_line;
-        for (p = src_data; p < src_data_end; p++) {
-            uint8_t *target;
-            if (col_mode) {
-                target = dst++ + dst_signed_linesize * (*p >> shift_h);
-            } else {
-                if (mirror)
-                    target = dst_data - (*p >> shift_w);
-                else
-                    target = dst_data + (*p >> shift_w);
-            }
-            if (*target <= max)
-                *target += intensity;
-            else
-                *target = 255;
-        }
-        src_data += src_linesize;
-        dst_data += dst_linesize;
-    }
-}
-
-
 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 {
     HistogramContext *h   = inlink->dst->priv;
     AVFilterContext *ctx  = inlink->dst;
     AVFilterLink *outlink = ctx->outputs[0];
     AVFrame *out;
-    const uint8_t *src;
-    uint8_t *dst;
-    int i, j, k, l;
+    int i, j, k, l, m;
 
     out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
     if (!out) {
@@ -248,42 +226,66 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 
     out->pts = in->pts;
 
-    for (k = 0; k < h->ncomp; k++) {
+    for (k = 0; k < 4 && out->data[k]; k++) {
         const int is_chroma = (k == 1 || k == 2);
-        const int dst_h = FF_CEIL_RSHIFT(outlink->h, (is_chroma ? h->desc->log2_chroma_h : 0));
-        const int dst_w = FF_CEIL_RSHIFT(outlink->w, (is_chroma ? h->desc->log2_chroma_w : 0));
-        for (i = 0; i < dst_h ; i++)
-            memset(out->data[h->desc->comp[k].plane] +
-                   i * out->linesize[h->desc->comp[k].plane],
-                   h->bg_color[k], dst_w);
+        const int dst_h = AV_CEIL_RSHIFT(outlink->h, (is_chroma ? h->odesc->log2_chroma_h : 0));
+        const int dst_w = AV_CEIL_RSHIFT(outlink->w, (is_chroma ? h->odesc->log2_chroma_w : 0));
+
+        if (h->histogram_size <= 256) {
+            for (i = 0; i < dst_h ; i++)
+                memset(out->data[h->odesc->comp[k].plane] +
+                       i * out->linesize[h->odesc->comp[k].plane],
+                       h->bg_color[k], dst_w);
+        } else {
+            const int mult = h->mult;
+
+            for (i = 0; i < dst_h ; i++)
+                for (j = 0; j < dst_w; j++)
+                    AV_WN16(out->data[h->odesc->comp[k].plane] +
+                        i * out->linesize[h->odesc->comp[k].plane] + j * 2,
+                        h->bg_color[k] * mult);
+        }
     }
 
-    switch (h->mode) {
-    case MODE_LEVELS:
-        for (k = 0; k < h->ncomp; k++) {
-            const int p = h->desc->comp[k].plane;
-            const int start = k * (h->level_height + h->scale_height) * h->display_mode;
-            double max_hval_log;
-            unsigned max_hval = 0;
-
-            for (i = 0; i < in->height; i++) {
-                src = in->data[p] + i * in->linesize[p];
-                for (j = 0; j < in->width; j++)
+    for (m = 0, k = 0; k < h->ncomp; k++) {
+        const int p = h->desc->comp[k].plane;
+        const int height = h->planeheight[p];
+        const int width = h->planewidth[p];
+        double max_hval_log;
+        unsigned max_hval = 0;
+        int start;
+
+        if (!((1 << k) & h->components))
+            continue;
+        start = m++ * (h->level_height + h->scale_height) * h->display_mode;
+
+        if (h->histogram_size <= 256) {
+            for (i = 0; i < height; i++) {
+                const uint8_t *src = in->data[p] + i * in->linesize[p];
+                for (j = 0; j < width; j++)
                     h->histogram[src[j]]++;
             }
+        } else {
+            for (i = 0; i < height; i++) {
+                const uint16_t *src = (const uint16_t *)(in->data[p] + i * in->linesize[p]);
+                for (j = 0; j < width; j++)
+                    h->histogram[src[j]]++;
+            }
+        }
 
-            for (i = 0; i < 256; i++)
-                max_hval = FFMAX(max_hval, h->histogram[i]);
-            max_hval_log = log2(max_hval + 1);
+        for (i = 0; i < h->histogram_size; i++)
+            max_hval = FFMAX(max_hval, h->histogram[i]);
+        max_hval_log = log2(max_hval + 1);
 
-            for (i = 0; i < outlink->w; i++) {
-                int col_height;
+        for (i = 0; i < outlink->w; i++) {
+            int col_height;
 
-                if (h->levels_mode)
-                    col_height = round(h->level_height * (1. - (log2(h->histogram[i] + 1) / max_hval_log)));
-                else
-                    col_height = h->level_height - (h->histogram[i] * (int64_t)h->level_height + max_hval - 1) / max_hval;
+            if (h->levels_mode)
+                col_height = lrint(h->level_height * (1. - (log2(h->histogram[i] + 1) / max_hval_log)));
+            else
+                col_height = h->level_height - (h->histogram[i] * (int64_t)h->level_height + max_hval - 1) / max_hval;
 
+            if (h->histogram_size <= 256) {
                 for (j = h->level_height - 1; j >= col_height; j--) {
                     if (h->display_mode) {
                         for (l = 0; l < h->ncomp; l++)
@@ -294,54 +296,23 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
                 }
                 for (j = h->level_height + h->scale_height - 1; j >= h->level_height; j--)
                     out->data[p][(j + start) * out->linesize[p] + i] = i;
-            }
+            } else {
+                const int mult = h->mult;
 
-            memset(h->histogram, 0, 256 * sizeof(unsigned));
-        }
-        break;
-    case MODE_WAVEFORM:
-        for (k = 0; k < h->ncomp; k++) {
-            const int offset = k * 256 * h->display_mode;
-            gen_waveform(h, in, out, k, h->step, offset, h->waveform_mode);
-        }
-        break;
-    case MODE_COLOR:
-        for (i = 0; i < inlink->h; i++) {
-            const int iw1 = i * in->linesize[1];
-            const int iw2 = i * in->linesize[2];
-            for (j = 0; j < inlink->w; j++) {
-                const int pos = in->data[1][iw1 + j] * out->linesize[0] + in->data[2][iw2 + j];
-                if (out->data[0][pos] < 255)
-                    out->data[0][pos]++;
-            }
-        }
-        for (i = 0; i < 256; i++) {
-            dst = out->data[0] + i * out->linesize[0];
-            for (j = 0; j < 256; j++) {
-                if (!dst[j]) {
-                    out->data[1][i * out->linesize[0] + j] = i;
-                    out->data[2][i * out->linesize[0] + j] = j;
+                for (j = h->level_height - 1; j >= col_height; j--) {
+                    if (h->display_mode) {
+                        for (l = 0; l < h->ncomp; l++)
+                            AV_WN16(out->data[l] + (j + start) * out->linesize[l] + i * 2, h->fg_color[l] * mult);
+                    } else {
+                        AV_WN16(out->data[p] + (j + start) * out->linesize[p] + i * 2, 255 * mult);
+                    }
                 }
+                for (j = h->level_height + h->scale_height - 1; j >= h->level_height; j--)
+                    AV_WN16(out->data[p] + (j + start) * out->linesize[p] + i * 2, i);
             }
         }
-        break;
-    case MODE_COLOR2:
-        for (i = 0; i < inlink->h; i++) {
-            const int iw1 = i * in->linesize[1];
-            const int iw2 = i * in->linesize[2];
-            for (j = 0; j < inlink->w; j++) {
-                const int u = in->data[1][iw1 + j];
-                const int v = in->data[2][iw2 + j];
-                const int pos = u * out->linesize[0] + v;
-                if (!out->data[0][pos])
-                    out->data[0][pos] = FFABS(128 - u) + FFABS(128 - v);
-                out->data[1][pos] = u;
-                out->data[2][pos] = v;
-            }
-        }
-        break;
-    default:
-        av_assert0(0);
+
+        memset(h->histogram, 0, h->histogram_size * sizeof(unsigned));
     }
 
     av_frame_free(&in);
diff --git a/libavfilter/vf_hqdn3d.c b/libavfilter/vf_hqdn3d.c
index ffc1b255..3fb69fc0 100644
--- a/libavfilter/vf_hqdn3d.c
+++ b/libavfilter/vf_hqdn3d.c
@@ -182,7 +182,7 @@ static int16_t *precalc_coefs(double dist25, int depth)
 
     for (i = -256<<LUT_BITS; i < 256<<LUT_BITS; i++) {
         double f = ((i<<(9-LUT_BITS)) + (1<<(8-LUT_BITS)) - 1) / 512.0; // midpoint of the bin
-        simil = FFMAX(0, 1.0 - FFABS(f) / 255.0);
+        simil = FFMAX(0, 1.0 - fabs(f) / 255.0);
         C = pow(simil, gamma) * 256.0 * f;
         ct[(256<<LUT_BITS)+i] = lrint(C);
     }
@@ -269,7 +269,7 @@ static int config_input(AVFilterLink *inlink)
 
     s->hsub  = desc->log2_chroma_w;
     s->vsub  = desc->log2_chroma_h;
-    s->depth = desc->comp[0].depth_minus1+1;
+    s->depth = desc->comp[0].depth;
 
     s->line = av_malloc_array(inlink->w, sizeof(*s->line));
     if (!s->line)
@@ -311,8 +311,8 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
     for (c = 0; c < 3; c++) {
         denoise(s, in->data[c], out->data[c],
                 s->line, &s->frame_prev[c],
-                FF_CEIL_RSHIFT(in->width,  (!!c * s->hsub)),
-                FF_CEIL_RSHIFT(in->height, (!!c * s->vsub)),
+                AV_CEIL_RSHIFT(in->width,  (!!c * s->hsub)),
+                AV_CEIL_RSHIFT(in->height, (!!c * s->vsub)),
                 in->linesize[c], out->linesize[c],
                 s->coefs[c ? CHROMA_SPATIAL : LUMA_SPATIAL],
                 s->coefs[c ? CHROMA_TMP     : LUMA_TMP]);
diff --git a/libavfilter/vf_hqdn3d.h b/libavfilter/vf_hqdn3d.h
index be55400f..03a79a10 100644
--- a/libavfilter/vf_hqdn3d.h
+++ b/libavfilter/vf_hqdn3d.h
@@ -20,8 +20,8 @@
  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
-#ifndef AVFILTER_VF_HQDN3D_H
-#define AVFILTER_VF_HQDN3D_H
+#ifndef AVFILTER_HQDN3D_H
+#define AVFILTER_HQDN3D_H
 
 #include <stddef.h>
 #include <stdint.h>
@@ -46,4 +46,4 @@ typedef struct HQDN3DContext {
 
 void ff_hqdn3d_init_x86(HQDN3DContext *hqdn3d);
 
-#endif /* AVFILTER_VF_HQDN3D_H */
+#endif /* AVFILTER_HQDN3D_H */
diff --git a/libavfilter/vf_hqx.c b/libavfilter/vf_hqx.c
index fa15d9c5..d1e360f9 100644
--- a/libavfilter/vf_hqx.c
+++ b/libavfilter/vf_hqx.c
@@ -65,9 +65,11 @@ static av_always_inline int yuv_diff(uint32_t yuv1, uint32_t yuv2)
 #define YMASK 0xff0000
 #define UMASK 0x00ff00
 #define VMASK 0x0000ff
-    return abs((yuv1 & YMASK) - (yuv2 & YMASK)) > (48 << 16) ||
-           abs((yuv1 & UMASK) - (yuv2 & UMASK)) > ( 7 <<  8) ||
-           abs((yuv1 & VMASK) - (yuv2 & VMASK)) > ( 6 <<  0);
+#define ABSDIFF(a,b) (abs((int)(a)-(int)(b)))
+
+    return ABSDIFF(yuv1 & YMASK, yuv2 & YMASK) > (48 << 16) ||
+           ABSDIFF(yuv1 & UMASK, yuv2 & UMASK) > ( 7 <<  8) ||
+           ABSDIFF(yuv1 & VMASK, yuv2 & VMASK) > ( 6 <<  0);
 }
 
 /* (c1*w1 + c2*w2) >> s */
diff --git a/libavfilter/vf_hue.c b/libavfilter/vf_hue.c
index 2c1b34ee..b5d72136 100644
--- a/libavfilter/vf_hue.c
+++ b/libavfilter/vf_hue.c
@@ -105,8 +105,8 @@ static inline void compute_sin_and_cos(HueContext *hue)
      * the saturation.
      * This will be useful in the apply_lut function.
      */
-    hue->hue_sin = rint(sin(hue->hue) * (1 << 16) * hue->saturation);
-    hue->hue_cos = rint(cos(hue->hue) * (1 << 16) * hue->saturation);
+    hue->hue_sin = lrint(sin(hue->hue) * (1 << 16) * hue->saturation);
+    hue->hue_cos = lrint(cos(hue->hue) * (1 << 16) * hue->saturation);
 }
 
 static inline void create_luma_lut(HueContext *h)
@@ -377,8 +377,8 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *inpic)
 
     apply_lut(hue, outpic->data[1], outpic->data[2], outpic->linesize[1],
               inpic->data[1],  inpic->data[2],  inpic->linesize[1],
-              FF_CEIL_RSHIFT(inlink->w, hue->hsub),
-              FF_CEIL_RSHIFT(inlink->h, hue->vsub));
+              AV_CEIL_RSHIFT(inlink->w, hue->hsub),
+              AV_CEIL_RSHIFT(inlink->h, hue->vsub));
     if (hue->brightness)
         apply_luma_lut(hue, outpic->data[0], outpic->linesize[0],
                        inpic->data[0], inpic->linesize[0], inlink->w, inlink->h);
diff --git a/libavfilter/vf_idet.c b/libavfilter/vf_idet.c
index eb1303a3..87d4144e 100644
--- a/libavfilter/vf_idet.c
+++ b/libavfilter/vf_idet.c
@@ -128,8 +128,8 @@ static void filter(AVFilterContext *ctx)
         int refs = idet->cur->linesize[i];
 
         if (i && i<3) {
-            w = FF_CEIL_RSHIFT(w, idet->csp->log2_chroma_w);
-            h = FF_CEIL_RSHIFT(h, idet->csp->log2_chroma_h);
+            w = AV_CEIL_RSHIFT(w, idet->csp->log2_chroma_w);
+            h = AV_CEIL_RSHIFT(h, idet->csp->log2_chroma_h);
         }
 
         for (y = 2; y < h - 2; y++) {
@@ -275,7 +275,7 @@ static int filter_frame(AVFilterLink *link, AVFrame *picref)
 
     if (!idet->csp)
         idet->csp = av_pix_fmt_desc_get(link->format);
-    if (idet->csp->comp[0].depth_minus1 / 8 == 1){
+    if (idet->csp->comp[0].depth > 8){
         idet->filter_line = (ff_idet_filter_func)ff_idet_filter_line_c_16bit;
         if (ARCH_X86)
             ff_idet_init_x86(idet, 1);
@@ -313,29 +313,24 @@ static int request_frame(AVFilterLink *link)
 {
     AVFilterContext *ctx = link->src;
     IDETContext *idet = ctx->priv;
+    int ret;
 
-    do {
-        int ret;
+    if (idet->eof)
+        return AVERROR_EOF;
 
-        if (idet->eof)
-            return AVERROR_EOF;
+    ret = ff_request_frame(link->src->inputs[0]);
 
-        ret = ff_request_frame(link->src->inputs[0]);
+    if (ret == AVERROR_EOF && idet->cur && !idet->analyze_interlaced_flag_done) {
+        AVFrame *next = av_frame_clone(idet->next);
 
-        if (ret == AVERROR_EOF && idet->cur && !idet->analyze_interlaced_flag_done) {
-            AVFrame *next = av_frame_clone(idet->next);
+        if (!next)
+            return AVERROR(ENOMEM);
 
-            if (!next)
-                return AVERROR(ENOMEM);
-
-            filter_frame(link->src->inputs[0], next);
-            idet->eof = 1;
-        } else if (ret < 0) {
-            return ret;
-        }
-    } while (link->frame_requested);
+        ret = filter_frame(link->src->inputs[0], next);
+        idet->eof = 1;
+    }
 
-    return 0;
+    return ret;
 }
 
 static av_cold void uninit(AVFilterContext *ctx)
@@ -405,12 +400,6 @@ static int query_formats(AVFilterContext *ctx)
     return ff_set_common_formats(ctx, fmts_list);
 }
 
-static int config_output(AVFilterLink *outlink)
-{
-    outlink->flags |= FF_LINK_FLAG_REQUEST_LOOP;
-    return 0;
-}
-
 static av_cold int init(AVFilterContext *ctx)
 {
     IDETContext *idet = ctx->priv;
@@ -420,7 +409,7 @@ static av_cold int init(AVFilterContext *ctx)
     memset(idet->history, UNDETERMINED, HIST_SIZE);
 
     if( idet->half_life > 0 )
-        idet->decay_coefficient = (uint64_t) round( PRECISION * exp2(-1.0 / idet->half_life) );
+        idet->decay_coefficient = lrint( PRECISION * exp2(-1.0 / idet->half_life) );
     else
         idet->decay_coefficient = PRECISION;
 
@@ -445,7 +434,6 @@ static const AVFilterPad idet_outputs[] = {
     {
         .name         = "default",
         .type         = AVMEDIA_TYPE_VIDEO,
-        .config_props = config_output,
         .request_frame = request_frame
     },
     { NULL }
diff --git a/libavfilter/vf_il.c b/libavfilter/vf_il.c
index edb58d03..e0bf8d59 100644
--- a/libavfilter/vf_il.c
+++ b/libavfilter/vf_il.c
@@ -70,12 +70,12 @@ static const AVOption il_options[] = {
     {"i",            NULL, 0, AV_OPT_TYPE_CONST, {.i64=MODE_INTERLEAVE},   0, 0, FLAGS, "alpha_mode"},
     {"deinterleave", NULL, 0, AV_OPT_TYPE_CONST, {.i64=MODE_DEINTERLEAVE}, 0, 0, FLAGS, "alpha_mode"},
     {"d",            NULL, 0, AV_OPT_TYPE_CONST, {.i64=MODE_DEINTERLEAVE}, 0, 0, FLAGS, "alpha_mode"},
-    {"luma_swap",   "swap luma fields",   OFFSET(luma_swap),   AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS},
-    {"ls",          "swap luma fields",   OFFSET(luma_swap),   AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS},
-    {"chroma_swap", "swap chroma fields", OFFSET(chroma_swap), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS},
-    {"cs",          "swap chroma fields", OFFSET(chroma_swap), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS},
-    {"alpha_swap",  "swap alpha fields",  OFFSET(alpha_swap),  AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS},
-    {"as",          "swap alpha fields",  OFFSET(alpha_swap),  AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS},
+    {"luma_swap",   "swap luma fields",   OFFSET(luma_swap),   AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
+    {"ls",          "swap luma fields",   OFFSET(luma_swap),   AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
+    {"chroma_swap", "swap chroma fields", OFFSET(chroma_swap), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
+    {"cs",          "swap chroma fields", OFFSET(chroma_swap), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
+    {"alpha_swap",  "swap alpha fields",  OFFSET(alpha_swap),  AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
+    {"as",          "swap alpha fields",  OFFSET(alpha_swap),  AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
     {NULL}
 };
 
@@ -84,12 +84,14 @@ AVFILTER_DEFINE_CLASS(il);
 static int query_formats(AVFilterContext *ctx)
 {
     AVFilterFormats *formats = NULL;
-    int fmt;
+    int fmt, ret;
 
     for (fmt = 0; av_pix_fmt_desc_get(fmt); fmt++) {
         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);
-        if (!(desc->flags & AV_PIX_FMT_FLAG_PAL) && !(desc->flags & AV_PIX_FMT_FLAG_HWACCEL))
-            ff_add_format(&formats, fmt);
+        if (!(desc->flags & AV_PIX_FMT_FLAG_PAL) &&
+            !(desc->flags & AV_PIX_FMT_FLAG_HWACCEL) &&
+            (ret = ff_add_format(&formats, fmt)) < 0)
+            return ret;
     }
 
     return ff_set_common_formats(ctx, formats);
@@ -107,7 +109,7 @@ static int config_input(AVFilterLink *inlink)
     if ((ret = av_image_fill_linesizes(s->linesize, inlink->format, inlink->w)) < 0)
         return ret;
 
-    s->chroma_height = FF_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
+    s->chroma_height = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
 
     return 0;
 }
diff --git a/libavfilter/vf_interlace.c b/libavfilter/vf_interlace.c
index a520776f..efa31287 100644
--- a/libavfilter/vf_interlace.c
+++ b/libavfilter/vf_interlace.c
@@ -38,16 +38,16 @@
 #include "video.h"
 
 #define OFFSET(x) offsetof(InterlaceContext, x)
-#define V AV_OPT_FLAG_VIDEO_PARAM
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
 static const AVOption interlace_options[] = {
     { "scan", "scanning mode", OFFSET(scan),
-        AV_OPT_TYPE_INT,   {.i64 = MODE_TFF }, 0, 1, .flags = V, .unit = "scan" },
+        AV_OPT_TYPE_INT,   {.i64 = MODE_TFF }, 0, 1, .flags = FLAGS, .unit = "scan" },
     { "tff", "top field first", 0,
-        AV_OPT_TYPE_CONST, {.i64 = MODE_TFF }, INT_MIN, INT_MAX, .flags = V, .unit = "scan" },
+        AV_OPT_TYPE_CONST, {.i64 = MODE_TFF }, INT_MIN, INT_MAX, .flags = FLAGS, .unit = "scan" },
     { "bff", "bottom field first", 0,
-        AV_OPT_TYPE_CONST, {.i64 = MODE_BFF }, INT_MIN, INT_MAX, .flags = V, .unit = "scan" },
-    { "lowpass", "enable vertical low-pass filter", OFFSET(lowpass),
-        AV_OPT_TYPE_INT,   {.i64 = 1 },        0, 1, .flags = V },
+        AV_OPT_TYPE_CONST, {.i64 = MODE_BFF }, INT_MIN, INT_MAX, .flags = FLAGS, .unit = "scan" },
+    { "lowpass", "set vertical low-pass filter", OFFSET(lowpass),
+        AV_OPT_TYPE_BOOL,  {.i64 = 1 },        0, 1, .flags = FLAGS },
     { NULL }
 };
 
@@ -113,7 +113,6 @@ static int config_out_props(AVFilterLink *outlink)
     // half framerate
     outlink->time_base.num *= 2;
     outlink->frame_rate.den *= 2;
-    outlink->flags |= FF_LINK_FLAG_REQUEST_LOOP;
 
 
     if (s->lowpass) {
@@ -140,7 +139,7 @@ static void copy_picture_field(InterlaceContext *s,
 
     for (plane = 0; plane < desc->nb_components; plane++) {
         int cols  = (plane == 1 || plane == 2) ? -(-inlink->w) >> hsub : inlink->w;
-        int lines = (plane == 1 || plane == 2) ? FF_CEIL_RSHIFT(inlink->h, vsub) : inlink->h;
+        int lines = (plane == 1 || plane == 2) ? AV_CEIL_RSHIFT(inlink->h, vsub) : inlink->h;
         uint8_t *dstp = dst_frame->data[plane];
         const uint8_t *srcp = src_frame->data[plane];
 
diff --git a/libavfilter/vf_kerndeint.c b/libavfilter/vf_kerndeint.c
index 0e2417ad..4825ed5e 100644
--- a/libavfilter/vf_kerndeint.c
+++ b/libavfilter/vf_kerndeint.c
@@ -50,10 +50,10 @@ typedef struct {
 #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
 static const AVOption kerndeint_options[] = {
     { "thresh", "set the threshold", OFFSET(thresh), AV_OPT_TYPE_INT, {.i64=10}, 0, 255, FLAGS },
-    { "map",    "set the map", OFFSET(map), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS },
-    { "order",  "set the order", OFFSET(order), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS },
-    { "sharp",  "enable sharpening", OFFSET(sharp), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS },
-    { "twoway", "enable twoway", OFFSET(twoway), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS },
+    { "map",    "set the map",    OFFSET(map),    AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
+    { "order",  "set the order",  OFFSET(order),  AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
+    { "sharp",  "set sharpening", OFFSET(sharp),  AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
+    { "twoway", "set twoway",     OFFSET(twoway), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
     { NULL }
 };
 
@@ -152,7 +152,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *inpic)
     outpic->interlaced_frame = 0;
 
     for (plane = 0; plane < 4 && inpic->data[plane] && inpic->linesize[plane]; plane++) {
-        h = plane == 0 ? inlink->h : FF_CEIL_RSHIFT(inlink->h, kerndeint->vsub);
+        h = plane == 0 ? inlink->h : AV_CEIL_RSHIFT(inlink->h, kerndeint->vsub);
         bwidth = kerndeint->tmp_bwidth[plane];
 
         srcp_saved        = inpic->data[plane];
diff --git a/libavfilter/vf_libopencv.c b/libavfilter/vf_libopencv.c
index be275529..f8ae9d5a 100644
--- a/libavfilter/vf_libopencv.c
+++ b/libavfilter/vf_libopencv.c
@@ -23,8 +23,14 @@
  * libopencv wrapper functions
  */
 
+#include "config.h"
+#if HAVE_OPENCV2_CORE_CORE_C_H
+#include <opencv2/core/core_c.h>
+#include <opencv2/imgproc/imgproc_c.h>
+#else
 #include <opencv/cv.h>
 #include <opencv/cxcore.h>
+#endif
 #include "libavutil/avstring.h"
 #include "libavutil/common.h"
 #include "libavutil/file.h"
diff --git a/libavfilter/vf_lut.c b/libavfilter/vf_lut.c
index 93b18a82..51486634 100644
--- a/libavfilter/vf_lut.c
+++ b/libavfilter/vf_lut.c
@@ -25,6 +25,7 @@
  */
 
 #include "libavutil/attributes.h"
+#include "libavutil/bswap.h"
 #include "libavutil/common.h"
 #include "libavutil/eval.h"
 #include "libavutil/opt.h"
@@ -59,12 +60,13 @@ enum var_name {
 
 typedef struct LutContext {
     const AVClass *class;
-    uint8_t lut[4][256];  ///< lookup table for each component
+    uint16_t lut[4][256 * 256];  ///< lookup table for each component
     char   *comp_expr_str[4];
     AVExpr *comp_expr[4];
     int hsub, vsub;
     double var_values[VAR_VARS_NB];
     int is_rgb, is_yuv;
+    int is_16bit;
     int step;
     int negate_alpha; /* only used by negate */
 } LutContext;
@@ -112,12 +114,19 @@ static av_cold void uninit(AVFilterContext *ctx)
     AV_PIX_FMT_YUV411P,  AV_PIX_FMT_YUV410P,  AV_PIX_FMT_YUV440P,    \
     AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUVA444P,   \
     AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ420P,   \
-    AV_PIX_FMT_YUVJ440P
+    AV_PIX_FMT_YUVJ440P,                                             \
+    AV_PIX_FMT_YUV444P9LE, AV_PIX_FMT_YUV422P9LE, AV_PIX_FMT_YUV420P9LE, \
+    AV_PIX_FMT_YUV444P10LE, AV_PIX_FMT_YUV422P10LE, AV_PIX_FMT_YUV420P10LE, AV_PIX_FMT_YUV440P10LE, \
+    AV_PIX_FMT_YUV444P12LE, AV_PIX_FMT_YUV422P12LE, AV_PIX_FMT_YUV420P12LE, AV_PIX_FMT_YUV440P12LE, \
+    AV_PIX_FMT_YUV444P14LE, AV_PIX_FMT_YUV422P14LE, AV_PIX_FMT_YUV420P14LE, \
+    AV_PIX_FMT_YUV444P16LE, AV_PIX_FMT_YUV422P16LE, AV_PIX_FMT_YUV420P16LE, \
+    AV_PIX_FMT_YUVA444P16LE, AV_PIX_FMT_YUVA422P16LE, AV_PIX_FMT_YUVA420P16LE
 
 #define RGB_FORMATS                             \
     AV_PIX_FMT_ARGB,         AV_PIX_FMT_RGBA,         \
     AV_PIX_FMT_ABGR,         AV_PIX_FMT_BGRA,         \
-    AV_PIX_FMT_RGB24,        AV_PIX_FMT_BGR24
+    AV_PIX_FMT_RGB24,        AV_PIX_FMT_BGR24,        \
+    AV_PIX_FMT_RGB48LE,      AV_PIX_FMT_RGBA64LE
 
 static const enum AVPixelFormat yuv_pix_fmts[] = { YUV_FORMATS, AV_PIX_FMT_NONE };
 static const enum AVPixelFormat rgb_pix_fmts[] = { RGB_FORMATS, AV_PIX_FMT_NONE };
@@ -178,9 +187,9 @@ static double compute_gammaval709(void *opaque, double gamma)
 }
 
 static double (* const funcs1[])(void *, double) = {
-    (void *)clip,
-    (void *)compute_gammaval,
-    (void *)compute_gammaval709,
+    clip,
+    compute_gammaval,
+    compute_gammaval709,
     NULL
 };
 
@@ -205,6 +214,7 @@ static int config_props(AVFilterLink *inlink)
 
     s->var_values[VAR_W] = inlink->w;
     s->var_values[VAR_H] = inlink->h;
+    s->is_16bit = desc->comp[0].depth > 8;
 
     switch (inlink->format) {
     case AV_PIX_FMT_YUV410P:
@@ -216,10 +226,45 @@ static int config_props(AVFilterLink *inlink)
     case AV_PIX_FMT_YUVA420P:
     case AV_PIX_FMT_YUVA422P:
     case AV_PIX_FMT_YUVA444P:
-        min[Y] = min[U] = min[V] = 16;
-        max[Y] = 235;
-        max[U] = max[V] = 240;
-        min[A] = 0; max[A] = 255;
+    case AV_PIX_FMT_YUV420P9LE:
+    case AV_PIX_FMT_YUV422P9LE:
+    case AV_PIX_FMT_YUV444P9LE:
+    case AV_PIX_FMT_YUVA420P9LE:
+    case AV_PIX_FMT_YUVA422P9LE:
+    case AV_PIX_FMT_YUVA444P9LE:
+    case AV_PIX_FMT_YUV420P10LE:
+    case AV_PIX_FMT_YUV422P10LE:
+    case AV_PIX_FMT_YUV440P10LE:
+    case AV_PIX_FMT_YUV444P10LE:
+    case AV_PIX_FMT_YUVA420P10LE:
+    case AV_PIX_FMT_YUVA422P10LE:
+    case AV_PIX_FMT_YUVA444P10LE:
+    case AV_PIX_FMT_YUV420P12LE:
+    case AV_PIX_FMT_YUV422P12LE:
+    case AV_PIX_FMT_YUV440P12LE:
+    case AV_PIX_FMT_YUV444P12LE:
+    case AV_PIX_FMT_YUV420P14LE:
+    case AV_PIX_FMT_YUV422P14LE:
+    case AV_PIX_FMT_YUV444P14LE:
+    case AV_PIX_FMT_YUV420P16LE:
+    case AV_PIX_FMT_YUV422P16LE:
+    case AV_PIX_FMT_YUV444P16LE:
+    case AV_PIX_FMT_YUVA420P16LE:
+    case AV_PIX_FMT_YUVA422P16LE:
+    case AV_PIX_FMT_YUVA444P16LE:
+        min[Y] = 16 * (1 << (desc->comp[0].depth - 8));
+        min[U] = 16 * (1 << (desc->comp[1].depth - 8));
+        min[V] = 16 * (1 << (desc->comp[2].depth - 8));
+        min[A] = 0;
+        max[Y] = 235 * (1 << (desc->comp[0].depth - 8));
+        max[U] = 240 * (1 << (desc->comp[1].depth - 8));
+        max[V] = 240 * (1 << (desc->comp[2].depth - 8));
+        max[A] = (1 << desc->comp[3].depth) - 1;
+        break;
+    case AV_PIX_FMT_RGB48LE:
+    case AV_PIX_FMT_RGBA64LE:
+        min[0] = min[1] = min[2] = min[3] = 0;
+        max[0] = max[1] = max[2] = max[3] = 65535;
         break;
     default:
         min[0] = min[1] = min[2] = min[3] = 0;
@@ -233,6 +278,9 @@ static int config_props(AVFilterLink *inlink)
     if (s->is_rgb) {
         ff_fill_rgba_map(rgba_map, inlink->format);
         s->step = av_get_bits_per_pixel(desc) >> 3;
+        if (s->is_16bit) {
+            s->step = s->step >> 1;
+        }
     }
 
     for (color = 0; color < desc->nb_components; color++) {
@@ -255,7 +303,7 @@ static int config_props(AVFilterLink *inlink)
         s->var_values[VAR_MAXVAL] = max[color];
         s->var_values[VAR_MINVAL] = min[color];
 
-        for (val = 0; val < 256; val++) {
+        for (val = 0; val < (1 << desc->comp[0].depth); val++) {
             s->var_values[VAR_VAL] = val;
             s->var_values[VAR_CLIPVAL] = av_clip(val, min[color], max[color]);
             s->var_values[VAR_NEGVAL] =
@@ -283,7 +331,6 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
     LutContext *s = ctx->priv;
     AVFilterLink *outlink = ctx->outputs[0];
     AVFrame *out;
-    uint8_t *inrow, *outrow, *inrow0, *outrow0;
     int i, j, plane, direct = 0;
 
     if (av_frame_is_writable(in)) {
@@ -298,11 +345,49 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
         av_frame_copy_props(out, in);
     }
 
-    if (s->is_rgb) {
+    if (s->is_rgb && s->is_16bit) {
+        /* packed, 16-bit */
+        uint16_t *inrow, *outrow, *inrow0, *outrow0;
+        const int w = inlink->w;
+        const int h = in->height;
+        const uint16_t (*tab)[256*256] = (const uint16_t (*)[256*256])s->lut;
+        const int in_linesize  =  in->linesize[0] / 2;
+        const int out_linesize = out->linesize[0] / 2;
+        const int step = s->step;
+
+        inrow0  = (uint16_t*) in ->data[0];
+        outrow0 = (uint16_t*) out->data[0];
+
+        for (i = 0; i < h; i ++) {
+            inrow  = inrow0;
+            outrow = outrow0;
+            for (j = 0; j < w; j++) {
+
+                switch (step) {
+#if HAVE_BIGENDIAN
+                case 4:  outrow[3] = av_bswap16(tab[3][av_bswap16(inrow[3])]); // Fall-through
+                case 3:  outrow[2] = av_bswap16(tab[2][av_bswap16(inrow[2])]); // Fall-through
+                case 2:  outrow[1] = av_bswap16(tab[1][av_bswap16(inrow[1])]); // Fall-through
+                default: outrow[0] = av_bswap16(tab[0][av_bswap16(inrow[0])]);
+#else
+                case 4:  outrow[3] = tab[3][inrow[3]]; // Fall-through
+                case 3:  outrow[2] = tab[2][inrow[2]]; // Fall-through
+                case 2:  outrow[1] = tab[1][inrow[1]]; // Fall-through
+                default: outrow[0] = tab[0][inrow[0]];
+#endif
+                }
+                outrow += step;
+                inrow  += step;
+            }
+            inrow0  += in_linesize;
+            outrow0 += out_linesize;
+        }
+    } else if (s->is_rgb) {
         /* packed */
+        uint8_t *inrow, *outrow, *inrow0, *outrow0;
         const int w = inlink->w;
         const int h = in->height;
-        const uint8_t (*tab)[256] = (const uint8_t (*)[256])s->lut;
+        const uint16_t (*tab)[256*256] = (const uint16_t (*)[256*256])s->lut;
         const int in_linesize  =  in->linesize[0];
         const int out_linesize = out->linesize[0];
         const int step = s->step;
@@ -326,14 +411,44 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
             inrow0  += in_linesize;
             outrow0 += out_linesize;
         }
+    } else if (s->is_16bit) {
+        // planar yuv >8 bit depth
+        uint16_t *inrow, *outrow;
+
+        for (plane = 0; plane < 4 && in->data[plane] && in->linesize[plane]; plane++) {
+            int vsub = plane == 1 || plane == 2 ? s->vsub : 0;
+            int hsub = plane == 1 || plane == 2 ? s->hsub : 0;
+            int h = AV_CEIL_RSHIFT(inlink->h, vsub);
+            int w = AV_CEIL_RSHIFT(inlink->w, hsub);
+            const uint16_t *tab = s->lut[plane];
+            const int in_linesize  =  in->linesize[plane] / 2;
+            const int out_linesize = out->linesize[plane] / 2;
+
+            inrow  = (uint16_t *)in ->data[plane];
+            outrow = (uint16_t *)out->data[plane];
+
+            for (i = 0; i < h; i++) {
+                for (j = 0; j < w; j++) {
+#if HAVE_BIGENDIAN
+                    outrow[j] = av_bswap16(tab[av_bswap16(inrow[j])]);
+#else
+                    outrow[j] = tab[inrow[j]];
+#endif
+                }
+                inrow  += in_linesize;
+                outrow += out_linesize;
+            }
+        }
     } else {
-        /* planar */
+        /* planar 8bit depth */
+        uint8_t *inrow, *outrow;
+
         for (plane = 0; plane < 4 && in->data[plane] && in->linesize[plane]; plane++) {
             int vsub = plane == 1 || plane == 2 ? s->vsub : 0;
             int hsub = plane == 1 || plane == 2 ? s->hsub : 0;
-            int h = FF_CEIL_RSHIFT(inlink->h, vsub);
-            int w = FF_CEIL_RSHIFT(inlink->w, hsub);
-            const uint8_t *tab = s->lut[plane];
+            int h = AV_CEIL_RSHIFT(inlink->h, vsub);
+            int w = AV_CEIL_RSHIFT(inlink->w, hsub);
+            const uint16_t *tab = s->lut[plane];
             const int in_linesize  =  in->linesize[plane];
             const int out_linesize = out->linesize[plane];
 
@@ -434,7 +549,7 @@ DEFINE_LUT_FILTER(lutrgb, "Compute and apply a lookup table to the RGB input vid
 #if CONFIG_NEGATE_FILTER
 
 static const AVOption negate_options[] = {
-    { "negate_alpha", NULL, OFFSET(negate_alpha), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, FLAGS },
+    { "negate_alpha", NULL, OFFSET(negate_alpha), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
     { NULL }
 };
 
diff --git a/libavfilter/vf_lut3d.c b/libavfilter/vf_lut3d.c
index 28a28506..2b8e0272 100644
--- a/libavfilter/vf_lut3d.c
+++ b/libavfilter/vf_lut3d.c
@@ -707,6 +707,8 @@ static int config_clut(AVFilterLink *inlink)
     LUT3DContext *lut3d = ctx->priv;
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
 
+    av_assert0(desc);
+
     lut3d->clut_is16bit = 0;
     switch (inlink->format) {
     case AV_PIX_FMT_RGB48:
@@ -770,8 +772,8 @@ static av_cold void haldclut_uninit(AVFilterContext *ctx)
 }
 
 static const AVOption haldclut_options[] = {
-    { "shortest",   "force termination when the shortest input terminates", OFFSET(dinput.shortest),   AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, FLAGS },
-    { "repeatlast", "continue applying the last clut after eos",            OFFSET(dinput.repeatlast), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, FLAGS },
+    { "shortest",   "force termination when the shortest input terminates", OFFSET(dinput.shortest),   AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
+    { "repeatlast", "continue applying the last clut after eos",            OFFSET(dinput.repeatlast), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS },
     COMMON_OPTIONS
 };
 
diff --git a/libavfilter/vf_maskedmerge.c b/libavfilter/vf_maskedmerge.c
new file mode 100644
index 00000000..66f8fa56
--- /dev/null
+++ b/libavfilter/vf_maskedmerge.c
@@ -0,0 +1,311 @@
+/*
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/imgutils.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/opt.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+#include "maskedmerge.h"
+
+#define OFFSET(x) offsetof(MaskedMergeContext, x)
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
+
+static const AVOption maskedmerge_options[] = { /* "planes" is a bitmask: process_frame merges plane p only when bit p is set */
+    { "planes", "set planes", OFFSET(planes), AV_OPT_TYPE_INT, {.i64=0xF}, 0, 0xF, FLAGS }, /* default 0xF, range 0..0xF */
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(maskedmerge);
+
+/* Register the pixel formats this filter supports through ff_set_common_formats():
+ * planar YUV(A), planar GBR(A) and gray, in the bit depths listed below. */
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat supported[] = {
+        AV_PIX_FMT_YUVA444P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P,
+        AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUV420P,
+        AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ411P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV410P,
+        AV_PIX_FMT_YUV420P9, AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV444P9,
+        AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
+        AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16,
+        AV_PIX_FMT_YUVA420P9, AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA444P9,
+        AV_PIX_FMT_YUVA420P10, AV_PIX_FMT_YUVA422P10, AV_PIX_FMT_YUVA444P10,
+        AV_PIX_FMT_YUVA420P16, AV_PIX_FMT_YUVA422P16, AV_PIX_FMT_YUVA444P16,
+        AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
+        AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP16,
+        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY16,
+        AV_PIX_FMT_NONE
+    };
+    AVFilterFormats *list = ff_make_format_list(supported);
+
+    return ff_set_common_formats(ctx, list);
+}
+
+/* Frame-sync event callback: fetch one frame per input and send the merged frame downstream. */
+static int process_frame(FFFrameSync *fs)
+{
+    AVFilterContext *ctx = fs->parent;
+    MaskedMergeContext *s = fs->opaque;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AVFrame *src[3], *out;
+    int i, p, ret;
+
+    /* Inputs are fetched in pad order: 0 = base, 1 = overlay, 2 = mask. */
+    for (i = 0; i < 3; i++) {
+        ret = ff_framesync_get_frame(&s->fs, i, &src[i], 0);
+        if (ret < 0)
+            return ret;
+    }
+
+    /* With ctx->is_disabled set the base frame is cloned; otherwise a new buffer is filled. */
+    out = ctx->is_disabled ? av_frame_clone(src[0])
+                           : ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!out)
+        return AVERROR(ENOMEM);
+
+    if (!ctx->is_disabled) {
+        av_frame_copy_props(out, src[0]);
+
+        for (p = 0; p < s->nb_planes; p++) {
+            if ((1 << p) & s->planes) {
+                s->maskedmerge(src[0]->data[p], src[1]->data[p],
+                               src[2]->data[p], out->data[p],
+                               src[0]->linesize[p], src[1]->linesize[p],
+                               src[2]->linesize[p], out->linesize[p],
+                               s->width[p], s->height[p],
+                               s->half, s->depth);
+            } else {
+                /* Plane not selected by the "planes" mask: copy it from base unchanged. */
+                av_image_copy_plane(out->data[p], out->linesize[p], src[0]->data[p], src[0]->linesize[p],
+                                    s->width[p], s->height[p]);
+            }
+        }
+    }
+    out->pts = av_rescale_q(src[0]->pts, s->fs.time_base, outlink->time_base);
+
+    return ff_filter_frame(outlink, out);
+}
+
+/* 8-bit kernel: dst = base + ((mask * (overlay - base) + 128) >> 8) for each sample.
+ * Linesizes are byte strides; half and shift are accepted but not used here. */
+static void maskedmerge8(const uint8_t *bsrc, const uint8_t *osrc,
+                         const uint8_t *msrc, uint8_t *dst,
+                         ptrdiff_t blinesize, ptrdiff_t olinesize,
+                         ptrdiff_t mlinesize, ptrdiff_t dlinesize,
+                         int w, int h, int half, int shift)
+{
+    int row, col;
+
+    for (row = 0; row < h; row++) {
+        for (col = 0; col < w; col++)
+            dst[col] = bsrc[col] + ((msrc[col] * (osrc[col] - bsrc[col]) + 128) >> 8);
+        /* step every pointer to the next row */
+        dst  += dlinesize;
+        bsrc += blinesize;
+        osrc += olinesize;
+        msrc += mlinesize;
+    }
+}
+
+/* >8-bit kernel on uint16_t samples (linesizes are byte strides, hence the / 2):
+ * dst = base + ((mask * (overlay - base) + half) >> shift). The product is taken
+ * in 64 bits because 65535 * 65535 overflows a signed 32-bit int at depth 16. */
+static void maskedmerge16(const uint8_t *bbsrc, const uint8_t *oosrc,
+                          const uint8_t *mmsrc, uint8_t *ddst,
+                          ptrdiff_t blinesize, ptrdiff_t olinesize,
+                          ptrdiff_t mlinesize, ptrdiff_t dlinesize,
+                          int w, int h, int half, int shift)
+{
+    const uint16_t *bsrc = (const uint16_t *)bbsrc;
+    const uint16_t *osrc = (const uint16_t *)oosrc;
+    const uint16_t *msrc = (const uint16_t *)mmsrc;
+    uint16_t *dst = (uint16_t *)ddst;
+    int x, y;
+
+    for (y = 0; y < h; y++) {
+        for (x = 0; x < w; x++)
+            dst[x] = bsrc[x] + (((int64_t)msrc[x] * (osrc[x] - bsrc[x]) + half) >> shift);
+        dst  += dlinesize / 2;
+        bsrc += blinesize / 2;
+        osrc += olinesize / 2;
+        msrc += mlinesize / 2;
+    }
+}
+
+/* config_props of the "base" input pad: cache plane sizes, bit depth and the merge kernel. */
+static int config_input(AVFilterLink *inlink)
+{
+    MaskedMergeContext *s = inlink->dst->priv;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    const int chroma_w = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
+    const int chroma_h = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
+    int p;
+
+    s->nb_planes = av_pix_fmt_count_planes(inlink->format);
+
+    /* Planes 1 and 2 use the chroma-subsampled size, planes 0 and 3 the full frame size. */
+    for (p = 0; p < 4; p++) {
+        const int is_chroma = p == 1 || p == 2;
+
+        s->width[p]  = is_chroma ? chroma_w : inlink->w;
+        s->height[p] = is_chroma ? chroma_h : inlink->h;
+    }
+
+    s->depth = desc->comp[0].depth;
+    s->half  = (1 << s->depth) / 2;
+
+    /* Pick the C kernel by depth; the x86 init hook runs afterwards when ARCH_X86 is set. */
+    s->maskedmerge = desc->comp[0].depth == 8 ? maskedmerge8 : maskedmerge16;
+    if (ARCH_X86)
+        ff_maskedmerge_init_x86(s);
+
+    return 0;
+}
+
+/* Output link setup. Fails with AVERROR(EINVAL) unless all three inputs share
+ * pixel format, size and sample aspect ratio; then copies the base input's
+ * link properties to the output and configures a three-input frame
+ * synchronizer whose event callback is process_frame. */
+static int config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    MaskedMergeContext *s = ctx->priv;
+    AVFilterLink *base    = ctx->inputs[0];
+    AVFilterLink *overlay = ctx->inputs[1];
+    AVFilterLink *mask    = ctx->inputs[2];
+    FFFrameSyncIn *in;
+    int i, ret;
+
+    if (overlay->format != base->format || mask->format != base->format) {
+        av_log(ctx, AV_LOG_ERROR, "inputs must be of same pixel format\n");
+        return AVERROR(EINVAL);
+    }
+    /* Find the first of overlay/mask whose geometry differs from base, if any. */
+    for (i = 1; i < 3; i++) {
+        const AVFilterLink *other = ctx->inputs[i];
+
+        if (other->w != base->w || other->h != base->h ||
+            other->sample_aspect_ratio.num != base->sample_aspect_ratio.num ||
+            other->sample_aspect_ratio.den != base->sample_aspect_ratio.den)
+            break;
+    }
+    if (i < 3) {
+        av_log(ctx, AV_LOG_ERROR, "First input link %s parameters "
+               "(size %dx%d, SAR %d:%d) do not match the corresponding "
+               "second input link %s parameters (%dx%d, SAR %d:%d) "
+               "and/or third input link %s parameters (%dx%d, SAR %d:%d)\n",
+               ctx->input_pads[0].name, base->w, base->h,
+               base->sample_aspect_ratio.num, base->sample_aspect_ratio.den,
+               ctx->input_pads[1].name, overlay->w, overlay->h,
+               overlay->sample_aspect_ratio.num, overlay->sample_aspect_ratio.den,
+               ctx->input_pads[2].name, mask->w, mask->h,
+               mask->sample_aspect_ratio.num, mask->sample_aspect_ratio.den);
+        return AVERROR(EINVAL);
+    }
+
+    /* The output mirrors the base input's size, time base, SAR and frame rate. */
+    outlink->w                   = base->w;
+    outlink->h                   = base->h;
+    outlink->time_base           = base->time_base;
+    outlink->sample_aspect_ratio = base->sample_aspect_ratio;
+    outlink->frame_rate          = base->frame_rate;
+
+    ret = ff_framesync_init(&s->fs, ctx, 3);
+    if (ret < 0)
+        return ret;
+
+    /* All three inputs get the same settings: sync = 1, before = EXT_STOP,
+     * after = EXT_INFINITY; only the time base is taken per input link. */
+    in = s->fs.in;
+    for (i = 0; i < 3; i++) {
+        in[i].time_base = ctx->inputs[i]->time_base;
+        in[i].sync      = 1;
+        in[i].before    = EXT_STOP;
+        in[i].after     = EXT_INFINITY;
+    }
+    s->fs.opaque   = s;
+    s->fs.on_event = process_frame;
+
+    return ff_framesync_configure(&s->fs);
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *buf) /* filter_frame callback shared by all three input pads */
+{
+    MaskedMergeContext *s = inlink->dst->priv;
+    return ff_framesync_filter_frame(&s->fs, inlink, buf); /* hand the frame to the frame synchronizer and return its result */
+}
+
+static int request_frame(AVFilterLink *outlink) /* request_frame callback of the output pad */
+{
+    MaskedMergeContext *s = outlink->src->priv;
+    return ff_framesync_request_frame(&s->fs, outlink); /* delegated to the frame synchronizer */
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    MaskedMergeContext *s = ctx->priv;
+    /* Tear down the frame synchronizer that config_output initialized. */
+    ff_framesync_uninit(&s->fs);
+}
+
+static const AVFilterPad maskedmerge_inputs[] = { /* pad order matches input indices 0/1/2 used by process_frame and config_output */
+    {
+        .name         = "base",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame,
+        .config_props = config_input, /* only this pad has a config callback; it caches plane sizes, depth and kernel */
+    },
+    {
+        .name         = "overlay",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame,
+    },
+    {
+        .name         = "mask",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame,
+    },
+    { NULL }
+};
+
+static const AVFilterPad maskedmerge_outputs[] = { /* single video output pad */
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .config_props  = config_output,  /* checks that the inputs match and sets up frame sync */
+        .request_frame = request_frame,  /* forwards the request to the frame synchronizer */
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_maskedmerge = { /* filter definition: inputs base/overlay/mask, one output */
+    .name          = "maskedmerge",
+    .description   = NULL_IF_CONFIG_SMALL("Merge first stream with second stream using third stream as mask."),
+    .priv_size     = sizeof(MaskedMergeContext),
+    .uninit        = uninit,
+    .query_formats = query_formats,
+    .inputs        = maskedmerge_inputs,
+    .outputs       = maskedmerge_outputs,
+    .priv_class    = &maskedmerge_class,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL, /* process_frame checks ctx->is_disabled itself */
+};
diff --git a/libavfilter/vf_mcdeint.c b/libavfilter/vf_mcdeint.c
index b0070d82..ea32a240 100644
--- a/libavfilter/vf_mcdeint.c
+++ b/libavfilter/vf_mcdeint.c
@@ -1,6 +1,8 @@
 /*
  * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
  *
+ * This file is part of FFmpeg.
+ *
  * FFmpeg is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -120,7 +122,7 @@ static int config_props(AVFilterLink *inlink)
     enc_ctx->gop_size = INT_MAX;
     enc_ctx->max_b_frames = 0;
     enc_ctx->pix_fmt = AV_PIX_FMT_YUV420P;
-    enc_ctx->flags = CODEC_FLAG_QSCALE | CODEC_FLAG_LOW_DELAY;
+    enc_ctx->flags = AV_CODEC_FLAG_QSCALE | CODEC_FLAG_LOW_DELAY;
     enc_ctx->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;
     enc_ctx->global_quality = 1;
     enc_ctx->me_cmp = enc_ctx->me_sub_cmp = FF_CMP_SAD;
@@ -134,10 +136,10 @@ static int config_props(AVFilterLink *inlink)
     case MODE_SLOW:
         enc_ctx->me_method = ME_ITER;
     case MODE_MEDIUM:
-        enc_ctx->flags |= CODEC_FLAG_4MV;
+        enc_ctx->flags |= AV_CODEC_FLAG_4MV;
         enc_ctx->dia_size = 2;
     case MODE_FAST:
-        enc_ctx->flags |= CODEC_FLAG_QPEL;
+        enc_ctx->flags |= AV_CODEC_FLAG_QPEL;
     }
 
     ret = avcodec_open2(enc_ctx, enc, &opts);
@@ -195,8 +197,8 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *inpic)
 
     for (i = 0; i < 3; i++) {
         int is_chroma = !!i;
-        int w = FF_CEIL_RSHIFT(inlink->w, is_chroma);
-        int h = FF_CEIL_RSHIFT(inlink->h, is_chroma);
+        int w = AV_CEIL_RSHIFT(inlink->w, is_chroma);
+        int h = AV_CEIL_RSHIFT(inlink->h, is_chroma);
         int fils = frame_dec->linesize[i];
         int srcs = inpic    ->linesize[i];
         int dsts = outpic   ->linesize[i];
@@ -275,7 +277,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *inpic)
     mcdeint->parity ^= 1;
 
 end:
-    av_free_packet(&pkt);
+    av_packet_unref(&pkt);
     av_frame_free(&inpic);
     if (ret < 0) {
         av_frame_free(&outpic);
diff --git a/libavfilter/vf_mergeplanes.c b/libavfilter/vf_mergeplanes.c
index c76e82a4..8128f337 100644
--- a/libavfilter/vf_mergeplanes.c
+++ b/libavfilter/vf_mergeplanes.c
@@ -46,7 +46,6 @@ typedef struct MergePlanesContext {
     const AVPixFmtDescriptor *outdesc;
 
     FFFrameSync fs;
-    FFFrameSyncIn fsin[3]; /* must be immediately after fs */
 } MergePlanesContext;
 
 #define OFFSET(x) offsetof(MergePlanesContext, x)
@@ -117,22 +116,25 @@ static int query_formats(AVFilterContext *ctx)
 {
     MergePlanesContext *s = ctx->priv;
     AVFilterFormats *formats = NULL;
-    int i;
+    int i, ret;
 
     s->outdesc = av_pix_fmt_desc_get(s->out_fmt);
     for (i = 0; av_pix_fmt_desc_get(i); i++) {
         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(i);
-        if (desc->comp[0].depth_minus1 == s->outdesc->comp[0].depth_minus1 &&
-            av_pix_fmt_count_planes(i) == desc->nb_components)
-            ff_add_format(&formats, i);
+        if (desc->comp[0].depth == s->outdesc->comp[0].depth &&
+            av_pix_fmt_count_planes(i) == desc->nb_components &&
+            (ret = ff_add_format(&formats, i)) < 0)
+                return ret;
     }
 
     for (i = 0; i < s->nb_inputs; i++)
-        ff_formats_ref(formats, &ctx->inputs[i]->out_formats);
+        if ((ret = ff_formats_ref(formats, &ctx->inputs[i]->out_formats)) < 0)
+            return ret;
 
     formats = NULL;
-    ff_add_format(&formats, s->out_fmt);
-    ff_formats_ref(formats, &ctx->outputs[0]->in_formats);
+    if ((ret = ff_add_format(&formats, s->out_fmt)) < 0 ||
+        (ret = ff_formats_ref(formats, &ctx->outputs[0]->in_formats)) < 0)
+        return ret;
 
     return 0;
 }
@@ -174,9 +176,11 @@ static int config_output(AVFilterLink *outlink)
     MergePlanesContext *s = ctx->priv;
     InputParam inputsp[4];
     FFFrameSyncIn *in;
-    int i;
+    int i, ret;
+
+    if ((ret = ff_framesync_init(&s->fs, ctx, s->nb_inputs)) < 0)
+        return ret;
 
-    ff_framesync_init(&s->fs, ctx, s->nb_inputs);
     in = s->fs.in;
     s->fs.opaque = s;
     s->fs.on_event = process_frame;
@@ -188,11 +192,11 @@ static int config_output(AVFilterLink *outlink)
     outlink->sample_aspect_ratio = ctx->inputs[0]->sample_aspect_ratio;
 
     s->planewidth[1]  =
-    s->planewidth[2]  = FF_CEIL_RSHIFT(outlink->w, s->outdesc->log2_chroma_w);
+    s->planewidth[2]  = AV_CEIL_RSHIFT(outlink->w, s->outdesc->log2_chroma_w);
     s->planewidth[0]  =
     s->planewidth[3]  = outlink->w;
     s->planeheight[1] =
-    s->planeheight[2] = FF_CEIL_RSHIFT(outlink->h, s->outdesc->log2_chroma_h);
+    s->planeheight[2] = AV_CEIL_RSHIFT(outlink->h, s->outdesc->log2_chroma_h);
     s->planeheight[0] =
     s->planeheight[3] = outlink->h;
 
@@ -216,17 +220,17 @@ static int config_output(AVFilterLink *outlink)
         }
 
         inputp->planewidth[1]  =
-        inputp->planewidth[2]  = FF_CEIL_RSHIFT(inlink->w, indesc->log2_chroma_w);
+        inputp->planewidth[2]  = AV_CEIL_RSHIFT(inlink->w, indesc->log2_chroma_w);
         inputp->planewidth[0]  =
         inputp->planewidth[3]  = inlink->w;
         inputp->planeheight[1] =
-        inputp->planeheight[2] = FF_CEIL_RSHIFT(inlink->h, indesc->log2_chroma_h);
+        inputp->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, indesc->log2_chroma_h);
         inputp->planeheight[0] =
         inputp->planeheight[3] = inlink->h;
         inputp->nb_planes = av_pix_fmt_count_planes(inlink->format);
 
         for (j = 0; j < inputp->nb_planes; j++)
-            inputp->depth[j] = indesc->comp[j].depth_minus1 + 1;
+            inputp->depth[j] = indesc->comp[j].depth;
 
         in[i].time_base = inlink->time_base;
         in[i].sync   = 1;
@@ -244,10 +248,10 @@ static int config_output(AVFilterLink *outlink)
                                       input, plane);
             goto fail;
         }
-        if (s->outdesc->comp[i].depth_minus1 + 1 != inputp->depth[plane]) {
+        if (s->outdesc->comp[i].depth != inputp->depth[plane]) {
             av_log(ctx, AV_LOG_ERROR, "output plane %d depth %d does not "
                                       "match input %d plane %d depth %d\n",
-                                      i, s->outdesc->comp[i].depth_minus1 + 1,
+                                      i, s->outdesc->comp[i].depth,
                                       input, plane, inputp->depth[plane]);
             goto fail;
         }
diff --git a/libavfilter/vf_mpdecimate.c b/libavfilter/vf_mpdecimate.c
index 7cc32547..dc345114 100644
--- a/libavfilter/vf_mpdecimate.c
+++ b/libavfilter/vf_mpdecimate.c
@@ -130,11 +130,14 @@ static int decimate_frame(AVFilterContext *ctx,
         if (diff_planes(ctx,
                         cur->data[plane], cur->linesize[plane],
                         ref->data[plane], ref->linesize[plane],
-                        FF_CEIL_RSHIFT(ref->width,  hsub),
-                        FF_CEIL_RSHIFT(ref->height, vsub)))
+                        AV_CEIL_RSHIFT(ref->width,  hsub),
+                        AV_CEIL_RSHIFT(ref->height, vsub))) {
+            emms_c();
             return 0;
+        }
     }
 
+    emms_c();
     return 1;
 }
 
@@ -168,9 +171,6 @@ static int query_formats(AVFilterContext *ctx)
         AV_PIX_FMT_YUVJ420P,     AV_PIX_FMT_YUVJ440P,
         AV_PIX_FMT_YUVA420P,
 
-        AV_PIX_FMT_GRAY8A,
-        AV_PIX_FMT_YA8,
-
         AV_PIX_FMT_GBRP,
 
         AV_PIX_FMT_YUVA444P,
@@ -224,19 +224,6 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *cur)
     return 0;
 }
 
-static int request_frame(AVFilterLink *outlink)
-{
-    DecimateContext *decimate = outlink->src->priv;
-    AVFilterLink *inlink = outlink->src->inputs[0];
-    int ret;
-
-    do {
-        ret = ff_request_frame(inlink);
-    } while (decimate->drop_count > 0 && ret >= 0);
-
-    return ret;
-}
-
 static const AVFilterPad mpdecimate_inputs[] = {
     {
         .name         = "default",
@@ -251,7 +238,6 @@ static const AVFilterPad mpdecimate_outputs[] = {
     {
         .name          = "default",
         .type          = AVMEDIA_TYPE_VIDEO,
-        .request_frame = request_frame,
     },
     { NULL }
 };
diff --git a/libavfilter/vf_neighbor.c b/libavfilter/vf_neighbor.c
new file mode 100644
index 00000000..de4a12f0
--- /dev/null
+++ b/libavfilter/vf_neighbor.c
@@ -0,0 +1,323 @@
+/*
+ * Copyright (c) 2012-2013 Oka Motofumi (chikuzen.mo at gmail dot com)
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/imgutils.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/opt.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+typedef struct NContext {
+    const AVClass *class;
+    int planeheight[4];  /* rows per plane, computed in config_input() */
+    int planewidth[4];   /* per-plane line sizes from av_image_fill_linesizes() */
+    int nb_planes;       /* from av_pix_fmt_count_planes() */
+    int threshold[4];    /* "thresholdN" options; filter_frame() just copies a plane whose value is 0 */
+    int coordinates;     /* "coordinates" option: bit i enables neighbour i in erosion()/dilation() */
+    uint8_t *buffer;     /* scratch rows: allocated in config_input(), freed in uninit() */
+    /* row kernel (erosion/dilation/deflate/inflate), chosen by filter name in config_input() */
+    void (*filter)(uint8_t *dst, const uint8_t *p1, int width,
+                   int threshold, const uint8_t *coordinates[], int coord);
+} NContext;
+
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat supported[] = {
+        AV_PIX_FMT_YUVA444P, AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUVA420P,
+        AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ411P,
+        AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV410P,
+        AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRAP, AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE
+    };
+    AVFilterFormats *list = ff_make_format_list(supported); /* formats accepted on every link */
+    return ff_set_common_formats(ctx, list);
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    NContext *nctx = ctx->priv;
+    /* drop the scratch rows allocated by config_input() */
+    av_freep(&nctx->buffer);
+}
+
+static inline void line_copy8(uint8_t *line, const uint8_t *srcp, int width, int mergin)
+{
+    int k = mergin;
+    /* copy the row, then reflect `mergin` pixels outwards past each end */
+    memcpy(line, srcp, width);
+    while (k > 0) {
+        line[-k] = line[k];
+        line[width - 1 + k] = line[width - 1 - k];
+        k--;
+    }
+}
+
+static void erosion(uint8_t *dst, const uint8_t *p1, int width,
+                    int threshold, const uint8_t *coordinates[], int coord)
+{
+    int col, bit;
+
+    for (col = 0; col < width; col++) {
+        const int lo_bound = FFMAX(p1[col] - threshold, 0);
+        int lowest = p1[col];
+
+        for (bit = 0; bit < 8; bit++) {
+            if (coord & (1 << bit))
+                lowest = FFMIN(lowest, coordinates[bit][col]);
+            /* never drop more than `threshold` below the center pixel */
+            lowest = FFMAX(lowest, lo_bound);
+        }
+
+        dst[col] = lowest;
+    }
+}
+
+static void dilation(uint8_t *dst, const uint8_t *p1, int width,
+                     int threshold, const uint8_t *coordinates[], int coord)
+{
+    int col, bit;
+
+    for (col = 0; col < width; col++) {
+        const int hi_bound = FFMIN(p1[col] + threshold, 255);
+        int highest = p1[col];
+
+        for (bit = 0; bit < 8; bit++) {
+            if (coord & (1 << bit))
+                highest = FFMAX(highest, coordinates[bit][col]);
+            /* never rise more than `threshold` above the center pixel */
+            highest = FFMIN(highest, hi_bound);
+        }
+
+        dst[col] = highest;
+    }
+}
+
+static void deflate(uint8_t *dst, const uint8_t *p1, int width,
+                    int threshold, const uint8_t *coordinates[], int coord)
+{
+    int col, k;
+
+    for (col = 0; col < width; col++) {
+        const int center = p1[col];
+        int total = 0; /* sum of all 8 neighbours; `coord` is not consulted here */
+
+        for (k = 0; k < 8; k++)
+            total += coordinates[k][col];
+        dst[col] = FFMAX(FFMIN(total / 8, center), FFMAX(center - threshold, 0));
+    }
+}
+
+static void inflate(uint8_t *dst, const uint8_t *p1, int width,
+                    int threshold, const uint8_t *coordinates[], int coord)
+{
+    int col, k;
+
+    for (col = 0; col < width; col++) {
+        const int center = p1[col];
+        int total = 0; /* sum of all 8 neighbours; `coord` is not consulted here */
+
+        for (k = 0; k < 8; k++)
+            total += coordinates[k][col];
+        dst[col] = FFMIN(FFMAX(total / 8, center), FFMIN(center + threshold, 255));
+    }
+}
+
+static int config_input(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    NContext *s = ctx->priv;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    const char *name = ctx->filter->name;
+    int err = av_image_fill_linesizes(s->planewidth, inlink->format, inlink->w);
+
+    if (err < 0)
+        return err;
+    /* planes 1 and 2 get the chroma-shifted height, planes 0 and 3 the full height */
+    s->planeheight[0] = s->planeheight[3] = inlink->h;
+    s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
+    s->nb_planes = av_pix_fmt_count_planes(inlink->format);
+    s->buffer = av_malloc(3 * (s->planewidth[0] + 32));
+    if (!s->buffer)
+        return AVERROR(ENOMEM);
+
+    /* all four filters share this callback; pick the row kernel by filter name */
+    if (strcmp(name, "erosion") == 0)
+        s->filter = erosion;
+    else if (strcmp(name, "dilation") == 0)
+        s->filter = dilation;
+    else if (strcmp(name, "deflate") == 0)
+        s->filter = deflate;
+    else if (strcmp(name, "inflate") == 0)
+        s->filter = inflate;
+    return 0;
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    NContext *s = ctx->priv;
+    AVFrame *out;
+    int plane, y;
+    /* Consumes `in`; returns ff_filter_frame()'s result, or ENOMEM if no output frame. */
+    out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!out) {
+        av_frame_free(&in);
+        return AVERROR(ENOMEM);
+    }
+    av_frame_copy_props(out, in);
+
+    for (plane = 0; plane < s->nb_planes; plane++) {
+        const int threshold = s->threshold[plane];
+        /* Scratch row pitch: width plus 16 guard bytes per side, as sized by the av_malloc() in config_input(). */
+        const int bstride = s->planewidth[0] + 32;
+        if (threshold) {
+            const uint8_t *src = in->data[plane];
+            uint8_t *dst = out->data[plane];
+            int stride = in->linesize[plane];
+            int height = s->planeheight[plane];
+            int width  = s->planewidth[plane];
+            uint8_t *p0 = s->buffer + 16;
+            uint8_t *p1 = p0 + bstride; /* rows must not overlap: line_copy8() writes line[-1] and line[width] */
+            uint8_t *p2 = p1 + bstride;
+            uint8_t *orig = p0, *end = p2;
+            /* Row "-1" mirrors row 1; a single-row plane reuses row 0 instead of reading past it. */
+            line_copy8(p0, src + (height > 1 ? stride : 0), width, 1);
+            line_copy8(p1, src, width, 1);
+            for (y = 0; y < height; y++) {
+                const uint8_t *coordinates[] = { p0 - 1, p0, p0 + 1,
+                                                 p1 - 1,     p1 + 1,
+                                                 p2 - 1, p2, p2 + 1};
+                if (height > 1) /* step down; past the last row mirror back to the one above */
+                    src += stride * (y < height - 1 ? 1 : -1);
+                line_copy8(p2, src, width, 1);
+                s->filter(dst, p1, width, threshold, coordinates, s->coordinates);
+
+                p0 = p1;
+                p1 = p2;
+                p2 = (p2 == end) ? orig : p2 + bstride;
+                dst += out->linesize[plane];
+            }
+        } else {
+            av_image_copy_plane(out->data[plane], out->linesize[plane],
+                                in->data[plane], in->linesize[plane],
+                                s->planewidth[plane], s->planeheight[plane]);
+        }
+    }
+
+    av_frame_free(&in);
+    return ff_filter_frame(outlink, out);
+}
+
+static const AVFilterPad neighbor_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame,
+        .config_props = config_input,
+    },
+    { NULL }
+};
+
+static const AVFilterPad neighbor_outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO,
+    },
+    { NULL }
+};
+
+#define OFFSET(x) offsetof(NContext, x)
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
+
+#define DEFINE_NEIGHBOR_FILTER(name_, description_)          \
+AVFILTER_DEFINE_CLASS(name_);                                \
+                                                             \
+AVFilter ff_vf_##name_ = {                                   \
+    .name          = #name_,                                 \
+    .description   = NULL_IF_CONFIG_SMALL(description_),     \
+    .priv_size     = sizeof(NContext),                       \
+    .priv_class    = &name_##_class,                         \
+    .uninit        = uninit,                                 \
+    .query_formats = query_formats,                          \
+    .inputs        = neighbor_inputs,                        \
+    .outputs       = neighbor_outputs,                       \
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC, \
+}
+
+#if CONFIG_EROSION_FILTER
+
+static const AVOption erosion_options[] = {
+    { "threshold0",  "set threshold for 1st plane",   OFFSET(threshold[0]),   AV_OPT_TYPE_INT, {.i64=65535}, 0, 65535, FLAGS },
+    { "threshold1",  "set threshold for 2nd plane",   OFFSET(threshold[1]),   AV_OPT_TYPE_INT, {.i64=65535}, 0, 65535, FLAGS },
+    { "threshold2",  "set threshold for 3rd plane",   OFFSET(threshold[2]),   AV_OPT_TYPE_INT, {.i64=65535}, 0, 65535, FLAGS },
+    { "threshold3",  "set threshold for 4th plane",   OFFSET(threshold[3]),   AV_OPT_TYPE_INT, {.i64=65535}, 0, 65535, FLAGS },
+    { "coordinates", "set coordinates",               OFFSET(coordinates),    AV_OPT_TYPE_INT, {.i64=255},   0, 255,   FLAGS },
+    { NULL }
+};
+
+DEFINE_NEIGHBOR_FILTER(erosion, "Apply erosion effect.");
+
+#endif /* CONFIG_EROSION_FILTER */
+
+#if CONFIG_DILATION_FILTER
+
+static const AVOption dilation_options[] = {
+    { "threshold0",  "set threshold for 1st plane",   OFFSET(threshold[0]),   AV_OPT_TYPE_INT, {.i64=65535}, 0, 65535, FLAGS },
+    { "threshold1",  "set threshold for 2nd plane",   OFFSET(threshold[1]),   AV_OPT_TYPE_INT, {.i64=65535}, 0, 65535, FLAGS },
+    { "threshold2",  "set threshold for 3rd plane",   OFFSET(threshold[2]),   AV_OPT_TYPE_INT, {.i64=65535}, 0, 65535, FLAGS },
+    { "threshold3",  "set threshold for 4th plane",   OFFSET(threshold[3]),   AV_OPT_TYPE_INT, {.i64=65535}, 0, 65535, FLAGS },
+    { "coordinates", "set coordinates",               OFFSET(coordinates),    AV_OPT_TYPE_INT, {.i64=255},   0, 255,   FLAGS },
+    { NULL }
+};
+
+DEFINE_NEIGHBOR_FILTER(dilation, "Apply dilation effect.");
+
+#endif /* CONFIG_DILATION_FILTER */
+
+#if CONFIG_DEFLATE_FILTER
+
+static const AVOption deflate_options[] = {
+    { "threshold0", "set threshold for 1st plane",   OFFSET(threshold[0]),   AV_OPT_TYPE_INT, {.i64=65535}, 0, 65535, FLAGS },
+    { "threshold1", "set threshold for 2nd plane",   OFFSET(threshold[1]),   AV_OPT_TYPE_INT, {.i64=65535}, 0, 65535, FLAGS },
+    { "threshold2", "set threshold for 3rd plane",   OFFSET(threshold[2]),   AV_OPT_TYPE_INT, {.i64=65535}, 0, 65535, FLAGS },
+    { "threshold3", "set threshold for 4th plane",   OFFSET(threshold[3]),   AV_OPT_TYPE_INT, {.i64=65535}, 0, 65535, FLAGS },
+    { NULL }
+};
+
+DEFINE_NEIGHBOR_FILTER(deflate, "Apply deflate effect.");
+
+#endif /* CONFIG_DEFLATE_FILTER */
+
+#if CONFIG_INFLATE_FILTER
+
+static const AVOption inflate_options[] = {
+    { "threshold0", "set threshold for 1st plane",   OFFSET(threshold[0]),   AV_OPT_TYPE_INT, {.i64=65535}, 0, 65535, FLAGS },
+    { "threshold1", "set threshold for 2nd plane",   OFFSET(threshold[1]),   AV_OPT_TYPE_INT, {.i64=65535}, 0, 65535, FLAGS },
+    { "threshold2", "set threshold for 3rd plane",   OFFSET(threshold[2]),   AV_OPT_TYPE_INT, {.i64=65535}, 0, 65535, FLAGS },
+    { "threshold3", "set threshold for 4th plane",   OFFSET(threshold[3]),   AV_OPT_TYPE_INT, {.i64=65535}, 0, 65535, FLAGS },
+    { NULL }
+};
+
+DEFINE_NEIGHBOR_FILTER(inflate, "Apply inflate effect.");
+
+#endif /* CONFIG_INFLATE_FILTER */
diff --git a/libavfilter/vf_nnedi.c b/libavfilter/vf_nnedi.c
new file mode 100644
index 00000000..330d3d65
--- /dev/null
+++ b/libavfilter/vf_nnedi.c
@@ -0,0 +1,1210 @@
+/*
+ * Copyright (C) 2010-2011 Kevin Stone
+ * Copyright (C) 2016 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <float.h>
+
+#include "libavutil/common.h"
+#include "libavutil/float_dsp.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+typedef struct FrameData {
+    uint8_t *paddedp[3];     /* padded copy of each plane, filled by copy_pad() */
+    int padded_stride[3];    /* row pitch of paddedp[] */
+    int padded_width[3];     /* row length including the 32 padding columns on each side */
+    int padded_height[3];    /* row count including the 6 padding rows at top and bottom */
+
+    uint8_t *dstp[3];        /* output plane written by evalfunc_0() */
+    int dst_stride[3];       /* row pitch of dstp[] */
+
+    int field[3];            /* per-plane row parity: evalfunc_0() copies rows 1 - field, processes rows field */
+
+    int32_t *lcount[3];      /* per-row counters incremented by evalfunc_0() */
+    float *input;            /* scratch handed to readpixels()/compute_network0() */
+    float *temp;             /* scratch; evalfunc_0() also uses it as a byte array of prescreener flags */
+} FrameData;
+
+typedef struct NNEDIContext {
+    const AVClass *class;
+
+    char *weights_file;          /* "weights" option */
+
+    AVFrame *src;
+    AVFrame *second;
+    AVFrame *dst;
+    int eof;
+    int64_t cur_pts;
+
+    AVFloatDSPContext *fdsp;     /* provides scalarproduct_float() for dot_prod() */
+    int nb_planes;               /* set in config_input() */
+    int linesize[4];             /* from av_image_fill_linesizes() in config_input() */
+    int planeheight[4];          /* rows per plane, set in config_input() */
+
+    float *weights0;             /* weights handed to compute_network0() by evalfunc_0() */
+    float *weights1[2];          /* read by evalfunc_1() */
+    int asize;
+    int nns;
+    int xdia;
+    int ydia;
+
+    // Parameters
+    int deint;                   /* "deint" option */
+    int field;                   /* "field" option; values > 1 or -2 double the output frame rate */
+    int process_plane;           /* "planes" option: bit p enables plane p */
+    int nsize;                   /* "nsize" option */
+    int nnsparam;                /* "nns" option */
+    int qual;                    /* "qual" option */
+    int etype;                   /* "etype" option */
+    int pscrn;                   /* "pscrn" option: 0 none, 1 original, 2 new (see evalfunc_0()) */
+    int fapprox;                 /* "fapprox" option */
+
+    int max_value;               /* forwarded to process_line0() */
+
+    void (*copy_pad)(const AVFrame *, FrameData *, struct NNEDIContext *, int);
+    void (*evalfunc_0)(struct NNEDIContext *, FrameData *);
+    void (*evalfunc_1)(struct NNEDIContext *, FrameData *);
+
+    // Functions used in evalfunc_0
+    void (*readpixels)(const uint8_t *, const int, float *);
+    void (*compute_network0)(struct NNEDIContext *s, const float *, const float *, uint8_t *);
+    int32_t (*process_line0)(const uint8_t *, int, uint8_t *, const uint8_t *, const int, const int, const int);
+
+    // Functions used in evalfunc_1
+    void (*extract)(const uint8_t *, const int, const int, const int, float *, float *);
+    void (*dot_prod)(struct NNEDIContext *, const float *, const float *, float *, const int, const int, const float *);
+    void (*expfunc)(float *, const int);
+    void (*wae5)(const float *, const int, float *);
+
+    FrameData frame_data;
+} NNEDIContext;
+
+#define OFFSET(x) offsetof(NNEDIContext, x)
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption nnedi_options[] = {
+    {"weights",  "set weights file", OFFSET(weights_file),  AV_OPT_TYPE_STRING, {.str="nnedi3_weights.bin"}, 0, 0, FLAGS },
+    {"deint",         "set which frames to deinterlace", OFFSET(deint),         AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS, "deint" },
+        {"all",        "deinterlace all frames",                       0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "deint" },
+        {"interlaced", "only deinterlace frames marked as interlaced", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "deint" },
+    {"field",  "set mode of operation", OFFSET(field),         AV_OPT_TYPE_INT, {.i64=-1}, -2, 3, FLAGS, "field" },
+        {"af", "use frame flags, both fields",  0, AV_OPT_TYPE_CONST, {.i64=-2}, 0, 0, FLAGS, "field" },
+        {"a",  "use frame flags, single field", 0, AV_OPT_TYPE_CONST, {.i64=-1}, 0, 0, FLAGS, "field" },
+        {"t",  "use top field only",            0, AV_OPT_TYPE_CONST, {.i64=0},  0, 0, FLAGS, "field" },
+        {"b",  "use bottom field only",         0, AV_OPT_TYPE_CONST, {.i64=1},  0, 0, FLAGS, "field" },
+        {"tf", "use both fields, top first",    0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, FLAGS, "field" },
+        {"bf", "use both fields, bottom first", 0, AV_OPT_TYPE_CONST, {.i64=3}, 0, 0, FLAGS, "field" },
+    {"planes", "set which planes to process", OFFSET(process_plane), AV_OPT_TYPE_INT, {.i64=7}, 0, 7, FLAGS },
+    {"nsize",  "set size of local neighborhood around each pixel, used by the predictor neural network", OFFSET(nsize), AV_OPT_TYPE_INT, {.i64=6}, 0, 6, FLAGS, "nsize" },
+        {"s8x6",     NULL, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "nsize" },
+        {"s16x6",    NULL, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "nsize" },
+        {"s32x6",    NULL, 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, FLAGS, "nsize" },
+        {"s48x6",    NULL, 0, AV_OPT_TYPE_CONST, {.i64=3}, 0, 0, FLAGS, "nsize" },
+        {"s8x4",     NULL, 0, AV_OPT_TYPE_CONST, {.i64=4}, 0, 0, FLAGS, "nsize" },
+        {"s16x4",    NULL, 0, AV_OPT_TYPE_CONST, {.i64=5}, 0, 0, FLAGS, "nsize" },
+        {"s32x4",    NULL, 0, AV_OPT_TYPE_CONST, {.i64=6}, 0, 0, FLAGS, "nsize" },
+    {"nns",    "set number of neurons in predictor neural network", OFFSET(nnsparam), AV_OPT_TYPE_INT, {.i64=1}, 0, 4, FLAGS, "nns" },
+        {"n16",       NULL, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "nns" },
+        {"n32",       NULL, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "nns" },
+        {"n64",       NULL, 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, FLAGS, "nns" },
+        {"n128",      NULL, 0, AV_OPT_TYPE_CONST, {.i64=3}, 0, 0, FLAGS, "nns" },
+        {"n256",      NULL, 0, AV_OPT_TYPE_CONST, {.i64=4}, 0, 0, FLAGS, "nns" },
+    {"qual",  "set quality", OFFSET(qual), AV_OPT_TYPE_INT, {.i64=1}, 1, 2, FLAGS, "qual" },
+        {"fast", NULL, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "qual" },
+        {"slow", NULL, 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, FLAGS, "qual" },
+    {"etype", "set which set of weights to use in the predictor", OFFSET(etype), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS, "etype" },
+        {"a",  "weights trained to minimize absolute error", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "etype" },
+        {"s",  "weights trained to minimize squared error",  0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "etype" },
+    {"pscrn", "set prescreening", OFFSET(pscrn), AV_OPT_TYPE_INT, {.i64=2}, 0, 2, FLAGS, "pscrn" },
+        {"none",      NULL, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "pscrn" },
+        {"original",  NULL, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "pscrn" },
+        {"new",       NULL, 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, FLAGS, "pscrn" },
+    {"fapprox",       NULL, OFFSET(fapprox),       AV_OPT_TYPE_INT, {.i64=0}, 0, 3, FLAGS },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(nnedi);
+
+static int config_input(AVFilterLink *inlink)
+{
+    NNEDIContext *nnedi = inlink->dst->priv;
+    const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(inlink->format);
+    int err;
+
+    /* cache the plane count, the per-plane line sizes and the per-plane heights */
+    nnedi->nb_planes = av_pix_fmt_count_planes(inlink->format);
+    err = av_image_fill_linesizes(nnedi->linesize, inlink->format, inlink->w);
+    if (err < 0)
+        return err;
+
+    nnedi->planeheight[0] = nnedi->planeheight[3] = inlink->h;
+    nnedi->planeheight[1] = nnedi->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, pixdesc->log2_chroma_h);
+    return 0;
+}
+
+static int config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    const AVFilterLink *inlink = ctx->inputs[0];
+    const NNEDIContext *s = ctx->priv;
+    const int both_fields = s->field > 1 || s->field == -2;
+
+    /* time base is halved unconditionally; frame rate only doubles in the both-fields modes */
+    outlink->time_base = (AVRational){ inlink->time_base.num, inlink->time_base.den * 2 };
+    outlink->w = inlink->w;
+    outlink->h = inlink->h;
+    if (both_fields)
+        outlink->frame_rate = av_mul_q(inlink->frame_rate, (AVRational){2, 1});
+
+    return 0;
+}
+
+static int query_formats(AVFilterContext *ctx)
+{
+    /* pixel formats offered on every link of the filter */
+    static const enum AVPixelFormat supported[] = {
+        AV_PIX_FMT_YUV410P,  AV_PIX_FMT_YUV411P,  AV_PIX_FMT_YUV420P,
+        AV_PIX_FMT_YUV422P,  AV_PIX_FMT_YUV440P,  AV_PIX_FMT_YUV444P,
+        AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_YUVJ422P,
+        AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ411P,
+        AV_PIX_FMT_GBRP,     AV_PIX_FMT_GRAY8,
+        AV_PIX_FMT_NONE
+    };
+    AVFilterFormats *list = ff_make_format_list(supported);
+
+    /* a NULL list is reported as an allocation failure */
+    if (list == NULL)
+        return AVERROR(ENOMEM);
+
+    return ff_set_common_formats(ctx, list);
+}
+
+static void copy_pad(const AVFrame *src, FrameData *frame_data, NNEDIContext *s, int fn)
+{
+    const int off = 1 - fn; /* parity of the source rows that are kept */
+    int plane, y, x;
+    /* Builds, for each enabled plane, a padded copy of every second source row. */
+    for (plane = 0; plane < s->nb_planes; plane++) {
+        const uint8_t *srcp = (const uint8_t *)src->data[plane];
+        uint8_t *dstp = (uint8_t *)frame_data->paddedp[plane];
+
+        const int src_stride = src->linesize[plane];
+        const int dst_stride = frame_data->padded_stride[plane];
+
+        const int src_height = s->planeheight[plane];
+        const int dst_height = frame_data->padded_height[plane];
+
+        const int src_width = s->linesize[plane];
+        const int dst_width = frame_data->padded_width[plane];
+
+        int c = 4; /* row distance used by the bottom padding loop (shadowed inside the side-padding loop) */
+
+        if (!(s->process_plane & (1 << plane))) /* bit `plane` of process_plane enables the plane */
+            continue;
+
+        // Copy rows off, off + 2, ... to column 32 / row 6 + y of the padded plane.
+        for (y = off; y < src_height; y += 2)
+            memcpy(dstp + 32 + (6 + y) * dst_stride,
+                   srcp + y * src_stride,
+                   src_width * sizeof(uint8_t));
+
+        // And pad: mirror 32 columns on each side of every copied row.
+        dstp += (6 + off) * dst_stride;
+        for (y = 6 + off; y < dst_height - 6; y += 2) {
+            int c = 2; /* distance back to the mirrored source column; grows by 2 per column */
+
+            for (x = 0; x < 32; x++)
+                dstp[x] = dstp[64 - x]; /* left side: reflect about column 32 */
+
+            for (x = dst_width - 32; x < dst_width; x++, c += 2)
+                dstp[x] = dstp[x - c]; /* right side: reflect about column dst_width - 33 */
+
+            dstp += dst_stride * 2;
+        }
+        /* Top 6 rows: same-parity rows reflected about row 6 + off. */
+        dstp = (uint8_t *)frame_data->paddedp[plane];
+        for (y = off; y < 6; y += 2)
+            memcpy(dstp + y * dst_stride,
+                   dstp + (12 + 2 * off - y) * dst_stride,
+                   dst_width * sizeof(uint8_t));
+        /* Bottom 6 rows: each copies the row c above it, c growing by 4 per step. */
+        for (y = dst_height - 6 + off; y < dst_height; y += 2, c += 4)
+            memcpy(dstp + y * dst_stride,
+                   dstp + (y - c) * dst_stride,
+                   dst_width * sizeof(uint8_t));
+    }
+}
+
+static void elliott(float *data, const int n)
+{
+    int k = 0;
+    /* x / (1 + |x|), applied in place to the first n elements */
+    for (; k < n; k++)
+        data[k] /= 1.0f + FFABS(data[k]);
+}
+
+static void dot_prod(NNEDIContext *s, const float *data, const float *weights, float *vals, const int n, const int len, const float *scale)
+{
+    const float *bias = weights + n * len; /* n biases follow the n weight rows */
+    const float *row  = weights;
+    int k;
+
+    for (k = 0; k < n; k++, row += len) {
+        const float dot = s->fdsp->scalarproduct_float(data, row, len);
+
+        vals[k] = dot * scale[0] + bias[k];
+    }
+}
+
+static void dot_prods(NNEDIContext *s, const float *dataf, const float *weightsf, float *vals, const int n, const int len, const float *scale)
+{
+    const int16_t *in16 = (int16_t *)dataf;
+    const int16_t *w16 = (int16_t *)weightsf;
+    const float *tail = (float *)&w16[n * len]; /* float scale/bias table stored after the int16 rows */
+    int neuron, k;
+    for (neuron = 0; neuron < n; neuron++) {
+        const int16_t *row = w16 + neuron * len;
+        const int off = ((neuron >> 2) << 3) + (neuron & 3);
+        int acc = 0;
+        for (k = 0; k < len; k++)
+            acc += in16[k] * row[k];
+        vals[neuron] = acc * tail[off] * scale[0] + tail[off + 4];
+    }
+}
+
+static void compute_network0(NNEDIContext *s, const float *input, const float *weights, uint8_t *d)
+{
+    const float *w_l1 = weights + 4 * 49; /* past 4 rows of 48 weights and their 4 biases */
+    const float *w_l2 = w_l1 + 4 * 5;     /* past 4 rows of 4 weights and their 4 biases */
+    float act[12], unit = 1.0f;
+
+    /* layer 0: 48 inputs -> act[0..3]; act[0] is deliberately left unsquashed */
+    dot_prod(s, input, weights, act, 4, 48, &unit);
+    elliott(act + 1, 3);
+    /* layer 1: act[0..3] -> act[4..7] */
+    dot_prod(s, act, w_l1, act + 4, 4, 4, &unit);
+    elliott(act + 4, 4);
+    /* layer 2: act[0..7] -> act[8..11]; flag is 1 when max(act[10], act[11]) <= max(act[8], act[9]) */
+    dot_prod(s, act, w_l2, act + 8, 4, 8, &unit);
+    d[0] = FFMAX(act[10], act[11]) <= FFMAX(act[8], act[9]);
+}
+
+static void compute_network0_i16(NNEDIContext *s, const float *inputf, const float *weightsf, uint8_t *d)
+{
+    const float *w_l1 = weightsf + 2 * 48 + 8; /* float weights of the second layer */
+    const float *w_l2 = w_l1 + 4 * 5;          /* float weights of the third layer */
+    float act[12], unit = 1.0f;
+
+    /* layer 0 runs on int16 data via dot_prods(); act[0] stays unsquashed */
+    dot_prods(s, inputf, weightsf, act, 4, 48, &unit);
+    elliott(act + 1, 3);
+
+    /* layers 1 and 2 are computed in float by dot_prod() */
+    dot_prod(s, act, w_l1, act + 4, 4, 4, &unit);
+    elliott(act + 4, 4);
+    dot_prod(s, act, w_l2, act + 8, 4, 8, &unit);
+    /* flag is 1 when max(act[10], act[11]) <= max(act[8], act[9]) */
+    d[0] = FFMAX(act[10], act[11]) <= FFMAX(act[8], act[9]);
+}
+
+static void pixel2float48(const uint8_t *t8, const int pitch, float *p)
+{
+    int row, col;
+
+    /* gather a 12x4 window, taking every second line, into 48 floats */
+    for (row = 0; row < 4; row++, p += 12)
+        for (col = 0; col < 12; col++)
+            p[col] = t8[row * pitch * 2 + col];
+}
+
+static void byte2word48(const uint8_t *t, const int pitch, float *pf)
+{
+    int16_t *out = (int16_t *)pf; /* the float buffer is filled with int16 samples */
+    int row, col;
+
+    for (row = 0; row < 4; row++, out += 12)
+        for (col = 0; col < 12; col++)
+            out[col] = t[row * pitch * 2 + col];
+}
+
+static int32_t process_line0(const uint8_t *tempu, int width, uint8_t *dstp8, const uint8_t *src3p8, const int src_pitch, const int max_value, const int chroma)
+{
+    const int hi = max_value - 1; // Technically the -1 is only needed for 8 and 16 bit input.
+    int pending = 0, x;
+
+    for (x = 0; x < width; x++) {
+        const uint8_t *px = src3p8 + x;
+        int cubic;
+        if (!tempu[x]) {
+            /* flag clear: leave a 255 marker and count the pixel */
+            dstp8[x] = 255;
+            pending++;
+            continue;
+        }
+        cubic = (19 * (px[src_pitch * 2] + px[src_pitch * 4]) - 3 * (px[0] + px[src_pitch * 6])) / 32;
+        dstp8[x] = FFMAX(FFMIN(cubic, hi), 0);
+    }
+    return pending;
+}
+
+// new prescreener functions
+static void byte2word64(const uint8_t *t, const int pitch, float *p)
+{
+    int16_t *out = (int16_t *)p; /* the float buffer is filled with int16 samples */
+    int row, col;
+
+    for (row = 0; row < 4; row++, out += 16)
+        for (col = 0; col < 16; col++)
+            out[col] = t[row * pitch * 2 + col];
+}
+
+static void compute_network0new(NNEDIContext *s, const float *datai, const float *weights, uint8_t *d)
+{
+    const int16_t *data = (const int16_t *)datai;
+    const int16_t *ws = (const int16_t *)weights;
+    const float *wf = (const float *)&ws[4 * 64];
+    float vals[8];
+    int i, j;
+    /* Prescreener variant that produces flags for 4 adjacent pixels per call. */
+    for (i = 0; i < 4; i++) {
+        int sum = 0;
+        float t;
+        /* int16 dot product over 64 samples; the weights are interleaved in groups of 8 */
+        for (j = 0; j < 64; j++)
+            sum += data[j] * ws[(i << 3) + ((j >> 3) << 5) + (j & 7)];
+        t = sum * wf[i] + wf[4 + i];
+        vals[i] = t / (1.0f + FFABS(t));
+    }
+
+    for (i = 0; i < 4; i++) {
+        float sum = 0.0f;
+
+        for (j = 0; j < 4; j++)
+            sum += vals[j] * wf[8 + i + (j << 2)];
+        vals[4 + i] = sum + wf[8 + 16 + i];
+    }
+
+    /* Store one 0/1 flag byte per pixel.  The bytes are written one at a
+     * time rather than as a single packed 32-bit store: that keeps d[i]
+     * tied to pixel i on big-endian hosts as well, and avoids writing the
+     * uint8_t buffer through an int lvalue (alignment / strict aliasing).
+     * On little-endian hosts the resulting bytes are unchanged. */
+    for (i = 0; i < 4; i++)
+        d[i] = vals[4 + i] > 0.0f;
+}
+
+static void evalfunc_0(NNEDIContext *s, FrameData *frame_data)
+{
+    float *input = frame_data->input;
+    const float *weights0 = s->weights0;
+    float *temp = frame_data->temp;
+    uint8_t *tempu = (uint8_t *)temp; /* the temp buffer doubles as a per-pixel flag byte array */
+    int plane, x, y;
+    /* Copies the kept rows to the output and prescreens the rows in between. */
+    // And now the actual work.
+    for (plane = 0; plane < s->nb_planes; plane++) {
+        const uint8_t *srcp = (const uint8_t *)frame_data->paddedp[plane];
+        const int src_stride = frame_data->padded_stride[plane] / sizeof(uint8_t);
+
+        const int width = frame_data->padded_width[plane];
+        const int height = frame_data->padded_height[plane];
+
+        uint8_t *dstp = (uint8_t *)frame_data->dstp[plane];
+        const int dst_stride = frame_data->dst_stride[plane] / sizeof(uint8_t);
+        const uint8_t *src3p;
+        int ystart, ystop;
+        int32_t *lcount;
+
+        if (!(s->process_plane & (1 << plane)))
+            continue;
+        /* Rows of parity 1 - field are copied verbatim, skipping the 32-column / 6-row padding. */
+        for (y = 1 - frame_data->field[plane]; y < height - 12; y += 2) {
+            memcpy(dstp + y * dst_stride,
+                   srcp + 32 + (6 + y) * src_stride,
+                   (width - 64) * sizeof(uint8_t));
+
+        }
+        /* From here on y and x are padded-plane coordinates; the pointers are biased to match. */
+        ystart = 6 + frame_data->field[plane];
+        ystop = height - 6;
+        srcp += ystart * src_stride;
+        dstp += (ystart - 6) * dst_stride - 32; /* so that dstp + 32 is the first output column */
+        src3p = srcp - src_stride * 3;          /* three rows above the row being produced */
+        lcount = frame_data->lcount[plane] - 6; /* so that lcount[y] addresses output row y - 6 */
+
+        if (s->pscrn == 1) { // original
+            for (y = ystart; y < ystop; y += 2) {
+                for (x = 32; x < width - 32; x++) {
+                    s->readpixels((const uint8_t *)(src3p + x - 5), src_stride, input);
+                    s->compute_network0(s, input, weights0, tempu+x); /* one flag byte per pixel */
+                }
+                lcount[y] += s->process_line0(tempu + 32, width - 64, (uint8_t *)(dstp + 32), (const uint8_t *)(src3p + 32), src_stride, s->max_value, plane);
+                src3p += src_stride * 2;
+                dstp += dst_stride * 2;
+            }
+        } else if (s->pscrn > 1) { // new
+            for (y = ystart; y < ystop; y += 2) {
+                for (x = 32; x < width - 32; x += 4) { /* this variant handles four pixels per call */
+                    s->readpixels((const uint8_t *)(src3p + x - 6), src_stride, input);
+                    s->compute_network0(s, input, weights0, tempu + x);
+                }
+                lcount[y] += s->process_line0(tempu + 32, width - 64, (uint8_t *)(dstp + 32), (const uint8_t *)(src3p + 32), src_stride, s->max_value, plane);
+                src3p += src_stride * 2;
+                dstp += dst_stride * 2;
+            }
+        } else { // no prescreening
+            for (y = ystart; y < ystop; y += 2) {
+                memset(dstp + 32, 255, (width - 64) * sizeof(uint8_t)); /* every pixel gets the 255 marker */
+                lcount[y] += width - 64;
+                dstp += dst_stride * 2;
+            }
+        }
+    }
+}
+
+static void extract_m8(const uint8_t *srcp8, const int stride, const int xdia, const int ydia, float *mstd, float *input)
+{
+    // uint8_t or uint16_t or float
+    const uint8_t *srcp = (const uint8_t *)srcp8;
+    float scale;
+    double tmp;
+
+    // int32_t or int64_t or double
+    int64_t sum = 0, sumsq = 0;
+    int y, x;
+    /* Copy an xdia x ydia window (every second line) into input[] while accumulating sum and sum of squares. */
+    for (y = 0; y < ydia; y++) {
+        const uint8_t *srcpT = srcp + y * stride * 2;
+
+        for (x = 0; x < xdia; x++) {
+            sum += srcpT[x];
+            sumsq += (uint32_t)srcpT[x] * (uint32_t)srcpT[x];
+            input[x] = srcpT[x];
+        }
+        input += xdia;
+    }
+    scale = 1.0f / (xdia * ydia);
+    mstd[0] = sum * scale; /* mean of the window */
+    tmp = (double)sumsq * scale - (double)mstd[0] * mstd[0]; /* variance: E[x^2] - mean^2 */
+    mstd[3] = 0.0f;
+    if (tmp <= FLT_EPSILON)
+        mstd[1] = mstd[2] = 0.0f; /* (near) flat window: report zero deviation and zero inverse */
+    else {
+        mstd[1] = sqrt(tmp);      /* standard deviation */
+        mstd[2] = 1.0f / mstd[1]; /* and its reciprocal */
+    }
+}
+
+static void extract_m8_i16(const uint8_t *srcp, const int stride, const int xdia, const int ydia, float *mstd, float *inputf)
+{
+    int16_t *out = (int16_t *)inputf; /* the window is stored as int16 inside the float buffer */
+    int total = 0, total_sq = 0;
+    int row, col;
+    float scale;
+
+    for (row = 0; row < ydia; row++, out += xdia) {
+        const uint8_t *line = srcp + row * stride * 2;
+        for (col = 0; col < xdia; col++) {
+            const int v = line[col];
+            total    += v;
+            total_sq += v * v;
+            out[col]  = v;
+        }
+    }
+    scale = 1.0f / (float)(xdia * ydia);
+    mstd[0] = total * scale;
+    mstd[1] = total_sq * scale - mstd[0] * mstd[0];
+    mstd[3] = 0.0f;
+    if (mstd[1] <= FLT_EPSILON)
+        mstd[1] = mstd[2] = 0.0f;
+    else {
+        mstd[1] = sqrt(mstd[1]);
+        mstd[2] = 1.0f / mstd[1];
+    }
+}
+
+
+static const float exp_lo = -80.0f;
+static const float exp_hi = +80.0f;
+// Replaces each of the n values in s[] by exp() of that value clamped to [exp_lo, exp_hi].
+static void e2_m16(float *s, const int n)
+{
+    int remaining = n;
+
+    for (; remaining > 0; remaining--, s++)
+        *s = exp(av_clipf(*s, exp_lo, exp_hi));
+}
+
+static const float min_weight_sum = 1e-10f; // weight sums not above this are treated as zero; file-local, so no exported symbol
+
+static void weighted_avg_elliott_mul5_m16(const float *w, const int n, float *mstd)
+{
+    const float *elliott = w + n; // second half of w[]: values squashed with x / (1 + |x|)
+    float weighted = 0.0f, total = 0.0f;
+    int k;
+    // w[0..n-1] are the weights applied to the squashed values of w[n..2n-1].
+    for (k = 0; k < n; k++) {
+        weighted += w[k] * (elliott[k] / (1.0f + FFABS(elliott[k])));
+        total += w[k];
+    }
+    // When the weight sum is not above min_weight_sum only mstd[0] is accumulated.
+    mstd[3] += total > min_weight_sum ? ((5.0f * weighted) / total) * mstd[1] + mstd[0]
+                                      : mstd[0];
+}
+
+
+static void evalfunc_1(NNEDIContext *s, FrameData *frame_data)
+{
+    float *input = frame_data->input;
+    float *temp = frame_data->temp;
+    float **weights1 = s->weights1;
+    const int qual = s->qual;
+    const int asize = s->asize;
+    const int nns = s->nns;
+    const int xdia = s->xdia;
+    const int xdiad2m1 = (xdia / 2) - 1;
+    const int ydia = s->ydia;
+    const float scale = 1.0f / (float)qual;
+    int plane, y, x, i;
+    // Second pass: computes every destination pixel that still holds the value 255.
+    for (plane = 0; plane < s->nb_planes; plane++) {
+        const uint8_t *srcp = (const uint8_t *)frame_data->paddedp[plane];
+        const int src_stride = frame_data->padded_stride[plane] / sizeof(uint8_t);
+
+        const int width = frame_data->padded_width[plane];
+        const int height = frame_data->padded_height[plane];
+
+        uint8_t *dstp = (uint8_t *)frame_data->dstp[plane];
+        const int dst_stride = frame_data->dst_stride[plane] / sizeof(uint8_t);
+
+        const int ystart = frame_data->field[plane];
+        const int ystop = height - 12;
+        const uint8_t *srcpp; // read-only cursor into the padded source plane
+
+        if (!(s->process_plane & (1 << plane)))
+            continue;
+        // dstp is shifted back by 32 so the padded x index below addresses both buffers.
+        srcp += (ystart + 6) * src_stride;
+        dstp += ystart * dst_stride - 32;
+        srcpp = srcp - (ydia - 1) * src_stride - xdiad2m1;
+
+        for (y = ystart; y < ystop; y += 2) {
+            for (x = 32; x < width - 32; x++) {
+                float mstd[4];
+                // Pixels holding any other value are left untouched.
+                if (dstp[x] != 255)
+                    continue;
+
+                s->extract(srcpp + x, src_stride, xdia, ydia, mstd, input);
+                for (i = 0; i < qual; i++) {
+                    s->dot_prod(s, input, weights1[i], temp, nns * 2, asize, mstd + 2);
+                    s->expfunc(temp, nns);
+                    s->wae5(temp, nns, mstd);
+                }
+                // mstd[3] holds the sum of the qual predictions: average, round, clamp.
+                dstp[x] = FFMIN(FFMAX((int)(mstd[3] * scale + 0.5f), 0), s->max_value);
+            }
+            srcpp += src_stride * 2;
+            dstp += dst_stride * 2;
+        }
+    }
+}
+
+#define NUM_NSIZE 7
+#define NUM_NNS 5
+
+static int roundds(const double f) // round half up, saturated to the int16_t range
+{
+    const double r = f - floor(f) >= 0.5 ? ceil(f) : floor(f);
+    // Clamp on both sides while still in double, before the conversion to int.
+    return r >= 32767.0 ? 32767 : r <= -32768.0 ? -32768 : (int)r;
+}
+
+static void select_functions(NNEDIContext *s)
+{
+    const int new_prescreener = s->pscrn >= 2;
+    const int i16_prescreen   = s->fapprox & 1;
+    const int i16_predict     = s->fapprox & 2;
+
+    // The three steps get_frame() runs for every output frame.
+    s->copy_pad      = copy_pad;
+    s->evalfunc_0    = evalfunc_0;
+    s->evalfunc_1    = evalfunc_1;
+
+    // Helpers used by evalfunc_0 (prescreening).
+    s->process_line0 = process_line0;
+    if (new_prescreener) {
+        // The new prescreener only comes with int16 dot products.
+        s->readpixels       = byte2word64;
+        s->compute_network0 = compute_network0new;
+    } else if (i16_prescreen) {
+        s->readpixels       = byte2word48;
+        s->compute_network0 = compute_network0_i16;
+    } else {
+        s->readpixels       = pixel2float48;
+        s->compute_network0 = compute_network0;
+    }
+
+    // Helpers used by evalfunc_1: int16 or float dot products.
+    s->wae5    = weighted_avg_elliott_mul5_m16;
+    s->expfunc = e2_m16;
+    if (i16_predict) {
+        s->extract  = extract_m8_i16;
+        s->dot_prod = dot_prods;
+    } else {
+        s->extract  = extract_m8;
+        s->dot_prod = dot_prod;
+    }
+}
+
+static int modnpf(const int m, const int n) // for m >= 0: m rounded up to a multiple of n
+{
+    const int rem = m % n;
+
+    return rem ? m + n - rem : m;
+}
+
+static int get_frame(AVFilterContext *ctx, int is_second)
+{
+    NNEDIContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AVFrame *src = s->src;
+    FrameData *frame_data;
+    int effective_field = s->field;
+    size_t temp_size;
+    int field_n;
+    int plane;
+    // Builds one output frame in s->dst from s->src; returns 0 or AVERROR(ENOMEM).
+    if (effective_field > 1)
+        effective_field -= 2;
+    else if (effective_field < 0)
+        effective_field += 2;
+
+    if (s->field < 0 && src->interlaced_frame && src->top_field_first == 0)
+        effective_field = 0;
+    else if (s->field < 0 && src->interlaced_frame && src->top_field_first == 1)
+        effective_field = 1;
+    else
+        effective_field = !effective_field;
+    // In double-rate mode (field > 1 or -2) the two calls per frame use opposite fields.
+    if (s->field > 1 || s->field == -2) {
+        if (is_second) {
+            field_n = (effective_field == 0);
+        } else {
+            field_n = (effective_field == 1);
+        }
+    } else {
+        field_n = effective_field;
+    }
+
+    s->dst = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!s->dst)
+        return AVERROR(ENOMEM);
+    av_frame_copy_props(s->dst, src);
+    s->dst->interlaced_frame = 0;
+
+    frame_data = &s->frame_data;
+
+    for (plane = 0; plane < s->nb_planes; plane++) {
+        int dst_height = s->planeheight[plane];
+        int dst_width = s->linesize[plane];
+
+        const int min_alignment = 16;
+        const int min_pad = 10;
+
+        if (!(s->process_plane & (1 << plane))) {
+            av_image_copy_plane(s->dst->data[plane], s->dst->linesize[plane],
+                                src->data[plane], src->linesize[plane],
+                                s->linesize[plane],
+                                s->planeheight[plane]);
+            continue;
+        }
+
+        frame_data->padded_width[plane]  = dst_width + 64;
+        frame_data->padded_height[plane] = dst_height + 12;
+        frame_data->padded_stride[plane] = modnpf(frame_data->padded_width[plane] + min_pad, min_alignment); // TODO: maybe min_pad is in pixels too?
+        if (!frame_data->paddedp[plane]) {
+            frame_data->paddedp[plane] = av_malloc_array(frame_data->padded_stride[plane], frame_data->padded_height[plane]);
+            if (!frame_data->paddedp[plane])
+                return AVERROR(ENOMEM);
+        }
+
+        frame_data->dstp[plane] = s->dst->data[plane];
+        frame_data->dst_stride[plane] = s->dst->linesize[plane];
+
+        if (!frame_data->lcount[plane]) {
+            frame_data->lcount[plane] = av_calloc(dst_height, sizeof(int32_t) * 16);
+            if (!frame_data->lcount[plane])
+                return AVERROR(ENOMEM);
+        } else {
+            memset(frame_data->lcount[plane], 0, dst_height * sizeof(int32_t) * 16);
+        }
+
+        frame_data->field[plane] = field_n;
+    }
+
+    if (!frame_data->input) {
+        frame_data->input = av_malloc(512 * sizeof(float));
+        if (!frame_data->input)
+            return AVERROR(ENOMEM);
+    }
+    // temp must hold 512 floats for evalfunc_1 and one padded row (in bytes) for
+    // evalfunc_0, so it is sized for the widest plane rather than for plane 0 only.
+    if (!frame_data->temp) {
+        temp_size = 512 * sizeof(float);
+        for (plane = 0; plane < s->nb_planes; plane++)
+            temp_size = FFMAX(temp_size, (size_t)frame_data->padded_width[plane]);
+        frame_data->temp = av_malloc(temp_size);
+        if (!frame_data->temp)
+            return AVERROR(ENOMEM);
+    }
+    // Copy src to a padded "frame" in frame_data and mirror the edges.
+    s->copy_pad(src, frame_data, s, field_n);
+    // Handles prescreening and the cubic interpolation.
+    s->evalfunc_0(s, frame_data);
+
+    // The rest.
+    s->evalfunc_1(s, frame_data);
+
+    return 0;
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *src)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    NNEDIContext *s = ctx->priv;
+    int ret;
+    // Owns src: on every path it is either kept in s->second or freed.
+    if ((s->field > 1 ||
+         s->field == -2) && !s->second) {
+        goto second;
+    } else if (s->field > 1 ||
+               s->field == -2) {
+        AVFrame *dst;
+        // Emit the second field of the frame kept from the previous call.
+        s->src = s->second;
+        ret = get_frame(ctx, 1);
+        if (ret < 0) {
+            av_frame_free(&s->dst);
+            av_frame_free(&s->second); // s->src is the same frame: free it once
+            s->src = NULL;
+            av_frame_free(&src);
+            return ret;
+        }
+        dst = s->dst;
+        if (src->pts != AV_NOPTS_VALUE &&
+            dst->pts != AV_NOPTS_VALUE)
+            dst->pts += src->pts;
+        else
+            dst->pts = AV_NOPTS_VALUE;
+
+        ret = ff_filter_frame(outlink, dst);
+        if (ret < 0 || s->eof) {
+            av_frame_free(&src); // not stored anywhere yet, so release it here
+            return ret < 0 ? ret : 0;
+        }
+        s->cur_pts = s->second->pts;
+        av_frame_free(&s->second);
+second:
+        if ((s->deint && src->interlaced_frame &&
+             !ctx->is_disabled) ||
+            (!s->deint && !ctx->is_disabled)) {
+            s->second = src;
+        }
+    }
+
+    if ((s->deint && !src->interlaced_frame) || ctx->is_disabled) {
+        AVFrame *dst = av_frame_clone(src);
+        if (!dst) {
+            av_frame_free(&src);
+            av_frame_free(&s->second);
+            return AVERROR(ENOMEM);
+        }
+
+        if (s->field > 1 || s->field == -2)
+            av_frame_free(&s->second);
+        // Pass-through: keep src for the next call or release it, never neither.
+        if ((s->field > 1 || s->field == -2) &&
+            ((s->deint && src->interlaced_frame) || !s->deint))
+            s->second = src;
+        else
+            av_frame_free(&src);
+        if (dst->pts != AV_NOPTS_VALUE)
+            dst->pts *= 2;
+        return ff_filter_frame(outlink, dst);
+    }
+
+    s->src = src;
+    ret = get_frame(ctx, 0);
+    if (ret < 0) {
+        av_frame_free(&s->dst);
+        av_frame_free(&s->src);
+        s->second = NULL; // NULL already, or an alias of the frame just freed
+        return ret;
+    }
+
+    if (src->pts != AV_NOPTS_VALUE)
+        s->dst->pts = src->pts * 2;
+    if (s->field <= 1 && s->field > -2) {
+        av_frame_free(&src);
+        s->src = NULL;
+    }
+
+    return ff_filter_frame(outlink, s->dst);
+}
+
+static int request_frame(AVFilterLink *link)
+{
+    AVFilterContext *ctx = link->src;
+    NNEDIContext *s = ctx->priv;
+    int ret;
+    // Requests input; at input EOF the frame still held in s->second is flushed once.
+    if (s->eof)
+        return AVERROR_EOF;
+
+    ret  = ff_request_frame(ctx->inputs[0]);
+
+    if (ret == AVERROR_EOF && s->second) {
+        AVFrame *next = av_frame_clone(s->second);
+
+        if (!next)
+            return AVERROR(ENOMEM);
+
+        next->pts = s->second->pts * 2 - s->cur_pts;
+        s->eof = 1;
+        // Report a failure of the flush instead of silently dropping it.
+        ret = filter_frame(ctx->inputs[0], next);
+    }
+    if (ret < 0)
+        return ret;
+
+    return 0;
+}
+
+static av_cold int init(AVFilterContext *ctx)
+{
+    NNEDIContext *s = ctx->priv;
+    FILE *weights_file = NULL;
+    int64_t expected_size = 13574928;
+    int64_t weights_size;
+    float *bdata;
+    size_t bytes_read;
+    const int xdia_table[NUM_NSIZE] = { 8, 16, 32, 48, 8, 16, 32 };
+    const int ydia_table[NUM_NSIZE] = { 6, 6, 6, 6, 4, 4, 4 };
+    const int nns_table[NUM_NNS] = { 16, 32, 64, 128, 256 };
+    const int dims0 = 49 * 4 + 5 * 4 + 9 * 4;
+    const int dims0new = 4 * 65 + 4 * 5;
+    const int dims1 = nns_table[s->nnsparam] * 2 * (xdia_table[s->nsize] * ydia_table[s->nsize] + 1);
+    int dims1tsize = 0;
+    int dims1offset = 0;
+    int ret = 0, i, j, k;
+    // Reads the weights file (exactly expected_size bytes) and fills s->weights0 / s->weights1 from it.
+    weights_file = fopen(s->weights_file, "rb");
+    if (!weights_file) {
+        av_log(ctx, AV_LOG_ERROR, "No weights file provided, aborting!\n");
+        return AVERROR(EINVAL);
+    }
+
+    if (fseek(weights_file, 0, SEEK_END)) {
+        av_log(ctx, AV_LOG_ERROR, "Couldn't seek to the end of weights file.\n");
+        fclose(weights_file);
+        return AVERROR(EINVAL);
+    }
+
+    weights_size = ftell(weights_file);
+
+    if (weights_size == -1) {
+        fclose(weights_file);
+        av_log(ctx, AV_LOG_ERROR, "Couldn't get size of weights file.\n");
+        return AVERROR(EINVAL);
+    } else if (weights_size != expected_size) {
+        fclose(weights_file);
+        av_log(ctx, AV_LOG_ERROR, "Unexpected weights file size.\n");
+        return AVERROR(EINVAL);
+    }
+
+    if (fseek(weights_file, 0, SEEK_SET)) {
+        fclose(weights_file);
+        av_log(ctx, AV_LOG_ERROR, "Couldn't seek to the start of weights file.\n");
+        return AVERROR(EINVAL);
+    }
+
+    bdata = (float *)av_malloc(expected_size);
+    if (!bdata) {
+        fclose(weights_file);
+        return AVERROR(ENOMEM);
+    }
+
+    bytes_read = fread(bdata, 1, expected_size, weights_file);
+
+    if (bytes_read != (size_t)expected_size) {
+        fclose(weights_file);
+        ret = AVERROR_INVALIDDATA;
+        av_log(ctx, AV_LOG_ERROR, "Couldn't read weights file.\n");
+        goto fail;
+    }
+
+    fclose(weights_file);
+    // Sum the sizes of all (nns, nsize) tables, remembering where the selected one starts.
+    for (j = 0; j < NUM_NNS; j++) {
+        for (i = 0; i < NUM_NSIZE; i++) {
+            if (i == s->nsize && j == s->nnsparam)
+                dims1offset = dims1tsize;
+            dims1tsize += nns_table[j] * 2 * (xdia_table[i] * ydia_table[i] + 1) * 2;
+        }
+    }
+
+    s->weights0 = av_malloc_array(FFMAX(dims0, dims0new), sizeof(float));
+    if (!s->weights0) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    for (i = 0; i < 2; i++) {
+        s->weights1[i] = av_malloc_array(dims1, sizeof(float));
+        if (!s->weights1[i]) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+    }
+
+    // Adjust prescreener weights
+    if (s->pscrn >= 2) {// using new prescreener
+        const float *bdw;
+        int16_t *ws;
+        float *wf;
+        double mean[4] = { 0.0, 0.0, 0.0, 0.0 };
+        int *offt = av_calloc(4 * 64, sizeof(int));
+
+        if (!offt) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+
+        for (j = 0; j < 4; j++)
+            for (k = 0; k < 64; k++)
+                offt[j * 64 + k] = ((k >> 3) << 5) + ((j & 3) << 3) + (k & 7);
+
+        bdw = bdata + dims0 + dims0new * (s->pscrn - 2);
+        ws = (int16_t *)s->weights0;
+        wf = (float *)&ws[4 * 64];
+        // Calculate mean weight of each first layer neuron
+        for (j = 0; j < 4; j++) {
+            double cmean = 0.0;
+            for (k = 0; k < 64; k++)
+                cmean += bdw[offt[j * 64 + k]];
+            mean[j] = cmean / 64.0;
+        }
+        // Factor mean removal and 1.0/127.5 scaling
+        // into first layer weights. scale to int16 range
+        for (j = 0; j < 4; j++) {
+            double scale, mval = 0.0;
+
+            for (k = 0; k < 64; k++)
+                mval = FFMAX(mval, FFABS((bdw[offt[j * 64 + k]] - mean[j]) / 127.5));
+            scale = 32767.0 / mval;
+            for (k = 0; k < 64; k++)
+                ws[offt[j * 64 + k]] = roundds(((bdw[offt[j * 64 + k]] - mean[j]) / 127.5) * scale);
+            wf[j] = (float)(mval / 32767.0);
+        }
+        memcpy(wf + 4, bdw + 4 * 64, (dims0new - 4 * 64) * sizeof(float));
+        av_free(offt);
+    } else { // using old prescreener
+        double mean[4] = { 0.0, 0.0, 0.0, 0.0 };
+        // Calculate mean weight of each first layer neuron
+        for (j = 0; j < 4; j++) {
+            double cmean = 0.0;
+            for (k = 0; k < 48; k++)
+                cmean += bdata[j * 48 + k];
+            mean[j] = cmean / 48.0;
+        }
+        if (s->fapprox & 1) {// use int16 dot products in first layer
+            int16_t *ws = (int16_t *)s->weights0;
+            float *wf = (float *)&ws[4 * 48];
+            // Factor mean removal and 1.0/127.5 scaling
+            // into first layer weights. scale to int16 range
+            for (j = 0; j < 4; j++) {
+                double scale, mval = 0.0;
+                for (k = 0; k < 48; k++)
+                    mval = FFMAX(mval, FFABS((bdata[j * 48 + k] - mean[j]) / 127.5));
+                scale = 32767.0 / mval;
+                for (k = 0; k < 48; k++)
+                    ws[j * 48 + k] = roundds(((bdata[j * 48 + k] - mean[j]) / 127.5) * scale);
+                wf[j] = (float)(mval / 32767.0);
+            }
+            memcpy(wf + 4, bdata + 4 * 48, (dims0 - 4 * 48) * sizeof(float));
+        } else {// use float dot products in first layer
+            double half = (1 << 8) - 1;
+
+            half /= 2;
+
+            // Factor mean removal and 1.0/half scaling
+            // into first layer weights.
+            for (j = 0; j < 4; j++)
+                for (k = 0; k < 48; k++)
+                    s->weights0[j * 48 + k] = (float)((bdata[j * 48 + k] - mean[j]) / half);
+            memcpy(s->weights0 + 4 * 48, bdata + 4 * 48, (dims0 - 4 * 48) * sizeof(float));
+        }
+    }
+
+    // Adjust prediction weights
+    for (i = 0; i < 2; i++) {
+        const float *bdataT = bdata + dims0 + dims0new * 3 + dims1tsize * s->etype + dims1offset + i * dims1;
+        const int nnst = nns_table[s->nnsparam];
+        const int asize = xdia_table[s->nsize] * ydia_table[s->nsize];
+        const int boff = nnst * 2 * asize;
+        double *mean = (double *)av_calloc(asize + 1 + nnst * 2, sizeof(double));
+
+        if (!mean) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+
+        // Calculate mean weight of each neuron (ignore bias)
+        for (j = 0; j < nnst * 2; j++) {
+            double cmean = 0.0;
+            for (k = 0; k < asize; k++)
+                cmean += bdataT[j * asize + k];
+            mean[asize + 1 + j] = cmean / (double)asize;
+        }
+        // Calculate mean softmax neuron
+        for (j = 0; j < nnst; j++) {
+            for (k = 0; k < asize; k++)
+                mean[k] += bdataT[j * asize + k] - mean[asize + 1 + j];
+            mean[asize] += bdataT[boff + j];
+        }
+        for (j = 0; j < asize + 1; j++)
+            mean[j] /= (double)(nnst);
+
+        if (s->fapprox & 2) { // use int16 dot products
+            int16_t *ws = (int16_t *)s->weights1[i];
+            float *wf = (float *)&ws[nnst * 2 * asize];
+            // Factor mean removal into weights, remove global offset from
+            // softmax neurons, and scale weights to int16 range.
+            for (j = 0; j < nnst; j++) { // softmax neurons
+                double scale, mval = 0.0;
+                for (k = 0; k < asize; k++)
+                    mval = FFMAX(mval, FFABS(bdataT[j * asize + k] - mean[asize + 1 + j] - mean[k]));
+                scale = 32767.0 / mval;
+                for (k = 0; k < asize; k++)
+                    ws[j * asize + k] = roundds((bdataT[j * asize + k] - mean[asize + 1 + j] - mean[k]) * scale);
+                wf[(j >> 2) * 8 + (j & 3)] = (float)(mval / 32767.0);
+                wf[(j >> 2) * 8 + (j & 3) + 4] = (float)(bdataT[boff + j] - mean[asize]);
+            }
+            for (j = nnst; j < nnst * 2; j++) { // elliott neurons
+                double scale, mval = 0.0;
+                for (k = 0; k < asize; k++)
+                    mval = FFMAX(mval, FFABS(bdataT[j * asize + k] - mean[asize + 1 + j]));
+                scale = 32767.0 / mval;
+                for (k = 0; k < asize; k++)
+                    ws[j * asize + k] = roundds((bdataT[j * asize + k] - mean[asize + 1 + j]) * scale);
+                wf[(j >> 2) * 8 + (j & 3)] = (float)(mval / 32767.0);
+                wf[(j >> 2) * 8 + (j & 3) + 4] = bdataT[boff + j];
+            }
+        } else { // use float dot products
+            // Factor mean removal into weights, and remove global
+            // offset from softmax neurons.
+            for (j = 0; j < nnst * 2; j++) {
+                for (k = 0; k < asize; k++) {
+                    const double q = j < nnst ? mean[k] : 0.0;
+                    s->weights1[i][j * asize + k] = (float)(bdataT[j * asize + k] - mean[asize + 1 + j] - q);
+                }
+                s->weights1[i][boff + j] = (float)(bdataT[boff + j] - (j < nnst ? mean[asize] : 0.0));
+            }
+        }
+        av_free(mean);
+    }
+
+    s->nns = nns_table[s->nnsparam];
+    s->xdia = xdia_table[s->nsize];
+    s->ydia = ydia_table[s->nsize];
+    s->asize = xdia_table[s->nsize] * ydia_table[s->nsize];
+
+    s->max_value = 65535 >> 8;
+
+    select_functions(s);
+
+    s->fdsp = avpriv_float_dsp_alloc(0);
+    if (!s->fdsp)
+        ret = AVERROR(ENOMEM);
+    // The success path falls through here too: the raw file contents are always released.
+fail:
+    av_free(bdata);
+    return ret;
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    NNEDIContext *s = ctx->priv;
+    int i;
+    // Releases everything allocated by init() and get_frame(), plus any pending frame.
+    av_freep(&s->weights0);
+
+    for (i = 0; i < 2; i++)
+        av_freep(&s->weights1[i]);
+
+    for (i = 0; i < s->nb_planes; i++) {
+        av_freep(&s->frame_data.paddedp[i]);
+        av_freep(&s->frame_data.lcount[i]);
+    }
+    av_freep(&s->frame_data.input);
+    av_freep(&s->frame_data.temp);
+    av_freep(&s->fdsp); // allocated by avpriv_float_dsp_alloc() in init()
+    av_frame_free(&s->second);
+}
+
+static const AVFilterPad inputs[] = { // the filter's single video input pad
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .filter_frame  = filter_frame,
+        .config_props  = config_input,
+    },
+    { NULL } // end-of-list entry
+};
+
+static const AVFilterPad outputs[] = { // the filter's single video output pad
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .config_props  = config_output,
+        .request_frame = request_frame, // also flushes the pending frame at EOF
+    },
+    { NULL } // end-of-list entry
+};
+
+AVFilter ff_vf_nnedi = { // filter definition tying together the callbacks and pads above
+    .name          = "nnedi",
+    .description   = NULL_IF_CONFIG_SMALL("Apply neural network edge directed interpolation intra-only deinterlacer."),
+    .priv_size     = sizeof(NNEDIContext),
+    .priv_class    = &nnedi_class,
+    .init          = init,
+    .uninit        = uninit,
+    .query_formats = query_formats,
+    .inputs        = inputs,
+    .outputs       = outputs,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL, // filter_frame() checks ctx->is_disabled itself
+};
diff --git a/libavfilter/vf_noise.c b/libavfilter/vf_noise.c
index 861ac090..fa3863ed 100644
--- a/libavfilter/vf_noise.c
+++ b/libavfilter/vf_noise.c
@@ -133,12 +133,13 @@ static av_cold int init_noise(NoiseContext *n, int comp)
 static int query_formats(AVFilterContext *ctx)
 {
     AVFilterFormats *formats = NULL;
-    int fmt;
+    int fmt, ret;
 
     for (fmt = 0; av_pix_fmt_desc_get(fmt); fmt++) {
         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);
-        if (desc->flags & AV_PIX_FMT_FLAG_PLANAR && !((desc->comp[0].depth_minus1 + 1) & 7))
-            ff_add_format(&formats, fmt);
+        if (desc->flags & AV_PIX_FMT_FLAG_PLANAR && !(desc->comp[0].depth & 7)
+            && (ret = ff_add_format(&formats, fmt)) < 0)
+                return ret;
     }
 
     return ff_set_common_formats(ctx, formats);
@@ -155,7 +156,7 @@ static int config_input(AVFilterLink *inlink)
     if ((ret = av_image_fill_linesizes(n->bytewidth, inlink->format, inlink->w)) < 0)
         return ret;
 
-    n->height[1] = n->height[2] = FF_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
+    n->height[1] = n->height[2] = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
     n->height[0] = n->height[3] = inlink->h;
 
     return 0;
diff --git a/libavfilter/vf_ocr.c b/libavfilter/vf_ocr.c
new file mode 100644
index 00000000..870dd688
--- /dev/null
+++ b/libavfilter/vf_ocr.c
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <tesseract/capi.h>
+
+#include "libavutil/opt.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+typedef struct OCRContext {
+    const AVClass *class;
+
+    char *datapath;  // "datapath" option, passed to TessBaseAPIInit3()
+    char *language;  // "language" option, passed to TessBaseAPIInit3()
+    char *whitelist; // "whitelist" option, set as tessedit_char_whitelist
+    char *blacklist; // "blacklist" option, set as tessedit_char_blacklist
+
+    TessBaseAPI *tess; // handle created in init() and deleted in uninit()
+} OCRContext;
+
+#define OFFSET(x) offsetof(OCRContext, x)
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption ocr_options[] = { // all four options are strings stored in OCRContext
+    { "datapath",  "set datapath",            OFFSET(datapath),  AV_OPT_TYPE_STRING, {.str=NULL},  0, 0, FLAGS },
+    { "language",  "set language",            OFFSET(language),  AV_OPT_TYPE_STRING, {.str="eng"}, 0, 0, FLAGS },
+    { "whitelist", "set character whitelist", OFFSET(whitelist), AV_OPT_TYPE_STRING, {.str="0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.:;,-+_!?\"'[]{}()<>|/\\=*&%$#@!~"}, 0, 0, FLAGS },
+    { "blacklist", "set character blacklist", OFFSET(blacklist), AV_OPT_TYPE_STRING, {.str=""},    0, 0, FLAGS },
+    { NULL } // end-of-list entry
+};
+
+static av_cold int init(AVFilterContext *ctx)
+{
+    OCRContext *s = ctx->priv;
+    // Creates the Tesseract handle and applies the datapath, language and character-list options.
+    s->tess = TessBaseAPICreate();
+    if (TessBaseAPIInit3(s->tess, s->datapath, s->language) == -1) {
+        av_log(ctx, AV_LOG_ERROR, "failed to init tesseract\n");
+        return AVERROR(EINVAL);
+    }
+
+    if (!TessBaseAPISetVariable(s->tess, "tessedit_char_whitelist", s->whitelist)) {
+        av_log(ctx, AV_LOG_ERROR, "failed to set whitelist\n");
+        return AVERROR(EINVAL);
+    }
+
+    if (!TessBaseAPISetVariable(s->tess, "tessedit_char_blacklist", s->blacklist)) {
+        av_log(ctx, AV_LOG_ERROR, "failed to set blacklist\n");
+        return AVERROR(EINVAL);
+    }
+
+    av_log(ctx, AV_LOG_DEBUG, "Tesseract version: %s\n", TessVersion());
+
+    return 0;
+}
+
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat pix_fmts[] = {
+        AV_PIX_FMT_GRAY8,
+        AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,
+        AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
+        AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV444P,
+        AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
+        AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_YUVJ444P,
+        AV_PIX_FMT_YUVJ411P,
+        AV_PIX_FMT_YUVA444P, AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUVA420P,
+        AV_PIX_FMT_NONE
+    };
+
+    AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
+    if (!fmts_list)
+        return AVERROR(ENOMEM);
+
+    // Propagate a failure of ff_set_common_formats() instead of always reporting success.
+    return ff_set_common_formats(ctx, fmts_list);
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    OCRContext *s = ctx->priv;
+
+    // Run recognition on plane 0 over the whole frame area.
+    char *text = TessBaseAPIRect(s->tess, in->data[0], 1, in->linesize[0],
+                                 0, 0, in->width, in->height);
+
+    // Store the text in the frame metadata, then release Tesseract's copy.
+    av_dict_set(avpriv_frame_get_metadatap(in), "lavfi.ocr.text", text, 0);
+    TessDeleteText(text);
+
+    return ff_filter_frame(ctx->outputs[0], in);
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    OCRContext *s = ctx->priv;
+    // Ends and then deletes the Tesseract handle stored by init().
+    TessBaseAPIEnd(s->tess);
+    TessBaseAPIDelete(s->tess);
+}
+
+AVFILTER_DEFINE_CLASS(ocr);
+
+static const AVFilterPad ocr_inputs[] = { // single video input pad
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame,
+    },
+    { NULL } // end-of-list entry
+};
+
+static const AVFilterPad ocr_outputs[] = { // single video output pad, no callbacks
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+    },
+    { NULL } // end-of-list entry
+};
+
+AVFilter ff_vf_ocr = { // filter definition tying together the callbacks and pads above
+    .name          = "ocr",
+    .description   = NULL_IF_CONFIG_SMALL("Optical Character Recognition."),
+    .priv_size     = sizeof(OCRContext),
+    .priv_class    = &ocr_class,
+    .query_formats = query_formats,
+    .init          = init,
+    .uninit        = uninit,
+    .inputs        = ocr_inputs,
+    .outputs       = ocr_outputs,
+};
diff --git a/libavfilter/vf_overlay.c b/libavfilter/vf_overlay.c
index 788c1d94..37f19ea0 100644
--- a/libavfilter/vf_overlay.c
+++ b/libavfilter/vf_overlay.c
@@ -245,35 +245,57 @@ static int query_formats(AVFilterContext *ctx)
         AV_PIX_FMT_NONE
     };
 
-    AVFilterFormats *main_formats;
-    AVFilterFormats *overlay_formats;
+    AVFilterFormats *main_formats = NULL;
+    AVFilterFormats *overlay_formats = NULL;
+    int ret;
 
     switch (s->format) {
     case OVERLAY_FORMAT_YUV420:
-        main_formats    = ff_make_format_list(main_pix_fmts_yuv420);
-        overlay_formats = ff_make_format_list(overlay_pix_fmts_yuv420);
+        if (!(main_formats    = ff_make_format_list(main_pix_fmts_yuv420)) ||
+            !(overlay_formats = ff_make_format_list(overlay_pix_fmts_yuv420))) {
+                ret = AVERROR(ENOMEM);
+                goto fail;
+            }
         break;
     case OVERLAY_FORMAT_YUV422:
-        main_formats    = ff_make_format_list(main_pix_fmts_yuv422);
-        overlay_formats = ff_make_format_list(overlay_pix_fmts_yuv422);
+        if (!(main_formats    = ff_make_format_list(main_pix_fmts_yuv422)) ||
+            !(overlay_formats = ff_make_format_list(overlay_pix_fmts_yuv422))) {
+                ret = AVERROR(ENOMEM);
+                goto fail;
+            }
         break;
     case OVERLAY_FORMAT_YUV444:
-        main_formats    = ff_make_format_list(main_pix_fmts_yuv444);
-        overlay_formats = ff_make_format_list(overlay_pix_fmts_yuv444);
+        if (!(main_formats    = ff_make_format_list(main_pix_fmts_yuv444)) ||
+            !(overlay_formats = ff_make_format_list(overlay_pix_fmts_yuv444))) {
+                ret = AVERROR(ENOMEM);
+                goto fail;
+            }
         break;
     case OVERLAY_FORMAT_RGB:
-        main_formats    = ff_make_format_list(main_pix_fmts_rgb);
-        overlay_formats = ff_make_format_list(overlay_pix_fmts_rgb);
+        if (!(main_formats    = ff_make_format_list(main_pix_fmts_rgb)) ||
+            !(overlay_formats = ff_make_format_list(overlay_pix_fmts_rgb))) {
+                ret = AVERROR(ENOMEM);
+                goto fail;
+            }
         break;
     default:
         av_assert0(0);
     }
 
-    ff_formats_ref(main_formats,    &ctx->inputs [MAIN   ]->out_formats);
-    ff_formats_ref(overlay_formats, &ctx->inputs [OVERLAY]->out_formats);
-    ff_formats_ref(main_formats,    &ctx->outputs[MAIN   ]->in_formats );
+    if ((ret = ff_formats_ref(main_formats   , &ctx->inputs[MAIN]->out_formats   )) < 0 ||
+        (ret = ff_formats_ref(overlay_formats, &ctx->inputs[OVERLAY]->out_formats)) < 0 ||
+        (ret = ff_formats_ref(main_formats   , &ctx->outputs[MAIN]->in_formats   )) < 0)
+            goto fail;
 
     return 0;
+fail:
+    if (main_formats)
+        av_freep(&main_formats->formats);
+    av_freep(&main_formats);
+    if (overlay_formats)
+        av_freep(&overlay_formats->formats);
+    av_freep(&overlay_formats);
+    return ret;
 }
 
 static const enum AVPixelFormat alpha_pix_fmts[] = {
@@ -498,10 +520,10 @@ static void blend_image(AVFilterContext *ctx,
         for (i = 0; i < 3; i++) {
             int hsub = i ? s->hsub : 0;
             int vsub = i ? s->vsub : 0;
-            int src_wp = FF_CEIL_RSHIFT(src_w, hsub);
-            int src_hp = FF_CEIL_RSHIFT(src_h, vsub);
-            int dst_wp = FF_CEIL_RSHIFT(dst_w, hsub);
-            int dst_hp = FF_CEIL_RSHIFT(dst_h, vsub);
+            int src_wp = AV_CEIL_RSHIFT(src_w, hsub);
+            int src_hp = AV_CEIL_RSHIFT(src_h, vsub);
+            int dst_wp = AV_CEIL_RSHIFT(dst_w, hsub);
+            int dst_hp = AV_CEIL_RSHIFT(dst_h, vsub);
             int yp = y>>vsub;
             int xp = x>>hsub;
             uint8_t *s, *sp, *d, *dp, *a, *ap;
@@ -577,6 +599,11 @@ static AVFrame *do_blend(AVFilterContext *ctx, AVFrame *mainpic,
             NAN : mainpic->pts * av_q2d(inlink->time_base);
         s->var_values[VAR_POS] = pos == -1 ? NAN : pos;
 
+        s->var_values[VAR_OVERLAY_W] = s->var_values[VAR_OW] = second->width;
+        s->var_values[VAR_OVERLAY_H] = s->var_values[VAR_OH] = second->height;
+        s->var_values[VAR_MAIN_W   ] = s->var_values[VAR_MW] = mainpic->width;
+        s->var_values[VAR_MAIN_H   ] = s->var_values[VAR_MH] = mainpic->height;
+
         eval_expr(ctx);
         av_log(ctx, AV_LOG_DEBUG, "n:%f t:%f pos:%f x:%f xi:%d y:%f yi:%d\n",
                s->var_values[VAR_N], s->var_values[VAR_T], s->var_values[VAR_POS],
@@ -638,14 +665,14 @@ static const AVOption overlay_options[] = {
     { "eval", "specify when to evaluate expressions", OFFSET(eval_mode), AV_OPT_TYPE_INT, {.i64 = EVAL_MODE_FRAME}, 0, EVAL_MODE_NB-1, FLAGS, "eval" },
          { "init",  "eval expressions once during initialization", 0, AV_OPT_TYPE_CONST, {.i64=EVAL_MODE_INIT},  .flags = FLAGS, .unit = "eval" },
          { "frame", "eval expressions per-frame",                  0, AV_OPT_TYPE_CONST, {.i64=EVAL_MODE_FRAME}, .flags = FLAGS, .unit = "eval" },
-    { "rgb", "force packed RGB in input and output (deprecated)", OFFSET(allow_packed_rgb), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS },
-    { "shortest", "force termination when the shortest input terminates", OFFSET(dinput.shortest), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, FLAGS },
+    { "rgb", "force packed RGB in input and output (deprecated)", OFFSET(allow_packed_rgb), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
+    { "shortest", "force termination when the shortest input terminates", OFFSET(dinput.shortest), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
     { "format", "set output format", OFFSET(format), AV_OPT_TYPE_INT, {.i64=OVERLAY_FORMAT_YUV420}, 0, OVERLAY_FORMAT_NB-1, FLAGS, "format" },
         { "yuv420", "", 0, AV_OPT_TYPE_CONST, {.i64=OVERLAY_FORMAT_YUV420}, .flags = FLAGS, .unit = "format" },
         { "yuv422", "", 0, AV_OPT_TYPE_CONST, {.i64=OVERLAY_FORMAT_YUV422}, .flags = FLAGS, .unit = "format" },
         { "yuv444", "", 0, AV_OPT_TYPE_CONST, {.i64=OVERLAY_FORMAT_YUV444}, .flags = FLAGS, .unit = "format" },
         { "rgb",    "", 0, AV_OPT_TYPE_CONST, {.i64=OVERLAY_FORMAT_RGB},    .flags = FLAGS, .unit = "format" },
-    { "repeatlast", "repeat overlay of the last overlay frame", OFFSET(dinput.repeatlast), AV_OPT_TYPE_INT, {.i64=1}, 0, 1, FLAGS },
+    { "repeatlast", "repeat overlay of the last overlay frame", OFFSET(dinput.repeatlast), AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1, FLAGS },
     { NULL }
 };
 
diff --git a/libavfilter/vf_owdenoise.c b/libavfilter/vf_owdenoise.c
index d4e90f86..3a77f896 100644
--- a/libavfilter/vf_owdenoise.c
+++ b/libavfilter/vf_owdenoise.c
@@ -227,8 +227,8 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
     OWDenoiseContext *s = ctx->priv;
     AVFilterLink *outlink = ctx->outputs[0];
     AVFrame *out;
-    const int cw = FF_CEIL_RSHIFT(inlink->w, s->hsub);
-    const int ch = FF_CEIL_RSHIFT(inlink->h, s->vsub);
+    const int cw = AV_CEIL_RSHIFT(inlink->w, s->hsub);
+    const int ch = AV_CEIL_RSHIFT(inlink->h, s->vsub);
 
     if (av_frame_is_writable(in)) {
         direct = 1;
diff --git a/libavfilter/vf_pad.c b/libavfilter/vf_pad.c
index 63dc6a8b..35f955d3 100644
--- a/libavfilter/vf_pad.c
+++ b/libavfilter/vf_pad.c
@@ -123,12 +123,17 @@ static int config_input(AVFilterLink *inlink)
                                       NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0)
         goto eval_fail;
     s->h = var_values[VAR_OUT_H] = var_values[VAR_OH] = res;
+    if (!s->h)
+        var_values[VAR_OUT_H] = var_values[VAR_OH] = s->h = inlink->h;
+
     /* evaluate the width again, as it may depend on the evaluated output height */
     if ((ret = av_expr_parse_and_eval(&res, (expr = s->w_expr),
                                       var_names, var_values,
                                       NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0)
         goto eval_fail;
     s->w = var_values[VAR_OUT_W] = var_values[VAR_OW] = res;
+    if (!s->w)
+        var_values[VAR_OUT_W] = var_values[VAR_OW] = s->w = inlink->w;
 
     /* evaluate x and y */
     av_expr_parse_and_eval(&res, (expr = s->x_expr),
@@ -153,11 +158,6 @@ static int config_input(AVFilterLink *inlink)
         return AVERROR(EINVAL);
     }
 
-    if (!s->w)
-        s->w = inlink->w;
-    if (!s->h)
-        s->h = inlink->h;
-
     s->w    = ff_draw_round_to_sub(&s->draw, 0, -1, s->w);
     s->h    = ff_draw_round_to_sub(&s->draw, 1, -1, s->h);
     s->x    = ff_draw_round_to_sub(&s->draw, 0, -1, s->x);
@@ -203,7 +203,7 @@ static AVFrame *get_video_buffer(AVFilterLink *inlink, int w, int h)
 
     AVFrame *frame = ff_get_video_buffer(inlink->dst->outputs[0],
                                          w + (s->w - s->in_w),
-                                         h + (s->h - s->in_h));
+                                         h + (s->h - s->in_h) + (s->x > 0));
     int plane;
 
     if (!frame)
diff --git a/libavfilter/vf_palettegen.c b/libavfilter/vf_palettegen.c
index 4b49058b..fccc5ca3 100644
--- a/libavfilter/vf_palettegen.c
+++ b/libavfilter/vf_palettegen.c
@@ -24,6 +24,7 @@
  */
 
 #include "libavutil/avassert.h"
+#include "libavutil/internal.h"
 #include "libavutil/opt.h"
 #include "libavutil/qsort.h"
 #include "avfilter.h"
@@ -78,7 +79,7 @@ typedef struct {
 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
 static const AVOption palettegen_options[] = {
     { "max_colors", "set the maximum number of colors to use in the palette", OFFSET(max_colors), AV_OPT_TYPE_INT, {.i64=256}, 4, 256, FLAGS },
-    { "reserve_transparent", "reserve a palette entry for transparency", OFFSET(reserve_transparent), AV_OPT_TYPE_INT, {.i64=1}, 0, 1, FLAGS },
+    { "reserve_transparent", "reserve a palette entry for transparency", OFFSET(reserve_transparent), AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1, FLAGS },
     { "stats_mode", "set statistics mode", OFFSET(stats_mode), AV_OPT_TYPE_INT, {.i64=STATS_MODE_ALL_FRAMES}, 0, NB_STATS_MODE, FLAGS, "mode" },
         { "full", "compute full frame histograms", 0, AV_OPT_TYPE_CONST, {.i64=STATS_MODE_ALL_FRAMES}, INT_MIN, INT_MAX, FLAGS, "mode" },
         { "diff", "compute histograms only for the part that differs from previous frame", 0, AV_OPT_TYPE_CONST, {.i64=STATS_MODE_DIFF_FRAMES}, INT_MIN, INT_MAX, FLAGS, "mode" },
@@ -91,6 +92,7 @@ static int query_formats(AVFilterContext *ctx)
 {
     static const enum AVPixelFormat in_fmts[]  = {AV_PIX_FMT_RGB32, AV_PIX_FMT_NONE};
     static const enum AVPixelFormat out_fmts[] = {AV_PIX_FMT_RGB32, AV_PIX_FMT_NONE};
+    int ret;
     AVFilterFormats *in  = ff_make_format_list(in_fmts);
     AVFilterFormats *out = ff_make_format_list(out_fmts);
     if (!in || !out) {
@@ -98,8 +100,9 @@ static int query_formats(AVFilterContext *ctx)
         av_freep(&out);
         return AVERROR(ENOMEM);
     }
-    ff_formats_ref(in,  &ctx->inputs[0]->out_formats);
-    ff_formats_ref(out, &ctx->outputs[0]->in_formats);
+    if ((ret = ff_formats_ref(in , &ctx->inputs[0]->out_formats)) < 0 ||
+        (ret = ff_formats_ref(out, &ctx->outputs[0]->in_formats)) < 0)
+        return ret;
     return 0;
 }
 
@@ -127,7 +130,7 @@ static int cmp_color(const void *a, const void *b)
 {
     const struct range_box *box1 = a;
     const struct range_box *box2 = b;
-    return box1->color - box2->color;
+    return FFDIFFSIGN(box1->color , box2->color);
 }
 
 static av_always_inline int diff(const uint32_t a, const uint32_t b)
@@ -347,7 +350,7 @@ static AVFrame *get_palette_frame(AVFilterContext *ctx)
         if (rr >= gr && rr >= br) longest = 0;
         if (gr >= rr && gr >= br) longest = 1; // prefer green again
 
-        av_dlog(ctx, "box #%02X [%6d..%-6d] (%6d) w:%-6"PRIu64" ranges:[%2x %2x %2x] sort by %c (already sorted:%c) ",
+        ff_dlog(ctx, "box #%02X [%6d..%-6d] (%6d) w:%-6"PRIu64" ranges:[%2x %2x %2x] sort by %c (already sorted:%c) ",
                 box_id, box->start, box->start + box->len - 1, box->len, box_weight,
                 rr, gr, br, "rgb"[longest], box->sorted_by == longest ? 'y':'n');
 
@@ -368,7 +371,7 @@ static AVFrame *get_palette_frame(AVFilterContext *ctx)
             if (box_weight > median)
                 break;
         }
-        av_dlog(ctx, "split @ i=%-6d with w=%-6"PRIu64" (target=%6"PRIu64")\n", i, box_weight, median);
+        ff_dlog(ctx, "split @ i=%-6d with w=%-6"PRIu64" (target=%6"PRIu64")\n", i, box_weight, median);
         split_box(s, box, i);
 
         box_id = get_next_box_id_to_split(s);
@@ -519,7 +522,6 @@ static int config_output(AVFilterLink *outlink)
 {
     outlink->w = outlink->h = 16;
     outlink->sample_aspect_ratio = av_make_q(1, 1);
-    outlink->flags |= FF_LINK_FLAG_REQUEST_LOOP;
     return 0;
 }
 
diff --git a/libavfilter/vf_paletteuse.c b/libavfilter/vf_paletteuse.c
index 8835d8b2..1225a660 100644
--- a/libavfilter/vf_paletteuse.c
+++ b/libavfilter/vf_paletteuse.c
@@ -24,6 +24,7 @@
  */
 
 #include "libavutil/bprint.h"
+#include "libavutil/internal.h"
 #include "libavutil/opt.h"
 #include "libavutil/qsort.h"
 #include "dualinput.h"
@@ -119,8 +120,8 @@ static const AVOption paletteuse_options[] = {
         { "nns_iterative", "iterative search",             0, AV_OPT_TYPE_CONST, {.i64=COLOR_SEARCH_NNS_ITERATIVE}, INT_MIN, INT_MAX, FLAGS, "search" },
         { "nns_recursive", "recursive search",             0, AV_OPT_TYPE_CONST, {.i64=COLOR_SEARCH_NNS_RECURSIVE}, INT_MIN, INT_MAX, FLAGS, "search" },
         { "bruteforce",    "brute-force into the palette", 0, AV_OPT_TYPE_CONST, {.i64=COLOR_SEARCH_BRUTEFORCE},    INT_MIN, INT_MAX, FLAGS, "search" },
-    { "mean_err", "compute and print mean error", OFFSET(calc_mean_err), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS },
-    { "debug_accuracy", "test color search accuracy", OFFSET(debug_accuracy), AV_OPT_TYPE_FLAGS, {.i64=0}, 0, 1, FLAGS },
+    { "mean_err", "compute and print mean error", OFFSET(calc_mean_err), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
+    { "debug_accuracy", "test color search accuracy", OFFSET(debug_accuracy), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
     { NULL }
 };
 
@@ -131,6 +132,7 @@ static int query_formats(AVFilterContext *ctx)
     static const enum AVPixelFormat in_fmts[]    = {AV_PIX_FMT_RGB32, AV_PIX_FMT_NONE};
     static const enum AVPixelFormat inpal_fmts[] = {AV_PIX_FMT_RGB32, AV_PIX_FMT_NONE};
     static const enum AVPixelFormat out_fmts[]   = {AV_PIX_FMT_PAL8,  AV_PIX_FMT_NONE};
+    int ret;
     AVFilterFormats *in    = ff_make_format_list(in_fmts);
     AVFilterFormats *inpal = ff_make_format_list(inpal_fmts);
     AVFilterFormats *out   = ff_make_format_list(out_fmts);
@@ -140,9 +142,10 @@ static int query_formats(AVFilterContext *ctx)
         av_freep(&out);
         return AVERROR(ENOMEM);
     }
-    ff_formats_ref(in,    &ctx->inputs[0]->out_formats);
-    ff_formats_ref(inpal, &ctx->inputs[1]->out_formats);
-    ff_formats_ref(out,   &ctx->outputs[0]->in_formats);
+    if ((ret = ff_formats_ref(in   , &ctx->inputs[0]->out_formats)) < 0 ||
+        (ret = ff_formats_ref(inpal, &ctx->inputs[1]->out_formats)) < 0 ||
+        (ret = ff_formats_ref(out  , &ctx->outputs[0]->in_formats)) < 0)
+        return ret;
     return 0;
 }
 
@@ -875,7 +878,7 @@ static AVFrame *apply_palette(AVFilterLink *inlink, AVFrame *in)
         return NULL;
     }
 
-    av_dlog(ctx, "%dx%d rect: (%d;%d) -> (%d,%d) [area:%dx%d]\n",
+    ff_dlog(ctx, "%dx%d rect: (%d;%d) -> (%d,%d) [area:%dx%d]\n",
             w, h, x, y, x+w, y+h, in->width, in->height);
 
     if (s->set_frame(s, out, in, x, y, w, h) < 0) {
diff --git a/libavfilter/vf_perspective.c b/libavfilter/vf_perspective.c
index 970870cd..4949ee82 100644
--- a/libavfilter/vf_perspective.c
+++ b/libavfilter/vf_perspective.c
@@ -150,7 +150,7 @@ static int config_input(AVFilterLink *inlink)
     if ((ret = av_image_fill_linesizes(s->linesize, inlink->format, inlink->w)) < 0)
         return ret;
 
-    s->height[1] = s->height[2] = FF_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
+    s->height[1] = s->height[2] = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
     s->height[0] = s->height[3] = inlink->h;
 
     s->pv = av_realloc_f(s->pv, w * h, 2 * sizeof(*s->pv));
@@ -213,10 +213,10 @@ static int config_input(AVFilterLink *inlink)
         for (x = 0; x < w; x++){
             int u, v;
 
-            u = (int)floor(SUB_PIXELS * (x0 * x + x1 * y + x2) /
-                                        (x6 * x + x7 * y + x8) + 0.5);
-            v = (int)floor(SUB_PIXELS * (x3 * x + x4 * y + x5) /
-                                        (x6 * x + x7 * y + x8) + 0.5);
+            u =      lrint(SUB_PIXELS * (x0 * x + x1 * y + x2) /
+                                        (x6 * x + x7 * y + x8));
+            v =      lrint(SUB_PIXELS * (x3 * x + x4 * y + x5) /
+                                        (x6 * x + x7 * y + x8));
 
             s->pv[x + y * w][0] = u;
             s->pv[x + y * w][1] = v;
@@ -235,7 +235,7 @@ static int config_input(AVFilterLink *inlink)
             sum += temp[j];
 
         for (j = 0; j < 4; j++)
-            s->coeff[i][j] = (int)floor((1 << COEFF_BITS) * temp[j] / sum + 0.5);
+            s->coeff[i][j] = lrint((1 << COEFF_BITS) * temp[j] / sum);
     }
 
     return 0;
diff --git a/libavfilter/vf_phase.c b/libavfilter/vf_phase.c
index 35c343fb..fadeb626 100644
--- a/libavfilter/vf_phase.c
+++ b/libavfilter/vf_phase.c
@@ -92,7 +92,7 @@ static int config_input(AVFilterLink *inlink)
     if ((ret = av_image_fill_linesizes(s->linesize, inlink->format, inlink->w)) < 0)
         return ret;
 
-    s->planeheight[1] = s->planeheight[2] = FF_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
+    s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
     s->planeheight[0] = s->planeheight[3] = inlink->h;
 
     s->nb_planes = av_pix_fmt_count_planes(inlink->format);
@@ -116,14 +116,7 @@ static int config_input(AVFilterLink *inlink)
  */
 static enum PhaseMode analyze_plane(void *ctx, enum PhaseMode mode, AVFrame *old, AVFrame *new)
 {
-    double bdiff, tdiff, pdiff, scale;
-    const int ns = new->linesize[0];
-    const int os = old->linesize[0];
-    const uint8_t *nptr = new->data[0];
-    const uint8_t *optr = old->data[0];
-    const int h = new->height;
-    const int w = new->width;
-    int bdif, tdif, pdif;
+    double bdiff, tdiff, pdiff;
 
     if (mode == AUTO) {
         mode = new->interlaced_frame ? new->top_field_first ?
@@ -136,6 +129,15 @@ static enum PhaseMode analyze_plane(void *ctx, enum PhaseMode mode, AVFrame *old
     if (mode <= BOTTOM_FIRST) {
         bdiff = pdiff = tdiff = 65536.0;
     } else {
+        const int ns = new->linesize[0];
+        const int os = old->linesize[0];
+        const uint8_t *nptr = new->data[0];
+        const uint8_t *optr = old->data[0];
+        const int h = new->height;
+        const int w = new->width;
+        int bdif, tdif, pdif;
+        double scale;
+
         int top = 0, t;
         const uint8_t *rend, *end = nptr + (h - 2) * ns;
 
diff --git a/libavfilter/vf_pixdesctest.c b/libavfilter/vf_pixdesctest.c
index 790dd0d9..d6423acb 100644
--- a/libavfilter/vf_pixdesctest.c
+++ b/libavfilter/vf_pixdesctest.c
@@ -59,8 +59,8 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
     AVFilterLink *outlink    = inlink->dst->outputs[0];
     AVFrame *out;
     int i, c, w = inlink->w, h = inlink->h;
-    const int cw = FF_CEIL_RSHIFT(w, priv->pix_desc->log2_chroma_w);
-    const int ch = FF_CEIL_RSHIFT(h, priv->pix_desc->log2_chroma_h);
+    const int cw = AV_CEIL_RSHIFT(w, priv->pix_desc->log2_chroma_w);
+    const int ch = AV_CEIL_RSHIFT(h, priv->pix_desc->log2_chroma_h);
 
     out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
     if (!out) {
diff --git a/libavfilter/vf_pp7.c b/libavfilter/vf_pp7.c
index 9e78c39a..570a1c90 100644
--- a/libavfilter/vf_pp7.c
+++ b/libavfilter/vf_pp7.c
@@ -328,8 +328,8 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
         qp_table = av_frame_get_qp_table(in, &qp_stride, &pp7->qscale_type);
 
     if (!ctx->is_disabled) {
-        const int cw = FF_CEIL_RSHIFT(inlink->w, pp7->hsub);
-        const int ch = FF_CEIL_RSHIFT(inlink->h, pp7->vsub);
+        const int cw = AV_CEIL_RSHIFT(inlink->w, pp7->hsub);
+        const int ch = AV_CEIL_RSHIFT(inlink->h, pp7->vsub);
 
         /* get a new frame if in-place is not possible or if the dimensions
         * are not multiple of 8 */
diff --git a/libavfilter/vf_psnr.c b/libavfilter/vf_psnr.c
index 406be881..89acd3ca 100644
--- a/libavfilter/vf_psnr.c
+++ b/libavfilter/vf_psnr.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011 Roger Pau Monn� <roger.pau@entel.upc.edu>
+ * Copyright (c) 2011 Roger Pau Monné <roger.pau@entel.upc.edu>
  * Copyright (c) 2011 Stefano Sabatini
  * Copyright (c) 2013 Paul B Mahol
  *
@@ -25,6 +25,7 @@
  * Caculate the PSNR between two input videos.
  */
 
+#include "libavutil/avstring.h"
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
 #include "avfilter.h"
@@ -32,12 +33,13 @@
 #include "drawutils.h"
 #include "formats.h"
 #include "internal.h"
+#include "psnr.h"
 #include "video.h"
 
 typedef struct PSNRContext {
     const AVClass *class;
     FFDualInputContext dinput;
-    double mse, min_mse, max_mse;
+    double mse, min_mse, max_mse, mse_comp[4];
     uint64_t nb_frames;
     FILE *stats_file;
     char *stats_file_str;
@@ -48,11 +50,8 @@ typedef struct PSNRContext {
     int nb_components;
     int planewidth[4];
     int planeheight[4];
-
-    void (*compute_mse)(struct PSNRContext *s,
-                        const uint8_t *m[4], const int ml[4],
-                        const uint8_t *r[4], const int rl[4],
-                        int w, int h, double mse[4]);
+    double planeweight[4];
+    PSNRDSPContext dsp;
 } PSNRContext;
 
 #define OFFSET(x) offsetof(PSNRContext, x)
@@ -73,58 +72,51 @@ static inline unsigned pow2(unsigned base)
 
 static inline double get_psnr(double mse, uint64_t nb_frames, int max)
 {
-    return 10.0 * log(pow2(max) / (mse / nb_frames)) / log(10.0);
+    return 10.0 * log10(pow2(max) / (mse / nb_frames));
 }
 
-static inline
-void compute_images_mse(PSNRContext *s,
-                        const uint8_t *main_data[4], const int main_linesizes[4],
-                        const uint8_t *ref_data[4], const int ref_linesizes[4],
-                        int w, int h, double mse[4])
+static uint64_t sse_line_8bit(const uint8_t *main_line,  const uint8_t *ref_line, int outw)
 {
-    int i, c, j;
+    int j;
+    unsigned m2 = 0;
 
-    for (c = 0; c < s->nb_components; c++) {
-        const int outw = s->planewidth[c];
-        const int outh = s->planeheight[c];
-        const uint8_t *main_line = main_data[c];
-        const uint8_t *ref_line = ref_data[c];
-        const int ref_linesize = ref_linesizes[c];
-        const int main_linesize = main_linesizes[c];
-        uint64_t m = 0;
+    for (j = 0; j < outw; j++)
+        m2 += pow2(main_line[j] - ref_line[j]);
 
-        for (i = 0; i < outh; i++) {
-            int m2 = 0;
-            for (j = 0; j < outw; j++)
-                m2 += pow2(main_line[j] - ref_line[j]);
-            m += m2;
-            ref_line += ref_linesize;
-            main_line += main_linesize;
-        }
-        mse[c] = m / (double)(outw * outh);
-    }
+    return m2;
+}
+
+static uint64_t sse_line_16bit(const uint8_t *_main_line, const uint8_t *_ref_line, int outw) /* returns the sum of pow2(main - ref) over one line read as 16-bit samples */
+{
+    int j;
+    uint64_t m2 = 0; /* 64-bit running total, returned unchanged */
+    const uint16_t *main_line = (const uint16_t *) _main_line; /* reinterpret the byte pointers as uint16_t samples */
+    const uint16_t *ref_line = (const uint16_t *) _ref_line;
+
+    for (j = 0; j < outw; j++) /* outw counts uint16_t elements, not bytes */
+        m2 += pow2(main_line[j] - ref_line[j]); /* NOTE(review): pow2() presumably squares its argument; its body is not visible here */
+
+    return m2;
+}
 
 static inline
-void compute_images_mse_16bit(PSNRContext *s,
+void compute_images_mse(PSNRContext *s,
                         const uint8_t *main_data[4], const int main_linesizes[4],
                         const uint8_t *ref_data[4], const int ref_linesizes[4],
                         int w, int h, double mse[4])
 {
-    int i, c, j;
+    int i, c;
 
     for (c = 0; c < s->nb_components; c++) {
         const int outw = s->planewidth[c];
         const int outh = s->planeheight[c];
-        const uint16_t *main_line = (uint16_t *)main_data[c];
-        const uint16_t *ref_line = (uint16_t *)ref_data[c];
-        const int ref_linesize = ref_linesizes[c] / 2;
-        const int main_linesize = main_linesizes[c] / 2;
+        const uint8_t *main_line = main_data[c];
+        const uint8_t *ref_line = ref_data[c];
+        const int ref_linesize = ref_linesizes[c];
+        const int main_linesize = main_linesizes[c];
         uint64_t m = 0;
-
         for (i = 0; i < outh; i++) {
-            for (j = 0; j < outw; j++)
-                m += pow2(main_line[j] - ref_line[j]);
+            m += s->dsp.sse_line(main_line, ref_line, outw);
             ref_line += ref_linesize;
             main_line += main_linesize;
         }
@@ -153,27 +145,28 @@ static AVFrame *do_psnr(AVFilterContext *ctx, AVFrame *main,
     int j, c;
     AVDictionary **metadata = avpriv_frame_get_metadatap(main);
 
-    s->compute_mse(s, (const uint8_t **)main->data, main->linesize,
-                      (const uint8_t **)ref->data, ref->linesize,
-                       main->width, main->height, comp_mse);
+    compute_images_mse(s, (const uint8_t **)main->data, main->linesize,
+                          (const uint8_t **)ref->data, ref->linesize,
+                          main->width, main->height, comp_mse);
 
     for (j = 0; j < s->nb_components; j++)
-        mse += comp_mse[j];
-    mse /= s->nb_components;
+        mse += comp_mse[j] * s->planeweight[j];
 
     s->min_mse = FFMIN(s->min_mse, mse);
     s->max_mse = FFMAX(s->max_mse, mse);
 
     s->mse += mse;
+    for (j = 0; j < s->nb_components; j++)
+        s->mse_comp[j] += comp_mse[j];
     s->nb_frames++;
 
     for (j = 0; j < s->nb_components; j++) {
         c = s->is_rgb ? s->rgba_map[j] : j;
         set_meta(metadata, "lavfi.psnr.mse.", s->comps[j], comp_mse[c]);
-        set_meta(metadata, "lavfi.psnr.mse_avg", 0, mse);
         set_meta(metadata, "lavfi.psnr.psnr.", s->comps[j], get_psnr(comp_mse[c], 1, s->max[c]));
-        set_meta(metadata, "lavfi.psnr.psnr_avg", 0, get_psnr(mse, 1, s->average_max));
     }
+    set_meta(metadata, "lavfi.psnr.mse_avg", 0, mse);
+    set_meta(metadata, "lavfi.psnr.psnr_avg", 0, get_psnr(mse, 1, s->average_max));
 
     if (s->stats_file) {
         fprintf(s->stats_file, "n:%"PRId64" mse_avg:%0.2f ", s->nb_frames, mse);
@@ -181,6 +174,7 @@ static AVFrame *do_psnr(AVFilterContext *ctx, AVFrame *main,
             c = s->is_rgb ? s->rgba_map[j] : j;
             fprintf(s->stats_file, "mse_%c:%0.2f ", s->comps[j], comp_mse[c]);
         }
+        fprintf(s->stats_file, "psnr_avg:%0.2f ", get_psnr(mse, 1, s->average_max));
         for (j = 0; j < s->nb_components; j++) {
             c = s->is_rgb ? s->rgba_map[j] : j;
             fprintf(s->stats_file, "psnr_%c:%0.2f ", s->comps[j],
@@ -200,14 +194,18 @@ static av_cold int init(AVFilterContext *ctx)
     s->max_mse = -INFINITY;
 
     if (s->stats_file_str) {
-        s->stats_file = fopen(s->stats_file_str, "w");
-        if (!s->stats_file) {
-            int err = AVERROR(errno);
-            char buf[128];
-            av_strerror(err, buf, sizeof(buf));
-            av_log(ctx, AV_LOG_ERROR, "Could not open stats file %s: %s\n",
-                   s->stats_file_str, buf);
-            return err;
+        if (!strcmp(s->stats_file_str, "-")) {
+            s->stats_file = stdout;
+        } else {
+            s->stats_file = fopen(s->stats_file_str, "w");
+            if (!s->stats_file) {
+                int err = AVERROR(errno);
+                char buf[128];
+                av_strerror(err, buf, sizeof(buf));
+                av_log(ctx, AV_LOG_ERROR, "Could not open stats file %s: %s\n",
+                       s->stats_file_str, buf);
+                return err;
+            }
         }
     }
 
@@ -243,6 +241,7 @@ static int config_input_ref(AVFilterLink *inlink)
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
     AVFilterContext *ctx  = inlink->dst;
     PSNRContext *s = ctx->priv;
+    unsigned sum;
     int j;
 
     s->nb_components = desc->nb_components;
@@ -256,33 +255,10 @@ static int config_input_ref(AVFilterLink *inlink)
         return AVERROR(EINVAL);
     }
 
-    switch (inlink->format) {
-    case AV_PIX_FMT_GRAY8:
-    case AV_PIX_FMT_GRAY16:
-    case AV_PIX_FMT_GBRP:
-    case AV_PIX_FMT_GBRP9:
-    case AV_PIX_FMT_GBRP10:
-    case AV_PIX_FMT_GBRP12:
-    case AV_PIX_FMT_GBRP14:
-    case AV_PIX_FMT_GBRP16:
-    case AV_PIX_FMT_GBRAP:
-    case AV_PIX_FMT_GBRAP16:
-    case AV_PIX_FMT_YUVJ411P:
-    case AV_PIX_FMT_YUVJ420P:
-    case AV_PIX_FMT_YUVJ422P:
-    case AV_PIX_FMT_YUVJ440P:
-    case AV_PIX_FMT_YUVJ444P:
-        s->max[0] = (1 << (desc->comp[0].depth_minus1 + 1)) - 1;
-        s->max[1] = (1 << (desc->comp[1].depth_minus1 + 1)) - 1;
-        s->max[2] = (1 << (desc->comp[2].depth_minus1 + 1)) - 1;
-        s->max[3] = (1 << (desc->comp[3].depth_minus1 + 1)) - 1;
-        break;
-    default:
-        s->max[0] = 235 * (1 << (desc->comp[0].depth_minus1 - 7));
-        s->max[1] = 240 * (1 << (desc->comp[1].depth_minus1 - 7));
-        s->max[2] = 240 * (1 << (desc->comp[2].depth_minus1 - 7));
-        s->max[3] = (1 << (desc->comp[3].depth_minus1 + 1)) - 1;
-    }
+    s->max[0] = (1 << desc->comp[0].depth) - 1;
+    s->max[1] = (1 << desc->comp[1].depth) - 1;
+    s->max[2] = (1 << desc->comp[2].depth) - 1;
+    s->max[3] = (1 << desc->comp[3].depth) - 1;
 
     s->is_rgb = ff_fill_rgba_map(s->rgba_map, inlink->format) >= 0;
     s->comps[0] = s->is_rgb ? 'r' : 'y' ;
@@ -290,16 +266,21 @@ static int config_input_ref(AVFilterLink *inlink)
     s->comps[2] = s->is_rgb ? 'b' : 'v' ;
     s->comps[3] = 'a';
 
-    for (j = 0; j < s->nb_components; j++)
-        s->average_max += s->max[j];
-    s->average_max /= s->nb_components;
-
-    s->planeheight[1] = s->planeheight[2] = FF_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
+    s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
     s->planeheight[0] = s->planeheight[3] = inlink->h;
-    s->planewidth[1]  = s->planewidth[2]  = FF_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
+    s->planewidth[1]  = s->planewidth[2]  = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
     s->planewidth[0]  = s->planewidth[3]  = inlink->w;
+    sum = 0;
+    for (j = 0; j < s->nb_components; j++)
+        sum += s->planeheight[j] * s->planewidth[j];
+    for (j = 0; j < s->nb_components; j++) {
+        s->planeweight[j] = (double) s->planeheight[j] * s->planewidth[j] / sum;
+        s->average_max += s->max[j] * s->planeweight[j];
+    }
 
-    s->compute_mse = desc->comp[0].depth_minus1 > 7 ? compute_images_mse_16bit : compute_images_mse;
+    s->dsp.sse_line = desc->comp[0].depth > 8 ? sse_line_16bit : sse_line_8bit;
+    if (ARCH_X86)
+        ff_psnr_init_x86(&s->dsp, desc->comp[0].depth);
 
     return 0;
 }
@@ -339,7 +320,17 @@ static av_cold void uninit(AVFilterContext *ctx)
     PSNRContext *s = ctx->priv;
 
     if (s->nb_frames > 0) {
-        av_log(ctx, AV_LOG_INFO, "PSNR average:%0.2f min:%0.2f max:%0.2f\n",
+        int j;
+        char buf[256];
+
+        buf[0] = 0;
+        for (j = 0; j < s->nb_components; j++) {
+            int c = s->is_rgb ? s->rgba_map[j] : j;
+            av_strlcatf(buf, sizeof(buf), " %c:%f", s->comps[j],
+                        get_psnr(s->mse_comp[c], s->nb_frames, s->max[c]));
+        }
+        av_log(ctx, AV_LOG_INFO, "PSNR%s average:%f min:%f max:%f\n",
+               buf,
                get_psnr(s->mse, s->nb_frames, s->average_max),
                get_psnr(s->max_mse, 1, s->average_max),
                get_psnr(s->min_mse, 1, s->average_max));
@@ -347,7 +338,7 @@ static av_cold void uninit(AVFilterContext *ctx)
 
     ff_dualinput_uninit(&s->dinput);
 
-    if (s->stats_file)
+    if (s->stats_file && s->stats_file != stdout)
         fclose(s->stats_file);
 }
 
diff --git a/libavfilter/vf_pullup.c b/libavfilter/vf_pullup.c
index ea15019a..fa76caad 100644
--- a/libavfilter/vf_pullup.c
+++ b/libavfilter/vf_pullup.c
@@ -42,7 +42,7 @@ static const AVOption pullup_options[] = {
     { "jr", "set right junk size", OFFSET(junk_right), AV_OPT_TYPE_INT, {.i64=1}, 0, INT_MAX, FLAGS },
     { "jt", "set top junk size",   OFFSET(junk_top),   AV_OPT_TYPE_INT, {.i64=4}, 1, INT_MAX, FLAGS },
     { "jb", "set bottom junk size", OFFSET(junk_bottom), AV_OPT_TYPE_INT, {.i64=4}, 1, INT_MAX, FLAGS },
-    { "sb", "set strict breaks", OFFSET(strict_breaks), AV_OPT_TYPE_INT, {.i64=0},-1, 1, FLAGS },
+    { "sb", "set strict breaks", OFFSET(strict_breaks), AV_OPT_TYPE_BOOL,{.i64=0},-1, 1, FLAGS },
     { "mp", "set metric plane",  OFFSET(metric_plane),  AV_OPT_TYPE_INT, {.i64=0}, 0, 2, FLAGS, "mp" },
     { "y", "luma",        0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "mp" },
     { "u", "chroma blue", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "mp" },
@@ -194,9 +194,9 @@ static int config_input(AVFilterLink *inlink)
         return AVERROR(EINVAL);
     }
 
-    s->planeheight[1] = s->planeheight[2] = FF_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
+    s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
     s->planeheight[0] = s->planeheight[3] = inlink->h;
-    s->planewidth[1]  = s->planewidth[2]  = FF_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
+    s->planewidth[1]  = s->planewidth[2]  = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
     s->planewidth[0]  = s->planewidth[3]  = inlink->w;
 
     s->metric_w      = (s->planewidth[mp]  - ((s->junk_left + s->junk_right)  << 3)) >> 3;
@@ -220,12 +220,6 @@ static int config_input(AVFilterLink *inlink)
     return 0;
 }
 
-static int config_output(AVFilterLink *outlink)
-{
-    outlink->flags |= FF_LINK_FLAG_REQUEST_LOOP;
-    return 0;
-}
-
 static PullupBuffer *pullup_lock_buffer(PullupBuffer *b, int parity)
 {
     if (!b)
@@ -766,7 +760,6 @@ static const AVFilterPad pullup_outputs[] = {
     {
         .name         = "default",
         .type         = AVMEDIA_TYPE_VIDEO,
-        .config_props = config_output,
     },
     { NULL }
 };
diff --git a/libavfilter/vf_random.c b/libavfilter/vf_random.c
new file mode 100644
index 00000000..373a7db0
--- /dev/null
+++ b/libavfilter/vf_random.c
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/lfg.h"
+#include "libavutil/opt.h"
+#include "libavutil/random_seed.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+#define MAX_FRAMES 512
+
+typedef struct RandomContext { /* private state of the "random" filter */
+    const AVClass *class;
+
+    AVLFG lfg;                   /* generator seeded in init(), used to pick cache slots */
+    int nb_frames;               /* "frames" option: cache size; counted down while flushing at EOF */
+    int64_t random_seed;         /* "seed" option; a negative value is replaced by a random seed in init() */
+    int nb_frames_filled;        /* number of cache slots filled so far */
+    AVFrame *frames[MAX_FRAMES]; /* cached input frames */
+    int64_t pts[MAX_FRAMES];     /* timestamps of the cached frames, oldest first */
+    int flush_idx;               /* next pts[] entry to hand out while flushing at EOF */
+} RandomContext;
+
+#define OFFSET(x) offsetof(RandomContext, x)
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption random_options[] = { /* "frames": cache size, 2..MAX_FRAMES; "seed": -1 makes init() pick a random seed */
+    { "frames", "set number of frames in cache", OFFSET(nb_frames),   AV_OPT_TYPE_INT,   {.i64=30},  2, MAX_FRAMES, FLAGS },
+    { "seed",   "set the seed",                  OFFSET(random_seed), AV_OPT_TYPE_INT64, {.i64=-1}, -1, UINT32_MAX, FLAGS },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(random);
+
+/* Seed the generator; a negative "seed" option means "pick a seed at random". */
+static av_cold int init(AVFilterContext *ctx)
+{
+    RandomContext *rnd = ctx->priv;
+
+    if (rnd->random_seed < 0)
+        rnd->random_seed = av_get_random_seed();
+
+    /* The stored 64-bit seed is narrowed to 32 bits before seeding. */
+    av_lfg_init(&rnd->lfg, (uint32_t)rnd->random_seed);
+    return 0;
+}
+
+/* Fill the cache first; afterwards trade each new frame for a randomly chosen cached one. */
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    RandomContext *s = ctx->priv;
+    AVFrame *picked;
+    int slot;
+
+    if (s->nb_frames_filled < s->nb_frames) {
+        /* Still filling: keep the frame and its timestamp, output nothing. */
+        const int n = s->nb_frames_filled++;
+        s->frames[n] = in;
+        s->pts[n]    = in->pts;
+        return 0;
+    }
+
+    slot        = av_lfg_get(&s->lfg) % s->nb_frames;
+    picked      = s->frames[slot];
+    picked->pts = s->pts[0];              /* outgoing frame gets the oldest queued timestamp */
+    memmove(&s->pts[0], &s->pts[1], (s->nb_frames - 1) * sizeof(s->pts[0]));
+    s->pts[s->nb_frames - 1] = in->pts;   /* new timestamp joins the tail of the queue */
+    s->frames[slot]          = in;
+    return ff_filter_frame(ctx->outputs[0], picked);
+}
+
+/* Pull from upstream; at EOF hand out one cached frame per call, skipping empty slots. */
+static int request_frame(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    RandomContext *s = ctx->priv;
+    int ret = ff_request_frame(ctx->inputs[0]);
+
+    while (ret == AVERROR_EOF && !ctx->is_disabled && s->nb_frames > 0) {
+        AVFrame *out = s->frames[--s->nb_frames]; /* drain from the top slot down */
+        if (!out) /* never filled: the input ended before the cache was full */
+            continue;
+        out->pts = s->pts[s->flush_idx++];
+        s->frames[s->nb_frames] = NULL; /* the slot no longer refers to the frame */
+        return ff_filter_frame(outlink, out);
+    }
+
+    return ret;
+}
+
+static const AVFilterPad random_inputs[] = { /* one video input pad */
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame, /* caches the frame or swaps it with a cached one */
+    },
+    { NULL } /* terminating entry */
+};
+
+static const AVFilterPad random_outputs[] = { /* one video output pad */
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .request_frame = request_frame, /* also flushes the cache once the input reports EOF */
+    },
+    { NULL } /* terminating entry */
+};
+
+AVFilter ff_vf_random = { /* definition of the "random" video filter */
+    .name        = "random",
+    .description = NULL_IF_CONFIG_SMALL("Return random frames."),
+    .priv_size   = sizeof(RandomContext),
+    .priv_class  = &random_class,
+    .init        = init, /* NOTE(review): no .uninit is set; frames still cached at teardown look leaked - confirm */
+    .inputs      = random_inputs,
+    .outputs     = random_outputs,
+};
diff --git a/libavfilter/vf_removegrain.c b/libavfilter/vf_removegrain.c
new file mode 100644
index 00000000..8ef09740
--- /dev/null
+++ b/libavfilter/vf_removegrain.c
@@ -0,0 +1,660 @@
+/*
+ * Copyright (c) 2012 Laurent de Soras
+ * Copyright (c) 2013 Fredrik Mellbin
+ * Copyright (c) 2015 Paul B Mahol
+ * Copyright (c) 2015 James Darnley
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/imgutils.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/qsort.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "removegrain.h"
+#include "video.h"
+
+#define OFFSET(x) offsetof(RemoveGrainContext, x)
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
+
+static const AVOption removegrain_options[] = { /* one mode (0..24) per plane; 0, the default, copies the plane unfiltered */
+    { "m0", "set mode for 1st plane", OFFSET(mode[0]), AV_OPT_TYPE_INT, {.i64=0}, 0, 24, FLAGS },
+    { "m1", "set mode for 2nd plane", OFFSET(mode[1]), AV_OPT_TYPE_INT, {.i64=0}, 0, 24, FLAGS },
+    { "m2", "set mode for 3rd plane", OFFSET(mode[2]), AV_OPT_TYPE_INT, {.i64=0}, 0, 24, FLAGS },
+    { "m3", "set mode for 4th plane", OFFSET(mode[3]), AV_OPT_TYPE_INT, {.i64=0}, 0, 24, FLAGS },
+    {NULL}
+};
+
+AVFILTER_DEFINE_CLASS(removegrain);
+
+/* Set the list of pixel formats this filter accepts (order kept as declared). */
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat supported[] = {
+        AV_PIX_FMT_GRAY8,
+        AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P,
+        AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUVA444P,
+        AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV410P,
+        AV_PIX_FMT_YUVJ411P, AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
+        AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_YUVJ444P,
+        AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRAP,
+        AV_PIX_FMT_NONE
+    };
+    AVFilterFormats *formats = ff_make_format_list(supported);
+
+    /* A NULL list is reported to the caller as ENOMEM. */
+    return formats ? ff_set_common_formats(ctx, formats) : AVERROR(ENOMEM);
+}
+
+#define REMOVE_GRAIN_SORT_AXIS /* declares maN / miN: max / min of each opposing neighbour pair */ \
+    const int ma1 = FFMAX(a1, a8);   \
+    const int mi1 = FFMIN(a1, a8);   \
+    const int ma2 = FFMAX(a2, a7);   \
+    const int mi2 = FFMIN(a2, a7);   \
+    const int ma3 = FFMAX(a3, a6);   \
+    const int mi3 = FFMIN(a3, a6);   \
+    const int ma4 = FFMAX(a4, a5);   \
+    const int mi4 = FFMIN(a4, a5);
+
+/* Mode 1: clip c to the [minimum, maximum] range of its eight neighbours. */
+static int mode01(int c, int a1, int a2, int a3, int a4, int a5, int a6, int a7, int a8)
+{
+    const int lo = FFMIN(FFMIN(FFMIN(a1, a2), FFMIN(a3, a4)), FFMIN(FFMIN(a5, a6), FFMIN(a7, a8)));
+    const int hi = FFMAX(FFMAX(FFMAX(a1, a2), FFMAX(a3, a4)), FFMAX(FFMAX(a5, a6), FFMAX(a7, a8)));
+    return av_clip(c, lo, hi);
+}
+
+/* Three-way comparison of two ints; used as the AV_QSORT() comparator. */
+static int cmp_int(const void *p1, const void *p2)
+{
+    const int *lhs = p1, *rhs = p2;
+    return FFDIFFSIGN(*lhs, *rhs);
+}
+
+/* Mode 2: clip c between the 2nd smallest and the 2nd largest neighbour. */
+static int mode02(int c, int a1, int a2, int a3, int a4, int a5, int a6, int a7, int a8)
+{
+    int sorted[8] = { a1, a2, a3, a4, a5, a6, a7, a8 };
+
+    AV_QSORT(sorted, 8, int, cmp_int);
+    return av_clip(c, sorted[1], sorted[6]);
+}
+
+/* Mode 3: clip c between the 3rd smallest and the 3rd largest neighbour. */
+static int mode03(int c, int a1, int a2, int a3, int a4, int a5, int a6, int a7, int a8)
+{
+    int sorted[8] = { a1, a2, a3, a4, a5, a6, a7, a8 };
+
+    AV_QSORT(sorted, 8, int, cmp_int);
+    return av_clip(c, sorted[2], sorted[5]);
+}
+
+/* Mode 4: clip c between the 4th and 5th smallest neighbours (the two middle values). */
+static int mode04(int c, int a1, int a2, int a3, int a4, int a5, int a6, int a7, int a8)
+{
+    int sorted[8] = { a1, a2, a3, a4, a5, a6, a7, a8 };
+
+    AV_QSORT(sorted, 8, int, cmp_int);
+    return av_clip(c, sorted[3], sorted[4]);
+}
+
+/* Mode 5: clip c to the opposing neighbour pair whose range changes it the least. */
+static int mode05(int c, int a1, int a2, int a3, int a4, int a5, int a6, int a7, int a8)
+{
+    REMOVE_GRAIN_SORT_AXIS
+
+    const int clip1 = av_clip(c, mi1, ma1);
+    const int clip2 = av_clip(c, mi2, ma2);
+    const int clip3 = av_clip(c, mi3, ma3);
+    const int clip4 = av_clip(c, mi4, ma4);
+
+    const int err1 = FFABS(c - clip1);
+    const int err2 = FFABS(c - clip2);
+    const int err3 = FFABS(c - clip3);
+    const int err4 = FFABS(c - clip4);
+    const int best = FFMIN(FFMIN(err1, err2), FFMIN(err3, err4));
+
+    /* When adding SIMD notice the return order here: 4, 2, 3, 1. */
+    if (best == err4) return clip4;
+    if (best == err2) return clip2;
+    if (best == err3) return clip3;
+    return clip1;
+}
+
+/*
+ * Mode 6: score every opposing neighbour pair as twice the clipping error
+ * plus the pair's range (saturated to 16 bits) and return c clipped to the
+ * pair with the lowest score.
+ */
+static int mode06(int c, int a1, int a2, int a3, int a4, int a5, int a6, int a7, int a8)
+{
+    REMOVE_GRAIN_SORT_AXIS
+
+    const int range1 = ma1 - mi1;
+    const int range2 = ma2 - mi2;
+    const int range3 = ma3 - mi3;
+    const int range4 = ma4 - mi4;
+
+    const int clip1 = av_clip(c, mi1, ma1);
+    const int clip2 = av_clip(c, mi2, ma2);
+    const int clip3 = av_clip(c, mi3, ma3);
+    const int clip4 = av_clip(c, mi4, ma4);
+
+    const int score1 = av_clip_uint16(2 * FFABS(c - clip1) + range1);
+    const int score2 = av_clip_uint16(2 * FFABS(c - clip2) + range2);
+    const int score3 = av_clip_uint16(2 * FFABS(c - clip3) + range3);
+    const int score4 = av_clip_uint16(2 * FFABS(c - clip4) + range4);
+    const int best = FFMIN(FFMIN(score1, score2), FFMIN(score3, score4));
+
+    /* Equal scores are resolved in the order 4, 2, 3, 1. */
+    if (best == score4) return clip4;
+    if (best == score2) return clip2;
+    if (best == score3) return clip3;
+    return clip1;
+}
+
+/*
+ * Mode 7: score every opposing neighbour pair as the clipping error plus
+ * the pair's range and return c clipped to the pair with the lowest
+ * score.
+ */
+static int mode07(int c, int a1, int a2, int a3, int a4, int a5, int a6, int a7, int a8)
+{
+    REMOVE_GRAIN_SORT_AXIS
+
+    const int range1 = ma1 - mi1;
+    const int range2 = ma2 - mi2;
+    const int range3 = ma3 - mi3;
+    const int range4 = ma4 - mi4;
+
+    const int clip1 = av_clip(c, mi1, ma1);
+    const int clip2 = av_clip(c, mi2, ma2);
+    const int clip3 = av_clip(c, mi3, ma3);
+    const int clip4 = av_clip(c, mi4, ma4);
+
+    const int score1 = FFABS(c - clip1) + range1;
+    const int score2 = FFABS(c - clip2) + range2;
+    const int score3 = FFABS(c - clip3) + range3;
+    const int score4 = FFABS(c - clip4) + range4;
+    const int best = FFMIN(FFMIN(score1, score2), FFMIN(score3, score4));
+
+    /* Equal scores are resolved in the order 4, 2, 3, 1. */
+    if (best == score4) return clip4;
+    if (best == score2) return clip2;
+    if (best == score3) return clip3;
+    return clip1;
+}
+
+/*
+ * Mode 8: score every opposing neighbour pair as the clipping error plus
+ * twice the pair's range (saturated to 16 bits) and return c clipped to the
+ * pair with the lowest score.
+ */
+static int mode08(int c, int a1, int a2, int a3, int a4, int a5, int a6, int a7, int a8)
+{
+    REMOVE_GRAIN_SORT_AXIS
+
+    const int range1 = ma1 - mi1;
+    const int range2 = ma2 - mi2;
+    const int range3 = ma3 - mi3;
+    const int range4 = ma4 - mi4;
+
+    const int clip1 = av_clip(c, mi1, ma1);
+    const int clip2 = av_clip(c, mi2, ma2);
+    const int clip3 = av_clip(c, mi3, ma3);
+    const int clip4 = av_clip(c, mi4, ma4);
+
+    const int score1 = av_clip_uint16(FFABS(c - clip1) + 2 * range1);
+    const int score2 = av_clip_uint16(FFABS(c - clip2) + 2 * range2);
+    const int score3 = av_clip_uint16(FFABS(c - clip3) + 2 * range3);
+    const int score4 = av_clip_uint16(FFABS(c - clip4) + 2 * range4);
+    const int best = FFMIN(FFMIN(score1, score2), FFMIN(score3, score4));
+
+    /* Equal scores are resolved in the order 4, 2, 3, 1. */
+    if (best == score4) return clip4;
+    if (best == score2) return clip2;
+    if (best == score3) return clip3;
+    return clip1;
+}
+
+/* Mode 9: clip c to the opposing neighbour pair with the smallest range. */
+static int mode09(int c, int a1, int a2, int a3, int a4, int a5, int a6, int a7, int a8)
+{
+    REMOVE_GRAIN_SORT_AXIS
+
+    const int range1 = ma1 - mi1;
+    const int range2 = ma2 - mi2;
+    const int range3 = ma3 - mi3;
+    const int range4 = ma4 - mi4;
+    const int narrowest = FFMIN(FFMIN(range1, range2), FFMIN(range3, range4));
+
+    /* Equal ranges are resolved in the order 4, 2, 3, 1. */
+    if (narrowest == range4)
+        return av_clip(c, mi4, ma4);
+    if (narrowest == range2)
+        return av_clip(c, mi2, ma2);
+    if (narrowest == range3)
+        return av_clip(c, mi3, ma3);
+
+    return av_clip(c, mi1, ma1);
+}
+
+/*
+ * Mode 10: return the neighbour whose value is closest to c.
+ * Equal distances are resolved by the fixed priority order
+ * 7, 8, 6, 2, 3, 1, 5, 4.
+ */
+static int mode10(int c, int a1, int a2, int a3, int a4, int a5, int a6, int a7, int a8)
+{
+    const int cand[8] = { a7, a8, a6, a2, a3, a1, a5, a4 }; /* priority order */
+    int best = FFABS(c - cand[0]);
+    int k;
+
+    /* Smallest absolute difference over all eight neighbours. */
+    for (k = 1; k < 8; k++) {
+        best = FFMIN(best, FFABS(c - cand[k]));
+    }
+
+    /* The first candidate reaching that difference wins. */
+    for (k = 0; k < 7; k++) {
+        if (FFABS(c - cand[k]) == best)
+            return cand[k];
+    }
+
+    return cand[7];
+}
+
+/* Modes 11 and 12: weighted 3x3 average (weights 4 for c, 2 and 1 for neighbours), rounded. */
+static int mode1112(int c, int a1, int a2, int a3, int a4, int a5, int a6, int a7, int a8)
+{
+    const int weight2 = a2 + a4 + a5 + a7; /* neighbours counted twice */
+    const int weight1 = a1 + a3 + a6 + a8; /* neighbours counted once */
+    return (4 * c + 2 * weight2 + weight1 + 8) >> 4;
+}
+
+/*
+ * Modes 13 and 14: rounded-up average of the neighbour pair (1/8, 2/7 or 3/6)
+ * with the smallest absolute difference; ties prefer 2/7, then 3/6, then 1/8.
+ */
+static int mode1314(int c, int a1, int a2, int a3, int a4, int a5, int a6, int a7, int a8)
+{
+    const int diff1 = FFABS(a1 - a8);
+    const int diff2 = FFABS(a2 - a7);
+    const int diff3 = FFABS(a3 - a6);
+    const int smallest = FFMIN(FFMIN(diff1, diff2), diff3);
+
+    if (smallest == diff2)
+        return (a2 + a7 + 1) >> 1;
+    if (smallest == diff3)
+        return (a3 + a6 + 1) >> 1;
+    return (a1 + a8 + 1) >> 1;
+}
+
+/*
+ * Modes 15 and 16: weighted average of neighbours 1, 2, 3, 6, 7, 8, clipped to
+ * the pair with the smallest absolute difference (ties: 2/7, then 3/6, then 1/8).
+ */
+static int mode1516(int c, int a1, int a2, int a3, int a4, int a5, int a6, int a7, int a8)
+{
+    const int diff1 = FFABS(a1 - a8);
+    const int diff2 = FFABS(a2 - a7);
+    const int diff3 = FFABS(a3 - a6);
+    const int smallest = FFMIN(FFMIN(diff1, diff2), diff3);
+    const int avg = (2 * (a2 + a7) + a1 + a3 + a6 + a8 + 4) >> 3;
+
+    if (smallest == diff2)
+        return av_clip(avg, FFMIN(a2, a7), FFMAX(a2, a7));
+    if (smallest == diff3)
+        return av_clip(avg, FFMIN(a3, a6), FFMAX(a3, a6));
+    return av_clip(avg, FFMIN(a1, a8), FFMAX(a1, a8));
+}
+
+/* Mode 17: clip c between the largest pair minimum and the smallest pair maximum. */
+static int mode17(int c, int a1, int a2, int a3, int a4, int a5, int a6, int a7, int a8)
+{
+    REMOVE_GRAIN_SORT_AXIS
+
+    const int max_of_mins = FFMAX(FFMAX(mi1, mi2), FFMAX(mi3, mi4));
+    const int min_of_maxs = FFMIN(FFMIN(ma1, ma2), FFMIN(ma3, ma4));
+    return av_clip(c, FFMIN(max_of_mins, min_of_maxs), FFMAX(max_of_mins, min_of_maxs));
+}
+
+/*
+ * Mode 18: choose the opposing pair whose farther member is closest to c and
+ * clip c to that pair; ties are resolved in the order 4, 2, 3, 1.
+ */
+static int mode18(int c, int a1, int a2, int a3, int a4, int a5, int a6, int a7, int a8)
+{
+    const int far1 = FFMAX(FFABS(c - a1), FFABS(c - a8));
+    const int far2 = FFMAX(FFABS(c - a2), FFABS(c - a7));
+    const int far3 = FFMAX(FFABS(c - a3), FFABS(c - a6));
+    const int far4 = FFMAX(FFABS(c - a4), FFABS(c - a5));
+    const int nearest = FFMIN(FFMIN(far1, far2), FFMIN(far3, far4));
+
+    if (nearest == far4)
+        return av_clip(c, FFMIN(a4, a5), FFMAX(a4, a5));
+    if (nearest == far2)
+        return av_clip(c, FFMIN(a2, a7), FFMAX(a2, a7));
+    if (nearest == far3)
+        return av_clip(c, FFMIN(a3, a6), FFMAX(a3, a6));
+
+    return av_clip(c, FFMIN(a1, a8), FFMAX(a1, a8));
+}
+
+/* Mode 19: rounded average of the eight neighbours; c itself is not used. */
+static int mode19(int c, int a1, int a2, int a3, int a4, int a5, int a6, int a7, int a8)
+{
+    const int total = a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8;
+
+    return (total + 4) >> 3;
+}
+
+/* Mode 20: average of c and its eight neighbours, with a +4 rounding bias before dividing by 9. */
+static int mode20(int c, int a1, int a2, int a3, int a4, int a5, int a6, int a7, int a8)
+{
+    const int total = a1 + a2 + a3 + a4 + c + a5 + a6 + a7 + a8;
+
+    return (total + 4) / 9;
+}
+
+/*
+ * Mode 21: clip c between the smallest rounded-down and the largest
+ * rounded-up average of the four opposing neighbour pairs.
+ */
+static int mode21(int c, int a1, int a2, int a3, int a4, int a5, int a6, int a7, int a8)
+{
+    const int sum1 = a1 + a8;
+    const int sum2 = a2 + a7;
+    const int sum3 = a3 + a6;
+    const int sum4 = a4 + a5;
+
+    const int lo = FFMIN(FFMIN(sum1 >> 1, sum2 >> 1), FFMIN(sum3 >> 1, sum4 >> 1));
+    const int hi = FFMAX(FFMAX((sum1 + 1) >> 1, (sum2 + 1) >> 1),
+                         FFMAX((sum3 + 1) >> 1, (sum4 + 1) >> 1));
+
+    return av_clip(c, lo, hi);
+}
+
+/* Mode 22: clip c to the range spanned by the rounded-up averages of the four pairs. */
+static int mode22(int c, int a1, int a2, int a3, int a4, int a5, int a6, int a7, int a8)
+{
+    const int avg1 = (a1 + a8 + 1) >> 1;
+    const int avg2 = (a2 + a7 + 1) >> 1;
+    const int avg3 = (a3 + a6 + 1) >> 1;
+    const int avg4 = (a4 + a5 + 1) >> 1;
+    const int lo = FFMIN(FFMIN(avg1, avg2), FFMIN(avg3, avg4));
+    const int hi = FFMAX(FFMAX(avg1, avg2), FFMAX(avg3, avg4));
+
+    return av_clip(c, lo, hi);
+}
+
+/* Mode 23: move c back by its largest range-limited overshoot / undershoot relative to the pairs. */
+static int mode23(int c, int a1, int a2, int a3, int a4, int a5, int a6, int a7, int a8)
+{
+    REMOVE_GRAIN_SORT_AXIS
+
+    const int range1 = ma1 - mi1;
+    const int range2 = ma2 - mi2;
+    const int range3 = ma3 - mi3;
+    const int range4 = ma4 - mi4;
+
+    const int over1 = FFMIN(c - ma1, range1);
+    const int over2 = FFMIN(c - ma2, range2);
+    const int over3 = FFMIN(c - ma3, range3);
+    const int over4 = FFMIN(c - ma4, range4);
+    const int over  = FFMAX(FFMAX(FFMAX(over1, over2), FFMAX(over3, over4)), 0);
+
+    const int under1 = FFMIN(mi1 - c, range1);
+    const int under2 = FFMIN(mi2 - c, range2);
+    const int under3 = FFMIN(mi3 - c, range3);
+    const int under4 = FFMIN(mi4 - c, range4);
+    const int under  = FFMAX(FFMAX(FFMAX(under1, under2), FFMAX(under3, under4)), 0);
+    return c - over + under;  // This probably will never overflow.
+}
+
+/*
+ * Mode 24: same scheme as mode 23, except that each pair's overshoot or
+ * undershoot t is limited to (range - t) instead of to the range itself.
+ */
+static int mode24(int c, int a1, int a2, int a3, int a4, int a5, int a6, int a7, int a8)
+{
+    REMOVE_GRAIN_SORT_AXIS
+
+    const int range1 = ma1 - mi1;
+    const int range2 = ma2 - mi2;
+    const int range3 = ma3 - mi3;
+    const int range4 = ma4 - mi4;
+
+    const int above1 = c - ma1;
+    const int above2 = c - ma2;
+    const int above3 = c - ma3;
+    const int above4 = c - ma4;
+    /* Amount by which c exceeds each pair's maximum, limited as described. */
+    const int over1 = FFMIN(above1, range1 - above1);
+    const int over2 = FFMIN(above2, range2 - above2);
+    const int over3 = FFMIN(above3, range3 - above3);
+    const int over4 = FFMIN(above4, range4 - above4);
+    const int over  = FFMAX(FFMAX(FFMAX(over1, over2), FFMAX(over3, over4)), 0);
+
+    /* Amount by which c falls below each pair's minimum, same limit. */
+    const int under1 = FFMIN(mi1 - c, range1 - (mi1 - c));
+    const int under2 = FFMIN(mi2 - c, range2 - (mi2 - c));
+    const int under3 = FFMIN(mi3 - c, range3 - (mi3 - c));
+    const int under4 = FFMIN(mi4 - c, range4 - (mi4 - c));
+    const int under  = FFMAX(FFMAX(FFMAX(under1, under2), FFMAX(under3, under4)), 0);
+
+    return c - over + under;  // This probably will never overflow.
+}
+
+/*
+ * Input link setup: record the plane dimensions for the negotiated pixel
+ * format and select the pixel function matching each plane's mode option.
+ */
+static int config_input(AVFilterLink *inlink)
+{
+    /* Pixel function for each mode; index 0 (plane is only copied) has none. */
+    static int (*const mode_fn[])(int c, int a1, int a2, int a3, int a4,
+                                  int a5, int a6, int a7, int a8) = {
+        NULL,     mode01,   mode02,   mode03,   mode04,
+        mode05,   mode06,   mode07,   mode08,   mode09,
+        mode10,   mode1112, mode1112, mode1314, mode1314,
+        mode1516, mode1516, mode17,   mode18,   mode19,
+        mode20,   mode21,   mode22,   mode23,   mode24,
+    };
+    RemoveGrainContext *s = inlink->dst->priv;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    const int chroma_w = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
+    const int chroma_h = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
+    int plane;
+
+    s->nb_planes = av_pix_fmt_count_planes(inlink->format);
+
+    /* Planes 1 and 2 get the chroma-shifted size, planes 0 and 3 the link size. */
+    s->planewidth[0]  = s->planewidth[3]  = inlink->w;
+    s->planewidth[1]  = s->planewidth[2]  = chroma_w;
+    s->planeheight[0] = s->planeheight[3] = inlink->h;
+    s->planeheight[1] = s->planeheight[2] = chroma_h;
+
+    for (plane = 0; plane < s->nb_planes; plane++) {
+        const int mode = s->mode[plane];
+
+        /*
+         * skip_odd / skip_even are single context fields, not per-plane ones:
+         * once any plane requests them they stay set.
+         */
+        if (mode == 13 || mode == 15)
+            s->skip_odd = 1;
+        else if (mode == 14 || mode == 16)
+            s->skip_even = 1;
+
+        /* Mode 0 (or a value outside the table) leaves rg[] untouched. */
+        if (mode >= 1 && mode <= 24)
+            s->rg[plane] = mode_fn[mode];
+    }
+
+    if (ARCH_X86)
+        ff_removegrain_init_x86(s);
+
+    return 0;
+}
+
+typedef struct ThreadData { /* job description handed to filter_slice() */
+    AVFrame *in, *out; /* source frame and destination frame */
+    int plane;         /* index of the plane to process */
+} ThreadData;
+
+/*
+ * Slice worker: filter this job's share of the rows of plane td->plane, never
+ * the first or last row of the plane. Border pixels of a row are copied.
+ */
+static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    RemoveGrainContext *s = ctx->priv;
+    ThreadData *td = arg;
+    AVFrame *in = td->in;
+    AVFrame *out = td->out;
+    const int i = td->plane;
+    const int mode = s->mode[i];
+    const int width = s->planewidth[i];
+    const int height = s->planeheight[i];
+    const int stride = in->linesize[i];
+    /* Modes 13/15 only rewrite even rows, 14/16 only odd rows; decided per plane
+     * so that one plane's mode cannot disable rows of another plane. */
+    const int skip_odd  = mode == 13 || mode == 15;
+    const int skip_even = mode == 14 || mode == 16;
+    const int start = FFMAX(1, (height * jobnr) / nb_jobs);
+    const int end   = FFMIN(height - 1, (height * (jobnr + 1)) / nb_jobs);
+    int x, y;
+
+    for (y = start; y < end; y++) {
+        uint8_t *src = in->data[i]  + y * stride;
+        uint8_t *dst = out->data[i] + y * out->linesize[i];
+
+        /* Rows of the untouched parity, and rows too narrow to have an interior
+         * pixel, are copied as they are. */
+        if (width < 3 || (y & 1 ? skip_odd : skip_even)) {
+            memcpy(dst, src, width);
+            continue;
+        }
+
+        *dst++ = *src++; /* left border pixel */
+
+        x = 1;
+        if (s->fl[i]) {
+            /* The row function, when set, gets a multiple of 16 interior pixels. */
+            const int w_asm = (width - 2) & ~15;
+
+            s->fl[i](dst, src, stride, w_asm);
+            x   += w_asm;
+            dst += w_asm;
+            src += w_asm;
+        }
+
+        /* Per-pixel path for the remaining interior pixels. */
+        for (; x < width - 1; x++) {
+            const int a1 = src[-stride - 1];
+            const int a2 = src[-stride    ];
+            const int a3 = src[-stride + 1];
+            const int a4 = src[-1];
+            const int c  = src[ 0];
+            const int a5 = src[ 1];
+            const int a6 = src[ stride - 1];
+            const int a7 = src[ stride    ];
+            const int a8 = src[ stride + 1];
+
+            *dst++ = s->rg[i](c, a1, a2, a3, a4, a5, a6, a7, a8);
+            src++;
+        }
+        *dst = *src; /* right border pixel */
+    }
+
+    return 0;
+}
+
+/*
+ * Filter one frame into a newly obtained output frame. Planes with mode 0 are
+ * copied; for the others the first and last rows are copied and the rows in
+ * between are processed by filter_slice() through ctx->internal->execute().
+ */
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    RemoveGrainContext *s = ctx->priv;
+    AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    int plane;
+
+    if (!out) {
+        av_frame_free(&in);
+        return AVERROR(ENOMEM);
+    }
+    av_frame_copy_props(out, in);
+
+    for (plane = 0; plane < s->nb_planes; plane++) {
+        const int w = s->planewidth[plane];
+        const int h = s->planeheight[plane];
+        const int last_row = h - 1;
+        ThreadData td = { .in = in, .out = out, .plane = plane };
+
+        if (s->mode[plane] == 0) {
+            av_image_copy_plane(out->data[plane], out->linesize[plane],
+                                in->data[plane], in->linesize[plane], w, h);
+            continue;
+        }
+
+        memcpy(out->data[plane], in->data[plane], w);              /* first row */
+        ctx->internal->execute(ctx, filter_slice, &td, NULL,
+                               FFMIN(h, ctx->graph->nb_threads));
+        memcpy(out->data[plane] + last_row * out->linesize[plane], /* last row */
+               in->data[plane]  + last_row * in->linesize[plane], w);
+    }
+
+    av_frame_free(&in);
+    return ff_filter_frame(outlink, out);
+}
+
+static const AVFilterPad removegrain_inputs[] = { /* one video input pad */
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame, /* filters each incoming frame into a new output frame */
+        .config_props = config_input, /* records plane sizes and selects the per-plane functions */
+    },
+    { NULL } /* terminating entry */
+};
+
+static const AVFilterPad removegrain_outputs[] = { /* one video output pad, no callbacks set */
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO,
+    },
+    { NULL } /* terminating entry */
+};
+
+AVFilter ff_vf_removegrain = { /* definition of the "removegrain" video filter */
+    .name          = "removegrain",
+    .description   = NULL_IF_CONFIG_SMALL("Remove grain."),
+    .priv_size     = sizeof(RemoveGrainContext),
+    .query_formats = query_formats,
+    .inputs        = removegrain_inputs,
+    .outputs       = removegrain_outputs,
+    .priv_class    = &removegrain_class,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS, /* filter_frame() runs filter_slice() via ctx->internal->execute() */
+};
diff --git a/libavfilter/vf_repeatfields.c b/libavfilter/vf_repeatfields.c
index b9e73ec6..3ac432b5 100644
--- a/libavfilter/vf_repeatfields.c
+++ b/libavfilter/vf_repeatfields.c
@@ -65,7 +65,7 @@ static int config_input(AVFilterLink *inlink)
     if ((ret = av_image_fill_linesizes(s->linesize, inlink->format, inlink->w)) < 0)
         return ret;
 
-    s->planeheight[1] = s->planeheight[2] = FF_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
+    s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
     s->planeheight[0] = s->planeheight[3] = inlink->h;
 
     s->nb_planes = av_pix_fmt_count_planes(inlink->format);
diff --git a/libavfilter/vf_rotate.c b/libavfilter/vf_rotate.c
index 46ab796c..47dc01e7 100644
--- a/libavfilter/vf_rotate.c
+++ b/libavfilter/vf_rotate.c
@@ -101,7 +101,7 @@ static const AVOption rotate_options[] = {
     { "oh",        "set output height expression", OFFSET(outh_expr_str), AV_OPT_TYPE_STRING, {.str="ih"}, CHAR_MIN, CHAR_MAX, .flags=FLAGS },
     { "fillcolor", "set background fill color",    OFFSET(fillcolor_str), AV_OPT_TYPE_STRING, {.str="black"}, CHAR_MIN, CHAR_MAX, .flags=FLAGS },
     { "c",         "set background fill color",    OFFSET(fillcolor_str), AV_OPT_TYPE_STRING, {.str="black"}, CHAR_MIN, CHAR_MAX, .flags=FLAGS },
-    { "bilinear",  "use bilinear interpolation",   OFFSET(use_bilinear),  AV_OPT_TYPE_INT, {.i64=1}, 0, 1, .flags=FLAGS },
+    { "bilinear",  "use bilinear interpolation",   OFFSET(use_bilinear),  AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1, .flags=FLAGS },
     { NULL }
 };
 
@@ -239,12 +239,12 @@ static int config_props(AVFilterLink *outlink)
                            func1_names, func1, NULL, NULL, rot, 0, ctx);
     rot->var_values[VAR_OUT_W] = rot->var_values[VAR_OW] = res;
     rot->outw = res + 0.5;
-    SET_SIZE_EXPR(outh, "out_w");
+    SET_SIZE_EXPR(outh, "out_h");
     rot->var_values[VAR_OUT_H] = rot->var_values[VAR_OH] = res;
     rot->outh = res + 0.5;
 
     /* evaluate the width again, as it may depend on the evaluated output height */
-    SET_SIZE_EXPR(outw, "out_h");
+    SET_SIZE_EXPR(outw, "out_w");
     rot->var_values[VAR_OUT_W] = rot->var_values[VAR_OW] = res;
     rot->outw = res + 0.5;
 
@@ -494,11 +494,11 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
     for (plane = 0; plane < rot->nb_planes; plane++) {
         int hsub = plane == 1 || plane == 2 ? rot->hsub : 0;
         int vsub = plane == 1 || plane == 2 ? rot->vsub : 0;
-        const int outw = FF_CEIL_RSHIFT(outlink->w, hsub);
-        const int outh = FF_CEIL_RSHIFT(outlink->h, vsub);
+        const int outw = AV_CEIL_RSHIFT(outlink->w, hsub);
+        const int outh = AV_CEIL_RSHIFT(outlink->h, vsub);
         ThreadData td = { .in = in,   .out  = out,
-                          .inw  = FF_CEIL_RSHIFT(inlink->w, hsub),
-                          .inh  = FF_CEIL_RSHIFT(inlink->h, vsub),
+                          .inw  = AV_CEIL_RSHIFT(inlink->w, hsub),
+                          .inh  = AV_CEIL_RSHIFT(inlink->h, vsub),
                           .outh = outh, .outw = outw,
                           .xi = -(outw-1) * c / 2, .yi =  (outw-1) * s / 2,
                           .xprime = -(outh-1) * s / 2,
diff --git a/libavfilter/vf_sab.c b/libavfilter/vf_sab.c
index da594396..3f0951f3 100644
--- a/libavfilter/vf_sab.c
+++ b/libavfilter/vf_sab.c
@@ -107,24 +107,24 @@ AVFILTER_DEFINE_CLASS(sab);
 
 static av_cold int init(AVFilterContext *ctx)
 {
-    SabContext *sab = ctx->priv;
+    SabContext *s = ctx->priv;
 
     /* make chroma default to luma values, if not explicitly set */
-    if (sab->chroma.radius < RADIUS_MIN)
-        sab->chroma.radius = sab->luma.radius;
-    if (sab->chroma.pre_filter_radius < PRE_FILTER_RADIUS_MIN)
-        sab->chroma.pre_filter_radius = sab->luma.pre_filter_radius;
-    if (sab->chroma.strength < STRENGTH_MIN)
-        sab->chroma.strength = sab->luma.strength;
+    if (s->chroma.radius < RADIUS_MIN)
+        s->chroma.radius = s->luma.radius;
+    if (s->chroma.pre_filter_radius < PRE_FILTER_RADIUS_MIN)
+        s->chroma.pre_filter_radius = s->luma.pre_filter_radius;
+    if (s->chroma.strength < STRENGTH_MIN)
+        s->chroma.strength = s->luma.strength;
 
-    sab->luma.quality = sab->chroma.quality = 3.0;
-    sab->sws_flags = SWS_POINT;
+    s->luma.quality = s->chroma.quality = 3.0;
+    s->sws_flags = SWS_POINT;
 
     av_log(ctx, AV_LOG_VERBOSE,
            "luma_radius:%f luma_pre_filter_radius::%f luma_strength:%f "
            "chroma_radius:%f chroma_pre_filter_radius:%f chroma_strength:%f\n",
-           sab->luma  .radius, sab->luma  .pre_filter_radius, sab->luma  .strength,
-           sab->chroma.radius, sab->chroma.pre_filter_radius, sab->chroma.strength);
+           s->luma  .radius, s->luma  .pre_filter_radius, s->luma  .strength,
+           s->chroma.radius, s->chroma.pre_filter_radius, s->chroma.strength);
     return 0;
 }
 
@@ -140,10 +140,10 @@ static void close_filter_param(FilterParam *f)
 
 static av_cold void uninit(AVFilterContext *ctx)
 {
-    SabContext *sab = ctx->priv;
+    SabContext *s = ctx->priv;
 
-    close_filter_param(&sab->luma);
-    close_filter_param(&sab->chroma);
+    close_filter_param(&s->luma);
+    close_filter_param(&s->chroma);
 }
 
 static int open_filter_param(FilterParam *f, int width, int height, unsigned int sws_flags)
@@ -200,22 +200,22 @@ static int open_filter_param(FilterParam *f, int width, int height, unsigned int
 
 static int config_props(AVFilterLink *inlink)
 {
-    SabContext *sab = inlink->dst->priv;
+    SabContext *s = inlink->dst->priv;
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
     int ret;
 
-    sab->hsub = desc->log2_chroma_w;
-    sab->vsub = desc->log2_chroma_h;
+    s->hsub = desc->log2_chroma_w;
+    s->vsub = desc->log2_chroma_h;
 
-    close_filter_param(&sab->luma);
-    ret = open_filter_param(&sab->luma, inlink->w, inlink->h, sab->sws_flags);
+    close_filter_param(&s->luma);
+    ret = open_filter_param(&s->luma, inlink->w, inlink->h, s->sws_flags);
     if (ret < 0)
         return ret;
 
-    close_filter_param(&sab->chroma);
-    ret = open_filter_param(&sab->chroma,
-                            FF_CEIL_RSHIFT(inlink->w, sab->hsub),
-                            FF_CEIL_RSHIFT(inlink->h, sab->vsub), sab->sws_flags);
+    close_filter_param(&s->chroma);
+    ret = open_filter_param(&s->chroma,
+                            AV_CEIL_RSHIFT(inlink->w, s->hsub),
+                            AV_CEIL_RSHIFT(inlink->h, s->vsub), s->sws_flags);
     return ret;
 }
 
@@ -281,7 +281,7 @@ static void blur(uint8_t       *dst, const int dst_linesize,
 
 static int filter_frame(AVFilterLink *inlink, AVFrame *inpic)
 {
-    SabContext  *sab = inlink->dst->priv;
+    SabContext  *s = inlink->dst->priv;
     AVFilterLink *outlink = inlink->dst->outputs[0];
     AVFrame *outpic;
 
@@ -293,12 +293,12 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *inpic)
     av_frame_copy_props(outpic, inpic);
 
     blur(outpic->data[0], outpic->linesize[0], inpic->data[0],  inpic->linesize[0],
-         inlink->w, inlink->h, &sab->luma);
+         inlink->w, inlink->h, &s->luma);
     if (inpic->data[2]) {
-        int cw = FF_CEIL_RSHIFT(inlink->w, sab->hsub);
-        int ch = FF_CEIL_RSHIFT(inlink->h, sab->vsub);
-        blur(outpic->data[1], outpic->linesize[1], inpic->data[1], inpic->linesize[1], cw, ch, &sab->chroma);
-        blur(outpic->data[2], outpic->linesize[2], inpic->data[2], inpic->linesize[2], cw, ch, &sab->chroma);
+        int cw = AV_CEIL_RSHIFT(inlink->w, s->hsub);
+        int ch = AV_CEIL_RSHIFT(inlink->h, s->vsub);
+        blur(outpic->data[1], outpic->linesize[1], inpic->data[1], inpic->linesize[1], cw, ch, &s->chroma);
+        blur(outpic->data[2], outpic->linesize[2], inpic->data[2], inpic->linesize[2], cw, ch, &s->chroma);
     }
 
     av_frame_free(&inpic);
diff --git a/libavfilter/vf_scale.c b/libavfilter/vf_scale.c
index 2a3d0080..ac9d4c31 100644
--- a/libavfilter/vf_scale.c
+++ b/libavfilter/vf_scale.c
@@ -71,6 +71,13 @@ enum var_name {
     VARS_NB
 };
 
+enum EvalMode {
+    EVAL_MODE_INIT,   ///< default of the "eval" option: expressions are evaluated once during initialization
+    EVAL_MODE_FRAME,  ///< expressions are evaluated during initialization and per-frame
+    EVAL_MODE_NB      ///< number of modes; the "eval" option accepts 0 .. EVAL_MODE_NB-1
+};
+
+
 typedef struct ScaleContext {
     const AVClass *class;
     struct SwsContext *sws;     ///< software scaler context
@@ -86,6 +93,7 @@ typedef struct ScaleContext {
     int w, h;
     char *size_str;
     unsigned int flags;         ///sws flags
+    double param[2];            // sws params
 
     int hsub, vsub;             ///< chroma subsampling
     int slice_y;                ///< top of current output slice
@@ -109,8 +117,15 @@ typedef struct ScaleContext {
     int in_v_chr_pos;
 
     int force_original_aspect_ratio;
+
+    int nb_slices;
+
+    int eval_mode;              ///< expression evaluation mode
+
 } ScaleContext;
 
+AVFilter ff_vf_scale2ref;
+
 static av_cold int init_dict(AVFilterContext *ctx, AVDictionary **opts)
 {
     ScaleContext *scale = ctx->priv;
@@ -185,11 +200,11 @@ static int query_formats(AVFilterContext *ctx)
             if ((sws_isSupportedInput(pix_fmt) ||
                  sws_isSupportedEndiannessConversion(pix_fmt))
                 && (ret = ff_add_format(&formats, pix_fmt)) < 0) {
-                ff_formats_unref(&formats);
                 return ret;
             }
         }
-        ff_formats_ref(formats, &ctx->inputs[0]->out_formats);
+        if ((ret = ff_formats_ref(formats, &ctx->inputs[0]->out_formats)) < 0)
+            return ret;
     }
     if (ctx->outputs[0]) {
         const AVPixFmtDescriptor *desc = NULL;
@@ -199,11 +214,11 @@ static int query_formats(AVFilterContext *ctx)
             if ((sws_isSupportedOutput(pix_fmt) || pix_fmt == AV_PIX_FMT_PAL8 ||
                  sws_isSupportedEndiannessConversion(pix_fmt))
                 && (ret = ff_add_format(&formats, pix_fmt)) < 0) {
-                ff_formats_unref(&formats);
                 return ret;
             }
         }
-        ff_formats_ref(formats, &ctx->outputs[0]->in_formats);
+        if ((ret = ff_formats_ref(formats, &ctx->outputs[0]->in_formats)) < 0)
+            return ret;
     }
 
     return 0;
@@ -234,7 +249,10 @@ static const int *parse_yuv_type(const char *s, enum AVColorSpace colorspace)
 static int config_props(AVFilterLink *outlink)
 {
     AVFilterContext *ctx = outlink->src;
-    AVFilterLink *inlink = outlink->src->inputs[0];
+    AVFilterLink *inlink0 = outlink->src->inputs[0];
+    AVFilterLink *inlink  = ctx->filter == &ff_vf_scale2ref ?
+                            outlink->src->inputs[1] :
+                            outlink->src->inputs[0];
     enum AVPixelFormat outfmt = outlink->format;
     ScaleContext *scale = ctx->priv;
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
@@ -343,8 +361,11 @@ static int config_props(AVFilterLink *outlink)
     if (scale->isws[1])
         sws_freeContext(scale->isws[1]);
     scale->isws[0] = scale->isws[1] = scale->sws = NULL;
-    if (inlink->w == outlink->w && inlink->h == outlink->h &&
-        inlink->format == outlink->format)
+    if (inlink0->w == outlink->w &&
+        inlink0->h == outlink->h &&
+        !scale->out_color_matrix &&
+        scale->in_range == scale->out_range &&
+        inlink0->format == outlink->format)
         ;
     else {
         struct SwsContext **swscs[3] = {&scale->sws, &scale->isws[0], &scale->isws[1]};
@@ -356,31 +377,37 @@ static int config_props(AVFilterLink *outlink)
             if (!*s)
                 return AVERROR(ENOMEM);
 
+            av_opt_set_int(*s, "srcw", inlink0 ->w, 0);
+            av_opt_set_int(*s, "srch", inlink0 ->h >> !!i, 0);
+            av_opt_set_int(*s, "src_format", inlink0->format, 0);
+            av_opt_set_int(*s, "dstw", outlink->w, 0);
+            av_opt_set_int(*s, "dsth", outlink->h >> !!i, 0);
+            av_opt_set_int(*s, "dst_format", outfmt, 0);
+            av_opt_set_int(*s, "sws_flags", scale->flags, 0);
+            av_opt_set_double(*s, "param0", scale->param[0], 0); // param[] is double: av_opt_set_int() would drop the fraction
+            av_opt_set_double(*s, "param1", scale->param[1], 0);
+            if (scale->in_range != AVCOL_RANGE_UNSPECIFIED)
+                av_opt_set_int(*s, "src_range",
+                               scale->in_range == AVCOL_RANGE_JPEG, 0);
+            if (scale->out_range != AVCOL_RANGE_UNSPECIFIED)
+                av_opt_set_int(*s, "dst_range",
+                               scale->out_range == AVCOL_RANGE_JPEG, 0);
+
             if (scale->opts) {
                 AVDictionaryEntry *e = NULL;
-
                 while ((e = av_dict_get(scale->opts, "", e, AV_DICT_IGNORE_SUFFIX))) {
                     if ((ret = av_opt_set(*s, e->key, e->value, 0)) < 0)
                         return ret;
                 }
             }
-
-            av_opt_set_int(*s, "srcw", inlink ->w, 0);
-            av_opt_set_int(*s, "srch", inlink ->h >> !!i, 0);
-            av_opt_set_int(*s, "src_format", inlink->format, 0);
-            av_opt_set_int(*s, "dstw", outlink->w, 0);
-            av_opt_set_int(*s, "dsth", outlink->h >> !!i, 0);
-            av_opt_set_int(*s, "dst_format", outfmt, 0);
-            av_opt_set_int(*s, "sws_flags", scale->flags, 0);
-
-            /* Override YUV420P settings to have the correct (MPEG-2) chroma positions
+            /* Override YUV420P default settings to have the correct (MPEG-2) chroma positions
              * MPEG-2 chroma positions are used by convention
              * XXX: support other 4:2:0 pixel formats */
-            if (inlink->format == AV_PIX_FMT_YUV420P) {
+            if (inlink0->format == AV_PIX_FMT_YUV420P && scale->in_v_chr_pos == -513) {
                 scale->in_v_chr_pos = (i == 0) ? 128 : (i == 1) ? 64 : 192;
             }
 
-            if (outlink->format == AV_PIX_FMT_YUV420P) {
+            if (outlink->format == AV_PIX_FMT_YUV420P && scale->out_v_chr_pos == -513) {
                 scale->out_v_chr_pos = (i == 0) ? 128 : (i == 1) ? 64 : 192;
             }
 
@@ -417,6 +444,28 @@ static int config_props(AVFilterLink *outlink)
     return ret;
 }
 
+/* Configure the "ref" output by copying size, aspect ratio and time base from the filter's second input. */
+static int config_props_ref(AVFilterLink *outlink)
+{
+    const AVFilterLink *ref = outlink->src->inputs[1];
+
+    outlink->time_base           = ref->time_base;
+    outlink->sample_aspect_ratio = ref->sample_aspect_ratio;
+    outlink->h                   = ref->h;
+    outlink->w                   = ref->w;
+    return 0;
+}
+
+static int request_frame(AVFilterLink *outlink)
+{
+    return ff_request_frame(outlink->src->inputs[0]); // forward the request to the filter's first input
+}
+
+static int request_frame_ref(AVFilterLink *outlink)
+{
+    return ff_request_frame(outlink->src->inputs[1]); // forward the request to the filter's second ("ref") input
+}
+
 static int scale_slice(AVFilterLink *link, AVFrame *out_buf, AVFrame *cur_pic, struct SwsContext *sws, int y, int h, int mul, int field)
 {
     ScaleContext *scale = link->dst->priv;
@@ -455,17 +504,25 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
 
     if(   in->width  != link->w
        || in->height != link->h
-       || in->format != link->format) {
+       || in->format != link->format
+       || in->sample_aspect_ratio.den != link->sample_aspect_ratio.den || in->sample_aspect_ratio.num != link->sample_aspect_ratio.num) {
         int ret;
-        snprintf(buf, sizeof(buf)-1, "%d", outlink->w);
-        av_opt_set(scale, "w", buf, 0);
-        snprintf(buf, sizeof(buf)-1, "%d", outlink->h);
-        av_opt_set(scale, "h", buf, 0);
+
+        if (scale->eval_mode == EVAL_MODE_INIT) {
+            snprintf(buf, sizeof(buf)-1, "%d", outlink->w);
+            av_opt_set(scale, "w", buf, 0);
+            snprintf(buf, sizeof(buf)-1, "%d", outlink->h);
+            av_opt_set(scale, "h", buf, 0);
+        }
 
         link->dst->inputs[0]->format = in->format;
         link->dst->inputs[0]->w      = in->width;
         link->dst->inputs[0]->h      = in->height;
 
+        link->dst->inputs[0]->sample_aspect_ratio.den = in->sample_aspect_ratio.den;
+        link->dst->inputs[0]->sample_aspect_ratio.num = in->sample_aspect_ratio.num;
+
+
         if ((ret = config_props(outlink)) < 0)
             return ret;
     }
@@ -507,6 +564,8 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
             inv_table = parse_yuv_type(scale->in_color_matrix, av_frame_get_colorspace(in));
         if (scale->out_color_matrix)
             table     = parse_yuv_type(scale->out_color_matrix, AVCOL_SPC_UNSPECIFIED);
+        else if (scale->in_color_matrix)
+            table = inv_table;
 
         if (scale-> in_range != AVCOL_RANGE_UNSPECIFIED)
             in_full  = (scale-> in_range == AVCOL_RANGE_JPEG);
@@ -526,6 +585,8 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
             sws_setColorspaceDetails(scale->isws[1], inv_table, in_full,
                                      table, out_full,
                                      brightness, contrast, saturation);
+
+        av_frame_set_color_range(out, out_full ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG);
     }
 
     av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den,
@@ -536,6 +597,15 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
     if(scale->interlaced>0 || (scale->interlaced<0 && in->interlaced_frame)){
         scale_slice(link, out, in, scale->isws[0], 0, (link->h+1)/2, 2, 0);
         scale_slice(link, out, in, scale->isws[1], 0,  link->h   /2, 2, 1);
+    }else if (scale->nb_slices) {
+        int i, slice_h, slice_start, slice_end = 0;
+        const int nb_slices = FFMIN(scale->nb_slices, link->h);
+        for (i = 0; i < nb_slices; i++) {
+            slice_start = slice_end;
+            slice_end   = (link->h * (i+1)) / nb_slices;
+            slice_h     = slice_end - slice_start;
+            scale_slice(link, out, in, scale->sws, slice_start, slice_h, 1, 0);
+        }
     }else{
         scale_slice(link, out, in, scale->sws, 0, link->h, 1, 0);
     }
@@ -544,6 +614,37 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
     return ff_filter_frame(outlink, out);
 }
 
+/* Frame callback of the "ref" input pad: hand the frame on, untouched, to the filter's second output. */
+static int filter_frame_ref(AVFilterLink *link, AVFrame *in)
+{
+    // outputs[1] is the output pad named "ref"
+    return ff_filter_frame(link->dst->outputs[1], in);
+}
+
+/* Runtime command handler: "w"/"width"/"h"/"height" set the matching option to
+ * args and reconfigure output 0; any other command yields AVERROR(ENOSYS). */
+static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
+                           char *res, int res_len, int flags)
+{
+    ScaleContext *scale = ctx->priv;
+    const int prev_w = scale->w;
+    const int prev_h = scale->h;
+    int ret;
+
+    if (strcmp(cmd, "width")  && strcmp(cmd, "w") &&
+        strcmp(cmd, "height") && strcmp(cmd, "h"))
+        return AVERROR(ENOSYS);
+
+    av_opt_set(scale, cmd, args, 0);
+    ret = config_props(ctx->outputs[0]);
+    if (ret < 0) {
+        // reconfiguration failed: put back the previous w/h values
+        scale->w = prev_w;
+        scale->h = prev_h;
+    }
+    return ret;
+}
+
 static const AVClass *child_class_next(const AVClass *prev)
 {
     return prev ? NULL : sws_get_class();
@@ -558,7 +659,7 @@ static const AVOption scale_options[] = {
     { "h",     "Output video height",         OFFSET(h_expr),    AV_OPT_TYPE_STRING,        .flags = FLAGS },
     { "height","Output video height",         OFFSET(h_expr),    AV_OPT_TYPE_STRING,        .flags = FLAGS },
     { "flags", "Flags to pass to libswscale", OFFSET(flags_str), AV_OPT_TYPE_STRING, { .str = "bilinear" }, .flags = FLAGS },
-    { "interl", "set interlacing", OFFSET(interlaced), AV_OPT_TYPE_INT, {.i64 = 0 }, -1, 1, FLAGS },
+    { "interl", "set interlacing", OFFSET(interlaced), AV_OPT_TYPE_BOOL, {.i64 = 0 }, -1, 1, FLAGS },
     { "size",   "set video size",          OFFSET(size_str), AV_OPT_TYPE_STRING, {.str = NULL}, 0, FLAGS },
     { "s",      "set video size",          OFFSET(size_str), AV_OPT_TYPE_STRING, {.str = NULL}, 0, FLAGS },
     {  "in_color_matrix", "set input YCbCr type",   OFFSET(in_color_matrix),  AV_OPT_TYPE_STRING, { .str = "auto" }, .flags = FLAGS },
@@ -579,6 +680,12 @@ static const AVOption scale_options[] = {
     { "disable",  NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, 0, 0, FLAGS, "force_oar" },
     { "decrease", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, 0, 0, FLAGS, "force_oar" },
     { "increase", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 2 }, 0, 0, FLAGS, "force_oar" },
+    { "param0", "Scaler param 0",             OFFSET(param[0]),  AV_OPT_TYPE_DOUBLE, { .dbl = SWS_PARAM_DEFAULT  }, INT_MIN, INT_MAX, FLAGS },
+    { "param1", "Scaler param 1",             OFFSET(param[1]),  AV_OPT_TYPE_DOUBLE, { .dbl = SWS_PARAM_DEFAULT  }, INT_MIN, INT_MAX, FLAGS },
+    { "nb_slices", "set the number of slices (debug purpose only)", OFFSET(nb_slices), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, FLAGS },
+    { "eval", "specify when to evaluate expressions", OFFSET(eval_mode), AV_OPT_TYPE_INT, {.i64 = EVAL_MODE_INIT}, 0, EVAL_MODE_NB-1, FLAGS, "eval" },
+         { "init",  "eval expressions once during initialization", 0, AV_OPT_TYPE_CONST, {.i64=EVAL_MODE_INIT},  .flags = FLAGS, .unit = "eval" },
+         { "frame", "eval expressions during initialization and per-frame", 0, AV_OPT_TYPE_CONST, {.i64=EVAL_MODE_FRAME}, .flags = FLAGS, .unit = "eval" },
     { NULL }
 };
 
@@ -610,13 +717,66 @@ static const AVFilterPad avfilter_vf_scale_outputs[] = {
 };
 
 AVFilter ff_vf_scale = {
-    .name          = "scale",
-    .description   = NULL_IF_CONFIG_SMALL("Scale the input video size and/or convert the image format."),
-    .init_dict     = init_dict,
-    .uninit        = uninit,
-    .query_formats = query_formats,
-    .priv_size     = sizeof(ScaleContext),
-    .priv_class    = &scale_class,
-    .inputs        = avfilter_vf_scale_inputs,
-    .outputs       = avfilter_vf_scale_outputs,
+    .name            = "scale",
+    .description     = NULL_IF_CONFIG_SMALL("Scale the input video size and/or convert the image format."),
+    .init_dict       = init_dict,
+    .uninit          = uninit,
+    .query_formats   = query_formats,
+    .priv_size       = sizeof(ScaleContext),
+    .priv_class      = &scale_class,
+    .inputs          = avfilter_vf_scale_inputs,
+    .outputs         = avfilter_vf_scale_outputs,
+    .process_command = process_command,
+};
+
+static const AVClass scale2ref_class = {
+    .class_name       = "scale2ref",
+    .item_name        = av_default_item_name,
+    .option           = scale_options,      // reuses the scale_options table defined in this file
+    .version          = LIBAVUTIL_VERSION_INT,
+    .category         = AV_CLASS_CATEGORY_FILTER,
+    .child_class_next = child_class_next,   // child_class_next() returns sws_get_class() first, then NULL
+};
+
+static const AVFilterPad avfilter_vf_scale2ref_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame,       // inputs[0]: frames handled by the regular scale filter_frame()
+    },
+    {
+        .name         = "ref",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame_ref,   // inputs[1]: frames forwarded as-is to outputs[1]
+    },
+    { NULL }
+};
+
+static const AVFilterPad avfilter_vf_scale2ref_outputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_props,       // for scale2ref, config_props() takes its "inlink" from inputs[1]
+        .request_frame= request_frame,      // pulls from inputs[0]
+    },
+    {
+        .name         = "ref",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_props_ref,   // copies the link properties of inputs[1]
+        .request_frame= request_frame_ref,  // pulls from inputs[1]
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_scale2ref = {
+    .name            = "scale2ref",
+    .description     = NULL_IF_CONFIG_SMALL("Scale the input video size and/or convert the image format to the given reference."),
+    .init_dict       = init_dict,               // same init/uninit/query_formats callbacks as ff_vf_scale
+    .uninit          = uninit,
+    .query_formats   = query_formats,
+    .priv_size       = sizeof(ScaleContext),
+    .priv_class      = &scale2ref_class,
+    .inputs          = avfilter_vf_scale2ref_inputs,    // two inputs: "default" and "ref"
+    .outputs         = avfilter_vf_scale2ref_outputs,   // two outputs: "default" and "ref"
+    .process_command = process_command,
+};
diff --git a/libavfilter/vf_selectivecolor.c b/libavfilter/vf_selectivecolor.c
new file mode 100644
index 00000000..e5900949
--- /dev/null
+++ b/libavfilter/vf_selectivecolor.c
@@ -0,0 +1,458 @@
+/*
+ * Copyright (c) 2015 Clément Bœsch <u pkh me>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @todo
+ * - use integers so it can be made bitexact and a FATE test can be added
+ * - >8 bit support
+ */
+
+#include "libavutil/avassert.h"
+#include "libavutil/file.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/opt.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+enum color_range {
+    // WARNING: do NOT reorder: parse_psfile() stores the file's CMYK entries by this index
+    RANGE_REDS,
+    RANGE_YELLOWS,
+    RANGE_GREENS,
+    RANGE_CYANS,
+    RANGE_BLUES,
+    RANGE_MAGENTAS,
+    RANGE_WHITES,
+    RANGE_NEUTRALS,
+    RANGE_BLACKS,
+    NB_RANGES       // number of ranges; sizes the per-range arrays of SelectiveColorContext
+};
+
+enum correction_method {
+    CORRECTION_METHOD_ABSOLUTE, // default value of the "correction_method" option
+    CORRECTION_METHOD_RELATIVE, // comp_adjust() additionally multiplies the adjustment by (1 - value)
+    NB_CORRECTION_METHODS,      // number of methods; also the size of one funcs[][] row in filter_frame()
+};
+
+static const char *color_names[NB_RANGES] = { // indexed by enum color_range; only used in log messages
+    "red", "yellow", "green", "cyan", "blue", "magenta", "white", "neutral", "black"
+};
+
+typedef int (*get_adjust_range_func)(int r, int g, int b, int min_val, int max_val); // returns a pixel's adjust range for one color range
+
+struct process_range {
+    int range_id; // enum color_range value, index into cmyk_adjust
+    uint32_t mask; // 1 << range_id, tested against the per-pixel range_flag
+    get_adjust_range_func get_adjust_range; // callback selected in register_range()
+};
+
+typedef struct ThreadData {
+    AVFrame *in, *out; // source and destination frames handed to every slice job (may be the same frame)
+} ThreadData;
+
+typedef struct {
+    const AVClass *class;
+    int correction_method; // enum correction_method; set by the option or read from the psfile
+    char *opt_cmyk_adjust[NB_RANGES]; // raw per-range option strings, parsed in init()
+    float cmyk_adjust[NB_RANGES][4]; // per-range C, M, Y, K adjustments
+    struct process_range process_ranges[NB_RANGES]; // color ranges to process
+    int nb_process_ranges; // number of entries used in process_ranges
+    char *psfile; // optional Photoshop selectivecolor file name
+} SelectiveColorContext;
+
+#define OFFSET(x) offsetof(SelectiveColorContext, x)
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
+#define RANGE_OPTION(color_name, range) /* string option "<color_name>s" stored in opt_cmyk_adjust[range] */ \
+    { color_name"s", "adjust "color_name" regions", OFFSET(opt_cmyk_adjust[range]), AV_OPT_TYPE_STRING, {.str=NULL}, CHAR_MIN, CHAR_MAX, FLAGS }
+
+static const AVOption selectivecolor_options[] = {
+    { "correction_method", "select correction method", OFFSET(correction_method), AV_OPT_TYPE_INT, {.i64 = CORRECTION_METHOD_ABSOLUTE}, 0, NB_CORRECTION_METHODS-1, FLAGS, "correction_method" },
+        { "absolute", NULL, 0, AV_OPT_TYPE_CONST, {.i64=CORRECTION_METHOD_ABSOLUTE}, INT_MIN, INT_MAX, FLAGS, "correction_method" },
+        { "relative", NULL, 0, AV_OPT_TYPE_CONST, {.i64=CORRECTION_METHOD_RELATIVE}, INT_MIN, INT_MAX, FLAGS, "correction_method" },
+    RANGE_OPTION("red",     RANGE_REDS), // each range option is parsed in init() as "%f %f %f %f" (C M Y K)
+    RANGE_OPTION("yellow",  RANGE_YELLOWS),
+    RANGE_OPTION("green",   RANGE_GREENS),
+    RANGE_OPTION("cyan",    RANGE_CYANS),
+    RANGE_OPTION("blue",    RANGE_BLUES),
+    RANGE_OPTION("magenta", RANGE_MAGENTAS),
+    RANGE_OPTION("white",   RANGE_WHITES),
+    RANGE_OPTION("neutral", RANGE_NEUTRALS),
+    RANGE_OPTION("black",   RANGE_BLACKS),
+    { "psfile", "set Photoshop selectivecolor file name", OFFSET(psfile), AV_OPT_TYPE_STRING, {.str=NULL}, .flags = FLAGS }, // when set, init() loads it and does not parse the range options
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(selectivecolor);
+
+/* Return the strict median of (r, g, b), or -1 when no component lies strictly between the other two. */
+static inline int get_mid_val(int r, int g, int b)
+{
+    if ((g < r && r < b) || (b < r && r < g)) return r;
+    if ((r < g && g < b) || (b < g && g < r)) return g;
+    return ((r < b && b < g) || (g < b && b < r)) ? b : -1;
+}
+
+/* Adjust range used for the red/green/blue ranges: max - mid, where a tie for the minimum counts as mid. */
+static int get_rgb_adjust_range(int r, int g, int b, int min_val, int max_val)
+{
+    const int mid_val = get_mid_val(r, g, b);
+    int at_min;
+
+    if (mid_val != -1)
+        return max_val - mid_val;
+
+    // no strict median: only "exactly two components sit at the minimum" yields
+    // a non-zero range (then mid == min, so the result is max - min)
+    at_min = (r == min_val) + (g == min_val) + (b == min_val);
+    return at_min == 2 ? max_val - min_val : 0;
+}
+
+/* Adjust range used for the cyan/magenta/yellow ranges: mid - min, where a tie for the maximum counts as mid. */
+static int get_cmy_adjust_range(int r, int g, int b, int min_val, int max_val)
+{
+    const int mid_val = get_mid_val(r, g, b);
+    int at_max;
+
+    if (mid_val != -1)
+        return mid_val - min_val;
+
+    // no strict median: only "exactly two components sit at the maximum" yields
+    // a non-zero range (then mid == max, so the result is max - min)
+    at_max = (r == max_val) + (g == max_val) + (b == max_val);
+    return at_max == 2 ? max_val - min_val : 0;
+}
+
+static int get_neutrals_adjust_range(int r, int g, int b, int min_val, int max_val)
+{
+    const int dist = abs(2 * max_val - 255) + abs(2 * min_val - 255); // |max-0.5| + |min-0.5|, on a 0..255 scale
+    return (2 * 255 - dist + 1) >> 1;                                 // 1 - dist, halved with rounding
+}
+
+static int get_whites_adjust_range(int r, int g, int b, int min_val, int max_val)
+{
+    // (min - 0.5) * 2, on a 0..255 scale; negative when min is below mid-scale
+    return 2 * min_val - 255;
+}
+
+static int get_blacks_adjust_range(int r, int g, int b, int min_val, int max_val)
+{
+    // (0.5 - max) * 2, on a 0..255 scale; negative when max is above mid-scale
+    return 255 - 2 * max_val;
+}
+
+/* Register range_id for processing if it has non-zero CMYK settings; EINVAL unless all lie in [-1;1]. */
+static int register_range(SelectiveColorContext *s, int range_id)
+{
+    const float *cmyk = s->cmyk_adjust[range_id];
+    struct process_range *pr;
+    int i;
+
+    if (!cmyk[0] && !cmyk[1] && !cmyk[2] && !cmyk[3])
+        return 0; // no user settings for this range: nothing to process
+
+    // validate before taking a process_ranges slot; the negated test also rejects NaN
+    for (i = 0; i < 4; i++)
+        if (!(cmyk[i] >= -1.0 && cmyk[i] <= 1.0)) {
+            av_log(s, AV_LOG_ERROR, "Invalid %s adjustments (%g %g %g %g). "
+                   "Settings must be set in [-1;1] range\n",
+                   color_names[range_id], cmyk[0], cmyk[1], cmyk[2], cmyk[3]);
+            return AVERROR(EINVAL);
+        }
+
+    pr = &s->process_ranges[s->nb_process_ranges++];
+    pr->range_id = range_id;
+    pr->mask = 1 << range_id;
+    if      (pr->mask & (1<<RANGE_REDS  | 1<<RANGE_GREENS   | 1<<RANGE_BLUES))   pr->get_adjust_range = get_rgb_adjust_range;
+    else if (pr->mask & (1<<RANGE_CYANS | 1<<RANGE_MAGENTAS | 1<<RANGE_YELLOWS)) pr->get_adjust_range = get_cmy_adjust_range;
+    else if (pr->mask & 1<<RANGE_WHITES)                                         pr->get_adjust_range = get_whites_adjust_range;
+    else if (pr->mask & 1<<RANGE_NEUTRALS)                                       pr->get_adjust_range = get_neutrals_adjust_range;
+    else if (pr->mask & 1<<RANGE_BLACKS)                                         pr->get_adjust_range = get_blacks_adjust_range;
+    else
+        av_assert0(0);
+    return 0;
+}
+
+/* Load a Photoshop selective color file into s->correction_method / s->cmyk_adjust and register its ranges. */
+static int parse_psfile(AVFilterContext *ctx, const char *fname)
+{
+    int16_t val;
+    int ret, i, k, version;
+    uint8_t *buf;
+    size_t size, pos = 0; // pos: read offset; buf/size must stay intact for av_file_unmap()
+    SelectiveColorContext *s = ctx->priv;
+
+    ret = av_file_map(fname, &buf, &size, 0, NULL);
+    if (ret < 0)
+        return ret;
+
+#define READ16(dst) do {                \
+    if (size - pos < 2) {               \
+        ret = AVERROR_INVALIDDATA;      \
+        goto end;                       \
+    }                                   \
+    dst = AV_RB16(buf + pos);           \
+    pos += 2;                           \
+} while (0)
+
+    READ16(version);
+    if (version != 1)
+        av_log(s, AV_LOG_WARNING, "Unsupported selective color file version %d, "
+               "the settings might not be loaded properly\n", version);
+    READ16(s->correction_method); // indexes a function table in filter_frame(): must be validated
+    if ((unsigned)s->correction_method >= NB_CORRECTION_METHODS) {
+        ret = AVERROR_INVALIDDATA;
+        goto end;
+    }
+
+    // 1st CMYK entry is reserved/unused
+    for (i = 0; i < FF_ARRAY_ELEMS(s->cmyk_adjust[0]); i++) {
+        READ16(val);
+        if (val)
+            av_log(s, AV_LOG_WARNING, "%c value of first CMYK entry is not 0 "
+                   "but %d\n", "CMYK"[i], val);
+    }
+
+    for (i = 0; i < FF_ARRAY_ELEMS(s->cmyk_adjust); i++) {
+        for (k = 0; k < FF_ARRAY_ELEMS(s->cmyk_adjust[0]); k++) {
+            READ16(val);
+            s->cmyk_adjust[i][k] = val / 100.;
+        }
+        if ((ret = register_range(s, i)) < 0)
+            goto end;
+    }
+end:
+    av_file_unmap(buf, size);
+    return ret;
+}
+
+/* Fill cmyk_adjust/process_ranges from the psfile if one is given, otherwise
+ * from the per-range option strings, then log the resulting adjustments. */
+static av_cold int init(AVFilterContext *ctx)
+{
+    SelectiveColorContext *s = ctx->priv;
+    int n, ret;
+
+    /* If the following conditions are not met, it will cause trouble while
+     * parsing the PS file */
+    av_assert0(FF_ARRAY_ELEMS(s->cmyk_adjust) == 10 - 1);
+    av_assert0(FF_ARRAY_ELEMS(s->cmyk_adjust[0]) == 4);
+
+    if (s->psfile) {
+        ret = parse_psfile(ctx, s->psfile);
+        if (ret < 0)
+            return ret;
+    } else {
+        for (n = 0; n < FF_ARRAY_ELEMS(s->opt_cmyk_adjust); n++) {
+            float *adj = s->cmyk_adjust[n];
+
+            if (!s->opt_cmyk_adjust[n])
+                continue;
+            sscanf(s->opt_cmyk_adjust[n], "%f %f %f %f", &adj[0], &adj[1], &adj[2], &adj[3]);
+            ret = register_range(s, n);
+            if (ret < 0)
+                return ret;
+        }
+    }
+
+    av_log(s, AV_LOG_VERBOSE, "Adjustments:%s\n", s->nb_process_ranges ? "" : " none");
+    for (n = 0; n < s->nb_process_ranges; n++) {
+        const int id = s->process_ranges[n].range_id;
+        const float *adj = s->cmyk_adjust[id];
+
+        av_log(s, AV_LOG_VERBOSE, "%8ss: C=%6g M=%6g Y=%6g K=%6g\n",
+               color_names[id], adj[0], adj[1], adj[2], adj[3]);
+    }
+
+    return 0;
+}
+
+/* Advertise RGB32 and 0RGB32 as the only supported pixel formats. */
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat supported[] = {
+        AV_PIX_FMT_RGB32, AV_PIX_FMT_0RGB32, AV_PIX_FMT_NONE };
+    AVFilterFormats *list = ff_make_format_list(supported);
+    return list ? ff_set_common_formats(ctx, list) : AVERROR(ENOMEM);
+}
+
+/* Adjustment for one component: the CMYK-derived delta clipped to [-value, 1-value], scaled by adjust_range. */
+static inline int comp_adjust(int adjust_range, float value, float adjust, float k, int correction_method)
+{
+    const float lo = -value;
+    const float hi = 1. - value;
+    const float raw = (-1. - adjust) * k - adjust;
+    const float delta = correction_method == CORRECTION_METHOD_RELATIVE ? raw * hi : raw;
+    return lrint(av_clipf(delta, lo, hi) * adjust_range);
+}
+
+static inline int selective_color(AVFilterContext *ctx, ThreadData *td,
+                                  int jobnr, int nb_jobs, int direct, int correction_method)
+{
+    int i, x, y;
+    const AVFrame *in = td->in;
+    AVFrame *out = td->out;
+    const SelectiveColorContext *s = ctx->priv;
+    const int height = in->height;
+    const int width  = in->width;
+    const int slice_start = (height *  jobnr   ) / nb_jobs; // first row handled by this job
+    const int slice_end   = (height * (jobnr+1)) / nb_jobs; // one past the last row handled by this job
+    const int dst_linesize = out->linesize[0];
+    const int src_linesize =  in->linesize[0];
+    uint8_t       *dst = out->data[0] + slice_start * dst_linesize;
+    const uint8_t *src =  in->data[0] + slice_start * src_linesize;
+    // Apply every registered range adjustment to each 32-bit pixel of rows [slice_start, slice_end)
+    for (y = slice_start; y < slice_end; y++) {
+        const uint32_t *src32 = (const uint32_t *)src;
+        uint32_t       *dst32 = (uint32_t *)dst;
+
+        for (x = 0; x < width; x++) {
+            const uint32_t color = *src32++;
+            const int r = color >> 16 & 0xff;
+            const int g = color >>  8 & 0xff;
+            const int b = color       & 0xff;
+            const int min_color = FFMIN3(r, g, b);
+            const int max_color = FFMAX3(r, g, b);
+            const uint32_t range_flag = (r == max_color) << RANGE_REDS // one bit per color range this pixel may belong to
+                                      | (r == min_color) << RANGE_CYANS
+                                      | (g == max_color) << RANGE_GREENS
+                                      | (g == min_color) << RANGE_MAGENTAS
+                                      | (b == max_color) << RANGE_BLUES
+                                      | (b == min_color) << RANGE_YELLOWS
+                                      | (r > 128 && g > 128 && b > 128) << RANGE_WHITES
+                                      | (color && (color & 0xffffff) != 0xffffff) << RANGE_NEUTRALS
+                                      | (r < 128 && g < 128 && b < 128) << RANGE_BLACKS;
+
+            const float rnorm = r / 255.;
+            const float gnorm = g / 255.;
+            const float bnorm = b / 255.;
+            int adjust_r = 0, adjust_g = 0, adjust_b = 0;
+
+            for (i = 0; i < s->nb_process_ranges; i++) {
+                const struct process_range *pr = &s->process_ranges[i];
+
+                if (range_flag & pr->mask) { // only ranges whose bit is set for this pixel are considered
+                    const int adjust_range = pr->get_adjust_range(r, g, b, min_color, max_color);
+
+                    if (adjust_range > 0) { // non-positive ranges contribute nothing
+                        const float *cmyk_adjust = s->cmyk_adjust[pr->range_id];
+                        const float adj_c = cmyk_adjust[0];
+                        const float adj_m = cmyk_adjust[1];
+                        const float adj_y = cmyk_adjust[2];
+                        const float k = cmyk_adjust[3]; // the K adjustment is passed for all three components
+
+                        adjust_r += comp_adjust(adjust_range, rnorm, adj_c, k, correction_method);
+                        adjust_g += comp_adjust(adjust_range, gnorm, adj_m, k, correction_method);
+                        adjust_b += comp_adjust(adjust_range, bnorm, adj_y, k, correction_method);
+                    }
+                }
+            }
+            // in direct mode filter_frame() passes out == in, so unchanged pixels need no store
+            if (!direct || adjust_r || adjust_g || adjust_b)
+                *dst32 = (color & 0xff000000) // the top byte is carried over unchanged
+                       | av_clip_uint8(r + adjust_r) << 16
+                       | av_clip_uint8(g + adjust_g) <<  8
+                       | av_clip_uint8(b + adjust_b);
+            dst32++;
+        }
+        src += src_linesize;
+        dst += dst_linesize;
+    }
+    return 0;
+}
+
+#define DEF_SELECTIVE_COLOR_FUNC(name, direct, correction_method)                           \
+static int selective_color_##name(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)  \
+{                                                                                           \
+    return selective_color(ctx, arg, jobnr, nb_jobs, direct, correction_method);            \
+}
+// One job entry point per (direct, correction_method) pair, each passing both to selective_color() as literal constants
+DEF_SELECTIVE_COLOR_FUNC(indirect_absolute, 0, CORRECTION_METHOD_ABSOLUTE)
+DEF_SELECTIVE_COLOR_FUNC(indirect_relative, 0, CORRECTION_METHOD_RELATIVE)
+DEF_SELECTIVE_COLOR_FUNC(  direct_absolute, 1, CORRECTION_METHOD_ABSOLUTE)
+DEF_SELECTIVE_COLOR_FUNC(  direct_relative, 1, CORRECTION_METHOD_RELATIVE)
+
+typedef int (*selective_color_func_type)(AVFilterContext *ctx, void *td, int jobnr, int nb_jobs);
+
+/* Process one frame: work in place when the input is writable, otherwise into
+ * a newly requested output buffer; the rows are split across execute() jobs. */
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    // first index: direct (in-place) or not; second index: correction method
+    static const selective_color_func_type funcs[2][2] = {
+        {selective_color_indirect_absolute, selective_color_indirect_relative},
+        {selective_color_direct_absolute,   selective_color_direct_relative},
+    };
+    AVFilterContext *ctx = inlink->dst;
+    const SelectiveColorContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+    const int direct = !!av_frame_is_writable(in);
+    AVFrame *out = in;
+    ThreadData td;
+
+    if (!direct) {
+        out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+        if (!out) {
+            av_frame_free(&in);
+            return AVERROR(ENOMEM);
+        }
+        av_frame_copy_props(out, in);
+    }
+
+    td.in  = in;
+    td.out = out;
+    ctx->internal->execute(ctx, funcs[direct][s->correction_method], &td, NULL,
+                           FFMIN(inlink->h, ctx->graph->nb_threads));
+
+    // the input is released here only when it was not reused as the output
+    if (!direct)
+        av_frame_free(&in);
+    return ff_filter_frame(outlink, out);
+}
+
+static const AVFilterPad selectivecolor_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame, // per-frame entry point of the filter
+    },
+    { NULL } // array terminator
+};
+
+static const AVFilterPad selectivecolor_outputs[] = {
+    {
+        .name = "default", // single video output; no pad callbacks are set here
+        .type = AVMEDIA_TYPE_VIDEO,
+    },
+    { NULL } // array terminator
+};
+
+AVFilter ff_vf_selectivecolor = {
+    .name          = "selectivecolor",
+    .description   = NULL_IF_CONFIG_SMALL("Apply CMYK adjustments to specific color ranges."),
+    .priv_size     = sizeof(SelectiveColorContext),
+    .init          = init,              // parses the psfile or the per-range option strings
+    .query_formats = query_formats,     // restricts the filter to RGB32 / 0RGB32
+    .inputs        = selectivecolor_inputs,
+    .outputs       = selectivecolor_outputs,
+    .priv_class    = &selectivecolor_class,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS, // filter_frame() dispatches its work through ctx->internal->execute()
+};
diff --git a/libavfilter/vf_separatefields.c b/libavfilter/vf_separatefields.c
index 42ce6829..3ea5eb67 100644
--- a/libavfilter/vf_separatefields.c
+++ b/libavfilter/vf_separatefields.c
@@ -30,10 +30,10 @@ typedef struct {
 static int config_props_output(AVFilterLink *outlink)
 {
     AVFilterContext *ctx = outlink->src;
-    SeparateFieldsContext *sf = ctx->priv;
+    SeparateFieldsContext *s = ctx->priv;
     AVFilterLink *inlink = ctx->inputs[0];
 
-    sf->nb_planes = av_pix_fmt_count_planes(inlink->format);
+    s->nb_planes = av_pix_fmt_count_planes(inlink->format);
 
     if (inlink->h & 1) {
         av_log(ctx, AV_LOG_ERROR, "height must be even\n");
@@ -64,19 +64,19 @@ static void extract_field(AVFrame *frame, int nb_planes, int type)
 static int filter_frame(AVFilterLink *inlink, AVFrame *inpicref)
 {
     AVFilterContext *ctx = inlink->dst;
-    SeparateFieldsContext *sf = ctx->priv;
+    SeparateFieldsContext *s = ctx->priv;
     AVFilterLink *outlink = ctx->outputs[0];
     int ret;
 
     inpicref->height = outlink->h;
     inpicref->interlaced_frame = 0;
 
-    if (!sf->second) {
+    if (!s->second) {
         goto clone;
     } else {
-        AVFrame *second = sf->second;
+        AVFrame *second = s->second;
 
-        extract_field(second, sf->nb_planes, second->top_field_first);
+        extract_field(second, s->nb_planes, second->top_field_first);
 
         if (second->pts != AV_NOPTS_VALUE &&
             inpicref->pts != AV_NOPTS_VALUE)
@@ -88,12 +88,12 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *inpicref)
         if (ret < 0)
             return ret;
 clone:
-        sf->second = av_frame_clone(inpicref);
-        if (!sf->second)
+        s->second = av_frame_clone(inpicref);
+        if (!s->second)
             return AVERROR(ENOMEM);
     }
 
-    extract_field(inpicref, sf->nb_planes, !inpicref->top_field_first);
+    extract_field(inpicref, s->nb_planes, !inpicref->top_field_first);
 
     if (inpicref->pts != AV_NOPTS_VALUE)
         inpicref->pts *= 2;
@@ -104,15 +104,15 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *inpicref)
 static int request_frame(AVFilterLink *outlink)
 {
     AVFilterContext *ctx = outlink->src;
-    SeparateFieldsContext *sf = ctx->priv;
+    SeparateFieldsContext *s = ctx->priv;
     int ret;
 
     ret = ff_request_frame(ctx->inputs[0]);
-    if (ret == AVERROR_EOF && sf->second) {
-        sf->second->pts *= 2;
-        extract_field(sf->second, sf->nb_planes, sf->second->top_field_first);
-        ret = ff_filter_frame(outlink, sf->second);
-        sf->second = 0;
+    if (ret == AVERROR_EOF && s->second) {
+        s->second->pts *= 2;
+        extract_field(s->second, s->nb_planes, s->second->top_field_first);
+        ret = ff_filter_frame(outlink, s->second);
+        s->second = 0;
     }
 
     return ret;
diff --git a/libavfilter/vf_showinfo.c b/libavfilter/vf_showinfo.c
index 51259447..5146995c 100644
--- a/libavfilter/vf_showinfo.c
+++ b/libavfilter/vf_showinfo.c
@@ -87,7 +87,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
 
     for (plane = 0; plane < 4 && frame->data[plane] && frame->linesize[plane]; plane++) {
         uint8_t *data = frame->data[plane];
-        int h = plane == 1 || plane == 2 ? FF_CEIL_RSHIFT(inlink->h, vsub) : inlink->h;
+        int h = plane == 1 || plane == 2 ? AV_CEIL_RSHIFT(inlink->h, vsub) : inlink->h;
         int linesize = av_image_get_linesize(frame->format, frame->width, plane);
 
         if (linesize < 0)
@@ -166,10 +166,9 @@ static int config_props(AVFilterContext *ctx, AVFilterLink *link, int is_out)
 {
 
     av_log(ctx, AV_LOG_INFO, "config %s time_base: %d/%d, frame_rate: %d/%d\n",
-           is_out ? "out" :"in",
+           is_out ? "out" : "in",
            link->time_base.num, link->time_base.den,
-           link->frame_rate.num, link->frame_rate.den
-    );
+           link->frame_rate.num, link->frame_rate.den);
 
     return 0;
 }
@@ -188,10 +187,10 @@ static int config_props_out(AVFilterLink *link)
 
 static const AVFilterPad avfilter_vf_showinfo_inputs[] = {
     {
-        .name         = "default",
-        .type         = AVMEDIA_TYPE_VIDEO,
-        .filter_frame = filter_frame,
-        .config_props  = config_props_in,
+        .name             = "default",
+        .type             = AVMEDIA_TYPE_VIDEO,
+        .filter_frame     = filter_frame,
+        .config_props     = config_props_in,
     },
     { NULL }
 };
diff --git a/libavfilter/vf_showpalette.c b/libavfilter/vf_showpalette.c
index e4d59b62..f1627ba5 100644
--- a/libavfilter/vf_showpalette.c
+++ b/libavfilter/vf_showpalette.c
@@ -46,16 +46,26 @@ static int query_formats(AVFilterContext *ctx)
 {
     static const enum AVPixelFormat in_fmts[]  = {AV_PIX_FMT_PAL8,  AV_PIX_FMT_NONE};
     static const enum AVPixelFormat out_fmts[] = {AV_PIX_FMT_RGB32, AV_PIX_FMT_NONE};
+    int ret;
     AVFilterFormats *in  = ff_make_format_list(in_fmts);
     AVFilterFormats *out = ff_make_format_list(out_fmts);
     if (!in || !out) {
-        av_freep(&in);
-        av_freep(&out);
-        return AVERROR(ENOMEM);
+        ret = AVERROR(ENOMEM);
+        goto fail;
     }
-    ff_formats_ref(in,  &ctx->inputs[0]->out_formats);
-    ff_formats_ref(out, &ctx->outputs[0]->in_formats);
+
+    if ((ret = ff_formats_ref(in , &ctx->inputs[0]->out_formats)) < 0 ||
+        (ret = ff_formats_ref(out, &ctx->outputs[0]->in_formats)) < 0)
+        goto fail;
     return 0;
+fail:
+    if (in)
+        av_freep(&in->formats);
+    av_freep(&in);
+    if (out)
+        av_freep(&out->formats);
+    av_freep(&out);
+    return ret;
 }
 
 static int config_output(AVFilterLink *outlink)
@@ -120,7 +130,7 @@ static const AVFilterPad showpalette_outputs[] = {
 
 AVFilter ff_vf_showpalette = {
     .name          = "showpalette",
-    .description   = NULL_IF_CONFIG_SMALL("Display frame palette"),
+    .description   = NULL_IF_CONFIG_SMALL("Display frame palette."),
     .priv_size     = sizeof(ShowPaletteContext),
     .query_formats = query_formats,
     .inputs        = showpalette_inputs,
diff --git a/libavfilter/vf_shuffleframes.c b/libavfilter/vf_shuffleframes.c
new file mode 100644
index 00000000..f49c9c60
--- /dev/null
+++ b/libavfilter/vf_shuffleframes.c
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avassert.h"
+#include "libavutil/avstring.h"
+#include "libavutil/common.h"
+#include "libavutil/internal.h"
+#include "libavutil/opt.h"
+
+#include "avfilter.h"
+#include "internal.h"
+#include "video.h"
+
+typedef struct ShuffleFramesContext {
+    const AVClass *class;
+    char *mapping;      ///< "mapping" option string
+    AVFrame **frames;   ///< frames buffered for the current group
+    int *map;           ///< map[n]: index of the buffered frame sent in slot n
+    int64_t *pts;       ///< pts of each buffered frame, in arrival order
+    int in_frames;      ///< number of frames currently buffered
+    int nb_frames;      ///< group size, i.e. number of mapping entries
+} ShuffleFramesContext;
+
+/**
+ * Parse the "mapping" option into s->map and allocate the per-group tables.
+ *
+ * The option is a list of frame indexes separated by ' ' or '|'. The group
+ * size is the number of separators plus one, and every index has to lie in
+ * [0, group size).
+ *
+ * @return 0 on success, AVERROR(ENOMEM) if an allocation fails, AVERROR(EINVAL)
+ *         if an entry is missing, is not a number or is out of range
+ */
+static av_cold int init(AVFilterContext *ctx)
+{
+    ShuffleFramesContext *s = ctx->priv;
+    char *mapping, *saveptr = NULL, *p;
+    int n, nb_items, ret = 0;
+
+    nb_items = 1;
+    for (p = s->mapping; *p; p++) {
+        if (*p == '|' || *p == ' ')
+            nb_items++;
+    }
+
+    s->frames = av_calloc(nb_items, sizeof(*s->frames));
+    s->map    = av_calloc(nb_items, sizeof(*s->map));
+    s->pts    = av_calloc(nb_items, sizeof(*s->pts));
+    if (!s->map || !s->frames || !s->pts) {
+        return AVERROR(ENOMEM);
+    }
+
+    /* Tokenize a copy so that the option string itself is left untouched. */
+    mapping = av_strdup(s->mapping);
+    if (!mapping)
+        return AVERROR(ENOMEM);
+
+    for (n = 0; n < nb_items; n++) {
+        char *map = av_strtok(n == 0 ? mapping : NULL, " |", &saveptr);
+        if (!map || sscanf(map, "%d", &s->map[n]) != 1) {
+            av_log(ctx, AV_LOG_ERROR, "Invalid or missing mapping entry %d.\n", n);
+            ret = AVERROR(EINVAL);
+            break;
+        }
+
+        if (s->map[n] < 0 || s->map[n] >= nb_items) {
+            av_log(ctx, AV_LOG_ERROR, "Index out of range.\n");
+            ret = AVERROR(EINVAL);
+            break;
+        }
+    }
+
+    if (ret >= 0)
+        s->nb_frames = nb_items;
+    av_free(mapping);
+    return ret;
+}
+
+/**
+ * Buffer input frames and, once a group of nb_frames is complete, output the
+ * group in the order given by s->map.
+ *
+ * Output slot n is a clone of buffered frame map[n] carrying the pts of
+ * buffered frame n, so timestamps are emitted in arrival order.
+ *
+ * @return 0 while the group is still filling; after a flush, the first error
+ *         met or else the result of the last ff_filter_frame() call
+ */
+static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
+{
+    AVFilterContext    *ctx = inlink->dst;
+    ShuffleFramesContext *s = ctx->priv;
+    int n, ret = 0;
+
+    /* nb_frames is at least 1 after init() and every flush resets in_frames
+     * to 0, so a free slot always exists on entry. */
+    av_assert0(s->in_frames < s->nb_frames);
+
+    s->frames[s->in_frames] = frame;
+    s->pts[s->in_frames]    = frame->pts;
+    s->in_frames++;
+    if (s->in_frames < s->nb_frames)
+        return 0;
+
+    /* Flush in the call that completes the group: waiting for the next call
+     * would leave that call's frame neither buffered nor freed. */
+    for (n = 0; n < s->nb_frames; n++) {
+        AVFrame *out = av_frame_clone(s->frames[s->map[n]]);
+
+        if (!out) {
+            ret = AVERROR(ENOMEM);
+            break;
+        }
+        out->pts = s->pts[n];
+        ret = ff_filter_frame(ctx->outputs[0], out);
+        if (ret < 0)
+            break;
+    }
+
+    /* Release the group on success and on error alike, so that no frame
+     * outlives the flush and the next call starts with an empty buffer. */
+    for (n = 0; n < s->nb_frames; n++)
+        av_frame_free(&s->frames[n]);
+    s->in_frames = 0;
+
+    return ret;
+}
+
+/* Free the frames still buffered (an incomplete group) and the tables. */
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    ShuffleFramesContext *s = ctx->priv;
+
+    while (s->frames && s->in_frames > 0)
+        av_frame_free(&s->frames[--s->in_frames]);
+
+    av_freep(&s->frames);
+    av_freep(&s->map);
+    av_freep(&s->pts);
+}
+
+#define OFFSET(x) offsetof(ShuffleFramesContext, x)
+#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
+static const AVOption shuffleframes_options[] = {
+    { "mapping", "set destination indexes of input frames",  OFFSET(mapping), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS },
+    { NULL },
+};
+
+AVFILTER_DEFINE_CLASS(shuffleframes);
+
+static const AVFilterPad shuffleframes_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame,
+    },
+    { NULL },
+};
+
+static const AVFilterPad shuffleframes_outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO,
+    },
+    { NULL },
+};
+
+AVFilter ff_vf_shuffleframes = {
+    .name          = "shuffleframes",
+    .description   = NULL_IF_CONFIG_SMALL("Shuffle video frames."),
+    .priv_size     = sizeof(ShuffleFramesContext),
+    .priv_class    = &shuffleframes_class,
+    .init          = init,
+    .uninit        = uninit,
+    .inputs        = shuffleframes_inputs,
+    .outputs       = shuffleframes_outputs,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC,
+};
diff --git a/libavfilter/vf_shuffleplanes.c b/libavfilter/vf_shuffleplanes.c
index 80085cd7..4bc7b79f 100644
--- a/libavfilter/vf_shuffleplanes.c
+++ b/libavfilter/vf_shuffleplanes.c
@@ -134,12 +134,7 @@ static const AVOption shuffleplanes_options[] = {
     { NULL },
 };
 
-static const AVClass shuffleplanes_class = {
-    .class_name = "shuffleplanes",
-    .item_name  = av_default_item_name,
-    .option     = shuffleplanes_options,
-    .version    = LIBAVUTIL_VERSION_INT,
-};
+AVFILTER_DEFINE_CLASS(shuffleplanes);
 
 static const AVFilterPad shuffleplanes_inputs[] = {
     {
@@ -162,7 +157,7 @@ static const AVFilterPad shuffleplanes_outputs[] = {
 
 AVFilter ff_vf_shuffleplanes = {
     .name         = "shuffleplanes",
-    .description  = NULL_IF_CONFIG_SMALL("Shuffle video planes"),
+    .description  = NULL_IF_CONFIG_SMALL("Shuffle video planes."),
 
     .priv_size    = sizeof(ShufflePlanesContext),
     .priv_class   = &shuffleplanes_class,
diff --git a/libavfilter/vf_signalstats.c b/libavfilter/vf_signalstats.c
index 88b715e6..c340d6b4 100644
--- a/libavfilter/vf_signalstats.c
+++ b/libavfilter/vf_signalstats.c
@@ -154,8 +154,8 @@ static int config_props(AVFilterLink *outlink)
     outlink->w = inlink->w;
     outlink->h = inlink->h;
 
-    s->chromaw = FF_CEIL_RSHIFT(inlink->w, s->hsub);
-    s->chromah = FF_CEIL_RSHIFT(inlink->h, s->vsub);
+    s->chromaw = AV_CEIL_RSHIFT(inlink->w, s->hsub);
+    s->chromah = AV_CEIL_RSHIFT(inlink->h, s->vsub);
 
     s->fs = inlink->w * inlink->h;
     s->cfs = s->chromaw * s->chromah;
diff --git a/libavfilter/vf_smartblur.c b/libavfilter/vf_smartblur.c
index 169f540b..117e0ec6 100644
--- a/libavfilter/vf_smartblur.c
+++ b/libavfilter/vf_smartblur.c
@@ -84,34 +84,34 @@ AVFILTER_DEFINE_CLASS(smartblur);
 
 static av_cold int init(AVFilterContext *ctx)
 {
-    SmartblurContext *sblur = ctx->priv;
+    SmartblurContext *s = ctx->priv;
 
     /* make chroma default to luma values, if not explicitly set */
-    if (sblur->chroma.radius < RADIUS_MIN)
-        sblur->chroma.radius = sblur->luma.radius;
-    if (sblur->chroma.strength < STRENGTH_MIN)
-        sblur->chroma.strength  = sblur->luma.strength;
-    if (sblur->chroma.threshold < THRESHOLD_MIN)
-        sblur->chroma.threshold = sblur->luma.threshold;
+    if (s->chroma.radius < RADIUS_MIN)
+        s->chroma.radius = s->luma.radius;
+    if (s->chroma.strength < STRENGTH_MIN)
+        s->chroma.strength  = s->luma.strength;
+    if (s->chroma.threshold < THRESHOLD_MIN)
+        s->chroma.threshold = s->luma.threshold;
 
-    sblur->luma.quality = sblur->chroma.quality = 3.0;
-    sblur->sws_flags = SWS_BICUBIC;
+    s->luma.quality = s->chroma.quality = 3.0;
+    s->sws_flags = SWS_BICUBIC;
 
     av_log(ctx, AV_LOG_VERBOSE,
            "luma_radius:%f luma_strength:%f luma_threshold:%d "
            "chroma_radius:%f chroma_strength:%f chroma_threshold:%d\n",
-           sblur->luma.radius, sblur->luma.strength, sblur->luma.threshold,
-           sblur->chroma.radius, sblur->chroma.strength, sblur->chroma.threshold);
+           s->luma.radius, s->luma.strength, s->luma.threshold,
+           s->chroma.radius, s->chroma.strength, s->chroma.threshold);
 
     return 0;
 }
 
 static av_cold void uninit(AVFilterContext *ctx)
 {
-    SmartblurContext *sblur = ctx->priv;
+    SmartblurContext *s = ctx->priv;
 
-    sws_freeContext(sblur->luma.filter_context);
-    sws_freeContext(sblur->chroma.filter_context);
+    sws_freeContext(s->luma.filter_context);
+    sws_freeContext(s->chroma.filter_context);
 }
 
 static int query_formats(AVFilterContext *ctx)
@@ -159,17 +159,17 @@ static int alloc_sws_context(FilterParam *f, int width, int height, unsigned int
 
 static int config_props(AVFilterLink *inlink)
 {
-    SmartblurContext *sblur = inlink->dst->priv;
+    SmartblurContext *s = inlink->dst->priv;
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
 
-    sblur->hsub = desc->log2_chroma_w;
-    sblur->vsub = desc->log2_chroma_h;
+    s->hsub = desc->log2_chroma_w;
+    s->vsub = desc->log2_chroma_h;
 
-    alloc_sws_context(&sblur->luma, inlink->w, inlink->h, sblur->sws_flags);
-    alloc_sws_context(&sblur->chroma,
-                      FF_CEIL_RSHIFT(inlink->w, sblur->hsub),
-                      FF_CEIL_RSHIFT(inlink->h, sblur->vsub),
-                      sblur->sws_flags);
+    alloc_sws_context(&s->luma, inlink->w, inlink->h, s->sws_flags);
+    alloc_sws_context(&s->chroma,
+                      AV_CEIL_RSHIFT(inlink->w, s->hsub),
+                      AV_CEIL_RSHIFT(inlink->h, s->vsub),
+                      s->sws_flags);
 
     return 0;
 }
@@ -240,11 +240,11 @@ static void blur(uint8_t       *dst, const int dst_linesize,
 
 static int filter_frame(AVFilterLink *inlink, AVFrame *inpic)
 {
-    SmartblurContext  *sblur  = inlink->dst->priv;
+    SmartblurContext  *s  = inlink->dst->priv;
     AVFilterLink *outlink     = inlink->dst->outputs[0];
     AVFrame *outpic;
-    int cw = FF_CEIL_RSHIFT(inlink->w, sblur->hsub);
-    int ch = FF_CEIL_RSHIFT(inlink->h, sblur->vsub);
+    int cw = AV_CEIL_RSHIFT(inlink->w, s->hsub);
+    int ch = AV_CEIL_RSHIFT(inlink->h, s->vsub);
 
     outpic = ff_get_video_buffer(outlink, outlink->w, outlink->h);
     if (!outpic) {
@@ -255,18 +255,18 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *inpic)
 
     blur(outpic->data[0], outpic->linesize[0],
          inpic->data[0],  inpic->linesize[0],
-         inlink->w, inlink->h, sblur->luma.threshold,
-         sblur->luma.filter_context);
+         inlink->w, inlink->h, s->luma.threshold,
+         s->luma.filter_context);
 
     if (inpic->data[2]) {
         blur(outpic->data[1], outpic->linesize[1],
              inpic->data[1],  inpic->linesize[1],
-             cw, ch, sblur->chroma.threshold,
-             sblur->chroma.filter_context);
+             cw, ch, s->chroma.threshold,
+             s->chroma.filter_context);
         blur(outpic->data[2], outpic->linesize[2],
              inpic->data[2],  inpic->linesize[2],
-             cw, ch, sblur->chroma.threshold,
-             sblur->chroma.filter_context);
+             cw, ch, s->chroma.threshold,
+             s->chroma.filter_context);
     }
 
     av_frame_free(&inpic);
diff --git a/libavfilter/vf_spp.c b/libavfilter/vf_spp.c
index b75f5f39..fe579ced 100644
--- a/libavfilter/vf_spp.c
+++ b/libavfilter/vf_spp.c
@@ -63,7 +63,7 @@ static const AVOption spp_options[] = {
     { "mode", "set thresholding mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64 = MODE_HARD}, 0, NB_MODES - 1, FLAGS, "mode" },
         { "hard", "hard thresholding", 0, AV_OPT_TYPE_CONST, {.i64 = MODE_HARD}, INT_MIN, INT_MAX, FLAGS, "mode" },
         { "soft", "soft thresholding", 0, AV_OPT_TYPE_CONST, {.i64 = MODE_SOFT}, INT_MIN, INT_MAX, FLAGS, "mode" },
-    { "use_bframe_qp", "use B-frames' QP", OFFSET(use_bframe_qp), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, FLAGS },
+    { "use_bframe_qp", "use B-frames' QP", OFFSET(use_bframe_qp), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS },
     { NULL }
 };
 
@@ -328,24 +328,24 @@ static int query_formats(AVFilterContext *ctx)
 
 static int config_input(AVFilterLink *inlink)
 {
-    SPPContext *spp = inlink->dst->priv;
+    SPPContext *s = inlink->dst->priv;
     const int h = FFALIGN(inlink->h + 16, 16);
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
-    const int bps = desc->comp[0].depth_minus1 + 1;
+    const int bps = desc->comp[0].depth;
 
-    av_opt_set_int(spp->dct, "bits_per_sample", bps, 0);
-    avcodec_dct_init(spp->dct);
+    av_opt_set_int(s->dct, "bits_per_sample", bps, 0);
+    avcodec_dct_init(s->dct);
 
     if (ARCH_X86)
-        ff_spp_init_x86(spp);
+        ff_spp_init_x86(s);
 
-    spp->hsub = desc->log2_chroma_w;
-    spp->vsub = desc->log2_chroma_h;
-    spp->temp_linesize = FFALIGN(inlink->w + 16, 16);
-    spp->temp = av_malloc_array(spp->temp_linesize, h * sizeof(*spp->temp));
-    spp->src  = av_malloc_array(spp->temp_linesize, h * sizeof(*spp->src) * 2);
+    s->hsub = desc->log2_chroma_w;
+    s->vsub = desc->log2_chroma_h;
+    s->temp_linesize = FFALIGN(inlink->w + 16, 16);
+    s->temp = av_malloc_array(s->temp_linesize, h * sizeof(*s->temp));
+    s->src  = av_malloc_array(s->temp_linesize, h * sizeof(*s->src) * 2);
 
-    if (!spp->temp || !spp->src)
+    if (!s->temp || !s->src)
         return AVERROR(ENOMEM);
     return 0;
 }
@@ -353,55 +353,55 @@ static int config_input(AVFilterLink *inlink)
 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 {
     AVFilterContext *ctx = inlink->dst;
-    SPPContext *spp = ctx->priv;
+    SPPContext *s = ctx->priv;
     AVFilterLink *outlink = ctx->outputs[0];
     AVFrame *out = in;
     int qp_stride = 0;
     const int8_t *qp_table = NULL;
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
-    const int depth = desc->comp[0].depth_minus1 + 1;
+    const int depth = desc->comp[0].depth;
 
     /* if we are not in a constant user quantizer mode and we don't want to use
      * the quantizers from the B-frames (B-frames often have a higher QP), we
      * need to save the qp table from the last non B-frame; this is what the
      * following code block does */
-    if (!spp->qp) {
-        qp_table = av_frame_get_qp_table(in, &qp_stride, &spp->qscale_type);
+    if (!s->qp) {
+        qp_table = av_frame_get_qp_table(in, &qp_stride, &s->qscale_type);
 
-        if (qp_table && !spp->use_bframe_qp && in->pict_type != AV_PICTURE_TYPE_B) {
+        if (qp_table && !s->use_bframe_qp && in->pict_type != AV_PICTURE_TYPE_B) {
             int w, h;
 
             /* if the qp stride is not set, it means the QP are only defined on
              * a line basis */
             if (!qp_stride) {
-                w = FF_CEIL_RSHIFT(inlink->w, 4);
+                w = AV_CEIL_RSHIFT(inlink->w, 4);
                 h = 1;
             } else {
                 w = qp_stride;
-                h = FF_CEIL_RSHIFT(inlink->h, 4);
+                h = AV_CEIL_RSHIFT(inlink->h, 4);
             }
 
-            if (w * h > spp->non_b_qp_alloc_size) {
-                int ret = av_reallocp_array(&spp->non_b_qp_table, w, h);
+            if (w * h > s->non_b_qp_alloc_size) {
+                int ret = av_reallocp_array(&s->non_b_qp_table, w, h);
                 if (ret < 0) {
-                    spp->non_b_qp_alloc_size = 0;
+                    s->non_b_qp_alloc_size = 0;
                     return ret;
                 }
-                spp->non_b_qp_alloc_size = w * h;
+                s->non_b_qp_alloc_size = w * h;
             }
 
-            av_assert0(w * h <= spp->non_b_qp_alloc_size);
-            memcpy(spp->non_b_qp_table, qp_table, w * h);
+            av_assert0(w * h <= s->non_b_qp_alloc_size);
+            memcpy(s->non_b_qp_table, qp_table, w * h);
         }
     }
 
-    if (spp->log2_count && !ctx->is_disabled) {
-        if (!spp->use_bframe_qp && spp->non_b_qp_table)
-            qp_table = spp->non_b_qp_table;
+    if (s->log2_count && !ctx->is_disabled) {
+        if (!s->use_bframe_qp && s->non_b_qp_table)
+            qp_table = s->non_b_qp_table;
 
-        if (qp_table || spp->qp) {
-            const int cw = FF_CEIL_RSHIFT(inlink->w, spp->hsub);
-            const int ch = FF_CEIL_RSHIFT(inlink->h, spp->vsub);
+        if (qp_table || s->qp) {
+            const int cw = AV_CEIL_RSHIFT(inlink->w, s->hsub);
+            const int ch = AV_CEIL_RSHIFT(inlink->h, s->vsub);
 
             /* get a new frame if in-place is not possible or if the dimensions
              * are not multiple of 8 */
@@ -419,11 +419,11 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
                 out->height = in->height;
             }
 
-            filter(spp, out->data[0], in->data[0], out->linesize[0], in->linesize[0], inlink->w, inlink->h, qp_table, qp_stride, 1, depth);
+            filter(s, out->data[0], in->data[0], out->linesize[0], in->linesize[0], inlink->w, inlink->h, qp_table, qp_stride, 1, depth);
 
             if (out->data[2]) {
-                filter(spp, out->data[1], in->data[1], out->linesize[1], in->linesize[1], cw,        ch,        qp_table, qp_stride, 0, depth);
-                filter(spp, out->data[2], in->data[2], out->linesize[2], in->linesize[2], cw,        ch,        qp_table, qp_stride, 0, depth);
+                filter(s, out->data[1], in->data[1], out->linesize[1], in->linesize[1], cw,        ch,        qp_table, qp_stride, 0, depth);
+                filter(s, out->data[2], in->data[2], out->linesize[2], in->linesize[2], cw,        ch,        qp_table, qp_stride, 0, depth);
             }
             emms_c();
         }
@@ -442,13 +442,13 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
                            char *res, int res_len, int flags)
 {
-    SPPContext *spp = ctx->priv;
+    SPPContext *s = ctx->priv;
 
     if (!strcmp(cmd, "level")) {
         if (!strcmp(args, "max"))
-            spp->log2_count = MAX_LEVEL;
+            s->log2_count = MAX_LEVEL;
         else
-            spp->log2_count = av_clip(strtol(args, NULL, 10), 0, MAX_LEVEL);
+            s->log2_count = av_clip(strtol(args, NULL, 10), 0, MAX_LEVEL);
         return 0;
     }
     return AVERROR(ENOSYS);
@@ -456,44 +456,44 @@ static int process_command(AVFilterContext *ctx, const char *cmd, const char *ar
 
 static av_cold int init_dict(AVFilterContext *ctx, AVDictionary **opts)
 {
-    SPPContext *spp = ctx->priv;
+    SPPContext *s = ctx->priv;
     int ret;
 
-    spp->avctx = avcodec_alloc_context3(NULL);
-    spp->dct = avcodec_dct_alloc();
-    if (!spp->avctx || !spp->dct)
+    s->avctx = avcodec_alloc_context3(NULL);
+    s->dct = avcodec_dct_alloc();
+    if (!s->avctx || !s->dct)
         return AVERROR(ENOMEM);
 
     if (opts) {
         AVDictionaryEntry *e = NULL;
 
         while ((e = av_dict_get(*opts, "", e, AV_DICT_IGNORE_SUFFIX))) {
-            if ((ret = av_opt_set(spp->dct, e->key, e->value, 0)) < 0)
+            if ((ret = av_opt_set(s->dct, e->key, e->value, 0)) < 0)
                 return ret;
         }
         av_dict_free(opts);
     }
 
-    spp->store_slice = store_slice_c;
-    switch (spp->mode) {
-    case MODE_HARD: spp->requantize = hardthresh_c; break;
-    case MODE_SOFT: spp->requantize = softthresh_c; break;
+    s->store_slice = store_slice_c;
+    switch (s->mode) {
+    case MODE_HARD: s->requantize = hardthresh_c; break;
+    case MODE_SOFT: s->requantize = softthresh_c; break;
     }
     return 0;
 }
 
 static av_cold void uninit(AVFilterContext *ctx)
 {
-    SPPContext *spp = ctx->priv;
+    SPPContext *s = ctx->priv;
 
-    av_freep(&spp->temp);
-    av_freep(&spp->src);
-    if (spp->avctx) {
-        avcodec_close(spp->avctx);
-        av_freep(&spp->avctx);
+    av_freep(&s->temp);
+    av_freep(&s->src);
+    if (s->avctx) {
+        avcodec_close(s->avctx);
+        av_freep(&s->avctx);
     }
-    av_freep(&spp->dct);
-    av_freep(&spp->non_b_qp_table);
+    av_freep(&s->dct);
+    av_freep(&s->non_b_qp_table);
 }
 
 static const AVFilterPad spp_inputs[] = {
diff --git a/libavfilter/vf_ssim.c b/libavfilter/vf_ssim.c
new file mode 100644
index 00000000..dd8f2648
--- /dev/null
+++ b/libavfilter/vf_ssim.c
@@ -0,0 +1,402 @@
+/*
+ * Copyright (c) 2003-2013 Loren Merritt
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Computes the Structural Similarity Metric between two video streams.
+ * original algorithm:
+ * Z. Wang, A. C. Bovik, H. R. Sheikh and E. P. Simoncelli,
+ *   "Image quality assessment: From error visibility to structural similarity,"
+ *   IEEE Transactions on Image Processing, vol. 13, no. 4, pp. 600-612, Apr. 2004.
+ *
+ * To improve speed, this implementation uses the standard approximation of
+ * overlapped 8x8 block sums, rather than the original gaussian weights.
+ */
+
+/**
+ * @file
+ * Calculate the SSIM between two input videos.
+ */
+
+#include "libavutil/avstring.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+#include "avfilter.h"
+#include "dualinput.h"
+#include "drawutils.h"
+#include "formats.h"
+#include "internal.h"
+#include "ssim.h"
+#include "video.h"
+
+typedef struct SSIMContext {
+    const AVClass *class;
+    FFDualInputContext dinput;   ///< dual-input state; init() sets do_ssim() as its process callback
+    FILE *stats_file;            ///< per-frame stats output set up by init(); written by do_ssim() when non-NULL
+    char *stats_file_str;        ///< value of the "stats_file" / "f" option
+    int nb_components;           ///< number of planes compared by do_ssim()
+    uint64_t nb_frames;          ///< frames processed so far, incremented by do_ssim()
+    double ssim[4], ssim_total;  ///< running sums of the per-plane and weighted per-frame SSIM
+    char comps[4];               ///< component letter appended to metadata keys and stats entries
+    float coefs[4];              ///< per-plane weight used for the frame score
+    uint8_t rgba_map[4];         ///< plane index lookup applied when is_rgb is set
+    int planewidth[4];           ///< plane widths passed to ssim_plane()
+    int planeheight[4];          ///< plane heights passed to ssim_plane()
+    int *temp;                   ///< scratch buffer passed to ssim_plane()
+    int is_rgb;                  ///< non-zero: report planes through rgba_map
+    SSIMDSPContext dsp;          ///< ssim_4x4_line / ssim_end_line implementations
+} SSIMContext;
+
+#define OFFSET(x) offsetof(SSIMContext, x)
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
+
+static const AVOption ssim_options[] = {
+    {"stats_file", "Set file where to store per-frame difference information", OFFSET(stats_file_str), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
+    {"f",          "Set file where to store per-frame difference information", OFFSET(stats_file_str), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(ssim);
+
+/* Store d, formatted with two decimals, under key (plus comp when non-zero). */
+static void set_meta(AVDictionary **metadata, const char *key, char comp, float d)
+{
+    char text[128], suffixed[128];
+    const char *name = key;
+    if (comp) {
+        snprintf(suffixed, sizeof(suffixed), "%s%c", key, comp);
+        name = suffixed;
+    }
+    snprintf(text, sizeof(text), "%0.2f", d);
+    av_dict_set(metadata, name, text, 0);
+}
+
+/* Accumulate the raw sums of `width` horizontally adjacent 4x4 blocks. For each
+ * block, sums[block] receives: sum of main, sum of ref, sum of the squares of
+ * both, and sum of the main * ref products. */
+static void ssim_4x4xn(const uint8_t *main, ptrdiff_t main_stride,
+                       const uint8_t *ref, ptrdiff_t ref_stride,
+                       int (*sums)[4], int width)
+{
+    int blk, row, col;
+
+    for (blk = 0; blk < width; blk++, main += 4, ref += 4) {
+        uint32_t sum_a = 0, sum_b = 0, sum_sq = 0, sum_ab = 0;
+
+        for (row = 0; row < 4; row++) {
+            for (col = 0; col < 4; col++) {
+                const int a = main[col + row * main_stride];
+                const int b = ref[col + row * ref_stride];
+
+                sum_a  += a;
+                sum_b  += b;
+                sum_sq += a * a + b * b;
+                sum_ab += a * b;
+            }
+        }
+
+        sums[blk][0] = sum_a;
+        sums[blk][1] = sum_b;
+        sums[blk][2] = sum_sq;
+        sums[blk][3] = sum_ab;
+    }
+}
+
+/* Turn the four sums accumulated over one window (see ssim_endn()) into the
+ * SSIM value of that window. */
+static float ssim_end1(int s1, int s2, int ss, int s12)
+{
+    static const int ssim_c1 = (int)(.01*.01*255*255*64 + .5);
+    static const int ssim_c2 = (int)(.03*.03*255*255*64*63 + .5);
+
+    const int vars  = ss  * 64 - s1 * s1 - s2 * s2;
+    const int covar = s12 * 64 - s1 * s2;
+    const int num1  = 2 * s1 * s2 + ssim_c1;
+    const int den1  = s1 * s1 + s2 * s2 + ssim_c1;
+
+    return (float)num1 * (float)(2 * covar + ssim_c2)
+         / ((float)den1 * (float)(vars + ssim_c2));
+}
+
+/* Sum the SSIM of `width` windows, each built from a 2x2 group of blocks. */
+static float ssim_endn(const int (*sum0)[4], const int (*sum1)[4], int width)
+{
+    float total = 0.0;
+    int col, k, acc[4];
+    for (col = 0; col < width; col++) {
+        for (k = 0; k < 4; k++)
+            acc[k] = sum0[col][k] + sum0[col + 1][k] + sum1[col][k] + sum1[col + 1][k];
+        total += ssim_end1(acc[0], acc[1], acc[2], acc[3]);
+    }
+    return total;
+}
+
+/* Mean SSIM of one plane; block sums are produced and combined through dsp. */
+static float ssim_plane(SSIMDSPContext *dsp,
+                        uint8_t *main, int main_stride,
+                        uint8_t *ref, int ref_stride,
+                        int width, int height, void *temp)
+{
+    const int blocks_w = width  >> 2;
+    const int blocks_h = height >> 2;
+    int (*sum0)[4] = temp;
+    int (*sum1)[4] = sum0 + blocks_w + 3;
+    float total = 0.0;
+    int line = 0, y;
+
+    for (y = 1; y < blocks_h; y++) {
+        while (line <= y) {
+            FFSWAP(void*, sum0, sum1);
+            dsp->ssim_4x4_line(&main[4 * line * main_stride], main_stride,
+                               &ref[4 * line * ref_stride], ref_stride,
+                               sum0, blocks_w);
+            line++;
+        }
+        total += dsp->ssim_end_line((const int (*)[4])sum0, (const int (*)[4])sum1, blocks_w - 1);
+    }
+
+    return total / ((blocks_h - 1) * (blocks_w - 1));
+}
+
+static double ssim_db(double ssim, double weight) /* ssim on a dB scale; do_ssim() passes weight 1.0 */
+{
+    return 10 * log10(weight / (weight - ssim));
+}
+
+/* Per-frame callback: measure the SSIM of main against ref, accumulate the
+ * totals, export the values as frame metadata and, if open, to the stats file. */
+static AVFrame *do_ssim(AVFilterContext *ctx, AVFrame *main,
+                        const AVFrame *ref)
+{
+    AVDictionary **metadata = avpriv_frame_get_metadatap(main);
+    SSIMContext *s = ctx->priv;
+    float plane[4], frame_ssim = 0.0;
+    double frame_db;
+    int i;
+
+    s->nb_frames++;
+
+    for (i = 0; i < s->nb_components; i++) {
+        plane[i] = ssim_plane(&s->dsp, main->data[i], main->linesize[i],
+                              ref->data[i], ref->linesize[i],
+                              s->planewidth[i], s->planeheight[i], s->temp);
+        frame_ssim += s->coefs[i] * plane[i];
+        s->ssim[i] += plane[i];
+    }
+    s->ssim_total += frame_ssim;
+    frame_db = ssim_db(frame_ssim, 1.0);
+
+    for (i = 0; i < s->nb_components; i++)
+        set_meta(metadata, "lavfi.ssim.", s->comps[i],
+                 plane[s->is_rgb ? s->rgba_map[i] : i]);
+    set_meta(metadata, "lavfi.ssim.All", 0, frame_ssim);
+    set_meta(metadata, "lavfi.ssim.dB", 0, frame_db);
+
+    if (s->stats_file) {
+        fprintf(s->stats_file, "n:%"PRId64" ", s->nb_frames);
+        for (i = 0; i < s->nb_components; i++)
+            fprintf(s->stats_file, "%c:%f ", s->comps[i],
+                    plane[s->is_rgb ? s->rgba_map[i] : i]);
+        fprintf(s->stats_file, "All:%f (%f)\n", frame_ssim, frame_db);
+    }
+
+    return main;
+}
+
+static av_cold int init(AVFilterContext *ctx)
+{ /* Open the optional stats file and configure the dual-input helper; returns 0 or a negative AVERROR code. */
+    SSIMContext *s = ctx->priv;
+
+    if (s->stats_file_str) {
+        if (!strcmp(s->stats_file_str, "-")) { /* "-" selects standard output */
+            s->stats_file = stdout;
+        } else {
+            s->stats_file = fopen(s->stats_file_str, "w");
+            if (!s->stats_file) {
+                int err = AVERROR(errno); /* capture errno before any further call can overwrite it */
+                char buf[128];
+                av_strerror(err, buf, sizeof(buf));
+                av_log(ctx, AV_LOG_ERROR, "Could not open stats file %s: %s\n",
+                       s->stats_file_str, buf);
+                return err;
+            }
+        }
+    }
+
+    s->dinput.process = do_ssim; /* callback used by the dual-input helper */
+    s->dinput.shortest = 1;
+    s->dinput.repeatlast = 0;
+    return 0;
+}
+
+static int query_formats(AVFilterContext *ctx)
+{ /* Restrict the filter to the pixel formats listed below. */
+    static const enum AVPixelFormat pix_fmts[] = {
+        AV_PIX_FMT_GRAY8,
+        AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P,
+        AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV410P,
+        AV_PIX_FMT_YUVJ411P, AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
+        AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_YUVJ444P,
+        AV_PIX_FMT_GBRP,
+        AV_PIX_FMT_NONE /* list terminator */
+    };
+
+    AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
+    if (!fmts_list) /* a NULL list is reported as an allocation failure */
+        return AVERROR(ENOMEM);
+    return ff_set_common_formats(ctx, fmts_list);
+}
+
+static int config_input_ref(AVFilterLink *inlink)
+{ /* Check that both inputs match, then derive plane sizes and weights, allocate scratch space and pick DSP routines. */
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    AVFilterContext *ctx  = inlink->dst;
+    SSIMContext *s = ctx->priv;
+    int sum = 0, i; /* sum: total sample count over all planes */
+
+    s->nb_components = desc->nb_components;
+
+    if (ctx->inputs[0]->w != ctx->inputs[1]->w ||
+        ctx->inputs[0]->h != ctx->inputs[1]->h) {
+        av_log(ctx, AV_LOG_ERROR, "Width and height of input videos must be same.\n");
+        return AVERROR(EINVAL);
+    }
+    if (ctx->inputs[0]->format != ctx->inputs[1]->format) {
+        av_log(ctx, AV_LOG_ERROR, "Inputs must be of same pixel format.\n");
+        return AVERROR(EINVAL);
+    }
+
+    s->is_rgb = ff_fill_rgba_map(s->rgba_map, inlink->format) >= 0; /* treated as RGB when the map lookup does not fail */
+    s->comps[0] = s->is_rgb ? 'R' : 'Y'; /* comps[]: component letters used in metadata keys and log output */
+    s->comps[1] = s->is_rgb ? 'G' : 'U';
+    s->comps[2] = s->is_rgb ? 'B' : 'V';
+    s->comps[3] = 'A';
+
+    s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h); /* planes 1 and 2 use the chroma shift */
+    s->planeheight[0] = s->planeheight[3] = inlink->h;
+    s->planewidth[1]  = s->planewidth[2]  = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
+    s->planewidth[0]  = s->planewidth[3]  = inlink->w;
+    for (i = 0; i < s->nb_components; i++)
+        sum += s->planeheight[i] * s->planewidth[i];
+    for (i = 0; i < s->nb_components; i++)
+        s->coefs[i] = (double) s->planeheight[i] * s->planewidth[i] / sum; /* weight = the plane's share of all samples */
+
+    s->temp = av_malloc((2 * inlink->w + 12) * sizeof(*s->temp)); /* scratch for the two row buffers of ssim_plane(); freed in uninit() */
+    if (!s->temp)
+        return AVERROR(ENOMEM);
+
+    s->dsp.ssim_4x4_line = ssim_4x4xn; /* C implementations; ff_ssim_init_x86() presumably replaces them when available */
+    s->dsp.ssim_end_line = ssim_endn;
+    if (ARCH_X86)
+        ff_ssim_init_x86(&s->dsp);
+
+    return 0;
+}
+
+static int config_output(AVFilterLink *outlink)
+{ /* Give the output link the main input's properties and initialize the dual-input helper. */
+    AVFilterContext *ctx = outlink->src;
+    SSIMContext *s = ctx->priv;
+    AVFilterLink *mainlink = ctx->inputs[0]; /* input 0 is the "main" pad */
+    int ret;
+
+    outlink->w = mainlink->w;
+    outlink->h = mainlink->h;
+    outlink->time_base = mainlink->time_base;
+    outlink->sample_aspect_ratio = mainlink->sample_aspect_ratio;
+    outlink->frame_rate = mainlink->frame_rate;
+
+    if ((ret = ff_dualinput_init(ctx, &s->dinput)) < 0) /* pass any negative error code through unchanged */
+        return ret;
+
+    return 0;
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *buf)
+{ /* Hand an incoming frame from either input pad to the dual-input helper. */
+    SSIMContext *s = inlink->dst->priv;
+    return ff_dualinput_filter_frame(&s->dinput, inlink, buf);
+}
+
+static int request_frame(AVFilterLink *outlink)
+{ /* Forward an output frame request to the dual-input helper. */
+    SSIMContext *s = outlink->src->priv;
+    return ff_dualinput_request_frame(&s->dinput, outlink);
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{ /* Log the averaged scores if any frame was processed, then release all resources. */
+    SSIMContext *s = ctx->priv;
+
+    if (s->nb_frames > 0) {
+        char buf[256]; /* collects the per-component part of the summary line */
+        int i;
+        buf[0] = 0;
+        for (i = 0; i < s->nb_components; i++) {
+            int c = s->is_rgb ? s->rgba_map[i] : i; /* same component lookup as in do_ssim() */
+            av_strlcatf(buf, sizeof(buf), " %c:%f (%f)", s->comps[i], s->ssim[c] / s->nb_frames,
+                        ssim_db(s->ssim[c], s->nb_frames)); /* dB is computed from the summed score against nb_frames */
+        }
+        av_log(ctx, AV_LOG_INFO, "SSIM%s All:%f (%f)\n", buf,
+               s->ssim_total / s->nb_frames, ssim_db(s->ssim_total, s->nb_frames));
+    }
+
+    ff_dualinput_uninit(&s->dinput);
+
+    if (s->stats_file && s->stats_file != stdout) /* stdout was not opened by init(), so it is never closed here */
+        fclose(s->stats_file);
+
+    av_freep(&s->temp);
+}
+
+static const AVFilterPad ssim_inputs[] = { /* two video input pads sharing one filter_frame callback */
+    {
+        .name         = "main",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame,
+    },{
+        .name         = "reference",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame,
+        .config_props = config_input_ref, /* only this pad has a config callback; it validates both inputs */
+    },
+    { NULL } /* terminating entry */
+};
+
+static const AVFilterPad ssim_outputs[] = { /* single video output pad */
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .config_props  = config_output,
+        .request_frame = request_frame,
+    },
+    { NULL } /* terminating entry */
+};
+
+AVFilter ff_vf_ssim = { /* filter definition tying together the callbacks, pads and options of "ssim" */
+    .name          = "ssim",
+    .description   = NULL_IF_CONFIG_SMALL("Calculate the SSIM between two video streams."),
+    .init          = init,
+    .uninit        = uninit,
+    .query_formats = query_formats,
+    .priv_size     = sizeof(SSIMContext), /* size of the private context reached through ctx->priv */
+    .priv_class    = &ssim_class,
+    .inputs        = ssim_inputs,
+    .outputs       = ssim_outputs,
+};
diff --git a/libavfilter/vf_stack.c b/libavfilter/vf_stack.c
new file mode 100644
index 00000000..03643b6f
--- /dev/null
+++ b/libavfilter/vf_stack.c
@@ -0,0 +1,281 @@
+/*
+ * Copyright (c) 2015 Paul B. Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avstring.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "framesync.h"
+#include "video.h"
+
+typedef struct StackContext { /* private state shared by the hstack and vstack filters */
+    const AVClass *class;
+    const AVPixFmtDescriptor *desc; /* descriptor of the output pixel format, set in config_output() */
+    int nb_inputs;                  /* "inputs" option */
+    int shortest;                   /* "shortest" option */
+    int is_vertical;                /* set to 1 by init() when the filter name is "vstack" */
+    int nb_planes;                  /* plane count of the output format */
+
+    AVFrame **frames;               /* nb_inputs frame pointers, refilled for every output frame */
+    FFFrameSync fs;
+} StackContext;
+
+static int query_formats(AVFilterContext *ctx)
+{ /* Accept every pixel format that is not flagged as paletted, hwaccel or bitstream. */
+    AVFilterFormats *pix_fmts = NULL;
+    int fmt, ret;
+
+    for (fmt = 0; av_pix_fmt_desc_get(fmt); fmt++) { /* walk format ids until no descriptor is returned */
+        const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);
+        if (!(desc->flags & AV_PIX_FMT_FLAG_PAL ||
+              desc->flags & AV_PIX_FMT_FLAG_HWACCEL ||
+              desc->flags & AV_PIX_FMT_FLAG_BITSTREAM) &&
+            (ret = ff_add_format(&pix_fmts, fmt)) < 0) /* the add is attempted only for formats that pass the flag test */
+            return ret;
+    }
+
+    return ff_set_common_formats(ctx, pix_fmts);
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{ /* Hand an incoming frame from any input pad to the frame synchronizer. */
+    StackContext *s = inlink->dst->priv;
+    return ff_framesync_filter_frame(&s->fs, inlink, in);
+}
+
+static av_cold int init(AVFilterContext *ctx)
+{ /* Pick the stacking direction from the filter name and create nb_inputs video input pads. */
+    StackContext *s = ctx->priv;
+    int i, ret;
+
+    if (!strcmp(ctx->filter->name, "vstack")) /* any other filter name leaves is_vertical unchanged */
+        s->is_vertical = 1;
+
+    s->frames = av_calloc(s->nb_inputs, sizeof(*s->frames));
+    if (!s->frames)
+        return AVERROR(ENOMEM);
+
+    for (i = 0; i < s->nb_inputs; i++) {
+        AVFilterPad pad = { 0 };
+
+        pad.type = AVMEDIA_TYPE_VIDEO;
+        pad.name = av_asprintf("input%d", i); /* allocated name; released in uninit() once the pad is inserted */
+        if (!pad.name)
+            return AVERROR(ENOMEM);
+        pad.filter_frame = filter_frame;
+
+        if ((ret = ff_insert_inpad(ctx, i, &pad)) < 0) {
+            av_freep(&pad.name); /* freed here because the failed call's ownership of the name is not relied on */
+            return ret;
+        }
+    }
+
+    return 0;
+}
+
+static int process_frame(FFFrameSync *fs)
+{ /* Frame-sync event callback: copy the current frame of every input into one output frame and send it on. */
+    AVFilterContext *ctx = fs->parent;
+    AVFilterLink *outlink = ctx->outputs[0];
+    StackContext *s = fs->opaque;
+    AVFrame **in = s->frames;
+    AVFrame *out;
+    int i, p, ret, offset[4] = { 0 }; /* offset[p]: write position in plane p (rows if vertical, bytes if horizontal) */
+
+    for (i = 0; i < s->nb_inputs; i++) {
+        if ((ret = ff_framesync_get_frame(&s->fs, i, &in[i], 0)) < 0)
+            return ret;
+    }
+
+    out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!out)
+        return AVERROR(ENOMEM);
+    out->pts = av_rescale_q(s->fs.pts, s->fs.time_base, outlink->time_base); /* sync timestamp converted to the output time base */
+
+    for (i = 0; i < s->nb_inputs; i++) {
+        AVFilterLink *inlink = ctx->inputs[i];
+        int linesize[4]; /* bytes per row of each plane for this input's width */
+        int height[4];   /* rows in each plane of this input */
+
+        if ((ret = av_image_fill_linesizes(linesize, inlink->format, inlink->w)) < 0) {
+            av_frame_free(&out);
+            return ret;
+        }
+
+        height[1] = height[2] = AV_CEIL_RSHIFT(inlink->h, s->desc->log2_chroma_h); /* planes 1 and 2 use the chroma shift */
+        height[0] = height[3] = inlink->h;
+
+        for (p = 0; p < s->nb_planes; p++) {
+            if (s->is_vertical) {
+                av_image_copy_plane(out->data[p] + offset[p] * out->linesize[p], /* start offset[p] rows down the plane */
+                                    out->linesize[p],
+                                    in[i]->data[p],
+                                    in[i]->linesize[p],
+                                    linesize[p], height[p]);
+                offset[p] += height[p];
+            } else {
+                av_image_copy_plane(out->data[p] + offset[p], /* start offset[p] bytes into each row */
+                                    out->linesize[p],
+                                    in[i]->data[p],
+                                    in[i]->linesize[p],
+                                    linesize[p], height[p]);
+                offset[p] += linesize[p];
+            }
+        }
+    }
+
+    return ff_filter_frame(outlink, out);
+}
+
+static int config_output(AVFilterLink *outlink)
+{ /* Validate the input sizes, compute the stacked output size and configure frame synchronization. */
+    AVFilterContext *ctx = outlink->src;
+    StackContext *s = ctx->priv;
+    AVRational time_base = ctx->inputs[0]->time_base;   /* timing is taken from input 0 */
+    AVRational frame_rate = ctx->inputs[0]->frame_rate;
+    int height = ctx->inputs[0]->h; /* starts at input 0's size and grows along the stacking axis */
+    int width = ctx->inputs[0]->w;
+    FFFrameSyncIn *in;
+    int i, ret;
+
+    if (s->is_vertical) { /* vertical: all widths must match, heights add up */
+        for (i = 1; i < s->nb_inputs; i++) {
+            if (ctx->inputs[i]->w != width) {
+                av_log(ctx, AV_LOG_ERROR, "Input %d width %d does not match input %d width %d.\n", i, ctx->inputs[i]->w, 0, width);
+                return AVERROR(EINVAL);
+            }
+            height += ctx->inputs[i]->h;
+        }
+    } else { /* horizontal: all heights must match, widths add up */
+        for (i = 1; i < s->nb_inputs; i++) {
+            if (ctx->inputs[i]->h != height) {
+                av_log(ctx, AV_LOG_ERROR, "Input %d height %d does not match input %d height %d.\n", i, ctx->inputs[i]->h, 0, height);
+                return AVERROR(EINVAL);
+            }
+            width += ctx->inputs[i]->w;
+        }
+    }
+
+    s->desc = av_pix_fmt_desc_get(outlink->format);
+    if (!s->desc) /* a negotiated format without a descriptor is reported as an internal bug */
+        return AVERROR_BUG;
+    s->nb_planes = av_pix_fmt_count_planes(outlink->format);
+
+    outlink->w          = width;
+    outlink->h          = height;
+    outlink->time_base  = time_base;
+    outlink->frame_rate = frame_rate;
+
+    if ((ret = ff_framesync_init(&s->fs, ctx, s->nb_inputs)) < 0)
+        return ret;
+
+    in = s->fs.in;
+    s->fs.opaque = s; /* read back as fs->opaque in process_frame() */
+    s->fs.on_event = process_frame;
+
+    for (i = 0; i < s->nb_inputs; i++) { /* every input gets the same sync settings */
+        AVFilterLink *inlink = ctx->inputs[i];
+
+        in[i].time_base = inlink->time_base;
+        in[i].sync   = 1;
+        in[i].before = EXT_STOP;
+        in[i].after  = s->shortest ? EXT_STOP : EXT_INFINITY; /* "shortest" selects EXT_STOP for the period after an input ends */
+    }
+
+    return ff_framesync_configure(&s->fs);
+}
+
+static int request_frame(AVFilterLink *outlink)
+{ /* Forward an output frame request to the frame synchronizer. */
+    StackContext *s = outlink->src->priv;
+    return ff_framesync_request_frame(&s->fs, outlink);
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{ /* Release the frame synchronizer, the frame pointer array and the pad names allocated in init(). */
+    StackContext *s = ctx->priv;
+    int i;
+
+    ff_framesync_uninit(&s->fs);
+    av_freep(&s->frames);
+
+    for (i = 0; i < ctx->nb_inputs; i++) /* counts the pads actually inserted (ctx->nb_inputs), not the option value */
+        av_freep(&ctx->input_pads[i].name);
+}
+
+#define OFFSET(x) offsetof(StackContext, x)
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM
+static const AVOption stack_options[] = { /* option table used by both hstack and vstack */
+    { "inputs", "set number of inputs", OFFSET(nb_inputs), AV_OPT_TYPE_INT, {.i64=2}, 2, INT_MAX, .flags = FLAGS }, /* default 2, minimum 2 */
+    { "shortest", "force termination when the shortest input terminates", OFFSET(shortest), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, .flags = FLAGS }, /* default off */
+    { NULL }, /* terminating entry */
+};
+
+static const AVFilterPad outputs[] = { /* single video output pad, shared by hstack and vstack */
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .config_props  = config_output,
+        .request_frame = request_frame,
+    },
+    { NULL } /* terminating entry */
+};
+
+#if CONFIG_HSTACK_FILTER
+
+#define hstack_options stack_options
+AVFILTER_DEFINE_CLASS(hstack);
+
+AVFilter ff_vf_hstack = { /* horizontal variant; no static .inputs because init() inserts the pads */
+    .name          = "hstack",
+    .description   = NULL_IF_CONFIG_SMALL("Stack video inputs horizontally."),
+    .priv_size     = sizeof(StackContext),
+    .priv_class    = &hstack_class,
+    .query_formats = query_formats,
+    .outputs       = outputs,
+    .init          = init,
+    .uninit        = uninit,
+    .flags         = AVFILTER_FLAG_DYNAMIC_INPUTS,
+};
+
+#endif /* CONFIG_HSTACK_FILTER */
+
+#if CONFIG_VSTACK_FILTER
+
+#define vstack_options stack_options
+AVFILTER_DEFINE_CLASS(vstack);
+
+AVFilter ff_vf_vstack = { /* vertical variant; init() recognizes it by the name "vstack" */
+    .name          = "vstack",
+    .description   = NULL_IF_CONFIG_SMALL("Stack video inputs vertically."),
+    .priv_size     = sizeof(StackContext),
+    .priv_class    = &vstack_class,
+    .query_formats = query_formats,
+    .outputs       = outputs,
+    .init          = init,
+    .uninit        = uninit,
+    .flags         = AVFILTER_FLAG_DYNAMIC_INPUTS,
+};
+
+#endif /* CONFIG_VSTACK_FILTER */
diff --git a/libavfilter/vf_stereo3d.c b/libavfilter/vf_stereo3d.c
index 771bdde6..2036c943 100644
--- a/libavfilter/vf_stereo3d.c
+++ b/libavfilter/vf_stereo3d.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2010 Gordon Schmidt <gordon.schmidt <at> s2000.tu-chemnitz.de>
- * Copyright (c) 2013 Paul B Mahol
+ * Copyright (c) 2013-2015 Paul B Mahol
  *
  * This file is part of FFmpeg.
  *
@@ -21,6 +21,7 @@
 
 #include "libavutil/avassert.h"
 #include "libavutil/imgutils.h"
+#include "libavutil/intreadwrite.h"
 #include "libavutil/opt.h"
 #include "libavutil/parseutils.h"
 #include "libavutil/pixdesc.h"
@@ -29,6 +30,7 @@
 #include "formats.h"
 #include "internal.h"
 #include "video.h"
+#include "stereo3d.h"
 
 enum StereoCode {
     ANAGLYPH_RC_GRAY,   // anaglyph red/cyan gray
@@ -59,6 +61,11 @@ enum StereoCode {
     ABOVE_BELOW_2_RL,   // above-below with half height resolution
     ALTERNATING_LR,     // alternating frames (left eye first, right eye second)
     ALTERNATING_RL,     // alternating frames (right eye first, left eye second)
+    CHECKERBOARD_LR,    // checkerboard pattern (left eye first, right eye second)
+    CHECKERBOARD_RL,    // checkerboard pattern (right eye first, left eye second)
+    INTERLEAVE_COLS_LR, // column-interleave (left eye first, right eye second)
+    INTERLEAVE_COLS_RL, // column-interleave (right eye first, left eye second)
+    HDMI,               // HDMI frame pack (left eye first, right eye second)
     STEREO_CODE_COUNT   // TODO: needs autodetection
 };
 
@@ -68,6 +75,7 @@ typedef struct StereoComponent {
     int off_left, off_right;
     int off_lstep, off_rstep;
     int row_left, row_right;
+    int row_step;
 } StereoComponent;
 
 static const int ana_coeff[][3][6] = {
@@ -133,7 +141,6 @@ typedef struct Stereo3DContext {
     const AVClass *class;
     StereoComponent in, out;
     int width, height;
-    int row_step;
     const int *ana_matrix[3];
     int nb_planes;
     int linesize[4];
@@ -141,25 +148,31 @@ typedef struct Stereo3DContext {
     int hsub, vsub;
     int pixstep[4];
     AVFrame *prev;
-    double ts_unit;
+    int blanks;
+    int in_off_left[4], in_off_right[4];
+    Stereo3DDSPContext dsp;
 } Stereo3DContext;
 
 #define OFFSET(x) offsetof(Stereo3DContext, x)
 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
 
 static const AVOption stereo3d_options[] = {
-    { "in",    "set input format",  OFFSET(in.format),  AV_OPT_TYPE_INT, {.i64=SIDE_BY_SIDE_LR}, SIDE_BY_SIDE_LR, STEREO_CODE_COUNT-1, FLAGS, "in"},
-    { "ab2l",  "above below half height left first",  0, AV_OPT_TYPE_CONST, {.i64=ABOVE_BELOW_2_LR},  0, 0, FLAGS, "in" },
-    { "ab2r",  "above below half height right first", 0, AV_OPT_TYPE_CONST, {.i64=ABOVE_BELOW_2_RL},  0, 0, FLAGS, "in" },
-    { "abl",   "above below left first",              0, AV_OPT_TYPE_CONST, {.i64=ABOVE_BELOW_LR},    0, 0, FLAGS, "in" },
-    { "abr",   "above below right first",             0, AV_OPT_TYPE_CONST, {.i64=ABOVE_BELOW_RL},    0, 0, FLAGS, "in" },
-    { "al",    "alternating frames left first",       0, AV_OPT_TYPE_CONST, {.i64=ALTERNATING_LR},    0, 0, FLAGS, "in" },
-    { "ar",    "alternating frames right first",      0, AV_OPT_TYPE_CONST, {.i64=ALTERNATING_RL},    0, 0, FLAGS, "in" },
-    { "sbs2l", "side by side half width left first",  0, AV_OPT_TYPE_CONST, {.i64=SIDE_BY_SIDE_2_LR}, 0, 0, FLAGS, "in" },
-    { "sbs2r", "side by side half width right first", 0, AV_OPT_TYPE_CONST, {.i64=SIDE_BY_SIDE_2_RL}, 0, 0, FLAGS, "in" },
-    { "sbsl",  "side by side left first",             0, AV_OPT_TYPE_CONST, {.i64=SIDE_BY_SIDE_LR},   0, 0, FLAGS, "in" },
-    { "sbsr",  "side by side right first",            0, AV_OPT_TYPE_CONST, {.i64=SIDE_BY_SIDE_RL},   0, 0, FLAGS, "in" },
-    { "out",   "set output format", OFFSET(out.format), AV_OPT_TYPE_INT, {.i64=ANAGLYPH_RC_DUBOIS}, 0, STEREO_CODE_COUNT-1, FLAGS, "out"},
+    { "in",    "set input format",  OFFSET(in.format),   AV_OPT_TYPE_INT,   {.i64=SIDE_BY_SIDE_LR}, INTERLEAVE_ROWS_LR, STEREO_CODE_COUNT-1, FLAGS, "in"},
+    { "ab2l",  "above below half height left first",  0, AV_OPT_TYPE_CONST, {.i64=ABOVE_BELOW_2_LR},   0, 0, FLAGS, "in" },
+    { "ab2r",  "above below half height right first", 0, AV_OPT_TYPE_CONST, {.i64=ABOVE_BELOW_2_RL},   0, 0, FLAGS, "in" },
+    { "abl",   "above below left first",              0, AV_OPT_TYPE_CONST, {.i64=ABOVE_BELOW_LR},     0, 0, FLAGS, "in" },
+    { "abr",   "above below right first",             0, AV_OPT_TYPE_CONST, {.i64=ABOVE_BELOW_RL},     0, 0, FLAGS, "in" },
+    { "al",    "alternating frames left first",       0, AV_OPT_TYPE_CONST, {.i64=ALTERNATING_LR},     0, 0, FLAGS, "in" },
+    { "ar",    "alternating frames right first",      0, AV_OPT_TYPE_CONST, {.i64=ALTERNATING_RL},     0, 0, FLAGS, "in" },
+    { "sbs2l", "side by side half width left first",  0, AV_OPT_TYPE_CONST, {.i64=SIDE_BY_SIDE_2_LR},  0, 0, FLAGS, "in" },
+    { "sbs2r", "side by side half width right first", 0, AV_OPT_TYPE_CONST, {.i64=SIDE_BY_SIDE_2_RL},  0, 0, FLAGS, "in" },
+    { "sbsl",  "side by side left first",             0, AV_OPT_TYPE_CONST, {.i64=SIDE_BY_SIDE_LR},    0, 0, FLAGS, "in" },
+    { "sbsr",  "side by side right first",            0, AV_OPT_TYPE_CONST, {.i64=SIDE_BY_SIDE_RL},    0, 0, FLAGS, "in" },
+    { "irl",   "interleave rows left first",          0, AV_OPT_TYPE_CONST, {.i64=INTERLEAVE_ROWS_LR}, 0, 0, FLAGS, "in" },
+    { "irr",   "interleave rows right first",         0, AV_OPT_TYPE_CONST, {.i64=INTERLEAVE_ROWS_RL}, 0, 0, FLAGS, "in" },
+    { "icl",   "interleave columns left first",       0, AV_OPT_TYPE_CONST, {.i64=INTERLEAVE_COLS_LR}, 0, 0, FLAGS, "in" },
+    { "icr",   "interleave columns right first",      0, AV_OPT_TYPE_CONST, {.i64=INTERLEAVE_COLS_RL}, 0, 0, FLAGS, "in" },
+    { "out",   "set output format", OFFSET(out.format),  AV_OPT_TYPE_INT,   {.i64=ANAGLYPH_RC_DUBOIS}, 0, STEREO_CODE_COUNT-1, FLAGS, "out"},
     { "ab2l",  "above below half height left first",  0, AV_OPT_TYPE_CONST, {.i64=ABOVE_BELOW_2_LR},   0, 0, FLAGS, "out" },
     { "ab2r",  "above below half height right first", 0, AV_OPT_TYPE_CONST, {.i64=ABOVE_BELOW_2_RL},   0, 0, FLAGS, "out" },
     { "abl",   "above below left first",              0, AV_OPT_TYPE_CONST, {.i64=ABOVE_BELOW_LR},     0, 0, FLAGS, "out" },
@@ -188,6 +201,11 @@ static const AVOption stereo3d_options[] = {
     { "sbs2r", "side by side half width right first", 0, AV_OPT_TYPE_CONST, {.i64=SIDE_BY_SIDE_2_RL},  0, 0, FLAGS, "out" },
     { "sbsl",  "side by side left first",             0, AV_OPT_TYPE_CONST, {.i64=SIDE_BY_SIDE_LR},    0, 0, FLAGS, "out" },
     { "sbsr",  "side by side right first",            0, AV_OPT_TYPE_CONST, {.i64=SIDE_BY_SIDE_RL},    0, 0, FLAGS, "out" },
+    { "chl",   "checkerboard left first",             0, AV_OPT_TYPE_CONST, {.i64=CHECKERBOARD_LR},    0, 0, FLAGS, "out" },
+    { "chr",   "checkerboard right first",            0, AV_OPT_TYPE_CONST, {.i64=CHECKERBOARD_RL},    0, 0, FLAGS, "out" },
+    { "icl",   "interleave columns left first",       0, AV_OPT_TYPE_CONST, {.i64=INTERLEAVE_COLS_LR}, 0, 0, FLAGS, "out" },
+    { "icr",   "interleave columns right first",      0, AV_OPT_TYPE_CONST, {.i64=INTERLEAVE_COLS_RL}, 0, 0, FLAGS, "out" },
+    { "hdmi",  "HDMI frame pack",                     0, AV_OPT_TYPE_CONST, {.i64=HDMI},               0, 0, FLAGS, "out" },
     { NULL }
 };
 
@@ -285,6 +303,57 @@ static int query_formats(AVFilterContext *ctx)
     return ff_set_common_formats(ctx, fmts_list);
 }
 
+static inline uint8_t ana_convert(const int *coeff, const uint8_t *left, const uint8_t *right)
+{ /* One output channel: coeff[0..2] weight the left pixel's three bytes, coeff[3..5] the right pixel's. */
+    int sum;
+
+    sum  = coeff[0] * left[0] + coeff[3] * right[0]; //red in
+    sum += coeff[1] * left[1] + coeff[4] * right[1]; //green in
+    sum += coeff[2] * left[2] + coeff[5] * right[2]; //blue in
+
+    return av_clip_uint8(sum >> 16); /* the shift suggests coefficients scaled by 65536 - TODO confirm against ana_coeff */
+}
+
+static void anaglyph_ic(uint8_t *dst, uint8_t *lsrc, uint8_t *rsrc,
+                        ptrdiff_t dst_linesize, ptrdiff_t l_linesize, ptrdiff_t r_linesize,
+                        int width, int height,
+                        const int *ana_matrix_r, const int *ana_matrix_g, const int *ana_matrix_b)
+{ /* As anaglyph(), but sources are read at twice the destination offset (presumably column-interleaved input). */
+    int x, y, o; /* o: byte offset of the current destination pixel within its row */
+
+    for (y = 0; y < height; y++) {
+        for (o = 0, x = 0; x < width; x++, o+= 3) { /* 3 bytes per destination pixel */
+            dst[o    ] = ana_convert(ana_matrix_r, lsrc + o * 2, rsrc + o * 2); /* o * 2 advances 6 source bytes per output pixel */
+            dst[o + 1] = ana_convert(ana_matrix_g, lsrc + o * 2, rsrc + o * 2);
+            dst[o + 2] = ana_convert(ana_matrix_b, lsrc + o * 2, rsrc + o * 2);
+        }
+
+        dst  += dst_linesize; /* move all three pointers to their next row */
+        lsrc += l_linesize;
+        rsrc += r_linesize;
+    }
+}
+
+static void anaglyph(uint8_t *dst, uint8_t *lsrc, uint8_t *rsrc,
+                     ptrdiff_t dst_linesize, ptrdiff_t l_linesize, ptrdiff_t r_linesize,
+                     int width, int height,
+                     const int *ana_matrix_r, const int *ana_matrix_g, const int *ana_matrix_b)
+{ /* Fill a width x height block of 3-byte pixels, mixing left and right source pixels per channel matrix. */
+    int x, y, o; /* o: byte offset of the current pixel within its row */
+
+    for (y = 0; y < height; y++) {
+        for (o = 0, x = 0; x < width; x++, o+= 3) { /* 3 bytes per pixel in all three buffers */
+            dst[o    ] = ana_convert(ana_matrix_r, lsrc + o, rsrc + o); /* one coefficient row per output byte */
+            dst[o + 1] = ana_convert(ana_matrix_g, lsrc + o, rsrc + o);
+            dst[o + 2] = ana_convert(ana_matrix_b, lsrc + o, rsrc + o);
+        }
+
+        dst  += dst_linesize; /* move all three pointers to their next row */
+        lsrc += l_linesize;
+        rsrc += r_linesize;
+    }
+}
+
 static int config_output(AVFilterLink *outlink)
 {
     AVFilterContext *ctx = outlink->src;
@@ -297,6 +366,8 @@ static int config_output(AVFilterLink *outlink)
     int ret;
 
     switch (s->in.format) {
+    case INTERLEAVE_COLS_LR:
+    case INTERLEAVE_COLS_RL:
     case SIDE_BY_SIDE_2_LR:
     case SIDE_BY_SIDE_LR:
     case SIDE_BY_SIDE_2_RL:
@@ -306,17 +377,12 @@ static int config_output(AVFilterLink *outlink)
             return AVERROR_INVALIDDATA;
         }
         break;
+    case INTERLEAVE_ROWS_LR:
+    case INTERLEAVE_ROWS_RL:
     case ABOVE_BELOW_2_LR:
     case ABOVE_BELOW_LR:
     case ABOVE_BELOW_2_RL:
     case ABOVE_BELOW_RL:
-        if (s->out.format == INTERLEAVE_ROWS_LR ||
-            s->out.format == INTERLEAVE_ROWS_RL) {
-            if (inlink->h & 3) {
-                av_log(ctx, AV_LOG_ERROR, "height must be multiple of 4\n");
-                return AVERROR_INVALIDDATA;
-            }
-        }
         if (inlink->h & 1) {
             av_log(ctx, AV_LOG_ERROR, "height must be even\n");
             return AVERROR_INVALIDDATA;
@@ -328,13 +394,13 @@ static int config_output(AVFilterLink *outlink)
     s->width        = inlink->w;
     s->in.height    =
     s->height       = inlink->h;
-    s->row_step     = 1;
     s->in.off_lstep =
     s->in.off_rstep =
     s->in.off_left  =
     s->in.off_right =
     s->in.row_left  =
     s->in.row_right = 0;
+    s->in.row_step  = 1;
 
     switch (s->in.format) {
     case SIDE_BY_SIDE_2_LR:
@@ -363,10 +429,24 @@ static int config_output(AVFilterLink *outlink)
         break;
     case ALTERNATING_RL:
     case ALTERNATING_LR:
-        outlink->flags |= FF_LINK_FLAG_REQUEST_LOOP;
         fps.den        *= 2;
         tb.num         *= 2;
         break;
+    case INTERLEAVE_COLS_RL:
+    case INTERLEAVE_COLS_LR:
+        s->width        = inlink->w / 2;
+        break;
+    case INTERLEAVE_ROWS_LR:
+    case INTERLEAVE_ROWS_RL:
+        s->in.row_step  = 2;
+        if (s->in.format == INTERLEAVE_ROWS_RL)
+            s->in.off_lstep = 1;
+        else
+            s->in.off_rstep = 1;
+        if (s->out.format != CHECKERBOARD_LR &&
+            s->out.format != CHECKERBOARD_RL)
+            s->height   = inlink->h / 2;
+        break;
     default:
         av_log(ctx, AV_LOG_ERROR, "input format %d is not supported\n", s->in.format);
         return AVERROR(EINVAL);
@@ -380,6 +460,7 @@ static int config_output(AVFilterLink *outlink)
     s->out.off_right =
     s->out.row_left  =
     s->out.row_right = 0;
+    s->out.row_step  = 1;
 
     switch (s->out.format) {
     case ANAGLYPH_RB_GRAY:
@@ -422,6 +503,16 @@ static int config_output(AVFilterLink *outlink)
         s->out.height    = s->height * 2;
         s->out.row_right = s->height;
         break;
+    case HDMI:
+        if (s->height != 720 && s->height != 1080) {
+            av_log(ctx, AV_LOG_ERROR, "Only 720 and 1080 height supported\n");
+            return AVERROR(EINVAL);
+        }
+
+        s->blanks = s->height / 24;
+        s->out.height    = s->height * 2 + s->blanks;
+        s->out.row_right = s->height + s->blanks;
+        break;
     case ABOVE_BELOW_2_RL:
         aspect.num      *= 2;
     case ABOVE_BELOW_RL:
@@ -429,32 +520,58 @@ static int config_output(AVFilterLink *outlink)
         s->out.row_left  = s->height;
         break;
     case INTERLEAVE_ROWS_LR:
-        s->row_step      = 2;
-        s->height        = s->height / 2;
-        s->out.off_rstep =
-        s->in.off_rstep  = 1;
+        s->in.row_step   = 1 + (s->in.format == INTERLEAVE_ROWS_RL);
+        s->out.row_step  = 2;
+        s->out.height    = s->height * 2;
+        s->out.off_rstep = 1;
         break;
     case INTERLEAVE_ROWS_RL:
-        s->row_step      = 2;
-        s->height        = s->height / 2;
-        s->out.off_lstep =
-        s->in.off_lstep  = 1;
+        s->in.row_step   = 1 + (s->in.format == INTERLEAVE_ROWS_LR);
+        s->out.row_step  = 2;
+        s->out.height    = s->height * 2;
+        s->out.off_lstep = 1;
         break;
     case MONO_R:
-        s->in.off_left   = s->in.off_right;
-        s->in.row_left   = s->in.row_right;
+        if (s->in.format != INTERLEAVE_COLS_LR) {
+            s->in.off_left = s->in.off_right;
+            s->in.row_left = s->in.row_right;
+        }
+        if (s->in.format == INTERLEAVE_ROWS_LR)
+            FFSWAP(int, s->in.off_lstep, s->in.off_rstep);
+        break;
     case MONO_L:
+        if (s->in.format == INTERLEAVE_ROWS_RL)
+            FFSWAP(int, s->in.off_lstep, s->in.off_rstep);
         break;
     case ALTERNATING_RL:
     case ALTERNATING_LR:
         fps.num         *= 2;
         tb.den          *= 2;
         break;
+    case CHECKERBOARD_LR:
+    case CHECKERBOARD_RL:
+        s->out.width     = s->width * 2;
+        break;
+    case INTERLEAVE_COLS_LR:
+    case INTERLEAVE_COLS_RL:
+        s->out.width     = s->width * 2;
+        break;
     default:
         av_log(ctx, AV_LOG_ERROR, "output format %d is not supported\n", s->out.format);
         return AVERROR(EINVAL);
     }
 
+    if (s->in.format == INTERLEAVE_COLS_LR || s->in.format == INTERLEAVE_COLS_RL) {
+        if ((s->in.format & 1) != (s->out.format & 1)) {
+            FFSWAP(int, s->in.row_left,   s->in.row_right);
+            FFSWAP(int, s->in.off_lstep,  s->in.off_rstep);
+            FFSWAP(int, s->in.off_left,   s->in.off_right);
+            FFSWAP(int, s->out.row_left,  s->out.row_right);
+            FFSWAP(int, s->out.off_lstep, s->out.off_rstep);
+            FFSWAP(int, s->out.off_left,  s->out.off_right);
+        }
+    }
+
     outlink->w = s->out.width;
     outlink->h = s->out.height;
     outlink->frame_rate = fps;
@@ -465,24 +582,82 @@ static int config_output(AVFilterLink *outlink)
         return ret;
     s->nb_planes = av_pix_fmt_count_planes(outlink->format);
     av_image_fill_max_pixsteps(s->pixstep, NULL, desc);
-    s->ts_unit = av_q2d(av_inv_q(av_mul_q(outlink->frame_rate, outlink->time_base)));
-    s->pheight[1] = s->pheight[2] = FF_CEIL_RSHIFT(s->height, desc->log2_chroma_h);
+    s->pheight[1] = s->pheight[2] = AV_CEIL_RSHIFT(s->height, desc->log2_chroma_h);
     s->pheight[0] = s->pheight[3] = s->height;
     s->hsub = desc->log2_chroma_w;
     s->vsub = desc->log2_chroma_h;
 
+    s->dsp.anaglyph = anaglyph;
+    if (ARCH_X86)
+        ff_stereo3d_init_x86(&s->dsp);
+
     return 0;
 }
 
-static inline uint8_t ana_convert(const int *coeff, const uint8_t *left, const uint8_t *right)
+typedef struct ThreadData { /* per-call arguments handed to filter_slice() through execute() */
+    AVFrame *ileft, *iright; /* frames the left and right input views are read from */
+    AVFrame *out;            /* frame the slice workers write into */
+} ThreadData;
+
+static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) /* slice worker: runs s->dsp.anaglyph on output rows [start, end) for job jobnr of nb_jobs; always returns 0 */
 {
-    int sum;
+    Stereo3DContext *s = ctx->priv;
+    ThreadData *td = arg; /* arg is the ThreadData filled in by the caller */
+    AVFrame *ileft = td->ileft;
+    AVFrame *iright = td->iright;
+    AVFrame *out = td->out;
+    int height = s->out.height;
+    int start = (height *  jobnr   ) / nb_jobs; /* first output row of this job's band */
+    int end   = (height * (jobnr+1)) / nb_jobs; /* one past the last output row of this band */
+    const int **ana_matrix = s->ana_matrix;
 
-    sum  = coeff[0] * left[0] + coeff[3] * right[0]; //red in
-    sum += coeff[1] * left[1] + coeff[4] * right[1]; //green in
-    sum += coeff[2] * left[2] + coeff[5] * right[2]; //blue in
+    s->dsp.anaglyph(out->data[0] + out->linesize[0] * start, /* destination pointer advanced to row start */
+             ileft ->data[0] + s->in_off_left [0]  + ileft->linesize[0] * start * s->in.row_step, /* left source: view offset plus start rows, each s->in.row_step lines apart */
+             iright->data[0] + s->in_off_right[0] + iright->linesize[0] * start * s->in.row_step, /* right source, same addressing */
+             out->linesize[0],
+             ileft->linesize[0] * s->in.row_step,
+             iright->linesize[0] * s->in.row_step,
+             s->out.width, end - start, /* output width and the number of rows in this band */
+             ana_matrix[0], ana_matrix[1], ana_matrix[2]);
 
-    return av_clip_uint8(sum >> 16);
+    return 0;
+}
+
+static void interleave_cols_to_any(Stereo3DContext *s, int *out_off, int p, AVFrame *in, AVFrame *out, int d) /* copies every second pixel of plane p from in to out, starting at input pixel d */
+{
+    int y, x;
+
+    for (y = 0; y < s->pheight[p]; y++) {
+        const uint8_t *src = (const uint8_t*)in->data[p] + y * in->linesize[p] + d * s->pixstep[p]; /* input row y, advanced by d pixels */
+        uint8_t *dst = out->data[p] + out_off[p] + y * out->linesize[p] * s->out.row_step; /* output row: base offset out_off[p], rows spaced s->out.row_step lines apart */
+
+        switch (s->pixstep[p]) { /* pick the load/store width matching the pixel step in bytes */
+        case 1:
+            for (x = 0; x < s->linesize[p]; x++)
+                dst[x] = src[x * 2]; /* source offset is twice the destination offset, so every other source pixel is skipped */
+            break;
+        case 2:
+            for (x = 0; x < s->linesize[p]; x+=2)
+                AV_WN16(&dst[x], AV_RN16(&src[x * 2]));
+            break;
+        case 3:
+            for (x = 0; x < s->linesize[p]; x+=3)
+                AV_WB24(&dst[x], AV_RB24(&src[x * 2]));
+            break;
+        case 4:
+            for (x = 0; x < s->linesize[p]; x+=4)
+                AV_WN32(&dst[x], AV_RN32(&src[x * 2]));
+            break;
+        case 6:
+            for (x = 0; x < s->linesize[p]; x+=6)
+                AV_WB48(&dst[x], AV_RB48(&src[x * 2]));
+            break;
+        case 8:
+            for (x = 0; x < s->linesize[p]; x+=8)
+                AV_WN64(&dst[x], AV_RN64(&src[x * 2]));
+            break;
+        } /* no default: any other pixel step copies nothing for this row */
+    }
 }
 
 static int filter_frame(AVFilterLink *inlink, AVFrame *inpicref)
@@ -492,8 +667,20 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *inpicref)
     AVFilterLink *outlink = ctx->outputs[0];
     AVFrame *out, *oleft, *oright, *ileft, *iright;
     int out_off_left[4], out_off_right[4];
-    int in_off_left[4], in_off_right[4];
-    int i;
+    int i, ret;
+
+    if (s->in.format == s->out.format)
+        return ff_filter_frame(outlink, inpicref);
+
+    switch (s->out.format) {
+    case ALTERNATING_LR:
+    case ALTERNATING_RL:
+        if (!s->prev) {
+            s->prev = inpicref;
+            return 0;
+        }
+        break;
+    };
 
     switch (s->in.format) {
     case ALTERNATING_LR:
@@ -511,38 +698,136 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *inpicref)
         ileft = iright = inpicref;
     };
 
-    out = oleft = oright = ff_get_video_buffer(outlink, outlink->w, outlink->h);
-    if (!out) {
-        av_frame_free(&s->prev);
-        av_frame_free(&inpicref);
-        return AVERROR(ENOMEM);
-    }
-    av_frame_copy_props(out, inpicref);
-
-    if (s->out.format == ALTERNATING_LR ||
-        s->out.format == ALTERNATING_RL) {
-        oright = ff_get_video_buffer(outlink, outlink->w, outlink->h);
-        if (!oright) {
+    if ((s->out.format == ALTERNATING_LR ||
+         s->out.format == ALTERNATING_RL) &&
+        (s->in.format == SIDE_BY_SIDE_LR ||
+         s->in.format == SIDE_BY_SIDE_RL ||
+         s->in.format == SIDE_BY_SIDE_2_LR ||
+         s->in.format == SIDE_BY_SIDE_2_RL ||
+         s->in.format == ABOVE_BELOW_LR ||
+         s->in.format == ABOVE_BELOW_RL ||
+         s->in.format == ABOVE_BELOW_2_LR ||
+         s->in.format == ABOVE_BELOW_2_RL ||
+         s->in.format == INTERLEAVE_ROWS_LR ||
+         s->in.format == INTERLEAVE_ROWS_RL)) {
+        oright = av_frame_clone(s->prev);
+        oleft  = av_frame_clone(s->prev);
+        if (!oright || !oleft) {
+            av_frame_free(&oright);
             av_frame_free(&oleft);
             av_frame_free(&s->prev);
             av_frame_free(&inpicref);
             return AVERROR(ENOMEM);
         }
-        av_frame_copy_props(oright, inpicref);
+    } else if ((s->out.format == MONO_L ||
+                s->out.format == MONO_R) &&
+        (s->in.format == SIDE_BY_SIDE_LR ||
+         s->in.format == SIDE_BY_SIDE_RL ||
+         s->in.format == SIDE_BY_SIDE_2_LR ||
+         s->in.format == SIDE_BY_SIDE_2_RL ||
+         s->in.format == ABOVE_BELOW_LR ||
+         s->in.format == ABOVE_BELOW_RL ||
+         s->in.format == ABOVE_BELOW_2_LR ||
+         s->in.format == ABOVE_BELOW_2_RL ||
+         s->in.format == INTERLEAVE_ROWS_LR ||
+         s->in.format == INTERLEAVE_ROWS_RL)) {
+        out = oleft = oright = av_frame_clone(inpicref);
+        if (!out) {
+            av_frame_free(&s->prev);
+            av_frame_free(&inpicref);
+            return AVERROR(ENOMEM);
+        }
+    } else if ((s->out.format == MONO_L && s->in.format == ALTERNATING_LR) ||
+               (s->out.format == MONO_R && s->in.format == ALTERNATING_RL)) {
+        s->prev->pts /= 2;
+        ret = ff_filter_frame(outlink, s->prev);
+        av_frame_free(&inpicref);
+        s->prev = NULL;
+        return ret;
+    } else if ((s->out.format == MONO_L && s->in.format == ALTERNATING_RL) ||
+               (s->out.format == MONO_R && s->in.format == ALTERNATING_LR)) {
+        av_frame_free(&s->prev);
+        inpicref->pts /= 2;
+        return ff_filter_frame(outlink, inpicref);
+    } else if ((s->out.format == ALTERNATING_LR && s->in.format == ALTERNATING_RL) ||
+               (s->out.format == ALTERNATING_RL && s->in.format == ALTERNATING_LR)) {
+        FFSWAP(int64_t, s->prev->pts, inpicref->pts);
+        ff_filter_frame(outlink, inpicref);
+        ret = ff_filter_frame(outlink, s->prev);
+        s->prev = NULL;
+        return ret;
+    } else {
+        out = oleft = oright = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+        if (!out) {
+            av_frame_free(&s->prev);
+            av_frame_free(&inpicref);
+            return AVERROR(ENOMEM);
+        }
+        av_frame_copy_props(out, inpicref);
+
+        if (s->out.format == ALTERNATING_LR ||
+            s->out.format == ALTERNATING_RL) {
+            oright = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+            if (!oright) {
+                av_frame_free(&oleft);
+                av_frame_free(&s->prev);
+                av_frame_free(&inpicref);
+                return AVERROR(ENOMEM);
+            }
+            av_frame_copy_props(oright, s->prev);
+        }
     }
 
     for (i = 0; i < 4; i++) {
         int hsub = i == 1 || i == 2 ? s->hsub : 0;
         int vsub = i == 1 || i == 2 ? s->vsub : 0;
-        in_off_left[i]   = (FF_CEIL_RSHIFT(s->in.row_left,   vsub) + s->in.off_lstep)  * ileft->linesize[i]  + FF_CEIL_RSHIFT(s->in.off_left   * s->pixstep[i], hsub);
-        in_off_right[i]  = (FF_CEIL_RSHIFT(s->in.row_right,  vsub) + s->in.off_rstep)  * iright->linesize[i] + FF_CEIL_RSHIFT(s->in.off_right  * s->pixstep[i], hsub);
-        out_off_left[i]  = (FF_CEIL_RSHIFT(s->out.row_left,  vsub) + s->out.off_lstep) * oleft->linesize[i]  + FF_CEIL_RSHIFT(s->out.off_left  * s->pixstep[i], hsub);
-        out_off_right[i] = (FF_CEIL_RSHIFT(s->out.row_right, vsub) + s->out.off_rstep) * oright->linesize[i] + FF_CEIL_RSHIFT(s->out.off_right * s->pixstep[i], hsub);
+        s->in_off_left[i]   = (AV_CEIL_RSHIFT(s->in.row_left,   vsub) + s->in.off_lstep)  * ileft->linesize[i]  + AV_CEIL_RSHIFT(s->in.off_left   * s->pixstep[i], hsub);
+        s->in_off_right[i]  = (AV_CEIL_RSHIFT(s->in.row_right,  vsub) + s->in.off_rstep)  * iright->linesize[i] + AV_CEIL_RSHIFT(s->in.off_right  * s->pixstep[i], hsub);
+        out_off_left[i]  = (AV_CEIL_RSHIFT(s->out.row_left,  vsub) + s->out.off_lstep) * oleft->linesize[i]  + AV_CEIL_RSHIFT(s->out.off_left  * s->pixstep[i], hsub);
+        out_off_right[i] = (AV_CEIL_RSHIFT(s->out.row_right, vsub) + s->out.off_rstep) * oright->linesize[i] + AV_CEIL_RSHIFT(s->out.off_right * s->pixstep[i], hsub);
     }
 
     switch (s->out.format) {
     case ALTERNATING_LR:
     case ALTERNATING_RL:
+        switch (s->in.format) {
+        case INTERLEAVE_ROWS_LR:
+        case INTERLEAVE_ROWS_RL:
+            for (i = 0; i < s->nb_planes; i++) {
+                oleft->linesize[i]  *= 2;
+                oright->linesize[i] *= 2;
+            }
+        case ABOVE_BELOW_LR:
+        case ABOVE_BELOW_RL:
+        case ABOVE_BELOW_2_LR:
+        case ABOVE_BELOW_2_RL:
+        case SIDE_BY_SIDE_LR:
+        case SIDE_BY_SIDE_RL:
+        case SIDE_BY_SIDE_2_LR:
+        case SIDE_BY_SIDE_2_RL:
+            oleft->width   = outlink->w;
+            oright->width  = outlink->w;
+            oleft->height  = outlink->h;
+            oright->height = outlink->h;
+
+            for (i = 0; i < s->nb_planes; i++) {
+                oleft->data[i]  += s->in_off_left[i];
+                oright->data[i] += s->in_off_right[i];
+            }
+            break;
+        default:
+            goto copy;
+            break;
+        }
+        break;
+    case HDMI: /* zero s->blanks rows right after the first s->height rows of every plane */
+        for (i = 0; i < s->nb_planes; i++) {
+            int j, h = s->height >> ((i == 1 || i == 2) ? s->vsub : 0); /* view height in this plane; planes 1 and 2 are shifted by vsub */
+            int b = (s->blanks) >> ((i == 1 || i == 2) ? s->vsub : 0); /* blank row count in this plane */
+
+            for (j = h; j < h + b; j++)
+                memset(oleft->data[i] + j * oleft->linesize[i], 0, s->linesize[i]); /* step rows by the frame stride; s->linesize[i] is only the copied byte width */
+        } /* no break: falls through to the plane copy shared with the cases below */
     case SIDE_BY_SIDE_LR:
     case SIDE_BY_SIDE_RL:
     case SIDE_BY_SIDE_2_LR:
@@ -553,27 +838,70 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *inpicref)
     case ABOVE_BELOW_2_RL:
     case INTERLEAVE_ROWS_LR:
     case INTERLEAVE_ROWS_RL:
-        for (i = 0; i < s->nb_planes; i++) {
-            av_image_copy_plane(oleft->data[i] + out_off_left[i],
-                                oleft->linesize[i] * s->row_step,
-                                ileft->data[i] + in_off_left[i],
-                                ileft->linesize[i] * s->row_step,
-                                s->linesize[i], s->pheight[i]);
-            av_image_copy_plane(oright->data[i] + out_off_right[i],
-                                oright->linesize[i] * s->row_step,
-                                iright->data[i] + in_off_right[i],
-                                iright->linesize[i] * s->row_step,
-                                s->linesize[i], s->pheight[i]);
+copy:
+        if (s->in.format == INTERLEAVE_COLS_LR ||
+            s->in.format == INTERLEAVE_COLS_RL) {
+            for (i = 0; i < s->nb_planes; i++) {
+                int d = (s->in.format & 1) != (s->out.format & 1);
+
+                interleave_cols_to_any(s, out_off_left,  i, ileft,  oleft,   d);
+                interleave_cols_to_any(s, out_off_right, i, iright, oright, !d);
+            }
+        } else {
+            for (i = 0; i < s->nb_planes; i++) {
+                av_image_copy_plane(oleft->data[i] + out_off_left[i],
+                                    oleft->linesize[i] * s->out.row_step,
+                                    ileft->data[i] + s->in_off_left[i],
+                                    ileft->linesize[i] * s->in.row_step,
+                                    s->linesize[i], s->pheight[i]);
+                av_image_copy_plane(oright->data[i] + out_off_right[i],
+                                    oright->linesize[i] * s->out.row_step,
+                                    iright->data[i] + s->in_off_right[i],
+                                    iright->linesize[i] * s->in.row_step,
+                                    s->linesize[i], s->pheight[i]);
+            }
         }
         break;
     case MONO_L:
         iright = ileft;
     case MONO_R:
-        for (i = 0; i < s->nb_planes; i++) {
-            av_image_copy_plane(out->data[i], out->linesize[i],
-                                iright->data[i] + in_off_left[i],
-                                iright->linesize[i],
-                                s->linesize[i], s->pheight[i]);
+        switch (s->in.format) {
+        case INTERLEAVE_ROWS_LR:
+        case INTERLEAVE_ROWS_RL:
+            for (i = 0; i < s->nb_planes; i++) {
+                out->linesize[i] *= 2;
+            }
+        case ABOVE_BELOW_LR:
+        case ABOVE_BELOW_RL:
+        case ABOVE_BELOW_2_LR:
+        case ABOVE_BELOW_2_RL:
+        case SIDE_BY_SIDE_LR:
+        case SIDE_BY_SIDE_RL:
+        case SIDE_BY_SIDE_2_LR:
+        case SIDE_BY_SIDE_2_RL:
+            out->width  = outlink->w;
+            out->height = outlink->h;
+
+            for (i = 0; i < s->nb_planes; i++) {
+                out->data[i] += s->in_off_left[i];
+            }
+            break;
+        case INTERLEAVE_COLS_LR:
+        case INTERLEAVE_COLS_RL:
+            for (i = 0; i < s->nb_planes; i++) {
+                const int d = (s->in.format & 1) != (s->out.format & 1);
+
+                interleave_cols_to_any(s, out_off_right, i, iright, out, d);
+            }
+            break;
+        default:
+            for (i = 0; i < s->nb_planes; i++) {
+                av_image_copy_plane(out->data[i], out->linesize[i],
+                                    iright->data[i] + s->in_off_left[i],
+                                    iright->linesize[i] * s->in.row_step,
+                                    s->linesize[i], s->pheight[i]);
+            }
+            break;
         }
         break;
     case ANAGLYPH_RB_GRAY:
@@ -590,41 +918,162 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *inpicref)
     case ANAGLYPH_YB_HALF:
     case ANAGLYPH_YB_COLOR:
     case ANAGLYPH_YB_DUBOIS: {
-        int x, y, il, ir, o;
-        const uint8_t *lsrc = ileft->data[0];
-        const uint8_t *rsrc = iright->data[0];
-        uint8_t *dst = out->data[0];
-        int out_width = s->out.width;
-        const int **ana_matrix = s->ana_matrix;
-
-        for (y = 0; y < s->out.height; y++) {
-            o   = out->linesize[0] * y;
-            il  = in_off_left[0]  + y * ileft->linesize[0];
-            ir  = in_off_right[0] + y * iright->linesize[0];
-            for (x = 0; x < out_width; x++, il += 3, ir += 3, o+= 3) {
-                dst[o    ] = ana_convert(ana_matrix[0], lsrc + il, rsrc + ir);
-                dst[o + 1] = ana_convert(ana_matrix[1], lsrc + il, rsrc + ir);
-                dst[o + 2] = ana_convert(ana_matrix[2], lsrc + il, rsrc + ir);
-            }
+        if (s->in.format == INTERLEAVE_COLS_LR ||
+            s->in.format == INTERLEAVE_COLS_RL) {
+            const int d = (s->in.format & 1);
+
+            anaglyph_ic(out->data[0],
+                ileft ->data[0] + s->in_off_left [0] +   d  * 3,
+                iright->data[0] + s->in_off_right[0] + (!d) * 3,
+                out->linesize[0],
+                ileft->linesize[0] * s->in.row_step,
+                iright->linesize[0] * s->in.row_step,
+                s->out.width, s->out.height,
+                s->ana_matrix[0], s->ana_matrix[1], s->ana_matrix[2]);
+        } else {
+            ThreadData td;
+
+            td.ileft = ileft; td.iright = iright; td.out = out;
+            ctx->internal->execute(ctx, filter_slice, &td, NULL,
+                                   FFMIN(s->out.height, ctx->graph->nb_threads));
         }
         break;
     }
+    case CHECKERBOARD_RL:
+    case CHECKERBOARD_LR:
+        for (i = 0; i < s->nb_planes; i++) {
+            int x, y;
+
+            for (y = 0; y < s->pheight[i]; y++) {
+                uint8_t *dst = out->data[i] + out->linesize[i] * y;
+                const int d1 = (s->in.format == INTERLEAVE_COLS_LR || s->in.format == INTERLEAVE_COLS_RL) && (s->in.format & 1) != (s->out.format & 1);
+                const int d2 = (s->in.format == INTERLEAVE_COLS_LR || s->in.format == INTERLEAVE_COLS_RL) ? !d1 : 0;
+                const int m = 1 + (s->in.format == INTERLEAVE_COLS_LR || s->in.format == INTERLEAVE_COLS_RL);
+                uint8_t *left  = ileft->data[i]  + ileft->linesize[i]  * y + s->in_off_left[i]  + d1 * s->pixstep[i];
+                uint8_t *right = iright->data[i] + iright->linesize[i] * y + s->in_off_right[i] + d2 * s->pixstep[i];
+                int p, b;
+
+                if (s->out.format == CHECKERBOARD_RL && s->in.format != INTERLEAVE_COLS_LR && s->in.format != INTERLEAVE_COLS_RL)
+                    FFSWAP(uint8_t*, left, right);
+                switch (s->pixstep[i]) {
+                case 1:
+                    for (x = 0, b = 0, p = 0; x < s->linesize[i] * 2; x+=2, p++, b+=2) {
+                        dst[x  ] = (b&1) == (y&1) ? left[p*m] : right[p*m];
+                        dst[x+1] = (b&1) != (y&1) ? left[p*m] : right[p*m];
+                    }
+                    break;
+                case 2:
+                    for (x = 0, b = 0, p = 0; x < s->linesize[i] * 2; x+=4, p+=2, b+=2) {
+                        AV_WN16(&dst[x  ], (b&1) == (y&1) ? AV_RN16(&left[p*m]) : AV_RN16(&right[p*m]));
+                        AV_WN16(&dst[x+2], (b&1) != (y&1) ? AV_RN16(&left[p*m]) : AV_RN16(&right[p*m]));
+                    }
+                    break;
+                case 3:
+                    for (x = 0, b = 0, p = 0; x < s->linesize[i] * 2; x+=6, p+=3, b+=2) {
+                        AV_WB24(&dst[x  ], (b&1) == (y&1) ? AV_RB24(&left[p*m]) : AV_RB24(&right[p*m]));
+                        AV_WB24(&dst[x+3], (b&1) != (y&1) ? AV_RB24(&left[p*m]) : AV_RB24(&right[p*m]));
+                    }
+                    break;
+                case 4:
+                    for (x = 0, b = 0, p = 0; x < s->linesize[i] * 2; x+=8, p+=4, b+=2) {
+                        AV_WN32(&dst[x  ], (b&1) == (y&1) ? AV_RN32(&left[p*m]) : AV_RN32(&right[p*m]));
+                        AV_WN32(&dst[x+4], (b&1) != (y&1) ? AV_RN32(&left[p*m]) : AV_RN32(&right[p*m]));
+                    }
+                    break;
+                case 6:
+                    for (x = 0, b = 0, p = 0; x < s->linesize[i] * 2; x+=12, p+=6, b+=2) {
+                        AV_WB48(&dst[x  ], (b&1) == (y&1) ? AV_RB48(&left[p*m]) : AV_RB48(&right[p*m]));
+                        AV_WB48(&dst[x+6], (b&1) != (y&1) ? AV_RB48(&left[p*m]) : AV_RB48(&right[p*m]));
+                    }
+                    break;
+                case 8:
+                    for (x = 0, b = 0, p = 0; x < s->linesize[i] * 2; x+=16, p+=8, b+=2) {
+                        AV_WN64(&dst[x  ], (b&1) == (y&1) ? AV_RN64(&left[p*m]) : AV_RN64(&right[p*m]));
+                        AV_WN64(&dst[x+8], (b&1) != (y&1) ? AV_RN64(&left[p*m]) : AV_RN64(&right[p*m]));
+                    }
+                    break;
+                }
+            }
+        }
+        break;
+    case INTERLEAVE_COLS_LR:
+    case INTERLEAVE_COLS_RL:
+        for (i = 0; i < s->nb_planes; i++) {
+            const int d = (s->in.format == INTERLEAVE_COLS_LR || s->in.format == INTERLEAVE_COLS_RL);
+            const int m = 1 + d;
+            int x, y;
+
+            for (y = 0; y < s->pheight[i]; y++) {
+                uint8_t *dst = out->data[i] + out->linesize[i] * y;
+                uint8_t *left = ileft->data[i] + ileft->linesize[i] * y * s->in.row_step + s->in_off_left[i] + d * s->pixstep[i];
+                uint8_t *right = iright->data[i] + iright->linesize[i] * y * s->in.row_step + s->in_off_right[i];
+                int p, b;
+
+                if (s->out.format == INTERLEAVE_COLS_LR)
+                    FFSWAP(uint8_t*, left, right);
+
+                switch (s->pixstep[i]) {
+                case 1:
+                    for (x = 0, b = 0, p = 0; x < s->linesize[i] * 2; x+=2, p++, b+=2) {
+                        dst[x  ] =   b&1  ? left[p*m] : right[p*m];
+                        dst[x+1] = !(b&1) ? left[p*m] : right[p*m];
+                    }
+                    break;
+                case 2:
+                    for (x = 0, b = 0, p = 0; x < s->linesize[i] * 2; x+=4, p+=2, b+=2) {
+                        AV_WN16(&dst[x  ],   b&1  ? AV_RN16(&left[p*m]) : AV_RN16(&right[p*m]));
+                        AV_WN16(&dst[x+2], !(b&1) ? AV_RN16(&left[p*m]) : AV_RN16(&right[p*m]));
+                    }
+                    break;
+                case 3:
+                    for (x = 0, b = 0, p = 0; x < s->linesize[i] * 2; x+=6, p+=3, b+=2) {
+                        AV_WB24(&dst[x  ],   b&1  ? AV_RB24(&left[p*m]) : AV_RB24(&right[p*m]));
+                        AV_WB24(&dst[x+3], !(b&1) ? AV_RB24(&left[p*m]) : AV_RB24(&right[p*m]));
+                    }
+                    break;
+                case 4:
+                    for (x = 0, b = 0, p = 0; x < s->linesize[i] * 2; x+=8, p+=4, b+=2) {
+                        AV_WN32(&dst[x  ],   b&1  ? AV_RN32(&left[p*m]) : AV_RN32(&right[p*m]));
+                        AV_WN32(&dst[x+4], !(b&1) ? AV_RN32(&left[p*m]) : AV_RN32(&right[p*m]));
+                    }
+                    break;
+                case 6:
+                    for (x = 0, b = 0, p = 0; x < s->linesize[i] * 2; x+=12, p+=6, b+=2) {
+                        AV_WB48(&dst[x  ],   b&1  ? AV_RB48(&left[p*m]) : AV_RB48(&right[p*m]));
+                        AV_WB48(&dst[x+6], !(b&1) ? AV_RB48(&left[p*m]) : AV_RB48(&right[p*m]));
+                    }
+                    break;
+                case 8:
+                    for (x = 0, b = 0, p = 0; x < s->linesize[i] * 2; x+=16, p+=8, b+=2) {
+                        AV_WN64(&dst[x  ],   b&1 ?  AV_RN64(&left[p*m]) : AV_RN64(&right[p*m]));
+                        AV_WN64(&dst[x+8], !(b&1) ? AV_RN64(&left[p*m]) : AV_RN64(&right[p*m]));
+                    }
+                    break;
+                }
+            }
+        }
+        break;
     default:
         av_assert0(0);
     }
 
-    av_frame_free(&inpicref);
-    av_frame_free(&s->prev);
     if (oright != oleft) {
         if (s->out.format == ALTERNATING_LR)
             FFSWAP(AVFrame *, oleft, oright);
-        oright->pts = outlink->frame_count * s->ts_unit;
+        oright->pts = s->prev->pts * 2;
         ff_filter_frame(outlink, oright);
         out = oleft;
-        oleft->pts = outlink->frame_count * s->ts_unit;
+        oleft->pts = s->prev->pts + inpicref->pts;
+        av_frame_free(&s->prev);
+        s->prev = inpicref;
     } else if (s->in.format == ALTERNATING_LR ||
                s->in.format == ALTERNATING_RL) {
-        out->pts = outlink->frame_count * s->ts_unit;
+        out->pts = s->prev->pts / 2;
+        av_frame_free(&s->prev);
+        av_frame_free(&inpicref);
+    } else {
+        av_frame_free(&s->prev);
+        av_frame_free(&inpicref);
     }
     return ff_filter_frame(outlink, out);
 }
@@ -663,4 +1112,5 @@ AVFilter ff_vf_stereo3d = {
     .inputs        = stereo3d_inputs,
     .outputs       = stereo3d_outputs,
     .priv_class    = &stereo3d_class,
+    .flags         = AVFILTER_FLAG_SLICE_THREADS,
 };
diff --git a/libavfilter/vf_subtitles.c b/libavfilter/vf_subtitles.c
index 5e1324c7..63b22c37 100644
--- a/libavfilter/vf_subtitles.c
+++ b/libavfilter/vf_subtitles.c
@@ -50,6 +50,7 @@ typedef struct {
     ASS_Renderer *renderer;
     ASS_Track    *track;
     char *filename;
+    char *fontsdir;
     char *charenc;
     char *force_style;
     int stream_index;
@@ -67,6 +68,7 @@ typedef struct {
     {"filename",       "set the filename of file to read",                         OFFSET(filename),   AV_OPT_TYPE_STRING,     {.str = NULL},  CHAR_MIN, CHAR_MAX, FLAGS }, \
     {"f",              "set the filename of file to read",                         OFFSET(filename),   AV_OPT_TYPE_STRING,     {.str = NULL},  CHAR_MIN, CHAR_MAX, FLAGS }, \
     {"original_size",  "set the size of the original video (used to scale fonts)", OFFSET(original_w), AV_OPT_TYPE_IMAGE_SIZE, {.str = NULL},  CHAR_MIN, CHAR_MAX, FLAGS }, \
+    {"fontsdir",       "set the directory containing the fonts to read",           OFFSET(fontsdir),   AV_OPT_TYPE_STRING,     {.str = NULL},  CHAR_MIN, CHAR_MAX, FLAGS }, \
 
 /* libass supports a log level ranging from 0 to 7 */
 static const int ass_libavfilter_log_level_map[] = {
@@ -106,6 +108,8 @@ static av_cold int init(AVFilterContext *ctx)
     }
     ass_set_message_cb(ass->library, ass_log, ctx);
 
+    ass_set_fonts_dir(ass->library, ass->fontsdir);
+
     ass->renderer = ass_renderer_init(ass->library);
     if (!ass->renderer) {
         av_log(ctx, AV_LOG_ERROR, "Could not initialize libass renderer.\n");
@@ -440,7 +444,7 @@ static av_cold int init_subtitles(AVFilterContext *ctx)
                 }
             }
         }
-        av_free_packet(&pkt);
+        av_packet_unref(&pkt);
         avsubtitle_free(&sub);
     }
 
diff --git a/libavfilter/vf_super2xsai.c b/libavfilter/vf_super2xsai.c
index 67e9f97e..cbb3f624 100644
--- a/libavfilter/vf_super2xsai.c
+++ b/libavfilter/vf_super2xsai.c
@@ -58,15 +58,15 @@ static void super2xsai(AVFilterContext *ctx,
                        uint8_t *dst, int dst_linesize,
                        int width, int height)
 {
-    Super2xSaIContext *sai = ctx->priv;
+    Super2xSaIContext *s = ctx->priv;
     unsigned int x, y;
     uint32_t color[4][4];
     unsigned char *src_line[4];
-    const int bpp = sai->bpp;
-    const uint32_t hi_pixel_mask = sai->hi_pixel_mask;
-    const uint32_t lo_pixel_mask = sai->lo_pixel_mask;
-    const uint32_t q_hi_pixel_mask = sai->q_hi_pixel_mask;
-    const uint32_t q_lo_pixel_mask = sai->q_lo_pixel_mask;
+    const int bpp = s->bpp;
+    const uint32_t hi_pixel_mask = s->hi_pixel_mask;
+    const uint32_t lo_pixel_mask = s->lo_pixel_mask;
+    const uint32_t q_hi_pixel_mask = s->q_hi_pixel_mask;
+    const uint32_t q_lo_pixel_mask = s->q_lo_pixel_mask;
 
     /* Point to the first 4 lines, first line is duplicated */
     src_line[0] = src;
@@ -76,7 +76,7 @@ static void super2xsai(AVFilterContext *ctx,
 
 #define READ_COLOR4(dst, src_line, off) dst = *((const uint32_t *)src_line + off)
 #define READ_COLOR3(dst, src_line, off) dst = AV_RL24 (src_line + 3*off)
-#define READ_COLOR2(dst, src_line, off) dst = sai->is_be ? AV_RB16(src_line + 2 * off) : AV_RL16(src_line + 2 * off)
+#define READ_COLOR2(dst, src_line, off) dst = s->is_be ? AV_RB16(src_line + 2 * off) : AV_RL16(src_line + 2 * off)
 
     for (y = 0; y < height; y++) {
         uint8_t *dst_line[2];
@@ -179,7 +179,7 @@ static void super2xsai(AVFilterContext *ctx,
                 AV_WL24(dst_line[1] + x * 6 + 3, product2b);
                 break;
             default: // bpp = 2
-                if (sai->is_be) {
+                if (s->is_be) {
                     AV_WB32(dst_line[0] + x * 4, product1a | (product1b << 16));
                     AV_WB32(dst_line[1] + x * 4, product2a | (product2b << 16));
                 } else {
@@ -249,42 +249,42 @@ static int query_formats(AVFilterContext *ctx)
 
 static int config_input(AVFilterLink *inlink)
 {
-    Super2xSaIContext *sai = inlink->dst->priv;
+    Super2xSaIContext *s = inlink->dst->priv;
 
-    sai->hi_pixel_mask   = 0xFEFEFEFE;
-    sai->lo_pixel_mask   = 0x01010101;
-    sai->q_hi_pixel_mask = 0xFCFCFCFC;
-    sai->q_lo_pixel_mask = 0x03030303;
-    sai->bpp  = 4;
+    s->hi_pixel_mask   = 0xFEFEFEFE;
+    s->lo_pixel_mask   = 0x01010101;
+    s->q_hi_pixel_mask = 0xFCFCFCFC;
+    s->q_lo_pixel_mask = 0x03030303;
+    s->bpp  = 4;
 
     switch (inlink->format) {
     case AV_PIX_FMT_RGB24:
     case AV_PIX_FMT_BGR24:
-        sai->bpp = 3;
+        s->bpp = 3;
         break;
 
     case AV_PIX_FMT_RGB565BE:
     case AV_PIX_FMT_BGR565BE:
-        sai->is_be = 1;
+        s->is_be = 1;
     case AV_PIX_FMT_RGB565LE:
     case AV_PIX_FMT_BGR565LE:
-        sai->hi_pixel_mask   = 0xF7DEF7DE;
-        sai->lo_pixel_mask   = 0x08210821;
-        sai->q_hi_pixel_mask = 0xE79CE79C;
-        sai->q_lo_pixel_mask = 0x18631863;
-        sai->bpp = 2;
+        s->hi_pixel_mask   = 0xF7DEF7DE;
+        s->lo_pixel_mask   = 0x08210821;
+        s->q_hi_pixel_mask = 0xE79CE79C;
+        s->q_lo_pixel_mask = 0x18631863;
+        s->bpp = 2;
         break;
 
     case AV_PIX_FMT_BGR555BE:
     case AV_PIX_FMT_RGB555BE:
-        sai->is_be = 1;
+        s->is_be = 1;
     case AV_PIX_FMT_BGR555LE:
     case AV_PIX_FMT_RGB555LE:
-        sai->hi_pixel_mask   = 0x7BDE7BDE;
-        sai->lo_pixel_mask   = 0x04210421;
-        sai->q_hi_pixel_mask = 0x739C739C;
-        sai->q_lo_pixel_mask = 0x0C630C63;
-        sai->bpp = 2;
+        s->hi_pixel_mask   = 0x7BDE7BDE;
+        s->lo_pixel_mask   = 0x04210421;
+        s->q_hi_pixel_mask = 0x739C739C;
+        s->q_lo_pixel_mask = 0x0C630C63;
+        s->bpp = 2;
         break;
     }
 
diff --git a/libavfilter/vf_swaprect.c b/libavfilter/vf_swaprect.c
new file mode 100644
index 00000000..a4676273
--- /dev/null
+++ b/libavfilter/vf_swaprect.c
@@ -0,0 +1,256 @@
+/*
+ * Copyright (c) 2015 Paul B. Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avstring.h"
+#include "libavutil/eval.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+typedef struct SwapRectContext {
+    const AVClass *class;
+    char *w, *h;        /* option strings: expressions for the rectangle width and height */
+    char *x1, *y1;      /* option strings: expressions for the top-left corner of the 1st rectangle */
+    char *x2, *y2;      /* option strings: expressions for the top-left corner of the 2nd rectangle */
+
+    int nb_planes;      /* plane count of the input format, set in config_input() */
+    int pixsteps[4];    /* per-plane pixel step as filled in by av_image_fill_max_pixsteps() */
+
+    const AVPixFmtDescriptor *desc; /* descriptor of the input pixel format */
+    uint8_t *temp;      /* scratch buffer used while swapping a row; freed in uninit() */
+} SwapRectContext;
+
+#define OFFSET(x) offsetof(SwapRectContext, x)
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM
+static const AVOption swaprect_options[] = { /* every option is a string holding an expression, evaluated per frame in filter_frame() */
+    { "w",  "set rect width",                     OFFSET(w),  AV_OPT_TYPE_STRING, {.str="w/2"}, 0, 0, .flags = FLAGS },
+    { "h",  "set rect height",                    OFFSET(h),  AV_OPT_TYPE_STRING, {.str="h/2"}, 0, 0, .flags = FLAGS },
+    { "x1", "set 1st rect x top left coordinate", OFFSET(x1), AV_OPT_TYPE_STRING, {.str="w/2"}, 0, 0, .flags = FLAGS },
+    { "y1", "set 1st rect y top left coordinate", OFFSET(y1), AV_OPT_TYPE_STRING, {.str="h/2"}, 0, 0, .flags = FLAGS },
+    { "x2", "set 2nd rect x top left coordinate", OFFSET(x2), AV_OPT_TYPE_STRING, {.str="0"},   0, 0, .flags = FLAGS },
+    { "y2", "set 2nd rect y top left coordinate", OFFSET(y2), AV_OPT_TYPE_STRING, {.str="0"},   0, 0, .flags = FLAGS },
+    { NULL },
+};
+
+AVFILTER_DEFINE_CLASS(swaprect);
+
+/* Advertise every pixel format that is neither paletted, hwaccel nor bitstream. */
+static int query_formats(AVFilterContext *ctx)
+{
+    const int rejected = AV_PIX_FMT_FLAG_PAL | AV_PIX_FMT_FLAG_HWACCEL |
+                         AV_PIX_FMT_FLAG_BITSTREAM;
+    const AVPixFmtDescriptor *d;
+    AVFilterFormats *list = NULL;
+    int id, err;
+
+    for (id = 0; (d = av_pix_fmt_desc_get(id)); id++) {
+        if (!(d->flags & rejected) && (err = ff_add_format(&list, id)) < 0)
+            return err;
+    }
+
+    return ff_set_common_formats(ctx, list);
+}
+
+static const char *const var_names[] = {   "w",   "h",   "a",   "n",   "t",   "pos",   "sar",   "dar",        NULL };
+enum                                   { VAR_W, VAR_H, VAR_A, VAR_N, VAR_T, VAR_POS, VAR_SAR, VAR_DAR, VAR_VARS_NB }; /* indices into var_values[], in the same order as var_names[] */
+
+/*
+ * Swap two rectangles of the incoming frame in place and pass the frame on.
+ *
+ * The w/h/x1/y1/x2/y2 option strings are evaluated for every frame with the
+ * variables listed in var_names. Both top-left corners are clipped into the
+ * frame. A plane is only touched when clamping the rectangle to the frame did
+ * not shrink it on that plane; otherwise the plane is left as it is.
+ *
+ * The frame is owned by this callback: it is forwarded to the output link on
+ * success and freed when an expression fails to evaluate.
+ *
+ * @param inlink input link the frame arrived on
+ * @param in     input frame, modified in place
+ * @return result of ff_filter_frame(), or the negative error code returned by
+ *         av_expr_parse_and_eval()
+ */
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    SwapRectContext *s = ctx->priv;
+    /* expressions in evaluation order; res[] receives the matching values */
+    const char *const exprs[6] = { s->w, s->h, s->x1, s->y1, s->x2, s->y2 };
+    double res[6];
+    double var_values[VAR_VARS_NB];
+    int x1[4], y1[4];   /* per-plane top-left corner of the 1st rectangle */
+    int x2[4], y2[4];   /* per-plane top-left corner of the 2nd rectangle */
+    int aw[4], ah[4];   /* per-plane size as requested */
+    int pw[4], ph[4];   /* per-plane size after clamping to the frame */
+    int i, y, p, w, h, ret;
+
+    /* variables available to the expressions */
+    var_values[VAR_W]   = inlink->w;
+    var_values[VAR_H]   = inlink->h;
+    var_values[VAR_A]   = (float) inlink->w / inlink->h;
+    var_values[VAR_SAR] = inlink->sample_aspect_ratio.num ? av_q2d(inlink->sample_aspect_ratio) : 1;
+    var_values[VAR_DAR] = var_values[VAR_A] * var_values[VAR_SAR];
+    var_values[VAR_N]   = inlink->frame_count;
+    var_values[VAR_T]   = in->pts == AV_NOPTS_VALUE ? NAN : in->pts * av_q2d(inlink->time_base);
+    var_values[VAR_POS] = av_frame_get_pkt_pos(in) == -1 ? NAN : av_frame_get_pkt_pos(in);
+
+    /* evaluate all six expressions, stopping at the first failure */
+    for (i = 0; i < 6; i++) {
+        ret = av_expr_parse_and_eval(&res[i], exprs[i],
+                                     var_names, &var_values[0],
+                                     NULL, NULL, NULL, NULL,
+                                     0, 0, ctx);
+        if (ret < 0) {
+            /* the frame is owned by this function; do not leak it */
+            av_frame_free(&in);
+            return ret;
+        }
+    }
+
+    /* the results are truncated to integers */
+    w     = res[0]; h     = res[1];
+    x1[0] = res[2]; y1[0] = res[3];
+    x2[0] = res[4]; y2[0] = res[5];
+
+    /* x is bounded by the frame width, y by the frame height */
+    x1[0] = av_clip(x1[0], 0, inlink->w - 1);
+    y1[0] = av_clip(y1[0], 0, inlink->h - 1);
+
+    x2[0] = av_clip(x2[0], 0, inlink->w - 1);
+    y2[0] = av_clip(y2[0], 0, inlink->h - 1);
+
+    /* requested size, per plane */
+    ah[1] = ah[2] = FF_CEIL_RSHIFT(h, s->desc->log2_chroma_h);
+    ah[0] = ah[3] = h;
+    aw[1] = aw[2] = FF_CEIL_RSHIFT(w, s->desc->log2_chroma_w);
+    aw[0] = aw[3] = w;
+
+    /* largest size that fits inside the frame at both positions */
+    w = FFMIN3(w, inlink->w - x1[0], inlink->w - x2[0]);
+    h = FFMIN3(h, inlink->h - y1[0], inlink->h - y2[0]);
+
+    /* clamped size, per plane */
+    ph[1] = ph[2] = FF_CEIL_RSHIFT(h, s->desc->log2_chroma_h);
+    ph[0] = ph[3] = h;
+    pw[1] = pw[2] = FF_CEIL_RSHIFT(w, s->desc->log2_chroma_w);
+    pw[0] = pw[3] = w;
+
+    /* corner coordinates, per plane */
+    x1[1] = x1[2] = FF_CEIL_RSHIFT(x1[0], s->desc->log2_chroma_w);
+    x1[3] = x1[0];
+    y1[1] = y1[2] = FF_CEIL_RSHIFT(y1[0], s->desc->log2_chroma_h);
+    y1[3] = y1[0];
+
+    x2[1] = x2[2] = FF_CEIL_RSHIFT(x2[0], s->desc->log2_chroma_w);
+    x2[3] = x2[0];
+    y2[1] = y2[2] = FF_CEIL_RSHIFT(y2[0], s->desc->log2_chroma_h);
+    y2[3] = y2[0];
+
+    for (p = 0; p < s->nb_planes; p++) {
+        uint8_t *src, *dst;
+        int row_bytes;
+
+        /* skip planes on which the rectangle had to be shrunk */
+        if (ph[p] != ah[p] || pw[p] != aw[p])
+            continue;
+        /* nothing to swap; also keeps a negative size away from memcpy() */
+        if (pw[p] <= 0 || ph[p] <= 0)
+            continue;
+
+        row_bytes = pw[p] * s->pixsteps[p];
+        src = in->data[p] + y1[p] * in->linesize[p] + x1[p] * s->pixsteps[p];
+        dst = in->data[p] + y2[p] * in->linesize[p] + x2[p] * s->pixsteps[p];
+
+        for (y = 0; y < ph[p]; y++) {
+            /* swap one row through the scratch buffer */
+            memcpy(s->temp, src, row_bytes);
+            memmove(src, dst, row_bytes);
+            memcpy(dst, s->temp, row_bytes);
+            src += in->linesize[p];
+            dst += in->linesize[p];
+        }
+    }
+
+    return ff_filter_frame(outlink, in);
+}
+
+/* Input link setup: cache format info and size the one-row scratch buffer. */
+static int config_input(AVFilterLink *inlink)
+{
+    SwapRectContext *s = inlink->dst->priv;
+    int p, max_step = 0;
+
+    if (!s->w || !s->h || !s->x1 || !s->y1 || !s->x2 || !s->y2)
+        return AVERROR(EINVAL);
+
+    s->desc = av_pix_fmt_desc_get(inlink->format);
+    av_image_fill_max_pixsteps(s->pixsteps, NULL, s->desc);
+    s->nb_planes = av_pix_fmt_count_planes(inlink->format);
+
+    /* A chroma row can need more bytes than a luma row (e.g. interleaved UV with
+     * odd width), so size for the widest pixel step of any plane. */
+    for (p = 0; p < 4; p++)
+        max_step = FFMAX(max_step, s->pixsteps[p]);
+    av_freep(&s->temp); /* do not leak the buffer if the link is configured again */
+    return (s->temp = av_malloc_array(inlink->w, max_step)) ? 0 : AVERROR(ENOMEM);
+}
+
+/* Release the scratch row buffer allocated in config_input(). */
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    av_freep(&((SwapRectContext *)ctx->priv)->temp);
+}
+
+static const AVFilterPad inputs[] = {
+    {
+        .name           = "default",
+        .type           = AVMEDIA_TYPE_VIDEO,
+        .filter_frame   = filter_frame,
+        .config_props   = config_input,
+        .needs_writable = 1, /* filter_frame() writes into the frame's own buffers */
+    },
+    { NULL }
+};
+
+static const AVFilterPad outputs[] = { /* one video output; filter_frame() forwards each frame to it */
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_swaprect = { /* filter definition: binds the option class, callbacks and pads of this file */
+    .name          = "swaprect",
+    .description   = NULL_IF_CONFIG_SMALL("Swap 2 rectangular objects in video."),
+    .priv_size     = sizeof(SwapRectContext),
+    .priv_class    = &swaprect_class,
+    .query_formats = query_formats,
+    .uninit        = uninit, /* frees SwapRectContext.temp */
+    .inputs        = inputs,
+    .outputs       = outputs,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC,
+};
diff --git a/libavfilter/vf_swapuv.c b/libavfilter/vf_swapuv.c
index 632e31c7..1a82ef3c 100644
--- a/libavfilter/vf_swapuv.c
+++ b/libavfilter/vf_swapuv.c
@@ -24,6 +24,7 @@
  */
 
 #include "libavutil/pixdesc.h"
+#include "libavutil/version.h"
 #include "avfilter.h"
 #include "formats.h"
 #include "internal.h"
@@ -33,8 +34,13 @@ static void do_swap(AVFrame *frame)
 {
     FFSWAP(uint8_t*,     frame->data[1],     frame->data[2]);
     FFSWAP(int,          frame->linesize[1], frame->linesize[2]);
-    FFSWAP(uint64_t,     frame->error[1],    frame->error[2]);
     FFSWAP(AVBufferRef*, frame->buf[1],      frame->buf[2]);
+
+#if FF_API_ERROR_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
+    FFSWAP(uint64_t,     frame->error[1],    frame->error[2]);
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 }
 
 static AVFrame *get_video_buffer(AVFilterLink *link, int w, int h)
@@ -56,10 +62,10 @@ static int is_planar_yuv(const AVPixFmtDescriptor *desc)
 
     if (desc->flags & ~(AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_ALPHA) ||
         desc->nb_components < 3 ||
-        (desc->comp[1].depth_minus1 != desc->comp[2].depth_minus1))
+        (desc->comp[1].depth != desc->comp[2].depth))
         return 0;
     for (i = 0; i < desc->nb_components; i++) {
-        if (desc->comp[i].offset_plus1 != 1 ||
+        if (desc->comp[i].offset != 0 ||
             desc->comp[i].shift != 0 ||
             desc->comp[i].plane != i)
             return 0;
@@ -71,12 +77,12 @@ static int is_planar_yuv(const AVPixFmtDescriptor *desc)
 static int query_formats(AVFilterContext *ctx)
 {
     AVFilterFormats *formats = NULL;
-    int fmt;
+    int fmt, ret;
 
     for (fmt = 0; av_pix_fmt_desc_get(fmt); fmt++) {
         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);
-        if (is_planar_yuv(desc))
-            ff_add_format(&formats, fmt);
+        if (is_planar_yuv(desc) && (ret = ff_add_format(&formats, fmt)) < 0)
+            return ret;
     }
 
     return ff_set_common_formats(ctx, formats);
diff --git a/libavfilter/vf_telecine.c b/libavfilter/vf_telecine.c
index 26f0ef8a..182c6708 100644
--- a/libavfilter/vf_telecine.c
+++ b/libavfilter/vf_telecine.c
@@ -102,14 +102,15 @@ static av_cold int init(AVFilterContext *ctx)
 static int query_formats(AVFilterContext *ctx)
 {
     AVFilterFormats *pix_fmts = NULL;
-    int fmt;
+    int fmt, ret;
 
     for (fmt = 0; av_pix_fmt_desc_get(fmt); fmt++) {
         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);
         if (!(desc->flags & AV_PIX_FMT_FLAG_HWACCEL ||
               desc->flags & AV_PIX_FMT_FLAG_PAL     ||
-              desc->flags & AV_PIX_FMT_FLAG_BITSTREAM))
-            ff_add_format(&pix_fmts, fmt);
+              desc->flags & AV_PIX_FMT_FLAG_BITSTREAM) &&
+            (ret = ff_add_format(&pix_fmts, fmt)) < 0)
+            return ret;
     }
 
     return ff_set_common_formats(ctx, pix_fmts);
@@ -133,7 +134,7 @@ static int config_input(AVFilterLink *inlink)
     if ((ret = av_image_fill_linesizes(s->stride, inlink->format, inlink->w)) < 0)
         return ret;
 
-    s->planeheight[1] = s->planeheight[2] = FF_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
+    s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
     s->planeheight[0] = s->planeheight[3] = inlink->h;
 
     s->nb_planes = av_pix_fmt_count_planes(inlink->format);
@@ -157,7 +158,6 @@ static int config_output(AVFilterLink *outlink)
     av_log(ctx, AV_LOG_VERBOSE, "FPS: %d/%d -> %d/%d\n",
            inlink->frame_rate.num, inlink->frame_rate.den, fps.num, fps.den);
 
-    outlink->flags |= FF_LINK_FLAG_REQUEST_LOOP;
     outlink->frame_rate = fps;
     outlink->time_base = av_mul_q(inlink->time_base, s->pts);
     av_log(ctx, AV_LOG_VERBOSE, "TB: %d/%d -> %d/%d\n",
diff --git a/libavfilter/vf_thumbnail.c b/libavfilter/vf_thumbnail.c
index d70d0635..417ccd56 100644
--- a/libavfilter/vf_thumbnail.c
+++ b/libavfilter/vf_thumbnail.c
@@ -58,15 +58,15 @@ AVFILTER_DEFINE_CLASS(thumbnail);
 
 static av_cold int init(AVFilterContext *ctx)
 {
-    ThumbContext *thumb = ctx->priv;
+    ThumbContext *s = ctx->priv;
 
-    thumb->frames = av_calloc(thumb->n_frames, sizeof(*thumb->frames));
-    if (!thumb->frames) {
+    s->frames = av_calloc(s->n_frames, sizeof(*s->frames));
+    if (!s->frames) {
         av_log(ctx, AV_LOG_ERROR,
                "Allocation failure, try to lower the number of frames\n");
         return AVERROR(ENOMEM);
     }
-    av_log(ctx, AV_LOG_VERBOSE, "batch size: %d frames\n", thumb->n_frames);
+    av_log(ctx, AV_LOG_VERBOSE, "batch size: %d frames\n", s->n_frames);
     return 0;
 }
 
@@ -91,39 +91,39 @@ static double frame_sum_square_err(const int *hist, const double *median)
 static AVFrame *get_best_frame(AVFilterContext *ctx)
 {
     AVFrame *picref;
-    ThumbContext *thumb = ctx->priv;
+    ThumbContext *s = ctx->priv;
     int i, j, best_frame_idx = 0;
-    int nb_frames = thumb->n;
+    int nb_frames = s->n;
     double avg_hist[HIST_SIZE] = {0}, sq_err, min_sq_err = -1;
 
     // average histogram of the N frames
     for (j = 0; j < FF_ARRAY_ELEMS(avg_hist); j++) {
         for (i = 0; i < nb_frames; i++)
-            avg_hist[j] += (double)thumb->frames[i].histogram[j];
+            avg_hist[j] += (double)s->frames[i].histogram[j];
         avg_hist[j] /= nb_frames;
     }
 
     // find the frame closer to the average using the sum of squared errors
     for (i = 0; i < nb_frames; i++) {
-        sq_err = frame_sum_square_err(thumb->frames[i].histogram, avg_hist);
+        sq_err = frame_sum_square_err(s->frames[i].histogram, avg_hist);
         if (i == 0 || sq_err < min_sq_err)
             best_frame_idx = i, min_sq_err = sq_err;
     }
 
     // free and reset everything (except the best frame buffer)
     for (i = 0; i < nb_frames; i++) {
-        memset(thumb->frames[i].histogram, 0, sizeof(thumb->frames[i].histogram));
+        memset(s->frames[i].histogram, 0, sizeof(s->frames[i].histogram));
         if (i != best_frame_idx)
-            av_frame_free(&thumb->frames[i].buf);
+            av_frame_free(&s->frames[i].buf);
     }
-    thumb->n = 0;
+    s->n = 0;
 
     // raise the chosen one
-    picref = thumb->frames[best_frame_idx].buf;
+    picref = s->frames[best_frame_idx].buf;
     av_log(ctx, AV_LOG_INFO, "frame id #%d (pts_time=%f) selected "
            "from a set of %d images\n", best_frame_idx,
-           picref->pts * av_q2d(thumb->tb), nb_frames);
-    thumb->frames[best_frame_idx].buf = NULL;
+           picref->pts * av_q2d(s->tb), nb_frames);
+    s->frames[best_frame_idx].buf = NULL;
 
     return picref;
 }
@@ -132,13 +132,13 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
 {
     int i, j;
     AVFilterContext *ctx  = inlink->dst;
-    ThumbContext *thumb   = ctx->priv;
+    ThumbContext *s   = ctx->priv;
     AVFilterLink *outlink = ctx->outputs[0];
-    int *hist = thumb->frames[thumb->n].histogram;
+    int *hist = s->frames[s->n].histogram;
     const uint8_t *p = frame->data[0];
 
     // keep a reference of each frame
-    thumb->frames[thumb->n].buf = frame;
+    s->frames[s->n].buf = frame;
 
     // update current frame RGB histogram
     for (j = 0; j < inlink->h; j++) {
@@ -151,8 +151,8 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
     }
 
     // no selection until the buffer of N frames is filled up
-    thumb->n++;
-    if (thumb->n < thumb->n_frames)
+    s->n++;
+    if (s->n < s->n_frames)
         return 0;
 
     return ff_filter_frame(outlink, get_best_frame(ctx));
@@ -161,39 +161,35 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
 static av_cold void uninit(AVFilterContext *ctx)
 {
     int i;
-    ThumbContext *thumb = ctx->priv;
-    for (i = 0; i < thumb->n_frames && thumb->frames[i].buf; i++)
-        av_frame_free(&thumb->frames[i].buf);
-    av_freep(&thumb->frames);
+    ThumbContext *s = ctx->priv;
+    for (i = 0; i < s->n_frames && s->frames[i].buf; i++)
+        av_frame_free(&s->frames[i].buf);
+    av_freep(&s->frames);
 }
 
 static int request_frame(AVFilterLink *link)
 {
     AVFilterContext *ctx = link->src;
-    ThumbContext *thumb = ctx->priv;
-
-    /* loop until a frame thumbnail is available (when a frame is queued,
-     * thumb->n is reset to zero) */
-    do {
-        int ret = ff_request_frame(ctx->inputs[0]);
-        if (ret == AVERROR_EOF && thumb->n) {
-            ret = ff_filter_frame(link, get_best_frame(ctx));
-            if (ret < 0)
-                return ret;
-            ret = AVERROR_EOF;
-        }
+    ThumbContext *s = ctx->priv;
+    int ret = ff_request_frame(ctx->inputs[0]);
+
+    if (ret == AVERROR_EOF && s->n) {
+        ret = ff_filter_frame(link, get_best_frame(ctx));
         if (ret < 0)
             return ret;
-    } while (thumb->n);
+        ret = AVERROR_EOF;
+    }
+    if (ret < 0)
+        return ret;
     return 0;
 }
 
 static int config_props(AVFilterLink *inlink)
 {
     AVFilterContext *ctx = inlink->dst;
-    ThumbContext *thumb = ctx->priv;
+    ThumbContext *s = ctx->priv;
 
-    thumb->tb = inlink->time_base;
+    s->tb = inlink->time_base;
     return 0;
 }
 
diff --git a/libavfilter/vf_tile.c b/libavfilter/vf_tile.c
index 47569771..9af00bd8 100644
--- a/libavfilter/vf_tile.c
+++ b/libavfilter/vf_tile.c
@@ -116,8 +116,6 @@ static int config_props(AVFilterLink *outlink)
     ff_draw_init(&tile->draw, inlink->format, 0);
     ff_draw_color(&tile->draw, &tile->blank, tile->rgba_color);
 
-    outlink->flags |= FF_LINK_FLAG_REQUEST_LOOP;
-
     return 0;
 }
 
diff --git a/libavfilter/vf_tinterlace.c b/libavfilter/vf_tinterlace.c
index e2f8c343..8a796ce9 100644
--- a/libavfilter/vf_tinterlace.c
+++ b/libavfilter/vf_tinterlace.c
@@ -46,6 +46,7 @@ static const AVOption tinterlace_options[] = {
     {"interleave_top",    "interleave top and bottom fields",             0, AV_OPT_TYPE_CONST, {.i64=MODE_INTERLEAVE_TOP},    INT_MIN, INT_MAX, FLAGS, "mode"},
     {"interleave_bottom", "interleave bottom and top fields",             0, AV_OPT_TYPE_CONST, {.i64=MODE_INTERLEAVE_BOTTOM}, INT_MIN, INT_MAX, FLAGS, "mode"},
     {"interlacex2",       "interlace fields from two consecutive frames", 0, AV_OPT_TYPE_CONST, {.i64=MODE_INTERLACEX2},       INT_MIN, INT_MAX, FLAGS, "mode"},
+    {"mergex2",           "merge fields keeping same frame rate",         0, AV_OPT_TYPE_CONST, {.i64=MODE_MERGEX2},           INT_MIN, INT_MAX, FLAGS, "mode"},
 
     {"flags",             "set flags", OFFSET(flags), AV_OPT_TYPE_FLAGS, {.i64 = 0}, 0, INT_MAX, 0, "flags" },
     {"low_pass_filter",   "enable vertical low-pass filter",              0, AV_OPT_TYPE_CONST, {.i64 = TINTERLACE_FLAG_VLPF}, INT_MIN, INT_MAX, FLAGS, "flags" },
@@ -117,11 +118,10 @@ static int config_out_props(AVFilterLink *outlink)
     int i;
 
     tinterlace->vsub = desc->log2_chroma_h;
-    outlink->flags |= FF_LINK_FLAG_REQUEST_LOOP;
     outlink->w = inlink->w;
-    outlink->h = tinterlace->mode == MODE_MERGE || tinterlace->mode == MODE_PAD ?
+    outlink->h = tinterlace->mode == MODE_MERGE || tinterlace->mode == MODE_PAD || tinterlace->mode == MODE_MERGEX2?
         inlink->h*2 : inlink->h;
-    if (tinterlace->mode == MODE_MERGE || tinterlace->mode == MODE_PAD)
+    if (tinterlace->mode == MODE_MERGE || tinterlace->mode == MODE_PAD || tinterlace->mode == MODE_MERGEX2)
         outlink->sample_aspect_ratio = av_mul_q(inlink->sample_aspect_ratio,
                                                 av_make_q(2, 1));
 
@@ -131,13 +131,13 @@ static int config_out_props(AVFilterLink *outlink)
         if (ff_fmt_is_in(outlink->format, full_scale_yuvj_pix_fmts))
             black[0] = black[3] = 0;
         ret = av_image_alloc(tinterlace->black_data, tinterlace->black_linesize,
-                             outlink->w, outlink->h, outlink->format, 1);
+                             outlink->w, outlink->h, outlink->format, 16);
         if (ret < 0)
             return ret;
 
         /* fill black picture with black */
         for (i = 0; i < 4 && tinterlace->black_data[i]; i++) {
-            int h = i == 1 || i == 2 ? FF_CEIL_RSHIFT(outlink->h, desc->log2_chroma_h) : outlink->h;
+            int h = i == 1 || i == 2 ? AV_CEIL_RSHIFT(outlink->h, desc->log2_chroma_h) : outlink->h;
             memset(tinterlace->black_data[i], black[i],
                    tinterlace->black_linesize[i] * h);
         }
@@ -154,6 +154,9 @@ static int config_out_props(AVFilterLink *outlink)
         tinterlace->preout_time_base.den *= 2;
         outlink->frame_rate = av_mul_q(inlink->frame_rate, (AVRational){2,1});
         outlink->time_base  = av_mul_q(inlink->time_base , (AVRational){1,2});
+    } else if (tinterlace->mode == MODE_MERGEX2) {
+        outlink->frame_rate = inlink->frame_rate;
+        outlink->time_base  = inlink->time_base;
     } else if (tinterlace->mode != MODE_PAD) {
         outlink->frame_rate = av_mul_q(inlink->frame_rate, (AVRational){1,2});
         outlink->time_base  = av_mul_q(inlink->time_base , (AVRational){2,1});
@@ -208,8 +211,8 @@ void copy_picture_field(TInterlaceContext *tinterlace,
     int h;
 
     for (plane = 0; plane < desc->nb_components; plane++) {
-        int lines = plane == 1 || plane == 2 ? FF_CEIL_RSHIFT(src_h, vsub) : src_h;
-        int cols  = plane == 1 || plane == 2 ? FF_CEIL_RSHIFT(    w, hsub) : w;
+        int lines = plane == 1 || plane == 2 ? AV_CEIL_RSHIFT(src_h, vsub) : src_h;
+        int cols  = plane == 1 || plane == 2 ? AV_CEIL_RSHIFT(    w, hsub) : w;
         uint8_t *dstp = dst[plane];
         const uint8_t *srcp = src[plane];
 
@@ -260,6 +263,8 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *picref)
         return 0;
 
     switch (tinterlace->mode) {
+    case MODE_MERGEX2: /* move the odd frame into the upper field of the new image, even into
+                        * the lower field, generating a double-height video at same framerate */
     case MODE_MERGE: /* move the odd frame into the upper field of the new image, even into
              * the lower field, generating a double-height video at half framerate */
         out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
@@ -275,13 +280,14 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *picref)
         copy_picture_field(tinterlace, out->data, out->linesize,
                            (const uint8_t **)cur->data, cur->linesize,
                            inlink->format, inlink->w, inlink->h,
-                           FIELD_UPPER_AND_LOWER, 1, FIELD_UPPER, tinterlace->flags);
+                           FIELD_UPPER_AND_LOWER, 1, tinterlace->mode == MODE_MERGEX2 ? inlink->frame_count & 1 ? FIELD_LOWER : FIELD_UPPER : FIELD_UPPER, tinterlace->flags);
         /* write even frame lines into the lower field of the new frame */
         copy_picture_field(tinterlace, out->data, out->linesize,
                            (const uint8_t **)next->data, next->linesize,
                            inlink->format, inlink->w, inlink->h,
-                           FIELD_UPPER_AND_LOWER, 1, FIELD_LOWER, tinterlace->flags);
-        av_frame_free(&tinterlace->next);
+                           FIELD_UPPER_AND_LOWER, 1, tinterlace->mode == MODE_MERGEX2 ? inlink->frame_count & 1 ? FIELD_UPPER : FIELD_LOWER : FIELD_LOWER, tinterlace->flags);
+        if (tinterlace->mode != MODE_MERGEX2)
+            av_frame_free(&tinterlace->next);
         break;
 
     case MODE_DROP_ODD:  /* only output even frames, odd  frames are dropped; height unchanged, half framerate */
diff --git a/libavfilter/vf_transpose.c b/libavfilter/vf_transpose.c
index e4de31b0..9555ff20 100644
--- a/libavfilter/vf_transpose.c
+++ b/libavfilter/vf_transpose.c
@@ -63,15 +63,16 @@ typedef struct TransContext {
 static int query_formats(AVFilterContext *ctx)
 {
     AVFilterFormats *pix_fmts = NULL;
-    int fmt;
+    int fmt, ret;
 
     for (fmt = 0; av_pix_fmt_desc_get(fmt); fmt++) {
         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);
         if (!(desc->flags & AV_PIX_FMT_FLAG_PAL ||
               desc->flags & AV_PIX_FMT_FLAG_HWACCEL ||
               desc->flags & AV_PIX_FMT_FLAG_BITSTREAM ||
-              desc->log2_chroma_w != desc->log2_chroma_h))
-            ff_add_format(&pix_fmts, fmt);
+              desc->log2_chroma_w != desc->log2_chroma_h) &&
+            (ret = ff_add_format(&pix_fmts, fmt)) < 0)
+            return ret;
     }
 
 
@@ -81,32 +82,32 @@ static int query_formats(AVFilterContext *ctx)
 static int config_props_output(AVFilterLink *outlink)
 {
     AVFilterContext *ctx = outlink->src;
-    TransContext *trans = ctx->priv;
+    TransContext *s = ctx->priv;
     AVFilterLink *inlink = ctx->inputs[0];
     const AVPixFmtDescriptor *desc_out = av_pix_fmt_desc_get(outlink->format);
     const AVPixFmtDescriptor *desc_in  = av_pix_fmt_desc_get(inlink->format);
 
-    if (trans->dir&4) {
+    if (s->dir&4) {
         av_log(ctx, AV_LOG_WARNING,
                "dir values greater than 3 are deprecated, use the passthrough option instead\n");
-        trans->dir &= 3;
-        trans->passthrough = TRANSPOSE_PT_TYPE_LANDSCAPE;
+        s->dir &= 3;
+        s->passthrough = TRANSPOSE_PT_TYPE_LANDSCAPE;
     }
 
-    if ((inlink->w >= inlink->h && trans->passthrough == TRANSPOSE_PT_TYPE_LANDSCAPE) ||
-        (inlink->w <= inlink->h && trans->passthrough == TRANSPOSE_PT_TYPE_PORTRAIT)) {
+    if ((inlink->w >= inlink->h && s->passthrough == TRANSPOSE_PT_TYPE_LANDSCAPE) ||
+        (inlink->w <= inlink->h && s->passthrough == TRANSPOSE_PT_TYPE_PORTRAIT)) {
         av_log(ctx, AV_LOG_VERBOSE,
                "w:%d h:%d -> w:%d h:%d (passthrough mode)\n",
                inlink->w, inlink->h, inlink->w, inlink->h);
         return 0;
     } else {
-        trans->passthrough = TRANSPOSE_PT_TYPE_NONE;
+        s->passthrough = TRANSPOSE_PT_TYPE_NONE;
     }
 
-    trans->hsub = desc_in->log2_chroma_w;
-    trans->vsub = desc_in->log2_chroma_h;
+    s->hsub = desc_in->log2_chroma_w;
+    s->vsub = desc_in->log2_chroma_h;
 
-    av_image_fill_max_pixsteps(trans->pixsteps, NULL, desc_out);
+    av_image_fill_max_pixsteps(s->pixsteps, NULL, desc_out);
 
     outlink->w = inlink->h;
     outlink->h = inlink->w;
@@ -119,17 +120,17 @@ static int config_props_output(AVFilterLink *outlink)
 
     av_log(ctx, AV_LOG_VERBOSE,
            "w:%d h:%d dir:%d -> w:%d h:%d rotation:%s vflip:%d\n",
-           inlink->w, inlink->h, trans->dir, outlink->w, outlink->h,
-           trans->dir == 1 || trans->dir == 3 ? "clockwise" : "counterclockwise",
-           trans->dir == 0 || trans->dir == 3);
+           inlink->w, inlink->h, s->dir, outlink->w, outlink->h,
+           s->dir == 1 || s->dir == 3 ? "clockwise" : "counterclockwise",
+           s->dir == 0 || s->dir == 3);
     return 0;
 }
 
 static AVFrame *get_video_buffer(AVFilterLink *inlink, int w, int h)
 {
-    TransContext *trans = inlink->dst->priv;
+    TransContext *s = inlink->dst->priv;
 
-    return trans->passthrough ?
+    return s->passthrough ?
         ff_null_get_video_buffer   (inlink, w, h) :
         ff_default_get_video_buffer(inlink, w, h);
 }
@@ -141,19 +142,19 @@ typedef struct ThreadData {
 static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr,
                         int nb_jobs)
 {
-    TransContext *trans = ctx->priv;
+    TransContext *s = ctx->priv;
     ThreadData *td = arg;
     AVFrame *out = td->out;
     AVFrame *in = td->in;
     int plane;
 
     for (plane = 0; out->data[plane]; plane++) {
-        int hsub    = plane == 1 || plane == 2 ? trans->hsub : 0;
-        int vsub    = plane == 1 || plane == 2 ? trans->vsub : 0;
-        int pixstep = trans->pixsteps[plane];
-        int inh     = FF_CEIL_RSHIFT(in->height, vsub);
-        int outw    = FF_CEIL_RSHIFT(out->width,  hsub);
-        int outh    = FF_CEIL_RSHIFT(out->height, vsub);
+        int hsub    = plane == 1 || plane == 2 ? s->hsub : 0;
+        int vsub    = plane == 1 || plane == 2 ? s->vsub : 0;
+        int pixstep = s->pixsteps[plane];
+        int inh     = AV_CEIL_RSHIFT(in->height, vsub);
+        int outw    = AV_CEIL_RSHIFT(out->width,  hsub);
+        int outh    = AV_CEIL_RSHIFT(out->height, vsub);
         int start   = (outh *  jobnr   ) / nb_jobs;
         int end     = (outh * (jobnr+1)) / nb_jobs;
         uint8_t *dst, *src;
@@ -165,12 +166,12 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr,
         src         = in->data[plane];
         srclinesize = in->linesize[plane];
 
-        if (trans->dir & 1) {
+        if (s->dir & 1) {
             src         += in->linesize[plane] * (inh - 1);
             srclinesize *= -1;
         }
 
-        if (trans->dir & 2) {
+        if (s->dir & 2) {
             dst          = out->data[plane] + dstlinesize * (outh - start - 1);
             dstlinesize *= -1;
         }
@@ -226,12 +227,12 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr,
 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 {
     AVFilterContext *ctx = inlink->dst;
-    TransContext *trans = ctx->priv;
+    TransContext *s = ctx->priv;
     AVFilterLink *outlink = ctx->outputs[0];
     ThreadData td;
     AVFrame *out;
 
-    if (trans->passthrough)
+    if (s->passthrough)
         return ff_filter_frame(outlink, in);
 
     out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
diff --git a/libavfilter/vf_unsharp.c b/libavfilter/vf_unsharp.c
index d5f5018c..d264e24e 100644
--- a/libavfilter/vf_unsharp.c
+++ b/libavfilter/vf_unsharp.c
@@ -105,15 +105,15 @@ static void apply_unsharp(      uint8_t *dst, int dst_stride,
 static int apply_unsharp_c(AVFilterContext *ctx, AVFrame *in, AVFrame *out)
 {
     AVFilterLink *inlink = ctx->inputs[0];
-    UnsharpContext *unsharp = ctx->priv;
+    UnsharpContext *s = ctx->priv;
     int i, plane_w[3], plane_h[3];
     UnsharpFilterParam *fp[3];
     plane_w[0] = inlink->w;
-    plane_w[1] = plane_w[2] = FF_CEIL_RSHIFT(inlink->w, unsharp->hsub);
+    plane_w[1] = plane_w[2] = AV_CEIL_RSHIFT(inlink->w, s->hsub);
     plane_h[0] = inlink->h;
-    plane_h[1] = plane_h[2] = FF_CEIL_RSHIFT(inlink->h, unsharp->vsub);
-    fp[0] = &unsharp->luma;
-    fp[1] = fp[2] = &unsharp->chroma;
+    plane_h[1] = plane_h[2] = AV_CEIL_RSHIFT(inlink->h, s->vsub);
+    fp[0] = &s->luma;
+    fp[1] = fp[2] = &s->chroma;
     for (i = 0; i < 3; i++) {
         apply_unsharp(out->data[i], out->linesize[i], in->data[i], in->linesize[i], plane_w[i], plane_h[i], fp[i]);
     }
@@ -135,19 +135,19 @@ static void set_filter_param(UnsharpFilterParam *fp, int msize_x, int msize_y, f
 static av_cold int init(AVFilterContext *ctx)
 {
     int ret = 0;
-    UnsharpContext *unsharp = ctx->priv;
+    UnsharpContext *s = ctx->priv;
 
 
-    set_filter_param(&unsharp->luma,   unsharp->lmsize_x, unsharp->lmsize_y, unsharp->lamount);
-    set_filter_param(&unsharp->chroma, unsharp->cmsize_x, unsharp->cmsize_y, unsharp->camount);
+    set_filter_param(&s->luma,   s->lmsize_x, s->lmsize_y, s->lamount);
+    set_filter_param(&s->chroma, s->cmsize_x, s->cmsize_y, s->camount);
 
-    unsharp->apply_unsharp = apply_unsharp_c;
-    if (!CONFIG_OPENCL && unsharp->opencl) {
+    s->apply_unsharp = apply_unsharp_c;
+    if (!CONFIG_OPENCL && s->opencl) {
         av_log(ctx, AV_LOG_ERROR, "OpenCL support was not enabled in this build, cannot be selected\n");
         return AVERROR(EINVAL);
     }
-    if (CONFIG_OPENCL && unsharp->opencl) {
-        unsharp->apply_unsharp = ff_opencl_apply_unsharp;
+    if (CONFIG_OPENCL && s->opencl) {
+        s->apply_unsharp = ff_opencl_apply_unsharp;
         ret = ff_opencl_unsharp_init(ctx);
         if (ret < 0)
             return ret;
@@ -194,17 +194,17 @@ static int init_filter_param(AVFilterContext *ctx, UnsharpFilterParam *fp, const
 
 static int config_props(AVFilterLink *link)
 {
-    UnsharpContext *unsharp = link->dst->priv;
+    UnsharpContext *s = link->dst->priv;
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(link->format);
     int ret;
 
-    unsharp->hsub = desc->log2_chroma_w;
-    unsharp->vsub = desc->log2_chroma_h;
+    s->hsub = desc->log2_chroma_w;
+    s->vsub = desc->log2_chroma_h;
 
-    ret = init_filter_param(link->dst, &unsharp->luma,   "luma",   link->w);
+    ret = init_filter_param(link->dst, &s->luma,   "luma",   link->w);
     if (ret < 0)
         return ret;
-    ret = init_filter_param(link->dst, &unsharp->chroma, "chroma", FF_CEIL_RSHIFT(link->w, unsharp->hsub));
+    ret = init_filter_param(link->dst, &s->chroma, "chroma", AV_CEIL_RSHIFT(link->w, s->hsub));
     if (ret < 0)
         return ret;
 
@@ -221,19 +221,19 @@ static void free_filter_param(UnsharpFilterParam *fp)
 
 static av_cold void uninit(AVFilterContext *ctx)
 {
-    UnsharpContext *unsharp = ctx->priv;
+    UnsharpContext *s = ctx->priv;
 
-    if (CONFIG_OPENCL && unsharp->opencl) {
+    if (CONFIG_OPENCL && s->opencl) {
         ff_opencl_unsharp_uninit(ctx);
     }
 
-    free_filter_param(&unsharp->luma);
-    free_filter_param(&unsharp->chroma);
+    free_filter_param(&s->luma);
+    free_filter_param(&s->chroma);
 }
 
 static int filter_frame(AVFilterLink *link, AVFrame *in)
 {
-    UnsharpContext *unsharp = link->dst->priv;
+    UnsharpContext *s = link->dst->priv;
     AVFilterLink *outlink   = link->dst->outputs[0];
     AVFrame *out;
     int ret = 0;
@@ -244,13 +244,13 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
         return AVERROR(ENOMEM);
     }
     av_frame_copy_props(out, in);
-    if (CONFIG_OPENCL && unsharp->opencl) {
+    if (CONFIG_OPENCL && s->opencl) {
         ret = ff_opencl_unsharp_process_inout_buf(link->dst, in, out);
         if (ret < 0)
             goto end;
     }
 
-    ret = unsharp->apply_unsharp(link->dst, in, out);
+    ret = s->apply_unsharp(link->dst, in, out);
 end:
     av_frame_free(&in);
 
@@ -276,7 +276,7 @@ static const AVOption unsharp_options[] = {
     { "cy",             "set chroma matrix vertical size",   OFFSET(cmsize_y), AV_OPT_TYPE_INT,   { .i64 = 5 }, MIN_SIZE, MAX_SIZE, FLAGS },
     { "chroma_amount",  "set chroma effect strength",        OFFSET(camount),  AV_OPT_TYPE_FLOAT, { .dbl = 0 },       -2,        5, FLAGS },
     { "ca",             "set chroma effect strength",        OFFSET(camount),  AV_OPT_TYPE_FLOAT, { .dbl = 0 },       -2,        5, FLAGS },
-    { "opencl",         "use OpenCL filtering capabilities", OFFSET(opencl), AV_OPT_TYPE_INT, { .i64 = 0 },        0,        1, FLAGS },
+    { "opencl",         "use OpenCL filtering capabilities", OFFSET(opencl),   AV_OPT_TYPE_BOOL,  { .i64 = 0 },        0,        1, FLAGS },
     { NULL }
 };
 
diff --git a/libavfilter/vf_uspp.c b/libavfilter/vf_uspp.c
index 82ee9976..f963254e 100644
--- a/libavfilter/vf_uspp.c
+++ b/libavfilter/vf_uspp.c
@@ -61,7 +61,7 @@ typedef struct {
 static const AVOption uspp_options[] = {
     { "quality",       "set quality",                          OFFSET(log2_count),    AV_OPT_TYPE_INT, {.i64 = 3}, 0, MAX_LEVEL, FLAGS },
     { "qp",            "force a constant quantizer parameter", OFFSET(qp),            AV_OPT_TYPE_INT, {.i64 = 0}, 0, 63,        FLAGS },
-    { "use_bframe_qp", "use B-frames' QP",                     OFFSET(use_bframe_qp), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1,         FLAGS },
+    { "use_bframe_qp", "use B-frames' QP",                     OFFSET(use_bframe_qp), AV_OPT_TYPE_BOOL,{.i64 = 0}, 0, 1,         FLAGS },
     { NULL }
 };
 
@@ -189,8 +189,8 @@ static void filter(USPPContext *p, uint8_t *dst[3], uint8_t *src[3],
 
     for (i = 0; i < 3; i++) {
         int is_chroma = !!i;
-        int w = FF_CEIL_RSHIFT(width,  is_chroma ? p->hsub : 0);
-        int h = FF_CEIL_RSHIFT(height, is_chroma ? p->vsub : 0);
+        int w = AV_CEIL_RSHIFT(width,  is_chroma ? p->hsub : 0);
+        int h = AV_CEIL_RSHIFT(height, is_chroma ? p->vsub : 0);
         int stride = p->temp_stride[i];
         int block = BLOCK >> (is_chroma ? p->hsub : 0);
 
@@ -263,8 +263,8 @@ static void filter(USPPContext *p, uint8_t *dst[3], uint8_t *src[3],
 
         offset = (BLOCKc-x1c) + (BLOCKc-y1c) * p->frame_dec->linesize[1];
 
-        for (y = 0; y < FF_CEIL_RSHIFT(height, p->vsub); y++) {
-            for (x = 0; x < FF_CEIL_RSHIFT(width, p->hsub); x++) {
+        for (y = 0; y < AV_CEIL_RSHIFT(height, p->vsub); y++) {
+            for (x = 0; x < AV_CEIL_RSHIFT(width, p->hsub); x++) {
                 p->temp[1][x + y * p->temp_stride[1]] += p->frame_dec->data[1][x + y * p->frame_dec->linesize[1] + offset];
                 p->temp[2][x + y * p->temp_stride[2]] += p->frame_dec->data[2][x + y * p->frame_dec->linesize[2] + offset];
             }
@@ -276,8 +276,8 @@ static void filter(USPPContext *p, uint8_t *dst[3], uint8_t *src[3],
         if (!dst[j])
             continue;
         store_slice_c(dst[j], p->temp[j], dst_stride[j], p->temp_stride[j],
-                      FF_CEIL_RSHIFT(width,  is_chroma ? p->hsub : 0),
-                      FF_CEIL_RSHIFT(height, is_chroma ? p->vsub : 0),
+                      AV_CEIL_RSHIFT(width,  is_chroma ? p->hsub : 0),
+                      AV_CEIL_RSHIFT(height, is_chroma ? p->vsub : 0),
                       8-p->log2_count);
     }
 }
@@ -325,8 +325,8 @@ static int config_input(AVFilterLink *inlink)
         int h = (height + 4 * BLOCK-1) & (~(2 * BLOCK-1));
 
         if (is_chroma) {
-            w = FF_CEIL_RSHIFT(w, uspp->hsub);
-            h = FF_CEIL_RSHIFT(h, uspp->vsub);
+            w = AV_CEIL_RSHIFT(w, uspp->hsub);
+            h = AV_CEIL_RSHIFT(h, uspp->vsub);
         }
 
         uspp->temp_stride[i] = w;
@@ -351,7 +351,7 @@ static int config_input(AVFilterLink *inlink)
         avctx_enc->gop_size = INT_MAX;
         avctx_enc->max_b_frames = 0;
         avctx_enc->pix_fmt = inlink->format;
-        avctx_enc->flags = CODEC_FLAG_QSCALE | CODEC_FLAG_LOW_DELAY;
+        avctx_enc->flags = AV_CODEC_FLAG_QSCALE | AV_CODEC_FLAG_LOW_DELAY;
         avctx_enc->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;
         avctx_enc->global_quality = 123;
         av_dict_set(&opts, "no_bitstream", "1", 0);
@@ -394,11 +394,11 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
             /* if the qp stride is not set, it means the QP are only defined on
              * a line basis */
             if (!qp_stride) {
-                w = FF_CEIL_RSHIFT(inlink->w, 4);
+                w = AV_CEIL_RSHIFT(inlink->w, 4);
                 h = 1;
             } else {
                 w = qp_stride;
-                h = FF_CEIL_RSHIFT(inlink->h, 4);
+                h = AV_CEIL_RSHIFT(inlink->h, 4);
             }
 
             if (w * h > uspp->non_b_qp_alloc_size) {
diff --git a/libavfilter/vf_vectorscope.c b/libavfilter/vf_vectorscope.c
new file mode 100644
index 00000000..2112b80a
--- /dev/null
+++ b/libavfilter/vf_vectorscope.c
@@ -0,0 +1,732 @@
+/*
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avassert.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/opt.h"
+#include "libavutil/parseutils.h"
+#include "libavutil/pixdesc.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+enum VectorscopeMode {
+    GRAY,    /* hits add intensity to plane pd (YUV) or to planes 0..2 (RGB) */
+    COLOR,   /* accumulates like GRAY; cells left at 0 then get x = column, y = row */
+    COLOR2,  /* hit cells store x and y; pd is derived from x and y while still 0 */
+    COLOR3,  /* hit cells store x and y; pd accumulates intensity */
+    COLOR4,  /* hit cells store x and y; pd keeps the largest input pd sample */
+    MODE_NB  /* number of modes; upper bound of the "mode" option is MODE_NB-1 */
+};
+
+typedef struct VectorscopeContext {
+    const AVClass *class;
+    int mode;                 /* enum VectorscopeMode, set through the "mode" option */
+    int intensity;            /* fintensity * (size - 1), computed in config_output() */
+    float fintensity;         /* "intensity" option, range 0..1 */
+    const uint8_t *bg_color;  /* per-plane background values, chosen in config_input() */
+    int planewidth[4];        /* input plane widths */
+    int planeheight[4];       /* input plane heights */
+    int hsub, vsub;           /* log2_chroma_w / log2_chroma_h of the input format */
+    int x, y, pd;             /* x, y: planes on the axes (options); pd: plane picked in config_input() */
+    int is_yuv;               /* 0 when the input format carries AV_PIX_FMT_FLAG_RGB */
+    int size;                 /* 1 << depth of component 0; also the output width and height */
+    int mult;                 /* size / 256; scales bg_color in vectorscope16() */
+    int envelope;             /* "envelope" option: 0 none, 1 instant, 2 peak, 3 peak+instant */
+    uint8_t peak[1024][1024]; /* [row][column] set to 1 once that cell was non-zero; never reset in this file */
+
+    void (*vectorscope)(struct VectorscopeContext *s,
+                        AVFrame *in, AVFrame *out, int pd); /* vectorscope8 or vectorscope16 */
+} VectorscopeContext;
+
+#define OFFSET(x) offsetof(VectorscopeContext, x)
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM /* unparenthesized: use only as a whole initializer field */
+
+static const AVOption vectorscope_options[] = { /* mode, intensity and envelope each have a one-letter alias; x and y do not */
+    { "mode", "set vectorscope mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64=0}, 0, MODE_NB-1, FLAGS, "mode"},
+    { "m",    "set vectorscope mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64=0}, 0, MODE_NB-1, FLAGS, "mode"},
+    {   "gray",   0, 0, AV_OPT_TYPE_CONST, {.i64=GRAY},   0, 0, FLAGS, "mode" },
+    {   "color",  0, 0, AV_OPT_TYPE_CONST, {.i64=COLOR},  0, 0, FLAGS, "mode" },
+    {   "color2", 0, 0, AV_OPT_TYPE_CONST, {.i64=COLOR2}, 0, 0, FLAGS, "mode" },
+    {   "color3", 0, 0, AV_OPT_TYPE_CONST, {.i64=COLOR3}, 0, 0, FLAGS, "mode" },
+    {   "color4", 0, 0, AV_OPT_TYPE_CONST, {.i64=COLOR4}, 0, 0, FLAGS, "mode" },
+    { "x", "set color component on X axis", OFFSET(x), AV_OPT_TYPE_INT, {.i64=1}, 0, 2, FLAGS}, /* plane index 0..2 */
+    { "y", "set color component on Y axis", OFFSET(y), AV_OPT_TYPE_INT, {.i64=2}, 0, 2, FLAGS}, /* plane index 0..2 */
+    { "intensity", "set intensity", OFFSET(fintensity), AV_OPT_TYPE_FLOAT, {.dbl=0.004}, 0, 1, FLAGS},
+    { "i",         "set intensity", OFFSET(fintensity), AV_OPT_TYPE_FLOAT, {.dbl=0.004}, 0, 1, FLAGS},
+    { "envelope",  "set envelope", OFFSET(envelope), AV_OPT_TYPE_INT, {.i64=0}, 0, 3, FLAGS, "envelope"},
+    { "e",         "set envelope", OFFSET(envelope), AV_OPT_TYPE_INT, {.i64=0}, 0, 3, FLAGS, "envelope"},
+    {   "none",         0, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "envelope" },
+    {   "instant",      0, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "envelope" },
+    {   "peak",         0, 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, FLAGS, "envelope" },
+    {   "peak+instant", 0, 0, AV_OPT_TYPE_CONST, {.i64=3}, 0, 0, FLAGS, "envelope" }, /* 3 also triggers the instant pass inside envelope_peak*() */
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(vectorscope);
+
+static const enum AVPixelFormat out_yuv8_pix_fmts[] = { /* query_formats() fallback: any input that is not RGB 8/9/10-bit or YUV 9/10-bit */
+    AV_PIX_FMT_YUVA444P, AV_PIX_FMT_YUV444P,
+    AV_PIX_FMT_NONE
+};
+
+static const enum AVPixelFormat out_yuv9_pix_fmts[] = { /* offered for non-RGB input of depth 9 */
+    AV_PIX_FMT_YUV444P9,
+    AV_PIX_FMT_NONE
+};
+
+static const enum AVPixelFormat out_yuv10_pix_fmts[] = { /* offered for non-RGB input of depth 10 */
+    AV_PIX_FMT_YUV444P10,
+    AV_PIX_FMT_NONE
+};
+
+static const enum AVPixelFormat out_rgb8_pix_fmts[] = { /* offered for RGB input of depth 8 */
+    AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRP,
+    AV_PIX_FMT_NONE
+};
+
+static const enum AVPixelFormat out_rgb9_pix_fmts[] = { /* offered for RGB input of depth 9 */
+    AV_PIX_FMT_GBRP9,
+    AV_PIX_FMT_NONE
+};
+
+static const enum AVPixelFormat out_rgb10_pix_fmts[] = { /* offered for RGB input of depth 10 */
+    AV_PIX_FMT_GBRP10,
+    AV_PIX_FMT_NONE
+};
+
+static const enum AVPixelFormat in1_pix_fmts[] = { /* accepted input unless x/y are planes 1 and 2: no subsampled formats */
+    AV_PIX_FMT_YUVA444P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUVJ444P,
+    AV_PIX_FMT_YUV444P9, AV_PIX_FMT_YUV444P10,
+    AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRP,
+    AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10,
+    AV_PIX_FMT_NONE
+};
+
+static const enum AVPixelFormat in2_pix_fmts[] = { /* accepted input when x/y are planes 1 and 2 (either order) */
+    AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUVJ420P,
+    AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUVJ422P,
+    AV_PIX_FMT_YUVA444P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUVJ444P,
+    AV_PIX_FMT_YUV411P,  AV_PIX_FMT_YUVJ411P,
+    AV_PIX_FMT_YUV440P,  AV_PIX_FMT_YUV410P,
+    AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRP,
+    AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10,
+    AV_PIX_FMT_YUV420P9, AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV444P9,
+    AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
+    AV_PIX_FMT_NONE
+};
+
+/* Negotiate formats: the output list follows the RGB flag and bit depth of the input candidates. */
+static int query_formats(AVFilterContext *ctx)
+{
+    VectorscopeContext *s = ctx->priv;
+    AVFilterLink *inlink = ctx->inputs[0];
+    const enum AVPixelFormat *out_list;
+    const AVPixFmtDescriptor *first, *cur;
+    AVFilterFormats *cand = inlink->in_formats;
+    int bits, is_rgb, n, err;
+
+    /* Nothing can be decided until at least one input candidate is known. */
+    if (!cand || !cand->nb_formats)
+        return AVERROR(EAGAIN);
+
+    if (!inlink->out_formats) {
+        /* The list with subsampled formats is only offered when planes 1 and 2 are the axes. */
+        const int axes_12 = (s->x == 1 && s->y == 2) || (s->x == 2 && s->y == 1);
+
+        err = ff_formats_ref(ff_make_format_list(axes_12 ? in2_pix_fmts : in1_pix_fmts),
+                             &inlink->out_formats);
+        if (err < 0)
+            return err;
+    }
+
+    /* Every candidate must share RGB-ness and depth with the first one, else report EAGAIN. */
+    first  = av_pix_fmt_desc_get(cand->formats[0]);
+    is_rgb = first->flags & AV_PIX_FMT_FLAG_RGB;
+    bits   = first->comp[0].depth;
+    for (n = 1; n < cand->nb_formats; n++) {
+        cur = av_pix_fmt_desc_get(cand->formats[n]);
+        if ((cur->flags & AV_PIX_FMT_FLAG_RGB) != is_rgb || cur->comp[0].depth != bits)
+            return AVERROR(EAGAIN);
+    }
+
+    switch (bits) {
+    case 9:
+        out_list = is_rgb ? out_rgb9_pix_fmts : out_yuv9_pix_fmts;
+        break;
+    case 10:
+        out_list = is_rgb ? out_rgb10_pix_fmts : out_yuv10_pix_fmts;
+        break;
+    default:
+        /* 8-bit RGB gets the GBR(A)P list; everything else falls back to the 8-bit YUV list. */
+        out_list = is_rgb && bits == 8 ? out_rgb8_pix_fmts : out_yuv8_pix_fmts;
+        break;
+    }
+
+    err = ff_formats_ref(ff_make_format_list(out_list), &ctx->outputs[0]->in_formats);
+    if (err < 0)
+        return err;
+    return 0;
+}
+
+static const uint8_t black_yuva_color[4] = { 0, 127, 127, 0 }; /* background for non-GBR input, indexed by plane */
+static const uint8_t black_gbrp_color[4] = { 0, 0, 0, 0 };     /* background for GBR(A)P input, indexed by plane */
+
+/* Make the output a size x size square with 1:1 pixels and scale the intensity option to it. */
+static int config_output(AVFilterLink *outlink)
+{
+    VectorscopeContext *s = outlink->src->priv;
+    outlink->w = outlink->h = s->size;
+    outlink->sample_aspect_ratio = (AVRational){1,1};
+    s->intensity = s->fintensity * (s->size - 1);
+    return 0;
+}
+
+/* Raise to full scale every lit cell that sits on the frame border or has an unlit 4-neighbour. */
+static void envelope_instant16(VectorscopeContext *s, AVFrame *out)
+{
+    const int stride = out->linesize[0] / 2, full = s->size - 1;
+    const int plane = (s->mode == COLOR || !s->is_yuv) ? s->pd : 0;
+    const int last_col = out->width - 1, last_row = out->height - 1;
+    uint16_t *base = (uint16_t *)out->data[plane];
+    int r, c;
+
+    for (r = 0; r <= last_row; r++) {
+        uint16_t *row = base + r * stride;
+
+        for (c = 0; c <= last_col; c++) {
+            if (row[c] &&
+                (c == 0 || !row[c - 1] || c == last_col || !row[c + 1] ||
+                 r == 0 || !row[c - stride] || r == last_row || !row[c + stride]))
+                row[c] = full;
+        }
+    }
+}
+
+/* Record every cell that is lit in s->peak, then outline the recorded set at full scale. */
+static void envelope_peak16(VectorscopeContext *s, AVFrame *out)
+{
+    const int stride = out->linesize[0] / 2, full = s->size - 1;
+    const int plane = (s->mode == COLOR || !s->is_yuv) ? s->pd : 0;
+    const int last_col = out->width - 1, last_row = out->height - 1;
+    uint16_t *base = (uint16_t *)out->data[plane];
+    int r, c;
+
+    for (r = 0; r <= last_row; r++)
+        for (c = 0; c <= last_col; c++)
+            if (base[r * stride + c])
+                s->peak[r][c] = 1;
+
+    /* Envelope value 3 is "peak+instant": the current frame gets its own outline too. */
+    if (s->envelope == 3)
+        envelope_instant16(s, out);
+
+    /* A recorded cell is drawn when it is on the frame border or has an unrecorded 4-neighbour. */
+    for (r = 0; r <= last_row; r++) {
+        const uint8_t *mask = s->peak[r];
+
+        for (c = 0; c <= last_col; c++) {
+            if (mask[c] &&
+                (c == 0 || !mask[c - 1] || c == last_col || !mask[c + 1] ||
+                 r == 0 || !s->peak[r - 1][c] || r == last_row || !s->peak[r + 1][c]))
+                base[r * stride + c] = full;
+        }
+    }
+}
+
+/* 8-bit outline pass: lit cells on the frame border or next to an unlit 4-neighbour become 255. */
+static void envelope_instant(VectorscopeContext *s, AVFrame *out)
+{
+    const int stride = out->linesize[0];
+    const int last_col = out->width - 1, last_row = out->height - 1;
+    uint8_t *base = out->data[(s->mode == COLOR || !s->is_yuv) ? s->pd : 0];
+    int r, c;
+
+    for (r = 0; r <= last_row; r++) {
+        uint8_t *row = base + r * stride;
+
+        for (c = 0; c <= last_col; c++) {
+            if (row[c] &&
+                (c == 0 || !row[c - 1] || c == last_col || !row[c + 1] ||
+                 r == 0 || !row[c - stride] || r == last_row || !row[c + stride]))
+                row[c] = 255;
+        }
+    }
+}
+
+/* 8-bit peak pass: record every lit cell in s->peak, then outline the recorded set with 255. */
+static void envelope_peak(VectorscopeContext *s, AVFrame *out)
+{
+    const int stride = out->linesize[0];
+    const int last_col = out->width - 1, last_row = out->height - 1;
+    uint8_t *base = out->data[(s->mode == COLOR || !s->is_yuv) ? s->pd : 0];
+    int r, c;
+
+    for (r = 0; r <= last_row; r++)
+        for (c = 0; c <= last_col; c++)
+            if (base[r * stride + c])
+                s->peak[r][c] = 1;
+
+    /* Envelope value 3 is "peak+instant": the current frame gets its own outline too. */
+    if (s->envelope == 3)
+        envelope_instant(s, out);
+
+    /* A recorded cell is drawn when it is on the frame border or has an unrecorded 4-neighbour. */
+    for (r = 0; r <= last_row; r++) {
+        const uint8_t *mask = s->peak[r];
+
+        for (c = 0; c <= last_col; c++) {
+            if (mask[c] &&
+                (c == 0 || !mask[c - 1] || c == last_col || !mask[c + 1] ||
+                 r == 0 || !s->peak[r - 1][c] || r == last_row || !s->peak[r + 1][c]))
+                base[r * stride + c] = 255;
+        }
+    }
+}
+
+/* Apply the configured envelope to 16-bit output: 0 does nothing, 1 is instant, 2 and 3 go to peak. */
+static void envelope16(VectorscopeContext *s, AVFrame *out)
+{
+    if (!s->envelope)
+        return;
+    if (s->envelope == 1)
+        envelope_instant16(s, out);
+    else
+        envelope_peak16(s, out);
+}
+
+/* Apply the configured envelope to 8-bit output: 0 does nothing, 1 is instant, 2 and 3 go to peak. */
+static void envelope(VectorscopeContext *s, AVFrame *out)
+{
+    if (!s->envelope)
+        return;
+    if (s->envelope == 1)
+        envelope_instant(s, out);
+    else
+        envelope_peak(s, out);
+}
+
+static void vectorscope16(VectorscopeContext *s, AVFrame *in, AVFrame *out, int pd)
+{   /* Plot one frame read as uint16_t samples: clear out, plot per s->mode, then draw the envelope. */
+    const uint16_t * const *src = (const uint16_t * const *)in->data;
+    const int slinesizex = in->linesize[s->x] / 2; /* strides converted from bytes to uint16_t elements */
+    const int slinesizey = in->linesize[s->y] / 2;
+    const int slinesized = in->linesize[pd] / 2;
+    const int dlinesize = out->linesize[0] / 2; /* plane 0 stride; used below to index every output plane */
+    const int intensity = s->intensity;
+    const int px = s->x, py = s->y;
+    const int h = s->planeheight[py];
+    const int w = s->planewidth[px];
+    const uint16_t *spx = src[px];
+    const uint16_t *spy = src[py];
+    const uint16_t *spd = src[pd];
+    const int hsub = s->hsub;
+    const int vsub = s->vsub;
+    uint16_t **dst = (uint16_t **)out->data;
+    uint16_t *dpx = dst[px];
+    uint16_t *dpy = dst[py];
+    uint16_t *dpd = dst[pd];
+    const int max = s->size - 1; /* largest coordinate of the size x size output; also the saturation value */
+    const int mid = s->size / 2;
+    int i, j, k;
+
+    for (k = 0; k < 4 && dst[k]; k++) { /* background fill of each non-NULL plane; in COLOR mode plane s->pd starts at 0 */
+        const int mult = s->mult;
+
+        for (i = 0; i < out->height ; i++)
+            for (j = 0; j < out->width; j++)
+                AV_WN16(out->data[k] + i * out->linesize[k] + j * 2,
+                        s->mode == COLOR && k == s->pd ? 0 : s->bg_color[k] * mult);
+    }
+
+    switch (s->mode) {
+    case COLOR: /* shares the accumulation with GRAY; the COLOR-only fill happens after the envelope */
+    case GRAY:
+        if (s->is_yuv) { /* YUV: only plane pd accumulates */
+            for (i = 0; i < h; i++) {
+                const int iwx = i * slinesizex;
+                const int iwy = i * slinesizey;
+                for (j = 0; j < w; j++) {
+                    const int x = FFMIN(spx[iwx + j], max); /* clamp samples to the output range */
+                    const int y = FFMIN(spy[iwy + j], max);
+                    const int pos = y * dlinesize + x;
+
+                    dpd[pos] = FFMIN(dpd[pos] + intensity, max);
+                    if (dst[3]) /* plane 3, when present, is set to full scale at every hit */
+                        dst[3][pos] = max;
+                }
+            }
+        } else { /* RGB: planes 0..2 all accumulate */
+            for (i = 0; i < h; i++) {
+                const int iwx = i * slinesizex;
+                const int iwy = i * slinesizey;
+                for (j = 0; j < w; j++) {
+                    const int x = FFMIN(spx[iwx + j], max);
+                    const int y = FFMIN(spy[iwy + j], max);
+                    const int pos = y * dlinesize + x;
+
+                    dst[0][pos] = FFMIN(dst[0][pos] + intensity, max);
+                    dst[1][pos] = FFMIN(dst[1][pos] + intensity, max);
+                    dst[2][pos] = FFMIN(dst[2][pos] + intensity, max);
+                    if (dst[3])
+                        dst[3][pos] = max;
+                }
+            }
+        }
+        break;
+    case COLOR2: /* store x/y in their planes; pd is written only while it is still 0 */
+        if (s->is_yuv) {
+            for (i = 0; i < h; i++) {
+                const int iw1 = i * slinesizex;
+                const int iw2 = i * slinesizey;
+                for (j = 0; j < w; j++) {
+                    const int x = FFMIN(spx[iw1 + j], max);
+                    const int y = FFMIN(spy[iw2 + j], max);
+                    const int pos = y * dlinesize + x;
+
+                    if (!dpd[pos])
+                        dpd[pos] = FFABS(mid - x) + FFABS(mid - y); /* sum of absolute offsets from mid */
+                    dpx[pos] = x;
+                    dpy[pos] = y;
+                    if (dst[3])
+                        dst[3][pos] = max;
+                }
+            }
+        } else {
+            for (i = 0; i < h; i++) {
+                const int iw1 = i * slinesizex;
+                const int iw2 = i * slinesizey;
+                for (j = 0; j < w; j++) {
+                    const int x = FFMIN(spx[iw1 + j], max);
+                    const int y = FFMIN(spy[iw2 + j], max);
+                    const int pos = y * dlinesize + x;
+
+                    if (!dpd[pos])
+                        dpd[pos] = FFMIN(x + y, max);
+                    dpx[pos] = x;
+                    dpy[pos] = y;
+                    if (dst[3])
+                        dst[3][pos] = max;
+                }
+            }
+        }
+        break;
+    case COLOR3: /* store x/y in their planes; pd accumulates intensity up to max */
+        for (i = 0; i < h; i++) {
+            const int iw1 = i * slinesizex;
+            const int iw2 = i * slinesizey;
+            for (j = 0; j < w; j++) {
+                const int x = FFMIN(spx[iw1 + j], max);
+                const int y = FFMIN(spy[iw2 + j], max);
+                const int pos = y * dlinesize + x;
+
+                dpd[pos] = FFMIN(max, dpd[pos] + intensity);
+                dpx[pos] = x;
+                dpy[pos] = y;
+                if (dst[3])
+                    dst[3][pos] = max;
+            }
+        }
+        break;
+    case COLOR4: /* loops over in->width x in->height; x/y are read at coordinates shifted by hsub/vsub */
+        for (i = 0; i < in->height; i++) {
+            const int iwx = (i >> vsub) * slinesizex;
+            const int iwy = (i >> vsub) * slinesizey;
+            const int iwd = i * slinesized;
+            for (j = 0; j < in->width; j++) {
+                const int x = FFMIN(spx[iwx + (j >> hsub)], max);
+                const int y = FFMIN(spy[iwy + (j >> hsub)], max);
+                const int pos = y * dlinesize + x;
+
+                dpd[pos] = FFMAX(spd[iwd + j], dpd[pos]); /* keep the largest pd sample seen; not clamped to max */
+                dpx[pos] = x;
+                dpy[pos] = y;
+                if (dst[3])
+                    dst[3][pos] = max;
+            }
+        }
+        break;
+    default:
+        av_assert0(0);
+    }
+
+    envelope16(s, out);
+
+    if (s->mode == COLOR) { /* cells still 0 in pd get x = column, y = row and pd = mid */
+        for (i = 0; i < out->height; i++) {
+            for (j = 0; j < out->width; j++) {
+                if (!dpd[i * dlinesize + j]) {
+                    dpx[i * dlinesize + j] = j;
+                    dpy[i * dlinesize + j] = i;
+                    dpd[i * dlinesize + j] = mid;
+                }
+            }
+        }
+    }
+}
+
+static void vectorscope8(VectorscopeContext *s, AVFrame *in, AVFrame *out, int pd)
+{   /* Plot one frame read as uint8_t samples: clear out, plot per s->mode, then draw the envelope. */
+    const uint8_t * const *src = (const uint8_t * const *)in->data;
+    const int slinesizex = in->linesize[s->x];
+    const int slinesizey = in->linesize[s->y];
+    const int slinesized = in->linesize[pd];
+    const int dlinesize = out->linesize[0]; /* plane 0 stride; used to index every output plane while plotting */
+    const int intensity = s->intensity;
+    const int px = s->x, py = s->y;
+    const int h = s->planeheight[py];
+    const int w = s->planewidth[px];
+    const uint8_t *spx = src[px];
+    const uint8_t *spy = src[py];
+    const uint8_t *spd = src[pd];
+    const int hsub = s->hsub;
+    const int vsub = s->vsub;
+    uint8_t **dst = out->data;
+    uint8_t *dpx = dst[px];
+    uint8_t *dpy = dst[py];
+    uint8_t *dpd = dst[pd];
+    int i, j, k;
+
+    for (k = 0; k < 4 && dst[k]; k++) /* background fill of each non-NULL plane; in COLOR mode plane s->pd starts at 0 */
+        for (i = 0; i < out->height ; i++)
+            memset(dst[k] + i * out->linesize[k],
+                   s->mode == COLOR && k == s->pd ? 0 : s->bg_color[k], out->width);
+
+    switch (s->mode) {
+    case COLOR: /* shares the accumulation with GRAY; the COLOR-only fill happens after the envelope */
+    case GRAY:
+        if (s->is_yuv) { /* YUV: only plane pd accumulates */
+            for (i = 0; i < h; i++) {
+                const int iwx = i * slinesizex;
+                const int iwy = i * slinesizey;
+                for (j = 0; j < w; j++) {
+                    const int x = spx[iwx + j]; /* uint8_t samples are used as coordinates without clamping */
+                    const int y = spy[iwy + j];
+                    const int pos = y * dlinesize + x;
+
+                    dpd[pos] = FFMIN(dpd[pos] + intensity, 255);
+                    if (dst[3]) /* plane 3, when present, is set to 255 at every hit */
+                        dst[3][pos] = 255;
+                }
+            }
+        } else { /* RGB: planes 0..2 all accumulate */
+            for (i = 0; i < h; i++) {
+                const int iwx = i * slinesizex;
+                const int iwy = i * slinesizey;
+                for (j = 0; j < w; j++) {
+                    const int x = spx[iwx + j];
+                    const int y = spy[iwy + j];
+                    const int pos = y * dlinesize + x;
+
+                    dst[0][pos] = FFMIN(dst[0][pos] + intensity, 255);
+                    dst[1][pos] = FFMIN(dst[1][pos] + intensity, 255);
+                    dst[2][pos] = FFMIN(dst[2][pos] + intensity, 255);
+                    if (dst[3])
+                        dst[3][pos] = 255;
+                }
+            }
+        }
+        break;
+    case COLOR2: /* store x/y in their planes; pd is written only while it is still 0 */
+        if (s->is_yuv) {
+            for (i = 0; i < h; i++) {
+                const int iw1 = i * slinesizex;
+                const int iw2 = i * slinesizey;
+                for (j = 0; j < w; j++) {
+                    const int x = spx[iw1 + j];
+                    const int y = spy[iw2 + j];
+                    const int pos = y * dlinesize + x;
+
+                    if (!dpd[pos])
+                        dpd[pos] = FFABS(128 - x) + FFABS(128 - y); /* sum of absolute offsets from 128, stored in a uint8_t */
+                    dpx[pos] = x;
+                    dpy[pos] = y;
+                    if (dst[3])
+                        dst[3][pos] = 255;
+                }
+            }
+        } else {
+            for (i = 0; i < h; i++) {
+                const int iw1 = i * slinesizex;
+                const int iw2 = i * slinesizey;
+                for (j = 0; j < w; j++) {
+                    const int x = spx[iw1 + j];
+                    const int y = spy[iw2 + j];
+                    const int pos = y * dlinesize + x;
+
+                    if (!dpd[pos])
+                        dpd[pos] = FFMIN(x + y, 255);
+                    dpx[pos] = x;
+                    dpy[pos] = y;
+                    if (dst[3])
+                        dst[3][pos] = 255;
+                }
+            }
+        }
+        break;
+    case COLOR3: /* store x/y in their planes; pd accumulates intensity up to 255 */
+        for (i = 0; i < h; i++) {
+            const int iw1 = i * slinesizex;
+            const int iw2 = i * slinesizey;
+            for (j = 0; j < w; j++) {
+                const int x = spx[iw1 + j];
+                const int y = spy[iw2 + j];
+                const int pos = y * dlinesize + x;
+
+                dpd[pos] = FFMIN(255, dpd[pos] + intensity);
+                dpx[pos] = x;
+                dpy[pos] = y;
+                if (dst[3])
+                    dst[3][pos] = 255;
+            }
+        }
+        break;
+    case COLOR4: /* loops over in->width x in->height; x/y are read at coordinates shifted by hsub/vsub */
+        for (i = 0; i < in->height; i++) {
+            const int iwx = (i >> vsub) * slinesizex;
+            const int iwy = (i >> vsub) * slinesizey;
+            const int iwd = i * slinesized;
+            for (j = 0; j < in->width; j++) {
+                const int x = spx[iwx + (j >> hsub)];
+                const int y = spy[iwy + (j >> hsub)];
+                const int pos = y * dlinesize + x;
+
+                dpd[pos] = FFMAX(spd[iwd + j], dpd[pos]); /* keep the largest pd sample seen at this cell */
+                dpx[pos] = x;
+                dpy[pos] = y;
+                if (dst[3])
+                    dst[3][pos] = 255;
+            }
+        }
+        break;
+    default:
+        av_assert0(0);
+    }
+
+    envelope(s, out);
+
+    if (s->mode == COLOR) { /* cells still 0 in pd get x = column, y = row and pd = 128 */
+        for (i = 0; i < out->height; i++) {
+            for (j = 0; j < out->width; j++) {
+                if (!dpd[i * out->linesize[pd] + j]) { /* this pass indexes each plane with its own linesize */
+                    dpx[i * out->linesize[px] + j] = j;
+                    dpy[i * out->linesize[py] + j] = i;
+                    dpd[i * out->linesize[pd] + j] = 128;
+                }
+            }
+        }
+    }
+}
+
+/* Render the scope for one input frame; the input frame is released on every path. */
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    VectorscopeContext *s = inlink->dst->priv;
+    AVFilterLink *outlink = inlink->dst->outputs[0];
+    AVFrame *scope        = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+
+    if (scope) {
+        /* Only the timestamp is carried over from the input frame. */
+        scope->pts = in->pts;
+        s->vectorscope(s, in, scope, s->pd);
+    }
+
+    av_frame_free(&in);
+
+    if (!scope)
+        return AVERROR(ENOMEM);
+    return ff_filter_frame(outlink, scope);
+}
+
+/* Cache per-format parameters of the negotiated input and select the plotting routine. */
+static int config_input(AVFilterLink *inlink)
+{
+    VectorscopeContext *s = inlink->dst->priv;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    const int lo = FFMIN(s->x, s->y), hi = FFMAX(s->x, s->y);
+    const int chroma_w = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
+    const int chroma_h = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
+    int p;
+
+    s->is_yuv = !(desc->flags & AV_PIX_FMT_FLAG_RGB);
+    s->size   = 1 << desc->comp[0].depth;
+    s->mult   = s->size / 256;
+    s->hsub   = desc->log2_chroma_w;
+    s->vsub   = desc->log2_chroma_h;
+
+    /* YUV gray mode uses plane 0; otherwise pd is the plane that is on neither axis (x == y keeps pd as is). */
+    if ((s->mode == GRAY && s->is_yuv) || (lo == 1 && hi == 2))
+        s->pd = 0;
+    else if (lo == 0 && hi == 2)
+        s->pd = 1;
+    else if (lo == 0 && hi == 1)
+        s->pd = 2;
+
+    s->vectorscope = s->size == 256 ? vectorscope8 : vectorscope16;
+
+    switch (inlink->format) {
+    case AV_PIX_FMT_GBRP:
+    case AV_PIX_FMT_GBRAP:
+    case AV_PIX_FMT_GBRP9:
+    case AV_PIX_FMT_GBRP10:
+        s->bg_color = black_gbrp_color;
+        break;
+    default:
+        s->bg_color = black_yuva_color;
+        break;
+    }
+
+    for (p = 0; p < 4; p++) {
+        const int is_chroma = p == 1 || p == 2;
+        s->planewidth[p]  = is_chroma ? chroma_w : inlink->w;
+        s->planeheight[p] = is_chroma ? chroma_h : inlink->h;
+    }
+    return 0;
+}
+
+static const AVFilterPad inputs[] = { /* one video input pad, terminated by a NULL entry */
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame,
+        .config_props = config_input,
+    },
+    { NULL }
+};
+
+static const AVFilterPad outputs[] = { /* one video output pad, terminated by a NULL entry */
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_output, /* reads s->size, which config_input() computes */
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_vectorscope = { /* filter definition; no init or uninit callback is set */
+    .name          = "vectorscope",
+    .description   = NULL_IF_CONFIG_SMALL("Video vectorscope."),
+    .priv_size     = sizeof(VectorscopeContext),
+    .priv_class    = &vectorscope_class,
+    .query_formats = query_formats,
+    .inputs        = inputs,
+    .outputs       = outputs,
+};
diff --git a/libavfilter/vf_vflip.c b/libavfilter/vf_vflip.c
index 4a4ae0e5..cb085e83 100644
--- a/libavfilter/vf_vflip.c
+++ b/libavfilter/vf_vflip.c
@@ -55,7 +55,7 @@ static AVFrame *get_video_buffer(AVFilterLink *link, int w, int h)
 
     for (i = 0; i < 4; i ++) {
         int vsub = i == 1 || i == 2 ? flip->vsub : 0;
-        int height = FF_CEIL_RSHIFT(h, vsub);
+        int height = AV_CEIL_RSHIFT(h, vsub);
 
         if (frame->data[i]) {
             frame->data[i] += (height - 1) * frame->linesize[i];
@@ -73,7 +73,7 @@ static int filter_frame(AVFilterLink *link, AVFrame *frame)
 
     for (i = 0; i < 4; i ++) {
         int vsub = i == 1 || i == 2 ? flip->vsub : 0;
-        int height = FF_CEIL_RSHIFT(link->h, vsub);
+        int height = AV_CEIL_RSHIFT(link->h, vsub);
 
         if (frame->data[i]) {
             frame->data[i] += (height - 1) * frame->linesize[i];
diff --git a/libavfilter/vf_vidstabdetect.c b/libavfilter/vf_vidstabdetect.c
index d8f70f98..47429494 100644
--- a/libavfilter/vf_vidstabdetect.c
+++ b/libavfilter/vf_vidstabdetect.c
@@ -62,21 +62,21 @@ AVFILTER_DEFINE_CLASS(vidstabdetect);
 
 static av_cold int init(AVFilterContext *ctx)
 {
-    StabData *sd = ctx->priv;
+    StabData *s = ctx->priv;
     ff_vs_init();
-    sd->class = &vidstabdetect_class;
+    s->class = &vidstabdetect_class;
     av_log(ctx, AV_LOG_VERBOSE, "vidstabdetect filter: init %s\n", LIBVIDSTAB_VERSION);
     return 0;
 }
 
 static av_cold void uninit(AVFilterContext *ctx)
 {
-    StabData *sd = ctx->priv;
-    VSMotionDetect *md = &(sd->md);
+    StabData *s = ctx->priv;
+    VSMotionDetect *md = &(s->md);
 
-    if (sd->f) {
-        fclose(sd->f);
-        sd->f = NULL;
+    if (s->f) {
+        fclose(s->f);
+        s->f = NULL;
     }
 
     vsMotionDetectionCleanup(md);
@@ -102,9 +102,9 @@ static int query_formats(AVFilterContext *ctx)
 static int config_input(AVFilterLink *inlink)
 {
     AVFilterContext *ctx = inlink->dst;
-    StabData *sd = ctx->priv;
+    StabData *s = ctx->priv;
 
-    VSMotionDetect* md = &(sd->md);
+    VSMotionDetect* md = &(s->md);
     VSFrameInfo fi;
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
 
@@ -125,30 +125,30 @@ static int config_input(AVFilterLink *inlink)
     }
 
     // set values that are not initialized by the options
-    sd->conf.algo     = 1;
-    sd->conf.modName  = "vidstabdetect";
-    if (vsMotionDetectInit(md, &sd->conf, &fi) != VS_OK) {
+    s->conf.algo     = 1;
+    s->conf.modName  = "vidstabdetect";
+    if (vsMotionDetectInit(md, &s->conf, &fi) != VS_OK) {
         av_log(ctx, AV_LOG_ERROR, "initialization of Motion Detection failed, please report a BUG");
         return AVERROR(EINVAL);
     }
 
-    vsMotionDetectGetConfig(&sd->conf, md);
+    vsMotionDetectGetConfig(&s->conf, md);
     av_log(ctx, AV_LOG_INFO, "Video stabilization settings (pass 1/2):\n");
-    av_log(ctx, AV_LOG_INFO, "     shakiness = %d\n", sd->conf.shakiness);
-    av_log(ctx, AV_LOG_INFO, "      accuracy = %d\n", sd->conf.accuracy);
-    av_log(ctx, AV_LOG_INFO, "      stepsize = %d\n", sd->conf.stepSize);
-    av_log(ctx, AV_LOG_INFO, "   mincontrast = %f\n", sd->conf.contrastThreshold);
-    av_log(ctx, AV_LOG_INFO, "        tripod = %d\n", sd->conf.virtualTripod);
-    av_log(ctx, AV_LOG_INFO, "          show = %d\n", sd->conf.show);
-    av_log(ctx, AV_LOG_INFO, "        result = %s\n", sd->result);
-
-    sd->f = fopen(sd->result, "w");
-    if (sd->f == NULL) {
-        av_log(ctx, AV_LOG_ERROR, "cannot open transform file %s\n", sd->result);
+    av_log(ctx, AV_LOG_INFO, "     shakiness = %d\n", s->conf.shakiness);
+    av_log(ctx, AV_LOG_INFO, "      accuracy = %d\n", s->conf.accuracy);
+    av_log(ctx, AV_LOG_INFO, "      stepsize = %d\n", s->conf.stepSize);
+    av_log(ctx, AV_LOG_INFO, "   mincontrast = %f\n", s->conf.contrastThreshold);
+    av_log(ctx, AV_LOG_INFO, "        tripod = %d\n", s->conf.virtualTripod);
+    av_log(ctx, AV_LOG_INFO, "          show = %d\n", s->conf.show);
+    av_log(ctx, AV_LOG_INFO, "        result = %s\n", s->result);
+
+    s->f = fopen(s->result, "w");
+    if (s->f == NULL) {
+        av_log(ctx, AV_LOG_ERROR, "cannot open transform file %s\n", s->result);
         return AVERROR(EINVAL);
     } else {
-        if (vsPrepareFile(md, sd->f) != VS_OK) {
-            av_log(ctx, AV_LOG_ERROR, "cannot write to transform file %s\n", sd->result);
+        if (vsPrepareFile(md, s->f) != VS_OK) {
+            av_log(ctx, AV_LOG_ERROR, "cannot write to transform file %s\n", s->result);
             return AVERROR(EINVAL);
         }
     }
@@ -158,15 +158,15 @@ static int config_input(AVFilterLink *inlink)
 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 {
     AVFilterContext *ctx = inlink->dst;
-    StabData *sd = ctx->priv;
-    VSMotionDetect *md = &(sd->md);
+    StabData *s = ctx->priv;
+    VSMotionDetect *md = &(s->md);
     LocalMotions localmotions;
 
     AVFilterLink *outlink = inlink->dst->outputs[0];
     VSFrame frame;
     int plane;
 
-    if (sd->conf.show > 0 && !av_frame_is_writable(in))
+    if (s->conf.show > 0 && !av_frame_is_writable(in))
         av_frame_make_writable(in);
 
     for (plane = 0; plane < md->fi.planes; plane++) {
@@ -177,7 +177,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
         av_log(ctx, AV_LOG_ERROR, "motion detection failed");
         return AVERROR(AVERROR_EXTERNAL);
     } else {
-        if (vsWriteToFile(md, sd->f, &localmotions) != VS_OK) {
+        if (vsWriteToFile(md, s->f, &localmotions) != VS_OK) {
             int ret = AVERROR(errno);
             av_log(ctx, AV_LOG_ERROR, "cannot write to transform file");
             return ret;
diff --git a/libavfilter/vf_vidstabtransform.c b/libavfilter/vf_vidstabtransform.c
index 97e6661d..dac0a2d1 100644
--- a/libavfilter/vf_vidstabtransform.c
+++ b/libavfilter/vf_vidstabtransform.c
@@ -96,9 +96,9 @@ static const AVOption vidstabtransform_options[] = {
                    AV_OPT_TYPE_CONST,  {.i64 = VS_BiCubic },0, 0,  FLAGS, "interpol"},
 
     {"tripod",    "enable virtual tripod mode (same as relative=0:smoothing=0)", OFFSET(tripod),
-                   AV_OPT_TYPE_INT,    {.i64 = 0},        0, 1,    FLAGS},
+                   AV_OPT_TYPE_BOOL,   {.i64 = 0},        0, 1,    FLAGS},
     {"debug",     "enable debug mode and writer global motions information to file", OFFSET(debug),
-                   AV_OPT_TYPE_INT,    {.i64 = 0},        0, 1,    FLAGS},
+                   AV_OPT_TYPE_BOOL,   {.i64 = 0},        0, 1,    FLAGS},
     {NULL}
 };
 
diff --git a/libavfilter/vf_vignette.c b/libavfilter/vf_vignette.c
index c1bafa89..1d66c502 100644
--- a/libavfilter/vf_vignette.c
+++ b/libavfilter/vf_vignette.c
@@ -90,7 +90,7 @@ static const AVOption vignette_options[] = {
     { "eval", "specify when to evaluate expressions", OFFSET(eval_mode), AV_OPT_TYPE_INT, {.i64 = EVAL_MODE_INIT}, 0, EVAL_MODE_NB-1, FLAGS, "eval" },
          { "init",  "eval expressions once during initialization", 0, AV_OPT_TYPE_CONST, {.i64=EVAL_MODE_INIT},  .flags = FLAGS, .unit = "eval" },
          { "frame", "eval expressions for each frame",             0, AV_OPT_TYPE_CONST, {.i64=EVAL_MODE_FRAME}, .flags = FLAGS, .unit = "eval" },
-    { "dither", "set dithering", OFFSET(do_dither), AV_OPT_TYPE_INT, {.i64 = 1}, 0, 1, FLAGS },
+    { "dither", "set dithering", OFFSET(do_dither), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS },
     { "aspect", "set aspect ratio", OFFSET(aspect), AV_OPT_TYPE_RATIONAL, {.dbl = 1}, 0, DBL_MAX, .flags = FLAGS },
     { NULL }
 };
@@ -267,8 +267,8 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
             const int chroma = plane == 1 || plane == 2;
             const int hsub = chroma ? s->desc->log2_chroma_w : 0;
             const int vsub = chroma ? s->desc->log2_chroma_h : 0;
-            const int w = FF_CEIL_RSHIFT(inlink->w, hsub);
-            const int h = FF_CEIL_RSHIFT(inlink->h, vsub);
+            const int w = AV_CEIL_RSHIFT(inlink->w, hsub);
+            const int h = AV_CEIL_RSHIFT(inlink->h, vsub);
 
             for (y = 0; y < h; y++) {
                 uint8_t *dstp = dst;
diff --git a/libavfilter/vf_w3fdif.c b/libavfilter/vf_w3fdif.c
index bb0316b3..e6f6628a 100644
--- a/libavfilter/vf_w3fdif.c
+++ b/libavfilter/vf_w3fdif.c
@@ -29,6 +29,7 @@
 #include "formats.h"
 #include "internal.h"
 #include "video.h"
+#include "w3fdif.h"
 
 typedef struct W3FDIFContext {
     const AVClass *class;
@@ -40,7 +41,10 @@ typedef struct W3FDIFContext {
     int eof;
     int nb_planes;
     AVFrame *prev, *cur, *next;  ///< previous, current, next frames
-    int32_t *work_line;   ///< line we are calculating
+    int32_t **work_line;  ///< lines we are calculating
+    int nb_threads;
+
+    W3FDIFDSPContext dsp;
 } W3FDIFContext;
 
 #define OFFSET(x) offsetof(W3FDIFContext, x)
@@ -80,23 +84,112 @@ static int query_formats(AVFilterContext *ctx)
     return ff_set_common_formats(ctx, fmts_list);
 }
 
+static void filter_simple_low(int32_t *work_line,
+                              uint8_t *in_lines_cur[2],
+                              const int16_t *coef, int linesize)
+{
+    int i;
+    /* 2-tap low-frequency pass: each work_line entry is overwritten (not accumulated) with the weighted sum */
+    for (i = 0; i < linesize; i++) {
+        *work_line    = *in_lines_cur[0]++ * coef[0];
+        *work_line++ += *in_lines_cur[1]++ * coef[1]; /* the caller's in_lines_cur[] pointers advance too */
+    }
+}
+
+static void filter_complex_low(int32_t *work_line,
+                               uint8_t *in_lines_cur[4],
+                               const int16_t *coef, int linesize)
+{
+    int i;
+    /* 4-tap low-frequency pass: the first tap overwrites work_line, the other three taps are added to it */
+    for (i = 0; i < linesize; i++) {
+        *work_line    = *in_lines_cur[0]++ * coef[0];
+        *work_line   += *in_lines_cur[1]++ * coef[1];
+        *work_line   += *in_lines_cur[2]++ * coef[2];
+        *work_line++ += *in_lines_cur[3]++ * coef[3]; /* the caller's in_lines_cur[] pointers advance too */
+    }
+}
+
+static void filter_simple_high(int32_t *work_line,
+                               uint8_t *in_lines_cur[3],
+                               uint8_t *in_lines_adj[3],
+                               const int16_t *coef, int linesize)
+{
+    int i;
+    /* 3-tap high-frequency pass: adds to the value already in work_line; each coef weights a cur and an adj line */
+    for (i = 0; i < linesize; i++) {
+        *work_line   += *in_lines_cur[0]++ * coef[0];
+        *work_line   += *in_lines_adj[0]++ * coef[0];
+        *work_line   += *in_lines_cur[1]++ * coef[1];
+        *work_line   += *in_lines_adj[1]++ * coef[1];
+        *work_line   += *in_lines_cur[2]++ * coef[2];
+        *work_line++ += *in_lines_adj[2]++ * coef[2]; /* the caller's line pointers advance too */
+    }
+}
+
+static void filter_complex_high(int32_t *work_line,
+                                uint8_t *in_lines_cur[5],
+                                uint8_t *in_lines_adj[5],
+                                const int16_t *coef, int linesize)
+{
+    int i;
+    /* 5-tap high-frequency pass: adds to the value already in work_line; each coef weights a cur and an adj line */
+    for (i = 0; i < linesize; i++) {
+        *work_line   += *in_lines_cur[0]++ * coef[0];
+        *work_line   += *in_lines_adj[0]++ * coef[0];
+        *work_line   += *in_lines_cur[1]++ * coef[1];
+        *work_line   += *in_lines_adj[1]++ * coef[1];
+        *work_line   += *in_lines_cur[2]++ * coef[2];
+        *work_line   += *in_lines_adj[2]++ * coef[2];
+        *work_line   += *in_lines_cur[3]++ * coef[3];
+        *work_line   += *in_lines_adj[3]++ * coef[3];
+        *work_line   += *in_lines_cur[4]++ * coef[4];
+        *work_line++ += *in_lines_adj[4]++ * coef[4]; /* the caller's line pointers advance too */
+    }
+}
+
+static void filter_scale(uint8_t *out_pixel, const int32_t *work_pixel, int linesize)
+{
+    int j;
+    /* bound each accumulator with av_clip() to 0 .. 255 * 256 * 128, then >> 15 (= 256 * 128) to get the output byte */
+    for (j = 0; j < linesize; j++, out_pixel++, work_pixel++)
+        *out_pixel = av_clip(*work_pixel, 0, 255 * 256 * 128) >> 15;
+}
+
 static int config_input(AVFilterLink *inlink)
 {
-    W3FDIFContext *s = inlink->dst->priv;
+    AVFilterContext *ctx = inlink->dst;
+    W3FDIFContext *s = ctx->priv;
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
-    int ret;
+    int ret, i;
 
     if ((ret = av_image_fill_linesizes(s->linesize, inlink->format, inlink->w)) < 0)
         return ret;
 
-    s->planeheight[1] = s->planeheight[2] = FF_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
+    s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
     s->planeheight[0] = s->planeheight[3] = inlink->h;
 
     s->nb_planes = av_pix_fmt_count_planes(inlink->format);
-    s->work_line = av_calloc(s->linesize[0], sizeof(*s->work_line));
+    s->nb_threads = ctx->graph->nb_threads;
+    s->work_line = av_calloc(s->nb_threads, sizeof(*s->work_line));
     if (!s->work_line)
         return AVERROR(ENOMEM);
 
+    for (i = 0; i < s->nb_threads; i++) {
+        s->work_line[i] = av_calloc(FFALIGN(s->linesize[0], 32), sizeof(*s->work_line[0]));
+        if (!s->work_line[i])
+            return AVERROR(ENOMEM);
+    }
+
+    s->dsp.filter_simple_low   = filter_simple_low;
+    s->dsp.filter_complex_low  = filter_complex_low;
+    s->dsp.filter_simple_high  = filter_simple_high;
+    s->dsp.filter_complex_high = filter_complex_high;
+    s->dsp.filter_scale        = filter_scale;
+
+    if (ARCH_X86)
+        ff_w3fdif_init_x86(&s->dsp);
+
     return 0;
 }
 
@@ -108,13 +201,12 @@ static int config_output(AVFilterLink *outlink)
     outlink->time_base.den = inlink->time_base.den * 2;
     outlink->frame_rate.num = inlink->frame_rate.num * 2;
     outlink->frame_rate.den = inlink->frame_rate.den;
-    outlink->flags |= FF_LINK_FLAG_REQUEST_LOOP;
 
     return 0;
 }
 
 /*
- * Filter coefficients from PH-2071, scaled by 256 * 256.
+ * Filter coefficients from PH-2071, scaled by 256 * 128.
  * Each set of coefficients has a set for low-frequencies and high-frequencies.
  * n_coef_lf[] and n_coef_hf[] are the number of coefs for simple and more-complex.
  * It is important for later that n_coef_lf[] is even and n_coef_hf[] is odd.
@@ -122,17 +214,26 @@ static int config_output(AVFilterLink *outlink)
  * and high-frequencies for simple and more-complex mode.
  */
 static const int8_t   n_coef_lf[2] = { 2, 4 };
-static const int32_t coef_lf[2][4] = {{ 32768, 32768,     0,     0},
-                                      { -1704, 34472, 34472, -1704}};
+static const int16_t coef_lf[2][4] = {{ 16384, 16384,     0,    0},
+                                      {  -852, 17236, 17236, -852}};
 static const int8_t   n_coef_hf[2] = { 3, 5 };
-static const int32_t coef_hf[2][5] = {{ -4096,  8192, -4096,     0,     0},
-                                      {  2032, -7602, 11140, -7602,  2032}};
+static const int16_t coef_hf[2][5] = {{ -2048,  4096, -2048,     0,    0},
+                                      {  1016, -3801,  5570, -3801, 1016}};
 
-static void deinterlace_plane(AVFilterContext *ctx, AVFrame *out,
-                              const AVFrame *cur, const AVFrame *adj,
-                              const int filter, const int plane)
+typedef struct ThreadData {
+    AVFrame *out, *cur, *adj;
+    int plane;
+} ThreadData;
+
+static int deinterlace_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
 {
     W3FDIFContext *s = ctx->priv;
+    ThreadData *td = arg;
+    AVFrame *out = td->out;
+    AVFrame *cur = td->cur;
+    AVFrame *adj = td->adj;
+    const int plane = td->plane;
+    const int filter = s->filter;
     uint8_t *in_line, *in_lines_cur[5], *in_lines_adj[5];
     uint8_t *out_line, *out_pixel;
     int32_t *work_line, *work_pixel;
@@ -144,15 +245,17 @@ static void deinterlace_plane(AVFilterContext *ctx, AVFrame *out,
     const int cur_line_stride = cur->linesize[plane];
     const int adj_line_stride = adj->linesize[plane];
     const int dst_line_stride = out->linesize[plane];
-    int i, j, y_in, y_out;
+    const int start = (height * jobnr) / nb_jobs;     /* first row of this job's slice */
+    const int end = (height * (jobnr+1)) / nb_jobs;   /* one past the last row of this job's slice */
+    int j, y_in, y_out;
 
     /* copy unchanged the lines of the field */
-    y_out = s->field == cur->top_field_first;
+    y_out = start + ((s->field == cur->top_field_first) ^ (start & 1)); /* first row >= start of the copied parity */
 
     in_line  = cur_data + (y_out * cur_line_stride);
     out_line = dst_data + (y_out * dst_line_stride);
 
-    while (y_out < height) {
+    while (y_out < end) {
         memcpy(out_line, in_line, linesize);
         y_out += 2;
         in_line  += cur_line_stride * 2;
@@ -160,14 +263,11 @@ static void deinterlace_plane(AVFilterContext *ctx, AVFrame *out,
     }
 
     /* interpolate other lines of the field */
-    y_out = s->field != cur->top_field_first;
+    y_out = start + ((s->field != cur->top_field_first) ^ (start & 1)); /* first row >= start of the other parity */
 
     out_line = dst_data + (y_out * dst_line_stride);
 
-    while (y_out < height) {
-        /* clear workspace */
-        memset(s->work_line, 0, sizeof(*s->work_line) * linesize);
-
+    while (y_out < end) {
         /* get low vertical frequencies from current field */
         for (j = 0; j < n_coef_lf[filter]; j++) {
             y_in = (y_out + 1) + (j * 2) - n_coef_lf[filter];
@@ -180,21 +280,15 @@ static void deinterlace_plane(AVFilterContext *ctx, AVFrame *out,
             in_lines_cur[j] = cur_data + (y_in * cur_line_stride);
         }
 
-        work_line = s->work_line;
+        work_line = s->work_line[jobnr];
         switch (n_coef_lf[filter]) {
         case 2:
-            for (i = 0; i < linesize; i++) {
-                *work_line   += *in_lines_cur[0]++ * coef_lf[filter][0];
-                *work_line++ += *in_lines_cur[1]++ * coef_lf[filter][1];
-            }
+            s->dsp.filter_simple_low(work_line, in_lines_cur,
+                                     coef_lf[filter], linesize);
             break;
         case 4:
-            for (i = 0; i < linesize; i++) {
-                *work_line   += *in_lines_cur[0]++ * coef_lf[filter][0];
-                *work_line   += *in_lines_cur[1]++ * coef_lf[filter][1];
-                *work_line   += *in_lines_cur[2]++ * coef_lf[filter][2];
-                *work_line++ += *in_lines_cur[3]++ * coef_lf[filter][3];
-            }
+            s->dsp.filter_complex_low(work_line, in_lines_cur,
+                                      coef_lf[filter], linesize);
         }
 
         /* get high vertical frequencies from adjacent fields */
@@ -210,44 +304,29 @@ static void deinterlace_plane(AVFilterContext *ctx, AVFrame *out,
             in_lines_adj[j] = adj_data + (y_in * adj_line_stride);
         }
 
-        work_line = s->work_line;
+        work_line = s->work_line[jobnr];
         switch (n_coef_hf[filter]) {
         case 3:
-            for (i = 0; i < linesize; i++) {
-                *work_line   += *in_lines_cur[0]++ * coef_hf[filter][0];
-                *work_line   += *in_lines_adj[0]++ * coef_hf[filter][0];
-                *work_line   += *in_lines_cur[1]++ * coef_hf[filter][1];
-                *work_line   += *in_lines_adj[1]++ * coef_hf[filter][1];
-                *work_line   += *in_lines_cur[2]++ * coef_hf[filter][2];
-                *work_line++ += *in_lines_adj[2]++ * coef_hf[filter][2];
-            }
+            s->dsp.filter_simple_high(work_line, in_lines_cur, in_lines_adj,
+                                      coef_hf[filter], linesize);
             break;
         case 5:
-            for (i = 0; i < linesize; i++) {
-                *work_line   += *in_lines_cur[0]++ * coef_hf[filter][0];
-                *work_line   += *in_lines_adj[0]++ * coef_hf[filter][0];
-                *work_line   += *in_lines_cur[1]++ * coef_hf[filter][1];
-                *work_line   += *in_lines_adj[1]++ * coef_hf[filter][1];
-                *work_line   += *in_lines_cur[2]++ * coef_hf[filter][2];
-                *work_line   += *in_lines_adj[2]++ * coef_hf[filter][2];
-                *work_line   += *in_lines_cur[3]++ * coef_hf[filter][3];
-                *work_line   += *in_lines_adj[3]++ * coef_hf[filter][3];
-                *work_line   += *in_lines_cur[4]++ * coef_hf[filter][4];
-                *work_line++ += *in_lines_adj[4]++ * coef_hf[filter][4];
-            }
+            s->dsp.filter_complex_high(work_line, in_lines_cur, in_lines_adj,
+                                       coef_hf[filter], linesize);
         }
 
-        /* save scaled result to the output frame, scaling down by 256 * 256 */
-        work_pixel = s->work_line;
+        /* save scaled result to the output frame, scaling down by 256 * 128 */
+        work_pixel = s->work_line[jobnr];
         out_pixel = out_line;
 
-        for (j = 0; j < linesize; j++, out_pixel++, work_pixel++)
-             *out_pixel = av_clip(*work_pixel, 0, 255 * 256 * 256) >> 16;
+        s->dsp.filter_scale(out_pixel, work_pixel, linesize);
 
         /* move on to next line */
         y_out += 2;
         out_line += dst_line_stride * 2;
     }
+
+    return 0;
 }
 
 static int filter(AVFilterContext *ctx, int is_second)
@@ -255,6 +334,7 @@ static int filter(AVFilterContext *ctx, int is_second)
     W3FDIFContext *s = ctx->priv;
     AVFilterLink *outlink = ctx->outputs[0];
     AVFrame *out, *adj;
+    ThreadData td;
     int plane;
 
     out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
@@ -278,8 +358,11 @@ static int filter(AVFilterContext *ctx, int is_second)
     }
 
     adj = s->field ? s->next : s->prev;
-    for (plane = 0; plane < s->nb_planes; plane++)
-        deinterlace_plane(ctx, out, s->cur, adj, s->filter, plane);
+    td.out = out; td.cur = s->cur; td.adj = adj;
+    for (plane = 0; plane < s->nb_planes; plane++) {
+        td.plane = plane;
+        ctx->internal->execute(ctx, deinterlace_slice, &td, NULL, FFMIN(s->planeheight[plane], s->nb_threads));
+    }
 
     s->field = !s->field;
 
@@ -328,26 +411,23 @@ static int request_frame(AVFilterLink *outlink)
 {
     AVFilterContext *ctx = outlink->src;
     W3FDIFContext *s = ctx->priv;
+    int ret;
 
-    do {
-        int ret;
-
-        if (s->eof)
-            return AVERROR_EOF;
+    if (s->eof)
+        return AVERROR_EOF;
 
-        ret = ff_request_frame(ctx->inputs[0]);
+    ret = ff_request_frame(ctx->inputs[0]);
 
-        if (ret == AVERROR_EOF && s->cur) {
-            AVFrame *next = av_frame_clone(s->next);
-            if (!next)
-                return AVERROR(ENOMEM);
-            next->pts = s->next->pts * 2 - s->cur->pts;
-            filter_frame(ctx->inputs[0], next);
-            s->eof = 1;
-        } else if (ret < 0) {
-            return ret;
-        }
-    } while (!s->cur);
+    if (ret == AVERROR_EOF && s->cur) {
+        AVFrame *next = av_frame_clone(s->next);
+        if (!next)
+            return AVERROR(ENOMEM);
+        next->pts = s->next->pts * 2 - s->cur->pts;
+        filter_frame(ctx->inputs[0], next);
+        s->eof = 1;
+    } else if (ret < 0) {
+        return ret;
+    }
 
     return 0;
 }
@@ -355,10 +435,15 @@ static int request_frame(AVFilterLink *outlink)
 static av_cold void uninit(AVFilterContext *ctx)
 {
     W3FDIFContext *s = ctx->priv;
+    int i;
 
     av_frame_free(&s->prev);
     av_frame_free(&s->cur );
     av_frame_free(&s->next);
+    /* config_input() sets nb_threads before allocating work_line, so work_line may still be NULL here */
+    for (i = 0; s->work_line && i < s->nb_threads; i++)
+        av_freep(&s->work_line[i]);
+
     av_freep(&s->work_line);
 }
 
@@ -391,5 +476,5 @@ AVFilter ff_vf_w3fdif = {
     .query_formats = query_formats,
     .inputs        = w3fdif_inputs,
     .outputs       = w3fdif_outputs,
-    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL | AVFILTER_FLAG_SLICE_THREADS,
 };
diff --git a/libavfilter/vf_waveform.c b/libavfilter/vf_waveform.c
new file mode 100644
index 00000000..4fb78bd7
--- /dev/null
+++ b/libavfilter/vf_waveform.c
@@ -0,0 +1,1273 @@
+/*
+ * Copyright (c) 2012-2015 Paul B Mahol
+ * Copyright (c) 2013 Marton Balint
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avassert.h"
+#include "libavutil/opt.h"
+#include "libavutil/parseutils.h"
+#include "libavutil/pixdesc.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+enum FilterType {
+    LOWPASS,
+    FLAT,
+    AFLAT,
+    CHROMA,
+    ACHROMA,
+    COLOR,
+    NB_FILTERS
+};
+
+typedef struct WaveformContext {
+    const AVClass *class;
+    int            mode;          ///< "mode" option: 0 = row, 1 = column
+    int            ncomp;
+    int            pcomp;         ///< "components" option (range 1..15): components to display
+    const uint8_t  *bg_color;     ///< per-component background value; envelope_* treat other values as signal
+    float          fintensity;    ///< "intensity" option, 0..1
+    int            intensity;
+    int            mirror;        ///< "mirror" option (boolean)
+    int            display;       ///< "display" option: 0 = overlay, 1 = parade
+    int            envelope;      ///< "envelope" option: 0 = none, 1 = instant, 2 = peak, 3 = peak+instant
+    int            estart[4];     ///< per-plane first position scanned by the envelope_* functions
+    int            eend[4];       ///< per-plane end position (exclusive) scanned by the envelope_* functions
+    int            *emax[4][4];
+    int            *emin[4][4];
+    int            *peak;
+    int            filter;        ///< "filter" option: one of enum FilterType
+    int            bits;
+    int            max;           ///< the 16-bit envelope code marks samples with max - 1
+    int            size;
+    void (*waveform)(struct WaveformContext *s, AVFrame *in, AVFrame *out,
+                     int component, int intensity, int offset, int column);
+    const AVPixFmtDescriptor *desc; ///< chroma subsampling shifts are read from here
+} WaveformContext;
+
+#define OFFSET(x) offsetof(WaveformContext, x)
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
+
+static const AVOption waveform_options[] = {
+    { "mode", "set mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64=1}, 0, 1, FLAGS, "mode" },
+    { "m",    "set mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64=1}, 0, 1, FLAGS, "mode" },
+        { "row",    NULL, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "mode" },
+        { "column", NULL, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "mode" },
+    { "intensity", "set intensity", OFFSET(fintensity), AV_OPT_TYPE_FLOAT, {.dbl=0.04}, 0, 1, FLAGS },
+    { "i",         "set intensity", OFFSET(fintensity), AV_OPT_TYPE_FLOAT, {.dbl=0.04}, 0, 1, FLAGS },
+    { "mirror", "set mirroring", OFFSET(mirror), AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1, FLAGS },
+    { "r",      "set mirroring", OFFSET(mirror), AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1, FLAGS },
+    { "display", "set display mode", OFFSET(display), AV_OPT_TYPE_INT, {.i64=1}, 0, 1, FLAGS, "display" },
+    { "d",       "set display mode", OFFSET(display), AV_OPT_TYPE_INT, {.i64=1}, 0, 1, FLAGS, "display" },
+        { "overlay", NULL, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "display" },
+        { "parade",  NULL, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "display" },
+    { "components", "set components to display", OFFSET(pcomp), AV_OPT_TYPE_INT, {.i64=1}, 1, 15, FLAGS },
+    { "c",          "set components to display", OFFSET(pcomp), AV_OPT_TYPE_INT, {.i64=1}, 1, 15, FLAGS },
+    { "envelope", "set envelope to display", OFFSET(envelope), AV_OPT_TYPE_INT, {.i64=0}, 0, 3, FLAGS, "envelope" },
+    { "e",        "set envelope to display", OFFSET(envelope), AV_OPT_TYPE_INT, {.i64=0}, 0, 3, FLAGS, "envelope" },
+        { "none",         NULL, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "envelope" },
+        { "instant",      NULL, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "envelope" },
+        { "peak",         NULL, 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, FLAGS, "envelope" },
+        { "peak+instant", NULL, 0, AV_OPT_TYPE_CONST, {.i64=3}, 0, 0, FLAGS, "envelope" },
+    { "filter", "set filter", OFFSET(filter), AV_OPT_TYPE_INT, {.i64=0}, 0, NB_FILTERS-1, FLAGS, "filter" },
+    { "f",      "set filter", OFFSET(filter), AV_OPT_TYPE_INT, {.i64=0}, 0, NB_FILTERS-1, FLAGS, "filter" },
+        { "lowpass", NULL, 0, AV_OPT_TYPE_CONST, {.i64=LOWPASS}, 0, 0, FLAGS, "filter" },
+        { "flat"   , NULL, 0, AV_OPT_TYPE_CONST, {.i64=FLAT},    0, 0, FLAGS, "filter" },
+        { "aflat"  , NULL, 0, AV_OPT_TYPE_CONST, {.i64=AFLAT},   0, 0, FLAGS, "filter" },
+        { "chroma",  NULL, 0, AV_OPT_TYPE_CONST, {.i64=CHROMA},  0, 0, FLAGS, "filter" },
+        { "achroma", NULL, 0, AV_OPT_TYPE_CONST, {.i64=ACHROMA}, 0, 0, FLAGS, "filter" },
+        { "color",   NULL, 0, AV_OPT_TYPE_CONST, {.i64=COLOR},   0, 0, FLAGS, "filter" },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(waveform);
+
+static const enum AVPixelFormat lowpass_pix_fmts[] = {
+    AV_PIX_FMT_GBRP,     AV_PIX_FMT_GBRAP,
+    AV_PIX_FMT_GBRP9,    AV_PIX_FMT_GBRP10,
+    AV_PIX_FMT_YUV422P,  AV_PIX_FMT_YUV420P,
+    AV_PIX_FMT_YUV444P,  AV_PIX_FMT_YUV440P,
+    AV_PIX_FMT_YUV411P,  AV_PIX_FMT_YUV410P,
+    AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_YUVJ411P, AV_PIX_FMT_YUVJ420P,
+    AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ444P,
+    AV_PIX_FMT_YUVA444P, AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUVA420P,
+    AV_PIX_FMT_GRAY8,
+    AV_PIX_FMT_YUV444P9, AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV420P9,
+    AV_PIX_FMT_YUVA444P9, AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA420P9,
+    AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV420P10,
+    AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_YUVA422P10, AV_PIX_FMT_YUVA420P10,
+    AV_PIX_FMT_NONE
+};
+
+static const enum AVPixelFormat flat_pix_fmts[] = {
+    AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_NONE
+};
+
+static const enum AVPixelFormat color_pix_fmts[] = {
+    AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRAP,
+    AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10,
+    AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUVJ444P,
+    AV_PIX_FMT_YUV444P9, AV_PIX_FMT_YUV444P10,
+    AV_PIX_FMT_NONE
+};
+
+static int query_formats(AVFilterContext *ctx)
+{
+    WaveformContext *s = ctx->priv;
+    AVFilterFormats *fmts_list;
+    const enum AVPixelFormat *pix_fmts;
+
+    /* Pick the pixel format list that matches the selected filter type. */
+    switch (s->filter) {
+    case LOWPASS: pix_fmts = lowpass_pix_fmts; break;
+    case FLAT: case AFLAT: case CHROMA:
+    case ACHROMA: pix_fmts = flat_pix_fmts;    break;
+    case COLOR:   pix_fmts = color_pix_fmts;   break;
+    default:      return AVERROR(EINVAL); /* never pass an uninitialized pix_fmts on */
+    }
+
+    fmts_list = ff_make_format_list(pix_fmts);
+    if (!fmts_list)
+        return AVERROR(ENOMEM);
+    return ff_set_common_formats(ctx, fmts_list);
+}
+
+static void envelope_instant16(WaveformContext *s, AVFrame *out, int plane, int component)
+{
+    const int dst_linesize = out->linesize[component] / 2; /* stride used for uint16_t pointer arithmetic */
+    const int bg = s->bg_color[component] * (s->max / 256);
+    const int limit = s->max - 1;
+    const int is_chroma = (component == 1 || component == 2);
+    const int shift_w = (is_chroma ? s->desc->log2_chroma_w : 0);
+    const int shift_h = (is_chroma ? s->desc->log2_chroma_h : 0);
+    const int dst_h = AV_CEIL_RSHIFT(out->height, shift_h);
+    const int dst_w = AV_CEIL_RSHIFT(out->width, shift_w);
+    const int start = s->estart[plane];
+    const int end = s->eend[plane];
+    uint16_t *dst;
+    int x, y;
+    /* Set the first and last non-background sample of every column (s->mode != 0) or row (s->mode == 0) to limit. */
+    if (s->mode) {
+        for (x = 0; x < dst_w; x++) {
+            for (y = start; y < end; y++) { /* forward scan: first non-background sample */
+                dst = (uint16_t *)out->data[component] + y * dst_linesize + x;
+                if (dst[0] != bg) {
+                    dst[0] = limit;
+                    break;
+                }
+            }
+            for (y = end - 1; y >= start; y--) { /* backward scan: last non-background sample */
+                dst = (uint16_t *)out->data[component] + y * dst_linesize + x;
+                if (dst[0] != bg) {
+                    dst[0] = limit;
+                    break;
+                }
+            }
+        }
+    } else {
+        for (y = 0; y < dst_h; y++) {
+            dst = (uint16_t *)out->data[component] + y * dst_linesize;
+            for (x = start; x < end; x++) { /* forward scan: first non-background sample */
+                if (dst[x] != bg) {
+                    dst[x] = limit;
+                    break;
+                }
+            }
+            for (x = end - 1; x >= start; x--) { /* backward scan: last non-background sample */
+                if (dst[x] != bg) {
+                    dst[x] = limit;
+                    break;
+                }
+            }
+        }
+    }
+}
+
+static void envelope_instant(WaveformContext *s, AVFrame *out, int plane, int component)
+{
+    const int dst_linesize = out->linesize[component];
+    const uint8_t bg = s->bg_color[component];
+    const int is_chroma = (component == 1 || component == 2);
+    const int shift_w = (is_chroma ? s->desc->log2_chroma_w : 0);
+    const int shift_h = (is_chroma ? s->desc->log2_chroma_h : 0);
+    const int dst_h = AV_CEIL_RSHIFT(out->height, shift_h);
+    const int dst_w = AV_CEIL_RSHIFT(out->width, shift_w);
+    const int start = s->estart[plane];
+    const int end = s->eend[plane];
+    uint8_t *dst;
+    int x, y;
+    /* Set the first and last non-background sample of every column (s->mode != 0) or row (s->mode == 0) to 255. */
+    if (s->mode) {
+        for (x = 0; x < dst_w; x++) {
+            for (y = start; y < end; y++) { /* forward scan: first non-background sample */
+                dst = out->data[component] + y * dst_linesize + x;
+                if (dst[0] != bg) {
+                    dst[0] = 255;
+                    break;
+                }
+            }
+            for (y = end - 1; y >= start; y--) { /* backward scan: last non-background sample */
+                dst = out->data[component] + y * dst_linesize + x;
+                if (dst[0] != bg) {
+                    dst[0] = 255;
+                    break;
+                }
+            }
+        }
+    } else {
+        for (y = 0; y < dst_h; y++) {
+            dst = out->data[component] + y * dst_linesize;
+            for (x = start; x < end; x++) { /* forward scan: first non-background sample */
+                if (dst[x] != bg) {
+                    dst[x] = 255;
+                    break;
+                }
+            }
+            for (x = end - 1; x >= start; x--) { /* backward scan: last non-background sample */
+                if (dst[x] != bg) {
+                    dst[x] = 255;
+                    break;
+                }
+            }
+        }
+    }
+}
+/* Peak envelope, 16-bit planes: widen the extremes stored in s->emin / s->emax, then draw them with s->max - 1. */
+static void envelope_peak16(WaveformContext *s, AVFrame *out, int plane, int component)
+{
+    const int dst_linesize = out->linesize[component] / 2; /* byte linesize converted to uint16_t elements */
+    const int bg = s->bg_color[component] * (s->max / 256); /* background value scaled by s->max / 256 */
+    const int limit = s->max - 1; /* value written on envelope samples */
+    const int is_chroma = (component == 1 || component == 2);
+    const int shift_w = (is_chroma ? s->desc->log2_chroma_w : 0);
+    const int shift_h = (is_chroma ? s->desc->log2_chroma_h : 0);
+    const int dst_h = AV_CEIL_RSHIFT(out->height, shift_h);
+    const int dst_w = AV_CEIL_RSHIFT(out->width, shift_w);
+    const int start = s->estart[plane]; /* scan range is [start, end) along the scanned axis */
+    const int end = s->eend[plane];
+    int *emax = s->emax[plane][component]; /* stored maxima, indexed by column (mode != 0) or row (mode == 0) */
+    int *emin = s->emin[plane][component]; /* stored minima, indexed the same way */
+    uint16_t *dst;
+    int x, y;
+    /* NOTE(review): the draw loops below assume emin/emax already hold in-plane indices - confirm where they are initialized. */
+    if (s->mode) {
+        for (x = 0; x < dst_w; x++) {
+            for (y = start; y < end && y < emin[x]; y++) { /* only looks below the stored minimum, so emin[x] can only decrease */
+                dst = (uint16_t *)out->data[component] + y * dst_linesize + x;
+                if (dst[0] != bg) {
+                    emin[x] = y;
+                    break;
+                }
+            }
+            for (y = end - 1; y >= start && y >= emax[x]; y--) { /* only looks at or above the stored maximum, so emax[x] never decreases */
+                dst = (uint16_t *)out->data[component] + y * dst_linesize + x;
+                if (dst[0] != bg) {
+                    emax[x] = y;
+                    break;
+                }
+            }
+        }
+        /* envelope value 3 additionally draws the instant envelope before the stored peaks. */
+        if (s->envelope == 3)
+            envelope_instant16(s, out, plane, component);
+        /* Draw the stored minimum and maximum row of every column. */
+        for (x = 0; x < dst_w; x++) {
+            dst = (uint16_t *)out->data[component] + emin[x] * dst_linesize + x;
+            dst[0] = limit;
+            dst = (uint16_t *)out->data[component] + emax[x] * dst_linesize + x;
+            dst[0] = limit;
+        }
+    } else {
+        for (y = 0; y < dst_h; y++) {
+            dst = (uint16_t *)out->data[component] + y * dst_linesize;
+            for (x = start; x < end && x < emin[y]; x++) { /* only looks below the stored minimum, so emin[y] can only decrease */
+                if (dst[x] != bg) {
+                    emin[y] = x;
+                    break;
+                }
+            }
+            for (x = end - 1; x >= start && x >= emax[y]; x--) { /* only looks at or above the stored maximum, so emax[y] never decreases */
+                if (dst[x] != bg) {
+                    emax[y] = x;
+                    break;
+                }
+            }
+        }
+        /* envelope value 3 additionally draws the instant envelope before the stored peaks. */
+        if (s->envelope == 3)
+            envelope_instant16(s, out, plane, component);
+        /* Draw the stored minimum and maximum column of every row. */
+        for (y = 0; y < dst_h; y++) {
+            dst = (uint16_t *)out->data[component] + y * dst_linesize + emin[y];
+            dst[0] = limit;
+            dst = (uint16_t *)out->data[component] + y * dst_linesize + emax[y];
+            dst[0] = limit;
+        }
+    }
+}
+/* Peak envelope, 8-bit planes: widen the extremes stored in s->emin / s->emax, then draw them with 255. */
+static void envelope_peak(WaveformContext *s, AVFrame *out, int plane, int component)
+{
+    const int dst_linesize = out->linesize[component];
+    const int bg = s->bg_color[component]; /* samples equal to this value count as background */
+    const int is_chroma = (component == 1 || component == 2);
+    const int shift_w = (is_chroma ? s->desc->log2_chroma_w : 0);
+    const int shift_h = (is_chroma ? s->desc->log2_chroma_h : 0);
+    const int dst_h = AV_CEIL_RSHIFT(out->height, shift_h);
+    const int dst_w = AV_CEIL_RSHIFT(out->width, shift_w);
+    const int start = s->estart[plane]; /* scan range is [start, end) along the scanned axis */
+    const int end = s->eend[plane];
+    int *emax = s->emax[plane][component]; /* stored maxima, indexed by column (mode != 0) or row (mode == 0) */
+    int *emin = s->emin[plane][component]; /* stored minima, indexed the same way */
+    uint8_t *dst;
+    int x, y;
+    /* NOTE(review): the draw loops below assume emin/emax already hold in-plane indices - confirm where they are initialized. */
+    if (s->mode) {
+        for (x = 0; x < dst_w; x++) {
+            for (y = start; y < end && y < emin[x]; y++) { /* only looks below the stored minimum, so emin[x] can only decrease */
+                dst = out->data[component] + y * dst_linesize + x;
+                if (dst[0] != bg) {
+                    emin[x] = y;
+                    break;
+                }
+            }
+            for (y = end - 1; y >= start && y >= emax[x]; y--) { /* only looks at or above the stored maximum, so emax[x] never decreases */
+                dst = out->data[component] + y * dst_linesize + x;
+                if (dst[0] != bg) {
+                    emax[x] = y;
+                    break;
+                }
+            }
+        }
+        /* envelope value 3 additionally draws the instant envelope before the stored peaks. */
+        if (s->envelope == 3)
+            envelope_instant(s, out, plane, component);
+        /* Draw the stored minimum and maximum row of every column. */
+        for (x = 0; x < dst_w; x++) {
+            dst = out->data[component] + emin[x] * dst_linesize + x;
+            dst[0] = 255;
+            dst = out->data[component] + emax[x] * dst_linesize + x;
+            dst[0] = 255;
+        }
+    } else {
+        for (y = 0; y < dst_h; y++) {
+            dst = out->data[component] + y * dst_linesize;
+            for (x = start; x < end && x < emin[y]; x++) { /* only looks below the stored minimum, so emin[y] can only decrease */
+                if (dst[x] != bg) {
+                    emin[y] = x;
+                    break;
+                }
+            }
+            for (x = end - 1; x >= start && x >= emax[y]; x--) { /* only looks at or above the stored maximum, so emax[y] never decreases */
+                if (dst[x] != bg) {
+                    emax[y] = x;
+                    break;
+                }
+            }
+        }
+        /* envelope value 3 additionally draws the instant envelope before the stored peaks. */
+        if (s->envelope == 3)
+            envelope_instant(s, out, plane, component);
+        /* Draw the stored minimum and maximum column of every row. */
+        for (y = 0; y < dst_h; y++) {
+            dst = out->data[component] + y * dst_linesize + emin[y];
+            dst[0] = 255;
+            dst = out->data[component] + y * dst_linesize + emax[y];
+            dst[0] = 255;
+        }
+    }
+}
+/* 16-bit envelope dispatcher: s->envelope 0 draws nothing, 1 draws the instant envelope, any other value the peak envelope. */
+static void envelope16(WaveformContext *s, AVFrame *out, int plane, int component)
+{
+    if (s->envelope == 0) {
+        return;
+    } else if (s->envelope == 1) {
+        envelope_instant16(s, out, plane, component);
+    } else {
+        envelope_peak16(s, out, plane, component); /* envelope_peak16 itself also draws the instant envelope when s->envelope == 3 */
+    }
+}
+/* 8-bit envelope dispatcher: s->envelope 0 draws nothing, 1 draws the instant envelope, any other value the peak envelope. */
+static void envelope(WaveformContext *s, AVFrame *out, int plane, int component)
+{
+    if (s->envelope == 0) {
+        return;
+    } else if (s->envelope == 1) {
+        envelope_instant(s, out, plane, component);
+    } else {
+        envelope_peak(s, out, plane, component); /* envelope_peak itself also draws the instant envelope when s->envelope == 3 */
+    }
+}
+/* Brighten one 16-bit sample: add intensity while *target <= max, otherwise store limit. */
+static void update16(uint16_t *target, int max, int intensity, int limit)
+{
+    if (*target <= max) /* lowpass16 passes max = limit - intensity, so the sum cannot exceed limit there */
+        *target += intensity;
+    else
+        *target = limit;
+}
+/* Brighten one 8-bit sample: add intensity while *target <= max, otherwise store 255. */
+static void update(uint8_t *target, int max, int intensity)
+{
+    if (*target <= max) /* callers in this file pass max = 255 - intensity and an increment of intensity or 1 */
+        *target += intensity;
+    else
+        *target = 255;
+}
+/* 16-bit lowpass: for every input sample, brighten the output sample whose position along the value axis is the (clamped, shifted) sample value. */
+static void lowpass16(WaveformContext *s, AVFrame *in, AVFrame *out,
+                      int component, int intensity, int offset, int column)
+{
+    const int plane = s->desc->comp[component].plane;
+    const int mirror = s->mirror;
+    const int is_chroma = (component == 1 || component == 2);
+    const int shift_w = (is_chroma ? s->desc->log2_chroma_w : 0);
+    const int shift_h = (is_chroma ? s->desc->log2_chroma_h : 0);
+    const int src_linesize = in->linesize[plane] / 2; /* byte linesizes converted to uint16_t elements */
+    const int dst_linesize = out->linesize[plane] / 2;
+    const int dst_signed_linesize = dst_linesize * (mirror == 1 ? -1 : 1); /* negative when mirrored: values grow towards lower rows */
+    const int limit = s->max - 1; /* largest value used, both as index and as brightness */
+    const int max = limit - intensity; /* threshold above which update16() stores limit */
+    const int src_h = AV_CEIL_RSHIFT(in->height, shift_h);
+    const int src_w = AV_CEIL_RSHIFT(in->width, shift_w);
+    const uint16_t *src_data = (const uint16_t *)in->data[plane];
+    uint16_t *dst_data = (uint16_t *)out->data[plane] + (column ? (offset >> shift_h) * dst_linesize : offset >> shift_w); /* offset selects a start row (column mode) or start column (row mode) */
+    uint16_t * const dst_bottom_line = dst_data + dst_linesize * ((s->size >> shift_h) - 1);
+    uint16_t * const dst_line = (mirror ? dst_bottom_line : dst_data); /* base row for column mode; fixed for the whole call */
+    const uint16_t *p;
+    int y;
+    /* Row mode with mirroring indexes backwards from the far edge of the s->size wide strip. */
+    if (!column && mirror)
+        dst_data += s->size >> shift_w;
+
+    for (y = 0; y < src_h; y++) {
+        const uint16_t *src_data_end = src_data + src_w;
+        uint16_t *dst = dst_line; /* restarts at the same base row for every input row */
+
+        for (p = src_data; p < src_data_end; p++) {
+            uint16_t *target;
+            int v = FFMIN(*p, limit); /* clamp the sample so the index stays <= limit */
+
+            if (column) {
+                target = dst++ + dst_signed_linesize * (v >> shift_h); /* column follows the input x, row follows the value */
+            } else {
+                if (mirror)
+                    target = dst_data - (v >> shift_w) - 1;
+                else
+                    target = dst_data + (v >> shift_w);
+            }
+            update16(target, max, intensity, limit);
+        }
+        src_data += src_linesize;
+        dst_data += dst_linesize; /* only affects row mode; column mode addresses through dst_line */
+    }
+
+    envelope16(s, out, plane, plane);
+}
+/* 8-bit lowpass: for every input sample, brighten the output sample whose position along the value axis is the (shifted) sample value. */
+static void lowpass(WaveformContext *s, AVFrame *in, AVFrame *out,
+                    int component, int intensity, int offset, int column)
+{
+    const int plane = s->desc->comp[component].plane;
+    const int mirror = s->mirror;
+    const int is_chroma = (component == 1 || component == 2);
+    const int shift_w = (is_chroma ? s->desc->log2_chroma_w : 0);
+    const int shift_h = (is_chroma ? s->desc->log2_chroma_h : 0);
+    const int src_linesize = in->linesize[plane];
+    const int dst_linesize = out->linesize[plane];
+    const int dst_signed_linesize = dst_linesize * (mirror == 1 ? -1 : 1); /* negative when mirrored: values grow towards lower rows */
+    const int max = 255 - intensity; /* threshold above which update() stores 255 */
+    const int src_h = AV_CEIL_RSHIFT(in->height, shift_h);
+    const int src_w = AV_CEIL_RSHIFT(in->width, shift_w);
+    const uint8_t *src_data = in->data[plane];
+    uint8_t *dst_data = out->data[plane] + (column ? (offset >> shift_h) * dst_linesize : offset >> shift_w); /* offset selects a start row (column mode) or start column (row mode) */
+    uint8_t * const dst_bottom_line = dst_data + dst_linesize * ((s->size >> shift_h) - 1);
+    uint8_t * const dst_line = (mirror ? dst_bottom_line : dst_data); /* base row for column mode; fixed for the whole call */
+    const uint8_t *p;
+    int y;
+    /* Row mode with mirroring indexes backwards from the far edge of the s->size wide strip. */
+    if (!column && mirror)
+        dst_data += s->size >> shift_w;
+
+    for (y = 0; y < src_h; y++) {
+        const uint8_t *src_data_end = src_data + src_w;
+        uint8_t *dst = dst_line; /* restarts at the same base row for every input row */
+
+        for (p = src_data; p < src_data_end; p++) {
+            uint8_t *target;
+            if (column) {
+                target = dst++ + dst_signed_linesize * (*p >> shift_h); /* column follows the input x, row follows the value */
+            } else {
+                if (mirror)
+                    target = dst_data - (*p >> shift_w) - 1;
+                else
+                    target = dst_data + (*p >> shift_w);
+            }
+            update(target, max, intensity);
+        }
+        src_data += src_linesize;
+        dst_data += dst_linesize; /* only affects row mode; column mode addresses through dst_line */
+    }
+
+    envelope(s, out, plane, plane);
+}
+/* 8-bit "flat": plot plane value + 256 into output plane d0, and that position -/+ (|c1 - 128| + |c2 - 128|) into output plane d1. */
+static void flat(WaveformContext *s, AVFrame *in, AVFrame *out,
+                 int component, int intensity, int offset, int column)
+{
+    const int plane = s->desc->comp[component].plane;
+    const int mirror = s->mirror;
+    const int c0_linesize = in->linesize[ plane + 0 ];
+    const int c1_linesize = in->linesize[(plane + 1) % s->ncomp]; /* the other two planes are picked modulo s->ncomp */
+    const int c2_linesize = in->linesize[(plane + 2) % s->ncomp];
+    const int d0_linesize = out->linesize[ plane + 0 ];
+    const int d1_linesize = out->linesize[(plane + 1) % s->ncomp];
+    const int max = 255 - intensity; /* threshold above which update() stores 255 */
+    const int src_h = in->height; /* full frame size is used for every plane; no chroma shift is applied here */
+    const int src_w = in->width;
+    int x, y;
+
+    if (column) {
+        const int d0_signed_linesize = d0_linesize * (mirror == 1 ? -1 : 1); /* negative when mirrored */
+        const int d1_signed_linesize = d1_linesize * (mirror == 1 ? -1 : 1);
+
+        for (x = 0; x < src_w; x++) {
+            const uint8_t *c0_data = in->data[plane + 0]; /* input pointers restart at the first row for every column */
+            const uint8_t *c1_data = in->data[(plane + 1) % s->ncomp];
+            const uint8_t *c2_data = in->data[(plane + 2) % s->ncomp];
+            uint8_t *d0_data = out->data[plane] + offset * d0_linesize;
+            uint8_t *d1_data = out->data[(plane + 1) % s->ncomp] + offset * d1_linesize;
+            uint8_t * const d0_bottom_line = d0_data + d0_linesize * (s->size - 1);
+            uint8_t * const d0 = (mirror ? d0_bottom_line : d0_data); /* base row; mirrored mode starts from the last row of the strip */
+            uint8_t * const d1_bottom_line = d1_data + d1_linesize * (s->size - 1);
+            uint8_t * const d1 = (mirror ? d1_bottom_line : d1_data);
+
+            for (y = 0; y < src_h; y++) {
+                const int c0 = c0_data[x] + 256; /* plotted position: sample value shifted by 256 */
+                const int c1 = FFABS(c1_data[x] - 128) + FFABS(c2_data[x] - 128); /* summed distance of the other two planes from 128 */
+                uint8_t *target;
+
+                target = d0 + x + d0_signed_linesize * c0;
+                update(target, max, intensity);
+                target = d1 + x + d1_signed_linesize * (c0 - c1);
+                update(target, max, 1); /* increment of 1, but still saturates at the intensity-based max */
+                target = d1 + x + d1_signed_linesize * (c0 + c1);
+                update(target, max, 1);
+
+                c0_data += c0_linesize;
+                c1_data += c1_linesize;
+                c2_data += c2_linesize;
+                d0_data += d0_linesize; /* NOTE(review): no effect on the writes; targets use d0/d1 captured before this loop */
+                d1_data += d1_linesize;
+            }
+        }
+    } else {
+        const uint8_t *c0_data = in->data[plane];
+        const uint8_t *c1_data = in->data[(plane + 1) % s->ncomp];
+        const uint8_t *c2_data = in->data[(plane + 2) % s->ncomp];
+        uint8_t *d0_data = out->data[plane] + offset;
+        uint8_t *d1_data = out->data[(plane + 1) % s->ncomp] + offset;
+
+        if (mirror) { /* mirrored rows are indexed backwards from the last sample of the s->size wide strip */
+            d0_data += s->size - 1;
+            d1_data += s->size - 1;
+        }
+
+        for (y = 0; y < src_h; y++) {
+            for (x = 0; x < src_w; x++) {
+                int c0 = c0_data[x] + 256; /* plotted position: sample value shifted by 256 */
+                const int c1 = FFABS(c1_data[x] - 128) + FFABS(c2_data[x] - 128); /* summed distance of the other two planes from 128 */
+                uint8_t *target;
+
+                if (mirror) {
+                    target = d0_data - c0;
+                    update(target, max, intensity);
+                    target = d1_data - (c0 - c1);
+                    update(target, max, 1);
+                    target = d1_data - (c0 + c1);
+                    update(target, max, 1);
+                } else {
+                    target = d0_data + c0;
+                    update(target, max, intensity);
+                    target = d1_data + (c0 - c1);
+                    update(target, max, 1);
+                    target = d1_data + (c0 + c1);
+                    update(target, max, 1);
+                }
+            }
+
+            c0_data += c0_linesize;
+            c1_data += c1_linesize;
+            c2_data += c2_linesize;
+            d0_data += d0_linesize;
+            d1_data += d1_linesize;
+        }
+    }
+    /* Envelope is drawn on both output planes that were written. */
+    envelope(s, out, plane, plane);
+    envelope(s, out, plane, (plane + 1) % s->ncomp);
+}
+/* 8-bit "aflat": plot plane value + 128 into output plane d0, and that position offset by (c1 - 128) / (c2 - 128) into planes d1 / d2. */
+static void aflat(WaveformContext *s, AVFrame *in, AVFrame *out,
+                  int component, int intensity, int offset, int column)
+{
+    const int plane = s->desc->comp[component].plane;
+    const int mirror = s->mirror;
+    const int c0_linesize = in->linesize[ plane + 0 ];
+    const int c1_linesize = in->linesize[(plane + 1) % s->ncomp]; /* the other two planes are picked modulo s->ncomp */
+    const int c2_linesize = in->linesize[(plane + 2) % s->ncomp];
+    const int d0_linesize = out->linesize[ plane + 0 ];
+    const int d1_linesize = out->linesize[(plane + 1) % s->ncomp];
+    const int d2_linesize = out->linesize[(plane + 2) % s->ncomp];
+    const int max = 255 - intensity; /* threshold above which update() stores 255 */
+    const int src_h = in->height; /* full frame size is used for every plane; no chroma shift is applied here */
+    const int src_w = in->width;
+    int x, y;
+
+    if (column) {
+        const int d0_signed_linesize = d0_linesize * (mirror == 1 ? -1 : 1); /* negative when mirrored */
+        const int d1_signed_linesize = d1_linesize * (mirror == 1 ? -1 : 1);
+        const int d2_signed_linesize = d2_linesize * (mirror == 1 ? -1 : 1);
+
+        for (x = 0; x < src_w; x++) {
+            const uint8_t *c0_data = in->data[plane + 0]; /* input pointers restart at the first row for every column */
+            const uint8_t *c1_data = in->data[(plane + 1) % s->ncomp];
+            const uint8_t *c2_data = in->data[(plane + 2) % s->ncomp];
+            uint8_t *d0_data = out->data[plane] + offset * d0_linesize;
+            uint8_t *d1_data = out->data[(plane + 1) % s->ncomp] + offset * d1_linesize;
+            uint8_t *d2_data = out->data[(plane + 2) % s->ncomp] + offset * d2_linesize;
+            uint8_t * const d0_bottom_line = d0_data + d0_linesize * (s->size - 1);
+            uint8_t * const d0 = (mirror ? d0_bottom_line : d0_data); /* base row; mirrored mode starts from the last row of the strip */
+            uint8_t * const d1_bottom_line = d1_data + d1_linesize * (s->size - 1);
+            uint8_t * const d1 = (mirror ? d1_bottom_line : d1_data);
+            uint8_t * const d2_bottom_line = d2_data + d2_linesize * (s->size - 1);
+            uint8_t * const d2 = (mirror ? d2_bottom_line : d2_data);
+
+            for (y = 0; y < src_h; y++) {
+                const int c0 = c0_data[x] + 128; /* plotted position: sample value shifted by 128 */
+                const int c1 = c1_data[x] - 128; /* signed distance from 128 */
+                const int c2 = c2_data[x] - 128;
+                uint8_t *target;
+
+                target = d0 + x + d0_signed_linesize * c0;
+                update(target, max, intensity);
+
+                target = d1 + x + d1_signed_linesize * (c0 + c1);
+                update(target, max, 1); /* increment of 1, but still saturates at the intensity-based max */
+
+                target = d2 + x + d2_signed_linesize * (c0 + c2);
+                update(target, max, 1);
+
+                c0_data += c0_linesize;
+                c1_data += c1_linesize;
+                c2_data += c2_linesize;
+                d0_data += d0_linesize; /* NOTE(review): no effect on the writes; targets use d0/d1/d2 captured before this loop */
+                d1_data += d1_linesize;
+                d2_data += d2_linesize;
+            }
+        }
+    } else {
+        const uint8_t *c0_data = in->data[plane];
+        const uint8_t *c1_data = in->data[(plane + 1) % s->ncomp];
+        const uint8_t *c2_data = in->data[(plane + 2) % s->ncomp];
+        uint8_t *d0_data = out->data[plane] + offset;
+        uint8_t *d1_data = out->data[(plane + 1) % s->ncomp] + offset;
+        uint8_t *d2_data = out->data[(plane + 2) % s->ncomp] + offset;
+
+        if (mirror) { /* mirrored rows are indexed backwards from the last sample of the s->size wide strip */
+            d0_data += s->size - 1;
+            d1_data += s->size - 1;
+            d2_data += s->size - 1;
+        }
+
+        for (y = 0; y < src_h; y++) {
+            for (x = 0; x < src_w; x++) {
+                const int c0 = c0_data[x] + 128; /* plotted position: sample value shifted by 128 */
+                const int c1 = c1_data[x] - 128; /* signed distance from 128 */
+                const int c2 = c2_data[x] - 128;
+                uint8_t *target;
+
+                if (mirror) {
+                    target = d0_data - c0;
+                    update(target, max, intensity);
+                    target = d1_data - (c0 + c1);
+                    update(target, max, 1);
+                    target = d2_data - (c0 + c2);
+                    update(target, max, 1);
+                } else {
+                    target = d0_data + c0;
+                    update(target, max, intensity);
+                    target = d1_data + (c0 + c1);
+                    update(target, max, 1);
+                    target = d2_data + (c0 + c2);
+                    update(target, max, 1);
+                }
+            }
+
+            c0_data += c0_linesize;
+            c1_data += c1_linesize;
+            c2_data += c2_linesize;
+            d0_data += d0_linesize;
+            d1_data += d1_linesize;
+            d2_data += d2_linesize;
+        }
+    }
+    /* Envelope is drawn on all three output planes that were written. */
+    envelope(s, out, plane, (plane + 0) % s->ncomp);
+    envelope(s, out, plane, (plane + 1) % s->ncomp);
+    envelope(s, out, plane, (plane + 2) % s->ncomp);
+}
+/* 8-bit "chroma": plot sum = |c0 - 128| + |c1 - 128| of the two following planes at positions 256 - sum and 255 + sum of one output plane. */
+static void chroma(WaveformContext *s, AVFrame *in, AVFrame *out,
+                   int component, int intensity, int offset, int column)
+{
+    const int plane = s->desc->comp[component].plane;
+    const int mirror = s->mirror;
+    const int c0_linesize = in->linesize[(plane + 1) % s->ncomp]; /* inputs are the two planes after `plane`, modulo s->ncomp */
+    const int c1_linesize = in->linesize[(plane + 2) % s->ncomp];
+    const int dst_linesize = out->linesize[plane];
+    const int max = 255 - intensity; /* threshold above which update() stores 255 */
+    const int src_h = in->height; /* full frame size is used; no chroma shift is applied here */
+    const int src_w = in->width;
+    int x, y;
+
+    if (column) {
+        const int dst_signed_linesize = dst_linesize * (mirror == 1 ? -1 : 1); /* negative when mirrored */
+
+        for (x = 0; x < src_w; x++) {
+            const uint8_t *c0_data = in->data[(plane + 1) % s->ncomp]; /* input pointers restart at the first row for every column */
+            const uint8_t *c1_data = in->data[(plane + 2) % s->ncomp];
+            uint8_t *dst_data = out->data[plane] + offset * dst_linesize;
+            uint8_t * const dst_bottom_line = dst_data + dst_linesize * (s->size - 1);
+            uint8_t * const dst_line = (mirror ? dst_bottom_line : dst_data); /* base row; mirrored mode starts from the last row of the strip */
+            uint8_t *dst = dst_line;
+
+            for (y = 0; y < src_h; y++) {
+                const int sum = FFABS(c0_data[x] - 128) + FFABS(c1_data[x] - 128); /* summed distance of both planes from 128 */
+                uint8_t *target;
+
+                target = dst + x + dst_signed_linesize * (256 - sum);
+                update(target, max, intensity);
+                target = dst + x + dst_signed_linesize * (255 + sum);
+                update(target, max, intensity);
+
+                c0_data += c0_linesize;
+                c1_data += c1_linesize;
+                dst_data += dst_linesize; /* NOTE(review): no effect on the writes; targets use dst captured before this loop */
+            }
+        }
+    } else {
+        const uint8_t *c0_data = in->data[(plane + 1) % s->ncomp];
+        const uint8_t *c1_data = in->data[(plane + 2) % s->ncomp];
+        uint8_t *dst_data = out->data[plane] + offset;
+
+        if (mirror) /* mirrored rows are indexed backwards from the last sample of the s->size wide strip */
+            dst_data += s->size - 1;
+        for (y = 0; y < src_h; y++) {
+            for (x = 0; x < src_w; x++) {
+                const int sum = FFABS(c0_data[x] - 128) + FFABS(c1_data[x] - 128); /* summed distance of both planes from 128 */
+                uint8_t *target;
+
+                if (mirror) {
+                    target = dst_data - (256 - sum);
+                    update(target, max, intensity);
+                    target = dst_data - (255 + sum);
+                    update(target, max, intensity);
+                } else {
+                    target = dst_data + (256 - sum);
+                    update(target, max, intensity);
+                    target = dst_data + (255 + sum);
+                    update(target, max, intensity);
+                }
+            }
+
+            c0_data += c0_linesize;
+            c1_data += c1_linesize;
+            dst_data += dst_linesize;
+        }
+    }
+
+    envelope(s, out, plane, (plane + 0) % s->ncomp);
+}
+/* 8-bit "achroma": plot the two planes following `plane` into their own output planes at position 128 + (value - 128). */
+static void achroma(WaveformContext *s, AVFrame *in, AVFrame *out,
+                    int component, int intensity, int offset, int column)
+{
+    const int plane = s->desc->comp[component].plane;
+    const int mirror = s->mirror;
+    const int c1_linesize = in->linesize[(plane + 1) % s->ncomp]; /* inputs and outputs are the two planes after `plane`, modulo s->ncomp */
+    const int c2_linesize = in->linesize[(plane + 2) % s->ncomp];
+    const int d1_linesize = out->linesize[(plane + 1) % s->ncomp];
+    const int d2_linesize = out->linesize[(plane + 2) % s->ncomp];
+    const int max = 255 - intensity; /* threshold above which update() stores 255 */
+    const int src_h = in->height; /* full frame size is used; no chroma shift is applied here */
+    const int src_w = in->width;
+    int x, y;
+
+    if (column) {
+        const int d1_signed_linesize = d1_linesize * (mirror == 1 ? -1 : 1); /* negative when mirrored */
+        const int d2_signed_linesize = d2_linesize * (mirror == 1 ? -1 : 1);
+
+        for (x = 0; x < src_w; x++) {
+            const uint8_t *c1_data = in->data[(plane + 1) % s->ncomp]; /* input pointers restart at the first row for every column */
+            const uint8_t *c2_data = in->data[(plane + 2) % s->ncomp];
+            uint8_t *d1_data = out->data[(plane + 1) % s->ncomp] + offset * d1_linesize;
+            uint8_t *d2_data = out->data[(plane + 2) % s->ncomp] + offset * d2_linesize;
+            uint8_t * const d1_bottom_line = d1_data + d1_linesize * (s->size - 1);
+            uint8_t * const d1 = (mirror ? d1_bottom_line : d1_data); /* base row; mirrored mode starts from the last row of the strip */
+            uint8_t * const d2_bottom_line = d2_data + d2_linesize * (s->size - 1);
+            uint8_t * const d2 = (mirror ? d2_bottom_line : d2_data);
+
+            for (y = 0; y < src_h; y++) {
+                const int c1 = c1_data[x] - 128; /* signed distance from 128 */
+                const int c2 = c2_data[x] - 128;
+                uint8_t *target;
+
+                target = d1 + x + d1_signed_linesize * (128 + c1);
+                update(target, max, intensity);
+
+                target = d2 + x + d2_signed_linesize * (128 + c2);
+                update(target, max, intensity);
+
+                c1_data += c1_linesize;
+                c2_data += c2_linesize;
+                d1_data += d1_linesize; /* NOTE(review): no effect on the writes; targets use d1/d2 captured before this loop */
+                d2_data += d2_linesize;
+            }
+        }
+    } else {
+        const uint8_t *c1_data = in->data[(plane + 1) % s->ncomp];
+        const uint8_t *c2_data = in->data[(plane + 2) % s->ncomp];
+        uint8_t *d0_data = out->data[plane] + offset; /* NOTE(review): d0_data is adjusted below but never dereferenced in this function */
+        uint8_t *d1_data = out->data[(plane + 1) % s->ncomp] + offset;
+        uint8_t *d2_data = out->data[(plane + 2) % s->ncomp] + offset;
+
+        if (mirror) { /* mirrored rows are indexed backwards from the last sample of the s->size wide strip */
+            d0_data += s->size - 1;
+            d1_data += s->size - 1;
+            d2_data += s->size - 1;
+        }
+
+        for (y = 0; y < src_h; y++) {
+            for (x = 0; x < src_w; x++) {
+                const int c1 = c1_data[x] - 128; /* signed distance from 128 */
+                const int c2 = c2_data[x] - 128;
+                uint8_t *target;
+
+                if (mirror) {
+                    target = d1_data - (128 + c1);
+                    update(target, max, intensity);
+                    target = d2_data - (128 + c2);
+                    update(target, max, intensity);
+                } else {
+                    target = d1_data + (128 + c1);
+                    update(target, max, intensity);
+                    target = d2_data + (128 + c2);
+                    update(target, max, intensity);
+                }
+            }
+
+            c1_data += c1_linesize;
+            c2_data += c2_linesize;
+            d1_data += d1_linesize;
+            d2_data += d2_linesize;
+        }
+    }
+    /* Envelope is drawn on the two output planes that were written. */
+    envelope(s, out, plane, (plane + 1) % s->ncomp);
+    envelope(s, out, plane, (plane + 2) % s->ncomp);
+}
+/* 16-bit "color": store each pixel's three sample values into the three output planes at the position given by its clamped `plane` value. */
+static void color16(WaveformContext *s, AVFrame *in, AVFrame *out,
+                    int component, int intensity, int offset, int column)
+{
+    const int plane = s->desc->comp[component].plane;
+    const int mirror = s->mirror;
+    const int limit = s->max - 1; /* upper clamp for the value used as position */
+    const uint16_t *c0_data = (const uint16_t *)in->data[plane + 0];
+    const uint16_t *c1_data = (const uint16_t *)in->data[(plane + 1) % s->ncomp]; /* the other two planes are picked modulo s->ncomp */
+    const uint16_t *c2_data = (const uint16_t *)in->data[(plane + 2) % s->ncomp];
+    const int c0_linesize = in->linesize[ plane + 0 ] / 2; /* byte linesizes converted to uint16_t elements */
+    const int c1_linesize = in->linesize[(plane + 1) % s->ncomp] / 2;
+    const int c2_linesize = in->linesize[(plane + 2) % s->ncomp] / 2;
+    const int d0_linesize = out->linesize[ plane + 0 ] / 2;
+    const int d1_linesize = out->linesize[(plane + 1) % s->ncomp] / 2;
+    const int d2_linesize = out->linesize[(plane + 2) % s->ncomp] / 2;
+    const int src_h = in->height; /* full frame size is used for every plane; no chroma shift is applied here */
+    const int src_w = in->width;
+    int x, y;
+    /* NOTE(review): `column` and `intensity` are unused; the branch tests s->mode, unlike the sibling functions that test `column`. */
+    if (s->mode) {
+        const int d0_signed_linesize = d0_linesize * (mirror == 1 ? -1 : 1); /* negative when mirrored */
+        const int d1_signed_linesize = d1_linesize * (mirror == 1 ? -1 : 1);
+        const int d2_signed_linesize = d2_linesize * (mirror == 1 ? -1 : 1);
+        uint16_t *d0_data = (uint16_t *)out->data[plane] + offset * d0_linesize;
+        uint16_t *d1_data = (uint16_t *)out->data[(plane + 1) % s->ncomp] + offset * d1_linesize;
+        uint16_t *d2_data = (uint16_t *)out->data[(plane + 2) % s->ncomp] + offset * d2_linesize;
+        uint16_t * const d0_bottom_line = d0_data + d0_linesize * (s->size - 1);
+        uint16_t * const d0 = (mirror ? d0_bottom_line : d0_data); /* base row; mirrored mode starts from the last row of the strip */
+        uint16_t * const d1_bottom_line = d1_data + d1_linesize * (s->size - 1);
+        uint16_t * const d1 = (mirror ? d1_bottom_line : d1_data);
+        uint16_t * const d2_bottom_line = d2_data + d2_linesize * (s->size - 1);
+        uint16_t * const d2 = (mirror ? d2_bottom_line : d2_data);
+
+        for (y = 0; y < src_h; y++) {
+            for (x = 0; x < src_w; x++) {
+                const int c0 = FFMIN(c0_data[x], limit); /* clamped: used both as row index and as stored value */
+                const int c1 = c1_data[x]; /* stored as-is, never used as an index */
+                const int c2 = c2_data[x];
+
+                *(d0 + d0_signed_linesize * c0 + x) = c0;
+                *(d1 + d1_signed_linesize * c0 + x) = c1;
+                *(d2 + d2_signed_linesize * c0 + x) = c2;
+            }
+
+            c0_data += c0_linesize;
+            c1_data += c1_linesize;
+            c2_data += c2_linesize;
+            d0_data += d0_linesize; /* NOTE(review): no effect on the writes; targets use d0/d1/d2 captured before the loop */
+            d1_data += d1_linesize;
+            d2_data += d2_linesize;
+        }
+    } else {
+        uint16_t *d0_data = (uint16_t *)out->data[plane] + offset;
+        uint16_t *d1_data = (uint16_t *)out->data[(plane + 1) % s->ncomp] + offset;
+        uint16_t *d2_data = (uint16_t *)out->data[(plane + 2) % s->ncomp] + offset;
+
+        if (mirror) { /* mirrored rows are indexed backwards from the last sample of the s->size wide strip */
+            d0_data += s->size - 1;
+            d1_data += s->size - 1;
+            d2_data += s->size - 1;
+        }
+
+        for (y = 0; y < src_h; y++) {
+            for (x = 0; x < src_w; x++) {
+                const int c0 = FFMIN(c0_data[x], limit); /* clamped: used both as column index and as stored value */
+                const int c1 = c1_data[x]; /* stored as-is, never used as an index */
+                const int c2 = c2_data[x];
+
+                if (mirror) {
+                    *(d0_data - c0) = c0;
+                    *(d1_data - c0) = c1;
+                    *(d2_data - c0) = c2;
+                } else {
+                    *(d0_data + c0) = c0;
+                    *(d1_data + c0) = c1;
+                    *(d2_data + c0) = c2;
+                }
+            }
+
+            c0_data += c0_linesize;
+            c1_data += c1_linesize;
+            c2_data += c2_linesize;
+            d0_data += d0_linesize;
+            d1_data += d1_linesize;
+            d2_data += d2_linesize;
+        }
+    }
+
+    envelope16(s, out, plane, plane);
+}
+
+static void color(WaveformContext *s, AVFrame *in, AVFrame *out,
+                  int component, int intensity, int offset, int column)
+{
+    const int plane = s->desc->comp[component].plane;
+    const int mirror = s->mirror;
+    const uint8_t *c0_data = in->data[plane + 0];
+    const uint8_t *c1_data = in->data[(plane + 1) % s->ncomp];
+    const uint8_t *c2_data = in->data[(plane + 2) % s->ncomp];
+    const int c0_linesize = in->linesize[ plane + 0 ];
+    const int c1_linesize = in->linesize[(plane + 1) % s->ncomp];
+    const int c2_linesize = in->linesize[(plane + 2) % s->ncomp];
+    const int d0_linesize = out->linesize[ plane + 0 ];
+    const int d1_linesize = out->linesize[(plane + 1) % s->ncomp];
+    const int d2_linesize = out->linesize[(plane + 2) % s->ncomp];
+    const int src_h = in->height;
+    const int src_w = in->width;
+    int x, y; /* 'intensity' and 'column' are not read here; s->mode picks the orientation */
+    /* Each input sample's c0 value selects the output position where its (c0, c1, c2) triple is stored. */
+    if (s->mode) { /* c0 selects the output row, x stays the output column */
+        const int d0_signed_linesize = d0_linesize * (mirror == 1 ? -1 : 1); /* negative stride only for mirror == 1 */
+        const int d1_signed_linesize = d1_linesize * (mirror == 1 ? -1 : 1);
+        const int d2_signed_linesize = d2_linesize * (mirror == 1 ? -1 : 1);
+        uint8_t *d0_data = out->data[plane] + offset * d0_linesize;
+        uint8_t *d1_data = out->data[(plane + 1) % s->ncomp] + offset * d1_linesize;
+        uint8_t *d2_data = out->data[(plane + 2) % s->ncomp] + offset * d2_linesize;
+        uint8_t * const d0_bottom_line = d0_data + d0_linesize * (s->size - 1); /* last of the s->size graph rows */
+        uint8_t * const d0 = (mirror ? d0_bottom_line : d0_data);
+        uint8_t * const d1_bottom_line = d1_data + d1_linesize * (s->size - 1);
+        uint8_t * const d1 = (mirror ? d1_bottom_line : d1_data);
+        uint8_t * const d2_bottom_line = d2_data + d2_linesize * (s->size - 1);
+        uint8_t * const d2 = (mirror ? d2_bottom_line : d2_data);
+
+        for (y = 0; y < src_h; y++) {
+            for (x = 0; x < src_w; x++) {
+                const int c0 = c0_data[x];
+                const int c1 = c1_data[x];
+                const int c2 = c2_data[x];
+
+                *(d0 + d0_signed_linesize * c0 + x) = c0;
+                *(d1 + d1_signed_linesize * c0 + x) = c1;
+                *(d2 + d2_signed_linesize * c0 + x) = c2;
+            }
+            /* d*_data advance below as well, but every store above goes through the fixed d0/d1/d2 */
+            c0_data += c0_linesize;
+            c1_data += c1_linesize;
+            c2_data += c2_linesize;
+            d0_data += d0_linesize;
+            d1_data += d1_linesize;
+            d2_data += d2_linesize;
+        }
+    } else { /* c0 selects the output column inside the row matching the input row */
+        uint8_t *d0_data = out->data[plane] + offset;
+        uint8_t *d1_data = out->data[(plane + 1) % s->ncomp] + offset;
+        uint8_t *d2_data = out->data[(plane + 2) % s->ncomp] + offset;
+
+        if (mirror) { /* start at the last of the s->size graph columns and count backwards */
+            d0_data += s->size - 1;
+            d1_data += s->size - 1;
+            d2_data += s->size - 1;
+        }
+
+        for (y = 0; y < src_h; y++) {
+            for (x = 0; x < src_w; x++) {
+                const int c0 = c0_data[x];
+                const int c1 = c1_data[x];
+                const int c2 = c2_data[x];
+
+                if (mirror) {
+                    *(d0_data - c0) = c0;
+                    *(d1_data - c0) = c1;
+                    *(d2_data - c0) = c2;
+                } else {
+                    *(d0_data + c0) = c0;
+                    *(d1_data + c0) = c1;
+                    *(d2_data + c0) = c2;
+                }
+            }
+
+            c0_data += c0_linesize;
+            c1_data += c1_linesize;
+            c2_data += c2_linesize;
+            d0_data += d0_linesize;
+            d1_data += d1_linesize;
+            d2_data += d2_linesize;
+        }
+    }
+    /* envelope() is defined outside this view; the plane index is passed for both trailing arguments */
+    envelope(s, out, plane, plane);
+}
+
+static const uint8_t black_yuva_color[4] = { 0, 127, 127, 255 }; /* bg_color for every format not listed in config_input() */
+static const uint8_t black_gbrp_color[4] = { 0, 0, 0, 255 }; /* bg_color for the GBRP/GBRAP formats listed in config_input() */
+
+static int config_input(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    WaveformContext *s = ctx->priv;
+    /* Derive per-format state (descriptor, depth, graph size, renderer, background) from the input link. */
+    s->desc  = av_pix_fmt_desc_get(inlink->format);
+    s->ncomp = s->desc->nb_components;
+    s->bits = s->desc->comp[0].depth; /* depth of component 0 is used for the whole frame */
+    s->max = 1 << s->bits;
+    s->intensity = s->fintensity * (s->max - 1);
+    /* s->size set below is the graph extent for 8-bit samples; it is scaled by bit depth after the switch. */
+    switch (s->filter) {
+    case LOWPASS:
+            s->size = 256;
+            s->waveform = s->bits > 8 ? lowpass16 : lowpass; break;
+    case FLAT:
+            s->size = 256 * 3;
+            s->waveform = flat;    break;
+    case AFLAT:
+            s->size = 256 * 2;
+            s->waveform = aflat;   break;
+    case CHROMA:
+            s->size = 256 * 2;
+            s->waveform = chroma;  break;
+    case ACHROMA:
+            s->size = 256;
+            s->waveform = achroma; break;
+    case COLOR:
+            s->size = 256;
+            s->waveform = s->bits > 8 ?   color16 :   color; break;
+    }
+
+    s->size = s->size << (s->bits - 8); /* doubles for every bit of depth above 8 */
+
+    switch (inlink->format) { /* background values differ between the listed GBR formats and everything else */
+    case AV_PIX_FMT_GBRAP:
+    case AV_PIX_FMT_GBRP:
+    case AV_PIX_FMT_GBRP9:
+    case AV_PIX_FMT_GBRP10:
+        s->bg_color = black_gbrp_color;
+        break;
+    default:
+        s->bg_color = black_yuva_color;
+    }
+
+    return 0; /* no failure path in this function */
+}
+
+static int config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    AVFilterLink *inlink = ctx->inputs[0];
+    WaveformContext *s = ctx->priv;
+    int comp = 0, i, j = 0, k, p, size, shift;
+    /* Size the output graph and (re)build the envelope peak arrays; returns 0 or AVERROR(ENOMEM). */
+    for (i = 0; i < s->ncomp; i++) { /* count the selected components that actually exist */
+        if ((1 << i) & s->pcomp)
+            comp++;
+    }
+    /* Drop the buffer of any earlier configuration before allocating a new one. */
+    av_freep(&s->peak);
+
+    if (s->mode) { /* graph extends vertically: one peak entry per input column */
+        outlink->h = s->size * FFMAX(comp * s->display, 1);
+        size = inlink->w;
+    } else {       /* graph extends horizontally: one peak entry per input row */
+        outlink->w = s->size * FFMAX(comp * s->display, 1);
+        size = inlink->h;
+    }
+
+    s->peak = av_malloc_array(size, 32 * sizeof(*s->peak)); /* 4 planes x 4 slots, for emax (0..15) and emin (16..31) */
+    if (!s->peak)
+        return AVERROR(ENOMEM);
+    /* Only real components are visited, like in filter_frame(): a pcomp bit >= ncomp must not take a slot. */
+    for (p = 0; p < s->ncomp; p++) {
+        const int is_chroma = (p == 1 || p == 2);
+        const int shift_w = (is_chroma ? s->desc->log2_chroma_w : 0);
+        const int shift_h = (is_chroma ? s->desc->log2_chroma_h : 0);
+        const int plane = s->desc->comp[p].plane;
+        int offset;
+
+        if (!((1 << p) & s->pcomp))
+            continue;
+
+        shift = s->mode ? shift_h : shift_w;
+
+        for (k = 0; k < 4; k++) { /* carve this plane's emax/emin arrays out of s->peak */
+            s->emax[plane][k] = s->peak + size * (plane * 4 + k + 0);
+            s->emin[plane][k] = s->peak + size * (plane * 4 + k + 16);
+        }
+
+        offset = j++ * s->size * s->display; /* same slot numbering as filter_frame() */
+        s->estart[plane] = offset >> shift;
+        s->eend[plane]   = (offset + s->size - 1) >> shift;
+        for (i = 0; i < size; i++) { /* seed emax with the start and emin with the end of the slot */
+            for (k = 0; k < 4; k++) {
+                s->emax[plane][k][i] = s->estart[plane];
+                s->emin[plane][k][i] = s->eend[plane];
+            }
+        }
+    }
+
+    outlink->sample_aspect_ratio = (AVRational){1,1};
+
+    return 0;
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx  = inlink->dst;
+    WaveformContext *s    = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AVFrame *out;
+    int i, j, k;
+    /* Render one graph frame: clear it to bg_color, draw each selected component, then free 'in'. */
+    out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!out) {
+        av_frame_free(&in);
+        return AVERROR(ENOMEM);
+    }
+    out->pts = in->pts;
+
+    for (k = 0; k < s->ncomp; k++) { /* fill every plane with its background value */
+        const int is_chroma = (k == 1 || k == 2);
+        const int dst_h = AV_CEIL_RSHIFT(outlink->h, (is_chroma ? s->desc->log2_chroma_h : 0));
+        const int dst_w = AV_CEIL_RSHIFT(outlink->w, (is_chroma ? s->desc->log2_chroma_w : 0));
+        if (s->bits <= 8) {
+            for (i = 0; i < dst_h ; i++)
+                memset(out->data[s->desc->comp[k].plane] +
+                       i * out->linesize[s->desc->comp[k].plane],
+                       s->bg_color[k], dst_w);
+        } else {
+            const int mult = s->size / 256; /* factor applied to the 8-bit bg_color value */
+            uint16_t *dst = (uint16_t *)out->data[s->desc->comp[k].plane];
+
+            for (i = 0; i < dst_h ; i++) {
+                for (j = 0; j < dst_w; j++)
+                    dst[j] = s->bg_color[k] * mult;
+                dst += out->linesize[s->desc->comp[k].plane] / 2; /* dst counts uint16_t elements, hence the / 2 */
+            }
+        }
+    }
+
+    for (k = 0, i = 0; k < s->ncomp; k++) { /* i numbers the graph slots of the selected components */
+        if ((1 << k) & s->pcomp) {
+            const int offset = i++ * s->size * s->display;
+            s->waveform(s, in, out, k, s->intensity, offset, s->mode);
+        }
+    }
+
+    av_frame_free(&in);
+    return ff_filter_frame(outlink, out);
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    WaveformContext *s = ctx->priv;
+    /* Release the envelope peak buffer allocated in config_output(). */
+    av_freep(&s->peak);
+}
+
+static const AVFilterPad inputs[] = { /* one video input pad, followed by a NULL terminator entry */
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame,
+        .config_props = config_input,
+    },
+    { NULL }
+};
+
+static const AVFilterPad outputs[] = { /* one video output pad, followed by a NULL terminator entry */
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_output,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_waveform = { /* filter definition registered under the name "waveform" */
+    .name          = "waveform",
+    .description   = NULL_IF_CONFIG_SMALL("Video waveform monitor."),
+    .priv_size     = sizeof(WaveformContext),
+    .priv_class    = &waveform_class,
+    .query_formats = query_formats,
+    .uninit        = uninit,
+    .inputs        = inputs,
+    .outputs       = outputs,
+};
diff --git a/libavfilter/vf_xbr.c b/libavfilter/vf_xbr.c
index 38c3b706..c92e9a82 100644
--- a/libavfilter/vf_xbr.c
+++ b/libavfilter/vf_xbr.c
@@ -65,13 +65,14 @@ static uint32_t pixel_diff(uint32_t x, uint32_t y, const uint32_t *r2y)
 #define YMASK 0xff0000
 #define UMASK 0x00ff00
 #define VMASK 0x0000ff
+#define ABSDIFF(a,b) (abs((int)(a)-(int)(b)))
 
     uint32_t yuv1 = r2y[x & 0xffffff];
     uint32_t yuv2 = r2y[y & 0xffffff];
 
-    return (abs((yuv1 & YMASK) - (yuv2 & YMASK)) >> 16) +
-           (abs((yuv1 & UMASK) - (yuv2 & UMASK)) >>  8) +
-           abs((yuv1 & VMASK) - (yuv2 & VMASK));
+    return (ABSDIFF(yuv1 & YMASK, yuv2 & YMASK) >> 16) +
+           (ABSDIFF(yuv1 & UMASK, yuv2 & UMASK) >>  8) +
+            ABSDIFF(yuv1 & VMASK, yuv2 & VMASK);
 }
 
 #define ALPHA_BLEND_128_W(a, b) ((((a) & LB_MASK) >> 1) + (((b) & LB_MASK) >> 1))
@@ -328,11 +329,11 @@ XBR_FUNC(4)
 static int config_output(AVFilterLink *outlink)
 {
     AVFilterContext *ctx = outlink->src;
-    XBRContext *xbr = ctx->priv;
+    XBRContext *s = ctx->priv;
     AVFilterLink *inlink = ctx->inputs[0];
 
-    outlink->w = inlink->w * xbr->n;
-    outlink->h = inlink->h * xbr->n;
+    outlink->w = inlink->w * s->n;
+    outlink->h = inlink->h * s->n;
     return 0;
 }
 
@@ -352,7 +353,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 {
     AVFilterContext *ctx = inlink->dst;
     AVFilterLink *outlink = ctx->outputs[0];
-    XBRContext *xbr = ctx->priv;
+    XBRContext *s = ctx->priv;
     ThreadData td;
 
     AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
@@ -365,8 +366,8 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 
     td.in = in;
     td.out = out;
-    td.rgbtoyuv = xbr->rgbtoyuv;
-    ctx->internal->execute(ctx, xbr->func, &td, NULL, FFMIN(inlink->h, ctx->graph->nb_threads));
+    td.rgbtoyuv = s->rgbtoyuv;
+    ctx->internal->execute(ctx, s->func, &td, NULL, FFMIN(inlink->h, ctx->graph->nb_threads));
 
     out->width  = outlink->w;
     out->height = outlink->h;
@@ -377,7 +378,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 
 static int init(AVFilterContext *ctx)
 {
-    XBRContext *xbr = ctx->priv;
+    XBRContext *s = ctx->priv;
     static const xbrfunc_t xbrfuncs[] = {xbr2x, xbr3x, xbr4x};
 
     uint32_t c;
@@ -392,13 +393,13 @@ static int init(AVFilterContext *ctx)
             uint32_t y = (uint32_t)(( 299*rg + 1000*startg + 114*bg)/1000);
             c = bg + (rg<<16) + 0x010101 * startg;
             for (g = startg; g <= endg; g++) {
-                xbr->rgbtoyuv[c] = ((y++) << 16) + (u << 8) + v;
+                s->rgbtoyuv[c] = ((y++) << 16) + (u << 8) + v;
                 c+= 0x010101;
             }
         }
     }
 
-    xbr->func = xbrfuncs[xbr->n - 2];
+    s->func = xbrfuncs[s->n - 2];
     return 0;
 }
 
diff --git a/libavfilter/vf_yadif.c b/libavfilter/vf_yadif.c
index b32f38b5..8e6522cf 100644
--- a/libavfilter/vf_yadif.c
+++ b/libavfilter/vf_yadif.c
@@ -1,6 +1,8 @@
 /*
  * Copyright (C) 2006-2011 Michael Niedermayer <michaelni@gmx.at>
  *               2010      James Darnley <james.darnley@gmail.com>
+ *
+ * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -186,7 +188,7 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
     YADIFContext *s = ctx->priv;
     ThreadData *td  = arg;
     int refs = s->cur->linesize[td->plane];
-    int df = (s->csp->comp[td->plane].depth_minus1 + 8) / 8;
+    int df = (s->csp->comp[td->plane].depth + 7) / 8;
     int pix_3 = 3 * df;
     int slice_start = (td->h *  jobnr   ) / nb_jobs;
     int slice_end   = (td->h * (jobnr+1)) / nb_jobs;
@@ -231,8 +233,8 @@ static void filter(AVFilterContext *ctx, AVFrame *dstpic,
         int h = dstpic->height;
 
         if (i == 1 || i == 2) {
-            w = FF_CEIL_RSHIFT(w, yadif->csp->log2_chroma_w);
-            h = FF_CEIL_RSHIFT(h, yadif->csp->log2_chroma_h);
+            w = AV_CEIL_RSHIFT(w, yadif->csp->log2_chroma_w);
+            h = AV_CEIL_RSHIFT(h, yadif->csp->log2_chroma_h);
         }
 
 
@@ -377,34 +379,31 @@ static int request_frame(AVFilterLink *link)
 {
     AVFilterContext *ctx = link->src;
     YADIFContext *yadif = ctx->priv;
+    int ret;
 
     if (yadif->frame_pending) {
         return_frame(ctx, 1);
         return 0;
     }
 
-    do {
-        int ret;
-
-        if (yadif->eof)
-            return AVERROR_EOF;
+    if (yadif->eof)
+        return AVERROR_EOF;
 
-        ret  = ff_request_frame(link->src->inputs[0]);
+    ret  = ff_request_frame(ctx->inputs[0]);
 
-        if (ret == AVERROR_EOF && yadif->cur) {
-            AVFrame *next = av_frame_clone(yadif->next);
+    if (ret == AVERROR_EOF && yadif->cur) {
+        AVFrame *next = av_frame_clone(yadif->next);
 
-            if (!next)
-                return AVERROR(ENOMEM);
+        if (!next)
+            return AVERROR(ENOMEM);
 
-            next->pts = yadif->next->pts * 2 - yadif->cur->pts;
+        next->pts = yadif->next->pts * 2 - yadif->cur->pts;
 
-            filter_frame(link->src->inputs[0], next);
-            yadif->eof = 1;
-        } else if (ret < 0) {
-            return ret;
-        }
-    } while (!yadif->prev);
+        filter_frame(ctx->inputs[0], next);
+        yadif->eof = 1;
+    } else if (ret < 0) {
+        return ret;
+    }
 
     return 0;
 }
@@ -470,15 +469,16 @@ static int query_formats(AVFilterContext *ctx)
 static int config_props(AVFilterLink *link)
 {
     AVFilterContext *ctx = link->src;
-    YADIFContext *s = link->src->priv;
+    YADIFContext *s = ctx->priv;
 
-    link->time_base.num = link->src->inputs[0]->time_base.num;
-    link->time_base.den = link->src->inputs[0]->time_base.den * 2;
-    link->w             = link->src->inputs[0]->w;
-    link->h             = link->src->inputs[0]->h;
+    link->time_base.num = ctx->inputs[0]->time_base.num;
+    link->time_base.den = ctx->inputs[0]->time_base.den * 2;
+    link->w             = ctx->inputs[0]->w;
+    link->h             = ctx->inputs[0]->h;
 
-    if(s->mode&1)
-        link->frame_rate = av_mul_q(link->src->inputs[0]->frame_rate, (AVRational){2,1});
+    if(s->mode & 1)
+        link->frame_rate = av_mul_q(ctx->inputs[0]->frame_rate,
+                                    (AVRational){2, 1});
 
     if (link->w < 3 || link->h < 3) {
         av_log(ctx, AV_LOG_ERROR, "Video of less than 3 columns or lines is not supported\n");
@@ -486,7 +486,7 @@ static int config_props(AVFilterLink *link)
     }
 
     s->csp = av_pix_fmt_desc_get(link->format);
-    if (s->csp->comp[0].depth_minus1 / 8 == 1) {
+    if (s->csp->comp[0].depth > 8) {
         s->filter_line  = filter_line_c_16bit;
         s->filter_edges = filter_edges_16bit;
     } else {
diff --git a/libavfilter/vf_zoompan.c b/libavfilter/vf_zoompan.c
index c49193a0..99a1a346 100644
--- a/libavfilter/vf_zoompan.c
+++ b/libavfilter/vf_zoompan.c
@@ -18,6 +18,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "libavutil/avassert.h"
 #include "libavutil/eval.h"
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
@@ -85,6 +86,13 @@ typedef struct ZPcontext {
     int prev_nb_frames;
     struct SwsContext *sws;
     int64_t frame_count;
+    const AVPixFmtDescriptor *desc;
+    AVFrame *in;
+    double var_values[VARS_NB];
+    int nb_frames;
+    int current_frame;
+    int finished;
+    AVRational framerate;
 } ZPContext;
 
 #define OFFSET(x) offsetof(ZPContext, x)
@@ -96,6 +104,7 @@ static const AVOption zoompan_options[] = {
     { "y", "set the y expression", OFFSET(y_expr_str), AV_OPT_TYPE_STRING, {.str="0"}, .flags = FLAGS },
     { "d", "set the duration expression", OFFSET(duration_expr_str), AV_OPT_TYPE_STRING, {.str="90"}, .flags = FLAGS },
     { "s", "set the output image size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, {.str="hd720"}, .flags = FLAGS },
+    { "fps", "set the output framerate", OFFSET(framerate), AV_OPT_TYPE_VIDEO_RATE, { .str = "25" }, .flags = FLAGS },
     { NULL }
 };
 
@@ -116,141 +125,181 @@ static int config_output(AVFilterLink *outlink)
 
     outlink->w = s->w;
     outlink->h = s->h;
+    outlink->time_base = av_inv_q(s->framerate);
+    outlink->frame_rate = s->framerate;
+    s->desc = av_pix_fmt_desc_get(outlink->format);
 
     return 0;
 }
 
-static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+static int output_single_frame(AVFilterContext *ctx, AVFrame *in, double *var_values, int i,
+                               double *zoom, double *dx, double *dy)
 {
-    AVFilterContext *ctx = inlink->dst;
-    AVFilterLink *outlink = ctx->outputs[0];
     ZPContext *s = ctx->priv;
-    double var_values[VARS_NB], nb_frames, zoom, dx, dy;
-    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(in->format);
-    AVFrame *out = NULL;
-    int i, k, x, y, w, h, ret = 0;
-
-    var_values[VAR_IN_W]  = var_values[VAR_IW] = in->width;
-    var_values[VAR_IN_H]  = var_values[VAR_IH] = in->height;
-    var_values[VAR_OUT_W] = var_values[VAR_OW] = s->w;
-    var_values[VAR_OUT_H] = var_values[VAR_OH] = s->h;
-    var_values[VAR_IN]    = inlink->frame_count + 1;
-    var_values[VAR_ON]    = outlink->frame_count + 1;
-    var_values[VAR_PX]    = s->x;
-    var_values[VAR_PY]    = s->y;
-    var_values[VAR_X]     = 0;
-    var_values[VAR_Y]     = 0;
-    var_values[VAR_PZOOM] = s->prev_zoom;
-    var_values[VAR_ZOOM]  = 1;
-    var_values[VAR_PDURATION] = s->prev_nb_frames;
-    var_values[VAR_A]     = (double) in->width / in->height;
-    var_values[VAR_SAR]   = inlink->sample_aspect_ratio.num ?
-        (double) inlink->sample_aspect_ratio.num / inlink->sample_aspect_ratio.den : 1;
-    var_values[VAR_DAR]   = var_values[VAR_A] * var_values[VAR_SAR];
-    var_values[VAR_HSUB]  = 1 << desc->log2_chroma_w;
-    var_values[VAR_VSUB]  = 1 << desc->log2_chroma_h;
+    AVFilterLink *outlink = ctx->outputs[0];
+    int64_t pts = s->frame_count;
+    int k, x, y, w, h, ret = 0;
+    uint8_t *input[4];
+    int px[4], py[4];
+    AVFrame *out;
+    /* Emit output frame number i for 'in'; zoom/dx/dy receive the evaluated values. Returns < 0 on error. */
+    var_values[VAR_TIME] = pts * av_q2d(outlink->time_base);
+    var_values[VAR_FRAME] = i;
+    var_values[VAR_ON] = outlink->frame_count + 1;
+    if ((ret = av_expr_parse_and_eval(zoom, s->zoom_expr_str,
+                                      var_names, var_values,
+                                      NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0)
+        return ret;
 
-    if ((ret = av_expr_parse_and_eval(&nb_frames, s->duration_expr_str,
+    *zoom = av_clipd(*zoom, 1, 10);
+    var_values[VAR_ZOOM] = *zoom;
+    w = in->width * (1.0 / *zoom);  /* size of the source window that gets scaled up */
+    h = in->height * (1.0 / *zoom);
+
+    if ((ret = av_expr_parse_and_eval(dx, s->x_expr_str,
                                       var_names, var_values,
                                       NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0)
-        goto fail;
-
-    var_values[VAR_DURATION] = nb_frames;
-    for (i = 0; i < nb_frames; i++) {
-        int px[4];
-        int py[4];
-        uint8_t *input[4];
-        int64_t pts = av_rescale_q(in->pts, inlink->time_base,
-                                   outlink->time_base) + s->frame_count;
-
-        var_values[VAR_TIME] = pts * av_q2d(outlink->time_base);
-        var_values[VAR_FRAME] = i;
-        var_values[VAR_ON] = outlink->frame_count + 1;
-        if ((ret = av_expr_parse_and_eval(&zoom, s->zoom_expr_str,
-                                          var_names, var_values,
-                                          NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0)
-            goto fail;
+        return ret;
+    x = *dx = av_clipd(*dx, 0, FFMAX(in->width - w, 0));
+    var_values[VAR_X] = *dx;
+    x &= ~((1 << s->desc->log2_chroma_w) - 1); /* round down to a multiple of the chroma subsampling step */
 
-        zoom = av_clipd(zoom, 1, 10);
-        var_values[VAR_ZOOM] = zoom;
-        w = in->width * (1.0 / zoom);
-        h = in->height * (1.0 / zoom);
+    if ((ret = av_expr_parse_and_eval(dy, s->y_expr_str,
+                                      var_names, var_values,
+                                      NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0)
+        return ret;
+    y = *dy = av_clipd(*dy, 0, FFMAX(in->height - h, 0));
+    var_values[VAR_Y] = *dy;
+    y &= ~((1 << s->desc->log2_chroma_h) - 1);
+
+    out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!out)
+        return AVERROR(ENOMEM);
 
-        if ((ret = av_expr_parse_and_eval(&dx, s->x_expr_str,
-                                          var_names, var_values,
-                                          NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0)
-            goto fail;
-        x = dx = av_clipd(dx, 0, FFMAX(in->width - w, 0));
-        var_values[VAR_X] = dx;
-        x &= ~((1 << desc->log2_chroma_w) - 1);
+    px[1] = px[2] = AV_CEIL_RSHIFT(x, s->desc->log2_chroma_w);
+    px[0] = px[3] = x;
 
-        if ((ret = av_expr_parse_and_eval(&dy, s->y_expr_str,
-                                          var_names, var_values,
-                                          NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0)
-            goto fail;
-        y = dy = av_clipd(dy, 0, FFMAX(in->height - h, 0));
-        var_values[VAR_Y] = dy;
-        y &= ~((1 << desc->log2_chroma_h) - 1);
+    py[1] = py[2] = AV_CEIL_RSHIFT(y, s->desc->log2_chroma_h);
+    py[0] = py[3] = y;
 
-        out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
-        if (!out) {
-            ret = AVERROR(ENOMEM);
-            goto fail;
-        }
+    s->sws = sws_alloc_context();
+    if (!s->sws) {
+        av_frame_free(&out); /* 'out' is owned here until ff_filter_frame() takes it */
+        return AVERROR(ENOMEM);
+    }
 
-        px[1] = px[2] = FF_CEIL_RSHIFT(x, desc->log2_chroma_w);
-        px[0] = px[3] = x;
+    for (k = 0; in->data[k]; k++) /* per-plane pointer to the top-left corner of the source window */
+        input[k] = in->data[k] + py[k] * in->linesize[k] + px[k];
 
-        py[1] = py[2] = FF_CEIL_RSHIFT(y, desc->log2_chroma_h);
-        py[0] = py[3] = y;
+    av_opt_set_int(s->sws, "srcw", w, 0);
+    av_opt_set_int(s->sws, "srch", h, 0);
+    av_opt_set_int(s->sws, "src_format", in->format, 0);
+    av_opt_set_int(s->sws, "dstw", outlink->w, 0);
+    av_opt_set_int(s->sws, "dsth", outlink->h, 0);
+    av_opt_set_int(s->sws, "dst_format", outlink->format, 0);
+    av_opt_set_int(s->sws, "sws_flags", SWS_BICUBIC, 0);
 
-        s->sws = sws_alloc_context();
-        if (!s->sws) {
-            ret = AVERROR(ENOMEM);
-            goto fail;
-        }
+    if ((ret = sws_init_context(s->sws, NULL, NULL)) < 0) {
+        av_frame_free(&out); /* s->sws itself is released by request_frame()'s fail path */
+        return ret;
+    }
 
-        for (k = 0; in->data[k]; k++)
-            input[k] = in->data[k] + py[k] * in->linesize[k] + px[k];
+    sws_scale(s->sws, (const uint8_t *const *)&input, in->linesize, 0, h, out->data, out->linesize);
 
-        av_opt_set_int(s->sws, "srcw", w, 0);
-        av_opt_set_int(s->sws, "srch", h, 0);
-        av_opt_set_int(s->sws, "src_format", in->format, 0);
-        av_opt_set_int(s->sws, "dstw", outlink->w, 0);
-        av_opt_set_int(s->sws, "dsth", outlink->h, 0);
-        av_opt_set_int(s->sws, "dst_format", outlink->format, 0);
-        av_opt_set_int(s->sws, "sws_flags", SWS_BICUBIC, 0);
+    out->pts = pts;
+    s->frame_count++;
 
-        if ((ret = sws_init_context(s->sws, NULL, NULL)) < 0)
-            goto fail;
+    ret = ff_filter_frame(outlink, out);
+    sws_freeContext(s->sws);
+    s->sws = NULL;
+    s->current_frame++;
+    return ret;
+}
 
-        sws_scale(s->sws, (const uint8_t *const *)&input, in->linesize, 0, h, out->data, out->linesize);
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    ZPContext *s = ctx->priv;
+    double nb_frames;
+    int ret;
+    /* Stash the input frame and its expression variables; output frames are produced by request_frame(). */
+    av_assert0(s->in == NULL); /* aborts if an earlier input frame is still stored */
+
+    s->finished = 0;
+    s->var_values[VAR_IN_W]  = s->var_values[VAR_IW] = in->width;
+    s->var_values[VAR_IN_H]  = s->var_values[VAR_IH] = in->height;
+    s->var_values[VAR_OUT_W] = s->var_values[VAR_OW] = s->w;
+    s->var_values[VAR_OUT_H] = s->var_values[VAR_OH] = s->h;
+    s->var_values[VAR_IN]    = inlink->frame_count + 1;
+    s->var_values[VAR_ON]    = outlink->frame_count + 1;
+    s->var_values[VAR_PX]    = s->x;
+    s->var_values[VAR_PY]    = s->y;
+    s->var_values[VAR_X]     = 0;
+    s->var_values[VAR_Y]     = 0;
+    s->var_values[VAR_PZOOM] = s->prev_zoom;
+    s->var_values[VAR_ZOOM]  = 1;
+    s->var_values[VAR_PDURATION] = s->prev_nb_frames;
+    s->var_values[VAR_A]     = (double) in->width / in->height;
+    s->var_values[VAR_SAR]   = inlink->sample_aspect_ratio.num ?
+        (double) inlink->sample_aspect_ratio.num / inlink->sample_aspect_ratio.den : 1;
+    s->var_values[VAR_DAR]   = s->var_values[VAR_A] * s->var_values[VAR_SAR];
+    s->var_values[VAR_HSUB]  = 1 << s->desc->log2_chroma_w;
+    s->var_values[VAR_VSUB]  = 1 << s->desc->log2_chroma_h;
 
-        out->pts = pts;
-        s->frame_count++;
+    if ((ret = av_expr_parse_and_eval(&nb_frames, s->duration_expr_str,
+                                      var_names, s->var_values,
+                                      NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0) {
+        av_frame_free(&in); /* the frame is not stored on failure, so it is freed here */
+        return ret;
+    }
 
-        ret = ff_filter_frame(outlink, out);
-        if (ret < 0)
-            break;
-        out = NULL;
+    s->var_values[VAR_DURATION] = s->nb_frames = nb_frames; /* the double result is also stored in the int s->nb_frames */
+    s->in = in; /* kept until request_frame() has emitted s->nb_frames frames */
+
+    return 0;
+}
 
-        sws_freeContext(s->sws);
-        s->sws = NULL;
+static int request_frame(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    ZPContext *s = ctx->priv;
+    AVFrame *in = s->in;
+    double zoom=1, dx=0, dy=0;
+    int ret = -1;
+    /* Emit one output frame for the stored input, then ask upstream for more once all its frames are out. */
+    if (in) {
+        ret = output_single_frame(ctx, in, s->var_values, s->current_frame,
+                                  &zoom, &dx, &dy);
+        if (ret < 0)
+            goto fail;
     }
 
-    s->x = dx;
-    s->y = dy;
-    s->prev_zoom = zoom;
-    s->prev_nb_frames = nb_frames;
+    if (s->current_frame >= s->nb_frames) { /* also true when nothing is stored (both counters are 0) */
+        s->x = dx;
+        s->y = dy;
+        s->prev_zoom = zoom;
+        s->prev_nb_frames = s->nb_frames;
+        s->nb_frames = 0;
+        s->current_frame = 0;
+        av_frame_free(&s->in); /* also resets s->in to NULL for filter_frame()'s assertion */
+        s->finished = 1;
+        ret = ff_request_frame(ctx->inputs[0]);
+    }
 
 fail:
     sws_freeContext(s->sws);
     s->sws = NULL;
-    av_frame_free(&out);
-    av_frame_free(&in);
+
     return ret;
 }
 
+static int poll_frame(AVFilterLink *link)
+{
+    ZPContext *s = link->src->priv;
+    return s->nb_frames - s->current_frame; /* frames of the stored input not yet emitted */
+}
+
 static int query_formats(AVFilterContext *ctx)
 {
     static const enum AVPixelFormat pix_fmts[] = {
@@ -262,6 +311,7 @@ static int query_formats(AVFilterContext *ctx)
         AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P,
         AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ420P,
         AV_PIX_FMT_YUVJ411P,
+        AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRAP,
         AV_PIX_FMT_GRAY8,
         AV_PIX_FMT_NONE
     };
@@ -285,6 +335,7 @@ static const AVFilterPad inputs[] = {
         .name         = "default",
         .type         = AVMEDIA_TYPE_VIDEO,
         .filter_frame = filter_frame,
+        .needs_fifo   = 1,
     },
     { NULL }
 };
@@ -294,6 +345,8 @@ static const AVFilterPad outputs[] = {
         .name          = "default",
         .type          = AVMEDIA_TYPE_VIDEO,
         .config_props  = config_output,
+        .poll_frame    = poll_frame,
+        .request_frame = request_frame,
     },
     { NULL }
 };
diff --git a/libavfilter/vf_zscale.c b/libavfilter/vf_zscale.c
new file mode 100644
index 00000000..939775ad
--- /dev/null
+++ b/libavfilter/vf_zscale.c
@@ -0,0 +1,748 @@
+/*
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * zscale video filter using z.lib library
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include <zimg.h>
+
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+#include "libavutil/avstring.h"
+#include "libavutil/eval.h"
+#include "libavutil/internal.h"
+#include "libavutil/mathematics.h"
+#include "libavutil/opt.h"
+#include "libavutil/parseutils.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/avassert.h"
+
+static const char *const var_names[] = { /* names usable in the w/h expressions, in enum var_name order */
+    "in_w",   "iw",     /* input link width (two spellings of the same value) */
+    "in_h",   "ih",     /* input link height */
+    "out_w",  "ow",     /* output width; NAN until the width expression has been evaluated once */
+    "out_h",  "oh",     /* output height; NAN until the height expression has been evaluated */
+    "a",                /* in_w / in_h */
+    "sar",              /* input sample aspect ratio, or 1 when its numerator is 0 */
+    "dar",              /* a * sar */
+    "hsub",             /* 1 << log2_chroma_w of the input pixel format */
+    "vsub",             /* 1 << log2_chroma_h of the input pixel format */
+    "ohsub",            /* 1 << log2_chroma_w of the output pixel format */
+    "ovsub",            /* 1 << log2_chroma_h of the output pixel format */
+    NULL                /* terminator */
+};
+
+enum var_name { /* indices into var_values[]; listed in the same order as var_names[] */
+    VAR_IN_W,   VAR_IW,
+    VAR_IN_H,   VAR_IH,
+    VAR_OUT_W,  VAR_OW,
+    VAR_OUT_H,  VAR_OH,
+    VAR_A,
+    VAR_SAR,
+    VAR_DAR,
+    VAR_HSUB,
+    VAR_VSUB,
+    VAR_OHSUB,
+    VAR_OVSUB,
+    VARS_NB     /* number of variables; sizes var_values[] in config_props */
+};
+
+typedef struct ZScaleContext {
+    const AVClass *class;       ///< class pointer for the option system (see zscale_class)
+
+    /**
+     * New dimensions. Special values are:
+     *   0 = original width/height
+     *  -1 = keep original aspect
+     *  -N = try to keep aspect but make sure it is divisible by N
+     */
+    int w, h;
+    int dither;                 ///< "dither" option, copied to the zimg dither_type
+    int filter;                 ///< "filter" option, used as luma and chroma resample filter
+    int colorspace;             ///< "matrix" option (ZIMG_MATRIX_*), -1 = take it from the frame
+    int trc;                    ///< "transfer" option (ZIMG_TRANSFER_*), -1 = take it from the frame
+    int primaries;              ///< "primaries" option (ZIMG_PRIMARIES_*), -1 = take it from the frame
+    int range;                  ///< "range" option (ZIMG_RANGE_*), -1 = take it from the frame
+    int colorspace_in;          ///< "matrixin" option: input-side counterpart of colorspace
+    int trc_in;                 ///< "transferin" option: input-side counterpart of trc
+    int primaries_in;           ///< "primariesin" option: input-side counterpart of primaries
+    int range_in;               ///< "rangein" option: input-side counterpart of range
+    char *size_str;             ///< "size"/"s" option string
+
+    char *w_expr;               ///< width  expression string
+    char *h_expr;               ///< height expression string
+
+    int out_h_chr_pos;          ///< the four *_chr_pos fields are not referenced elsewhere in this file
+    int out_v_chr_pos;
+    int in_h_chr_pos;
+    int in_v_chr_pos;
+
+    int force_original_aspect_ratio; ///< read by config_props; no entry in zscale_options sets it
+
+    void *tmp;                  ///< scratch buffer handed to zimg_filter_graph_process()
+    size_t tmp_size;            ///< size in bytes last allocated for tmp
+
+    zimg_image_format src_format, dst_format;               ///< image descriptions of the main graph
+    zimg_image_format alpha_src_format, alpha_dst_format;   ///< image descriptions of the alpha graph
+    zimg_graph_builder_params alpha_params, params;         ///< builder parameters of both graphs
+    zimg_filter_graph *alpha_graph, *graph;                 ///< graphs built in filter_frame
+
+    enum AVColorSpace in_colorspace, out_colorspace;        ///< frame properties recorded by filter_frame
+    enum AVColorTransferCharacteristic in_trc, out_trc;     ///< and compared against each new frame to
+    enum AVColorPrimaries in_primaries, out_primaries;      ///< decide whether the graphs must be rebuilt
+    enum AVColorRange in_range, out_range;
+} ZScaleContext;
+
+static av_cold int init_dict(AVFilterContext *ctx, AVDictionary **opts)
+{
+    ZScaleContext *s = ctx->priv;
+    int ret;
+    /* Normalise the size options: on success both w_expr and h_expr are set. */
+    if (s->size_str && (s->w_expr || s->h_expr)) {
+        av_log(ctx, AV_LOG_ERROR,
+               "Size and width/height expressions cannot be set at the same time.\n");
+        return AVERROR(EINVAL);
+    }
+
+    if (s->w_expr && !s->h_expr)    /* a lone width value is parsed as a size string */
+        FFSWAP(char *, s->w_expr, s->size_str);
+
+    if (s->size_str) {
+        char buf[32];
+        if ((ret = av_parse_video_size(&s->w, &s->h, s->size_str)) < 0) {
+            av_log(ctx, AV_LOG_ERROR, "Invalid size '%s'\n", s->size_str);
+            return ret;
+        }
+        snprintf(buf, sizeof(buf), "%d", s->w);
+        if ((ret = av_opt_set(s, "w", buf, 0)) < 0)    /* option updates can fail; propagate */
+            return ret;
+        snprintf(buf, sizeof(buf), "%d", s->h);
+        if ((ret = av_opt_set(s, "h", buf, 0)) < 0)
+            return ret;
+    }
+    if (!s->w_expr && (ret = av_opt_set(s, "w", "iw", 0)) < 0)  /* default: input width */
+        return ret;
+    if (!s->h_expr && (ret = av_opt_set(s, "h", "ih", 0)) < 0)  /* default: input height */
+        return ret;
+    return 0;
+}
+
+static int query_formats(AVFilterContext *ctx)
+{
+    /* the same pixel format list is offered on the input and on the output link */
+    static const enum AVPixelFormat pixel_fmts[] = {
+        AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,
+        AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
+        AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV444P,
+        AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
+        AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_YUVJ444P,
+        AV_PIX_FMT_YUVJ411P,
+        AV_PIX_FMT_YUV420P9, AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV444P9,
+        AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
+        AV_PIX_FMT_YUV420P12, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV444P12,
+        AV_PIX_FMT_YUV420P14, AV_PIX_FMT_YUV422P14, AV_PIX_FMT_YUV444P14,
+        AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16,
+        AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUVA444P,
+        AV_PIX_FMT_YUVA420P9, AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA444P9,
+        AV_PIX_FMT_YUVA420P10, AV_PIX_FMT_YUVA422P10, AV_PIX_FMT_YUVA444P10,
+        AV_PIX_FMT_YUVA420P16, AV_PIX_FMT_YUVA422P16, AV_PIX_FMT_YUVA444P16,
+        AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10,
+        AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
+        AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP16,
+        AV_PIX_FMT_NONE
+    };
+    int err = ff_formats_ref(ff_make_format_list(pixel_fmts), &ctx->inputs[0]->out_formats);
+
+    if (err >= 0)   /* a failure on the input side is returned unchanged */
+        err = ff_formats_ref(ff_make_format_list(pixel_fmts), &ctx->outputs[0]->in_formats);
+    return err;
+}
+
+static int config_props(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    AVFilterLink *inlink = outlink->src->inputs[0];
+    ZScaleContext *s = ctx->priv;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    const AVPixFmtDescriptor *out_desc = av_pix_fmt_desc_get(outlink->format);
+    int64_t w, h;
+    double var_values[VARS_NB], res;
+    char *expr;
+    int ret;
+    int factor_w, factor_h;
+
+    /*
+     * Evaluate the width/height expressions against the input link and store
+     * the resulting size and sample aspect ratio on the output link.
+     * Returns 0 on success and a negative AVERROR code otherwise.
+     */
+    var_values[VAR_IN_W]  = var_values[VAR_IW] = inlink->w;
+    var_values[VAR_IN_H]  = var_values[VAR_IH] = inlink->h;
+    var_values[VAR_OUT_W] = var_values[VAR_OW] = NAN;
+    var_values[VAR_OUT_H] = var_values[VAR_OH] = NAN;
+    var_values[VAR_A]     = (double) inlink->w / inlink->h;
+    var_values[VAR_SAR]   = inlink->sample_aspect_ratio.num ?
+        (double) inlink->sample_aspect_ratio.num / inlink->sample_aspect_ratio.den : 1;
+    var_values[VAR_DAR]   = var_values[VAR_A] * var_values[VAR_SAR];
+    var_values[VAR_HSUB]  = 1 << desc->log2_chroma_w;
+    var_values[VAR_VSUB]  = 1 << desc->log2_chroma_h;
+    var_values[VAR_OHSUB] = 1 << out_desc->log2_chroma_w;
+    var_values[VAR_OVSUB] = 1 << out_desc->log2_chroma_h;
+
+    /* evaluate width and height (the first width pass is unchecked; it is redone below) */
+    av_expr_parse_and_eval(&res, (expr = s->w_expr),
+                           var_names, var_values,
+                           NULL, NULL, NULL, NULL, NULL, 0, ctx);
+    s->w = var_values[VAR_OUT_W] = var_values[VAR_OW] = res;
+    if ((ret = av_expr_parse_and_eval(&res, (expr = s->h_expr),
+                                      var_names, var_values,
+                                      NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0)
+        goto fail;
+    s->h = var_values[VAR_OUT_H] = var_values[VAR_OH] = res;
+    /* evaluate again the width, as it may depend on the output height */
+    if ((ret = av_expr_parse_and_eval(&res, (expr = s->w_expr),
+                                      var_names, var_values,
+                                      NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0)
+        goto fail;
+    s->w = res;
+
+    w = s->w;
+    h = s->h;
+
+    /* Check if it is requested that the result has to be divisible by some
+     * factor (w or h = -n with n being the factor). */
+    factor_w = 1;
+    factor_h = 1;
+    if (w < -1) {
+        factor_w = -w;
+    }
+    if (h < -1) {
+        factor_h = -h;
+    }
+
+    if (w < 0 && h < 0)
+        s->w = s->h = 0;
+
+    if (!(w = s->w))
+        w = inlink->w;
+    if (!(h = s->h))
+        h = inlink->h;
+
+    /* Make sure that the result is divisible by the factor we determined
+     * earlier. If no factor was set, nothing will happen as the default
+     * factor is 1 */
+    if (w < 0)
+        w = av_rescale(h, inlink->w, inlink->h * factor_w) * factor_w;
+    if (h < 0)
+        h = av_rescale(w, inlink->h, inlink->w * factor_h) * factor_h;
+
+    /* Note that force_original_aspect_ratio may overwrite the previous set
+     * dimensions so that it is not divisible by the set factors anymore. */
+    if (s->force_original_aspect_ratio) {
+        int64_t tmp_w = av_rescale(h, inlink->w, inlink->h); /* 64-bit: checked against INT_MAX below */
+        int64_t tmp_h = av_rescale(w, inlink->h, inlink->w);
+
+        if (s->force_original_aspect_ratio == 1) {
+             w = FFMIN(tmp_w, w);
+             h = FFMIN(tmp_h, h);
+        } else {
+             w = FFMAX(tmp_w, w);
+             h = FFMAX(tmp_h, h);
+        }
+    }
+
+    if (w > INT_MAX || h > INT_MAX ||
+        (h * inlink->w) > INT_MAX  ||
+        (w * inlink->h) > INT_MAX) {
+        av_log(ctx, AV_LOG_ERROR, "Rescaled value for width or height is too big.\n");
+        return AVERROR(EINVAL); /* do not store a truncated size on the link */
+    }
+
+    outlink->w = w;
+    outlink->h = h;
+
+    if (inlink->sample_aspect_ratio.num){
+        outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h * inlink->w, outlink->w * inlink->h}, inlink->sample_aspect_ratio);
+    } else
+        outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
+
+    av_log(ctx, AV_LOG_VERBOSE, "w:%d h:%d fmt:%s sar:%d/%d -> w:%d h:%d fmt:%s sar:%d/%d\n",
+           inlink ->w, inlink ->h, av_get_pix_fmt_name( inlink->format),
+           inlink->sample_aspect_ratio.num, inlink->sample_aspect_ratio.den,
+           outlink->w, outlink->h, av_get_pix_fmt_name(outlink->format),
+           outlink->sample_aspect_ratio.num, outlink->sample_aspect_ratio.den);
+    return 0;
+
+fail:
+    av_log(ctx, AV_LOG_ERROR,
+           "Error when evaluating the expression '%s'.\n"
+           "Maybe the expression for out_w:'%s' or for out_h:'%s' is self-referencing.\n",
+           expr, s->w_expr, s->h_expr);
+    return ret;
+}
+
+static int print_zimg_error(AVFilterContext *ctx)
+{
+    char err_msg[1024];
+    int err_code = zimg_get_last_error(err_msg, sizeof(err_msg));
+    /* Log zimg's last error.  The zimg code is only printed: it is not an
+     * AVERROR value and may be positive, so a generic AVERROR is returned. */
+    av_log(ctx, AV_LOG_ERROR, "code %d: %s\n", err_code, err_msg);
+    return AVERROR_EXTERNAL;
+}
+
+static int convert_matrix(enum AVColorSpace colorspace)
+{
+    /*
+     * Translate a libavutil colorspace identifier into the zimg matrix
+     * constant of the same name.  Values without a case below yield
+     * ZIMG_MATRIX_UNSPECIFIED.
+     */
+    int matrix = ZIMG_MATRIX_UNSPECIFIED;
+
+    switch (colorspace) {
+    case AVCOL_SPC_RGB:         matrix = ZIMG_MATRIX_RGB;         break;
+    case AVCOL_SPC_BT709:       matrix = ZIMG_MATRIX_709;         break;
+    case AVCOL_SPC_UNSPECIFIED: matrix = ZIMG_MATRIX_UNSPECIFIED; break;
+    case AVCOL_SPC_BT470BG:     matrix = ZIMG_MATRIX_470BG;       break;
+    case AVCOL_SPC_SMPTE170M:   matrix = ZIMG_MATRIX_170M;        break;
+    case AVCOL_SPC_YCGCO:       matrix = ZIMG_MATRIX_YCGCO;       break;
+    case AVCOL_SPC_BT2020_NCL:  matrix = ZIMG_MATRIX_2020_NCL;    break;
+    case AVCOL_SPC_BT2020_CL:   matrix = ZIMG_MATRIX_2020_CL;     break;
+    }
+
+    return matrix;
+}
+
+static int convert_trc(enum AVColorTransferCharacteristic color_trc)
+{
+    /* Translate a libavutil transfer characteristic into a zimg transfer
+     * constant (SMPTE170M maps to ZIMG_TRANSFER_601).  Values without a case
+     * below yield ZIMG_TRANSFER_UNSPECIFIED. */
+    int transfer = ZIMG_TRANSFER_UNSPECIFIED;
+
+    switch (color_trc) {
+    case AVCOL_TRC_UNSPECIFIED: transfer = ZIMG_TRANSFER_UNSPECIFIED; break;
+    case AVCOL_TRC_BT709:       transfer = ZIMG_TRANSFER_709;         break;
+    case AVCOL_TRC_SMPTE170M:   transfer = ZIMG_TRANSFER_601;         break;
+    case AVCOL_TRC_LINEAR:      transfer = ZIMG_TRANSFER_LINEAR;      break;
+    case AVCOL_TRC_BT2020_10:   transfer = ZIMG_TRANSFER_2020_10;     break;
+    case AVCOL_TRC_BT2020_12:   transfer = ZIMG_TRANSFER_2020_12;     break;
+    }
+
+    return transfer;
+}
+
+static int convert_primaries(enum AVColorPrimaries color_primaries)
+{
+    /* Translate libavutil colour primaries into the zimg constant of the same
+     * name; values without a case below yield ZIMG_PRIMARIES_UNSPECIFIED. */
+    int primaries = ZIMG_PRIMARIES_UNSPECIFIED;
+
+    switch (color_primaries) {
+    case AVCOL_PRI_UNSPECIFIED: primaries = ZIMG_PRIMARIES_UNSPECIFIED; break;
+    case AVCOL_PRI_BT709:       primaries = ZIMG_PRIMARIES_709;         break;
+    case AVCOL_PRI_SMPTE170M:   primaries = ZIMG_PRIMARIES_170M;        break;
+    case AVCOL_PRI_SMPTE240M:   primaries = ZIMG_PRIMARIES_240M;        break;
+    case AVCOL_PRI_BT2020:      primaries = ZIMG_PRIMARIES_2020;        break;
+    }
+
+    return primaries;
+}
+
+static int convert_range(enum AVColorRange color_range)
+{
+    /*
+     * Only AVCOL_RANGE_JPEG selects zimg's full range; unspecified, MPEG
+     * and any other value are all treated as limited range.
+     */
+    if (color_range == AVCOL_RANGE_JPEG)
+        return ZIMG_RANGE_FULL;
+
+    return ZIMG_RANGE_LIMITED;
+}
+
+static int filter_frame(AVFilterLink *link, AVFrame *in)
+{
+    ZScaleContext *s = link->dst->priv;
+    AVFilterLink *outlink = link->dst->outputs[0];
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(in->format); /* layout of the frame actually received */
+    const AVPixFmtDescriptor *odesc = av_pix_fmt_desc_get(outlink->format);
+    zimg_image_buffer_const src_buf = { ZIMG_API_VERSION };
+    zimg_image_buffer dst_buf = { ZIMG_API_VERSION };
+    char buf[32];
+    size_t tmp_size;
+    int ret = 0, plane;
+    AVFrame *out;
+    /* Convert one frame with zimg and send it downstream; 'in' is consumed on every path. */
+    out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!out) {
+        av_frame_free(&in);
+        return AVERROR(ENOMEM);
+    }
+
+    av_frame_copy_props(out, in);
+    out->width  = outlink->w;
+    out->height = outlink->h;
+
+    /* Rebuild the zimg graphs if none is usable or the recorded configuration no longer matches. */
+    if(   !s->graph
+       || in->width  != link->w || in->height != link->h
+       || in->format != link->format
+       || (int)s->dst_format.width != out->width || (int)s->dst_format.height != out->height
+       || s->in_colorspace != in->colorspace
+       || s->in_trc  != in->color_trc
+       || s->in_primaries != in->color_primaries
+       || s->in_range != in->color_range
+       || s->out_colorspace != out->colorspace || s->out_trc != out->color_trc
+       || s->out_primaries != out->color_primaries
+       || s->out_range != out->color_range) {
+        snprintf(buf, sizeof(buf)-1, "%d", outlink->w);
+        av_opt_set(s, "w", buf, 0);
+        snprintf(buf, sizeof(buf)-1, "%d", outlink->h);
+        av_opt_set(s, "h", buf, 0);
+
+        link->dst->inputs[0]->format = in->format;
+        link->dst->inputs[0]->w      = in->width;
+        link->dst->inputs[0]->h      = in->height;
+
+        if ((ret = config_props(outlink)) < 0)
+            goto fail; /* the common exit also drops the now stale graph */
+
+        zimg_image_format_default(&s->src_format, ZIMG_API_VERSION);
+        zimg_image_format_default(&s->dst_format, ZIMG_API_VERSION);
+        zimg_graph_builder_params_default(&s->params, ZIMG_API_VERSION);
+
+        s->params.dither_type = s->dither;
+        s->params.cpu_type = ZIMG_CPU_AUTO;
+        s->params.resample_filter = s->filter;
+        s->params.resample_filter_uv = s->filter;
+
+        s->src_format.width = in->width;
+        s->src_format.height = in->height;
+        s->src_format.subsample_w = desc->log2_chroma_w;
+        s->src_format.subsample_h = desc->log2_chroma_h;
+        s->src_format.depth = desc->comp[0].depth;
+        s->src_format.pixel_type = desc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
+        s->src_format.color_family = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_COLOR_RGB : ZIMG_COLOR_YUV;
+        s->src_format.matrix_coefficients = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_MATRIX_RGB : s->colorspace_in == -1 ? convert_matrix(in->colorspace) : s->colorspace_in;
+        s->src_format.transfer_characteristics = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_TRANSFER_UNSPECIFIED : s->trc_in == - 1 ? convert_trc(in->color_trc) : s->trc_in;
+        s->src_format.color_primaries = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_PRIMARIES_UNSPECIFIED : s->primaries_in == -1 ? convert_primaries(in->color_primaries) : s->primaries_in;
+        s->src_format.pixel_range = (desc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_RANGE_FULL : s->range_in == -1 ? convert_range(in->color_range) : s->range_in;
+
+        s->dst_format.width = out->width;
+        s->dst_format.height = out->height;
+        s->dst_format.subsample_w = odesc->log2_chroma_w;
+        s->dst_format.subsample_h = odesc->log2_chroma_h;
+        s->dst_format.depth = odesc->comp[0].depth;
+        s->dst_format.pixel_type = odesc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
+        s->dst_format.color_family = (odesc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_COLOR_RGB : ZIMG_COLOR_YUV;
+        s->dst_format.matrix_coefficients = (odesc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_MATRIX_RGB : s->colorspace == -1 ? convert_matrix(out->colorspace) : s->colorspace;
+        s->dst_format.transfer_characteristics = (odesc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_TRANSFER_UNSPECIFIED : s->trc == -1 ? convert_trc(out->color_trc) : s->trc;
+        s->dst_format.color_primaries = (odesc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_PRIMARIES_UNSPECIFIED : s->primaries == -1 ? convert_primaries(out->color_primaries) : s->primaries;
+        s->dst_format.pixel_range = (odesc->flags & AV_PIX_FMT_FLAG_RGB) ? ZIMG_RANGE_FULL : s->range == -1 ? convert_range(out->color_range) : s->range;
+
+        zimg_filter_graph_free(s->graph);
+        s->graph = zimg_filter_graph_build(&s->src_format, &s->dst_format, &s->params);
+        if (!s->graph) {
+            ret = print_zimg_error(link->dst);
+            goto fail;
+        }
+
+        if ((ret = zimg_filter_graph_get_tmp_size(s->graph, &tmp_size))) {
+            ret = print_zimg_error(link->dst);
+            goto fail;
+        }
+
+        s->in_colorspace  = in->colorspace;     /* recorded before the output overrides below are */
+        s->in_trc         = in->color_trc;      /* applied, so an unchanged stream compares equal */
+        s->in_primaries   = in->color_primaries;/* on the next frame and is not rebuilt again     */
+        s->in_range       = in->color_range;
+        s->out_colorspace = out->colorspace;
+        s->out_trc        = out->color_trc;
+        s->out_primaries  = out->color_primaries;
+        s->out_range      = out->color_range;
+
+        if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
+            size_t alpha_tmp_size;
+            zimg_image_format_default(&s->alpha_src_format, ZIMG_API_VERSION);
+            zimg_image_format_default(&s->alpha_dst_format, ZIMG_API_VERSION);
+            zimg_graph_builder_params_default(&s->alpha_params, ZIMG_API_VERSION);
+            s->alpha_params.dither_type = s->dither;
+            s->alpha_params.cpu_type = ZIMG_CPU_AUTO;
+            s->alpha_params.resample_filter = s->filter;
+            s->alpha_src_format.width = in->width;
+            s->alpha_src_format.height = in->height;
+            s->alpha_src_format.depth = desc->comp[0].depth;
+            s->alpha_src_format.pixel_type = desc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
+            s->alpha_src_format.color_family = ZIMG_COLOR_GREY;
+            s->alpha_src_format.pixel_range = ZIMG_RANGE_FULL; /* alpha uses the whole code range */
+            s->alpha_dst_format.width = out->width;
+            s->alpha_dst_format.height = out->height;
+            s->alpha_dst_format.depth = odesc->comp[0].depth;
+            s->alpha_dst_format.pixel_type = odesc->comp[0].depth > 8 ? ZIMG_PIXEL_WORD : ZIMG_PIXEL_BYTE;
+            s->alpha_dst_format.color_family = ZIMG_COLOR_GREY;
+            s->alpha_dst_format.pixel_range = ZIMG_RANGE_FULL;
+            zimg_filter_graph_free(s->alpha_graph);
+            s->alpha_graph = zimg_filter_graph_build(&s->alpha_src_format, &s->alpha_dst_format, &s->alpha_params);
+            if (!s->alpha_graph) {
+                ret = print_zimg_error(link->dst);
+                goto fail;
+            }
+            if (zimg_filter_graph_get_tmp_size(s->alpha_graph, &alpha_tmp_size)) {
+                ret = print_zimg_error(link->dst);
+                goto fail;
+            }
+            tmp_size = FFMAX(tmp_size, alpha_tmp_size); /* both graphs run with s->tmp */
+        }
+
+        if (tmp_size > s->tmp_size) {
+            av_freep(&s->tmp);
+            s->tmp_size = 0; /* keep the recorded size truthful if the allocation fails */
+            s->tmp = av_malloc(tmp_size);
+            if (!s->tmp) {
+                ret = AVERROR(ENOMEM);
+                goto fail;
+            }
+            s->tmp_size = tmp_size;
+        }
+    }
+
+    if (s->colorspace != -1)
+        out->colorspace = (int)s->dst_format.matrix_coefficients;
+
+    if (s->primaries != -1)
+        out->color_primaries = (int)s->dst_format.color_primaries;
+
+    if (s->range != -1) /* zimg limited/full are 0/1, AVCOL_RANGE_MPEG/JPEG are 1/2 */
+        out->color_range = (int)s->dst_format.pixel_range + 1;
+
+    if (s->trc != -1)
+        out->color_trc = (int)s->dst_format.transfer_characteristics;
+
+    av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den,
+              (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w,
+              (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h,
+              INT_MAX);
+
+    for (plane = 0; plane < 3; plane++) {
+        int p = desc->comp[plane].plane;
+        src_buf.plane[plane].data   = in->data[p];
+        src_buf.plane[plane].stride = in->linesize[p];
+        src_buf.plane[plane].mask   = -1;
+
+        p = odesc->comp[plane].plane;
+        dst_buf.plane[plane].data   = out->data[p];
+        dst_buf.plane[plane].stride = out->linesize[p];
+        dst_buf.plane[plane].mask   = -1;
+    }
+
+    ret = zimg_filter_graph_process(s->graph, &src_buf, &dst_buf, s->tmp, 0, 0, 0, 0);
+    if (ret) {
+        print_zimg_error(link->dst);
+        goto fail;
+    }
+
+    if (desc->flags & AV_PIX_FMT_FLAG_ALPHA && odesc->flags & AV_PIX_FMT_FLAG_ALPHA) {
+        src_buf.plane[0].data   = in->data[3];
+        src_buf.plane[0].stride = in->linesize[3];
+        src_buf.plane[0].mask   = -1;
+
+        dst_buf.plane[0].data   = out->data[3];
+        dst_buf.plane[0].stride = out->linesize[3];
+        dst_buf.plane[0].mask   = -1;
+
+        ret = zimg_filter_graph_process(s->alpha_graph, &src_buf, &dst_buf, s->tmp, 0, 0, 0, 0);
+        if (ret) {
+            print_zimg_error(link->dst);
+            goto fail;
+        }
+    } else if (odesc->flags & AV_PIX_FMT_FLAG_ALPHA) { /* no input alpha: make the output opaque */
+        const int depth = odesc->comp[0].depth;
+        int x, y;
+
+        for (y = 0; y < outlink->h; y++) {
+            uint8_t *row = out->data[3] + y * out->linesize[3];
+            if (depth <= 8)
+                memset(row, 0xff, outlink->w);
+            else /* deeper formats store 16-bit samples; write the largest value of the depth */
+                for (x = 0; x < outlink->w; x++)
+                    ((uint16_t *)row)[x] = (1 << depth) - 1;
+        }
+    }
+
+fail:
+    av_frame_free(&in);
+    if (ret) {
+        zimg_filter_graph_free(s->graph); /* a NULL graph forces a full rebuild on the next frame */
+        s->graph = NULL;
+        av_frame_free(&out);
+        return ret > 0 ? AVERROR_EXTERNAL : ret; /* never hand a positive zimg code to the caller */
+    }
+
+    return ff_filter_frame(outlink, out);
+}
+
+static void uninit(AVFilterContext *ctx)
+{
+    ZScaleContext *s = ctx->priv;           /* releases everything filter_frame allocated */
+    zimg_filter_graph_free(s->graph);
+    zimg_filter_graph_free(s->alpha_graph); /* built for alpha formats; must be freed as well */
+    av_freep(&s->tmp);
+    s->tmp_size = 0;
+}
+
+static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
+                           char *res, int res_len, int flags)
+{
+    ZScaleContext *s = ctx->priv;
+    int old_w = s->w, old_h = s->h;
+    int ret;
+
+    /* Only the size commands are handled; each one re-runs the output configuration. */
+    if (strcmp(cmd, "width")  && strcmp(cmd, "w") &&
+        strcmp(cmd, "height") && strcmp(cmd, "h"))
+        return AVERROR(ENOSYS);
+
+    /* Report a failed option update instead of evaluating expressions that
+     * were not updated as requested and then returning success. */
+    if ((ret = av_opt_set(s, cmd, args, 0)) < 0)
+        return ret;
+
+    if ((ret = config_props(ctx->outputs[0])) < 0) {
+        s->w = old_w;   /* config_props may have assigned s->w/s->h before failing */
+        s->h = old_h;
+    }
+    return ret;
+}
+
+#define OFFSET(x) offsetof(ZScaleContext, x)
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+/* Option table: each long name is followed by a short alias that stores into the same field. */
+static const AVOption zscale_options[] = {
+    { "w",      "Output video width",  OFFSET(w_expr),    AV_OPT_TYPE_STRING, .flags = FLAGS },
+    { "width",  "Output video width",  OFFSET(w_expr),    AV_OPT_TYPE_STRING, .flags = FLAGS },
+    { "h",      "Output video height", OFFSET(h_expr),    AV_OPT_TYPE_STRING, .flags = FLAGS },
+    { "height", "Output video height", OFFSET(h_expr),    AV_OPT_TYPE_STRING, .flags = FLAGS },
+    { "size",   "set video size",      OFFSET(size_str),  AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS },
+    { "s",      "set video size",      OFFSET(size_str),  AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS },
+    { "dither", "set dither type",     OFFSET(dither),    AV_OPT_TYPE_INT, {.i64 = 0}, 0, ZIMG_DITHER_ERROR_DIFFUSION, FLAGS, "dither" },
+    { "d",      "set dither type",     OFFSET(dither),    AV_OPT_TYPE_INT, {.i64 = 0}, 0, ZIMG_DITHER_ERROR_DIFFUSION, FLAGS, "dither" },
+    {     "none",             0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_DITHER_NONE},     0, 0, FLAGS, "dither" },
+    {     "ordered",          0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_DITHER_ORDERED},  0, 0, FLAGS, "dither" },
+    {     "random",           0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_DITHER_RANDOM},   0, 0, FLAGS, "dither" },
+    {     "error_diffusion",  0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_DITHER_ERROR_DIFFUSION}, 0, 0, FLAGS, "dither" },
+    { "filter", "set filter type",     OFFSET(filter),    AV_OPT_TYPE_INT, {.i64 = ZIMG_RESIZE_BILINEAR}, 0, ZIMG_RESIZE_LANCZOS, FLAGS, "filter" },
+    { "f",      "set filter type",     OFFSET(filter),    AV_OPT_TYPE_INT, {.i64 = ZIMG_RESIZE_BILINEAR}, 0, ZIMG_RESIZE_LANCZOS, FLAGS, "filter" },
+    {     "point",            0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_RESIZE_POINT},    0, 0, FLAGS, "filter" },
+    {     "bilinear",         0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_RESIZE_BILINEAR}, 0, 0, FLAGS, "filter" },
+    {     "bicubic",          0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_RESIZE_BICUBIC},  0, 0, FLAGS, "filter" },
+    {     "spline16",         0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_RESIZE_SPLINE16}, 0, 0, FLAGS, "filter" },
+    {     "spline36",         0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_RESIZE_SPLINE36}, 0, 0, FLAGS, "filter" },
+    {     "lanczos",          0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_RESIZE_LANCZOS},  0, 0, FLAGS, "filter" },
+    { "range", "set color range",      OFFSET(range),     AV_OPT_TYPE_INT, {.i64 = -1}, -1, ZIMG_RANGE_FULL, FLAGS, "range" },
+    { "r",     "set color range",      OFFSET(range),     AV_OPT_TYPE_INT, {.i64 = -1}, -1, ZIMG_RANGE_FULL, FLAGS, "range" },
+    {     "input",            0,       0,                 AV_OPT_TYPE_CONST, {.i64 = -1},                 0, 0, FLAGS, "range" }, /* -1: filter_frame derives the value from the frame */
+    {     "limited",          0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_RANGE_LIMITED}, 0, 0, FLAGS, "range" },
+    {     "full",             0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_RANGE_FULL},    0, 0, FLAGS, "range" },
+    { "primaries", "set color primaries", OFFSET(primaries), AV_OPT_TYPE_INT, {.i64 = -1}, -1, ZIMG_PRIMARIES_2020, FLAGS, "primaries" },
+    { "p",         "set color primaries", OFFSET(primaries), AV_OPT_TYPE_INT, {.i64 = -1}, -1, ZIMG_PRIMARIES_2020, FLAGS, "primaries" },
+    {     "input",            0,       0,                 AV_OPT_TYPE_CONST, {.i64 = -1},                         0, 0, FLAGS, "primaries" },
+    {     "709",              0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_PRIMARIES_709},         0, 0, FLAGS, "primaries" },
+    {     "unspecified",      0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_PRIMARIES_UNSPECIFIED}, 0, 0, FLAGS, "primaries" },
+    {     "170m",             0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_PRIMARIES_170M},        0, 0, FLAGS, "primaries" },
+    {     "240m",             0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_PRIMARIES_240M},        0, 0, FLAGS, "primaries" },
+    {     "2020",             0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_PRIMARIES_2020},        0, 0, FLAGS, "primaries" },
+    { "transfer", "set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, {.i64 = -1}, -1, ZIMG_TRANSFER_2020_12, FLAGS, "transfer" },
+    { "t",        "set transfer characteristic", OFFSET(trc), AV_OPT_TYPE_INT, {.i64 = -1}, -1, ZIMG_TRANSFER_2020_12, FLAGS, "transfer" },
+    {     "input",            0,       0,                 AV_OPT_TYPE_CONST, {.i64 = -1},                         0, 0, FLAGS, "transfer" },
+    {     "709",              0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_TRANSFER_709},         0, 0, FLAGS, "transfer" },
+    {     "unspecified",      0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_TRANSFER_UNSPECIFIED}, 0, 0, FLAGS, "transfer" },
+    {     "601",              0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_TRANSFER_601},         0, 0, FLAGS, "transfer" },
+    {     "linear",           0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_TRANSFER_LINEAR},      0, 0, FLAGS, "transfer" },
+    {     "2020_10",          0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_TRANSFER_2020_10},     0, 0, FLAGS, "transfer" },
+    {     "2020_12",          0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_TRANSFER_2020_12},     0, 0, FLAGS, "transfer" },
+    { "matrix", "set colorspace matrix", OFFSET(colorspace), AV_OPT_TYPE_INT, {.i64 = -1}, -1, ZIMG_MATRIX_2020_CL, FLAGS, "matrix" },
+    { "m",      "set colorspace matrix", OFFSET(colorspace), AV_OPT_TYPE_INT, {.i64 = -1}, -1, ZIMG_MATRIX_2020_CL, FLAGS, "matrix" },
+    {     "input",            0,       0,                 AV_OPT_TYPE_CONST, {.i64 = -1},                      0, 0, FLAGS, "matrix" },
+    {     "709",              0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_MATRIX_709},         0, 0, FLAGS, "matrix" },
+    {     "unspecified",      0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_MATRIX_UNSPECIFIED}, 0, 0, FLAGS, "matrix" },
+    {     "470bg",            0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_MATRIX_470BG},       0, 0, FLAGS, "matrix" },
+    {     "170m",             0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_MATRIX_170M},        0, 0, FLAGS, "matrix" },
+    {     "ycgco",            0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_MATRIX_YCGCO},       0, 0, FLAGS, "matrix" },
+    {     "2020_ncl",         0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_MATRIX_2020_NCL},    0, 0, FLAGS, "matrix" },
+    {     "2020_cl",          0,       0,                 AV_OPT_TYPE_CONST, {.i64 = ZIMG_MATRIX_2020_CL},     0, 0, FLAGS, "matrix" },
+    { "rangein", "set input color range", OFFSET(range_in),     AV_OPT_TYPE_INT, {.i64 = -1}, -1, ZIMG_RANGE_FULL, FLAGS, "range" }, /* the *in options carry the same unit string as their output counterpart */
+    { "rin",     "set input color range", OFFSET(range_in),     AV_OPT_TYPE_INT, {.i64 = -1}, -1, ZIMG_RANGE_FULL, FLAGS, "range" },
+    { "primariesin", "set input color primaries", OFFSET(primaries_in), AV_OPT_TYPE_INT, {.i64 = -1}, -1, ZIMG_PRIMARIES_2020, FLAGS, "primaries" },
+    { "pin",         "set input color primaries", OFFSET(primaries_in), AV_OPT_TYPE_INT, {.i64 = -1}, -1, ZIMG_PRIMARIES_2020, FLAGS, "primaries" },
+    { "transferin", "set input transfer characteristic", OFFSET(trc_in), AV_OPT_TYPE_INT, {.i64 = -1}, -1, ZIMG_TRANSFER_2020_12, FLAGS, "transfer" },
+    { "tin",        "set input transfer characteristic", OFFSET(trc_in), AV_OPT_TYPE_INT, {.i64 = -1}, -1, ZIMG_TRANSFER_2020_12, FLAGS, "transfer" },
+    { "matrixin", "set input colorspace matrix", OFFSET(colorspace_in), AV_OPT_TYPE_INT, {.i64 = -1}, -1, ZIMG_MATRIX_2020_CL, FLAGS, "matrix" },
+    { "min",      "set input colorspace matrix", OFFSET(colorspace_in), AV_OPT_TYPE_INT, {.i64 = -1}, -1, ZIMG_MATRIX_2020_CL, FLAGS, "matrix" },
+    { NULL }
+};
+
+static const AVClass zscale_class = {  /* referenced by ff_vf_zscale.priv_class */
+    .class_name       = "zscale",
+    .item_name        = av_default_item_name,
+    .option           = zscale_options, /* the option table defined above */
+    .version          = LIBAVUTIL_VERSION_INT,
+    .category         = AV_CLASS_CATEGORY_FILTER,
+};
+
+static const AVFilterPad avfilter_vf_zscale_inputs[] = { /* one video input pad */
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame,   /* frame handler defined above */
+    },
+    { NULL }                            /* terminator */
+};
+
+static const AVFilterPad avfilter_vf_zscale_outputs[] = { /* one video output pad */
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_props,   /* evaluates the w/h expressions; also called from filter_frame and process_command */
+    },
+    { NULL }                            /* terminator */
+};
+
+AVFilter ff_vf_zscale = {   /* filter definition tying together the callbacks of this file */
+    .name            = "zscale",
+    .description     = NULL_IF_CONFIG_SMALL("Apply resizing, colorspace and bit depth conversion."),
+    .init_dict       = init_dict,          /* normalises the size options */
+    .query_formats   = query_formats,
+    .priv_size       = sizeof(ZScaleContext),
+    .priv_class      = &zscale_class,
+    .uninit          = uninit,
+    .inputs          = avfilter_vf_zscale_inputs,
+    .outputs         = avfilter_vf_zscale_outputs,
+    .process_command = process_command,    /* handles the w/width/h/height commands */
+};
diff --git a/libavfilter/video.c b/libavfilter/video.c
index 6a554834..2744be63 100644
--- a/libavfilter/video.c
+++ b/libavfilter/video.c
@@ -32,79 +32,46 @@
 #include "internal.h"
 #include "video.h"
 
+#define BUFFER_ALIGN 32
+
+
 AVFrame *ff_null_get_video_buffer(AVFilterLink *link, int w, int h)
 {
     return ff_get_video_buffer(link->dst->outputs[0], w, h);
 }
 
-/* TODO: set the buffer's priv member to a context structure for the whole
- * filter chain.  This will allow for a buffer pool instead of the constant
- * alloc & free cycle currently implemented. */
 AVFrame *ff_default_get_video_buffer(AVFilterLink *link, int w, int h)
 {
-    AVFrame *frame = av_frame_alloc();
-    int ret;
-
-    if (!frame)
-        return NULL;
-
-    frame->width  = w;
-    frame->height = h;
-    frame->format = link->format;
-
-    ret = av_frame_get_buffer(frame, 32);
-    if (ret < 0)
-        av_frame_free(&frame);
-
-    return frame;
-}
-
-#if FF_API_AVFILTERBUFFER
-AVFilterBufferRef *
-avfilter_get_video_buffer_ref_from_arrays(uint8_t * const data[4], const int linesize[4], int perms,
-                                          int w, int h, enum AVPixelFormat format)
-{
-    AVFilterBuffer *pic = av_mallocz(sizeof(AVFilterBuffer));
-    AVFilterBufferRef *picref = av_mallocz(sizeof(AVFilterBufferRef));
-
-    if (!pic || !picref)
-        goto fail;
-
-    picref->buf = pic;
-    picref->buf->free = ff_avfilter_default_free_buffer;
-    if (!(picref->video = av_mallocz(sizeof(AVFilterBufferRefVideoProps))))
-        goto fail;
-
-    pic->w = picref->video->w = w;
-    pic->h = picref->video->h = h;
-
-    /* make sure the buffer gets read permission or it's useless for output */
-    picref->perms = perms | AV_PERM_READ;
-
-    pic->refcount = 1;
-    picref->type = AVMEDIA_TYPE_VIDEO;
-    pic->format = picref->format = format;
-
-    memcpy(pic->data,        data,          4*sizeof(data[0]));
-    memcpy(pic->linesize,    linesize,      4*sizeof(linesize[0]));
-    memcpy(picref->data,     pic->data,     sizeof(picref->data));
-    memcpy(picref->linesize, pic->linesize, sizeof(picref->linesize));
-
-    pic->   extended_data = pic->data;
-    picref->extended_data = picref->data;
-
-    picref->pts = AV_NOPTS_VALUE;
-
-    return picref;
-
-fail:
-    if (picref && picref->video)
-        av_free(picref->video);
-    av_free(picref);
-    av_free(pic);
-    return NULL;
+    int pool_width = 0; /* pool_* receive the existing pool's configuration for the comparison below */
+    int pool_height = 0;
+    int pool_align = 0;
+    enum AVPixelFormat pool_format = AV_PIX_FMT_NONE;
+
+    if (!link->video_frame_pool) { /* no pool on this link yet: create one for the requested size/format */
+        link->video_frame_pool = ff_video_frame_pool_init(av_buffer_allocz, w, h,
+                                                          link->format, BUFFER_ALIGN);
+        if (!link->video_frame_pool)
+            return NULL;
+    } else {
+        if (ff_video_frame_pool_get_config(link->video_frame_pool,
+                                           &pool_width, &pool_height,
+                                           &pool_format, &pool_align) < 0) {
+            return NULL;
+        }
+
+        if (pool_width != w || pool_height != h ||
+            pool_format != link->format || pool_align != BUFFER_ALIGN) { /* request no longer matches the pool: drop it and build a new one */
+
+            ff_video_frame_pool_uninit((FFVideoFramePool **)&link->video_frame_pool);
+            link->video_frame_pool = ff_video_frame_pool_init(av_buffer_allocz, w, h,
+                                                              link->format, BUFFER_ALIGN);
+            if (!link->video_frame_pool)
+                return NULL;
+        }
+    }
+
+    return ff_video_frame_pool_get(link->video_frame_pool); /* result of the pool getter is returned unchanged to the caller */
 }
-#endif
 
 AVFrame *ff_get_video_buffer(AVFilterLink *link, int w, int h)
 {
diff --git a/libavfilter/vsrc_cellauto.c b/libavfilter/vsrc_cellauto.c
index 4f4b01c6..67bd167a 100644
--- a/libavfilter/vsrc_cellauto.c
+++ b/libavfilter/vsrc_cellauto.c
@@ -26,6 +26,7 @@
 /* #define DEBUG */
 
 #include "libavutil/file.h"
+#include "libavutil/internal.h"
 #include "libavutil/lfg.h"
 #include "libavutil/opt.h"
 #include "libavutil/parseutils.h"
@@ -73,10 +74,10 @@ static const AVOption cellauto_options[] = {
     { "ratio",             "set fill ratio for filling initial grid randomly", OFFSET(random_fill_ratio), AV_OPT_TYPE_DOUBLE, {.dbl = 1/M_PHI}, 0, 1, FLAGS },
     { "random_seed", "set the seed for filling the initial grid randomly", OFFSET(random_seed), AV_OPT_TYPE_INT, {.i64 = -1}, -1, UINT32_MAX, FLAGS },
     { "seed",        "set the seed for filling the initial grid randomly", OFFSET(random_seed), AV_OPT_TYPE_INT, {.i64 = -1}, -1, UINT32_MAX, FLAGS },
-    { "scroll",      "scroll pattern downward", OFFSET(scroll), AV_OPT_TYPE_INT, {.i64 = 1}, 0, 1, FLAGS },
-    { "start_full",  "start filling the whole video", OFFSET(start_full), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, FLAGS },
-    { "full",        "start filling the whole video", OFFSET(start_full), AV_OPT_TYPE_INT, {.i64 = 1}, 0, 1, FLAGS },
-    { "stitch",      "stitch boundaries", OFFSET(stitch), AV_OPT_TYPE_INT,    {.i64 = 1},   0, 1, FLAGS },
+    { "scroll",      "scroll pattern downward", OFFSET(scroll), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS },
+    { "start_full",  "start filling the whole video", OFFSET(start_full), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS },
+    { "full",        "start filling the whole video", OFFSET(start_full), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS },
+    { "stitch",      "stitch boundaries", OFFSET(stitch), AV_OPT_TYPE_BOOL,    {.i64 = 1},   0, 1, FLAGS },
     { NULL }
 };
 
@@ -85,55 +86,55 @@ AVFILTER_DEFINE_CLASS(cellauto);
 #ifdef DEBUG
 static void show_cellauto_row(AVFilterContext *ctx)
 {
-    CellAutoContext *cellauto = ctx->priv;
+    CellAutoContext *s = ctx->priv;
     int i;
-    uint8_t *row = cellauto->buf + cellauto->w * cellauto->buf_row_idx;
-    char *line = av_malloc(cellauto->w + 1);
+    uint8_t *row = s->buf + s->w * s->buf_row_idx; /* start of the current row inside buf */
+    char *line = av_malloc(s->w + 1); /* one char per cell plus the terminating NUL */
     if (!line)
         return;
 
-    for (i = 0; i < cellauto->w; i++)
+    for (i = 0; i < s->w; i++) /* render non-zero cells as '@' and zero cells as ' ' */
         line[i] = row[i] ? '@' : ' ';
     line[i] = 0;
-    av_log(ctx, AV_LOG_DEBUG, "generation:%"PRId64" row:%s|\n", cellauto->generation, line);
+    av_log(ctx, AV_LOG_DEBUG, "generation:%"PRId64" row:%s|\n", s->generation, line);
     av_free(line);
 }
 #endif
 
 static int init_pattern_from_string(AVFilterContext *ctx)
 {
-    CellAutoContext *cellauto = ctx->priv;
+    CellAutoContext *s = ctx->priv;
     char *p;
     int i, w = 0;
 
-    w = strlen(cellauto->pattern);
+    w = strlen(s->pattern);
     av_log(ctx, AV_LOG_DEBUG, "w:%d\n", w);
 
-    if (cellauto->w) {
-        if (w > cellauto->w) {
+    if (s->w) {
+        if (w > s->w) {
             av_log(ctx, AV_LOG_ERROR,
                    "The specified width is %d which cannot contain the provided string width of %d\n",
-                   cellauto->w, w);
+                   s->w, w);
             return AVERROR(EINVAL);
         }
     } else {
         /* width was not specified, set it to width of the provided row */
-        cellauto->w = w;
-        cellauto->h = (double)cellauto->w * M_PHI;
+        s->w = w;
+        s->h = (double)s->w * M_PHI;
     }
 
-    cellauto->buf = av_mallocz_array(sizeof(uint8_t) * cellauto->w, cellauto->h);
-    if (!cellauto->buf)
+    s->buf = av_mallocz_array(sizeof(uint8_t) * s->w, s->h);
+    if (!s->buf)
         return AVERROR(ENOMEM);
 
     /* fill buf */
-    p = cellauto->pattern;
-    for (i = (cellauto->w - w)/2;; i++) {
+    p = s->pattern;
+    for (i = (s->w - w)/2;; i++) {
         av_log(ctx, AV_LOG_DEBUG, "%d %c\n", i, *p == '\n' ? 'N' : *p);
         if (*p == '\n' || !*p)
             break;
         else
-            cellauto->buf[i] = !!av_isgraph(*(p++));
+            s->buf[i] = !!av_isgraph(*(p++));
     }
 
     return 0;
@@ -141,165 +142,165 @@ static int init_pattern_from_string(AVFilterContext *ctx)
 
 static int init_pattern_from_file(AVFilterContext *ctx)
 {
-    CellAutoContext *cellauto = ctx->priv;
+    CellAutoContext *s = ctx->priv;
     int ret;
 
-    ret = av_file_map(cellauto->filename,
-                      &cellauto->file_buf, &cellauto->file_bufsize, 0, ctx);
+    ret = av_file_map(s->filename,
+                      &s->file_buf, &s->file_bufsize, 0, ctx); /* the mapping is released by av_file_unmap() in uninit() */
     if (ret < 0)
         return ret;
 
     /* create a string based on the read file */
-    cellauto->pattern = av_malloc(cellauto->file_bufsize + 1);
-    if (!cellauto->pattern)
+    s->pattern = av_malloc(s->file_bufsize + 1); /* +1 for the terminating NUL written below */
+    if (!s->pattern)
         return AVERROR(ENOMEM);
-    memcpy(cellauto->pattern, cellauto->file_buf, cellauto->file_bufsize);
-    cellauto->pattern[cellauto->file_bufsize] = 0;
+    memcpy(s->pattern, s->file_buf, s->file_bufsize);
+    s->pattern[s->file_bufsize] = 0; /* NUL-terminate so the copy can be handled as a string */
 
     return init_pattern_from_string(ctx);
 }
 
 static av_cold int init(AVFilterContext *ctx)
 {
-    CellAutoContext *cellauto = ctx->priv;
+    CellAutoContext *s = ctx->priv;
     int ret;
 
-    if (!cellauto->w && !cellauto->filename && !cellauto->pattern)
-        av_opt_set(cellauto, "size", "320x518", 0);
+    if (!s->w && !s->filename && !s->pattern)
+        av_opt_set(s, "size", "320x518", 0); /* no width and no pattern source given: fall back to 320x518 */
 
-    if (cellauto->filename && cellauto->pattern) {
+    if (s->filename && s->pattern) { /* the two pattern sources are mutually exclusive */
         av_log(ctx, AV_LOG_ERROR, "Only one of the filename or pattern options can be used\n");
         return AVERROR(EINVAL);
     }
 
-    if (cellauto->filename) {
+    if (s->filename) {
         if ((ret = init_pattern_from_file(ctx)) < 0)
             return ret;
-    } else if (cellauto->pattern) {
+    } else if (s->pattern) {
         if ((ret = init_pattern_from_string(ctx)) < 0)
             return ret;
     } else {
         /* fill the first row randomly */
         int i;
 
-        cellauto->buf = av_mallocz_array(sizeof(uint8_t) * cellauto->w, cellauto->h);
-        if (!cellauto->buf)
+        s->buf = av_mallocz_array(sizeof(uint8_t) * s->w, s->h); /* w*h bytes: one byte per cell, h rows */
+        if (!s->buf)
             return AVERROR(ENOMEM);
-        if (cellauto->random_seed == -1)
-            cellauto->random_seed = av_get_random_seed();
+        if (s->random_seed == -1) /* -1 is the option default and means "pick a seed" */
+            s->random_seed = av_get_random_seed();
 
-        av_lfg_init(&cellauto->lfg, cellauto->random_seed);
+        av_lfg_init(&s->lfg, s->random_seed);
 
-        for (i = 0; i < cellauto->w; i++) {
-            double r = (double)av_lfg_get(&cellauto->lfg) / UINT32_MAX;
-            if (r <= cellauto->random_fill_ratio)
-                cellauto->buf[i] = 1;
+        for (i = 0; i < s->w; i++) { /* only the first row (indices 0..w-1) is seeded */
+            double r = (double)av_lfg_get(&s->lfg) / UINT32_MAX; /* generator output scaled by UINT32_MAX */
+            if (r <= s->random_fill_ratio)
+                s->buf[i] = 1;
         }
     }
 
     av_log(ctx, AV_LOG_VERBOSE,
            "s:%dx%d r:%d/%d rule:%d stitch:%d scroll:%d full:%d seed:%u\n",
-           cellauto->w, cellauto->h, cellauto->frame_rate.num, cellauto->frame_rate.den,
-           cellauto->rule, cellauto->stitch, cellauto->scroll, cellauto->start_full,
-           cellauto->random_seed);
+           s->w, s->h, s->frame_rate.num, s->frame_rate.den,
+           s->rule, s->stitch, s->scroll, s->start_full,
+           s->random_seed);
     return 0;
 }
 
 static av_cold void uninit(AVFilterContext *ctx)
 {
-    CellAutoContext *cellauto = ctx->priv;
+    CellAutoContext *s = ctx->priv;
 
-    av_file_unmap(cellauto->file_buf, cellauto->file_bufsize);
-    av_freep(&cellauto->buf);
-    av_freep(&cellauto->pattern);
+    av_file_unmap(s->file_buf, s->file_bufsize); /* counterpart of av_file_map() in init_pattern_from_file() */
+    av_freep(&s->buf); /* cell buffer allocated during init */
+    av_freep(&s->pattern);
 }
 
 static int config_props(AVFilterLink *outlink)
 {
-    CellAutoContext *cellauto = outlink->src->priv;
+    CellAutoContext *s = outlink->src->priv;
 
-    outlink->w = cellauto->w;
-    outlink->h = cellauto->h;
-    outlink->time_base = av_inv_q(cellauto->frame_rate);
+    outlink->w = s->w; /* output dimensions are the cell grid dimensions */
+    outlink->h = s->h;
+    outlink->time_base = av_inv_q(s->frame_rate); /* time base is the inverse of the frame rate */
 
     return 0;
 }
 
 static void evolve(AVFilterContext *ctx)
 {
-    CellAutoContext *cellauto = ctx->priv;
+    CellAutoContext *s = ctx->priv;
     int i, v, pos[3];
-    uint8_t *row, *prev_row = cellauto->buf + cellauto->buf_row_idx * cellauto->w;
+    uint8_t *row, *prev_row = s->buf + s->buf_row_idx * s->w; /* row the new generation is derived from */
     enum { NW, N, NE };
 
-    cellauto->buf_prev_row_idx = cellauto->buf_row_idx;
-    cellauto->buf_row_idx      = cellauto->buf_row_idx == cellauto->h-1 ? 0 : cellauto->buf_row_idx+1;
-    row = cellauto->buf + cellauto->w * cellauto->buf_row_idx;
+    s->buf_prev_row_idx = s->buf_row_idx;
+    s->buf_row_idx      = s->buf_row_idx == s->h-1 ? 0 : s->buf_row_idx+1; /* advance one row, wrapping to 0 after row h-1 */
+    row = s->buf + s->w * s->buf_row_idx; /* row that receives the new generation */
 
-    for (i = 0; i < cellauto->w; i++) {
-        if (cellauto->stitch) {
-            pos[NW] = i-1 < 0 ? cellauto->w-1 : i-1;
+    for (i = 0; i < s->w; i++) {
+        if (s->stitch) { /* stitched: left/right neighbours wrap around the row ends */
+            pos[NW] = i-1 < 0 ? s->w-1 : i-1;
             pos[N]  = i;
-            pos[NE] = i+1 == cellauto->w ? 0  : i+1;
+            pos[NE] = i+1 == s->w ? 0  : i+1;
             v = prev_row[pos[NW]]<<2 | prev_row[pos[N]]<<1 | prev_row[pos[NE]];
         } else {
             v = 0;
             v|= i-1 >= 0          ? prev_row[i-1]<<2 : 0;
             v|=                     prev_row[i  ]<<1    ;
-            v|= i+1 < cellauto->w ? prev_row[i+1]    : 0;
+            v|= i+1 < s->w ? prev_row[i+1]    : 0; /* not stitched: neighbours beyond either end contribute 0 */
         }
-        row[i] = !!(cellauto->rule & (1<<v));
-        av_dlog(ctx, "i:%d context:%c%c%c -> cell:%d\n", i,
+        row[i] = !!(s->rule & (1<<v)); /* v is the 3-bit neighbourhood; the new cell is 1 iff bit v of rule is set */
+        ff_dlog(ctx, "i:%d context:%c%c%c -> cell:%d\n", i,
                 v&4?'@':' ', v&2?'@':' ', v&1?'@':' ', row[i]);
     }
 
-    cellauto->generation++;
+    s->generation++;
 }
 
 static void fill_picture(AVFilterContext *ctx, AVFrame *picref)
 {
-    CellAutoContext *cellauto = ctx->priv;
+    CellAutoContext *s = ctx->priv;
     int i, j, k, row_idx = 0;
     uint8_t *p0 = picref->data[0];
 
-    if (cellauto->scroll && cellauto->generation >= cellauto->h)
+    if (s->scroll && s->generation >= s->h) /* scrolling only starts once h generations exist */
         /* show on top the oldest row */
-        row_idx = (cellauto->buf_row_idx + 1) % cellauto->h;
+        row_idx = (s->buf_row_idx + 1) % s->h;
 
     /* fill the output picture with the whole buffer */
-    for (i = 0; i < cellauto->h; i++) {
+    for (i = 0; i < s->h; i++) {
         uint8_t byte = 0;
-        uint8_t *row = cellauto->buf + row_idx*cellauto->w;
+        uint8_t *row = s->buf + row_idx*s->w;
         uint8_t *p = p0;
-        for (k = 0, j = 0; j < cellauto->w; j++) {
+        for (k = 0, j = 0; j < s->w; j++) { /* pack 8 cells per output byte, first cell in the most significant bit */
             byte |= row[j]<<(7-k++);
-            if (k==8 || j == cellauto->w-1) {
+            if (k==8 || j == s->w-1) { /* flush on a full byte or at the last cell of the row */
                 k = 0;
                 *p++ = byte;
                 byte = 0;
             }
         }
-        row_idx = (row_idx + 1) % cellauto->h;
+        row_idx = (row_idx + 1) % s->h; /* next buffer row, wrapping around */
         p0 += picref->linesize[0];
     }
 }
 
 static int request_frame(AVFilterLink *outlink)
 {
-    CellAutoContext *cellauto = outlink->src->priv;
-    AVFrame *picref = ff_get_video_buffer(outlink, cellauto->w, cellauto->h);
+    CellAutoContext *s = outlink->src->priv;
+    AVFrame *picref = ff_get_video_buffer(outlink, s->w, s->h);
     if (!picref)
         return AVERROR(ENOMEM);
     picref->sample_aspect_ratio = (AVRational) {1, 1};
-    if (cellauto->generation == 0 && cellauto->start_full) {
+    if (s->generation == 0 && s->start_full) {
         int i;
-        for (i = 0; i < cellauto->h-1; i++)
+        for (i = 0; i < s->h-1; i++)
             evolve(outlink->src);
     }
     fill_picture(outlink->src, picref);
     evolve(outlink->src);
 
-    picref->pts = cellauto->pts++;
+    picref->pts = s->pts++;
 
 #ifdef DEBUG
     show_cellauto_row(outlink->src);
diff --git a/libavfilter/vsrc_life.c b/libavfilter/vsrc_life.c
index 47630add..3d1d36ff 100644
--- a/libavfilter/vsrc_life.c
+++ b/libavfilter/vsrc_life.c
@@ -26,6 +26,7 @@
 /* #define DEBUG */
 
 #include "libavutil/file.h"
+#include "libavutil/internal.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/lfg.h"
 #include "libavutil/opt.h"
@@ -88,7 +89,7 @@ static const AVOption life_options[] = {
     { "ratio",             "set fill ratio for filling initial grid randomly", OFFSET(random_fill_ratio), AV_OPT_TYPE_DOUBLE, {.dbl=1/M_PHI}, 0, 1, FLAGS },
     { "random_seed", "set the seed for filling the initial grid randomly", OFFSET(random_seed), AV_OPT_TYPE_INT, {.i64=-1}, -1, UINT32_MAX, FLAGS },
     { "seed",        "set the seed for filling the initial grid randomly", OFFSET(random_seed), AV_OPT_TYPE_INT, {.i64=-1}, -1, UINT32_MAX, FLAGS },
-    { "stitch",      "stitch boundaries", OFFSET(stitch), AV_OPT_TYPE_INT, {.i64=1}, 0, 1, FLAGS },
+    { "stitch",      "stitch boundaries", OFFSET(stitch), AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1, FLAGS },
     { "mold",        "set mold speed for dead cells", OFFSET(mold), AV_OPT_TYPE_INT, {.i64=0}, 0, 0xFF, FLAGS },
     { "life_color",  "set life color",  OFFSET( life_color), AV_OPT_TYPE_COLOR, {.str="white"}, CHAR_MIN, CHAR_MAX, FLAGS },
     { "death_color", "set death color", OFFSET(death_color), AV_OPT_TYPE_COLOR, {.str="black"}, CHAR_MIN, CHAR_MAX, FLAGS },
@@ -334,7 +335,7 @@ static void evolve(AVFilterContext *ctx)
             if (alive)     *newbuf = ALIVE_CELL; // new cell is alive
             else if (cell) *newbuf = cell - 1;   // new cell is dead and in the process of mold
             else           *newbuf = 0;          // new cell is definitely dead
-            av_dlog(ctx, "i:%d j:%d live_neighbors:%d cell:%d -> cell:%d\n", i, j, n, cell, *newbuf);
+            ff_dlog(ctx, "i:%d j:%d live_neighbors:%d cell:%d -> cell:%d\n", i, j, n, cell, *newbuf);
             newbuf++;
         }
     }
@@ -427,10 +428,7 @@ static int query_formats(AVFilterContext *ctx)
     }
 
     fmts_list = ff_make_format_list(pix_fmts);
-    if (!fmts_list)
-        return AVERROR(ENOMEM);
-    ff_set_common_formats(ctx, fmts_list);
-    return 0;
+    return ff_set_common_formats(ctx, fmts_list);
 }
 
 static const AVFilterPad life_outputs[] = {
diff --git a/libavfilter/vsrc_mandelbrot.c b/libavfilter/vsrc_mandelbrot.c
index e26fecd9..2f6944f0 100644
--- a/libavfilter/vsrc_mandelbrot.c
+++ b/libavfilter/vsrc_mandelbrot.c
@@ -23,7 +23,7 @@
 
 /**
  * @file
- * Mandelbrot fraktal renderer
+ * Mandelbrot fractal renderer
  */
 
 #include "avfilter.h"
@@ -121,35 +121,35 @@ AVFILTER_DEFINE_CLASS(mandelbrot);
 
 static av_cold int init(AVFilterContext *ctx)
 {
-    MBContext *mb = ctx->priv;
+    MBContext *s = ctx->priv;
 
-    mb->bailout *= mb->bailout;
+    s->bailout *= s->bailout; /* keep the squared bailout; the renderer compares it against zr*zr + zi*zi */
 
-    mb->start_scale /=mb->h;
-    mb->end_scale /=mb->h;
+    s->start_scale /=s->h; /* both scales are divided by the frame height */
+    s->end_scale /=s->h;
 
-    mb->cache_allocated = mb->w * mb->h * 3;
-    mb->cache_used = 0;
-    mb->point_cache= av_malloc_array(mb->cache_allocated, sizeof(*mb->point_cache));
-    mb-> next_cache= av_malloc_array(mb->cache_allocated, sizeof(*mb-> next_cache));
-    mb-> zyklus    = av_malloc_array(mb->maxiter + 16, sizeof(*mb->zyklus));
+    s->cache_allocated = s->w * s->h * 3; /* cache capacity in points: three per pixel */
+    s->cache_used = 0;
+    s->point_cache= av_malloc_array(s->cache_allocated, sizeof(*s->point_cache));
+    s-> next_cache= av_malloc_array(s->cache_allocated, sizeof(*s-> next_cache));
+    s-> zyklus    = av_malloc_array(s->maxiter + 16, sizeof(*s->zyklus)); /* NOTE(review): none of the three allocations is checked before this function returns 0 */
 
     return 0;
 }
 
 static av_cold void uninit(AVFilterContext *ctx)
 {
-    MBContext *mb = ctx->priv;
+    MBContext *s = ctx->priv;
 
-    av_freep(&mb->point_cache);
-    av_freep(&mb-> next_cache);
-    av_freep(&mb->zyklus);
+    av_freep(&s->point_cache); /* releases the three buffers allocated in init() */
+    av_freep(&s-> next_cache);
+    av_freep(&s->zyklus);
 }
 
 static int query_formats(AVFilterContext *ctx)
 {
     static const enum AVPixelFormat pix_fmts[] = {
-        AV_PIX_FMT_BGR32,
+        AV_PIX_FMT_0BGR32,
         AV_PIX_FMT_NONE
     };
 
@@ -162,48 +162,48 @@ static int query_formats(AVFilterContext *ctx)
 static int config_props(AVFilterLink *inlink)
 {
     AVFilterContext *ctx = inlink->src;
-    MBContext *mb = ctx->priv;
+    MBContext *s = ctx->priv; /* NOTE(review): the link is named inlink, but the filter is reached through its src end */
 
-    if (av_image_check_size(mb->w, mb->h, 0, ctx) < 0)
+    if (av_image_check_size(s->w, s->h, 0, ctx) < 0) /* reject sizes the image size check does not accept */
         return AVERROR(EINVAL);
 
-    inlink->w = mb->w;
-    inlink->h = mb->h;
-    inlink->time_base = av_inv_q(mb->frame_rate);
+    inlink->w = s->w;
+    inlink->h = s->h;
+    inlink->time_base = av_inv_q(s->frame_rate); /* time base is the inverse of the frame rate */
 
     return 0;
 }
 
 static void fill_from_cache(AVFilterContext *ctx, uint32_t *color, int *in_cidx, int *out_cidx, double py, double scale){
-    MBContext *mb = ctx->priv;
-    if(mb->morphamp)
+    MBContext *s = ctx->priv;
+    if(s->morphamp) /* the point cache is not consulted when morphamp is non-zero */
         return;
-    for(; *in_cidx < mb->cache_used; (*in_cidx)++){
-        Point *p= &mb->point_cache[*in_cidx];
+    for(; *in_cidx < s->cache_used; (*in_cidx)++){ /* consume cached points until one lies beyond py */
+        Point *p= &s->point_cache[*in_cidx];
         int x;
         if(p->p[1] > py)
             break;
-        x= round((p->p[0] - mb->start_x) / scale + mb->w/2);
-        if(x<0 || x >= mb->w)
+        x= lrint((p->p[0] - s->start_x) / scale + s->w/2); /* column index for the cached point at the current scale */
+        if(x<0 || x >= s->w) /* point falls outside the row: skip it */
             continue;
         if(color) color[x] = p->val;
-        if(out_cidx && *out_cidx < mb->cache_allocated)
-            mb->next_cache[(*out_cidx)++]= *p;
+        if(out_cidx && *out_cidx < s->cache_allocated) /* carry the point over to next_cache while there is room */
+            s->next_cache[(*out_cidx)++]= *p;
     }
 }
 
-static int interpol(MBContext *mb, uint32_t *color, int x, int y, int linesize)
+static int interpol(MBContext *s, uint32_t *color, int x, int y, int linesize)
 {
     uint32_t a,b,c,d, i;
     uint32_t ipol=0xFF000000;
     int dist;
 
-    if(!x || !y || x+1==mb->w || y+1==mb->h)
+    if(!x || !y || x+1==s->w || y+1==s->h)
         return 0;
 
-    dist= FFMAX(FFABS(x-(mb->w>>1))*mb->h, FFABS(y-(mb->h>>1))*mb->w);
+    dist= FFMAX(FFABS(x-(s->w>>1))*s->h, FFABS(y-(s->h>>1))*s->w);
 
-    if(dist<(mb->w*mb->h>>3))
+    if(dist<(s->w*s->h>>3))
         return 0;
 
     a=color[(x+1) + (y+0)*linesize];
@@ -248,50 +248,50 @@ static int interpol(MBContext *mb, uint32_t *color, int x, int y, int linesize)
 
 static void draw_mandelbrot(AVFilterContext *ctx, uint32_t *color, int linesize, int64_t pts)
 {
-    MBContext *mb = ctx->priv;
+    MBContext *s = ctx->priv;
     int x,y,i, in_cidx=0, next_cidx=0, tmp_cidx;
-    double scale= mb->start_scale*pow(mb->end_scale/mb->start_scale, pts/mb->end_pts);
+    double scale= s->start_scale*pow(s->end_scale/s->start_scale, pts/s->end_pts);
     int use_zyklus=0;
-    fill_from_cache(ctx, NULL, &in_cidx, NULL, mb->start_y+scale*(-mb->h/2-0.5), scale);
+    fill_from_cache(ctx, NULL, &in_cidx, NULL, s->start_y+scale*(-s->h/2-0.5), scale);
     tmp_cidx= in_cidx;
-    memset(color, 0, sizeof(*color)*mb->w);
-    for(y=0; y<mb->h; y++){
+    memset(color, 0, sizeof(*color)*s->w);
+    for(y=0; y<s->h; y++){
         int y1= y+1;
-        const double ci=mb->start_y+scale*(y-mb->h/2);
+        const double ci=s->start_y+scale*(y-s->h/2);
         fill_from_cache(ctx, NULL, &in_cidx, &next_cidx, ci, scale);
-        if(y1<mb->h){
-            memset(color+linesize*y1, 0, sizeof(*color)*mb->w);
+        if(y1<s->h){
+            memset(color+linesize*y1, 0, sizeof(*color)*s->w);
             fill_from_cache(ctx, color+linesize*y1, &tmp_cidx, NULL, ci + 3*scale/2, scale);
         }
 
-        for(x=0; x<mb->w; x++){
+        for(x=0; x<s->w; x++){
             float av_uninit(epsilon);
-            const double cr=mb->start_x+scale*(x-mb->w/2);
+            const double cr=s->start_x+scale*(x-s->w/2);
             double zr=cr;
             double zi=ci;
             uint32_t c=0;
-            double dv= mb->dither / (double)(1LL<<32);
-            mb->dither= mb->dither*1664525+1013904223;
+            double dv= s->dither / (double)(1LL<<32);
+            s->dither= s->dither*1664525+1013904223;
 
             if(color[x + y*linesize] & 0xFF000000)
                 continue;
-            if(!mb->morphamp){
-                if(interpol(mb, color, x, y, linesize)){
-                    if(next_cidx < mb->cache_allocated){
-                        mb->next_cache[next_cidx  ].p[0]= cr;
-                        mb->next_cache[next_cidx  ].p[1]= ci;
-                        mb->next_cache[next_cidx++].val = color[x + y*linesize];
+            if(!s->morphamp){
+                if(interpol(s, color, x, y, linesize)){
+                    if(next_cidx < s->cache_allocated){
+                        s->next_cache[next_cidx  ].p[0]= cr;
+                        s->next_cache[next_cidx  ].p[1]= ci;
+                        s->next_cache[next_cidx++].val = color[x + y*linesize];
                     }
                     continue;
                 }
             }else{
-                zr += cos(pts * mb->morphxf) * mb->morphamp;
-                zi += sin(pts * mb->morphyf) * mb->morphamp;
+                zr += cos(pts * s->morphxf) * s->morphamp;
+                zi += sin(pts * s->morphyf) * s->morphamp;
             }
 
-            use_zyklus= (x==0 || mb->inner!=BLACK ||color[x-1 + y*linesize] == 0xFF000000);
+            use_zyklus= (x==0 || s->inner!=BLACK ||color[x-1 + y*linesize] == 0xFF000000);
             if(use_zyklus)
-                epsilon= scale*1*sqrt(SQR(x-mb->w/2) + SQR(y-mb->h/2))/mb->w;
+                epsilon= scale*(abs(x-s->w/2) + abs(y-s->h/2))/s->w;
 
 #define Z_Z2_C(outr,outi,inr,ini)\
             outr= inr*inr - ini*ini + cr;\
@@ -300,15 +300,15 @@ static void draw_mandelbrot(AVFilterContext *ctx, uint32_t *color, int linesize,
 #define Z_Z2_C_ZYKLUS(outr,outi,inr,ini, Z)\
             Z_Z2_C(outr,outi,inr,ini)\
             if(use_zyklus){\
-                if(Z && fabs(mb->zyklus[i>>1][0]-outr)+fabs(mb->zyklus[i>>1][1]-outi) <= epsilon)\
+                if(Z && fabs(s->zyklus[i>>1][0]-outr)+fabs(s->zyklus[i>>1][1]-outi) <= epsilon)\
                     break;\
             }\
-            mb->zyklus[i][0]= outr;\
-            mb->zyklus[i][1]= outi;\
+            s->zyklus[i][0]= outr;\
+            s->zyklus[i][1]= outi;\
 
 
 
-            for(i=0; i<mb->maxiter-8; i++){
+            for(i=0; i<s->maxiter-8; i++){
                 double t;
                 Z_Z2_C_ZYKLUS(t, zi, zr, zi, 0)
                 i++;
@@ -325,27 +325,27 @@ static void draw_mandelbrot(AVFilterContext *ctx, uint32_t *color, int linesize,
                 Z_Z2_C_ZYKLUS(t, zi, zr, zi, 0)
                 i++;
                 Z_Z2_C_ZYKLUS(zr, zi, t, zi, 1)
-                if(zr*zr + zi*zi > mb->bailout){
+                if(zr*zr + zi*zi > s->bailout){
                     i-= FFMIN(7, i);
-                    for(; i<mb->maxiter; i++){
-                        zr= mb->zyklus[i][0];
-                        zi= mb->zyklus[i][1];
-                        if(zr*zr + zi*zi > mb->bailout){
-                            switch(mb->outer){
+                    for(; i<s->maxiter; i++){
+                        zr= s->zyklus[i][0];
+                        zi= s->zyklus[i][1];
+                        if(zr*zr + zi*zi > s->bailout){
+                            switch(s->outer){
                             case            ITERATION_COUNT:
                                 zr = i;
-                                c = lrintf((sin(zr)+1)*127) + lrintf((sin(zr/1.234)+1)*127)*256*256 + lrintf((sin(zr/100)+1)*127)*256;
+                                c = lrintf((sinf(zr)+1)*127) + lrintf((sinf(zr/1.234)+1)*127)*256*256 + lrintf((sinf(zr/100)+1)*127)*256;
                                 break;
                             case NORMALIZED_ITERATION_COUNT:
-                                zr = i + log2(log(mb->bailout) / log(zr*zr + zi*zi));
-                                c = lrintf((sin(zr)+1)*127) + lrintf((sin(zr/1.234)+1)*127)*256*256 + lrintf((sin(zr/100)+1)*127)*256;
+                                zr = i + log2(log(s->bailout) / log(zr*zr + zi*zi));
+                                c = lrintf((sinf(zr)+1)*127) + lrintf((sinf(zr/1.234)+1)*127)*256*256 + lrintf((sinf(zr/100)+1)*127)*256;
                                 break;
                             case                      WHITE:
                                 c = 0xFFFFFF;
                                 break;
                             case                      OUTZ:
-                                zr /= mb->bailout;
-                                zi /= mb->bailout;
+                                zr /= s->bailout;
+                                zi /= s->bailout;
                                 c = (((int)(zr*128+128))&0xFF)*256 + (((int)(zi*128+128))&0xFF);
                             }
                             break;
@@ -355,55 +355,55 @@ static void draw_mandelbrot(AVFilterContext *ctx, uint32_t *color, int linesize,
                 }
             }
             if(!c){
-                if(mb->inner==PERIOD){
+                if(s->inner==PERIOD){
                     int j;
                     for(j=i-1; j; j--)
-                        if(SQR(mb->zyklus[j][0]-zr) + SQR(mb->zyklus[j][1]-zi) < epsilon*epsilon*10)
+                        if(SQR(s->zyklus[j][0]-zr) + SQR(s->zyklus[j][1]-zi) < epsilon*epsilon*10)
                             break;
                     if(j){
                         c= i-j;
                         c= ((c<<5)&0xE0) + ((c<<10)&0xE000) + ((c<<15)&0xE00000);
                     }
-                }else if(mb->inner==CONVTIME){
-                    c= floor(i*255.0/mb->maxiter+dv)*0x010101;
-                } else if(mb->inner==MINCOL){
+                }else if(s->inner==CONVTIME){
+                    c= floor(i*255.0/s->maxiter+dv)*0x010101;
+                } else if(s->inner==MINCOL){
                     int j;
                     double closest=9999;
                     int closest_index=0;
                     for(j=i-1; j>=0; j--)
-                        if(SQR(mb->zyklus[j][0]) + SQR(mb->zyklus[j][1]) < closest){
-                            closest= SQR(mb->zyklus[j][0]) + SQR(mb->zyklus[j][1]);
+                        if(SQR(s->zyklus[j][0]) + SQR(s->zyklus[j][1]) < closest){
+                            closest= SQR(s->zyklus[j][0]) + SQR(s->zyklus[j][1]);
                             closest_index= j;
                         }
                     closest = sqrt(closest);
-                    c= lrintf((mb->zyklus[closest_index][0]/closest+1)*127+dv) + lrintf((mb->zyklus[closest_index][1]/closest+1)*127+dv)*256;
+                    c= lrintf((s->zyklus[closest_index][0]/closest+1)*127+dv) + lrintf((s->zyklus[closest_index][1]/closest+1)*127+dv)*256;
                 }
             }
             c |= 0xFF000000;
             color[x + y*linesize]= c;
-            if(next_cidx < mb->cache_allocated){
-                mb->next_cache[next_cidx  ].p[0]= cr;
-                mb->next_cache[next_cidx  ].p[1]= ci;
-                mb->next_cache[next_cidx++].val = c;
+            if(next_cidx < s->cache_allocated){
+                s->next_cache[next_cidx  ].p[0]= cr;
+                s->next_cache[next_cidx  ].p[1]= ci;
+                s->next_cache[next_cidx++].val = c;
             }
         }
         fill_from_cache(ctx, NULL, &in_cidx, &next_cidx, ci + scale/2, scale);
     }
-    FFSWAP(void*, mb->next_cache, mb->point_cache);
-    mb->cache_used = next_cidx;
-    if(mb->cache_used == mb->cache_allocated)
+    FFSWAP(void*, s->next_cache, s->point_cache);
+    s->cache_used = next_cidx;
+    if(s->cache_used == s->cache_allocated)
         av_log(ctx, AV_LOG_INFO, "Mandelbrot cache is too small!\n");
 }
 
 static int request_frame(AVFilterLink *link)
 {
-    MBContext *mb = link->src->priv;
-    AVFrame *picref = ff_get_video_buffer(link, mb->w, mb->h);
+    MBContext *s = link->src->priv;
+    AVFrame *picref = ff_get_video_buffer(link, s->w, s->h);
     if (!picref)
         return AVERROR(ENOMEM);
 
     picref->sample_aspect_ratio = (AVRational) {1, 1};
-    picref->pts = mb->pts++;
+    picref->pts = s->pts++;
 
     draw_mandelbrot(link->src, (uint32_t*)picref->data[0], picref->linesize[0]/4, picref->pts);
     return ff_filter_frame(link, picref);
diff --git a/libavfilter/vsrc_mptestsrc.c b/libavfilter/vsrc_mptestsrc.c
index bc5b2cd2..25ad2ad0 100644
--- a/libavfilter/vsrc_mptestsrc.c
+++ b/libavfilter/vsrc_mptestsrc.c
@@ -121,7 +121,7 @@ static void idct(uint8_t *dst, int dst_linesize, int src[64])
             for (k = 0; k < 8; k++)
                 sum += c[k*8+i]*tmp[8*k+j];
 
-            dst[dst_linesize*i + j] = av_clip_uint8((int)floor(sum+0.5));
+            dst[dst_linesize*i + j] = av_clip_uint8(lrint(sum));
         }
     }
 }
@@ -240,7 +240,7 @@ static void ring2_test(uint8_t *dst, int dst_linesize, int off)
 
     for (y = 0; y < 16*16; y++) {
         for (x = 0; x < 16*16; x++) {
-            double d = sqrt((x-8*16)*(x-8*16) + (y-8*16)*(y-8*16));
+            double d = hypot(x-8*16, y-8*16);
             double r = d/20 - (int)(d/20);
             if (r < off/30.0) {
                 dst[x + y*dst_linesize]     = 255;
@@ -302,7 +302,7 @@ static int request_frame(AVFilterLink *outlink)
     MPTestContext *test = outlink->src->priv;
     AVFrame *picref;
     int w = WIDTH, h = HEIGHT,
-        cw = FF_CEIL_RSHIFT(w, test->hsub), ch = FF_CEIL_RSHIFT(h, test->vsub);
+        cw = AV_CEIL_RSHIFT(w, test->hsub), ch = AV_CEIL_RSHIFT(h, test->vsub);
     unsigned int frame = outlink->frame_count;
     enum test_type tt = test->test;
     int i;
diff --git a/libavfilter/vsrc_testsrc.c b/libavfilter/vsrc_testsrc.c
index 7f981bad..54d8b26e 100644
--- a/libavfilter/vsrc_testsrc.c
+++ b/libavfilter/vsrc_testsrc.c
@@ -30,7 +30,7 @@
  * rgbtestsrc is ported from MPlayer libmpcodecs/vf_rgbtest.c by
  * Michael Niedermayer.
  *
- * smptebars and smptehdbars are by Paul B Mahol.
+ * allyuv, smptebars and smptehdbars are by Paul B Mahol.
  */
 
 #include <float.h>
@@ -41,6 +41,7 @@
 #include "libavutil/imgutils.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/parseutils.h"
+#include "libavutil/xga_font_data.h"
 #include "avfilter.h"
 #include "drawutils.h"
 #include "formats.h"
@@ -614,7 +615,7 @@ static void test_fill_picture(AVFilterContext *ctx, AVFrame *frame)
     if (seg_size >= 1 && height >= 13 * seg_size) {
         int64_t p10decimals = 1;
         double time = av_q2d(test->time_base) * test->nb_frame *
-                      pow(10, test->nb_decimals);
+                      ff_exp10(test->nb_decimals);
         if (time >= INT_MAX)
             return;
 
@@ -679,6 +680,276 @@ AVFilter ff_vsrc_testsrc = {
 
 #endif /* CONFIG_TESTSRC_FILTER */
 
+#if CONFIG_TESTSRC2_FILTER
+
+static const AVOption testsrc2_options[] = {
+    COMMON_OPTIONS
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(testsrc2);
+
+static void set_color(TestSourceContext *s, FFDrawColor *color, uint32_t argb)
+{   /* Unpack a 0xAARRGGBB word into R,G,B,A byte order and load it into *color. */
+    uint8_t rgba[4] = { (argb >> 16) & 0xFF,    /* R: bits 16..23 */
+                        (argb >>  8) & 0xFF,    /* G: bits  8..15 */
+                        (argb >>  0) & 0xFF,    /* B: bits  0..7  */
+                        (argb >> 24) & 0xFF, }; /* A: bits 24..31 */
+    ff_draw_color(&s->draw, color, rgba);
+}
+
+static uint32_t color_gradient(unsigned index)
+{   /* Map index in [0, 6*256) onto a six-segment colour wheel; result is packed as 0xRRGGBB. */
+    unsigned si = index & 0xFF, sd = 0xFF - si; /* rising / falling ramp inside the segment */
+    switch (index >> 8) {                       /* segment number */
+    case 0: return 0xFF0000 + (si <<  8);       /* red     -> yellow  */
+    case 1: return 0x00FF00 + (sd << 16);       /* yellow  -> green   */
+    case 2: return 0x00FF00 + (si <<  0);       /* green   -> cyan    */
+    case 3: return 0x0000FF + (sd <<  8);       /* cyan    -> blue    */
+    case 4: return 0x0000FF + (si << 16);       /* blue    -> magenta */
+    case 5: return 0xFF0000 + (sd <<  0);       /* magenta -> red     */
+    }
+    av_assert0(0);                              /* reached only when index >= 6 * 256 */
+}
+
+static void draw_text(TestSourceContext *s, AVFrame *frame, FFDrawColor *color,
+                      int x0, int y0, const uint8_t *text)
+{   /* Blend the NUL-terminated string text onto frame, starting at (x0, y0). */
+    int x = x0;
+
+    for (; *text; text++) {
+        if (*text == '\n') { /* newline: back to the left margin, 16 pixel rows down */
+            x = x0;
+            y0 += 16;
+            continue;
+        }
+        ff_blend_mask(&s->draw, color, frame->data, frame->linesize,
+                      frame->width, frame->height, /* glyph mask below: 16 bytes per character code */
+                      avpriv_vga16_font + *text * 16, 1, 8, 16, 0, 0, x, y0);
+        x += 8; /* advance one 8-pixel character cell */
+    }
+}
+
+static void test2_fill_picture(AVFilterContext *ctx, AVFrame *frame)
+{   /* Render one testsrc2 frame; moving elements are placed from s->pts and s->time_base. */
+    TestSourceContext *s = ctx->priv;
+    FFDrawColor color;
+
+    /* colored background: six vertical bars, colour bits taken from the bar number */
+    {
+        unsigned i, x = 0, x2;
+
+        x = 0;
+        for (i = 1; i < 7; i++) {
+            x2 = av_rescale(i, s->w, 6);
+            x2 = ff_draw_round_to_sub(&s->draw, 0, 0, x2);
+            set_color(s, &color, ((i & 1) ? 0xFF0000 : 0) |
+                                 ((i & 2) ? 0x00FF00 : 0) |
+                                 ((i & 4) ? 0x0000FF : 0));
+            ff_fill_rectangle(&s->draw, &color, frame->data, frame->linesize,
+                              x, 0, x2 - x, frame->height);
+            x = x2;
+        }
+    }
+
+    /* oblique gradient */
+    /* note: too slow if using blending */
+    if (s->h >= 64) {
+        unsigned x, dx, y0, y, g0, g;
+
+        dx = ff_draw_round_to_sub(&s->draw, 0, +1, 1);
+        y0 = av_rescale_q(s->pts, s->time_base, av_make_q(2, s->h - 16));
+        g0 = av_rescale_q(s->pts, s->time_base, av_make_q(1, 128));
+        for (x = 0; x < s->w; x += dx) {
+            g = (av_rescale(x, 6 * 256, s->w) + g0) % (6 * 256);
+            set_color(s, &color, color_gradient(g));
+            y = y0 + av_rescale(x, s->h / 2, s->w);
+            y %= 2 * (s->h - 16);
+            if (y > s->h - 16) /* second half of the period: reflect so the band bounces */
+                y = 2 * (s->h - 16) - y;
+            y = ff_draw_round_to_sub(&s->draw, 1, 0, y);
+            ff_fill_rectangle(&s->draw, &color, frame->data, frame->linesize,
+                              x, y, dx, 16);
+        }
+    }
+
+    /* top right: draw clock hands */
+    if (s->w >= 64 && s->h >= 64) {
+        int l = (FFMIN(s->w, s->h) - 32) >> 1;
+        int steps = FFMAX(4, l >> 5);
+        int xc = (s->w >> 2) + (s->w >> 1);
+        int yc = (s->h >> 2);
+        int cycle = l << 2;
+        int pos, xh, yh;
+        int c, i;
+
+        for (c = 0; c < 3; c++) {
+            set_color(s, &color, 0xBBBBBB ^ (0xFF << (c << 3)));
+            pos = av_rescale_q(s->pts, s->time_base, av_make_q(64 >> (c << 1), cycle)) % cycle;
+            xh = pos < 1 * l ? pos :
+                 pos < 2 * l ? l :
+                 pos < 3 * l ? 3 * l - pos : 0;
+            yh = pos < 1 * l ? 0 :
+                 pos < 2 * l ? pos - l :
+                 pos < 3 * l ? l :
+                               cycle - pos;
+            xh -= l >> 1;
+            yh -= l >> 1;
+            for (i = 1; i <= steps; i++) {
+                int x = av_rescale(xh, i, steps) + xc;
+                int y = av_rescale(yh, i, steps) + yc;
+                x = ff_draw_round_to_sub(&s->draw, 0, -1, x);
+                y = ff_draw_round_to_sub(&s->draw, 1, -1, y);
+                ff_fill_rectangle(&s->draw, &color, frame->data, frame->linesize,
+                                  x, y, 8, 8);
+            }
+        }
+    }
+
+    /* bottom left: beating rectangles */
+    if (s->w >= 64 && s->h >= 64) {
+        int l = (FFMIN(s->w, s->h) - 16) >> 2;
+        int cycle = l << 3;
+        int xc = (s->w >> 2);
+        int yc = (s->h >> 2) + (s->h >> 1);
+        int xm1 = ff_draw_round_to_sub(&s->draw, 0, -1, xc - 8);
+        int xm2 = ff_draw_round_to_sub(&s->draw, 0, +1, xc + 8);
+        int ym1 = ff_draw_round_to_sub(&s->draw, 1, -1, yc - 8);
+        int ym2 = ff_draw_round_to_sub(&s->draw, 1, +1, yc + 8);
+        int size, step, x1, x2, y1, y2;
+
+        size = av_rescale_q(s->pts, s->time_base, av_make_q(4, cycle));
+        step = size / l;
+        size %= l;
+        if (step & 1)
+            size = l - size;
+        step = (step >> 1) & 3;
+        set_color(s, &color, 0xFF808080);
+        x1 = ff_draw_round_to_sub(&s->draw, 0, -1, xc - 4 - size);
+        x2 = ff_draw_round_to_sub(&s->draw, 0, +1, xc + 4 + size);
+        y1 = ff_draw_round_to_sub(&s->draw, 1, -1, yc - 4 - size);
+        y2 = ff_draw_round_to_sub(&s->draw, 1, +1, yc + 4 + size);
+        if (step == 0 || step == 2)
+            ff_fill_rectangle(&s->draw, &color, frame->data, frame->linesize,
+                              x1, ym1, x2 - x1, ym2 - ym1);
+        if (step == 1 || step == 2)
+            ff_fill_rectangle(&s->draw, &color, frame->data, frame->linesize,
+                              xm1, y1, xm2 - xm1, y2 - y1);
+        if (step == 3)
+            ff_fill_rectangle(&s->draw, &color, frame->data, frame->linesize,
+                              x1, y1, x2 - x1, y2 - y1);
+    }
+
+    /* bottom right: checker with random noise */
+    {
+        unsigned xmin = av_rescale(5, s->w, 8);
+        unsigned xmax = av_rescale(7, s->w, 8);
+        unsigned ymin = av_rescale(5, s->h, 8);
+        unsigned ymax = av_rescale(7, s->h, 8);
+        unsigned x, y, i, r;
+        uint8_t alpha[256];
+
+        r = s->pts; /* noise generator is seeded from s->pts */
+        for (y = ymin; y + 15 < ymax; y += 16) {     /* "+ 15" on the left: no unsigned wrap when ymax < 15 */
+            for (x = xmin; x + 15 < xmax; x += 16) { /* same for xmax < 15 */
+                if ((x ^ y) & 16)
+                    continue;
+                for (i = 0; i < 256; i++) {
+                    r = r * 1664525 + 1013904223; /* linear congruential generator step */
+                    alpha[i] = r >> 24;
+                }
+                set_color(s, &color, 0xFF00FF80);
+                ff_blend_mask(&s->draw, &color, frame->data, frame->linesize,
+                                   frame->width, frame->height,
+                                   alpha, 16, 16, 16, 3, 0, x, y);
+            }
+        }
+    }
+
+    /* bouncing square */
+    if (s->w >= 16 && s->h >= 16) {
+        unsigned w = s->w - 8;
+        unsigned h = s->h - 8;
+        unsigned x = av_rescale_q(s->pts, s->time_base, av_make_q(233, 55 * w)) % (w << 1);
+        unsigned y = av_rescale_q(s->pts, s->time_base, av_make_q(233, 89 * h)) % (h << 1);
+        if (x > w)
+            x = (w << 1) - x;
+        if (y > h)
+            y = (h << 1) - y;
+        x = ff_draw_round_to_sub(&s->draw, 0, -1, x);
+        y = ff_draw_round_to_sub(&s->draw, 1, -1, y);
+        set_color(s, &color, 0xFF8000FF);
+        ff_fill_rectangle(&s->draw, &color, frame->data, frame->linesize,
+                          x, y, 8, 8);
+    }
+
+    /* top left: draw frame time and frame number */
+    {
+        char buf[256];
+        unsigned time;
+
+        time = av_rescale_q(s->pts, s->time_base, av_make_q(1, 1000)) % 86400000;
+        set_color(s, &color, 0xC0000000);
+        ff_blend_rectangle(&s->draw, &color, frame->data, frame->linesize,
+                           frame->width, frame->height,
+                           2, 2, 100, 36);
+        set_color(s, &color, 0xFFFF8000);
+        snprintf(buf, sizeof(buf), "%02u:%02u:%02u.%03u\n%12"PRIi64,
+                 time / 3600000, (time / 60000) % 60, (time / 1000) % 60,
+                 time % 1000, s->pts);
+        draw_text(s, frame, &color, 4, 4, buf);
+    }
+}
+static av_cold int test2_init(AVFilterContext *ctx)
+{   /* Install the testsrc2 frame renderer, then defer to the shared init(). */
+    TestSourceContext *s = ctx->priv;
+
+    s->fill_picture_fn = test2_fill_picture;
+    return init(ctx);
+}
+
+static int test2_query_formats(AVFilterContext *ctx)
+{   /* Advertise the format list returned by ff_draw_supported_pixel_formats(0). */
+    return ff_set_common_formats(ctx, ff_draw_supported_pixel_formats(0));
+}
+
+static int test2_config_props(AVFilterLink *inlink)
+{   /* NOTE: installed on the output pad, so "inlink" is this source's output link (inlink->src is us). */
+    AVFilterContext *ctx = inlink->src;
+    TestSourceContext *s = ctx->priv;
+
+    av_assert0(ff_draw_init(&s->draw, inlink->format, 0) >= 0); /* set up the draw context for the link format */
+    s->w = ff_draw_round_to_sub(&s->draw, 0, -1, s->w); /* presumably rounds down to the subsampling grid -- confirm in drawutils */
+    s->h = ff_draw_round_to_sub(&s->draw, 1, -1, s->h);
+    if (av_image_check_size(s->w, s->h, 0, ctx) < 0) /* reject an invalid adjusted size */
+        return AVERROR(EINVAL);
+    return config_props(inlink);
+}
+
+static const AVFilterPad avfilter_vsrc_testsrc2_outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .request_frame = request_frame,
+        .config_props  = test2_config_props,
+    },
+    { NULL }
+};
+
+AVFilter ff_vsrc_testsrc2 = {
+    .name          = "testsrc2",
+    .description   = NULL_IF_CONFIG_SMALL("Generate another test pattern."),
+    .priv_size     = sizeof(TestSourceContext),
+    .priv_class    = &testsrc2_class,
+    .init          = test2_init,
+    .uninit        = uninit,
+    .query_formats = test2_query_formats,
+    .inputs        = NULL,
+    .outputs       = avfilter_vsrc_testsrc2_outputs,
+};
+
+#endif /* CONFIG_TESTSRC2_FILTER */
+
 #if CONFIG_RGBTESTSRC_FILTER
 
 #define rgbtestsrc_options options
@@ -801,35 +1072,44 @@ AVFilter ff_vsrc_rgbtestsrc = {
 #if CONFIG_SMPTEBARS_FILTER || CONFIG_SMPTEHDBARS_FILTER
 
 static const uint8_t rainbow[7][4] = {
-    { 180, 128, 128, 255 },     /* gray */
-    { 168,  44, 136, 255 },     /* yellow */
-    { 145, 147,  44, 255 },     /* cyan */
-    { 133,  63,  52, 255 },     /* green */
-    {  63, 193, 204, 255 },     /* magenta */
-    {  51, 109, 212, 255 },     /* red */
-    {  28, 212, 120, 255 },     /* blue */
+    { 180, 128, 128, 255 },     /* 75% white */
+    { 161,  44, 141, 255 },     /* 75% yellow */
+    { 131, 156,  44, 255 },     /* 75% cyan */
+    { 112,  72,  57, 255 },     /* 75% green */
+    {  83, 183, 198, 255 },     /* 75% magenta */
+    {  65,  99, 212, 255 },     /* 75% red */
+    {  34, 212, 114, 255 },     /* 75% blue */
+};
+
+static const uint8_t rainbowhd[7][4] = {
+    { 180, 128, 128, 255 },     /* 75% white */
+    { 168,  44, 136, 255 },     /* 75% yellow */
+    { 145, 147,  44, 255 },     /* 75% cyan */
+    { 133,  63,  52, 255 },     /* 75% green */
+    {  63, 193, 204, 255 },     /* 75% magenta */
+    {  51, 109, 212, 255 },     /* 75% red */
+    {  28, 212, 120, 255 },     /* 75% blue */
 };
 
 static const uint8_t wobnair[7][4] = {
-    {  32, 240, 118, 255 },     /* blue */
+    {  34, 212, 114, 255 },     /* 75% blue */
     {  19, 128, 128, 255 },     /* 7.5% intensity black */
-    {  54, 184, 198, 255 },     /* magenta */
+    {  83, 183, 198, 255 },     /* 75% magenta */
     {  19, 128, 128, 255 },     /* 7.5% intensity black */
-    { 188, 154,  16, 255 },     /* cyan */
+    { 131, 156,  44, 255 },     /* 75% cyan */
     {  19, 128, 128, 255 },     /* 7.5% intensity black */
-    { 191, 128, 128, 255 },     /* gray */
+    { 180, 128, 128, 255 },     /* 75% white */
 };
 
 static const uint8_t white[4] = { 235, 128, 128, 255 };
-static const uint8_t black[4] = {  19, 128, 128, 255 }; /* 7.5% intensity black */
 
 /* pluge pulses */
-static const uint8_t neg4ire[4] = {  9, 128, 128, 255 }; /*  3.5% intensity black */
-static const uint8_t pos4ire[4] = { 29, 128, 128, 255 }; /* 11.5% intensity black */
+static const uint8_t neg4ire[4] = {  7, 128, 128, 255 };
+static const uint8_t pos4ire[4] = { 24, 128, 128, 255 };
 
 /* fudged Q/-I */
-static const uint8_t i_pixel[4] = { 61, 153,  99, 255 };
-static const uint8_t q_pixel[4] = { 35, 174, 152, 255 };
+static const uint8_t i_pixel[4] = { 57, 156,  97, 255 };
+static const uint8_t q_pixel[4] = { 44, 171, 147, 255 };
 
 static const uint8_t gray40[4] = { 104, 128, 128, 255 };
 static const uint8_t gray15[4] = {  49, 128, 128, 255 };
@@ -843,7 +1123,7 @@ static const uint8_t black4[4] = {  25, 128, 128, 255 };
 static const uint8_t   neg2[4] = {  12, 128, 128, 255 };
 
 static void draw_bar(TestSourceContext *test, const uint8_t color[4],
-                     unsigned x, unsigned y, unsigned w, unsigned h,
+                     int x, int y, int w, int h,
                      AVFrame *frame)
 {
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
@@ -865,9 +1145,9 @@ static void draw_bar(TestSourceContext *test, const uint8_t color[4],
 
         if (plane == 1 || plane == 2) {
             px = x >> desc->log2_chroma_w;
-            pw = w >> desc->log2_chroma_w;
+            pw = AV_CEIL_RSHIFT(w, desc->log2_chroma_w);
             py = y >> desc->log2_chroma_h;
-            ph = h >> desc->log2_chroma_h;
+            ph = AV_CEIL_RSHIFT(h, desc->log2_chroma_h);
         } else {
             px = x;
             pw = w;
@@ -940,16 +1220,16 @@ static void smptebars_fill_picture(AVFilterContext *ctx, AVFrame *picref)
     draw_bar(test, q_pixel, x, r_h + w_h, p_w, p_h, picref);
     x += p_w;
     tmp = FFALIGN(5 * r_w - x,  1 << pixdesc->log2_chroma_w);
-    draw_bar(test, black, x, r_h + w_h, tmp, p_h, picref);
+    draw_bar(test, black0, x, r_h + w_h, tmp, p_h, picref);
     x += tmp;
     tmp = FFALIGN(r_w / 3,  1 << pixdesc->log2_chroma_w);
     draw_bar(test, neg4ire, x, r_h + w_h, tmp, p_h, picref);
     x += tmp;
-    draw_bar(test, black, x, r_h + w_h, tmp, p_h, picref);
+    draw_bar(test, black0, x, r_h + w_h, tmp, p_h, picref);
     x += tmp;
     draw_bar(test, pos4ire, x, r_h + w_h, tmp, p_h, picref);
     x += tmp;
-    draw_bar(test, black, x, r_h + w_h, test->w - x, p_h, picref);
+    draw_bar(test, black0, x, r_h + w_h, test->w - x, p_h, picref);
 }
 
 static av_cold int smptebars_init(AVFilterContext *ctx)
@@ -995,7 +1275,7 @@ static void smptehdbars_fill_picture(AVFilterContext *ctx, AVFrame *picref)
 
     r_w = FFALIGN((((test->w + 3) / 4) * 3) / 7, 1 << pixdesc->log2_chroma_w);
     for (i = 0; i < 7; i++) {
-        draw_bar(test, rainbow[i], x, 0, r_w, r_h, picref);
+        draw_bar(test, rainbowhd[i], x, 0, r_w, r_h, picref);
         x += r_w;
     }
     draw_bar(test, gray40, x, 0, test->w - x, r_h, picref);
@@ -1006,7 +1286,7 @@ static void smptehdbars_fill_picture(AVFilterContext *ctx, AVFrame *picref)
     draw_bar(test, i_pixel, x, y, r_w, r_h, picref);
     x += r_w;
     tmp = r_w * 6;
-    draw_bar(test, rainbow[0], x, y, tmp, r_h, picref);
+    draw_bar(test, rainbowhd[0], x, y, tmp, r_h, picref);
     x += tmp;
     l_w = x;
     draw_bar(test, blue, x, y, test->w - x, r_h, picref);
@@ -1080,3 +1360,165 @@ AVFilter ff_vsrc_smptehdbars = {
 
 #endif  /* CONFIG_SMPTEHDBARS_FILTER */
 #endif  /* CONFIG_SMPTEBARS_FILTER || CONFIG_SMPTEHDBARS_FILTER */
+
+#if CONFIG_ALLYUV_FILTER
+
+static const AVOption allyuv_options[] = {
+    COMMON_OPTIONS_NOSIZE
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(allyuv);
+
+static void allyuv_fill_picture(AVFilterContext *ctx, AVFrame *frame)
+{   /* Paint the fixed 4096x4096 allyuv pattern into planes 0..2 of frame. */
+    const int ys = frame->linesize[0];
+    const int us = frame->linesize[1];
+    const int vs = frame->linesize[2];
+    int x, y, j;
+
+    for (y = 0; y < 4096; y++) {
+        for (x = 0; x < 2048; x++) { /* plane 0: 0..255 ramp in 8-pixel steps, mirrored left/right */
+            frame->data[0][y * ys + x] = ((x / 8) % 256);
+            frame->data[0][y * ys + 4095 - x] = ((x / 8) % 256);
+        }
+
+        for (x = 0; x < 2048; x+=8) { /* plane 1, addressed with its own stride (linesize[1]) */
+            for (j = 0; j < 8; j++) {
+                frame->data[1][us * y + x + j]        = (y%16 + (j % 8) * 16);
+                frame->data[1][us * y + 4095 - x - j] = (128 + y%16 + (j % 8) * 16);
+            }
+        }
+
+        for (x = 0; x < 4096; x++) /* plane 2: y / 16 for the whole row, own stride (linesize[2]) */
+            frame->data[2][y * vs + x] = 256 * y / 4096;
+    }
+}
+
+static av_cold int allyuv_init(AVFilterContext *ctx)
+{   /* Fix the size at 4096x4096, set draw_once and install the allyuv renderer before init(). */
+    TestSourceContext *test = ctx->priv;
+
+    test->w = test->h = 4096;
+    test->draw_once = 1; /* presumably: render the picture once and reuse it -- confirm in request_frame */
+    test->fill_picture_fn = allyuv_fill_picture;
+    return init(ctx);
+}
+
+static int allyuv_query_formats(AVFilterContext *ctx)
+{   /* Restrict the filter to the pixel formats listed in pix_fmts. */
+    static const enum AVPixelFormat pix_fmts[] = {
+        AV_PIX_FMT_YUV444P, AV_PIX_FMT_GBRP,
+        AV_PIX_FMT_NONE /* list terminator */
+    };
+
+    AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
+    if (!fmts_list) /* a NULL list is reported as out-of-memory */
+        return AVERROR(ENOMEM);
+    return ff_set_common_formats(ctx, fmts_list);
+}
+
+static const AVFilterPad avfilter_vsrc_allyuv_outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .request_frame = request_frame,
+        .config_props  = config_props,
+    },
+    { NULL }
+};
+
+AVFilter ff_vsrc_allyuv = {
+    .name          = "allyuv",
+    .description   = NULL_IF_CONFIG_SMALL("Generate all yuv colors."),
+    .priv_size     = sizeof(TestSourceContext),
+    .priv_class    = &allyuv_class,
+    .init          = allyuv_init,
+    .uninit        = uninit,
+    .query_formats = allyuv_query_formats,
+    .inputs        = NULL,
+    .outputs       = avfilter_vsrc_allyuv_outputs,
+};
+
+#endif /* CONFIG_ALLYUV_FILTER */
+
+#if CONFIG_ALLRGB_FILTER
+
+static const AVOption allrgb_options[] = {
+    COMMON_OPTIONS_NOSIZE
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(allrgb);
+
+static void allrgb_fill_picture(AVFilterContext *ctx, AVFrame *frame)
+{   /* Write 3 bytes per pixel over 4096x4096 pixels, each triple derived from the pixel's (x, y). */
+    unsigned x, y;
+    const int linesize = frame->linesize[0];
+    uint8_t *line = frame->data[0];
+
+    for (y = 0; y < 4096; y++) {
+        uint8_t *dst = line;
+
+        for (x = 0; x < 4096; x++) {
+            *dst++ = x;                          /* low 8 bits of x (the uint8_t store truncates) */
+            *dst++ = y;                          /* low 8 bits of y */
+            *dst++ = (x >> 8) | ((y >> 8) << 4); /* high 4 bits of x in the low nibble, of y in the high nibble */
+        }
+        line += linesize; /* step to the next row */
+    }
+}
+
+static av_cold int allrgb_init(AVFilterContext *ctx)
+{   /* Fix the size at 4096x4096, set draw_once and install the allrgb renderer before init(). */
+    TestSourceContext *test = ctx->priv;
+
+    test->w = test->h = 4096;
+    test->draw_once = 1; /* presumably: render the picture once and reuse it -- confirm in request_frame */
+    test->fill_picture_fn = allrgb_fill_picture;
+    return init(ctx);
+}
+
+static int allrgb_config_props(AVFilterLink *outlink)
+{   /* Fill test->rgba_map for the link's pixel format, then run the shared config_props(). */
+    TestSourceContext *test = outlink->src->priv;
+
+    ff_fill_rgba_map(test->rgba_map, outlink->format);
+    return config_props(outlink);
+}
+
+static int allrgb_query_formats(AVFilterContext *ctx)
+{   /* Restrict the filter to the single pixel format listed in pix_fmts. */
+    static const enum AVPixelFormat pix_fmts[] = {
+        AV_PIX_FMT_RGB24, AV_PIX_FMT_NONE
+    };
+
+    AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
+    if (!fmts_list) /* a NULL list is reported as out-of-memory */
+        return AVERROR(ENOMEM);
+    return ff_set_common_formats(ctx, fmts_list);
+}
+
+static const AVFilterPad avfilter_vsrc_allrgb_outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .request_frame = request_frame,
+        .config_props  = allrgb_config_props,
+    },
+    { NULL }
+};
+
+AVFilter ff_vsrc_allrgb = {
+    .name          = "allrgb",
+    .description   = NULL_IF_CONFIG_SMALL("Generate all RGB colors."),
+    .priv_size     = sizeof(TestSourceContext),
+    .priv_class    = &allrgb_class,
+    .init          = allrgb_init,
+    .uninit        = uninit,
+    .query_formats = allrgb_query_formats,
+    .inputs        = NULL,
+    .outputs       = avfilter_vsrc_allrgb_outputs,
+};
+
+#endif /* CONFIG_ALLRGB_FILTER */
diff --git a/libavfilter/w3fdif.h b/libavfilter/w3fdif.h
new file mode 100644
index 00000000..67bb8746
--- /dev/null
+++ b/libavfilter/w3fdif.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_W3FDIF_H
+#define AVFILTER_W3FDIF_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+typedef struct W3FDIFDSPContext {
+    void (*filter_simple_low)(int32_t *work_line,
+                              uint8_t *in_lines_cur[2],
+                              const int16_t *coef, int linesize);
+    void (*filter_complex_low)(int32_t *work_line,
+                               uint8_t *in_lines_cur[4],
+                               const int16_t *coef, int linesize);
+    void (*filter_simple_high)(int32_t *work_line,
+                               uint8_t *in_lines_cur[3],
+                               uint8_t *in_lines_adj[3],
+                               const int16_t *coef, int linesize);
+    void (*filter_complex_high)(int32_t *work_line,
+                                uint8_t *in_lines_cur[5],
+                                uint8_t *in_lines_adj[5],
+                                const int16_t *coef, int linesize);
+    void (*filter_scale)(uint8_t *out_pixel, const int32_t *work_pixel, int linesize);
+} W3FDIFDSPContext;
+
+void ff_w3fdif_init_x86(W3FDIFDSPContext *dsp);
+
+#endif /* AVFILTER_W3FDIF_H */
diff --git a/libavfilter/window_func.c b/libavfilter/window_func.c
new file mode 100644
index 00000000..9c6202aa
--- /dev/null
+++ b/libavfilter/window_func.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <math.h>
+
+#include "libavutil/avassert.h"
+#include "window_func.h"
+
+void ff_generate_window_func(float *lut, int N, int win_func, float *overlap)
+{   /* Fill lut[0..N-1] with the window selected by win_func; store that window's overlap value in *overlap. */
+    int n;
+    /* NOTE: most cases divide by (N-1), so N == 1 is not handled; an unknown win_func asserts. */
+    switch (win_func) {
+    case WFUNC_RECT:
+        for (n = 0; n < N; n++)
+            lut[n] = 1.;
+        *overlap = 0.;
+        break;
+    case WFUNC_BARTLETT:
+        for (n = 0; n < N; n++)
+            lut[n] = 1.-fabs((n-(N-1)/2.)/((N-1)/2.));
+        *overlap = 0.5;
+        break;
+    case WFUNC_HANNING:
+        for (n = 0; n < N; n++)
+            lut[n] = .5*(1-cos(2*M_PI*n/(N-1)));
+        *overlap = 0.5;
+        break;
+    case WFUNC_HAMMING:
+        for (n = 0; n < N; n++)
+            lut[n] = .54-.46*cos(2*M_PI*n/(N-1));
+        *overlap = 0.5;
+        break;
+    case WFUNC_BLACKMAN:
+        for (n = 0; n < N; n++)
+            lut[n] = .42659-.49656*cos(2*M_PI*n/(N-1))+.076849*cos(4*M_PI*n/(N-1));
+        *overlap = 0.661;
+        break;
+    case WFUNC_WELCH:
+        for (n = 0; n < N; n++)
+            lut[n] = 1.-(n-(N-1)/2.)/((N-1)/2.)*(n-(N-1)/2.)/((N-1)/2.);
+        *overlap = 0.293;
+        break;
+    case WFUNC_FLATTOP:
+        for (n = 0; n < N; n++)
+            lut[n] = 1.-1.985844164102*cos( 2*M_PI*n/(N-1))+1.791176438506*cos( 4*M_PI*n/(N-1))-
+                        1.282075284005*cos( 6*M_PI*n/(N-1))+0.667777530266*cos( 8*M_PI*n/(N-1))-
+                        0.240160796576*cos(10*M_PI*n/(N-1))+0.056656381764*cos(12*M_PI*n/(N-1))-
+                        0.008134974479*cos(14*M_PI*n/(N-1))+0.000624544650*cos(16*M_PI*n/(N-1))-
+                        0.000019808998*cos(18*M_PI*n/(N-1))+0.000000132974*cos(20*M_PI*n/(N-1));
+        *overlap = 0.841;
+        break;
+    case WFUNC_BHARRIS:
+        for (n = 0; n < N; n++)
+            lut[n] = 0.35875-0.48829*cos(2*M_PI*n/(N-1))+0.14128*cos(4*M_PI*n/(N-1))-0.01168*cos(6*M_PI*n/(N-1));
+        *overlap = 0.661;
+        break;
+    case WFUNC_BNUTTALL:
+        for (n = 0; n < N; n++)
+            lut[n] = 0.3635819-0.4891775*cos(2*M_PI*n/(N-1))+0.1365995*cos(4*M_PI*n/(N-1))-0.0106411*cos(6*M_PI*n/(N-1));
+        *overlap = 0.661;
+        break;
+    case WFUNC_BHANN:
+        for (n = 0; n < N; n++)
+            lut[n] = 0.62-0.48*fabs(n/(double)(N-1)-.5)-0.38*cos(2*M_PI*n/(N-1));
+        *overlap = 0.5;
+        break;
+    case WFUNC_SINE:
+        for (n = 0; n < N; n++)
+            lut[n] = sin(M_PI*n/(N-1));
+        *overlap = 0.75;
+        break;
+    case WFUNC_NUTTALL:
+        for (n = 0; n < N; n++)
+            lut[n] = 0.355768-0.487396*cos(2*M_PI*n/(N-1))+0.144232*cos(4*M_PI*n/(N-1))-0.012604*cos(6*M_PI*n/(N-1));
+        *overlap = 0.663;
+        break;
+    case WFUNC_LANCZOS: /* SINC is a fully parenthesised expression, so it is safe in any context */
+#define SINC(x) (!(x) ? 1 : sin(M_PI * (x))/(M_PI * (x)))
+        for (n = 0; n < N; n++)
+            lut[n] = SINC((2.*n)/(N-1)-1);
+        *overlap = 0.75;
+        break;
+    case WFUNC_GAUSS: /* centre is (N-1)/2. in floating point so the window is symmetric for even N too */
+#define SQR(x) ((x)*(x))
+        for (n = 0; n < N; n++)
+            lut[n] = exp(-0.5 * SQR((n-(N-1)/2.)/(0.4*(N-1)/2.f)));
+        *overlap = 0.75;
+        break;
+    case WFUNC_TUKEY:
+        for (n = 0; n < N; n++) {
+            float M = (N-1)/2.;
+
+            if (FFABS(n - M) >= 0.3 * M) { /* outer region: raised-cosine taper */
+                lut[n] = 0.5 * (1 + cos((M_PI*(FFABS(n - M) - 0.3 * M))/((1 - 0.3) * M)));
+            } else {                       /* central region: flat top */
+                lut[n] = 1;
+            }
+        }
+        *overlap = 0.33;
+        break;
+    default:
+        av_assert0(0);
+    }
+}
diff --git a/libavfilter/window_func.h b/libavfilter/window_func.h
new file mode 100644
index 00000000..3da09d8d
--- /dev/null
+++ b/libavfilter/window_func.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+
+#ifndef AVFILTER_WINDOW_FUNC_H
+#define AVFILTER_WINDOW_FUNC_H
+
+enum WindowFunc     { WFUNC_RECT, WFUNC_HANNING, WFUNC_HAMMING, WFUNC_BLACKMAN,
+                      WFUNC_BARTLETT, WFUNC_WELCH, WFUNC_FLATTOP,
+                      WFUNC_BHARRIS, WFUNC_BNUTTALL, WFUNC_SINE, WFUNC_NUTTALL,
+                      WFUNC_BHANN, WFUNC_LANCZOS, WFUNC_GAUSS, WFUNC_TUKEY, NB_WFUNC };
+
+void ff_generate_window_func(float *lut, int N, int win_func, float *overlap);
+
+#endif /* AVFILTER_WINDOW_FUNC_H */
diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile
index 49f45b63..33de380b 100644
--- a/libavfilter/x86/Makefile
+++ b/libavfilter/x86/Makefile
@@ -1,24 +1,42 @@
+OBJS-$(CONFIG_BLEND_FILTER)                  += x86/vf_blend_init.o
 OBJS-$(CONFIG_EQ_FILTER)                     += x86/vf_eq.o
 OBJS-$(CONFIG_FSPP_FILTER)                   += x86/vf_fspp_init.o
 OBJS-$(CONFIG_GRADFUN_FILTER)                += x86/vf_gradfun_init.o
 OBJS-$(CONFIG_HQDN3D_FILTER)                 += x86/vf_hqdn3d_init.o
 OBJS-$(CONFIG_IDET_FILTER)                   += x86/vf_idet_init.o
 OBJS-$(CONFIG_INTERLACE_FILTER)              += x86/vf_interlace_init.o
+OBJS-$(CONFIG_MASKEDMERGE_FILTER)            += x86/vf_maskedmerge_init.o
 OBJS-$(CONFIG_NOISE_FILTER)                  += x86/vf_noise.o
 OBJS-$(CONFIG_PP7_FILTER)                    += x86/vf_pp7_init.o
+OBJS-$(CONFIG_PSNR_FILTER)                   += x86/vf_psnr_init.o
 OBJS-$(CONFIG_PULLUP_FILTER)                 += x86/vf_pullup_init.o
+OBJS-$(CONFIG_REMOVEGRAIN_FILTER)            += x86/vf_removegrain_init.o
 OBJS-$(CONFIG_SPP_FILTER)                    += x86/vf_spp.o
+OBJS-$(CONFIG_SSIM_FILTER)                   += x86/vf_ssim_init.o
+OBJS-$(CONFIG_STEREO3D_FILTER)               += x86/vf_stereo3d_init.o
+OBJS-$(CONFIG_TBLEND_FILTER)                 += x86/vf_blend_init.o
 OBJS-$(CONFIG_TINTERLACE_FILTER)             += x86/vf_tinterlace_init.o
 OBJS-$(CONFIG_VOLUME_FILTER)                 += x86/af_volume_init.o
+OBJS-$(CONFIG_W3FDIF_FILTER)                 += x86/vf_w3fdif_init.o
 OBJS-$(CONFIG_YADIF_FILTER)                  += x86/vf_yadif_init.o
 
+YASM-OBJS-$(CONFIG_BLEND_FILTER)             += x86/vf_blend.o
 YASM-OBJS-$(CONFIG_FSPP_FILTER)              += x86/vf_fspp.o
 YASM-OBJS-$(CONFIG_GRADFUN_FILTER)           += x86/vf_gradfun.o
 YASM-OBJS-$(CONFIG_HQDN3D_FILTER)            += x86/vf_hqdn3d.o
 YASM-OBJS-$(CONFIG_IDET_FILTER)              += x86/vf_idet.o
 YASM-OBJS-$(CONFIG_INTERLACE_FILTER)         += x86/vf_interlace.o
+YASM-OBJS-$(CONFIG_MASKEDMERGE_FILTER)       += x86/vf_maskedmerge.o
 YASM-OBJS-$(CONFIG_PP7_FILTER)               += x86/vf_pp7.o
+YASM-OBJS-$(CONFIG_PSNR_FILTER)              += x86/vf_psnr.o
 YASM-OBJS-$(CONFIG_PULLUP_FILTER)            += x86/vf_pullup.o
+ifdef CONFIG_GPL
+YASM-OBJS-$(CONFIG_REMOVEGRAIN_FILTER)       += x86/vf_removegrain.o
+endif
+YASM-OBJS-$(CONFIG_SSIM_FILTER)              += x86/vf_ssim.o
+YASM-OBJS-$(CONFIG_STEREO3D_FILTER)          += x86/vf_stereo3d.o
+YASM-OBJS-$(CONFIG_TBLEND_FILTER)            += x86/vf_blend.o
 YASM-OBJS-$(CONFIG_TINTERLACE_FILTER)        += x86/vf_interlace.o
 YASM-OBJS-$(CONFIG_VOLUME_FILTER)            += x86/af_volume.o
+YASM-OBJS-$(CONFIG_W3FDIF_FILTER)            += x86/vf_w3fdif.o
 YASM-OBJS-$(CONFIG_YADIF_FILTER)             += x86/vf_yadif.o x86/yadif-16.o x86/yadif-10.o
diff --git a/libavfilter/x86/af_volume.asm b/libavfilter/x86/af_volume.asm
index f4cbcbc5..723ab1f8 100644
--- a/libavfilter/x86/af_volume.asm
+++ b/libavfilter/x86/af_volume.asm
@@ -29,7 +29,7 @@ pw_1:         times 8 dw 1
 pw_128:       times 8 dw 128
 pq_128:       times 2 dq 128
 
-SECTION_TEXT
+SECTION .text
 
 ;------------------------------------------------------------------------------
 ; void ff_scale_samples_s16(uint8_t *dst, const uint8_t *src, int len,
diff --git a/libavfilter/x86/vf_blend.asm b/libavfilter/x86/vf_blend.asm
new file mode 100644
index 00000000..47471aaa
--- /dev/null
+++ b/libavfilter/x86/vf_blend.asm
@@ -0,0 +1,286 @@
+;*****************************************************************************
+;* x86-optimized functions for blend filter
+;*
+;* Copyright (C) 2015 Paul B Mahol
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+pw_1:   times 8 dw 1
+pw_128: times 8 dw 128
+pw_255: times 8 dw 255
+pb_127: times 16 db 127
+pb_128: times 16 db 128
+pb_255: times 16 db 255
+
+SECTION .text
+
+%macro BLEND_INIT 2 ; %1 = blend mode (function name suffix), %2 = XMM register count handed to cglobal
+%if ARCH_X86_64
+cglobal blend_%1, 6, 9, %2, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, end, x
+    mov    widthd, dword widthm          ; width is the 7th argument; cglobal only loads the first 6
+%else ; x86-32: dst_linesize and width are used straight from their argument slots
+cglobal blend_%1, 5, 7, %2, top, top_linesize, bottom, bottom_linesize, dst, end, x
+%define dst_linesizeq r5mp
+%define widthq r6mp
+%endif
+    mov      endd, dword r7m             ; 8th argument: row counter consumed by BLEND_END
+    add      topq, widthq                ; move the three row pointers to the end of the row ...
+    add   bottomq, widthq
+    add      dstq, widthq
+    neg    widthq                        ; ... so the column index can count up from -width to 0
+%endmacro
+
+%macro BLEND_END 0 ; row epilogue: advance to the next row and jump back to the caller's .nextrow label
+    add          topq, top_linesizeq
+    add       bottomq, bottom_linesizeq
+    add          dstq, dst_linesizeq
+    sub          endd, 1                 ; one more row finished
+    jg .nextrow                          ; keep going while rows remain
+REP_RET
+%endmacro
+
+%macro BLEND_SIMPLE 2 ; %1 = blend mode name, %2 = packed-integer mnemonic without its leading "p"
+BLEND_INIT %1, 2
+.nextrow:
+    mov        xq, widthq                ; x = -width (widthq was negated by BLEND_INIT)
+
+    .loop:
+        movu            m0, [topq + xq]
+        movu            m1, [bottomq + xq]
+        p%2             m0, m1           ; the whole blend is this one instruction
+        mova   [dstq + xq], m0           ; aligned store, unlike the unaligned (movu) loads
+        add             xq, mmsize       ; full-register steps: the last one may run past width
+    jl .loop
+BLEND_END
+%endmacro
+
+INIT_XMM sse2
+BLEND_SIMPLE xor,      xor
+BLEND_SIMPLE or,       or
+BLEND_SIMPLE and,      and
+BLEND_SIMPLE addition, addusb
+BLEND_SIMPLE subtract, subusb
+BLEND_SIMPLE darken,   minub
+BLEND_SIMPLE lighten,  maxub
+
+BLEND_INIT difference128, 4 ; dst = clip_uint8(top - bottom + 128)
+    pxor       m2, m2                    ; zero, used to widen bytes to words
+    mova       m3, [pw_128]
+.nextrow:
+    mov        xq, widthq
+
+    .loop:
+        movh            m0, [topq + xq]      ; half a register: mmsize / 2 pixels per iteration
+        movh            m1, [bottomq + xq]
+        punpcklbw       m0, m2
+        punpcklbw       m1, m2
+        paddw           m0, m3               ; top + 128
+        psubw           m0, m1               ; top + 128 - bottom
+        packuswb        m0, m0               ; saturate back to 0..255
+        movh   [dstq + xq], m0
+        add             xq, mmsize / 2
+    jl .loop
+BLEND_END
+
+%macro MULTIPLY 3 ; a, b, pw_1 -- result is left in a, b is overwritten
+    pmullw          %1, %2               ; xxxxxxxx  a * b
+    paddw           %1, %3               ; t = a * b + 1
+    mova            %2, %1               ; b is reused as scratch from here on
+    psrlw           %2, 8                ; t >> 8
+    paddw           %1, %2               ; t + (t >> 8)
+    psrlw           %1, 8                ; 00xx00xx  a * b / 255
+%endmacro
+
+%macro SCREEN 4   ; a, b, pw_1, pw_255 -- result is left in a, b is overwritten
+    pxor            %1, %4               ; 00xx00xx  255 - a
+    pxor            %2, %4               ; 00xx00xx  255 - b
+    MULTIPLY        %1, %2, %3           ; x / 255 with x = (255 - a) * (255 - b)
+    pxor            %1, %4               ; 00xx00xx  255 - x / 255
+%endmacro
+
+BLEND_INIT multiply, 4 ; dst = top * bottom / 255, computed by the MULTIPLY macro
+    pxor       m2, m2                    ; zero, used to widen bytes to words
+    mova       m3, [pw_1]
+.nextrow:
+    mov        xq, widthq
+
+    .loop:
+                                             ;     word
+                                             ;     |--|
+        movh            m0, [topq + xq]      ; 0000xxxx
+        movh            m1, [bottomq + xq]
+        punpcklbw       m0, m2               ; 00xx00xx
+        punpcklbw       m1, m2
+
+        MULTIPLY        m0, m1, m3           ; result in m0, m1 is clobbered
+
+        packuswb        m0, m0               ; 0000xxxx
+        movh   [dstq + xq], m0
+        add             xq, mmsize / 2       ; mmsize / 2 pixels were consumed
+
+    jl .loop
+BLEND_END
+
+BLEND_INIT screen, 5 ; dst = 255 - (255 - top) * (255 - bottom) / 255, computed by the SCREEN macro
+    pxor       m2, m2                    ; zero, used to widen bytes to words
+    mova       m3, [pw_1]
+    mova       m4, [pw_255]
+.nextrow:
+    mov        xq, widthq
+
+    .loop:
+        movh            m0, [topq + xq]      ; 0000xxxx
+        movh            m1, [bottomq + xq]
+        punpcklbw       m0, m2               ; 00xx00xx
+        punpcklbw       m1, m2
+
+        SCREEN          m0, m1, m3, m4       ; result in m0, m1 is clobbered
+
+        packuswb        m0, m0               ; 0000xxxx
+        movh   [dstq + xq], m0
+        add             xq, mmsize / 2       ; mmsize / 2 pixels were consumed
+
+    jl .loop
+BLEND_END
+
+BLEND_INIT average, 3 ; dst = (top + bottom) >> 1
+    pxor       m2, m2                    ; zero, used to widen bytes to words
+.nextrow:
+    mov        xq, widthq
+
+    .loop:
+        movh            m0, [topq + xq]
+        movh            m1, [bottomq + xq]
+        punpcklbw       m0, m2
+        punpcklbw       m1, m2
+        paddw           m0, m1               ; 16-bit sum, cannot overflow for 8-bit inputs
+        psrlw           m0, 1                ; halve, discarding the low bit
+        packuswb        m0, m0
+        movh   [dstq + xq], m0
+        add             xq, mmsize / 2
+    jl .loop
+BLEND_END
+
+BLEND_INIT addition128, 4 ; dst = clip_uint8(top + bottom - 128)
+    pxor       m2, m2                    ; zero, used to widen bytes to words
+    mova       m3, [pw_128]
+.nextrow:
+    mov        xq, widthq
+
+    .loop:
+        movh            m0, [topq + xq]
+        movh            m1, [bottomq + xq]
+        punpcklbw       m0, m2
+        punpcklbw       m1, m2
+        paddw           m0, m1               ; top + bottom
+        psubw           m0, m3               ; ... - 128, may go negative as a signed word
+        packuswb        m0, m0               ; saturate to 0..255
+        movh   [dstq + xq], m0
+        add             xq, mmsize / 2
+    jl .loop
+BLEND_END
+
+BLEND_INIT hardmix, 5 ; dst = (255 - bottom > top) ? 0 : 255
+    mova       m2, [pb_255]
+    mova       m3, [pb_128]
+    mova       m4, [pb_127]
+.nextrow:
+    mov        xq, widthq
+
+    .loop:
+        movu            m0, [topq + xq]
+        movu            m1, [bottomq + xq]
+        pxor            m1, m4               ; bottom ^ 127 == (255 - bottom) ^ 128
+        pxor            m0, m3               ; top ^ 128: bias so the signed compare orders unsigned bytes
+        pcmpgtb         m1, m0               ; 0xff where 255 - bottom > top
+        pxor            m1, m2               ; invert the mask
+        mova   [dstq + xq], m1
+        add             xq, mmsize
+    jl .loop
+BLEND_END
+
+BLEND_INIT phoenix, 4 ; dst = min(top, bottom) - max(top, bottom) + 255
+    mova       m3, [pb_255]
+.nextrow:
+    mov        xq, widthq
+
+    .loop:
+        movu            m0, [topq + xq]
+        movu            m1, [bottomq + xq]
+        mova            m2, m0               ; keep a copy of top
+        pminub          m0, m1               ; m0 = min(top, bottom)
+        pmaxub          m1, m2               ; m1 = max(top, bottom)
+        mova            m2, m3
+        psubusb         m2, m1               ; 255 - max
+        paddusb         m2, m0               ; ... + min
+        mova   [dstq + xq], m2
+        add             xq, mmsize
+    jl .loop
+BLEND_END
+
+%macro BLEND_ABS 0 ; emits the two kernels that need ABS1; expanded once per instruction set below
+BLEND_INIT difference, 3 ; dst = |top - bottom|
+    pxor       m2, m2                    ; zero, used to widen bytes to words
+.nextrow:
+    mov        xq, widthq
+
+    .loop:
+        movh            m0, [topq + xq]
+        movh            m1, [bottomq + xq]
+        punpcklbw       m0, m2
+        punpcklbw       m1, m2
+        psubw           m0, m1               ; signed difference
+        ABS1            m0, m1               ; presumably |m0| with m1 as scratch (macro lives in x86util.asm)
+        packuswb        m0, m0
+        movh   [dstq + xq], m0
+        add             xq, mmsize / 2
+    jl .loop
+BLEND_END
+
+BLEND_INIT negation, 5 ; dst = 255 - |255 - top - bottom|
+    pxor       m2, m2                    ; zero, used to widen bytes to words
+    mova       m4, [pw_255]
+.nextrow:
+    mov        xq, widthq
+
+    .loop:
+        movh            m0, [topq + xq]
+        movh            m1, [bottomq + xq]
+        punpcklbw       m0, m2
+        punpcklbw       m1, m2
+        mova            m3, m4
+        psubw           m3, m0
+        psubw           m3, m1               ; 255 - top - bottom
+        ABS1            m3, m1               ; presumably |m3| with m1 as scratch (bottom is no longer needed)
+        mova            m0, m4
+        psubw           m0, m3               ; 255 - |...|
+        packuswb        m0, m0
+        movh   [dstq + xq], m0
+        add             xq, mmsize / 2
+    jl .loop
+BLEND_END
+%endmacro
+
+INIT_XMM sse2
+BLEND_ABS
+INIT_XMM ssse3
+BLEND_ABS
diff --git a/libavfilter/x86/vf_blend_init.c b/libavfilter/x86/vf_blend_init.c
new file mode 100644
index 00000000..555e1e54
--- /dev/null
+++ b/libavfilter/x86/vf_blend_init.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavfilter/blend.h"
+
+#define BLEND_FUNC(name, opt) /* declares the assembly routine ff_blend_<name>_<opt>() */ \
+void ff_blend_##name##_##opt(const uint8_t *top, ptrdiff_t top_linesize,       \
+                             const uint8_t *bottom, ptrdiff_t bottom_linesize, \
+                             uint8_t *dst, ptrdiff_t dst_linesize,             \
+                             ptrdiff_t width, ptrdiff_t height,                \
+                             struct FilterParams *param, double *values);
+
+BLEND_FUNC(addition, sse2)
+BLEND_FUNC(addition128, sse2)
+BLEND_FUNC(average, sse2)
+BLEND_FUNC(and, sse2)
+BLEND_FUNC(darken, sse2)
+BLEND_FUNC(difference128, sse2)
+BLEND_FUNC(multiply, sse2)
+BLEND_FUNC(screen, sse2)
+BLEND_FUNC(hardmix, sse2)
+BLEND_FUNC(lighten, sse2)
+BLEND_FUNC(or, sse2)
+BLEND_FUNC(phoenix, sse2)
+BLEND_FUNC(subtract, sse2)
+BLEND_FUNC(xor, sse2)
+BLEND_FUNC(difference, sse2)  /* difference and negation exist in two builds */
+BLEND_FUNC(difference, ssse3)
+BLEND_FUNC(negation, sse2)
+BLEND_FUNC(negation, ssse3)
+
+av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
+{
+    const int flags  = av_get_cpu_flags();
+    const int usable = param->opacity == 1 && !is_16bit; /* asm needs 8-bit data at full opacity */
+
+    if (usable && EXTERNAL_SSE2(flags)) {
+        switch (param->mode) {
+        case BLEND_ADDITION:      param->blend = ff_blend_addition_sse2;      break;
+        case BLEND_ADDITION128:   param->blend = ff_blend_addition128_sse2;   break;
+        case BLEND_AND:           param->blend = ff_blend_and_sse2;           break;
+        case BLEND_AVERAGE:       param->blend = ff_blend_average_sse2;       break;
+        case BLEND_DARKEN:        param->blend = ff_blend_darken_sse2;        break;
+        case BLEND_DIFFERENCE:    param->blend = ff_blend_difference_sse2;    break;
+        case BLEND_DIFFERENCE128: param->blend = ff_blend_difference128_sse2; break;
+        case BLEND_HARDMIX:       param->blend = ff_blend_hardmix_sse2;       break;
+        case BLEND_LIGHTEN:       param->blend = ff_blend_lighten_sse2;       break;
+        case BLEND_MULTIPLY:      param->blend = ff_blend_multiply_sse2;      break;
+        case BLEND_NEGATION:      param->blend = ff_blend_negation_sse2;      break;
+        case BLEND_OR:            param->blend = ff_blend_or_sse2;            break;
+        case BLEND_PHOENIX:       param->blend = ff_blend_phoenix_sse2;       break;
+        case BLEND_SCREEN:        param->blend = ff_blend_screen_sse2;        break;
+        case BLEND_SUBTRACT:      param->blend = ff_blend_subtract_sse2;      break;
+        case BLEND_XOR:           param->blend = ff_blend_xor_sse2;           break;
+        }
+    }
+    /* Where an SSSE3 build exists it replaces the SSE2 pick made above. */
+    if (usable && EXTERNAL_SSSE3(flags)) {
+        if (param->mode == BLEND_DIFFERENCE)    param->blend = ff_blend_difference_ssse3;
+        else if (param->mode == BLEND_NEGATION) param->blend = ff_blend_negation_ssse3;
+    }
+}
diff --git a/libavfilter/x86/vf_idet.asm b/libavfilter/x86/vf_idet.asm
index 007e63de..9596abd7 100644
--- a/libavfilter/x86/vf_idet.asm
+++ b/libavfilter/x86/vf_idet.asm
@@ -23,7 +23,7 @@
 
 %include "libavutil/x86/x86util.asm"
 
-SECTION_TEXT
+SECTION .text
 
 ; Implementation that does 8-bytes at a time using single-word operations.
 %macro IDET_FILTER_LINE 1
diff --git a/libavfilter/x86/vf_interlace.asm b/libavfilter/x86/vf_interlace.asm
index ce3dd813..f70c7009 100644
--- a/libavfilter/x86/vf_interlace.asm
+++ b/libavfilter/x86/vf_interlace.asm
@@ -37,7 +37,7 @@ cglobal lowpass_line, 5, 5, 7
 
     pcmpeqb m6, m6
 
-.loop
+.loop:
     mova m0, [r3+r1]
     mova m1, [r3+r1+mmsize]
     pavgb m0, [r4+r1]
diff --git a/libavfilter/x86/vf_maskedmerge.asm b/libavfilter/x86/vf_maskedmerge.asm
new file mode 100644
index 00000000..7e61935b
--- /dev/null
+++ b/libavfilter/x86/vf_maskedmerge.asm
@@ -0,0 +1,81 @@
+;*****************************************************************************
+;* x86-optimized functions for maskedmerge filter
+;*
+;* Copyright (C) 2015 Paul B Mahol
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;*****************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+pw_128: times 8 dw 128
+pw_256: times 8 dw 256
+
+SECTION .text
+
+INIT_XMM sse2 ; maskedmerge8: dst = (bsrc * (256 - msrc) + osrc * msrc + 128) >> 8 for each 8-bit sample
+%if ARCH_X86_64
+cglobal maskedmerge8, 8, 11, 7, bsrc, osrc, msrc, dst, blinesize, olinesize, mlinesize, dlinesize, w, h, x
+    mov         wd, dword wm             ; w and h are arguments 9 and 10; only the first 8 are auto-loaded
+    mov         hd, dword hm
+%else ; x86-32: three of the strides and h are used straight from their argument slots
+cglobal maskedmerge8, 5, 7, 7, bsrc, osrc, msrc, dst, blinesize, w, x
+    mov         wd, r8m
+%define olinesizeq r5mp
+%define mlinesizeq r6mp
+%define dlinesizeq r7mp
+%define hd r9mp
+%endif
+    mova        m4, [pw_256]
+    mova        m5, [pw_128]             ; rounding term added before the final >> 8
+    pxor        m6, m6                   ; zero, used to widen bytes to words
+    add      bsrcq, wq                   ; move all row pointers to the end of the row ...
+    add      osrcq, wq
+    add      msrcq, wq
+    add       dstq, wq
+    neg         wq                       ; ... so x can count up from -w to 0
+.nextrow:
+    mov         xq, wq
+
+    .loop:
+        movh            m0, [bsrcq + xq]     ; mmsize / 2 samples per iteration
+        movh            m1, [osrcq + xq]
+        movh            m3, [msrcq + xq]
+        mova            m2, m4
+        punpcklbw       m0, m6
+        punpcklbw       m1, m6
+        punpcklbw       m3, m6
+        psubw           m2, m3               ; 256 - mask
+        pmullw          m2, m0               ; bsrc * (256 - mask)
+        pmullw          m1, m3               ; osrc * mask
+        paddw           m1, m2
+        paddw           m1, m5
+        psrlw           m1, 8
+        packuswb        m1, m1
+        movh   [dstq + xq], m1
+        add             xq, mmsize / 2
+    jl .loop
+
+    add         bsrcq, blinesizeq
+    add         osrcq, olinesizeq
+    add         msrcq, mlinesizeq
+    add          dstq, dlinesizeq
+    sub         hd, 1                    ; one more row finished
+    jg .nextrow
+REP_RET
diff --git a/libavfilter/x86/vf_maskedmerge_init.c b/libavfilter/x86/vf_maskedmerge_init.c
new file mode 100644
index 00000000..73ab8880
--- /dev/null
+++ b/libavfilter/x86/vf_maskedmerge_init.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavfilter/maskedmerge.h"
+
+void ff_maskedmerge8_sse2(const uint8_t *bsrc, const uint8_t *osrc,
+                          const uint8_t *msrc, uint8_t *dst,
+                          ptrdiff_t blinesize, ptrdiff_t olinesize,
+                          ptrdiff_t mlinesize, ptrdiff_t dlinesize,
+                          int w, int h,
+                          int half, int shift);
+
+av_cold void ff_maskedmerge_init_x86(MaskedMergeContext *s)
+{
+    const int flags = av_get_cpu_flags();
+
+    /* The only kernel here is the 8-bit SSE2 one; otherwise s is left alone. */
+    if (EXTERNAL_SSE2(flags) && s->depth == 8)
+        s->maskedmerge = ff_maskedmerge8_sse2;
+}
diff --git a/libavfilter/x86/vf_psnr.asm b/libavfilter/x86/vf_psnr.asm
new file mode 100644
index 00000000..11eb81a2
--- /dev/null
+++ b/libavfilter/x86/vf_psnr.asm
@@ -0,0 +1,140 @@
+;*****************************************************************************
+;* x86-optimized functions for psnr filter
+;*
+;* Copyright (C) 2015 Ronald S. Bultje <rsbultje@gmail.com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION .text
+
+%macro SSE_LINE_FN 2 ; 8 or 16, byte or word -- emits sse_line_<%1>bit: sum of squared differences of one line
+INIT_XMM sse2
+%if ARCH_X86_32 ; no argument is auto-loaded (0); buf, ref and w are fetched by hand below
+%if %1 == 8
+cglobal sse_line_%1 %+ bit, 0, 6, 8, res, buf, w, px1, px2, ref
+%else
+cglobal sse_line_%1 %+ bit, 0, 7, 8, res, buf, reshigh, w, px1, px2, ref
+%endif
+    mov       bufq, r0mp
+    mov       refq, r1mp
+    mov         wd, r2m
+%else
+cglobal sse_line_%1 %+ bit, 3, 5, 8, buf, ref, w, px1, px2
+%endif
+    pxor        m6, m6                   ; constant zero
+    pxor        m7, m7                   ; running sum
+    sub         wd, mmsize*2             ; the vector loop consumes mmsize*2 pixels per pass
+    jl .end                              ; shorter line: leave everything to the scalar loop
+
+.loop:
+    movu        m0, [bufq+mmsize*0]
+    movu        m1, [bufq+mmsize*1]
+    movu        m2, [refq+mmsize*0]
+    movu        m3, [refq+mmsize*1]
+%if %1 == 8
+    add       bufq, mmsize*2
+    add       refq, mmsize*2
+    psubusb     m4, m0, m2               ; max(buf - ref, 0)
+    psubusb     m5, m1, m3
+    psubusb     m2, m0                   ; max(ref - buf, 0)
+    psubusb     m3, m1
+    por         m2, m4                   ; |buf - ref| (one of the two halves is zero)
+    por         m3, m5
+    punpcklbw   m0, m2, m6               ; widen the absolute differences to words
+    punpcklbw   m1, m3, m6
+    punpckhbw   m2, m6
+    punpckhbw   m3, m6
+%else
+    psubw       m0, m2                   ; word samples: signed difference directly
+    psubw       m1, m3
+    movu        m2, [bufq+mmsize*2]
+    movu        m3, [bufq+mmsize*3]
+    movu        m4, [refq+mmsize*2]
+    movu        m5, [refq+mmsize*3]
+    psubw       m2, m4
+    psubw       m3, m5
+    add       bufq, mmsize*4
+    add       refq, mmsize*4
+%endif
+    pmaddwd     m0, m0                   ; square each word and add neighbouring pairs into dwords
+    pmaddwd     m1, m1
+    pmaddwd     m2, m2
+    pmaddwd     m3, m3
+    paddd       m0, m1
+    paddd       m2, m3
+%if %1 == 8
+    paddd       m7, m0                   ; 8-bit: the sum is kept in 32-bit lanes
+    paddd       m7, m2
+%else
+    paddd       m0, m2
+    punpckldq   m2, m0, m6               ; 16-bit: zero-extend to qwords, sum in 64-bit lanes
+    punpckhdq   m0, m6
+    paddq       m7, m0
+    paddq       m7, m2
+%endif
+    sub         wd, mmsize*2
+    jge .loop
+
+.end:
+    add         wd, mmsize*2             ; undo the bias: wd = pixels still to process
+    movhlps     m0, m7                   ; fold the high half of the sum onto the low half
+%if %1 == 8
+    paddd       m7, m0
+    pshufd      m0, m7, 1                ; bring dword 1 down to lane 0
+    paddd       m7, m0
+    movd       eax, m7
+%else
+    paddq       m7, m0
+%if ARCH_X86_32
+    movd       eax, m7                   ; 64-bit total split across edx:eax
+    psrldq      m7, 4
+    movd       edx, m7
+%else
+    movq       rax, m7
+%endif
+%endif
+
+    ; deal with cases where w % 32 != 0
+    test        wd, wd
+    jz .end_scalar
+.loop_scalar:
+    movzx     px1d, %2 [bufq+wq*(%1/8)-(%1/8)] ; pixel w-1 counted from the current buf/ref position
+    movzx     px2d, %2 [refq+wq*(%1/8)-(%1/8)]
+    sub       px1d, px2d
+    imul      px1d, px1d                 ; squared difference
+%if %1 == 8
+    add        eax, px1d
+%elif ARCH_X86_64
+    add        rax, px1q
+%else
+    add        eax, px1d
+    adc        edx, 0                    ; carry into the high half of the 64-bit total
+%endif
+    dec         wd
+    jg .loop_scalar
+
+.end_scalar:
+    ; for %1=8, no need to zero edx on x86-32, since edx=wd, which is zero
+    RET
+%endmacro
+
+INIT_XMM sse2
+SSE_LINE_FN  8, byte
+SSE_LINE_FN 16, word
diff --git a/libavfilter/x86/vf_psnr_init.c b/libavfilter/x86/vf_psnr_init.c
new file mode 100644
index 00000000..c3878122
--- /dev/null
+++ b/libavfilter/x86/vf_psnr_init.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2015 Ronald S. Bultje <rsbultje@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/x86/cpu.h"
+
+#include "libavfilter/psnr.h"
+
+uint64_t ff_sse_line_8bit_sse2(const uint8_t *buf, const uint8_t *ref, int w);
+uint64_t ff_sse_line_16bit_sse2(const uint8_t *buf, const uint8_t *ref, int w);
+
+void ff_psnr_init_x86(PSNRDSPContext *dsp, int bpp)
+{
+    const int flags = av_get_cpu_flags();
+
+    /* Without SSE2, or for depths above 15 bits, dsp->sse_line is not
+     * written here at all. */
+    if (!EXTERNAL_SSE2(flags) || bpp > 15)
+        return;
+
+    /* Up to 8 bits selects the byte kernel, 9..15 bits the word kernel. */
+    dsp->sse_line = bpp <= 8 ? ff_sse_line_8bit_sse2 : ff_sse_line_16bit_sse2;
+}
diff --git a/libavfilter/x86/vf_pullup.asm b/libavfilter/x86/vf_pullup.asm
index d3a19551..26c2a27d 100644
--- a/libavfilter/x86/vf_pullup.asm
+++ b/libavfilter/x86/vf_pullup.asm
@@ -20,7 +20,7 @@
 
 %include "libavutil/x86/x86util.asm"
 
-SECTION_TEXT
+SECTION .text
 
 INIT_MMX mmx
 cglobal pullup_filter_diff, 3, 5, 8, first, second, size
diff --git a/libavfilter/x86/vf_removegrain.asm b/libavfilter/x86/vf_removegrain.asm
new file mode 100644
index 00000000..d049bf25
--- /dev/null
+++ b/libavfilter/x86/vf_removegrain.asm
@@ -0,0 +1,1218 @@
+;*****************************************************************************
+;* x86-optimized functions for removegrain filter
+;*
+;* Copyright (C) 2015 James Darnley
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or modify
+;* it under the terms of the GNU General Public License as published by
+;* the Free Software Foundation; either version 2 of the License, or
+;* (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;* GNU General Public License for more details.
+;*
+;* You should have received a copy of the GNU General Public License along
+;* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+;* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+;*****************************************************************************
+
+; column: -1  0 +1
+; row -1: a1 a2 a3
+; row  0: a4  c a5
+; row +1: a6 a7 a8
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA 32
+
+pw_4:    times 16 dw 4
+pw_8:    times 16 dw 8
+pw_div9: times 16 dw ((1<<16)+4)/9
+
+SECTION .text
+
+;*** Preprocessor helpers
+
+%define a1 srcq+stride_n-1
+%define a2 srcq+stride_n
+%define a3 srcq+stride_n+1
+%define a4 srcq-1
+%define c  srcq
+%define a5 srcq+1
+%define a6 srcq+stride_p-1
+%define a7 srcq+stride_p
+%define a8 srcq+stride_p+1
+
+; %1 dest simd register
+; %2 source memory location
+; %3 zero location (simd register/memory)
+%macro LOAD 3 ; loads half a register of bytes and widens them to words
+    movh %1, %2
+    punpcklbw %1, %3 ; interleave with zero: each byte becomes a word
+%endmacro
+
+%macro LOAD_SQUARE 0 ; byte loads of the 3x3 neighbourhood: a1..a8 -> m1..m8, centre c -> m0
+    movu m1, [a1]
+    movu m2, [a2]
+    movu m3, [a3]
+    movu m4, [a4]
+    movu m0, [c]
+    movu m5, [a5]
+    movu m6, [a6]
+    movu m7, [a7]
+    movu m8, [a8]
+%endmacro
+
+; %1 zero location (simd register/memory)
+%macro LOAD_SQUARE_16 1 ; same register layout as LOAD_SQUARE, but widened to words via LOAD
+    LOAD m1, [a1], %1
+    LOAD m2, [a2], %1
+    LOAD m3, [a3], %1
+    LOAD m4, [a4], %1
+    LOAD m0, [c], %1
+    LOAD m5, [a5], %1
+    LOAD m6, [a6], %1
+    LOAD m7, [a7], %1
+    LOAD m8, [a8], %1
+%endmacro
+
+; %1 data type
+; %2 simd register: first input, holds the minimums afterwards
+; %3 simd register: second input, holds the maximums afterwards
+; %4 temp location (simd register/memory)
+%macro SORT_PAIR 4
+    mova   %4, %2 ; save the first input before it is overwritten
+    pmin%1 %2, %3 ; %2 = min(%2, %3)
+    pmax%1 %3, %4 ; %3 = max(%3, saved %2)
+%endmacro
+
+%macro SORT_AXIS 0 ; order each opposing pair (a1,a8) (a2,a7) (a3,a6) (a4,a5) as unsigned bytes
+    SORT_PAIR ub, m1, m8, m9
+    SORT_PAIR ub, m2, m7, m10
+    SORT_PAIR ub, m3, m6, m11
+    SORT_PAIR ub, m4, m5, m12
+%endmacro
+
+
+%macro SORT_AXIS_16 0 ; same pairs as SORT_AXIS, compared as signed words
+    SORT_PAIR sw, m1, m8, m9
+    SORT_PAIR sw, m2, m7, m10
+    SORT_PAIR sw, m3, m6, m11
+    SORT_PAIR sw, m4, m5, m12
+%endmacro
+
+; The loop doesn't need to do all the iterations.  It could stop when the right
+; pixels are in the right registers.
+%macro SORT_SQUARE 0 ; sorts m1..m8 into ascending order (unsigned bytes), m9 is scratch
+    %assign k 7
+    %rep 7 ; passes of 7, 6, ..., 1 adjacent compare-exchanges
+        %assign i 1
+        %assign j 2
+        %rep k
+            SORT_PAIR ub, m %+ i , m %+ j , m9
+            %assign i i+1
+            %assign j j+1
+        %endrep
+        %assign k k-1
+    %endrep
+%endmacro
+
+; %1 dest simd register
+; %2 source (simd register/memory)
+; %3 temp simd register
+%macro ABS_DIFF 3 ; %1 = |%1 - %2| on unsigned bytes
+    mova %3, %2
+    psubusb %3, %1 ; max(%2 - %1, 0)
+    psubusb %1, %2 ; max(%1 - %2, 0)
+    por %1, %3     ; one of the two is zero, so OR yields the absolute difference
+%endmacro
+
+; %1 dest simd register
+; %2 source (simd register/memory)
+; %3 temp simd register
+%macro ABS_DIFF_W 3 ; %1 = |%1 - %2| on unsigned words
+    mova %3, %2
+    psubusw %3, %1 ; max(%2 - %1, 0)
+    psubusw %1, %2 ; max(%1 - %2, 0)
+    por %1, %3     ; one of the two is zero, so OR yields the absolute difference
+%endmacro
+
+; %1 simd register that holds the "false" values and will hold the result
+; %2 simd register that holds the "true" values
+; %3 location (simd register/memory) that hold the mask
+%macro BLEND 3
+%if cpuflag(avx2)
+    vpblendvb %1, %1, %2, %3
+%else ; emulation: (%2 & mask) | (%1 & ~mask); %2 is modified and %3 is used as the work register
+    pand      %2, %3
+    pandn     %3, %1
+    por       %3, %2
+    SWAP      %1, %3 ; rename registers so the result is reachable as %1
+%endif
+%endmacro
+
+; Functions
+
+INIT_XMM sse2
+cglobal rg_fl_mode_1, 4, 5, 3, 0, dst, src, stride, pixels ; clamp c to [min, max] of its 8 neighbours
+    mov r4q, strideq
+    neg r4q ; r4 = -stride, used to address the row above
+    %define stride_p strideq
+    %define stride_n r4q
+
+    .loop:
+        movu m0, [a1] ; m0 = running maximum
+        mova m1, m0   ; m1 = running minimum
+
+        movu m2, [a2]
+        pmaxub m0, m2
+        pminub m1, m2
+
+        movu m2, [a3]
+        pmaxub m0, m2
+        pminub m1, m2
+
+        movu m2, [a4]
+        pmaxub m0, m2
+        pminub m1, m2
+
+        movu m2, [a5]
+        pmaxub m0, m2
+        pminub m1, m2
+
+        movu m2, [a6]
+        pmaxub m0, m2
+        pminub m1, m2
+
+        movu m2, [a7]
+        pmaxub m0, m2
+        pminub m1, m2
+
+        movu m2, [a8]
+        pmaxub m0, m2
+        pminub m1, m2
+
+        movu m2, [c]
+        pminub m2, m0 ; not above the neighbourhood maximum
+        pmaxub m2, m1 ; not below the neighbourhood minimum
+
+        movu [dstq], m2
+        add srcq, mmsize
+        add dstq, mmsize
+        sub pixelsd, mmsize ; mmsize pixels handled per iteration
+    jg .loop
+RET
+
+%if ARCH_X86_64
+cglobal rg_fl_mode_2, 4, 5, 10, 0, dst, src, stride, pixels ; clamp c between the 2nd and 7th sorted neighbour
+    mov r4q, strideq
+    neg r4q ; r4 = -stride, used to address the row above
+    %define stride_p strideq
+    %define stride_n r4q
+
+    .loop:
+        LOAD_SQUARE
+        SORT_SQUARE ; m1..m8 now ascending
+
+        CLIPUB m0, m2, m7
+
+        movu [dstq], m0
+        add srcq, mmsize
+        add dstq, mmsize
+        sub pixelsd, mmsize ; mmsize pixels handled per iteration
+    jg .loop
+RET
+
+cglobal rg_fl_mode_3, 4, 5, 10, 0, dst, src, stride, pixels ; clamp c between the 3rd and 6th sorted neighbour
+    mov r4q, strideq
+    neg r4q ; r4 = -stride, used to address the row above
+    %define stride_p strideq
+    %define stride_n r4q
+
+    .loop:
+        LOAD_SQUARE
+        SORT_SQUARE ; m1..m8 now ascending
+
+        CLIPUB m0, m3, m6
+
+        movu [dstq], m0
+        add srcq, mmsize
+        add dstq, mmsize
+        sub pixelsd, mmsize ; mmsize pixels handled per iteration
+    jg .loop
+RET
+
+cglobal rg_fl_mode_4, 4, 5, 10, 0, dst, src, stride, pixels ; clamp c between the 4th and 5th sorted neighbour
+    mov r4q, strideq
+    neg r4q ; r4 = -stride, used to address the row above
+    %define stride_p strideq
+    %define stride_n r4q
+
+    .loop:
+        LOAD_SQUARE
+        SORT_SQUARE ; m1..m8 now ascending
+
+        CLIPUB m0, m4, m5
+
+        movu [dstq], m0
+        add srcq, mmsize
+        add dstq, mmsize
+        sub pixelsd, mmsize ; mmsize pixels handled per iteration
+    jg .loop
+RET
+
+cglobal rg_fl_mode_5, 4, 5, 13, 0, dst, src, stride, pixels ; of the 4 per-axis clamps of c, keep the one closest to c
+    mov r4q, strideq
+    neg r4q ; r4 = -stride, used to address the row above
+    %define stride_p strideq
+    %define stride_n r4q
+
+    .loop:
+        LOAD_SQUARE
+        SORT_AXIS ; m1..m4 = per-axis minimums, m8..m5 = matching maximums
+
+        mova m9, m0
+        mova m10, m0
+        mova m11, m0
+        mova m12, m0
+
+        CLIPUB m9, m1, m8
+        CLIPUB m10, m2, m7
+        CLIPUB m11, m3, m6
+        CLIPUB m12, m4, m5
+
+        mova m8, m9  ; clip1
+        mova m7, m10 ; clip2
+        mova m6, m11 ; clip3
+        mova m5, m12 ; clip4
+
+        ABS_DIFF m9, m0, m1  ; c1
+        ABS_DIFF m10, m0, m2 ; c2
+        ABS_DIFF m11, m0, m3 ; c3
+        ABS_DIFF m12, m0, m4 ; c4
+
+        pminub m9, m10
+        pminub m9, m11
+        pminub m9, m12 ; mindiff
+
+        pcmpeqb m10, m9 ; masks: which axis reached mindiff
+        pcmpeqb m11, m9
+        pcmpeqb m12, m9
+
+        ; Notice the order here: c1, c3, c2, c4
+        BLEND m8, m6, m11
+        BLEND m8, m7, m10
+        BLEND m8, m5, m12
+
+        movu [dstq], m8
+        add srcq, mmsize
+        add dstq, mmsize
+        sub pixelsd, mmsize ; mmsize pixels handled per iteration
+    jg .loop
+RET
+
+cglobal rg_fl_mode_6, 4, 5, 16, 0, dst, src, stride, pixels ; pick the axis clamp minimising 2 * |clip - c| + (max - min)
+    mov r4q, strideq
+    neg r4q ; r4 = -stride, used to address the row above
+    %define stride_p strideq
+    %define stride_n r4q
+
+    ; Some register saving suggestions: the zero can be somewhere other than a
+    ; register, the center pixels could be on the stack.
+
+    pxor m15, m15 ; zero, used by LOAD_SQUARE_16 to widen bytes to words
+    .loop:
+        LOAD_SQUARE_16 m15
+        SORT_AXIS_16 ; m1..m4 = per-axis minimums, m8..m5 = matching maximums
+
+        mova m9, m0
+        mova m10, m0
+        mova m11, m0
+        mova m12, m0
+        CLIPW m9, m1, m8  ; clip1
+        CLIPW m10, m2, m7 ; clip2
+        CLIPW m11, m3, m6 ; clip3
+        CLIPW m12, m4, m5 ; clip4
+
+        psubw m8, m1 ; d1
+        psubw m7, m2 ; d2
+        psubw m6, m3 ; d3
+        psubw m5, m4 ; d4
+
+        mova m1, m9
+        mova m2, m10
+        mova m3, m11
+        mova m4, m12
+        ABS_DIFF_W m1, m0, m13 ; |clip1 - c|
+        ABS_DIFF_W m2, m0, m14
+        ABS_DIFF_W m3, m0, m13
+        ABS_DIFF_W m4, m0, m14
+        psllw m1, 1 ; doubled before the axis range is added
+        psllw m2, 1
+        psllw m3, 1
+        psllw m4, 1
+        paddw m1, m8 ; c1
+        paddw m2, m7 ; c2
+        paddw m3, m6 ; c3
+        paddw m4, m5 ; c4
+        ; As the differences (d1..d4) can only be positive, there is no need to
+        ; clip to zero.  Also, the maximum positive value is less than 768.
+
+        pminsw m1, m2
+        pminsw m1, m3
+        pminsw m1, m4 ; smallest cost; m1 no longer holds c1
+
+        pcmpeqw m2, m1 ; masks: which axis reached the smallest cost
+        pcmpeqw m3, m1
+        pcmpeqw m4, m1
+
+        BLEND m9, m11, m3
+        BLEND m9, m10, m2
+        BLEND m9, m12, m4
+        packuswb m9, m9 ; back to bytes
+
+        movh [dstq], m9
+        add srcq, mmsize/2
+        add dstq, mmsize/2
+        sub pixelsd, mmsize/2 ; mmsize/2 pixels handled per iteration
+    jg .loop
+RET
+
+; This is just copy-pasted straight from mode 6 with the left shifts removed.
+cglobal rg_fl_mode_7, 4, 5, 16, 0, dst, src, stride, pixels ; pick the axis clamp minimising |clip - c| + (max - min)
+    mov r4q, strideq
+    neg r4q ; r4 = -stride, used to address the row above
+    %define stride_p strideq
+    %define stride_n r4q
+
+    ; Can this be done without unpacking?
+
+    pxor m15, m15 ; zero, used by LOAD_SQUARE_16 to widen bytes to words
+    .loop:
+        LOAD_SQUARE_16 m15
+        SORT_AXIS_16 ; m1..m4 = per-axis minimums, m8..m5 = matching maximums
+
+        mova m9, m0
+        mova m10, m0
+        mova m11, m0
+        mova m12, m0
+        CLIPW m9, m1, m8  ; clip1
+        CLIPW m10, m2, m7 ; clip2
+        CLIPW m11, m3, m6 ; clip3
+        CLIPW m12, m4, m5 ; clip4
+
+        psubw m8, m1 ; d1
+        psubw m7, m2 ; d2
+        psubw m6, m3 ; d3
+        psubw m5, m4 ; d4
+
+        mova m1, m9
+        mova m2, m10
+        mova m3, m11
+        mova m4, m12
+        ABS_DIFF_W m1, m0, m13 ; |clip1 - c|
+        ABS_DIFF_W m2, m0, m14
+        ABS_DIFF_W m3, m0, m13
+        ABS_DIFF_W m4, m0, m14
+        paddw m1, m8 ; c1
+        paddw m2, m7 ; c2
+        paddw m3, m6 ; c3
+        paddw m4, m5 ; c4
+
+        pminsw m1, m2
+        pminsw m1, m3
+        pminsw m1, m4 ; smallest cost; m1 no longer holds c1
+
+        pcmpeqw m2, m1 ; masks: which axis reached the smallest cost
+        pcmpeqw m3, m1
+        pcmpeqw m4, m1
+
+        BLEND m9, m11, m3
+        BLEND m9, m10, m2
+        BLEND m9, m12, m4
+        packuswb m9, m9 ; back to bytes
+
+        movh [dstq], m9
+        add srcq, mmsize/2
+        add dstq, mmsize/2
+        sub pixelsd, mmsize/2 ; mmsize/2 pixels handled per iteration
+    jg .loop
+RET
+
+; This is just copy-pasted straight from mode 6 with a few changes.
+cglobal rg_fl_mode_8, 4, 5, 16, 0, dst, src, stride, pixels
+    mov r4q, strideq
+    neg r4q ; r4 = -stride
+    %define stride_p strideq
+    %define stride_n r4q
+
+    pxor m15, m15 ; all-zero register handed to LOAD_SQUARE_16
+    .loop:
+        LOAD_SQUARE_16 m15
+        SORT_AXIS_16
+
+        mova m9, m0
+        mova m10, m0
+        mova m11, m0
+        mova m12, m0
+        CLIPW m9, m1, m8  ; clip1
+        CLIPW m10, m2, m7 ; clip2
+        CLIPW m11, m3, m6 ; clip3
+        CLIPW m12, m4, m5 ; clip4
+
+        psubw m8, m1 ; d1
+        psubw m7, m2 ; d2
+        psubw m6, m3 ; d3
+        psubw m5, m4 ; d4
+        psllw m8, 1 ; here the dN terms are doubled, not the differences
+        psllw m7, 1
+        psllw m6, 1
+        psllw m5, 1
+
+        mova m1, m9
+        mova m2, m10
+        mova m3, m11
+        mova m4, m12
+        ABS_DIFF_W m1, m0, m13 ; presumably |clip1 - m0| (macro defined outside this view)
+        ABS_DIFF_W m2, m0, m14
+        ABS_DIFF_W m3, m0, m13
+        ABS_DIFF_W m4, m0, m14
+        paddw m1, m8 ; c1
+        paddw m2, m7 ; c2
+        paddw m3, m6 ; c3
+        paddw m4, m5 ; c4
+        ; As the differences (d1..d4) can only be positive, there is no need to
+        ; clip to zero.  Also, the maximum positive value is less than 768.
+
+        pminsw m1, m2
+        pminsw m1, m3
+        pminsw m1, m4 ; m1 = min(c1, c2, c3, c4)
+
+        pcmpeqw m2, m1 ; mask: c2 equals the minimum
+        pcmpeqw m3, m1 ; mask: c3 equals the minimum
+        pcmpeqw m4, m1 ; mask: c4 equals the minimum
+
+        BLEND m9, m11, m3 ; presumably replaces clip1 with clip3 where the mask is set
+        BLEND m9, m10, m2
+        BLEND m9, m12, m4
+        packuswb m9, m9 ; words -> bytes with unsigned saturation
+
+        movh [dstq], m9 ; only the low half is stored, matching the mmsize/2 step below
+        add srcq, mmsize/2
+        add dstq, mmsize/2
+        sub pixelsd, mmsize/2
+    jg .loop ; flags come from the sub above: loop while pixels > 0
+RET
+
+cglobal rg_fl_mode_9, 4, 5, 13, 0, dst, src, stride, pixels
+    mov r4q, strideq
+    neg r4q ; r4 = -stride
+    %define stride_p strideq
+    %define stride_n r4q
+
+    .loop:
+        LOAD_SQUARE
+        SORT_AXIS
+
+        mova m9, m0
+        mova m10, m0
+        mova m11, m0
+        mova m12, m0
+        CLIPUB m9, m1, m8  ; clip1
+        CLIPUB m10, m2, m7 ; clip2
+        CLIPUB m11, m3, m6 ; clip3
+        CLIPUB m12, m4, m5 ; clip4
+
+        psubb m8, m1 ; d1
+        psubb m7, m2 ; d2
+        psubb m6, m3 ; d3
+        psubb m5, m4 ; d4
+
+        pminub m8, m7
+        pminub m8, m6
+        pminub m8, m5 ; m8 = min(d1, d2, d3, d4)
+
+        pcmpeqb m7, m8 ; mask: d2 equals the minimum
+        pcmpeqb m6, m8 ; mask: d3 equals the minimum
+        pcmpeqb m5, m8 ; mask: d4 equals the minimum
+
+        BLEND m9, m11, m6 ; presumably replaces clip1 with clip3 where the mask is set
+        BLEND m9, m10, m7
+        BLEND m9, m12, m5
+
+        movu [dstq], m9 ; byte-wide kernel: a full register of pixels per iteration
+        add srcq, mmsize
+        add dstq, mmsize
+        sub pixelsd, mmsize
+    jg .loop ; flags come from the sub above: loop while pixels > 0
+RET
+%endif
+
+cglobal rg_fl_mode_10, 4, 5, 8, 0, dst, src, stride, pixels
+    mov r4q, strideq
+    neg r4q ; r4 = -stride
+    %define stride_p strideq
+    %define stride_n r4q
+
+    .loop:
+        movu m0, [c] ; center pixels, compared against every neighbour below
+
+        movu m1, [a4]
+        mova m2, m1 ; m2 = current result, m1 = current minimum difference
+        ABS_DIFF m1, m0, m7
+
+        movu m3, [a5]       ; load pixel
+        mova m4, m3
+        ABS_DIFF m4, m0, m7 ; absolute difference from center
+        pminub m1, m4       ; mindiff
+        pcmpeqb m4, m1      ; if (difference == mindiff)
+        BLEND m2, m3, m4    ;     return pixel
+
+        movu m5, [a1] ; same load/diff/min/compare/blend step for each remaining neighbour
+        mova m6, m5
+        ABS_DIFF m6, m0, m7
+        pminub m1, m6
+        pcmpeqb m6, m1
+        BLEND m2, m5, m6
+
+        movu m3, [a3]
+        mova m4, m3
+        ABS_DIFF m4, m0, m7
+        pminub m1, m4
+        pcmpeqb m4, m1
+        BLEND m2, m3, m4
+
+        movu m5, [a2]
+        mova m6, m5
+        ABS_DIFF m6, m0, m7
+        pminub m1, m6
+        pcmpeqb m6, m1
+        BLEND m2, m5, m6
+
+        movu m3, [a6]
+        mova m4, m3
+        ABS_DIFF m4, m0, m7
+        pminub m1, m4
+        pcmpeqb m4, m1
+        BLEND m2, m3, m4
+
+        movu m5, [a8]
+        mova m6, m5
+        ABS_DIFF m6, m0, m7
+        pminub m1, m6
+        pcmpeqb m6, m1
+        BLEND m2, m5, m6
+
+        movu m3, [a7]
+        mova m4, m3
+        ABS_DIFF m4, m0, m7
+        pminub m1, m4
+        pcmpeqb m4, m1
+        BLEND m2, m3, m4
+
+        movu [dstq], m2 ; byte-wide kernel: a full register of pixels per iteration
+        add srcq, mmsize
+        add dstq, mmsize
+        sub pixelsd, mmsize
+    jg .loop ; flags come from the sub above: loop while pixels > 0
+RET
+
+cglobal rg_fl_mode_11_12, 4, 5, 7, 0, dst, src, stride, pixels
+    mov r4q, strideq
+    neg r4q ; r4 = -stride
+    %define stride_p strideq
+    %define stride_n r4q
+
+    pxor m0, m0 ; zero register passed to every LOAD (presumably for byte->word unpacking)
+    .loop:
+        LOAD m1, [c], m0
+        LOAD m2, [a2], m0
+        LOAD m3, [a4], m0
+        LOAD m4, [a5], m0
+        LOAD m5, [a7], m0
+
+        psllw m1, 2 ; 4 * c
+        paddw m2, m3
+        paddw m4, m5
+        paddw m2, m4
+        psllw m2, 1 ; 2 * (a2 + a4 + a5 + a7)
+
+        LOAD m3, [a1], m0
+        LOAD m4, [a3], m0
+        LOAD m5, [a6], m0
+        LOAD m6, [a8], m0
+        paddw m1, m2
+        paddw m3, m4
+        paddw m5, m6
+        paddw m1, m3
+        paddw m1, m5 ; += a1 + a3 + a6 + a8; the weights now total 16
+
+        paddw m1, [pw_8] ; pw_8 is defined outside this view; presumably the rounding term
+        psraw m1, 4 ; divide the weighted sum by 16
+
+        packuswb m1, m1 ; words -> bytes with unsigned saturation
+
+        movh [dstq], m1 ; only the low half is stored, matching the mmsize/2 step below
+        add srcq, mmsize/2
+        add dstq, mmsize/2
+        sub pixelsd, mmsize/2
+    jg .loop ; flags come from the sub above: loop while pixels > 0
+RET
+
+cglobal rg_fl_mode_13_14, 4, 5, 8, 0, dst, src, stride, pixels
+    mov r4q, strideq
+    neg r4q ; r4 = -stride
+    %define stride_p strideq
+    %define stride_n r4q
+
+    .loop:
+        movu m1, [a1]
+        movu m2, [a8]
+        mova m0, m1
+        pavgb m1, m2 ; m1 = rounded average of a1 and a8 (initial result)
+        ABS_DIFF m0, m2, m6 ; presumably m0 = |a1 - a8| (initial minimum difference)
+
+        movu m3, [a3]
+        movu m4, [a6]
+        mova m5, m3
+        pavgb m3, m4 ; average of a3 and a6
+        ABS_DIFF m5, m4, m7
+        pminub m0, m5 ; running minimum difference
+        pcmpeqb m5, m0 ; mask: this pair's difference equals the minimum
+        BLEND m1, m3, m5 ; presumably takes this pair's average where the mask is set
+
+        movu m2, [a2]
+        movu m3, [a7]
+        mova m4, m2
+        pavgb m2, m3 ; average of a2 and a7
+        ABS_DIFF m4, m3, m6
+        pminub m0, m4
+        pcmpeqb m4, m0
+        BLEND m1, m2, m4
+
+        movu [dstq], m1 ; byte-wide kernel: a full register of pixels per iteration
+        add srcq, mmsize
+        add dstq, mmsize
+        sub pixelsd, mmsize
+    jg .loop ; flags come from the sub above: loop while pixels > 0
+RET
+
+%if ARCH_X86_64
+cglobal rg_fl_mode_15_16, 4, 5, 16, 0, dst, src, stride, pixels
+    mov r4q, strideq
+    neg r4q ; r4 = -stride
+    %define stride_p strideq
+    %define stride_n r4q
+
+    pxor m15, m15 ; all-zero register handed to LOAD_SQUARE_16
+    .loop:
+        LOAD_SQUARE_16 m15
+
+        mova m9, m1
+        mova m10, m2
+        mova m11, m3
+        ABS_DIFF_W m9, m8, m12 ; presumably |m1 - m8| (macro defined outside this view)
+        ABS_DIFF_W m10, m7, m13
+        ABS_DIFF_W m11, m6, m14
+        pminsw m9, m10
+        pminsw m9, m11 ; m9 = smallest of the three pair differences
+        pcmpeqw m10, m9 ; mask: the m2/m7 pair has the minimum difference
+        pcmpeqw m11, m9 ; mask: the m3/m6 pair has the minimum difference
+
+        mova m12, m2
+        mova m13, m1
+        mova m14, m6
+        paddw m12, m7
+        psllw m12, 1 ; 2 * (m2 + m7)
+        paddw m13, m3
+        paddw m14, m8
+        paddw m12, [pw_4] ; pw_4 is defined outside this view; presumably the rounding term
+        paddw m13, m14
+        paddw m12, m13
+        psrlw m12, 3 ; m12 = (m1 + 2*m2 + m3 + m6 + 2*m7 + m8 + 4) >> 3
+
+        SORT_PAIR ub, m1, m8, m0 ; NOTE(review): byte-wise sort on word data; relies on zero high bytes - confirm
+        SORT_PAIR ub, m2, m7, m9
+        SORT_PAIR ub, m3, m6, m14
+        mova m4, m12
+        mova m5, m12
+        CLIPW m4, m1, m8 ; clip the weighted average to each pair's range
+        CLIPW m5, m2, m7
+        CLIPW m12, m3, m6
+
+        BLEND m4, m12, m11
+        BLEND m4,  m5, m10
+        packuswb m4, m4 ; words -> bytes with unsigned saturation
+
+        movh [dstq], m4 ; only the low half is stored, matching the mmsize/2 step below
+        add srcq, mmsize/2
+        add dstq, mmsize/2
+        sub pixelsd, mmsize/2
+    jg .loop ; flags come from the sub above: loop while pixels > 0
+RET
+
+cglobal rg_fl_mode_17, 4, 5, 9, 0, dst, src, stride, pixels
+    mov r4q, strideq
+    neg r4q ; r4 = -stride
+    %define stride_p strideq
+    %define stride_n r4q
+
+    .loop:
+        LOAD_SQUARE
+        SORT_AXIS ; presumably leaves m1..m4 as per-axis minima and m8..m5 as maxima - confirm
+
+        pmaxub m1, m2
+        pmaxub m3, m4
+
+        pminub m8, m7
+        pminub m5, m6
+
+        pmaxub m1, m3 ; m1 = largest of m1..m4
+        pminub m8, m5 ; m8 = smallest of m5..m8
+
+        mova m2, m1
+        pminub m1, m8 ; order the two bounds so that m1 <= m8
+        pmaxub m8, m2
+
+        CLIPUB m0, m1, m8 ; clip m0 to [m1, m8]
+
+        movu [dstq], m0 ; byte-wide kernel: a full register of pixels per iteration
+        add srcq, mmsize
+        add dstq, mmsize
+        sub pixelsd, mmsize
+    jg .loop ; flags come from the sub above: loop while pixels > 0
+RET
+
+cglobal rg_fl_mode_18, 4, 5, 16, 0, dst, src, stride, pixels
+    mov r4q, strideq
+    neg r4q ; r4 = -stride
+    %define stride_p strideq
+    %define stride_n r4q
+
+    .loop:
+        LOAD_SQUARE
+
+        mova m9, m1
+        mova m10, m8
+        ABS_DIFF m9, m0, m11
+        ABS_DIFF m10, m0, m12
+        pmaxub m9, m10 ; m9 = d1
+
+        mova m10, m2
+        mova m11, m7
+        ABS_DIFF m10, m0, m12
+        ABS_DIFF m11, m0, m13
+        pmaxub m10, m11 ; m10 = d2
+
+        mova m11, m3
+        mova m12, m6
+        ABS_DIFF m11, m0, m13
+        ABS_DIFF m12, m0, m14
+        pmaxub m11, m12 ; m11 = d3
+
+        mova m12, m4
+        mova m13, m5
+        ABS_DIFF m12, m0, m14
+        ABS_DIFF m13, m0, m15
+        pmaxub m12, m13 ; m12 = d4
+
+        mova m13, m9
+        pminub m13, m10
+        pminub m13, m11
+        pminub m13, m12 ; m13 = mindiff
+
+        pcmpeqb m10, m13 ; mask: d2 == mindiff
+        pcmpeqb m11, m13 ; mask: d3 == mindiff
+        pcmpeqb m12, m13 ; mask: d4 == mindiff
+
+        mova m14, m1
+        pminub m1, m8 ; order the pair: m1 = min, m8 = max
+        pmaxub m8, m14
+
+        mova m13, m0
+        mova m14, m1 ; NOTE(review): m1/m8 were ordered just above; this second min/max pass looks redundant
+        pminub m1, m8
+        pmaxub m8, m14
+        CLIPUB m13, m1, m8 ; m13 = ret...d1
+
+        mova m14, m0
+        mova m15, m3
+        pminub m3, m6
+        pmaxub m6, m15
+        CLIPUB m14, m3, m6
+        pand m14, m11 ; keep the d3 candidate where its mask is set...
+        pandn m11, m13 ; ...and the previous result elsewhere
+        por m14, m11 ; m14 = ret...d3
+
+        mova m15, m0
+        mova m1, m2
+        pminub m2, m7
+        pmaxub m7, m1
+        CLIPUB m15, m2, m7
+        pand m15, m10
+        pandn m10, m14
+        por m15, m10 ; m15 = ret...d2
+
+        mova m1, m0
+        mova m2, m4
+        pminub m4, m5
+        pmaxub m5, m2
+        CLIPUB m1, m4, m5
+        pand m1, m12
+        pandn m12, m15
+        por m1, m12 ; m1 = ret...d4
+
+        movu [dstq], m1 ; byte-wide kernel: a full register of pixels per iteration
+        add srcq, mmsize
+        add dstq, mmsize
+        sub pixelsd, mmsize
+    jg .loop ; flags come from the sub above: loop while pixels > 0
+RET
+%endif
+
+cglobal rg_fl_mode_19, 4, 5, 7, 0, dst, src, stride, pixels
+    mov r4q, strideq
+    neg r4q ; r4 = -stride
+    %define stride_p strideq
+    %define stride_n r4q
+
+    pxor m0, m0 ; zero register passed to every LOAD (presumably for byte->word unpacking)
+    .loop:
+        LOAD m1, [a1], m0
+        LOAD m2, [a2], m0
+        paddw m1, m2
+
+        LOAD m3, [a3], m0
+        LOAD m4, [a4], m0
+        paddw m3, m4
+
+        LOAD m5, [a5], m0
+        LOAD m6, [a6], m0
+        paddw m5, m6
+
+        LOAD m2, [a7], m0
+        LOAD m4, [a8], m0
+        paddw m2, m4
+
+        paddw m1, m3
+        paddw m2, m5
+        paddw m1, m2 ; m1 = a1 + a2 + ... + a8 (the center pixel is not loaded)
+
+        paddw m1, [pw_4] ; pw_4 is defined outside this view; presumably the rounding term
+        psraw m1, 3 ; divide the sum of the eight neighbours by 8
+
+        packuswb m1, m1 ; words -> bytes with unsigned saturation
+
+        movh [dstq], m1 ; only the low half is stored, matching the mmsize/2 step below
+        add srcq, mmsize/2
+        add dstq, mmsize/2
+        sub pixelsd, mmsize/2
+    jg .loop ; flags come from the sub above: loop while pixels > 0
+RET
+
+cglobal rg_fl_mode_20, 4, 5, 7, 0, dst, src, stride, pixels
+    mov r4q, strideq
+    neg r4q ; r4 = -stride
+    %define stride_p strideq
+    %define stride_n r4q
+
+    pxor m0, m0 ; zero register passed to every LOAD (presumably for byte->word unpacking)
+    .loop:
+        LOAD m1, [a1], m0
+        LOAD m2, [a2], m0
+        paddw m1, m2
+
+        LOAD m3, [a3], m0
+        LOAD m4, [a4], m0
+        paddw m3, m4
+
+        LOAD m5, [a5], m0
+        LOAD m6, [a6], m0
+        paddw m5, m6
+
+        LOAD m2, [a7], m0
+        LOAD m4, [a8], m0
+        paddw m2, m4
+
+        LOAD m6, [c], m0 ; unlike mode 19, the center pixel joins the sum
+        paddw m1, m3
+        paddw m2, m5
+        paddw m6, [pw_4] ; pw_4 is defined outside this view; presumably the rounding term
+
+        paddw m1, m2
+        paddw m1, m6 ; m1 = a1 + ... + a8 + c + pw_4
+
+        pmulhuw m1, [pw_div9] ; keeps the high word of the product; pw_div9 (outside this view) presumably encodes 1/9
+
+        packuswb m1, m1 ; words -> bytes with unsigned saturation
+
+        movh [dstq], m1 ; only the low half is stored, matching the mmsize/2 step below
+        add srcq, mmsize/2
+        add dstq, mmsize/2
+        sub pixelsd, mmsize/2
+    jg .loop ; flags come from the sub above: loop while pixels > 0
+RET
+
+cglobal rg_fl_mode_21, 4, 5, 8, 0, dst, src, stride, pixels
+    mov r4q, strideq
+    neg r4q ; r4 = -stride
+    %define stride_p strideq
+    %define stride_n r4q
+
+    pxor m0, m0 ; zero for the punpck{l,h}bw byte->word widening below
+    .loop:
+        movu m1, [a1]
+        movu m2, [a8]
+        pavgb m7, m1, m2 ; rounding average (a + b + 1) >> 1, feeds the upper bound
+        punpckhbw m3, m1, m0
+        punpcklbw m1, m0
+        punpckhbw m4, m2, m0
+        punpcklbw m2, m0
+        paddw m3, m4
+        paddw m1, m2
+        psrlw m3, 1 ; truncating average (a + b) >> 1, feeds the lower bound
+        psrlw m1, 1
+        packuswb m1, m3
+
+        movu m2, [a2]
+        movu m3, [a7]
+        pavgb m6, m2, m3
+        punpckhbw m4, m2, m0
+        punpcklbw m2, m0
+        punpckhbw m5, m3, m0
+        punpcklbw m3, m0
+        paddw m4, m5
+        paddw m2, m3
+        psrlw m4, 1
+        psrlw m2, 1
+        packuswb m2, m4
+
+        pminub m1, m2 ; m1 = running minimum of the truncated averages
+        pmaxub m7, m6 ; m7 = running maximum of the rounded averages
+
+        movu m2, [a3]
+        movu m3, [a6]
+        pavgb m6, m2, m3
+        punpckhbw m4, m2, m0
+        punpcklbw m2, m0
+        punpckhbw m5, m3, m0
+        punpcklbw m3, m0
+        paddw m4, m5
+        paddw m2, m3
+        psrlw m4, 1
+        psrlw m2, 1
+        packuswb m2, m4
+
+        pminub m1, m2
+        pmaxub m7, m6
+
+        movu m2, [a4]
+        movu m3, [a5]
+        pavgb m6, m2, m3
+        punpckhbw m4, m2, m0
+        punpcklbw m2, m0
+        punpckhbw m5, m3, m0
+        punpcklbw m3, m0
+        paddw m4, m5
+        paddw m2, m3
+        psrlw m4, 1
+        psrlw m2, 1
+        packuswb m2, m4
+
+        pminub m1, m2
+        pmaxub m7, m6
+
+        movu m3, [c]
+        CLIPUB m3, m1, m7 ; clip the center pixels to [m1, m7]
+
+        movu [dstq], m3 ; byte-wide kernel: a full register of pixels per iteration
+        add srcq, mmsize
+        add dstq, mmsize
+        sub pixelsd, mmsize
+    jg .loop ; flags come from the sub above: loop while pixels > 0
+RET
+
+cglobal rg_fl_mode_22, 4, 5, 8, 0, dst, src, stride, pixels
+    mov r4q, strideq
+    neg r4q ; r4 = -stride
+    %define stride_p strideq
+    %define stride_n r4q
+
+    .loop:
+        movu m0, [a1]
+        movu m1, [a8]
+        pavgb m0, m1 ; m0 = rounded average of a1 and a8
+        movu m2, [a2]
+        movu m3, [a7]
+        pavgb m2, m3 ; m2 = rounded average of a2 and a7
+        movu m4, [a3]
+        movu m5, [a6]
+        pavgb m4, m5 ; m4 = rounded average of a3 and a6
+        movu m6, [a4]
+        movu m7, [a5]
+        pavgb m6, m7 ; m6 = rounded average of a4 and a5
+
+        mova m1, m0
+        mova m3, m2
+        mova m5, m4
+        mova m7, m6
+        pminub m0, m2
+        pminub m4, m6
+        pmaxub m1, m3
+        pmaxub m5, m7
+        pminub m0, m4 ; m0 = minimum of the four averages
+        pmaxub m1, m5 ; m1 = maximum of the four averages
+
+        movu m2, [c]
+        CLIPUB m2, m0, m1 ; clip the center pixels to [m0, m1]
+
+        movu [dstq], m2 ; byte-wide kernel: a full register of pixels per iteration
+        add srcq, mmsize
+        add dstq, mmsize
+        sub pixelsd, mmsize
+    jg .loop ; flags come from the sub above: loop while pixels > 0
+RET
+
+%if ARCH_X86_64
+cglobal rg_fl_mode_23, 4, 5, 16, 0, dst, src, stride, pixels
+    mov r4q, strideq
+    neg r4q ; r4 = -stride
+    %define stride_p strideq
+    %define stride_n r4q
+
+    pxor m15, m15 ; stays zero: LOAD_SQUARE_16 argument and the max(x, 0) operand below
+    .loop:
+        LOAD_SQUARE_16 m15
+        SORT_AXIS_16
+
+        mova m9, m8
+        mova m10, m7
+        mova m11, m6
+        mova m12, m5
+        psubw m9, m1  ; linediff1
+        psubw m10, m2 ; linediff2
+        psubw m11, m3 ; linediff3
+        psubw m12, m4 ; linediff4
+
+        psubw m1, m0 ; m1..m4 -= m0 (lower value of each pair minus m0)
+        psubw m2, m0
+        psubw m3, m0
+        psubw m4, m0
+        pminsw m1, m9  ; d1
+        pminsw m2, m10 ; d2
+        pminsw m3, m11 ; d3
+        pminsw m4, m12 ; d4
+        pmaxsw m1, m2
+        pmaxsw m3, m4
+        pmaxsw m1, m3
+        pmaxsw m1, m15 ; d
+
+        mova m13, m0
+        mova m14, m0
+        mova m2, m0
+        mova m4, m0
+        psubw m13, m8 ; m0 minus the upper value of each pair
+        psubw m14, m7
+        psubw m2, m6
+        psubw m4, m5
+        pminsw m9, m13  ; u1
+        pminsw m10, m14 ; u2
+        pminsw m11, m2  ; u3
+        pminsw m12, m4  ; u4
+        pmaxsw m9, m10
+        pmaxsw m11, m12
+        pmaxsw m9, m11
+        pmaxsw m9, m15  ; u
+
+        paddw m0, m1
+        psubw m0, m9 ; result = m0 + d - u
+        packuswb m0, m0 ; words -> bytes with unsigned saturation
+
+        movh [dstq], m0 ; only the low half is stored, matching the mmsize/2 step below
+        add srcq, mmsize/2
+        add dstq, mmsize/2
+        sub pixelsd, mmsize/2
+    jg .loop ; flags come from the sub above: loop while pixels > 0
+RET
+
+cglobal rg_fl_mode_24, 4, 5, 16, mmsize, dst, src, stride, pixels
+    mov r4q, strideq
+    neg r4q ; r4 = -stride
+    %define stride_p strideq
+    %define stride_n r4q
+
+    pxor m15, m15 ; all-zero register handed to LOAD_SQUARE_16
+    .loop:
+        LOAD_SQUARE_16 m15
+        mova [rsp], m0 ; spill m0 to the stack slot: m0 is reused as scratch below
+        SORT_AXIS_16
+
+        mova m9, m8
+        mova m10, m7
+        mova m11, m6
+        mova m12, m5
+        psubw m9, m1  ; linediff1
+        psubw m10, m2 ; linediff2
+        psubw m11, m3 ; linediff3
+        psubw m12, m4 ; linediff4
+
+        psubw m1, [rsp] ; td1
+        psubw m2, [rsp] ; td2
+        psubw m3, [rsp] ; td3
+        psubw m4, [rsp] ; td4
+        mova m0, m9
+        mova m13, m10
+        mova m14, m11
+        mova m15, m12 ; m15 stops being zero here; it is re-zeroed before the loop repeats
+        psubw m0, m1 ; linediffN - tdN
+        psubw m13, m2
+        psubw m14, m3
+        psubw m15, m4
+        pminsw m1, m0  ; d1
+        pminsw m2, m13 ; d2
+        pminsw m3, m14 ; d3
+        pminsw m4, m15 ; d4
+        pmaxsw m1, m2
+        pmaxsw m3, m4
+
+        mova m0, [rsp]
+        mova m13, [rsp]
+        mova m14, [rsp]
+        mova m15, [rsp]
+        psubw m0, m8  ; tu1
+        psubw m13, m7 ; tu2
+        psubw m14, m6 ; tu3
+        psubw m15, m5 ; tu4
+        psubw m9, m0 ; linediffN - tuN
+        psubw m10, m13
+        psubw m11, m14
+        psubw m12, m15
+        pminsw m9, m0   ; u1
+        pminsw m10, m13 ; u2
+        pminsw m11, m14 ; u3
+        pminsw m12, m15 ; u4
+        pmaxsw m9, m10
+        pmaxsw m11, m12
+
+        pmaxsw m1, m3  ; d without max(d,0)
+        pmaxsw m9, m11  ; u without max(u,0)
+        pxor m15, m15 ; re-zero m15 for the max(x, 0) below and the next LOAD_SQUARE_16
+        pmaxsw m1, m15
+        pmaxsw m9, m15
+
+        mova m0, [rsp] ; reload the spilled m0
+        paddw m0, m1
+        psubw m0, m9 ; result = m0 + d - u
+        packuswb m0, m0 ; words -> bytes with unsigned saturation
+
+        movh [dstq], m0 ; only the low half is stored, matching the mmsize/2 step below
+        add srcq, mmsize/2
+        add dstq, mmsize/2
+        sub pixelsd, mmsize/2
+    jg .loop ; flags come from the sub above: loop while pixels > 0
+RET
+%endif
diff --git a/libavfilter/x86/vf_removegrain_init.c b/libavfilter/x86/vf_removegrain_init.c
new file mode 100644
index 00000000..07314b32
--- /dev/null
+++ b/libavfilter/x86/vf_removegrain_init.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2015 James Darnley
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavfilter/removegrain.h"
+
+void ff_rg_fl_mode_1_sse2(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int pixels); /* kernels available on both x86-32 and x86-64 */
+void ff_rg_fl_mode_10_sse2(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int pixels);
+void ff_rg_fl_mode_11_12_sse2(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int pixels);
+void ff_rg_fl_mode_13_14_sse2(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int pixels);
+void ff_rg_fl_mode_19_sse2(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int pixels);
+void ff_rg_fl_mode_20_sse2(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int pixels);
+void ff_rg_fl_mode_21_sse2(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int pixels);
+void ff_rg_fl_mode_22_sse2(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int pixels);
+#if ARCH_X86_64 /* the asm for these kernels is assembled only under ARCH_X86_64 */
+void ff_rg_fl_mode_2_sse2(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int pixels);
+void ff_rg_fl_mode_3_sse2(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int pixels);
+void ff_rg_fl_mode_4_sse2(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int pixels);
+void ff_rg_fl_mode_5_sse2(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int pixels);
+void ff_rg_fl_mode_6_sse2(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int pixels);
+void ff_rg_fl_mode_7_sse2(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int pixels);
+void ff_rg_fl_mode_8_sse2(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int pixels);
+void ff_rg_fl_mode_9_sse2(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int pixels);
+void ff_rg_fl_mode_15_16_sse2(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int pixels);
+void ff_rg_fl_mode_17_sse2(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int pixels);
+void ff_rg_fl_mode_18_sse2(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int pixels);
+void ff_rg_fl_mode_23_sse2(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int pixels);
+void ff_rg_fl_mode_24_sse2(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int pixels);
+#endif /* ARCH_X86_64 */
+
+av_cold void ff_removegrain_init_x86(RemoveGrainContext *rg) /* installs SSE2 line kernels into rg->fl[] per plane */
+{
+#if CONFIG_GPL /* without CONFIG_GPL the function body is empty */
+    int cpu_flags = av_get_cpu_flags();
+    int i;
+
+    for (i = 0; i < rg->nb_planes; i++) { /* each plane has its own mode and kernel slot */
+        if (EXTERNAL_SSE2(cpu_flags))
+            switch (rg->mode[i]) { /* no default: modes without a case keep the existing rg->fl[i] */
+                case 1: rg->fl[i] = ff_rg_fl_mode_1_sse2; break;
+                case 10: rg->fl[i] = ff_rg_fl_mode_10_sse2; break;
+                case 11: /* fall through */
+                case 12: rg->fl[i] = ff_rg_fl_mode_11_12_sse2; break;
+                case 13: /* fall through */
+                case 14: rg->fl[i] = ff_rg_fl_mode_13_14_sse2; break;
+                case 19: rg->fl[i] = ff_rg_fl_mode_19_sse2; break;
+                case 20: rg->fl[i] = ff_rg_fl_mode_20_sse2; break;
+                case 21: rg->fl[i] = ff_rg_fl_mode_21_sse2; break;
+                case 22: rg->fl[i] = ff_rg_fl_mode_22_sse2; break;
+#if ARCH_X86_64 /* these kernels are declared and assembled only for x86-64 */
+                case 2: rg->fl[i] = ff_rg_fl_mode_2_sse2; break;
+                case 3: rg->fl[i] = ff_rg_fl_mode_3_sse2; break;
+                case 4: rg->fl[i] = ff_rg_fl_mode_4_sse2; break;
+                case 5: rg->fl[i] = ff_rg_fl_mode_5_sse2; break;
+                case 6: rg->fl[i] = ff_rg_fl_mode_6_sse2; break;
+                case 7: rg->fl[i] = ff_rg_fl_mode_7_sse2; break;
+                case 8: rg->fl[i] = ff_rg_fl_mode_8_sse2; break;
+                case 9: rg->fl[i] = ff_rg_fl_mode_9_sse2; break;
+                case 15: /* fall through */
+                case 16: rg->fl[i] = ff_rg_fl_mode_15_16_sse2; break;
+                case 17: rg->fl[i] = ff_rg_fl_mode_17_sse2; break;
+                case 18: rg->fl[i] = ff_rg_fl_mode_18_sse2; break;
+                case 23: rg->fl[i] = ff_rg_fl_mode_23_sse2; break;
+                case 24: rg->fl[i] = ff_rg_fl_mode_24_sse2; break;
+#endif /* ARCH_X86_64 */
+            }
+    }
+#endif /* CONFIG_GPL */
+}
diff --git a/libavfilter/x86/vf_spp.c b/libavfilter/x86/vf_spp.c
index 1cfb9e81..45a9eb06 100644
--- a/libavfilter/x86/vf_spp.c
+++ b/libavfilter/x86/vf_spp.c
@@ -223,8 +223,10 @@ av_cold void ff_spp_init_x86(SPPContext *s)
     int cpu_flags = av_get_cpu_flags();
 
     if (cpu_flags & AV_CPU_FLAG_MMX) {
+        int64_t bps;
         s->store_slice = store_slice_mmx;
-        if (av_get_int(s->dct, "bits_per_sample", NULL) <= 8) {
+        av_opt_get_int(s->dct, "bits_per_sample", 0, &bps);
+        if (bps <= 8) {
             switch (s->mode) {
             case 0: s->requantize = hardthresh_mmx; break;
             case 1: s->requantize = softthresh_mmx; break;
diff --git a/libavfilter/x86/vf_ssim.asm b/libavfilter/x86/vf_ssim.asm
new file mode 100644
index 00000000..3293e667
--- /dev/null
+++ b/libavfilter/x86/vf_ssim.asm
@@ -0,0 +1,247 @@
+;*****************************************************************************
+;* x86-optimized functions for ssim filter
+;*
+;* Copyright (C) 2015 Ronald S. Bultje <rsbultje@gmail.com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+pw_1: times 8 dw 1
+ssim_c1: times 4 dd 416 ;(.01*.01*255*255*64 + .5)
+ssim_c2: times 4 dd 235963 ;(.03*.03*255*255*64*63 + .5)
+
+SECTION .text
+
+%macro SSIM_4X4_LINE 1
+%if ARCH_X86_64
+cglobal ssim_4x4_line, 6, 8, %1, buf, buf_stride, ref, ref_stride, sums, w, buf_stride3, ref_stride3
+%else
+cglobal ssim_4x4_line, 5, 7, %1, buf, buf_stride, ref, ref_stride, sums, buf_stride3, ref_stride3
+%define wd r5mp
+%endif
+    lea     ref_stride3q, [ref_strideq*3] ; 3 * stride, used to address the fourth row
+    lea     buf_stride3q, [buf_strideq*3]
+%if notcpuflag(xop)
+    pxor              m7, m7 ; zero for the punpcklbw byte->word widening
+    mova             m15, [pw_1] ; word ones: pmaddwd by them sums adjacent word pairs
+%endif
+
+.loop:
+%if cpuflag(xop)
+    pmovzxbw          m0, [bufq+buf_strideq*0] ; load 8 bytes zero-extended to words
+    pmovzxbw          m1, [refq+ref_strideq*0]
+    pmaddwd           m4, m0, m0
+    pmaddwd           m6, m0, m1
+    pmovzxbw          m2, [bufq+buf_strideq*1]
+    vpmadcswd         m4, m1, m1, m4 ; XOP multiply-add that accumulates into m4
+    pmovzxbw          m3, [refq+ref_strideq*1]
+    paddw             m0, m2
+    vpmadcswd         m4, m2, m2, m4
+    vpmadcswd         m6, m2, m3, m6
+    paddw             m1, m3
+    vpmadcswd         m4, m3, m3, m4
+
+    pmovzxbw          m2, [bufq+buf_strideq*2]
+    pmovzxbw          m3, [refq+ref_strideq*2]
+    vpmadcswd         m4, m2, m2, m4
+    vpmadcswd         m6, m2, m3, m6
+    pmovzxbw          m5, [bufq+buf_stride3q]
+    pmovzxbw          m7, [refq+ref_stride3q]
+    vpmadcswd         m4, m3, m3, m4
+    vpmadcswd         m6, m5, m7, m6
+    paddw             m0, m2
+    paddw             m1, m3
+    vpmadcswd         m4, m5, m5, m4
+    paddw             m0, m5
+    paddw             m1, m7
+    vpmadcswd         m4, m7, m7, m4
+%else
+    movh              m0, [bufq+buf_strideq*0]  ; a1
+    movh              m1, [refq+ref_strideq*0]  ; b1
+    movh              m2, [bufq+buf_strideq*1]  ; a2
+    movh              m3, [refq+ref_strideq*1]  ; b2
+    punpcklbw         m0, m7                    ; s1 [word]
+    punpcklbw         m1, m7                    ; s2 [word]
+    punpcklbw         m2, m7                    ; s1 [word]
+    punpcklbw         m3, m7                    ; s2 [word]
+    pmaddwd           m4, m0, m0                ; a1 * a1
+    pmaddwd           m5, m1, m1                ; b1 * b1
+    pmaddwd           m8, m2, m2                ; a2 * a2
+    pmaddwd           m9, m3, m3                ; b2 * b2
+    paddd             m4, m5                    ; ss
+    paddd             m8, m9                    ; ss
+    pmaddwd           m6, m0, m1                ; a1 * b1 = ss12
+    pmaddwd           m5, m2, m3                ; a2 * b2 = ss12
+    paddw             m0, m2
+    paddw             m1, m3
+    paddd             m6, m5                    ; s12
+    paddd             m4, m8                    ; ss
+
+    movh              m2, [bufq+buf_strideq*2]  ; a3
+    movh              m3, [refq+ref_strideq*2]  ; b3
+    movh              m5, [bufq+buf_stride3q]   ; a4
+    movh              m8, [refq+ref_stride3q]   ; b4
+    punpcklbw         m2, m7                    ; s1 [word]
+    punpcklbw         m3, m7                    ; s2 [word]
+    punpcklbw         m5, m7                    ; s1 [word]
+    punpcklbw         m8, m7                    ; s2 [word]
+    pmaddwd           m9, m2, m2                ; a3 * a3
+    pmaddwd          m10, m3, m3                ; b3 * b3
+    pmaddwd          m12, m5, m5                ; a4 * a4
+    pmaddwd          m13, m8, m8                ; b4 * b4
+    pmaddwd          m11, m2, m3                ; a3 * b3 = ss12
+    pmaddwd          m14, m5, m8                ; a4 * b4 = ss12
+    paddd             m9, m10
+    paddd            m12, m13
+    paddw             m0, m2
+    paddw             m1, m3
+    paddw             m0, m5
+    paddw             m1, m8
+    paddd             m6, m11
+    paddd             m4, m9
+    paddd             m6, m14
+    paddd             m4, m12
+%endif
+
+    ; m0 = [word] s1 a,a,a,a,b,b,b,b
+    ; m1 = [word] s2 a,a,a,a,b,b,b,b
+    ; m4 = [dword] ss a,a,b,b
+    ; m6 = [dword] s12 a,a,b,b
+
+%if cpuflag(xop)
+    vphaddwq          m0, m0                    ; [dword] s1  a, 0, b, 0
+    vphaddwq          m1, m1                    ; [dword] s2  a, 0, b, 0
+    vphadddq          m4, m4                    ; [dword] ss  a, 0, b, 0
+    vphadddq          m6, m6                    ; [dword] s12 a, 0, b, 0
+    punpckhdq     m2, m0, m1                    ; [dword] s1  b, s2 b, 0, 0
+    punpckldq         m0, m1                    ; [dword] s1  a, s2 a, 0, 0
+    punpckhdq     m3, m4, m6                    ; [dword] ss  b, s12 b, 0, 0
+    punpckldq         m4, m6                    ; [dword] ss  a, s12 a, 0, 0
+    punpcklqdq    m1, m2, m3                    ; [dword] b s1, s2, ss, s12
+    punpcklqdq        m0, m4                    ; [dword] a s1, s2, ss, s12
+%else
+    pmaddwd           m0, m15                   ; [dword] s1 a,a,b,b
+    pmaddwd           m1, m15                   ; [dword] s2 a,a,b,b
+    phaddd            m0, m4                    ; [dword] s1 a, b, ss a, b
+    phaddd            m1, m6                    ; [dword] s2 a, b, s12 a, b
+    punpckhdq     m2, m0, m1                    ; [dword] ss a, s12 a, ss b, s12 b
+    punpckldq         m0, m1                    ; [dword] s1 a, s2 a, s1 b, s2 b
+    punpckhqdq    m1, m0, m2                    ; [dword] b s1, s2, ss, s12
+    punpcklqdq        m0, m2                    ; [dword] a s1, s2, ss, s12
+%endif
+
+    mova  [sumsq+     0], m0 ; sums for block a: s1, s2, ss, s12
+    mova  [sumsq+mmsize], m1 ; sums for block b
+
+    add             bufq, mmsize/2 ; advance past the two 4-pixel-wide blocks just processed
+    add             refq, mmsize/2
+    add            sumsq, mmsize*2
+    sub               wd, mmsize/8 ; w is decremented by the number of blocks done per iteration
+    jg .loop
+    RET
+%endmacro
+
+%if ARCH_X86_64
+INIT_XMM ssse3
+SSIM_4X4_LINE 16 ; the non-XOP path uses m8-m15, hence the ARCH_X86_64 guard
+%endif
+%if HAVE_XOP_EXTERNAL
+INIT_XMM xop
+SSIM_4X4_LINE 8 ; the XOP path only touches m0-m7
+%endif
+
+INIT_XMM sse4
+cglobal ssim_end_line, 3, 3, 6, sum0, sum1, w
+    pxor              m0, m0 ; m0 = running per-lane float sum of ssim values
+.loop:
+    mova              m1, [sum0q+mmsize*0]
+    mova              m2, [sum0q+mmsize*1]
+    mova              m3, [sum0q+mmsize*2]
+    mova              m4, [sum0q+mmsize*3]
+    paddd             m1, [sum1q+mmsize*0] ; add the matching entries of the two sum rows
+    paddd             m2, [sum1q+mmsize*1]
+    paddd             m3, [sum1q+mmsize*2]
+    paddd             m4, [sum1q+mmsize*3]
+    paddd             m1, m2 ; combine each entry with its right-hand neighbour
+    paddd             m2, m3
+    paddd             m3, m4
+    paddd             m4, [sum0q+mmsize*4] ; the last result also needs a fifth entry
+    paddd             m4, [sum1q+mmsize*4]
+    TRANSPOSE4x4D      1, 2, 3, 4, 5
+
+    ; m1 = fs1, m2 = fs2, m3 = fss, m4 = fs12
+    pslld             m3, 6 ; fss * 64
+    pslld             m4, 6 ; fs12 * 64
+    pmulld            m5, m1, m2                ; fs1 * fs2
+    pmulld            m1, m1                    ; fs1 * fs1
+    pmulld            m2, m2                    ; fs2 * fs2
+    psubd             m3, m1
+    psubd             m4, m5                    ; covariance
+    psubd             m3, m2                    ; variance
+
+    ; m1 = fs1 * fs1, m2 = fs2 * fs2, m3 = variance, m4 = covariance, m5 = fs1 * fs2
+    paddd             m4, m4                    ; 2 * covariance
+    paddd             m5, m5                    ; 2 * fs1 * fs2
+    paddd             m1, m2                    ; fs1 * fs1 + fs2 * fs2
+    paddd             m3, [ssim_c2]             ; variance + ssim_c2
+    paddd             m4, [ssim_c2]             ; 2 * covariance + ssim_c2
+    paddd             m5, [ssim_c1]             ; 2 * fs1 * fs2 + ssim_c1
+    paddd             m1, [ssim_c1]             ; fs1 * fs1 + fs2 * fs2 + ssim_c1
+
+    ; convert to float
+    cvtdq2ps          m3, m3
+    cvtdq2ps          m4, m4
+    cvtdq2ps          m5, m5
+    cvtdq2ps          m1, m1
+    mulps             m4, m5 ; numerator
+    mulps             m3, m1 ; denominator
+    divps             m4, m3                    ; ssim_endl
+    addps             m0, m4                    ; ssim
+    add            sum0q, mmsize*4
+    add            sum1q, mmsize*4
+    sub               wd, 4 ; four results are produced per iteration
+    jg .loop
+
+    ; subps the ones we added too much
+    test              wd, wd
+    jz .end ; w was a multiple of 4: nothing to undo
+    add               wd, 4 ; wd = number of valid results in the last group (1..3)
+    test              wd, 2
+    jz .skip2
+    psrldq            m4, 8 ; shift out two valid lanes
+.skip2:
+    test              wd, 1
+    jz .skip1
+    psrldq            m4, 4 ; shift out one valid lane
+.skip1:
+    subps             m0, m4 ; m4 holds only the excess results; their lane is irrelevant as m0 is summed below
+
+.end:
+    movhlps           m4, m0 ; horizontal sum of the four float lanes of m0
+    addps             m0, m4
+    movss             m4, m0
+    shufps            m0, m0, 1
+    addss             m0, m4
+%if ARCH_X86_32
+    movss            r0m, m0 ; x86-32: go through memory to get the float onto the x87 stack
+    fld             r0mp
+%endif
+    RET
diff --git a/libavfilter/x86/vf_ssim_init.c b/libavfilter/x86/vf_ssim_init.c
new file mode 100644
index 00000000..599c9284
--- /dev/null
+++ b/libavfilter/x86/vf_ssim_init.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2015 Ronald S. Bultje <rsbultje@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/x86/cpu.h"
+
+#include "libavfilter/ssim.h"
+
+void ff_ssim_4x4_line_ssse3(const uint8_t *buf, ptrdiff_t buf_stride,
+                            const uint8_t *ref, ptrdiff_t ref_stride,
+                            int (*sums)[4], int w);
+void ff_ssim_4x4_line_xop  (const uint8_t *buf, ptrdiff_t buf_stride,
+                            const uint8_t *ref, ptrdiff_t ref_stride,
+                            int (*sums)[4], int w);
+float ff_ssim_end_line_sse4(const int (*sum0)[4], const int (*sum1)[4], int w);
+
+void ff_ssim_init_x86(SSIMDSPContext *dsp)
+{
+    int cpu_flags = av_get_cpu_flags();
+    /* Install asm kernels into dsp; the tests run in order, so a later match replaces an earlier one. */
+    if (ARCH_X86_64 && EXTERNAL_SSSE3(cpu_flags)) /* the SSSE3 line kernel is only selected on x86-64 */
+        dsp->ssim_4x4_line = ff_ssim_4x4_line_ssse3;
+    if (EXTERNAL_SSE4(cpu_flags))
+        dsp->ssim_end_line = ff_ssim_end_line_sse4;
+    if (EXTERNAL_XOP(cpu_flags)) /* replaces the SSSE3 choice above when both tests pass */
+        dsp->ssim_4x4_line = ff_ssim_4x4_line_xop;
+}
diff --git a/libavfilter/x86/vf_stereo3d.asm b/libavfilter/x86/vf_stereo3d.asm
new file mode 100644
index 00000000..a057e495
--- /dev/null
+++ b/libavfilter/x86/vf_stereo3d.asm
@@ -0,0 +1,216 @@
+;*****************************************************************************
+;* x86-optimized functions for stereo3d filter
+;*
+;* Copyright (C) 2015 Paul B Mahol
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;*****************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+; rgbrgbrgbrgb
+; rrrrggggbbbb
+
+shuf: db 0, 4, 8, 1,5, 9, 2, 6,10,3, 7,11,-1,-1,-1,-1
+ex_r: db 0,-1,-1,-1,3,-1,-1,-1,6,-1,-1,-1, 9,-1,-1,-1
+ex_g: db 1,-1,-1,-1,4,-1,-1,-1,7,-1,-1,-1,10,-1,-1,-1
+ex_b: db 2,-1,-1,-1,5,-1,-1,-1,8,-1,-1,-1,11,-1,-1,-1
+
+SECTION .text
+
+INIT_XMM sse4
+%if ARCH_X86_64
+cglobal anaglyph, 6, 10, 14, 2*6*mmsize, dst, lsrc, rsrc, dst_linesize, l_linesize, r_linesize, width, height, o, cnt
+%define ana_matrix_rq r6q
+%define ana_matrix_gq r7q
+%define ana_matrix_bq r8q
+
+%else ; ARCH_X86_32
+%if HAVE_ALIGNED_STACK
+cglobal anaglyph, 3, 7, 8, 2*9*mmsize, dst, lsrc, rsrc, dst_linesize, l_linesize, o, cnt
+%else
+cglobal anaglyph, 3, 6, 8, 2*9*mmsize, dst, lsrc, rsrc, dst_linesize, o, cnt
+%define l_linesizeq r4mp
+%endif ; HAVE_ALIGNED_STACK
+%define ana_matrix_rq r3q
+%define ana_matrix_gq r4q
+%define ana_matrix_bq r5q
+%define r_linesizeq r5mp
+%define widthd  r6mp
+%define heightd r7mp
+%define  m8 [rsp+mmsize*12]
+%define  m9 [rsp+mmsize*13]
+%define m10 [rsp+mmsize*14]
+%define m11 [rsp+mmsize*15]
+%define m12 [rsp+mmsize*16]
+%define m13 [rsp+mmsize*17]
+%endif ; ARCH
+    ; the matrix pointers borrow r6-r8 (r3-r5 on x86-32); those registers are loaded with their named arguments only after the coefficients are cached
+    mov        ana_matrix_rq, r8m
+    mov        ana_matrix_gq, r9m
+    mov        ana_matrix_bq, r10m
+    movu                  m3, [ana_matrix_rq+ 0]  ; dword coefficients 0-3 of the first matrix
+    movq                  m5, [ana_matrix_rq+16]  ; dword coefficients 4-5
+    pshufd                m0, m3, q0000  ; broadcast each coefficient to all four dword lanes
+    pshufd                m1, m3, q1111
+    pshufd                m2, m3, q2222
+    pshufd                m3, m3, q3333
+    pshufd                m4, m5, q0000
+    pshufd                m5, m5, q1111
+    mova      [rsp+mmsize*0], m0  ; first matrix is cached in stack slots 0-5
+    mova      [rsp+mmsize*1], m1
+    mova      [rsp+mmsize*2], m2
+    mova      [rsp+mmsize*3], m3
+    mova      [rsp+mmsize*4], m4
+    mova      [rsp+mmsize*5], m5
+
+    movu                  m3, [ana_matrix_gq+ 0]  ; same broadcast for the second matrix
+    movq                  m5, [ana_matrix_gq+16]
+    pshufd                m0, m3, q0000
+    pshufd                m1, m3, q1111
+    pshufd                m2, m3, q2222
+    pshufd                m3, m3, q3333
+    pshufd                m4, m5, q0000
+    pshufd                m5, m5, q1111
+    mova     [rsp+mmsize*6 ], m0  ; second matrix is cached in stack slots 6-11
+    mova     [rsp+mmsize*7 ], m1
+    mova     [rsp+mmsize*8 ], m2
+    mova     [rsp+mmsize*9 ], m3
+    mova     [rsp+mmsize*10], m4
+    mova     [rsp+mmsize*11], m5
+
+%if ARCH_X86_64
+    movu                 m11, [ana_matrix_bq+ 0]  ; x86-64: the third matrix stays in registers m8-m13
+    movq                 m13, [ana_matrix_bq+16]
+    pshufd                m8, m11, q0000
+    pshufd                m9, m11, q1111
+    pshufd               m10, m11, q2222
+    pshufd               m11, m11, q3333
+    pshufd               m12, m13, q0000
+    pshufd               m13, m13, q1111
+    mov               widthd, dword widthm  ; matrix pointers are no longer needed: load the real width/height
+    mov              heightd, dword heightm
+%else
+    movu                  m3, [ana_matrix_bq+ 0]
+    movq                  m5, [ana_matrix_bq+16]
+    pshufd                m0, m3, q0000
+    pshufd                m1, m3, q1111
+    pshufd                m2, m3, q2222
+    pshufd                m3, m3, q3333
+    pshufd                m4, m5, q0000
+    pshufd                m5, m5, q1111
+    mova     [rsp+mmsize*12], m0  ; x86-32: stack slots 12-17, which the m8-m13 %defines above alias
+    mova     [rsp+mmsize*13], m1
+    mova     [rsp+mmsize*14], m2
+    mova     [rsp+mmsize*15], m3
+    mova     [rsp+mmsize*16], m4
+    mova     [rsp+mmsize*17], m5
+    mov        dst_linesizeq, r3m  ; r3 held a matrix pointer until here
+%if HAVE_ALIGNED_STACK
+    mov          l_linesizeq, r4m
+%endif
+%endif ; ARCH
+
+.nextrow:
+    mov                   od, widthd  ; od counts the pixels left in this row
+    xor                 cntd, cntd  ; cntd is the byte offset inside the row
+
+    .loop:
+        movu                 m3, [lsrcq+cntq]  ; 16-byte load; the ex_* masks only pick bytes 0-11 (four 3-byte pixels)
+        pshufb               m1, m3, [ex_r]  ; bytes 0,3,6,9 -> low byte of each dword, other bytes zeroed
+        pshufb               m2, m3, [ex_g]  ; bytes 1,4,7,10
+        pshufb               m3, [ex_b]  ; bytes 2,5,8,11
+        movu                 m0, [rsrcq+cntq]  ; same split for the second source
+        pshufb               m4, m0, [ex_r]
+        pshufb               m5, m0, [ex_g]
+        pshufb               m0, [ex_b]
+        pmulld               m1, [rsp+mmsize*0]  ; weight the six components with first-matrix coefficients 0-5
+        pmulld               m2, [rsp+mmsize*1]
+        pmulld               m3, [rsp+mmsize*2]
+        pmulld               m4, [rsp+mmsize*3]
+        pmulld               m5, [rsp+mmsize*4]
+        pmulld               m0, [rsp+mmsize*5]
+        paddd                m1, m2
+        paddd                m3, m4
+        paddd                m5, m0
+        paddd                m1, m3
+        paddd                m1, m5  ; m1 = first-matrix sums for the four pixels
+
+        movu                 m3, [lsrcq+cntq]  ; reload and repeat with the second matrix (slots 6-11)
+        pshufb               m7, m3, [ex_r]
+        pshufb               m2, m3, [ex_g]
+        pshufb               m3, [ex_b]
+        movu                 m0, [rsrcq+cntq]
+        pshufb               m4, m0, [ex_r]
+        pshufb               m5, m0, [ex_g]
+        pshufb               m0, [ex_b]
+        pmulld               m7, [rsp+mmsize*6]
+        pmulld               m2, [rsp+mmsize*7]
+        pmulld               m3, [rsp+mmsize*8]
+        pmulld               m4, [rsp+mmsize*9]
+        pmulld               m5, [rsp+mmsize*10]
+        pmulld               m0, [rsp+mmsize*11]
+        paddd                m7, m2
+        paddd                m3, m4
+        paddd                m5, m0
+        paddd                m7, m3
+        paddd                m7, m5  ; m7 = second-matrix sums
+
+        movu                 m4, [lsrcq+cntq]  ; reload and repeat with the third matrix (m8-m13)
+        pshufb               m2, m4, [ex_r]
+        pshufb               m3, m4, [ex_g]
+        pshufb               m4, [ex_b]
+        movu                 m0, [rsrcq+cntq]
+        pshufb               m5, m0, [ex_r]
+        pshufb               m6, m0, [ex_g]
+        pshufb               m0, [ex_b]
+        pmulld               m2, m8
+        pmulld               m3, m9
+        pmulld               m4, m10
+        pmulld               m5, m11
+        pmulld               m6, m12
+        pmulld               m0, m13
+        paddd                m2, m3
+        paddd                m4, m5
+        paddd                m6, m0
+        paddd                m2, m4
+        paddd                m2, m6  ; m2 = third-matrix sums
+
+        psrld                m1, 16  ; scale all three sums down by 2^16
+        psrld                m7, 16
+        psrld                m2, 16
+
+        packusdw             m1, m7  ; words 0-3 = first-matrix results, words 4-7 = second-matrix results
+        packusdw             m2, m2  ; third-matrix results as words (duplicated in both halves)
+        packuswb             m1, m2  ; bytes 0-3 / 4-7 / 8-11 = the three result groups
+        pshufb               m1, [shuf]  ; re-interleave into packed 3-byte pixels; bytes 12-15 are zeroed
+
+        movq      [dstq+cntq+0], m1  ; store 8 + 4 = 12 bytes, i.e. four output pixels
+        psrldq               m1, 8
+        movd      [dstq+cntq+8], m1
+        add                cntd, 12
+        sub                  od, 4
+    jg .loop
+
+    add          dstq, dst_linesizeq  ; step all three pointers to the next row
+    add         lsrcq, l_linesizeq
+    add         rsrcq, r_linesizeq
+    sub       heightd, 1
+    jg .nextrow
+REP_RET
diff --git a/libavfilter/x86/vf_stereo3d_init.c b/libavfilter/x86/vf_stereo3d_init.c
new file mode 100644
index 00000000..da160a89
--- /dev/null
+++ b/libavfilter/x86/vf_stereo3d_init.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/x86/cpu.h"
+
+#include "libavfilter/stereo3d.h"
+
+void ff_anaglyph_sse4(uint8_t *dst, uint8_t *lsrc, uint8_t *rsrc,
+                      ptrdiff_t dst_linesize, ptrdiff_t l_linesize, ptrdiff_t r_linesize,
+                      int width, int height,
+                      const int *ana_matrix_r, const int *ana_matrix_g, const int *ana_matrix_b);
+
+void ff_stereo3d_init_x86(Stereo3DDSPContext *dsp)
+{
+    const int cpu_flags = av_get_cpu_flags();
+
+    /* Switch dsp->anaglyph to the asm routine when EXTERNAL_SSE4() accepts the detected CPU flags. */
+    if (EXTERNAL_SSE4(cpu_flags))
+        dsp->anaglyph = ff_anaglyph_sse4;
+}
diff --git a/libavfilter/x86/vf_w3fdif.asm b/libavfilter/x86/vf_w3fdif.asm
new file mode 100644
index 00000000..52628c38
--- /dev/null
+++ b/libavfilter/x86/vf_w3fdif.asm
@@ -0,0 +1,259 @@
+;*****************************************************************************
+;* x86-optimized functions for w3fdif filter
+;*
+;* Copyright (c) 2015 Paul B Mahol
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION .text
+
+INIT_XMM sse2
+cglobal w3fdif_scale, 3, 3, 2, 0, out_pixel, work_pixel, linesize  ; turn 32-bit work values into 8-bit output pixels
+.loop:
+    mova                         m0, [work_pixelq]  ; aligned loads: two registers of dword work values
+    mova                         m1, [work_pixelq+mmsize]
+    psrad                        m0, 15  ; arithmetic shift right by 15
+    psrad                        m1, 15
+    packssdw                     m0, m1  ; dwords -> words with signed saturation
+    packuswb                     m0, m0  ; words -> bytes, clamped to 0..255
+    movh               [out_pixelq], m0  ; store the low half of m0
+    add                  out_pixelq, mmsize/2
+    add                 work_pixelq, mmsize*2
+    sub                   linesized, mmsize/2  ; one full group per pass; a final partial group is still processed in full
+    jg .loop
+REP_RET
+
+cglobal w3fdif_simple_low, 4, 5, 6, 0, work_line, in_lines_cur0, coef, linesize, offset
+    movd                  m1, [coefq]  ; load coef[0] and coef[1] (16-bit each) before coef's register is renamed
+    DEFINE_ARGS    work_line, in_lines_cur0, in_lines_cur1, linesize, offset
+    SPLATW                m0, m1, 0  ; m0 = coef[0] in every word
+    SPLATW                m1, m1, 1  ; m1 = coef[1] in every word
+    pxor                  m4, m4  ; zero register for byte -> word unpacking
+    mov              offsetq, 0
+    mov       in_lines_cur1q, [in_lines_cur0q + gprsize]  ; in_lines_cur0 arrives as a pointer array: fetch entry 1, then entry 0
+    mov       in_lines_cur0q, [in_lines_cur0q]
+    ; NOTE(review): m0/m1 hold one coefficient each, so pixels 0-3 presumably use coef[0] and pixels 4-7 coef[1] for BOTH lines - confirm coef[0] == coef[1]
+.loop:
+    movh                                   m2, [in_lines_cur0q+offsetq]  ; half a register of pixels from each line
+    movh                                   m3, [in_lines_cur1q+offsetq]
+    punpcklbw                              m2, m4  ; widen bytes to words
+    punpcklbw                              m3, m4
+    SBUTTERFLY                             wd, 2, 3, 5  ; interleave the two lines word by word (m5 is scratch)
+    pmaddwd                                m2, m0  ; multiply and add adjacent word pairs into dwords
+    pmaddwd                                m3, m1
+    mova               [work_lineq+offsetq*4], m2  ; overwrite work_line: 4 bytes per input pixel
+    mova        [work_lineq+offsetq*4+mmsize], m3
+    add                               offsetq, mmsize/2
+    sub                             linesized, mmsize/2
+    jg .loop
+REP_RET
+
+cglobal w3fdif_complex_low, 4, 7, 8, 0, work_line, in_lines_cur0, coef, linesize
+    movq                  m0, [coefq]  ; load coef[0..3] (16-bit each) before coef's register is renamed
+    DEFINE_ARGS    work_line, in_lines_cur0, in_lines_cur1, linesize, offset, in_lines_cur2, in_lines_cur3
+    pshufd                m2, m0, q1111  ; m2 = (coef[2], coef[3]) pair in every dword
+    SPLATD                m0  ; m0 = (coef[0], coef[1]) pair in every dword
+    pxor                  m1, m1  ; zero register for byte -> word unpacking
+    mov              offsetq, 0
+    mov       in_lines_cur3q, [in_lines_cur0q+gprsize*3]  ; in_lines_cur0 arrives as a pointer array: fetch entries 3..0
+    mov       in_lines_cur2q, [in_lines_cur0q+gprsize*2]
+    mov       in_lines_cur1q, [in_lines_cur0q+gprsize]
+    mov       in_lines_cur0q, [in_lines_cur0q]
+
+.loop:
+    movh                                   m4, [in_lines_cur0q+offsetq]
+    movh                                   m5, [in_lines_cur1q+offsetq]
+    punpcklbw                              m4, m1
+    punpcklbw                              m5, m1
+    SBUTTERFLY                             wd, 4, 5, 7  ; interleave lines 0 and 1 word by word (m7 is scratch)
+    pmaddwd                                m4, m0  ; line0*coef[0] + line1*coef[1] per pixel
+    pmaddwd                                m5, m0
+    movh                                   m6, [in_lines_cur2q+offsetq]
+    movh                                   m3, [in_lines_cur3q+offsetq]
+    punpcklbw                              m6, m1
+    punpcklbw                              m3, m1
+    SBUTTERFLY                             wd, 6, 3, 7  ; interleave lines 2 and 3
+    pmaddwd                                m6, m2  ; line2*coef[2] + line3*coef[3] per pixel
+    pmaddwd                                m3, m2
+    paddd                                  m4, m6
+    paddd                                  m5, m3
+    mova               [work_lineq+offsetq*4], m4  ; overwrite work_line: 4 bytes per input pixel
+    mova        [work_lineq+offsetq*4+mmsize], m5
+    add                               offsetq, mmsize/2
+    sub                             linesized, mmsize/2
+    jg .loop
+REP_RET
+
+%if ARCH_X86_64
+cglobal w3fdif_simple_high, 5, 9, 8, 0, work_line, in_lines_cur0, in_lines_adj0, coef, linesize
+%else
+cglobal w3fdif_simple_high, 4, 7, 8, 0, work_line, in_lines_cur0, in_lines_adj0, coef, linesize
+%endif
+    movq                  m2, [coefq]  ; load the 16-bit coefficients before coef's register is renamed
+%if ARCH_X86_64
+    DEFINE_ARGS    work_line, in_lines_cur0, in_lines_adj0, in_lines_cur1, linesize, offset, in_lines_cur2, in_lines_adj1, in_lines_adj2
+    xor              offsetq, offsetq
+%else
+    DEFINE_ARGS    work_line, in_lines_cur0, in_lines_adj0, in_lines_cur1, in_lines_cur2, in_lines_adj1, in_lines_adj2
+    %define linesized r4mp
+%endif
+
+    pshufd                m0, m2, q0000  ; m0 = (coef[0], coef[1]) pair in every dword
+    SPLATW                m2, m2, 2  ; m2 = coef[2] in every word
+    pxor                  m7, m7  ; zero register for byte -> word unpacking
+    mov       in_lines_cur2q, [in_lines_cur0q+gprsize*2]  ; both line arguments arrive as pointer arrays: fetch entries 2..0
+    mov       in_lines_cur1q, [in_lines_cur0q+gprsize]
+    mov       in_lines_cur0q, [in_lines_cur0q]
+    mov       in_lines_adj2q, [in_lines_adj0q+gprsize*2]
+    mov       in_lines_adj1q, [in_lines_adj0q+gprsize]
+    mov       in_lines_adj0q, [in_lines_adj0q]
+
+%if ARCH_X86_32
+    sub in_lines_cur1q, in_lines_cur0q  ; no spare register for an offset: keep the other lines relative to cur0
+    sub in_lines_cur2q, in_lines_cur0q
+    sub in_lines_adj0q, in_lines_cur0q
+    sub in_lines_adj1q, in_lines_cur0q
+    sub in_lines_adj2q, in_lines_cur0q
+    %define offsetq in_lines_cur0q
+%endif
+
+.loop:
+%if ARCH_X86_64
+    movh                                   m3, [in_lines_cur0q+offsetq]
+%else
+    movh                                   m3, [in_lines_cur0q]
+%endif
+    movh                                   m4, [in_lines_cur1q+offsetq]
+    punpcklbw                              m3, m7
+    punpcklbw                              m4, m7
+    SBUTTERFLY                             wd, 3, 4, 1  ; interleave cur0 and cur1 word by word (m1 is scratch)
+    pmaddwd                                m3, m0  ; cur0*coef[0] + cur1*coef[1] per pixel
+    pmaddwd                                m4, m0
+    movh                                   m5, [in_lines_adj0q+offsetq]
+    movh                                   m6, [in_lines_adj1q+offsetq]
+    punpcklbw                              m5, m7
+    punpcklbw                              m6, m7
+    SBUTTERFLY                             wd, 5, 6, 1
+    pmaddwd                                m5, m0  ; adj0*coef[0] + adj1*coef[1] per pixel
+    pmaddwd                                m6, m0
+    paddd                                  m3, m5
+    paddd                                  m4, m6
+    movh                                   m5, [in_lines_cur2q+offsetq]
+    movh                                   m6, [in_lines_adj2q+offsetq]
+    punpcklbw                              m5, m7
+    punpcklbw                              m6, m7
+    SBUTTERFLY                             wd, 5, 6, 1
+    pmaddwd                                m5, m2  ; (cur2 + adj2) * coef[2] per pixel
+    pmaddwd                                m6, m2
+    paddd                                  m3, m5
+    paddd                                  m4, m6
+%if ARCH_X86_64
+    paddd                                  m3, [work_lineq+offsetq*4]  ; accumulate onto the values already in work_line
+    paddd                                  m4, [work_lineq+offsetq*4+mmsize]
+    mova               [work_lineq+offsetq*4], m3
+    mova        [work_lineq+offsetq*4+mmsize], m4
+%else
+    paddd                                  m3, [work_lineq]
+    paddd                                  m4, [work_lineq+mmsize]
+    mova                         [work_lineq], m3
+    mova                  [work_lineq+mmsize], m4
+    add                            work_lineq, mmsize*2  ; x86-32 advances work_line itself instead of indexing by offset
+%endif
+    add                               offsetq, mmsize/2
+    sub                             linesized, mmsize/2
+    jg .loop
+REP_RET
+
+%if ARCH_X86_64
+
+cglobal w3fdif_complex_high, 5, 13, 10, 0, work_line, in_lines_cur0, in_lines_adj0, coef, linesize
+    movq                  m0, [coefq+0]  ; load coef[0..3] (16-bit each)
+    movd                  m4, [coefq+8]  ; load coef[4] and coef[5]
+    DEFINE_ARGS    work_line, in_lines_cur0, in_lines_adj0, in_lines_cur1, linesize, offset, in_lines_cur2, in_lines_cur3, in_lines_cur4, in_lines_adj1, in_lines_adj2, in_lines_adj3, in_lines_adj4
+    pshufd                m1, m0, q1111  ; m1 = (coef[2], coef[3]) pair in every dword
+    SPLATD                m0  ; m0 = (coef[0], coef[1]) pair in every dword
+    SPLATW                m4, m4  ; m4 = one coefficient word in every lane (presumably coef[4] - confirm SPLATW's default index)
+    pxor                  m3, m3  ; zero register for byte -> word unpacking
+    mov              offsetq, 0
+    mov       in_lines_cur4q, [in_lines_cur0q+gprsize*4]  ; both line arguments arrive as pointer arrays: fetch entries 4..0
+    mov       in_lines_cur3q, [in_lines_cur0q+gprsize*3]
+    mov       in_lines_cur2q, [in_lines_cur0q+gprsize*2]
+    mov       in_lines_cur1q, [in_lines_cur0q+gprsize]
+    mov       in_lines_cur0q, [in_lines_cur0q]
+    mov       in_lines_adj4q, [in_lines_adj0q+gprsize*4]
+    mov       in_lines_adj3q, [in_lines_adj0q+gprsize*3]
+    mov       in_lines_adj2q, [in_lines_adj0q+gprsize*2]
+    mov       in_lines_adj1q, [in_lines_adj0q+gprsize]
+    mov       in_lines_adj0q, [in_lines_adj0q]
+
+.loop:
+    movh                                   m5, [in_lines_cur0q+offsetq]
+    movh                                   m6, [in_lines_cur1q+offsetq]
+    punpcklbw                              m5, m3
+    punpcklbw                              m6, m3
+    SBUTTERFLY                             wd, 5, 6, 2  ; interleave cur0 and cur1 word by word (m2 is scratch)
+    pmaddwd                                m5, m0  ; cur0*coef[0] + cur1*coef[1] per pixel
+    pmaddwd                                m6, m0
+    movh                                   m8, [in_lines_cur2q+offsetq]
+    movh                                   m9, [in_lines_cur3q+offsetq]
+    punpcklbw                              m8, m3
+    punpcklbw                              m9, m3
+    SBUTTERFLY                             wd, 8, 9, 2
+    pmaddwd                                m8, m1  ; cur2*coef[2] + cur3*coef[3] per pixel
+    pmaddwd                                m9, m1
+    paddd                                  m5, m8
+    paddd                                  m6, m9
+    movh                                   m8, [in_lines_adj0q+offsetq]
+    movh                                   m9, [in_lines_adj1q+offsetq]
+    punpcklbw                              m8, m3
+    punpcklbw                              m9, m3
+    SBUTTERFLY                             wd, 8, 9, 2
+    pmaddwd                                m8, m0  ; adj0*coef[0] + adj1*coef[1] per pixel
+    pmaddwd                                m9, m0
+    paddd                                  m5, m8
+    paddd                                  m6, m9
+    movh                                   m8, [in_lines_adj2q+offsetq]
+    movh                                   m9, [in_lines_adj3q+offsetq]
+    punpcklbw                              m8, m3
+    punpcklbw                              m9, m3
+    SBUTTERFLY                             wd, 8, 9, 2
+    pmaddwd                                m8, m1  ; adj2*coef[2] + adj3*coef[3] per pixel
+    pmaddwd                                m9, m1
+    paddd                                  m5, m8
+    paddd                                  m6, m9
+    movh                                   m8, [in_lines_cur4q+offsetq]
+    movh                                   m9, [in_lines_adj4q+offsetq]
+    punpcklbw                              m8, m3
+    punpcklbw                              m9, m3
+    SBUTTERFLY                             wd, 8, 9, 2
+    pmaddwd                                m8, m4  ; (cur4 + adj4) * the broadcast coefficient in m4
+    pmaddwd                                m9, m4
+    paddd                                  m5, m8
+    paddd                                  m6, m9
+    paddd                                  m5, [work_lineq+offsetq*4]  ; accumulate onto the values already in work_line
+    paddd                                  m6, [work_lineq+offsetq*4+mmsize]
+    mova               [work_lineq+offsetq*4], m5
+    mova        [work_lineq+offsetq*4+mmsize], m6
+    add                               offsetq, mmsize/2
+    sub                             linesized, mmsize/2
+    jg .loop
+REP_RET
+
+%endif
diff --git a/libavfilter/x86/vf_w3fdif_init.c b/libavfilter/x86/vf_w3fdif_init.c
new file mode 100644
index 00000000..9bf06e84
--- /dev/null
+++ b/libavfilter/x86/vf_w3fdif_init.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/mem.h"
+#include "libavutil/x86/asm.h"
+#include "libavutil/x86/cpu.h"
+#include "libavfilter/w3fdif.h"
+
+void ff_w3fdif_simple_low_sse2(int32_t *work_line,
+                               uint8_t *in_lines_cur[2],
+                               const int16_t *coef, int linesize);
+
+void ff_w3fdif_simple_high_sse2(int32_t *work_line,
+                                uint8_t *in_lines_cur[3],
+                                uint8_t *in_lines_adj[3],
+                                const int16_t *coef, int linesize);
+
+void ff_w3fdif_complex_low_sse2(int32_t *work_line,
+                                uint8_t *in_lines_cur[4],
+                                const int16_t *coef, int linesize);
+
+void ff_w3fdif_complex_high_sse2(int32_t *work_line,
+                                 uint8_t *in_lines_cur[5],
+                                 uint8_t *in_lines_adj[5],
+                                 const int16_t *coef, int linesize);
+
+void ff_w3fdif_scale_sse2(uint8_t *out_pixel, const int32_t *work_pixel, int linesize);
+
+av_cold void ff_w3fdif_init_x86(W3FDIFDSPContext *dsp)
+{
+    const int cpu_flags = av_get_cpu_flags();
+
+    if (!EXTERNAL_SSE2(cpu_flags))
+        return;
+
+    dsp->filter_scale        = ff_w3fdif_scale_sse2;
+    dsp->filter_simple_low   = ff_w3fdif_simple_low_sse2;
+    dsp->filter_complex_low  = ff_w3fdif_complex_low_sse2;
+    dsp->filter_simple_high  = ff_w3fdif_simple_high_sse2;
+    /* complex_high is assembled only inside the ARCH_X86_64 guard of vf_w3fdif.asm */
+    if (ARCH_X86_64)
+        dsp->filter_complex_high = ff_w3fdif_complex_high_sse2;
+}
diff --git a/libavfilter/x86/vf_yadif_init.c b/libavfilter/x86/vf_yadif_init.c
index 1460a642..c36a2d01 100644
--- a/libavfilter/x86/vf_yadif_init.c
+++ b/libavfilter/x86/vf_yadif_init.c
@@ -62,7 +62,7 @@ av_cold void ff_yadif_init_x86(YADIFContext *yadif)
 {
     int cpu_flags = av_get_cpu_flags();
     int bit_depth = (!yadif->csp) ? 8
-                                  : yadif->csp->comp[0].depth_minus1 + 1;
+                                  : yadif->csp->comp[0].depth;
 
     if (bit_depth >= 15) {
 #if ARCH_X86_32
diff --git a/libavformat/3dostr.c b/libavformat/3dostr.c
new file mode 100644
index 00000000..5325a03f
--- /dev/null
+++ b/libavformat/3dostr.c
@@ -0,0 +1,168 @@
+/*
+ * 3DO STR demuxer
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avformat.h"
+#include "internal.h"
+
+static int threedostr_probe(AVProbeData *p)
+{
+    /* Score the buffer only when it opens with one of the three known chunk tags. */
+    if (!memcmp(p->buf, "CTRL", 4) ||
+        !memcmp(p->buf, "SHDR", 4) ||
+        !memcmp(p->buf, "SNDS", 4))
+        return AVPROBE_SCORE_MAX / 3 * 2;
+    return 0;
+}
+
+static int threedostr_read_header(AVFormatContext *s)
+{
+    unsigned chunk, codec = 0, size, ctrl_size = -1, found_shdr = 0;
+    AVStream *st = NULL;
+    /* Walk the chunks up to the first SNDS/SHDR header and create the audio stream; returns 0 or an AVERROR code. */
+    while (!avio_feof(s->pb) && !found_shdr) {
+        chunk = avio_rl32(s->pb);
+        size  = avio_rb32(s->pb);
+
+        if (size < 8) /* the size field must cover the 8-byte tag+size prefix just read */
+            return AVERROR_INVALIDDATA;
+        size -= 8;
+
+        switch (chunk) {
+        case MKTAG('C','T','R','L'):
+            ctrl_size = size; /* remembered: it selects the duration formula below */
+            break;
+        case MKTAG('S','N','D','S'):
+            if (size < 56) /* the header fields parsed below consume 56 bytes */
+                return AVERROR_INVALIDDATA;
+            avio_skip(s->pb, 8);
+            if (avio_rl32(s->pb) != MKTAG('S','H','D','R'))
+                return AVERROR_INVALIDDATA;
+            avio_skip(s->pb, 24);
+
+            st = avformat_new_stream(s, NULL);
+            if (!st)
+                return AVERROR(ENOMEM);
+
+            st->codec->codec_type  = AVMEDIA_TYPE_AUDIO;
+            st->codec->sample_rate = avio_rb32(s->pb);
+            st->codec->channels    = avio_rb32(s->pb);
+            if (st->codec->channels <= 0 || st->codec->sample_rate <= 0) /* both come straight from the file */
+                return AVERROR_INVALIDDATA;
+            codec                  = avio_rl32(s->pb);
+            avio_skip(s->pb, 4);
+            if (ctrl_size == 20 || ctrl_size == 3 || ctrl_size == -1)
+                st->duration       = (avio_rb32(s->pb) - 1) / st->codec->channels;
+            else
+                st->duration       = avio_rb32(s->pb) * 16 / st->codec->channels;
+            size -= 56;
+            found_shdr = 1; /* ends the scan after the rest of this chunk is skipped */
+            break;
+        case MKTAG('S','H','D','R'):
+            if (size >  0x78) {
+                avio_skip(s->pb, 0x74);
+                size -= 0x78; /* 0x74 skipped plus the 4-byte tag read next */
+                if (avio_rl32(s->pb) == MKTAG('C','T','R','L') && size > 4) {
+                    ctrl_size = avio_rb32(s->pb);
+                    size -= 4;
+                }
+            }
+            break;
+        default:
+            av_log(s, AV_LOG_DEBUG, "skipping unknown chunk: %X\n", chunk);
+            break;
+        }
+
+        avio_skip(s->pb, size); /* discard whatever is left of the chunk */
+    }
+
+    switch (codec) { /* codec stays 0 when no SNDS header was found, which lands in default */
+    case MKTAG('S','D','X','2'):
+        st->codec->codec_id    = AV_CODEC_ID_SDX2_DPCM;
+        st->codec->block_align = 1 * st->codec->channels;
+        break;
+    default:
+        avpriv_request_sample(s, "codec %X", codec);
+        return AVERROR_PATCHWELCOME;
+    }
+
+    avpriv_set_pts_info(st, 64, 1, st->codec->sample_rate);
+
+    return 0;
+}
+
+static int threedostr_read_packet(AVFormatContext *s, AVPacket *pkt)
+{
+    unsigned chunk, size, found_ssmp = 0;
+    AVStream *st = s->streams[0];
+    int64_t pos;
+    int ret = 0;
+    /* Return the payload of the next SNDS/SSMP chunk as one packet; chunks with other tags are skipped. */
+    while (!found_ssmp) {
+        if (avio_feof(s->pb))
+            return AVERROR_EOF;
+
+        pos   = avio_tell(s->pb); /* chunk start, reported as pkt->pos */
+        chunk = avio_rl32(s->pb);
+        size  = avio_rb32(s->pb);
+
+        if (!size) /* zero size field: treat the next 8 bytes as a new chunk header */
+            continue;
+
+        if (size < 8) /* the size field must cover the 8-byte tag+size prefix just read */
+            return AVERROR_INVALIDDATA;
+        size -= 8;
+
+        switch (chunk) {
+        case MKTAG('S','N','D','S'):
+            if (size <= 16) /* 16 bytes of sub-header must be followed by at least one payload byte */
+                return AVERROR_INVALIDDATA;
+            avio_skip(s->pb, 8);
+            if (avio_rl32(s->pb) != MKTAG('S','S','M','P'))
+                return AVERROR_INVALIDDATA;
+            avio_skip(s->pb, 4);
+            size -= 16;
+            ret = av_get_packet(s->pb, pkt, size); /* its result becomes the function's return value */
+            pkt->pos = pos;
+            pkt->stream_index = 0;
+            pkt->duration = size / st->codec->channels;
+            size = 0; /* payload already consumed: the skip below must not advance further */
+            found_ssmp = 1;
+            break;
+        default:
+            av_log(s, AV_LOG_DEBUG, "skipping unknown chunk: %X\n", chunk);
+            break;
+        }
+
+        avio_skip(s->pb, size); /* discard the body of a skipped chunk */
+    }
+
+    return ret;
+}
+
+AVInputFormat ff_threedostr_demuxer = { /* demuxer descriptor wiring up the three callbacks defined above */
+    .name           = "3dostr",
+    .long_name      = NULL_IF_CONFIG_SMALL("3DO STR"),
+    .read_probe     = threedostr_probe,
+    .read_header    = threedostr_read_header,
+    .read_packet    = threedostr_read_packet,
+    .extensions     = "str",
+    .flags          = AVFMT_GENERIC_INDEX,
+};
diff --git a/libavformat/4xm.c b/libavformat/4xm.c
index 8fdad185..70f0def3 100644
--- a/libavformat/4xm.c
+++ b/libavformat/4xm.c
@@ -111,7 +111,7 @@ static int parse_vtrk(AVFormatContext *s,
     st->codec->codec_type     = AVMEDIA_TYPE_VIDEO;
     st->codec->codec_id       = AV_CODEC_ID_4XM;
 
-    st->codec->extradata      = av_mallocz(4 + FF_INPUT_BUFFER_PADDING_SIZE);
+    st->codec->extradata      = av_mallocz(4 + AV_INPUT_BUFFER_PADDING_SIZE);
     if (!st->codec->extradata)
         return AVERROR(ENOMEM);
     st->codec->extradata_size = 4;
@@ -321,7 +321,7 @@ static int fourxm_read_packet(AVFormatContext *s,
             ret = avio_read(s->pb, &pkt->data[8], size);
 
             if (ret < 0) {
-                av_free_packet(pkt);
+                av_packet_unref(pkt);
             } else {
                 packet_read = 1;
                 av_shrink_packet(pkt, ret + 8);
diff --git a/libavformat/Makefile b/libavformat/Makefile
index b9169d94..35a383d3 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -18,6 +18,7 @@ OBJS = allformats.o         \
        mux.o                \
        options.o            \
        os_support.o         \
+       qtpalette.o          \
        riff.o               \
        sdp.o                \
        url.o                \
@@ -59,12 +60,15 @@ OBJS-$(CONFIG_SHARED)                    += log2_tab.o golomb_tab.o
 
 # muxers/demuxers
 OBJS-$(CONFIG_A64_MUXER)                 += a64.o rawenc.o
+OBJS-$(CONFIG_AA_DEMUXER)                += aadec.o
 OBJS-$(CONFIG_AAC_DEMUXER)               += aacdec.o apetag.o img2.o rawdec.o
 OBJS-$(CONFIG_AC3_DEMUXER)               += ac3dec.o rawdec.o
 OBJS-$(CONFIG_AC3_MUXER)                 += rawenc.o
+OBJS-$(CONFIG_ACM_DEMUXER)               += acm.o rawdec.o
 OBJS-$(CONFIG_ACT_DEMUXER)               += act.o
 OBJS-$(CONFIG_ADF_DEMUXER)               += bintext.o sauce.o
 OBJS-$(CONFIG_ADP_DEMUXER)               += adp.o
+OBJS-$(CONFIG_ADS_DEMUXER)               += ads.o
 OBJS-$(CONFIG_ADX_DEMUXER)               += adxdec.o
 OBJS-$(CONFIG_ADX_MUXER)                 += rawenc.o
 OBJS-$(CONFIG_ADTS_MUXER)                += adtsenc.o apetag.o img2.o \
@@ -82,7 +86,9 @@ OBJS-$(CONFIG_APE_DEMUXER)               += ape.o apetag.o img2.o
 OBJS-$(CONFIG_APNG_DEMUXER)              += apngdec.o
 OBJS-$(CONFIG_APNG_MUXER)                += apngenc.o
 OBJS-$(CONFIG_AQTITLE_DEMUXER)           += aqtitledec.o subtitles.o
-OBJS-$(CONFIG_ASF_DEMUXER)               += asfdec.o asf.o asfcrypt.o \
+OBJS-$(CONFIG_ASF_DEMUXER)               += asfdec_f.o asf.o asfcrypt.o \
+                                            avlanguage.o
+OBJS-$(CONFIG_ASF_O_DEMUXER)             += asfdec_o.o asf.o asfcrypt.o \
                                             avlanguage.o
 OBJS-$(CONFIG_ASF_MUXER)                 += asfenc.o asf.o
 OBJS-$(CONFIG_ASS_DEMUXER)               += assdec.o subtitles.o
@@ -105,6 +111,7 @@ OBJS-$(CONFIG_BIT_DEMUXER)               += bit.o
 OBJS-$(CONFIG_BIT_MUXER)                 += bit.o
 OBJS-$(CONFIG_BMV_DEMUXER)               += bmv.o
 OBJS-$(CONFIG_BOA_DEMUXER)               += boadec.o
+OBJS-$(CONFIG_BFSTM_DEMUXER)             += brstm.o
 OBJS-$(CONFIG_BRSTM_DEMUXER)             += brstm.o
 OBJS-$(CONFIG_C93_DEMUXER)               += c93.o vocdec.o voc.o
 OBJS-$(CONFIG_CAF_DEMUXER)               += cafdec.o caf.o mov.o mov_chan.o \
@@ -122,6 +129,7 @@ OBJS-$(CONFIG_DATA_MUXER)                += rawdec.o
 OBJS-$(CONFIG_DASH_MUXER)                += dashenc.o isom.o
 OBJS-$(CONFIG_DAUD_DEMUXER)              += dauddec.o
 OBJS-$(CONFIG_DAUD_MUXER)                += daudenc.o
+OBJS-$(CONFIG_DCSTR_DEMUXER)             += dcstr.o
 OBJS-$(CONFIG_DFA_DEMUXER)               += dfa.o
 OBJS-$(CONFIG_DIRAC_DEMUXER)             += diracdec.o rawdec.o
 OBJS-$(CONFIG_DIRAC_MUXER)               += rawenc.o
@@ -163,6 +171,7 @@ OBJS-$(CONFIG_FOURXM_DEMUXER)            += 4xm.o
 OBJS-$(CONFIG_FRAMECRC_MUXER)            += framecrcenc.o framehash.o
 OBJS-$(CONFIG_FRAMEMD5_MUXER)            += md5enc.o framehash.o
 OBJS-$(CONFIG_FRM_DEMUXER)               += frmdec.o
+OBJS-$(CONFIG_FSB_DEMUXER)               += fsb.o
 OBJS-$(CONFIG_GIF_MUXER)                 += gif.o
 OBJS-$(CONFIG_GIF_DEMUXER)               += gifdec.o
 OBJS-$(CONFIG_GSM_DEMUXER)               += gsmdec.o
@@ -173,6 +182,7 @@ OBJS-$(CONFIG_G722_MUXER)                += rawenc.o
 OBJS-$(CONFIG_G723_1_DEMUXER)            += g723_1.o
 OBJS-$(CONFIG_G723_1_MUXER)              += rawenc.o
 OBJS-$(CONFIG_G729_DEMUXER)              += g729dec.o
+OBJS-$(CONFIG_GENH_DEMUXER)              += genh.o
 OBJS-$(CONFIG_H261_DEMUXER)              += h261dec.o rawdec.o
 OBJS-$(CONFIG_H261_MUXER)                += rawenc.o
 OBJS-$(CONFIG_H263_DEMUXER)              += h263dec.o rawdec.o
@@ -199,6 +209,7 @@ OBJS-$(CONFIG_IMAGE2PIPE_MUXER)          += img2enc.o img2.o
 OBJS-$(CONFIG_IMAGE2_ALIAS_PIX_DEMUXER)  += img2_alias_pix.o
 OBJS-$(CONFIG_IMAGE2_BRENDER_PIX_DEMUXER) += img2_brender_pix.o
 OBJS-$(CONFIG_IMAGE_BMP_PIPE_DEMUXER)     += img2dec.o img2.o
+OBJS-$(CONFIG_IMAGE_DDS_PIPE_DEMUXER)     += img2dec.o img2.o
 OBJS-$(CONFIG_IMAGE_DPX_PIPE_DEMUXER)     += img2dec.o img2.o
 OBJS-$(CONFIG_IMAGE_EXR_PIPE_DEMUXER)     += img2dec.o img2.o
 OBJS-$(CONFIG_IMAGE_J2K_PIPE_DEMUXER)     += img2dec.o img2.o
@@ -219,10 +230,10 @@ OBJS-$(CONFIG_ISS_DEMUXER)               += iss.o
 OBJS-$(CONFIG_IV8_DEMUXER)               += iv8.o
 OBJS-$(CONFIG_IVF_DEMUXER)               += ivfdec.o
 OBJS-$(CONFIG_IVF_MUXER)                 += ivfenc.o
+OBJS-$(CONFIG_IVR_DEMUXER)               += rmdec.o rm.o rmsipr.o
 OBJS-$(CONFIG_JACOSUB_DEMUXER)           += jacosubdec.o subtitles.o
 OBJS-$(CONFIG_JACOSUB_MUXER)             += jacosubenc.o rawenc.o
 OBJS-$(CONFIG_JV_DEMUXER)                += jvdec.o
-OBJS-$(CONFIG_LATM_DEMUXER)              += rawdec.o
 OBJS-$(CONFIG_LATM_MUXER)                += latmenc.o rawenc.o
 OBJS-$(CONFIG_LMLM4_DEMUXER)             += lmlm4.o
 OBJS-$(CONFIG_LOAS_DEMUXER)              += loasdec.o rawdec.o
@@ -246,7 +257,7 @@ OBJS-$(CONFIG_MICRODVD_DEMUXER)          += microdvddec.o subtitles.o
 OBJS-$(CONFIG_MICRODVD_MUXER)            += microdvdenc.o
 OBJS-$(CONFIG_MJPEG_DEMUXER)             += rawdec.o
 OBJS-$(CONFIG_MJPEG_MUXER)               += rawenc.o
-OBJS-$(CONFIG_MLP_DEMUXER)               += rawdec.o
+OBJS-$(CONFIG_MLP_DEMUXER)               += rawdec.o mlpdec.o
 OBJS-$(CONFIG_MLP_MUXER)                 += rawenc.o
 OBJS-$(CONFIG_MLV_DEMUXER)               += mlvdec.o riffdec.o
 OBJS-$(CONFIG_MM_DEMUXER)                += mm.o
@@ -254,7 +265,7 @@ OBJS-$(CONFIG_MMF_DEMUXER)               += mmf.o
 OBJS-$(CONFIG_MMF_MUXER)                 += mmf.o rawenc.o
 OBJS-$(CONFIG_MOV_DEMUXER)               += mov.o isom.o mov_chan.o replaygain.o
 OBJS-$(CONFIG_MOV_MUXER)                 += movenc.o isom.o avc.o hevc.o \
-                                            movenchint.o mov_chan.o rtp.o
+                                            movenchint.o mov_chan.o rtp.o movenccenc.o
 OBJS-$(CONFIG_MP2_MUXER)                 += mp3enc.o rawenc.o id3v2enc.o
 OBJS-$(CONFIG_MP3_DEMUXER)               += mp3dec.o replaygain.o
 OBJS-$(CONFIG_MP3_MUXER)                 += mp3enc.o rawenc.o id3v2enc.o
@@ -274,6 +285,7 @@ OBJS-$(CONFIG_MPEGVIDEO_DEMUXER)         += mpegvideodec.o rawdec.o
 OBJS-$(CONFIG_MPJPEG_DEMUXER)            += mpjpegdec.o
 OBJS-$(CONFIG_MPJPEG_MUXER)              += mpjpeg.o
 OBJS-$(CONFIG_MPL2_DEMUXER)              += mpl2dec.o subtitles.o
+OBJS-$(CONFIG_MSF_DEMUXER)               += msf.o
 OBJS-$(CONFIG_MPSUB_DEMUXER)             += mpsubdec.o subtitles.o
 OBJS-$(CONFIG_MSNWC_TCP_DEMUXER)         += msnwc_tcp.o
 OBJS-$(CONFIG_MTV_DEMUXER)               += mtv.o
@@ -291,6 +303,7 @@ OBJS-$(CONFIG_NUT_MUXER)                 += nutenc.o nut.o
 OBJS-$(CONFIG_NUV_DEMUXER)               += nuv.o
 OBJS-$(CONFIG_OGG_DEMUXER)               += oggdec.o         \
                                             oggparsecelt.o   \
+                                            oggparsedaala.o  \
                                             oggparsedirac.o  \
                                             oggparseflac.o   \
                                             oggparseogm.o    \
@@ -398,7 +411,7 @@ OBJS-$(CONFIG_SDP_DEMUXER)               += rtsp.o
 OBJS-$(CONFIG_SDR2_DEMUXER)              += sdr2.o
 OBJS-$(CONFIG_SEGAFILM_DEMUXER)          += segafilm.o
 OBJS-$(CONFIG_SEGMENT_MUXER)             += segment.o
-OBJS-$(CONFIG_SHORTEN_DEMUXER)           += rawdec.o
+OBJS-$(CONFIG_SHORTEN_DEMUXER)           += shortendec.o rawdec.o
 OBJS-$(CONFIG_SIFF_DEMUXER)              += siff.o
 OBJS-$(CONFIG_SINGLEJPEG_MUXER)          += rawenc.o
 OBJS-$(CONFIG_SMACKER_DEMUXER)           += smacker.o
@@ -420,22 +433,27 @@ OBJS-$(CONFIG_STR_DEMUXER)               += psxstr.o
 OBJS-$(CONFIG_SUBVIEWER1_DEMUXER)        += subviewer1dec.o subtitles.o
 OBJS-$(CONFIG_SUBVIEWER_DEMUXER)         += subviewerdec.o subtitles.o
 OBJS-$(CONFIG_SUP_DEMUXER)               += supdec.o
+OBJS-$(CONFIG_SVAG_DEMUXER)              += svag.o
 OBJS-$(CONFIG_SWF_DEMUXER)               += swfdec.o swf.o
 OBJS-$(CONFIG_SWF_MUXER)                 += swfenc.o swf.o
 OBJS-$(CONFIG_TAK_DEMUXER)               += takdec.o apetag.o img2.o rawdec.o
 OBJS-$(CONFIG_TEDCAPTIONS_DEMUXER)       += tedcaptionsdec.o subtitles.o
 OBJS-$(CONFIG_TEE_MUXER)                 += tee.o
 OBJS-$(CONFIG_THP_DEMUXER)               += thp.o
+OBJS-$(CONFIG_THREEDOSTR_DEMUXER)        += 3dostr.o
 OBJS-$(CONFIG_TIERTEXSEQ_DEMUXER)        += tiertexseq.o
 OBJS-$(CONFIG_MKVTIMESTAMP_V2_MUXER)     += mkvtimestamp_v2.o
 OBJS-$(CONFIG_TMV_DEMUXER)               += tmv.o
-OBJS-$(CONFIG_TRUEHD_DEMUXER)            += rawdec.o
+OBJS-$(CONFIG_TRUEHD_DEMUXER)            += rawdec.o mlpdec.o
 OBJS-$(CONFIG_TRUEHD_MUXER)              += rawenc.o
 OBJS-$(CONFIG_TTA_DEMUXER)               += tta.o apetag.o img2.o
 OBJS-$(CONFIG_TTY_DEMUXER)               += tty.o sauce.o
 OBJS-$(CONFIG_TXD_DEMUXER)               += txd.o
 OBJS-$(CONFIG_UNCODEDFRAMECRC_MUXER)     += uncodedframecrcenc.o framehash.o
-OBJS-$(CONFIG_VC1_DEMUXER)               += rawdec.o
+OBJS-$(CONFIG_V210_DEMUXER)              += v210.o
+OBJS-$(CONFIG_V210X_DEMUXER)             += v210.o
+OBJS-$(CONFIG_VAG_DEMUXER)               += vag.o
+OBJS-$(CONFIG_VC1_DEMUXER)               += rawdec.o vc1dec.o
 OBJS-$(CONFIG_VC1_MUXER)                 += rawenc.o
 OBJS-$(CONFIG_VC1T_DEMUXER)              += vc1test.o
 OBJS-$(CONFIG_VC1T_MUXER)                += vc1testenc.o
@@ -444,6 +462,7 @@ OBJS-$(CONFIG_VMD_DEMUXER)               += sierravmd.o
 OBJS-$(CONFIG_VOBSUB_DEMUXER)            += subtitles.o # mpeg demuxer is in the dependencies
 OBJS-$(CONFIG_VOC_DEMUXER)               += vocdec.o voc.o
 OBJS-$(CONFIG_VOC_MUXER)                 += vocenc.o voc.o
+OBJS-$(CONFIG_VPK_DEMUXER)               += vpk.o
 OBJS-$(CONFIG_VPLAYER_DEMUXER)           += vplayerdec.o subtitles.o
 OBJS-$(CONFIG_VQF_DEMUXER)               += vqf.o
 OBJS-$(CONFIG_W64_DEMUXER)               += wavdec.o w64.o pcm.o
@@ -467,31 +486,34 @@ OBJS-$(CONFIG_WEBVTT_DEMUXER)            += webvttdec.o subtitles.o
 OBJS-$(CONFIG_WEBVTT_MUXER)              += webvttenc.o
 OBJS-$(CONFIG_WSAUD_DEMUXER)             += westwood_aud.o
 OBJS-$(CONFIG_WSVQA_DEMUXER)             += westwood_vqa.o
-OBJS-$(CONFIG_WTV_DEMUXER)               += wtvdec.o wtv_common.o asfdec.o asf.o asfcrypt.o \
+OBJS-$(CONFIG_WTV_DEMUXER)               += wtvdec.o wtv_common.o asf.o \
                                             avlanguage.o mpegts.o isom.o
 OBJS-$(CONFIG_WTV_MUXER)                 += wtvenc.o wtv_common.o \
                                             mpegtsenc.o asf.o
 OBJS-$(CONFIG_WV_DEMUXER)                += wvdec.o wv.o apetag.o img2.o
+OBJS-$(CONFIG_WVE_DEMUXER)               += wvedec.o pcm.o
 OBJS-$(CONFIG_WV_MUXER)                  += wvenc.o wv.o apetag.o img2.o
 OBJS-$(CONFIG_XA_DEMUXER)                += xa.o
 OBJS-$(CONFIG_XBIN_DEMUXER)              += bintext.o sauce.o
 OBJS-$(CONFIG_XMV_DEMUXER)               += xmv.o
+OBJS-$(CONFIG_XVAG_DEMUXER)              += xvag.o
 OBJS-$(CONFIG_XWMA_DEMUXER)              += xwma.o
 OBJS-$(CONFIG_YOP_DEMUXER)               += yop.o
 OBJS-$(CONFIG_YUV4MPEGPIPE_MUXER)        += yuv4mpegenc.o
 OBJS-$(CONFIG_YUV4MPEGPIPE_DEMUXER)      += yuv4mpegdec.o
 
 # external libraries
+OBJS-$(CONFIG_CHROMAPRINT_MUXER)         += chromaprint.o
 OBJS-$(CONFIG_LIBGME_DEMUXER)            += libgme.o
 OBJS-$(CONFIG_LIBMODPLUG_DEMUXER)        += libmodplug.o
 OBJS-$(CONFIG_LIBNUT_DEMUXER)            += libnut.o
 OBJS-$(CONFIG_LIBNUT_MUXER)              += libnut.o
-OBJS-$(CONFIG_LIBQUVI_DEMUXER)           += libquvi.o
 OBJS-$(CONFIG_LIBRTMP)                   += librtmp.o
 OBJS-$(CONFIG_LIBSSH_PROTOCOL)           += libssh.o
 OBJS-$(CONFIG_LIBSMBCLIENT_PROTOCOL)     += libsmbclient.o
 
 # protocols I/O
+OBJS-$(CONFIG_ASYNC_PROTOCOL)            += async.o
 OBJS-$(CONFIG_APPLEHTTP_PROTOCOL)        += hlsproto.o
 OBJS-$(CONFIG_BLURAY_PROTOCOL)           += bluray.o
 OBJS-$(CONFIG_CACHE_PROTOCOL)            += cache.o
@@ -525,6 +547,7 @@ OBJS-$(CONFIG_SUBFILE_PROTOCOL)          += subfile.o
 OBJS-$(CONFIG_TCP_PROTOCOL)              += tcp.o
 OBJS-$(CONFIG_TLS_GNUTLS_PROTOCOL)       += tls_gnutls.o tls.o
 OBJS-$(CONFIG_TLS_OPENSSL_PROTOCOL)      += tls_openssl.o tls.o
+OBJS-$(CONFIG_TLS_SCHANNEL_PROTOCOL)     += tls_schannel.o tls.o
 OBJS-$(CONFIG_TLS_SECURETRANSPORT_PROTOCOL) += tls_securetransport.o tls.o
 OBJS-$(CONFIG_UDP_PROTOCOL)              += udp.o
 OBJS-$(CONFIG_UDPLITE_PROTOCOL)          += udp.o
@@ -541,10 +564,12 @@ SLIBOBJS-$(HAVE_GNU_WINDRES)             += avformatres.o
 SKIPHEADERS-$(CONFIG_FFRTMPCRYPT_PROTOCOL) += rtmpdh.h
 SKIPHEADERS-$(CONFIG_NETWORK)            += network.h rtsp.h
 
-TESTPROGS = seek                                                        \
+TESTPROGS = async                                                       \
+            seek                                                        \
             srtp                                                        \
             url                                                         \
 
+TESTPROGS-$(CONFIG_MOV_MUXER)            += movenc
 TESTPROGS-$(CONFIG_NETWORK)              += noproxy
 TESTPROGS-$(CONFIG_FFRTMPCRYPT_PROTOCOL) += rtmpdh
 
diff --git a/libavformat/aadec.c b/libavformat/aadec.c
new file mode 100644
index 00000000..266a8e85
--- /dev/null
+++ b/libavformat/aadec.c
@@ -0,0 +1,314 @@
+/*
+ * Audible AA demuxer
+ * Copyright (c) 2015 Vesselin Bontchev
+ *
+ * Header parsing is borrowed from https://github.com/jteeuwen/audible project.
+ * Copyright (c) 2001-2014, Jim Teeuwen
+ *
+ * Redistribution and use in source and binary forms, with or without modification,
+ * are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "avformat.h"
+#include "internal.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/tea.h"
+#include "libavutil/opt.h"
+
+#define AA_MAGIC 1469084982 /* this identifies an audible .aa file */
+#define MAX_CODEC_SECOND_SIZE 3982 /* largest value get_second_size() returns ("mp332"); sizes the packet buffer */
+#define MAX_TOC_ENTRIES 16 /* capacity of the TOC[] array in aa_read_header() */
+#define MAX_DICTIONARY_ENTRIES 128 /* upper bound accepted for the header's key/value pair count */
+#define TEA_BLOCK_SIZE 8 /* bytes handled per av_tea_crypt() block in this file */
+
+typedef struct AADemuxContext { /* private demuxer state, reached through s->priv_data */
+    AVClass *class;
+    uint8_t *aa_fixed_key; /* value of the "aa_fixed_key" option */
+    int aa_fixed_key_len; /* must be 16 (checked in aa_read_header()); presumably filled in by the binary option handler */
+    int codec_second_size; /* get_second_size() result for the file's codec */
+    int current_codec_second_size; /* number of bytes the next aa_read_packet() call reads */
+    int chapter_idx; /* count of chapter headers read so far; only used in the debug log */
+    struct AVTEA *tea_ctx; /* allocated in aa_read_header(), freed in aa_read_close() */
+    uint8_t file_key[16]; /* key derived in aa_read_header(), used to decrypt audio blocks */
+    int64_t current_chapter_size; /* bytes left in the current chapter; 0 means a chapter header is read next */
+} AADemuxContext;
+
+static int get_second_size(char *codec_name) /* map a codec name from the header dictionary to its chunk size in bytes */
+{
+    int result = -1; /* -1 = unknown codec; aa_read_header() turns this into an error */
+
+    if (!strcmp(codec_name, "mp332")) {
+        result = 3982; /* equals MAX_CODEC_SECOND_SIZE */
+    } else if (!strcmp(codec_name, "acelp16")) {
+        result = 2000;
+    } else if (!strcmp(codec_name, "acelp85")) {
+        result = 1045;
+    }
+
+    return result; /* presumably the byte count of one second of audio, judging by the name; confirm */
+}
+
+static int aa_read_header(AVFormatContext *s) /* parse TOC and dictionary, derive the file key, create the audio stream, seek to the audio data */
+{
+    int i, j, idx, largest_idx = -1;
+    uint32_t nkey, nval, toc_size, npairs, header_seed = 0, start;
+    char key[128], val[128], codec_name[64] = {0};
+    uint8_t output[24], dst[8], src[8];
+    int64_t largest_size = -1, current_size = -1;
+    struct toc_entry {
+        uint32_t offset;
+        uint32_t size;
+    } TOC[MAX_TOC_ENTRIES];
+    uint32_t header_key_part[4];
+    uint8_t header_key[16] = {0};
+    AADemuxContext *c = s->priv_data;
+    AVIOContext *pb = s->pb;
+    AVStream *st;
+
+    /* parse .aa header */
+    avio_skip(pb, 4); // file size
+    avio_skip(pb, 4); // magic string
+    toc_size = avio_rb32(pb); // TOC size
+    avio_skip(pb, 4); // unidentified integer
+    if (toc_size > MAX_TOC_ENTRIES || toc_size <= 1) // the audio search below skips entry 0, so fewer than 2 entries would index TOC[-1]
+        return AVERROR_INVALIDDATA;
+    for (i = 0; i < toc_size; i++) { // read TOC
+        avio_skip(pb, 4); // TOC entry index
+        TOC[i].offset = avio_rb32(pb); // block offset
+        TOC[i].size = avio_rb32(pb); // block size
+    }
+    avio_skip(pb, 24); // header termination block (ignored)
+    npairs = avio_rb32(pb); // read dictionary entries
+    if (npairs > MAX_DICTIONARY_ENTRIES)
+        return AVERROR_INVALIDDATA;
+    for (i = 0; i < npairs; i++) {
+        memset(val, 0, sizeof(val));
+        memset(key, 0, sizeof(key));
+        avio_skip(pb, 1); // unidentified integer
+        nkey = avio_rb32(pb); // key string length
+        nval = avio_rb32(pb); // value string length
+        if (nkey >= sizeof(key)) { // keep one byte for the terminating NUL that strcmp() needs
+            avio_skip(pb, nkey);
+        } else {
+            avio_read(pb, key, nkey); // key string
+        }
+        if (nval >= sizeof(val)) { // same NUL-terminator reservation as for the key
+            avio_skip(pb, nval);
+        } else {
+            avio_read(pb, val, nval); // value string
+        }
+        if (!strcmp(key, "codec")) {
+            av_log(s, AV_LOG_DEBUG, "Codec is <%s>\n", val);
+            strncpy(codec_name, val, sizeof(codec_name) - 1);
+        }
+        if (!strcmp(key, "HeaderSeed")) {
+            av_log(s, AV_LOG_DEBUG, "HeaderSeed is <%s>\n", val);
+            header_seed = atoi(val);
+        }
+        if (!strcmp(key, "HeaderKey")) { // this looks like "1234567890 1234567890 1234567890 1234567890"
+            av_log(s, AV_LOG_DEBUG, "HeaderKey is <%s>\n", val);
+            if (sscanf(val, "%"SCNu32"%"SCNu32"%"SCNu32"%"SCNu32, &header_key_part[0], &header_key_part[1], &header_key_part[2], &header_key_part[3]) != 4)
+                return AVERROR_INVALIDDATA; // fewer than 4 numbers would leave header_key_part[] partly uninitialized
+            for (idx = 0; idx < 4; idx++)
+                AV_WB32(&header_key[idx * 4], header_key_part[idx]); // convert each part to BE!
+            av_log(s, AV_LOG_DEBUG, "Processed HeaderKey is ");
+            for (idx = 0; idx < 16; idx++) // idx, not i: i is the counter of the enclosing dictionary loop
+                av_log(s, AV_LOG_DEBUG, "%02x", header_key[idx]);
+            av_log(s, AV_LOG_DEBUG, "\n");
+        }
+    }
+
+    /* verify fixed key */
+    if (c->aa_fixed_key_len != 16) {
+        av_log(s, AV_LOG_ERROR, "aa_fixed_key value needs to be 16 bytes!\n");
+        return AVERROR(EINVAL);
+    }
+
+    /* verify codec */
+    if ((c->codec_second_size = get_second_size(codec_name)) == -1) {
+        av_log(s, AV_LOG_ERROR, "unknown codec <%s>!\n", codec_name);
+        return AVERROR(EINVAL);
+    }
+
+    /* decryption key derivation */
+    c->tea_ctx = av_tea_alloc();
+    if (!c->tea_ctx)
+        return AVERROR(ENOMEM);
+    av_tea_init(c->tea_ctx, c->aa_fixed_key, 16);
+    output[0] = output[1] = 0; // purely for padding purposes
+    memcpy(output + 2, header_key, 16);
+    idx = 0;
+    for (i = 0; i < 3; i++) { // TEA CBC with weird mixed endianness
+        AV_WB32(src, header_seed);
+        AV_WB32(src + 4, header_seed + 1);
+        header_seed += 2;
+        av_tea_crypt(c->tea_ctx, dst, src, 1, NULL, 0); // TEA ECB encrypt
+        for (j = 0; j < TEA_BLOCK_SIZE && idx < 18; j+=1, idx+=1) { // only output[0..17] is ever touched
+            output[idx] = output[idx] ^ dst[j];
+        }
+    }
+    memcpy(c->file_key, output + 2, 16); // skip first 2 bytes of output
+    av_log(s, AV_LOG_DEBUG, "File key is ");
+    for (i = 0; i < 16; i++)
+        av_log(s, AV_LOG_DEBUG, "%02x", c->file_key[i]);
+    av_log(s, AV_LOG_DEBUG, "\n");
+
+    /* decoder setup */
+    st = avformat_new_stream(s, NULL);
+    if (!st) {
+        av_freep(&c->tea_ctx);
+        return AVERROR(ENOMEM);
+    }
+    st->codec->codec_type = AVMEDIA_TYPE_AUDIO;
+    if (!strcmp(codec_name, "mp332")) {
+        st->codec->codec_id = AV_CODEC_ID_MP3;
+        st->codec->sample_rate = 22050;
+        st->need_parsing = AVSTREAM_PARSE_FULL_RAW;
+        st->start_time = 0;
+    } else if (!strcmp(codec_name, "acelp85")) {
+        st->codec->codec_id = AV_CODEC_ID_SIPR;
+        st->codec->block_align = 19;
+        st->codec->channels = 1;
+        st->codec->sample_rate = 8500;
+    } else if (!strcmp(codec_name, "acelp16")) {
+        st->codec->codec_id = AV_CODEC_ID_SIPR;
+        st->codec->block_align = 20;
+        st->codec->channels = 1;
+        st->codec->sample_rate = 16000;
+    }
+
+    /* determine, and jump to audio start offset */
+    for (i = 1; i < toc_size; i++) { // skip the first entry!
+        current_size = TOC[i].size;
+        if (current_size > largest_size) {
+            largest_idx = i;
+            largest_size = current_size;
+        }
+    }
+    start = TOC[largest_idx].offset; // largest_idx >= 1 here because toc_size >= 2 was enforced above
+    avio_seek(pb, start, SEEK_SET);
+    c->current_chapter_size = 0; // makes the first aa_read_packet() call read a chapter header
+
+    return 0;
+}
+
+static int aa_read_packet(AVFormatContext *s, AVPacket *pkt) /* read and decrypt one chunk of the current chapter into pkt */
+{
+    uint8_t src[TEA_BLOCK_SIZE];
+    int i;
+    int blocks, trailing_bytes;
+    uint8_t buf[MAX_CODEC_SECOND_SIZE * 2];
+    int written = 0;
+    int ret;
+    AADemuxContext *c = s->priv_data;
+
+    // are we at the start of a chapter?
+    if (c->current_chapter_size == 0) {
+        c->current_chapter_size = avio_rb32(s->pb);
+        if (c->current_chapter_size == 0) {
+            return AVERROR_EOF;
+        }
+        av_log(s, AV_LOG_DEBUG, "Chapter %d (%" PRId64 " bytes)\n", c->chapter_idx, c->current_chapter_size);
+        c->chapter_idx = c->chapter_idx + 1;
+        avio_skip(s->pb, 4); // data start offset
+        c->current_codec_second_size = c->codec_second_size;
+    }
+
+    // is this the last block in this chapter?
+    if (c->current_chapter_size / c->current_codec_second_size == 0) {
+        c->current_codec_second_size = c->current_chapter_size % c->current_codec_second_size;
+    }
+
+    // decrypt c->current_codec_second_size bytes
+    blocks = c->current_codec_second_size / TEA_BLOCK_SIZE;
+    av_tea_init(c->tea_ctx, c->file_key, 16); // the key is the same for every block, so set it once per packet
+    for (i = 0; i < blocks; i++) {
+        ret = avio_read(s->pb, src, TEA_BLOCK_SIZE);
+        if (ret != TEA_BLOCK_SIZE) // short read: stop instead of decrypting stale bytes
+            return ret < 0 ? ret : AVERROR_EOF;
+        av_tea_crypt(c->tea_ctx, buf + written, src, 1, NULL, 1); // NULL iv and final argument 1: ECB decrypt straight into buf
+        written = written + TEA_BLOCK_SIZE;
+    }
+    trailing_bytes = c->current_codec_second_size % TEA_BLOCK_SIZE;
+    if (trailing_bytes != 0) { // trailing bytes are left unencrypted!
+        ret = avio_read(s->pb, buf + written, trailing_bytes);
+        if (ret != trailing_bytes) // nothing is allocated yet, so returning here leaks nothing
+            return ret < 0 ? ret : AVERROR_EOF;
+        written = written + trailing_bytes;
+    }
+
+    // update state
+    c->current_chapter_size = c->current_chapter_size - c->current_codec_second_size;
+    if (c->current_chapter_size <= 0)
+        c->current_chapter_size = 0;
+
+    ret = av_new_packet(pkt, written);
+    if (ret < 0)
+        return ret;
+    memcpy(pkt->data, buf, written);
+
+    return 0;
+}
+
+static int aa_probe(AVProbeData *p) /* returns AVPROBE_SCORE_MAX / 2 when the AA magic is found, otherwise 0 */
+{
+    uint8_t *buf = p->buf;
+
+    // first 4 bytes are file size, next 4 bytes are the magic
+    if (AV_RB32(buf+4) != AA_MAGIC) // NOTE(review): p->buf_size is not checked; presumably relies on probe-buffer padding, confirm
+        return 0;
+
+    return AVPROBE_SCORE_MAX / 2;
+}
+
+static int aa_read_close(AVFormatContext *s) /* release the TEA context allocated in aa_read_header() */
+{
+    AADemuxContext *c = s->priv_data;
+
+    av_freep(&c->tea_ctx); /* also resets the pointer to NULL */
+
+    return 0;
+}
+
+#define OFFSET(x) offsetof(AADemuxContext, x) /* byte offset of an option's field inside the private context */
+static const AVOption aa_options[] = {
+    { "aa_fixed_key", // extracted from libAAX_SDK.so and AAXSDKWin.dll files!
+        "Fixed key used for handling Audible AA files", OFFSET(aa_fixed_key),
+        AV_OPT_TYPE_BINARY, {.str="77214d4b196a87cd520045fd2a51d673"}, /* 32 hex digits, i.e. the 16 bytes aa_read_header() requires */
+        .flags = AV_OPT_FLAG_DECODING_PARAM },
+    { NULL }, /* end-of-list marker */
+};
+
+static const AVClass aa_class = { /* ties aa_options to the demuxer; referenced by ff_aa_demuxer.priv_class */
+    .class_name = "aa",
+    .item_name  = av_default_item_name,
+    .option     = aa_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+AVInputFormat ff_aa_demuxer = { /* demuxer descriptor wiring the aa_* callbacks to the name "aa" */
+    .name           = "aa",
+    .long_name      = NULL_IF_CONFIG_SMALL("Audible AA format files"),
+    .priv_class     = &aa_class,
+    .priv_data_size = sizeof(AADemuxContext), /* size of the private state the callbacks read via s->priv_data */
+    .extensions     = "aa",
+    .read_probe     = aa_probe,
+    .read_header    = aa_read_header,
+    .read_packet    = aa_read_packet,
+    .read_close     = aa_read_close,
+    .flags          = AVFMT_GENERIC_INDEX,
+};
diff --git a/libavformat/ac3dec.c b/libavformat/ac3dec.c
index 58ef44d4..363a32e0 100644
--- a/libavformat/ac3dec.c
+++ b/libavformat/ac3dec.c
@@ -55,7 +55,7 @@ static int ac3_eac3_probe(AVProbeData *p, enum AVCodecID expected_codec_id)
                 init_get_bits(&gbc, buf3, 54);
             }else
                 init_get_bits(&gbc, buf2, 54);
-            if(avpriv_ac3_parse_header2(&gbc, &phdr) < 0)
+            if(avpriv_ac3_parse_header(&gbc, &phdr) < 0)
                 break;
             if(buf2 + phdr->frame_size > end)
                 break;
@@ -80,7 +80,7 @@ static int ac3_eac3_probe(AVProbeData *p, enum AVCodecID expected_codec_id)
     if(codec_id != expected_codec_id) return 0;
     // keep this in sync with mp3 probe, both need to avoid
     // issues with MPEG-files!
-    if   (first_frames>=4) return AVPROBE_SCORE_EXTENSION + 1;
+    if   (first_frames>=7) return AVPROBE_SCORE_EXTENSION + 1;
     else if(max_frames>200)return AVPROBE_SCORE_EXTENSION;
     else if(max_frames>=4) return AVPROBE_SCORE_EXTENSION/2;
     else if(max_frames>=1) return 1;
diff --git a/libavformat/acm.c b/libavformat/acm.c
new file mode 100644
index 00000000..afcafa8d
--- /dev/null
+++ b/libavformat/acm.c
@@ -0,0 +1,75 @@
+/*
+ * ACM demuxer
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/intreadwrite.h"
+#include "avformat.h"
+#include "rawdec.h"
+#include "internal.h"
+
+static int acm_probe(AVProbeData *p) /* returns AVPROBE_SCORE_MAX / 3 * 2 when the signature matches, otherwise 0 */
+{
+    if (AV_RB32(p->buf) != 0x97280301) /* first four bytes, read big-endian */
+        return 0;
+
+    return AVPROBE_SCORE_MAX / 3 * 2;
+}
+
+static int acm_read_header(AVFormatContext *s) /* create the audio stream and load the 14-byte file header as extradata */
+{
+    AVStream *st;
+    int ret;
+
+    st = avformat_new_stream(s, NULL);
+    if (!st)
+        return AVERROR(ENOMEM);
+
+    st->codec->codec_type = AVMEDIA_TYPE_AUDIO;
+    st->codec->codec_id   = AV_CODEC_ID_INTERPLAY_ACM;
+
+    ff_alloc_extradata(st->codec, 14);
+    if (!st->codec->extradata)
+        return AVERROR(ENOMEM);
+    ret = avio_read(s->pb, st->codec->extradata, 14);
+    if (ret < 14) /* need the full header: bytes 10..11 are parsed below and all 14 are exported as extradata */
+        return ret < 0 ? ret : AVERROR_EOF;
+
+    st->codec->channels    = AV_RL16(st->codec->extradata +  8);
+    st->codec->sample_rate = AV_RL16(st->codec->extradata + 10);
+    if (st->codec->channels <= 0 || st->codec->sample_rate <= 0)
+        return AVERROR_INVALIDDATA;
+    st->start_time         = 0;
+    st->duration           = AV_RL32(st->codec->extradata +  4) / st->codec->channels; /* channels > 0 was checked above */
+    st->need_parsing       = AVSTREAM_PARSE_FULL_RAW;
+    avpriv_set_pts_info(st, 64, 1, st->codec->sample_rate); /* time base 1 / sample_rate */
+
+    return 0;
+}
+
+AVInputFormat ff_acm_demuxer = { /* demuxer descriptor for the name "acm" */
+    .name           = "acm",
+    .long_name      = NULL_IF_CONFIG_SMALL("Interplay ACM"),
+    .read_probe     = acm_probe,
+    .read_header    = acm_read_header,
+    .read_packet    = ff_raw_read_partial_packet, /* packet reading is delegated to the shared helper declared in rawdec.h */
+    .flags          = AVFMT_NOBINSEARCH | AVFMT_NOGENSEARCH | AVFMT_NO_BYTE_SEEK | AVFMT_NOTIMESTAMPS,
+    .extensions     = "acm",
+    .raw_codec_id   = AV_CODEC_ID_INTERPLAY_ACM,
+};
diff --git a/libavformat/act.c b/libavformat/act.c
index 7b6b8406..35aacbc4 100644
--- a/libavformat/act.c
+++ b/libavformat/act.c
@@ -75,7 +75,7 @@ static int read_header(AVFormatContext *s)
 
     avio_skip(pb, 16);
     size=avio_rl32(pb);
-    ff_get_wav_header(pb, st->codec, size, 0);
+    ff_get_wav_header(s, pb, st->codec, size, 0);
 
     /*
       8000Hz (Fine-rec) file format has 10 bytes long
diff --git a/libavformat/adp.c b/libavformat/adp.c
index 8a3661ae..f53375ae 100644
--- a/libavformat/adp.c
+++ b/libavformat/adp.c
@@ -78,7 +78,7 @@ static int adp_read_packet(AVFormatContext *s, AVPacket *pkt)
 
     if (ret != size) {
         if (ret < 0) {
-            av_free_packet(pkt);
+            av_packet_unref(pkt);
             return ret;
         }
         av_shrink_packet(pkt, ret);
diff --git a/libavformat/ads.c b/libavformat/ads.c
new file mode 100644
index 00000000..bda2673b
--- /dev/null
+++ b/libavformat/ads.c
@@ -0,0 +1,89 @@
+/*
+ * ADS/SS2 demuxer
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/channel_layout.h"
+#include "avformat.h"
+#include "internal.h"
+
+static int ads_probe(AVProbeData *p) /* accept buffers with "SShd" at offset 0 and "SSbd" at offset 32 */
+{
+    if (memcmp(p->buf, "SShd", 4) ||
+        memcmp(p->buf+32, "SSbd", 4)) /* NOTE(review): p->buf_size is not consulted; presumably relies on probe-buffer padding, confirm */
+        return 0;
+
+    return AVPROBE_SCORE_MAX / 3 * 2;
+}
+
+static int ads_read_header(AVFormatContext *s) /* read the fixed-layout header and set up the single audio stream */
+{
+    int align, codec, size;
+    AVStream *st;
+
+    st = avformat_new_stream(s, NULL);
+    if (!st)
+        return AVERROR(ENOMEM);
+
+    avio_skip(s->pb, 8);
+    st->codec->codec_type  = AVMEDIA_TYPE_AUDIO;
+    codec                  = avio_rl32(s->pb);
+    st->codec->sample_rate = avio_rl32(s->pb);
+    if (st->codec->sample_rate <= 0)
+        return AVERROR_INVALIDDATA;
+    st->codec->channels    = avio_rl32(s->pb);
+    if (st->codec->channels <= 0)
+        return AVERROR_INVALIDDATA;
+    align                  = avio_rl32(s->pb);
+    if (align <= 0 || align > INT_MAX / st->codec->channels) /* keeps channels * align below from overflowing */
+        return AVERROR_INVALIDDATA;
+
+    if (codec == 1)
+        st->codec->codec_id = AV_CODEC_ID_PCM_S16LE_PLANAR;
+    else
+        st->codec->codec_id = AV_CODEC_ID_ADPCM_PSX;
+
+    st->codec->block_align = st->codec->channels * align; /* also the packet size used by ads_read_packet() */
+    avio_skip(s->pb, 12);
+    size = avio_rl32(s->pb);
+    if (st->codec->codec_id == AV_CODEC_ID_ADPCM_PSX && size >= 0x40) /* smaller or negative sizes leave the duration unset instead of evaluating size - 0x40 */
+        st->duration = (size - 0x40) / 16 / st->codec->channels * 28;
+    avpriv_set_pts_info(st, 64, 1, st->codec->sample_rate); /* time base 1 / sample_rate */
+
+    return 0;
+}
+
+static int ads_read_packet(AVFormatContext *s, AVPacket *pkt) /* read one block_align-sized packet for stream 0 */
+{
+    AVCodecContext *codec = s->streams[0]->codec;
+    int ret;
+
+    ret = av_get_packet(s->pb, pkt, codec->block_align); /* block_align = channels * align, set in ads_read_header() */
+    pkt->stream_index = 0;
+    return ret; /* av_get_packet() result passed through unchanged */
+}
+
+AVInputFormat ff_ads_demuxer = { /* demuxer descriptor wiring the ads_* callbacks to the name "ads" */
+    .name           = "ads",
+    .long_name      = NULL_IF_CONFIG_SMALL("Sony PS2 ADS"),
+    .read_probe     = ads_probe,
+    .read_header    = ads_read_header,
+    .read_packet    = ads_read_packet,
+    .extensions     = "ads,ss2",
+};
diff --git a/libavformat/adtsenc.c b/libavformat/adtsenc.c
index 7448ec79..242d7383 100644
--- a/libavformat/adtsenc.c
+++ b/libavformat/adtsenc.c
@@ -183,8 +183,8 @@ static int adts_write_trailer(AVFormatContext *s)
 #define ENC AV_OPT_FLAG_ENCODING_PARAM
 #define OFFSET(obj) offsetof(ADTSContext, obj)
 static const AVOption options[] = {
-    { "write_id3v2", "Enable ID3v2 tag writing", OFFSET(id3v2tag), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, ENC},
-    { "write_apetag", "Enable APE tag writing", OFFSET(apetag), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, ENC},
+    { "write_id3v2",  "Enable ID3v2 tag writing", OFFSET(id3v2tag), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, ENC},
+    { "write_apetag", "Enable APE tag writing",   OFFSET(apetag),   AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, ENC},
     { NULL },
 };
 
diff --git a/libavformat/adxdec.c b/libavformat/adxdec.c
index e7107ac5..05cef0b0 100644
--- a/libavformat/adxdec.c
+++ b/libavformat/adxdec.c
@@ -34,6 +34,19 @@ typedef struct ADXDemuxerContext {
     int header_size;
 } ADXDemuxerContext;
 
+/* Probe CRI ADX: 0x8000 marker, then "(c)CRI" 2 bytes before the offset at bytes 2-3. */
+static int adx_probe(AVProbeData *p)
+{
+    int off;
+
+    if (AV_RB16(p->buf) != 0x8000)
+        return 0;
+    off = AV_RB16(&p->buf[2]);
+    if (off < 8 || off > p->buf_size - 4)
+        return 0;
+    return memcmp(p->buf + off - 2, "(c)CRI", 6) ? 0 : AVPROBE_SCORE_MAX * 3 / 4;
+}
+
 static int adx_read_packet(AVFormatContext *s, AVPacket *pkt)
 {
     ADXDemuxerContext *c = s->priv_data;
@@ -52,11 +65,11 @@ static int adx_read_packet(AVFormatContext *s, AVPacket *pkt)
 
     ret = av_get_packet(s->pb, pkt, size);
     if (ret != size) {
-        av_free_packet(pkt);
+        av_packet_unref(pkt);
         return ret < 0 ? ret : AVERROR(EIO);
     }
     if (AV_RB16(pkt->data) & 0x8000) {
-        av_free_packet(pkt);
+        av_packet_unref(pkt);
         return AVERROR_EOF;
     }
     pkt->size     = size;
@@ -107,6 +120,7 @@ static int adx_read_header(AVFormatContext *s)
 AVInputFormat ff_adx_demuxer = {
     .name           = "adx",
     .long_name      = NULL_IF_CONFIG_SMALL("CRI ADX"),
+    .read_probe     = adx_probe,
     .priv_data_size = sizeof(ADXDemuxerContext),
     .read_header    = adx_read_header,
     .read_packet    = adx_read_packet,
diff --git a/libavformat/aiff.h b/libavformat/aiff.h
index 44702540..2a87d6e8 100644
--- a/libavformat/aiff.h
+++ b/libavformat/aiff.h
@@ -52,6 +52,8 @@ static const AVCodecTag ff_codec_aiff_tags[] = {
     { AV_CODEC_ID_ADPCM_IMA_QT, MKTAG('i','m','a','4') },
     { AV_CODEC_ID_QDM2,         MKTAG('Q','D','M','2') },
     { AV_CODEC_ID_QCELP,        MKTAG('Q','c','l','p') },
+    { AV_CODEC_ID_SDX2_DPCM,    MKTAG('S','D','X','2') },
+    { AV_CODEC_ID_ADPCM_IMA_WS, MKTAG('A','D','P','4') },
     { AV_CODEC_ID_NONE,         0 },
 };
 
diff --git a/libavformat/aiffdec.c b/libavformat/aiffdec.c
index ff04c2b8..34b266d3 100644
--- a/libavformat/aiffdec.c
+++ b/libavformat/aiffdec.c
@@ -91,7 +91,7 @@ static void get_meta(AVFormatContext *s, const char *key, int size)
 }
 
 /* Returns the number of sound data frames or negative on error */
-static unsigned int get_aiff_header(AVFormatContext *s, int size,
+static int get_aiff_header(AVFormatContext *s, int size,
                                     unsigned version)
 {
     AVIOContext *pb        = s->pb;
@@ -99,7 +99,7 @@ static unsigned int get_aiff_header(AVFormatContext *s, int size,
     AIFFInputContext *aiff = s->priv_data;
     int exp;
     uint64_t val;
-    double sample_rate;
+    int sample_rate;
     unsigned int num_frames;
 
     if (size & 1)
@@ -109,9 +109,16 @@ static unsigned int get_aiff_header(AVFormatContext *s, int size,
     num_frames = avio_rb32(pb);
     codec->bits_per_coded_sample = avio_rb16(pb);
 
-    exp = avio_rb16(pb);
+    exp = avio_rb16(pb) - 16383 - 63;
     val = avio_rb64(pb);
-    sample_rate = ldexp(val, exp - 16383 - 63);
+    if (exp <-63 || exp >63) {
+        av_log(s, AV_LOG_ERROR, "exp %d is out of range\n", exp);
+        return AVERROR_INVALIDDATA;
+    }
+    if (exp >= 0)
+        sample_rate = val << exp;
+    else
+        sample_rate = (val + (1ULL<<(-exp-1))) >> -exp;
     codec->sample_rate = sample_rate;
     size -= 18;
 
@@ -121,6 +128,11 @@ static unsigned int get_aiff_header(AVFormatContext *s, int size,
     } else if (version == AIFF_C_VERSION1) {
         codec->codec_tag = avio_rl32(pb);
         codec->codec_id  = ff_codec_get_id(ff_codec_aiff_tags, codec->codec_tag);
+        if (codec->codec_id == AV_CODEC_ID_NONE) {
+            char tag[32];
+            av_get_codec_tag_string(tag, sizeof(tag), codec->codec_tag);
+            avpriv_request_sample(s, "unknown or unsupported codec tag: %s", tag);
+        }
         size -= 4;
     }
 
@@ -145,8 +157,10 @@ static unsigned int get_aiff_header(AVFormatContext *s, int size,
             break;
         case AV_CODEC_ID_ADPCM_G726LE:
             codec->bits_per_coded_sample = 5;
+        case AV_CODEC_ID_ADPCM_IMA_WS:
         case AV_CODEC_ID_ADPCM_G722:
         case AV_CODEC_ID_MACE6:
+        case AV_CODEC_ID_SDX2_DPCM:
             codec->block_align = 1*codec->channels;
             break;
         case AV_CODEC_ID_GSM:
diff --git a/libavformat/aiffenc.c b/libavformat/aiffenc.c
index e2828e75..25dc5e62 100644
--- a/libavformat/aiffenc.c
+++ b/libavformat/aiffenc.c
@@ -112,7 +112,7 @@ static int aiff_write_header(AVFormatContext *s)
         if (aiff->audio_stream_idx < 0 && st->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
             aiff->audio_stream_idx = i;
         } else if (st->codec->codec_type != AVMEDIA_TYPE_VIDEO) {
-            av_log(s, AV_LOG_ERROR, "Only audio streams and pictures are allowed in AIFF.\n");
+            av_log(s, AV_LOG_ERROR, "AIFF allows only one audio stream and a picture.\n");
             return AVERROR(EINVAL);
         }
     }
@@ -293,7 +293,7 @@ static int aiff_write_trailer(AVFormatContext *s)
 
     while (pict_list) {
         AVPacketList *next = pict_list->next;
-        av_free_packet(&pict_list->pkt);
+        av_packet_unref(&pict_list->pkt);
         av_freep(&pict_list);
         pict_list = next;
     }
@@ -305,7 +305,7 @@ static int aiff_write_trailer(AVFormatContext *s)
 #define ENC AV_OPT_FLAG_ENCODING_PARAM
 static const AVOption options[] = {
     { "write_id3v2", "Enable ID3 tags writing.",
-      OFFSET(write_id3v2), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, ENC },
+      OFFSET(write_id3v2), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, ENC },
     { "id3v2_version", "Select ID3v2 version to write. Currently 3 and 4 are supported.",
       OFFSET(id3v2_version), AV_OPT_TYPE_INT, {.i64 = 4}, 3, 4, ENC },
     { NULL },
diff --git a/libavformat/allformats.c b/libavformat/allformats.c
index 3a49650c..02bb16a7 100644
--- a/libavformat/allformats.c
+++ b/libavformat/allformats.c
@@ -60,11 +60,14 @@ void av_register_all(void)
 
     /* (de)muxers */
     REGISTER_MUXER   (A64,              a64);
+    REGISTER_DEMUXER (AA,               aa);
     REGISTER_DEMUXER (AAC,              aac);
     REGISTER_MUXDEMUX(AC3,              ac3);
+    REGISTER_DEMUXER (ACM,              acm);
     REGISTER_DEMUXER (ACT,              act);
     REGISTER_DEMUXER (ADF,              adf);
     REGISTER_DEMUXER (ADP,              adp);
+    REGISTER_DEMUXER (ADS,              ads);
     REGISTER_MUXER   (ADTS,             adts);
     REGISTER_MUXDEMUX(ADX,              adx);
     REGISTER_DEMUXER (AEA,              aea);
@@ -77,6 +80,7 @@ void av_register_all(void)
     REGISTER_MUXDEMUX(APNG,             apng);
     REGISTER_DEMUXER (AQTITLE,          aqtitle);
     REGISTER_MUXDEMUX(ASF,              asf);
+    REGISTER_DEMUXER (ASF_O,            asf_o);
     REGISTER_MUXDEMUX(ASS,              ass);
     REGISTER_MUXDEMUX(AST,              ast);
     REGISTER_MUXER   (ASF_STREAM,       asf_stream);
@@ -92,6 +96,7 @@ void av_register_all(void)
     REGISTER_DEMUXER (BINK,             bink);
     REGISTER_MUXDEMUX(BIT,              bit);
     REGISTER_DEMUXER (BMV,              bmv);
+    REGISTER_DEMUXER (BFSTM,            bfstm);
     REGISTER_DEMUXER (BRSTM,            brstm);
     REGISTER_DEMUXER (BOA,              boa);
     REGISTER_DEMUXER (C93,              c93);
@@ -105,6 +110,7 @@ void av_register_all(void)
     REGISTER_MUXER   (DASH,             dash);
     REGISTER_MUXDEMUX(DATA,             data);
     REGISTER_MUXDEMUX(DAUD,             daud);
+    REGISTER_DEMUXER (DCSTR,            dcstr);
     REGISTER_DEMUXER (DFA,              dfa);
     REGISTER_MUXDEMUX(DIRAC,            dirac);
     REGISTER_MUXDEMUX(DNXHD,            dnxhd);
@@ -132,9 +138,11 @@ void av_register_all(void)
     REGISTER_MUXER   (FRAMECRC,         framecrc);
     REGISTER_MUXER   (FRAMEMD5,         framemd5);
     REGISTER_DEMUXER (FRM,              frm);
+    REGISTER_DEMUXER (FSB,              fsb);
     REGISTER_MUXDEMUX(G722,             g722);
     REGISTER_MUXDEMUX(G723_1,           g723_1);
     REGISTER_DEMUXER (G729,             g729);
+    REGISTER_DEMUXER (GENH,             genh);
     REGISTER_MUXDEMUX(GIF,              gif);
     REGISTER_DEMUXER (GSM,              gsm);
     REGISTER_MUXDEMUX(GXF,              gxf);
@@ -162,9 +170,10 @@ void av_register_all(void)
     REGISTER_DEMUXER (ISS,              iss);
     REGISTER_DEMUXER (IV8,              iv8);
     REGISTER_MUXDEMUX(IVF,              ivf);
+    REGISTER_DEMUXER (IVR,              ivr);
     REGISTER_MUXDEMUX(JACOSUB,          jacosub);
     REGISTER_DEMUXER (JV,               jv);
-    REGISTER_MUXDEMUX(LATM,             latm);
+    REGISTER_MUXER   (LATM,             latm);
     REGISTER_DEMUXER (LMLM4,            lmlm4);
     REGISTER_DEMUXER (LOAS,             loas);
     REGISTER_MUXDEMUX(LRC,              lrc);
@@ -201,6 +210,7 @@ void av_register_all(void)
     REGISTER_MUXDEMUX(MPJPEG,           mpjpeg);
     REGISTER_DEMUXER (MPL2,             mpl2);
     REGISTER_DEMUXER (MPSUB,            mpsub);
+    REGISTER_DEMUXER (MSF,              msf);
     REGISTER_DEMUXER (MSNWC_TCP,        msnwc_tcp);
     REGISTER_DEMUXER (MTV,              mtv);
     REGISTER_DEMUXER (MV,               mv);
@@ -289,6 +299,7 @@ void av_register_all(void)
     REGISTER_DEMUXER (SUBVIEWER1,       subviewer1);
     REGISTER_DEMUXER (SUBVIEWER,        subviewer);
     REGISTER_DEMUXER (SUP,              sup);
+    REGISTER_DEMUXER (SVAG,             svag);
     REGISTER_MUXDEMUX(SWF,              swf);
     REGISTER_DEMUXER (TAK,              tak);
     REGISTER_MUXER   (TEE,              tee);
@@ -296,6 +307,7 @@ void av_register_all(void)
     REGISTER_MUXER   (TG2,              tg2);
     REGISTER_MUXER   (TGP,              tgp);
     REGISTER_DEMUXER (THP,              thp);
+    REGISTER_DEMUXER (THREEDOSTR,       threedostr);
     REGISTER_DEMUXER (TIERTEXSEQ,       tiertexseq);
     REGISTER_MUXER   (MKVTIMESTAMP_V2,  mkvtimestamp_v2);
     REGISTER_DEMUXER (TMV,              tmv);
@@ -304,12 +316,16 @@ void av_register_all(void)
     REGISTER_DEMUXER (TXD,              txd);
     REGISTER_DEMUXER (TTY,              tty);
     REGISTER_MUXER   (UNCODEDFRAMECRC,  uncodedframecrc);
+    REGISTER_DEMUXER (V210,             v210);
+    REGISTER_DEMUXER (V210X,            v210x);
+    REGISTER_DEMUXER (VAG,              vag);
     REGISTER_MUXDEMUX(VC1,              vc1);
     REGISTER_MUXDEMUX(VC1T,             vc1t);
     REGISTER_DEMUXER (VIVO,             vivo);
     REGISTER_DEMUXER (VMD,              vmd);
     REGISTER_DEMUXER (VOBSUB,           vobsub);
     REGISTER_MUXDEMUX(VOC,              voc);
+    REGISTER_DEMUXER (VPK,              vpk);
     REGISTER_DEMUXER (VPLAYER,          vplayer);
     REGISTER_DEMUXER (VQF,              vqf);
     REGISTER_MUXDEMUX(W64,              w64);
@@ -323,16 +339,19 @@ void av_register_all(void)
     REGISTER_DEMUXER (WSAUD,            wsaud);
     REGISTER_DEMUXER (WSVQA,            wsvqa);
     REGISTER_MUXDEMUX(WTV,              wtv);
+    REGISTER_DEMUXER (WVE,              wve);
     REGISTER_MUXDEMUX(WV,               wv);
     REGISTER_DEMUXER (XA,               xa);
     REGISTER_DEMUXER (XBIN,             xbin);
     REGISTER_DEMUXER (XMV,              xmv);
+    REGISTER_DEMUXER (XVAG,             xvag);
     REGISTER_DEMUXER (XWMA,             xwma);
     REGISTER_DEMUXER (YOP,              yop);
     REGISTER_MUXDEMUX(YUV4MPEGPIPE,     yuv4mpegpipe);
 
     /* image demuxers */
     REGISTER_DEMUXER (IMAGE_BMP_PIPE,        image_bmp_pipe);
+    REGISTER_DEMUXER (IMAGE_DDS_PIPE,        image_dds_pipe);
     REGISTER_DEMUXER (IMAGE_DPX_PIPE,        image_dpx_pipe);
     REGISTER_DEMUXER (IMAGE_EXR_PIPE,        image_exr_pipe);
     REGISTER_DEMUXER (IMAGE_J2K_PIPE,        image_j2k_pipe);
@@ -348,6 +367,7 @@ void av_register_all(void)
 
 
     /* protocols */
+    REGISTER_PROTOCOL(ASYNC,            async);
     REGISTER_PROTOCOL(BLURAY,           bluray);
     REGISTER_PROTOCOL(CACHE,            cache);
     REGISTER_PROTOCOL(CONCAT,           concat);
@@ -378,6 +398,7 @@ void av_register_all(void)
     REGISTER_PROTOCOL(SRTP,             srtp);
     REGISTER_PROTOCOL(SUBFILE,          subfile);
     REGISTER_PROTOCOL(TCP,              tcp);
+    REGISTER_PROTOCOL(TLS_SCHANNEL,     tls_schannel);
     REGISTER_PROTOCOL(TLS_SECURETRANSPORT, tls_securetransport);
     REGISTER_PROTOCOL(TLS_GNUTLS,       tls_gnutls);
     REGISTER_PROTOCOL(TLS_OPENSSL,      tls_openssl);
@@ -386,10 +407,10 @@ void av_register_all(void)
     REGISTER_PROTOCOL(UNIX,             unix);
 
     /* external libraries */
+    REGISTER_MUXER   (CHROMAPRINT,      chromaprint);
     REGISTER_DEMUXER (LIBGME,           libgme);
     REGISTER_DEMUXER (LIBMODPLUG,       libmodplug);
     REGISTER_MUXDEMUX(LIBNUT,           libnut);
-    REGISTER_DEMUXER (LIBQUVI,          libquvi);
     REGISTER_PROTOCOL(LIBRTMP,          librtmp);
     REGISTER_PROTOCOL(LIBRTMPE,         librtmpe);
     REGISTER_PROTOCOL(LIBRTMPS,         librtmps);
diff --git a/libavformat/amr.c b/libavformat/amr.c
index 8f44de1b..137df110 100644
--- a/libavformat/amr.c
+++ b/libavformat/amr.c
@@ -155,7 +155,7 @@ static int amr_read_packet(AVFormatContext *s, AVPacket *pkt)
     read              = avio_read(s->pb, pkt->data + 1, size - 1);
 
     if (read != size - 1) {
-        av_free_packet(pkt);
+        av_packet_unref(pkt);
         return AVERROR(EIO);
     }
 
diff --git a/libavformat/anm.c b/libavformat/anm.c
index f7187973..23200474 100644
--- a/libavformat/anm.c
+++ b/libavformat/anm.c
@@ -133,7 +133,7 @@ static int read_header(AVFormatContext *s)
 
     /* color cycling and palette data */
     st->codec->extradata_size = 16*8 + 4*256;
-    st->codec->extradata      = av_mallocz(st->codec->extradata_size + FF_INPUT_BUFFER_PADDING_SIZE);
+    st->codec->extradata      = av_mallocz(st->codec->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
     if (!st->codec->extradata) {
         return AVERROR(ENOMEM);
     }
diff --git a/libavformat/ape.c b/libavformat/ape.c
index 69b001a5..3e819728 100644
--- a/libavformat/ape.c
+++ b/libavformat/ape.c
@@ -337,7 +337,7 @@ static int ape_read_header(AVFormatContext * s)
 
     ape_dumpinfo(s, ape);
 
-    av_log(s, AV_LOG_DEBUG, "Decoding file - v%d.%02d, compression level %"PRIu16"\n",
+    av_log(s, AV_LOG_VERBOSE, "Decoding file - v%d.%02d, compression level %"PRIu16"\n",
            ape->fileversion / 1000, (ape->fileversion % 1000) / 10,
            ape->compressiontype);
 
@@ -418,7 +418,7 @@ static int ape_read_packet(AVFormatContext * s, AVPacket * pkt)
     AV_WL32(pkt->data + 4, ape->frames[ape->currentframe].skip);
     ret = avio_read(s->pb, pkt->data + extra_size, ape->frames[ape->currentframe].size);
     if (ret < 0) {
-        av_free_packet(pkt);
+        av_packet_unref(pkt);
         return ret;
     }
 
diff --git a/libavformat/apetag.c b/libavformat/apetag.c
index 26359205..2ee277f8 100644
--- a/libavformat/apetag.c
+++ b/libavformat/apetag.c
@@ -55,7 +55,7 @@ static int ape_tag_read_field(AVFormatContext *s)
         av_log(s, AV_LOG_WARNING, "Invalid APE tag key '%s'.\n", key);
         return -1;
     }
-    if (size > INT32_MAX - FF_INPUT_BUFFER_PADDING_SIZE) {
+    if (size > INT32_MAX - AV_INPUT_BUFFER_PADDING_SIZE) {
         av_log(s, AV_LOG_ERROR, "APE tag size too large.\n");
         return AVERROR_INVALIDDATA;
     }
diff --git a/libavformat/apngdec.c b/libavformat/apngdec.c
index 6deff3b6..c6403a19 100644
--- a/libavformat/apngdec.c
+++ b/libavformat/apngdec.c
@@ -132,7 +132,7 @@ static int append_extradata(AVCodecContext *s, AVIOContext *pb, int len)
         return AVERROR_INVALIDDATA;
 
     new_size = previous_size + len;
-    new_extradata = av_realloc(s->extradata, new_size + FF_INPUT_BUFFER_PADDING_SIZE);
+    new_extradata = av_realloc(s->extradata, new_size + AV_INPUT_BUFFER_PADDING_SIZE);
     if (!new_extradata)
         return AVERROR(ENOMEM);
     s->extradata = new_extradata;
@@ -178,7 +178,7 @@ static int apng_read_header(AVFormatContext *s)
         return ret;
 
     /* extradata will contain every chunk up to the first fcTL (excluded) */
-    st->codec->extradata = av_malloc(len + 12 + FF_INPUT_BUFFER_PADDING_SIZE);
+    st->codec->extradata = av_malloc(len + 12 + AV_INPUT_BUFFER_PADDING_SIZE);
     if (!st->codec->extradata)
         return AVERROR(ENOMEM);
     st->codec->extradata_size = len + 12;
@@ -419,7 +419,7 @@ static int apng_read_packet(AVFormatContext *s, AVPacket *pkt)
 
 static const AVOption options[] = {
     { "ignore_loop", "ignore loop setting"                         , offsetof(APNGDemuxContext, ignore_loop),
-      AV_OPT_TYPE_INT, { .i64 = 1 }               , 0, 1      , AV_OPT_FLAG_DECODING_PARAM },
+      AV_OPT_TYPE_BOOL, { .i64 = 1 }              , 0, 1      , AV_OPT_FLAG_DECODING_PARAM },
     { "max_fps"    , "maximum framerate (0 is no limit)"           , offsetof(APNGDemuxContext, max_fps),
       AV_OPT_TYPE_INT, { .i64 = DEFAULT_APNG_FPS }, 0, INT_MAX, AV_OPT_FLAG_DECODING_PARAM },
     { "default_fps", "default framerate (0 is as fast as possible)", offsetof(APNGDemuxContext, default_fps),
diff --git a/libavformat/apngenc.c b/libavformat/apngenc.c
index dcf6b906..4326a7cd 100644
--- a/libavformat/apngenc.c
+++ b/libavformat/apngenc.c
@@ -173,7 +173,7 @@ static void flush_packet(AVFormatContext *format_context, AVPacket *packet)
                                "Frame rate is too high or specified too precisely. Unable to copy losslessly.\n");
                         apng->framerate_warned = 1;
                     }
-                } else if (apng->last_delay.den > 0) {
+                } else if (apng->last_delay.num > 0) {
                     delay = apng->last_delay;
                 } else {
                     delay = apng->prev_delay;
@@ -192,7 +192,7 @@ static void flush_packet(AVFormatContext *format_context, AVPacket *packet)
     }
     ++apng->frame_number;
 
-    av_free_packet(apng->prev_packet);
+    av_packet_unref(apng->prev_packet);
     if (packet)
         av_copy_packet(apng->prev_packet, packet);
 }
diff --git a/libavformat/aqtitledec.c b/libavformat/aqtitledec.c
index 95087665..7c864c8e 100644
--- a/libavformat/aqtitledec.c
+++ b/libavformat/aqtitledec.c
@@ -95,7 +95,7 @@ static int aqt_read_header(AVFormatContext *s)
         }
     }
 
-    ff_subtitles_queue_finalize(&aqt->q);
+    ff_subtitles_queue_finalize(s, &aqt->q);
     return 0;
 }
 
diff --git a/libavformat/asf.c b/libavformat/asf.c
index 80d24dbf..455ca4d8 100644
--- a/libavformat/asf.c
+++ b/libavformat/asf.c
@@ -143,6 +143,10 @@ const ff_asf_guid ff_asf_digital_signature = {
     0xfc, 0xb3, 0x11, 0x22, 0x23, 0xbd, 0xd2, 0x11, 0xb4, 0xb7, 0x00, 0xa0, 0xc9, 0x55, 0xfc, 0x6e
 };
 
+const ff_asf_guid ff_asf_extended_stream_properties_object = { /* raw GUID bytes; declared extern in asf.h */
+    0xcb, 0xa5, 0xe6, 0x14, 0x72, 0xc6, 0x32, 0x43, 0x83, 0x99, 0xa9, 0x69, 0x52, 0x06, 0x5b, 0x5a
+};
+
 /* List of official tags at http://msdn.microsoft.com/en-us/library/dd743066(VS.85).aspx */
 const AVMetadataConv ff_asf_metadata_conv[] = {
     { "WM/AlbumArtist",          "album_artist"     },
diff --git a/libavformat/asf.h b/libavformat/asf.h
index 0c9598a8..914ddef4 100644
--- a/libavformat/asf.h
+++ b/libavformat/asf.h
@@ -26,39 +26,15 @@
 #include "metadata.h"
 #include "riff.h"
 
-#define PACKET_SIZE 3200
-
-typedef struct ASFPayload {
-    uint8_t type;
-    uint16_t size;
-} ASFPayload;
-
-typedef struct ASFStream {
-    int num;
-    unsigned char seq;
-    /* use for reading */
-    AVPacket pkt;
-    int frag_offset;
-    int packet_obj_size;
-    int timestamp;
-    int64_t duration;
-    int skip_to_key;
-    int pkt_clean;
-
-    int ds_span;                /* descrambling  */
-    int ds_packet_size;
-    int ds_chunk_size;
-
-    int64_t packet_pos;
-
-    uint16_t stream_language_index;
-
-    int      palette_changed;
-    uint32_t palette[256];
-
-    int payload_ext_ct;
-    ASFPayload payload[8];
-} ASFStream;
+typedef enum ASFDataType { /* type code of a metadata tag value */
+    ASF_UNICODE    = 0, /* UTF-16LE string */
+    ASF_BYTE_ARRAY = 1, /* raw byte array */
+    ASF_BOOL       = 2, /* numeric value */
+    ASF_DWORD      = 3, /* numeric value */
+    ASF_QWORD      = 4, /* numeric value */
+    ASF_WORD       = 5, /* numeric value */
+    ASF_GUID       = 6, /* GUID value */
+}ASFDataType;
 
 typedef struct ASFMainHeader {
     ff_asf_guid guid;                  ///< generated by client computer
@@ -123,6 +99,7 @@ extern const ff_asf_guid ff_asf_language_guid;
 extern const ff_asf_guid ff_asf_content_encryption;
 extern const ff_asf_guid ff_asf_ext_content_encryption;
 extern const ff_asf_guid ff_asf_digital_signature;
+extern const ff_asf_guid ff_asf_extended_stream_properties_object;
 
 extern const AVMetadataConv ff_asf_metadata_conv[];
 
diff --git a/libavformat/asfcrypt.c b/libavformat/asfcrypt.c
index a402758d..221a8a89 100644
--- a/libavformat/asfcrypt.c
+++ b/libavformat/asfcrypt.c
@@ -146,8 +146,8 @@ static uint64_t multiswap_dec(const uint32_t keys[12],
 
 void ff_asfcrypt_dec(const uint8_t key[20], uint8_t *data, int len)
 {
-    struct AVDES des;
-    struct AVRC4 rc4;
+    struct AVDES *des;
+    struct AVRC4 *rc4;
     int num_qwords      = len >> 3;
     uint8_t *qwords     = data;
     uint64_t rc4buff[8] = { 0 };
@@ -160,19 +160,26 @@ void ff_asfcrypt_dec(const uint8_t key[20], uint8_t *data, int len)
             data[i] ^= key[i];
         return;
     }
+    des = av_des_alloc();
+    rc4 = av_rc4_alloc();
+    if (!des || !rc4) {
+        av_freep(&des);
+        av_freep(&rc4);
+        return;
+    }
 
-    av_rc4_init(&rc4, key, 12 * 8, 1);
-    av_rc4_crypt(&rc4, (uint8_t *)rc4buff, NULL, sizeof(rc4buff), NULL, 1);
+    av_rc4_init(rc4, key, 12 * 8, 1);
+    av_rc4_crypt(rc4, (uint8_t *)rc4buff, NULL, sizeof(rc4buff), NULL, 1);
     multiswap_init((uint8_t *)rc4buff, ms_keys);
 
     packetkey  = AV_RN64(&qwords[num_qwords * 8 - 8]);
     packetkey ^= rc4buff[7];
-    av_des_init(&des, key + 12, 64, 1);
-    av_des_crypt(&des, (uint8_t *)&packetkey, (uint8_t *)&packetkey, 1, NULL, 1);
+    av_des_init(des, key + 12, 64, 1);
+    av_des_crypt(des, (uint8_t *)&packetkey, (uint8_t *)&packetkey, 1, NULL, 1);
     packetkey ^= rc4buff[6];
 
-    av_rc4_init(&rc4, (uint8_t *)&packetkey, 64, 1);
-    av_rc4_crypt(&rc4, data, data, len, NULL, 1);
+    av_rc4_init(rc4, (uint8_t *)&packetkey, 64, 1);
+    av_rc4_crypt(rc4, data, data, len, NULL, 1);
 
     ms_state = 0;
     for (i = 0; i < num_qwords - 1; i++, qwords += 8)
@@ -182,4 +189,7 @@ void ff_asfcrypt_dec(const uint8_t key[20], uint8_t *data, int len)
     packetkey = av_le2ne64(packetkey);
     packetkey = multiswap_dec(ms_keys, ms_state, packetkey);
     AV_WL64(qwords, packetkey);
+
+    av_free(rc4);
+    av_free(des);
 }
diff --git a/libavformat/asfdec.c b/libavformat/asfdec_f.c
similarity index 91%
rename from libavformat/asfdec.c
rename to libavformat/asfdec_f.c
index 92784348..3ee0fcd4 100644
--- a/libavformat/asfdec.c
+++ b/libavformat/asfdec_f.c
@@ -39,6 +39,38 @@
 #include "asf.h"
 #include "asfcrypt.h"
 
+typedef struct ASFPayload { /* element type of ASFStream.payload[] */
+    uint8_t type;
+    uint16_t size;
+} ASFPayload;
+
+typedef struct ASFStream { /* state tracked for one ASF stream */
+    int num;
+    unsigned char seq;
+    /* use for reading */
+    AVPacket pkt;
+    int frag_offset;
+    int packet_obj_size;
+    int timestamp;
+    int64_t duration;
+    int skip_to_key;
+    int pkt_clean;
+
+    int ds_span;                /* descrambling  */
+    int ds_packet_size;
+    int ds_chunk_size;
+
+    int64_t packet_pos;
+
+    uint16_t stream_language_index;
+
+    int      palette_changed;
+    uint32_t palette[256];
+
+    int payload_ext_ct; /* NOTE(review): presumably the number of payload[] entries in use -- confirm */
+    ASFPayload payload[8];
+} ASFStream;
+
 typedef struct ASFContext {
     const AVClass *class;
     int asfid2avid[128];                 ///< conversion table from asf ID 2 AVStream ID
@@ -81,11 +113,13 @@ typedef struct ASFContext {
 
     int no_resync_search;
     int export_xmp;
+
+    int uses_std_ecc;
 } ASFContext;
 
 static const AVOption options[] = {
-    { "no_resync_search", "Don't try to resynchronize by looking for a certain optional start code", offsetof(ASFContext, no_resync_search), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_DECODING_PARAM },
-    { "export_xmp", "Export full XMP metadata", offsetof(ASFContext, export_xmp), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_DECODING_PARAM },
+    { "no_resync_search", "Don't try to resynchronize by looking for a certain optional start code", offsetof(ASFContext, no_resync_search), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_DECODING_PARAM },
+    { "export_xmp", "Export full XMP metadata", offsetof(ASFContext, export_xmp), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_DECODING_PARAM },
     { NULL },
 };
 
@@ -100,8 +134,9 @@ static const AVClass asf_class = {
 #include <assert.h>
 
 #define ASF_MAX_STREAMS 127
-#define FRAME_HEADER_SIZE 16
-// Fix Me! FRAME_HEADER_SIZE may be different. (17 is known to be too large)
+#define FRAME_HEADER_SIZE 6
+// Fix Me! FRAME_HEADER_SIZE may be different.
+// (7 is known to be too large for GipsyGuitar.wmv)
 
 #ifdef DEBUG
 static const ff_asf_guid stream_bitrate_guid = { /* (http://get.to/sdp) */
@@ -263,7 +298,7 @@ static int asf_read_picture(AVFormatContext *s, int len)
 
 fail:
     av_freep(&desc);
-    av_free_packet(&pkt);
+    av_packet_unref(&pkt);
     return ret;
 }
 
@@ -294,12 +329,15 @@ static void get_tag(AVFormatContext *s, const char *key, int type, int len, int
     if (!value)
         goto finish;
 
-    if (type == 0) {         // UTF16-LE
+    switch (type) {
+    case ASF_UNICODE:
         avio_get_str16le(s->pb, len, value, 2 * len + 1);
-    } else if (type == -1) { // ASCII
+        break;
+    case -1: // ASCII
         avio_read(s->pb, value, len);
         value[len]=0;
-    } else if (type == 1) {  // byte array
+        break;
+    case ASF_BYTE_ARRAY:
         if (!strcmp(key, "WM/Picture")) { // handle cover art
             asf_read_picture(s, len);
         } else if (!strcmp(key, "ID3")) { // handle ID3 tag
@@ -308,13 +346,18 @@ static void get_tag(AVFormatContext *s, const char *key, int type, int len, int
             av_log(s, AV_LOG_VERBOSE, "Unsupported byte array in tag %s.\n", key);
         }
         goto finish;
-    } else if (type > 1 && type <= 5) {  // boolean or DWORD or QWORD or WORD
+    case ASF_BOOL:
+    case ASF_DWORD:
+    case ASF_QWORD:
+    case ASF_WORD: {
         uint64_t num = get_value(s->pb, type, type2_size);
         snprintf(value, LEN, "%"PRIu64, num);
-    } else if (type == 6) { // (don't) handle GUID
+        break;
+    }
+    case ASF_GUID:
         av_log(s, AV_LOG_DEBUG, "Unsupported GUID value in tag %s.\n", key);
         goto finish;
-    } else {
+    default:
         av_log(s, AV_LOG_DEBUG,
                "Unsupported value type %d in tag %s.\n", type, key);
         goto finish;
@@ -429,7 +472,7 @@ static int asf_read_stream_properties(AVFormatContext *s, int64_t size)
 
     st->codec->codec_type = type;
     if (type == AVMEDIA_TYPE_AUDIO) {
-        int ret = ff_get_wav_header(pb, st->codec, type_specific_size, 0);
+        int ret = ff_get_wav_header(s, pb, st->codec, type_specific_size, 0);
         if (ret < 0)
             return ret;
         if (is_dvr_ms_audio) {
@@ -474,7 +517,7 @@ static int asf_read_stream_properties(AVFormatContext *s, int64_t size)
         if (sizeX > 40) {
             st->codec->extradata_size = ffio_limit(pb, sizeX - 40);
             st->codec->extradata      = av_mallocz(st->codec->extradata_size +
-                                                   FF_INPUT_BUFFER_PADDING_SIZE);
+                                                   AV_INPUT_BUFFER_PADDING_SIZE);
             if (!st->codec->extradata)
                 return AVERROR(ENOMEM);
             avio_read(pb, st->codec->extradata, st->codec->extradata_size);
@@ -653,24 +696,29 @@ static int asf_read_metadata(AVFormatContext *s, int64_t size)
 {
     AVIOContext *pb = s->pb;
     ASFContext *asf = s->priv_data;
-    int n, stream_num, name_len, value_len;
+    int n, stream_num, name_len_utf16, name_len_utf8, value_len;
     int ret, i;
     n = avio_rl16(pb);
 
     for (i = 0; i < n; i++) {
-        char name[1024];
+        uint8_t *name;
         int value_type;
 
         avio_rl16(pb);  // lang_list_index
         stream_num = avio_rl16(pb);
-        name_len   = avio_rl16(pb);
+        name_len_utf16 = avio_rl16(pb);
         value_type = avio_rl16(pb); /* value_type */
         value_len  = avio_rl32(pb);
 
-        if ((ret = avio_get_str16le(pb, name_len, name, sizeof(name))) < name_len)
-            avio_skip(pb, name_len - ret);
+        name_len_utf8 = 2*name_len_utf16 + 1;
+        name          = av_malloc(name_len_utf8);
+        if (!name)
+            return AVERROR(ENOMEM);
+
+        if ((ret = avio_get_str16le(pb, name_len_utf16, name, name_len_utf8)) < name_len_utf16)
+            avio_skip(pb, name_len_utf16 - ret);
         av_log(s, AV_LOG_TRACE, "%d stream %d name_len %2d type %d len %4d <%s>\n",
-                i, stream_num, name_len, value_type, value_len, name);
+                i, stream_num, name_len_utf16, value_type, value_len, name);
 
         if (!strcmp(name, "AspectRatioX")){
             int aspect_x = get_value(s->pb, value_type, 16);
@@ -683,6 +731,7 @@ static int asf_read_metadata(AVFormatContext *s, int64_t size)
         } else {
             get_tag(s, name, value_type, value_len, 16);
         }
+        av_freep(&name);
     }
 
     return 0;
@@ -746,6 +795,7 @@ static int asf_read_header(AVFormatContext *s)
 
     for (;;) {
         uint64_t gpos = avio_tell(pb);
+        int ret = 0;
         ff_get_guid(pb, &g);
         gsize = avio_rl64(pb);
         print_guid(&g);
@@ -762,13 +812,9 @@ static int asf_read_header(AVFormatContext *s)
         if (gsize < 24)
             return AVERROR_INVALIDDATA;
         if (!ff_guidcmp(&g, &ff_asf_file_header)) {
-            int ret = asf_read_file_properties(s, gsize);
-            if (ret < 0)
-                return ret;
+            ret = asf_read_file_properties(s, gsize);
         } else if (!ff_guidcmp(&g, &ff_asf_stream_header)) {
-            int ret = asf_read_stream_properties(s, gsize);
-            if (ret < 0)
-                return ret;
+            ret = asf_read_stream_properties(s, gsize);
         } else if (!ff_guidcmp(&g, &ff_asf_comment_header)) {
             asf_read_content_desc(s, gsize);
         } else if (!ff_guidcmp(&g, &ff_asf_language_guid)) {
@@ -797,7 +843,6 @@ static int asf_read_header(AVFormatContext *s)
             if (!s->keylen) {
                 if (!ff_guidcmp(&g, &ff_asf_content_encryption)) {
                     unsigned int len;
-                    int ret;
                     AVPacket pkt;
                     av_log(s, AV_LOG_WARNING,
                            "DRM protected stream detected, decoding will likely fail!\n");
@@ -807,7 +852,7 @@ static int asf_read_header(AVFormatContext *s)
                     if ((ret = av_get_packet(pb, &pkt, len)) < 0)
                         return ret;
                     av_hex_dump_log(s, AV_LOG_DEBUG, pkt.data, pkt.size);
-                    av_free_packet(&pkt);
+                    av_packet_unref(&pkt);
                     len= avio_rl32(pb);
                     get_tag(s, "ASF_Protection_Type", -1, len, 32);
                     len= avio_rl32(pb);
@@ -823,6 +868,9 @@ static int asf_read_header(AVFormatContext *s)
                 }
             }
         }
+        if (ret < 0)
+            return ret;
+
         if (avio_tell(pb) != gpos + gsize)
             av_log(s, AV_LOG_DEBUG,
                    "gpos mismatch our pos=%"PRIu64", end=%"PRId64"\n",
@@ -910,44 +958,67 @@ static int asf_get_packet(AVFormatContext *s, AVIOContext *pb)
     int rsize = 8;
     int c, d, e, off;
 
-    // if we do not know packet size, allow skipping up to 32 kB
-    off = 32768;
-    if (asf->no_resync_search)
-        off = 3;
-    else if (s->packet_size > 0)
-        off = (avio_tell(pb) - s->internal->data_offset) % s->packet_size + 3;
-
-    c = d = e = -1;
-    while (off-- > 0) {
-        c = d;
-        d = e;
-        e = avio_r8(pb);
-        if (c == 0x82 && !d && !e)
-            break;
-    }
+    if (asf->uses_std_ecc > 0) {
+        // if we do not know packet size, allow skipping up to 32 kB
+        off = 32768;
+        if (asf->no_resync_search)
+            off = 3;
+//         else if (s->packet_size > 0 && !asf->uses_std_ecc)
+//             off = (avio_tell(pb) - s->internal->data_offset) % s->packet_size + 3;
+
+        c = d = e = -1;
+        while (off-- > 0) {
+            c = d;
+            d = e;
+            e = avio_r8(pb);
+            if (c == 0x82 && !d && !e)
+                break;
+        }
 
-    if (c != 0x82) {
-        /* This code allows handling of -EAGAIN at packet boundaries (i.e.
-         * if the packet sync code above triggers -EAGAIN). This does not
-         * imply complete -EAGAIN handling support at random positions in
-         * the stream. */
-        if (pb->error == AVERROR(EAGAIN))
-            return AVERROR(EAGAIN);
-        if (!avio_feof(pb))
-            av_log(s, AV_LOG_ERROR,
-                   "ff asf bad header %x  at:%"PRId64"\n", c, avio_tell(pb));
-    }
-    if ((c & 0x8f) == 0x82) {
-        if (d || e) {
+        if (c != 0x82) {
+            /* This code allows handling of -EAGAIN at packet boundaries (i.e.
+            * if the packet sync code above triggers -EAGAIN). This does not
+            * imply complete -EAGAIN handling support at random positions in
+            * the stream. */
+            if (pb->error == AVERROR(EAGAIN))
+                return AVERROR(EAGAIN);
             if (!avio_feof(pb))
-                av_log(s, AV_LOG_ERROR, "ff asf bad non zero\n");
-            return AVERROR_INVALIDDATA;
+                av_log(s, AV_LOG_ERROR,
+                    "ff asf bad header %x  at:%"PRId64"\n", c, avio_tell(pb));
         }
-        c      = avio_r8(pb);
-        d      = avio_r8(pb);
-        rsize += 3;
-    } else if(!avio_feof(pb)) {
-        avio_seek(pb, -1, SEEK_CUR); // FIXME
+        if ((c & 0x8f) == 0x82) {
+            if (d || e) {
+                if (!avio_feof(pb))
+                    av_log(s, AV_LOG_ERROR, "ff asf bad non zero\n");
+                return AVERROR_INVALIDDATA;
+            }
+            c      = avio_r8(pb);
+            d      = avio_r8(pb);
+            rsize += 3;
+        } else if(!avio_feof(pb)) {
+            avio_seek(pb, -1, SEEK_CUR); // FIXME
+        }
+    } else {
+        c = avio_r8(pb);
+        if (c & 0x80) {
+            rsize ++;
+            if (!(c & 0x60)) {
+                d = avio_r8(pb);
+                e = avio_r8(pb);
+                avio_seek(pb, (c & 0xF) - 2, SEEK_CUR);
+                rsize += c & 0xF;
+            }
+
+            if (c != 0x82)
+                avpriv_request_sample(s, "Invalid ECC byte\n");
+
+            if (!asf->uses_std_ecc)
+                asf->uses_std_ecc =  (c == 0x82 && !d && !e) ? 1 : -1;
+
+            c = avio_r8(pb);
+        } else
+            asf->uses_std_ecc =  -1;
+        d = avio_r8(pb);
     }
 
     asf->packet_flags    = c;
@@ -1019,9 +1090,9 @@ static int asf_read_frame_header(AVFormatContext *s, AVIOContext *pb)
     DO_2BITS(asf->packet_property >> 4, asf->packet_seq, 0);
     DO_2BITS(asf->packet_property >> 2, asf->packet_frag_offset, 0);
     DO_2BITS(asf->packet_property, asf->packet_replic_size, 0);
-    av_log(asf, AV_LOG_TRACE, "key:%d stream:%d seq:%d offset:%d replic_size:%d\n",
+    av_log(asf, AV_LOG_TRACE, "key:%d stream:%d seq:%d offset:%d replic_size:%d num:%X packet_property %X\n",
             asf->packet_key_frame, asf->stream_index, asf->packet_seq,
-            asf->packet_frag_offset, asf->packet_replic_size);
+            asf->packet_frag_offset, asf->packet_replic_size, num, asf->packet_property);
     if (rsize+(int64_t)asf->packet_replic_size > asf->packet_size_left) {
         av_log(s, AV_LOG_ERROR, "packet_replic_size %d is invalid\n", asf->packet_replic_size);
         return AVERROR_INVALIDDATA;
@@ -1030,8 +1101,8 @@ static int asf_read_frame_header(AVFormatContext *s, AVIOContext *pb)
         int64_t end = avio_tell(pb) + asf->packet_replic_size;
         AVRational aspect;
         asfst->packet_obj_size = avio_rl32(pb);
-        if (asfst->packet_obj_size >= (1 << 24) || asfst->packet_obj_size <= 0) {
-            av_log(s, AV_LOG_ERROR, "packet_obj_size invalid\n");
+        if (asfst->packet_obj_size >= (1 << 24) || asfst->packet_obj_size < 0) {
+            av_log(s, AV_LOG_ERROR, "packet_obj_size %d invalid\n", asfst->packet_obj_size);
             asfst->packet_obj_size = 0;
             return AVERROR_INVALIDDATA;
         }
@@ -1100,8 +1171,8 @@ static int asf_read_frame_header(AVFormatContext *s, AVIOContext *pb)
             return AVERROR_INVALIDDATA;
         } else if (asf->packet_frag_size > asf->packet_size_left - rsize) {
             if (asf->packet_frag_size > asf->packet_size_left - rsize + asf->packet_padsize) {
-                av_log(s, AV_LOG_ERROR, "packet_frag_size is invalid (%d-%d)\n",
-                       asf->packet_size_left, rsize);
+                av_log(s, AV_LOG_ERROR, "packet_frag_size is invalid (%d>%d-%d+%d)\n",
+                       asf->packet_frag_size, asf->packet_size_left, rsize, asf->packet_padsize);
                 return AVERROR_INVALIDDATA;
             } else {
                 int diff = asf->packet_frag_size - (asf->packet_size_left - rsize);
@@ -1143,6 +1214,9 @@ static int asf_parse_packet(AVFormatContext *s, AVIOContext *pb, AVPacket *pkt)
             asf->packet_segments < 1 && asf->packet_time_start == 0) {
             int ret = asf->packet_size_left + asf->packet_padsize;
 
+            if (asf->packet_size_left && asf->packet_size_left < FRAME_HEADER_SIZE)
+                av_log(s, AV_LOG_WARNING, "Skip due to FRAME_HEADER_SIZE\n");
+
             assert(ret >= 0);
             /* fail safe */
             avio_skip(pb, ret);
@@ -1172,7 +1246,8 @@ static int asf_parse_packet(AVFormatContext *s, AVIOContext *pb, AVPacket *pkt)
                 continue;
             }
             asf->asf_st = &asf->streams[s->streams[asf->stream_index]->id];
-            asf->asf_st->skip_to_key = 0;
+            if (!asf->packet_frag_offset)
+                asf->asf_st->skip_to_key = 0;
         }
         asf_st = asf->asf_st;
         av_assert0(asf_st);
@@ -1213,7 +1288,7 @@ static int asf_parse_packet(AVFormatContext *s, AVIOContext *pb, AVPacket *pkt)
                        "freeing incomplete packet size %d, new %d\n",
                        asf_st->pkt.size, asf_st->packet_obj_size);
                 asf_st->frag_offset = 0;
-                av_free_packet(&asf_st->pkt);
+                av_packet_unref(&asf_st->pkt);
             }
             /* new packet */
             if ((ret = av_new_packet(&asf_st->pkt, asf_st->packet_obj_size)) < 0)
@@ -1304,7 +1379,7 @@ static int asf_parse_packet(AVFormatContext *s, AVIOContext *pb, AVPacket *pkt)
                 if (i == asf_st->pkt.size) {
                     av_log(s, AV_LOG_DEBUG, "discarding ms fart\n");
                     asf_st->frag_offset = 0;
-                    av_free_packet(&asf_st->pkt);
+                    av_packet_unref(&asf_st->pkt);
                     continue;
                 }
             }
@@ -1319,12 +1394,12 @@ static int asf_parse_packet(AVFormatContext *s, AVIOContext *pb, AVPacket *pkt)
                 } else {
                     /* packet descrambling */
                     AVBufferRef *buf = av_buffer_alloc(asf_st->pkt.size +
-                                                       FF_INPUT_BUFFER_PADDING_SIZE);
+                                                       AV_INPUT_BUFFER_PADDING_SIZE);
                     if (buf) {
                         uint8_t *newdata = buf->data;
                         int offset = 0;
                         memset(newdata + asf_st->pkt.size, 0,
-                               FF_INPUT_BUFFER_PADDING_SIZE);
+                               AV_INPUT_BUFFER_PADDING_SIZE);
                         while (offset < asf_st->pkt.size) {
                             int off = offset / asf_st->ds_chunk_size;
                             int row = off / asf_st->ds_span;
@@ -1345,11 +1420,6 @@ static int asf_parse_packet(AVFormatContext *s, AVIOContext *pb, AVPacket *pkt)
             }
             asf_st->frag_offset         = 0;
             *pkt                        = asf_st->pkt;
-#if FF_API_DESTRUCT_PACKET
-FF_DISABLE_DEPRECATION_WARNINGS
-            asf_st->pkt.destruct        = NULL;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
             asf_st->pkt.buf             = 0;
             asf_st->pkt.size            = 0;
             asf_st->pkt.data            = 0;
@@ -1406,7 +1476,7 @@ static void asf_reset_header(AVFormatContext *s)
 
     for (i = 0; i < 128; i++) {
         asf_st = &asf->streams[i];
-        av_free_packet(&asf_st->pkt);
+        av_packet_unref(&asf_st->pkt);
         asf_st->packet_obj_size = 0;
         asf_st->frag_offset = 0;
         asf_st->seq         = 0;
@@ -1468,7 +1538,6 @@ static int64_t asf_read_pts(AVFormatContext *s, int stream_index,
 
         pts = pkt->dts;
 
-        av_free_packet(pkt);
         if (pkt->flags & AV_PKT_FLAG_KEY) {
             i = pkt->stream_index;
 
@@ -1476,14 +1545,18 @@ static int64_t asf_read_pts(AVFormatContext *s, int stream_index,
 
 //            assert((asf_st->packet_pos - s->data_offset) % s->packet_size == 0);
             pos = asf_st->packet_pos;
+            av_assert1(pkt->pos == asf_st->packet_pos);
 
             av_add_index_entry(s->streams[i], pos, pts, pkt->size,
                                pos - start_pos[i] + 1, AVINDEX_KEYFRAME);
             start_pos[i] = asf_st->packet_pos + 1;
 
-            if (pkt->stream_index == stream_index)
+            if (pkt->stream_index == stream_index) {
+                av_packet_unref(pkt);
                 break;
+            }
         }
+        av_packet_unref(pkt);
     }
 
     *ppos = pos;
diff --git a/libavformat/asfdec_o.c b/libavformat/asfdec_o.c
new file mode 100644
index 00000000..2320c661
--- /dev/null
+++ b/libavformat/asfdec_o.c
@@ -0,0 +1,1791 @@
+/*
+ * Microsoft Advanced Streaming Format demuxer
+ * Copyright (c) 2014 Alexandra Hájková
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/avassert.h"
+#include "libavutil/avstring.h"
+#include "libavutil/bswap.h"
+#include "libavutil/common.h"
+#include "libavutil/dict.h"
+#include "libavutil/internal.h"
+#include "libavutil/mathematics.h"
+#include "libavutil/opt.h"
+#include "libavutil/time_internal.h"
+
+#include "avformat.h"
+#include "avio_internal.h"
+#include "avlanguage.h"
+#include "id3v2.h"
+#include "internal.h"
+#include "riff.h"
+#include "asf.h"
+#include "asfcrypt.h"
+
+#define ASF_BOOL                              0x2 // metadata value type: boolean
+#define ASF_WORD                              0x5 // metadata value type: 16-bit integer
+#define ASF_GUID                              0x6 // metadata value type: GUID
+#define ASF_DWORD                             0x3 // metadata value type: 32-bit integer
+#define ASF_QWORD                             0x4 // metadata value type: 64-bit integer
+#define ASF_UNICODE                           0x0 // metadata value type: UTF-16 string
+#define ASF_FLAG_BROADCAST                    0x1 // bit tested in ASFContext.b_flags
+#define ASF_BYTE_ARRAY                        0x1 // metadata value type: raw bytes
+#define ASF_TYPE_AUDIO                        0x2
+#define ASF_TYPE_VIDEO                        0x1
+#define ASF_STREAM_NUM                        0x7F
+#define ASF_MAX_STREAMS                       128 // capacity of the per-stream arrays in ASFContext
+#define BMP_HEADER_SIZE                       40 // format data beyond this many bytes becomes codec extradata
+#define ASF_NUM_OF_PAYLOADS                   0x3F
+#define ASF_ERROR_CORRECTION_LENGTH_TYPE      0x60
+#define ASF_PACKET_ERROR_CORRECTION_DATA_SIZE 0x2
+
+typedef struct GUIDParseTable {
+    const char *name; // object name; asf_read_unknown() compares it with "Header Extension"
+    ff_asf_guid guid;
+    int (*read_object)(AVFormatContext *, const struct GUIDParseTable *); // same signature as the asf_read_*() object parsers
+    int is_subobject; // non-zero: asf_read_unknown() skips the object body instead of scanning it
+} GUIDParseTable;
+
+typedef struct ASFPacket {
+    AVPacket avpkt;
+    int64_t dts;
+    uint32_t frame_num; // ASF payloads with the same number are parts of the same frame
+    int flags;
+    int data_size;
+    int duration;
+    int size_left; // NOTE(review): presumably bytes of the frame still missing - confirm against the payload reader
+    uint8_t stream_index;
+} ASFPacket;
+
+typedef struct ASFStream {
+    uint8_t stream_index; // from packet header; find_stream() matches stream numbers against it
+    int index;  // stream index in AVFormatContext, set in asf_read_stream_properties and asf_read_picture
+    int type; // AVMEDIA_TYPE_* value; asf_read_picture() stores AVMEDIA_TYPE_VIDEO here
+    int indexed; // added index entries from the Simple Index Object or not
+    int8_t span;   // for deinterleaving
+    uint16_t virtual_pkt_len;
+    uint16_t virtual_chunk_len;
+    int16_t lang_idx;
+    ASFPacket pkt;
+} ASFStream;
+
+typedef struct ASFStreamData{
+    char langs[32];
+    AVDictionary *asf_met; // for storing per-stream metadata
+    AVRational aspect_ratio; // filled by asf_store_aspect_ratio(): num from "AspectRatioX", den otherwise
+} ASFStreamData;
+
+typedef struct ASFContext {
+    int data_reached;
+    int is_simple_index; // is simple index present or not 1/0
+    int is_header; // while set, asf_read_unknown() records the object size in unknown_size
+
+    uint64_t preroll; // subtracted from duration and, times 10000, from marker times
+    uint64_t nb_packets; // ASF packets
+    uint32_t packet_size; // read in asf_read_properties()
+    int64_t send_time;
+    int duration; // file value / 10000, minus preroll
+
+    uint32_t b_flags;    // flags with broadcast flag
+    uint32_t prop_flags; // file properties object flags
+
+    uint64_t data_size; // data object size
+    uint64_t unknown_size; // size of the unknown object
+
+    int64_t offset; // offset of the current object
+
+    int64_t data_offset;
+    int64_t first_packet_offset; // packet offset
+    int64_t unknown_offset;   // for top level header objects or subobjects without specified behavior
+
+    // ASF file must not contain more than 128 streams according to the specification
+    ASFStream *asf_st[ASF_MAX_STREAMS];
+    ASFStreamData asf_sd[ASF_MAX_STREAMS];
+    int nb_streams; // number of used entries in asf_st[]
+
+    int stream_index; // from packet header, for the subpayload case
+
+    // packet parameters
+    uint64_t sub_header_offset; // offset of subpayload header
+    int64_t sub_dts;
+    uint8_t dts_delta; // for subpayloads
+    uint32_t packet_size_internal; // packet size stored inside ASFPacket, can be 0
+    int64_t packet_offset; // offset of the current packet inside Data Object
+    uint32_t pad_len; // padding after payload
+    uint32_t rep_data_len;
+
+    // packet state
+    uint64_t sub_left;  // subpayloads left or not
+    unsigned int nb_sub; // number of subpayloads read so far from the current ASF packet
+    uint16_t mult_sub_len; // total length of subpayloads array inside multiple payload
+    uint64_t nb_mult_left; // multiple payloads left
+    int return_subpayload;
+    enum {
+        PARSE_PACKET_HEADER,
+        READ_SINGLE,
+        READ_MULTI,
+        READ_MULTI_SUB
+    } state;
+} ASFContext;
+
+static int detect_unknown_subobject(AVFormatContext *s, int64_t offset, int64_t size);
+static const GUIDParseTable *find_guid(ff_asf_guid guid);
+
+/* Probe callback: half of the maximum score when the buffer starts with the
+ * ff_asf_header GUID, zero otherwise. */
+static int asf_probe(AVProbeData *pd)
+{
+    const int is_asf = !ff_guidcmp(pd->buf, &ff_asf_header);
+
+    return is_asf ? AVPROBE_SCORE_MAX/2 : 0;
+}
+
+static void swap_guid(ff_asf_guid guid) // byte-reverse guid[0..3], guid[4..5] and guid[6..7] in place
+{
+    FFSWAP(unsigned char, guid[0], guid[3]); // bytes 0-3 are reversed as one group
+    FFSWAP(unsigned char, guid[1], guid[2]);
+    FFSWAP(unsigned char, guid[4], guid[5]); // bytes 4-5 and 6-7 are swapped pairwise
+    FFSWAP(unsigned char, guid[6], guid[7]);
+}
+
+static void align_position(AVIOContext *pb,  int64_t offset, uint64_t size) // seek to offset + size unless already there
+{
+    if (size < INT64_MAX - offset && avio_tell(pb) != offset + size) // first test guards offset + size against exceeding INT64_MAX
+        avio_seek(pb, offset + size, SEEK_SET);
+}
+
+/* Handle an object that has no dedicated parser: a sub-object is skipped whole,
+ * anything else is handed to detect_unknown_subobject(). */
+static int asf_read_unknown(AVFormatContext *s, const GUIDParseTable *g)
+{
+    ASFContext *asf   = s->priv_data;
+    AVIOContext *pb   = s->pb;
+    uint64_t obj_size = avio_rl64(pb);
+    int err;
+
+    if (obj_size > INT64_MAX)
+        return AVERROR_INVALIDDATA;
+
+    if (asf->is_header) // the size is recorded only while is_header is set
+        asf->unknown_size = obj_size;
+    asf->is_header = 0;
+    if (g->is_subobject) {
+        if (obj_size < 24) {
+            av_log(s, AV_LOG_ERROR, "Too small size %"PRIu64" (< 24).\n", obj_size);
+            return AVERROR_INVALIDDATA;
+        }
+        avio_skip(pb, obj_size - 24);
+        return 0;
+    }
+
+    if (!strcmp(g->name, "Header Extension"))
+        avio_skip(pb, 22); // skip reserved fields and Data Size
+    err = detect_unknown_subobject(s, asf->unknown_offset, asf->unknown_size);
+    return err < 0 ? err : 0;
+}
+
+static int get_asf_string(AVIOContext *pb, int maxlen, char *buf, int buflen) // UTF-16LE from pb -> UTF-8 in buf
+{
+    char *q = buf; // write cursor into buf
+    int ret = 0; // input byte counter, advanced by 2 for every 16-bit unit requested; also the return value
+    if (buflen <= 0)
+        return AVERROR(EINVAL);
+    while (ret + 1 < maxlen) {
+        uint8_t tmp;
+        uint32_t ch;
+        GET_UTF16(ch, (ret += 2) <= maxlen ? avio_rl16(pb) : 0, break;); // units past maxlen are replaced by 0 instead of being read
+        PUT_UTF8(ch, tmp, if (q - buf < buflen - 1) *q++ = tmp;) // output that would not fit in buflen - 1 bytes is dropped
+    }
+    *q = 0; // the result is always NUL-terminated
+
+    return ret;
+}
+
+/* Parse the Marker Object: each marker becomes a chapter that starts at the
+ * marker's 64-bit time minus preroll * 10000, in a 1/10000000 time base. */
+static int asf_read_marker(AVFormatContext *s, const GUIDParseTable *g)
+{
+    ASFContext *asf = s->priv_data;
+    AVIOContext *pb = s->pb;
+    uint64_t size   = avio_rl64(pb);
+    int i, nb_markers, ret;
+    uint32_t len;
+    char name[1024];
+
+    avio_skip(pb, 16); // skip reserved GUID
+    nb_markers = avio_rl32(pb);
+    avio_skip(pb, 2); // skip reserved field
+    len = avio_rl16(pb);
+    avio_skip(pb, len); // skip the name in one call instead of byte by byte
+
+    for (i = 0; i < nb_markers; i++) {
+        int64_t pts;
+
+        avio_skip(pb, 8);
+        pts = avio_rl64(pb);
+        pts -= asf->preroll * 10000;
+        avio_skip(pb, 2 + 4 + 4); // entry length, send time, flags
+        len = avio_rl32(pb);
+        // nb_markers and len are untrusted: stop on truncated or absurd input
+        if (avio_feof(pb) || len > INT_MAX)
+            return AVERROR_INVALIDDATA;
+        ret = avio_get_str16le(pb, len, name, sizeof(name));
+        if (ret >= 0 && ret < (int)len)
+            avio_skip(pb, len - ret);
+        avpriv_new_chapter(s, i, (AVRational) { 1, 10000000 }, pts,
+                           AV_NOPTS_VALUE, name);
+    }
+    align_position(pb, asf->offset, size);
+
+    return 0;
+}
+
+/* Read a UTF-16LE string of up to len bytes into ch and, unless the result is
+ * empty, store it in the file metadata under title. Always returns 0. */
+static int asf_read_metadata(AVFormatContext *s, const char *title, uint16_t len,
+                             unsigned char *ch, uint16_t buflen)
+{
+    avio_get_str16le(s->pb, len, ch, buflen);
+    if (!ch[0])
+        return 0;
+    if (av_dict_set(&s->metadata, title, ch, 0) < 0)
+        av_log(s, AV_LOG_WARNING, "av_dict_set failed.\n");
+
+    return 0;
+}
+
+/* Read a val_len-byte value and store it in *met under name. ASF_UNICODE values
+ * are converted from UTF-16LE to UTF-8; any other type is copied as raw bytes
+ * and is rejected when longer than 256 bytes. Returns 0 or a negative AVERROR. */
+static int asf_read_value(AVFormatContext *s, const uint8_t *name,
+                          uint16_t val_len, int type, AVDictionary **met)
+{
+    int ret;
+    uint8_t *value;
+    // int, not uint16_t: 2 * val_len + 1 wraps a 16-bit type once val_len >= 32768
+    int buflen = 2 * val_len + 1;
+    AVIOContext *pb = s->pb;
+
+    value = av_malloc(buflen);
+    if (!value)
+        return AVERROR(ENOMEM);
+    if (type == ASF_UNICODE) {
+        // get_asf_string reads UTF-16 and converts it to UTF-8 which needs longer buffer
+        if ((ret = get_asf_string(pb, val_len, value, buflen)) < 0)
+            goto failed;
+        if (av_dict_set(met, name, value, 0) < 0)
+            av_log(s, AV_LOG_WARNING, "av_dict_set failed.\n");
+    } else {
+        // raw values are stored as a C string; snprintf() keeps at most 255 bytes
+        char buf[256];
+        if (val_len > sizeof(buf)) {
+            ret = AVERROR_INVALIDDATA;
+            goto failed;
+        }
+        if ((ret = avio_read(pb, value, val_len)) < 0)
+            goto failed;
+        value[ret] = '\0'; // ret <= val_len < buflen, so this stays in bounds
+        snprintf(buf, sizeof(buf), "%s", value);
+        if (av_dict_set(met, name, buf, 0) < 0)
+            av_log(s, AV_LOG_WARNING, "av_dict_set failed.\n");
+    }
+    ret = 0;
+
+failed:
+    av_freep(&value);
+    return ret;
+}
+/**
+ * Read one little-endian integer from pb; its width is selected by type.
+ *
+ * @param type  ASF_BOOL or ASF_WORD (16 bits), ASF_DWORD (32), ASF_QWORD (64)
+ * @param value receives the integer; untouched when the type is rejected
+ * @return 0 on success, AVERROR_INVALIDDATA for any other type
+ */
+static int asf_read_generic_value(AVIOContext *pb, int type, uint64_t *value)
+{
+    uint64_t v;
+
+    if (type == ASF_BOOL || type == ASF_WORD)
+        v = avio_rl16(pb);
+    else if (type == ASF_DWORD)
+        v = avio_rl32(pb);
+    else if (type == ASF_QWORD)
+        v = avio_rl64(pb);
+    else
+        return AVERROR_INVALIDDATA;
+    *value = v;
+    return 0;
+}
+
+/* Read an integer value of the given type and store it in *met under name as
+ * a decimal string. Fails only when asf_read_generic_value() rejects the type. */
+static int asf_set_metadata(AVFormatContext *s, const uint8_t *name,
+                            int type, AVDictionary **met)
+{
+    char text[32];
+    uint64_t number;
+    int err = asf_read_generic_value(s->pb, type, &number);
+
+    if (err < 0)
+        return err;
+
+    snprintf(text, sizeof(text), "%"PRIu64, number);
+    if (av_dict_set(met, name, text, 0) < 0)
+        av_log(s, AV_LOG_WARNING, "av_dict_set failed.\n");
+
+    return 0;
+}
+
+/* Turn a "WM/Picture" value of len bytes into an attached-picture stream.
+ * MSDN claims that this should be "compatible with the ID3 frame, APIC",
+ * but in reality this is only loosely similar */
+static int asf_read_picture(AVFormatContext *s, int len)
+{
+    ASFContext *asf       = s->priv_data;
+    AVPacket pkt          = { 0 };
+    const CodecMime *mime = ff_id3v2_mime_tags;
+    enum  AVCodecID id    = AV_CODEC_ID_NONE;
+    char mimetype[64];
+    uint8_t  *desc = NULL;
+    AVStream   *st = NULL;
+    int ret, type, picsize, desc_len;
+    ASFStream *asf_st;
+
+    /* type + picsize + mime + desc */
+    if (len < 1 + 4 + 2 + 2) {
+        av_log(s, AV_LOG_ERROR, "Invalid attached picture size: %d.\n", len);
+        return AVERROR_INVALIDDATA;
+    }
+    if (asf->nb_streams >= ASF_MAX_STREAMS) // asf_st[] has no free slot left
+        return AVERROR_INVALIDDATA;
+
+    /* picture type */
+    type = avio_r8(s->pb);
+    len--;
+    if (type >= FF_ARRAY_ELEMS(ff_id3v2_picture_types) || type < 0) {
+        av_log(s, AV_LOG_WARNING, "Unknown attached picture type: %d.\n", type);
+        type = 0;
+    }
+
+    /* picture data size */
+    picsize = avio_rl32(s->pb);
+    len    -= 4;
+
+    /* picture MIME type */
+    len -= avio_get_str16le(s->pb, len, mimetype, sizeof(mimetype));
+    for (; mime->id != AV_CODEC_ID_NONE; mime++)
+        if (!strncmp(mime->str, mimetype, sizeof(mimetype))) {
+            id = mime->id;
+            break;
+        }
+    if (id == AV_CODEC_ID_NONE) {
+        av_log(s, AV_LOG_ERROR, "Unknown attached picture mimetype: %s.\n", mimetype);
+        return 0;
+    }
+
+    if (picsize < 0 || picsize >= len) { // a 32-bit size above INT_MAX reads back negative
+        av_log(s, AV_LOG_ERROR, "Invalid attached picture data size: %d >= %d.\n",
+               picsize, len);
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* picture description */
+    desc_len = (len - picsize) * 2 + 1;
+    desc     = av_malloc(desc_len);
+    if (!desc)
+        return AVERROR(ENOMEM);
+    len -= avio_get_str16le(s->pb, len - picsize, desc, desc_len);
+
+    ret = av_get_packet(s->pb, &pkt, picsize);
+    if (ret < 0)
+        goto fail;
+
+    st  = avformat_new_stream(s, NULL);
+    if (!st) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+    asf->asf_st[asf->nb_streams] = av_mallocz(sizeof(*asf_st));
+    asf_st = asf->asf_st[asf->nb_streams];
+    if (!asf_st) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    st->disposition              |= AV_DISPOSITION_ATTACHED_PIC;
+    st->codec->codec_type         = asf_st->type = AVMEDIA_TYPE_VIDEO;
+    st->codec->codec_id           = id;
+    st->attached_pic              = pkt;
+    st->attached_pic.stream_index = asf_st->index = st->index;
+    st->attached_pic.flags       |= AV_PKT_FLAG_KEY;
+
+    asf->nb_streams++;
+
+    if (*desc) {
+        if (av_dict_set(&st->metadata, "title", desc, AV_DICT_DONT_STRDUP_VAL) < 0)
+            av_log(s, AV_LOG_WARNING, "av_dict_set failed.\n");
+    } else
+        av_freep(&desc);
+
+    if (av_dict_set(&st->metadata, "comment", ff_id3v2_picture_types[type], 0) < 0)
+        av_log(s, AV_LOG_WARNING, "av_dict_set failed.\n");
+
+    return 0;
+
+fail:
+    av_freep(&desc);
+    av_packet_unref(&pkt);
+    return ret;
+}
+
+static void get_id3_tag(AVFormatContext *s, int len) // read an embedded ID3v2 tag; len is passed on to ff_id3v2_read()
+{
+    ID3v2ExtraMeta *id3v2_extra_meta = NULL;
+
+    ff_id3v2_read(s, ID3v2_DEFAULT_MAGIC, &id3v2_extra_meta, len);
+    if (id3v2_extra_meta) // extra metadata was collected: let the APIC parser process it
+        ff_id3v2_parse_apic(s, &id3v2_extra_meta);
+    ff_id3v2_free_extra_meta(&id3v2_extra_meta); // called unconditionally, also when nothing was collected
+}
+
+/* Dispatch one metadata record on its value type. Nothing is read when val_len
+ * is 0. Only a failure of asf_set_metadata() is reported to the caller; the
+ * results of the string, picture and ID3 readers are ignored. */
+static int process_metadata(AVFormatContext *s, const uint8_t *name, uint16_t name_len,
+                            uint16_t val_len, uint16_t type, AVDictionary **met)
+{
+    ff_asf_guid guid;
+
+    if (!val_len)
+        return 0;
+
+    if (type == ASF_GUID) {
+        ff_get_guid(s->pb, &guid); // read and discard
+    } else if (type == ASF_UNICODE) {
+        asf_read_value(s, name, val_len, type, met);
+    } else if (type == ASF_BYTE_ARRAY) {
+        if (!strcmp(name, "WM/Picture")) // handle cover art
+            asf_read_picture(s, val_len);
+        else if (!strcmp(name, "ID3")) // handle ID3 tag
+            get_id3_tag(s, val_len);
+        else
+            asf_read_value(s, name, val_len, type, met);
+    } else {
+        int err = asf_set_metadata(s, name, type, met);
+
+        if (err < 0)
+            return err;
+    }
+
+    return 0;
+}
+
+/* Parse the Extended Content Description Object: every descriptor is handed to
+ * process_metadata() together with the file-level metadata dictionary. */
+static int asf_read_ext_content(AVFormatContext *s, const GUIDParseTable *g)
+{
+    ASFContext *asf  = s->priv_data;
+    AVIOContext *pb  = s->pb;
+    uint64_t size    = avio_rl64(pb);
+    uint16_t nb_desc = avio_rl16(pb);
+    int i, ret;
+
+    for (i = 0; i < nb_desc; i++) {
+        uint16_t name_len, type, val_len;
+        uint8_t *name = NULL;
+
+        name_len = avio_rl16(pb);
+        if (!name_len)
+            return AVERROR_INVALIDDATA;
+        // UTF-8 can need more bytes than the UTF-16 input, extra byte for the terminator
+        name = av_malloc(2 * name_len + 1);
+        if (!name)
+            return AVERROR(ENOMEM);
+        avio_get_str16le(pb, name_len, name, 2 * name_len + 1);
+        type    = avio_rl16(pb);
+        // BOOL values are 16 bits long in the Metadata Object
+        // but 32 bits long in the Extended Content Description Object
+        if (type == ASF_BOOL)
+            type = ASF_DWORD;
+        val_len = avio_rl16(pb);
+        ret = process_metadata(s, name, name_len, val_len, type, &s->metadata);
+        av_freep(&name);
+        if (ret < 0)
+            return ret;
+    }
+    align_position(pb, asf->offset, size);
+    return 0;
+}
+
+/* Return the AVStream whose ASF stream number equals st_num, or NULL when no
+ * known stream carries that number. The first match wins. */
+static AVStream *find_stream(AVFormatContext *s, uint16_t st_num)
+{
+    ASFContext *asf = s->priv_data;
+    int n;
+
+    for (n = 0; n < asf->nb_streams; n++) {
+        const ASFStream *cand = asf->asf_st[n];
+
+        if (cand->stream_index == st_num)
+            return s->streams[cand->index];
+    }
+    return NULL;
+}
+
+/* Read an aspect ratio component and store it as numerator ("AspectRatioX") or
+ * denominator (any other name). st_num is 16 bits wide so that numbers beyond
+ * ASF_MAX_STREAMS are ignored rather than truncated onto a valid slot. */
+static int asf_store_aspect_ratio(AVFormatContext *s, uint16_t st_num, uint8_t *name, int type)
+{
+    ASFContext *asf = s->priv_data;
+    uint64_t value  = 0;
+    int ret         = asf_read_generic_value(s->pb, type, &value);
+
+    if (ret < 0)
+        return ret;
+    if (st_num >= ASF_MAX_STREAMS) // value consumed, nowhere to store it
+        return 0;
+    if (!strcmp(name, "AspectRatioX"))
+        asf->asf_sd[st_num].aspect_ratio.num = value;
+    else
+        asf->asf_sd[st_num].aspect_ratio.den = value;
+    return 0;
+}
+
+/* Parse the Metadata Object. AspectRatioX/AspectRatioY records go to the
+ * per-stream aspect ratio, every other record to the per-stream dictionary.
+ * A failing record ends the parsing early; the object as a whole still
+ * succeeds and the position is realigned to its end. */
+static int asf_read_metadata_obj(AVFormatContext *s, const GUIDParseTable *g)
+{
+    ASFContext *asf   = s->priv_data;
+    AVIOContext *pb   = s->pb;
+    uint64_t size     = avio_rl64(pb);
+    uint16_t nb_recs  = avio_rl16(pb); // number of records in the Description Records list
+    int i, ret;
+
+    for (i = 0; i < nb_recs; i++) {
+        uint16_t name_len, type, st_num;
+        uint32_t val_len; // the length field is read as 32 bits, do not truncate it
+        int buflen;
+        uint8_t *name = NULL;
+
+        avio_skip(pb, 2); // skip reserved field
+        st_num   = avio_rl16(pb);
+        name_len = avio_rl16(pb);
+        buflen   = 2 * name_len + 1;
+        if (!name_len)
+            break;
+        type     = avio_rl16(pb);
+        val_len  = avio_rl32(pb);
+        name     = av_malloc(buflen);
+        if (!name)
+            return AVERROR(ENOMEM);
+        avio_get_str16le(pb, name_len, name, buflen);
+        ret = 0;
+        if (!strcmp(name, "AspectRatioX") || !strcmp(name, "AspectRatioY"))
+            ret = asf_store_aspect_ratio(s, st_num, name, type);
+        else if (st_num < ASF_MAX_STREAMS && val_len <= UINT16_MAX)
+            ret = process_metadata(s, name, name_len, val_len, type,
+                                   &asf->asf_sd[st_num].asf_met);
+        else
+            avio_skip(pb, val_len); // unusable record: step over its value to stay in sync
+        av_freep(&name);
+        if (ret < 0)
+            break;
+    }
+
+    align_position(pb, asf->offset, size);
+    return 0;
+}
+
+/* Parse the Content Description Object: five 16-bit lengths followed by five
+ * UTF-16 strings, each stored (when non-empty) under the matching title. */
+static int asf_read_content_desc(AVFormatContext *s, const GUIDParseTable *g)
+{
+    static const char *const titles[] =
+    { "Title", "Author", "Copyright", "Description", "Rate" };
+    ASFContext *asf = s->priv_data;
+    AVIOContext *pb = s->pb;
+    uint64_t size   = avio_rl64(pb);
+    uint16_t len[5];
+    int i;
+
+    for (i = 0; i < 5; i++) // all lengths precede the first string
+        len[i] = avio_rl16(pb);
+
+    for (i = 0; i < 5; i++) {
+        // utf8 string should be <= 2 * utf16 string, extra byte for the terminator
+        const uint16_t buflen = 2 * len[i] + 1;
+        uint8_t *ch           = av_malloc(buflen);
+        if (!ch)
+            return AVERROR(ENOMEM);
+        asf_read_metadata(s, titles[i], len[i], ch, buflen);
+        av_freep(&ch);
+    }
+    align_position(pb, asf->offset, size);
+
+    return 0;
+}
+
+/* Parse the File Properties Object: packet count, duration, preroll, flags
+ * and packet size are stored in the context; unless the broadcast flag is set
+ * the creation date is exported as "creation_time" metadata. */
+static int asf_read_properties(AVFormatContext *s, const GUIDParseTable *g)
+{
+    ASFContext *asf = s->priv_data;
+    AVIOContext *pb = s->pb;
+    uint64_t filetime; // kept 64 bits wide: time_t may be too narrow for the raw value
+
+    avio_rl64(pb); // read object size
+    avio_skip(pb, 16); // skip File ID
+    avio_skip(pb, 8);  // skip File size
+    filetime         = avio_rl64(pb);
+    asf->nb_packets  = avio_rl64(pb);
+    asf->duration    = avio_rl64(pb) / 10000; // stream duration
+    avio_skip(pb, 8); // skip send duration
+    asf->preroll     = avio_rl64(pb);
+    asf->duration   -= asf->preroll;
+    asf->b_flags     = avio_rl32(pb);
+    avio_skip(pb, 4); // skip minimal packet size
+    asf->packet_size  = avio_rl32(pb);
+    avio_skip(pb, 4); // skip max_bitrate
+
+    // the flags are stored after the creation date, so the broadcast test
+    // can only be made once the whole object has been read
+    if (!(asf->b_flags & ASF_FLAG_BROADCAST)) {
+        struct tm tmbuf;
+        struct tm *tm;
+        time_t creation_time;
+        char buf[64];
+
+        // creation date is in 100 ns units from 1 Jan 1601, conversion to s;
+        // there are 11644473600 seconds between 1 Jan 1601 and 1 Jan 1970
+        creation_time = (int64_t)(filetime / 10000000) - 11644473600;
+        tm = gmtime_r(&creation_time, &tmbuf);
+        if (tm && strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", tm) &&
+            av_dict_set(&s->metadata, "creation_time", buf, 0) < 0)
+            av_log(s, AV_LOG_WARNING, "av_dict_set failed.\n");
+    }
+
+    return 0;
+}
+
+// Read the video type-specific data: dimensions, BMP header and any trailing codec extradata.
+static int parse_video_info(AVIOContext *pb, AVStream *st)
+{
+    uint16_t size;
+    unsigned int tag;
+
+    st->codec->width  = avio_rl32(pb);
+    st->codec->height = avio_rl32(pb);
+    avio_skip(pb, 1); // skip reserved flags
+    size = avio_rl16(pb); // size of the Format Data
+    tag  = ff_get_bmp_header(pb, st, NULL);
+    st->codec->codec_tag = tag;
+    st->codec->codec_id  = ff_codec_get_id(ff_codec_bmp_tags, tag);
+
+    if (size > BMP_HEADER_SIZE) {
+        int ret;
+        st->codec->extradata_size  = size - BMP_HEADER_SIZE;
+        // zero-filled: covers the padding and any tail a short avio_read() leaves unwritten
+        if (!(st->codec->extradata = av_mallocz(st->codec->extradata_size +
+                                                AV_INPUT_BUFFER_PADDING_SIZE))) {
+            st->codec->extradata_size = 0;
+            return AVERROR(ENOMEM);
+        }
+        if ((ret = avio_read(pb, st->codec->extradata,
+                             st->codec->extradata_size)) < 0)
+            return ret;
+    }
+    return 0;
+}
+
+// Parse a Stream Properties object: register the stream, then read its type specific and error correction data.
+static int asf_read_stream_properties(AVFormatContext *s, const GUIDParseTable *g)
+{
+    ASFContext *asf = s->priv_data;
+    AVIOContext *pb = s->pb;
+    uint64_t size;
+    uint32_t err_data_len, ts_data_len; // type specific data length
+    uint16_t flags;
+    ff_asf_guid stream_type;
+    enum AVMediaType type;
+    int i, ret;
+    uint8_t stream_index;
+    AVStream *st;
+    ASFStream *asf_st;
+
+    // ASF file must not contain more than 128 streams according to the specification
+    if (asf->nb_streams >= ASF_MAX_STREAMS)
+        return AVERROR_INVALIDDATA;
+
+    size = avio_rl64(pb);
+    ff_get_guid(pb, &stream_type);
+    if (!ff_guidcmp(&stream_type, &ff_asf_audio_stream))
+        type = AVMEDIA_TYPE_AUDIO;
+    else if (!ff_guidcmp(&stream_type, &ff_asf_video_stream))
+        type = AVMEDIA_TYPE_VIDEO;
+    else if (!ff_guidcmp(&stream_type, &ff_asf_jfif_media))
+        type = AVMEDIA_TYPE_VIDEO;
+    else if (!ff_guidcmp(&stream_type, &ff_asf_command_stream))
+        type = AVMEDIA_TYPE_DATA;
+    else if (!ff_guidcmp(&stream_type,
+                         &ff_asf_ext_stream_embed_stream_header))
+        type = AVMEDIA_TYPE_UNKNOWN;
+    else
+        return AVERROR_INVALIDDATA;
+
+    ff_get_guid(pb, &stream_type); // error correction type
+    avio_skip(pb, 8); // skip the time offset
+    ts_data_len      = avio_rl32(pb);
+    err_data_len     = avio_rl32(pb);
+    flags            = avio_rl16(pb); // bit 15 - Encrypted Content
+
+    stream_index = flags & ASF_STREAM_NUM;
+    for (i = 0; i < asf->nb_streams; i++)
+        if (stream_index == asf->asf_st[i]->stream_index) {
+            av_log(s, AV_LOG_WARNING,
+                   "Duplicate stream found, this stream will be ignored.\n");
+            align_position(pb, asf->offset, size);
+            return 0;
+        }
+
+    st = avformat_new_stream(s, NULL);
+    if (!st)
+        return AVERROR(ENOMEM);
+    avpriv_set_pts_info(st, 32, 1, 1000); // pts should be dword, in milliseconds
+    st->codec->codec_type = type;
+    asf->asf_st[asf->nb_streams] = av_mallocz(sizeof(*asf_st));
+    if (!asf->asf_st[asf->nb_streams])
+        return AVERROR(ENOMEM);
+    asf_st                       = asf->asf_st[asf->nb_streams];
+    asf->nb_streams++;
+    asf_st->stream_index         = stream_index;
+    asf_st->index                = st->index;
+    asf_st->indexed              = 0;
+    st->id                       = flags & ASF_STREAM_NUM;
+    av_init_packet(&asf_st->pkt.avpkt);
+    asf_st->pkt.data_size        = 0;
+    avio_skip(pb, 4); // skip reserved field
+
+    switch (type) {
+    case AVMEDIA_TYPE_AUDIO:
+        asf_st->type = AVMEDIA_TYPE_AUDIO;
+        if ((ret = ff_get_wav_header(s, pb, st->codec, ts_data_len, 0)) < 0)
+            return ret;
+        break;
+    case AVMEDIA_TYPE_VIDEO:
+        asf_st->type = AVMEDIA_TYPE_VIDEO;
+        if ((ret = parse_video_info(pb, st)) < 0)
+            return ret;
+        break;
+    default:
+        avio_skip(pb, ts_data_len);
+        break;
+    }
+    if (err_data_len) {
+        if (type == AVMEDIA_TYPE_AUDIO) {
+            uint8_t span = avio_r8(pb);
+            if (span > 1) {
+                asf_st->span              = span;
+                asf_st->virtual_pkt_len   = avio_rl16(pb);
+                asf_st->virtual_chunk_len = avio_rl16(pb);
+                if (!asf_st->virtual_chunk_len || !asf_st->virtual_pkt_len)
+                    return AVERROR_INVALIDDATA;
+                if (err_data_len < 5) // 5 bytes were consumed above; the unsigned subtraction below must not wrap
+                    return AVERROR_INVALIDDATA;
+                avio_skip(pb, err_data_len - 5);
+            } else
+                avio_skip(pb, err_data_len - 1);
+        } else
+            avio_skip(pb, err_data_len);
+    }
+    align_position(pb, asf->offset, size);
+    return 0;
+}
+
+// Store the ISO 639-2 (bibliographic) code of an RFC 1766 tag as "language" metadata in *met.
+static void set_language(AVFormatContext *s, const char *rfc1766, AVDictionary **met)
+{
+    char two_letter[3] = { 0 };
+    const char *lang;
+    if (!rfc1766 || !rfc1766[0] || !rfc1766[1]) // language abbr should contain at least 2 chars
+        return;
+    memcpy(two_letter, rfc1766, 2); // ignore country code if any
+    lang = av_convert_lang_to(two_letter, AV_LANG_ISO639_2_BIBL);
+    if (lang && av_dict_set(met, "language", lang, 0) < 0)
+        av_log(s, AV_LOG_WARNING, "av_dict_set failed.\n");
+}
+// Parse an Extended Stream Properties object; timing, bitrate and language index go to the matching stream.
+static int asf_read_ext_stream_properties(AVFormatContext *s, const GUIDParseTable *g)
+{
+    ASFContext *asf = s->priv_data;
+    AVIOContext *pb = s->pb;
+    AVStream *st    = NULL; // stays NULL when no registered stream carries this stream number
+    ff_asf_guid guid;
+    uint16_t nb_st_name, nb_pay_exts, st_num, lang_idx;
+    int i, ret;
+    uint32_t bitrate;
+    uint64_t start_time, end_time, time_per_frame;
+    uint64_t size = avio_rl64(pb);
+
+    start_time = avio_rl64(pb);
+    end_time   = avio_rl64(pb);
+    bitrate    = avio_rl32(pb);
+    avio_skip(pb, 28); // skip some unused values
+    st_num     = avio_rl16(pb);
+    st_num    &= ASF_STREAM_NUM;
+    lang_idx   = avio_rl16(pb); // Stream Language ID Index
+    for (i = 0; i < asf->nb_streams; i++) {
+        if (st_num == asf->asf_st[i]->stream_index) {
+            st                       = s->streams[asf->asf_st[i]->index];
+            asf->asf_st[i]->lang_idx = lang_idx;
+            break;
+        }
+    }
+    time_per_frame = avio_rl64(pb); // average time per frame
+    if (st) {
+        st->start_time           = start_time;
+        st->duration             = end_time - start_time;
+        st->codec->bit_rate      = bitrate;
+        st->avg_frame_rate.num   = 10000000;
+        st->avg_frame_rate.den   = time_per_frame; // NOTE(review): file value stored unchecked; 0 would give a zero denominator
+    }
+    nb_st_name = avio_rl16(pb);
+    nb_pay_exts   = avio_rl16(pb);
+    for (i = 0; i < nb_st_name; i++) { // stream names are skipped, nothing is stored
+        uint16_t len;
+
+        avio_rl16(pb); // Language ID Index
+        len = avio_rl16(pb);
+        avio_skip(pb, len);
+    }
+
+    for (i = 0; i < nb_pay_exts; i++) { // payload extension systems are skipped as well
+        uint32_t len;
+        avio_skip(pb, 16); // Extension System ID
+        avio_skip(pb, 2);  // Extension Data Size
+        len = avio_rl32(pb);
+        avio_skip(pb, len);
+    }
+
+    if ((ret = ff_get_guid(pb, &guid)) < 0) { // a failed GUID read is not reported as an error
+        align_position(pb, asf->offset, size);
+
+        return 0;
+    }
+
+    g = find_guid(guid);
+    if (g && !(strcmp(g->name, "Stream Properties"))) { // only a Stream Properties object is dispatched to its handler
+        if ((ret = g->read_object(s, g)) < 0)
+            return ret;
+    }
+
+    align_position(pb, asf->offset, size);
+    return 0;
+}
+
+// Read the Language List object into asf->asf_sd[].langs, one entry per listed language.
+static int asf_read_language_list(AVFormatContext *s, const GUIDParseTable *g)
+{
+    ASFContext *asf     = s->priv_data;
+    AVIOContext *pb     = s->pb;
+    uint64_t obj_size   = avio_rl64(pb);
+    uint16_t lang_count = avio_rl16(pb);
+    int idx, err;
+
+    if (lang_count >= ASF_MAX_STREAMS) // lists of ASF_MAX_STREAMS or more entries are not parsed
+        lang_count = 0;
+    for (idx = 0; idx < lang_count; idx++) {
+        size_t str_len = avio_r8(pb);
+
+        if (str_len == 0) // a zero length byte is replaced by 6
+            str_len = 6;
+        err = get_asf_string(pb, str_len, asf->asf_sd[idx].langs,
+                             sizeof(asf->asf_sd[idx].langs));
+        if (err < 0)
+            return err;
+    }
+    align_position(pb, asf->offset, obj_size);
+    return 0;
+}
+
+// Parse the Data object header: record size and (first visit only) offset of the data section,
+// reset the packet parsing state and remember where the first packet starts. Always returns 0.
+static int asf_read_data(AVFormatContext *s, const GUIDParseTable *g)
+{
+    ASFContext *asf = s->priv_data;
+    AVIOContext *pb = s->pb;
+    uint64_t nb_data_packets;
+    int i;
+
+    asf->data_size = avio_rl64(pb);
+    if (!asf->data_reached) { // the offset is only recorded the first time this object is seen
+        asf->data_reached       = 1;
+        asf->data_offset        = asf->offset;
+    }
+    if (!(asf->b_flags & ASF_FLAG_BROADCAST))
+        for (i = 0; i < asf->nb_streams; i++)
+            s->streams[i]->duration = asf->duration;
+    asf->nb_mult_left           = 0;
+    asf->sub_left               = 0;
+    asf->state                  = PARSE_PACKET_HEADER;
+    asf->return_subpayload      = 0;
+    asf->packet_size_internal   = 0;
+    avio_skip(pb, 16); // skip File ID
+    nb_data_packets = avio_rl64(pb); // Total Data Packets
+    if (nb_data_packets != asf->nb_packets) // values are printed in the order the message names them
+        av_log(s, AV_LOG_WARNING,
+               "Number of Packets from File Properties Object is not equal to Total "
+               "Datapackets value! num of packets %"PRIu64" total num %"PRIu64".\n",
+               asf->nb_packets, nb_data_packets);
+    avio_skip(pb, 2); // skip reserved field
+    asf->first_packet_offset = avio_tell(pb);
+    if (pb->seekable && !(asf->b_flags & ASF_FLAG_BROADCAST))
+        align_position(pb, asf->offset, asf->data_size);
+
+    return 0;
+}
+
+// Turn a Simple Index object into seek index entries for the first video stream that has none yet.
+static int asf_read_simple_index(AVFormatContext *s, const GUIDParseTable *g)
+{
+    ASFContext *asf = s->priv_data;
+    AVIOContext *pb = s->pb;
+    AVStream *st    = NULL;
+    uint64_t interval; // index entry time interval in 100 ns units, usually it's 1s
+    uint32_t pkt_num, nb_entries, n; // n is unsigned like nb_entries, so large counts cannot overflow it
+    int64_t prev_pkt_num = -1; // wider than pkt_num: -1 can never equal a real packet number
+    int i, ret;
+    uint64_t size = avio_rl64(pb);
+
+    // simple index objects should be ordered by stream number: take the first not indexed video stream
+    for (i = 0; i < asf->nb_streams; i++) {
+        if ((asf->asf_st[i]->type == AVMEDIA_TYPE_VIDEO) && !asf->asf_st[i]->indexed) {
+            asf->asf_st[i]->indexed = 1;
+            st = s->streams[asf->asf_st[i]->index];
+            break;
+        }
+    }
+    if (!st) {
+        if (size >= 24) // 24 bytes are subtracted from the object size; a smaller size would wrap into a bogus skip
+            avio_skip(pb, size - 24); // if there's no video stream, skip index object
+        return 0;
+    }
+    avio_skip(pb, 16); // skip File ID
+    interval = avio_rl64(pb);
+    avio_skip(pb, 4);
+    nb_entries = avio_rl32(pb);
+    for (n = 0; n < nb_entries && !pb->eof_reached; n++) { // stop early on truncated input
+        pkt_num = avio_rl32(pb);
+        ret = avio_skip(pb, 2);
+        if (ret < 0) {
+            av_log(s, AV_LOG_ERROR, "Skipping failed in asf_read_simple_index.\n");
+            return ret;
+        }
+        if (prev_pkt_num != pkt_num) { // the byte offset is computed in 64 bits, it can exceed 4 GiB
+            av_add_index_entry(st, asf->first_packet_offset + (uint64_t)asf->packet_size *
+                               pkt_num, av_rescale(interval, n, 10000),
+                               asf->packet_size, 0, AVINDEX_KEYFRAME);
+            prev_pkt_num = pkt_num;
+        }
+    }
+    asf->is_simple_index = 1;
+    align_position(pb, asf->offset, size);
+    return 0;
+}
+// Known ASF objects: display name, 16 GUID bytes, handler, and an int flag defined with GUIDParseTable (not visible here).
+static const GUIDParseTable gdef[] = {
+    { "Data",                         { 0x75, 0xB2, 0x26, 0x36, 0x66, 0x8E, 0x11, 0xCF, 0xA6, 0xD9, 0x00, 0xAA, 0x00, 0x62, 0xCE, 0x6C }, asf_read_data, 1 },
+    { "Simple Index",                 { 0x33, 0x00, 0x08, 0x90, 0xE5, 0xB1, 0x11, 0xCF, 0x89, 0xF4, 0x00, 0xA0, 0xC9, 0x03, 0x49, 0xCB }, asf_read_simple_index, 1 },
+    { "Content Description",          { 0x75, 0xB2, 0x26, 0x33, 0x66 ,0x8E, 0x11, 0xCF, 0xA6, 0xD9, 0x00, 0xAA, 0x00, 0x62, 0xCE, 0x6C }, asf_read_content_desc, 1 },
+    { "Extended Content Description", { 0xD2, 0xD0, 0xA4, 0x40, 0xE3, 0x07, 0x11, 0xD2, 0x97, 0xF0, 0x00, 0xA0, 0xC9, 0x5e, 0xA8, 0x50 }, asf_read_ext_content, 1 },
+    { "Stream Bitrate Properties",    { 0x7B, 0xF8, 0x75, 0xCE, 0x46, 0x8D, 0x11, 0xD1, 0x8D, 0x82, 0x00, 0x60, 0x97, 0xC9, 0xA2, 0xB2 }, asf_read_unknown, 1 },
+    { "File Properties",              { 0x8C, 0xAB, 0xDC, 0xA1, 0xA9, 0x47, 0x11, 0xCF, 0x8E, 0xE4, 0x00, 0xC0, 0x0C, 0x20, 0x53, 0x65 }, asf_read_properties, 1 },
+    { "Header Extension",             { 0x5F, 0xBF, 0x03, 0xB5, 0xA9, 0x2E, 0x11, 0xCF, 0x8E, 0xE3, 0x00, 0xC0, 0x0C, 0x20, 0x53, 0x65 }, asf_read_unknown, 0 },
+    { "Stream Properties",            { 0xB7, 0xDC, 0x07, 0x91, 0xA9, 0xB7, 0x11, 0xCF, 0x8E, 0xE6, 0x00, 0xC0, 0x0C, 0x20, 0x53, 0x65 }, asf_read_stream_properties, 1 },
+    { "Codec List",                   { 0x86, 0xD1, 0x52, 0x40, 0x31, 0x1D, 0x11, 0xD0, 0xA3, 0xA4, 0x00, 0xA0, 0xC9, 0x03, 0x48, 0xF6 }, asf_read_unknown, 1 },
+    { "Marker",                       { 0xF4, 0x87, 0xCD, 0x01, 0xA9, 0x51, 0x11, 0xCF, 0x8E, 0xE6, 0x00, 0xC0, 0x0C, 0x20, 0x53, 0x65 }, asf_read_marker, 1 },
+    { "Script Command",               { 0x1E, 0xFB, 0x1A, 0x30, 0x0B, 0x62, 0x11, 0xD0, 0xA3, 0x9B, 0x00, 0xA0, 0xC9, 0x03, 0x48, 0xF6 }, asf_read_unknown, 1 },
+    { "Language List",                { 0x7C, 0x43, 0x46, 0xa9, 0xef, 0xe0, 0x4B, 0xFC, 0xB2, 0x29, 0x39, 0x3e, 0xde, 0x41, 0x5c, 0x85 }, asf_read_language_list, 1},
+    { "Padding",                      { 0x18, 0x06, 0xD4, 0x74, 0xCA, 0xDF, 0x45, 0x09, 0xA4, 0xBA, 0x9A, 0xAB, 0xCB, 0x96, 0xAA, 0xE8 }, asf_read_unknown, 1 },
+    { "DRMv1 Header",                 { 0x22, 0x11, 0xB3, 0xFB, 0xBD, 0x23, 0x11, 0xD2, 0xB4, 0xB7, 0x00, 0xA0, 0xC9, 0x55, 0xFC, 0x6E }, asf_read_unknown, 1 },
+    { "DRMv2 Header",                 { 0x29, 0x8A, 0xE6, 0x14, 0x26, 0x22, 0x4C, 0x17, 0xB9, 0x35, 0xDA, 0xE0, 0x7E, 0xE9, 0x28, 0x9c }, asf_read_unknown, 1 },
+    { "Index",                        { 0xD6, 0xE2, 0x29, 0xD3, 0x35, 0xDA, 0x11, 0xD1, 0x90, 0x34, 0x00, 0xA0, 0xC9, 0x03, 0x49, 0xBE }, asf_read_unknown, 1 },
+    { "Media Object Index",           { 0xFE, 0xB1, 0x03, 0xF8, 0x12, 0xAD, 0x4C, 0x64, 0x84, 0x0F, 0x2A, 0x1D, 0x2F, 0x7A, 0xD4, 0x8C }, asf_read_unknown, 1 },
+    { "Timecode Index",               { 0x3C, 0xB7, 0x3F, 0xD0, 0x0C, 0x4A, 0x48, 0x03, 0x95, 0x3D, 0xED, 0xF7, 0xB6, 0x22, 0x8F, 0x0C }, asf_read_unknown, 0 },
+    { "Bitrate_Mutual_Exclusion",     { 0xD6, 0xE2, 0x29, 0xDC, 0x35, 0xDA, 0x11, 0xD1, 0x90, 0x34, 0x00, 0xA0, 0xC9, 0x03, 0x49, 0xBE }, asf_read_unknown, 1 },
+    { "Error Correction",             { 0x75, 0xB2, 0x26, 0x35, 0x66, 0x8E, 0x11, 0xCF, 0xA6, 0xD9, 0x00, 0xAA, 0x00, 0x62, 0xCE, 0x6C }, asf_read_unknown, 1 },
+    { "Content Branding",             { 0x22, 0x11, 0xB3, 0xFA, 0xBD, 0x23, 0x11, 0xD2, 0xB4, 0xB7, 0x00, 0xA0, 0xC9, 0x55, 0xFC, 0x6E }, asf_read_unknown, 1 },
+    { "Content Encryption",           { 0x22, 0x11, 0xB3, 0xFB, 0xBD, 0x23, 0x11, 0xD2, 0xB4, 0xB7, 0x00, 0xA0, 0xC9, 0x55, 0xFC, 0x6E }, asf_read_unknown, 1 }, // same GUID bytes as "DRMv1 Header" above
+    { "Extended Content Encryption",  { 0x29, 0x8A, 0xE6, 0x14, 0x26, 0x22, 0x4C, 0x17, 0xB9, 0x35, 0xDA, 0xE0, 0x7E, 0xE9, 0x28, 0x9C }, asf_read_unknown, 1 }, // same GUID bytes as "DRMv2 Header" above
+    { "Digital Signature",            { 0x22, 0x11, 0xB3, 0xFC, 0xBD, 0x23, 0x11, 0xD2, 0xB4, 0xB7, 0x00, 0xA0, 0xC9, 0x55, 0xFC, 0x6E }, asf_read_unknown, 1 },
+    { "Extended Stream Properties",   { 0x14, 0xE6, 0xA5, 0xCB, 0xC6, 0x72, 0x43, 0x32, 0x83, 0x99, 0xA9, 0x69, 0x52, 0x06, 0x5B, 0x5A }, asf_read_ext_stream_properties, 1 },
+    { "Advanced Mutual Exclusion",    { 0xA0, 0x86, 0x49, 0xCF, 0x47, 0x75, 0x46, 0x70, 0x8A, 0x16, 0x6E, 0x35, 0x35, 0x75, 0x66, 0xCD }, asf_read_unknown, 1 },
+    { "Group Mutual Exclusion",       { 0xD1, 0x46, 0x5A, 0x40, 0x5A, 0x79, 0x43, 0x38, 0xB7, 0x1B, 0xE3, 0x6B, 0x8F, 0xD6, 0xC2, 0x49 }, asf_read_unknown, 1},
+    { "Stream Prioritization",        { 0xD4, 0xFE, 0xD1, 0x5B, 0x88, 0xD3, 0x45, 0x4F, 0x81, 0xF0, 0xED, 0x5C, 0x45, 0x99, 0x9E, 0x24 }, asf_read_unknown, 1 },
+    { "Bandwidth Sharing Object",     { 0xA6, 0x96, 0x09, 0xE6, 0x51, 0x7B, 0x11, 0xD2, 0xB6, 0xAF, 0x00, 0xC0, 0x4F, 0xD9, 0x08, 0xE9 }, asf_read_unknown, 1 },
+    { "Metadata",                     { 0xC5, 0xF8, 0xCB, 0xEA, 0x5B, 0xAF, 0x48, 0x77, 0x84, 0x67, 0xAA, 0x8C, 0x44, 0xFA, 0x4C, 0xCA }, asf_read_metadata_obj, 1 },
+    { "Metadata Library",             { 0x44, 0x23, 0x1C, 0x94, 0x94, 0x98, 0x49, 0xD1, 0xA1, 0x41, 0x1D, 0x13, 0x4E, 0x45, 0x70, 0x54 }, asf_read_metadata_obj, 1 },
+    { "Audio Spread",                 { 0xBF, 0xC3, 0xCD, 0x50, 0x61, 0x8F, 0x11, 0xCF, 0x8B, 0xB2, 0x00, 0xAA, 0x00, 0xB4, 0xE2, 0x20 }, asf_read_unknown, 1 },
+    { "Index Parameters",             { 0xD6, 0xE2, 0x29, 0xDF, 0x35, 0xDA, 0x11, 0xD1, 0x90, 0x34, 0x00, 0xA0, 0xC9, 0x03, 0x49, 0xBE }, asf_read_unknown, 1 },
+    { "Content Encryption System Windows Media DRM Network Devices",
+                                      { 0x7A, 0x07, 0x9B, 0xB6, 0xDA, 0XA4, 0x4e, 0x12, 0xA5, 0xCA, 0x91, 0xD3, 0x8D, 0xC1, 0x1A, 0x8D }, asf_read_unknown, 1 },
+    { "Mutex Language",               { 0xD6, 0xE2, 0x2A, 0x00, 0x25, 0xDA, 0x11, 0xD1, 0x90, 0x34, 0x00, 0xA0, 0xC9, 0x03, 0x49, 0xBE }, asf_read_unknown, 1 }, // NOTE(review): 5th byte is 0x25 in the three Mutex rows but 0x35 in the other D6E2.. rows - check against the ASF spec
+    { "Mutex Bitrate",                { 0xD6, 0xE2, 0x2A, 0x01, 0x25, 0xDA, 0x11, 0xD1, 0x90, 0x34, 0x00, 0xA0, 0xC9, 0x03, 0x49, 0xBE }, asf_read_unknown, 1 },
+    { "Mutex Unknown",                { 0xD6, 0xE2, 0x2A, 0x02, 0x25, 0xDA, 0x11, 0xD1, 0x90, 0x34, 0x00, 0xA0, 0xC9, 0x03, 0x49, 0xBE }, asf_read_unknown, 1 },
+    { "Bandwidth Sharing Exclusive",  { 0xAF, 0x60, 0x60, 0xAA, 0x51, 0x97, 0x11, 0xD2, 0xB6, 0xAF, 0x00, 0xC0, 0x4F, 0xD9, 0x08, 0xE9 }, asf_read_unknown, 1 },
+    { "Bandwidth Sharing Partial",    { 0xAF, 0x60, 0x60, 0xAB, 0x51, 0x97, 0x11, 0xD2, 0xB6, 0xAF, 0x00, 0xC0, 0x4F, 0xD9, 0x08, 0xE9 }, asf_read_unknown, 1 },
+    { "Payload Extension System Timecode", { 0x39, 0x95, 0x95, 0xEC, 0x86, 0x67, 0x4E, 0x2D, 0x8F, 0xDB, 0x98, 0x81, 0x4C, 0xE7, 0x6C, 0x1E }, asf_read_unknown, 1 },
+    { "Payload Extension System File Name", { 0xE1, 0x65, 0xEC, 0x0E, 0x19, 0xED, 0x45, 0xD7, 0xB4, 0xA7, 0x25, 0xCB, 0xD1, 0xE2, 0x8E, 0x9B }, asf_read_unknown, 1 },
+    { "Payload Extension System Content Type", { 0xD5, 0x90, 0xDC, 0x20, 0x07, 0xBC, 0x43, 0x6C, 0x9C, 0xF7, 0xF3, 0xBB, 0xFB, 0xF1, 0xA4, 0xDC }, asf_read_unknown, 1 },
+    { "Payload Extension System Pixel Aspect Ratio", { 0x1, 0x1E, 0xE5, 0x54, 0xF9, 0xEA, 0x4B, 0xC8, 0x82, 0x1A, 0x37, 0x6B, 0x74, 0xE4, 0xC4, 0xB8 }, asf_read_unknown, 1 },
+    { "Payload Extension System Sample Duration", { 0xC6, 0xBD, 0x94, 0x50, 0x86, 0x7F, 0x49, 0x07, 0x83, 0xA3, 0xC7, 0x79, 0x21, 0xB7, 0x33, 0xAD }, asf_read_unknown, 1 },
+    { "Payload Extension System Encryption Sample ID", { 0x66, 0x98, 0xB8, 0x4E, 0x0A, 0xFA, 0x43, 0x30, 0xAE, 0xB2, 0x1C, 0x0A, 0x98, 0xD7, 0xA4, 0x4D }, asf_read_unknown, 1 },
+    { "Payload Extension System Degradable JPEG", { 0x00, 0xE1, 0xAF, 0x06, 0x7B, 0xEC, 0x11, 0xD1, 0xA5, 0x82, 0x00, 0xC0, 0x4F, 0xC2, 0x9C, 0xFB }, asf_read_unknown, 1 },
+};
+// Read `len` from the caller's local `pb` as 8, 16 or 32 bits, chosen by comparing `flag` with name##IS_BYTE/IS_WORD/IS_DWORD; otherwise len = 0.
+#define READ_LEN(flag, name, len)            \
+    do {                                     \
+        if ((flag) == name ## IS_BYTE)       \
+            len = avio_r8(pb);               \
+        else if ((flag) == name ## IS_WORD)  \
+            len = avio_rl16(pb);             \
+        else if ((flag) == name ## IS_DWORD) \
+            len = avio_rl32(pb);             \
+        else                                 \
+            len = 0;                         \
+    } while(0)
+// Read one sub-payload into pkt; when is_header is set, the sub-payload header fields are parsed first.
+static int asf_read_subpayload(AVFormatContext *s, AVPacket *pkt, int is_header)
+{
+    ASFContext *asf = s->priv_data;
+    AVIOContext *pb = s->pb;
+    uint8_t sub_len;
+    int ret, i;
+
+    if (is_header) {
+        asf->dts_delta = avio_r8(pb);
+        if (asf->nb_mult_left) {
+            asf->mult_sub_len = avio_rl16(pb); // total
+        }
+        asf->sub_header_offset = avio_tell(pb); // reference point for the mult_sub_len check below
+        asf->nb_sub = 0;
+        asf->sub_left = 1;
+    }
+    sub_len = avio_r8(pb);
+    if ((ret = av_get_packet(pb, pkt, sub_len)) < 0) // each subpayload is entire frame
+        return ret;
+    for (i = 0; i < asf->nb_streams; i++) { // map the ASF stream number to the AVStream index
+        if (asf->stream_index == asf->asf_st[i]->stream_index) {
+            pkt->stream_index  = asf->asf_st[i]->index;
+            break;
+        }
+    }
+    asf->return_subpayload = 1;
+    if (!sub_len) // a zero length sub-payload is not flagged for returning
+        asf->return_subpayload = 0;
+
+    if (sub_len)
+        asf->nb_sub++;
+    pkt->dts = asf->sub_dts + (asf->nb_sub - 1) * asf->dts_delta - asf->preroll; // (nb_sub - 1) deltas past sub_dts, minus preroll
+    if (asf->nb_mult_left && (avio_tell(pb) >=
+                              (asf->sub_header_offset + asf->mult_sub_len))) { // all mult_sub_len bytes of this payload consumed
+        asf->sub_left = 0;
+        asf->nb_mult_left--;
+    }
+    if (avio_tell(pb) >= asf->packet_offset + asf->packet_size - asf->pad_len) { // reached the padding of the data packet
+        asf->sub_left = 0;
+        if (!asf->nb_mult_left) {
+            avio_skip(pb, asf->pad_len);
+            if (avio_tell(pb) != asf->packet_offset + asf->packet_size) {
+                if (!asf->packet_size)
+                    return AVERROR_INVALIDDATA;
+                av_log(s, AV_LOG_WARNING,
+                       "Position %"PRId64" wrong, should be %"PRId64"\n",
+                       avio_tell(pb), asf->packet_offset + asf->packet_size);
+                avio_seek(pb, asf->packet_offset + asf->packet_size, SEEK_SET); // resynchronise on the computed packet end
+            }
+        }
+    }
+
+    return 0;
+}
+
+// Clear the bookkeeping fields of an ASFPacket, then unref and re-initialise its embedded AVPacket.
+static void reset_packet(ASFPacket *asf_pkt)
+{
+    AVPacket *buffered = &asf_pkt->avpkt;
+    asf_pkt->dts       = 0;
+    asf_pkt->flags     = 0;
+    asf_pkt->duration  = 0;
+    asf_pkt->data_size = asf_pkt->size_left = 0;
+    av_packet_unref(buffered);
+    av_init_packet(buffered);
+}
+
+// Read replicated data: media object size (allocating the buffer on first use), presentation time, rest skipped.
+static int asf_read_replicated_data(AVFormatContext *s, ASFPacket *asf_pkt)
+{
+    ASFContext *asf = s->priv_data;
+    AVIOContext *pb = s->pb;
+    if (asf_pkt->data_size) {
+        avio_skip(pb, 4); // reading of media object size is already done
+    } else {
+        int err, obj_size = avio_rl32(pb); // read media object size
+        if (obj_size <= 0)
+            return AVERROR_INVALIDDATA;
+        err = av_new_packet(&asf_pkt->avpkt, obj_size);
+        if (err < 0)
+            return err;
+        asf_pkt->data_size = asf_pkt->size_left = obj_size;
+    }
+    asf_pkt->dts = avio_rl32(pb); // read presentation time
+    if (asf->rep_data_len >= 8) // size and time account for the first 8 bytes
+        avio_skip(pb, asf->rep_data_len - 8); // skip replicated data
+    return 0;
+}
+// Read one payload of a multi-payload packet: start sub-payload parsing, or append the bytes to asf_pkt's buffer.
+static int asf_read_multiple_payload(AVFormatContext *s, AVPacket *pkt,
+                                 ASFPacket *asf_pkt)
+{
+    ASFContext *asf = s->priv_data;
+    AVIOContext *pb = s->pb;
+    uint16_t pay_len;
+    unsigned char *p;
+    int ret;
+    int skip = 0; // payload bytes beyond the remaining buffer space; skipped after the read
+
+    // if replicated length is 1, subpayloads are present
+    if (asf->rep_data_len == 1) {
+        asf->sub_left = 1;
+        asf->state = READ_MULTI_SUB;
+        pkt->flags = asf_pkt->flags;
+        if ((ret = asf_read_subpayload(s, pkt, 1)) < 0)
+            return ret;
+    } else {
+        if (asf->rep_data_len)
+            if ((ret = asf_read_replicated_data(s, asf_pkt)) < 0)
+                return ret;
+        pay_len = avio_rl16(pb); // payload length should be WORD
+        if (pay_len > asf->packet_size) {
+            av_log(s, AV_LOG_ERROR,
+                   "Error: invalid data packet size, pay_len %"PRIu16", "
+                   "asf->packet_size %"PRIu32", offset %"PRId64".\n",
+                   pay_len, asf->packet_size, avio_tell(pb));
+            return AVERROR_INVALIDDATA;
+        }
+        p = asf_pkt->avpkt.data + asf_pkt->data_size - asf_pkt->size_left; // write position: just after the bytes already stored
+        if (pay_len > asf_pkt->size_left) { // clamp to the space left and remember the excess
+            av_log(s, AV_LOG_ERROR,
+                   "Error: invalid buffer size, pay_len %d, data size left %d.\n",
+            pay_len, asf_pkt->size_left);
+            skip = pay_len - asf_pkt->size_left;
+            pay_len = asf_pkt->size_left;
+        }
+        if (asf_pkt->size_left <= 0)
+            return AVERROR_INVALIDDATA;
+        if ((ret = avio_read(pb, p, pay_len)) < 0)
+            return ret;
+        if (s->key && s->keylen == 20)
+            ff_asfcrypt_dec(s->key, p, ret); // ret holds the value avio_read() returned
+        avio_skip(pb, skip);
+        asf_pkt->size_left -= pay_len;
+        asf->nb_mult_left--;
+    }
+
+    return 0;
+}
+// Read the payload of a single-payload packet and append it to the media object buffered in asf_pkt.
+static int asf_read_single_payload(AVFormatContext *s, ASFPacket *asf_pkt)
+{
+    ASFContext *asf = s->priv_data;
+    AVIOContext *pb = s->pb;
+    int64_t  offset;
+    uint64_t size;
+    unsigned char *p;
+    int ret, data_size;
+
+    if (!asf_pkt->data_size) { // no media object in progress: allocate a buffer for the whole object
+        data_size = avio_rl32(pb); // read media object size
+        if (data_size <= 0)
+            return AVERROR_EOF;
+        if ((ret = av_new_packet(&asf_pkt->avpkt, data_size)) < 0)
+            return ret;
+        asf_pkt->data_size = asf_pkt->size_left = data_size;
+    } else
+        avio_skip(pb, 4); // skip media object size
+    asf_pkt->dts = avio_rl32(pb); // read presentation time
+    if (asf->rep_data_len >= 8)
+        avio_skip(pb, asf->rep_data_len - 8); // skip replicated data
+    offset = avio_tell(pb);
+
+    // size of the payload - size of the packet without header and padding
+    if (asf->packet_size_internal)
+        size = asf->packet_size_internal - offset + asf->packet_offset - asf->pad_len;
+    else
+        size = asf->packet_size - offset + asf->packet_offset - asf->pad_len;
+    if (size > asf->packet_size) { // also rejects results that wrapped around in the unsigned size
+        av_log(s, AV_LOG_ERROR,
+               "Error: invalid data packet size, offset %"PRId64".\n",
+               avio_tell(pb));
+        return AVERROR_INVALIDDATA;
+    }
+    p = asf_pkt->avpkt.data + asf_pkt->data_size - asf_pkt->size_left; // write position: just after the bytes already stored
+    if (size > asf_pkt->size_left || asf_pkt->size_left <= 0)
+        return AVERROR_INVALIDDATA;
+    if (asf_pkt->size_left > size)
+        asf_pkt->size_left -= size;
+    else
+        asf_pkt->size_left = 0;
+    if ((ret = avio_read(pb, p, size)) < 0)
+        return ret;
+    if (s->key && s->keylen == 20)
+            ff_asfcrypt_dec(s->key, p, ret); // ret holds the value avio_read() returned
+    if (asf->packet_size_internal)
+        avio_skip(pb, asf->packet_size - asf->packet_size_internal);
+    avio_skip(pb, asf->pad_len); // skip padding
+
+    return 0;
+}
+
+// Read the next payload (or the next sub-payload of the current one) and route it to the stream it belongs to.
+static int asf_read_payload(AVFormatContext *s, AVPacket *pkt)
+{
+    ASFContext *asf = s->priv_data;
+    AVIOContext *pb = s->pb;
+    int ret, i;
+    ASFPacket *asf_pkt = NULL;
+
+    if (!asf->sub_left) {
+        uint32_t off_len, media_len;
+        uint8_t stream_num;
+
+        stream_num = avio_r8(pb);
+        asf->stream_index = stream_num & ASF_STREAM_NUM;
+        for (i = 0; i < asf->nb_streams; i++) {
+            if (asf->stream_index == asf->asf_st[i]->stream_index) {
+                asf_pkt               = &asf->asf_st[i]->pkt;
+                asf_pkt->stream_index = asf->asf_st[i]->index;
+                break;
+            }
+        }
+        if (!asf_pkt) { // unknown stream number: skip the whole data packet if it lies inside the data section
+            if (asf->packet_offset + asf->packet_size <= asf->data_offset + asf->data_size) {
+                if (!asf->packet_size) {
+                    av_log(s, AV_LOG_ERROR, "Invalid packet size 0.\n");
+                    return AVERROR_INVALIDDATA;
+                }
+                avio_seek(pb, asf->packet_offset + asf->packet_size, SEEK_SET);
+                av_log(s, AV_LOG_WARNING, "Skipping the stream with the invalid stream index %d.\n",
+                       asf->stream_index);
+                return AVERROR(EAGAIN);
+            } else
+                return AVERROR_INVALIDDATA;
+        }
+
+        if (stream_num >> 7) // top bit of the stream number byte marks a key frame
+            asf_pkt->flags |= AV_PKT_FLAG_KEY;
+        READ_LEN(asf->prop_flags & ASF_PL_MASK_MEDIA_OBJECT_NUMBER_LENGTH_FIELD_SIZE,
+                 ASF_PL_FLAG_MEDIA_OBJECT_NUMBER_LENGTH_FIELD_, media_len);
+        READ_LEN(asf->prop_flags & ASF_PL_MASK_OFFSET_INTO_MEDIA_OBJECT_LENGTH_FIELD_SIZE,
+                 ASF_PL_FLAG_OFFSET_INTO_MEDIA_OBJECT_LENGTH_FIELD_, off_len);
+        READ_LEN(asf->prop_flags & ASF_PL_MASK_REPLICATED_DATA_LENGTH_FIELD_SIZE,
+                 ASF_PL_FLAG_REPLICATED_DATA_LENGTH_FIELD_, asf->rep_data_len);
+        if (asf_pkt->size_left && (asf_pkt->frame_num != media_len)) {
+            av_log(s, AV_LOG_WARNING, "Unfinished frame will be ignored\n");
+            reset_packet(asf_pkt);
+        }
+        asf_pkt->frame_num = media_len;
+        asf->sub_dts = off_len;
+        if (asf->nb_mult_left) {
+            if ((ret = asf_read_multiple_payload(s, pkt, asf_pkt)) < 0)
+                return ret;
+        } else if (asf->rep_data_len == 1) {
+            asf->sub_left = 1;
+            asf->state    = READ_SINGLE;
+            pkt->flags    = asf_pkt->flags;
+            if ((ret = asf_read_subpayload(s, pkt, 1)) < 0)
+                return ret;
+        } else {
+            if ((ret = asf_read_single_payload(s, asf_pkt)) < 0)
+                return ret;
+        }
+    } else {
+        for (i = 0; i < asf->nb_streams; i++) { // strictly below nb_streams: asf_st[nb_streams] is not a registered stream
+            if (asf->stream_index == asf->asf_st[i]->stream_index) {
+                asf_pkt = &asf->asf_st[i]->pkt;
+                break;
+            }
+        }
+        if (!asf_pkt)
+            return AVERROR_INVALIDDATA;
+        pkt->flags         = asf_pkt->flags;
+        pkt->dts           = asf_pkt->dts;
+        pkt->stream_index  = asf->asf_st[i]->index; // i still indexes the match found above
+        if ((ret = asf_read_subpayload(s, pkt, 0)) < 0) // read subpayload without its header
+            return ret;
+    }
+    return 0;
+}
+// Parse a data packet header: error correction data, length and property flags, padding length, send time, payload count.
+static int asf_read_packet_header(AVFormatContext *s)
+{
+    ASFContext *asf = s->priv_data;
+    AVIOContext *pb = s->pb;
+    uint64_t size;
+    uint32_t av_unused seq;
+    unsigned char error_flags, len_flags, pay_flags;
+
+    asf->packet_offset = avio_tell(pb); // start of this data packet, used by the payload readers
+    error_flags = avio_r8(pb); // read Error Correction Flags
+    if (error_flags & ASF_PACKET_FLAG_ERROR_CORRECTION_PRESENT) {
+        if (!(error_flags & ASF_ERROR_CORRECTION_LENGTH_TYPE)) {
+            size = error_flags & ASF_PACKET_ERROR_CORRECTION_DATA_SIZE;
+            avio_skip(pb, size);
+        }
+        len_flags       = avio_r8(pb);
+    } else
+        len_flags = error_flags; // no error correction data: the first byte already is the length flags byte
+    asf->prop_flags = avio_r8(pb);
+    READ_LEN(len_flags & ASF_PPI_MASK_PACKET_LENGTH_FIELD_SIZE,
+             ASF_PPI_FLAG_PACKET_LENGTH_FIELD_, asf->packet_size_internal);
+    READ_LEN(len_flags & ASF_PPI_MASK_SEQUENCE_FIELD_SIZE,
+             ASF_PPI_FLAG_SEQUENCE_FIELD_, seq);
+    READ_LEN(len_flags & ASF_PPI_MASK_PADDING_LENGTH_FIELD_SIZE,
+             ASF_PPI_FLAG_PADDING_LENGTH_FIELD_, asf->pad_len );
+    asf->send_time = avio_rl32(pb); // send time
+    avio_skip(pb, 2); // skip duration
+    if (len_flags & ASF_PPI_FLAG_MULTIPLE_PAYLOADS_PRESENT) { // Multiple Payloads present
+        pay_flags = avio_r8(pb);
+        asf->nb_mult_left = (pay_flags & ASF_NUM_OF_PAYLOADS); // left untouched when the flag is absent
+    }
+
+    return 0;
+}
+// Copy the chunks of asf_pkt's buffered data in rearranged order into a new packet that then replaces asf_pkt->avpkt.
+static int asf_deinterleave(AVFormatContext *s, ASFPacket *asf_pkt, int st_num)
+{
+    ASFContext *asf    = s->priv_data;
+    ASFStream *asf_st  = asf->asf_st[st_num];
+    unsigned char *p   = asf_pkt->avpkt.data;
+    uint16_t pkt_len   = asf->asf_st[st_num]->virtual_pkt_len;
+    uint16_t chunk_len = asf->asf_st[st_num]->virtual_chunk_len;
+    int nchunks        = pkt_len / chunk_len; // NOTE(review): needs chunk_len != 0; the stream properties parser only rejects 0 when span > 1 - confirm callers
+    AVPacket pkt;
+    int pos = 0, j, l, ret; // pos is the write offset into the new packet
+
+
+    if ((ret = av_new_packet(&pkt, asf_pkt->data_size)) < 0)
+        return ret;
+
+    while (asf_pkt->data_size >= asf_st->span * pkt_len + pos) {
+        if (pos >= asf_pkt->data_size) {
+            break;
+        }
+        for (l = 0; l < pkt_len; l++) {
+            if (pos >= asf_pkt->data_size) {
+                break;
+            }
+            for (j = 0; j < asf_st->span; j++) {
+                if ((pos + chunk_len) >= asf_pkt->data_size) // bounds the write side only
+                    break;
+                memcpy(pkt.data + pos,
+                       p + (j * nchunks + l) * chunk_len,
+                       chunk_len);
+                pos += chunk_len;
+            }
+        }
+        p += asf_st->span * pkt_len; // advance the source by span * pkt_len bytes
+        if (p > asf_pkt->avpkt.data + asf_pkt->data_size)
+            break;
+    }
+    av_packet_unref(&asf_pkt->avpkt);
+    asf_pkt->avpkt = pkt; // struct copy: asf_pkt now refers to the new packet's buffer
+
+    return 0;
+}
+
+/* Read payloads until some stream has a complete media object and return it
+ * in pkt. Returns 0 on success, AVERROR_EOF once the data is exhausted, or a
+ * negative AVERROR passed up from payload parsing or de-interleaving. */
+static int asf_read_packet(AVFormatContext *s, AVPacket *pkt)
+{
+    ASFContext *asf = s->priv_data;
+    AVIOContext *pb = s->pb;
+    int ret, i;
+
+    if ((avio_tell(pb) >= asf->data_offset + asf->data_size) &&
+        !(asf->b_flags & ASF_FLAG_BROADCAST))
+        return AVERROR_EOF;
+    while (!pb->eof_reached) {
+        if (asf->state == PARSE_PACKET_HEADER) {
+            asf_read_packet_header(s);
+            if (pb->eof_reached)
+                break;
+            if (!asf->nb_mult_left)
+                asf->state = READ_SINGLE;
+            else
+                asf->state = READ_MULTI;
+        }
+        ret = asf_read_payload(s, pkt);
+        if (ret == AVERROR(EAGAIN)) {
+            asf->state = PARSE_PACKET_HEADER;
+            continue;
+        } else if (ret < 0)
+            return ret;
+
+        switch (asf->state) {
+        case READ_SINGLE:
+            if (!asf->sub_left)
+                asf->state = PARSE_PACKET_HEADER;
+            break;
+        case READ_MULTI_SUB:
+            if (!asf->sub_left && !asf->nb_mult_left) {
+                asf->state = PARSE_PACKET_HEADER;
+                if (!asf->return_subpayload &&
+                    (avio_tell(pb) <= asf->packet_offset +
+                     asf->packet_size - asf->pad_len))
+                    avio_skip(pb, asf->pad_len); // skip padding
+                if (asf->packet_offset + asf->packet_size > avio_tell(pb))
+                    avio_seek(pb, asf->packet_offset + asf->packet_size, SEEK_SET);
+            } else if (!asf->sub_left)
+                asf->state = READ_MULTI;
+            break;
+        case READ_MULTI:
+            if (!asf->nb_mult_left) {
+                asf->state = PARSE_PACKET_HEADER;
+                if (!asf->return_subpayload &&
+                    (avio_tell(pb) <= asf->packet_offset +
+                     asf->packet_size - asf->pad_len))
+                    avio_skip(pb, asf->pad_len); // skip padding
+                if (asf->packet_offset + asf->packet_size > avio_tell(pb))
+                    avio_seek(pb, asf->packet_offset + asf->packet_size, SEEK_SET);
+            }
+            break;
+        }
+        if (asf->return_subpayload) {
+            asf->return_subpayload = 0;
+            return 0;
+        }
+        /* asf_st[] is only populated for asf->nb_streams entries */
+        for (i = 0; i < asf->nb_streams; i++) {
+            ASFPacket *asf_pkt = &asf->asf_st[i]->pkt;
+            if (!asf_pkt->size_left && asf_pkt->data_size) {
+                if (asf->asf_st[i]->span > 1 &&
+                    asf->asf_st[i]->type == AVMEDIA_TYPE_AUDIO)
+                    if ((ret = asf_deinterleave(s, asf_pkt, i)) < 0)
+                        return ret;
+                av_packet_move_ref(pkt, &asf_pkt->avpkt);
+                pkt->stream_index  = asf->asf_st[i]->index;
+                pkt->flags         = asf_pkt->flags;
+                pkt->dts           = asf_pkt->dts - asf->preroll;
+                asf_pkt->data_size = 0;
+                asf_pkt->frame_num = 0;
+                return 0;
+            }
+        }
+    }
+
+    return AVERROR_EOF; /* the loop is only left once pb->eof_reached is set */
+}
+
+/* Free the per-stream metadata dictionaries, pending packets and stream state. */
+static int asf_read_close(AVFormatContext *s)
+{
+    ASFContext *asf = s->priv_data;
+    int n;
+
+    for (n = 0; n < ASF_MAX_STREAMS; n++) {
+        av_dict_free(&asf->asf_sd[n].asf_met);
+        if (n >= asf->nb_streams)
+            continue;
+        av_packet_unref(&asf->asf_st[n]->pkt.avpkt);
+        av_freep(&asf->asf_st[n]);
+    }
+    asf->nb_streams = 0;
+    return 0;
+}
+
+/* Drop all partially parsed packet/payload state; called when seeking. */
+static void reset_packet_state(AVFormatContext *s)
+{
+    ASFContext *asf        = s->priv_data;
+    int i;
+    asf->state             = PARSE_PACKET_HEADER;
+    asf->offset            = 0;
+    asf->return_subpayload = 0;
+    asf->sub_left          = 0;
+    asf->sub_header_offset = 0;
+    asf->packet_offset     = asf->first_packet_offset;
+    asf->pad_len           = 0;
+    asf->rep_data_len      = 0;
+    asf->dts_delta         = 0;
+    asf->mult_sub_len      = 0;
+    asf->nb_mult_left      = 0;
+    asf->nb_sub            = 0;
+    asf->prop_flags        = 0;
+    asf->sub_dts           = 0;
+    for (i = 0; i < asf->nb_streams; i++) {
+        ASFPacket *pkt = &asf->asf_st[i]->pkt;
+        pkt->size_left = 0;
+        pkt->data_size = 0;
+        pkt->duration  = 0;
+        pkt->flags     = 0;
+        pkt->dts       = 0;
+        pkt->frame_num = 0; // was a second duration reset; mirrors asf_read_packet
+        av_packet_unref(&pkt->avpkt);
+        av_init_packet(&pkt->avpkt);
+    }
+}
+
+/*
+ * Find a timestamp for the requested position within the payload,
+ * where pos (position) is the offset inside the Data Object.
+ * When the position is not on a packet boundary, asf_read_timestamp rounds
+ * it up to the closest packet offset after this position. For every key
+ * frame with a non-zero dts read from there an index entry is created; the
+ * search stops at the first such key frame for which stream_index names a
+ * known ASF stream, pos is updated to the packet beginning offset and the
+ * key frame's dts is returned. pos_limit is not used.
+ * Returns AV_NOPTS_VALUE if a packet cannot be read or packet_size is zero.
+ */
+static int64_t asf_read_timestamp(AVFormatContext *s, int stream_index,
+                                  int64_t *pos, int64_t pos_limit)
+{
+    ASFContext *asf = s->priv_data;
+    int64_t pkt_pos = *pos, pkt_offset, dts = AV_NOPTS_VALUE, data_end;
+    AVPacket pkt;
+    int n, st_found = 0;
+
+    if (!asf->packet_size) // nothing to align to, and it is a divisor below
+        return AV_NOPTS_VALUE;
+    data_end = asf->data_offset + asf->data_size;
+
+    n = (pkt_pos - asf->first_packet_offset + asf->packet_size - 1) /
+        asf->packet_size;
+    n = av_clip(n, 0, ((data_end - asf->first_packet_offset) / asf->packet_size - 1));
+    // widen before multiplying so large packet offsets do not overflow
+    pkt_pos = asf->first_packet_offset + (int64_t)n * asf->packet_size;
+
+    avio_seek(s->pb, pkt_pos, SEEK_SET);
+
+    reset_packet_state(s);
+    av_init_packet(&pkt); // keeps the final unref valid if the loop never runs
+    while (avio_tell(s->pb) < data_end) {
+        int i;
+
+        av_init_packet(&pkt);
+        pkt_offset = avio_tell(s->pb);
+        if (asf_read_packet(s, &pkt) < 0)
+            return AV_NOPTS_VALUE; // any other value would be taken as a dts
+        // ASFPacket may contain fragments of packets belonging to different streams,
+        // pkt_offset is the offset of the first fragment within it.
+        if ((pkt_offset >= (pkt_pos + asf->packet_size)))
+            pkt_pos += asf->packet_size;
+        for (i = 0; i < asf->nb_streams; i++) {
+            ASFStream *st = asf->asf_st[i];
+
+            st_found = 0;
+            if (pkt.flags & AV_PKT_FLAG_KEY) {
+                dts = pkt.dts;
+                if (dts) {
+                    av_add_index_entry(s->streams[pkt.stream_index], pkt_pos,
+                                       dts, pkt.size, 0, AVINDEX_KEYFRAME);
+                    if (stream_index == st->index) {
+                        st_found = 1;
+                        break;
+                    }
+                }
+            }
+        }
+        if (st_found)
+            break;
+        av_packet_unref(&pkt);
+    }
+    *pos = pkt_pos;
+
+    av_packet_unref(&pkt);
+    return dts;
+}
+
+/* Seek using the stream's index entries if usable, else by binary search. */
+static int asf_read_seek(AVFormatContext *s, int stream_index,
+                         int64_t timestamp, int flags)
+{
+    ASFContext *asf = s->priv_data;
+    AVStream *st    = s->streams[stream_index];
+    int64_t ret;
+
+    if (st->nb_index_entries && asf->is_simple_index) {
+        int idx = av_index_search_timestamp(st, timestamp, flags);
+        if (idx < 0 || idx >= st->nb_index_entries)
+            return AVERROR_INVALIDDATA;
+        // a failed seek must not reset the parser state or report success
+        if ((ret = avio_seek(s->pb, st->index_entries[idx].pos, SEEK_SET)) < 0)
+            return (int)ret;
+    } else if ((ret = ff_seek_frame_binary(s, stream_index, timestamp, flags)) < 0)
+        return (int)ret;
+    reset_packet_state(s);
+    return 0;
+}
+
+/* Pass guid through swap_guid(), then return the gdef entry whose GUID
+ * matches it byte for byte, or NULL when the table has no such entry. */
+static const GUIDParseTable *find_guid(ff_asf_guid guid)
+{
+    const GUIDParseTable *entry = gdef;
+    const GUIDParseTable *end   = gdef + FF_ARRAY_ELEMS(gdef);
+
+    swap_guid(guid);
+    for (; entry < end; entry++) {
+        if (memcmp(guid, entry->guid, sizeof(entry->guid)) == 0)
+            return entry;
+    }
+
+    return NULL;
+}
+
+/* Walk the objects from the current position up to offset + size: known
+ * GUIDs go to their read_object parser, the rest to asf_read_unknown(). */
+static int detect_unknown_subobject(AVFormatContext *s, int64_t offset, int64_t size)
+{
+    ASFContext *asf = s->priv_data;
+    AVIOContext *pb = s->pb;
+    const GUIDParseTable *g = NULL;
+    ff_asf_guid guid;
+    int ret;
+
+    while (avio_tell(pb) <= offset + size) {
+        if (avio_tell(pb) == asf->offset) // no progress since the last object
+            break;
+        asf->offset = avio_tell(pb);
+        if ((ret = ff_get_guid(pb, &guid)) < 0)
+            return ret;
+        g = find_guid(guid);
+        if (g) {
+            if ((ret = g->read_object(s, g)) < 0)
+                return ret;
+        } else {
+            // zero the fields we do not set instead of leaving them indeterminate
+            GUIDParseTable g2 = { .name = "Unknown", .is_subobject = 1 };
+            asf_read_unknown(s, &g2);
+        }
+    }
+
+    return 0;
+}
+
+/* Parse the header objects (and, on seekable input, the objects after the
+ * Data Object), then apply language, metadata and aspect ratio to streams. */
+static int asf_read_header(AVFormatContext *s)
+{
+    ASFContext *asf         = s->priv_data;
+    AVIOContext *pb         = s->pb;
+    const GUIDParseTable *g = NULL;
+    ff_asf_guid guid;
+    int i, ret;
+    uint64_t size;
+
+    asf->preroll         = 0;
+    asf->is_simple_index = 0;
+    if ((ret = ff_get_guid(pb, &guid)) < 0) // report read errors as such
+        return ret;
+    if (ff_guidcmp(&guid, &ff_asf_header))
+        return AVERROR_INVALIDDATA;
+    avio_skip(pb, 8); // skip header object size
+    avio_skip(pb, 6); // skip number of header objects and 2 reserved bytes
+    asf->data_reached = 0;
+
+    /* The loop condition is 1 rather than !pb->eof_reached because, when not
+     * streaming, the Data Object is skipped on the first pass, the Index
+     * Object is then processed until EOF, and only afterwards do we seek
+     * back to the first data packet. */
+    while (1) {
+        // for the cases when object size is invalid
+        if (avio_tell(pb) == asf->offset)
+            break;
+        asf->offset = avio_tell(pb);
+        if ((ret = ff_get_guid(pb, &guid)) < 0) {
+            if (ret != AVERROR_EOF || !asf->data_reached)
+                goto failed;
+            break;
+        }
+        g = find_guid(guid);
+        if (g) {
+            asf->unknown_offset = asf->offset;
+            asf->is_header = 1;
+            if ((ret = g->read_object(s, g)) < 0)
+                goto failed;
+        } else {
+            size = avio_rl64(pb);
+            align_position(pb, asf->offset, size);
+        }
+        if (asf->data_reached && (!pb->seekable || (asf->b_flags & ASF_FLAG_BROADCAST)))
+            break;
+    }
+
+    if (!asf->data_reached) {
+        av_log(s, AV_LOG_ERROR, "Data Object was not found.\n");
+        ret = AVERROR_INVALIDDATA;
+        goto failed;
+    }
+    if (pb->seekable)
+        avio_seek(pb, asf->first_packet_offset, SEEK_SET);
+
+    for (i = 0; i < asf->nb_streams; i++) {
+        const char *rfc1766 = asf->asf_sd[asf->asf_st[i]->lang_idx].langs;
+        AVStream *st        = s->streams[asf->asf_st[i]->index];
+        set_language(s, rfc1766, &st->metadata);
+    }
+
+    for (i = 0; i < ASF_MAX_STREAMS; i++) {
+        AVStream *st = find_stream(s, i);
+        if (st) {
+            av_dict_copy(&st->metadata, asf->asf_sd[i].asf_met, AV_DICT_IGNORE_SUFFIX);
+            if (asf->asf_sd[i].aspect_ratio.num > 0 && asf->asf_sd[i].aspect_ratio.den > 0) {
+                st->sample_aspect_ratio.num = asf->asf_sd[i].aspect_ratio.num;
+                st->sample_aspect_ratio.den = asf->asf_sd[i].aspect_ratio.den;
+            }
+        }
+    }
+
+    return 0;
+
+failed:
+    asf_read_close(s);
+    return ret;
+}
+
+AVInputFormat ff_asf_o_demuxer = {
+    .name           = "asf_o",
+    .long_name      = NULL_IF_CONFIG_SMALL("ASF (Advanced / Active Streaming Format)"),
+    .flags          = AVFMT_NOBINSEARCH | AVFMT_NOGENSEARCH,
+    .priv_data_size = sizeof(ASFContext),      /* private state is an ASFContext */
+    .read_probe     = asf_probe,
+    .read_header    = asf_read_header,
+    .read_packet    = asf_read_packet,
+    .read_timestamp = asf_read_timestamp,      /* used by asf_read_seek's binary search */
+    .read_seek      = asf_read_seek,
+    .read_close     = asf_read_close,
+};
diff --git a/libavformat/asfenc.c b/libavformat/asfenc.c
index 015c731c..14f92e25 100644
--- a/libavformat/asfenc.c
+++ b/libavformat/asfenc.c
@@ -22,7 +22,10 @@
 #include "libavutil/avassert.h"
 #include "libavutil/dict.h"
 #include "libavutil/mathematics.h"
+#include "libavutil/parseutils.h"
+#include "libavutil/opt.h"
 #include "avformat.h"
+#include "avlanguage.h"
 #include "avio_internal.h"
 #include "internal.h"
 #include "riff.h"
@@ -170,23 +173,60 @@
      ASF_PAYLOAD_REPLICATED_DATA_LENGTH +                 \
      ASF_PAYLOAD_LENGTH_FIELD_SIZE)
 
-#define SINGLE_PAYLOAD_DATA_LENGTH                        \
-    (PACKET_SIZE -                                        \
-     PACKET_HEADER_MIN_SIZE -                             \
+#define SINGLE_PAYLOAD_HEADERS                            \
+    (PACKET_HEADER_MIN_SIZE +                             \
      PAYLOAD_HEADER_SIZE_SINGLE_PAYLOAD)
 
-#define MULTI_PAYLOAD_CONSTANT                            \
-    (PACKET_SIZE -                                        \
-     PACKET_HEADER_MIN_SIZE -                             \
-     1 -         /* Payload Flags */                      \
+#define MULTI_PAYLOAD_HEADERS                             \
+    (PACKET_HEADER_MIN_SIZE +                             \
+     1 +         /* Payload Flags */                      \
      2 * PAYLOAD_HEADER_SIZE_MULTIPLE_PAYLOADS)
 
 #define DATA_HEADER_SIZE 50
 
+#define PACKET_SIZE_MAX 65536 /* upper bound of the packet_size option; also sizes packet_buf */
+#define PACKET_SIZE_MIN 100 /* lower bound of the packet_size option */
+
+typedef struct ASFPayload { /* NOTE(review): presumably one entry of ASFStream.payload[] - confirm field meaning */
+    uint8_t type;
+    uint16_t size;
+} ASFPayload;
+
+typedef struct ASFStream {
+    int num;
+    unsigned char seq;
+    /* used for reading */
+    AVPacket pkt;
+    int frag_offset;
+    int packet_obj_size;
+    int timestamp;
+    int64_t duration;
+    int skip_to_key;
+    int pkt_clean;
+
+    int ds_span;                /* descrambling */
+    int ds_packet_size;
+    int ds_chunk_size;
+
+    int64_t packet_pos;
+
+    uint16_t stream_language_index; /* index into ASFContext.languages[]; 128 marks "no language" */
+
+    int      palette_changed;
+    uint32_t palette[256];
+
+    int payload_ext_ct;
+    ASFPayload payload[8];
+} ASFStream;
+
 typedef struct ASFContext {
+    AVClass *av_class;
     uint32_t seqno;
     int is_streamed;
     ASFStream streams[128];              ///< it's max number and it's not that big
+    const char *languages[128];
+    int nb_languages;
+    int64_t creation_time;
     /* non streamed additonnal info */
     uint64_t nb_packets;                 ///< how many packets are there in the file, invalid if broadcasting
     int64_t duration;                    ///< in 100ns units
@@ -196,7 +236,7 @@ typedef struct ASFContext {
     int64_t packet_timestamp_start;
     int64_t packet_timestamp_end;
     unsigned int packet_nb_payloads;
-    uint8_t packet_buf[PACKET_SIZE];
+    uint8_t packet_buf[PACKET_SIZE_MAX];
     AVIOContext pb;
     /* only for reading */
     uint64_t data_offset;                ///< beginning of the first data packet
@@ -209,6 +249,7 @@ typedef struct ASFContext {
     uint64_t next_packet_offset;
     int      next_start_sec;
     int      end_sec;
+    int      packet_size;
 } ASFContext;
 
 static const AVCodecTag codec_asf_bmp_tags[] = {
@@ -273,12 +314,12 @@ static void put_chunk(AVFormatContext *s, int type,
     asf->seqno++;
 }
 
-/* convert from unix to windows time */
-static int64_t unix_to_file_time(int ti)
+/* convert from av time to windows time */
+static int64_t unix_to_file_time(int64_t ti)
 {
     int64_t t;
 
-    t  = ti * INT64_C(10000000);
+    t  = ti * INT64_C(10);
     t += INT64_C(116444736000000000);
     return t;
 }
@@ -348,7 +389,7 @@ static int asf_write_header1(AVFormatContext *s, int64_t file_size,
     ASFContext *asf = s->priv_data;
     AVIOContext *pb = s->pb;
     AVDictionaryEntry *tags[5];
-    int header_size, n, extra_size, extra_size2, wav_extra_size, file_time;
+    int header_size, n, extra_size, extra_size2, wav_extra_size;
     int has_title, has_aspect_ratio = 0;
     int metadata_count;
     AVCodecContext *enc;
@@ -366,10 +407,17 @@ static int asf_write_header1(AVFormatContext *s, int64_t file_size,
 
     duration       = asf->duration + PREROLL_TIME * 10000;
     has_title      = tags[0] || tags[1] || tags[2] || tags[3] || tags[4];
+
+    if (!file_size) {
+        if (ff_parse_creation_time_metadata(s, &asf->creation_time, 0) != 0)
+            av_dict_set(&s->metadata, "creation_time", NULL, 0);
+    }
+
     metadata_count = av_dict_count(s->metadata);
 
     bit_rate = 0;
     for (n = 0; n < s->nb_streams; n++) {
+        AVDictionaryEntry *entry;
         enc = s->streams[n]->codec;
 
         avpriv_set_pts_info(s->streams[n], 32, 1, 1000); /* 32 bit pts in ms */
@@ -379,6 +427,27 @@ static int asf_write_header1(AVFormatContext *s, int64_t file_size,
             && enc->sample_aspect_ratio.num > 0
             && enc->sample_aspect_ratio.den > 0)
             has_aspect_ratio++;
+
+        entry = av_dict_get(s->streams[n]->metadata, "language", NULL, 0);
+        if (entry) {
+            const char *iso6391lang = av_convert_lang_to(entry->value, AV_LANG_ISO639_1);
+            if (iso6391lang) {
+                int i;
+                for (i = 0; i < asf->nb_languages; i++) {
+                    if (!strcmp(asf->languages[i], iso6391lang)) {
+                        asf->streams[n].stream_language_index = i;
+                        break;
+                    }
+                }
+                if (i >= asf->nb_languages) {
+                    asf->languages[asf->nb_languages] = iso6391lang;
+                    asf->streams[n].stream_language_index = asf->nb_languages;
+                    asf->nb_languages++;
+                }
+            }
+        } else {
+            asf->streams[n].stream_language_index = 128;
+        }
     }
 
     if (asf->is_streamed) {
@@ -396,8 +465,7 @@ static int asf_write_header1(AVFormatContext *s, int64_t file_size,
     hpos          = put_header(pb, &ff_asf_file_header);
     ff_put_guid(pb, &ff_asf_my_guid);
     avio_wl64(pb, file_size);
-    file_time = 0;
-    avio_wl64(pb, unix_to_file_time(file_time));
+    avio_wl64(pb, unix_to_file_time(asf->creation_time));
     avio_wl64(pb, asf->nb_packets); /* number of packets */
     avio_wl64(pb, duration); /* end time stamp (in 100ns units) */
     avio_wl64(pb, asf->duration); /* duration (in 100ns units) */
@@ -408,13 +476,48 @@ static int asf_write_header1(AVFormatContext *s, int64_t file_size,
     avio_wl32(pb, bit_rate ? bit_rate : -1); /* Maximum data rate in bps */
     end_header(pb, hpos);
 
-    /* unknown headers */
+    /* header_extension */
     hpos = put_header(pb, &ff_asf_head1_guid);
     ff_put_guid(pb, &ff_asf_head2_guid);
     avio_wl16(pb, 6);
+    avio_wl32(pb, 0); /* length, to be filled later */
+    if (asf->nb_languages) {
+        int64_t hpos2;
+        int i;
+
+        hpos2 = put_header(pb, &ff_asf_language_guid);
+        avio_wl16(pb, asf->nb_languages);
+        for (i = 0; i < asf->nb_languages; i++) {
+            avio_w8(pb, 6);
+            avio_put_str16le(pb, asf->languages[i]);
+        }
+        end_header(pb, hpos2);
+
+        for (n = 0; n < s->nb_streams; n++) {
+            int64_t es_pos;
+            if (asf->streams[n].stream_language_index > 127)
+                continue;
+            es_pos = put_header(pb, &ff_asf_extended_stream_properties_object);
+            avio_wl64(pb, 0); /* start time */
+            avio_wl64(pb, 0); /* end time */
+            avio_wl32(pb, s->streams[n]->codec->bit_rate); /* data bitrate bps */
+            avio_wl32(pb, 5000); /* buffer size ms */
+            avio_wl32(pb, 0); /* initial buffer fullness */
+            avio_wl32(pb, s->streams[n]->codec->bit_rate); /* peak data bitrate */
+            avio_wl32(pb, 5000); /* maximum buffer size ms */
+            avio_wl32(pb, 0); /* max initial buffer fullness */
+            avio_wl32(pb, 0); /* max object size */
+            avio_wl32(pb, (!asf->is_streamed && pb->seekable) << 1); /* flags - seekable */
+            avio_wl16(pb, n + 1); /* stream number */
+            avio_wl16(pb, asf->streams[n].stream_language_index); /* language id index */
+            avio_wl64(pb, 0); /* avg time per frame */
+            avio_wl16(pb, 0); /* stream name count */
+            avio_wl16(pb, 0); /* payload extension system count */
+            end_header(pb, es_pos);
+        }
+    }
     if (has_aspect_ratio) {
         int64_t hpos2;
-        avio_wl32(pb, 26 + has_aspect_ratio * 84);
         hpos2 = put_header(pb, &ff_asf_metadata_header);
         avio_wl16(pb, 2 * has_aspect_ratio);
         for (n = 0; n < s->nb_streams; n++) {
@@ -442,8 +545,13 @@ static int asf_write_header1(AVFormatContext *s, int64_t file_size,
             }
         }
         end_header(pb, hpos2);
-    } else {
-        avio_wl32(pb, 0);
+    }
+    {
+        int64_t pos1;
+        pos1 = avio_tell(pb);
+        avio_seek(pb, hpos + 42, SEEK_SET);
+        avio_wl32(pb, pos1 - hpos - 46);
+        avio_seek(pb, pos1, SEEK_SET);
     }
     end_header(pb, hpos);
 
@@ -647,10 +755,15 @@ static int asf_write_header(AVFormatContext *s)
 {
     ASFContext *asf = s->priv_data;
 
-    s->packet_size  = PACKET_SIZE;
+    s->packet_size  = asf->packet_size;
     s->max_interleave_delta = 0;
     asf->nb_packets = 0;
 
+    if (s->nb_streams > 127) {
+        av_log(s, AV_LOG_ERROR, "ASF can only handle 127 streams\n");
+        return AVERROR(EINVAL);
+    }
+
     asf->index_ptr             = av_malloc(sizeof(ASFIndex) * ASF_INDEX_BLOCK);
     if (!asf->index_ptr)
         return AVERROR(ENOMEM);
@@ -753,7 +866,7 @@ static void flush_packet(AVFormatContext *s)
                                                asf->packet_nb_payloads,
                                                asf->packet_size_left);
 
-    packet_filled_size = PACKET_SIZE - asf->packet_size_left;
+    packet_filled_size = asf->packet_size - asf->packet_size_left;
     av_assert0(packet_hdr_size <= asf->packet_size_left);
     memset(asf->packet_buf + packet_filled_size, 0, asf->packet_size_left);
 
@@ -810,13 +923,14 @@ static void put_frame(AVFormatContext *s, ASFStream *stream, AVStream *avst,
     while (m_obj_offset < m_obj_size) {
         payload_len = m_obj_size - m_obj_offset;
         if (asf->packet_timestamp_start == -1) {
-            asf->multi_payloads_present = (payload_len < MULTI_PAYLOAD_CONSTANT);
+            const int multi_payload_constant = (asf->packet_size - MULTI_PAYLOAD_HEADERS);
+            asf->multi_payloads_present = (payload_len < multi_payload_constant);
 
-            asf->packet_size_left = PACKET_SIZE;
+            asf->packet_size_left = asf->packet_size;
             if (asf->multi_payloads_present) {
-                frag_len1 = MULTI_PAYLOAD_CONSTANT - 1;
+                frag_len1 = multi_payload_constant - 1;
             } else {
-                frag_len1 = SINGLE_PAYLOAD_DATA_LENGTH;
+                frag_len1 = asf->packet_size - SINGLE_PAYLOAD_HEADERS;
             }
             asf->packet_timestamp_start = timestamp;
         } else {
@@ -830,6 +944,11 @@ static void put_frame(AVFormatContext *s, ASFStream *stream, AVStream *avst,
                 flush_packet(s);
                 continue;
             }
+            if (asf->packet_timestamp_start > INT64_MAX - UINT16_MAX ||
+                timestamp > asf->packet_timestamp_start + UINT16_MAX) {
+                flush_packet(s);
+                continue;
+            }
         }
         if (frag_len1 > 0) {
             if (payload_len > frag_len1)
@@ -927,6 +1046,11 @@ static int asf_write_packet(AVFormatContext *s, AVPacket *pkt)
 
     pts = (pkt->pts != AV_NOPTS_VALUE) ? pkt->pts : pkt->dts;
     av_assert0(pts != AV_NOPTS_VALUE);
+    if (   pts < - PREROLL_TIME
+        || pts > (INT_MAX-3)/10000LL * ASF_INDEXED_INTERVAL - PREROLL_TIME) {
+        av_log(s, AV_LOG_ERROR, "input pts %"PRId64" is invalid\n", pts);
+        return AVERROR(EINVAL);
+    }
     pts *= 10000;
     asf->duration = FFMAX(asf->duration, pts + pkt->duration * 10000);
 
@@ -1001,7 +1125,19 @@ static int asf_write_trailer(AVFormatContext *s)
     return 0;
 }
 
+static const AVOption asf_options[] = { /* private muxer options; packet_size is copied to s->packet_size in asf_write_header */
+    { "packet_size", "Packet size", offsetof(ASFContext, packet_size), AV_OPT_TYPE_INT, {.i64 = 3200}, PACKET_SIZE_MIN, PACKET_SIZE_MAX, AV_OPT_FLAG_ENCODING_PARAM },
+    { NULL },
+};
+
 #if CONFIG_ASF_MUXER
+static const AVClass asf_muxer_class = { /* exposes asf_options; installed as ff_asf_muxer.priv_class */
+    .class_name     = "ASF muxer",
+    .item_name      = av_default_item_name,
+    .option         = asf_options,
+    .version        = LIBAVUTIL_VERSION_INT,
+};
+
 AVOutputFormat ff_asf_muxer = {
     .name           = "asf",
     .long_name      = NULL_IF_CONFIG_SMALL("ASF (Advanced / Active Streaming Format)"),
@@ -1017,10 +1153,18 @@ AVOutputFormat ff_asf_muxer = {
     .codec_tag      = (const AVCodecTag * const []) {
         codec_asf_bmp_tags, ff_codec_bmp_tags, ff_codec_wav_tags, 0
     },
+    .priv_class        = &asf_muxer_class,
 };
 #endif /* CONFIG_ASF_MUXER */
 
 #if CONFIG_ASF_STREAM_MUXER
+static const AVClass asf_stream_muxer_class = { /* exposes asf_options; installed as ff_asf_stream_muxer.priv_class */
+    .class_name     = "ASF stream muxer",
+    .item_name      = av_default_item_name,
+    .option         = asf_options,
+    .version        = LIBAVUTIL_VERSION_INT,
+};
+
 AVOutputFormat ff_asf_stream_muxer = {
     .name           = "asf_stream",
     .long_name      = NULL_IF_CONFIG_SMALL("ASF (Advanced / Active Streaming Format)"),
@@ -1036,5 +1180,6 @@ AVOutputFormat ff_asf_stream_muxer = {
     .codec_tag      = (const AVCodecTag * const []) {
         codec_asf_bmp_tags, ff_codec_bmp_tags, ff_codec_wav_tags, 0
     },
+    .priv_class        = &asf_stream_muxer_class,
 };
 #endif /* CONFIG_ASF_STREAM_MUXER */
diff --git a/libavformat/assdec.c b/libavformat/assdec.c
index c62e76f0..21148b60 100644
--- a/libavformat/assdec.c
+++ b/libavformat/assdec.c
@@ -39,6 +39,9 @@ static int ass_probe(AVProbeData *p)
     FFTextReader tr;
     ff_text_init_buf(&tr, p->buf, p->buf_size);
 
+    while (ff_text_peek_r8(&tr) == '\r' || ff_text_peek_r8(&tr) == '\n')
+        ff_text_r8(&tr);
+
     ff_text_read(&tr, buf, sizeof(buf));
 
     if (!memcmp(buf, "[Script Info]", 13))
@@ -125,6 +128,8 @@ static int ass_read_header(AVFormatContext *s)
     av_bprint_init(&line,   0, AV_BPRINT_SIZE_UNLIMITED);
     av_bprint_init(&rline,  0, AV_BPRINT_SIZE_UNLIMITED);
 
+    ass->q.keep_duplicates = 1;
+
     for (;;) {
         int64_t pos = get_line(&line, &tr);
         int64_t ts_start = AV_NOPTS_VALUE;
@@ -152,7 +157,7 @@ static int ass_read_header(AVFormatContext *s)
     if (res < 0)
         goto end;
 
-    ff_subtitles_queue_finalize(&ass->q);
+    ff_subtitles_queue_finalize(s, &ass->q);
 
 end:
     av_bprint_finalize(&header, NULL);
diff --git a/libavformat/assenc.c b/libavformat/assenc.c
index 52226168..e59c266e 100644
--- a/libavformat/assenc.c
+++ b/libavformat/assenc.c
@@ -223,7 +223,7 @@ static int write_trailer(AVFormatContext *s)
 #define OFFSET(x) offsetof(ASSContext, x)
 #define E AV_OPT_FLAG_ENCODING_PARAM
 static const AVOption options[] = {
-    { "ignore_readorder", "write events immediately, even if they're out-of-order", OFFSET(ignore_readorder), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, E },
+    { "ignore_readorder", "write events immediately, even if they're out-of-order", OFFSET(ignore_readorder), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, E },
     { NULL },
 };
 
diff --git a/libavformat/async.c b/libavformat/async.c
new file mode 100644
index 00000000..138ef137
--- /dev/null
+++ b/libavformat/async.c
@@ -0,0 +1,699 @@
+/*
+ * Input async protocol.
+ * Copyright (c) 2015 Zhang Rui <bbcallen@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Based on libavformat/cache.c by Michael Niedermayer
+ */
+
+/**
+ * @todo
+ *      support timeout
+ *      support working with concatdec, hls
+ */
+
+#include "libavutil/avassert.h"
+#include "libavutil/avstring.h"
+#include "libavutil/error.h"
+#include "libavutil/fifo.h"
+#include "libavutil/log.h"
+#include "libavutil/opt.h"
+#include "libavutil/thread.h"
+#include "url.h"
+#include <stdint.h>
+
+#if HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#define BUFFER_CAPACITY         (4 * 1024 * 1024)
+#define READ_BACK_CAPACITY      (4 * 1024 * 1024)
+#define SHORT_SEEK_THRESHOLD    (256 * 1024)
+
+typedef struct RingBuffer /* FIFO holding unread data plus a window of already-read bytes */
+{
+    AVFifoBuffer *fifo;                 /* backing FIFO; ring_init() sizes it as capacity + read_back_capacity */
+    int           read_back_capacity;   /* max number of consumed bytes kept in front of read_pos */
+
+    int           read_pos;             /* offset inside fifo of the next unread byte; bytes before it are read-back data */
+} RingBuffer;
+
+typedef struct Context { /* private data of the "async" protocol */
+    AVClass        *class;
+    URLContext     *inner;              /* wrapped URL opened by async_open() */
+
+    int             seek_request;       /* set by async_seek(), cleared by the background thread */
+    int64_t         seek_pos;           /* position handed to ffurl_seek() on the inner context */
+    int             seek_whence;        /* whence handed to ffurl_seek() on the inner context */
+    int             seek_completed;     /* set by the background thread once the requested seek ran */
+    int64_t         seek_ret;           /* result of that ffurl_seek() call */
+
+    int             inner_io_error;     /* last ffurl_read() result if negative, else 0 (see wrapped_url_read) */
+    int             io_error;           /* error code reported to the reader once the ring is empty */
+    int             io_eof_reached;     /* background thread stops filling the ring while this is set */
+
+    int64_t         logical_pos;        /* read position as seen by the caller of this protocol */
+    int64_t         logical_size;       /* ffurl_size() of the inner context, sampled at open */
+    RingBuffer      ring;
+
+    pthread_cond_t  cond_wakeup_main;        /* signalled by the background thread, waited on by read/seek */
+    pthread_cond_t  cond_wakeup_background;  /* signalled by read/seek/close, waited on by the background thread */
+    pthread_mutex_t mutex;
+    pthread_t       async_buffer_thread;     /* runs async_buffer_task() */
+
+    int             abort_request;      /* latched by async_close() or when the user's interrupt callback fires */
+    AVIOInterruptCB interrupt_callback; /* copy of the caller's h->interrupt_callback */
+} Context;
+
+static int ring_init(RingBuffer *ring, unsigned int capacity, int read_back_capacity) /* returns 0, or AVERROR(ENOMEM) if the FIFO cannot be allocated */
+{
+    memset(ring, 0, sizeof(RingBuffer));
+    ring->fifo = av_fifo_alloc(capacity + read_back_capacity); /* one FIFO stores both unread data and read-back data */
+    if (!ring->fifo)
+        return AVERROR(ENOMEM);
+
+    ring->read_back_capacity = read_back_capacity;
+    return 0;
+}
+
+static void ring_destroy(RingBuffer *ring) /* release the FIFO allocated by ring_init() */
+{
+    av_fifo_freep(&ring->fifo);
+}
+
+static void ring_reset(RingBuffer *ring) /* discard all buffered data, including read-back data */
+{
+    av_fifo_reset(ring->fifo);
+    ring->read_pos = 0;
+}
+
+static int ring_size(RingBuffer *ring) /* number of buffered bytes not yet read */
+{
+    return av_fifo_size(ring->fifo) - ring->read_pos; /* bytes before read_pos are read-back data, not unread data */
+}
+
+static int ring_space(RingBuffer *ring) /* free space left in the backing FIFO */
+{
+    return av_fifo_space(ring->fifo);
+}
+
+static int ring_generic_read(RingBuffer *ring, void *dest, int buf_size, void (*func)(void*, void*, int)) /* consume buf_size bytes; returns the av_fifo_generic_peek_at() result */
+{
+    int ret;
+
+    av_assert2(buf_size <= ring_size(ring));
+    ret = av_fifo_generic_peek_at(ring->fifo, dest, ring->read_pos, buf_size, func); /* peek only: data stays in the FIFO as read-back data */
+    ring->read_pos += buf_size;
+
+    if (ring->read_pos > ring->read_back_capacity) { /* too much read-back data kept: drop the oldest bytes */
+        av_fifo_drain(ring->fifo, ring->read_pos - ring->read_back_capacity);
+        ring->read_pos = ring->read_back_capacity;
+    }
+
+    return ret;
+}
+
+static int ring_generic_write(RingBuffer *ring, void *src, int size, int (*func)(void*, void*, int)) /* append up to size bytes via func; returns the av_fifo_generic_write() result */
+{
+    av_assert2(size <= ring_space(ring));
+    return av_fifo_generic_write(ring->fifo, src, size, func);
+}
+
+static int ring_size_of_read_back(RingBuffer *ring) /* number of already-read bytes still kept in the FIFO */
+{
+    return ring->read_pos;
+}
+
+static int ring_drain(RingBuffer *ring, int offset) /* move the read cursor by offset bytes; a negative offset rewinds into read-back data; returns 0 */
+{
+    av_assert2(offset >= -ring_size_of_read_back(ring)); /* cannot rewind past the retained read-back data */
+    av_assert2(offset <= ring_size(ring));               /* cannot skip past the buffered unread data */
+    ring->read_pos += offset;
+    return 0;
+}
+
+static int async_check_interrupt(void *arg) /* arg is the async URLContext; returns non-zero when I/O should be aborted */
+{
+    URLContext *h   = arg;
+    Context    *c   = h->priv_data;
+
+    if (c->abort_request) /* already latched, e.g. by async_close() */
+        return 1;
+
+    if (ff_check_interrupt(&c->interrupt_callback)) /* poll the caller's original callback saved in async_open() */
+        c->abort_request = 1;
+
+    return c->abort_request;
+}
+
+static int wrapped_url_read(void *src, void *dst, int size) /* write callback for ring_generic_write(): src is the async URLContext */
+{
+    URLContext *h   = src;
+    Context    *c   = h->priv_data;
+    int         ret;
+
+    ret = ffurl_read(c->inner, dst, size); /* read straight from the wrapped URL into the destination buffer */
+    c->inner_io_error = ret < 0 ? ret : 0; /* remember the failure code so the background thread can report it */
+
+    return ret;
+}
+
+static void *async_buffer_task(void *arg) /* background thread body: serves seek requests and keeps the ring filled; arg is the async URLContext */
+{
+    URLContext   *h    = arg;
+    Context      *c    = h->priv_data;
+    RingBuffer   *ring = &c->ring;
+    int           ret  = 0;
+    int64_t       seek_ret;
+
+    while (1) {
+        int fifo_space, to_copy;
+
+        pthread_mutex_lock(&c->mutex);
+        if (async_check_interrupt(h)) { /* abort requested: report AVERROR_EXIT to the reader and leave the loop */
+            c->io_eof_reached = 1;
+            c->io_error       = AVERROR_EXIT;
+            pthread_cond_signal(&c->cond_wakeup_main);
+            pthread_mutex_unlock(&c->mutex);
+            break;
+        }
+
+        if (c->seek_request) { /* async_seek() is waiting: seek the inner URL while holding the mutex */
+            seek_ret = ffurl_seek(c->inner, c->seek_pos, c->seek_whence);
+            if (seek_ret >= 0) { /* buffered data belongs to the old position, so drop it and clear EOF/error state */
+                c->io_eof_reached = 0;
+                c->io_error       = 0;
+                ring_reset(ring);
+            }
+
+            c->seek_completed = 1;
+            c->seek_ret       = seek_ret;
+            c->seek_request   = 0;
+
+
+            pthread_cond_signal(&c->cond_wakeup_main);
+            pthread_mutex_unlock(&c->mutex);
+            continue;
+        }
+
+        fifo_space = ring_space(ring);
+        if (c->io_eof_reached || fifo_space <= 0) { /* nothing to do: sleep until read/seek/close signals us */
+            pthread_cond_signal(&c->cond_wakeup_main);
+            pthread_cond_wait(&c->cond_wakeup_background, &c->mutex);
+            pthread_mutex_unlock(&c->mutex);
+            continue;
+        }
+        pthread_mutex_unlock(&c->mutex);
+
+        to_copy = FFMIN(4096, fifo_space); /* read at most 4096 bytes per iteration */
+        ret = ring_generic_write(ring, (void *)h, to_copy, wrapped_url_read); /* NOTE(review): runs without the mutex held */
+
+        pthread_mutex_lock(&c->mutex);
+        if (ret <= 0) { /* no data written: treat as end of stream, carrying over any read error */
+            c->io_eof_reached = 1;
+            if (c->inner_io_error < 0)
+                c->io_error = c->inner_io_error;
+        }
+
+        pthread_cond_signal(&c->cond_wakeup_main);
+        pthread_mutex_unlock(&c->mutex);
+    }
+
+    return NULL;
+}
+
+static int async_open(URLContext *h, const char *arg, int flags, AVDictionary **options) /* url_open2: returns 0 or a negative AVERROR code */
+{
+    Context         *c = h->priv_data;
+    int              ret;
+    AVIOInterruptCB  interrupt_callback = {.callback = async_check_interrupt, .opaque = h};
+
+    av_strstart(arg, "async:", &arg); /* strip the protocol prefix; the remainder is the inner URL */
+
+    ret = ring_init(&c->ring, BUFFER_CAPACITY, READ_BACK_CAPACITY);
+    if (ret < 0)
+        goto fifo_fail;
+
+    /* wrap interrupt callback: keep the caller's one, give the inner URL async_check_interrupt */
+    c->interrupt_callback = h->interrupt_callback;
+    ret = ffurl_open_whitelist(&c->inner, arg, flags, &interrupt_callback, options, h->protocol_whitelist);
+    if (ret != 0) {
+        av_log(h, AV_LOG_ERROR, "ffurl_open failed : %s, %s\n", av_err2str(ret), arg);
+        goto url_fail;
+    }
+
+    c->logical_size = ffurl_size(c->inner);
+    h->is_streamed  = c->inner->is_streamed;
+
+    ret = AVERROR(pthread_mutex_init(&c->mutex, NULL)); /* pthread calls return a positive errno; AVERROR(0) is still 0 */
+    if (ret != 0) {
+        av_log(h, AV_LOG_ERROR, "pthread_mutex_init failed : %s\n", av_err2str(ret));
+        goto mutex_fail;
+    }
+
+    ret = AVERROR(pthread_cond_init(&c->cond_wakeup_main, NULL));
+    if (ret != 0) {
+        av_log(h, AV_LOG_ERROR, "pthread_cond_init failed : %s\n", av_err2str(ret));
+        goto cond_wakeup_main_fail;
+    }
+
+    ret = AVERROR(pthread_cond_init(&c->cond_wakeup_background, NULL));
+    if (ret != 0) {
+        av_log(h, AV_LOG_ERROR, "pthread_cond_init failed : %s\n", av_err2str(ret));
+        goto cond_wakeup_background_fail;
+    }
+
+    ret = AVERROR(pthread_create(&c->async_buffer_thread, NULL, async_buffer_task, h));
+    if (ret) {
+        av_log(h, AV_LOG_ERROR, "pthread_create failed : %s\n", av_err2str(ret));
+        goto thread_fail;
+    }
+
+    return 0;
+
+thread_fail: /* labels unwind in reverse order of acquisition */
+    pthread_cond_destroy(&c->cond_wakeup_background);
+cond_wakeup_background_fail:
+    pthread_cond_destroy(&c->cond_wakeup_main);
+cond_wakeup_main_fail:
+    pthread_mutex_destroy(&c->mutex);
+mutex_fail:
+    ffurl_close(c->inner);
+url_fail:
+    ring_destroy(&c->ring);
+fifo_fail:
+    return ret;
+}
+
+static int async_close(URLContext *h) /* url_close: stop the background thread and release everything async_open() acquired */
+{
+    Context *c = h->priv_data;
+    int      ret;
+
+    pthread_mutex_lock(&c->mutex);
+    c->abort_request = 1; /* makes async_check_interrupt() return non-zero so the thread leaves its loop */
+    pthread_cond_signal(&c->cond_wakeup_background);
+    pthread_mutex_unlock(&c->mutex);
+
+    ret = pthread_join(c->async_buffer_thread, NULL);
+    if (ret != 0) /* pthread_join returns a positive errno; convert it before formatting */
+        av_log(h, AV_LOG_ERROR, "pthread_join(): %s\n", av_err2str(AVERROR(ret)));
+
+    pthread_cond_destroy(&c->cond_wakeup_background);
+    pthread_cond_destroy(&c->cond_wakeup_main);
+    pthread_mutex_destroy(&c->mutex);
+    ffurl_close(c->inner);
+    ring_destroy(&c->ring);
+
+    return 0;
+}
+
+static int async_read_internal(URLContext *h, void *dest, int size, int read_complete, /* read_complete != 0: keep waiting until size bytes were consumed */
+                               void (*func)(void*, void*, int))                        /* func: copy callback handed to ring_generic_read(), NULL for a plain copy */
+{
+    Context      *c       = h->priv_data;
+    RingBuffer   *ring    = &c->ring;
+    int           to_read = size;
+    int           ret     = 0; /* bytes consumed so far, or a negative error code */
+
+    pthread_mutex_lock(&c->mutex);
+
+    while (to_read > 0) {
+        int fifo_size, to_copy;
+        if (async_check_interrupt(h)) { /* abort overrides any partial byte count */
+            ret = AVERROR_EXIT;
+            break;
+        }
+        fifo_size = ring_size(ring);
+        to_copy   = FFMIN(to_read, fifo_size);
+        if (to_copy > 0) {
+            ring_generic_read(ring, dest, to_copy, func);
+            if (!func) /* dest is only advanced for the plain-copy case */
+                dest = (uint8_t *)dest + to_copy;
+            c->logical_pos += to_copy;
+            to_read        -= to_copy;
+            ret             = size - to_read;
+
+            if (to_read <= 0 || !read_complete) /* done, or caller accepts a short read */
+                break;
+        } else if (c->io_eof_reached) { /* ring empty and nothing more will arrive */
+            if (ret <= 0) { /* nothing consumed at all: report the stored error, or EOF */
+                if (c->io_error)
+                    ret = c->io_error;
+                else
+                    ret = AVERROR_EOF;
+            }
+            break;
+        }
+        pthread_cond_signal(&c->cond_wakeup_background); /* ask for more data, then sleep until the thread signals */
+        pthread_cond_wait(&c->cond_wakeup_main, &c->mutex);
+    }
+
+    pthread_cond_signal(&c->cond_wakeup_background); /* ring space may have been freed: let the thread refill */
+    pthread_mutex_unlock(&c->mutex);
+
+    return ret;
+}
+
+static int async_read(URLContext *h, unsigned char *buf, int size) /* url_read: may return fewer than size bytes */
+{
+    return async_read_internal(h, buf, size, 0, NULL); /* read_complete = 0, plain copy into buf */
+}
+
+static void fifo_do_not_copy_func(void* dest, void* src, int size) {
+    // intentionally empty: async_seek() passes this to skip buffered bytes without copying them
+}
+
+static int64_t async_seek(URLContext *h, int64_t pos, int whence) /* url_seek: returns the new position, the size for AVSEEK_SIZE, or a negative AVERROR code */
+{
+    Context      *c    = h->priv_data;
+    RingBuffer   *ring = &c->ring;
+    int64_t       ret;
+    int64_t       new_logical_pos;
+    int fifo_size;
+    int fifo_size_of_read_back;
+
+    if (whence == AVSEEK_SIZE) {
+        av_log(h, AV_LOG_TRACE, "async_seek: AVSEEK_SIZE: %"PRId64"\n", (int64_t)c->logical_size);
+        return c->logical_size;
+    } else if (whence == SEEK_CUR) {
+        av_log(h, AV_LOG_TRACE, "async_seek: %"PRId64"\n", pos);
+        new_logical_pos = pos + c->logical_pos;
+    } else if (whence == SEEK_SET){
+        av_log(h, AV_LOG_TRACE, "async_seek: %"PRId64"\n", pos);
+        new_logical_pos = pos;
+    } else {
+        return AVERROR(EINVAL);
+    }
+    if (new_logical_pos < 0)
+        return AVERROR(EINVAL);
+
+    fifo_size = ring_size(ring); /* NOTE(review): ring is inspected here without holding c->mutex */
+    fifo_size_of_read_back = ring_size_of_read_back(ring);
+    if (new_logical_pos == c->logical_pos) {
+        /* current position */
+        return c->logical_pos;
+    } else if ((new_logical_pos >= (c->logical_pos - fifo_size_of_read_back)) &&
+               (new_logical_pos < (c->logical_pos + fifo_size + SHORT_SEEK_THRESHOLD))) {
+        int pos_delta = (int)(new_logical_pos - c->logical_pos); /* bounded by the range check above, so it fits an int */
+        /* fast seek */
+        av_log(h, AV_LOG_TRACE, "async_seek: fast_seek %"PRId64" from %"PRId64" dist:%d/%d\n",
+                new_logical_pos, c->logical_pos,
+                pos_delta, fifo_size);
+
+        if (pos_delta > 0) {
+            // fast seek forwards: consume pos_delta bytes without copying (result is not checked)
+            async_read_internal(h, NULL, pos_delta, 1, fifo_do_not_copy_func);
+        } else {
+            // fast seek backwards: step the read cursor back into the read-back data
+            ring_drain(ring, pos_delta);
+            c->logical_pos = new_logical_pos;
+        }
+
+        return c->logical_pos;
+    } else if (c->logical_size <= 0) {
+        /* can not seek */
+        return AVERROR(EINVAL);
+    } else if (new_logical_pos > c->logical_size) {
+        /* beyond end */
+        return AVERROR(EINVAL);
+    }
+
+    pthread_mutex_lock(&c->mutex);
+
+    c->seek_request   = 1; /* hand the seek to the background thread, which owns I/O on the inner URL */
+    c->seek_pos       = new_logical_pos;
+    c->seek_whence    = SEEK_SET;
+    c->seek_completed = 0;
+    c->seek_ret       = 0;
+
+    while (1) {
+        if (async_check_interrupt(h)) {
+            ret = AVERROR_EXIT;
+            break;
+        }
+        if (c->seek_completed) {
+            if (c->seek_ret >= 0)
+                c->logical_pos  = c->seek_ret;
+            ret = c->seek_ret;
+            break;
+        }
+        pthread_cond_signal(&c->cond_wakeup_background); /* wake the thread, then wait for it to report back */
+        pthread_cond_wait(&c->cond_wakeup_main, &c->mutex);
+    }
+
+    pthread_mutex_unlock(&c->mutex);
+
+    return ret;
+}
+
+#define OFFSET(x) offsetof(Context, x)
+#define D AV_OPT_FLAG_DECODING_PARAM
+
+static const AVOption options[] = { /* option table referenced by async_context_class; no options are defined */
+    {NULL},
+};
+
+#undef D
+#undef OFFSET
+
+static const AVClass async_context_class = { /* AVClass for Context; referenced by ff_async_protocol.priv_data_class */
+    .class_name = "Async",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+URLProtocol ff_async_protocol = { /* "async:" protocol table; only open/read/seek/close callbacks are provided */
+    .name                = "async",
+    .url_open2           = async_open,
+    .url_read            = async_read,
+    .url_seek            = async_seek,
+    .url_close           = async_close,
+    .priv_data_size      = sizeof(Context),
+    .priv_data_class     = &async_context_class,
+};
+
+#ifdef TEST
+
+#define TEST_SEEK_POS    (1536)
+#define TEST_STREAM_SIZE (2048)
+
+typedef struct TestContext { /* private data of the synthetic "async-test" protocol */
+    AVClass        *class;
+    int64_t         logical_pos;    /* current read position in the generated stream */
+    int64_t         logical_size;   /* stream length; async_test_open() sets it to TEST_STREAM_SIZE */
+
+    /* options */
+    int             opt_read_error; /* when non-zero, async_test_read() returns this value instead of data */
+} TestContext;
+
+static int async_test_open(URLContext *h, const char *arg, int flags, AVDictionary **options) /* always succeeds; arg, flags and options are unused */
+{
+    TestContext *c = h->priv_data;
+    c->logical_pos  = 0;
+    c->logical_size = TEST_STREAM_SIZE;
+    return 0;
+}
+
+static int async_test_close(URLContext *h) /* nothing to release */
+{
+    return 0;
+}
+
+static int async_test_read(URLContext *h, unsigned char *buf, int size) /* returns bytes produced, AVERROR_EOF at end of stream, or the injected error */
+{
+    TestContext *c = h->priv_data;
+    int          i;
+    int          read_len = 0;
+
+    if (c->opt_read_error) /* error injection via the "async-test-read-error" option */
+        return c->opt_read_error;
+
+    if (c->logical_pos >= c->logical_size)
+        return AVERROR_EOF;
+
+    for (i = 0; i < size; ++i) {
+        buf[i] = c->logical_pos & 0xFF; /* each byte is the low 8 bits of its own stream position */
+
+        c->logical_pos++;
+        read_len++;
+
+        if (c->logical_pos >= c->logical_size) /* stop at the end of the stream (short read) */
+            break;
+    }
+
+    return read_len;
+}
+
+static int64_t async_test_seek(URLContext *h, int64_t pos, int whence) /* supports AVSEEK_SIZE, SEEK_CUR and SEEK_SET; anything else is AVERROR(EINVAL) */
+{
+    TestContext *c = h->priv_data;
+    int64_t      new_logical_pos;
+
+    if (whence == AVSEEK_SIZE) {
+        return c->logical_size;
+    } else if (whence == SEEK_CUR) {
+        new_logical_pos = pos + c->logical_pos;
+    } else if (whence == SEEK_SET){
+        new_logical_pos = pos;
+    } else {
+        return AVERROR(EINVAL);
+    }
+    if (new_logical_pos < 0) /* no upper bound is enforced here */
+        return AVERROR(EINVAL);
+
+    c->logical_pos = new_logical_pos;
+    return new_logical_pos;
+}
+
+#define OFFSET(x) offsetof(TestContext, x)
+#define D AV_OPT_FLAG_DECODING_PARAM
+
+static const AVOption async_test_options[] = { /* options of the "async-test" protocol */
+    { "async-test-read-error",      "cause read fail",
+        OFFSET(opt_read_error),     AV_OPT_TYPE_INT, { .i64 = 0 }, INT_MIN, INT_MAX, .flags = D }, /* default 0 = no injected error */
+    {NULL},
+};
+
+#undef D
+#undef OFFSET
+
+static const AVClass async_test_context_class = { /* AVClass for TestContext; exposes async_test_options */
+    .class_name = "Async-Test",
+    .item_name  = av_default_item_name,
+    .option     = async_test_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+URLProtocol ff_async_test_protocol = { /* "async-test:" protocol table, registered by main() below */
+    .name                = "async-test",
+    .url_open2           = async_test_open,
+    .url_read            = async_test_read,
+    .url_seek            = async_test_seek,
+    .url_close           = async_test_close,
+    .priv_data_size      = sizeof(TestContext),
+    .priv_data_class     = &async_test_context_class,
+};
+
+int main(void) /* self-test: reads "async:async-test:" end to end, after a seek, and with an injected read error */
+{
+    URLContext   *h = NULL;
+    int           i;
+    int           ret;
+    int64_t       size;
+    int64_t       pos;
+    int64_t       read_len;
+    unsigned char buf[4096];
+    AVDictionary *opts = NULL;
+
+    ffurl_register_protocol(&ff_async_protocol);
+    ffurl_register_protocol(&ff_async_test_protocol);
+
+    /*
+     * test normal read
+     */
+    ret = ffurl_open(&h, "async:async-test:", AVIO_FLAG_READ, NULL, NULL);
+    printf("open: %d\n", ret);
+    if (ret < 0) goto fail; /* h must not be used after a failed open */
+    size = ffurl_size(h);
+    printf("size: %"PRId64"\n", size);
+
+    pos = ffurl_seek(h, 0, SEEK_CUR);
+    read_len = 0;
+    while (1) {
+        ret = ffurl_read(h, buf, sizeof(buf));
+        if (ret == AVERROR_EOF) {
+            printf("read-error: AVERROR_EOF at %"PRId64"\n", ffurl_seek(h, 0, SEEK_CUR));
+            break;
+        }
+        else if (ret == 0)
+            break;
+        else if (ret < 0) {
+            printf("read-error: %d at %"PRId64"\n", ret, ffurl_seek(h, 0, SEEK_CUR));
+            goto fail;
+        } else {
+            for (i = 0; i < ret; ++i) { /* the test stream stores (position & 0xFF) at every position */
+                if (buf[i] != (pos & 0xFF)) {
+                    printf("read-mismatch: actual %d, expecting %d, at %"PRId64"\n",
+                           (int)buf[i], (int)(pos & 0xFF), pos);
+                    break;
+                }
+                pos++;
+            }
+        }
+
+        read_len += ret;
+    }
+    printf("read: %"PRId64"\n", read_len);
+
+    /*
+     * test normal seek
+     */
+    ret = ffurl_read(h, buf, 1); /* stream is at its end here, so this prints the end-of-stream result */
+    printf("read: %d\n", ret);
+
+    pos = ffurl_seek(h, TEST_SEEK_POS, SEEK_SET);
+    printf("seek: %"PRId64"\n", pos);
+
+    read_len = 0;
+    while (1) {
+        ret = ffurl_read(h, buf, sizeof(buf));
+        if (ret == AVERROR_EOF)
+            break;
+        else if (ret == 0)
+            break;
+        else if (ret < 0) {
+            printf("read-error: %d at %"PRId64"\n", ret, ffurl_seek(h, 0, SEEK_CUR));
+            goto fail;
+        } else {
+            for (i = 0; i < ret; ++i) {
+                if (buf[i] != (pos & 0xFF)) {
+                    printf("read-mismatch: actual %d, expecting %d, at %"PRId64"\n",
+                           (int)buf[i], (int)(pos & 0xFF), pos);
+                    break;
+                }
+                pos++;
+            }
+        }
+
+        read_len += ret;
+    }
+    printf("read: %"PRId64"\n", read_len);
+
+    ret = ffurl_read(h, buf, 1);
+    printf("read: %d\n", ret);
+
+    /*
+     * test read error
+     */
+    ffurl_close(h);
+    av_dict_set_int(&opts, "async-test-read-error", -10000, 0);
+    ret = ffurl_open(&h, "async:async-test:", AVIO_FLAG_READ, NULL, &opts);
+    printf("open: %d\n", ret);
+    if (ret < 0) goto fail; /* NOTE(review): relies on ffurl_open() resetting h on failure and ffurl_close() accepting that - confirm */
+    ret = ffurl_read(h, buf, 1);
+    printf("read: %d\n", ret);
+
+fail:
+    av_dict_free(&opts);
+    ffurl_close(h);
+    return 0;
+}
+
+#endif
diff --git a/libavformat/avc.c b/libavformat/avc.c
index 9d843e0c..f7b8f38a 100644
--- a/libavformat/avc.c
+++ b/libavformat/avc.c
@@ -180,7 +180,7 @@ int ff_avc_write_annexb_extradata(const uint8_t *in, uint8_t **buf, int *size)
     if (11 + sps_size + pps_size > *size)
         return AVERROR_INVALIDDATA;
     out_size = 8 + sps_size + pps_size;
-    out = av_mallocz(out_size + FF_INPUT_BUFFER_PADDING_SIZE);
+    out = av_mallocz(out_size + AV_INPUT_BUFFER_PADDING_SIZE);
     if (!out)
         return AVERROR(ENOMEM);
     AV_WB32(&out[0], 0x00000001);
diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index fb69852f..34bad436 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -78,6 +78,18 @@
  * if its AVClass is non-NULL, and the protocols layer. See the discussion on
  * nesting in @ref avoptions documentation to learn how to access those.
  *
+ * @section urls
+ * URL strings in libavformat are made of a scheme/protocol, a ':', and a
+ * scheme specific string. URLs without a scheme and ':' used for local files
+ * are supported but deprecated. "file:" should be used for local files.
+ *
+ * It is important that the scheme string is not taken from untrusted
+ * sources without checks.
+ *
+ * Note that some schemes/protocols are quite powerful, allowing access to
+ * both local and remote files, parts of them, concatenations of them, local
+ * audio and video devices and so on.
+ *
  * @defgroup lavf_decoding Demuxing
  * @{
  * Demuxers read a media file and split it into chunks of data (@em packets). A
@@ -88,10 +100,10 @@
  * cleanup.
  *
  * @section lavf_decoding_open Opening a media file
- * The minimum information required to open a file is its URL or filename, which
+ * The minimum information required to open a file is its URL, which
  * is passed to avformat_open_input(), as in the following code:
  * @code
- * const char    *url = "in.mp3";
+ * const char    *url = "file:in.mp3";
  * AVFormatContext *s = NULL;
  * int ret = avformat_open_input(&s, url, NULL, NULL);
  * if (ret < 0)
@@ -165,7 +177,7 @@
  * until the next av_read_frame() call or closing the file. If the caller
  * requires a longer lifetime, av_dup_packet() will make an av_malloc()ed copy
  * of it.
- * In both cases, the packet must be freed with av_free_packet() when it is no
+ * In both cases, the packet must be freed with av_packet_unref() when it is no
  * longer needed.
  *
  * @section lavf_decoding_seek Seeking
@@ -465,8 +477,10 @@ typedef struct AVProbeData {
 #define AVFMT_NOFILE        0x0001
 #define AVFMT_NEEDNUMBER    0x0002 /**< Needs '%d' in filename. */
 #define AVFMT_SHOW_IDS      0x0008 /**< Show format stream IDs numbers. */
+#if FF_API_LAVF_FMT_RAWPICTURE
 #define AVFMT_RAWPICTURE    0x0020 /**< Format wants AVPicture structure for
-                                      raw picture data. */
+                                      raw picture data. @deprecated Not used anymore */
+#endif
 #define AVFMT_GLOBALHEADER  0x0040 /**< Format wants global header. */
 #define AVFMT_NOTIMESTAMPS  0x0080 /**< Format does not need / have any timestamps. */
 #define AVFMT_GENERIC_INDEX 0x0100 /**< Use generic index building code. */
@@ -478,12 +492,7 @@ typedef struct AVProbeData {
 #define AVFMT_NOGENSEARCH   0x4000 /**< Format does not allow to fall back on generic search */
 #define AVFMT_NO_BYTE_SEEK  0x8000 /**< Format does not allow seeking by bytes */
 #define AVFMT_ALLOW_FLUSH  0x10000 /**< Format allows flushing. If not set, the muxer will not receive a NULL packet in the write_packet function. */
-#if LIBAVFORMAT_VERSION_MAJOR <= 54
-#define AVFMT_TS_NONSTRICT 0x8020000 //we try to be compatible to the ABIs of ffmpeg and major forks
-#else
-#define AVFMT_TS_NONSTRICT 0x20000
-#endif
-                                   /**< Format does not require strictly
+#define AVFMT_TS_NONSTRICT 0x20000 /**< Format does not require strictly
                                         increasing timestamps, but they must
                                         still be monotonic */
 #define AVFMT_TS_NEGATIVE  0x40000 /**< Format allows muxing negative
@@ -516,7 +525,7 @@ typedef struct AVOutputFormat {
     enum AVCodecID video_codec;    /**< default video codec */
     enum AVCodecID subtitle_codec; /**< default subtitle codec */
     /**
-     * can use flags: AVFMT_NOFILE, AVFMT_NEEDNUMBER, AVFMT_RAWPICTURE,
+     * can use flags: AVFMT_NOFILE, AVFMT_NEEDNUMBER,
      * AVFMT_GLOBALHEADER, AVFMT_NOTIMESTAMPS, AVFMT_VARIABLE_FPS,
      * AVFMT_NODIMENSIONS, AVFMT_NOSTREAMS, AVFMT_ALLOW_FLUSH,
      * AVFMT_TS_NONSTRICT
@@ -603,6 +612,29 @@ typedef struct AVOutputFormat {
      */
     int (*free_device_capabilities)(struct AVFormatContext *s, struct AVDeviceCapabilitiesQuery *caps);
     enum AVCodecID data_codec; /**< default data codec */
+    /**
+     * Initialize format. May allocate data here, and set any AVFormatContext or
+     * AVStream parameters that need to be set before packets are sent.
+     * This method must not write output.
+     *
+     * Any allocations made here must be freed in deinit().
+     */
+    int (*init)(struct AVFormatContext *);
+    /**
+     * Deinitialize format. If present, this is called whenever the muxer is being
+     * destroyed, regardless of whether or not the header has been written.
+     *
+     * If a trailer is being written, this is called after write_trailer().
+     *
+     * This is called if init() fails as well.
+     */
+    void (*deinit)(struct AVFormatContext *);
+    /**
+     * Set up any necessary bitstream filtering and extract any extra data needed
+     * for the global header.
+     * Return 0 if more packets from this stream must be checked; 1 if not.
+     */
+    int (*check_bitstream)(struct AVFormatContext *, const AVPacket *pkt);
 } AVOutputFormat;
 /**
  * @}
@@ -810,6 +842,8 @@ typedef struct AVIndexEntry {
  */
 #define AV_DISPOSITION_ATTACHED_PIC      0x0400
 
+typedef struct AVStreamInternal AVStreamInternal;
+
 /**
  * To specify text track kind (different from subtitles default).
  */
@@ -968,7 +1002,7 @@ typedef struct AVStream {
     /**
      * Stream information used internally by av_find_stream_info()
      */
-#define MAX_STD_TIMEBASES (30*12+7+6)
+#define MAX_STD_TIMEBASES (30*12+30+3+6)
     struct {
         int64_t last_dts;
         int64_t duration_gcd;
@@ -1015,7 +1049,6 @@ typedef struct AVStream {
     /**
      * Number of packets to buffer for codec probing
      */
-#define MAX_PROBE_PACKETS 2500
     int probe_packets;
 
     /**
@@ -1171,6 +1204,14 @@ typedef struct AVStream {
      * - decoding: Set by libavformat to calculate sample_aspect_ratio internally
      */
     AVRational display_aspect_ratio;
+
+    struct FFFrac *priv_pts;
+
+    /**
+     * An opaque field for libavformat internal usage.
+     * Must not be accessed in any way by callers.
+     */
+    AVStreamInternal *internal;
 } AVStream;
 
 AVRational av_stream_get_r_frame_rate(const AVStream *s);
@@ -1362,7 +1403,7 @@ typedef struct AVFormatContext {
      * available. Never set it directly if the file_size and the
      * duration are known as FFmpeg can compute it automatically.
      */
-    int bit_rate;
+    int64_t bit_rate;
 
     unsigned int packet_size;
     int max_delay;
@@ -1396,15 +1437,19 @@ typedef struct AVFormatContext {
 #define AVFMT_FLAG_FAST_SEEK   0x80000 ///< Enable fast, but inaccurate seeks for some formats
 
     /**
-     * @deprecated deprecated in favor of probesize2
+     * Maximum size of the data read from input for determining
+     * the input container format.
+     * Demuxing only, set by the caller before avformat_open_input().
      */
-    unsigned int probesize;
+    int64_t probesize;
 
     /**
-     * @deprecated deprecated in favor of max_analyze_duration2
+     * Maximum duration (in AV_TIME_BASE units) of the data read
+     * from input in avformat_find_stream_info().
+     * Demuxing only, set by the caller before avformat_find_stream_info().
+     * Can be set to 0 to let avformat choose using a heuristic.
      */
-    attribute_deprecated
-    int max_analyze_duration;
+    int64_t max_analyze_duration;
 
     const uint8_t *key;
     int keylen;
@@ -1735,7 +1780,6 @@ typedef struct AVFormatContext {
     /**
      * User data.
      * This is a place for some private data of the user.
-     * Mostly usable with control_message_cb or any future callbacks in device's context.
      */
     void *opaque;
 
@@ -1750,23 +1794,6 @@ typedef struct AVFormatContext {
      */
     int64_t output_ts_offset;
 
-    /**
-     * Maximum duration (in AV_TIME_BASE units) of the data read
-     * from input in avformat_find_stream_info().
-     * Demuxing only, set by the caller before avformat_find_stream_info()
-     * via AVOptions (NO direct access).
-     * Can be set to 0 to let avformat choose using a heuristic.
-     */
-    int64_t max_analyze_duration2;
-
-    /**
-     * Maximum size of the data read from input for determining
-     * the input container format.
-     * Demuxing only, set by the caller before avformat_open_input()
-     * via AVOptions (NO direct access).
-     */
-    int64_t probesize2;
-
     /**
      * dump format separator.
      * can be ", " or "\n      " or anything else
@@ -1783,6 +1810,7 @@ typedef struct AVFormatContext {
      */
     enum AVCodecID data_codec_id;
 
+#if FF_API_OLD_OPEN_CALLBACKS
     /**
      * Called to open further IO contexts when needed for demuxing.
      *
@@ -1797,8 +1825,47 @@ typedef struct AVFormatContext {
      * @See av_format_set_open_cb()
      *
      * Demuxing: Set by user.
+     *
+     * @deprecated Use io_open and io_close.
      */
+    attribute_deprecated
     int (*open_cb)(struct AVFormatContext *s, AVIOContext **p, const char *url, int flags, const AVIOInterruptCB *int_cb, AVDictionary **options);
+#endif
+
+    /**
+     * ',' separated list of allowed protocols.
+     * - encoding: unused
+     * - decoding: set by user through AVOptions (NO direct access)
+     */
+    char *protocol_whitelist;
+
+    /**
+     * A callback for opening new IO streams.
+     *
+     * Certain muxers or demuxers (e.g. for various playlist-based formats) need
+     * to open additional files during muxing or demuxing. This callback allows
+     * the caller to provide custom IO in such cases.
+     *
+     * @param s the format context
+     * @param pb on success, the newly opened IO context should be returned here
+     * @param url the url to open
+     * @param flags a combination of AVIO_FLAG_*
+     * @param options a dictionary of additional options, with the same
+     *                semantics as in avio_open2()
+     * @return 0 on success, a negative AVERROR code on failure
+     *
+     * @note Certain muxers and demuxers do nesting, i.e. they open one or more
+     * additional internal format contexts. Thus the AVFormatContext pointer
+     * passed to this callback may be different from the one facing the caller.
+     * It will, however, have the same 'opaque' field.
+     */
+    int (*io_open)(struct AVFormatContext *s, AVIOContext **pb, const char *url,
+                   int flags, AVDictionary **options);
+
+    /**
+     * A callback for closing the streams opened with AVFormatContext.io_open().
+     */
+    void (*io_close)(struct AVFormatContext *s, AVIOContext *pb);
 } AVFormatContext;
 
 int av_format_get_probe_score(const AVFormatContext *s);
@@ -1816,8 +1883,10 @@ void *    av_format_get_opaque(const AVFormatContext *s);
 void      av_format_set_opaque(AVFormatContext *s, void *opaque);
 av_format_control_message av_format_get_control_message_cb(const AVFormatContext *s);
 void      av_format_set_control_message_cb(AVFormatContext *s, av_format_control_message callback);
-AVOpenCallback av_format_get_open_cb(const AVFormatContext *s);
-void      av_format_set_open_cb(AVFormatContext *s, AVOpenCallback callback);
+#if FF_API_OLD_OPEN_CALLBACKS
+attribute_deprecated AVOpenCallback av_format_get_open_cb(const AVFormatContext *s);
+attribute_deprecated void av_format_set_open_cb(AVFormatContext *s, AVOpenCallback callback);
+#endif
 
 /**
  * This function will cause global side data to be injected in the next packet
@@ -1946,6 +2015,16 @@ const AVClass *avformat_get_class(void);
  */
 AVStream *avformat_new_stream(AVFormatContext *s, const AVCodec *c);
 
+/**
+ * Allocate new side information for a stream.
+ *
+ * @param stream stream
+ * @param type desired side information type
+ * @param size side information size
+ * @return pointer to freshly allocated data or NULL otherwise
+ */
+uint8_t *av_stream_new_side_data(AVStream *stream,
+                                 enum AVPacketSideDataType type, int size);
 /**
  * Get side information from stream.
  *
@@ -2033,7 +2112,7 @@ AVInputFormat *av_probe_input_format3(AVProbeData *pd, int is_opened, int *score
  *
  * @param pb the bytestream to probe
  * @param fmt the input format is put here
- * @param filename the filename of the stream
+ * @param url the url of the stream
  * @param logctx the log context
  * @param offset the offset within the bytestream to probe from
  * @param max_probe_size the maximum probe buffer size (zero for default)
@@ -2042,14 +2121,14 @@ AVInputFormat *av_probe_input_format3(AVProbeData *pd, int is_opened, int *score
  * AVERROR code otherwise
  */
 int av_probe_input_buffer2(AVIOContext *pb, AVInputFormat **fmt,
-                           const char *filename, void *logctx,
+                           const char *url, void *logctx,
                            unsigned int offset, unsigned int max_probe_size);
 
 /**
  * Like av_probe_input_buffer2() but returns 0 on success
  */
 int av_probe_input_buffer(AVIOContext *pb, AVInputFormat **fmt,
-                          const char *filename, void *logctx,
+                          const char *url, void *logctx,
                           unsigned int offset, unsigned int max_probe_size);
 
 /**
@@ -2060,7 +2139,7 @@ int av_probe_input_buffer(AVIOContext *pb, AVInputFormat **fmt,
  *           May be a pointer to NULL, in which case an AVFormatContext is allocated by this
  *           function and written into ps.
  *           Note that a user-supplied AVFormatContext will be freed on failure.
- * @param filename Name of the stream to open.
+ * @param url URL of the stream to open.
  * @param fmt If non-NULL, this parameter forces a specific input format.
  *            Otherwise the format is autodetected.
  * @param options  A dictionary filled with AVFormatContext and demuxer-private options.
@@ -2071,7 +2150,7 @@ int av_probe_input_buffer(AVIOContext *pb, AVInputFormat **fmt,
  *
  * @note If you want to use custom IO, preallocate the format context and set its pb field.
  */
-int avformat_open_input(AVFormatContext **ps, const char *filename, AVInputFormat *fmt, AVDictionary **options);
+int avformat_open_input(AVFormatContext **ps, const char *url, AVInputFormat *fmt, AVDictionary **options);
 
 attribute_deprecated
 int av_demuxer_open(AVFormatContext *ic);
@@ -2111,6 +2190,8 @@ int avformat_find_stream_info(AVFormatContext *ic, AVDictionary **options);
  */
 AVProgram *av_find_program_from_stream(AVFormatContext *ic, AVProgram *last, int s);
 
+void av_program_add_stream_index(AVFormatContext *ac, int progid, unsigned int idx);
+
 /**
  * Find the "best" stream in the file.
  * The best stream is determined according to various heuristics as the most
@@ -2153,7 +2234,7 @@ int av_find_best_stream(AVFormatContext *ic,
  * If pkt->buf is NULL, then the packet is valid until the next
  * av_read_frame() or until avformat_close_input(). Otherwise the packet
  * is valid indefinitely. In both cases the packet must be freed with
- * av_free_packet when it is no longer needed. For video, the packet contains
+ * av_packet_unref when it is no longer needed. For video, the packet contains
  * exactly one frame. For audio, it contains an integer number of frames if each
  * frame has a known fixed size (e.g. PCM or ADPCM data). If the audio frames
  * have a variable size (e.g. MPEG audio), then it contains one frame.
@@ -2278,6 +2359,7 @@ void avformat_close_input(AVFormatContext **s);
  *
  * @see av_opt_find, av_dict_set, avio_open, av_oformat_next.
  */
+av_warn_unused_result
 int avformat_write_header(AVFormatContext *s, AVDictionary **options);
 
 /**
@@ -2302,10 +2384,17 @@ int avformat_write_header(AVFormatContext *s, AVDictionary **options);
  *            <br>
  *            Packet's @ref AVPacket.stream_index "stream_index" field must be
  *            set to the index of the corresponding stream in @ref
- *            AVFormatContext.streams "s->streams". It is very strongly
- *            recommended that timing information (@ref AVPacket.pts "pts", @ref
- *            AVPacket.dts "dts", @ref AVPacket.duration "duration") is set to
- *            correct values.
+ *            AVFormatContext.streams "s->streams".
+ *            <br>
+ *            The timestamps (@ref AVPacket.pts "pts", @ref AVPacket.dts "dts")
+ *            must be set to correct values in the stream's timebase (unless the
+ *            output format is flagged with the AVFMT_NOTIMESTAMPS flag, in which
+ *            case they can be set to AV_NOPTS_VALUE).
+ *            The dts for subsequent packets passed to this function must be strictly
+ *            increasing when compared in their respective timebases (unless the
+ *            output format is flagged with the AVFMT_TS_NONSTRICT flag, in which
+ *            case they merely have to be nondecreasing). @ref AVPacket.duration
+ *            "duration" should also be set if known.
  * @return < 0 on error, = 0 if OK, 1 if flushed and there is no more data to flush
  *
  * @see av_interleaved_write_frame()
@@ -2335,10 +2424,16 @@ int av_write_frame(AVFormatContext *s, AVPacket *pkt);
  *            <br>
  *            Packet's @ref AVPacket.stream_index "stream_index" field must be
  *            set to the index of the corresponding stream in @ref
- *            AVFormatContext.streams "s->streams". It is very strongly
- *            recommended that timing information (@ref AVPacket.pts "pts", @ref
- *            AVPacket.dts "dts", @ref AVPacket.duration "duration") is set to
- *            correct values.
+ *            AVFormatContext.streams "s->streams".
+ *            <br>
+ *            The timestamps (@ref AVPacket.pts "pts", @ref AVPacket.dts "dts")
+ *            must be set to correct values in the stream's timebase (unless the
+ *            output format is flagged with the AVFMT_NOTIMESTAMPS flag, in which
+ *            case they can be set to AV_NOPTS_VALUE).
+ *            The dts for subsequent packets in one stream must be strictly
+ *            increasing (unless the output format is flagged with the
+ *            AVFMT_TS_NONSTRICT flag, in which case they merely have to be nondecreasing).
+ *            @ref AVPacket.duration "duration" should also be set if known.
  *
  * @return 0 on success, a negative AVERROR on error. Libavformat will always
  *         take care of freeing the packet, even if this function fails.
@@ -2745,6 +2840,17 @@ int avformat_match_stream_specifier(AVFormatContext *s, AVStream *st,
 
 int avformat_queue_attached_pictures(AVFormatContext *s);
 
+/**
+ * Apply a list of bitstream filters to a packet.
+ *
+ * @param codec AVCodecContext, usually from an AVStream
+ * @param pkt the packet to apply filters to
+ * @param bsfc a NULL-terminated list of filters to apply
+ * @return  >=0 on success;
+ *          AVERROR code on failure
+ */
+int av_apply_bitstream_filters(AVCodecContext *codec, AVPacket *pkt,
+                               AVBitStreamFilterContext *bsfc);
 
 /**
  * @}
diff --git a/libavformat/avidec.c b/libavformat/avidec.c
index 5348b44b..38598107 100644
--- a/libavformat/avidec.c
+++ b/libavformat/avidec.c
@@ -87,7 +87,7 @@ typedef struct AVIContext {
 
 
 static const AVOption options[] = {
-    { "use_odml", "use odml index", offsetof(AVIContext, use_odml), AV_OPT_TYPE_INT, {.i64 = 1}, -1, 1, AV_OPT_FLAG_DECODING_PARAM},
+    { "use_odml", "use odml index", offsetof(AVIContext, use_odml), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AV_OPT_FLAG_DECODING_PARAM},
     { NULL },
 };
 
@@ -480,6 +480,7 @@ static int avi_read_header(AVFormatContext *s)
     int avih_width      = 0, avih_height = 0;
     int amv_file_format = 0;
     uint64_t list_end   = 0;
+    int64_t pos;
     int ret;
     AVDictionaryEntry *dict_entry;
 
@@ -817,13 +818,15 @@ static int avi_read_header(AVFormatContext *s)
 
                     if (st->codec->codec_tag == MKTAG('V', 'S', 'S', 'H'))
                         st->need_parsing = AVSTREAM_PARSE_FULL;
+                    if (st->codec->codec_id == AV_CODEC_ID_RV40)
+                        st->need_parsing = AVSTREAM_PARSE_NONE;
 
                     if (st->codec->codec_tag == 0 && st->codec->height > 0 &&
                         st->codec->extradata_size < 1U << 30) {
                         st->codec->extradata_size += 9;
                         if ((ret = av_reallocp(&st->codec->extradata,
                                                st->codec->extradata_size +
-                                               FF_INPUT_BUFFER_PADDING_SIZE)) < 0) {
+                                               AV_INPUT_BUFFER_PADDING_SIZE)) < 0) {
                             st->codec->extradata_size = 0;
                             return ret;
                         } else
@@ -835,7 +838,7 @@ static int avi_read_header(AVFormatContext *s)
 //                    avio_skip(pb, size - 5 * 4);
                     break;
                 case AVMEDIA_TYPE_AUDIO:
-                    ret = ff_get_wav_header(pb, st->codec, size, 0);
+                    ret = ff_get_wav_header(s, pb, st->codec, size, 0);
                     if (ret < 0)
                         return ret;
                     ast->dshow_block_align = st->codec->block_align;
@@ -861,6 +864,9 @@ static int avi_read_header(AVFormatContext *s)
                     if (st->codec->codec_id == AV_CODEC_ID_AAC &&
                         st->codec->extradata_size)
                         st->need_parsing = AVSTREAM_PARSE_NONE;
+                    // The flac parser does not work with AVSTREAM_PARSE_TIMESTAMPS
+                    if (st->codec->codec_id == AV_CODEC_ID_FLAC)
+                        st->need_parsing = AVSTREAM_PARSE_NONE;
                     /* AVI files with Xan DPCM audio (wrongly) declare PCM
                      * audio in the header but have Axan as stream_code_tag. */
                     if (ast->handler == AV_RL32("Axan")) {
@@ -872,8 +878,9 @@ static int avi_read_header(AVFormatContext *s)
                         st->codec->codec_id    = AV_CODEC_ID_ADPCM_IMA_AMV;
                         ast->dshow_block_align = 0;
                     }
-                    if (st->codec->codec_id == AV_CODEC_ID_AAC && ast->dshow_block_align <= 4 && ast->dshow_block_align ||
-                        st->codec->codec_id == AV_CODEC_ID_MP2 && ast->dshow_block_align <= 4 && ast->dshow_block_align) {
+                    if ((st->codec->codec_id == AV_CODEC_ID_AAC  ||
+                         st->codec->codec_id == AV_CODEC_ID_FLAC ||
+                         st->codec->codec_id == AV_CODEC_ID_MP2 ) && ast->dshow_block_align <= 4 && ast->dshow_block_align) {
                         av_log(s, AV_LOG_DEBUG, "overriding invalid dshow_block_align of %d\n", ast->dshow_block_align);
                         ast->dshow_block_align = 0;
                     }
@@ -924,13 +931,13 @@ static int avi_read_header(AVFormatContext *s)
             }
             break;
         case MKTAG('i', 'n', 'd', 'x'):
-            i = avio_tell(pb);
+            pos = avio_tell(pb);
             if (pb->seekable && !(s->flags & AVFMT_FLAG_IGNIDX) &&
                 avi->use_odml &&
                 read_braindead_odml_indx(s, 0) < 0 &&
                 (s->error_recognition & AV_EF_EXPLODE))
                 goto fail;
-            avio_seek(pb, i + size, SEEK_SET);
+            avio_seek(pb, pos + size, SEEK_SET);
             break;
         case MKTAG('v', 'p', 'r', 'p'):
             if (stream_index < (unsigned)s->nb_streams && size > 9 * 4) {
@@ -1299,14 +1306,80 @@ static int avi_sync(AVFormatContext *s, int exit_early)
     return AVERROR_EOF;
 }
 
+static int ni_prepare_read(AVFormatContext *s)
+{
+    AVIContext *avi = s->priv_data;
+    int best_stream_index = 0;
+    AVStream *best_st     = NULL;
+    AVIStream *best_ast;
+    int64_t best_ts = INT64_MAX;
+    int i;
+    /* Pick the stream whose frame_offset, once rescaled, is the smallest, then seek s->pb to its data. */
+    for (i = 0; i < s->nb_streams; i++) {
+        AVStream *st   = s->streams[i];
+        AVIStream *ast = st->priv_data;
+        int64_t ts     = ast->frame_offset;
+        int64_t last_ts;
+        /* A stream with no index entries is never selected. */
+        if (!st->nb_index_entries)
+            continue;
+        /* Skip a stream that has no pending packet data and is already past its last index entry. */
+        last_ts = st->index_entries[st->nb_index_entries - 1].timestamp;
+        if (!ast->remaining && ts > last_ts)
+            continue;
+        /* Rescale frame_offset so that candidates from different streams are compared on one scale. */
+        ts = av_rescale_q(ts, st->time_base,
+                          (AVRational) { FFMAX(1, ast->sample_size),
+                                         AV_TIME_BASE });
+
+        av_log(s, AV_LOG_TRACE, "%"PRId64" %d/%d %"PRId64"\n", ts,
+                st->time_base.num, st->time_base.den, ast->frame_offset);
+        if (ts < best_ts) {
+            best_ts           = ts;
+            best_st           = st;
+            best_stream_index = i;
+        }
+    }
+    if (!best_st) /* no stream qualified in the loop above */
+        return AVERROR_EOF;
+    /* Look up the index entry for the chosen stream's current (unscaled) frame_offset. */
+    best_ast = best_st->priv_data;
+    best_ts  = best_ast->frame_offset;
+    if (best_ast->remaining) {
+        i = av_index_search_timestamp(best_st,
+                                      best_ts,
+                                      AVSEEK_FLAG_ANY |
+                                      AVSEEK_FLAG_BACKWARD);
+    } else {
+        i = av_index_search_timestamp(best_st, best_ts, AVSEEK_FLAG_ANY);
+        if (i >= 0) /* snap frame_offset to the timestamp of the entry that was found */
+            best_ast->frame_offset = best_st->index_entries[i].timestamp;
+    }
+    /* A negative i means the search found no entry; that is reported as EOF below. */
+    if (i >= 0) {
+        int64_t pos = best_st->index_entries[i].pos;
+        pos += best_ast->packet_size - best_ast->remaining; /* advance past the part of the packet already consumed */
+        if (avio_seek(s->pb, pos + 8, SEEK_SET) < 0) /* NOTE(review): + 8 presumably skips the chunk tag and size -- confirm */
+          return AVERROR_EOF;
+
+        av_assert0(best_ast->remaining <= best_ast->packet_size);
+
+        avi->stream_index = best_stream_index;
+        if (!best_ast->remaining) /* starting a fresh packet: take its size from the index entry */
+            best_ast->packet_size =
+            best_ast->remaining   = best_st->index_entries[i].size;
+    }
+    else
+        return AVERROR_EOF;
+
+    return 0;
+}
+
 static int avi_read_packet(AVFormatContext *s, AVPacket *pkt)
 {
     AVIContext *avi = s->priv_data;
     AVIOContext *pb = s->pb;
     int err;
-#if FF_API_DESTRUCT_PACKET
-    void *dstr;
-#endif
 
     if (CONFIG_DV_DEMUXER && avi->dv_demux) {
         int size = avpriv_dv_get_packet(avi->dv_demux, pkt);
@@ -1317,68 +1390,9 @@ static int avi_read_packet(AVFormatContext *s, AVPacket *pkt)
     }
 
     if (avi->non_interleaved) {
-        int best_stream_index = 0;
-        AVStream *best_st     = NULL;
-        AVIStream *best_ast;
-        int64_t best_ts = INT64_MAX;
-        int i;
-
-        for (i = 0; i < s->nb_streams; i++) {
-            AVStream *st   = s->streams[i];
-            AVIStream *ast = st->priv_data;
-            int64_t ts     = ast->frame_offset;
-            int64_t last_ts;
-
-            if (!st->nb_index_entries)
-                continue;
-
-            last_ts = st->index_entries[st->nb_index_entries - 1].timestamp;
-            if (!ast->remaining && ts > last_ts)
-                continue;
-
-            ts = av_rescale_q(ts, st->time_base,
-                              (AVRational) { FFMAX(1, ast->sample_size),
-                                             AV_TIME_BASE });
-
-            av_log(s, AV_LOG_TRACE, "%"PRId64" %d/%d %"PRId64"\n", ts,
-                    st->time_base.num, st->time_base.den, ast->frame_offset);
-            if (ts < best_ts) {
-                best_ts           = ts;
-                best_st           = st;
-                best_stream_index = i;
-            }
-        }
-        if (!best_st)
-            return AVERROR_EOF;
-
-        best_ast = best_st->priv_data;
-        best_ts  = best_ast->frame_offset;
-        if (best_ast->remaining) {
-            i = av_index_search_timestamp(best_st,
-                                          best_ts,
-                                          AVSEEK_FLAG_ANY |
-                                          AVSEEK_FLAG_BACKWARD);
-        } else {
-            i = av_index_search_timestamp(best_st, best_ts, AVSEEK_FLAG_ANY);
-            if (i >= 0)
-                best_ast->frame_offset = best_st->index_entries[i].timestamp;
-        }
-
-        if (i >= 0) {
-            int64_t pos = best_st->index_entries[i].pos;
-            pos += best_ast->packet_size - best_ast->remaining;
-            if (avio_seek(s->pb, pos + 8, SEEK_SET) < 0)
-              return AVERROR_EOF;
-
-            av_assert0(best_ast->remaining <= best_ast->packet_size);
-
-            avi->stream_index = best_stream_index;
-            if (!best_ast->remaining)
-                best_ast->packet_size =
-                best_ast->remaining   = best_st->index_entries[i].size;
-        }
-        else
-          return AVERROR_EOF;
+        err = ni_prepare_read(s);
+        if (err < 0)
+            return err;
     }
 
 resync:
@@ -1423,22 +1437,12 @@ static int avi_read_packet(AVFormatContext *s, AVPacket *pkt)
 
         if (CONFIG_DV_DEMUXER && avi->dv_demux) {
             AVBufferRef *avbuf = pkt->buf;
-#if FF_API_DESTRUCT_PACKET
-FF_DISABLE_DEPRECATION_WARNINGS
-            dstr = pkt->destruct;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
             size = avpriv_dv_produce_packet(avi->dv_demux, pkt,
                                             pkt->data, pkt->size, pkt->pos);
-#if FF_API_DESTRUCT_PACKET
-FF_DISABLE_DEPRECATION_WARNINGS
-            pkt->destruct = dstr;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
             pkt->buf    = avbuf;
             pkt->flags |= AV_PKT_FLAG_KEY;
             if (size < 0)
-                av_free_packet(pkt);
+                av_packet_unref(pkt);
         } else if (st->codec->codec_type == AVMEDIA_TYPE_SUBTITLE &&
                    !st->codec->codec_tag && read_gab2_sub(s, st, pkt)) {
             ast->frame_offset++;
@@ -1503,7 +1507,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
         }
 
         if (!avi->non_interleaved && pkt->pos >= 0 && ast->seek_pos > pkt->pos) {
-            av_free_packet(pkt);
+            av_packet_unref(pkt);
             goto resync;
         }
         ast->seek_pos= 0;
@@ -1535,8 +1539,9 @@ static int avi_read_idx1(AVFormatContext *s, int size)
     int nb_index_entries, i;
     AVStream *st;
     AVIStream *ast;
-    unsigned int index, tag, flags, pos, len, first_packet = 1;
-    unsigned last_pos = -1;
+    int64_t pos;
+    unsigned int index, tag, flags, len, first_packet = 1;
+    int64_t last_pos = -1;
     unsigned last_idx = -1;
     int64_t idx1_pos, first_packet_pos = 0, data_offset = 0;
     int anykey = 0;
@@ -1566,7 +1571,7 @@ static int avi_read_idx1(AVFormatContext *s, int size)
         flags = avio_rl32(pb);
         pos   = avio_rl32(pb);
         len   = avio_rl32(pb);
-        av_log(s, AV_LOG_TRACE, "%d: tag=0x%x flags=0x%x pos=0x%x len=%d/",
+        av_log(s, AV_LOG_TRACE, "%d: tag=0x%x flags=0x%x pos=0x%"PRIx64" len=%d/",
                 i, tag, flags, pos, len);
 
         index  = ((tag      & 0xff) - '0') * 10;
@@ -1577,7 +1582,8 @@ static int avi_read_idx1(AVFormatContext *s, int size)
         ast = st->priv_data;
 
         if (first_packet && first_packet_pos) {
-            data_offset  = first_packet_pos - pos;
+            if (avi->movi_list + 4 != pos || pos + 500 > first_packet_pos)
+                data_offset  = first_packet_pos - pos;
             first_packet = 0;
         }
         pos += data_offset;
@@ -1681,9 +1687,13 @@ static int guess_ni_flag(AVFormatContext *s)
 
         if (n >= 2) {
             int64_t pos = st->index_entries[0].pos;
-            avio_seek(s->pb, pos + 4, SEEK_SET);
+            unsigned tag[2];
+            avio_seek(s->pb, pos, SEEK_SET);
+            tag[0] = avio_r8(s->pb);
+            tag[1] = avio_r8(s->pb);
+            avio_rl16(s->pb);
             size = avio_rl32(s->pb);
-            if (pos + size > st->index_entries[1].pos)
+            if (get_stream_idx(tag) == i && pos + size > st->index_entries[1].pos)
                 last_start = INT64_MAX;
         }
 
@@ -1751,7 +1761,7 @@ static void seek_subtitle(AVStream *st, AVStream *st2, int64_t timestamp)
 {
     AVIStream *ast2 = st2->priv_data;
     int64_t ts2     = av_rescale_q(timestamp, st->time_base, st2->time_base);
-    av_free_packet(&ast2->sub_pkt);
+    av_packet_unref(&ast2->sub_pkt);
     if (avformat_seek_file(ast2->sub_ctx, 0, INT64_MIN, ts2, ts2, 0) >= 0 ||
         avformat_seek_file(ast2->sub_ctx, 0, ts2, ts2, INT64_MAX, 0) >= 0)
         ff_read_packet(ast2->sub_ctx, &ast2->sub_pkt);
@@ -1889,7 +1899,7 @@ static int avi_read_close(AVFormatContext *s)
                 avformat_close_input(&ast->sub_ctx);
             }
             av_freep(&ast->sub_buffer);
-            av_free_packet(&ast->sub_pkt);
+            av_packet_unref(&ast->sub_pkt);
         }
     }
 
diff --git a/libavformat/avienc.c b/libavformat/avienc.c
index e5609d99..649961d1 100644
--- a/libavformat/avienc.c
+++ b/libavformat/avienc.c
@@ -29,10 +29,12 @@
 #include "mpegts.h"
 #include "libavformat/avlanguage.h"
 #include "libavutil/avstring.h"
+#include "libavutil/internal.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/dict.h"
 #include "libavutil/avassert.h"
 #include "libavutil/timestamp.h"
+#include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
 #include "libavcodec/raw.h"
 
@@ -57,9 +59,11 @@ typedef struct AVIIndex {
 } AVIIndex;
 
 typedef struct AVIContext {
+    const AVClass *class;
     int64_t riff_start, movi_list, odml_list;
     int64_t frames_hdr_all;
     int riff_id;
+    int write_channel_mask;
 } AVIContext;
 
 typedef struct AVIStream {
@@ -338,7 +342,7 @@ static int avi_write_header(AVFormatContext *s)
         ff_end_tag(pb, strh);
 
         if (enc->codec_type != AVMEDIA_TYPE_DATA) {
-            int ret;
+            int ret, flags;
             enum AVPixelFormat pix_fmt;
 
             strf = ff_start_tag(pb, "strf");
@@ -366,7 +370,8 @@ static int avi_write_header(AVFormatContext *s)
                           av_get_pix_fmt_name(enc->pix_fmt));
                 break;
             case AVMEDIA_TYPE_AUDIO:
-                if ((ret = ff_put_wav_header(pb, enc, 0)) < 0)
+                flags = (avi->write_channel_mask == 0) ? FF_PUT_WAV_HEADER_SKIP_CHANNELMASK : 0;
+                if ((ret = ff_put_wav_header(pb, enc, flags)) < 0)
                     return ret;
                 break;
             default:
@@ -618,7 +623,7 @@ static int write_skip_frames(AVFormatContext *s, int stream_index, int64_t dts)
     AVIStream *avist    = s->streams[stream_index]->priv_data;
     AVCodecContext *enc = s->streams[stream_index]->codec;
 
-    av_dlog(s, "dts:%s packet_count:%d stream_index:%d\n", av_ts2str(dts), avist->packet_count, stream_index);
+    ff_dlog(s, "dts:%s packet_count:%d stream_index:%d\n", av_ts2str(dts), avist->packet_count, stream_index);
     while (enc->block_align == 0 && dts != AV_NOPTS_VALUE &&
            dts > avist->packet_count && enc->codec_id != AV_CODEC_ID_XSUB && avist->packet_count) {
         AVPacket empty_packet;
@@ -633,7 +638,7 @@ static int write_skip_frames(AVFormatContext *s, int stream_index, int64_t dts)
         empty_packet.data         = NULL;
         empty_packet.stream_index = stream_index;
         avi_write_packet(s, &empty_packet);
-        av_dlog(s, "dup dts:%s packet_count:%d\n", av_ts2str(dts), avist->packet_count);
+        ff_dlog(s, "dup dts:%s packet_count:%d\n", av_ts2str(dts), avist->packet_count);
     }
 
     return 0;
@@ -781,6 +786,20 @@ static int avi_write_trailer(AVFormatContext *s)
     return res;
 }
 
+#define OFFSET(x) offsetof(AVIContext, x)
+#define ENC AV_OPT_FLAG_ENCODING_PARAM
+static const AVOption options[] = { /* write_channel_mask == 0 makes avi_write_header() pass FF_PUT_WAV_HEADER_SKIP_CHANNELMASK */
+    { "write_channel_mask", "write channel mask into wave format header", OFFSET(write_channel_mask), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, ENC },
+    { NULL },
+};
+/* Option class for the AVI muxer; ff_avi_muxer points its priv_class at it. */
+static const AVClass avi_muxer_class = {
+    .class_name = "AVI muxer",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
 AVOutputFormat ff_avi_muxer = {
     .name           = "avi",
     .long_name      = NULL_IF_CONFIG_SMALL("AVI (Audio Video Interleaved)"),
@@ -795,4 +814,5 @@ AVOutputFormat ff_avi_muxer = {
     .codec_tag      = (const AVCodecTag * const []) {
         ff_codec_bmp_tags, ff_codec_wav_tags, 0
     },
+    .priv_class     = &avi_muxer_class,
 };
diff --git a/libavformat/avio.c b/libavformat/avio.c
index 261ff2af..362099dd 100644
--- a/libavformat/avio.c
+++ b/libavformat/avio.c
@@ -73,7 +73,13 @@ static const AVClass *urlcontext_child_class_next(const AVClass *prev)
     return NULL;
 }
 
-static const AVOption options[] = { { NULL } };
+#define OFFSET(x) offsetof(URLContext,x)
+#define E AV_OPT_FLAG_ENCODING_PARAM
+#define D AV_OPT_FLAG_DECODING_PARAM
+static const AVOption options[] = { /* options stored directly in URLContext; the whitelist is matched with ',' in ffurl_connect() */
+    {"protocol_whitelist", "List of protocols that are allowed to be used", OFFSET(protocol_whitelist), AV_OPT_TYPE_STRING, { .str = NULL },  CHAR_MIN, CHAR_MAX, D },
+    { NULL }
+};
 const AVClass ffurl_context_class = {
     .class_name       = "URLContext",
     .item_name        = urlcontext_to_name,
@@ -156,9 +162,16 @@ static int url_alloc_for_protocol(URLContext **puc, struct URLProtocol *up,
                 char sep= *++p;
                 char *key, *val;
                 p++;
+
+                if (strcmp(up->name, "subfile"))
+                    ret = AVERROR(EINVAL);
+
                 while(ret >= 0 && (key= strchr(p, sep)) && p<key && (val = strchr(key+1, sep))){
                     *val= *key= 0;
-                    ret= av_opt_set(uc->priv_data, p, key+1, 0);
+                    if (strcmp(p, "start") && strcmp(p, "end")) {
+                        ret = AVERROR_OPTION_NOT_FOUND;
+                    } else
+                        ret= av_opt_set(uc->priv_data, p, key+1, 0);
                     if (ret == AVERROR_OPTION_NOT_FOUND)
                         av_log(uc, AV_LOG_ERROR, "Key '%s' not found.\n", p);
                     *val= *key= sep;
@@ -194,12 +207,43 @@ static int url_alloc_for_protocol(URLContext **puc, struct URLProtocol *up,
 
 int ffurl_connect(URLContext *uc, AVDictionary **options)
 {
-    int err =
+    int err;
+    AVDictionary *tmp_opts = NULL;
+    AVDictionaryEntry *e;
+
+    if (!options)
+        options = &tmp_opts;
+
+    // Check that URLContext was initialized correctly and lists are matching if set
+    av_assert0(!(e=av_dict_get(*options, "protocol_whitelist", NULL, 0)) ||
+               (uc->protocol_whitelist && !strcmp(uc->protocol_whitelist, e->value)));
+
+    if (uc->protocol_whitelist && av_match_list(uc->prot->name, uc->protocol_whitelist, ',') <= 0) {
+        av_log(uc, AV_LOG_ERROR, "Protocol not on whitelist \'%s\'!\n", uc->protocol_whitelist);
+        return AVERROR(EINVAL);
+    }
+
+    if (!uc->protocol_whitelist && uc->prot->default_whitelist) {
+        av_log(uc, AV_LOG_DEBUG, "Setting default whitelist '%s'\n", uc->prot->default_whitelist);
+        uc->protocol_whitelist = av_strdup(uc->prot->default_whitelist);
+        if (!uc->protocol_whitelist) {
+            return AVERROR(ENOMEM);
+        }
+    } else if (!uc->protocol_whitelist)
+        av_log(uc, AV_LOG_DEBUG, "No default whitelist set\n"); // This should be an error once all declare a default whitelist
+
+    if ((err = av_dict_set(options, "protocol_whitelist", uc->protocol_whitelist, 0)) < 0)
+        return err;
+
+    err =
         uc->prot->url_open2 ? uc->prot->url_open2(uc,
                                                   uc->filename,
                                                   uc->flags,
                                                   options) :
         uc->prot->url_open(uc, uc->filename, uc->flags);
+
+    av_dict_set(options, "protocol_whitelist", NULL, 0);
+
     if (err)
         return err;
     uc->is_connected = 1;
@@ -211,6 +255,26 @@ int ffurl_connect(URLContext *uc, AVDictionary **options)
     return 0;
 }
 
+int ffurl_accept(URLContext *s, URLContext **c) /* forwards to the protocol's url_accept callback of s */
+{
+    av_assert0(!*c); /* *c must be NULL on entry */
+    if (s->prot->url_accept)
+        return s->prot->url_accept(s, c);
+    return AVERROR(EBADF); /* the protocol of s has no url_accept callback */
+}
+
+int ffurl_handshake(URLContext *c) /* runs the protocol's url_handshake callback, if any, then marks c connected */
+{
+    int ret;
+    if (c->prot->url_handshake) { /* the callback is optional */
+        ret = c->prot->url_handshake(c);
+        if (ret) /* any non-zero result is returned to the caller unchanged */
+            return ret;
+    }
+    c->is_connected = 1; /* reached when there is no callback or it returned 0 */
+    return 0;
+}
+
 #define URL_SCHEME_CHARS                        \
     "abcdefghijklmnopqrstuvwxyz"                \
     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"                \
@@ -223,7 +287,7 @@ static struct URLProtocol *url_find_protocol(const char *filename)
     size_t proto_len = strspn(filename, URL_SCHEME_CHARS);
 
     if (filename[proto_len] != ':' &&
-        (filename[proto_len] != ',' || !strchr(filename + proto_len + 1, ':')) ||
+        (strncmp(filename, "subfile,", 8) || !strchr(filename + proto_len + 1, ':')) ||
         is_dos_path(filename))
         strcpy(proto_str, "file");
     else
@@ -269,18 +333,33 @@ int ffurl_alloc(URLContext **puc, const char *filename, int flags,
     return AVERROR_PROTOCOL_NOT_FOUND;
 }
 
-int ffurl_open(URLContext **puc, const char *filename, int flags,
-               const AVIOInterruptCB *int_cb, AVDictionary **options)
+int ffurl_open_whitelist(URLContext **puc, const char *filename, int flags,
+                         const AVIOInterruptCB *int_cb, AVDictionary **options, const char *whitelist)
 {
+    AVDictionary *tmp_opts = NULL;
+    AVDictionaryEntry *e;
     int ret = ffurl_alloc(puc, filename, flags, int_cb);
     if (ret < 0)
         return ret;
     if (options && (*puc)->prot->priv_data_class &&
         (ret = av_opt_set_dict((*puc)->priv_data, options)) < 0)
         goto fail;
+
+    if (!options)
+        options = &tmp_opts;
+
+    av_assert0(!whitelist ||
+               !(e=av_dict_get(*options, "protocol_whitelist", NULL, 0)) ||
+               !strcmp(whitelist, e->value));
+
+    if ((ret = av_dict_set(options, "protocol_whitelist", whitelist, 0)) < 0)
+        goto fail;
+
     if ((ret = av_opt_set_dict(*puc, options)) < 0)
         goto fail;
+
     ret = ffurl_connect(*puc, options);
+
     if (!ret)
         return 0;
 fail:
@@ -289,6 +368,13 @@ int ffurl_open(URLContext **puc, const char *filename, int flags,
     return ret;
 }
 
+int ffurl_open(URLContext **puc, const char *filename, int flags,
+               const AVIOInterruptCB *int_cb, AVDictionary **options)
+{
+    return ffurl_open_whitelist(puc, filename, flags, /* thin wrapper around ffurl_open_whitelist() */
+                                int_cb, options, NULL); /* NULL: this entry point supplies no whitelist */
+}
+
 static inline int retry_transfer_wrapper(URLContext *h, uint8_t *buf,
                                          int size, int size_min,
                                          int (*transfer_func)(URLContext *h,
@@ -385,6 +471,7 @@ int ffurl_closep(URLContext **hh)
             av_opt_free(h->priv_data);
         av_freep(&h->priv_data);
     }
+    av_opt_free(h);
     av_freep(hh);
     return ret;
 }
@@ -421,6 +508,44 @@ int avio_check(const char *url, int flags)
     return ret;
 }
 
+int avpriv_io_move(const char *url_src, const char *url_dst)
+{
+    URLContext *h_src, *h_dst;
+    int ret = ffurl_alloc(&h_src, url_src, AVIO_FLAG_READ_WRITE, NULL);
+    if (ret < 0)
+        return ret;
+    ret = ffurl_alloc(&h_dst, url_dst, AVIO_FLAG_WRITE, NULL);
+    if (ret < 0) {
+        ffurl_close(h_src); /* do not leak the source context when the destination cannot be allocated */
+        return ret;
+    }
+    /* The move is delegated to the protocol; both URLs must share it and it must implement url_move, else ENOSYS. */
+    if (h_src->prot == h_dst->prot && h_src->prot->url_move)
+        ret = h_src->prot->url_move(h_src, h_dst);
+    else
+        ret = AVERROR(ENOSYS);
+    /* Both contexts are released whether or not the move succeeded. */
+    ffurl_close(h_src);
+    ffurl_close(h_dst);
+    return ret;
+}
+
+int avpriv_io_delete(const char *url)
+{
+    URLContext *h;
+    int ret = ffurl_alloc(&h, url, AVIO_FLAG_WRITE, NULL);
+    if (ret < 0)
+        return ret;
+    /* Deletion is delegated to the protocol's url_delete callback; ENOSYS if it has none. */
+    if (h->prot->url_delete)
+        ret = h->prot->url_delete(h);
+    else
+        ret = AVERROR(ENOSYS);
+    /* The context is released whether or not the deletion succeeded. */
+    ffurl_close(h);
+    return ret;
+}
+
 int avio_open_dir(AVIODirContext **s, const char *url, AVDictionary **options)
 {
     URLContext *h = NULL;
@@ -447,6 +572,7 @@ int avio_open_dir(AVIODirContext **s, const char *url, AVDictionary **options)
     if (ret < 0)
         goto fail;
 
+    h->is_connected = 1;
     ctx->url_context = h;
     *s = ctx;
     return 0;
diff --git a/libavformat/avio.h b/libavformat/avio.h
index 07305934..7fbce32b 100644
--- a/libavformat/avio.h
+++ b/libavformat/avio.h
@@ -122,6 +122,53 @@ typedef struct AVIOContext {
      * to any av_opt_* functions in that case.
      */
     const AVClass *av_class;
+
+    /*
+     * The following shows the relationship between buffer, buf_ptr, buf_end, buf_size,
+     * and pos, when reading and when writing (since AVIOContext is used for both):
+     *
+     **********************************************************************************
+     *                                   READING
+     **********************************************************************************
+     *
+     *                            |              buffer_size              |
+     *                            |---------------------------------------|
+     *                            |                                       |
+     *
+     *                         buffer          buf_ptr       buf_end
+     *                            +---------------+-----------------------+
+     *                            |/ / / / / / / /|/ / / / / / /|         |
+     *  read buffer:              |/ / consumed / | to be read /|         |
+     *                            |/ / / / / / / /|/ / / / / / /|         |
+     *                            +---------------+-----------------------+
+     *
+     *                                                         pos
+     *              +-------------------------------------------+-----------------+
+     *  input file: |                                           |                 |
+     *              +-------------------------------------------+-----------------+
+     *
+     *
+     **********************************************************************************
+     *                                   WRITING
+     **********************************************************************************
+     *
+     *                                          |          buffer_size          |
+     *                                          |-------------------------------|
+     *                                          |                               |
+     *
+     *                                       buffer              buf_ptr     buf_end
+     *                                          +-------------------+-----------+
+     *                                          |/ / / / / / / / / /|           |
+     *  write buffer:                           | / to be flushed / |           |
+     *                                          |/ / / / / / / / / /|           |
+     *                                          +-------------------+-----------+
+     *
+     *                                         pos
+     *               +--------------------------+-----------------------------------+
+     *  output file: |                          |                                   |
+     *               +--------------------------+-----------------------------------+
+     *
+     */
     unsigned char *buffer;  /**< Start of the buffer. */
     int buffer_size;        /**< Maximum buffer size */
     unsigned char *buf_ptr; /**< Current position in the buffer */
@@ -196,6 +243,17 @@ typedef struct AVIOContext {
      * This field is internal to libavformat and access from outside is not allowed.
      */
     int orig_buffer_size;
+
+    /**
+     * Threshold to favor readahead over seek.
+     * This is currently internal only; do not use from outside.
+     */
+    int short_seek_threshold;
+
+    /**
+     * ',' separated list of allowed protocols.
+     */
+    const char *protocol_whitelist;
 } AVIOContext;
 
 /* unbuffered I/O */
@@ -223,6 +281,25 @@ const char *avio_find_protocol_name(const char *url);
  */
 int avio_check(const char *url, int flags);
 
+/**
+ * Move or rename a resource.
+ *
+ * @note url_src and url_dst should share the same protocol and authority.
+ *
+ * @param url_src URL of the resource to be moved
+ * @param url_dst new URL of the resource if the operation succeeds
+ * @return >=0 on success or negative on error.
+ */
+int avpriv_io_move(const char *url_src, const char *url_dst);
+
+/**
+ * Delete a resource.
+ *
+ * @param url resource to be deleted.
+ * @return >=0 on success or negative on error.
+ */
+int avpriv_io_delete(const char *url);
+
 /**
  * Open directory for reading.
  *
@@ -386,7 +463,7 @@ attribute_deprecated
 int url_feof(AVIOContext *s);
 #endif
 
-/** @warning currently size is limited */
+/** @warning Writes up to 4 KiB per call */
 int avio_printf(AVIOContext *s, const char *fmt, ...) av_printf_format(2, 3);
 
 /**
@@ -560,7 +637,7 @@ int avio_open_dyn_buf(AVIOContext **s);
 /**
  * Return the written size and a pointer to the buffer. The buffer
  * must be freed with av_free().
- * Padding of FF_INPUT_BUFFER_PADDING_SIZE is added to the buffer.
+ * Padding of AV_INPUT_BUFFER_PADDING_SIZE is added to the buffer.
  *
  * @param s IO context
  * @param pbuffer pointer to a byte buffer
@@ -623,4 +700,33 @@ struct AVBPrint;
  */
 int avio_read_to_bprint(AVIOContext *h, struct AVBPrint *pb, size_t max_size);
 
+/**
+ * Accept and allocate a client context on a server context.
+ * @param  s the server context
+ * @param  c the client context, must be unallocated
+ * @return   >= 0 on success or a negative value corresponding
+ *           to an AVERROR on failure
+ */
+int avio_accept(AVIOContext *s, AVIOContext **c);
+
+/**
+ * Perform one step of the protocol handshake to accept a new client.
+ * This function must be called on a client returned by avio_accept() before
+ * using it as a read/write context.
+ * It is separate from avio_accept() because it may block.
+ * A step of the handshake is defined by places where the application may
+ * decide to change the proceedings.
+ * For example, on a protocol with a request header and a reply header, each
+ * one can constitute a step because the application may use the parameters
+ * from the request to change parameters in the reply; or each individual
+ * chunk of the request can constitute a step.
+ * If the handshake is already finished, avio_handshake() does nothing and
+ * returns 0 immediately.
+ *
+ * @param  c the client context to perform the handshake on
+ * @return   0   on a complete and successful handshake
+ *           > 0 if the handshake progressed, but is not complete
+ *           < 0 for an AVERROR code
+ */
+int avio_handshake(AVIOContext *c);
 #endif /* AVFORMAT_AVIO_H */
diff --git a/libavformat/avio_internal.h b/libavformat/avio_internal.h
index ad505673..f7c85882 100644
--- a/libavformat/avio_internal.h
+++ b/libavformat/avio_internal.h
@@ -149,6 +149,10 @@ int ffio_fdopen(AVIOContext **s, URLContext *h);
  */
 int ffio_open_null_buf(AVIOContext **s);
 
+int ffio_open_whitelist(AVIOContext **s, const char *url, int flags,
+                         const AVIOInterruptCB *int_cb, AVDictionary **options,
+                         const char *whitelist);
+
 /**
  * Close a null buffer.
  *
diff --git a/libavformat/aviobuf.c b/libavformat/aviobuf.c
index 392b369a..213ee96f 100644
--- a/libavformat/aviobuf.c
+++ b/libavformat/aviobuf.c
@@ -53,7 +53,11 @@ static const AVClass *ff_avio_child_class_next(const AVClass *prev)
     return prev ? NULL : &ffurl_context_class;
 }
 
+#define OFFSET(x) offsetof(AVIOContext,x)
+#define E AV_OPT_FLAG_ENCODING_PARAM
+#define D AV_OPT_FLAG_DECODING_PARAM
 static const AVOption ff_avio_options[] = {
+    {"protocol_whitelist", "List of protocols that are allowed to be used", OFFSET(protocol_whitelist), AV_OPT_TYPE_STRING, { .str = NULL },  CHAR_MIN, CHAR_MAX, D },
     { NULL },
 };
 
@@ -97,6 +101,7 @@ int ffio_init_context(AVIOContext *s,
     s->seekable        = seek ? AVIO_SEEKABLE_NORMAL : 0;
     s->max_packet_size = 0;
     s->update_checksum = NULL;
+    s->short_seek_threshold = SHORT_SEEK_THRESHOLD;
 
     if (!read_packet && !write_flag) {
         s->pos     = buffer_size;
@@ -212,6 +217,7 @@ int64_t avio_seek(AVIOContext *s, int64_t offset, int whence)
         return AVERROR(EINVAL);
 
     buffer_size = s->buf_end - s->buffer;
+    // pos is the absolute position that the beginning of s->buffer corresponds to in the file
     pos = s->pos - (s->write_flag ? 0 : buffer_size);
 
     if (whence != SEEK_CUR && whence != SEEK_SET)
@@ -226,13 +232,13 @@ int64_t avio_seek(AVIOContext *s, int64_t offset, int whence)
     if (offset < 0)
         return AVERROR(EINVAL);
 
-    offset1 = offset - pos;
+    offset1 = offset - pos; // "offset1" is the relative offset from the beginning of s->buffer
     if (!s->must_flush && (!s->direct || !s->seek) &&
         offset1 >= 0 && offset1 <= buffer_size - s->write_flag) {
         /* can do the seek inside the buffer */
         s->buf_ptr = s->buffer + offset1;
     } else if ((!s->seekable ||
-               offset1 <= s->buf_end + SHORT_SEEK_THRESHOLD - s->buffer) &&
+               offset1 <= buffer_size + s->short_seek_threshold) &&
                !s->write_flag && offset1 >= 0 &&
                (!s->direct || !s->seek) &&
               (whence != SEEK_END || force)) {
@@ -240,7 +246,7 @@ int64_t avio_seek(AVIOContext *s, int64_t offset, int whence)
             fill_buffer(s);
         if (s->eof_reached)
             return AVERROR_EOF;
-        s->buf_ptr = s->buf_end + offset - s->pos;
+        s->buf_ptr = s->buf_end - (s->pos - offset);
     } else if(!s->write_flag && offset1 < 0 && -offset1 < buffer_size>>1 && s->seek && offset > 0) {
         int64_t res;
 
@@ -359,6 +365,8 @@ static inline int put_str16(AVIOContext *s, const char *str, const int be)
 invalid:
         av_log(s, AV_LOG_ERROR, "Invaid UTF8 sequence in avio_put_str16%s\n", be ? "be" : "le");
         err = AVERROR(EINVAL);
+        if (!*(q-1))
+            break;
     }
     if (be)
         avio_wb16(s, 0);
@@ -540,13 +548,13 @@ int avio_read(AVIOContext *s, unsigned char *buf, int size)
 
     size1 = size;
     while (size > 0) {
-        len = s->buf_end - s->buf_ptr;
-        if (len > size)
-            len = size;
+        len = FFMIN(s->buf_end - s->buf_ptr, size);
         if (len == 0 || s->write_flag) {
-            if((s->direct || size > s->buffer_size) && !s->update_checksum){
+            if((s->direct || size > s->buffer_size) && !s->update_checksum) {
+                // bypass the buffer and read data directly into buf
                 if(s->read_packet)
                     len = s->read_packet(s->opaque, buf, size);
+
                 if (len <= 0) {
                     /* do not modify buffer if EOF reached so that a seek back can
                     be done without rereading data */
@@ -559,6 +567,7 @@ int avio_read(AVIOContext *s, unsigned char *buf, int size)
                     s->bytes_read += len;
                     size -= len;
                     buf += len;
+                    // reset the buffer
                     s->buf_ptr = s->buffer;
                     s->buf_end = s->buffer/* + len*/;
                 }
@@ -795,6 +804,11 @@ int ffio_fdopen(AVIOContext **s, URLContext *h)
         av_free(buffer);
         return AVERROR(ENOMEM);
     }
+    (*s)->protocol_whitelist = av_strdup(h->protocol_whitelist);
+    if (!(*s)->protocol_whitelist && h->protocol_whitelist) {
+        avio_closep(s);
+        return AVERROR(ENOMEM);
+    }
     (*s)->direct = h->flags & AVIO_FLAG_DIRECT;
     (*s)->seekable = h->is_streamed ? 0 : AVIO_SEEKABLE_NORMAL;
     (*s)->max_packet_size = max_packet_size;
@@ -914,13 +928,15 @@ int avio_open(AVIOContext **s, const char *filename, int flags)
     return avio_open2(s, filename, flags, NULL, NULL);
 }
 
-int avio_open2(AVIOContext **s, const char *filename, int flags,
-               const AVIOInterruptCB *int_cb, AVDictionary **options)
+int ffio_open_whitelist(AVIOContext **s, const char *filename, int flags,
+                         const AVIOInterruptCB *int_cb, AVDictionary **options,
+                         const char *whitelist
+                        )
 {
     URLContext *h;
     int err;
 
-    err = ffurl_open(&h, filename, flags, int_cb, options);
+    err = ffurl_open_whitelist(&h, filename, flags, int_cb, options, whitelist);
     if (err < 0)
         return err;
     err = ffio_fdopen(s, h);
@@ -931,10 +947,16 @@ int avio_open2(AVIOContext **s, const char *filename, int flags,
     return 0;
 }
 
+int avio_open2(AVIOContext **s, const char *filename, int flags,
+               const AVIOInterruptCB *int_cb, AVDictionary **options)
+{
+    return ffio_open_whitelist(s, filename, flags, int_cb, options, NULL);
+}
+
 int ffio_open2_wrapper(struct AVFormatContext *s, AVIOContext **pb, const char *url, int flags,
                        const AVIOInterruptCB *int_cb, AVDictionary **options)
 {
-    return avio_open2(pb, url, flags, int_cb, options);
+    return ffio_open_whitelist(pb, url, flags, int_cb, options, s->protocol_whitelist);
 }
 
 int avio_close(AVIOContext *s)
@@ -951,6 +973,7 @@ int avio_close(AVIOContext *s)
         av_log(s, AV_LOG_DEBUG, "Statistics: %d seeks, %d writeouts\n", s->seek_count, s->writeout_count);
     else
         av_log(s, AV_LOG_DEBUG, "Statistics: %"PRId64" bytes read, %d seeks\n", s->bytes_read, s->seek_count);
+    av_opt_free(s);
     av_free(s);
     return ffurl_close(h);
 }
@@ -965,7 +988,7 @@ int avio_closep(AVIOContext **s)
 int avio_printf(AVIOContext *s, const char *fmt, ...)
 {
     va_list ap;
-    char buf[4096];
+    char buf[4096]; /* update doc entry in avio.h if changed */
     int ret;
 
     va_start(ap, fmt);
@@ -1020,6 +1043,23 @@ int avio_read_to_bprint(AVIOContext *h, AVBPrint *pb, size_t max_size)
     return 0;
 }
 
+int avio_accept(AVIOContext *s, AVIOContext **c)
+{
+    int ret;
+    URLContext *sc = s->opaque;
+    URLContext *cc = NULL;
+    ret = ffurl_accept(sc, &cc);
+    if (ret < 0)
+        return ret;
+    return ffio_fdopen(c, cc);
+}
+
+int avio_handshake(AVIOContext *c)
+{
+    URLContext *cc = c->opaque;
+    return ffurl_handshake(cc);
+}
+
 /* output in a dynamic buffer */
 
 typedef struct DynBuffer {
@@ -1129,7 +1169,7 @@ int avio_close_dyn_buf(AVIOContext *s, uint8_t **pbuffer)
 {
     DynBuffer *d;
     int size;
-    static const char padbuf[FF_INPUT_BUFFER_PADDING_SIZE] = {0};
+    static const char padbuf[AV_INPUT_BUFFER_PADDING_SIZE] = {0};
     int padding = 0;
 
     if (!s) {
@@ -1140,7 +1180,7 @@ int avio_close_dyn_buf(AVIOContext *s, uint8_t **pbuffer)
     /* don't attempt to pad fixed-size packet buffers */
     if (!s->max_packet_size) {
         avio_write(s, padbuf, sizeof(padbuf));
-        padding = FF_INPUT_BUFFER_PADDING_SIZE;
+        padding = AV_INPUT_BUFFER_PADDING_SIZE;
     }
 
     avio_flush(s);
diff --git a/libavformat/avisynth.c b/libavformat/avisynth.c
index 7dc5ee7d..45641c0f 100644
--- a/libavformat/avisynth.c
+++ b/libavformat/avisynth.c
@@ -237,13 +237,12 @@ static int avisynth_create_stream_video(AVFormatContext *s, AVStream *st)
     st->codec->width      = avs->vi->width;
     st->codec->height     = avs->vi->height;
 
-    st->time_base         = (AVRational) { avs->vi->fps_denominator,
-                                           avs->vi->fps_numerator };
     st->avg_frame_rate    = (AVRational) { avs->vi->fps_numerator,
                                            avs->vi->fps_denominator };
     st->start_time        = 0;
     st->duration          = avs->vi->num_frames;
     st->nb_frames         = avs->vi->num_frames;
+    avpriv_set_pts_info(st, 32, avs->vi->fps_denominator, avs->vi->fps_numerator);
 
     switch (avs->vi->pixel_type) {
 #ifdef USING_AVISYNTH
@@ -311,9 +310,8 @@ static int avisynth_create_stream_audio(AVFormatContext *s, AVStream *st)
     st->codec->codec_type  = AVMEDIA_TYPE_AUDIO;
     st->codec->sample_rate = avs->vi->audio_samples_per_second;
     st->codec->channels    = avs->vi->nchannels;
-    st->time_base          = (AVRational) { 1,
-                                            avs->vi->audio_samples_per_second };
     st->duration           = avs->vi->num_audio_samples;
+    avpriv_set_pts_info(st, 64, 1, avs->vi->audio_samples_per_second);
 
     switch (avs->vi->sample_type) {
     case AVS_SAMPLE_INT8:
diff --git a/libavformat/avs.c b/libavformat/avs.c
index b699dbf9..b264b55e 100644
--- a/libavformat/avs.c
+++ b/libavformat/avs.c
@@ -108,7 +108,7 @@ avs_read_video_packet(AVFormatContext * s, AVPacket * pkt,
     pkt->data[palette_size + 3] = (size >> 8) & 0xFF;
     ret = avio_read(s->pb, pkt->data + palette_size + 4, size - 4) + 4;
     if (ret < size) {
-        av_free_packet(pkt);
+        av_packet_unref(pkt);
         return AVERROR(EIO);
     }
 
diff --git a/libavformat/brstm.c b/libavformat/brstm.c
index 19a4a2a9..aae2575f 100644
--- a/libavformat/brstm.c
+++ b/libavformat/brstm.c
@@ -30,8 +30,12 @@ typedef struct BRSTMDemuxContext {
     uint32_t    current_block;
     uint32_t    samples_per_block;
     uint32_t    last_block_used_bytes;
+    uint32_t    last_block_size;
+    uint32_t    last_block_samples;
+    uint32_t    data_start;
     uint8_t     *table;
     uint8_t     *adpc;
+    int         little_endian;
 } BRSTMDemuxContext;
 
 static int probe(AVProbeData *p)
@@ -43,6 +47,16 @@ static int probe(AVProbeData *p)
     return 0;
 }
 
+static int probe_bfstm(AVProbeData *p)
+{
+    if ((AV_RL32(p->buf) == MKTAG('F','S','T','M') ||
+         AV_RL32(p->buf) == MKTAG('C','S','T','M')) &&
+        (AV_RL16(p->buf + 4) == 0xFFFE ||
+         AV_RL16(p->buf + 4) == 0xFEFF))
+        return AVPROBE_SCORE_MAX / 3 * 2;
+    return 0;
+}
+
 static int read_close(AVFormatContext *s)
 {
     BRSTMDemuxContext *b = s->priv_data;
@@ -53,14 +67,34 @@ static int read_close(AVFormatContext *s)
     return 0;
 }
 
+static av_always_inline unsigned int read16(AVFormatContext *s)
+{
+    BRSTMDemuxContext *b = s->priv_data;
+    if (b->little_endian)
+        return avio_rl16(s->pb);
+    else
+        return avio_rb16(s->pb);
+}
+
+static av_always_inline unsigned int read32(AVFormatContext *s)
+{
+    BRSTMDemuxContext *b = s->priv_data;
+    if (b->little_endian)
+        return avio_rl32(s->pb);
+    else
+        return avio_rb32(s->pb);
+}
+
 static int read_header(AVFormatContext *s)
 {
     BRSTMDemuxContext *b = s->priv_data;
     int bom, major, minor, codec, chunk;
-    int64_t pos, h1offset, toffset;
-    uint32_t size, start, asize;
+    int64_t h1offset, pos, toffset;
+    uint32_t size, asize, start = 0;
     AVStream *st;
     int ret = AVERROR_EOF;
+    int loop = 0;
+    int bfstm = !strcmp("bfstm", s->iformat->name);
 
     st = avformat_new_stream(s, NULL);
     if (!st)
@@ -74,31 +108,75 @@ static int read_header(AVFormatContext *s)
         av_log(s, AV_LOG_ERROR, "invalid byte order: %X\n", bom);
         return AVERROR_INVALIDDATA;
     }
-    if (bom == 0xFFFE) {
-        avpriv_request_sample(s, "little endian byte order");
-        return AVERROR_PATCHWELCOME;
-    }
 
-    major = avio_r8(s->pb);
-    minor = avio_r8(s->pb);
-    avio_skip(s->pb, 4); // size of file
-    size = avio_rb16(s->pb);
-    if (size < 14)
-        return AVERROR_INVALIDDATA;
+    if (bom == 0xFFFE)
+        b->little_endian = 1;
 
-    avio_skip(s->pb, size - 14);
-    pos = avio_tell(s->pb);
-    if (avio_rl32(s->pb) != MKTAG('H','E','A','D'))
-        return AVERROR_INVALIDDATA;
-    size = avio_rb32(s->pb);
-    if (size < 256)
+    if (!bfstm) {
+        major = avio_r8(s->pb);
+        minor = avio_r8(s->pb);
+        avio_skip(s->pb, 4); // size of file
+        size = read16(s);
+        if (size < 14)
+            return AVERROR_INVALIDDATA;
+
+        avio_skip(s->pb, size - 14);
+        pos = avio_tell(s->pb);
+        if (avio_rl32(s->pb) != MKTAG('H','E','A','D'))
+            return AVERROR_INVALIDDATA;
+    } else {
+        uint32_t info_offset = 0;
+        uint16_t section_count, header_size, i;
+
+        header_size = read16(s); // 6
+
+        avio_skip(s->pb, 4); // Unknown constant 0x00030000
+        avio_skip(s->pb, 4); // size of file
+        section_count = read16(s);
+        avio_skip(s->pb, 2); // padding
+        for (i = 0; avio_tell(s->pb) < header_size
+                    && !(start && info_offset)
+                    && i < section_count; i++) {
+            uint16_t flag = read16(s);
+            avio_skip(s->pb, 2);
+            switch (flag) {
+            case 0x4000:
+                info_offset = read32(s);
+                /*info_size =*/ read32(s);
+                break;
+            case 0x4001:
+                avio_skip(s->pb, 4); // seek offset
+                avio_skip(s->pb, 4); // seek size
+                break;
+            case 0x4002:
+                start = read32(s) + 8;
+                avio_skip(s->pb, 4); //data_size = read32(s);
+                break;
+            case 0x4003:
+                avio_skip(s->pb, 4); // REGN offset
+                avio_skip(s->pb, 4); // REGN size
+                break;
+            }
+        }
+
+        if (!info_offset || !start)
+            return AVERROR_INVALIDDATA;
+
+        avio_skip(s->pb, info_offset - avio_tell(s->pb));
+        pos = avio_tell(s->pb);
+        if (avio_rl32(s->pb) != MKTAG('I','N','F','O'))
+            return AVERROR_INVALIDDATA;
+    }
+
+    size = read32(s);
+    if (size < 192)
         return AVERROR_INVALIDDATA;
     avio_skip(s->pb, 4); // unknown
-    h1offset = avio_rb32(s->pb);
+    h1offset = read32(s);
     if (h1offset > size)
         return AVERROR_INVALIDDATA;
     avio_skip(s->pb, 12);
-    toffset = avio_rb32(s->pb) + 16LL;
+    toffset = read32(s) + 16LL;
     if (toffset > size)
         return AVERROR_INVALIDDATA;
 
@@ -107,57 +185,77 @@ static int read_header(AVFormatContext *s)
 
     switch (codec) {
     case 0: codec = AV_CODEC_ID_PCM_S8_PLANAR;    break;
-    case 1: codec = AV_CODEC_ID_PCM_S16BE_PLANAR; break;
-    case 2: codec = AV_CODEC_ID_ADPCM_THP;        break;
+    case 1: codec = b->little_endian ?
+                    AV_CODEC_ID_PCM_S16LE_PLANAR :
+                    AV_CODEC_ID_PCM_S16BE_PLANAR; break;
+    case 2: codec = b->little_endian ?
+                    AV_CODEC_ID_ADPCM_THP_LE :
+                    AV_CODEC_ID_ADPCM_THP;        break;
     default:
         avpriv_request_sample(s, "codec %d", codec);
         return AVERROR_PATCHWELCOME;
     }
 
-    avio_skip(s->pb, 1); // loop flag
+    loop = avio_r8(s->pb); // loop flag
     st->codec->codec_id = codec;
     st->codec->channels = avio_r8(s->pb);
     if (!st->codec->channels)
         return AVERROR_INVALIDDATA;
 
     avio_skip(s->pb, 1); // padding
-    st->codec->sample_rate = avio_rb16(s->pb);
-    if (!st->codec->sample_rate)
+
+    st->codec->sample_rate = bfstm ? read32(s) : read16(s);
+    if (st->codec->sample_rate <= 0)
         return AVERROR_INVALIDDATA;
 
-    avio_skip(s->pb, 2); // padding
-    avio_skip(s->pb, 4); // loop start sample
+    if (!bfstm)
+        avio_skip(s->pb, 2); // padding
+
+    if (loop) {
+        if (av_dict_set_int(&s->metadata, "loop_start",
+                            av_rescale(read32(s), AV_TIME_BASE,
+                                       st->codec->sample_rate),
+                            0) < 0)
+            return AVERROR(ENOMEM);
+    } else {
+        avio_skip(s->pb, 4);
+    }
+
     st->start_time = 0;
-    st->duration = avio_rb32(s->pb);
+    st->duration = read32(s);
     avpriv_set_pts_info(st, 64, 1, st->codec->sample_rate);
 
-    start = avio_rb32(s->pb);
+    if (!bfstm)
+        start = read32(s);
     b->current_block = 0;
-    b->block_count = avio_rb32(s->pb);
+    b->block_count = read32(s);
     if (b->block_count > UINT16_MAX) {
         av_log(s, AV_LOG_WARNING, "too many blocks: %u\n", b->block_count);
         return AVERROR_INVALIDDATA;
     }
 
-    b->block_size = avio_rb32(s->pb);
-    if (b->block_size > UINT16_MAX / st->codec->channels)
+    b->block_size = read32(s);
+    if (b->block_size > UINT32_MAX / st->codec->channels)
         return AVERROR_INVALIDDATA;
-    b->block_size *= st->codec->channels;
 
-    b->samples_per_block = avio_rb32(s->pb);
-    b->last_block_used_bytes = avio_rb32(s->pb);
-    if (b->last_block_used_bytes > UINT16_MAX / st->codec->channels)
+    b->samples_per_block = read32(s);
+    b->last_block_used_bytes = read32(s);
+    b->last_block_samples = read32(s);
+    b->last_block_size = read32(s);
+    if (b->last_block_size > UINT32_MAX / st->codec->channels)
+        return AVERROR_INVALIDDATA;
+    if (b->last_block_used_bytes > b->last_block_size)
         return AVERROR_INVALIDDATA;
-    b->last_block_used_bytes *= st->codec->channels;
 
-    avio_skip(s->pb, 4); // last block samples
-    avio_skip(s->pb, 4); // last block size
 
-    if (codec == AV_CODEC_ID_ADPCM_THP) {
+    if (codec == AV_CODEC_ID_ADPCM_THP || codec == AV_CODEC_ID_ADPCM_THP_LE) {
         int ch;
 
         avio_skip(s->pb, pos + toffset - avio_tell(s->pb));
-        toffset = avio_rb32(s->pb) + 16LL;
+        if (!bfstm)
+            toffset = read32(s) + 16LL;
+        else
+            toffset = toffset + read32(s) + st->codec->channels * 8 - 8;
         if (toffset > size)
             return AVERROR_INVALIDDATA;
 
@@ -171,7 +269,7 @@ static int read_header(AVFormatContext *s)
                 ret = AVERROR_INVALIDDATA;
                 goto fail;
             }
-            avio_skip(s->pb, 24);
+            avio_skip(s->pb, bfstm ? 14 : 24);
         }
     }
 
@@ -179,19 +277,22 @@ static int read_header(AVFormatContext *s)
         ret = AVERROR_INVALIDDATA;
         goto fail;
     }
+
     avio_skip(s->pb, size - (avio_tell(s->pb) - pos));
 
     while (!avio_feof(s->pb)) {
         chunk = avio_rl32(s->pb);
-        size  = avio_rb32(s->pb);
+        size  = read32(s);
         if (size < 8) {
             ret = AVERROR_INVALIDDATA;
             goto fail;
         }
         size -= 8;
         switch (chunk) {
+        case MKTAG('S','E','E','K'):
         case MKTAG('A','D','P','C'):
-            if (codec != AV_CODEC_ID_ADPCM_THP)
+            if (codec != AV_CODEC_ID_ADPCM_THP &&
+                codec != AV_CODEC_ID_ADPCM_THP_LE)
                 goto skip;
 
             asize = b->block_count * st->codec->channels * 4;
@@ -208,19 +309,36 @@ static int read_header(AVFormatContext *s)
                     ret = AVERROR(ENOMEM);
                     goto fail;
                 }
-                avio_read(s->pb, b->adpc, asize);
+                if (bfstm && codec != AV_CODEC_ID_ADPCM_THP_LE) {
+                    // Big-endian BFSTMs have little-endian SEEK tables
+                    // for some strange reason.
+                    int i;
+                    for (i = 0; i < asize; i += 2) {
+                        b->adpc[i+1] = avio_r8(s->pb);
+                        b->adpc[i]   = avio_r8(s->pb);
+                    }
+                } else {
+                    avio_read(s->pb, b->adpc, asize);
+                }
                 avio_skip(s->pb, size - asize);
             }
             break;
         case MKTAG('D','A','T','A'):
             if ((start < avio_tell(s->pb)) ||
-                (!b->adpc && codec == AV_CODEC_ID_ADPCM_THP)) {
+                (!b->adpc && (codec == AV_CODEC_ID_ADPCM_THP ||
+                              codec == AV_CODEC_ID_ADPCM_THP_LE))) {
                 ret = AVERROR_INVALIDDATA;
                 goto fail;
             }
             avio_skip(s->pb, start - avio_tell(s->pb));
 
-            if (major != 1 || minor)
+            if (bfstm && (codec == AV_CODEC_ID_ADPCM_THP ||
+                          codec == AV_CODEC_ID_ADPCM_THP_LE))
+                avio_skip(s->pb, 24);
+
+            b->data_start = avio_tell(s->pb);
+
+            if (!bfstm && (major != 1 || minor))
                 avpriv_request_sample(s, "Version %d.%d", major, minor);
 
             return 0;
@@ -241,15 +359,25 @@ static int read_packet(AVFormatContext *s, AVPacket *pkt)
 {
     AVCodecContext *codec = s->streams[0]->codec;
     BRSTMDemuxContext *b = s->priv_data;
-    uint32_t samples, size;
-    int ret;
+    uint32_t samples, size, skip = 0;
+    int ret, i;
 
     if (avio_feof(s->pb))
         return AVERROR_EOF;
     b->current_block++;
     if (b->current_block == b->block_count) {
         size    = b->last_block_used_bytes;
-        samples = size / (8 * codec->channels) * 14;
+        samples = b->last_block_samples;
+        skip    = b->last_block_size - b->last_block_used_bytes;
+
+        if (samples < size * 14 / 8) {
+            uint32_t adjusted_size = samples / 14 * 8;
+            if (samples % 14)
+                adjusted_size += (samples % 14 + 1) / 2 + 1;
+
+            skip += size - adjusted_size;
+            size = adjusted_size;
+        }
     } else if (b->current_block < b->block_count) {
         size    = b->block_size;
         samples = b->samples_per_block;
@@ -257,23 +385,50 @@ static int read_packet(AVFormatContext *s, AVPacket *pkt)
         return AVERROR_EOF;
     }
 
-    if (codec->codec_id == AV_CODEC_ID_ADPCM_THP) {
+    if (codec->codec_id == AV_CODEC_ID_ADPCM_THP ||
+        codec->codec_id == AV_CODEC_ID_ADPCM_THP_LE) {
         uint8_t *dst;
 
-        if (av_new_packet(pkt, 8 + (32 + 4) * codec->channels + size) < 0)
+        if (!b->adpc) {
+            av_log(s, AV_LOG_ERROR, "adpcm_thp requires ADPC chunk, but none was found.\n");
+            return AVERROR_INVALIDDATA;
+        }
+        if (!b->table) {
+            b->table = av_mallocz(32 * codec->channels);
+            if (!b->table)
+                return AVERROR(ENOMEM);
+        }
+
+        if (size > (INT_MAX - 32 - 4) ||
+            (32 + 4 + size) > (INT_MAX / codec->channels) ||
+            (32 + 4 + size) * codec->channels > INT_MAX - 8)
+            return AVERROR_INVALIDDATA;
+        if (av_new_packet(pkt, 8 + (32 + 4 + size) * codec->channels) < 0)
             return AVERROR(ENOMEM);
         dst = pkt->data;
-        bytestream_put_be32(&dst, size);
-        bytestream_put_be32(&dst, samples);
+        if (codec->codec_id == AV_CODEC_ID_ADPCM_THP_LE) {
+            bytestream_put_le32(&dst, size * codec->channels);
+            bytestream_put_le32(&dst, samples);
+        } else {
+            bytestream_put_be32(&dst, size * codec->channels);
+            bytestream_put_be32(&dst, samples);
+        }
         bytestream_put_buffer(&dst, b->table, 32 * codec->channels);
         bytestream_put_buffer(&dst, b->adpc + 4 * codec->channels *
                                     (b->current_block - 1), 4 * codec->channels);
 
-        ret = avio_read(s->pb, dst, size);
-        if (ret != size)
-            av_free_packet(pkt);
+        for (i = 0; i < codec->channels; i++) {
+            ret = avio_read(s->pb, dst, size);
+            dst += size;
+            avio_skip(s->pb, skip);
+            if (ret != size) {
+                av_packet_unref(pkt);
+                break;
+            }
+        }
         pkt->duration = samples;
     } else {
+        size *= codec->channels;
         ret = av_get_packet(s->pb, pkt, size);
     }
 
@@ -285,6 +440,24 @@ static int read_packet(AVFormatContext *s, AVPacket *pkt)
     return ret;
 }
 
+static int read_seek(AVFormatContext *s, int stream_index,
+                     int64_t timestamp, int flags)
+{
+    AVStream *st = s->streams[stream_index];
+    BRSTMDemuxContext *b = s->priv_data;
+    int64_t ret = 0;
+
+    timestamp /= b->samples_per_block;
+    ret = avio_seek(s->pb, b->data_start + timestamp * b->block_size *
+                           st->codec->channels, SEEK_SET);
+    if (ret < 0)
+        return ret;
+
+    b->current_block = timestamp;
+    ff_update_cur_dts(s, st, timestamp * b->samples_per_block);
+    return 0;
+}
+
 AVInputFormat ff_brstm_demuxer = {
     .name           = "brstm",
     .long_name      = NULL_IF_CONFIG_SMALL("BRSTM (Binary Revolution Stream)"),
@@ -293,5 +466,18 @@ AVInputFormat ff_brstm_demuxer = {
     .read_header    = read_header,
     .read_packet    = read_packet,
     .read_close     = read_close,
+    .read_seek      = read_seek,
     .extensions     = "brstm",
 };
+
+AVInputFormat ff_bfstm_demuxer = {
+    .name           = "bfstm",
+    .long_name      = NULL_IF_CONFIG_SMALL("BFSTM (Binary Cafe Stream)"),
+    .priv_data_size = sizeof(BRSTMDemuxContext), /* uses the BRSTM private context type */
+    .read_probe     = probe_bfstm,
+    .read_header    = read_header, /* header/packet/close/seek callbacks are the ones ff_brstm_demuxer uses */
+    .read_packet    = read_packet,
+    .read_close     = read_close,
+    .read_seek      = read_seek,
+    .extensions     = "bfstm,bcstm", /* also claims the .bcstm extension */
+};
diff --git a/libavformat/c93.c b/libavformat/c93.c
index d67afcfb..20ae9c49 100644
--- a/libavformat/c93.c
+++ b/libavformat/c93.c
@@ -188,7 +188,7 @@ static int read_packet(AVFormatContext *s, AVPacket *pkt)
     return 0;
 
     fail:
-    av_free_packet(pkt);
+    av_packet_unref(pkt);
     return ret;
 }
 
diff --git a/libavformat/cache.c b/libavformat/cache.c
index d3d12bb4..8aed37eb 100644
--- a/libavformat/cache.c
+++ b/libavformat/cache.c
@@ -65,9 +65,9 @@ typedef struct Context {
     int read_ahead_limit;
 } Context;
 
-static int cmp(void *key, const void *node)
+static int cmp(const void *key, const void *node)
 {
-    return (*(int64_t *) key) - ((const CacheEntry *) node)->logical_pos;
+    return FFDIFFSIGN(*(const int64_t *)key, ((const CacheEntry *) node)->logical_pos);
 }
 
 static int cache_open(URLContext *h, const char *arg, int flags, AVDictionary **options)
@@ -86,7 +86,8 @@ static int cache_open(URLContext *h, const char *arg, int flags, AVDictionary **
     unlink(buffername);
     av_freep(&buffername);
 
-    return ffurl_open(&c->inner, arg, flags, &h->interrupt_callback, options);
+    return ffurl_open_whitelist(&c->inner, arg, flags, &h->interrupt_callback,
+                                options, h->protocol_whitelist);
 }
 
 static int add_entry(URLContext *h, const unsigned char *buf, int size)
@@ -156,7 +157,7 @@ static int cache_read(URLContext *h, unsigned char *buf, int size)
 {
     Context *c= h->priv_data;
     CacheEntry *entry, *next[2] = {NULL, NULL};
-    int r;
+    int64_t r;
 
     entry = av_tree_find(c->root, &c->logical_pos, cmp, (void**)next);
 
@@ -282,6 +283,12 @@ static int64_t cache_seek(URLContext *h, int64_t pos, int whence)
     return ret;
 }
 
+static int enu_free(void *opaque, void *elem) /* av_tree_enumerate() callback used by cache_close() */
+{
+    av_free(elem); /* release the element handed to the callback; opaque is not used */
+    return 0;
+}
+
 static int cache_close(URLContext *h)
 {
     Context *c= h->priv_data;
@@ -291,6 +298,7 @@ static int cache_close(URLContext *h)
 
     close(c->fd);
     ffurl_close(c->inner);
+    av_tree_enumerate(c->root, NULL, NULL, enu_free);
     av_tree_destroy(c->root);
 
     return 0;
diff --git a/libavformat/caf.c b/libavformat/caf.c
index c1ecc944..00854615 100644
--- a/libavformat/caf.c
+++ b/libavformat/caf.c
@@ -61,6 +61,18 @@ const AVCodecTag ff_codec_caf_tags[] = {
   /*{ MPEG4CELP                 MKTAG('c','e','l','p') },*/
   /*{ MPEG4HVXC                 MKTAG('h','v','x','c') },*/
   /*{ MPEG4TwinVQ               MKTAG('t','w','v','q') },*/
+
+    { AV_CODEC_ID_PCM_S8,       MKTAG('l','p','c','m') },
+    { AV_CODEC_ID_PCM_S16LE,    MKTAG('l','p','c','m') },
+    { AV_CODEC_ID_PCM_S16BE,    MKTAG('l','p','c','m') },
+    { AV_CODEC_ID_PCM_S24LE,    MKTAG('l','p','c','m') },
+    { AV_CODEC_ID_PCM_S24BE,    MKTAG('l','p','c','m') },
+    { AV_CODEC_ID_PCM_S32LE,    MKTAG('l','p','c','m') },
+    { AV_CODEC_ID_PCM_S32BE,    MKTAG('l','p','c','m') },
+    { AV_CODEC_ID_PCM_F32LE,    MKTAG('l','p','c','m') },
+    { AV_CODEC_ID_PCM_F32BE,    MKTAG('l','p','c','m') },
+    { AV_CODEC_ID_PCM_F64LE,    MKTAG('l','p','c','m') },
+    { AV_CODEC_ID_PCM_F64BE,    MKTAG('l','p','c','m') },
     { AV_CODEC_ID_NONE,            0 },
 };
 
diff --git a/libavformat/cafdec.c b/libavformat/cafdec.c
index cc6ed0ce..bfbbb026 100644
--- a/libavformat/cafdec.c
+++ b/libavformat/cafdec.c
@@ -101,7 +101,7 @@ static int read_kuki_chunk(AVFormatContext *s, int64_t size)
     AVIOContext *pb = s->pb;
     AVStream *st      = s->streams[0];
 
-    if (size < 0 || size > INT_MAX - FF_INPUT_BUFFER_PADDING_SIZE)
+    if (size < 0 || size > INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE)
         return -1;
 
     if (st->codec->codec_id == AV_CODEC_ID_AAC) {
diff --git a/libavformat/cafenc.c b/libavformat/cafenc.c
index 1708275e..544bc4a1 100644
--- a/libavformat/cafenc.c
+++ b/libavformat/cafenc.c
@@ -120,21 +120,6 @@ static int caf_write_header(AVFormatContext *s)
         return AVERROR_PATCHWELCOME;
     }
 
-    switch (enc->codec_id) {
-    case AV_CODEC_ID_PCM_S8:
-    case AV_CODEC_ID_PCM_S16LE:
-    case AV_CODEC_ID_PCM_S16BE:
-    case AV_CODEC_ID_PCM_S24LE:
-    case AV_CODEC_ID_PCM_S24BE:
-    case AV_CODEC_ID_PCM_S32LE:
-    case AV_CODEC_ID_PCM_S32BE:
-    case AV_CODEC_ID_PCM_F32LE:
-    case AV_CODEC_ID_PCM_F32BE:
-    case AV_CODEC_ID_PCM_F64LE:
-    case AV_CODEC_ID_PCM_F64BE:
-        codec_tag = MKTAG('l','p','c','m');
-    }
-
     if (!codec_tag) {
         av_log(s, AV_LOG_ERROR, "unsupported codec\n");
         return AVERROR_INVALIDDATA;
diff --git a/libavformat/cdg.c b/libavformat/cdg.c
index baf37d4c..b1f137ff 100644
--- a/libavformat/cdg.c
+++ b/libavformat/cdg.c
@@ -63,7 +63,7 @@ static int read_packet(AVFormatContext *s, AVPacket *pkt)
         ret = av_get_packet(s->pb, pkt, CDG_PACKET_SIZE);
         if (ret < 1 || (pkt->data[0] & CDG_MASK) == CDG_COMMAND)
             break;
-        av_free_packet(pkt);
+        av_packet_unref(pkt);
     }
 
     if (!priv->got_first_packet) {
diff --git a/libavformat/cdxl.c b/libavformat/cdxl.c
index f198bf50..3d80b477 100644
--- a/libavformat/cdxl.c
+++ b/libavformat/cdxl.c
@@ -202,7 +202,7 @@ static int cdxl_read_packet(AVFormatContext *s, AVPacket *pkt)
         memcpy(pkt->data, cdxl->header, CDXL_HEADER_SIZE);
         ret = avio_read(pb, pkt->data + CDXL_HEADER_SIZE, video_size);
         if (ret < 0) {
-            av_free_packet(pkt);
+            av_packet_unref(pkt);
             return ret;
         }
         av_shrink_packet(pkt, CDXL_HEADER_SIZE + ret);
diff --git a/libavformat/chromaprint.c b/libavformat/chromaprint.c
new file mode 100644
index 00000000..4d67f434
--- /dev/null
+++ b/libavformat/chromaprint.c
@@ -0,0 +1,186 @@
+/*
+ * Chromaprint fingerprinting muxer
+ * Copyright (c) 2015 Rodger Combs
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avformat.h"
+#include "libavutil/opt.h"
+#include "libavcodec/internal.h"
+#include <chromaprint.h>
+
+#define CPR_VERSION_INT AV_VERSION_INT(CHROMAPRINT_VERSION_MAJOR, \
+                                       CHROMAPRINT_VERSION_MINOR, \
+                                       CHROMAPRINT_VERSION_PATCH) /* compared in write_header() to gate silence_threshold */
+
+typedef enum FingerprintFormat {
+    FINGERPRINT_RAW,        /* "raw": binary raw fingerprint */
+    FINGERPRINT_COMPRESSED, /* "compressed": binary compressed fingerprint */
+    FINGERPRINT_BASE64,     /* "base64": Base64 compressed fingerprint (the fp_format default) */
+} FingerprintFormat;
+
+typedef struct ChromaprintMuxContext {
+    const AVClass *class;        /* NOTE(review): presumably must stay the first member for AVOptions - confirm */
+    int silence_threshold;       /* "silence_threshold" option; -1 means write_header() does not set it */
+    int algorithm;               /* "algorithm" option, passed to chromaprint_new() and the fingerprint encoder */
+    FingerprintFormat fp_format; /* "fp_format" option selecting what write_trailer() emits */
+    ChromaprintContext ctx;      /* library handle created in write_header(), released by cleanup() */
+} ChromaprintMuxContext;
+
+static void cleanup(ChromaprintMuxContext *cpr) /* frees the Chromaprint handle; repeat calls are no-ops */
+{
+    if (!cpr->ctx) return;
+    avpriv_lock_avformat();
+    chromaprint_free(cpr->ctx);
+    avpriv_unlock_avformat();
+    cpr->ctx = NULL; /* write_header()'s fail path and write_trailer() both end here: never keep a stale handle */
+}
+
+static int write_header(AVFormatContext *s) /* creates and starts the Chromaprint context for the single input stream */
+{
+    ChromaprintMuxContext *cpr = s->priv_data;
+    AVStream *st;
+
+    avpriv_lock_avformat(); /* context creation is done under the avformat lock, like the free in cleanup() */
+    cpr->ctx = chromaprint_new(cpr->algorithm);
+    avpriv_unlock_avformat();
+
+    if (!cpr->ctx) {
+        av_log(s, AV_LOG_ERROR, "Failed to create chromaprint context.\n");
+        return AVERROR(ENOMEM); /* nothing to clean up yet, so no goto fail */
+    }
+
+    if (cpr->silence_threshold != -1) { /* -1 is the option default: leave the library setting alone */
+#if CPR_VERSION_INT >= AV_VERSION_INT(0, 7, 0)
+        if (!chromaprint_set_option(cpr->ctx, "silence_threshold", cpr->silence_threshold)) {
+            av_log(s, AV_LOG_ERROR, "Failed to set silence threshold.\n");
+            goto fail;
+        }
+#else
+        av_log(s, AV_LOG_ERROR, "Setting the silence threshold requires Chromaprint "
+                                "version 0.7.0 or later.\n");
+        goto fail; /* an explicit threshold with an older library is a hard error */
+#endif
+    }
+
+    if (s->nb_streams != 1) {
+        av_log(s, AV_LOG_ERROR, "Only one stream is supported\n");
+        goto fail;
+    }
+
+    st = s->streams[0];
+
+    if (st->codec->channels > 2) {
+        av_log(s, AV_LOG_ERROR, "Only up to 2 channels are supported\n");
+        goto fail;
+    }
+
+    if (st->codec->sample_rate < 1000) {
+        av_log(s, AV_LOG_ERROR, "Sampling rate must be at least 1000\n");
+        goto fail;
+    }
+
+    if (!chromaprint_start(cpr->ctx, st->codec->sample_rate, st->codec->channels)) {
+        av_log(s, AV_LOG_ERROR, "Failed to start chromaprint\n");
+        goto fail;
+    }
+
+    return 0;
+fail: /* every failure after context creation releases it and reports EINVAL */
+    cleanup(cpr);
+    return AVERROR(EINVAL);
+}
+
+static int write_packet(AVFormatContext *s, AVPacket *pkt) /* feeds one packet of audio to the fingerprinter */
+{
+    ChromaprintMuxContext *cpr = s->priv_data;
+    return chromaprint_feed(cpr->ctx, pkt->data, pkt->size / 2) ? 0 : AVERROR(EINVAL); /* size / 2: bytes -> samples, assuming the 16-bit PCM this muxer advertises */
+}
+
+/* Finish the fingerprint and write it in the selected format; always releases the context. */
+static int write_trailer(AVFormatContext *s)
+{
+    ChromaprintMuxContext *cpr = s->priv_data;
+    void *fp = NULL, *enc_fp = NULL;
+    int size, enc_size, ret = AVERROR(EINVAL);
+
+    if (!chromaprint_finish(cpr->ctx)) {
+        av_log(s, AV_LOG_ERROR, "Failed to generate fingerprint\n");
+        goto fail;
+    }
+
+    if (!chromaprint_get_raw_fingerprint(cpr->ctx, &fp, &size)) {
+        av_log(s, AV_LOG_ERROR, "Failed to retrieve fingerprint\n");
+        goto fail;
+    }
+
+    switch (cpr->fp_format) {
+    case FINGERPRINT_RAW:
+        avio_write(s->pb, fp, size * 4); /* size counts 32-bit fingerprint items, not bytes */
+        break;
+    case FINGERPRINT_COMPRESSED:
+    case FINGERPRINT_BASE64:
+        if (!chromaprint_encode_fingerprint(fp, size, cpr->algorithm, &enc_fp, &enc_size,
+                                            cpr->fp_format == FINGERPRINT_BASE64)) {
+            av_log(s, AV_LOG_ERROR, "Failed to encode fingerprint\n");
+            goto fail;
+        }
+        avio_write(s->pb, enc_fp, enc_size); /* the encoder reports its output length in bytes */
+        break;
+    }
+
+    ret = 0;
+fail:
+    if (fp)
+        chromaprint_dealloc(fp);
+    if (enc_fp)
+        chromaprint_dealloc(enc_fp);
+    cleanup(cpr);
+    return ret;
+}
+
+#define OFFSET(x) offsetof(ChromaprintMuxContext, x) /* byte offset of the field backing an option */
+#define FLAGS AV_OPT_FLAG_ENCODING_PARAM
+static const AVOption options[] = { /* private muxer options; the three CONST rows name values of "fp_format" */
+    { "silence_threshold", "threshold for detecting silence", OFFSET(silence_threshold), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 32767, FLAGS },
+    { "algorithm", "version of the fingerprint algorithm", OFFSET(algorithm), AV_OPT_TYPE_INT, { .i64 = CHROMAPRINT_ALGORITHM_DEFAULT }, CHROMAPRINT_ALGORITHM_TEST1, INT_MAX, FLAGS },
+    { "fp_format", "fingerprint format to write", OFFSET(fp_format), AV_OPT_TYPE_INT, { .i64 = FINGERPRINT_BASE64 }, FINGERPRINT_RAW, FINGERPRINT_BASE64, FLAGS }, /* NOTE(review): INT option stored in an enum field - assumes the enum is int-sized */
+    { "raw", "binary raw fingerprint", 0, AV_OPT_TYPE_CONST, {.i64 = FINGERPRINT_RAW }, INT_MIN, INT_MAX, FLAGS, "fp_format"},
+    { "compressed", "binary compressed fingerprint", 0, AV_OPT_TYPE_CONST, {.i64 = FINGERPRINT_COMPRESSED }, INT_MIN, INT_MAX, FLAGS, "fp_format"},
+    { "base64", "Base64 compressed fingerprint", 0, AV_OPT_TYPE_CONST, {.i64 = FINGERPRINT_BASE64 }, INT_MIN, INT_MAX, FLAGS, "fp_format"},
+    { NULL }, /* table terminator */
+};
+
+static const AVClass chromaprint_class = { /* referenced by ff_chromaprint_muxer.priv_class */
+    .class_name = "chromaprint muxer",
+    .item_name  = av_default_item_name,
+    .option     = options, /* the private option table defined above */
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+AVOutputFormat ff_chromaprint_muxer = {
+    .name              = "chromaprint",
+    .long_name         = NULL_IF_CONFIG_SMALL("Chromaprint"),
+    .priv_data_size    = sizeof(ChromaprintMuxContext),
+    .audio_codec       = AV_NE(AV_CODEC_ID_PCM_S16BE, AV_CODEC_ID_PCM_S16LE), /* 16-bit PCM; NOTE(review): AV_NE presumably picks the host-endian variant - confirm */
+    .write_header      = write_header,
+    .write_packet      = write_packet,
+    .write_trailer     = write_trailer, /* the only callback that writes output: the finished fingerprint */
+    .flags             = AVFMT_NOTIMESTAMPS,
+    .priv_class        = &chromaprint_class,
+};
diff --git a/libavformat/cinedec.c b/libavformat/cinedec.c
index 632f46c4..31840843 100644
--- a/libavformat/cinedec.c
+++ b/libavformat/cinedec.c
@@ -49,13 +49,13 @@ enum {
     CFA_VRIV6     = 2,  /**< BGGR/GRBG */
     CFA_BAYER     = 3,  /**< GB/RG */
     CFA_BAYERFLIP = 4,  /**< RG/GB */
-
-    CFA_TLGRAY    = 0x80000000,
-    CFA_TRGRAY    = 0x40000000,
-    CFA_BLGRAY    = 0x20000000,
-    CFA_BRGRAY    = 0x10000000
 };
 
+#define CFA_TLGRAY  0x80000000U /* gray flags: this change moves them out of the enum above and adds a U suffix */
+#define CFA_TRGRAY  0x40000000U /* NOTE(review): presumably because 0x80000000 does not fit an int enumerator - confirm */
+#define CFA_BLGRAY  0x20000000U
+#define CFA_BRGRAY  0x10000000U
+
 static int cine_read_probe(AVProbeData *p)
 {
     int HeaderSize;
diff --git a/libavformat/concat.c b/libavformat/concat.c
index 81fe9708..7a8eb1be 100644
--- a/libavformat/concat.c
+++ b/libavformat/concat.c
@@ -65,7 +65,10 @@ static av_cold int concat_open(URLContext *h, const char *uri, int flags)
     struct concat_data  *data = h->priv_data;
     struct concat_nodes *nodes;
 
-    av_strstart(uri, "concat:", &uri);
+    if (!av_strstart(uri, "concat:", &uri)) {
+        av_log(h, AV_LOG_ERROR, "URL %s lacks prefix\n", uri);
+        return AVERROR(EINVAL);
+    }
 
     for (i = 0, len = 1; uri[i]; i++) {
         if (uri[i] == *AV_CAT_SEPARATOR) {
@@ -94,8 +97,9 @@ static av_cold int concat_open(URLContext *h, const char *uri, int flags)
         uri += len + strspn(uri + len, AV_CAT_SEPARATOR);
 
         /* creating URLContext */
-        if ((err = ffurl_open(&uc, node_uri, flags,
-                              &h->interrupt_callback, NULL)) < 0)
+        err = ffurl_open_whitelist(&uc, node_uri, flags,
+                                   &h->interrupt_callback, NULL, h->protocol_whitelist);
+        if (err < 0)
             break;
 
         /* creating size */
@@ -189,4 +193,5 @@ URLProtocol ff_concat_protocol = {
     .url_seek       = concat_seek,
     .url_close      = concat_close,
     .priv_data_size = sizeof(struct concat_data),
+    .default_whitelist = "concat,file,subfile",
 };
diff --git a/libavformat/concatdec.c b/libavformat/concatdec.c
index 07db9f96..de7b89a2 100644
--- a/libavformat/concatdec.c
+++ b/libavformat/concatdec.c
@@ -41,8 +41,13 @@ typedef struct ConcatStream {
 typedef struct {
     char *url;
     int64_t start_time;
+    int64_t file_start_time;
+    int64_t file_inpoint;
     int64_t duration;
     ConcatStream *streams;
+    int64_t inpoint;
+    int64_t outpoint;
+    AVDictionary *metadata;
     int nb_streams;
 } ConcatFile;
 
@@ -54,8 +59,10 @@ typedef struct {
     AVFormatContext *avf;
     int safe;
     int seekable;
+    int eof;
     ConcatMatchMode stream_match_mode;
     unsigned auto_convert;
+    int segment_time_metadata;
 } ConcatContext;
 
 static int concat_probe(AVProbeData *probe)
@@ -142,6 +149,8 @@ static int add_file(AVFormatContext *avf, char *filename, ConcatFile **rfile,
     file->url        = url;
     file->start_time = AV_NOPTS_VALUE;
     file->duration   = AV_NOPTS_VALUE;
+    file->inpoint    = AV_NOPTS_VALUE;
+    file->outpoint   = AV_NOPTS_VALUE;
 
     return 0;
 
@@ -172,6 +181,8 @@ static int copy_stream_props(AVStream *st, AVStream *source_st)
     st->avg_frame_rate      = source_st->avg_frame_rate;
     st->time_base           = source_st->time_base;
     st->sample_aspect_ratio = source_st->sample_aspect_ratio;
+
+    av_dict_copy(&st->metadata, source_st->metadata, 0);
     return 0;
 }
 
@@ -304,8 +315,23 @@ static int open_file(AVFormatContext *avf, unsigned fileno)
         file->start_time = !fileno ? 0 :
                            cat->files[fileno - 1].start_time +
                            cat->files[fileno - 1].duration;
+    file->file_start_time = (cat->avf->start_time == AV_NOPTS_VALUE) ? 0 : cat->avf->start_time;
+    file->file_inpoint = (file->inpoint == AV_NOPTS_VALUE) ? file->file_start_time : file->inpoint;
+    if (file->duration == AV_NOPTS_VALUE && file->outpoint != AV_NOPTS_VALUE)
+        file->duration = file->outpoint - file->file_inpoint;
+
+    if (cat->segment_time_metadata) {
+        av_dict_set_int(&file->metadata, "lavf.concatdec.start_time", file->start_time, 0);
+        if (file->duration != AV_NOPTS_VALUE)
+            av_dict_set_int(&file->metadata, "lavf.concatdec.duration", file->duration, 0);
+    }
+
     if ((ret = match_streams(avf)) < 0)
         return ret;
+    if (file->inpoint != AV_NOPTS_VALUE) {
+       if ((ret = avformat_seek_file(cat->avf, -1, INT64_MIN, file->inpoint, file->inpoint, 0)) < 0)
+           return ret;
+    }
     return 0;
 }
 
@@ -319,6 +345,7 @@ static int concat_read_close(AVFormatContext *avf)
     for (i = 0; i < cat->nb_files; i++) {
         av_freep(&cat->files[i].url);
         av_freep(&cat->files[i].streams);
+        av_dict_free(&cat->files[i].metadata);
     }
     av_freep(&cat->files);
     return 0;
@@ -351,20 +378,43 @@ static int concat_read_header(AVFormatContext *avf)
             }
             if ((ret = add_file(avf, filename, &file, &nb_files_alloc)) < 0)
                 goto fail;
-        } else if (!strcmp(keyword, "duration")) {
+        } else if (!strcmp(keyword, "duration") || !strcmp(keyword, "inpoint") || !strcmp(keyword, "outpoint")) {
             char *dur_str = get_keyword(&cursor);
             int64_t dur;
             if (!file) {
-                av_log(avf, AV_LOG_ERROR, "Line %d: duration without file\n",
-                       line);
+                av_log(avf, AV_LOG_ERROR, "Line %d: %s without file\n",
+                       line, keyword);
                 FAIL(AVERROR_INVALIDDATA);
             }
             if ((ret = av_parse_time(&dur, dur_str, 1)) < 0) {
-                av_log(avf, AV_LOG_ERROR, "Line %d: invalid duration '%s'\n",
-                       line, dur_str);
+                av_log(avf, AV_LOG_ERROR, "Line %d: invalid %s '%s'\n",
+                       line, keyword, dur_str);
                 goto fail;
             }
-            file->duration = dur;
+            if (!strcmp(keyword, "duration"))
+                file->duration = dur;
+            else if (!strcmp(keyword, "inpoint"))
+                file->inpoint = dur;
+            else if (!strcmp(keyword, "outpoint"))
+                file->outpoint = dur;
+        } else if (!strcmp(keyword, "file_packet_metadata")) {
+            char *metadata;
+            if (!file) {
+                av_log(avf, AV_LOG_ERROR, "Line %d: %s without file\n",
+                       line, keyword);
+                FAIL(AVERROR_INVALIDDATA);
+            }
+            metadata = av_get_token((const char **)&cursor, SPACE_CHARS);
+            if (!metadata) {
+                av_log(avf, AV_LOG_ERROR, "Line %d: packet metadata required\n", line);
+                FAIL(AVERROR_INVALIDDATA);
+            }
+            if ((ret = av_dict_parse_string(&file->metadata, metadata, "=", "", 0)) < 0) {
+                av_log(avf, AV_LOG_ERROR, "Line %d: failed to parse metadata string\n", line);
+                av_freep(&metadata);
+                FAIL(AVERROR_INVALIDDATA);
+            }
+            av_freep(&metadata);
         } else if (!strcmp(keyword, "stream")) {
             if (!avformat_new_stream(avf, NULL))
                 FAIL(AVERROR(ENOMEM));
@@ -401,8 +451,11 @@ static int concat_read_header(AVFormatContext *avf)
             cat->files[i].start_time = time;
         else
             time = cat->files[i].start_time;
-        if (cat->files[i].duration == AV_NOPTS_VALUE)
-            break;
+        if (cat->files[i].duration == AV_NOPTS_VALUE) {
+            if (cat->files[i].inpoint == AV_NOPTS_VALUE || cat->files[i].outpoint == AV_NOPTS_VALUE)
+                break;
+            cat->files[i].duration = cat->files[i].outpoint - cat->files[i].inpoint;
+        }
         time += cat->files[i].duration;
     }
     if (i == cat->nb_files) {
@@ -427,10 +480,12 @@ static int open_next_file(AVFormatContext *avf)
     unsigned fileno = cat->cur_file - cat->files;
 
     if (cat->cur_file->duration == AV_NOPTS_VALUE)
-        cat->cur_file->duration = cat->avf->duration;
+        cat->cur_file->duration = cat->avf->duration - (cat->cur_file->file_inpoint - cat->cur_file->file_start_time);
 
-    if (++fileno >= cat->nb_files)
+    if (++fileno >= cat->nb_files) {
+        cat->eof = 1;
         return AVERROR_EOF;
+    }
     return open_file(avf, fileno);
 }
 
@@ -461,7 +516,7 @@ static int filter_packet(AVFormatContext *avf, ConcatStream *cs, AVPacket *pkt)
             ret = 1;
         }
         if (ret > 0) {
-            av_free_packet(pkt);
+            av_packet_unref(pkt);
             pkt2.buf = av_buffer_create(pkt2.data, pkt2.size,
                                         av_buffer_default_free, NULL, 0);
             if (!pkt2.buf) {
@@ -474,14 +529,27 @@ static int filter_packet(AVFormatContext *avf, ConcatStream *cs, AVPacket *pkt)
     return 0;
 }
 
+/* Nonzero when pkt's dts is at or past the current file's outpoint; 0 if either value is unset. */
+static int packet_after_outpoint(ConcatContext *cat, AVPacket *pkt)
+{
+    int64_t outpoint = cat->cur_file->outpoint;
+    /* without both an outpoint and a dts there is nothing to compare */
+    if (outpoint == AV_NOPTS_VALUE || pkt->dts == AV_NOPTS_VALUE)
+        return 0;
+    return av_compare_ts(pkt->dts, cat->avf->streams[pkt->stream_index]->time_base, outpoint, AV_TIME_BASE_Q) >= 0;
+}
+
 static int concat_read_packet(AVFormatContext *avf, AVPacket *pkt)
 {
     ConcatContext *cat = avf->priv_data;
     int ret;
-    int64_t file_start_time, delta;
+    int64_t delta;
     ConcatStream *cs;
     AVStream *st;
 
+    if (cat->eof)
+        return AVERROR_EOF;
+
     if (!cat->avf)
         return AVERROR(EIO);
 
@@ -498,6 +566,12 @@ static int concat_read_packet(AVFormatContext *avf, AVPacket *pkt)
             av_packet_unref(pkt);
             return ret;
         }
+        if (packet_after_outpoint(cat, pkt)) {
+            av_packet_unref(pkt);
+            if ((ret = open_next_file(avf)) < 0)
+                return ret;
+            continue;
+        }
         cs = &cat->cur_file->streams[pkt->stream_index];
         if (cs->out_stream_index < 0) {
             av_packet_unref(pkt);
@@ -515,10 +589,7 @@ static int concat_read_packet(AVFormatContext *avf, AVPacket *pkt)
            av_ts2str(pkt->pts), av_ts2timestr(pkt->pts, &st->time_base),
            av_ts2str(pkt->dts), av_ts2timestr(pkt->dts, &st->time_base));
 
-    file_start_time = cat->avf->start_time;
-    if (file_start_time == AV_NOPTS_VALUE)
-        file_start_time = 0;
-    delta = av_rescale_q(cat->cur_file->start_time - file_start_time,
+    delta = av_rescale_q(cat->cur_file->start_time - cat->cur_file->file_inpoint,
                          AV_TIME_BASE_Q,
                          cat->avf->streams[pkt->stream_index]->time_base);
     if (pkt->pts != AV_NOPTS_VALUE)
@@ -528,6 +599,19 @@ static int concat_read_packet(AVFormatContext *avf, AVPacket *pkt)
     av_log(avf, AV_LOG_DEBUG, " -> pts:%s pts_time:%s dts:%s dts_time:%s\n",
            av_ts2str(pkt->pts), av_ts2timestr(pkt->pts, &st->time_base),
            av_ts2str(pkt->dts), av_ts2timestr(pkt->dts, &st->time_base));
+    if (cat->cur_file->metadata) {
+        uint8_t* metadata;
+        int metadata_len;
+        char* packed_metadata = av_packet_pack_dictionary(cat->cur_file->metadata, &metadata_len);
+        if (!packed_metadata)
+            return AVERROR(ENOMEM);
+        if (!(metadata = av_packet_new_side_data(pkt, AV_PKT_DATA_STRINGS_METADATA, metadata_len))) {
+            av_freep(&packed_metadata);
+            return AVERROR(ENOMEM);
+        }
+        memcpy(metadata, packed_metadata, metadata_len);
+        av_freep(&packed_metadata);
+    }
     return ret;
 }
 
@@ -545,7 +629,7 @@ static int try_seek(AVFormatContext *avf, int stream,
                     int64_t min_ts, int64_t ts, int64_t max_ts, int flags)
 {
     ConcatContext *cat = avf->priv_data;
-    int64_t t0 = cat->cur_file->start_time - cat->avf->start_time;
+    int64_t t0 = cat->cur_file->start_time - cat->cur_file->file_inpoint;
 
     ts -= t0;
     min_ts = min_ts == INT64_MIN ? INT64_MIN : min_ts - t0;
@@ -616,6 +700,7 @@ static int concat_seek(AVFormatContext *avf, int stream,
         cat->cur_file = cur_file_saved;
     } else {
         avformat_close_input(&cur_avf_saved);
+        cat->eof = 0;
     }
     return ret;
 }
@@ -625,9 +710,11 @@ static int concat_seek(AVFormatContext *avf, int stream,
 
 static const AVOption options[] = {
     { "safe", "enable safe mode",
-      OFFSET(safe), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 1, DEC },
+      OFFSET(safe), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, DEC },
     { "auto_convert", "automatically convert bitstream format",
-      OFFSET(auto_convert), AV_OPT_TYPE_INT, {.i64 = 1}, 0, 1, DEC },
+      OFFSET(auto_convert), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, DEC },
+    { "segment_time_metadata", "output file segment start time and duration as packet metadata",
+      OFFSET(segment_time_metadata), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, DEC },
     { NULL }
 };
 
diff --git a/libavformat/crypto.c b/libavformat/crypto.c
index f56ee4eb..b1871fe9 100644
--- a/libavformat/crypto.c
+++ b/libavformat/crypto.c
@@ -136,8 +136,9 @@ static int crypto_open2(URLContext *h, const char *uri, int flags, AVDictionary
             goto err;
     }
 
-    if ((ret = ffurl_open(&c->hd, nested_url, flags,
-                          &h->interrupt_callback, options)) < 0) {
+    if ((ret = ffurl_open_whitelist(&c->hd, nested_url, flags,
+                                    &h->interrupt_callback, options,
+                                    h->protocol_whitelist)) < 0) {
         av_log(h, AV_LOG_ERROR, "Unable to open resource: %s\n", nested_url);
         goto err;
     }
diff --git a/libavformat/dashenc.c b/libavformat/dashenc.c
index 7a932147..1dd1a9d2 100644
--- a/libavformat/dashenc.c
+++ b/libavformat/dashenc.c
@@ -24,10 +24,12 @@
 #include <unistd.h>
 #endif
 
+#include "libavutil/avassert.h"
 #include "libavutil/avstring.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/mathematics.h"
 #include "libavutil/opt.h"
+#include "libavutil/rational.h"
 #include "libavutil/time_internal.h"
 
 #include "avc.h"
@@ -94,6 +96,8 @@ typedef struct DASHContext {
     const char *single_file_name;
     const char *init_seg_name;
     const char *media_seg_name;
+    AVRational min_frame_rate, max_frame_rate;
+    int ambiguous_frame_rate;
 } DASHContext;
 
 static int dash_write(void *opaque, uint8_t *buf, int buf_size)
@@ -444,7 +448,7 @@ static int write_manifest(AVFormatContext *s, int final)
     AVDictionaryEntry *title = av_dict_get(s->metadata, "title", NULL, 0);
 
     snprintf(temp_filename, sizeof(temp_filename), "%s.tmp", s->filename);
-    ret = avio_open2(&out, temp_filename, AVIO_FLAG_WRITE, &s->interrupt_callback, NULL);
+    ret = s->io_open(s, &out, temp_filename, AVIO_FLAG_WRITE, NULL);
     if (ret < 0) {
         av_log(s, AV_LOG_ERROR, "Unable to open %s for writing\n", temp_filename);
         return ret;
@@ -503,7 +507,11 @@ static int write_manifest(AVFormatContext *s, int final)
     }
 
     if (c->has_video) {
-        avio_printf(out, "\t\t<AdaptationSet contentType=\"video\" segmentAlignment=\"true\" bitstreamSwitching=\"true\">\n");
+        avio_printf(out, "\t\t<AdaptationSet contentType=\"video\" segmentAlignment=\"true\" bitstreamSwitching=\"true\"");
+        if (c->max_frame_rate.num && !c->ambiguous_frame_rate)
+            avio_printf(out, " %s=\"%d/%d\"", (av_cmp_q(c->min_frame_rate, c->max_frame_rate) < 0) ? "maxFrameRate" : "frameRate", c->max_frame_rate.num, c->max_frame_rate.den);
+        avio_printf(out, ">\n");
+
         for (i = 0; i < s->nb_streams; i++) {
             AVStream *st = s->streams[i];
             OutputStream *os = &c->streams[i];
@@ -511,7 +519,11 @@ static int write_manifest(AVFormatContext *s, int final)
             if (st->codec->codec_type != AVMEDIA_TYPE_VIDEO)
                 continue;
 
-            avio_printf(out, "\t\t\t<Representation id=\"%d\" mimeType=\"video/mp4\" codecs=\"%s\"%s width=\"%d\" height=\"%d\">\n", i, os->codec_str, os->bandwidth_str, st->codec->width, st->codec->height);
+            avio_printf(out, "\t\t\t<Representation id=\"%d\" mimeType=\"video/mp4\" codecs=\"%s\"%s width=\"%d\" height=\"%d\"", i, os->codec_str, os->bandwidth_str, st->codec->width, st->codec->height);
+            if (st->avg_frame_rate.num)
+                avio_printf(out, " frameRate=\"%d/%d\"", st->avg_frame_rate.num, st->avg_frame_rate.den);
+            avio_printf(out, ">\n");
+
             output_segment_list(&c->streams[i], out, c);
             avio_printf(out, "\t\t\t</Representation>\n");
         }
@@ -536,7 +548,7 @@ static int write_manifest(AVFormatContext *s, int final)
     avio_printf(out, "\t</Period>\n");
     avio_printf(out, "</MPD>\n");
     avio_flush(out);
-    avio_close(out);
+    ff_format_io_close(s, &out);
     return ff_rename(temp_filename, s->filename, s);
 }
 
@@ -552,6 +564,7 @@ static int dash_write_header(AVFormatContext *s)
         c->single_file = 1;
     if (c->single_file)
         c->use_template = 0;
+    c->ambiguous_frame_rate = 0;
 
     av_strlcpy(c->dirname, s->filename, sizeof(c->dirname));
     ptr = strrchr(c->dirname, '/');
@@ -610,6 +623,9 @@ static int dash_write_header(AVFormatContext *s)
         os->ctx = ctx;
         ctx->oformat = oformat;
         ctx->interrupt_callback = s->interrupt_callback;
+        ctx->opaque             = s->opaque;
+        ctx->io_close           = s->io_close;
+        ctx->io_open            = s->io_open;
 
         if (!(st = avformat_new_stream(ctx, NULL))) {
             ret = AVERROR(ENOMEM);
@@ -635,7 +651,7 @@ static int dash_write_header(AVFormatContext *s)
             dash_fill_tmpl_params(os->initfile, sizeof(os->initfile), c->init_seg_name, i, 0, os->bit_rate, 0);
         }
         snprintf(filename, sizeof(filename), "%s%s", c->dirname, os->initfile);
-        ret = ffurl_open(&os->out, filename, AVIO_FLAG_WRITE, &s->interrupt_callback, NULL);
+        ret = ffurl_open_whitelist(&os->out, filename, AVIO_FLAG_WRITE, &s->interrupt_callback, NULL, s->protocol_whitelist);
         if (ret < 0)
             goto fail;
         os->init_start_pos = 0;
@@ -655,10 +671,20 @@ static int dash_write_header(AVFormatContext *s)
         // already before being handed to this muxer, so we don't have mismatches
         // between the MPD and the actual segments.
         s->avoid_negative_ts = ctx->avoid_negative_ts;
-        if (st->codec->codec_type == AVMEDIA_TYPE_VIDEO)
+        if (st->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
+            AVRational avg_frame_rate = s->streams[i]->avg_frame_rate;
+            if (avg_frame_rate.num > 0) {
+                if (av_cmp_q(avg_frame_rate, c->min_frame_rate) < 0)
+                    c->min_frame_rate = avg_frame_rate;
+                if (av_cmp_q(c->max_frame_rate, avg_frame_rate) < 0)
+                    c->max_frame_rate = avg_frame_rate;
+            } else {
+                c->ambiguous_frame_rate = 1;
+            }
             c->has_video = 1;
-        else if (st->codec->codec_type == AVMEDIA_TYPE_AUDIO)
+        } else if (st->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
             c->has_audio = 1;
+        }
 
         set_codec_str(s, st->codec, os->codec_str, sizeof(os->codec_str));
         os->first_pts = AV_NOPTS_VALUE;
@@ -732,7 +758,7 @@ static void find_index_range(AVFormatContext *s, const char *full_path,
     URLContext *fd;
     int ret;
 
-    ret = ffurl_open(&fd, full_path, AVIO_FLAG_READ, &s->interrupt_callback, NULL);
+    ret = ffurl_open_whitelist(&fd, full_path, AVIO_FLAG_READ, &s->interrupt_callback, NULL, s->protocol_whitelist);
     if (ret < 0)
         return;
     if (ffurl_seek(fd, pos, SEEK_SET) != pos) {
@@ -815,7 +841,7 @@ static int dash_flush(AVFormatContext *s, int final, int stream)
             dash_fill_tmpl_params(filename, sizeof(filename), c->media_seg_name, i, os->segment_index, os->bit_rate, os->start_pts);
             snprintf(full_path, sizeof(full_path), "%s%s", c->dirname, filename);
             snprintf(temp_path, sizeof(temp_path), "%s.tmp", full_path);
-            ret = ffurl_open(&os->out, temp_path, AVIO_FLAG_WRITE, &s->interrupt_callback, NULL);
+            ret = ffurl_open_whitelist(&os->out, temp_path, AVIO_FLAG_WRITE, &s->interrupt_callback, NULL, s->protocol_whitelist);
             if (ret < 0)
                 break;
             write_styp(os->ctx->pb);
@@ -981,10 +1007,10 @@ static const AVOption options[] = {
     { "window_size", "number of segments kept in the manifest", OFFSET(window_size), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, E },
     { "extra_window_size", "number of segments kept outside of the manifest before removing from disk", OFFSET(extra_window_size), AV_OPT_TYPE_INT, { .i64 = 5 }, 0, INT_MAX, E },
     { "min_seg_duration", "minimum segment duration (in microseconds)", OFFSET(min_seg_duration), AV_OPT_TYPE_INT64, { .i64 = 5000000 }, 0, INT_MAX, E },
-    { "remove_at_exit", "remove all segments when finished", OFFSET(remove_at_exit), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, E },
-    { "use_template", "Use SegmentTemplate instead of SegmentList", OFFSET(use_template), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, E },
-    { "use_timeline", "Use SegmentTimeline in SegmentTemplate", OFFSET(use_timeline), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, E },
-    { "single_file", "Store all segments in one file, accessed using byte ranges", OFFSET(single_file), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, E },
+    { "remove_at_exit", "remove all segments when finished", OFFSET(remove_at_exit), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, E },
+    { "use_template", "Use SegmentTemplate instead of SegmentList", OFFSET(use_template), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, E },
+    { "use_timeline", "Use SegmentTimeline in SegmentTemplate", OFFSET(use_timeline), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, E },
+    { "single_file", "Store all segments in one file, accessed using byte ranges", OFFSET(single_file), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, E },
     { "single_file_name", "DASH-templated name to be used for baseURL. Implies storing all segments in one file, accessed using byte ranges", OFFSET(single_file_name), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, E },
     { "init_seg_name", "DASH-templated name to used for the initialization segment", OFFSET(init_seg_name), AV_OPT_TYPE_STRING, {.str = "init-stream$RepresentationID$.m4s"}, 0, 0, E },
     { "media_seg_name", "DASH-templated name to used for the media segments", OFFSET(media_seg_name), AV_OPT_TYPE_STRING, {.str = "chunk-stream$RepresentationID$-$Number%05d$.m4s"}, 0, 0, E },
diff --git a/libavformat/dcstr.c b/libavformat/dcstr.c
new file mode 100644
index 00000000..2ae61dec
--- /dev/null
+++ b/libavformat/dcstr.c
@@ -0,0 +1,115 @@
+/*
+ * DC STR demuxer
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avformat.h"
+#include "internal.h"
+
+/**
+ * Probe for a DC STR file by looking for the "Sega Stream" signature at
+ * byte offset 213 of the probe buffer.
+ *
+ * @return AVPROBE_SCORE_MAX when the signature is present, 0 otherwise
+ */
+static int dcstr_probe(AVProbeData *p)
+{
+    if (p->buf_size < 224 || memcmp(p->buf + 213, "Sega Stream", 11))
+        return 0;
+
+    return AVPROBE_SCORE_MAX;
+}
+
+/**
+ * Read the file header and create the single audio stream.
+ *
+ * The header fields are little-endian 32-bit words read in file order:
+ * channel count, sample rate, codec tag, block alignment, one skipped word,
+ * duration and a multiplier applied to the channel count. Reading then
+ * continues at offset 0x800.
+ *
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int dcstr_read_header(AVFormatContext *s)
+{
+    unsigned codec, align;
+    int mult;
+    AVStream *st;
+
+    st = avformat_new_stream(s, NULL);
+    if (!st)
+        return AVERROR(ENOMEM);
+
+    st->codec->codec_type  = AVMEDIA_TYPE_AUDIO;
+    st->codec->channels    = avio_rl32(s->pb);
+    st->codec->sample_rate = avio_rl32(s->pb);
+    if (st->codec->sample_rate <= 0)
+        return AVERROR_INVALIDDATA;
+    codec                  = avio_rl32(s->pb);
+    align                  = avio_rl32(s->pb);
+    avio_skip(s->pb, 4);
+    st->duration           = avio_rl32(s->pb);
+    mult                   = avio_rl32(s->pb);
+    /* Both factors come straight from the file: reject non-positive values
+     * and products that do not fit in an int, otherwise the alignment check
+     * below would divide by zero. */
+    if (st->codec->channels <= 0 || mult <= 0 ||
+        mult > INT_MAX / st->codec->channels) {
+        av_log(s, AV_LOG_ERROR, "invalid number of channels %d x %d\n",
+               st->codec->channels, mult);
+        return AVERROR_INVALIDDATA;
+    }
+    st->codec->channels   *= mult;
+    if (!align || align > INT_MAX / st->codec->channels)
+        return AVERROR_INVALIDDATA;
+    st->codec->block_align = align * st->codec->channels;
+
+    switch (codec) {
+    case  4: st->codec->codec_id = AV_CODEC_ID_ADPCM_AICA;       break;
+    case 16: st->codec->codec_id = AV_CODEC_ID_PCM_S16LE_PLANAR; break;
+    default: avpriv_request_sample(s, "codec %X", codec);
+             return AVERROR_PATCHWELCOME;
+    }
+
+    avio_skip(s->pb, 0x800 - avio_tell(s->pb));
+    avpriv_set_pts_info(st, 64, 1, st->codec->sample_rate);
+
+    return 0;
+}
+
+/**
+ * Read the next packet: block_align bytes taken from the current position.
+ *
+ * @return the result of av_get_packet()
+ */
+static int dcstr_read_packet(AVFormatContext *s, AVPacket *pkt)
+{
+    AVCodecContext *codec = s->streams[0]->codec;
+    return av_get_packet(s->pb, pkt, codec->block_align);
+}
+
+AVInputFormat ff_dcstr_demuxer = {
+    .name           = "dcstr",
+    .long_name      = NULL_IF_CONFIG_SMALL("Sega DC STR"),
+    .read_probe     = dcstr_probe,
+    .read_header    = dcstr_read_header,
+    .read_packet    = dcstr_read_packet,
+    .extensions     = "str",
+    .flags          = AVFMT_GENERIC_INDEX | AVFMT_NO_BYTE_SEEK | AVFMT_NOBINSEARCH,
+};
diff --git a/libavformat/dfa.c b/libavformat/dfa.c
index 450bc212..b16672cc 100644
--- a/libavformat/dfa.c
+++ b/libavformat/dfa.c
@@ -93,7 +93,7 @@ static int dfa_read_packet(AVFormatContext *s, AVPacket *pkt)
         if (!first) {
             ret = av_append_packet(pb, pkt, 12);
             if (ret < 0) {
-                av_free_packet(pkt);
+                av_packet_unref(pkt);
                 return ret;
             }
         } else
@@ -101,6 +101,7 @@ static int dfa_read_packet(AVFormatContext *s, AVPacket *pkt)
         frame_size = AV_RL32(pkt->data + pkt->size - 8);
         if (frame_size > INT_MAX - 4) {
             av_log(s, AV_LOG_ERROR, "Too large chunk size: %"PRIu32"\n", frame_size);
+            av_packet_unref(pkt);
             return AVERROR(EIO);
         }
         if (AV_RL32(pkt->data + pkt->size - 12) == MKTAG('E', 'O', 'F', 'R')) {
@@ -114,7 +115,7 @@ static int dfa_read_packet(AVFormatContext *s, AVPacket *pkt)
         }
         ret = av_append_packet(pb, pkt, frame_size);
         if (ret < 0) {
-            av_free_packet(pkt);
+            av_packet_unref(pkt);
             return ret;
         }
     }
diff --git a/libavformat/dnxhddec.c b/libavformat/dnxhddec.c
index 910e6b66..48c890d1 100644
--- a/libavformat/dnxhddec.c
+++ b/libavformat/dnxhddec.c
@@ -23,21 +23,22 @@
 #include "libavutil/intreadwrite.h"
 #include "avformat.h"
 #include "rawdec.h"
+#include "libavcodec/dnxhddata.h"
 
 static int dnxhd_probe(AVProbeData *p)
 {
-    static const uint8_t header[] = {0x00,0x00,0x02,0x80,0x01};
     int w, h, compression_id;
     if (p->buf_size < 0x2c)
         return 0;
-    if (memcmp(p->buf, header, 5))
+    if (avpriv_dnxhd_parse_header_prefix(p->buf) == 0)
         return 0;
     h = AV_RB16(p->buf + 0x18);
     w = AV_RB16(p->buf + 0x1a);
     if (!w || !h)
         return 0;
     compression_id = AV_RB32(p->buf + 0x28);
-    if (compression_id < 1235 || compression_id > 1258)
+    if ((compression_id < 1235 || compression_id > 1260) &&
+        (compression_id < 1270 || compression_id > 1274))
         return 0;
     return AVPROBE_SCORE_MAX;
 }
diff --git a/libavformat/dsicin.c b/libavformat/dsicin.c
index 4b5a934c..6ba8c28c 100644
--- a/libavformat/dsicin.c
+++ b/libavformat/dsicin.c
@@ -200,7 +200,7 @@ static int cin_read_packet(AVFormatContext *s, AVPacket *pkt)
 
         ret = avio_read(pb, &pkt->data[4], pkt_size);
         if (ret < 0) {
-            av_free_packet(pkt);
+            av_packet_unref(pkt);
             return ret;
         }
         if (ret < pkt_size)
diff --git a/libavformat/dss.c b/libavformat/dss.c
index ead0ee00..bf7a1a4e 100644
--- a/libavformat/dss.c
+++ b/libavformat/dss.c
@@ -267,7 +267,7 @@ static int dss_sp_read_packet(AVFormatContext *s, AVPacket *pkt)
     return pkt->size;
 
 error_eof:
-    av_free_packet(pkt);
+    av_packet_unref(pkt);
     return ret < 0 ? ret : AVERROR_EOF;
 }
 
@@ -309,7 +309,7 @@ static int dss_723_1_read_packet(AVFormatContext *s, AVPacket *pkt)
         ret = avio_read(s->pb, pkt->data + offset,
                         size2 - offset);
         if (ret < size2 - offset) {
-            av_free_packet(pkt);
+            av_packet_unref(pkt);
             return ret < 0 ? ret : AVERROR_EOF;
         }
 
@@ -319,7 +319,7 @@ static int dss_723_1_read_packet(AVFormatContext *s, AVPacket *pkt)
 
     ret = avio_read(s->pb, pkt->data + offset, size - offset);
     if (ret < size - offset) {
-        av_free_packet(pkt);
+        av_packet_unref(pkt);
         return ret < 0 ? ret : AVERROR_EOF;
     }
 
diff --git a/libavformat/dtsdec.c b/libavformat/dtsdec.c
index da0fb61a..ef283916 100644
--- a/libavformat/dtsdec.c
+++ b/libavformat/dtsdec.c
@@ -34,7 +34,7 @@ static int dts_probe(AVProbeData *p)
     int markers[4*16] = {0};
     int sum, max, i;
     int64_t diff = 0;
-    uint8_t hdr[12 + FF_INPUT_BUFFER_PADDING_SIZE] = { 0 };
+    uint8_t hdr[12 + AV_INPUT_BUFFER_PADDING_SIZE] = { 0 };
 
     buf = p->buf + FFMIN(4096, p->buf_size);
 
diff --git a/libavformat/dump.c b/libavformat/dump.c
index 6355b99f..eaf87fe0 100644
--- a/libavformat/dump.c
+++ b/libavformat/dump.c
@@ -377,6 +377,9 @@ static void dump_sidedata(void *ctx, AVStream *st, const char *indent)
             av_log(ctx, AV_LOG_INFO, "audio service type: ");
             dump_audioservicetype(ctx, &sd);
             break;
+        case AV_PKT_DATA_QUALITY_STATS:
+            av_log(ctx, AV_LOG_INFO, "quality factor: %d, pict_type: %c", AV_RL32(sd.data), av_get_picture_type_char(sd.data[4]));
+            break;
         default:
             av_log(ctx, AV_LOG_WARNING,
                    "unknown side data type %d (%d bytes)", sd.type, sd.size);
@@ -420,8 +423,8 @@ static void dump_stream_format(AVFormatContext *ic, int i,
         av_cmp_q(st->sample_aspect_ratio, st->codec->sample_aspect_ratio)) {
         AVRational display_aspect_ratio;
         av_reduce(&display_aspect_ratio.num, &display_aspect_ratio.den,
-                  st->codec->width  * st->sample_aspect_ratio.num,
-                  st->codec->height * st->sample_aspect_ratio.den,
+                  st->codec->width  * (int64_t)st->sample_aspect_ratio.num,
+                  st->codec->height * (int64_t)st->sample_aspect_ratio.den,
                   1024 * 1024);
         av_log(NULL, AV_LOG_INFO, ", SAR %d:%d DAR %d:%d",
                st->sample_aspect_ratio.num, st->sample_aspect_ratio.den,
@@ -493,7 +496,7 @@ void av_dump_format(AVFormatContext *ic, int index,
         av_log(NULL, AV_LOG_INFO, "  Duration: ");
         if (ic->duration != AV_NOPTS_VALUE) {
             int hours, mins, secs, us;
-            int64_t duration = ic->duration + 5000;
+            int64_t duration = ic->duration + (ic->duration <= INT64_MAX - 5000 ? 5000 : 0);
             secs  = duration / AV_TIME_BASE;
             us    = duration % AV_TIME_BASE;
             mins  = secs / 60;
@@ -509,13 +512,13 @@ void av_dump_format(AVFormatContext *ic, int index,
             int secs, us;
             av_log(NULL, AV_LOG_INFO, ", start: ");
             secs = ic->start_time / AV_TIME_BASE;
-            us   = abs(ic->start_time % AV_TIME_BASE);
+            us   = llabs(ic->start_time % AV_TIME_BASE);
             av_log(NULL, AV_LOG_INFO, "%d.%06d",
                    secs, (int) av_rescale(us, 1000000, AV_TIME_BASE));
         }
         av_log(NULL, AV_LOG_INFO, ", bitrate: ");
         if (ic->bit_rate)
-            av_log(NULL, AV_LOG_INFO, "%d kb/s", ic->bit_rate / 1000);
+            av_log(NULL, AV_LOG_INFO, "%"PRId64" kb/s", (int64_t)ic->bit_rate / 1000);
         else
             av_log(NULL, AV_LOG_INFO, "N/A");
         av_log(NULL, AV_LOG_INFO, "\n");
diff --git a/libavformat/dv.c b/libavformat/dv.c
index 85002289..84c30611 100644
--- a/libavformat/dv.c
+++ b/libavformat/dv.c
@@ -553,12 +553,17 @@ static int dv_read_packet(AVFormatContext *s, AVPacket *pkt)
     size = avpriv_dv_get_packet(c->dv_demux, pkt);
 
     if (size < 0) {
+        int ret;
         int64_t pos = avio_tell(s->pb);
         if (!c->dv_demux->sys)
             return AVERROR(EIO);
         size = c->dv_demux->sys->frame_size;
-        if (avio_read(s->pb, c->buf, size) <= 0)
+        ret = avio_read(s->pb, c->buf, size);
+        if (ret < 0) {
+            return ret;
+        } else if (ret == 0) {
             return AVERROR(EIO);
+        }
 
         size = avpriv_dv_produce_packet(c->dv_demux, pkt, c->buf, size, pos);
     }
diff --git a/libavformat/dvenc.c b/libavformat/dvenc.c
index e99ac3ce..6cefe8be 100644
--- a/libavformat/dvenc.c
+++ b/libavformat/dvenc.c
@@ -250,6 +250,11 @@ static int dv_assemble_frame(DVMuxContext *c, AVStream* st,
         /* FIXME: we have to have more sensible approach than this one */
         if (c->has_video)
             av_log(st->codec, AV_LOG_ERROR, "Can't process DV frame #%d. Insufficient audio data or severe sync problem.\n", c->frames);
+        if (data_size != c->sys->frame_size) {
+            av_log(st->codec, AV_LOG_ERROR, "Unexpected frame size, %d != %d\n",
+                   data_size, c->sys->frame_size);
+            return AVERROR(ENOSYS);
+        }
 
         memcpy(*frame, data, c->sys->frame_size);
         c->has_video = 1;
@@ -297,7 +302,6 @@ static DVMuxContext* dv_init_mux(AVFormatContext* s)
 {
     DVMuxContext *c = s->priv_data;
     AVStream *vst = NULL;
-    AVDictionaryEntry *t;
     int i;
 
     /* we support at most 1 video and 2 audio streams */
@@ -358,8 +362,7 @@ static DVMuxContext* dv_init_mux(AVFormatContext* s)
     c->frames     = 0;
     c->has_audio  = 0;
     c->has_video  = 0;
-    if (t = av_dict_get(s->metadata, "creation_time", NULL, 0))
-        c->start_time = ff_iso8601_to_unix_time(t->value);
+    ff_parse_creation_time_metadata(s, &c->start_time, 1);
 
     for (i=0; i < c->n_ast; i++) {
         if (c->ast[i] && !(c->audio_data[i]=av_fifo_alloc_array(100, MAX_AUDIO_FRAME_SIZE))) {
diff --git a/libavformat/dxa.c b/libavformat/dxa.c
index 44033563..1a5822aa 100644
--- a/libavformat/dxa.c
+++ b/libavformat/dxa.c
@@ -106,7 +106,7 @@ static int dxa_read_header(AVFormatContext *s)
         ast = avformat_new_stream(s, NULL);
         if (!ast)
             return AVERROR(ENOMEM);
-        ret = ff_get_wav_header(pb, ast->codec, fsize, 0);
+        ret = ff_get_wav_header(s, pb, ast->codec, fsize, 0);
         if (ret < 0)
             return ret;
         if (ast->codec->sample_rate > 0)
@@ -207,7 +207,7 @@ static int dxa_read_packet(AVFormatContext *s, AVPacket *pkt)
             memcpy(pkt->data + pal_size, buf, DXA_EXTRA_SIZE);
             ret = avio_read(s->pb, pkt->data + DXA_EXTRA_SIZE + pal_size, size);
             if(ret != size){
-                av_free_packet(pkt);
+                av_packet_unref(pkt);
                 return AVERROR(EIO);
             }
             if(pal_size) memcpy(pkt->data, pal, pal_size);
diff --git a/libavformat/electronicarts.c b/libavformat/electronicarts.c
index d6a396b1..8601782a 100644
--- a/libavformat/electronicarts.c
+++ b/libavformat/electronicarts.c
@@ -59,17 +59,25 @@
 #define MVhd_TAG MKTAG('M', 'V', 'h', 'd')
 #define MV0K_TAG MKTAG('M', 'V', '0', 'K')
 #define MV0F_TAG MKTAG('M', 'V', '0', 'F')
+#define AVhd_TAG MKTAG('A', 'V', 'h', 'd')
+#define AV0K_TAG MKTAG('A', 'V', '0', 'K')
+#define AV0F_TAG MKTAG('A', 'V', '0', 'F')
 #define MVIh_TAG MKTAG('M', 'V', 'I', 'h')  /* CMV header */
 #define MVIf_TAG MKTAG('M', 'V', 'I', 'f')  /* CMV I-frame */
+#define AVP6_TAG MKTAG('A', 'V', 'P', '6')
 
-typedef struct EaDemuxContext {
-    int big_endian;
-
-    enum AVCodecID video_codec;
+typedef struct VideoProperties {
+    enum AVCodecID codec;
     AVRational time_base;
     int width, height;
     int nb_frames;
-    int video_stream_index;
+    int stream_index;
+} VideoProperties;
+
+typedef struct EaDemuxContext {
+    int big_endian;
+
+    VideoProperties video, alpha;
 
     enum AVCodecID audio_codec;
     int audio_stream_index;
@@ -78,6 +86,8 @@ typedef struct EaDemuxContext {
     int sample_rate;
     int num_channels;
     int num_samples;
+
+    int platform;
 } EaDemuxContext;
 
 static uint32_t read_arbitrary(AVIOContext *pb)
@@ -229,6 +239,7 @@ static int process_audio_header_elements(AVFormatContext *s)
                 return 0;
             }
             break;
+        case 15:
         case 16:
             ea->audio_codec = AV_CODEC_ID_MP3;
             break;
@@ -247,6 +258,8 @@ static int process_audio_header_elements(AVFormatContext *s)
         return 0;
     }
 
+    if (ea->audio_codec == AV_CODEC_ID_NONE && ea->platform == 0x01)
+        ea->audio_codec = AV_CODEC_ID_ADPCM_PSX;
     if (ea->sample_rate == -1)
         ea->sample_rate = revision == 3 ? 48000 : 22050;
 
@@ -301,46 +314,43 @@ static void process_audio_header_sead(AVFormatContext *s)
     ea->audio_codec  = AV_CODEC_ID_ADPCM_IMA_EA_SEAD;
 }
 
-static void process_video_header_mdec(AVFormatContext *s)
+static void process_video_header_mdec(AVFormatContext *s, VideoProperties *video)
 {
-    EaDemuxContext *ea = s->priv_data;
     AVIOContext *pb    = s->pb;
     avio_skip(pb, 4);
-    ea->width       = avio_rl16(pb);
-    ea->height      = avio_rl16(pb);
-    ea->time_base   = (AVRational) { 1, 15 };
-    ea->video_codec = AV_CODEC_ID_MDEC;
+    video->width       = avio_rl16(pb);
+    video->height      = avio_rl16(pb);
+    video->time_base   = (AVRational) { 1, 15 };
+    video->codec = AV_CODEC_ID_MDEC;
 }
 
-static int process_video_header_vp6(AVFormatContext *s)
+static int process_video_header_vp6(AVFormatContext *s, VideoProperties *video)
 {
-    EaDemuxContext *ea = s->priv_data;
-    AVIOContext *pb    = s->pb;
+    AVIOContext *pb = s->pb;
 
     avio_skip(pb, 8);
-    ea->nb_frames = avio_rl32(pb);
+    video->nb_frames = avio_rl32(pb);
     avio_skip(pb, 4);
-    ea->time_base.den = avio_rl32(pb);
-    ea->time_base.num = avio_rl32(pb);
-    if (ea->time_base.den <= 0 || ea->time_base.num <= 0) {
+    video->time_base.den = avio_rl32(pb);
+    video->time_base.num = avio_rl32(pb);
+    if (video->time_base.den <= 0 || video->time_base.num <= 0) {
         av_log(s, AV_LOG_ERROR, "Timebase is invalid\n");
         return AVERROR_INVALIDDATA;
     }
-    ea->video_codec   = AV_CODEC_ID_VP6;
+    video->codec   = AV_CODEC_ID_VP6;
 
     return 1;
 }
 
-static void process_video_header_cmv(AVFormatContext *s)
+static void process_video_header_cmv(AVFormatContext *s, VideoProperties *video)
 {
-    EaDemuxContext *ea = s->priv_data;
     int fps;
 
     avio_skip(s->pb, 10);
     fps = avio_rl16(s->pb);
     if (fps)
-        ea->time_base = (AVRational) { 1, fps };
-    ea->video_codec = AV_CODEC_ID_CMV;
+        video->time_base = (AVRational) { 1, fps };
+    video->codec = AV_CODEC_ID_CMV;
 }
 
 /* Process EA file header.
@@ -352,7 +362,7 @@ static int process_ea_header(AVFormatContext *s)
     AVIOContext *pb    = s->pb;
     int i;
 
-    for (i = 0; i < 5 && (!ea->audio_codec || !ea->video_codec); i++) {
+    for (i = 0; i < 5 && (!ea->audio_codec || !ea->video.codec); i++) {
         uint64_t startpos     = avio_tell(pb);
         int err               = 0;
 
@@ -382,10 +392,10 @@ static int process_ea_header(AVFormatContext *s)
             blockid = avio_rl32(pb);
             if (blockid == GSTR_TAG) {
                 avio_skip(pb, 4);
-            } else if ((blockid & 0xFFFF) != PT00_TAG) {
-                avpriv_request_sample(s, "unknown SCHl headerid");
-                return 0;
+            } else if ((blockid & 0xFF) != (PT00_TAG & 0xFF)) {
+                blockid = avio_rl32(pb);
             }
+            ea->platform = (blockid >> 16) & 0xFF;
             err = process_audio_header_elements(s);
             break;
 
@@ -394,40 +404,44 @@ static int process_ea_header(AVFormatContext *s)
             break;
 
         case MVIh_TAG:
-            process_video_header_cmv(s);
+            process_video_header_cmv(s, &ea->video);
             break;
 
         case kVGT_TAG:
-            ea->video_codec = AV_CODEC_ID_TGV;
+            ea->video.codec = AV_CODEC_ID_TGV;
             break;
 
         case mTCD_TAG:
-            process_video_header_mdec(s);
+            process_video_header_mdec(s, &ea->video);
             break;
 
         case MPCh_TAG:
-            ea->video_codec = AV_CODEC_ID_MPEG2VIDEO;
+            ea->video.codec = AV_CODEC_ID_MPEG2VIDEO;
             break;
 
         case pQGT_TAG:
         case TGQs_TAG:
-            ea->video_codec = AV_CODEC_ID_TGQ;
-            ea->time_base   = (AVRational) { 1, 15 };
+            ea->video.codec = AV_CODEC_ID_TGQ;
+            ea->video.time_base   = (AVRational) { 1, 15 };
             break;
 
         case pIQT_TAG:
-            ea->video_codec = AV_CODEC_ID_TQI;
-            ea->time_base   = (AVRational) { 1, 15 };
+            ea->video.codec = AV_CODEC_ID_TQI;
+            ea->video.time_base   = (AVRational) { 1, 15 };
             break;
 
         case MADk_TAG:
-            ea->video_codec = AV_CODEC_ID_MAD;
+            ea->video.codec = AV_CODEC_ID_MAD;
             avio_skip(pb, 6);
-            ea->time_base = (AVRational) { avio_rl16(pb), 1000 };
+            ea->video.time_base = (AVRational) { avio_rl16(pb), 1000 };
             break;
 
         case MVhd_TAG:
-            err = process_video_header_vp6(s);
+            err = process_video_header_vp6(s, &ea->video);
+            break;
+
+        case AVhd_TAG:
+            err = process_video_header_vp6(s, &ea->alpha);
             break;
         }
 
@@ -458,6 +472,7 @@ static int ea_probe(AVProbeData *p)
     case MPCh_TAG:
     case MVhd_TAG:
     case MVIh_TAG:
+    case AVP6_TAG:
         break;
     default:
         return 0;
@@ -472,6 +487,34 @@ static int ea_probe(AVProbeData *p)
     return AVPROBE_SCORE_MAX;
 }
 
+static int init_video_stream(AVFormatContext *s, VideoProperties *video)
+{
+    AVStream *st;
+
+    if (!video->codec) /* no codec recorded in *video: nothing to create */
+        return 0;
+
+    /* create a video stream and fill it from *video; returns 0 or AVERROR(ENOMEM) */
+    st = avformat_new_stream(s, NULL);
+    if (!st)
+        return AVERROR(ENOMEM);
+    video->stream_index = st->index; /* ea_read_packet() tags packets with this index */
+    st->codec->codec_type  = AVMEDIA_TYPE_VIDEO;
+    st->codec->codec_id    = video->codec;
+    /* parsing is necessary to make FFmpeg generate correct timestamps */
+    if (st->codec->codec_id == AV_CODEC_ID_MPEG2VIDEO)
+        st->need_parsing = AVSTREAM_PARSE_HEADERS;
+    st->codec->codec_tag   = 0; /* no fourcc */
+    st->codec->width       = video->width;
+    st->codec->height      = video->height;
+    st->duration           = st->nb_frames = video->nb_frames;
+    if (video->time_base.num) /* only set a time base when its numerator is non-zero */
+        avpriv_set_pts_info(st, 64, video->time_base.num, video->time_base.den);
+    st->r_frame_rate       = /* both frame rates are the inverse of the time base */
+    st->avg_frame_rate     = av_inv_q(video->time_base);
+    return 0;
+}
+
 static int ea_read_header(AVFormatContext *s)
 {
     EaDemuxContext *ea = s->priv_data;
@@ -480,26 +523,8 @@ static int ea_read_header(AVFormatContext *s)
     if (process_ea_header(s)<=0)
         return AVERROR(EIO);
 
-    if (ea->video_codec) {
-        /* initialize the video decoder stream */
-        st = avformat_new_stream(s, NULL);
-        if (!st)
-            return AVERROR(ENOMEM);
-        ea->video_stream_index = st->index;
-        st->codec->codec_type  = AVMEDIA_TYPE_VIDEO;
-        st->codec->codec_id    = ea->video_codec;
-        // parsing is necessary to make FFmpeg generate correct timestamps
-        if (st->codec->codec_id == AV_CODEC_ID_MPEG2VIDEO)
-            st->need_parsing = AVSTREAM_PARSE_HEADERS;
-        st->codec->codec_tag   = 0; /* no fourcc */
-        st->codec->width       = ea->width;
-        st->codec->height      = ea->height;
-        st->duration           = st->nb_frames = ea->nb_frames;
-        if (ea->time_base.num)
-            avpriv_set_pts_info(st, 64, ea->time_base.num, ea->time_base.den);
-        st->r_frame_rate       =
-        st->avg_frame_rate     = av_inv_q(ea->time_base);
-    }
+    if (init_video_stream(s, &ea->video) || init_video_stream(s, &ea->alpha))
+        return AVERROR(ENOMEM);
 
     if (ea->audio_codec) {
         if (ea->num_channels <= 0 || ea->num_channels > 2) {
@@ -580,11 +605,14 @@ static int ea_read_packet(AVFormatContext *s, AVPacket *pkt)
                 num_samples = avio_rl32(pb);
                 avio_skip(pb, 8);
                 chunk_size -= 12;
+            } else if (ea->audio_codec == AV_CODEC_ID_ADPCM_PSX) {
+                avio_skip(pb, 8);
+                chunk_size -= 8;
             }
 
             if (partial_packet) {
                 avpriv_request_sample(s, "video header followed by audio packet");
-                av_free_packet(pkt);
+                av_packet_unref(pkt);
                 partial_packet = 0;
             }
 
@@ -604,7 +632,7 @@ static int ea_read_packet(AVFormatContext *s, AVPacket *pkt)
             case AV_CODEC_ID_ADPCM_EA_R3:
                 if (pkt->size < 4) {
                     av_log(s, AV_LOG_ERROR, "Packet is too short\n");
-                    av_free_packet(pkt);
+                    av_packet_unref(pkt);
                     return AVERROR_INVALIDDATA;
                 }
                 if (ea->audio_codec == AV_CODEC_ID_ADPCM_EA_R3)
@@ -619,6 +647,9 @@ static int ea_read_packet(AVFormatContext *s, AVPacket *pkt)
             case AV_CODEC_ID_MP3:
                 pkt->duration = num_samples;
                 break;
+            case AV_CODEC_ID_ADPCM_PSX:
+                pkt->duration = chunk_size / (16 * ea->num_channels) * 28;
+                break;
             default:
                 pkt->duration = chunk_size / (ea->bytes * ea->num_channels);
             }
@@ -632,7 +663,19 @@ static int ea_read_packet(AVFormatContext *s, AVPacket *pkt)
         case SCEl_TAG:
         case SEND_TAG:
         case SEEN_TAG:
-            ret         = AVERROR(EIO);
+            while (!avio_feof(pb)) {
+                int tag = avio_rl32(pb);
+
+                if (tag == ISNh_TAG ||
+                    tag == SCHl_TAG ||
+                    tag == SEAD_TAG ||
+                    tag == SHEN_TAG) {
+                    avio_skip(pb, -4);
+                    break;
+                }
+            }
+            if (avio_feof(pb))
+                ret = AVERROR_EOF;
             packet_read = 1;
             break;
 
@@ -659,10 +702,12 @@ static int ea_read_packet(AVFormatContext *s, AVPacket *pkt)
             goto get_video_packet;
 
         case MV0K_TAG:
+        case AV0K_TAG:
         case MPCh_TAG:
         case pIQT_TAG:
             key = AV_PKT_FLAG_KEY;
         case MV0F_TAG:
+        case AV0F_TAG:
 get_video_packet:
             if (!chunk_size)
                 continue;
@@ -676,7 +721,10 @@ static int ea_read_packet(AVFormatContext *s, AVPacket *pkt)
                 break;
             }
             partial_packet = chunk_type == MVIh_TAG;
-            pkt->stream_index = ea->video_stream_index;
+            if (chunk_type == AV0K_TAG || chunk_type == AV0F_TAG)
+                pkt->stream_index = ea->alpha.stream_index;
+            else
+                pkt->stream_index = ea->video.stream_index;
             pkt->flags       |= key;
             packet_read       = 1;
             break;
@@ -688,7 +736,7 @@ static int ea_read_packet(AVFormatContext *s, AVPacket *pkt)
     }
 
     if (ret < 0 && partial_packet)
-        av_free_packet(pkt);
+        av_packet_unref(pkt);
     return ret;
 }
 
diff --git a/libavformat/ffm.h b/libavformat/ffm.h
index b392b8d8..c445f472 100644
--- a/libavformat/ffm.h
+++ b/libavformat/ffm.h
@@ -42,6 +42,7 @@ enum {
 };
 
 typedef struct FFMContext {
+    const AVClass *class;
     /* only reading mode */
     int64_t write_index, file_size;
     int read_state;
@@ -55,6 +56,7 @@ typedef struct FFMContext {
     uint8_t *packet_ptr, *packet_end;
     uint8_t packet[FFM_PACKET_SIZE];
     int64_t start_time;
+    int server_attached;
 } FFMContext;
 
 #endif /* AVFORMAT_FFM_H */
diff --git a/libavformat/ffmdec.c b/libavformat/ffmdec.c
index 9b50c9f5..257319bd 100644
--- a/libavformat/ffmdec.c
+++ b/libavformat/ffmdec.c
@@ -21,6 +21,7 @@
 
 #include <stdint.h>
 
+#include "libavutil/internal.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/intfloat.h"
 #include "libavutil/opt.h"
@@ -35,7 +36,7 @@ static int ffm_is_avail_data(AVFormatContext *s, int size)
 {
     FFMContext *ffm = s->priv_data;
     int64_t pos, avail_size;
-    int len;
+    ptrdiff_t len;
 
     len = ffm->packet_end - ffm->packet_ptr;
     if (size <= len)
@@ -48,7 +49,10 @@ static int ffm_is_avail_data(AVFormatContext *s, int size)
     } else {
     if (pos == ffm->write_index) {
         /* exactly at the end of stream */
-        return AVERROR(EAGAIN);
+        if (ffm->server_attached)
+            return AVERROR(EAGAIN);
+        else
+            return AVERROR_INVALIDDATA;
     } else if (pos < ffm->write_index) {
         avail_size = ffm->write_index - pos;
     } else {
@@ -58,11 +62,13 @@ static int ffm_is_avail_data(AVFormatContext *s, int size)
     avail_size = (avail_size / ffm->packet_size) * (ffm->packet_size - FFM_HEADER_SIZE) + len;
     if (size <= avail_size)
         return 1;
-    else
+    else if (ffm->server_attached)
         return AVERROR(EAGAIN);
+    else
+        return AVERROR_INVALIDDATA;
 }
 
-static int ffm_resync(AVFormatContext *s, int state)
+static int ffm_resync(AVFormatContext *s, uint32_t state)
 {
     av_log(s, AV_LOG_ERROR, "resyncing\n");
     while (state != PACKET_ID) {
@@ -81,7 +87,9 @@ static int ffm_read_data(AVFormatContext *s,
 {
     FFMContext *ffm = s->priv_data;
     AVIOContext *pb = s->pb;
-    int len, fill_size, size1, frame_offset, id;
+    int fill_size, size1, frame_offset;
+    uint32_t id;
+    ptrdiff_t len;
     int64_t last_pos = -1;
 
     size1 = size;
@@ -93,8 +101,12 @@ static int ffm_read_data(AVFormatContext *s,
         if (len > size)
             len = size;
         if (len == 0) {
-            if (avio_tell(pb) == ffm->file_size)
-                avio_seek(pb, ffm->packet_size, SEEK_SET);
+            if (avio_tell(pb) == ffm->file_size) {
+                if (ffm->server_attached) {
+                    avio_seek(pb, ffm->packet_size, SEEK_SET);
+                } else
+                    return AVERROR_EOF;
+            }
     retry_read:
             if (pb->buffer_size != ffm->packet_size) {
                 int64_t tell = avio_tell(pb);
@@ -113,9 +125,10 @@ static int ffm_read_data(AVFormatContext *s,
             ffm->dts = avio_rb64(pb);
             frame_offset = avio_rb16(pb);
             avio_read(pb, ffm->packet, ffm->packet_size - FFM_HEADER_SIZE);
-            ffm->packet_end = ffm->packet + (ffm->packet_size - FFM_HEADER_SIZE - fill_size);
-            if (ffm->packet_end < ffm->packet || frame_offset < 0)
+            if (ffm->packet_size < FFM_HEADER_SIZE + fill_size || frame_offset < 0) {
                 return -1;
+            }
+            ffm->packet_end = ffm->packet + (ffm->packet_size - FFM_HEADER_SIZE - fill_size);
             /* if first packet or resynchronization packet, we must
                handle it specifically */
             if (ffm->first_packet || (frame_offset & 0x8000)) {
@@ -131,8 +144,10 @@ static int ffm_read_data(AVFormatContext *s,
                     return 0;
                 }
                 ffm->first_packet = 0;
-                if ((frame_offset & 0x7fff) < FFM_HEADER_SIZE)
+                if ((frame_offset & 0x7fff) < FFM_HEADER_SIZE) {
+                    ffm->packet_end = ffm->packet_ptr;
                     return -1;
+                }
                 ffm->packet_ptr = ffm->packet + (frame_offset & 0x7fff) - FFM_HEADER_SIZE;
                 if (!header)
                     break;
@@ -150,7 +165,7 @@ static int ffm_read_data(AVFormatContext *s,
     return size1 - size;
 }
 
-/* ensure that acutal seeking happens between FFM_PACKET_SIZE
+/* ensure that actual seeking happens between FFM_PACKET_SIZE
    and file_size - FFM_PACKET_SIZE */
 static int64_t ffm_seek1(AVFormatContext *s, int64_t pos1)
 {
@@ -160,7 +175,7 @@ static int64_t ffm_seek1(AVFormatContext *s, int64_t pos1)
 
     pos = FFMIN(pos1, ffm->file_size - FFM_PACKET_SIZE);
     pos = FFMAX(pos, FFM_PACKET_SIZE);
-    av_dlog(s, "seek to %"PRIx64" -> %"PRIx64"\n", pos1, pos);
+    ff_dlog(s, "seek to %"PRIx64" -> %"PRIx64"\n", pos1, pos);
     return avio_seek(pb, pos, SEEK_SET);
 }
 
@@ -172,7 +187,7 @@ static int64_t get_dts(AVFormatContext *s, int64_t pos)
     ffm_seek1(s, pos);
     avio_skip(pb, 4);
     dts = avio_rb64(pb);
-    av_dlog(s, "dts=%0.6f\n", dts / 1000000.0);
+    ff_dlog(s, "dts=%0.6f\n", dts / 1000000.0);
     return dts;
 }
 
@@ -267,6 +282,7 @@ static int ffm2_read_header(AVFormatContext *s)
     AVStream *st;
     AVIOContext *pb = s->pb;
     AVCodecContext *codec;
+    const AVCodecDescriptor *codec_desc;
     int ret;
     int f_main = 0, f_cprv = -1, f_stvi = -1, f_stau = -1;
     AVCodec *enc;
@@ -321,12 +337,25 @@ static int ffm2_read_header(AVFormatContext *s)
             codec = st->codec;
             /* generic info */
             codec->codec_id = avio_rb32(pb);
+            codec_desc = avcodec_descriptor_get(codec->codec_id);
+            if (!codec_desc) {
+                av_log(s, AV_LOG_ERROR, "Invalid codec id: %d\n", codec->codec_id);
+                codec->codec_id = AV_CODEC_ID_NONE;
+                goto fail;
+            }
             codec->codec_type = avio_r8(pb);
+            if (codec->codec_type != codec_desc->type) {
+                av_log(s, AV_LOG_ERROR, "Codec type mismatch: expected %d, found %d\n",
+                       codec_desc->type, codec->codec_type);
+                codec->codec_id = AV_CODEC_ID_NONE;
+                codec->codec_type = AVMEDIA_TYPE_UNKNOWN;
+                goto fail;
+            }
             codec->bit_rate = avio_rb32(pb);
             codec->flags = avio_rb32(pb);
             codec->flags2 = avio_rb32(pb);
             codec->debug = avio_rb32(pb);
-            if (codec->flags & CODEC_FLAG_GLOBAL_HEADER) {
+            if (codec->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
                 if (ff_get_extradata(codec, pb, avio_rb32(pb)) < 0)
                     return AVERROR(ENOMEM);
             }
@@ -414,7 +443,7 @@ static int ffm2_read_header(AVFormatContext *s)
             }
             break;
         case MKBETAG('S', '2', 'V', 'I'):
-            if (f_stvi++) {
+            if (f_stvi++ || !size) {
                 ret = AVERROR(EINVAL);
                 goto fail;
             }
@@ -429,7 +458,7 @@ static int ffm2_read_header(AVFormatContext *s)
                 goto fail;
             break;
         case MKBETAG('S', '2', 'A', 'U'):
-            if (f_stau++) {
+            if (f_stau++ || !size) {
                 ret = AVERROR(EINVAL);
                 goto fail;
             }
@@ -470,6 +499,7 @@ static int ffm_read_header(AVFormatContext *s)
     AVStream *st;
     AVIOContext *pb = s->pb;
     AVCodecContext *codec;
+    const AVCodecDescriptor *codec_desc;
     int i, nb_streams;
     uint32_t tag;
 
@@ -507,7 +537,20 @@ static int ffm_read_header(AVFormatContext *s)
         codec = st->codec;
         /* generic info */
         codec->codec_id = avio_rb32(pb);
+        codec_desc = avcodec_descriptor_get(codec->codec_id);
+        if (!codec_desc) {
+            av_log(s, AV_LOG_ERROR, "Invalid codec id: %d\n", codec->codec_id);
+            codec->codec_id = AV_CODEC_ID_NONE;
+            goto fail;
+        }
         codec->codec_type = avio_r8(pb); /* codec_type */
+        if (codec->codec_type != codec_desc->type) {
+            av_log(s, AV_LOG_ERROR, "Codec type mismatch: expected %d, found %d\n",
+                   codec_desc->type, codec->codec_type);
+            codec->codec_id = AV_CODEC_ID_NONE;
+            codec->codec_type = AVMEDIA_TYPE_UNKNOWN;
+            goto fail;
+        }
         codec->bit_rate = avio_rb32(pb);
         codec->flags = avio_rb32(pb);
         codec->flags2 = avio_rb32(pb);
@@ -573,7 +616,7 @@ static int ffm_read_header(AVFormatContext *s)
         default:
             goto fail;
         }
-        if (codec->flags & CODEC_FLAG_GLOBAL_HEADER) {
+        if (codec->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
             if (ff_get_extradata(codec, pb, avio_rb32(pb)) < 0)
                 return AVERROR(ENOMEM);
         }
@@ -608,7 +651,7 @@ static int ffm_read_packet(AVFormatContext *s, AVPacket *pkt)
         if ((ret = ffm_is_avail_data(s, FRAME_HEADER_SIZE+4)) < 0)
             return ret;
 
-        av_dlog(s, "pos=%08"PRIx64" spos=%"PRIx64", write_index=%"PRIx64" size=%"PRIx64"\n",
+        ff_dlog(s, "pos=%08"PRIx64" spos=%"PRIx64", write_index=%"PRIx64" size=%"PRIx64"\n",
                avio_tell(s->pb), s->pb->pos, ffm->write_index, ffm->file_size);
         if (ffm_read_data(s, ffm->header, FRAME_HEADER_SIZE, 1) !=
             FRAME_HEADER_SIZE)
@@ -631,7 +674,7 @@ static int ffm_read_packet(AVFormatContext *s, AVPacket *pkt)
         pkt->stream_index = ffm->header[0];
         if ((unsigned)pkt->stream_index >= s->nb_streams) {
             av_log(s, AV_LOG_ERROR, "invalid stream index %d\n", pkt->stream_index);
-            av_free_packet(pkt);
+            av_packet_unref(pkt);
             ffm->read_state = READ_HEADER;
             return -1;
         }
@@ -642,7 +685,7 @@ static int ffm_read_packet(AVFormatContext *s, AVPacket *pkt)
         ffm->read_state = READ_HEADER;
         if (ffm_read_data(s, pkt->data, size, 0) != size) {
             /* bad case: desynchronized packet. we cancel all the packet loading */
-            av_free_packet(pkt);
+            av_packet_unref(pkt);
             return -1;
         }
         pkt->pts = AV_RB64(ffm->header+8);
@@ -666,7 +709,7 @@ static int ffm_seek(AVFormatContext *s, int stream_index, int64_t wanted_pts, in
     int64_t pts_min, pts_max, pts;
     double pos1;
 
-    av_dlog(s, "wanted_pts=%0.6f\n", wanted_pts / 1000000.0);
+    ff_dlog(s, "wanted_pts=%0.6f\n", wanted_pts / 1000000.0);
     /* find the position using linear interpolation (better than
        dichotomy in typical cases) */
     if (ffm->write_index && ffm->write_index < ffm->file_size) {
@@ -730,6 +773,19 @@ static int ffm_probe(AVProbeData *p)
     return 0;
 }
 
+static const AVOption options[] = {
+    {"server_attached", NULL, offsetof(FFMContext, server_attached), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AV_OPT_FLAG_EXPORT },
+    {"ffm_write_index", NULL, offsetof(FFMContext, write_index), AV_OPT_TYPE_INT64, {.i64 = 0}, 0, INT64_MAX, AV_OPT_FLAG_EXPORT },
+    {"ffm_file_size", NULL, offsetof(FFMContext, file_size), AV_OPT_TYPE_INT64, {.i64 = 0}, 0, INT64_MAX, AV_OPT_FLAG_EXPORT },
+    { NULL },
+};
+
+static const AVClass ffm_class = {
+    .class_name = "ffm demuxer",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
 AVInputFormat ff_ffm_demuxer = {
     .name           = "ffm",
     .long_name      = NULL_IF_CONFIG_SMALL("FFM (FFserver live feed)"),
@@ -739,4 +795,5 @@ AVInputFormat ff_ffm_demuxer = {
     .read_packet    = ffm_read_packet,
     .read_close     = ffm_close,
     .read_seek      = ffm_seek,
+    .priv_class     = &ffm_class,
 };
diff --git a/libavformat/ffmenc.c b/libavformat/ffmenc.c
index 3abbfdd0..0f23b796 100644
--- a/libavformat/ffmenc.c
+++ b/libavformat/ffmenc.c
@@ -221,17 +221,13 @@ static int ffm_write_recommended_config(AVIOContext *pb, AVCodecContext *ctx, un
 static int ffm_write_header(AVFormatContext *s)
 {
     FFMContext *ffm = s->priv_data;
-    AVDictionaryEntry *t;
     AVStream *st;
     AVIOContext *pb = s->pb;
     AVCodecContext *codec;
     int bit_rate, i, ret;
 
-    if (t = av_dict_get(s->metadata, "creation_time", NULL, 0)) {
-        ret = av_parse_time(&ffm->start_time, t->value, 0);
-        if (ret < 0)
-            return ret;
-    }
+    if ((ret = ff_parse_creation_time_metadata(s, &ffm->start_time, 0)) < 0)
+        return ret;
 
     ffm->packet_size = FFM_PACKET_SIZE;
 
@@ -268,7 +264,7 @@ static int ffm_write_header(AVFormatContext *s)
         avio_wb32(pb, codec->flags);
         avio_wb32(pb, codec->flags2);
         avio_wb32(pb, codec->debug);
-        if (codec->flags & CODEC_FLAG_GLOBAL_HEADER) {
+        if (codec->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
             avio_wb32(pb, codec->extradata_size);
             avio_write(pb, codec->extradata, codec->extradata_size);
         }
diff --git a/libavformat/file.c b/libavformat/file.c
index 6511328d..544647f5 100644
--- a/libavformat/file.c
+++ b/libavformat/file.c
@@ -23,6 +23,9 @@
 #include "libavutil/internal.h"
 #include "libavutil/opt.h"
 #include "avformat.h"
+#if HAVE_DIRENT_H
+#include <dirent.h>
+#endif
 #include <fcntl.h>
 #if HAVE_IO_H
 #include <io.h>
@@ -44,6 +47,24 @@
 #  endif
 #endif
 
+/* Not available in POSIX.1-1996 */
+#ifndef S_ISLNK
+#  ifdef S_IFLNK
+#    define S_ISLNK(m) (((m) & S_IFLNK) == S_IFLNK)
+#  else
+#    define S_ISLNK(m) 0
+#  endif
+#endif
+
+/* Not available in POSIX.1-1996 */
+#ifndef S_ISSOCK
+#  ifdef S_IFSOCK
+#    define S_ISSOCK(m) (((m) & S_IFMT) == S_IFSOCK)
+#  else
+#    define S_ISSOCK(m) 0
+#  endif
+#endif
+
 /* standard file protocol */
 
 typedef struct FileContext {
@@ -51,10 +72,13 @@ typedef struct FileContext {
     int fd;
     int trunc;
     int blocksize;
+#if HAVE_DIRENT_H
+    DIR *dir;
+#endif
 } FileContext;
 
 static const AVOption file_options[] = {
-    { "truncate", "truncate existing files on write", offsetof(FileContext, trunc), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM },
+    { "truncate", "truncate existing files on write", offsetof(FileContext, trunc), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM },
     { "blocksize", "set I/O operation maximum block size", offsetof(FileContext, blocksize), AV_OPT_TYPE_INT, { .i64 = INT_MAX }, 1, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM },
     { NULL }
 };
@@ -81,19 +105,19 @@ static const AVClass pipe_class = {
 static int file_read(URLContext *h, unsigned char *buf, int size)
 {
     FileContext *c = h->priv_data;
-    int r;
+    int ret;
     size = FFMIN(size, c->blocksize);
-    r = read(c->fd, buf, size);
-    return (-1 == r)?AVERROR(errno):r;
+    ret = read(c->fd, buf, size);
+    return (ret == -1) ? AVERROR(errno) : ret;
 }
 
 static int file_write(URLContext *h, const unsigned char *buf, int size)
 {
     FileContext *c = h->priv_data;
-    int r;
+    int ret;
     size = FFMIN(size, c->blocksize);
-    r = write(c->fd, buf, size);
-    return (-1 == r)?AVERROR(errno):r;
+    ret = write(c->fd, buf, size);
+    return (ret == -1) ? AVERROR(errno) : ret;
 }
 
 static int file_get_handle(URLContext *h)
@@ -131,6 +155,42 @@ static int file_check(URLContext *h, int mask)
     return ret;
 }
 
+static int file_delete(URLContext *h)
+{
+#if HAVE_UNISTD_H
+    int ret;
+    const char *filename = h->filename;
+    av_strstart(filename, "file:", &filename);
+
+    ret = rmdir(filename);
+    if (ret < 0 && errno == ENOTDIR)
+        ret = unlink(filename);
+    if (ret < 0)
+        return AVERROR(errno);
+
+    return ret;
+#else
+    return AVERROR(ENOSYS);
+#endif /* HAVE_UNISTD_H */
+}
+
+static int file_move(URLContext *h_src, URLContext *h_dst)
+{
+#if HAVE_UNISTD_H
+    const char *filename_src = h_src->filename;
+    const char *filename_dst = h_dst->filename;
+    av_strstart(filename_src, "file:", &filename_src);
+    av_strstart(filename_dst, "file:", &filename_dst);
+
+    if (rename(filename_src, filename_dst) < 0)
+        return AVERROR(errno);
+
+    return 0;
+#else
+    return AVERROR(ENOSYS);
+#endif /* HAVE_UNISTD_H */
+}
+
 #if CONFIG_FILE_PROTOCOL
 
 static int file_open(URLContext *h, const char *filename, int flags)
@@ -189,6 +249,90 @@ static int file_close(URLContext *h)
     return close(c->fd);
 }
 
+static int file_open_dir(URLContext *h)
+{
+#if HAVE_LSTAT
+    FileContext *c = h->priv_data;
+
+    c->dir = opendir(h->filename);
+    if (!c->dir)
+        return AVERROR(errno);
+
+    return 0;
+#else
+    return AVERROR(ENOSYS);
+#endif /* HAVE_LSTAT */
+}
+
+static int file_read_dir(URLContext *h, AVIODirEntry **next)
+{
+#if HAVE_LSTAT
+    FileContext *c = h->priv_data;
+    struct dirent *dir;
+    char *fullpath = NULL;
+
+    *next = ff_alloc_dir_entry();
+    if (!*next)
+        return AVERROR(ENOMEM);
+    do {
+        errno = 0;
+        dir = readdir(c->dir);
+        if (!dir) {
+            av_freep(next);
+            return AVERROR(errno);
+        }
+    } while (!strcmp(dir->d_name, ".") || !strcmp(dir->d_name, ".."));
+
+    fullpath = av_append_path_component(h->filename, dir->d_name);
+    if (fullpath) {
+        struct stat st;
+        if (!lstat(fullpath, &st)) {
+            if (S_ISDIR(st.st_mode))
+                (*next)->type = AVIO_ENTRY_DIRECTORY;
+            else if (S_ISFIFO(st.st_mode))
+                (*next)->type = AVIO_ENTRY_NAMED_PIPE;
+            else if (S_ISCHR(st.st_mode))
+                (*next)->type = AVIO_ENTRY_CHARACTER_DEVICE;
+            else if (S_ISBLK(st.st_mode))
+                (*next)->type = AVIO_ENTRY_BLOCK_DEVICE;
+            else if (S_ISLNK(st.st_mode))
+                (*next)->type = AVIO_ENTRY_SYMBOLIC_LINK;
+            else if (S_ISSOCK(st.st_mode))
+                (*next)->type = AVIO_ENTRY_SOCKET;
+            else if (S_ISREG(st.st_mode))
+                (*next)->type = AVIO_ENTRY_FILE;
+            else
+                (*next)->type = AVIO_ENTRY_UNKNOWN;
+
+            (*next)->group_id = st.st_gid;
+            (*next)->user_id = st.st_uid;
+            (*next)->size = st.st_size;
+            (*next)->filemode = st.st_mode & 0777;
+            (*next)->modification_timestamp = INT64_C(1000000) * st.st_mtime;
+            (*next)->access_timestamp =  INT64_C(1000000) * st.st_atime;
+            (*next)->status_change_timestamp = INT64_C(1000000) * st.st_ctime;
+        }
+        av_free(fullpath);
+    }
+
+    (*next)->name = av_strdup(dir->d_name);
+    return 0;
+#else
+    return AVERROR(ENOSYS);
+#endif /* HAVE_LSTAT */
+}
+
+static int file_close_dir(URLContext *h)
+{
+#if HAVE_LSTAT
+    FileContext *c = h->priv_data;
+    closedir(c->dir);
+    return 0;
+#else
+    return AVERROR(ENOSYS);
+#endif /* HAVE_LSTAT */
+}
+
 URLProtocol ff_file_protocol = {
     .name                = "file",
     .url_open            = file_open,
@@ -198,8 +342,14 @@ URLProtocol ff_file_protocol = {
     .url_close           = file_close,
     .url_get_file_handle = file_get_handle,
     .url_check           = file_check,
+    .url_delete          = file_delete,
+    .url_move            = file_move,
     .priv_data_size      = sizeof(FileContext),
     .priv_data_class     = &file_class,
+    .url_open_dir        = file_open_dir,
+    .url_read_dir        = file_read_dir,
+    .url_close_dir       = file_close_dir,
+    .default_whitelist   = "file,crypto"
 };
 
 #endif /* CONFIG_FILE_PROTOCOL */
@@ -238,6 +388,7 @@ URLProtocol ff_pipe_protocol = {
     .url_check           = file_check,
     .priv_data_size      = sizeof(FileContext),
     .priv_data_class     = &pipe_class,
+    .default_whitelist   = "crypto"
 };
 
 #endif /* CONFIG_PIPE_PROTOCOL */
diff --git a/libavformat/flac_picture.c b/libavformat/flac_picture.c
index 669fd2e7..7bd98258 100644
--- a/libavformat/flac_picture.c
+++ b/libavformat/flac_picture.c
@@ -108,10 +108,10 @@ int ff_flac_parse_picture(AVFormatContext *s, uint8_t *buf, int buf_size)
             ret = AVERROR_INVALIDDATA;
         goto fail;
     }
-    if (!(data = av_buffer_alloc(len + FF_INPUT_BUFFER_PADDING_SIZE))) {
+    if (!(data = av_buffer_alloc(len + AV_INPUT_BUFFER_PADDING_SIZE))) {
         RETURN_ERROR(AVERROR(ENOMEM));
     }
-    memset(data->data + len, 0, FF_INPUT_BUFFER_PADDING_SIZE);
+    memset(data->data + len, 0, AV_INPUT_BUFFER_PADDING_SIZE);
     if (avio_read(pb, data->data, len) != len) {
         av_log(s, AV_LOG_ERROR, "Error reading attached picture data.\n");
         if (s->error_recognition & AV_EF_EXPLODE)
diff --git a/libavformat/flacdec.c b/libavformat/flacdec.c
index 4207fd2b..eb92216c 100644
--- a/libavformat/flacdec.c
+++ b/libavformat/flacdec.c
@@ -28,11 +28,27 @@
 #include "vorbiscomment.h"
 #include "replaygain.h"
 
+#define SEEKPOINT_SIZE 18
+
+typedef struct FLACDecContext {
+    int found_seektable;
+} FLACDecContext;
+
+static void reset_index_position(int64_t metadata_head_size, AVStream *st)
+{
+    /* seek table offsets are relative to the start of the frame data; add the size of the metadata blocks to get absolute file positions */
+    int i;
+    for(i=0; i<st->nb_index_entries; i++) {
+        st->index_entries[i].pos += metadata_head_size;
+    }
+}
+
 static int flac_read_header(AVFormatContext *s)
 {
     int ret, metadata_last=0, metadata_type, metadata_size, found_streaminfo=0;
     uint8_t header[4];
     uint8_t *buffer=NULL;
+    FLACDecContext *flac = s->priv_data;
     AVStream *st = avformat_new_stream(s, NULL);
     if (!st)
         return AVERROR(ENOMEM);
@@ -58,7 +74,8 @@ static int flac_read_header(AVFormatContext *s)
         case FLAC_METADATA_TYPE_CUESHEET:
         case FLAC_METADATA_TYPE_PICTURE:
         case FLAC_METADATA_TYPE_VORBIS_COMMENT:
-            buffer = av_mallocz(metadata_size + FF_INPUT_BUFFER_PADDING_SIZE);
+        case FLAC_METADATA_TYPE_SEEKTABLE:
+            buffer = av_mallocz(metadata_size + AV_INPUT_BUFFER_PADDING_SIZE);
             if (!buffer) {
                 return AVERROR(ENOMEM);
             }
@@ -132,7 +149,23 @@ static int flac_read_header(AVFormatContext *s)
                 av_log(s, AV_LOG_ERROR, "Error parsing attached picture.\n");
                 return ret;
             }
-        } else {
+        } else if (metadata_type == FLAC_METADATA_TYPE_SEEKTABLE) {
+            const uint8_t *seekpoint = buffer;
+            int i, seek_point_count = metadata_size/SEEKPOINT_SIZE;
+            flac->found_seektable = 1;
+            if ((s->flags&AVFMT_FLAG_FAST_SEEK)) {
+                for(i=0; i<seek_point_count; i++) {
+                    int64_t timestamp = bytestream_get_be64(&seekpoint);
+                    int64_t pos = bytestream_get_be64(&seekpoint);
+                    /* skip number of samples */
+                    bytestream_get_be16(&seekpoint);
+                    av_add_index_entry(st, pos, timestamp, 0, 0, AVINDEX_KEYFRAME);
+                }
+            }
+            av_freep(&buffer);
+        }
+        else {
+
             /* STREAMINFO must be the first block */
             if (!found_streaminfo) {
                 RETURN_ERROR(AVERROR_INVALIDDATA);
@@ -169,6 +202,7 @@ static int flac_read_header(AVFormatContext *s)
     if (ret < 0)
         return ret;
 
+    reset_index_position(avio_tell(s->pb), st);
     return 0;
 
 fail:
@@ -229,11 +263,11 @@ static av_unused int64_t flac_read_timestamp(AVFormatContext *s, int stream_inde
                 break;
         }
         av_init_packet(&out_pkt);
-        ret = av_parser_parse2(parser, st->codec,
-                               &out_pkt.data, &out_pkt.size, pkt.data, pkt.size,
-                               pkt.pts, pkt.dts, *ppos);
+        av_parser_parse2(parser, st->codec,
+                         &out_pkt.data, &out_pkt.size, pkt.data, pkt.size,
+                         pkt.pts, pkt.dts, *ppos);
 
-        av_free_packet(&pkt);
+        av_packet_unref(&pkt);
         if (out_pkt.size){
             int size = out_pkt.size;
             if (parser->pts != AV_NOPTS_VALUE){
@@ -249,14 +283,38 @@ static av_unused int64_t flac_read_timestamp(AVFormatContext *s, int stream_inde
     return pts;
 }
 
+static int flac_seek(AVFormatContext *s, int stream_index, int64_t timestamp, int flags) {
+    int index;
+    int64_t pos;
+    AVIndexEntry e;
+    FLACDecContext *flac = s->priv_data;
+
+    if (!flac->found_seektable || !(s->flags&AVFMT_FLAG_FAST_SEEK)) {
+        return -1;
+    }
+
+    index = av_index_search_timestamp(s->streams[0], timestamp, flags);
+    if(index<0 || index >= s->streams[0]->nb_index_entries)
+        return -1;
+
+    e = s->streams[0]->index_entries[index];
+    pos = avio_seek(s->pb, e.pos, SEEK_SET);
+    if (pos >= 0) {
+        return 0;
+    }
+    return -1;
+}
+
 AVInputFormat ff_flac_demuxer = {
     .name           = "flac",
     .long_name      = NULL_IF_CONFIG_SMALL("raw FLAC"),
     .read_probe     = flac_probe,
     .read_header    = flac_read_header,
     .read_packet    = ff_raw_read_partial_packet,
+    .read_seek      = flac_seek,
     .read_timestamp = flac_read_timestamp,
     .flags          = AVFMT_GENERIC_INDEX,
     .extensions     = "flac",
     .raw_codec_id   = AV_CODEC_ID_FLAC,
+    .priv_data_size = sizeof(FLACDecContext),
 };
diff --git a/libavformat/flacenc.c b/libavformat/flacenc.c
index a45f83ec..321af50e 100644
--- a/libavformat/flacenc.c
+++ b/libavformat/flacenc.c
@@ -187,7 +187,7 @@ static int flac_write_packet(struct AVFormatContext *s, AVPacket *pkt)
 }
 
 static const AVOption flacenc_options[] = {
-    { "write_header", "Write the file header", offsetof(FlacMuxerContext, write_header), AV_OPT_TYPE_INT, {.i64 = 1}, 0, 1, AV_OPT_FLAG_ENCODING_PARAM },
+    { "write_header", "Write the file header", offsetof(FlacMuxerContext, write_header), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, AV_OPT_FLAG_ENCODING_PARAM },
     { NULL },
 };
 
diff --git a/libavformat/flic.c b/libavformat/flic.c
index bef70c18..30de3515 100644
--- a/libavformat/flic.c
+++ b/libavformat/flic.c
@@ -227,7 +227,7 @@ static int flic_read_packet(AVFormatContext *s,
             ret = avio_read(pb, pkt->data + FLIC_PREAMBLE_SIZE,
                 size - FLIC_PREAMBLE_SIZE);
             if (ret != size - FLIC_PREAMBLE_SIZE) {
-                av_free_packet(pkt);
+                av_packet_unref(pkt);
                 ret = AVERROR(EIO);
             }
             packet_read = 1;
@@ -245,7 +245,7 @@ static int flic_read_packet(AVFormatContext *s,
             ret = avio_read(pb, pkt->data, size);
 
             if (ret != size) {
-                av_free_packet(pkt);
+                av_packet_unref(pkt);
                 ret = AVERROR(EIO);
             }
 
diff --git a/libavformat/flv.h b/libavformat/flv.h
index db9468f4..df5ce3d1 100644
--- a/libavformat/flv.h
+++ b/libavformat/flv.h
@@ -56,7 +56,7 @@ enum {
     FLV_HEADER_FLAG_HASAUDIO = 4,
 };
 
-enum {
+enum FlvTagType {
     FLV_TAG_TYPE_AUDIO = 0x08,
     FLV_TAG_TYPE_VIDEO = 0x09,
     FLV_TAG_TYPE_META  = 0x12,
diff --git a/libavformat/flvdec.c b/libavformat/flvdec.c
index 940d4dda..5090ac21 100644
--- a/libavformat/flvdec.c
+++ b/libavformat/flvdec.c
@@ -39,6 +39,8 @@
 
 #define VALIDATE_INDEX_TS_THRESH 2500
 
+#define RESYNC_BUFFER_SIZE (1<<20)
+
 typedef struct FLVContext {
     const AVClass *class; ///< Class for private options.
     int trust_metadata;   ///< configure streams according onMetaData
@@ -54,6 +56,11 @@ typedef struct FLVContext {
     int validate_next;
     int validate_count;
     int searched_for_end;
+
+    uint8_t resync_buffer[2*RESYNC_BUFFER_SIZE];
+
+    int broken_sizes;
+    int sum_flv_tag_size;
 } FLVContext;
 
 static int probe(AVProbeData *p, int live)
@@ -433,6 +440,8 @@ static int amf_parse_object(AVFormatContext *s, AVStream *astream,
     case AMF_DATA_TYPE_UNSUPPORTED:
         break;     // these take up no additional space
     case AMF_DATA_TYPE_MIXEDARRAY:
+    {
+        unsigned v;
         avio_skip(ioc, 4);     // skip 32-bit max array index
         while (avio_tell(ioc) < max_pos - 2 &&
                amf_get_string(ioc, str_val, sizeof(str_val)) > 0)
@@ -441,11 +450,13 @@ static int amf_parse_object(AVFormatContext *s, AVStream *astream,
             if (amf_parse_object(s, astream, vstream, str_val, max_pos,
                                  depth + 1) < 0)
                 return -1;
-        if (avio_r8(ioc) != AMF_END_OF_OBJECT) {
-            av_log(s, AV_LOG_ERROR, "Missing AMF_END_OF_OBJECT in AMF_DATA_TYPE_MIXEDARRAY\n");
+        v = avio_r8(ioc);
+        if (v != AMF_END_OF_OBJECT) {
+            av_log(s, AV_LOG_ERROR, "Missing AMF_END_OF_OBJECT in AMF_DATA_TYPE_MIXEDARRAY, found %d\n", v);
             return -1;
         }
         break;
+    }
     case AMF_DATA_TYPE_ARRAY:
     {
         unsigned int arraylen, i;
@@ -508,6 +519,17 @@ static int amf_parse_object(AVFormatContext *s, AVStream *astream,
                     }
                 }
             }
+            if (amf_type == AMF_DATA_TYPE_STRING) {
+                if (!strcmp(key, "encoder")) {
+                    int version = -1;
+                    if (1 == sscanf(str_val, "Open Broadcaster Software v0.%d", &version)) {
+                        if (version > 0 && version <= 655)
+                            flv->broken_sizes = 1;
+                    }
+                } else if (!strcmp(key, "metadatacreator") && !strcmp(str_val, "MEGA")) {
+                    flv->broken_sizes = 1;
+                }
+            }
         }
 
         if (amf_type == AMF_DATA_TYPE_OBJECT && s->nb_streams == 1 &&
@@ -608,6 +630,7 @@ static int flv_read_metabody(AVFormatContext *s, int64_t next_pos)
 static int flv_read_header(AVFormatContext *s)
 {
     int offset, flags;
+    FLVContext *flv = s->priv_data;
 
     avio_skip(s->pb, 4);
     flags = avio_r8(s->pb);
@@ -628,6 +651,7 @@ static int flv_read_header(AVFormatContext *s)
     avio_skip(s->pb, 4);
 
     s->start_time = 0;
+    flv->sum_flv_tag_size = 0;
 
     return 0;
 }
@@ -654,7 +678,7 @@ static int flv_queue_extradata(FLVContext *flv, AVIOContext *pb, int stream,
 {
     av_free(flv->new_extradata[stream]);
     flv->new_extradata[stream] = av_mallocz(size +
-                                            FF_INPUT_BUFFER_PADDING_SIZE);
+                                            AV_INPUT_BUFFER_PADDING_SIZE);
     if (!flv->new_extradata[stream])
         return AVERROR(ENOMEM);
     flv->new_extradata_size[stream] = size;
@@ -786,25 +810,60 @@ static int flv_data_packet(AVFormatContext *s, AVPacket *pkt,
     return ret;
 }
 
+static int resync(AVFormatContext *s)
+{
+    FLVContext *flv = s->priv_data;
+    int64_t i;
+    int64_t pos = avio_tell(s->pb);
+
+    for (i=0; !avio_feof(s->pb); i++) {
+        int j  = i & (RESYNC_BUFFER_SIZE-1);
+        int j1 = j + RESYNC_BUFFER_SIZE;
+        flv->resync_buffer[j ] =
+        flv->resync_buffer[j1] = avio_r8(s->pb);
+
+        if (i > 22) {
+            unsigned lsize2 = AV_RB32(flv->resync_buffer + j1 - 4);
+            if (lsize2 >= 11 && lsize2 + 8LL < FFMIN(i, RESYNC_BUFFER_SIZE)) {
+                unsigned  size2 = AV_RB24(flv->resync_buffer + j1 - lsize2 + 1 - 4);
+                unsigned lsize1 = AV_RB32(flv->resync_buffer + j1 - lsize2 - 8);
+                if (lsize1 >= 11 && lsize1 + 8LL + lsize2 < FFMIN(i, RESYNC_BUFFER_SIZE)) {
+                    unsigned  size1 = AV_RB24(flv->resync_buffer + j1 - lsize1 + 1 - lsize2 - 8);
+                    if (size1 == lsize1 - 11 && size2  == lsize2 - 11) {
+                        avio_seek(s->pb, pos + i - lsize1 - lsize2 - 8, SEEK_SET);
+                        return 1;
+                    }
+                }
+            }
+        }
+    }
+    return AVERROR_EOF;
+}
+
 static int flv_read_packet(AVFormatContext *s, AVPacket *pkt)
 {
     FLVContext *flv = s->priv_data;
-    int ret, i, type, size, flags;
+    int ret, i, size, flags;
+    enum FlvTagType type;
     int stream_type=-1;
     int64_t next, pos, meta_pos;
     int64_t dts, pts = AV_NOPTS_VALUE;
     int av_uninit(channels);
     int av_uninit(sample_rate);
     AVStream *st    = NULL;
+    int last = -1;
+    int orig_size;
 
+retry:
     /* pkt size is repeated at end. skip it */
-    for (;; avio_skip(s->pb, 4)) {
         pos  = avio_tell(s->pb);
         type = (avio_r8(s->pb) & 0x1F);
+        orig_size =
         size = avio_rb24(s->pb);
+        flv->sum_flv_tag_size += size + 11;
         dts  = avio_rb24(s->pb);
-        dts |= avio_r8(s->pb) << 24;
-        av_log(s, AV_LOG_TRACE, "type:%d, size:%d, dts:%"PRId64" pos:%"PRId64"\n", type, size, dts, avio_tell(s->pb));
+        dts |= (unsigned)avio_r8(s->pb) << 24;
+        av_log(s, AV_LOG_TRACE, "type:%d, size:%d, last:%d, dts:%"PRId64" pos:%"PRId64"\n", type, size, last, dts, avio_tell(s->pb));
         if (avio_feof(s->pb))
             return AVERROR_EOF;
         avio_skip(s->pb, 3); /* stream id, always 0 */
@@ -826,8 +885,10 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt)
             }
         }
 
-        if (size == 0)
-            continue;
+        if (size == 0) {
+            ret = FFERROR_REDO;
+            goto leave;
+        }
 
         next = size + avio_tell(s->pb);
 
@@ -848,6 +909,13 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt)
                 meta_pos = avio_tell(s->pb);
                 type = flv_read_metabody(s, next);
                 if (type == 0 && dts == 0 || type < 0 || type == TYPE_UNKNOWN) {
+                    if (type < 0 && flv->validate_count &&
+                        flv->validate_index[0].pos     > next &&
+                        flv->validate_index[0].pos - 4 < next
+                    ) {
+                        av_log(s, AV_LOG_WARNING, "Adjusting next position due to index mismatch\n");
+                        next = flv->validate_index[0].pos - 4;
+                    }
                     goto skip;
                 } else if (type == TYPE_ONTEXTDATA) {
                     avpriv_request_sample(s, "OnTextData packet");
@@ -863,12 +931,15 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt)
                    type, size, flags);
 skip:
             avio_seek(s->pb, next, SEEK_SET);
-            continue;
+            ret = FFERROR_REDO;
+            goto leave;
         }
 
         /* skip empty data packets */
-        if (!size)
-            continue;
+        if (!size) {
+            ret = FFERROR_REDO;
+            goto leave;
+        }
 
         /* now find stream */
         for (i = 0; i < s->nb_streams; i++) {
@@ -888,7 +959,7 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt)
         }
         if (i == s->nb_streams) {
             static const enum AVMediaType stream_types[] = {AVMEDIA_TYPE_VIDEO, AVMEDIA_TYPE_AUDIO, AVMEDIA_TYPE_SUBTITLE};
-            av_log(s, AV_LOG_WARNING, "Stream discovered after head already parsed\n");
+            av_log(s, AV_LOG_WARNING, "%s stream discovered after head already parsed\n", av_get_media_type_string(stream_types[stream_type]));
             st = create_stream(s, stream_types[stream_type]);
             if (!st)
                 return AVERROR(ENOMEM);
@@ -896,8 +967,9 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt)
         }
         av_log(s, AV_LOG_TRACE, "%d %X %d \n", stream_type, flags, st->discard);
 
-        if ((flags & FLV_VIDEO_FRAMETYPE_MASK) == FLV_FRAME_KEY ||
-            stream_type == FLV_STREAM_TYPE_AUDIO)
+        if (s->pb->seekable &&
+            ((flags & FLV_VIDEO_FRAMETYPE_MASK) == FLV_FRAME_KEY ||
+              stream_type == FLV_STREAM_TYPE_AUDIO))
             av_add_index_entry(st, pos, dts, size, 0, AVINDEX_KEYFRAME);
 
         if (  (st->discard >= AVDISCARD_NONKEY && !((flags & FLV_VIDEO_FRAMETYPE_MASK) == FLV_FRAME_KEY || (stream_type == FLV_STREAM_TYPE_AUDIO)))
@@ -905,14 +977,14 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt)
             || st->discard >= AVDISCARD_ALL
         ) {
             avio_seek(s->pb, next, SEEK_SET);
-            continue;
+            ret = FFERROR_REDO;
+            goto leave;
         }
-        break;
-    }
 
     // if not streamed and no duration from metadata then seek to end to find
     // the duration from the timestamps
-    if (s->pb->seekable && (!s->duration || s->duration == AV_NOPTS_VALUE) && !flv->searched_for_end) {
+    if (s->pb->seekable && (!s->duration || s->duration == AV_NOPTS_VALUE) &&
+        !flv->searched_for_end) {
         int size;
         const int64_t pos   = avio_tell(s->pb);
         // Read the last 4 bytes of the file, this should be the size of the
@@ -921,17 +993,19 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt)
 retry_duration:
         avio_seek(s->pb, fsize - 4, SEEK_SET);
         size = avio_rb32(s->pb);
-        // Seek to the start of the last FLV tag at position (fsize - 4 - size)
-        // but skip the byte indicating the type.
-        avio_seek(s->pb, fsize - 3 - size, SEEK_SET);
-        if (size == avio_rb24(s->pb) + 11) {
-            uint32_t ts = avio_rb24(s->pb);
-            ts         |= avio_r8(s->pb) << 24;
-            if (ts)
-                s->duration = ts * (int64_t)AV_TIME_BASE / 1000;
-            else if (fsize >= 8 && fsize - 8 >= size) {
-                fsize -= size+4;
-                goto retry_duration;
+        if (size > 0 && size < fsize) {
+            // Seek to the start of the last FLV tag at position (fsize - 4 - size)
+            // but skip the byte indicating the type.
+            avio_seek(s->pb, fsize - 3 - size, SEEK_SET);
+            if (size == avio_rb24(s->pb) + 11) {
+                uint32_t ts = avio_rb24(s->pb);
+                ts         |= avio_r8(s->pb) << 24;
+                if (ts)
+                    s->duration = ts * (int64_t)AV_TIME_BASE / 1000;
+                else if (fsize >= 8 && fsize - 8 >= size) {
+                    fsize -= size+4;
+                    goto retry_duration;
+                }
             }
         }
 
@@ -1001,7 +1075,7 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt)
             if (st->codec->extradata) {
                 if ((ret = flv_queue_extradata(flv, s->pb, stream_type, size)) < 0)
                     return ret;
-                ret = AVERROR(EAGAIN);
+                ret = FFERROR_REDO;
                 goto leave;
             }
             if ((ret = flv_get_extradata(s, st, size)) < 0)
@@ -1028,14 +1102,14 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt)
                 }
             }
 
-            ret = AVERROR(EAGAIN);
+            ret = FFERROR_REDO;
             goto leave;
         }
     }
 
     /* skip empty data packets */
     if (!size) {
-        ret = AVERROR(EAGAIN);
+        ret = FFERROR_REDO;
         goto leave;
     }
 
@@ -1069,7 +1143,18 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt)
         pkt->flags |= AV_PKT_FLAG_KEY;
 
 leave:
-    avio_skip(s->pb, 4);
+    last = avio_rb32(s->pb);
+    if (last != orig_size + 11 &&
+        (last != orig_size || !last) && last != flv->sum_flv_tag_size &&
+        !flv->broken_sizes) {
+        av_log(s, AV_LOG_ERROR, "Packet mismatch %d %d\n", last, orig_size + 11);
+        avio_seek(s->pb, pos + 1, SEEK_SET);
+        ret = resync(s);
+        av_packet_unref(pkt);
+        if (ret >= 0) {
+            goto retry;
+        }
+    }
     return ret;
 }
 
@@ -1084,7 +1169,7 @@ static int flv_read_seek(AVFormatContext *s, int stream_index,
 #define OFFSET(x) offsetof(FLVContext, x)
 #define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
 static const AVOption options[] = {
-    { "flv_metadata", "Allocate streams according to the onMetaData array", OFFSET(trust_metadata), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VD },
+    { "flv_metadata", "Allocate streams according to the onMetaData array", OFFSET(trust_metadata), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VD },
     { NULL }
 };
 
diff --git a/libavformat/flvenc.c b/libavformat/flvenc.c
index e217ba8a..8fd5d294 100644
--- a/libavformat/flvenc.c
+++ b/libavformat/flvenc.c
@@ -28,6 +28,9 @@
 #include "flv.h"
 #include "internal.h"
 #include "metadata.h"
+#include "libavutil/opt.h"
+#include "libavcodec/put_bits.h"
+#include "libavcodec/aacenctab.h"
 
 
 static const AVCodecTag flv_video_codec_ids[] = {
@@ -58,6 +61,7 @@ static const AVCodecTag flv_audio_codec_ids[] = {
 };
 
 typedef struct FLVContext {
+    AVClass *av_class;
     int     reserved;
     int64_t duration_offset;
     int64_t filesize_offset;
@@ -68,6 +72,8 @@ typedef struct FLVContext {
     AVCodecContext *video_enc;
     double framerate;
     AVCodecContext *data_enc;
+
+    int flags;
 } FLVContext;
 
 typedef struct FLVStreamContext {
@@ -452,6 +458,31 @@ static int flv_write_header(AVFormatContext *s)
             if (enc->codec_id == AV_CODEC_ID_AAC) {
                 avio_w8(pb, get_audio_flags(s, enc));
                 avio_w8(pb, 0); // AAC sequence header
+
+                if (!enc->extradata_size && flv->flags & 1) {
+                    PutBitContext pbc;
+                    int samplerate_index;
+                    int channels = flv->audio_enc->channels - (flv->audio_enc->channels == 8 ? 1 : 0);
+                    uint8_t data[2];
+
+                    for (samplerate_index = 0; samplerate_index < 16; samplerate_index++)
+                        if (flv->audio_enc->sample_rate == mpeg4audio_sample_rates[samplerate_index])
+                            break;
+
+                    init_put_bits(&pbc, data, sizeof(data));
+                    put_bits(&pbc, 5, flv->audio_enc->profile + 1); //profile
+                    put_bits(&pbc, 4, samplerate_index); //sample rate index
+                    put_bits(&pbc, 4, channels);
+                    put_bits(&pbc, 1, 0); //frame length - 1024 samples
+                    put_bits(&pbc, 1, 0); //does not depend on core coder
+                    put_bits(&pbc, 1, 0); //is not extension
+                    flush_put_bits(&pbc);
+
+                    avio_w8(pb, data[0]);
+                    avio_w8(pb, data[1]);
+
+                    av_log(s, AV_LOG_WARNING, "AAC sequence header: %02x %02x.\n", data[0], data[1]);
+                }
                 avio_write(pb, enc->extradata, enc->extradata_size);
             } else {
                 avio_w8(pb, enc->codec_tag | FLV_FRAME_KEY); // flags
@@ -531,7 +562,7 @@ static int flv_write_packet(AVFormatContext *s, AVPacket *pkt)
         return AVERROR(EINVAL);
     }
 
-    ts = pkt->dts + flv->delay; // add delay to force positive dts
+    ts = pkt->dts;
 
     if (s->event_flags & AVSTREAM_EVENT_FLAG_METADATA_UPDATED) {
         write_metadata(s, ts);
@@ -655,6 +686,19 @@ static int flv_write_packet(AVFormatContext *s, AVPacket *pkt)
     return pb->error;
 }
 
+static const AVOption options[] = { /* FLV muxer private options; "flvflags" is stored in FLVContext.flags */
+    { "flvflags", "FLV muxer flags", offsetof(FLVContext, flags), AV_OPT_TYPE_FLAGS, {.i64 = 0}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "flvflags" },
+    { "aac_seq_header_detect", "Put AAC sequence header based on stream data", 0, AV_OPT_TYPE_CONST, {.i64 = 1}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "flvflags" }, /* value 1 is the bit tested as (flv->flags & 1) in flv_write_header */
+    { NULL },
+};
+
+static const AVClass flv_muxer_class = { /* AVClass carrying the options table; installed as ff_flv_muxer.priv_class */
+    .class_name = "flv muxer",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
 AVOutputFormat ff_flv_muxer = {
     .name           = "flv",
     .long_name      = NULL_IF_CONFIG_SMALL("FLV (Flash Video)"),
@@ -671,4 +715,5 @@ AVOutputFormat ff_flv_muxer = {
                       },
     .flags          = AVFMT_GLOBALHEADER | AVFMT_VARIABLE_FPS |
                       AVFMT_TS_NONSTRICT,
+    .priv_class     = &flv_muxer_class,
 };
diff --git a/libavformat/format.c b/libavformat/format.c
index 7df06b70..15fe167f 100644
--- a/libavformat/format.c
+++ b/libavformat/format.c
@@ -171,21 +171,29 @@ AVInputFormat *av_probe_input_format3(AVProbeData *pd, int is_opened,
 {
     AVProbeData lpd = *pd;
     AVInputFormat *fmt1 = NULL, *fmt;
-    int score, nodat = 0, score_max = 0;
+    int score, score_max = 0;
     const static uint8_t zerobuffer[AVPROBE_PADDING_SIZE];
+    enum nodat {
+        NO_ID3,
+        ID3_ALMOST_GREATER_PROBE,
+        ID3_GREATER_PROBE,
+        ID3_GREATER_MAX_PROBE,
+    } nodat = NO_ID3;
 
     if (!lpd.buf)
-        lpd.buf = zerobuffer;
+        lpd.buf = (unsigned char *) zerobuffer;
 
     if (lpd.buf_size > 10 && ff_id3v2_match(lpd.buf, ID3v2_DEFAULT_MAGIC)) {
         int id3len = ff_id3v2_tag_len(lpd.buf);
         if (lpd.buf_size > id3len + 16) {
+            if (lpd.buf_size < 2LL*id3len + 16)
+                nodat = ID3_ALMOST_GREATER_PROBE;
             lpd.buf      += id3len;
             lpd.buf_size -= id3len;
         } else if (id3len >= PROBE_BUF_MAX) {
-            nodat = 2;
+            nodat = ID3_GREATER_MAX_PROBE;
         } else
-            nodat = 1;
+            nodat = ID3_GREATER_PROBE;
     }
 
     fmt = NULL;
@@ -198,9 +206,18 @@ AVInputFormat *av_probe_input_format3(AVProbeData *pd, int is_opened,
             if (score)
                 av_log(NULL, AV_LOG_TRACE, "Probing %s score:%d size:%d\n", fmt1->name, score, lpd.buf_size);
             if (fmt1->extensions && av_match_ext(lpd.filename, fmt1->extensions)) {
-                if      (nodat == 0) score = FFMAX(score, 1);
-                else if (nodat == 1) score = FFMAX(score, AVPROBE_SCORE_EXTENSION / 2 - 1);
-                else                 score = FFMAX(score, AVPROBE_SCORE_EXTENSION);
+                switch (nodat) {
+                case NO_ID3:
+                    score = FFMAX(score, 1);
+                    break;
+                case ID3_GREATER_PROBE:
+                case ID3_ALMOST_GREATER_PROBE:
+                    score = FFMAX(score, AVPROBE_SCORE_EXTENSION / 2 - 1);
+                    break;
+                case ID3_GREATER_MAX_PROBE:
+                    score = FFMAX(score, AVPROBE_SCORE_EXTENSION);
+                    break;
+                }
             }
         } else if (fmt1->extensions) {
             if (av_match_ext(lpd.filename, fmt1->extensions))
@@ -214,7 +231,7 @@ AVInputFormat *av_probe_input_format3(AVProbeData *pd, int is_opened,
         } else if (score == score_max)
             fmt = NULL;
     }
-    if (nodat == 1)
+    if (nodat == ID3_GREATER_PROBE)
         score_max = FFMIN(AVPROBE_SCORE_EXTENSION / 2 - 1, score_max);
     *score_ret = score_max;
 
@@ -261,8 +278,13 @@ int av_probe_input_buffer2(AVIOContext *pb, AVInputFormat **fmt,
 
     if (pb->av_class) {
         uint8_t *mime_type_opt = NULL;
+        char *semi;
         av_opt_get(pb, "mime_type", AV_OPT_SEARCH_CHILDREN, &mime_type_opt);
         pd.mime_type = (const char *)mime_type_opt;
+        semi = pd.mime_type ? strchr(pd.mime_type, ';') : NULL;
+        if (semi) {
+            *semi = '\0';
+        }
     }
 #if 0
     if (!*fmt && pb->av_class && av_opt_get(pb, "mime_type", AV_OPT_SEARCH_CHILDREN, &mime_type) >= 0 && mime_type) {
diff --git a/libavformat/framecrcenc.c b/libavformat/framecrcenc.c
index 805b5428..eacbc457 100644
--- a/libavformat/framecrcenc.c
+++ b/libavformat/framecrcenc.c
@@ -47,7 +47,7 @@ static int framecrc_write_packet(struct AVFormatContext *s, AVPacket *pkt)
     uint32_t crc = av_adler32_update(0, pkt->data, pkt->size);
     char buf[256];
 
-    snprintf(buf, sizeof(buf), "%d, %10"PRId64", %10"PRId64", %8d, %8d, 0x%08"PRIx32,
+    snprintf(buf, sizeof(buf), "%d, %10"PRId64", %10"PRId64", %8"PRId64", %8d, 0x%08"PRIx32,
              pkt->stream_index, pkt->dts, pkt->pts, pkt->duration, pkt->size, crc);
     if (pkt->flags != AV_PKT_FLAG_KEY)
         av_strlcatf(buf, sizeof(buf), ", F=0x%0X", pkt->flags);
diff --git a/libavformat/frmdec.c b/libavformat/frmdec.c
index a6f19afc..260afbc5 100644
--- a/libavformat/frmdec.c
+++ b/libavformat/frmdec.c
@@ -25,6 +25,7 @@
  */
 
 #include "libavcodec/raw.h"
+#include "libavutil/imgutils.h"
 #include "libavutil/intreadwrite.h"
 #include "avformat.h"
 
@@ -80,7 +81,7 @@ static int frm_read_packet(AVFormatContext *avctx, AVPacket *pkt)
     if (s->count)
         return AVERROR_EOF;
 
-    packet_size = avpicture_get_size(stc->pix_fmt, stc->width, stc->height);
+    packet_size = av_image_get_buffer_size(stc->pix_fmt, stc->width, stc->height, 1);
     if (packet_size < 0)
         return AVERROR_INVALIDDATA;
 
diff --git a/libavformat/fsb.c b/libavformat/fsb.c
new file mode 100644
index 00000000..2837c191
--- /dev/null
+++ b/libavformat/fsb.c
@@ -0,0 +1,210 @@
+/*
+ * FSB demuxer
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avassert.h"
+#include "libavutil/intreadwrite.h"
+#include "avformat.h"
+#include "avio.h"
+#include "internal.h"
+
+/* Probe: "FSB" magic, an ASCII version digit '1'..'5', then a little-endian 32-bit value equal to 1. */
+static int fsb_probe(AVProbeData *p)
+{
+    const int version_digit = p->buf[3] - '0';
+    const int magic_ok = !memcmp(p->buf, "FSB", 3) && version_digit >= 1 && version_digit <= 5;
+
+    return magic_ok && AV_RL32(p->buf + 4) == 1 ? AVPROBE_SCORE_MAX : 0;
+}
+
+/* Parse an FSB3/FSB4 header, set up the single audio stream and skip to the sample data. */
+static int fsb_read_header(AVFormatContext *s)
+{
+    AVIOContext *pb = s->pb;
+    unsigned format, version, c;
+    int64_t offset;
+    AVCodecContext *codec;
+    AVStream *st = avformat_new_stream(s, NULL);
+
+    avio_skip(pb, 3); // "FSB"
+    version = avio_r8(pb) - '0'; // version is stored as an ASCII digit
+    if (version != 4 && version != 3) {
+        avpriv_request_sample(s, "version %d", version);
+        return AVERROR_PATCHWELCOME;
+    }
+
+    avio_skip(pb, 4);
+
+    if (!st)
+        return AVERROR(ENOMEM);
+    codec = st->codec;
+    codec->codec_type  = AVMEDIA_TYPE_AUDIO;
+    codec->codec_tag   = 0;
+
+    if (version == 3) {
+        offset = avio_rl32(pb) + 0x18; // absolute position skipped to below and recorded as data_offset
+        avio_skip(pb, 44);
+        st->duration = avio_rl32(pb);
+        avio_skip(pb, 12);
+        format = avio_rl32(pb);
+        codec->sample_rate = avio_rl32(pb);
+        if (codec->sample_rate <= 0)
+            return AVERROR_INVALIDDATA;
+        avio_skip(pb, 6);
+        codec->channels    = avio_rl16(pb);
+        if (!codec->channels)
+            return AVERROR_INVALIDDATA;
+
+        if (format & 0x00000100) {
+            codec->codec_id    = AV_CODEC_ID_PCM_S16LE;
+            codec->block_align = 4096 * codec->channels;
+        } else if (format & 0x00400000) {
+            codec->bits_per_coded_sample = 4;
+            codec->codec_id    = AV_CODEC_ID_ADPCM_IMA_WAV;
+            codec->block_align = 36 * codec->channels;
+        } else if (format & 0x00800000) {
+            codec->codec_id    = AV_CODEC_ID_ADPCM_PSX;
+            codec->block_align = 16 * codec->channels;
+        } else if (format & 0x02000000) {
+            codec->codec_id    = AV_CODEC_ID_ADPCM_THP;
+            codec->block_align = 8 * codec->channels;
+            if (codec->channels > INT_MAX / 32)
+                return AVERROR_INVALIDDATA;
+            if (ff_alloc_extradata(codec, 32 * codec->channels) < 0)
+                return AVERROR(ENOMEM);
+            avio_seek(pb, 0x68, SEEK_SET);
+            for (c = 0; c < codec->channels; c++) {
+                if (avio_read(pb, codec->extradata + 32 * c, 32) != 32) // short read would leave extradata partly unwritten
+                    return AVERROR_INVALIDDATA;
+                avio_skip(pb, 14);
+            }
+        } else {
+            avpriv_request_sample(s, "format 0x%X", format);
+            return AVERROR_PATCHWELCOME;
+        }
+    } else if (version == 4) {
+        offset = avio_rl32(pb) + 0x30; // absolute position skipped to below and recorded as data_offset
+        avio_skip(pb, 80);
+        st->duration = avio_rl32(pb);
+
+        format = avio_rb32(pb); // unlike the other header fields, read big-endian
+        switch(format) {
+        case 0x40001001:
+        case 0x00001005:
+        case 0x40001081:
+        case 0x40200001:
+            codec->codec_id = AV_CODEC_ID_XMA2;
+            break;
+        case 0x40000802:
+            codec->codec_id = AV_CODEC_ID_ADPCM_THP;
+            break;
+        default:
+            avpriv_request_sample(s, "format 0x%X", format);
+            return AVERROR_PATCHWELCOME;
+        }
+
+        codec->sample_rate = avio_rl32(pb);
+        if (codec->sample_rate <= 0)
+            return AVERROR_INVALIDDATA;
+        avio_skip(pb, 6);
+
+        codec->channels    = avio_rl16(pb);
+        if (!codec->channels)
+            return AVERROR_INVALIDDATA;
+
+        switch (codec->codec_id) {
+        case AV_CODEC_ID_XMA2:
+            if (ff_alloc_extradata(codec, 34) < 0)
+                return AVERROR(ENOMEM);
+            memset(codec->extradata, 0, 34);
+            codec->block_align = 2048;
+            break;
+        case AV_CODEC_ID_ADPCM_THP:
+            if (codec->channels > INT_MAX / 32)
+                return AVERROR_INVALIDDATA;
+            if (ff_alloc_extradata(codec, 32 * codec->channels) < 0)
+                return AVERROR(ENOMEM);
+            avio_seek(pb, 0x80, SEEK_SET);
+            for (c = 0; c < codec->channels; c++) {
+                if (avio_read(pb, codec->extradata + 32 * c, 32) != 32) // short read would leave extradata partly unwritten
+                    return AVERROR_INVALIDDATA;
+                avio_skip(pb, 14);
+            }
+            codec->block_align = 8 * codec->channels;
+            break;
+        }
+    } else {
+        av_assert0(0);
+    }
+
+    avio_skip(pb, offset - avio_tell(pb));
+    s->internal->data_offset = avio_tell(pb);
+
+    avpriv_set_pts_info(st, 64, 1, codec->sample_rate);
+
+    return 0;
+}
+
+/* Read one block_align-sized packet; multichannel ADPCM THP is regrouped into 8 contiguous bytes per channel. */
+static int fsb_read_packet(AVFormatContext *s, AVPacket *pkt)
+{
+    AVCodecContext *codec = s->streams[0]->codec;
+    int64_t pos;
+    int ret;
+
+    if (avio_feof(s->pb))
+        return AVERROR_EOF;
+
+    pos = avio_tell(s->pb);
+    if (codec->codec_id == AV_CODEC_ID_ADPCM_THP && codec->channels > 1) {
+        int pair, chan;
+
+        ret = av_new_packet(pkt, codec->block_align);
+        if (ret < 0)
+            return ret;
+        for (pair = 0; pair < 4; pair++) {
+            for (chan = 0; chan < codec->channels; chan++) {
+                pkt->data[8 * chan + 2 * pair]     = avio_r8(s->pb);
+                pkt->data[8 * chan + 2 * pair + 1] = avio_r8(s->pb);
+            }
+        }
+        ret = 0;
+    } else {
+        ret = av_get_packet(s->pb, pkt, codec->block_align);
+    }
+
+    if (codec->codec_id == AV_CODEC_ID_XMA2 && pkt->size >= 1)
+        pkt->duration = (pkt->data[0] >> 2) * 512;
+
+    pkt->stream_index = 0;
+    pkt->pos = pos;
+
+    return ret;
+}
+
+AVInputFormat ff_fsb_demuxer = { /* demuxer descriptor wiring up the fsb_* callbacks defined above */
+    .name        = "fsb",
+    .long_name   = NULL_IF_CONFIG_SMALL("FMOD Sample Bank"),
+    .read_probe  = fsb_probe,
+    .read_header = fsb_read_header,
+    .read_packet = fsb_read_packet,
+    .extensions  = "fsb",
+    .flags       = AVFMT_GENERIC_INDEX,
+};
diff --git a/libavformat/ftp.c b/libavformat/ftp.c
index 27a172e8..c2a60f61 100644
--- a/libavformat/ftp.c
+++ b/libavformat/ftp.c
@@ -19,6 +19,8 @@
  */
 
 #include "libavutil/avstring.h"
+#include "libavutil/internal.h"
+#include "libavutil/parseutils.h"
 #include "avformat.h"
 #include "internal.h"
 #include "url.h"
@@ -26,15 +28,23 @@
 #include "libavutil/bprint.h"
 
 #define CONTROL_BUFFER_SIZE 1024
+#define DIR_BUFFER_SIZE 4096
 
 typedef enum {
     UNKNOWN,
     READY,
     DOWNLOADING,
     UPLOADING,
+    LISTING_DIR,
     DISCONNECTED
 } FTPState;
 
+typedef enum {
+    UNKNOWN_METHOD,
+    NLST,
+    MLSD
+} FTPListingMethod;
+
 typedef struct {
     const AVClass *class;
     URLContext *conn_control;                    /**< Control connection */
@@ -53,6 +63,12 @@ typedef struct {
     const char *anonymous_password;              /**< Password to be used for anonymous user. An email should be used. */
     int write_seekable;                          /**< Control seekability, 0 = disable, 1 = enable. */
     FTPState state;                              /**< State of data connection */
+    FTPListingMethod listing_method;             /**< Called listing method */
+    char *features;                              /**< List of server's features represented as raw response */
+    char *dir_buffer;
+    size_t dir_buffer_size;
+    size_t dir_buffer_offset;
+    int utf8;
 } FTPContext;
 
 #define OFFSET(x) offsetof(FTPContext, x)
@@ -60,7 +76,7 @@ typedef struct {
 #define E AV_OPT_FLAG_ENCODING_PARAM
 static const AVOption options[] = {
     {"timeout", "set timeout of socket I/O operations", OFFSET(rw_timeout), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, D|E },
-    {"ftp-write-seekable", "control seekability of connection during encoding", OFFSET(write_seekable), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, E },
+    {"ftp-write-seekable", "control seekability of connection during encoding", OFFSET(write_seekable), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, E },
     {"ftp-anonymous-password", "password for anonymous login. E-mail address should be used.", OFFSET(anonymous_password), AV_OPT_TYPE_STRING, { 0 }, 0, 0, D|E },
     {NULL}
 };
@@ -185,6 +201,8 @@ static int ftp_send_command(FTPContext *s, const char *command,
 {
     int err;
 
+    ff_dlog(s, "%s", command);
+
     if (response)
         *response = NULL;
 
@@ -266,7 +284,7 @@ static int ftp_passive_mode_epsv(FTPContext *s)
     end[-1] = '\0';
 
     s->server_data_port = atoi(start);
-    av_dlog(s, "Server data port: %d\n", s->server_data_port);
+    ff_dlog(s, "Server data port: %d\n", s->server_data_port);
 
     av_free(res);
     return 0;
@@ -312,7 +330,7 @@ static int ftp_passive_mode(FTPContext *s)
     start = av_strtok(end, ",", &end);
     if (!start) goto fail;
     s->server_data_port += atoi(start);
-    av_dlog(s, "Server data port: %d\n", s->server_data_port);
+    ff_dlog(s, "Server data port: %d\n", s->server_data_port);
 
     av_free(res);
     return 0;
@@ -347,10 +365,7 @@ static int ftp_current_dir(FTPContext *s)
     if (!end)
         goto fail;
 
-    if (end > res && end[-1] == '/') {
-        end[-1] = '\0';
-    } else
-        *end = '\0';
+    *end = '\0';
     s->path = av_strdup(start);
 
     av_free(res);
@@ -434,19 +449,76 @@ static int ftp_restart(FTPContext *s, int64_t pos)
     return 0;
 }
 
+/* Send "CWD <s->path>"; 0 on a 250 reply, AVERROR(EIO) otherwise (including a command too long to format). */
+static int ftp_set_dir(FTPContext *s)
+{
+    static const int cwd_codes[] = {250, 550, 0}; /* 550 is incorrect code */
+    char command[MAX_URL_SIZE];
+    int len = snprintf(command, sizeof(command), "CWD %s\r\n", s->path); /* truncation would drop the CRLF */
+    if (len < 0 || len >= (int)sizeof(command) || ftp_send_command(s, command, cwd_codes, NULL) != 250)
+        return AVERROR(EIO);
+    return 0;
+}
+
+/* Issue "MLSD"; on a 150 reply record MLSD as the listing method, otherwise return AVERROR(ENOSYS). */
+static int ftp_list_mlsd(FTPContext *s)
+{
+    static const int mlsd_codes[] = {150, 500, 0}; /* 500 is incorrect code */
+    const int reply = ftp_send_command(s, "MLSD\r\n", mlsd_codes, NULL);
+    if (reply != 150)
+        return AVERROR(ENOSYS);
+    s->listing_method = MLSD;
+    return 0;
+}
+
+/* Issue "NLST"; on a 226 reply record NLST as the listing method, otherwise return AVERROR(ENOSYS). */
+static int ftp_list_nlst(FTPContext *s)
+{
+    static const int nlst_codes[] = {226, 425, 426, 451, 450, 550, 0};
+    const int reply = ftp_send_command(s, "NLST\r\n", nlst_codes, NULL);
+    if (reply != 226)
+        return AVERROR(ENOSYS);
+    s->listing_method = NLST;
+    return 0;
+}
+
+static int ftp_has_feature(FTPContext *s, const char *feature_name);
+
+/* Mark the context as LISTING_DIR, then try MLSD and fall back to NLST if MLSD fails. */
+static int ftp_list(FTPContext *s)
+{
+    int ret;
+
+    s->state = LISTING_DIR;
+    ret = ftp_list_mlsd(s);
+
+    return ret < 0 ? ftp_list_nlst(s) : ret;
+}
+
+/* 1 if feature_name occurs (case-insensitively) in the stored feature response, 0 otherwise or if none is stored. */
+static int ftp_has_feature(FTPContext *s, const char *feature_name)
+{
+    const char *feat = s->features;
+
+    return feat && av_stristr(feat, feature_name);
+}
+
 static int ftp_features(FTPContext *s)
 {
     static const char *feat_command        = "FEAT\r\n";
     static const char *enable_utf8_command = "OPTS UTF8 ON\r\n";
     static const int feat_codes[] = {211, 0};
-    static const int opts_codes[] = {200, 451};
-    char *feat = NULL;
+    static const int opts_codes[] = {200, 451, 0};
 
-    if (ftp_send_command(s, feat_command, feat_codes, &feat) == 211) {
-        if (av_stristr(feat, "UTF8"))
-            ftp_send_command(s, enable_utf8_command, opts_codes, NULL);
+    av_freep(&s->features);
+    if (ftp_send_command(s, feat_command, feat_codes, &s->features) != 211) {
+        av_freep(&s->features);
+    }
+
+    if (ftp_has_feature(s, "UTF8")) {
+        if (ftp_send_command(s, enable_utf8_command, opts_codes, NULL) == 200)
+            s->utf8 = 1;
     }
-    av_freep(&feat);
 
     return 0;
 }
@@ -465,8 +537,9 @@ static int ftp_connect_control_connection(URLContext *h)
         if (s->rw_timeout != -1) {
             av_dict_set_int(&opts, "timeout", s->rw_timeout, 0);
         } /* if option is not given, don't pass it and let tcp use its own default */
-        err = ffurl_open(&s->conn_control, buf, AVIO_FLAG_READ_WRITE,
-                         &h->interrupt_callback, &opts);
+        err = ffurl_open_whitelist(&s->conn_control, buf, AVIO_FLAG_READ_WRITE,
+                                   &h->interrupt_callback, &opts,
+                                   h->protocol_whitelist);
         av_dict_free(&opts);
         if (err < 0) {
             av_log(h, AV_LOG_ERROR, "Cannot open control connection\n");
@@ -518,8 +591,9 @@ static int ftp_connect_data_connection(URLContext *h)
         if (s->rw_timeout != -1) {
             av_dict_set_int(&opts, "timeout", s->rw_timeout, 0);
         } /* if option is not given, don't pass it and let tcp use its own default */
-        err = ffurl_open(&s->conn_data, buf, h->flags,
-                         &h->interrupt_callback, &opts);
+        err = ffurl_open_whitelist(&s->conn_data, buf, h->flags,
+                                   &h->interrupt_callback, &opts,
+                                   h->protocol_whitelist);
         av_dict_free(&opts);
         if (err < 0)
             return err;
@@ -570,20 +644,19 @@ static int ftp_abort(URLContext *h)
     return 0;
 }
 
-static int ftp_open(URLContext *h, const char *url, int flags)
+static int ftp_connect(URLContext *h, const char *url)
 {
     char proto[10], path[MAX_URL_SIZE], credencials[MAX_URL_SIZE], hostname[MAX_URL_SIZE];
     const char *tok_user = NULL, *tok_pass = NULL;
-    char *end = NULL;
+    char *end = NULL, *newpath = NULL;
     int err;
-    size_t pathlen;
     FTPContext *s = h->priv_data;
 
-    av_dlog(h, "ftp protocol open\n");
-
     s->state = DISCONNECTED;
+    s->listing_method = UNKNOWN_METHOD;
     s->filesize = -1;
     s->position = 0;
+    s->features = NULL;
 
     av_url_split(proto, sizeof(proto),
                  credencials, sizeof(credencials),
@@ -602,22 +675,36 @@ static int ftp_open(URLContext *h, const char *url, int flags)
     s->password = av_strdup(tok_pass);
     s->hostname = av_strdup(hostname);
     if (!s->hostname || !s->user || (tok_pass && !s->password)) {
-        err = AVERROR(ENOMEM);
-        goto fail;
+        return AVERROR(ENOMEM);
     }
 
     if (s->server_control_port < 0 || s->server_control_port > 65535)
         s->server_control_port = 21;
 
     if ((err = ftp_connect_control_connection(h)) < 0)
-        goto fail;
+        return err;
 
     if ((err = ftp_current_dir(s)) < 0)
+        return err;
+
+    newpath = av_append_path_component(s->path, path);
+    if (!newpath)
+        return AVERROR(ENOMEM);
+    av_free(s->path);
+    s->path = newpath;
+
+    return 0;
+}
+
+static int ftp_open(URLContext *h, const char *url, int flags)
+{
+    FTPContext *s = h->priv_data;
+    int err;
+
+    ff_dlog(h, "ftp protocol open\n");
+
+    if ((err = ftp_connect(h, url)) < 0)
         goto fail;
-    pathlen = strlen(s->path) + strlen(path) + 1;
-    if ((err = av_reallocp(&s->path, pathlen)) < 0)
-        goto fail;
-    av_strlcat(s->path + strlen(s->path), path, pathlen);
 
     if (ftp_restart(s, 0) < 0) {
         h->is_streamed = 1;
@@ -642,7 +729,7 @@ static int64_t ftp_seek(URLContext *h, int64_t pos, int whence)
     int err;
     int64_t new_pos, fake_pos;
 
-    av_dlog(h, "ftp protocol seek %"PRId64" %d\n", pos, whence);
+    ff_dlog(h, "ftp protocol seek %"PRId64" %d\n", pos, whence);
 
     switch(whence) {
     case AVSEEK_SIZE:
@@ -684,7 +771,7 @@ static int ftp_read(URLContext *h, unsigned char *buf, int size)
     FTPContext *s = h->priv_data;
     int read, err, retry_done = 0;
 
-    av_dlog(h, "ftp protocol read %d bytes\n", size);
+    ff_dlog(h, "ftp protocol read %d bytes\n", size);
   retry:
     if (s->state == DISCONNECTED) {
         /* optimization */
@@ -742,7 +829,7 @@ static int ftp_write(URLContext *h, const unsigned char *buf, int size)
     FTPContext *s = h->priv_data;
     int written;
 
-    av_dlog(h, "ftp protocol write %d bytes\n", size);
+    ff_dlog(h, "ftp protocol write %d bytes\n", size);
 
     if (s->state == DISCONNECTED) {
         if ((err = ftp_connect_data_connection(h)) < 0)
@@ -769,13 +856,14 @@ static int ftp_close(URLContext *h)
 {
     FTPContext *s = h->priv_data;
 
-    av_dlog(h, "ftp protocol close\n");
+    ff_dlog(h, "ftp protocol close\n");
 
     ftp_close_both_connections(s);
     av_freep(&s->user);
     av_freep(&s->password);
     av_freep(&s->hostname);
     av_freep(&s->path);
+    av_freep(&s->features);
 
     return 0;
 }
@@ -784,7 +872,7 @@ static int ftp_get_file_handle(URLContext *h)
 {
     FTPContext *s = h->priv_data;
 
-    av_dlog(h, "ftp protocol get_file_handle\n");
+    ff_dlog(h, "ftp protocol get_file_handle\n");
 
     if (s->conn_data)
         return ffurl_get_file_handle(s->conn_data);
@@ -796,7 +884,7 @@ static int ftp_shutdown(URLContext *h, int flags)
 {
     FTPContext *s = h->priv_data;
 
-    av_dlog(h, "ftp protocol shutdown\n");
+    ff_dlog(h, "ftp protocol shutdown\n");
 
     if (s->conn_data)
         return ffurl_shutdown(s->conn_data, flags);
@@ -804,6 +892,213 @@ static int ftp_shutdown(URLContext *h, int flags)
     return AVERROR(EIO);
 }
 
+/* Start a directory listing for h->filename and allocate the line buffer used
+ * by ftp_read_dir(). Returns 0 on success, a negative AVERROR code on failure. */
+static int ftp_open_dir(URLContext *h)
+{
+    FTPContext *s = h->priv_data;
+    int ret;
+
+    if ((ret = ftp_connect(h, h->filename)) < 0 ||
+        (ret = ftp_set_dir(s)) < 0 ||
+        (ret = ftp_connect_data_connection(h)) < 0 ||
+        (ret = ftp_list(s)) < 0)
+        goto fail;
+    if (!(s->dir_buffer = av_malloc(DIR_BUFFER_SIZE))) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+    s->dir_buffer[0] = 0;
+    if (s->conn_data && s->state == LISTING_DIR)
+        return 0;
+    ret = AVERROR(EIO); /* all calls succeeded (ret >= 0) yet no listing is active */
+  fail:
+    av_freep(&s->dir_buffer);
+    ffurl_closep(&s->conn_control);
+    ffurl_closep(&s->conn_data);
+    return ret;
+}
+
+static int64_t ftp_parse_date(const char *date)
+{
+    /* "%Y%m%d%H%M%S" text -> av_timegm() value scaled by 1000000; fields left unparsed stay zero */
+    struct tm parsed = { 0 };
+    av_small_strptime(date, "%Y%m%d%H%M%S", &parsed);
+    return av_timegm(&parsed) * INT64_C(1000000);
+}
+
+static int ftp_parse_entry_nlst(char *line, AVIODirEntry *next)
+{
+    /* the whole line becomes the entry name; a failed copy is an error, not an unnamed entry */
+    return (next->name = av_strdup(line)) ? 0 : AVERROR(ENOMEM);
+}
+
+static int ftp_parse_entry_mlsd(char *mlsd, AVIODirEntry *next)
+{   /* Fill *next from one ';'-separated "fact=value;...; name" line; 1 = entry to skip, <0 = error. */
+    char *fact, *value;
+    ff_dlog(NULL, "%s\n", mlsd);
+    while(fact = av_strtok(mlsd, ";", &mlsd)) {
+        if (fact[0] == ' ') {
+            if (!(next->name = av_strdup(&fact[1])))
+                return AVERROR(ENOMEM);
+        } else if (!(fact = av_strtok(fact, "=", &value)) || !value) {
+            continue; /* server sent a fact without "=value": nothing to compare or convert */
+        } else if (!av_strcasecmp(fact, "type")) {
+            if (!av_strcasecmp(value, "cdir") || !av_strcasecmp(value, "pdir"))
+                return 1;
+            if (!av_strcasecmp(value, "dir"))
+                next->type = AVIO_ENTRY_DIRECTORY;
+            else if (!av_strcasecmp(value, "file"))
+                next->type = AVIO_ENTRY_FILE;
+            else if (!av_strcasecmp(value, "OS.unix=slink:"))
+                next->type = AVIO_ENTRY_SYMBOLIC_LINK;
+        } else if (!av_strcasecmp(fact, "modify")) {
+            next->modification_timestamp = ftp_parse_date(value);
+        } else if (!av_strcasecmp(fact, "UNIX.mode")) {
+            next->filemode = strtoumax(value, NULL, 8);
+        } else if (!av_strcasecmp(fact, "UNIX.uid") || !av_strcasecmp(fact, "UNIX.owner"))
+            next->user_id = strtoumax(value, NULL, 10);
+        else if (!av_strcasecmp(fact, "UNIX.gid") || !av_strcasecmp(fact, "UNIX.group"))
+            next->group_id = strtoumax(value, NULL, 10);
+        else if (!av_strcasecmp(fact, "size") || !av_strcasecmp(fact, "sizd"))
+            next->size = strtoll(value, NULL, 10);
+    }
+    return 0;
+}
+
+/**
+ * Hand one listing line to the parser matching the context's listing_method.
+ * @param h    context whose priv_data selects the listing method
+ * @param line listing line, forwarded to the selected parser
+ * @param next directory entry the parser fills in
+ * @return 0 on success, negative on error, positive on entry to discard.
+ */
+static int ftp_parse_entry(URLContext *h, char *line, AVIODirEntry *next)
+{
+    FTPContext *ctx = h->priv_data;
+
+    if (ctx->listing_method == MLSD)
+        return ftp_parse_entry_mlsd(line, next);
+    if (ctx->listing_method == NLST)
+        return ftp_parse_entry_nlst(line, next);
+    return -1; /* UNKNOWN_METHOD or any other value */
+}
+
+static int ftp_read_dir(URLContext *h, AVIODirEntry **next)
+{
+    FTPContext *s = h->priv_data;
+    char *start, *found;
+    int ret, retried;
+    /* Returns 0 with *next = parsed entry, or *next = NULL when the data read yields 0 bytes; negative AVERROR on error. */
+    do {
+        retried = 0;
+        start = s->dir_buffer + s->dir_buffer_offset; /* first unconsumed byte */
+        while (!(found = strstr(start, "\n"))) { /* no complete line buffered yet */
+            if (retried) /* a refill already happened and still produced no newline */
+                return AVERROR(EIO);
+            s->dir_buffer_size -= s->dir_buffer_offset; /* keep only the unconsumed tail... */
+            s->dir_buffer_offset = 0;
+            if (s->dir_buffer_size)
+                memmove(s->dir_buffer, start, s->dir_buffer_size); /* ...moved to the buffer front */
+            ret = ffurl_read(s->conn_data, s->dir_buffer + s->dir_buffer_size, DIR_BUFFER_SIZE - (s->dir_buffer_size + 1));
+            if (ret < 0)
+                return ret;
+            if (!ret) { /* nothing more to read: report end of listing */
+                *next = NULL; /* NOTE(review): a buffered tail without "\n" is dropped here */
+                return 0;
+            }
+            s->dir_buffer_size += ret;
+            s->dir_buffer[s->dir_buffer_size] = 0; /* the read size above left one byte free for this terminator */
+            start = s->dir_buffer;
+            retried = 1;
+        }
+        s->dir_buffer_offset += (found + 1 - start); /* consume the line and its "\n" */
+        found[0] = 0;
+        if (found > start && found[-1] == '\r') /* also cut a "\r" right before the "\n" */
+            found[-1] = 0;
+        /* start now points at one NUL-terminated line: parse it into a fresh entry */
+        *next = ff_alloc_dir_entry();
+        if (!*next)
+            return AVERROR(ENOMEM);
+        (*next)->utf8 = s->utf8;
+        ret = ftp_parse_entry(h, start, *next);
+        if (ret) { /* parse error (<0) or entry to discard (>0): free it either way */
+            avio_free_directory_entry(next);
+            if (ret < 0)
+                return ret;
+        }
+    } while (ret > 0); /* discarded entry: go on with the next line */
+    return 0;
+}
+
+static int ftp_close_dir(URLContext *h)
+{   /* Release the listing buffer, then both connections; always returns 0. */
+    FTPContext *ftp = h->priv_data;
+    av_freep(&ftp->dir_buffer);
+    ffurl_closep(&ftp->conn_control);
+    ffurl_closep(&ftp->conn_data);
+    return 0;
+}
+
+/* Delete the path named by h->filename: try DELE first and, unless the server
+ * answers 250, fall back to RMD. Returns 0 on a 250 reply, AVERROR(EIO) when
+ * neither command gets one, or the negative ftp_connect() result. */
+static int ftp_delete(URLContext *h)
+{
+    static const int dele_replies[] = {250, 421, 450, 500, 501, 502, 530, 550, 0};
+    static const int rmd_replies[]  = {250, 421, 500, 501, 502, 530, 550, 0};
+    FTPContext *ftp = h->priv_data;
+    char cmd[MAX_URL_SIZE];
+    int ret = ftp_connect(h, h->filename);
+
+    if (ret >= 0) {
+        ret = AVERROR(EIO);
+        snprintf(cmd, sizeof(cmd), "DELE %s\r\n", ftp->path);
+        if (ftp_send_command(ftp, cmd, dele_replies, NULL) == 250) {
+            ret = 0;
+        } else {
+            snprintf(cmd, sizeof(cmd), "RMD %s\r\n", ftp->path);
+            if (ftp_send_command(ftp, cmd, rmd_replies, NULL) == 250)
+                ret = 0;
+        }
+    }
+
+    /* the context is closed on every path, including a failed connect */
+    ftp_close(h);
+    return ret;
+}
+
+/* Rename the source context's path to the path component of h_dst->filename
+ * using RNFR followed by RNTO. Returns 0 when RNTO is answered with 250,
+ * AVERROR(EIO) on any other reply, or the negative ftp_connect() result. */
+static int ftp_move(URLContext *h_src, URLContext *h_dst)
+{
+    static const int rnfr_replies[] = {350, 421, 450, 500, 501, 502, 503, 530, 0};
+    static const int rnto_replies[] = {250, 421, 500, 501, 502, 503, 530, 532, 553, 0};
+    FTPContext *ftp = h_src->priv_data;
+    char cmd[MAX_URL_SIZE], dst_path[MAX_URL_SIZE];
+    int ret = ftp_connect(h_src, h_src->filename);
+
+    if (ret < 0)
+        goto done;
+
+    ret = AVERROR(EIO);
+    snprintf(cmd, sizeof(cmd), "RNFR %s\r\n", ftp->path);
+    if (ftp_send_command(ftp, cmd, rnfr_replies, NULL) != 350)
+        goto done;
+
+    /* only the path part of the destination URL is sent to the server */
+    av_url_split(0, 0, 0, 0, 0, 0, 0,
+                 dst_path, sizeof(dst_path), h_dst->filename);
+    snprintf(cmd, sizeof(cmd), "RNTO %s\r\n", dst_path);
+    if (ftp_send_command(ftp, cmd, rnto_replies, NULL) == 250)
+        ret = 0;
+
+done:
+    ftp_close(h_src);
+    return ret;
+}
+
 URLProtocol ff_ftp_protocol = {
     .name                = "ftp",
     .url_open            = ftp_open,
@@ -815,5 +1110,11 @@ URLProtocol ff_ftp_protocol = {
     .url_shutdown        = ftp_shutdown,
     .priv_data_size      = sizeof(FTPContext),
     .priv_data_class     = &ftp_context_class,
+    .url_open_dir        = ftp_open_dir,
+    .url_read_dir        = ftp_read_dir,
+    .url_close_dir       = ftp_close_dir,
+    .url_delete          = ftp_delete,
+    .url_move            = ftp_move,
     .flags               = URL_PROTOCOL_FLAG_NETWORK,
+    .default_whitelist   = "tcp",
 };
diff --git a/libavformat/g723_1.c b/libavformat/g723_1.c
index 4f3ce8f0..661e7bd3 100644
--- a/libavformat/g723_1.c
+++ b/libavformat/g723_1.c
@@ -69,7 +69,7 @@ static int g723_1_read_packet(AVFormatContext *s, AVPacket *pkt)
 
     ret = avio_read(s->pb, pkt->data + 1, size - 1);
     if (ret < size - 1) {
-        av_free_packet(pkt);
+        av_packet_unref(pkt);
         return ret < 0 ? ret : AVERROR_EOF;
     }
 
diff --git a/libavformat/g729dec.c b/libavformat/g729dec.c
index 794558ef..349a0143 100644
--- a/libavformat/g729dec.c
+++ b/libavformat/g729dec.c
@@ -57,7 +57,7 @@ static int g729_read_header(AVFormatContext *s)
     } else if (s->bit_rate == 8000) {
         st->codec->block_align = 10;
     } else {
-        av_log(s, AV_LOG_ERROR, "Only 8000 b/s and 6400 b/s bitrates are supported. Provided: %d b/s\n", s->bit_rate);
+        av_log(s, AV_LOG_ERROR, "Only 8000 b/s and 6400 b/s bitrates are supported. Provided: %"PRId64" b/s\n", (int64_t)s->bit_rate);
         return AVERROR_INVALIDDATA;
     }
 
diff --git a/libavformat/genh.c b/libavformat/genh.c
new file mode 100644
index 00000000..cb1a02f2
--- /dev/null
+++ b/libavformat/genh.c
@@ -0,0 +1,194 @@
+/*
+ * GENH demuxer
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/intreadwrite.h"
+#include "avformat.h"
+#include "internal.h"
+
+typedef struct GENHDemuxContext {
+    unsigned dsp_int_type;    /* read from the header; 1 selects the 8-bytes-per-channel packet path for multichannel ADPCM THP */
+    unsigned interleave_size; /* read from the header; times the channel count it gives the initial block_align */
+} GENHDemuxContext;
+
+static int genh_probe(AVProbeData *p)
+{
+    unsigned channels;
+    /* "GENH" tag first, then a channel count in 1..0xFFFF */
+    if (AV_RL32(p->buf) != MKTAG('G','E','N','H'))
+        return 0;
+    channels = AV_RL32(p->buf + 4);
+    return channels && channels <= 0xFFFF ? AVPROBE_SCORE_MAX / 3 * 2 : 0;
+}
+
+/* Parse the GENH header: stream parameters, codec mapping and, for ADPCM THP,
+ * 32 bytes of extradata per channel read from the coef[] offsets; finally
+ * skip ahead to start_offset. Returns 0 or a negative AVERROR code. */
+static int genh_read_header(AVFormatContext *s)
+{
+    unsigned start_offset, header_size, codec, coef_type, coef[2];
+    GENHDemuxContext *c = s->priv_data;
+    av_unused unsigned coef_splitted[2];
+    int align, ch, ret;
+    AVStream *st;
+
+    avio_skip(s->pb, 4);
+
+    st = avformat_new_stream(s, NULL);
+    if (!st)
+        return AVERROR(ENOMEM);
+
+    st->codec->codec_type  = AVMEDIA_TYPE_AUDIO;
+    st->codec->channels    = avio_rl32(s->pb);
+    if (st->codec->channels <= 0)
+        return AVERROR_INVALIDDATA;
+    if (st->codec->channels == 1)
+        st->codec->channel_layout = AV_CH_LAYOUT_MONO;
+    else if (st->codec->channels == 2)
+        st->codec->channel_layout = AV_CH_LAYOUT_STEREO;
+    align                  =
+    c->interleave_size     = avio_rl32(s->pb);
+    if (align < 0 || align > INT_MAX / st->codec->channels)
+        return AVERROR_INVALIDDATA;
+    st->codec->block_align = align * st->codec->channels;
+    st->codec->sample_rate = avio_rl32(s->pb);
+    avio_skip(s->pb, 4);
+    st->duration = avio_rl32(s->pb);
+
+    codec = avio_rl32(s->pb);
+    switch (codec) {
+    case  0: st->codec->codec_id = AV_CODEC_ID_ADPCM_PSX;        break;
+    case  1:
+    case 11: st->codec->bits_per_coded_sample = 4;
+             st->codec->block_align = 36 * st->codec->channels;
+             st->codec->codec_id = AV_CODEC_ID_ADPCM_IMA_WAV;    break;
+    case  2: st->codec->codec_id = AV_CODEC_ID_ADPCM_DTK;        break;
+    case  3: st->codec->codec_id = st->codec->block_align > 0 ?
+                                   AV_CODEC_ID_PCM_S16BE_PLANAR :
+                                   AV_CODEC_ID_PCM_S16BE;        break;
+    case  4: st->codec->codec_id = st->codec->block_align > 0 ?
+                                   AV_CODEC_ID_PCM_S16LE_PLANAR :
+                                   AV_CODEC_ID_PCM_S16LE;        break;
+    case  5: st->codec->codec_id = st->codec->block_align > 0 ?
+                                   AV_CODEC_ID_PCM_S8_PLANAR :
+                                   AV_CODEC_ID_PCM_S8;           break;
+    case  6: st->codec->codec_id = AV_CODEC_ID_SDX2_DPCM;        break;
+    case  7: ret = ff_alloc_extradata(st->codec, 2);
+             if (ret < 0)
+                 return ret;
+             AV_WL16(st->codec->extradata, 3);
+             st->codec->codec_id = AV_CODEC_ID_ADPCM_IMA_WS;     break;
+    case 10: st->codec->codec_id = AV_CODEC_ID_ADPCM_AICA;       break;
+    case 12: st->codec->codec_id = AV_CODEC_ID_ADPCM_THP;        break;
+    case 13: st->codec->codec_id = AV_CODEC_ID_PCM_U8;           break;
+    case 17: st->codec->codec_id = AV_CODEC_ID_ADPCM_IMA_QT;     break;
+    default:
+             avpriv_request_sample(s, "codec %d", codec);
+             return AVERROR_PATCHWELCOME;
+    }
+
+    start_offset = avio_rl32(s->pb);
+    header_size  = avio_rl32(s->pb);
+
+    if (header_size > start_offset)
+        return AVERROR_INVALIDDATA;
+
+    if (header_size == 0)
+        start_offset = 0x800;
+
+    coef[0]          = avio_rl32(s->pb);
+    coef[1]          = avio_rl32(s->pb);
+    c->dsp_int_type  = avio_rl32(s->pb);
+    coef_type        = avio_rl32(s->pb);
+    coef_splitted[0] = avio_rl32(s->pb);
+    coef_splitted[1] = avio_rl32(s->pb);
+
+    if (st->codec->codec_id == AV_CODEC_ID_ADPCM_THP) {
+        if (st->codec->channels > 2) {
+            avpriv_request_sample(s, "channels %d>2", st->codec->channels);
+            return AVERROR_PATCHWELCOME;
+        }
+        if ((ret = ff_alloc_extradata(st->codec, 32 * st->codec->channels)) < 0)
+            return ret; /* the reads below write straight into extradata */
+        if (coef_type & 1) { /* same for every channel, so tested once up front */
+            avpriv_request_sample(s, "coef_type & 1");
+            return AVERROR_PATCHWELCOME;
+        }
+        for (ch = 0; ch < st->codec->channels; ch++) {
+            avio_seek(s->pb, coef[ch], SEEK_SET);
+            avio_read(s->pb, st->codec->extradata + 32 * ch, 32);
+        }
+
+        if (c->dsp_int_type == 1) {
+            st->codec->block_align = 8 * st->codec->channels;
+            if (c->interleave_size != 1 && c->interleave_size != 2 &&
+                c->interleave_size != 4) /* genh_read_packet() divides 8 by this value */
+                return AVERROR_INVALIDDATA;
+        }
+    }
+
+    avio_skip(s->pb, start_offset - avio_tell(s->pb));
+    avpriv_set_pts_info(st, 64, 1, st->codec->sample_rate);
+
+    return 0;
+}
+
+static int genh_read_packet(AVFormatContext *s, AVPacket *pkt)
+{
+    AVCodecContext *avctx = s->streams[0]->codec;
+    GENHDemuxContext *genh = s->priv_data;
+    int ret, size;
+
+    if (genh->dsp_int_type == 1 && avctx->codec_id == AV_CODEC_ID_ADPCM_THP &&
+        avctx->channels > 1) {
+        int blk, ch;
+        /* build a packet of 8 bytes per channel from the interleaved input */
+        if (avio_feof(s->pb))
+            return AVERROR_EOF;
+        if ((ret = av_new_packet(pkt, 8 * avctx->channels)) < 0)
+            return ret;
+        for (blk = 0; blk < 8 / genh->interleave_size; blk++) {
+            for (ch = 0; ch < avctx->channels; ch++) {
+                uint8_t *dst = pkt->data + ch * 8 + blk * genh->interleave_size;
+                dst[0] = avio_r8(s->pb);
+                dst[1] = avio_r8(s->pb);
+            }
+        }
+        ret = 0;
+    } else {
+        size = avctx->codec_id == AV_CODEC_ID_SDX2_DPCM ? avctx->block_align * 1024 :
+               avctx->block_align                       ? avctx->block_align :
+                                                          1024 * avctx->channels;
+        ret = av_get_packet(s->pb, pkt, size);
+    }
+
+    pkt->stream_index = 0;
+    return ret;
+}
+
+AVInputFormat ff_genh_demuxer = {
+    .name           = "genh",
+    .long_name      = NULL_IF_CONFIG_SMALL("GENeric Header"),
+    .priv_data_size = sizeof(GENHDemuxContext), /* holds dsp_int_type and interleave_size */
+    .read_probe     = genh_probe,               /* checks the "GENH" tag and the channel count */
+    .read_header    = genh_read_header,
+    .read_packet    = genh_read_packet,
+    .extensions     = "genh",
+};
diff --git a/libavformat/gif.c b/libavformat/gif.c
index e5d558df..6537e557 100644
--- a/libavformat/gif.c
+++ b/libavformat/gif.c
@@ -173,7 +173,7 @@ static int flush_packet(AVFormatContext *s, AVPacket *new)
 
     avio_write(pb, pkt->data, pkt->size);
 
-    av_free_packet(gif->prev_pkt);
+    av_packet_unref(gif->prev_pkt);
     if (new)
         av_copy_packet(gif->prev_pkt, new);
 
diff --git a/libavformat/gifdec.c b/libavformat/gifdec.c
index bb4c6ec6..48bd603d 100644
--- a/libavformat/gifdec.c
+++ b/libavformat/gifdec.c
@@ -52,6 +52,9 @@ typedef struct GIFDemuxContext {
     int total_iter;
     int iter_count;
     int ignore_loop;
+
+    int nb_frames;
+    int last_duration;
 } GIFDemuxContext;
 
 /**
@@ -279,6 +282,9 @@ static int gif_read_packet(AVFormatContext *s, AVPacket *pkt)
             pkt->stream_index = 0;
             pkt->duration = gdc->delay;
 
+            gdc->nb_frames ++;
+            gdc->last_duration = pkt->duration;
+
             /* Graphic Control Extension's scope is single frame.
              * Remove its influence. */
             gdc->delay = gdc->default_delay;
@@ -299,6 +305,9 @@ static int gif_read_packet(AVFormatContext *s, AVPacket *pkt)
     }
 
     if ((ret >= 0 && !frame_parsed) || ret == AVERROR_EOF) {
+        if (gdc->nb_frames == 1) {
+            s->streams[0]->r_frame_rate = (AVRational) {100, gdc->last_duration};
+        }
         /* This might happen when there is no image block
          * between extension blocks and GIF_TRAILER or EOF */
         if (!gdc->ignore_loop && (block_label == GIF_TRAILER || avio_feof(pb))
@@ -313,7 +322,7 @@ static const AVOption options[] = {
     { "min_delay"    , "minimum valid delay between frames (in hundredths of second)", offsetof(GIFDemuxContext, min_delay)    , AV_OPT_TYPE_INT, {.i64 = GIF_MIN_DELAY}    , 0, 100 * 60, AV_OPT_FLAG_DECODING_PARAM },
     { "max_gif_delay", "maximum valid delay between frames (in hundredths of seconds)", offsetof(GIFDemuxContext, max_delay)   , AV_OPT_TYPE_INT, {.i64 = 65535}            , 0, 65535   , AV_OPT_FLAG_DECODING_PARAM },
     { "default_delay", "default delay between frames (in hundredths of second)"      , offsetof(GIFDemuxContext, default_delay), AV_OPT_TYPE_INT, {.i64 = GIF_DEFAULT_DELAY}, 0, 100 * 60, AV_OPT_FLAG_DECODING_PARAM },
-    { "ignore_loop"  , "ignore loop setting (netscape extension)"                    , offsetof(GIFDemuxContext, ignore_loop)  , AV_OPT_TYPE_INT, {.i64 = 1}                , 0,        1, AV_OPT_FLAG_DECODING_PARAM },
+    { "ignore_loop"  , "ignore loop setting (netscape extension)"                    , offsetof(GIFDemuxContext, ignore_loop)  , AV_OPT_TYPE_BOOL,{.i64 = 1}                , 0,        1, AV_OPT_FLAG_DECODING_PARAM },
     { NULL },
 };
 
diff --git a/libavformat/gopher.c b/libavformat/gopher.c
index a5340d27..835ad7f9 100644
--- a/libavformat/gopher.c
+++ b/libavformat/gopher.c
@@ -93,8 +93,8 @@ static int gopher_open(URLContext *h, const char *uri, int flags)
     ff_url_join(buf, sizeof(buf), "tcp", NULL, hostname, port, NULL);
 
     s->hd = NULL;
-    err = ffurl_open(&s->hd, buf, AVIO_FLAG_READ_WRITE,
-                     &h->interrupt_callback, NULL);
+    err = ffurl_open_whitelist(&s->hd, buf, AVIO_FLAG_READ_WRITE,
+                               &h->interrupt_callback, NULL, h->protocol_whitelist);
     if (err < 0)
         goto fail;
 
diff --git a/libavformat/gsmdec.c b/libavformat/gsmdec.c
index a9865dbe..97dd8c58 100644
--- a/libavformat/gsmdec.c
+++ b/libavformat/gsmdec.c
@@ -45,7 +45,7 @@ static int gsm_read_packet(AVFormatContext *s, AVPacket *pkt)
 
     ret = av_get_packet(s->pb, pkt, size);
     if (ret < GSM_BLOCK_SIZE) {
-        av_free_packet(pkt);
+        av_packet_unref(pkt);
         return ret < 0 ? ret : AVERROR(EIO);
     }
     pkt->duration = 1;
diff --git a/libavformat/gxfenc.c b/libavformat/gxfenc.c
index 12031f7c..8229fe95 100644
--- a/libavformat/gxfenc.c
+++ b/libavformat/gxfenc.c
@@ -436,13 +436,11 @@ static int gxf_write_umf_material_description(AVFormatContext *s)
     AVIOContext *pb = s->pb;
     int timecode_base = gxf->time_base.den == 60000 ? 60 : 50;
     int64_t timestamp = 0;
-    AVDictionaryEntry *t;
     uint64_t nb_fields;
     uint32_t timecode_in; // timecode at mark in
     uint32_t timecode_out; // timecode at mark out
 
-    if (t = av_dict_get(s->metadata, "creation_time", NULL, 0))
-        timestamp = ff_iso8601_to_unix_time(t->value);
+    ff_parse_creation_time_metadata(s, &timestamp, 1);
 
     timecode_in = GXF_TIMECODE(gxf->tc.color, gxf->tc.drop,
                                gxf->tc.hh, gxf->tc.mm,
diff --git a/libavformat/hdsenc.c b/libavformat/hdsenc.c
index 575cc203..3e6e821a 100644
--- a/libavformat/hdsenc.c
+++ b/libavformat/hdsenc.c
@@ -26,6 +26,7 @@
 #endif
 
 #include "avformat.h"
+#include "avio_internal.h"
 #include "internal.h"
 #include "os_support.h"
 
@@ -139,7 +140,8 @@ static void hds_free(AVFormatContext *s)
         return;
     for (i = 0; i < s->nb_streams; i++) {
         OutputStream *os = &c->streams[i];
-        avio_closep(&os->out);
+        if (os->out)
+            ff_format_io_close(s, &os->out);
         if (os->ctx && os->ctx_inited)
             av_write_trailer(os->ctx);
         if (os->ctx)
@@ -169,8 +171,7 @@ static int write_manifest(AVFormatContext *s, int final)
 
     snprintf(filename, sizeof(filename), "%s/index.f4m", s->filename);
     snprintf(temp_filename, sizeof(temp_filename), "%s/index.f4m.tmp", s->filename);
-    ret = avio_open2(&out, temp_filename, AVIO_FLAG_WRITE,
-                     &s->interrupt_callback, NULL);
+    ret = s->io_open(s, &out, temp_filename, AVIO_FLAG_WRITE, NULL);
     if (ret < 0) {
         av_log(s, AV_LOG_ERROR, "Unable to open %s for writing\n", temp_filename);
         return ret;
@@ -188,7 +189,7 @@ static int write_manifest(AVFormatContext *s, int final)
         int b64_size = AV_BASE64_SIZE(os->metadata_size);
         char *base64 = av_malloc(b64_size);
         if (!base64) {
-            avio_close(out);
+            ff_format_io_close(s, &out);
             return AVERROR(ENOMEM);
         }
         av_base64_encode(base64, b64_size, os->metadata, os->metadata_size);
@@ -201,7 +202,7 @@ static int write_manifest(AVFormatContext *s, int final)
     }
     avio_printf(out, "</manifest>\n");
     avio_flush(out);
-    avio_close(out);
+    ff_format_io_close(s, &out);
     return ff_rename(temp_filename, filename, s);
 }
 
@@ -238,8 +239,7 @@ static int write_abst(AVFormatContext *s, OutputStream *os, int final)
              "%s/stream%d.abst", s->filename, index);
     snprintf(temp_filename, sizeof(temp_filename),
              "%s/stream%d.abst.tmp", s->filename, index);
-    ret = avio_open2(&out, temp_filename, AVIO_FLAG_WRITE,
-                     &s->interrupt_callback, NULL);
+    ret = s->io_open(s, &out, temp_filename, AVIO_FLAG_WRITE, NULL);
     if (ret < 0) {
         av_log(s, AV_LOG_ERROR, "Unable to open %s for writing\n", temp_filename);
         return ret;
@@ -282,15 +282,14 @@ static int write_abst(AVFormatContext *s, OutputStream *os, int final)
     }
     update_size(out, afrt_pos);
     update_size(out, 0);
-    avio_close(out);
+    ff_format_io_close(s, &out);
     return ff_rename(temp_filename, filename, s);
 }
 
 static int init_file(AVFormatContext *s, OutputStream *os, int64_t start_ts)
 {
     int ret, i;
-    ret = avio_open2(&os->out, os->temp_filename, AVIO_FLAG_WRITE,
-                     &s->interrupt_callback, NULL);
+    ret = s->io_open(s, &os->out, os->temp_filename, AVIO_FLAG_WRITE, NULL);
     if (ret < 0)
         return ret;
     avio_wb32(os->out, 0);
@@ -303,13 +302,13 @@ static int init_file(AVFormatContext *s, OutputStream *os, int64_t start_ts)
     return 0;
 }
 
-static void close_file(OutputStream *os)
+static void close_file(AVFormatContext *s, OutputStream *os)
 {
     int64_t pos = avio_tell(os->out);
     avio_seek(os->out, 0, SEEK_SET);
     avio_wb32(os->out, pos);
     avio_flush(os->out);
-    avio_closep(&os->out);
+    ff_format_io_close(s, &os->out);
 }
 
 static int hds_write_header(AVFormatContext *s)
@@ -474,7 +473,7 @@ static int hds_flush(AVFormatContext *s, OutputStream *os, int final,
 
     avio_flush(os->ctx->pb);
     os->packets_written = 0;
-    close_file(os);
+    close_file(s, os);
 
     snprintf(target_filename, sizeof(target_filename),
              "%s/stream%dSeg1-Frag%d", s->filename, index, os->fragment_index);
@@ -569,7 +568,7 @@ static const AVOption options[] = {
     { "window_size", "number of fragments kept in the manifest", OFFSET(window_size), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, E },
     { "extra_window_size", "number of fragments kept outside of the manifest before removing from disk", OFFSET(extra_window_size), AV_OPT_TYPE_INT, { .i64 = 5 }, 0, INT_MAX, E },
     { "min_frag_duration", "minimum fragment duration (in microseconds)", OFFSET(min_frag_duration), AV_OPT_TYPE_INT64, { .i64 = 10000000 }, 0, INT_MAX, E },
-    { "remove_at_exit", "remove all fragments when finished", OFFSET(remove_at_exit), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, E },
+    { "remove_at_exit", "remove all fragments when finished", OFFSET(remove_at_exit), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, E },
     { NULL },
 };
 
diff --git a/libavformat/hevc.c b/libavformat/hevc.c
index 643b7159..7c294ef8 100644
--- a/libavformat/hevc.c
+++ b/libavformat/hevc.c
@@ -565,7 +565,10 @@ static int hvcc_parse_sps(GetBitContext *gb,
     }
 
     if (get_bits1(gb)) {                               // long_term_ref_pics_present_flag
-        for (i = 0; i < get_ue_golomb_long(gb); i++) { // num_long_term_ref_pics_sps
+        unsigned num_long_term_ref_pics_sps = get_ue_golomb_long(gb);
+        if (num_long_term_ref_pics_sps > 31U)
+            return AVERROR_INVALIDDATA;
+        for (i = 0; i < num_long_term_ref_pics_sps; i++) { // num_long_term_ref_pics_sps
             int len = FFMIN(log2_max_pic_order_cnt_lsb_minus4 + 4, 16);
             skip_bits (gb, len); // lt_ref_pic_poc_lsb_sps[i]
             skip_bits1(gb);      // used_by_curr_pic_lt_sps_flag[i]
@@ -616,11 +619,12 @@ static int hvcc_parse_pps(GetBitContext *gb,
     get_se_golomb_long(gb); // pps_cr_qp_offset
 
     /*
+     * pps_slice_chroma_qp_offsets_present_flag u(1)
      * weighted_pred_flag               u(1)
      * weighted_bipred_flag             u(1)
      * transquant_bypass_enabled_flag   u(1)
      */
-    skip_bits(gb, 3);
+    skip_bits(gb, 4);
 
     tiles_enabled_flag               = get_bits1(gb);
     entropy_coding_sync_enabled_flag = get_bits1(gb);
@@ -644,7 +648,7 @@ static uint8_t *nal_unit_extract_rbsp(const uint8_t *src, uint32_t src_len,
     uint8_t *dst;
     uint32_t i, len;
 
-    dst = av_malloc(src_len);
+    dst = av_malloc(src_len + AV_INPUT_BUFFER_PADDING_SIZE);
     if (!dst)
         return NULL;
 
diff --git a/libavformat/hls.c b/libavformat/hls.c
index 3f1d97eb..fc1ff38e 100644
--- a/libavformat/hls.c
+++ b/libavformat/hls.c
@@ -73,6 +73,8 @@ struct segment {
     char *key;
     enum KeyType key_type;
     uint8_t iv[16];
+    /* associated Media Initialization Section, treated as a segment */
+    struct segment *init_section;
 };
 
 struct rendition;
@@ -110,6 +112,13 @@ struct playlist {
     int64_t cur_seg_offset;
     int64_t last_load_time;
 
+    /* Currently active Media Initialization Section */
+    struct segment *cur_init_section;
+    uint8_t *init_sec_buf;
+    unsigned int init_sec_buf_size;
+    unsigned int init_sec_data_len;
+    unsigned int init_sec_buf_read_offset;
+
     char key_url[MAX_URL_SIZE];
     uint8_t key[16];
 
@@ -135,6 +144,11 @@ struct playlist {
      * multiple (playlist-less) renditions associated with them. */
     int n_renditions;
     struct rendition **renditions;
+
+    /* Media Initialization Sections (EXT-X-MAP) associated with this
+     * playlist, if any. */
+    int n_init_sections;
+    struct segment **init_sections;
 };
 
 /*
@@ -166,6 +180,7 @@ struct variant {
 
 typedef struct HLSContext {
     AVClass *class;
+    AVFormatContext *ctx;
     int n_variants;
     struct variant **variants;
     int n_playlists;
@@ -182,6 +197,9 @@ typedef struct HLSContext {
     char *user_agent;                    ///< holds HTTP user agent set as an AVOption to the HTTP protocol context
     char *cookies;                       ///< holds HTTP cookie values set in either the initial response or as an AVOption to the HTTP protocol context
     char *headers;                       ///< holds HTTP headers set as an AVOption to the HTTP protocol context
+    char *http_proxy;                    ///< holds the address of the HTTP proxy server
+    AVDictionary *avio_opts;
+    int strict_std_compliance;
 } HLSContext;
 
 static int read_chomp_line(AVIOContext *s, char *buf, int maxlen)
@@ -204,17 +222,30 @@ static void free_segment_list(struct playlist *pls)
     pls->n_segments = 0;
 }
 
+static void free_init_section_list(struct playlist *pls)
+{
+    int idx;
+    for (idx = 0; idx < pls->n_init_sections; idx++) {
+        av_freep(&pls->init_sections[idx]->url); /* the URL string first... */
+        av_freep(pls->init_sections + idx);      /* ...then the section that owned it */
+    }
+    pls->n_init_sections = 0;
+    av_freep(&pls->init_sections);
+}
+
 static void free_playlist_list(HLSContext *c)
 {
     int i;
     for (i = 0; i < c->n_playlists; i++) {
         struct playlist *pls = c->playlists[i];
         free_segment_list(pls);
+        free_init_section_list(pls);
         av_freep(&pls->renditions);
         av_freep(&pls->id3_buf);
         av_dict_free(&pls->id3_initial);
         ff_id3v2_free_extra_meta(&pls->id3_deferred_extra);
-        av_free_packet(&pls->pkt);
+        av_freep(&pls->init_sec_buf);
+        av_packet_unref(&pls->pkt);
         av_freep(&pls->pb.buffer);
         if (pls->input)
             ffurl_close(pls->input);
@@ -227,6 +258,8 @@ static void free_playlist_list(HLSContext *c)
     av_freep(&c->playlists);
     av_freep(&c->cookies);
     av_freep(&c->user_agent);
+    av_freep(&c->headers);
+    av_freep(&c->http_proxy);
     c->n_playlists = 0;
 }
 
@@ -351,6 +384,60 @@ static void handle_key_args(struct key_info *info, const char *key,
     }
 }
 
+struct init_section_info {
+    char uri[MAX_URL_SIZE]; /* destination buffer for the URI= attribute */
+    char byterange[32];     /* destination buffer for BYTERANGE=; new_init_section() reads it as "size" or "size@offset" */
+};
+
+/* Create the segment describing an EXT-X-MAP init section and append it to
+ * pls->init_sections. Returns NULL when info carries no URI or on allocation
+ * failure. BYTERANGE is "size[@offset]"; without it size is set to -1. */
+static struct segment *new_init_section(struct playlist *pls,
+                                        struct init_section_info *info,
+                                        const char *url_base)
+{
+    struct segment *sec;
+    char *ptr;
+    char tmp_str[MAX_URL_SIZE];
+
+    if (!info->uri[0])
+        return NULL;
+    sec = av_mallocz(sizeof(*sec));
+    if (!sec)
+        return NULL;
+    ff_make_absolute_url(tmp_str, sizeof(tmp_str), url_base, info->uri);
+    sec->url = av_strdup(tmp_str);
+    if (!sec->url) {
+        av_free(sec);
+        return NULL;
+    }
+
+    if (info->byterange[0]) {
+        /* strtoll, not atoi: atoi is undefined for values outside int range */
+        sec->size = strtoll(info->byterange, NULL, 10);
+        ptr = strchr(info->byterange, '@');
+        if (ptr)
+            sec->url_offset = strtoll(ptr+1, NULL, 10);
+    } else {
+        /* the entire file is the init section */
+        sec->size = -1;
+    }
+    dynarray_add(&pls->init_sections, &pls->n_init_sections, sec);
+    return sec;
+}
+
+static void handle_init_section_args(struct init_section_info *info, const char *key,
+                                           int key_len, char **dest, int *dest_len)
+{
+    /* Select the info buffer that receives the value of the attribute named
+     * by key; for any other attribute *dest and *dest_len are left untouched. */
+    const int is_uri = !strncmp(key, "URI=", key_len);
+    if (!is_uri && strncmp(key, "BYTERANGE=", key_len))
+        return;
+    *dest     = is_uri ? info->uri : info->byterange;
+    *dest_len = is_uri ? sizeof(info->uri) : sizeof(info->byterange);
+}
+
 struct rendition_info {
     char type[16];
     char uri[MAX_URL_SIZE];
@@ -391,8 +478,9 @@ static struct rendition *new_rendition(HLSContext *c, struct rendition_info *inf
         return NULL;
 
     /* TODO: handle subtitles (each segment has to parsed separately) */
-    if (type == AVMEDIA_TYPE_SUBTITLE)
-        return NULL;
+    if (c->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL)
+        if (type == AVMEDIA_TYPE_SUBTITLE)
+            return NULL;
 
     rend = av_mallocz(sizeof(struct rendition));
     if (!rend)
@@ -481,7 +569,9 @@ static void handle_rendition_args(struct rendition_info *info, const char *key,
 /* used by parse_playlist to allocate a new variant+playlist when the
  * playlist is detected to be a Media Playlist (not Master Playlist)
  * and we have no parent Master Playlist (parsing of which would have
- * allocated the variant and playlist already) */
+ * allocated the variant and playlist already)
+ * *pls == NULL  => Master Playlist or parentless Media Playlist
+ * *pls != NULL => parented Media Playlist, playlist+variant allocated */
 static int ensure_playlist(HLSContext *c, struct playlist **pls, const char *url)
 {
     if (*pls)
@@ -492,8 +582,72 @@ static int ensure_playlist(HLSContext *c, struct playlist **pls, const char *url
     return 0;
 }
 
-/* pls = NULL  => Master Playlist or parentless Media Playlist
- * pls = !NULL => parented Media Playlist, playlist+variant allocated */
+/* Connect pls->input with opts merged with opts2. On any failure the context is
+ * closed and pls->input reset to NULL, so callers never see a half-open input. */
+static int url_connect(struct playlist *pls, AVDictionary *opts, AVDictionary *opts2)
+{
+    AVDictionary *tmp = NULL;
+    int ret = 0;
+
+    av_dict_copy(&tmp, opts, 0);
+    av_dict_copy(&tmp, opts2, 0);
+
+    if (pls->parent->protocol_whitelist) {
+        pls->input->protocol_whitelist = av_strdup(pls->parent->protocol_whitelist);
+        if (!pls->input->protocol_whitelist)
+            ret = AVERROR(ENOMEM);
+    }
+
+    if (ret < 0 || (ret = ffurl_connect(pls->input, &tmp)) < 0) {
+        ffurl_close(pls->input);
+        pls->input = NULL;
+    }
+
+    av_dict_free(&tmp);
+    return ret;
+}
+
+static void update_options(char **dest, const char *name, void *src)
+{
+    av_freep(dest);                              /* discard the previous value */
+    av_opt_get(src, name, 0, (uint8_t **)dest);  /* fetch a fresh copy of option `name` */
+    if (*dest && (*dest)[0] == '\0')             /* an empty string is stored as NULL */
+        av_freep(dest);
+}
+
+/* Open url (http(s) or file only) for reading into *uc, merging c->avio_opts with opts.
+ * On success c->cookies is refreshed; it is mirrored into opts only if opts is non-NULL,
+ * since a dictionary created through the by-value pointer would be leaked. */
+static int open_url(HLSContext *c, URLContext **uc, const char *url, AVDictionary *opts)
+{
+    AVDictionary *tmp = NULL;
+    int ret;
+    const char *proto_name = avio_find_protocol_name(url);
+
+    if (!proto_name)
+        return AVERROR_INVALIDDATA;
+
+    // only http(s) & file are allowed
+    if (!av_strstart(proto_name, "http", NULL) && !av_strstart(proto_name, "file", NULL))
+        return AVERROR_INVALIDDATA;
+    if (!strncmp(proto_name, url, strlen(proto_name)) && url[strlen(proto_name)] == ':')
+        ;
+    else if (strcmp(proto_name, "file") || !strncmp(url, "file,", 5))
+        return AVERROR_INVALIDDATA;
+
+    av_dict_copy(&tmp, c->avio_opts, 0);
+    av_dict_copy(&tmp, opts, 0);
+
+    ret = ffurl_open_whitelist(uc, url, AVIO_FLAG_READ, c->interrupt_callback, &tmp, c->ctx->protocol_whitelist);
+    if (ret >= 0) {
+        // update cookies on http response with setcookies.
+        update_options(&c->cookies, "cookies", (*uc)->priv_data);
+        if (opts)
+            av_dict_set(&opts, "cookies", c->cookies, 0);
+    }
+    av_dict_free(&tmp);
+    return ret;
+}
+
 static int parse_playlist(HLSContext *c, const char *url,
                           struct playlist *pls, AVIOContext *in)
 {
@@ -511,8 +665,10 @@ static int parse_playlist(HLSContext *c, const char *url,
     uint8_t *new_url = NULL;
     struct variant_info variant_info;
     char tmp_str[MAX_URL_SIZE];
+    struct segment *cur_init_section = NULL;
 
     if (!in) {
+#if 1
         AVDictionary *opts = NULL;
         close_in = 1;
         /* Some HLS servers don't like being sent the range header */
@@ -522,12 +678,18 @@ static int parse_playlist(HLSContext *c, const char *url,
         av_dict_set(&opts, "user-agent", c->user_agent, 0);
         av_dict_set(&opts, "cookies", c->cookies, 0);
         av_dict_set(&opts, "headers", c->headers, 0);
+        av_dict_set(&opts, "http_proxy", c->http_proxy, 0);
 
-        ret = avio_open2(&in, url, AVIO_FLAG_READ,
-                         c->interrupt_callback, &opts);
+        ret = c->ctx->io_open(c->ctx, &in, url, AVIO_FLAG_READ, &opts);
         av_dict_free(&opts);
         if (ret < 0)
             return ret;
+#else
+        ret = open_in(c, &in, url);
+        if (ret < 0)
+            return ret;
+        close_in = 1;
+#endif
     }
 
     if (av_opt_get(in, "location", AV_OPT_SEARCH_CHILDREN, &new_url) >= 0)
@@ -589,6 +751,14 @@ static int parse_playlist(HLSContext *c, const char *url,
                 pls->type = PLS_TYPE_EVENT;
             else if (!strcmp(ptr, "VOD"))
                 pls->type = PLS_TYPE_VOD;
+        } else if (av_strstart(line, "#EXT-X-MAP:", &ptr)) {
+            struct init_section_info info = {{0}};
+            ret = ensure_playlist(c, &pls, url);
+            if (ret < 0)
+                goto fail;
+            ff_parse_key_value(ptr, (ff_parse_key_val_cb) handle_init_section_args,
+                               &info);
+            cur_init_section = new_init_section(pls, &info, url);
         } else if (av_strstart(line, "#EXT-X-ENDLIST", &ptr)) {
             if (pls)
                 pls->finished = 1;
@@ -667,6 +837,8 @@ static int parse_playlist(HLSContext *c, const char *url,
                     seg->url_offset = 0;
                     seg_offset = 0;
                 }
+
+                seg->init_section = cur_init_section;
             }
         }
     }
@@ -676,21 +848,26 @@ static int parse_playlist(HLSContext *c, const char *url,
 fail:
     av_free(new_url);
     if (close_in)
-        avio_close(in);
+        ff_format_io_close(c->ctx, &in);
     return ret;
 }
 
+static struct segment *current_segment(struct playlist *pls)
+{   /* Return the segment that pls->cur_seq_no refers to. */
+    return pls->segments[pls->cur_seq_no - pls->start_seq_no]; /* index is relative to start_seq_no; no range check here */
+}
+
 enum ReadFromURLMode {
     READ_NORMAL,
     READ_COMPLETE,
 };
 
 /* read from URLContext, limiting read to current segment */
-static int read_from_url(struct playlist *pls, uint8_t *buf, int buf_size,
+static int read_from_url(struct playlist *pls, struct segment *seg,
+                         uint8_t *buf, int buf_size,
                          enum ReadFromURLMode mode)
 {
     int ret;
-    struct segment *seg = pls->segments[pls->cur_seq_no - pls->start_seq_no];
 
      /* limit read if the segment was only a part of a file */
     if (seg->size >= 0)
@@ -752,7 +929,7 @@ static int id3_has_changed_values(struct playlist *pls, AVDictionary *metadata,
 
     if (apic) {
         int size = pls->ctx->streams[1]->attached_pic.size;
-        if (size != apic->buf->size - FF_INPUT_BUFFER_PADDING_SIZE)
+        if (size != apic->buf->size - AV_INPUT_BUFFER_PADDING_SIZE)
             return 1;
 
         if (memcmp(apic->buf->data, pls->ctx->streams[1]->attached_pic.data, size) != 0)
@@ -813,12 +990,13 @@ static void intercept_id3(struct playlist *pls, uint8_t *buf,
     int bytes;
     int id3_buf_pos = 0;
     int fill_buf = 0;
+    struct segment *seg = current_segment(pls);
 
     /* gather all the id3 tags */
     while (1) {
         /* see if we can retrieve enough data for ID3 header */
         if (*len < ID3v2_HEADER_SIZE && buf_size >= ID3v2_HEADER_SIZE) {
-            bytes = read_from_url(pls, buf + *len, ID3v2_HEADER_SIZE - *len, READ_COMPLETE);
+            bytes = read_from_url(pls, seg, buf + *len, ID3v2_HEADER_SIZE - *len, READ_COMPLETE);
             if (bytes > 0) {
 
                 if (bytes == ID3v2_HEADER_SIZE - *len)
@@ -839,7 +1017,6 @@ static void intercept_id3(struct playlist *pls, uint8_t *buf,
             break;
 
         if (ff_id3v2_match(buf, ID3v2_DEFAULT_MAGIC)) {
-            struct segment *seg = pls->segments[pls->cur_seq_no - pls->start_seq_no];
             int64_t maxsize = seg->size >= 0 ? seg->size : 1024*1024;
             int taglen = ff_id3v2_tag_len(buf);
             int tag_got_bytes = FFMIN(taglen, *len);
@@ -871,7 +1048,7 @@ static void intercept_id3(struct playlist *pls, uint8_t *buf,
 
             if (remaining > 0) {
                 /* read the rest of the tag in */
-                if (read_from_url(pls, pls->id3_buf + id3_buf_pos, remaining, READ_COMPLETE) != remaining)
+                if (read_from_url(pls, seg, pls->id3_buf + id3_buf_pos, remaining, READ_COMPLETE) != remaining)
                     break;
                 id3_buf_pos += remaining;
                 av_log(pls->ctx, AV_LOG_DEBUG, "Stripped additional %d HLS ID3 bytes\n", remaining);
@@ -885,7 +1062,7 @@ static void intercept_id3(struct playlist *pls, uint8_t *buf,
 
     /* re-fill buffer for the caller unless EOF */
     if (*len >= 0 && (fill_buf || *len == 0)) {
-        bytes = read_from_url(pls, buf + *len, buf_size - *len, READ_NORMAL);
+        bytes = read_from_url(pls, seg, buf + *len, buf_size - *len, READ_NORMAL);
 
         /* ignore error if we already had some data */
         if (bytes >= 0)
@@ -905,30 +1082,18 @@ static void intercept_id3(struct playlist *pls, uint8_t *buf,
         pls->is_id3_timestamped = (pls->id3_mpegts_timestamp != AV_NOPTS_VALUE);
 }
 
-static void update_options(char **dest, const char *name, void *src)
-{
-    av_freep(dest);
-    av_opt_get(src, name, 0, (uint8_t**)dest);
-    if (*dest && !strlen(*dest))
-        av_freep(dest);
-}
-
-static int open_input(HLSContext *c, struct playlist *pls)
+static int open_input(HLSContext *c, struct playlist *pls, struct segment *seg)
 {
     AVDictionary *opts = NULL;
-    AVDictionary *opts2 = NULL;
     int ret;
-    struct segment *seg = pls->segments[pls->cur_seq_no - pls->start_seq_no];
 
     // broker prior HTTP options that should be consistent across requests
     av_dict_set(&opts, "user-agent", c->user_agent, 0);
     av_dict_set(&opts, "cookies", c->cookies, 0);
     av_dict_set(&opts, "headers", c->headers, 0);
+    av_dict_set(&opts, "http_proxy", c->http_proxy, 0);
     av_dict_set(&opts, "seekable", "0", 0);
 
-    // Same opts for key request (ffurl_open mutilates the opts so it cannot be used twice)
-    av_dict_copy(&opts2, opts, 0);
-
     if (seg->size >= 0) {
         /* try to restrict the HTTP request to the part we want
          * (if this is in fact a HTTP request) */
@@ -940,22 +1105,18 @@ static int open_input(HLSContext *c, struct playlist *pls)
            seg->url, seg->url_offset, pls->index);
 
     if (seg->key_type == KEY_NONE) {
-        ret = ffurl_open(&pls->input, seg->url, AVIO_FLAG_READ,
-                          &pls->parent->interrupt_callback, &opts);
-
+        ret = open_url(pls->parent->priv_data, &pls->input, seg->url, opts);
     } else if (seg->key_type == KEY_AES_128) {
+//         HLSContext *c = var->parent->priv_data;
         char iv[33], key[33], url[MAX_URL_SIZE];
         if (strcmp(seg->key, pls->key_url)) {
             URLContext *uc;
-            if (ffurl_open(&uc, seg->key, AVIO_FLAG_READ,
-                           &pls->parent->interrupt_callback, &opts2) == 0) {
+            if (open_url(pls->parent->priv_data, &uc, seg->key, opts) == 0) {
                 if (ffurl_read_complete(uc, pls->key, sizeof(pls->key))
                     != sizeof(pls->key)) {
                     av_log(NULL, AV_LOG_ERROR, "Unable to read key file %s\n",
                            seg->key);
                 }
-                update_options(&c->cookies, "cookies", uc->priv_data);
-                av_dict_set(&opts, "cookies", c->cookies, 0);
                 ffurl_close(uc);
             } else {
                 av_log(NULL, AV_LOG_ERROR, "Unable to open key file %s\n",
@@ -970,15 +1131,14 @@ static int open_input(HLSContext *c, struct playlist *pls)
             snprintf(url, sizeof(url), "crypto+%s", seg->url);
         else
             snprintf(url, sizeof(url), "crypto:%s", seg->url);
+
         if ((ret = ffurl_alloc(&pls->input, url, AVIO_FLAG_READ,
                                &pls->parent->interrupt_callback)) < 0)
             goto cleanup;
         av_opt_set(pls->input->priv_data, "key", key, 0);
         av_opt_set(pls->input->priv_data, "iv", iv, 0);
 
-        if ((ret = ffurl_connect(pls->input, &opts)) < 0) {
-            ffurl_close(pls->input);
-            pls->input = NULL;
+        if ((ret = url_connect(pls, c->avio_opts, opts)) < 0) {
             goto cleanup;
         }
         ret = 0;
@@ -993,7 +1153,7 @@ static int open_input(HLSContext *c, struct playlist *pls)
     /* Seek to the requested position. If this was a HTTP request, the offset
      * should already be where want it to, but this allows e.g. local testing
      * without a HTTP server. */
-    if (ret == 0 && seg->key_type == KEY_NONE) {
+    if (ret == 0 && seg->key_type == KEY_NONE && seg->url_offset) {
         int seekret = ffurl_seek(pls->input, seg->url_offset, SEEK_SET);
         if (seekret < 0) {
             av_log(pls->parent, AV_LOG_ERROR, "Unable to seek to offset %"PRId64" of HLS segment '%s'\n", seg->url_offset, seg->url);
@@ -1005,11 +1165,70 @@ static int open_input(HLSContext *c, struct playlist *pls)
 
 cleanup:
     av_dict_free(&opts);
-    av_dict_free(&opts2);
     pls->cur_seg_offset = 0;
     return ret;
 }
 
+/* Download seg's initialization section into pls->init_sec_buf unless it is already
+ * the cached one. Returns 0 on success (or nothing to do), a negative error otherwise. */
+static int update_init_section(struct playlist *pls, struct segment *seg)
+{
+    static const int max_init_section_size = 1024*1024;
+    HLSContext *c = pls->parent->priv_data;
+    int64_t sec_size;
+    int64_t urlsize;
+    int ret;
+
+    if (seg->init_section == pls->cur_init_section)
+        return 0;
+
+    pls->cur_init_section = NULL;
+
+    if (!seg->init_section)
+        return 0;
+
+    /* this will clobber playlist URLContext stuff, so this should be
+     * called between segments only */
+    ret = open_input(c, pls, seg->init_section);
+    if (ret < 0) {
+        av_log(pls->parent, AV_LOG_WARNING,
+               "Failed to open an initialization section in playlist %d\n",
+               pls->index);
+        return ret;
+    }
+
+    if (seg->init_section->size >= 0)
+        sec_size = seg->init_section->size;
+    else if ((urlsize = ffurl_size(pls->input)) >= 0)
+        sec_size = urlsize;
+    else
+        sec_size = max_init_section_size;
+
+    av_log(pls->parent, AV_LOG_DEBUG,
+           "Downloading an initialization section of size %"PRId64"\n",
+           sec_size);
+
+    sec_size = FFMIN(sec_size, max_init_section_size);
+    av_fast_malloc(&pls->init_sec_buf, &pls->init_sec_buf_size, sec_size);
+    /* av_fast_malloc() signals failure through a zero-sized buffer */
+    ret = pls->init_sec_buf_size < sec_size ? AVERROR(ENOMEM) :
+          read_from_url(pls, seg->init_section, pls->init_sec_buf,
+                        pls->init_sec_buf_size, READ_COMPLETE);
+    ffurl_close(pls->input);
+    pls->input = NULL;
+    if (ret < 0)
+        return ret;
+
+    pls->cur_init_section = seg->init_section;
+    pls->init_sec_data_len = ret;
+    pls->init_sec_buf_read_offset = 0;
+
+    /* spec says audio elementary streams do not have media initialization
+     * sections, so there should be no ID3 timestamps */
+    pls->is_id3_timestamped = 0;
+    return 0;
+}
+
 static int64_t default_reload_interval(struct playlist *pls)
 {
     return pls->n_segments > 0 ?
@@ -1030,6 +1249,7 @@ static int read_data(void *opaque, uint8_t *buf, int buf_size)
 
     if (!v->input) {
         int64_t reload_interval;
+        struct segment *seg;
 
         /* Check that the playlist is still needed before opening a new
          * segment. */
@@ -1083,8 +1303,17 @@ static int read_data(void *opaque, uint8_t *buf, int buf_size)
             goto reload;
         }
 
-        ret = open_input(c, v);
+        seg = current_segment(v);
+
+        /* load/update Media Initialization Section, if any */
+        ret = update_init_section(v, seg);
+        if (ret)
+            return ret;
+
+        ret = open_input(c, v, seg);
         if (ret < 0) {
+            if (ff_check_interrupt(c->interrupt_callback))
+                return AVERROR_EXIT;
             av_log(v->parent, AV_LOG_WARNING, "Failed to open segment of playlist %d\n",
                    v->index);
             v->cur_seq_no += 1;
@@ -1093,7 +1322,15 @@ static int read_data(void *opaque, uint8_t *buf, int buf_size)
         just_opened = 1;
     }
 
-    ret = read_from_url(v, buf, buf_size, READ_NORMAL);
+    if (v->init_sec_buf_read_offset < v->init_sec_data_len) {
+        /* Push the not-yet-delivered part of the init section out before segment data */
+        int copy_size = FFMIN(v->init_sec_data_len - v->init_sec_buf_read_offset, buf_size);
+        memcpy(buf, v->init_sec_buf + v->init_sec_buf_read_offset, copy_size);
+        v->init_sec_buf_read_offset += copy_size;
+        return copy_size;
+    }
+
+    ret = read_from_url(v, current_segment(v), buf, buf_size, READ_NORMAL);
     if (ret > 0) {
         if (just_opened && v->is_id3_timestamped != 0) {
             /* Intercept ID3 tags here, elementary audio streams are required
@@ -1252,13 +1489,37 @@ static int select_cur_seq_no(HLSContext *c, struct playlist *pls)
     return pls->start_seq_no;
 }
 
+static int save_avio_options(AVFormatContext *s)
+{
+    /* AVIO options copied from s->pb into the HLS context's avio_opts dictionary */
+    const char *names[] = {
+        "headers", "http_proxy", "user_agent", "user-agent", "cookies", NULL };
+    HLSContext *hls = s->priv_data;
+    int i, ret = 0;
+
+    for (i = 0; names[i]; i++) {
+        uint8_t *val;
+
+        if (av_opt_get(s->pb, names[i], AV_OPT_SEARCH_CHILDREN | AV_OPT_ALLOW_NULL, &val) < 0)
+            continue; /* lookup failed: leave this option out */
+        /* AV_DICT_DONT_STRDUP_VAL: the dictionary takes ownership of val */
+        ret = av_dict_set(&hls->avio_opts, names[i], val, AV_DICT_DONT_STRDUP_VAL);
+        if (ret < 0)
+            return ret;
+    }
+
+    return ret;
+}
+
 static int hls_read_header(AVFormatContext *s)
 {
     URLContext *u = (s->flags & AVFMT_FLAG_CUSTOM_IO) ? NULL : s->pb->opaque;
     HLSContext *c = s->priv_data;
     int ret = 0, i, j, stream_offset = 0;
 
+    c->ctx                = s;
     c->interrupt_callback = &s->interrupt_callback;
+    c->strict_std_compliance = s->strict_std_compliance;
 
     c->first_packet = 1;
     c->first_timestamp = AV_NOPTS_VALUE;
@@ -1274,11 +1535,20 @@ static int hls_read_header(AVFormatContext *s)
 
         // get the previous headers & set back to null if string size is zero
         update_options(&c->headers, "headers", u->priv_data);
+
+        // get the previous http proxy & set back to null if string size is zero
+        update_options(&c->http_proxy, "http_proxy", u->priv_data);
     }
 
     if ((ret = parse_playlist(c, s->filename, NULL, s->pb)) < 0)
         goto fail;
 
+    if ((ret = save_avio_options(s)) < 0)
+        goto fail;
+
+    /* Some HLS servers don't like being sent the range header */
+    av_dict_set(&c->avio_opts, "seekable", "0", 0);
+
     if (c->n_variants == 0) {
         av_log(NULL, AV_LOG_WARNING, "Empty playlist\n");
         ret = AVERROR_EOF;
@@ -1429,7 +1699,7 @@ static int hls_read_header(AVFormatContext *s)
             for (k = 0; k < pls->ctx->nb_streams; k++) {
                 struct AVStream *st = s->streams[pls->stream_offset + k];
 
-                ff_program_add_stream_index(s, i, pls->stream_offset + k);
+                av_program_add_stream_index(s, i, pls->stream_offset + k);
 
                 /* Set variant_bitrate for streams unique to this variant */
                 if (!is_shared && v->bandwidth)
@@ -1535,6 +1805,7 @@ static int hls_read_packet(AVFormatContext *s, AVPacket *pkt)
     int ret, i, minplaylist = -1;
 
     recheck_discard_flags(s, c->first_packet);
+    c->first_packet = 0;
 
     for (i = 0; i < c->n_playlists; i++) {
         struct playlist *pls = c->playlists[i];
@@ -1584,7 +1855,7 @@ static int hls_read_packet(AVFormatContext *s, AVPacket *pkt)
                         break;
                     }
                 }
-                av_free_packet(&pls->pkt);
+                av_packet_unref(&pls->pkt);
                 reset_packet(&pls->pkt);
             }
         }
@@ -1629,6 +1900,9 @@ static int hls_close(AVFormatContext *s)
     free_playlist_list(c);
     free_variant_list(c);
     free_rendition_list(c);
+
+    av_dict_free(&c->avio_opts);
+
     return 0;
 }
 
@@ -1683,7 +1957,7 @@ static int hls_read_seek(AVFormatContext *s, int stream_index,
             ffurl_close(pls->input);
             pls->input = NULL;
         }
-        av_free_packet(&pls->pkt);
+        av_packet_unref(&pls->pkt);
         reset_packet(&pls->pkt);
         pls->pb.eof_reached = 0;
         /* Clear any buffered data */
@@ -1718,6 +1992,7 @@ static int hls_probe(AVProbeData *p)
      * somewhere for a proper match. */
     if (strncmp(p->buf, "#EXTM3U", 7))
         return 0;
+
     if (strstr(p->buf, "#EXT-X-STREAM-INF:")     ||
         strstr(p->buf, "#EXT-X-TARGETDURATION:") ||
         strstr(p->buf, "#EXT-X-MEDIA-SEQUENCE:"))
@@ -1729,7 +2004,7 @@ static int hls_probe(AVProbeData *p)
 #define FLAGS AV_OPT_FLAG_DECODING_PARAM
 static const AVOption hls_options[] = {
     {"live_start_index", "segment index to start live streams at (negative values are from the end)",
-        OFFSET(live_start_index), FF_OPT_TYPE_INT, {.i64 = -3}, INT_MIN, INT_MAX, FLAGS},
+        OFFSET(live_start_index), AV_OPT_TYPE_INT, {.i64 = -3}, INT_MIN, INT_MAX, FLAGS},
     {NULL}
 };
 
diff --git a/libavformat/hlsenc.c b/libavformat/hlsenc.c
index 4d9466c9..85a0907e 100644
--- a/libavformat/hlsenc.c
+++ b/libavformat/hlsenc.c
@@ -32,17 +32,26 @@
 #include "libavutil/avstring.h"
 #include "libavutil/opt.h"
 #include "libavutil/log.h"
+#include "libavutil/time_internal.h"
 
 #include "avformat.h"
+#include "avio_internal.h"
 #include "internal.h"
 #include "os_support.h"
 
+#define KEYSIZE 16
+#define LINE_BUFFER_SIZE 1024
+
 typedef struct HLSSegment {
     char filename[1024];
+    char sub_filename[1024];
     double duration; /* in seconds */
     int64_t pos;
     int64_t size;
 
+    char key_uri[LINE_BUFFER_SIZE + 1];
+    char iv_string[KEYSIZE*2 + 1];
+
     struct HLSSegment *next;
 } HLSSegment;
 
@@ -61,8 +70,10 @@ typedef struct HLSContext {
     int64_t sequence;
     int64_t start_sequence;
     AVOutputFormat *oformat;
+    AVOutputFormat *vtt_oformat;
 
     AVFormatContext *avf;
+    AVFormatContext *vtt_avf;
 
     float time;            // Set by a private option.
     int max_nb_segments;   // Set by a private option.
@@ -70,9 +81,11 @@ typedef struct HLSContext {
     uint32_t flags;        // enum HLSFlags
     char *segment_filename;
 
+    int use_localtime;      ///< flag to expand filename with localtime
     int allowcache;
     int64_t recording_time;
     int has_video;
+    int has_subtitle;
     int64_t start_pts;
     int64_t end_pts;
     double duration;      // last segment duration computed so far, in seconds
@@ -86,17 +99,32 @@ typedef struct HLSContext {
     HLSSegment *old_segments;
 
     char *basename;
+    char *vtt_basename;
+    char *vtt_m3u8_name;
     char *baseurl;
     char *format_options_str;
+    char *vtt_format_options_str;
+    char *subtitle_filename;
     AVDictionary *format_options;
+
+    char *key_info_file;
+    char key_file[LINE_BUFFER_SIZE + 1];
+    char key_uri[LINE_BUFFER_SIZE + 1];
+    char key_string[KEYSIZE*2 + 1];
+    char iv_string[KEYSIZE*2 + 1];
+    AVDictionary *vtt_format_options;
+
+    char *method;
+
 } HLSContext;
 
 static int hls_delete_old_segments(HLSContext *hls) {
 
     HLSSegment *segment, *previous_segment = NULL;
     float playlist_duration = 0.0f;
-    int ret = 0, path_size;
-    char *dirname = NULL, *p, *path;
+    int ret = 0, path_size, sub_path_size;
+    char *dirname = NULL, *p, *sub_path;
+    char *path = NULL;
 
     segment = hls->segments;
     while (segment) {
@@ -138,28 +166,100 @@ static int hls_delete_old_segments(HLSContext *hls) {
             ret = AVERROR(ENOMEM);
             goto fail;
         }
+
         av_strlcpy(path, dirname, path_size);
         av_strlcat(path, segment->filename, path_size);
         if (unlink(path) < 0) {
             av_log(hls, AV_LOG_ERROR, "failed to delete old segment %s: %s\n",
                                      path, strerror(errno));
         }
-        av_free(path);
+
+        if (segment->sub_filename[0] != '\0') {
+            sub_path_size = strlen(dirname) + strlen(segment->sub_filename) + 1;
+            sub_path = av_malloc(sub_path_size);
+            if (!sub_path) {
+                ret = AVERROR(ENOMEM);
+                goto fail;
+            }
+
+            av_strlcpy(sub_path, dirname, sub_path_size);
+            av_strlcat(sub_path, segment->sub_filename, sub_path_size);
+            if (unlink(sub_path) < 0) {
+                av_log(hls, AV_LOG_ERROR, "failed to delete old segment %s: %s\n",
+                                         sub_path, strerror(errno));
+            }
+            av_free(sub_path);
+        }
+        av_freep(&path);
         previous_segment = segment;
         segment = previous_segment->next;
         av_free(previous_segment);
     }
 
 fail:
+    av_free(path);
     av_free(dirname);
 
     return ret;
 }
 
+/* Load the key URI, key file path and IV from hls->key_info_file (one per line),
+ * then read the KEYSIZE-byte key itself and store it hex-encoded in key_string. */
+static int hls_encryption_start(AVFormatContext *s)
+{
+    HLSContext *hls = s->priv_data;
+    uint8_t raw_key[KEYSIZE];
+    AVIOContext *in;
+    int ret;
+
+    ret = s->io_open(s, &in, hls->key_info_file, AVIO_FLAG_READ, NULL);
+    if (ret < 0) {
+        av_log(hls, AV_LOG_ERROR,
+                "error opening key info file %s\n", hls->key_info_file);
+        return ret;
+    }
+
+    /* line 1: key URI, line 2: key file, line 3: IV; line endings are stripped */
+    ff_get_line(in, hls->key_uri, sizeof(hls->key_uri));
+    hls->key_uri[strcspn(hls->key_uri, "\r\n")] = '\0';
+    ff_get_line(in, hls->key_file, sizeof(hls->key_file));
+    hls->key_file[strcspn(hls->key_file, "\r\n")] = '\0';
+    ff_get_line(in, hls->iv_string, sizeof(hls->iv_string));
+    hls->iv_string[strcspn(hls->iv_string, "\r\n")] = '\0';
+    ff_format_io_close(s, &in);
+
+    if (hls->key_uri[0] == '\0') {
+        av_log(hls, AV_LOG_ERROR, "no key URI specified in key info file\n");
+        return AVERROR(EINVAL);
+    }
+    if (hls->key_file[0] == '\0') {
+        av_log(hls, AV_LOG_ERROR, "no key file specified in key info file\n");
+        return AVERROR(EINVAL);
+    }
+
+    ret = s->io_open(s, &in, hls->key_file, AVIO_FLAG_READ, NULL);
+    if (ret < 0) {
+        av_log(hls, AV_LOG_ERROR, "error opening key file %s\n", hls->key_file);
+        return ret;
+    }
+
+    ret = avio_read(in, raw_key, sizeof(raw_key));
+    ff_format_io_close(s, &in);
+    if (ret == sizeof(raw_key)) {
+        ff_data_to_hex(hls->key_string, raw_key, sizeof(raw_key), 0);
+        return 0;
+    }
+
+    /* a short read or EOF means a malformed key file; other errors pass through */
+    av_log(hls, AV_LOG_ERROR, "error reading key file %s\n", hls->key_file);
+    return (ret >= 0 || ret == AVERROR_EOF) ? AVERROR(EINVAL) : ret;
+}
+
 static int hls_mux_init(AVFormatContext *s)
 {
     HLSContext *hls = s->priv_data;
     AVFormatContext *oc;
+    AVFormatContext *vtt_oc = NULL;
     int i, ret;
 
     ret = avformat_alloc_output_context2(&hls->avf, hls->oformat, NULL, NULL);
@@ -170,11 +270,29 @@ static int hls_mux_init(AVFormatContext *s)
     oc->oformat            = hls->oformat;
     oc->interrupt_callback = s->interrupt_callback;
     oc->max_delay          = s->max_delay;
+    oc->opaque             = s->opaque;
+    oc->io_open            = s->io_open;
+    oc->io_close           = s->io_close;
     av_dict_copy(&oc->metadata, s->metadata, 0);
 
+    if(hls->vtt_oformat) {
+        ret = avformat_alloc_output_context2(&hls->vtt_avf, hls->vtt_oformat, NULL, NULL);
+        if (ret < 0)
+            return ret;
+        vtt_oc          = hls->vtt_avf;
+        vtt_oc->oformat = hls->vtt_oformat;
+        av_dict_copy(&vtt_oc->metadata, s->metadata, 0);
+    }
+
     for (i = 0; i < s->nb_streams; i++) {
         AVStream *st;
-        if (!(st = avformat_new_stream(oc, NULL)))
+        AVFormatContext *loc;
+        if (s->streams[i]->codec->codec_type == AVMEDIA_TYPE_SUBTITLE)
+            loc = vtt_oc;
+        else
+            loc = oc;
+
+        if (!(st = avformat_new_stream(loc, NULL)))
             return AVERROR(ENOMEM);
         avcodec_copy_context(st->codec, s->streams[i]->codec);
         st->sample_aspect_ratio = s->streams[i]->sample_aspect_ratio;
@@ -197,11 +315,21 @@ static int hls_append_segment(HLSContext *hls, double duration, int64_t pos,
 
     av_strlcpy(en->filename, av_basename(hls->avf->filename), sizeof(en->filename));
 
+    if(hls->has_subtitle)
+        av_strlcpy(en->sub_filename, av_basename(hls->vtt_avf->filename), sizeof(en->sub_filename));
+    else
+        en->sub_filename[0] = '\0';
+
     en->duration = duration;
     en->pos      = pos;
     en->size     = size;
     en->next     = NULL;
 
+    if (hls->key_info_file) {
+        av_strlcpy(en->key_uri, hls->key_uri, sizeof(en->key_uri));
+        av_strlcpy(en->iv_string, hls->iv_string, sizeof(en->iv_string));
+    }
+
     if (!hls->segments)
         hls->segments = en;
     else
@@ -239,6 +367,12 @@ static void hls_free_segments(HLSSegment *p)
     }
 }
 
+static void set_http_options(AVDictionary **options, HLSContext *c)
+{   /* Add the context's "method" setting to options, if one is set. */
+    if (c->method)
+        av_dict_set(options, "method", c->method, 0);
+}
+
 static int hls_window(AVFormatContext *s, int last)
 {
     HLSContext *hls = s->priv_data;
@@ -246,19 +380,23 @@ static int hls_window(AVFormatContext *s, int last)
     int target_duration = 0;
     int ret = 0;
     AVIOContext *out = NULL;
+    AVIOContext *sub_out = NULL;
     char temp_filename[1024];
     int64_t sequence = FFMAX(hls->start_sequence, hls->sequence - hls->nb_entries);
     int version = hls->flags & HLS_SINGLE_FILE ? 4 : 3;
     const char *proto = avio_find_protocol_name(s->filename);
     int use_rename = proto && !strcmp(proto, "file");
     static unsigned warned_non_file;
+    char *key_uri = NULL;
+    char *iv_string = NULL;
+    AVDictionary *options = NULL;
 
     if (!use_rename && !warned_non_file++)
         av_log(s, AV_LOG_ERROR, "Cannot use rename on non file protocol, this may lead to races and temporarly partial files\n");
 
+    set_http_options(&options, hls);
     snprintf(temp_filename, sizeof(temp_filename), use_rename ? "%s.tmp" : "%s", s->filename);
-    if ((ret = avio_open2(&out, temp_filename, AVIO_FLAG_WRITE,
-                          &s->interrupt_callback, NULL)) < 0)
+    if ((ret = s->io_open(s, &out, temp_filename, AVIO_FLAG_WRITE, &options)) < 0)
         goto fail;
 
     for (en = hls->segments; en; en = en->next) {
@@ -282,8 +420,18 @@ static int hls_window(AVFormatContext *s, int last)
         hls->discontinuity_set = 1;
     }
     for (en = hls->segments; en; en = en->next) {
+        if (hls->key_info_file && (!key_uri || strcmp(en->key_uri, key_uri) ||
+                                    av_strcasecmp(en->iv_string, iv_string))) {
+            avio_printf(out, "#EXT-X-KEY:METHOD=AES-128,URI=\"%s\"", en->key_uri);
+            if (*en->iv_string)
+                avio_printf(out, ",IV=0x%s", en->iv_string);
+            avio_printf(out, "\n");
+            key_uri = en->key_uri;
+            iv_string = en->iv_string;
+        }
+
         if (hls->flags & HLS_ROUND_DURATIONS)
-            avio_printf(out, "#EXTINF:%d,\n",  (int)round(en->duration));
+            avio_printf(out, "#EXTINF:%ld,\n",  lrint(en->duration));
         else
             avio_printf(out, "#EXTINF:%f,\n", en->duration);
         if (hls->flags & HLS_SINGLE_FILE)
@@ -297,8 +445,39 @@ static int hls_window(AVFormatContext *s, int last)
     if (last && (hls->flags & HLS_OMIT_ENDLIST)==0)
         avio_printf(out, "#EXT-X-ENDLIST\n");
 
+    if( hls->vtt_m3u8_name ) {
+        if ((ret = s->io_open(s, &sub_out, hls->vtt_m3u8_name, AVIO_FLAG_WRITE, &options)) < 0)
+            goto fail;
+        avio_printf(sub_out, "#EXTM3U\n");
+        avio_printf(sub_out, "#EXT-X-VERSION:%d\n", version);
+        if (hls->allowcache == 0 || hls->allowcache == 1) {
+            avio_printf(sub_out, "#EXT-X-ALLOW-CACHE:%s\n", hls->allowcache == 0 ? "NO" : "YES");
+        }
+        avio_printf(sub_out, "#EXT-X-TARGETDURATION:%d\n", target_duration);
+        avio_printf(sub_out, "#EXT-X-MEDIA-SEQUENCE:%"PRId64"\n", sequence);
+
+        av_log(s, AV_LOG_VERBOSE, "EXT-X-MEDIA-SEQUENCE:%"PRId64"\n",
+               sequence);
+
+        for (en = hls->segments; en; en = en->next) {
+            avio_printf(sub_out, "#EXTINF:%f,\n", en->duration);
+            if (hls->flags & HLS_SINGLE_FILE)
+                 avio_printf(sub_out, "#EXT-X-BYTERANGE:%"PRIi64"@%"PRIi64"\n",
+                         en->size, en->pos);
+            if (hls->baseurl)
+                avio_printf(sub_out, "%s", hls->baseurl);
+            avio_printf(sub_out, "%s\n", en->sub_filename);
+        }
+
+        if (last)
+            avio_printf(sub_out, "#EXT-X-ENDLIST\n");
+
+    }
+
 fail:
-    avio_closep(&out);
+    av_dict_free(&options);
+    ff_format_io_close(s, &out);
+    ff_format_io_close(s, &sub_out);
     if (ret >= 0 && use_rename)
         ff_rename(temp_filename, s->filename, s);
     return ret;
@@ -308,27 +487,98 @@ static int hls_start(AVFormatContext *s)
 {
     HLSContext *c = s->priv_data;
     AVFormatContext *oc = c->avf;
+    AVFormatContext *vtt_oc = c->vtt_avf;
+    AVDictionary *options = NULL;
+    char *filename, iv_string[KEYSIZE*2 + 1];
     int err = 0;
 
-    if (c->flags & HLS_SINGLE_FILE)
+    if (c->flags & HLS_SINGLE_FILE) {
         av_strlcpy(oc->filename, c->basename,
                    sizeof(oc->filename));
-    else
-        if (av_get_frame_filename(oc->filename, sizeof(oc->filename),
+        if (c->vtt_basename)
+            av_strlcpy(vtt_oc->filename, c->vtt_basename,
+                  sizeof(vtt_oc->filename));
+    } else {
+        if (c->use_localtime) {
+            time_t now0;
+            struct tm *tm, tmpbuf;
+            time(&now0);
+            tm = localtime_r(&now0, &tmpbuf);
+            if (!strftime(oc->filename, sizeof(oc->filename), c->basename, tm)) {
+                av_log(oc, AV_LOG_ERROR, "Could not get segment filename with use_localtime\n");
+                return AVERROR(EINVAL);
+            }
+       } else if (av_get_frame_filename(oc->filename, sizeof(oc->filename),
                                   c->basename, c->wrap ? c->sequence % c->wrap : c->sequence) < 0) {
-            av_log(oc, AV_LOG_ERROR, "Invalid segment filename template '%s'\n", c->basename);
+            av_log(oc, AV_LOG_ERROR, "Invalid segment filename template '%s' you can try use -use_localtime 1 with it\n", c->basename);
             return AVERROR(EINVAL);
         }
+        if( c->vtt_basename) {
+            if (av_get_frame_filename(vtt_oc->filename, sizeof(vtt_oc->filename),
+                              c->vtt_basename, c->wrap ? c->sequence % c->wrap : c->sequence) < 0) {
+                av_log(vtt_oc, AV_LOG_ERROR, "Invalid segment filename template '%s'\n", c->vtt_basename);
+                return AVERROR(EINVAL);
+            }
+       }
+    }
     c->number++;
 
-    if ((err = avio_open2(&oc->pb, oc->filename, AVIO_FLAG_WRITE,
-                          &s->interrupt_callback, NULL)) < 0)
-        return err;
+    set_http_options(&options, c);
+
+    if (c->key_info_file) {
+        if ((err = hls_encryption_start(s)) < 0)
+            goto fail;
+        if ((err = av_dict_set(&options, "encryption_key", c->key_string, 0))
+                < 0)
+            goto fail;
+        err = av_strlcpy(iv_string, c->iv_string, sizeof(iv_string));
+        if (!err)
+            snprintf(iv_string, sizeof(iv_string), "%032"PRIx64, c->sequence);
+        if ((err = av_dict_set(&options, "encryption_iv", iv_string, 0)) < 0)
+           goto fail;
+
+        filename = av_asprintf("crypto:%s", oc->filename);
+        if (!filename) {
+            err = AVERROR(ENOMEM);
+            goto fail;
+        }
+        err = s->io_open(s, &oc->pb, filename, AVIO_FLAG_WRITE, &options);
+        av_free(filename);
+        av_dict_free(&options);
+        if (err < 0)
+            return err;
+    } else
+        if ((err = s->io_open(s, &oc->pb, oc->filename, AVIO_FLAG_WRITE, &options)) < 0)
+            goto fail;
+    if (c->vtt_basename) {
+        set_http_options(&options, c);
+        if ((err = s->io_open(s, &vtt_oc->pb, vtt_oc->filename, AVIO_FLAG_WRITE, &options)) < 0)
+            goto fail;
+    }
+    av_dict_free(&options);
+
+    /* We only require one PAT/PMT per segment. */
+    if (oc->oformat->priv_class && oc->priv_data) {
+        char period[21];
+
+        snprintf(period, sizeof(period), "%d", (INT_MAX / 2) - 1);
 
-    if (oc->oformat->priv_class && oc->priv_data)
         av_opt_set(oc->priv_data, "mpegts_flags", "resend_headers", 0);
+        av_opt_set(oc->priv_data, "sdt_period", period, 0);
+        av_opt_set(oc->priv_data, "pat_period", period, 0);
+    }
+
+    if (c->vtt_basename) {
+        err = avformat_write_header(vtt_oc,NULL);
+        if (err < 0)
+            return err;
+    }
 
     return 0;
+fail:
+    av_dict_free(&options);
+
+    return err;
 }
 
 static int hls_write_header(AVFormatContext *s)
@@ -337,8 +587,11 @@ static int hls_write_header(AVFormatContext *s)
     int ret, i;
     char *p;
     const char *pattern = "%d.ts";
+    const char *pattern_localtime_fmt = "-%s.ts";
+    const char *vtt_pattern = "%d.vtt";
     AVDictionary *options = NULL;
     int basename_size;
+    int vtt_basename_size;
 
     hls->sequence       = hls->start_sequence;
     hls->recording_time = hls->time * AV_TIME_BASE;
@@ -352,9 +605,12 @@ static int hls_write_header(AVFormatContext *s)
         }
     }
 
-    for (i = 0; i < s->nb_streams; i++)
+    for (i = 0; i < s->nb_streams; i++) {
         hls->has_video +=
             s->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO;
+        hls->has_subtitle +=
+            s->streams[i]->codec->codec_type == AVMEDIA_TYPE_SUBTITLE;
+    }
 
     if (hls->has_video > 1)
         av_log(s, AV_LOG_WARNING,
@@ -368,6 +624,14 @@ static int hls_write_header(AVFormatContext *s)
         goto fail;
     }
 
+    if (hls->has_subtitle) {
+        hls->vtt_oformat = av_guess_format("webvtt", NULL, NULL);
+        if (!hls->vtt_oformat) { /* check the WebVTT lookup, not the main muxer */
+            ret = AVERROR_MUXER_NOT_FOUND;
+            goto fail;
+        }
+    }
+
     if (hls->segment_filename) {
         hls->basename = av_strdup(hls->segment_filename);
         if (!hls->basename) {
@@ -378,7 +642,11 @@ static int hls_write_header(AVFormatContext *s)
         if (hls->flags & HLS_SINGLE_FILE)
             pattern = ".ts";
 
-        basename_size = strlen(s->filename) + strlen(pattern) + 1;
+        if (hls->use_localtime) {
+            basename_size = strlen(s->filename) + strlen(pattern_localtime_fmt) + 1;
+        } else {
+            basename_size = strlen(s->filename) + strlen(pattern) + 1;
+        }
         hls->basename = av_malloc(basename_size);
         if (!hls->basename) {
             ret = AVERROR(ENOMEM);
@@ -390,7 +658,40 @@ static int hls_write_header(AVFormatContext *s)
         p = strrchr(hls->basename, '.');
         if (p)
             *p = '\0';
-        av_strlcat(hls->basename, pattern, basename_size);
+        if (hls->use_localtime) {
+            av_strlcat(hls->basename, pattern_localtime_fmt, basename_size);
+        } else {
+            av_strlcat(hls->basename, pattern, basename_size);
+        }
+    }
+
+    if (hls->has_subtitle) {
+        /* Derive the WebVTT segment template and the subtitle playlist name
+         * from the main playlist name with its extension stripped. */
+        if (hls->flags & HLS_SINGLE_FILE)
+            vtt_pattern = ".vtt";
+        vtt_basename_size = strlen(s->filename) + strlen(vtt_pattern) + 1;
+        hls->vtt_basename = av_malloc(vtt_basename_size);
+        if (!hls->vtt_basename) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+        av_strlcpy(hls->vtt_basename, s->filename, vtt_basename_size);
+        p = strrchr(hls->vtt_basename, '.');
+        if (p)
+            *p = '\0';
+
+        /* Size the playlist name from its real contents: a user-supplied
+         * path or the "_vtt.m3u8" suffix may not fit in vtt_basename_size. */
+        if (hls->subtitle_filename)
+            hls->vtt_m3u8_name = av_strdup(hls->subtitle_filename);
+        else
+            hls->vtt_m3u8_name = av_asprintf("%s_vtt.m3u8", hls->vtt_basename);
+        if (!hls->vtt_m3u8_name) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+        av_strlcat(hls->vtt_basename, vtt_pattern, vtt_basename_size);
+    }
 
     if ((ret = hls_mux_init(s)) < 0)
@@ -406,10 +707,19 @@ static int hls_write_header(AVFormatContext *s)
         ret = AVERROR(EINVAL);
         goto fail;
     }
-    av_assert0(s->nb_streams == hls->avf->nb_streams);
+    //av_assert0(s->nb_streams == hls->avf->nb_streams);
     for (i = 0; i < s->nb_streams; i++) {
-        AVStream *inner_st  = hls->avf->streams[i];
+        AVStream *inner_st;
         AVStream *outer_st = s->streams[i];
+        if (outer_st->codec->codec_type != AVMEDIA_TYPE_SUBTITLE)
+            inner_st = hls->avf->streams[i];
+        else if (hls->vtt_avf)
+            inner_st = hls->vtt_avf->streams[0];
+        else {
+            /* We have a subtitle stream, when the user does not want one */
+            inner_st = NULL;
+            continue;
+        }
         avpriv_set_pts_info(outer_st, inner_st->pts_wrap_bits, inner_st->time_base.num, inner_st->time_base.den);
     }
 fail:
@@ -417,8 +727,12 @@ static int hls_write_header(AVFormatContext *s)
     av_dict_free(&options);
     if (ret < 0) {
         av_freep(&hls->basename);
+        av_freep(&hls->vtt_basename);
+        av_freep(&hls->vtt_m3u8_name);
         if (hls->avf)
             avformat_free_context(hls->avf);
+        if (hls->vtt_avf)
+            avformat_free_context(hls->vtt_avf);
     }
     return ret;
 }
@@ -426,12 +740,20 @@ static int hls_write_header(AVFormatContext *s)
 static int hls_write_packet(AVFormatContext *s, AVPacket *pkt)
 {
     HLSContext *hls = s->priv_data;
-    AVFormatContext *oc = hls->avf;
+    AVFormatContext *oc = NULL;
     AVStream *st = s->streams[pkt->stream_index];
     int64_t end_pts = hls->recording_time * hls->number;
     int is_ref_pkt = 1;
     int ret, can_split = 1;
+    int stream_index = 0;
 
+    if( st->codec->codec_type == AVMEDIA_TYPE_SUBTITLE ) {
+        oc = hls->vtt_avf;
+        stream_index = 0;
+    } else {
+        oc = hls->avf;
+        stream_index = pkt->stream_index;
+    }
     if (hls->start_pts == AV_NOPTS_VALUE) {
         hls->start_pts = pkt->pts;
         hls->end_pts   = pkt->pts;
@@ -469,7 +791,9 @@ static int hls_write_packet(AVFormatContext *s, AVPacket *pkt)
                 av_opt_set(hls->avf->priv_data, "mpegts_flags", "resend_headers", 0);
             hls->number++;
         } else {
-            avio_closep(&oc->pb);
+            ff_format_io_close(s, &oc->pb);
+            if (hls->vtt_avf)
+                ff_format_io_close(s, &hls->vtt_avf->pb);
 
             ret = hls_start(s);
         }
@@ -477,13 +801,16 @@ static int hls_write_packet(AVFormatContext *s, AVPacket *pkt)
         if (ret < 0)
             return ret;
 
+        if( st->codec->codec_type == AVMEDIA_TYPE_SUBTITLE )
+            oc = hls->vtt_avf;
+        else
         oc = hls->avf;
 
         if ((ret = hls_window(s, 0)) < 0)
             return ret;
     }
 
-    ret = ff_write_chained(oc, pkt->stream_index, pkt, s, 0);
+    ret = ff_write_chained(oc, stream_index, pkt, s, 0);
 
     return ret;
 }
@@ -492,15 +819,30 @@ static int hls_write_trailer(struct AVFormatContext *s)
 {
     HLSContext *hls = s->priv_data;
     AVFormatContext *oc = hls->avf;
+    AVFormatContext *vtt_oc = hls->vtt_avf;
 
     av_write_trailer(oc);
     if (oc->pb) {
         hls->size = avio_tell(hls->avf->pb) - hls->start_pos;
-        avio_closep(&oc->pb);
+        ff_format_io_close(s, &oc->pb);
         hls_append_segment(hls, hls->duration, hls->start_pos, hls->size);
     }
+
+    if (vtt_oc) {
+        if (vtt_oc->pb)
+            av_write_trailer(vtt_oc);
+        hls->size = avio_tell(hls->vtt_avf->pb) - hls->start_pos;
+        ff_format_io_close(s, &vtt_oc->pb);
+    }
     av_freep(&hls->basename);
     avformat_free_context(oc);
+
     hls->avf = NULL;
     hls_window(s, 1);
+    /* Release only now: hls_window() above still reads vtt_m3u8_name. */
+    if (vtt_oc) {
+        av_freep(&hls->vtt_basename);
+        av_freep(&hls->vtt_m3u8_name);
+        avformat_free_context(vtt_oc);
+    }
 
@@ -516,16 +858,21 @@ static const AVOption options[] = {
     {"hls_time",      "set segment length in seconds",           OFFSET(time),    AV_OPT_TYPE_FLOAT,  {.dbl = 2},     0, FLT_MAX, E},
     {"hls_list_size", "set maximum number of playlist entries",  OFFSET(max_nb_segments),    AV_OPT_TYPE_INT,    {.i64 = 5},     0, INT_MAX, E},
     {"hls_ts_options","set hls mpegts list of options for the container format used for hls", OFFSET(format_options_str), AV_OPT_TYPE_STRING, {.str = NULL},  0, 0,    E},
+    {"hls_vtt_options","set hls vtt list of options for the container format used for hls", OFFSET(vtt_format_options_str), AV_OPT_TYPE_STRING, {.str = NULL},  0, 0,    E},
     {"hls_wrap",      "set number after which the index wraps",  OFFSET(wrap),    AV_OPT_TYPE_INT,    {.i64 = 0},     0, INT_MAX, E},
     {"hls_allow_cache", "explicitly set whether the client MAY (1) or MUST NOT (0) cache media segments", OFFSET(allowcache), AV_OPT_TYPE_INT, {.i64 = -1}, INT_MIN, INT_MAX, E},
     {"hls_base_url",  "url to prepend to each playlist entry",   OFFSET(baseurl), AV_OPT_TYPE_STRING, {.str = NULL},  0, 0,       E},
     {"hls_segment_filename", "filename template for segment files", OFFSET(segment_filename),   AV_OPT_TYPE_STRING, {.str = NULL},            0,       0,         E},
+    {"hls_key_info_file",    "file with key URI and key file path", OFFSET(key_info_file),      AV_OPT_TYPE_STRING, {.str = NULL},            0,       0,         E},
+    {"hls_subtitle_path",     "set path of hls subtitles", OFFSET(subtitle_filename), AV_OPT_TYPE_STRING, {.str = NULL},  0, 0,    E},
     {"hls_flags",     "set flags affecting HLS playlist and media file generation", OFFSET(flags), AV_OPT_TYPE_FLAGS, {.i64 = 0 }, 0, UINT_MAX, E, "flags"},
     {"single_file",   "generate a single media file indexed with byte ranges", 0, AV_OPT_TYPE_CONST, {.i64 = HLS_SINGLE_FILE }, 0, UINT_MAX,   E, "flags"},
     {"delete_segments", "delete segment files that are no longer part of the playlist", 0, AV_OPT_TYPE_CONST, {.i64 = HLS_DELETE_SEGMENTS }, 0, UINT_MAX,   E, "flags"},
     {"round_durations", "round durations in m3u8 to whole numbers", 0, AV_OPT_TYPE_CONST, {.i64 = HLS_ROUND_DURATIONS }, 0, UINT_MAX,   E, "flags"},
     {"discont_start", "start the playlist with a discontinuity tag", 0, AV_OPT_TYPE_CONST, {.i64 = HLS_DISCONT_START }, 0, UINT_MAX,   E, "flags"},
     {"omit_endlist", "Do not append an endlist when ending stream", 0, AV_OPT_TYPE_CONST, {.i64 = HLS_OMIT_ENDLIST }, 0, UINT_MAX,   E, "flags"},
+    { "use_localtime", "set filename expansion with strftime at segment creation", OFFSET(use_localtime), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, E },
+    {"method", "set the HTTP method", OFFSET(method), AV_OPT_TYPE_STRING, {.str = NULL},  0, 0,    E},
 
     { NULL },
 };
@@ -545,6 +892,7 @@ AVOutputFormat ff_hls_muxer = {
     .priv_data_size = sizeof(HLSContext),
     .audio_codec    = AV_CODEC_ID_AAC,
     .video_codec    = AV_CODEC_ID_H264,
+    .subtitle_codec = AV_CODEC_ID_WEBVTT,
     .flags          = AVFMT_NOFILE | AVFMT_ALLOW_FLUSH,
     .write_header   = hls_write_header,
     .write_packet   = hls_write_packet,
diff --git a/libavformat/hlsproto.c b/libavformat/hlsproto.c
index 92843df2..b8f2f379 100644
--- a/libavformat/hlsproto.c
+++ b/libavformat/hlsproto.c
@@ -28,6 +28,7 @@
 #include "libavutil/avstring.h"
 #include "libavutil/time.h"
 #include "avformat.h"
+#include "avio_internal.h"
 #include "internal.h"
 #include "url.h"
 #include "version.h"
@@ -116,8 +117,9 @@ static int parse_playlist(URLContext *h, const char *url)
     char line[1024];
     const char *ptr;
 
-    if ((ret = avio_open2(&in, url, AVIO_FLAG_READ,
-                          &h->interrupt_callback, NULL)) < 0)
+    if ((ret = ffio_open_whitelist(&in, url, AVIO_FLAG_READ,
+                                   &h->interrupt_callback, NULL,
+                                   h->protocol_whitelist)) < 0)
         return ret;
 
     read_chomp_line(in, line, sizeof(line));
@@ -303,8 +305,9 @@ static int hls_read(URLContext *h, uint8_t *buf, int size)
     }
     url = s->segments[s->cur_seq_no - s->start_seq_no]->url,
     av_log(h, AV_LOG_DEBUG, "opening %s\n", url);
-    ret = ffurl_open(&s->seg_hd, url, AVIO_FLAG_READ,
-                     &h->interrupt_callback, NULL);
+    ret = ffurl_open_whitelist(&s->seg_hd, url, AVIO_FLAG_READ,
+                               &h->interrupt_callback, NULL,
+                               h->protocol_whitelist);
     if (ret < 0) {
         if (ff_check_interrupt(&h->interrupt_callback))
             return AVERROR_EXIT;
diff --git a/libavformat/hnm.c b/libavformat/hnm.c
index 1320fa52..8bd8097b 100644
--- a/libavformat/hnm.c
+++ b/libavformat/hnm.c
@@ -190,7 +190,7 @@ static int hnm_read_close(AVFormatContext *s)
     Hnm4DemuxContext *hnm = s->priv_data;
 
     if (hnm->vpkt.size > 0)
-        av_free_packet(&hnm->vpkt);
+        av_packet_unref(&hnm->vpkt);
 
     return 0;
 }
diff --git a/libavformat/http.c b/libavformat/http.c
index 2db2dea5..3dad2ef9 100644
--- a/libavformat/http.c
+++ b/libavformat/http.c
@@ -25,8 +25,10 @@
 #include <zlib.h>
 #endif /* CONFIG_ZLIB */
 
+#include "libavutil/avassert.h"
 #include "libavutil/avstring.h"
 #include "libavutil/opt.h"
+#include "libavutil/time.h"
 
 #include "avformat.h"
 #include "http.h"
@@ -44,6 +46,14 @@
  * path names). */
 #define BUFFER_SIZE   MAX_URL_SIZE
 #define MAX_REDIRECTS 8
+#define HTTP_SINGLE   1
+#define HTTP_MUTLI    2
+typedef enum {
+    LOWER_PROTO,
+    READ_HEADERS,
+    WRITE_REPLY_HEADERS,
+    FINISH
+}HandshakeState;
 
 typedef struct HTTPContext {
     const AVClass *class;
@@ -57,6 +67,7 @@ typedef struct HTTPContext {
     char *location;
     HTTPAuthState auth_state;
     HTTPAuthState proxy_auth_state;
+    char *http_proxy;
     char *headers;
     char *mime_type;
     char *user_agent;
@@ -96,7 +107,16 @@ typedef struct HTTPContext {
     int send_expect_100;
     char *method;
     int reconnect;
+    int reconnect_at_eof;
+    int reconnect_streamed;
+    int reconnect_delay;
+    int reconnect_delay_max;
     int listen;
+    char *resource;
+    int reply_code;
+    int is_multi_client;
+    HandshakeState handshake_step;
+    int is_connected_server;
 } HTTPContext;
 
 #define OFFSET(x) offsetof(HTTPContext, x)
@@ -105,30 +125,36 @@ typedef struct HTTPContext {
 #define DEFAULT_USER_AGENT "Lavf/" AV_STRINGIFY(LIBAVFORMAT_VERSION)
 
 static const AVOption options[] = {
-    { "seekable", "control seekability of connection", OFFSET(seekable), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1, D },
-    { "chunked_post", "use chunked transfer-encoding for posts", OFFSET(chunked_post), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, E },
-    { "headers", "set custom HTTP headers, can override built in default headers", OFFSET(headers), AV_OPT_TYPE_STRING, { 0 }, 0, 0, D | E },
-    { "content_type", "set a specific content type for the POST messages", OFFSET(content_type), AV_OPT_TYPE_STRING, { 0 }, 0, 0, D | E },
+    { "seekable", "control seekability of connection", OFFSET(seekable), AV_OPT_TYPE_BOOL, { .i64 = -1 }, -1, 1, D },
+    { "chunked_post", "use chunked transfer-encoding for posts", OFFSET(chunked_post), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, E },
+    { "http_proxy", "set HTTP proxy to tunnel through", OFFSET(http_proxy), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, D | E },
+    { "headers", "set custom HTTP headers, can override built in default headers", OFFSET(headers), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, D | E },
+    { "content_type", "set a specific content type for the POST messages", OFFSET(content_type), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, D | E },
     { "user_agent", "override User-Agent header", OFFSET(user_agent), AV_OPT_TYPE_STRING, { .str = DEFAULT_USER_AGENT }, 0, 0, D },
     { "user-agent", "override User-Agent header", OFFSET(user_agent), AV_OPT_TYPE_STRING, { .str = DEFAULT_USER_AGENT }, 0, 0, D },
-    { "multiple_requests", "use persistent connections", OFFSET(multiple_requests), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, D | E },
+    { "multiple_requests", "use persistent connections", OFFSET(multiple_requests), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, D | E },
     { "post_data", "set custom HTTP post data", OFFSET(post_data), AV_OPT_TYPE_BINARY, .flags = D | E },
-    { "mime_type", "export the MIME type", OFFSET(mime_type), AV_OPT_TYPE_STRING, { 0 }, 0, 0, AV_OPT_FLAG_EXPORT | AV_OPT_FLAG_READONLY },
-    { "cookies", "set cookies to be sent in applicable future requests, use newline delimited Set-Cookie HTTP field value syntax", OFFSET(cookies), AV_OPT_TYPE_STRING, { 0 }, 0, 0, D },
-    { "icy", "request ICY metadata", OFFSET(icy), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, D },
-    { "icy_metadata_headers", "return ICY metadata headers", OFFSET(icy_metadata_headers), AV_OPT_TYPE_STRING, { 0 }, 0, 0, AV_OPT_FLAG_EXPORT },
-    { "icy_metadata_packet", "return current ICY metadata packet", OFFSET(icy_metadata_packet), AV_OPT_TYPE_STRING, { 0 }, 0, 0, AV_OPT_FLAG_EXPORT },
+    { "mime_type", "export the MIME type", OFFSET(mime_type), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, AV_OPT_FLAG_EXPORT | AV_OPT_FLAG_READONLY },
+    { "cookies", "set cookies to be sent in applicable future requests, use newline delimited Set-Cookie HTTP field value syntax", OFFSET(cookies), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, D },
+    { "icy", "request ICY metadata", OFFSET(icy), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, D },
+    { "icy_metadata_headers", "return ICY metadata headers", OFFSET(icy_metadata_headers), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, AV_OPT_FLAG_EXPORT },
+    { "icy_metadata_packet", "return current ICY metadata packet", OFFSET(icy_metadata_packet), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, AV_OPT_FLAG_EXPORT },
     { "metadata", "metadata read from the bitstream", OFFSET(metadata), AV_OPT_TYPE_DICT, {0}, 0, 0, AV_OPT_FLAG_EXPORT },
     { "auth_type", "HTTP authentication type", OFFSET(auth_state.auth_type), AV_OPT_TYPE_INT, { .i64 = HTTP_AUTH_NONE }, HTTP_AUTH_NONE, HTTP_AUTH_BASIC, D | E, "auth_type"},
     { "none", "No auth method set, autodetect", 0, AV_OPT_TYPE_CONST, { .i64 = HTTP_AUTH_NONE }, 0, 0, D | E, "auth_type"},
     { "basic", "HTTP basic authentication", 0, AV_OPT_TYPE_CONST, { .i64 = HTTP_AUTH_BASIC }, 0, 0, D | E, "auth_type"},
-    { "send_expect_100", "Force sending an Expect: 100-continue header for POST", OFFSET(send_expect_100), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, E },
-    { "location", "The actual location of the data received", OFFSET(location), AV_OPT_TYPE_STRING, { 0 }, 0, 0, D | E },
+    { "send_expect_100", "Force sending an Expect: 100-continue header for POST", OFFSET(send_expect_100), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, E },
+    { "location", "The actual location of the data received", OFFSET(location), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, D | E },
     { "offset", "initial byte offset", OFFSET(off), AV_OPT_TYPE_INT64, { .i64 = 0 }, 0, INT64_MAX, D },
     { "end_offset", "try to limit the request to bytes preceding this offset", OFFSET(end_off), AV_OPT_TYPE_INT64, { .i64 = 0 }, 0, INT64_MAX, D },
     { "method", "Override the HTTP method or set the expected HTTP method from a client", OFFSET(method), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, D | E },
-    { "reconnect", "auto reconnect after disconnect before EOF", OFFSET(reconnect), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, D },
-    { "listen", "listen on HTTP", OFFSET(listen), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, D | E },
+    { "reconnect", "auto reconnect after disconnect before EOF", OFFSET(reconnect), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, D },
+    { "reconnect_at_eof", "auto reconnect at EOF", OFFSET(reconnect_at_eof), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, D },
+    { "reconnect_streamed", "auto reconnect streamed / non seekable streams", OFFSET(reconnect_streamed), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, D },
+    { "reconnect_delay_max", "max reconnect delay in seconds after which to give up", OFFSET(reconnect_delay_max), AV_OPT_TYPE_INT, { .i64 = 120 }, 0, UINT_MAX/1000/1000, D },
+    { "listen", "listen on HTTP", OFFSET(listen), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 2, D | E },
+    { "resource", "The resource requested by a client", OFFSET(resource), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, E },
+    { "reply_code", "The http status code to return to a client", OFFSET(reply_code), AV_OPT_TYPE_INT, { .i64 = 200}, INT_MIN, 599, E},
     { NULL }
 };
 
@@ -162,7 +188,7 @@ static int http_open_cnx_internal(URLContext *h, AVDictionary **options)
                  path1, sizeof(path1), s->location);
     ff_url_join(hoststr, sizeof(hoststr), NULL, NULL, hostname, port, NULL);
 
-    proxy_path = getenv("http_proxy");
+    proxy_path = s->http_proxy ? s->http_proxy : getenv("http_proxy");
     use_proxy  = !ff_http_match_no_proxy(getenv("no_proxy"), hostname) &&
                  proxy_path && av_strstart(proxy_path, "http://", NULL);
 
@@ -193,8 +219,9 @@ static int http_open_cnx_internal(URLContext *h, AVDictionary **options)
     ff_url_join(buf, sizeof(buf), lower_proto, NULL, hostname, port, NULL);
 
     if (!s->hd) {
-        err = ffurl_open(&s->hd, buf, AVIO_FLAG_READ_WRITE,
-                         &h->interrupt_callback, options);
+        err = ffurl_open_whitelist(&s->hd, buf, AVIO_FLAG_READ_WRITE,
+                                   &h->interrupt_callback, options,
+                                   h->protocol_whitelist);
         if (err < 0)
             return err;
     }
@@ -299,50 +326,145 @@ int ff_http_averror(int status_code, int default_averror)
         return default_averror;
 }
 
-static void handle_http_errors(URLContext *h, int error)
+static int http_write_reply(URLContext* h, int status_code)
 {
-    static const char bad_request[] = "HTTP/1.1 400 Bad Request\r\nContent-Type: text/plain\r\n\r\n400 Bad Request\r\n";
-    static const char internal_server_error[] = "HTTP/1.1 500 Internal server error\r\nContent-Type: text/plain\r\n\r\n500 Internal server error\r\n";
+    int ret, body = 0, reply_code, message_len;
+    const char *reply_text, *content_type;
     HTTPContext *s = h->priv_data;
-    if (h->is_connected) {
-        switch(error) {
-            case AVERROR_HTTP_BAD_REQUEST:
-                ffurl_write(s->hd, bad_request, strlen(bad_request));
-                break;
-            default:
-                av_log(h, AV_LOG_ERROR, "Unhandled HTTP error.\n");
-                ffurl_write(s->hd, internal_server_error, strlen(internal_server_error));
+    char message[BUFFER_SIZE];
+    content_type = "text/plain";
+
+    if (status_code < 0)
+        body = 1;
+    switch (status_code) {
+    case AVERROR_HTTP_BAD_REQUEST:
+    case 400:
+        reply_code = 400;
+        reply_text = "Bad Request";
+        break;
+    case AVERROR_HTTP_FORBIDDEN:
+    case 403:
+        reply_code = 403;
+        reply_text = "Forbidden";
+        break;
+    case AVERROR_HTTP_NOT_FOUND:
+    case 404:
+        reply_code = 404;
+        reply_text = "Not Found";
+        break;
+    case 200:
+        reply_code = 200;
+        reply_text = "OK";
+        content_type = "application/octet-stream";
+        break;
+    case AVERROR_HTTP_SERVER_ERROR:
+    case 500:
+        reply_code = 500;
+        reply_text = "Internal server error";
+        break;
+    default:
+        return AVERROR(EINVAL);
+    }
+    if (body) {
+        s->chunked_post = 0;
+        message_len = snprintf(message, sizeof(message),
+                 "HTTP/1.1 %03d %s\r\n"
+                 "Content-Type: %s\r\n"
+                 "Content-Length: %zu\r\n"
+                 "\r\n"
+                 "%03d %s\r\n",
+                 reply_code,
+                 reply_text,
+                 content_type,
+                 strlen(reply_text) + 6, // 3 digit status code + space + \r\n
+                 reply_code,
+                 reply_text);
+    } else {
+        s->chunked_post = 1;
+        message_len = snprintf(message, sizeof(message),
+                 "HTTP/1.1 %03d %s\r\n"
+                 "Content-Type: %s\r\n"
+                 "Transfer-Encoding: chunked\r\n"
+                 "\r\n",
+                 reply_code,
+                 reply_text,
+                 content_type);
+    }
+    av_log(h, AV_LOG_TRACE, "HTTP reply header: \n%s----\n", message);
+    if ((ret = ffurl_write(s->hd, message, message_len)) < 0)
+        return ret;
+    return 0;
+}
+
+static void handle_http_errors(URLContext *h, int error)
+{
+    av_assert0(error < 0);
+    http_write_reply(h, error);
+}
+
+static int http_handshake(URLContext *c)
+{
+    int ret, err, new_location;
+    HTTPContext *ch = c->priv_data;
+    URLContext *cl = ch->hd;
+    switch (ch->handshake_step) {
+    case LOWER_PROTO:
+        av_log(c, AV_LOG_TRACE, "Lower protocol\n");
+        if ((ret = ffurl_handshake(cl)) > 0)
+            return 2 + ret;
+        if (ret < 0)
+            return ret;
+        ch->handshake_step = READ_HEADERS;
+        ch->is_connected_server = 1;
+        return 2;
+    case READ_HEADERS:
+        av_log(c, AV_LOG_TRACE, "Read headers\n");
+        if ((err = http_read_header(c, &new_location)) < 0) {
+            handle_http_errors(c, err);
+            return err;
         }
+        ch->handshake_step = WRITE_REPLY_HEADERS;
+        return 1;
+    case WRITE_REPLY_HEADERS:
+        av_log(c, AV_LOG_TRACE, "Reply code: %d\n", ch->reply_code);
+        if ((err = http_write_reply(c, ch->reply_code)) < 0)
+            return err;
+        ch->handshake_step = FINISH;
+        return 1;
+    case FINISH:
+        return 0;
     }
+    // this should never be reached.
+    return AVERROR(EINVAL);
 }
 
 static int http_listen(URLContext *h, const char *uri, int flags,
                        AVDictionary **options) {
     HTTPContext *s = h->priv_data;
     int ret;
-    static const char header[] = "HTTP/1.1 200 OK\r\nContent-Type: application/octet-stream\r\nTransfer-Encoding: chunked\r\n\r\n";
     char hostname[1024], proto[10];
     char lower_url[100];
     const char *lower_proto = "tcp";
-    int port, new_location;
+    int port;
     av_url_split(proto, sizeof(proto), NULL, 0, hostname, sizeof(hostname), &port,
                  NULL, 0, uri);
     if (!strcmp(proto, "https"))
         lower_proto = "tls";
     ff_url_join(lower_url, sizeof(lower_url), lower_proto, NULL, hostname, port,
                 NULL);
-    av_dict_set(options, "listen", "1", 0);
-    if ((ret = ffurl_open(&s->hd, lower_url, AVIO_FLAG_READ_WRITE,
-                          &h->interrupt_callback, options)) < 0)
+    if ((ret = av_dict_set_int(options, "listen", s->listen, 0)) < 0)
         goto fail;
-    if ((ret = http_read_header(h, &new_location)) < 0)
-         goto fail;
-    if ((ret = ffurl_write(s->hd, header, strlen(header))) < 0)
-         goto fail;
-    return 0;
-
+    if ((ret = ffurl_open_whitelist(&s->hd, lower_url, AVIO_FLAG_READ_WRITE,
+                                    &h->interrupt_callback, options,
+                                    h->protocol_whitelist
+                                   )) < 0)
+        goto fail;
+    s->handshake_step = LOWER_PROTO;
+    if (s->listen == HTTP_SINGLE) { /* single client */
+        s->reply_code = 200;
+        while ((ret = http_handshake(h)) > 0);
+    }
 fail:
-    handle_http_errors(h, ret);
     av_dict_free(&s->chained_options);
     return ret;
 }
@@ -367,9 +489,16 @@ static int http_open(URLContext *h, const char *uri, int flags,
 
     if (s->headers) {
         int len = strlen(s->headers);
-        if (len < 2 || strcmp("\r\n", s->headers + len - 2))
+        if (len < 2 || strcmp("\r\n", s->headers + len - 2)) {
             av_log(h, AV_LOG_WARNING,
                    "No trailing CRLF found in HTTP header.\n");
+            ret = av_reallocp(&s->headers, len + 3);
+            if (ret < 0)
+                return ret;
+            s->headers[len]     = '\r';
+            s->headers[len + 1] = '\n';
+            s->headers[len + 2] = '\0';
+        }
     }
 
     if (s->listen) {
@@ -381,6 +510,26 @@ static int http_open(URLContext *h, const char *uri, int flags,
     return ret;
 }
 
+static int http_accept(URLContext *s, URLContext **c)
+{
+    HTTPContext *sc = s->priv_data, *cc;
+    URLContext *sl = sc->hd, *cl = NULL;
+    int ret;
+
+    av_assert0(sc->listen);
+    /* The client context reuses the listener's URL, flags and interrupt callback. */
+    if ((ret = ffurl_alloc(c, s->filename, s->flags, &sl->interrupt_callback)) < 0)
+        return ret;
+    if ((ret = ffurl_accept(sl, &cl)) < 0) {
+        ffurl_closep(c); /* release the unconnected context instead of leaking it */
+        return ret;
+    }
+    cc = (*c)->priv_data;
+    cc->hd = cl;
+    cc->is_multi_client = 1;
+    return ret;
+}
+
 static int http_getc(HTTPContext *s)
 {
     int len;
@@ -575,10 +724,10 @@ static int process_line(URLContext *h, char *line, int line_count,
 
     p = line;
     if (line_count == 0) {
-        if (s->listen) {
+        if (s->is_connected_server) {
             // HTTP method
             method = p;
-            while (!av_isspace(*p))
+            while (*p && !av_isspace(*p))
                 p++;
             *(p++) = '\0';
             av_log(h, AV_LOG_TRACE, "Received method: %s\n", method);
@@ -596,6 +745,8 @@ static int process_line(URLContext *h, char *line, int line_count,
                            "(%s autodetected %s received)\n", auto_method, method);
                     return ff_http_averror(400, AVERROR(EIO));
                 }
+                if (!(s->method = av_strdup(method)))
+                    return AVERROR(ENOMEM);
             }
 
             // HTTP resource
@@ -606,12 +757,14 @@ static int process_line(URLContext *h, char *line, int line_count,
                 p++;
             *(p++) = '\0';
             av_log(h, AV_LOG_TRACE, "Requested resource: %s\n", resource);
+            if (!(s->resource = av_strdup(resource)))
+                return AVERROR(ENOMEM);
 
             // HTTP version
             while (av_isspace(*p))
                 p++;
             version = p;
-            while (!av_isspace(*p))
+            while (*p && !av_isspace(*p))
                 p++;
             *p = '\0';
             if (av_strncasecmp(version, "HTTP/", 5)) {
@@ -1007,15 +1160,16 @@ static int http_buf_read(URLContext *h, uint8_t *buf, int size)
         memcpy(buf, s->buf_ptr, len);
         s->buf_ptr += len;
     } else {
+        int64_t target_end = s->end_off ? s->end_off : s->filesize;
         if ((!s->willclose || s->chunksize < 0) &&
-            s->filesize >= 0 && s->off >= s->filesize)
+            target_end >= 0 && s->off >= target_end)
             return AVERROR_EOF;
         len = ffurl_read(s->hd, buf, size);
         if (!len && (!s->willclose || s->chunksize < 0) &&
-            s->filesize >= 0 && s->off < s->filesize) {
+            target_end >= 0 && s->off < target_end) {
             av_log(h, AV_LOG_ERROR,
                    "Stream ends prematurely at %"PRId64", should be %"PRId64"\n",
-                   s->off, s->filesize
+                   s->off, target_end
                   );
             return AVERROR(EIO);
         }
@@ -1101,16 +1255,25 @@ static int http_read_stream(URLContext *h, uint8_t *buf, int size)
         return http_buf_read_compressed(h, buf, size);
 #endif /* CONFIG_ZLIB */
     read_ret = http_buf_read(h, buf, size);
-    if (read_ret < 0 && s->reconnect && !h->is_streamed && s->filesize > 0 && s->off < s->filesize) {
-        av_log(h, AV_LOG_INFO, "Will reconnect at %"PRId64".\n", s->off);
-        seek_ret = http_seek_internal(h, s->off, SEEK_SET, 1);
-        if (seek_ret != s->off) {
-            av_log(h, AV_LOG_ERROR, "Failed to reconnect at %"PRId64".\n", s->off);
+    if (   (read_ret  < 0 && s->reconnect        && (!h->is_streamed || s->reconnect_streamed) && s->filesize > 0 && s->off < s->filesize)
+        || (read_ret == 0 && s->reconnect_at_eof && (!h->is_streamed || s->reconnect_streamed))) {
+        int64_t target = h->is_streamed ? 0 : s->off;
+
+        if (s->reconnect_delay > s->reconnect_delay_max)
+            return AVERROR(EIO);
+
+        av_log(h, AV_LOG_INFO, "Will reconnect at %"PRId64" error=%s.\n", s->off, av_err2str(read_ret));
+        av_usleep(1000U*1000*s->reconnect_delay);
+        s->reconnect_delay = 1 + 2*s->reconnect_delay;
+        seek_ret = http_seek_internal(h, target, SEEK_SET, 1);
+        if (seek_ret != target) {
+            av_log(h, AV_LOG_ERROR, "Failed to reconnect at %"PRId64".\n", target);
             return read_ret;
         }
 
         read_ret = http_buf_read(h, buf, size);
-    }
+    } else
+        s->reconnect_delay = 0;
 
     return read_ret;
 }
@@ -1242,7 +1405,8 @@ static int http_shutdown(URLContext *h, int flags)
     HTTPContext *s = h->priv_data;
 
     /* signal end of chunked encoding if used */
-    if ((flags & AVIO_FLAG_WRITE) && s->chunked_post) {
+    if (((flags & AVIO_FLAG_WRITE) && s->chunked_post) ||
+        ((flags & AVIO_FLAG_READ) && s->chunked_post && s->listen)) {
         ret = ffurl_write(s->hd, footer, sizeof(footer) - 1);
         ret = ret > 0 ? 0 : ret;
         s->end_chunked_post = 1;
@@ -1286,7 +1450,7 @@ static int64_t http_seek_internal(URLContext *h, int64_t off, int whence, int fo
              ((whence == SEEK_CUR && off == 0) ||
               (whence == SEEK_SET && off == s->off)))
         return s->off;
-    else if ((s->filesize == -1 && whence == SEEK_END) || h->is_streamed)
+    else if ((s->filesize == -1 && whence == SEEK_END))
         return AVERROR(ENOSYS);
 
     if (whence == SEEK_CUR)
@@ -1299,6 +1463,9 @@ static int64_t http_seek_internal(URLContext *h, int64_t off, int whence, int fo
         return AVERROR(EINVAL);
     s->off = off;
 
+    if (s->off && h->is_streamed)
+        return AVERROR(ENOSYS);
+
     /* we save the old context in case the seek fails */
     old_buf_size = s->buf_end - s->buf_ptr;
     memcpy(old_buf, s->buf_ptr, old_buf_size);
@@ -1344,6 +1511,8 @@ HTTP_CLASS(http);
 URLProtocol ff_http_protocol = {
     .name                = "http",
     .url_open2           = http_open,
+    .url_accept          = http_accept,
+    .url_handshake       = http_handshake,
     .url_read            = http_read,
     .url_write           = http_write,
     .url_seek            = http_seek,
@@ -1353,6 +1522,7 @@ URLProtocol ff_http_protocol = {
     .priv_data_size      = sizeof(HTTPContext),
     .priv_data_class     = &http_context_class,
     .flags               = URL_PROTOCOL_FLAG_NETWORK,
+    .default_whitelist   = "http,https,tls,rtp,tcp,udp,crypto,httpproxy"
 };
 #endif /* CONFIG_HTTP_PROTOCOL */
 
@@ -1371,6 +1541,7 @@ URLProtocol ff_https_protocol = {
     .priv_data_size      = sizeof(HTTPContext),
     .priv_data_class     = &https_context_class,
     .flags               = URL_PROTOCOL_FLAG_NETWORK,
+    .default_whitelist   = "http,https,tls,rtp,tcp,udp,crypto,httpproxy"
 };
 #endif /* CONFIG_HTTPS_PROTOCOL */
 
@@ -1409,8 +1580,9 @@ static int http_proxy_open(URLContext *h, const char *uri, int flags)
     ff_url_join(lower_url, sizeof(lower_url), "tcp", NULL, hostname, port,
                 NULL);
 redo:
-    ret = ffurl_open(&s->hd, lower_url, AVIO_FLAG_READ_WRITE,
-                     &h->interrupt_callback, NULL);
+    ret = ffurl_open_whitelist(&s->hd, lower_url, AVIO_FLAG_READ_WRITE,
+                               &h->interrupt_callback, NULL,
+                               h->protocol_whitelist);
     if (ret < 0)
         return ret;
 
diff --git a/libavformat/httpauth.c b/libavformat/httpauth.c
index dbe3eff4..18cf36bc 100644
--- a/libavformat/httpauth.c
+++ b/libavformat/httpauth.c
@@ -220,21 +220,21 @@ static char *make_digest_auth(HTTPAuthState *state, const char *username,
 
     /* TODO: Escape the quoted strings properly. */
     av_strlcatf(authstr, len, "username=\"%s\"",   username);
-    av_strlcatf(authstr, len, ",realm=\"%s\"",     state->realm);
-    av_strlcatf(authstr, len, ",nonce=\"%s\"",     digest->nonce);
-    av_strlcatf(authstr, len, ",uri=\"%s\"",       uri);
-    av_strlcatf(authstr, len, ",response=\"%s\"",  response);
+    av_strlcatf(authstr, len, ", realm=\"%s\"",     state->realm);
+    av_strlcatf(authstr, len, ", nonce=\"%s\"",     digest->nonce);
+    av_strlcatf(authstr, len, ", uri=\"%s\"",       uri);
+    av_strlcatf(authstr, len, ", response=\"%s\"",  response);
 
     // we are violating the RFC and use "" because all others seem to do that too.
     if (digest->algorithm[0])
-        av_strlcatf(authstr, len, ",algorithm=\"%s\"",  digest->algorithm);
+        av_strlcatf(authstr, len, ", algorithm=\"%s\"",  digest->algorithm);
 
     if (digest->opaque[0])
-        av_strlcatf(authstr, len, ",opaque=\"%s\"", digest->opaque);
+        av_strlcatf(authstr, len, ", opaque=\"%s\"", digest->opaque);
     if (digest->qop[0]) {
-        av_strlcatf(authstr, len, ",qop=\"%s\"",    digest->qop);
-        av_strlcatf(authstr, len, ",cnonce=\"%s\"", cnonce);
-        av_strlcatf(authstr, len, ",nc=%s",         nc);
+        av_strlcatf(authstr, len, ", qop=\"%s\"",    digest->qop);
+        av_strlcatf(authstr, len, ", cnonce=\"%s\"", cnonce);
+        av_strlcatf(authstr, len, ", nc=%s",         nc);
     }
 
     av_strlcatf(authstr, len, "\r\n");
diff --git a/libavformat/icecast.c b/libavformat/icecast.c
index a7c7001f..a3b9a36b 100644
--- a/libavformat/icecast.c
+++ b/libavformat/icecast.c
@@ -53,15 +53,16 @@ typedef struct IcecastContext {
 #define E AV_OPT_FLAG_ENCODING_PARAM
 
 static const AVOption options[] = {
-    { "ice_genre", "set stream genre", OFFSET(genre), AV_OPT_TYPE_STRING, { 0 }, 0, 0, E },
-    { "ice_name", "set stream description", OFFSET(name), AV_OPT_TYPE_STRING, { 0 }, 0, 0, E },
-    { "ice_description", "set stream description", OFFSET(description), AV_OPT_TYPE_STRING, { 0 }, 0, 0, E },
-    { "ice_url", "set stream website", OFFSET(url), AV_OPT_TYPE_STRING, { 0 }, 0, 0, E },
-    { "ice_public", "set if stream is public", OFFSET(public), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, E },
-    { "user_agent", "override User-Agent header", OFFSET(user_agent), AV_OPT_TYPE_STRING, { 0 }, 0, 0, E },
-    { "password", "set password", OFFSET(pass), AV_OPT_TYPE_STRING, { 0 }, 0, 0, E },
-    { "content_type", "set content-type, MUST be set if not audio/mpeg", OFFSET(content_type), AV_OPT_TYPE_STRING, { 0 }, 0, 0, E },
-    { "legacy_icecast", "use legacy SOURCE method, for Icecast < v2.4", OFFSET(legacy_icecast), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, E },
+    /* Per entry (AVOption order): name, help text, field offset, type, default, min, max, flags. */
+    { "ice_genre", "set stream genre", OFFSET(genre), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, E },
+    { "ice_name", "set stream name", OFFSET(name), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, E },
+    { "ice_description", "set stream description", OFFSET(description), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, E },
+    { "ice_url", "set stream website", OFFSET(url), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, E },
+    { "ice_public", "set if stream is public", OFFSET(public), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, E },
+    { "user_agent", "override User-Agent header", OFFSET(user_agent), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, E },
+    { "password", "set password", OFFSET(pass), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, E },
+    { "content_type", "set content-type, MUST be set if not audio/mpeg", OFFSET(content_type), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, E },
+    { "legacy_icecast", "use legacy SOURCE method, for Icecast < v2.4", OFFSET(legacy_icecast), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, E },
     { NULL }
 };
 
@@ -164,7 +164,8 @@ static int icecast_open(URLContext *h, const char *uri, int flags)
     // Build new URI for passing to http protocol
     ff_url_join(h_url, sizeof(h_url), "http", auth, host, port, "%s", path);
     // Finally open http proto handler
-    ret = ffurl_open(&s->hd, h_url, AVIO_FLAG_READ_WRITE, NULL, &opt_dict);
+    ret = ffurl_open_whitelist(&s->hd, h_url, AVIO_FLAG_READ_WRITE, NULL,
+                               &opt_dict, h->protocol_whitelist);
 
 cleanup:
     av_freep(&user);
diff --git a/libavformat/icodec.c b/libavformat/icodec.c
index 847f0eea..6ddb901b 100644
--- a/libavformat/icodec.c
+++ b/libavformat/icodec.c
@@ -27,6 +27,7 @@
 #include "libavutil/intreadwrite.h"
 #include "libavcodec/bytestream.h"
 #include "libavcodec/bmp.h"
+#include "libavcodec/png.h"
 #include "avformat.h"
 #include "internal.h"
 
@@ -44,9 +45,43 @@ typedef struct {
 
 static int probe(AVProbeData *p)
 {
-    if (AV_RL16(p->buf) == 0 && AV_RL16(p->buf + 2) == 1 && AV_RL16(p->buf + 4))
-        return AVPROBE_SCORE_MAX / 4;
-    return 0;
+    /* Score how likely the buffer is an ICO file: 0 for a bad header, at most
+     * AVPROBE_SCORE_MAX / 4 + 1 when an entry fails a check or cannot be
+     * checked, AVPROBE_SCORE_MAX / 2 + 1 when every entry passes. */
+    unsigned i, frames = AV_RL16(p->buf + 4);
+
+    /* Header: 16-bit 0, 16-bit 1, then a non-zero 16-bit entry count. */
+    if (AV_RL16(p->buf) || AV_RL16(p->buf + 2) != 1 || !frames)
+        return 0;
+    /* Directory entries are 16 bytes each; the first one starts at byte 6. */
+    for (i = 0; i < frames; i++) {
+        unsigned offset;
+        /* Do not parse an entry that is not completely inside the probe
+         * buffer: everything seen so far was plausible but this entry and
+         * the following ones cannot be verified. */
+        if (i * 16 + 22 > p->buf_size)
+            return AVPROBE_SCORE_MAX / 4 + FFMIN(i, 1);
+        if (AV_RL16(p->buf + 10 + i * 16) & ~1)
+            return FFMIN(i, AVPROBE_SCORE_MAX / 4);
+        if (p->buf[13 + i * 16])
+            return FFMIN(i, AVPROBE_SCORE_MAX / 4);
+        if (AV_RL32(p->buf + 14 + i * 16) < 40)
+            return FFMIN(i, AVPROBE_SCORE_MAX / 4);
+        offset = AV_RL32(p->buf + 18 + i * 16);
+        if (offset < 22)
+            return FFMIN(i, AVPROBE_SCORE_MAX / 4);
+        /* Written without "offset + 8": that sum wraps around for offsets
+         * close to UINT_MAX and would let the reads below run far outside
+         * the buffer. buf_size is at least 22 here, so the subtraction
+         * cannot go negative. */
+        if (offset > p->buf_size - 8)
+            return AVPROBE_SCORE_MAX / 4 + FFMIN(i, 1);
+        /* The image data must start with byte 40 or with the PNG signature. */
+        if (p->buf[offset] != 40 && AV_RB64(p->buf + offset) != PNGSIG)
+            return FFMIN(i, AVPROBE_SCORE_MAX / 4);
+    }
+
+    return AVPROBE_SCORE_MAX / 2 + 1;
 }
 
 static int read_header(AVFormatContext *s)
@@ -124,7 +146,7 @@ static int read_packet(AVFormatContext *s, AVPacket *pkt)
     int ret;
 
     if (ico->current_image >= ico->nb_images)
-        return AVERROR(EIO);
+        return AVERROR_EOF;
 
     image = &ico->images[ico->current_image];
 
diff --git a/libavformat/id3v2.c b/libavformat/id3v2.c
index 2289bfc1..addf937a 100644
--- a/libavformat/id3v2.c
+++ b/libavformat/id3v2.c
@@ -535,6 +535,13 @@ static void free_apic(void *obj)
     av_freep(&apic);
 }
 
+static void rstrip_spaces(char *buf) /* strips trailing ' ' from buf in place */
+{
+    size_t len = strlen(buf);
+    while (len > 0 && buf[len - 1] == ' ') /* only the space character counts */
+        buf[--len] = 0; /* move the terminator one byte to the left */
+}
+
 static void read_apic(AVFormatContext *s, AVIOContext *pb, int taglen,
                       const char *tag, ID3v2ExtraMeta **extra_meta,
                       int isv34)
@@ -598,16 +605,20 @@ static void read_apic(AVFormatContext *s, AVIOContext *pb, int taglen,
         goto fail;
     }
 
-    apic->buf = av_buffer_alloc(taglen + FF_INPUT_BUFFER_PADDING_SIZE);
+    apic->buf = av_buffer_alloc(taglen + AV_INPUT_BUFFER_PADDING_SIZE);
     if (!apic->buf || !taglen || avio_read(pb, apic->buf->data, taglen) != taglen)
         goto fail;
-    memset(apic->buf->data + taglen, 0, FF_INPUT_BUFFER_PADDING_SIZE);
+    memset(apic->buf->data + taglen, 0, AV_INPUT_BUFFER_PADDING_SIZE);
 
     new_extra->tag  = "APIC";
     new_extra->data = apic;
     new_extra->next = *extra_meta;
     *extra_meta     = new_extra;
 
+    // The description must be unique, and some ID3v2 tag writers add spaces
+    // to write several APIC entries with the same description.
+    rstrip_spaces(apic->description);
+
     return;
 
 fail:
@@ -1083,6 +1094,9 @@ int ff_id3v2_parse_apic(AVFormatContext *s, ID3v2ExtraMeta **extra_meta)
         st->codec->codec_type = AVMEDIA_TYPE_VIDEO;
         st->codec->codec_id   = apic->id;
 
+        if (AV_RB64(apic->buf->data) == 0x89504e470d0a1a0a)
+            st->codec->codec_id = AV_CODEC_ID_PNG;
+
         if (apic->description[0])
             av_dict_set(&st->metadata, "title", apic->description, 0);
 
@@ -1091,7 +1105,7 @@ int ff_id3v2_parse_apic(AVFormatContext *s, ID3v2ExtraMeta **extra_meta)
         av_init_packet(&st->attached_pic);
         st->attached_pic.buf          = apic->buf;
         st->attached_pic.data         = apic->buf->data;
-        st->attached_pic.size         = apic->buf->size - FF_INPUT_BUFFER_PADDING_SIZE;
+        st->attached_pic.size         = apic->buf->size - AV_INPUT_BUFFER_PADDING_SIZE;
         st->attached_pic.stream_index = st->index;
         st->attached_pic.flags       |= AV_PKT_FLAG_KEY;
 
diff --git a/libavformat/idcin.c b/libavformat/idcin.c
index 61a27244..10afed6f 100644
--- a/libavformat/idcin.c
+++ b/libavformat/idcin.c
@@ -313,7 +313,7 @@ static int idcin_read_packet(AVFormatContext *s,
             return ret;
         else if (ret != chunk_size) {
             av_log(s, AV_LOG_ERROR, "incomplete packet\n");
-            av_free_packet(pkt);
+            av_packet_unref(pkt);
             return AVERROR(EIO);
         }
         if (command == 1) {
@@ -322,7 +322,7 @@ static int idcin_read_packet(AVFormatContext *s,
             pal = av_packet_new_side_data(pkt, AV_PKT_DATA_PALETTE,
                                           AVPALETTE_SIZE);
             if (!pal) {
-                av_free_packet(pkt);
+                av_packet_unref(pkt);
                 return AVERROR(ENOMEM);
             }
             memcpy(pal, palette, AVPALETTE_SIZE);
diff --git a/libavformat/iff.c b/libavformat/iff.c
index 7235bc1c..28890831 100644
--- a/libavformat/iff.c
+++ b/libavformat/iff.c
@@ -455,7 +455,7 @@ static int iff_read_header(AVFormatContext *s)
                  return AVERROR_INVALIDDATA;
             }
             st->codec->extradata_size = data_size + IFF_EXTRA_VIDEO_SIZE;
-            st->codec->extradata      = av_malloc(data_size + IFF_EXTRA_VIDEO_SIZE + FF_INPUT_BUFFER_PADDING_SIZE);
+            st->codec->extradata      = av_malloc(data_size + IFF_EXTRA_VIDEO_SIZE + AV_INPUT_BUFFER_PADDING_SIZE);
             if (!st->codec->extradata)
                 return AVERROR(ENOMEM);
             if (avio_read(pb, st->codec->extradata + IFF_EXTRA_VIDEO_SIZE, data_size) < 0)
@@ -682,7 +682,7 @@ static int iff_read_header(AVFormatContext *s)
 
         if (!st->codec->extradata) {
             st->codec->extradata_size = IFF_EXTRA_VIDEO_SIZE;
-            st->codec->extradata      = av_malloc(IFF_EXTRA_VIDEO_SIZE + FF_INPUT_BUFFER_PADDING_SIZE);
+            st->codec->extradata      = av_malloc(IFF_EXTRA_VIDEO_SIZE + AV_INPUT_BUFFER_PADDING_SIZE);
             if (!st->codec->extradata)
                 return AVERROR(ENOMEM);
         }
@@ -721,11 +721,15 @@ static int iff_read_packet(AVFormatContext *s,
         if (st->codec->codec_tag == ID_DSD || st->codec->codec_tag == ID_MAUD) {
             ret = av_get_packet(pb, pkt, FFMIN(iff->body_end - pos, 1024 * st->codec->block_align));
         } else {
+            if (iff->body_size > INT_MAX)
+                return AVERROR_INVALIDDATA;
             ret = av_get_packet(pb, pkt, iff->body_size);
         }
     } else if (st->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
         uint8_t *buf;
 
+        if (iff->body_size > INT_MAX - 2)
+            return AVERROR_INVALIDDATA;
         if (av_new_packet(pkt, iff->body_size + 2) < 0) {
             return AVERROR(ENOMEM);
         }
@@ -734,7 +738,7 @@ static int iff_read_packet(AVFormatContext *s,
         bytestream_put_be16(&buf, 2);
         ret = avio_read(pb, buf, iff->body_size);
         if (ret<0) {
-            av_free_packet(pkt);
+            av_packet_unref(pkt);
         } else if (ret < iff->body_size)
             av_shrink_packet(pkt, ret + 2);
     } else {
diff --git a/libavformat/ilbc.c b/libavformat/ilbc.c
index 3f154ce2..ebee2fb0 100644
--- a/libavformat/ilbc.c
+++ b/libavformat/ilbc.c
@@ -112,7 +112,7 @@ static int ilbc_read_packet(AVFormatContext *s,
     pkt->pos = avio_tell(s->pb);
     pkt->duration = enc->block_align == 38 ? 160 : 240;
     if ((ret = avio_read(s->pb, pkt->data, enc->block_align)) != enc->block_align) {
-        av_free_packet(pkt);
+        av_packet_unref(pkt);
         return ret < 0 ? ret : AVERROR(EIO);
     }
 
diff --git a/libavformat/img2.c b/libavformat/img2.c
index cf8a4785..50352b53 100644
--- a/libavformat/img2.c
+++ b/libavformat/img2.c
@@ -22,13 +22,9 @@
 
 #include "libavutil/avstring.h"
 #include "internal.h"
+#include "img2.h"
 
-typedef struct IdStrMap {
-    enum AVCodecID id;
-    const char *str;
-} IdStrMap;
-
-static const IdStrMap img_tags[] = {
+const IdStrMap ff_img_tags[] = {
     { AV_CODEC_ID_MJPEG,      "jpeg"     },
     { AV_CODEC_ID_MJPEG,      "jpg"      },
     { AV_CODEC_ID_MJPEG,      "jps"      },
@@ -45,6 +41,7 @@ static const IdStrMap img_tags[] = {
     { AV_CODEC_ID_PBM,        "pbm"      },
     { AV_CODEC_ID_PAM,        "pam"      },
     { AV_CODEC_ID_ALIAS_PIX,  "pix"      },
+    { AV_CODEC_ID_DDS,        "dds"      },
     { AV_CODEC_ID_MPEG1VIDEO, "mpg1-img" },
     { AV_CODEC_ID_MPEG2VIDEO, "mpg2-img" },
     { AV_CODEC_ID_MPEG4,      "mpg4-img" },
@@ -102,5 +99,5 @@ static enum AVCodecID av_str2id(const IdStrMap *tags, const char *str)
 
 enum AVCodecID ff_guess_image2_codec(const char *filename)
 {
-    return av_str2id(img_tags, filename);
+    return av_str2id(ff_img_tags, filename);
 }
diff --git a/libavformat/img2.h b/libavformat/img2.h
index f6b9dd92..0e5b374a 100644
--- a/libavformat/img2.h
+++ b/libavformat/img2.h
@@ -34,7 +34,8 @@ enum PatternType {
     PT_GLOB_SEQUENCE,
     PT_GLOB,
     PT_SEQUENCE,
-    PT_NONE
+    PT_NONE,
+    PT_DEFAULT
 };
 
 typedef struct VideoDemuxData {
@@ -62,6 +63,13 @@ typedef struct VideoDemuxData {
     int ts_from_file;
 } VideoDemuxData;
 
+typedef struct IdStrMap { /* one entry of a codec-id/string table such as ff_img_tags */
+    enum AVCodecID id; /* codec id paired with str */
+    const char *str; /* tag string, e.g. "jpeg" or "dds" in ff_img_tags */
+} IdStrMap;
+
+extern const IdStrMap ff_img_tags[];
+
 extern const AVOption ff_img_options[];
 
 int ff_img_read_header(AVFormatContext *s1);
diff --git a/libavformat/img2dec.c b/libavformat/img2dec.c
index 0830f007..c3535636 100644
--- a/libavformat/img2dec.c
+++ b/libavformat/img2dec.c
@@ -20,6 +20,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#define _DEFAULT_SOURCE
 #define _BSD_SOURCE
 #include <sys/stat.h>
 #include "libavutil/avstring.h"
@@ -102,7 +103,7 @@ static int is_glob(const char *path)
  * @param start_index  minimum accepted value for the first index in the range
  * @return -1 if no image file could be found
  */
-static int find_image_range(int *pfirst_index, int *plast_index,
+static int find_image_range(AVIOContext *pb, int *pfirst_index, int *plast_index,
                             const char *path, int start_index, int start_index_range)
 {
     char buf[1024];
@@ -113,7 +114,7 @@ static int find_image_range(int *pfirst_index, int *plast_index,
         if (av_get_frame_filename(buf, sizeof(buf), path, first_index) < 0) {
             *pfirst_index =
             *plast_index  = 1;
-            if (avio_check(buf, AVIO_FLAG_READ) > 0)
+            if (pb || avio_check(buf, AVIO_FLAG_READ) > 0)
                 return 0;
             return -1;
         }
@@ -224,6 +225,13 @@ int ff_img_read_header(AVFormatContext *s1)
     }
 
     if (!s->is_pipe) {
+        if (s->pattern_type == PT_DEFAULT) {
+            if (s1->pb) {
+                s->pattern_type = PT_NONE;
+            } else
+                s->pattern_type = PT_GLOB_SEQUENCE;
+        }
+
         if (s->pattern_type == PT_GLOB_SEQUENCE) {
         s->use_glob = is_glob(s->path);
         if (s->use_glob) {
@@ -259,7 +267,7 @@ int ff_img_read_header(AVFormatContext *s1)
         }
         }
         if ((s->pattern_type == PT_GLOB_SEQUENCE && !s->use_glob) || s->pattern_type == PT_SEQUENCE) {
-            if (find_image_range(&first_index, &last_index, s->path,
+            if (find_image_range(s1->pb, &first_index, &last_index, s->path,
                                  s->start_number, s->start_number_range) < 0) {
                 av_log(s1, AV_LOG_ERROR,
                        "Could find no file with path '%s' and index in the range %d-%d\n",
@@ -390,8 +398,12 @@ int ff_img_read_packet(AVFormatContext *s1, AVPacket *pkt)
             return AVERROR(EIO);
         }
         for (i = 0; i < 3; i++) {
-            if (avio_open2(&f[i], filename, AVIO_FLAG_READ,
-                           &s1->interrupt_callback, NULL) < 0) {
+            if (s1->pb &&
+                !strcmp(filename_bytes, s->path) &&
+                !s->loop &&
+                !s->split_planes) {
+                f[i] = s1->pb;
+            } else if (s1->io_open(s1, &f[i], filename, AVIO_FLAG_READ, NULL) < 0) {
                 if (i >= 1)
                     break;
                 av_log(s1, AV_LOG_ERROR, "Could not open file : %s\n",
@@ -444,14 +456,17 @@ int ff_img_read_packet(AVFormatContext *s1, AVPacket *pkt)
     }
 
     res = av_new_packet(pkt, size[0] + size[1] + size[2]);
-    if (res < 0)
-        return res;
+    if (res < 0) {
+        goto fail;
+    }
     pkt->stream_index = 0;
     pkt->flags       |= AV_PKT_FLAG_KEY;
     if (s->ts_from_file) {
         struct stat img_stat;
-        if (stat(filename, &img_stat))
-            return AVERROR(EIO);
+        if (stat(filename, &img_stat)) {
+            res = AVERROR(EIO);
+            goto fail;
+        }
         pkt->pts = (int64_t)img_stat.st_mtime;
 #if HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC
         if (s->ts_from_file == 2)
@@ -475,28 +490,40 @@ int ff_img_read_packet(AVFormatContext *s1, AVPacket *pkt)
                     ret[i] = avio_read(f[i], pkt->data + pkt->size, size[i]);
                 }
             }
-            if (!s->is_pipe)
-                avio_closep(&f[i]);
+            if (!s->is_pipe && f[i] != s1->pb)
+                ff_format_io_close(s1, &f[i]);
             if (ret[i] > 0)
                 pkt->size += ret[i];
         }
     }
 
     if (ret[0] <= 0 || ret[1] < 0 || ret[2] < 0) {
-        av_free_packet(pkt);
+        av_packet_unref(pkt);
         if (ret[0] < 0) {
-            return ret[0];
+            res = ret[0];
         } else if (ret[1] < 0) {
-            return ret[1];
-        } else if (ret[2] < 0)
-            return ret[2];
-        return AVERROR_EOF;
+            res = ret[1];
+        } else if (ret[2] < 0) {
+            res = ret[2];
+        } else {
+            res = AVERROR_EOF;
+        }
+        goto fail;
     } else {
         s->img_count++;
         s->img_number++;
         s->pts++;
         return 0;
     }
+
+fail:
+    if (!s->is_pipe) {
+        for (i = 0; i < 3; i++) {
+            if (f[i] != s1->pb)
+                ff_format_io_close(s1, &f[i]);
+        }
+    }
+    return res;
 }
 
 static int img_read_close(struct AVFormatContext* s1)
@@ -534,9 +561,9 @@ static int img_read_seek(AVFormatContext *s, int stream_index, int64_t timestamp
 #define DEC AV_OPT_FLAG_DECODING_PARAM
 const AVOption ff_img_options[] = {
     { "framerate",    "set the video framerate",             OFFSET(framerate),    AV_OPT_TYPE_VIDEO_RATE, {.str = "25"}, 0, 0,   DEC },
-    { "loop",         "force loop over input file sequence", OFFSET(loop),         AV_OPT_TYPE_INT,    {.i64 = 0   }, 0, 1,       DEC },
+    { "loop",         "force loop over input file sequence", OFFSET(loop),         AV_OPT_TYPE_BOOL,   {.i64 = 0   }, 0, 1,       DEC },
 
-    { "pattern_type", "set pattern type",                    OFFSET(pattern_type), AV_OPT_TYPE_INT,    {.i64=PT_GLOB_SEQUENCE}, 0,       INT_MAX, DEC, "pattern_type"},
+    { "pattern_type", "set pattern type",                    OFFSET(pattern_type), AV_OPT_TYPE_INT,    {.i64=PT_DEFAULT}, 0,       INT_MAX, DEC, "pattern_type"},
     { "glob_sequence","select glob/sequence pattern type",   0, AV_OPT_TYPE_CONST,  {.i64=PT_GLOB_SEQUENCE}, INT_MIN, INT_MAX, DEC, "pattern_type" },
     { "glob",         "select glob pattern type",            0, AV_OPT_TYPE_CONST,  {.i64=PT_GLOB         }, INT_MIN, INT_MAX, DEC, "pattern_type" },
     { "sequence",     "select sequence pattern type",        0, AV_OPT_TYPE_CONST,  {.i64=PT_SEQUENCE     }, INT_MIN, INT_MAX, DEC, "pattern_type" },
@@ -609,6 +636,17 @@ static int bmp_probe(AVProbeData *p)
     return AVPROBE_SCORE_EXTENSION / 4;
 }
 
+static int dds_probe(AVProbeData *p) /* scores buffers that start like a DDS file */
+{
+    const uint8_t *b = p->buf;
+
+    if (   AV_RB64(b) == 0x444453207c000000 /* ASCII "DDS " + 32-bit LE value 124 */
+        && AV_RL32(b +  8) /* the two 32-bit LE fields at 8 and 12 must be non-zero */
+        && AV_RL32(b + 12)) /* NOTE(review): presumably flags and height - confirm */
+        return AVPROBE_SCORE_MAX - 1;
+    return 0;
+}
+
 static int dpx_probe(AVProbeData *p)
 {
     const uint8_t *b = p->buf;
@@ -649,18 +687,14 @@ static int j2k_probe(AVProbeData *p)
 static int jpeg_probe(AVProbeData *p)
 {
     const uint8_t *b = p->buf;
-    int i, state = 0xD8, exif_size = 0;
+    int i, state = 0xD8;
 
     if (AV_RB16(b) != 0xFFD8 ||
         AV_RB32(b) == 0xFFD8FFF7)
     return 0;
 
     b += 2;
-    if (AV_RB16(b) == 0xFFE1 && AV_RB32(b + 4) == AV_RB32("Exif")) {
-        exif_size = AV_RB16(b + 2) + 2;
-        b += exif_size;
-    }
-    for (i = 0; i + exif_size < p->buf_size - 2; i++) {
+    for (i = 0; i < p->buf_size - 3; i++) {
         int c;
         if (b[i] != 0xFF)
             continue;
@@ -689,6 +723,24 @@ static int jpeg_probe(AVProbeData *p)
                 return 0;
             state = 0xD9;
             break;
+        case 0xE0:
+        case 0xE1:
+        case 0xE2:
+        case 0xE3:
+        case 0xE4:
+        case 0xE5:
+        case 0xE6:
+        case 0xE7:
+        case 0xE8:
+        case 0xE9:
+        case 0xEA:
+        case 0xEB:
+        case 0xEC:
+        case 0xED:
+        case 0xEE:
+        case 0xEF:
+            i += AV_RB16(&b[i + 2]) + 1;
+            break;
         default:
             if (  (c >= 0x02 && c <= 0xBF)
                 || c == 0xC8)
@@ -714,9 +766,15 @@ static int qdraw_probe(AVProbeData *p)
 {
     const uint8_t *b = p->buf;
 
-    if (!b[10] && AV_RB32(b+11) == 0x1102ff0c && !b[15] ||
-        p->buf_size >= 528 && !b[522] && AV_RB32(b+523) == 0x1102ff0c && !b[527])
-        return AVPROBE_SCORE_EXTENSION + 1;
+    if (   p->buf_size >= 528
+        && (AV_RB64(b + 520) & 0xFFFFFFFFFFFF) == 0x001102ff0c00
+        && AV_RB16(b + 520)
+        && AV_RB16(b + 518))
+        return AVPROBE_SCORE_MAX * 3 / 4;
+    if (   (AV_RB64(b + 8) & 0xFFFFFFFFFFFF) == 0x001102ff0c00
+        && AV_RB16(b + 8)
+        && AV_RB16(b + 6))
+        return AVPROBE_SCORE_EXTENSION / 4;
     return 0;
 }
 
@@ -799,6 +857,7 @@ AVInputFormat ff_image_ ## imgname ## _pipe_demuxer = {\
 };
 
 IMAGEAUTO_DEMUXER(bmp,     AV_CODEC_ID_BMP)
+IMAGEAUTO_DEMUXER(dds,     AV_CODEC_ID_DDS)
 IMAGEAUTO_DEMUXER(dpx,     AV_CODEC_ID_DPX)
 IMAGEAUTO_DEMUXER(exr,     AV_CODEC_ID_EXR)
 IMAGEAUTO_DEMUXER(j2k,     AV_CODEC_ID_JPEG2000)
diff --git a/libavformat/img2enc.c b/libavformat/img2enc.c
index f56c39e6..ebbac2bf 100644
--- a/libavformat/img2enc.c
+++ b/libavformat/img2enc.c
@@ -30,6 +30,7 @@
 #include "avformat.h"
 #include "avio_internal.h"
 #include "internal.h"
+#include "img2.h"
 
 typedef struct VideoMuxData {
     const AVClass *class;  /**< Class for private options. */
@@ -37,9 +38,12 @@ typedef struct VideoMuxData {
     int is_pipe;
     int split_planes;       /**< use independent file for each Y, U, V plane */
     char path[1024];
+    char tmp[4][1024];
+    char target[4][1024];
     int update;
     int use_strftime;
     const char *muxer;
+    int use_rename;
 } VideoMuxData;
 
 static int write_header(AVFormatContext *s)
@@ -67,6 +71,7 @@ static int write_header(AVFormatContext *s)
                              &&(desc->flags & AV_PIX_FMT_FLAG_PLANAR)
                              && desc->nb_components >= 3;
     }
+
     return 0;
 }
 
@@ -78,6 +83,7 @@ static int write_packet(AVFormatContext *s, AVPacket *pkt)
     AVCodecContext *codec = s->streams[pkt->stream_index]->codec;
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(codec->pix_fmt);
     int i;
+    int nb_renames = 0;
 
     if (!img->is_pipe) {
         if (img->update) {
@@ -99,9 +105,10 @@ static int write_packet(AVFormatContext *s, AVPacket *pkt)
             return AVERROR(EINVAL);
         }
         for (i = 0; i < 4; i++) {
-            if (avio_open2(&pb[i], filename, AVIO_FLAG_WRITE,
-                           &s->interrupt_callback, NULL) < 0) {
-                av_log(s, AV_LOG_ERROR, "Could not open file : %s\n", filename);
+            snprintf(img->tmp[i], sizeof(img->tmp[i]), "%s.tmp", filename);
+            av_strlcpy(img->target[i], filename, sizeof(img->target[i]));
+            if (s->io_open(s, &pb[i], img->use_rename ? img->tmp[i] : filename, AVIO_FLAG_WRITE, NULL) < 0) {
+                av_log(s, AV_LOG_ERROR, "Could not open file : %s\n", img->use_rename ? img->tmp[i] : filename);
                 return AVERROR(EIO);
             }
 
@@ -109,25 +116,27 @@ static int write_packet(AVFormatContext *s, AVPacket *pkt)
                 break;
             filename[strlen(filename) - 1] = "UVAx"[i];
         }
+        if (img->use_rename)
+            nb_renames = i + 1;
     } else {
         pb[0] = s->pb;
     }
 
     if (img->split_planes) {
         int ysize = codec->width * codec->height;
-        int usize = FF_CEIL_RSHIFT(codec->width, desc->log2_chroma_w) * FF_CEIL_RSHIFT(codec->height, desc->log2_chroma_h);
-        if (desc->comp[0].depth_minus1 >= 8) {
+        int usize = AV_CEIL_RSHIFT(codec->width, desc->log2_chroma_w) * AV_CEIL_RSHIFT(codec->height, desc->log2_chroma_h);
+        if (desc->comp[0].depth >= 9) {
             ysize *= 2;
             usize *= 2;
         }
         avio_write(pb[0], pkt->data                , ysize);
         avio_write(pb[1], pkt->data + ysize        , usize);
         avio_write(pb[2], pkt->data + ysize + usize, usize);
-        avio_closep(&pb[1]);
-        avio_closep(&pb[2]);
+        ff_format_io_close(s, &pb[1]);
+        ff_format_io_close(s, &pb[2]);
         if (desc->nb_components > 3) {
             avio_write(pb[3], pkt->data + ysize + 2*usize, ysize);
-            avio_closep(&pb[3]);
+            ff_format_io_close(s, &pb[3]);
         }
     } else if (img->muxer) {
         int ret;
@@ -154,31 +163,48 @@ static int write_packet(AVFormatContext *s, AVPacket *pkt)
             (ret = avformat_write_header(fmt, NULL))                      < 0 ||
             (ret = av_interleaved_write_frame(fmt, &pkt2))                < 0 ||
             (ret = av_write_trailer(fmt))                                 < 0) {
-            av_free_packet(&pkt2);
+            av_packet_unref(&pkt2);
             avformat_free_context(fmt);
             return ret;
         }
-        av_free_packet(&pkt2);
+        av_packet_unref(&pkt2);
         avformat_free_context(fmt);
     } else {
         avio_write(pb[0], pkt->data, pkt->size);
     }
     avio_flush(pb[0]);
     if (!img->is_pipe) {
-        avio_closep(&pb[0]);
+        ff_format_io_close(s, &pb[0]);
+        for (i = 0; i < nb_renames; i++) {
+            int ret = ff_rename(img->tmp[i], img->target[i], s);
+            if (ret < 0)
+                return ret;
+        }
     }
 
     img->img_number++;
     return 0;
 }
 
+/* Tell whether codec `id` may be written by the image2 muxers (1 = yes, 0 = no). */
+static int query_codec(enum AVCodecID id, int std_compliance)
+{
+    int idx = 0;
+    while (ff_img_tags[idx].id != AV_CODEC_ID_NONE)
+        if (ff_img_tags[idx++].id == id)
+            return 1;
+    /* Anything really can be stored in img2, but only below normal compliance. */
+    return std_compliance < FF_COMPLIANCE_NORMAL;
+}
+
 #define OFFSET(x) offsetof(VideoMuxData, x)
 #define ENC AV_OPT_FLAG_ENCODING_PARAM
 static const AVOption muxoptions[] = {
-    { "updatefirst",  "continuously overwrite one file", OFFSET(update),  AV_OPT_TYPE_INT, { .i64 = 0 }, 0,       1, ENC },
-    { "update",       "continuously overwrite one file", OFFSET(update),  AV_OPT_TYPE_INT, { .i64 = 0 }, 0,       1, ENC },
+    { "updatefirst",  "continuously overwrite one file", OFFSET(update),  AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0,       1, ENC },
+    { "update",       "continuously overwrite one file", OFFSET(update),  AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0,       1, ENC },
     { "start_number", "set first number in the sequence", OFFSET(img_number), AV_OPT_TYPE_INT,  { .i64 = 1 }, 0, INT_MAX, ENC },
-    { "strftime",     "use strftime for filename", OFFSET(use_strftime), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, ENC },
+    { "strftime",     "use strftime for filename", OFFSET(use_strftime),  AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, ENC },
+    { "atomic_writing", "write files atomically (using temporary files and renames)", OFFSET(use_rename), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, ENC },
     { NULL },
 };
 
@@ -200,6 +226,7 @@ AVOutputFormat ff_image2_muxer = {
     .video_codec    = AV_CODEC_ID_MJPEG,
     .write_header   = write_header,
     .write_packet   = write_packet,
+    .query_codec    = query_codec,
     .flags          = AVFMT_NOTIMESTAMPS | AVFMT_NODIMENSIONS | AVFMT_NOFILE,
     .priv_class     = &img2mux_class,
 };
@@ -212,6 +239,7 @@ AVOutputFormat ff_image2pipe_muxer = {
     .video_codec    = AV_CODEC_ID_MJPEG,
     .write_header   = write_header,
     .write_packet   = write_packet,
+    .query_codec    = query_codec,
     .flags          = AVFMT_NOTIMESTAMPS | AVFMT_NODIMENSIONS
 };
 #endif
diff --git a/libavformat/ingenientdec.c b/libavformat/ingenientdec.c
index 94c549c7..c0ba61e8 100644
--- a/libavformat/ingenientdec.c
+++ b/libavformat/ingenientdec.c
@@ -21,8 +21,18 @@
 
 #include "avformat.h"
 #include "rawdec.h"
+#include "libavutil/intreadwrite.h"
+
+// http://multimedia.cx/ingenient.txt
+static int ingenient_probe(AVProbeData *p)
+{
+    /* Accept on "MJPG" magic, >= 50 bytes, and 0xffd8 at byte offset 48. */
+    if (AV_RN32(p->buf) == AV_RN32("MJPG") && p->buf_size >= 50 &&
+        AV_RB16(p->buf + 48) == 0xffd8)
+        return AVPROBE_SCORE_MAX * 3 / 4;
+    return 0;
+}
 
-// http://www.artificis.hu/files/texts/ingenient.txt
 static int ingenient_read_packet(AVFormatContext *s, AVPacket *pkt)
 {
     int ret, size, w, h, unk1, unk2;
@@ -57,6 +67,7 @@ AVInputFormat ff_ingenient_demuxer = {
     .name           = "ingenient",
     .long_name      = NULL_IF_CONFIG_SMALL("raw Ingenient MJPEG"),
     .priv_data_size = sizeof(FFRawVideoDemuxerContext),
+    .read_probe     = ingenient_probe,
     .read_header    = ff_raw_video_read_header,
     .read_packet    = ingenient_read_packet,
     .flags          = AVFMT_GENERIC_INDEX,
diff --git a/libavformat/internal.h b/libavformat/internal.h
index f90df902..fee823d3 100644
--- a/libavformat/internal.h
+++ b/libavformat/internal.h
@@ -31,6 +31,8 @@
 #define PROBE_BUF_MIN 2048
 #define PROBE_BUF_MAX (1 << 20)
 
+#define MAX_PROBE_PACKETS 2500
+
 #ifdef DEBUG
 #    define hex_dump_debug(class, buf, size) av_hex_dump_log(class, AV_LOG_DEBUG, buf, size)
 #else
@@ -47,6 +49,18 @@ typedef struct CodecMime{
     enum AVCodecID id;
 } CodecMime;
 
+/*************************************************/
+/* fractional numbers for exact pts handling */
+
+/**
+ * The exact value of the fractional number is: 'val + num / den'.
+ * num is assumed to be 0 <= num < den.
+ */
+typedef struct FFFrac {
+    int64_t val, num, den; /* integer part, numerator, denominator of the fraction */
+} FFFrac;
+
+
 struct AVFormatInternal {
     /**
      * Number of streams relevant for interleaving.
@@ -96,9 +110,38 @@ struct AVFormatInternal {
      */
     AVRational offset_timebase;
 
+#if FF_API_COMPUTE_PKT_FIELDS2
+    int missing_ts_warning;
+#endif
+
     int inject_global_side_data;
 
     int avoid_negative_ts_use_pts;
+
+    /**
+     * Whether or not a header has already been written
+     */
+    int header_written;
+};
+
+struct AVStreamInternal {
+    /**
+     * Set to 1 if the codec allows reordering, so pts can be different
+     * from dts.
+     */
+    int reorder;
+
+    /**
+     * bitstream filter to run on stream
+     * - encoding: Set by muxer using ff_stream_add_bitstream_filter
+     * - decoding: unused
+     */
+    AVBitStreamFilterContext *bsfc;
+
+    /**
+     * Whether or not check_bitstream should still be run on each packet
+     */
+    int bitstream_checked;
 };
 
 #ifdef __GNUC__
@@ -130,8 +173,6 @@ char *ff_data_to_hex(char *buf, const uint8_t *src, int size, int lowercase);
  */
 int ff_hex_to_data(uint8_t *data, const char *p);
 
-void ff_program_add_stream_index(AVFormatContext *ac, int progid, unsigned int idx);
-
 /**
  * Add packet to AVFormatContext->packet_buffer list, determining its
  * interleaved position using compare() function argument.
@@ -254,6 +295,8 @@ int ff_add_index_entry(AVIndexEntry **index_entries,
                        unsigned int *index_entries_allocated_size,
                        int64_t pos, int64_t timestamp, int size, int distance, int flags);
 
+void ff_configure_buffers_for_index(AVFormatContext *s, int64_t time_tolerance);
+
 /**
  * Add a new chapter.
  *
@@ -277,11 +320,6 @@ void ff_reduce_index(AVFormatContext *s, int stream_index);
 
 enum AVCodecID ff_guess_image2_codec(const char *filename);
 
-/**
- * Convert a date string in ISO8601 format to Unix timestamp.
- */
-int64_t ff_iso8601_to_unix_time(const char *datestr);
-
 /**
  * Perform a binary search using av_index_search_timestamp() and
  * AVInputFormat.read_timestamp().
@@ -360,7 +398,7 @@ int ff_read_packet(AVFormatContext *s, AVPacket *pkt);
  * Interleave a packet per dts in an output media file.
  *
  * Packets with pkt->destruct == av_destruct_packet will be freed inside this
- * function, so they cannot be used after it. Note that calling av_free_packet()
+ * function, so they cannot be used after it. Note that calling av_packet_unref()
  * on them is still safe.
  *
  * @param s media file handle
@@ -421,6 +459,17 @@ enum AVChromaLocation ff_choose_chroma_location(AVFormatContext *s, AVStream *st
  */
 int ff_generate_avci_extradata(AVStream *st);
 
+/**
+ * Add a bitstream filter to a stream.
+ *
+ * @param st output stream to add a filter to
+ * @param name the name of the filter to add
+ * @param args filter-specific argument string
+ * @return  >0 on success;
+ *          AVERROR code on failure
+ */
+int ff_stream_add_bitstream_filter(AVStream *st, const char *name, const char *args);
+
 /**
  * Wrap errno on rename() error.
  *
@@ -440,14 +489,7 @@ static inline int ff_rename(const char *oldpath, const char *newpath, void *logc
 }
 
 /**
- * Add new side data to a stream. If a side data of this type already exists, it
- * is replaced.
- */
-uint8_t *ff_stream_new_side_data(AVStream *st, enum AVPacketSideDataType type,
-                                 int size);
-
-/**
- * Allocate extradata with additional FF_INPUT_BUFFER_PADDING_SIZE at end
+ * Allocate extradata with additional AV_INPUT_BUFFER_PADDING_SIZE at end
  * which is always set to 0.
  *
  * @param size size of extradata
@@ -456,7 +498,7 @@ uint8_t *ff_stream_new_side_data(AVStream *st, enum AVPacketSideDataType type,
 int ff_alloc_extradata(AVCodecContext *avctx, int size);
 
 /**
- * Allocate extradata with additional FF_INPUT_BUFFER_PADDING_SIZE at end
+ * Allocate extradata with additional AV_INPUT_BUFFER_PADDING_SIZE at end
  * which is always set to 0 and fill it from pb.
  *
  * @param size size of extradata
@@ -495,4 +537,27 @@ int ff_copy_whitelists(AVFormatContext *dst, AVFormatContext *src);
 int ffio_open2_wrapper(struct AVFormatContext *s, AVIOContext **pb, const char *url, int flags,
                        const AVIOInterruptCB *int_cb, AVDictionary **options);
 
+/**
+ * Returned by demuxers to indicate that data was consumed but discarded
+ * (ignored streams or junk data). The framework will re-call the demuxer.
+ */
+#define FFERROR_REDO FFERRTAG('R','E','D','O')
+
+/*
+ * A wrapper around AVFormatContext.io_close that should be used
+ * instead of calling the pointer directly.
+ */
+void ff_format_io_close(AVFormatContext *s, AVIOContext **pb);
+
+/**
+ * Parse creation_time in AVFormatContext metadata if exists and warn if the
+ * parsing fails.
+ *
+ * @param s AVFormatContext
+ * @param timestamp parsed timestamp in microseconds, only set on successful parsing
+ * @param return_seconds set this to get the number of seconds in timestamp instead of microseconds
+ * @return 1 if OK, 0 if the metadata was not present, AVERROR(EINVAL) on parse error
+ */
+int ff_parse_creation_time_metadata(AVFormatContext *s, int64_t *timestamp, int return_seconds);
+
 #endif /* AVFORMAT_INTERNAL_H */
diff --git a/libavformat/ipmovie.c b/libavformat/ipmovie.c
index af518b59..bc5d8ccb 100644
--- a/libavformat/ipmovie.c
+++ b/libavformat/ipmovie.c
@@ -78,7 +78,7 @@
 #define PALETTE_COUNT 256
 
 typedef struct IPMVEContext {
-
+    AVFormatContext *avf;
     unsigned char *buf;
     int buf_size;
 
@@ -119,7 +119,7 @@ static int load_ipmovie_packet(IPMVEContext *s, AVIOContext *pb,
 
     if (s->audio_chunk_offset && s->audio_channels && s->audio_bits) {
         if (s->audio_type == AV_CODEC_ID_NONE) {
-            av_log(NULL, AV_LOG_ERROR, "Can not read audio packet before"
+            av_log(s->avf, AV_LOG_ERROR, "Can not read audio packet before"
                    "audio codec is known\n");
                 return CHUNK_BAD;
         }
@@ -147,7 +147,7 @@ static int load_ipmovie_packet(IPMVEContext *s, AVIOContext *pb,
             s->audio_frame_count +=
                 (s->audio_chunk_size - 6 - s->audio_channels) / s->audio_channels;
 
-        av_log(NULL, AV_LOG_TRACE, "sending audio frame with pts %"PRId64" (%d audio frames)\n",
+        av_log(s->avf, AV_LOG_TRACE, "sending audio frame with pts %"PRId64" (%d audio frames)\n",
                 pkt->pts, s->audio_frame_count);
 
         chunk_type = CHUNK_VIDEO;
@@ -156,7 +156,7 @@ static int load_ipmovie_packet(IPMVEContext *s, AVIOContext *pb,
 
         /* send both the decode map and the video data together */
 
-        if (av_new_packet(pkt, s->decode_map_chunk_size + s->video_chunk_size))
+        if (av_new_packet(pkt, 2 + s->decode_map_chunk_size + s->video_chunk_size))
             return CHUNK_NOMEM;
 
         if (s->has_palette) {
@@ -178,25 +178,26 @@ static int load_ipmovie_packet(IPMVEContext *s, AVIOContext *pb,
         avio_seek(pb, s->decode_map_chunk_offset, SEEK_SET);
         s->decode_map_chunk_offset = 0;
 
-        if (avio_read(pb, pkt->data, s->decode_map_chunk_size) !=
+        AV_WL16(pkt->data, s->decode_map_chunk_size);
+        if (avio_read(pb, pkt->data + 2, s->decode_map_chunk_size) !=
             s->decode_map_chunk_size) {
-            av_free_packet(pkt);
+            av_packet_unref(pkt);
             return CHUNK_EOF;
         }
 
         avio_seek(pb, s->video_chunk_offset, SEEK_SET);
         s->video_chunk_offset = 0;
 
-        if (avio_read(pb, pkt->data + s->decode_map_chunk_size,
+        if (avio_read(pb, pkt->data + 2 + s->decode_map_chunk_size,
             s->video_chunk_size) != s->video_chunk_size) {
-            av_free_packet(pkt);
+            av_packet_unref(pkt);
             return CHUNK_EOF;
         }
 
         pkt->stream_index = s->video_stream_index;
         pkt->pts = s->video_pts;
 
-        av_log(NULL, AV_LOG_TRACE, "sending video frame with pts %"PRId64"\n", pkt->pts);
+        av_log(s->avf, AV_LOG_TRACE, "sending video frame with pts %"PRId64"\n", pkt->pts);
 
         s->video_pts += s->frame_pts_inc;
 
@@ -212,6 +213,31 @@ static int load_ipmovie_packet(IPMVEContext *s, AVIOContext *pb,
     return chunk_type;
 }
 
+/* Add the audio stream and fill its codec fields from the demuxer state. */
+static int init_audio(AVFormatContext *s)
+{
+    IPMVEContext *ipmovie = s->priv_data;
+    AVStream *st = avformat_new_stream(s, NULL);
+    AVCodecContext *codec;
+    if (!st)
+        return AVERROR(ENOMEM);
+    codec = st->codec;
+    avpriv_set_pts_info(st, 32, 1, ipmovie->audio_sample_rate);
+    ipmovie->audio_stream_index = st->index;
+    codec->codec_type = AVMEDIA_TYPE_AUDIO;
+    codec->codec_id = ipmovie->audio_type;
+    codec->codec_tag = 0;  /* no tag */
+    codec->channels = ipmovie->audio_channels;
+    codec->channel_layout = codec->channels == 1 ? AV_CH_LAYOUT_MONO : AV_CH_LAYOUT_STEREO;
+    codec->sample_rate = ipmovie->audio_sample_rate;
+    codec->bits_per_coded_sample = ipmovie->audio_bits;
+    codec->bit_rate = codec->channels * codec->sample_rate * codec->bits_per_coded_sample;
+    if (codec->codec_id == AV_CODEC_ID_INTERPLAY_DPCM)
+        codec->bit_rate /= 2;
+    codec->block_align = codec->channels * codec->bits_per_coded_sample;
+    return 0;
+}
+
 /* This function loads and processes a single chunk in an IP movie file.
  * It returns the type of chunk that was processed. */
 static int process_ipmovie_chunk(IPMVEContext *s, AVIOContext *pb,
@@ -245,36 +271,36 @@ static int process_ipmovie_chunk(IPMVEContext *s, AVIOContext *pb,
     chunk_size = AV_RL16(&chunk_preamble[0]);
     chunk_type = AV_RL16(&chunk_preamble[2]);
 
-    av_log(NULL, AV_LOG_TRACE, "chunk type 0x%04X, 0x%04X bytes: ", chunk_type, chunk_size);
+    av_log(s->avf, AV_LOG_TRACE, "chunk type 0x%04X, 0x%04X bytes: ", chunk_type, chunk_size);
 
     switch (chunk_type) {
 
     case CHUNK_INIT_AUDIO:
-        av_log(NULL, AV_LOG_TRACE, "initialize audio\n");
+        av_log(s->avf, AV_LOG_TRACE, "initialize audio\n");
         break;
 
     case CHUNK_AUDIO_ONLY:
-        av_log(NULL, AV_LOG_TRACE, "audio only\n");
+        av_log(s->avf, AV_LOG_TRACE, "audio only\n");
         break;
 
     case CHUNK_INIT_VIDEO:
-        av_log(NULL, AV_LOG_TRACE, "initialize video\n");
+        av_log(s->avf, AV_LOG_TRACE, "initialize video\n");
         break;
 
     case CHUNK_VIDEO:
-        av_log(NULL, AV_LOG_TRACE, "video (and audio)\n");
+        av_log(s->avf, AV_LOG_TRACE, "video (and audio)\n");
         break;
 
     case CHUNK_SHUTDOWN:
-        av_log(NULL, AV_LOG_TRACE, "shutdown\n");
+        av_log(s->avf, AV_LOG_TRACE, "shutdown\n");
         break;
 
     case CHUNK_END:
-        av_log(NULL, AV_LOG_TRACE, "end\n");
+        av_log(s->avf, AV_LOG_TRACE, "end\n");
         break;
 
     default:
-        av_log(NULL, AV_LOG_TRACE, "invalid chunk\n");
+        av_log(s->avf, AV_LOG_TRACE, "invalid chunk\n");
         chunk_type = CHUNK_BAD;
         break;
 
@@ -300,29 +326,29 @@ static int process_ipmovie_chunk(IPMVEContext *s, AVIOContext *pb,
         chunk_size -= OPCODE_PREAMBLE_SIZE;
         chunk_size -= opcode_size;
         if (chunk_size < 0) {
-            av_log(NULL, AV_LOG_TRACE, "chunk_size countdown just went negative\n");
+            av_log(s->avf, AV_LOG_TRACE, "chunk_size countdown just went negative\n");
             chunk_type = CHUNK_BAD;
             break;
         }
 
-        av_log(NULL, AV_LOG_TRACE, "  opcode type %02X, version %d, 0x%04X bytes: ",
+        av_log(s->avf, AV_LOG_TRACE, "  opcode type %02X, version %d, 0x%04X bytes: ",
                 opcode_type, opcode_version, opcode_size);
         switch (opcode_type) {
 
         case OPCODE_END_OF_STREAM:
-            av_log(NULL, AV_LOG_TRACE, "end of stream\n");
+            av_log(s->avf, AV_LOG_TRACE, "end of stream\n");
             avio_skip(pb, opcode_size);
             break;
 
         case OPCODE_END_OF_CHUNK:
-            av_log(NULL, AV_LOG_TRACE, "end of chunk\n");
+            av_log(s->avf, AV_LOG_TRACE, "end of chunk\n");
             avio_skip(pb, opcode_size);
             break;
 
         case OPCODE_CREATE_TIMER:
-            av_log(NULL, AV_LOG_TRACE, "create timer\n");
+            av_log(s->avf, AV_LOG_TRACE, "create timer\n");
             if ((opcode_version > 0) || (opcode_size != 6)) {
-                av_log(NULL, AV_LOG_TRACE, "bad create_timer opcode\n");
+                av_log(s->avf, AV_LOG_TRACE, "bad create_timer opcode\n");
                 chunk_type = CHUNK_BAD;
                 break;
             }
@@ -332,15 +358,15 @@ static int process_ipmovie_chunk(IPMVEContext *s, AVIOContext *pb,
                 break;
             }
             s->frame_pts_inc = ((uint64_t)AV_RL32(&scratch[0])) * AV_RL16(&scratch[4]);
-            av_log(NULL, AV_LOG_TRACE, "  %.2f frames/second (timer div = %d, subdiv = %d)\n",
+            av_log(s->avf, AV_LOG_TRACE, "  %.2f frames/second (timer div = %d, subdiv = %d)\n",
                     1000000.0 / s->frame_pts_inc, AV_RL32(&scratch[0]),
                     AV_RL16(&scratch[4]));
             break;
 
         case OPCODE_INIT_AUDIO_BUFFERS:
-            av_log(NULL, AV_LOG_TRACE, "initialize audio buffers\n");
+            av_log(s->avf, AV_LOG_TRACE, "initialize audio buffers\n");
             if (opcode_version > 1 || opcode_size > 10 || opcode_size < 6) {
-                av_log(NULL, AV_LOG_TRACE, "bad init_audio_buffers opcode\n");
+                av_log(s->avf, AV_LOG_TRACE, "bad init_audio_buffers opcode\n");
                 chunk_type = CHUNK_BAD;
                 break;
             }
@@ -362,7 +388,7 @@ static int process_ipmovie_chunk(IPMVEContext *s, AVIOContext *pb,
                 s->audio_type = AV_CODEC_ID_PCM_S16LE;
             else
                 s->audio_type = AV_CODEC_ID_PCM_U8;
-            av_log(NULL, AV_LOG_TRACE, "audio: %d bits, %d Hz, %s, %s format\n",
+            av_log(s->avf, AV_LOG_TRACE, "audio: %d bits, %d Hz, %s, %s format\n",
                     s->audio_bits, s->audio_sample_rate,
                     (s->audio_channels == 2) ? "stereo" : "mono",
                     (s->audio_type == AV_CODEC_ID_INTERPLAY_DPCM) ?
@@ -370,16 +396,16 @@ static int process_ipmovie_chunk(IPMVEContext *s, AVIOContext *pb,
             break;
 
         case OPCODE_START_STOP_AUDIO:
-            av_log(NULL, AV_LOG_TRACE, "start/stop audio\n");
+            av_log(s->avf, AV_LOG_TRACE, "start/stop audio\n");
             avio_skip(pb, opcode_size);
             break;
 
         case OPCODE_INIT_VIDEO_BUFFERS:
-            av_log(NULL, AV_LOG_TRACE, "initialize video buffers\n");
+            av_log(s->avf, AV_LOG_TRACE, "initialize video buffers\n");
             if ((opcode_version > 2) || (opcode_size > 8) || opcode_size < 4
                 || opcode_version == 2 && opcode_size < 8
             ) {
-                av_log(NULL, AV_LOG_TRACE, "bad init_video_buffers opcode\n");
+                av_log(s->avf, AV_LOG_TRACE, "bad init_video_buffers opcode\n");
                 chunk_type = CHUNK_BAD;
                 break;
             }
@@ -403,7 +429,7 @@ static int process_ipmovie_chunk(IPMVEContext *s, AVIOContext *pb,
             } else {
                 s->video_bpp = 16;
             }
-            av_log(NULL, AV_LOG_TRACE, "video resolution: %d x %d\n",
+            av_log(s->avf, AV_LOG_TRACE, "video resolution: %d x %d\n",
                     s->video_width, s->video_height);
             break;
 
@@ -414,17 +440,17 @@ static int process_ipmovie_chunk(IPMVEContext *s, AVIOContext *pb,
         case OPCODE_UNKNOWN_13:
         case OPCODE_UNKNOWN_14:
         case OPCODE_UNKNOWN_15:
-            av_log(NULL, AV_LOG_TRACE, "unknown (but documented) opcode %02X\n", opcode_type);
+            av_log(s->avf, AV_LOG_TRACE, "unknown (but documented) opcode %02X\n", opcode_type);
             avio_skip(pb, opcode_size);
             break;
 
         case OPCODE_SEND_BUFFER:
-            av_log(NULL, AV_LOG_TRACE, "send buffer\n");
+            av_log(s->avf, AV_LOG_TRACE, "send buffer\n");
             avio_skip(pb, opcode_size);
             break;
 
         case OPCODE_AUDIO_FRAME:
-            av_log(NULL, AV_LOG_TRACE, "audio frame\n");
+            av_log(s->avf, AV_LOG_TRACE, "audio frame\n");
 
             /* log position and move on for now */
             s->audio_chunk_offset = avio_tell(pb);
@@ -433,26 +459,26 @@ static int process_ipmovie_chunk(IPMVEContext *s, AVIOContext *pb,
             break;
 
         case OPCODE_SILENCE_FRAME:
-            av_log(NULL, AV_LOG_TRACE, "silence frame\n");
+            av_log(s->avf, AV_LOG_TRACE, "silence frame\n");
             avio_skip(pb, opcode_size);
             break;
 
         case OPCODE_INIT_VIDEO_MODE:
-            av_log(NULL, AV_LOG_TRACE, "initialize video mode\n");
+            av_log(s->avf, AV_LOG_TRACE, "initialize video mode\n");
             avio_skip(pb, opcode_size);
             break;
 
         case OPCODE_CREATE_GRADIENT:
-            av_log(NULL, AV_LOG_TRACE, "create gradient\n");
+            av_log(s->avf, AV_LOG_TRACE, "create gradient\n");
             avio_skip(pb, opcode_size);
             break;
 
         case OPCODE_SET_PALETTE:
-            av_log(NULL, AV_LOG_TRACE, "set palette\n");
+            av_log(s->avf, AV_LOG_TRACE, "set palette\n");
             /* check for the logical maximum palette size
              * (3 * 256 + 4 bytes) */
             if (opcode_size > 0x304 || opcode_size < 4) {
-                av_log(NULL, AV_LOG_TRACE, "demux_ipmovie: set_palette opcode with invalid size\n");
+                av_log(s->avf, AV_LOG_TRACE, "demux_ipmovie: set_palette opcode with invalid size\n");
                 chunk_type = CHUNK_BAD;
                 break;
             }
@@ -467,7 +493,7 @@ static int process_ipmovie_chunk(IPMVEContext *s, AVIOContext *pb,
             /* sanity check (since they are 16 bit values) */
             if (   (first_color > 0xFF) || (last_color > 0xFF)
                 || (last_color - first_color + 1)*3 + 4 > opcode_size) {
-                av_log(NULL, AV_LOG_TRACE, "demux_ipmovie: set_palette indexes out of range (%d -> %d)\n",
+                av_log(s->avf, AV_LOG_TRACE, "demux_ipmovie: set_palette indexes out of range (%d -> %d)\n",
                     first_color, last_color);
                 chunk_type = CHUNK_BAD;
                 break;
@@ -486,12 +512,12 @@ static int process_ipmovie_chunk(IPMVEContext *s, AVIOContext *pb,
             break;
 
         case OPCODE_SET_PALETTE_COMPRESSED:
-            av_log(NULL, AV_LOG_TRACE, "set palette compressed\n");
+            av_log(s->avf, AV_LOG_TRACE, "set palette compressed\n");
             avio_skip(pb, opcode_size);
             break;
 
         case OPCODE_SET_DECODING_MAP:
-            av_log(NULL, AV_LOG_TRACE, "set decoding map\n");
+            av_log(s->avf, AV_LOG_TRACE, "set decoding map\n");
 
             /* log position and move on for now */
             s->decode_map_chunk_offset = avio_tell(pb);
@@ -500,7 +526,7 @@ static int process_ipmovie_chunk(IPMVEContext *s, AVIOContext *pb,
             break;
 
         case OPCODE_VIDEO_DATA:
-            av_log(NULL, AV_LOG_TRACE, "set video data\n");
+            av_log(s->avf, AV_LOG_TRACE, "set video data\n");
 
             /* log position and move on for now */
             s->video_chunk_offset = avio_tell(pb);
@@ -509,13 +535,16 @@ static int process_ipmovie_chunk(IPMVEContext *s, AVIOContext *pb,
             break;
 
         default:
-            av_log(NULL, AV_LOG_TRACE, "*** unknown opcode type\n");
+            av_log(s->avf, AV_LOG_TRACE, "*** unknown opcode type\n");
             chunk_type = CHUNK_BAD;
             break;
 
         }
     }
 
+    if (s->avf->nb_streams == 1 && s->audio_type)
+        init_audio(s->avf);
+
     /* make a note of where the stream is sitting */
     s->next_chunk_offset = avio_tell(pb);
 
@@ -551,6 +580,8 @@ static int ipmovie_read_header(AVFormatContext *s)
     int chunk_type, i;
     uint8_t signature_buffer[sizeof(signature)];
 
+    ipmovie->avf = s;
+
     avio_read(pb, signature_buffer, sizeof(signature_buffer));
     while (memcmp(signature_buffer, signature, sizeof(signature))) {
         memmove(signature_buffer, signature_buffer + 1, sizeof(signature_buffer) - 1);
@@ -600,25 +631,9 @@ static int ipmovie_read_header(AVFormatContext *s)
     st->codec->bits_per_coded_sample = ipmovie->video_bpp;
 
     if (ipmovie->audio_type) {
-        st = avformat_new_stream(s, NULL);
-        if (!st)
-            return AVERROR(ENOMEM);
-        avpriv_set_pts_info(st, 32, 1, ipmovie->audio_sample_rate);
-        ipmovie->audio_stream_index = st->index;
-        st->codec->codec_type = AVMEDIA_TYPE_AUDIO;
-        st->codec->codec_id = ipmovie->audio_type;
-        st->codec->codec_tag = 0;  /* no tag */
-        st->codec->channels = ipmovie->audio_channels;
-        st->codec->channel_layout = st->codec->channels == 1 ? AV_CH_LAYOUT_MONO :
-                                                               AV_CH_LAYOUT_STEREO;
-        st->codec->sample_rate = ipmovie->audio_sample_rate;
-        st->codec->bits_per_coded_sample = ipmovie->audio_bits;
-        st->codec->bit_rate = st->codec->channels * st->codec->sample_rate *
-            st->codec->bits_per_coded_sample;
-        if (st->codec->codec_id == AV_CODEC_ID_INTERPLAY_DPCM)
-            st->codec->bit_rate /= 2;
-        st->codec->block_align = st->codec->channels * st->codec->bits_per_coded_sample;
-    }
+        return init_audio(s);
+    } else
+       s->ctx_flags |= AVFMTCTX_NOHEADER;
 
     return 0;
 }
@@ -643,7 +658,7 @@ static int ipmovie_read_packet(AVFormatContext *s,
     else if (ret == CHUNK_INIT_VIDEO || ret == CHUNK_INIT_AUDIO)
         continue;
     else
-        ret = -1;
+        continue;
 
     return ret;
     }
diff --git a/libavformat/isom.c b/libavformat/isom.c
index c99f2dc5..2ca12658 100644
--- a/libavformat/isom.c
+++ b/libavformat/isom.c
@@ -110,7 +110,7 @@ const AVCodecTag ff_codec_movvideo_tags[] = {
 
     { AV_CODEC_ID_MJPEG,  MKTAG('j', 'p', 'e', 'g') }, /* PhotoJPEG */
     { AV_CODEC_ID_MJPEG,  MKTAG('m', 'j', 'p', 'a') }, /* Motion-JPEG (format A) */
-    { AV_CODEC_ID_MJPEG,  MKTAG('A', 'V', 'D', 'J') }, /* MJPEG with alpha-channel (AVID JFIF meridien compressed) */
+    { AV_CODEC_ID_AVRN ,  MKTAG('A', 'V', 'D', 'J') }, /* MJPEG with alpha-channel (AVID JFIF meridien compressed) */
 /*  { AV_CODEC_ID_MJPEG,  MKTAG('A', 'V', 'R', 'n') }, *//* MJPEG with alpha-channel (AVID ABVB/Truevision NuVista) */
     { AV_CODEC_ID_MJPEG,  MKTAG('d', 'm', 'b', '1') }, /* Motion JPEG OpenDML */
     { AV_CODEC_ID_MJPEGB, MKTAG('m', 'j', 'p', 'b') }, /* Motion-JPEG (format B) */
@@ -160,6 +160,9 @@ const AVCodecTag ff_codec_movvideo_tags[] = {
     { AV_CODEC_ID_HEVC, MKTAG('h', 'v', 'c', '1') }, /* HEVC/H.265 which indicates parameter sets shall not be in ES */
 
     { AV_CODEC_ID_H264, MKTAG('a', 'v', 'c', '1') }, /* AVC-1/H.264 */
+    { AV_CODEC_ID_H264, MKTAG('a', 'v', 'c', '2') },
+    { AV_CODEC_ID_H264, MKTAG('a', 'v', 'c', '3') },
+    { AV_CODEC_ID_H264, MKTAG('a', 'v', 'c', '4') },
     { AV_CODEC_ID_H264, MKTAG('a', 'i', '5', 'p') }, /* AVC-Intra  50M 720p24/30/60 */
     { AV_CODEC_ID_H264, MKTAG('a', 'i', '5', 'q') }, /* AVC-Intra  50M 720p25/50 */
     { AV_CODEC_ID_H264, MKTAG('a', 'i', '5', '2') }, /* AVC-Intra  50M 1080p25/50 */
@@ -240,6 +243,7 @@ const AVCodecTag ff_codec_movvideo_tags[] = {
 
     { AV_CODEC_ID_DIRAC,     MKTAG('d', 'r', 'a', 'c') },
     { AV_CODEC_ID_DNXHD,     MKTAG('A', 'V', 'd', 'n') }, /* AVID DNxHD */
+    { AV_CODEC_ID_DNXHD,     MKTAG('A', 'V', 'd', 'h') }, /* AVID DNxHR */
     { AV_CODEC_ID_H263,      MKTAG('H', '2', '6', '3') },
     { AV_CODEC_ID_MSMPEG4V3, MKTAG('3', 'I', 'V', 'D') }, /* 3ivx DivX Doctor */
     { AV_CODEC_ID_RAWVIDEO,  MKTAG('A', 'V', '1', 'x') }, /* AVID 1:1x */
@@ -258,6 +262,13 @@ const AVCodecTag ff_codec_movvideo_tags[] = {
 
     { AV_CODEC_ID_AIC, MKTAG('i', 'c', 'o', 'd') },
 
+    { AV_CODEC_ID_HAP, MKTAG('H', 'a', 'p', '1') },
+    { AV_CODEC_ID_HAP, MKTAG('H', 'a', 'p', '5') },
+    { AV_CODEC_ID_HAP, MKTAG('H', 'a', 'p', 'Y') },
+
+    { AV_CODEC_ID_DXV, MKTAG('D', 'X', 'D', '3') },
+    { AV_CODEC_ID_DXV, MKTAG('D', 'X', 'D', 'I') },
+
     { AV_CODEC_ID_NONE, 0 },
 };
 
@@ -447,19 +458,24 @@ static const AVCodecTag mp4_audio_types[] = {
 int ff_mp4_read_dec_config_descr(AVFormatContext *fc, AVStream *st, AVIOContext *pb)
 {
     enum AVCodecID codec_id;
+    unsigned v;
     int len, tag;
     int ret;
     int object_type_id = avio_r8(pb);
     avio_r8(pb); /* stream type */
     avio_rb24(pb); /* buffer size db */
-    avio_rb32(pb); /* max bitrate */
-    avio_rb32(pb); /* avg bitrate */
 
     if(avcodec_is_open(st->codec)) {
         av_log(fc, AV_LOG_DEBUG, "codec open in read_dec_config_descr\n");
         return -1;
     }
 
+    v = avio_rb32(pb);
+    if (v < INT32_MAX)
+        st->codec->rc_max_rate = v;
+
+    st->codec->bit_rate = avio_rb32(pb); /* avg bitrate */
+
     codec_id= ff_codec_get_id(ff_mp4_obj_type, object_type_id);
     if (codec_id)
         st->codec->codec_id= codec_id;
diff --git a/libavformat/isom.h b/libavformat/isom.h
index 5d48989f..99bc7bed 100644
--- a/libavformat/isom.h
+++ b/libavformat/isom.h
@@ -37,6 +37,8 @@ extern const AVCodecTag ff_codec_movsubtitle_tags[];
 int ff_mov_iso639_to_lang(const char lang[4], int mp4);
 int ff_mov_lang_to_iso639(unsigned code, char to[4]);
 
+struct AVAESCTR;
+
 /* the QuickTime file format is quite convoluted...
  * it has lots of index tables, each indexing something in another one...
  * Here we just use what is needed to read the chunks
@@ -103,6 +105,7 @@ typedef struct MOVSbgp {
 typedef struct MOVFragmentIndexItem {
     int64_t moof_offset;
     int64_t time;
+    int headers_read;
 } MOVFragmentIndexItem;
 
 typedef struct MOVFragmentIndex {
@@ -167,6 +170,15 @@ typedef struct MOVStreamContext {
     int64_t duration_for_fps;
 
     int32_t *display_matrix;
+    uint32_t format;
+
+    struct {
+        int use_subsamples;
+        uint8_t* auxiliary_info;
+        uint8_t* auxiliary_info_end;
+        uint8_t* auxiliary_info_pos;
+        struct AVAESCTR* aes_ctr;
+    } cenc;
 } MOVStreamContext;
 
 typedef struct MOVContext {
@@ -176,6 +188,10 @@ typedef struct MOVContext {
     int64_t duration;     ///< duration of the longest track
     int found_moov;       ///< 'moov' atom has been found
     int found_mdat;       ///< 'mdat' atom has been found
+    int found_hdlr_mdta;  ///< 'hdlr' atom with type 'mdta' has been found
+    int trak_index;       ///< Index of the current 'trak'
+    char **meta_keys;
+    unsigned meta_keys_count;
     DVDemuxContext *dv_demux;
     AVFormatContext *dv_fctx;
     int isom;             ///< 1 if file is ISO Media (mp4/3gp)
@@ -183,9 +199,11 @@ typedef struct MOVContext {
     MOVTrackExt *trex_data;
     unsigned trex_count;
     int itunes_metadata;  ///< metadata are itunes style
+    int handbrake_version;
     int chapter_track;
     int use_absolute_path;
     int ignore_editlist;
+    int ignore_chapters;
     int seek_individually;
     int64_t next_root_atom; ///< offset of the next root atom
     int export_all;
@@ -197,7 +215,19 @@ typedef struct MOVContext {
     int has_looked_for_mfra;
     MOVFragmentIndex** fragment_index_data;
     unsigned fragment_index_count;
+    int fragment_index_complete;
     int atom_depth;
+    unsigned int aax_mode;  ///< 'aax' file has been detected
+    uint8_t file_key[20];
+    uint8_t file_iv[20];
+    void *activation_bytes;
+    int activation_bytes_size;
+    void *audible_fixed_key;
+    int audible_fixed_key_size;
+    struct AVAES *aes_decrypt;
+    uint8_t *decryption_key;
+    int decryption_key_len;
+    int enable_drefs;
 } MOVContext;
 
 int ff_mp4_read_descr_len(AVIOContext *pb);
diff --git a/libavformat/iv8.c b/libavformat/iv8.c
index 38b79609..f1e351cb 100644
--- a/libavformat/iv8.c
+++ b/libavformat/iv8.c
@@ -92,7 +92,7 @@ static int read_packet(AVFormatContext *s, AVPacket *pkt)
             ret = av_append_packet(s->pb, pkt, size);
             if (ret < 0) {
                 av_log(s, AV_LOG_ERROR, "failed to grow packet\n");
-                av_free_packet(pkt);
+                av_packet_unref(pkt);
                 return ret;
             }
         }
diff --git a/libavformat/ivfenc.c b/libavformat/ivfenc.c
index 1d76c5c7..484d87d0 100644
--- a/libavformat/ivfenc.c
+++ b/libavformat/ivfenc.c
@@ -20,6 +20,11 @@
 #include "avformat.h"
 #include "libavutil/intreadwrite.h"
 
+typedef struct IVFEncContext { /* muxer private data: pts statistics gathered per packet for the trailer */
+    unsigned frame_cnt; /* packets written so far; bumped once per ivf_write_packet() call */
+    uint64_t last_pts, sum_delta_pts; /* pts of the previous packet; running sum of consecutive pts differences */
+} IVFEncContext;
+
 static int ivf_write_header(AVFormatContext *s)
 {
     AVCodecContext *ctx;
@@ -43,7 +48,7 @@ static int ivf_write_header(AVFormatContext *s)
     avio_wl16(pb, ctx->height);
     avio_wl32(pb, s->streams[0]->time_base.den);
     avio_wl32(pb, s->streams[0]->time_base.num);
-    avio_wl64(pb, s->streams[0]->duration); // TODO: duration or number of frames?!?
+    avio_wl64(pb, 0xFFFFFFFFFFFFFFFFULL);
 
     return 0;
 }
@@ -51,14 +56,37 @@ static int ivf_write_header(AVFormatContext *s)
 static int ivf_write_packet(AVFormatContext *s, AVPacket *pkt)
 {
     AVIOContext *pb = s->pb;
+    IVFEncContext *ctx = s->priv_data;
+    /* Emit the frame record: 32-bit size, 64-bit pts, then the payload bytes. */
     avio_wl32(pb, pkt->size);
     avio_wl64(pb, pkt->pts);
     avio_write(pb, pkt->data, pkt->size);
+    if (ctx->frame_cnt) /* the first packet has no predecessor to take a delta from */
+        ctx->sum_delta_pts += pkt->pts - ctx->last_pts;
+    ctx->frame_cnt++;
+    ctx->last_pts = pkt->pts;
+    /* The counters updated above are read back by ivf_write_trailer(). */
+    return 0;
+}
+
+static int ivf_write_trailer(AVFormatContext *s)
+{
+    AVIOContext *pb = s->pb;
+    IVFEncContext *ctx = s->priv_data;
+    /* Patching needs a seekable output and at least two packets (one pts delta). Always returns 0. */
+    if (pb->seekable && ctx->frame_cnt > 1) {
+        int64_t end = avio_tell(pb); /* avio_tell() returns int64_t; size_t would truncate past 4 GiB on 32-bit targets */
+        /* Overwrite the 64-bit field at byte offset 24 with frame_cnt times the mean pts delta. */
+        avio_seek(pb, 24, SEEK_SET);
+        avio_wl64(pb, ctx->frame_cnt * ctx->sum_delta_pts / (ctx->frame_cnt - 1));
+        avio_seek(pb, end, SEEK_SET);
+    }
 
     return 0;
 }
 
 AVOutputFormat ff_ivf_muxer = {
+    .priv_data_size = sizeof(IVFEncContext),
     .name         = "ivf",
     .long_name    = NULL_IF_CONFIG_SMALL("On2 IVF"),
     .extensions   = "ivf",
@@ -66,4 +94,5 @@ AVOutputFormat ff_ivf_muxer = {
     .video_codec  = AV_CODEC_ID_VP8,
     .write_header = ivf_write_header,
     .write_packet = ivf_write_packet,
+    .write_trailer = ivf_write_trailer,
 };
diff --git a/libavformat/jacosubdec.c b/libavformat/jacosubdec.c
index 1ca00558..0436a932 100644
--- a/libavformat/jacosubdec.c
+++ b/libavformat/jacosubdec.c
@@ -101,7 +101,7 @@ static int jacosub_read_close(AVFormatContext *s)
 }
 
 static const char *read_ts(JACOsubContext *jacosub, const char *buf,
-                           int64_t *start, int *duration)
+                           int64_t *start, int64_t *duration)
 {
     int len;
     unsigned hs, ms, ss, fs; // hours, minutes, seconds, frame start
@@ -172,7 +172,7 @@ static int jacosub_read_header(AVFormatContext *s)
 
     jacosub->timeres = 30;
 
-    av_bprint_init(&header, 1024+FF_INPUT_BUFFER_PADDING_SIZE, 4096);
+    av_bprint_init(&header, 1024+AV_INPUT_BUFFER_PADDING_SIZE, 4096);
 
     while (!avio_feof(pb)) {
         int cmd_len;
@@ -240,7 +240,7 @@ static int jacosub_read_header(AVFormatContext *s)
         AVPacket *sub = &jacosub->q.subs[i];
         read_ts(jacosub, sub->data, &sub->pts, &sub->duration);
     }
-    ff_subtitles_queue_finalize(&jacosub->q);
+    ff_subtitles_queue_finalize(s, &jacosub->q);
 
     return 0;
 fail:
diff --git a/libavformat/jvdec.c b/libavformat/jvdec.c
index 64d31e0e..a31c7236 100644
--- a/libavformat/jvdec.c
+++ b/libavformat/jvdec.c
@@ -54,7 +54,7 @@ typedef struct JVDemuxContext {
 
 static int read_probe(AVProbeData *pd)
 {
-    if (pd->buf[0] == 'J' && pd->buf[1] == 'V' && strlen(MAGIC) <= pd->buf_size - 4 &&
+    if (pd->buf[0] == 'J' && pd->buf[1] == 'V' && strlen(MAGIC) + 4 <= pd->buf_size &&
         !memcmp(pd->buf + 4, MAGIC, strlen(MAGIC)))
         return AVPROBE_SCORE_MAX;
     return 0;
@@ -196,7 +196,7 @@ static int read_packet(AVFormatContext *s, AVPacket *pkt)
                     return ret;
                 if (ret < size) {
                     memset(pkt->data + JV_PREAMBLE_SIZE + ret, 0,
-                           FF_INPUT_BUFFER_PADDING_SIZE);
+                           AV_INPUT_BUFFER_PADDING_SIZE);
                     pkt->flags |= AV_PKT_FLAG_CORRUPT;
                 }
                 pkt->size         = ret + JV_PREAMBLE_SIZE;
diff --git a/libavformat/latmenc.c b/libavformat/latmenc.c
index 17dbf33b..db6977e5 100644
--- a/libavformat/latmenc.c
+++ b/libavformat/latmenc.c
@@ -124,7 +124,8 @@ static void latm_write_frame_header(AVFormatContext *s, PutBitContext *bs)
 
             if (!ctx->channel_conf) {
                 GetBitContext gb;
-                init_get_bits8(&gb, avctx->extradata, avctx->extradata_size);
+                int ret = init_get_bits8(&gb, avctx->extradata, avctx->extradata_size);
+                av_assert0(ret >= 0); // extradata size has been checked already, so this should not fail
                 skip_bits_long(&gb, ctx->off + 3);
                 avpriv_copy_pce_data(bs, &gb);
             }
diff --git a/libavformat/libavformat.v b/libavformat/libavformat.v
index e90aef79..a00a3093 100644
--- a/libavformat/libavformat.v
+++ b/libavformat/libavformat.v
@@ -10,9 +10,6 @@ LIBAVFORMAT_$MAJOR {
                 ffio_set_buf_size;
                 ffurl_close;
                 ffurl_open;
-                ffurl_read_complete;
-                ffurl_seek;
-                ffurl_size;
                 ffurl_write;
                 #those are deprecated, remove on next bump
                 url_feof;
diff --git a/libavformat/libmodplug.c b/libavformat/libmodplug.c
index 158a6303..75699e89 100644
--- a/libavformat/libmodplug.c
+++ b/libavformat/libmodplug.c
@@ -325,7 +325,7 @@ static int modplug_read_packet(AVFormatContext *s, AVPacket *pkt)
 
     pkt->size = ModPlug_Read(modplug->f, pkt->data, AUDIO_PKT_SIZE);
     if (pkt->size <= 0) {
-        av_free_packet(pkt);
+        av_packet_unref(pkt);
         return pkt->size == 0 ? AVERROR_EOF : AVERROR(EIO);
     }
     return 0;
diff --git a/libavformat/libnut.c b/libavformat/libnut.c
index 4a9a21a7..92623ed4 100644
--- a/libavformat/libnut.c
+++ b/libavformat/libnut.c
@@ -179,7 +179,7 @@ static size_t av_read(void * h, size_t len, uint8_t * buf) {
     return avio_read(bc, buf, len);
 }
 
-static off_t av_seek(void * h, long long pos, int whence) {
+static off_t av_seek(void * h, int64_t pos, int whence) {
     AVIOContext * bc = h;
     if (whence == SEEK_END) {
         pos = avio_size(bc) + pos;
diff --git a/libavformat/libquvi.c b/libavformat/libquvi.c
deleted file mode 100644
index 71516945..00000000
--- a/libavformat/libquvi.c
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- * Copyright (c) 2013 Clément Bœsch
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <quvi/quvi.h>
-
-#include "libavformat/avformat.h"
-#include "libavformat/internal.h"
-#include "libavutil/avassert.h"
-#include "libavutil/opt.h"
-
-typedef struct {
-    const AVClass *class;
-    char *format;
-    AVFormatContext *fmtctx;
-} LibQuviContext;
-
-#define OFFSET(x) offsetof(LibQuviContext, x)
-#define FLAGS AV_OPT_FLAG_DECODING_PARAM
-static const AVOption libquvi_options[] = {
-    { "format", "request specific format", OFFSET(format), AV_OPT_TYPE_STRING, {.str="best"}, .flags = FLAGS },
-    { NULL }
-};
-
-static const AVClass libquvi_context_class = {
-    .class_name     = "libquvi",
-    .item_name      = av_default_item_name,
-    .option         = libquvi_options,
-    .version        = LIBAVUTIL_VERSION_INT,
-};
-
-static int libquvi_close(AVFormatContext *s)
-{
-    LibQuviContext *qc = s->priv_data;
-    if (qc->fmtctx)
-        avformat_close_input(&qc->fmtctx);
-    return 0;
-}
-
-static int libquvi_read_header(AVFormatContext *s)
-{
-    int i, ret;
-    quvi_t q;
-    quvi_media_t m;
-    QUVIcode rc;
-    LibQuviContext *qc = s->priv_data;
-    char *media_url, *pagetitle;
-
-    rc = quvi_init(&q);
-    if (rc != QUVI_OK) {
-        av_log(s, AV_LOG_ERROR, "%s\n", quvi_strerror(q, rc));
-        return AVERROR_EXTERNAL;
-    }
-
-    quvi_setopt(q, QUVIOPT_FORMAT, qc->format);
-
-    rc = quvi_parse(q, s->filename, &m);
-    if (rc != QUVI_OK) {
-        av_log(s, AV_LOG_ERROR, "%s\n", quvi_strerror(q, rc));
-        ret = AVERROR_EXTERNAL;
-        goto err_quvi_close;
-    }
-
-    rc = quvi_getprop(m, QUVIPROP_MEDIAURL, &media_url);
-    if (rc != QUVI_OK) {
-        av_log(s, AV_LOG_ERROR, "%s\n", quvi_strerror(q, rc));
-        ret = AVERROR_EXTERNAL;
-        goto err_quvi_cleanup;
-    }
-
-    if (!(qc->fmtctx = avformat_alloc_context())) {
-        ret = AVERROR(ENOMEM);
-        goto err_quvi_cleanup;
-    }
-
-    if ((ret = ff_copy_whitelists(qc->fmtctx, s)) < 0) {
-        avformat_free_context(qc->fmtctx);
-        qc->fmtctx = NULL;
-        goto err_quvi_cleanup;
-    }
-
-    ret = avformat_open_input(&qc->fmtctx, media_url, NULL, NULL);
-    if (ret < 0)
-        goto err_quvi_cleanup;
-
-    rc = quvi_getprop(m, QUVIPROP_PAGETITLE, &pagetitle);
-    if (rc == QUVI_OK)
-        av_dict_set(&s->metadata, "title", pagetitle, 0);
-
-    for (i = 0; i < qc->fmtctx->nb_streams; i++) {
-        AVStream *st = avformat_new_stream(s, NULL);
-        AVStream *ist = qc->fmtctx->streams[i];
-        if (!st) {
-            ret = AVERROR(ENOMEM);
-            goto err_close_input;
-        }
-        avpriv_set_pts_info(st, ist->pts_wrap_bits, ist->time_base.num, ist->time_base.den);
-        avcodec_copy_context(st->codec, qc->fmtctx->streams[i]->codec);
-    }
-
-    return 0;
-
-  err_close_input:
-    avformat_close_input(&qc->fmtctx);
-  err_quvi_cleanup:
-    quvi_parse_close(&m);
-  err_quvi_close:
-    quvi_close(&q);
-    return ret;
-}
-
-static int libquvi_read_packet(AVFormatContext *s, AVPacket *pkt)
-{
-    LibQuviContext *qc = s->priv_data;
-    return av_read_frame(qc->fmtctx, pkt);
-}
-
-static int libquvi_read_seek(AVFormatContext *s, int stream_index, int64_t timestamp, int flags)
-{
-    LibQuviContext *qc = s->priv_data;
-    return av_seek_frame(qc->fmtctx, stream_index, timestamp, flags);
-}
-
-static int libquvi_probe(AVProbeData *p)
-{
-    int score;
-    quvi_t q;
-    QUVIcode rc;
-
-    rc = quvi_init(&q);
-    if (rc != QUVI_OK)
-        return AVERROR(ENOMEM);
-    score = quvi_supported(q, (char *)p->filename) == QUVI_OK ? AVPROBE_SCORE_EXTENSION : 0;
-    quvi_close(&q);
-    return score;
-}
-
-AVInputFormat ff_libquvi_demuxer = {
-    .name           = "libquvi",
-    .long_name      = NULL_IF_CONFIG_SMALL("libquvi demuxer"),
-    .priv_data_size = sizeof(LibQuviContext),
-    .read_probe     = libquvi_probe,
-    .read_header    = libquvi_read_header,
-    .read_packet    = libquvi_read_packet,
-    .read_close     = libquvi_close,
-    .read_seek      = libquvi_read_seek,
-    .priv_class     = &libquvi_context_class,
-    .flags          = AVFMT_NOFILE,
-};
diff --git a/libavformat/libsmbclient.c b/libavformat/libsmbclient.c
index 1af81636..84fef7f1 100644
--- a/libavformat/libsmbclient.c
+++ b/libavformat/libsmbclient.c
@@ -287,6 +287,67 @@ static int libsmbc_close_dir(URLContext *h)
     return 0;
 }
 
+static int libsmbc_delete(URLContext *h)
+{
+    LIBSMBContext *libsmbc = h->priv_data;
+    int ret;
+    struct stat st;
+    /* Remove h->filename: rmdir for directories, unlink otherwise. Returns 0 or a negative AVERROR code. */
+    if ((ret = libsmbc_connect(h)) < 0)
+        goto cleanup;
+    /* The target is opened only so it can be stat'ed; the handle is closed again before removal. */
+    if ((libsmbc->fd = smbc_open(h->filename, O_WRONLY, 0666)) < 0) {
+        ret = AVERROR(errno);
+        goto cleanup;
+    }
+
+    if (smbc_fstat(libsmbc->fd, &st) < 0) {
+        ret = AVERROR(errno);
+        goto cleanup;
+    }
+
+    smbc_close(libsmbc->fd);
+    libsmbc->fd = -1; /* presumably marks the handle as closed for libsmbc_close() — confirm */
+
+    if (S_ISDIR(st.st_mode)) {
+        if (smbc_rmdir(h->filename) < 0) {
+            ret = AVERROR(errno);
+            goto cleanup;
+        }
+    } else {
+        if (smbc_unlink(h->filename) < 0) {
+            ret = AVERROR(errno);
+            goto cleanup;
+        }
+    }
+
+    ret = 0;
+
+cleanup: /* reached on success and on every failure path */
+    libsmbc_close(h);
+    return ret;
+}
+
+static int libsmbc_move(URLContext *h_src, URLContext *h_dst)
+{
+    int ret;
+    /* Rename h_src->filename to h_dst->filename.
+     * Returns 0 on success or a negative AVERROR code. */
+    if ((ret = libsmbc_connect(h_src)) < 0)
+        goto cleanup;
+    /* smbc_rename() yields a status code, so test it directly instead of storing it in the context's dh field. */
+    if (smbc_rename(h_src->filename, h_dst->filename) < 0) {
+        ret = AVERROR(errno);
+        goto cleanup;
+    }
+
+    ret = 0;
+
+cleanup: /* reached on success and on every failure path */
+    libsmbc_close(h_src);
+    return ret;
+}
+
 #define OFFSET(x) offsetof(LIBSMBContext, x)
 #define D AV_OPT_FLAG_DECODING_PARAM
 #define E AV_OPT_FLAG_ENCODING_PARAM
@@ -311,6 +372,8 @@ URLProtocol ff_libsmbclient_protocol = {
     .url_write           = libsmbc_write,
     .url_seek            = libsmbc_seek,
     .url_close           = libsmbc_close,
+    .url_delete          = libsmbc_delete,
+    .url_move            = libsmbc_move,
     .url_open_dir        = libsmbc_open_dir,
     .url_read_dir        = libsmbc_read_dir,
     .url_close_dir       = libsmbc_close_dir,
diff --git a/libavformat/libssh.c b/libavformat/libssh.c
index fac61142..3c056f87 100644
--- a/libavformat/libssh.c
+++ b/libavformat/libssh.c
@@ -24,6 +24,7 @@
 #include "libavutil/avstring.h"
 #include "libavutil/opt.h"
 #include "libavutil/attributes.h"
+#include "libavformat/avio.h"
 #include "avformat.h"
 #include "internal.h"
 #include "url.h"
@@ -33,6 +34,7 @@ typedef struct {
     ssh_session session;
     sftp_session sftp;
     sftp_file file;
+    sftp_dir dir;
     int64_t filesize;
     int rw_timeout;
     int trunc;
@@ -187,11 +189,11 @@ static av_cold int libssh_close(URLContext *h)
     return 0;
 }
 
-static av_cold int libssh_open(URLContext *h, const char *url, int flags)
+static av_cold int libssh_connect(URLContext *h, const char *url, char *path, size_t path_size)
 {
     LIBSSHContext *libssh = h->priv_data;
-    char proto[10], path[MAX_URL_SIZE], hostname[1024], credencials[1024];
-    int port, ret;
+    char proto[10], hostname[1024], credencials[1024];
+    int port = 22, ret;
     const char *user = NULL, *pass = NULL;
     char *end = NULL;
 
@@ -199,23 +201,38 @@ static av_cold int libssh_open(URLContext *h, const char *url, int flags)
                  credencials, sizeof(credencials),
                  hostname, sizeof(hostname),
                  &port,
-                 path, sizeof(path),
+                 path, path_size,
                  url);
 
+    if (!(*path))
+        av_strlcpy(path, "/", path_size);
+
     // a port of 0 will use a port from ~/.ssh/config or the default value 22
     if (port < 0 || port > 65535)
         port = 0;
 
     if ((ret = libssh_create_ssh_session(libssh, hostname, port)) < 0)
-        goto fail;
+        return ret;
 
     user = av_strtok(credencials, ":", &end);
     pass = av_strtok(end, ":", &end);
 
     if ((ret = libssh_authentication(libssh, user, pass)) < 0)
-        goto fail;
+        return ret;
 
     if ((ret = libssh_create_sftp_session(libssh)) < 0)
+        return ret;
+
+    return 0;
+}
+
+static av_cold int libssh_open(URLContext *h, const char *url, int flags)
+{
+    int ret;
+    LIBSSHContext *libssh = h->priv_data;
+    char path[MAX_URL_SIZE];
+
+    if ((ret = libssh_connect(h, url, path, sizeof(path))) < 0)
         goto fail;
 
     if ((ret = libssh_open_file(libssh, flags, path)) < 0)
@@ -293,6 +310,168 @@ static int libssh_write(URLContext *h, const unsigned char *buf, int size)
     return bytes_written;
 }
 
+static int libssh_open_dir(URLContext *h)
+{
+    LIBSSHContext *libssh = h->priv_data;
+    int ret;
+    char path[MAX_URL_SIZE];
+    /* Connect using h->filename (libssh_connect also fills in path), then open that path as a directory. */
+    if ((ret = libssh_connect(h, h->filename, path, sizeof(path))) < 0)
+        goto fail;
+    /* The handle is kept in libssh->dir for libssh_read_dir() and libssh_close_dir(). */
+    if (!(libssh->dir = sftp_opendir(libssh->sftp, path))) {
+        av_log(libssh, AV_LOG_ERROR, "Error opening sftp dir: %s\n", ssh_get_error(libssh->session));
+        ret = AVERROR(EIO);
+        goto fail;
+    }
+
+    return 0;
+
+  fail: /* any failure closes the connection via libssh_close() before returning the error */
+    libssh_close(h);
+    return ret;
+}
+
+static int libssh_read_dir(URLContext *h, AVIODirEntry **next)
+{
+    LIBSSHContext *libssh = h->priv_data;
+    sftp_attributes attr = NULL;
+    AVIODirEntry *entry;
+    /* Returns 0 with the next entry in *next (NULL at end of directory), or a negative AVERROR with *next == NULL. */
+    *next = entry = ff_alloc_dir_entry();
+    if (!entry)
+        return AVERROR(ENOMEM);
+
+    do { /* skip the "." and ".." entries, releasing each rejected attribute set */
+        if (attr)
+            sftp_attributes_free(attr);
+        attr = sftp_readdir(libssh->sftp, libssh->dir);
+        if (!attr) {
+            av_freep(next);
+            return sftp_dir_eof(libssh->dir) ? 0 : AVERROR(EIO);
+        }
+    } while (!strcmp(attr->name, ".") || !strcmp(attr->name, ".."));
+
+    entry->name = av_strdup(attr->name); /* may be NULL on allocation failure; checked once attr is released */
+    entry->group_id = attr->gid;
+    entry->user_id = attr->uid;
+    entry->size = attr->size;
+    entry->access_timestamp = INT64_C(1000000) * attr->atime;
+    entry->modification_timestamp = INT64_C(1000000) * attr->mtime;
+    entry->filemode = attr->permissions & 0777;
+    switch(attr->type) {
+    case SSH_FILEXFER_TYPE_REGULAR:
+        entry->type = AVIO_ENTRY_FILE;
+        break;
+    case SSH_FILEXFER_TYPE_DIRECTORY:
+        entry->type = AVIO_ENTRY_DIRECTORY;
+        break;
+    case SSH_FILEXFER_TYPE_SYMLINK:
+        entry->type = AVIO_ENTRY_SYMBOLIC_LINK;
+        break;
+    case SSH_FILEXFER_TYPE_SPECIAL:
+        /* Special covers sockets, char devices, block devices and pipes; report unknown, to not confuse anybody. */
+    case SSH_FILEXFER_TYPE_UNKNOWN:
+    default:
+        entry->type = AVIO_ENTRY_UNKNOWN;
+    }
+    sftp_attributes_free(attr);
+    if (entry->name)
+        return 0;
+    av_freep(next); /* never hand back an entry without a name; name is NULL so only the entry needs freeing */
+    return AVERROR(ENOMEM);
+}
+
+static int libssh_close_dir(URLContext *h) /* closes the directory handle, if any, then the connection; always returns 0 */
+{
+    LIBSSHContext *libssh = h->priv_data;
+    if (libssh->dir) /* dir may be unset, so only close a handle that exists */
+        sftp_closedir(libssh->dir);
+    libssh->dir = NULL; /* cleared so the closed handle is not reused */
+    libssh_close(h);
+    return 0;
+}
+
+static int libssh_delete(URLContext *h)
+{
+    int ret;
+    LIBSSHContext *libssh = h->priv_data;
+    sftp_attributes attr = NULL;
+    char path[MAX_URL_SIZE];
+    /* Remove the remote path named by h->filename: rmdir for directories, unlink for everything else. */
+    if ((ret = libssh_connect(h, h->filename, path, sizeof(path))) < 0)
+        goto cleanup;
+    /* NOTE(review): sftp_get_error() values are fed to AVERROR() as if they were errno codes — confirm the mapping. */
+    if (!(attr = sftp_stat(libssh->sftp, path))) {
+        ret = AVERROR(sftp_get_error(libssh->sftp));
+        goto cleanup;
+    }
+
+    if (attr->type == SSH_FILEXFER_TYPE_DIRECTORY) {
+        if (sftp_rmdir(libssh->sftp, path) < 0) {
+            ret = AVERROR(sftp_get_error(libssh->sftp));
+            goto cleanup;
+        }
+    } else {
+        if (sftp_unlink(libssh->sftp, path) < 0) {
+            ret = AVERROR(sftp_get_error(libssh->sftp));
+            goto cleanup;
+        }
+    }
+
+    ret = 0;
+
+cleanup: /* reached on success and on every failure path; attr is still NULL if the stat never succeeded */
+    if (attr)
+        sftp_attributes_free(attr);
+    libssh_close(h);
+    return ret;
+}
+
+static int libssh_move(URLContext *h_src, URLContext *h_dst)
+{
+    int ret;
+    LIBSSHContext *libssh = h_src->priv_data;
+    char path_src[MAX_URL_SIZE], path_dst[MAX_URL_SIZE];
+    char hostname_src[1024], hostname_dst[1024];
+    char credentials_src[1024], credentials_dst[1024];
+    int port_src = 22, port_dst = 22;
+    /* Split both URLs so that their credentials, host and port can be compared below. */
+    av_url_split(NULL, 0,
+                 credentials_src, sizeof(credentials_src),
+                 hostname_src, sizeof(hostname_src),
+                 &port_src,
+                 path_src, sizeof(path_src),
+                 h_src->filename);
+
+    av_url_split(NULL, 0,
+                 credentials_dst, sizeof(credentials_dst),
+                 hostname_dst, sizeof(hostname_dst),
+                 &port_dst,
+                 path_dst, sizeof(path_dst),
+                 h_dst->filename);
+    /* The rename runs over the single connection opened for h_src, so both URLs must name the same endpoint. */
+    if (strcmp(credentials_src, credentials_dst) ||
+            strcmp(hostname_src, hostname_dst) ||
+            port_src != port_dst) {
+        return AVERROR(EINVAL);
+    }
+    /* libssh_connect() fills path_src again from h_src->filename. */
+    if ((ret = libssh_connect(h_src, h_src->filename, path_src, sizeof(path_src))) < 0)
+        goto cleanup;
+    /* NOTE(review): sftp_get_error() values are fed to AVERROR() as if they were errno codes — confirm the mapping. */
+    if (sftp_rename(libssh->sftp, path_src, path_dst) < 0) {
+        ret = AVERROR(sftp_get_error(libssh->sftp));
+        goto cleanup;
+    }
+
+    ret = 0;
+
+cleanup: /* reached on success and on connect/rename failure; the EINVAL return above bypasses it */
+    libssh_close(h_src);
+    return ret;
+}
+
 #define OFFSET(x) offsetof(LIBSSHContext, x)
 #define D AV_OPT_FLAG_DECODING_PARAM
 #define E AV_OPT_FLAG_ENCODING_PARAM
@@ -317,6 +496,11 @@ URLProtocol ff_libssh_protocol = {
     .url_write           = libssh_write,
     .url_seek            = libssh_seek,
     .url_close           = libssh_close,
+    .url_delete          = libssh_delete,
+    .url_move            = libssh_move,
+    .url_open_dir        = libssh_open_dir,
+    .url_read_dir        = libssh_read_dir,
+    .url_close_dir       = libssh_close_dir,
     .priv_data_size      = sizeof(LIBSSHContext),
     .priv_data_class     = &libssh_context_class,
     .flags               = URL_PROTOCOL_FLAG_NETWORK,
diff --git a/libavformat/lrcdec.c b/libavformat/lrcdec.c
index df61853d..d3655fcc 100644
--- a/libavformat/lrcdec.c
+++ b/libavformat/lrcdec.c
@@ -210,7 +210,7 @@ static int lrc_read_header(AVFormatContext *s)
             }
         }
     }
-    ff_subtitles_queue_finalize(&lrc->q);
+    ff_subtitles_queue_finalize(s, &lrc->q);
     ff_metadata_conv_ctx(s, NULL, ff_lrc_metadata_conv);
     return 0;
 }
diff --git a/libavformat/lxfdec.c b/libavformat/lxfdec.c
index ce5da820..c00b4bde 100644
--- a/libavformat/lxfdec.c
+++ b/libavformat/lxfdec.c
@@ -130,7 +130,7 @@ static int get_packet_header(AVFormatContext *s)
     version     = bytestream_get_le32(&p);
     header_size = bytestream_get_le32(&p);
     if (version > 1)
-        avpriv_request_sample(s, "Unknown format version %"PRIu32"\n", version);
+        avpriv_request_sample(s, "Format version %"PRIu32, version);
 
     if (header_size < (version ? 72 : 60) ||
         header_size > LXF_MAX_PACKET_HEADER_SIZE ||
@@ -305,7 +305,7 @@ static int lxf_read_packet(AVFormatContext *s, AVPacket *pkt)
     if (stream > 1) {
         av_log(s, AV_LOG_WARNING,
                "got packet with illegal stream index %"PRIu32"\n", stream);
-        return AVERROR(EAGAIN);
+        return FFERROR_REDO;
     }
 
     if (stream == 1 && s->nb_streams < 2) {
@@ -317,7 +317,7 @@ static int lxf_read_packet(AVFormatContext *s, AVPacket *pkt)
         return ret2;
 
     if ((ret2 = avio_read(pb, pkt->data, ret)) != ret) {
-        av_free_packet(pkt);
+        av_packet_unref(pkt);
         return ret2 < 0 ? ret2 : AVERROR_EOF;
     }
 
diff --git a/libavformat/matroska.h b/libavformat/matroska.h
index 344b2c32..a654e0c6 100644
--- a/libavformat/matroska.h
+++ b/libavformat/matroska.h
@@ -218,6 +218,7 @@
 #define MATROSKA_ID_CHAPTERDISPLAY      0x80
 #define MATROSKA_ID_CHAPSTRING          0x85
 #define MATROSKA_ID_CHAPLANG            0x437C
+#define MATROSKA_ID_CHAPCOUNTRY         0x437E
 #define MATROSKA_ID_EDITIONUID          0x45BC
 #define MATROSKA_ID_EDITIONFLAGHIDDEN   0x45BD
 #define MATROSKA_ID_EDITIONFLAGDEFAULT  0x45DB
diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
index ca43c28c..d7882321 100644
--- a/libavformat/matroskadec.c
+++ b/libavformat/matroskadec.c
@@ -64,6 +64,8 @@
 #include <zlib.h>
 #endif
 
+#include "qtpalette.h"
+
 typedef enum {
     EBML_NONE,
     EBML_UINT,
@@ -312,6 +314,9 @@ typedef struct MatroskaDemuxContext {
 
     /* WebM DASH Manifest live flag/ */
     int is_live;
+
+    uint32_t palette[AVPALETTE_COUNT];
+    int has_palette;
 } MatroskaDemuxContext;
 
 typedef struct MatroskaBlock {
@@ -474,8 +479,9 @@ static const EbmlSyntax matroska_attachments[] = {
 };
 
 static const EbmlSyntax matroska_chapter_display[] = {
-    { MATROSKA_ID_CHAPSTRING, EBML_UTF8, 0, offsetof(MatroskaChapter, title) },
-    { MATROSKA_ID_CHAPLANG,   EBML_NONE },
+    { MATROSKA_ID_CHAPSTRING,  EBML_UTF8, 0, offsetof(MatroskaChapter, title) },
+    { MATROSKA_ID_CHAPLANG,    EBML_NONE },
+    { MATROSKA_ID_CHAPCOUNTRY, EBML_NONE },
     { 0 }
 };
 
@@ -806,7 +812,7 @@ static int ebml_read_sint(AVIOContext *pb, int size, int64_t *num)
 
         /* big-endian ordering; build up number */
         while (n++ < size)
-            *num = (*num << 8) | avio_r8(pb);
+            *num = ((uint64_t)*num << 8) | avio_r8(pb);
     }
 
     return 0;
@@ -995,6 +1001,15 @@ static int ebml_parse_nest(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
     return res;
 }
 
+static int is_ebml_id_valid(uint32_t id) /* returns nonzero when id passes the length-marker check below, 0 otherwise */
+{
+    // Due to endian nonsense in Matroska, the highest byte with any bits set
+    // will contain the leading length bit. This bit in turn identifies the
+    // total byte length of the element by its position within the byte.
+    unsigned int bits = av_log2(id); // presumably the 0-based index of the highest set bit — confirm against libavutil
+    return id && (bits + 7) / 8 ==  (8 - bits % 8); // id must be nonzero; (bits + 7) / 8 must equal the length implied by the bit's position in its byte
+}
+
 /*
  * Allocate and return the entry for the level1 element with the given ID. If
  * an entry already exists, return the existing entry.
@@ -1005,6 +1020,9 @@ static MatroskaLevel1Element *matroska_find_level1_elem(MatroskaDemuxContext *ma
     int i;
     MatroskaLevel1Element *elem;
 
+    if (!is_ebml_id_valid(id))
+        return NULL;
+
     // Some files link to all clusters; useless.
     if (id == MATROSKA_ID_CLUSTER)
         return NULL;
@@ -1392,24 +1410,55 @@ static void matroska_convert_tags(AVFormatContext *s)
     for (i = 0; i < matroska->tags.nb_elem; i++) {
         if (tags[i].target.attachuid) {
             MatroskaAttachment *attachment = matroska->attachments.elem;
-            for (j = 0; j < matroska->attachments.nb_elem; j++)
+            int found = 0;
+            for (j = 0; j < matroska->attachments.nb_elem; j++) {
                 if (attachment[j].uid == tags[i].target.attachuid &&
-                    attachment[j].stream)
+                    attachment[j].stream) {
                     matroska_convert_tag(s, &tags[i].tag,
                                          &attachment[j].stream->metadata, NULL);
+                    found = 1;
+                }
+            }
+            if (!found) {
+                av_log(NULL, AV_LOG_WARNING,
+                       "The tags at index %d refer to a "
+                       "non-existent attachment %"PRId64".\n",
+                       i, tags[i].target.attachuid);
+            }
         } else if (tags[i].target.chapteruid) {
             MatroskaChapter *chapter = matroska->chapters.elem;
-            for (j = 0; j < matroska->chapters.nb_elem; j++)
+            int found = 0;
+            for (j = 0; j < matroska->chapters.nb_elem; j++) {
                 if (chapter[j].uid == tags[i].target.chapteruid &&
-                    chapter[j].chapter)
+                    chapter[j].chapter) {
                     matroska_convert_tag(s, &tags[i].tag,
                                          &chapter[j].chapter->metadata, NULL);
+                    found = 1;
+                }
+            }
+            if (!found) {
+                av_log(NULL, AV_LOG_WARNING,
+                       "The tags at index %d refer to a non-existent chapter "
+                       "%"PRId64".\n",
+                       i, tags[i].target.chapteruid);
+            }
         } else if (tags[i].target.trackuid) {
             MatroskaTrack *track = matroska->tracks.elem;
-            for (j = 0; j < matroska->tracks.nb_elem; j++)
-                if (track[j].uid == tags[i].target.trackuid && track[j].stream)
+            int found = 0;
+            for (j = 0; j < matroska->tracks.nb_elem; j++) {
+                if (track[j].uid == tags[i].target.trackuid &&
+                    track[j].stream) {
                     matroska_convert_tag(s, &tags[i].tag,
                                          &track[j].stream->metadata, NULL);
+                    found = 1;
+               }
+            }
+            if (!found) {
+                av_log(NULL, AV_LOG_WARNING,
+                       "The tags at index %d refer to a non-existent track "
+                       "%"PRId64".\n",
+                       i, tags[i].target.trackuid);
+            }
         } else {
             matroska_convert_tag(s, &tags[i].tag, &s->metadata,
                                  tags[i].target.type);
@@ -1508,10 +1557,11 @@ static void matroska_add_index_entries(MatroskaDemuxContext *matroska)
 
     index_list = &matroska->index;
     index      = index_list->elem;
-    if (index_list->nb_elem &&
-        index[0].time > 1E14 / matroska->time_scale) {
-        av_log(matroska->ctx, AV_LOG_WARNING, "Working around broken index.\n");
-        index_scale = matroska->time_scale;
+    if (index_list->nb_elem < 2)
+        return;
+    if (index[1].time > 1E14 / matroska->time_scale) {
+        av_log(matroska->ctx, AV_LOG_WARNING, "Dropping apparently-broken index.\n");
+        return;
     }
     for (i = 0; i < index_list->nb_elem; i++) {
         EbmlList *pos_list    = &index[i].pos;
@@ -1634,6 +1684,57 @@ static int matroska_parse_flac(AVFormatContext *s,
     return 0;
 }
 
+/* Pick the display-size multiplier for a Matroska stereo mode: *h_width
+ * becomes 2 for the left/right and column-interleaved modes, *h_height
+ * becomes 2 for the top/bottom and row-interleaved modes. */
+static void mkv_stereo_mode_display_mul(int stereo_mode, int *h_width, int *h_height)
+{
+    switch (stereo_mode) {
+    case MATROSKA_VIDEO_STEREOMODE_TYPE_RIGHT_LEFT:
+    case MATROSKA_VIDEO_STEREOMODE_TYPE_LEFT_RIGHT:
+    case MATROSKA_VIDEO_STEREOMODE_TYPE_COL_INTERLEAVED_RL:
+    case MATROSKA_VIDEO_STEREOMODE_TYPE_COL_INTERLEAVED_LR:
+        *h_width = 2;
+        return;
+    case MATROSKA_VIDEO_STEREOMODE_TYPE_BOTTOM_TOP:
+    case MATROSKA_VIDEO_STEREOMODE_TYPE_TOP_BOTTOM:
+    case MATROSKA_VIDEO_STEREOMODE_TYPE_ROW_INTERLEAVED_RL:
+    case MATROSKA_VIDEO_STEREOMODE_TYPE_ROW_INTERLEAVED_LR:
+        *h_height = 2;
+        return;
+    default:
+        /* every other mode (mono, checkerboard, block, ...) is left as is */
+        return;
+    }
+}
+
+/* Determine the QuickTime fourcc and codec ID from the track's private data.
+ * Returns 0 on success or AVERROR(ENOMEM) if the data cannot be grown. */
+static int get_qt_codec(MatroskaTrack *track, uint32_t *fourcc, enum AVCodecID *codec_id)
+{
+    const AVCodecTag *codec_tags = ff_codec_movaudio_tags;
+
+    if (track->type == MATROSKA_TRACK_TYPE_VIDEO)
+        codec_tags = ff_codec_movvideo_tags;
+
+    /* Noncompliant private data starts with the fourcc itself: make room
+     * for 4 more bytes, shift the payload and store the size in front. */
+    if (ff_codec_get_id(codec_tags, AV_RL32(track->codec_priv.data))) {
+        uint8_t *grown = av_realloc(track->codec_priv.data,
+                                    track->codec_priv.size + 4);
+        if (!grown)
+            return AVERROR(ENOMEM);
+        memmove(grown + 4, grown, track->codec_priv.size);
+        track->codec_priv.data  = grown;
+        track->codec_priv.size += 4;
+        AV_WB32(grown, track->codec_priv.size);
+    }
+
+    *fourcc   = AV_RL32(track->codec_priv.data + 4);
+    *codec_id = ff_codec_get_id(codec_tags, *fourcc);
+    return 0;
+}
+
 static int matroska_parse_tracks(AVFormatContext *s)
 {
     MatroskaDemuxContext *matroska = s->priv_data;
@@ -1780,37 +1881,60 @@ static int matroska_parse_tracks(AVFormatContext *s)
             ffio_init_context(&b, track->codec_priv.data,
                               track->codec_priv.size,
                               0, NULL, NULL, NULL, NULL);
-            ret = ff_get_wav_header(&b, st->codec, track->codec_priv.size, 0);
+            ret = ff_get_wav_header(s, &b, st->codec, track->codec_priv.size, 0);
             if (ret < 0)
                 return ret;
             codec_id         = st->codec->codec_id;
+            fourcc           = st->codec->codec_tag;
             extradata_offset = FFMIN(track->codec_priv.size, 18);
         } else if (!strcmp(track->codec_id, "A_QUICKTIME")
-                   && (track->codec_priv.size >= 86)
+                   /* Normally 36, but allow noncompliant private data */
+                   && (track->codec_priv.size >= 32)
                    && (track->codec_priv.data)) {
-            fourcc = AV_RL32(track->codec_priv.data + 4);
-            codec_id = ff_codec_get_id(ff_codec_movaudio_tags, fourcc);
-            if (ff_codec_get_id(ff_codec_movaudio_tags, AV_RL32(track->codec_priv.data))) {
-                fourcc = AV_RL32(track->codec_priv.data);
-                codec_id = ff_codec_get_id(ff_codec_movaudio_tags, fourcc);
+            uint16_t sample_size;
+            int ret = get_qt_codec(track, &fourcc, &codec_id);
+            if (ret < 0)
+                return ret;
+            sample_size = AV_RB16(track->codec_priv.data + 26);
+            if (fourcc == 0) {
+                if (sample_size == 8) {
+                    fourcc = MKTAG('r','a','w',' ');
+                    codec_id = ff_codec_get_id(ff_codec_movaudio_tags, fourcc);
+                } else if (sample_size == 16) {
+                    fourcc = MKTAG('t','w','o','s');
+                    codec_id = ff_codec_get_id(ff_codec_movaudio_tags, fourcc);
+                }
             }
+            if ((fourcc == MKTAG('t','w','o','s') ||
+                    fourcc == MKTAG('s','o','w','t')) &&
+                    sample_size == 8)
+                codec_id = AV_CODEC_ID_PCM_S8;
         } else if (!strcmp(track->codec_id, "V_QUICKTIME") &&
                    (track->codec_priv.size >= 21)          &&
                    (track->codec_priv.data)) {
-            fourcc   = AV_RL32(track->codec_priv.data + 4);
-            codec_id = ff_codec_get_id(ff_codec_movvideo_tags, fourcc);
-            if (ff_codec_get_id(ff_codec_movvideo_tags, AV_RL32(track->codec_priv.data))) {
-                fourcc   = AV_RL32(track->codec_priv.data);
+            int ret = get_qt_codec(track, &fourcc, &codec_id);
+            if (ret < 0)
+                return ret;
+            if (codec_id == AV_CODEC_ID_NONE && AV_RL32(track->codec_priv.data+4) == AV_RL32("SMI ")) {
+                fourcc = MKTAG('S','V','Q','3');
                 codec_id = ff_codec_get_id(ff_codec_movvideo_tags, fourcc);
             }
-            if (codec_id == AV_CODEC_ID_NONE && AV_RL32(track->codec_priv.data+4) == AV_RL32("SMI "))
-                codec_id = AV_CODEC_ID_SVQ3;
             if (codec_id == AV_CODEC_ID_NONE) {
                 char buf[32];
                 av_get_codec_tag_string(buf, sizeof(buf), fourcc);
                 av_log(matroska->ctx, AV_LOG_ERROR,
                        "mov FourCC not found %s.\n", buf);
             }
+            if (track->codec_priv.size >= 86) {
+                bit_depth = AV_RB16(track->codec_priv.data + 82);
+                ffio_init_context(&b, track->codec_priv.data,
+                                  track->codec_priv.size,
+                                  0, NULL, NULL, NULL, NULL);
+                if (ff_get_qtpalette(codec_id, &b, matroska->palette)) {
+                    bit_depth &= 0x1F;
+                    matroska->has_palette = 1;
+                }
+            }
         } else if (codec_id == AV_CODEC_ID_PCM_S16BE) {
             switch (track->audio.bitdepth) {
             case  8:
@@ -1841,7 +1965,7 @@ static int matroska_parse_tracks(AVFormatContext *s)
         } else if (codec_id == AV_CODEC_ID_AAC && !track->codec_priv.size) {
             int profile = matroska_aac_profile(track->codec_id);
             int sri     = matroska_aac_sri(track->audio.samplerate);
-            extradata   = av_mallocz(5 + FF_INPUT_BUFFER_PADDING_SIZE);
+            extradata   = av_mallocz(5 + AV_INPUT_BUFFER_PADDING_SIZE);
             if (!extradata)
                 return AVERROR(ENOMEM);
             extradata[0] = (profile << 3) | ((sri & 0x0E) >> 1);
@@ -1854,13 +1978,13 @@ static int matroska_parse_tracks(AVFormatContext *s)
                 extradata_size = 5;
             } else
                 extradata_size = 2;
-        } else if (codec_id == AV_CODEC_ID_ALAC && track->codec_priv.size && track->codec_priv.size < INT_MAX - 12 - FF_INPUT_BUFFER_PADDING_SIZE) {
+        } else if (codec_id == AV_CODEC_ID_ALAC && track->codec_priv.size && track->codec_priv.size < INT_MAX - 12 - AV_INPUT_BUFFER_PADDING_SIZE) {
             /* Only ALAC's magic cookie is stored in Matroska's track headers.
              * Create the "atom size", "tag", and "tag version" fields the
              * decoder expects manually. */
             extradata_size = 12 + track->codec_priv.size;
             extradata      = av_mallocz(extradata_size +
-                                        FF_INPUT_BUFFER_PADDING_SIZE);
+                                        AV_INPUT_BUFFER_PADDING_SIZE);
             if (!extradata)
                 return AVERROR(ENOMEM);
             AV_WB32(extradata, extradata_size);
@@ -1870,7 +1994,7 @@ static int matroska_parse_tracks(AVFormatContext *s)
                    track->codec_priv.size);
         } else if (codec_id == AV_CODEC_ID_TTA) {
             extradata_size = 30;
-            extradata      = av_mallocz(extradata_size + FF_INPUT_BUFFER_PADDING_SIZE);
+            extradata      = av_mallocz(extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
             if (!extradata)
                 return AVERROR(ENOMEM);
             ffio_init_context(&b, extradata, extradata_size, 1,
@@ -1993,6 +2117,8 @@ static int matroska_parse_tracks(AVFormatContext *s)
 
         if (track->type == MATROSKA_TRACK_TYPE_VIDEO) {
             MatroskaTrackPlane *planes = track->operation.combine_planes.elem;
+            int display_width_mul  = 1;
+            int display_height_mul = 1;
 
             st->codec->codec_type = AVMEDIA_TYPE_VIDEO;
             st->codec->codec_tag  = fourcc;
@@ -2000,10 +2126,14 @@ static int matroska_parse_tracks(AVFormatContext *s)
                 st->codec->bits_per_coded_sample = bit_depth;
             st->codec->width      = track->video.pixel_width;
             st->codec->height     = track->video.pixel_height;
+
+            if (track->video.stereo_mode && track->video.stereo_mode < MATROSKA_VIDEO_STEREOMODE_TYPE_NB)
+                mkv_stereo_mode_display_mul(track->video.stereo_mode, &display_width_mul, &display_height_mul);
+
             av_reduce(&st->sample_aspect_ratio.num,
                       &st->sample_aspect_ratio.den,
-                      st->codec->height * track->video.display_width,
-                      st->codec->width  * track->video.display_height,
+                      st->codec->height * track->video.display_width  * display_width_mul,
+                      st->codec->width  * track->video.display_height * display_height_mul,
                       255);
             if (st->codec->codec_id != AV_CODEC_ID_HEVC)
                 st->need_parsing = AVSTREAM_PARSE_HEADERS;
@@ -2049,11 +2179,14 @@ static int matroska_parse_tracks(AVFormatContext *s)
             }
         } else if (track->type == MATROSKA_TRACK_TYPE_AUDIO) {
             st->codec->codec_type  = AVMEDIA_TYPE_AUDIO;
+            st->codec->codec_tag   = fourcc;
             st->codec->sample_rate = track->audio.out_samplerate;
             st->codec->channels    = track->audio.channels;
             if (!st->codec->bits_per_coded_sample)
                 st->codec->bits_per_coded_sample = track->audio.bitdepth;
-            if (st->codec->codec_id != AV_CODEC_ID_AAC)
+            if (st->codec->codec_id == AV_CODEC_ID_MP3)
+                st->need_parsing = AVSTREAM_PARSE_FULL;
+            else if (st->codec->codec_id != AV_CODEC_ID_AAC)
                 st->need_parsing = AVSTREAM_PARSE_HEADERS;
             if (track->codec_delay > 0) {
                 st->codec->delay = av_rescale_q(track->codec_delay,
@@ -2247,6 +2380,15 @@ static int matroska_deliver_packet(MatroskaDemuxContext *matroska,
     if (matroska->num_packets > 0) {
         memcpy(pkt, matroska->packets[0], sizeof(AVPacket));
         av_freep(&matroska->packets[0]);
+        if (matroska->has_palette) {
+            uint8_t *pal = av_packet_new_side_data(pkt, AV_PKT_DATA_PALETTE, AVPALETTE_SIZE);
+            if (!pal) {
+                av_log(matroska->ctx, AV_LOG_ERROR, "Cannot append palette to packet\n");
+            } else {
+                memcpy(pal, matroska->palette, AVPALETTE_SIZE);
+            }
+            matroska->has_palette = 0;
+        }
         if (matroska->num_packets > 1) {
             void *newpackets;
             memmove(&matroska->packets[0], &matroska->packets[1],
@@ -2276,7 +2418,7 @@ static void matroska_clear_queue(MatroskaDemuxContext *matroska)
     if (matroska->packets) {
         int n;
         for (n = 0; n < matroska->num_packets; n++) {
-            av_free_packet(matroska->packets[n]);
+            av_packet_unref(matroska->packets[n]);
             av_freep(&matroska->packets[n]);
         }
         av_freep(&matroska->packets);
@@ -2734,7 +2876,7 @@ static int matroska_parse_frame(MatroskaDemuxContext *matroska,
                                                      AV_PKT_DATA_MATROSKA_BLOCKADDITIONAL,
                                                      additional_size + 8);
         if (!side_data) {
-            av_free_packet(pkt);
+            av_packet_unref(pkt);
             av_free(pkt);
             return AVERROR(ENOMEM);
         }
@@ -2747,7 +2889,7 @@ static int matroska_parse_frame(MatroskaDemuxContext *matroska,
                                                      AV_PKT_DATA_SKIP_SAMPLES,
                                                      10);
         if (!side_data) {
-            av_free_packet(pkt);
+            av_packet_unref(pkt);
             av_free(pkt);
             return AVERROR(ENOMEM);
         }
@@ -2762,29 +2904,15 @@ static int matroska_parse_frame(MatroskaDemuxContext *matroska,
     else
         pkt->pts = timecode;
     pkt->pos = pos;
+    pkt->duration = lace_duration;
+
+#if FF_API_CONVERGENCE_DURATION
+FF_DISABLE_DEPRECATION_WARNINGS
     if (st->codec->codec_id == AV_CODEC_ID_SUBRIP) {
-        /*
-         * For backward compatibility.
-         * Historically, we have put subtitle duration
-         * in convergence_duration, on the off chance
-         * that the time_scale is less than 1us, which
-         * could result in a 32bit overflow on the
-         * normal duration field.
-         */
         pkt->convergence_duration = lace_duration;
     }
-
-    if (track->type != MATROSKA_TRACK_TYPE_SUBTITLE ||
-        lace_duration <= INT_MAX) {
-        /*
-         * For non subtitle tracks, just store the duration
-         * as normal.
-         *
-         * If it's a subtitle track and duration value does
-         * not overflow a uint32, then also store it normally.
-         */
-        pkt->duration = lace_duration;
-    }
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
     dynarray_add(&matroska->packets, &matroska->num_packets, pkt);
     matroska->prev_pkt = pkt;
@@ -3074,6 +3202,7 @@ static int matroska_read_seek(AVFormatContext *s, int stream_index,
         tracks[i].audio.buf_timecode   = AV_NOPTS_VALUE;
         tracks[i].end_timecode         = 0;
         if (tracks[i].type == MATROSKA_TRACK_TYPE_SUBTITLE &&
+            tracks[i].stream &&
             tracks[i].stream->discard != AVDISCARD_ALL) {
             index_sub = av_index_search_timestamp(
                 tracks[i].stream, st->index_entries[index].timestamp,
@@ -3481,7 +3610,7 @@ static int webm_dash_manifest_read_packet(AVFormatContext *s, AVPacket *pkt)
 
 #define OFFSET(x) offsetof(MatroskaDemuxContext, x)
 static const AVOption options[] = {
-    { "live", "flag indicating that the input is a live file that only has the headers.", OFFSET(is_live), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, AV_OPT_FLAG_DECODING_PARAM },
+    { "live", "flag indicating that the input is a live file that only has the headers.", OFFSET(is_live), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AV_OPT_FLAG_DECODING_PARAM },
     { NULL },
 };
 
diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c
index 3b525ad4..f42434b5 100644
--- a/libavformat/matroskaenc.c
+++ b/libavformat/matroskaenc.c
@@ -44,6 +44,7 @@
 #include "libavutil/mathematics.h"
 #include "libavutil/opt.h"
 #include "libavutil/random_seed.h"
+#include "libavutil/rational.h"
 #include "libavutil/samplefmt.h"
 #include "libavutil/sha.h"
 #include "libavutil/stereo3d.h"
@@ -131,6 +132,9 @@ typedef struct MatroskaMuxContext {
 
     int64_t last_track_timestamp[MAX_TRACKS];
 
+    int64_t* stream_durations;
+    int64_t* stream_duration_offsets;
+
     int allow_raw_vfw;
 } MatroskaMuxContext;
 
@@ -304,6 +308,23 @@ static void put_xiph_size(AVIOContext *pb, int size)
     avio_w8(pb, size % 255);
 }
 
+/**
+ * Free the members allocated in the mux context and reset them to NULL.
+ * Safe to call when only part of the context has been set up.
+ */
+static void mkv_free(MatroskaMuxContext *mkv) {
+    /* The entries arrays hang off their owners, so release them first. */
+    if (mkv->main_seekhead)
+        av_freep(&mkv->main_seekhead->entries);
+    av_freep(&mkv->main_seekhead);
+    if (mkv->cues)
+        av_freep(&mkv->cues->entries);
+    av_freep(&mkv->cues);
+    av_freep(&mkv->tracks);
+    av_freep(&mkv->stream_durations);
+    av_freep(&mkv->stream_duration_offsets);
+}
+
 /**
  * Initialize a mkv_seekhead element to be ready to index level 1 Matroska
  * elements. If a maximum number of elements is specified, enough space
@@ -364,8 +385,9 @@ static int mkv_add_seekhead_entry(mkv_seekhead *seekhead, unsigned int elementid
  * @return The file offset where the seekhead was written,
  * -1 if an error occurred.
  */
-static int64_t mkv_write_seekhead(AVIOContext *pb, mkv_seekhead *seekhead)
+static int64_t mkv_write_seekhead(AVIOContext *pb, MatroskaMuxContext *mkv)
 {
+    mkv_seekhead *seekhead = mkv->main_seekhead;
     ebml_master metaseek, seekentry;
     int64_t currentpos;
     int i;
@@ -402,8 +424,8 @@ static int64_t mkv_write_seekhead(AVIOContext *pb, mkv_seekhead *seekhead)
         currentpos = seekhead->filepos;
     }
 fail:
-    av_freep(&seekhead->entries);
-    av_free(seekhead);
+    av_freep(&mkv->main_seekhead->entries);
+    av_freep(&mkv->main_seekhead);
 
     return currentpos;
 }
@@ -823,9 +845,6 @@ static int mkv_write_track(AVFormatContext *s, MatroskaMuxContext *mkv,
     int j, ret;
     AVDictionaryEntry *tag;
 
-    // ms precision is the de-facto standard timescale for mkv files
-    avpriv_set_pts_info(st, 64, 1, 1000);
-
     if (codec->codec_type == AVMEDIA_TYPE_ATTACHMENT) {
         mkv->have_attachments = 1;
         return 0;
@@ -904,14 +923,18 @@ static int mkv_write_track(AVFormatContext *s, MatroskaMuxContext *mkv,
     }
 
     if (codec->codec_type == AVMEDIA_TYPE_AUDIO && codec->initial_padding && codec->codec_id == AV_CODEC_ID_OPUS) {
+        int64_t codecdelay = av_rescale_q(codec->initial_padding,
+                                          (AVRational){ 1, 48000 },
+                                          (AVRational){ 1, 1000000000 });
+        if (codecdelay < 0) {
+            av_log(s, AV_LOG_ERROR, "Initial padding is invalid\n");
+            return AVERROR(EINVAL);
+        }
 //         mkv->tracks[i].ts_offset = av_rescale_q(codec->initial_padding,
 //                                                 (AVRational){ 1, codec->sample_rate },
 //                                                 st->time_base);
 
-        put_ebml_uint(pb, MATROSKA_ID_CODECDELAY,
-                      av_rescale_q(codec->initial_padding,
-                                   (AVRational){ 1, codec->sample_rate },
-                                   (AVRational){ 1, 1000000000 }));
+        put_ebml_uint(pb, MATROSKA_ID_CODECDELAY, codecdelay);
     }
     if (codec->codec_id == AV_CODEC_ID_OPUS) {
         put_ebml_uint(pb, MATROSKA_ID_SEEKPREROLL, OPUS_SEEK_PREROLL);
@@ -1151,12 +1174,12 @@ static int mkv_write_simpletag(AVIOContext *pb, AVDictionaryEntry *t)
     return 0;
 }
 
-static int mkv_write_tag(AVFormatContext *s, AVDictionary *m, unsigned int elementid,
-                         unsigned int uid, ebml_master *tags)
+static int mkv_write_tag_targets(AVFormatContext *s,
+                                 unsigned int elementid, unsigned int uid,
+                                 ebml_master *tags, ebml_master* tag)
 {
     MatroskaMuxContext *mkv = s->priv_data;
-    ebml_master tag, targets;
-    AVDictionaryEntry *t = NULL;
+    ebml_master targets;
     int ret;
 
     if (!tags->pos) {
@@ -1166,16 +1189,32 @@ static int mkv_write_tag(AVFormatContext *s, AVDictionary *m, unsigned int eleme
         *tags = start_ebml_master(s->pb, MATROSKA_ID_TAGS, 0);
     }
 
-    tag     = start_ebml_master(s->pb, MATROSKA_ID_TAG,        0);
+    *tag     = start_ebml_master(s->pb, MATROSKA_ID_TAG,        0);
     targets = start_ebml_master(s->pb, MATROSKA_ID_TAGTARGETS, 0);
     if (elementid)
         put_ebml_uint(s->pb, elementid, uid);
     end_ebml_master(s->pb, targets);
+    return 0;
+}
+
+static int mkv_write_tag(AVFormatContext *s, AVDictionary *m, unsigned int elementid,
+                         unsigned int uid, ebml_master *tags)
+{
+    ebml_master tag;
+    int ret;
+    AVDictionaryEntry *t = NULL;
+
+    ret = mkv_write_tag_targets(s, elementid, uid, tags, &tag);
+    if (ret < 0)
+        return ret;
 
     while ((t = av_dict_get(m, "", t, AV_DICT_IGNORE_SUFFIX))) {
         if (av_strcasecmp(t->key, "title") &&
             av_strcasecmp(t->key, "stereo_mode") &&
-            av_strcasecmp(t->key, "encoding_tool")) {
+            av_strcasecmp(t->key, "creation_time") &&
+            av_strcasecmp(t->key, "encoding_tool") &&
+            (elementid != MATROSKA_ID_TAGTARGETS_TRACKUID ||
+             av_strcasecmp(t->key, "language"))) {
             ret = mkv_write_simpletag(s->pb, t);
             if (ret < 0)
                 return ret;
@@ -1220,6 +1259,25 @@ static int mkv_write_tags(AVFormatContext *s)
         if (ret < 0) return ret;
     }
 
+    if (!mkv->is_live) {
+        for (i = 0; i < s->nb_streams; i++) {
+            ebml_master tag_target;
+            ebml_master tag;
+
+            mkv_write_tag_targets(s, MATROSKA_ID_TAGTARGETS_TRACKUID, i + 1, &tags, &tag_target);
+
+            tag = start_ebml_master(s->pb, MATROSKA_ID_SIMPLETAG, 0);
+            put_ebml_string(s->pb, MATROSKA_ID_TAGNAME, "DURATION");
+            mkv->stream_duration_offsets[i] = avio_tell(s->pb);
+
+            // Reserve space to write duration as a 20-byte string.
+            // 2 (ebml id) + 1 (data size) + 20 (data)
+            put_ebml_void(s->pb, 23);
+            end_ebml_master(s->pb, tag);
+            end_ebml_master(s->pb, tag_target);
+        }
+    }
+
     for (i = 0; i < s->nb_chapters; i++) {
         AVChapter *ch = s->chapters[i];
 
@@ -1326,17 +1384,13 @@ static int mkv_write_header(AVFormatContext *s)
     ebml_master ebml_header, segment_info;
     AVDictionaryEntry *tag;
     int ret, i, version = 2;
+    int64_t creation_time;
 
     if (!strcmp(s->oformat->name, "webm"))
         mkv->mode = MODE_WEBM;
     else
         mkv->mode = MODE_MATROSKAv2;
 
-    if (s->avoid_negative_ts < 0) {
-        s->avoid_negative_ts = 1;
-        s->internal->avoid_negative_ts_use_pts = 1;
-    }
-
     if (mkv->mode != MODE_WEBM ||
         av_dict_get(s->metadata, "stereo_mode", NULL, 0) ||
         av_dict_get(s->metadata, "alpha_mode", NULL, 0))
@@ -1361,9 +1415,10 @@ static int mkv_write_header(AVFormatContext *s)
     }
 
     mkv->tracks = av_mallocz_array(s->nb_streams, sizeof(*mkv->tracks));
-    if (!mkv->tracks)
-        return AVERROR(ENOMEM);
-
+    if (!mkv->tracks) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
     ebml_header = start_ebml_master(pb, EBML_ID_HEADER, 0);
     put_ebml_uint   (pb, EBML_ID_EBMLVERSION        ,           1);
     put_ebml_uint   (pb, EBML_ID_EBMLREADVERSION    ,           1);
@@ -1383,11 +1438,13 @@ static int mkv_write_header(AVFormatContext *s)
     // isn't more than 10 elements if we only write one of each other
     // currently defined level 1 element
     mkv->main_seekhead    = mkv_start_seekhead(pb, mkv->segment_offset, 10);
-    if (!mkv->main_seekhead)
-        return AVERROR(ENOMEM);
+    if (!mkv->main_seekhead) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
 
     ret = mkv_add_seekhead_entry(mkv->main_seekhead, MATROSKA_ID_INFO, avio_tell(pb));
-    if (ret < 0) return ret;
+    if (ret < 0) goto fail;
 
     segment_info = start_ebml_master(pb, MATROSKA_ID_INFO, 0);
     put_ebml_uint(pb, MATROSKA_ID_TIMECODESCALE, 1000000);
@@ -1414,9 +1471,9 @@ static int mkv_write_header(AVFormatContext *s)
         put_ebml_string(pb, MATROSKA_ID_WRITINGAPP, ident);
     }
 
-    if (tag = av_dict_get(s->metadata, "creation_time", NULL, 0)) {
+    if (ff_parse_creation_time_metadata(s, &creation_time, 0) > 0) {
         // Adjust time so it's relative to 2001-01-01 and convert to nanoseconds.
-        int64_t date_utc = (ff_iso8601_to_unix_time(tag->value) - 978307200) * 1000000000;
+        int64_t date_utc = (creation_time - 978307200000000LL) * 1000;
         uint8_t date_utc_buf[8];
         AV_WB64(date_utc_buf, date_utc);
         put_ebml_binary(pb, MATROSKA_ID_DATEUTC, date_utc_buf, 8);
@@ -1430,9 +1487,13 @@ static int mkv_write_header(AVFormatContext *s)
     }
     end_ebml_master(pb, segment_info);
 
+    // initialize stream_duration fields
+    mkv->stream_durations = av_mallocz(s->nb_streams * sizeof(int64_t));
+    mkv->stream_duration_offsets = av_mallocz(s->nb_streams * sizeof(int64_t));
+
     ret = mkv_write_tracks(s);
     if (ret < 0)
-        return ret;
+        goto fail;
 
     for (i = 0; i < s->nb_chapters; i++)
         mkv->chapter_id_offset = FFMAX(mkv->chapter_id_offset, 1LL - s->chapters[i]->id);
@@ -1440,24 +1501,25 @@ static int mkv_write_header(AVFormatContext *s)
     if (mkv->mode != MODE_WEBM) {
         ret = mkv_write_chapters(s);
         if (ret < 0)
-            return ret;
+            goto fail;
 
         ret = mkv_write_tags(s);
         if (ret < 0)
-            return ret;
+            goto fail;
 
         ret = mkv_write_attachments(s);
         if (ret < 0)
-            return ret;
+            goto fail;
     }
 
     if (!s->pb->seekable && !mkv->is_live)
-        mkv_write_seekhead(pb, mkv->main_seekhead);
+        mkv_write_seekhead(pb, mkv);
 
     mkv->cues = mkv_start_cues(mkv->segment_offset);
-    if (!mkv->cues)
-        return AVERROR(ENOMEM);
-
+    if (!mkv->cues) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
     if (pb->seekable && mkv->reserve_cues_space) {
         mkv->cues_pos = avio_tell(pb);
         put_ebml_void(pb, mkv->reserve_cues_space);
@@ -1484,6 +1546,9 @@ static int mkv_write_header(AVFormatContext *s)
     }
 
     return 0;
+fail:
+    mkv_free(mkv);
+    return ret;
 }
 
 static int mkv_blockgroup_size(int pkt_size)
@@ -1564,7 +1629,7 @@ static void mkv_write_block(AVFormatContext *s, AVIOContext *pb,
     ebml_master block_group, block_additions, block_more;
 
     av_log(s, AV_LOG_DEBUG, "Writing block at offset %" PRIu64 ", size %d, "
-           "pts %" PRId64 ", dts %" PRId64 ", duration %d, keyframe %d\n",
+           "pts %" PRId64 ", dts %" PRId64 ", duration %" PRId64 ", keyframe %d\n",
            avio_tell(pb), pkt->size, pkt->pts, pkt->dts, pkt->duration,
            keyframe != 0);
     if (codec->codec_id == AV_CODEC_ID_H264 && codec->extradata_size > 0 &&
@@ -1669,7 +1734,7 @@ static int mkv_write_vtt_blocks(AVFormatContext *s, AVIOContext *pb, AVPacket *p
     size = id_size + 1 + settings_size + 1 + pkt->size;
 
     av_log(s, AV_LOG_DEBUG, "Writing block at offset %" PRIu64 ", size %d, "
-           "pts %" PRId64 ", dts %" PRId64 ", duration %d, flags %d\n",
+           "pts %" PRId64 ", dts %" PRId64 ", duration %" PRId64 ", flags %d\n",
            avio_tell(pb), size, pkt->pts, pkt->dts, pkt->duration, flags);
 
     blockgroup = start_ebml_master(pb, MATROSKA_ID_BLOCKGROUP, mkv_blockgroup_size(size));
@@ -1772,33 +1837,45 @@ static int mkv_write_packet_internal(AVFormatContext *s, AVPacket *pkt, int add_
 
     if (codec->codec_type != AVMEDIA_TYPE_SUBTITLE) {
         mkv_write_block(s, pb, MATROSKA_ID_SIMPLEBLOCK, pkt, keyframe);
-        if (codec->codec_type == AVMEDIA_TYPE_VIDEO && keyframe || add_cue) {
+        if (s->pb->seekable && (codec->codec_type == AVMEDIA_TYPE_VIDEO && keyframe || add_cue)) {
             ret = mkv_add_cuepoint(mkv->cues, pkt->stream_index, dash_tracknum, ts, mkv->cluster_pos, relative_packet_pos, -1);
             if (ret < 0) return ret;
         }
     } else {
-    if (codec->codec_id == AV_CODEC_ID_WEBVTT) {
-        duration = mkv_write_vtt_blocks(s, pb, pkt);
-    } else {
-        ebml_master blockgroup = start_ebml_master(pb, MATROSKA_ID_BLOCKGROUP,
-                                                   mkv_blockgroup_size(pkt->size));
-        /* For backward compatibility, prefer convergence_duration. */
-        if (pkt->convergence_duration > 0) {
-            duration = pkt->convergence_duration;
+        if (codec->codec_id == AV_CODEC_ID_WEBVTT) {
+            duration = mkv_write_vtt_blocks(s, pb, pkt);
+        } else {
+            ebml_master blockgroup = start_ebml_master(pb, MATROSKA_ID_BLOCKGROUP,
+                                                       mkv_blockgroup_size(pkt->size));
+
+#if FF_API_CONVERGENCE_DURATION
+FF_DISABLE_DEPRECATION_WARNINGS
+            /* For backward compatibility, prefer convergence_duration. */
+            if (pkt->convergence_duration > 0) {
+                duration = pkt->convergence_duration;
+            }
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+            /* All subtitle blocks are considered to be keyframes. */
+            mkv_write_block(s, pb, MATROSKA_ID_BLOCK, pkt, 1);
+            put_ebml_uint(pb, MATROSKA_ID_BLOCKDURATION, duration);
+            end_ebml_master(pb, blockgroup);
         }
-        /* All subtitle blocks are considered to be keyframes. */
-        mkv_write_block(s, pb, MATROSKA_ID_BLOCK, pkt, 1);
-        put_ebml_uint(pb, MATROSKA_ID_BLOCKDURATION, duration);
-        end_ebml_master(pb, blockgroup);
-    }
 
-    ret = mkv_add_cuepoint(mkv->cues, pkt->stream_index, dash_tracknum, ts,
-                           mkv->cluster_pos, relative_packet_pos, duration);
-    if (ret < 0)
-        return ret;
+        if (s->pb->seekable) {
+            ret = mkv_add_cuepoint(mkv->cues, pkt->stream_index, dash_tracknum, ts,
+                                   mkv->cluster_pos, relative_packet_pos, duration);
+            if (ret < 0)
+                return ret;
+        }
     }
 
     mkv->duration = FFMAX(mkv->duration, ts + duration);
+
+    if (mkv->stream_durations)
+        mkv->stream_durations[pkt->stream_index] =
+            FFMAX(mkv->stream_durations[pkt->stream_index], ts + duration);
+
     return 0;
 }
 
@@ -1855,7 +1932,7 @@ static int mkv_write_packet(AVFormatContext *s, AVPacket *pkt)
         // for DASH audio, a CuePoint has to be added when there is a new cluster.
         ret = mkv_write_packet_internal(s, &mkv->cur_audio_pkt,
                                         mkv->is_dash ? start_new_cluster : 0);
-        av_free_packet(&mkv->cur_audio_pkt);
+        av_packet_unref(&mkv->cur_audio_pkt);
         if (ret < 0) {
             av_log(s, AV_LOG_ERROR,
                    "Could not write cached audio packet ret:%d\n", ret);
@@ -1866,15 +1943,7 @@ static int mkv_write_packet(AVFormatContext *s, AVPacket *pkt)
     // buffer an audio packet to ensure the packet containing the video
     // keyframe's timecode is contained in the same cluster for WebM
     if (codec_type == AVMEDIA_TYPE_AUDIO) {
-        mkv->cur_audio_pkt = *pkt;
-        if (pkt->buf) {
-            mkv->cur_audio_pkt.buf = av_buffer_ref(pkt->buf);
-            ret = mkv->cur_audio_pkt.buf ? 0 : AVERROR(ENOMEM);
-        } else
-            ret = av_dup_packet(&mkv->cur_audio_pkt);
-        if (mkv->cur_audio_pkt.side_data_elems > 0) {
-            ret = av_copy_packet_side_data(&mkv->cur_audio_pkt, &mkv->cur_audio_pkt);
-        }
+        ret = av_packet_ref(&mkv->cur_audio_pkt, pkt);
     } else
         ret = mkv_write_packet_internal(s, pkt, 0);
     return ret;
@@ -1914,7 +1983,7 @@ static int mkv_write_trailer(AVFormatContext *s)
     // check if we have an audio packet cached
     if (mkv->cur_audio_pkt.size > 0) {
         ret = mkv_write_packet_internal(s, &mkv->cur_audio_pkt, 0);
-        av_free_packet(&mkv->cur_audio_pkt);
+        av_packet_unref(&mkv->cur_audio_pkt);
         if (ret < 0) {
             av_log(s, AV_LOG_ERROR,
                    "Could not write cached audio packet ret:%d\n", ret);
@@ -1968,7 +2037,7 @@ static int mkv_write_trailer(AVFormatContext *s)
                 return ret;
         }
 
-        mkv_write_seekhead(pb, mkv->main_seekhead);
+        mkv_write_seekhead(pb, mkv);
 
         // update the duration
         av_log(s, AV_LOG_DEBUG, "end duration = %" PRIu64 "\n", mkv->duration);
@@ -1976,16 +2045,37 @@ static int mkv_write_trailer(AVFormatContext *s)
         avio_seek(pb, mkv->duration_offset, SEEK_SET);
         put_ebml_float(pb, MATROSKA_ID_DURATION, mkv->duration);
 
+        // update stream durations
+        if (mkv->stream_durations) {
+            int i;
+            for (i = 0; i < s->nb_streams; ++i) {
+                AVStream *st = s->streams[i];
+                double duration_sec = mkv->stream_durations[i] * av_q2d(st->time_base);
+                char duration_string[20] = "";
+
+                av_log(s, AV_LOG_DEBUG, "stream %d end duration = %" PRIu64 "\n", i,
+                       mkv->stream_durations[i]);
+
+                if (!mkv->is_live && mkv->stream_duration_offsets[i] > 0) {
+                    avio_seek(pb, mkv->stream_duration_offsets[i], SEEK_SET);
+
+                    snprintf(duration_string, 20, "%02d:%02d:%012.9f",
+                             (int) duration_sec / 3600, ((int) duration_sec / 60) % 60,
+                             fmod(duration_sec, 60));
+
+                    put_ebml_binary(pb, MATROSKA_ID_TAGSTRING, duration_string, 20);
+                }
+            }
+        }
+
         avio_seek(pb, currentpos, SEEK_SET);
     }
 
     if (!mkv->is_live) {
         end_ebml_master(pb, mkv->segment);
     }
-    av_freep(&mkv->tracks);
-    av_freep(&mkv->cues->entries);
-    av_freep(&mkv->cues);
 
+    mkv_free(mkv);
     return 0;
 }
 
@@ -2006,6 +2096,35 @@ static int mkv_query_codec(enum AVCodecID codec_id, int std_compliance)
     return 0;
 }
 
+/* Muxer init hook: choose timestamp-shift defaults and the stream time base. */
+static int mkv_init(struct AVFormatContext *s)
+{
+    int idx = 0;
+
+    /* Only override the setting when the stored value is negative. */
+    if (s->avoid_negative_ts < 0) {
+        s->internal->avoid_negative_ts_use_pts = 1;
+        s->avoid_negative_ts = 1;
+    }
+
+    // ms precision is the de-facto standard timescale for mkv files
+    while (idx < s->nb_streams)
+        avpriv_set_pts_info(s->streams[idx++], 64, 1, 1000);
+    return 0;
+}
+
+static int mkv_check_bitstream(struct AVFormatContext *s, const AVPacket *pkt)
+{
+    AVStream *st = s->streams[pkt->stream_index];
+    /* AAC packet whose first 12 bits are all set: needs aac_adtstoasc. */
+    int is_adts = st->codec->codec_id == AV_CODEC_ID_AAC && pkt->size > 2 &&
+                  (AV_RB16(pkt->data) & 0xfff0) == 0xfff0;
+
+    if (is_adts)
+        return ff_stream_add_bitstream_filter(st, "aac_adtstoasc", NULL);
+    return 1;
+}
+
 static const AVCodecTag additional_audio_tags[] = {
     { AV_CODEC_ID_ALAC,      0XFFFFFFFF },
     { AV_CODEC_ID_EAC3,      0XFFFFFFFF },
@@ -2043,10 +2162,10 @@ static const AVOption options[] = {
     { "reserve_index_space", "Reserve a given amount of space (in bytes) at the beginning of the file for the index (cues).", OFFSET(reserve_cues_space), AV_OPT_TYPE_INT,   { .i64 = 0 },   0, INT_MAX,   FLAGS },
     { "cluster_size_limit",  "Store at most the provided amount of bytes in a cluster. ",                                     OFFSET(cluster_size_limit), AV_OPT_TYPE_INT  , { .i64 = -1 }, -1, INT_MAX,   FLAGS },
     { "cluster_time_limit",  "Store at most the provided number of milliseconds in a cluster.",                               OFFSET(cluster_time_limit), AV_OPT_TYPE_INT64, { .i64 = -1 }, -1, INT64_MAX, FLAGS },
-    { "dash", "Create a WebM file conforming to WebM DASH specification", OFFSET(is_dash), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, FLAGS },
+    { "dash", "Create a WebM file conforming to WebM DASH specification", OFFSET(is_dash), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
     { "dash_track_number", "Track number for the DASH stream", OFFSET(dash_track_number), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 127, FLAGS },
-    { "live", "Write files assuming it is a live stream.", OFFSET(is_live), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, FLAGS },
-    { "allow_raw_vfw", "allow RAW VFW mode", OFFSET(allow_raw_vfw), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, FLAGS },
+    { "live", "Write files assuming it is a live stream.", OFFSET(is_live), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
+    { "allow_raw_vfw", "allow RAW VFW mode", OFFSET(allow_raw_vfw), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
     { NULL },
 };
 
@@ -2068,6 +2187,7 @@ AVOutputFormat ff_matroska_muxer = {
                          AV_CODEC_ID_VORBIS : AV_CODEC_ID_AC3,
     .video_codec       = CONFIG_LIBX264_ENCODER ?
                          AV_CODEC_ID_H264 : AV_CODEC_ID_MPEG4,
+    .init              = mkv_init,
     .write_header      = mkv_write_header,
     .write_packet      = mkv_write_flush_packet,
     .write_trailer     = mkv_write_trailer,
@@ -2079,6 +2199,7 @@ AVOutputFormat ff_matroska_muxer = {
     },
     .subtitle_codec    = AV_CODEC_ID_ASS,
     .query_codec       = mkv_query_codec,
+    .check_bitstream   = mkv_check_bitstream,
     .priv_class        = &matroska_class,
 };
 #endif
@@ -2097,12 +2218,14 @@ AVOutputFormat ff_webm_muxer = {
     .mime_type         = "video/webm",
     .extensions        = "webm",
     .priv_data_size    = sizeof(MatroskaMuxContext),
-    .audio_codec       = AV_CODEC_ID_VORBIS,
-    .video_codec       = AV_CODEC_ID_VP8,
+    .audio_codec       = CONFIG_LIBOPUS_ENCODER ? AV_CODEC_ID_OPUS : AV_CODEC_ID_VORBIS,
+    .video_codec       = CONFIG_LIBVPX_VP9_ENCODER? AV_CODEC_ID_VP9 : AV_CODEC_ID_VP8,
     .subtitle_codec    = AV_CODEC_ID_WEBVTT,
+    .init              = mkv_init,
     .write_header      = mkv_write_header,
     .write_packet      = mkv_write_flush_packet,
     .write_trailer     = mkv_write_trailer,
+    .check_bitstream   = mkv_check_bitstream,
     .flags             = AVFMT_GLOBALHEADER | AVFMT_VARIABLE_FPS |
                          AVFMT_TS_NONSTRICT | AVFMT_ALLOW_FLUSH,
     .priv_class        = &webm_class,
@@ -2125,9 +2248,11 @@ AVOutputFormat ff_matroska_audio_muxer = {
     .audio_codec       = CONFIG_LIBVORBIS_ENCODER ?
                          AV_CODEC_ID_VORBIS : AV_CODEC_ID_AC3,
     .video_codec       = AV_CODEC_ID_NONE,
+    .init              = mkv_init,
     .write_header      = mkv_write_header,
     .write_packet      = mkv_write_flush_packet,
     .write_trailer     = mkv_write_trailer,
+    .check_bitstream   = mkv_check_bitstream,
     .flags             = AVFMT_GLOBALHEADER | AVFMT_TS_NONSTRICT |
                          AVFMT_ALLOW_FLUSH,
     .codec_tag         = (const AVCodecTag* const []){
diff --git a/libavformat/md5enc.c b/libavformat/md5enc.c
index 8e87f095..8433be44 100644
--- a/libavformat/md5enc.c
+++ b/libavformat/md5enc.c
@@ -107,7 +107,8 @@ AVOutputFormat ff_md5_muxer = {
     .write_header      = write_header,
     .write_packet      = write_packet,
     .write_trailer     = write_trailer,
-    .flags             = AVFMT_NOTIMESTAMPS,
+    .flags             = AVFMT_VARIABLE_FPS | AVFMT_TS_NONSTRICT |
+                         AVFMT_TS_NEGATIVE,
     .priv_class        = &md5enc_class,
 };
 #endif
@@ -134,7 +135,7 @@ static int framemd5_write_packet(struct AVFormatContext *s, AVPacket *pkt)
     av_hash_init(c->hash);
     av_hash_update(c->hash, pkt->data, pkt->size);
 
-    snprintf(buf, sizeof(buf) - 64, "%d, %10"PRId64", %10"PRId64", %8d, %8d, ",
+    snprintf(buf, sizeof(buf) - 64, "%d, %10"PRId64", %10"PRId64", %8"PRId64", %8d, ",
              pkt->stream_index, pkt->dts, pkt->pts, pkt->duration, pkt->size);
     md5_finish(s, buf);
     return 0;
diff --git a/libavformat/md5proto.c b/libavformat/md5proto.c
index 6af0a6ed..9a092e4b 100644
--- a/libavformat/md5proto.c
+++ b/libavformat/md5proto.c
@@ -69,8 +69,9 @@ static int md5_close(URLContext *h)
     av_strstart(filename, "md5:", &filename);
 
     if (*filename) {
-        err = ffurl_open(&out, filename, AVIO_FLAG_WRITE,
-                         &h->interrupt_callback, NULL);
+        err = ffurl_open_whitelist(&out, filename, AVIO_FLAG_WRITE,
+                                   &h->interrupt_callback, NULL,
+                                   h->protocol_whitelist);
         if (err)
             return err;
         err = ffurl_write(out, buf, i*2+1);
diff --git a/libavformat/microdvddec.c b/libavformat/microdvddec.c
index a3839051..727ff947 100644
--- a/libavformat/microdvddec.c
+++ b/libavformat/microdvddec.c
@@ -141,7 +141,7 @@ static int microdvd_read_header(AVFormatContext *s)
         sub->pts = get_pts(line);
         sub->duration = get_duration(line);
     }
-    ff_subtitles_queue_finalize(&microdvd->q);
+    ff_subtitles_queue_finalize(s, &microdvd->q);
     if (has_real_fps) {
         /* export the FPS info only if set in the file */
         microdvd->frame_rate = pts_info;
diff --git a/libavformat/mlpdec.c b/libavformat/mlpdec.c
new file mode 100644
index 00000000..d82df219
--- /dev/null
+++ b/libavformat/mlpdec.c
@@ -0,0 +1,88 @@
+/*
+ * MLP and TrueHD demuxer
+ * Copyright (c) 2001 Fabrice Bellard
+ * Copyright (c) 2005 Alex Beregszaszi
+ * Copyright (c) 2015 Carl Eugen Hoyos
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avformat.h"
+#include "rawdec.h"
+#include "libavutil/intreadwrite.h"
+
+/* Probe helper shared by MLP and TrueHD: returns AVPROBE_SCORE_MAX once enough
+ * sync-carrying frames directly follow one another, 0 otherwise. */
+static int av_always_inline mlp_thd_probe(AVProbeData *p, uint32_t sync)
+{
+    const uint8_t *buf, *last_buf = p->buf, *end = p->buf + p->buf_size;
+    int valid = 0, size = 0, nsubframes = 0;
+
+    for (buf = p->buf; end - buf >= 8; buf++) {
+        if (AV_RB32(buf + 4) == sync) {
+            /* Credit the previous sync frame only if it ended exactly here. */
+            if (buf - last_buf == size)
+                valid += 1 + nsubframes / 8;
+            nsubframes = 0;
+            last_buf = buf;
+            size = (AV_RB16(buf) & 0xfff) * 2;
+        } else if (buf - last_buf == size) {
+            nsubframes++;
+            size += (AV_RB16(buf) & 0xfff) * 2;
+        }
+    }
+    if (valid >= 100)
+        return AVPROBE_SCORE_MAX;
+    return 0;
+}
+
+#if CONFIG_MLP_DEMUXER
+static int mlp_probe(AVProbeData *p)
+{
+    return mlp_thd_probe(p, 0xf8726fbb); /* sync word matched for raw MLP */
+}
+
+AVInputFormat ff_mlp_demuxer = { /* raw MLP: shared raw-audio readers + mlp_probe */
+    .name           = "mlp",
+    .long_name      = NULL_IF_CONFIG_SMALL("raw MLP"),
+    .read_probe     = mlp_probe,
+    .read_header    = ff_raw_audio_read_header,
+    .read_packet    = ff_raw_read_partial_packet,
+    .flags          = AVFMT_GENERIC_INDEX | AVFMT_NOTIMESTAMPS,
+    .extensions     = "mlp",
+    .raw_codec_id   = AV_CODEC_ID_MLP,
+};
+#endif /* CONFIG_MLP_DEMUXER */
+
+#if CONFIG_TRUEHD_DEMUXER
+static int thd_probe(AVProbeData *p)
+{
+    return mlp_thd_probe(p, 0xf8726fba); /* sync word matched for raw TrueHD */
+}
+
+AVInputFormat ff_truehd_demuxer = { /* raw TrueHD: shared raw-audio readers + thd_probe */
+    .name           = "truehd",
+    .long_name      = NULL_IF_CONFIG_SMALL("raw TrueHD"),
+    .read_probe     = thd_probe,
+    .read_header    = ff_raw_audio_read_header,
+    .read_packet    = ff_raw_read_partial_packet,
+    .flags          = AVFMT_GENERIC_INDEX | AVFMT_NOTIMESTAMPS,
+    .extensions     = "thd",
+    .raw_codec_id   = AV_CODEC_ID_TRUEHD,
+};
+#endif /* CONFIG_TRUEHD_DEMUXER */
+
diff --git a/libavformat/mlvdec.c b/libavformat/mlvdec.c
index aa1ba60d..288b2a10 100644
--- a/libavformat/mlvdec.c
+++ b/libavformat/mlvdec.c
@@ -25,6 +25,7 @@
  */
 
 #include "libavutil/eval.h"
+#include "libavutil/imgutils.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/rational.h"
 #include "avformat.h"
@@ -131,10 +132,21 @@ static int scan_file(AVFormatContext *avctx, AVStream *vst, AVStream *ast, int f
         if (vst && type == MKTAG('R','A','W','I') && size >= 164) {
             vst->codec->width  = avio_rl16(pb);
             vst->codec->height = avio_rl16(pb);
+            ret = av_image_check_size(vst->codec->width, vst->codec->height, 0, avctx);
+            if (ret < 0)
+                return ret;
             if (avio_rl32(pb) != 1)
                 avpriv_request_sample(avctx, "raw api version");
             avio_skip(pb, 20); // pointer, width, height, pitch, frame_size
             vst->codec->bits_per_coded_sample = avio_rl32(pb);
+            if (vst->codec->bits_per_coded_sample < 0 ||
+                vst->codec->bits_per_coded_sample > (INT_MAX - 7) / (vst->codec->width * vst->codec->height)) {
+                av_log(avctx, AV_LOG_ERROR,
+                       "invalid bits_per_coded_sample %d (size: %dx%d)\n",
+                       vst->codec->bits_per_coded_sample,
+                       vst->codec->width, vst->codec->height);
+                return AVERROR_INVALIDDATA;
+            }
             avio_skip(pb, 8 + 16 + 24); // black_level, white_level, xywh, active_area, exposure_bias
             if (avio_rl32(pb) != 0x2010100) /* RGGB */
                 avpriv_request_sample(avctx, "cfa_pattern");
@@ -143,7 +155,7 @@ static int scan_file(AVFormatContext *avctx, AVStream *vst, AVStream *ast, int f
             vst->codec->codec_tag = MKTAG('B', 'I', 'T', 16);
             size -= 164;
         } else if (ast && type == MKTAG('W', 'A', 'V', 'I') && size >= 16) {
-            ret = ff_get_wav_header(pb, ast->codec, 16, 0);
+            ret = ff_get_wav_header(avctx, pb, ast->codec, 16, 0);
             if (ret < 0)
                 return ret;
             size -= 16;
@@ -332,28 +344,24 @@ static int read_header(AVFormatContext *avctx)
     if (strlen(avctx->filename) > 2) {
         int i;
         char *filename = av_strdup(avctx->filename);
-        AVOpenCallback open_func = avctx->open_cb;
 
         if (!filename)
             return AVERROR(ENOMEM);
 
-        if (!open_func)
-            open_func = ffio_open2_wrapper;
-
         for (i = 0; i < 100; i++) {
             snprintf(filename + strlen(filename) - 2, 3, "%02d", i);
-            if (open_func(avctx, &mlv->pb[i], filename, AVIO_FLAG_READ, &avctx->interrupt_callback, NULL) < 0)
+            if (avctx->io_open(avctx, &mlv->pb[i], filename, AVIO_FLAG_READ, NULL) < 0)
                 break;
             if (check_file_header(mlv->pb[i], guid) < 0) {
                 av_log(avctx, AV_LOG_WARNING, "ignoring %s; bad format or guid mismatch\n", filename);
-                avio_closep(&mlv->pb[i]);
+                ff_format_io_close(avctx, &mlv->pb[i]);
                 continue;
             }
             av_log(avctx, AV_LOG_INFO, "scanning %s\n", filename);
             ret = scan_file(avctx, vst, ast, i);
             if (ret < 0) {
                 av_log(avctx, AV_LOG_WARNING, "ignoring %s; %s\n", filename, av_err2str(ret));
-                avio_closep(&mlv->pb[i]);
+                ff_format_io_close(avctx, &mlv->pb[i]);
                 continue;
             }
         }
@@ -365,6 +373,11 @@ static int read_header(AVFormatContext *avctx)
     if (ast)
         ast->duration = ast->nb_index_entries;
 
+    if ((vst && !vst->nb_index_entries) || (ast && !ast->nb_index_entries)) {
+        av_log(avctx, AV_LOG_ERROR, "no index entries found\n");
+        return AVERROR_INVALIDDATA;
+    }
+
     if (vst && ast)
         avio_seek(pb, FFMIN(vst->index_entries[0].pos, ast->index_entries[0].pos), SEEK_SET);
     else if (vst)
@@ -449,7 +462,7 @@ static int read_close(AVFormatContext *s)
     int i;
     for (i = 0; i < 100; i++)
         if (mlv->pb[i])
-            avio_closep(&mlv->pb[i]);
+            ff_format_io_close(s, &mlv->pb[i]);
     return 0;
 }
 
diff --git a/libavformat/mmst.c b/libavformat/mmst.c
index aa245ea5..21cf2a6d 100644
--- a/libavformat/mmst.c
+++ b/libavformat/mmst.c
@@ -528,8 +528,9 @@ static int mms_open(URLContext *h, const char *uri, int flags)
 
     // establish tcp connection.
     ff_url_join(tcpname, sizeof(tcpname), "tcp", NULL, mmst->host, port, NULL);
-    err = ffurl_open(&mms->mms_hd, tcpname, AVIO_FLAG_READ_WRITE,
-                     &h->interrupt_callback, NULL);
+    err = ffurl_open_whitelist(&mms->mms_hd, tcpname, AVIO_FLAG_READ_WRITE,
+                               &h->interrupt_callback, NULL,
+                               h->protocol_whitelist);
     if (err)
         goto fail;
 
diff --git a/libavformat/mov.c b/libavformat/mov.c
index b42537f5..0408ad16 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -29,6 +29,7 @@
 
 #include "libavutil/attributes.h"
 #include "libavutil/channel_layout.h"
+#include "libavutil/internal.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/intfloat.h"
 #include "libavutil/mathematics.h"
@@ -37,6 +38,9 @@
 #include "libavutil/dict.h"
 #include "libavutil/display.h"
 #include "libavutil/opt.h"
+#include "libavutil/aes.h"
+#include "libavutil/aes_ctr.h"
+#include "libavutil/sha.h"
 #include "libavutil/timecode.h"
 #include "libavcodec/ac3tab.h"
 #include "avformat.h"
@@ -191,6 +195,14 @@ static int mov_read_covr(MOVContext *c, AVIOContext *pb, int type, int len)
     if (ret < 0)
         return ret;
 
+    if (pkt.size >= 8 && id != AV_CODEC_ID_BMP) {
+        if (AV_RB64(pkt.data) == 0x89504e470d0a1a0a) {
+            id = AV_CODEC_ID_PNG;
+        } else {
+            id = AV_CODEC_ID_MJPEG;
+        }
+    }
+
     st->disposition              |= AV_DISPOSITION_ATTACHED_PIC;
 
     st->attached_pic              = pkt;
@@ -262,6 +274,7 @@ static int mov_read_udta_string(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     uint32_t data_type = 0, str_size, str_size_alloc;
     int (*parse)(MOVContext*, AVIOContext*, unsigned, const char*) = NULL;
     int raw = 0;
+    int num = 0;
 
     switch (atom.type) {
     case MKTAG( '@','P','R','M'): key = "premiere_version"; raw = 1; break;
@@ -365,6 +378,15 @@ static int mov_read_udta_string(MOVContext *c, AVIOContext *pb, MOVAtom atom)
                     av_log(c->fc, AV_LOG_ERROR, "Error parsing cover art.\n");
                 }
                 return ret;
+            } else if (!key && c->found_hdlr_mdta && c->meta_keys) {
+                uint32_t index = AV_RB32(&atom.type);
+                if (index < c->meta_keys_count) {
+                    key = c->meta_keys[index];
+                } else {
+                    av_log(c->fc, AV_LOG_WARNING,
+                           "The index of 'data' is out of range: %d >= %d.\n",
+                           index, c->meta_keys_count);
+                }
             }
         } else return 0;
     } else if (atom.size > 4 && key && !c->itunes_metadata && !raw) {
@@ -391,8 +413,10 @@ static int mov_read_udta_string(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     if (atom.size < 0 || str_size >= INT_MAX/2)
         return AVERROR_INVALIDDATA;
 
+    // Allocates enough space if data_type is a float32 number, otherwise
     // worst-case requirement for output string in case of utf8 coded input
-    str_size_alloc = (raw ? str_size : str_size * 2) + 1;
+    num = (data_type == 23);
+    str_size_alloc = (num ? 512 : (raw ? str_size : str_size * 2)) + 1;
     str = av_mallocz(str_size_alloc);
     if (!str)
         return AVERROR(ENOMEM);
@@ -402,6 +426,14 @@ static int mov_read_udta_string(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     else {
         if (!raw && (data_type == 3 || (data_type == 0 && (langcode < 0x400 || langcode == 0x7fff)))) { // MAC Encoded
             mov_read_mac_string(c, pb, str_size, str, str_size_alloc);
+        } else if (data_type == 23 && str_size >= 4) {  // BE float32
+            float val = av_int2float(avio_rb32(pb));
+            if (snprintf(str, str_size_alloc, "%f", val) >= str_size_alloc) {
+                av_log(c->fc, AV_LOG_ERROR,
+                       "Failed to store the float32 number (%f) in string.\n", val);
+                av_free(str);
+                return AVERROR_INVALIDDATA;
+            }
         } else {
             int ret = ffio_read_size(pb, str, str_size);
             if (ret < 0) {
@@ -416,6 +448,12 @@ static int mov_read_udta_string(MOVContext *c, AVIOContext *pb, MOVAtom atom)
             snprintf(key2, sizeof(key2), "%s-%s", key, language);
             av_dict_set(&c->fc->metadata, key2, str, 0);
         }
+        if (!strcmp(key, "encoder")) {
+            int major, minor, micro;
+            if (sscanf(str, "HandBrake %d.%d.%d", &major, &minor, &micro) == 3) {
+                c->handbrake_version = 1000000*major + 1000*minor + micro;
+            }
+        }
     }
     av_log(c->fc, AV_LOG_TRACE, "lang \"%3s\" ", language);
     av_log(c->fc, AV_LOG_TRACE, "tag \"%s\" value \"%s\" atom \"%.4s\" %d %"PRId64"\n",
@@ -432,6 +470,9 @@ static int mov_read_chpl(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     char str[256+1];
     int ret;
 
+    if (c->ignore_chapters)
+        return 0;
+
     if ((atom.size -= 5) < 0)
         return 0;
 
@@ -484,7 +525,7 @@ static int mov_read_dref(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         return AVERROR(ENOMEM);
     sc->drefs_count = entries;
 
-    for (i = 0; i < sc->drefs_count; i++) {
+    for (i = 0; i < entries; i++) {
         MOVDref *dref = &sc->drefs[i];
         uint32_t size = avio_rb32(pb);
         int64_t next = avio_tell(pb) + size - 4;
@@ -540,7 +581,7 @@ static int mov_read_dref(MOVContext *c, AVIOContext *pb, MOVAtom atom)
                 av_log(c->fc, AV_LOG_DEBUG, "type %d, len %d\n", type, len);
                 if (len&1)
                     len += 1;
-                if (type == 2 || type == 18) { // absolute path
+                if (type == 2) { // absolute path
                     av_free(dref->path);
                     dref->path = av_mallocz(len+1);
                     if (!dref->path)
@@ -551,15 +592,13 @@ static int mov_read_dref(MOVContext *c, AVIOContext *pb, MOVAtom atom)
                         av_freep(&dref->path);
                         return ret;
                     }
-                    if (type == 18) // no additional processing needed
-                        continue;
                     if (len > volume_len && !strncmp(dref->path, dref->volume, volume_len)) {
                         len -= volume_len;
                         memmove(dref->path, dref->path+volume_len, len);
                         dref->path[len] = 0;
                     }
                     for (j = 0; j < len; j++)
-                        if (dref->path[j] == ':')
+                        if (dref->path[j] == ':' || dref->path[j] == 0)
                             dref->path[j] = '/';
                     av_log(c->fc, AV_LOG_DEBUG, "path %s\n", dref->path);
                 } else if (type == 0) { // directory name
@@ -581,6 +620,11 @@ static int mov_read_dref(MOVContext *c, AVIOContext *pb, MOVAtom atom)
                 } else
                     avio_skip(pb, len);
             }
+        } else {
+            av_log(c->fc, AV_LOG_DEBUG, "Unknown dref type 0x08%x size %d\n",
+                   dref->type, size);
+            entries--;
+            i--;
         }
         avio_seek(pb, next, SEEK_SET);
     }
@@ -596,11 +640,6 @@ static int mov_read_hdlr(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     char *title_str;
     int ret;
 
-    if (c->fc->nb_streams < 1) // meta before first trak
-        return 0;
-
-    st = c->fc->streams[c->fc->nb_streams-1];
-
     avio_r8(pb); /* version */
     avio_rb24(pb); /* flags */
 
@@ -611,6 +650,15 @@ static int mov_read_hdlr(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     av_log(c->fc, AV_LOG_TRACE, "ctype= %.4s (0x%08x)\n", (char*)&ctype, ctype);
     av_log(c->fc, AV_LOG_TRACE, "stype= %.4s\n", (char*)&type);
 
+    if (c->trak_index < 0) {  // meta not inside a trak
+        if (type == MKTAG('m','d','t','a')) {
+            c->found_hdlr_mdta = 1;
+        }
+        return 0;
+    }
+
+    st = c->fc->streams[c->fc->nb_streams-1];
+
     if     (type == MKTAG('v','i','d','e'))
         st->codec->codec_type = AVMEDIA_TYPE_VIDEO;
     else if (type == MKTAG('s','o','u','n'))
@@ -683,7 +731,7 @@ static int mov_read_dac3(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         return 0;
     st = c->fc->streams[c->fc->nb_streams-1];
 
-    ast = (enum AVAudioServiceType*)ff_stream_new_side_data(st, AV_PKT_DATA_AUDIO_SERVICE_TYPE,
+    ast = (enum AVAudioServiceType*)av_stream_new_side_data(st, AV_PKT_DATA_AUDIO_SERVICE_TYPE,
                                                             sizeof(*ast));
     if (!ast)
         return AVERROR(ENOMEM);
@@ -715,7 +763,7 @@ static int mov_read_dec3(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         return 0;
     st = c->fc->streams[c->fc->nb_streams-1];
 
-    ast = (enum AVAudioServiceType*)ff_stream_new_side_data(st, AV_PKT_DATA_AUDIO_SERVICE_TYPE,
+    ast = (enum AVAudioServiceType*)av_stream_new_side_data(st, AV_PKT_DATA_AUDIO_SERVICE_TYPE,
                                                             sizeof(*ast));
     if (!ast)
         return AVERROR(ENOMEM);
@@ -741,6 +789,61 @@ static int mov_read_dec3(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     return 0;
 }
 
+/*
+ * Parse the fixed 20-byte DTS configuration payload and copy the stream
+ * parameters (sample rate, bit rate, sample depth, frame size and channel
+ * layout) into the codec context of the most recently added stream.
+ * Returns 0 on success (also when there is no stream yet) and
+ * AVERROR_INVALIDDATA if the payload cannot be read completely.
+ */
+static int mov_read_ddts(MOVContext *c, AVIOContext *pb, MOVAtom atom)
+{
+    enum { DDTS_SIZE = 20 };
+    /* On-stack and zeroed: nothing to free on any return path. */
+    uint8_t buf[DDTS_SIZE + FF_INPUT_BUFFER_PADDING_SIZE] = { 0 };
+    uint32_t frame_duration_code, channel_layout_code;
+    AVStream *st;
+    GetBitContext gb;
+
+    /* Signed comparison, so a negative avio_read() error is caught too. */
+    if (avio_read(pb, buf, DDTS_SIZE) < DDTS_SIZE)
+        return AVERROR_INVALIDDATA;
+    if (c->fc->nb_streams < 1)
+        return 0;
+    st = c->fc->streams[c->fc->nb_streams-1];
+
+    init_get_bits(&gb, buf, 8*DDTS_SIZE);
+    st->codec->sample_rate = get_bits_long(&gb, 32);
+    skip_bits_long(&gb, 32); /* max bitrate */
+    st->codec->bit_rate = get_bits_long(&gb, 32);
+    st->codec->bits_per_coded_sample = get_bits(&gb, 8);
+    frame_duration_code = get_bits(&gb, 2);
+    skip_bits(&gb, 30); /* various fields */
+    channel_layout_code = get_bits(&gb, 16);
+
+    st->codec->frame_size =
+            (frame_duration_code == 0) ? 512 :
+            (frame_duration_code == 1) ? 1024 :
+            (frame_duration_code == 2) ? 2048 :
+            (frame_duration_code == 3) ? 4096 : 0;
+
+    if (channel_layout_code > 0xff) {
+        av_log(c->fc, AV_LOG_WARNING, "Unsupported DTS audio channel layout\n");
+    }
+    /* Each of the low four code bits selects one speaker or speaker pair. */
+    st->codec->channel_layout =
+            ((channel_layout_code & 0x1) ? AV_CH_FRONT_CENTER : 0) |
+            ((channel_layout_code & 0x2) ? AV_CH_FRONT_LEFT : 0) |
+            ((channel_layout_code & 0x2) ? AV_CH_FRONT_RIGHT : 0) |
+            ((channel_layout_code & 0x4) ? AV_CH_SIDE_LEFT : 0) |
+            ((channel_layout_code & 0x4) ? AV_CH_SIDE_RIGHT : 0) |
+            ((channel_layout_code & 0x8) ? AV_CH_LOW_FREQUENCY : 0);
+
+    st->codec->channels = av_get_channel_layout_nb_channels(st->codec->channel_layout);
+
+    return 0;
+}
+
 static int mov_read_chan(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 {
     AVStream *st;
@@ -769,7 +872,7 @@ static int mov_read_wfex(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         return 0;
     st = c->fc->streams[c->fc->nb_streams-1];
 
-    if ((ret = ff_get_wav_header(pb, st->codec, atom.size, 0)) < 0)
+    if ((ret = ff_get_wav_header(c->fc, pb, st->codec, atom.size, 0)) < 0)
         av_log(c->fc, AV_LOG_WARNING, "get_wav_header failed\n");
 
     return ret;
@@ -807,6 +910,120 @@ static int mov_read_mdat(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     return 0; /* now go for moov */
 }
 
+#define DRM_BLOB_SIZE 56
+
+/* Read the Audible AAX DRM data: log the file checksum, verify the
+ * activation bytes and fill c->file_key / c->file_iv, which aax_filter()
+ * reads. Returns 0 on success or when no activation bytes were supplied,
+ * otherwise a negative AVERROR code. */
+static int mov_read_adrm(MOVContext *c, AVIOContext *pb, MOVAtom atom)
+{
+    uint8_t intermediate_key[20], intermediate_iv[20];
+    uint8_t input[64], output[64];
+    uint8_t file_checksum[20], calculated_checksum[20];
+    struct AVSHA *sha;
+    int i, ret = 0;
+    uint8_t *activation_bytes = c->activation_bytes;
+    uint8_t *fixed_key = c->audible_fixed_key;
+
+    c->aax_mode = 1;
+
+    sha = av_sha_alloc();
+    if (!sha)
+        return AVERROR(ENOMEM);
+    c->aes_decrypt = av_aes_alloc();
+    if (!c->aes_decrypt) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    /* drm blob processing; NOTE(review): avio_read() results are not checked */
+    avio_read(pb, output, 8); // go to offset 8, absolute position 0x251
+    avio_read(pb, input, DRM_BLOB_SIZE);
+    avio_read(pb, output, 4); // go to offset 4, absolute position 0x28d
+    avio_read(pb, file_checksum, 20);
+
+    av_log(c->fc, AV_LOG_INFO, "[aax] file checksum == "); // required by external tools
+    for (i = 0; i < 20; i++)
+        av_log(c->fc, AV_LOG_INFO, "%02x", file_checksum[i]); // context must carry an AVClass
+    av_log(c->fc, AV_LOG_INFO, "\n");
+
+    /* verify activation data */
+    if (!activation_bytes) {
+        av_log(c->fc, AV_LOG_WARNING, "[aax] activation_bytes option is missing!\n");
+        ret = 0;  /* allow ffprobe to continue working on .aax files */
+        goto fail;
+    }
+    if (c->activation_bytes_size != 4) {
+        av_log(c->fc, AV_LOG_FATAL, "[aax] activation_bytes value needs to be 4 bytes!\n");
+        ret = AVERROR(EINVAL);
+        goto fail;
+    }
+
+    /* verify fixed key */
+    if (c->audible_fixed_key_size != 16) {
+        av_log(c->fc, AV_LOG_FATAL, "[aax] audible_fixed_key value needs to be 16 bytes!\n");
+        ret = AVERROR(EINVAL);
+        goto fail;
+    }
+
+    /* AAX (and AAX+) key derivation */
+    av_sha_init(sha, 160);
+    av_sha_update(sha, fixed_key, 16);
+    av_sha_update(sha, activation_bytes, 4);
+    av_sha_final(sha, intermediate_key);
+    av_sha_init(sha, 160);
+    av_sha_update(sha, fixed_key, 16);
+    av_sha_update(sha, intermediate_key, 20);
+    av_sha_update(sha, activation_bytes, 4);
+    av_sha_final(sha, intermediate_iv);
+    av_sha_init(sha, 160);
+    av_sha_update(sha, intermediate_key, 16);
+    av_sha_update(sha, intermediate_iv, 16);
+    av_sha_final(sha, calculated_checksum);
+    if (memcmp(calculated_checksum, file_checksum, 20)) { // critical error
+        av_log(c->fc, AV_LOG_ERROR, "[aax] mismatch in checksums!\n");
+        ret = AVERROR_INVALIDDATA;
+        goto fail;
+    }
+    av_aes_init(c->aes_decrypt, intermediate_key, 128, 1);
+    av_aes_crypt(c->aes_decrypt, output, input, DRM_BLOB_SIZE >> 4, intermediate_iv, 1);
+    for (i = 0; i < 4; i++) {
+        // file data (in output) is stored in big-endian mode
+        if (activation_bytes[i] != output[3 - i]) { // critical error
+            av_log(c->fc, AV_LOG_ERROR, "[aax] error in drm blob decryption!\n");
+            ret = AVERROR_INVALIDDATA;
+            goto fail;
+        }
+    }
+    memcpy(c->file_key, output + 8, 16);
+    memcpy(input, output + 26, 16);
+    av_sha_init(sha, 160);
+    av_sha_update(sha, input, 16);
+    av_sha_update(sha, c->file_key, 16);
+    av_sha_update(sha, fixed_key, 16);
+    av_sha_final(sha, c->file_iv);
+
+fail:
+    av_free(sha);
+
+    return ret;
+}
+
+// Audible AAX (and AAX+) bytestream decryption
+static int aax_filter(uint8_t *input, int size, MOVContext *c)
+{
+    /* The IV passed to av_aes_crypt() is overwritten, so use a scratch copy. */
+    unsigned char scratch_iv[16];
+    const int nb_blocks = size >> 4; // trailing bytes are not encrypted!
+
+    memcpy(scratch_iv, c->file_iv, sizeof(scratch_iv));
+    av_aes_init(c->aes_decrypt, c->file_key, 128, 1);
+    av_aes_crypt(c->aes_decrypt, input, input, nb_blocks, scratch_iv, 1);
+
+    return 0;
+}
+
 /* read major brand, minor version and compatible brands and store them as metadata */
 static int mov_read_ftyp(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 {
@@ -1024,7 +1241,7 @@ static int mov_read_colr(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 {
     AVStream *st;
     char color_parameter_type[5] = { 0 };
-    int color_primaries, color_trc, color_matrix;
+    uint16_t color_primaries, color_trc, color_matrix;
     int ret;
 
     if (c->fc->nb_streams < 1)
@@ -1130,14 +1347,14 @@ static int mov_read_fiel(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 static int mov_realloc_extradata(AVCodecContext *codec, MOVAtom atom)
 {
     int err = 0;
-    uint64_t size = (uint64_t)codec->extradata_size + atom.size + 8 + FF_INPUT_BUFFER_PADDING_SIZE;
+    uint64_t size = (uint64_t)codec->extradata_size + atom.size + 8 + AV_INPUT_BUFFER_PADDING_SIZE;
     if (size > INT_MAX || (uint64_t)atom.size > INT_MAX)
         return AVERROR_INVALIDDATA;
     if ((err = av_reallocp(&codec->extradata, size)) < 0) {
         codec->extradata_size = 0;
         return err;
     }
-    codec->extradata_size = size - FF_INPUT_BUFFER_PADDING_SIZE;
+    codec->extradata_size = size - AV_INPUT_BUFFER_PADDING_SIZE;
     return 0;
 }
 
@@ -1159,7 +1376,7 @@ static int64_t mov_read_atom_into_extradata(MOVContext *c, AVIOContext *pb, MOVA
         codec->extradata_size -= atom.size - err;
         result = err;
     }
-    memset(buf + 8 + err, 0, FF_INPUT_BUFFER_PADDING_SIZE);
+    memset(buf + 8 + err, 0, AV_INPUT_BUFFER_PADDING_SIZE);
     return result;
 }
 
@@ -1277,7 +1494,7 @@ static int mov_read_aclr(MOVContext *c, AVIOContext *pb, MOVAtom atom)
                         av_log(c, AV_LOG_WARNING, "ignored unknown aclr value (%d)\n", range_value);
                         break;
                     }
-                    av_dlog(c, "color_range: %d\n", codec->color_range);
+                    ff_dlog(c, "color_range: %d\n", codec->color_range);
                 } else {
                   /* For some reason the whole atom was not added to the extradata */
                   av_log(c, AV_LOG_ERROR, "aclr not decoded - incomplete atom\n");
@@ -1319,6 +1536,32 @@ static int mov_read_wave(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         if (ret < 0)
             return ret;
     } else if (atom.size > 8) { /* to read frma, esds atoms */
+        if (st->codec->codec_id == AV_CODEC_ID_ALAC && atom.size >= 24) {
+            uint64_t buffer;
+            ret = ffio_ensure_seekback(pb, 8);
+            if (ret < 0)
+                return ret;
+            buffer = avio_rb64(pb);
+            atom.size -= 8;
+            if (  (buffer & 0xFFFFFFFF) == MKBETAG('f','r','m','a')
+                && buffer >> 32 <= atom.size
+                && buffer >> 32 >= 8) {
+                avio_skip(pb, -8);
+                atom.size += 8;
+            } else if (!st->codec->extradata_size) {
+#define ALAC_EXTRADATA_SIZE 36
+                st->codec->extradata = av_mallocz(ALAC_EXTRADATA_SIZE + AV_INPUT_BUFFER_PADDING_SIZE);
+                if (!st->codec->extradata)
+                    return AVERROR(ENOMEM);
+                st->codec->extradata_size = ALAC_EXTRADATA_SIZE;
+                AV_WB32(st->codec->extradata    , ALAC_EXTRADATA_SIZE);
+                AV_WB32(st->codec->extradata + 4, MKTAG('a','l','a','c'));
+                AV_WB64(st->codec->extradata + 12, buffer);
+                avio_read(pb, st->codec->extradata + 20, 16);
+                avio_skip(pb, atom.size - 24);
+                return 0;
+            }
+        }
         if ((ret = mov_read_default(c, pb, atom)) < 0)
             return ret;
     } else
@@ -1513,9 +1756,12 @@ static void mov_parse_stsd_video(MOVContext *c, AVIOContext *pb,
                                  AVStream *st, MOVStreamContext *sc)
 {
     uint8_t codec_name[32];
-    unsigned int color_depth, len, j;
-    int color_greyscale;
-    int color_table_id;
+    int64_t stsd_start;
+    unsigned int len;
+
+    /* The first 16 bytes of the video sample description are already
+     * read in ff_mov_read_stsd_entries() */
+    stsd_start = avio_tell(pb) - 16;
 
     avio_rb16(pb); /* version */
     avio_rb16(pb); /* revision level */
@@ -1553,74 +1799,11 @@ static void mov_parse_stsd_video(MOVContext *c, AVIOContext *pb,
         st->codec->codec_id = AV_CODEC_ID_FLV1;
 
     st->codec->bits_per_coded_sample = avio_rb16(pb); /* depth */
-    color_table_id = avio_rb16(pb); /* colortable id */
-    av_log(c->fc, AV_LOG_TRACE, "depth %d, ctab id %d\n",
-            st->codec->bits_per_coded_sample, color_table_id);
-    /* figure out the palette situation */
-    color_depth     = st->codec->bits_per_coded_sample & 0x1F;
-    color_greyscale = st->codec->bits_per_coded_sample & 0x20;
-    /* Do not create a greyscale palette for cinepak */
-    if (color_greyscale && st->codec->codec_id == AV_CODEC_ID_CINEPAK)
-        return;
 
-    /* if the depth is 2, 4, or 8 bpp, file is palettized */
-    if ((color_depth == 2) || (color_depth == 4) || (color_depth == 8)) {
-        /* for palette traversal */
-        unsigned int color_start, color_count, color_end;
-        unsigned char a, r, g, b;
-
-        if (color_greyscale) {
-            int color_index, color_dec;
-            /* compute the greyscale palette */
-            st->codec->bits_per_coded_sample = color_depth;
-            color_count = 1 << color_depth;
-            color_index = 255;
-            color_dec   = 256 / (color_count - 1);
-            for (j = 0; j < color_count; j++) {
-                r = g = b = color_index;
-                sc->palette[j] = (0xFFU << 24) | (r << 16) | (g << 8) | (b);
-                color_index -= color_dec;
-                if (color_index < 0)
-                    color_index = 0;
-            }
-        } else if (color_table_id) {
-            const uint8_t *color_table;
-            /* if flag bit 3 is set, use the default palette */
-            color_count = 1 << color_depth;
-            if (color_depth == 2)
-                color_table = ff_qt_default_palette_4;
-            else if (color_depth == 4)
-                color_table = ff_qt_default_palette_16;
-            else
-                color_table = ff_qt_default_palette_256;
+    avio_seek(pb, stsd_start, SEEK_SET);
 
-            for (j = 0; j < color_count; j++) {
-                r = color_table[j * 3 + 0];
-                g = color_table[j * 3 + 1];
-                b = color_table[j * 3 + 2];
-                sc->palette[j] = (0xFFU << 24) | (r << 16) | (g << 8) | (b);
-            }
-        } else {
-            /* load the palette from the file */
-            color_start = avio_rb32(pb);
-            color_count = avio_rb16(pb);
-            color_end   = avio_rb16(pb);
-            if ((color_start <= 255) && (color_end <= 255)) {
-                for (j = color_start; j <= color_end; j++) {
-                    /* each A, R, G, or B component is 16 bits;
-                        * only use the top 8 bits */
-                    a = avio_r8(pb);
-                    avio_r8(pb);
-                    r = avio_r8(pb);
-                    avio_r8(pb);
-                    g = avio_r8(pb);
-                    avio_r8(pb);
-                    b = avio_r8(pb);
-                    avio_r8(pb);
-                    sc->palette[j] = (a << 24 ) | (r << 16) | (g << 8) | (b);
-                }
-            }
-        }
+    if (ff_get_qtpalette(st->codec->codec_id, pb, sc->palette)) {
+        st->codec->bits_per_coded_sample &= 0x1F;
         sc->has_palette = 1;
     }
 }
@@ -1669,6 +1852,22 @@ static void mov_parse_stsd_audio(MOVContext *c, AVIOContext *pb,
                     ff_mov_get_lpcm_codec_id(st->codec->bits_per_coded_sample,
                                              flags);
         }
+        if (version == 0 || (version == 1 && sc->audio_cid != -2)) {
+            /* can't correctly handle variable sized packet as audio unit */
+            switch (st->codec->codec_id) {
+            case AV_CODEC_ID_MP2:
+            case AV_CODEC_ID_MP3:
+                st->need_parsing = AVSTREAM_PARSE_FULL;
+                break;
+            }
+        }
+    }
+
+    if (sc->format == 0) {
+        if (st->codec->bits_per_coded_sample == 8)
+            st->codec->codec_id = mov_codec_id(st, MKTAG('r','a','w',' '));
+        else if (st->codec->bits_per_coded_sample == 16)
+            st->codec->codec_id = mov_codec_id(st, MKTAG('t','w','o','s'));
     }
 
     switch (st->codec->codec_id) {
@@ -1774,7 +1973,7 @@ static int mov_rewrite_dvd_sub_extradata(AVStream *st)
 
     av_freep(&st->codec->extradata);
     st->codec->extradata_size = 0;
-    st->codec->extradata = av_mallocz(strlen(buf) + FF_INPUT_BUFFER_PADDING_SIZE);
+    st->codec->extradata = av_mallocz(strlen(buf) + AV_INPUT_BUFFER_PADDING_SIZE);
     if (!st->codec->extradata)
         return AVERROR(ENOMEM);
     st->codec->extradata_size = strlen(buf);
@@ -1802,7 +2001,7 @@ static int mov_parse_stsd_data(MOVContext *c, AVIOContext *pb,
             val = AV_RB32(st->codec->extradata + 4);
             tmcd_ctx->tmcd_flags = val;
             if (val & 1)
-                st->codec->flags2 |= CODEC_FLAG2_DROP_FRAME_TIMECODE;
+                st->codec->flags2 |= AV_CODEC_FLAG2_DROP_FRAME_TIMECODE;
             st->codec->time_base.den = st->codec->extradata[16]; /* number of frame */
             st->codec->time_base.num = 1;
             /* adjust for per frame dur in counter mode */
@@ -1889,7 +2088,6 @@ static int mov_finalize_stsd_codec(MOVContext *c, AVIOContext *pb,
     case AV_CODEC_ID_MP3:
         /* force type after stsd for m1a hdlr */
         st->codec->codec_type = AVMEDIA_TYPE_AUDIO;
-        st->need_parsing      = AVSTREAM_PARSE_FULL;
         break;
     case AV_CODEC_ID_GSM:
     case AV_CODEC_ID_ADPCM_MS:
@@ -1982,12 +2180,14 @@ int ff_mov_read_stsd_entries(MOVContext *c, AVIOContext *pb, int entries)
 
         sc->pseudo_stream_id = st->codec->codec_tag ? -1 : pseudo_stream_id;
         sc->dref_id= dref_id;
+        sc->format = format;
 
         id = mov_codec_id(st, format);
 
-        av_log(c->fc, AV_LOG_TRACE, "size=%"PRId64" 4CC= %c%c%c%c codec_type=%d\n", size,
+        av_log(c->fc, AV_LOG_TRACE,
+               "size=%"PRId64" 4CC= %c%c%c%c/0x%08x codec_type=%d\n", size,
                 (format >> 0) & 0xff, (format >> 8) & 0xff, (format >> 16) & 0xff,
-                (format >> 24) & 0xff, st->codec->codec_type);
+                (format >> 24) & 0xff, format, st->codec->codec_type);
 
         if (st->codec->codec_type==AVMEDIA_TYPE_VIDEO) {
             st->codec->codec_id = id;
@@ -2211,7 +2411,7 @@ static int mov_read_stsz(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 
     num_bytes = (entries*field_size+4)>>3;
 
-    buf = av_malloc(num_bytes+FF_INPUT_BUFFER_PADDING_SIZE);
+    buf = av_malloc(num_bytes+AV_INPUT_BUFFER_PADDING_SIZE);
     if (!buf) {
         av_freep(&sc->sample_sizes);
         return AVERROR(ENOMEM);
@@ -2314,6 +2514,10 @@ static int mov_read_stts(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 static void mov_update_dts_shift(MOVStreamContext *sc, int duration)
 {
     if (duration < 0) {
+        if (duration == INT_MIN) {
+            av_log(NULL, AV_LOG_WARNING, "mov_update_dts_shift(): dts_shift set to %d\n", INT_MAX);
+            duration++;
+        }
         sc->dts_shift = FFMAX(sc->dts_shift, -duration);
     }
 }
@@ -2354,7 +2558,7 @@ static int mov_read_ctts(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         av_log(c->fc, AV_LOG_TRACE, "count=%d, duration=%d\n",
                 count, duration);
 
-        if (FFABS(duration) > (1<<28) && i+2<entries) {
+        if (FFNABS(duration) < -(1<<28) && i+2<entries) {
             av_log(c->fc, AV_LOG_WARNING, "CTTS invalid\n");
             av_freep(&sc->ctts_data);
             sc->ctts_count = 0;
@@ -2689,17 +2893,40 @@ static void mov_build_index(MOVContext *mov, AVStream *st)
     }
 }
 
-static int mov_open_dref(MOVContext *c, AVIOContext **pb, const char *src, MOVDref *ref,
-                         AVIOInterruptCB *int_cb)
-{
-    AVOpenCallback open_func = c->fc->open_cb;
+static int test_same_origin(const char *src, const char *ref) { /* returns 1: same origin, 0: mismatch, -1: src is empty */
+    char src_proto[64];
+    char ref_proto[64];
+    char src_auth[256];
+    char ref_auth[256];
+    char src_host[256];
+    char ref_host[256];
+    int src_port=-1;
+    int ref_port=-1;
 
-    if (!open_func)
-        open_func = ffio_open2_wrapper;
+    av_url_split(src_proto, sizeof(src_proto), src_auth, sizeof(src_auth), src_host, sizeof(src_host), &src_port, NULL, 0, src); /* path component is not requested */
+    av_url_split(ref_proto, sizeof(ref_proto), ref_auth, sizeof(ref_auth), ref_host, sizeof(ref_host), &ref_port, NULL, 0, ref); /* path component is not requested */
 
+    if (strlen(src) == 0) {
+        return -1; /* nothing to compare against */
+    } else if (strlen(src_auth) + 1 >= sizeof(src_auth) || /* a buffer filled to capacity (possibly truncated) counts as a mismatch */
+        strlen(ref_auth) + 1 >= sizeof(ref_auth) ||
+        strlen(src_host) + 1 >= sizeof(src_host) ||
+        strlen(ref_host) + 1 >= sizeof(ref_host)) {
+        return 0;
+    } else if (strcmp(src_proto, ref_proto) || /* protocol, authorization, host and port must all be equal */
+               strcmp(src_auth, ref_auth) ||
+               strcmp(src_host, ref_host) ||
+               src_port != ref_port) {
+        return 0;
+    } else
+        return 1;
+}
+
+static int mov_open_dref(MOVContext *c, AVIOContext **pb, const char *src, MOVDref *ref)
+{
     /* try relative path, we do not try the absolute because it can leak information about our
        system to an attacker */
-    if (ref->nlvl_to > 0 && ref->nlvl_from > 0 && ref->path[0] != '/') {
+    if (ref->nlvl_to > 0 && ref->nlvl_from > 0) {
         char filename[1025];
         const char *src_path;
         int i, l;
@@ -2729,22 +2956,33 @@ static int mov_open_dref(MOVContext *c, AVIOContext **pb, const char *src, MOVDr
                 av_strlcat(filename, "../", sizeof(filename));
 
             av_strlcat(filename, ref->path + l + 1, sizeof(filename));
-            if (!c->use_absolute_path && !c->fc->open_cb)
-                if(strstr(ref->path + l + 1, "..") || ref->nlvl_from > 1)
+            if (!c->use_absolute_path) {
+                int same_origin = test_same_origin(src, filename);
+
+                if (!same_origin) {
+                    av_log(c->fc, AV_LOG_ERROR,
+                        "Reference with mismatching origin, %s not tried for security reasons, "
+                        "set demuxer option use_absolute_path to allow it anyway\n",
+                        ref->path);
+                    return AVERROR(ENOENT);
+                }
+
+                if(strstr(ref->path + l + 1, "..") ||
+                   strstr(ref->path + l + 1, ":") ||
+                   (ref->nlvl_from > 1 && same_origin < 0) ||
+                   (filename[0] == '/' && src_path == src))
                     return AVERROR(ENOENT);
+            }
 
             if (strlen(filename) + 1 == sizeof(filename))
                 return AVERROR(ENOENT);
-            if (!open_func(c->fc, pb, filename, AVIO_FLAG_READ, int_cb, NULL))
+            if (!c->fc->io_open(c->fc, pb, filename, AVIO_FLAG_READ, NULL))
                 return 0;
         }
     } else if (c->use_absolute_path) {
         av_log(c->fc, AV_LOG_WARNING, "Using absolute path on user request, "
                "this is a possible security issue\n");
-        if (!open_func(c->fc, pb, ref->path, AVIO_FLAG_READ, int_cb, NULL))
-            return 0;
-    } else if (c->fc->open_cb) {
-        if (!open_func(c->fc, pb, ref->path, AVIO_FLAG_READ, int_cb, NULL))
+        if (!c->fc->io_open(c->fc, pb, ref->path, AVIO_FLAG_READ, NULL))
             return 0;
     } else {
         av_log(c->fc, AV_LOG_ERROR,
@@ -2781,10 +3019,13 @@ static int mov_read_trak(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     st->priv_data = sc;
     st->codec->codec_type = AVMEDIA_TYPE_DATA;
     sc->ffindex = st->index;
+    c->trak_index = st->index;
 
     if ((ret = mov_read_default(c, pb, atom)) < 0)
         return ret;
 
+    c->trak_index = -1;
+
     /* sanity checks */
     if (sc->chunk_count && (!sc->stts_count || !sc->stsc_count ||
                             (!sc->sample_size && !sc->sample_count))) {
@@ -2801,13 +3042,22 @@ static int mov_read_trak(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 
     if (sc->dref_id-1 < sc->drefs_count && sc->drefs[sc->dref_id-1].path) {
         MOVDref *dref = &sc->drefs[sc->dref_id - 1];
-        if (mov_open_dref(c, &sc->pb, c->fc->filename, dref,
-                          &c->fc->interrupt_callback) < 0)
-            av_log(c->fc, AV_LOG_ERROR,
-                   "stream %d, error opening alias: path='%s', dir='%s', "
-                   "filename='%s', volume='%s', nlvl_from=%d, nlvl_to=%d\n",
+        if (c->enable_drefs) {
+            if (mov_open_dref(c, &sc->pb, c->fc->filename, dref) < 0)
+                av_log(c->fc, AV_LOG_ERROR,
+                       "stream %d, error opening alias: path='%s', dir='%s', "
+                       "filename='%s', volume='%s', nlvl_from=%d, nlvl_to=%d\n",
+                       st->index, dref->path, dref->dir, dref->filename,
+                       dref->volume, dref->nlvl_from, dref->nlvl_to);
+        } else {
+            av_log(c->fc, AV_LOG_WARNING,
+                   "Skipped opening external track: "
+                   "stream %d, alias: path='%s', dir='%s', "
+                   "filename='%s', volume='%s', nlvl_from=%d, nlvl_to=%d."
+                   "Set enable_drefs to allow this.\n",
                    st->index, dref->path, dref->dir, dref->filename,
                    dref->volume, dref->nlvl_from, dref->nlvl_to);
+        }
     } else {
         sc->pb = c->fc->pb;
         sc->pb_is_copied = 1;
@@ -2851,6 +3101,13 @@ static int mov_read_trak(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         break;
     }
 
+    // If the duration of the mp3 packets is not constant, then they could need a parser
+    if (st->codec->codec_id == AV_CODEC_ID_MP3
+        && sc->stts_count > 3
+        && sc->stts_count*10 > st->nb_frames
+        && sc->time_scale == st->codec->sample_rate) {
+            st->need_parsing = AVSTREAM_PARSE_FULL;
+    }
     /* Do not need those anymore. */
     av_freep(&sc->chunk_offsets);
     av_freep(&sc->stsc_data);
@@ -2873,6 +3130,48 @@ static int mov_read_ilst(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     return ret;
 }
 
+/* Parse a metadata 'keys' atom into c->meta_keys[1..count]; index 0 is left unset. */
+static int mov_read_keys(MOVContext *c, AVIOContext *pb, MOVAtom atom)
+{
+    uint32_t count, i;
+
+    if (atom.size < 8)
+        return 0;
+
+    avio_skip(pb, 4); /* presumably version and flags (QTFF), unused here */
+    count = avio_rb32(pb);
+    /* '>=' rather than '>': count + 1 entries are allocated below */
+    if (count >= UINT_MAX / sizeof(*c->meta_keys)) {
+        av_log(c->fc, AV_LOG_ERROR,
+               "The 'keys' atom with the invalid key count: %"PRIu32"\n", count);
+        return AVERROR_INVALIDDATA;
+    }
+    c->meta_keys_count = count + 1;
+    c->meta_keys = av_mallocz(c->meta_keys_count * sizeof(*c->meta_keys));
+    if (!c->meta_keys)
+        return AVERROR(ENOMEM);
+
+    for (i = 1; i <= count; ++i) {
+        uint32_t key_size = avio_rb32(pb);
+        uint32_t type = avio_rl32(pb);
+        if (key_size < 8) {
+            av_log(c->fc, AV_LOG_ERROR,
+                   "The key# %"PRIu32" in meta has invalid size: %"PRIu32"\n", i, key_size);
+            return AVERROR_INVALIDDATA;
+        }
+        key_size -= 8; /* the size field includes the 8 bytes of size + type just read */
+        if (type != MKTAG('m','d','t','a')) {
+            avio_skip(pb, key_size); /* payload is consumed: do not read it again as a key */
+            continue;
+        }
+        c->meta_keys[i] = av_mallocz(key_size + 1);
+        if (!c->meta_keys[i])
+            return AVERROR(ENOMEM);
+        avio_read(pb, c->meta_keys[i], key_size);
+    }
+    return 0;
+}
+
 static int mov_read_custom_2plus(MOVContext *c, AVIOContext *pb, int size)
 {
     int64_t end = avio_tell(pb) + size;
@@ -3080,9 +3379,8 @@ static int mov_read_tkhd(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     if (width && height && sc->display_matrix) {
         double disp_transform[2];
 
-#define SQR(a) ((a)*(double)(a))
         for (i = 0; i < 2; i++)
-            disp_transform[i] = sqrt(SQR(display_matrix[i][0]) + SQR(display_matrix[i][1]));
+            disp_transform[i] = hypot(display_matrix[i][0], display_matrix[i][1]);
 
         if (disp_transform[0] > 0       && disp_transform[1] > 0 &&
             disp_transform[0] < (1<<24) && disp_transform[1] < (1<<24) &&
@@ -3099,7 +3397,7 @@ static int mov_read_tfhd(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     MOVFragment *frag = &c->fragment;
     MOVTrackExt *trex = NULL;
     MOVFragmentIndex* index = NULL;
-    int flags, track_id, i;
+    int flags, track_id, i, found = 0;
 
     avio_r8(pb); /* version */
     flags = avio_rb24(pb);
@@ -3117,15 +3415,6 @@ static int mov_read_tfhd(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         av_log(c->fc, AV_LOG_ERROR, "could not find corresponding trex\n");
         return AVERROR_INVALIDDATA;
     }
-    for (i = 0; i < c->fragment_index_count; i++) {
-        MOVFragmentIndex* candidate = c->fragment_index_data[i];
-        if (candidate->track_id == frag->track_id) {
-            av_log(c->fc, AV_LOG_DEBUG,
-                   "found fragment index for track %u\n", frag->track_id);
-            index = candidate;
-            break;
-        }
-    }
 
     frag->base_data_offset = flags & MOV_TFHD_BASE_DATA_OFFSET ?
                              avio_rb64(pb) : flags & MOV_TFHD_DEFAULT_BASE_IS_MOOF ?
@@ -3139,24 +3428,33 @@ static int mov_read_tfhd(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     frag->flags    = flags & MOV_TFHD_DEFAULT_FLAGS ?
                      avio_rb32(pb) : trex->flags;
     frag->time     = AV_NOPTS_VALUE;
-    if (index) {
-        int i, found = 0;
-        for (i = index->current_item; i < index->item_count; i++) {
-            if (frag->implicit_offset == index->items[i].moof_offset) {
-                av_log(c->fc, AV_LOG_DEBUG, "found fragment index entry "
-                        "for track %u and moof_offset %"PRId64"\n",
-                        frag->track_id, index->items[i].moof_offset);
-                frag->time = index->items[i].time;
-                index->current_item = i + 1;
-                found = 1;
+    for (i = 0; i < c->fragment_index_count; i++) {
+        int j;
+        MOVFragmentIndex* candidate = c->fragment_index_data[i];
+        if (candidate->track_id == frag->track_id) {
+            av_log(c->fc, AV_LOG_DEBUG,
+                   "found fragment index for track %u\n", frag->track_id);
+            index = candidate;
+            for (j = index->current_item; j < index->item_count; j++) {
+                if (frag->implicit_offset == index->items[j].moof_offset) {
+                    av_log(c->fc, AV_LOG_DEBUG, "found fragment index entry "
+                            "for track %u and moof_offset %"PRId64"\n",
+                            frag->track_id, index->items[j].moof_offset);
+                    frag->time = index->items[j].time;
+                    index->current_item = j + 1;
+                    found = 1;
+                    break;
+                }
             }
-        }
-        if (!found) {
-            av_log(c->fc, AV_LOG_WARNING, "track %u has a fragment index "
-                   "but it doesn't have an (in-order) entry for moof_offset "
-                   "%"PRId64"\n", frag->track_id, frag->implicit_offset);
+            if (found)
+                break;
         }
     }
+    if (index && !found) {
+        av_log(c->fc, AV_LOG_DEBUG, "track %u has a fragment index but "
+               "it doesn't have an (in-order) entry for moof_offset "
+               "%"PRId64"\n", frag->track_id, frag->implicit_offset);
+    }
     av_log(c->fc, AV_LOG_TRACE, "frag flags 0x%x\n", frag->flags);
     return 0;
 }
@@ -3233,7 +3531,7 @@ static int mov_read_trun(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     int64_t dts;
     int data_offset = 0;
     unsigned entries, first_sample_flags = frag->flags;
-    int flags, distance, i, found_keyframe = 0, err;
+    int flags, distance, i, err;
 
     for (i = 0; i < c->fc->nb_streams; i++) {
         if (c->fc->streams[i]->id == frag->track_id) {
@@ -3311,7 +3609,7 @@ static int mov_read_trun(MOVContext *c, AVIOContext *pb, MOVAtom atom)
                 }
                 av_log(c->fc, AV_LOG_DEBUG, "calculated into dts %"PRId64"\n", dts);
             } else {
-                dts = frag->time;
+                dts = frag->time - sc->time_offset;
                 av_log(c->fc, AV_LOG_DEBUG, "found frag time %"PRId64
                         ", using it for dts\n", dts);
             }
@@ -3320,8 +3618,8 @@ static int mov_read_trun(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         sc->ctts_count++;
         if (st->codec->codec_type == AVMEDIA_TYPE_AUDIO)
             keyframe = 1;
-        else if (!found_keyframe)
-            keyframe = found_keyframe =
+        else
+            keyframe =
                 !(sample_flags & (MOV_FRAG_SAMPLE_FLAG_IS_NON_SYNC |
                                   MOV_FRAG_SAMPLE_FLAG_DEPENDS_YES));
         if (keyframe)
@@ -3346,7 +3644,106 @@ static int mov_read_trun(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         return AVERROR_EOF;
 
     frag->implicit_offset = offset;
-    st->duration = sc->track_end = dts + sc->time_offset;
+
+    sc->track_end = dts + sc->time_offset;
+    if (st->duration < sc->track_end)
+        st->duration = sc->track_end;
+
+    return 0;
+}
+
+/* Parse a 'sidx' atom into a new MOVFragmentIndex (one item per reference entry) for its track. */
+static int mov_read_sidx(MOVContext *c, AVIOContext *pb, MOVAtom atom)
+{
+    int64_t offset = avio_tell(pb) + atom.size, pts;
+    uint8_t version;
+    unsigned i, track_id;
+    AVStream *st = NULL;
+    MOVStreamContext *sc;
+    MOVFragmentIndex *index = NULL;
+    MOVFragmentIndex **tmp;
+    AVRational timescale;
+
+    version = avio_r8(pb);
+    if (version > 1) {
+        avpriv_request_sample(c->fc, "sidx version %u", version);
+        return 0;
+    }
+
+    avio_rb24(pb); // flags
+    track_id = avio_rb32(pb); // Reference ID
+    for (i = 0; i < c->fc->nb_streams; i++) {
+        if (c->fc->streams[i]->id == track_id) {
+            st = c->fc->streams[i];
+            break;
+        }
+    }
+    if (!st) {
+        av_log(c->fc, AV_LOG_WARNING, "could not find corresponding track id %u\n", track_id);
+        return 0;
+    }
+    sc = st->priv_data;
+
+    timescale = av_make_q(1, avio_rb32(pb));
+    /* a zero (or wrapped-negative) timescale cannot be used for rescaling below */
+    if (timescale.den <= 0) {
+        av_log(c->fc, AV_LOG_ERROR, "Invalid sidx timescale 1/%d\n", timescale.den);
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (version == 0) {
+        pts = avio_rb32(pb);
+        offset += avio_rb32(pb);
+    } else {
+        pts = avio_rb64(pb);
+        offset += avio_rb64(pb);
+    }
+    avio_rb16(pb); // reserved
+
+    index = av_mallocz(sizeof(MOVFragmentIndex));
+    if (!index)
+        return AVERROR(ENOMEM);
+    index->track_id = track_id;
+
+    index->item_count = avio_rb16(pb);
+    index->items = av_mallocz_array(index->item_count, sizeof(MOVFragmentIndexItem));
+    if (!index->items) {
+        av_freep(&index);
+        return AVERROR(ENOMEM);
+    }
+
+    for (i = 0; i < index->item_count; i++) {
+        uint32_t size = avio_rb32(pb);
+        uint32_t duration = avio_rb32(pb);
+        if (size & 0x80000000) {
+            avpriv_request_sample(c->fc, "sidx reference_type 1");
+            av_freep(&index->items);
+            av_freep(&index);
+            return AVERROR_PATCHWELCOME;
+        }
+        avio_rb32(pb); // sap_flags
+        index->items[i].moof_offset = offset;
+        /* pts counts 1/timescale units; convert from that into the stream time base */
+        index->items[i].time = av_rescale_q(pts, timescale, st->time_base);
+        offset += size;
+        pts += duration;
+    }
+
+    st->duration = sc->track_end = av_rescale_q(pts, timescale, st->time_base);
+
+    tmp = av_realloc_array(c->fragment_index_data,
+                           c->fragment_index_count + 1,
+                           sizeof(MOVFragmentIndex*));
+    if (!tmp) {
+        av_freep(&index->items);
+        av_freep(&index);
+        return AVERROR(ENOMEM);
+    }
+    c->fragment_index_data = tmp;
+    c->fragment_index_data[c->fragment_index_count++] = index;
+
+    if (offset == avio_size(pb))
+        c->fragment_index_complete = 1;
     return 0;
 }
 
@@ -3488,6 +3885,10 @@ static int mov_read_uuid(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         0xa5, 0xd4, 0x0b, 0x30, 0xe8, 0x14, 0x11, 0xdd,
         0xba, 0x2f, 0x08, 0x00, 0x20, 0x0c, 0x9a, 0x66
     };
+    static const uint8_t uuid_xmp[] = {
+        0xbe, 0x7a, 0xcf, 0xcb, 0x97, 0xa9, 0x42, 0xe8,
+        0x9c, 0x71, 0x99, 0x94, 0x91, 0xe3, 0xaf, 0xac
+    };
 
     if (atom.size < sizeof(uuid) || atom.size == INT64_MAX)
         return AVERROR_INVALIDDATA;
@@ -3541,6 +3942,27 @@ static int mov_read_uuid(MOVContext *c, AVIOContext *pb, MOVAtom atom)
             }
         }
 
+        av_free(buffer);
+    } else if (!memcmp(uuid, uuid_xmp, sizeof(uuid))) {
+        uint8_t *buffer;
+        size_t len = atom.size - sizeof(uuid);
+
+        buffer = av_mallocz(len + 1);
+        if (!buffer) {
+            return AVERROR(ENOMEM);
+        }
+        ret = avio_read(pb, buffer, len);
+        if (ret < 0) {
+            av_free(buffer);
+            return ret;
+        } else if (ret != len) {
+            av_free(buffer);
+            return AVERROR_INVALIDDATA;
+        }
+        if (c->export_xmp) {
+            buffer[len] = '\0';
+            av_dict_set(&c->fc->metadata, "xmp", buffer, 0);
+        }
         av_free(buffer);
     }
     return 0;
@@ -3568,6 +3990,166 @@ static int mov_read_free(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     return 0;
 }
 
+/* 'frma' atom: for an 'encv'/'enca' sample entry, adopt the codec named by the
+ * tag stored here; for any other entry, only warn when the two tags differ. */
+static int mov_read_frma(MOVContext *c, AVIOContext *pb, MOVAtom atom)
+{
+    uint32_t frma_tag = avio_rl32(pb);
+    enum AVCodecID frma_id;
+    MOVStreamContext *msc;
+    AVStream *stream;
+    int is_encrypted;
+
+    if (c->fc->nb_streams < 1)
+        return 0;
+    stream = c->fc->streams[c->fc->nb_streams - 1];
+    msc    = stream->priv_data;
+
+    is_encrypted = msc->format == MKTAG('e','n','c','v') ||  // encrypted video
+                   msc->format == MKTAG('e','n','c','a');    // encrypted audio
+    if (!is_encrypted) {
+        if (frma_tag != msc->format) {
+            av_log(c->fc, AV_LOG_WARNING,
+                   "ignoring 'frma' atom of '%.4s', stream format is '%.4s'\n",
+                   (char*)&frma_tag, (char*)&msc->format);
+        }
+        return 0;
+    }
+
+    frma_id = mov_codec_id(stream, frma_tag);
+    if (stream->codec->codec_id == AV_CODEC_ID_NONE ||
+        stream->codec->codec_id == frma_id) {
+        /* no codec chosen yet, or the atom agrees with it: take it over */
+        stream->codec->codec_id = frma_id;
+        msc->format             = frma_tag;
+    } else {
+        av_log(c->fc, AV_LOG_WARNING,
+               "ignoring 'frma' atom of '%.4s', stream has codec id %d\n",
+               (char*)&frma_tag, stream->codec->codec_id);
+    }
+
+    return 0;
+}
+
+/* Read a 'senc' atom: keep its auxiliary info verbatim and set up the AES-CTR
+ * context for the last stream. Does nothing when no decryption key is set. */
+static int mov_read_senc(MOVContext *c, AVIOContext *pb, MOVAtom atom)
+{
+    AVStream *st;
+    MOVStreamContext *sc;
+    size_t auxiliary_info_size;
+
+    if (c->decryption_key_len == 0 || c->fc->nb_streams < 1)
+        return 0;
+
+    st = c->fc->streams[c->fc->nb_streams - 1];
+    sc = st->priv_data;
+
+    if (sc->cenc.aes_ctr) {
+        av_log(c->fc, AV_LOG_ERROR, "duplicate senc atom\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* validate before consuming the 8 header bytes; avio_read() takes an int size */
+    if (atom.size < 8 || atom.size - 8 > INT_MAX) {
+        av_log(c->fc, AV_LOG_ERROR, "senc atom size %"PRId64" invalid\n", atom.size);
+        return AVERROR_INVALIDDATA;
+    }
+
+    avio_r8(pb); /* version */
+    sc->cenc.use_subsamples = avio_rb24(pb) & 0x02; /* flags */
+    avio_rb32(pb);        /* entries */
+
+    /* save the auxiliary info as is */
+    auxiliary_info_size = atom.size - 8;
+    sc->cenc.auxiliary_info = av_malloc(auxiliary_info_size);
+    if (!sc->cenc.auxiliary_info)
+        return AVERROR(ENOMEM);
+
+    sc->cenc.auxiliary_info_end = sc->cenc.auxiliary_info + auxiliary_info_size;
+    sc->cenc.auxiliary_info_pos = sc->cenc.auxiliary_info;
+
+    if (avio_read(pb, sc->cenc.auxiliary_info, auxiliary_info_size) != auxiliary_info_size) {
+        av_log(c->fc, AV_LOG_ERROR, "failed to read the auxiliary info\n");
+        av_freep(&sc->cenc.auxiliary_info); /* do not keep a partially filled buffer */
+        sc->cenc.auxiliary_info_pos = sc->cenc.auxiliary_info_end = NULL;
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* initialize the cipher */
+    sc->cenc.aes_ctr = av_aes_ctr_alloc();
+    if (!sc->cenc.aes_ctr)
+        return AVERROR(ENOMEM);
+
+    return av_aes_ctr_init(sc->cenc.aes_ctr, c->decryption_key);
+}
+
+/* Decrypt one packet in place with the track's AES-CTR context. The IV, and when
+ * subsamples are in use the (clear, encrypted) byte counts, are consumed from the
+ * stored auxiliary info; any inconsistency yields AVERROR_INVALIDDATA. */
+static int cenc_filter(MOVContext *c, MOVStreamContext *sc, uint8_t *input, int size)
+{
+    uint8_t *const packet_end = input + size;
+    uint8_t *cur = input;
+    uint16_t nb_subsamples;
+
+    /* read the iv */
+    if (sc->cenc.auxiliary_info_end - sc->cenc.auxiliary_info_pos < AES_CTR_IV_SIZE) {
+        av_log(c->fc, AV_LOG_ERROR, "failed to read iv from the auxiliary info\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    av_aes_ctr_set_iv(sc->cenc.aes_ctr, sc->cenc.auxiliary_info_pos);
+    sc->cenc.auxiliary_info_pos += AES_CTR_IV_SIZE;
+
+    /* decrypt the whole packet when subsamples are not in use */
+    if (!sc->cenc.use_subsamples) {
+        av_aes_ctr_crypt(sc->cenc.aes_ctr, input, input, size);
+        return 0;
+    }
+
+    /* read the subsample count */
+    if (sc->cenc.auxiliary_info_end - sc->cenc.auxiliary_info_pos < sizeof(uint16_t)) {
+        av_log(c->fc, AV_LOG_ERROR, "failed to read subsample count from the auxiliary info\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    nb_subsamples = AV_RB16(sc->cenc.auxiliary_info_pos);
+    sc->cenc.auxiliary_info_pos += sizeof(uint16_t);
+
+    while (nb_subsamples-- > 0) {
+        uint16_t clear_len;
+        uint32_t cipher_len;
+
+        /* each entry is a 16-bit clear count followed by a 32-bit encrypted count */
+        if (sc->cenc.auxiliary_info_end - sc->cenc.auxiliary_info_pos < 6) {
+            av_log(c->fc, AV_LOG_ERROR, "failed to read subsample from the auxiliary info\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        clear_len  = AV_RB16(sc->cenc.auxiliary_info_pos);
+        cipher_len = AV_RB32(sc->cenc.auxiliary_info_pos + sizeof(uint16_t));
+        sc->cenc.auxiliary_info_pos += sizeof(uint16_t) + sizeof(uint32_t);
+
+        if ((uint64_t)clear_len + cipher_len > packet_end - cur) {
+            av_log(c->fc, AV_LOG_ERROR, "subsample size exceeds the packet size left\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        /* skip the clear bytes, then decrypt the encrypted ones */
+        cur += clear_len;
+        av_aes_ctr_crypt(sc->cenc.aes_ctr, cur, cur, cipher_len);
+        cur += cipher_len;
+    }
+
+    if (cur < packet_end) {
+        av_log(c->fc, AV_LOG_ERROR, "leftover packet bytes after subsample processing\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    return 0;
+}
+
 static const MOVParseTableEntry mov_default_parse_table[] = {
 { MKTAG('A','C','L','R'), mov_read_aclr },
 { MKTAG('A','P','R','G'), mov_read_avid },
@@ -3585,6 +4167,7 @@ static const MOVParseTableEntry mov_default_parse_table[] = {
 { MKTAG('e','l','s','t'), mov_read_elst },
 { MKTAG('e','n','d','a'), mov_read_enda },
 { MKTAG('f','i','e','l'), mov_read_fiel },
+{ MKTAG('a','d','r','m'), mov_read_adrm },
 { MKTAG('f','t','y','p'), mov_read_ftyp },
 { MKTAG('g','l','b','l'), mov_read_glbl },
 { MKTAG('h','d','l','r'), mov_read_hdlr },
@@ -3603,6 +4186,7 @@ static const MOVParseTableEntry mov_default_parse_table[] = {
 { MKTAG('a','l','a','c'), mov_read_alac }, /* alac specific atom */
 { MKTAG('a','v','c','C'), mov_read_glbl },
 { MKTAG('p','a','s','p'), mov_read_pasp },
+{ MKTAG('s','i','d','x'), mov_read_sidx },
 { MKTAG('s','t','b','l'), mov_read_default },
 { MKTAG('s','t','c','o'), mov_read_stco },
 { MKTAG('s','t','p','s'), mov_read_stps },
@@ -3628,6 +4212,7 @@ static const MOVParseTableEntry mov_default_parse_table[] = {
 { MKTAG('e','s','d','s'), mov_read_esds },
 { MKTAG('d','a','c','3'), mov_read_dac3 }, /* AC-3 info */
 { MKTAG('d','e','c','3'), mov_read_dec3 }, /* EAC-3 info */
+{ MKTAG('d','d','t','s'), mov_read_ddts }, /* DTS audio descriptor */
 { MKTAG('w','i','d','e'), mov_read_wide }, /* place holder */
 { MKTAG('w','f','e','x'), mov_read_wfex },
 { MKTAG('c','m','o','v'), mov_read_cmov },
@@ -3639,6 +4224,9 @@ static const MOVParseTableEntry mov_default_parse_table[] = {
 { MKTAG('C','i','n', 0x8e), mov_read_targa_y216 },
 { MKTAG('f','r','e','e'), mov_read_free },
 { MKTAG('-','-','-','-'), mov_read_custom },
+{ MKTAG('s','i','n','f'), mov_read_default },
+{ MKTAG('f','r','m','a'), mov_read_frma },
+{ MKTAG('s','e','n','c'), mov_read_senc },
 { 0, NULL }
 };
 
@@ -3715,6 +4303,14 @@ static int mov_read_default(MOVContext *c, AVIOContext *pb, MOVAtom atom)
                        atom.type == MKTAG('i','l','s','t')))
             parse = mov_read_udta_string;
 
+        // Supports parsing the QuickTime Metadata Keys.
+        // https://developer.apple.com/library/mac/documentation/QuickTime/QTFF/Metadata/Metadata.html
+        if (!parse && c->found_hdlr_mdta &&
+            atom.type == MKTAG('m','e','t','a') &&
+            a.type == MKTAG('k','e','y','s')) {
+            parse = mov_read_keys;
+        }
+
         if (!parse) { /* skip leaf atoms data */
             avio_skip(pb, a.size);
         } else {
@@ -3726,9 +4322,9 @@ static int mov_read_default(MOVContext *c, AVIOContext *pb, MOVAtom atom)
                 return err;
             }
             if (c->found_moov && c->found_mdat &&
-                ((!pb->seekable || c->fc->flags & AVFMT_FLAG_IGNIDX) ||
+                ((!pb->seekable || c->fc->flags & AVFMT_FLAG_IGNIDX || c->fragment_index_complete) ||
                  start_pos + a.size == avio_size(pb))) {
-                if (!pb->seekable || c->fc->flags & AVFMT_FLAG_IGNIDX)
+                if (!pb->seekable || c->fc->flags & AVFMT_FLAG_IGNIDX || c->fragment_index_complete)
                     c->next_root_atom = start_pos + a.size;
                 c->atom_depth --;
                 return 0;
@@ -3977,7 +4573,7 @@ static int mov_read_close(AVFormatContext *s)
         sc->drefs_count = 0;
 
         if (!sc->pb_is_copied)
-            avio_closep(&sc->pb);
+            ff_format_io_close(s, &sc->pb);
 
         sc->pb = NULL;
         av_freep(&sc->chunk_offsets);
@@ -3989,6 +4585,9 @@ static int mov_read_close(AVFormatContext *s)
         av_freep(&sc->elst_data);
         av_freep(&sc->rap_group);
         av_freep(&sc->display_matrix);
+
+        av_freep(&sc->cenc.auxiliary_info);
+        av_aes_ctr_free(sc->cenc.aes_ctr);
     }
 
     if (mov->dv_demux) {
@@ -3996,6 +4595,13 @@ static int mov_read_close(AVFormatContext *s)
         mov->dv_fctx = NULL;
     }
 
+    if (mov->meta_keys) {
+        for (i = 1; i < mov->meta_keys_count; i++) {
+            av_freep(&mov->meta_keys[i]);
+        }
+        av_freep(&mov->meta_keys);
+    }
+
     av_freep(&mov->trex_data);
     av_freep(&mov->bitrates);
 
@@ -4006,6 +4612,8 @@ static int mov_read_close(AVFormatContext *s)
     }
     av_freep(&mov->fragment_index_data);
 
+    av_freep(&mov->aes_decrypt);
+
     return 0;
 }
 
@@ -4157,7 +4765,14 @@ static int mov_read_header(AVFormatContext *s)
     MOVAtom atom = { AV_RL32("root") };
     int i;
 
+    if (mov->decryption_key_len != 0 && mov->decryption_key_len != AES_CTR_KEY_SIZE) {
+        av_log(s, AV_LOG_ERROR, "Invalid decryption key len %d expected %d\n",
+            mov->decryption_key_len, AES_CTR_KEY_SIZE);
+        return AVERROR(EINVAL);
+    }
+
     mov->fc = s;
+    mov->trak_index = -1;
     /* .mov and .mp4 aren't streamable anyway (only progressive download if moov is before mdat) */
     if (pb->seekable)
         atom.size = avio_size(pb);
@@ -4182,7 +4797,7 @@ static int mov_read_header(AVFormatContext *s)
     av_log(mov->fc, AV_LOG_TRACE, "on_parse_exit_offset=%"PRId64"\n", avio_tell(pb));
 
     if (pb->seekable) {
-        if (mov->chapter_track > 0)
+        if (mov->chapter_track > 0 && !mov->ignore_chapters)
             mov_read_chapters(s);
         for (i = 0; i < s->nb_streams; i++)
             if (s->streams[i]->codec->codec_tag == AV_RL32("tmcd"))
@@ -4230,6 +4845,13 @@ static int mov_read_header(AVFormatContext *s)
                     return err;
             }
         }
+        if (mov->handbrake_version &&
+            mov->handbrake_version <= 1000000*0 + 1000*10 + 2 &&  // 0.10.2
+            st->codec->codec_id == AV_CODEC_ID_MP3
+        ) {
+            av_log(s, AV_LOG_VERBOSE, "Forcing full parsing for mp3 stream\n");
+            st->need_parsing = AVSTREAM_PARSE_FULL;
+        }
     }
 
     if (mov->trex_data) {
@@ -4293,6 +4915,7 @@ static int mov_read_header(AVFormatContext *s)
             break;
         }
     }
+    ff_configure_buffers_for_index(s, AV_TIME_BASE);
 
     return 0;
 }
@@ -4330,6 +4953,52 @@ static int should_retry(AVIOContext *pb, int error_code) {
     return 1;
 }
 
+/* Seek to the root-level atom at file offset target and parse from there with
+ * mov_read_default(), unless a fragment index item at that offset already had
+ * headers_read set. Returns 1 after parsing, 0 if skipped, <0 on error. */
+static int mov_switch_root(AVFormatContext *s, int64_t target)
+{
+    MOVContext *mov = s->priv_data;
+    int n, k, seen_before = 0;
+
+    if (avio_seek(s->pb, target, SEEK_SET) != target) {
+        av_log(mov->fc, AV_LOG_ERROR, "root atom offset 0x%"PRIx64": partial file\n", target);
+        return AVERROR_INVALIDDATA;
+    }
+    mov->next_root_atom = 0;
+
+    for (n = 0; n < mov->fragment_index_count; n++) {
+        MOVFragmentIndex *frag = mov->fragment_index_data[n];
+        int hit = 0;
+        for (k = 0; k < frag->item_count; k++) {
+            MOVFragmentIndexItem *entry = &frag->items[k];
+            if (hit) {
+                mov->next_root_atom = entry->moof_offset;
+                break; /* the item after the match gives the next root atom */
+            }
+            if (entry->moof_offset != target)
+                continue;
+            frag->current_item = FFMIN(k, frag->current_item);
+            if (entry->headers_read)
+                seen_before = 1;
+            entry->headers_read = 1;
+            hit = 1;
+        }
+        if (!hit)
+            frag->current_item = 0; /* target is not listed in this index */
+    }
+    if (seen_before)
+        return 0;
+
+    mov->found_mdat = 0;
+    if (mov_read_default(mov, s->pb, (MOVAtom){ AV_RL32("root"), INT64_MAX }) < 0)
+        return AVERROR_EOF;
+    if (avio_feof(s->pb))
+        return AVERROR_EOF;
+    av_log(s, AV_LOG_TRACE, "read fragments, offset 0x%"PRIx64"\n", avio_tell(s->pb));
+    return 1;
+}
+
 static int mov_read_packet(AVFormatContext *s, AVPacket *pkt)
 {
     MOVContext *mov = s->priv_data;
@@ -4340,19 +5009,11 @@ static int mov_read_packet(AVFormatContext *s, AVPacket *pkt)
     mov->fc = s;
  retry:
     sample = mov_find_next_sample(s, &st);
-    if (!sample) {
-        mov->found_mdat = 0;
+    if (!sample || (mov->next_root_atom && sample->pos > mov->next_root_atom)) {
         if (!mov->next_root_atom)
             return AVERROR_EOF;
-        if (avio_seek(s->pb, mov->next_root_atom, SEEK_SET) != mov->next_root_atom) {
-            av_log(mov->fc, AV_LOG_ERROR, "next root atom offset 0x%"PRIx64": partial file\n", mov->next_root_atom);
-            return AVERROR_INVALIDDATA;
-        }
-        mov->next_root_atom = 0;
-        if (mov_read_default(mov, s->pb, (MOVAtom){ AV_RL32("root"), INT64_MAX }) < 0 ||
-            avio_feof(s->pb))
-            return AVERROR_EOF;
-        av_log(s, AV_LOG_TRACE, "read fragments, offset 0x%"PRIx64"\n", avio_tell(s->pb));
+        if ((ret = mov_switch_root(s, mov->next_root_atom)) < 0)
+            return ret;
         goto retry;
     }
     sc = st->priv_data;
@@ -4424,6 +5085,41 @@ static int mov_read_packet(AVFormatContext *s, AVPacket *pkt)
     pkt->flags |= sample->flags & AVINDEX_KEYFRAME ? AV_PKT_FLAG_KEY : 0;
     pkt->pos = sample->pos;
 
+    if (mov->aax_mode)
+        aax_filter(pkt->data, pkt->size, mov);
+
+    if (sc->cenc.aes_ctr) {
+        ret = cenc_filter(mov, sc, pkt->data, pkt->size);
+        if (ret) {
+            return ret;
+        }
+    }
+
+    return 0;
+}
+
+/* With a complete fragment index, make sure the fragment covering timestamp in
+ * st's track is loaded; returns mov_switch_root()'s result if it had to be. */
+static int mov_seek_fragment(AVFormatContext *s, AVStream *st, int64_t timestamp)
+{
+    MOVContext *mov = s->priv_data;
+    int n, k;
+
+    if (!mov->fragment_index_complete)
+        return 0;
+
+    for (n = 0; n < mov->fragment_index_count; n++) {
+        MOVFragmentIndex *frag = mov->fragment_index_data[n];
+        if (frag->track_id != st->id)
+            continue;
+        /* walk backwards to the last item whose time is <= timestamp */
+        for (k = frag->item_count - 1; k >= 0; k--) {
+            MOVFragmentIndexItem *entry = &frag->items[k];
+            if (entry->time > timestamp)
+                continue;
+            return entry->headers_read ? 0 : mov_switch_root(s, entry->moof_offset);
+        }
+    }
     return 0;
 }
 
@@ -4433,6 +5129,10 @@ static int mov_seek_stream(AVFormatContext *s, AVStream *st, int64_t timestamp,
     int sample, time_sample;
     int i;
 
+    int ret = mov_seek_fragment(s, st, timestamp);
+    if (ret < 0)
+        return ret;
+
     sample = av_index_search_timestamp(st, timestamp, flags);
     av_log(s, AV_LOG_TRACE, "stream %d, timestamp %"PRId64", sample %d\n", st->index, timestamp, sample);
     if (sample < 0 && st->nb_index_entries && timestamp < st->index_entries[0].timestamp)
@@ -4514,17 +5214,19 @@ static int mov_read_seek(AVFormatContext *s, int stream_index, int64_t sample_ti
 static const AVOption mov_options[] = {
     {"use_absolute_path",
         "allow using absolute path when opening alias, this is a possible security issue",
-        OFFSET(use_absolute_path), FF_OPT_TYPE_INT, {.i64 = 0},
+        OFFSET(use_absolute_path), AV_OPT_TYPE_BOOL, {.i64 = 0},
         0, 1, FLAGS},
     {"seek_streams_individually",
         "Seek each stream individually to the to the closest point",
-        OFFSET(seek_individually), AV_OPT_TYPE_INT, { .i64 = 1 },
+        OFFSET(seek_individually), AV_OPT_TYPE_BOOL, { .i64 = 1 },
+        0, 1, FLAGS},
+    {"ignore_editlist", "", OFFSET(ignore_editlist), AV_OPT_TYPE_BOOL, {.i64 = 0},
         0, 1, FLAGS},
-    {"ignore_editlist", "", OFFSET(ignore_editlist), FF_OPT_TYPE_INT, {.i64 = 0},
+    {"ignore_chapters", "", OFFSET(ignore_chapters), AV_OPT_TYPE_BOOL, {.i64 = 0},
         0, 1, FLAGS},
     {"use_mfra_for",
         "use mfra for fragment timestamps",
-        OFFSET(use_mfra_for), FF_OPT_TYPE_INT, {.i64 = FF_MOV_FLAG_MFRA_AUTO},
+        OFFSET(use_mfra_for), AV_OPT_TYPE_INT, {.i64 = FF_MOV_FLAG_MFRA_AUTO},
         -1, FF_MOV_FLAG_MFRA_PTS, FLAGS,
         "use_mfra_for"},
     {"auto", "auto", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MOV_FLAG_MFRA_AUTO}, 0, 0,
@@ -4534,9 +5236,19 @@ static const AVOption mov_options[] = {
     {"pts", "pts", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MOV_FLAG_MFRA_PTS}, 0, 0,
         FLAGS, "use_mfra_for" },
     { "export_all", "Export unrecognized metadata entries", OFFSET(export_all),
-        AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, .flags = FLAGS },
+        AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, .flags = FLAGS },
     { "export_xmp", "Export full XMP metadata", OFFSET(export_xmp),
-        AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, .flags = FLAGS },
+        AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, .flags = FLAGS },
+    { "activation_bytes", "Secret bytes for Audible AAX files", OFFSET(activation_bytes),
+        AV_OPT_TYPE_BINARY, .flags = AV_OPT_FLAG_DECODING_PARAM },
+    { "audible_fixed_key", // extracted from libAAX_SDK.so and AAXSDKWin.dll files!
+        "Fixed key used for handling Audible AAX files", OFFSET(audible_fixed_key),
+        AV_OPT_TYPE_BINARY, {.str="77214d4b196a87cd520045fd20a51d67"},
+        .flags = AV_OPT_FLAG_DECODING_PARAM },
+    { "decryption_key", "The media decryption key (hex)", OFFSET(decryption_key), AV_OPT_TYPE_BINARY, .flags = AV_OPT_FLAG_DECODING_PARAM },
+    { "enable_drefs", "Enable external track support.", OFFSET(enable_drefs), AV_OPT_TYPE_BOOL,
+        {.i64 = 0}, 0, 1, FLAGS },
+
     { NULL },
 };
 
diff --git a/libavformat/mov_chan.c b/libavformat/mov_chan.c
index a2fa8d64..cba07c51 100644
--- a/libavformat/mov_chan.c
+++ b/libavformat/mov_chan.c
@@ -45,7 +45,7 @@
  *            do not specify a particular ordering of those channels."
  */
 enum MovChannelLayoutTag {
-    MOV_CH_LAYOUT_UNKNOWN               = 0xFFFF0000,
+#define MOV_CH_LAYOUT_UNKNOWN             0xFFFF0000
     MOV_CH_LAYOUT_USE_DESCRIPTIONS      = (  0 << 16) | 0,
     MOV_CH_LAYOUT_USE_BITMAP            = (  1 << 16) | 0,
     MOV_CH_LAYOUT_DISCRETEINORDER       = (147 << 16) | 0,
diff --git a/libavformat/movenc-test.c b/libavformat/movenc-test.c
new file mode 100644
index 00000000..8c69c767
--- /dev/null
+++ b/libavformat/movenc-test.c
@@ -0,0 +1,676 @@
+/*
+ * Copyright (c) 2015 Martin Storsjo
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include "libavutil/intreadwrite.h"
+#include "libavutil/mathematics.h"
+#include "libavutil/md5.h"
+
+#include "avformat.h"
+
+#if HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#if !HAVE_GETOPT
+#include "compat/getopt.c"
+#endif
+
+#define HASH_SIZE 16 /* size of the digest buffer filled by av_md5_final() */
+
+static const uint8_t h264_extradata[] = { /* copied into the video stream by init_fps() */
+    0x01, 0x4d, 0x40, 0x1e, 0xff, 0xe1, 0x00, 0x02, 0x67, 0x4d, 0x01, 0x00, 0x02, 0x68, 0xef
+};
+static const uint8_t aac_extradata[] = { /* copied into the audio stream by init_fps() */
+    0x12, 0x10
+};
+
+
+static const char *format = "mp4"; /* muxer name for av_guess_format() and file suffix for -w */
+AVFormatContext *ctx;               /* muxer context of the running test case */
+uint8_t iobuf[32768];               /* buffer handed to avio_alloc_context() */
+AVDictionary *opts;                 /* options passed to the next avformat_write_header() */
+
+int write_file;                     /* set by -w: also store each segment in a file */
+const char *cur_name;               /* name given to the last init_out() */
+FILE* out;                          /* file opened by init_out() when write_file is set, else NULL */
+int out_size;                       /* bytes seen by io_write() since the last init_out() */
+struct AVMD5* md5;                  /* running hash, fed by io_write() */
+uint8_t hash[HASH_SIZE];            /* digest produced by the last close_out() */
+
+AVStream *video_st, *audio_st;      /* the two streams created by init_fps() */
+int64_t audio_dts, video_dts;       /* dts of the next packet mux_frames() generates per stream */
+
+int bframes;                        /* when zero, mux_frames() forces pts == dts for video */
+int64_t duration;                   /* video packet duration */
+int64_t audio_duration;             /* audio packet duration */
+int frames;                         /* video frames generated since init_fps() */
+int gop_size;                       /* a keyframe is generated every gop_size video frames */
+int64_t next_p_pts;                 /* pts of the last reordered P-frame; dts after the next B-frame */
+enum AVPictureType last_picture;    /* type of the last generated video frame */
+int skip_write;                     /* mux_frames() advances timestamps but writes nothing */
+int skip_write_audio;               /* mux_frames() drops the packets of stream 1 */
+int clear_duration;                 /* mux_frames() zeroes pkt.duration before writing */
+
+int num_warnings;                   /* AV_LOG_WARNING messages seen by count_warnings() */
+
+int check_faults;                   /* failed check() calls; non-zero makes main() return 1 */
+
+
+/* av_log callback that prints nothing and only tallies AV_LOG_WARNING messages */
+static void count_warnings(void *avcl, int level, const char *fmt, va_list vl)
+{
+    num_warnings += level == AV_LOG_WARNING;
+}
+
+static void init_count_warnings(void)
+{
+    av_log_set_callback(count_warnings); /* log messages are now counted, not printed */
+    num_warnings = 0;
+}
+
+static void reset_count_warnings(void)
+{
+    av_log_set_callback(av_log_default_callback); /* num_warnings is left untouched */
+}
+
+static int io_write(void *opaque, uint8_t *buf, int size)
+{ /* AVIO write callback: count, hash and optionally store everything the muxer emits */
+    out_size += size;              /* bytes emitted since the last init_out() */
+    av_md5_update(md5, buf, size); /* the hash covers all output, stored or not */
+    if (out && fwrite(buf, 1, size, out) != (size_t)size)
+        perror(cur_name);          /* report a short write to the -w file, keep going */
+    return size;                   /* the whole buffer is always reported as written */
+}
+
+/* Begin a new segment: reset hash and size; with -w also open "<name>.<format>" */
+static void init_out(const char *name)
+{
+    char path[100];
+    cur_name = name;
+    out_size = 0;
+    av_md5_init(md5);
+    if (!write_file)
+        return;
+    snprintf(path, sizeof(path), "%s.%s", name, format);
+    out = fopen(path, "wb");
+    if (!out)
+        perror(path);
+}
+
+static void close_out(void)
+{ /* finish the segment: print "<md5> <size> <name>" and close the -w file, if any */
+    int i;
+    av_md5_final(md5, hash);
+    for (i = 0; i < HASH_SIZE; i++)
+        printf("%02x", hash[i]);
+    printf(" %d %s\n", out_size, cur_name);
+    if (out && fclose(out))
+        perror(cur_name); /* the final flush failed: the stored file is incomplete */
+    out = NULL;
+}
+
+static void check_func(int value, int line, const char *msg, ...)
+{ /* print "<line>: <message>" and count a fault when value is zero */
+    va_list args;
+    if (value)
+        return;           /* expectation held: stay silent */
+    check_faults++;
+    printf("%d: ", line); /* line is the call site, supplied by check() */
+    va_start(args, msg);
+    vprintf(msg, args);
+    va_end(args);
+    printf("\n");
+}
+#define check(value, ...) check_func(value, __LINE__, __VA_ARGS__)
+
+static void init_fps(int bf, int audio_preroll, int fps)
+{ /* build ctx with one video and one audio stream and write the header; exit(1) on failure */
+    AVStream *st;
+    ctx = avformat_alloc_context();
+    if (!ctx)
+        exit(1);
+    ctx->oformat = av_guess_format(format, NULL, NULL);
+    if (!ctx->oformat)
+        exit(1);
+    ctx->pb = avio_alloc_context(iobuf, sizeof(iobuf), AVIO_FLAG_WRITE, NULL, NULL, io_write, NULL);
+    if (!ctx->pb)
+        exit(1);
+    ctx->flags |= AVFMT_FLAG_BITEXACT; /* NOTE(review): presumably keeps the hashed output reproducible - confirm */
+
+    st = avformat_new_stream(ctx, NULL); /* first stream: video, addressed as index 0 in mux_frames() */
+    if (!st)
+        exit(1);
+    st->codec->codec_type = AVMEDIA_TYPE_VIDEO;
+    st->codec->codec_id = AV_CODEC_ID_H264;
+    st->codec->width = 640;
+    st->codec->height = 480;
+    st->time_base.num = 1;
+    st->time_base.den = 30;
+    st->codec->extradata_size = sizeof(h264_extradata);
+    st->codec->extradata = av_mallocz(st->codec->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
+    if (!st->codec->extradata)
+        exit(1);
+    memcpy(st->codec->extradata, h264_extradata, sizeof(h264_extradata));
+    video_st = st;
+
+    st = avformat_new_stream(ctx, NULL); /* second stream: audio, addressed as index 1 in mux_frames() */
+    if (!st)
+        exit(1);
+    st->codec->codec_type = AVMEDIA_TYPE_AUDIO;
+    st->codec->codec_id = AV_CODEC_ID_AAC;
+    st->codec->sample_rate = 44100;
+    st->codec->channels = 2;
+    st->time_base.num = 1;
+    st->time_base.den = 44100;
+    st->codec->extradata_size = sizeof(aac_extradata);
+    st->codec->extradata = av_mallocz(st->codec->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
+    if (!st->codec->extradata)
+        exit(1);
+    memcpy(st->codec->extradata, aac_extradata, sizeof(aac_extradata));
+    audio_st = st;
+
+    if (avformat_write_header(ctx, &opts) < 0) /* opts holds what main() queued with av_dict_set() */
+        exit(1);
+    av_dict_free(&opts);
+
+    frames = 0;
+    gop_size = 30;
+    duration = video_st->time_base.den / fps; /* video packet duration in time base ticks */
+    audio_duration = 1024LL * audio_st->time_base.den / audio_st->codec->sample_rate; /* 1024 samples per packet */
+    if (audio_preroll)
+        audio_preroll = 2048LL * audio_st->time_base.den / audio_st->codec->sample_rate; /* flag becomes 2048 samples */
+
+    bframes = bf;
+    video_dts = bframes ? -duration : 0; /* with B-frames the first dts is one frame before zero */
+    audio_dts = -audio_preroll; /* audio starts the preroll amount before zero */
+}
+
+static void init(int bf, int audio_preroll)
+{
+    init_fps(bf, audio_preroll, 30); /* 30 fps: one frame per tick of the 1/30 video time base */
+}
+
+static void mux_frames(int n)
+{ /* generate n more video frames plus the audio packets that precede them in dts order */
+    int end_frames = frames + n;
+    while (1) {
+        AVPacket pkt;
+        uint8_t pktdata[8] = { 0 };
+        av_init_packet(&pkt);
+
+        if (av_compare_ts(audio_dts, audio_st->time_base, video_dts, video_st->time_base) < 0) {
+            pkt.dts = pkt.pts = audio_dts; /* audio is behind video: emit an audio packet */
+            pkt.stream_index = 1;
+            pkt.duration = audio_duration;
+            audio_dts += audio_duration;
+        } else {
+            if (frames == end_frames)
+                break; /* the loop only ends when a video frame would be due */
+            pkt.dts = video_dts;
+            pkt.stream_index = 0;
+            pkt.duration = duration;
+            if ((frames % gop_size) == 0) { /* first frame of a GOP: keyframe */
+                pkt.flags |= AV_PKT_FLAG_KEY;
+                last_picture = AV_PICTURE_TYPE_I;
+                pkt.pts = pkt.dts + duration;
+                video_dts = pkt.pts;
+            } else {
+                if (last_picture == AV_PICTURE_TYPE_P) { /* a B-frame follows each P-frame */
+                    last_picture = AV_PICTURE_TYPE_B;
+                    pkt.pts = pkt.dts;
+                    video_dts = next_p_pts;
+                } else {
+                    last_picture = AV_PICTURE_TYPE_P;
+                    if (((frames + 1) % gop_size) == 0) { /* last frame of the GOP: no B-frame follows */
+                        pkt.pts = pkt.dts + duration;
+                        video_dts = pkt.pts;
+                    } else {
+                        next_p_pts = pkt.pts = pkt.dts + 2 * duration; /* shown after the B-frame that follows */
+                        video_dts += duration;
+                    }
+                }
+            }
+            if (!bframes)
+                pkt.pts = pkt.dts; /* without B-frames the reordering above is overridden */
+            frames++;
+        }
+
+        if (clear_duration)
+            pkt.duration = 0;
+        AV_WB32(pktdata + 4, pkt.pts); /* payload: four zero bytes, then the pts */
+        pkt.data = pktdata;
+        pkt.size = 8;
+        if (skip_write)
+            continue; /* timestamps have advanced, but nothing is written */
+        if (skip_write_audio && pkt.stream_index == 1)
+            continue;
+        av_write_frame(ctx, &pkt); /* the return value is not checked */
+    }
+}
+
+static void mux_gops(int n)
+{
+    mux_frames(gop_size * n); /* n whole GOPs of gop_size video frames each */
+}
+
+static void skip_gops(int n)
+{
+    skip_write = 1; /* mux_frames() then only advances the timestamps */
+    mux_gops(n);
+    skip_write = 0;
+}
+
+static void signal_init_ts(void)
+{ /* write one zero-size packet per stream carrying the current start timestamps */
+    AVPacket pkt;
+    av_init_packet(&pkt);
+    pkt.size = 0;
+    pkt.data = NULL;
+
+    pkt.stream_index = 0; /* video */
+    pkt.dts = video_dts;
+    pkt.pts = 0;
+    av_write_frame(ctx, &pkt);
+
+    pkt.stream_index = 1; /* audio */
+    pkt.dts = pkt.pts = audio_dts;
+    av_write_frame(ctx, &pkt);
+}
+
+static void finish(void)
+{ /* write the trailer and release the muxer context created by init_fps() */
+    av_write_trailer(ctx);
+    av_free(ctx->pb); /* frees the AVIOContext only; iobuf is a static array */
+    avformat_free_context(ctx);
+    ctx = NULL;
+}
+
+static void help(void)
+{ /* print the usage text; main() calls this for -h and for unknown options */
+    printf("movenc-test [-w]\n"
+           "-w          write output into files\n");
+}
+
+int main(int argc, char **argv)
+{ /* run every muxing scenario below; the exit status is 1 if any check() failed */
+    int c;
+    uint8_t header[HASH_SIZE];  /* reference digest of an init segment */
+    uint8_t content[HASH_SIZE]; /* reference digest of fragment data */
+    int empty_moov_pos;
+    int prev_pos;
+
+    for (;;) {
+        c = getopt(argc, argv, "wh");
+        if (c == -1)
+            break;
+        switch (c) {
+        case 'w':
+            write_file = 1;
+            break;
+        default:
+        case 'h':
+            help();
+            return 0;
+        }
+    }
+
+    av_register_all();
+
+    md5 = av_md5_alloc();
+    if (!md5)
+        return 1;
+
+    // Write a fragmented file with an initial moov that actually contains some
+    // samples. One moov+mdat with 1 second of data and one moof+mdat with 1
+    // second of data.
+    init_out("non-empty-moov");
+    av_dict_set(&opts, "movflags", "frag_keyframe", 0);
+    init(0, 0);
+    mux_gops(2);
+    finish();
+    close_out();
+
+    // Write a similar file, but with b-frames and audio preroll, handled
+    // via an edit list.
+    init_out("non-empty-moov-elst");
+    av_dict_set(&opts, "movflags", "frag_keyframe", 0);
+    av_dict_set(&opts, "use_editlist", "1", 0);
+    init(1, 1);
+    mux_gops(2);
+    finish();
+    close_out();
+
+    // Use b-frames but no audio-preroll, and without an edit list.
+    // Due to avoid_negative_ts == AVFMT_AVOID_NEG_TS_MAKE_ZERO, the dts
+    // of the first audio packet is > 0, but it is set to zero since edit
+    // lists aren't used, increasing the duration of the first packet instead.
+    init_out("non-empty-moov-no-elst");
+    av_dict_set(&opts, "movflags", "frag_keyframe", 0);
+    av_dict_set(&opts, "use_editlist", "0", 0);
+    init(1, 0);
+    mux_gops(2);
+    finish();
+    close_out();
+
+    format = "ismv";
+    // Write an ISMV, with b-frames and audio preroll.
+    init_out("ismv");
+    av_dict_set(&opts, "movflags", "frag_keyframe", 0);
+    init(1, 1);
+    mux_gops(2);
+    finish();
+    close_out();
+    format = "mp4";
+
+    // An initial moov that doesn't contain any samples, followed by two
+    // moof+mdat pairs.
+    init_out("empty-moov");
+    av_dict_set(&opts, "movflags", "frag_keyframe+empty_moov", 0);
+    av_dict_set(&opts, "use_editlist", "0", 0);
+    init(0, 0);
+    mux_gops(2);
+    finish();
+    close_out();
+    memcpy(content, hash, HASH_SIZE);
+
+    // Similar to the previous one, but with input that doesn't start at
+    // pts/dts 0. avoid_negative_ts behaves in the same way as
+    // in non-empty-moov-no-elst above.
+    init_out("empty-moov-no-elst");
+    av_dict_set(&opts, "movflags", "frag_keyframe+empty_moov", 0);
+    init(1, 0);
+    mux_gops(2);
+    finish();
+    close_out();
+
+    // Same as the previous one, but disable avoid_negative_ts (which
+    // would require using an edit list, but with empty_moov, one can't
+    // write a sensible edit list, when the start timestamps aren't known).
+    // This should trigger a warning - we check that the warning is produced.
+    init_count_warnings();
+    init_out("empty-moov-no-elst-no-adjust");
+    av_dict_set(&opts, "movflags", "frag_keyframe+empty_moov", 0);
+    av_dict_set(&opts, "avoid_negative_ts", "0", 0);
+    init(1, 0);
+    mux_gops(2);
+    finish();
+    close_out();
+
+    reset_count_warnings();
+    check(num_warnings > 0, "No warnings printed for unhandled start offset");
+
+    // Verify that delay_moov produces the same as empty_moov for
+    // simple input
+    init_out("delay-moov");
+    av_dict_set(&opts, "movflags", "frag_keyframe+delay_moov", 0);
+    av_dict_set(&opts, "use_editlist", "0", 0);
+    init(0, 0);
+    mux_gops(2);
+    finish();
+    close_out();
+    check(!memcmp(hash, content, HASH_SIZE), "delay_moov differs from empty_moov");
+
+    // Test writing content that requires an edit list using delay_moov
+    init_out("delay-moov-elst");
+    av_dict_set(&opts, "movflags", "frag_keyframe+delay_moov", 0);
+    init(1, 1);
+    mux_gops(2);
+    finish();
+    close_out();
+
+    // Test writing a file with one track lacking packets, with delay_moov.
+    skip_write_audio = 1;
+    init_out("delay-moov-empty-track");
+    av_dict_set(&opts, "movflags", "frag_keyframe+delay_moov", 0);
+    init(0, 0);
+    mux_gops(2);
+    // The automatic flushing shouldn't output anything, since we're still
+    // waiting for data for some tracks
+    check(out_size == 0, "delay_moov flushed prematurely");
+    // When closed (or manually flushed), all the written data should still
+    // be output.
+    finish();
+    close_out();
+    check(out_size > 0, "delay_moov didn't output anything");
+
+    // Check that manually flushing still outputs things as expected. This
+    // produces two fragments, while the one above produces only one.
+    init_out("delay-moov-empty-track-flush");
+    av_dict_set(&opts, "movflags", "frag_custom+delay_moov", 0);
+    init(0, 0);
+    mux_gops(1);
+    av_write_frame(ctx, NULL); // Force writing the moov
+    check(out_size > 0, "No moov written");
+    av_write_frame(ctx, NULL);
+    mux_gops(1);
+    av_write_frame(ctx, NULL);
+    finish();
+    close_out();
+
+    skip_write_audio = 0;
+
+
+
+    // Verify that the header written by delay_moov when manually flushed
+    // is identical to the one by empty_moov.
+    init_out("empty-moov-header");
+    av_dict_set(&opts, "movflags", "frag_keyframe+empty_moov", 0);
+    av_dict_set(&opts, "use_editlist", "0", 0);
+    init(0, 0);
+    close_out();
+    memcpy(header, hash, HASH_SIZE);
+    init_out("empty-moov-content");
+    mux_gops(2);
+    // Written 2 seconds of content, with an automatic flush after 1 second.
+    check(out_size > 0, "No automatic flush?");
+    empty_moov_pos = prev_pos = out_size;
+    // Manually flush the second fragment
+    av_write_frame(ctx, NULL);
+    check(out_size > prev_pos, "No second fragment flushed?");
+    prev_pos = out_size;
+    // Check that an extra flush doesn't output any more data
+    av_write_frame(ctx, NULL);
+    check(out_size == prev_pos, "More data written?");
+    close_out();
+    memcpy(content, hash, HASH_SIZE);
+    // Ignore the trailer written here
+    finish();
+
+    init_out("delay-moov-header");
+    av_dict_set(&opts, "movflags", "frag_custom+delay_moov", 0);
+    av_dict_set(&opts, "use_editlist", "0", 0);
+    init(0, 0);
+    check(out_size == 0, "Output written during init with delay_moov");
+    mux_gops(1); // Write 1 second of content
+    av_write_frame(ctx, NULL); // Force writing the moov
+    close_out();
+    check(!memcmp(hash, header, HASH_SIZE), "delay_moov header differs from empty_moov");
+    init_out("delay-moov-content");
+    av_write_frame(ctx, NULL); // Flush the first fragment
+    check(out_size == empty_moov_pos, "Manually flushed content differs from automatically flushed, %d vs %d", out_size, empty_moov_pos);
+    mux_gops(1); // Write the rest of the content
+    av_write_frame(ctx, NULL); // Flush the second fragment
+    close_out();
+    check(!memcmp(hash, content, HASH_SIZE), "delay_moov content differs from empty_moov");
+    finish();
+
+
+    // Verify that we can produce an identical second fragment without
+    // writing the first one. First write the reference fragments that
+    // we want to reproduce.
+    av_dict_set(&opts, "movflags", "frag_custom+empty_moov+dash", 0);
+    init(0, 0);
+    mux_gops(1);
+    av_write_frame(ctx, NULL); // Output the first fragment
+    init_out("empty-moov-second-frag");
+    mux_gops(1);
+    av_write_frame(ctx, NULL); // Output the second fragment
+    close_out();
+    memcpy(content, hash, HASH_SIZE);
+    finish();
+
+    // Produce the same second fragment without actually writing the first
+    // one before.
+    av_dict_set(&opts, "movflags", "frag_custom+empty_moov+dash+frag_discont", 0);
+    av_dict_set(&opts, "fragment_index", "2", 0);
+    av_dict_set(&opts, "avoid_negative_ts", "0", 0);
+    av_dict_set(&opts, "use_editlist", "0", 0);
+    init(0, 0);
+    skip_gops(1);
+    init_out("empty-moov-second-frag-discont");
+    mux_gops(1);
+    av_write_frame(ctx, NULL); // Output the second fragment
+    close_out();
+    check(!memcmp(hash, content, HASH_SIZE), "discontinuously written fragment differs");
+    finish();
+
+    // Produce the same thing by using delay_moov, which requires a slightly
+    // different call sequence.
+    av_dict_set(&opts, "movflags", "frag_custom+delay_moov+dash+frag_discont", 0);
+    av_dict_set(&opts, "fragment_index", "2", 0);
+    init(0, 0);
+    skip_gops(1);
+    mux_gops(1);
+    av_write_frame(ctx, NULL); // Output the moov
+    init_out("delay-moov-second-frag-discont");
+    av_write_frame(ctx, NULL); // Output the second fragment
+    close_out();
+    check(!memcmp(hash, content, HASH_SIZE), "discontinuously written fragment differs");
+    finish();
+
+
+    // Test discontinuously written fragments with b-frames (where the
+    // assumption of starting at pts=0 works) but not with audio preroll
+    // (which can't be guessed).
+    av_dict_set(&opts, "movflags", "frag_custom+delay_moov+dash", 0);
+    init(1, 0);
+    mux_gops(1);
+    init_out("delay-moov-elst-init");
+    av_write_frame(ctx, NULL); // Output the moov
+    close_out();
+    memcpy(header, hash, HASH_SIZE);
+    av_write_frame(ctx, NULL); // Output the first fragment
+    init_out("delay-moov-elst-second-frag");
+    mux_gops(1);
+    av_write_frame(ctx, NULL); // Output the second fragment
+    close_out();
+    memcpy(content, hash, HASH_SIZE);
+    finish();
+
+    av_dict_set(&opts, "movflags", "frag_custom+delay_moov+dash+frag_discont", 0);
+    av_dict_set(&opts, "fragment_index", "2", 0);
+    init(1, 0);
+    skip_gops(1);
+    mux_gops(1); // Write the second fragment
+    init_out("delay-moov-elst-init-discont");
+    av_write_frame(ctx, NULL); // Output the moov
+    close_out();
+    check(!memcmp(hash, header, HASH_SIZE), "discontinuously written header differs");
+    init_out("delay-moov-elst-second-frag-discont");
+    av_write_frame(ctx, NULL); // Output the second fragment
+    close_out();
+    check(!memcmp(hash, content, HASH_SIZE), "discontinuously written fragment differs");
+    finish();
+
+
+    // Test discontinuously written fragments with b-frames and audio preroll,
+    // properly signaled.
+    av_dict_set(&opts, "movflags", "frag_custom+delay_moov+dash", 0);
+    init(1, 1);
+    mux_gops(1);
+    init_out("delay-moov-elst-signal-init");
+    av_write_frame(ctx, NULL); // Output the moov
+    close_out();
+    memcpy(header, hash, HASH_SIZE);
+    av_write_frame(ctx, NULL); // Output the first fragment
+    init_out("delay-moov-elst-signal-second-frag");
+    mux_gops(1);
+    av_write_frame(ctx, NULL); // Output the second fragment
+    close_out();
+    memcpy(content, hash, HASH_SIZE);
+    finish();
+
+    av_dict_set(&opts, "movflags", "frag_custom+delay_moov+dash+frag_discont", 0);
+    av_dict_set(&opts, "fragment_index", "2", 0);
+    init(1, 1);
+    signal_init_ts();
+    skip_gops(1);
+    mux_gops(1); // Write the second fragment
+    init_out("delay-moov-elst-signal-init-discont");
+    av_write_frame(ctx, NULL); // Output the moov
+    close_out();
+    check(!memcmp(hash, header, HASH_SIZE), "discontinuously written header differs");
+    init_out("delay-moov-elst-signal-second-frag-discont");
+    av_write_frame(ctx, NULL); // Output the second fragment
+    close_out();
+    check(!memcmp(hash, content, HASH_SIZE), "discontinuously written fragment differs");
+    finish();
+
+
+    // Test VFR content, with sidx atoms (which declare the pts duration
+    // of a fragment, forcing overriding the start pts of the next one).
+    // Here, the fragment duration in pts is significantly different from
+    // the duration in dts. The video stream starts at dts=-10,pts=0, and
+    // the second fragment starts at dts=155,pts=156. The trun duration sum
+    // of the first fragment is 165, which also is written as
+    // baseMediaDecodeTime in the tfdt in the second fragment. The sidx for
+    // the first fragment says earliest_presentation_time = 0 and
+    // subsegment_duration = 156, which also matches the sidx in the second
+    // fragment. For the audio stream, the pts and dts durations also don't
+    // match - the input stream starts at pts=-2048, but that part is excluded
+    // by the edit list.
+    init_out("vfr");
+    av_dict_set(&opts, "movflags", "frag_keyframe+delay_moov+dash", 0);
+    init_fps(1, 1, 3);
+    mux_frames(gop_size/2);
+    duration /= 10;
+    mux_frames(gop_size/2);
+    mux_gops(1);
+    finish();
+    close_out();
+
+    // Test VFR content, with cleared duration fields. In these cases,
+    // the muxer must guess the duration of the last packet of each
+    // fragment. As long as the framerate doesn't vary (too much) at the
+    // fragment edge, it works just fine. Additionally, when automatically
+    // cutting fragments, the muxer already knows the timestamps of the next
+    // packet for one stream (in most cases the video stream), avoiding
+    // having to use guesses for that one.
+    init_count_warnings();
+    clear_duration = 1;
+    init_out("vfr-noduration");
+    av_dict_set(&opts, "movflags", "frag_keyframe+delay_moov+dash", 0);
+    init_fps(1, 1, 3);
+    mux_frames(gop_size/2);
+    duration /= 10;
+    mux_frames(gop_size/2);
+    mux_gops(1);
+    finish();
+    close_out();
+    clear_duration = 0;
+    reset_count_warnings();
+    check(num_warnings > 0, "No warnings printed for filled in durations");
+
+    av_free(md5);
+
+    return check_faults > 0 ? 1 : 0;
+}
diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index 9b2c8da2..b9c0f7ae 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -66,10 +66,11 @@ static const AVOption options[] = {
     { "dash", "Write DASH compatible fragmented MP4", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MOV_FLAG_DASH}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "movflags" },
     { "frag_discont", "Signal that the next fragment is discontinuous from earlier ones", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MOV_FLAG_FRAG_DISCONT}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "movflags" },
     { "delay_moov", "Delay writing the initial moov until the first fragment is cut, or until the first fragment flush", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MOV_FLAG_DELAY_MOOV}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "movflags" },
+    { "global_sidx", "Write a global sidx index at the start of the file", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MOV_FLAG_GLOBAL_SIDX}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "movflags" },
     { "write_colr", "Write colr atom (Experimental, may be renamed or changed, do not use from scripts)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MOV_FLAG_WRITE_COLR}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "movflags" },
     { "write_gama", "Write deprecated gama atom", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MOV_FLAG_WRITE_GAMA}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "movflags" },
     FF_RTP_FLAG_OPTS(MOVMuxContext, rtp_flags),
-    { "skip_iods", "Skip writing iods atom.", offsetof(MOVMuxContext, iods_skip), AV_OPT_TYPE_INT, {.i64 = 1}, 0, 1, AV_OPT_FLAG_ENCODING_PARAM},
+    { "skip_iods", "Skip writing iods atom.", offsetof(MOVMuxContext, iods_skip), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, AV_OPT_FLAG_ENCODING_PARAM},
     { "iods_audio_profile", "iods audio profile atom.", offsetof(MOVMuxContext, iods_audio_profile), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 255, AV_OPT_FLAG_ENCODING_PARAM},
     { "iods_video_profile", "iods video profile atom.", offsetof(MOVMuxContext, iods_video_profile), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 255, AV_OPT_FLAG_ENCODING_PARAM},
     { "frag_duration", "Maximum fragment duration", offsetof(MOVMuxContext, max_fragment_duration), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM},
@@ -78,10 +79,13 @@ static const AVOption options[] = {
     { "ism_lookahead", "Number of lookahead entries for ISM files", offsetof(MOVMuxContext, ism_lookahead), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM},
     { "video_track_timescale", "set timescale of all video tracks", offsetof(MOVMuxContext, video_track_timescale), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM},
     { "brand",    "Override major brand", offsetof(MOVMuxContext, major_brand),   AV_OPT_TYPE_STRING, {.str = NULL}, .flags = AV_OPT_FLAG_ENCODING_PARAM },
-    { "use_editlist", "use edit list", offsetof(MOVMuxContext, use_editlist), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 1, AV_OPT_FLAG_ENCODING_PARAM},
+    { "use_editlist", "use edit list", offsetof(MOVMuxContext, use_editlist), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, AV_OPT_FLAG_ENCODING_PARAM},
     { "fragment_index", "Fragment number of the next fragment", offsetof(MOVMuxContext, fragments), AV_OPT_TYPE_INT, {.i64 = 1}, 1, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM},
     { "mov_gamma", "gamma value for gama atom", offsetof(MOVMuxContext, gamma), AV_OPT_TYPE_FLOAT, {.dbl = 0.0 }, 0.0, 10, AV_OPT_FLAG_ENCODING_PARAM},
     { "frag_interleave", "Interleave samples within fragments (max number of consecutive samples, lower is tighter interleaving, but with more overhead)", offsetof(MOVMuxContext, frag_interleave), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM },
+    { "encryption_scheme",    "Configures the encryption scheme, allowed values are none, cenc-aes-ctr", offsetof(MOVMuxContext, encryption_scheme_str),   AV_OPT_TYPE_STRING, {.str = NULL}, .flags = AV_OPT_FLAG_ENCODING_PARAM, .unit = "movflags" },
+    { "encryption_key", "The media encryption key (hex)", offsetof(MOVMuxContext, encryption_key), AV_OPT_TYPE_BINARY, .flags = AV_OPT_FLAG_ENCODING_PARAM, .unit = "movflags" },
+    { "encryption_kid", "The media encryption key identifier (hex)", offsetof(MOVMuxContext, encryption_kid), AV_OPT_TYPE_BINARY, .flags = AV_OPT_FLAG_ENCODING_PARAM, .unit = "movflags" },
     { NULL },
 };
 
@@ -340,7 +344,7 @@ static int handle_eac3(MOVMuxContext *mov, AVPacket *pkt, MOVTrack *track)
     info = track->eac3_priv;
 
     init_get_bits(&gbc, pkt->data, pkt->size * 8);
-    if (avpriv_ac3_parse_header2(&gbc, &hdr) < 0) {
+    if (avpriv_ac3_parse_header(&gbc, &hdr) < 0) {
         /* drop the packets until we see a good one */
         if (!track->entry) {
             av_log(mov, AV_LOG_WARNING, "Dropping invalid packet from start of the stream\n");
@@ -390,7 +394,7 @@ static int handle_eac3(MOVMuxContext *mov, AVPacket *pkt, MOVTrack *track)
             while (cumul_size != pkt->size) {
                 int i;
                 init_get_bits(&gbc, pkt->data + cumul_size, (pkt->size - cumul_size) * 8);
-                if (avpriv_ac3_parse_header2(&gbc, &hdr) < 0)
+                if (avpriv_ac3_parse_header(&gbc, &hdr) < 0)
                     return AVERROR_INVALIDDATA;
                 if (hdr->frame_type != EAC3_FRAME_TYPE_DEPENDENT)
                     return AVERROR(EINVAL);
@@ -439,10 +443,10 @@ static int handle_eac3(MOVMuxContext *mov, AVPacket *pkt, MOVTrack *track)
             return ret;
         if (info->num_blocks != 6)
             return 0;
-        av_free_packet(pkt);
+        av_packet_unref(pkt);
         if ((ret = av_copy_packet(pkt, &info->pkt)) < 0)
             return ret;
-        av_free_packet(&info->pkt);
+        av_packet_unref(&info->pkt);
         info->num_blocks = 0;
     }
 
@@ -497,7 +501,7 @@ static int mov_write_eac3_tag(AVIOContext *pb, MOVTrack *track)
     av_free(buf);
 
 end:
-    av_free_packet(&info->pkt);
+    av_packet_unref(&info->pkt);
     av_freep(&track->eac3_priv);
 
     return size;
@@ -551,6 +555,7 @@ static unsigned compute_avg_bitrate(MOVTrack *track)
 
 static int mov_write_esds_tag(AVIOContext *pb, MOVTrack *track) // Basic
 {
+    AVCPBProperties *props;
     int64_t pos = avio_tell(pb);
     int decoder_specific_info_len = track->vos_len ? 5 + track->vos_len : 0;
     unsigned avg_bitrate;
@@ -584,11 +589,13 @@ static int mov_write_esds_tag(AVIOContext *pb, MOVTrack *track) // Basic
     else
         avio_w8(pb, 0x11); // flags (= Visualstream)
 
-    avio_wb24(pb, track->enc->rc_buffer_size >> 3); // Buffersize DB
+    props = (AVCPBProperties*)av_stream_get_side_data(track->st, AV_PKT_DATA_CPB_PROPERTIES,
+                                                      NULL);
+
+    avio_wb24(pb, props ? props->buffer_size / 8 : 0); // Buffersize DB
 
     avg_bitrate = compute_avg_bitrate(track);
-    // maxbitrate (FIXME should be max rate in any 1 sec window)
-    avio_wb32(pb, FFMAX3(track->enc->bit_rate, track->enc->rc_max_rate, avg_bitrate));
+    avio_wb32(pb, props ? FFMAX3(props->max_bitrate, props->avg_bitrate, avg_bitrate) : FFMAX(track->enc->bit_rate, avg_bitrate)); // maxbitrate (FIXME should be max rate in any 1 sec window)
     avio_wb32(pb, avg_bitrate);
 
     if (track->vos_len) {
@@ -733,7 +740,7 @@ static int mov_write_dvc1_structs(MOVTrack *track, uint8_t *buf)
                "dvc1 atom. Set the delay_moov flag to fix this.\n");
     }
 
-    unescaped = av_mallocz(track->vos_len + FF_INPUT_BUFFER_PADDING_SIZE);
+    unescaped = av_mallocz(track->vos_len + AV_INPUT_BUFFER_PADDING_SIZE);
     if (!unescaped)
         return AVERROR(ENOMEM);
     start = find_next_marker(track->vos_data, end);
@@ -890,7 +897,7 @@ static int get_samples_per_packet(MOVTrack *track)
     return first_duration;
 }
 
-static int mov_write_audio_tag(AVIOContext *pb, MOVTrack *track)
+static int mov_write_audio_tag(AVIOContext *pb, MOVMuxContext *mov, MOVTrack *track)
 {
     int64_t pos = avio_tell(pb);
     int version = 0;
@@ -911,7 +918,11 @@ static int mov_write_audio_tag(AVIOContext *pb, MOVTrack *track)
     }
 
     avio_wb32(pb, 0); /* size */
-    avio_wl32(pb, tag); // store it byteswapped
+    if (mov->encryption_scheme != MOV_ENC_NONE) {
+        ffio_wfourcc(pb, "enca");
+    } else {
+        avio_wl32(pb, tag); // store it byteswapped
+    }
     avio_wb32(pb, 0); /* Reserved */
     avio_wb16(pb, 0); /* Reserved */
     avio_wb16(pb, 1); /* Data-reference index, XXX  == 1 */
@@ -999,6 +1010,10 @@ static int mov_write_audio_tag(AVIOContext *pb, MOVTrack *track)
     if (track->mode == MODE_MOV && track->enc->codec_type == AVMEDIA_TYPE_AUDIO)
         mov_write_chan_tag(pb, track);
 
+    if (mov->encryption_scheme != MOV_ENC_NONE) {
+        ff_mov_cenc_write_sinf_tag(track, pb, mov->encryption_kid);
+    }
+
     return update_size(pb, pos);
 }
 
@@ -1658,7 +1673,11 @@ static int mov_write_video_tag(AVIOContext *pb, MOVMuxContext *mov, MOVTrack *tr
     int avid = 0;
 
     avio_wb32(pb, 0); /* size */
-    avio_wl32(pb, track->tag); // store it byteswapped
+    if (mov->encryption_scheme != MOV_ENC_NONE) {
+        ffio_wfourcc(pb, "encv");
+    } else {
+        avio_wl32(pb, track->tag); // store it byteswapped
+    }
     avio_wb32(pb, 0); /* Reserved */
     avio_wb16(pb, 0); /* Reserved */
     avio_wb16(pb, 1); /* Data-reference index */
@@ -1749,6 +1768,10 @@ static int mov_write_video_tag(AVIOContext *pb, MOVMuxContext *mov, MOVTrack *tr
         mov_write_pasp_tag(pb, track);
     }
 
+    if (mov->encryption_scheme != MOV_ENC_NONE) {
+        ff_mov_cenc_write_sinf_tag(track, pb, mov->encryption_kid);
+    }
+
     /* extra padding for avid stsd */
     /* https://developer.apple.com/library/mac/documentation/QuickTime/QTFF/QTFFChap2/qtff2.html#//apple_ref/doc/uid/TP40000939-CH204-61112 */
     if (avid)
@@ -1847,9 +1870,9 @@ static int mov_write_stsd_tag(AVIOContext *pb, MOVMuxContext *mov, MOVTrack *tra
     avio_wb32(pb, 0); /* version & flags */
     avio_wb32(pb, 1); /* entry count */
     if (track->enc->codec_type == AVMEDIA_TYPE_VIDEO)
-        mov_write_video_tag(pb, mov,  track);
+        mov_write_video_tag(pb, mov, track);
     else if (track->enc->codec_type == AVMEDIA_TYPE_AUDIO)
-        mov_write_audio_tag(pb, track);
+        mov_write_audio_tag(pb, mov, track);
     else if (track->enc->codec_type == AVMEDIA_TYPE_SUBTITLE)
         mov_write_subtitle_tag(pb, track);
     else if (track->enc->codec_tag == MKTAG('r','t','p',' '))
@@ -1979,6 +2002,9 @@ static int mov_write_stbl_tag(AVIOContext *pb, MOVMuxContext *mov, MOVTrack *tra
     mov_write_stsc_tag(pb, track);
     mov_write_stsz_tag(pb, track);
     mov_write_stco_tag(pb, track);
+    if (mov->encryption_scheme == MOV_ENC_CENC_AES_CTR) {
+        ff_mov_cenc_write_stbl_atoms(&track->cenc, pb);
+    }
     return update_size(pb, pos);
 }
 
@@ -2573,7 +2599,7 @@ static int mov_write_track_udta_tag(AVIOContext *pb, MOVMuxContext *mov,
     int ret, size;
     uint8_t *buf;
 
-    if (!st || mov->fc->flags & AVFMT_FLAG_BITEXACT)
+    if (!st)
         return 0;
 
     ret = avio_open_dyn_buf(&pb_buf);
@@ -2919,9 +2945,10 @@ static int mov_write_ilst_tag(AVIOContext *pb, MOVMuxContext *mov,
     mov_write_string_metadata(s, pb, "\251wrt", "composer" , 1);
     mov_write_string_metadata(s, pb, "\251alb", "album"    , 1);
     mov_write_string_metadata(s, pb, "\251day", "date"     , 1);
-    if (!mov->exact &&
-        !mov_write_string_metadata(s, pb, "\251too", "encoding_tool", 1))
-        mov_write_string_tag(pb, "\251too", LIBAVFORMAT_IDENT, 0, 1);
+    if (!mov_write_string_metadata(s, pb, "\251too", "encoding_tool", 1)) {
+        if (!(s->flags & AVFMT_FLAG_BITEXACT))
+            mov_write_string_tag(pb, "\251too", LIBAVFORMAT_IDENT, 0, 1);
+    }
     mov_write_string_metadata(s, pb, "\251cmt", "comment"  , 1);
     mov_write_string_metadata(s, pb, "\251gen", "genre"    , 1);
     mov_write_string_metadata(s, pb, "\251cpy", "copyright", 1);
@@ -3115,7 +3142,6 @@ static void mov_write_psp_udta_tag(AVIOContext *pb,
 
 static int mov_write_uuidusmt_tag(AVIOContext *pb, AVFormatContext *s)
 {
-    MOVMuxContext *mov = s->priv_data;
     AVDictionaryEntry *title = av_dict_get(s->metadata, "title", NULL, 0);
     int64_t pos, pos2;
 
@@ -3140,7 +3166,7 @@ static int mov_write_uuidusmt_tag(AVIOContext *pb, AVFormatContext *s)
         avio_wb16(pb, 0x0);                  /* ? */
         avio_wb16(pb, 0x021C);               /* data */
 
-        if (!mov->exact)
+        if (!(s->flags & AVFMT_FLAG_BITEXACT))
             mov_write_psp_udta_tag(pb, LIBAVCODEC_IDENT,      "eng", 0x04);
         mov_write_psp_udta_tag(pb, title->value,          "eng", 0x01);
         mov_write_psp_udta_tag(pb, "2006/04/01 11:11:11", "und", 0x03);
@@ -3200,10 +3226,21 @@ static int mov_write_moov_tag(AVIOContext *pb, MOVMuxContext *mov,
             mov->tracks[i].tref_id  = mov->tracks[mov->chapter_track].track_id;
         }
     for (i = 0; i < mov->nb_streams; i++) {
-        if (mov->tracks[i].tag == MKTAG('r','t','p',' ')) {
-            mov->tracks[i].tref_tag = MKTAG('h','i','n','t');
-            mov->tracks[i].tref_id =
-                mov->tracks[mov->tracks[i].src_track].track_id;
+        MOVTrack *track = &mov->tracks[i];
+        if (track->tag == MKTAG('r','t','p',' ')) {
+            track->tref_tag = MKTAG('h','i','n','t');
+            track->tref_id = mov->tracks[track->src_track].track_id;
+        } else if (track->enc->codec_type == AVMEDIA_TYPE_AUDIO) {
+            int * fallback, size;
+            fallback = (int*)av_stream_get_side_data(track->st,
+                                                     AV_PKT_DATA_FALLBACK_TRACK,
+                                                     &size);
+            if (fallback != NULL && size == sizeof(int)) {
+                if (*fallback >= 0 && *fallback < mov->nb_streams) {
+                    track->tref_tag = MKTAG('f','a','l','l');
+                    track->tref_id = mov->tracks[*fallback].track_id;
+                }
+            }
         }
     }
     for (i = 0; i < mov->nb_streams; i++) {
@@ -3275,7 +3312,7 @@ static int mov_write_isml_manifest(AVIOContext *pb, MOVMuxContext *mov)
     avio_printf(pb, "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n");
     avio_printf(pb, "<smil xmlns=\"http://www.w3.org/2001/SMIL20/Language\">\n");
     avio_printf(pb, "<head>\n");
-    if (!mov->exact)
+    if (!(mov->fc->flags & AVFMT_FLAG_BITEXACT))
         avio_printf(pb, "<meta name=\"creator\" content=\"%s\" />\n",
                     LIBAVFORMAT_IDENT);
     avio_printf(pb, "</head>\n");
@@ -3295,8 +3332,8 @@ static int mov_write_isml_manifest(AVIOContext *pb, MOVMuxContext *mov)
         } else {
             continue;
         }
-        avio_printf(pb, "<%s systemBitrate=\"%d\">\n", type,
-                                                       track->enc->bit_rate);
+        avio_printf(pb, "<%s systemBitrate=\"%"PRId64"\">\n", type,
+                    (int64_t)track->enc->bit_rate);
         param_write_int(pb, "systemBitrate", track->enc->bit_rate);
         param_write_int(pb, "trackID", track_id);
         if (track->enc->codec_type == AVMEDIA_TYPE_VIDEO) {
@@ -3322,7 +3359,17 @@ static int mov_write_isml_manifest(AVIOContext *pb, MOVMuxContext *mov)
             param_write_int(pb, "DisplayHeight", track->enc->height);
         } else {
             if (track->enc->codec_id == AV_CODEC_ID_AAC) {
-                param_write_string(pb, "FourCC", "AACL");
+                switch (track->enc->profile)
+                {
+                    case FF_PROFILE_AAC_HE_V2:
+                        param_write_string(pb, "FourCC", "AACP");
+                        break;
+                    case FF_PROFILE_AAC_HE:
+                        param_write_string(pb, "FourCC", "AACH");
+                        break;
+                    default:
+                        param_write_string(pb, "FourCC", "AACL");
+                }
             } else if (track->enc->codec_id == AV_CODEC_ID_WMAPRO) {
                 param_write_string(pb, "FourCC", "WMAP");
             }
@@ -3680,6 +3727,8 @@ static int mov_write_sidx_tag(AVIOContext *pb,
         }
     } else {
         entries = track->nb_frag_info;
+        if (entries <= 0)
+            return 0;
         presentation_time = track->frag_info[0].time;
     }
 
@@ -3763,7 +3812,7 @@ static int mov_write_moof_tag(AVIOContext *pb, MOVMuxContext *mov, int tracks,
     mov_write_moof_tag_internal(avio_buf, mov, tracks, 0);
     moof_size = ffio_close_null_buf(avio_buf);
 
-    if (mov->flags & FF_MOV_FLAG_DASH && !(mov->flags & FF_MOV_FLAG_FASTSTART))
+    if (mov->flags & FF_MOV_FLAG_DASH && !(mov->flags & FF_MOV_FLAG_GLOBAL_SIDX))
         mov_write_sidx_tags(pb, mov, tracks, moof_size + 8 + mdat_size);
 
     if ((ret = mov_add_tfra_entries(pb, mov, tracks, moof_size + 8 + mdat_size)) < 0)
@@ -3903,7 +3952,7 @@ static int mov_write_ftyp_tag(AVIOContext *pb, AVFormatContext *s)
     else if (mov->mode == MODE_MP4)
         ffio_wfourcc(pb, "mp41");
 
-    if (mov->flags & FF_MOV_FLAG_DASH && mov->flags & FF_MOV_FLAG_FASTSTART)
+    if (mov->flags & FF_MOV_FLAG_DASH && mov->flags & FF_MOV_FLAG_GLOBAL_SIDX)
         ffio_wfourcc(pb, "dash");
 
     return update_size(pb, pos);
@@ -4106,7 +4155,7 @@ static int mov_flush_fragment_interleaving(AVFormatContext *s, MOVTrack *track)
     return 0;
 }
 
-static int mov_flush_fragment(AVFormatContext *s)
+static int mov_flush_fragment(AVFormatContext *s, int force)
 {
     MOVMuxContext *mov = s->priv_data;
     int i, first_track = -1;
@@ -4152,7 +4201,7 @@ static int mov_flush_fragment(AVFormatContext *s)
             if (!mov->tracks[i].entry)
                 break;
         /* Don't write the initial moov unless all tracks have data */
-        if (i < mov->nb_streams)
+        if (i < mov->nb_streams && !force)
             return 0;
 
         moov_size = get_moov_size(s);
@@ -4165,8 +4214,8 @@ static int mov_flush_fragment(AVFormatContext *s)
             return ret;
 
         if (mov->flags & FF_MOV_FLAG_DELAY_MOOV) {
-            if (mov->flags & FF_MOV_FLAG_FASTSTART)
-                mov->reserved_moov_pos = avio_tell(s->pb);
+            if (mov->flags & FF_MOV_FLAG_GLOBAL_SIDX)
+                mov->reserved_header_pos = avio_tell(s->pb);
             avio_flush(s->pb);
             mov->moov_written = 1;
             return 0;
@@ -4179,6 +4228,9 @@ static int mov_flush_fragment(AVFormatContext *s)
         avio_write(s->pb, buf, buf_size);
         av_free(buf);
 
+        if (mov->flags & FF_MOV_FLAG_GLOBAL_SIDX)
+            mov->reserved_header_pos = avio_tell(s->pb);
+
         mov->moov_written = 1;
         mov->mdat_size = 0;
         for (i = 0; i < mov->nb_streams; i++) {
@@ -4276,17 +4328,17 @@ static int mov_flush_fragment(AVFormatContext *s)
     return 0;
 }
 
-static int mov_auto_flush_fragment(AVFormatContext *s)
+static int mov_auto_flush_fragment(AVFormatContext *s, int force)
 {
     MOVMuxContext *mov = s->priv_data;
     int had_moov = mov->moov_written;
-    int ret = mov_flush_fragment(s);
+    int ret = mov_flush_fragment(s, force);
     if (ret < 0)
         return ret;
     // If using delay_moov, the first flush only wrote the moov,
     // not the actual moof+mdat pair, thus flush once again.
     if (!had_moov && mov->flags & FF_MOV_FLAG_DELAY_MOOV)
-        ret = mov_flush_fragment(s);
+        ret = mov_flush_fragment(s, force);
     return ret;
 }
 
@@ -4310,10 +4362,10 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
             pkt->dts = trk->cluster[trk->entry - 1].dts + 1;
             pkt->pts = AV_NOPTS_VALUE;
         }
-        if (pkt->duration < 0) {
-            av_log(s, AV_LOG_ERROR, "Application provided duration: %d is invalid\n", pkt->duration);
-            return AVERROR(EINVAL);
-        }
+    }
+    if (pkt->duration < 0 || pkt->duration > INT_MAX) {
+        av_log(s, AV_LOG_ERROR, "Application provided duration: %"PRId64" is invalid\n", pkt->duration);
+        return AVERROR(EINVAL);
     }
     if (mov->flags & FF_MOV_FLAG_FRAGMENT) {
         int ret;
@@ -4341,7 +4393,7 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
 
     if (enc->codec_id == AV_CODEC_ID_AMR_NB) {
         /* We must find out how many AMR blocks there are in one packet */
-        static uint16_t packed_size[16] =
+        static const uint16_t packed_size[16] =
             {13, 14, 16, 18, 20, 21, 27, 32, 6, 0, 0, 0, 0, 0, 0, 1};
         int len = 0;
 
@@ -4392,7 +4444,15 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
                                        &size);
             avio_write(pb, reformatted_data, size);
         } else {
-            size = ff_avc_parse_nal_units(pb, pkt->data, pkt->size);
+            if (mov->encryption_scheme == MOV_ENC_CENC_AES_CTR) {
+                size = ff_mov_cenc_avc_parse_nal_units(&trk->cenc, pb, pkt->data, size);
+                if (size < 0) {
+                    ret = size;
+                    goto err;
+                }
+            } else {
+                size = ff_avc_parse_nal_units(pb, pkt->data, pkt->size);
+            }
         }
     } else if (enc->codec_id == AV_CODEC_ID_HEVC && trk->vos_len > 6 &&
                (AV_RB24(trk->vos_data) == 1 || AV_RB32(trk->vos_data) == 1)) {
@@ -4413,7 +4473,20 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
         avio_write(pb, pkt->data, size);
 #endif
     } else {
-        avio_write(pb, pkt->data, size);
+        if (mov->encryption_scheme == MOV_ENC_CENC_AES_CTR) {
+            if (enc->codec_id == AV_CODEC_ID_H264 && enc->extradata_size > 4) {
+                int nal_size_length = (enc->extradata[4] & 0x3) + 1;
+                ret = ff_mov_cenc_avc_write_nal_units(s, &trk->cenc, nal_size_length, pb, pkt->data, size);
+            } else {
+                ret = ff_mov_cenc_write_packet(&trk->cenc, pb, pkt->data, size);
+            }
+
+            if (ret) {
+                goto err;
+            }
+        } else {
+            avio_write(pb, pkt->data, size);
+        }
     }
 
     if ((enc->codec_id == AV_CODEC_ID_DNXHD ||
@@ -4455,7 +4528,7 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
              * in sidx/tfrf/tfxd tags; make sure the sidx pts and duration match up with
              * the next fragment. This means the cts of the first sample must
              * be the same in all fragments. */
-            if ((mov->flags & FF_MOV_FLAG_DASH && !(mov->flags & FF_MOV_FLAG_FASTSTART)) ||
+            if ((mov->flags & FF_MOV_FLAG_DASH && !(mov->flags & FF_MOV_FLAG_GLOBAL_SIDX)) ||
                 mov->mode == MODE_ISM)
                 pkt->pts = pkt->dts + trk->end_pts - trk->cluster[trk->entry].dts;
         } else {
@@ -4480,10 +4553,18 @@ int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt)
     if (trk->start_dts == AV_NOPTS_VALUE) {
         trk->start_dts = pkt->dts;
         if (trk->frag_discont) {
-            /* Pretend the whole stream started at dts=0, with earlier fragments
-             * already written, with a duration summing up to pkt->dts. */
-            trk->frag_start   = pkt->dts;
-            trk->start_dts    = 0;
+            if (mov->use_editlist) {
+                /* Pretend the whole stream started at pts=0, with earlier fragments
+                 * already written. If the stream started at pts=0, the duration sum
+                 * of earlier fragments would have been pkt->pts. */
+                trk->frag_start = pkt->pts;
+                trk->start_dts  = pkt->dts - pkt->pts;
+            } else {
+                /* Pretend the whole stream started at dts=0, with earlier fragments
+                 * already written, with a duration summing up to pkt->dts. */
+                trk->frag_start = pkt->dts;
+                trk->start_dts  = 0;
+            }
             trk->frag_discont = 0;
         } else if (pkt->dts && mov->moov_written)
             av_log(s, AV_LOG_WARNING,
@@ -4550,9 +4631,6 @@ static int mov_write_single_packet(AVFormatContext *s, AVPacket *pkt)
         int64_t frag_duration = 0;
         int size = pkt->size;
 
-        if (!pkt->size)
-            return 0;             /* Discard 0 sized packets */
-
         if (mov->flags & FF_MOV_FLAG_FRAG_DISCONT) {
             int i;
             for (i = 0; i < s->nb_streams; i++)
@@ -4560,6 +4638,18 @@ static int mov_write_single_packet(AVFormatContext *s, AVPacket *pkt)
             mov->flags &= ~FF_MOV_FLAG_FRAG_DISCONT;
         }
 
+        if (!pkt->size) {
+            if (trk->start_dts == AV_NOPTS_VALUE && trk->frag_discont) {
+                trk->start_dts = pkt->dts;
+                if (pkt->pts != AV_NOPTS_VALUE)
+                    trk->start_cts = pkt->pts - pkt->dts;
+                else
+                    trk->start_cts = 0;
+            }
+
+            return 0;             /* Discard 0 sized packets */
+        }
+
         if (trk->entry && pkt->stream_index < s->nb_streams)
             frag_duration = av_rescale_q(pkt->dts - trk->cluster[0].dts,
                                          s->streams[pkt->stream_index]->time_base,
@@ -4577,7 +4667,7 @@ static int mov_write_single_packet(AVFormatContext *s, AVPacket *pkt)
                 // for the other ones that are flushed at the same time.
                 trk->track_duration = pkt->dts - trk->start_dts;
                 trk->end_pts = pkt->pts;
-                mov_auto_flush_fragment(s);
+                mov_auto_flush_fragment(s, 0);
             }
         }
 
@@ -4600,7 +4690,7 @@ static int mov_write_subtitle_end_packet(AVFormatContext *s,
     end.stream_index = stream_index;
 
     ret = mov_write_single_packet(s, &end);
-    av_free_packet(&end);
+    av_packet_unref(&end);
 
     return ret;
 }
@@ -4608,13 +4698,14 @@ static int mov_write_subtitle_end_packet(AVFormatContext *s,
 static int mov_write_packet(AVFormatContext *s, AVPacket *pkt)
 {
     if (!pkt) {
-        mov_flush_fragment(s);
+        mov_flush_fragment(s, 1);
         return 1;
     } else {
         int i;
         MOVMuxContext *mov = s->priv_data;
 
-        if (!pkt->size) return 0; /* Discard 0 sized packets */
+        if (!pkt->size)
+            return mov_write_single_packet(s, pkt); /* Passthrough. */
 
         /*
          * Subtitles require special handling.
@@ -4868,6 +4959,8 @@ static void mov_free(AVFormatContext *s)
 
         if (mov->tracks[i].vos_len)
             av_freep(&mov->tracks[i].vos_data);
+
+        ff_mov_cenc_free(&mov->tracks[i].cenc);
     }
 
     av_freep(&mov->tracks);
@@ -4960,9 +5053,6 @@ static int mov_write_header(AVFormatContext *s)
         else if (!strcmp("f4v", s->oformat->name)) mov->mode = MODE_F4V;
     }
 
-    if (s->flags & AVFMT_FLAG_BITEXACT)
-        mov->exact = 1;
-
     if (mov->flags & FF_MOV_FLAG_DELAY_MOOV)
         mov->flags |= FF_MOV_FLAG_EMPTY_MOOV;
 
@@ -5081,6 +5171,31 @@ static int mov_write_header(AVFormatContext *s)
     if (!mov->tracks)
         return AVERROR(ENOMEM);
 
+    if (mov->encryption_scheme_str != NULL && strcmp(mov->encryption_scheme_str, "none") != 0) {
+        if (strcmp(mov->encryption_scheme_str, "cenc-aes-ctr") == 0) {
+            mov->encryption_scheme = MOV_ENC_CENC_AES_CTR;
+
+            if (mov->encryption_key_len != AES_CTR_KEY_SIZE) {
+                av_log(s, AV_LOG_ERROR, "Invalid encryption key len %d expected %d\n",
+                    mov->encryption_key_len, AES_CTR_KEY_SIZE);
+                ret = AVERROR(EINVAL);
+                goto error;
+            }
+
+            if (mov->encryption_kid_len != CENC_KID_SIZE) {
+                av_log(s, AV_LOG_ERROR, "Invalid encryption kid len %d expected %d\n",
+                    mov->encryption_kid_len, CENC_KID_SIZE);
+                ret = AVERROR(EINVAL);
+                goto error;
+            }
+        } else {
+            av_log(s, AV_LOG_ERROR, "unsupported encryption scheme %s\n",
+                mov->encryption_scheme_str);
+            ret = AVERROR(EINVAL);
+            goto error;
+        }
+    }
+
     for (i = 0; i < s->nb_streams; i++) {
         AVStream *st= s->streams[i];
         MOVTrack *track= &mov->tracks[i];
@@ -5159,7 +5274,7 @@ static int mov_write_header(AVFormatContext *s)
             }
             if (track->mode != MODE_MOV &&
                 track->enc->codec_id == AV_CODEC_ID_MP3 && track->timescale < 16000) {
-                if (track->enc->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
+                if (s->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
                     av_log(s, AV_LOG_ERROR, "track %d: muxing mp3 at %dhz is not standard, to mux anyway set strict to -1\n",
                         i, track->enc->sample_rate);
                     ret = AVERROR(EINVAL);
@@ -5199,6 +5314,14 @@ static int mov_write_header(AVFormatContext *s)
                 memcpy(track->vos_data, st->codec->extradata, track->vos_len);
             }
         }
+
+        if (mov->encryption_scheme == MOV_ENC_CENC_AES_CTR) {
+            ret = ff_mov_cenc_init(&track->cenc, mov->encryption_key,
+                track->enc->codec_id == AV_CODEC_ID_H264, s->flags & AVFMT_FLAG_BITEXACT);
+            if (ret) {
+                goto error;
+            }
+        }
     }
 
     for (i = 0; i < s->nb_streams; i++) {
@@ -5230,7 +5353,7 @@ static int mov_write_header(AVFormatContext *s)
 
 
     if (mov->reserved_moov_size){
-        mov->reserved_moov_pos= avio_tell(pb);
+        mov->reserved_header_pos = avio_tell(pb);
         if (mov->reserved_moov_size > 0)
             avio_skip(pb, mov->reserved_moov_size);
     }
@@ -5243,12 +5366,11 @@ static int mov_write_header(AVFormatContext *s)
             mov->flags |= FF_MOV_FLAG_FRAG_KEYFRAME;
     } else {
         if (mov->flags & FF_MOV_FLAG_FASTSTART)
-            mov->reserved_moov_pos = avio_tell(pb);
+            mov->reserved_header_pos = avio_tell(pb);
         mov_write_mdat_tag(pb, mov);
     }
 
-    if (t = av_dict_get(s->metadata, "creation_time", NULL, 0))
-        mov->time = ff_iso8601_to_unix_time(t->value);
+    ff_parse_creation_time_metadata(s, &mov->time, 1);
     if (mov->time)
         mov->time += 0x7C25B080; // 1970 based -> 1904 based
 
@@ -5296,9 +5418,10 @@ static int mov_write_header(AVFormatContext *s)
         !(mov->flags & FF_MOV_FLAG_DELAY_MOOV)) {
         if ((ret = mov_write_moov_tag(pb, mov, s)) < 0)
             return ret;
+        avio_flush(pb);
         mov->moov_written = 1;
-        if (mov->flags & FF_MOV_FLAG_FASTSTART)
-            mov->reserved_moov_pos = avio_tell(pb);
+        if (mov->flags & FF_MOV_FLAG_GLOBAL_SIDX)
+            mov->reserved_header_pos = avio_tell(pb);
     }
 
     return 0;
@@ -5405,7 +5528,7 @@ static int shift_data(AVFormatContext *s)
      * writing, so we re-open the same output, but for reading. It also avoids
      * a read/seek/write/seek back and forth. */
     avio_flush(s->pb);
-    ret = avio_open(&read_pb, s->filename, AVIO_FLAG_READ);
+    ret = s->io_open(s, &read_pb, s->filename, AVIO_FLAG_READ, NULL);
     if (ret < 0) {
         av_log(s, AV_LOG_ERROR, "Unable to re-open %s output file for "
                "the second pass (faststart)\n", s->filename);
@@ -5415,10 +5538,10 @@ static int shift_data(AVFormatContext *s)
     /* mark the end of the shift to up to the last data we wrote, and get ready
      * for writing */
     pos_end = avio_tell(s->pb);
-    avio_seek(s->pb, mov->reserved_moov_pos + moov_size, SEEK_SET);
+    avio_seek(s->pb, mov->reserved_header_pos + moov_size, SEEK_SET);
 
     /* start reading at where the new moov will be placed */
-    avio_seek(read_pb, mov->reserved_moov_pos, SEEK_SET);
+    avio_seek(read_pb, mov->reserved_header_pos, SEEK_SET);
     pos = avio_tell(read_pb);
 
 #define READ_BLOCK do {                                                             \
@@ -5437,7 +5560,7 @@ static int shift_data(AVFormatContext *s)
         avio_write(s->pb, read_buf[read_buf_id], n);
         pos += n;
     } while (pos < pos_end);
-    avio_close(read_pb);
+    ff_format_io_close(s, &read_pb);
 
 end:
     av_free(buf);
@@ -5492,13 +5615,13 @@ static int mov_write_trailer(AVFormatContext *s)
             ffio_wfourcc(pb, "mdat");
             avio_wb64(pb, mov->mdat_size + 16);
         }
-        avio_seek(pb, mov->reserved_moov_size > 0 ? mov->reserved_moov_pos : moov_pos, SEEK_SET);
+        avio_seek(pb, mov->reserved_moov_size > 0 ? mov->reserved_header_pos : moov_pos, SEEK_SET);
 
         if (mov->flags & FF_MOV_FLAG_FASTSTART) {
             av_log(s, AV_LOG_INFO, "Starting second pass: moving the moov atom to the beginning of the file\n");
             res = shift_data(s);
             if (res == 0) {
-                avio_seek(pb, mov->reserved_moov_pos, SEEK_SET);
+                avio_seek(pb, mov->reserved_header_pos, SEEK_SET);
                 if ((res = mov_write_moov_tag(pb, mov, s)) < 0)
                     goto error;
             }
@@ -5506,7 +5629,7 @@ static int mov_write_trailer(AVFormatContext *s)
             int64_t size;
             if ((res = mov_write_moov_tag(pb, mov, s)) < 0)
                 goto error;
-            size = mov->reserved_moov_size - (avio_tell(pb) - mov->reserved_moov_pos);
+            size = mov->reserved_moov_size - (avio_tell(pb) - mov->reserved_header_pos);
             if (size < 8){
                 av_log(s, AV_LOG_ERROR, "reserved_moov_size is too small, needed %"PRId64" additional\n", 8-size);
                 res = AVERROR(EINVAL);
@@ -5522,15 +5645,15 @@ static int mov_write_trailer(AVFormatContext *s)
         }
         res = 0;
     } else {
-        mov_auto_flush_fragment(s);
+        mov_auto_flush_fragment(s, 1);
         for (i = 0; i < mov->nb_streams; i++)
            mov->tracks[i].data_offset = 0;
-        if (mov->flags & FF_MOV_FLAG_FASTSTART) {
+        if (mov->flags & FF_MOV_FLAG_GLOBAL_SIDX) {
             av_log(s, AV_LOG_INFO, "Starting second pass: inserting sidx atoms\n");
             res = shift_data(s);
             if (res == 0) {
                 int64_t end = avio_tell(pb);
-                avio_seek(pb, mov->reserved_moov_pos, SEEK_SET);
+                avio_seek(pb, mov->reserved_header_pos, SEEK_SET);
                 mov_write_sidx_tags(pb, mov, -1, 0);
                 avio_seek(pb, end, SEEK_SET);
                 mov_write_mfra_tag(pb, mov);
@@ -5588,7 +5711,7 @@ MOV_CLASS(mp4)
 AVOutputFormat ff_mp4_muxer = {
     .name              = "mp4",
     .long_name         = NULL_IF_CONFIG_SMALL("MP4 (MPEG-4 Part 14)"),
-    .mime_type         = "application/mp4",
+    .mime_type         = "video/mp4",
     .extensions        = "mp4",
     .priv_data_size    = sizeof(MOVMuxContext),
     .audio_codec       = AV_CODEC_ID_AAC,
@@ -5642,7 +5765,7 @@ MOV_CLASS(ipod)
 AVOutputFormat ff_ipod_muxer = {
     .name              = "ipod",
     .long_name         = NULL_IF_CONFIG_SMALL("iPod H.264 MP4 (MPEG-4 Part 14)"),
-    .mime_type         = "application/mp4",
+    .mime_type         = "video/mp4",
     .extensions        = "m4v,m4a",
     .priv_data_size    = sizeof(MOVMuxContext),
     .audio_codec       = AV_CODEC_ID_AAC,
@@ -5660,7 +5783,7 @@ MOV_CLASS(ismv)
 AVOutputFormat ff_ismv_muxer = {
     .name              = "ismv",
     .long_name         = NULL_IF_CONFIG_SMALL("ISMV/ISMA (Smooth Streaming)"),
-    .mime_type         = "application/mp4",
+    .mime_type         = "video/mp4",
     .extensions        = "ismv,isma",
     .priv_data_size    = sizeof(MOVMuxContext),
     .audio_codec       = AV_CODEC_ID_AAC,
diff --git a/libavformat/movenc.h b/libavformat/movenc.h
index 744d14e5..deb90fe2 100644
--- a/libavformat/movenc.h
+++ b/libavformat/movenc.h
@@ -25,6 +25,7 @@
 #define AVFORMAT_MOVENC_H
 
 #include "avformat.h"
+#include "movenccenc.h"
 
 #define MOV_FRAG_INFO_ALLOC_INCREMENT 64
 #define MOV_INDEX_CLUSTER_SIZE 1024
@@ -149,8 +150,15 @@ typedef struct MOVTrack {
     } vc1_info;
 
     void       *eac3_priv;
+
+    MOVMuxCencContext cenc;
 } MOVTrack;
 
+typedef enum { /* type of MOVMuxContext.encryption_scheme */
+    MOV_ENC_NONE = 0,
+    MOV_ENC_CENC_AES_CTR,
+} MOVEncryptionScheme;
+
 typedef struct MOVMuxContext {
     const AVClass *av_class;
     int     mode;
@@ -164,7 +172,6 @@ typedef struct MOVMuxContext {
 
     int flags;
     int rtp_flags;
-    int exact;
 
     int iods_skip;
     int iods_video_profile;
@@ -182,7 +189,7 @@ typedef struct MOVMuxContext {
     int video_track_timescale;
 
     int reserved_moov_size; ///< 0 for disabled, -1 for automatic, size otherwise
-    int64_t reserved_moov_pos;
+    int64_t reserved_header_pos;
 
     char *major_brand;
 
@@ -194,6 +201,14 @@ typedef struct MOVMuxContext {
 
     int frag_interleave;
     int missing_duration_warned;
+
+    char *encryption_scheme_str;
+    MOVEncryptionScheme encryption_scheme;
+    uint8_t *encryption_key;
+    int encryption_key_len;
+    uint8_t *encryption_kid;
+    int encryption_kid_len;
+
 } MOVMuxContext;
 
 #define FF_MOV_FLAG_RTP_HINT              (1 <<  0)
@@ -210,8 +225,9 @@ typedef struct MOVMuxContext {
 #define FF_MOV_FLAG_DASH                  (1 << 11)
 #define FF_MOV_FLAG_FRAG_DISCONT          (1 << 12)
 #define FF_MOV_FLAG_DELAY_MOOV            (1 << 13)
-#define FF_MOV_FLAG_WRITE_COLR            (1 << 14)
-#define FF_MOV_FLAG_WRITE_GAMA            (1 << 15)
+#define FF_MOV_FLAG_GLOBAL_SIDX           (1 << 14)
+#define FF_MOV_FLAG_WRITE_COLR            (1 << 15)
+#define FF_MOV_FLAG_WRITE_GAMA            (1 << 16)
 
 int ff_mov_write_packet(AVFormatContext *s, AVPacket *pkt);
 
diff --git a/libavformat/movenccenc.c b/libavformat/movenccenc.c
new file mode 100644
index 00000000..b91294f7
--- /dev/null
+++ b/libavformat/movenccenc.c
@@ -0,0 +1,415 @@
+/*
+ * MOV CENC (Common Encryption) writer
+ * Copyright (c) 2015 Eran Kornblau <erankor at gmail dot com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "movenccenc.h"
+#include "libavutil/intreadwrite.h"
+#include "avio_internal.h"
+#include "movenc.h"
+#include "avc.h"
+
+/* Make room for size more bytes of auxiliary info; 0 or AVERROR(ENOMEM). */
+static int auxiliary_info_alloc_size(MOVMuxCencContext* ctx, int size)
+{
+    size_t needed = ctx->auxiliary_info_size + size;
+    size_t grown  = FFMAX(needed, ctx->auxiliary_info_alloc_size * 2);
+    if (needed <= ctx->auxiliary_info_alloc_size)
+        return 0;
+    if (av_reallocp(&ctx->auxiliary_info, grown)) {
+        /* av_reallocp() freed the buffer on failure: forget its old extent */
+        ctx->auxiliary_info_size = ctx->auxiliary_info_alloc_size = 0;
+        return AVERROR(ENOMEM);
+    }
+    ctx->auxiliary_info_alloc_size = grown;
+    return 0;
+}
+
+/* Append size bytes from buf_in to the auxiliary info, growing the buffer
+ * first when needed. Returns 0 or the allocation error. */
+static int auxiliary_info_write(MOVMuxCencContext* ctx,
+                                         const uint8_t *buf_in, int size)
+{
+    int err = auxiliary_info_alloc_size(ctx, size);
+
+    if (err)
+        return err;
+    memcpy(ctx->auxiliary_info + ctx->auxiliary_info_size, buf_in, size);
+    ctx->auxiliary_info_size += size;
+
+    return 0;
+}
+
+/*
+ * Append one subsample record to the auxiliary info: a 16-bit big-endian
+ * clear byte count followed by a 32-bit big-endian encrypted byte count.
+ * Does nothing and returns 0 when subsamples are not in use; otherwise
+ * returns 0 on success or the allocation error.
+ */
+static int auxiliary_info_add_subsample(MOVMuxCencContext* ctx,
+    uint16_t clear_bytes, uint32_t encrypted_bytes)
+{
+    const int record_size = sizeof(uint16_t) + sizeof(uint32_t);
+    uint8_t *record;
+    int err;
+
+    if (!ctx->use_subsamples)
+        return 0;
+
+    err = auxiliary_info_alloc_size(ctx, record_size);
+    if (err)
+        return err;
+
+    record = ctx->auxiliary_info + ctx->auxiliary_info_size;
+    AV_WB16(record, clear_bytes);
+    AV_WB32(record + sizeof(uint16_t), encrypted_bytes);
+    ctx->auxiliary_info_size += record_size;
+    ctx->subsample_count++;
+    return 0;
+}
+
+/**
+ * Encrypt the input buffer and write using avio_write
+ *
+ * The data passes through av_aes_ctr_crypt() in pieces of at most
+ * sizeof(block) bytes; nothing is written when size is <= 0.
+ */
+static void mov_cenc_write_encrypted(MOVMuxCencContext* ctx, AVIOContext *pb,
+                                     const uint8_t *buf_in, int size)
+{
+    uint8_t block[4096];
+    int done, len;
+
+    for (done = 0; done < size; done += len) {
+        /* the final piece may be shorter than the scratch block */
+        len = FFMIN(size - done, sizeof(block));
+        av_aes_ctr_crypt(ctx->aes_ctr, block, buf_in + done, len);
+        avio_write(pb, block, len);
+    }
+}
+
+/**
+ * Start writing a packet
+ *
+ * Appends the current IV to the auxiliary info. With subsamples enabled it
+ * also notes where the subsample count lives, resets the counter and appends
+ * a zero count as a placeholder that mov_cenc_end_packet() overwrites.
+ *
+ * Returns 0 on success or the error from auxiliary_info_write().
+ */
+static int mov_cenc_start_packet(MOVMuxCencContext* ctx)
+{
+    int err;
+
+    /* write the iv */
+    err = auxiliary_info_write(ctx, av_aes_ctr_get_iv(ctx->aes_ctr),
+                               AES_CTR_IV_SIZE);
+    if (err || !ctx->use_subsamples)
+        return err;
+
+    /* write a zero subsample count */
+    ctx->auxiliary_info_subsample_start = ctx->auxiliary_info_size;
+    ctx->subsample_count = 0;
+    err = auxiliary_info_write(ctx, (uint8_t*)&ctx->subsample_count,
+                               sizeof(ctx->subsample_count));
+
+    return err;
+}
+
+/**
+ * Finalize a packet
+ *
+ * Advances the IV and counts one more auxiliary info entry; with subsamples
+ * it also stores the entry size and patches in the real subsample count.
+ * Returns 0 on success or AVERROR(ENOMEM).
+ */
+static int mov_cenc_end_packet(MOVMuxCencContext* ctx)
+{
+    const uint32_t entry = ctx->auxiliary_info_entries;
+    size_t grown;
+
+    av_aes_ctr_increment_iv(ctx->aes_ctr);
+
+    if (ctx->use_subsamples) {
+        /* add the auxiliary info entry size */
+        if (entry >= ctx->auxiliary_info_sizes_alloc_size) {
+            grown = entry * 2 + 1;
+            if (av_reallocp(&ctx->auxiliary_info_sizes, grown))
+                return AVERROR(ENOMEM);
+            ctx->auxiliary_info_sizes_alloc_size = grown;
+        }
+        /* NOTE(review): the table holds uint8_t, so the size is narrowed */
+        ctx->auxiliary_info_sizes[entry] = AES_CTR_IV_SIZE +
+            ctx->auxiliary_info_size - ctx->auxiliary_info_subsample_start;
+        /* update the subsample count */
+        AV_WB16(ctx->auxiliary_info + ctx->auxiliary_info_subsample_start,
+                ctx->subsample_count);
+    }
+    ctx->auxiliary_info_entries++;
+    return 0;
+}
+
+/*
+ * Write one packet with its whole payload encrypted: start the auxiliary
+ * info entry, record a single subsample (0 clear bytes, size encrypted
+ * bytes), write the encrypted data, then finalize the entry.
+ * Returns 0 on success or the first error from the bookkeeping helpers.
+ */
+int ff_mov_cenc_write_packet(MOVMuxCencContext* ctx, AVIOContext *pb,
+                          const uint8_t *buf_in, int size)
+{
+    int err;
+
+    /* bookkeeping first: pb is untouched if either step fails */
+    err = mov_cenc_start_packet(ctx);
+    if (!err)
+        err = auxiliary_info_add_subsample(ctx, 0, size);
+    if (err)
+        return err;
+
+    /* the entire payload goes through the cipher */
+    mov_cenc_write_encrypted(ctx, pb, buf_in, size);
+
+    /* advances the IV and closes the entry */
+    return mov_cenc_end_packet(ctx);
+}
+
+/* Rewrite Annex B NAL units as 4-byte-length-prefixed NALs: the length and
+ * first NAL byte stay in the clear, the remaining bytes are encrypted, and
+ * one subsample (5 clear bytes + encrypted rest) is recorded per NAL.
+ * Returns the byte count accumulated in size, or the first error code. */
+int ff_mov_cenc_avc_parse_nal_units(MOVMuxCencContext* ctx, AVIOContext *pb,
+                                 const uint8_t *buf_in, int size)
+{
+    const uint8_t *end = buf_in + size;
+    const uint8_t *nal_start, *nal_end;
+    int ret;
+
+    ret = mov_cenc_start_packet(ctx);
+    if (ret)
+        return ret;
+
+    size = 0;
+    nal_start = ff_avc_find_startcode(buf_in, end);
+    for (;;) {
+        /* skip leading zero bytes plus the first non-zero byte after them */
+        while (nal_start < end && !*(nal_start++));
+        if (nal_start == end)
+            break;
+
+        nal_end = ff_avc_find_startcode(nal_start, end);
+        avio_wb32(pb, nal_end - nal_start);
+        avio_w8(pb, *nal_start);
+        mov_cenc_write_encrypted(ctx, pb, nal_start + 1, nal_end - nal_start - 1);
+
+        /* do not keep muxing with an incomplete subsample table */
+        ret = auxiliary_info_add_subsample(ctx, 5, nal_end - nal_start - 1);
+        if (ret)
+            return ret;
+        size += 4 + nal_end - nal_start;
+        nal_start = nal_end;
+    }
+
+    ret = mov_cenc_end_packet(ctx);
+    return ret ? ret : size;
+}
+
+/* Copy length-prefixed (MP4 format) NAL units to pb, leaving each length
+ * field and first NAL byte in the clear and encrypting the rest, with one
+ * subsample recorded per NAL. Returns 0 on success, -1 on malformed input,
+ * or the error code of a failed auxiliary info update. */
+int ff_mov_cenc_avc_write_nal_units(AVFormatContext *s, MOVMuxCencContext* ctx,
+    int nal_length_size, AVIOContext *pb, const uint8_t *buf_in, int size)
+{
+    int nalsize;
+    int ret;
+    int j;
+
+    ret = mov_cenc_start_packet(ctx);
+    if (ret)
+        return ret;
+
+    while (size > 0) {
+        /* parse the nal size */
+        if (size < nal_length_size + 1) {
+            av_log(s, AV_LOG_ERROR, "CENC-AVC: remaining size %d smaller than nal length+type %d\n",
+                size, nal_length_size + 1);
+            return -1;
+        }
+
+        /* length field and first NAL byte are written unencrypted */
+        avio_write(pb, buf_in, nal_length_size + 1);
+
+        nalsize = 0;
+        for (j = 0; j < nal_length_size; j++)
+            nalsize = (nalsize << 8) | *buf_in++;
+        size -= nal_length_size;
+
+        /* encrypt the nal body */
+        if (nalsize <= 0 || nalsize > size) {
+            av_log(s, AV_LOG_ERROR, "CENC-AVC: nal size %d remaining %d\n", nalsize, size);
+            return -1;
+        }
+
+        mov_cenc_write_encrypted(ctx, pb, buf_in + 1, nalsize - 1);
+        buf_in += nalsize;
+        size -= nalsize;
+        /* propagate allocation failures instead of dropping the subsample */
+        ret = auxiliary_info_add_subsample(ctx, nal_length_size + 1, nalsize - 1);
+        if (ret)
+            return ret;
+    }
+
+    return mov_cenc_end_packet(ctx);
+}
+
+/* Back-patch the 32-bit size at pos. TODO: reuse this function from movenc.c */
+static int64_t update_size(AVIOContext *pb, int64_t pos)
+{
+    const int64_t here = avio_tell(pb);   /* current write position */
+    const int64_t atom_size = here - pos;
+    avio_seek(pb, pos, SEEK_SET);
+    avio_wb32(pb, atom_size); /* rewrite size */
+    avio_seek(pb, here, SEEK_SET);
+    return atom_size;
+}
+
+/* Write the senc atom; *auxiliary_info_offset gets the position of its payload. */
+static int mov_cenc_write_senc_tag(MOVMuxCencContext* ctx, AVIOContext *pb,
+                                   int64_t* auxiliary_info_offset)
+{
+    const int64_t atom_start = avio_tell(pb);
+    avio_wb32(pb, 0); /* size, patched by update_size() below */
+    ffio_wfourcc(pb, "senc");
+    avio_wb32(pb, ctx->use_subsamples ? 0x02 : 0); /* version & flags */
+    avio_wb32(pb, ctx->auxiliary_info_entries); /* entry count */
+    *auxiliary_info_offset = avio_tell(pb);
+    avio_write(pb, ctx->auxiliary_info, ctx->auxiliary_info_size);
+    return update_size(pb, atom_start);
+}
+
+/* Write a saio atom with one offset entry; version 1 (64-bit offset) is
+ * used only when the offset does not fit in 32 bits. */
+static int mov_cenc_write_saio_tag(AVIOContext *pb, int64_t auxiliary_info_offset)
+{
+    const int64_t atom_start = avio_tell(pb);
+    const int wide = auxiliary_info_offset > 0xffffffff;
+
+    avio_wb32(pb, 0); /* size */
+    ffio_wfourcc(pb, "saio");
+    avio_w8(pb, wide ? 1 : 0); /* version */
+    avio_wb24(pb, 0); /* flags */
+    avio_wb32(pb, 1); /* entry count */
+    if (wide)
+        avio_wb64(pb, auxiliary_info_offset);
+    else
+        avio_wb32(pb, auxiliary_info_offset);
+    return update_size(pb, atom_start);
+}
+
+/* Write the saiz atom; per-entry sizes are listed only when subsamples are used. */
+static int mov_cenc_write_saiz_tag(MOVMuxCencContext* ctx, AVIOContext *pb)
+{
+    const int64_t atom_start = avio_tell(pb);
+    avio_wb32(pb, 0); /* size */
+    ffio_wfourcc(pb, "saiz");
+    avio_wb32(pb, 0); /* version & flags */
+    avio_w8(pb, ctx->use_subsamples ? 0 : AES_CTR_IV_SIZE);    /* default size */
+    avio_wb32(pb, ctx->auxiliary_info_entries); /* entry count */
+    if (ctx->use_subsamples)
+        avio_write(pb, ctx->auxiliary_info_sizes, ctx->auxiliary_info_entries);
+    return update_size(pb, atom_start);
+}
+
+/* Emit senc, saio (pointing at the senc payload) and saiz, in that order. */
+void ff_mov_cenc_write_stbl_atoms(MOVMuxCencContext* ctx, AVIOContext *pb)
+{
+    int64_t senc_payload_pos;
+    mov_cenc_write_senc_tag(ctx, pb, &senc_payload_pos);
+    mov_cenc_write_saio_tag(pb, senc_payload_pos);
+    mov_cenc_write_saiz_tag(ctx, pb);
+}
+
+/* Write the schi atom; its only child is a 32-byte tenc atom holding the
+ * encrypted flag, the IV size and the key id. */
+static int mov_cenc_write_schi_tag(AVIOContext *pb, uint8_t* kid)
+{
+    const int64_t schi_start = avio_tell(pb);
+    avio_wb32(pb, 0);     /* size */
+    ffio_wfourcc(pb, "schi");
+    avio_wb32(pb, 32);    /* size */
+    ffio_wfourcc(pb, "tenc");
+    avio_wb32(pb, 0);     /* version & flags */
+    avio_wb24(pb, 1);     /* is encrypted */
+    avio_w8(pb, AES_CTR_IV_SIZE); /* iv size */
+    avio_write(pb, kid, CENC_KID_SIZE);
+    return update_size(pb, schi_start);
+}
+
+/* Write the sinf atom: frma (track tag), schm (scheme "cenc") and schi (key id). */
+int ff_mov_cenc_write_sinf_tag(MOVTrack* track, AVIOContext *pb, uint8_t* kid)
+{
+    const int64_t sinf_start = avio_tell(pb);
+    avio_wb32(pb, 0); /* size */
+    ffio_wfourcc(pb, "sinf");
+
+    /* frma */
+    avio_wb32(pb, 12);    /* size */
+    ffio_wfourcc(pb, "frma");
+    avio_wl32(pb, track->tag);
+
+    /* schm */
+    avio_wb32(pb, 20);    /* size */
+    ffio_wfourcc(pb, "schm");
+    avio_wb32(pb, 0); /* version & flags */
+    ffio_wfourcc(pb, "cenc");    /* scheme type */
+    avio_wb32(pb, 0x10000); /* scheme version */
+
+    /* schi */
+    mov_cenc_write_schi_tag(pb, kid);
+    return update_size(pb, sinf_start);
+}
+
+/* Allocate and key the AES-CTR state; the IV is randomized unless bitexact
+ * is set. Returns 0, AVERROR(ENOMEM) or the av_aes_ctr_init() error. */
+int ff_mov_cenc_init(MOVMuxCencContext* ctx, uint8_t* encryption_key,
+                     int use_subsamples, int bitexact)
+{
+    int ret;
+
+    ctx->aes_ctr = av_aes_ctr_alloc();
+    if (!ctx->aes_ctr)
+        return AVERROR(ENOMEM);
+
+    ret = av_aes_ctr_init(ctx->aes_ctr, encryption_key);
+    if (ret != 0)
+        return ret;
+    if (!bitexact)
+        av_aes_ctr_set_random_iv(ctx->aes_ctr);
+
+    ctx->use_subsamples = use_subsamples;
+    return 0;
+}
+
+/* Release the cipher state and the auxiliary info buffers the context grew. */
+void ff_mov_cenc_free(MOVMuxCencContext* ctx)
+{
+    av_aes_ctr_free(ctx->aes_ctr);
+    av_freep(&ctx->auxiliary_info);
+    av_freep(&ctx->auxiliary_info_sizes);
+}
diff --git a/libavformat/movenccenc.h b/libavformat/movenccenc.h
new file mode 100644
index 00000000..6f9e70e9
--- /dev/null
+++ b/libavformat/movenccenc.h
@@ -0,0 +1,86 @@
+/*
+ * MOV CENC (Common Encryption) writer
+ * Copyright (c) 2015 Eran Kornblau <erankor at gmail dot com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFORMAT_MOVENCCENC_H
+#define AVFORMAT_MOVENCCENC_H
+
+#include "libavutil/aes_ctr.h"
+#include "avformat.h"
+#include "avio.h"
+
+#define CENC_KID_SIZE (16)
+
+struct MOVTrack;
+
+typedef struct { /* per-track CENC muxing state (MOVTrack.cenc) */
+    struct AVAESCTR* aes_ctr; /* AES-CTR state obtained from av_aes_ctr_alloc() */
+    uint8_t* auxiliary_info; /* per-packet IVs and subsample data, written into the senc atom */
+    size_t auxiliary_info_size; /* bytes used in auxiliary_info */
+    size_t auxiliary_info_alloc_size; /* bytes allocated for auxiliary_info */
+    uint32_t auxiliary_info_entries; /* number of finalized packets */
+
+    /* subsample support */
+    int use_subsamples; /* non-zero: record clear/encrypted byte counts per packet */
+    uint16_t subsample_count; /* subsamples added to the packet in progress */
+    size_t auxiliary_info_subsample_start; /* offset in auxiliary_info of the current packet's subsample count */
+    uint8_t* auxiliary_info_sizes; /* one size byte per entry, written into the saiz atom */
+    size_t  auxiliary_info_sizes_alloc_size; /* bytes allocated for auxiliary_info_sizes */
+} MOVMuxCencContext;
+
+/** Initialize a CENC context
+ * @param encryption_key encryption key, must have a length of AES_CTR_KEY_SIZE
+ * @param use_subsamples when enabled parts of a packet can be encrypted, otherwise the whole packet is encrypted
+ * @param bitexact when non-zero, the IV is not randomized (av_aes_ctr_set_random_iv() is skipped)
+ */
+int ff_mov_cenc_init(MOVMuxCencContext* ctx, uint8_t* encryption_key, int use_subsamples, int bitexact);
+
+/**
+ * Free a CENC context
+ */
+void ff_mov_cenc_free(MOVMuxCencContext* ctx);
+
+/**
+ * Write a fully encrypted packet
+ */
+int ff_mov_cenc_write_packet(MOVMuxCencContext* ctx, AVIOContext *pb, const uint8_t *buf_in, int size);
+
+/**
+ * Parse AVC NAL units from Annex B format; the NAL size and type are written in the clear while the body is encrypted
+ */
+int ff_mov_cenc_avc_parse_nal_units(MOVMuxCencContext* ctx, AVIOContext *pb, const uint8_t *buf_in, int size);
+
+/**
+ * Write AVC NAL units that are in MP4 format; the NAL size and type are written in the clear while the body is encrypted
+ */
+int ff_mov_cenc_avc_write_nal_units(AVFormatContext *s, MOVMuxCencContext* ctx, int nal_length_size,
+    AVIOContext *pb, const uint8_t *buf_in, int size);
+
+/**
+ * Write the cenc atoms that should reside inside stbl
+ */
+void ff_mov_cenc_write_stbl_atoms(MOVMuxCencContext* ctx, AVIOContext *pb);
+
+/**
+ * Write the sinf atom, contained inside stsd
+ */
+int ff_mov_cenc_write_sinf_tag(struct MOVTrack* track, AVIOContext *pb, uint8_t* kid);
+
+#endif /* AVFORMAT_MOVENCCENC_H */
diff --git a/libavformat/mp3dec.c b/libavformat/mp3dec.c
index 07d7f543..c76b21eb 100644
--- a/libavformat/mp3dec.c
+++ b/libavformat/mp3dec.c
@@ -42,6 +42,9 @@
 
 #define XING_TOC_COUNT 100
 
+#define SAME_HEADER_MASK \
+   (0xffe00000 | (3 << 17) | (3 << 10) | (3 << 19))
+
 typedef struct {
     AVClass *class;
     int64_t filesize;
@@ -54,20 +57,16 @@ typedef struct {
     int is_cbr;
 } MP3DecContext;
 
-static int check(AVFormatContext *s, int64_t pos);
+static int check(AVIOContext *pb, int64_t pos, uint32_t *header);
 
 /* mp3 read */
 
 static int mp3_read_probe(AVProbeData *p)
 {
     int max_frames, first_frames = 0;
-    int fsize, frames;
+    int frames, ret;
     uint32_t header;
     const uint8_t *buf, *buf0, *buf2, *end;
-    AVCodecContext *avctx = avcodec_alloc_context3(NULL);
-
-    if (!avctx)
-        return AVERROR(ENOMEM);
 
     buf0 = p->buf;
     end = p->buf + p->buf_size - sizeof(uint32_t);
@@ -79,26 +78,22 @@ static int mp3_read_probe(AVProbeData *p)
 
     for(; buf < end; buf= buf2+1) {
         buf2 = buf;
-        if(ff_mpa_check_header(AV_RB32(buf2)))
-            continue;
-
         for(frames = 0; buf2 < end; frames++) {
-            int dummy;
+            MPADecodeHeader h;
+
             header = AV_RB32(buf2);
-            fsize = avpriv_mpa_decode_header(avctx, header,
-                                             &dummy, &dummy, &dummy, &dummy);
-            if(fsize < 0)
+            ret = avpriv_mpegaudio_decode_header(&h, header);
+            if (ret != 0)
                 break;
-            buf2 += fsize;
+            buf2 += h.frame_size;
         }
         max_frames = FFMAX(max_frames, frames);
         if(buf == buf0)
             first_frames= frames;
     }
-    avcodec_free_context(&avctx);
     // keep this in sync with ac3 probe, both need to avoid
     // issues with MPEG-files!
-    if   (first_frames>=4) return AVPROBE_SCORE_EXTENSION + 1;
+    if   (first_frames>=7) return AVPROBE_SCORE_EXTENSION + 1;
     else if(max_frames>200)return AVPROBE_SCORE_EXTENSION;
     else if(max_frames>=4 && max_frames >= p->buf_size/10000) return AVPROBE_SCORE_EXTENSION / 2;
     else if(ff_id3v2_match(buf0, ID3v2_DEFAULT_MAGIC) && 2*ff_id3v2_tag_len(buf0) >= p->buf_size)
@@ -112,7 +107,8 @@ static void read_xing_toc(AVFormatContext *s, int64_t filesize, int64_t duration
 {
     int i;
     MP3DecContext *mp3 = s->priv_data;
-    int fill_index = mp3->usetoc == 1 && duration > 0;
+    int fast_seek = s->flags & AVFMT_FLAG_FAST_SEEK;
+    int fill_index = (mp3->usetoc || fast_seek) && duration > 0;
 
     if (!filesize &&
         !(filesize = avio_size(s->pb))) {
@@ -298,14 +294,16 @@ static int mp3_parse_vbr_tags(AVFormatContext *s, AVStream *st, int64_t base)
     MPADecodeHeader c;
     int vbrtag_size = 0;
     MP3DecContext *mp3 = s->priv_data;
+    int ret;
 
     ffio_init_checksum(s->pb, ff_crcA001_update, 0);
 
     v = avio_rb32(s->pb);
-    if(ff_mpa_check_header(v) < 0)
-      return -1;
 
-    if (avpriv_mpegaudio_decode_header(&c, v) == 0)
+    ret = avpriv_mpegaudio_decode_header(&c, v);
+    if (ret < 0)
+        return ret;
+    else if (ret == 0)
         vbrtag_size = c.frame_size;
     if(c.layer != 3)
         return -1;
@@ -341,9 +339,6 @@ static int mp3_read_header(AVFormatContext *s)
     int ret;
     int i;
 
-    if (mp3->usetoc < 0)
-        mp3->usetoc = (s->flags & AVFMT_FLAG_FAST_SEEK) ? 0 : 2;
-
     st = avformat_new_stream(s, NULL);
     if (!st)
         return AVERROR(ENOMEM);
@@ -374,12 +369,21 @@ static int mp3_read_header(AVFormatContext *s)
 
     off = avio_tell(s->pb);
     for (i = 0; i < 64 * 1024; i++) {
+        uint32_t header, header2;
+        int frame_size;
         if (!(i&1023))
             ffio_ensure_seekback(s->pb, i + 1024 + 4);
-        if (check(s, off + i) >= 0) {
-            av_log(s, AV_LOG_INFO, "Skipping %d bytes of junk at %lld.\n", i, (long long)off);
-            avio_seek(s->pb, off + i, SEEK_SET);
-            break;
+        frame_size = check(s->pb, off + i, &header);
+        if (frame_size > 0) {
+            avio_seek(s->pb, off, SEEK_SET);
+            ffio_ensure_seekback(s->pb, i + 1024 + frame_size + 4);
+            if (check(s->pb, off + i + frame_size, &header2) >= 0 &&
+                (header & SAME_HEADER_MASK) == (header2 & SAME_HEADER_MASK))
+            {
+                av_log(s, AV_LOG_INFO, "Skipping %d bytes of junk at %"PRId64".\n", i, off);
+                avio_seek(s->pb, off + i, SEEK_SET);
+                break;
+            }
         }
         avio_seek(s->pb, off, SEEK_SET);
     }
@@ -415,74 +419,47 @@ static int mp3_read_packet(AVFormatContext *s, AVPacket *pkt)
     pkt->flags &= ~AV_PKT_FLAG_CORRUPT;
     pkt->stream_index = 0;
 
-    if (ret >= ID3v1_TAG_SIZE &&
-        memcmp(&pkt->data[ret - ID3v1_TAG_SIZE], "TAG", 3) == 0)
-        ret -= ID3v1_TAG_SIZE;
-
-    /* note: we need to modify the packet size here to handle the last
-       packet */
-    pkt->size = ret;
     return ret;
 }
 
-static int check(AVFormatContext *s, int64_t pos)
+#define SEEK_WINDOW 4096
+
+static int check(AVIOContext *pb, int64_t pos, uint32_t *ret_header)
 {
-    int64_t ret = avio_seek(s->pb, pos, SEEK_SET);
+    int64_t ret = avio_seek(pb, pos, SEEK_SET);
     unsigned header;
     MPADecodeHeader sd;
     if (ret < 0)
         return ret;
-    header = avio_rb32(s->pb);
+
+    header = avio_rb32(pb);
     if (ff_mpa_check_header(header) < 0)
         return -1;
     if (avpriv_mpegaudio_decode_header(&sd, header) == 1)
         return -1;
+
+    if (ret_header)
+        *ret_header = header;
     return sd.frame_size;
 }
 
-static int mp3_seek(AVFormatContext *s, int stream_index, int64_t timestamp,
-                    int flags)
+static int64_t mp3_sync(AVFormatContext *s, int64_t target_pos, int flags)
 {
-    MP3DecContext *mp3 = s->priv_data;
-    AVIndexEntry *ie, ie1;
-    AVStream *st = s->streams[0];
-    int64_t ret  = av_index_search_timestamp(st, timestamp, flags);
-    int i, j;
     int dir = (flags&AVSEEK_FLAG_BACKWARD) ? -1 : 1;
     int64_t best_pos;
-    int best_score;
-
-    if (mp3->usetoc == 2)
-        return -1; // generic index code
-
-    if (   mp3->is_cbr
-        && (mp3->usetoc == 0 || !mp3->xing_toc)
-        && st->duration > 0
-        && mp3->header_filesize > s->internal->data_offset
-        && mp3->frames) {
-        ie = &ie1;
-        timestamp = av_clip64(timestamp, 0, st->duration);
-        ie->timestamp = timestamp;
-        ie->pos       = av_rescale(timestamp, mp3->header_filesize, st->duration) + s->internal->data_offset;
-    } else if (mp3->xing_toc) {
-        if (ret < 0)
-            return ret;
-
-        ie = &st->index_entries[ret];
-    } else {
-        return -1;
-    }
+    int best_score, i, j;
+    int64_t ret;
 
-    avio_seek(s->pb, FFMAX(ie->pos - 4096, 0), SEEK_SET);
-    ret = avio_seek(s->pb, ie->pos, SEEK_SET);
+    avio_seek(s->pb, FFMAX(target_pos - SEEK_WINDOW, 0), SEEK_SET);
+    ret = avio_seek(s->pb, target_pos, SEEK_SET);
     if (ret < 0)
         return ret;
 
 #define MIN_VALID 3
-    best_pos = ie->pos;
+    best_pos = target_pos;
     best_score = 999;
-    for(i=0; i<4096; i++) {
-        int64_t pos = ie->pos + (dir > 0 ? i - 1024 : -i);
+    for(i=0; i<SEEK_WINDOW; i++) {
+        int64_t pos = target_pos + (dir > 0 ? i - SEEK_WINDOW/4 : -i);
         int64_t candidate = -1;
         int score = 999;
 
@@ -490,10 +467,10 @@ static int mp3_seek(AVFormatContext *s, int stream_index, int64_t timestamp,
             continue;
 
         for(j=0; j<MIN_VALID; j++) {
-            ret = check(s, pos);
+            ret = check(s->pb, pos, NULL);
             if(ret < 0)
                 break;
-            if ((ie->pos - pos)*dir <= 0 && abs(MIN_VALID/2-j) < score) {
+            if ((target_pos - pos)*dir <= 0 && abs(MIN_VALID/2-j) < score) {
                 candidate = pos;
                 score = abs(MIN_VALID/2-j);
             }
@@ -507,11 +484,53 @@ static int mp3_seek(AVFormatContext *s, int stream_index, int64_t timestamp,
         }
     }
 
-    ret = avio_seek(s->pb, best_pos, SEEK_SET);
-    if (ret < 0)
-        return ret;
+    return avio_seek(s->pb, best_pos, SEEK_SET);
+}
+
+static int mp3_seek(AVFormatContext *s, int stream_index, int64_t timestamp,
+                    int flags)
+{
+    MP3DecContext *mp3 = s->priv_data;
+    AVIndexEntry *ie, ie1;
+    AVStream *st = s->streams[0];
+    int64_t best_pos;
+    int fast_seek = s->flags & AVFMT_FLAG_FAST_SEEK;
+    int64_t filesize = mp3->header_filesize;
+
+    if (filesize <= 0) {
+        int64_t size = avio_size(s->pb);
+        if (size > 0 && size > s->internal->data_offset)
+            filesize = size - s->internal->data_offset;
+    }
+
+    if (mp3->xing_toc && (mp3->usetoc || (fast_seek && !mp3->is_cbr))) {
+        int64_t ret = av_index_search_timestamp(st, timestamp, flags);
+
+        // NOTE: The MP3 TOC is not a precise lookup table. Accuracy is worse
+        // for bigger files.
+        av_log(s, AV_LOG_WARNING, "Using MP3 TOC to seek; may be imprecise.\n");
+
+        if (ret < 0)
+            return ret;
+
+        ie = &st->index_entries[ret];
+    } else if (fast_seek && st->duration > 0 && filesize > 0) {
+        if (!mp3->is_cbr)
+            av_log(s, AV_LOG_WARNING, "Using scaling to seek VBR MP3; may be imprecise.\n");
+
+        ie = &ie1;
+        timestamp = av_clip64(timestamp, 0, st->duration);
+        ie->timestamp = timestamp;
+        ie->pos       = av_rescale(timestamp, filesize, st->duration) + s->internal->data_offset;
+    } else {
+        return -1; // generic index code
+    }
+
+    best_pos = mp3_sync(s, ie->pos, flags);
+    if (best_pos < 0)
+        return best_pos;
 
-    if (mp3->is_cbr && ie == &ie1) {
+    if (mp3->is_cbr && ie == &ie1 && mp3->frames) {
         int frame_duration = av_rescale(st->duration, 1, mp3->frames);
         ie1.timestamp = frame_duration * av_rescale(best_pos - s->internal->data_offset, mp3->frames, mp3->header_filesize);
     }
@@ -521,7 +540,7 @@ static int mp3_seek(AVFormatContext *s, int stream_index, int64_t timestamp,
 }
 
 static const AVOption options[] = {
-    { "usetoc", "use table of contents", offsetof(MP3DecContext, usetoc), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 2, AV_OPT_FLAG_DECODING_PARAM},
+    { "usetoc", "use table of contents", offsetof(MP3DecContext, usetoc), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AV_OPT_FLAG_DECODING_PARAM},
     { NULL },
 };
 
diff --git a/libavformat/mp3enc.c b/libavformat/mp3enc.c
index d4b6af02..71f5178a 100644
--- a/libavformat/mp3enc.c
+++ b/libavformat/mp3enc.c
@@ -192,7 +192,8 @@ static int mp3_write_xing(AVFormatContext *s)
             return -1;
         header |= mask;
 
-        avpriv_mpegaudio_decode_header(&mpah, header);
+        ret = avpriv_mpegaudio_decode_header(&mpah, header);
+        av_assert0(ret >= 0);
         mp3->xing_offset = xing_offtbl[mpah.lsf == 1][mpah.nb_channels == 1] + 4;
         bytes_needed     = mp3->xing_offset + XING_SIZE;
 
@@ -309,12 +310,13 @@ static int mp3_write_audio_packet(AVFormatContext *s, AVPacket *pkt)
 
     if (pkt->data && pkt->size >= 4) {
         MPADecodeHeader mpah;
+        int ret;
         int av_unused base;
         uint32_t h;
 
         h = AV_RB32(pkt->data);
-        if (ff_mpa_check_header(h) == 0) {
-            avpriv_mpegaudio_decode_header(&mpah, h);
+        ret = avpriv_mpegaudio_decode_header(&mpah, h);
+        if (ret >= 0) {
             if (!mp3->initial_bitrate)
                 mp3->initial_bitrate = mpah.bit_rate;
             if ((mpah.bit_rate == 0) || (mp3->initial_bitrate != mpah.bit_rate))
@@ -365,7 +367,7 @@ static int mp3_queue_flush(AVFormatContext *s)
     while ((pktl = mp3->queue)) {
         if (write && (ret = mp3_write_audio_packet(s, &pktl->pkt)) < 0)
             write = 0;
-        av_free_packet(&pktl->pkt);
+        av_packet_unref(&pktl->pkt);
         mp3->queue = pktl->next;
         av_freep(&pktl);
     }
@@ -485,9 +487,9 @@ static const AVOption options[] = {
     { "id3v2_version", "Select ID3v2 version to write. Currently 3 and 4 are supported.",
       offsetof(MP3Context, id3v2_version), AV_OPT_TYPE_INT, {.i64 = 4}, 0, 4, AV_OPT_FLAG_ENCODING_PARAM},
     { "write_id3v1", "Enable ID3v1 writing. ID3v1 tags are written in UTF-8 which may not be supported by most software.",
-      offsetof(MP3Context, write_id3v1), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, AV_OPT_FLAG_ENCODING_PARAM},
+      offsetof(MP3Context, write_id3v1), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AV_OPT_FLAG_ENCODING_PARAM},
     { "write_xing",  "Write the Xing header containing file duration.",
-      offsetof(MP3Context, write_xing),  AV_OPT_TYPE_INT, {.i64 = 1}, 0, 1, AV_OPT_FLAG_ENCODING_PARAM},
+      offsetof(MP3Context, write_xing),  AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, AV_OPT_FLAG_ENCODING_PARAM},
     { NULL },
 };
 
diff --git a/libavformat/mpc.c b/libavformat/mpc.c
index a62a3f27..d0c1b85a 100644
--- a/libavformat/mpc.c
+++ b/libavformat/mpc.c
@@ -166,7 +166,7 @@ static int mpc_read_packet(AVFormatContext *s, AVPacket *pkt)
     if(c->curbits)
         avio_seek(s->pb, -4, SEEK_CUR);
     if(ret < size){
-        av_free_packet(pkt);
+        av_packet_unref(pkt);
         return ret < 0 ? ret : AVERROR(EIO);
     }
     pkt->size = ret + 4;
@@ -217,7 +217,7 @@ static int mpc_read_seek(AVFormatContext *s, int stream_index, int64_t timestamp
             c->curframe = lastframe;
             return ret;
         }
-        av_free_packet(pkt);
+        av_packet_unref(pkt);
     }
     return 0;
 }
diff --git a/libavformat/mpc8.c b/libavformat/mpc8.c
index 0cef65ff..bf597b88 100644
--- a/libavformat/mpc8.c
+++ b/libavformat/mpc8.c
@@ -154,7 +154,7 @@ static void mpc8_parse_seektable(AVFormatContext *s, int64_t off)
         av_log(s, AV_LOG_ERROR, "Bad seek table size\n");
         return;
     }
-    if(!(buf = av_malloc(size + FF_INPUT_BUFFER_PADDING_SIZE)))
+    if(!(buf = av_malloc(size + AV_INPUT_BUFFER_PADDING_SIZE)))
         return;
     ret = avio_read(s->pb, buf, size);
     if (ret != size) {
@@ -162,7 +162,7 @@ static void mpc8_parse_seektable(AVFormatContext *s, int64_t off)
         av_free(buf);
         return;
     }
-    memset(buf+size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
+    memset(buf+size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
 
     init_get_bits(&gb, buf, size * 8);
     size = gb_get_v(&gb);
diff --git a/libavformat/mpeg.c b/libavformat/mpeg.c
index edb134f7..69685cf8 100644
--- a/libavformat/mpeg.c
+++ b/libavformat/mpeg.c
@@ -256,7 +256,7 @@ static int mpegps_read_pes_header(AVFormatContext *s,
         if (avio_feof(s->pb))
             return AVERROR_EOF;
         // FIXME we should remember header_state
-        return AVERROR(EAGAIN);
+        return FFERROR_REDO;
     }
 
     if (startcode == PACK_START_CODE)
@@ -612,7 +612,7 @@ static int mpegps_read_packet(AVFormatContext *s,
     if (st->discard >= AVDISCARD_ALL)
         goto skip;
     if (startcode >= 0xa0 && startcode <= 0xaf) {
-      if (lpcm_header_len == 6 && st->codec->codec_id == AV_CODEC_ID_MLP) {
+      if (st->codec->codec_id == AV_CODEC_ID_MLP) {
             if (len < 6)
                 goto skip;
             avio_skip(s->pb, 6);
@@ -857,7 +857,8 @@ static int vobsub_read_header(AVFormatContext *s)
 
     for (i = 0; i < s->nb_streams; i++) {
         vobsub->q[i].sort = SUB_SORT_POS_TS;
-        ff_subtitles_queue_finalize(&vobsub->q[i]);
+        vobsub->q[i].keep_duplicates = 1;
+        ff_subtitles_queue_finalize(s, &vobsub->q[i]);
     }
 
     if (!av_bprint_is_complete(&header)) {
@@ -939,7 +940,7 @@ static int vobsub_read_packet(AVFormatContext *s, AVPacket *pkt)
         total_read += pkt_size;
 
         /* the current chunk doesn't match the stream index (unlikely) */
-        if ((startcode & 0x1f) != idx_pkt.stream_index)
+        if ((startcode & 0x1f) != s->streams[idx_pkt.stream_index]->id)
             break;
 
         ret = av_grow_packet(pkt, to_read);
@@ -955,12 +956,12 @@ static int vobsub_read_packet(AVFormatContext *s, AVPacket *pkt)
     pkt->pos = idx_pkt.pos;
     pkt->stream_index = idx_pkt.stream_index;
 
-    av_free_packet(&idx_pkt);
+    av_packet_unref(&idx_pkt);
     return 0;
 
 fail:
-    av_free_packet(pkt);
-    av_free_packet(&idx_pkt);
+    av_packet_unref(pkt);
+    av_packet_unref(&idx_pkt);
     return ret;
 }
 
diff --git a/libavformat/mpegenc.c b/libavformat/mpegenc.c
index 2520f49d..2e095495 100644
--- a/libavformat/mpegenc.c
+++ b/libavformat/mpegenc.c
@@ -338,6 +338,8 @@ static av_cold int mpeg_mux_init(AVFormatContext *ctx)
     lpcm_id = LPCM_ID;
 
     for (i = 0; i < ctx->nb_streams; i++) {
+        AVCPBProperties *props;
+
         st     = ctx->streams[i];
         stream = av_mallocz(sizeof(StreamInfo));
         if (!stream)
@@ -389,8 +391,10 @@ static av_cold int mpeg_mux_init(AVFormatContext *ctx)
                 stream->id = h264_id++;
             else
                 stream->id = mpv_id++;
-            if (st->codec->rc_buffer_size)
-                stream->max_buffer_size = 6 * 1024 + st->codec->rc_buffer_size / 8;
+
+            props = (AVCPBProperties*)av_stream_get_side_data(st, AV_PKT_DATA_CPB_PROPERTIES, NULL);
+            if (props && props->buffer_size)
+                stream->max_buffer_size = 6 * 1024 + props->buffer_size / 8;
             else {
                 av_log(ctx, AV_LOG_WARNING,
                        "VBV buffer size not set, using default size of 130KB\n"
@@ -410,7 +414,9 @@ static av_cold int mpeg_mux_init(AVFormatContext *ctx)
             stream->max_buffer_size = 16 * 1024;
             break;
         default:
-            return -1;
+            av_log(ctx, AV_LOG_ERROR, "Invalid media type %s for output stream #%d\n",
+                   av_get_media_type_string(st->codec->codec_type), i);
+            return AVERROR(EINVAL);
         }
         stream->fifo = av_fifo_alloc(16);
         if (!stream->fifo)
@@ -420,13 +426,14 @@ static av_cold int mpeg_mux_init(AVFormatContext *ctx)
     audio_bitrate = 0;
     video_bitrate = 0;
     for (i = 0; i < ctx->nb_streams; i++) {
+        AVCPBProperties *props;
         int codec_rate;
         st     = ctx->streams[i];
         stream = (StreamInfo *)st->priv_data;
 
-        if (st->codec->rc_max_rate ||
-            st->codec->codec_type == AVMEDIA_TYPE_VIDEO)
-            codec_rate = st->codec->rc_max_rate;
+        props = (AVCPBProperties*)av_stream_get_side_data(st, AV_PKT_DATA_CPB_PROPERTIES, NULL);
+        if (props)
+            codec_rate = props->max_bitrate;
         else
             codec_rate = st->codec->bit_rate;
 
diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c
index aeb2335e..22874e6f 100644
--- a/libavformat/mpegts.c
+++ b/libavformat/mpegts.c
@@ -21,6 +21,7 @@
 
 #include "libavutil/buffer.h"
 #include "libavutil/crc.h"
+#include "libavutil/internal.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/log.h"
 #include "libavutil/dict.h"
@@ -161,19 +162,19 @@ struct MpegTSContext {
 };
 
 #define MPEGTS_OPTIONS \
-    { "resync_size",   "Size limit for looking up a new synchronization.", offsetof(MpegTSContext, resync_size), AV_OPT_TYPE_INT,  { .i64 =  MAX_RESYNC_SIZE}, 0, INT_MAX,  AV_OPT_FLAG_DECODING_PARAM }
+    { "resync_size",   "set size limit for looking up a new synchronization", offsetof(MpegTSContext, resync_size), AV_OPT_TYPE_INT,  { .i64 =  MAX_RESYNC_SIZE}, 0, INT_MAX,  AV_OPT_FLAG_DECODING_PARAM }
 
 static const AVOption options[] = {
     MPEGTS_OPTIONS,
-    {"fix_teletext_pts", "Try to fix pts values of dvb teletext streams.", offsetof(MpegTSContext, fix_teletext_pts), AV_OPT_TYPE_INT,
+    {"fix_teletext_pts", "try to fix pts values of dvb teletext streams", offsetof(MpegTSContext, fix_teletext_pts), AV_OPT_TYPE_BOOL,
      {.i64 = 1}, 0, 1, AV_OPT_FLAG_DECODING_PARAM },
-    {"ts_packetsize", "Output option carrying the raw packet size.", offsetof(MpegTSContext, raw_packet_size), AV_OPT_TYPE_INT,
+    {"ts_packetsize", "output option carrying the raw packet size", offsetof(MpegTSContext, raw_packet_size), AV_OPT_TYPE_INT,
      {.i64 = 0}, 0, 0, AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_EXPORT | AV_OPT_FLAG_READONLY },
-    {"scan_all_pmts",   "Scan and combine all PMTs", offsetof(MpegTSContext, scan_all_pmts), AV_OPT_TYPE_INT,
+    {"scan_all_pmts",   "scan and combine all PMTs", offsetof(MpegTSContext, scan_all_pmts), AV_OPT_TYPE_BOOL,
      { .i64 =  -1}, -1, 1,  AV_OPT_FLAG_DECODING_PARAM },
-    {"skip_changes", "Skip changing / adding streams / programs.", offsetof(MpegTSContext, skip_changes), AV_OPT_TYPE_INT,
+    {"skip_changes", "skip changing / adding streams / programs", offsetof(MpegTSContext, skip_changes), AV_OPT_TYPE_BOOL,
      {.i64 = 0}, 0, 1, 0 },
-    {"skip_clear", "Skip clearing programs.", offsetof(MpegTSContext, skip_clear), AV_OPT_TYPE_INT,
+    {"skip_clear", "skip clearing programs", offsetof(MpegTSContext, skip_clear), AV_OPT_TYPE_BOOL,
      {.i64 = 0}, 0, 1, 0 },
     { NULL },
 };
@@ -187,10 +188,10 @@ static const AVClass mpegts_class = {
 
 static const AVOption raw_options[] = {
     MPEGTS_OPTIONS,
-    { "compute_pcr",   "Compute exact PCR for each transport stream packet.",
-          offsetof(MpegTSContext, mpeg2ts_compute_pcr), AV_OPT_TYPE_INT,
+    { "compute_pcr",   "compute exact PCR for each transport stream packet",
+          offsetof(MpegTSContext, mpeg2ts_compute_pcr), AV_OPT_TYPE_BOOL,
           { .i64 = 0 }, 0, 1,  AV_OPT_FLAG_DECODING_PARAM },
-    { "ts_packetsize", "Output option carrying the raw packet size.",
+    { "ts_packetsize", "output option carrying the raw packet size",
       offsetof(MpegTSContext, raw_packet_size), AV_OPT_TYPE_INT,
       { .i64 = 0 }, 0, 0,
       AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_EXPORT | AV_OPT_FLAG_READONLY },
@@ -698,6 +699,7 @@ static const StreamType ISO_types[] = {
 #endif
     { 0x1b, AVMEDIA_TYPE_VIDEO, AV_CODEC_ID_H264       },
     { 0x20, AVMEDIA_TYPE_VIDEO, AV_CODEC_ID_H264       },
+    { 0x21, AVMEDIA_TYPE_VIDEO, AV_CODEC_ID_JPEG2000   },
     { 0x24, AVMEDIA_TYPE_VIDEO, AV_CODEC_ID_HEVC       },
     { 0x42, AVMEDIA_TYPE_VIDEO, AV_CODEC_ID_CAVS       },
     { 0xd1, AVMEDIA_TYPE_VIDEO, AV_CODEC_ID_DIRAC      },
@@ -716,6 +718,7 @@ static const StreamType HDMV_types[] = {
     { 0xa1, AVMEDIA_TYPE_AUDIO,    AV_CODEC_ID_EAC3              }, /* E-AC3 Secondary Audio */
     { 0xa2, AVMEDIA_TYPE_AUDIO,    AV_CODEC_ID_DTS               }, /* DTS Express Secondary Audio */
     { 0x90, AVMEDIA_TYPE_SUBTITLE, AV_CODEC_ID_HDMV_PGS_SUBTITLE },
+    { 0x92, AVMEDIA_TYPE_SUBTITLE, AV_CODEC_ID_HDMV_TEXT_SUBTITLE },
     { 0 },
 };
 
@@ -800,6 +803,8 @@ static int mpegts_set_stream_info(AVStream *st, PESContext *pes,
     st->codec->codec_tag = pes->stream_type;
 
     mpegts_find_stream_type(st, pes->stream_type, ISO_types);
+    if (pes->stream_type == 4)
+        st->request_probe = 50;
     if ((prog_reg_desc == AV_RL32("HDMV") ||
          prog_reg_desc == AV_RL32("HDPR")) &&
         st->codec->codec_id == AV_CODEC_ID_NONE) {
@@ -835,6 +840,15 @@ static int mpegts_set_stream_info(AVStream *st, PESContext *pes,
         st->codec->codec_id  = old_codec_id;
         st->codec->codec_type = old_codec_type;
     }
+    if ((st->codec->codec_id == AV_CODEC_ID_NONE ||
+            (st->request_probe > 0 && st->request_probe < AVPROBE_SCORE_STREAM_RETRY / 5)) &&
+        !avcodec_is_open(st->codec) &&
+        st->probe_packets > 0 &&
+        stream_type == STREAM_TYPE_PRIVATE_DATA) {
+        st->codec->codec_type = AVMEDIA_TYPE_DATA;
+        st->codec->codec_id   = AV_CODEC_ID_BIN_DATA;
+        st->request_probe = AVPROBE_SCORE_STREAM_RETRY / 5;
+    }
 
     return 0;
 }
@@ -862,7 +876,7 @@ static void new_pes_packet(PESContext *pes, AVPacket *pkt)
         av_log(pes->stream, AV_LOG_WARNING, "PES packet size mismatch\n");
         pes->flags |= AV_PKT_FLAG_CORRUPT;
     }
-    memset(pkt->data + pkt->size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
+    memset(pkt->data + pkt->size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
 
     // Separate out the AC3 substream from an HDMV combined TrueHD/AC3 PID
     if (pes->sub_st && pes->stream_type == 0x83 && pes->extended_stream_id == 0x76)
@@ -894,8 +908,8 @@ static int read_sl_header(PESContext *pes, SLConfigDescr *sl,
     int padding_flag = 0, padding_bits = 0, inst_bitrate_flag = 0;
     int dts_flag = -1, cts_flag = -1;
     int64_t dts = AV_NOPTS_VALUE, cts = AV_NOPTS_VALUE;
-    uint8_t buf_padded[128 + FF_INPUT_BUFFER_PADDING_SIZE];
-    int buf_padded_size = FFMIN(buf_size, sizeof(buf_padded) - FF_INPUT_BUFFER_PADDING_SIZE);
+    uint8_t buf_padded[128 + AV_INPUT_BUFFER_PADDING_SIZE];
+    int buf_padded_size = FFMIN(buf_size, sizeof(buf_padded) - AV_INPUT_BUFFER_PADDING_SIZE);
 
     memcpy(buf_padded, buf, buf_padded_size);
 
@@ -1027,7 +1041,7 @@ static int mpegts_push_data(MpegTSFilter *filter,
 
                     /* allocate pes buffer */
                     pes->buffer = av_buffer_alloc(pes->total_size +
-                                                  FF_INPUT_BUFFER_PADDING_SIZE);
+                                                  AV_INPUT_BUFFER_PADDING_SIZE);
                     if (!pes->buffer)
                         return AVERROR(ENOMEM);
 
@@ -1044,6 +1058,7 @@ static int mpegts_push_data(MpegTSFilter *filter,
                             pes->st->request_probe = 1;
                         }
                     } else {
+                        pes->pes_header_size = 6;
                         pes->state      = MPEGTS_PAYLOAD;
                         pes->data_index = 0;
                     }
@@ -1131,7 +1146,10 @@ static int mpegts_push_data(MpegTSFilter *filter,
                     p += 5;
                     buf_size -= 5;
                 }
-                if (pes->ts->fix_teletext_pts && pes->st->codec->codec_id == AV_CODEC_ID_DVB_TELETEXT) {
+                if (   pes->ts->fix_teletext_pts
+                    && (   pes->st->codec->codec_id == AV_CODEC_ID_DVB_TELETEXT
+                        || pes->st->codec->codec_id == AV_CODEC_ID_DVB_SUBTITLE)
+                    ) {
                     AVProgram *p = NULL;
                     while ((p = av_find_program_from_stream(pes->stream, p, pes->st->index))) {
                         if (p->pcr_pid != -1 && p->discard != AVDISCARD_ALL) {
@@ -1160,7 +1178,11 @@ static int mpegts_push_data(MpegTSFilter *filter,
                                     pes->st->pts_wrap_behavior = st->pts_wrap_behavior;
                                     if (pes->dts == AV_NOPTS_VALUE || pes->dts < pcr) {
                                         pes->pts = pes->dts = pcr;
-                                    } else if (pes->dts > pcr + 3654 + 9000) {
+                                    } else if (pes->st->codec->codec_id == AV_CODEC_ID_DVB_TELETEXT &&
+                                               pes->dts > pcr + 3654 + 9000) {
+                                        pes->pts = pes->dts = pcr + 3654 + 9000;
+                                    } else if (pes->st->codec->codec_id == AV_CODEC_ID_DVB_SUBTITLE &&
+                                               pes->dts > pcr + 10*90000) { //10sec
                                         pes->pts = pes->dts = pcr + 3654 + 9000;
                                     }
                                     break;
@@ -1178,7 +1200,7 @@ static int mpegts_push_data(MpegTSFilter *filter,
                     new_pes_packet(pes, ts->pkt);
                     pes->total_size = MAX_PES_PAYLOAD;
                     pes->buffer = av_buffer_alloc(pes->total_size +
-                                                  FF_INPUT_BUFFER_PADDING_SIZE);
+                                                  AV_INPUT_BUFFER_PADDING_SIZE);
                     if (!pes->buffer)
                         return AVERROR(ENOMEM);
                     ts->stop_parse = 1;
@@ -1524,14 +1546,7 @@ static void m4sl_cb(MpegTSFilter *filter, const uint8_t *section,
                 st->codec->extradata_size > 0)
                 st->need_parsing = 0;
 
-            if (st->codec->codec_id <= AV_CODEC_ID_NONE) {
-                // do nothing
-            } else if (st->codec->codec_id < AV_CODEC_ID_FIRST_AUDIO)
-                st->codec->codec_type = AVMEDIA_TYPE_VIDEO;
-            else if (st->codec->codec_id < AV_CODEC_ID_FIRST_SUBTITLE)
-                st->codec->codec_type = AVMEDIA_TYPE_AUDIO;
-            else if (st->codec->codec_id < AV_CODEC_ID_FIRST_UNKNOWN)
-                st->codec->codec_type = AVMEDIA_TYPE_SUBTITLE;
+            st->codec->codec_type = avcodec_get_type(st->codec->codec_id);
         }
     }
     for (i = 0; i < mp4_descr_count; i++)
@@ -1635,9 +1650,7 @@ int ff_parse_mpeg2_descriptor(AVFormatContext *fc, AVStream *st, int stream_type
 
             if (language_count > 0) {
                 /* 4 bytes per language code (3 bytes) with comma or NUL byte should fit language buffer */
-                if (language_count > sizeof(language) / 4) {
-                    language_count = sizeof(language) / 4;
-                }
+                av_assert0(language_count <= sizeof(language) / 4);
 
                 if (st->codec->extradata == NULL) {
                     if (ff_alloc_extradata(st->codec, language_count * 2)) {
@@ -1687,9 +1700,7 @@ int ff_parse_mpeg2_descriptor(AVFormatContext *fc, AVStream *st, int stream_type
                 uint8_t *extradata;
 
                 /* 4 bytes per language code (3 bytes) with comma or NUL byte should fit language buffer */
-                if (language_count > sizeof(language) / 4) {
-                    language_count = sizeof(language) / 4;
-                }
+                av_assert0(language_count <= sizeof(language) / 4);
 
                 if (st->codec->extradata == NULL) {
                     if (ff_alloc_extradata(st->codec, language_count * 5)) {
@@ -1758,7 +1769,7 @@ int ff_parse_mpeg2_descriptor(AVFormatContext *fc, AVStream *st, int stream_type
     case 0x05: /* registration descriptor */
         st->codec->codec_tag = bytestream_get_le32(pp);
         av_log(fc, AV_LOG_TRACE, "reg_desc=%.4s\n", (char *)&st->codec->codec_tag);
-        if (st->codec->codec_id == AV_CODEC_ID_NONE)
+        if (st->codec->codec_id == AV_CODEC_ID_NONE || st->request_probe > 0)
             mpegts_find_stream_type(st, st->codec->codec_tag, REGD_types);
         break;
     case 0x52: /* stream identifier descriptor */
@@ -1781,7 +1792,7 @@ int ff_parse_mpeg2_descriptor(AVFormatContext *fc, AVStream *st, int stream_type
             ext_desc_tag == 0x80) { /* User defined (provisional Opus) */
             if (!st->codec->extradata) {
                 st->codec->extradata = av_mallocz(sizeof(opus_default_extradata) +
-                                                  FF_INPUT_BUFFER_PADDING_SIZE);
+                                                  AV_INPUT_BUFFER_PADDING_SIZE);
                 if (!st->codec->extradata)
                     return AVERROR(ENOMEM);
 
@@ -1949,7 +1960,7 @@ static void pmt_cb(MpegTSFilter *filter, const uint8_t *section, int section_len
 
         add_pid_to_pmt(ts, h->id, pid);
 
-        ff_program_add_stream_index(ts->stream, h->id, st->index);
+        av_program_add_stream_index(ts->stream, h->id, st->index);
 
         desc_list_len = get16(&p, p_end);
         if (desc_list_len < 0)
@@ -1966,7 +1977,7 @@ static void pmt_cb(MpegTSFilter *filter, const uint8_t *section, int section_len
 
             if (pes && prog_reg_desc == AV_RL32("HDMV") &&
                 stream_type == 0x83 && pes->sub_st) {
-                ff_program_add_stream_index(ts->stream, h->id,
+                av_program_add_stream_index(ts->stream, h->id,
                                             pes->sub_st->index);
                 pes->sub_st->codec->codec_tag = st->codec->codec_tag;
             }
@@ -2370,7 +2381,7 @@ static void finished_reading_packet(AVFormatContext *s, int raw_packet_size)
 static int handle_packets(MpegTSContext *ts, int64_t nb_packets)
 {
     AVFormatContext *s = ts->stream;
-    uint8_t packet[TS_PACKET_SIZE + FF_INPUT_BUFFER_PADDING_SIZE];
+    uint8_t packet[TS_PACKET_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
     const uint8_t *data;
     int64_t packet_num;
     int ret = 0;
@@ -2397,7 +2408,7 @@ static int handle_packets(MpegTSContext *ts, int64_t nb_packets)
 
     ts->stop_parse = 0;
     packet_num = 0;
-    memset(packet + TS_PACKET_SIZE, 0, FF_INPUT_BUFFER_PADDING_SIZE);
+    memset(packet + TS_PACKET_SIZE, 0, AV_INPUT_BUFFER_PADDING_SIZE);
     for (;;) {
         packet_num++;
         if (nb_packets != 0 && packet_num >= nb_packets ||
@@ -2431,7 +2442,7 @@ static int mpegts_probe(AVProbeData *p)
 #define CHECK_BLOCK 100
 
     if (check_count < CHECK_COUNT)
-        return AVERROR_INVALIDDATA;
+        return 0;
 
     for (i = 0; i<check_count; i+=CHECK_BLOCK) {
         int left = FFMIN(check_count - i, CHECK_BLOCK);
@@ -2446,12 +2457,12 @@ static int mpegts_probe(AVProbeData *p)
     sumscore = sumscore * CHECK_COUNT / check_count;
     maxscore = maxscore * CHECK_COUNT / CHECK_BLOCK;
 
-    av_dlog(0, "TS score: %d %d\n", sumscore, maxscore);
+    ff_dlog(0, "TS score: %d %d\n", sumscore, maxscore);
 
     if      (sumscore > 6) return AVPROBE_SCORE_MAX   + sumscore - CHECK_COUNT;
     else if (maxscore > 6) return AVPROBE_SCORE_MAX/2 + sumscore - CHECK_COUNT;
     else
-        return AVERROR_INVALIDDATA;
+        return 0;
 }
 
 /* return the 90kHz PCR and the extension for the 27MHz PCR. return
@@ -2497,7 +2508,7 @@ static int mpegts_read_header(AVFormatContext *s)
     AVIOContext *pb   = s->pb;
     uint8_t buf[8 * 1024] = {0};
     int len;
-    int64_t pos, probesize = s->probesize ? s->probesize : s->probesize2;
+    int64_t pos, probesize = s->probesize;
 
     if (ffio_ensure_seekback(pb, probesize) < 0)
         av_log(s, AV_LOG_WARNING, "Failed to allocate buffers for seekback\n");
@@ -2603,7 +2614,7 @@ static int mpegts_raw_read_packet(AVFormatContext *s, AVPacket *pkt)
     ret = read_packet(s, pkt->data, ts->raw_packet_size, &data);
     pkt->pos = avio_tell(s->pb);
     if (ret < 0) {
-        av_free_packet(pkt);
+        av_packet_unref(pkt);
         return ret;
     }
     if (data != pkt->data)
@@ -2646,7 +2657,7 @@ static int mpegts_read_packet(AVFormatContext *s, AVPacket *pkt)
     ts->pkt = pkt;
     ret = handle_packets(ts, 0);
     if (ret < 0) {
-        av_free_packet(ts->pkt);
+        av_packet_unref(ts->pkt);
         /* flush pes data left */
         for (i = 0; i < NB_PID_MAX; i++)
             if (ts->pids[i] && ts->pids[i]->type == MPEGTS_PES) {
@@ -2661,7 +2672,7 @@ static int mpegts_read_packet(AVFormatContext *s, AVPacket *pkt)
     }
 
     if (!ret && pkt->size < 0)
-        ret = AVERROR(EINTR);
+        ret = AVERROR_INVALIDDATA;
     return ret;
 }
 
@@ -2735,16 +2746,18 @@ static int64_t mpegts_get_dts(AVFormatContext *s, int stream_index,
         ret = av_read_frame(s, &pkt);
         if (ret < 0)
             return AV_NOPTS_VALUE;
-        av_free_packet(&pkt);
         if (pkt.dts != AV_NOPTS_VALUE && pkt.pos >= 0) {
             ff_reduce_index(s, pkt.stream_index);
             av_add_index_entry(s->streams[pkt.stream_index], pkt.pos, pkt.dts, 0, 0, AVINDEX_KEYFRAME /* FIXME keyframe? */);
             if (pkt.stream_index == stream_index && pkt.pos >= *ppos) {
+                int64_t dts = pkt.dts;
                 *ppos = pkt.pos;
-                return pkt.dts;
+                av_packet_unref(&pkt);
+                return dts;
             }
         }
         pos = pkt.pos;
+        av_packet_unref(&pkt);
     }
 
     return AV_NOPTS_VALUE;
diff --git a/libavformat/mpegts.h b/libavformat/mpegts.h
index 84f30983..0cdbc76b 100644
--- a/libavformat/mpegts.h
+++ b/libavformat/mpegts.h
@@ -60,6 +60,7 @@
 #define STREAM_TYPE_AUDIO_AC3       0x81
 #define STREAM_TYPE_AUDIO_DTS       0x82
 #define STREAM_TYPE_AUDIO_TRUEHD    0x83
+#define STREAM_TYPE_AUDIO_EAC3      0x87
 
 typedef struct MpegTSContext MpegTSContext;
 
diff --git a/libavformat/mpegtsenc.c b/libavformat/mpegtsenc.c
index 9efa9fc3..6bf5461b 100644
--- a/libavformat/mpegtsenc.c
+++ b/libavformat/mpegtsenc.c
@@ -56,6 +56,7 @@ typedef struct MpegTSService {
     int pcr_pid;
     int pcr_packet_count;
     int pcr_packet_period;
+    AVProgram *program;
 } MpegTSService;
 
 // service_type values as defined in ETSI 300 468
@@ -98,9 +99,15 @@ typedef struct MpegTSWrite {
     int pcr_period;
 #define MPEGTS_FLAG_REEMIT_PAT_PMT  0x01
 #define MPEGTS_FLAG_AAC_LATM        0x02
+#define MPEGTS_FLAG_PAT_PMT_AT_FRAMES           0x04
+#define MPEGTS_FLAG_SYSTEM_B        0x08
     int flags;
     int copyts;
     int tables_version;
+    double pat_period;
+    double sdt_period;
+    int64_t last_pat_ts;
+    int64_t last_sdt_ts;
 
     int omit_video_pes_length;
 } MpegTSWrite;
@@ -222,6 +229,10 @@ typedef struct MpegTSWriteStream {
     uint8_t *payload;
     AVFormatContext *amux;
     AVRational user_tb;
+
+    /* For Opus */
+    int opus_queued_samples;
+    int opus_pending_trim_start;
 } MpegTSWriteStream;
 
 static void mpegts_write_pat(AVFormatContext *s)
@@ -264,6 +275,20 @@ static int mpegts_write_pmt(AVFormatContext *s, MpegTSService *service)
         MpegTSWriteStream *ts_st = st->priv_data;
         AVDictionaryEntry *lang = av_dict_get(st->metadata, "language", NULL, 0);
 
+        if (s->nb_programs) {
+            int k, found = 0;
+            AVProgram *program = service->program;
+
+            for (k = 0; k < program->nb_stream_indexes; k++)
+                if (program->stream_index[k] == i) {
+                    found = 1;
+                    break;
+                }
+
+            if (!found)
+                continue;
+        }
+
         if (q - data > SECTION_LENGTH - 32) {
             err = 1;
             break;
@@ -288,6 +313,9 @@ static int mpegts_write_pmt(AVFormatContext *s, MpegTSService *service)
         case AV_CODEC_ID_DIRAC:
             stream_type = STREAM_TYPE_VIDEO_DIRAC;
             break;
+        case AV_CODEC_ID_VC1:
+            stream_type = STREAM_TYPE_VIDEO_VC1;
+            break;
         case AV_CODEC_ID_MP2:
         case AV_CODEC_ID_MP3:
             stream_type = STREAM_TYPE_AUDIO_MPEG1;
@@ -301,7 +329,14 @@ static int mpegts_write_pmt(AVFormatContext *s, MpegTSService *service)
             stream_type = STREAM_TYPE_AUDIO_AAC_LATM;
             break;
         case AV_CODEC_ID_AC3:
-            stream_type = STREAM_TYPE_AUDIO_AC3;
+            stream_type = (ts->flags & MPEGTS_FLAG_SYSTEM_B)
+                          ? STREAM_TYPE_PRIVATE_DATA
+                          : STREAM_TYPE_AUDIO_AC3;
+            break;
+        case AV_CODEC_ID_EAC3:
+            stream_type = (ts->flags & MPEGTS_FLAG_SYSTEM_B)
+                          ? STREAM_TYPE_PRIVATE_DATA
+                          : STREAM_TYPE_AUDIO_EAC3;
             break;
         case AV_CODEC_ID_DTS:
             stream_type = STREAM_TYPE_AUDIO_DTS;
@@ -309,6 +344,9 @@ static int mpegts_write_pmt(AVFormatContext *s, MpegTSService *service)
         case AV_CODEC_ID_TRUEHD:
             stream_type = STREAM_TYPE_AUDIO_TRUEHD;
             break;
+        case AV_CODEC_ID_OPUS:
+            stream_type = STREAM_TYPE_PRIVATE_DATA;
+            break;
         default:
             stream_type = STREAM_TYPE_PRIVATE_DATA;
             break;
@@ -322,7 +360,12 @@ static int mpegts_write_pmt(AVFormatContext *s, MpegTSService *service)
         /* write optional descriptors here */
         switch (st->codec->codec_type) {
         case AVMEDIA_TYPE_AUDIO:
-            if (st->codec->codec_id==AV_CODEC_ID_EAC3) {
+            if (st->codec->codec_id==AV_CODEC_ID_AC3 && (ts->flags & MPEGTS_FLAG_SYSTEM_B)) {
+                *q++=0x6a; // AC3 descriptor see A038 DVB SI
+                *q++=1; // 1 byte, all flags sets to 0
+                *q++=0; // omit all fields...
+            }
+            if (st->codec->codec_id==AV_CODEC_ID_EAC3 && (ts->flags & MPEGTS_FLAG_SYSTEM_B)) {
                 *q++=0x7a; // EAC3 descriptor see A038 DVB SI
                 *q++=1; // 1 byte, all flags sets to 0
                 *q++=0; // omit all fields...
@@ -335,6 +378,82 @@ static int mpegts_write_pmt(AVFormatContext *s, MpegTSService *service)
                 *q++ = 'S';
                 *q++ = 'D';
             }
+            if (st->codec->codec_id==AV_CODEC_ID_OPUS) {
+                /* 6 bytes registration descriptor, 4 bytes Opus audio descriptor */
+                if (q - data > SECTION_LENGTH - 6 - 4) {
+                    err = 1;
+                    break;
+                }
+
+                *q++ = 0x05; /* MPEG-2 registration descriptor*/
+                *q++ = 4;
+                *q++ = 'O';
+                *q++ = 'p';
+                *q++ = 'u';
+                *q++ = 's';
+
+                *q++ = 0x7f; /* DVB extension descriptor */
+                *q++ = 2;
+                *q++ = 0x80;
+
+                if (st->codec->extradata && st->codec->extradata_size >= 19) {
+                    if (st->codec->extradata[18] == 0 && st->codec->channels <= 2) {
+                        /* RTP mapping family (extradata[18] == 0): the descriptor byte is the channel count */
+                        *q++ = st->codec->channels;
+                    } else if (st->codec->extradata[18] == 1 && st->codec->channels <= 8 &&
+                               st->codec->extradata_size >= 21 + st->codec->channels) {
+                        static const uint8_t coupled_stream_counts[9] = {
+                            1, 0, 1, 1, 2, 2, 2, 3, 3
+                        };
+                        static const uint8_t channel_map_a[8][8] = {
+                            {0},
+                            {0, 1},
+                            {0, 2, 1},
+                            {0, 1, 2, 3},
+                            {0, 4, 1, 2, 3},
+                            {0, 4, 1, 2, 3, 5},
+                            {0, 4, 1, 2, 3, 5, 6},
+                            {0, 6, 1, 2, 3, 4, 5, 7},
+                        };
+                        static const uint8_t channel_map_b[8][8] = {
+                            {0},
+                            {0, 1},
+                            {0, 1, 2},
+                            {0, 1, 2, 3},
+                            {0, 1, 2, 3, 4},
+                            {0, 1, 2, 3, 4, 5},
+                            {0, 1, 2, 3, 4, 5, 6},
+                            {0, 1, 2, 3, 4, 5, 6, 7},
+                        };
+                        /* Vorbis mapping family (extradata[18] == 1): compare stream counts and channel map against the two tables above */
+
+                        if (st->codec->extradata[19] == st->codec->channels - coupled_stream_counts[st->codec->channels] &&
+                            st->codec->extradata[20] == coupled_stream_counts[st->codec->channels] &&
+                            memcmp(&st->codec->extradata[21], channel_map_a[st->codec->channels-1], st->codec->channels) == 0) {
+                            *q++ = st->codec->channels;
+                        } else if (st->codec->channels >= 2 && st->codec->extradata[19] == st->codec->channels &&
+                                   st->codec->extradata[20] == 0 &&
+                                   memcmp(&st->codec->extradata[21], channel_map_b[st->codec->channels-1], st->codec->channels) == 0) {
+                            *q++ = st->codec->channels | 0x80;
+                        } else {
+                            /* Unsupported, could write an extended descriptor here */
+                            av_log(s, AV_LOG_ERROR, "Unsupported Opus Vorbis-style channel mapping\n");
+                            *q++ = 0xff;
+                        }
+                    } else {
+                        /* Unsupported */
+                        av_log(s, AV_LOG_ERROR, "Unsupported Opus channel mapping for family %d\n", st->codec->extradata[18]);
+                        *q++ = 0xff;
+                    }
+                } else if (st->codec->channels <= 2) {
+                    /* Assume RTP mapping family */
+                    *q++ = st->codec->channels;
+                } else {
+                    /* Unsupported */
+                    av_log(s, AV_LOG_ERROR, "Unsupported Opus channel mapping\n");
+                    *q++ = 0xff;
+                }
+            }
 
             if (lang) {
                 char *p;
@@ -463,6 +582,13 @@ static int mpegts_write_pmt(AVFormatContext *s, MpegTSService *service)
                 *q++ = 'r';
                 *q++ = 'a';
                 *q++ = 'c';
+            } else if (stream_type == STREAM_TYPE_VIDEO_VC1) {
+                *q++ = 0x05; /*MPEG-2 registration descriptor*/
+                *q++ = 4;
+                *q++ = 'V';
+                *q++ = 'C';
+                *q++ = '-';
+                *q++ = '1';
             }
             break;
         case AVMEDIA_TYPE_DATA:
@@ -600,7 +726,7 @@ static void section_write_packet(MpegTSSection *s, const uint8_t *packet)
     avio_write(ctx->pb, packet, TS_PACKET_SIZE);
 }
 
-static int mpegts_write_header(AVFormatContext *s)
+static int mpegts_init(AVFormatContext *s)
 {
     MpegTSWrite *ts = s->priv_data;
     MpegTSWriteStream *ts_st;
@@ -621,22 +747,44 @@ static int mpegts_write_header(AVFormatContext *s)
 
     ts->tsid = ts->transport_stream_id;
     ts->onid = ts->original_network_id;
-    /* allocate a single DVB service */
-    title = av_dict_get(s->metadata, "service_name", NULL, 0);
-    if (!title)
-        title = av_dict_get(s->metadata, "title", NULL, 0);
-    service_name  = title ? title->value : DEFAULT_SERVICE_NAME;
-    provider      = av_dict_get(s->metadata, "service_provider", NULL, 0);
-    provider_name = provider ? provider->value : DEFAULT_PROVIDER_NAME;
-    service       = mpegts_add_service(ts, ts->service_id,
-                                       provider_name, service_name);
-
-    if (!service)
-        return AVERROR(ENOMEM);
+    if (!s->nb_programs) {
+        /* allocate a single DVB service */
+        title = av_dict_get(s->metadata, "service_name", NULL, 0);
+        if (!title)
+            title = av_dict_get(s->metadata, "title", NULL, 0);
+        service_name  = title ? title->value : DEFAULT_SERVICE_NAME;
+        provider      = av_dict_get(s->metadata, "service_provider", NULL, 0);
+        provider_name = provider ? provider->value : DEFAULT_PROVIDER_NAME;
+        service       = mpegts_add_service(ts, ts->service_id,
+                                           provider_name, service_name);
+
+        if (!service)
+            return AVERROR(ENOMEM);
+
+        service->pmt.write_packet = section_write_packet;
+        service->pmt.opaque       = s;
+        service->pmt.cc           = 15;
+    } else {
+        for (i = 0; i < s->nb_programs; i++) {
+            AVProgram *program = s->programs[i];
+            title = av_dict_get(program->metadata, "service_name", NULL, 0);
+            if (!title)
+                title = av_dict_get(program->metadata, "title", NULL, 0);
+            service_name  = title ? title->value : DEFAULT_SERVICE_NAME;
+            provider      = av_dict_get(program->metadata, "service_provider", NULL, 0);
+            provider_name = provider ? provider->value : DEFAULT_PROVIDER_NAME;
+            service       = mpegts_add_service(ts, program->id,
+                                               provider_name, service_name);
+
+            if (!service)
+                return AVERROR(ENOMEM);
 
-    service->pmt.write_packet = section_write_packet;
-    service->pmt.opaque       = s;
-    service->pmt.cc           = 15;
+            service->pmt.write_packet = section_write_packet;
+            service->pmt.opaque       = s;
+            service->pmt.cc           = 15;
+            service->program          = program;
+        }
+    }
 
     ts->pat.pid          = PAT_PID;
     /* Initialize at 15 so that it wraps and is equal to 0 for the
@@ -658,6 +806,7 @@ static int mpegts_write_header(AVFormatContext *s)
 
     /* assign pids to each stream */
     for (i = 0; i < s->nb_streams; i++) {
+        AVProgram *program;
         st = s->streams[i];
 
         ts_st = av_mallocz(sizeof(MpegTSWriteStream));
@@ -675,6 +824,17 @@ static int mpegts_write_header(AVFormatContext *s)
             ret = AVERROR(ENOMEM);
             goto fail;
         }
+
+        program = av_find_program_from_stream(s, NULL, i);
+        if (program) {
+            for (j = 0; j < ts->nb_services; j++) {
+                if (ts->services[j]->program == program) {
+                    service = ts->services[j];
+                    break;
+                }
+            }
+        }
+
         ts_st->service = service;
         /* MPEG pid values < 16 are reserved. Applications which set st->id in
          * this range are assigned a calculated pid. */
@@ -738,6 +898,9 @@ static int mpegts_write_header(AVFormatContext *s)
             if (ret < 0)
                 goto fail;
         }
+        if (st->codec->codec_id == AV_CODEC_ID_OPUS) {
+            ts_st->opus_pending_trim_start = st->codec->initial_padding * 48000 / st->codec->sample_rate;
+        }
     }
 
     av_freep(&pids);
@@ -751,11 +914,11 @@ static int mpegts_write_header(AVFormatContext *s)
         ts_st = pcr_st->priv_data;
 
     if (ts->mux_rate > 1) {
-        service->pcr_packet_period = (ts->mux_rate * ts->pcr_period) /
+        service->pcr_packet_period = (int64_t)ts->mux_rate * ts->pcr_period /
                                      (TS_PACKET_SIZE * 8 * 1000);
-        ts->sdt_packet_period      = (ts->mux_rate * SDT_RETRANS_TIME) /
+        ts->sdt_packet_period      = (int64_t)ts->mux_rate * SDT_RETRANS_TIME /
                                      (TS_PACKET_SIZE * 8 * 1000);
-        ts->pat_packet_period      = (ts->mux_rate * PAT_RETRANS_TIME) /
+        ts->pat_packet_period      = (int64_t)ts->mux_rate * PAT_RETRANS_TIME /
                                      (TS_PACKET_SIZE * 8 * 1000);
 
         if (ts->copyts < 1)
@@ -783,6 +946,16 @@ static int mpegts_write_header(AVFormatContext *s)
             service->pcr_packet_period = 1;
     }
 
+    ts->last_pat_ts = AV_NOPTS_VALUE;
+    ts->last_sdt_ts = AV_NOPTS_VALUE;
+    // The user specified a period, use only it
+    if (ts->pat_period < INT_MAX/2) {
+        ts->pat_packet_period = INT_MAX;
+    }
+    if (ts->sdt_period < INT_MAX/2) {
+        ts->sdt_packet_period = INT_MAX;
+    }
+
     // output a PCR as soon as possible
     service->pcr_packet_count = service->pcr_packet_period;
     ts->pat_packet_count      = ts->pat_packet_period - 1;
@@ -809,41 +982,31 @@ static int mpegts_write_header(AVFormatContext *s)
 
 fail:
     av_freep(&pids);
-    for (i = 0; i < s->nb_streams; i++) {
-        st    = s->streams[i];
-        ts_st = st->priv_data;
-        if (ts_st) {
-            av_freep(&ts_st->payload);
-            if (ts_st->amux) {
-                avformat_free_context(ts_st->amux);
-                ts_st->amux = NULL;
-            }
-        }
-        av_freep(&st->priv_data);
-    }
-
-    for (i = 0; i < ts->nb_services; i++) {
-        service = ts->services[i];
-        av_freep(&service->provider_name);
-        av_freep(&service->name);
-        av_freep(&service);
-    }
-    av_freep(&ts->services);
     return ret;
 }
 
 /* send SDT, PAT and PMT tables regulary */
-static void retransmit_si_info(AVFormatContext *s, int force_pat)
+static void retransmit_si_info(AVFormatContext *s, int force_pat, int64_t dts)
 {
     MpegTSWrite *ts = s->priv_data;
     int i;
 
-    if (++ts->sdt_packet_count == ts->sdt_packet_period) {
+    if (++ts->sdt_packet_count == ts->sdt_packet_period ||
+        (dts != AV_NOPTS_VALUE && ts->last_sdt_ts == AV_NOPTS_VALUE) ||
+        (dts != AV_NOPTS_VALUE && dts - ts->last_sdt_ts >= ts->sdt_period*90000.0)
+    ) {
         ts->sdt_packet_count = 0;
+        if (dts != AV_NOPTS_VALUE)
+            ts->last_sdt_ts = FFMAX(dts, ts->last_sdt_ts);
         mpegts_write_sdt(s);
     }
-    if (++ts->pat_packet_count == ts->pat_packet_period || force_pat) {
+    if (++ts->pat_packet_count == ts->pat_packet_period ||
+        (dts != AV_NOPTS_VALUE && ts->last_pat_ts == AV_NOPTS_VALUE) ||
+        (dts != AV_NOPTS_VALUE && dts - ts->last_pat_ts >= ts->pat_period*90000.0) ||
+        force_pat) {
         ts->pat_packet_count = 0;
+        if (dts != AV_NOPTS_VALUE)
+            ts->last_pat_ts = FFMAX(dts, ts->last_pat_ts);
         mpegts_write_pat(s);
         for (i = 0; i < ts->nb_services; i++)
             mpegts_write_pmt(s, ts->services[i]);
@@ -971,9 +1134,14 @@ static void mpegts_write_pes(AVFormatContext *s, AVStream *st,
     int64_t delay = av_rescale(s->max_delay, 90000, AV_TIME_BASE);
     int force_pat = st->codec->codec_type == AVMEDIA_TYPE_VIDEO && key && !ts_st->prev_payload_key;
 
+    av_assert0(ts_st->payload != buf || st->codec->codec_type != AVMEDIA_TYPE_VIDEO);
+    if (ts->flags & MPEGTS_FLAG_PAT_PMT_AT_FRAMES && st->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
+        force_pat = 1;
+    }
+
     is_start = 1;
     while (payload_size > 0) {
-        retransmit_si_info(s, force_pat);
+        retransmit_si_info(s, force_pat, dts);
         force_pat = 0;
 
         write_pcr = 0;
@@ -1231,6 +1399,58 @@ static int check_hevc_startcode(AVFormatContext *s, const AVStream *st, const AV
     return 0;
 }
 
+/* Return the Opus packet duration in 48 kHz samples as encoded in its TOC
+ * byte (frame size * frame count), or 0 if it is too short or over 120 ms.
+ * Based on GStreamer's gst-plugins-base/ext/ogg/gstoggstream.c
+ * Released under the LGPL v2.1+, written by
+ * Vincent Penquerc'h <vincent.penquerch@collabora.co.uk>
+ */
+static int opus_get_packet_samples(AVFormatContext *s, AVPacket *pkt)
+{
+    static const int durations[32] = {
+      480, 960, 1920, 2880,       /* Silk NB */
+      480, 960, 1920, 2880,       /* Silk MB */
+      480, 960, 1920, 2880,       /* Silk WB */
+      480, 960,                   /* Hybrid SWB */
+      480, 960,                   /* Hybrid FB */
+      120, 240, 480, 960,         /* CELT NB */
+      120, 240, 480, 960,         /* CELT WB */
+      120, 240, 480, 960,         /* CELT SWB */
+      120, 240, 480, 960,         /* CELT FB */
+    };
+    int toc, frame_duration, nframes, duration;
+
+    if (pkt->size < 1)
+        return 0;
+
+    toc = pkt->data[0];
+
+    frame_duration = durations[toc >> 3];
+    switch (toc & 3) {
+    case 0:
+        nframes = 1;
+        break;
+    case 1:
+    case 2:
+        nframes = 2;
+        break;
+    default: /* code 3: frame count is in the low 6 bits of the next byte */
+        if (pkt->size < 2)
+            return 0;
+        nframes = pkt->data[1] & 63;
+        break;
+    }
+
+    duration = nframes * frame_duration;
+    if (duration > 5760) {
+        av_log(s, AV_LOG_WARNING,
+               "Opus packet duration > 120 ms, invalid\n");
+        return 0;
+    }
+
+    return duration;
+}
+
 static int mpegts_write_packet_internal(AVFormatContext *s, AVPacket *pkt)
 {
     AVStream *st = s->streams[pkt->stream_index];
@@ -1241,6 +1461,7 @@ static int mpegts_write_packet_internal(AVFormatContext *s, AVPacket *pkt)
     MpegTSWriteStream *ts_st = st->priv_data;
     const int64_t delay = av_rescale(s->max_delay, 90000, AV_TIME_BASE) * 2;
     int64_t dts = pkt->dts, pts = pkt->pts;
+    int opus_samples = 0;
 
     if (ts->reemit_pat_pmt) {
         av_log(s, AV_LOG_WARNING,
@@ -1313,9 +1534,7 @@ static int mpegts_write_packet_internal(AVFormatContext *s, AVPacket *pkt)
             if (!ts_st->amux) {
                 av_log(s, AV_LOG_ERROR, "AAC bitstream not in ADTS format "
                                         "and extradata missing\n");
-                return AVERROR_INVALIDDATA;
-            }
-
+            } else {
             av_init_packet(&pkt2);
             pkt2.data = pkt->data;
             pkt2.size = pkt->size;
@@ -1334,11 +1553,83 @@ static int mpegts_write_packet_internal(AVFormatContext *s, AVPacket *pkt)
             size            = avio_close_dyn_buf(ts_st->amux->pb, &data);
             ts_st->amux->pb = NULL;
             buf             = data;
+            }
         }
     } else if (st->codec->codec_id == AV_CODEC_ID_HEVC) {
         int ret = check_hevc_startcode(s, st, pkt);
         if (ret < 0)
             return ret;
+    } else if (st->codec->codec_id == AV_CODEC_ID_OPUS) {
+        if (pkt->size < 2) {
+            av_log(s, AV_LOG_ERROR, "Opus packet too short\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        /* Add Opus control header */
+        if ((AV_RB16(pkt->data) >> 5) != 0x3ff) {
+            uint8_t *side_data;
+            int side_data_size;
+            int i, n;
+            int ctrl_header_size;
+            int trim_start = 0, trim_end = 0;
+
+            opus_samples = opus_get_packet_samples(s, pkt);
+
+            side_data = av_packet_get_side_data(pkt,
+                                                AV_PKT_DATA_SKIP_SAMPLES,
+                                                &side_data_size);
+
+            if (side_data && side_data_size >= 10) {
+                trim_end = AV_RL32(side_data + 4) * 48000 / st->codec->sample_rate;
+            }
+
+            ctrl_header_size = pkt->size + 2 + pkt->size / 255 + 1;
+            if (ts_st->opus_pending_trim_start)
+              ctrl_header_size += 2;
+            if (trim_end)
+              ctrl_header_size += 2;
+
+            data = av_malloc(ctrl_header_size);
+            if (!data)
+                return AVERROR(ENOMEM);
+
+            data[0] = 0x7f;
+            data[1] = 0xe0;
+            if (ts_st->opus_pending_trim_start)
+                data[1] |= 0x10;
+            if (trim_end)
+                data[1] |= 0x08;
+
+            n = pkt->size;
+            i = 2;
+            do {
+                data[i] = FFMIN(n, 255);
+                n -= 255;
+                i++;
+            } while (n >= 0);
+
+            av_assert0(2 + pkt->size / 255 + 1 == i);
+
+            if (ts_st->opus_pending_trim_start) {
+                trim_start = FFMIN(ts_st->opus_pending_trim_start, opus_samples);
+                AV_WB16(data + i, trim_start);
+                i += 2;
+                ts_st->opus_pending_trim_start -= trim_start;
+            }
+            if (trim_end) {
+                trim_end = FFMIN(trim_end, opus_samples - trim_start);
+                AV_WB16(data + i, trim_end);
+                i += 2;
+            }
+
+            memcpy(data + i, pkt->data, pkt->size);
+            buf     = data;
+            size    = ctrl_header_size;
+        } else {
+            /* TODO: Can we get TS formatted data here? If so we will
+             * need to count the samples of that too! */
+            av_log(s, AV_LOG_WARNING, "Got MPEG-TS formatted Opus data, unhandled");
+        }
     }
 
     if (pkt->dts != AV_NOPTS_VALUE) {
@@ -1359,11 +1650,13 @@ static int mpegts_write_packet_internal(AVFormatContext *s, AVPacket *pkt)
     if (ts_st->payload_size && (ts_st->payload_size + size > ts->pes_payload_size ||
         (dts != AV_NOPTS_VALUE && ts_st->payload_dts != AV_NOPTS_VALUE &&
          av_compare_ts(dts - ts_st->payload_dts, st->time_base,
-                       s->max_delay, AV_TIME_BASE_Q) >= 0))) {
+                       s->max_delay, AV_TIME_BASE_Q) >= 0) ||
+        ts_st->opus_queued_samples + opus_samples >= 5760 /* 120ms */)) {
         mpegts_write_pes(s, st, ts_st->payload, ts_st->payload_size,
                          ts_st->payload_pts, ts_st->payload_dts,
                          ts_st->payload_flags & AV_PKT_FLAG_KEY);
         ts_st->payload_size = 0;
+        ts_st->opus_queued_samples = 0;
     }
 
     if (st->codec->codec_type != AVMEDIA_TYPE_AUDIO || size > ts->pes_payload_size) {
@@ -1371,6 +1664,7 @@ static int mpegts_write_packet_internal(AVFormatContext *s, AVPacket *pkt)
         // for video and subtitle, write a single pes packet
         mpegts_write_pes(s, st, buf, size, pts, dts,
                          pkt->flags & AV_PKT_FLAG_KEY);
+        ts_st->opus_queued_samples = 0;
         av_free(data);
         return 0;
     }
@@ -1383,6 +1677,7 @@ static int mpegts_write_packet_internal(AVFormatContext *s, AVPacket *pkt)
 
     memcpy(ts_st->payload + ts_st->payload_size, buf, size);
     ts_st->payload_size += size;
+    ts_st->opus_queued_samples += opus_samples;
 
     av_free(data);
 
@@ -1402,6 +1697,7 @@ static void mpegts_write_flush(AVFormatContext *s)
                              ts_st->payload_pts, ts_st->payload_dts,
                              ts_st->payload_flags & AV_PKT_FLAG_KEY);
             ts_st->payload_size = 0;
+            ts_st->opus_queued_samples = 0;
         }
     }
 }
@@ -1417,21 +1713,28 @@ static int mpegts_write_packet(AVFormatContext *s, AVPacket *pkt)
 }
 
 static int mpegts_write_end(AVFormatContext *s)
+{
+    if (s->pb)
+        mpegts_write_flush(s);
+
+    return 0;
+}
+
+static void mpegts_deinit(AVFormatContext *s)
 {
     MpegTSWrite *ts = s->priv_data;
     MpegTSService *service;
     int i;
 
-    if (s->pb)
-        mpegts_write_flush(s);
-
     for (i = 0; i < s->nb_streams; i++) {
         AVStream *st = s->streams[i];
         MpegTSWriteStream *ts_st = st->priv_data;
-        av_freep(&ts_st->payload);
-        if (ts_st->amux) {
-            avformat_free_context(ts_st->amux);
-            ts_st->amux = NULL;
+        if (ts_st) {
+            av_freep(&ts_st->payload);
+            if (ts_st->amux) {
+                avformat_free_context(ts_st->amux);
+                ts_st->amux = NULL;
+            }
         }
     }
 
@@ -1442,8 +1745,24 @@ static int mpegts_write_end(AVFormatContext *s)
         av_freep(&service);
     }
     av_freep(&ts->services);
+}
 
-    return 0;
+static int mpegts_check_bitstream(struct AVFormatContext *s, const AVPacket *pkt)
+{
+    int ret = 1;
+    AVStream *st = s->streams[pkt->stream_index];
+    /* packets of >= 5 bytes lacking a 00 00 00 01 / 00 00 01 prefix get *_mp4toannexb */
+    if (st->codec->codec_id == AV_CODEC_ID_H264) {
+        if (pkt->size >= 5 && AV_RB32(pkt->data) != 0x0000001 &&
+                              AV_RB24(pkt->data) != 0x000001)
+            ret = ff_stream_add_bitstream_filter(st, "h264_mp4toannexb", NULL);
+    } else if (st->codec->codec_id == AV_CODEC_ID_HEVC) {
+        if (pkt->size >= 5 && AV_RB32(pkt->data) != 0x0000001 &&
+                              AV_RB24(pkt->data) != 0x000001)
+            ret = ff_stream_add_bitstream_filter(st, "hevc_mp4toannexb", NULL);
+    }
+    /* 1 when no filter was requested, else ff_stream_add_bitstream_filter()'s result */
+    return ret;
 }
 
 static const AVOption options[] = {
@@ -1487,7 +1806,7 @@ static const AVOption options[] = {
       offsetof(MpegTSWrite, start_pid), AV_OPT_TYPE_INT,
       { .i64 = 0x0100 }, 0x0020, 0x0f00, AV_OPT_FLAG_ENCODING_PARAM },
     { "mpegts_m2ts_mode", "Enable m2ts mode.",
-      offsetof(MpegTSWrite, m2ts_mode), AV_OPT_TYPE_INT,
+      offsetof(MpegTSWrite, m2ts_mode), AV_OPT_TYPE_BOOL,
       { .i64 = -1 }, -1, 1, AV_OPT_FLAG_ENCODING_PARAM },
     { "muxrate", NULL,
       offsetof(MpegTSWrite, mux_rate), AV_OPT_TYPE_INT,
@@ -1504,22 +1823,34 @@ static const AVOption options[] = {
     { "latm", "Use LATM packetization for AAC",
       0, AV_OPT_TYPE_CONST, { .i64 = MPEGTS_FLAG_AAC_LATM }, 0, INT_MAX,
       AV_OPT_FLAG_ENCODING_PARAM, "mpegts_flags" },
+    { "pat_pmt_at_frames", "Reemit PAT and PMT at each video frame",
+      0, AV_OPT_TYPE_CONST, { .i64 = MPEGTS_FLAG_PAT_PMT_AT_FRAMES}, 0, INT_MAX,
+      AV_OPT_FLAG_ENCODING_PARAM, "mpegts_flags" },
+    { "system_b", "Conform to System B (DVB) instead of System A (ATSC)",
+      0, AV_OPT_TYPE_CONST, { .i64 = MPEGTS_FLAG_SYSTEM_B }, 0, INT_MAX,
+      AV_OPT_FLAG_ENCODING_PARAM, "mpegts_flags" },
     // backward compatibility
     { "resend_headers", "Reemit PAT/PMT before writing the next packet",
       offsetof(MpegTSWrite, reemit_pat_pmt), AV_OPT_TYPE_INT,
       { .i64 = 0 }, 0, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM },
     { "mpegts_copyts", "don't offset dts/pts",
-      offsetof(MpegTSWrite, copyts), AV_OPT_TYPE_INT,
+      offsetof(MpegTSWrite, copyts), AV_OPT_TYPE_BOOL,
       { .i64 = -1 }, -1, 1, AV_OPT_FLAG_ENCODING_PARAM },
     { "tables_version", "set PAT, PMT and SDT version",
       offsetof(MpegTSWrite, tables_version), AV_OPT_TYPE_INT,
       { .i64 = 0 }, 0, 31, AV_OPT_FLAG_ENCODING_PARAM },
     { "omit_video_pes_length", "Omit the PES packet length for video packets",
-      offsetof(MpegTSWrite, omit_video_pes_length), AV_OPT_TYPE_INT,
+      offsetof(MpegTSWrite, omit_video_pes_length), AV_OPT_TYPE_BOOL,
       { .i64 = 1 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM },
     { "pcr_period", "PCR retransmission time",
       offsetof(MpegTSWrite, pcr_period), AV_OPT_TYPE_INT,
       { .i64 = PCR_RETRANS_TIME }, 0, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM },
+    { "pat_period", "PAT/PMT retransmission time limit in seconds",
+      offsetof(MpegTSWrite, pat_period), AV_OPT_TYPE_DOUBLE,
+      { .dbl = INT_MAX }, 0, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM },
+    { "sdt_period", "SDT retransmission time limit in seconds",
+      offsetof(MpegTSWrite, sdt_period), AV_OPT_TYPE_DOUBLE,
+      { .dbl = INT_MAX }, 0, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM },
     { NULL },
 };
 
@@ -1538,9 +1869,11 @@ AVOutputFormat ff_mpegts_muxer = {
     .priv_data_size = sizeof(MpegTSWrite),
     .audio_codec    = AV_CODEC_ID_MP2,
     .video_codec    = AV_CODEC_ID_MPEG2VIDEO,
-    .write_header   = mpegts_write_header,
+    .init           = mpegts_init,
     .write_packet   = mpegts_write_packet,
     .write_trailer  = mpegts_write_end,
-    .flags          = AVFMT_ALLOW_FLUSH,
+    .deinit         = mpegts_deinit,
+    .check_bitstream = mpegts_check_bitstream,
+    .flags          = AVFMT_ALLOW_FLUSH | AVFMT_VARIABLE_FPS,
     .priv_class     = &mpegts_muxer_class,
 };
diff --git a/libavformat/mpjpeg.c b/libavformat/mpjpeg.c
index 7b975e28..3904ccb2 100644
--- a/libavformat/mpjpeg.c
+++ b/libavformat/mpjpeg.c
@@ -33,10 +33,7 @@ typedef struct MPJPEGContext {
 static int mpjpeg_write_header(AVFormatContext *s)
 {
     MPJPEGContext *mpj = s->priv_data;
-    uint8_t buf1[256];
-
-    snprintf(buf1, sizeof(buf1), "--%s\r\n", mpj->boundary_tag);
-    avio_write(s->pb, buf1, strlen(buf1));
+    avio_printf(s->pb, "--%s\r\n", mpj->boundary_tag);
     avio_flush(s->pb);
     return 0;
 }
@@ -44,17 +41,12 @@ static int mpjpeg_write_header(AVFormatContext *s)
 static int mpjpeg_write_packet(AVFormatContext *s, AVPacket *pkt)
 {
     MPJPEGContext *mpj = s->priv_data;
-    uint8_t buf1[256];
-
-    snprintf(buf1, sizeof(buf1), "Content-type: image/jpeg\r\n");
-    avio_write(s->pb, buf1, strlen(buf1));
-
-    snprintf(buf1, sizeof(buf1), "Content-length: %d\r\n\r\n", pkt->size);
-    avio_write(s->pb, buf1, strlen(buf1));
+    avio_printf(s->pb, "Content-type: image/jpeg\r\n");
+    avio_printf(s->pb, "Content-length: %d\r\n\r\n",
+                pkt->size);
     avio_write(s->pb, pkt->data, pkt->size);
 
-    snprintf(buf1, sizeof(buf1), "\r\n--%s\r\n", mpj->boundary_tag);
-    avio_write(s->pb, buf1, strlen(buf1));
+    avio_printf(s->pb, "\r\n--%s\r\n", mpj->boundary_tag);
     return 0;
 }
 
diff --git a/libavformat/mpjpegdec.c b/libavformat/mpjpegdec.c
index 845e95cb..7a6bbe49 100644
--- a/libavformat/mpjpegdec.c
+++ b/libavformat/mpjpegdec.c
@@ -20,16 +20,38 @@
  */
 
 #include "libavutil/avstring.h"
+#include "libavutil/opt.h"
 
 #include "avformat.h"
 #include "internal.h"
+#include "avio_internal.h"
 
-static int get_line(AVIOContext *pb, char *line, int line_size)
+
+
+typedef struct MPJPEGDemuxContext {
+    const AVClass *class;
+    char       *boundary;
+    char       *searchstr;
+    int         searchstr_len;
+    int         strict_mime_boundary;
+} MPJPEGDemuxContext;
+
+/* Strip trailing av_isspace() characters from p in place; NULL or empty p is left as is. */
+static void trim_right(char *p)
 {
-    int i = ff_get_line(pb, line, line_size);
+    char *end;
 
-    if (i > 1 && line[i - 2] == '\r')
-        line[i - 2] = '\0';
+    if (!p || !*p)
+        return;
+
+    end = p + strlen(p);
+    while (end > p && av_isspace(*(end-1)))
+        *(--end) = '\0';
+}
+
+static int get_line(AVIOContext *pb, char *line, int line_size)
+{
+    ff_get_line(pb, line, line_size);
 
     if (pb->error)
         return pb->error;
@@ -37,20 +59,33 @@ static int get_line(AVIOContext *pb, char *line, int line_size)
     if (pb->eof_reached)
         return AVERROR_EOF;
 
+    trim_right(line);
     return 0;
 }
 
+
+
 static int split_tag_value(char **tag, char **value, char *line)
 {
     char *p = line;
+    int  foundData = 0;
+
+    *tag = NULL;
+    *value = NULL;
+
 
-    while (*p != '\0' && *p != ':')
+    while (*p != '\0' && *p != ':') {
+        if (!av_isspace(*p)) {
+            foundData = 1;
+        }
         p++;
+    }
     if (*p != ':')
-        return AVERROR_INVALIDDATA;
+        return foundData ? AVERROR_INVALIDDATA : 0;
 
     *p   = '\0';
     *tag = line;
+    trim_right(*tag);
 
     p++;
 
@@ -58,49 +93,38 @@ static int split_tag_value(char **tag, char **value, char *line)
         p++;
 
     *value = p;
+    trim_right(*value);
 
     return 0;
 }
 
-static int check_content_type(char *line)
-{
-    char *tag, *value;
-    int ret = split_tag_value(&tag, &value, line);
-
-    if (ret < 0)
-        return ret;
-
-    if (av_strcasecmp(tag, "Content-type") ||
-        av_strcasecmp(value, "image/jpeg"))
-        return AVERROR_INVALIDDATA;
+static int parse_multipart_header(AVIOContext *pb,
+                                    int* size,
+                                    const char* expected_boundary,
+                                    void *log_ctx);
 
+static int mpjpeg_read_close(AVFormatContext *s)
+{
+    MPJPEGDemuxContext *mpjpeg = s->priv_data;
+    av_freep(&mpjpeg->boundary);
+    av_freep(&mpjpeg->searchstr);
     return 0;
 }
 
 static int mpjpeg_read_probe(AVProbeData *p)
 {
     AVIOContext *pb;
-    char line[128] = { 0 };
     int ret = 0;
+    int size = 0;
 
     if (p->buf_size < 2 || p->buf[0] != '-' || p->buf[1] != '-')
         return 0;
 
     pb = avio_alloc_context(p->buf, p->buf_size, 0, NULL, NULL, NULL, NULL);
     if (!pb)
-        return AVERROR(ENOMEM);
+        return 0;
 
-    while (!pb->eof_reached) {
-        ret = get_line(pb, line, sizeof(line));
-        if (ret < 0)
-            break;
-
-        ret = check_content_type(line);
-        if (!ret) {
-            ret = AVPROBE_SCORE_MAX;
-            break;
-        }
-    }
+    ret = (parse_multipart_header(pb, &size, "--", NULL) > 0) ? AVPROBE_SCORE_MAX : 0;
 
     av_free(pb);
 
@@ -110,19 +134,22 @@ static int mpjpeg_read_probe(AVProbeData *p)
 static int mpjpeg_read_header(AVFormatContext *s)
 {
     AVStream *st;
-    char boundary[70 + 2 + 1];
+    char boundary[70 + 2 + 1] = {0};
     int64_t pos = avio_tell(s->pb);
     int ret;
 
-
-    ret = get_line(s->pb, boundary, sizeof(boundary));
-    if (ret < 0)
-        return ret;
+    do {
+        ret = get_line(s->pb, boundary, sizeof(boundary));
+        if (ret < 0)
+            return ret;
+    } while (!boundary[0]);
 
     if (strncmp(boundary, "--", 2))
         return AVERROR_INVALIDDATA;
 
     st = avformat_new_stream(s, NULL);
+    if (!st)
+        return AVERROR(ENOMEM);
 
     st->codec->codec_type = AVMEDIA_TYPE_VIDEO;
     st->codec->codec_id   = AV_CODEC_ID_MJPEG;
@@ -145,25 +172,51 @@ static int parse_content_length(const char *value)
     return val;
 }
 
-static int parse_multipart_header(AVFormatContext *s)
+static int parse_multipart_header(AVIOContext *pb,
+                            int* size,
+                            const char* expected_boundary,
+                            void *log_ctx)
 {
     char line[128];
     int found_content_type = 0;
-    int ret, size = -1;
+    int ret;
+
+    *size = -1;
 
-    ret = get_line(s->pb, line, sizeof(line));
+    // get the CRLF as empty string
+    ret = get_line(pb, line, sizeof(line));
     if (ret < 0)
         return ret;
 
-    if (strncmp(line, "--", 2))
+    /* some implementation do not provide the required
+     * initial CRLF (see rfc1341 7.2.1)
+     */
+    while (!line[0]) {
+        ret = get_line(pb, line, sizeof(line));
+        if (ret < 0)
+            return ret;
+    }
+
+    if (!av_strstart(line, expected_boundary, NULL)) {
+        if (log_ctx)
+        av_log(log_ctx,
+            AV_LOG_ERROR,
+            "Expected boundary '%s' not found, instead found a line of %zu bytes\n",
+            expected_boundary,
+            strlen(line));
+
         return AVERROR_INVALIDDATA;
+    }
 
-    while (!s->pb->eof_reached) {
+    while (!pb->eof_reached) {
         char *tag, *value;
 
-        ret = get_line(s->pb, line, sizeof(line));
-        if (ret < 0)
+        ret = get_line(pb, line, sizeof(line));
+        if (ret < 0) {
+            if (ret == AVERROR_EOF)
+                break;
             return ret;
+        }
 
         if (line[0] == '\0')
             break;
@@ -171,53 +224,181 @@ static int parse_multipart_header(AVFormatContext *s)
         ret = split_tag_value(&tag, &value, line);
         if (ret < 0)
             return ret;
+        if (value==NULL || tag==NULL)
+            break;
 
         if (!av_strcasecmp(tag, "Content-type")) {
             if (av_strcasecmp(value, "image/jpeg")) {
-                av_log(s, AV_LOG_ERROR,
-                       "Unexpected %s : %s\n",
-                       tag, value);
+                if (log_ctx)
+                av_log(log_ctx, AV_LOG_ERROR,
+                           "Unexpected %s : %s\n",
+                           tag, value);
                 return AVERROR_INVALIDDATA;
             } else
                 found_content_type = 1;
         } else if (!av_strcasecmp(tag, "Content-Length")) {
-            size = parse_content_length(value);
-            if (size < 0)
-                return size;
+            *size = parse_content_length(value);
+            if ( *size < 0 )
+                if (log_ctx)
+                av_log(log_ctx, AV_LOG_WARNING,
+                           "Invalid Content-Length value : %s\n",
+                           value);
         }
     }
 
-    if (!found_content_type || size < 0) {
-        return AVERROR_INVALIDDATA;
+    return found_content_type ? 0 : AVERROR_INVALIDDATA;
+}
+
+/* Return a newly allocated copy of the "boundary" parameter of pb's MIME type, or NULL. */
+static char* mpjpeg_get_boundary(AVIOContext* pb)
+{
+    uint8_t *mime_type = NULL;
+    const char *start;
+    const char *end;
+    uint8_t *res = NULL;
+    int     len;
+
+    /* get MIME type, and skip to the first parameter */
+    av_opt_get(pb, "mime_type", AV_OPT_SEARCH_CHILDREN, &mime_type);
+    start = mime_type;
+    while (start != NULL && *start != '\0') {
+        start = strchr(start, ';');
+        if (!start)
+            break;
+
+        start = start+1;
+
+        while (av_isspace(*start))
+            start++;
+        /* av_stristart() is non-zero on a match and moves start past the prefix */
+        if (av_stristart(start, "boundary=", &start)) {
+            end = strchr(start, ';');
+            if (end)
+                len = end - start; /* value runs up to, not including, ';' */
+            else
+                len = strlen(start);
+
+            /* some endpoints may enclose the boundary
+              in Content-Type in quotes */
+            if ( len>2 && *start == '"' && start[len-1] == '"' ) {
+                start++;
+                len -= 2;
+            }
+            res = av_strndup(start, len);
+            break;
+        }
     }
 
-    return size;
+    av_freep(&mime_type);
+    return res;
 }
 
+/* Read one multipart part into pkt; returns a negative AVERROR code on failure. */
 static int mpjpeg_read_packet(AVFormatContext *s, AVPacket *pkt)
 {
+    int size;
     int ret;
-    int size = parse_multipart_header(s);
 
-    if (size < 0)
-        return size;
+    MPJPEGDemuxContext *mpjpeg = s->priv_data;
+    if (mpjpeg->boundary == NULL) {
+        uint8_t* boundary = NULL;
+        if (mpjpeg->strict_mime_boundary)
+            boundary = mpjpeg_get_boundary(s->pb);
+        if (boundary != NULL) {
+            mpjpeg->boundary = av_asprintf("--%s", boundary); /* delimiter line = "--" + MIME boundary */
+            mpjpeg->searchstr = av_asprintf("\r\n--%s\r\n", boundary);
+            av_freep(&boundary);
+        } else {
+            mpjpeg->boundary = av_strdup("--");
+            mpjpeg->searchstr = av_strdup("\r\n--");
+        }
+        if (!mpjpeg->boundary || !mpjpeg->searchstr) {
+            av_freep(&mpjpeg->boundary);
+            av_freep(&mpjpeg->searchstr);
+            return AVERROR(ENOMEM);
+        }
+        mpjpeg->searchstr_len = strlen(mpjpeg->searchstr);
+    }
+
+    ret = parse_multipart_header(s->pb, &size, mpjpeg->boundary, s);
+
 
-    ret = av_get_packet(s->pb, pkt, size);
     if (ret < 0)
         return ret;
 
-    // trailing empty line
-    avio_skip(s->pb, 2);
+    if (size > 0) {
+        /* size has been provided to us in MIME header */
+        ret = av_get_packet(s->pb, pkt, size);
+    } else {
+        /* no size was given -- we read until the next boundary or end-of-file */
+        int remaining = 0, len;
+
+        const int read_chunk = 2048;
+        av_init_packet(pkt);
+        pkt->data = NULL;
+        pkt->size = 0;
+        pkt->pos  = avio_tell(s->pb);
+
+        /* we may need to return as much as all we've read back to the buffer */
+        ffio_ensure_seekback(s->pb, read_chunk);
+
+        while ((ret = av_append_packet(s->pb, pkt, read_chunk - remaining)) >= 0) {
+            /* scan the new data */
+            char *start;
+
+            len = ret + remaining;
+            start = pkt->data + pkt->size - len;
+            while (len >= mpjpeg->searchstr_len) { /* compare full-length windows only */
+                if (!memcmp(start, mpjpeg->searchstr, mpjpeg->searchstr_len)) {
+                    // got the boundary! rewind the stream
+                    avio_seek(s->pb, -len, SEEK_CUR);
+                    pkt->size -= len;
+                    return pkt->size;
+                }
+                len--;
+                start++;
+            }
+            remaining = len;
+        }
 
-    return 0;
+        /* error or EOF occurred */
+        if (ret == AVERROR_EOF) {
+            ret = pkt->size > 0 ? pkt->size : AVERROR_EOF;
+        } else {
+            av_packet_unref(pkt);
+        }
+    }
+
+    return ret;
 }
 
+#define OFFSET(x) offsetof(MPJPEGDemuxContext, x)
+
+#define DEC AV_OPT_FLAG_DECODING_PARAM
+const AVOption mpjpeg_options[] = {
+    { "strict_mime_boundary",  "require MIME boundaries match", OFFSET(strict_mime_boundary), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, DEC },
+    { NULL }
+};
+
+
+static const AVClass mpjpeg_demuxer_class = {
+    .class_name     = "MPJPEG demuxer",
+    .item_name      = av_default_item_name,
+    .option         = mpjpeg_options,
+    .version        = LIBAVUTIL_VERSION_INT,
+};
+
 AVInputFormat ff_mpjpeg_demuxer = {
     .name              = "mpjpeg",
     .long_name         = NULL_IF_CONFIG_SMALL("MIME multipart JPEG"),
     .mime_type         = "multipart/x-mixed-replace",
     .extensions        = "mjpg",
+    .priv_data_size    = sizeof(MPJPEGDemuxContext),
     .read_probe        = mpjpeg_read_probe,
     .read_header       = mpjpeg_read_header,
     .read_packet       = mpjpeg_read_packet,
+    .read_close        = mpjpeg_read_close,
+    .priv_class        = &mpjpeg_demuxer_class
 };
+
+
diff --git a/libavformat/mpl2dec.c b/libavformat/mpl2dec.c
index 260b7be0..81cc0bbb 100644
--- a/libavformat/mpl2dec.c
+++ b/libavformat/mpl2dec.c
@@ -108,7 +108,7 @@ static int mpl2_read_header(AVFormatContext *s)
         }
     }
 
-    ff_subtitles_queue_finalize(&mpl2->q);
+    ff_subtitles_queue_finalize(s, &mpl2->q);
     return res;
 }
 
diff --git a/libavformat/mpsubdec.c b/libavformat/mpsubdec.c
index 7c26d4f4..c5a50ecb 100644
--- a/libavformat/mpsubdec.c
+++ b/libavformat/mpsubdec.c
@@ -103,7 +103,7 @@ static int mpsub_read_header(AVFormatContext *s)
     st->codec->codec_type = AVMEDIA_TYPE_SUBTITLE;
     st->codec->codec_id   = AV_CODEC_ID_TEXT;
 
-    ff_subtitles_queue_finalize(&mpsub->q);
+    ff_subtitles_queue_finalize(s, &mpsub->q);
 
 end:
     av_bprint_finalize(&buf, NULL);
diff --git a/libavformat/msf.c b/libavformat/msf.c
new file mode 100644
index 00000000..73a5a015
--- /dev/null
+++ b/libavformat/msf.c
@@ -0,0 +1,95 @@
+/*
+ * MSF demuxer
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/intreadwrite.h"
+#include "avformat.h"
+#include "internal.h"
+
+/* Probe: "MSF" magic plus positive channel-count and sample-rate words. */
+static int msf_probe(AVProbeData *p)
+{
+    const uint8_t *hdr = p->buf;
+
+    /* the buffer must open with the three magic bytes */
+    if (memcmp(hdr, "MSF", 3))
+        return 0;
+    /* offsets 8 and 16 are what msf_read_header reads as channels and sample rate */
+    if (AV_RB32(hdr + 8) <= 0 || AV_RB32(hdr + 16) <= 0)
+        return 0;
+    return AVPROBE_SCORE_MAX / 3 * 2;
+}
+
+/* Parse the MSF header fields and set up the file's single audio stream. */
+static int msf_read_header(AVFormatContext *s)
+{
+    unsigned codec_tag, data_size, block_size;
+    AVIOContext *pb = s->pb;
+    AVStream *stream;
+
+    avio_skip(pb, 4); /* leading 4-byte tag; msf_probe matches its first 3 bytes */
+    if (!(stream = avformat_new_stream(s, NULL)))
+        return AVERROR(ENOMEM);
+    stream->codec->codec_type = AVMEDIA_TYPE_AUDIO;
+    codec_tag                 = avio_rb32(pb);
+    stream->codec->channels   = avio_rb32(pb);
+    if (stream->codec->channels <= 0 || stream->codec->channels >= INT_MAX / 1024)
+        return AVERROR_INVALIDDATA;
+    data_size                  = avio_rb32(pb);
+    stream->codec->sample_rate = avio_rb32(pb);
+    if (stream->codec->sample_rate <= 0)
+        return AVERROR_INVALIDDATA;
+    block_size = avio_rb32(pb);
+    if (block_size > INT_MAX / stream->codec->channels)
+        return AVERROR_INVALIDDATA;
+    stream->codec->block_align = block_size;
+    if (codec_tag == 0) {
+        stream->codec->codec_id = AV_CODEC_ID_PCM_S16BE;
+    } else if (codec_tag == 3) { /* replaces the block size read from the file */
+        stream->codec->block_align = 16 * stream->codec->channels;
+        stream->codec->codec_id    = AV_CODEC_ID_ADPCM_PSX;
+    } else if (codec_tag == 7) {
+        stream->need_parsing    = AVSTREAM_PARSE_FULL_RAW;
+        stream->codec->codec_id = AV_CODEC_ID_MP3;
+    } else {
+        avpriv_request_sample(s, "Codec %d", codec_tag);
+        return AVERROR_PATCHWELCOME;
+    }
+    stream->duration = av_get_audio_frame_duration(stream->codec, data_size);
+    avio_skip(pb, 0x40 - avio_tell(pb)); /* move on to offset 0x40 */
+    avpriv_set_pts_info(stream, 64, 1, stream->codec->sample_rate);
+    return 0;
+}
+
+/* Read the next packet: block_align bytes, or 1024 * channels if that is 0. */
+static int msf_read_packet(AVFormatContext *s, AVPacket *pkt)
+{
+    const AVCodecContext *avctx = s->streams[0]->codec;
+    return av_get_packet(s->pb, pkt, avctx->block_align ? avctx->block_align : 1024 * avctx->channels);
+}
+
+AVInputFormat ff_msf_demuxer = { /* demuxer descriptor for Sony PS3 MSF files */
+    .name        = "msf",
+    .extensions  = "msf",
+    .long_name   = NULL_IF_CONFIG_SMALL("Sony PS3 MSF"),
+    .read_probe  = msf_probe,
+    .read_header = msf_read_header,
+    .read_packet = msf_read_packet,
+};
diff --git a/libavformat/msnwc_tcp.c b/libavformat/msnwc_tcp.c
index 60225af6..5abf006c 100644
--- a/libavformat/msnwc_tcp.c
+++ b/libavformat/msnwc_tcp.c
@@ -40,25 +40,26 @@ static int msnwc_tcp_probe(AVProbeData *p)
 {
     int i;
 
-    for(i = 0 ; i + HEADER_SIZE <= p->buf_size ; i++) {
+    for (i = 0; i + HEADER_SIZE <= p->buf_size; i++) {
         uint16_t width, height;
         uint32_t fourcc;
-        const uint8_t *bytestream = p->buf+i;
+        const uint8_t *bytestream = p->buf + i;
 
-        if(bytestream_get_le16(&bytestream) != HEADER_SIZE)
+        if (bytestream_get_le16(&bytestream) != HEADER_SIZE)
             continue;
         width  = bytestream_get_le16(&bytestream);
         height = bytestream_get_le16(&bytestream);
-        if(!(width==320 && height==240) && !(width==160 && height==120))
+        if (!(width == 320 &&
+              height == 240) && !(width == 160 && height == 120))
             continue;
         bytestream += 2; // keyframe
         bytestream += 4; // size
-        fourcc = bytestream_get_le32(&bytestream);
-        if(fourcc != MKTAG('M', 'L', '2', '0'))
+        fourcc      = bytestream_get_le32(&bytestream);
+        if (fourcc != MKTAG('M', 'L', '2', '0'))
             continue;
 
-        if(i) {
-            if(i < 14)  /* starts with SwitchBoard connection info */
+        if (i) {
+            if (i < 14) /* starts with SwitchBoard connection info */
                 return AVPROBE_SCORE_MAX / 2;
             else        /* starts in the middle of stream */
                 return AVPROBE_SCORE_MAX / 3;
@@ -67,7 +68,7 @@ static int msnwc_tcp_probe(AVProbeData *p)
         }
     }
 
-    return -1;
+    return 0;
 }
 
 static int msnwc_tcp_read_header(AVFormatContext *ctx)
@@ -77,23 +78,23 @@ static int msnwc_tcp_read_header(AVFormatContext *ctx)
     AVStream *st;
 
     st = avformat_new_stream(ctx, NULL);
-    if(!st)
+    if (!st)
         return AVERROR(ENOMEM);
 
-    codec = st->codec;
+    codec             = st->codec;
     codec->codec_type = AVMEDIA_TYPE_VIDEO;
-    codec->codec_id = AV_CODEC_ID_MIMIC;
-    codec->codec_tag = MKTAG('M', 'L', '2', '0');
+    codec->codec_id   = AV_CODEC_ID_MIMIC;
+    codec->codec_tag  = MKTAG('M', 'L', '2', '0');
 
     avpriv_set_pts_info(st, 32, 1, 1000);
 
     /* Some files start with "connected\r\n\r\n".
      * So skip until we find the first byte of struct size */
-    while(avio_r8(pb) != HEADER_SIZE && !avio_feof(pb));
+    while(avio_r8(pb) != HEADER_SIZE && !avio_feof(pb)) ;
 
     if(avio_feof(pb)) {
         av_log(ctx, AV_LOG_ERROR, "Could not find valid start.\n");
-        return -1;
+        return AVERROR_INVALIDDATA;
     }
 
     return 0;
@@ -104,37 +105,41 @@ static int msnwc_tcp_read_packet(AVFormatContext *ctx, AVPacket *pkt)
     AVIOContext *pb = ctx->pb;
     uint16_t keyframe;
     uint32_t size, timestamp;
+    int ret;
 
     avio_skip(pb, 1); /* one byte has been read ahead */
     avio_skip(pb, 2);
     avio_skip(pb, 2);
     keyframe = avio_rl16(pb);
-    size = avio_rl32(pb);
+    size     = avio_rl32(pb);
     avio_skip(pb, 4);
     avio_skip(pb, 4);
     timestamp = avio_rl32(pb);
 
-    if(!size || av_get_packet(pb, pkt, size) != size)
-        return -1;
+    if (!size)
+        return AVERROR_INVALIDDATA;
+
+    if ((ret = av_get_packet(pb, pkt, size)) < 0)
+        return ret;
 
     avio_skip(pb, 1); /* Read ahead one byte of struct size like read_header */
 
-    pkt->pts = timestamp;
-    pkt->dts = timestamp;
+    pkt->pts          = timestamp;
+    pkt->dts          = timestamp;
     pkt->stream_index = 0;
 
     /* Some aMsn generated videos (or was it Mercury Messenger?) don't set
      * this bit and rely on the codec to get keyframe information */
-    if(keyframe&1)
+    if (keyframe & 1)
         pkt->flags |= AV_PKT_FLAG_KEY;
 
     return HEADER_SIZE + size;
 }
 
 AVInputFormat ff_msnwc_tcp_demuxer = {
-    .name           = "msnwctcp",
-    .long_name      = NULL_IF_CONFIG_SMALL("MSN TCP Webcam stream"),
-    .read_probe     = msnwc_tcp_probe,
-    .read_header    = msnwc_tcp_read_header,
-    .read_packet    = msnwc_tcp_read_packet,
+    .name        = "msnwctcp",
+    .long_name   = NULL_IF_CONFIG_SMALL("MSN TCP Webcam stream"),
+    .read_probe  = msnwc_tcp_probe,       /* scans the probe buffer for an ML20 frame header */
+    .read_header = msnwc_tcp_read_header, /* creates the single MIMIC video stream */
+    .read_packet = msnwc_tcp_read_packet, /* parses one frame header, then reads its payload */
 };
diff --git a/libavformat/mux.c b/libavformat/mux.c
index f99dbd9c..f14bfd52 100644
--- a/libavformat/mux.c
+++ b/libavformat/mux.c
@@ -61,7 +61,7 @@
  * @param num must be >= 0
  * @param den must be >= 1
  */
-static void frac_init(AVFrac *f, int64_t val, int64_t num, int64_t den)
+static void frac_init(FFFrac *f, int64_t val, int64_t num, int64_t den)
 {
     num += (den >> 1);
     if (num >= den) {
@@ -79,7 +79,7 @@ static void frac_init(AVFrac *f, int64_t val, int64_t num, int64_t den)
  * @param f fractional number
  * @param incr increment, can be positive or negative
  */
-static void frac_add(AVFrac *f, int64_t incr)
+static void frac_add(FFFrac *f, int64_t incr)
 {
     int64_t num, den;
 
@@ -239,6 +239,7 @@ static int init_muxer(AVFormatContext *s, AVDictionary **options)
     AVDictionary *tmp = NULL;
     AVCodecContext *codec = NULL;
     AVOutputFormat *of = s->oformat;
+    const AVCodecDescriptor *desc;
     AVDictionaryEntry *e;
 
     if (options)
@@ -250,10 +251,23 @@ static int init_muxer(AVFormatContext *s, AVDictionary **options)
         (ret = av_opt_set_dict2(s->priv_data, &tmp, AV_OPT_SEARCH_CHILDREN)) < 0)
         goto fail;
 
+    if (s->nb_streams && s->streams[0]->codec->flags & AV_CODEC_FLAG_BITEXACT) {
+        if (!(s->flags & AVFMT_FLAG_BITEXACT)) {
 #if FF_API_LAVF_BITEXACT
-    if (s->nb_streams && s->streams[0]->codec->flags & CODEC_FLAG_BITEXACT)
-        s->flags |= AVFMT_FLAG_BITEXACT;
+            av_log(s, AV_LOG_WARNING,
+                   "Setting the AVFormatContext to bitexact mode, because "
+                   "the AVCodecContext is in that mode. This behavior will "
+                   "change in the future. To keep the current behavior, set "
+                   "AVFormatContext.flags |= AVFMT_FLAG_BITEXACT.\n");
+            s->flags |= AVFMT_FLAG_BITEXACT;
+#else
+            av_log(s, AV_LOG_WARNING,
+                   "The AVFormatContext is not in set to bitexact mode, only "
+                   "the AVCodecContext. If this is not intended, set "
+                   "AVFormatContext.flags |= AVFMT_FLAG_BITEXACT.\n");
 #endif
+        }
+    }
 
     // some sanity checks
     if (s->nb_streams == 0 && !(of->flags & AVFMT_NOSTREAMS)) {
@@ -304,7 +318,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
                 goto fail;
             }
             if (av_cmp_q(st->sample_aspect_ratio, codec->sample_aspect_ratio)
-                && FFABS(av_q2d(st->sample_aspect_ratio) - av_q2d(codec->sample_aspect_ratio)) > 0.004*av_q2d(st->sample_aspect_ratio)
+                && fabs(av_q2d(st->sample_aspect_ratio) - av_q2d(codec->sample_aspect_ratio)) > 0.004*av_q2d(st->sample_aspect_ratio)
             ) {
                 if (st->sample_aspect_ratio.num != 0 &&
                     st->sample_aspect_ratio.den != 0 &&
@@ -322,6 +336,10 @@ FF_ENABLE_DEPRECATION_WARNINGS
             break;
         }
 
+        desc = avcodec_descriptor_get(codec->codec_id);
+        if (desc && desc->props & AV_CODEC_PROP_REORDER)
+            st->internal->reorder = 1;
+
         if (of->codec_tag) {
             if (   codec->codec_tag
                 && codec->codec_id == AV_CODEC_ID_RAWVIDEO
@@ -347,12 +365,6 @@ FF_ENABLE_DEPRECATION_WARNINGS
                 codec->codec_tag = av_codec_get_tag(of->codec_tag, codec->codec_id);
         }
 
-        if (of->flags & AVFMT_GLOBALHEADER &&
-            !(codec->flags & CODEC_FLAG_GLOBAL_HEADER))
-            av_log(s, AV_LOG_WARNING,
-                   "Codec for stream %d does not use global headers "
-                   "but container format requires global headers\n", i);
-
         if (codec->codec_type != AVMEDIA_TYPE_ATTACHMENT)
             s->internal->nb_interleaved_streams++;
     }
@@ -387,6 +399,12 @@ FF_ENABLE_DEPRECATION_WARNINGS
          *options = tmp;
     }
 
+    if (s->oformat->init && (ret = s->oformat->init(s)) < 0) {
+        if (s->oformat->deinit)
+            s->oformat->deinit(s);
+        goto fail;
+    }
+
     return 0;
 
 fail:
@@ -414,11 +432,17 @@ static int init_pts(AVFormatContext *s)
         default:
             break;
         }
+
+        if (!st->priv_pts)
+            st->priv_pts = av_mallocz(sizeof(*st->priv_pts));
+        if (!st->priv_pts)
+            return AVERROR(ENOMEM);
+
         if (den != AV_NOPTS_VALUE) {
             if (den <= 0)
                 return AVERROR_INVALIDDATA;
 
-            frac_init(&st->pts, 0, 0, den);
+            frac_init(st->priv_pts, 0, 0, den);
         }
     }
 
@@ -432,7 +456,7 @@ int avformat_write_header(AVFormatContext *s, AVDictionary **options)
     if ((ret = init_muxer(s, options)) < 0)
         return ret;
 
-    if (s->oformat->write_header) {
+    if (s->oformat->write_header && !s->oformat->check_bitstream) {
         ret = s->oformat->write_header(s);
         if (ret >= 0 && s->pb && s->pb->error < 0)
             ret = s->pb->error;
@@ -440,6 +464,7 @@ int avformat_write_header(AVFormatContext *s, AVDictionary **options)
             return ret;
         if (s->flush_packets && s->pb && s->pb->error >= 0 && s->flags & AVFMT_FLAG_FLUSH_PACKETS)
             avio_flush(s->pb);
+        s->internal->header_written = 1;
     }
 
     if ((ret = init_pts(s)) < 0)
@@ -465,19 +490,30 @@ int avformat_write_header(AVFormatContext *s, AVDictionary **options)
 #define UNCODED_FRAME_PACKET_SIZE (INT_MIN / 3 * 2 + (int)sizeof(AVFrame))
 
 
+#if FF_API_COMPUTE_PKT_FIELDS2
 //FIXME merge with compute_pkt_fields
-static int compute_pkt_fields2(AVFormatContext *s, AVStream *st, AVPacket *pkt)
+static int compute_muxer_pkt_fields(AVFormatContext *s, AVStream *st, AVPacket *pkt)
 {
     int delay = FFMAX(st->codec->has_b_frames, st->codec->max_b_frames > 0);
     int num, den, i;
     int frame_size;
 
+    if (!s->internal->missing_ts_warning &&
+        !(s->oformat->flags & AVFMT_NOTIMESTAMPS) &&
+        (pkt->pts == AV_NOPTS_VALUE || pkt->dts == AV_NOPTS_VALUE)) {
+        av_log(s, AV_LOG_WARNING,
+               "Timestamps are unset in a packet for stream %d. "
+               "This is deprecated and will stop working in the future. "
+               "Fix your code to set the timestamps properly\n", st->index);
+        s->internal->missing_ts_warning = 1;
+    }
+
     if (s->debug & FF_FDEBUG_TS)
-        av_log(s, AV_LOG_TRACE, "compute_pkt_fields2: pts:%s dts:%s cur_dts:%s b:%d size:%d st:%d\n",
+        av_log(s, AV_LOG_TRACE, "compute_muxer_pkt_fields: pts:%s dts:%s cur_dts:%s b:%d size:%d st:%d\n",
             av_ts2str(pkt->pts), av_ts2str(pkt->dts), av_ts2str(st->cur_dts), delay, pkt->size, pkt->stream_index);
 
     if (pkt->duration < 0 && st->codec->codec_type != AVMEDIA_TYPE_SUBTITLE) {
-        av_log(s, AV_LOG_WARNING, "Packet with invalid duration %d in stream %d\n",
+        av_log(s, AV_LOG_WARNING, "Packet with invalid duration %"PRId64" in stream %d\n",
                pkt->duration, pkt->stream_index);
         pkt->duration = 0;
     }
@@ -502,7 +538,7 @@ static int compute_pkt_fields2(AVFormatContext *s, AVStream *st, AVPacket *pkt)
         }
         pkt->dts =
 //        pkt->pts= st->cur_dts;
-            pkt->pts = st->pts.val;
+            pkt->pts = st->priv_pts->val;
     }
 
     //calculate dts from pts
@@ -538,7 +574,7 @@ static int compute_pkt_fields2(AVFormatContext *s, AVStream *st, AVPacket *pkt)
             av_ts2str(pkt->pts), av_ts2str(pkt->dts));
 
     st->cur_dts = pkt->dts;
-    st->pts.val = pkt->dts;
+    st->priv_pts->val = pkt->dts;
 
     /* update pts */
     switch (st->codec->codec_type) {
@@ -550,16 +586,17 @@ static int compute_pkt_fields2(AVFormatContext *s, AVStream *st, AVPacket *pkt)
         /* HACK/FIXME, we skip the initial 0 size packets as they are most
          * likely equal to the encoder delay, but it would be better if we
          * had the real timestamps from the encoder */
-        if (frame_size >= 0 && (pkt->size || st->pts.num != st->pts.den >> 1 || st->pts.val)) {
-            frac_add(&st->pts, (int64_t)st->time_base.den * frame_size);
+        if (frame_size >= 0 && (pkt->size || st->priv_pts->num != st->priv_pts->den >> 1 || st->priv_pts->val)) {
+            frac_add(st->priv_pts, (int64_t)st->time_base.den * frame_size);
         }
         break;
     case AVMEDIA_TYPE_VIDEO:
-        frac_add(&st->pts, (int64_t)st->time_base.den * st->codec->time_base.num);
+        frac_add(st->priv_pts, (int64_t)st->time_base.den * st->codec->time_base.num);
         break;
     }
     return 0;
 }
+#endif
 
 /**
  * Make timestamps non negative, move side data from payload to internal struct, call muxer, and restore
@@ -632,6 +669,18 @@ static int write_packet(AVFormatContext *s, AVPacket *pkt)
     }
 
     did_split = av_packet_split_side_data(pkt);
+
+    if (!s->internal->header_written && s->oformat->write_header) {
+        ret = s->oformat->write_header(s);
+        if (ret >= 0 && s->pb && s->pb->error < 0)
+            ret = s->pb->error;
+        if (ret < 0)
+            goto fail;
+        if (s->flush_packets && s->pb && s->pb->error >= 0 && s->flags & AVFMT_FLAG_FLUSH_PACKETS)
+            avio_flush(s->pb);
+        s->internal->header_written = 1;
+    }
+
     if ((pkt->flags & AV_PKT_FLAG_UNCODED_FRAME)) {
         AVFrame *frame = (AVFrame *)pkt->data;
         av_assert0(pkt->size == UNCODED_FRAME_PACKET_SIZE);
@@ -641,9 +690,14 @@ static int write_packet(AVFormatContext *s, AVPacket *pkt)
         ret = s->oformat->write_packet(s, pkt);
     }
 
-    if (s->flush_packets && s->pb && ret >= 0 && s->flags & AVFMT_FLAG_FLUSH_PACKETS)
-        avio_flush(s->pb);
+    if (s->pb && ret >= 0) {
+        if (s->flush_packets && s->flags & AVFMT_FLAG_FLUSH_PACKETS)
+            avio_flush(s->pb);
+        if (s->pb->error < 0)
+            ret = s->pb->error;
+    }
 
+fail:
     if (did_split)
         av_packet_merge_side_data(pkt);
 
@@ -669,7 +723,7 @@ static int check_packet(AVFormatContext *s, AVPacket *pkt)
     return 0;
 }
 
-int av_write_frame(AVFormatContext *s, AVPacket *pkt)
+static int prepare_input_packet(AVFormatContext *s, AVPacket *pkt)
 {
     int ret;
 
@@ -677,6 +731,58 @@ int av_write_frame(AVFormatContext *s, AVPacket *pkt)
     if (ret < 0)
         return ret;
 
+#if !FF_API_COMPUTE_PKT_FIELDS2
+    /* sanitize the timestamps */
+    if (!(s->oformat->flags & AVFMT_NOTIMESTAMPS)) {
+        AVStream *st = s->streams[pkt->stream_index];
+
+        /* when there is no reordering (so dts is equal to pts), but
+         * only one of them is set, set the other as well */
+        if (!st->internal->reorder) {
+            if (pkt->pts == AV_NOPTS_VALUE && pkt->dts != AV_NOPTS_VALUE)
+                pkt->pts = pkt->dts;
+            if (pkt->dts == AV_NOPTS_VALUE && pkt->pts != AV_NOPTS_VALUE)
+                pkt->dts = pkt->pts;
+        }
+
+        /* check that the timestamps are set */
+        if (pkt->pts == AV_NOPTS_VALUE || pkt->dts == AV_NOPTS_VALUE) {
+            av_log(s, AV_LOG_ERROR,
+                   "Timestamps are unset in a packet for stream %d\n", st->index);
+            return AVERROR(EINVAL);
+        }
+
+        /* check that the dts are increasing (or at least non-decreasing,
+         * if the format allows it) */
+        if (st->cur_dts != AV_NOPTS_VALUE &&
+            ((!(s->oformat->flags & AVFMT_TS_NONSTRICT) && st->cur_dts >= pkt->dts) ||
+             st->cur_dts > pkt->dts)) {
+            av_log(s, AV_LOG_ERROR,
+                   "Application provided invalid, non monotonically increasing "
+                   "dts to muxer in stream %d: %" PRId64 " >= %" PRId64 "\n",
+                   st->index, st->cur_dts, pkt->dts);
+            return AVERROR(EINVAL);
+        }
+
+        if (pkt->pts < pkt->dts) {
+            av_log(s, AV_LOG_ERROR, "pts %" PRId64 " < dts %" PRId64 " in stream %d\n",
+                   pkt->pts, pkt->dts, st->index);
+            return AVERROR(EINVAL);
+        }
+    }
+#endif
+
+    return 0;
+}
+
+int av_write_frame(AVFormatContext *s, AVPacket *pkt)
+{
+    int ret;
+
+    ret = prepare_input_packet(s, pkt);
+    if (ret < 0)
+        return ret;
+
     if (!pkt) {
         if (s->oformat->flags & AVFMT_ALLOW_FLUSH) {
             ret = s->oformat->write_packet(s, NULL);
@@ -689,10 +795,12 @@ int av_write_frame(AVFormatContext *s, AVPacket *pkt)
         return 1;
     }
 
-    ret = compute_pkt_fields2(s, s->streams[pkt->stream_index], pkt);
+#if FF_API_COMPUTE_PKT_FIELDS2
+    ret = compute_muxer_pkt_fields(s, s->streams[pkt->stream_index], pkt);
 
     if (ret < 0 && !(s->oformat->flags & AVFMT_NOTIMESTAMPS))
         return ret;
+#endif
 
     ret = write_packet(s, pkt);
     if (ret >= 0 && s->pb && s->pb->error < 0)
@@ -716,21 +824,15 @@ int ff_interleave_add_packet(AVFormatContext *s, AVPacket *pkt,
     this_pktl      = av_mallocz(sizeof(AVPacketList));
     if (!this_pktl)
         return AVERROR(ENOMEM);
-    this_pktl->pkt = *pkt;
-#if FF_API_DESTRUCT_PACKET
-FF_DISABLE_DEPRECATION_WARNINGS
-    pkt->destruct  = NULL;           // do not free original but only the copy
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
-    pkt->buf       = NULL;
-    pkt->side_data = NULL;
-    pkt->side_data_elems = 0;
     if ((pkt->flags & AV_PKT_FLAG_UNCODED_FRAME)) {
         av_assert0(pkt->size == UNCODED_FRAME_PACKET_SIZE);
         av_assert0(((AVFrame *)pkt->data)->buf);
+        this_pktl->pkt = *pkt;
+        pkt->buf = NULL;
+        pkt->side_data = NULL;
+        pkt->side_data_elems = 0;
     } else {
-        // Duplicate the packet if it uses non-allocated memory
-        if ((ret = av_dup_packet(&this_pktl->pkt)) < 0) {
+        if ((ret = av_packet_ref(&this_pktl->pkt, pkt)) < 0) {
             av_free(this_pktl);
             return ret;
         }
@@ -784,6 +886,8 @@ FF_ENABLE_DEPRECATION_WARNINGS
     s->streams[pkt->stream_index]->last_in_packet_buffer =
         *next_point                                      = this_pktl;
 
+    av_packet_unref(pkt);
+
     return 0;
 }
 
@@ -904,7 +1008,7 @@ static int interleave_packet(AVFormatContext *s, AVPacket *out, AVPacket *in, in
     if (s->oformat->interleave_packet) {
         int ret = s->oformat->interleave_packet(s, out, in, flush);
         if (in)
-            av_free_packet(in);
+            av_packet_unref(in);
         return ret;
     } else
         return ff_interleave_packet_per_dts(s, out, in, flush);
@@ -914,7 +1018,7 @@ int av_interleaved_write_frame(AVFormatContext *s, AVPacket *pkt)
 {
     int ret, flush = 0;
 
-    ret = check_packet(s, pkt);
+    ret = prepare_input_packet(s, pkt);
     if (ret < 0)
         goto fail;
 
@@ -925,13 +1029,26 @@ int av_interleaved_write_frame(AVFormatContext *s, AVPacket *pkt)
             av_log(s, AV_LOG_TRACE, "av_interleaved_write_frame size:%d dts:%s pts:%s\n",
                 pkt->size, av_ts2str(pkt->dts), av_ts2str(pkt->pts));
 
-        if ((ret = compute_pkt_fields2(s, st, pkt)) < 0 && !(s->oformat->flags & AVFMT_NOTIMESTAMPS))
+#if FF_API_COMPUTE_PKT_FIELDS2
+        if ((ret = compute_muxer_pkt_fields(s, st, pkt)) < 0 && !(s->oformat->flags & AVFMT_NOTIMESTAMPS))
             goto fail;
+#endif
 
         if (pkt->dts == AV_NOPTS_VALUE && !(s->oformat->flags & AVFMT_NOTIMESTAMPS)) {
             ret = AVERROR(EINVAL);
             goto fail;
         }
+
+        if (s->oformat->check_bitstream) {
+            if (!st->internal->bitstream_checked) {
+                if ((ret = s->oformat->check_bitstream(s, pkt)) < 0)
+                    goto fail;
+                else if (ret == 1)
+                    st->internal->bitstream_checked = 1;
+            }
+        }
+
+        av_apply_bitstream_filters(st->codec, pkt, st->internal->bsfc);
     } else {
         av_log(s, AV_LOG_TRACE, "av_interleaved_write_frame FLUSH\n");
         flush = 1;
@@ -952,7 +1069,7 @@ int av_interleaved_write_frame(AVFormatContext *s, AVPacket *pkt)
         if (ret >= 0)
             s->streams[opkt.stream_index]->nb_frames++;
 
-        av_free_packet(&opkt);
+        av_packet_unref(&opkt);
 
         if (ret < 0)
             return ret;
@@ -980,7 +1097,7 @@ int av_write_trailer(AVFormatContext *s)
         if (ret >= 0)
             s->streams[pkt.stream_index]->nb_frames++;
 
-        av_free_packet(&pkt);
+        av_packet_unref(&pkt);
 
         if (ret < 0)
             goto fail;
@@ -988,14 +1105,28 @@ int av_write_trailer(AVFormatContext *s)
             goto fail;
     }
 
+    if (!s->internal->header_written && s->oformat->write_header) {
+        ret = s->oformat->write_header(s);
+        if (ret >= 0 && s->pb && s->pb->error < 0)
+            ret = s->pb->error;
+        if (ret < 0)
+            goto fail;
+        if (s->flush_packets && s->pb && s->pb->error >= 0 && s->flags & AVFMT_FLAG_FLUSH_PACKETS)
+            avio_flush(s->pb);
+        s->internal->header_written = 1;
+    }
+
 fail:
-    if (s->oformat->write_trailer)
+    if ((s->internal->header_written || !s->oformat->write_header) && s->oformat->write_trailer)
         if (ret >= 0) {
         ret = s->oformat->write_trailer(s);
         } else {
             s->oformat->write_trailer(s);
         }
 
+    if (s->oformat->deinit)
+        s->oformat->deinit(s);
+
     if (s->pb)
        avio_flush(s->pb);
     if (ret == 0)
@@ -1043,7 +1174,8 @@ int ff_write_chained(AVFormatContext *dst, int dst_stream, AVPacket *pkt,
     if (interleave) ret = av_interleaved_write_frame(dst, &local_pkt);
     else            ret = av_write_frame(dst, &local_pkt);
     pkt->buf = local_pkt.buf;
-    pkt->destruct = local_pkt.destruct;
+    pkt->side_data       = local_pkt.side_data;
+    pkt->side_data_elems = local_pkt.side_data_elems;
     return ret;
 }
 
diff --git a/libavformat/mxf.c b/libavformat/mxf.c
index ecfb8a23..4d77ada7 100644
--- a/libavformat/mxf.c
+++ b/libavformat/mxf.c
@@ -42,6 +42,16 @@ const MXFCodecUL ff_mxf_codec_uls[] = {
     { { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x03,0x04,0x01,0x02,0x02,0x01,0x20,0x02,0x03 }, 14,      AV_CODEC_ID_MPEG4 }, /* XDCAM proxy_pal030926.mxf */
     { { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x01,0x04,0x01,0x02,0x02,0x02,0x01,0x02,0x00 }, 13,    AV_CODEC_ID_DVVIDEO }, /* DV25 IEC PAL */
     { { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x07,0x04,0x01,0x02,0x02,0x03,0x01,0x01,0x00 }, 14,   AV_CODEC_ID_JPEG2000 }, /* JPEG2000 Codestream */
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0A,0x04,0x01,0x02,0x02,0x04,0x01,0x00,0x00 }, 14,        AV_CODEC_ID_VC1 }, /* VC1 SP@LL */
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0A,0x04,0x01,0x02,0x02,0x04,0x02,0x00,0x00 }, 14,        AV_CODEC_ID_VC1 }, /* VC1 SP@ML */
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0A,0x04,0x01,0x02,0x02,0x04,0x03,0x00,0x00 }, 14,        AV_CODEC_ID_VC1 }, /* VC1 MP@LL */
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0A,0x04,0x01,0x02,0x02,0x04,0x04,0x00,0x00 }, 14,        AV_CODEC_ID_VC1 }, /* VC1 MP@ML */
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0A,0x04,0x01,0x02,0x02,0x04,0x05,0x00,0x00 }, 14,        AV_CODEC_ID_VC1 }, /* VC1 MP@HL */
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0A,0x04,0x01,0x02,0x02,0x04,0x06,0x00,0x00 }, 14,        AV_CODEC_ID_VC1 }, /* VC1 AP@L0 */
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0A,0x04,0x01,0x02,0x02,0x04,0x07,0x00,0x00 }, 14,        AV_CODEC_ID_VC1 }, /* VC1 AP@L1 */
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0A,0x04,0x01,0x02,0x02,0x04,0x08,0x00,0x00 }, 14,        AV_CODEC_ID_VC1 }, /* VC1 AP@L2 */
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0A,0x04,0x01,0x02,0x02,0x04,0x09,0x00,0x00 }, 14,        AV_CODEC_ID_VC1 }, /* VC1 AP@L3 */
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0A,0x04,0x01,0x02,0x02,0x04,0x0A,0x00,0x00 }, 14,        AV_CODEC_ID_VC1 }, /* VC1 AP@L4 */
     { { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x01,0x04,0x01,0x02,0x01,0x7F,0x00,0x00,0x00 }, 13,   AV_CODEC_ID_RAWVIDEO }, /* Uncompressed */
     { { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x0A,0x04,0x01,0x02,0x01,0x01,0x02,0x01,0x00 }, 15,   AV_CODEC_ID_RAWVIDEO }, /* Uncompressed 422 8-bit */
     { { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x01,0x04,0x01,0x02,0x02,0x71,0x00,0x00,0x00 }, 13,      AV_CODEC_ID_DNXHD }, /* SMPTE VC-3/DNxHD */
@@ -69,6 +79,11 @@ const MXFCodecUL ff_mxf_pixel_format_uls[] = {
     { { 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 },  0,    AV_PIX_FMT_NONE },
 };
 
+const MXFCodecUL ff_mxf_codec_tag_uls[] = { /* UL -> codec tag (fourcc built with MKTAG) */
+    { { 0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x01,0x0E,0x04,0x03,0x01,0x01,0x03,0x01,0x00 }, 15, MKTAG('A', 'V', 'u', 'p') }, /* Avid 1:1 */
+    { { 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 },  0,                         0 }, /* all-zero last row; presumably the terminator, as in the other UL tables */
+};
+
 static const struct {
     enum AVPixelFormat pix_fmt;
     const char data[16];
diff --git a/libavformat/mxf.h b/libavformat/mxf.h
index 1763063a..f3db1f93 100644
--- a/libavformat/mxf.h
+++ b/libavformat/mxf.h
@@ -78,6 +78,7 @@ typedef struct {
 extern const MXFCodecUL ff_mxf_data_definition_uls[];
 extern const MXFCodecUL ff_mxf_codec_uls[];
 extern const MXFCodecUL ff_mxf_pixel_format_uls[];
+extern const MXFCodecUL ff_mxf_codec_tag_uls[];
 
 int ff_mxf_decode_pixel_layout(const char pixel_layout[16], enum AVPixelFormat *pix_fmt);
 const MXFSamplesPerFrame *ff_mxf_get_samples_per_frame(AVFormatContext *s, AVRational time_base);
diff --git a/libavformat/mxfdec.c b/libavformat/mxfdec.c
index 78e2393c..f4222fca 100644
--- a/libavformat/mxfdec.c
+++ b/libavformat/mxfdec.c
@@ -166,6 +166,7 @@ typedef struct MXFDescriptor {
     enum MXFMetadataSetType type;
     UID essence_container_ul;
     UID essence_codec_ul;
+    UID codec_ul;
     AVRational sample_rate;
     AVRational aspect_ratio;
     int width;
@@ -232,6 +233,7 @@ typedef struct MXFIndexTable {
     int nb_segments;
     MXFIndexTableSegment **segments;    /* sorted by IndexStartPosition */
     AVIndexEntry *fake_index;   /* used for calling ff_index_search_timestamp() */
+    int8_t *offsets;            /* temporal offsets for display order to stored order conversion */
 } MXFIndexTable;
 
 typedef struct MXFContext {
@@ -974,6 +976,9 @@ static int mxf_read_generic_descriptor(void *arg, AVIOContext *pb, int tag, int
     case 0x3004:
         avio_read(pb, descriptor->essence_container_ul, 16);
         break;
+    case 0x3005:
+        avio_read(pb, descriptor->codec_ul, 16);
+        break;
     case 0x3006:
         descriptor->linked_track_id = avio_rb32(pb);
         break;
@@ -1116,7 +1121,11 @@ static void *mxf_resolve_strong_ref(MXFContext *mxf, UID *strong_ref, enum MXFMe
 
 static const MXFCodecUL mxf_picture_essence_container_uls[] = {
     // video essence container uls
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x07,0x0d,0x01,0x03,0x01,0x02,0x0c,0x01,0x00 }, 14,   AV_CODEC_ID_JPEG2000 },
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x02,0x0d,0x01,0x03,0x01,0x02,0x10,0x60,0x01 }, 14,       AV_CODEC_ID_H264 }, /* H264 Frame wrapped */
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x02,0x0d,0x01,0x03,0x01,0x02,0x12,0x01,0x00 }, 14,        AV_CODEC_ID_VC1 }, /* VC-1 Frame wrapped */
     { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x02,0x0d,0x01,0x03,0x01,0x02,0x04,0x60,0x01 }, 14, AV_CODEC_ID_MPEG2VIDEO }, /* MPEG-ES Frame wrapped */
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x01,0x0d,0x01,0x03,0x01,0x02,0x01,0x04,0x01 }, 14, AV_CODEC_ID_MPEG2VIDEO }, /* Type D-10 mapping of 40Mbps 525/60-I */
     { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x01,0x0d,0x01,0x03,0x01,0x02,0x02,0x41,0x01 }, 14,    AV_CODEC_ID_DVVIDEO }, /* DV 625 25mbps */
     { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x01,0x0d,0x01,0x03,0x01,0x02,0x05,0x00,0x00 }, 14,   AV_CODEC_ID_RAWVIDEO }, /* Uncompressed Picture */
     { { 0x06,0x0e,0x2b,0x34,0x01,0x01,0x01,0xff,0x4b,0x46,0x41,0x41,0x00,0x0d,0x4d,0x4f }, 14,   AV_CODEC_ID_RAWVIDEO }, /* Legacy ?? Uncompressed Picture */
@@ -1136,6 +1145,15 @@ static const MXFCodecUL mxf_intra_only_picture_essence_coding_uls[] = {
     { { 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 },  0,       AV_CODEC_ID_NONE },
 };
 
+/* actual coded width for AVC-Intra to allow selecting correct SPS/PPS; here the id field carries a width that the H.264 path assigns to st->codec->width, not a codec ID */
+static const MXFCodecUL mxf_intra_only_picture_coded_width[] = {
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0A,0x04,0x01,0x02,0x02,0x01,0x32,0x21,0x01 }, 16, 1440 },
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0A,0x04,0x01,0x02,0x02,0x01,0x32,0x21,0x02 }, 16, 1440 },
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0A,0x04,0x01,0x02,0x02,0x01,0x32,0x21,0x03 }, 16, 1440 },
+    { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x0A,0x04,0x01,0x02,0x02,0x01,0x32,0x21,0x04 }, 16, 1440 },
+    { { 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 },  0,    0 }, /* all-zero entry; presumably what mxf_get_codec_ul() returns when no UL matches, in which case id 0 skips the width override -- TODO confirm */
+};
+
 static const MXFCodecUL mxf_sound_essence_container_uls[] = {
     // sound essence container uls
     { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x01,0x0d,0x01,0x03,0x01,0x02,0x06,0x01,0x00 }, 14, AV_CODEC_ID_PCM_S16LE }, /* BWF Frame wrapped */
@@ -1325,6 +1343,7 @@ static int mxf_compute_ptses_fake_index(MXFContext *mxf, MXFIndexTable *index_ta
 {
     int i, j, x;
     int8_t max_temporal_offset = -128;
+    uint8_t *flags;
 
     /* first compute how many entries we have */
     for (i = 0; i < index_table->nb_segments; i++) {
@@ -1343,8 +1362,12 @@ static int mxf_compute_ptses_fake_index(MXFContext *mxf, MXFIndexTable *index_ta
         return 0;
 
     if (!(index_table->ptses      = av_calloc(index_table->nb_ptses, sizeof(int64_t))) ||
-        !(index_table->fake_index = av_calloc(index_table->nb_ptses, sizeof(AVIndexEntry)))) {
+        !(index_table->fake_index = av_calloc(index_table->nb_ptses, sizeof(AVIndexEntry))) ||
+        !(index_table->offsets    = av_calloc(index_table->nb_ptses, sizeof(int8_t))) ||
+        !(flags                   = av_calloc(index_table->nb_ptses, sizeof(uint8_t)))) {
         av_freep(&index_table->ptses);
+        av_freep(&index_table->fake_index);
+        av_freep(&index_table->offsets);
         return AVERROR(ENOMEM);
     }
 
@@ -1402,8 +1425,7 @@ static int mxf_compute_ptses_fake_index(MXFContext *mxf, MXFIndexTable *index_ta
                 break;
             }
 
-            index_table->fake_index[x].timestamp = x;
-            index_table->fake_index[x].flags = !(s->flag_entries[j] & 0x30) ? AVINDEX_KEYFRAME : 0;
+            flags[x] = !(s->flag_entries[j] & 0x30) ? AVINDEX_KEYFRAME : 0;
 
             if (index < 0 || index >= index_table->nb_ptses) {
                 av_log(mxf->fc, AV_LOG_ERROR,
@@ -1412,11 +1434,20 @@ static int mxf_compute_ptses_fake_index(MXFContext *mxf, MXFIndexTable *index_ta
                 continue;
             }
 
+            index_table->offsets[x] = offset;
             index_table->ptses[index] = x;
             max_temporal_offset = FFMAX(max_temporal_offset, offset);
         }
     }
 
+    /* calculate the fake index table in display order */
+    for (x = 0; x < index_table->nb_ptses; x++) {
+        index_table->fake_index[x].timestamp = x;
+        if (index_table->ptses[x] != AV_NOPTS_VALUE)
+            index_table->fake_index[index_table->ptses[x]].flags = flags[x];
+    }
+    av_freep(&flags);
+
     index_table->first_dts = -max_temporal_offset;
 
     return 0;
@@ -1754,6 +1785,16 @@ static int mxf_parse_physical_source_package(MXFContext *mxf, MXFTrack *source_t
                 continue;
             }
 
+        if (physical_track->edit_rate.num <= 0 ||
+            physical_track->edit_rate.den <= 0) {
+            av_log(mxf->fc, AV_LOG_WARNING,
+                   "Invalid edit rate (%d/%d) found on structural"
+                   " component #%d, defaulting to 25/1\n",
+                   physical_track->edit_rate.num,
+                   physical_track->edit_rate.den, i);
+            physical_track->edit_rate = (AVRational){25, 1};
+        }
+
             for (k = 0; k < physical_track->sequence->structural_components_count; k++) {
                 if (!(mxf_tc = mxf_resolve_timecode_component(mxf, &physical_track->sequence->structural_components_refs[k])))
                     continue;
@@ -1950,6 +1991,11 @@ static int mxf_parse_structural_metadata(MXFContext *mxf)
         /* TODO: drop PictureEssenceCoding and SoundEssenceCompression, only check EssenceContainer */
         codec_ul = mxf_get_codec_ul(ff_mxf_codec_uls, &descriptor->essence_codec_ul);
         st->codec->codec_id = (enum AVCodecID)codec_ul->id;
+        if (st->codec->codec_id == AV_CODEC_ID_NONE) {
+            codec_ul = mxf_get_codec_ul(ff_mxf_codec_uls, &descriptor->codec_ul);
+            st->codec->codec_id = (enum AVCodecID)codec_ul->id;
+        }
+
         av_log(mxf->fc, AV_LOG_VERBOSE, "%s: Universal Label: ",
                avcodec_get_name(st->codec->codec_id));
         for (k = 0; k < 16; k++) {
@@ -1974,10 +2020,6 @@ static int mxf_parse_structural_metadata(MXFContext *mxf)
             st->codec->width = descriptor->width;
             st->codec->height = descriptor->height; /* Field height, not frame height */
             switch (descriptor->frame_layout) {
-                case SegmentedFrame:
-                    /* This one is a weird layout I don't fully understand. */
-                    av_log(mxf->fc, AV_LOG_INFO, "SegmentedFrame layout isn't currently supported\n");
-                    break;
                 case FullFrame:
                     st->codec->field_order = AV_FIELD_PROGRESSIVE;
                     break;
@@ -1989,6 +2031,8 @@ static int mxf_parse_structural_metadata(MXFContext *mxf)
                               It's also for compatibility with the old behavior. */
                 case MixedFields:
                     break;
+                case SegmentedFrame:
+                    st->codec->field_order = AV_FIELD_PROGRESSIVE;
                 case SeparateFields:
                     switch (descriptor->field_dominance) {
                     case MXF_TFF:
@@ -2017,12 +2061,16 @@ static int mxf_parse_structural_metadata(MXFContext *mxf)
                                                   &descriptor->essence_codec_ul);
                     st->codec->pix_fmt = (enum AVPixelFormat)pix_fmt_ul->id;
                     if (st->codec->pix_fmt == AV_PIX_FMT_NONE) {
-                        /* support files created before RP224v10 by defaulting to UYVY422
-                           if subsampling is 4:2:2 and component depth is 8-bit */
-                        if (descriptor->horiz_subsampling == 2 &&
-                            descriptor->vert_subsampling == 1 &&
-                            descriptor->component_depth == 8) {
-                            st->codec->pix_fmt = AV_PIX_FMT_UYVY422;
+                        st->codec->codec_tag = mxf_get_codec_ul(ff_mxf_codec_tag_uls,
+                                                                &descriptor->essence_codec_ul)->id;
+                        if (!st->codec->codec_tag) {
+                            /* support files created before RP224v10 by defaulting to UYVY422
+                               if subsampling is 4:2:2 and component depth is 8-bit */
+                            if (descriptor->horiz_subsampling == 2 &&
+                                descriptor->vert_subsampling == 1 &&
+                                descriptor->component_depth == 8) {
+                                st->codec->pix_fmt = AV_PIX_FMT_UYVY422;
+                            }
                         }
                     }
                 }
@@ -2089,6 +2137,10 @@ static int mxf_parse_structural_metadata(MXFContext *mxf)
                 memcpy(st->codec->extradata, descriptor->extradata, descriptor->extradata_size);
             }
         } else if (st->codec->codec_id == AV_CODEC_ID_H264) {
+            int coded_width = mxf_get_codec_ul(mxf_intra_only_picture_coded_width,
+                                               &descriptor->essence_codec_ul)->id;
+            if (coded_width)
+                st->codec->width = coded_width;
             ret = ff_generate_avci_extradata(st);
             if (ret < 0)
                 return ret;
@@ -2227,7 +2279,6 @@ static const MXFMetadataReadTableEntry mxf_metadata_read_table[] = {
     { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x42,0x00 }, mxf_read_generic_descriptor, sizeof(MXFDescriptor), Descriptor }, /* Generic Sound */
     { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x28,0x00 }, mxf_read_generic_descriptor, sizeof(MXFDescriptor), Descriptor }, /* CDCI */
     { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x29,0x00 }, mxf_read_generic_descriptor, sizeof(MXFDescriptor), Descriptor }, /* RGBA */
-    { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x51,0x00 }, mxf_read_generic_descriptor, sizeof(MXFDescriptor), Descriptor }, /* MPEG 2 Video */
     { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x48,0x00 }, mxf_read_generic_descriptor, sizeof(MXFDescriptor), Descriptor }, /* Wave */
     { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x47,0x00 }, mxf_read_generic_descriptor, sizeof(MXFDescriptor), Descriptor }, /* AES3 */
     { { 0x06,0x0e,0x2b,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x51,0x00 }, mxf_read_generic_descriptor, sizeof(MXFDescriptor), Descriptor }, /* MPEG2VideoDescriptor */
@@ -2736,13 +2787,13 @@ static int mxf_read_header(AVFormatContext *s)
                 if ((ret = mxf_parse_klv(mxf, klv, metadata->read, metadata->ctx_size, metadata->type)) < 0)
                     goto fail;
                 break;
-            } else {
-                av_log(s, AV_LOG_VERBOSE, "Dark key " PRIxUID "\n",
-                       UID_ARG(klv.key));
             }
         }
-        if (!metadata->read)
+        if (!metadata->read) {
+            av_log(s, AV_LOG_VERBOSE, "Dark key " PRIxUID "\n",
+                            UID_ARG(klv.key));
             avio_skip(s->pb, klv.length);
+        }
     }
     /* FIXME avoid seek */
     if (!essence_offset)  {
@@ -2928,7 +2979,7 @@ static int mxf_read_packet_old(AVFormatContext *s, AVPacket *pkt)
                 /* if this check is hit then it's possible OPAtom was treated as OP1a
                  * truncate the packet since it's probably very large (>2 GiB is common) */
                 avpriv_request_sample(s,
-                                      "OPAtom misinterpreted as OP1a?"
+                                      "OPAtom misinterpreted as OP1a? "
                                       "KLV for edit unit %i extending into "
                                       "next edit unit",
                                       mxf->current_edit_unit);
@@ -3071,6 +3122,7 @@ static int mxf_read_close(AVFormatContext *s)
             av_freep(&mxf->index_tables[i].segments);
             av_freep(&mxf->index_tables[i].ptses);
             av_freep(&mxf->index_tables[i].fake_index);
+            av_freep(&mxf->index_tables[i].offsets);
         }
     }
     av_freep(&mxf->index_tables);
@@ -3141,9 +3193,21 @@ static int mxf_read_seek(AVFormatContext *s, int stream_index, int64_t sample_ti
         sample_time = FFMAX(sample_time, 0);
 
         if (t->fake_index) {
+            /* The first frames may not be keyframes in presentation order, so
+             * we have to advance the target to be able to find the first
+             * keyframe backwards... */
+            if (!(flags & AVSEEK_FLAG_ANY) &&
+                (flags & AVSEEK_FLAG_BACKWARD) &&
+                t->ptses[0] != AV_NOPTS_VALUE &&
+                sample_time < t->ptses[0] &&
+                (t->fake_index[t->ptses[0]].flags & AVINDEX_KEYFRAME))
+                sample_time = t->ptses[0];
+
             /* behave as if we have a proper index */
             if ((sample_time = ff_index_search_timestamp(t->fake_index, t->nb_ptses, sample_time, flags)) < 0)
                 return sample_time;
+            /* get the stored order index from the display order index */
+            sample_time += t->offsets[sample_time];
         } else {
             /* no IndexEntryArray (one or more CBR segments)
              * make sure we don't seek past the end */
@@ -3177,6 +3241,7 @@ static int mxf_read_seek(AVFormatContext *s, int stream_index, int64_t sample_ti
 AVInputFormat ff_mxf_demuxer = {
     .name           = "mxf",
     .long_name      = NULL_IF_CONFIG_SMALL("MXF (Material eXchange Format)"),
+    .flags          = AVFMT_SEEK_TO_PTS,
     .priv_data_size = sizeof(MXFContext),
     .read_probe     = mxf_probe,
     .read_header    = mxf_read_header,
diff --git a/libavformat/mxfenc.c b/libavformat/mxfenc.c
index db7d2bf4..cd13f897 100644
--- a/libavformat/mxfenc.c
+++ b/libavformat/mxfenc.c
@@ -20,6 +20,11 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+/*
+ * signal_standard, color_siting, store_user_comments and klv_fill_key version
+ * fixes sponsored by NOA GmbH
+ */
+
 /*
  * References
  * SMPTE 336M KLV Data Encoding Protocol Using Key-Length-Value
@@ -319,6 +324,7 @@ typedef struct MXFContext {
     int signal_standard;
     uint32_t tagged_value_count;
     AVRational audio_edit_rate;
+    int store_user_comments;
 } MXFContext;
 
 static const uint8_t uuid_base[]            = { 0xAD,0xAB,0x44,0x24,0x2f,0x25,0x4d,0xc7,0x92,0xff,0x29,0xbd };
@@ -376,7 +382,6 @@ static const MXFLocalTagPair mxf_local_tag_batch[] = {
     { 0x4404, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x02,0x07,0x02,0x01,0x10,0x02,0x05,0x00,0x00}}, /* Package Modified Date */
     { 0x4402, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x01,0x01,0x03,0x03,0x02,0x01,0x00,0x00,0x00}}, /* Package Name */
     { 0x4403, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x02,0x06,0x01,0x01,0x04,0x06,0x05,0x00,0x00}}, /* Tracks Strong reference array */
-    { 0x4406, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x02,0x03,0x02,0x01,0x02,0x0C,0x00,0x00,0x00}}, /* User Comments */
     { 0x4701, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x02,0x06,0x01,0x01,0x04,0x02,0x03,0x00,0x00}}, /* Descriptor */
     // Track
     { 0x4801, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x02,0x01,0x07,0x01,0x01,0x00,0x00,0x00,0x00}}, /* Track ID */
@@ -396,9 +401,6 @@ static const MXFLocalTagPair mxf_local_tag_batch[] = {
     { 0x1501, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x02,0x07,0x02,0x01,0x03,0x01,0x05,0x00,0x00}}, /* Start Time Code */
     { 0x1502, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x02,0x04,0x04,0x01,0x01,0x02,0x06,0x00,0x00}}, /* Rounded Time Code Base */
     { 0x1503, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x01,0x04,0x04,0x01,0x01,0x05,0x00,0x00,0x00}}, /* Drop Frame */
-    // Tagged Value
-    { 0x5001, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x02,0x03,0x02,0x01,0x02,0x09,0x01,0x00,0x00}}, /* Name */
-    { 0x5003, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x02,0x03,0x02,0x01,0x02,0x0A,0x01,0x00,0x00}}, /* Value */
     // File Descriptor
     { 0x3F01, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x04,0x06,0x01,0x01,0x04,0x06,0x0B,0x00,0x00}}, /* Sub Descriptors reference array */
     { 0x3006, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x05,0x06,0x01,0x01,0x03,0x05,0x00,0x00,0x00}}, /* Linked Track ID */
@@ -444,6 +446,12 @@ static const MXFLocalTagPair mxf_local_tag_batch[] = {
     { 0x3D0A, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x05,0x04,0x02,0x03,0x02,0x01,0x00,0x00,0x00}}, /* Block Align */
 };
 
+static const MXFLocalTagPair mxf_user_comments_local_tag[] = { /* local tags written to the primer pack only when store_user_comments is set */
+    { 0x4406, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x02,0x03,0x02,0x01,0x02,0x0C,0x00,0x00,0x00}}, /* User Comments */
+    { 0x5001, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x02,0x03,0x02,0x01,0x02,0x09,0x01,0x00,0x00}}, /* Name */
+    { 0x5003, {0x06,0x0E,0x2B,0x34,0x01,0x01,0x01,0x02,0x03,0x02,0x01,0x02,0x0A,0x01,0x00,0x00}}, /* Value */
+};
+
 static void mxf_write_uuid(AVIOContext *pb, enum MXFMetadataSetType type, int value)
 {
     avio_write(pb, uuid_base, 12);
@@ -521,10 +529,12 @@ static int mxf_get_essence_container_ul_index(enum AVCodecID id)
 
 static void mxf_write_primer_pack(AVFormatContext *s)
 {
+    MXFContext *mxf = s->priv_data;
     AVIOContext *pb = s->pb;
     int local_tag_number, i = 0;
 
     local_tag_number = FF_ARRAY_ELEMS(mxf_local_tag_batch);
+    local_tag_number += mxf->store_user_comments * FF_ARRAY_ELEMS(mxf_user_comments_local_tag);
 
     avio_write(pb, primer_pack_key, 16);
     klv_encode_ber_length(pb, local_tag_number * 18 + 8);
@@ -532,10 +542,15 @@ static void mxf_write_primer_pack(AVFormatContext *s)
     avio_wb32(pb, local_tag_number); // local_tag num
     avio_wb32(pb, 18); // item size, always 18 according to the specs
 
-    for (i = 0; i < local_tag_number; i++) {
+    for (i = 0; i < FF_ARRAY_ELEMS(mxf_local_tag_batch); i++) {
         avio_wb16(pb, mxf_local_tag_batch[i].local_tag);
         avio_write(pb, mxf_local_tag_batch[i].uid, 16);
     }
+    if (mxf->store_user_comments)
+        for (i = 0; i < FF_ARRAY_ELEMS(mxf_user_comments_local_tag); i++) {
+            avio_wb16(pb, mxf_user_comments_local_tag[i].local_tag);
+            avio_write(pb, mxf_user_comments_local_tag[i].uid, 16);
+        }
 }
 
 static void mxf_write_local_tag(AVIOContext *pb, int size, int tag)
@@ -998,7 +1013,7 @@ static void mxf_write_cdci_common(AVFormatContext *s, AVStream *st, const UID ke
     int stored_height = (st->codec->height+15)/16*16;
     int display_height;
     int f1, f2;
-    unsigned desc_size = size+8+8+8+8+8+8+8+5+16+sc->interlaced*4+12+20+5;
+    unsigned desc_size = size+8+8+8+8+8+8+8+5+16+4+12+20+5;
     if (sc->interlaced && sc->field_dominance)
         desc_size += 5;
     if (sc->signal_standard)
@@ -1066,12 +1081,12 @@ static void mxf_write_cdci_common(AVFormatContext *s, AVStream *st, const UID ke
         f1 *= 2;
     }
 
-    mxf_write_local_tag(pb, 12+sc->interlaced*4, 0x320D);
-    avio_wb32(pb, sc->interlaced ? 2 : 1);
+
+    mxf_write_local_tag(pb, 16, 0x320D);
+    avio_wb32(pb, 2);
     avio_wb32(pb, 4);
     avio_wb32(pb, f1);
-    if (sc->interlaced)
-        avio_wb32(pb, f2);
+    avio_wb32(pb, f2);
 
     mxf_write_local_tag(pb, 8, 0x320E);
     avio_wb32(pb, sc->aspect_ratio.num);
@@ -1249,14 +1264,15 @@ static void mxf_write_package(AVFormatContext *s, enum MXFMetadataSetType type,
     int user_comment_count = 0;
 
     if (type == MaterialPackage) {
-        user_comment_count = mxf_write_user_comments(s, s->metadata);
+        if (mxf->store_user_comments)
+            user_comment_count = mxf_write_user_comments(s, s->metadata);
         mxf_write_metadata_key(pb, 0x013600);
         PRINT_KEY(s, "Material Package key", pb->buf_ptr - 16);
-        klv_encode_ber_length(pb, 104 + name_size + (16*track_count) + (16*user_comment_count));
+        klv_encode_ber_length(pb, 92 + name_size + (16*track_count) + (16*user_comment_count) + 12LL*mxf->store_user_comments);
     } else {
         mxf_write_metadata_key(pb, 0x013700);
         PRINT_KEY(s, "Source Package key", pb->buf_ptr - 16);
-        klv_encode_ber_length(pb, 124 + name_size + (16*track_count)); // 20 bytes length for descriptor reference
+        klv_encode_ber_length(pb, 112 + name_size + (16*track_count) + 12LL*mxf->store_user_comments); // 20 bytes length for descriptor reference
     }
 
     // write uid
@@ -1291,10 +1307,12 @@ static void mxf_write_package(AVFormatContext *s, enum MXFMetadataSetType type,
         mxf_write_uuid(pb, type == MaterialPackage ? Track : Track + TypeBottom, i);
 
     // write user comment refs
-    mxf_write_local_tag(pb, user_comment_count*16 + 8, 0x4406);
-    mxf_write_refs_count(pb, user_comment_count);
-    for (i = 0; i < user_comment_count; i++)
-         mxf_write_uuid(pb, TaggedValue, mxf->tagged_value_count - user_comment_count + i);
+    if (mxf->store_user_comments) {
+        mxf_write_local_tag(pb, user_comment_count*16 + 8, 0x4406);
+        mxf_write_refs_count(pb, user_comment_count);
+        for (i = 0; i < user_comment_count; i++)
+            mxf_write_uuid(pb, TaggedValue, mxf->tagged_value_count - user_comment_count + i);
+    }
 
     // write multiple descriptor reference
     if (type == SourcePackage) {
@@ -1902,7 +1920,7 @@ static const UID mxf_mpeg2_codec_uls[] = {
 
 static const UID *mxf_get_mpeg2_codec_ul(AVCodecContext *avctx)
 {
-    int long_gop = avctx->gop_size > 1 || avctx->has_b_frames;
+    int long_gop = 1;
 
     if (avctx->profile == 4) { // Main
         if (avctx->level == 8) // Main
@@ -2023,7 +2041,6 @@ static int mxf_write_header(AVFormatContext *s)
     int i, ret;
     uint8_t present[FF_ARRAY_ELEMS(mxf_essence_container_uls)] = {0};
     const MXFSamplesPerFrame *spf = NULL;
-    AVDictionaryEntry *t;
     int64_t timestamp = 0;
 
     if (!s->nb_streams)
@@ -2034,6 +2051,9 @@ static int mxf_write_header(AVFormatContext *s)
         return -1;
     }
 
+    if (!av_dict_get(s->metadata, "comment_", NULL, AV_DICT_IGNORE_SUFFIX))
+        mxf->store_user_comments = 0;
+
     for (i = 0; i < s->nb_streams; i++) {
         AVStream *st = s->streams[i];
         MXFStreamContext *sc = av_mallocz(sizeof(*sc));
@@ -2056,7 +2076,7 @@ static int mxf_write_header(AVFormatContext *s)
             sc->color_siting = 0xFF;
 
             if (pix_desc) {
-                sc->component_depth     = pix_desc->comp[0].depth_minus1 + 1;
+                sc->component_depth     = pix_desc->comp[0].depth;
                 sc->h_chroma_sub_sample = 1 << pix_desc->log2_chroma_w;
             }
             switch (ff_choose_chroma_location(s, st)) {
@@ -2191,9 +2211,7 @@ static int mxf_write_header(AVFormatContext *s)
             sc->order = AV_RB32(sc->track_essence_element_key+12);
     }
 
-    if (t = av_dict_get(s->metadata, "creation_time", NULL, 0))
-        timestamp = ff_iso8601_to_unix_time(t->value);
-    if (timestamp)
+    if (ff_parse_creation_time_metadata(s, &timestamp, 1) > 0)
         mxf->timestamp = mxf_parse_timestamp(timestamp);
     mxf->duration = -1;
 
@@ -2448,6 +2466,10 @@ static int mxf_write_packet(AVFormatContext *s, AVPacket *pkt)
         }
         mxf->edit_units_count++;
     } else if (!mxf->edit_unit_byte_count && st->index == 1) {
+        if (!mxf->edit_units_count) {
+            av_log(s, AV_LOG_ERROR, "No packets in first stream\n");
+            return AVERROR_PATCHWELCOME;
+        }
         mxf->index_entries[mxf->edit_units_count-1].slice_offset =
             mxf->body_offset - mxf->index_entries[mxf->edit_units_count-1].offset;
     }
@@ -2585,7 +2607,7 @@ static int mxf_interleave_get_packet(AVFormatContext *s, AVPacket *out, AVPacket
 
                 if(s->streams[pktl->pkt.stream_index]->last_in_packet_buffer == pktl)
                     s->streams[pktl->pkt.stream_index]->last_in_packet_buffer= NULL;
-                av_free_packet(&pktl->pkt);
+                av_packet_unref(&pktl->pkt);
                 av_freep(&pktl);
                 pktl = next;
             }
@@ -2652,6 +2674,8 @@ static int mxf_interleave(AVFormatContext *s, AVPacket *out, AVPacket *pkt, int
 
 static const AVOption mxf_options[] = {
     MXF_COMMON_OPTIONS
+    { "store_user_comments", "",
+      offsetof(MXFContext, store_user_comments), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, AV_OPT_FLAG_ENCODING_PARAM},
     { NULL },
 };
 
@@ -2666,6 +2690,8 @@ static const AVOption d10_options[] = {
     { "d10_channelcount", "Force/set channelcount in generic sound essence descriptor",
       offsetof(MXFContext, channel_count), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 8, AV_OPT_FLAG_ENCODING_PARAM},
     MXF_COMMON_OPTIONS
+    { "store_user_comments", "",
+      offsetof(MXFContext, store_user_comments), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AV_OPT_FLAG_ENCODING_PARAM},
     { NULL },
 };
 
diff --git a/libavformat/mxg.c b/libavformat/mxg.c
index 34977b81..45cc5b52 100644
--- a/libavformat/mxg.c
+++ b/libavformat/mxg.c
@@ -111,7 +111,7 @@ static int mxg_update_cache(AVFormatContext *s, unsigned int cache_size)
     soi_pos = mxg->soi_ptr - mxg->buffer;
     buffer = av_fast_realloc(mxg->buffer, &mxg->buffer_size,
                              current_pos + cache_size +
-                             FF_INPUT_BUFFER_PADDING_SIZE);
+                             AV_INPUT_BUFFER_PADDING_SIZE);
     if (!buffer)
         return AVERROR(ENOMEM);
     mxg->buffer = buffer;
@@ -171,18 +171,13 @@ static int mxg_read_packet(AVFormatContext *s, AVPacket *pkt)
 
                 pkt->pts = pkt->dts = mxg->dts;
                 pkt->stream_index = 0;
-#if FF_API_DESTRUCT_PACKET
-FF_DISABLE_DEPRECATION_WARNINGS
-                pkt->destruct = NULL;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
                 pkt->buf  = NULL;
                 pkt->size = mxg->buffer_ptr - mxg->soi_ptr;
                 pkt->data = mxg->soi_ptr;
 
                 if (mxg->soi_ptr - mxg->buffer > mxg->cache_size) {
                     if (mxg->cache_size > 0) {
-                        memcpy(mxg->buffer, mxg->buffer_ptr, mxg->cache_size);
+                        memmove(mxg->buffer, mxg->buffer_ptr, mxg->cache_size);
                     }
 
                     mxg->buffer_ptr = mxg->buffer;
@@ -214,11 +209,6 @@ FF_ENABLE_DEPRECATION_WARNINGS
                     /* time (GMT) of first sample in usec since 1970, little-endian */
                     pkt->pts = pkt->dts = AV_RL64(startmarker_ptr + 8);
                     pkt->stream_index = 1;
-#if FF_API_DESTRUCT_PACKET
-FF_DISABLE_DEPRECATION_WARNINGS
-                    pkt->destruct = NULL;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
                     pkt->buf  = NULL;
                     pkt->size = size - 14;
                     pkt->data = startmarker_ptr + 16;
diff --git a/libavformat/ncdec.c b/libavformat/ncdec.c
index 062899f1..745ba61e 100644
--- a/libavformat/ncdec.c
+++ b/libavformat/ncdec.c
@@ -83,7 +83,7 @@ static int nc_read_packet(AVFormatContext *s, AVPacket *pkt)
 
     ret = av_get_packet(s->pb, pkt, size);
     if (ret != size) {
-        if (ret > 0) av_free_packet(pkt);
+        if (ret > 0) av_packet_unref(pkt);
         return AVERROR(EIO);
     }
 
diff --git a/libavformat/network.c b/libavformat/network.c
index 47ade8cb..2fb1c8b0 100644
--- a/libavformat/network.c
+++ b/libavformat/network.c
@@ -75,7 +75,7 @@ int ff_network_wait_fd(int fd, int write)
     int ev = write ? POLLOUT : POLLIN;
     struct pollfd p = { .fd = fd, .events = ev, .revents = 0 };
     int ret;
-    ret = poll(&p, 1, 100);
+    ret = poll(&p, 1, POLLING_TIME);
     return ret < 0 ? ff_neterrno() : p.revents & (ev | POLLERR | POLLHUP) ? 0 : AVERROR(EAGAIN);
 }
 
@@ -187,12 +187,11 @@ int ff_socket(int af, int type, int proto)
     return fd;
 }
 
-int ff_listen_bind(int fd, const struct sockaddr *addr,
-                   socklen_t addrlen, int timeout, URLContext *h)
+int ff_listen(int fd, const struct sockaddr *addr,
+              socklen_t addrlen)
 {
     int ret;
     int reuse = 1;
-    struct pollfd lp = { fd, POLLIN, 0 };
     if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof(reuse))) {
         av_log(NULL, AV_LOG_WARNING, "setsockopt(SO_REUSEADDR) failed\n");
     }
@@ -203,6 +202,13 @@ int ff_listen_bind(int fd, const struct sockaddr *addr,
     ret = listen(fd, 1);
     if (ret)
         return ff_neterrno();
+    return ret;
+}
+
+int ff_accept(int fd, int timeout, URLContext *h)
+{
+    int ret;
+    struct pollfd lp = { fd, POLLIN, 0 };
 
     ret = ff_poll_interrupt(&lp, 1, timeout, &h->interrupt_callback);
     if (ret < 0)
@@ -211,15 +217,24 @@ int ff_listen_bind(int fd, const struct sockaddr *addr,
     ret = accept(fd, NULL, NULL);
     if (ret < 0)
         return ff_neterrno();
-
-    closesocket(fd);
-
     if (ff_socket_nonblock(ret, 1) < 0)
         av_log(NULL, AV_LOG_DEBUG, "ff_socket_nonblock failed\n");
 
     return ret;
 }
 
+int ff_listen_bind(int fd, const struct sockaddr *addr,
+                   socklen_t addrlen, int timeout, URLContext *h)
+{
+    int ret; /* negative error code, or the descriptor returned by ff_accept() on success */
+    if ((ret = ff_listen(fd, addr, addrlen)) < 0)
+        return ret; /* fd is left open on this failure path */
+    if ((ret = ff_accept(fd, timeout, h)) < 0)
+        return ret; /* fd is left open on this failure path */
+    closesocket(fd); /* a connection was accepted: the listening socket is closed here */
+    return ret;
+}
+
 int ff_listen_connect(int fd, const struct sockaddr *addr,
                       socklen_t addrlen, int timeout, URLContext *h,
                       int will_try_next)
diff --git a/libavformat/network.h b/libavformat/network.h
index 86fb6561..f83c796a 100644
--- a/libavformat/network.h
+++ b/libavformat/network.h
@@ -254,6 +254,26 @@ int ff_listen_bind(int fd, const struct sockaddr *addr,
                    socklen_t addrlen, int timeout,
                    URLContext *h);
 
+/**
+ * Bind a file descriptor to an address without accepting connections.
+ * @param fd      First argument of bind().
+ * @param addr    Second argument of bind().
+ * @param addrlen Third argument of bind().
+ * @return        0 on success or an AVERROR on failure.
+ */
+int ff_listen(int fd, const struct sockaddr *addr, socklen_t addrlen);
+
+/**
+ * Poll for a single connection on the passed file descriptor.
+ * @param fd      The listening socket file descriptor.
+ * @param timeout Polling timeout in milliseconds.
+ * @param h       URLContext providing interrupt check
+ *                callback and logging context.
+ * @return        A non-blocking file descriptor on success
+ *                or an AVERROR on failure.
+ */
+int ff_accept(int fd, int timeout, URLContext *h);
+
 /**
  * Connect to a file descriptor and poll for result.
  *
diff --git a/libavformat/nsvdec.c b/libavformat/nsvdec.c
index de553965..95fab644 100644
--- a/libavformat/nsvdec.c
+++ b/libavformat/nsvdec.c
@@ -713,9 +713,9 @@ static int nsv_read_close(AVFormatContext *s)
     av_freep(&nsv->nsvs_file_offset);
     av_freep(&nsv->nsvs_timestamps);
     if (nsv->ahead[0].data)
-        av_free_packet(&nsv->ahead[0]);
+        av_packet_unref(&nsv->ahead[0]);
     if (nsv->ahead[1].data)
-        av_free_packet(&nsv->ahead[1]);
+        av_packet_unref(&nsv->ahead[1]);
     return 0;
 }
 
diff --git a/libavformat/nullenc.c b/libavformat/nullenc.c
index 7c08c396..fd293d72 100644
--- a/libavformat/nullenc.c
+++ b/libavformat/nullenc.c
@@ -30,7 +30,7 @@ AVOutputFormat ff_null_muxer = {
     .name              = "null",
     .long_name         = NULL_IF_CONFIG_SMALL("raw null video"),
     .audio_codec       = AV_NE(AV_CODEC_ID_PCM_S16BE, AV_CODEC_ID_PCM_S16LE),
-    .video_codec       = AV_CODEC_ID_RAWVIDEO,
+    .video_codec       = AV_CODEC_ID_WRAPPED_AVFRAME,
     .write_packet      = null_write_packet,
-    .flags             = AVFMT_VARIABLE_FPS | AVFMT_NOFILE | AVFMT_NOTIMESTAMPS | AVFMT_RAWPICTURE,
+    .flags             = AVFMT_VARIABLE_FPS | AVFMT_NOFILE | AVFMT_NOTIMESTAMPS,
 };
diff --git a/libavformat/nut.c b/libavformat/nut.c
index c6fdb0bf..f4901e6c 100644
--- a/libavformat/nut.c
+++ b/libavformat/nut.c
@@ -88,6 +88,12 @@ const AVCodecTag ff_nut_video_tags[] = {
     { AV_CODEC_ID_RAWVIDEO,         MKTAG('B', 'R', 'A', 64 ) },
     { AV_CODEC_ID_RAWVIDEO,         MKTAG(64 , 'R', 'B', 'A') },
     { AV_CODEC_ID_RAWVIDEO,         MKTAG(64 , 'B', 'R', 'A') },
+    { AV_CODEC_ID_RAWVIDEO,         MKTAG('Y', '3', 11 ,  9 ) },
+    { AV_CODEC_ID_RAWVIDEO,         MKTAG( 9 , 11 , '3', 'Y') },
+    { AV_CODEC_ID_RAWVIDEO,         MKTAG('Y', '3', 10 ,  9 ) },
+    { AV_CODEC_ID_RAWVIDEO,         MKTAG( 9 , 10 , '3', 'Y') },
+    { AV_CODEC_ID_RAWVIDEO,         MKTAG('Y', '3',  0 ,  9 ) },
+    { AV_CODEC_ID_RAWVIDEO,         MKTAG( 9 ,  0 , '3', 'Y') },
     { AV_CODEC_ID_RAWVIDEO,         MKTAG('Y', '3', 11 , 10 ) },
     { AV_CODEC_ID_RAWVIDEO,         MKTAG(10 , 11 , '3', 'Y') },
     { AV_CODEC_ID_RAWVIDEO,         MKTAG('Y', '3', 10 , 10 ) },
@@ -162,6 +168,8 @@ const AVCodecTag ff_nut_video_tags[] = {
     { AV_CODEC_ID_RAWVIDEO,         MKTAG('X', 'Y', 'Z' , 36 ) },
     { AV_CODEC_ID_RAWVIDEO,         MKTAG(36 , 'Z' , 'Y', 'X') },
 
+    { AV_CODEC_ID_RAWVIDEO,         MKTAG('P', 'A', 'L', 8 ) },
+
     { AV_CODEC_ID_RAWVIDEO, MKTAG(0xBA, 'B', 'G', 8   ) },
     { AV_CODEC_ID_RAWVIDEO, MKTAG(0xBA, 'B', 'G', 16  ) },
     { AV_CODEC_ID_RAWVIDEO, MKTAG(16  , 'G', 'B', 0xBA) },
@@ -237,14 +245,16 @@ int64_t ff_lsb2full(StreamContext *stream, int64_t lsb)
     return ((lsb - delta) & mask) + delta;
 }
 
-int ff_nut_sp_pos_cmp(const Syncpoint *a, const Syncpoint *b)
+int ff_nut_sp_pos_cmp(const void *a, const void *b)
 {
-    return ((a->pos - b->pos) >> 32) - ((b->pos - a->pos) >> 32);
+    const Syncpoint *va = a, *vb = b;
+    return ((va->pos - vb->pos) >> 32) - ((vb->pos - va->pos) >> 32);
 }
 
-int ff_nut_sp_pts_cmp(const Syncpoint *a, const Syncpoint *b)
+int ff_nut_sp_pts_cmp(const void *a, const void *b)
 {
-    return ((a->ts - b->ts) >> 32) - ((b->ts - a->ts) >> 32);
+    const Syncpoint *va = a, *vb = b;
+    return ((va->ts - vb->ts) >> 32) - ((vb->ts - va->ts) >> 32);
 }
 
 int ff_nut_add_sp(NUTContext *nut, int64_t pos, int64_t back_ptr, int64_t ts)
@@ -263,7 +273,7 @@ int ff_nut_add_sp(NUTContext *nut, int64_t pos, int64_t back_ptr, int64_t ts)
     sp->pos      = pos;
     sp->back_ptr = back_ptr;
     sp->ts       = ts;
-    av_tree_insert(&nut->syncpoints, sp, (void *) ff_nut_sp_pos_cmp, &node);
+    av_tree_insert(&nut->syncpoints, sp, ff_nut_sp_pos_cmp, &node);
     if (node) {
         av_free(sp);
         av_free(node);
@@ -280,8 +290,10 @@ static int enu_free(void *opaque, void *elem)
 
 void ff_nut_free_sp(NUTContext *nut)
 {
-    av_tree_enumerate(nut->syncpoints, NULL, NULL, enu_free);
-    av_tree_destroy(nut->syncpoints);
+    if (nut->syncpoints) {
+        av_tree_enumerate(nut->syncpoints, NULL, NULL, enu_free);
+        av_tree_destroy(nut->syncpoints);
+    }
 }
 
 const Dispositions ff_nut_dispositions[] = {
diff --git a/libavformat/nut.h b/libavformat/nut.h
index 0c678a51..a4409ee2 100644
--- a/libavformat/nut.h
+++ b/libavformat/nut.h
@@ -41,18 +41,18 @@
 #define NUT_MIN_VERSION 2
 
 typedef enum{
-    FLAG_KEY        =   1, ///<if set, frame is keyframe
-    FLAG_EOR        =   2, ///<if set, stream has no relevance on presentation. (EOR)
-    FLAG_CODED_PTS  =   8, ///<if set, coded_pts is in the frame header
-    FLAG_STREAM_ID  =  16, ///<if set, stream_id is coded in the frame header
-    FLAG_SIZE_MSB   =  32, ///<if set, data_size_msb is at frame header, otherwise data_size_msb is 0
-    FLAG_CHECKSUM   =  64, ///<if set, the frame header contains a checksum
-    FLAG_RESERVED   = 128, ///<if set, reserved_count is coded in the frame header
-    FLAG_SM_DATA    = 256, ///<if set, side / meta data is stored in the frame header.
-    FLAG_HEADER_IDX =1024, ///<If set, header_idx is coded in the frame header.
-    FLAG_MATCH_TIME =2048, ///<If set, match_time_delta is coded in the frame header
-    FLAG_CODED      =4096, ///<if set, coded_flags are stored in the frame header
-    FLAG_INVALID    =8192, ///<if set, frame_code is invalid
+    FLAG_KEY        =   1, // if set, frame is keyframe
+    FLAG_EOR        =   2, // if set, stream has no relevance on presentation. (EOR)
+    FLAG_CODED_PTS  =   8, // if set, coded_pts is in the frame header
+    FLAG_STREAM_ID  =  16, // if set, stream_id is coded in the frame header
+    FLAG_SIZE_MSB   =  32, // if set, data_size_msb is at frame header, otherwise data_size_msb is 0
+    FLAG_CHECKSUM   =  64, // if set, the frame header contains a checksum
+    FLAG_RESERVED   = 128, // if set, reserved_count is coded in the frame header
+    FLAG_SM_DATA    = 256, // if set, side / meta data is stored in the frame header.
+    FLAG_HEADER_IDX =1024, // If set, header_idx is coded in the frame header.
+    FLAG_MATCH_TIME =2048, // If set, match_time_delta is coded in the frame header
+    FLAG_CODED      =4096, // if set, coded_flags are stored in the frame header
+    FLAG_INVALID    =8192, // if set, frame_code is invalid
 } Flag;
 
 typedef struct Syncpoint {
@@ -96,7 +96,7 @@ typedef struct NUTContext {
     FrameCode frame_code[256];
     uint8_t header_len[128];
     const uint8_t *header[128];
-    uint64_t next_startcode;     ///< stores the next startcode if it has already been parsed but the stream is not seekable
+    uint64_t next_startcode;     // stores the next startcode if it has already been parsed but the stream is not seekable
     StreamContext *stream;
     ChapterContext *chapter;
     unsigned int max_distance;
@@ -132,8 +132,8 @@ typedef struct Dispositions {
 
 void ff_nut_reset_ts(NUTContext *nut, AVRational time_base, int64_t val);
 int64_t ff_lsb2full(StreamContext *stream, int64_t lsb);
-int ff_nut_sp_pos_cmp(const Syncpoint *a, const Syncpoint *b);
-int ff_nut_sp_pts_cmp(const Syncpoint *a, const Syncpoint *b);
+int ff_nut_sp_pos_cmp(const void *a, const void *b);
+int ff_nut_sp_pts_cmp(const void *a, const void *b);
 int ff_nut_add_sp(NUTContext *nut, int64_t pos, int64_t back_ptr, int64_t ts);
 void ff_nut_free_sp(NUTContext *nut);
 
diff --git a/libavformat/nutdec.c b/libavformat/nutdec.c
index 606deaa8..4df6a57a 100644
--- a/libavformat/nutdec.c
+++ b/libavformat/nutdec.c
@@ -264,7 +264,9 @@ static int decode_main_header(NUTContext *nut)
         GET_V(nut->time_base[i].num, tmp > 0 && tmp < (1ULL << 31));
         GET_V(nut->time_base[i].den, tmp > 0 && tmp < (1ULL << 31));
         if (av_gcd(nut->time_base[i].num, nut->time_base[i].den) != 1) {
-            av_log(s, AV_LOG_ERROR, "time base invalid\n");
+            av_log(s, AV_LOG_ERROR, "invalid time base %d/%d\n",
+                   nut->time_base[i].num,
+                   nut->time_base[i].den);
             ret = AVERROR_INVALIDDATA;
             goto fail;
         }
@@ -315,7 +317,8 @@ static int decode_main_header(NUTContext *nut)
             goto fail;
         }
         if (tmp_stream >= stream_count) {
-            av_log(s, AV_LOG_ERROR, "illegal stream number\n");
+            av_log(s, AV_LOG_ERROR, "illegal stream number %d >= %d\n",
+                   tmp_stream, stream_count);
             ret = AVERROR_INVALIDDATA;
             goto fail;
         }
@@ -344,12 +347,14 @@ static int decode_main_header(NUTContext *nut)
         for (i = 1; i < nut->header_count; i++) {
             uint8_t *hdr;
             GET_V(nut->header_len[i], tmp > 0 && tmp < 256);
-            rem -= nut->header_len[i];
-            if (rem < 0) {
-                av_log(s, AV_LOG_ERROR, "invalid elision header\n");
+            if (rem < nut->header_len[i]) {
+                av_log(s, AV_LOG_ERROR,
+                       "invalid elision header %d : %d > %d\n",
+                       i, nut->header_len[i], rem);
                 ret = AVERROR_INVALIDDATA;
                 goto fail;
             }
+            rem -= nut->header_len[i];
             hdr = av_malloc(nut->header_len[i]);
             if (!hdr) {
                 ret = AVERROR(ENOMEM);
@@ -598,7 +603,9 @@ static int decode_info_header(NUTContext *nut)
         }
 
         if (stream_id_plus1 > s->nb_streams) {
-            av_log(s, AV_LOG_ERROR, "invalid stream id for info packet\n");
+            av_log(s, AV_LOG_WARNING,
+                   "invalid stream id %d for info packet\n",
+                   stream_id_plus1);
             continue;
         }
 
@@ -707,7 +714,7 @@ static int find_and_decode_index(NUTContext *nut)
     avio_seek(bc, filesize - 12, SEEK_SET);
     avio_seek(bc, filesize - avio_rb64(bc), SEEK_SET);
     if (avio_rb64(bc) != INDEX_STARTCODE) {
-        av_log(s, AV_LOG_ERROR, "no index at the end\n");
+        av_log(s, AV_LOG_WARNING, "no index at the end\n");
 
         if(s->duration<=0)
             s->duration = find_duration(nut, filesize);
@@ -927,7 +934,7 @@ static int read_sm_data(AVFormatContext *s, AVIOContext *bc, AVPacket *pkt, int
                 return ret;
             }
             value_len = ffio_read_varlen(bc);
-            if (avio_tell(bc) + value_len >= maxpos)
+            if (value_len < 0 || value_len >= maxpos - avio_tell(bc))
                 return AVERROR_INVALIDDATA;
             if (!strcmp(name, "Palette")) {
                 dst = av_packet_new_side_data(pkt, AV_PKT_DATA_PALETTE, value_len);
@@ -1126,7 +1133,8 @@ static int decode_frame(NUTContext *nut, AVPacket *pkt, int frame_code)
     ret = av_new_packet(pkt, size + nut->header_len[header_idx]);
     if (ret < 0)
         return ret;
-    memcpy(pkt->data, nut->header[header_idx], nut->header_len[header_idx]);
+    if (nut->header[header_idx])
+        memcpy(pkt->data, nut->header[header_idx], nut->header_len[header_idx]);
     pkt->pos = avio_tell(bc); // FIXME
     if (stc->last_flags & FLAG_SM_DATA) {
         int sm_size;
@@ -1157,7 +1165,7 @@ static int decode_frame(NUTContext *nut, AVPacket *pkt, int frame_code)
 
     return 0;
 fail:
-    av_free_packet(pkt);
+    av_packet_unref(pkt);
     return ret;
 }
 
@@ -1271,7 +1279,7 @@ static int read_seek(AVFormatContext *s, int stream_index,
         pos2 = st->index_entries[index].pos;
         ts   = st->index_entries[index].timestamp;
     } else {
-        av_tree_find(nut->syncpoints, &dummy, (void *) ff_nut_sp_pts_cmp,
+        av_tree_find(nut->syncpoints, &dummy, ff_nut_sp_pts_cmp,
                      (void **) next_node);
         av_log(s, AV_LOG_DEBUG, "%"PRIu64"-%"PRIu64" %"PRId64"-%"PRId64"\n",
                next_node[0]->pos, next_node[1]->pos, next_node[0]->ts,
@@ -1280,11 +1288,13 @@ static int read_seek(AVFormatContext *s, int stream_index,
                             next_node[1]->pos, next_node[1]->pos,
                             next_node[0]->ts, next_node[1]->ts,
                             AVSEEK_FLAG_BACKWARD, &ts, nut_read_timestamp);
+        if (pos < 0)
+            return pos;
 
         if (!(flags & AVSEEK_FLAG_BACKWARD)) {
             dummy.pos    = pos + 16;
             next_node[1] = &nopts_sp;
-            av_tree_find(nut->syncpoints, &dummy, (void *) ff_nut_sp_pos_cmp,
+            av_tree_find(nut->syncpoints, &dummy, ff_nut_sp_pos_cmp,
                          (void **) next_node);
             pos2 = ff_gen_search(s, -2, dummy.pos, next_node[0]->pos,
                                  next_node[1]->pos, next_node[1]->pos,
@@ -1295,7 +1305,7 @@ static int read_seek(AVFormatContext *s, int stream_index,
             // FIXME dir but I think it does not matter
         }
         dummy.pos = pos;
-        sp = av_tree_find(nut->syncpoints, &dummy, (void *) ff_nut_sp_pos_cmp,
+        sp = av_tree_find(nut->syncpoints, &dummy, ff_nut_sp_pos_cmp,
                           NULL);
 
         av_assert0(sp);
diff --git a/libavformat/nutenc.c b/libavformat/nutenc.c
index 1522a045..b6582efb 100644
--- a/libavformat/nutenc.c
+++ b/libavformat/nutenc.c
@@ -587,7 +587,7 @@ static int write_index(NUTContext *nut, AVIOContext *bc) {
     ff_put_v(bc, nut->sp_count);
 
     for (i=0; i<nut->sp_count; i++) {
-        av_tree_find(nut->syncpoints, &dummy, (void *) ff_nut_sp_pos_cmp, (void**)next_node);
+        av_tree_find(nut->syncpoints, &dummy, ff_nut_sp_pos_cmp, (void**)next_node);
         ff_put_v(bc, (next_node[1]->pos >> 4) - (dummy.pos>>4));
         dummy.pos = next_node[1]->pos;
     }
@@ -933,6 +933,7 @@ static int write_sm_data(AVFormatContext *s, AVIOContext *bc, AVPacket *pkt, int
                 break;
             case AV_PKT_DATA_METADATA_UPDATE:
             case AV_PKT_DATA_STRINGS_METADATA:
+            case AV_PKT_DATA_QUALITY_STATS:
                 // belongs into meta, not side data
                 break;
             }
@@ -1173,7 +1174,7 @@ static int nut_write_trailer(AVFormatContext *s)
 {
     NUTContext *nut = s->priv_data;
     AVIOContext *bc = s->pb, *dyn_bc;
-    int i, ret;
+    int ret;
 
     while (nut->header_count < 3)
         write_headers(s, bc);
@@ -1185,15 +1186,22 @@ static int nut_write_trailer(AVFormatContext *s)
         put_packet(nut, bc, dyn_bc, 1, INDEX_STARTCODE);
     }
 
+    return 0;
+}
+
+static void nut_write_deinit(AVFormatContext *s)
+{
+    NUTContext *nut = s->priv_data;
+    int i;
+
     ff_nut_free_sp(nut);
-    for (i=0; i<s->nb_streams; i++)
-        av_freep(&nut->stream[i].keyframe_pts);
+    if (nut->stream)
+        for (i=0; i<s->nb_streams; i++)
+            av_freep(&nut->stream[i].keyframe_pts);
 
     av_freep(&nut->stream);
     av_freep(&nut->chapter);
     av_freep(&nut->time_base);
-
-    return 0;
 }
 
 #define OFFSET(x) offsetof(NUTContext, x)
@@ -1203,7 +1211,7 @@ static const AVOption options[] = {
     { "default",     "",                                                0,             AV_OPT_TYPE_CONST, {.i64 = 0},             INT_MIN, INT_MAX, E, "syncpoints" },
     { "none",        "Disable syncpoints, low overhead and unseekable", 0,             AV_OPT_TYPE_CONST, {.i64 = NUT_PIPE},      INT_MIN, INT_MAX, E, "syncpoints" },
     { "timestamped", "Extend syncpoints with a wallclock timestamp",    0,             AV_OPT_TYPE_CONST, {.i64 = NUT_BROADCAST}, INT_MIN, INT_MAX, E, "syncpoints" },
-    { "write_index", "Write index",                               OFFSET(write_index), AV_OPT_TYPE_INT,   {.i64 = 1},                   0,       1, E, },
+    { "write_index", "Write index",                               OFFSET(write_index), AV_OPT_TYPE_BOOL,  {.i64 = 1},                   0,       1, E, },
     { NULL },
 };
 
@@ -1226,6 +1234,7 @@ AVOutputFormat ff_nut_muxer = {
     .write_header   = nut_write_header,
     .write_packet   = nut_write_packet,
     .write_trailer  = nut_write_trailer,
+    .deinit         = nut_write_deinit,
     .flags          = AVFMT_GLOBALHEADER | AVFMT_VARIABLE_FPS,
     .codec_tag      = ff_nut_codec_tags,
     .priv_class     = &class,
diff --git a/libavformat/nuv.c b/libavformat/nuv.c
index 001d9c88..c30da607 100644
--- a/libavformat/nuv.c
+++ b/libavformat/nuv.c
@@ -172,6 +172,15 @@ static int nuv_header(AVFormatContext *s)
     if (aspect > 0.9999 && aspect < 1.0001)
         aspect = 4.0 / 3.0;
     fps = av_int2double(avio_rl64(pb));
+    if (fps < 0.0f) {
+        if (s->error_recognition & AV_EF_EXPLODE) {
+            av_log(s, AV_LOG_ERROR, "Invalid frame rate %f\n", fps);
+            return AVERROR_INVALIDDATA;
+        } else {
+            av_log(s, AV_LOG_WARNING, "Invalid frame rate %f, setting to 0.\n", fps);
+            fps = 0.0f;
+        }
+    }
 
     // number of packets per stream type, -1 means unknown, e.g. streaming
     v_packs = avio_rl32(pb);
@@ -275,7 +284,7 @@ static int nuv_packet(AVFormatContext *s, AVPacket *pkt)
             memcpy(pkt->data, hdr, copyhdrsize);
             ret = avio_read(pb, pkt->data + copyhdrsize, size);
             if (ret < 0) {
-                av_free_packet(pkt);
+                av_packet_unref(pkt);
                 return ret;
             }
             if (ret < size)
diff --git a/libavformat/oggdec.c b/libavformat/oggdec.c
index 72d96e82..2d99b4af 100644
--- a/libavformat/oggdec.c
+++ b/libavformat/oggdec.c
@@ -41,6 +41,7 @@
 
 static const struct ogg_codec * const ogg_codecs[] = {
     &ff_skeleton_codec,
+    &ff_daala_codec,
     &ff_dirac_codec,
     &ff_speex_codec,
     &ff_vorbis_codec,
@@ -60,6 +61,7 @@ static const struct ogg_codec * const ogg_codecs[] = {
 
 static int64_t ogg_calc_pts(AVFormatContext *s, int idx, int64_t *dts);
 static int ogg_new_stream(AVFormatContext *s, uint32_t serial);
+static int ogg_restore(AVFormatContext *s, int discard);
 
 //FIXME We could avoid some structure duplication
 static int ogg_save(AVFormatContext *s)
@@ -68,6 +70,7 @@ static int ogg_save(AVFormatContext *s)
     struct ogg_state *ost =
         av_malloc(sizeof(*ost) + (ogg->nstreams - 1) * sizeof(*ogg->streams));
     int i;
+    int ret = 0;
 
     if (!ost)
         return AVERROR(ENOMEM);
@@ -80,15 +83,21 @@ static int ogg_save(AVFormatContext *s)
 
     for (i = 0; i < ogg->nstreams; i++) {
         struct ogg_stream *os = ogg->streams + i;
-        os->buf = av_mallocz(os->bufsize + FF_INPUT_BUFFER_PADDING_SIZE);
-        memcpy(os->buf, ost->streams[i].buf, os->bufpos);
+        os->buf = av_mallocz(os->bufsize + AV_INPUT_BUFFER_PADDING_SIZE);
+        if (os->buf)
+            memcpy(os->buf, ost->streams[i].buf, os->bufpos);
+        else
+            ret = AVERROR(ENOMEM);
         os->new_metadata      = NULL;
         os->new_metadata_size = 0;
     }
 
     ogg->state = ost;
 
-    return 0;
+    if (ret < 0)
+        ogg_restore(s, 0);
+
+    return ret;
 }
 
 static int ogg_restore(AVFormatContext *s, int discard)
@@ -255,7 +264,7 @@ static int ogg_new_stream(AVFormatContext *s, uint32_t serial)
     memset(os, 0, sizeof(*os));
     os->serial        = serial;
     os->bufsize       = DECODER_BUFFER_SIZE;
-    os->buf           = av_malloc(os->bufsize + FF_INPUT_BUFFER_PADDING_SIZE);
+    os->buf           = av_malloc(os->bufsize + AV_INPUT_BUFFER_PADDING_SIZE);
     os->header        = -1;
     os->start_granule = OGG_NOGRANULE_VALUE;
     if (!os->buf)
@@ -277,7 +286,7 @@ static int ogg_new_stream(AVFormatContext *s, uint32_t serial)
 static int ogg_new_buf(struct ogg *ogg, int idx)
 {
     struct ogg_stream *os = ogg->streams + idx;
-    uint8_t *nb = av_malloc(os->bufsize + FF_INPUT_BUFFER_PADDING_SIZE);
+    uint8_t *nb = av_malloc(os->bufsize + AV_INPUT_BUFFER_PADDING_SIZE);
     int size = os->bufpos - os->pstart;
 
     if (!nb)
@@ -416,7 +425,7 @@ static int ogg_read_page(AVFormatContext *s, int *sid)
     }
 
     if (os->bufsize - os->bufpos < size) {
-        uint8_t *nb = av_malloc((os->bufsize *= 2) + FF_INPUT_BUFFER_PADDING_SIZE);
+        uint8_t *nb = av_malloc((os->bufsize *= 2) + AV_INPUT_BUFFER_PADDING_SIZE);
         if (!nb)
             return AVERROR(ENOMEM);
         memcpy(nb, os->buf, os->bufpos);
@@ -432,7 +441,7 @@ static int ogg_read_page(AVFormatContext *s, int *sid)
     os->granule = gp;
     os->flags   = flags;
 
-    memset(os->buf + os->bufpos, 0, FF_INPUT_BUFFER_PADDING_SIZE);
+    memset(os->buf + os->bufpos, 0, AV_INPUT_BUFFER_PADDING_SIZE);
     if (sid)
         *sid = idx;
 
@@ -835,7 +844,7 @@ static int ogg_read_packet(AVFormatContext *s, AVPacket *pkt)
 
     return psize;
 fail:
-    av_free_packet(pkt);
+    av_packet_unref(pkt);
     return AVERROR(ENOMEM);
 }
 
@@ -899,6 +908,7 @@ static int ogg_read_seek(AVFormatContext *s, int stream_index,
         os->keyframe_seek = 1;
 
     ret = ff_seek_frame_binary(s, stream_index, timestamp, flags);
+    ogg_reset(s);
     os  = ogg->streams + stream_index;
     if (ret < 0)
         os->keyframe_seek = 0;
diff --git a/libavformat/oggdec.h b/libavformat/oggdec.h
index 7dc77160..d7af1cfa 100644
--- a/libavformat/oggdec.h
+++ b/libavformat/oggdec.h
@@ -114,6 +114,7 @@ struct ogg {
 #define OGG_NOGRANULE_VALUE (-1ull)
 
 extern const struct ogg_codec ff_celt_codec;
+extern const struct ogg_codec ff_daala_codec;
 extern const struct ogg_codec ff_dirac_codec;
 extern const struct ogg_codec ff_flac_codec;
 extern const struct ogg_codec ff_ogm_audio_codec;
diff --git a/libavformat/oggenc.c b/libavformat/oggenc.c
index 2c0a44e4..49075129 100644
--- a/libavformat/oggenc.c
+++ b/libavformat/oggenc.c
@@ -260,7 +260,7 @@ static int ogg_buffer_data(AVFormatContext *s, AVStream *st,
         if (i == total_segments)
             page->granule = granule;
 
-        if (!header) {
+        {
             AVStream *st = s->streams[page->stream_index];
 
             int64_t start = av_rescale_q(page->start_granule, st->time_base,
@@ -268,10 +268,13 @@ static int ogg_buffer_data(AVFormatContext *s, AVStream *st,
             int64_t next  = av_rescale_q(page->granule, st->time_base,
                                          AV_TIME_BASE_Q);
 
-            if (page->segments_count == 255 ||
-                (ogg->pref_size     > 0 && page->size   >= ogg->pref_size) ||
-                (ogg->pref_duration > 0 && next - start >= ogg->pref_duration)) {
+            if (page->segments_count == 255) {
                 ogg_buffer_page(s, oggstream);
+            } else if (!header) {
+                if ((ogg->pref_size     > 0 && page->size   >= ogg->pref_size) ||
+                    (ogg->pref_duration > 0 && next - start >= ogg->pref_duration)) {
+                    ogg_buffer_page(s, oggstream);
+                }
             }
         }
     }
diff --git a/libavformat/oggparsedaala.c b/libavformat/oggparsedaala.c
new file mode 100644
index 00000000..3651ca18
--- /dev/null
+++ b/libavformat/oggparsedaala.c
@@ -0,0 +1,256 @@
+/*
+ * Ogg Daala parser
+ * Copyright (C) 2015 Rostislav Pehlivanov <atomnuker gmail com>
+ * Copyright (C) 2015 Vittorio Giovara <vittorio.giovara gmail com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdlib.h>
+#include "libavcodec/bytestream.h"
+#include "avformat.h"
+#include "internal.h"
+#include "oggdec.h"
+
+struct DaalaPixFmtMap { /* pixel layout as coded in the 0x80 header, plus the pix_fmt it maps to */
+    enum AVPixelFormat ffmpeg_fmt; /* value daala_match_pix_fmt() returns when an entry matches */
+    int depth;                     /* header stores a byte b; daala_header() sets depth = 8 + 2 * (b - 1) */
+    int planes;                    /* plane count; daala_header() rejects values above 4 */
+    int xdec[4];                   /* one header byte per plane (presumably the horizontal decimation shift) */
+    int ydec[4];                   /* one header byte per plane (presumably the vertical decimation shift) */
+};
+
+/* Currently supported formats only; columns: pix_fmt, depth, planes, xdec[], ydec[] */
+static const struct DaalaPixFmtMap list_fmts[] = {
+    { AV_PIX_FMT_YUV420P,  8, 3, {0, 1, 1, 0}, {0, 1, 1, 0} }, /* planes 1 and 2 carry xdec = ydec = 1 */
+    { AV_PIX_FMT_YUV444P,  8, 3, {0, 0, 0, 0}, {0, 0, 0, 0} }  /* every plane has xdec = ydec = 0 */
+};
+
+typedef struct DaalaInfoHeader { /* per-stream state kept in ogg_stream.private */
+    int init_d;                 /* set to 1 once a 0x80 header has been parsed; 0x81/0x82 are rejected before that */
+    int fpr;                    /* single byte read from the 0x80 header; only stored in this file */
+    int gpshift;                /* byte from the 0x80 header, must be < 32; split point used by daala_gptopts() */
+    int gpmask;                 /* (1U << gpshift) - 1 */
+    int version_maj;            /* first byte after the magic */
+    int version_min;            /* second byte after the magic */
+    int version_sub;            /* third byte after the magic */
+    int frame_duration;         /* 32-bit header field; only stored in this file */
+    int keyframe_granule_shift; /* never written or read in this file */
+    struct DaalaPixFmtMap format; /* depth/planes/xdec/ydec from the header, matched against list_fmts */
+} DaalaInfoHeader;
+
+/* Return the ffmpeg_fmt of the list_fmts entry whose depth, plane count and
+ * per-plane xdec/ydec values all equal those of *fmt, or -1 if none does. */
+static inline int daala_match_pix_fmt(struct DaalaPixFmtMap *fmt)
+{
+    int entry, plane;
+
+    for (entry = 0; entry < FF_ARRAY_ELEMS(list_fmts); entry++) {
+        const struct DaalaPixFmtMap *cand = &list_fmts[entry];
+        int agreeing = 0;
+
+        if (cand->depth != fmt->depth || cand->planes != fmt->planes)
+            continue;
+        /* count the planes on which both xdec and ydec agree */
+        for (plane = 0; plane < fmt->planes; plane++)
+            agreeing += cand->xdec[plane] == fmt->xdec[plane] &&
+                        cand->ydec[plane] == fmt->ydec[plane];
+        if (agreeing == fmt->planes)
+            return cand->ffmpeg_fmt;
+    }
+    return -1;
+}
+
+/* Handle one Daala header packet: 0x80 (stream info, parsed here), 0x81
+ * (comments) or 0x82 (kept as-is), then append it to the codec extradata
+ * behind a 16-bit size. Returns 1 when a header was consumed, 0 when the
+ * first byte lacks the 0x80 bit, or a negative AVERROR code on failure. */
+static int daala_header(AVFormatContext *s, int idx)
+{
+    int i, err;
+    uint8_t *cdp;
+    GetByteContext gb;
+    AVRational timebase;
+    struct ogg *ogg        = s->priv_data;
+    struct ogg_stream *os  = ogg->streams + idx;
+    AVStream *st           = s->streams[idx];
+    int cds                = st->codec->extradata_size + os->psize + 2;
+    DaalaInfoHeader *hdr   = os->private;
+
+    if (!(os->buf[os->pstart] & 0x80))
+        return 0;
+
+    if (!hdr) {
+        hdr = av_mallocz(sizeof(*hdr));
+        if (!hdr)
+            return AVERROR(ENOMEM);
+        os->private = hdr;
+    }
+
+    switch (os->buf[os->pstart]) {
+    case 0x80:
+        bytestream2_init(&gb, os->buf + os->pstart, os->psize);
+        bytestream2_skip(&gb, ff_daala_codec.magicsize);
+
+        hdr->version_maj = bytestream2_get_byte(&gb);
+        hdr->version_min = bytestream2_get_byte(&gb);
+        hdr->version_sub = bytestream2_get_byte(&gb);
+
+        st->codec->width  = bytestream2_get_ne32(&gb);
+        st->codec->height = bytestream2_get_ne32(&gb);
+        st->sample_aspect_ratio.num = bytestream2_get_ne32(&gb);
+        st->sample_aspect_ratio.den = bytestream2_get_ne32(&gb);
+        /* Passed to avpriv_set_pts_info() as (den, num), so this pair is a
+         * frame rate: any non-positive half is invalid, 30 FPS is 30/1. */
+        timebase.num = bytestream2_get_ne32(&gb);
+        timebase.den = bytestream2_get_ne32(&gb);
+        if (timebase.num <= 0 || timebase.den <= 0) {
+            av_log(s, AV_LOG_WARNING, "Invalid timebase, assuming 30 FPS\n");
+            timebase.num = 30;
+            timebase.den = 1;
+        }
+        avpriv_set_pts_info(st, 64, timebase.den, timebase.num);
+
+        hdr->frame_duration = bytestream2_get_ne32(&gb);
+        hdr->gpshift = bytestream2_get_byte(&gb);
+        if (hdr->gpshift >= 32) {
+            av_log(s, AV_LOG_ERROR, "Too large gpshift %d (>= 32).\n",
+                   hdr->gpshift);
+            return AVERROR_INVALIDDATA;
+        }
+        hdr->gpmask  = (1U << hdr->gpshift) - 1;
+
+        hdr->format.depth  = 8 + 2*(bytestream2_get_byte(&gb)-1);
+        hdr->fpr = bytestream2_get_byte(&gb);
+        hdr->format.planes = bytestream2_get_byte(&gb);
+        if (hdr->format.planes > 4) {
+            av_log(s, AV_LOG_ERROR,
+                   "Invalid number of planes %d in daala pixel format map.\n",
+                   hdr->format.planes);
+            return AVERROR_INVALIDDATA;
+        }
+        for (i = 0; i < hdr->format.planes; i++) {
+            hdr->format.xdec[i] = bytestream2_get_byte(&gb);
+            hdr->format.ydec[i] = bytestream2_get_byte(&gb);
+        }
+
+        if ((st->codec->pix_fmt = daala_match_pix_fmt(&hdr->format)) < 0)
+            av_log(s, AV_LOG_ERROR, "Unsupported pixel format - %i %i\n",
+                   hdr->format.depth, hdr->format.planes);
+
+        st->codec->codec_id   = AV_CODEC_ID_DAALA;
+        st->codec->codec_type = AVMEDIA_TYPE_VIDEO;
+        st->need_parsing      = AVSTREAM_PARSE_HEADERS;
+        hdr->init_d = 1;
+        break;
+    case 0x81:
+        if (!hdr->init_d)
+            return AVERROR_INVALIDDATA;
+        ff_vorbis_stream_comment(s, st,
+                                 os->buf + os->pstart + ff_daala_codec.magicsize,
+                                 os->psize - ff_daala_codec.magicsize);
+        break;
+    case 0x82:
+        if (!hdr->init_d)
+            return AVERROR_INVALIDDATA;
+        break;
+    default:
+        av_log(s, AV_LOG_ERROR, "Unknown header type %X\n", os->buf[os->pstart]);
+        return AVERROR_INVALIDDATA;
+    }
+
+    if ((err = av_reallocp(&st->codec->extradata,
+                           cds + AV_INPUT_BUFFER_PADDING_SIZE)) < 0) {
+        st->codec->extradata_size = 0;
+        return err;
+    }
+    /* append <size high byte><size low byte><packet>; zero the padding */
+    memset(st->codec->extradata + cds, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+    cdp    = st->codec->extradata + st->codec->extradata_size;
+    *cdp++ = os->psize >> 8;
+    *cdp++ = os->psize & 0xff;
+    memcpy(cdp, os->buf + os->pstart, os->psize);
+    st->codec->extradata_size = cds;
+
+    return 1;
+}
+
+/* Map granule position gp to a timestamp: the value above the gpshift low
+ * bits plus the value of the bits selected by gpmask. Sets AV_PKT_FLAG_KEY
+ * in os->pflags when the masked part is 0, copies the result to *dts if dts
+ * is non-NULL, and returns AV_NOPTS_VALUE while os->private is unset. */
+static uint64_t daala_gptopts(AVFormatContext *ctx, int idx, uint64_t gp,
+                              int64_t *dts)
+{
+    struct ogg_stream *os = ((struct ogg *)ctx->priv_data)->streams + idx;
+    DaalaInfoHeader *hdr  = os->private;
+    uint64_t base, offset, pts;
+
+    if (!hdr)
+        return AV_NOPTS_VALUE;
+    base   = gp >> hdr->gpshift;
+    offset = gp  & hdr->gpmask;
+    pts    = base + offset;
+    if (offset == 0)
+        os->pflags |= AV_PKT_FLAG_KEY;
+    if (dts != NULL)
+        *dts = pts;
+    return pts;
+}
+
+/* Per-packet hook: seeds the stream's first timestamp from the page granule
+ * and gives every non-empty packet a duration of 1. Always returns 0. */
+static int daala_packet(AVFormatContext *s, int idx)
+{
+    int seg, duration = 1;
+    struct ogg *ogg = s->priv_data;
+    struct ogg_stream *os = ogg->streams + idx;
+    AVStream *st = s->streams[idx];
+
+    /* First packet: count the remaining lacing values below 255 on this page
+     * (each one ends a packet) and step back from the page granule by that
+     * amount to obtain the first timestamp. */
+    if ((!os->lastpts || os->lastpts == AV_NOPTS_VALUE) && !(os->flags & OGG_FLAG_EOS)) {
+        for (seg = os->segp; seg < os->nsegs; seg++)
+            if (os->segments[seg] < 255)
+                duration++;
+        os->lastpts = os->lastdts = daala_gptopts(s, idx, os->granule, NULL) - duration;
+        if (st->start_time == AV_NOPTS_VALUE) {
+            st->start_time = os->lastpts;
+            /* an unknown duration is AV_NOPTS_VALUE, not 0: leave it alone */
+            if (st->duration && st->duration != AV_NOPTS_VALUE)
+                st->duration -= st->start_time;
+        }
+    }
+
+    /* parse packet duration */
+    if (os->psize > 0)
+        os->pduration = 1;
+
+    return 0;
+}
+
+const struct ogg_codec ff_daala_codec = { /* Ogg codec descriptor for Daala streams */
+    .name             = "Daala",
+    .magic            = "\200daala", /* byte 0x80 followed by "daala"; 0x80 is also the first header type daala_header() parses */
+    .magicsize        = 6,           /* bytes in .magic; daala_header() skips this many before reading fields */
+    .header           = daala_header,
+    .packet           = daala_packet,
+    .gptopts          = daala_gptopts,
+    .granule_is_start = 1,
+    .nb_header        = 3,           /* daala_header() accepts three header types: 0x80, 0x81 and 0x82 */
+};
diff --git a/libavformat/oggparsedirac.c b/libavformat/oggparsedirac.c
index ab40f96c..3e5e3930 100644
--- a/libavformat/oggparsedirac.c
+++ b/libavformat/oggparsedirac.c
@@ -18,7 +18,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include "libavcodec/get_bits.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/intreadwrite.h"
 #include "libavcodec/dirac.h"
 #include "avformat.h"
 #include "internal.h"
@@ -29,26 +30,35 @@ static int dirac_header(AVFormatContext *s, int idx)
     struct ogg *ogg = s->priv_data;
     struct ogg_stream *os = ogg->streams + idx;
     AVStream *st = s->streams[idx];
-    dirac_source_params source;
-    GetBitContext gb;
+    AVDiracSeqHeader *dsh;
     int ret;
 
     // already parsed the header
     if (st->codec->codec_id == AV_CODEC_ID_DIRAC)
         return 0;
 
-    ret = init_get_bits8(&gb, os->buf + os->pstart + 13, (os->psize - 13));
+    ret = av_dirac_parse_sequence_header(&dsh, os->buf + os->pstart + 13, (os->psize - 13), s);
     if (ret < 0)
         return ret;
 
-    ret = avpriv_dirac_parse_sequence_header(st->codec, &gb, &source);
-    if (ret < 0)
-        return ret;
+    st->codec->codec_type      = AVMEDIA_TYPE_VIDEO;
+    st->codec->codec_id        = AV_CODEC_ID_DIRAC;
+    st->codec->width           = dsh->width;
+    st->codec->height          = dsh->height;
+    st->codec->pix_fmt         = dsh->pix_fmt;
+    st->codec->color_range     = dsh->color_range;
+    st->codec->color_trc       = dsh->color_trc;
+    st->codec->color_primaries = dsh->color_primaries;
+    st->codec->colorspace      = dsh->colorspace;
+    st->codec->profile         = dsh->profile;
+    st->codec->level           = dsh->level;
+    if (av_image_check_sar(st->codec->width, st->codec->height, dsh->sample_aspect_ratio) >= 0)
+        st->sample_aspect_ratio = dsh->sample_aspect_ratio;
 
-    st->codec->codec_type = AVMEDIA_TYPE_VIDEO;
-    st->codec->codec_id = AV_CODEC_ID_DIRAC;
     // dirac in ogg always stores timestamps as though the video were interlaced
-    avpriv_set_pts_info(st, 64, st->codec->framerate.den, 2*st->codec->framerate.num);
+    avpriv_set_pts_info(st, 64, dsh->framerate.den, 2 * dsh->framerate.num);
+
+    av_freep(&dsh);
     return 1;
 }
 
diff --git a/libavformat/oggparseogm.c b/libavformat/oggparseogm.c
index 54024e0a..d63c83b1 100644
--- a/libavformat/oggparseogm.c
+++ b/libavformat/oggparseogm.c
@@ -57,6 +57,8 @@ ogm_header(AVFormatContext *s, int idx)
             tag = bytestream2_get_le32(&p);
             st->codec->codec_id = ff_codec_get_id(ff_codec_bmp_tags, tag);
             st->codec->codec_tag = tag;
+            if (st->codec->codec_id == AV_CODEC_ID_MPEG4)
+                st->need_parsing = AVSTREAM_PARSE_HEADERS;
         } else if (bytestream2_peek_byte(&p) == 't') {
             st->codec->codec_type = AVMEDIA_TYPE_SUBTITLE;
             st->codec->codec_id = AV_CODEC_ID_TEXT;
@@ -102,7 +104,7 @@ ogm_header(AVFormatContext *s, int idx)
                 size -= 4;
             }
             if (size > 52) {
-                av_assert0(FF_INPUT_BUFFER_PADDING_SIZE <= 52);
+                av_assert0(AV_INPUT_BUFFER_PADDING_SIZE <= 52);
                 size -= 52;
                 ff_alloc_extradata(st->codec, size);
                 bytestream2_get_buffer(&p, st->codec->extradata, st->codec->extradata_size);
diff --git a/libavformat/oggparsespeex.c b/libavformat/oggparsespeex.c
index 9b5c65f4..c86b1271 100644
--- a/libavformat/oggparsespeex.c
+++ b/libavformat/oggparsespeex.c
@@ -47,6 +47,8 @@ static int speex_header(AVFormatContext *s, int idx) {
 
     if (!spxp) {
         spxp = av_mallocz(sizeof(*spxp));
+        if (!spxp)
+            return AVERROR(ENOMEM);
         os->private = spxp;
     }
 
diff --git a/libavformat/oggparsetheora.c b/libavformat/oggparsetheora.c
index 91c70dfe..5f057c3c 100644
--- a/libavformat/oggparsetheora.c
+++ b/libavformat/oggparsetheora.c
@@ -108,7 +108,7 @@ static int theora_header(AVFormatContext *s, int idx)
             skip_bits(&gb, 2);
 
         thp->gpshift = get_bits(&gb, 5);
-        thp->gpmask  = (1 << thp->gpshift) - 1;
+        thp->gpmask  = (1U << thp->gpshift) - 1;
 
         st->codec->codec_type = AVMEDIA_TYPE_VIDEO;
         st->codec->codec_id   = AV_CODEC_ID_THEORA;
@@ -127,11 +127,11 @@ static int theora_header(AVFormatContext *s, int idx)
     }
 
     if ((err = av_reallocp(&st->codec->extradata,
-                           cds + FF_INPUT_BUFFER_PADDING_SIZE)) < 0) {
+                           cds + AV_INPUT_BUFFER_PADDING_SIZE)) < 0) {
         st->codec->extradata_size = 0;
         return err;
     }
-    memset(st->codec->extradata + cds, 0, FF_INPUT_BUFFER_PADDING_SIZE);
+    memset(st->codec->extradata + cds, 0, AV_INPUT_BUFFER_PADDING_SIZE);
 
     cdp    = st->codec->extradata + st->codec->extradata_size;
     *cdp++ = os->psize >> 8;
@@ -191,7 +191,7 @@ static int theora_packet(AVFormatContext *s, int idx)
         os->lastpts = os->lastdts   = theora_gptopts(s, idx, os->granule, NULL) - duration;
         if(s->streams[idx]->start_time == AV_NOPTS_VALUE) {
             s->streams[idx]->start_time = os->lastpts;
-            if (s->streams[idx]->duration)
+            if (s->streams[idx]->duration > 0)
                 s->streams[idx]->duration -= s->streams[idx]->start_time;
         }
     }
diff --git a/libavformat/oggparsevorbis.c b/libavformat/oggparsevorbis.c
index dd443374..b96be989 100644
--- a/libavformat/oggparsevorbis.c
+++ b/libavformat/oggparsevorbis.c
@@ -242,7 +242,7 @@ static int fixup_vorbis_headers(AVFormatContext *as,
         offset += priv->len[i];
         av_freep(&priv->packet[i]);
     }
-    if ((err = av_reallocp(buf, offset + FF_INPUT_BUFFER_PADDING_SIZE)) < 0)
+    if ((err = av_reallocp(buf, offset + AV_INPUT_BUFFER_PADDING_SIZE)) < 0)
         return err;
     return offset;
 }
@@ -406,6 +406,9 @@ static int vorbis_packet(AVFormatContext *s, int idx)
     struct oggvorbis_private *priv = os->private;
     int duration, flags = 0;
 
+    if (!priv->vp)
+        return AVERROR_INVALIDDATA;
+
     /* first packet handling
      * here we parse the duration of each packet in the first page and compare
      * the total duration to the page granule to find the encoder delay and
diff --git a/libavformat/omadec.c b/libavformat/omadec.c
index 858ee84f..132992dd 100644
--- a/libavformat/omadec.c
+++ b/libavformat/omadec.c
@@ -74,7 +74,7 @@ typedef struct OMAContext {
     uint8_t sm_val[8];
     uint8_t e_val[8];
     uint8_t iv[8];
-    struct AVDES av_des;
+    struct AVDES *av_des;
 } OMAContext;
 
 static void hex_log(AVFormatContext *s, int level,
@@ -125,28 +125,34 @@ static int rprobe(AVFormatContext *s, uint8_t *enc_header, unsigned size,
 {
     OMAContext *oc = s->priv_data;
     unsigned int pos;
-    struct AVDES av_des;
+    struct AVDES *av_des;
 
     if (!enc_header || !r_val ||
         size < OMA_ENC_HEADER_SIZE + oc->k_size + oc->e_size + oc->i_size ||
         size < OMA_RPROBE_M_VAL)
         return -1;
 
+    av_des = av_des_alloc();
+    if (!av_des)
+        return AVERROR(ENOMEM);
+
     /* m_val */
-    av_des_init(&av_des, r_val, 192, 1);
-    av_des_crypt(&av_des, oc->m_val, &enc_header[48], 1, NULL, 1);
+    av_des_init(av_des, r_val, 192, 1);
+    av_des_crypt(av_des, oc->m_val, &enc_header[48], 1, NULL, 1);
 
     /* s_val */
-    av_des_init(&av_des, oc->m_val, 64, 0);
-    av_des_crypt(&av_des, oc->s_val, NULL, 1, NULL, 0);
+    av_des_init(av_des, oc->m_val, 64, 0);
+    av_des_crypt(av_des, oc->s_val, NULL, 1, NULL, 0);
 
     /* sm_val */
     pos = OMA_ENC_HEADER_SIZE + oc->k_size + oc->e_size;
-    av_des_init(&av_des, oc->s_val, 64, 0);
-    av_des_mac(&av_des, oc->sm_val, &enc_header[pos], (oc->i_size >> 3));
+    av_des_init(av_des, oc->s_val, 64, 0);
+    av_des_mac(av_des, oc->sm_val, &enc_header[pos], (oc->i_size >> 3));
 
     pos += oc->i_size;
 
+    av_free(av_des);
+
     return memcmp(&enc_header[pos], oc->sm_val, 8) ? -1 : 0;
 }
 
@@ -156,7 +162,7 @@ static int nprobe(AVFormatContext *s, uint8_t *enc_header, unsigned size,
     OMAContext *oc = s->priv_data;
     uint64_t pos;
     uint32_t taglen, datalen;
-    struct AVDES av_des;
+    struct AVDES *av_des;
 
     if (!enc_header || !n_val ||
         size < OMA_ENC_HEADER_SIZE + oc->k_size + 4)
@@ -180,15 +186,22 @@ static int nprobe(AVFormatContext *s, uint8_t *enc_header, unsigned size,
     if (pos + (((uint64_t)datalen) << 4) > size)
         return -1;
 
-    av_des_init(&av_des, n_val, 192, 1);
+    av_des = av_des_alloc();
+    if (!av_des)
+        return AVERROR(ENOMEM);
+
+    av_des_init(av_des, n_val, 192, 1);
     while (datalen-- > 0) {
-        av_des_crypt(&av_des, oc->r_val, &enc_header[pos], 2, NULL, 1);
+        av_des_crypt(av_des, oc->r_val, &enc_header[pos], 2, NULL, 1);
         kset(s, oc->r_val, NULL, 16);
-        if (!rprobe(s, enc_header, size, oc->r_val))
+        if (!rprobe(s, enc_header, size, oc->r_val)) {
+            av_free(av_des);
             return 0;
+        }
         pos += 16;
     }
 
+    av_free(av_des);
     return -1;
 }
 
@@ -273,14 +286,18 @@ static int decrypt_init(AVFormatContext *s, ID3v2ExtraMeta *em, uint8_t *header)
         }
     }
 
+    oc->av_des = av_des_alloc();
+    if (!oc->av_des)
+        return AVERROR(ENOMEM);
+
     /* e_val */
-    av_des_init(&oc->av_des, oc->m_val, 64, 0);
-    av_des_crypt(&oc->av_des, oc->e_val,
+    av_des_init(oc->av_des, oc->m_val, 64, 0);
+    av_des_crypt(oc->av_des, oc->e_val,
                  &gdata[OMA_ENC_HEADER_SIZE + 40], 1, NULL, 0);
     hex_log(s, AV_LOG_DEBUG, "EK", oc->e_val, 8);
 
     /* init e_val */
-    av_des_init(&oc->av_des, oc->e_val, 64, 1);
+    av_des_init(oc->av_des, oc->e_val, 64, 1);
 
     return 0;
 }
@@ -440,7 +457,7 @@ static int oma_read_packet(AVFormatContext *s, AVPacket *pkt)
         /* previous unencrypted block saved in IV for
          * the next packet (CBC mode) */
         if (ret == packet_size)
-            av_des_crypt(&oc->av_des, pkt->data, pkt->data,
+            av_des_crypt(oc->av_des, pkt->data, pkt->data,
                          (packet_size >> 3), oc->iv, 1);
         else
             memset(oc->iv, 0, 8);
@@ -496,6 +513,13 @@ static int oma_read_seek(struct AVFormatContext *s,
     return err;
 }
 
+static int oma_read_close(AVFormatContext *s)
+{
+    OMAContext *oc = s->priv_data;
+    av_free(oc->av_des);
+    return 0;
+}
+
 AVInputFormat ff_oma_demuxer = {
     .name           = "oma",
     .long_name      = NULL_IF_CONFIG_SMALL("Sony OpenMG audio"),
@@ -504,6 +528,7 @@ AVInputFormat ff_oma_demuxer = {
     .read_header    = oma_read_header,
     .read_packet    = oma_read_packet,
     .read_seek      = oma_read_seek,
+    .read_close     = oma_read_close,
     .flags          = AVFMT_GENERIC_INDEX,
     .extensions     = "oma,omg,aa3",
     .codec_tag      = (const AVCodecTag* const []){ff_oma_codec_tags, 0},
diff --git a/libavformat/options.c b/libavformat/options.c
index d238dd5a..8fe00171 100644
--- a/libavformat/options.c
+++ b/libavformat/options.c
@@ -20,6 +20,8 @@
 #include "avformat.h"
 #include "avio_internal.h"
 #include "internal.h"
+
+#include "libavutil/internal.h"
 #include "libavutil/opt.h"
 
 /**
@@ -27,7 +29,9 @@
  * Options definition for AVFormatContext.
  */
 
+FF_DISABLE_DEPRECATION_WARNINGS
 #include "options_table.h"
+FF_ENABLE_DEPRECATION_WARNINGS
 
 static const char* format_to_name(void* ptr)
 {
@@ -95,12 +99,33 @@ static const AVClass av_format_context_class = {
     .get_category   = get_category,
 };
 
+static int io_open_default(AVFormatContext *s, AVIOContext **pb,
+                           const char *url, int flags, AVDictionary **options)
+{
+#if FF_API_OLD_OPEN_CALLBACKS
+FF_DISABLE_DEPRECATION_WARNINGS
+    if (s->open_cb)
+        return s->open_cb(s, pb, url, flags, &s->interrupt_callback, options);
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+    return ffio_open_whitelist(pb, url, flags, &s->interrupt_callback, options, s->protocol_whitelist);
+}
+
+static void io_close_default(AVFormatContext *s, AVIOContext *pb)
+{
+    avio_close(pb);
+}
+
 static void avformat_get_context_defaults(AVFormatContext *s)
 {
     memset(s, 0, sizeof(AVFormatContext));
 
     s->av_class = &av_format_context_class;
 
+    s->io_open  = io_open_default;
+    s->io_close = io_close_default;
+
     av_opt_set_defaults(s);
 }
 
diff --git a/libavformat/options_table.h b/libavformat/options_table.h
index 58670b00..8926fe57 100644
--- a/libavformat/options_table.h
+++ b/libavformat/options_table.h
@@ -36,7 +36,7 @@
 static const AVOption avformat_options[] = {
 {"avioflags", NULL, OFFSET(avio_flags), AV_OPT_TYPE_FLAGS, {.i64 = DEFAULT }, INT_MIN, INT_MAX, D|E, "avioflags"},
 {"direct", "reduce buffering", 0, AV_OPT_TYPE_CONST, {.i64 = AVIO_FLAG_DIRECT }, INT_MIN, INT_MAX, D|E, "avioflags"},
-{"probesize", "set probing size", OFFSET(probesize2), AV_OPT_TYPE_INT64, {.i64 = 5000000 }, 32, INT64_MAX, D},
+{"probesize", "set probing size", OFFSET(probesize), AV_OPT_TYPE_INT64, {.i64 = 5000000 }, 32, INT64_MAX, D},
 {"formatprobesize", "number of bytes to probe file format", OFFSET(format_probesize), AV_OPT_TYPE_INT, {.i64 = PROBE_BUF_MAX}, 0, INT_MAX-1, D},
 {"packetsize", "set packet size", OFFSET(packet_size), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, INT_MAX, E},
 {"fflags", NULL, OFFSET(flags), AV_OPT_TYPE_FLAGS, {.i64 = AVFMT_FLAG_FLUSH_PACKETS }, INT_MIN, INT_MAX, D|E, "fflags"},
@@ -52,9 +52,9 @@ static const AVOption avformat_options[] = {
 {"fastseek", "fast but inaccurate seeks", 0, AV_OPT_TYPE_CONST, {.i64 = AVFMT_FLAG_FAST_SEEK }, INT_MIN, INT_MAX, D, "fflags"},
 {"latm", "enable RTP MP4A-LATM payload", 0, AV_OPT_TYPE_CONST, {.i64 = AVFMT_FLAG_MP4A_LATM }, INT_MIN, INT_MAX, E, "fflags"},
 {"nobuffer", "reduce the latency introduced by optional buffering", 0, AV_OPT_TYPE_CONST, {.i64 = AVFMT_FLAG_NOBUFFER }, 0, INT_MAX, D, "fflags"},
-{"seek2any", "allow seeking to non-keyframes on demuxer level when supported", OFFSET(seek2any), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 1, D},
+{"seek2any", "allow seeking to non-keyframes on demuxer level when supported", OFFSET(seek2any), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, D},
 {"bitexact", "do not write random/volatile data", 0, AV_OPT_TYPE_CONST, { .i64 = AVFMT_FLAG_BITEXACT }, 0, 0, E, "fflags" },
-{"analyzeduration", "specify how many microseconds are analyzed to probe the input", OFFSET(max_analyze_duration2), AV_OPT_TYPE_INT64, {.i64 = 0 }, 0, INT64_MAX, D},
+{"analyzeduration", "specify how many microseconds are analyzed to probe the input", OFFSET(max_analyze_duration), AV_OPT_TYPE_INT64, {.i64 = 0 }, 0, INT64_MAX, D},
 {"cryptokey", "decryption key", OFFSET(key), AV_OPT_TYPE_BINARY, {.dbl = 0}, 0, 0, D},
 {"indexmem", "max memory used for timestamp index (per stream)", OFFSET(max_index_size), AV_OPT_TYPE_INT, {.i64 = 1<<20 }, 0, INT_MAX, D},
 {"rtbufsize", "max memory used for buffering real-time frames", OFFSET(max_picture_buffer), AV_OPT_TYPE_INT, {.i64 = 3041280 }, 0, INT_MAX, D}, /* defaults to 1s of 15fps 352x288 YUYV422 video */
@@ -80,8 +80,8 @@ static const AVOption avformat_options[] = {
 {"aggressive", "consider things that a sane encoder shouldn't do as an error", 0, AV_OPT_TYPE_CONST, {.i64 = AV_EF_AGGRESSIVE }, INT_MIN, INT_MAX, D, "err_detect"},
 {"use_wallclock_as_timestamps", "use wallclock as timestamps", OFFSET(use_wallclock_as_timestamps), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX-1, D},
 {"skip_initial_bytes", "set number of bytes to skip before reading header and frames", OFFSET(skip_initial_bytes), AV_OPT_TYPE_INT64, {.i64 = 0}, 0, INT64_MAX-1, D},
-{"correct_ts_overflow", "correct single timestamp overflows", OFFSET(correct_ts_overflow), AV_OPT_TYPE_INT, {.i64 = 1}, 0, 1, D},
-{"flush_packets", "enable flushing of the I/O context after each packet", OFFSET(flush_packets), AV_OPT_TYPE_INT, {.i64 = 1}, 0, 1, E},
+{"correct_ts_overflow", "correct single timestamp overflows", OFFSET(correct_ts_overflow), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, D},
+{"flush_packets", "enable flushing of the I/O context after each packet", OFFSET(flush_packets), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, E},
 {"metadata_header_padding", "set number of bytes to be written as padding in a metadata header", OFFSET(metadata_header_padding), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, E},
 {"output_ts_offset", "set output timestamp offset", OFFSET(output_ts_offset), AV_OPT_TYPE_DURATION, {.i64 = 0}, -INT64_MAX, INT64_MAX, E},
 {"max_interleave_delta", "maximum buffering duration for interleaving", OFFSET(max_interleave_delta), AV_OPT_TYPE_INT64, { .i64 = 10000000 }, 0, INT64_MAX, E },
@@ -100,6 +100,7 @@ static const AVOption avformat_options[] = {
 {"dump_separator", "set information dump field separator", OFFSET(dump_separator), AV_OPT_TYPE_STRING, {.str = ", "}, CHAR_MIN, CHAR_MAX, D|E},
 {"codec_whitelist", "List of decoders that are allowed to be used", OFFSET(codec_whitelist), AV_OPT_TYPE_STRING, { .str = NULL },  CHAR_MIN, CHAR_MAX, D },
 {"format_whitelist", "List of demuxers that are allowed to be used", OFFSET(format_whitelist), AV_OPT_TYPE_STRING, { .str = NULL },  CHAR_MIN, CHAR_MAX, D },
+{"protocol_whitelist", "List of protocols that are allowed to be used", OFFSET(protocol_whitelist), AV_OPT_TYPE_STRING, { .str = NULL },  CHAR_MIN, CHAR_MAX, D },
 {NULL},
 };
 
diff --git a/libavformat/os_support.c b/libavformat/os_support.c
index 7950e448..86d0b8f3 100644
--- a/libavformat/os_support.c
+++ b/libavformat/os_support.c
@@ -205,16 +205,9 @@ int ff_getnameinfo(const struct sockaddr *sa, int salen,
     }
 
     if (serv && servlen > 0) {
-        struct servent *ent = NULL;
-#if HAVE_GETSERVBYPORT
         if (!(flags & NI_NUMERICSERV))
-            ent = getservbyport(sin->sin_port, flags & NI_DGRAM ? "udp" : "tcp");
-#endif /* HAVE_GETSERVBYPORT */
-
-        if (ent)
-            snprintf(serv, servlen, "%s", ent->s_name);
-        else
-            snprintf(serv, servlen, "%d", ntohs(sin->sin_port));
+            return EAI_FAIL;
+        snprintf(serv, servlen, "%d", ntohs(sin->sin_port));
     }
 
     return 0;
diff --git a/libavformat/pjsdec.c b/libavformat/pjsdec.c
index 5129b70e..a88d5331 100644
--- a/libavformat/pjsdec.c
+++ b/libavformat/pjsdec.c
@@ -100,7 +100,7 @@ static int pjs_read_header(AVFormatContext *s)
         }
     }
 
-    ff_subtitles_queue_finalize(&pjs->q);
+    ff_subtitles_queue_finalize(s, &pjs->q);
     return res;
 }
 
diff --git a/libavformat/psxstr.c b/libavformat/psxstr.c
index fd50e549..b57981af 100644
--- a/libavformat/psxstr.c
+++ b/libavformat/psxstr.c
@@ -218,7 +218,7 @@ static int str_read_packet(AVFormatContext *s,
                 if(pkt->size != sector_count*VIDEO_DATA_CHUNK_SIZE){
                     if(pkt->data)
                         av_log(s, AV_LOG_ERROR, "missmatching sector_count\n");
-                    av_free_packet(pkt);
+                    av_packet_unref(pkt);
                     if (av_new_packet(pkt, sector_count*VIDEO_DATA_CHUNK_SIZE))
                         return AVERROR(EIO);
                     memset(pkt->data, 0, sector_count*VIDEO_DATA_CHUNK_SIZE);
@@ -238,11 +238,6 @@ static int str_read_packet(AVFormatContext *s,
                     pkt->data= NULL;
                     pkt->size= -1;
                     pkt->buf = NULL;
-#if FF_API_DESTRUCT_PACKET
-FF_DISABLE_DEPRECATION_WARNINGS
-                    pkt->destruct = NULL;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
                     return 0;
                 }
 
@@ -303,7 +298,7 @@ static int str_read_close(AVFormatContext *s)
     int i;
     for(i=0; i<32; i++){
         if(str->channels[i].tmp_pkt.data)
-            av_free_packet(&str->channels[i].tmp_pkt);
+            av_packet_unref(&str->channels[i].tmp_pkt);
     }
 
     return 0;
diff --git a/libavformat/qcp.c b/libavformat/qcp.c
index 9e2eedfe..ad4a8ae7 100644
--- a/libavformat/qcp.c
+++ b/libavformat/qcp.c
@@ -57,6 +57,11 @@ static const uint8_t guid_evrc[16] = {
     0x91, 0xef, 0x73, 0x6a, 0x51, 0x00, 0xce, 0xb4
 };
 
+static const uint8_t guid_4gv[16] = {
+    0xca, 0x29, 0xfd, 0x3c, 0x53, 0xf6, 0xf5, 0x4e,
+    0x90, 0xe9, 0xf4, 0x23, 0x6d, 0x59, 0x9b, 0x61
+};
+
 /**
  * SMV GUID as stored in the file
  */
@@ -106,6 +111,8 @@ static int qcp_read_header(AVFormatContext *s)
         st->codec->codec_id = AV_CODEC_ID_EVRC;
     } else if (!memcmp(buf, guid_smv, 16)) {
         st->codec->codec_id = AV_CODEC_ID_SMV;
+    } else if (!memcmp(buf, guid_4gv, 16)) {
+        st->codec->codec_id = AV_CODEC_ID_4GV;
     } else {
         av_log(s, AV_LOG_ERROR, "Unknown codec GUID "FF_PRI_GUID".\n",
                FF_ARG_GUID(buf));
diff --git a/libavformat/qtpalette.c b/libavformat/qtpalette.c
new file mode 100644
index 00000000..666c6b73
--- /dev/null
+++ b/libavformat/qtpalette.c
@@ -0,0 +1,116 @@
+/*
+ * QuickTime palette handling
+ * Copyright (c) 2001 Fabrice Bellard
+ * Copyright (c) 2009 Baptiste Coudurier <baptiste dot coudurier at gmail dot com>
+ * Copyright (c) 2015 Mats Peterson
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+
+#include "avformat.h"
+#include "libavutil/intreadwrite.h"
+#include "qtpalette.h"
+
+int ff_get_qtpalette(int codec_id, AVIOContext *pb, uint32_t *palette)
+{
+    int tmp, bit_depth, color_table_id, greyscale, i;
+
+    avio_seek(pb, 82, SEEK_CUR);
+
+    /* Get the bit depth and greyscale state */
+    tmp = avio_rb16(pb);
+    bit_depth = tmp & 0x1F;
+    greyscale = tmp & 0x20;
+
+    /* Get the color table ID */
+    color_table_id = avio_rb16(pb);
+
+    /* Do not create a greyscale palette for Cinepak */
+    if (greyscale && codec_id == AV_CODEC_ID_CINEPAK)
+        return 0;
+
+    /* If the depth is 1, 2, 4, or 8 bpp, file is palettized. */
+    if ((bit_depth == 1 || bit_depth == 2 || bit_depth == 4 || bit_depth == 8)) {
+        uint32_t color_count, color_start, color_end;
+        uint32_t a, r, g, b;
+
+        /* Ignore the greyscale bit for 1-bit video and sample
+         * descriptions containing a color table. */
+        if (greyscale && bit_depth > 1 && color_table_id) {
+            int color_index, color_dec;
+            /* compute the greyscale palette */
+            color_count = 1 << bit_depth;
+            color_index = 255;
+            color_dec   = 256 / (color_count - 1);
+            for (i = 0; i < color_count; i++) {
+                r = g = b = color_index;
+                palette[i] = (0xFFU << 24) | (r << 16) | (g << 8) | (b);
+                color_index -= color_dec;
+                if (color_index < 0)
+                    color_index = 0;
+            }
+        } else if (color_table_id) {
+            /* The color table ID is non-zero. Interpret this as
+             * being -1, which means use the default Macintosh
+             * color table */
+            const uint8_t *color_table;
+            color_count = 1 << bit_depth;
+            if (bit_depth == 1)
+                color_table = ff_qt_default_palette_2;
+            else if (bit_depth == 2)
+                color_table = ff_qt_default_palette_4;
+            else if (bit_depth == 4)
+                color_table = ff_qt_default_palette_16;
+            else
+                color_table = ff_qt_default_palette_256;
+            for (i = 0; i < color_count; i++) {
+                r = color_table[i * 3 + 0];
+                g = color_table[i * 3 + 1];
+                b = color_table[i * 3 + 2];
+                palette[i] = (0xFFU << 24) | (r << 16) | (g << 8) | (b);
+            }
+        } else {
+            /* The color table ID is 0; the color table is in the sample
+             * description */
+            color_start = avio_rb32(pb);
+            avio_rb16(pb); /* color table flags */
+            color_end = avio_rb16(pb);
+            if ((color_start <= 255) && (color_end <= 255)) {
+                for (i = color_start; i <= color_end; i++) {
+                    /* each A, R, G, or B component is 16 bits;
+                     * only use the top 8 bits */
+                    a = avio_r8(pb);
+                    avio_r8(pb);
+                    r = avio_r8(pb);
+                    avio_r8(pb);
+                    g = avio_r8(pb);
+                    avio_r8(pb);
+                    b = avio_r8(pb);
+                    avio_r8(pb);
+                    palette[i] = (a << 24 ) | (r << 16) | (g << 8) | (b);
+                }
+            }
+        }
+
+        return 1;
+    }
+
+    return 0;
+}
diff --git a/libavformat/qtpalette.h b/libavformat/qtpalette.h
index 7d6802f7..016e91f1 100644
--- a/libavformat/qtpalette.h
+++ b/libavformat/qtpalette.h
@@ -23,31 +23,39 @@
 #ifndef AVFORMAT_QTPALETTE_H
 #define AVFORMAT_QTPALETTE_H
 
-#include <inttypes.h>
+#include <stdint.h>
+#include "avformat.h"
 
+static const uint8_t ff_qt_default_palette_2[2 * 3] = {
+  0xFF, 0xFF, 0xFF,
+  0x00, 0x00, 0x00
+};
+
+/* From a screenshot of the "Monitors & Sound" control panel in Mac OS 7.5.5 */
 static const uint8_t ff_qt_default_palette_4[4 * 3] = {
-  0x93, 0x65, 0x5E,
   0xFF, 0xFF, 0xFF,
-  0xDF, 0xD0, 0xAB,
+  0xAC, 0xAC, 0xAC,
+  0x55, 0x55, 0x55,
   0x00, 0x00, 0x00
 };
 
+/* From a screenshot of the "Monitors & Sound" control panel in Mac OS 7.5.5 */
 static const uint8_t ff_qt_default_palette_16[16 * 3] = {
-  0xFF, 0xFB, 0xFF,
-  0xEF, 0xD9, 0xBB,
-  0xE8, 0xC9, 0xB1,
-  0x93, 0x65, 0x5E,
-  0xFC, 0xDE, 0xE8,
-  0x9D, 0x88, 0x91,
-  0xFF, 0xFF, 0xFF,
   0xFF, 0xFF, 0xFF,
-  0xFF, 0xFF, 0xFF,
-  0x47, 0x48, 0x37,
-  0x7A, 0x5E, 0x55,
-  0xDF, 0xD0, 0xAB,
-  0xFF, 0xFB, 0xF9,
-  0xE8, 0xCA, 0xC5,
-  0x8A, 0x7C, 0x77,
+  0xFC, 0xF3, 0x05,
+  0xFF, 0x64, 0x02,
+  0xDD, 0x08, 0x06,
+  0xF2, 0x08, 0x84,
+  0x46, 0x00, 0xA5,
+  0x00, 0x00, 0xD4,
+  0x02, 0xAB, 0xEA,
+  0x1F, 0xB7, 0x14,
+  0x00, 0x64, 0x11,
+  0x56, 0x2C, 0x05,
+  0x90, 0x71, 0x3A,
+  0xC0, 0xC0, 0xC0,
+  0x80, 0x80, 0x80,
+  0x40, 0x40, 0x40,
   0x00, 0x00, 0x00
 };
 
@@ -310,4 +318,15 @@ static const uint8_t ff_qt_default_palette_256[256 * 3] = {
   /* 255, 0xFF */  0x00, 0x00, 0x00
 };
 
+/**
+ * Retrieve the palette (or "color table" in QuickTime terms), either
+ * from the video sample description, or from the default Macintosh
+ * palette.
+ *
+ * The file offset of the AVIOContext pointed to by the 'pb' variable
+ * should be the start of the video sample description (the sample
+ * description size and the data format).
+ */
+int ff_get_qtpalette(int codec_id, AVIOContext *pb, uint32_t *palette);
+
 #endif /* AVFORMAT_QTPALETTE_H */
diff --git a/libavformat/r3d.c b/libavformat/r3d.c
index f220e3d9..94c3015f 100644
--- a/libavformat/r3d.c
+++ b/libavformat/r3d.c
@@ -29,6 +29,8 @@ typedef struct R3DContext {
     unsigned video_offsets_count;
     unsigned *video_offsets;
     unsigned rdvo_offset;
+
+    int audio_channels;
 } R3DContext;
 
 typedef struct Atom {
@@ -52,6 +54,7 @@ static int read_atom(AVFormatContext *s, Atom *atom)
 static int r3d_read_red1(AVFormatContext *s)
 {
     AVStream *st = avformat_new_stream(s, NULL);
+    R3DContext *r3d = s->priv_data;
     char filename[258];
     int tmp;
     int av_unused tmp2;
@@ -92,17 +95,8 @@ static int r3d_read_red1(AVFormatContext *s)
         st->avg_frame_rate = framerate;
     }
 
-    tmp = avio_r8(s->pb); // audio channels
+    r3d->audio_channels = avio_r8(s->pb); // audio channels
     av_log(s, AV_LOG_TRACE, "audio channels %d\n", tmp);
-    if (tmp > 0) {
-        AVStream *ast = avformat_new_stream(s, NULL);
-        if (!ast)
-            return AVERROR(ENOMEM);
-        ast->codec->codec_type = AVMEDIA_TYPE_AUDIO;
-        ast->codec->codec_id = AV_CODEC_ID_PCM_S32BE;
-        ast->codec->channels = tmp;
-        avpriv_set_pts_info(ast, 32, 1, st->time_base.den);
-    }
 
     avio_read(s->pb, filename, 257);
     filename[sizeof(filename)-1] = 0;
@@ -185,6 +179,11 @@ static int r3d_read_header(AVFormatContext *s)
         return -1;
     }
 
+    /* we cannot create the audio stream now because we do not know the
+     * sample rate */
+    if (r3d->audio_channels)
+        s->ctx_flags |= AVFMTCTX_NOHEADER;
+
     s->internal->data_offset = avio_tell(s->pb);
     av_log(s, AV_LOG_TRACE, "data offset %#"PRIx64"\n", s->internal->data_offset);
     if (!s->pb->seekable)
@@ -266,20 +265,33 @@ static int r3d_read_redv(AVFormatContext *s, AVPacket *pkt, Atom *atom)
     if (st->avg_frame_rate.num)
         pkt->duration = (uint64_t)st->time_base.den*
             st->avg_frame_rate.den/st->avg_frame_rate.num;
-    av_log(s, AV_LOG_TRACE, "pkt dts %"PRId64" duration %d\n", pkt->dts, pkt->duration);
+    av_log(s, AV_LOG_TRACE, "pkt dts %"PRId64" duration %"PRId64"\n", pkt->dts, pkt->duration);
 
     return 0;
 }
 
 static int r3d_read_reda(AVFormatContext *s, AVPacket *pkt, Atom *atom)
 {
-    AVStream *st = s->streams[1];
+    R3DContext *r3d = s->priv_data;
+    AVStream *st;
     int av_unused tmp, tmp2;
     int samples, size;
     int64_t pos = avio_tell(s->pb);
     unsigned dts;
     int ret;
 
+    if (s->nb_streams < 2) {
+        st = avformat_new_stream(s, NULL);
+        if (!st)
+            return AVERROR(ENOMEM);
+        st->codec->codec_type = AVMEDIA_TYPE_AUDIO;
+        st->codec->codec_id = AV_CODEC_ID_PCM_S32BE;
+        st->codec->channels = r3d->audio_channels;
+        avpriv_set_pts_info(st, 32, 1, s->streams[0]->time_base.den);
+    } else {
+        st = s->streams[1];
+    }
+
     dts = avio_rb32(s->pb);
 
     st->codec->sample_rate = avio_rb32(s->pb);
@@ -316,7 +328,7 @@ static int r3d_read_reda(AVFormatContext *s, AVPacket *pkt, Atom *atom)
     pkt->dts = dts;
     if (st->codec->sample_rate)
         pkt->duration = av_rescale(samples, st->time_base.den, st->codec->sample_rate);
-    av_log(s, AV_LOG_TRACE, "pkt dts %"PRId64" duration %d samples %d sample rate %d\n",
+    av_log(s, AV_LOG_TRACE, "pkt dts %"PRId64" duration %"PRId64" samples %d sample rate %d\n",
             pkt->dts, pkt->duration, samples, st->codec->sample_rate);
 
     return 0;
@@ -324,6 +336,7 @@ static int r3d_read_reda(AVFormatContext *s, AVPacket *pkt, Atom *atom)
 
 static int r3d_read_packet(AVFormatContext *s, AVPacket *pkt)
 {
+    R3DContext *r3d = s->priv_data;
     Atom atom;
     int err = 0;
 
@@ -340,9 +353,9 @@ static int r3d_read_packet(AVFormatContext *s, AVPacket *pkt)
                 return 0;
             break;
         case MKTAG('R','E','D','A'):
-            if (s->nb_streams < 2)
+            if (!r3d->audio_channels)
                 return -1;
-            if (s->streams[1]->discard == AVDISCARD_ALL)
+            if (s->nb_streams >= 2 && s->streams[1]->discard == AVDISCARD_ALL)
                 goto skip;
             if (!(err = r3d_read_reda(s, pkt, &atom)))
                 return 0;
diff --git a/libavformat/rawdec.c b/libavformat/rawdec.c
index b903e63f..35ad1181 100644
--- a/libavformat/rawdec.c
+++ b/libavformat/rawdec.c
@@ -45,7 +45,7 @@ int ff_raw_read_partial_packet(AVFormatContext *s, AVPacket *pkt)
     pkt->stream_index = 0;
     ret = ffio_read_partial(s->pb, pkt->data, size);
     if (ret < 0) {
-        av_free_packet(pkt);
+        av_packet_unref(pkt);
         return ret;
     }
     av_shrink_packet(pkt, ret);
@@ -123,19 +123,6 @@ AVInputFormat ff_data_demuxer = {
 };
 #endif
 
-#if CONFIG_LATM_DEMUXER
-
-AVInputFormat ff_latm_demuxer = {
-    .name           = "latm",
-    .long_name      = NULL_IF_CONFIG_SMALL("raw LOAS/LATM"),
-    .read_header    = ff_raw_audio_read_header,
-    .read_packet    = ff_raw_read_partial_packet,
-    .flags          = AVFMT_GENERIC_INDEX | AVFMT_NOTIMESTAMPS,
-    .extensions     = "latm",
-    .raw_codec_id   = AV_CODEC_ID_AAC_LATM,
-};
-#endif
-
 #if CONFIG_MJPEG_DEMUXER
 static int mjpeg_probe(AVProbeData *p)
 {
@@ -188,10 +175,10 @@ static int mjpeg_probe(AVProbeData *p)
     }
 
     if (nb_invalid*4 + 1 < nb_frames) {
-        static const char ct_jpeg[] = "\r\nContent-Type: image/jpeg\r\n\r\n";
+        static const char ct_jpeg[] = "\r\nContent-Type: image/jpeg\r\n";
         int i;
 
-        for (i=0; i<FFMIN(p->buf_size - sizeof(ct_jpeg), 100); i++)
+        for (i=0; i<FFMIN(p->buf_size - (int)sizeof(ct_jpeg), 100); i++)
             if (!memcmp(p->buf + i, ct_jpeg, sizeof(ct_jpeg) - 1))
                 return AVPROBE_SCORE_EXTENSION;
 
@@ -205,43 +192,3 @@ static int mjpeg_probe(AVProbeData *p)
 
 FF_DEF_RAWVIDEO_DEMUXER2(mjpeg, "raw MJPEG video", mjpeg_probe, "mjpg,mjpeg,mpo", AV_CODEC_ID_MJPEG, AVFMT_GENERIC_INDEX|AVFMT_NOTIMESTAMPS)
 #endif
-
-#if CONFIG_MLP_DEMUXER
-AVInputFormat ff_mlp_demuxer = {
-    .name           = "mlp",
-    .long_name      = NULL_IF_CONFIG_SMALL("raw MLP"),
-    .read_header    = ff_raw_audio_read_header,
-    .read_packet    = ff_raw_read_partial_packet,
-    .flags          = AVFMT_GENERIC_INDEX | AVFMT_NOTIMESTAMPS,
-    .extensions     = "mlp",
-    .raw_codec_id   = AV_CODEC_ID_MLP,
-};
-#endif
-
-#if CONFIG_TRUEHD_DEMUXER
-AVInputFormat ff_truehd_demuxer = {
-    .name           = "truehd",
-    .long_name      = NULL_IF_CONFIG_SMALL("raw TrueHD"),
-    .read_header    = ff_raw_audio_read_header,
-    .read_packet    = ff_raw_read_partial_packet,
-    .flags          = AVFMT_GENERIC_INDEX | AVFMT_NOTIMESTAMPS,
-    .extensions     = "thd",
-    .raw_codec_id   = AV_CODEC_ID_TRUEHD,
-};
-#endif
-
-#if CONFIG_SHORTEN_DEMUXER
-AVInputFormat ff_shorten_demuxer = {
-    .name           = "shn",
-    .long_name      = NULL_IF_CONFIG_SMALL("raw Shorten"),
-    .read_header    = ff_raw_audio_read_header,
-    .read_packet    = ff_raw_read_partial_packet,
-    .flags          = AVFMT_NOBINSEARCH | AVFMT_NOGENSEARCH | AVFMT_NO_BYTE_SEEK|AVFMT_NOTIMESTAMPS,
-    .extensions     = "shn",
-    .raw_codec_id   = AV_CODEC_ID_SHORTEN,
-};
-#endif
-
-#if CONFIG_VC1_DEMUXER
-FF_DEF_RAWVIDEO_DEMUXER2(vc1, "raw VC-1", NULL, "vc1", AV_CODEC_ID_VC1, AVFMT_GENERIC_INDEX|AVFMT_NOTIMESTAMPS)
-#endif
diff --git a/libavformat/rawenc.c b/libavformat/rawenc.c
index e59f1ae9..358ee4e2 100644
--- a/libavformat/rawenc.c
+++ b/libavformat/rawenc.c
@@ -56,6 +56,25 @@ AVOutputFormat ff_ac3_muxer = {
 #endif
 
 #if CONFIG_ADX_MUXER
+
+static int adx_write_trailer(AVFormatContext *s)
+{
+    AVIOContext *out = s->pb;
+    int64_t end_pos;
+    uint64_t nb_samples;
+    /* Back-patch the sample count at offset 12; needs a seekable output. */
+    if (!out->seekable)
+        return 0; /* output cannot be rewound; leave the header untouched */
+    end_pos    = avio_tell(out);
+    nb_samples = (end_pos - 36) / s->streams[0]->codec->channels / 18 * 32;
+    if (nb_samples <= UINT32_MAX) { /* only written when it fits in 32 bits */
+        avio_seek(out, 12, SEEK_SET);
+        avio_wb32(out, nb_samples);
+        avio_seek(out, end_pos, SEEK_SET); /* restore the write position */
+    }
+    return 0;
+}
+
 AVOutputFormat ff_adx_muxer = {
     .name              = "adx",
     .long_name         = NULL_IF_CONFIG_SMALL("CRI ADX"),
@@ -64,6 +83,7 @@ AVOutputFormat ff_adx_muxer = {
     .video_codec       = AV_CODEC_ID_NONE,
     .write_header      = force_one_stream,
     .write_packet      = ff_raw_write_packet,
+    .write_trailer     = adx_write_trailer,
     .flags             = AVFMT_NOTIMESTAMPS,
 };
 #endif
@@ -95,7 +115,7 @@ AVOutputFormat ff_data_muxer = {
 AVOutputFormat ff_dirac_muxer = {
     .name              = "dirac",
     .long_name         = NULL_IF_CONFIG_SMALL("raw Dirac"),
-    .extensions        = "drc",
+    .extensions        = "drc,vc2",
     .audio_codec       = AV_CODEC_ID_NONE,
     .video_codec       = AV_CODEC_ID_DIRAC,
     .write_header      = force_one_stream,
@@ -218,9 +238,10 @@ AVOutputFormat ff_h264_muxer = {
 AVOutputFormat ff_hevc_muxer = {
     .name              = "hevc",
     .long_name         = NULL_IF_CONFIG_SMALL("raw HEVC video"),
-    .extensions        = "hevc",
+    .extensions        = "hevc,h265,265",
     .audio_codec       = AV_CODEC_ID_NONE,
     .video_codec       = AV_CODEC_ID_HEVC,
+    .write_header      = force_one_stream,
     .write_packet      = ff_raw_write_packet,
     .flags             = AVFMT_NOTIMESTAMPS,
 };
@@ -233,6 +254,7 @@ AVOutputFormat ff_m4v_muxer = {
     .extensions        = "m4v",
     .audio_codec       = AV_CODEC_ID_NONE,
     .video_codec       = AV_CODEC_ID_MPEG4,
+    .write_header      = force_one_stream,
     .write_packet      = ff_raw_write_packet,
     .flags             = AVFMT_NOTIMESTAMPS,
 };
diff --git a/libavformat/rawvideodec.c b/libavformat/rawvideodec.c
index cbcae43c..91bdba0e 100644
--- a/libavformat/rawvideodec.c
+++ b/libavformat/rawvideodec.c
@@ -19,6 +19,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "libavutil/imgutils.h"
 #include "libavutil/parseutils.h"
 #include "libavutil/pixdesc.h"
 #include "libavutil/opt.h"
@@ -38,6 +39,7 @@ static int rawvideo_read_header(AVFormatContext *ctx)
     RawVideoDemuxerContext *s = ctx->priv_data;
     enum AVPixelFormat pix_fmt;
     AVStream *st;
+    int packet_size;
 
     st = avformat_new_stream(ctx, NULL);
     if (!st)
@@ -58,7 +60,11 @@ static int rawvideo_read_header(AVFormatContext *ctx)
     st->codec->width  = s->width;
     st->codec->height = s->height;
     st->codec->pix_fmt = pix_fmt;
-    st->codec->bit_rate = av_rescale_q(avpicture_get_size(st->codec->pix_fmt, s->width, s->height),
+    packet_size = av_image_get_buffer_size(st->codec->pix_fmt, s->width, s->height, 1);
+    if (packet_size < 0)
+        return packet_size;
+    ctx->packet_size = packet_size;
+    st->codec->bit_rate = av_rescale_q(ctx->packet_size,
                                        (AVRational){8,1}, st->time_base);
 
     return 0;
@@ -67,18 +73,10 @@ static int rawvideo_read_header(AVFormatContext *ctx)
 
 static int rawvideo_read_packet(AVFormatContext *s, AVPacket *pkt)
 {
-    int packet_size, ret, width, height;
-    AVStream *st = s->streams[0];
-
-    width = st->codec->width;
-    height = st->codec->height;
-
-    packet_size = avpicture_get_size(st->codec->pix_fmt, width, height);
-    if (packet_size < 0)
-        return -1;
+    int ret;
 
-    ret = av_get_packet(s->pb, pkt, packet_size);
-    pkt->pts = pkt->dts = pkt->pos / packet_size;
+    ret = av_get_packet(s->pb, pkt, s->packet_size);
+    pkt->pts = pkt->dts = pkt->pos / s->packet_size;
 
     pkt->stream_index = 0;
     if (ret < 0)
diff --git a/libavformat/rdt.c b/libavformat/rdt.c
index bb56a8ba..c3ac1985 100644
--- a/libavformat/rdt.c
+++ b/libavformat/rdt.c
@@ -86,7 +86,7 @@ struct PayloadContext {
     RMStream **rmst;
     uint8_t *mlti_data;
     unsigned int mlti_data_size;
-    char buffer[RTP_MAX_PACKET_LENGTH + FF_INPUT_BUFFER_PADDING_SIZE];
+    char buffer[RTP_MAX_PACKET_LENGTH + AV_INPUT_BUFFER_PADDING_SIZE];
     int audio_pkt_cnt; /**< remaining audio packets in rmdec */
 };
 
@@ -398,7 +398,7 @@ rdt_parse_b64buf (unsigned int *target_len, const char *p)
         len -= 2; /* skip embracing " at start/end */
     }
     *target_len = len * 3 / 4;
-    target = av_mallocz(*target_len + FF_INPUT_BUFFER_PADDING_SIZE);
+    target = av_mallocz(*target_len + AV_INPUT_BUFFER_PADDING_SIZE);
     if (!target)
         return NULL;
     av_base64_decode(target, p, *target_len);
@@ -448,7 +448,7 @@ real_parse_asm_rule(AVStream *st, const char *p, const char *end)
 {
     do {
         /* can be either averagebandwidth= or AverageBandwidth= */
-        if (sscanf(p, " %*1[Aa]verage%*1[Bb]andwidth=%d", &st->codec->bit_rate) == 1)
+        if (sscanf(p, " %*1[Aa]verage%*1[Bb]andwidth=%"SCNd64, &st->codec->bit_rate) == 1)
             break;
         if (!(p = strchr(p, ',')) || p > end)
             p = end;
diff --git a/libavformat/realtextdec.c b/libavformat/realtextdec.c
index fff85d6b..f13321c9 100644
--- a/libavformat/realtextdec.c
+++ b/libavformat/realtextdec.c
@@ -115,7 +115,7 @@ static int realtext_read_header(AVFormatContext *s)
         }
         av_bprint_clear(&buf);
     }
-    ff_subtitles_queue_finalize(&rt->q);
+    ff_subtitles_queue_finalize(s, &rt->q);
 
 end:
     av_bprint_finalize(&buf, NULL);
diff --git a/libavformat/redspark.c b/libavformat/redspark.c
index 13a7b37d..5cea6e96 100644
--- a/libavformat/redspark.c
+++ b/libavformat/redspark.c
@@ -67,7 +67,7 @@ static int redspark_read_header(AVFormatContext *s)
         return AVERROR(ENOMEM);
     codec = st->codec;
 
-    header = av_malloc(HEADER_SIZE + FF_INPUT_BUFFER_PADDING_SIZE);
+    header = av_malloc(HEADER_SIZE + AV_INPUT_BUFFER_PADDING_SIZE);
     if (!header)
         return AVERROR(ENOMEM);
     pbc = header;
@@ -148,7 +148,7 @@ static int redspark_read_packet(AVFormatContext *s, AVPacket *pkt)
 
     ret = av_get_packet(s->pb, pkt, size);
     if (ret != size) {
-        av_free_packet(pkt);
+        av_packet_unref(pkt);
         return AVERROR(EIO);
     }
 
diff --git a/libavformat/replaygain.c b/libavformat/replaygain.c
index 807f8515..707d3cd4 100644
--- a/libavformat/replaygain.c
+++ b/libavformat/replaygain.c
@@ -75,7 +75,7 @@ int ff_replaygain_export_raw(AVStream *st, int32_t tg, uint32_t tp,
     if (tg == INT32_MIN && ag == INT32_MIN)
         return 0;
 
-    replaygain = (AVReplayGain*)ff_stream_new_side_data(st, AV_PKT_DATA_REPLAYGAIN,
+    replaygain = (AVReplayGain*)av_stream_new_side_data(st, AV_PKT_DATA_REPLAYGAIN,
                                                         sizeof(*replaygain));
     if (!replaygain)
         return AVERROR(ENOMEM);
diff --git a/libavformat/riff.c b/libavformat/riff.c
index 8e4e3e46..cf5a2ffa 100644
--- a/libavformat/riff.c
+++ b/libavformat/riff.c
@@ -112,6 +112,7 @@ const AVCodecTag ff_codec_bmp_tags[] = {
     { AV_CODEC_ID_MPEG4,        MKTAG('P', 'L', 'V', '1') }, /* Pelco DVR MPEG-4 */
     { AV_CODEC_ID_MPEG4,        MKTAG('G', 'L', 'V', '4') },
     { AV_CODEC_ID_MPEG4,        MKTAG('G', 'M', 'P', '4') }, /* GeoVision camera */
+    { AV_CODEC_ID_MPEG4,        MKTAG('M', 'N', 'M', '4') }, /* March Networks DVR */
     { AV_CODEC_ID_MSMPEG4V3,    MKTAG('M', 'P', '4', '3') },
     { AV_CODEC_ID_MSMPEG4V3,    MKTAG('D', 'I', 'V', '3') },
     { AV_CODEC_ID_MSMPEG4V3,    MKTAG('M', 'P', 'G', '3') },
@@ -148,6 +149,7 @@ const AVCodecTag ff_codec_bmp_tags[] = {
     { AV_CODEC_ID_DVVIDEO,      MKTAG('p', 'd', 'v', 'c') },
     { AV_CODEC_ID_DVVIDEO,      MKTAG('S', 'L', '2', '5') },
     { AV_CODEC_ID_DVVIDEO,      MKTAG('S', 'L', 'D', 'V') },
+    { AV_CODEC_ID_DVVIDEO,      MKTAG('A', 'V', 'd', '1') },
     { AV_CODEC_ID_MPEG1VIDEO,   MKTAG('m', 'p', 'g', '1') },
     { AV_CODEC_ID_MPEG1VIDEO,   MKTAG('m', 'p', 'g', '2') },
     { AV_CODEC_ID_MPEG2VIDEO,   MKTAG('m', 'p', 'g', '2') },
@@ -241,10 +243,46 @@ const AVCodecTag ff_codec_bmp_tags[] = {
     { AV_CODEC_ID_RAWVIDEO,     MKTAG('Y', 'V', 'U', '9') },
     { AV_CODEC_ID_RAWVIDEO,     MKTAG('a', 'u', 'v', '2') },
     { AV_CODEC_ID_RAWVIDEO,     MKTAG('Y', 'V', 'Y', 'U') },
+    { AV_CODEC_ID_RAWVIDEO,     MKTAG('Y', 'U', 'Y', 'V') },
+    { AV_CODEC_ID_RAWVIDEO,     MKTAG('I', '4', '1', '0') },
+    { AV_CODEC_ID_RAWVIDEO,     MKTAG('I', '4', '1', '1') },
+    { AV_CODEC_ID_RAWVIDEO,     MKTAG('I', '4', '2', '2') },
+    { AV_CODEC_ID_RAWVIDEO,     MKTAG('I', '4', '4', '0') },
+    { AV_CODEC_ID_RAWVIDEO,     MKTAG('I', '4', '4', '4') },
+    { AV_CODEC_ID_RAWVIDEO,     MKTAG('J', '4', '2', '0') },
+    { AV_CODEC_ID_RAWVIDEO,     MKTAG('J', '4', '2', '2') },
+    { AV_CODEC_ID_RAWVIDEO,     MKTAG('J', '4', '4', '0') },
+    { AV_CODEC_ID_RAWVIDEO,     MKTAG('J', '4', '4', '4') },
+    { AV_CODEC_ID_RAWVIDEO,     MKTAG('Y', 'U', 'V', 'A') },
+    { AV_CODEC_ID_RAWVIDEO,     MKTAG('I', '4', '0', 'A') },
+    { AV_CODEC_ID_RAWVIDEO,     MKTAG('I', '4', '2', 'A') },
+    { AV_CODEC_ID_RAWVIDEO,     MKTAG('R', 'G', 'B', '2') },
+    { AV_CODEC_ID_RAWVIDEO,     MKTAG('R', 'V', '1', '5') },
+    { AV_CODEC_ID_RAWVIDEO,     MKTAG('R', 'V', '1', '6') },
+    { AV_CODEC_ID_RAWVIDEO,     MKTAG('R', 'V', '2', '4') },
+    { AV_CODEC_ID_RAWVIDEO,     MKTAG('R', 'V', '3', '2') },
+    { AV_CODEC_ID_RAWVIDEO,     MKTAG('R', 'G', 'B', 'A') },
+    { AV_CODEC_ID_RAWVIDEO,     MKTAG('A', 'V', '3', '2') },
+    { AV_CODEC_ID_RAWVIDEO,     MKTAG('G', 'R', 'E', 'Y') },
+    { AV_CODEC_ID_RAWVIDEO,     MKTAG('I', '0', '9', 'L') },
+    { AV_CODEC_ID_RAWVIDEO,     MKTAG('I', '0', '9', 'B') },
+    { AV_CODEC_ID_RAWVIDEO,     MKTAG('I', '2', '9', 'L') },
+    { AV_CODEC_ID_RAWVIDEO,     MKTAG('I', '2', '9', 'B') },
+    { AV_CODEC_ID_RAWVIDEO,     MKTAG('I', '4', '9', 'L') },
+    { AV_CODEC_ID_RAWVIDEO,     MKTAG('I', '4', '9', 'B') },
+    { AV_CODEC_ID_RAWVIDEO,     MKTAG('I', '0', 'A', 'L') },
+    { AV_CODEC_ID_RAWVIDEO,     MKTAG('I', '0', 'A', 'B') },
+    { AV_CODEC_ID_RAWVIDEO,     MKTAG('I', '2', 'A', 'L') },
+    { AV_CODEC_ID_RAWVIDEO,     MKTAG('I', '2', 'A', 'B') },
+    { AV_CODEC_ID_RAWVIDEO,     MKTAG('I', '4', 'A', 'L') },
+    { AV_CODEC_ID_RAWVIDEO,     MKTAG('I', '4', 'A', 'B') },
+    { AV_CODEC_ID_RAWVIDEO,     MKTAG('I', '4', 'F', 'L') },
+    { AV_CODEC_ID_RAWVIDEO,     MKTAG('I', '4', 'F', 'B') },
     { AV_CODEC_ID_FRWU,         MKTAG('F', 'R', 'W', 'U') },
     { AV_CODEC_ID_R10K,         MKTAG('R', '1', '0', 'k') },
     { AV_CODEC_ID_R210,         MKTAG('r', '2', '1', '0') },
     { AV_CODEC_ID_V210,         MKTAG('v', '2', '1', '0') },
+    { AV_CODEC_ID_V210,         MKTAG('C', '2', '1', '0') },
     { AV_CODEC_ID_V308,         MKTAG('v', '3', '0', '8') },
     { AV_CODEC_ID_V408,         MKTAG('v', '4', '0', '8') },
     { AV_CODEC_ID_AYUV,         MKTAG('A', 'Y', 'U', 'V') },
@@ -322,6 +360,7 @@ const AVCodecTag ff_codec_bmp_tags[] = {
     { AV_CODEC_ID_JPEG2000,     MKTAG('L', 'J', '2', 'C') },
     { AV_CODEC_ID_JPEG2000,     MKTAG('L', 'J', '2', 'K') },
     { AV_CODEC_ID_JPEG2000,     MKTAG('I', 'P', 'J', '2') },
+    { AV_CODEC_ID_JPEG2000,     MKTAG('A', 'V', 'j', '2') }, /* Avid jpeg2000 */
     { AV_CODEC_ID_VMNC,         MKTAG('V', 'M', 'n', 'c') },
     { AV_CODEC_ID_TARGA,        MKTAG('t', 'g', 'a', ' ') },
     { AV_CODEC_ID_PNG,          MKTAG('M', 'P', 'N', 'G') },
@@ -370,6 +409,11 @@ const AVCodecTag ff_codec_bmp_tags[] = {
     { AV_CODEC_ID_HQX,          MKTAG('C', 'H', 'Q', 'X') },
     { AV_CODEC_ID_TDSC,         MKTAG('T', 'D', 'S', 'C') },
     { AV_CODEC_ID_HQ_HQA,       MKTAG('C', 'U', 'V', 'C') },
+    { AV_CODEC_ID_RV40,         MKTAG('R', 'V', '4', '0') },
+    { AV_CODEC_ID_SCREENPRESSO, MKTAG('S', 'P', 'V', '1') },
+    { AV_CODEC_ID_RSCC,         MKTAG('R', 'S', 'C', 'C') },
+    { AV_CODEC_ID_RSCC,         MKTAG('I', 'S', 'C', 'C') },
+    { AV_CODEC_ID_CFHD,         MKTAG('C', 'F', 'H', 'D') },
     { AV_CODEC_ID_NONE,         0 }
 };
 
@@ -417,7 +461,11 @@ const AVCodecTag ff_codec_wav_tags[] = {
     { AV_CODEC_ID_WMAV2,           0x0161 },
     { AV_CODEC_ID_WMAPRO,          0x0162 },
     { AV_CODEC_ID_WMALOSSLESS,     0x0163 },
+    { AV_CODEC_ID_XMA1,            0x0165 },
+    { AV_CODEC_ID_XMA2,            0x0166 },
     { AV_CODEC_ID_ADPCM_CT,        0x0200 },
+    { AV_CODEC_ID_DVAUDIO,         0x0215 },
+    { AV_CODEC_ID_DVAUDIO,         0x0216 },
     { AV_CODEC_ID_ATRAC3,          0x0270 },
     { AV_CODEC_ID_ADPCM_G722,      0x028F },
     { AV_CODEC_ID_IMC,             0x0401 },
diff --git a/libavformat/riff.h b/libavformat/riff.h
index ae5ecef4..3b57bb45 100644
--- a/libavformat/riff.h
+++ b/libavformat/riff.h
@@ -52,6 +52,11 @@ void ff_put_bmp_header(AVIOContext *pb, AVCodecContext *enc, const AVCodecTag *t
  */
 #define FF_PUT_WAV_HEADER_FORCE_WAVEFORMATEX    0x00000001
 
+/**
+ * Tell ff_put_wav_header() to write an empty channel mask.
+ */
+#define FF_PUT_WAV_HEADER_SKIP_CHANNELMASK      0x00000002
+
 /**
  * Write WAVEFORMAT header structure.
  *
@@ -62,7 +67,7 @@ void ff_put_bmp_header(AVIOContext *pb, AVCodecContext *enc, const AVCodecTag *t
 int ff_put_wav_header(AVIOContext *pb, AVCodecContext *enc, int flags);
 
 enum AVCodecID ff_wav_codec_get_id(unsigned int tag, int bps);
-int ff_get_wav_header(AVIOContext *pb, AVCodecContext *codec, int size, int big_endian);
+int ff_get_wav_header(AVFormatContext *s, AVIOContext *pb, AVCodecContext *codec, int size, int big_endian);
 
 extern const AVCodecTag ff_codec_bmp_tags[]; // exposed through avformat_get_riff_video_tags()
 extern const AVCodecTag ff_codec_wav_tags[];
@@ -102,6 +107,8 @@ extern const AVCodecGuid ff_codec_wav_guids[];
 
 #define FF_MEDIASUBTYPE_BASE_GUID \
     0x00, 0x00, 0x10, 0x00, 0x80, 0x00, 0x00, 0xAA, 0x00, 0x38, 0x9B, 0x71
+#define FF_AMBISONIC_BASE_GUID \
+    0x21, 0x07, 0xD3, 0x11, 0x86, 0x44, 0xC8, 0xC1, 0xCA, 0x00, 0x00, 0x00
 
 static av_always_inline int ff_guidcmp(const void *g1, const void *g2)
 {
diff --git a/libavformat/riffdec.c b/libavformat/riffdec.c
index f44df1e6..d7b81a0d 100644
--- a/libavformat/riffdec.c
+++ b/libavformat/riffdec.c
@@ -31,10 +31,12 @@
 
 int ff_get_guid(AVIOContext *s, ff_asf_guid *g)
 {
+    int ret;
     av_assert0(sizeof(*g) == 16); //compiler will optimize this out
-    if (avio_read(s, *g, sizeof(*g)) < (int)sizeof(*g)) {
+    ret = avio_read(s, *g, sizeof(*g));
+    if (ret < (int)sizeof(*g)) {
         memset(*g, 0, sizeof(*g));
-        return AVERROR_INVALIDDATA;
+        return ret < 0 ? ret : AVERROR_INVALIDDATA;
     }
     return 0;
 }
@@ -67,6 +69,8 @@ static void parse_waveformatex(AVIOContext *pb, AVCodecContext *c)
 
     ff_get_guid(pb, &subformat);
     if (!memcmp(subformat + 4,
+                (const uint8_t[]){ FF_AMBISONIC_BASE_GUID }, 12) ||
+        !memcmp(subformat + 4,
                 (const uint8_t[]){ FF_MEDIASUBTYPE_BASE_GUID }, 12)) {
         c->codec_tag = AV_RL32(subformat);
         c->codec_id  = ff_wav_codec_get_id(c->codec_tag,
@@ -81,25 +85,31 @@ static void parse_waveformatex(AVIOContext *pb, AVCodecContext *c)
 }
 
 /* "big_endian" values are needed for RIFX file format */
-int ff_get_wav_header(AVIOContext *pb, AVCodecContext *codec, int size, int big_endian)
+int ff_get_wav_header(AVFormatContext *s, AVIOContext *pb,
+                      AVCodecContext *codec, int size, int big_endian)
 {
     int id;
+    uint64_t bitrate = 0;
 
-    if (size < 14)
+    if (size < 14) {
         avpriv_request_sample(codec, "wav header size < 14");
+        return AVERROR_INVALIDDATA;
+    }
 
     codec->codec_type  = AVMEDIA_TYPE_AUDIO;
     if (!big_endian) {
         id                 = avio_rl16(pb);
-        codec->channels    = avio_rl16(pb);
-        codec->sample_rate = avio_rl32(pb);
-        codec->bit_rate    = avio_rl32(pb) * 8;
-        codec->block_align = avio_rl16(pb);
+        if (id != 0x0165) {
+            codec->channels    = avio_rl16(pb);
+            codec->sample_rate = avio_rl32(pb);
+            bitrate            = avio_rl32(pb) * 8LL;
+            codec->block_align = avio_rl16(pb);
+        }
     } else {
         id                 = avio_rb16(pb);
         codec->channels    = avio_rb16(pb);
         codec->sample_rate = avio_rb32(pb);
-        codec->bit_rate    = avio_rb32(pb) * 8;
+        bitrate            = avio_rb32(pb) * 8LL;
         codec->block_align = avio_rb16(pb);
     }
     if (size == 14) {  /* We're dealing with plain vanilla WAVEFORMAT */
@@ -118,7 +128,7 @@ int ff_get_wav_header(AVIOContext *pb, AVCodecContext *codec, int size, int big_
         codec->codec_id  = ff_wav_codec_get_id(id,
                                                codec->bits_per_coded_sample);
     }
-    if (size >= 18) {  /* We're obviously dealing with WAVEFORMATEX */
+    if (size >= 18 && id != 0x0165) {  /* We're obviously dealing with WAVEFORMATEX */
         int cbSize = avio_rl16(pb); /* cbSize */
         if (big_endian) {
             avpriv_report_missing_feature(codec, "WAVEFORMATEX support for RIFX files\n");
@@ -141,9 +151,27 @@ int ff_get_wav_header(AVIOContext *pb, AVCodecContext *codec, int size, int big_
         /* It is possible for the chunk to contain garbage at the end */
         if (size > 0)
             avio_skip(pb, size);
+    } else if (id == 0x0165 && size >= 32) {
+        int nb_streams, i;
+
+        size -= 4;
+        av_freep(&codec->extradata);
+        if (ff_get_extradata(codec, pb, size) < 0)
+            return AVERROR(ENOMEM);
+        nb_streams         = AV_RL16(codec->extradata + 4);
+        codec->sample_rate = AV_RL32(codec->extradata + 12);
+        codec->channels    = 0;
+        bitrate            = 0;
+        if (size < 8 + nb_streams * 20)
+            return AVERROR_INVALIDDATA;
+        for (i = 0; i < nb_streams; i++)
+            codec->channels += codec->extradata[8 + i * 20 + 17];
     }
+
+    codec->bit_rate = bitrate;
+
     if (codec->sample_rate <= 0) {
-        av_log(NULL, AV_LOG_ERROR,
+        av_log(s, AV_LOG_ERROR,
                "Invalid sample rate: %d\n", codec->sample_rate);
         return AVERROR_INVALIDDATA;
     }
@@ -251,6 +279,9 @@ int ff_read_riff_info(AVFormatContext *s, int64_t size)
         }
 
         AV_WL32(key, chunk_code);
+        // Work around VC++ 2015 Update 1 code-gen bug:
+        // https://connect.microsoft.com/VisualStudio/feedback/details/2291638
+        key[4] = 0;
 
         if (avio_read(pb, value, chunk_size) != chunk_size) {
             av_log(s, AV_LOG_WARNING,
diff --git a/libavformat/riffenc.c b/libavformat/riffenc.c
index 85c953f2..ceb27f27 100644
--- a/libavformat/riffenc.c
+++ b/libavformat/riffenc.c
@@ -168,8 +168,9 @@ int ff_put_wav_header(AVIOContext *pb, AVCodecContext *enc, int flags)
     }
     /* write WAVEFORMATEXTENSIBLE extensions */
     if (waveformatextensible) {
-        int write_channel_mask = enc->strict_std_compliance < FF_COMPLIANCE_NORMAL ||
-                                 enc->channel_layout < 0x40000;
+        int write_channel_mask = !(flags & FF_PUT_WAV_HEADER_SKIP_CHANNELMASK) &&
+                                 (enc->strict_std_compliance < FF_COMPLIANCE_NORMAL ||
+                                  enc->channel_layout < 0x40000);
         /* 22 is WAVEFORMATEXTENSIBLE size */
         avio_wl16(pb, riff_extradata - riff_extradata_start + 22);
         /* ValidBitsPerSample || SamplesPerBlock || Reserved */
diff --git a/libavformat/rl2.c b/libavformat/rl2.c
index d354339e..50170166 100644
--- a/libavformat/rl2.c
+++ b/libavformat/rl2.c
@@ -240,7 +240,7 @@ static int rl2_read_packet(AVFormatContext *s,
     /** fill the packet */
     ret = av_get_packet(pb, pkt, sample->size);
     if(ret != sample->size){
-        av_free_packet(pkt);
+        av_packet_unref(pkt);
         return AVERROR(EIO);
     }
 
diff --git a/libavformat/rmdec.c b/libavformat/rmdec.c
index 832f15da..1a61b10d 100644
--- a/libavformat/rmdec.c
+++ b/libavformat/rmdec.c
@@ -63,6 +63,7 @@ typedef struct RMDemuxContext {
     int remaining_len;
     int audio_stream_num; ///< Stream number for audio packets
     int audio_pkt_cnt; ///< Output packet counter
+    int data_end;
 } RMDemuxContext;
 
 static int rm_read_close(AVFormatContext *s);
@@ -120,7 +121,7 @@ RMStream *ff_rm_alloc_rmstream (void)
 
 void ff_rm_free_rmstream (RMStream *rms)
 {
-    av_free_packet(&rms->pkt);
+    av_packet_unref(&rms->pkt);
 }
 
 static int rm_read_audio_stream_info(AVFormatContext *s, AVIOContext *pb,
@@ -220,7 +221,7 @@ static int rm_read_audio_stream_info(AVFormatContext *s, AVIOContext *pb,
                 if (version == 5)
                     avio_r8(pb);
                 codecdata_length = avio_rb32(pb);
-                if(codecdata_length + FF_INPUT_BUFFER_PADDING_SIZE <= (unsigned)codecdata_length){
+                if(codecdata_length + AV_INPUT_BUFFER_PADDING_SIZE <= (unsigned)codecdata_length){
                     av_log(s, AV_LOG_ERROR, "codecdata_length too large\n");
                     return -1;
                 }
@@ -250,7 +251,7 @@ static int rm_read_audio_stream_info(AVFormatContext *s, AVIOContext *pb,
             if (version == 5)
                 avio_r8(pb);
             codecdata_length = avio_rb32(pb);
-            if(codecdata_length + FF_INPUT_BUFFER_PADDING_SIZE <= (unsigned)codecdata_length){
+            if(codecdata_length + AV_INPUT_BUFFER_PADDING_SIZE <= (unsigned)codecdata_length){
                 av_log(s, AV_LOG_ERROR, "codecdata_length too large\n");
                 return -1;
             }
@@ -327,20 +328,6 @@ int ff_rm_read_mdpr_codecdata(AVFormatContext *s, AVIOContext *pb,
     codec_pos = avio_tell(pb);
     v = avio_rb32(pb);
 
-    if (v == MKBETAG('M', 'L', 'T', 'I')) {
-        int number_of_streams = avio_rb16(pb);
-        int number_of_mdpr;
-        int i;
-        for (i = 0; i<number_of_streams; i++)
-            avio_rb16(pb);
-        number_of_mdpr = avio_rb16(pb);
-        if (number_of_mdpr != 1) {
-            avpriv_request_sample(s, "MLTI with multiple MDPR");
-        }
-        avio_rb32(pb);
-        v = avio_rb32(pb);
-    }
-
     if (v == MKTAG(0xfd, 'a', 'r', '.')) {
         /* ra type header */
         if (rm_read_audio_stream_info(s, pb, st, rst, 0))
@@ -502,6 +489,47 @@ static int rm_read_header_old(AVFormatContext *s)
     return rm_read_audio_stream_info(s, s->pb, st, st->priv_data, 1);
 }
 
+/* Read an MLTI block: skip number_of_streams 16-bit entries, then parse each
+ * size-prefixed MDPR codec-data record; records after the first go to newly
+ * created streams with id st->id + (index << 16). Returns 0 or an error < 0. */
+static int rm_read_multi(AVFormatContext *s, AVIOContext *pb,
+                         AVStream *st, char *mime)
+{
+    int number_of_streams = avio_rb16(pb);
+    int number_of_mdpr;
+    int i, ret;
+    unsigned size2;
+    for (i = 0; i<number_of_streams; i++)
+        avio_rb16(pb);
+    number_of_mdpr = avio_rb16(pb);
+    if (number_of_mdpr != 1)
+        avpriv_request_sample(s, "MLTI with multiple (%d) MDPR", number_of_mdpr);
+    for (i = 0; i < number_of_mdpr; i++) {
+        AVStream *st2;
+        if (i > 0) {
+            st2 = avformat_new_stream(s, NULL);
+            if (!st2)
+                return AVERROR(ENOMEM);
+            st2->id = st->id + (i<<16);
+            st2->codec->bit_rate = st->codec->bit_rate;
+            st2->start_time = st->start_time;
+            st2->duration   = st->duration;
+            st2->codec->codec_type = AVMEDIA_TYPE_DATA;
+            st2->priv_data = ff_rm_alloc_rmstream();
+            if (!st2->priv_data)
+                return AVERROR(ENOMEM);
+        } else
+            st2 = st;
+
+        size2 = avio_rb32(pb);
+        ret = ff_rm_read_mdpr_codecdata(s, pb, st2, st2->priv_data,
+                                        size2, mime);
+        if (ret < 0)
+            return ret;
+    }
+    return 0;
+}
+
 static int rm_read_header(AVFormatContext *s)
 {
     RMDemuxContext *rm = s->priv_data;
@@ -514,6 +542,8 @@ static int rm_read_header(AVFormatContext *s)
     char buf[128], mime[128];
     int flags = 0;
     int ret = -1;
+    unsigned size, v;
+    int64_t codec_pos;
 
     tag = avio_rl32(pb);
     if (tag == MKTAG('.', 'r', 'a', 0xfd)) {
@@ -584,9 +614,24 @@ static int rm_read_header(AVFormatContext *s)
             st->priv_data = ff_rm_alloc_rmstream();
             if (!st->priv_data)
                 return AVERROR(ENOMEM);
-            if (ff_rm_read_mdpr_codecdata(s, s->pb, st, st->priv_data,
-                                          avio_rb32(pb), mime) < 0)
-                goto fail;
+
+            size = avio_rb32(pb);
+            codec_pos = avio_tell(pb);
+
+            ffio_ensure_seekback(pb, 4);
+            v = avio_rb32(pb);
+            if (v == MKBETAG('M', 'L', 'T', 'I')) {
+                ret = rm_read_multi(s, s->pb, st, mime);
+                if (ret < 0)
+                    goto fail;
+                avio_seek(pb, codec_pos + size, SEEK_SET);
+            } else {
+                avio_skip(pb, -4);
+                if (ff_rm_read_mdpr_codecdata(s, s->pb, st, st->priv_data,
+                                              size, mime) < 0)
+                    goto fail;
+            }
+
             break;
         case MKTAG('D', 'A', 'T', 'A'):
             goto header_end;
@@ -644,9 +689,11 @@ static int rm_sync(AVFormatContext *s, int64_t *timestamp, int *flags, int *stre
 
     while(!avio_feof(pb)){
         int len, num, i;
+        int mlti_id;
         *pos= avio_tell(pb) - 3;
         if(rm->remaining_len > 0){
             num= rm->current_stream;
+            mlti_id = 0;
             len= rm->remaining_len;
             *timestamp = AV_NOPTS_VALUE;
             *flags= 0;
@@ -682,12 +729,13 @@ static int rm_sync(AVFormatContext *s, int64_t *timestamp, int *flags, int *stre
 
             num = avio_rb16(pb);
             *timestamp = avio_rb32(pb);
-            avio_r8(pb); /* reserved */
+            mlti_id = (avio_r8(pb) >> 1) - 1;  /* -1 when the byte is 0 or 1 */
+            mlti_id = FFMAX(mlti_id, 0) << 16; /* clamp first: shifting a negative int is UB */
             *flags = avio_r8(pb); /* flags */
         }
         for(i=0;i<s->nb_streams;i++) {
             st = s->streams[i];
-            if (num == st->id)
+            if (mlti_id + num == st->id)
                 break;
         }
         if (i == s->nb_streams) {
@@ -746,7 +794,7 @@ static int rm_assemble_video_frame(AVFormatContext *s, AVIOContext *pb,
         AV_WL32(pkt->data + 1, 1);
         AV_WL32(pkt->data + 5, 0);
         if ((ret = avio_read(pb, pkt->data + 9, len)) != len) {
-            av_free_packet(pkt);
+            av_packet_unref(pkt);
             av_log(s, AV_LOG_ERROR, "Failed to read %d bytes\n", len);
             return ret < 0 ? ret : AVERROR(EIO);
         }
@@ -762,7 +810,7 @@ static int rm_assemble_video_frame(AVFormatContext *s, AVIOContext *pb,
         }
         vst->slices = ((hdr & 0x3F) << 1) + 1;
         vst->videobufsize = len2 + 8*vst->slices + 1;
-        av_free_packet(&vst->pkt); //FIXME this should be output.
+        av_packet_unref(&vst->pkt); //FIXME this should be output.
         if(av_new_packet(&vst->pkt, vst->videobufsize) < 0)
             return AVERROR(ENOMEM);
         memset(vst->pkt.data, 0, vst->pkt.size);
@@ -797,11 +845,6 @@ static int rm_assemble_video_frame(AVFormatContext *s, AVIOContext *pb,
         vst->pkt.data= NULL;
         vst->pkt.size= 0;
         vst->pkt.buf = NULL;
-#if FF_API_DESTRUCT_PACKET
-FF_DISABLE_DEPRECATION_WARNINGS
-        vst->pkt.destruct = NULL;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
         if(vst->slices != vst->cur_slice) //FIXME find out how to set slices correct from the begin
             memmove(pkt->data + 1 + 8*vst->cur_slice, pkt->data + 1 + 8*vst->slices,
                 vst->videobufpos - 1 - 8*vst->slices);
@@ -1015,7 +1058,7 @@ static int rm_read_packet(AVFormatContext *s, AVPacket *pkt)
 
         if(  (st->discard >= AVDISCARD_NONKEY && !(flags&2))
            || st->discard >= AVDISCARD_ALL){
-            av_free_packet(pkt);
+            av_packet_unref(pkt);
         } else
             break;
     }
@@ -1123,3 +1166,239 @@ AVInputFormat ff_rdt_demuxer = {
     .read_close     = rm_read_close,
     .flags          = AVFMT_NOFILE,
 };
+
+/* Probe: IVR data begins with either the 7-byte ".R1M" magic or a bare ".REC" tag. */
+static int ivr_probe(AVProbeData *p)
+{
+    const int has_r1m_magic = !memcmp(p->buf, ".R1M\x0\x1\x1", 7);
+    const int has_rec_magic = !memcmp(p->buf, ".REC", 4);
+
+    return has_r1m_magic || has_rec_magic ? AVPROBE_SCORE_MAX : 0;
+}
+
+/* Read the IVR header: an optional ".R1M" wrapper, then the ".REC" file
+ * properties (including "StreamCount") and one property list per stream. */
+static int ivr_read_header(AVFormatContext *s)
+{
+    unsigned tag, type, len, tlen, value;
+    int i, j, n, count, nb_streams = 0, ret;
+    uint8_t key[256], val[256];
+    AVIOContext *pb = s->pb;
+    AVStream *st;
+    int64_t pos, offset = 0, temp; /* offset stays 0 if no non-zero entry is read */
+
+    pos = avio_tell(pb);
+    tag = avio_rl32(pb);
+    if (tag == MKTAG('.','R','1','M')) {
+        if (avio_rb16(pb) != 1)
+            return AVERROR_INVALIDDATA;
+        if (avio_r8(pb) != 1)
+            return AVERROR_INVALIDDATA;
+        len = avio_rb32(pb);
+        avio_skip(pb, len);
+        avio_skip(pb, 5);
+        temp = avio_rb64(pb);
+        while (!avio_feof(pb) && temp) { /* remember the last non-zero entry */
+            offset = temp;
+            temp = avio_rb64(pb);
+        }
+        avio_skip(pb, offset - avio_tell(pb)); /* move to absolute position offset */
+        if (avio_r8(pb) != 1)
+            return AVERROR_INVALIDDATA;
+        len = avio_rb32(pb);
+        avio_skip(pb, len);
+        if (avio_r8(pb) != 2)
+            return AVERROR_INVALIDDATA;
+        avio_skip(pb, 16);
+        pos = avio_tell(pb); /* base for the jump near the end of this function */
+        tag = avio_rl32(pb);
+    }
+
+    if (tag != MKTAG('.','R','E','C'))
+        return AVERROR_INVALIDDATA;
+
+    if (avio_r8(pb) != 0)
+        return AVERROR_INVALIDDATA;
+    count = avio_rb32(pb);
+    for (i = 0; i < count; i++) {
+        if (avio_feof(pb))
+            return AVERROR_INVALIDDATA;
+
+        type = avio_r8(pb);
+        tlen = avio_rb32(pb);
+        avio_get_str(pb, tlen, key, sizeof(key));
+        len = avio_rb32(pb);
+        if (type == 5) {
+            avio_get_str(pb, len, val, sizeof(val));
+            av_log(s, AV_LOG_DEBUG, "%s = '%s'\n", key, val);
+        } else if (type == 4) {
+            av_log(s, AV_LOG_DEBUG, "%s = '0x", key);
+            for (j = 0; j < len; j++)
+                av_log(s, AV_LOG_DEBUG, "%X", avio_r8(pb));
+            av_log(s, AV_LOG_DEBUG, "'\n");
+        } else if (len == 4 && type == 3 && !strncmp(key, "StreamCount", tlen)) {
+            nb_streams = value = avio_rb32(pb);
+        } else if (len == 4 && type == 3) {
+            value = avio_rb32(pb);
+            av_log(s, AV_LOG_DEBUG, "%s = %u\n", key, value);
+        } else {
+            av_log(s, AV_LOG_DEBUG, "Skipping unsupported key: %s\n", key);
+            avio_skip(pb, len);
+        }
+    }
+
+    for (n = 0; n < nb_streams; n++) {
+        st = avformat_new_stream(s, NULL);
+        if (!st)
+            return AVERROR(ENOMEM);
+        st->priv_data = ff_rm_alloc_rmstream();
+        if (!st->priv_data)
+            return AVERROR(ENOMEM);
+
+        if (avio_r8(pb) != 1)
+            return AVERROR_INVALIDDATA;
+
+        count = avio_rb32(pb);
+        for (i = 0; i < count; i++) {
+            if (avio_feof(pb))
+                return AVERROR_INVALIDDATA;
+
+            type = avio_r8(pb);
+            tlen  = avio_rb32(pb);
+            avio_get_str(pb, tlen, key, sizeof(key));
+            len  = avio_rb32(pb);
+            if (type == 5) {
+                avio_get_str(pb, len, val, sizeof(val));
+                av_log(s, AV_LOG_DEBUG, "%s = '%s'\n", key, val);
+            } else if (type == 4 && !strncmp(key, "OpaqueData", tlen)) {
+                ret = ffio_ensure_seekback(pb, 4); /* the tag peeked below may be un-read */
+                if (ret < 0)
+                    return ret;
+                if (avio_rb32(pb) == MKBETAG('M', 'L', 'T', 'I')) {
+                    ret = rm_read_multi(s, pb, st, NULL);
+                } else {
+                    avio_seek(pb, -4, SEEK_CUR);
+                    ret = ff_rm_read_mdpr_codecdata(s, pb, st, st->priv_data, len, NULL);
+                }
+                if (ret < 0)
+                    return ret;
+            } else if (type == 4) {
+                av_log(s, AV_LOG_DEBUG, "%s = '0x", key);
+                for (j = 0; j < len; j++)
+                    av_log(s, AV_LOG_DEBUG, "%X", avio_r8(pb));
+                av_log(s, AV_LOG_DEBUG, "'\n");
+            } else if (len == 4 && type == 3 && !strncmp(key, "Duration", tlen)) {
+                st->duration = avio_rb32(pb);
+            } else if (len == 4 && type == 3) {
+                value = avio_rb32(pb);
+                av_log(s, AV_LOG_DEBUG, "%s = %u\n", key, value);
+            } else {
+                av_log(s, AV_LOG_DEBUG, "Skipping unsupported key: %s\n", key);
+                avio_skip(pb, len);
+            }
+        }
+    }
+
+    if (avio_r8(pb) != 6)
+        return AVERROR_INVALIDDATA;
+    avio_skip(pb, 12);
+    temp = avio_rb64(pb); /* read first: operand evaluation order is unspecified in C */
+    avio_skip(pb, (uint64_t)temp + pos - avio_tell(pb));
+    if (avio_r8(pb) != 8)
+        return AVERROR_INVALIDDATA;
+    avio_skip(pb, 8);
+
+    return 0;
+}
+
+/* Return the next IVR packet: queued audio first, otherwise the next record
+ * read from the stream (opcode 2 carries payload, opcode 7 a 64-bit value). */
+static int ivr_read_packet(AVFormatContext *s, AVPacket *pkt)
+{
+    RMDemuxContext *ctx = s->priv_data;
+    AVIOContext *io = s->pb;
+    AVStream *stream;
+    int64_t start, timestamp;
+    unsigned payload_size, stream_idx;
+    int status = AVERROR_EOF, op, seq;
+
+    /* Nothing to read once EOF or the end-of-data flag has been reached. */
+    if (avio_feof(io) || ctx->data_end)
+        return AVERROR_EOF;
+
+    start = avio_tell(io);
+
+    for (;;) {
+        /* Queued audio packets are handed out before anything new is read. */
+        if (ctx->audio_pkt_cnt) {
+            stream = s->streams[ctx->audio_stream_num];
+            return ff_rm_retrieve_cache(s, io, stream, stream->priv_data, pkt);
+        }
+
+        /* Discard the bytes still counted in remaining_len before reading on. */
+        if (ctx->remaining_len) {
+            avio_skip(io, ctx->remaining_len);
+            ctx->remaining_len = 0;
+        }
+
+        if (avio_feof(io))
+            return AVERROR_EOF;
+
+        op = avio_r8(io);
+        /* Opcode 7 carries a 64-bit value; zero marks the end of the data. */
+        if (op == 7) {
+            start = avio_rb64(io);
+            if (start)
+                return status;
+            ctx->data_end = 1;
+            return AVERROR_EOF;
+        }
+        if (op != 2) {
+            av_log(s, AV_LOG_ERROR, "Unsupported opcode=%d at %"PRIX64"\n", op, avio_tell(io) - 1);
+            return AVERROR(EIO);
+        }
+
+        /* Opcode 2: 32-bit pts, 16-bit stream index, 4 skipped, 32-bit size, 4 skipped. */
+        timestamp  = avio_rb32(io);
+        stream_idx = avio_rb16(io);
+        if (stream_idx >= s->nb_streams)
+            return AVERROR_INVALIDDATA;
+
+        avio_skip(io, 4);
+        payload_size = avio_rb32(io);
+        avio_skip(io, 4);
+
+        if (payload_size < 1 || payload_size > INT_MAX/4) {
+            av_log(s, AV_LOG_ERROR, "size %u is invalid\n", payload_size);
+            return AVERROR_INVALIDDATA;
+        }
+
+        seq    = 1;
+        stream = s->streams[stream_idx];
+        status = ff_rm_parse_packet(s, io, stream, stream->priv_data, payload_size,
+                                    pkt, &seq, 0, timestamp);
+        /* Results below -1 are returned as errors; any other non-zero result
+         * restarts the loop without handing out a packet. */
+        if (status < -1)
+            return status;
+        if (status)
+            continue;
+
+        /* status == 0: fill in position, pts and stream index and return. */
+        pkt->pos          = start;
+        pkt->pts          = timestamp;
+        pkt->stream_index = stream_idx;
+        return status;
+    }
+}
+
+AVInputFormat ff_ivr_demuxer = {
+    .name           = "ivr",
+    .extensions     = "ivr",
+    .long_name      = NULL_IF_CONFIG_SMALL("IVR (Internet Video Recording)"),
+    .read_probe     = ivr_probe,
+    .read_header    = ivr_read_header,
+    .read_packet    = ivr_read_packet,
+    .read_close     = rm_read_close,          /* same close callback as ff_rdt_demuxer */
+    .priv_data_size = sizeof(RMDemuxContext),
+};
diff --git a/libavformat/rmenc.c b/libavformat/rmenc.c
index b39d9917..33eaf636 100644
--- a/libavformat/rmenc.c
+++ b/libavformat/rmenc.c
@@ -185,9 +185,10 @@ static int rv10_write_header(AVFormatContext *ctx,
 
         if (stream->enc->codec_type == AVMEDIA_TYPE_AUDIO) {
             int coded_frame_size, fscode, sample_rate;
+            int frame_size = av_get_audio_frame_duration(stream->enc, 0);
             sample_rate = stream->enc->sample_rate;
             coded_frame_size = (stream->enc->bit_rate *
-                                stream->enc->frame_size) / (8 * sample_rate);
+                                frame_size) / (8 * sample_rate);
             /* audio codec info */
             avio_write(s, ".ra", 3);
             avio_w8(s, 0xfd);
@@ -320,6 +321,7 @@ static int rm_write_header(AVFormatContext *s)
 
     for(n=0;n<s->nb_streams;n++) {
         AVStream *st = s->streams[n];
+        int frame_size;
 
         s->streams[n]->id = n;
         codec = s->streams[n]->codec;
@@ -332,7 +334,8 @@ static int rm_write_header(AVFormatContext *s)
         switch(codec->codec_type) {
         case AVMEDIA_TYPE_AUDIO:
             rm->audio_stream = stream;
-            stream->frame_rate = (AVRational){codec->sample_rate, codec->frame_size};
+            frame_size = av_get_audio_frame_duration(codec, 0);
+            stream->frame_rate = (AVRational){codec->sample_rate, frame_size};
             /* XXX: dummy values */
             stream->packet_max_size = 1024;
             stream->nb_packets = 0;
diff --git a/libavformat/rpl.c b/libavformat/rpl.c
index 04cb917a..76c385bd 100644
--- a/libavformat/rpl.c
+++ b/libavformat/rpl.c
@@ -311,7 +311,7 @@ static int rpl_read_packet(AVFormatContext *s, AVPacket *pkt)
         if (ret < 0)
             return ret;
         if (ret != frame_size) {
-            av_free_packet(pkt);
+            av_packet_unref(pkt);
             return AVERROR(EIO);
         }
         pkt->duration = 1;
@@ -328,7 +328,7 @@ static int rpl_read_packet(AVFormatContext *s, AVPacket *pkt)
         if (ret < 0)
             return ret;
         if (ret != index_entry->size) {
-            av_free_packet(pkt);
+            av_packet_unref(pkt);
             return AVERROR(EIO);
         }
 
diff --git a/libavformat/rsd.c b/libavformat/rsd.c
index 1eff5de7..dd1f3723 100644
--- a/libavformat/rsd.c
+++ b/libavformat/rsd.c
@@ -26,19 +26,19 @@
 #include "internal.h"
 
 static const AVCodecTag rsd_tags[] = {
-    { AV_CODEC_ID_ADPCM_THP,       MKTAG('G','A','D','P') },
+    { AV_CODEC_ID_ADPCM_PSX,       MKTAG('V','A','G',' ') },
+    { AV_CODEC_ID_ADPCM_THP_LE,    MKTAG('G','A','D','P') },
+    { AV_CODEC_ID_ADPCM_THP,       MKTAG('W','A','D','P') },
     { AV_CODEC_ID_ADPCM_IMA_RAD,   MKTAG('R','A','D','P') },
+    { AV_CODEC_ID_ADPCM_IMA_WAV,   MKTAG('X','A','D','P') },
     { AV_CODEC_ID_PCM_S16BE,       MKTAG('P','C','M','B') },
     { AV_CODEC_ID_PCM_S16LE,       MKTAG('P','C','M',' ') },
+    { AV_CODEC_ID_XMA2,            MKTAG('X','M','A',' ') },
     { AV_CODEC_ID_NONE, 0 },
 };
 
 static const uint32_t rsd_unsupported_tags[] = {
     MKTAG('O','G','G',' '),
-    MKTAG('V','A','G',' '),
-    MKTAG('W','A','D','P'),
-    MKTAG('X','A','D','P'),
-    MKTAG('X','M','A',' '),
 };
 
 static int rsd_probe(AVProbeData *p)
@@ -55,7 +55,7 @@ static int rsd_probe(AVProbeData *p)
 static int rsd_read_header(AVFormatContext *s)
 {
     AVIOContext *pb = s->pb;
-    int i, version, start = 0x800;
+    int i, ret, version, start = 0x800;
     AVCodecContext *codec;
     AVStream *st = avformat_new_stream(s, NULL);
 
@@ -95,25 +95,56 @@ static int rsd_read_header(AVFormatContext *s)
     avio_skip(pb, 4); // Unknown
 
     switch (codec->codec_id) {
+    case AV_CODEC_ID_XMA2:
+        codec->block_align = 2048;
+        ff_alloc_extradata(codec, 34);
+        if (!codec->extradata)
+            return AVERROR(ENOMEM);
+        memset(codec->extradata, 0, 34);
+        break;
+    case AV_CODEC_ID_ADPCM_PSX:
+        codec->block_align = 16 * codec->channels;
+        if (pb->seekable)
+            st->duration = av_get_audio_frame_duration(codec, avio_size(pb) - start);
+        break;
     case AV_CODEC_ID_ADPCM_IMA_RAD:
         codec->block_align = 20 * codec->channels;
         if (pb->seekable)
             st->duration = av_get_audio_frame_duration(codec, avio_size(pb) - start);
         break;
-    case AV_CODEC_ID_ADPCM_THP:
+    case AV_CODEC_ID_ADPCM_IMA_WAV:
+        if (version == 2)
+            start = avio_rl32(pb);
+
+        codec->bits_per_coded_sample = 4;
+        codec->block_align = 36 * codec->channels;
+        if (pb->seekable)
+            st->duration = av_get_audio_frame_duration(codec, avio_size(pb) - start);
+        break;
+    case AV_CODEC_ID_ADPCM_THP_LE:
         /* RSD3GADP is mono, so only alloc enough memory
            to store the coeff table for a single channel. */
 
         start = avio_rl32(pb);
 
-        if (ff_get_extradata(codec, s->pb, 32) < 0)
-            return AVERROR(ENOMEM);
+        if ((ret = ff_get_extradata(codec, s->pb, 32)) < 0)
+            return ret;
+        if (pb->seekable)
+            st->duration = av_get_audio_frame_duration(codec, avio_size(pb) - start);
+        break;
+    case AV_CODEC_ID_ADPCM_THP:
+        codec->block_align = 8 * codec->channels;
+        avio_skip(s->pb, 0x1A4 - avio_tell(s->pb));
 
-        for (i = 0; i < 16; i++)
-            AV_WB16(codec->extradata + i * 2, AV_RL16(codec->extradata + i * 2));
+        if ((ret = ff_alloc_extradata(st->codec, 32 * st->codec->channels)) < 0)
+            return ret;
 
+        for (i = 0; i < st->codec->channels; i++) {
+            avio_read(s->pb, st->codec->extradata + 32 * i, 32);
+            avio_skip(s->pb, 8);
+        }
         if (pb->seekable)
-            st->duration = (avio_size(pb) - start) / 8 * 14;
+            st->duration = (avio_size(pb) - start) / (8 * st->codec->channels) * 14;
         break;
     case AV_CODEC_ID_PCM_S16LE:
     case AV_CODEC_ID_PCM_S16BE:
@@ -126,6 +157,10 @@ static int rsd_read_header(AVFormatContext *s)
     }
 
     avio_skip(pb, start - avio_tell(pb));
+    if (codec->codec_id == AV_CODEC_ID_XMA2) {
+        avio_skip(pb, avio_rb32(pb) + avio_rb32(pb));
+        st->duration = avio_rb32(pb);
+    }
 
     avpriv_set_pts_info(st, 64, 1, codec->sample_rate);
 
@@ -136,22 +171,39 @@ static int rsd_read_packet(AVFormatContext *s, AVPacket *pkt)
 {
     AVCodecContext *codec = s->streams[0]->codec;
     int ret, size = 1024;
+    int64_t pos;
 
     if (avio_feof(s->pb))
         return AVERROR_EOF;
 
-    if (codec->codec_id == AV_CODEC_ID_ADPCM_IMA_RAD)
+    pos = avio_tell(s->pb);
+    if (codec->codec_id == AV_CODEC_ID_ADPCM_IMA_RAD ||
+        codec->codec_id == AV_CODEC_ID_ADPCM_PSX     ||
+        codec->codec_id == AV_CODEC_ID_ADPCM_IMA_WAV ||
+        codec->codec_id == AV_CODEC_ID_XMA2) {
         ret = av_get_packet(s->pb, pkt, codec->block_align);
-    else
-        ret = av_get_packet(s->pb, pkt, size);
+    } else if (codec->codec_tag == MKTAG('W','A','D','P') &&
+               codec->channels > 1) {
+        int i, ch;
 
-    if (ret != size) {
-        if (ret < 0) {
-            av_free_packet(pkt);
+        ret = av_new_packet(pkt, codec->block_align);
+        if (ret < 0)
             return ret;
+        for (i = 0; i < 4; i++) {
+            for (ch = 0; ch < codec->channels; ch++) {
+                pkt->data[ch * 8 + i * 2 + 0] = avio_r8(s->pb);
+                pkt->data[ch * 8 + i * 2 + 1] = avio_r8(s->pb);
+            }
         }
-        av_shrink_packet(pkt, ret);
+        ret = 0;
+    } else {
+        ret = av_get_packet(s->pb, pkt, size);
     }
+
+    if (codec->codec_id == AV_CODEC_ID_XMA2 && pkt->size >= 1)
+        pkt->duration = (pkt->data[0] >> 2) * 512;
+
+    pkt->pos = pos;
     pkt->stream_index = 0;
 
     return ret;
@@ -165,4 +217,5 @@ AVInputFormat ff_rsd_demuxer = {
     .read_packet    =   rsd_read_packet,
     .extensions     =   "rsd",
     .codec_tag      =   (const AVCodecTag* const []){rsd_tags, 0},
+    .flags          =   AVFMT_GENERIC_INDEX,
 };
diff --git a/libavformat/rtmp.h b/libavformat/rtmp.h
index 8fc8040d..6600da74 100644
--- a/libavformat/rtmp.h
+++ b/libavformat/rtmp.h
@@ -29,9 +29,6 @@
 
 #define RTMP_HANDSHAKE_PACKET_SIZE 1536
 
-#define HMAC_IPAD_VAL 0x36
-#define HMAC_OPAD_VAL 0x5C
-
 /**
  * emulated Flash client version - 9.0.124.2 on Linux
  * @{
diff --git a/libavformat/rtmpcrypt.c b/libavformat/rtmpcrypt.c
index 2065ec66..811c74cd 100644
--- a/libavformat/rtmpcrypt.c
+++ b/libavformat/rtmpcrypt.c
@@ -50,38 +50,38 @@ typedef struct RTMPEContext {
 } RTMPEContext;
 
 static const uint8_t rtmpe8_keys[16][16] = {
-    { 0xbf, 0xf0, 0x34, 0xb2, 0x11, 0xd9, 0x08, 0x1f,
-      0xcc, 0xdf, 0xb7, 0x95, 0x74, 0x8d, 0xe7, 0x32 },
-    { 0x08, 0x6a, 0x5e, 0xb6, 0x17, 0x43, 0x09, 0x0e,
-      0x6e, 0xf0, 0x5a, 0xb8, 0xfe, 0x5a, 0x39, 0xe2 },
-    { 0x7b, 0x10, 0x95, 0x6f, 0x76, 0xce, 0x05, 0x21,
-      0x23, 0x88, 0xa7, 0x3a, 0x44, 0x01, 0x49, 0xa1 },
-    { 0xa9, 0x43, 0xf3, 0x17, 0xeb, 0xf1, 0x1b, 0xb2,
-      0xa6, 0x91, 0xa5, 0xee, 0x17, 0xf3, 0x63, 0x39 },
-    { 0x7a, 0x30, 0xe0, 0x0a, 0xb5, 0x29, 0xe2, 0x2c,
-      0xa0, 0x87, 0xae, 0xa5, 0xc0, 0xcb, 0x79, 0xac },
-    { 0xbd, 0xce, 0x0c, 0x23, 0x2f, 0xeb, 0xde, 0xff,
-      0x1c, 0xfa, 0xae, 0x16, 0x11, 0x23, 0x23, 0x9d },
-    { 0x55, 0xdd, 0x3f, 0x7b, 0x77, 0xe7, 0xe6, 0x2e,
-      0x9b, 0xb8, 0xc4, 0x99, 0xc9, 0x48, 0x1e, 0xe4 },
-    { 0x40, 0x7b, 0xb6, 0xb4, 0x71, 0xe8, 0x91, 0x36,
-      0xa7, 0xae, 0xbf, 0x55, 0xca, 0x33, 0xb8, 0x39 },
-    { 0xfc, 0xf6, 0xbd, 0xc3, 0xb6, 0x3c, 0x36, 0x97,
-      0x7c, 0xe4, 0xf8, 0x25, 0x04, 0xd9, 0x59, 0xb2 },
-    { 0x28, 0xe0, 0x91, 0xfd, 0x41, 0x95, 0x4c, 0x4c,
-      0x7f, 0xb7, 0xdb, 0x00, 0xe3, 0xa0, 0x66, 0xf8 },
-    { 0x57, 0x84, 0x5b, 0x76, 0x4f, 0x25, 0x1b, 0x03,
-      0x46, 0xd4, 0x5b, 0xcd, 0xa2, 0xc3, 0x0d, 0x29 },
-    { 0x0a, 0xcc, 0xee, 0xf8, 0xda, 0x55, 0xb5, 0x46,
-      0x03, 0x47, 0x34, 0x52, 0x58, 0x63, 0x71, 0x3b },
-    { 0xb8, 0x20, 0x75, 0xdc, 0xa7, 0x5f, 0x1f, 0xee,
-      0xd8, 0x42, 0x68, 0xe8, 0xa7, 0x2a, 0x44, 0xcc },
-    { 0x07, 0xcf, 0x6e, 0x9e, 0xa1, 0x6d, 0x7b, 0x25,
-      0x9f, 0xa7, 0xae, 0x6c, 0xd9, 0x2f, 0x56, 0x29 },
-    { 0xfe, 0xb1, 0xea, 0xe4, 0x8c, 0x8c, 0x3c, 0xe1,
-      0x4e, 0x00, 0x64, 0xa7, 0x6a, 0x38, 0x7c, 0x2a },
-    { 0x89, 0x3a, 0x94, 0x27, 0xcc, 0x30, 0x13, 0xa2,
-      0xf1, 0x06, 0x38, 0x5b, 0xa8, 0x29, 0xf9, 0x27 }
+    { 0xb2, 0x34, 0xf0, 0xbf, 0x1f, 0x08, 0xd9, 0x11,
+      0x95, 0xb7, 0xdf, 0xcc, 0x32, 0xe7, 0x8d, 0x74 },
+    { 0xb6, 0x5e, 0x6a, 0x08, 0x0e, 0x09, 0x43, 0x17,
+      0xb8, 0x5a, 0xf0, 0x6e, 0xe2, 0x39, 0x5a, 0xfe },
+    { 0x6f, 0x95, 0x10, 0x7b, 0x21, 0x05, 0xce, 0x76,
+      0x3a, 0xa7, 0x88, 0x23, 0xa1, 0x49, 0x01, 0x44 },
+    { 0x17, 0xf3, 0x43, 0xa9, 0xb2, 0x1b, 0xf1, 0xeb,
+      0xee, 0xa5, 0x91, 0xa6, 0x39, 0x63, 0xf3, 0x17 },
+    { 0x0a, 0xe0, 0x30, 0x7a, 0x2c, 0xe2, 0x29, 0xb5,
+      0xa5, 0xae, 0x87, 0xa0, 0xac, 0x79, 0xcb, 0xc0 },
+    { 0x23, 0x0c, 0xce, 0xbd, 0xff, 0xde, 0xeb, 0x2f,
+      0x16, 0xae, 0xfa, 0x1c, 0x9d, 0x23, 0x23, 0x11 },
+    { 0x7b, 0x3f, 0xdd, 0x55, 0x2e, 0xe6, 0xe7, 0x77,
+      0x99, 0xc4, 0xb8, 0x9b, 0xe4, 0x1e, 0x48, 0xc9 },
+    { 0xb4, 0xb6, 0x7b, 0x40, 0x36, 0x91, 0xe8, 0x71,
+      0x55, 0xbf, 0xae, 0xa7, 0x39, 0xb8, 0x33, 0xca },
+    { 0xc3, 0xbd, 0xf6, 0xfc, 0x97, 0x36, 0x3c, 0xb6,
+      0x25, 0xf8, 0xe4, 0x7c, 0xb2, 0x59, 0xd9, 0x04 },
+    { 0xfd, 0x91, 0xe0, 0x28, 0x4c, 0x4c, 0x95, 0x41,
+      0x00, 0xdb, 0xb7, 0x7f, 0xf8, 0x66, 0xa0, 0xe3 },
+    { 0x76, 0x5b, 0x84, 0x57, 0x03, 0x1b, 0x25, 0x4f,
+      0xcd, 0x5b, 0xd4, 0x46, 0x29, 0x0d, 0xc3, 0xa2 },
+    { 0xf8, 0xee, 0xcc, 0x0a, 0x46, 0xb5, 0x55, 0xda,
+      0x52, 0x34, 0x47, 0x03, 0x3b, 0x71, 0x63, 0x58 },
+    { 0xdc, 0x75, 0x20, 0xb8, 0xee, 0x1f, 0x5f, 0xa7,
+      0xe8, 0x68, 0x42, 0xd8, 0xcc, 0x44, 0x2a, 0xa7 },
+    { 0x9e, 0x6e, 0xcf, 0x07, 0x25, 0x7b, 0x6d, 0xa1,
+      0x6c, 0xae, 0xa7, 0x9f, 0x29, 0x56, 0x2f, 0xd9 },
+    { 0xe4, 0xea, 0xb1, 0xfe, 0xe1, 0x3c, 0x8c, 0x8c,
+      0xa7, 0x64, 0x00, 0x4e, 0x2a, 0x7c, 0x38, 0x6a },
+    { 0x27, 0x94, 0x3a, 0x89, 0xa2, 0x13, 0x30, 0xcc,
+      0x5b, 0x38, 0x06, 0xf1, 0x27, 0xf9, 0x29, 0xa8 }
 };
 
 static const uint8_t rtmpe9_keys[16][24] = {
@@ -185,8 +185,8 @@ static void rtmpe8_sig(const uint8_t *in, uint8_t *out, int key_id)
 {
     struct AVXTEA ctx;
 
-    av_xtea_init(&ctx, rtmpe8_keys[key_id]);
-    av_xtea_crypt(&ctx, out, in, 1, NULL, 0);
+    av_xtea_le_init(&ctx, rtmpe8_keys[key_id]);
+    av_xtea_le_crypt(&ctx, out, in, 1, NULL, 0);
 }
 
 static void rtmpe9_sig(const uint8_t *in, uint8_t *out, int key_id)
@@ -264,8 +264,9 @@ static int rtmpe_open(URLContext *h, const char *uri, int flags)
     }
 
     /* open the tcp or ffrtmphttp connection */
-    if ((ret = ffurl_open(&rt->stream, url, AVIO_FLAG_READ_WRITE,
-                          &h->interrupt_callback, NULL)) < 0) {
+    if ((ret = ffurl_open_whitelist(&rt->stream, url, AVIO_FLAG_READ_WRITE,
+                                    &h->interrupt_callback, NULL,
+                                    h->protocol_whitelist)) < 0) {
         rtmpe_close(h);
         return ret;
     }
diff --git a/libavformat/rtmpdh.c b/libavformat/rtmpdh.c
index 91b1349c..42ad72c6 100644
--- a/libavformat/rtmpdh.c
+++ b/libavformat/rtmpdh.c
@@ -97,7 +97,16 @@
         mpz_fdiv_r_2exp(bn, bn, num_bits);            \
     } while (0)
 #elif CONFIG_GCRYPT
-#define bn_new(bn)                  bn = gcry_mpi_new(1)
+#define bn_new(bn) /* may return from the calling function */   \
+    do { /* set up libgcrypt unless already finished */         \
+        if (!gcry_control(GCRYCTL_INITIALIZATION_FINISHED_P)) { \
+            if (!gcry_check_version("1.5.4"))                   \
+                return AVERROR(EINVAL); /* exits the caller */  \
+            gcry_control(GCRYCTL_DISABLE_SECMEM, 0);            \
+            gcry_control(GCRYCTL_INITIALIZATION_FINISHED, 0);   \
+        }                                                       \
+        bn = gcry_mpi_new(1);                                   \
+    } while (0)
 #define bn_free(bn)                 gcry_mpi_release(bn)
 #define bn_set_word(bn, w)          gcry_mpi_set_ui(bn, w)
 #define bn_cmp(a, b)                gcry_mpi_cmp(a, b)
diff --git a/libavformat/rtmphttp.c b/libavformat/rtmphttp.c
index 0334ba55..8ed5eb19 100644
--- a/libavformat/rtmphttp.c
+++ b/libavformat/rtmphttp.c
@@ -254,7 +254,7 @@ static int rtmp_http_open(URLContext *h, const char *uri, int flags)
 #define DEC AV_OPT_FLAG_DECODING_PARAM
 
 static const AVOption ffrtmphttp_options[] = {
-    {"ffrtmphttp_tls", "Use a HTTPS tunneling connection (RTMPTS).", OFFSET(tls), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, DEC},
+    {"ffrtmphttp_tls", "Use a HTTPS tunneling connection (RTMPTS).", OFFSET(tls), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, DEC},
     { NULL },
 };
 
diff --git a/libavformat/rtmppkt.c b/libavformat/rtmppkt.c
index c474fb3d..0d693c27 100644
--- a/libavformat/rtmppkt.c
+++ b/libavformat/rtmppkt.c
@@ -440,6 +440,7 @@ int ff_amf_tag_size(const uint8_t *data, const uint8_t *data_end)
     case AMF_DATA_TYPE_STRING:      return 3 + AV_RB16(data);
     case AMF_DATA_TYPE_LONG_STRING: return 5 + AV_RB32(data);
     case AMF_DATA_TYPE_NULL:        return 1;
+    case AMF_DATA_TYPE_DATE:        return 11;
     case AMF_DATA_TYPE_ARRAY:
         parse_key = 0;
     case AMF_DATA_TYPE_MIXEDARRAY:
diff --git a/libavformat/rtmpproto.c b/libavformat/rtmpproto.c
index 43ddfe8e..a5485ab9 100644
--- a/libavformat/rtmpproto.c
+++ b/libavformat/rtmpproto.c
@@ -27,12 +27,12 @@
 #include "libavcodec/bytestream.h"
 #include "libavutil/avstring.h"
 #include "libavutil/base64.h"
+#include "libavutil/hmac.h"
 #include "libavutil/intfloat.h"
 #include "libavutil/lfg.h"
 #include "libavutil/md5.h"
 #include "libavutil/opt.h"
 #include "libavutil/random_seed.h"
-#include "libavutil/sha.h"
 #include "avformat.h"
 #include "internal.h"
 
@@ -49,8 +49,8 @@
 #endif
 
 #define APP_MAX_LENGTH 1024
-#define PLAYPATH_MAX_LENGTH 256
-#define TCURL_MAX_LENGTH 512
+#define PLAYPATH_MAX_LENGTH 512
+#define TCURL_MAX_LENGTH 1024
 #define FLASHVER_MAX_LENGTH 64
 #define RTMP_PKTDATA_DEFAULT_SIZE 4096
 #define RTMP_HEADER 11
@@ -956,41 +956,22 @@ static int gen_fcsubscribe_stream(URLContext *s, RTMPContext *rt,
 int ff_rtmp_calc_digest(const uint8_t *src, int len, int gap,
                         const uint8_t *key, int keylen, uint8_t *dst)
 {
-    struct AVSHA *sha;
-    uint8_t hmac_buf[64+32] = {0};
-    int i;
+    AVHMAC *hmac;
 
-    sha = av_sha_alloc();
-    if (!sha)
+    hmac = av_hmac_alloc(AV_HMAC_SHA256);
+    if (!hmac)
         return AVERROR(ENOMEM);
 
-    if (keylen < 64) {
-        memcpy(hmac_buf, key, keylen);
-    } else {
-        av_sha_init(sha, 256);
-        av_sha_update(sha,key, keylen);
-        av_sha_final(sha, hmac_buf);
-    }
-    for (i = 0; i < 64; i++)
-        hmac_buf[i] ^= HMAC_IPAD_VAL;
-
-    av_sha_init(sha, 256);
-    av_sha_update(sha, hmac_buf, 64);
+    av_hmac_init(hmac, key, keylen);
     if (gap <= 0) {
-        av_sha_update(sha, src, len);
+        av_hmac_update(hmac, src, len);
     } else { //skip 32 bytes used for storing digest
-        av_sha_update(sha, src, gap);
-        av_sha_update(sha, src + gap + 32, len - gap - 32);
+        av_hmac_update(hmac, src, gap);
+        av_hmac_update(hmac, src + gap + 32, len - gap - 32);
     }
-    av_sha_final(sha, hmac_buf + 64);
-
-    for (i = 0; i < 64; i++)
-        hmac_buf[i] ^= HMAC_IPAD_VAL ^ HMAC_OPAD_VAL; //reuse XORed key for opad
-    av_sha_init(sha, 256);
-    av_sha_update(sha, hmac_buf, 64+32);
-    av_sha_final(sha, dst);
+    av_hmac_final(hmac, dst, 32);
 
-    av_free(sha);
+    av_hmac_free(hmac);
 
     return 0;
 }
@@ -1137,8 +1118,9 @@ static int rtmp_calc_swfhash(URLContext *s)
     int ret = 0;
 
     /* Get the SWF player file. */
-    if ((ret = ffurl_open(&stream, rt->swfverify, AVIO_FLAG_READ,
-                          &s->interrupt_callback, NULL)) < 0) {
+    if ((ret = ffurl_open_whitelist(&stream, rt->swfverify, AVIO_FLAG_READ,
+                                    &s->interrupt_callback, NULL,
+                                    s->protocol_whitelist)) < 0) {
         av_log(s, AV_LOG_ERROR, "Cannot open connection %s.\n", rt->swfverify);
         goto fail;
     }
@@ -2237,7 +2219,7 @@ static int append_flv_data(RTMPContext *rt, RTMPPacket *pkt, int skip)
     bytestream2_put_byte(&pbc, ts >> 24);
     bytestream2_put_be24(&pbc, 0);
     bytestream2_put_buffer(&pbc, data, size);
-    bytestream2_put_be32(&pbc, 0);
+    bytestream2_put_be32(&pbc, size + RTMP_HEADER);
 
     return 0;
 }
@@ -2387,8 +2369,9 @@ static int handle_metadata(RTMPContext *rt, RTMPPacket *pkt)
         bytestream_put_be24(&p, ts);
         bytestream_put_byte(&p, ts >> 24);
         memcpy(p, next, size + 3 + 4);
+        p    += size + 3;
+        bytestream_put_be32(&p, size + RTMP_HEADER);
         next += size + 3 + 4;
-        p    += size + 3 + 4;
     }
     if (p != rt->flv_data + rt->flv_size) {
         av_log(NULL, AV_LOG_WARNING, "Incomplete flv packets in "
@@ -2578,7 +2561,7 @@ static int inject_fake_duration_metadata(RTMPContext *rt)
     // Finalise object
     bytestream_put_be16(&p, 0); // Empty string
     bytestream_put_byte(&p, AMF_END_OF_OBJECT);
-    bytestream_put_be32(&p, 40); // size of data part (sum of all parts below)
+    bytestream_put_be32(&p, 40 + RTMP_HEADER); // size of data part (sum of all parts above)
 
     return 0;
 }
@@ -2665,8 +2648,9 @@ static int rtmp_open(URLContext *s, const char *uri, int flags)
     }
 
 reconnect:
-    if ((ret = ffurl_open(&rt->stream, buf, AVIO_FLAG_READ_WRITE,
-                          &s->interrupt_callback, &opts)) < 0) {
+    if ((ret = ffurl_open_whitelist(&rt->stream, buf, AVIO_FLAG_READ_WRITE,
+                                    &s->interrupt_callback, &opts,
+                                    s->protocol_whitelist)) < 0) {
         av_log(s , AV_LOG_ERROR, "Cannot open connection %s\n", buf);
         goto fail;
     }
@@ -2699,8 +2683,8 @@ static int rtmp_open(URLContext *s, const char *uri, int flags)
     qmark = strchr(path, '?');
     if (qmark && strstr(qmark, "slist=")) {
         char* amp;
-        // After slist we have the playpath, before the params, the app
-        av_strlcpy(rt->app, path + 1, FFMIN(qmark - path, APP_MAX_LENGTH));
+        // After slist we have the playpath, the full path is used as app
+        av_strlcpy(rt->app, path + 1, APP_MAX_LENGTH);
         fname = strstr(path, "slist=") + 6;
         // Strip any further query parameters from fname
         amp = strchr(fname, '&');
diff --git a/libavformat/rtpdec.c b/libavformat/rtpdec.c
index fee9547e..c3e50d44 100644
--- a/libavformat/rtpdec.c
+++ b/libavformat/rtpdec.c
@@ -520,6 +520,10 @@ RTPDemuxContext *ff_rtp_parse_open(AVFormatContext *s1, AVStream *st,
     s->ic                  = s1;
     s->st                  = st;
     s->queue_size          = queue_size;
+
+    av_log(s->st ? s->st->codec : NULL, AV_LOG_VERBOSE,
+            "setting jitter buffer size to %d\n", s->queue_size);
+
     rtp_init_statistics(&s->statistics, 0);
     if (st) {
         switch (st->codec->codec_id) {
@@ -687,7 +691,7 @@ void ff_rtp_reset_packet_queue(RTPDemuxContext *s)
     s->prev_ret  = 0;
 }
 
-static void enqueue_packet(RTPDemuxContext *s, uint8_t *buf, int len)
+static int enqueue_packet(RTPDemuxContext *s, uint8_t *buf, int len)
 {
     uint16_t seq   = AV_RB16(buf + 2);
     RTPPacket **cur = &s->queue, *packet;
@@ -702,7 +706,7 @@ static void enqueue_packet(RTPDemuxContext *s, uint8_t *buf, int len)
 
     packet = av_mallocz(sizeof(*packet));
     if (!packet)
-        return;
+        return AVERROR(ENOMEM);
     packet->recvtime = av_gettime_relative();
     packet->seq      = seq;
     packet->len      = len;
@@ -710,6 +714,8 @@ static void enqueue_packet(RTPDemuxContext *s, uint8_t *buf, int len)
     packet->next     = *cur;
     *cur = packet;
     s->queue_len++;
+
+    return 0;
 }
 
 static int has_next_packet(RTPDemuxContext *s)
@@ -807,12 +813,17 @@ static int rtp_parse_one_packet(RTPDemuxContext *s, AVPacket *pkt,
             return rv;
         } else {
             /* Still missing some packet, enqueue this one. */
-            enqueue_packet(s, buf, len);
+            rv = enqueue_packet(s, buf, len);
+            if (rv < 0)
+                return rv;
             *bufptr = NULL;
             /* Return the first enqueued packet if the queue is full,
              * even if we're missing something */
-            if (s->queue_len >= s->queue_size)
+            if (s->queue_len >= s->queue_size) {
+                av_log(s->st ? s->st->codec : NULL, AV_LOG_WARNING,
+                       "jitter buffer full\n");
                 return rtp_parse_queued_packet(s, pkt);
+            }
             return -1;
         }
     }
diff --git a/libavformat/rtpdec.h b/libavformat/rtpdec.h
index 96cbb5e5..77596b67 100644
--- a/libavformat/rtpdec.h
+++ b/libavformat/rtpdec.h
@@ -35,7 +35,7 @@ typedef struct RTPDynamicProtocolHandler RTPDynamicProtocolHandler;
 #define RTP_MIN_PACKET_LENGTH 12
 #define RTP_MAX_PACKET_LENGTH 8192
 
-#define RTP_REORDER_QUEUE_DEFAULT_SIZE 10
+#define RTP_REORDER_QUEUE_DEFAULT_SIZE 500
 
 #define RTP_NOTS_VALUE ((uint32_t)-1)
 
diff --git a/libavformat/rtpdec_asf.c b/libavformat/rtpdec_asf.c
index e59480c9..2b1ddf14 100644
--- a/libavformat/rtpdec_asf.c
+++ b/libavformat/rtpdec_asf.c
@@ -289,7 +289,7 @@ static int asfrtp_parse_packet(AVFormatContext *s, PayloadContext *asf,
                 return 1; // FIXME: return 0 if last packet
             }
         }
-        av_free_packet(pkt);
+        av_packet_unref(pkt);
     }
 
     return res == 1 ? -1 : res;
diff --git a/libavformat/rtpdec_h264.c b/libavformat/rtpdec_h264.c
index 2ac79dbf..b399be42 100644
--- a/libavformat/rtpdec_h264.c
+++ b/libavformat/rtpdec_h264.c
@@ -119,7 +119,7 @@ int ff_h264_parse_sprop_parameter_sets(AVFormatContext *s,
             uint8_t *dest = av_realloc(*data_ptr,
                                        packet_size + sizeof(start_sequence) +
                                        *size_ptr +
-                                       FF_INPUT_BUFFER_PADDING_SIZE);
+                                       AV_INPUT_BUFFER_PADDING_SIZE);
             if (!dest) {
                 av_log(s, AV_LOG_ERROR,
                        "Unable to allocate memory for extradata!\n");
@@ -132,7 +132,7 @@ int ff_h264_parse_sprop_parameter_sets(AVFormatContext *s,
             memcpy(dest + *size_ptr + sizeof(start_sequence),
                    decoded_packet, packet_size);
             memset(dest + *size_ptr + sizeof(start_sequence) +
-                   packet_size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
+                   packet_size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
 
             *size_ptr += sizeof(start_sequence) + packet_size;
         }
diff --git a/libavformat/rtpdec_hevc.c b/libavformat/rtpdec_hevc.c
index 51c20948..1f09a08a 100644
--- a/libavformat/rtpdec_hevc.c
+++ b/libavformat/rtpdec_hevc.c
@@ -152,7 +152,7 @@ static av_cold int hevc_parse_sdp_line(AVFormatContext *ctx, int st_index,
             codec->extradata_size = hevc_data->vps_size + hevc_data->sps_size +
                                     hevc_data->pps_size + hevc_data->sei_size;
             codec->extradata = av_malloc(codec->extradata_size +
-                                         FF_INPUT_BUFFER_PADDING_SIZE);
+                                         AV_INPUT_BUFFER_PADDING_SIZE);
             if (!codec->extradata) {
                 ret = AVERROR(ENOMEM);
                 codec->extradata_size = 0;
@@ -166,7 +166,7 @@ static av_cold int hevc_parse_sdp_line(AVFormatContext *ctx, int st_index,
                 pos += hevc_data->pps_size;
                 memcpy(codec->extradata + pos, hevc_data->sei, hevc_data->sei_size);
                 pos += hevc_data->sei_size;
-                memset(codec->extradata + pos, 0, FF_INPUT_BUFFER_PADDING_SIZE);
+                memset(codec->extradata + pos, 0, AV_INPUT_BUFFER_PADDING_SIZE);
             }
 
             av_freep(&hevc_data->vps);
@@ -222,7 +222,7 @@ static int hevc_handle_packet(AVFormatContext *ctx, PayloadContext *rtp_hevc_ctx
     /* sanity check for correct layer ID */
     if (lid) {
         /* future scalable or 3D video coding extensions */
-        avpriv_report_missing_feature(ctx, "Multi-layer HEVC coding\n");
+        avpriv_report_missing_feature(ctx, "Multi-layer HEVC coding");
         return AVERROR_PATCHWELCOME;
     }
 
@@ -338,7 +338,7 @@ static int hevc_handle_packet(AVFormatContext *ctx, PayloadContext *rtp_hevc_ctx
     /* PACI packet */
     case 50:
         /* Temporal scalability control information (TSCI) */
-        avpriv_report_missing_feature(ctx, "PACI packets for RTP/HEVC\n");
+        avpriv_report_missing_feature(ctx, "PACI packets for RTP/HEVC");
         res = AVERROR_PATCHWELCOME;
         break;
     }
diff --git a/libavformat/rtpdec_jpeg.c b/libavformat/rtpdec_jpeg.c
index 6bf88f8c..397b5cf5 100644
--- a/libavformat/rtpdec_jpeg.c
+++ b/libavformat/rtpdec_jpeg.c
@@ -193,16 +193,17 @@ static void create_default_qtables(uint8_t *qtables, uint8_t q)
 {
     int factor = q;
     int i;
+    uint16_t S;
 
     factor = av_clip(q, 1, 99);
 
     if (q < 50)
-        q = 5000 / factor;
+        S = 5000 / factor;
     else
-        q = 200 - factor * 2;
+        S = 200 - factor * 2;
 
     for (i = 0; i < 128; i++) {
-        int val = (default_quantizers[i] * q + 50) / 100;
+        int val = (default_quantizers[i] * S + 50) / 100;
 
         /* Limit the quantizers to 1 <= q <= 255. */
         val = av_clip(val, 1, 255);
@@ -245,12 +246,6 @@ static int jpeg_parse_packet(AVFormatContext *ctx, PayloadContext *jpeg,
         len -= 4;
         type &= ~0x40;
     }
-    /* Parse the restart marker header. */
-    if (type > 63) {
-        av_log(ctx, AV_LOG_ERROR,
-               "Unimplemented RTP/JPEG restart marker header.\n");
-        return AVERROR_PATCHWELCOME;
-    }
     if (type > 1) {
         av_log(ctx, AV_LOG_ERROR, "Unimplemented RTP/JPEG type %d\n", type);
         return AVERROR_PATCHWELCOME;
diff --git a/libavformat/rtpdec_latm.c b/libavformat/rtpdec_latm.c
index 7db92f60..aebba574 100644
--- a/libavformat/rtpdec_latm.c
+++ b/libavformat/rtpdec_latm.c
@@ -97,7 +97,7 @@ static int parse_fmtp_config(AVStream *st, const char *value)
     int audio_mux_version, same_time_framing, num_programs, num_layers;
 
     /* Pad this buffer, too, to avoid out of bounds reads with get_bits below */
-    config = av_mallocz(len + FF_INPUT_BUFFER_PADDING_SIZE);
+    config = av_mallocz(len + AV_INPUT_BUFFER_PADDING_SIZE);
     if (!config)
         return AVERROR(ENOMEM);
     ff_hex_to_data(config, value);
diff --git a/libavformat/rtpdec_mpa_robust.c b/libavformat/rtpdec_mpa_robust.c
index 07057fe7..86c8958d 100644
--- a/libavformat/rtpdec_mpa_robust.c
+++ b/libavformat/rtpdec_mpa_robust.c
@@ -136,7 +136,7 @@ static int mpa_robust_parse_packet(AVFormatContext *ctx, PayloadContext *data,
             data->split_pos = 0;
             if (!data->split_buf) {
                 av_log(ctx, AV_LOG_ERROR, "Out of memory.\n");
-                av_free_packet(pkt);
+                av_packet_unref(pkt);
                 return AVERROR(ENOMEM);
             }
             memcpy(data->split_buf, buf, data->split_buf_size);
@@ -166,7 +166,7 @@ static int mpa_robust_parse_packet(AVFormatContext *ctx, PayloadContext *data,
             "Received packet without a start fragment; dropping.\n");
         return AVERROR(EAGAIN);
     }
-    if (adu_size = data->adu_size ||
+    if (adu_size != data->adu_size ||
         data->timestamp != *timestamp) {
         ffio_free_dyn_buf(&data->fragment);
         av_log(ctx, AV_LOG_ERROR, "Invalid packet received\n");
diff --git a/libavformat/rtpdec_qt.c b/libavformat/rtpdec_qt.c
index ba701dc6..2d1b58e7 100644
--- a/libavformat/rtpdec_qt.c
+++ b/libavformat/rtpdec_qt.c
@@ -174,14 +174,14 @@ static int qt_rtp_parse_packet(AVFormatContext *s, PayloadContext *qt,
         if (qt->pkt.size > 0 && qt->timestamp == *timestamp) {
             int err;
             if ((err = av_reallocp(&qt->pkt.data, qt->pkt.size + alen +
-                                   FF_INPUT_BUFFER_PADDING_SIZE)) < 0) {
+                                   AV_INPUT_BUFFER_PADDING_SIZE)) < 0) {
                 qt->pkt.size = 0;
                 return err;
             }
         } else {
             av_freep(&qt->pkt.data);
             av_init_packet(&qt->pkt);
-            qt->pkt.data = av_realloc(NULL, alen + FF_INPUT_BUFFER_PADDING_SIZE);
+            qt->pkt.data = av_realloc(NULL, alen + AV_INPUT_BUFFER_PADDING_SIZE);
             if (!qt->pkt.data)
                 return AVERROR(ENOMEM);
             qt->pkt.size = 0;
@@ -198,7 +198,7 @@ static int qt_rtp_parse_packet(AVFormatContext *s, PayloadContext *qt,
             qt->pkt.data = NULL;
             pkt->flags        = keyframe ? AV_PKT_FLAG_KEY : 0;
             pkt->stream_index = st->index;
-            memset(pkt->data + pkt->size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
+            memset(pkt->data + pkt->size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
             return 0;
         }
         return AVERROR(EAGAIN);
@@ -217,7 +217,7 @@ static int qt_rtp_parse_packet(AVFormatContext *s, PayloadContext *qt,
             av_freep(&qt->pkt.data);
             qt->pkt.data = av_realloc(NULL, qt->remaining * qt->bytes_per_frame);
             if (!qt->pkt.data) {
-                av_free_packet(pkt);
+                av_packet_unref(pkt);
                 return AVERROR(ENOMEM);
             }
             qt->pkt.size = qt->remaining * qt->bytes_per_frame;
diff --git a/libavformat/rtpdec_xiph.c b/libavformat/rtpdec_xiph.c
index eceb8405..6d9d0fd0 100644
--- a/libavformat/rtpdec_xiph.c
+++ b/libavformat/rtpdec_xiph.c
@@ -141,7 +141,7 @@ static int xiph_handle_packet(AVFormatContext *ctx, PayloadContext *data,
                 data->split_buf = av_malloc(data->split_buf_size);
                 if (!data->split_buf) {
                     av_log(ctx, AV_LOG_ERROR, "Out of memory.\n");
-                    av_free_packet(pkt);
+                    av_packet_unref(pkt);
                     return AVERROR(ENOMEM);
                 }
             }
@@ -262,8 +262,8 @@ parse_packed_headers(const uint8_t * packed_headers,
     /* allocate extra space:
      * -- length/255 +2 for xiphlacing
      * -- one for the '2' marker
-     * -- FF_INPUT_BUFFER_PADDING_SIZE required */
-    extradata_alloc = length + length/255 + 3 + FF_INPUT_BUFFER_PADDING_SIZE;
+     * -- AV_INPUT_BUFFER_PADDING_SIZE required */
+    extradata_alloc = length + length/255 + 3 + AV_INPUT_BUFFER_PADDING_SIZE;
 
     if (ff_alloc_extradata(codec, extradata_alloc)) {
         av_log(codec, AV_LOG_ERROR, "Out of memory\n");
diff --git a/libavformat/rtpenc.c b/libavformat/rtpenc.c
index 31569d60..00b69f57 100644
--- a/libavformat/rtpenc.c
+++ b/libavformat/rtpenc.c
@@ -271,7 +271,8 @@ static void rtcp_send_sr(AVFormatContext *s1, int64_t ntp_time, int bye)
     avio_w8(s1->pb, RTCP_SR);
     avio_wb16(s1->pb, 6); /* length in words - 1 */
     avio_wb32(s1->pb, s->ssrc);
-    avio_wb64(s1->pb, NTP_TO_RTP_FORMAT(ntp_time));
+    avio_wb32(s1->pb, ntp_time / 1000000);
+    avio_wb32(s1->pb, ((ntp_time % 1000000) << 32) / 1000000);
     avio_wb32(s1->pb, rtp_ts);
     avio_wb32(s1->pb, s->packet_count);
     avio_wb32(s1->pb, s->octet_count);
diff --git a/libavformat/rtpenc_chain.c b/libavformat/rtpenc_chain.c
index 74f306eb..96e65efb 100644
--- a/libavformat/rtpenc_chain.c
+++ b/libavformat/rtpenc_chain.c
@@ -58,7 +58,7 @@ int ff_rtp_chain_mux_open(AVFormatContext **out, AVFormatContext *s,
     rtpctx->max_delay = s->max_delay;
     /* Copy other stream parameters. */
     rtpctx->streams[0]->sample_aspect_ratio = st->sample_aspect_ratio;
-    rtpctx->flags |= s->flags & AVFMT_FLAG_MP4A_LATM;
+    rtpctx->flags |= s->flags & (AVFMT_FLAG_MP4A_LATM | AVFMT_FLAG_BITEXACT);
 
     /* Get the payload type from the codec */
     if (st->id < RTP_PT_PRIVATE)
diff --git a/libavformat/rtpenc_jpeg.c b/libavformat/rtpenc_jpeg.c
index 7ee26c43..72c07c78 100644
--- a/libavformat/rtpenc_jpeg.c
+++ b/libavformat/rtpenc_jpeg.c
@@ -21,13 +21,14 @@
 
 #include "libavcodec/bytestream.h"
 #include "libavcodec/mjpeg.h"
+#include "libavcodec/jpegtables.h"
 #include "libavutil/intreadwrite.h"
 #include "rtpenc.h"
 
 void ff_rtp_send_jpeg(AVFormatContext *s1, const uint8_t *buf, int size)
 {
     RTPMuxContext *s = s1->priv_data;
-    const uint8_t *qtables = NULL;
+    const uint8_t *qtables[4] = { NULL };
     int nb_qtables = 0;
     uint8_t type;
     uint8_t w, h;
@@ -35,13 +36,14 @@ void ff_rtp_send_jpeg(AVFormatContext *s1, const uint8_t *buf, int size)
     int off = 0; /* fragment offset of the current JPEG frame */
     int len;
     int i;
+    int default_huffman_tables = 0;
 
     s->buf_ptr   = s->buf;
     s->timestamp = s->cur_timestamp;
 
     /* convert video pixel dimensions from pixels to blocks */
-    w = FF_CEIL_RSHIFT(s1->streams[0]->codec->width, 3);
-    h = FF_CEIL_RSHIFT(s1->streams[0]->codec->height, 3);
+    w = AV_CEIL_RSHIFT(s1->streams[0]->codec->width, 3);
+    h = AV_CEIL_RSHIFT(s1->streams[0]->codec->height, 3);
 
     /* get the pixel format type or fail */
     if (s1->streams[0]->codec->pix_fmt == AV_PIX_FMT_YUVJ422P ||
@@ -63,24 +65,93 @@ void ff_rtp_send_jpeg(AVFormatContext *s1, const uint8_t *buf, int size)
             continue;
 
         if (buf[i + 1] == DQT) {
-            if (buf[i + 4])
+            int tables, j;
+            if (buf[i + 4] & 0xF0)
                 av_log(s1, AV_LOG_WARNING,
                        "Only 8-bit precision is supported.\n");
 
             /* a quantization table is 64 bytes long */
-            nb_qtables = AV_RB16(&buf[i + 2]) / 65;
-            if (i + 4 + nb_qtables * 65 > size) {
+            tables = AV_RB16(&buf[i + 2]) / 65;
+            if (i + 5 + tables * 65 > size) {
                 av_log(s1, AV_LOG_ERROR, "Too short JPEG header. Aborted!\n");
                 return;
             }
+            if (nb_qtables + tables > 4) {
+                av_log(s1, AV_LOG_ERROR, "Invalid number of quantisation tables\n");
+                return;
+            }
 
-            qtables = &buf[i + 4];
+            for (j = 0; j < tables; j++)
+                qtables[nb_qtables + j] = buf + i + 5 + j * 65;
+            nb_qtables += tables;
         } else if (buf[i + 1] == SOF0) {
             if (buf[i + 14] != 17 || buf[i + 17] != 17) {
                 av_log(s1, AV_LOG_ERROR,
                        "Only 1x1 chroma blocks are supported. Aborted!\n");
                 return;
             }
+        } else if (buf[i + 1] == DHT) {
+            int dht_size = AV_RB16(&buf[i + 2]);
+            default_huffman_tables |= 1 << 4;
+            i += 3;
+            dht_size -= 2;
+            if (i + dht_size >= size)
+                continue;
+            while (dht_size > 0)
+                switch (buf[i + 1]) {
+                case 0x00:
+                    if (   dht_size >= 29
+                        && !memcmp(buf + i +  2, avpriv_mjpeg_bits_dc_luminance + 1, 16)
+                        && !memcmp(buf + i + 18, avpriv_mjpeg_val_dc, 12)) {
+                        default_huffman_tables |= 1;
+                        i += 29;
+                        dht_size -= 29;
+                    } else {
+                        i += dht_size;
+                        dht_size = 0;
+                    }
+                    break;
+                case 0x01:
+                    if (   dht_size >= 29
+                        && !memcmp(buf + i +  2, avpriv_mjpeg_bits_dc_chrominance + 1, 16)
+                        && !memcmp(buf + i + 18, avpriv_mjpeg_val_dc, 12)) {
+                        default_huffman_tables |= 1 << 1;
+                        i += 29;
+                        dht_size -= 29;
+                    } else {
+                        i += dht_size;
+                        dht_size = 0;
+                    }
+                    break;
+                case 0x10:
+                    if (   dht_size >= 179
+                        && !memcmp(buf + i +  2, avpriv_mjpeg_bits_ac_luminance   + 1, 16)
+                        && !memcmp(buf + i + 18, avpriv_mjpeg_val_ac_luminance, 162)) {
+                        default_huffman_tables |= 1 << 2;
+                        i += 179;
+                        dht_size -= 179;
+                    } else {
+                        i += dht_size;
+                        dht_size = 0;
+                    }
+                    break;
+                case 0x11:
+                    if (   dht_size >= 179
+                        && !memcmp(buf + i +  2, avpriv_mjpeg_bits_ac_chrominance + 1, 16)
+                        && !memcmp(buf + i + 18, avpriv_mjpeg_val_ac_chrominance, 162)) {
+                        default_huffman_tables |= 1 << 3;
+                        i += 179;
+                        dht_size -= 179;
+                    } else {
+                        i += dht_size;
+                        dht_size = 0;
+                    }
+                    break;
+                default:
+                    i += dht_size;
+                    dht_size = 0;
+                    continue;
+            }
         } else if (buf[i + 1] == SOS) {
             /* SOS is last marker in the header */
             i += AV_RB16(&buf[i + 2]) + 2;
@@ -92,6 +163,15 @@ void ff_rtp_send_jpeg(AVFormatContext *s1, const uint8_t *buf, int size)
             break;
         }
     }
+    if (default_huffman_tables && default_huffman_tables != 31) {
+        av_log(s1, AV_LOG_ERROR,
+               "RFC 2435 requires standard Huffman tables for jpeg\n");
+        return;
+    }
+    if (nb_qtables && nb_qtables != 2)
+        av_log(s1, AV_LOG_WARNING,
+               "RFC 2435 suggests two quantization tables, %d provided\n",
+               nb_qtables);
 
     /* skip JPEG header */
     buf  += i;
@@ -130,7 +210,7 @@ void ff_rtp_send_jpeg(AVFormatContext *s1, const uint8_t *buf, int size)
             bytestream_put_be16(&p, 64 * nb_qtables);
 
             for (i = 0; i < nb_qtables; i++)
-                bytestream_put_buffer(&p, &qtables[65 * i + 1], 64);
+                bytestream_put_buffer(&p, qtables[i], 64);
         }
 
         /* copy payload data */
diff --git a/libavformat/rtpproto.c b/libavformat/rtpproto.c
index d3e2ca01..538a7b24 100644
--- a/libavformat/rtpproto.c
+++ b/libavformat/rtpproto.c
@@ -69,8 +69,8 @@ static const AVOption options[] = {
     { "rtcp_port",          "Custom rtcp port",                                                 OFFSET(rtcp_port),       AV_OPT_TYPE_INT,    { .i64 = -1 },    -1, INT_MAX, .flags = D|E },
     { "local_rtpport",      "Local rtp port",                                                   OFFSET(local_rtpport),   AV_OPT_TYPE_INT,    { .i64 = -1 },    -1, INT_MAX, .flags = D|E },
     { "local_rtcpport",     "Local rtcp port",                                                  OFFSET(local_rtcpport),  AV_OPT_TYPE_INT,    { .i64 = -1 },    -1, INT_MAX, .flags = D|E },
-    { "connect",            "Connect socket",                                                   OFFSET(connect),         AV_OPT_TYPE_INT,    { .i64 =  0 },     0, 1,       .flags = D|E },
-    { "write_to_source",    "Send packets to the source address of the latest received packet", OFFSET(write_to_source), AV_OPT_TYPE_INT,    { .i64 =  0 },     0, 1,       .flags = D|E },
+    { "connect",            "Connect socket",                                                   OFFSET(connect),         AV_OPT_TYPE_BOOL,   { .i64 =  0 },     0, 1,       .flags = D|E },
+    { "write_to_source",    "Send packets to the source address of the latest received packet", OFFSET(write_to_source), AV_OPT_TYPE_BOOL,   { .i64 =  0 },     0, 1,       .flags = D|E },
     { "pkt_size",           "Maximum packet size",                                              OFFSET(pkt_size),        AV_OPT_TYPE_INT,    { .i64 = -1 },    -1, INT_MAX, .flags = D|E },
     { "dscp",               "DSCP class",                                                       OFFSET(dscp),            AV_OPT_TYPE_INT,    { .i64 = -1 },    -1, INT_MAX, .flags = D|E },
     { "sources",            "Source list",                                                      OFFSET(sources),         AV_OPT_TYPE_STRING, { .str = NULL },               .flags = D|E },
@@ -380,7 +380,8 @@ static int rtp_open(URLContext *h, const char *uri, int flags)
         build_udp_url(s, buf, sizeof(buf),
                       hostname, rtp_port, s->local_rtpport,
                       sources, block);
-        if (ffurl_open(&s->rtp_hd, buf, flags, &h->interrupt_callback, NULL) < 0)
+        if (ffurl_open_whitelist(&s->rtp_hd, buf, flags, &h->interrupt_callback,
+                                 NULL, h->protocol_whitelist) < 0)
             goto fail;
         s->local_rtpport = ff_udp_get_local_port(s->rtp_hd);
         if(s->local_rtpport == 65535) {
@@ -392,7 +393,9 @@ static int rtp_open(URLContext *h, const char *uri, int flags)
             build_udp_url(s, buf, sizeof(buf),
                           hostname, s->rtcp_port, s->local_rtcpport,
                           sources, block);
-            if (ffurl_open(&s->rtcp_hd, buf, flags, &h->interrupt_callback, NULL) < 0) {
+            if (ffurl_open_whitelist(&s->rtcp_hd, buf, flags,
+                                     &h->interrupt_callback, NULL,
+                                     h->protocol_whitelist) < 0) {
                 s->local_rtpport = s->local_rtcpport = -1;
                 continue;
             }
@@ -401,7 +404,8 @@ static int rtp_open(URLContext *h, const char *uri, int flags)
         build_udp_url(s, buf, sizeof(buf),
                       hostname, s->rtcp_port, s->local_rtcpport,
                       sources, block);
-        if (ffurl_open(&s->rtcp_hd, buf, flags, &h->interrupt_callback, NULL) < 0)
+        if (ffurl_open_whitelist(&s->rtcp_hd, buf, flags, &h->interrupt_callback,
+                                 NULL, h->protocol_whitelist) < 0)
             goto fail;
         break;
     }
diff --git a/libavformat/rtsp.c b/libavformat/rtsp.c
index 98bd4cbe..d7104696 100644
--- a/libavformat/rtsp.c
+++ b/libavformat/rtsp.c
@@ -80,7 +80,7 @@
 
 
 const AVOption ff_rtsp_options[] = {
-    { "initial_pause",  "do not start playing the stream immediately", OFFSET(initial_pause), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, DEC },
+    { "initial_pause",  "do not start playing the stream immediately", OFFSET(initial_pause), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, DEC },
     FF_RTP_FLAG_OPTS(RTSPState, rtp_muxer_flags),
     { "rtsp_transport", "set RTSP transport protocols", OFFSET(lower_transport_mask), AV_OPT_TYPE_FLAGS, {.i64 = 0}, INT_MIN, INT_MAX, DEC|ENC, "rtsp_transport" }, \
     { "udp", "UDP", 0, AV_OPT_TYPE_CONST, {.i64 = 1 << RTSP_LOWER_TRANSPORT_UDP}, 0, 0, DEC|ENC, "rtsp_transport" }, \
@@ -184,12 +184,19 @@ static void rtsp_parse_range_npt(const char *p, int64_t *start, int64_t *end)
     }
 }
 
-static int get_sockaddr(const char *buf, struct sockaddr_storage *sock)
+static int get_sockaddr(AVFormatContext *s,
+                        const char *buf, struct sockaddr_storage *sock)
 {
     struct addrinfo hints = { 0 }, *ai = NULL;
+    int ret;
+
     hints.ai_flags = AI_NUMERICHOST;
-    if (getaddrinfo(buf, NULL, &hints, &ai))
+    if ((ret = getaddrinfo(buf, NULL, &hints, &ai))) {
+        av_log(s, AV_LOG_ERROR, "getaddrinfo(%s): %s\n",
+               buf,
+               gai_strerror(ret));
         return -1;
+    }
     memcpy(sock, ai->ai_addr, FFMIN(sizeof(*sock), ai->ai_addrlen));
     freeaddrinfo(ai);
     return 0;
@@ -397,7 +404,7 @@ static void sdp_parse_line(AVFormatContext *s, SDPParseState *s1,
         if (strcmp(buf1, "IP4") && strcmp(buf1, "IP6"))
             return;
         get_word_sep(buf1, sizeof(buf1), "/", &p);
-        if (get_sockaddr(buf1, &sdp_ip))
+        if (get_sockaddr(s, buf1, &sdp_ip))
             return;
         ttl = 16;
         if (*p == '/') {
@@ -861,7 +868,8 @@ static void rtsp_parse_range(int *min_ptr, int *max_ptr, const char **pp)
 }
 
 /* XXX: only one transport specification is parsed */
-static void rtsp_parse_transport(RTSPMessageHeader *reply, const char *p)
+static void rtsp_parse_transport(AVFormatContext *s,
+                                 RTSPMessageHeader *reply, const char *p)
 {
     char transport_protocol[16];
     char profile[16];
@@ -953,7 +961,7 @@ static void rtsp_parse_transport(RTSPMessageHeader *reply, const char *p)
                 if (*p == '=') {
                     p++;
                     get_word_sep(buf, sizeof(buf), ";,", &p);
-                    get_sockaddr(buf, &th->destination);
+                    get_sockaddr(s, buf, &th->destination);
                 }
             } else if (!strcmp(parameter, "source")) {
                 if (*p == '=') {
@@ -1040,7 +1048,8 @@ static void rtsp_parse_rtp_info(RTSPState *rt, const char *p)
         handle_rtp_info(rt, url, seq, rtptime);
 }
 
-void ff_rtsp_parse_line(RTSPMessageHeader *reply, const char *buf,
+void ff_rtsp_parse_line(AVFormatContext *s,
+                        RTSPMessageHeader *reply, const char *buf,
                         RTSPState *rt, const char *method)
 {
     const char *p;
@@ -1057,7 +1066,7 @@ void ff_rtsp_parse_line(RTSPMessageHeader *reply, const char *buf,
     } else if (av_stristart(p, "Content-Length:", &p)) {
         reply->content_length = strtol(p, NULL, 10);
     } else if (av_stristart(p, "Transport:", &p)) {
-        rtsp_parse_transport(reply, p);
+        rtsp_parse_transport(s, reply, p);
     } else if (av_stristart(p, "CSeq:", &p)) {
         reply->seq = strtol(p, NULL, 10);
     } else if (av_stristart(p, "Range:", &p)) {
@@ -1155,8 +1164,7 @@ int ff_rtsp_read_reply(AVFormatContext *s, RTSPMessageHeader *reply,
                 return AVERROR_EOF;
             if (ch == '\n')
                 break;
-            if (ch == '$') {
-                /* XXX: only parse it if first char on line ? */
+            if (ch == '$' && q == buf) {
                 if (return_on_interleaved_data) {
                     return 1;
                 } else
@@ -1187,7 +1195,7 @@ int ff_rtsp_read_reply(AVFormatContext *s, RTSPMessageHeader *reply,
                 request = 1;
             }
         } else {
-            ff_rtsp_parse_line(reply, p, rt, method);
+            ff_rtsp_parse_line(s, reply, p, rt, method);
             av_strlcat(rt->last_reply, p,    sizeof(rt->last_reply));
             av_strlcat(rt->last_reply, "\n", sizeof(rt->last_reply));
         }
@@ -1460,8 +1468,8 @@ int ff_rtsp_make_setup_request(AVFormatContext *s, const char *host, int port,
                             "?localport=%d", j);
                 /* we will use two ports per rtp stream (rtp and rtcp) */
                 j += 2;
-                err = ffurl_open(&rtsp_st->rtp_handle, buf, AVIO_FLAG_READ_WRITE,
-                                 &s->interrupt_callback, &opts);
+                err = ffurl_open_whitelist(&rtsp_st->rtp_handle, buf, AVIO_FLAG_READ_WRITE,
+                                 &s->interrupt_callback, &opts, s->protocol_whitelist);
 
                 av_dict_free(&opts);
 
@@ -1603,8 +1611,8 @@ int ff_rtsp_make_setup_request(AVFormatContext *s, const char *host, int port,
                         namebuf, sizeof(namebuf), NULL, 0, NI_NUMERICHOST);
             ff_url_join(url, sizeof(url), "rtp", NULL, namebuf,
                         port, "%s", optbuf);
-            if (ffurl_open(&rtsp_st->rtp_handle, url, AVIO_FLAG_READ_WRITE,
-                           &s->interrupt_callback, NULL) < 0) {
+            if (ffurl_open_whitelist(&rtsp_st->rtp_handle, url, AVIO_FLAG_READ_WRITE,
+                           &s->interrupt_callback, NULL, s->protocol_whitelist) < 0) {
                 err = AVERROR_INVALIDDATA;
                 goto fail;
             }
@@ -1792,8 +1800,8 @@ int ff_rtsp_connect(AVFormatContext *s)
         ff_url_join(tcpname, sizeof(tcpname), lower_rtsp_proto, NULL,
                     host, port,
                     "?timeout=%d", rt->stimeout);
-        if ((ret = ffurl_open(&rt->rtsp_hd, tcpname, AVIO_FLAG_READ_WRITE,
-                       &s->interrupt_callback, NULL)) < 0) {
+        if ((ret = ffurl_open_whitelist(&rt->rtsp_hd, tcpname, AVIO_FLAG_READ_WRITE,
+                       &s->interrupt_callback, NULL, s->protocol_whitelist)) < 0) {
             err = ret;
             goto fail;
         }
@@ -2122,6 +2130,8 @@ int ff_rtsp_fetch_packet(AVFormatContext *s, AVPacket *pkt)
     }
     if (len == AVERROR(EAGAIN) && first_queue_st &&
         rt->transport == RTSP_TRANSPORT_RTP) {
+        av_log(s, AV_LOG_WARNING,
+                "max delay reached. need to consume packet\n");
         rtsp_st = first_queue_st;
         ret = ff_rtp_parse_packet(rtsp_st->transport_priv, pkt, NULL, 0);
         goto end;
@@ -2284,8 +2294,15 @@ static int sdp_read_header(AVFormatContext *s)
         if (!(rt->rtsp_flags & RTSP_FLAG_CUSTOM_IO)) {
             AVDictionary *opts = map_to_opts(rt);
 
-            getnameinfo((struct sockaddr*) &rtsp_st->sdp_ip, sizeof(rtsp_st->sdp_ip),
-                        namebuf, sizeof(namebuf), NULL, 0, NI_NUMERICHOST);
+            err = getnameinfo((struct sockaddr*) &rtsp_st->sdp_ip,
+                              sizeof(rtsp_st->sdp_ip),
+                              namebuf, sizeof(namebuf), NULL, 0, NI_NUMERICHOST);
+            if (err) {
+                av_log(s, AV_LOG_ERROR, "getnameinfo: %s\n", gai_strerror(err));
+                err = AVERROR(EIO);
+                av_dict_free(&opts);
+                goto fail;
+            }
             ff_url_join(url, sizeof(url), "rtp", NULL,
                         namebuf, rtsp_st->sdp_port,
                         "?localport=%d&ttl=%d&connect=%d&write_to_source=%d",
@@ -2299,8 +2316,8 @@ static int sdp_read_header(AVFormatContext *s)
             append_source_addrs(url, sizeof(url), "block",
                                 rtsp_st->nb_exclude_source_addrs,
                                 rtsp_st->exclude_source_addrs);
-            err = ffurl_open(&rtsp_st->rtp_handle, url, AVIO_FLAG_READ_WRITE,
-                           &s->interrupt_callback, &opts);
+            err = ffurl_open_whitelist(&rtsp_st->rtp_handle, url, AVIO_FLAG_READ_WRITE,
+                           &s->interrupt_callback, &opts, s->protocol_whitelist);
 
             av_dict_free(&opts);
 
@@ -2369,8 +2386,8 @@ static int rtp_read_header(AVFormatContext *s)
     if (!ff_network_init())
         return AVERROR(EIO);
 
-    ret = ffurl_open(&in, s->filename, AVIO_FLAG_READ,
-                     &s->interrupt_callback, NULL);
+    ret = ffurl_open_whitelist(&in, s->filename, AVIO_FLAG_READ,
+                     &s->interrupt_callback, NULL, s->protocol_whitelist);
     if (ret)
         goto fail;
 
diff --git a/libavformat/rtsp.h b/libavformat/rtsp.h
index d9e76ec6..bd837b57 100644
--- a/libavformat/rtsp.h
+++ b/libavformat/rtsp.h
@@ -470,7 +470,8 @@ typedef struct RTSPStream {
     char crypto_params[100];
 } RTSPStream;
 
-void ff_rtsp_parse_line(RTSPMessageHeader *reply, const char *buf,
+void ff_rtsp_parse_line(AVFormatContext *s,
+                        RTSPMessageHeader *reply, const char *buf,
                         RTSPState *rt, const char *method);
 
 /**
diff --git a/libavformat/rtspdec.c b/libavformat/rtspdec.c
index 3c0010e0..17f04c02 100644
--- a/libavformat/rtspdec.c
+++ b/libavformat/rtspdec.c
@@ -151,7 +151,7 @@ static inline int rtsp_read_request(AVFormatContext *s,
             return ret;
         if (rbuflen > 1) {
             av_log(s, AV_LOG_TRACE, "Parsing[%d]: %s\n", rbuflen, rbuf);
-            ff_rtsp_parse_line(request, rbuf, rt, method);
+            ff_rtsp_parse_line(s, request, rbuf, rt, method);
         }
     } while (rbuflen > 0);
     if (request->seq != rt->seq + 1) {
@@ -294,8 +294,9 @@ static int rtsp_read_setup(AVFormatContext *s, char* host, char *controlurl)
             av_dict_set(&opts, "buffer_size", buf, 0);
             ff_url_join(url, sizeof(url), "rtp", NULL, host, localport, NULL);
             av_log(s, AV_LOG_TRACE, "Opening: %s", url);
-            ret = ffurl_open(&rtsp_st->rtp_handle, url, AVIO_FLAG_READ_WRITE,
-                             &s->interrupt_callback, &opts);
+            ret = ffurl_open_whitelist(&rtsp_st->rtp_handle, url, AVIO_FLAG_READ_WRITE,
+                                       &s->interrupt_callback, &opts,
+                                       s->protocol_whitelist);
             av_dict_free(&opts);
             if (ret)
                 localport += 2;
@@ -662,8 +663,9 @@ static int rtsp_listen(AVFormatContext *s)
     ff_url_join(tcpname, sizeof(tcpname), lower_proto, NULL, host, port,
                 "?listen&listen_timeout=%d", rt->initial_timeout * 1000);
 
-    if (ret = ffurl_open(&rt->rtsp_hd, tcpname, AVIO_FLAG_READ_WRITE,
-                         &s->interrupt_callback, NULL)) {
+    if (ret = ffurl_open_whitelist(&rt->rtsp_hd, tcpname, AVIO_FLAG_READ_WRITE,
+                                   &s->interrupt_callback, NULL,
+                                   s->protocol_whitelist)) {
         av_log(s, AV_LOG_ERROR, "Unable to open RTSP for listening\n");
         return ret;
     }
diff --git a/libavformat/samidec.c b/libavformat/samidec.c
index 948e1ed8..11c674ce 100644
--- a/libavformat/samidec.c
+++ b/libavformat/samidec.c
@@ -68,11 +68,17 @@ static int sami_read_header(AVFormatContext *s)
     while (!ff_text_eof(&tr)) {
         AVPacket *sub;
         const int64_t pos = ff_text_pos(&tr) - (c != 0);
-        int is_sync, n = ff_smil_extract_next_text_chunk(&tr, &buf, &c);
+        int is_sync, is_body, n = ff_smil_extract_next_text_chunk(&tr, &buf, &c);
 
         if (n == 0)
             break;
 
+        is_body = !av_strncasecmp(buf.str, "</BODY", 6);
+        if (is_body) {
+             av_bprint_clear(&buf);
+             break;
+        }
+
         is_sync = !av_strncasecmp(buf.str, "<SYNC", 5);
         if (is_sync)
             got_first_sync_point = 1;
@@ -99,7 +105,7 @@ static int sami_read_header(AVFormatContext *s)
     if (res < 0)
         goto end;
 
-    ff_subtitles_queue_finalize(&sami->q);
+    ff_subtitles_queue_finalize(s, &sami->q);
 
 end:
     av_bprint_finalize(&buf, NULL);
diff --git a/libavformat/sapdec.c b/libavformat/sapdec.c
index 2dd8524b..926795b6 100644
--- a/libavformat/sapdec.c
+++ b/libavformat/sapdec.c
@@ -85,8 +85,9 @@ static int sap_read_header(AVFormatContext *s)
 
     ff_url_join(url, sizeof(url), "udp", NULL, host, port, "?localport=%d",
                 port);
-    ret = ffurl_open(&sap->ann_fd, url, AVIO_FLAG_READ,
-                     &s->interrupt_callback, NULL);
+    ret = ffurl_open_whitelist(&sap->ann_fd, url, AVIO_FLAG_READ,
+                               &s->interrupt_callback, NULL,
+                               s->protocol_whitelist);
     if (ret)
         goto fail;
 
@@ -220,7 +221,7 @@ static int sap_fetch_packet(AVFormatContext *s, AVPacket *pkt)
             int i = s->nb_streams;
             AVStream *st = avformat_new_stream(s, NULL);
             if (!st) {
-                av_free_packet(pkt);
+                av_packet_unref(pkt);
                 return AVERROR(ENOMEM);
             }
             st->id = i;
diff --git a/libavformat/sapenc.c b/libavformat/sapenc.c
index 07fbf48e..b2f64b80 100644
--- a/libavformat/sapenc.c
+++ b/libavformat/sapenc.c
@@ -149,7 +149,9 @@ static int sap_write_header(AVFormatContext *s)
                     "?ttl=%d", ttl);
         if (!same_port)
             base_port += 2;
-        ret = ffurl_open(&fd, url, AVIO_FLAG_WRITE, &s->interrupt_callback, NULL);
+        ret = ffurl_open_whitelist(&fd, url, AVIO_FLAG_WRITE,
+                                   &s->interrupt_callback, NULL,
+                                   s->protocol_whitelist);
         if (ret) {
             ret = AVERROR(EIO);
             goto fail;
@@ -167,8 +169,9 @@ static int sap_write_header(AVFormatContext *s)
 
     ff_url_join(url, sizeof(url), "udp", NULL, announce_addr, port,
                 "?ttl=%d&connect=1", ttl);
-    ret = ffurl_open(&sap->ann_fd, url, AVIO_FLAG_WRITE,
-                     &s->interrupt_callback, NULL);
+    ret = ffurl_open_whitelist(&sap->ann_fd, url, AVIO_FLAG_WRITE,
+                               &s->interrupt_callback, NULL,
+                               s->protocol_whitelist);
     if (ret) {
         ret = AVERROR(EIO);
         goto fail;
diff --git a/libavformat/sctp.c b/libavformat/sctp.c
index 1d826157..5fee7e3b 100644
--- a/libavformat/sctp.c
+++ b/libavformat/sctp.c
@@ -161,7 +161,7 @@ typedef struct SCTPContext {
 #define D AV_OPT_FLAG_DECODING_PARAM
 #define E AV_OPT_FLAG_ENCODING_PARAM
 static const AVOption options[] = {
-    { "listen",          "Listen for incoming connections",  OFFSET(listen),         AV_OPT_TYPE_INT, { .i64 = 0 },     0,       1,         .flags = D|E },
+    { "listen",          "Listen for incoming connections",  OFFSET(listen),         AV_OPT_TYPE_BOOL,{ .i64 = 0 },     0,       1,         .flags = D|E },
     { "timeout",         "Connection timeout (in milliseconds)", OFFSET(timeout),    AV_OPT_TYPE_INT, { .i64 = 10000 }, INT_MIN, INT_MAX,   .flags = D|E },
     { "listen_timeout",  "Bind timeout (in milliseconds)",   OFFSET(listen_timeout), AV_OPT_TYPE_INT, { .i64 = -1 },    INT_MIN, INT_MAX,   .flags = D|E },
     { "max_streams",     "Max stream to allocate",           OFFSET(max_streams), AV_OPT_TYPE_INT, { .i64 = 0 },              0, INT16_MAX, .flags = D|E },
diff --git a/libavformat/sdp.c b/libavformat/sdp.c
index 4d621c7f..2ab37a8f 100644
--- a/libavformat/sdp.c
+++ b/libavformat/sdp.c
@@ -685,7 +685,7 @@ static char *sdp_write_media_attributes(char *buff, int size, AVCodecContext *c,
                 const char *mode;
                 uint64_t vad_option;
 
-                if (c->flags & CODEC_FLAG_QSCALE)
+                if (c->flags & AV_CODEC_FLAG_QSCALE)
                       mode = "on";
                 else if (!av_opt_get_int(c, "vad", AV_OPT_FLAG_ENCODING_PARAM, &vad_option) && vad_option)
                       mode = "vad";
@@ -740,7 +740,7 @@ void ff_sdp_write_media(char *buff, int size, AVStream *st, int idx,
     av_strlcatf(buff, size, "m=%s %d RTP/AVP %d\r\n", type, port, payload_type);
     sdp_write_address(buff, size, dest_addr, dest_type, ttl);
     if (c->bit_rate) {
-        av_strlcatf(buff, size, "b=AS:%d\r\n", c->bit_rate / 1000);
+        av_strlcatf(buff, size, "b=AS:%"PRId64"\r\n", (int64_t)c->bit_rate / 1000);
     }
 
     sdp_write_media_attributes(buff, size, c, payload_type, fmt);
diff --git a/libavformat/sdr2.c b/libavformat/sdr2.c
index 82405f69..edb454ca 100644
--- a/libavformat/sdr2.c
+++ b/libavformat/sdr2.c
@@ -95,7 +95,7 @@ static int sdr2_read_packet(AVFormatContext *s, AVPacket *pkt)
         memcpy(pkt->data, header, 24);
         ret = avio_read(s->pb, pkt->data + 24, next - 52);
         if (ret < 0) {
-            av_free_packet(pkt);
+            av_packet_unref(pkt);
             return ret;
         }
         av_shrink_packet(pkt, ret + 24);
diff --git a/libavformat/seek-test.c b/libavformat/seek-test.c
index 1dd041d8..bfd06db3 100644
--- a/libavformat/seek-test.c
+++ b/libavformat/seek-test.c
@@ -56,7 +56,7 @@ static void ts_str(char buffer[60], int64_t ts, AVRational base)
 int main(int argc, char **argv)
 {
     const char *filename;
-    AVFormatContext *ic = NULL;
+    AVFormatContext *ic = avformat_alloc_context();
     int i, ret, stream_id;
     int j;
     int64_t timestamp;
@@ -76,8 +76,10 @@ int main(int argc, char **argv)
             frame_count = atoi(argv[i+1]);
         } else if(!strcmp(argv[i], "-duration")){
             duration = atoi(argv[i+1]);
-        } else if(!strcmp(argv[i], "-usetoc")) {
-            av_dict_set(&format_opts, "usetoc", argv[i+1], 0);
+        } else if(!strcmp(argv[i], "-fastseek")) {
+            if (atoi(argv[i+1])) {
+                ic->flags |= AVFMT_FLAG_FAST_SEEK;
+            }
         } else {
             argc = 1;
         }
@@ -128,7 +130,7 @@ int main(int argc, char **argv)
                 ts_str(dts_buf, pkt.dts, st->time_base);
                 ts_str(ts_buf,  pkt.pts, st->time_base);
                 printf("ret:%-10s st:%2d flags:%d dts:%s pts:%s pos:%7" PRId64 " size:%6d", ret_str(ret), pkt.stream_index, pkt.flags, dts_buf, ts_buf, pkt.pos, pkt.size);
-                av_free_packet(&pkt);
+                av_packet_unref(&pkt);
             } else
                 printf("ret:%s", ret_str(ret)); // necessary to avoid trailing whitespace
             printf("\n");
diff --git a/libavformat/segafilm.c b/libavformat/segafilm.c
index 44fa683a..94b37721 100644
--- a/libavformat/segafilm.c
+++ b/libavformat/segafilm.c
@@ -93,6 +93,7 @@ static int film_read_header(AVFormatContext *s)
     int i, ret;
     unsigned int data_offset;
     unsigned int audio_frame_counter;
+    unsigned int video_frame_counter;
 
     film->sample_table = NULL;
 
@@ -211,7 +212,7 @@ static int film_read_header(AVFormatContext *s)
             avpriv_set_pts_info(st, 64, 1, film->audio_samplerate);
     }
 
-    audio_frame_counter = 0;
+    audio_frame_counter = video_frame_counter = 0;
     for (i = 0; i < film->sample_count; i++) {
         /* load the next sample record and transfer it to an internal struct */
         if (avio_read(pb, scratch, 16) != 16) {
@@ -239,9 +240,22 @@ static int film_read_header(AVFormatContext *s)
             film->sample_table[i].stream = film->video_stream_index;
             film->sample_table[i].pts = AV_RB32(&scratch[8]) & 0x7FFFFFFF;
             film->sample_table[i].keyframe = (scratch[8] & 0x80) ? 0 : 1;
+            video_frame_counter++;
+            if (film->video_type)
+                av_add_index_entry(s->streams[film->video_stream_index],
+                                   film->sample_table[i].sample_offset,
+                                   film->sample_table[i].pts,
+                                   film->sample_table[i].sample_size, 0,
+                                   film->sample_table[i].keyframe);
         }
     }
 
+    if (film->audio_type)
+        s->streams[film->audio_stream_index]->duration = audio_frame_counter;
+
+    if (film->video_type)
+        s->streams[film->video_stream_index]->duration = video_frame_counter;
+
     film->current_sample = 0;
 
     return 0;
@@ -266,8 +280,7 @@ static int film_read_packet(AVFormatContext *s,
     /* position the stream (will probably be there anyway) */
     avio_seek(pb, sample->sample_offset, SEEK_SET);
 
-
-    ret= av_get_packet(pb, pkt, sample->sample_size);
+    ret = av_get_packet(pb, pkt, sample->sample_size);
     if (ret != sample->sample_size)
         ret = AVERROR(EIO);
 
@@ -279,6 +292,45 @@ static int film_read_packet(AVFormatContext *s,
     return ret;
 }
 
+/**
+ * Seek to the index entry of the given stream that matches timestamp.
+ *
+ * av_index_search_timestamp() returns an ordinal within the stream's own
+ * index, which film_read_header() fills from video samples only, whereas
+ * film->current_sample indexes the full sample table. The table slot is
+ * therefore looked up by stream and file offset instead of reusing the
+ * index ordinal directly.
+ *
+ * @return 0 on success, a negative error code otherwise
+ */
+static int film_read_seek(AVFormatContext *s, int stream_index, int64_t timestamp, int flags)
+{
+    FilmDemuxContext *film = s->priv_data;
+    AVStream *st = s->streams[stream_index];
+    int64_t pos;
+    unsigned int i;
+    int ret = av_index_search_timestamp(st, timestamp, flags);
+    if (ret < 0)
+        return ret;
+
+    /* map the index entry back to its slot in the sample table */
+    for (i = 0; i < film->sample_count; i++) {
+        if (film->sample_table[i].stream == stream_index &&
+            film->sample_table[i].sample_offset == st->index_entries[ret].pos)
+            break;
+    }
+    if (i == film->sample_count)
+        return AVERROR_INVALIDDATA;
+
+    pos = avio_seek(s->pb, st->index_entries[ret].pos, SEEK_SET);
+    if (pos < 0)
+        return pos;
+
+    film->current_sample = i;
+
+    return 0;
+}
+
 AVInputFormat ff_segafilm_demuxer = {
     .name           = "film_cpk",
     .long_name      = NULL_IF_CONFIG_SMALL("Sega FILM / CPK"),
@@ -287,4 +339,5 @@ AVInputFormat ff_segafilm_demuxer = {
     .read_header    = film_read_header,
     .read_packet    = film_read_packet,
     .read_close     = film_read_close,
+    .read_seek      = film_read_seek,
 };
diff --git a/libavformat/segment.c b/libavformat/segment.c
index 4418d0f3..dd3b0921 100644
--- a/libavformat/segment.c
+++ b/libavformat/segment.c
@@ -30,9 +30,11 @@
 #include <time.h>
 
 #include "avformat.h"
+#include "avio_internal.h"
 #include "internal.h"
 
 #include "libavutil/avassert.h"
+#include "libavutil/internal.h"
 #include "libavutil/log.h"
 #include "libavutil/opt.h"
 #include "libavutil/avstring.h"
@@ -81,6 +83,8 @@ typedef struct SegmentContext {
     int   list_size;       ///< number of entries for the segment list file
 
     int use_clocktime;    ///< flag to cut segments at regular clock time
+    int64_t clocktime_offset; ///< clock offset for cutting the segments at regular clock time
+    int64_t clocktime_wrap_duration; ///< wrapping duration considered for starting a new segment
     int64_t last_val;      ///< remember last time for wrap around detection
     int64_t last_cut;      ///< remember last cut
     int cut_pending;
@@ -113,6 +117,9 @@ typedef struct SegmentContext {
     int   reference_stream_index;
     int   break_non_keyframes;
 
+    int use_rename;
+    char temp_list_filename[1024];
+
     SegmentListEntry cur_entry;
     SegmentListEntry *segment_list_entries;
     SegmentListEntry *segment_list_entries_end;
@@ -149,6 +156,9 @@ static int segment_mux_init(AVFormatContext *s)
     oc->interrupt_callback = s->interrupt_callback;
     oc->max_delay          = s->max_delay;
     av_dict_copy(&oc->metadata, s->metadata, 0);
+    oc->opaque             = s->opaque;
+    oc->io_close           = s->io_close;
+    oc->io_open            = s->io_open;
 
     for (i = 0; i < s->nb_streams; i++) {
         AVStream *st;
@@ -179,6 +189,7 @@ static int set_segment_filename(AVFormatContext *s)
     SegmentContext *seg = s->priv_data;
     AVFormatContext *oc = seg->avf;
     size_t size;
+    int ret;
 
     if (seg->segment_idx_wrap)
         seg->segment_idx %= seg->segment_idx_wrap;
@@ -202,9 +213,8 @@ static int set_segment_filename(AVFormatContext *s)
     if (seg->entry_prefix)
         size += strlen(seg->entry_prefix);
 
-    seg->cur_entry.filename = av_mallocz(size);
-    if (!seg->cur_entry.filename)
-        return AVERROR(ENOMEM);
+    if ((ret = av_reallocp(&seg->cur_entry.filename, size)) < 0)
+        return ret;
     snprintf(seg->cur_entry.filename, size, "%s%s",
              seg->entry_prefix ? seg->entry_prefix : "",
              av_basename(oc->filename));
@@ -233,8 +243,7 @@ static int segment_start(AVFormatContext *s, int write_header)
     if ((err = set_segment_filename(s)) < 0)
         return err;
 
-    if ((err = avio_open2(&oc->pb, oc->filename, AVIO_FLAG_WRITE,
-                          &s->interrupt_callback, NULL)) < 0) {
+    if ((err = s->io_open(s, &oc->pb, oc->filename, AVIO_FLAG_WRITE, NULL)) < 0) {
         av_log(s, AV_LOG_ERROR, "Failed to open segment '%s'\n", oc->filename);
         return err;
     }
@@ -258,8 +267,8 @@ static int segment_list_open(AVFormatContext *s)
     SegmentContext *seg = s->priv_data;
     int ret;
 
-    ret = avio_open2(&seg->list_pb, seg->list, AVIO_FLAG_WRITE,
-                     &s->interrupt_callback, NULL);
+    snprintf(seg->temp_list_filename, sizeof(seg->temp_list_filename), seg->use_rename ? "%s.tmp" : "%s", seg->list);
+    ret = s->io_open(s, &seg->list_pb, seg->temp_list_filename, AVIO_FLAG_WRITE, NULL);
     if (ret < 0) {
         av_log(s, AV_LOG_ERROR, "Failed to open segment list '%s'\n", seg->list);
         return ret;
@@ -347,6 +356,7 @@ static int segment_end(AVFormatContext *s, int write_trailer, int is_last)
 
             /* append new element */
             memcpy(entry, &seg->cur_entry, sizeof(*entry));
+            entry->filename = av_strdup(entry->filename);
             if (!seg->segment_list_entries)
                 seg->segment_list_entries = seg->segment_list_entries_end = entry;
             else
@@ -367,7 +377,9 @@ static int segment_end(AVFormatContext *s, int write_trailer, int is_last)
                 segment_list_print_entry(seg->list_pb, seg->list_type, entry, s);
             if (seg->list_type == LIST_TYPE_M3U8 && is_last)
                 avio_printf(seg->list_pb, "#EXT-X-ENDLIST\n");
-            avio_closep(&seg->list_pb);
+            ff_format_io_close(s, &seg->list_pb);
+            if (seg->use_rename)
+                ff_rename(seg->temp_list_filename, seg->list, s);
         } else {
             segment_list_print_entry(seg->list_pb, seg->list_type, &seg->cur_entry, s);
             avio_flush(seg->list_pb);
@@ -379,7 +391,7 @@ static int segment_end(AVFormatContext *s, int write_trailer, int is_last)
     seg->segment_count++;
 
 end:
-    avio_closep(&oc->pb);
+    ff_format_io_close(oc, &oc->pb);
 
     return ret;
 }
@@ -580,7 +592,7 @@ static int select_reference_stream(AVFormatContext *s)
 
 static void seg_free_context(SegmentContext *seg)
 {
-    avio_closep(&seg->list_pb);
+    ff_format_io_close(seg->avf, &seg->list_pb);
     avformat_free_context(seg->avf);
     seg->avf = NULL;
 }
@@ -625,6 +637,13 @@ static int seg_write_header(AVFormatContext *s)
                    seg->time_str);
             return ret;
         }
+        if (seg->use_clocktime) {
+            if (seg->time <= 0) {
+                av_log(s, AV_LOG_ERROR, "Invalid negative segment_time with segment_atclocktime option set\n");
+                return AVERROR(EINVAL);
+            }
+            seg->clocktime_offset = seg->time - (seg->clocktime_offset % seg->time);
+        }
     }
 
     if (seg->format_options_str) {
@@ -644,9 +663,13 @@ static int seg_write_header(AVFormatContext *s)
             else if (av_match_ext(seg->list, "ffcat,ffconcat")) seg->list_type = LIST_TYPE_FFCONCAT;
             else                                      seg->list_type = LIST_TYPE_FLAT;
         }
-        if (!seg->list_size && seg->list_type != LIST_TYPE_M3U8)
+        if (!seg->list_size && seg->list_type != LIST_TYPE_M3U8) {
             if ((ret = segment_list_open(s)) < 0)
                 goto fail;
+        } else {
+            const char *proto = avio_find_protocol_name(s->filename);
+            seg->use_rename = proto && !strcmp(proto, "file");
+        }
     }
     if (seg->list_type == LIST_TYPE_EXT)
         av_log(s, AV_LOG_WARNING, "'ext' list type option is deprecated in favor of 'csv'\n");
@@ -678,8 +701,9 @@ static int seg_write_header(AVFormatContext *s)
         goto fail;
 
     if (seg->write_header_trailer) {
-        if ((ret = avio_open2(&oc->pb, seg->header_filename ? seg->header_filename : oc->filename, AVIO_FLAG_WRITE,
-                              &s->interrupt_callback, NULL)) < 0) {
+        if ((ret = s->io_open(s, &oc->pb,
+                              seg->header_filename ? seg->header_filename : oc->filename,
+                              AVIO_FLAG_WRITE, NULL)) < 0) {
             av_log(s, AV_LOG_ERROR, "Failed to open segment '%s'\n", oc->filename);
             goto fail;
         }
@@ -700,7 +724,7 @@ static int seg_write_header(AVFormatContext *s)
     }
 
     if (ret < 0) {
-        avio_closep(&oc->pb);
+        ff_format_io_close(oc, &oc->pb);
         goto fail;
     }
     seg->segment_frame_count = 0;
@@ -718,12 +742,11 @@ static int seg_write_header(AVFormatContext *s)
     if (!seg->write_header_trailer || seg->header_filename) {
         if (seg->header_filename) {
             av_write_frame(oc, NULL);
-            avio_closep(&oc->pb);
+            ff_format_io_close(oc, &oc->pb);
         } else {
             close_null_ctxp(&oc->pb);
         }
-        if ((ret = avio_open2(&oc->pb, oc->filename, AVIO_FLAG_WRITE,
-                              &s->interrupt_callback, NULL)) < 0)
+        if ((ret = oc->io_open(oc, &oc->pb, oc->filename, AVIO_FLAG_WRITE, NULL)) < 0)
             goto fail;
         if (!seg->individual_header_trailer)
             oc->pb->seekable = 0;
@@ -763,8 +786,8 @@ static int seg_write_packet(AVFormatContext *s, AVPacket *pkt)
             time_t sec = avgt / 1000000;
             localtime_r(&sec, &ti);
             usecs = (int64_t)(ti.tm_hour * 3600 + ti.tm_min * 60 + ti.tm_sec) * 1000000 + (avgt % 1000000);
-            wrapped_val = usecs % seg->time;
-            if (seg->last_cut != usecs && wrapped_val < seg->last_val) {
+            wrapped_val = (usecs + seg->clocktime_offset) % seg->time;
+            if (seg->last_cut != usecs && wrapped_val < seg->last_val && wrapped_val < seg->clocktime_wrap_duration) {
                 seg->cut_pending = 1;
                 seg->last_cut = usecs;
             }
@@ -774,7 +797,7 @@ static int seg_write_packet(AVFormatContext *s, AVPacket *pkt)
         }
     }
 
-    av_dlog(s, "packet stream:%d pts:%s pts_time:%s duration_time:%s is_key:%d frame:%d\n",
+    ff_dlog(s, "packet stream:%d pts:%s pts_time:%s duration_time:%s is_key:%d frame:%d\n",
             pkt->stream_index, av_ts2str(pkt->pts), av_ts2timestr(pkt->pts, &st->time_base),
             av_ts2timestr(pkt->duration, &st->time_base),
             pkt->flags & AV_PKT_FLAG_KEY,
@@ -860,7 +883,8 @@ static int seg_write_trailer(struct AVFormatContext *s)
     if (!seg->write_header_trailer) {
         if ((ret = segment_end(s, 0, 1)) < 0)
             goto fail;
-        open_null_ctx(&oc->pb);
+        if ((ret = open_null_ctx(&oc->pb)) < 0)
+            goto fail;
         ret = av_write_trailer(oc);
         close_null_ctxp(&oc->pb);
     } else {
@@ -868,12 +892,13 @@ static int seg_write_trailer(struct AVFormatContext *s)
     }
 fail:
     if (seg->list)
-        avio_closep(&seg->list_pb);
+        ff_format_io_close(s, &seg->list_pb);
 
     av_dict_free(&seg->format_options);
     av_opt_free(seg);
     av_freep(&seg->times);
     av_freep(&seg->frames);
+    av_freep(&seg->cur_entry.filename);
 
     cur = seg->segment_list_entries;
     while (cur) {
@@ -911,7 +936,9 @@ static const AVOption options[] = {
     { "m3u8", "M3U8 format",     0, AV_OPT_TYPE_CONST, {.i64=LIST_TYPE_M3U8 }, INT_MIN, INT_MAX, E, "list_type" },
     { "hls", "Apple HTTP Live Streaming compatible", 0, AV_OPT_TYPE_CONST, {.i64=LIST_TYPE_M3U8 }, INT_MIN, INT_MAX, E, "list_type" },
 
-    { "segment_atclocktime",      "set segment to be cut at clocktime",  OFFSET(use_clocktime), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, E},
+    { "segment_atclocktime",      "set segment to be cut at clocktime",  OFFSET(use_clocktime), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, E},
+    { "segment_clocktime_offset", "set segment clocktime offset",        OFFSET(clocktime_offset), AV_OPT_TYPE_DURATION, {.i64 = 0}, 0, 86400000000LL, E},
+    { "segment_clocktime_wrap_duration", "set segment clocktime wrapping duration", OFFSET(clocktime_wrap_duration), AV_OPT_TYPE_DURATION, {.i64 = INT64_MAX}, 0, INT64_MAX, E},
     { "segment_time",      "set segment duration",                       OFFSET(time_str),AV_OPT_TYPE_STRING, {.str = NULL},  0, 0,       E },
     { "segment_time_delta","set approximation value used for the segment times", OFFSET(time_delta), AV_OPT_TYPE_DURATION, {.i64 = 0}, 0, 0, E },
     { "segment_times",     "set segment split time points",              OFFSET(times_str),AV_OPT_TYPE_STRING,{.str = NULL},  0, 0,       E },
@@ -920,12 +947,12 @@ static const AVOption options[] = {
     { "segment_list_entry_prefix", "set base url prefix for segments", OFFSET(entry_prefix), AV_OPT_TYPE_STRING,  {.str = NULL}, 0, 0, E },
     { "segment_start_number", "set the sequence number of the first segment", OFFSET(segment_idx), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, E },
     { "segment_wrap_number", "set the number of wrap before the first segment", OFFSET(segment_idx_wrap_nb), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, E },
-    { "strftime",          "set filename expansion with strftime at segment creation", OFFSET(use_strftime), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 1, E },
-    { "break_non_keyframes", "allow breaking segments on non-keyframes", OFFSET(break_non_keyframes), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, E },
+    { "strftime",          "set filename expansion with strftime at segment creation", OFFSET(use_strftime), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, E },
+    { "break_non_keyframes", "allow breaking segments on non-keyframes", OFFSET(break_non_keyframes), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, E },
 
-    { "individual_header_trailer", "write header/trailer to each segment", OFFSET(individual_header_trailer), AV_OPT_TYPE_INT, {.i64 = 1}, 0, 1, E },
-    { "write_header_trailer", "write a header to the first segment and a trailer to the last one", OFFSET(write_header_trailer), AV_OPT_TYPE_INT, {.i64 = 1}, 0, 1, E },
-    { "reset_timestamps", "reset timestamps at the begin of each segment", OFFSET(reset_timestamps), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, E },
+    { "individual_header_trailer", "write header/trailer to each segment", OFFSET(individual_header_trailer), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, E },
+    { "write_header_trailer", "write a header to the first segment and a trailer to the last one", OFFSET(write_header_trailer), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, E },
+    { "reset_timestamps", "reset timestamps at the begin of each segment", OFFSET(reset_timestamps), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, E },
     { "initial_offset", "set initial timestamp offset", OFFSET(initial_offset), AV_OPT_TYPE_DURATION, {.i64 = 0}, -INT64_MAX, INT64_MAX, E },
     { NULL },
 };
diff --git a/libavformat/shortendec.c b/libavformat/shortendec.c
new file mode 100644
index 00000000..42fcdf75
--- /dev/null
+++ b/libavformat/shortendec.c
@@ -0,0 +1,91 @@
+/*
+ * Shorten demuxer
+ * Copyright (c) 2001 Fabrice Bellard
+ * Copyright (c) 2005 Alex Beregszaszi
+ * Copyright (c) 2015 Carl Eugen Hoyos
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avformat.h"
+#include "rawdec.h"
+#include "libavcodec/golomb.h"
+
+/**
+ * Probe for a raw Shorten stream.
+ *
+ * Checks the 4-byte magic, then decodes the file type, channel count and
+ * block size fields from the header and rejects values outside the ranges
+ * tested below.
+ *
+ * @return AVPROBE_SCORE_EXTENSION + 1 if the header looks valid, 0 otherwise
+ */
+static int shn_probe(AVProbeData *p)
+{
+    GetBitContext gb;
+    int version, internal_ftype, channels, blocksize;
+
+    if (AV_RB32(p->buf) != 0x616a6b67) /* "ajkg" */
+        return 0;
+    version = p->buf[4];
+    if (init_get_bits8(&gb, p->buf + 5, p->buf_size - 5 - AV_INPUT_BUFFER_PADDING_SIZE) < 0)
+        return 0;
+    if (!version) {
+        internal_ftype = get_ur_golomb_shorten(&gb, 4);
+        channels = get_ur_golomb_shorten(&gb, 0);
+        blocksize = 256;
+    } else {
+        /* each field is preceded by its own Rice parameter k, which is
+         * read from the (untrusted) stream; reject values that are not a
+         * valid bit count before using them to read the field */
+        unsigned k;
+        k = get_ur_golomb_shorten(&gb, 2);
+        if (k > 31)
+            return 0;
+        internal_ftype = get_ur_golomb_shorten(&gb, k);
+        k = get_ur_golomb_shorten(&gb, 2);
+        if (k > 31)
+            return 0;
+        channels = get_ur_golomb_shorten(&gb, k);
+        k = get_ur_golomb_shorten(&gb, 2);
+        if (k > 31)
+            return 0;
+        blocksize = get_ur_golomb_shorten(&gb, k);
+    }
+
+    if (internal_ftype != 2 && internal_ftype != 3 && internal_ftype != 5)
+        return 0;
+    if (channels < 1 || channels > 8)
+        return 0;
+    if (blocksize < 1 || blocksize > 65535)
+        return 0;
+
+    return AVPROBE_SCORE_EXTENSION + 1;
+}
+
+/* Raw Shorten demuxer: probing is format specific, reading is delegated to
+ * the generic raw audio helpers. */
+AVInputFormat ff_shorten_demuxer = {
+    .name           = "shn",
+    .long_name      = NULL_IF_CONFIG_SMALL("raw Shorten"),
+    .read_probe     = shn_probe,
+    .read_header    = ff_raw_audio_read_header,
+    .read_packet    = ff_raw_read_partial_packet,
+    .flags          = AVFMT_NOBINSEARCH | AVFMT_NOGENSEARCH | AVFMT_NO_BYTE_SEEK | AVFMT_NOTIMESTAMPS,
+    .extensions     = "shn",
+    .raw_codec_id   = AV_CODEC_ID_SHORTEN,
+};
diff --git a/libavformat/sierravmd.c b/libavformat/sierravmd.c
index 1ededc71..2ab0e38a 100644
--- a/libavformat/sierravmd.c
+++ b/libavformat/sierravmd.c
@@ -288,7 +288,7 @@ static int vmd_read_packet(AVFormatContext *s,
             frame->frame_size);
 
     if (ret != frame->frame_size) {
-        av_free_packet(pkt);
+        av_packet_unref(pkt);
         ret = AVERROR(EIO);
     }
     pkt->stream_index = frame->stream_index;
diff --git a/libavformat/siff.c b/libavformat/siff.c
index b6ea3905..028f18be 100644
--- a/libavformat/siff.c
+++ b/libavformat/siff.c
@@ -219,7 +219,7 @@ static int siff_read_packet(AVFormatContext *s, AVPacket *pkt)
             if (c->gmcsize)
                 memcpy(pkt->data + 2, c->gmc, c->gmcsize);
             if (avio_read(s->pb, pkt->data + 2 + c->gmcsize, size) != size) {
-                av_free_packet(pkt);
+                av_packet_unref(pkt);
                 return AVERROR_INVALIDDATA;
             }
             pkt->stream_index = 0;
diff --git a/libavformat/smacker.c b/libavformat/smacker.c
index 5dcf4ada..de8bbdb0 100644
--- a/libavformat/smacker.c
+++ b/libavformat/smacker.c
@@ -120,6 +120,11 @@ static int smacker_read_header(AVFormatContext *s)
     smk->height = avio_rl32(pb);
     smk->frames = avio_rl32(pb);
     smk->pts_inc = (int32_t)avio_rl32(pb);
+    if (smk->pts_inc > INT_MAX / 100) {
+        av_log(s, AV_LOG_ERROR, "pts_inc %d is too large\n", smk->pts_inc);
+        return AVERROR_INVALIDDATA;
+    }
+
     smk->flags = avio_rl32(pb);
     if(smk->flags & SMACKER_FLAG_RING_FRAME)
         smk->frames++;
diff --git a/libavformat/smoothstreamingenc.c b/libavformat/smoothstreamingenc.c
index 07173a96..f36e5fee 100644
--- a/libavformat/smoothstreamingenc.c
+++ b/libavformat/smoothstreamingenc.c
@@ -26,6 +26,7 @@
 #endif
 
 #include "avformat.h"
+#include "avio_internal.h"
 #include "internal.h"
 #include "os_support.h"
 #include "avc.h"
@@ -121,7 +122,8 @@ static int64_t ism_seek(void *opaque, int64_t offset, int whence)
             AVDictionary *opts = NULL;
             os->tail_out = os->out;
             av_dict_set(&opts, "truncate", "0", 0);
-            ret = ffurl_open(&os->out, frag->file, AVIO_FLAG_READ_WRITE, &os->ctx->interrupt_callback, &opts);
+            ret = ffurl_open_whitelist(&os->out, frag->file, AVIO_FLAG_READ_WRITE,
+                                       &os->ctx->interrupt_callback, &opts, os->ctx->protocol_whitelist);
             av_dict_free(&opts);
             if (ret < 0) {
                 os->out = os->tail_out;
@@ -129,7 +131,8 @@ static int64_t ism_seek(void *opaque, int64_t offset, int whence)
                 return ret;
             }
             av_dict_set(&opts, "truncate", "0", 0);
-            ffurl_open(&os->out2, frag->infofile, AVIO_FLAG_READ_WRITE, &os->ctx->interrupt_callback, &opts);
+            ffurl_open_whitelist(&os->out2, frag->infofile, AVIO_FLAG_READ_WRITE,
+                                 &os->ctx->interrupt_callback, &opts, os->ctx->protocol_whitelist);
             av_dict_free(&opts);
             ffurl_seek(os->out, offset - frag->start_pos, SEEK_SET);
             if (os->out2)
@@ -220,7 +223,7 @@ static int write_manifest(AVFormatContext *s, int final)
 
     snprintf(filename, sizeof(filename), "%s/Manifest", s->filename);
     snprintf(temp_filename, sizeof(temp_filename), "%s/Manifest.tmp", s->filename);
-    ret = avio_open2(&out, temp_filename, AVIO_FLAG_WRITE, &s->interrupt_callback, NULL);
+    ret = s->io_open(s, &out, temp_filename, AVIO_FLAG_WRITE, NULL);
     if (ret < 0) {
         av_log(s, AV_LOG_ERROR, "Unable to open %s for writing\n", temp_filename);
         return ret;
@@ -260,7 +263,7 @@ static int write_manifest(AVFormatContext *s, int final)
             if (s->streams[i]->codec->codec_type != AVMEDIA_TYPE_VIDEO)
                 continue;
             last = i;
-            avio_printf(out, "<QualityLevel Index=\"%d\" Bitrate=\"%d\" FourCC=\"%s\" MaxWidth=\"%d\" MaxHeight=\"%d\" CodecPrivateData=\"%s\" />\n", index, s->streams[i]->codec->bit_rate, os->fourcc, s->streams[i]->codec->width, s->streams[i]->codec->height, os->private_str);
+            avio_printf(out, "<QualityLevel Index=\"%d\" Bitrate=\"%"PRId64"\" FourCC=\"%s\" MaxWidth=\"%d\" MaxHeight=\"%d\" CodecPrivateData=\"%s\" />\n", index, (int64_t)s->streams[i]->codec->bit_rate, os->fourcc, s->streams[i]->codec->width, s->streams[i]->codec->height, os->private_str);
             index++;
         }
         output_chunk_list(&c->streams[last], out, final, c->lookahead_count, c->window_size);
@@ -274,7 +277,7 @@ static int write_manifest(AVFormatContext *s, int final)
             if (s->streams[i]->codec->codec_type != AVMEDIA_TYPE_AUDIO)
                 continue;
             last = i;
-            avio_printf(out, "<QualityLevel Index=\"%d\" Bitrate=\"%d\" FourCC=\"%s\" SamplingRate=\"%d\" Channels=\"%d\" BitsPerSample=\"16\" PacketSize=\"%d\" AudioTag=\"%d\" CodecPrivateData=\"%s\" />\n", index, s->streams[i]->codec->bit_rate, os->fourcc, s->streams[i]->codec->sample_rate, s->streams[i]->codec->channels, os->packet_size, os->audio_tag, os->private_str);
+            avio_printf(out, "<QualityLevel Index=\"%d\" Bitrate=\"%"PRId64"\" FourCC=\"%s\" SamplingRate=\"%d\" Channels=\"%d\" BitsPerSample=\"16\" PacketSize=\"%d\" AudioTag=\"%d\" CodecPrivateData=\"%s\" />\n", index, (int64_t)s->streams[i]->codec->bit_rate, os->fourcc, s->streams[i]->codec->sample_rate, s->streams[i]->codec->channels, os->packet_size, os->audio_tag, os->private_str);
             index++;
         }
         output_chunk_list(&c->streams[last], out, final, c->lookahead_count, c->window_size);
@@ -282,7 +285,7 @@ static int write_manifest(AVFormatContext *s, int final)
     }
     avio_printf(out, "</SmoothStreamingMedia>\n");
     avio_flush(out);
-    avio_close(out);
+    ff_format_io_close(s, &out);
     return ff_rename(temp_filename, filename, s);
 }
 
@@ -321,7 +324,7 @@ static int ism_write_header(AVFormatContext *s)
             ret = AVERROR(EINVAL);
             goto fail;
         }
-        snprintf(os->dirname, sizeof(os->dirname), "%s/QualityLevels(%d)", s->filename, s->streams[i]->codec->bit_rate);
+        snprintf(os->dirname, sizeof(os->dirname), "%s/QualityLevels(%"PRId64")", s->filename, (int64_t)s->streams[i]->codec->bit_rate);
         if (mkdir(os->dirname, 0777) == -1 && errno != EEXIST) {
             ret = AVERROR(errno);
             av_log(s, AV_LOG_ERROR, "mkdir failed\n");
@@ -329,7 +332,7 @@ static int ism_write_header(AVFormatContext *s)
         }
 
         ctx = avformat_alloc_context();
-        if (!ctx) {
+        if (!ctx || ff_copy_whitelists(ctx, s) < 0) {
             ret = AVERROR(ENOMEM);
             goto fail;
         }
@@ -409,7 +412,7 @@ static int parse_fragment(AVFormatContext *s, const char *filename, int64_t *sta
     AVIOContext *in;
     int ret;
     uint32_t len;
-    if ((ret = avio_open2(&in, filename, AVIO_FLAG_READ, &s->interrupt_callback, NULL)) < 0)
+    if ((ret = s->io_open(s, &in, filename, AVIO_FLAG_READ, NULL)) < 0)
         return ret;
     ret = AVERROR(EIO);
     *moof_size = avio_rb32(in);
@@ -450,7 +453,7 @@ static int parse_fragment(AVFormatContext *s, const char *filename, int64_t *sta
         avio_seek(in, end, SEEK_SET);
     }
 fail:
-    avio_close(in);
+    ff_format_io_close(s, &in);
     return ret;
 }
 
@@ -486,10 +489,10 @@ static int copy_moof(AVFormatContext *s, const char* infile, const char *outfile
 {
     AVIOContext *in, *out;
     int ret = 0;
-    if ((ret = avio_open2(&in, infile, AVIO_FLAG_READ, &s->interrupt_callback, NULL)) < 0)
+    if ((ret = s->io_open(s, &in, infile, AVIO_FLAG_READ, NULL)) < 0)
         return ret;
-    if ((ret = avio_open2(&out, outfile, AVIO_FLAG_WRITE, &s->interrupt_callback, NULL)) < 0) {
-        avio_close(in);
+    if ((ret = s->io_open(s, &out, outfile, AVIO_FLAG_WRITE, NULL)) < 0) {
+        ff_format_io_close(s, &in);
         return ret;
     }
     while (size > 0) {
@@ -504,8 +507,8 @@ static int copy_moof(AVFormatContext *s, const char* infile, const char *outfile
         size -= n;
     }
     avio_flush(out);
-    avio_close(out);
-    avio_close(in);
+    ff_format_io_close(s, &out);
+    ff_format_io_close(s, &in);
     return ret;
 }
 
@@ -523,7 +526,7 @@ static int ism_flush(AVFormatContext *s, int final)
             continue;
 
         snprintf(filename, sizeof(filename), "%s/temp", os->dirname);
-        ret = ffurl_open(&os->out, filename, AVIO_FLAG_WRITE, &s->interrupt_callback, NULL);
+        ret = ffurl_open_whitelist(&os->out, filename, AVIO_FLAG_WRITE, &s->interrupt_callback, NULL, s->protocol_whitelist);
         if (ret < 0)
             break;
         os->cur_start_pos = os->tail_pos;
@@ -622,7 +625,7 @@ static const AVOption options[] = {
     { "extra_window_size", "number of fragments kept outside of the manifest before removing from disk", OFFSET(extra_window_size), AV_OPT_TYPE_INT, { .i64 = 5 }, 0, INT_MAX, E },
     { "lookahead_count", "number of lookahead fragments", OFFSET(lookahead_count), AV_OPT_TYPE_INT, { .i64 = 2 }, 0, INT_MAX, E },
     { "min_frag_duration", "minimum fragment duration (in microseconds)", OFFSET(min_frag_duration), AV_OPT_TYPE_INT64, { .i64 = 5000000 }, 0, INT_MAX, E },
-    { "remove_at_exit", "remove all fragments when finished", OFFSET(remove_at_exit), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, E },
+    { "remove_at_exit", "remove all fragments when finished", OFFSET(remove_at_exit), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, E },
     { NULL },
 };
 
diff --git a/libavformat/spdifdec.c b/libavformat/spdifdec.c
index 7c04afaf..a2f06a97 100644
--- a/libavformat/spdifdec.c
+++ b/libavformat/spdifdec.c
@@ -193,7 +193,7 @@ int ff_spdif_read_packet(AVFormatContext *s, AVPacket *pkt)
     pkt->pos = avio_tell(pb) - BURST_HEADER_SIZE;
 
     if (avio_read(pb, pkt->data, pkt->size) < pkt->size) {
-        av_free_packet(pkt);
+        av_packet_unref(pkt);
         return AVERROR_EOF;
     }
     ff_spdif_bswap_buf16((uint16_t *)pkt->data, (uint16_t *)pkt->data, pkt->size >> 1);
@@ -201,7 +201,7 @@ int ff_spdif_read_packet(AVFormatContext *s, AVPacket *pkt)
     ret = spdif_get_offset_and_codec(s, data_type, pkt->data,
                                      &offset, &codec_id);
     if (ret) {
-        av_free_packet(pkt);
+        av_packet_unref(pkt);
         return ret;
     }
 
@@ -212,7 +212,7 @@ int ff_spdif_read_packet(AVFormatContext *s, AVPacket *pkt)
         /* first packet, create a stream */
         AVStream *st = avformat_new_stream(s, NULL);
         if (!st) {
-            av_free_packet(pkt);
+            av_packet_unref(pkt);
             return AVERROR(ENOMEM);
         }
         st->codec->codec_type = AVMEDIA_TYPE_AUDIO;
diff --git a/libavformat/spdifenc.c b/libavformat/spdifenc.c
index cdcff244..64c5f388 100644
--- a/libavformat/spdifenc.c
+++ b/libavformat/spdifenc.c
@@ -463,6 +463,7 @@ static int spdif_write_header(AVFormatContext *s)
         ctx->header_info = spdif_header_aac;
         break;
     case AV_CODEC_ID_TRUEHD:
+    case AV_CODEC_ID_MLP:
         ctx->header_info = spdif_header_truehd;
         ctx->hd_buf = av_malloc(MAT_FRAME_SIZE);
         if (!ctx->hd_buf)
@@ -525,7 +526,7 @@ static int spdif_write_packet(struct AVFormatContext *s, AVPacket *pkt)
     if (ctx->extra_bswap ^ (ctx->spdif_flags & SPDIF_FLAG_BIGENDIAN)) {
         avio_write(s->pb, ctx->out_buf, ctx->out_bytes & ~1);
     } else {
-        av_fast_malloc(&ctx->buffer, &ctx->buffer_size, ctx->out_bytes + FF_INPUT_BUFFER_PADDING_SIZE);
+        av_fast_malloc(&ctx->buffer, &ctx->buffer_size, ctx->out_bytes + AV_INPUT_BUFFER_PADDING_SIZE);
         if (!ctx->buffer)
             return AVERROR(ENOMEM);
         ff_spdif_bswap_buf16((uint16_t *)ctx->buffer, (uint16_t *)ctx->out_buf, ctx->out_bytes >> 1);
diff --git a/libavformat/srtdec.c b/libavformat/srtdec.c
index b35e50fc..6113f705 100644
--- a/libavformat/srtdec.c
+++ b/libavformat/srtdec.c
@@ -1,6 +1,7 @@
 /*
  * SubRip subtitle demuxer
  * Copyright (c) 2010  Aurelien Jacobs <aurel@gnuage.org>
+ * Copyright (c) 2015  Clément Bœsch <u pkh me>
  *
  * This file is part of FFmpeg.
  *
@@ -41,43 +42,82 @@ static int srt_probe(AVProbeData *p)
         ff_text_r8(&tr);
 
     /* Check if the first non-empty line is a number. We do not check what the
-     * number is because in practice it can be anything. */
+     * number is because in practice it can be anything.
+     * Also, that number can be followed by random garbage, so unfortunately
+     * we cannot check that the line contains only a number. */
     if (ff_subtitles_read_line(&tr, buf, sizeof(buf)) < 0 ||
-        strtol(buf, &pbuf, 10) < 0 || *pbuf)
+        strtol(buf, &pbuf, 10) < 0 || pbuf == buf)
         return 0;
 
     /* Check if the next line matches a SRT timestamp */
     if (ff_subtitles_read_line(&tr, buf, sizeof(buf)) < 0)
         return 0;
-    if (buf[0] >= '0' && buf[1] <= '9' && strstr(buf, " --> ")
+    if (buf[0] >= '0' && buf[0] <= '9' && strstr(buf, " --> ")
         && sscanf(buf, "%*d:%*2d:%*2d%*1[,.]%*3d --> %*d:%*2d:%*2d%*1[,.]%3d", &v) == 1)
         return AVPROBE_SCORE_MAX;
 
     return 0;
 }
 
-static int64_t get_pts(const char **buf, int *duration,
-                       int32_t *x1, int32_t *y1, int32_t *x2, int32_t *y2)
+struct event_info {
+    int32_t x1, x2, y1, y2;
+    int duration;
+    int64_t pts;
+    int64_t pos;
+};
+
+static int get_event_info(const char *line, struct event_info *ei)
+{
+    int hh1, mm1, ss1, ms1;
+    int hh2, mm2, ss2, ms2;
+
+    ei->x1 = ei->x2 = ei->y1 = ei->y2 = ei->duration = -1;
+    ei->pts = AV_NOPTS_VALUE;
+    ei->pos = -1;
+    if (sscanf(line, "%d:%2d:%2d%*1[,.]%3d --> %d:%2d:%2d%*1[,.]%3d"
+               "%*[ ]X1:%u X2:%u Y1:%u Y2:%u",
+               &hh1, &mm1, &ss1, &ms1,
+               &hh2, &mm2, &ss2, &ms2,
+               &ei->x1, &ei->x2, &ei->y1, &ei->y2) >= 8) {
+        const int64_t start = (hh1*3600LL + mm1*60LL + ss1) * 1000LL + ms1;
+        const int64_t end   = (hh2*3600LL + mm2*60LL + ss2) * 1000LL + ms2;
+        ei->duration = end - start;
+        ei->pts = start;
+        return 0;
+    }
+    return -1;
+}
+
+static int add_event(FFDemuxSubtitlesQueue *q, AVBPrint *buf, char *line_cache,
+                     const struct event_info *ei, int append_cache)
 {
-    int i;
-
-    for (i=0; i<2; i++) {
-        int hh1, mm1, ss1, ms1;
-        int hh2, mm2, ss2, ms2;
-        if (sscanf(*buf, "%d:%2d:%2d%*1[,.]%3d --> %d:%2d:%2d%*1[,.]%3d"
-                   "%*[ ]X1:%u X2:%u Y1:%u Y2:%u",
-                   &hh1, &mm1, &ss1, &ms1,
-                   &hh2, &mm2, &ss2, &ms2,
-                   x1, x2, y1, y2) >= 8) {
-            int64_t start = (hh1*3600LL + mm1*60LL + ss1) * 1000LL + ms1;
-            int64_t end   = (hh2*3600LL + mm2*60LL + ss2) * 1000LL + ms2;
-            *duration = end - start;
-            *buf += ff_subtitles_next_line(*buf);
-            return start;
+    if (append_cache && line_cache[0])
+        av_bprintf(buf, "%s\n", line_cache);
+    line_cache[0] = 0;
+
+    while (buf->len > 0 && buf->str[buf->len - 1] == '\n')
+        buf->str[--buf->len] = 0;
+
+    if (buf->len) {
+        AVPacket *sub = ff_subtitles_queue_insert(q, buf->str, buf->len, 0);
+        if (!sub)
+            return AVERROR(ENOMEM);
+        av_bprint_clear(buf);
+        sub->pos = ei->pos;
+        sub->pts = ei->pts;
+        sub->duration = ei->duration;
+        if (ei->x1 != -1) {
+            uint8_t *p = av_packet_new_side_data(sub, AV_PKT_DATA_SUBTITLE_POSITION, 16);
+            if (p) {
+                AV_WL32(p,      ei->x1);
+                AV_WL32(p +  4, ei->y1);
+                AV_WL32(p +  8, ei->x2);
+                AV_WL32(p + 12, ei->y2);
+            }
         }
-        *buf += ff_subtitles_next_line(*buf);
     }
-    return AV_NOPTS_VALUE;
+
+    return 0;
 }
 
 static int srt_read_header(AVFormatContext *s)
@@ -86,6 +126,9 @@ static int srt_read_header(AVFormatContext *s)
     AVBPrint buf;
     AVStream *st = avformat_new_stream(s, NULL);
     int res = 0;
+    char line[4096], line_cache[4096];
+    int has_event_info = 0;
+    struct event_info ei;
     FFTextReader tr;
     ff_text_init_avio(s, &tr, s->pb);
 
@@ -97,44 +140,68 @@ static int srt_read_header(AVFormatContext *s)
 
     av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);
 
+    line_cache[0] = 0;
+
     while (!ff_text_eof(&tr)) {
-        ff_subtitles_read_text_chunk(&tr, &buf);
-
-        if (buf.len) {
-            int64_t pos = ff_text_pos(&tr);
-            int64_t pts;
-            int duration;
-            const char *ptr = buf.str;
-            int32_t x1 = -1, y1 = -1, x2 = -1, y2 = -1;
-            AVPacket *sub;
-
-            pts = get_pts(&ptr, &duration, &x1, &y1, &x2, &y2);
-            if (pts != AV_NOPTS_VALUE) {
-                int len = buf.len - (ptr - buf.str);
-                if (len <= 0)
-                    continue;
-                sub = ff_subtitles_queue_insert(&srt->q, ptr, len, 0);
-                if (!sub) {
-                    res = AVERROR(ENOMEM);
+        struct event_info tmp_ei;
+        const int64_t pos = ff_text_pos(&tr);
+        ptrdiff_t len = ff_subtitles_read_line(&tr, line, sizeof(line));
+
+        if (len < 0)
+            break;
+
+        if (!len || !line[0])
+            continue;
+
+        if (get_event_info(line, &tmp_ei) < 0) {
+            char *pline;
+
+            if (!has_event_info)
+                continue;
+
+            if (line_cache[0]) {
+                /* We got some cache and a new line so we assume the cached
+                 * line was actually part of the payload */
+                av_bprintf(&buf, "%s\n", line_cache);
+                line_cache[0] = 0;
+            }
+
+            /* If the line doesn't start with a number, we assume it's part of
+             * the payload; otherwise it is likely an event number preceding
+             * the timing information... but we can't be sure of this yet, so
+             * we cache it */
+            if (strtol(line, &pline, 10) < 0 || line == pline)
+                av_bprintf(&buf, "%s\n", line);
+            else
+                strcpy(line_cache, line);
+        } else {
+            if (has_event_info) {
+                /* We have the previous event's information: append it to the
+                 * queue. We insert the cached line if and only if the payload
+                 * is empty and the cached line is not a standalone number. */
+                char *pline = NULL;
+                const int standalone_number = strtol(line_cache, &pline, 10) >= 0 && pline && !*pline;
+                res = add_event(&srt->q, &buf, line_cache, &ei, !buf.len && !standalone_number);
+                if (res < 0)
                     goto end;
-                }
-                sub->pos = pos;
-                sub->pts = pts;
-                sub->duration = duration;
-                if (x1 != -1) {
-                    uint8_t *p = av_packet_new_side_data(sub, AV_PKT_DATA_SUBTITLE_POSITION, 16);
-                    if (p) {
-                        AV_WL32(p,      x1);
-                        AV_WL32(p +  4, y1);
-                        AV_WL32(p +  8, x2);
-                        AV_WL32(p + 12, y2);
-                    }
-                }
+            } else {
+                has_event_info = 1;
             }
+            tmp_ei.pos = pos;
+            ei = tmp_ei;
         }
     }
 
-    ff_subtitles_queue_finalize(&srt->q);
+    /* Append the last event. Here we force the cache to be flushed, because a
+     * trailing number is more likely to be genuine (for example a copyright
+     * date) and not the event index of a nonexistent event */
+    if (has_event_info) {
+        res = add_event(&srt->q, &buf, line_cache, &ei, 1);
+        if (res < 0)
+            goto end;
+    }
+
+    ff_subtitles_queue_finalize(s, &srt->q);
 
 end:
     av_bprint_finalize(&buf, NULL);
diff --git a/libavformat/srtenc.c b/libavformat/srtenc.c
index 9bb83d64..24c25ec6 100644
--- a/libavformat/srtenc.c
+++ b/libavformat/srtenc.c
@@ -60,38 +60,41 @@ static int srt_write_packet(AVFormatContext *avf, AVPacket *pkt)
 {
     SRTContext *srt = avf->priv_data;
 
-    // TODO: reindent
-        int64_t s = pkt->pts, e, d = pkt->duration;
-        int size, x1 = -1, y1 = -1, x2 = -1, y2 = -1;
-        const uint8_t *p;
+    int64_t s = pkt->pts, e, d = pkt->duration;
+    int size, x1 = -1, y1 = -1, x2 = -1, y2 = -1;
+    const uint8_t *p;
 
-        p = av_packet_get_side_data(pkt, AV_PKT_DATA_SUBTITLE_POSITION, &size);
-        if (p && size == 16) {
-            x1 = AV_RL32(p     );
-            y1 = AV_RL32(p +  4);
-            x2 = AV_RL32(p +  8);
-            y2 = AV_RL32(p + 12);
-        }
+    p = av_packet_get_side_data(pkt, AV_PKT_DATA_SUBTITLE_POSITION, &size);
+    if (p && size == 16) {
+        x1 = AV_RL32(p     );
+        y1 = AV_RL32(p +  4);
+        x2 = AV_RL32(p +  8);
+        y2 = AV_RL32(p + 12);
+    }
 
-        if (d <= 0)
-            /* For backward compatibility, fallback to convergence_duration. */
-            d = pkt->convergence_duration;
-        if (s == AV_NOPTS_VALUE || d < 0) {
-            av_log(avf, AV_LOG_WARNING,
-                   "Insufficient timestamps in event number %d.\n", srt->index);
-            return 0;
-        }
-        e = s + d;
-        avio_printf(avf->pb, "%d\n%02d:%02d:%02d,%03d --> %02d:%02d:%02d,%03d",
-                       srt->index,
-                       (int)(s / 3600000),      (int)(s / 60000) % 60,
-                       (int)(s /    1000) % 60, (int)(s %  1000),
-                       (int)(e / 3600000),      (int)(e / 60000) % 60,
-                       (int)(e /    1000) % 60, (int)(e %  1000));
-        if (p)
-            avio_printf(avf->pb, "  X1:%03d X2:%03d Y1:%03d Y2:%03d",
-                        x1, x2, y1, y2);
-        avio_printf(avf->pb, "\n");
+#if FF_API_CONVERGENCE_DURATION
+FF_DISABLE_DEPRECATION_WARNINGS
+    if (d <= 0)
+        /* For backward compatibility, fallback to convergence_duration. */
+        d = pkt->convergence_duration;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+    if (s == AV_NOPTS_VALUE || d < 0) {
+        av_log(avf, AV_LOG_WARNING,
+               "Insufficient timestamps in event number %d.\n", srt->index);
+        return 0;
+    }
+    e = s + d;
+    avio_printf(avf->pb, "%d\n%02d:%02d:%02d,%03d --> %02d:%02d:%02d,%03d",
+                   srt->index,
+                   (int)(s / 3600000),      (int)(s / 60000) % 60,
+                   (int)(s /    1000) % 60, (int)(s %  1000),
+                   (int)(e / 3600000),      (int)(e / 60000) % 60,
+                   (int)(e /    1000) % 60, (int)(e %  1000));
+    if (p)
+        avio_printf(avf->pb, "  X1:%03d X2:%03d Y1:%03d Y2:%03d",
+                    x1, x2, y1, y2);
+    avio_printf(avf->pb, "\n");
 
     avio_write(avf->pb, pkt->data, pkt->size);
     avio_write(avf->pb, "\n\n", 2);
diff --git a/libavformat/srtpproto.c b/libavformat/srtpproto.c
index 0124696a..460799ae 100644
--- a/libavformat/srtpproto.c
+++ b/libavformat/srtpproto.c
@@ -80,7 +80,8 @@ static int srtp_open(URLContext *h, const char *uri, int flags)
     av_url_split(NULL, 0, NULL, 0, hostname, sizeof(hostname), &rtp_port,
                  path, sizeof(path), uri);
     ff_url_join(buf, sizeof(buf), "rtp", NULL, hostname, rtp_port, "%s", path);
-    if ((ret = ffurl_open(&s->rtp_hd, buf, flags, &h->interrupt_callback, NULL)) < 0)
+    if ((ret = ffurl_open_whitelist(&s->rtp_hd, buf, flags, &h->interrupt_callback,
+                                    NULL, h->protocol_whitelist)) < 0)
         goto fail;
 
     h->max_packet_size = FFMIN(s->rtp_hd->max_packet_size,
diff --git a/libavformat/stldec.c b/libavformat/stldec.c
index b84c7e9e..8b1f0a6d 100644
--- a/libavformat/stldec.c
+++ b/libavformat/stldec.c
@@ -104,7 +104,7 @@ static int stl_read_header(AVFormatContext *s)
             sub->duration = duration;
         }
     }
-    ff_subtitles_queue_finalize(&stl->q);
+    ff_subtitles_queue_finalize(s, &stl->q);
     return 0;
 }
 static int stl_read_packet(AVFormatContext *s, AVPacket *pkt)
diff --git a/libavformat/subfile.c b/libavformat/subfile.c
index 0e843846..2b53438c 100644
--- a/libavformat/subfile.c
+++ b/libavformat/subfile.c
@@ -77,7 +77,8 @@ static int subfile_open(URLContext *h, const char *filename, int flags,
         return AVERROR(EINVAL);
     }
     av_strstart(filename, "subfile:", &filename);
-    ret = ffurl_open(&c->h, filename, flags, &h->interrupt_callback, options);
+    ret = ffurl_open_whitelist(&c->h, filename, flags, &h->interrupt_callback,
+                               options, h->protocol_whitelist);
     if (ret < 0)
         return ret;
     c->pos = c->start;
@@ -144,4 +145,5 @@ URLProtocol ff_subfile_protocol = {
     .url_close           = subfile_close,
     .priv_data_size      = sizeof(SubfileContext),
     .priv_data_class     = &subfile_class,
+    .default_whitelist   = "file",
 };
diff --git a/libavformat/subtitles.c b/libavformat/subtitles.c
index 5bdbc8dc..108f909c 100644
--- a/libavformat/subtitles.c
+++ b/libavformat/subtitles.c
@@ -146,12 +146,9 @@ static int cmp_pkt_sub_ts_pos(const void *a, const void *b)
 {
     const AVPacket *s1 = a;
     const AVPacket *s2 = b;
-    if (s1->pts == s2->pts) {
-        if (s1->pos == s2->pos)
-            return 0;
-        return s1->pos > s2->pos ? 1 : -1;
-    }
-    return s1->pts > s2->pts ? 1 : -1;
+    if (s1->pts == s2->pts)
+        return FFDIFFSIGN(s1->pos, s2->pos);
+    return FFDIFFSIGN(s1->pts , s2->pts);
 }
 
 static int cmp_pkt_sub_pos_ts(const void *a, const void *b)
@@ -166,7 +163,34 @@ static int cmp_pkt_sub_pos_ts(const void *a, const void *b)
     return s1->pos > s2->pos ? 1 : -1;
 }
 
-void ff_subtitles_queue_finalize(FFDemuxSubtitlesQueue *q)
+static void drop_dups(void *log_ctx, FFDemuxSubtitlesQueue *q)
+{
+    int i, drop = 0;
+
+    for (i = 1; i < q->nb_subs; i++) {
+        const int last_id = i - 1 - drop;
+        const AVPacket *last = &q->subs[last_id];
+
+        if (q->subs[i].pts        == last->pts &&
+            q->subs[i].duration   == last->duration &&
+            q->subs[i].stream_index == last->stream_index &&
+            !strcmp(q->subs[i].data, last->data)) {
+
+            av_packet_unref(&q->subs[i]);
+            drop++;
+        } else if (drop) {
+            q->subs[last_id + 1] = q->subs[i];
+            memset(&q->subs[i], 0, sizeof(q->subs[i])); // for safety
+        }
+    }
+
+    if (drop) {
+        q->nb_subs -= drop;
+        av_log(log_ctx, AV_LOG_WARNING, "Dropping %d duplicated subtitle events\n", drop);
+    }
+}
+
+void ff_subtitles_queue_finalize(void *log_ctx, FFDemuxSubtitlesQueue *q)
 {
     int i;
 
@@ -174,8 +198,11 @@ void ff_subtitles_queue_finalize(FFDemuxSubtitlesQueue *q)
           q->sort == SUB_SORT_TS_POS ? cmp_pkt_sub_ts_pos
                                      : cmp_pkt_sub_pos_ts);
     for (i = 0; i < q->nb_subs; i++)
-        if (q->subs[i].duration == -1 && i < q->nb_subs - 1)
+        if (q->subs[i].duration < 0 && i < q->nb_subs - 1)
             q->subs[i].duration = q->subs[i + 1].pts - q->subs[i].pts;
+
+    if (!q->keep_duplicates)
+        drop_dups(log_ctx, q);
 }
 
 int ff_subtitles_queue_read_packet(FFDemuxSubtitlesQueue *q, AVPacket *pkt)
@@ -272,7 +299,7 @@ void ff_subtitles_queue_clean(FFDemuxSubtitlesQueue *q)
     int i;
 
     for (i = 0; i < q->nb_subs; i++)
-        av_free_packet(&q->subs[i]);
+        av_packet_unref(&q->subs[i]);
     av_freep(&q->subs);
     q->nb_subs = q->allocated_size = q->current_sub_idx = 0;
 }
diff --git a/libavformat/subtitles.h b/libavformat/subtitles.h
index 885285cc..ca78db22 100644
--- a/libavformat/subtitles.h
+++ b/libavformat/subtitles.h
@@ -105,6 +105,7 @@ typedef struct {
     int allocated_size;     ///< allocated size for subs
     int current_sub_idx;    ///< current position for the read packet callback
     enum sub_sort sort;     ///< sort method to use when finalizing subtitles
+    int keep_duplicates;    ///< set to 1 to keep duplicated subtitle events
 } FFDemuxSubtitlesQueue;
 
 /**
@@ -119,9 +120,10 @@ AVPacket *ff_subtitles_queue_insert(FFDemuxSubtitlesQueue *q,
                                     const uint8_t *event, size_t len, int merge);
 
 /**
- * Set missing durations and sort subtitles by PTS, and then byte position.
+ * Set missing durations, sort subtitles by PTS (and then byte position), and
+ * drop duplicated events (unless the queue's keep_duplicates flag is set).
  */
-void ff_subtitles_queue_finalize(FFDemuxSubtitlesQueue *q);
+void ff_subtitles_queue_finalize(void *log_ctx, FFDemuxSubtitlesQueue *q);
 
 /**
  * Generic read_packet() callback for subtitles demuxers using this queue
diff --git a/libavformat/subviewer1dec.c b/libavformat/subviewer1dec.c
index 6b38533a..93db4ebf 100644
--- a/libavformat/subviewer1dec.c
+++ b/libavformat/subviewer1dec.c
@@ -86,7 +86,7 @@ static int subviewer1_read_header(AVFormatContext *s)
         }
     }
 
-    ff_subtitles_queue_finalize(&subviewer1->q);
+    ff_subtitles_queue_finalize(s, &subviewer1->q);
     return 0;
 }
 
diff --git a/libavformat/subviewerdec.c b/libavformat/subviewerdec.c
index f1b0fdf0..d4b2fdf4 100644
--- a/libavformat/subviewerdec.c
+++ b/libavformat/subviewerdec.c
@@ -153,7 +153,7 @@ static int subviewer_read_header(AVFormatContext *s)
         }
     }
 
-    ff_subtitles_queue_finalize(&subviewer->q);
+    ff_subtitles_queue_finalize(s, &subviewer->q);
 
 end:
     av_bprint_finalize(&header, NULL);
diff --git a/libavformat/svag.c b/libavformat/svag.c
new file mode 100644
index 00000000..08fc06b1
--- /dev/null
+++ b/libavformat/svag.c
@@ -0,0 +1,78 @@
+/*
+ * SVAG demuxer
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avformat.h"
+#include "internal.h"
+
+static int svag_probe(AVProbeData *p)
+{
+    if (memcmp(p->buf, "Svag", 4))
+        return 0;
+
+    return AVPROBE_SCORE_MAX;
+}
+
+static int svag_read_header(AVFormatContext *s)
+{
+    unsigned size, align;
+    AVStream *st;
+
+    avio_skip(s->pb, 4);
+
+    st = avformat_new_stream(s, NULL);
+    if (!st)
+        return AVERROR(ENOMEM);
+
+    size                   = avio_rl32(s->pb);
+    st->codec->codec_type  = AVMEDIA_TYPE_AUDIO;
+    st->codec->codec_id    = AV_CODEC_ID_ADPCM_PSX;
+    st->codec->sample_rate = avio_rl32(s->pb);
+    if (st->codec->sample_rate <= 0)
+        return AVERROR_INVALIDDATA;
+    st->codec->channels    = avio_rl32(s->pb);
+    if (st->codec->channels <= 0 || st->codec->channels > 8)
+        return AVERROR_INVALIDDATA;
+    st->duration           = size / (16 * st->codec->channels) * 28;
+    align                  = avio_rl32(s->pb);
+    if (align <= 0 || align > INT_MAX / st->codec->channels)
+        return AVERROR_INVALIDDATA;
+    st->codec->block_align = align * st->codec->channels;
+    avio_skip(s->pb, 0x800 - avio_tell(s->pb));
+    avpriv_set_pts_info(st, 64, 1, st->codec->sample_rate);
+
+    return 0;
+}
+
+static int svag_read_packet(AVFormatContext *s, AVPacket *pkt)
+{
+    AVCodecContext *codec = s->streams[0]->codec;
+
+    return av_get_packet(s->pb, pkt, codec->block_align);
+}
+
+AVInputFormat ff_svag_demuxer = {
+    .name           = "svag",
+    .long_name      = NULL_IF_CONFIG_SMALL("Konami PS2 SVAG"),
+    .read_probe     = svag_probe,
+    .read_header    = svag_read_header,
+    .read_packet    = svag_read_packet,
+    .extensions     = "svag",
+};
diff --git a/libavformat/swf.h b/libavformat/swf.h
index 93a094c2..ab67c756 100644
--- a/libavformat/swf.h
+++ b/libavformat/swf.h
@@ -134,8 +134,8 @@ typedef struct SWFContext {
     AVCodecContext *audio_enc, *video_enc;
     AVStream *video_st;
 #if CONFIG_ZLIB
-    AVIOContext *zpb;
 #define ZBUF_SIZE 4096
+    AVIOContext *zpb;
     uint8_t *zbuf_in;
     uint8_t *zbuf_out;
     z_stream zstream;
diff --git a/libavformat/swfdec.c b/libavformat/swfdec.c
index 570a4f54..eb17e82e 100644
--- a/libavformat/swfdec.c
+++ b/libavformat/swfdec.c
@@ -20,9 +20,16 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "config.h"
+
+#if CONFIG_ZLIB
+#include <zlib.h>
+#endif
+
 #include "libavutil/avassert.h"
 #include "libavutil/channel_layout.h"
 #include "libavutil/imgutils.h"
+#include "libavutil/internal.h"
 #include "libavutil/intreadwrite.h"
 #include "libavcodec/get_bits.h"
 #include "swf.h"
@@ -67,7 +74,12 @@ static int swf_probe(AVProbeData *p)
         && AV_RB24(p->buf) != AV_RB24("FWS"))
         return 0;
 
-    init_get_bits8(&gb, p->buf + 3, p->buf_size - 3);
+    if (   AV_RB24(p->buf) == AV_RB24("CWS")
+        && p->buf[3] <= 20)
+        return AVPROBE_SCORE_MAX / 4 + 1;
+
+    if (init_get_bits8(&gb, p->buf + 3, p->buf_size - 3) < 0)
+        return 0;
 
     skip_bits(&gb, 40);
     len = get_bits(&gb, 5);
@@ -338,7 +350,7 @@ static int swf_read_packet(AVFormatContext *s, AVPacket *pkt)
 
             out_len = colormapsize * colormapbpp + linesize * height;
 
-            av_dlog(s, "bitmap: ch=%d fmt=%d %dx%d (linesize=%d) len=%d->%ld pal=%d\n",
+            ff_dlog(s, "bitmap: ch=%d fmt=%d %dx%d (linesize=%d) len=%d->%ld pal=%d\n",
                     ch_id, bmp_fmt, width, height, linesize, len, out_len, colormapsize);
 
             zbuf = av_malloc(len);
@@ -407,7 +419,7 @@ static int swf_read_packet(AVFormatContext *s, AVPacket *pkt)
             }
             if (st->codec->pix_fmt != AV_PIX_FMT_NONE && st->codec->pix_fmt != pix_fmt) {
                 av_log(s, AV_LOG_ERROR, "pixel format change unsupported\n");
-            }else
+            } else
                 st->codec->pix_fmt = pix_fmt;
 
             if (linesize * height > pkt->size) {
@@ -473,7 +485,7 @@ static int swf_read_packet(AVFormatContext *s, AVPacket *pkt)
             if ((res = av_new_packet(pkt, len)) < 0)
                 return res;
             if (avio_read(pb, pkt->data, 4) != 4) {
-                av_free_packet(pkt);
+                av_packet_unref(pkt);
                 return AVERROR_INVALIDDATA;
             }
             if (AV_RB32(pkt->data) == 0xffd8ffd9 ||
@@ -490,7 +502,7 @@ static int swf_read_packet(AVFormatContext *s, AVPacket *pkt)
             }
             if (res != pkt->size) {
                 if (res < 0) {
-                    av_free_packet(pkt);
+                    av_packet_unref(pkt);
                     return res;
                 }
                 av_shrink_packet(pkt, res);
diff --git a/libavformat/swfenc.c b/libavformat/swfenc.c
index 56d98790..8d5933e9 100644
--- a/libavformat/swfenc.c
+++ b/libavformat/swfenc.c
@@ -256,6 +256,10 @@ static int swf_write_header(AVFormatContext *s)
                                       (will be patched if not streamed) */
 
     put_swf_rect(pb, 0, width * 20, 0, height * 20);
+    if ((rate * 256LL) / rate_base >= (1<<16)) {
+        av_log(s, AV_LOG_ERROR, "Invalid (too large) frame rate %d/%d\n", rate, rate_base);
+        return AVERROR(EINVAL);
+    }
     avio_wl16(pb, (rate * 256) / rate_base); /* frame rate */
     swf->duration_pos = avio_tell(pb);
     avio_wl16(pb, (uint16_t)(DUMMY_DURATION * (int64_t)rate / rate_base)); /* frame count */
diff --git a/libavformat/takdec.c b/libavformat/takdec.c
index 3eb1a8ec..970ab4a8 100644
--- a/libavformat/takdec.c
+++ b/libavformat/takdec.c
@@ -82,10 +82,10 @@ static int tak_read_header(AVFormatContext *s)
             if (size <= 3)
                 return AVERROR_INVALIDDATA;
 
-            buffer = av_malloc(size - 3 + FF_INPUT_BUFFER_PADDING_SIZE);
+            buffer = av_malloc(size - 3 + AV_INPUT_BUFFER_PADDING_SIZE);
             if (!buffer)
                 return AVERROR(ENOMEM);
-            memset(buffer + size - 3, 0, FF_INPUT_BUFFER_PADDING_SIZE);
+            memset(buffer + size - 3, 0, AV_INPUT_BUFFER_PADDING_SIZE);
 
             ffio_init_checksum(pb, tak_check_crc, 0xCE04B7U);
             if (avio_read(pb, buffer, size - 3) != size - 3) {
diff --git a/libavformat/tcp.c b/libavformat/tcp.c
index f24cad20..57386908 100644
--- a/libavformat/tcp.c
+++ b/libavformat/tcp.c
@@ -19,6 +19,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 #include "avformat.h"
+#include "libavutil/avassert.h"
 #include "libavutil/parseutils.h"
 #include "libavutil/opt.h"
 #include "libavutil/time.h"
@@ -38,15 +39,19 @@ typedef struct TCPContext {
     int open_timeout;
     int rw_timeout;
     int listen_timeout;
+    int recv_buffer_size;
+    int send_buffer_size;
 } TCPContext;
 
 #define OFFSET(x) offsetof(TCPContext, x)
 #define D AV_OPT_FLAG_DECODING_PARAM
 #define E AV_OPT_FLAG_ENCODING_PARAM
 static const AVOption options[] = {
-    { "listen",          "Listen for incoming connections",  OFFSET(listen),         AV_OPT_TYPE_INT, { .i64 = 0 },     0,       1,       .flags = D|E },
+    { "listen",          "Listen for incoming connections",  OFFSET(listen),         AV_OPT_TYPE_INT, { .i64 = 0 },     0,       2,       .flags = D|E },
     { "timeout",     "set timeout (in microseconds) of socket I/O operations", OFFSET(rw_timeout),     AV_OPT_TYPE_INT, { .i64 = -1 },         -1, INT_MAX, .flags = D|E },
     { "listen_timeout",  "Connection awaiting timeout (in milliseconds)",      OFFSET(listen_timeout), AV_OPT_TYPE_INT, { .i64 = -1 },         -1, INT_MAX, .flags = D|E },
+    { "send_buffer_size", "Socket send buffer size (in bytes)",                OFFSET(send_buffer_size), AV_OPT_TYPE_INT, { .i64 = -1 },         -1, INT_MAX, .flags = D|E },
+    { "recv_buffer_size", "Socket receive buffer size (in bytes)",             OFFSET(recv_buffer_size), AV_OPT_TYPE_INT, { .i64 = -1 },         -1, INT_MAX, .flags = D|E },
     { NULL }
 };
 
@@ -125,11 +130,16 @@ static int tcp_open(URLContext *h, const char *uri, int flags)
         goto fail;
     }
 
-    if (s->listen) {
+    if (s->listen == 2) {
+        // multi-client
+        if ((ret = ff_listen(fd, cur_ai->ai_addr, cur_ai->ai_addrlen)) < 0)
+            goto fail1;
+    } else if (s->listen == 1) {
+        // single client
         if ((ret = ff_listen_bind(fd, cur_ai->ai_addr, cur_ai->ai_addrlen,
-                                  s->listen_timeout, h)) < 0) {
+                                  s->listen_timeout, h)) < 0)
             goto fail1;
-        }
+        // Socket descriptor already closed here. Safe to overwrite to client one.
         fd = ret;
     } else {
         if ((ret = ff_listen_connect(fd, cur_ai->ai_addr, cur_ai->ai_addrlen,
@@ -144,6 +154,15 @@ static int tcp_open(URLContext *h, const char *uri, int flags)
 
     h->is_streamed = 1;
     s->fd = fd;
+    /* Set the socket's send or receive buffer sizes, if specified.
+       If unspecified or setting fails, system default is used. */
+    if (s->recv_buffer_size > 0) {
+        setsockopt (fd, SOL_SOCKET, SO_RCVBUF, &s->recv_buffer_size, sizeof (s->recv_buffer_size));
+    }
+    if (s->send_buffer_size > 0) {
+        setsockopt (fd, SOL_SOCKET, SO_SNDBUF, &s->send_buffer_size, sizeof (s->send_buffer_size));
+    }
+
     freeaddrinfo(ai);
     return 0;
 
@@ -163,6 +182,22 @@ static int tcp_open(URLContext *h, const char *uri, int flags)
     return ret;
 }
 
+static int tcp_accept(URLContext *s, URLContext **c) /* accept one client on listening context s; the new context is returned in *c */
+{
+    TCPContext *sc = s->priv_data;
+    int ret;
+    av_assert0(sc->listen);
+    if ((ret = ffurl_alloc(c, s->filename, s->flags, &s->interrupt_callback)) < 0)
+        return ret;
+    ret = ff_accept(sc->fd, sc->listen_timeout, s);
+    if (ret < 0) {
+        ffurl_closep(c); /* release the context allocated above instead of leaking it; also resets *c to NULL */
+        return ret;      /* NOTE(review): assumes ff_accept() already returns a negative AVERROR code - confirm in network.c */
+    }
+    ((TCPContext *)(*c)->priv_data)->fd = ret; /* the accepted socket becomes the client context's descriptor */
+    return 0;
+}
+
 static int tcp_read(URLContext *h, uint8_t *buf, int size)
 {
     TCPContext *s = h->priv_data;
@@ -223,6 +258,7 @@ static int tcp_get_file_handle(URLContext *h)
 URLProtocol ff_tcp_protocol = {
     .name                = "tcp",
     .url_open            = tcp_open,
+    .url_accept          = tcp_accept,
     .url_read            = tcp_read,
     .url_write           = tcp_write,
     .url_close           = tcp_close,
diff --git a/libavformat/tedcaptionsdec.c b/libavformat/tedcaptionsdec.c
index fb578ebc..b6dc5170 100644
--- a/libavformat/tedcaptionsdec.c
+++ b/libavformat/tedcaptionsdec.c
@@ -34,7 +34,7 @@ typedef struct {
 
 static const AVOption tedcaptions_options[] = {
     { "start_time", "set the start time (offset) of the subtitles, in ms",
-      offsetof(TEDCaptionsDemuxer, start_time), FF_OPT_TYPE_INT64,
+      offsetof(TEDCaptionsDemuxer, start_time), AV_OPT_TYPE_INT64,
       { .i64 = 15000 }, INT64_MIN, INT64_MAX,
       AV_OPT_FLAG_SUBTITLE_PARAM | AV_OPT_FLAG_DECODING_PARAM },
     { NULL },
@@ -287,7 +287,7 @@ static av_cold int tedcaptions_read_header(AVFormatContext *avf)
         ff_subtitles_queue_clean(&tc->subs);
         return ret;
     }
-    ff_subtitles_queue_finalize(&tc->subs);
+    ff_subtitles_queue_finalize(avf, &tc->subs);
     for (i = 0; i < tc->subs.nb_subs; i++)
         tc->subs.subs[i].pts += tc->start_time;
 
diff --git a/libavformat/tee.c b/libavformat/tee.c
index e3d466a3..13907055 100644
--- a/libavformat/tee.c
+++ b/libavformat/tee.c
@@ -23,7 +23,9 @@
 #include "libavutil/avutil.h"
 #include "libavutil/avstring.h"
 #include "libavutil/opt.h"
+#include "internal.h"
 #include "avformat.h"
+#include "avio_internal.h"
 
 #define MAX_SLAVES 16
 
@@ -47,6 +49,7 @@ static const char *const slave_opt_open  = "[";
 static const char *const slave_opt_close = "]";
 static const char *const slave_opt_delim = ":]"; /* must have the close too */
 static const char *const slave_bsfs_spec_sep = "/";
+static const char *const slave_select_sep = ",";
 
 static const AVClass tee_muxer_class = {
     .class_name = "Tee muxer",
@@ -142,6 +145,8 @@ static int open_slave(AVFormatContext *avf, char *slave, TeeSlave *tee_slave)
     AVFormatContext *avf2 = NULL;
     AVStream *st, *st2;
     int stream_count;
+    int fullret;
+    char *subselect = NULL, *next_subselect = NULL, *first_subselect = NULL, *tmp_select = NULL;
 
     if ((ret = parse_slave_options(avf, slave, &options, &filename)) < 0)
         return ret;
@@ -161,6 +166,9 @@ static int open_slave(AVFormatContext *avf, char *slave, TeeSlave *tee_slave)
     if (ret < 0)
         goto end;
     av_dict_copy(&avf2->metadata, avf->metadata, 0);
+    avf2->opaque   = avf->opaque;
+    avf2->io_open  = avf->io_open;
+    avf2->io_close = avf->io_close;
 
     tee_slave->stream_map = av_calloc(avf->nb_streams, sizeof(*tee_slave->stream_map));
     if (!tee_slave->stream_map) {
@@ -172,15 +180,32 @@ static int open_slave(AVFormatContext *avf, char *slave, TeeSlave *tee_slave)
     for (i = 0; i < avf->nb_streams; i++) {
         st = avf->streams[i];
         if (select) {
-            ret = avformat_match_stream_specifier(avf, avf->streams[i], select);
-            if (ret < 0) {
-                av_log(avf, AV_LOG_ERROR,
-                       "Invalid stream specifier '%s' for output '%s'\n",
-                       select, slave);
+            tmp_select = av_strdup(select);  // av_strtok is destructive so we regenerate it in each loop
+            if (!tmp_select) {
+                ret = AVERROR(ENOMEM);
                 goto end;
             }
+            fullret = 0;
+            first_subselect = tmp_select;
+            next_subselect = NULL;
+            while (subselect = av_strtok(first_subselect, slave_select_sep, &next_subselect)) {
+                first_subselect = NULL;
 
-            if (ret == 0) { /* no match */
+                ret = avformat_match_stream_specifier(avf, avf->streams[i], subselect);
+                if (ret < 0) {
+                    av_log(avf, AV_LOG_ERROR,
+                           "Invalid stream specifier '%s' for output '%s'\n",
+                           subselect, slave);
+                    goto end;
+                }
+                if (ret != 0) {
+                    fullret = 1; // match
+                    break;
+                }
+            }
+            av_freep(&tmp_select);
+
+            if (fullret == 0) { /* no match */
                 tee_slave->stream_map[i] = -1;
                 continue;
             }
@@ -206,7 +231,7 @@ static int open_slave(AVFormatContext *avf, char *slave, TeeSlave *tee_slave)
     }
 
     if (!(avf2->oformat->flags & AVFMT_NOFILE)) {
-        if ((ret = avio_open(&avf2->pb, filename, AVIO_FLAG_WRITE)) < 0) {
+        if ((ret = avf2->io_open(avf2, &avf2->pb, filename, AVIO_FLAG_WRITE, NULL)) < 0) {
             av_log(avf, AV_LOG_ERROR, "Slave '%s': error opening: %s\n",
                    slave, av_err2str(ret));
             goto end;
@@ -282,6 +307,7 @@ static int open_slave(AVFormatContext *avf, char *slave, TeeSlave *tee_slave)
     av_free(format);
     av_free(select);
     av_dict_free(&options);
+    av_freep(&tmp_select);
     return ret;
 }
 
@@ -305,7 +331,7 @@ static void close_slaves(AVFormatContext *avf)
         av_freep(&tee->slaves[i].stream_map);
         av_freep(&tee->slaves[i].bsfs);
 
-        avio_closep(&avf2->pb);
+        ff_format_io_close(avf2, &avf2->pb);
         avformat_free_context(avf2);
         tee->slaves[i].avf = NULL;
     }
@@ -384,45 +410,6 @@ static int tee_write_header(AVFormatContext *avf)
     return ret;
 }
 
-static int filter_packet(void *log_ctx, AVPacket *pkt,
-                         AVFormatContext *fmt_ctx, AVBitStreamFilterContext *bsf_ctx)
-{
-    AVCodecContext *enc_ctx = fmt_ctx->streams[pkt->stream_index]->codec;
-    int ret = 0;
-
-    while (bsf_ctx) {
-        AVPacket new_pkt = *pkt;
-        ret = av_bitstream_filter_filter(bsf_ctx, enc_ctx, NULL,
-                                             &new_pkt.data, &new_pkt.size,
-                                             pkt->data, pkt->size,
-                                             pkt->flags & AV_PKT_FLAG_KEY);
-        if (ret == 0 && new_pkt.data != pkt->data && new_pkt.destruct) {
-            if ((ret = av_copy_packet(&new_pkt, pkt)) < 0)
-                break;
-            ret = 1;
-        }
-
-        if (ret > 0) {
-            av_free_packet(pkt);
-            new_pkt.buf = av_buffer_create(new_pkt.data, new_pkt.size,
-                                           av_buffer_default_free, NULL, 0);
-            if (!new_pkt.buf)
-                break;
-        }
-        if (ret < 0) {
-            av_log(log_ctx, AV_LOG_ERROR,
-                "Failed to filter bitstream with filter %s for stream %d in file '%s' with codec %s\n",
-                bsf_ctx->filter->name, pkt->stream_index, fmt_ctx->filename,
-                avcodec_get_name(enc_ctx->codec_id));
-        }
-        *pkt = new_pkt;
-
-        bsf_ctx = bsf_ctx->next;
-    }
-
-    return ret;
-}
-
 static int tee_write_trailer(AVFormatContext *avf)
 {
     TeeContext *tee = avf->priv_data;
@@ -435,11 +422,8 @@ static int tee_write_trailer(AVFormatContext *avf)
         if ((ret = av_write_trailer(avf2)) < 0)
             if (!ret_all)
                 ret_all = ret;
-        if (!(avf2->oformat->flags & AVFMT_NOFILE)) {
-            if ((ret = avio_closep(&avf2->pb)) < 0)
-                if (!ret_all)
-                    ret_all = ret;
-        }
+        if (!(avf2->oformat->flags & AVFMT_NOFILE))
+            ff_format_io_close(avf2, &avf2->pb);
     }
     close_slaves(avf);
     return ret_all;
@@ -475,8 +459,9 @@ static int tee_write_packet(AVFormatContext *avf, AVPacket *pkt)
         pkt2.duration = av_rescale_q(pkt->duration, tb, tb2);
         pkt2.stream_index = s2;
 
-        filter_packet(avf2, &pkt2, avf2, tee->slaves[i].bsfs[s2]);
-        if ((ret = av_interleaved_write_frame(avf2, &pkt2)) < 0)
+        if ((ret = av_apply_bitstream_filters(avf2->streams[s2]->codec, &pkt2,
+                                              tee->slaves[i].bsfs[s2])) < 0 ||
+            (ret = av_interleaved_write_frame(avf2, &pkt2)) < 0)
             if (!ret_all)
                 ret_all = ret;
     }
diff --git a/libavformat/thp.c b/libavformat/thp.c
index 727fb509..5569027d 100644
--- a/libavformat/thp.c
+++ b/libavformat/thp.c
@@ -135,6 +135,7 @@ static int thp_read_header(AVFormatContext *s)
             st->codec->codec_tag = 0;  /* no fourcc */
             st->codec->channels    = avio_rb32(pb); /* numChannels.  */
             st->codec->sample_rate = avio_rb32(pb); /* Frequency.  */
+            st->duration           = avio_rb32(pb);
 
             avpriv_set_pts_info(st, 64, 1, st->codec->sample_rate);
 
@@ -179,7 +180,7 @@ static int thp_read_packet(AVFormatContext *s,
         if (ret < 0)
             return ret;
         if (ret != size) {
-            av_free_packet(pkt);
+            av_packet_unref(pkt);
             return AVERROR(EIO);
         }
 
@@ -189,7 +190,7 @@ static int thp_read_packet(AVFormatContext *s,
         if (ret < 0)
             return ret;
         if (ret != thp->audiosize) {
-            av_free_packet(pkt);
+            av_packet_unref(pkt);
             return AVERROR(EIO);
         }
 
diff --git a/libavformat/tls.c b/libavformat/tls.c
index 9802a70d..c6259836 100644
--- a/libavformat/tls.c
+++ b/libavformat/tls.c
@@ -104,6 +104,7 @@ int ff_tls_open_underlying(TLSShared *c, URLContext *parent, const char *uri, AV
                     proxy_port, "/%s", dest);
     }
 
-    return ffurl_open(&c->tcp, buf, AVIO_FLAG_READ_WRITE,
-                      &parent->interrupt_callback, options);
+    return ffurl_open_whitelist(&c->tcp, buf, AVIO_FLAG_READ_WRITE,
+                                &parent->interrupt_callback, options,
+                                parent->protocol_whitelist);
 }
diff --git a/libavformat/tls.h b/libavformat/tls.h
index 2a36f34f..0326ef79 100644
--- a/libavformat/tls.h
+++ b/libavformat/tls.h
@@ -26,7 +26,7 @@
 #include "url.h"
 #include "libavutil/opt.h"
 
-#define CONFIG_TLS_PROTOCOL (CONFIG_TLS_GNUTLS_PROTOCOL | CONFIG_TLS_OPENSSL_PROTOCOL | CONFIG_TLS_SECURETRANSPORT_PROTOCOL)
+#define CONFIG_TLS_PROTOCOL (CONFIG_TLS_GNUTLS_PROTOCOL | CONFIG_TLS_OPENSSL_PROTOCOL | CONFIG_TLS_SECURETRANSPORT_PROTOCOL | CONFIG_TLS_SCHANNEL_PROTOCOL)
 
 typedef struct TLSShared {
     char *ca_file;
diff --git a/libavformat/tls_gnutls.c b/libavformat/tls_gnutls.c
index 6388f37a..4bf94485 100644
--- a/libavformat/tls_gnutls.c
+++ b/libavformat/tls_gnutls.c
@@ -144,7 +144,7 @@ static int tls_open(URLContext *h, const char *uri, int flags, AVDictionary **op
         if (ret < 0)
             av_log(h, AV_LOG_ERROR, "%s\n", gnutls_strerror(ret));
     }
-#if GNUTLS_VERSION_MAJOR >= 3
+#if GNUTLS_VERSION_NUMBER >= 0x030020
     else
         gnutls_certificate_set_x509_system_trust(p->cred);
 #endif
diff --git a/libavformat/tls_schannel.c b/libavformat/tls_schannel.c
new file mode 100644
index 00000000..85c01a0d
--- /dev/null
+++ b/libavformat/tls_schannel.c
@@ -0,0 +1,601 @@
+/*
+ * Copyright (c) 2015 Hendrik Leppkes
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/** Based on the CURL SChannel module */
+
+#include "avformat.h"
+#include "internal.h"
+#include "network.h"
+#include "os_support.h"
+#include "url.h"
+#include "tls.h"
+
+#define SECURITY_WIN32
+#include <windows.h>
+#include <security.h>
+#include <schnlsp.h>
+
+#define SCHANNEL_INITIAL_BUFFER_SIZE   4096
+#define SCHANNEL_FREE_BUFFER_SIZE      1024
+
+/* mingw does not define this symbol */
+#ifndef SECBUFFER_ALERT
+#define SECBUFFER_ALERT                17
+#endif
+
+typedef struct TLSContext { /* private data of the SChannel-based "tls" protocol */
+    const AVClass *class;
+    TLSShared tls_shared; /* shared TLS state; this file uses its tcp, host, listen and verify members */
+
+    CredHandle cred_handle; /* filled in by AcquireCredentialsHandle() in tls_open() */
+    TimeStamp cred_timestamp;
+
+    CtxtHandle ctxt_handle; /* security context produced by InitializeSecurityContext() */
+    TimeStamp ctxt_timestamp;
+
+    ULONG request_flags; /* ISC_REQ_* flags passed to InitializeSecurityContext() */
+    ULONG context_flags; /* flags written back by InitializeSecurityContext() */
+
+    uint8_t *enc_buf;   /* bytes read from the TCP connection that have not been consumed by SSPI yet */
+    int enc_buf_size;   /* allocated size of enc_buf */
+    int enc_buf_offset; /* number of valid bytes in enc_buf */
+
+    uint8_t *dec_buf;   /* decrypted bytes not yet handed to the caller of tls_read() */
+    int dec_buf_size;   /* allocated size of dec_buf */
+    int dec_buf_offset; /* number of valid bytes in dec_buf */
+
+    SecPkgContext_StreamSizes sizes; /* queried lazily in tls_write() (header/trailer/max message sizes) */
+
+    int connected;         /* set once the handshake in tls_open() succeeded; cleared by tls_shutdown_client() */
+    int connection_closed; /* set when the socket read returns 0 or the context has expired */
+    int sspi_close_notify; /* set when DecryptMessage() reports SEC_I_CONTEXT_EXPIRED */
+} TLSContext;
+
+static void init_sec_buffer(SecBuffer *buffer, unsigned long type,
+                            void *data, unsigned long size)
+{ /* fill all three members of *buffer in one assignment */
+    *buffer = (SecBuffer) { .BufferType = type,
+                            .pvBuffer   = data,
+                            .cbBuffer   = size };
+}
+
+static void init_sec_buffer_desc(SecBufferDesc *desc, SecBuffer *buffers,
+                                 unsigned long buffer_count)
+{ /* describe an array of buffer_count SecBuffers for an SSPI call */
+    *desc = (SecBufferDesc) { .ulVersion = SECBUFFER_VERSION,
+                              .cBuffers  = buffer_count,
+                              .pBuffers  = buffers };
+}
+
+static int tls_shutdown_client(URLContext *h)
+{ /* Best-effort shutdown: when connected, apply SCHANNEL_SHUTDOWN and send the resulting token. Always returns 0. */
+    TLSContext *c = h->priv_data;
+    TLSShared *s = &c->tls_shared;
+    int ret;
+
+    if (c->connected) {
+        SecBufferDesc BuffDesc;
+        SecBuffer Buffer;
+        SECURITY_STATUS sspi_ret;
+        SecBuffer outbuf;
+        SecBufferDesc outbuf_desc;
+
+        DWORD dwshut = SCHANNEL_SHUTDOWN;
+        init_sec_buffer(&Buffer, SECBUFFER_TOKEN, &dwshut, sizeof(dwshut));
+        init_sec_buffer_desc(&BuffDesc, &Buffer, 1);
+
+        sspi_ret = ApplyControlToken(&c->ctxt_handle, &BuffDesc);
+        if (sspi_ret != SEC_E_OK)
+            av_log(h, AV_LOG_ERROR, "ApplyControlToken failed\n"); /* only logged; the shutdown attempt continues */
+
+        init_sec_buffer(&outbuf, SECBUFFER_EMPTY, NULL, 0);
+        init_sec_buffer_desc(&outbuf_desc, &outbuf, 1);
+
+        sspi_ret = InitializeSecurityContext(&c->cred_handle, &c->ctxt_handle, s->host,
+                                             c->request_flags, 0, 0, NULL, 0, &c->ctxt_handle,
+                                             &outbuf_desc, &c->context_flags, &c->ctxt_timestamp);
+        if (sspi_ret == SEC_E_OK || sspi_ret == SEC_I_CONTEXT_EXPIRED) {
+            ret = ffurl_write(s->tcp, outbuf.pvBuffer, outbuf.cbBuffer); /* send the token produced above */
+            FreeContextBuffer(outbuf.pvBuffer);
+            if (ret < 0 || ret != outbuf.cbBuffer)
+                av_log(h, AV_LOG_ERROR, "Failed to send close message\n"); /* failure is logged, not propagated */
+        }
+
+        c->connected = 0; /* a second call becomes a no-op */
+    }
+    return 0;
+}
+
+static int tls_close(URLContext *h)
+{ /* url_close callback, also used as the failure path of tls_open(): releases SSPI handles, buffers and the TCP connection */
+    TLSContext *c = h->priv_data;
+
+    tls_shutdown_client(h); /* no-op unless c->connected is set */
+
+    DeleteSecurityContext(&c->ctxt_handle);
+    FreeCredentialsHandle(&c->cred_handle);
+
+    av_freep(&c->enc_buf);
+    c->enc_buf_size = c->enc_buf_offset = 0;
+
+    av_freep(&c->dec_buf);
+    c->dec_buf_size = c->dec_buf_offset = 0;
+
+    if (c->tls_shared.tcp) /* may still be unset when opening the underlying connection failed */
+        ffurl_close(c->tls_shared.tcp);
+    return 0;
+}
+
+static int tls_client_handshake_loop(URLContext *h, int initial)
+{ /* Runs the handshake: feeds received bytes to InitializeSecurityContext() and sends its tokens until it no longer reports SEC_I_CONTINUE_NEEDED. 'initial' selects whether the first pass reads from the socket. Returns 0 or a negative error code. */
+    TLSContext *c = h->priv_data;
+    TLSShared *s = &c->tls_shared;
+    SECURITY_STATUS sspi_ret;
+    SecBuffer outbuf[3] = { 0 }; /* zeroed: the fail path inspects pvBuffer even when we bail out before the buffers are set up */
+    SecBufferDesc outbuf_desc;
+    SecBuffer inbuf[2];
+    SecBufferDesc inbuf_desc;
+    int i, ret = 0, read_data = initial;
+
+    if (c->enc_buf == NULL) {
+        c->enc_buf_offset = 0;
+        ret = av_reallocp(&c->enc_buf, SCHANNEL_INITIAL_BUFFER_SIZE);
+        if (ret < 0)
+            goto fail;
+        c->enc_buf_size = SCHANNEL_INITIAL_BUFFER_SIZE;
+    }
+
+    if (c->dec_buf == NULL) {
+        c->dec_buf_offset = 0;
+        ret = av_reallocp(&c->dec_buf, SCHANNEL_INITIAL_BUFFER_SIZE);
+        if (ret < 0)
+            goto fail;
+        c->dec_buf_size = SCHANNEL_INITIAL_BUFFER_SIZE;
+    }
+
+    while (1) {
+        if (c->enc_buf_size - c->enc_buf_offset < SCHANNEL_FREE_BUFFER_SIZE) { /* keep at least SCHANNEL_FREE_BUFFER_SIZE bytes of room */
+            c->enc_buf_size = c->enc_buf_offset + SCHANNEL_FREE_BUFFER_SIZE;
+            ret = av_reallocp(&c->enc_buf, c->enc_buf_size);
+            if (ret < 0) {
+                c->enc_buf_size = c->enc_buf_offset = 0;
+                goto fail;
+            }
+        }
+
+        if (read_data) {
+            ret = ffurl_read(c->tls_shared.tcp, c->enc_buf + c->enc_buf_offset,
+                             c->enc_buf_size - c->enc_buf_offset);
+            if (ret < 0) {
+                av_log(h, AV_LOG_ERROR, "Failed to read handshake response\n");
+                goto fail;
+            }
+            c->enc_buf_offset += ret;
+        }
+
+        /* input buffers */
+        init_sec_buffer(&inbuf[0], SECBUFFER_TOKEN, av_malloc(c->enc_buf_offset), c->enc_buf_offset);
+        init_sec_buffer(&inbuf[1], SECBUFFER_EMPTY, NULL, 0);
+        init_sec_buffer_desc(&inbuf_desc, inbuf, 2);
+
+        if (inbuf[0].pvBuffer == NULL) {
+            av_log(h, AV_LOG_ERROR, "Failed to allocate input buffer\n");
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+
+        memcpy(inbuf[0].pvBuffer, c->enc_buf, c->enc_buf_offset); /* SSPI gets a private copy of the pending bytes */
+
+        /* output buffers */
+        init_sec_buffer(&outbuf[0], SECBUFFER_TOKEN, NULL, 0);
+        init_sec_buffer(&outbuf[1], SECBUFFER_ALERT, NULL, 0);
+        init_sec_buffer(&outbuf[2], SECBUFFER_EMPTY, NULL, 0);
+        init_sec_buffer_desc(&outbuf_desc, outbuf, 3);
+
+        sspi_ret = InitializeSecurityContext(&c->cred_handle, &c->ctxt_handle, s->host, c->request_flags,
+                                             0, 0, &inbuf_desc, 0, NULL, &outbuf_desc, &c->context_flags,
+                                             &c->ctxt_timestamp);
+        av_freep(&inbuf[0].pvBuffer);
+
+        if (sspi_ret == SEC_E_INCOMPLETE_MESSAGE) {
+            av_log(h, AV_LOG_DEBUG, "Received incomplete handshake, need more data\n");
+            read_data = 1;
+            continue;
+        }
+
+        /* remote requests a client certificate - attempt to continue without one anyway */
+        if (sspi_ret == SEC_I_INCOMPLETE_CREDENTIALS &&
+            !(c->request_flags & ISC_REQ_USE_SUPPLIED_CREDS)) {
+            av_log(h, AV_LOG_VERBOSE, "Client certificate has been requested, ignoring\n");
+            c->request_flags |= ISC_REQ_USE_SUPPLIED_CREDS;
+            read_data = 0; /* retry with the same input, no new socket read */
+            continue;
+        }
+
+        /* continue handshake */
+        if (sspi_ret == SEC_I_CONTINUE_NEEDED || sspi_ret == SEC_E_OK) {
+            for (i = 0; i < 3; i++) {
+                if (outbuf[i].BufferType == SECBUFFER_TOKEN && outbuf[i].cbBuffer > 0) {
+                    ret = ffurl_write(c->tls_shared.tcp, outbuf[i].pvBuffer, outbuf[i].cbBuffer);
+                    if (ret < 0 || ret != outbuf[i].cbBuffer) {
+                        av_log(h, AV_LOG_VERBOSE, "Failed to send handshake data\n");
+                        ret = AVERROR(EIO);
+                        goto fail; /* the fail path frees this and the remaining output buffers */
+                    }
+                }
+
+                if (outbuf[i].pvBuffer != NULL) {
+                    FreeContextBuffer(outbuf[i].pvBuffer);
+                    outbuf[i].pvBuffer = NULL;
+                }
+            }
+        } else {
+            if (sspi_ret == SEC_E_WRONG_PRINCIPAL)
+                av_log(h, AV_LOG_ERROR, "SNI or certificate check failed\n");
+            else
+                av_log(h, AV_LOG_ERROR, "Creating security context failed (0x%lx)\n", sspi_ret);
+            ret = AVERROR_UNKNOWN;
+            goto fail;
+        }
+
+        if (inbuf[1].BufferType == SECBUFFER_EXTRA && inbuf[1].cbBuffer > 0) { /* SSPI left cbBuffer trailing bytes unconsumed */
+            if (c->enc_buf_offset > inbuf[1].cbBuffer) {
+                memmove(c->enc_buf, (c->enc_buf + c->enc_buf_offset) - inbuf[1].cbBuffer,
+                        inbuf[1].cbBuffer);
+                c->enc_buf_offset = inbuf[1].cbBuffer;
+                if (sspi_ret == SEC_I_CONTINUE_NEEDED) {
+                    read_data = 0; /* process the leftover bytes before reading more */
+                    continue;
+                }
+            }
+        } else {
+            c->enc_buf_offset  = 0;
+        }
+
+        if (sspi_ret == SEC_I_CONTINUE_NEEDED) {
+            read_data = 1;
+            continue;
+        }
+
+        break;
+    }
+
+    return 0;
+
+fail:
+    /* free any remaining output data */
+    for (i = 0; i < 3; i++) {
+        if (outbuf[i].pvBuffer != NULL) {
+            FreeContextBuffer(outbuf[i].pvBuffer);
+            outbuf[i].pvBuffer = NULL;
+        }
+    }
+
+    return ret;
+}
+
+static int tls_client_handshake(URLContext *h)
+{ /* Creates the initial security context, sends its first token and hands over to tls_client_handshake_loop(). Returns 0 or a negative error code. */
+    TLSContext *c = h->priv_data;
+    TLSShared *s = &c->tls_shared;
+    SecBuffer outbuf;
+    SecBufferDesc outbuf_desc;
+    SECURITY_STATUS sspi_ret;
+    int ret;
+
+    init_sec_buffer(&outbuf, SECBUFFER_EMPTY, NULL, 0);
+    init_sec_buffer_desc(&outbuf_desc, &outbuf, 1);
+
+    c->request_flags = ISC_REQ_SEQUENCE_DETECT | ISC_REQ_REPLAY_DETECT |
+                       ISC_REQ_CONFIDENTIALITY | ISC_REQ_ALLOCATE_MEMORY |
+                       ISC_REQ_STREAM;
+
+    sspi_ret = InitializeSecurityContext(&c->cred_handle, NULL, s->host, c->request_flags, 0, 0,
+                                         NULL, 0, &c->ctxt_handle, &outbuf_desc, &c->context_flags,
+                                         &c->ctxt_timestamp);
+    if (sspi_ret != SEC_I_CONTINUE_NEEDED) { /* any other status is treated as failure here */
+        av_log(h, AV_LOG_ERROR, "Unable to create initial security context (0x%lx)\n", sspi_ret);
+        ret = AVERROR_UNKNOWN;
+        goto fail;
+    }
+
+    ret = ffurl_write(s->tcp, outbuf.pvBuffer, outbuf.cbBuffer);
+    FreeContextBuffer(outbuf.pvBuffer); /* frees only the token memory; outbuf.cbBuffer below is still valid */
+    if (ret < 0 || ret != outbuf.cbBuffer) {
+        av_log(h, AV_LOG_ERROR, "Failed to send initial handshake data\n");
+        ret = AVERROR(EIO);
+        goto fail;
+    }
+
+    return tls_client_handshake_loop(h, 1); /* 1: start by reading from the socket */
+
+fail:
+    DeleteSecurityContext(&c->ctxt_handle);
+    return ret;
+}
+
+static int tls_open(URLContext *h, const char *uri, int flags, AVDictionary **options)
+{ /* url_open2 callback: opens the underlying connection, acquires outbound SChannel credentials and performs the client handshake. Returns 0 or a negative error code. */
+    TLSContext *c = h->priv_data;
+    TLSShared *s = &c->tls_shared;
+    SECURITY_STATUS sspi_ret;
+    SCHANNEL_CRED schannel_cred = { 0 };
+    int ret;
+
+    if ((ret = ff_tls_open_underlying(s, h, uri, options)) < 0)
+        goto fail;
+
+    if (s->listen) { /* server mode is rejected */
+        av_log(h, AV_LOG_ERROR, "TLS Listen Sockets with SChannel is not implemented.\n");
+        ret = AVERROR(EINVAL);
+        goto fail;
+    }
+
+    /* SChannel Options */
+    schannel_cred.dwVersion = SCHANNEL_CRED_VERSION;
+
+    if (s->verify) /* the verify setting selects automatic vs. manual credential validation flags */
+        schannel_cred.dwFlags = SCH_CRED_AUTO_CRED_VALIDATION |
+                                SCH_CRED_REVOCATION_CHECK_CHAIN;
+    else
+        schannel_cred.dwFlags = SCH_CRED_MANUAL_CRED_VALIDATION |
+                                SCH_CRED_IGNORE_NO_REVOCATION_CHECK |
+                                SCH_CRED_IGNORE_REVOCATION_OFFLINE;
+
+    /* Get credential handle */
+    sspi_ret = AcquireCredentialsHandle(NULL, (TCHAR *)UNISP_NAME, SECPKG_CRED_OUTBOUND,
+                                        NULL,  &schannel_cred, NULL, NULL, &c->cred_handle,
+                                        &c->cred_timestamp);
+    if (sspi_ret != SEC_E_OK) {
+        av_log(h, AV_LOG_ERROR, "Unable to acquire security credentials (0x%lx)\n", sspi_ret);
+        ret = AVERROR_UNKNOWN;
+        goto fail;
+    }
+
+    ret = tls_client_handshake(h);
+    if (ret < 0)
+        goto fail;
+
+    c->connected = 1; /* enables the shutdown message in tls_shutdown_client() */
+
+    return 0;
+
+fail:
+    tls_close(h); /* releases whatever was set up so far */
+    return ret;
+}
+
+static int tls_read(URLContext *h, uint8_t *buf, int len)
+{ /* Returns up to len decrypted bytes in buf. If enough plaintext is already buffered the socket is not touched; otherwise ciphertext is read and decrypted. Returns the byte count, 0 when the connection is closed and nothing is buffered, AVERROR(EAGAIN) when no plaintext is available yet, or another negative error. */
+    TLSContext *c = h->priv_data;
+    TLSShared *s = &c->tls_shared;
+    SECURITY_STATUS sspi_ret = SEC_E_OK;
+    SecBuffer inbuf[4];
+    SecBufferDesc inbuf_desc;
+    int size, ret = 0; /* ret must be defined: the early 'goto cleanup' jumps below reach the final checks without assigning it */
+    int min_enc_buf_size = len + SCHANNEL_FREE_BUFFER_SIZE;
+
+    if (len <= c->dec_buf_offset)
+        goto cleanup;
+
+    if (c->sspi_close_notify)
+        goto cleanup;
+
+    if (!c->connection_closed) {
+        size = c->enc_buf_size - c->enc_buf_offset;
+        if (size < SCHANNEL_FREE_BUFFER_SIZE || c->enc_buf_size < min_enc_buf_size) {
+            c->enc_buf_size = c->enc_buf_offset + SCHANNEL_FREE_BUFFER_SIZE;
+            if (c->enc_buf_size < min_enc_buf_size)
+                c->enc_buf_size = min_enc_buf_size;
+            ret = av_reallocp(&c->enc_buf, c->enc_buf_size);
+            if (ret < 0) {
+                c->enc_buf_size = c->enc_buf_offset = 0;
+                return ret;
+            }
+        }
+
+        ret = ffurl_read(s->tcp, c->enc_buf + c->enc_buf_offset,
+                         c->enc_buf_size - c->enc_buf_offset);
+        if (ret < 0) {
+            av_log(h, AV_LOG_ERROR, "Unable to read from socket\n");
+            return ret;
+        } else if (ret == 0)
+            c->connection_closed = 1; /* a zero-byte read is treated as the peer having closed the connection */
+
+        c->enc_buf_offset += ret;
+    }
+
+    while (c->enc_buf_offset > 0 && sspi_ret == SEC_E_OK && c->dec_buf_offset < len) {
+        /*  input buffer */
+        init_sec_buffer(&inbuf[0], SECBUFFER_DATA, c->enc_buf, c->enc_buf_offset);
+
+        /* additional buffers for possible output */
+        init_sec_buffer(&inbuf[1], SECBUFFER_EMPTY, NULL, 0);
+        init_sec_buffer(&inbuf[2], SECBUFFER_EMPTY, NULL, 0);
+        init_sec_buffer(&inbuf[3], SECBUFFER_EMPTY, NULL, 0);
+        init_sec_buffer_desc(&inbuf_desc, inbuf, 4);
+
+        sspi_ret = DecryptMessage(&c->ctxt_handle, &inbuf_desc, 0, NULL);
+        if (sspi_ret == SEC_E_OK || sspi_ret == SEC_I_RENEGOTIATE ||
+            sspi_ret == SEC_I_CONTEXT_EXPIRED) {
+            /* handle decrypted data */
+            if (inbuf[1].BufferType == SECBUFFER_DATA) {
+                /* grow buffer if needed */
+                size = inbuf[1].cbBuffer > SCHANNEL_FREE_BUFFER_SIZE ?
+                       inbuf[1].cbBuffer : SCHANNEL_FREE_BUFFER_SIZE;
+                if (c->dec_buf_size - c->dec_buf_offset < size || c->dec_buf_size < len)  {
+                    c->dec_buf_size = c->dec_buf_offset + size;
+                    if (c->dec_buf_size < len)
+                        c->dec_buf_size = len;
+                    ret = av_reallocp(&c->dec_buf, c->dec_buf_size);
+                    if (ret < 0) {
+                        c->dec_buf_size = c->dec_buf_offset = 0;
+                        return ret;
+                    }
+                }
+
+                /* copy decrypted data to buffer */
+                size = inbuf[1].cbBuffer;
+                if (size) {
+                    memcpy(c->dec_buf + c->dec_buf_offset, inbuf[1].pvBuffer, size);
+                    c->dec_buf_offset += size;
+                }
+            }
+            if (inbuf[3].BufferType == SECBUFFER_EXTRA && inbuf[3].cbBuffer > 0) { /* unconsumed ciphertext stays queued at the front of enc_buf */
+                if (c->enc_buf_offset > inbuf[3].cbBuffer) {
+                    memmove(c->enc_buf, (c->enc_buf + c->enc_buf_offset) - inbuf[3].cbBuffer,
+                    inbuf[3].cbBuffer);
+                    c->enc_buf_offset = inbuf[3].cbBuffer;
+                }
+            } else
+                c->enc_buf_offset = 0;
+
+            if (sspi_ret == SEC_I_RENEGOTIATE) {
+                if (c->enc_buf_offset) {
+                    av_log(h, AV_LOG_ERROR, "Cannot renegotiate, encrypted data buffer not empty\n");
+                    ret = AVERROR_UNKNOWN;
+                    goto cleanup;
+                }
+
+                av_log(h, AV_LOG_VERBOSE, "Re-negotiating security context\n");
+                ret = tls_client_handshake_loop(h, 0);
+                if (ret < 0) {
+                    goto cleanup;
+                }
+                sspi_ret = SEC_E_OK; /* lets the loop condition continue decrypting */
+                continue;
+            } else if (sspi_ret == SEC_I_CONTEXT_EXPIRED) {
+                c->sspi_close_notify = 1; /* later calls skip reading and decrypting */
+                if (!c->connection_closed) {
+                    c->connection_closed = 1;
+                    av_log(h, AV_LOG_VERBOSE, "Server closed the connection\n");
+                }
+                ret = 0;
+                goto cleanup;
+            }
+        } else if (sspi_ret == SEC_E_INCOMPLETE_MESSAGE) {
+            ret = AVERROR(EAGAIN); /* need more ciphertext; buffered plaintext, if any, is still returned below */
+            goto cleanup;
+        } else {
+            av_log(h, AV_LOG_ERROR, "Unable to decrypt message\n");
+            ret = AVERROR(EIO);
+            goto cleanup;
+        }
+    }
+
+    ret = 0;
+
+cleanup:
+    size = FFMIN(len, c->dec_buf_offset);
+    if (size) { /* buffered plaintext takes priority over any pending error code */
+        memcpy(buf, c->dec_buf, size);
+        memmove(c->dec_buf, c->dec_buf + size, c->dec_buf_offset - size);
+        c->dec_buf_offset -= size;
+
+        return size;
+    }
+
+    if (ret == 0 && !c->connection_closed)
+        ret = AVERROR(EAGAIN);
+
+    return ret < 0 ? ret : 0;
+}
+
+static int tls_write(URLContext *h, const uint8_t *buf, int len)
+{ /* Encrypts at most cbMaximumMessage bytes of buf with one EncryptMessage() call and writes the result to the TCP connection. Returns the number of plaintext bytes consumed or a negative error code. */
+    TLSContext *c = h->priv_data;
+    TLSShared *s = &c->tls_shared;
+    SECURITY_STATUS sspi_ret;
+    int ret = 0, data_size;
+    uint8_t *data = NULL;
+    SecBuffer outbuf[4];
+    SecBufferDesc outbuf_desc;
+
+    if (c->sizes.cbMaximumMessage == 0) { /* stream sizes not queried yet */
+        sspi_ret = QueryContextAttributes(&c->ctxt_handle, SECPKG_ATTR_STREAM_SIZES, &c->sizes);
+        if (sspi_ret != SEC_E_OK)
+            return AVERROR_UNKNOWN;
+    }
+
+    /* limit how much data we can consume */
+    len = FFMIN(len, c->sizes.cbMaximumMessage);
+
+    data_size = c->sizes.cbHeader + len + c->sizes.cbTrailer; /* one contiguous buffer: header | payload | trailer */
+    data = av_malloc(data_size);
+    if (data == NULL)
+        return AVERROR(ENOMEM);
+
+    init_sec_buffer(&outbuf[0], SECBUFFER_STREAM_HEADER,
+                  data, c->sizes.cbHeader);
+    init_sec_buffer(&outbuf[1], SECBUFFER_DATA,
+                  data + c->sizes.cbHeader, len);
+    init_sec_buffer(&outbuf[2], SECBUFFER_STREAM_TRAILER,
+                  data + c->sizes.cbHeader + len,
+                  c->sizes.cbTrailer);
+    init_sec_buffer(&outbuf[3], SECBUFFER_EMPTY, NULL, 0);
+    init_sec_buffer_desc(&outbuf_desc, outbuf, 4);
+
+    memcpy(outbuf[1].pvBuffer, buf, len); /* the payload is copied into the scratch buffer that EncryptMessage() operates on */
+
+    sspi_ret = EncryptMessage(&c->ctxt_handle, 0, &outbuf_desc, 0);
+    if (sspi_ret == SEC_E_OK)  {
+        len = outbuf[0].cbBuffer + outbuf[1].cbBuffer + outbuf[2].cbBuffer; /* from here on len is the number of bytes to send */
+        ret = ffurl_write(s->tcp, data, len);
+        if (ret < 0 || ret != len) {
+            ret = AVERROR(EIO);
+            av_log(h, AV_LOG_ERROR, "Writing encrypted data to socket failed\n");
+            goto done;
+        }
+    } else {
+        av_log(h, AV_LOG_ERROR, "Encrypting data failed\n");
+        if (sspi_ret == SEC_E_INSUFFICIENT_MEMORY)
+            ret = AVERROR(ENOMEM);
+        else
+            ret = AVERROR(EIO);
+        goto done;
+    }
+
+done:
+    av_freep(&data);
+    return ret < 0 ? ret : outbuf[1].cbBuffer; /* on success report the size of the data buffer, not the bytes sent on the wire */
+}
+
+static const AVOption options[] = { /* option table referenced by tls_class below */
+    TLS_COMMON_OPTIONS(TLSContext, tls_shared),
+    { NULL }
+};
+
+static const AVClass tls_class = { /* AVClass tying the option table to TLSContext */
+    .class_name = "tls",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+URLProtocol ff_tls_schannel_protocol = { /* protocol entry registered under the name "tls" */
+    .name           = "tls",
+    .url_open2      = tls_open,
+    .url_read       = tls_read,
+    .url_write      = tls_write,
+    .url_close      = tls_close,
+    .priv_data_size = sizeof(TLSContext),
+    .flags          = URL_PROTOCOL_FLAG_NETWORK,
+    .priv_data_class = &tls_class,
+};
diff --git a/libavformat/tls_securetransport.c b/libavformat/tls_securetransport.c
index 73662d78..482771a9 100644
--- a/libavformat/tls_securetransport.c
+++ b/libavformat/tls_securetransport.c
@@ -22,6 +22,7 @@
 
 
 #include "avformat.h"
+#include "avio_internal.h"
 #include "internal.h"
 #include "network.h"
 #include "os_support.h"
@@ -80,8 +81,9 @@ static int import_pem(URLContext *h, char *path, CFArrayRef *array)
         goto end;
     }
 
-    if ((ret = avio_open2(&s, path, AVIO_FLAG_READ,
-                          &h->interrupt_callback, NULL)) < 0)
+    if ((ret = ffio_open_whitelist(&s, path, AVIO_FLAG_READ,
+                                   &h->interrupt_callback, NULL,
+                                   h->protocol_whitelist)) < 0)
         goto end;
 
     if ((ret = avio_size(s)) < 0)
@@ -350,8 +352,9 @@ static int map_ssl_error(OSStatus status, size_t processed)
 static int tls_read(URLContext *h, uint8_t *buf, int size)
 {
     TLSContext *c = h->priv_data;
-    size_t processed;
-    int ret = map_ssl_error(SSLRead(c->ssl_context, buf, size, &processed), processed);
+    size_t processed = 0;
+    int ret = SSLRead(c->ssl_context, buf, size, &processed);
+    ret = map_ssl_error(ret, processed);
     if (ret > 0)
         return ret;
     if (ret == 0)
@@ -362,8 +365,9 @@ static int tls_read(URLContext *h, uint8_t *buf, int size)
 static int tls_write(URLContext *h, const uint8_t *buf, int size)
 {
     TLSContext *c = h->priv_data;
-    size_t processed;
-    int ret = map_ssl_error(SSLWrite(c->ssl_context, buf, size, &processed), processed);
+    size_t processed = 0;
+    int ret = SSLWrite(c->ssl_context, buf, size, &processed);
+    ret = map_ssl_error(ret, processed);
     if (ret > 0)
         return ret;
     if (ret == 0)
diff --git a/libavformat/udp.c b/libavformat/udp.c
index d40ea973..ea80e522 100644
--- a/libavformat/udp.c
+++ b/libavformat/udp.c
@@ -24,6 +24,7 @@
  * UDP protocol
  */
 
+#define _DEFAULT_SOURCE
 #define _BSD_SOURCE     /* Needed for using struct ip_mreq with recent glibc */
 
 #include "avformat.h"
@@ -116,13 +117,13 @@ static const AVOption options[] = {
     { "localaddr",      "Local address",                                   OFFSET(localaddr),      AV_OPT_TYPE_STRING, { .str = NULL },               .flags = D|E },
     { "udplite_coverage", "choose UDPLite head size which should be validated by checksum", OFFSET(udplite_coverage), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, D|E },
     { "pkt_size",       "Maximum UDP packet size",                         OFFSET(pkt_size),       AV_OPT_TYPE_INT,    { .i64 = 1472 },  -1, INT_MAX, .flags = D|E },
-    { "reuse",          "explicitly allow reusing UDP sockets",            OFFSET(reuse_socket),   AV_OPT_TYPE_INT,    { .i64 = -1 },    -1, 1,       D|E },
-    { "reuse_socket",   "explicitly allow reusing UDP sockets",            OFFSET(reuse_socket),   AV_OPT_TYPE_INT,    { .i64 = -1 },    -1, 1,       .flags = D|E },
-    { "broadcast", "explicitly allow or disallow broadcast destination",   OFFSET(is_broadcast),   AV_OPT_TYPE_INT,    { .i64 = 0  },     0, 1,       E },
+    { "reuse",          "explicitly allow reusing UDP sockets",            OFFSET(reuse_socket),   AV_OPT_TYPE_BOOL,   { .i64 = -1 },    -1, 1,       D|E },
+    { "reuse_socket",   "explicitly allow reusing UDP sockets",            OFFSET(reuse_socket),   AV_OPT_TYPE_BOOL,   { .i64 = -1 },    -1, 1,       .flags = D|E },
+    { "broadcast", "explicitly allow or disallow broadcast destination",   OFFSET(is_broadcast),   AV_OPT_TYPE_BOOL,   { .i64 = 0  },     0, 1,       E },
     { "ttl",            "Time to live (multicast only)",                   OFFSET(ttl),            AV_OPT_TYPE_INT,    { .i64 = 16 },     0, INT_MAX, E },
-    { "connect",        "set if connect() should be called on socket",     OFFSET(is_connected),   AV_OPT_TYPE_INT,    { .i64 =  0 },     0, 1,       .flags = D|E },
+    { "connect",        "set if connect() should be called on socket",     OFFSET(is_connected),   AV_OPT_TYPE_BOOL,   { .i64 =  0 },     0, 1,       .flags = D|E },
     { "fifo_size",      "set the UDP receiving circular buffer size, expressed as a number of packets with size of 188 bytes", OFFSET(circular_buffer_size), AV_OPT_TYPE_INT, {.i64 = 7*4096}, 0, INT_MAX, D },
-    { "overrun_nonfatal", "survive in case of UDP receiving circular buffer overrun", OFFSET(overrun_nonfatal), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1,    D },
+    { "overrun_nonfatal", "survive in case of UDP receiving circular buffer overrun", OFFSET(overrun_nonfatal), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1,    D },
     { "timeout",        "set raise error timeout (only in read mode)",     OFFSET(timeout),        AV_OPT_TYPE_INT,    { .i64 = 0 },      0, INT_MAX, D },
     { "sources",        "Source list",                                     OFFSET(sources),        AV_OPT_TYPE_STRING, { .str = NULL },               .flags = D|E },
     { "block",          "Block list",                                      OFFSET(block),          AV_OPT_TYPE_STRING, { .str = NULL },               .flags = D|E },
@@ -236,7 +237,8 @@ static int udp_leave_multicast_group(int sockfd, struct sockaddr *addr,struct so
     return 0;
 }
 
-static struct addrinfo* udp_resolve_host(const char *hostname, int port,
+static struct addrinfo *udp_resolve_host(URLContext *h,
+                                         const char *hostname, int port,
                                          int type, int family, int flags)
 {
     struct addrinfo hints = { 0 }, *res = 0;
@@ -256,13 +258,17 @@ static struct addrinfo* udp_resolve_host(const char *hostname, int port,
     hints.ai_flags = flags;
     if ((error = getaddrinfo(node, service, &hints, &res))) {
         res = NULL;
-        av_log(NULL, AV_LOG_ERROR, "udp_resolve_host: %s\n", gai_strerror(error));
+        av_log(h, AV_LOG_ERROR, "getaddrinfo(%s, %s): %s\n",
+               node ? node : "unknown",
+               service ? service : "unknown",
+               gai_strerror(error));
     }
 
     return res;
 }
 
-static int udp_set_multicast_sources(int sockfd, struct sockaddr *addr,
+static int udp_set_multicast_sources(URLContext *h,
+                                     int sockfd, struct sockaddr *addr,
                                      int addr_len, char **sources,
                                      int nb_sources, int include)
 {
@@ -273,7 +279,7 @@ static int udp_set_multicast_sources(int sockfd, struct sockaddr *addr,
     for (i = 0; i < nb_sources; i++) {
         struct group_source_req mreqs;
         int level = addr->sa_family == AF_INET ? IPPROTO_IP : IPPROTO_IPV6;
-        struct addrinfo *sourceaddr = udp_resolve_host(sources[i], 0,
+        struct addrinfo *sourceaddr = udp_resolve_host(h, sources[i], 0,
                                                        SOCK_DGRAM, AF_UNSPEC,
                                                        0);
         if (!sourceaddr)
@@ -303,7 +309,7 @@ static int udp_set_multicast_sources(int sockfd, struct sockaddr *addr,
     }
     for (i = 0; i < nb_sources; i++) {
         struct ip_mreq_source mreqs;
-        struct addrinfo *sourceaddr = udp_resolve_host(sources[i], 0,
+        struct addrinfo *sourceaddr = udp_resolve_host(h, sources[i], 0,
                                                        SOCK_DGRAM, AF_UNSPEC,
                                                        0);
         if (!sourceaddr)
@@ -335,13 +341,14 @@ static int udp_set_multicast_sources(int sockfd, struct sockaddr *addr,
 #endif
     return 0;
 }
-static int udp_set_url(struct sockaddr_storage *addr,
+static int udp_set_url(URLContext *h,
+                       struct sockaddr_storage *addr,
                        const char *hostname, int port)
 {
     struct addrinfo *res0;
     int addr_len;
 
-    res0 = udp_resolve_host(hostname, port, SOCK_DGRAM, AF_UNSPEC, 0);
+    res0 = udp_resolve_host(h, hostname, port, SOCK_DGRAM, AF_UNSPEC, 0);
     if (!res0) return AVERROR(EIO);
     memcpy(addr, res0->ai_addr, res0->ai_addrlen);
     addr_len = res0->ai_addrlen;
@@ -350,16 +357,18 @@ static int udp_set_url(struct sockaddr_storage *addr,
     return addr_len;
 }
 
-static int udp_socket_create(UDPContext *s, struct sockaddr_storage *addr,
+static int udp_socket_create(URLContext *h, struct sockaddr_storage *addr,
                              socklen_t *addr_len, const char *localaddr)
 {
+    UDPContext *s = h->priv_data;
     int udp_fd = -1;
     struct addrinfo *res0, *res;
     int family = AF_UNSPEC;
 
     if (((struct sockaddr *) &s->dest_addr)->sa_family)
         family = ((struct sockaddr *) &s->dest_addr)->sa_family;
-    res0 = udp_resolve_host((localaddr && localaddr[0]) ? localaddr : NULL, s->local_port,
+    res0 = udp_resolve_host(h, (localaddr && localaddr[0]) ? localaddr : NULL,
+                            s->local_port,
                             SOCK_DGRAM, family, AI_PASSIVE);
     if (!res0)
         goto fail;
@@ -430,7 +439,7 @@ int ff_udp_set_remote_url(URLContext *h, const char *uri)
     av_url_split(NULL, 0, NULL, 0, hostname, sizeof(hostname), &port, NULL, 0, uri);
 
     /* set the destination address */
-    s->dest_addr_len = udp_set_url(&s->dest_addr, hostname, port);
+    s->dest_addr_len = udp_set_url(h, &s->dest_addr, hostname, port);
     if (s->dest_addr_len < 0) {
         return AVERROR(EIO);
     }
@@ -685,9 +694,9 @@ static int udp_open(URLContext *h, const char *uri, int flags)
         s->local_port = port;
 
     if (localaddr[0])
-        udp_fd = udp_socket_create(s, &my_addr, &len, localaddr);
+        udp_fd = udp_socket_create(h, &my_addr, &len, localaddr);
     else
-        udp_fd = udp_socket_create(s, &my_addr, &len, s->localaddr);
+        udp_fd = udp_socket_create(h, &my_addr, &len, s->localaddr);
     if (udp_fd < 0)
         goto fail;
 
@@ -759,14 +768,22 @@ static int udp_open(URLContext *h, const char *uri, int flags)
                 goto fail;
             }
             if (num_include_sources) {
-                if (udp_set_multicast_sources(udp_fd, (struct sockaddr *)&s->dest_addr, s->dest_addr_len, include_sources, num_include_sources, 1) < 0)
+                if (udp_set_multicast_sources(h, udp_fd,
+                                              (struct sockaddr *)&s->dest_addr,
+                                              s->dest_addr_len,
+                                              include_sources,
+                                              num_include_sources, 1) < 0)
                     goto fail;
             } else {
                 if (udp_join_multicast_group(udp_fd, (struct sockaddr *)&s->dest_addr,(struct sockaddr *)&s->local_addr_storage) < 0)
                     goto fail;
             }
             if (num_exclude_sources) {
-                if (udp_set_multicast_sources(udp_fd, (struct sockaddr *)&s->dest_addr, s->dest_addr_len, exclude_sources, num_exclude_sources, 0) < 0)
+                if (udp_set_multicast_sources(h, udp_fd,
+                                              (struct sockaddr *)&s->dest_addr,
+                                              s->dest_addr_len,
+                                              exclude_sources,
+                                              num_exclude_sources, 0) < 0)
                     goto fail;
             }
         }
diff --git a/libavformat/uncodedframecrcenc.c b/libavformat/uncodedframecrcenc.c
index 414683fe..ed4532da 100644
--- a/libavformat/uncodedframecrcenc.c
+++ b/libavformat/uncodedframecrcenc.c
@@ -64,7 +64,7 @@ static void video_frame_cksum(AVBPrint *bp, AVFrame *frame)
         unsigned cksum = 0;
         int h = frame->height;
         if ((i == 1 || i == 2) && desc->nb_components >= 3)
-            h = -((-h) >> desc->log2_chroma_h);
+            h = AV_CEIL_RSHIFT(h, desc->log2_chroma_h);
         data = frame->data[i];
         for (y = 0; y < h; y++) {
             cksum = av_adler32_update(cksum, data, linesize[i]);
diff --git a/libavformat/unix.c b/libavformat/unix.c
index 63d1db24..b3d5fac2 100644
--- a/libavformat/unix.c
+++ b/libavformat/unix.c
@@ -45,7 +45,7 @@ typedef struct UnixContext {
 #define OFFSET(x) offsetof(UnixContext, x)
 #define ED AV_OPT_FLAG_DECODING_PARAM|AV_OPT_FLAG_ENCODING_PARAM
 static const AVOption unix_options[] = {
-    { "listen",    "Open socket for listening",             OFFSET(listen),  AV_OPT_TYPE_INT,   { .i64 = 0 },                    0,       1, ED },
+    { "listen",    "Open socket for listening",             OFFSET(listen),  AV_OPT_TYPE_BOOL,  { .i64 = 0 },                    0,       1, ED },
     { "timeout",   "Timeout in ms",                         OFFSET(timeout), AV_OPT_TYPE_INT,   { .i64 = -1 },                  -1, INT_MAX, ED },
     { "type",      "Socket type",                           OFFSET(type),    AV_OPT_TYPE_INT,   { .i64 = SOCK_STREAM },    INT_MIN, INT_MAX, ED, "type" },
     { "stream",    "Stream (reliable stream-oriented)",     0,               AV_OPT_TYPE_CONST, { .i64 = SOCK_STREAM },    INT_MIN, INT_MAX, ED, "type" },
diff --git a/libavformat/url.h b/libavformat/url.h
index 1a845b77..0b4f2ee3 100644
--- a/libavformat/url.h
+++ b/libavformat/url.h
@@ -47,6 +47,7 @@ typedef struct URLContext {
     int is_connected;
     AVIOInterruptCB interrupt_callback;
     int64_t rw_timeout;         /**< maximum time to wait for (network) read/write operation completion, in mcs */
+    const char *protocol_whitelist;
 } URLContext;
 
 typedef struct URLProtocol {
@@ -58,6 +59,8 @@ typedef struct URLProtocol {
      * for those nested protocols.
      */
     int     (*url_open2)(URLContext *h, const char *url, int flags, AVDictionary **options);
+    int     (*url_accept)(URLContext *s, URLContext **c);
+    int     (*url_handshake)(URLContext *c);
 
     /**
      * Read data from the protocol.
@@ -90,6 +93,9 @@ typedef struct URLProtocol {
     int (*url_open_dir)(URLContext *h);
     int (*url_read_dir)(URLContext *h, AVIODirEntry **next);
     int (*url_close_dir)(URLContext *h);
+    int (*url_delete)(URLContext *h);
+    int (*url_move)(URLContext *h_src, URLContext *h_dst);
+    const char *default_whitelist;
 } URLProtocol;
 
 /**
@@ -134,9 +140,36 @@ int ffurl_connect(URLContext *uc, AVDictionary **options);
  * @return >= 0 in case of success, a negative value corresponding to an
  * AVERROR code in case of failure
  */
+int ffurl_open_whitelist(URLContext **puc, const char *filename, int flags,
+               const AVIOInterruptCB *int_cb, AVDictionary **options,
+               const char *whitelist);
+
 int ffurl_open(URLContext **puc, const char *filename, int flags,
                const AVIOInterruptCB *int_cb, AVDictionary **options);
 
+/**
+ * Accept a client URLContext c on the server URLContext s.
+ *
+ * @param  s server context
+ * @param  c client context, must be unallocated.
+ * @return >= 0 on success, ff_neterrno() on failure.
+ */
+int ffurl_accept(URLContext *s, URLContext **c);
+
+/**
+ * Perform one step of the protocol handshake to accept a new client.
+ * See avio_handshake() for details.
+ * Implementations should try to return decreasing values.
+ * If the protocol uses an underlying protocol, the underlying handshake is
+ * usually the first step, and the return value can be:
+ * (largest value for this protocol) + (return value from other protocol)
+ *
+ * @param  c the client context
+ * @return >= 0 on success or a negative value corresponding
+ *         to an AVERROR code on failure
+ */
+int ffurl_handshake(URLContext *c);
+
 /**
  * Read up to size bytes from the resource accessed by h, and store
  * the read bytes in buf.
diff --git a/libavformat/utils.c b/libavformat/utils.c
index 7513a830..3aa5beb6 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -112,11 +112,18 @@ MAKE_ACCESSORS(AVFormatContext, format, AVCodec *, data_codec)
 MAKE_ACCESSORS(AVFormatContext, format, int, metadata_header_padding)
 MAKE_ACCESSORS(AVFormatContext, format, void *, opaque)
 MAKE_ACCESSORS(AVFormatContext, format, av_format_control_message, control_message_cb)
+#if FF_API_OLD_OPEN_CALLBACKS
+FF_DISABLE_DEPRECATION_WARNINGS
 MAKE_ACCESSORS(AVFormatContext, format, AVOpenCallback, open_cb)
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
 int64_t av_stream_get_end_pts(const AVStream *st)
 {
-    return st->pts.val;
+    if (st->priv_pts) {
+        return st->priv_pts->val;
+    } else
+        return AV_NOPTS_VALUE;
 }
 
 struct AVCodecParserContext *av_stream_get_parser(const AVStream *st)
@@ -136,11 +143,15 @@ void av_format_inject_global_side_data(AVFormatContext *s)
 
 int ff_copy_whitelists(AVFormatContext *dst, AVFormatContext *src)
 {
-    av_assert0(!dst->codec_whitelist && !dst->format_whitelist);
+    av_assert0(!dst->codec_whitelist &&
+               !dst->format_whitelist &&
+               !dst->protocol_whitelist);
     dst-> codec_whitelist = av_strdup(src->codec_whitelist);
     dst->format_whitelist = av_strdup(src->format_whitelist);
+    dst->protocol_whitelist = av_strdup(src->protocol_whitelist);
     if (   (src-> codec_whitelist && !dst-> codec_whitelist)
-        || (src->format_whitelist && !dst->format_whitelist)) {
+        || (src->  format_whitelist && !dst->  format_whitelist)
+        || (src->protocol_whitelist && !dst->protocol_whitelist)) {
         av_log(dst, AV_LOG_ERROR, "Failed to duplicate whitelist\n");
         return AVERROR(ENOMEM);
     }
@@ -234,7 +245,7 @@ static int append_packet_chunked(AVIOContext *s, AVPacket *pkt, int size)
 
     pkt->pos = orig_pos;
     if (!pkt->size)
-        av_free_packet(pkt);
+        av_packet_unref(pkt);
     return pkt->size > orig_size ? pkt->size - orig_size : ret;
 }
 
@@ -310,7 +321,7 @@ int av_demuxer_open(AVFormatContext *ic) {
     int err;
 
     if (ic->format_whitelist && av_match_list(ic->iformat->name, ic->format_whitelist, ',') <= 0) {
-        av_log(ic, AV_LOG_ERROR, "Format not on whitelist\n");
+        av_log(ic, AV_LOG_ERROR, "Format not on whitelist \'%s\'\n", ic->format_whitelist);
         return AVERROR(EINVAL);
     }
 
@@ -349,21 +360,32 @@ static int init_input(AVFormatContext *s, const char *filename,
         (!s->iformat && (s->iformat = av_probe_input_format2(&pd, 0, &score))))
         return score;
 
-    if ((ret = avio_open2(&s->pb, filename, AVIO_FLAG_READ | s->avio_flags,
-                          &s->interrupt_callback, options)) < 0)
+    if ((ret = s->io_open(s, &s->pb, filename, AVIO_FLAG_READ | s->avio_flags, options)) < 0)
         return ret;
+
     if (s->iformat)
         return 0;
     return av_probe_input_buffer2(s->pb, &s->iformat, filename,
                                  s, 0, s->format_probesize);
 }
 
-static AVPacket *add_to_pktbuf(AVPacketList **packet_buffer, AVPacket *pkt,
-                               AVPacketList **plast_pktl)
+static int add_to_pktbuf(AVPacketList **packet_buffer, AVPacket *pkt,
+                         AVPacketList **plast_pktl, int ref)
 {
     AVPacketList *pktl = av_mallocz(sizeof(AVPacketList));
+    int ret;
+
     if (!pktl)
-        return NULL;
+        return AVERROR(ENOMEM);
+
+    if (ref) {
+        if ((ret = av_packet_ref(&pktl->pkt, pkt)) < 0) {
+            av_free(pktl);
+            return ret;
+        }
+    } else {
+        pktl->pkt = *pkt;
+    }
 
     if (*packet_buffer)
         (*plast_pktl)->next = pktl;
@@ -372,29 +394,27 @@ static AVPacket *add_to_pktbuf(AVPacketList **packet_buffer, AVPacket *pkt,
 
     /* Add the packet in the buffered packet list. */
     *plast_pktl = pktl;
-    pktl->pkt   = *pkt;
-    return &pktl->pkt;
+    return 0;
 }
 
 int avformat_queue_attached_pictures(AVFormatContext *s)
 {
-    int i;
+    int i, ret;
     for (i = 0; i < s->nb_streams; i++)
         if (s->streams[i]->disposition & AV_DISPOSITION_ATTACHED_PIC &&
             s->streams[i]->discard < AVDISCARD_ALL) {
-            AVPacket copy = s->streams[i]->attached_pic;
-            if (copy.size <= 0) {
+            if (s->streams[i]->attached_pic.size <= 0) {
                 av_log(s, AV_LOG_WARNING,
                     "Attached picture on stream %d has invalid size, "
                     "ignoring\n", i);
                 continue;
             }
-            copy.buf = av_buffer_ref(copy.buf);
-            if (!copy.buf)
-                return AVERROR(ENOMEM);
 
-            add_to_pktbuf(&s->internal->raw_packet_buffer, &copy,
-                          &s->internal->raw_packet_buffer_end);
+            ret = add_to_pktbuf(&s->internal->raw_packet_buffer,
+                                &s->streams[i]->attached_pic,
+                                &s->internal->raw_packet_buffer_end, 1);
+            if (ret < 0)
+                return ret;
         }
     return 0;
 }
@@ -429,8 +449,16 @@ int avformat_open_input(AVFormatContext **ps, const char *filename,
         goto fail;
     s->probe_score = ret;
 
+    if (!s->protocol_whitelist && s->pb && s->pb->protocol_whitelist) {
+        s->protocol_whitelist = av_strdup(s->pb->protocol_whitelist);
+        if (!s->protocol_whitelist) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+    }
+
     if (s->format_whitelist && av_match_list(s->iformat->name, s->format_whitelist, ',') <= 0) {
-        av_log(s, AV_LOG_ERROR, "Format not on whitelist\n");
+        av_log(s, AV_LOG_ERROR, "Format not on whitelist \'%s\'\n", s->format_whitelist);
         ret = AVERROR(EINVAL);
         goto fail;
     }
@@ -668,6 +696,11 @@ int ff_read_packet(AVFormatContext *s, AVPacket *pkt)
         av_init_packet(pkt);
         ret = s->iformat->read_packet(s, pkt);
         if (ret < 0) {
+            /* Some demuxers return FFERROR_REDO when they consume
+               data and discard it (ignored streams, junk, extradata).
+               We must re-call the demuxer to get the real packet. */
+            if (ret == FFERROR_REDO)
+                continue;
             if (!pktl || ret == AVERROR(EAGAIN))
                 return ret;
             for (i = 0; i < s->nb_streams; i++) {
@@ -680,12 +713,20 @@ int ff_read_packet(AVFormatContext *s, AVPacket *pkt)
             continue;
         }
 
+        if (!pkt->buf) {
+            AVPacket tmp = { 0 };
+            ret = av_packet_ref(&tmp, pkt);
+            if (ret < 0)
+                return ret;
+            *pkt = tmp;
+        }
+
         if ((s->flags & AVFMT_FLAG_DISCARD_CORRUPT) &&
             (pkt->flags & AV_PKT_FLAG_CORRUPT)) {
             av_log(s, AV_LOG_WARNING,
                    "Dropped corrupted packet (stream = %d)\n",
                    pkt->stream_index);
-            av_free_packet(pkt);
+            av_packet_unref(pkt);
             continue;
         }
 
@@ -718,8 +759,10 @@ int ff_read_packet(AVFormatContext *s, AVPacket *pkt)
         if (!pktl && st->request_probe <= 0)
             return ret;
 
-        add_to_pktbuf(&s->internal->raw_packet_buffer, pkt,
-                      &s->internal->raw_packet_buffer_end);
+        err = add_to_pktbuf(&s->internal->raw_packet_buffer, pkt,
+                            &s->internal->raw_packet_buffer_end, 0);
+        if (err)
+            return err;
         s->internal->raw_packet_buffer_remaining_size -= pkt->size;
 
         if ((err = probe_codec(s, st, pkt)) < 0)
@@ -885,7 +928,7 @@ static void update_initial_timestamps(AVFormatContext *s, int stream_index,
     AVStream *st       = s->streams[stream_index];
     AVPacketList *pktl = s->internal->packet_buffer ? s->internal->packet_buffer : s->internal->parse_queue;
     int64_t pts_buffer[MAX_REORDER_DELAY+1];
-    int64_t shift;
+    uint64_t shift;
     int i, delay;
 
     if (st->first_dts != AV_NOPTS_VALUE ||
@@ -897,7 +940,7 @@ static void update_initial_timestamps(AVFormatContext *s, int stream_index,
     delay         = st->codec->has_b_frames;
     st->first_dts = dts - (st->cur_dts - RELATIVE_TS_BASE);
     st->cur_dts   = dts;
-    shift         = st->first_dts - RELATIVE_TS_BASE;
+    shift         = (uint64_t)st->first_dts - RELATIVE_TS_BASE;
 
     for (i = 0; i<MAX_REORDER_DELAY+1; i++)
         pts_buffer[i] = AV_NOPTS_VALUE;
@@ -951,7 +994,7 @@ static void update_initial_durations(AVFormatContext *s, AVStream *st,
             }
         }
         if (pktl && pktl->pkt.dts != st->first_dts) {
-            av_log(s, AV_LOG_DEBUG, "first_dts %s not matching first dts %s (pts %s, duration %d) in the queue\n",
+            av_log(s, AV_LOG_DEBUG, "first_dts %s not matching first dts %s (pts %s, duration %"PRId64") in the queue\n",
                    av_ts2str(st->first_dts), av_ts2str(pktl->pkt.dts), av_ts2str(pktl->pkt.pts), pktl->pkt.duration);
             return;
         }
@@ -1091,7 +1134,7 @@ static void compute_pkt_fields(AVFormatContext *s, AVStream *st,
 
     if (s->debug & FF_FDEBUG_TS)
         av_log(s, AV_LOG_TRACE,
-            "IN delayed:%d pts:%s, dts:%s cur_dts:%s st:%d pc:%p duration:%d delay:%d onein_oneout:%d\n",
+            "IN delayed:%d pts:%s, dts:%s cur_dts:%s st:%d pc:%p duration:%"PRId64" delay:%d onein_oneout:%d\n",
             presentation_delayed, av_ts2str(pkt->pts), av_ts2str(pkt->dts), av_ts2str(st->cur_dts),
             pkt->stream_index, pc, pkt->duration, delay, onein_oneout);
 
@@ -1143,12 +1186,13 @@ static void compute_pkt_fields(AVFormatContext *s, AVStream *st,
         }
     }
 
-    if (pkt->pts != AV_NOPTS_VALUE && delay <= MAX_REORDER_DELAY && has_decode_delay_been_guessed(st)) {
+    if (pkt->pts != AV_NOPTS_VALUE && delay <= MAX_REORDER_DELAY) {
         st->pts_buffer[0] = pkt->pts;
         for (i = 0; i<delay && st->pts_buffer[i] > st->pts_buffer[i + 1]; i++)
             FFSWAP(int64_t, st->pts_buffer[i], st->pts_buffer[i + 1]);
 
-        pkt->dts = select_from_pts_buffer(st, st->pts_buffer, pkt->dts);
+        if(has_decode_delay_been_guessed(st))
+            pkt->dts = select_from_pts_buffer(st, st->pts_buffer, pkt->dts);
     }
     // We skipped it above so we try here.
     if (!onein_oneout)
@@ -1164,8 +1208,12 @@ static void compute_pkt_fields(AVFormatContext *s, AVStream *st,
     /* update flags */
     if (is_intra_only(st->codec))
         pkt->flags |= AV_PKT_FLAG_KEY;
+#if FF_API_CONVERGENCE_DURATION
+FF_DISABLE_DEPRECATION_WARNINGS
     if (pc)
         pkt->convergence_duration = pc->convergence_duration;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 }
 
 static void free_packet_buffer(AVPacketList **pkt_buf, AVPacketList **pkt_buf_end)
@@ -1173,7 +1221,7 @@ static void free_packet_buffer(AVPacketList **pkt_buf, AVPacketList **pkt_buf_en
     while (*pkt_buf) {
         AVPacketList *pktl = *pkt_buf;
         *pkt_buf = pktl->next;
-        av_free_packet(&pktl->pkt);
+        av_packet_unref(&pktl->pkt);
         av_freep(&pktl);
     }
     *pkt_buf_end = NULL;
@@ -1259,24 +1307,11 @@ static int parse_packet(AVFormatContext *s, AVPacket *pkt, int stream_index)
 
         compute_pkt_fields(s, st, st->parser, &out_pkt, next_dts, next_pts);
 
-        if (out_pkt.data == pkt->data && out_pkt.size == pkt->size) {
-            out_pkt.buf = pkt->buf;
-            pkt->buf    = NULL;
-#if FF_API_DESTRUCT_PACKET
-FF_DISABLE_DEPRECATION_WARNINGS
-            out_pkt.destruct = pkt->destruct;
-            pkt->destruct = NULL;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
-        }
-        if ((ret = av_dup_packet(&out_pkt)) < 0)
-            goto fail;
-
-        if (!add_to_pktbuf(&s->internal->parse_queue, &out_pkt, &s->internal->parse_queue_end)) {
-            av_free_packet(&out_pkt);
-            ret = AVERROR(ENOMEM);
+        ret = add_to_pktbuf(&s->internal->parse_queue, &out_pkt,
+                            &s->internal->parse_queue_end, 1);
+        av_packet_unref(&out_pkt);
+        if (ret < 0)
             goto fail;
-        }
     }
 
     /* end of the stream => close and free the parser */
@@ -1286,7 +1321,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
     }
 
 fail:
-    av_free_packet(pkt);
+    av_packet_unref(pkt);
     return ret;
 }
 
@@ -1351,7 +1386,7 @@ static int read_frame_internal(AVFormatContext *s, AVPacket *pkt)
         }
         if (s->debug & FF_FDEBUG_TS)
             av_log(s, AV_LOG_DEBUG,
-                   "ff_read_packet stream=%d, pts=%s, dts=%s, size=%d, duration=%d, flags=%d\n",
+                   "ff_read_packet stream=%d, pts=%s, dts=%s, size=%d, duration=%"PRId64", flags=%d\n",
                    cur_pkt.stream_index,
                    av_ts2str(cur_pkt.pts),
                    av_ts2str(cur_pkt.dts),
@@ -1389,12 +1424,12 @@ static int read_frame_internal(AVFormatContext *s, AVPacket *pkt)
                 return ret;
         } else {
             /* free packet */
-            av_free_packet(&cur_pkt);
+            av_packet_unref(&cur_pkt);
         }
         if (pkt->flags & AV_PKT_FLAG_KEY)
             st->skip_to_keyframe = 0;
         if (st->skip_to_keyframe) {
-            av_free_packet(&cur_pkt);
+            av_packet_unref(&cur_pkt);
             if (got_packet) {
                 *pkt = cur_pkt;
             }
@@ -1463,7 +1498,7 @@ static int read_frame_internal(AVFormatContext *s, AVPacket *pkt)
     if (s->debug & FF_FDEBUG_TS)
         av_log(s, AV_LOG_DEBUG,
                "read_frame_internal stream=%d, pts=%s, dts=%s, "
-               "size=%d, duration=%d, flags=%d\n",
+               "size=%d, duration=%"PRId64", flags=%d\n",
                pkt->stream_index,
                av_ts2str(pkt->pts),
                av_ts2str(pkt->dts),
@@ -1544,9 +1579,11 @@ int av_read_frame(AVFormatContext *s, AVPacket *pkt)
                 return ret;
         }
 
-        if (av_dup_packet(add_to_pktbuf(&s->internal->packet_buffer, pkt,
-                                        &s->internal->packet_buffer_end)) < 0)
-            return AVERROR(ENOMEM);
+        ret = add_to_pktbuf(&s->internal->packet_buffer, pkt,
+                            &s->internal->packet_buffer_end, 1);
+        av_packet_unref(pkt);
+        if (ret < 0)
+            return ret;
     }
 
 return_packet:
@@ -1585,26 +1622,26 @@ int av_find_default_stream_index(AVFormatContext *s)
     int i;
     AVStream *st;
     int best_stream = 0;
-    int best_score = -1;
+    int best_score = INT_MIN;
 
     if (s->nb_streams <= 0)
         return -1;
     for (i = 0; i < s->nb_streams; i++) {
         int score = 0;
         st = s->streams[i];
-        if (st->codec->codec_type == AVMEDIA_TYPE_VIDEO &&
-            !(st->disposition & AV_DISPOSITION_ATTACHED_PIC)) {
-            if (!st->codec->width && !st->codec->height && !st->codec_info_nb_frames)
-                score += 25;
-            else
-                score += 100;
+        if (st->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
+            if (st->disposition & AV_DISPOSITION_ATTACHED_PIC)
+                score -= 400;
+            if (st->codec->width && st->codec->height)
+                score += 50;
+            score+= 25;
         }
         if (st->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
-            if (!st->codec->sample_rate && !st->codec_info_nb_frames)
-                score += 12;
-            else
+            if (st->codec->sample_rate)
                 score += 50;
         }
+        if (st->codec_info_nb_frames)
+            score += 12;
 
         if (st->discard != AVDISCARD_ALL)
             score += 200;
@@ -1781,6 +1818,63 @@ int ff_index_search_timestamp(const AVIndexEntry *entries, int nb_entries,
     return m;
 }
 
+/* Tune s->pb's buffer size and short_seek_threshold from the streams' index entries; time_tolerance is in AV_TIME_BASE units. */
+void ff_configure_buffers_for_index(AVFormatContext *s, int64_t time_tolerance)
+{
+    int ist1, ist2;
+    int64_t pos_delta = 0;
+    int64_t skip = 0;
+    //We could use URLProtocol flags here but as many user applications do not use URLProtocols this would be unreliable
+    const char *proto = avio_find_protocol_name(s->filename);
+
+    if (!proto) {
+        av_log(s, AV_LOG_INFO,
+               "Protocol name not provided, cannot determine if input is local or "
+               "a network protocol, buffers and access patterns cannot be configured "
+               "optimally without knowing the protocol\n");
+    }
+    if (proto && !(strcmp(proto, "file") && strcmp(proto, "pipe") && strcmp(proto, "cache")))
+        return;
+    /* pos_delta: largest e1->pos - e2->pos, e2 being the next st2 entry at least time_tolerance after e1; skip: largest entry size */
+    for (ist1 = 0; ist1 < s->nb_streams; ist1++) {
+        AVStream *st1 = s->streams[ist1];
+        for (ist2 = 0; ist2 < s->nb_streams; ist2++) {
+            AVStream *st2 = s->streams[ist2];
+            int i1, i2;
+
+            if (ist1 == ist2)
+                continue;
+
+            for (i1 = i2 = 0; i1 < st1->nb_index_entries; i1++) {
+                AVIndexEntry *e1 = &st1->index_entries[i1];
+                int64_t e1_pts = av_rescale_q(e1->timestamp, st1->time_base, AV_TIME_BASE_Q);
+
+                skip = FFMAX(skip, e1->size);
+                for (; i2 < st2->nb_index_entries; i2++) {
+                    AVIndexEntry *e2 = &st2->index_entries[i2];
+                    int64_t e2_pts = av_rescale_q(e2->timestamp, st2->time_base, AV_TIME_BASE_Q);
+                    if (e2_pts - e1_pts < time_tolerance)
+                        continue;
+                    pos_delta = FFMAX(pos_delta, e1->pos - e2->pos);
+                    break;
+                }
+            }
+        }
+    }
+
+    pos_delta *= 2;
+    /* XXX This could be adjusted depending on protocol*/
+    if (s->pb->buffer_size < pos_delta && pos_delta < (1<<24)) {
+        av_log(s, AV_LOG_VERBOSE, "Reconfiguring buffers to size %"PRId64"\n", pos_delta);
+        if (ffio_set_buf_size(s->pb, pos_delta) >= 0) /* keep the old threshold if the resize failed */
+            s->pb->short_seek_threshold = FFMAX(s->pb->short_seek_threshold, pos_delta/2);
+    }
+
+    if (skip < (1<<23)) {
+        s->pb->short_seek_threshold = FFMAX(s->pb->short_seek_threshold, skip);
+    }
+}
+
 int av_index_search_timestamp(AVStream *st, int64_t wanted_timestamp, int flags)
 {
     return ff_index_search_timestamp(st->index_entries, st->nb_index_entries,
@@ -2065,15 +2159,18 @@ static int seek_frame_generic(AVFormatContext *s, int stream_index,
             } while (read_status == AVERROR(EAGAIN));
             if (read_status < 0)
                 break;
-            av_free_packet(&pkt);
             if (stream_index == pkt.stream_index && pkt.dts > timestamp) {
-                if (pkt.flags & AV_PKT_FLAG_KEY)
+                if (pkt.flags & AV_PKT_FLAG_KEY) {
+                    av_packet_unref(&pkt);
                     break;
+                }
                 if (nonkey++ > 1000 && st->codec->codec_id != AV_CODEC_ID_CDGRAPHICS) {
                     av_log(s, AV_LOG_ERROR,"seek_frame_generic failed as this stream seems to contain no keyframes after the target timestamp, %d non keyframes found\n", nonkey);
+                    av_packet_unref(&pkt);
                     break;
                 }
             }
+            av_packet_unref(&pkt);
         }
         index = av_index_search_timestamp(st, timestamp, flags);
     }
@@ -2270,11 +2367,11 @@ static void update_stream_timings(AVFormatContext *ic)
                     start_time_text = start_time1;
             } else
                 start_time = FFMIN(start_time, start_time1);
-            end_time1   = AV_NOPTS_VALUE;
-            if (st->duration != AV_NOPTS_VALUE) {
-                end_time1 = start_time1 +
-                            av_rescale_q(st->duration, st->time_base,
-                                         AV_TIME_BASE_Q);
+            end_time1 = av_rescale_q_rnd(st->duration, st->time_base,
+                                         AV_TIME_BASE_Q,
+                                         AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
+            if (end_time1 != AV_NOPTS_VALUE) {
+                end_time1 += start_time1;
                 end_time = FFMAX(end_time, end_time1);
             }
             for (p = NULL; (p = av_find_program_from_stream(ic, p, i)); ) {
@@ -2315,7 +2412,7 @@ static void update_stream_timings(AVFormatContext *ic)
         /* compute the bitrate */
         double bitrate = (double) filesize * 8.0 * AV_TIME_BASE /
                          (double) ic->duration;
-        if (bitrate >= 0 && bitrate <= INT_MAX)
+        if (bitrate >= 0 && bitrate <= INT64_MAX)
             ic->bit_rate = bitrate;
     }
 }
@@ -2391,7 +2488,7 @@ static void estimate_timings_from_bit_rate(AVFormatContext *ic)
 }
 
 #define DURATION_MAX_READ_SIZE 250000LL
-#define DURATION_MAX_RETRY 4
+#define DURATION_MAX_RETRY 6
 
 /* only usable for MPEG-PS streams */
 static void estimate_timings_from_pts(AVFormatContext *ic, int64_t old_offset)
@@ -2469,7 +2566,7 @@ static void estimate_timings_from_pts(AVFormatContext *ic, int64_t old_offset)
                     st->info->last_duration = duration;
                 }
             }
-            av_free_packet(pkt);
+            av_packet_unref(pkt);
         }
 
         /* check if all audio/video streams have valid duration */
@@ -2560,10 +2657,10 @@ static void estimate_timings(AVFormatContext *ic, int64_t old_offset)
                     (double) st->duration   / AV_TIME_BASE);
         }
         av_log(ic, AV_LOG_TRACE,
-                "stream: start_time: %0.3f duration: %0.3f bitrate=%d kb/s\n",
+                "stream: start_time: %0.3f duration: %0.3f bitrate=%"PRId64" kb/s\n",
                 (double) ic->start_time / AV_TIME_BASE,
                 (double) ic->duration   / AV_TIME_BASE,
-                ic->bit_rate / 1000);
+                (int64_t)ic->bit_rate / 1000);
     }
 }
 
@@ -2623,6 +2720,8 @@ static int try_decode_frame(AVFormatContext *s, AVStream *st, AVPacket *avpkt,
     AVFrame *frame = av_frame_alloc();
     AVSubtitle subtitle;
     AVPacket pkt = *avpkt;
+    int do_skip_frame = 0;
+    enum AVDiscard skip_frame;
 
     if (!frame)
         return AVERROR(ENOMEM);
@@ -2630,9 +2729,7 @@ static int try_decode_frame(AVFormatContext *s, AVStream *st, AVPacket *avpkt,
     if (!avcodec_is_open(st->codec) &&
         st->info->found_decoder <= 0 &&
         (st->codec->codec_id != -st->info->found_decoder || !st->codec->codec_id)) {
-#ifndef BLACKBERRY
         AVDictionary *thread_opt = NULL;
-#endif
 
         codec = find_decoder(s, st, st->codec->codec_id);
 
@@ -2644,19 +2741,12 @@ static int try_decode_frame(AVFormatContext *s, AVStream *st, AVPacket *avpkt,
 
         /* Force thread count to 1 since the H.264 decoder will not extract
          * SPS and PPS to extradata during multi-threaded decoding. */
-#ifndef BLACKBERRY
         av_dict_set(options ? options : &thread_opt, "threads", "1", 0);
         if (s->codec_whitelist)
             av_dict_set(options ? options : &thread_opt, "codec_whitelist", s->codec_whitelist, 0);
-#endif
-        ret = avcodec_open2(st->codec, codec, options ? options
-#ifndef BLACKBERRY
-                            : &thread_opt);
+        ret = avcodec_open2(st->codec, codec, options ? options : &thread_opt);
         if (!options)
             av_dict_free(&thread_opt);
-#else
-                            : NULL);
-#endif
         if (ret < 0) {
             st->info->found_decoder = -st->codec->codec_id;
             goto fail;
@@ -2670,11 +2760,17 @@ static int try_decode_frame(AVFormatContext *s, AVStream *st, AVPacket *avpkt,
         goto fail;
     }
 
+    if (st->codec->codec->caps_internal & FF_CODEC_CAP_SKIP_FRAME_FILL_PARAM) {
+        do_skip_frame = 1;
+        skip_frame = st->codec->skip_frame;
+        st->codec->skip_frame = AVDISCARD_ALL;
+    }
+
     while ((pkt.size > 0 || (!pkt.data && got_picture)) &&
            ret >= 0 &&
            (!has_codec_parameters(st, NULL) || !has_decode_delay_been_guessed(st) ||
             (!st->codec_info_nb_frames &&
-             st->codec->codec->capabilities & CODEC_CAP_CHANNEL_CONF))) {
+             (st->codec->codec->capabilities & AV_CODEC_CAP_CHANNEL_CONF)))) {
         got_picture = 0;
         switch (st->codec->codec_type) {
         case AVMEDIA_TYPE_VIDEO:
@@ -2705,6 +2801,10 @@ static int try_decode_frame(AVFormatContext *s, AVStream *st, AVPacket *avpkt,
         ret = -1;
 
 fail:
+    if (do_skip_frame) {
+        st->codec->skip_frame = skip_frame;
+    }
+
     av_frame_free(&frame);
     return ret;
 }
@@ -2814,7 +2914,10 @@ enum AVCodecID av_codec_get_id(const AVCodecTag *const *tags, unsigned int tag)
 static void compute_chapters_end(AVFormatContext *s)
 {
     unsigned int i, j;
-    int64_t max_time = s->duration +
+    int64_t max_time = 0;
+
+    if (s->duration > 0)
+        max_time = s->duration +
                        ((s->start_time == AV_NOPTS_VALUE) ? 0 : s->start_time);
 
     for (i = 0; i < s->nb_chapters; i++)
@@ -2841,10 +2944,14 @@ static int get_std_framerate(int i)
         return (i + 1) * 1001;
     i -= 30*12;
 
-    if (i < 7)
-        return ((const int[]) { 40, 48, 50, 60, 80, 120, 240})[i] * 1001 * 12;
+    if (i < 30)
+        return (i + 31) * 1001 * 12;
+    i -= 30;
 
-    i -= 7;
+    if (i < 3)
+        return ((const int[]) { 80, 120, 240})[i] * 1001 * 12;
+
+    i -= 3;
 
     return ((const int[]) { 24, 30, 60, 12, 15, 48 })[i] * 1000 * 12;
 }
@@ -2874,14 +2981,14 @@ int ff_alloc_extradata(AVCodecContext *avctx, int size)
 {
     int ret;
 
-    if (size < 0 || size >= INT32_MAX - FF_INPUT_BUFFER_PADDING_SIZE) {
+    if (size < 0 || size >= INT32_MAX - AV_INPUT_BUFFER_PADDING_SIZE) {
         avctx->extradata = NULL;
         avctx->extradata_size = 0;
         return AVERROR(EINVAL);
     }
-    avctx->extradata = av_malloc(size + FF_INPUT_BUFFER_PADDING_SIZE);
+    avctx->extradata = av_malloc(size + AV_INPUT_BUFFER_PADDING_SIZE);
     if (avctx->extradata) {
-        memset(avctx->extradata + size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
+        memset(avctx->extradata + size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
         avctx->extradata_size = size;
         ret = 0;
     } else {
@@ -3040,24 +3147,23 @@ int avformat_find_stream_info(AVFormatContext *ic, AVDictionary **options)
     // new streams might appear, no options for those
     int orig_nb_streams = ic->nb_streams;
     int flush_codecs;
-    int64_t max_analyze_duration = ic->max_analyze_duration2;
+    int64_t max_analyze_duration = ic->max_analyze_duration;
     int64_t max_stream_analyze_duration;
-    int64_t probesize = ic->probesize2;
+    int64_t max_subtitle_analyze_duration;
+    int64_t probesize = ic->probesize;
 
-    if (!max_analyze_duration)
-        max_analyze_duration = ic->max_analyze_duration;
-    if (ic->probesize)
-        probesize = ic->probesize;
     flush_codecs = probesize > 0;
 
     av_opt_set(ic, "skip_clear", "1", AV_OPT_SEARCH_CHILDREN);
 
     max_stream_analyze_duration = max_analyze_duration;
+    max_subtitle_analyze_duration = max_analyze_duration;
     if (!max_analyze_duration) {
         max_stream_analyze_duration =
         max_analyze_duration        = 5*AV_TIME_BASE;
+        max_subtitle_analyze_duration = 30*AV_TIME_BASE;
         if (!strcmp(ic->iformat->name, "flv"))
-            max_stream_analyze_duration = 30*AV_TIME_BASE;
+            max_stream_analyze_duration = 90*AV_TIME_BASE;
     }
 
     if (ic->pb)
@@ -3066,9 +3172,7 @@ int avformat_find_stream_info(AVFormatContext *ic, AVDictionary **options)
 
     for (i = 0; i < ic->nb_streams; i++) {
         const AVCodec *codec;
-#ifndef BLACKBERRY
         AVDictionary *thread_opt = NULL;
-#endif
         st = ic->streams[i];
 
         if (st->codec->codec_type == AVMEDIA_TYPE_VIDEO ||
@@ -3079,7 +3183,7 @@ int avformat_find_stream_info(AVFormatContext *ic, AVDictionary **options)
                 st->codec->time_base = st->time_base;
         }
         // only for the split stuff
-        if (!st->parser && !(ic->flags & AVFMT_FLAG_NOPARSE)) {
+        if (!st->parser && !(ic->flags & AVFMT_FLAG_NOPARSE) && st->request_probe <= 0) {
             st->parser = av_parser_init(st->codec->codec_id);
             if (st->parser) {
                 if (st->need_parsing == AVSTREAM_PARSE_HEADERS) {
@@ -3097,22 +3201,15 @@ int avformat_find_stream_info(AVFormatContext *ic, AVDictionary **options)
 
         /* Force thread count to 1 since the H.264 decoder will not extract
          * SPS and PPS to extradata during multi-threaded decoding. */
-#ifndef BLACKBERRY
         av_dict_set(options ? &options[i] : &thread_opt, "threads", "1", 0);
 
         if (ic->codec_whitelist)
             av_dict_set(options ? &options[i] : &thread_opt, "codec_whitelist", ic->codec_whitelist, 0);
-#endif
 
         /* Ensure that subtitle_header is properly set. */
         if (st->codec->codec_type == AVMEDIA_TYPE_SUBTITLE
             && codec && !st->codec->codec) {
-            if (avcodec_open2(st->codec, codec, options ? &options[i]
-#ifndef BLACKBERRY
-                              : &thread_opt) < 0)
-#else
-                              : NULL) < 0)
-#endif
+            if (avcodec_open2(st->codec, codec, options ? &options[i] : &thread_opt) < 0)
                 av_log(ic, AV_LOG_WARNING,
                        "Failed to open codec in av_find_stream_info\n");
         }
@@ -3120,19 +3217,12 @@ int avformat_find_stream_info(AVFormatContext *ic, AVDictionary **options)
         // Try to just open decoders, in case this is enough to get parameters.
         if (!has_codec_parameters(st, NULL) && st->request_probe <= 0) {
             if (codec && !st->codec->codec)
-                if (avcodec_open2(st->codec, codec, options ? &options[i]
-#ifndef BLACKBERRY
-                                  : &thread_opt) < 0)
-#else
-                                  : NULL) < 0)
-#endif
+                if (avcodec_open2(st->codec, codec, options ? &options[i] : &thread_opt) < 0)
                     av_log(ic, AV_LOG_WARNING,
                            "Failed to open codec in av_find_stream_info\n");
         }
-#ifndef BLACKBERRY
         if (!options)
             av_dict_free(&thread_opt);
-#endif
     }
 
     for (i = 0; i < ic->nb_streams; i++) {
@@ -3230,17 +3320,12 @@ int avformat_find_stream_info(AVFormatContext *ic, AVDictionary **options)
             break;
         }
 
-        if (ic->flags & AVFMT_FLAG_NOBUFFER)
-            free_packet_buffer(&ic->internal->packet_buffer,
-                               &ic->internal->packet_buffer_end);
-        {
-            pkt = add_to_pktbuf(&ic->internal->packet_buffer, &pkt1,
-                                &ic->internal->packet_buffer_end);
-            if (!pkt) {
-                ret = AVERROR(ENOMEM);
-                goto find_stream_info_err;
-            }
-            if ((ret = av_dup_packet(pkt)) < 0)
+        pkt = &pkt1;
+
+        if (!(ic->flags & AVFMT_FLAG_NOBUFFER)) {
+            ret = add_to_pktbuf(&ic->internal->packet_buffer, pkt,
+                                &ic->internal->packet_buffer_end, 0);
+            if (ret < 0)
                 goto find_stream_info_err;
         }
 
@@ -3289,6 +3374,7 @@ int avformat_find_stream_info(AVFormatContext *ic, AVDictionary **options)
         }
         if (st->codec_info_nb_frames>1) {
             int64_t t = 0;
+            int64_t limit;
 
             if (st->time_base.den > 0)
                 t = av_rescale_q(st->info->codec_info_duration, st->time_base, AV_TIME_BASE_Q);
@@ -3301,16 +3387,23 @@ int avformat_find_stream_info(AVFormatContext *ic, AVDictionary **options)
                 && st->info->fps_last_dts  != AV_NOPTS_VALUE)
                 t = FFMAX(t, av_rescale_q(st->info->fps_last_dts - st->info->fps_first_dts, st->time_base, AV_TIME_BASE_Q));
 
-            if (t >= (analyzed_all_streams ? max_analyze_duration : max_stream_analyze_duration)) {
-                av_log(ic, AV_LOG_VERBOSE, "max_analyze_duration %"PRId64" reached at %"PRId64" microseconds\n",
-                       max_analyze_duration,
-                       t);
+            if (analyzed_all_streams)                                limit = max_analyze_duration;
+            else if (st->codec->codec_type == AVMEDIA_TYPE_SUBTITLE) limit = max_subtitle_analyze_duration;
+            else                                                     limit = max_stream_analyze_duration;
+
+            if (t >= limit) {
+                av_log(ic, AV_LOG_VERBOSE, "max_analyze_duration %"PRId64" reached at %"PRId64" microseconds st:%d\n",
+                       limit,
+                       t, pkt->stream_index);
                 if (ic->flags & AVFMT_FLAG_NOBUFFER)
                     av_packet_unref(pkt);
                 break;
             }
             if (pkt->duration) {
-                st->info->codec_info_duration        += pkt->duration;
+                if (st->codec->codec_type == AVMEDIA_TYPE_SUBTITLE && pkt->pts != AV_NOPTS_VALUE && pkt->pts >= st->start_time) {
+                    st->info->codec_info_duration = FFMIN(pkt->pts - st->start_time, st->info->codec_info_duration + pkt->duration);
+                } else
+                    st->info->codec_info_duration += pkt->duration;
                 st->info->codec_info_duration_fields += st->parser && st->need_parsing && st->codec->ticks_per_frame ==2 ? st->parser->repeat_pict + 1 : 2;
             }
         }
@@ -3333,7 +3426,7 @@ int avformat_find_stream_info(AVFormatContext *ic, AVDictionary **options)
          * it takes longer and uses more memory. For MPEG-4, we need to
          * decompress for QuickTime.
          *
-         * If CODEC_CAP_CHANNEL_CONF is set this will force decoding of at
+         * If AV_CODEC_CAP_CHANNEL_CONF is set this will force decoding of at
          * least one frame of codec data, this makes sure the codec initializes
          * the channel configuration and does not only trust the values from
          * the container. */
@@ -3462,7 +3555,7 @@ int avformat_find_stream_info(AVFormatContext *ic, AVDictionary **options)
     }
 
     if (probesize)
-    estimate_timings(ic, old_offset);
+        estimate_timings(ic, old_offset);
 
     av_opt_set(ic, "skip_clear", "0", AV_OPT_SEARCH_CHILDREN);
 
@@ -3602,21 +3695,26 @@ int av_read_pause(AVFormatContext *s)
     return AVERROR(ENOSYS);
 }
 
-void ff_free_stream(AVFormatContext *s, AVStream *st) {
-    int j;
-    av_assert0(s->nb_streams>0);
-    av_assert0(s->streams[ s->nb_streams - 1 ] == st);
+static void free_stream(AVStream **pst)
+{
+    AVStream *st = *pst;
+    int i;
+
+    if (!st)
+        return;
 
-    for (j = 0; j < st->nb_side_data; j++)
-        av_freep(&st->side_data[j].data);
+    for (i = 0; i < st->nb_side_data; i++)
+        av_freep(&st->side_data[i].data);
     av_freep(&st->side_data);
-    st->nb_side_data = 0;
 
-    if (st->parser) {
+    if (st->parser)
         av_parser_close(st->parser);
-    }
+
     if (st->attached_pic.data)
-        av_free_packet(&st->attached_pic);
+        av_packet_unref(&st->attached_pic);
+
+    av_freep(&st->internal);
+
     av_dict_free(&st->metadata);
     av_freep(&st->probe_data.buf);
     av_freep(&st->index_entries);
@@ -3628,7 +3726,17 @@ void ff_free_stream(AVFormatContext *s, AVStream *st) {
         av_freep(&st->info->duration_error);
     av_freep(&st->info);
     av_freep(&st->recommended_encoder_configuration);
-    av_freep(&s->streams[ --s->nb_streams ]);
+    av_freep(&st->priv_pts);
+
+    av_freep(pst);
+}
+
+void ff_free_stream(AVFormatContext *s, AVStream *st)
+{
+    av_assert0(s->nb_streams>0);
+    av_assert0(s->streams[ s->nb_streams - 1 ] == st);
+    /* st must be the last stream (asserted above): drop it from the count and release it via free_stream(). */
+    free_stream(&s->streams[ --s->nb_streams ]);
+}
 
 void avformat_free_context(AVFormatContext *s)
@@ -3644,9 +3752,10 @@ void avformat_free_context(AVFormatContext *s)
     if (s->oformat && s->oformat->priv_class && s->priv_data)
         av_opt_free(s->priv_data);
 
-    for (i = s->nb_streams - 1; i >= 0; i--) {
+    for (i = s->nb_streams - 1; i >= 0; i--)
         ff_free_stream(s, s->streams[i]);
-    }
+
+
     for (i = s->nb_programs - 1; i >= 0; i--) {
         av_dict_free(&s->programs[i]->metadata);
         av_freep(&s->programs[i]->stream_index);
@@ -3722,22 +3831,29 @@ AVStream *avformat_new_stream(AVFormatContext *s, const AVCodec *c)
         av_free(st);
         return NULL;
     }
+
+    st->internal = av_mallocz(sizeof(*st->internal));
+    if (!st->internal)
+        goto fail;
+
     if (s->iformat) {
         /* no default bitrate if decoding */
         st->codec->bit_rate = 0;
 
         /* default pts setting is MPEG-like */
         avpriv_set_pts_info(st, 33, 1, 90000);
+        /* we set the current DTS to 0 so that formats without any timestamps
+         * but durations get some timestamps, formats with some unknown
+         * timestamps have their first few packets buffered and the
+         * timestamps corrected before they are returned to the user */
+        st->cur_dts = RELATIVE_TS_BASE;
+    } else {
+        st->cur_dts = AV_NOPTS_VALUE;
     }
 
     st->index      = s->nb_streams;
     st->start_time = AV_NOPTS_VALUE;
     st->duration   = AV_NOPTS_VALUE;
-    /* we set the current DTS to 0 so that formats without any timestamps
-     * but durations get some timestamps, formats with some unknown
-     * timestamps have their first few packets buffered and the
-     * timestamps corrected before they are returned to the user */
-    st->cur_dts       = s->iformat ? RELATIVE_TS_BASE : 0;
     st->first_dts     = AV_NOPTS_VALUE;
     st->probe_packets = MAX_PROBE_PACKETS;
     st->pts_wrap_reference = AV_NOPTS_VALUE;
@@ -3760,6 +3876,9 @@ AVStream *avformat_new_stream(AVFormatContext *s, const AVCodec *c)
 
     s->streams[s->nb_streams++] = st;
     return st;
+fail:
+    free_stream(&st);
+    return NULL;
 }
 
 AVProgram *av_new_program(AVFormatContext *ac, int id)
@@ -3820,7 +3939,7 @@ AVChapter *avpriv_new_chapter(AVFormatContext *s, int id, AVRational time_base,
     return chapter;
 }
 
-void ff_program_add_stream_index(AVFormatContext *ac, int progid, unsigned idx)
+void av_program_add_stream_index(AVFormatContext *ac, int progid, unsigned idx)
 {
     int i, j;
     AVProgram *program = NULL;
@@ -4116,18 +4235,6 @@ int ff_find_stream_index(AVFormatContext *s, int id)
     return -1;
 }
 
-int64_t ff_iso8601_to_unix_time(const char *datestr)
-{
-    struct tm time1 = { 0 }, time2 = { 0 };
-    const char *ret1, *ret2;
-    ret1 = av_small_strptime(datestr, "%Y - %m - %d %T", &time1);
-    ret2 = av_small_strptime(datestr, "%Y - %m - %dT%T", &time2);
-    if (ret2 && !ret1)
-        return av_timegm(&time2);
-    else
-        return av_timegm(&time1);
-}
-
 int avformat_query_codec(const AVOutputFormat *ofmt, enum AVCodecID codec_id,
                          int std_compliance)
 {
@@ -4260,8 +4367,9 @@ int avformat_match_stream_specifier(AVFormatContext *s, AVStream *st,
     if (*spec <= '9' && *spec >= '0') /* opt:index */
         return strtol(spec, NULL, 0) == st->index;
     else if (*spec == 'v' || *spec == 'a' || *spec == 's' || *spec == 'd' ||
-             *spec == 't') { /* opt:[vasdt] */
+             *spec == 't' || *spec == 'V') { /* opt:[vasdtV] */
         enum AVMediaType type;
+        int nopic = 0;
 
         switch (*spec++) {
         case 'v': type = AVMEDIA_TYPE_VIDEO;      break;
@@ -4269,15 +4377,20 @@ int avformat_match_stream_specifier(AVFormatContext *s, AVStream *st,
         case 's': type = AVMEDIA_TYPE_SUBTITLE;   break;
         case 'd': type = AVMEDIA_TYPE_DATA;       break;
         case 't': type = AVMEDIA_TYPE_ATTACHMENT; break;
+        case 'V': type = AVMEDIA_TYPE_VIDEO; nopic = 1; break;
         default:  av_assert0(0);
         }
         if (type != st->codec->codec_type)
             return 0;
+        if (nopic && (st->disposition & AV_DISPOSITION_ATTACHED_PIC))
+            return 0;
         if (*spec++ == ':') { /* possibly followed by :index */
             int i, index = strtol(spec, NULL, 0);
             for (i = 0; i < s->nb_streams; i++)
-                if (s->streams[i]->codec->codec_type == type && index-- == 0)
-                   return i == st->index;
+                if (s->streams[i]->codec->codec_type == type &&
+                    /* for 'V', test the candidate stream i for an attached picture, not st */
+                    !(nopic && (s->streams[i]->disposition & AV_DISPOSITION_ATTACHED_PIC)) && index-- == 0)
+                    return i == st->index;
             return 0;
         }
         return 1;
@@ -4513,7 +4626,7 @@ uint8_t *av_stream_get_side_data(AVStream *st, enum AVPacketSideDataType type,
     return NULL;
 }
 
-uint8_t *ff_stream_new_side_data(AVStream *st, enum AVPacketSideDataType type,
+uint8_t *av_stream_new_side_data(AVStream *st, enum AVPacketSideDataType type,
                                  int size)
 {
     AVPacketSideData *sd, *tmp;
@@ -4549,3 +4662,98 @@ uint8_t *ff_stream_new_side_data(AVStream *st, enum AVPacketSideDataType type,
     sd->size = size;
     return data;
 }
+
+/* Append bitstream filter `name` (with optional `args`) to st->internal->bsfc; returns 1 on success, a negative AVERROR otherwise. */
+int ff_stream_add_bitstream_filter(AVStream *st, const char *name, const char *args)
+{
+    AVBitStreamFilterContext *bsfc = NULL;
+    AVBitStreamFilterContext **dest = &st->internal->bsfc;
+    while (*dest) /* advance to the NULL link itself, so an existing tail filter is kept instead of overwritten */
+        dest = &(*dest)->next;
+    if (!(bsfc = av_bitstream_filter_init(name))) {
+        av_log(NULL, AV_LOG_ERROR, "Unknown bitstream filter '%s'\n", name);
+        return AVERROR(EINVAL);
+    }
+    if (args && !(bsfc->args = av_strdup(args))) {
+        av_bitstream_filter_close(bsfc);
+        return AVERROR(ENOMEM);
+    }
+    av_log(st->codec, AV_LOG_VERBOSE,
+           "Automatically inserted bitstream filter '%s'; args='%s'\n",
+           name, args ? args : "");
+    *dest = bsfc;
+    return 1;
+}
+
+/* Run pkt through each filter of the bsfc chain in order, replacing *pkt with the output; returns 0 or the first negative error. */
+int av_apply_bitstream_filters(AVCodecContext *codec, AVPacket *pkt,
+                               AVBitStreamFilterContext *bsfc)
+{
+    int ret = 0;
+    while (bsfc) {
+        AVPacket new_pkt = *pkt;
+        int a = av_bitstream_filter_filter(bsfc, codec, NULL,
+                                           &new_pkt.data, &new_pkt.size,
+                                           pkt->data, pkt->size,
+                                           pkt->flags & AV_PKT_FLAG_KEY);
+        if(a == 0 && new_pkt.data != pkt->data) { /* output not owned by us: take a padded private copy */
+            uint8_t *t = av_malloc(new_pkt.size + AV_INPUT_BUFFER_PADDING_SIZE); //the new should be a subset of the old so cannot overflow
+            if (t) {
+                memcpy(t, new_pkt.data, new_pkt.size);
+                memset(t + new_pkt.size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+                new_pkt.data = t;
+                new_pkt.buf = NULL;
+                a = 1;
+            } else {
+                a = AVERROR(ENOMEM);
+            }
+        }
+        if (a > 0) { /* new_pkt.data is a fresh allocation: wrap it in a refcounted buffer */
+            new_pkt.buf = av_buffer_create(new_pkt.data, new_pkt.size,
+                                           av_buffer_default_free, NULL, 0);
+            if (new_pkt.buf) {
+                pkt->side_data = NULL; /* side data was copied into new_pkt above; detach before unref */
+                pkt->side_data_elems = 0;
+                av_packet_unref(pkt);
+            } else {
+                av_freep(&new_pkt.data);
+                a = AVERROR(ENOMEM);
+            }
+        }
+        if (a < 0) { /* *pkt has not been overwritten for this filter at this point */
+            av_log(codec, AV_LOG_ERROR,
+                   "Failed to open bitstream filter %s for stream %d with codec %s\n",
+                   bsfc->filter->name, pkt->stream_index,
+                   codec->codec ? codec->codec->name : "copy");
+            ret = a;
+            break;
+        }
+        *pkt = new_pkt;
+        bsfc = bsfc->next;
+    }
+    return ret;
+}
+
+void ff_format_io_close(AVFormatContext *s, AVIOContext **pb)
+{
+    if (*pb) /* close an open context through the format context's io_close callback */
+        s->io_close(s, *pb);
+    *pb = NULL; /* the caller's pointer is reset whether or not anything was closed */
+}
+
+/* Parse the "creation_time" metadata tag: 1 if stored in *timestamp, 0 if the tag is absent, else the negative av_parse_time() result. */
+int ff_parse_creation_time_metadata(AVFormatContext *s, int64_t *timestamp, int return_seconds)
+{
+    AVDictionaryEntry *tag = av_dict_get(s->metadata, "creation_time", NULL, 0);
+    int64_t parsed;
+    int err;
+    if (!tag)
+        return 0;
+    err = av_parse_time(&parsed, tag->value, 0);
+    if (err < 0) {
+        av_log(s, AV_LOG_WARNING, "Failed to parse creation_time %s\n", tag->value);
+        return err;
+    }
+    *timestamp = return_seconds ? parsed / 1000000 : parsed;
+    return 1;
+}
diff --git a/libavformat/v210.c b/libavformat/v210.c
new file mode 100644
index 00000000..31387a4d
--- /dev/null
+++ b/libavformat/v210.c
@@ -0,0 +1,130 @@
+/*
+ * Raw v210 video demuxer
+ * Copyright (c) 2015 Tiancheng "Timothy" Gu
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/imgutils.h"
+#include "libavutil/parseutils.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/opt.h"
+#include "internal.h"
+#include "avformat.h"
+
+typedef struct V210DemuxerContext {
+    const AVClass *class;     /**< Class for private options. */
+    int width, height;        /**< Frame size from the "video_size" option, which is registered at the offset of width. */
+    AVRational framerate;     /**< Frame rate from the "framerate" option; the header reader uses its inverse as the time base. */
+} V210DemuxerContext;
+
+// v210 frame width is padded to multiples of 48; arguments are parenthesized so expressions expand safely
+#define GET_PACKET_SIZE(w, h) ((((w) + 47) / 48) * 48 * (h) * 8 / 3)
+
+/* Create the single video stream and derive its parameters and the fixed packet size from the private options. */
+static int v210_read_header(AVFormatContext *ctx)
+{
+    V210DemuxerContext *s = ctx->priv_data;
+    AVStream *st;
+    int ret;
+
+    st = avformat_new_stream(ctx, NULL);
+    if (!st)
+        return AVERROR(ENOMEM);
+
+    st->codec->codec_type = AVMEDIA_TYPE_VIDEO;
+    st->codec->codec_id = ctx->iformat->raw_codec_id;
+    /* time base is the inverse of the frame rate, i.e. one tick per frame */
+    avpriv_set_pts_info(st, 64, s->framerate.den, s->framerate.num);
+    /* validate the configured dimensions before deriving the packet size from them */
+    ret = av_image_check_size(s->width, s->height, 0, ctx);
+    if (ret < 0)
+        return ret;
+    st->codec->width    = s->width;
+    st->codec->height   = s->height;
+    st->codec->pix_fmt  = ctx->iformat->raw_codec_id == AV_CODEC_ID_V210 ?
+                          AV_PIX_FMT_YUV422P10 : AV_PIX_FMT_YUV422P16;
+    ctx->packet_size    = GET_PACKET_SIZE(s->width, s->height);
+    st->codec->bit_rate = av_rescale_q(ctx->packet_size,
+                                       (AVRational){8,1}, st->time_base);
+
+    return 0;
+}
+
+
+/* Read one packet of s->packet_size bytes; pts and dts are the byte offset divided by the packet size. */
+static int v210_read_packet(AVFormatContext *s, AVPacket *pkt)
+{
+    const int status = av_get_packet(s->pb, pkt, s->packet_size);
+
+    /* timestamps and stream index are filled in regardless of the read status */
+    pkt->dts = pkt->pos / s->packet_size;
+    pkt->pts = pkt->dts;
+    pkt->stream_index = 0;
+
+    return status < 0 ? status : 0;
+}
+
+#define OFFSET(x) offsetof(V210DemuxerContext, x)
+#define DEC AV_OPT_FLAG_DECODING_PARAM
+static const AVOption v210_options[] = { /* private options shared by the v210 and v210x demuxer classes */
+    { "video_size", "set frame size", OFFSET(width), AV_OPT_TYPE_IMAGE_SIZE, {.str = NULL}, 0, 0, DEC },
+    { "framerate", "set frame rate", OFFSET(framerate), AV_OPT_TYPE_VIDEO_RATE, {.str = "25"}, 0, 0, DEC },
+    { NULL }, /* end-of-table sentinel */
+};
+
+#if CONFIG_V210_DEMUXER
+static const AVClass v210_demuxer_class = {
+    .class_name = "v210 demuxer",
+    .item_name  = av_default_item_name,
+    .option     = v210_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+/* Demuxer entry for the "v210" extension; raw_codec_id is read back by v210_read_header(). */
+AVInputFormat ff_v210_demuxer = {
+    .name           = "v210",
+    .long_name      = NULL_IF_CONFIG_SMALL("Uncompressed 4:2:2 10-bit"),
+    .priv_data_size = sizeof(V210DemuxerContext),
+    .read_header    = v210_read_header,
+    .read_packet    = v210_read_packet,
+    .flags          = AVFMT_GENERIC_INDEX,
+    .extensions     = "v210",
+    .raw_codec_id   = AV_CODEC_ID_V210,
+    .priv_class     = &v210_demuxer_class,
+};
+#endif // CONFIG_V210_DEMUXER
+
+#if CONFIG_V210X_DEMUXER
+static const AVClass v210x_demuxer_class = {
+    .class_name = "v210x demuxer",
+    .item_name  = av_default_item_name,
+    .option     = v210_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+/* Demuxer entry for the "yuv10" extension; same callbacks and options as v210, with AV_CODEC_ID_V210X as raw codec. */
+AVInputFormat ff_v210x_demuxer = {
+    .name           = "v210x",
+    .long_name      = NULL_IF_CONFIG_SMALL("Uncompressed 4:2:2 10-bit"),
+    .priv_data_size = sizeof(V210DemuxerContext),
+    .read_header    = v210_read_header,
+    .read_packet    = v210_read_packet,
+    .flags          = AVFMT_GENERIC_INDEX,
+    .extensions     = "yuv10",
+    .raw_codec_id   = AV_CODEC_ID_V210X,
+    .priv_class     = &v210x_demuxer_class,
+};
+#endif // CONFIG_V210X_DEMUXER
diff --git a/libavformat/vag.c b/libavformat/vag.c
new file mode 100644
index 00000000..2875db50
--- /dev/null
+++ b/libavformat/vag.c
@@ -0,0 +1,83 @@
+/*
+ * VAG demuxer
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/channel_layout.h"
+#include "avformat.h"
+#include "internal.h"
+
+static int vag_probe(AVProbeData *p)
+{
+    /* A match requires the seven leading bytes 'V' 'A' 'G' 'p' 0 0 0. */
+    const int mismatch = memcmp(p->buf, "VAGp\0\0\0", 7);
+
+    return mismatch ? 0 : AVPROBE_SCORE_MAX;
+}
+
+static int vag_read_header(AVFormatContext *s)
+{
+    AVIOContext *pb = s->pb;
+    AVStream *st = avformat_new_stream(s, NULL);
+    AVCodecContext *codec;
+    unsigned int length;
+
+    if (!st)
+        return AVERROR(ENOMEM);
+    codec = st->codec;
+    /* The header fields below are consumed strictly in file order. */
+    avio_skip(pb, 4);
+    codec->codec_type  = AVMEDIA_TYPE_AUDIO;
+    codec->codec_id    = AV_CODEC_ID_ADPCM_PSX;
+    codec->channels    = avio_rb32(pb) == 0x00000004 ? 2 : 1;
+    avio_skip(pb, 4);
+    length             = avio_rb32(pb);
+    st->duration       = codec->channels > 1 ? length : length / 16 * 28;
+    codec->sample_rate = avio_rb32(pb);
+    if (codec->sample_rate <= 0)
+        return AVERROR_INVALIDDATA;
+    /* A second "VAGp" tag at offset 0x1000 selects 0x1000-byte blocks per channel. */
+    avio_seek(pb, 0x1000, SEEK_SET);
+    if (avio_rl32(pb) != MKTAG('V','A','G','p')) {
+        codec->block_align = 16 * codec->channels;
+        avio_seek(pb, codec->channels > 1 ? 0x80 : 0x30, SEEK_SET);
+    } else {
+        codec->block_align = 0x1000 * codec->channels;
+        avio_seek(pb, 0, SEEK_SET);
+        st->duration = st->duration / 16 * 28;
+    }
+    avpriv_set_pts_info(st, 64, 1, codec->sample_rate);
+    return 0;
+}
+
+static int vag_read_packet(AVFormatContext *s, AVPacket *pkt)
+{
+    /* Requests one block of block_align bytes, the size chosen by vag_read_header(). */
+    const int block_size = s->streams[0]->codec->block_align;
+    return av_get_packet(s->pb, pkt, block_size);
+}
+
+AVInputFormat ff_vag_demuxer = { /* no priv_data_size: none of the vag_* callbacks uses a private context */
+    .name           = "vag",
+    .long_name      = NULL_IF_CONFIG_SMALL("Sony PS2 VAG"),
+    .read_probe     = vag_probe,
+    .read_header    = vag_read_header,
+    .read_packet    = vag_read_packet,
+    .extensions     = "vag",
+};
diff --git a/libavformat/vc1dec.c b/libavformat/vc1dec.c
new file mode 100644
index 00000000..33f84652
--- /dev/null
+++ b/libavformat/vc1dec.c
@@ -0,0 +1,81 @@
+/*
+ * VC-1 demuxer
+ * Copyright (c) 2015 Carl Eugen Hoyos
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avformat.h"
+#include "rawdec.h"
+#include "libavutil/intreadwrite.h"
+#include "libavcodec/vc1_common.h"
+
+static int vc1_probe(AVProbeData *p)
+{
+    int seq = 0, entry = 0, frame = 0, i;
+    /* Scan for start codes; frame/field/slice codes are counted only after a sequence header and an entry point. */
+    for (i = 0; i < p->buf_size + 5; i++) { /* NOTE(review): bound exceeds buf_size, so reads go past it - presumably covered by probe-buffer padding; confirm */
+        uint32_t code = AV_RB32(p->buf + i);
+        if ((code & 0xffffffe0) == 0x100) { /* bytes 00 00 01 xx with xx below 0x20 */
+            int type = code & 0x11f;
+            i += 4; /* p->buf[i] is now the byte right after the 4-byte code */
+            switch (type) {
+            case VC1_CODE_SEQHDR: {
+                int profile, level, chromaformat;
+                profile = (p->buf[i] & 0xc0) >> 6; /* top two bits of the byte */
+                if (profile != PROFILE_ADVANCED) {
+                    seq = 0;
+                    continue; /* targets the for loop, so its i++ still runs */
+                }
+                level = (p->buf[i] & 0x38) >> 3; /* next three bits of the same byte */
+                if (level >= 5) {
+                    seq = 0;
+                    continue;
+                }
+                chromaformat = (p->buf[i] & 0x6) >> 1; /* bits 2..1 of the same byte */
+                if (chromaformat != 1) {
+                    seq = 0;
+                    continue;
+                }
+                seq++;
+                i += 6;
+                break;
+            }
+            case VC1_CODE_ENTRYPOINT:
+                if (!seq) /* entry points before any accepted sequence header are ignored */
+                    continue;
+                entry++;
+                i += 2;
+                break;
+            case VC1_CODE_FRAME:
+            case VC1_CODE_FIELD:
+            case VC1_CODE_SLICE:
+                if (seq && entry)
+                    frame++;
+                break;
+            }
+        }
+    }
+
+    if (frame > 1) /* more than one counted frame: just above half of the extension score */
+        return AVPROBE_SCORE_EXTENSION / 2 + 1;
+    if (frame == 1) /* exactly one counted frame: a quarter of the extension score */
+        return AVPROBE_SCORE_EXTENSION / 4;
+    return 0;
+}
+
+FF_DEF_RAWVIDEO_DEMUXER2(vc1, "raw VC-1", vc1_probe, "vc1", AV_CODEC_ID_VC1, AVFMT_GENERIC_INDEX|AVFMT_NOTIMESTAMPS)
diff --git a/libavformat/version.h b/libavformat/version.h
index c0920d54..024ab915 100644
--- a/libavformat/version.h
+++ b/libavformat/version.h
@@ -29,8 +29,8 @@
 
 #include "libavutil/version.h"
 
-#define LIBAVFORMAT_VERSION_MAJOR 56
-#define LIBAVFORMAT_VERSION_MINOR  36
+#define LIBAVFORMAT_VERSION_MAJOR  57
+#define LIBAVFORMAT_VERSION_MINOR  25
 #define LIBAVFORMAT_VERSION_MICRO 100
 
 #define LIBAVFORMAT_VERSION_INT AV_VERSION_INT(LIBAVFORMAT_VERSION_MAJOR, \
@@ -47,18 +47,32 @@
  * FF_API_* defines may be placed below to indicate public API that will be
  * dropped at a future version bump. The defines themselves are not part of
  * the public API and may change, break or disappear at any time.
+ *
+ * @note, when bumping the major version it is recommended to manually
+ * disable each FF_API_* in its own commit instead of disabling them all
+ * at once through the bump. This improves the git bisect-ability of the change.
+ *
  */
 #ifndef FF_API_LAVF_BITEXACT
-#define FF_API_LAVF_BITEXACT            (LIBAVFORMAT_VERSION_MAJOR < 57)
+#define FF_API_LAVF_BITEXACT            (LIBAVFORMAT_VERSION_MAJOR < 58)
 #endif
 #ifndef FF_API_LAVF_FRAC
-#define FF_API_LAVF_FRAC                (LIBAVFORMAT_VERSION_MAJOR < 57)
+#define FF_API_LAVF_FRAC                (LIBAVFORMAT_VERSION_MAJOR < 58)
 #endif
 #ifndef FF_API_LAVF_CODEC_TB
-#define FF_API_LAVF_CODEC_TB            (LIBAVFORMAT_VERSION_MAJOR < 57)
+#define FF_API_LAVF_CODEC_TB            (LIBAVFORMAT_VERSION_MAJOR < 58)
 #endif
 #ifndef FF_API_URL_FEOF
-#define FF_API_URL_FEOF                 (LIBAVFORMAT_VERSION_MAJOR < 57)
+#define FF_API_URL_FEOF                 (LIBAVFORMAT_VERSION_MAJOR < 58)
+#endif
+#ifndef FF_API_LAVF_FMT_RAWPICTURE
+#define FF_API_LAVF_FMT_RAWPICTURE      (LIBAVFORMAT_VERSION_MAJOR < 58)
+#endif
+#ifndef FF_API_COMPUTE_PKT_FIELDS2
+#define FF_API_COMPUTE_PKT_FIELDS2      (LIBAVFORMAT_VERSION_MAJOR < 58)
+#endif
+#ifndef FF_API_OLD_OPEN_CALLBACKS
+#define FF_API_OLD_OPEN_CALLBACKS       (LIBAVFORMAT_VERSION_MAJOR < 58)
 #endif
 
 #ifndef FF_API_R_FRAME_RATE
diff --git a/libavformat/vivo.c b/libavformat/vivo.c
index 72873798..10d81b91 100644
--- a/libavformat/vivo.c
+++ b/libavformat/vivo.c
@@ -298,7 +298,7 @@ static int vivo_read_packet(AVFormatContext *s, AVPacket *pkt)
 
 fail:
     if (ret < 0)
-        av_free_packet(pkt);
+        av_packet_unref(pkt);
     return ret;
 }
 
diff --git a/libavformat/vocdec.c b/libavformat/vocdec.c
index c49ed163..70fa0098 100644
--- a/libavformat/vocdec.c
+++ b/libavformat/vocdec.c
@@ -23,44 +23,6 @@
 #include "voc.h"
 #include "internal.h"
 
-
-static int voc_probe(AVProbeData *p)
-{
-    int version, check;
-
-    if (memcmp(p->buf, ff_voc_magic, sizeof(ff_voc_magic) - 1))
-        return 0;
-    version = AV_RL16(p->buf + 22);
-    check = AV_RL16(p->buf + 24);
-    if (~version + 0x1234 != check)
-        return 10;
-
-    return AVPROBE_SCORE_MAX;
-}
-
-static int voc_read_header(AVFormatContext *s)
-{
-    VocDecContext *voc = s->priv_data;
-    AVIOContext *pb = s->pb;
-    int header_size;
-    AVStream *st;
-
-    avio_skip(pb, 20);
-    header_size = avio_rl16(pb) - 22;
-    if (header_size != 4) {
-        av_log(s, AV_LOG_ERROR, "unknown header size: %d\n", header_size);
-        return AVERROR(ENOSYS);
-    }
-    avio_skip(pb, header_size);
-    st = avformat_new_stream(s, NULL);
-    if (!st)
-        return AVERROR(ENOMEM);
-    st->codec->codec_type = AVMEDIA_TYPE_AUDIO;
-
-    voc->remaining_size = 0;
-    return 0;
-}
-
 int
 ff_voc_get_packet(AVFormatContext *s, AVPacket *pkt, AVStream *st, int max_size)
 {
@@ -178,6 +140,44 @@ ff_voc_get_packet(AVFormatContext *s, AVPacket *pkt, AVStream *st, int max_size)
     return ret;
 }
 
+#if CONFIG_VOC_DEMUXER
+static int voc_probe(AVProbeData *p)
+{
+    const uint8_t *hdr = p->buf;
+    int ver, chk;
+
+    /* Compare all but the last byte of ff_voc_magic. */
+    if (memcmp(hdr, ff_voc_magic, sizeof(ff_voc_magic) - 1) != 0)
+        return 0;
+    /* A failed version check only lowers the score to 10. */
+    ver = AV_RL16(hdr + 22);
+    chk = AV_RL16(hdr + 24);
+    return (~ver + 0x1234 != chk) ? 10 : AVPROBE_SCORE_MAX;
+}
+
+static int voc_read_header(AVFormatContext *s)
+{
+    VocDecContext *ctx = s->priv_data;
+    AVIOContext *in = s->pb;
+    AVStream *stream;
+    int extra;
+
+    /* Skip 20 bytes, then read a little-endian 16-bit header size; only 26 (22 + 4) is accepted. */
+    avio_skip(in, 20);
+    extra = avio_rl16(in) - 22;
+    if (extra != 4) {
+        av_log(s, AV_LOG_ERROR, "unknown header size: %d\n", extra);
+        return AVERROR(ENOSYS);
+    }
+    avio_skip(in, extra);
+
+    if (!(stream = avformat_new_stream(s, NULL)))
+        return AVERROR(ENOMEM);
+    stream->codec->codec_type = AVMEDIA_TYPE_AUDIO;
+    ctx->remaining_size = 0;
+    return 0;
+}
+
 static int voc_read_packet(AVFormatContext *s, AVPacket *pkt)
 {
     return ff_voc_get_packet(s, pkt, s->streams[0], 0);
@@ -215,3 +215,4 @@ AVInputFormat ff_voc_demuxer = {
     .read_seek      = voc_read_seek,
     .codec_tag      = (const AVCodecTag* const []){ ff_voc_codec_tags, 0 },
 };
+#endif /* CONFIG_VOC_DEMUXER */
diff --git a/libavformat/vpk.c b/libavformat/vpk.c
new file mode 100644
index 00000000..75190494
--- /dev/null
+++ b/libavformat/vpk.c
@@ -0,0 +1,117 @@
+/*
+ * VPK demuxer
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/intreadwrite.h"
+#include "avformat.h"
+#include "internal.h"
+
+typedef struct VPKDemuxContext {
+    unsigned block_count;     /* number of blocks, computed in vpk_read_header() */
+    unsigned current_block;   /* incremented at the start of every vpk_read_packet() call */
+    unsigned last_block_size; /* byte size of the packet produced for the final block */
+} VPKDemuxContext;
+
+static int vpk_probe(AVProbeData *p)
+{
+    /* The first four bytes, read little-endian, must equal MKBETAG('V','P','K',' '). */
+    const int tag_ok = AV_RL32(p->buf) == MKBETAG('V','P','K',' ');
+
+    return tag_ok ? AVPROBE_SCORE_MAX / 3 * 2 : 0;
+}
+
+static int vpk_read_header(AVFormatContext *s)
+{
+    VPKDemuxContext *vpk = s->priv_data;
+    unsigned offset, samples_per_block;
+    AVStream *st;
+
+    /* Parse the fixed little-endian header, derive the block layout, then skip to the data offset. */
+    vpk->current_block = 0;
+    st = avformat_new_stream(s, NULL);
+    if (!st)
+        return AVERROR(ENOMEM);
+    avio_skip(s->pb, 4);
+    st->duration           = avio_rl32(s->pb) * 28 / 16;
+    offset                 = avio_rl32(s->pb);
+    st->codec->codec_type  = AVMEDIA_TYPE_AUDIO;
+    st->codec->codec_id    = AV_CODEC_ID_ADPCM_PSX;
+    st->codec->block_align = avio_rl32(s->pb);
+    st->codec->sample_rate = avio_rl32(s->pb);
+    if (st->codec->sample_rate <= 0)
+        return AVERROR_INVALIDDATA;
+    st->codec->channels    = avio_rl32(s->pb);
+    if (st->codec->channels <= 0)
+        return AVERROR_INVALIDDATA;
+    samples_per_block      = ((st->codec->block_align / st->codec->channels) * 28LL) / 16; /* 64-bit multiply: no signed int overflow */
+    if (st->codec->block_align <= 0 || samples_per_block <= 0) /* also rejects a block size that was read as negative */
+        return AVERROR_INVALIDDATA;
+    vpk->block_count       = (st->duration + (samples_per_block - 1)) / samples_per_block;
+    vpk->last_block_size   = (st->duration % samples_per_block) * 16 * st->codec->channels / 28;
+    if (offset < avio_tell(s->pb)) /* data offset lies before the current position: never skip backwards */
+        return AVERROR_INVALIDDATA;
+    avio_skip(s->pb, offset - avio_tell(s->pb));
+    avpriv_set_pts_info(st, 64, 1, st->codec->sample_rate);
+    return 0;
+}
+
+static int vpk_read_packet(AVFormatContext *s, AVPacket *pkt)
+{
+    AVCodecContext *codec = s->streams[0]->codec;
+    VPKDemuxContext *vpk = s->priv_data;
+    unsigned size, skip;
+    int ret, ch;
+
+    vpk->current_block++;
+    if (vpk->current_block > vpk->block_count)
+        return AVERROR_EOF;
+    if (vpk->current_block < vpk->block_count) { /* every block before the last is requested whole */
+        ret = av_get_packet(s->pb, pkt, codec->block_align);
+        pkt->stream_index = 0;
+        return ret;
+    }
+    /* Last block: per channel, read 'size' bytes into the packet and skip the next 'skip' bytes. */
+    size = vpk->last_block_size / codec->channels;
+    skip = (codec->block_align - vpk->last_block_size) / codec->channels;
+    ret  = av_new_packet(pkt, vpk->last_block_size);
+    if (ret < 0)
+        return ret;
+    for (ch = 0; ch < codec->channels; ch++) {
+        ret = avio_read(s->pb, pkt->data + ch * size, size);
+        avio_skip(s->pb, skip);
+        if (ret != size) {
+            av_packet_unref(pkt);
+            ret = AVERROR(EIO);
+            break;
+        }
+    }
+    pkt->stream_index = 0;
+    return ret;
+}
+
+AVInputFormat ff_vpk_demuxer = { /* priv_data_size reserves the VPKDemuxContext the callbacks read via s->priv_data */
+    .name           = "vpk",
+    .long_name      = NULL_IF_CONFIG_SMALL("Sony PS2 VPK"),
+    .priv_data_size = sizeof(VPKDemuxContext),
+    .read_probe     = vpk_probe,
+    .read_header    = vpk_read_header,
+    .read_packet    = vpk_read_packet,
+    .extensions     = "vpk",
+};
diff --git a/libavformat/vplayerdec.c b/libavformat/vplayerdec.c
index 619ccfd4..860b7785 100644
--- a/libavformat/vplayerdec.c
+++ b/libavformat/vplayerdec.c
@@ -90,7 +90,7 @@ static int vplayer_read_header(AVFormatContext *s)
         }
     }
 
-    ff_subtitles_queue_finalize(&vplayer->q);
+    ff_subtitles_queue_finalize(s, &vplayer->q);
     return 0;
 }
 
diff --git a/libavformat/vqf.c b/libavformat/vqf.c
index 29c726da..3c897e1c 100644
--- a/libavformat/vqf.c
+++ b/libavformat/vqf.c
@@ -211,8 +211,8 @@ static int vqf_read_header(AVFormatContext *s)
         size = 2048;
         break;
     default:
-        av_log(s, AV_LOG_ERROR, "Mode not suported: %d Hz, %d kb/s.\n",
-               st->codec->sample_rate, st->codec->bit_rate);
+        av_log(s, AV_LOG_ERROR, "Mode not supported: %d Hz, %"PRId64" kb/s.\n",
+               st->codec->sample_rate, (int64_t)st->codec->bit_rate);
         return -1;
     }
     c->frame_bit_len = st->codec->bit_rate*size/st->codec->sample_rate;
@@ -246,7 +246,7 @@ static int vqf_read_packet(AVFormatContext *s, AVPacket *pkt)
     ret = avio_read(s->pb, pkt->data+2, size);
 
     if (ret != size) {
-        av_free_packet(pkt);
+        av_packet_unref(pkt);
         return AVERROR(EIO);
     }
 
diff --git a/libavformat/wavdec.c b/libavformat/wavdec.c
index 864185f9..d95596f6 100644
--- a/libavformat/wavdec.c
+++ b/libavformat/wavdec.c
@@ -134,7 +134,7 @@ static int wav_parse_fmt_tag(AVFormatContext *s, int64_t size, AVStream **st)
     if (!*st)
         return AVERROR(ENOMEM);
 
-    ret = ff_get_wav_header(pb, (*st)->codec, size, wav->rifx);
+    ret = ff_get_wav_header(s, pb, (*st)->codec, size, wav->rifx);
     if (ret < 0)
         return ret;
     handle_stream_probing(*st);
@@ -146,6 +146,49 @@ static int wav_parse_fmt_tag(AVFormatContext *s, int64_t size, AVStream **st)
     return 0;
 }
 
+static int wav_parse_xma2_tag(AVFormatContext *s, int64_t size, AVStream **st)
+{
+    AVIOContext *pb = s->pb;
+    AVCodecContext *codec;
+    AVStream *stream;
+    int num_streams, n, channels = 0;
+    /* Parse an 'XMA2' chunk of 'size' bytes into a newly created audio stream stored through *st. */
+    if (size < 44)
+        return AVERROR_INVALIDDATA;
+
+    stream = *st = avformat_new_stream(s, NULL);
+    if (!stream)
+        return AVERROR(ENOMEM);
+    codec = stream->codec;
+    codec->codec_type    = AVMEDIA_TYPE_AUDIO;
+    codec->codec_id      = AV_CODEC_ID_XMA2;
+    stream->need_parsing = AVSTREAM_PARSE_FULL_RAW;
+
+    avio_skip(pb, 1);
+    num_streams = avio_r8(pb);
+    if (size < 40 + num_streams * 4)
+        return AVERROR_INVALIDDATA;
+    avio_skip(pb, 10);
+    codec->sample_rate = avio_rb32(pb);
+    avio_skip(pb, 12);
+    stream->duration   = avio_rb32(pb);
+    avio_skip(pb, 8);
+
+    /* Each stream entry is 4 bytes; its first byte is added to the channel total. */
+    for (n = num_streams; n > 0; n--) {
+        channels += avio_r8(pb);
+        avio_skip(pb, 3);
+    }
+    codec->channels = channels;
+    if (channels <= 0 || codec->sample_rate <= 0)
+        return AVERROR_INVALIDDATA;
+    avpriv_set_pts_info(stream, 64, 1, codec->sample_rate);
+    if (ff_alloc_extradata(codec, 34))
+        return AVERROR(ENOMEM);
+    memset(codec->extradata, 0, 34);
+    return 0;
+}
+
 static inline int wav_parse_bext_string(AVFormatContext *s, const char *key,
                                         int length)
 {
@@ -254,7 +297,7 @@ static int wav_read_header(AVFormatContext *s)
     AVIOContext *pb      = s->pb;
     AVStream *st         = NULL;
     WAVDemuxContext *wav = s->priv_data;
-    int ret, got_fmt = 0;
+    int ret, got_fmt = 0, got_xma2 = 0;
     int64_t next_tag_ofs, data_ofs = -1;
 
     wav->unaligned = avio_tell(s->pb) & 1;
@@ -319,15 +362,24 @@ static int wav_read_header(AVFormatContext *s)
         switch (tag) {
         case MKTAG('f', 'm', 't', ' '):
             /* only parse the first 'fmt ' tag found */
-            if (!got_fmt && (ret = wav_parse_fmt_tag(s, size, &st)) < 0) {
+            if (!got_xma2 && !got_fmt && (ret = wav_parse_fmt_tag(s, size, &st)) < 0) {
                 return ret;
             } else if (got_fmt)
                 av_log(s, AV_LOG_WARNING, "found more than one 'fmt ' tag\n");
 
             got_fmt = 1;
             break;
+        case MKTAG('X', 'M', 'A', '2'):
+            /* only parse the first 'XMA2' tag found */
+            if (!got_fmt && !got_xma2 && (ret = wav_parse_xma2_tag(s, size, &st)) < 0) {
+                return ret;
+            } else if (got_xma2)
+                av_log(s, AV_LOG_WARNING, "found more than one 'XMA2' tag\n");
+
+            got_xma2 = 1;
+            break;
         case MKTAG('d', 'a', 't', 'a'):
-            if (!got_fmt) {
+            if (!pb->seekable && !got_fmt && !got_xma2) {
                 av_log(s, AV_LOG_ERROR,
                        "found no 'fmt ' tag before the 'data' tag\n");
                 return AVERROR_INVALIDDATA;
@@ -422,6 +474,11 @@ static int wav_read_header(AVFormatContext *s)
     }
 
 break_loop:
+    if (!got_fmt && !got_xma2) {
+        av_log(s, AV_LOG_ERROR, "no 'fmt ' or 'XMA2' tag found\n");
+        return AVERROR_INVALIDDATA;
+    }
+
     if (data_ofs < 0) {
         av_log(s, AV_LOG_ERROR, "no 'data' tag found\n");
         return AVERROR_INVALIDDATA;
@@ -429,8 +486,29 @@ static int wav_read_header(AVFormatContext *s)
 
     avio_seek(pb, data_ofs, SEEK_SET);
 
+    if (data_size > (INT64_MAX>>3)) {
+        av_log(s, AV_LOG_WARNING, "Data size %"PRId64" is too large\n", data_size);
+        data_size = 0;
+    }
+
+    if (   st->codec->bit_rate > 0 && data_size > 0
+        && st->codec->sample_rate > 0
+        && sample_count > 0 && st->codec->channels > 1
+        && sample_count % st->codec->channels == 0) {
+        if (fabs(8.0 * data_size * st->codec->channels * st->codec->sample_rate /
+            sample_count /st->codec->bit_rate - 1.0) < 0.3)
+            sample_count /= st->codec->channels;
+    }
+
     if (   data_size > 0 && sample_count && st->codec->channels
-        && data_size / sample_count / st->codec->channels > 8) {
+        && (data_size << 3) / sample_count / st->codec->channels > st->codec->bits_per_coded_sample  + 1) {
+        av_log(s, AV_LOG_WARNING, "ignoring wrong sample_count %"PRId64"\n", sample_count);
+        sample_count = 0;
+    }
+
+    /* G.729 hack (for Ticket4577)
+     * FIXME: Come up with cleaner, more general solution */
+    if (st->codec->codec_id == AV_CODEC_ID_G729 && sample_count && (data_size << 3) > sample_count) {
         av_log(s, AV_LOG_WARNING, "ignoring wrong sample_count %"PRId64"\n", sample_count);
         sample_count = 0;
     }
@@ -615,7 +693,7 @@ static int wav_read_seek(AVFormatContext *s,
 #define OFFSET(x) offsetof(WAVDemuxContext, x)
 #define DEC AV_OPT_FLAG_DECODING_PARAM
 static const AVOption demux_options[] = {
-    { "ignore_length", "Ignore length", OFFSET(ignore_length), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, DEC },
+    { "ignore_length", "Ignore length", OFFSET(ignore_length), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, DEC },
     { NULL },
 };
 
@@ -689,7 +767,7 @@ static int w64_read_header(AVFormatContext *s)
 
         if (!memcmp(guid, ff_w64_guid_fmt, 16)) {
             /* subtract chunk header size - normal wav file doesn't count it */
-            ret = ff_get_wav_header(pb, st->codec, size - 24, 0);
+            ret = ff_get_wav_header(s, pb, st->codec, size - 24, 0);
             if (ret < 0)
                 return ret;
             avio_skip(pb, FFALIGN(size, INT64_C(8)) - size);
diff --git a/libavformat/wavenc.c b/libavformat/wavenc.c
index f89c91e8..0156f6e8 100644
--- a/libavformat/wavenc.c
+++ b/libavformat/wavenc.c
@@ -502,7 +502,7 @@ static int wav_write_trailer(AVFormatContext *s)
 #define OFFSET(x) offsetof(WAVMuxContext, x)
 #define ENC AV_OPT_FLAG_ENCODING_PARAM
 static const AVOption options[] = {
-    { "write_bext", "Write BEXT chunk.", OFFSET(write_bext), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, ENC },
+    { "write_bext", "Write BEXT chunk.", OFFSET(write_bext), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, ENC },
     { "write_peak", "Write Peak Envelope chunk.",            OFFSET(write_peak), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 2, ENC, "peak" },
     { "off",        "Do not write peak chunk.",              0,                  AV_OPT_TYPE_CONST, { .i64 = PEAK_OFF  }, 0, 0, ENC, "peak" },
     { "on",         "Append peak chunk after wav data.",     0,                  AV_OPT_TYPE_CONST, { .i64 = PEAK_ON   }, 0, 0, ENC, "peak" },
diff --git a/libavformat/wc3movie.c b/libavformat/wc3movie.c
index 7bd09e3f..04f8667a 100644
--- a/libavformat/wc3movie.c
+++ b/libavformat/wc3movie.c
@@ -295,7 +295,7 @@ static int wc3_read_close(AVFormatContext *s)
     Wc3DemuxContext *wc3 = s->priv_data;
 
     if (wc3->vpkt.size > 0)
-        av_free_packet(&wc3->vpkt);
+        av_packet_unref(&wc3->vpkt);
 
     return 0;
 }
diff --git a/libavformat/webm_chunk.c b/libavformat/webm_chunk.c
index 3dfef4bb..063eb3be 100644
--- a/libavformat/webm_chunk.c
+++ b/libavformat/webm_chunk.c
@@ -29,6 +29,7 @@
 
 #include "avformat.h"
 #include "avio.h"
+#include "avio_internal.h"
 #include "internal.h"
 
 #include "libavutil/avassert.h"
@@ -125,8 +126,7 @@ static int webm_chunk_write_header(AVFormatContext *s)
     ret = get_chunk_filename(s, 1, oc->filename);
     if (ret < 0)
         return ret;
-    ret = avio_open2(&oc->pb, oc->filename, AVIO_FLAG_WRITE,
-                     &s->interrupt_callback, NULL);
+    ret = s->io_open(s, &oc->pb, oc->filename, AVIO_FLAG_WRITE, NULL);
     if (ret < 0)
         return ret;
 
@@ -134,7 +134,7 @@ static int webm_chunk_write_header(AVFormatContext *s)
     ret = oc->oformat->write_header(oc);
     if (ret < 0)
         return ret;
-    avio_close(oc->pb);
+    ff_format_io_close(s, &oc->pb);
     return 0;
 }
 
@@ -169,13 +169,11 @@ static int chunk_end(AVFormatContext *s)
     ret = get_chunk_filename(s, 0, filename);
     if (ret < 0)
         goto fail;
-    ret = avio_open2(&pb, filename, AVIO_FLAG_WRITE, &s->interrupt_callback, NULL);
+    ret = s->io_open(s, &pb, filename, AVIO_FLAG_WRITE, NULL);
     if (ret < 0)
         goto fail;
     avio_write(pb, buffer, buffer_size);
-    ret = avio_close(pb);
-    if (ret < 0)
-        goto fail;
+    ff_format_io_close(s, &pb);
     oc->pb = NULL;
 fail:
     av_free(buffer);
diff --git a/libavformat/webmdashenc.c b/libavformat/webmdashenc.c
index 76ea4237..301c045b 100644
--- a/libavformat/webmdashenc.c
+++ b/libavformat/webmdashenc.c
@@ -194,7 +194,7 @@ static int write_representation(AVFormatContext *s, AVStream *stream, char *id,
         avio_printf(s->pb, " width=\"%d\"", stream->codec->width);
     if (stream->codec->codec_type == AVMEDIA_TYPE_VIDEO && output_height)
         avio_printf(s->pb, " height=\"%d\"", stream->codec->height);
-    if (stream->codec->codec_type = AVMEDIA_TYPE_AUDIO && output_sample_rate)
+    if (stream->codec->codec_type == AVMEDIA_TYPE_AUDIO && output_sample_rate)
         avio_printf(s->pb, " audioSamplingRate=\"%d\"", stream->codec->sample_rate);
     if (w->is_live) {
         // For live streams, Codec and Mime Type always go in the Representation tag.
@@ -392,10 +392,10 @@ static int write_adaptation_set(AVFormatContext *s, int as_index)
         if (w->is_live) {
             AVDictionaryEntry *filename =
                 av_dict_get(s->streams[as->streams[i]]->metadata, FILENAME, NULL, 0);
-            if (!filename ||
-                (ret = parse_filename(filename->value, &representation_id, NULL, NULL))) {
+            if (!filename)
+                return AVERROR(EINVAL);
+            if (ret = parse_filename(filename->value, &representation_id, NULL, NULL))
                 return ret;
-            }
         } else {
             representation_id = av_asprintf("%d", w->representation_id++);
             if (!representation_id) return AVERROR(ENOMEM);
@@ -518,8 +518,8 @@ static int webm_dash_manifest_write_trailer(AVFormatContext *s)
 #define OFFSET(x) offsetof(WebMDashMuxContext, x)
 static const AVOption options[] = {
     { "adaptation_sets", "Adaptation sets. Syntax: id=0,streams=0,1,2 id=1,streams=3,4 and so on", OFFSET(adaptation_sets), AV_OPT_TYPE_STRING, { 0 }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM },
-    { "debug_mode", "[private option - users should never set this]. set this to 1 to create deterministic output", OFFSET(debug_mode), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, AV_OPT_FLAG_ENCODING_PARAM },
-    { "live", "set this to 1 to create a live stream manifest", OFFSET(is_live), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, AV_OPT_FLAG_ENCODING_PARAM },
+    { "debug_mode", "[private option - users should never set this]. Create deterministic output", OFFSET(debug_mode), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AV_OPT_FLAG_ENCODING_PARAM },
+    { "live", "create a live stream manifest", OFFSET(is_live), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, AV_OPT_FLAG_ENCODING_PARAM },
     { "chunk_start_index",  "start index of the chunk", OFFSET(chunk_start_index), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM },
     { "chunk_duration_ms", "duration of each chunk (in milliseconds)", OFFSET(chunk_duration), AV_OPT_TYPE_INT, {.i64 = 1000}, 0, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM },
     { "utc_timing_url", "URL of the page that will return the UTC timestamp in ISO format", OFFSET(utc_timing_url), AV_OPT_TYPE_STRING, { 0 }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM },
diff --git a/libavformat/webpenc.c b/libavformat/webpenc.c
index 69270c63..2a21730f 100644
--- a/libavformat/webpenc.c
+++ b/libavformat/webpenc.c
@@ -140,7 +140,7 @@ static int flush(AVFormatContext *s, int trailer, int64_t pts)
             avio_w8(s->pb, 0);
         }
         avio_write(s->pb, w->last_pkt.data + skip, w->last_pkt.size - skip);
-        av_free_packet(&w->last_pkt);
+        av_packet_unref(&w->last_pkt);
     }
 
     return 0;
diff --git a/libavformat/webvttdec.c b/libavformat/webvttdec.c
index e457e8f6..47a3255f 100644
--- a/libavformat/webvttdec.c
+++ b/libavformat/webvttdec.c
@@ -92,7 +92,8 @@ static int webvtt_read_header(AVFormatContext *s)
 
         /* ignore header chunk */
         if (!strncmp(p, "\xEF\xBB\xBFWEBVTT", 9) ||
-            !strncmp(p, "WEBVTT", 6))
+            !strncmp(p, "WEBVTT", 6) ||
+            !strncmp(p, "NOTE", 4))
             continue;
 
         /* optional cue identifier (can be a number like in SRT or some kind of
@@ -161,7 +162,7 @@ static int webvtt_read_header(AVFormatContext *s)
         SET_SIDE_DATA(settings,   AV_PKT_DATA_WEBVTT_SETTINGS);
     }
 
-    ff_subtitles_queue_finalize(&webvtt->q);
+    ff_subtitles_queue_finalize(s, &webvtt->q);
 
 end:
     av_bprint_finalize(&cue,    NULL);
diff --git a/libavformat/webvttenc.c b/libavformat/webvttenc.c
index b93993d5..c3865387 100644
--- a/libavformat/webvttenc.c
+++ b/libavformat/webvttenc.c
@@ -46,8 +46,14 @@ static void webvtt_write_time(AVIOContext *pb, int64_t millisec)
 static int webvtt_write_header(AVFormatContext *ctx)
 {
     AVStream     *s = ctx->streams[0];
+    AVCodecContext *avctx = ctx->streams[0]->codec;
     AVIOContext *pb = ctx->pb;
 
+    if (ctx->nb_streams != 1 || avctx->codec_id != AV_CODEC_ID_WEBVTT) {
+        av_log(ctx, AV_LOG_ERROR, "Exactly one WebVTT stream is needed.\n");
+        return AVERROR(EINVAL);
+    }
+
     avpriv_set_pts_info(s, 64, 1, 1000);
 
     avio_printf(pb, "WEBVTT\n");
diff --git a/libavformat/wtvdec.c b/libavformat/wtvdec.c
index e2266902..71deaf0a 100644
--- a/libavformat/wtvdec.c
+++ b/libavformat/wtvdec.c
@@ -670,7 +670,7 @@ static AVStream * parse_media_type(AVFormatContext *s, AVStream *st, int sid,
         if (!st)
             return NULL;
         if (!ff_guidcmp(formattype, ff_format_waveformatex)) {
-            int ret = ff_get_wav_header(pb, st->codec, size, 0);
+            int ret = ff_get_wav_header(s, pb, st->codec, size, 0);
             if (ret < 0)
                 return NULL;
         } else {
@@ -1027,22 +1027,23 @@ static int read_header(AVFormatContext *s)
             if (wtv->nb_index_entries) {
                 pb = wtvfile_open(s, root, root_size, ff_timeline_table_0_entries_Events_le16);
                 if (pb) {
-                    int i;
+                    AVIndexEntry *e = wtv->index_entries;
+                    AVIndexEntry *e_end = wtv->index_entries + wtv->nb_index_entries - 1;
+                    uint64_t last_position = 0;
                     while (1) {
                         uint64_t frame_nb = avio_rl64(pb);
                         uint64_t position = avio_rl64(pb);
+                        while (e <= e_end && frame_nb > e->size) { /* test the bound first: e may already be one past e_end, so e->size must not be read */
+                            e->pos = last_position;
+                            e++;
+                        }
                         if (avio_feof(pb))
                             break;
-                        for (i = wtv->nb_index_entries - 1; i >= 0; i--) {
-                            AVIndexEntry *e = wtv->index_entries + i;
-                            if (frame_nb > e->size)
-                                break;
-                            if (position > e->pos)
-                                e->pos = position;
-                        }
+                        last_position = position;
                     }
+                    e_end->pos = last_position;
                     wtvfile_close(pb);
-                    st->duration = wtv->index_entries[wtv->nb_index_entries - 1].timestamp;
+                    st->duration = e_end->timestamp;
                 }
             }
         }
diff --git a/libavformat/wtvenc.c b/libavformat/wtvenc.c
index 8aebdddf..0894b7fe 100644
--- a/libavformat/wtvenc.c
+++ b/libavformat/wtvenc.c
@@ -826,7 +826,7 @@ static int write_trailer(AVFormatContext *s)
 
     av_free(wctx->sp_pairs);
     av_free(wctx->st_pairs);
-    av_free_packet(&wctx->thumbnail);
+    av_packet_unref(&wctx->thumbnail);
     return 0;
 }
 
diff --git a/libavformat/wvdec.c b/libavformat/wvdec.c
index 96a631fc..042f96b8 100644
--- a/libavformat/wvdec.c
+++ b/libavformat/wvdec.c
@@ -273,25 +273,25 @@ static int wv_read_packet(AVFormatContext *s, AVPacket *pkt)
     memcpy(pkt->data, wc->block_header, WV_HEADER_SIZE);
     ret = avio_read(s->pb, pkt->data + WV_HEADER_SIZE, wc->header.blocksize);
     if (ret != wc->header.blocksize) {
-        av_free_packet(pkt);
+        av_packet_unref(pkt);
         return AVERROR(EIO);
     }
     while (!(wc->header.flags & WV_FLAG_FINAL_BLOCK)) {
         if ((ret = wv_read_block_header(s, s->pb)) < 0) {
-            av_free_packet(pkt);
+            av_packet_unref(pkt);
             return ret;
         }
 
         off = pkt->size;
         if ((ret = av_grow_packet(pkt, WV_HEADER_SIZE + wc->header.blocksize)) < 0) {
-            av_free_packet(pkt);
+            av_packet_unref(pkt);
             return ret;
         }
         memcpy(pkt->data + off, wc->block_header, WV_HEADER_SIZE);
 
         ret = avio_read(s->pb, pkt->data + off + WV_HEADER_SIZE, wc->header.blocksize);
         if (ret != wc->header.blocksize) {
-            av_free_packet(pkt);
+            av_packet_unref(pkt);
             return (ret < 0) ? ret : AVERROR_EOF;
         }
     }
diff --git a/libavformat/wvedec.c b/libavformat/wvedec.c
new file mode 100644
index 00000000..8e74abe5
--- /dev/null
+++ b/libavformat/wvedec.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avformat.h"
+#include "internal.h"
+#include "pcm.h"
+
+/* Probe: 18-byte "ALawSoundFile**" signature plus a fixed pattern at offset 22. */
+static int wve_probe(AVProbeData *p)
+{
+    int sig_ok = !memcmp(p->buf,      "ALawSoundFile**\0\017\020", 18) &&
+                 !memcmp(p->buf + 22, "\0\0\0\1\0\0\0\0\0\0",       10);
+    return sig_ok ? AVPROBE_SCORE_MAX : 0;
+}
+
+/* Skip the 18 signature bytes, read the duration, and describe the A-law stream. */
+static int wve_read_header(AVFormatContext *s)
+{
+    AVStream *stream = avformat_new_stream(s, NULL);
+    AVCodecContext *avc;
+
+    if (!stream)
+        return AVERROR(ENOMEM);
+    avc = stream->codec;
+    avio_skip(s->pb, 18);
+    stream->duration           = avio_rb32(s->pb);
+    avc->codec_type            = AVMEDIA_TYPE_AUDIO;
+    avc->codec_id              = AV_CODEC_ID_PCM_ALAW;
+    avc->sample_rate           = 8000;
+    avc->channels              = 1;
+    avc->bits_per_coded_sample = av_get_bits_per_sample(avc->codec_id);
+    avc->block_align           = avc->bits_per_coded_sample * avc->channels / 8;
+    avpriv_set_pts_info(stream, 64, 1, avc->sample_rate);
+    avio_skip(s->pb, 10);
+    return 0;
+}
+
+AVInputFormat ff_wve_demuxer = {
+    .name        = "wve",
+    .long_name   = NULL_IF_CONFIG_SMALL("Psion 3 audio"),
+    .read_header = wve_read_header,
+    .read_probe  = wve_probe,
+    .read_seek   = ff_pcm_read_seek,   /* seeking and packet reading are */
+    .read_packet = ff_pcm_read_packet, /* delegated to the ff_pcm_* helpers */
+};
diff --git a/libavformat/xmv.c b/libavformat/xmv.c
index 45c24642..45e5ebca 100644
--- a/libavformat/xmv.c
+++ b/libavformat/xmv.c
@@ -549,16 +549,17 @@ static int xmv_read_packet(AVFormatContext *s,
         /* Fetch a video frame */
 
         result = xmv_fetch_video_packet(s, pkt);
-        if (result)
-            return result;
-
     } else {
         /* Fetch an audio frame */
 
         result = xmv_fetch_audio_packet(s, pkt, xmv->current_stream - 1);
-        if (result)
-            return result;
     }
+    if (result) {
+        xmv->current_stream = 0;
+        xmv->video.current_frame = xmv->video.frame_count;
+        return result;
+    }
+
 
     /* Increase our counters */
     if (++xmv->current_stream >= xmv->stream_count) {
@@ -572,6 +573,7 @@ static int xmv_read_packet(AVFormatContext *s,
 AVInputFormat ff_xmv_demuxer = {
     .name           = "xmv",
     .long_name      = NULL_IF_CONFIG_SMALL("Microsoft XMV"),
+    .extensions     = "xmv",
     .priv_data_size = sizeof(XMVDemuxContext),
     .read_probe     = xmv_probe,
     .read_header    = xmv_read_header,
diff --git a/libavformat/xvag.c b/libavformat/xvag.c
new file mode 100644
index 00000000..92c80a48
--- /dev/null
+++ b/libavformat/xvag.c
@@ -0,0 +1,112 @@
+/*
+ * XVAG demuxer
+ * Copyright (c) 2015 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/bswap.h"
+#include "avformat.h"
+#include "internal.h"
+
+/* Probe: "XVAG" magic at offset 0 followed by a "fmat" tag at offset 32. */
+static int xvag_probe(AVProbeData *p)
+{
+    const unsigned char *hdr = p->buf;
+    if (!memcmp(hdr, "XVAG", 4) && !memcmp(hdr + 32, "fmat", 4))
+        return AVPROBE_SCORE_MAX;
+    return 0;
+}
+
+/* Parse the XVAG header and set up the single audio stream; byte order is
+ * inferred from whichever reading of the data offset field is smaller.
+ * Returns 0 on success or a negative AVERROR code. */
+static int xvag_read_header(AVFormatContext *s)
+{
+    unsigned (*rd32)(AVIOContext *) = avio_rl32;
+    unsigned offset, codec;
+    AVStream *st;
+
+    avio_skip(s->pb, 4);
+
+    st = avformat_new_stream(s, NULL);
+    if (!st)
+        return AVERROR(ENOMEM);
+    st->codec->codec_type = AVMEDIA_TYPE_AUDIO;
+
+    offset = avio_rl32(s->pb);
+    if (offset > av_bswap32(offset)) {
+        /* big-endian file: fix up the offset, read the rest big-endian */
+        offset = av_bswap32(offset);
+        rd32   = avio_rb32;
+    }
+    avio_skip(s->pb, 28);
+    codec                  = rd32(s->pb);
+    st->codec->channels    = rd32(s->pb);
+    avio_skip(s->pb, 4);
+    st->duration           = rd32(s->pb);
+    avio_skip(s->pb, 8);
+    st->codec->sample_rate = rd32(s->pb);
+
+    if (st->codec->sample_rate <= 0)
+        return AVERROR_INVALIDDATA;
+    /* the upper bound keeps 16 * channels below from overflowing int */
+    if (st->codec->channels <= 0 || st->codec->channels > INT_MAX / 16)
+        return AVERROR_INVALIDDATA;
+
+    switch (codec) {
+    case 0x1c:
+        st->codec->codec_id    = AV_CODEC_ID_ADPCM_PSX;
+        st->codec->block_align = 16 * st->codec->channels;
+        break;
+    default:
+        avpriv_request_sample(s, "codec %X", codec);
+        return AVERROR_PATCHWELCOME;
+    }
+
+    /* a data offset pointing back into the parsed header is invalid */
+    if (offset < avio_tell(s->pb))
+        return AVERROR_INVALIDDATA;
+    avio_skip(s->pb, offset - avio_tell(s->pb));
+
+    /* data that starts with 0xFFFB switches the stream over to MP3 */
+    if (avio_rb16(s->pb) == 0xFFFB) {
+        st->codec->codec_id    = AV_CODEC_ID_MP3;
+        st->codec->block_align = 0x1000;
+        st->need_parsing       = AVSTREAM_PARSE_FULL_RAW;
+    }
+    avio_skip(s->pb, -2);
+    avpriv_set_pts_info(st, 64, 1, st->codec->sample_rate);
+
+    return 0;
+}
+
+/* Read one block_align-sized chunk for stream 0 from the input as a packet. */
+static int xvag_read_packet(AVFormatContext *s, AVPacket *pkt)
+{
+    const int chunk_size = s->streams[0]->codec->block_align;
+    return av_get_packet(s->pb, pkt, chunk_size);
+}
+
+AVInputFormat ff_xvag_demuxer = {
+    .name        = "xvag",
+    .long_name   = NULL_IF_CONFIG_SMALL("Sony PS3 XVAG"),
+    .extensions  = "xvag",            /* file name extension for this format */
+    .read_header = xvag_read_header,
+    .read_packet = xvag_read_packet,
+    .read_probe  = xvag_probe,
+};
diff --git a/libavformat/xwma.c b/libavformat/xwma.c
index 683d3d0d..d516b767 100644
--- a/libavformat/xwma.c
+++ b/libavformat/xwma.c
@@ -75,7 +75,7 @@ static int xwma_read_header(AVFormatContext *s)
     if (!st)
         return AVERROR(ENOMEM);
 
-    ret = ff_get_wav_header(pb, st->codec, size, 0);
+    ret = ff_get_wav_header(s, pb, st->codec, size, 0);
     if (ret < 0)
         return ret;
     st->need_parsing = AVSTREAM_PARSE_NONE;
@@ -85,7 +85,8 @@ static int xwma_read_header(AVFormatContext *s)
      * extradata for that. Thus, ask the user for feedback, but try to go on
      * anyway.
      */
-    if (st->codec->codec_id != AV_CODEC_ID_WMAV2) {
+    if (st->codec->codec_id != AV_CODEC_ID_WMAV2 &&
+        st->codec->codec_id != AV_CODEC_ID_WMAPRO) {
         avpriv_request_sample(s, "Unexpected codec (tag 0x04%x; id %d)",
                               st->codec->codec_tag, st->codec->codec_id);
     } else {
@@ -103,6 +104,13 @@ static int xwma_read_header(AVFormatContext *s)
              */
             avpriv_request_sample(s, "Unexpected extradata (%d bytes)",
                                   st->codec->extradata_size);
+        } else if (st->codec->codec_id == AV_CODEC_ID_WMAPRO) {
+            if (ff_alloc_extradata(st->codec, 18))
+                return AVERROR(ENOMEM);
+
+            memset(st->codec->extradata, 0, st->codec->extradata_size);
+            st->codec->extradata[ 0] = st->codec->bits_per_coded_sample;
+            st->codec->extradata[14] = 224;
         } else {
             if (ff_alloc_extradata(st->codec, 6))
                 return AVERROR(ENOMEM);
diff --git a/libavformat/yop.c b/libavformat/yop.c
index 81b3cc2b..9b77f6e3 100644
--- a/libavformat/yop.c
+++ b/libavformat/yop.c
@@ -132,11 +132,6 @@ static int yop_read_packet(AVFormatContext *s, AVPacket *pkt)
         *pkt                   =  yop->video_packet;
         yop->video_packet.data =  NULL;
         yop->video_packet.buf  =  NULL;
-#if FF_API_DESTRUCT_PACKET
-FF_DISABLE_DEPRECATION_WARNINGS
-        yop->video_packet.destruct = NULL;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
         yop->video_packet.size =  0;
         pkt->data[0]           =  yop->odd_frame;
         pkt->flags             |= AV_PKT_FLAG_KEY;
@@ -178,14 +173,14 @@ FF_ENABLE_DEPRECATION_WARNINGS
     return yop->audio_block_length;
 
 err_out:
-    av_free_packet(&yop->video_packet);
+    av_packet_unref(&yop->video_packet);
     return ret;
 }
 
 static int yop_read_close(AVFormatContext *s)
 {
     YopDecContext *yop = s->priv_data;
-    av_free_packet(&yop->video_packet);
+    av_packet_unref(&yop->video_packet);
     return 0;
 }
 
@@ -210,7 +205,7 @@ static int yop_read_seek(AVFormatContext *s, int stream_index,
     if (avio_seek(s->pb, frame_pos, SEEK_SET) < 0)
         return -1;
 
-    av_free_packet(&yop->video_packet);
+    av_packet_unref(&yop->video_packet);
     yop->odd_frame = timestamp & 1;
 
     return 0;
diff --git a/libavformat/yuv4mpeg.h b/libavformat/yuv4mpeg.h
index 750f4984..eba7337f 100644
--- a/libavformat/yuv4mpeg.h
+++ b/libavformat/yuv4mpeg.h
@@ -23,5 +23,6 @@
 
 #define Y4M_MAGIC "YUV4MPEG2"
 #define Y4M_FRAME_MAGIC "FRAME"
+#define Y4M_FRAME_MAGIC_LEN 6
 
 #endif /* AVFORMAT_YUV4MPEG_H */
diff --git a/libavformat/yuv4mpegdec.c b/libavformat/yuv4mpegdec.c
index 7613c3cd..ea1ae255 100644
--- a/libavformat/yuv4mpegdec.c
+++ b/libavformat/yuv4mpegdec.c
@@ -19,6 +19,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "libavutil/imgutils.h"
+
 #include "avformat.h"
 #include "internal.h"
 #include "yuv4mpeg.h"
@@ -256,6 +258,12 @@ static int yuv4_read_header(AVFormatContext *s)
     st->sample_aspect_ratio           = (AVRational){ aspectn, aspectd };
     st->codec->chroma_sample_location = chroma_sample_location;
     st->codec->field_order            = field_order;
+    s->packet_size = av_image_get_buffer_size(st->codec->pix_fmt, width, height, 1) + Y4M_FRAME_MAGIC_LEN;
+    if ((int) s->packet_size < 0)
+        return s->packet_size;
+    s->internal->data_offset = avio_tell(pb);
+
+    st->duration = (avio_size(pb) - avio_tell(pb)) / s->packet_size;
 
     return 0;
 }
@@ -264,8 +272,8 @@ static int yuv4_read_packet(AVFormatContext *s, AVPacket *pkt)
 {
     int i;
     char header[MAX_FRAME_HEADER+1];
-    int packet_size, width, height, ret;
-    AVStream *st = s->streams[0];
+    int ret;
+    int64_t off = avio_tell(s->pb);
 
     for (i = 0; i < MAX_FRAME_HEADER; i++) {
         header[i] = avio_r8(s->pb);
@@ -284,20 +292,22 @@ static int yuv4_read_packet(AVFormatContext *s, AVPacket *pkt)
     if (strncmp(header, Y4M_FRAME_MAGIC, strlen(Y4M_FRAME_MAGIC)))
         return AVERROR_INVALIDDATA;
 
-    width  = st->codec->width;
-    height = st->codec->height;
-
-    packet_size = avpicture_get_size(st->codec->pix_fmt, width, height);
-    if (packet_size < 0)
-        return packet_size;
-
-    ret = av_get_packet(s->pb, pkt, packet_size);
+    ret = av_get_packet(s->pb, pkt, s->packet_size - Y4M_FRAME_MAGIC_LEN);
     if (ret < 0)
         return ret;
-    else if (ret != packet_size)
+    else if (ret != s->packet_size - Y4M_FRAME_MAGIC_LEN)
         return s->pb->eof_reached ? AVERROR_EOF : AVERROR(EIO);
 
     pkt->stream_index = 0;
+    pkt->pts = (off - s->internal->data_offset) / s->packet_size;
+    pkt->duration = 1;
+    return 0;
+}
+
+static int yuv4_read_seek(AVFormatContext *s, int stream_index, int64_t pts, int flags)
+{   /* frames are packet_size bytes apart, so pts maps to a byte offset */
+    if (avio_seek(s->pb, pts * s->packet_size + s->internal->data_offset, SEEK_SET) < 0)
+        return -1; /* report the failed seek instead of claiming success */
     return 0;
 }
 
@@ -316,5 +326,6 @@ AVInputFormat ff_yuv4mpegpipe_demuxer = {
     .read_probe     = yuv4_probe,
     .read_header    = yuv4_read_header,
     .read_packet    = yuv4_read_packet,
+    .read_seek      = yuv4_read_seek,
     .extensions     = "y4m",
 };
diff --git a/libavformat/yuv4mpegenc.c b/libavformat/yuv4mpegenc.c
index cc954fcc..6120c7e5 100644
--- a/libavformat/yuv4mpegenc.c
+++ b/libavformat/yuv4mpegenc.c
@@ -138,16 +138,14 @@ static int yuv4_write_packet(AVFormatContext *s, AVPacket *pkt)
 {
     AVStream *st = s->streams[pkt->stream_index];
     AVIOContext *pb = s->pb;
-    AVPicture *picture, picture_tmp;
+    AVFrame *frame;
     int* first_pkt = s->priv_data;
     int width, height, h_chroma_shift, v_chroma_shift;
     int i;
     char buf2[Y4M_LINE_MAX + 1];
-    char buf1[20];
     uint8_t *ptr, *ptr1, *ptr2;
 
-    memcpy(&picture_tmp, pkt->data, sizeof(AVPicture));
-    picture = &picture_tmp;
+    frame = (AVFrame *)pkt->data;
 
     /* for the first packet we have to output the header as well */
     if (*first_pkt) {
@@ -163,13 +161,12 @@ static int yuv4_write_packet(AVFormatContext *s, AVPacket *pkt)
 
     /* construct frame header */
 
-    snprintf(buf1, sizeof(buf1), "%s\n", Y4M_FRAME_MAGIC);
-    avio_write(pb, buf1, strlen(buf1));
+    avio_printf(s->pb, "%s\n", Y4M_FRAME_MAGIC);
 
     width  = st->codec->width;
     height = st->codec->height;
 
-    ptr = picture->data[0];
+    ptr = frame->data[0];
 
     switch (st->codec->pix_fmt) {
     case AV_PIX_FMT_GRAY8:
@@ -204,7 +201,7 @@ static int yuv4_write_packet(AVFormatContext *s, AVPacket *pkt)
 
     for (i = 0; i < height; i++) {
         avio_write(pb, ptr, width);
-        ptr += picture->linesize[0];
+        ptr += frame->linesize[0];
     }
 
     if (st->codec->pix_fmt != AV_PIX_FMT_GRAY8 &&
@@ -212,18 +209,18 @@ static int yuv4_write_packet(AVFormatContext *s, AVPacket *pkt)
         // Adjust for smaller Cb and Cr planes
         av_pix_fmt_get_chroma_sub_sample(st->codec->pix_fmt, &h_chroma_shift,
                                          &v_chroma_shift);
-        width  = FF_CEIL_RSHIFT(width,  h_chroma_shift);
-        height = FF_CEIL_RSHIFT(height, v_chroma_shift);
+        width  = AV_CEIL_RSHIFT(width,  h_chroma_shift);
+        height = AV_CEIL_RSHIFT(height, v_chroma_shift);
 
-        ptr1 = picture->data[1];
-        ptr2 = picture->data[2];
+        ptr1 = frame->data[1];
+        ptr2 = frame->data[2];
         for (i = 0; i < height; i++) {     /* Cb */
             avio_write(pb, ptr1, width);
-            ptr1 += picture->linesize[1];
+            ptr1 += frame->linesize[1];
         }
         for (i = 0; i < height; i++) {     /* Cr */
             avio_write(pb, ptr2, width);
-            ptr2 += picture->linesize[2];
+            ptr2 += frame->linesize[2];
         }
     }
 
@@ -237,8 +234,8 @@ static int yuv4_write_header(AVFormatContext *s)
     if (s->nb_streams != 1)
         return AVERROR(EIO);
 
-    if (s->streams[0]->codec->codec_id != AV_CODEC_ID_RAWVIDEO) {
-        av_log(s, AV_LOG_ERROR, "ERROR: Only rawvideo supported.\n");
+    if (s->streams[0]->codec->codec_id != AV_CODEC_ID_WRAPPED_AVFRAME) {
+        av_log(s, AV_LOG_ERROR, "ERROR: Codec not supported.\n");
         return AVERROR_INVALIDDATA;
     }
 
@@ -300,8 +297,7 @@ AVOutputFormat ff_yuv4mpegpipe_muxer = {
     .extensions        = "y4m",
     .priv_data_size    = sizeof(int),
     .audio_codec       = AV_CODEC_ID_NONE,
-    .video_codec       = AV_CODEC_ID_RAWVIDEO,
+    .video_codec       = AV_CODEC_ID_WRAPPED_AVFRAME,
     .write_header      = yuv4_write_header,
     .write_packet      = yuv4_write_packet,
-    .flags             = AVFMT_RAWPICTURE,
 };
diff --git a/libavresample/audio_data.c b/libavresample/audio_data.c
index c2f54501..b54ead84 100644
--- a/libavresample/audio_data.c
+++ b/libavresample/audio_data.c
@@ -70,9 +70,10 @@ int ff_audio_data_set_channels(AudioData *a, int channels)
     return 0;
 }
 
-int ff_audio_data_init(AudioData *a, uint8_t **src, int plane_size, int channels,
-                       int nb_samples, enum AVSampleFormat sample_fmt,
-                       int read_only, const char *name)
+int ff_audio_data_init(AudioData *a, uint8_t * const *src, int plane_size,
+                       int channels, int nb_samples,
+                       enum AVSampleFormat sample_fmt, int read_only,
+                       const char *name)
 {
     int p;
 
diff --git a/libavresample/audio_data.h b/libavresample/audio_data.h
index b50bd406..1280307a 100644
--- a/libavresample/audio_data.h
+++ b/libavresample/audio_data.h
@@ -76,9 +76,10 @@ int ff_audio_data_set_channels(AudioData *a, int channels);
  * @param name            name for debug logging (can be NULL)
  * @return                0 on success, negative AVERROR value on error
  */
-int ff_audio_data_init(AudioData *a, uint8_t **src, int plane_size, int channels,
-                       int nb_samples, enum AVSampleFormat sample_fmt,
-                       int read_only, const char *name);
+int ff_audio_data_init(AudioData *a, uint8_t * const *src, int plane_size,
+                       int channels, int nb_samples,
+                       enum AVSampleFormat sample_fmt, int read_only,
+                       const char *name);
 
 /**
  * Allocate AudioData.
diff --git a/libavresample/avresample.h b/libavresample/avresample.h
index 5eb55cb6..1c2bce84 100644
--- a/libavresample/avresample.h
+++ b/libavresample/avresample.h
@@ -167,9 +167,13 @@ AVAudioResampleContext *avresample_alloc_context(void);
 /**
  * Initialize AVAudioResampleContext.
  * @note The context must be configured using the AVOption API.
+ * @note The fields "in_channel_layout", "out_channel_layout",
+ *       "in_sample_rate", "out_sample_rate", "in_sample_fmt",
+ *       "out_sample_fmt" must be set.
  *
  * @see av_opt_set_int()
  * @see av_opt_set_dict()
+ * @see av_get_default_channel_layout()
  *
  * @param avr  audio resample context
  * @return     0 on success, negative AVERROR code on failure
@@ -373,8 +377,9 @@ int avresample_get_out_samples(AVAudioResampleContext *avr, int in_nb_samples);
  *                        output FIFO
  */
 int avresample_convert(AVAudioResampleContext *avr, uint8_t **output,
-                       int out_plane_size, int out_samples, uint8_t **input,
-                       int in_plane_size, int in_samples);
+                       int out_plane_size, int out_samples,
+                       uint8_t * const *input, int in_plane_size,
+                       int in_samples);
 
 /**
  * Return the number of samples currently in the resampling delay buffer.
diff --git a/libavresample/resample.c b/libavresample/resample.c
index 01fc5001..651670d6 100644
--- a/libavresample/resample.c
+++ b/libavresample/resample.c
@@ -234,8 +234,6 @@ int avresample_set_compensation(AVAudioResampleContext *avr, int sample_delta,
                                 int compensation_distance)
 {
     ResampleContext *c;
-    AudioData *fifo_buf = NULL;
-    int ret = 0;
 
     if (compensation_distance < 0)
         return AVERROR(EINVAL);
@@ -243,62 +241,8 @@ int avresample_set_compensation(AVAudioResampleContext *avr, int sample_delta,
         return AVERROR(EINVAL);
 
     if (!avr->resample_needed) {
-#if FF_API_RESAMPLE_CLOSE_OPEN
-        /* if resampling was not enabled previously, re-initialize the
-           AVAudioResampleContext and force resampling */
-        int fifo_samples;
-        int restore_matrix = 0;
-        double matrix[AVRESAMPLE_MAX_CHANNELS * AVRESAMPLE_MAX_CHANNELS] = { 0 };
-
-        /* buffer any remaining samples in the output FIFO before closing */
-        fifo_samples = av_audio_fifo_size(avr->out_fifo);
-        if (fifo_samples > 0) {
-            fifo_buf = ff_audio_data_alloc(avr->out_channels, fifo_samples,
-                                           avr->out_sample_fmt, NULL);
-            if (!fifo_buf)
-                return AVERROR(EINVAL);
-            ret = ff_audio_data_read_from_fifo(avr->out_fifo, fifo_buf,
-                                               fifo_samples);
-            if (ret < 0)
-                goto reinit_fail;
-        }
-        /* save the channel mixing matrix */
-        if (avr->am) {
-            ret = avresample_get_matrix(avr, matrix, AVRESAMPLE_MAX_CHANNELS);
-            if (ret < 0)
-                goto reinit_fail;
-            restore_matrix = 1;
-        }
-
-        /* close the AVAudioResampleContext */
-        avresample_close(avr);
-
-        avr->force_resampling = 1;
-
-        /* restore the channel mixing matrix */
-        if (restore_matrix) {
-            ret = avresample_set_matrix(avr, matrix, AVRESAMPLE_MAX_CHANNELS);
-            if (ret < 0)
-                goto reinit_fail;
-        }
-
-        /* re-open the AVAudioResampleContext */
-        ret = avresample_open(avr);
-        if (ret < 0)
-            goto reinit_fail;
-
-        /* restore buffered samples to the output FIFO */
-        if (fifo_samples > 0) {
-            ret = ff_audio_data_add_to_fifo(avr->out_fifo, fifo_buf, 0,
-                                            fifo_samples);
-            if (ret < 0)
-                goto reinit_fail;
-            ff_audio_data_free(&fifo_buf);
-        }
-#else
         av_log(avr, AV_LOG_ERROR, "Unable to set resampling compensation\n");
         return AVERROR(EINVAL);
-#endif
     }
     c = avr->resample;
     c->compensation_distance = compensation_distance;
@@ -308,11 +252,8 @@ int avresample_set_compensation(AVAudioResampleContext *avr, int sample_delta,
     } else {
         c->dst_incr = c->ideal_dst_incr;
     }
-    return 0;
 
-reinit_fail:
-    ff_audio_data_free(&fifo_buf);
-    return ret;
+    return 0;
 }
 
 static int resample(ResampleContext *c, void *dst, const void *src,
diff --git a/libavresample/utils.c b/libavresample/utils.c
index e3185873..b4fb9065 100644
--- a/libavresample/utils.c
+++ b/libavresample/utils.c
@@ -329,7 +329,8 @@ static int handle_buffered_output(AVAudioResampleContext *avr,
 
 int attribute_align_arg avresample_convert(AVAudioResampleContext *avr,
                                            uint8_t **output, int out_plane_size,
-                                           int out_samples, uint8_t **input,
+                                           int out_samples,
+                                           uint8_t * const *input,
                                            int in_plane_size, int in_samples)
 {
     AudioData input_buffer;
diff --git a/libavresample/version.h b/libavresample/version.h
index 33ff2a2a..b5b0e2d7 100644
--- a/libavresample/version.h
+++ b/libavresample/version.h
@@ -27,8 +27,8 @@
 
 #include "libavutil/version.h"
 
-#define LIBAVRESAMPLE_VERSION_MAJOR  2
-#define LIBAVRESAMPLE_VERSION_MINOR  1
+#define LIBAVRESAMPLE_VERSION_MAJOR  3
+#define LIBAVRESAMPLE_VERSION_MINOR  0
 #define LIBAVRESAMPLE_VERSION_MICRO  0
 
 #define LIBAVRESAMPLE_VERSION_INT  AV_VERSION_INT(LIBAVRESAMPLE_VERSION_MAJOR, \
@@ -47,8 +47,4 @@
  * the public API and may change, break or disappear at any time.
  */
 
-#ifndef FF_API_RESAMPLE_CLOSE_OPEN
-#define FF_API_RESAMPLE_CLOSE_OPEN (LIBAVRESAMPLE_VERSION_MAJOR < 3)
-#endif
-
 #endif /* AVRESAMPLE_VERSION_H */
diff --git a/libavresample/x86/audio_convert.asm b/libavresample/x86/audio_convert.asm
index 3e21f268..c6a50152 100644
--- a/libavresample/x86/audio_convert.asm
+++ b/libavresample/x86/audio_convert.asm
@@ -36,7 +36,7 @@ pb_interleave_words: SHUFFLE_MASK_W  0,  4,  1,  5,  2,  6,  3,  7
 pb_deinterleave_words: SHUFFLE_MASK_W  0,  2,  4,  6,  1,  3,  5,  7
 pw_zero_even:     times 4 dw 0x0000, 0xffff
 
-SECTION_TEXT
+SECTION .text
 
 ;------------------------------------------------------------------------------
 ; void ff_conv_s16_to_s32(int32_t *dst, const int16_t *src, int len);
diff --git a/libavresample/x86/audio_mix.asm b/libavresample/x86/audio_mix.asm
index 64ab0399..fe27d6a6 100644
--- a/libavresample/x86/audio_mix.asm
+++ b/libavresample/x86/audio_mix.asm
@@ -22,7 +22,7 @@
 %include "libavutil/x86/x86util.asm"
 %include "util.asm"
 
-SECTION_TEXT
+SECTION .text
 
 ;-----------------------------------------------------------------------------
 ; void ff_mix_2_to_1_fltp_flt(float **src, float **matrix, int len,
diff --git a/libavresample/x86/dither.asm b/libavresample/x86/dither.asm
index 757f2800..d677c717 100644
--- a/libavresample/x86/dither.asm
+++ b/libavresample/x86/dither.asm
@@ -28,7 +28,7 @@ pf_dither_scale: times 8 dd 2.32830643762e-10
 
 pf_s16_scale: times 4 dd 32753.0
 
-SECTION_TEXT
+SECTION .text
 
 ;------------------------------------------------------------------------------
 ; void ff_quantize(int16_t *dst, float *src, float *dither, int len);
diff --git a/libavutil/Makefile b/libavutil/Makefile
index df85cd1a..65b2d258 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -4,9 +4,9 @@ NAME = avutil
 
 HEADERS = adler32.h                                                     \
           aes.h                                                         \
+          aes_ctr.h                                                     \
           attributes.h                                                  \
           audio_fifo.h                                                  \
-          audioconvert.h                                                \
           avassert.h                                                    \
           avstring.h                                                    \
           avutil.h                                                      \
@@ -21,6 +21,7 @@ HEADERS = adler32.h                                                     \
           common.h                                                      \
           cpu.h                                                         \
           crc.h                                                         \
+          des.h                                                         \
           display.h                                                     \
           downmix_info.h                                                \
           error.h                                                       \
@@ -37,18 +38,19 @@ HEADERS = adler32.h                                                     \
           log.h                                                         \
           macros.h                                                      \
           mathematics.h                                                 \
+          mastering_display_metadata.h                                  \
           md5.h                                                         \
           mem.h                                                         \
           motion_vector.h                                               \
           murmur3.h                                                     \
           dict.h                                                        \
-          old_pix_fmts.h                                                \
           opt.h                                                         \
           parseutils.h                                                  \
           pixdesc.h                                                     \
           pixelutils.h                                                  \
           pixfmt.h                                                      \
           random_seed.h                                                 \
+          rc4.h                                                         \
           replaygain.h                                                  \
           rational.h                                                    \
           ripemd.h                                                      \
@@ -60,9 +62,11 @@ HEADERS = adler32.h                                                     \
           time.h                                                        \
           timecode.h                                                    \
           timestamp.h                                                   \
+          tree.h                                                        \
           twofish.h                                                     \
           version.h                                                     \
           xtea.h                                                        \
+          tea.h                                                         \
 
 HEADERS-$(CONFIG_LZO)                   += lzo.h
 
@@ -78,7 +82,7 @@ BUILT_HEADERS = avconfig.h                                              \
 
 OBJS = adler32.o                                                        \
        aes.o                                                            \
-       atomic.o                                                         \
+       aes_ctr.o                                                        \
        audio_fifo.o                                                     \
        avstring.o                                                       \
        base64.o                                                         \
@@ -105,12 +109,14 @@ OBJS = adler32.o                                                        \
        hash.o                                                           \
        hmac.o                                                           \
        imgutils.o                                                       \
+       integer.o                                                        \
        intmath.o                                                        \
        lfg.o                                                            \
        lls.o                                                            \
        log.o                                                            \
        log2_tab.o                                                       \
        mathematics.o                                                    \
+       mastering_display_metadata.o                                     \
        md5.o                                                            \
        mem.o                                                            \
        murmur3.o                                                        \
@@ -121,6 +127,7 @@ OBJS = adler32.o                                                        \
        pixelutils.o                                                     \
        random_seed.o                                                    \
        rational.o                                                       \
+       reverse.o                                                        \
        rc4.o                                                            \
        ripemd.o                                                         \
        samplefmt.o                                                      \
@@ -135,6 +142,9 @@ OBJS = adler32.o                                                        \
        utils.o                                                          \
        xga_font_data.o                                                  \
        xtea.o                                                           \
+       tea.o                                                            \
+
+OBJS-$(!HAVE_ATOMICS_NATIVE)            += atomic.o                     \
 
 OBJS-$(CONFIG_LZO)                      += lzo.o
 OBJS-$(CONFIG_OPENCL)                   += opencl.o opencl_internal.o
@@ -144,8 +154,6 @@ OBJS += $(COMPAT_OBJS:%=../compat/%)
 # Windows resource file
 SLIBOBJS-$(HAVE_GNU_WINDRES)            += avutilres.o
 
-SKIPHEADERS          = old_pix_fmts.h
-
 SKIPHEADERS-$(HAVE_ATOMICS_GCC)        += atomic_gcc.h
 SKIPHEADERS-$(HAVE_ATOMICS_SUNCC)      += atomic_suncc.h
 SKIPHEADERS-$(HAVE_ATOMICS_WIN32)      += atomic_win32.h
@@ -190,6 +198,7 @@ TESTPROGS = adler32                                                     \
             twofish                                                     \
             utf8                                                        \
             xtea                                                        \
+            tea                                                         \
 
 TESTPROGS-$(HAVE_LZO1X_999_COMPRESS) += lzo
 
diff --git a/libavutil/aarch64/neontest.h b/libavutil/aarch64/neontest.h
index b9d49860..2d0fc199 100644
--- a/libavutil/aarch64/neontest.h
+++ b/libavutil/aarch64/neontest.h
@@ -20,6 +20,9 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#ifndef AVUTIL_AARCH64_NEONTEST_H
+#define AVUTIL_AARCH64_NEONTEST_H
+
 #include <inttypes.h>
 #include <stdint.h>
 #include <stdlib.h>
@@ -63,3 +66,5 @@
 int __real_ ## func;    \
 int __wrap_ ## func;    \
 int __wrap_ ## func
+
+#endif /* AVUTIL_AARCH64_NEONTEST_H */
diff --git a/libavcodec/cabac_tablegen.c b/libavutil/aarch64/timer.h
similarity index 59%
rename from libavcodec/cabac_tablegen.c
rename to libavutil/aarch64/timer.h
index ed544475..b5700394 100644
--- a/libavcodec/cabac_tablegen.c
+++ b/libavutil/aarch64/timer.h
@@ -1,7 +1,5 @@
 /*
- * Header file for hardcoded AAC SBR windows
- *
- * Copyright (c) 2014 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
+ * Copyright (c) 2015 Janne Grunau <janne-libav@jannau.net>
  *
  * This file is part of FFmpeg.
  *
@@ -20,22 +18,27 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include <stdlib.h>
-#include "libavutil/common.h"
-#include "cabac_functions.h"
-#undef CONFIG_HARDCODED_TABLES
-#define CONFIG_HARDCODED_TABLES 0
-av_const int av_log2(unsigned v) { int r = 0; while (v >>= 1) r++; return r; }
-#include "cabac_tablegen.h"
-#include "tableprint.h"
+#ifndef AVUTIL_AARCH64_TIMER_H
+#define AVUTIL_AARCH64_TIMER_H
 
-int main(void)
-{
-    cabac_tableinit();
+#include <stdint.h>
+#include "config.h"
+
+#if HAVE_INLINE_ASM
 
-    write_fileheader();
+#define AV_READ_TIME read_time
 
-    WRITE_ARRAY("const", uint8_t, ff_h264_cabac_tables);
+static inline uint64_t read_time(void)
+{
+    uint64_t cycle_counter;
+    __asm__ volatile(
+        "isb                   \t\n"
+        "mrs %0, pmccntr_el0       "
+        : "=r"(cycle_counter) :: "memory" );
 
-    return 0;
+    return cycle_counter;
 }
+
+#endif /* HAVE_INLINE_ASM */
+
+#endif /* AVUTIL_AARCH64_TIMER_H */
diff --git a/libavutil/adler32.c b/libavutil/adler32.c
index 579d0229..64c8767c 100644
--- a/libavutil/adler32.c
+++ b/libavutil/adler32.c
@@ -108,7 +108,7 @@ static volatile int checksum;
 int main(int argc, char **argv)
 {
     int i;
-    char data[LEN];
+    uint8_t data[LEN];
 
     av_log_set_level(AV_LOG_DEBUG);
 
diff --git a/libavutil/aes.c b/libavutil/aes.c
index fd870608..9096f03a 100644
--- a/libavutil/aes.c
+++ b/libavutil/aes.c
@@ -22,24 +22,10 @@
 
 #include "common.h"
 #include "aes.h"
+#include "aes_internal.h"
 #include "intreadwrite.h"
 #include "timer.h"
 
-typedef union {
-    uint64_t u64[2];
-    uint32_t u32[4];
-    uint8_t u8x4[4][4];
-    uint8_t u8[16];
-} av_aes_block;
-
-typedef struct AVAES {
-    // Note: round_key[16] is accessed in the init code, but this only
-    // overwrites state, which does not matter (see also commit ba554c0).
-    av_aes_block round_key[15];
-    av_aes_block state[2];
-    int rounds;
-} AVAES;
-
 const int av_aes_size= sizeof(AVAES);
 
 struct AVAES *av_aes_alloc(void)
@@ -140,31 +126,44 @@ static inline void aes_crypt(AVAES *a, int s, const uint8_t *sbox,
     subshift(&a->state[0], s, sbox);
 }
 
-void av_aes_crypt(AVAES *a, uint8_t *dst, const uint8_t *src,
-                  int count, uint8_t *iv, int decrypt)
+static void aes_encrypt(AVAES *a, uint8_t *dst, const uint8_t *src,
+                        int count, uint8_t *iv, int rounds)
 {
     while (count--) {
-        addkey_s(&a->state[1], src, &a->round_key[a->rounds]);
-        if (decrypt) {
-            aes_crypt(a, 0, inv_sbox, dec_multbl);
-            if (iv) {
-                addkey_s(&a->state[0], iv, &a->state[0]);
-                memcpy(iv, src, 16);
-            }
-            addkey_d(dst, &a->state[0], &a->round_key[0]);
-        } else {
-            if (iv)
-                addkey_s(&a->state[1], iv, &a->state[1]);
-            aes_crypt(a, 2, sbox, enc_multbl);
-            addkey_d(dst, &a->state[0], &a->round_key[0]);
-            if (iv)
-                memcpy(iv, dst, 16);
+        addkey_s(&a->state[1], src, &a->round_key[rounds]);
+        if (iv)
+            addkey_s(&a->state[1], iv, &a->state[1]);
+        aes_crypt(a, 2, sbox, enc_multbl);
+        addkey_d(dst, &a->state[0], &a->round_key[0]);
+        if (iv)
+            memcpy(iv, dst, 16);
+        src += 16;
+        dst += 16;
+    }
+}
+
+static void aes_decrypt(AVAES *a, uint8_t *dst, const uint8_t *src,
+                        int count, uint8_t *iv, int rounds)
+{
+    while (count--) {
+        addkey_s(&a->state[1], src, &a->round_key[rounds]);
+        aes_crypt(a, 0, inv_sbox, dec_multbl);
+        if (iv) {
+            addkey_s(&a->state[0], iv, &a->state[0]);
+            memcpy(iv, src, 16);
         }
+        addkey_d(dst, &a->state[0], &a->round_key[0]);
         src += 16;
         dst += 16;
     }
 }
 
+void av_aes_crypt(AVAES *a, uint8_t *dst, const uint8_t *src,
+                  int count, uint8_t *iv, int decrypt)
+{
+    a->crypt(a, dst, src, count, iv, a->rounds);
+}
+
 static void init_multbl2(uint32_t tbl[][256], const int c[4],
                          const uint8_t *log8, const uint8_t *alog8,
                          const uint8_t *sbox)
@@ -200,6 +199,8 @@ int av_aes_init(AVAES *a, const uint8_t *key, int key_bits, int decrypt)
     uint8_t log8[256];
     uint8_t alog8[512];
 
+    a->crypt = decrypt ? aes_decrypt : aes_encrypt;
+
     if (!enc_multbl[FF_ARRAY_ELEMS(enc_multbl)-1][FF_ARRAY_ELEMS(enc_multbl[0])-1]) {
         j = 1;
         for (i = 0; i < 255; i++) {
@@ -223,7 +224,7 @@ int av_aes_init(AVAES *a, const uint8_t *key, int key_bits, int decrypt)
     }
 
     if (key_bits != 128 && key_bits != 192 && key_bits != 256)
-        return -1;
+        return AVERROR(EINVAL);
 
     a->rounds = rounds;
 
@@ -279,7 +280,7 @@ int main(int argc, char **argv)
         { 0x10, 0xa5, 0x88, 0x69, 0xd7, 0x4b, 0xe5, 0xa3,
           0x74, 0xcf, 0x86, 0x7c, 0xfb, 0x47, 0x38, 0x59 }
     };
-    uint8_t pt[16], rpt[2][16]= {
+    uint8_t pt[32], rpt[2][16]= {
         { 0x6a, 0x84, 0x86, 0x7c, 0xd7, 0x7e, 0x12, 0xad,
           0x07, 0xea, 0x1b, 0xe8, 0x95, 0xc5, 0x3f, 0xa3 },
         { 0 }
@@ -290,7 +291,8 @@ int main(int argc, char **argv)
         { 0x6d, 0x25, 0x1e, 0x69, 0x44, 0xb0, 0x51, 0xe0,
           0x4e, 0xaa, 0x6f, 0xb4, 0xdb, 0xf7, 0x84, 0x65 }
     };
-    uint8_t temp[16];
+    uint8_t temp[32];
+    uint8_t iv[2][16];
     int err = 0;
 
     av_log_set_level(AV_LOG_DEBUG);
@@ -311,21 +313,29 @@ int main(int argc, char **argv)
         AVAES ae, ad;
         AVLFG prng;
 
-        av_aes_init(&ae, "PI=3.141592654..", 128, 0);
-        av_aes_init(&ad, "PI=3.141592654..", 128, 1);
+        av_aes_init(&ae, (const uint8_t*)"PI=3.141592654..", 128, 0);
+        av_aes_init(&ad, (const uint8_t*)"PI=3.141592654..", 128, 1);
         av_lfg_init(&prng, 1);
 
         for (i = 0; i < 10000; i++) {
-            for (j = 0; j < 16; j++) {
+            for (j = 0; j < 32; j++) {
                 pt[j] = av_lfg_get(&prng);
             }
+            for (j = 0; j < 16; j++) {
+                iv[0][j] = iv[1][j] = av_lfg_get(&prng);
+            }
             {
                 START_TIMER;
-                av_aes_crypt(&ae, temp, pt, 1, NULL, 0);
+                av_aes_crypt(&ae, temp, pt, 2, iv[0], 0);
+                if (!(i & (i - 1)))
+                    av_log(NULL, AV_LOG_ERROR, "%02X %02X %02X %02X\n",
+                           temp[0], temp[5], temp[10], temp[15]);
+                av_aes_crypt(&ad, temp, temp, 2, iv[1], 1);
+                av_aes_crypt(&ae, temp, pt, 2, NULL, 0);
                 if (!(i & (i - 1)))
                     av_log(NULL, AV_LOG_ERROR, "%02X %02X %02X %02X\n",
                            temp[0], temp[5], temp[10], temp[15]);
-                av_aes_crypt(&ad, temp, temp, 1, NULL, 1);
+                av_aes_crypt(&ad, temp, temp, 2, NULL, 1);
                 STOP_TIMER("aes");
             }
             for (j = 0; j < 16; j++) {
diff --git a/libavutil/aes_ctr.c b/libavutil/aes_ctr.c
new file mode 100644
index 00000000..e9c568fe
--- /dev/null
+++ b/libavutil/aes_ctr.c
@@ -0,0 +1,129 @@
+/*
+ * AES-CTR cipher
+ * Copyright (c) 2015 Eran Kornblau <erankor at gmail dot com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "common.h"
+#include "aes_ctr.h"
+#include "aes.h"
+#include "random_seed.h"
+
+#define AES_BLOCK_SIZE (16)
+
+typedef struct AVAESCTR {
+    struct AVAES* aes;
+    uint8_t counter[AES_BLOCK_SIZE];
+    uint8_t encrypted_counter[AES_BLOCK_SIZE];
+    int block_offset;
+} AVAESCTR;
+
+struct AVAESCTR *av_aes_ctr_alloc(void)
+{
+    return av_mallocz(sizeof(struct AVAESCTR));
+}
+
+void av_aes_ctr_set_iv(struct AVAESCTR *a, const uint8_t* iv)
+{
+    memcpy(a->counter, iv, AES_CTR_IV_SIZE);
+    memset(a->counter + AES_CTR_IV_SIZE, 0, sizeof(a->counter) - AES_CTR_IV_SIZE);
+    a->block_offset = 0;
+}
+
+const uint8_t* av_aes_ctr_get_iv(struct AVAESCTR *a)
+{
+    return a->counter;
+}
+
+void av_aes_ctr_set_random_iv(struct AVAESCTR *a)
+{
+    uint32_t iv[2];
+
+    iv[0] = av_get_random_seed();
+    iv[1] = av_get_random_seed();
+
+    av_aes_ctr_set_iv(a, (uint8_t*)iv);
+}
+
+int av_aes_ctr_init(struct AVAESCTR *a, const uint8_t *key)
+{
+    a->aes = av_aes_alloc();
+    if (!a->aes) {
+        return AVERROR(ENOMEM);
+    }
+
+    av_aes_init(a->aes, key, 128, 0);
+
+    memset(a->counter, 0, sizeof(a->counter));
+    a->block_offset = 0;
+
+    return 0;
+}
+
+void av_aes_ctr_free(struct AVAESCTR *a)
+{
+    if (a) {
+        av_freep(&a->aes);
+        av_free(a);
+    }
+}
+
+static void av_aes_ctr_increment_be64(uint8_t* counter)
+{
+    uint8_t* cur_pos;
+
+    for (cur_pos = counter + 7; cur_pos >= counter; cur_pos--) {
+        (*cur_pos)++;
+        if (*cur_pos != 0) {
+            break;
+        }
+    }
+}
+
+void av_aes_ctr_increment_iv(struct AVAESCTR *a)
+{
+    av_aes_ctr_increment_be64(a->counter);
+    memset(a->counter + AES_CTR_IV_SIZE, 0, sizeof(a->counter) - AES_CTR_IV_SIZE);
+    a->block_offset = 0;
+}
+
+void av_aes_ctr_crypt(struct AVAESCTR *a, uint8_t *dst, const uint8_t *src, int count)
+{
+    const uint8_t* src_end = src + count;
+    const uint8_t* cur_end_pos;
+    uint8_t* encrypted_counter_pos;
+
+    while (src < src_end) {
+        if (a->block_offset == 0) {
+            av_aes_crypt(a->aes, a->encrypted_counter, a->counter, 1, NULL, 0);
+
+            av_aes_ctr_increment_be64(a->counter + 8);
+        }
+
+        encrypted_counter_pos = a->encrypted_counter + a->block_offset;
+        cur_end_pos = src + AES_BLOCK_SIZE - a->block_offset;
+        cur_end_pos = FFMIN(cur_end_pos, src_end);
+
+        a->block_offset += cur_end_pos - src;
+        a->block_offset &= (AES_BLOCK_SIZE - 1);
+
+        while (src < cur_end_pos) {
+            *dst++ = *src++ ^ *encrypted_counter_pos++;
+        }
+    }
+}
diff --git a/libavutil/aes_ctr.h b/libavutil/aes_ctr.h
new file mode 100644
index 00000000..f596fa6a
--- /dev/null
+++ b/libavutil/aes_ctr.h
@@ -0,0 +1,83 @@
+/*
+ * AES-CTR cipher
+ * Copyright (c) 2015 Eran Kornblau <erankor at gmail dot com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_AES_CTR_H
+#define AVUTIL_AES_CTR_H
+
+#include <stdint.h>
+
+#include "attributes.h"
+#include "version.h"
+
+#define AES_CTR_KEY_SIZE (16)
+#define AES_CTR_IV_SIZE (8)
+
+struct AVAESCTR;
+
+/**
+ * Allocate an AVAESCTR context.
+ */
+struct AVAESCTR *av_aes_ctr_alloc(void);
+
+/**
+ * Initialize an AVAESCTR context.
+ * @param key encryption key, must have a length of AES_CTR_KEY_SIZE
+ */
+int av_aes_ctr_init(struct AVAESCTR *a, const uint8_t *key);
+
+/**
+ * Release an AVAESCTR context.
+ */
+void av_aes_ctr_free(struct AVAESCTR *a);
+
+/**
+ * Process a buffer using a previously initialized context.
+ * @param dst destination array, can be equal to src
+ * @param src source array, can be equal to dst
+ * @param size the size of src and dst, in bytes
+ */
+void av_aes_ctr_crypt(struct AVAESCTR *a, uint8_t *dst, const uint8_t *src, int size);
+
+/**
+ * Get the current iv
+ */
+const uint8_t* av_aes_ctr_get_iv(struct AVAESCTR *a);
+
+/**
+ * Generate a random iv
+ */
+void av_aes_ctr_set_random_iv(struct AVAESCTR *a);
+
+/**
+ * Forcefully change the iv
+ */
+void av_aes_ctr_set_iv(struct AVAESCTR *a, const uint8_t* iv);
+
+/**
+ * Increment the top 64 bits of the iv (performed after each frame)
+ */
+void av_aes_ctr_increment_iv(struct AVAESCTR *a);
+
+/**
+ * @}
+ */
+
+#endif /* AVUTIL_AES_CTR_H */
diff --git a/libavutil/aes_internal.h b/libavutil/aes_internal.h
new file mode 100644
index 00000000..49442587
--- /dev/null
+++ b/libavutil/aes_internal.h
@@ -0,0 +1,43 @@
+/*
+ * copyright (c) 2015 Rodger Combs <rodger.combs@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_AES_INTERNAL_H
+#define AVUTIL_AES_INTERNAL_H
+
+#include "mem.h"
+#include <stdint.h>
+
+typedef union {
+    uint64_t u64[2];
+    uint32_t u32[4];
+    uint8_t u8x4[4][4];
+    uint8_t u8[16];
+} av_aes_block;
+
+typedef struct AVAES {
+    // Note: round_key[16] is accessed in the init code, but this only
+    // overwrites state, which does not matter (see also commit ba554c0).
+    DECLARE_ALIGNED(16, av_aes_block, round_key)[15];
+    DECLARE_ALIGNED(16, av_aes_block, state)[2];
+    int rounds;
+    void (*crypt)(struct AVAES *a, uint8_t *dst, const uint8_t *src, int count, uint8_t *iv, int rounds);
+} AVAES;
+
+#endif /* AVUTIL_AES_INTERNAL_H */
diff --git a/libavutil/arm/bswap.h b/libavutil/arm/bswap.h
index ae5fdb7e..611ff0ad 100644
--- a/libavutil/arm/bswap.h
+++ b/libavutil/arm/bswap.h
@@ -44,7 +44,7 @@ static av_always_inline av_const unsigned av_bswap16(unsigned x)
 }
 #endif
 
-#if !AV_GCC_VERSION_AT_LEAST(4,5)
+#if AV_GCC_VERSION_AT_MOST(4,4)
 #define av_bswap32 av_bswap32
 static av_always_inline av_const uint32_t av_bswap32(uint32_t x)
 {
@@ -60,7 +60,7 @@ static av_always_inline av_const uint32_t av_bswap32(uint32_t x)
 #endif /* HAVE_ARMV6_INLINE */
     return x;
 }
-#endif /* !AV_GCC_VERSION_AT_LEAST(4,5) */
+#endif /* AV_GCC_VERSION_AT_MOST(4,4) */
 
 #endif /* __ARMCC_VERSION */
 
diff --git a/libavutil/arm/cpu.c b/libavutil/arm/cpu.c
index 02def0b4..3889ef01 100644
--- a/libavutil/arm/cpu.c
+++ b/libavutil/arm/cpu.c
@@ -137,6 +137,10 @@ int ff_get_cpu_flags_arm(void)
     if (flags & AV_CPU_FLAG_ARMV6T2)
         flags |= AV_CPU_FLAG_ARMV6;
 
+    /* set the virtual VFPv2 vector mode flag */
+    if ((flags & AV_CPU_FLAG_VFP) && !(flags & (AV_CPU_FLAG_VFPV3 | AV_CPU_FLAG_NEON)))
+        flags |= AV_CPU_FLAG_VFP_VM;
+
     return flags;
 }
 
diff --git a/libavutil/arm/cpu.h b/libavutil/arm/cpu.h
index 9b3b6ff5..eb64ed5f 100644
--- a/libavutil/arm/cpu.h
+++ b/libavutil/arm/cpu.h
@@ -31,4 +31,9 @@
 #define have_neon(flags)    CPUEXT(flags, NEON)
 #define have_setend(flags)  CPUEXT(flags, SETEND)
 
+/* some functions use the VFPv2 vector mode which is deprecated in ARMv7-A
+ * and might trap on such CPUs depending on the OS configuration */
+#define have_vfp_vm(flags)                                              \
+    (HAVE_VFP && ((flags) & AV_CPU_FLAG_VFP_VM))
+
 #endif /* AVUTIL_ARM_CPU_H */
diff --git a/libavutil/arm/intmath.h b/libavutil/arm/intmath.h
index 3216af00..65e42c57 100644
--- a/libavutil/arm/intmath.h
+++ b/libavutil/arm/intmath.h
@@ -31,9 +31,9 @@
 #if HAVE_ARMV6_INLINE
 
 #define av_clip_uint8 av_clip_uint8_arm
-static av_always_inline av_const unsigned av_clip_uint8_arm(int a)
+static av_always_inline av_const int av_clip_uint8_arm(int a)
 {
-    unsigned x;
+    int x;
     __asm__ ("usat %0, #8,  %1" : "=r"(x) : "r"(a));
     return x;
 }
@@ -47,9 +47,9 @@ static av_always_inline av_const int av_clip_int8_arm(int a)
 }
 
 #define av_clip_uint16 av_clip_uint16_arm
-static av_always_inline av_const unsigned av_clip_uint16_arm(int a)
+static av_always_inline av_const int av_clip_uint16_arm(int a)
 {
-    unsigned x;
+    int x;
     __asm__ ("usat %0, #16, %1" : "=r"(x) : "r"(a));
     return x;
 }
diff --git a/libavutil/arm/intreadwrite.h b/libavutil/arm/intreadwrite.h
index 2340a9a9..60fc860c 100644
--- a/libavutil/arm/intreadwrite.h
+++ b/libavutil/arm/intreadwrite.h
@@ -23,14 +23,14 @@
 #include "config.h"
 #include "libavutil/attributes.h"
 
-#if HAVE_FAST_UNALIGNED && HAVE_INLINE_ASM && !AV_GCC_VERSION_AT_LEAST(4,7)
+#if HAVE_FAST_UNALIGNED && HAVE_INLINE_ASM && AV_GCC_VERSION_AT_MOST(4,6)
 
 #define AV_RN16 AV_RN16
 static av_always_inline unsigned AV_RN16(const void *p)
 {
     const uint8_t *q = p;
     unsigned v;
-#if !AV_GCC_VERSION_AT_LEAST(4,6)
+#if AV_GCC_VERSION_AT_MOST(4,5)
     __asm__ ("ldrh %0, %1" : "=r"(v) : "m"(*(const uint16_t *)q));
 #elif defined __thumb__
     __asm__ ("ldrh %0, %1" : "=r"(v) : "m"(q[0]), "m"(q[1]));
diff --git a/libavutil/arm/neontest.h b/libavutil/arm/neontest.h
index f668c007..d75ab838 100644
--- a/libavutil/arm/neontest.h
+++ b/libavutil/arm/neontest.h
@@ -20,6 +20,9 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#ifndef AVUTIL_ARM_NEONTEST_H
+#define AVUTIL_ARM_NEONTEST_H
+
 #include <inttypes.h>
 #include <stdint.h>
 #include <stdlib.h>
@@ -60,3 +63,5 @@
 int __real_ ## func;    \
 int __wrap_ ## func;    \
 int __wrap_ ## func
+
+#endif /* AVUTIL_ARM_NEONTEST_H */
diff --git a/libavutil/attributes.h b/libavutil/attributes.h
index ebcdd6b8..5c6b9dee 100644
--- a/libavutil/attributes.h
+++ b/libavutil/attributes.h
@@ -28,8 +28,10 @@
 
 #ifdef __GNUC__
 #    define AV_GCC_VERSION_AT_LEAST(x,y) (__GNUC__ > (x) || __GNUC__ == (x) && __GNUC_MINOR__ >= (y))
+#    define AV_GCC_VERSION_AT_MOST(x,y)  (__GNUC__ < (x) || __GNUC__ == (x) && __GNUC_MINOR__ <= (y))
 #else
 #    define AV_GCC_VERSION_AT_LEAST(x,y) 0
+#    define AV_GCC_VERSION_AT_MOST(x,y)  0
 #endif
 
 #ifndef av_always_inline
@@ -50,6 +52,12 @@
 #endif
 #endif
 
+#if AV_GCC_VERSION_AT_LEAST(3,4)
+#    define av_warn_unused_result __attribute__((warn_unused_result))
+#else
+#    define av_warn_unused_result
+#endif
+
 #if AV_GCC_VERSION_AT_LEAST(3,1)
 #    define av_noinline __attribute__((noinline))
 #elif defined(_MSC_VER)
diff --git a/libavutil/audio_fifo.c b/libavutil/audio_fifo.c
index 574907aa..d5298cce 100644
--- a/libavutil/audio_fifo.c
+++ b/libavutil/audio_fifo.c
@@ -136,6 +136,25 @@ int av_audio_fifo_write(AVAudioFifo *af, void **data, int nb_samples)
     return nb_samples;
 }
 
+int av_audio_fifo_peek(AVAudioFifo *af, void **data, int nb_samples)
+{
+    int i, ret, size;
+
+    if (nb_samples < 0)
+        return AVERROR(EINVAL);
+    nb_samples = FFMIN(nb_samples, af->nb_samples);
+    if (!nb_samples)
+        return 0;
+
+    size = nb_samples * af->sample_size;
+    for (i = 0; i < af->nb_buffers; i++) {
+        if ((ret = av_fifo_generic_peek(af->buf[i], data[i], size, NULL)) < 0)
+            return AVERROR_BUG;
+    }
+
+    return nb_samples;
+}
+
 int av_audio_fifo_read(AVAudioFifo *af, void **data, int nb_samples)
 {
     int i, ret, size;
diff --git a/libavutil/audio_fifo.h b/libavutil/audio_fifo.h
index d21e6a13..24f91dab 100644
--- a/libavutil/audio_fifo.h
+++ b/libavutil/audio_fifo.h
@@ -73,6 +73,7 @@ AVAudioFifo *av_audio_fifo_alloc(enum AVSampleFormat sample_fmt, int channels,
  * @param nb_samples  new allocation size, in samples
  * @return            0 if OK, or negative AVERROR code on failure
  */
+av_warn_unused_result
 int av_audio_fifo_realloc(AVAudioFifo *af, int nb_samples);
 
 /**
@@ -93,6 +94,22 @@ int av_audio_fifo_realloc(AVAudioFifo *af, int nb_samples);
  */
 int av_audio_fifo_write(AVAudioFifo *af, void **data, int nb_samples);
 
+/**
+ * Peek data from an AVAudioFifo.
+ *
+ * @see enum AVSampleFormat
+ * The documentation for AVSampleFormat describes the data layout.
+ *
+ * @param af          AVAudioFifo to read from
+ * @param data        audio data plane pointers
+ * @param nb_samples  number of samples to peek
+ * @return            number of samples actually peeked, or negative AVERROR code
+ *                    on failure. The number of samples actually peeked will not
+ *                    be greater than nb_samples, and will only be less than
+ *                    nb_samples if av_audio_fifo_size is less than nb_samples.
+ */
+int av_audio_fifo_peek(AVAudioFifo *af, void **data, int nb_samples);
+
 /**
  * Read data from an AVAudioFifo.
  *
diff --git a/libavutil/audioconvert.h b/libavutil/audioconvert.h
deleted file mode 100644
index 300a67cd..00000000
--- a/libavutil/audioconvert.h
+++ /dev/null
@@ -1,6 +0,0 @@
-
-#include "version.h"
-
-#if FF_API_AUDIOCONVERT
-#include "channel_layout.h"
-#endif
diff --git a/libavutil/avstring.c b/libavutil/avstring.c
index 5fa295d5..85fb3e92 100644
--- a/libavutil/avstring.c
+++ b/libavutil/avstring.c
@@ -317,28 +317,6 @@ int av_escape(char **dst, const char *src, const char *special_chars,
     }
 }
 
-int av_isdigit(int c)
-{
-    return c >= '0' && c <= '9';
-}
-
-int av_isgraph(int c)
-{
-    return c > 32 && c < 127;
-}
-
-int av_isspace(int c)
-{
-    return c == ' ' || c == '\f' || c == '\n' || c == '\r' || c == '\t' ||
-           c == '\v';
-}
-
-int av_isxdigit(int c)
-{
-    c = av_tolower(c);
-    return av_isdigit(c) || (c >= 'a' && c <= 'f');
-}
-
 int av_match_name(const char *name, const char *names)
 {
     const char *p;
@@ -348,13 +326,18 @@ int av_match_name(const char *name, const char *names)
         return 0;
 
     namelen = strlen(name);
-    while ((p = strchr(names, ','))) {
+    while (*names) {
+        int negate = '-' == *names;
+        p = strchr(names, ',');
+        if (!p)
+            p = names + strlen(names);
+        names += negate;
         len = FFMAX(p - names, namelen);
-        if (!av_strncasecmp(name, names, len))
-            return 1;
-        names = p + 1;
+        if (!av_strncasecmp(name, names, len) || !strncmp("ALL", names, FFMAX(3, p - names)))
+            return !negate;
+        names = p + (*p == ',');
     }
-    return !av_strcasecmp(name, names);
+    return 0;
 }
 
 int av_utf8_decode(int32_t *codep, const uint8_t **bufp, const uint8_t *buf_end,
@@ -408,7 +391,7 @@ int av_utf8_decode(int32_t *codep, const uint8_t **bufp, const uint8_t *buf_end,
         goto end;
     }
 
-    if (code >= 1<<31) {
+    if (code >= 1U<<31) {
         ret = AVERROR(EILSEQ);  /* out-of-range value */
         goto end;
     }
diff --git a/libavutil/avstring.h b/libavutil/avstring.h
index 466edaf9..15b04bab 100644
--- a/libavutil/avstring.h
+++ b/libavutil/avstring.h
@@ -203,17 +203,27 @@ char *av_strtok(char *s, const char *delim, char **saveptr);
 /**
  * Locale-independent conversion of ASCII isdigit.
  */
-av_const int av_isdigit(int c);
+static inline av_const int av_isdigit(int c)
+{
+    return c >= '0' && c <= '9';
+}
 
 /**
  * Locale-independent conversion of ASCII isgraph.
  */
-av_const int av_isgraph(int c);
+static inline av_const int av_isgraph(int c)
+{
+    return c > 32 && c < 127;
+}
 
 /**
  * Locale-independent conversion of ASCII isspace.
  */
-av_const int av_isspace(int c);
+static inline av_const int av_isspace(int c)
+{
+    return c == ' ' || c == '\f' || c == '\n' || c == '\r' || c == '\t' ||
+           c == '\v';
+}
 
 /**
  * Locale-independent conversion of ASCII characters to uppercase.
@@ -238,7 +248,11 @@ static inline av_const int av_tolower(int c)
 /**
  * Locale-independent conversion of ASCII isxdigit.
  */
-av_const int av_isxdigit(int c);
+static inline av_const int av_isxdigit(int c)
+{
+    c = av_tolower(c);
+    return av_isdigit(c) || (c >= 'a' && c <= 'f');
+}
 
 /**
  * Locale-independent case-insensitive compare.
@@ -270,6 +284,11 @@ const char *av_dirname(char *path);
 
 /**
  * Match instances of a name in a comma-separated list of names.
+ * List entries are checked from the start to the end of the names list,
+ * the first match ends further processing. If an entry prefixed with '-'
+ * matches, then 0 is returned. The "ALL" list entry is considered to
+ * match all names.
+ *
  * @param name  Name to look for.
  * @param names List of names.
  * @return 1 on match, 0 otherwise.
@@ -300,14 +319,14 @@ enum AVEscapeMode {
  * characters lists, except it is guaranteed to use the exact same list
  * of whitespace characters as the rest of libavutil.
  */
-#define AV_ESCAPE_FLAG_WHITESPACE 0x01
+#define AV_ESCAPE_FLAG_WHITESPACE (1 << 0)
 
 /**
  * Escape only specified special characters.
  * Without this flag, escape also any characters that may be considered
  * special by av_get_token(), such as the single quote.
  */
-#define AV_ESCAPE_FLAG_STRICT 0x02
+#define AV_ESCAPE_FLAG_STRICT (1 << 1)
 
 /**
  * Escape string in src, and put the escaped string in an allocated
@@ -325,6 +344,7 @@ enum AVEscapeMode {
  * @return the length of the allocated string, or a negative error code in case of error
  * @see av_bprint_escape()
  */
+av_warn_unused_result
 int av_escape(char **dst, const char *src, const char *special_chars,
               enum AVEscapeMode mode, int flags);
 
@@ -364,6 +384,7 @@ int av_escape(char **dst, const char *src, const char *special_chars,
  * @return >= 0 in case a sequence was successfully read, a negative
  * value in case of invalid sequence
  */
+av_warn_unused_result
 int av_utf8_decode(int32_t *codep, const uint8_t **bufp, const uint8_t *buf_end,
                    unsigned int flags);
 
diff --git a/libavutil/avutil.h b/libavutil/avutil.h
index e6ebb6c4..9bcf6741 100644
--- a/libavutil/avutil.h
+++ b/libavutil/avutil.h
@@ -138,14 +138,6 @@
  *
  * @{
  *
- * @defgroup lavu_internal Internal
- *
- * Not exported functions, for internal usage only
- *
- * @{
- *
- * @}
- *
  * @defgroup preproc_misc Preprocessor String Macros
  *
  * @{
@@ -170,6 +162,13 @@
  */
 unsigned avutil_version(void);
 
+/**
+ * Return an informative version string. This usually is the actual release
+ * version number or a git commit description. This string has no fixed format
+ * and can change any time. It should never be parsed by code.
+ */
+const char *av_version_info(void);
+
 /**
  * Return the libavutil build-time configuration.
  */
diff --git a/libavutil/blowfish.c b/libavutil/blowfish.c
index 38214275..0ab104eb 100644
--- a/libavutil/blowfish.c
+++ b/libavutil/blowfish.c
@@ -24,6 +24,7 @@
 #include "avutil.h"
 #include "common.h"
 #include "intreadwrite.h"
+#include "mem.h"
 #include "blowfish.h"
 
 static const uint32_t orig_p[AV_BF_ROUNDS + 2] = {
@@ -300,6 +301,11 @@ static const uint32_t orig_s[4][256] = {
            + ctx->s[3][ Xl        & 0xFF])\
            ^ P;
 
+AVBlowfish *av_blowfish_alloc(void)
+{
+    return av_mallocz(sizeof(struct AVBlowfish));
+}
+
 av_cold void av_blowfish_init(AVBlowfish *ctx, const uint8_t *key, int key_len)
 {
     uint32_t data, data_l, data_r;
diff --git a/libavutil/blowfish.h b/libavutil/blowfish.h
index 0b004532..9e289a40 100644
--- a/libavutil/blowfish.h
+++ b/libavutil/blowfish.h
@@ -37,6 +37,11 @@ typedef struct AVBlowfish {
     uint32_t s[4][256];
 } AVBlowfish;
 
+/**
+ * Allocate an AVBlowfish context.
+ */
+AVBlowfish *av_blowfish_alloc(void);
+
 /**
  * Initialize an AVBlowfish context.
  *
diff --git a/libavutil/camellia.c b/libavutil/camellia.c
index 483eed22..f21ca126 100644
--- a/libavutil/camellia.c
+++ b/libavutil/camellia.c
@@ -354,7 +354,7 @@ av_cold int av_camellia_init(AVCAMELLIA *cs, const uint8_t *key, int key_bits)
     uint64_t Kl[2], Kr[2], Ka[2], Kb[2];
     uint64_t D1, D2;
     if (key_bits != 128 && key_bits != 192 && key_bits != 256)
-        return -1;
+        return AVERROR(EINVAL);
     memset(Kb, 0, sizeof(Kb));
     memset(Kr, 0, sizeof(Kr));
     cs->key_bits = key_bits;
diff --git a/libavutil/cast5.c b/libavutil/cast5.c
index 98aa19d6..a47697b2 100644
--- a/libavutil/cast5.c
+++ b/libavutil/cast5.c
@@ -459,7 +459,7 @@ av_cold int av_cast5_init(AVCAST5* cs, const uint8_t *key, int key_bits)
     int i;
     uint32_t p[4], q[4];
     if (key_bits % 8 || key_bits < 40 || key_bits > 128)
-        return -1;
+        return AVERROR(EINVAL);
     memset(newKey, 0, sizeof(newKey));
     memcpy(newKey, key, key_bits >> 3);
 
diff --git a/libavutil/cast5.h b/libavutil/cast5.h
index e5cc8b11..ad5b347e 100644
--- a/libavutil/cast5.h
+++ b/libavutil/cast5.h
@@ -48,6 +48,7 @@ struct AVCAST5 *av_cast5_alloc(void);
   * @param ctx an AVCAST5 context
   * @param key a key of 5,6,...16 bytes used for encryption/decryption
   * @param key_bits number of keybits: possible are 40,48,...,128
+  * @return 0 on success, less than 0 on failure
  */
 int av_cast5_init(struct AVCAST5 *ctx, const uint8_t *key, int key_bits);
 
diff --git a/libavutil/channel_layout.c b/libavutil/channel_layout.c
index cd5cf426..26c87c96 100644
--- a/libavutil/channel_layout.c
+++ b/libavutil/channel_layout.c
@@ -94,7 +94,7 @@ static const struct {
     { "6.0(front)",  6,  AV_CH_LAYOUT_6POINT0_FRONT },
     { "hexagonal",   6,  AV_CH_LAYOUT_HEXAGONAL },
     { "6.1",         7,  AV_CH_LAYOUT_6POINT1 },
-    { "6.1",         7,  AV_CH_LAYOUT_6POINT1_BACK },
+    { "6.1(back)",   7,  AV_CH_LAYOUT_6POINT1_BACK },
     { "6.1(front)",  7,  AV_CH_LAYOUT_6POINT1_FRONT },
     { "7.0",         7,  AV_CH_LAYOUT_7POINT0 },
     { "7.0(front)",  7,  AV_CH_LAYOUT_7POINT0_FRONT },
@@ -102,14 +102,11 @@ static const struct {
     { "7.1(wide)",   8,  AV_CH_LAYOUT_7POINT1_WIDE_BACK },
     { "7.1(wide-side)",   8,  AV_CH_LAYOUT_7POINT1_WIDE },
     { "octagonal",   8,  AV_CH_LAYOUT_OCTAGONAL },
+    { "hexadecagonal", 16, AV_CH_LAYOUT_HEXADECAGONAL },
     { "downmix",     2,  AV_CH_LAYOUT_STEREO_DOWNMIX, },
 };
 
-#if FF_API_GET_CHANNEL_LAYOUT_COMPAT
-static uint64_t get_channel_layout_single(const char *name, int name_len, int compat)
-#else
 static uint64_t get_channel_layout_single(const char *name, int name_len)
-#endif
 {
     int i;
     char *end;
@@ -125,41 +122,21 @@ static uint64_t get_channel_layout_single(const char *name, int name_len)
             strlen(channel_names[i].name) == name_len &&
             !memcmp(channel_names[i].name, name, name_len))
             return (int64_t)1 << i;
+
+    errno = 0;
     i = strtol(name, &end, 10);
 
-#if FF_API_GET_CHANNEL_LAYOUT_COMPAT
-    if (compat) {
-        if (end - name == name_len ||
-            (end + 1 - name == name_len && *end  == 'c')) {
-            layout = av_get_default_channel_layout(i);
-            if (end - name == name_len) {
-                av_log(NULL, AV_LOG_WARNING,
-                       "Single channel layout '%.*s' is interpreted as a number of channels, "
-                       "switch to the syntax '%.*sc' otherwise it will be interpreted as a "
-                       "channel layout number in a later version\n",
-                       name_len, name, name_len, name);
-            }
-            return layout;
-        }
-    } else {
-#endif
-    if ((end + 1 - name == name_len && *end  == 'c'))
+    if (!errno && (end + 1 - name == name_len && *end  == 'c'))
         return av_get_default_channel_layout(i);
-#if FF_API_GET_CHANNEL_LAYOUT_COMPAT
-    }
-#endif
 
+    errno = 0;
     layout = strtoll(name, &end, 0);
-    if (end - name == name_len)
+    if (!errno && end - name == name_len)
         return FFMAX(layout, 0);
     return 0;
 }
 
-#if FF_API_GET_CHANNEL_LAYOUT_COMPAT
-uint64_t ff_get_channel_layout(const char *name, int compat)
-#else
 uint64_t av_get_channel_layout(const char *name)
-#endif
 {
     const char *n, *e;
     const char *name_end = name + strlen(name);
@@ -167,11 +144,7 @@ uint64_t av_get_channel_layout(const char *name)
 
     for (n = name; n < name_end; n = e + 1) {
         for (e = n; e < name_end && *e != '+' && *e != '|'; e++);
-#if FF_API_GET_CHANNEL_LAYOUT_COMPAT
-        layout_single = get_channel_layout_single(n, e - n, compat);
-#else
         layout_single = get_channel_layout_single(n, e - n);
-#endif
         if (!layout_single)
             return 0;
         layout |= layout_single;
@@ -179,13 +152,6 @@ uint64_t av_get_channel_layout(const char *name)
     return layout;
 }
 
-#if FF_API_GET_CHANNEL_LAYOUT_COMPAT
-uint64_t av_get_channel_layout(const char *name)
-{
-    return ff_get_channel_layout(name, 1);
-}
-#endif
-
 void av_bprint_channel_layout(struct AVBPrint *bp,
                               int nb_channels, uint64_t channel_layout)
 {
diff --git a/libavutil/channel_layout.h b/libavutil/channel_layout.h
index dea4d609..ec7effea 100644
--- a/libavutil/channel_layout.h
+++ b/libavutil/channel_layout.h
@@ -108,6 +108,7 @@
 #define AV_CH_LAYOUT_7POINT1_WIDE      (AV_CH_LAYOUT_5POINT1|AV_CH_FRONT_LEFT_OF_CENTER|AV_CH_FRONT_RIGHT_OF_CENTER)
 #define AV_CH_LAYOUT_7POINT1_WIDE_BACK (AV_CH_LAYOUT_5POINT1_BACK|AV_CH_FRONT_LEFT_OF_CENTER|AV_CH_FRONT_RIGHT_OF_CENTER)
 #define AV_CH_LAYOUT_OCTAGONAL         (AV_CH_LAYOUT_5POINT0|AV_CH_BACK_LEFT|AV_CH_BACK_CENTER|AV_CH_BACK_RIGHT)
+#define AV_CH_LAYOUT_HEXADECAGONAL     (AV_CH_LAYOUT_OCTAGONAL|AV_CH_WIDE_LEFT|AV_CH_WIDE_RIGHT|AV_CH_TOP_BACK_LEFT|AV_CH_TOP_BACK_RIGHT|AV_CH_TOP_BACK_CENTER|AV_CH_TOP_FRONT_CENTER|AV_CH_TOP_FRONT_LEFT|AV_CH_TOP_FRONT_RIGHT)
 #define AV_CH_LAYOUT_STEREO_DOWNMIX    (AV_CH_STEREO_LEFT|AV_CH_STEREO_RIGHT)
 
 enum AVMatrixEncoding {
diff --git a/libavutil/color_utils.c b/libavutil/color_utils.c
index 59146be8..b68b4026 100644
--- a/libavutil/color_utils.c
+++ b/libavutil/color_utils.c
@@ -18,6 +18,9 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include <stddef.h>
+#include <math.h>
+
 #include "libavutil/color_utils.h"
 #include "libavutil/pixfmt.h"
 
@@ -50,3 +53,166 @@ double avpriv_get_gamma_from_trc(enum AVColorTransferCharacteristic trc)
     }
     return gamma;
 }
+
+#define BT709_alpha 1.099296826809442
+#define BT709_beta 0.018053968510807
+
+static double avpriv_trc_bt709(double Lc)
+{
+    const double a = BT709_alpha;
+    const double b = BT709_beta;
+
+    return (0.0 > Lc) ? 0.0
+         : (  b > Lc) ? 4.500 * Lc
+         :              a * pow(Lc, 0.45) - (a - 1.0);
+}
+
+static double avpriv_trc_gamma22(double Lc)
+{
+    return (0.0 > Lc) ? 0.0 : pow(Lc, 1.0/ 2.2);
+}
+
+static double avpriv_trc_gamma28(double Lc)
+{
+    return (0.0 > Lc) ? 0.0 : pow(Lc, 1.0/ 2.8);
+}
+
+static double avpriv_trc_smpte240M(double Lc)
+{
+    const double a = 1.1115;
+    const double b = 0.0228;
+
+    return (0.0 > Lc) ? 0.0
+         : (  b > Lc) ? 4.000 * Lc
+         :              a * pow(Lc, 0.45) - (a - 1.0);
+}
+
+static double avpriv_trc_linear(double Lc)
+{
+    return Lc;
+}
+
+static double avpriv_trc_log(double Lc)
+{
+    return (0.01 > Lc) ? 0.0 : 1.0 + log10(Lc) / 2.0;
+}
+
+static double avpriv_trc_log_sqrt(double Lc)
+{
+    // sqrt(10) / 1000
+    return (0.00316227766 > Lc) ? 0.0 : 1.0 + log10(Lc) / 2.5;
+}
+
+static double avpriv_trc_iec61966_2_4(double Lc)
+{
+    const double a = BT709_alpha;
+    const double b = BT709_beta;
+
+    return (-b >= Lc) ? -a * pow(-Lc, 0.45) + (a - 1.0)
+         : ( b >  Lc) ? 4.500 * Lc
+         :               a * pow( Lc, 0.45) - (a - 1.0);
+}
+
+static double avpriv_trc_bt1361(double Lc)
+{
+    const double a = BT709_alpha;
+    const double b = BT709_beta;
+    /* Negative lobe is -(a * (-4 * Lc)^0.45 - (a - 1)) / 4; it meets 4.5 * Lc near -0.0045. */
+    return (-0.0045 >= Lc) ? -(a * pow(-4.0 * Lc, 0.45) - (a - 1.0)) / 4.0
+         : ( b >  Lc) ? 4.500 * Lc
+         :               a * pow( Lc, 0.45) - (a - 1.0);
+}
+
+static double avpriv_trc_iec61966_2_1(double Lc)
+{
+    const double a = 1.055;
+    const double b = 0.0031308;
+
+    return (0.0 > Lc) ? 0.0
+         : (  b > Lc) ? 12.92 * Lc
+         :              a * pow(Lc, 1.0  / 2.4) - (a - 1.0);
+}
+
+static double avpriv_trc_smpte_st2084(double Lc)
+{
+    const double c1 =         3424.0 / 4096.0; // c3-c2 + 1
+    const double c2 =  32.0 * 2413.0 / 4096.0;
+    const double c3 =  32.0 * 2392.0 / 4096.0;
+    const double m  = 128.0 * 2523.0 / 4096.0;
+    const double n  =  0.25 * 2610.0 / 4096.0;
+    const double L  = Lc / 10000.0;
+    /* Clamp the base first: pow() of a negative base with a fractional exponent is a domain error. */
+    const double Ln = pow((0.0 > L) ? 0.0 : L, n);
+
+    return (0.0 > Lc) ? 0.0
+         :              pow((c1 + c2 * Ln) / (1.0 + c3 * Ln), m);
+}
+
+static double avpriv_trc_smpte_st428_1(double Lc)
+{
+    return (0.0 > Lc) ? 0.0
+         :              pow(48.0 * Lc / 52.37, 1.0 / 2.6);
+}
+
+avpriv_trc_function avpriv_get_trc_function_from_trc(enum AVColorTransferCharacteristic trc)
+{
+    avpriv_trc_function func = NULL;
+    switch (trc) {
+        case AVCOL_TRC_BT709:
+        case AVCOL_TRC_SMPTE170M:
+        case AVCOL_TRC_BT2020_10:
+        case AVCOL_TRC_BT2020_12:
+            func = avpriv_trc_bt709;
+            break;
+
+        case AVCOL_TRC_GAMMA22:
+            func = avpriv_trc_gamma22;
+            break;
+        case AVCOL_TRC_GAMMA28:
+            func = avpriv_trc_gamma28;
+            break;
+
+        case AVCOL_TRC_SMPTE240M:
+            func = avpriv_trc_smpte240M;
+            break;
+
+        case AVCOL_TRC_LINEAR:
+            func = avpriv_trc_linear;
+            break;
+
+        case AVCOL_TRC_LOG:
+            func = avpriv_trc_log;
+            break;
+
+        case AVCOL_TRC_LOG_SQRT:
+            func = avpriv_trc_log_sqrt;
+            break;
+
+        case AVCOL_TRC_IEC61966_2_4:
+            func = avpriv_trc_iec61966_2_4;
+            break;
+
+        case AVCOL_TRC_BT1361_ECG:
+            func = avpriv_trc_bt1361;
+            break;
+
+        case AVCOL_TRC_IEC61966_2_1:
+            func = avpriv_trc_iec61966_2_1;
+            break;
+
+        case AVCOL_TRC_SMPTEST2084:
+            func = avpriv_trc_smpte_st2084;
+            break;
+
+        case AVCOL_TRC_SMPTEST428_1:
+            func = avpriv_trc_smpte_st428_1;
+            break;
+
+        case AVCOL_TRC_RESERVED0:
+        case AVCOL_TRC_UNSPECIFIED:
+        case AVCOL_TRC_RESERVED:
+        default:
+            break;
+    }
+    return func;
+}
diff --git a/libavutil/color_utils.h b/libavutil/color_utils.h
index 3600a72d..95290064 100644
--- a/libavutil/color_utils.h
+++ b/libavutil/color_utils.h
@@ -36,4 +36,21 @@
  */
 double avpriv_get_gamma_from_trc(enum AVColorTransferCharacteristic trc);
 
+
+typedef double (*avpriv_trc_function)(double);
+
+/**
+ * Determine the function needed to apply the given
+ * AVColorTransferCharacteristic to linear input.
+ *
+ * The function returned should expect a nominal domain and range of [0.0-1.0];
+ * values outside of this range may be valid depending on the chosen
+ * characteristic function.
+ *
+ * @return Pointer to the function matching the supplied transfer
+ *         characteristic, or NULL if the characteristic is unspecified,
+ *         reserved or unknown.
+ */
+avpriv_trc_function avpriv_get_trc_function_from_trc(enum AVColorTransferCharacteristic trc);
+
 #endif
diff --git a/libavutil/colorspace.h b/libavutil/colorspace.h
index 826ffd52..7d3f7110 100644
--- a/libavutil/colorspace.h
+++ b/libavutil/colorspace.h
@@ -41,6 +41,16 @@
     b_add = FIX(1.77200*255.0/224.0) * cb + ONE_HALF;\
 }
 
+#define YUV_TO_RGB1_CCIR_BT709(cb1, cr1)\
+{\
+    cb = (cb1) - 128;\
+    cr = (cr1) - 128;\
+    r_add = FIX(1.5747*255.0/224.0) * cr + ONE_HALF;\
+    g_add = - FIX(0.1873*255.0/224.0) * cb - FIX(0.4682*255.0/224.0) * cr + \
+            ONE_HALF;\
+    b_add = FIX(1.8556*255.0/224.0) * cb + ONE_HALF;\
+}
+
 #define YUV_TO_RGB2_CCIR(r, g, b, y1)\
 {\
     y = ((y1) - 16) * FIX(255.0/219.0);\
diff --git a/libavutil/common.h b/libavutil/common.h
index 3e62b6d5..8142b31f 100644
--- a/libavutil/common.h
+++ b/libavutil/common.h
@@ -40,6 +40,7 @@
 #include <string.h>
 
 #include "attributes.h"
+#include "macros.h"
 #include "version.h"
 #include "libavutil/avconfig.h"
 
@@ -53,14 +54,43 @@
 #define RSHIFT(a,b) ((a) > 0 ? ((a) + ((1<<(b))>>1))>>(b) : ((a) + ((1<<(b))>>1)-1)>>(b))
 /* assume b>0 */
 #define ROUNDED_DIV(a,b) (((a)>0 ? (a) + ((b)>>1) : (a) - ((b)>>1))/(b))
-/* assume a>0 and b>0 */
-#define FF_CEIL_RSHIFT(a,b) (!av_builtin_constant_p(b) ? -((-(a)) >> (b)) \
+/* Fast a/(1<<b) rounded toward +inf. Assume a>=0 and b>=0 */
+#define AV_CEIL_RSHIFT(a,b) (!av_builtin_constant_p(b) ? -((-(a)) >> (b)) \
                                                        : ((a) + (1<<(b)) - 1) >> (b))
+/* Backwards compat. */
+#define FF_CEIL_RSHIFT AV_CEIL_RSHIFT
+
 #define FFUDIV(a,b) (((a)>0 ?(a):(a)-(b)+1) / (b))
 #define FFUMOD(a,b) ((a)-(b)*FFUDIV(a,b))
+
+/**
+ * Absolute value. Note that INT_MIN / INT64_MIN result in undefined behavior as
+ * they are not representable as absolute values of their type. This is the same
+ * as with *abs().
+ * @see FFNABS()
+ */
 #define FFABS(a) ((a) >= 0 ? (a) : (-(a)))
 #define FFSIGN(a) ((a) > 0 ? 1 : -1)
 
+/**
+ * Negative absolute value.
+ * This works for all integers of all types.
+ * As with many macros, this evaluates its argument twice; it thus must not have
+ * a side effect, that is FFNABS(x++) has undefined behavior.
+ */
+#define FFNABS(a) ((a) <= 0 ? (a) : (-(a)))
+
+/**
+ * Comparator.
+ * For two numerical expressions x and y, gives 1 if x > y, -1 if x < y, and 0
+ * if x == y. This is useful for instance in a qsort comparator callback.
+ * Furthermore, compilers are able to optimize this to branchless code, and
+ * there is no risk of overflow with signed types.
+ * As with many macros, this evaluates its arguments multiple times; they thus
+ * must not have side effects.
+ */
+#define FFDIFFSIGN(x,y) (((x)>(y)) - ((x)<(y)))
+
 #define FFMAX(a,b) ((a) > (b) ? (a) : (b))
 #define FFMAX3(a,b,c) FFMAX(FFMAX(a,b),c)
 #define FFMIN(a,b) ((a) > (b) ? (b) : (a))
@@ -68,17 +98,9 @@
 
 #define FFSWAP(type,a,b) do{type SWAP_tmp= b; b= a; a= SWAP_tmp;}while(0)
 #define FF_ARRAY_ELEMS(a) (sizeof(a) / sizeof((a)[0]))
-#define FFALIGN(x, a) (((x)+(a)-1)&~((a)-1))
 
 /* misc math functions */
 
-/**
- * Reverse the order of the bits of an 8-bits unsigned integer.
- */
-#if FF_API_AV_REVERSE
-extern attribute_deprecated const uint8_t av_reverse[256];
-#endif
-
 #ifdef HAVE_AV_CONFIG_H
 #   include "config.h"
 #   include "intmath.h"
@@ -192,7 +214,7 @@ static av_always_inline av_const int32_t av_clipl_int32_c(int64_t a)
  */
 static av_always_inline av_const int av_clip_intp2_c(int a, int p)
 {
-    if ((a + (1 << p)) & ~((2 << p) - 1))
+    if (((unsigned)a + (1 << p)) & ~((2 << p) - 1))
         return (a >> 31) ^ ((1 << p) - 1);
     else
         return a;
@@ -312,6 +334,11 @@ static av_always_inline av_const int av_popcount64_c(uint64_t x)
     return av_popcount((uint32_t)x) + av_popcount((uint32_t)(x >> 32));
 }
 
+static av_always_inline av_const int av_parity_c(uint32_t v)
+{
+    return av_popcount(v) & 1;
+}
+
 #define MKTAG(a,b,c,d) ((a) | ((b) << 8) | ((c) << 16) | ((unsigned)(d) << 24))
 #define MKBETAG(a,b,c,d) ((d) | ((c) << 8) | ((b) << 16) | ((unsigned)(a) << 24))
 
@@ -332,13 +359,13 @@ static av_always_inline av_const int av_popcount64_c(uint64_t x)
  * to prevent undefined results.
  */
 #define GET_UTF8(val, GET_BYTE, ERROR)\
-    val= GET_BYTE;\
+    val= (GET_BYTE);\
     {\
         uint32_t top = (val & 128) >> 1;\
         if ((val & 0xc0) == 0x80 || val >= 0xFE)\
             ERROR\
         while (val & top) {\
-            int tmp= GET_BYTE - 128;\
+            int tmp= (GET_BYTE) - 128;\
             if(tmp>>6)\
                 ERROR\
             val= (val<<6) + tmp;\
@@ -498,3 +525,6 @@ static av_always_inline av_const int av_popcount64_c(uint64_t x)
 #ifndef av_popcount64
 #   define av_popcount64    av_popcount64_c
 #endif
+#ifndef av_parity
+#   define av_parity        av_parity_c
+#endif
diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index 780368d6..cd55d3be 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -30,7 +30,7 @@
 #endif
 #include <sched.h>
 #endif
-#if HAVE_GETPROCESSAFFINITYMASK
+#if HAVE_GETPROCESSAFFINITYMASK || HAVE_WINRT
 #include <windows.h>
 #endif
 #if HAVE_SYSCTL
@@ -118,6 +118,7 @@ int av_parse_cpu_flags(const char *s)
 #define CPUFLAG_FMA4     (AV_CPU_FLAG_FMA4     | CPUFLAG_AVX)
 #define CPUFLAG_AVX2     (AV_CPU_FLAG_AVX2     | CPUFLAG_AVX)
 #define CPUFLAG_BMI2     (AV_CPU_FLAG_BMI2     | AV_CPU_FLAG_BMI1)
+#define CPUFLAG_AESNI    (AV_CPU_FLAG_AESNI    | CPUFLAG_SSE42)
     static const AVOption cpuflags_opts[] = {
         { "flags"   , NULL, 0, AV_OPT_TYPE_FLAGS, { .i64 = 0 }, INT64_MIN, INT64_MAX, .unit = "flags" },
 #if   ARCH_PPC
@@ -145,11 +146,13 @@ int av_parse_cpu_flags(const char *s)
         { "3dnow"   , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_3DNOW        },    .unit = "flags" },
         { "3dnowext", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_3DNOWEXT     },    .unit = "flags" },
         { "cmov",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_CMOV     },    .unit = "flags" },
+        { "aesni"   , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AESNI        },    .unit = "flags" },
 #elif ARCH_ARM
         { "armv5te",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ARMV5TE  },    .unit = "flags" },
         { "armv6",    NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ARMV6    },    .unit = "flags" },
         { "armv6t2",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ARMV6T2  },    .unit = "flags" },
         { "vfp",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_VFP      },    .unit = "flags" },
+        { "vfp_vm",   NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_VFP_VM   },    .unit = "flags" },
         { "vfpv3",    NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_VFPV3    },    .unit = "flags" },
         { "neon",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_NEON     },    .unit = "flags" },
 #elif ARCH_AARCH64
@@ -205,6 +208,7 @@ int av_parse_cpu_caps(unsigned *flags, const char *s)
         { "3dnow"   , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_3DNOW    },    .unit = "flags" },
         { "3dnowext", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_3DNOWEXT },    .unit = "flags" },
         { "cmov",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_CMOV     },    .unit = "flags" },
+        { "aesni",    NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AESNI    },    .unit = "flags" },
 
 #define CPU_FLAG_P2 AV_CPU_FLAG_CMOV | AV_CPU_FLAG_MMX
 #define CPU_FLAG_P3 CPU_FLAG_P2 | AV_CPU_FLAG_MMX2 | AV_CPU_FLAG_SSE
@@ -227,6 +231,7 @@ int av_parse_cpu_caps(unsigned *flags, const char *s)
         { "armv6",    NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ARMV6    },    .unit = "flags" },
         { "armv6t2",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ARMV6T2  },    .unit = "flags" },
         { "vfp",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_VFP      },    .unit = "flags" },
+        { "vfp_vm",   NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_VFP_VM   },    .unit = "flags" },
         { "vfpv3",    NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_VFPV3    },    .unit = "flags" },
         { "neon",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_NEON     },    .unit = "flags" },
         { "setend",   NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SETEND   },    .unit = "flags" },
@@ -253,6 +258,9 @@ int av_cpu_count(void)
     static volatile int printed;
 
     int nb_cpus = 1;
+#if HAVE_WINRT
+    SYSTEM_INFO sysinfo;
+#endif
 #if HAVE_SCHED_GETAFFINITY && defined(CPU_COUNT)
     cpu_set_t cpuset;
 
@@ -274,6 +282,9 @@ int av_cpu_count(void)
     nb_cpus = sysconf(_SC_NPROC_ONLN);
 #elif HAVE_SYSCONF && defined(_SC_NPROCESSORS_ONLN)
     nb_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+#elif HAVE_WINRT
+    GetNativeSystemInfo(&sysinfo);
+    nb_cpus = sysinfo.dwNumberOfProcessors;
 #endif
 
     if (!printed) {
@@ -306,6 +317,7 @@ static const struct {
     { AV_CPU_FLAG_ARMV6,     "armv6"      },
     { AV_CPU_FLAG_ARMV6T2,   "armv6t2"    },
     { AV_CPU_FLAG_VFP,       "vfp"        },
+    { AV_CPU_FLAG_VFP_VM,    "vfp_vm"     },
     { AV_CPU_FLAG_VFPV3,     "vfpv3"      },
     { AV_CPU_FLAG_NEON,      "neon"       },
     { AV_CPU_FLAG_SETEND,    "setend"     },
@@ -334,6 +346,7 @@ static const struct {
     { AV_CPU_FLAG_AVX2,      "avx2"       },
     { AV_CPU_FLAG_BMI1,      "bmi1"       },
     { AV_CPU_FLAG_BMI2,      "bmi2"       },
+    { AV_CPU_FLAG_AESNI,     "aesni"      },
 #endif
     { 0 }
 };
diff --git a/libavutil/cpu.h b/libavutil/cpu.h
index 471f7866..cc4e30c4 100644
--- a/libavutil/cpu.h
+++ b/libavutil/cpu.h
@@ -42,15 +42,12 @@
 #define AV_CPU_FLAG_ATOM     0x10000000 ///< Atom processor, some SSSE3 instructions are slower
 #define AV_CPU_FLAG_SSE4         0x0100 ///< Penryn SSE4.1 functions
 #define AV_CPU_FLAG_SSE42        0x0200 ///< Nehalem SSE4.2 functions
+#define AV_CPU_FLAG_AESNI       0x80000 ///< Advanced Encryption Standard functions
 #define AV_CPU_FLAG_AVX          0x4000 ///< AVX functions: requires OS support even if YMM registers aren't used
 #define AV_CPU_FLAG_AVXSLOW   0x8000000 ///< AVX supported, but slow when using YMM registers (e.g. Bulldozer)
 #define AV_CPU_FLAG_XOP          0x0400 ///< Bulldozer XOP functions
 #define AV_CPU_FLAG_FMA4         0x0800 ///< Bulldozer FMA4 functions
-// #if LIBAVUTIL_VERSION_MAJOR <52
-#define AV_CPU_FLAG_CMOV      0x1001000 ///< supports cmov instruction
-// #else
-// #define AV_CPU_FLAG_CMOV         0x1000 ///< supports cmov instruction
-// #endif
+#define AV_CPU_FLAG_CMOV         0x1000 ///< supports cmov instruction
 #define AV_CPU_FLAG_AVX2         0x8000 ///< AVX2 functions: requires OS support even if YMM registers aren't used
 #define AV_CPU_FLAG_FMA3        0x10000 ///< Haswell FMA3 functions
 #define AV_CPU_FLAG_BMI1        0x20000 ///< Bit Manipulation Instruction Set 1
@@ -67,6 +64,7 @@
 #define AV_CPU_FLAG_VFPV3        (1 << 4)
 #define AV_CPU_FLAG_NEON         (1 << 5)
 #define AV_CPU_FLAG_ARMV8        (1 << 6)
+#define AV_CPU_FLAG_VFP_VM       (1 << 7) ///< VFPv2 vector mode, deprecated in ARMv7-A and unavailable in various CPUs implementations
 #define AV_CPU_FLAG_SETEND       (1 <<16)
 
 /**
diff --git a/libavutil/cpu_internal.h b/libavutil/cpu_internal.h
index 21052988..6c352abe 100644
--- a/libavutil/cpu_internal.h
+++ b/libavutil/cpu_internal.h
@@ -24,13 +24,16 @@
 #define CPUEXT_SUFFIX(flags, suffix, cpuext)                            \
     (HAVE_ ## cpuext ## suffix && ((flags) & AV_CPU_FLAG_ ## cpuext))
 
-#define CPUEXT_SUFFIX_FAST(flags, suffix, cpuext)                       \
+#define CPUEXT_SUFFIX_FAST2(flags, suffix, cpuext, slow_cpuext)         \
     (HAVE_ ## cpuext ## suffix && ((flags) & AV_CPU_FLAG_ ## cpuext) && \
-     !((flags) & AV_CPU_FLAG_ ## cpuext ## SLOW))
+     !((flags) & AV_CPU_FLAG_ ## slow_cpuext ## SLOW))
 
-#define CPUEXT_SUFFIX_SLOW(flags, suffix, cpuext)                       \
+#define CPUEXT_SUFFIX_SLOW2(flags, suffix, cpuext, slow_cpuext)         \
     (HAVE_ ## cpuext ## suffix && ((flags) & AV_CPU_FLAG_ ## cpuext) && \
-     ((flags) & AV_CPU_FLAG_ ## cpuext ## SLOW))
+     ((flags) & AV_CPU_FLAG_ ## slow_cpuext ## SLOW))
+
+#define CPUEXT_SUFFIX_FAST(flags, suffix, cpuext) CPUEXT_SUFFIX_FAST2(flags, suffix, cpuext, cpuext)
+#define CPUEXT_SUFFIX_SLOW(flags, suffix, cpuext) CPUEXT_SUFFIX_SLOW2(flags, suffix, cpuext, cpuext)
 
 #define CPUEXT(flags, cpuext) CPUEXT_SUFFIX(flags, , cpuext)
 #define CPUEXT_FAST(flags, cpuext) CPUEXT_SUFFIX_FAST(flags, , cpuext)
diff --git a/libavutil/crc.c b/libavutil/crc.c
index d8b1c99d..5a1ddf09 100644
--- a/libavutil/crc.c
+++ b/libavutil/crc.c
@@ -312,9 +312,9 @@ int av_crc_init(AVCRC *ctx, int le, int bits, uint32_t poly, int ctx_size)
     uint32_t c;
 
     if (bits < 8 || bits > 32 || poly >= (1LL << bits))
-        return -1;
+        return AVERROR(EINVAL);
     if (ctx_size != sizeof(AVCRC) * 257 && ctx_size != sizeof(AVCRC) * 1024)
-        return -1;
+        return AVERROR(EINVAL);
 
     for (i = 0; i < 256; i++) {
         if (le) {
@@ -383,7 +383,8 @@ int main(void)
 {
     uint8_t buf[1999];
     int i;
-    int p[6][3] = { { AV_CRC_32_IEEE_LE, 0xEDB88320, 0x3D5CDD04 },
+    unsigned
+        p[6][3] = { { AV_CRC_32_IEEE_LE, 0xEDB88320, 0x3D5CDD04 },
                     { AV_CRC_32_IEEE   , 0x04C11DB7, 0xC0F5BAE0 },
                     { AV_CRC_24_IEEE   , 0x864CFB  , 0xB704CE   },
                     { AV_CRC_16_ANSI_LE, 0xA001    , 0xBFD8     },
diff --git a/libavutil/crc.h b/libavutil/crc.h
index e86bf1de..ef8a7137 100644
--- a/libavutil/crc.h
+++ b/libavutil/crc.h
@@ -24,6 +24,7 @@
 #include <stdint.h>
 #include <stddef.h>
 #include "attributes.h"
+#include "version.h"
 
 /**
  * @defgroup lavu_crc32 CRC32
@@ -40,7 +41,11 @@ typedef enum {
     AV_CRC_32_IEEE,
     AV_CRC_32_IEEE_LE,  /*< reversed bitorder version of AV_CRC_32_IEEE */
     AV_CRC_16_ANSI_LE,  /*< reversed bitorder version of AV_CRC_16_ANSI */
+#if FF_API_CRC_BIG_TABLE
     AV_CRC_24_IEEE = 12,
+#else
+    AV_CRC_24_IEEE,
+#endif /* FF_API_CRC_BIG_TABLE */
     AV_CRC_MAX,         /*< Not part of public API! Do not use outside libavutil. */
 }AVCRCId;
 
diff --git a/libavutil/des.c b/libavutil/des.c
index 57ad0a4f..3ccbf89d 100644
--- a/libavutil/des.c
+++ b/libavutil/des.c
@@ -22,10 +22,9 @@
 #include "avutil.h"
 #include "common.h"
 #include "intreadwrite.h"
+#include "mem.h"
 #include "des.h"
 
-typedef struct AVDES AVDES;
-
 #define T(a, b, c, d, e, f, g, h) 64-a,64-b,64-c,64-d,64-e,64-f,64-g,64-h
 static const uint8_t IP_shuffle[] = {
     T(58, 50, 42, 34, 26, 18, 10, 2),
@@ -286,9 +285,14 @@ static uint64_t des_encdec(uint64_t in, uint64_t K[16], int decrypt) {
     return in;
 }
 
+AVDES *av_des_alloc(void)
+{
+    return av_mallocz(sizeof(struct AVDES));
+}
+
 int av_des_init(AVDES *d, const uint8_t *key, int key_bits, av_unused int decrypt) {
     if (key_bits != 64 && key_bits != 192)
-        return -1;
+        return AVERROR(EINVAL);
     d->triple_des = key_bits > 64;
     gen_roundkeys(d->round_keys[0], AV_RB64(key));
     if (d->triple_des) {
diff --git a/libavutil/des.h b/libavutil/des.h
index 2feb0468..4cf11f5b 100644
--- a/libavutil/des.h
+++ b/libavutil/des.h
@@ -24,16 +24,28 @@
 
 #include <stdint.h>
 
-struct AVDES {
+/**
+ * @defgroup lavu_des DES
+ * @ingroup lavu_crypto
+ * @{
+ */
+
+typedef struct AVDES {
     uint64_t round_keys[3][16];
     int triple_des;
-};
+} AVDES;
+
+/**
+ * Allocate an AVDES context.
+ */
+AVDES *av_des_alloc(void);
 
 /**
  * @brief Initializes an AVDES context.
  *
  * @param key_bits must be 64 or 192
  * @param decrypt 0 for encryption/CBC-MAC, 1 for decryption
+ * @return zero on success, negative value otherwise
  */
 int av_des_init(struct AVDES *d, const uint8_t *key, int key_bits, int decrypt);
 
@@ -58,4 +70,8 @@ void av_des_crypt(struct AVDES *d, uint8_t *dst, const uint8_t *src, int count,
  */
 void av_des_mac(struct AVDES *d, uint8_t *dst, const uint8_t *src, int count);
 
+/**
+ * @}
+ */
+
 #endif /* AVUTIL_DES_H */
diff --git a/libavutil/dict.c b/libavutil/dict.c
index 6ff1af52..8bb65a13 100644
--- a/libavutil/dict.c
+++ b/libavutil/dict.c
@@ -210,12 +210,17 @@ void av_dict_free(AVDictionary **pm)
     av_freep(pm);
 }
 
-void av_dict_copy(AVDictionary **dst, const AVDictionary *src, int flags)
+int av_dict_copy(AVDictionary **dst, const AVDictionary *src, int flags)
 {
     AVDictionaryEntry *t = NULL;
 
-    while ((t = av_dict_get(src, "", t, AV_DICT_IGNORE_SUFFIX)))
-        av_dict_set(dst, t->key, t->value, flags);
+    while ((t = av_dict_get(src, "", t, AV_DICT_IGNORE_SUFFIX))) {
+        int ret = av_dict_set(dst, t->key, t->value, flags);
+        if (ret < 0)
+            return ret;
+    }
+
+    return 0;
 }
 
 int av_dict_get_string(const AVDictionary *m, char **buffer,
diff --git a/libavutil/dict.h b/libavutil/dict.h
index f2df687c..5b8d0033 100644
--- a/libavutil/dict.h
+++ b/libavutil/dict.h
@@ -162,8 +162,10 @@ int av_dict_parse_string(AVDictionary **pm, const char *str,
  * @param src pointer to source AVDictionary struct
  * @param flags flags to use when setting entries in *dst
  * @note metadata is read using the AV_DICT_IGNORE_SUFFIX flag
+ * @return 0 on success, negative AVERROR code on failure. If dst was allocated
+ *           by this function, callers should free the associated memory.
  */
-void av_dict_copy(AVDictionary **dst, const AVDictionary *src, int flags);
+int av_dict_copy(AVDictionary **dst, const AVDictionary *src, int flags);
 
 /**
  * Free all the memory allocated for an AVDictionary struct
diff --git a/libavutil/error.c b/libavutil/error.c
index 44259682..8df73dbc 100644
--- a/libavutil/error.c
+++ b/libavutil/error.c
@@ -61,7 +61,44 @@ static const struct error_entry error_entries[] = {
     { ERROR_TAG(HTTP_OTHER_4XX),     "Server returned 4XX Client Error, but not one of 40{0,1,3,4}" },
     { ERROR_TAG(HTTP_SERVER_ERROR),  "Server returned 5XX Server Error reply" },
 #if !HAVE_STRERROR_R
+    { EERROR_TAG(E2BIG),             "Argument list too long" },
+    { EERROR_TAG(EACCES),            "Permission denied" },
+    { EERROR_TAG(EAGAIN),            "Resource temporarily unavailable" },
+    { EERROR_TAG(EBADF),             "Bad file descriptor" },
+    { EERROR_TAG(EBUSY),             "Device or resource busy" },
+    { EERROR_TAG(ECHILD),            "No child processes" },
+    { EERROR_TAG(EDEADLK),           "Resource deadlock avoided" },
+    { EERROR_TAG(EDOM),              "Numerical argument out of domain" },
+    { EERROR_TAG(EEXIST),            "File exists" },
+    { EERROR_TAG(EFAULT),            "Bad address" },
+    { EERROR_TAG(EFBIG),             "File too large" },
+    { EERROR_TAG(EILSEQ),            "Illegal byte sequence" },
+    { EERROR_TAG(EINTR),             "Interrupted system call" },
     { EERROR_TAG(EINVAL),            "Invalid argument" },
+    { EERROR_TAG(EIO),               "I/O error" },
+    { EERROR_TAG(EISDIR),            "Is a directory" },
+    { EERROR_TAG(EMFILE),            "Too many open files" },
+    { EERROR_TAG(EMLINK),            "Too many links" },
+    { EERROR_TAG(ENAMETOOLONG),      "File name too long" },
+    { EERROR_TAG(ENFILE),            "Too many open files in system" },
+    { EERROR_TAG(ENODEV),            "No such device" },
+    { EERROR_TAG(ENOENT),            "No such file or directory" },
+    { EERROR_TAG(ENOEXEC),           "Exec format error" },
+    { EERROR_TAG(ENOLCK),            "No locks available" },
+    { EERROR_TAG(ENOMEM),            "Cannot allocate memory" },
+    { EERROR_TAG(ENOSPC),            "No space left on device" },
+    { EERROR_TAG(ENOSYS),            "Function not implemented" },
+    { EERROR_TAG(ENOTDIR),           "Not a directory" },
+    { EERROR_TAG(ENOTEMPTY),         "Directory not empty" },
+    { EERROR_TAG(ENOTTY),            "Inappropriate I/O control operation" },
+    { EERROR_TAG(ENXIO),             "No such device or address" },
+    { EERROR_TAG(EPERM),             "Operation not permitted" },
+    { EERROR_TAG(EPIPE),             "Broken pipe" },
+    { EERROR_TAG(ERANGE),            "Result too large" },
+    { EERROR_TAG(EROFS),             "Read-only file system" },
+    { EERROR_TAG(ESPIPE),            "Illegal seek" },
+    { EERROR_TAG(ESRCH),             "No such process" },
+    { EERROR_TAG(EXDEV),             "Cross-device link" },
 #endif
 };
 
diff --git a/libavutil/eval.c b/libavutil/eval.c
index 1dfcbef7..2acbbb47 100644
--- a/libavutil/eval.c
+++ b/libavutil/eval.c
@@ -31,6 +31,7 @@
 #include "avutil.h"
 #include "common.h"
 #include "eval.h"
+#include "internal.h"
 #include "log.h"
 #include "mathematics.h"
 #include "time.h"
@@ -56,27 +57,31 @@ typedef struct Parser {
 
 static const AVClass eval_class = { "Eval", av_default_item_name, NULL, LIBAVUTIL_VERSION_INT, offsetof(Parser,log_offset), offsetof(Parser,log_ctx) };
 
-static const int8_t si_prefixes['z' - 'E' + 1] = {
-    ['y'-'E']= -24,
-    ['z'-'E']= -21,
-    ['a'-'E']= -18,
-    ['f'-'E']= -15,
-    ['p'-'E']= -12,
-    ['n'-'E']= - 9,
-    ['u'-'E']= - 6,
-    ['m'-'E']= - 3,
-    ['c'-'E']= - 2,
-    ['d'-'E']= - 1,
-    ['h'-'E']=   2,
-    ['k'-'E']=   3,
-    ['K'-'E']=   3,
-    ['M'-'E']=   6,
-    ['G'-'E']=   9,
-    ['T'-'E']=  12,
-    ['P'-'E']=  15,
-    ['E'-'E']=  18,
-    ['Z'-'E']=  21,
-    ['Y'-'E']=  24,
+static const struct {
+    double bin_val;
+    double dec_val;
+    int8_t exp;
+} si_prefixes['z' - 'E' + 1] = {
+    ['y'-'E']= { 8.271806125530276749e-25, 1e-24, -24 },
+    ['z'-'E']= { 8.4703294725430034e-22, 1e-21, -21 },
+    ['a'-'E']= { 8.6736173798840355e-19, 1e-18, -18 },
+    ['f'-'E']= { 8.8817841970012523e-16, 1e-15, -15 },
+    ['p'-'E']= { 9.0949470177292824e-13, 1e-12, -12 },
+    ['n'-'E']= { 9.3132257461547852e-10, 1e-9,  -9 },
+    ['u'-'E']= { 9.5367431640625e-7, 1e-6, -6 },
+    ['m'-'E']= { 9.765625e-4, 1e-3, -3 },
+    ['c'-'E']= { 9.8431332023036951e-3, 1e-2, -2 },
+    ['d'-'E']= { 9.921256574801246e-2, 1e-1, -1 },
+    ['h'-'E']= { 1.0159366732596479e2, 1e2, 2 },
+    ['k'-'E']= { 1.024e3, 1e3, 3 },
+    ['K'-'E']= { 1.024e3, 1e3, 3 },
+    ['M'-'E']= { 1.048576e6, 1e6, 6 },
+    ['G'-'E']= { 1.073741824e9, 1e9, 9 },
+    ['T'-'E']= { 1.099511627776e12, 1e12, 12 },
+    ['P'-'E']= { 1.125899906842624e15, 1e15, 15 },
+    ['E'-'E']= { 1.152921504606847e18, 1e18, 18 },
+    ['Z'-'E']= { 1.1805916207174113e21, 1e21, 21 },
+    ['Y'-'E']= { 1.2089258196146292e24, 1e24, 24 },
 };
 
 static const struct {
@@ -101,16 +106,16 @@ double av_strtod(const char *numstr, char **tail)
     if (next!=numstr) {
         if (next[0] == 'd' && next[1] == 'B') {
             /* treat dB as decibels instead of decibytes */
-            d = pow(10, d / 20);
+            d = ff_exp10(d / 20);
             next += 2;
         } else if (*next >= 'E' && *next <= 'z') {
-            int e= si_prefixes[*next - 'E'];
+            int e= si_prefixes[*next - 'E'].exp;
             if (e) {
                 if (next[1] == 'i') {
-                    d*= pow( 2, e/0.3);
+                    d*= si_prefixes[*next - 'E'].bin_val;
                     next+=2;
                 } else {
-                    d*= pow(10, e);
+                    d*= si_prefixes[*next - 'E'].dec_val;
                     next++;
                 }
             }
@@ -244,7 +249,7 @@ static double eval_expr(Parser *p, AVExpr *e)
             double x_max = eval_expr(p, e->param[1]);
             for(i=-1; i<1024; i++) {
                 if(i<255) {
-                    p->var[0] = av_reverse[i&255]*x_max/255;
+                    p->var[0] = ff_reverse[i&255]*x_max/255;
                 } else {
                     p->var[0] = x_max*pow(0.9, i-255);
                     if (i&1) p->var[0] *= -1;
@@ -298,7 +303,7 @@ static double eval_expr(Parser *p, AVExpr *e)
                 case e_add: return e->value * (d + d2);
                 case e_last:return e->value * d2;
                 case e_st : return e->value * (p->var[av_clip(d, 0, VARS-1)]= d2);
-                case e_hypot:return e->value * (sqrt(d*d + d2*d2));
+                case e_hypot:return e->value * hypot(d, d2);
                 case e_bitand: return isnan(d) || isnan(d2) ? NAN : e->value * ((long int)d & (long int)d2);
                 case e_bitor:  return isnan(d) || isnan(d2) ? NAN : e->value * ((long int)d | (long int)d2);
             }
diff --git a/libavutil/eval.h b/libavutil/eval.h
index 6159b0fe..dacd22b9 100644
--- a/libavutil/eval.h
+++ b/libavutil/eval.h
@@ -102,7 +102,7 @@ void av_expr_free(AVExpr *e);
  * @param numstr a string representing a number, may contain one of
  * the International System number postfixes, for example 'K', 'M',
  * 'G'. If 'i' is appended after the postfix, powers of 2 are used
- * instead of powers of 10. The 'B' postfix multiplies the value for
+ * instead of powers of 10. The 'B' postfix multiplies the value by
  * 8, and can be appended after another postfix or used alone. This
  * allows using for example 'KB', 'MiB', 'G' and 'B' as postfix.
  * @param tail if non-NULL puts here the pointer to the char next
diff --git a/libavutil/fifo.c b/libavutil/fifo.c
index f2fe93de..7bd48a22 100644
--- a/libavutil/fifo.c
+++ b/libavutil/fifo.c
@@ -148,6 +148,68 @@ int av_fifo_generic_write(AVFifoBuffer *f, void *src, int size,
     return total - size;
 }
 
+int av_fifo_generic_peek_at(AVFifoBuffer *f, void *dest, int offset, int buf_size, void (*func)(void*, void*, int))
+{
+    uint8_t *rptr = f->rptr;
+
+    av_assert2(offset >= 0);
+
+    /*
+     * The *ndx fields are indexes modulo 2^32; they are intended to overflow,
+     * which allows handling *ndx values greater than 4 GB.
+     */
+    av_assert2(buf_size + (unsigned)offset <= f->wndx - f->rndx);
+
+    if (offset >= f->end - rptr)
+        rptr += offset - (f->end - f->buffer);
+    else
+        rptr += offset;
+
+    while (buf_size > 0) {
+        int len;
+
+        if (rptr >= f->end)
+            rptr -= f->end - f->buffer;
+
+        len = FFMIN(f->end - rptr, buf_size);
+        if (func)
+            func(dest, rptr, len);
+        else {
+            memcpy(dest, rptr, len);
+            dest = (uint8_t *)dest + len;
+        }
+
+        buf_size -= len;
+        rptr     += len;
+    }
+
+    return 0;
+}
+
+int av_fifo_generic_peek(AVFifoBuffer *f, void *dest, int buf_size,
+                         void (*func)(void *, void *, int))
+{
+// Read memory barrier needed for SMP here in theory
+    uint8_t *rptr = f->rptr;
+
+    do {
+        int len = FFMIN(f->end - rptr, buf_size);
+        if (func)
+            func(dest, rptr, len);
+        else {
+            memcpy(dest, rptr, len);
+            dest = (uint8_t *)dest + len;
+        }
+// memory barrier needed for SMP here in theory
+        rptr += len;
+        if (rptr >= f->end)
+            rptr -= f->end - f->buffer;
+        buf_size -= len;
+    } while (buf_size > 0);
+
+    return 0;
+}
+
 int av_fifo_generic_read(AVFifoBuffer *f, void *dest, int buf_size,
                          void (*func)(void *, void *, int))
 {
@@ -197,6 +259,14 @@ int main(void)
     }
     printf("\n");
 
+    /* peek at each element of the FIFO without consuming it */
+    n = av_fifo_size(fifo) / sizeof(int);
+    for (i = 0; i < n; i++) {
+        av_fifo_generic_peek_at(fifo, &j, i * sizeof(int), sizeof(j), NULL);
+        printf("%d: %d\n", i, j);
+    }
+    printf("\n");
+
     /* read data */
     for (i = 0; av_fifo_size(fifo) >= sizeof(int); i++) {
         av_fifo_generic_read(fifo, &j, sizeof(int), NULL);
@@ -204,6 +274,21 @@ int main(void)
     }
     printf("\n");
 
+    /* test *ndx overflow */
+    av_fifo_reset(fifo);
+    fifo->rndx = fifo->wndx = ~(uint32_t)0 - 5;
+
+    /* fill data */
+    for (i = 0; av_fifo_space(fifo) >= sizeof(int); i++)
+        av_fifo_generic_write(fifo, &i, sizeof(int), NULL);
+
+    /* peek at each element of the FIFO without consuming it */
+    n = av_fifo_size(fifo) / sizeof(int);
+    for (i = 0; i < n; i++) {
+        av_fifo_generic_peek_at(fifo, &j, i * sizeof(int), sizeof(j), NULL);
+        printf("%d: %d\n", i, j);
+    }
+
     av_fifo_free(fifo);
 
     return 0;
diff --git a/libavutil/fifo.h b/libavutil/fifo.h
index f3bdcbce..dc7bc6f0 100644
--- a/libavutil/fifo.h
+++ b/libavutil/fifo.h
@@ -83,6 +83,27 @@ int av_fifo_size(const AVFifoBuffer *f);
  */
 int av_fifo_space(const AVFifoBuffer *f);
 
+/**
+ * Feed data at a specific position from an AVFifoBuffer to a user-supplied callback.
+ * Similar to av_fifo_generic_read(), but without discarding data.
+ * @param f AVFifoBuffer to read from
+ * @param offset offset from current read position
+ * @param buf_size number of bytes to read
+ * @param func generic read function
+ * @param dest data destination
+ */
+int av_fifo_generic_peek_at(AVFifoBuffer *f, void *dest, int offset, int buf_size, void (*func)(void*, void*, int));
+
+/**
+ * Feed data from an AVFifoBuffer to a user-supplied callback.
+ * Similar to av_fifo_generic_read(), but without discarding data.
+ * @param f AVFifoBuffer to read from
+ * @param buf_size number of bytes to read
+ * @param func generic read function
+ * @param dest data destination
+ */
+int av_fifo_generic_peek(AVFifoBuffer *f, void *dest, int buf_size, void (*func)(void*, void*, int));
+
 /**
  * Feed data from an AVFifoBuffer to a user-supplied callback.
  * @param f AVFifoBuffer to read from
diff --git a/libavutil/file.h b/libavutil/file.h
index 1cae2951..e931be71 100644
--- a/libavutil/file.h
+++ b/libavutil/file.h
@@ -40,6 +40,7 @@
  * @return a non negative number in case of success, a negative value
  * corresponding to an AVERROR error code in case of failure
  */
+av_warn_unused_result
 int av_file_map(const char *filename, uint8_t **bufptr, size_t *size,
                 int log_offset, void *log_ctx);
 
diff --git a/libavutil/file_open.c b/libavutil/file_open.c
index 3f9a67c3..9e761279 100644
--- a/libavutil/file_open.c
+++ b/libavutil/file_open.c
@@ -77,6 +77,9 @@ int avpriv_open(const char *filename, int flags, ...)
 #ifdef O_CLOEXEC
     flags |= O_CLOEXEC;
 #endif
+#ifdef O_NOINHERIT
+    flags |= O_NOINHERIT;
+#endif
 
     fd = open(filename, flags, mode);
 #if HAVE_FCNTL
diff --git a/libavutil/fixed_dsp.c b/libavutil/fixed_dsp.c
index 8b25156c..8c018581 100644
--- a/libavutil/fixed_dsp.c
+++ b/libavutil/fixed_dsp.c
@@ -121,7 +121,7 @@ static void vector_fmul_c(int *dst, const int *src0, const int *src1, int len)
     }
 }
 
-static int ff_scalarproduct_fixed_c(const int *v1, const int *v2, int len)
+static int scalarproduct_fixed_c(const int *v1, const int *v2, int len)
 {
     /** p is initialized with 0x40000000 so that the proper rounding will occur
       * at the end */
@@ -158,7 +158,10 @@ AVFixedDSPContext * avpriv_alloc_fixed_dsp(int bit_exact)
     fdsp->vector_fmul_add = vector_fmul_add_c;
     fdsp->vector_fmul_reverse = vector_fmul_reverse_c;
     fdsp->butterflies_fixed = butterflies_fixed_c;
-    fdsp->scalarproduct_fixed = ff_scalarproduct_fixed_c;
+    fdsp->scalarproduct_fixed = scalarproduct_fixed_c;
+
+    if (ARCH_X86)
+        ff_fixed_dsp_init_x86(fdsp);
 
     return fdsp;
 }
diff --git a/libavutil/fixed_dsp.h b/libavutil/fixed_dsp.h
index 03987add..f554cb50 100644
--- a/libavutil/fixed_dsp.h
+++ b/libavutil/fixed_dsp.h
@@ -161,6 +161,8 @@ typedef struct AVFixedDSPContext {
  */
 AVFixedDSPContext * avpriv_alloc_fixed_dsp(int strict);
 
+void ff_fixed_dsp_init_x86(AVFixedDSPContext *fdsp);
+
 /**
  * Calculate the square root
  *
diff --git a/libavutil/float_dsp.c b/libavutil/float_dsp.c
index 467d7a74..49e0ae7f 100644
--- a/libavutil/float_dsp.c
+++ b/libavutil/float_dsp.c
@@ -116,8 +116,12 @@ float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len)
     return p;
 }
 
-av_cold void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact)
+av_cold AVFloatDSPContext *avpriv_float_dsp_alloc(int bit_exact)
 {
+    AVFloatDSPContext *fdsp = av_mallocz(sizeof(AVFloatDSPContext));
+    if (!fdsp)
+        return NULL;
+
     fdsp->vector_fmul = vector_fmul_c;
     fdsp->vector_fmac_scalar = vector_fmac_scalar_c;
     fdsp->vector_fmul_scalar = vector_fmul_scalar_c;
@@ -138,14 +142,7 @@ av_cold void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact)
         ff_float_dsp_init_x86(fdsp);
     if (ARCH_MIPS)
         ff_float_dsp_init_mips(fdsp);
-}
-
-av_cold AVFloatDSPContext *avpriv_float_dsp_alloc(int bit_exact)
-{
-    AVFloatDSPContext *ret = av_mallocz(sizeof(AVFloatDSPContext));
-    if (ret)
-        avpriv_float_dsp_init(ret, bit_exact);
-    return ret;
+    return fdsp;
 }
 
 
@@ -386,7 +383,7 @@ int main(int argc, char **argv)
 {
     int ret = 0, seeded = 0;
     uint32_t seed;
-    AVFloatDSPContext fdsp, cdsp;
+    AVFloatDSPContext *fdsp, *cdsp;
     AVLFG lfg;
 
     LOCAL_ALIGNED(32, float, src0, [LEN]);
@@ -421,6 +418,15 @@ int main(int argc, char **argv)
 
     av_log(NULL, AV_LOG_INFO, "float_dsp-test: %s %u\n", seeded ? "seed" : "random seed", seed);
 
+    fdsp = avpriv_float_dsp_alloc(1);
+    av_force_cpu_flags(0);
+    cdsp = avpriv_float_dsp_alloc(1);
+
+    if (!fdsp || !cdsp) {
+        ret = 1;
+        goto end;
+    }
+
     av_lfg_init(&lfg, seed);
 
     fill_float_array(&lfg, src0, LEN);
@@ -430,29 +436,28 @@ int main(int argc, char **argv)
     fill_double_array(&lfg, dbl_src0, LEN);
     fill_double_array(&lfg, dbl_src1, LEN);
 
-    avpriv_float_dsp_init(&fdsp, 1);
-    av_set_cpu_flags_mask(0);
-    avpriv_float_dsp_init(&cdsp, 1);
-
-    if (test_vector_fmul(&fdsp, &cdsp, src0, src1))
+    if (test_vector_fmul(fdsp, cdsp, src0, src1))
         ret -= 1 << 0;
-    if (test_vector_fmac_scalar(&fdsp, &cdsp, src2, src0, src1[0]))
+    if (test_vector_fmac_scalar(fdsp, cdsp, src2, src0, src1[0]))
         ret -= 1 << 1;
-    if (test_vector_fmul_scalar(&fdsp, &cdsp, src0, src1[0]))
+    if (test_vector_fmul_scalar(fdsp, cdsp, src0, src1[0]))
         ret -= 1 << 2;
-    if (test_vector_fmul_window(&fdsp, &cdsp, src0, src1, src2))
+    if (test_vector_fmul_window(fdsp, cdsp, src0, src1, src2))
         ret -= 1 << 3;
-    if (test_vector_fmul_add(&fdsp, &cdsp, src0, src1, src2))
+    if (test_vector_fmul_add(fdsp, cdsp, src0, src1, src2))
         ret -= 1 << 4;
-    if (test_vector_fmul_reverse(&fdsp, &cdsp, src0, src1))
+    if (test_vector_fmul_reverse(fdsp, cdsp, src0, src1))
         ret -= 1 << 5;
-    if (test_butterflies_float(&fdsp, &cdsp, src0, src1))
+    if (test_butterflies_float(fdsp, cdsp, src0, src1))
         ret -= 1 << 6;
-    if (test_scalarproduct_float(&fdsp, &cdsp, src0, src1))
+    if (test_scalarproduct_float(fdsp, cdsp, src0, src1))
         ret -= 1 << 7;
-    if (test_vector_dmul_scalar(&fdsp, &cdsp, dbl_src0, dbl_src1[0]))
+    if (test_vector_dmul_scalar(fdsp, cdsp, dbl_src0, dbl_src1[0]))
         ret -= 1 << 8;
 
+end:
+    av_freep(&fdsp);
+    av_freep(&cdsp);
     return ret;
 }
 
diff --git a/libavutil/float_dsp.h b/libavutil/float_dsp.h
index ad8e3eb1..d1be38f9 100644
--- a/libavutil/float_dsp.h
+++ b/libavutil/float_dsp.h
@@ -170,15 +170,6 @@ typedef struct AVFloatDSPContext {
  */
 float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len);
 
-/**
- * Initialize a float DSP context.
- *
- * @param fdsp    float DSP context
- * @param strict  setting to non-zero avoids using functions which may not be IEEE-754 compliant
- */
-void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int strict);
-
-
 void ff_float_dsp_init_aarch64(AVFloatDSPContext *fdsp);
 void ff_float_dsp_init_arm(AVFloatDSPContext *fdsp);
 void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int strict);
diff --git a/libavutil/frame.c b/libavutil/frame.c
index 4596927f..033f0134 100644
--- a/libavutil/frame.c
+++ b/libavutil/frame.c
@@ -46,29 +46,35 @@ MAKE_ACCESSORS(AVFrame, frame, enum AVColorRange, color_range)
 
 AVDictionary **avpriv_frame_get_metadatap(AVFrame *frame) {return &frame->metadata;};
 
+#if FF_API_FRAME_QP
 int av_frame_set_qp_table(AVFrame *f, AVBufferRef *buf, int stride, int qp_type)
 {
     av_buffer_unref(&f->qp_table_buf);
 
     f->qp_table_buf = buf;
 
+FF_DISABLE_DEPRECATION_WARNINGS
     f->qscale_table = buf->data;
     f->qstride      = stride;
     f->qscale_type  = qp_type;
+FF_ENABLE_DEPRECATION_WARNINGS
 
     return 0;
 }
 
 int8_t *av_frame_get_qp_table(AVFrame *f, int *stride, int *type)
 {
+FF_DISABLE_DEPRECATION_WARNINGS
     *stride = f->qstride;
     *type   = f->qscale_type;
+FF_ENABLE_DEPRECATION_WARNINGS
 
     if (!f->qp_table_buf)
         return NULL;
 
     return f->qp_table_buf->data;
 }
+#endif
 
 const char *av_get_colorspace_name(enum AVColorSpace val)
 {
@@ -182,7 +188,7 @@ static int get_video_buffer(AVFrame *frame, int align)
     for (i = 0; i < 4 && frame->linesize[i]; i++) {
         int h = FFALIGN(frame->height, 32);
         if (i == 1 || i == 2)
-            h = FF_CEIL_RSHIFT(h, desc->log2_chroma_h);
+            h = AV_CEIL_RSHIFT(h, desc->log2_chroma_h);
 
         frame->buf[i] = av_buffer_alloc(frame->linesize[i] * h + 16 + 16/*STRIDE_ALIGN*/ - 1);
         if (!frame->buf[i])
@@ -192,7 +198,7 @@ static int get_video_buffer(AVFrame *frame, int align)
     }
     if (desc->flags & AV_PIX_FMT_FLAG_PAL || desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL) {
         av_buffer_unref(&frame->buf[1]);
-        frame->buf[1] = av_buffer_alloc(1024);
+        frame->buf[1] = av_buffer_alloc(AVPALETTE_SIZE);
         if (!frame->buf[1])
             goto fail;
         frame->data[1] = frame->buf[1]->data;
@@ -289,9 +295,6 @@ static int frame_copy_props(AVFrame *dst, const AVFrame *src, int force_copy)
     dst->palette_has_changed    = src->palette_has_changed;
     dst->sample_rate            = src->sample_rate;
     dst->opaque                 = src->opaque;
-#if FF_API_AVFRAME_LAVC
-    dst->type                   = src->type;
-#endif
     dst->pkt_pts                = src->pkt_pts;
     dst->pkt_dts                = src->pkt_dts;
     dst->pkt_pos                = src->pkt_pos;
@@ -312,7 +315,11 @@ static int frame_copy_props(AVFrame *dst, const AVFrame *src, int force_copy)
 
     av_dict_copy(&dst->metadata, src->metadata, 0);
 
+#if FF_API_ERROR_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
     memcpy(dst->error, src->error, sizeof(dst->error));
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
     for (i = 0; i < src->nb_side_data; i++) {
         const AVFrameSideData *sd_src = src->side_data[i];
@@ -345,9 +352,12 @@ static int frame_copy_props(AVFrame *dst, const AVFrame *src, int force_copy)
         av_dict_copy(&sd_dst->metadata, sd_src->metadata, 0);
     }
 
+#if FF_API_FRAME_QP
+FF_DISABLE_DEPRECATION_WARNINGS
     dst->qscale_table = NULL;
     dst->qstride      = 0;
     dst->qscale_type  = 0;
+    av_buffer_unref(&dst->qp_table_buf);
     if (src->qp_table_buf) {
         dst->qp_table_buf = av_buffer_ref(src->qp_table_buf);
         if (dst->qp_table_buf) {
@@ -356,6 +366,8 @@ static int frame_copy_props(AVFrame *dst, const AVFrame *src, int force_copy)
             dst->qscale_type  = src->qscale_type;
         }
     }
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
     return 0;
 }
@@ -463,6 +475,9 @@ void av_frame_unref(AVFrame *frame)
 {
     int i;
 
+    if (!frame)
+        return;
+
     wipe_side_data(frame);
 
     for (i = 0; i < FF_ARRAY_ELEMS(frame->buf); i++)
@@ -471,7 +486,9 @@ void av_frame_unref(AVFrame *frame)
         av_buffer_unref(&frame->extended_buf[i]);
     av_freep(&frame->extended_buf);
     av_dict_free(&frame->metadata);
+#if FF_API_FRAME_QP
     av_buffer_unref(&frame->qp_table_buf);
+#endif
 
     get_frame_defaults(frame);
 }
@@ -712,7 +729,12 @@ const char *av_frame_side_data_name(enum AVFrameSideDataType type)
     case AV_FRAME_DATA_DOWNMIX_INFO:    return "Metadata relevant to a downmix procedure";
     case AV_FRAME_DATA_REPLAYGAIN:      return "AVReplayGain";
     case AV_FRAME_DATA_DISPLAYMATRIX:   return "3x3 displaymatrix";
+    case AV_FRAME_DATA_AFD:             return "Active format description";
     case AV_FRAME_DATA_MOTION_VECTORS:  return "Motion vectors";
+    case AV_FRAME_DATA_SKIP_SAMPLES:    return "Skip samples";
+    case AV_FRAME_DATA_AUDIO_SERVICE_TYPE:          return "Audio service type";
+    case AV_FRAME_DATA_MASTERING_DISPLAY_METADATA:  return "Mastering display metadata";
+    case AV_FRAME_DATA_GOP_TIMECODE:                return "GOP timecode";
     }
     return NULL;
 }
diff --git a/libavutil/frame.h b/libavutil/frame.h
index e910b517..8dc40490 100644
--- a/libavutil/frame.h
+++ b/libavutil/frame.h
@@ -106,12 +106,22 @@ enum AVFrameSideDataType {
      * @endcode
      */
     AV_FRAME_DATA_SKIP_SAMPLES,
-
     /**
      * This side data must be associated with an audio frame and corresponds to
      * enum AVAudioServiceType defined in avcodec.h.
      */
     AV_FRAME_DATA_AUDIO_SERVICE_TYPE,
+    /**
+     * Mastering display metadata associated with a video frame. The payload is
+     * an AVMasteringDisplayMetadata type and contains information about the
+     * mastering display color volume.
+     */
+    AV_FRAME_DATA_MASTERING_DISPLAY_METADATA,
+    /**
+     * The GOP timecode in 25 bit timecode format. Data format is 64-bit integer.
+     * This is set on the first frame of a GOP that has a temporal reference of 0.
+     */
+    AV_FRAME_DATA_GOP_TIMECODE
 };
 
 enum AVActiveFormatDescription {
@@ -241,11 +251,6 @@ typedef struct AVFrame {
      */
     enum AVPictureType pict_type;
 
-#if FF_API_AVFRAME_LAVC
-    attribute_deprecated
-    uint8_t *base[AV_NUM_DATA_POINTERS];
-#endif
-
     /**
      * Sample aspect ratio for the video frame, 0/1 if unknown/unspecified.
      */
@@ -282,77 +287,17 @@ typedef struct AVFrame {
      */
     int quality;
 
-#if FF_API_AVFRAME_LAVC
-    attribute_deprecated
-    int reference;
-
-    /**
-     * QP table
-     */
-    attribute_deprecated
-    int8_t *qscale_table;
-    /**
-     * QP store stride
-     */
-    attribute_deprecated
-    int qstride;
-
-    attribute_deprecated
-    int qscale_type;
-
-    /**
-     * mbskip_table[mb]>=1 if MB didn't change
-     * stride= mb_width = (width+15)>>4
-     */
-    attribute_deprecated
-    uint8_t *mbskip_table;
-
-    /**
-     * motion vector table
-     * @code
-     * example:
-     * int mv_sample_log2= 4 - motion_subsample_log2;
-     * int mb_width= (width+15)>>4;
-     * int mv_stride= (mb_width << mv_sample_log2) + 1;
-     * motion_val[direction][x + y*mv_stride][0->mv_x, 1->mv_y];
-     * @endcode
-     */
-    int16_t (*motion_val[2])[2];
-
-    /**
-     * macroblock type table
-     * mb_type_base + mb_width + 2
-     */
-    attribute_deprecated
-    uint32_t *mb_type;
-
-    /**
-     * DCT coefficients
-     */
-    attribute_deprecated
-    short *dct_coeff;
-
-    /**
-     * motion reference frame index
-     * the order in which these are stored can depend on the codec.
-     */
-    attribute_deprecated
-    int8_t *ref_index[2];
-#endif
-
     /**
      * for some private data of the user
      */
     void *opaque;
 
+#if FF_API_ERROR_FRAME
     /**
-     * error
+     * @deprecated unused
      */
-    uint64_t error[AV_NUM_DATA_POINTERS];
-
-#if FF_API_AVFRAME_LAVC
     attribute_deprecated
-    int type;
+    uint64_t error[AV_NUM_DATA_POINTERS];
 #endif
 
     /**
@@ -376,17 +321,6 @@ typedef struct AVFrame {
      */
     int palette_has_changed;
 
-#if FF_API_AVFRAME_LAVC
-    attribute_deprecated
-    int buffer_hints;
-
-    /**
-     * Pan scan.
-     */
-    attribute_deprecated
-    struct AVPanScan *pan_scan;
-#endif
-
     /**
      * reordered opaque 64bit (generally an integer or a double precision float
      * PTS but can be anything).
@@ -398,24 +332,6 @@ typedef struct AVFrame {
      */
     int64_t reordered_opaque;
 
-#if FF_API_AVFRAME_LAVC
-    /**
-     * @deprecated this field is unused
-     */
-    attribute_deprecated void *hwaccel_picture_private;
-
-    attribute_deprecated
-    struct AVCodecContext *owner;
-    attribute_deprecated
-    void *thread_opaque;
-
-    /**
-     * log2 of the size of the block which a single vector in motion_val represents:
-     * (4->16x16, 3->8x8, 2-> 4x4, 1-> 2x2)
-     */
-    uint8_t motion_subsample_log2;
-#endif
-
     /**
      * Sample rate of the audio data.
      */
@@ -507,7 +423,7 @@ typedef struct AVFrame {
 
     /**
      * frame timestamp estimated using various heuristics, in stream time base
-     * Code outside libavcodec should access this field using:
+     * Code outside libavutil should access this field using:
      * av_frame_get_best_effort_timestamp(frame)
      * - encoding: unused
      * - decoding: set by libavcodec, read by user.
@@ -516,7 +432,7 @@ typedef struct AVFrame {
 
     /**
      * reordered pos from the last AVPacket that has been input into the decoder
-     * Code outside libavcodec should access this field using:
+     * Code outside libavutil should access this field using:
      * av_frame_get_pkt_pos(frame)
      * - encoding: unused
      * - decoding: Read by user.
@@ -526,7 +442,7 @@ typedef struct AVFrame {
     /**
      * duration of the corresponding packet, expressed in
      * AVStream->time_base units, 0 if unknown.
-     * Code outside libavcodec should access this field using:
+     * Code outside libavutil should access this field using:
      * av_frame_get_pkt_duration(frame)
      * - encoding: unused
      * - decoding: Read by user.
@@ -535,7 +451,7 @@ typedef struct AVFrame {
 
     /**
      * metadata.
-     * Code outside libavcodec should access this field using:
+     * Code outside libavutil should access this field using:
      * av_frame_get_metadata(frame)
      * - encoding: Set by user.
      * - decoding: Set by libavcodec.
@@ -546,7 +462,7 @@ typedef struct AVFrame {
      * decode error flags of the frame, set to a combination of
      * FF_DECODE_ERROR_xxx flags if the decoder produced a frame, but there
      * were errors during the decoding.
-     * Code outside libavcodec should access this field using:
+     * Code outside libavutil should access this field using:
      * av_frame_get_decode_error_flags(frame)
      * - encoding: unused
      * - decoding: set by libavcodec, read by user.
@@ -557,7 +473,7 @@ typedef struct AVFrame {
 
     /**
      * number of audio channels, only used for audio.
-     * Code outside libavcodec should access this field using:
+     * Code outside libavutil should access this field using:
      * av_frame_get_channels(frame)
      * - encoding: unused
      * - decoding: Read by user.
@@ -574,16 +490,34 @@ typedef struct AVFrame {
      */
     int pkt_size;
 
+#if FF_API_FRAME_QP
+    /**
+     * QP table
+     * Not to be accessed directly from outside libavutil
+     */
+    attribute_deprecated
+    int8_t *qscale_table;
+    /**
+     * QP store stride
+     * Not to be accessed directly from outside libavutil
+     */
+    attribute_deprecated
+    int qstride;
+
+    attribute_deprecated
+    int qscale_type;
+
     /**
      * Not to be accessed directly from outside libavutil
      */
     AVBufferRef *qp_table_buf;
+#endif
 } AVFrame;
 
 /**
  * Accessors for some AVFrame fields.
  * The position of these field in the structure is not part of the ABI,
- * they should not be accessed directly outside libavcodec.
+ * they should not be accessed directly outside libavutil.
  */
 int64_t av_frame_get_best_effort_timestamp(const AVFrame *frame);
 void    av_frame_set_best_effort_timestamp(AVFrame *frame, int64_t val);
@@ -604,8 +538,10 @@ void    av_frame_set_decode_error_flags   (AVFrame *frame, int     val);
 int     av_frame_get_pkt_size(const AVFrame *frame);
 void    av_frame_set_pkt_size(AVFrame *frame, int val);
 AVDictionary **avpriv_frame_get_metadatap(AVFrame *frame);
+#if FF_API_FRAME_QP
 int8_t *av_frame_get_qp_table(AVFrame *f, int *stride, int *type);
 int av_frame_set_qp_table(AVFrame *f, AVBufferRef *buf, int stride, int type);
+#endif
 enum AVColorSpace av_frame_get_colorspace(const AVFrame *frame);
 void    av_frame_set_colorspace(AVFrame *frame, enum AVColorSpace val);
 enum AVColorRange av_frame_get_color_range(const AVFrame *frame);
diff --git a/libavutil/hmac.c b/libavutil/hmac.c
index b63d1b25..3e11509a 100644
--- a/libavutil/hmac.c
+++ b/libavutil/hmac.c
@@ -207,7 +207,8 @@ static void test(AVHMAC *hmac, const uint8_t *key, int keylen,
 int main(void)
 {
     uint8_t key1[20], key3[131], data3[50];
-    enum AVHMACType i = AV_HMAC_SHA224;
+    AVHMAC *hmac;
+    enum AVHMACType i;
     static const uint8_t key2[]  = "Jefe";
     static const uint8_t data1[] = "Hi There";
     static const uint8_t data2[] = "what do ya want for nothing?";
@@ -216,34 +217,39 @@ int main(void)
     static const uint8_t data6[] = "This is a test using a larger than block-size key and a larger "
                             "than block-size data. The key needs to be hashed before being used"
                             " by the HMAC algorithm.";
-    AVHMAC *hmac = av_hmac_alloc(AV_HMAC_MD5);
-    if (!hmac)
-        return 1;
     memset(key1, 0x0b, sizeof(key1));
     memset(key3, 0xaa, sizeof(key3));
     memset(data3, 0xdd, sizeof(data3));
-    // RFC 2202 test vectors
-    test(hmac, key1, 16, data1, sizeof(data1));
-    test(hmac, key2, sizeof(key2), data2, sizeof(data2));
-    test(hmac, key3, 16, data3, sizeof(data3));
-    test(hmac, key3, 80, data4, sizeof(data4));
-    test(hmac, key3, 80, data5, sizeof(data5));
-    av_hmac_free(hmac);
 
-    /* SHA-1 */
-    hmac = av_hmac_alloc(AV_HMAC_SHA1);
-    if (!hmac)
-        return 1;
-    // RFC 2202 test vectors
-    test(hmac, key1, sizeof(key1), data1, sizeof(data1));
-    test(hmac, key2, sizeof(key2), data2, sizeof(data2));
-    test(hmac, key3, 20, data3, sizeof(data3));
-    test(hmac, key3, 80, data4, sizeof(data4));
-    test(hmac, key3, 80, data5, sizeof(data5));
-    av_hmac_free(hmac);
+    /* MD5, SHA-1 */
+    for (i = AV_HMAC_MD5; i <= AV_HMAC_SHA1; i++) {
+        hmac = av_hmac_alloc(i);
+        if (!hmac)
+            return 1;
+        // RFC 2202 test vectors
+        test(hmac, key1, hmac->hashlen, data1, sizeof(data1));
+        test(hmac, key2, sizeof(key2),  data2, sizeof(data2));
+        test(hmac, key3, hmac->hashlen, data3, sizeof(data3));
+        test(hmac, key3, 80,            data4, sizeof(data4));
+        test(hmac, key3, 80,            data5, sizeof(data5));
+        av_hmac_free(hmac);
+    }
 
     /* SHA-2 */
-    while (i <= AV_HMAC_SHA512) {
+    for (i = AV_HMAC_SHA224; i <= AV_HMAC_SHA256; i++) {
+        hmac = av_hmac_alloc(i);
+        if (!hmac)
+            return 1;
+        // RFC 4231 test vectors
+        test(hmac, key1, sizeof(key1), data1, sizeof(data1));
+        test(hmac, key2, sizeof(key2), data2, sizeof(data2));
+        test(hmac, key3, 20,           data3, sizeof(data3));
+        test(hmac, key3, sizeof(key3), data4, sizeof(data4));
+        test(hmac, key3, sizeof(key3), data6, sizeof(data6));
+        av_hmac_free(hmac);
+    }
+
+    for (i = AV_HMAC_SHA384; i <= AV_HMAC_SHA512; i++) {
         hmac = av_hmac_alloc(i);
         if (!hmac)
             return 1;
@@ -254,7 +260,6 @@ int main(void)
         test(hmac, key3, sizeof(key3), data4, sizeof(data4));
         test(hmac, key3, sizeof(key3), data6, sizeof(data6));
         av_hmac_free(hmac);
-        i++;
     }
     return 0;
 }
diff --git a/libavutil/hmac.h b/libavutil/hmac.h
index d36d4de1..576a0a4f 100644
--- a/libavutil/hmac.h
+++ b/libavutil/hmac.h
@@ -23,6 +23,7 @@
 
 #include <stdint.h>
 
+#include "version.h"
 /**
  * @defgroup lavu_hmac HMAC
  * @ingroup lavu_crypto
@@ -32,9 +33,9 @@
 enum AVHMACType {
     AV_HMAC_MD5,
     AV_HMAC_SHA1,
-    AV_HMAC_SHA224 = 10,
+    AV_HMAC_SHA224,
     AV_HMAC_SHA256,
-    AV_HMAC_SHA384,
+    AV_HMAC_SHA384 = 12,
     AV_HMAC_SHA512,
 };
 
diff --git a/libavutil/imgutils.c b/libavutil/imgutils.c
index ef0e6715..14744822 100644
--- a/libavutil/imgutils.c
+++ b/libavutil/imgutils.c
@@ -41,8 +41,8 @@ void av_image_fill_max_pixsteps(int max_pixsteps[4], int max_pixstep_comps[4],
 
     for (i = 0; i < 4; i++) {
         const AVComponentDescriptor *comp = &(pixdesc->comp[i]);
-        if ((comp->step_minus1+1) > max_pixsteps[comp->plane]) {
-            max_pixsteps[comp->plane] = comp->step_minus1+1;
+        if (comp->step > max_pixsteps[comp->plane]) {
+            max_pixsteps[comp->plane] = comp->step;
             if (max_pixstep_comps)
                 max_pixstep_comps[comp->plane] = i;
         }
@@ -78,7 +78,7 @@ int av_image_get_linesize(enum AVPixelFormat pix_fmt, int width, int plane)
     int max_step     [4];       /* max pixel step for each plane */
     int max_step_comp[4];       /* the component for each plane which has the max pixel step */
 
-    if ((unsigned)pix_fmt >= AV_PIX_FMT_NB || desc->flags & AV_PIX_FMT_FLAG_HWACCEL)
+    if (!desc || desc->flags & AV_PIX_FMT_FLAG_HWACCEL)
         return AVERROR(EINVAL);
 
     av_image_fill_max_pixsteps(max_step, max_step_comp, desc);
@@ -125,7 +125,6 @@ int av_image_fill_pointers(uint8_t *data[4], enum AVPixelFormat pix_fmt, int hei
 
     if (desc->flags & AV_PIX_FMT_FLAG_PAL ||
         desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL) {
-        size[0] = (size[0] + 3) & ~3;
         data[1] = ptr + size[0]; /* palette is stored here as 256 32 bits words */
         return size[0] + 256 * 4;
     }
@@ -216,8 +215,13 @@ int av_image_alloc(uint8_t *pointers[4], int linesizes[4],
         av_free(buf);
         return ret;
     }
-    if (desc->flags & AV_PIX_FMT_FLAG_PAL || desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL)
+    if ((desc->flags & AV_PIX_FMT_FLAG_PAL || desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL) && align < 4) {
+        av_log(NULL, AV_LOG_ERROR, "Formats with a palette require a minimum alignment of 4\n");
+        av_free(buf); /* rejected before the palette is written; do not leak the image buffer */
+        return AVERROR(EINVAL);
+    }
+    if (desc->flags & AV_PIX_FMT_FLAG_PAL || desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL)
         avpriv_set_systematic_pal2((uint32_t*)pointers[1], pix_fmt);
 
     if ((desc->flags & AV_PIX_FMT_FLAG_PAL ||
          desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL) &&
@@ -236,11 +240,21 @@ typedef struct ImgUtils {
     void *log_ctx;
 } ImgUtils;
 
-static const AVClass imgutils_class = { "IMGUTILS", av_default_item_name, NULL, LIBAVUTIL_VERSION_INT, offsetof(ImgUtils, log_offset), offsetof(ImgUtils, log_ctx) };
+static const AVClass imgutils_class = {
+    .class_name = "IMGUTILS",
+    .item_name  = av_default_item_name,
+    .version    = LIBAVUTIL_VERSION_INT,
+    .log_level_offset_offset   = offsetof(ImgUtils, log_offset),
+    .parent_log_context_offset = offsetof(ImgUtils, log_ctx),
+};
 
 int av_image_check_size(unsigned int w, unsigned int h, int log_offset, void *log_ctx)
 {
-    ImgUtils imgutils = { &imgutils_class, log_offset, log_ctx };
+    ImgUtils imgutils = {
+        .class      = &imgutils_class,
+        .log_offset = log_offset,
+        .log_ctx    = log_ctx,
+    };
 
     if ((int)w>0 && (int)h>0 && (w+128)*(uint64_t)(h+128) < INT_MAX/8)
         return 0;
@@ -315,7 +329,7 @@ void av_image_copy(uint8_t *dst_data[4], int dst_linesizes[4],
                 return;
             }
             if (i == 1 || i == 2) {
-                h = FF_CEIL_RSHIFT(height, desc->log2_chroma_h);
+                h = AV_CEIL_RSHIFT(height, desc->log2_chroma_h);
             }
             av_image_copy_plane(dst_data[i], dst_linesizes[i],
                                 src_data[i], src_linesizes[i],
@@ -360,7 +374,7 @@ int av_image_get_buffer_size(enum AVPixelFormat pix_fmt,
 
     // do not include palette for these pseudo-paletted formats
     if (desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL)
-        return width * height;
+        return FFALIGN(width, align) * height;
 
     return av_image_fill_arrays(data, linesize, NULL, pix_fmt,
                                 width, height, align);
@@ -398,7 +412,8 @@ int av_image_copy_to_buffer(uint8_t *dst, int dst_size,
     }
 
     if (desc->flags & AV_PIX_FMT_FLAG_PAL) {
-        uint32_t *d32 = (uint32_t *)(((size_t)dst + 3) & ~3);
+        uint32_t *d32 = (uint32_t *)dst;
+
         for (i = 0; i<256; i++)
             AV_WL32(d32 + i, AV_RN32(src_data[1] + 4*i));
     }
diff --git a/libavutil/integer.c b/libavutil/integer.c
index 5bcde0dc..6d6855fa 100644
--- a/libavutil/integer.c
+++ b/libavutil/integer.c
@@ -29,6 +29,8 @@
 #include "integer.h"
 #include "avassert.h"
 
+static const AVInteger zero_i;
+
 AVInteger av_add_i(AVInteger a, AVInteger b){
     int i, carry=0;
 
@@ -111,6 +113,12 @@ AVInteger av_mod_i(AVInteger *quot, AVInteger a, AVInteger b){
     AVInteger quot_temp;
     if(!quot) quot = &quot_temp;
 
+    if ((int16_t)a.v[AV_INTEGER_SIZE-1] < 0) {
+        a = av_mod_i(quot, av_sub_i(zero_i, a), b);
+        *quot = av_sub_i(zero_i, *quot);
+        return av_sub_i(zero_i, a);
+    }
+
     av_assert2((int16_t)a.v[AV_INTEGER_SIZE-1] >= 0 && (int16_t)b.v[AV_INTEGER_SIZE-1] >= 0);
     av_assert2(av_log2_i(b)>=0);
 
diff --git a/libavutil/internal.h b/libavutil/internal.h
index 859b9197..44f8c1ee 100644
--- a/libavutil/internal.h
+++ b/libavutil/internal.h
@@ -39,6 +39,7 @@
 #include "timer.h"
 #include "cpu.h"
 #include "dict.h"
+#include "macros.h"
 #include "pixfmt.h"
 #include "version.h"
 
@@ -250,9 +251,88 @@ void avpriv_request_sample(void *avc,
 #define SIZE_SPECIFIER "zu"
 #endif
 
+#ifdef DEBUG
+#   define ff_dlog(ctx, ...) av_log(ctx, AV_LOG_DEBUG, __VA_ARGS__)
+#else /* never executed, but the if (0) keeps the av_log() call visible to the compiler */
+#   define ff_dlog(ctx, ...) do { if (0) av_log(ctx, AV_LOG_DEBUG, __VA_ARGS__); } while (0)
+#endif
+
+/**
+ * Round a double to the nearest integer (via llrint()) and clamp the result
+ * to the [amin, amax] range.
+ * A plain floating point to integer conversion is undefined behavior when the
+ * value does not fit in the integer type, and in practice it does not
+ * saturate: on x86 it is usually compiled to cvttsd2si, which turns numbers
+ * > INT64_MAX into INT64_MIN. This helper is a slower but safe alternative
+ * that checks the range before converting, so the result is always
+ * mathematically sensible.
+ * @param a value to clip
+ * @param amin minimum value of the clip range
+ * @param amax maximum value of the clip range
+ * @return clipped value
+ */
+static av_always_inline av_const int64_t ff_rint64_clip(double a, int64_t amin, int64_t amax)
+{
+    int64_t rounded;
+#if defined(HAVE_AV_CONFIG_H) && defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2
+    if (amin > amax) abort();
+#endif
+    // Both 2^63 (INT64_MAX + 1) and -2^63 (INT64_MIN) are exact IEEE doubles,
+    // so out-of-range inputs can be rejected before any integer conversion.
+    if (a >=  9223372036854775808.0)
+        return amax;
+    if (a <= -9223372036854775808.0)
+        return amin;
+
+    // Any non-NaN a is now strictly inside (-2^63, 2^63): llrint() cannot overflow.
+    rounded = llrint(a);
+    if (rounded > amax)
+        rounded = amax;
+    else if (rounded < amin)
+        rounded = amin;
+    return rounded;
+}
+
+/**
+ * Compute 10^x for floating point values. Note: these functions are by no means
+ * "correctly rounded", and are meant as a fast, reasonably accurate approximation.
+ * For instance, maximum relative error for the double precision variant is
+ * ~ 1e-13 for very small and very large values.
+ * This is ~2x faster than GNU libm's approach, which is still off by 2ulp on
+ * some inputs. ff_exp10f() below is the single precision counterpart.
+ * @param x exponent
+ * @return 10^x
+ */
+static av_always_inline double ff_exp10(double x)
+{
+    return exp2(M_LOG2_10 * x);
+}
+
+static av_always_inline float ff_exp10f(float x)
+{
+    return exp2f(M_LOG2_10 * x);
+}
+
+/**
+ * Compute x^y for floating point x, y as exp(log(x) * y). This is faster than
+ * the libm powf() mainly for two reasons:
+ * 1. It does not handle any edge cases. In particular, this is only guaranteed
+ * to work correctly for x > 0.
+ * 2. It is not as accurate as a standard nearly "correctly rounded" libm variant.
+ * @param x base, must be > 0
+ * @param y exponent
+ * @return approximation of x^y
+ */
+static av_always_inline float ff_fast_powf(float x, float y)
+{
+    return expf(logf(x) * y);
+}
+
+
 /**
  * A wrapper for open() setting O_CLOEXEC.
  */
+av_warn_unused_result
 int avpriv_open(const char *filename, int flags, ...);
 
 int avpriv_set_systematic_pal2(uint32_t pal[256], enum AVPixelFormat pix_fmt);
@@ -270,10 +350,8 @@ static av_always_inline av_const int avpriv_mirror(int x, int w)
     return x;
 }
 
-#if FF_API_GET_CHANNEL_LAYOUT_COMPAT
-uint64_t ff_get_channel_layout(const char *name, int compat);
-#endif
-
 void ff_check_pixfmt_descriptors(void);
 
+extern const uint8_t ff_reverse[256];
+
 #endif /* AVUTIL_INTERNAL_H */
diff --git a/libavutil/intmath.c b/libavutil/intmath.c
index 1f725c74..b0c00e1c 100644
--- a/libavutil/intmath.c
+++ b/libavutil/intmath.c
@@ -32,8 +32,3 @@ int av_log2_16bit(unsigned v)
 {
     return ff_log2_16bit(v);
 }
-
-int av_ctz(int v)
-{
-    return ff_ctz(v);
-}
diff --git a/libavutil/intmath.h b/libavutil/intmath.h
index f5ecc77b..9573109e 100644
--- a/libavutil/intmath.h
+++ b/libavutil/intmath.h
@@ -33,11 +33,6 @@
 #   include "x86/intmath.h"
 #endif
 
-/**
- * @addtogroup lavu_internal
- * @{
- */
-
 #if HAVE_FAST_CLZ
 #if AV_GCC_VERSION_AT_LEAST(3,4)
 #ifndef ff_log2
@@ -46,21 +41,13 @@
 #      define ff_log2_16bit av_log2
 #   endif
 #endif /* ff_log2 */
-#elif defined( __INTEL_COMPILER )
-#ifndef ff_log2
-#   define ff_log2(x) (_bit_scan_reverse((x)|1))
-#   ifndef ff_log2_16bit
-#      define ff_log2_16bit av_log2
-#   endif
-#endif /* ff_log2 */
-#endif
 #endif /* AV_GCC_VERSION_AT_LEAST(3,4) */
+#endif
 
 extern const uint8_t ff_log2_tab[256];
 
 #ifndef ff_log2
 #define ff_log2 ff_log2_c
-#if !defined( _MSC_VER )
 static av_always_inline av_const int ff_log2_c(unsigned int v)
 {
     int n = 0;
@@ -76,15 +63,6 @@ static av_always_inline av_const int ff_log2_c(unsigned int v)
 
     return n;
 }
-#else
-static av_always_inline av_const int ff_log2_c(unsigned int v)
-{
-    unsigned long n;
-    _BitScanReverse(&n, v|1);
-    return n;
-}
-#define ff_log2_16bit av_log2
-#endif
 #endif
 
 #ifndef ff_log2_16bit
@@ -105,10 +83,6 @@ static av_always_inline av_const int ff_log2_16bit_c(unsigned int v)
 #define av_log2       ff_log2
 #define av_log2_16bit ff_log2_16bit
 
-/**
- * @}
- */
-
 /**
  * @addtogroup lavu_math
  * @{
@@ -119,61 +93,71 @@ static av_always_inline av_const int ff_log2_16bit_c(unsigned int v)
 #ifndef ff_ctz
 #define ff_ctz(v) __builtin_ctz(v)
 #endif
-#elif defined( __INTEL_COMPILER )
-#ifndef ff_ctz
-#define ff_ctz(v) _bit_scan_forward(v)
+#ifndef ff_ctzll
+#define ff_ctzll(v) __builtin_ctzll(v)
+#endif
+#ifndef ff_clz
+#define ff_clz(v) __builtin_clz(v)
 #endif
 #endif
 #endif
 
 #ifndef ff_ctz
 #define ff_ctz ff_ctz_c
-#if !defined( _MSC_VER )
+/**
+ * Trailing zero bit count.
+ *
+ * @param v  input value. If v is 0, the result is undefined.
+ * @return   the number of trailing 0-bits
+ */
+/* De Bruijn multiply-and-lookup, see http://supertech.csail.mit.edu/papers/debruijn.pdf.
+ * The lowest set bit is isolated in unsigned arithmetic so that INT_MIN is well defined. */
 static av_always_inline av_const int ff_ctz_c(int v)
 {
-    int c;
+    static const uint8_t debruijn_ctz32[32] = {
+        0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
+        31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
+    };
+    return debruijn_ctz32[(uint32_t)(((unsigned)v & -(unsigned)v) * 0x077CB531U) >> 27];
+}
+#endif
 
-    if (v & 0x1)
-        return 0;
+#ifndef ff_ctzll
+#define ff_ctzll ff_ctzll_c
+/* 64-bit trailing zero count via De Bruijn lookup (http://supertech.csail.mit.edu/papers/debruijn.pdf).
+ * The lowest set bit is isolated in unsigned arithmetic so that LLONG_MIN is well defined. */
+static av_always_inline av_const int ff_ctzll_c(long long v)
+{
+    static const uint8_t debruijn_ctz64[64] = {
+        0, 1, 2, 53, 3, 7, 54, 27, 4, 38, 41, 8, 34, 55, 48, 28,
+        62, 5, 39, 46, 44, 42, 22, 9, 24, 35, 59, 56, 49, 18, 29, 11,
+        63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10,
+        51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12
+    };
+    return debruijn_ctz64[(uint64_t)(((uint64_t)v & -(uint64_t)v) * 0x022FDD63CC95386DU) >> 58];
+}
+#endif
 
-    c = 1;
-    if (!(v & 0xffff)) {
-        v >>= 16;
-        c += 16;
-    }
-    if (!(v & 0xff)) {
-        v >>= 8;
-        c += 8;
-    }
-    if (!(v & 0xf)) {
-        v >>= 4;
-        c += 4;
-    }
-    if (!(v & 0x3)) {
-        v >>= 2;
-        c += 2;
+#ifndef ff_clz
+#define ff_clz ff_clz_c
+static av_always_inline av_const unsigned ff_clz_c(unsigned x)
+{
+    unsigned i = sizeof(x) * 8;
+
+    while (x) {
+        x >>= 1;
+        i--;
     }
-    c -= v & 0x1;
 
-    return c;
-}
-#else
-static av_always_inline av_const int ff_ctz_c( int v )
-{
-    unsigned long c;
-    _BitScanForward(&c, v);
-    return c;
+    return i;
 }
 #endif
-#endif
 
-/**
- * Trailing zero bit count.
- *
- * @param v  input value. If v is 0, the result is undefined.
- * @return   the number of trailing 0-bits
- */
-int av_ctz(int v);
+#if AV_GCC_VERSION_AT_LEAST(3,4)
+#ifndef av_parity
+#define av_parity __builtin_parity
+#endif
+#endif
 
 /**
  * @}
diff --git a/libavutil/libm.h b/libavutil/libm.h
index 6c17b287..a8199623 100644
--- a/libavutil/libm.h
+++ b/libavutil/libm.h
@@ -1,4 +1,5 @@
 /*
+ * erf function: Copyright (c) 2006 John Maddock
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
@@ -28,6 +29,7 @@
 #include "config.h"
 #include "attributes.h"
 #include "intfloat.h"
+#include "mathematics.h"
 
 #if HAVE_MIPSFPU && HAVE_INLINE_ASM
 #include "libavutil/mips/libm_mips.h"
@@ -36,45 +38,254 @@
 #if !HAVE_ATANF
 #undef atanf
 #define atanf(x) ((float)atan(x))
-#endif
+#endif /* HAVE_ATANF */
 
 #if !HAVE_ATAN2F
 #undef atan2f
 #define atan2f(y, x) ((float)atan2(y, x))
-#endif
+#endif /* HAVE_ATAN2F */
 
 #if !HAVE_POWF
 #undef powf
 #define powf(x, y) ((float)pow(x, y))
-#endif
+#endif /* HAVE_POWF */
 
 #if !HAVE_CBRT
 static av_always_inline double cbrt(double x)
 {
     return x < 0 ? -pow(-x, 1.0 / 3.0) : pow(x, 1.0 / 3.0);
 }
-#endif
+#endif /* HAVE_CBRT */
 
 #if !HAVE_CBRTF
 static av_always_inline float cbrtf(float x)
 {
     return x < 0 ? -powf(-x, 1.0 / 3.0) : powf(x, 1.0 / 3.0);
 }
-#endif
+#endif /* HAVE_CBRTF */
+
+#if !HAVE_COPYSIGN
+static av_always_inline double copysign(double x, double y)
+{
+    const uint64_t sign_bit  = UINT64_C(0x8000000000000000); /* top bit of the 64-bit pattern */
+    const uint64_t magnitude = av_double2int(x) & ~sign_bit;  /* bits of x with the sign cleared */
+    return av_int2double(magnitude | (av_double2int(y) & sign_bit));
+}
+#endif /* HAVE_COPYSIGN */
 
 #if !HAVE_COSF
 #undef cosf
 #define cosf(x) ((float)cos(x))
+#endif /* HAVE_COSF */
+
+#if !HAVE_ERF
+static inline double ff_eval_poly(const double *coeff, int size, double x) {
+    int k = size - 1;           /* index of the highest-order coefficient */
+    double acc = coeff[k];
+    while (k-- > 0) {           /* Horner's scheme: acc = acc * x + coeff[k] */
+        acc *= x;
+        acc += coeff[k];
+    }
+    return acc;
+}
+
+/**
+ * erf function
+ * Algorithm taken from the Boost project, source:
+ * http://www.boost.org/doc/libs/1_46_1/boost/math/special_functions/erf.hpp
+ * Use, modification and distribution are subject to the
+ * Boost Software License, Version 1.0 (see notice below).
+ * Boost Software License - Version 1.0 - August 17th, 2003
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+ */
+static inline double erf(double z)
+{
+#ifndef FF_ARRAY_ELEMS
+#define FF_ARRAY_ELEMS(a) (sizeof(a) / sizeof((a)[0]))
 #endif
+    double result;
+
+    /* handle the symmetry: erf(-x) = -erf(x) */
+    if (z < 0)
+        return -erf(-z);
+
+    /* branch based on range of z, and pick appropriate approximation */
+    if (z == 0)
+        return 0;
+    else if (z < 1e-10) /* erf(z) ~ z * 2/sqrt(pi); the factor is split as 1.125 + remainder */
+        return z * 1.125 + z * 0.003379167095512573896158903121545171688;
+    else if (z < 0.5) {
+        // Maximum Deviation Found:                     1.561e-17
+        // Expected Error Term:                         1.561e-17
+        // Maximum Relative Change in Control Points:   1.155e-04
+        // Max Error found at double precision =        2.961182e-17
+
+        static const double y = 1.044948577880859375;
+        static const double p[] = {
+            0.0834305892146531832907,
+            -0.338165134459360935041,
+            -0.0509990735146777432841,
+            -0.00772758345802133288487,
+            -0.000322780120964605683831,
+        };
+        static const double q[] = {
+            1,
+            0.455004033050794024546,
+            0.0875222600142252549554,
+            0.00858571925074406212772,
+            0.000370900071787748000569,
+        };
+        double zz = z * z;
+        return z * (y + ff_eval_poly(p, FF_ARRAY_ELEMS(p), zz) / ff_eval_poly(q, FF_ARRAY_ELEMS(q), zz));
+    }
+    /* from here on each branch approximates erfc(z) and returns 1 - erfc(z) */
+    else if (z < 1.5) {
+        // Maximum Deviation Found:                     3.702e-17
+        // Expected Error Term:                         3.702e-17
+        // Maximum Relative Change in Control Points:   2.845e-04
+        // Max Error found at double precision =        4.841816e-17
+        static const double y = 0.405935764312744140625;
+        static const double p[] = {
+            -0.098090592216281240205,
+            0.178114665841120341155,
+            0.191003695796775433986,
+            0.0888900368967884466578,
+            0.0195049001251218801359,
+            0.00180424538297014223957,
+        };
+        static const double q[] = {
+            1,
+            1.84759070983002217845,
+            1.42628004845511324508,
+            0.578052804889902404909,
+            0.12385097467900864233,
+            0.0113385233577001411017,
+            0.337511472483094676155e-5,
+        };
+        result = y + ff_eval_poly(p, FF_ARRAY_ELEMS(p), z - 0.5) / ff_eval_poly(q, FF_ARRAY_ELEMS(q), z - 0.5);
+        result *= exp(-z * z) / z;
+        return 1 - result;
+    }
+    else if (z < 2.5) {
+        // Max Error found at double precision =        6.599585e-18
+        // Maximum Deviation Found:                     3.909e-18
+        // Expected Error Term:                         3.909e-18
+        // Maximum Relative Change in Control Points:   9.886e-05
+        static const double y = 0.50672817230224609375;
+        static const double p[] = {
+            -0.0243500476207698441272,
+            0.0386540375035707201728,
+            0.04394818964209516296,
+            0.0175679436311802092299,
+            0.00323962406290842133584,
+            0.000235839115596880717416,
+        };
+        static const double q[] = {
+            1,
+            1.53991494948552447182,
+            0.982403709157920235114,
+            0.325732924782444448493,
+            0.0563921837420478160373,
+            0.00410369723978904575884,
+        };
+        result = y + ff_eval_poly(p, FF_ARRAY_ELEMS(p), z - 1.5) / ff_eval_poly(q, FF_ARRAY_ELEMS(q), z - 1.5);
+        result *= exp(-z * z) / z;
+        return 1 - result;
+    }
+    else if (z < 4.5) {
+        // Maximum Deviation Found:                     1.512e-17
+        // Expected Error Term:                         1.512e-17
+        // Maximum Relative Change in Control Points:   2.222e-04
+        // Max Error found at double precision =        2.062515e-17
+        static const double y = 0.5405750274658203125;
+        static const double p[] = {
+            0.00295276716530971662634,
+            0.0137384425896355332126,
+            0.00840807615555585383007,
+            0.00212825620914618649141,
+            0.000250269961544794627958,
+            0.113212406648847561139e-4,
+        };
+        static const double q[] = {
+            1,
+            1.04217814166938418171,
+            0.442597659481563127003,
+            0.0958492726301061423444,
+            0.0105982906484876531489,
+            0.000479411269521714493907,
+        };
+        result = y + ff_eval_poly(p, FF_ARRAY_ELEMS(p), z - 3.5) / ff_eval_poly(q, FF_ARRAY_ELEMS(q), z - 3.5);
+        result *= exp(-z * z) / z;
+        return 1 - result;
+    }
+    /* This differs from Boost: its claim that erfc(x) underflows past 5.8 is
+     * slightly incorrect, so the cutoff used here is 5.92
+     * (saturation really happens somewhere between 5.9125 and 5.925) */
+    else if (z < 5.92) {
+        // Max Error found at double precision =        2.997958e-17
+        // Maximum Deviation Found:                     2.860e-17
+        // Expected Error Term:                         2.859e-17
+        // Maximum Relative Change in Control Points:   1.357e-05
+        static const double y = 0.5579090118408203125;
+        static const double p[] = {
+            0.00628057170626964891937,
+            0.0175389834052493308818,
+            -0.212652252872804219852,
+            -0.687717681153649930619,
+            -2.5518551727311523996,
+            -3.22729451764143718517,
+            -2.8175401114513378771,
+        };
+        static const double q[] = {
+            1,
+            2.79257750980575282228,
+            11.0567237927800161565,
+            15.930646027911794143,
+            22.9367376522880577224,
+            13.5064170191802889145,
+            5.48409182238641741584,
+        };
+        result = y + ff_eval_poly(p, FF_ARRAY_ELEMS(p), 1 / z) / ff_eval_poly(q, FF_ARRAY_ELEMS(q), 1 / z);
+        result *= exp(-z * z) / z;
+        return 1 - result;
+    }
+    /* NaN is the only value that compares unequal to itself; isnan() is avoided for max portability */
+    else if (z != z)
+        return z;
+    /* finally return saturated result */
+    else
+        return 1;
+}
+#endif /* HAVE_ERF */
 
 #if !HAVE_EXPF
 #undef expf
 #define expf(x) ((float)exp(x))
-#endif
+#endif /* HAVE_EXPF */
 
 #if !HAVE_EXP2
 #undef exp2
-#define exp2(x) exp((x) * 0.693147180559945)
+#define exp2(x) exp((x) * M_LN2)
 #endif /* HAVE_EXP2 */
 
 #if !HAVE_EXP2F
@@ -83,29 +294,100 @@ static av_always_inline float cbrtf(float x)
 #endif /* HAVE_EXP2F */
 
 #if !HAVE_ISINF
-static av_always_inline av_const int isinf(float x)
+#undef isinf
+/* Note: these do not follow the BSD/Apple/GNU convention of returning -1 for
+-Inf, +1 for Inf, 0 otherwise, but merely follow the POSIX/ISO mandated spec of
+returning a non-zero value for +/-Inf, 0 otherwise. */
+static av_always_inline av_const int avpriv_isinff(float x)
 {
     uint32_t v = av_float2int(x);
     if ((v & 0x7f800000) != 0x7f800000)
         return 0;
     return !(v & 0x007fffff);
 }
+
+static av_always_inline av_const int avpriv_isinf(double x)
+{
+    /* Inf: exponent bits all ones and a zero mantissa; the sign bit is ignored. */
+    const uint64_t bits     = av_double2int(x);
+    const uint64_t exponent = bits & 0x7ff0000000000000;
+    const uint64_t mantissa = bits & 0x000fffffffffffff;
+    return exponent == 0x7ff0000000000000 && !mantissa;
+}
+
+#define isinf(x)                  \
+    (sizeof(x) == sizeof(float)   \
+        ? avpriv_isinff(x)        \
+        : avpriv_isinf(x))
 #endif /* HAVE_ISINF */
 
 #if !HAVE_ISNAN
-static av_always_inline av_const int isnan(float x)
+static av_always_inline av_const int avpriv_isnanf(float x)
 {
     uint32_t v = av_float2int(x);
     if ((v & 0x7f800000) != 0x7f800000)
         return 0;
     return v & 0x007fffff;
 }
+
+static av_always_inline av_const int avpriv_isnan(double x)
+{
+    /* NaN: exponent bits all ones and a non-zero mantissa; the sign bit is ignored. */
+    const uint64_t bits     = av_double2int(x);
+    const uint64_t exponent = bits & 0x7ff0000000000000;
+    const uint64_t mantissa = bits & 0x000fffffffffffff;
+    return exponent == 0x7ff0000000000000 && mantissa != 0;
+}
+
+#define isnan(x)                  \
+    (sizeof(x) == sizeof(float)   \
+        ? avpriv_isnanf(x)        \
+        : avpriv_isnan(x))
 #endif /* HAVE_ISNAN */
 
+#if !HAVE_ISFINITE
+static av_always_inline av_const int avpriv_isfinitef(float x)
+{
+    const uint32_t exp_mask = 0x7f800000; /* an all-ones exponent means Inf or NaN */
+    return (av_float2int(x) & exp_mask) != exp_mask;
+}
+
+static av_always_inline av_const int avpriv_isfinite(double x)
+{
+    const uint64_t exp_mask = 0x7ff0000000000000; /* an all-ones exponent means Inf or NaN */
+    return (av_double2int(x) & exp_mask) != exp_mask;
+}
+
+#define isfinite(x)                  \
+    (sizeof(x) == sizeof(float)      \
+        ? avpriv_isfinitef(x)        \
+        : avpriv_isfinite(x))
+#endif /* HAVE_ISFINITE */
+
+#if !HAVE_HYPOT
+static inline av_const double hypot(double x, double y)
+{
+    double temp;
+    x = fabs(x);
+    y = fabs(y);
+    /* An infinite operand wins even over NaN: the result is +Inf. */
+    if (isinf(x) || isinf(y))
+        return av_int2double(0x7ff0000000000000);
+    if (x == 0 || y == 0)
+        return x + y; /* the other operand is the length (a NaN propagates) */
+    if (x < y) {
+        temp = x;
+        x = y;
+        y = temp;
+    }
+    /* For ordered operands x >= y > 0 here: scaling by x keeps y*y from overflowing. */
+    y = y/x;
+    return x*sqrt(1 + y*y);
+}
+#endif /* HAVE_HYPOT */
+
 #if !HAVE_LDEXPF
 #undef ldexpf
 #define ldexpf(x, exp) ((float)ldexp(x, exp))
-#endif
+#endif /* HAVE_LDEXPF */
 
 #if !HAVE_LLRINT
 #undef llrint
@@ -130,12 +412,12 @@ static av_always_inline av_const int isnan(float x)
 #if !HAVE_LOG10F
 #undef log10f
 #define log10f(x) ((float)log10(x))
-#endif
+#endif /* HAVE_LOG10F */
 
 #if !HAVE_SINF
 #undef sinf
 #define sinf(x) ((float)sin(x))
-#endif
+#endif /* HAVE_SINF */
 
 #if !HAVE_RINT
 static inline double rint(double x)
diff --git a/libavutil/lls.c b/libavutil/lls.c
index f77043bc..7dd718da 100644
--- a/libavutil/lls.c
+++ b/libavutil/lls.c
@@ -29,6 +29,8 @@
 #include <string.h>
 
 #include "attributes.h"
+#include "config.h"
+#include "internal.h"
 #include "version.h"
 #include "lls.h"
 
@@ -55,7 +57,7 @@ void avpriv_solve_lls(LLSModel *m, double threshold, unsigned short min_order)
         for (j = i; j < count; j++) {
             double sum = covar[i][j];
 
-            for (k = i - 1; k >= 0; k--)
+            for (k = 0; k <= i-1; k++)
                 sum -= factor[i][k] * factor[j][k];
 
             if (i == j) {
@@ -71,7 +73,7 @@ void avpriv_solve_lls(LLSModel *m, double threshold, unsigned short min_order)
     for (i = 0; i < count; i++) {
         double sum = covar_y[i + 1];
 
-        for (k = i - 1; k >= 0; k--)
+        for (k = 0; k <= i-1; k++)
             sum -= factor[i][k] * m->coeff[0][k];
 
         m->coeff[0][i] = sum / factor[i][i];
diff --git a/libavutil/lls.h b/libavutil/lls.h
index 5635b5b2..1a276d53 100644
--- a/libavutil/lls.h
+++ b/libavutil/lls.h
@@ -23,7 +23,7 @@
 #ifndef AVUTIL_LLS_H
 #define AVUTIL_LLS_H
 
-#include "common.h"
+#include "macros.h"
 #include "mem.h"
 #include "version.h"
 
diff --git a/libavutil/log.c b/libavutil/log.c
index b2bc65cd..45835195 100644
--- a/libavutil/log.c
+++ b/libavutil/log.c
@@ -343,7 +343,7 @@ void av_log_default_callback(void* ptr, int level, const char* fmt, va_list vl)
 
 #if CONFIG_VALGRIND_BACKTRACE
     if (level <= BACKTRACE_LOGLEVEL)
-        VALGRIND_PRINTF_BACKTRACE("");
+        VALGRIND_PRINTF_BACKTRACE("%s", "");
 #endif
 end:
     av_bprint_finalize(part+3, NULL);
diff --git a/libavutil/log.h b/libavutil/log.h
index db7eb3f7..321748cd 100644
--- a/libavutil/log.h
+++ b/libavutil/log.h
@@ -196,13 +196,13 @@ typedef struct AVClass {
  */
 #define AV_LOG_DEBUG    48
 
-#define AV_LOG_MAX_OFFSET (AV_LOG_DEBUG - AV_LOG_QUIET)
-
 /**
  * Extremely verbose debugging, useful for libav* development.
  */
 #define AV_LOG_TRACE    56
 
+#define AV_LOG_MAX_OFFSET (AV_LOG_TRACE - AV_LOG_QUIET)
+
 /**
  * @}
  */
diff --git a/libavutil/macros.h b/libavutil/macros.h
index 44653237..2007ee56 100644
--- a/libavutil/macros.h
+++ b/libavutil/macros.h
@@ -45,4 +45,6 @@
 
 #define AV_PRAGMA(s) _Pragma(#s)
 
+#define FFALIGN(x, a) (((x)+(a)-1)&~((a)-1)) /* round x up to a multiple of a; a must be a power of 2 */
+
 #endif /* AVUTIL_MACROS_H */
diff --git a/libavutil/mastering_display_metadata.c b/libavutil/mastering_display_metadata.c
new file mode 100644
index 00000000..e1683e55
--- /dev/null
+++ b/libavutil/mastering_display_metadata.c
@@ -0,0 +1,43 @@
+/**
+ * Copyright (c) 2016 Neil Birkbeck <neil.birkbeck@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+#include <string.h>
+
+#include "mastering_display_metadata.h"
+#include "mem.h"
+
+AVMasteringDisplayMetadata *av_mastering_display_metadata_alloc(void)
+{
+    return av_mallocz(sizeof(AVMasteringDisplayMetadata)); /* allocator result is returned as-is */
+}
+
+AVMasteringDisplayMetadata *av_mastering_display_metadata_create_side_data(AVFrame *frame)
+{
+    AVMasteringDisplayMetadata *metadata;
+    AVFrameSideData *sd = av_frame_new_side_data(frame, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA,
+                                                 sizeof(*metadata));
+    if (!sd)
+        return NULL;
+    /* Zero the payload so the caller starts from an all-zero struct. */
+    metadata = (AVMasteringDisplayMetadata *)sd->data;
+    memset(metadata, 0, sizeof(*metadata));
+    return metadata;
+}
diff --git a/libavutil/mastering_display_metadata.h b/libavutil/mastering_display_metadata.h
new file mode 100644
index 00000000..936533fe
--- /dev/null
+++ b/libavutil/mastering_display_metadata.h
@@ -0,0 +1,89 @@
+/**
+ * Copyright (c) 2016 Neil Birkbeck <neil.birkbeck@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_MASTERING_DISPLAY_METADATA_H
+#define AVUTIL_MASTERING_DISPLAY_METADATA_H
+
+#include "frame.h"
+#include "rational.h"
+
+
+/**
+ * Mastering display metadata capable of representing the color volume of
+ * the display used to master the content (SMPTE 2086:2014).
+ *
+ * To be used as payload of an AVFrameSideData or AVPacketSideData with the
+ * appropriate type.
+ *
+ * @note The struct should be allocated with av_mastering_display_metadata_alloc()
+ *       and its size is not a part of the public ABI.
+ */
+typedef struct AVMasteringDisplayMetadata {
+    /**
+     * CIE 1931 xy chromaticity coords of the three color primaries (r, g, b order).
+     */
+    AVRational display_primaries[3][2];
+
+    /**
+     * CIE 1931 xy chromaticity coords of the white point.
+     */
+    AVRational white_point[2];
+
+    /**
+     * Min luminance of mastering display (cd/m^2).
+     */
+    AVRational min_luminance;
+
+    /**
+     * Max luminance of mastering display (cd/m^2).
+     */
+    AVRational max_luminance;
+
+    /**
+     * Flag indicating whether the display primaries (and white point) are set.
+     */
+    int has_primaries;
+
+    /**
+     * Flag indicating whether the luminance values (min_luminance and max_luminance) have been set.
+     */
+    int has_luminance;
+
+} AVMasteringDisplayMetadata;
+
+/**
+ * Allocate an AVMasteringDisplayMetadata structure and set its fields to
+ * default values. The resulting struct can be freed using av_freep().
+ *
+ * @return An AVMasteringDisplayMetadata filled with default values or NULL
+ *         on failure.
+ */
+AVMasteringDisplayMetadata *av_mastering_display_metadata_alloc(void);
+
+/**
+ * Allocate a complete AVMasteringDisplayMetadata and add it to the frame.
+ *
+ * @param frame The frame which side data is added to.
+ *
+ * @return The AVMasteringDisplayMetadata structure to be filled by caller.
+ */
+AVMasteringDisplayMetadata *av_mastering_display_metadata_create_side_data(AVFrame *frame);
+
+#endif /* AVUTIL_MASTERING_DISPLAY_METADATA_H */
diff --git a/libavutil/mathematics.c b/libavutil/mathematics.c
index 126cffc3..20ff37f5 100644
--- a/libavutil/mathematics.c
+++ b/libavutil/mathematics.c
@@ -27,37 +27,32 @@
 #include <limits.h>
 
 #include "mathematics.h"
+#include "libavutil/intmath.h"
 #include "libavutil/common.h"
 #include "avassert.h"
 #include "version.h"
 
-#if FF_API_AV_REVERSE
-const uint8_t av_reverse[256] = {
-0x00,0x80,0x40,0xC0,0x20,0xA0,0x60,0xE0,0x10,0x90,0x50,0xD0,0x30,0xB0,0x70,0xF0,
-0x08,0x88,0x48,0xC8,0x28,0xA8,0x68,0xE8,0x18,0x98,0x58,0xD8,0x38,0xB8,0x78,0xF8,
-0x04,0x84,0x44,0xC4,0x24,0xA4,0x64,0xE4,0x14,0x94,0x54,0xD4,0x34,0xB4,0x74,0xF4,
-0x0C,0x8C,0x4C,0xCC,0x2C,0xAC,0x6C,0xEC,0x1C,0x9C,0x5C,0xDC,0x3C,0xBC,0x7C,0xFC,
-0x02,0x82,0x42,0xC2,0x22,0xA2,0x62,0xE2,0x12,0x92,0x52,0xD2,0x32,0xB2,0x72,0xF2,
-0x0A,0x8A,0x4A,0xCA,0x2A,0xAA,0x6A,0xEA,0x1A,0x9A,0x5A,0xDA,0x3A,0xBA,0x7A,0xFA,
-0x06,0x86,0x46,0xC6,0x26,0xA6,0x66,0xE6,0x16,0x96,0x56,0xD6,0x36,0xB6,0x76,0xF6,
-0x0E,0x8E,0x4E,0xCE,0x2E,0xAE,0x6E,0xEE,0x1E,0x9E,0x5E,0xDE,0x3E,0xBE,0x7E,0xFE,
-0x01,0x81,0x41,0xC1,0x21,0xA1,0x61,0xE1,0x11,0x91,0x51,0xD1,0x31,0xB1,0x71,0xF1,
-0x09,0x89,0x49,0xC9,0x29,0xA9,0x69,0xE9,0x19,0x99,0x59,0xD9,0x39,0xB9,0x79,0xF9,
-0x05,0x85,0x45,0xC5,0x25,0xA5,0x65,0xE5,0x15,0x95,0x55,0xD5,0x35,0xB5,0x75,0xF5,
-0x0D,0x8D,0x4D,0xCD,0x2D,0xAD,0x6D,0xED,0x1D,0x9D,0x5D,0xDD,0x3D,0xBD,0x7D,0xFD,
-0x03,0x83,0x43,0xC3,0x23,0xA3,0x63,0xE3,0x13,0x93,0x53,0xD3,0x33,0xB3,0x73,0xF3,
-0x0B,0x8B,0x4B,0xCB,0x2B,0xAB,0x6B,0xEB,0x1B,0x9B,0x5B,0xDB,0x3B,0xBB,0x7B,0xFB,
-0x07,0x87,0x47,0xC7,0x27,0xA7,0x67,0xE7,0x17,0x97,0x57,0xD7,0x37,0xB7,0x77,0xF7,
-0x0F,0x8F,0x4F,0xCF,0x2F,0xAF,0x6F,0xEF,0x1F,0x9F,0x5F,0xDF,0x3F,0xBF,0x7F,0xFF,
-};
-#endif
-
-int64_t av_gcd(int64_t a, int64_t b)
-{
-    if (b)
-        return av_gcd(b, a % b);
-    else
+/* Stein's binary GCD algorithm:
+ * https://en.wikipedia.org/wiki/Binary_GCD_algorithm */
+int64_t av_gcd(int64_t a, int64_t b) {
+    int za, zb, k;
+    int64_t u, v;
+    if (a == 0)
+        return b;
+    if (b == 0)
         return a;
+    za = ff_ctzll(a);
+    zb = ff_ctzll(b);
+    k  = FFMIN(za, zb);
+    u = llabs(a >> za);
+    v = llabs(b >> zb);
+    while (u != v) {
+        if (u > v)
+            FFSWAP(int64_t, v, u);
+        v -= u;
+        v >>= ff_ctzll(v);
+    }
+    return (uint64_t)u << k;
 }
 
 int64_t av_rescale_rnd(int64_t a, int64_t b, int64_t c, enum AVRounding rnd)
@@ -76,8 +71,8 @@ int64_t av_rescale_rnd(int64_t a, int64_t b, int64_t c, enum AVRounding rnd)
         rnd -= AV_ROUND_PASS_MINMAX;
     }
 
-    if (a < 0 && a != INT64_MIN)
-        return -av_rescale_rnd(-a, b, c, rnd ^ ((rnd >> 1) & 1));
+    if (a < 0)
+        return -(uint64_t)av_rescale_rnd(-FFMAX(a, -INT64_MAX), b, c, rnd ^ ((rnd >> 1) & 1));
 
     if (rnd == AV_ROUND_NEAR_INF)
         r = c / 2;
@@ -87,8 +82,13 @@ int64_t av_rescale_rnd(int64_t a, int64_t b, int64_t c, enum AVRounding rnd)
     if (b <= INT_MAX && c <= INT_MAX) {
         if (a <= INT_MAX)
             return (a * b + r) / c;
-        else
-            return a / c * b + (a % c * b + r) / c;
+        else {
+            int64_t ad = a / c;
+            int64_t a2 = (a % c * b + r) / c;
+            if (ad >= INT32_MAX && b && ad > (INT64_MAX - a2) / b)
+                return INT64_MIN;
+            return ad * b + a2;
+        }
     } else {
 #if 1
         uint64_t a0  = a & 0xFFFFFFFF;
@@ -112,6 +112,8 @@ int64_t av_rescale_rnd(int64_t a, int64_t b, int64_t c, enum AVRounding rnd)
                 t1++;
             }
         }
+        if (t1 > INT64_MAX)
+            return INT64_MIN;
         return t1;
     }
 #else
diff --git a/libavutil/mathematics.h b/libavutil/mathematics.h
index ac944887..57c44f84 100644
--- a/libavutil/mathematics.h
+++ b/libavutil/mathematics.h
@@ -77,9 +77,10 @@ enum AVRounding {
 };
 
 /**
- * Return the greatest common divisor of a and b.
- * If both a and b are 0 or either or both are <0 then behavior is
- * undefined.
+ * Compute the greatest common divisor of a and b.
+ *
+ * @return gcd of a and b up to sign; if a >= 0 and b >= 0, return value is >= 0;
+ * if a == 0 and b == 0, returns 0.
  */
 int64_t av_const av_gcd(int64_t a, int64_t b);
 
diff --git a/libavutil/mem.c b/libavutil/mem.c
index da291fb9..8dfaad82 100644
--- a/libavutil/mem.c
+++ b/libavutil/mem.c
@@ -59,6 +59,8 @@ void  free(void *ptr);
 
 #endif /* MALLOC_PREFIX */
 
+#include "mem_internal.h"
+
 #define ALIGN (HAVE_AVX ? 32 : 16)
 
 /* NOTE: if you want to override these functions with your own
@@ -480,7 +482,7 @@ void *av_fast_realloc(void *ptr, unsigned int *size, size_t min_size)
     if (min_size < *size)
         return ptr;
 
-    min_size = FFMAX(17 * min_size / 16 + 32, min_size);
+    min_size = FFMAX(min_size + min_size / 16 + 32, min_size);
 
     ptr = av_realloc(ptr, min_size);
     /* we could set this to the unmodified min_size but this is safer
@@ -494,24 +496,12 @@ void *av_fast_realloc(void *ptr, unsigned int *size, size_t min_size)
     return ptr;
 }
 
-static inline int ff_fast_malloc(void *ptr, unsigned int *size, size_t min_size, int zero_realloc)
-{
-    void *val;
-
-    if (min_size < *size)
-        return 0;
-    min_size = FFMAX(17 * min_size / 16 + 32, min_size);
-    av_freep(ptr);
-    val = zero_realloc ? av_mallocz(min_size) : av_malloc(min_size);
-    memcpy(ptr, &val, sizeof(val));
-    if (!val)
-        min_size = 0;
-    *size = min_size;
-    return 1;
-}
-
 void av_fast_malloc(void *ptr, unsigned int *size, size_t min_size)
 {
     ff_fast_malloc(ptr, size, min_size, 0);
 }
 
+void av_fast_mallocz(void *ptr, unsigned int *size, size_t min_size)
+{
+    ff_fast_malloc(ptr, size, min_size, 1);
+}
diff --git a/libavutil/mem.h b/libavutil/mem.h
index 2a1e36d6..d25b3229 100644
--- a/libavutil/mem.h
+++ b/libavutil/mem.h
@@ -144,6 +144,7 @@ void *av_realloc_f(void *ptr, size_t nelem, size_t elsize);
  *          The situation is undefined according to POSIX and may crash with
  *          some libc implementations.
  */
+av_warn_unused_result
 int av_reallocp(void *ptr, size_t size);
 
 /**
@@ -304,6 +305,7 @@ void av_dynarray_add(void *tab_ptr, int *nb_ptr, void *elem);
  * @return >=0 on success, negative otherwise.
  * @see av_dynarray_add(), av_dynarray2_add()
  */
+av_warn_unused_result
 int av_dynarray_add_nofree(void *tab_ptr, int *nb_ptr, void *elem);
 
 /**
@@ -382,6 +384,21 @@ void *av_fast_realloc(void *ptr, unsigned int *size, size_t min_size);
  */
 void av_fast_malloc(void *ptr, unsigned int *size, size_t min_size);
 
+/**
+ * Allocate a buffer, reusing the given one if large enough.
+ *
+ * All newly allocated space is initially cleared.
+ * Contrary to av_fast_realloc the current buffer contents might not be
+ * preserved and on error the old buffer is freed, thus no special
+ * handling to avoid memleaks is necessary.
+ *
+ * @param ptr pointer to pointer to already allocated buffer, overwritten with pointer to new buffer
+ * @param size size of the buffer *ptr points to
+ * @param min_size minimum size of *ptr buffer after returning, *ptr will be NULL and
+ *                 *size 0 if an error occurred.
+ */
+void av_fast_mallocz(void *ptr, unsigned int *size, size_t min_size);
+
 /**
  * @}
  */
diff --git a/libavcodec/pixblockdsp_template.c b/libavutil/mem_internal.h
similarity index 53%
rename from libavcodec/pixblockdsp_template.c
rename to libavutil/mem_internal.h
index d1e91022..6fdbcb01 100644
--- a/libavcodec/pixblockdsp_template.c
+++ b/libavutil/mem_internal.h
@@ -1,4 +1,6 @@
 /*
+ * Copyright (c) 2002 Fabrice Bellard
+ *
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
@@ -16,25 +18,28 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include "bit_depth_template.c"
+#ifndef AVUTIL_MEM_INTERNAL_H
+#define AVUTIL_MEM_INTERNAL_H
+
+#include "avassert.h"
+#include "mem.h"
 
-static void FUNCC(get_pixels)(int16_t *av_restrict block, const uint8_t *_pixels,
-                              ptrdiff_t line_size)
+static inline int ff_fast_malloc(void *ptr, unsigned int *size, size_t min_size, int zero_realloc)
 {
-    const pixel *pixels = (const pixel *) _pixels;
-    int i;
+    void *val;
 
-    /* read the pixels */
-    for (i = 0; i < 8; i++) {
-        block[0] = pixels[0];
-        block[1] = pixels[1];
-        block[2] = pixels[2];
-        block[3] = pixels[3];
-        block[4] = pixels[4];
-        block[5] = pixels[5];
-        block[6] = pixels[6];
-        block[7] = pixels[7];
-        pixels  += line_size / sizeof(pixel);
-        block   += 8;
+    memcpy(&val, ptr, sizeof(val));
+    if (min_size <= *size) {
+        av_assert0(val || !min_size);
+        return 0;
     }
+    min_size = FFMAX(min_size + min_size / 16 + 32, min_size);
+    av_freep(ptr);
+    val = zero_realloc ? av_mallocz(min_size) : av_malloc(min_size);
+    memcpy(ptr, &val, sizeof(val));
+    if (!val)
+        min_size = 0;
+    *size = min_size;
+    return 1;
 }
+#endif /* AVUTIL_MEM_INTERNAL_H */
diff --git a/libavutil/mips/float_dsp_mips.c b/libavutil/mips/float_dsp_mips.c
index b3a812ce..0943d6f3 100644
--- a/libavutil/mips/float_dsp_mips.c
+++ b/libavutil/mips/float_dsp_mips.c
@@ -56,6 +56,7 @@
 #include "libavutil/mips/asmdefs.h"
 
 #if HAVE_INLINE_ASM && HAVE_MIPSFPU
+#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
 static void vector_fmul_mips(float *dst, const float *src0, const float *src1,
                              int len)
 {
@@ -339,14 +340,17 @@ static void vector_fmul_reverse_mips(float *dst, const float *src0, const float
         );
     }
 }
+#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
 #endif /* HAVE_INLINE_ASM && HAVE_MIPSFPU */
 
 void ff_float_dsp_init_mips(AVFloatDSPContext *fdsp) {
 #if HAVE_INLINE_ASM && HAVE_MIPSFPU
+#if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
     fdsp->vector_fmul = vector_fmul_mips;
     fdsp->vector_fmul_scalar  = vector_fmul_scalar_mips;
     fdsp->vector_fmul_window = vector_fmul_window_mips;
     fdsp->butterflies_float = butterflies_float_mips;
     fdsp->vector_fmul_reverse = vector_fmul_reverse_mips;
+#endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
 #endif /* HAVE_INLINE_ASM && HAVE_MIPSFPU */
 }
diff --git a/libavutil/mips/generic_macros_msa.h b/libavutil/mips/generic_macros_msa.h
index 234aead0..b1d18dd9 100644
--- a/libavutil/mips/generic_macros_msa.h
+++ b/libavutil/mips/generic_macros_msa.h
@@ -24,6 +24,9 @@
 #include <stdint.h>
 #include <msa.h>
 
+#define ALIGNMENT           16
+#define ALLOC_ALIGNED(align) __attribute__ ((aligned((align) << 1)))
+
 #define LD_B(RTYPE, psrc) *((RTYPE *)(psrc))
 #define LD_UB(...) LD_B(v16u8, __VA_ARGS__)
 #define LD_SB(...) LD_B(v16i8, __VA_ARGS__)
@@ -333,6 +336,7 @@
     LD_B4(RTYPE, (psrc), stride, out0, out1, out2, out3);               \
     LD_B2(RTYPE, (psrc) + 4 * stride, stride, out4, out5);              \
 }
+#define LD_UB6(...) LD_B6(v16u8, __VA_ARGS__)
 #define LD_SB6(...) LD_B6(v16i8, __VA_ARGS__)
 
 #define LD_B7(RTYPE, psrc, stride,                               \
@@ -341,6 +345,7 @@
     LD_B5(RTYPE, (psrc), stride, out0, out1, out2, out3, out4);  \
     LD_B2(RTYPE, (psrc) + 5 * stride, stride, out5, out6);       \
 }
+#define LD_UB7(...) LD_B7(v16u8, __VA_ARGS__)
 #define LD_SB7(...) LD_B7(v16i8, __VA_ARGS__)
 
 #define LD_B8(RTYPE, psrc, stride,                                      \
@@ -403,6 +408,19 @@
 }
 #define LD_SH16(...) LD_H16(v8i16, __VA_ARGS__)
 
+/* Description : Load as 4x4 block of signed halfword elements from 1D source
+                 data into 4 vectors (Each vector with 4 signed halfwords)
+   Arguments   : Inputs  - psrc
+                 Outputs - out0, out1, out2, out3
+*/
+#define LD4x4_SH(psrc, out0, out1, out2, out3)                \
+{                                                             \
+    out0 = LD_SH(psrc);                                       \
+    out2 = LD_SH(psrc + 8);                                   \
+    out1 = (v8i16) __msa_ilvl_d((v2i64) out0, (v2i64) out0);  \
+    out3 = (v8i16) __msa_ilvl_d((v2i64) out2, (v2i64) out2);  \
+}
+
 /* Description : Load 2 vectors of signed word elements with stride
    Arguments   : Inputs  - psrc    (source pointer to load from)
                          - stride
@@ -492,6 +510,14 @@
     ST_SW(in0, (pdst));                 \
     ST_SW(in1, (pdst) + stride);        \
 }
+#define ST_SW8(in0, in1, in2, in3, in4, in5, in6, in7,  \
+               pdst, stride)                            \
+{                                                       \
+    ST_SW2(in0, in1, (pdst), stride);                   \
+    ST_SW2(in2, in3, (pdst) + 2 * stride, stride);      \
+    ST_SW2(in4, in5, (pdst) + 4 * stride, stride);      \
+    ST_SW2(in6, in7, (pdst) + 6 * stride, stride);      \
+}
 
 /* Description : Store as 2x4 byte block to destination memory from input vector
    Arguments   : Inputs  - in, stidx, pdst, stride
@@ -747,6 +773,33 @@
     SW(out15_m, pblk_12x8_m + 8);                                        \
 }
 
+/* Description : average with rounding (in0 + in1 + 1) / 2.
+   Arguments   : Inputs  - in0, in1, in2, in3
+                 Outputs - out0, out1
+                 Return Type - as per RTYPE
+   Details     : Each byte element from 'in0' vector is added with each byte
+                 element from 'in1' vector. The addition of the elements plus 1
+                 (for rounding) is done unsigned with full precision,
+                 i.e. the result has one extra bit. Unsigned division by 2
+                 (or logical shift right by one bit) is performed before writing
+                 the result to vector 'out0'.
+                 Similarly for the pair 'in2' and 'in3', written to 'out1'.
+*/
+#define AVER_UB2(RTYPE, in0, in1, in2, in3, out0, out1)       \
+{                                                             \
+    out0 = (RTYPE) __msa_aver_u_b((v16u8) in0, (v16u8) in1);  \
+    out1 = (RTYPE) __msa_aver_u_b((v16u8) in2, (v16u8) in3);  \
+}
+#define AVER_UB2_UB(...) AVER_UB2(v16u8, __VA_ARGS__)
+
+#define AVER_UB4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
+                 out0, out1, out2, out3)                        \
+{                                                               \
+    AVER_UB2(RTYPE, in0, in1, in2, in3, out0, out1)             \
+    AVER_UB2(RTYPE, in4, in5, in6, in7, out2, out3)             \
+}
+#define AVER_UB4_UB(...) AVER_UB4(v16u8, __VA_ARGS__)
+
 /* Description : Immediate number of columns to slide with zero
    Arguments   : Inputs  - in0, in1, slide_val
                  Outputs - out0, out1
@@ -761,6 +814,17 @@
     out1 = (RTYPE) __msa_sldi_b((v16i8) zero_m, (v16i8) in1, slide_val);  \
 }
 #define SLDI_B2_0_UB(...) SLDI_B2_0(v16u8, __VA_ARGS__)
+#define SLDI_B2_0_SB(...) SLDI_B2_0(v16i8, __VA_ARGS__)
+#define SLDI_B2_0_SW(...) SLDI_B2_0(v4i32, __VA_ARGS__)
+
+#define SLDI_B3_0(RTYPE, in0, in1, in2, out0, out1, out2,  slide_val)     \
+{                                                                         \
+    v16i8 zero_m = { 0 };                                                 \
+    SLDI_B2_0(RTYPE, in0, in1, out0, out1, slide_val);                    \
+    out2 = (RTYPE) __msa_sldi_b((v16i8) zero_m, (v16i8) in2, slide_val);  \
+}
+#define SLDI_B3_0_UB(...) SLDI_B3_0(v16u8, __VA_ARGS__)
+#define SLDI_B3_0_SB(...) SLDI_B3_0(v16i8, __VA_ARGS__)
 
 #define SLDI_B4_0(RTYPE, in0, in1, in2, in3,            \
                   out0, out1, out2, out3, slide_val)    \
@@ -768,7 +832,9 @@
     SLDI_B2_0(RTYPE, in0, in1, out0, out1, slide_val);  \
     SLDI_B2_0(RTYPE, in2, in3, out2, out3, slide_val);  \
 }
+#define SLDI_B4_0_UB(...) SLDI_B4_0(v16u8, __VA_ARGS__)
 #define SLDI_B4_0_SB(...) SLDI_B4_0(v16i8, __VA_ARGS__)
+#define SLDI_B4_0_SH(...) SLDI_B4_0(v8i16, __VA_ARGS__)
 
 /* Description : Immediate number of columns to slide
    Arguments   : Inputs  - in0_0, in0_1, in1_0, in1_1, slide_val
@@ -786,6 +852,14 @@
 #define SLDI_B2_SB(...) SLDI_B2(v16i8, __VA_ARGS__)
 #define SLDI_B2_SH(...) SLDI_B2(v8i16, __VA_ARGS__)
 
+#define SLDI_B3(RTYPE, in0_0, in0_1, in0_2, in1_0, in1_1, in1_2,           \
+                out0, out1, out2, slide_val)                               \
+{                                                                          \
+    SLDI_B2(RTYPE, in0_0, in0_1, in1_0, in1_1, out0, out1, slide_val)      \
+    out2 = (RTYPE) __msa_sldi_b((v16i8) in0_2, (v16i8) in1_2, slide_val);  \
+}
+#define SLDI_B3_SB(...) SLDI_B3(v16i8, __VA_ARGS__)
+#define SLDI_B3_UH(...) SLDI_B3(v8u16, __VA_ARGS__)
 
 /* Description : Shuffle byte vector elements as per mask vector
    Arguments   : Inputs  - in0, in1, in2, in3, mask0, mask1
@@ -821,6 +895,31 @@
     VSHF_B2(RTYPE, in0, in1, in0, in1, mask2, mask3, out2, out3);  \
 }
 #define VSHF_B4_SB(...) VSHF_B4(v16i8, __VA_ARGS__)
+#define VSHF_B4_SH(...) VSHF_B4(v8i16, __VA_ARGS__)
+
+/* Description : Shuffle halfword vector elements as per mask vector
+   Arguments   : Inputs  - in0, in1, in2, in3, mask0, mask1
+                 Outputs - out0, out1
+                 Return Type - as per RTYPE
+   Details     : Selective halfword elements from in0 & in1 are copied to out0
+                 as per control vector mask0
+                 Selective halfword elements from in2 & in3 are copied to out1
+                 as per control vector mask1
+*/
+#define VSHF_H2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1)       \
+{                                                                          \
+    out0 = (RTYPE) __msa_vshf_h((v8i16) mask0, (v8i16) in1, (v8i16) in0);  \
+    out1 = (RTYPE) __msa_vshf_h((v8i16) mask1, (v8i16) in3, (v8i16) in2);  \
+}
+#define VSHF_H2_SH(...) VSHF_H2(v8i16, __VA_ARGS__)
+
+#define VSHF_H3(RTYPE, in0, in1, in2, in3, in4, in5, mask0, mask1, mask2,  \
+                out0, out1, out2)                                          \
+{                                                                          \
+    VSHF_H2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1);          \
+    out2 = (RTYPE) __msa_vshf_h((v8i16) mask2, (v8i16) in5, (v8i16) in4);  \
+}
+#define VSHF_H3_SH(...) VSHF_H3(v8i16, __VA_ARGS__)
 
 /* Description : Shuffle byte vector elements as per mask vector
    Arguments   : Inputs  - in0, in1, in2, in3, mask0, mask1
@@ -838,6 +937,34 @@
 }
 #define VSHF_W2_SB(...) VSHF_W2(v16i8, __VA_ARGS__)
 
+/* Description : Dot product of byte vector elements
+   Arguments   : Inputs  - mult0, mult1
+                           cnst0, cnst1
+                 Outputs - out0, out1
+                 Return Type - unsigned halfword
+   Details     : Unsigned byte elements from mult0 are multiplied with
+                 unsigned byte elements from cnst0 producing a result
+                 twice the size of input i.e. unsigned halfword.
+                 Then this multiplication results of adjacent odd-even elements
+                 are added together and stored to the out vector
+                 (2 unsigned halfword results)
+*/
+#define DOTP_UB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1)   \
+{                                                                 \
+    out0 = (RTYPE) __msa_dotp_u_h((v16u8) mult0, (v16u8) cnst0);  \
+    out1 = (RTYPE) __msa_dotp_u_h((v16u8) mult1, (v16u8) cnst1);  \
+}
+#define DOTP_UB2_UH(...) DOTP_UB2(v8u16, __VA_ARGS__)
+
+#define DOTP_UB4(RTYPE, mult0, mult1, mult2, mult3,           \
+                 cnst0, cnst1, cnst2, cnst3,                  \
+                 out0, out1, out2, out3)                      \
+{                                                             \
+    DOTP_UB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1);  \
+    DOTP_UB2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3);  \
+}
+#define DOTP_UB4_UH(...) DOTP_UB4(v8u16, __VA_ARGS__)
+
 /* Description : Dot product of byte vector elements
    Arguments   : Inputs  - mult0, mult1
                            cnst0, cnst1
@@ -930,6 +1057,27 @@
 }
 #define DPADD_SB4_SH(...) DPADD_SB4(v8i16, __VA_ARGS__)
 
+/* Description : Dot product & addition of byte vector elements
+   Arguments   : Inputs  - mult0, mult1
+                           cnst0, cnst1
+                 Outputs - out0, out1
+                 Return Type - unsigned halfword
+   Details     : Unsigned byte elements from mult0 are multiplied with
+                 unsigned byte elements from cnst0 producing a result
+                 twice the size of input i.e. unsigned halfword.
+                 Then this multiplication results of adjacent odd-even elements
+                 are added to the out vector
+                 (2 unsigned halfword results)
+*/
+#define DPADD_UB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1)   \
+{                                                                  \
+    out0 = (RTYPE) __msa_dpadd_u_h((v8u16) out0,                   \
+                                   (v16u8) mult0, (v16u8) cnst0);  \
+    out1 = (RTYPE) __msa_dpadd_u_h((v8u16) out1,                   \
+                                   (v16u8) mult1, (v16u8) cnst1);  \
+}
+#define DPADD_UB2_UH(...) DPADD_UB2(v8u16, __VA_ARGS__)
+
 /* Description : Dot product & addition of halfword vector elements
    Arguments   : Inputs  - mult0, mult1
                            cnst0, cnst1
@@ -959,6 +1107,28 @@
 }
 #define DPADD_SH4_SW(...) DPADD_SH4(v4i32, __VA_ARGS__)
 
+/* Description : Minimum values between unsigned elements of
+                 either vector are copied to the output vector
+   Arguments   : Inputs  - in0, in1, min_vec
+                 Outputs - in0, in1, (in place)
+                 Return Type - unsigned halfword
+   Details     : Minimum of unsigned halfword element values from 'in0' and
+                 'min_vec' are written to output vector 'in0'
+*/
+#define MIN_UH2(RTYPE, in0, in1, min_vec)               \
+{                                                       \
+    in0 = (RTYPE) __msa_min_u_h((v8u16) in0, min_vec);  \
+    in1 = (RTYPE) __msa_min_u_h((v8u16) in1, min_vec);  \
+}
+#define MIN_UH2_UH(...) MIN_UH2(v8u16, __VA_ARGS__)
+
+#define MIN_UH4(RTYPE, in0, in1, in2, in3, min_vec)  \
+{                                                    \
+    MIN_UH2(RTYPE, in0, in1, min_vec);               \
+    MIN_UH2(RTYPE, in2, in3, min_vec);               \
+}
+#define MIN_UH4_UH(...) MIN_UH4(v8u16, __VA_ARGS__)
+
 /* Description : Clips all halfword elements of input vector between min & max
                  out = ((in) < (min)) ? (min) : (((in) > (max)) ? (max) : (in))
    Arguments   : Inputs  - in       (input vector)
@@ -1018,6 +1188,100 @@
     out_m;                                                \
 } )
 
+/* Description : Addition of 4 signed word elements
+                 4 signed word elements of input vector are added together and
+                 resulted integer sum is returned
+   Arguments   : Inputs  - in       (signed word vector)
+                 Outputs - sum_m    (i32 sum)
+                 Return Type - signed word
+*/
+#define HADD_SW_S32(in)                               \
+( {                                                   \
+    v2i64 res0_m, res1_m;                             \
+    int32_t sum_m;                                    \
+                                                      \
+    res0_m = __msa_hadd_s_d((v4i32) in, (v4i32) in);  \
+    res1_m = __msa_splati_d(res0_m, 1);               \
+    res0_m = res0_m + res1_m;                         \
+    sum_m = __msa_copy_s_w((v4i32) res0_m, 0);        \
+    sum_m;                                            \
+} )
+
+/* Description : Addition of 8 unsigned halfword elements
+                 8 unsigned halfword elements of input vector are added
+                 together and resulted integer sum is returned
+   Arguments   : Inputs  - in       (unsigned halfword vector)
+                 Outputs - sum_m    (u32 sum)
+                 Return Type - unsigned word
+*/
+#define HADD_UH_U32(in)                                  \
+( {                                                      \
+    v4u32 res_m;                                         \
+    v2u64 res0_m, res1_m;                                \
+    uint32_t sum_m;                                      \
+                                                         \
+    res_m = __msa_hadd_u_w((v8u16) in, (v8u16) in);      \
+    res0_m = __msa_hadd_u_d(res_m, res_m);               \
+    res1_m = (v2u64) __msa_splati_d((v2i64) res0_m, 1);  \
+    res0_m = res0_m + res1_m;                            \
+    sum_m = __msa_copy_u_w((v4i32) res0_m, 0);           \
+    sum_m;                                               \
+} )
+
+/* Description : Horizontal addition of signed byte vector elements
+   Arguments   : Inputs  - in0, in1
+                 Outputs - out0, out1
+                 Return Type - as per RTYPE
+   Details     : Each signed odd byte element from 'in0' is added to
+                 even signed byte element from 'in0' (pairwise) and the
+                 halfword result is stored in 'out0'
+*/
+#define HADD_SB2(RTYPE, in0, in1, out0, out1)                 \
+{                                                             \
+    out0 = (RTYPE) __msa_hadd_s_h((v16i8) in0, (v16i8) in0);  \
+    out1 = (RTYPE) __msa_hadd_s_h((v16i8) in1, (v16i8) in1);  \
+}
+#define HADD_SB2_SH(...) HADD_SB2(v8i16, __VA_ARGS__)
+
+#define HADD_SB4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3)  \
+{                                                                    \
+    HADD_SB2(RTYPE, in0, in1, out0, out1);                           \
+    HADD_SB2(RTYPE, in2, in3, out2, out3);                           \
+}
+#define HADD_SB4_UH(...) HADD_SB4(v8u16, __VA_ARGS__)
+#define HADD_SB4_SH(...) HADD_SB4(v8i16, __VA_ARGS__)
+
+/* Description : Horizontal addition of unsigned byte vector elements
+   Arguments   : Inputs  - in0, in1
+                 Outputs - out0, out1
+                 Return Type - as per RTYPE
+   Details     : Each unsigned odd byte element from 'in0' is added to
+                 even unsigned byte element from 'in0' (pairwise) and the
+                 halfword result is stored in 'out0'
+*/
+#define HADD_UB2(RTYPE, in0, in1, out0, out1)                 \
+{                                                             \
+    out0 = (RTYPE) __msa_hadd_u_h((v16u8) in0, (v16u8) in0);  \
+    out1 = (RTYPE) __msa_hadd_u_h((v16u8) in1, (v16u8) in1);  \
+}
+#define HADD_UB2_UH(...) HADD_UB2(v8u16, __VA_ARGS__)
+
+#define HADD_UB3(RTYPE, in0, in1, in2, out0, out1, out2)      \
+{                                                             \
+    HADD_UB2(RTYPE, in0, in1, out0, out1);                    \
+    out2 = (RTYPE) __msa_hadd_u_h((v16u8) in2, (v16u8) in2);  \
+}
+#define HADD_UB3_UH(...) HADD_UB3(v8u16, __VA_ARGS__)
+
+#define HADD_UB4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3)  \
+{                                                                    \
+    HADD_UB2(RTYPE, in0, in1, out0, out1);                           \
+    HADD_UB2(RTYPE, in2, in3, out2, out3);                           \
+}
+#define HADD_UB4_UB(...) HADD_UB4(v16u8, __VA_ARGS__)
+#define HADD_UB4_UH(...) HADD_UB4(v8u16, __VA_ARGS__)
+#define HADD_UB4_SH(...) HADD_UB4(v8i16, __VA_ARGS__)
+
 /* Description : Horizontal subtraction of unsigned byte vector elements
    Arguments   : Inputs  - in0, in1
                  Outputs - out0, out1
@@ -1034,6 +1298,51 @@
 #define HSUB_UB2_UH(...) HSUB_UB2(v8u16, __VA_ARGS__)
 #define HSUB_UB2_SH(...) HSUB_UB2(v8i16, __VA_ARGS__)
 
+#define HSUB_UB4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3)  \
+{                                                                    \
+    HSUB_UB2(RTYPE, in0, in1, out0, out1);                           \
+    HSUB_UB2(RTYPE, in2, in3, out2, out3);                           \
+}
+#define HSUB_UB4_UH(...) HSUB_UB4(v8u16, __VA_ARGS__)
+#define HSUB_UB4_SH(...) HSUB_UB4(v8i16, __VA_ARGS__)
+
+/* Description : SAD (Sum of Absolute Difference)
+   Arguments   : Inputs  - in0, in1, ref0, ref1  (unsigned byte src & ref)
+                 Outputs - sad_m                 (halfword vector with sad)
+                 Return Type - unsigned halfword
+   Details     : Absolute difference of all the byte elements from 'in0' with
+                 'ref0' is calculated and preserved in 'diff0_m'. From the 16
+                 unsigned absolute diff values, even-odd pairs are added
+                 together to generate 8 halfword results.
+*/
+#define SAD_UB2_UH(in0, in1, ref0, ref1)                        \
+( {                                                             \
+    v16u8 diff0_m, diff1_m;                                     \
+    v8u16 sad_m = { 0 };                                        \
+                                                                \
+    diff0_m = __msa_asub_u_b((v16u8) in0, (v16u8) ref0);        \
+    diff1_m = __msa_asub_u_b((v16u8) in1, (v16u8) ref1);        \
+                                                                \
+    sad_m += __msa_hadd_u_h((v16u8) diff0_m, (v16u8) diff0_m);  \
+    sad_m += __msa_hadd_u_h((v16u8) diff1_m, (v16u8) diff1_m);  \
+                                                                \
+    sad_m;                                                      \
+} )
+
+/* Description : Insert specified word elements from input vectors to 1
+                 destination vector
+   Arguments   : Inputs  - in0, in1, in2, in3 (4 input vectors)
+                 Outputs - out                (output vector)
+                 Return Type - as per RTYPE
+*/
+#define INSERT_W2(RTYPE, in0, in1, out)                 \
+{                                                       \
+    out = (RTYPE) __msa_insert_w((v4i32) out, 0, in0);  \
+    out = (RTYPE) __msa_insert_w((v4i32) out, 1, in1);  \
+}
+#define INSERT_W2_UB(...) INSERT_W2(v16u8, __VA_ARGS__)
+#define INSERT_W2_SB(...) INSERT_W2(v16i8, __VA_ARGS__)
+
 #define INSERT_W4(RTYPE, in0, in1, in2, in3, out)       \
 {                                                       \
     out = (RTYPE) __msa_insert_w((v4i32) out, 0, in0);  \
@@ -1060,6 +1369,25 @@
 #define INSERT_D2_SB(...) INSERT_D2(v16i8, __VA_ARGS__)
 #define INSERT_D2_SD(...) INSERT_D2(v2i64, __VA_ARGS__)
 
+/* Description : Interleave even byte elements from vectors
+   Arguments   : Inputs  - in0, in1, in2, in3
+                 Outputs - out0, out1
+                 Return Type - as per RTYPE
+   Details     : Even byte elements of 'in0' and even byte
+                 elements of 'in1' are interleaved and copied to 'out0'
+                 Even byte elements of 'in2' and even byte
+                 elements of 'in3' are interleaved and copied to 'out1'
+*/
+#define ILVEV_B2(RTYPE, in0, in1, in2, in3, out0, out1)      \
+{                                                            \
+    out0 = (RTYPE) __msa_ilvev_b((v16i8) in1, (v16i8) in0);  \
+    out1 = (RTYPE) __msa_ilvev_b((v16i8) in3, (v16i8) in2);  \
+}
+#define ILVEV_B2_UB(...) ILVEV_B2(v16u8, __VA_ARGS__)
+#define ILVEV_B2_SB(...) ILVEV_B2(v16i8, __VA_ARGS__)
+#define ILVEV_B2_SH(...) ILVEV_B2(v8i16, __VA_ARGS__)
+#define ILVEV_B2_SD(...) ILVEV_B2(v2i64, __VA_ARGS__)
+
 /* Description : Interleave even halfword elements from vectors
    Arguments   : Inputs  - in0, in1, in2, in3
                  Outputs - out0, out1
@@ -1075,6 +1403,8 @@
     out1 = (RTYPE) __msa_ilvev_h((v8i16) in3, (v8i16) in2);  \
 }
 #define ILVEV_H2_UB(...) ILVEV_H2(v16u8, __VA_ARGS__)
+#define ILVEV_H2_SH(...) ILVEV_H2(v8i16, __VA_ARGS__)
+#define ILVEV_H2_SW(...) ILVEV_H2(v4i32, __VA_ARGS__)
 
 /* Description : Interleave even word elements from vectors
    Arguments   : Inputs  - in0, in1, in2, in3
@@ -1090,7 +1420,10 @@
     out0 = (RTYPE) __msa_ilvev_w((v4i32) in1, (v4i32) in0);  \
     out1 = (RTYPE) __msa_ilvev_w((v4i32) in3, (v4i32) in2);  \
 }
+#define ILVEV_W2_UB(...) ILVEV_W2(v16u8, __VA_ARGS__)
 #define ILVEV_W2_SB(...) ILVEV_W2(v16i8, __VA_ARGS__)
+#define ILVEV_W2_UH(...) ILVEV_W2(v8u16, __VA_ARGS__)
+#define ILVEV_W2_SD(...) ILVEV_W2(v2i64, __VA_ARGS__)
 
 /* Description : Interleave even double word elements from vectors
    Arguments   : Inputs  - in0, in1, in2, in3
@@ -1107,6 +1440,8 @@
     out1 = (RTYPE) __msa_ilvev_d((v2i64) in3, (v2i64) in2);  \
 }
 #define ILVEV_D2_UB(...) ILVEV_D2(v16u8, __VA_ARGS__)
+#define ILVEV_D2_SB(...) ILVEV_D2(v16i8, __VA_ARGS__)
+#define ILVEV_D2_SW(...) ILVEV_D2(v4i32, __VA_ARGS__)
 
 /* Description : Interleave left half of byte elements from vectors
    Arguments   : Inputs  - in0, in1, in2, in3
@@ -1122,7 +1457,9 @@
     out0 = (RTYPE) __msa_ilvl_b((v16i8) in0, (v16i8) in1);  \
     out1 = (RTYPE) __msa_ilvl_b((v16i8) in2, (v16i8) in3);  \
 }
+#define ILVL_B2_UB(...) ILVL_B2(v16u8, __VA_ARGS__)
 #define ILVL_B2_SB(...) ILVL_B2(v16i8, __VA_ARGS__)
+#define ILVL_B2_UH(...) ILVL_B2(v8u16, __VA_ARGS__)
 #define ILVL_B2_SH(...) ILVL_B2(v8i16, __VA_ARGS__)
 
 #define ILVL_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7,  \
@@ -1131,6 +1468,7 @@
     ILVL_B2(RTYPE, in0, in1, in2, in3, out0, out1);             \
     ILVL_B2(RTYPE, in4, in5, in6, in7, out2, out3);             \
 }
+#define ILVL_B4_UB(...) ILVL_B4(v16u8, __VA_ARGS__)
 #define ILVL_B4_SB(...) ILVL_B4(v16i8, __VA_ARGS__)
 #define ILVL_B4_UH(...) ILVL_B4(v8u16, __VA_ARGS__)
 #define ILVL_B4_SH(...) ILVL_B4(v8i16, __VA_ARGS__)
@@ -1159,6 +1497,7 @@
     ILVL_H2(RTYPE, in4, in5, in6, in7, out2, out3);             \
 }
 #define ILVL_H4_SH(...) ILVL_H4(v8i16, __VA_ARGS__)
+#define ILVL_H4_SW(...) ILVL_H4(v4i32, __VA_ARGS__)
 
 /* Description : Interleave left half of word elements from vectors
    Arguments   : Inputs  - in0, in1, in2, in3
@@ -1174,7 +1513,9 @@
     out0 = (RTYPE) __msa_ilvl_w((v4i32) in0, (v4i32) in1);  \
     out1 = (RTYPE) __msa_ilvl_w((v4i32) in2, (v4i32) in3);  \
 }
+#define ILVL_W2_UB(...) ILVL_W2(v16u8, __VA_ARGS__)
 #define ILVL_W2_SB(...) ILVL_W2(v16i8, __VA_ARGS__)
+#define ILVL_W2_SH(...) ILVL_W2(v8i16, __VA_ARGS__)
 
 /* Description : Interleave right half of byte elements from vectors
    Arguments   : Inputs  - in0, in1, in2, in3, in4, in5, in6, in7
@@ -1212,9 +1553,22 @@
     ILVR_B2(RTYPE, in0, in1, in2, in3, out0, out1);             \
     ILVR_B2(RTYPE, in4, in5, in6, in7, out2, out3);             \
 }
+#define ILVR_B4_UB(...) ILVR_B4(v16u8, __VA_ARGS__)
 #define ILVR_B4_SB(...) ILVR_B4(v16i8, __VA_ARGS__)
 #define ILVR_B4_UH(...) ILVR_B4(v8u16, __VA_ARGS__)
 #define ILVR_B4_SH(...) ILVR_B4(v8i16, __VA_ARGS__)
+#define ILVR_B4_SW(...) ILVR_B4(v4i32, __VA_ARGS__)
+
+#define ILVR_B8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7,    \
+                in8, in9, in10, in11, in12, in13, in14, in15,     \
+                out0, out1, out2, out3, out4, out5, out6, out7)   \
+{                                                                 \
+    ILVR_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7,        \
+            out0, out1, out2, out3);                              \
+    ILVR_B4(RTYPE, in8, in9, in10, in11, in12, in13, in14, in15,  \
+            out4, out5, out6, out7);                              \
+}
+#define ILVR_B8_UH(...) ILVR_B8(v8u16, __VA_ARGS__)
 
 /* Description : Interleave right half of halfword elements from vectors
    Arguments   : Inputs  - in0, in1, in2, in3, in4, in5, in6, in7
@@ -1248,6 +1602,7 @@
     ILVR_H2(RTYPE, in4, in5, in6, in7, out2, out3);             \
 }
 #define ILVR_H4_SH(...) ILVR_H4(v8i16, __VA_ARGS__)
+#define ILVR_H4_SW(...) ILVR_H4(v4i32, __VA_ARGS__)
 
 #define ILVR_W2(RTYPE, in0, in1, in2, in3, out0, out1)      \
 {                                                           \
@@ -1256,6 +1611,7 @@
 }
 #define ILVR_W2_UB(...) ILVR_W2(v16u8, __VA_ARGS__)
 #define ILVR_W2_SB(...) ILVR_W2(v16i8, __VA_ARGS__)
+#define ILVR_W2_SH(...) ILVR_W2(v8i16, __VA_ARGS__)
 
 #define ILVR_W4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7,  \
                 out0, out1, out2, out3)                         \
@@ -1264,6 +1620,7 @@
     ILVR_W2(RTYPE, in4, in5, in6, in7, out2, out3);             \
 }
 #define ILVR_W4_SB(...) ILVR_W4(v16i8, __VA_ARGS__)
+#define ILVR_W4_UB(...) ILVR_W4(v16u8, __VA_ARGS__)
 
 /* Description : Interleave right half of double word elements from vectors
    Arguments   : Inputs  - in0, in1, in2, in3, in4, in5, in6, in7
@@ -1279,6 +1636,7 @@
     out0 = (RTYPE) __msa_ilvr_d((v2i64) (in0), (v2i64) (in1));  \
     out1 = (RTYPE) __msa_ilvr_d((v2i64) (in2), (v2i64) (in3));  \
 }
+#define ILVR_D2_UB(...) ILVR_D2(v16u8, __VA_ARGS__)
 #define ILVR_D2_SB(...) ILVR_D2(v16i8, __VA_ARGS__)
 #define ILVR_D2_SH(...) ILVR_D2(v8i16, __VA_ARGS__)
 
@@ -1296,6 +1654,7 @@
     ILVR_D2(RTYPE, in4, in5, in6, in7, out2, out3);             \
 }
 #define ILVR_D4_SB(...) ILVR_D4(v16i8, __VA_ARGS__)
+#define ILVR_D4_UB(...) ILVR_D4(v16u8, __VA_ARGS__)
 
 /* Description : Interleave both left and right half of input vectors
    Arguments   : Inputs  - in0, in1
@@ -1311,8 +1670,11 @@
     out0 = (RTYPE) __msa_ilvr_b((v16i8) in0, (v16i8) in1);  \
     out1 = (RTYPE) __msa_ilvl_b((v16i8) in0, (v16i8) in1);  \
 }
+#define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__)
 #define ILVRL_B2_SB(...) ILVRL_B2(v16i8, __VA_ARGS__)
+#define ILVRL_B2_UH(...) ILVRL_B2(v8u16, __VA_ARGS__)
 #define ILVRL_B2_SH(...) ILVRL_B2(v8i16, __VA_ARGS__)
+#define ILVRL_B2_SW(...) ILVRL_B2(v4i32, __VA_ARGS__)
 
 #define ILVRL_H2(RTYPE, in0, in1, out0, out1)               \
 {                                                           \
@@ -1345,6 +1707,7 @@
     in0 = (RTYPE) __msa_maxi_s_h((v8i16) in0, (max_val));  \
     in1 = (RTYPE) __msa_maxi_s_h((v8i16) in1, (max_val));  \
 }
+#define MAXI_SH2_UH(...) MAXI_SH2(v8u16, __VA_ARGS__)
 #define MAXI_SH2_SH(...) MAXI_SH2(v8i16, __VA_ARGS__)
 
 #define MAXI_SH4(RTYPE, in0, in1, in2, in3, max_val)  \
@@ -1370,6 +1733,7 @@
     in1 = (RTYPE) __msa_sat_u_h((v8u16) in1, sat_val);  \
 }
 #define SAT_UH2_UH(...) SAT_UH2(v8u16, __VA_ARGS__)
+#define SAT_UH2_SH(...) SAT_UH2(v8i16, __VA_ARGS__)
 
 #define SAT_UH4(RTYPE, in0, in1, in2, in3, sat_val)  \
 {                                                    \
@@ -1450,6 +1814,15 @@
 #define SPLATI_H2_SB(...) SPLATI_H2(v16i8, __VA_ARGS__)
 #define SPLATI_H2_SH(...) SPLATI_H2(v8i16, __VA_ARGS__)
 
+#define SPLATI_H3(RTYPE, in, idx0, idx1, idx2,        \
+                  out0, out1, out2)                   \
+{                                                     \
+    SPLATI_H2(RTYPE, in, idx0, idx1, out0, out1);     \
+    out2 = (RTYPE) __msa_splati_h((v8i16) in, idx2);  \
+}
+#define SPLATI_H3_SB(...) SPLATI_H3(v16i8, __VA_ARGS__)
+#define SPLATI_H3_SH(...) SPLATI_H3(v8i16, __VA_ARGS__)
+
 #define SPLATI_H4(RTYPE, in, idx0, idx1, idx2, idx3,  \
                   out0, out1, out2, out3)             \
 {                                                     \
@@ -1582,6 +1955,24 @@
 }
 #define PCKEV_D4_UB(...) PCKEV_D4(v16u8, __VA_ARGS__)
 
+/* Description : Pack odd double word elements of vector pairs
+   Arguments   : Inputs  - in0, in1, in2, in3
+                 Outputs - out0, out1
+                 Return Type - as per RTYPE
+   Details     : Odd (index 1) double word elements of 'in0' and 'in1' are
+                 packed together and written to 'out0'
+                 Odd (index 1) double word elements of 'in2' and 'in3' are
+                 packed together and written to 'out1'
+*/
+#define PCKOD_D2(RTYPE, in0, in1, in2, in3, out0, out1)      \
+{                                                            \
+    out0 = (RTYPE) __msa_pckod_d((v2i64) in0, (v2i64) in1);  \
+    out1 = (RTYPE) __msa_pckod_d((v2i64) in2, (v2i64) in3);  \
+}
+#define PCKOD_D2_UB(...) PCKOD_D2(v16u8, __VA_ARGS__)
+#define PCKOD_D2_SH(...) PCKOD_D2(v8i16, __VA_ARGS__)
+#define PCKOD_D2_SD(...) PCKOD_D2(v2i64, __VA_ARGS__)
+
 /* Description : Each byte element is logically xor'ed with immediate 128
    Arguments   : Inputs  - in0, in1
                  Outputs - in0, in1 (in-place)
@@ -1870,6 +2261,60 @@
     ADD2(in4, in5, in6, in7, out2, out3);                                     \
 }
 
+/* Description : Subtraction of 2 pairs of vectors
+   Arguments   : Inputs  - in0, in1, in2, in3
+                 Outputs - out0, out1
+   Details     : Each element from 2 pairs vectors is subtracted and 2 results
+                 are produced
+*/
+#define SUB2(in0, in1, in2, in3, out0, out1)  \
+{                                             \
+    out0 = in0 - in1;                         \
+    out1 = in2 - in3;                         \
+}
+#define SUB4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3)  \
+{                                                                             \
+    out0 = in0 - in1;                                                         \
+    out1 = in2 - in3;                                                         \
+    out2 = in4 - in5;                                                         \
+    out3 = in6 - in7;                                                         \
+}
+
+/* Description : Sign extend halfword elements from right half of the vector
+   Arguments   : Inputs  - in    (input halfword vector)
+                 Outputs - out   (sign extended word vector)
+                 Return Type - signed word
+   Details     : A sign mask of the halfword elements of 'in' (elements < 0)
+                 is computed and right-interleaved with 'in' itself to
+                 generate 4 word elements keeping sign intact
+*/
+#define UNPCK_R_SH_SW(in, out)                       \
+{                                                    \
+    v8i16 sign_m;                                    \
+                                                     \
+    sign_m = __msa_clti_s_h((v8i16) in, 0);          \
+    out = (v4i32) __msa_ilvr_h(sign_m, (v8i16) in);  \
+}
+
+/* Description : Sign extend byte elements from input vector and return
+                 halfword results in pair of vectors
+   Arguments   : Inputs  - in           (1 input byte vector)
+                 Outputs - out0, out1   (sign extended 2 halfword vectors)
+                 Return Type - signed halfword
+   Details     : A sign mask of the byte elements of 'in' (elements < 0) is
+                 computed and interleaved right with 'in' itself to
+                 generate 8 signed halfword elements in 'out0'
+                 Then interleaved left with 'in' itself to
+                 generate 8 signed halfword elements in 'out1'
+*/
+#define UNPCK_SB_SH(in, out0, out1)                  \
+{                                                    \
+    v16i8 tmp_m;                                     \
+                                                     \
+    tmp_m = __msa_clti_s_b((v16i8) in, 0);           \
+    ILVRL_B2_SH(tmp_m, in, out0, out1);              \
+}
+
 /* Description : Zero extend unsigned byte elements to halfword elements
    Arguments   : Inputs  - in           (1 input unsigned byte vector)
                  Outputs - out0, out1   (unsigned 2 halfword vectors)
@@ -1903,6 +2348,18 @@
     ILVRL_H2_SW(tmp_m, in, out0, out1);              \
 }
 
+/* Description : Swap two variables
+   Arguments   : Inputs  - in0, in1
+                 Outputs - in0, in1 (in-place)
+   Details     : Xor-based swap; 'in0' and 'in1' must not be the same variable
+*/
+#define SWAP(in0, in1)  \
+{                       \
+    in0 = in0 ^ in1;    \
+    in1 = in0 ^ in1;    \
+    in0 = in0 ^ in1;    \
+}
+
 /* Description : Butterfly of 4 input vectors
    Arguments   : Inputs  - in0, in1, in2, in3
                  Outputs - out0, out1, out2, out3
@@ -1917,6 +2374,54 @@
     out3 = in0 - in3;                                            \
 }
 
+/* Description : Butterfly of 8 input vectors
+   Arguments   : Inputs  - in0 ...  in7
+                 Outputs - out0 .. out7
+   Details     : Butterfly operation
+*/
+#define BUTTERFLY_8(in0, in1, in2, in3, in4, in5, in6, in7,          \
+                    out0, out1, out2, out3, out4, out5, out6, out7)  \
+{                                                                    \
+    out0 = in0 + in7;                                                \
+    out1 = in1 + in6;                                                \
+    out2 = in2 + in5;                                                \
+    out3 = in3 + in4;                                                \
+                                                                     \
+    out4 = in3 - in4;                                                \
+    out5 = in2 - in5;                                                \
+    out6 = in1 - in6;                                                \
+    out7 = in0 - in7;                                                \
+}
+
+/* Description : Butterfly of 16 input vectors
+   Arguments   : Inputs  - in0 ...  in15
+                 Outputs - out0 .. out15
+   Details     : Butterfly operation
+*/
+#define BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7,                \
+                     in8, in9,  in10, in11, in12, in13, in14, in15,         \
+                     out0, out1, out2, out3, out4, out5, out6, out7,        \
+                     out8, out9, out10, out11, out12, out13, out14, out15)  \
+{                                                                           \
+    out0 = in0 + in15;                                                      \
+    out1 = in1 + in14;                                                      \
+    out2 = in2 + in13;                                                      \
+    out3 = in3 + in12;                                                      \
+    out4 = in4 + in11;                                                      \
+    out5 = in5 + in10;                                                      \
+    out6 = in6 + in9;                                                       \
+    out7 = in7 + in8;                                                       \
+                                                                            \
+    out8 = in7 - in8;                                                       \
+    out9 = in6 - in9;                                                       \
+    out10 = in5 - in10;                                                     \
+    out11 = in4 - in11;                                                     \
+    out12 = in3 - in12;                                                     \
+    out13 = in2 - in13;                                                     \
+    out14 = in1 - in14;                                                     \
+    out15 = in0 - in15;                                                     \
+}
+
 /* Description : Transposes input 4x4 byte block
    Arguments   : Inputs  - in0, in1, in2, in3      (input 4x4 byte block)
                  Outputs - out0, out1, out2, out3  (output 4x4 byte block)
@@ -1959,8 +2464,69 @@
     out1 = (RTYPE) __msa_ilvl_d((v2i64) out2, (v2i64) out0);            \
     out3 = (RTYPE) __msa_ilvl_d((v2i64) out0, (v2i64) out2);            \
 }
-
 #define TRANSPOSE8x4_UB_UB(...) TRANSPOSE8x4_UB(v16u8, __VA_ARGS__)
+#define TRANSPOSE8x4_UB_UH(...) TRANSPOSE8x4_UB(v8u16, __VA_ARGS__)
+
+/* Description : Transposes input 8x8 byte block
+   Arguments   : Inputs  - in0, in1, in2, in3, in4, in5, in6, in7
+                           (input 8x8 byte block)
+                 Outputs - out0, out1, out2, out3, out4, out5, out6, out7
+                           (output 8x8 byte block)
+                 Return Type - as per RTYPE
+   Details     : Right half (8 bytes) of each input vector is used as a row
+*/
+#define TRANSPOSE8x8_UB(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7,   \
+                        out0, out1, out2, out3, out4, out5, out6, out7)  \
+{                                                                        \
+    v16i8 tmp0_m, tmp1_m, tmp2_m, tmp3_m;                                \
+    v16i8 tmp4_m, tmp5_m, tmp6_m, tmp7_m;                                \
+                                                                         \
+    ILVR_B4_SB(in2, in0, in3, in1, in6, in4, in7, in5,                   \
+               tmp0_m, tmp1_m, tmp2_m, tmp3_m);                          \
+    ILVRL_B2_SB(tmp1_m, tmp0_m, tmp4_m, tmp5_m);                         \
+    ILVRL_B2_SB(tmp3_m, tmp2_m, tmp6_m, tmp7_m);                         \
+    ILVRL_W2(RTYPE, tmp6_m, tmp4_m, out0, out2);                         \
+    ILVRL_W2(RTYPE, tmp7_m, tmp5_m, out4, out6);                         \
+    SLDI_B2_0(RTYPE, out0, out2, out1, out3, 8);                         \
+    SLDI_B2_0(RTYPE, out4, out6, out5, out7, 8);                         \
+}
+#define TRANSPOSE8x8_UB_UB(...) TRANSPOSE8x8_UB(v16u8, __VA_ARGS__)
+#define TRANSPOSE8x8_UB_UH(...) TRANSPOSE8x8_UB(v8u16, __VA_ARGS__)
+
+/* Description : Transposes 16x4 block into 4x16 with byte elements in vectors
+   Arguments   : Inputs  - in0, in1, in2, in3, in4, in5, in6, in7,
+                           in8, in9, in10, in11, in12, in13, in14, in15
+                 Outputs - out0, out1, out2, out3
+                 Return Type - unsigned byte
+   Details     : Only word element 0 (4 bytes) of each input vector is used
+*/
+#define TRANSPOSE16x4_UB_UB(in0, in1, in2, in3, in4, in5, in6, in7,        \
+                            in8, in9, in10, in11, in12, in13, in14, in15,  \
+                            out0, out1, out2, out3)                        \
+{                                                                          \
+    v2i64 tmp0_m, tmp1_m, tmp2_m, tmp3_m;                                  \
+                                                                           \
+    ILVEV_W2_SD(in0, in4, in8, in12, tmp0_m, tmp1_m);                      \
+    out1 = (v16u8) __msa_ilvev_d(tmp1_m, tmp0_m);                          \
+                                                                           \
+    ILVEV_W2_SD(in1, in5, in9, in13, tmp0_m, tmp1_m);                      \
+    out3 = (v16u8) __msa_ilvev_d(tmp1_m, tmp0_m);                          \
+                                                                           \
+    ILVEV_W2_SD(in2, in6, in10, in14, tmp0_m, tmp1_m);                     \
+                                                                           \
+    tmp2_m = __msa_ilvev_d(tmp1_m, tmp0_m);                                \
+    ILVEV_W2_SD(in3, in7, in11, in15, tmp0_m, tmp1_m);                     \
+                                                                           \
+    tmp3_m = __msa_ilvev_d(tmp1_m, tmp0_m);                                \
+    ILVEV_B2_SD(out1, out3, tmp2_m, tmp3_m, tmp0_m, tmp1_m);               \
+    out0 = (v16u8) __msa_ilvev_h((v8i16) tmp1_m, (v8i16) tmp0_m);          \
+    out2 = (v16u8) __msa_ilvod_h((v8i16) tmp1_m, (v8i16) tmp0_m);          \
+                                                                           \
+    tmp0_m = (v2i64) __msa_ilvod_b((v16i8) out3, (v16i8) out1);            \
+    tmp1_m = (v2i64) __msa_ilvod_b((v16i8) tmp3_m, (v16i8) tmp2_m);        \
+    out1 = (v16u8) __msa_ilvev_h((v8i16) tmp1_m, (v8i16) tmp0_m);          \
+    out3 = (v16u8) __msa_ilvod_h((v8i16) tmp1_m, (v8i16) tmp0_m);          \
+}
 
 /* Description : Transposes 16x8 block into 8x16 with byte elements in vectors
    Arguments   : Inputs  - in0, in1, in2, in3, in4, in5, in6, in7,
@@ -2011,6 +2577,22 @@
     out7 = (v16u8) __msa_ilvod_w((v4i32) tmp3_m, (v4i32) tmp2_m);            \
 }
 
+/* Description : Transposes 4x4 block with half word elements in vectors
+   Arguments   : Inputs  - in0, in1, in2, in3
+                 Outputs - out0, out1, out2, out3
+                 Return Type - signed halfword
+   Details     : Right half (4 halfwords) of each input vector is used as a row
+*/
+#define TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, out0, out1, out2, out3)  \
+{                                                                       \
+    v8i16 s0_m, s1_m;                                                   \
+                                                                        \
+    ILVR_H2_SH(in1, in0, in3, in2, s0_m, s1_m);                         \
+    ILVRL_W2_SH(s1_m, s0_m, out0, out2);                                \
+    out1 = (v8i16) __msa_ilvl_d((v2i64) out0, (v2i64) out0);            \
+    out3 = (v8i16) __msa_ilvl_d((v2i64) out0, (v2i64) out2);            \
+}
+
 /* Description : Transposes 8x8 block with half word elements in vectors
    Arguments   : Inputs  - in0, in1, in2, in3, in4, in5, in6, in7
                  Outputs - out0, out1, out2, out3, out4, out5, out6, out7
@@ -2061,6 +2643,239 @@
     out3 = (v4i32) __msa_ilvl_d((v2i64) s3_m, (v2i64) s1_m);            \
 }
 
+/* Description : Average byte elements from pair of vectors and store 8x4 byte
+                 block in destination memory
+   Arguments   : Inputs  - in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride
+                 Outputs -
+                 Return Type -
+   Details     : Each byte element from input vector pair 'in0' and 'in1' are
+                 averaged (a + b)/2 and stored in 'tmp0_m'
+                 Each byte element from input vector pair 'in2' and 'in3' are
+                 averaged (a + b)/2 and stored in 'tmp1_m'
+                 Each byte element from input vector pair 'in4' and 'in5' are
+                 averaged (a + b)/2 and stored in 'tmp2_m'
+                 Each byte element from input vector pair 'in6' and 'in7' are
+                 averaged (a + b)/2 and stored in 'tmp3_m'
+                 The half vector results from all 4 vectors are stored in
+                 destination memory as 8x4 byte block
+*/
+#define AVE_ST8x4_UB(in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride)  \
+{                                                                           \
+    uint64_t out0_m, out1_m, out2_m, out3_m;                                \
+    v16u8 tmp0_m, tmp1_m, tmp2_m, tmp3_m;                                   \
+                                                                            \
+    tmp0_m = __msa_ave_u_b((v16u8) in0, (v16u8) in1);                       \
+    tmp1_m = __msa_ave_u_b((v16u8) in2, (v16u8) in3);                       \
+    tmp2_m = __msa_ave_u_b((v16u8) in4, (v16u8) in5);                       \
+    tmp3_m = __msa_ave_u_b((v16u8) in6, (v16u8) in7);                       \
+                                                                            \
+    out0_m = __msa_copy_u_d((v2i64) tmp0_m, 0);                             \
+    out1_m = __msa_copy_u_d((v2i64) tmp1_m, 0);                             \
+    out2_m = __msa_copy_u_d((v2i64) tmp2_m, 0);                             \
+    out3_m = __msa_copy_u_d((v2i64) tmp3_m, 0);                             \
+    SD4(out0_m, out1_m, out2_m, out3_m, pdst, stride);                      \
+}
+
+/* Description : Average byte elements from pair of vectors and store 16x4 byte
+                 block in destination memory
+   Arguments   : Inputs  - in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride
+                 Outputs -
+                 Return Type -
+   Details     : Each byte element from input vector pair 'in0' and 'in1' are
+                 averaged (a + b)/2 and stored in 'tmp0_m'
+                 Each byte element from input vector pair 'in2' and 'in3' are
+                 averaged (a + b)/2 and stored in 'tmp1_m'
+                 Each byte element from input vector pair 'in4' and 'in5' are
+                 averaged (a + b)/2 and stored in 'tmp2_m'
+                 Each byte element from input vector pair 'in6' and 'in7' are
+                 averaged (a + b)/2 and stored in 'tmp3_m'
+                 The results from all 4 vectors are stored in destination
+                 memory as 16x4 byte block
+*/
+#define AVE_ST16x4_UB(in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride)  \
+{                                                                            \
+    v16u8 tmp0_m, tmp1_m, tmp2_m, tmp3_m;                                    \
+                                                                             \
+    tmp0_m = __msa_ave_u_b((v16u8) in0, (v16u8) in1);                        \
+    tmp1_m = __msa_ave_u_b((v16u8) in2, (v16u8) in3);                        \
+    tmp2_m = __msa_ave_u_b((v16u8) in4, (v16u8) in5);                        \
+    tmp3_m = __msa_ave_u_b((v16u8) in6, (v16u8) in7);                        \
+                                                                             \
+    ST_UB4(tmp0_m, tmp1_m, tmp2_m, tmp3_m, pdst, stride);                    \
+}
+
+/* Description : Average rounded byte elements from pair of vectors and store
+                 8x4 byte block in destination memory
+   Arguments   : Inputs  - in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride
+                 Outputs -
+                 Return Type -
+   Details     : Each byte element from input vector pair 'in0' and 'in1' are
+                 average rounded (a + b + 1)/2 and stored in 'tp0_m'
+                 Each byte element from input vector pair 'in2' and 'in3' are
+                 average rounded (a + b + 1)/2 and stored in 'tp1_m'
+                 Each byte element from input vector pair 'in4' and 'in5' are
+                 average rounded (a + b + 1)/2 and stored in 'tp2_m'
+                 Each byte element from input vector pair 'in6' and 'in7' are
+                 average rounded (a + b + 1)/2 and stored in 'tp3_m'
+                 The half vector results from all 4 vectors are stored in
+                 destination memory as 8x4 byte block
+*/
+#define AVER_ST8x4_UB(in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride)  \
+{                                                                            \
+    uint64_t out0_m, out1_m, out2_m, out3_m;                                 \
+    v16u8 tp0_m, tp1_m, tp2_m, tp3_m;                                        \
+                                                                             \
+    AVER_UB4_UB(in0, in1, in2, in3, in4, in5, in6, in7,                      \
+                tp0_m, tp1_m, tp2_m, tp3_m);                                 \
+                                                                             \
+    out0_m = __msa_copy_u_d((v2i64) tp0_m, 0);                               \
+    out1_m = __msa_copy_u_d((v2i64) tp1_m, 0);                               \
+    out2_m = __msa_copy_u_d((v2i64) tp2_m, 0);                               \
+    out3_m = __msa_copy_u_d((v2i64) tp3_m, 0);                               \
+    SD4(out0_m, out1_m, out2_m, out3_m, pdst, stride);                       \
+}
+
+/* Description : Average rounded byte elements from pair of vectors and store
+                 16x4 byte block in destination memory
+   Arguments   : Inputs  - in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride
+                 Outputs -
+                 Return Type -
+   Details     : Each byte element from input vector pair 'in0' and 'in1' are
+                 average rounded (a + b + 1)/2 and stored in 't0_m'
+                 Each byte element from input vector pair 'in2' and 'in3' are
+                 average rounded (a + b + 1)/2 and stored in 't1_m'
+                 Each byte element from input vector pair 'in4' and 'in5' are
+                 average rounded (a + b + 1)/2 and stored in 't2_m'
+                 Each byte element from input vector pair 'in6' and 'in7' are
+                 average rounded (a + b + 1)/2 and stored in 't3_m'
+                 The vector results from all 4 vectors are stored in
+                 destination memory as 16x4 byte block
+*/
+#define AVER_ST16x4_UB(in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride)  \
+{                                                                             \
+    v16u8 t0_m, t1_m, t2_m, t3_m;                                             \
+                                                                              \
+    AVER_UB4_UB(in0, in1, in2, in3, in4, in5, in6, in7,                       \
+                t0_m, t1_m, t2_m, t3_m);                                      \
+    ST_UB4(t0_m, t1_m, t2_m, t3_m, pdst, stride);                             \
+}
+
+/* Description : Average rounded byte elements from pair of vectors,
+                 average rounded with destination and store 8x4 byte block
+                 in destination memory
+   Arguments   : Inputs  - in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride
+                 Outputs -
+                 Return Type -
+   Details     : Each byte element from input vector pair 'in0' and 'in1' are
+                 average rounded (a + b + 1)/2 and stored in 'tmp0_m'
+                 Each byte element from input vector pair 'in2' and 'in3' are
+                 average rounded (a + b + 1)/2 and stored in 'tmp1_m'
+                 Each byte element from input vector pair 'in4' and 'in5' are
+                 average rounded (a + b + 1)/2 and stored in 'tmp2_m'
+                 Each byte element from input vector pair 'in6' and 'in7' are
+                 average rounded (a + b + 1)/2 and stored in 'tmp3_m'
+                 Each result is average rounded again with the row loaded from
+                 'pdst' and the half vectors are stored as 8x4 byte block
+*/
+#define AVER_DST_ST8x4_UB(in0, in1, in2, in3, in4, in5, in6, in7,  \
+                          pdst, stride)                            \
+{                                                                  \
+    v16u8 tmp0_m, tmp1_m, tmp2_m, tmp3_m;                          \
+    v16u8 dst0_m, dst1_m, dst2_m, dst3_m;                          \
+                                                                   \
+    LD_UB4(pdst, stride, dst0_m, dst1_m, dst2_m, dst3_m);          \
+    AVER_UB4_UB(in0, in1, in2, in3, in4, in5, in6, in7,            \
+                tmp0_m, tmp1_m, tmp2_m, tmp3_m);                   \
+    AVER_ST8x4_UB(dst0_m, tmp0_m, dst1_m, tmp1_m,                  \
+                  dst2_m, tmp2_m, dst3_m, tmp3_m, pdst, stride);   \
+}
+
+/* Description : Average rounded byte elements from pair of vectors,
+                 average rounded with destination and store 16x4 byte block
+                 in destination memory
+   Arguments   : Inputs  - in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride
+                 Outputs -
+                 Return Type -
+   Details     : Each byte element from input vector pair 'in0' and 'in1' are
+                 average rounded (a + b + 1)/2 and stored in 'tmp0_m'
+                 Each byte element from input vector pair 'in2' and 'in3' are
+                 average rounded (a + b + 1)/2 and stored in 'tmp1_m'
+                 Each byte element from input vector pair 'in4' and 'in5' are
+                 average rounded (a + b + 1)/2 and stored in 'tmp2_m'
+                 Each byte element from input vector pair 'in6' and 'in7' are
+                 average rounded (a + b + 1)/2 and stored in 'tmp3_m'
+                 Each result is average rounded again with the row loaded from
+                 'pdst' and the full vectors are stored as 16x4 byte block
+*/
+#define AVER_DST_ST16x4_UB(in0, in1, in2, in3, in4, in5, in6, in7,  \
+                           pdst, stride)                            \
+{                                                                   \
+    v16u8 tmp0_m, tmp1_m, tmp2_m, tmp3_m;                           \
+    v16u8 dst0_m, dst1_m, dst2_m, dst3_m;                           \
+                                                                    \
+    LD_UB4(pdst, stride, dst0_m, dst1_m, dst2_m, dst3_m);           \
+    AVER_UB4_UB(in0, in1, in2, in3, in4, in5, in6, in7,             \
+                tmp0_m, tmp1_m, tmp2_m, tmp3_m);                    \
+    AVER_ST16x4_UB(dst0_m, tmp0_m, dst1_m, tmp1_m,                  \
+                   dst2_m, tmp2_m, dst3_m, tmp3_m, pdst, stride);   \
+}
+
+/* Description : Add block 4x4
+   Arguments   : Inputs  - in0, in1, in2, in3, pdst, stride
+                 Outputs -
+                 Return Type - unsigned bytes
+   Details     : Least significant 4 halfwords from each input vector are added
+                 to the destination bytes, clipped between 0-255 and then stored.
+*/
+#define ADDBLK_ST4x4_UB(in0, in1, in2, in3, pdst, stride)         \
+{                                                                 \
+    uint32_t src0_m, src1_m, src2_m, src3_m;                      \
+    uint32_t out0_m, out1_m, out2_m, out3_m;                      \
+    v8i16 inp0_m, inp1_m, res0_m, res1_m;                         \
+    v16i8 dst0_m = { 0 };                                         \
+    v16i8 dst1_m = { 0 };                                         \
+    v16i8 zero_m = { 0 };                                         \
+                                                                  \
+    ILVR_D2_SH(in1, in0, in3, in2, inp0_m, inp1_m)                \
+    LW4(pdst, stride,  src0_m, src1_m, src2_m, src3_m);           \
+    INSERT_W2_SB(src0_m, src1_m, dst0_m);                         \
+    INSERT_W2_SB(src2_m, src3_m, dst1_m);                         \
+    ILVR_B2_SH(zero_m, dst0_m, zero_m, dst1_m, res0_m, res1_m);   \
+    ADD2(res0_m, inp0_m, res1_m, inp1_m, res0_m, res1_m);         \
+    CLIP_SH2_0_255(res0_m, res1_m);                               \
+    PCKEV_B2_SB(res0_m, res0_m, res1_m, res1_m, dst0_m, dst1_m);  \
+                                                                  \
+    out0_m = __msa_copy_u_w((v4i32) dst0_m, 0);                   \
+    out1_m = __msa_copy_u_w((v4i32) dst0_m, 1);                   \
+    out2_m = __msa_copy_u_w((v4i32) dst1_m, 0);                   \
+    out3_m = __msa_copy_u_w((v4i32) dst1_m, 1);                   \
+    SW4(out0_m, out1_m, out2_m, out3_m, pdst, stride);            \
+}
+
+/* Description : Dot product and addition of 3 signed byte input vectors
+   Arguments   : Inputs  - in0, in1, in2, coeff0, coeff1, coeff2
+                 Outputs - out0_m
+                 Return Type - signed halfword
+   Details     : Signed byte dot product of 'in0' with 'coeff0'
+                 Signed byte dot product of 'in1' with 'coeff1' (accumulated)
+                 Signed byte dot product of 'in2' with 'coeff2'
+                 Third product is added with __msa_adds_s_h (saturating add)
+
+                 out0_m = (in0 * coeff0) + (in1 * coeff1) + (in2 * coeff2)
+*/
+#define DPADD_SH3_SH(in0, in1, in2, coeff0, coeff1, coeff2)         \
+( {                                                                 \
+    v8i16 tmp1_m;                                                   \
+    v8i16 out0_m;                                                   \
+                                                                    \
+    out0_m = __msa_dotp_s_h((v16i8) in0, (v16i8) coeff0);           \
+    out0_m = __msa_dpadd_s_h(out0_m, (v16i8) in1, (v16i8) coeff1);  \
+    tmp1_m = __msa_dotp_s_h((v16i8) in2, (v16i8) coeff2);           \
+    out0_m = __msa_adds_s_h(out0_m, tmp1_m);                        \
+                                                                    \
+    out0_m;                                                         \
+} )
+
 /* Description : Pack even elements of input vectors & xor with 128
    Arguments   : Inputs  - in0, in1
                  Outputs - out_m
@@ -2077,6 +2892,24 @@
     out_m;                                                    \
 } )
 
+/* Description : Pack (in0, in1) and (in2, in3) with PCKEV_XORI128_UB, average
+                 them with the ILVR_D2_UB-combined dst0..dst3, store via ST8x4_UB
+   Arguments   : Inputs  - in0, in1, in2, in3, dst0, dst1, dst2, dst3,
+                           pdst, stride ('pdst' is evaluated once, as uint8_t *)
+*/
+#define CONVERT_UB_AVG_ST8x4_UB(in0, in1, in2, in3,                    \
+                                dst0, dst1, dst2, dst3, pdst, stride)  \
+{                                                                      \
+    v16u8 tmp0_m, tmp1_m, tmp2_m, tmp3_m;                              \
+    uint8_t *pdst_m = (uint8_t *) (pdst);                              \
+                                                                       \
+    tmp0_m = PCKEV_XORI128_UB(in0, in1);                               \
+    tmp1_m = PCKEV_XORI128_UB(in2, in3);                               \
+    ILVR_D2_UB(dst1, dst0, dst3, dst2, tmp2_m, tmp3_m);                \
+    AVER_UB2_UB(tmp0_m, tmp2_m, tmp1_m, tmp3_m, tmp0_m, tmp1_m);       \
+    ST8x4_UB(tmp0_m, tmp1_m, pdst_m, stride);                          \
+}
+
 /* Description : Pack even byte elements, extract 0 & 2 index words from pair
                  of results and store 4 words in destination memory as per
                  stride
@@ -2096,4 +2929,31 @@
                                                           \
     SW4(out0_m, out1_m, out2_m, out3_m, pdst, stride);    \
 }
+
+/* Description : Pack the even byte elements of 'in0' and 'in1' into a single
+                 v16i8 vector (__msa_pckev_b) and store it at 'pdst' with ST_SB
+   Arguments   : Inputs  - in0, in1, pdst
+*/
+#define PCKEV_ST_SB(in0, in1, pdst)                   \
+{                                                     \
+    v16i8 tmp_m;                                      \
+    tmp_m = __msa_pckev_b((v16i8) in1, (v16i8) in0);  \
+    ST_SB(tmp_m, (pdst));                             \
+}
+
+/* Description : Horizontal 2 tap filter kernel: shuffle, dot product, round
+   Arguments   : Inputs  - in0, in1, mask, coeff, shift
+   Details     : 'shift' feeds both __msa_srari_h and __msa_sat_u_h; -> v8u16 */
+#define HORIZ_2TAP_FILT_UH(in0, in1, mask, coeff, shift)            \
+( {                                                                 \
+    v16i8 tmp0_m;                                                   \
+    v8u16 tmp1_m;                                                   \
+                                                                    \
+    tmp0_m = __msa_vshf_b((v16i8) mask, (v16i8) in1, (v16i8) in0);  \
+    tmp1_m = __msa_dotp_u_h((v16u8) tmp0_m, (v16u8) coeff);         \
+    tmp1_m = (v8u16) __msa_srari_h((v8i16) tmp1_m, shift);          \
+    tmp1_m = __msa_sat_u_h(tmp1_m, shift);                          \
+                                                                    \
+    tmp1_m;                                                         \
+} )
 #endif  /* AVUTIL_MIPS_GENERIC_MACROS_MSA_H */
diff --git a/libavutil/mips/libm_mips.h b/libavutil/mips/libm_mips.h
index 8853bbc7..757867bc 100644
--- a/libavutil/mips/libm_mips.h
+++ b/libavutil/mips/libm_mips.h
@@ -50,8 +50,8 @@
  * MIPS optimization for some libm functions
  */
 
-#ifndef AVUTIL_LIBM_MIPS_H
-#define AVUTIL_LIBM_MIPS_H
+#ifndef AVUTIL_MIPS_LIBM_MIPS_H
+#define AVUTIL_MIPS_LIBM_MIPS_H
 
 static av_always_inline av_const long int lrintf_mips(float x)
 {
@@ -70,4 +70,4 @@ static av_always_inline av_const long int lrintf_mips(float x)
 #define lrintf(x)   lrintf_mips(x)
 
 #define HAVE_LRINTF 1
-#endif /* AVUTIL_LIBM_MIPS_H */
+#endif /* AVUTIL_MIPS_LIBM_MIPS_H */
diff --git a/libavutil/motion_vector.h b/libavutil/motion_vector.h
index 30cfb994..ec295563 100644
--- a/libavutil/motion_vector.h
+++ b/libavutil/motion_vector.h
@@ -45,6 +45,13 @@ typedef struct AVMotionVector {
      * Currently unused.
      */
     uint64_t flags;
+    /**
+     * Motion vector
+     * src_x = dst_x + motion_x / motion_scale
+     * src_y = dst_y + motion_y / motion_scale
+     */
+    int32_t motion_x, motion_y;
+    uint16_t motion_scale;
 } AVMotionVector;
 
 #endif /* AVUTIL_MOTION_VECTOR_H */
diff --git a/libavutil/old_pix_fmts.h b/libavutil/old_pix_fmts.h
deleted file mode 100644
index cd1ed7c1..00000000
--- a/libavutil/old_pix_fmts.h
+++ /dev/null
@@ -1,177 +0,0 @@
-/*
- * copyright (c) 2006-2012 Michael Niedermayer <michaelni@gmx.at>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVUTIL_OLD_PIX_FMTS_H
-#define AVUTIL_OLD_PIX_FMTS_H
-
-/*
- * This header exists to prevent new pixel formats from being accidentally added
- * to the deprecated list.
- * Do not include it directly. It will be removed on next major bump
- *
- * Do not add new items to this list. Use the AVPixelFormat enum instead.
- */
-    PIX_FMT_NONE = AV_PIX_FMT_NONE,
-    PIX_FMT_YUV420P,   ///< planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
-    PIX_FMT_YUYV422,   ///< packed YUV 4:2:2, 16bpp, Y0 Cb Y1 Cr
-    PIX_FMT_RGB24,     ///< packed RGB 8:8:8, 24bpp, RGBRGB...
-    PIX_FMT_BGR24,     ///< packed RGB 8:8:8, 24bpp, BGRBGR...
-    PIX_FMT_YUV422P,   ///< planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
-    PIX_FMT_YUV444P,   ///< planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
-    PIX_FMT_YUV410P,   ///< planar YUV 4:1:0,  9bpp, (1 Cr & Cb sample per 4x4 Y samples)
-    PIX_FMT_YUV411P,   ///< planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples)
-    PIX_FMT_GRAY8,     ///<        Y        ,  8bpp
-    PIX_FMT_MONOWHITE, ///<        Y        ,  1bpp, 0 is white, 1 is black, in each byte pixels are ordered from the msb to the lsb
-    PIX_FMT_MONOBLACK, ///<        Y        ,  1bpp, 0 is black, 1 is white, in each byte pixels are ordered from the msb to the lsb
-    PIX_FMT_PAL8,      ///< 8 bit with PIX_FMT_RGB32 palette
-    PIX_FMT_YUVJ420P,  ///< planar YUV 4:2:0, 12bpp, full scale (JPEG), deprecated in favor of PIX_FMT_YUV420P and setting color_range
-    PIX_FMT_YUVJ422P,  ///< planar YUV 4:2:2, 16bpp, full scale (JPEG), deprecated in favor of PIX_FMT_YUV422P and setting color_range
-    PIX_FMT_YUVJ444P,  ///< planar YUV 4:4:4, 24bpp, full scale (JPEG), deprecated in favor of PIX_FMT_YUV444P and setting color_range
-#if FF_API_XVMC
-    PIX_FMT_XVMC_MPEG2_MC,///< XVideo Motion Acceleration via common packet passing
-    PIX_FMT_XVMC_MPEG2_IDCT,
-#endif /* FF_API_XVMC */
-    PIX_FMT_UYVY422,   ///< packed YUV 4:2:2, 16bpp, Cb Y0 Cr Y1
-    PIX_FMT_UYYVYY411, ///< packed YUV 4:1:1, 12bpp, Cb Y0 Y1 Cr Y2 Y3
-    PIX_FMT_BGR8,      ///< packed RGB 3:3:2,  8bpp, (msb)2B 3G 3R(lsb)
-    PIX_FMT_BGR4,      ///< packed RGB 1:2:1 bitstream,  4bpp, (msb)1B 2G 1R(lsb), a byte contains two pixels, the first pixel in the byte is the one composed by the 4 msb bits
-    PIX_FMT_BGR4_BYTE, ///< packed RGB 1:2:1,  8bpp, (msb)1B 2G 1R(lsb)
-    PIX_FMT_RGB8,      ///< packed RGB 3:3:2,  8bpp, (msb)2R 3G 3B(lsb)
-    PIX_FMT_RGB4,      ///< packed RGB 1:2:1 bitstream,  4bpp, (msb)1R 2G 1B(lsb), a byte contains two pixels, the first pixel in the byte is the one composed by the 4 msb bits
-    PIX_FMT_RGB4_BYTE, ///< packed RGB 1:2:1,  8bpp, (msb)1R 2G 1B(lsb)
-    PIX_FMT_NV12,      ///< planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (first byte U and the following byte V)
-    PIX_FMT_NV21,      ///< as above, but U and V bytes are swapped
-
-    PIX_FMT_ARGB,      ///< packed ARGB 8:8:8:8, 32bpp, ARGBARGB...
-    PIX_FMT_RGBA,      ///< packed RGBA 8:8:8:8, 32bpp, RGBARGBA...
-    PIX_FMT_ABGR,      ///< packed ABGR 8:8:8:8, 32bpp, ABGRABGR...
-    PIX_FMT_BGRA,      ///< packed BGRA 8:8:8:8, 32bpp, BGRABGRA...
-
-    PIX_FMT_GRAY16BE,  ///<        Y        , 16bpp, big-endian
-    PIX_FMT_GRAY16LE,  ///<        Y        , 16bpp, little-endian
-    PIX_FMT_YUV440P,   ///< planar YUV 4:4:0 (1 Cr & Cb sample per 1x2 Y samples)
-    PIX_FMT_YUVJ440P,  ///< planar YUV 4:4:0 full scale (JPEG), deprecated in favor of PIX_FMT_YUV440P and setting color_range
-    PIX_FMT_YUVA420P,  ///< planar YUV 4:2:0, 20bpp, (1 Cr & Cb sample per 2x2 Y & A samples)
-#if FF_API_VDPAU
-    PIX_FMT_VDPAU_H264,///< H.264 HW decoding with VDPAU, data[0] contains a vdpau_render_state struct which contains the bitstream of the slices as well as various fields extracted from headers
-    PIX_FMT_VDPAU_MPEG1,///< MPEG-1 HW decoding with VDPAU, data[0] contains a vdpau_render_state struct which contains the bitstream of the slices as well as various fields extracted from headers
-    PIX_FMT_VDPAU_MPEG2,///< MPEG-2 HW decoding with VDPAU, data[0] contains a vdpau_render_state struct which contains the bitstream of the slices as well as various fields extracted from headers
-    PIX_FMT_VDPAU_WMV3,///< WMV3 HW decoding with VDPAU, data[0] contains a vdpau_render_state struct which contains the bitstream of the slices as well as various fields extracted from headers
-    PIX_FMT_VDPAU_VC1, ///< VC-1 HW decoding with VDPAU, data[0] contains a vdpau_render_state struct which contains the bitstream of the slices as well as various fields extracted from headers
-#endif
-    PIX_FMT_RGB48BE,   ///< packed RGB 16:16:16, 48bpp, 16R, 16G, 16B, the 2-byte value for each R/G/B component is stored as big-endian
-    PIX_FMT_RGB48LE,   ///< packed RGB 16:16:16, 48bpp, 16R, 16G, 16B, the 2-byte value for each R/G/B component is stored as little-endian
-
-    PIX_FMT_RGB565BE,  ///< packed RGB 5:6:5, 16bpp, (msb)   5R 6G 5B(lsb), big-endian
-    PIX_FMT_RGB565LE,  ///< packed RGB 5:6:5, 16bpp, (msb)   5R 6G 5B(lsb), little-endian
-    PIX_FMT_RGB555BE,  ///< packed RGB 5:5:5, 16bpp, (msb)1A 5R 5G 5B(lsb), big-endian, most significant bit to 0
-    PIX_FMT_RGB555LE,  ///< packed RGB 5:5:5, 16bpp, (msb)1A 5R 5G 5B(lsb), little-endian, most significant bit to 0
-
-    PIX_FMT_BGR565BE,  ///< packed BGR 5:6:5, 16bpp, (msb)   5B 6G 5R(lsb), big-endian
-    PIX_FMT_BGR565LE,  ///< packed BGR 5:6:5, 16bpp, (msb)   5B 6G 5R(lsb), little-endian
-    PIX_FMT_BGR555BE,  ///< packed BGR 5:5:5, 16bpp, (msb)1A 5B 5G 5R(lsb), big-endian, most significant bit to 1
-    PIX_FMT_BGR555LE,  ///< packed BGR 5:5:5, 16bpp, (msb)1A 5B 5G 5R(lsb), little-endian, most significant bit to 1
-
-    PIX_FMT_VAAPI_MOCO, ///< HW acceleration through VA API at motion compensation entry-point, Picture.data[3] contains a vaapi_render_state struct which contains macroblocks as well as various fields extracted from headers
-    PIX_FMT_VAAPI_IDCT, ///< HW acceleration through VA API at IDCT entry-point, Picture.data[3] contains a vaapi_render_state struct which contains fields extracted from headers
-    PIX_FMT_VAAPI_VLD,  ///< HW decoding through VA API, Picture.data[3] contains a vaapi_render_state struct which contains the bitstream of the slices as well as various fields extracted from headers
-
-    PIX_FMT_YUV420P16LE,  ///< planar YUV 4:2:0, 24bpp, (1 Cr & Cb sample per 2x2 Y samples), little-endian
-    PIX_FMT_YUV420P16BE,  ///< planar YUV 4:2:0, 24bpp, (1 Cr & Cb sample per 2x2 Y samples), big-endian
-    PIX_FMT_YUV422P16LE,  ///< planar YUV 4:2:2, 32bpp, (1 Cr & Cb sample per 2x1 Y samples), little-endian
-    PIX_FMT_YUV422P16BE,  ///< planar YUV 4:2:2, 32bpp, (1 Cr & Cb sample per 2x1 Y samples), big-endian
-    PIX_FMT_YUV444P16LE,  ///< planar YUV 4:4:4, 48bpp, (1 Cr & Cb sample per 1x1 Y samples), little-endian
-    PIX_FMT_YUV444P16BE,  ///< planar YUV 4:4:4, 48bpp, (1 Cr & Cb sample per 1x1 Y samples), big-endian
-#if FF_API_VDPAU
-    PIX_FMT_VDPAU_MPEG4,  ///< MPEG4 HW decoding with VDPAU, data[0] contains a vdpau_render_state struct which contains the bitstream of the slices as well as various fields extracted from headers
-#endif
-    PIX_FMT_DXVA2_VLD,    ///< HW decoding through DXVA2, Picture.data[3] contains a LPDIRECT3DSURFACE9 pointer
-
-    PIX_FMT_RGB444LE,  ///< packed RGB 4:4:4, 16bpp, (msb)4A 4R 4G 4B(lsb), little-endian, most significant bits to 0
-    PIX_FMT_RGB444BE,  ///< packed RGB 4:4:4, 16bpp, (msb)4A 4R 4G 4B(lsb), big-endian, most significant bits to 0
-    PIX_FMT_BGR444LE,  ///< packed BGR 4:4:4, 16bpp, (msb)4A 4B 4G 4R(lsb), little-endian, most significant bits to 1
-    PIX_FMT_BGR444BE,  ///< packed BGR 4:4:4, 16bpp, (msb)4A 4B 4G 4R(lsb), big-endian, most significant bits to 1
-    PIX_FMT_GRAY8A,    ///< 8bit gray, 8bit alpha
-    PIX_FMT_BGR48BE,   ///< packed RGB 16:16:16, 48bpp, 16B, 16G, 16R, the 2-byte value for each R/G/B component is stored as big-endian
-    PIX_FMT_BGR48LE,   ///< packed RGB 16:16:16, 48bpp, 16B, 16G, 16R, the 2-byte value for each R/G/B component is stored as little-endian
-
-    //the following 10 formats have the disadvantage of needing 1 format for each bit depth, thus
-    //If you want to support multiple bit depths, then using PIX_FMT_YUV420P16* with the bpp stored separately
-    //is better
-    PIX_FMT_YUV420P9BE, ///< planar YUV 4:2:0, 13.5bpp, (1 Cr & Cb sample per 2x2 Y samples), big-endian
-    PIX_FMT_YUV420P9LE, ///< planar YUV 4:2:0, 13.5bpp, (1 Cr & Cb sample per 2x2 Y samples), little-endian
-    PIX_FMT_YUV420P10BE,///< planar YUV 4:2:0, 15bpp, (1 Cr & Cb sample per 2x2 Y samples), big-endian
-    PIX_FMT_YUV420P10LE,///< planar YUV 4:2:0, 15bpp, (1 Cr & Cb sample per 2x2 Y samples), little-endian
-    PIX_FMT_YUV422P10BE,///< planar YUV 4:2:2, 20bpp, (1 Cr & Cb sample per 2x1 Y samples), big-endian
-    PIX_FMT_YUV422P10LE,///< planar YUV 4:2:2, 20bpp, (1 Cr & Cb sample per 2x1 Y samples), little-endian
-    PIX_FMT_YUV444P9BE, ///< planar YUV 4:4:4, 27bpp, (1 Cr & Cb sample per 1x1 Y samples), big-endian
-    PIX_FMT_YUV444P9LE, ///< planar YUV 4:4:4, 27bpp, (1 Cr & Cb sample per 1x1 Y samples), little-endian
-    PIX_FMT_YUV444P10BE,///< planar YUV 4:4:4, 30bpp, (1 Cr & Cb sample per 1x1 Y samples), big-endian
-    PIX_FMT_YUV444P10LE,///< planar YUV 4:4:4, 30bpp, (1 Cr & Cb sample per 1x1 Y samples), little-endian
-    PIX_FMT_YUV422P9BE, ///< planar YUV 4:2:2, 18bpp, (1 Cr & Cb sample per 2x1 Y samples), big-endian
-    PIX_FMT_YUV422P9LE, ///< planar YUV 4:2:2, 18bpp, (1 Cr & Cb sample per 2x1 Y samples), little-endian
-    PIX_FMT_VDA_VLD,    ///< hardware decoding through VDA
-
-#ifdef AV_PIX_FMT_ABI_GIT_MASTER
-    PIX_FMT_RGBA64BE,  ///< packed RGBA 16:16:16:16, 64bpp, 16R, 16G, 16B, 16A, the 2-byte value for each R/G/B/A component is stored as big-endian
-    PIX_FMT_RGBA64LE,  ///< packed RGBA 16:16:16:16, 64bpp, 16R, 16G, 16B, 16A, the 2-byte value for each R/G/B/A component is stored as little-endian
-    PIX_FMT_BGRA64BE,  ///< packed RGBA 16:16:16:16, 64bpp, 16B, 16G, 16R, 16A, the 2-byte value for each R/G/B/A component is stored as big-endian
-    PIX_FMT_BGRA64LE,  ///< packed RGBA 16:16:16:16, 64bpp, 16B, 16G, 16R, 16A, the 2-byte value for each R/G/B/A component is stored as little-endian
-#endif
-    PIX_FMT_GBRP,      ///< planar GBR 4:4:4 24bpp
-    PIX_FMT_GBRP9BE,   ///< planar GBR 4:4:4 27bpp, big endian
-    PIX_FMT_GBRP9LE,   ///< planar GBR 4:4:4 27bpp, little endian
-    PIX_FMT_GBRP10BE,  ///< planar GBR 4:4:4 30bpp, big endian
-    PIX_FMT_GBRP10LE,  ///< planar GBR 4:4:4 30bpp, little endian
-    PIX_FMT_GBRP16BE,  ///< planar GBR 4:4:4 48bpp, big endian
-    PIX_FMT_GBRP16LE,  ///< planar GBR 4:4:4 48bpp, little endian
-
-#ifndef AV_PIX_FMT_ABI_GIT_MASTER
-    PIX_FMT_RGBA64BE=0x123,  ///< packed RGBA 16:16:16:16, 64bpp, 16R, 16G, 16B, 16A, the 2-byte value for each R/G/B/A component is stored as big-endian
-    PIX_FMT_RGBA64LE,  ///< packed RGBA 16:16:16:16, 64bpp, 16R, 16G, 16B, 16A, the 2-byte value for each R/G/B/A component is stored as little-endian
-    PIX_FMT_BGRA64BE,  ///< packed RGBA 16:16:16:16, 64bpp, 16B, 16G, 16R, 16A, the 2-byte value for each R/G/B/A component is stored as big-endian
-    PIX_FMT_BGRA64LE,  ///< packed RGBA 16:16:16:16, 64bpp, 16B, 16G, 16R, 16A, the 2-byte value for each R/G/B/A component is stored as little-endian
-#endif
-    PIX_FMT_0RGB=0x123+4,      ///< packed RGB 8:8:8, 32bpp, 0RGB0RGB...
-    PIX_FMT_RGB0,      ///< packed RGB 8:8:8, 32bpp, RGB0RGB0...
-    PIX_FMT_0BGR,      ///< packed BGR 8:8:8, 32bpp, 0BGR0BGR...
-    PIX_FMT_BGR0,      ///< packed BGR 8:8:8, 32bpp, BGR0BGR0...
-    PIX_FMT_YUVA444P,  ///< planar YUV 4:4:4 32bpp, (1 Cr & Cb sample per 1x1 Y & A samples)
-    PIX_FMT_YUVA422P,  ///< planar YUV 4:2:2 24bpp, (1 Cr & Cb sample per 2x1 Y & A samples)
-
-    PIX_FMT_YUV420P12BE, ///< planar YUV 4:2:0,18bpp, (1 Cr & Cb sample per 2x2 Y samples), big-endian
-    PIX_FMT_YUV420P12LE, ///< planar YUV 4:2:0,18bpp, (1 Cr & Cb sample per 2x2 Y samples), little-endian
-    PIX_FMT_YUV420P14BE, ///< planar YUV 4:2:0,21bpp, (1 Cr & Cb sample per 2x2 Y samples), big-endian
-    PIX_FMT_YUV420P14LE, ///< planar YUV 4:2:0,21bpp, (1 Cr & Cb sample per 2x2 Y samples), little-endian
-    PIX_FMT_YUV422P12BE, ///< planar YUV 4:2:2,24bpp, (1 Cr & Cb sample per 2x1 Y samples), big-endian
-    PIX_FMT_YUV422P12LE, ///< planar YUV 4:2:2,24bpp, (1 Cr & Cb sample per 2x1 Y samples), little-endian
-    PIX_FMT_YUV422P14BE, ///< planar YUV 4:2:2,28bpp, (1 Cr & Cb sample per 2x1 Y samples), big-endian
-    PIX_FMT_YUV422P14LE, ///< planar YUV 4:2:2,28bpp, (1 Cr & Cb sample per 2x1 Y samples), little-endian
-    PIX_FMT_YUV444P12BE, ///< planar YUV 4:4:4,36bpp, (1 Cr & Cb sample per 1x1 Y samples), big-endian
-    PIX_FMT_YUV444P12LE, ///< planar YUV 4:4:4,36bpp, (1 Cr & Cb sample per 1x1 Y samples), little-endian
-    PIX_FMT_YUV444P14BE, ///< planar YUV 4:4:4,42bpp, (1 Cr & Cb sample per 1x1 Y samples), big-endian
-    PIX_FMT_YUV444P14LE, ///< planar YUV 4:4:4,42bpp, (1 Cr & Cb sample per 1x1 Y samples), little-endian
-    PIX_FMT_GBRP12BE,    ///< planar GBR 4:4:4 36bpp, big endian
-    PIX_FMT_GBRP12LE,    ///< planar GBR 4:4:4 36bpp, little endian
-    PIX_FMT_GBRP14BE,    ///< planar GBR 4:4:4 42bpp, big endian
-    PIX_FMT_GBRP14LE,    ///< planar GBR 4:4:4 42bpp, little endian
-
-    PIX_FMT_NB,        ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions
-#endif /* AVUTIL_OLD_PIX_FMTS_H */
diff --git a/libavutil/opencl.c b/libavutil/opencl.c
index 1d782145..08b50341 100644
--- a/libavutil/opencl.c
+++ b/libavutil/opencl.c
@@ -28,16 +28,10 @@
 #include "opt.h"
 
 #if HAVE_THREADS
-#if HAVE_PTHREADS
-#include <pthread.h>
-#elif HAVE_W32THREADS
-#include "compat/w32pthreads.h"
-#elif HAVE_OS2THREADS
-#include "compat/os2threads.h"
-#endif
+#include "thread.h"
 #include "atomic.h"
 
-static volatile pthread_mutex_t *atomic_opencl_lock = NULL;
+static pthread_mutex_t * volatile atomic_opencl_lock = NULL;
 #define LOCK_OPENCL pthread_mutex_lock(atomic_opencl_lock)
 #define UNLOCK_OPENCL pthread_mutex_unlock(atomic_opencl_lock)
 #else
@@ -84,7 +78,7 @@ static const AVOption opencl_options[] = {
 };
 
 static const AVClass openclutils_class = {
-    .class_name                = "OPENCLUTILS",
+    .class_name                = "opencl",
     .option                    = opencl_options,
     .item_name                 = av_default_item_name,
     .version                   = LIBAVUTIL_VERSION_INT,
@@ -363,7 +357,7 @@ static inline int init_opencl_mtx(void)
             av_free(tmp);
             return AVERROR(err);
         }
-        if (avpriv_atomic_ptr_cas(&atomic_opencl_lock, NULL, tmp)) {
+        if (avpriv_atomic_ptr_cas((void * volatile *)&atomic_opencl_lock, NULL, tmp)) {
             pthread_mutex_destroy(tmp);
             av_free(tmp);
         }
@@ -449,12 +443,14 @@ int av_opencl_register_kernel_code(const char *kernel_code)
 cl_program av_opencl_compile(const char *program_name, const char *build_opts)
 {
     int i;
-    cl_int status;
+    cl_int status, build_status;
     int kernel_code_idx = 0;
     const char *kernel_source;
     size_t kernel_code_len;
     char* ptr = NULL;
     cl_program program = NULL;
+    size_t log_size;
+    char *log = NULL;
 
     LOCK_OPENCL;
     for (i = 0; i < opencl_ctx.kernel_code_count; i++) {
@@ -481,10 +477,36 @@ cl_program av_opencl_compile(const char *program_name, const char *build_opts)
         program = NULL;
         goto end;
     }
-    status = clBuildProgram(program, 1, &(opencl_ctx.device_id), build_opts, NULL, NULL);
+
+    build_status = clBuildProgram(program, 1, &(opencl_ctx.device_id), build_opts, NULL, NULL);
+    status = clGetProgramBuildInfo(program, opencl_ctx.device_id,
+                                   CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
     if (status != CL_SUCCESS) {
+        av_log(&opencl_ctx, AV_LOG_WARNING,
+               "Failed to get compilation log: %s\n",
+               av_opencl_errstr(status));
+    } else {
+        log = av_malloc(log_size);
+        if (log) {
+            status = clGetProgramBuildInfo(program, opencl_ctx.device_id,
+                                           CL_PROGRAM_BUILD_LOG, log_size,
+                                           log, NULL);
+            if (status != CL_SUCCESS) {
+                av_log(&opencl_ctx, AV_LOG_WARNING,
+                       "Failed to get compilation log: %s\n",
+                       av_opencl_errstr(status));
+            } else {
+                int level = build_status == CL_SUCCESS ? AV_LOG_DEBUG :
+                                                         AV_LOG_ERROR;
+                av_log(&opencl_ctx, level, "Compilation log:\n%s\n", log);
+            }
+        }
+        av_freep(&log);
+    }
+    if (build_status != CL_SUCCESS) {
         av_log(&opencl_ctx, AV_LOG_ERROR,
-               "Compilation failed with OpenCL program: %s\n", program_name);
+               "Compilation failed with OpenCL program '%s': %s\n",
+               program_name, av_opencl_errstr(build_status));
         program = NULL;
         goto end;
     }
diff --git a/libavutil/opencl.h b/libavutil/opencl.h
index e423e558..b709927c 100644
--- a/libavutil/opencl.h
+++ b/libavutil/opencl.h
@@ -29,9 +29,10 @@
  * change without prior notice.
  */
 
-#ifndef LIBAVUTIL_OPENCL_H
-#define LIBAVUTIL_OPENCL_H
+#ifndef AVUTIL_OPENCL_H
+#define AVUTIL_OPENCL_H
 
+#define CL_USE_DEPRECATED_OPENCL_1_2_APIS 1
 #ifdef __APPLE__
 #include <OpenCL/cl.h>
 #else
@@ -288,4 +289,4 @@ void av_opencl_uninit(void);
 int64_t av_opencl_benchmark(AVOpenCLDeviceNode *device, cl_platform_id platform,
                             int64_t (*benchmark)(AVOpenCLExternalEnv *ext_opencl_env));
 
-#endif /* LIBAVUTIL_OPENCL_H */
+#endif /* AVUTIL_OPENCL_H */
diff --git a/libavutil/opencl_internal.h b/libavutil/opencl_internal.h
index dacd930a..5cabb7b3 100644
--- a/libavutil/opencl_internal.h
+++ b/libavutil/opencl_internal.h
@@ -20,6 +20,10 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#ifndef AVUTIL_OPENCL_INTERNAL_H
+#define AVUTIL_OPENCL_INTERNAL_H
+
+#include "attributes.h"
 #include "opencl.h"
 
 #define FF_OPENCL_PARAM_INFO(a) ((void*)(&(a))), (sizeof(a))
@@ -30,4 +34,7 @@ typedef struct {
     void *ctx;
 } FFOpenclParam;
 
+av_warn_unused_result
 int avpriv_opencl_set_parameter(FFOpenclParam *opencl_param, ...);
+
+#endif /* AVUTIL_OPENCL_INTERNAL_H */
diff --git a/libavutil/opt.c b/libavutil/opt.c
index 62db1b50..eae4f75d 100644
--- a/libavutil/opt.c
+++ b/libavutil/opt.c
@@ -26,6 +26,7 @@
  */
 
 #include "avutil.h"
+#include "avassert.h"
 #include "avstring.h"
 #include "channel_layout.h"
 #include "common.h"
@@ -41,14 +42,7 @@
 
 #include <float.h>
 
-#if FF_API_OLD_AVOPTIONS
-const AVOption *av_next_option(FF_CONST_AVUTIL55 void *obj, const AVOption *last)
-{
-    return av_opt_next(obj, last);
-}
-#endif
-
-const AVOption *av_opt_next(FF_CONST_AVUTIL55 void *obj, const AVOption *last)
+const AVOption *av_opt_next(const void *obj, const AVOption *last)
 {
     const AVClass *class;
     if (!obj)
@@ -67,6 +61,7 @@ static int read_number(const AVOption *o, const void *dst, double *num, int *den
     case AV_OPT_TYPE_FLAGS:     *intnum = *(unsigned int*)dst;return 0;
     case AV_OPT_TYPE_PIXEL_FMT: *intnum = *(enum AVPixelFormat *)dst;return 0;
     case AV_OPT_TYPE_SAMPLE_FMT:*intnum = *(enum AVSampleFormat*)dst;return 0;
+    case AV_OPT_TYPE_BOOL:
     case AV_OPT_TYPE_INT:       *intnum = *(int         *)dst;return 0;
     case AV_OPT_TYPE_CHANNEL_LAYOUT:
     case AV_OPT_TYPE_DURATION:
@@ -103,6 +98,7 @@ static int write_number(void *obj, const AVOption *o, void *dst, double num, int
     switch (o->type) {
     case AV_OPT_TYPE_PIXEL_FMT: *(enum AVPixelFormat *)dst = llrint(num/den) * intnum; break;
     case AV_OPT_TYPE_SAMPLE_FMT:*(enum AVSampleFormat*)dst = llrint(num/den) * intnum; break;
+    case AV_OPT_TYPE_BOOL:
     case AV_OPT_TYPE_FLAGS:
     case AV_OPT_TYPE_INT:   *(int       *)dst= llrint(num/den)*intnum; break;
     case AV_OPT_TYPE_DURATION:
@@ -304,6 +300,44 @@ static int set_string_color(void *obj, const AVOption *o, const char *val, uint8
     return 0;
 }
 
+/* Name of a tri-state boolean: "auto" if negative, else "false" or "true". */
+static const char *get_bool_name(int val)
+{
+    return val < 0 ? "auto" :
+           val     ? "true" : "false";
+}
+
+/* Parse val as a boolean option into *dst; returns 0 or AVERROR(EINVAL). */
+static int set_string_bool(void *obj, const AVOption *o, const char *val, int *dst)
+{
+    long n; /* keep strtol()'s full result so the range check sees the real value */
+
+    if (!val) /* a NULL value leaves *dst untouched and is not an error */
+        return 0;
+
+    if (!strcmp(val, "auto")) {
+        n = -1;
+    } else if (av_match_name(val, "true,y,yes,enable,enabled,on")) {
+        n = 1;
+    } else if (av_match_name(val, "false,n,no,disable,disabled,off")) {
+        n = 0;
+    } else { /* otherwise the whole string must be a decimal integer */
+        char *end = NULL;
+        n = strtol(val, &end, 10);
+        if (val + strlen(val) != end)
+            goto fail;
+    }
+
+    if (n < o->min || n > o->max) /* checked before narrowing to int below */
+        goto fail;
+    *dst = n;
+    return 0;
+
+fail:
+    av_log(obj, AV_LOG_ERROR, "Unable to parse option value \"%s\" as boolean\n", val);
+    return AVERROR(EINVAL);
+}
+
 static int set_string_fmt(void *obj, const AVOption *o, const char *val, uint8_t *dst,
                           int fmt_nb, int ((*get_fmt)(const char *)), const char *desc)
 {
@@ -356,16 +390,6 @@ static int set_string_sample_fmt(void *obj, const AVOption *o, const char *val,
                           AV_SAMPLE_FMT_NB, av_get_sample_fmt, "sample format");
 }
 
-#if FF_API_OLD_AVOPTIONS
-int av_set_string3(void *obj, const char *name, const char *val, int alloc, const AVOption **o_out)
-{
-    const AVOption *o = av_opt_find(obj, name, NULL, 0, 0);
-    if (o_out)
-        *o_out = o;
-    return av_opt_set(obj, name, val, 0);
-}
-#endif
-
 int av_opt_set(void *obj, const char *name, const char *val, int search_flags)
 {
     int ret = 0;
@@ -377,7 +401,7 @@ int av_opt_set(void *obj, const char *name, const char *val, int search_flags)
                  o->type != AV_OPT_TYPE_PIXEL_FMT && o->type != AV_OPT_TYPE_SAMPLE_FMT &&
                  o->type != AV_OPT_TYPE_IMAGE_SIZE && o->type != AV_OPT_TYPE_VIDEO_RATE &&
                  o->type != AV_OPT_TYPE_DURATION && o->type != AV_OPT_TYPE_COLOR &&
-                 o->type != AV_OPT_TYPE_CHANNEL_LAYOUT))
+                 o->type != AV_OPT_TYPE_CHANNEL_LAYOUT && o->type != AV_OPT_TYPE_BOOL))
         return AVERROR(EINVAL);
 
     if (o->flags & AV_OPT_FLAG_READONLY)
@@ -385,6 +409,7 @@ int av_opt_set(void *obj, const char *name, const char *val, int search_flags)
 
     dst = ((uint8_t*)target_obj) + o->offset;
     switch (o->type) {
+    case AV_OPT_TYPE_BOOL:     return set_string_bool(obj, o, val, dst);
     case AV_OPT_TYPE_STRING:   return set_string(obj, o, val, dst);
     case AV_OPT_TYPE_BINARY:   return set_string_binary(obj, o, val, dst);
     case AV_OPT_TYPE_FLAGS:
@@ -412,11 +437,7 @@ int av_opt_set(void *obj, const char *name, const char *val, int search_flags)
         if (!val || !strcmp(val, "none")) {
             *(int64_t *)dst = 0;
         } else {
-#if FF_API_GET_CHANNEL_LAYOUT_COMPAT
-            int64_t cl = ff_get_channel_layout(val, 0);
-#else
             int64_t cl = av_get_channel_layout(val);
-#endif
             if (!cl) {
                 av_log(obj, AV_LOG_ERROR, "Unable to parse option value \"%s\" as channel layout\n", val);
                 ret = AVERROR(EINVAL);
@@ -462,32 +483,6 @@ static int set_number(void *obj, const char *name, double num, int den, int64_t
     return write_number(obj, o, dst, num, den, intnum);
 }
 
-#if FF_API_OLD_AVOPTIONS
-const AVOption *av_set_double(void *obj, const char *name, double n)
-{
-    const AVOption *o = av_opt_find(obj, name, NULL, 0, 0);
-    if (set_number(obj, name, n, 1, 1, 0) < 0)
-        return NULL;
-    return o;
-}
-
-const AVOption *av_set_q(void *obj, const char *name, AVRational n)
-{
-    const AVOption *o = av_opt_find(obj, name, NULL, 0, 0);
-    if (set_number(obj, name, n.num, n.den, 1, 0) < 0)
-        return NULL;
-    return o;
-}
-
-const AVOption *av_set_int(void *obj, const char *name, int64_t n)
-{
-    const AVOption *o = av_opt_find(obj, name, NULL, 0, 0);
-    if (set_number(obj, name, 1, 1, n, 0) < 0)
-        return NULL;
-    return o;
-}
-#endif
-
 int av_opt_set_int(void *obj, const char *name, int64_t val, int search_flags)
 {
     return set_number(obj, name, 1, 1, val, search_flags);
@@ -627,47 +622,6 @@ int av_opt_set_channel_layout(void *obj, const char *name, int64_t cl, int searc
     return 0;
 }
 
-#if FF_API_OLD_AVOPTIONS
-/**
- *
- * @param buf a buffer which is used for returning non string values as strings, can be NULL
- * @param buf_len allocated length in bytes of buf
- */
-const char *av_get_string(void *obj, const char *name, const AVOption **o_out, char *buf, int buf_len)
-{
-    const AVOption *o = av_opt_find(obj, name, NULL, 0, AV_OPT_SEARCH_CHILDREN);
-    void *dst;
-    uint8_t *bin;
-    int len, i;
-    if (!o)
-        return NULL;
-    if (o->type != AV_OPT_TYPE_STRING && (!buf || !buf_len))
-        return NULL;
-
-    dst= ((uint8_t*)obj) + o->offset;
-    if (o_out) *o_out= o;
-
-    switch (o->type) {
-    case AV_OPT_TYPE_FLAGS:     snprintf(buf, buf_len, "0x%08X",*(int    *)dst);break;
-    case AV_OPT_TYPE_INT:       snprintf(buf, buf_len, "%d" , *(int    *)dst);break;
-    case AV_OPT_TYPE_INT64:     snprintf(buf, buf_len, "%"PRId64, *(int64_t*)dst);break;
-    case AV_OPT_TYPE_FLOAT:     snprintf(buf, buf_len, "%f" , *(float  *)dst);break;
-    case AV_OPT_TYPE_DOUBLE:    snprintf(buf, buf_len, "%f" , *(double *)dst);break;
-    case AV_OPT_TYPE_RATIONAL:  snprintf(buf, buf_len, "%d/%d", ((AVRational*)dst)->num, ((AVRational*)dst)->den);break;
-    case AV_OPT_TYPE_CONST:     snprintf(buf, buf_len, "%f" , o->default_val.dbl);break;
-    case AV_OPT_TYPE_STRING:    return *(void**)dst;
-    case AV_OPT_TYPE_BINARY:
-        len = *(int*)(((uint8_t *)dst) + sizeof(uint8_t *));
-        if (len >= (buf_len + 1)/2) return NULL;
-        bin = *(uint8_t**)dst;
-        for (i = 0; i < len; i++) snprintf(buf + i*2, 3, "%02X", bin[i]);
-        break;
-    default: return NULL;
-    }
-    return buf;
-}
-#endif
-
 int av_opt_set_dict_val(void *obj, const char *name, const AVDictionary *val, int search_flags)
 {
     void *target_obj;
@@ -686,6 +640,41 @@ int av_opt_set_dict_val(void *obj, const char *name, const AVDictionary *val, in
     return 0;
 }
 
+/* Render the microsecond count d into buf as "H:MM:SS.frac", "M:SS.frac" or
+ * "S.frac" (the shortest that fits), with trailing zeros and a bare '.' removed;
+ * INT64_MAX/INT64_MIN are written by name. size must be at least 25 bytes. */
+static void format_duration(char *buf, size_t size, int64_t d)
+{
+    char *e;
+
+    av_assert0(size >= 25);
+    if (d < 0 && d != INT64_MIN) { /* INT64_MIN cannot be negated */
+        *(buf++) = '-';
+        size--;
+        d = -d;
+    }
+    if (d == INT64_MAX)
+        snprintf(buf, size, "INT64_MAX");
+    else if (d == INT64_MIN)
+        snprintf(buf, size, "INT64_MIN");
+    else if (d >= (int64_t)3600*1000000) /* exactly 1h: "1:00:00", not "60:00" */
+        snprintf(buf, size, "%"PRId64":%02d:%02d.%06d", d / 3600000000,
+                 (int)((d / 60000000) % 60),
+                 (int)((d / 1000000) % 60),
+                 (int)(d % 1000000));
+    else if (d >= 60*1000000) /* exactly 1min: "1:00", not "60" */
+        snprintf(buf, size, "%d:%02d.%06d", (int)(d / 60000000),
+                 (int)((d / 1000000) % 60), (int)(d % 1000000));
+    else
+        snprintf(buf, size, "%d.%06d", (int)(d / 1000000), (int)(d % 1000000));
+    /* strip trailing zeros, then the '.' if nothing is left after it */
+    e = buf + strlen(buf);
+    while (e > buf && e[-1] == '0')
+        *(--e) = 0;
+    if (e > buf && e[-1] == '.')
+        *(--e) = 0;
+}
+
 int av_opt_get(void *obj, const char *name, int search_flags, uint8_t **out_val)
 {
     void *dst, *target_obj;
@@ -701,6 +690,9 @@ int av_opt_get(void *obj, const char *name, int search_flags, uint8_t **out_val)
 
     buf[0] = 0;
     switch (o->type) {
+    case AV_OPT_TYPE_BOOL:
+        ret = snprintf(buf, sizeof(buf), "%s", (char *)av_x_if_null(get_bool_name(*(int *)dst), "invalid"));
+        break;
     case AV_OPT_TYPE_FLAGS:     ret = snprintf(buf, sizeof(buf), "0x%08X",  *(int    *)dst);break;
     case AV_OPT_TYPE_INT:       ret = snprintf(buf, sizeof(buf), "%d" ,     *(int    *)dst);break;
     case AV_OPT_TYPE_INT64:     ret = snprintf(buf, sizeof(buf), "%"PRId64, *(int64_t*)dst);break;
@@ -710,12 +702,20 @@ int av_opt_get(void *obj, const char *name, int search_flags, uint8_t **out_val)
     case AV_OPT_TYPE_RATIONAL:  ret = snprintf(buf, sizeof(buf), "%d/%d",   ((AVRational*)dst)->num, ((AVRational*)dst)->den);break;
     case AV_OPT_TYPE_CONST:     ret = snprintf(buf, sizeof(buf), "%f" ,     o->default_val.dbl);break;
     case AV_OPT_TYPE_STRING:
-        if (*(uint8_t**)dst)
+        if (*(uint8_t**)dst) {
             *out_val = av_strdup(*(uint8_t**)dst);
-        else
+        } else if (search_flags & AV_OPT_ALLOW_NULL) {
+            *out_val = NULL;
+            return 0;
+        } else {
             *out_val = av_strdup("");
+        }
         return *out_val ? 0 : AVERROR(ENOMEM);
     case AV_OPT_TYPE_BINARY:
+        if (!*(uint8_t**)dst && (search_flags & AV_OPT_ALLOW_NULL)) {
+            *out_val = NULL;
+            return 0;
+        }
         len = *(int*)(((uint8_t *)dst) + sizeof(uint8_t *));
         if ((uint64_t)len*2 + 1 > INT_MAX)
             return AVERROR(EINVAL);
@@ -740,9 +740,8 @@ int av_opt_get(void *obj, const char *name, int search_flags, uint8_t **out_val)
         break;
     case AV_OPT_TYPE_DURATION:
         i64 = *(int64_t *)dst;
-        ret = snprintf(buf, sizeof(buf), "%"PRIi64":%02d:%02d.%06d",
-                       i64 / 3600000000, (int)((i64 / 60000000) % 60),
-                       (int)((i64 / 1000000) % 60), (int)(i64 % 1000000));
+        format_duration(buf, sizeof(buf), i64);
+        ret = strlen(buf); // no overflow possible, checked by an assert
         break;
     case AV_OPT_TYPE_COLOR:
         ret = snprintf(buf, sizeof(buf), "0x%02x%02x%02x%02x",
@@ -782,44 +781,6 @@ static int get_number(void *obj, const char *name, const AVOption **o_out, doubl
     return -1;
 }
 
-#if FF_API_OLD_AVOPTIONS
-double av_get_double(void *obj, const char *name, const AVOption **o_out)
-{
-    int64_t intnum=1;
-    double num=1;
-    int den=1;
-
-    if (get_number(obj, name, o_out, &num, &den, &intnum, 0) < 0)
-        return NAN;
-    return num*intnum/den;
-}
-
-AVRational av_get_q(void *obj, const char *name, const AVOption **o_out)
-{
-    int64_t intnum=1;
-    double num=1;
-    int den=1;
-
-    if (get_number(obj, name, o_out, &num, &den, &intnum, 0) < 0)
-        return (AVRational){0, 0};
-    if (num == 1.0 && (int)intnum == intnum)
-        return (AVRational){intnum, den};
-    else
-        return av_d2q(num*intnum/den, 1<<24);
-}
-
-int64_t av_get_int(void *obj, const char *name, const AVOption **o_out)
-{
-    int64_t intnum=1;
-    double num=1;
-    int den=1;
-
-    if (get_number(obj, name, o_out, &num, &den, &intnum, 0) < 0)
-        return -1;
-    return num*intnum/den;
-}
-#endif
-
 int av_opt_get_int(void *obj, const char *name, int search_flags, int64_t *out_val)
 {
     int64_t intnum = 1;
@@ -1002,6 +963,40 @@ static void log_value(void *av_log_obj, int level, double d)
     }
 }
 
+/* Return the name of the AV_OPT_TYPE_CONST in 'unit' equal to 'value', or NULL. */
+static const char *get_opt_const_name(void *obj, const char *unit, int64_t value)
+{
+    const AVOption *opt = NULL;
+    if (!unit)
+        return NULL;
+    while ((opt = av_opt_next(obj, opt))) /* opt->unit may be NULL: guard strcmp() */
+        if (opt->type == AV_OPT_TYPE_CONST && opt->unit &&
+            !strcmp(opt->unit, unit) && opt->default_val.i64 == value)
+            return opt->name;
+    return NULL;
+}
+
+/* Build an av_strdup()ed "a+b+c" list of the constants in 'unit' that share bits
+ * with 'value'. Returns NULL if unit is NULL, nothing matches or av_strdup fails. */
+static char *get_opt_flags_string(void *obj, const char *unit, int64_t value)
+{
+    const AVOption *opt = NULL;
+    char flags[512] = "";   /* appends are bounded by sizeof(flags) */
+
+    if (!unit)
+        return NULL;
+    while ((opt = av_opt_next(obj, opt))) {
+        /* opt->unit may be NULL, which strcmp() must never be handed */
+        if (opt->type == AV_OPT_TYPE_CONST && opt->unit &&
+            !strcmp(opt->unit, unit) && (opt->default_val.i64 & value)) {
+            if (flags[0])
+                av_strlcatf(flags, sizeof(flags), "+");
+            av_strlcatf(flags, sizeof(flags), "%s", opt->name);
+        }
+    }
+    return flags[0] ? av_strdup(flags) : NULL;
+}
+
 static void opt_list(void *obj, void *av_log_obj, const char *unit,
                      int req_flags, int rej_flags)
 {
@@ -1076,6 +1071,9 @@ static void opt_list(void *obj, void *av_log_obj, const char *unit,
             case AV_OPT_TYPE_CHANNEL_LAYOUT:
                 av_log(av_log_obj, AV_LOG_INFO, "%-12s ", "<channel_layout>");
                 break;
+            case AV_OPT_TYPE_BOOL:
+                av_log(av_log_obj, AV_LOG_INFO, "%-12s ", "<boolean>");
+                break;
             case AV_OPT_TYPE_CONST:
             default:
                 av_log(av_log_obj, AV_LOG_INFO, "%-12s ", "");
@@ -1121,14 +1119,34 @@ static void opt_list(void *obj, void *av_log_obj, const char *unit,
                   !opt->default_val.str)) {
             av_log(av_log_obj, AV_LOG_INFO, " (default ");
             switch (opt->type) {
-            case AV_OPT_TYPE_FLAGS:
-                av_log(av_log_obj, AV_LOG_INFO, "%"PRIX64, opt->default_val.i64);
+            case AV_OPT_TYPE_BOOL:
+                av_log(av_log_obj, AV_LOG_INFO, "%s", (char *)av_x_if_null(get_bool_name(opt->default_val.i64), "invalid"));
                 break;
-            case AV_OPT_TYPE_DURATION:
+            case AV_OPT_TYPE_FLAGS: {
+                char *def_flags = get_opt_flags_string(obj, opt->unit, opt->default_val.i64);
+                if (def_flags) {
+                    av_log(av_log_obj, AV_LOG_INFO, "%s", def_flags);
+                    av_freep(&def_flags);
+                } else {
+                    av_log(av_log_obj, AV_LOG_INFO, "%"PRIX64, opt->default_val.i64);
+                }
+                break;
+            }
+            case AV_OPT_TYPE_DURATION: {
+                char buf[25];
+                format_duration(buf, sizeof(buf), opt->default_val.i64);
+                av_log(av_log_obj, AV_LOG_INFO, "%s", buf);
+                break;
+            }
             case AV_OPT_TYPE_INT:
-            case AV_OPT_TYPE_INT64:
-                log_value(av_log_obj, AV_LOG_INFO, opt->default_val.i64);
+            case AV_OPT_TYPE_INT64: {
+                const char *def_const = get_opt_const_name(obj, opt->unit, opt->default_val.i64);
+                if (def_const)
+                    av_log(av_log_obj, AV_LOG_INFO, "%s", def_const);
+                else
+                    log_value(av_log_obj, AV_LOG_INFO, opt->default_val.i64);
                 break;
+            }
             case AV_OPT_TYPE_DOUBLE:
             case AV_OPT_TYPE_FLOAT:
                 log_value(av_log_obj, AV_LOG_INFO, opt->default_val.dbl);
@@ -1177,20 +1195,17 @@ int av_opt_show2(void *obj, void *av_log_obj, int req_flags, int rej_flags)
 
 void av_opt_set_defaults(void *s)
 {
-#if FF_API_OLD_AVOPTIONS
     av_opt_set_defaults2(s, 0, 0);
 }
 
 void av_opt_set_defaults2(void *s, int mask, int flags)
 {
-#endif
     const AVOption *opt = NULL;
     while ((opt = av_opt_next(s, opt))) {
         void *dst = ((uint8_t*)s) + opt->offset;
-#if FF_API_OLD_AVOPTIONS
+
         if ((opt->flags & mask) != flags)
             continue;
-#endif
 
         if (opt->flags & AV_OPT_FLAG_READONLY)
             continue;
@@ -1199,13 +1214,16 @@ void av_opt_set_defaults2(void *s, int mask, int flags)
             case AV_OPT_TYPE_CONST:
                 /* Nothing to be done here */
             break;
+            case AV_OPT_TYPE_BOOL:
             case AV_OPT_TYPE_FLAGS:
             case AV_OPT_TYPE_INT:
             case AV_OPT_TYPE_INT64:
             case AV_OPT_TYPE_DURATION:
             case AV_OPT_TYPE_CHANNEL_LAYOUT:
+            case AV_OPT_TYPE_PIXEL_FMT:
+            case AV_OPT_TYPE_SAMPLE_FMT:
                 write_number(s, opt, dst, 1, 1, opt->default_val.i64);
-            break;
+                break;
             case AV_OPT_TYPE_DOUBLE:
             case AV_OPT_TYPE_FLOAT: {
                 double val;
@@ -1231,12 +1249,6 @@ void av_opt_set_defaults2(void *s, int mask, int flags)
             case AV_OPT_TYPE_VIDEO_RATE:
                 set_string_video_rate(s, opt, opt->default_val.str, dst);
                 break;
-            case AV_OPT_TYPE_PIXEL_FMT:
-                write_number(s, opt, dst, 1, 1, opt->default_val.i64);
-                break;
-            case AV_OPT_TYPE_SAMPLE_FMT:
-                write_number(s, opt, dst, 1, 1, opt->default_val.i64);
-                break;
             case AV_OPT_TYPE_BINARY:
                 set_string_binary(s, opt, opt->default_val.str, dst);
                 break;
@@ -1464,10 +1476,11 @@ int av_opt_set_dict2(void *obj, AVDictionary **options, int search_flags)
     while ((t = av_dict_get(*options, "", t, AV_DICT_IGNORE_SUFFIX))) {
         ret = av_opt_set(obj, t->key, t->value, search_flags);
         if (ret == AVERROR_OPTION_NOT_FOUND)
-            av_dict_set(&tmp, t->key, t->value, 0);
-        else if (ret < 0) {
+            ret = av_dict_set(&tmp, t->key, t->value, 0);
+        if (ret < 0) {
             av_log(obj, AV_LOG_ERROR, "Error setting option %s to value %s.\n", t->key, t->value);
-            break;
+            av_dict_free(&tmp);
+            return ret;
         }
         ret = 0;
     }
@@ -1557,6 +1570,7 @@ void *av_opt_ptr(const AVClass *class, void *obj, const char *name)
 static int opt_size(enum AVOptionType type)
 {
     switch(type) {
+    case AV_OPT_TYPE_BOOL:
     case AV_OPT_TYPE_INT:
     case AV_OPT_TYPE_FLAGS:     return sizeof(int);
     case AV_OPT_TYPE_DURATION:
@@ -1576,7 +1590,7 @@ static int opt_size(enum AVOptionType type)
     return 0;
 }
 
-int av_opt_copy(void *dst, FF_CONST_AVUTIL55 void *src)
+int av_opt_copy(void *dst, const void *src)
 {
     const AVOption *o = NULL;
     const AVClass *c;
@@ -1674,6 +1688,7 @@ int av_opt_query_ranges_default(AVOptionRanges **ranges_arg, void *obj, const ch
     range->value_max = field->max;
 
     switch (field->type) {
+    case AV_OPT_TYPE_BOOL:
     case AV_OPT_TYPE_INT:
     case AV_OPT_TYPE_INT64:
     case AV_OPT_TYPE_PIXEL_FMT:
@@ -1757,6 +1772,7 @@ int av_opt_is_set_to_default(void *obj, const AVOption *o)
     switch (o->type) {
     case AV_OPT_TYPE_CONST:
         return 1;
+    case AV_OPT_TYPE_BOOL:
     case AV_OPT_TYPE_FLAGS:
     case AV_OPT_TYPE_PIXEL_FMT:
     case AV_OPT_TYPE_SAMPLE_FMT:
@@ -1920,6 +1936,9 @@ typedef struct TestContext
     float flt;
     double dbl;
     char *escape;
+    int bool1;
+    int bool2;
+    int bool3;
 } TestContext;
 
 #define OFFSET(x) offsetof(TestContext, x)
@@ -1951,6 +1970,9 @@ static const AVOption test_options[]= {
 {"num64",    "set num 64bit",  OFFSET(num64),    AV_OPT_TYPE_INT64,    {.i64 = 1},        0,        100, 1 },
 {"flt",      "set float",      OFFSET(flt),      AV_OPT_TYPE_FLOAT,    {.dbl = 1.0/3},    0,        100, 1},
 {"dbl",      "set double",     OFFSET(dbl),      AV_OPT_TYPE_DOUBLE,   {.dbl = 1.0/3},    0,        100, 1 },
+{"bool1", "set boolean value",  OFFSET(bool1),   AV_OPT_TYPE_BOOL,     {.i64 = -1},      -1,        1, 1 },
+{"bool2", "set boolean value",  OFFSET(bool2),   AV_OPT_TYPE_BOOL,     {.i64 = 1},       -1,        1, 1 },
+{"bool3", "set boolean value",  OFFSET(bool3),   AV_OPT_TYPE_BOOL,     {.i64 = 0},        0,        1, 1 },
 {NULL},
 };
 
@@ -2116,6 +2138,8 @@ int main(void)
             "dbl=2.2",
             "dbl=-1",
             "dbl=101",
+            "bool1=true",
+            "bool2=auto",
         };
 
         test_ctx.class = &test_class;
diff --git a/libavutil/opt.h b/libavutil/opt.h
index 0bc0d309..753434d6 100644
--- a/libavutil/opt.h
+++ b/libavutil/opt.h
@@ -236,17 +236,7 @@ enum AVOptionType{
     AV_OPT_TYPE_DURATION   = MKBETAG('D','U','R',' '),
     AV_OPT_TYPE_COLOR      = MKBETAG('C','O','L','R'),
     AV_OPT_TYPE_CHANNEL_LAYOUT = MKBETAG('C','H','L','A'),
-#if FF_API_OLD_AVOPTIONS
-    FF_OPT_TYPE_FLAGS = 0,
-    FF_OPT_TYPE_INT,
-    FF_OPT_TYPE_INT64,
-    FF_OPT_TYPE_DOUBLE,
-    FF_OPT_TYPE_FLOAT,
-    FF_OPT_TYPE_STRING,
-    FF_OPT_TYPE_RATIONAL,
-    FF_OPT_TYPE_BINARY,  ///< offset must point to a pointer immediately followed by an int for the length
-    FF_OPT_TYPE_CONST=128,
-#endif
+    AV_OPT_TYPE_BOOL           = MKBETAG('B','O','O','L'),
 };
 
 /**
@@ -378,48 +368,6 @@ typedef struct AVOptionRanges {
     int nb_components;
 } AVOptionRanges;
 
-
-#if FF_API_OLD_AVOPTIONS
-/**
- * Set the field of obj with the given name to value.
- *
- * @param[in] obj A struct whose first element is a pointer to an
- * AVClass.
- * @param[in] name the name of the field to set
- * @param[in] val The value to set. If the field is not of a string
- * type, then the given string is parsed.
- * SI postfixes and some named scalars are supported.
- * If the field is of a numeric type, it has to be a numeric or named
- * scalar. Behavior with more than one scalar and +- infix operators
- * is undefined.
- * If the field is of a flags type, it has to be a sequence of numeric
- * scalars or named flags separated by '+' or '-'. Prefixing a flag
- * with '+' causes it to be set without affecting the other flags;
- * similarly, '-' unsets a flag.
- * @param[out] o_out if non-NULL put here a pointer to the AVOption
- * found
- * @param alloc this parameter is currently ignored
- * @return 0 if the value has been set, or an AVERROR code in case of
- * error:
- * AVERROR_OPTION_NOT_FOUND if no matching option exists
- * AVERROR(ERANGE) if the value is out of range
- * AVERROR(EINVAL) if the value is not valid
- * @deprecated use av_opt_set()
- */
-attribute_deprecated
-int av_set_string3(void *obj, const char *name, const char *val, int alloc, const AVOption **o_out);
-
-attribute_deprecated const AVOption *av_set_double(void *obj, const char *name, double n);
-attribute_deprecated const AVOption *av_set_q(void *obj, const char *name, AVRational n);
-attribute_deprecated const AVOption *av_set_int(void *obj, const char *name, int64_t n);
-
-double av_get_double(void *obj, const char *name, const AVOption **o_out);
-AVRational av_get_q(void *obj, const char *name, const AVOption **o_out);
-int64_t av_get_int(void *obj, const char *name, const AVOption **o_out);
-attribute_deprecated const char *av_get_string(void *obj, const char *name, const AVOption **o_out, char *buf, int buf_len);
-attribute_deprecated const AVOption *av_next_option(FF_CONST_AVUTIL55 void *obj, const AVOption *last);
-#endif
-
 /**
  * Show the obj options.
  *
@@ -438,10 +386,16 @@ int av_opt_show2(void *obj, void *av_log_obj, int req_flags, int rej_flags);
  */
 void av_opt_set_defaults(void *s);
 
-#if FF_API_OLD_AVOPTIONS
-attribute_deprecated
+/**
+ * Set the values of all AVOption fields to their default values. Only those
+ * AVOption fields for which (opt->flags & mask) == flags will have their
+ * default applied to s.
+ *
+ * @param s an AVOption-enabled struct (its first member must be a pointer to AVClass)
+ * @param mask combination of AV_OPT_FLAG_*
+ * @param flags combination of AV_OPT_FLAG_*
+ */
 void av_opt_set_defaults2(void *s, int mask, int flags);
-#endif
 
 /**
  * Parse the key/value pairs list in opts. For each key/value pair
@@ -599,22 +553,28 @@ int av_opt_eval_q     (void *obj, const AVOption *o, const char *val, AVRational
  * @}
  */
 
-#define AV_OPT_SEARCH_CHILDREN   0x0001 /**< Search in possible children of the
-                                             given object first. */
+#define AV_OPT_SEARCH_CHILDREN   (1 << 0) /**< Search in possible children of the
+                                               given object first. */
 /**
  *  The obj passed to av_opt_find() is fake -- only a double pointer to AVClass
  *  instead of a required pointer to a struct containing AVClass. This is
  *  useful for searching for options without needing to allocate the corresponding
  *  object.
  */
-#define AV_OPT_SEARCH_FAKE_OBJ   0x0002
+#define AV_OPT_SEARCH_FAKE_OBJ   (1 << 1)
+
+/**
+ *  In av_opt_get, return NULL if the option has a pointer type and is set to NULL,
+ *  rather than returning an empty string.
+ */
+#define AV_OPT_ALLOW_NULL (1 << 2)
 
 /**
  *  Allows av_opt_query_ranges and av_opt_query_ranges_default to return more than
  *  one component for certain option types.
  *  @see AVOptionRanges for details.
  */
-#define AV_OPT_MULTI_COMPONENT_RANGE 0x1000
+#define AV_OPT_MULTI_COMPONENT_RANGE (1 << 12)
 
 /**
  * Look for an option in an object. Consider only options which
@@ -674,7 +634,7 @@ const AVOption *av_opt_find2(void *obj, const char *name, const char *unit,
  *             or NULL
  * @return next AVOption or NULL
  */
-const AVOption *av_opt_next(FF_CONST_AVUTIL55 void *obj, const AVOption *prev);
+const AVOption *av_opt_next(const void *obj, const AVOption *prev);
 
 /**
  * Iterate over AVOptions-enabled children of obj.
@@ -768,6 +728,10 @@ int av_opt_set_dict_val(void *obj, const char *name, const AVDictionary *val, in
  */
 /**
  * @note the returned string will be av_malloc()ed and must be av_free()ed by the caller
+ *
+ * @note if AV_OPT_ALLOW_NULL is set in search_flags in av_opt_get, and the option has
+ * AV_OPT_TYPE_STRING or AV_OPT_TYPE_BINARY and is set to NULL, *out_val will be set
+ * to NULL instead of an allocated empty string.
  */
 int av_opt_get         (void *obj, const char *name, int search_flags, uint8_t   **out_val);
 int av_opt_get_int     (void *obj, const char *name, int search_flags, int64_t    *out_val);
@@ -826,7 +790,7 @@ int av_opt_query_ranges(AVOptionRanges **, void *obj, const char *key, int flags
  * @param src  Object to copy into
  * @return 0 on success, negative on error
  */
-int av_opt_copy(void *dest, FF_CONST_AVUTIL55 void *src);
+int av_opt_copy(void *dest, const void *src);
 
 /**
  * Get a default list of allowed ranges for the given option.
diff --git a/libavutil/parseutils.c b/libavutil/parseutils.c
index 98858394..0097bec3 100644
--- a/libavutil/parseutils.c
+++ b/libavutil/parseutils.c
@@ -31,13 +31,14 @@
 #include "random_seed.h"
 #include "time_internal.h"
 #include "parseutils.h"
+#include "time.h"
 
 #ifdef TEST
 
 #define av_get_random_seed av_get_random_seed_deterministic
 static uint32_t av_get_random_seed_deterministic(void);
 
-#define time(t) 1331972053
+#define av_gettime() 1331972053200000
 
 #endif
 
@@ -111,9 +112,11 @@ static const VideoSizeAbbr video_size_abbrs[] = {
     { "hd720",    1280, 720 },
     { "hd1080",   1920,1080 },
     { "2k",       2048,1080 }, /* Digital Cinema System Specification */
+    { "2kdci",    2048,1080 },
     { "2kflat",   1998,1080 },
     { "2kscope",  2048, 858 },
     { "4k",       4096,2160 }, /* Digital Cinema System Specification */
+    { "4kdci",    4096,2160 },
     { "4kflat",   3996,2160 },
     { "4kscope",  4096,1716 },
     { "nhd",       640,360  },
@@ -122,6 +125,8 @@ static const VideoSizeAbbr video_size_abbrs[] = {
     { "fwqvga",    432,240  },
     { "hvga",      480,320  },
     { "qhd",       960,540  },
+    { "uhd2160",  3840,2160 },
+    { "uhd4320",  7680,4320 },
 };
 
 static const VideoRateAbbr video_rate_abbrs[]= {
@@ -554,28 +559,34 @@ time_t av_timegm(struct tm *tm)
 int av_parse_time(int64_t *timeval, const char *timestr, int duration)
 {
     const char *p, *q;
-    int64_t t;
+    int64_t t, now64;
     time_t now;
     struct tm dt = { 0 }, tmbuf;
     int today = 0, negative = 0, microseconds = 0;
     int i;
     static const char * const date_fmt[] = {
-        "%Y-%m-%d",
+        "%Y - %m - %d",
         "%Y%m%d",
     };
     static const char * const time_fmt[] = {
         "%H:%M:%S",
         "%H%M%S",
     };
+    static const char * const tz_fmt[] = {
+        "%H:%M",
+        "%H%M",
+        "%H",
+    };
 
     p = timestr;
     q = NULL;
     *timeval = INT64_MIN;
     if (!duration) {
-        now = time(0);
+        now64 = av_gettime();
+        now = now64 / 1000000;
 
         if (!av_strcasecmp(timestr, "now")) {
-            *timeval = (int64_t) now * 1000000;
+            *timeval = now64;
             return 0;
         }
 
@@ -594,8 +605,11 @@ int av_parse_time(int64_t *timeval, const char *timestr, int duration)
         }
         p = q;
 
-        if (*p == 'T' || *p == 't' || *p == ' ')
+        if (*p == 'T' || *p == 't')
             p++;
+        else
+            while (av_isspace(*p))
+                p++;
 
         /* parse the hour-minute-second part */
         for (i = 0; i < FF_ARRAY_ELEMS(time_fmt); i++) {
@@ -649,7 +663,23 @@ int av_parse_time(int64_t *timeval, const char *timestr, int duration)
         t = dt.tm_hour * 3600 + dt.tm_min * 60 + dt.tm_sec;
     } else {
         int is_utc = *q == 'Z' || *q == 'z';
+        int tzoffset = 0;
         q += is_utc;
+        if (!today && !is_utc && (*q == '+' || *q == '-')) {
+            struct tm tz = { 0 };
+            int sign = (*q == '+' ? -1 : 1);
+            q++;
+            p = q;
+            for (i = 0; i < FF_ARRAY_ELEMS(tz_fmt); i++) {
+                q = av_small_strptime(p, tz_fmt[i], &tz);
+                if (q)
+                    break;
+            }
+            if (!q)
+                return AVERROR(EINVAL);
+            tzoffset = sign * (tz.tm_hour * 60 + tz.tm_min) * 60;
+            is_utc = 1;
+        }
         if (today) { /* fill in today's date */
             struct tm dt2 = is_utc ? *gmtime_r(&now, &tmbuf) : *localtime_r(&now, &tmbuf);
             dt2.tm_hour = dt.tm_hour;
@@ -658,6 +688,7 @@ int av_parse_time(int64_t *timeval, const char *timestr, int duration)
             dt = dt2;
         }
         t = is_utc ? av_timegm(&dt) : mktime(&dt);
+        t += tzoffset;
     }
 
     /* Check that we are at the end of the string */
@@ -854,7 +885,10 @@ int main(void)
             "now",
             "12:35:46",
             "2000-12-20 0:02:47.5z",
+            "2012 - 02-22  17:44:07",
             "2000-12-20T010247.6",
+            "2000-12-12 1:35:46+05:30",
+            "2002-12-12 22:30:40-02",
         };
         static const char * const duration_string[] = {
             "2:34:56.79",
@@ -866,7 +900,7 @@ int main(void)
 
         av_log_set_level(AV_LOG_DEBUG);
         putenv(tzstr);
-        printf("(now is 2012-03-17 09:14:13 +0100, local time is UTC+1)\n");
+        printf("(now is 2012-03-17 09:14:13.2 +0100, local time is UTC+1)\n");
         for (i = 0;  i < FF_ARRAY_ELEMS(time_string); i++) {
             printf("%-24s -> ", time_string[i]);
             if (av_parse_time(&tv, time_string[i], 0)) {
diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c
index 32dc4b80..dd7de7ee 100644
--- a/libavutil/pixdesc.c
+++ b/libavutil/pixdesc.c
@@ -39,14 +39,14 @@ void av_read_image_line(uint16_t *dst,
 {
     AVComponentDescriptor comp = desc->comp[c];
     int plane = comp.plane;
-    int depth = comp.depth_minus1 + 1;
+    int depth = comp.depth;
     int mask  = (1 << depth) - 1;
     int shift = comp.shift;
-    int step  = comp.step_minus1 + 1;
+    int step  = comp.step;
     int flags = desc->flags;
 
     if (flags & AV_PIX_FMT_FLAG_BITSTREAM) {
-        int skip = x * step + comp.offset_plus1 - 1;
+        int skip = x * step + comp.offset;
         const uint8_t *p = data[plane] + y * linesize[plane] + (skip >> 3);
         int shift = 8 - depth - (skip & 7);
 
@@ -61,7 +61,7 @@ void av_read_image_line(uint16_t *dst,
         }
     } else {
         const uint8_t *p = data[plane] + y * linesize[plane] +
-                           x * step + comp.offset_plus1 - 1;
+                           x * step + comp.offset;
         int is_8bit = shift + depth <= 8;
 
         if (is_8bit)
@@ -86,12 +86,12 @@ void av_write_image_line(const uint16_t *src,
 {
     AVComponentDescriptor comp = desc->comp[c];
     int plane = comp.plane;
-    int depth = comp.depth_minus1 + 1;
-    int step  = comp.step_minus1 + 1;
+    int depth = comp.depth;
+    int step  = comp.step;
     int flags = desc->flags;
 
     if (flags & AV_PIX_FMT_FLAG_BITSTREAM) {
-        int skip = x * step + comp.offset_plus1 - 1;
+        int skip = x * step + comp.offset;
         uint8_t *p = data[plane] + y * linesize[plane] + (skip >> 3);
         int shift = 8 - depth - (skip & 7);
 
@@ -104,7 +104,7 @@ void av_write_image_line(const uint16_t *src,
     } else {
         int shift = comp.shift;
         uint8_t *p = data[plane] + y * linesize[plane] +
-                     x * step + comp.offset_plus1 - 1;
+                     x * step + comp.offset;
 
         if (shift + depth <= 8) {
             p += !!(flags & AV_PIX_FMT_FLAG_BE);
@@ -127,19 +127,19 @@ void av_write_image_line(const uint16_t *src,
     }
 }
 
-#if !FF_API_PIX_FMT_DESC
-static
+#if FF_API_PLUS1_MINUS1
+FF_DISABLE_DEPRECATION_WARNINGS
 #endif
-const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
+static const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
     [AV_PIX_FMT_YUV420P] = {
         .name = "yuv420p",
         .nb_components = 3,
         .log2_chroma_w = 1,
         .log2_chroma_h = 1,
         .comp = {
-            { 0, 0, 1, 0, 7 },        /* Y */
-            { 1, 0, 1, 0, 7 },        /* U */
-            { 2, 0, 1, 0, 7 },        /* V */
+            { 0, 1, 0, 0, 8, 0, 7, 1 },        /* Y */
+            { 1, 1, 0, 0, 8, 0, 7, 1 },        /* U */
+            { 2, 1, 0, 0, 8, 0, 7, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -149,9 +149,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 7 },        /* Y */
-            { 0, 3, 2, 0, 7 },        /* U */
-            { 0, 3, 4, 0, 7 },        /* V */
+            { 0, 2, 0, 0, 8, 1, 7, 1 },        /* Y */
+            { 0, 4, 1, 0, 8, 3, 7, 2 },        /* U */
+            { 0, 4, 3, 0, 8, 3, 7, 4 },        /* V */
         },
     },
     [AV_PIX_FMT_YVYU422] = {
@@ -160,9 +160,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 7 },        /* Y */
-            { 0, 3, 2, 0, 7 },        /* V */
-            { 0, 3, 4, 0, 7 },        /* U */
+            { 0, 2, 0, 0, 8, 1, 7, 1 },        /* Y */
+            { 0, 4, 3, 0, 8, 3, 7, 4 },        /* U */
+            { 0, 4, 1, 0, 8, 3, 7, 2 },        /* V */
         },
     },
     [AV_PIX_FMT_RGB24] = {
@@ -171,9 +171,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 2, 1, 0, 7 },        /* R */
-            { 0, 2, 2, 0, 7 },        /* G */
-            { 0, 2, 3, 0, 7 },        /* B */
+            { 0, 3, 0, 0, 8, 2, 7, 1 },        /* R */
+            { 0, 3, 1, 0, 8, 2, 7, 2 },        /* G */
+            { 0, 3, 2, 0, 8, 2, 7, 3 },        /* B */
         },
         .flags = AV_PIX_FMT_FLAG_RGB,
     },
@@ -183,9 +183,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 2, 3, 0, 7 },        /* R */
-            { 0, 2, 2, 0, 7 },        /* G */
-            { 0, 2, 1, 0, 7 },        /* B */
+            { 0, 3, 2, 0, 8, 2, 7, 3 },        /* R */
+            { 0, 3, 1, 0, 8, 2, 7, 2 },        /* G */
+            { 0, 3, 0, 0, 8, 2, 7, 1 },        /* B */
         },
         .flags = AV_PIX_FMT_FLAG_RGB,
     },
@@ -195,9 +195,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 0, 1, 0, 7 },        /* Y */
-            { 1, 0, 1, 0, 7 },        /* U */
-            { 2, 0, 1, 0, 7 },        /* V */
+            { 0, 1, 0, 0, 8, 0, 7, 1 },        /* Y */
+            { 1, 1, 0, 0, 8, 0, 7, 1 },        /* U */
+            { 2, 1, 0, 0, 8, 0, 7, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -207,9 +207,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 0, 1, 0, 7 },        /* Y */
-            { 1, 0, 1, 0, 7 },        /* U */
-            { 2, 0, 1, 0, 7 },        /* V */
+            { 0, 1, 0, 0, 8, 0, 7, 1 },        /* Y */
+            { 1, 1, 0, 0, 8, 0, 7, 1 },        /* U */
+            { 2, 1, 0, 0, 8, 0, 7, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -219,9 +219,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 2,
         .log2_chroma_h = 2,
         .comp = {
-            { 0, 0, 1, 0, 7 },        /* Y */
-            { 1, 0, 1, 0, 7 },        /* U */
-            { 2, 0, 1, 0, 7 },        /* V */
+            { 0, 1, 0, 0, 8, 0, 7, 1 },        /* Y */
+            { 1, 1, 0, 0, 8, 0, 7, 1 },        /* U */
+            { 2, 1, 0, 0, 8, 0, 7, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -231,9 +231,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 2,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 0, 1, 0, 7 },        /* Y */
-            { 1, 0, 1, 0, 7 },        /* U */
-            { 2, 0, 1, 0, 7 },        /* V */
+            { 0, 1, 0, 0, 8, 0, 7, 1 },        /* Y */
+            { 1, 1, 0, 0, 8, 0, 7, 1 },        /* U */
+            { 2, 1, 0, 0, 8, 0, 7, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -243,9 +243,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 2,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 0, 1, 0, 7 },        /* Y */
-            { 1, 0, 1, 0, 7 },        /* U */
-            { 2, 0, 1, 0, 7 },        /* V */
+            { 0, 1, 0, 0, 8, 0, 7, 1 },        /* Y */
+            { 1, 1, 0, 0, 8, 0, 7, 1 },        /* U */
+            { 2, 1, 0, 0, 8, 0, 7, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -255,7 +255,7 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 0, 1, 0, 7 },        /* Y */
+            { 0, 1, 0, 0, 8, 0, 7, 1 },        /* Y */
         },
         .flags = AV_PIX_FMT_FLAG_PSEUDOPAL,
         .alias = "gray8,y8",
@@ -266,7 +266,7 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 0, 1, 0, 0 },        /* Y */
+            { 0, 1, 0, 0, 1, 0, 0, 1 },        /* Y */
         },
         .flags = AV_PIX_FMT_FLAG_BITSTREAM,
     },
@@ -276,7 +276,7 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 0, 1, 7, 0 },        /* Y */
+            { 0, 1, 0, 7, 1, 0, 0, 1 },        /* Y */
         },
         .flags = AV_PIX_FMT_FLAG_BITSTREAM,
     },
@@ -286,7 +286,7 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 0, 1, 0, 7 },
+            { 0, 1, 0, 0, 8, 0, 7, 1 },
         },
         .flags = AV_PIX_FMT_FLAG_PAL,
     },
@@ -296,9 +296,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 1,
         .comp = {
-            { 0, 0, 1, 0, 7 },        /* Y */
-            { 1, 0, 1, 0, 7 },        /* U */
-            { 2, 0, 1, 0, 7 },        /* V */
+            { 0, 1, 0, 0, 8, 0, 7, 1 },        /* Y */
+            { 1, 1, 0, 0, 8, 0, 7, 1 },        /* U */
+            { 2, 1, 0, 0, 8, 0, 7, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -308,9 +308,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 0, 1, 0, 7 },        /* Y */
-            { 1, 0, 1, 0, 7 },        /* U */
-            { 2, 0, 1, 0, 7 },        /* V */
+            { 0, 1, 0, 0, 8, 0, 7, 1 },        /* Y */
+            { 1, 1, 0, 0, 8, 0, 7, 1 },        /* U */
+            { 2, 1, 0, 0, 8, 0, 7, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -320,9 +320,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 0, 1, 0, 7 },        /* Y */
-            { 1, 0, 1, 0, 7 },        /* U */
-            { 2, 0, 1, 0, 7 },        /* V */
+            { 0, 1, 0, 0, 8, 0, 7, 1 },        /* Y */
+            { 1, 1, 0, 0, 8, 0, 7, 1 },        /* U */
+            { 2, 1, 0, 0, 8, 0, 7, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -348,9 +348,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 2, 0, 7 },        /* Y */
-            { 0, 3, 1, 0, 7 },        /* U */
-            { 0, 3, 3, 0, 7 },        /* V */
+            { 0, 2, 1, 0, 8, 1, 7, 2 },        /* Y */
+            { 0, 4, 0, 0, 8, 3, 7, 1 },        /* U */
+            { 0, 4, 2, 0, 8, 3, 7, 3 },        /* V */
         },
     },
     [AV_PIX_FMT_UYYVYY411] = {
@@ -359,9 +359,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 2,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 3, 2, 0, 7 },        /* Y */
-            { 0, 5, 1, 0, 7 },        /* U */
-            { 0, 5, 4, 0, 7 },        /* V */
+            { 0, 4, 1, 0, 8, 3, 7, 2 },        /* Y */
+            { 0, 6, 0, 0, 8, 5, 7, 1 },        /* U */
+            { 0, 6, 3, 0, 8, 5, 7, 4 },        /* V */
         },
     },
     [AV_PIX_FMT_BGR8] = {
@@ -370,9 +370,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 0, 1, 0, 2 },        /* R */
-            { 0, 0, 1, 3, 2 },        /* G */
-            { 0, 0, 1, 6, 1 },        /* B */
+            { 0, 1, 0, 0, 3, 0, 2, 1 },        /* R */
+            { 0, 1, 0, 3, 3, 0, 2, 1 },        /* G */
+            { 0, 1, 0, 6, 2, 0, 1, 1 },        /* B */
         },
         .flags = AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_PSEUDOPAL,
     },
@@ -382,9 +382,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 3, 4, 0, 0 },        /* R */
-            { 0, 3, 2, 0, 1 },        /* G */
-            { 0, 3, 1, 0, 0 },        /* B */
+            { 0, 4, 3, 0, 1, 3, 0, 4 },        /* R */
+            { 0, 4, 1, 0, 2, 3, 1, 2 },        /* G */
+            { 0, 4, 0, 0, 1, 3, 0, 1 },        /* B */
         },
         .flags = AV_PIX_FMT_FLAG_BITSTREAM | AV_PIX_FMT_FLAG_RGB,
     },
@@ -394,9 +394,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 0, 1, 0, 0 },        /* R */
-            { 0, 0, 1, 1, 1 },        /* G */
-            { 0, 0, 1, 3, 0 },        /* B */
+            { 0, 1, 0, 0, 1, 0, 0, 1 },        /* R */
+            { 0, 1, 0, 1, 2, 0, 1, 1 },        /* G */
+            { 0, 1, 0, 3, 1, 0, 0, 1 },        /* B */
         },
         .flags = AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_PSEUDOPAL,
     },
@@ -406,9 +406,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 0, 1, 6, 1 },        /* R */
-            { 0, 0, 1, 3, 2 },        /* G */
-            { 0, 0, 1, 0, 2 },        /* B */
+            { 0, 1, 0, 6, 2, 0, 1, 1 },        /* R */
+            { 0, 1, 0, 3, 3, 0, 2, 1 },        /* G */
+            { 0, 1, 0, 0, 3, 0, 2, 1 },        /* B */
         },
         .flags = AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_PSEUDOPAL,
     },
@@ -418,9 +418,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 3, 1, 0, 0 },        /* R */
-            { 0, 3, 2, 0, 1 },        /* G */
-            { 0, 3, 4, 0, 0 },        /* B */
+            { 0, 4, 0, 0, 1, 3, 0, 1 },        /* R */
+            { 0, 4, 1, 0, 2, 3, 1, 2 },        /* G */
+            { 0, 4, 3, 0, 1, 3, 0, 4 },        /* B */
         },
         .flags = AV_PIX_FMT_FLAG_BITSTREAM | AV_PIX_FMT_FLAG_RGB,
     },
@@ -430,9 +430,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 0, 1, 3, 0 },        /* R */
-            { 0, 0, 1, 1, 1 },        /* G */
-            { 0, 0, 1, 0, 0 },        /* B */
+            { 0, 1, 0, 3, 1, 0, 0, 1 },        /* R */
+            { 0, 1, 0, 1, 2, 0, 1, 1 },        /* G */
+            { 0, 1, 0, 0, 1, 0, 0, 1 },        /* B */
         },
         .flags = AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_PSEUDOPAL,
     },
@@ -442,9 +442,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 1,
         .comp = {
-            { 0, 0, 1, 0, 7 },        /* Y */
-            { 1, 1, 1, 0, 7 },        /* U */
-            { 1, 1, 2, 0, 7 },        /* V */
+            { 0, 1, 0, 0, 8, 0, 7, 1 },        /* Y */
+            { 1, 2, 0, 0, 8, 1, 7, 1 },        /* U */
+            { 1, 2, 1, 0, 8, 1, 7, 2 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -454,9 +454,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 1,
         .comp = {
-            { 0, 0, 1, 0, 7 },        /* Y */
-            { 1, 1, 2, 0, 7 },        /* U */
-            { 1, 1, 1, 0, 7 },        /* V */
+            { 0, 1, 0, 0, 8, 0, 7, 1 },        /* Y */
+            { 1, 2, 1, 0, 8, 1, 7, 2 },        /* U */
+            { 1, 2, 0, 0, 8, 1, 7, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -466,10 +466,10 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 3, 2, 0, 7 },        /* R */
-            { 0, 3, 3, 0, 7 },        /* G */
-            { 0, 3, 4, 0, 7 },        /* B */
-            { 0, 3, 1, 0, 7 },        /* A */
+            { 0, 4, 1, 0, 8, 3, 7, 2 },        /* R */
+            { 0, 4, 2, 0, 8, 3, 7, 3 },        /* G */
+            { 0, 4, 3, 0, 8, 3, 7, 4 },        /* B */
+            { 0, 4, 0, 0, 8, 3, 7, 1 },        /* A */
         },
         .flags = AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_ALPHA,
     },
@@ -479,10 +479,10 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 3, 1, 0, 7 },        /* R */
-            { 0, 3, 2, 0, 7 },        /* G */
-            { 0, 3, 3, 0, 7 },        /* B */
-            { 0, 3, 4, 0, 7 },        /* A */
+            { 0, 4, 0, 0, 8, 3, 7, 1 },        /* R */
+            { 0, 4, 1, 0, 8, 3, 7, 2 },        /* G */
+            { 0, 4, 2, 0, 8, 3, 7, 3 },        /* B */
+            { 0, 4, 3, 0, 8, 3, 7, 4 },        /* A */
         },
         .flags = AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_ALPHA,
     },
@@ -492,10 +492,10 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 3, 4, 0, 7 },        /* R */
-            { 0, 3, 3, 0, 7 },        /* G */
-            { 0, 3, 2, 0, 7 },        /* B */
-            { 0, 3, 1, 0, 7 },        /* A */
+            { 0, 4, 3, 0, 8, 3, 7, 4 },        /* R */
+            { 0, 4, 2, 0, 8, 3, 7, 3 },        /* G */
+            { 0, 4, 1, 0, 8, 3, 7, 2 },        /* B */
+            { 0, 4, 0, 0, 8, 3, 7, 1 },        /* A */
         },
         .flags = AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_ALPHA,
     },
@@ -505,10 +505,10 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 3, 3, 0, 7 },        /* R */
-            { 0, 3, 2, 0, 7 },        /* G */
-            { 0, 3, 1, 0, 7 },        /* B */
-            { 0, 3, 4, 0, 7 },        /* A */
+            { 0, 4, 2, 0, 8, 3, 7, 3 },        /* R */
+            { 0, 4, 1, 0, 8, 3, 7, 2 },        /* G */
+            { 0, 4, 0, 0, 8, 3, 7, 1 },        /* B */
+            { 0, 4, 3, 0, 8, 3, 7, 4 },        /* A */
         },
         .flags = AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_ALPHA,
     },
@@ -518,9 +518,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w= 0,
         .log2_chroma_h= 0,
         .comp = {
-            { 0, 3, 2, 0, 7 },        /* R */
-            { 0, 3, 3, 0, 7 },        /* G */
-            { 0, 3, 4, 0, 7 },        /* B */
+            { 0, 4, 1, 0, 8, 3, 7, 2 },        /* R */
+            { 0, 4, 2, 0, 8, 3, 7, 3 },        /* G */
+            { 0, 4, 3, 0, 8, 3, 7, 4 },        /* B */
         },
         .flags = AV_PIX_FMT_FLAG_RGB,
     },
@@ -530,9 +530,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w= 0,
         .log2_chroma_h= 0,
         .comp = {
-            { 0, 3, 1, 0, 7 },        /* R */
-            { 0, 3, 2, 0, 7 },        /* G */
-            { 0, 3, 3, 0, 7 },        /* B */
+            { 0, 4, 0, 0, 8, 3, 7, 1 },        /* R */
+            { 0, 4, 1, 0, 8, 3, 7, 2 },        /* G */
+            { 0, 4, 2, 0, 8, 3, 7, 3 },        /* B */
         },
         .flags = AV_PIX_FMT_FLAG_RGB,
     },
@@ -542,9 +542,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w= 0,
         .log2_chroma_h= 0,
         .comp = {
-            { 0, 3, 4, 0, 7 },        /* R */
-            { 0, 3, 3, 0, 7 },        /* G */
-            { 0, 3, 2, 0, 7 },        /* B */
+            { 0, 4, 3, 0, 8, 3, 7, 4 },        /* R */
+            { 0, 4, 2, 0, 8, 3, 7, 3 },        /* G */
+            { 0, 4, 1, 0, 8, 3, 7, 2 },        /* B */
         },
         .flags = AV_PIX_FMT_FLAG_RGB,
     },
@@ -554,9 +554,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w= 0,
         .log2_chroma_h= 0,
         .comp = {
-            { 0, 3, 3, 0, 7 },        /* R */
-            { 0, 3, 2, 0, 7 },        /* G */
-            { 0, 3, 1, 0, 7 },        /* B */
+            { 0, 4, 2, 0, 8, 3, 7, 3 },        /* R */
+            { 0, 4, 1, 0, 8, 3, 7, 2 },        /* G */
+            { 0, 4, 0, 0, 8, 3, 7, 1 },        /* B */
         },
         .flags = AV_PIX_FMT_FLAG_RGB,
     },
@@ -566,7 +566,7 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 15 },       /* Y */
+            { 0, 2, 0, 0, 16, 1, 15, 1 },       /* Y */
         },
         .flags = AV_PIX_FMT_FLAG_BE,
         .alias = "y16be",
@@ -577,7 +577,7 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 15 },       /* Y */
+            { 0, 2, 0, 0, 16, 1, 15, 1 },       /* Y */
         },
         .alias = "y16le",
     },
@@ -587,9 +587,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 1,
         .comp = {
-            { 0, 0, 1, 0, 7 },        /* Y */
-            { 1, 0, 1, 0, 7 },        /* U */
-            { 2, 0, 1, 0, 7 },        /* V */
+            { 0, 1, 0, 0, 8, 0, 7, 1 },        /* Y */
+            { 1, 1, 0, 0, 8, 0, 7, 1 },        /* U */
+            { 2, 1, 0, 0, 8, 0, 7, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -599,9 +599,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 1,
         .comp = {
-            { 0, 0, 1, 0, 7 },        /* Y */
-            { 1, 0, 1, 0, 7 },        /* U */
-            { 2, 0, 1, 0, 7 },        /* V */
+            { 0, 1, 0, 0, 8, 0, 7, 1 },        /* Y */
+            { 1, 1, 0, 0, 8, 0, 7, 1 },        /* U */
+            { 2, 1, 0, 0, 8, 0, 7, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -611,9 +611,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 1,
         .comp = {
-            { 0, 1, 1, 0, 9 },        /* Y */
-            { 1, 1, 1, 0, 9 },        /* U */
-            { 2, 1, 1, 0, 9 },        /* V */
+            { 0, 2, 0, 0, 10, 1, 9, 1 },        /* Y */
+            { 1, 2, 0, 0, 10, 1, 9, 1 },        /* U */
+            { 2, 2, 0, 0, 10, 1, 9, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -623,9 +623,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 1,
         .comp = {
-            { 0, 1, 1, 0, 9 },        /* Y */
-            { 1, 1, 1, 0, 9 },        /* U */
-            { 2, 1, 1, 0, 9 },        /* V */
+            { 0, 2, 0, 0, 10, 1, 9, 1 },        /* Y */
+            { 1, 2, 0, 0, 10, 1, 9, 1 },        /* U */
+            { 2, 2, 0, 0, 10, 1, 9, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -635,9 +635,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 1,
         .comp = {
-            { 0, 1, 1, 0, 11 },        /* Y */
-            { 1, 1, 1, 0, 11 },        /* U */
-            { 2, 1, 1, 0, 11 },        /* V */
+            { 0, 2, 0, 0, 12, 1, 11, 1 },        /* Y */
+            { 1, 2, 0, 0, 12, 1, 11, 1 },        /* U */
+            { 2, 2, 0, 0, 12, 1, 11, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -647,9 +647,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 1,
         .comp = {
-            { 0, 1, 1, 0, 11 },        /* Y */
-            { 1, 1, 1, 0, 11 },        /* U */
-            { 2, 1, 1, 0, 11 },        /* V */
+            { 0, 2, 0, 0, 12, 1, 11, 1 },        /* Y */
+            { 1, 2, 0, 0, 12, 1, 11, 1 },        /* U */
+            { 2, 2, 0, 0, 12, 1, 11, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -659,10 +659,10 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 1,
         .comp = {
-            { 0, 0, 1, 0, 7 },        /* Y */
-            { 1, 0, 1, 0, 7 },        /* U */
-            { 2, 0, 1, 0, 7 },        /* V */
-            { 3, 0, 1, 0, 7 },        /* A */
+            { 0, 1, 0, 0, 8, 0, 7, 1 },        /* Y */
+            { 1, 1, 0, 0, 8, 0, 7, 1 },        /* U */
+            { 2, 1, 0, 0, 8, 0, 7, 1 },        /* V */
+            { 3, 1, 0, 0, 8, 0, 7, 1 },        /* A */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_ALPHA,
     },
@@ -672,10 +672,10 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 0, 1, 0, 7 },        /* Y */
-            { 1, 0, 1, 0, 7 },        /* U */
-            { 2, 0, 1, 0, 7 },        /* V */
-            { 3, 0, 1, 0, 7 },        /* A */
+            { 0, 1, 0, 0, 8, 0, 7, 1 },        /* Y */
+            { 1, 1, 0, 0, 8, 0, 7, 1 },        /* U */
+            { 2, 1, 0, 0, 8, 0, 7, 1 },        /* V */
+            { 3, 1, 0, 0, 8, 0, 7, 1 },        /* A */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_ALPHA,
     },
@@ -685,10 +685,10 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 0, 1, 0, 7 },        /* Y */
-            { 1, 0, 1, 0, 7 },        /* U */
-            { 2, 0, 1, 0, 7 },        /* V */
-            { 3, 0, 1, 0, 7 },        /* A */
+            { 0, 1, 0, 0, 8, 0, 7, 1 },        /* Y */
+            { 1, 1, 0, 0, 8, 0, 7, 1 },        /* U */
+            { 2, 1, 0, 0, 8, 0, 7, 1 },        /* V */
+            { 3, 1, 0, 0, 8, 0, 7, 1 },        /* A */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_ALPHA,
     },
@@ -698,10 +698,10 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 1,
         .comp = {
-            { 0, 1, 1, 0, 8 },        /* Y */
-            { 1, 1, 1, 0, 8 },        /* U */
-            { 2, 1, 1, 0, 8 },        /* V */
-            { 3, 1, 1, 0, 8 },        /* A */
+            { 0, 2, 0, 0, 9, 1, 8, 1 },        /* Y */
+            { 1, 2, 0, 0, 9, 1, 8, 1 },        /* U */
+            { 2, 2, 0, 0, 9, 1, 8, 1 },        /* V */
+            { 3, 2, 0, 0, 9, 1, 8, 1 },        /* A */
         },
         .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_ALPHA,
     },
@@ -711,10 +711,10 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 1,
         .comp = {
-            { 0, 1, 1, 0, 8 },        /* Y */
-            { 1, 1, 1, 0, 8 },        /* U */
-            { 2, 1, 1, 0, 8 },        /* V */
-            { 3, 1, 1, 0, 8 },        /* A */
+            { 0, 2, 0, 0, 9, 1, 8, 1 },        /* Y */
+            { 1, 2, 0, 0, 9, 1, 8, 1 },        /* U */
+            { 2, 2, 0, 0, 9, 1, 8, 1 },        /* V */
+            { 3, 2, 0, 0, 9, 1, 8, 1 },        /* A */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_ALPHA,
     },
@@ -724,10 +724,10 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 8 },        /* Y */
-            { 1, 1, 1, 0, 8 },        /* U */
-            { 2, 1, 1, 0, 8 },        /* V */
-            { 3, 1, 1, 0, 8 },        /* A */
+            { 0, 2, 0, 0, 9, 1, 8, 1 },        /* Y */
+            { 1, 2, 0, 0, 9, 1, 8, 1 },        /* U */
+            { 2, 2, 0, 0, 9, 1, 8, 1 },        /* V */
+            { 3, 2, 0, 0, 9, 1, 8, 1 },        /* A */
         },
         .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_ALPHA,
     },
@@ -737,10 +737,10 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 8 },        /* Y */
-            { 1, 1, 1, 0, 8 },        /* U */
-            { 2, 1, 1, 0, 8 },        /* V */
-            { 3, 1, 1, 0, 8 },        /* A */
+            { 0, 2, 0, 0, 9, 1, 8, 1 },        /* Y */
+            { 1, 2, 0, 0, 9, 1, 8, 1 },        /* U */
+            { 2, 2, 0, 0, 9, 1, 8, 1 },        /* V */
+            { 3, 2, 0, 0, 9, 1, 8, 1 },        /* A */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_ALPHA,
     },
@@ -750,10 +750,10 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 8 },        /* Y */
-            { 1, 1, 1, 0, 8 },        /* U */
-            { 2, 1, 1, 0, 8 },        /* V */
-            { 3, 1, 1, 0, 8 },        /* A */
+            { 0, 2, 0, 0, 9, 1, 8, 1 },        /* Y */
+            { 1, 2, 0, 0, 9, 1, 8, 1 },        /* U */
+            { 2, 2, 0, 0, 9, 1, 8, 1 },        /* V */
+            { 3, 2, 0, 0, 9, 1, 8, 1 },        /* A */
         },
         .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_ALPHA,
     },
@@ -763,10 +763,10 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 8 },        /* Y */
-            { 1, 1, 1, 0, 8 },        /* U */
-            { 2, 1, 1, 0, 8 },        /* V */
-            { 3, 1, 1, 0, 8 },        /* A */
+            { 0, 2, 0, 0, 9, 1, 8, 1 },        /* Y */
+            { 1, 2, 0, 0, 9, 1, 8, 1 },        /* U */
+            { 2, 2, 0, 0, 9, 1, 8, 1 },        /* V */
+            { 3, 2, 0, 0, 9, 1, 8, 1 },        /* A */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_ALPHA,
     },
@@ -776,10 +776,10 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 1,
         .comp = {
-            { 0, 1, 1, 0, 9 },        /* Y */
-            { 1, 1, 1, 0, 9 },        /* U */
-            { 2, 1, 1, 0, 9 },        /* V */
-            { 3, 1, 1, 0, 9 },        /* A */
+            { 0, 2, 0, 0, 10, 1, 9, 1 },        /* Y */
+            { 1, 2, 0, 0, 10, 1, 9, 1 },        /* U */
+            { 2, 2, 0, 0, 10, 1, 9, 1 },        /* V */
+            { 3, 2, 0, 0, 10, 1, 9, 1 },        /* A */
         },
         .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_ALPHA,
     },
@@ -789,10 +789,10 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 1,
         .comp = {
-            { 0, 1, 1, 0, 9 },        /* Y */
-            { 1, 1, 1, 0, 9 },        /* U */
-            { 2, 1, 1, 0, 9 },        /* V */
-            { 3, 1, 1, 0, 9 },        /* A */
+            { 0, 2, 0, 0, 10, 1, 9, 1 },        /* Y */
+            { 1, 2, 0, 0, 10, 1, 9, 1 },        /* U */
+            { 2, 2, 0, 0, 10, 1, 9, 1 },        /* V */
+            { 3, 2, 0, 0, 10, 1, 9, 1 },        /* A */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_ALPHA,
     },
@@ -802,10 +802,10 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 9 },        /* Y */
-            { 1, 1, 1, 0, 9 },        /* U */
-            { 2, 1, 1, 0, 9 },        /* V */
-            { 3, 1, 1, 0, 9 },        /* A */
+            { 0, 2, 0, 0, 10, 1, 9, 1 },        /* Y */
+            { 1, 2, 0, 0, 10, 1, 9, 1 },        /* U */
+            { 2, 2, 0, 0, 10, 1, 9, 1 },        /* V */
+            { 3, 2, 0, 0, 10, 1, 9, 1 },        /* A */
         },
         .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_ALPHA,
     },
@@ -815,10 +815,10 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 9 },        /* Y */
-            { 1, 1, 1, 0, 9 },        /* U */
-            { 2, 1, 1, 0, 9 },        /* V */
-            { 3, 1, 1, 0, 9 },        /* A */
+            { 0, 2, 0, 0, 10, 1, 9, 1 },        /* Y */
+            { 1, 2, 0, 0, 10, 1, 9, 1 },        /* U */
+            { 2, 2, 0, 0, 10, 1, 9, 1 },        /* V */
+            { 3, 2, 0, 0, 10, 1, 9, 1 },        /* A */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_ALPHA,
     },
@@ -828,10 +828,10 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 9 },        /* Y */
-            { 1, 1, 1, 0, 9 },        /* U */
-            { 2, 1, 1, 0, 9 },        /* V */
-            { 3, 1, 1, 0, 9 },        /* A */
+            { 0, 2, 0, 0, 10, 1, 9, 1 },        /* Y */
+            { 1, 2, 0, 0, 10, 1, 9, 1 },        /* U */
+            { 2, 2, 0, 0, 10, 1, 9, 1 },        /* V */
+            { 3, 2, 0, 0, 10, 1, 9, 1 },        /* A */
         },
         .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_ALPHA,
     },
@@ -841,10 +841,10 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 9 },        /* Y */
-            { 1, 1, 1, 0, 9 },        /* U */
-            { 2, 1, 1, 0, 9 },        /* V */
-            { 3, 1, 1, 0, 9 },        /* A */
+            { 0, 2, 0, 0, 10, 1, 9, 1 },        /* Y */
+            { 1, 2, 0, 0, 10, 1, 9, 1 },        /* U */
+            { 2, 2, 0, 0, 10, 1, 9, 1 },        /* V */
+            { 3, 2, 0, 0, 10, 1, 9, 1 },        /* A */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_ALPHA,
     },
@@ -854,10 +854,10 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 1,
         .comp = {
-            { 0, 1, 1, 0, 15 },        /* Y */
-            { 1, 1, 1, 0, 15 },        /* U */
-            { 2, 1, 1, 0, 15 },        /* V */
-            { 3, 1, 1, 0, 15 },        /* A */
+            { 0, 2, 0, 0, 16, 1, 15, 1 },        /* Y */
+            { 1, 2, 0, 0, 16, 1, 15, 1 },        /* U */
+            { 2, 2, 0, 0, 16, 1, 15, 1 },        /* V */
+            { 3, 2, 0, 0, 16, 1, 15, 1 },        /* A */
         },
         .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_ALPHA,
     },
@@ -867,10 +867,10 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 1,
         .comp = {
-            { 0, 1, 1, 0, 15 },        /* Y */
-            { 1, 1, 1, 0, 15 },        /* U */
-            { 2, 1, 1, 0, 15 },        /* V */
-            { 3, 1, 1, 0, 15 },        /* A */
+            { 0, 2, 0, 0, 16, 1, 15, 1 },        /* Y */
+            { 1, 2, 0, 0, 16, 1, 15, 1 },        /* U */
+            { 2, 2, 0, 0, 16, 1, 15, 1 },        /* V */
+            { 3, 2, 0, 0, 16, 1, 15, 1 },        /* A */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_ALPHA,
     },
@@ -880,10 +880,10 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 15 },        /* Y */
-            { 1, 1, 1, 0, 15 },        /* U */
-            { 2, 1, 1, 0, 15 },        /* V */
-            { 3, 1, 1, 0, 15 },        /* A */
+            { 0, 2, 0, 0, 16, 1, 15, 1 },        /* Y */
+            { 1, 2, 0, 0, 16, 1, 15, 1 },        /* U */
+            { 2, 2, 0, 0, 16, 1, 15, 1 },        /* V */
+            { 3, 2, 0, 0, 16, 1, 15, 1 },        /* A */
         },
         .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_ALPHA,
     },
@@ -893,10 +893,10 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 15 },        /* Y */
-            { 1, 1, 1, 0, 15 },        /* U */
-            { 2, 1, 1, 0, 15 },        /* V */
-            { 3, 1, 1, 0, 15 },        /* A */
+            { 0, 2, 0, 0, 16, 1, 15, 1 },        /* Y */
+            { 1, 2, 0, 0, 16, 1, 15, 1 },        /* U */
+            { 2, 2, 0, 0, 16, 1, 15, 1 },        /* V */
+            { 3, 2, 0, 0, 16, 1, 15, 1 },        /* A */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_ALPHA,
     },
@@ -906,10 +906,10 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 15 },        /* Y */
-            { 1, 1, 1, 0, 15 },        /* U */
-            { 2, 1, 1, 0, 15 },        /* V */
-            { 3, 1, 1, 0, 15 },        /* A */
+            { 0, 2, 0, 0, 16, 1, 15, 1 },        /* Y */
+            { 1, 2, 0, 0, 16, 1, 15, 1 },        /* U */
+            { 2, 2, 0, 0, 16, 1, 15, 1 },        /* V */
+            { 3, 2, 0, 0, 16, 1, 15, 1 },        /* A */
         },
         .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_ALPHA,
     },
@@ -919,10 +919,10 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 15 },        /* Y */
-            { 1, 1, 1, 0, 15 },        /* U */
-            { 2, 1, 1, 0, 15 },        /* V */
-            { 3, 1, 1, 0, 15 },        /* A */
+            { 0, 2, 0, 0, 16, 1, 15, 1 },        /* Y */
+            { 1, 2, 0, 0, 16, 1, 15, 1 },        /* U */
+            { 2, 2, 0, 0, 16, 1, 15, 1 },        /* V */
+            { 3, 2, 0, 0, 16, 1, 15, 1 },        /* A */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_ALPHA,
     },
@@ -970,9 +970,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 5, 1, 0, 15 },       /* R */
-            { 0, 5, 3, 0, 15 },       /* G */
-            { 0, 5, 5, 0, 15 },       /* B */
+            { 0, 6, 0, 0, 16, 5, 15, 1 },       /* R */
+            { 0, 6, 2, 0, 16, 5, 15, 3 },       /* G */
+            { 0, 6, 4, 0, 16, 5, 15, 5 },       /* B */
         },
         .flags = AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_BE,
     },
@@ -982,9 +982,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 5, 1, 0, 15 },       /* R */
-            { 0, 5, 3, 0, 15 },       /* G */
-            { 0, 5, 5, 0, 15 },       /* B */
+            { 0, 6, 0, 0, 16, 5, 15, 1 },       /* R */
+            { 0, 6, 2, 0, 16, 5, 15, 3 },       /* G */
+            { 0, 6, 4, 0, 16, 5, 15, 5 },       /* B */
         },
         .flags = AV_PIX_FMT_FLAG_RGB,
     },
@@ -994,10 +994,10 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 7, 1, 0, 15 },       /* R */
-            { 0, 7, 3, 0, 15 },       /* G */
-            { 0, 7, 5, 0, 15 },       /* B */
-            { 0, 7, 7, 0, 15 },       /* A */
+            { 0, 8, 0, 0, 16, 7, 15, 1 },       /* R */
+            { 0, 8, 2, 0, 16, 7, 15, 3 },       /* G */
+            { 0, 8, 4, 0, 16, 7, 15, 5 },       /* B */
+            { 0, 8, 6, 0, 16, 7, 15, 7 },       /* A */
         },
         .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_ALPHA,
     },
@@ -1007,10 +1007,10 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 7, 1, 0, 15 },       /* R */
-            { 0, 7, 3, 0, 15 },       /* G */
-            { 0, 7, 5, 0, 15 },       /* B */
-            { 0, 7, 7, 0, 15 },       /* A */
+            { 0, 8, 0, 0, 16, 7, 15, 1 },       /* R */
+            { 0, 8, 2, 0, 16, 7, 15, 3 },       /* G */
+            { 0, 8, 4, 0, 16, 7, 15, 5 },       /* B */
+            { 0, 8, 6, 0, 16, 7, 15, 7 },       /* A */
         },
         .flags = AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_ALPHA,
     },
@@ -1020,9 +1020,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 0, 3, 4 },        /* R */
-            { 0, 1, 1, 5, 5 },        /* G */
-            { 0, 1, 1, 0, 4 },        /* B */
+            { 0, 2, -1, 3, 5, 1, 4, 0 },        /* R */
+            { 0, 2,  0, 5, 6, 1, 5, 1 },        /* G */
+            { 0, 2,  0, 0, 5, 1, 4, 1 },        /* B */
         },
         .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_RGB,
     },
@@ -1032,9 +1032,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 2, 3, 4 },        /* R */
-            { 0, 1, 1, 5, 5 },        /* G */
-            { 0, 1, 1, 0, 4 },        /* B */
+            { 0, 2, 1, 3, 5, 1, 4, 2 },        /* R */
+            { 0, 2, 0, 5, 6, 1, 5, 1 },        /* G */
+            { 0, 2, 0, 0, 5, 1, 4, 1 },        /* B */
         },
         .flags = AV_PIX_FMT_FLAG_RGB,
     },
@@ -1044,9 +1044,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 0, 2, 4 },        /* R */
-            { 0, 1, 1, 5, 4 },        /* G */
-            { 0, 1, 1, 0, 4 },        /* B */
+            { 0, 2, -1, 2, 5, 1, 4, 0 },        /* R */
+            { 0, 2,  0, 5, 5, 1, 4, 1 },        /* G */
+            { 0, 2,  0, 0, 5, 1, 4, 1 },        /* B */
         },
         .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_RGB,
     },
@@ -1056,9 +1056,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 2, 2, 4 },        /* R */
-            { 0, 1, 1, 5, 4 },        /* G */
-            { 0, 1, 1, 0, 4 },        /* B */
+            { 0, 2, 1, 2, 5, 1, 4, 2 },        /* R */
+            { 0, 2, 0, 5, 5, 1, 4, 1 },        /* G */
+            { 0, 2, 0, 0, 5, 1, 4, 1 },        /* B */
         },
         .flags = AV_PIX_FMT_FLAG_RGB,
     },
@@ -1068,9 +1068,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 0, 0, 3 },        /* R */
-            { 0, 1, 1, 4, 3 },        /* G */
-            { 0, 1, 1, 0, 3 },        /* B */
+            { 0, 2, -1, 0, 4, 1, 3, 0 },        /* R */
+            { 0, 2,  0, 4, 4, 1, 3, 1 },        /* G */
+            { 0, 2,  0, 0, 4, 1, 3, 1 },        /* B */
         },
         .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_RGB,
     },
@@ -1080,9 +1080,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 2, 0, 3 },        /* R */
-            { 0, 1, 1, 4, 3 },        /* G */
-            { 0, 1, 1, 0, 3 },        /* B */
+            { 0, 2, 1, 0, 4, 1, 3, 2 },        /* R */
+            { 0, 2, 0, 4, 4, 1, 3, 1 },        /* G */
+            { 0, 2, 0, 0, 4, 1, 3, 1 },        /* B */
         },
         .flags = AV_PIX_FMT_FLAG_RGB,
     },
@@ -1092,9 +1092,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 5, 5, 0, 15 },       /* R */
-            { 0, 5, 3, 0, 15 },       /* G */
-            { 0, 5, 1, 0, 15 },       /* B */
+            { 0, 6, 4, 0, 16, 5, 15, 5 },       /* R */
+            { 0, 6, 2, 0, 16, 5, 15, 3 },       /* G */
+            { 0, 6, 0, 0, 16, 5, 15, 1 },       /* B */
         },
         .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_RGB,
     },
@@ -1104,9 +1104,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 5, 5, 0, 15 },       /* R */
-            { 0, 5, 3, 0, 15 },       /* G */
-            { 0, 5, 1, 0, 15 },       /* B */
+            { 0, 6, 4, 0, 16, 5, 15, 5 },       /* R */
+            { 0, 6, 2, 0, 16, 5, 15, 3 },       /* G */
+            { 0, 6, 0, 0, 16, 5, 15, 1 },       /* B */
         },
         .flags = AV_PIX_FMT_FLAG_RGB,
     },
@@ -1116,10 +1116,10 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 7, 5, 0, 15 },       /* R */
-            { 0, 7, 3, 0, 15 },       /* G */
-            { 0, 7, 1, 0, 15 },       /* B */
-            { 0, 7, 7, 0, 15 },       /* A */
+            { 0, 8, 4, 0, 16, 7, 15, 5 },       /* R */
+            { 0, 8, 2, 0, 16, 7, 15, 3 },       /* G */
+            { 0, 8, 0, 0, 16, 7, 15, 1 },       /* B */
+            { 0, 8, 6, 0, 16, 7, 15, 7 },       /* A */
         },
         .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_ALPHA,
     },
@@ -1129,10 +1129,10 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 7, 5, 0, 15 },       /* R */
-            { 0, 7, 3, 0, 15 },       /* G */
-            { 0, 7, 1, 0, 15 },       /* B */
-            { 0, 7, 7, 0, 15 },       /* A */
+            { 0, 8, 4, 0, 16, 7, 15, 5 },       /* R */
+            { 0, 8, 2, 0, 16, 7, 15, 3 },       /* G */
+            { 0, 8, 0, 0, 16, 7, 15, 1 },       /* B */
+            { 0, 8, 6, 0, 16, 7, 15, 7 },       /* A */
         },
         .flags = AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_ALPHA,
     },
@@ -1142,9 +1142,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 4 },        /* R */
-            { 0, 1, 1, 5, 5 },        /* G */
-            { 0, 1, 0, 3, 4 },        /* B */
+            { 0, 2,  0, 0, 5, 1, 4, 1 },        /* R */
+            { 0, 2,  0, 5, 6, 1, 5, 1 },        /* G */
+            { 0, 2, -1, 3, 5, 1, 4, 0 },        /* B */
         },
         .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_RGB,
     },
@@ -1154,9 +1154,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 4 },        /* R */
-            { 0, 1, 1, 5, 5 },        /* G */
-            { 0, 1, 2, 3, 4 },        /* B */
+            { 0, 2, 0, 0, 5, 1, 4, 1 },        /* R */
+            { 0, 2, 0, 5, 6, 1, 5, 1 },        /* G */
+            { 0, 2, 1, 3, 5, 1, 4, 2 },        /* B */
         },
         .flags = AV_PIX_FMT_FLAG_RGB,
     },
@@ -1166,9 +1166,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 4 },       /* R */
-            { 0, 1, 1, 5, 4 },       /* G */
-            { 0, 1, 0, 2, 4 },       /* B */
+            { 0, 2,  0, 0, 5, 1, 4, 1 },       /* R */
+            { 0, 2,  0, 5, 5, 1, 4, 1 },       /* G */
+            { 0, 2, -1, 2, 5, 1, 4, 0 },       /* B */
         },
         .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_RGB,
      },
@@ -1178,9 +1178,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 4 },        /* R */
-            { 0, 1, 1, 5, 4 },        /* G */
-            { 0, 1, 2, 2, 4 },        /* B */
+            { 0, 2, 0, 0, 5, 1, 4, 1 },        /* R */
+            { 0, 2, 0, 5, 5, 1, 4, 1 },        /* G */
+            { 0, 2, 1, 2, 5, 1, 4, 2 },        /* B */
         },
         .flags = AV_PIX_FMT_FLAG_RGB,
     },
@@ -1190,9 +1190,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 3 },       /* R */
-            { 0, 1, 1, 4, 3 },       /* G */
-            { 0, 1, 0, 0, 3 },       /* B */
+            { 0, 2,  0, 0, 4, 1, 3, 1 },       /* R */
+            { 0, 2,  0, 4, 4, 1, 3, 1 },       /* G */
+            { 0, 2, -1, 0, 4, 1, 3, 0 },       /* B */
         },
         .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_RGB,
      },
@@ -1202,12 +1202,13 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 3 },        /* R */
-            { 0, 1, 1, 4, 3 },        /* G */
-            { 0, 1, 2, 0, 3 },        /* B */
+            { 0, 2, 0, 0, 4, 1, 3, 1 },        /* R */
+            { 0, 2, 0, 4, 4, 1, 3, 1 },        /* G */
+            { 0, 2, 1, 0, 4, 1, 3, 2 },        /* B */
         },
         .flags = AV_PIX_FMT_FLAG_RGB,
     },
+#if FF_API_VAAPI
     [AV_PIX_FMT_VAAPI_MOCO] = {
         .name = "vaapi_moco",
         .log2_chroma_w = 1,
@@ -1226,15 +1227,23 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_h = 1,
         .flags = AV_PIX_FMT_FLAG_HWACCEL,
     },
+#else
+    [AV_PIX_FMT_VAAPI] = {
+        .name = "vaapi",
+        .log2_chroma_w = 1,
+        .log2_chroma_h = 1,
+        .flags = AV_PIX_FMT_FLAG_HWACCEL,
+    },
+#endif
     [AV_PIX_FMT_YUV420P9LE] = {
         .name = "yuv420p9le",
         .nb_components = 3,
         .log2_chroma_w = 1,
         .log2_chroma_h = 1,
         .comp = {
-            { 0, 1, 1, 0, 8 },        /* Y */
-            { 1, 1, 1, 0, 8 },        /* U */
-            { 2, 1, 1, 0, 8 },        /* V */
+            { 0, 2, 0, 0, 9, 1, 8, 1 },        /* Y */
+            { 1, 2, 0, 0, 9, 1, 8, 1 },        /* U */
+            { 2, 2, 0, 0, 9, 1, 8, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -1244,9 +1253,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 1,
         .comp = {
-            { 0, 1, 1, 0, 8 },        /* Y */
-            { 1, 1, 1, 0, 8 },        /* U */
-            { 2, 1, 1, 0, 8 },        /* V */
+            { 0, 2, 0, 0, 9, 1, 8, 1 },        /* Y */
+            { 1, 2, 0, 0, 9, 1, 8, 1 },        /* U */
+            { 2, 2, 0, 0, 9, 1, 8, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -1256,9 +1265,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 1,
         .comp = {
-            { 0, 1, 1, 0, 9 },        /* Y */
-            { 1, 1, 1, 0, 9 },        /* U */
-            { 2, 1, 1, 0, 9 },        /* V */
+            { 0, 2, 0, 0, 10, 1, 9, 1 },        /* Y */
+            { 1, 2, 0, 0, 10, 1, 9, 1 },        /* U */
+            { 2, 2, 0, 0, 10, 1, 9, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -1268,9 +1277,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 1,
         .comp = {
-            { 0, 1, 1, 0, 9 },        /* Y */
-            { 1, 1, 1, 0, 9 },        /* U */
-            { 2, 1, 1, 0, 9 },        /* V */
+            { 0, 2, 0, 0, 10, 1, 9, 1 },        /* Y */
+            { 1, 2, 0, 0, 10, 1, 9, 1 },        /* U */
+            { 2, 2, 0, 0, 10, 1, 9, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -1280,9 +1289,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 1,
         .comp = {
-            { 0, 1, 1, 0, 11 },        /* Y */
-            { 1, 1, 1, 0, 11 },        /* U */
-            { 2, 1, 1, 0, 11 },        /* V */
+            { 0, 2, 0, 0, 12, 1, 11, 1 },        /* Y */
+            { 1, 2, 0, 0, 12, 1, 11, 1 },        /* U */
+            { 2, 2, 0, 0, 12, 1, 11, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -1292,9 +1301,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 1,
         .comp = {
-            { 0, 1, 1, 0, 11 },        /* Y */
-            { 1, 1, 1, 0, 11 },        /* U */
-            { 2, 1, 1, 0, 11 },        /* V */
+            { 0, 2, 0, 0, 12, 1, 11, 1 },        /* Y */
+            { 1, 2, 0, 0, 12, 1, 11, 1 },        /* U */
+            { 2, 2, 0, 0, 12, 1, 11, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -1304,9 +1313,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 1,
         .comp = {
-            { 0, 1, 1, 0, 13 },        /* Y */
-            { 1, 1, 1, 0, 13 },        /* U */
-            { 2, 1, 1, 0, 13 },        /* V */
+            { 0, 2, 0, 0, 14, 1, 13, 1 },        /* Y */
+            { 1, 2, 0, 0, 14, 1, 13, 1 },        /* U */
+            { 2, 2, 0, 0, 14, 1, 13, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -1316,9 +1325,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 1,
         .comp = {
-            { 0, 1, 1, 0, 13 },        /* Y */
-            { 1, 1, 1, 0, 13 },        /* U */
-            { 2, 1, 1, 0, 13 },        /* V */
+            { 0, 2, 0, 0, 14, 1, 13, 1 },        /* Y */
+            { 1, 2, 0, 0, 14, 1, 13, 1 },        /* U */
+            { 2, 2, 0, 0, 14, 1, 13, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -1328,9 +1337,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 1,
         .comp = {
-            { 0, 1, 1, 0, 15 },        /* Y */
-            { 1, 1, 1, 0, 15 },        /* U */
-            { 2, 1, 1, 0, 15 },        /* V */
+            { 0, 2, 0, 0, 16, 1, 15, 1 },        /* Y */
+            { 1, 2, 0, 0, 16, 1, 15, 1 },        /* U */
+            { 2, 2, 0, 0, 16, 1, 15, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -1340,9 +1349,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 1,
         .comp = {
-            { 0, 1, 1, 0, 15 },        /* Y */
-            { 1, 1, 1, 0, 15 },        /* U */
-            { 2, 1, 1, 0, 15 },        /* V */
+            { 0, 2, 0, 0, 16, 1, 15, 1 },        /* Y */
+            { 1, 2, 0, 0, 16, 1, 15, 1 },        /* U */
+            { 2, 2, 0, 0, 16, 1, 15, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -1352,9 +1361,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 8 },        /* Y */
-            { 1, 1, 1, 0, 8 },        /* U */
-            { 2, 1, 1, 0, 8 },        /* V */
+            { 0, 2, 0, 0, 9, 1, 8, 1 },        /* Y */
+            { 1, 2, 0, 0, 9, 1, 8, 1 },        /* U */
+            { 2, 2, 0, 0, 9, 1, 8, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -1364,9 +1373,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 8 },        /* Y */
-            { 1, 1, 1, 0, 8 },        /* U */
-            { 2, 1, 1, 0, 8 },        /* V */
+            { 0, 2, 0, 0, 9, 1, 8, 1 },        /* Y */
+            { 1, 2, 0, 0, 9, 1, 8, 1 },        /* U */
+            { 2, 2, 0, 0, 9, 1, 8, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -1376,9 +1385,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 9 },        /* Y */
-            { 1, 1, 1, 0, 9 },        /* U */
-            { 2, 1, 1, 0, 9 },        /* V */
+            { 0, 2, 0, 0, 10, 1, 9, 1 },        /* Y */
+            { 1, 2, 0, 0, 10, 1, 9, 1 },        /* U */
+            { 2, 2, 0, 0, 10, 1, 9, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -1388,9 +1397,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 9 },        /* Y */
-            { 1, 1, 1, 0, 9 },        /* U */
-            { 2, 1, 1, 0, 9 },        /* V */
+            { 0, 2, 0, 0, 10, 1, 9, 1 },        /* Y */
+            { 1, 2, 0, 0, 10, 1, 9, 1 },        /* U */
+            { 2, 2, 0, 0, 10, 1, 9, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -1400,9 +1409,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 11 },        /* Y */
-            { 1, 1, 1, 0, 11 },        /* U */
-            { 2, 1, 1, 0, 11 },        /* V */
+            { 0, 2, 0, 0, 12, 1, 11, 1 },        /* Y */
+            { 1, 2, 0, 0, 12, 1, 11, 1 },        /* U */
+            { 2, 2, 0, 0, 12, 1, 11, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -1412,9 +1421,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 11 },        /* Y */
-            { 1, 1, 1, 0, 11 },        /* U */
-            { 2, 1, 1, 0, 11 },        /* V */
+            { 0, 2, 0, 0, 12, 1, 11, 1 },        /* Y */
+            { 1, 2, 0, 0, 12, 1, 11, 1 },        /* U */
+            { 2, 2, 0, 0, 12, 1, 11, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -1424,9 +1433,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 13 },        /* Y */
-            { 1, 1, 1, 0, 13 },        /* U */
-            { 2, 1, 1, 0, 13 },        /* V */
+            { 0, 2, 0, 0, 14, 1, 13, 1 },        /* Y */
+            { 1, 2, 0, 0, 14, 1, 13, 1 },        /* U */
+            { 2, 2, 0, 0, 14, 1, 13, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -1436,9 +1445,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 13 },        /* Y */
-            { 1, 1, 1, 0, 13 },        /* U */
-            { 2, 1, 1, 0, 13 },        /* V */
+            { 0, 2, 0, 0, 14, 1, 13, 1 },        /* Y */
+            { 1, 2, 0, 0, 14, 1, 13, 1 },        /* U */
+            { 2, 2, 0, 0, 14, 1, 13, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -1448,9 +1457,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 15 },        /* Y */
-            { 1, 1, 1, 0, 15 },        /* U */
-            { 2, 1, 1, 0, 15 },        /* V */
+            { 0, 2, 0, 0, 16, 1, 15, 1 },        /* Y */
+            { 1, 2, 0, 0, 16, 1, 15, 1 },        /* U */
+            { 2, 2, 0, 0, 16, 1, 15, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -1460,9 +1469,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 15 },        /* Y */
-            { 1, 1, 1, 0, 15 },        /* U */
-            { 2, 1, 1, 0, 15 },        /* V */
+            { 0, 2, 0, 0, 16, 1, 15, 1 },        /* Y */
+            { 1, 2, 0, 0, 16, 1, 15, 1 },        /* U */
+            { 2, 2, 0, 0, 16, 1, 15, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -1472,9 +1481,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 15 },        /* Y */
-            { 1, 1, 1, 0, 15 },        /* U */
-            { 2, 1, 1, 0, 15 },        /* V */
+            { 0, 2, 0, 0, 16, 1, 15, 1 },        /* Y */
+            { 1, 2, 0, 0, 16, 1, 15, 1 },        /* U */
+            { 2, 2, 0, 0, 16, 1, 15, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -1484,9 +1493,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 15 },        /* Y */
-            { 1, 1, 1, 0, 15 },        /* U */
-            { 2, 1, 1, 0, 15 },        /* V */
+            { 0, 2, 0, 0, 16, 1, 15, 1 },        /* Y */
+            { 1, 2, 0, 0, 16, 1, 15, 1 },        /* U */
+            { 2, 2, 0, 0, 16, 1, 15, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -1496,9 +1505,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 9 },        /* Y */
-            { 1, 1, 1, 0, 9 },        /* U */
-            { 2, 1, 1, 0, 9 },        /* V */
+            { 0, 2, 0, 0, 10, 1, 9, 1 },        /* Y */
+            { 1, 2, 0, 0, 10, 1, 9, 1 },        /* U */
+            { 2, 2, 0, 0, 10, 1, 9, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -1508,9 +1517,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 9 },        /* Y */
-            { 1, 1, 1, 0, 9 },        /* U */
-            { 2, 1, 1, 0, 9 },        /* V */
+            { 0, 2, 0, 0, 10, 1, 9, 1 },        /* Y */
+            { 1, 2, 0, 0, 10, 1, 9, 1 },        /* U */
+            { 2, 2, 0, 0, 10, 1, 9, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -1520,9 +1529,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 8 },        /* Y */
-            { 1, 1, 1, 0, 8 },        /* U */
-            { 2, 1, 1, 0, 8 },        /* V */
+            { 0, 2, 0, 0, 9, 1, 8, 1 },        /* Y */
+            { 1, 2, 0, 0, 9, 1, 8, 1 },        /* U */
+            { 2, 2, 0, 0, 9, 1, 8, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -1532,9 +1541,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 8 },        /* Y */
-            { 1, 1, 1, 0, 8 },        /* U */
-            { 2, 1, 1, 0, 8 },        /* V */
+            { 0, 2, 0, 0, 9, 1, 8, 1 },        /* Y */
+            { 1, 2, 0, 0, 9, 1, 8, 1 },        /* U */
+            { 2, 2, 0, 0, 9, 1, 8, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -1544,9 +1553,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 11 },        /* Y */
-            { 1, 1, 1, 0, 11 },        /* U */
-            { 2, 1, 1, 0, 11 },        /* V */
+            { 0, 2, 0, 0, 12, 1, 11, 1 },        /* Y */
+            { 1, 2, 0, 0, 12, 1, 11, 1 },        /* U */
+            { 2, 2, 0, 0, 12, 1, 11, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -1556,9 +1565,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 11 },        /* Y */
-            { 1, 1, 1, 0, 11 },        /* U */
-            { 2, 1, 1, 0, 11 },        /* V */
+            { 0, 2, 0, 0, 12, 1, 11, 1 },        /* Y */
+            { 1, 2, 0, 0, 12, 1, 11, 1 },        /* U */
+            { 2, 2, 0, 0, 12, 1, 11, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -1568,9 +1577,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 13 },        /* Y */
-            { 1, 1, 1, 0, 13 },        /* U */
-            { 2, 1, 1, 0, 13 },        /* V */
+            { 0, 2, 0, 0, 14, 1, 13, 1 },        /* Y */
+            { 1, 2, 0, 0, 14, 1, 13, 1 },        /* U */
+            { 2, 2, 0, 0, 14, 1, 13, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -1580,9 +1589,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 13 },        /* Y */
-            { 1, 1, 1, 0, 13 },        /* U */
-            { 2, 1, 1, 0, 13 },        /* V */
+            { 0, 2, 0, 0, 14, 1, 13, 1 },        /* Y */
+            { 1, 2, 0, 0, 14, 1, 13, 1 },        /* U */
+            { 2, 2, 0, 0, 14, 1, 13, 1 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -1608,8 +1617,8 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .name = "ya8",
         .nb_components = 2,
         .comp = {
-            { 0, 1, 1, 0, 7 },        /* Y */
-            { 0, 1, 2, 0, 7 },        /* A */
+            { 0, 2, 0, 0, 8, 1, 7, 1 },        /* Y */
+            { 0, 2, 1, 0, 8, 1, 7, 2 },        /* A */
         },
         .flags = AV_PIX_FMT_FLAG_ALPHA,
         .alias = "gray8a",
@@ -1618,8 +1627,8 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .name = "ya16le",
         .nb_components = 2,
         .comp = {
-            { 0, 3, 1, 0, 15 },        /* Y */
-            { 0, 3, 3, 0, 15 },        /* A */
+            { 0, 4, 0, 0, 16, 3, 15, 1 },        /* Y */
+            { 0, 4, 2, 0, 16, 3, 15, 3 },        /* A */
         },
         .flags = AV_PIX_FMT_FLAG_ALPHA,
     },
@@ -1627,20 +1636,24 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .name = "ya16be",
         .nb_components = 2,
         .comp = {
-            { 0, 3, 1, 0, 15 },        /* Y */
-            { 0, 3, 3, 0, 15 },        /* A */
+            { 0, 4, 0, 0, 16, 3, 15, 1 },        /* Y */
+            { 0, 4, 2, 0, 16, 3, 15, 3 },        /* A */
         },
         .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_ALPHA,
     },
+    [AV_PIX_FMT_VIDEOTOOLBOX] = {
+        .name = "videotoolbox_vld",
+        .flags = AV_PIX_FMT_FLAG_HWACCEL,
+    },
     [AV_PIX_FMT_GBRP] = {
         .name = "gbrp",
         .nb_components = 3,
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 2, 0, 1, 0, 7 },        /* R */
-            { 0, 0, 1, 0, 7 },        /* G */
-            { 1, 0, 1, 0, 7 },        /* B */
+            { 2, 1, 0, 0, 8, 0, 7, 1 },        /* R */
+            { 0, 1, 0, 0, 8, 0, 7, 1 },        /* G */
+            { 1, 1, 0, 0, 8, 0, 7, 1 },        /* B */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB,
     },
@@ -1650,9 +1663,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 2, 1, 1, 0, 8 },        /* R */
-            { 0, 1, 1, 0, 8 },        /* G */
-            { 1, 1, 1, 0, 8 },        /* B */
+            { 2, 2, 0, 0, 9, 1, 8, 1 },        /* R */
+            { 0, 2, 0, 0, 9, 1, 8, 1 },        /* G */
+            { 1, 2, 0, 0, 9, 1, 8, 1 },        /* B */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB,
     },
@@ -1662,9 +1675,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 2, 1, 1, 0, 8 },        /* R */
-            { 0, 1, 1, 0, 8 },        /* G */
-            { 1, 1, 1, 0, 8 },        /* B */
+            { 2, 2, 0, 0, 9, 1, 8, 1 },        /* R */
+            { 0, 2, 0, 0, 9, 1, 8, 1 },        /* G */
+            { 1, 2, 0, 0, 9, 1, 8, 1 },        /* B */
         },
         .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB,
     },
@@ -1674,9 +1687,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 2, 1, 1, 0, 9 },        /* R */
-            { 0, 1, 1, 0, 9 },        /* G */
-            { 1, 1, 1, 0, 9 },        /* B */
+            { 2, 2, 0, 0, 10, 1, 9, 1 },        /* R */
+            { 0, 2, 0, 0, 10, 1, 9, 1 },        /* G */
+            { 1, 2, 0, 0, 10, 1, 9, 1 },        /* B */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB,
     },
@@ -1686,9 +1699,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 2, 1, 1, 0, 9 },        /* R */
-            { 0, 1, 1, 0, 9 },        /* G */
-            { 1, 1, 1, 0, 9 },        /* B */
+            { 2, 2, 0, 0, 10, 1, 9, 1 },        /* R */
+            { 0, 2, 0, 0, 10, 1, 9, 1 },        /* G */
+            { 1, 2, 0, 0, 10, 1, 9, 1 },        /* B */
         },
         .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB,
     },
@@ -1698,9 +1711,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 2, 1, 1, 0, 11 },        /* R */
-            { 0, 1, 1, 0, 11 },        /* G */
-            { 1, 1, 1, 0, 11 },        /* B */
+            { 2, 2, 0, 0, 12, 1, 11, 1 },        /* R */
+            { 0, 2, 0, 0, 12, 1, 11, 1 },        /* G */
+            { 1, 2, 0, 0, 12, 1, 11, 1 },        /* B */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB,
     },
@@ -1710,9 +1723,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 2, 1, 1, 0, 11 },        /* R */
-            { 0, 1, 1, 0, 11 },        /* G */
-            { 1, 1, 1, 0, 11 },        /* B */
+            { 2, 2, 0, 0, 12, 1, 11, 1 },        /* R */
+            { 0, 2, 0, 0, 12, 1, 11, 1 },        /* G */
+            { 1, 2, 0, 0, 12, 1, 11, 1 },        /* B */
         },
         .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB,
     },
@@ -1722,9 +1735,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 2, 1, 1, 0, 13 },        /* R */
-            { 0, 1, 1, 0, 13 },        /* G */
-            { 1, 1, 1, 0, 13 },        /* B */
+            { 2, 2, 0, 0, 14, 1, 13, 1 },        /* R */
+            { 0, 2, 0, 0, 14, 1, 13, 1 },        /* G */
+            { 1, 2, 0, 0, 14, 1, 13, 1 },        /* B */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB,
     },
@@ -1734,9 +1747,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 2, 1, 1, 0, 13 },        /* R */
-            { 0, 1, 1, 0, 13 },        /* G */
-            { 1, 1, 1, 0, 13 },        /* B */
+            { 2, 2, 0, 0, 14, 1, 13, 1 },        /* R */
+            { 0, 2, 0, 0, 14, 1, 13, 1 },        /* G */
+            { 1, 2, 0, 0, 14, 1, 13, 1 },        /* B */
         },
         .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB,
     },
@@ -1746,9 +1759,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 2, 1, 1, 0, 15 },       /* R */
-            { 0, 1, 1, 0, 15 },       /* G */
-            { 1, 1, 1, 0, 15 },       /* B */
+            { 2, 2, 0, 0, 16, 1, 15, 1 },       /* R */
+            { 0, 2, 0, 0, 16, 1, 15, 1 },       /* G */
+            { 1, 2, 0, 0, 16, 1, 15, 1 },       /* B */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB,
     },
@@ -1758,9 +1771,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 2, 1, 1, 0, 15 },       /* R */
-            { 0, 1, 1, 0, 15 },       /* G */
-            { 1, 1, 1, 0, 15 },       /* B */
+            { 2, 2, 0, 0, 16, 1, 15, 1 },       /* R */
+            { 0, 2, 0, 0, 16, 1, 15, 1 },       /* G */
+            { 1, 2, 0, 0, 16, 1, 15, 1 },       /* B */
         },
         .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB,
     },
@@ -1770,10 +1783,10 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 2, 0, 1, 0, 7 },        /* R */
-            { 0, 0, 1, 0, 7 },        /* G */
-            { 1, 0, 1, 0, 7 },        /* B */
-            { 3, 0, 1, 0, 7 },        /* A */
+            { 2, 1, 0, 0, 8, 0, 7, 1 },        /* R */
+            { 0, 1, 0, 0, 8, 0, 7, 1 },        /* G */
+            { 1, 1, 0, 0, 8, 0, 7, 1 },        /* B */
+            { 3, 1, 0, 0, 8, 0, 7, 1 },        /* A */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB |
                  AV_PIX_FMT_FLAG_ALPHA,
@@ -1784,10 +1797,10 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 2, 1, 1, 0, 15 },       /* R */
-            { 0, 1, 1, 0, 15 },       /* G */
-            { 1, 1, 1, 0, 15 },       /* B */
-            { 3, 1, 1, 0, 15 },       /* A */
+            { 2, 2, 0, 0, 16, 1, 15, 1 },       /* R */
+            { 0, 2, 0, 0, 16, 1, 15, 1 },       /* G */
+            { 1, 2, 0, 0, 16, 1, 15, 1 },       /* B */
+            { 3, 2, 0, 0, 16, 1, 15, 1 },       /* A */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB |
                  AV_PIX_FMT_FLAG_ALPHA,
@@ -1798,10 +1811,10 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 2, 1, 1, 0, 15 },       /* R */
-            { 0, 1, 1, 0, 15 },       /* G */
-            { 1, 1, 1, 0, 15 },       /* B */
-            { 3, 1, 1, 0, 15 },       /* A */
+            { 2, 2, 0, 0, 16, 1, 15, 1 },       /* R */
+            { 0, 2, 0, 0, 16, 1, 15, 1 },       /* G */
+            { 1, 2, 0, 0, 16, 1, 15, 1 },       /* B */
+            { 3, 2, 0, 0, 16, 1, 15, 1 },       /* A */
         },
         .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_PLANAR |
                  AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_ALPHA,
@@ -1818,9 +1831,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 5, 1, 4, 11 },       /* X */
-            { 0, 5, 3, 4, 11 },       /* Y */
-            { 0, 5, 5, 4, 11 },       /* Z */
+            { 0, 6, 0, 4, 12, 5, 11, 1 },       /* X */
+            { 0, 6, 2, 4, 12, 5, 11, 3 },       /* Y */
+            { 0, 6, 4, 4, 12, 5, 11, 5 },       /* Z */
       },
       /*.flags = -- not used*/
     },
@@ -1830,9 +1843,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 0,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 5, 1, 4, 11 },       /* X */
-            { 0, 5, 3, 4, 11 },       /* Y */
-            { 0, 5, 5, 4, 11 },       /* Z */
+            { 0, 6, 0, 4, 12, 5, 11, 1 },       /* X */
+            { 0, 6, 2, 4, 12, 5, 11, 3 },       /* Y */
+            { 0, 6, 4, 4, 12, 5, 11, 5 },       /* Z */
        },
         .flags = AV_PIX_FMT_FLAG_BE,
     },
@@ -1842,9 +1855,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w= 0, \
         .log2_chroma_h= 0, \
         .comp = {          \
-            {0,0,0,0,1},   \
-            {0,0,0,0,3},   \
-            {0,0,0,0,1},   \
+            {0,1,0,0,2,0,1,1},\
+            {0,1,0,0,4,0,3,1},\
+            {0,1,0,0,2,0,1,1},\
         },                 \
 
 #define BAYER16_DESC_COMMON \
@@ -1852,9 +1865,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w= 0, \
         .log2_chroma_h= 0, \
         .comp = {          \
-            {0,1,0,0, 3},  \
-            {0,1,0,0, 7},  \
-            {0,1,0,0, 3},  \
+            {0,2,0,0,4,1,3,1},\
+            {0,2,0,0,8,1,7,1},\
+            {0,2,0,0,4,1,3,1},\
         },                 \
 
     [AV_PIX_FMT_BAYER_BGGR8] = {
@@ -1923,9 +1936,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 0, 1, 0, 7 },        /* Y */
-            { 1, 1, 1, 0, 7 },        /* U */
-            { 1, 1, 2, 0, 7 },        /* V */
+            { 0, 1, 0, 0, 8, 0, 7, 1 },        /* Y */
+            { 1, 2, 0, 0, 8, 1, 7, 1 },        /* U */
+            { 1, 2, 1, 0, 8, 1, 7, 2 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -1935,9 +1948,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 9 },        /* Y */
-            { 1, 3, 1, 0, 9 },        /* U */
-            { 1, 3, 3, 0, 9 },        /* V */
+            { 0, 2, 0, 0, 10, 1, 9, 1 },        /* Y */
+            { 1, 4, 0, 0, 10, 3, 9, 1 },        /* U */
+            { 1, 4, 2, 0, 10, 3, 9, 3 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR,
     },
@@ -1947,9 +1960,9 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .log2_chroma_w = 1,
         .log2_chroma_h = 0,
         .comp = {
-            { 0, 1, 1, 0, 9 },        /* Y */
-            { 1, 3, 1, 0, 9 },        /* U */
-            { 1, 3, 3, 0, 9 },        /* V */
+            { 0, 2, 0, 0, 10, 1, 9, 1 },        /* Y */
+            { 1, 4, 0, 0, 10, 3, 9, 1 },        /* U */
+            { 1, 4, 2, 0, 10, 3, 9, 3 },        /* V */
         },
         .flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_BE,
     },
@@ -1965,7 +1978,60 @@ const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
         .name = "mmal",
         .flags = AV_PIX_FMT_FLAG_HWACCEL,
     },
+    [AV_PIX_FMT_AYUV64LE] = {
+        .name = "ayuv64le",
+        .nb_components = 4,
+        .log2_chroma_w = 0,
+        .log2_chroma_h = 0,
+        .comp = {
+            { 0, 8, 2, 0, 16, 7, 15, 3 },        /* Y */
+            { 0, 8, 4, 0, 16, 7, 15, 5 },        /* U */
+            { 0, 8, 6, 0, 16, 7, 15, 7 },        /* V */
+            { 0, 8, 0, 0, 16, 7, 15, 1 },        /* A */
+        },
+        .flags = AV_PIX_FMT_FLAG_ALPHA,
+    },
+    [AV_PIX_FMT_AYUV64BE] = {
+        .name = "ayuv64be",
+        .nb_components = 4,
+        .log2_chroma_w = 0,
+        .log2_chroma_h = 0,
+        .comp = {
+            { 0, 8, 2, 0, 16, 7, 15, 3 },        /* Y */
+            { 0, 8, 4, 0, 16, 7, 15, 5 },        /* U */
+            { 0, 8, 6, 0, 16, 7, 15, 7 },        /* V */
+            { 0, 8, 0, 0, 16, 7, 15, 1 },        /* A */
+        },
+        .flags = AV_PIX_FMT_FLAG_BE | AV_PIX_FMT_FLAG_ALPHA,
+    },
+    [AV_PIX_FMT_P010LE] = {
+        .name = "p010le",
+        .nb_components = 3,
+        .log2_chroma_w = 1,
+        .log2_chroma_h = 1,
+        .comp = {
+            { 0, 2, 0, 6, 10, 1, 9, 1 },        /* Y */
+            { 1, 4, 0, 6, 10, 3, 9, 1 },        /* U */
+            { 1, 4, 2, 6, 10, 3, 9, 3 },        /* V */
+        },
+        .flags = AV_PIX_FMT_FLAG_PLANAR,
+    },
+    [AV_PIX_FMT_P010BE] = {
+        .name = "p010be",
+        .nb_components = 3,
+        .log2_chroma_w = 1,
+        .log2_chroma_h = 1,
+        .comp = {
+            { 0, 2, 0, 6, 10, 1, 9, 1 },        /* Y */
+            { 1, 4, 0, 6, 10, 3, 9, 1 },        /* U */
+            { 1, 4, 2, 6, 10, 3, 9, 3 },        /* V */
+        },
+        .flags = AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_BE,
+    },
 };
+#if FF_API_PLUS1_MINUS1
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
 
 static const char *color_range_names[AVCOL_RANGE_NB] = {
     "unknown", "tv", "pc",
@@ -1974,13 +2040,14 @@ static const char *color_range_names[AVCOL_RANGE_NB] = {
 static const char *color_primaries_names[AVCOL_PRI_NB] = {
     "reserved", "bt709", "unknown", "reserved", "bt470m",
     "bt470bg", "smpte170m", "smpte240m", "film", "bt2020",
+    "smpte428-1",
 };
 
 static const char *color_transfer_names[AVCOL_TRC_NB] = {
     "reserved", "bt709", "unknown", "reserved", "bt470m",
     "bt470bg", "smpte170m", "smpte240m", "linear", "log100",
     "log316", "iec61966-2-4", "bt1361e", "iec61966-2-1",
-    "bt2020-10", "bt2020-20",
+    "bt2020-10", "bt2020-20", "smpte2084", "smpte428-1",
 };
 
 static const char *color_space_names[AVCOL_SPC_NB] = {
@@ -2046,7 +2113,7 @@ int av_get_bits_per_pixel(const AVPixFmtDescriptor *pixdesc)
 
     for (c = 0; c < pixdesc->nb_components; c++) {
         int s = c == 1 || c == 2 ? 0 : log2_pixels;
-        bits += (pixdesc->comp[c].depth_minus1 + 1) << s;
+        bits += pixdesc->comp[c].depth << s;
     }
 
     return bits >> log2_pixels;
@@ -2061,7 +2128,7 @@ int av_get_padded_bits_per_pixel(const AVPixFmtDescriptor *pixdesc)
     for (c = 0; c < pixdesc->nb_components; c++) {
         const AVComponentDescriptor *comp = &pixdesc->comp[c];
         int s = c == 1 || c == 2 ? 0 : log2_pixels;
-        steps[comp->plane] = (comp->step_minus1 + 1) << s;
+        steps[comp->plane] = comp->step << s;
     }
     for (c = 0; c < 4; c++)
         bits += steps[c];
@@ -2165,19 +2232,19 @@ void ff_check_pixfmt_descriptors(void){
         for (j=0; j<FF_ARRAY_ELEMS(d->comp); j++) {
             const AVComponentDescriptor *c = &d->comp[j];
             if(j>=d->nb_components) {
-                av_assert0(!c->plane && !c->step_minus1 && !c->offset_plus1 && !c->shift && !c->depth_minus1);
+                av_assert0(!c->plane && !c->step && !c->offset && !c->shift && !c->depth);
                 continue;
             }
             if (d->flags & AV_PIX_FMT_FLAG_BITSTREAM) {
-                av_assert0(c->step_minus1 >= c->depth_minus1);
+                av_assert0(c->step >= c->depth);
             } else {
-                av_assert0(8*(c->step_minus1+1) >= c->depth_minus1+1);
+                av_assert0(8*c->step >= c->depth);
             }
             if (!strncmp(d->name, "bayer_", 6))
                 continue;
             av_read_image_line(tmp, (void*)data, linesize, d, 0, 0, j, 2, 0);
             av_assert0(tmp[0] == 0 && tmp[1] == 0);
-            tmp[0] = tmp[1] = (1<<(c->depth_minus1 + 1)) - 1;
+            tmp[0] = tmp[1] = (1<<c->depth) - 1;
             av_write_image_line(tmp, data, linesize, d, 0, 0, j, 2);
         }
     }
@@ -2208,6 +2275,7 @@ enum AVPixelFormat av_pix_fmt_swap_endianness(enum AVPixelFormat pix_fmt)
 #define FF_COLOR_GRAY     1 /**< gray color space */
 #define FF_COLOR_YUV      2 /**< YUV color space. 16 <= Y <= 235, 16 <= U, V <= 240 */
 #define FF_COLOR_YUV_JPEG 3 /**< YUV color space. 0 <= Y <= 255, 0 <= U, V <= 255 */
+#define FF_COLOR_XYZ      4 /**< XYZ color space (formats whose name starts with "xyz") */
 
 #define pixdesc_has_alpha(pixdesc) \
     ((pixdesc)->nb_components == 2 || (pixdesc)->nb_components == 4 || (pixdesc)->flags & AV_PIX_FMT_FLAG_PAL)
@@ -2223,6 +2291,9 @@ static int get_color_type(const AVPixFmtDescriptor *desc) {
     if(desc->name && !strncmp(desc->name, "yuvj", 4))
         return FF_COLOR_YUV_JPEG;
 
+    if(desc->name && !strncmp(desc->name, "xyz", 3))
+        return FF_COLOR_XYZ;
+
     if(desc->flags & AV_PIX_FMT_FLAG_RGB)
         return  FF_COLOR_RGB;
 
@@ -2244,8 +2315,8 @@ static int get_pix_fmt_depth(int *min, int *max, enum AVPixelFormat pix_fmt)
 
     *min = INT_MAX, *max = -INT_MAX;
     for (i = 0; i < desc->nb_components; i++) {
-        *min = FFMIN(desc->comp[i].depth_minus1+1, *min);
-        *max = FFMAX(desc->comp[i].depth_minus1+1, *max);
+        *min = FFMIN(desc->comp[i].depth, *min);
+        *max = FFMAX(desc->comp[i].depth, *max);
     }
     return 0;
 }
@@ -2283,8 +2354,8 @@ static int get_pix_fmt_score(enum AVPixelFormat dst_pix_fmt,
         nb_components = FFMIN(src_desc->nb_components, dst_desc->nb_components);
 
     for (i = 0; i < nb_components; i++) {
-        int depth_minus1 = (dst_pix_fmt == AV_PIX_FMT_PAL8) ? 7/nb_components : dst_desc->comp[i].depth_minus1;
-        if (src_desc->comp[i].depth_minus1 > depth_minus1 && (consider & FF_LOSS_DEPTH)) {
+        int depth_minus1 = (dst_pix_fmt == AV_PIX_FMT_PAL8) ? 7/nb_components : (dst_desc->comp[i].depth - 1);
+        if (src_desc->comp[i].depth - 1 > depth_minus1 && (consider & FF_LOSS_DEPTH)) {
             loss |= FF_LOSS_DEPTH;
             score -= 65536 >> depth_minus1;
         }
@@ -2334,7 +2405,7 @@ static int get_pix_fmt_score(enum AVPixelFormat dst_pix_fmt,
         break;
     }
     if(loss & FF_LOSS_COLORSPACE)
-        score -= (nb_components * 65536) >> FFMIN(dst_desc->comp[0].depth_minus1, src_desc->comp[0].depth_minus1);
+        score -= (nb_components * 65536) >> FFMIN(dst_desc->comp[0].depth - 1, src_desc->comp[0].depth - 1);
 
     if (dst_color == FF_COLOR_GRAY &&
         src_color != FF_COLOR_GRAY && (consider & FF_LOSS_CHROMA)) {
diff --git a/libavutil/pixdesc.h b/libavutil/pixdesc.h
index 78f8d559..b1d218db 100644
--- a/libavutil/pixdesc.h
+++ b/libavutil/pixdesc.h
@@ -26,35 +26,47 @@
 
 #include "attributes.h"
 #include "pixfmt.h"
+#include "version.h"
 
 typedef struct AVComponentDescriptor {
     /**
      * Which of the 4 planes contains the component.
      */
-    uint16_t plane        : 2;
+    int plane;
 
     /**
-     * Number of elements between 2 horizontally consecutive pixels minus 1.
+     * Number of elements between 2 horizontally consecutive pixels.
      * Elements are bits for bitstream formats, bytes otherwise.
      */
-    uint16_t step_minus1  : 3;
+    int step;
 
     /**
-     * Number of elements before the component of the first pixel plus 1.
+     * Number of elements before the component of the first pixel.
      * Elements are bits for bitstream formats, bytes otherwise.
      */
-    uint16_t offset_plus1 : 3;
+    int offset;
 
     /**
      * Number of least significant bits that must be shifted away
      * to get the value.
      */
-    uint16_t shift        : 3;
+    int shift;
 
     /**
-     * Number of bits in the component minus 1.
+     * Number of bits in the component.
      */
-    uint16_t depth_minus1 : 4;
+    int depth;
+
+#if FF_API_PLUS1_MINUS1
+    /** @deprecated Use step instead; this legacy field holds step - 1. */
+    attribute_deprecated int step_minus1;
+
+    /** @deprecated Use depth instead; this legacy field holds depth - 1. */
+    attribute_deprecated int depth_minus1;
+
+    /** @deprecated Use offset instead; this legacy field holds offset + 1. */
+    attribute_deprecated int offset_plus1;
+#endif
 } AVComponentDescriptor;
 
 /**
@@ -87,15 +99,20 @@ typedef struct AVPixFmtDescriptor {
      * This value only refers to the chroma components.
      */
     uint8_t log2_chroma_h;
-    uint8_t flags;
+
+    /**
+     * Combination of AV_PIX_FMT_FLAG_... flags.
+     */
+    uint64_t flags;
 
     /**
      * Parameters that describe how pixels are packed.
-     * If the format has 2 or 4 components, then alpha is last.
      * If the format has 1 or 2 components, then luma is 0.
      * If the format has 3 or 4 components:
      *   if the RGB flag is set then 0 is red, 1 is green and 2 is blue;
      *   otherwise 0 is luma, 1 is chroma-U and 2 is chroma-V.
+     *
+     * If present, the Alpha channel is always the last component.
      */
     AVComponentDescriptor comp[4];
 
@@ -155,27 +172,6 @@ typedef struct AVPixFmtDescriptor {
  */
 #define AV_PIX_FMT_FLAG_ALPHA        (1 << 7)
 
-#if FF_API_PIX_FMT
-/**
- * @deprecated use the AV_PIX_FMT_FLAG_* flags
- */
-#define PIX_FMT_BE        AV_PIX_FMT_FLAG_BE
-#define PIX_FMT_PAL       AV_PIX_FMT_FLAG_PAL
-#define PIX_FMT_BITSTREAM AV_PIX_FMT_FLAG_BITSTREAM
-#define PIX_FMT_HWACCEL   AV_PIX_FMT_FLAG_HWACCEL
-#define PIX_FMT_PLANAR    AV_PIX_FMT_FLAG_PLANAR
-#define PIX_FMT_RGB       AV_PIX_FMT_FLAG_RGB
-#define PIX_FMT_PSEUDOPAL AV_PIX_FMT_FLAG_PSEUDOPAL
-#define PIX_FMT_ALPHA     AV_PIX_FMT_FLAG_ALPHA
-#endif
-
-#if FF_API_PIX_FMT_DESC
-/**
- * The array of all the pixel format descriptors.
- */
-extern attribute_deprecated const AVPixFmtDescriptor av_pix_fmt_descriptors[];
-#endif
-
 /**
  * Read a line from an image, and write the values of the
  * pixel format component c to dst.
@@ -296,8 +292,8 @@ enum AVPixelFormat av_pix_fmt_desc_get_id(const AVPixFmtDescriptor *desc);
  * you do check the return code!
  *
  * @param[in]  pix_fmt the pixel format
- * @param[out] h_shift store log2_chroma_w
- * @param[out] v_shift store log2_chroma_h
+ * @param[out] h_shift store log2_chroma_w (horizontal/width shift)
+ * @param[out] v_shift store log2_chroma_h (vertical/height shift)
  *
  * @return 0 on success, AVERROR(ENOSYS) on invalid or unknown pixel format
  */
diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h
index eef64449..c01c0575 100644
--- a/libavutil/pixfmt.h
+++ b/libavutil/pixfmt.h
@@ -121,9 +121,21 @@ enum AVPixelFormat {
     AV_PIX_FMT_BGR555BE,  ///< packed BGR 5:5:5, 16bpp, (msb)1X 5B 5G 5R(lsb), big-endian   , X=unused/undefined
     AV_PIX_FMT_BGR555LE,  ///< packed BGR 5:5:5, 16bpp, (msb)1X 5B 5G 5R(lsb), little-endian, X=unused/undefined
 
+#if FF_API_VAAPI
+    /** @name Deprecated pixel formats */
+    /**@{*/
     AV_PIX_FMT_VAAPI_MOCO, ///< HW acceleration through VA API at motion compensation entry-point, Picture.data[3] contains a vaapi_render_state struct which contains macroblocks as well as various fields extracted from headers
     AV_PIX_FMT_VAAPI_IDCT, ///< HW acceleration through VA API at IDCT entry-point, Picture.data[3] contains a vaapi_render_state struct which contains fields extracted from headers
     AV_PIX_FMT_VAAPI_VLD,  ///< HW decoding through VA API, Picture.data[3] contains a vaapi_render_state struct which contains the bitstream of the slices as well as various fields extracted from headers
+    /**@}*/
+    AV_PIX_FMT_VAAPI = AV_PIX_FMT_VAAPI_VLD,
+#else
+    /**
+     *  Hardware acceleration through VA-API, data[3] contains a
+     *  VASurfaceID.
+     */
+    AV_PIX_FMT_VAAPI,
+#endif
 
     AV_PIX_FMT_YUV420P16LE,  ///< planar YUV 4:2:0, 24bpp, (1 Cr & Cb sample per 2x2 Y samples), little-endian
     AV_PIX_FMT_YUV420P16BE,  ///< planar YUV 4:2:0, 24bpp, (1 Cr & Cb sample per 2x2 Y samples), big-endian
@@ -166,13 +178,6 @@ enum AVPixelFormat {
     AV_PIX_FMT_YUV422P9BE, ///< planar YUV 4:2:2, 18bpp, (1 Cr & Cb sample per 2x1 Y samples), big-endian
     AV_PIX_FMT_YUV422P9LE, ///< planar YUV 4:2:2, 18bpp, (1 Cr & Cb sample per 2x1 Y samples), little-endian
     AV_PIX_FMT_VDA_VLD,    ///< hardware decoding through VDA
-
-#ifdef AV_PIX_FMT_ABI_GIT_MASTER
-    AV_PIX_FMT_RGBA64BE,  ///< packed RGBA 16:16:16:16, 64bpp, 16R, 16G, 16B, 16A, the 2-byte value for each R/G/B/A component is stored as big-endian
-    AV_PIX_FMT_RGBA64LE,  ///< packed RGBA 16:16:16:16, 64bpp, 16R, 16G, 16B, 16A, the 2-byte value for each R/G/B/A component is stored as little-endian
-    AV_PIX_FMT_BGRA64BE,  ///< packed RGBA 16:16:16:16, 64bpp, 16B, 16G, 16R, 16A, the 2-byte value for each R/G/B/A component is stored as big-endian
-    AV_PIX_FMT_BGRA64LE,  ///< packed RGBA 16:16:16:16, 64bpp, 16B, 16G, 16R, 16A, the 2-byte value for each R/G/B/A component is stored as little-endian
-#endif
     AV_PIX_FMT_GBRP,      ///< planar GBR 4:4:4 24bpp
     AV_PIX_FMT_GBRP9BE,   ///< planar GBR 4:4:4 27bpp, big-endian
     AV_PIX_FMT_GBRP9LE,   ///< planar GBR 4:4:4 27bpp, little-endian
@@ -180,15 +185,8 @@ enum AVPixelFormat {
     AV_PIX_FMT_GBRP10LE,  ///< planar GBR 4:4:4 30bpp, little-endian
     AV_PIX_FMT_GBRP16BE,  ///< planar GBR 4:4:4 48bpp, big-endian
     AV_PIX_FMT_GBRP16LE,  ///< planar GBR 4:4:4 48bpp, little-endian
-
-    /**
-     * duplicated pixel formats for compatibility with libav.
-     * FFmpeg supports these formats since May 8 2012 and Jan 28 2012 (commits f9ca1ac7 and 143a5c55)
-     * Libav added them Oct 12 2012 with incompatible values (commit 6d5600e85)
-     */
-    AV_PIX_FMT_YUVA422P_LIBAV,  ///< planar YUV 4:2:2 24bpp, (1 Cr & Cb sample per 2x1 Y & A samples)
-    AV_PIX_FMT_YUVA444P_LIBAV,  ///< planar YUV 4:4:4 32bpp, (1 Cr & Cb sample per 1x1 Y & A samples)
-
+    AV_PIX_FMT_YUVA422P,  ///< planar YUV 4:2:2 24bpp, (1 Cr & Cb sample per 2x1 Y & A samples)
+    AV_PIX_FMT_YUVA444P,  ///< planar YUV 4:4:4 32bpp, (1 Cr & Cb sample per 1x1 Y & A samples)
     AV_PIX_FMT_YUVA420P9BE,  ///< planar YUV 4:2:0 22.5bpp, (1 Cr & Cb sample per 2x2 Y & A samples), big-endian
     AV_PIX_FMT_YUVA420P9LE,  ///< planar YUV 4:2:0 22.5bpp, (1 Cr & Cb sample per 2x2 Y & A samples), little-endian
     AV_PIX_FMT_YUVA422P9BE,  ///< planar YUV 4:2:2 27bpp, (1 Cr & Cb sample per 2x1 Y & A samples), big-endian
@@ -216,16 +214,10 @@ enum AVPixelFormat {
     AV_PIX_FMT_NV20LE,       ///< interleaved chroma YUV 4:2:2, 20bpp, (1 Cr & Cb sample per 2x1 Y samples), little-endian
     AV_PIX_FMT_NV20BE,       ///< interleaved chroma YUV 4:2:2, 20bpp, (1 Cr & Cb sample per 2x1 Y samples), big-endian
 
-    /**
-     * duplicated pixel formats for compatibility with libav.
-     * FFmpeg supports these formats since Sat Sep 24 06:01:45 2011 +0200 (commits 9569a3c9f41387a8c7d1ce97d8693520477a66c3)
-     * also see Fri Nov 25 01:38:21 2011 +0100 92afb431621c79155fcb7171d26f137eb1bee028
-     * Libav added them Sun Mar 16 23:05:47 2014 +0100 with incompatible values (commit 1481d24c3a0abf81e1d7a514547bd5305232be30)
-     */
-    AV_PIX_FMT_RGBA64BE_LIBAV,     ///< packed RGBA 16:16:16:16, 64bpp, 16R, 16G, 16B, 16A, the 2-byte value for each R/G/B/A component is stored as big-endian
-    AV_PIX_FMT_RGBA64LE_LIBAV,     ///< packed RGBA 16:16:16:16, 64bpp, 16R, 16G, 16B, 16A, the 2-byte value for each R/G/B/A component is stored as little-endian
-    AV_PIX_FMT_BGRA64BE_LIBAV,     ///< packed RGBA 16:16:16:16, 64bpp, 16B, 16G, 16R, 16A, the 2-byte value for each R/G/B/A component is stored as big-endian
-    AV_PIX_FMT_BGRA64LE_LIBAV,     ///< packed RGBA 16:16:16:16, 64bpp, 16B, 16G, 16R, 16A, the 2-byte value for each R/G/B/A component is stored as little-endian
+    AV_PIX_FMT_RGBA64BE,     ///< packed RGBA 16:16:16:16, 64bpp, 16R, 16G, 16B, 16A, the 2-byte value for each R/G/B/A component is stored as big-endian
+    AV_PIX_FMT_RGBA64LE,     ///< packed RGBA 16:16:16:16, 64bpp, 16R, 16G, 16B, 16A, the 2-byte value for each R/G/B/A component is stored as little-endian
+    AV_PIX_FMT_BGRA64BE,     ///< packed BGRA 16:16:16:16, 64bpp, 16B, 16G, 16R, 16A, the 2-byte value for each R/G/B/A component is stored as big-endian
+    AV_PIX_FMT_BGRA64LE,     ///< packed BGRA 16:16:16:16, 64bpp, 16B, 16G, 16R, 16A, the 2-byte value for each R/G/B/A component is stored as little-endian
 
     AV_PIX_FMT_YVYU422,   ///< packed YUV 4:2:2, 16bpp, Y0 Cr Y1 Cb
 
@@ -234,14 +226,9 @@ enum AVPixelFormat {
     AV_PIX_FMT_YA16BE,       ///< 16bit gray, 16bit alpha (big-endian)
     AV_PIX_FMT_YA16LE,       ///< 16bit gray, 16bit alpha (little-endian)
 
-    /**
-     * duplicated pixel formats for compatibility with libav.
-     * FFmpeg supports these formats since May 3 2013 (commit e6d4e687558d08187e7a415a7725e4b1a416f782)
-     * Libav added them Jan 14 2015 with incompatible values (commit 0e6c7dfa650e8b0497bfa7a06394b7a462ddc33a)
-     */
-    AV_PIX_FMT_GBRAP_LIBAV,        ///< planar GBRA 4:4:4:4 32bpp
-    AV_PIX_FMT_GBRAP16BE_LIBAV,    ///< planar GBRA 4:4:4:4 64bpp, big-endian
-    AV_PIX_FMT_GBRAP16LE_LIBAV,    ///< planar GBRA 4:4:4:4 64bpp, little-endian
+    AV_PIX_FMT_GBRAP,        ///< planar GBRA 4:4:4:4 32bpp
+    AV_PIX_FMT_GBRAP16BE,    ///< planar GBRA 4:4:4:4 64bpp, big-endian
+    AV_PIX_FMT_GBRAP16LE,    ///< planar GBRA 4:4:4:4 64bpp, little-endian
     /**
      *  HW acceleration through QSV, data[3] contains a pointer to the
      *  mfxFrameSurface1 structure.
@@ -255,18 +242,10 @@ enum AVPixelFormat {
 
     AV_PIX_FMT_D3D11VA_VLD,  ///< HW decoding through Direct3D11, Picture.data[3] contains a ID3D11VideoDecoderOutputView pointer
 
-#ifndef AV_PIX_FMT_ABI_GIT_MASTER
-    AV_PIX_FMT_RGBA64BE=0x123,  ///< packed RGBA 16:16:16:16, 64bpp, 16R, 16G, 16B, 16A, the 2-byte value for each R/G/B/A component is stored as big-endian
-    AV_PIX_FMT_RGBA64LE,  ///< packed RGBA 16:16:16:16, 64bpp, 16R, 16G, 16B, 16A, the 2-byte value for each R/G/B/A component is stored as little-endian
-    AV_PIX_FMT_BGRA64BE,  ///< packed RGBA 16:16:16:16, 64bpp, 16B, 16G, 16R, 16A, the 2-byte value for each R/G/B/A component is stored as big-endian
-    AV_PIX_FMT_BGRA64LE,  ///< packed RGBA 16:16:16:16, 64bpp, 16B, 16G, 16R, 16A, the 2-byte value for each R/G/B/A component is stored as little-endian
-#endif
     AV_PIX_FMT_0RGB=0x123+4,///< packed RGB 8:8:8, 32bpp, XRGBXRGB...   X=unused/undefined
     AV_PIX_FMT_RGB0,        ///< packed RGB 8:8:8, 32bpp, RGBXRGBX...   X=unused/undefined
     AV_PIX_FMT_0BGR,        ///< packed BGR 8:8:8, 32bpp, XBGRXBGR...   X=unused/undefined
     AV_PIX_FMT_BGR0,        ///< packed BGR 8:8:8, 32bpp, BGRXBGRX...   X=unused/undefined
-    AV_PIX_FMT_YUVA444P,  ///< planar YUV 4:4:4 32bpp, (1 Cr & Cb sample per 1x1 Y & A samples)
-    AV_PIX_FMT_YUVA422P,  ///< planar YUV 4:2:2 24bpp, (1 Cr & Cb sample per 2x1 Y & A samples)
 
     AV_PIX_FMT_YUV420P12BE, ///< planar YUV 4:2:0,18bpp, (1 Cr & Cb sample per 2x2 Y samples), big-endian
     AV_PIX_FMT_YUV420P12LE, ///< planar YUV 4:2:0,18bpp, (1 Cr & Cb sample per 2x2 Y samples), little-endian
@@ -284,9 +263,6 @@ enum AVPixelFormat {
     AV_PIX_FMT_GBRP12LE,    ///< planar GBR 4:4:4 36bpp, little-endian
     AV_PIX_FMT_GBRP14BE,    ///< planar GBR 4:4:4 42bpp, big-endian
     AV_PIX_FMT_GBRP14LE,    ///< planar GBR 4:4:4 42bpp, little-endian
-    AV_PIX_FMT_GBRAP,       ///< planar GBRA 4:4:4:4 32bpp
-    AV_PIX_FMT_GBRAP16BE,   ///< planar GBRA 4:4:4:4 64bpp, big-endian
-    AV_PIX_FMT_GBRAP16LE,   ///< planar GBRA 4:4:4:4 64bpp, little-endian
     AV_PIX_FMT_YUVJ411P,    ///< planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples) full scale (JPEG), deprecated in favor of AV_PIX_FMT_YUV411P and setting color_range
 
     AV_PIX_FMT_BAYER_BGGR8,    ///< bayer, BGBG..(odd line), GRGR..(even line), 8-bit samples */
@@ -308,26 +284,16 @@ enum AVPixelFormat {
     AV_PIX_FMT_YUV440P10BE, ///< planar YUV 4:4:0,20bpp, (1 Cr & Cb sample per 1x2 Y samples), big-endian
     AV_PIX_FMT_YUV440P12LE, ///< planar YUV 4:4:0,24bpp, (1 Cr & Cb sample per 1x2 Y samples), little-endian
     AV_PIX_FMT_YUV440P12BE, ///< planar YUV 4:4:0,24bpp, (1 Cr & Cb sample per 1x2 Y samples), big-endian
+    AV_PIX_FMT_AYUV64LE,    ///< packed AYUV 4:4:4,64bpp (1 Cr & Cb sample per 1x1 Y & A samples), little-endian
+    AV_PIX_FMT_AYUV64BE,    ///< packed AYUV 4:4:4,64bpp (1 Cr & Cb sample per 1x1 Y & A samples), big-endian
 
-    AV_PIX_FMT_NB,        ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions
-
-#if FF_API_PIX_FMT
-#include "old_pix_fmts.h"
-#endif
-};
+    AV_PIX_FMT_VIDEOTOOLBOX, ///< hardware decoding through Videotoolbox
 
-#if AV_HAVE_INCOMPATIBLE_LIBAV_ABI
-#define AV_PIX_FMT_YUVA422P AV_PIX_FMT_YUVA422P_LIBAV
-#define AV_PIX_FMT_YUVA444P AV_PIX_FMT_YUVA444P_LIBAV
-#define AV_PIX_FMT_RGBA64BE AV_PIX_FMT_RGBA64BE_LIBAV
-#define AV_PIX_FMT_RGBA64LE AV_PIX_FMT_RGBA64LE_LIBAV
-#define AV_PIX_FMT_BGRA64BE AV_PIX_FMT_BGRA64BE_LIBAV
-#define AV_PIX_FMT_BGRA64LE AV_PIX_FMT_BGRA64LE_LIBAV
-#define AV_PIX_FMT_GBRAP     AV_PIX_FMT_GBRAP_LIBAV
-#define AV_PIX_FMT_GBRAP16BE AV_PIX_FMT_GBRAP16BE_LIBAV
-#define AV_PIX_FMT_GBRAP16LE AV_PIX_FMT_GBRAP16LE_LIBAV
-#endif
+    AV_PIX_FMT_P010LE, ///< like NV12, with 10bpp per component, data in the high bits, zeros in the low bits, little-endian
+    AV_PIX_FMT_P010BE, ///< like NV12, with 10bpp per component, data in the high bits, zeros in the low bits, big-endian
 
+    AV_PIX_FMT_NB,        ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions
+};
 
 #define AV_PIX_FMT_Y400A AV_PIX_FMT_GRAY8A
 #define AV_PIX_FMT_GBR24P AV_PIX_FMT_GBRP
@@ -401,74 +367,26 @@ enum AVPixelFormat {
 
 #define AV_PIX_FMT_XYZ12      AV_PIX_FMT_NE(XYZ12BE, XYZ12LE)
 #define AV_PIX_FMT_NV20       AV_PIX_FMT_NE(NV20BE,  NV20LE)
-
-
-#if FF_API_PIX_FMT
-#define PixelFormat AVPixelFormat
-
-#define PIX_FMT_Y400A AV_PIX_FMT_Y400A
-#define PIX_FMT_GBR24P AV_PIX_FMT_GBR24P
-
-#define PIX_FMT_NE(be, le) AV_PIX_FMT_NE(be, le)
-
-#define PIX_FMT_RGB32   AV_PIX_FMT_RGB32
-#define PIX_FMT_RGB32_1 AV_PIX_FMT_RGB32_1
-#define PIX_FMT_BGR32   AV_PIX_FMT_BGR32
-#define PIX_FMT_BGR32_1 AV_PIX_FMT_BGR32_1
-#define PIX_FMT_0RGB32  AV_PIX_FMT_0RGB32
-#define PIX_FMT_0BGR32  AV_PIX_FMT_0BGR32
-
-#define PIX_FMT_GRAY16 AV_PIX_FMT_GRAY16
-#define PIX_FMT_RGB48  AV_PIX_FMT_RGB48
-#define PIX_FMT_RGB565 AV_PIX_FMT_RGB565
-#define PIX_FMT_RGB555 AV_PIX_FMT_RGB555
-#define PIX_FMT_RGB444 AV_PIX_FMT_RGB444
-#define PIX_FMT_BGR48  AV_PIX_FMT_BGR48
-#define PIX_FMT_BGR565 AV_PIX_FMT_BGR565
-#define PIX_FMT_BGR555 AV_PIX_FMT_BGR555
-#define PIX_FMT_BGR444 AV_PIX_FMT_BGR444
-
-#define PIX_FMT_YUV420P9  AV_PIX_FMT_YUV420P9
-#define PIX_FMT_YUV422P9  AV_PIX_FMT_YUV422P9
-#define PIX_FMT_YUV444P9  AV_PIX_FMT_YUV444P9
-#define PIX_FMT_YUV420P10 AV_PIX_FMT_YUV420P10
-#define PIX_FMT_YUV422P10 AV_PIX_FMT_YUV422P10
-#define PIX_FMT_YUV444P10 AV_PIX_FMT_YUV444P10
-#define PIX_FMT_YUV420P12 AV_PIX_FMT_YUV420P12
-#define PIX_FMT_YUV422P12 AV_PIX_FMT_YUV422P12
-#define PIX_FMT_YUV444P12 AV_PIX_FMT_YUV444P12
-#define PIX_FMT_YUV420P14 AV_PIX_FMT_YUV420P14
-#define PIX_FMT_YUV422P14 AV_PIX_FMT_YUV422P14
-#define PIX_FMT_YUV444P14 AV_PIX_FMT_YUV444P14
-#define PIX_FMT_YUV420P16 AV_PIX_FMT_YUV420P16
-#define PIX_FMT_YUV422P16 AV_PIX_FMT_YUV422P16
-#define PIX_FMT_YUV444P16 AV_PIX_FMT_YUV444P16
-
-#define PIX_FMT_RGBA64 AV_PIX_FMT_RGBA64
-#define PIX_FMT_BGRA64 AV_PIX_FMT_BGRA64
-#define PIX_FMT_GBRP9  AV_PIX_FMT_GBRP9
-#define PIX_FMT_GBRP10 AV_PIX_FMT_GBRP10
-#define PIX_FMT_GBRP12 AV_PIX_FMT_GBRP12
-#define PIX_FMT_GBRP14 AV_PIX_FMT_GBRP14
-#define PIX_FMT_GBRP16 AV_PIX_FMT_GBRP16
-#endif
+#define AV_PIX_FMT_AYUV64     AV_PIX_FMT_NE(AYUV64BE, AYUV64LE)
+#define AV_PIX_FMT_P010       AV_PIX_FMT_NE(P010BE,  P010LE)
 
 /**
   * Chromaticity coordinates of the source primaries.
   */
 enum AVColorPrimaries {
     AVCOL_PRI_RESERVED0   = 0,
-    AVCOL_PRI_BT709       = 1, ///< also ITU-R BT1361 / IEC 61966-2-4 / SMPTE RP177 Annex B
+    AVCOL_PRI_BT709       = 1,  ///< also ITU-R BT1361 / IEC 61966-2-4 / SMPTE RP177 Annex B
     AVCOL_PRI_UNSPECIFIED = 2,
     AVCOL_PRI_RESERVED    = 3,
-    AVCOL_PRI_BT470M      = 4, ///< also FCC Title 47 Code of Federal Regulations 73.682 (a)(20)
-
-    AVCOL_PRI_BT470BG     = 5, ///< also ITU-R BT601-6 625 / ITU-R BT1358 625 / ITU-R BT1700 625 PAL & SECAM
-    AVCOL_PRI_SMPTE170M   = 6, ///< also ITU-R BT601-6 525 / ITU-R BT1358 525 / ITU-R BT1700 NTSC
-    AVCOL_PRI_SMPTE240M   = 7, ///< functionally identical to above
-    AVCOL_PRI_FILM        = 8, ///< colour filters using Illuminant C
-    AVCOL_PRI_BT2020      = 9, ///< ITU-R BT2020
-    AVCOL_PRI_NB,              ///< Not part of ABI
+    AVCOL_PRI_BT470M      = 4,  ///< also FCC Title 47 Code of Federal Regulations 73.682 (a)(20)
+
+    AVCOL_PRI_BT470BG     = 5,  ///< also ITU-R BT601-6 625 / ITU-R BT1358 625 / ITU-R BT1700 625 PAL & SECAM
+    AVCOL_PRI_SMPTE170M   = 6,  ///< also ITU-R BT601-6 525 / ITU-R BT1358 525 / ITU-R BT1700 NTSC
+    AVCOL_PRI_SMPTE240M   = 7,  ///< functionally identical to above
+    AVCOL_PRI_FILM        = 8,  ///< colour filters using Illuminant C
+    AVCOL_PRI_BT2020      = 9,  ///< ITU-R BT2020
+    AVCOL_PRI_SMPTEST428_1= 10, ///< SMPTE ST 428-1 (CIE 1931 XYZ)
+    AVCOL_PRI_NB,               ///< Not part of ABI
 };
 
 /**
@@ -491,6 +409,8 @@ enum AVColorTransferCharacteristic {
     AVCOL_TRC_IEC61966_2_1 = 13, ///< IEC 61966-2-1 (sRGB or sYCC)
     AVCOL_TRC_BT2020_10    = 14, ///< ITU-R BT2020 for 10 bit system
     AVCOL_TRC_BT2020_12    = 15, ///< ITU-R BT2020 for 12 bit system
+    AVCOL_TRC_SMPTEST2084  = 16, ///< SMPTE ST 2084 for 10, 12, 14 and 16 bit systems
+    AVCOL_TRC_SMPTEST428_1 = 17, ///< SMPTE ST 428-1
     AVCOL_TRC_NB,                ///< Not part of ABI
 };
 
diff --git a/libavutil/qsort.h b/libavutil/qsort.h
index 30edcc83..39b7a088 100644
--- a/libavutil/qsort.h
+++ b/libavutil/qsort.h
@@ -18,6 +18,9 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#ifndef AVUTIL_QSORT_H
+#define AVUTIL_QSORT_H
+
 #include "common.h"
 
 
@@ -27,7 +30,7 @@
  * to construct input that requires O(n^2) time but this is very unlikely to
  * happen with non constructed input.
  */
-#define AV_QSORT(p, num, type, cmp) {\
+#define AV_QSORT(p, num, type, cmp) do {\
     void *stack[64][2];\
     int sp= 1;\
     stack[0][0] = p;\
@@ -89,7 +92,7 @@
             }\
         }\
     }\
-}
+} while (0)
 
 /**
  * Merge sort, this sort requires a temporary buffer and is stable, its worst
@@ -97,7 +100,7 @@
  * @param p     must be a lvalue pointer, this function may exchange it with tmp
  * @param tmp   must be a lvalue pointer, this function may exchange it with p
  */
-#define AV_MSORT(p, tmp, num, type, cmp) {\
+#define AV_MSORT(p, tmp, num, type, cmp) do {\
     unsigned i, j, step;\
     for(step=1; step<(num); step+=step){\
         for(i=0; i<(num); i+=2*step){\
@@ -114,4 +117,6 @@
         }\
         FFSWAP(type*, p, tmp);\
     }\
-}
+} while (0)
+
+#endif /* AVUTIL_QSORT_H */
diff --git a/libavutil/random_seed.c b/libavutil/random_seed.c
index 8aa8c387..0821550b 100644
--- a/libavutil/random_seed.c
+++ b/libavutil/random_seed.c
@@ -97,8 +97,13 @@ static uint32_t get_generic_seed(void)
         last_t = t;
     }
 
-    if(TEST)
+    if(TEST) {
         buffer[0] = buffer[1] = 0;
+    } else {
+#ifdef AV_READ_TIME
+        buffer[111] += AV_READ_TIME();
+#endif
+    }
 
     av_sha_init(sha, 160);
     av_sha_update(sha, (const uint8_t *)buffer, sizeof(buffer));
@@ -121,6 +126,10 @@ uint32_t av_get_random_seed(void)
     }
 #endif
 
+#if HAVE_ARC4RANDOM
+    return arc4random();
+#endif
+
     if (read_random(&seed, "/dev/urandom") == sizeof(seed))
         return seed;
     if (read_random(&seed, "/dev/random")  == sizeof(seed))
diff --git a/libavutil/rational.c b/libavutil/rational.c
index 21d2bb72..6b3f50a0 100644
--- a/libavutil/rational.c
+++ b/libavutil/rational.c
@@ -106,14 +106,14 @@ AVRational av_sub_q(AVRational b, AVRational c)
 AVRational av_d2q(double d, int max)
 {
     AVRational a;
-#define LOG2  0.69314718055994530941723212145817656807550013436025
     int exponent;
     int64_t den;
     if (isnan(d))
         return (AVRational) { 0,0 };
     if (fabs(d) > INT_MAX + 3LL)
         return (AVRational) { d < 0 ? -1 : 1, 0 };
-    exponent = FFMAX( (int)(log(fabs(d) + 1e-20)/LOG2), 0);
+    frexp(d, &exponent);
+    exponent = FFMAX(exponent-1, 0);
     den = 1LL << (61 - exponent);
     // (int64_t)rint() and llrint() do not work with gcc on ia64 and sparc64
     av_reduce(&a.num, &a.den, floor(d * den + 0.5), den, max);
@@ -183,9 +183,18 @@ uint32_t av_q2intfloat(AVRational q) {
 }
 
 #ifdef TEST
+
+#include "integer.h"
+
 int main(void)
 {
     AVRational a,b,r;
+    int i,j,k;
+    static const int64_t numlist[] = {
+        INT64_MIN, INT64_MIN+1, INT64_MAX, INT32_MIN, INT32_MAX, 1,0,-1,
+        123456789, INT32_MAX-1, INT32_MAX+1LL, UINT32_MAX-1, UINT32_MAX, UINT32_MAX+1LL
+    };
+
     for (a.num = -2; a.num <= 2; a.num++) {
         for (a.den = -2; a.den <= 2; a.den++) {
             for (b.num = -2; b.num <= 2; b.num++) {
@@ -207,6 +216,41 @@ int main(void)
         }
     }
 
+    for (i = 0; i < FF_ARRAY_ELEMS(numlist); i++) {
+        int64_t a = numlist[i];
+
+        for (j = 0; j < FF_ARRAY_ELEMS(numlist); j++) {
+            int64_t b = numlist[j];
+            if (b<=0)
+                continue;
+            for (k = 0; k < FF_ARRAY_ELEMS(numlist); k++) {
+                int64_t c = numlist[k];
+                int64_t res;
+                AVInteger ai;
+
+                if (c<=0)
+                    continue;
+                res = av_rescale_rnd(a,b,c, AV_ROUND_ZERO);
+
+                ai = av_mul_i(av_int2i(a), av_int2i(b));
+                ai = av_div_i(ai, av_int2i(c));
+
+                if (av_cmp_i(ai, av_int2i(INT64_MAX)) > 0 && res == INT64_MIN)
+                    continue;
+                if (av_cmp_i(ai, av_int2i(INT64_MIN)) < 0 && res == INT64_MIN)
+                    continue;
+                if (av_cmp_i(ai, av_int2i(res)) == 0)
+                    continue;
+
+                // Special exception for INT64_MIN, remove this in case INT64_MIN is handled without off by 1 error
+                if (av_cmp_i(ai, av_int2i(res-1)) == 0 && a == INT64_MIN)
+                    continue;
+
+                av_log(NULL, AV_LOG_ERROR, "%"PRId64" * %"PRId64" / %"PRId64" = %"PRId64" or %"PRId64"\n", a,b,c, res, av_i2int(ai));
+            }
+        }
+    }
+
     for (a.num = 1; a.num <= 10; a.num++) {
         for (a.den = 1; a.den <= 10; a.den++) {
             if (av_gcd(a.num, a.den) > 1)
diff --git a/libavutil/rc4.c b/libavutil/rc4.c
index 4e52ba5a..ffcb1121 100644
--- a/libavutil/rc4.c
+++ b/libavutil/rc4.c
@@ -22,9 +22,13 @@
  */
 #include "avutil.h"
 #include "common.h"
+#include "mem.h"
 #include "rc4.h"
 
-typedef struct AVRC4 AVRC4;
+AVRC4 *av_rc4_alloc(void)
+{
+    return av_mallocz(sizeof(AVRC4)); /* AVRC4 is the typedef'd struct from rc4.h */
+}
 
 int av_rc4_init(AVRC4 *r, const uint8_t *key, int key_bits, int decrypt) {
     int i, j;
@@ -32,7 +36,7 @@ int av_rc4_init(AVRC4 *r, const uint8_t *key, int key_bits, int decrypt) {
     uint8_t *state = r->state;
     int keylen = key_bits >> 3;
     if (key_bits & 7)
-        return -1;
+        return AVERROR(EINVAL);
     for (i = 0; i < 256; i++)
         state[i] = i;
     y = 0;
diff --git a/libavutil/rc4.h b/libavutil/rc4.h
index 9362fd88..029cd2ad 100644
--- a/libavutil/rc4.h
+++ b/libavutil/rc4.h
@@ -23,16 +23,28 @@
 
 #include <stdint.h>
 
-struct AVRC4 {
+/**
+ * @defgroup lavu_rc4 RC4
+ * @ingroup lavu_crypto
+ * @{
+ */
+
+typedef struct AVRC4 {
     uint8_t state[256];
     int x, y;
-};
+} AVRC4;
+
+/**
+ * Allocate an AVRC4 context.
+ */
+AVRC4 *av_rc4_alloc(void);
 
 /**
  * @brief Initializes an AVRC4 context.
  *
  * @param key_bits must be a multiple of 8
  * @param decrypt 0 for encryption, 1 for decryption, currently has no effect
+ * @return zero on success, negative value otherwise
  */
 int av_rc4_init(struct AVRC4 *d, const uint8_t *key, int key_bits, int decrypt);
 
@@ -47,4 +59,8 @@ int av_rc4_init(struct AVRC4 *d, const uint8_t *key, int key_bits, int decrypt);
  */
 void av_rc4_crypt(struct AVRC4 *d, uint8_t *dst, const uint8_t *src, int count, uint8_t *iv, int decrypt);
 
+/**
+ * @}
+ */
+
 #endif /* AVUTIL_RC4_H */
diff --git a/libavutil/reverse.c b/libavutil/reverse.c
new file mode 100644
index 00000000..105eb03d
--- /dev/null
+++ b/libavutil/reverse.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+const uint8_t ff_reverse[256] = { /* ff_reverse[i] is i with its 8 bits in reversed order */
+0x00,0x80,0x40,0xC0,0x20,0xA0,0x60,0xE0,0x10,0x90,0x50,0xD0,0x30,0xB0,0x70,0xF0,
+0x08,0x88,0x48,0xC8,0x28,0xA8,0x68,0xE8,0x18,0x98,0x58,0xD8,0x38,0xB8,0x78,0xF8,
+0x04,0x84,0x44,0xC4,0x24,0xA4,0x64,0xE4,0x14,0x94,0x54,0xD4,0x34,0xB4,0x74,0xF4,
+0x0C,0x8C,0x4C,0xCC,0x2C,0xAC,0x6C,0xEC,0x1C,0x9C,0x5C,0xDC,0x3C,0xBC,0x7C,0xFC,
+0x02,0x82,0x42,0xC2,0x22,0xA2,0x62,0xE2,0x12,0x92,0x52,0xD2,0x32,0xB2,0x72,0xF2,
+0x0A,0x8A,0x4A,0xCA,0x2A,0xAA,0x6A,0xEA,0x1A,0x9A,0x5A,0xDA,0x3A,0xBA,0x7A,0xFA,
+0x06,0x86,0x46,0xC6,0x26,0xA6,0x66,0xE6,0x16,0x96,0x56,0xD6,0x36,0xB6,0x76,0xF6,
+0x0E,0x8E,0x4E,0xCE,0x2E,0xAE,0x6E,0xEE,0x1E,0x9E,0x5E,0xDE,0x3E,0xBE,0x7E,0xFE,
+0x01,0x81,0x41,0xC1,0x21,0xA1,0x61,0xE1,0x11,0x91,0x51,0xD1,0x31,0xB1,0x71,0xF1,
+0x09,0x89,0x49,0xC9,0x29,0xA9,0x69,0xE9,0x19,0x99,0x59,0xD9,0x39,0xB9,0x79,0xF9,
+0x05,0x85,0x45,0xC5,0x25,0xA5,0x65,0xE5,0x15,0x95,0x55,0xD5,0x35,0xB5,0x75,0xF5,
+0x0D,0x8D,0x4D,0xCD,0x2D,0xAD,0x6D,0xED,0x1D,0x9D,0x5D,0xDD,0x3D,0xBD,0x7D,0xFD,
+0x03,0x83,0x43,0xC3,0x23,0xA3,0x63,0xE3,0x13,0x93,0x53,0xD3,0x33,0xB3,0x73,0xF3,
+0x0B,0x8B,0x4B,0xCB,0x2B,0xAB,0x6B,0xEB,0x1B,0x9B,0x5B,0xDB,0x3B,0xBB,0x7B,0xFB,
+0x07,0x87,0x47,0xC7,0x27,0xA7,0x67,0xE7,0x17,0x97,0x57,0xD7,0x37,0xB7,0x77,0xF7,
+0x0F,0x8F,0x4F,0xCF,0x2F,0xAF,0x6F,0xEF,0x1F,0x9F,0x5F,0xDF,0x3F,0xBF,0x7F,0xFF,
+};
diff --git a/libavutil/ripemd.c b/libavutil/ripemd.c
index 00848605..6777c994 100644
--- a/libavutil/ripemd.c
+++ b/libavutil/ripemd.c
@@ -85,7 +85,7 @@ static const int WB[80] = {
     12, 15, 10,  4,  1,  5,  8,  7,  6,  2, 13, 14,  0,  3,  9, 11
 };
 
-#define rol(value, bits) ((value << bits) | (value >> (32 - bits)))
+#define rol(value, bits) (((value) << (bits)) | ((value) >> (32 - (bits))))
 
 #define ROUND128_0_TO_15(a,b,c,d,e,f,g,h)                               \
     a = rol(a + ((  b ^ c  ^ d)      + block[WA[n]]),         ROTA[n]); \
@@ -504,7 +504,7 @@ av_cold int av_ripemd_init(AVRIPEMD *ctx, int bits)
         ctx->transform = ripemd320_transform;
         break;
     default:
-        return -1;
+        return AVERROR(EINVAL);
     }
     ctx->count = 0;
     return 0;
diff --git a/libavutil/sha.c b/libavutil/sha.c
index 9963043f..748bb9c5 100644
--- a/libavutil/sha.c
+++ b/libavutil/sha.c
@@ -305,7 +305,7 @@ av_cold int av_sha_init(AVSHA *ctx, int bits)
         ctx->transform = sha256_transform;
         break;
     default:
-        return -1;
+        return AVERROR(EINVAL);
     }
     ctx->count = 0;
     return 0;
diff --git a/libavutil/sha512.c b/libavutil/sha512.c
index 66a864f1..e2fc58a4 100644
--- a/libavutil/sha512.c
+++ b/libavutil/sha512.c
@@ -233,7 +233,7 @@ av_cold int av_sha512_init(AVSHA512 *ctx, int bits)
         ctx->state[7] = UINT64_C(0x5BE0CD19137E2179);
         break;
     default:
-        return -1;
+        return AVERROR(EINVAL);
     }
     ctx->count = 0;
     return 0;
diff --git a/libavutil/softfloat.c b/libavutil/softfloat.c
index 4fc6860e..4bfbbb26 100644
--- a/libavutil/softfloat.c
+++ b/libavutil/softfloat.c
@@ -19,73 +19,18 @@
  */
 
 #include <inttypes.h>
-#include <stdio.h>
 #include "softfloat.h"
 #include "common.h"
 #include "log.h"
 
-#undef printf
+#ifdef TEST
+#include <stdio.h>
 
 static const SoftFloat FLOAT_0_017776489257 = {0x1234, 12};
 static const SoftFloat FLOAT_1374_40625 = {0xabcd, 25};
 static const SoftFloat FLOAT_0_1249694824218 = {0xFFF, 15};
 
 
-static av_const double av_sf2double(SoftFloat v) {
-    v.exp -= ONE_BITS +1;
-    if(v.exp > 0) return (double)v.mant * (double)(1 << v.exp);
-    else          return (double)v.mant / (double)(1 << (-v.exp));
-}
-
-void av_sincos_sf(int a, int *s, int *c)
-{
-    int idx, sign;
-    int sv, cv;
-    int st, ct;
-
-    idx = a >> 26;
-    sign = (idx << 27) >> 31;
-    cv = av_costbl_1_sf[idx & 0xf];
-    cv = (cv ^ sign) - sign;
-
-    idx -= 8;
-    sign = (idx << 27) >> 31;
-    sv = av_costbl_1_sf[idx & 0xf];
-    sv = (sv ^ sign) - sign;
-
-    idx = a >> 21;
-    ct = av_costbl_2_sf[idx & 0x1f];
-    st = av_sintbl_2_sf[idx & 0x1f];
-
-    idx = (int)(((int64_t)cv * ct - (int64_t)sv * st + 0x20000000) >> 30);
-
-    sv = (int)(((int64_t)cv * st + (int64_t)sv * ct + 0x20000000) >> 30);
-
-    cv = idx;
-
-    idx = a >> 16;
-    ct = av_costbl_3_sf[idx & 0x1f];
-    st = av_sintbl_3_sf[idx & 0x1f];
-
-    idx = (int)(((int64_t)cv * ct - (int64_t)sv * st + 0x20000000) >> 30);
-
-    sv = (int)(((int64_t)cv * st + (int64_t)sv * ct + 0x20000000) >> 30);
-    cv = idx;
-
-    idx = a >> 11;
-
-    ct = (int)(((int64_t)av_costbl_4_sf[idx & 0x1f] * (0x800 - (a & 0x7ff)) +
-                (int64_t)av_costbl_4_sf[(idx & 0x1f)+1]*(a & 0x7ff) +
-                0x400) >> 11);
-    st = (int)(((int64_t)av_sintbl_4_sf[idx & 0x1f] * (0x800 - (a & 0x7ff)) +
-                (int64_t)av_sintbl_4_sf[(idx & 0x1f) + 1] * (a & 0x7ff) +
-                0x400) >> 11);
-
-    *c = (int)(((int64_t)cv * ct + (int64_t)sv * st + 0x20000000) >> 30);
-
-    *s = (int)(((int64_t)cv * st + (int64_t)sv * ct + 0x20000000) >> 30);
-}
-
 int main(void){
     SoftFloat one= av_int2sf(1, 0);
     SoftFloat sf1, sf2, sf3;
@@ -152,6 +97,61 @@ int main(void){
     sf1 = av_int2sf(0xE0000001, 0);
     printf("test4 softfloat: %.10lf (0x%08x %d)\n", (double)av_sf2double(sf1), sf1.mant, sf1.exp);
 
+
+    sf1 = (SoftFloat){ 0x20000000,   MIN_EXP };
+    sf1 = av_mul_sf(sf1, sf1);
+    printf("test5 softfloat: %.10lf (0x%08x %d)\n", (double)av_sf2double(sf1), sf1.mant, sf1.exp);
+
+    sf1 = (SoftFloat){ 0x20000000,   MIN_EXP };
+    sf2 = (SoftFloat){ 0x20000000,   MAX_EXP };
+    i = av_cmp_sf(sf1, sf2);
+    j = av_cmp_sf(sf2, sf1);
+    sf1 = av_div_sf(sf1, sf2);
+    printf("test6 softfloat: %.10lf (0x%08x %d) %d %d\n", (double)av_sf2double(sf1), sf1.mant, sf1.exp, i, j);
+
+    for(i= -50; i<50; i++) {
+        sf1= av_int2sf(i, 0);
+        for(j= -50; j<50; j++) {
+            int c;
+            sf2= av_int2sf(j, 0);
+            c = av_cmp_sf(sf1, sf2);
+            if (FFDIFFSIGN(i,j) != c && (FFDIFFSIGN(i,j)^c)<0) {
+                printf("av_cmp_sf failed at %d %d as %X\n", i, j, c);
+            }
+            c = av_gt_sf(sf1, sf2);
+            if ((i>j) != c) {
+                printf("av_gt_sf failed at %d %d as %X\n", i, j, c);
+            }
+        }
+        sf1 = av_int2sf(1, i);
+        for(j = -50; j < 50; j++) {
+            int c;
+            sf2 = av_int2sf(1, j);
+            c = av_cmp_sf(sf2, sf1);
+            if (FFDIFFSIGN(i,j) != c && (FFDIFFSIGN(i,j)^c) < 0) {
+                printf("av_cmp_sf failed2 at %d %d as %X\n", i, j, c);
+            }
+            c = av_gt_sf(sf1, sf2);
+            if ((i<j) != c) {
+                printf("av_gt_sf failed2 at %d %d as %X\n", i, j, c);
+            }
+        }
+    }
+
+
+    for(i= 0; i<4*36; i++){
+        int s, c;
+        double errs, errc;
+
+        av_sincos_sf(i*(1ULL<<32)/36/4, &s, &c);
+        errs = (double)s/ (1<<30) - sin(i*M_PI/36);
+        errc = (double)c/ (1<<30) - cos(i*M_PI/36);
+        if (fabs(errs) > 0.00000002 || fabs(errc) >0.001) {
+            printf("sincos FAIL %d %f %f %f %f\n", i, (float)s/ (1<<30), (float)c/ (1<<30), sin(i*M_PI/36), cos(i*M_PI/36));
+        }
+
+    }
     return 0;
 
 }
+#endif
diff --git a/libavutil/softfloat.h b/libavutil/softfloat.h
index 392b6d81..4b895f01 100644
--- a/libavutil/softfloat.h
+++ b/libavutil/softfloat.h
@@ -36,6 +36,20 @@ typedef struct SoftFloat{
     int32_t  exp;
 }SoftFloat;
 
+static const SoftFloat FLOAT_0          = {          0,   MIN_EXP};
+static const SoftFloat FLOAT_05         = { 0x20000000,   0};
+static const SoftFloat FLOAT_1          = { 0x20000000,   1};
+static const SoftFloat FLOAT_EPSILON    = { 0x29F16B12, -16};
+static const SoftFloat FLOAT_1584893192 = { 0x32B771ED,   1};
+static const SoftFloat FLOAT_100000     = { 0x30D40000,  17};
+static const SoftFloat FLOAT_0999999    = { 0x3FFFFBCE,   0};
+
+static inline av_const double av_sf2double(SoftFloat v) {
+    /* ldexp() scales by 2^exp exactly; (1 << exp) is undefined once |exp| > 30 */
+    v.exp -= ONE_BITS +1;
+    return ldexp(v.mant, v.exp);
+}
+
 static av_const SoftFloat av_normalize_sf(SoftFloat a){
     if(a.mant){
 #if 1
@@ -65,6 +79,7 @@ static inline av_const SoftFloat av_normalize1_sf(SoftFloat a){
         a.mant>>=1;
     }
     av_assert2(a.mant < 0x40000000 && a.mant > -0x40000000);
+    av_assert2(a.exp <= MAX_EXP);
     return a;
 #elif 1
     int t= a.mant + 0x40000000 < 0;
@@ -76,7 +91,7 @@ static inline av_const SoftFloat av_normalize1_sf(SoftFloat a){
 }
 
 /**
- * @return Will not be more denormalized than a+b. So if either input is
+ * @return Will not be more denormalized than a*b. So if either input is
  *         normalized, then the output will not be worse then the other input.
  *         If both are normalized, then the output will be normalized.
  */
@@ -84,30 +99,40 @@ static inline av_const SoftFloat av_mul_sf(SoftFloat a, SoftFloat b){
     a.exp += b.exp;
     av_assert2((int32_t)((a.mant * (int64_t)b.mant) >> ONE_BITS) == (a.mant * (int64_t)b.mant) >> ONE_BITS);
     a.mant = (a.mant * (int64_t)b.mant) >> ONE_BITS;
-    return av_normalize1_sf((SoftFloat){a.mant, a.exp - 1});
+    a = av_normalize1_sf((SoftFloat){a.mant, a.exp - 1});
+    if (!a.mant || a.exp < MIN_EXP)
+        return FLOAT_0;
+    return a;
 }
 
 /**
  * b has to be normalized and not zero.
  * @return Will not be more denormalized than a.
  */
-static av_const SoftFloat av_div_sf(SoftFloat a, SoftFloat b){
+static inline av_const SoftFloat av_div_sf(SoftFloat a, SoftFloat b){
     a.exp -= b.exp;
     a.mant = ((int64_t)a.mant<<(ONE_BITS+1)) / b.mant;
-    return av_normalize1_sf(a);
+    a = av_normalize1_sf(a);
+    if (!a.mant || a.exp < MIN_EXP)
+        return FLOAT_0;
+    return a;
 }
 
 static inline av_const int av_cmp_sf(SoftFloat a, SoftFloat b){
     int t= a.exp - b.exp;
-    if(t<0) return (a.mant >> (-t)) -  b.mant      ;
-    else    return  a.mant          - (b.mant >> t);
+    if      (t <-31) return                  -  b.mant      ;
+    else if (t <  0) return (a.mant >> (-t)) -  b.mant      ;
+    else if (t < 32) return  a.mant          - (b.mant >> t);
+    else             return  a.mant                         ;
 }
 
 static inline av_const int av_gt_sf(SoftFloat a, SoftFloat b)
 {
     int t= a.exp - b.exp;
-    if(t<0) return (a.mant >> (-t)) >  b.mant      ;
-    else    return  a.mant          > (b.mant >> t);
+    if      (t <-31) return 0                >  b.mant      ;
+    else if (t <  0) return (a.mant >> (-t)) >  b.mant      ;
+    else if (t < 32) return  a.mant          > (b.mant >> t);
+    else             return  a.mant          >  0           ;
 }
 
 static inline av_const SoftFloat av_add_sf(SoftFloat a, SoftFloat b){
@@ -129,7 +154,12 @@ static inline av_const SoftFloat av_sub_sf(SoftFloat a, SoftFloat b){
  * @returns a SoftFloat with value v * 2^frac_bits
  */
 static inline av_const SoftFloat av_int2sf(int v, int frac_bits){
-    return av_normalize_sf((SoftFloat){v, ONE_BITS + 1 - frac_bits});
+    int exp_offset = 0;
+    if(v == INT_MIN){
+        exp_offset = 1;
+        v>>=1;
+    }
+    return av_normalize_sf(av_normalize1_sf((SoftFloat){v, ONE_BITS + 1 - frac_bits + exp_offset}));
 }
 
 /**
@@ -149,7 +179,9 @@ static av_always_inline SoftFloat av_sqrt_sf(SoftFloat val)
     int tabIndex, rem;
 
     if (val.mant == 0)
-        val.exp = 0;
+        val.exp = MIN_EXP;
+    else if (val.mant < 0)
+        abort();
     else
     {
         tabIndex = (val.mant - 0x20000000) >> 20;
@@ -175,6 +207,53 @@ static av_always_inline SoftFloat av_sqrt_sf(SoftFloat val)
 /**
  * Rounding-to-nearest used.
  */
-void av_sincos_sf(int a, int *s, int *c);
+static av_unused void av_sincos_sf(int a, int *s, int *c)
+{
+    int idx, sign;
+    int sv, cv;
+    int st, ct;
+    /* Coarse lookup: bits 26..29 of a index table 1; bit 30 selects negation. */
+    idx = a >> 26;
+    sign = (idx << 27) >> 31;
+    cv = av_costbl_1_sf[idx & 0xf];
+    cv = (cv ^ sign) - sign;
+    /* Second lookup 8 entries back in the same table, sign handled the same way, seeds sv. */
+    idx -= 8;
+    sign = (idx << 27) >> 31;
+    sv = av_costbl_1_sf[idx & 0xf];
+    sv = (sv ^ sign) - sign;
+    /* Next slice: bits 21..25 of a pick the matching entries of the two level-2 tables. */
+    idx = a >> 21;
+    ct = av_costbl_2_sf[idx & 0x1f];
+    st = av_sintbl_2_sf[idx & 0x1f];
+    /* Combine (cv, sv) with (ct, st); adding 0x20000000 before >> 30 rounds to nearest. */
+    idx = (int)(((int64_t)cv * ct - (int64_t)sv * st + 0x20000000) >> 30);
+
+    sv = (int)(((int64_t)cv * st + (int64_t)sv * ct + 0x20000000) >> 30);
+
+    cv = idx;
+    /* Repeat the same combination for bits 16..20 with the level-3 tables. */
+    idx = a >> 16;
+    ct = av_costbl_3_sf[idx & 0x1f];
+    st = av_sintbl_3_sf[idx & 0x1f];
+
+    idx = (int)(((int64_t)cv * ct - (int64_t)sv * st + 0x20000000) >> 30);
+
+    sv = (int)(((int64_t)cv * st + (int64_t)sv * ct + 0x20000000) >> 30);
+    cv = idx;
+    /* Level 4: bits 11..15 index the table, the low 11 bits weight entry idx vs. idx+1. */
+    idx = a >> 11;
+
+    ct = (int)(((int64_t)av_costbl_4_sf[idx & 0x1f] * (0x800 - (a & 0x7ff)) +
+                (int64_t)av_costbl_4_sf[(idx & 0x1f)+1]*(a & 0x7ff) +
+                0x400) >> 11);
+    st = (int)(((int64_t)av_sintbl_4_sf[idx & 0x1f] * (0x800 - (a & 0x7ff)) +
+                (int64_t)av_sintbl_4_sf[(idx & 0x1f) + 1] * (a & 0x7ff) +
+                0x400) >> 11);
+    /* NOTE(review): unlike the stages above, *c adds sv*st instead of subtracting; confirm intended. */
+    *c = (int)(((int64_t)cv * ct + (int64_t)sv * st + 0x20000000) >> 30);
+
+    *s = (int)(((int64_t)cv * st + (int64_t)sv * ct + 0x20000000) >> 30);
+}
 
 #endif /* AVUTIL_SOFTFLOAT_H */
diff --git a/libavutil/tablegen.h b/libavutil/tablegen.h
new file mode 100644
index 00000000..02acdd61
--- /dev/null
+++ b/libavutil/tablegen.h
@@ -0,0 +1,55 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Compatibility libm for table generation files
+ */
+
+#ifndef AVUTIL_TABLEGEN_H
+#define AVUTIL_TABLEGEN_H
+
+#include <math.h>
+
+// Not every host platform provides these libm functions, and we don't care
+// about performance or strict ISO C semantics since this only runs at build time.
+static inline double ff_cbrt(double x)
+{
+    return (x < 0 ? -1.0 : 1.0) * pow(fabs(x), 1.0 / 3.0); /* sign-preserving cube root */
+}
+#define cbrt ff_cbrt
+
+static inline double ff_rint(double x)
+{
+    return x < 0 ? ceil(x - 0.5) : floor(x + 0.5); /* halves round away from zero */
+}
+#define rint ff_rint
+
+static inline long long ff_llrint(double x)
+{
+    return (long long)rint(x); /* rint is #defined to ff_rint at this point */
+}
+#define llrint ff_llrint
+
+static inline long ff_lrint(double x)
+{
+    return (long)rint(x); /* rint is #defined to ff_rint at this point */
+}
+#define lrint ff_lrint
+
+#endif /* AVUTIL_TABLEGEN_H */
diff --git a/libavutil/tea.c b/libavutil/tea.c
new file mode 100644
index 00000000..bf767188
--- /dev/null
+++ b/libavutil/tea.c
@@ -0,0 +1,213 @@
+/*
+ * A 32-bit implementation of the TEA algorithm
+ * Copyright (c) 2015 Vesselin Bontchev
+ *
+ * Loosely based on the implementation of David Wheeler and Roger Needham,
+ * https://en.wikipedia.org/wiki/Tiny_Encryption_Algorithm#Reference_code
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avutil.h"
+#include "common.h"
+#include "intreadwrite.h"
+#include "tea.h"
+
+typedef struct AVTEA {
+    uint32_t key[16];   /* av_tea_init() fills only key[0..3] */
+    int rounds;         /* round count handed to av_tea_init() */
+} AVTEA;
+
+struct AVTEA *av_tea_alloc(void)
+{
+    return av_mallocz(sizeof(AVTEA)); /* same size that av_tea_size reports */
+}
+
+const int av_tea_size = sizeof(AVTEA);
+
+void av_tea_init(AVTEA *ctx, const uint8_t key[16], int rounds)
+{
+    int word;
+
+    /* The 16 key bytes are kept as four big-endian 32-bit words. */
+    ctx->rounds = rounds;
+    for (word = 0; word < 4; word++)
+        ctx->key[word] = AV_RB32(key + 4 * word);
+}
+
+static void tea_crypt_ecb(AVTEA *ctx, uint8_t *dst, const uint8_t *src,
+                          int decrypt, uint8_t *iv)
+{
+    /* A block is two big-endian 32-bit halves; each loop runs rounds/2 times. */
+    const uint32_t delta = 0x9E3779B9U;
+    const uint32_t key0  = ctx->key[0];
+    const uint32_t key1  = ctx->key[1];
+    const uint32_t key2  = ctx->key[2];
+    const uint32_t key3  = ctx->key[3];
+    const int cycles     = ctx->rounds / 2;
+    uint32_t left        = AV_RB32(src);
+    uint32_t right       = AV_RB32(src + 4);
+    uint32_t sum;
+    int n;
+
+    if (!decrypt) {
+        /* Encrypt: sum climbs by delta ahead of each cycle. */
+        sum = 0;
+        for (n = 0; n < cycles; n++) {
+            sum   += delta;
+            left  += ((right << 4) + key0) ^ (right + sum) ^ ((right >> 5) + key1);
+            right += ((left  << 4) + key2) ^ (left  + sum) ^ ((left  >> 5) + key3);
+        }
+    } else {
+        /* Decrypt: begin at the last sum and unwind the cycles in reverse. */
+        sum = delta * cycles;
+        for (n = 0; n < cycles; n++) {
+            right -= ((left  << 4) + key2) ^ (left  + sum) ^ ((left  >> 5) + key3);
+            left  -= ((right << 4) + key0) ^ (right + sum) ^ ((right >> 5) + key1);
+            sum   -= delta;
+        }
+        if (iv) {
+            /* CBC: unmask with the stored chaining value, then store this input block. */
+            left  ^= AV_RB32(iv);
+            right ^= AV_RB32(iv + 4);
+            memcpy(iv, src, 8);
+        }
+    }
+
+    AV_WB32(dst, left);
+    AV_WB32(dst + 4, right);
+}
+
+void av_tea_crypt(AVTEA *ctx, uint8_t *dst, const uint8_t *src, int count,
+                  uint8_t *iv, int decrypt)
+{
+    int k;
+
+    /* Walk the buffer one 8-byte block at a time. */
+    for (; count--; src += 8, dst += 8) {
+        if (decrypt) {
+            /* tea_crypt_ecb() undoes the CBC chaining itself when iv is set. */
+            tea_crypt_ecb(ctx, dst, src, decrypt, iv);
+            continue;
+        }
+
+        if (!iv) {
+            /* No IV: each block is encrypted independently. */
+            tea_crypt_ecb(ctx, dst, src, decrypt, NULL);
+            continue;
+        }
+
+        /* CBC encryption: XOR the block with iv, encrypt it in place in dst,
+         * then keep the output in iv as the next chaining value. */
+        for (k = 0; k < 8; k++)
+            dst[k] = src[k] ^ iv[k];
+        tea_crypt_ecb(ctx, dst, dst, decrypt, NULL);
+        memcpy(iv, dst, 8);
+    }
+}
+
+#ifdef TEST
+#include <stdio.h>
+
+#define TEA_NUM_TESTS 4
+
+// https://github.com/logandrews/TeaCrypt/blob/master/tea/tea_test.go
+static const uint8_t tea_test_key[TEA_NUM_TESTS][16] = {
+    { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+    },
+    { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+    },
+    { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
+      0x88, 0x99, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF
+    },
+    { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
+      0x88, 0x99, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF
+    }
+};
+
+static const uint8_t tea_test_pt[TEA_NUM_TESTS][8] = {
+    { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+    { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 },
+    { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 },
+    { 0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF }
+};
+
+static const uint8_t tea_test_ct[TEA_NUM_TESTS][8] = {
+    { 0x41, 0xEA, 0x3A, 0x0A, 0x94, 0xBA, 0xA9, 0x40 },
+    { 0x6A, 0x2F, 0x9C, 0xF3, 0xFC, 0xCF, 0x3C, 0x55 },
+    { 0xDE, 0xB1, 0xC0, 0xA2, 0x7E, 0x74, 0x5D, 0xB3 },
+    { 0x12, 0x6C, 0x6B, 0x92, 0xC0, 0x65, 0x3A, 0x3E }
+};
+
+static void test_tea(AVTEA *ctx, uint8_t *dst, const uint8_t *src,
+                     const uint8_t *ref, int len, uint8_t *iv, int dir,
+                     const char *test)
+{
+    int pos, nbytes = 8 * len; /* len counts 8-byte blocks */
+    av_tea_crypt(ctx, dst, src, len, iv, dir);
+    if (!memcmp(dst, ref, nbytes))
+        return;
+    printf("%s failed\ngot      ", test);
+    for (pos = 0; pos < nbytes; pos++)
+        printf("%02x ", dst[pos]);
+    printf("\nexpected ");
+    for (pos = 0; pos < nbytes; pos++)
+        printf("%02x ", ref[pos]);
+    printf("\n");
+    exit(1);
+}
+
+int main(void)
+{
+    AVTEA *ctx;
+    uint8_t buf[8], iv[8];
+    int i;
+    static const uint8_t src[32] = "HelloWorldHelloWorldHelloWorld";
+    uint8_t ct[32];
+    uint8_t pl[32];
+
+    ctx = av_tea_alloc();
+    if (!ctx)
+        return 1;
+
+    for (i = 0; i < TEA_NUM_TESTS; i++) {
+        av_tea_init(ctx, tea_test_key[i], 64);
+        /* single-block checks without an IV against the key/pt/ct vectors above */
+        test_tea(ctx, buf, tea_test_pt[i], tea_test_ct[i], 1, NULL, 0, "encryption");
+        test_tea(ctx, buf, tea_test_ct[i], tea_test_pt[i], 1, NULL, 1, "decryption");
+
+        /* CBC-encrypt all four blocks of src into ct; iv is updated along the way */
+        memcpy(iv, "HALLO123", 8);
+        av_tea_crypt(ctx, ct, src, 4, iv, 0);
+
+        /* reset the IV and decrypt into the separate buffer pl */
+        memcpy(iv, "HALLO123", 8);
+        test_tea(ctx, pl, ct, src, 4, iv, 1, "CBC decryption");
+        /* same again, this time decrypting ct over itself */
+        memcpy(iv, "HALLO123", 8);
+        test_tea(ctx, ct, ct, src, 4, iv, 1, "CBC inplace decryption");
+    }
+
+    printf("Test encryption/decryption success.\n");
+    av_free(ctx);
+
+    return 0;
+}
+
+#endif
diff --git a/libavutil/tea.h b/libavutil/tea.h
new file mode 100644
index 00000000..dd929bda
--- /dev/null
+++ b/libavutil/tea.h
@@ -0,0 +1,71 @@
+/*
+ * A 32-bit implementation of the TEA algorithm
+ * Copyright (c) 2015 Vesselin Bontchev
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_TEA_H
+#define AVUTIL_TEA_H
+
+#include <stdint.h>
+
+/**
+ * @file
+ * @brief Public header for libavutil TEA algorithm
+ * @defgroup lavu_tea TEA
+ * @ingroup lavu_crypto
+ * @{
+ */
+
+extern const int av_tea_size;
+
+struct AVTEA;
+
+/**
+  * Allocate an AVTEA context
+  * To free the struct: av_free(ptr)
+  */
+struct AVTEA *av_tea_alloc(void);
+
+/**
+ * Initialize an AVTEA context.
+ *
+ * @param ctx an AVTEA context
+ * @param key a key of 16 bytes used for encryption/decryption
+ * @param rounds the number of rounds in TEA (64 is the "standard")
+ */
+void av_tea_init(struct AVTEA *ctx, const uint8_t key[16], int rounds);
+
+/**
+ * Encrypt or decrypt a buffer using a previously initialized context.
+ *
+ * @param ctx an AVTEA context
+ * @param dst destination array, can be equal to src
+ * @param src source array, can be equal to dst
+ * @param count number of 8 byte blocks
+ * @param iv initialization vector for CBC mode, if NULL then ECB will be used
+ * @param decrypt 0 for encryption, 1 for decryption
+ */
+void av_tea_crypt(struct AVTEA *ctx, uint8_t *dst, const uint8_t *src,
+                  int count, uint8_t *iv, int decrypt);
+
+/**
+ * @}
+ */
+
+#endif /* AVUTIL_TEA_H */
diff --git a/libavutil/thread.h b/libavutil/thread.h
index 297b5b9a..32ddf403 100644
--- a/libavutil/thread.h
+++ b/libavutil/thread.h
@@ -30,6 +30,102 @@
 
 #if HAVE_PTHREADS
 #include <pthread.h>
+
+#if defined(ASSERT_LEVEL) && ASSERT_LEVEL > 1
+
+#include "log.h"
+
+#define ASSERT_PTHREAD_NORET(func, ...) do {                            \
+    int ret = func(__VA_ARGS__); /* non-zero is treated as failure */   \
+    if (ret) {                                                          \
+        av_log(NULL, AV_LOG_FATAL, AV_STRINGIFY(func)                   \
+               " failed with error: %s\n", av_err2str(AVERROR(ret)));   \
+        abort(); /* fail hard instead of returning the error */         \
+    }                                                                   \
+} while (0)
+
+#define ASSERT_PTHREAD(func, ...) do {                                  \
+    ASSERT_PTHREAD_NORET(func, __VA_ARGS__);                            \
+    return 0; /* returns from the enclosing function */                 \
+} while (0)
+
+static inline int strict_pthread_join(pthread_t thread, void **value_ptr)
+{
+    ASSERT_PTHREAD(pthread_join, thread, value_ptr);
+}
+
+static inline int strict_pthread_mutex_init(pthread_mutex_t *mutex, const pthread_mutexattr_t *attr)
+{
+    if (attr) { /* caller-supplied attributes are passed through unchanged */
+        ASSERT_PTHREAD_NORET(pthread_mutex_init, mutex, attr);
+    } else { /* no attributes given: build the mutex with type PTHREAD_MUTEX_ERRORCHECK */
+        pthread_mutexattr_t local_attr;
+        ASSERT_PTHREAD_NORET(pthread_mutexattr_init, &local_attr);
+        ASSERT_PTHREAD_NORET(pthread_mutexattr_settype, &local_attr, PTHREAD_MUTEX_ERRORCHECK);
+        ASSERT_PTHREAD_NORET(pthread_mutex_init, mutex, &local_attr);
+        ASSERT_PTHREAD_NORET(pthread_mutexattr_destroy, &local_attr);
+    }
+    return 0; /* reached only on success: any failing call above logs and aborts */
+}
+
+static inline int strict_pthread_mutex_destroy(pthread_mutex_t *mutex)
+{
+    ASSERT_PTHREAD(pthread_mutex_destroy, mutex);
+}
+
+static inline int strict_pthread_mutex_lock(pthread_mutex_t *mutex)
+{
+    ASSERT_PTHREAD(pthread_mutex_lock, mutex);
+}
+
+static inline int strict_pthread_mutex_unlock(pthread_mutex_t *mutex)
+{
+    ASSERT_PTHREAD(pthread_mutex_unlock, mutex);
+}
+
+static inline int strict_pthread_cond_init(pthread_cond_t *cond, const pthread_condattr_t *attr)
+{
+    ASSERT_PTHREAD(pthread_cond_init, cond, attr);
+}
+
+static inline int strict_pthread_cond_destroy(pthread_cond_t *cond)
+{
+    ASSERT_PTHREAD(pthread_cond_destroy, cond);
+}
+
+static inline int strict_pthread_cond_signal(pthread_cond_t *cond)
+{
+    ASSERT_PTHREAD(pthread_cond_signal, cond);
+}
+
+static inline int strict_pthread_cond_broadcast(pthread_cond_t *cond)
+{
+    ASSERT_PTHREAD(pthread_cond_broadcast, cond);
+}
+
+static inline int strict_pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex)
+{
+    ASSERT_PTHREAD(pthread_cond_wait, cond, mutex);
+}
+
+static inline int strict_pthread_once(pthread_once_t *once_control, void (*init_routine)(void))
+{
+    ASSERT_PTHREAD(pthread_once, once_control, init_routine);
+}
+
+#define pthread_join           strict_pthread_join
+#define pthread_mutex_init     strict_pthread_mutex_init
+#define pthread_mutex_destroy  strict_pthread_mutex_destroy
+#define pthread_mutex_lock     strict_pthread_mutex_lock
+#define pthread_mutex_unlock   strict_pthread_mutex_unlock
+#define pthread_cond_init      strict_pthread_cond_init
+#define pthread_cond_destroy   strict_pthread_cond_destroy
+#define pthread_cond_signal    strict_pthread_cond_signal
+#define pthread_cond_broadcast strict_pthread_cond_broadcast
+#define pthread_cond_wait      strict_pthread_cond_wait
+#define pthread_once           strict_pthread_once
+#endif
+
 #elif HAVE_OS2THREADS
 #include "compat/os2threads.h"
 #else
@@ -43,6 +139,11 @@
 #define ff_mutex_unlock  pthread_mutex_unlock
 #define ff_mutex_destroy pthread_mutex_destroy
 
+#define AVOnce pthread_once_t
+#define AV_ONCE_INIT PTHREAD_ONCE_INIT
+
+#define ff_thread_once(control, routine) pthread_once(control, routine)
+
 #else
 
 #define USE_ATOMICS 1
@@ -54,6 +155,18 @@
 #define ff_mutex_unlock(mutex) (0)
 #define ff_mutex_destroy(mutex) (0)
 
+#define AVOnce char
+#define AV_ONCE_INIT 0
+
+static inline int ff_thread_once(char *control, void (*routine)(void))
+{
+    if (*control)
+        return 0;       /* routine has already been run */
+    routine();
+    *control = 1;       /* marked as done only after routine returned */
+    return 0;
+}
+
 #endif
 
 #endif /* AVUTIL_THREAD_H */
diff --git a/libavutil/threadmessage.c b/libavutil/threadmessage.c
index b7fcbe28..7c5cd246 100644
--- a/libavutil/threadmessage.c
+++ b/libavutil/threadmessage.c
@@ -20,26 +20,18 @@
 
 #include "fifo.h"
 #include "threadmessage.h"
-#if HAVE_THREADS
-#if HAVE_PTHREADS
-#include <pthread.h>
-#elif HAVE_W32THREADS
-#include "compat/w32pthreads.h"
-#elif HAVE_OS2THREADS
-#include "compat/os2threads.h"
-#else
-#error "Unknown threads implementation"
-#endif
-#endif
+#include "thread.h"
 
 struct AVThreadMessageQueue {
 #if HAVE_THREADS
     AVFifoBuffer *fifo;
     pthread_mutex_t lock;
-    pthread_cond_t cond;
+    pthread_cond_t cond_recv;
+    pthread_cond_t cond_send;
     int err_send;
     int err_recv;
     unsigned elsize;
+    void (*free_func)(void *msg);
 #else
     int dummy;
 #endif
@@ -61,13 +53,20 @@ int av_thread_message_queue_alloc(AVThreadMessageQueue **mq,
         av_free(rmq);
         return AVERROR(ret);
     }
-    if ((ret = pthread_cond_init(&rmq->cond, NULL))) {
+    if ((ret = pthread_cond_init(&rmq->cond_recv, NULL))) {
+        pthread_mutex_destroy(&rmq->lock);
+        av_free(rmq);
+        return AVERROR(ret);
+    }
+    if ((ret = pthread_cond_init(&rmq->cond_send, NULL))) {
+        pthread_cond_destroy(&rmq->cond_recv);
         pthread_mutex_destroy(&rmq->lock);
         av_free(rmq);
         return AVERROR(ret);
     }
     if (!(rmq->fifo = av_fifo_alloc(elsize * nelem))) {
-        pthread_cond_destroy(&rmq->cond);
+        pthread_cond_destroy(&rmq->cond_send);
+        pthread_cond_destroy(&rmq->cond_recv);
         pthread_mutex_destroy(&rmq->lock);
         av_free(rmq);
         return AVERROR(ret);
@@ -81,12 +80,22 @@ int av_thread_message_queue_alloc(AVThreadMessageQueue **mq,
 #endif /* HAVE_THREADS */
 }
 
+void av_thread_message_queue_set_free_func(AVThreadMessageQueue *mq,
+                                           void (*free_func)(void *msg))
+{
+#if HAVE_THREADS
+    mq->free_func = free_func;
+#endif
+}
+
 void av_thread_message_queue_free(AVThreadMessageQueue **mq)
 {
 #if HAVE_THREADS
     if (*mq) {
+        av_thread_message_flush(*mq);
         av_fifo_freep(&(*mq)->fifo);
-        pthread_cond_destroy(&(*mq)->cond);
+        pthread_cond_destroy(&(*mq)->cond_send);
+        pthread_cond_destroy(&(*mq)->cond_recv);
         pthread_mutex_destroy(&(*mq)->lock);
         av_freep(mq);
     }
@@ -102,12 +111,13 @@ static int av_thread_message_queue_send_locked(AVThreadMessageQueue *mq,
     while (!mq->err_send && av_fifo_space(mq->fifo) < mq->elsize) {
         if ((flags & AV_THREAD_MESSAGE_NONBLOCK))
             return AVERROR(EAGAIN);
-        pthread_cond_wait(&mq->cond, &mq->lock);
+        pthread_cond_wait(&mq->cond_send, &mq->lock);
     }
     if (mq->err_send)
         return mq->err_send;
     av_fifo_generic_write(mq->fifo, msg, mq->elsize, NULL);
-    pthread_cond_signal(&mq->cond);
+    /* one message is sent, signal one receiver */
+    pthread_cond_signal(&mq->cond_recv);
     return 0;
 }
 
@@ -118,12 +128,13 @@ static int av_thread_message_queue_recv_locked(AVThreadMessageQueue *mq,
     while (!mq->err_recv && av_fifo_size(mq->fifo) < mq->elsize) {
         if ((flags & AV_THREAD_MESSAGE_NONBLOCK))
             return AVERROR(EAGAIN);
-        pthread_cond_wait(&mq->cond, &mq->lock);
+        pthread_cond_wait(&mq->cond_recv, &mq->lock);
     }
     if (av_fifo_size(mq->fifo) < mq->elsize)
         return mq->err_recv;
     av_fifo_generic_read(mq->fifo, msg, mq->elsize, NULL);
-    pthread_cond_signal(&mq->cond);
+    /* one message space appeared, signal one sender */
+    pthread_cond_signal(&mq->cond_send);
     return 0;
 }
 
@@ -167,7 +178,7 @@ void av_thread_message_queue_set_err_send(AVThreadMessageQueue *mq,
 #if HAVE_THREADS
     pthread_mutex_lock(&mq->lock);
     mq->err_send = err;
-    pthread_cond_broadcast(&mq->cond);
+    pthread_cond_broadcast(&mq->cond_send);
     pthread_mutex_unlock(&mq->lock);
 #endif /* HAVE_THREADS */
 }
@@ -178,7 +189,34 @@ void av_thread_message_queue_set_err_recv(AVThreadMessageQueue *mq,
 #if HAVE_THREADS
     pthread_mutex_lock(&mq->lock);
     mq->err_recv = err;
-    pthread_cond_broadcast(&mq->cond);
+    pthread_cond_broadcast(&mq->cond_recv);
+    pthread_mutex_unlock(&mq->lock);
+#endif /* HAVE_THREADS */
+}
+
+#if HAVE_THREADS
+static void free_func_wrap(void *arg, void *msg, int size)
+{
+    AVThreadMessageQueue *mq = arg; /* opaque argument given to av_fifo_generic_peek_at() */
+    mq->free_func(msg);
+}
+#endif
+
+void av_thread_message_flush(AVThreadMessageQueue *mq)
+{
+#if HAVE_THREADS
+    int used, off;
+
+    pthread_mutex_lock(&mq->lock);
+    used = av_fifo_size(mq->fifo);
+    /* release each queued message through the user callback, if one is set */
+    if (mq->free_func)
+        for (off = 0; off < used; off += mq->elsize)
+            av_fifo_generic_peek_at(mq->fifo, mq, off, mq->elsize, free_func_wrap);
+    av_fifo_drain(mq->fifo, used);
+    /* only the senders need to be notified since the queue is empty and there
+     * is nothing to read */
+    pthread_cond_broadcast(&mq->cond_send);
     pthread_mutex_unlock(&mq->lock);
 #endif /* HAVE_THREADS */
 }
diff --git a/libavutil/threadmessage.h b/libavutil/threadmessage.h
index a8481d8e..e256cae9 100644
--- a/libavutil/threadmessage.h
+++ b/libavutil/threadmessage.h
@@ -88,4 +88,20 @@ void av_thread_message_queue_set_err_send(AVThreadMessageQueue *mq,
 void av_thread_message_queue_set_err_recv(AVThreadMessageQueue *mq,
                                           int err);
 
+/**
+ * Set the optional free message callback function which will be called if an
+ * operation is removing messages from the queue.
+ */
+void av_thread_message_queue_set_free_func(AVThreadMessageQueue *mq,
+                                           void (*free_func)(void *msg));
+
+/**
+ * Flush the message queue
+ *
+ * This function is mostly equivalent to reading and free-ing every message
+ * except that it will be done in a single operation (no lock/unlock between
+ * reads).
+ */
+void av_thread_message_flush(AVThreadMessageQueue *mq);
+
 #endif /* AVUTIL_THREADMESSAGE_H */
diff --git a/libavutil/timecode.c b/libavutil/timecode.c
index 1dfd0408..fa92df1e 100644
--- a/libavutil/timecode.c
+++ b/libavutil/timecode.c
@@ -141,7 +141,9 @@ char *av_timecode_make_mpeg_tc_string(char *buf, uint32_t tc25bit)
 static int check_fps(int fps)
 {
     int i;
-    static const int supported_fps[] = {24, 25, 30, 48, 50, 60};
+    static const int supported_fps[] = {
+        24, 25, 30, 48, 50, 60, 100, 120, 150,
+    };
 
     for (i = 0; i < FF_ARRAY_ELEMS(supported_fps); i++)
         if (fps == supported_fps[i])
@@ -151,7 +153,7 @@ static int check_fps(int fps)
 
 static int check_timecode(void *log_ctx, AVTimecode *tc)
 {
-    if (tc->fps <= 0) {
+    if ((int)tc->fps <= 0) {
         av_log(log_ctx, AV_LOG_ERROR, "Timecode frame rate must be specified\n");
         return AVERROR(EINVAL);
     }
@@ -160,9 +162,8 @@ static int check_timecode(void *log_ctx, AVTimecode *tc)
         return AVERROR(EINVAL);
     }
     if (check_fps(tc->fps) < 0) {
-        av_log(log_ctx, AV_LOG_ERROR, "Timecode frame rate %d/%d not supported\n",
+        av_log(log_ctx, AV_LOG_WARNING, "Using non-standard frame rate %d/%d\n",
                tc->rate.num, tc->rate.den);
-        return AVERROR_PATCHWELCOME;
     }
     return 0;
 }
diff --git a/libavutil/timer.h b/libavutil/timer.h
index e21f6552..ed3b0478 100644
--- a/libavutil/timer.h
+++ b/libavutil/timer.h
@@ -38,7 +38,9 @@
 
 #include "log.h"
 
-#if   ARCH_ARM
+#if   ARCH_AARCH64
+#   include "aarch64/timer.h"
+#elif ARCH_ARM
 #   include "arm/timer.h"
 #elif ARCH_PPC
 #   include "ppc/timer.h"
diff --git a/libavutil/tree.c b/libavutil/tree.c
index d0b67efc..2495cdf3 100644
--- a/libavutil/tree.c
+++ b/libavutil/tree.c
@@ -37,7 +37,7 @@ struct AVTreeNode *av_tree_node_alloc(void)
 }
 
 void *av_tree_find(const AVTreeNode *t, void *key,
-                   int (*cmp)(void *key, const void *b), void *next[2])
+                   int (*cmp)(const void *key, const void *b), void *next[2])
 {
     if (t) {
         unsigned int v = cmp(key, t->elem);
@@ -57,7 +57,7 @@ void *av_tree_find(const AVTreeNode *t, void *key,
 }
 
 void *av_tree_insert(AVTreeNode **tp, void *key,
-                     int (*cmp)(void *key, const void *b), AVTreeNode **next)
+                     int (*cmp)(const void *key, const void *b), AVTreeNode **next)
 {
     AVTreeNode *t = *tp;
     if (t) {
@@ -202,9 +202,9 @@ static void print(AVTreeNode *t, int depth)
         av_log(NULL, AV_LOG_ERROR, "NULL\n");
 }
 
-static int cmp(void *a, const void *b)
+static int cmp(const void *a, const void *b)
 {
-    return (uint8_t *) a - (const uint8_t *) b;
+    return (const uint8_t *) a - (const uint8_t *) b;
 }
 
 int main(int argc, char **argv)
diff --git a/libavutil/tree.h b/libavutil/tree.h
index a14fa915..e1aefaa9 100644
--- a/libavutil/tree.h
+++ b/libavutil/tree.h
@@ -56,11 +56,16 @@ struct AVTreeNode *av_tree_node_alloc(void);
  * @param next If next is not NULL, then next[0] will contain the previous
  *             element and next[1] the next element. If either does not exist,
  *             then the corresponding entry in next is unchanged.
+ * @param cmp compare function used to compare elements in the tree,
+ *            API identical to that of Standard C's qsort.
+ *            It is guaranteed that the first and only the first argument to cmp()
+ *            will be the key parameter to av_tree_find(), thus it could, if the
+ *            user wants, be a different type (like an opaque context).
  * @return An element with cmp(key, elem) == 0 or NULL if no such element
  *         exists in the tree.
  */
 void *av_tree_find(const struct AVTreeNode *root, void *key,
-                   int (*cmp)(void *key, const void *b), void *next[2]);
+                   int (*cmp)(const void *key, const void *b), void *next[2]);
 
 /**
  * Insert or remove an element.
@@ -99,14 +104,15 @@ void *av_tree_find(const struct AVTreeNode *root, void *key,
  *                 return av_tree_insert(rootp, key, cmp, next);
  *             }
  *             @endcode
- * @param cmp compare function used to compare elements in the tree
+ * @param cmp compare function used to compare elements in the tree, API identical
+ *            to that of Standard C's qsort
  * @return If no insertion happened, the found element; if an insertion or
  *         removal happened, then either key or NULL will be returned.
  *         Which one it is depends on the tree state and the implementation. You
  *         should make no assumptions that it's one or the other in the code.
  */
 void *av_tree_insert(struct AVTreeNode **rootp, void *key,
-                     int (*cmp)(void *key, const void *b),
+                     int (*cmp)(const void *key, const void *b),
                      struct AVTreeNode **next);
 
 void av_tree_destroy(struct AVTreeNode *t);
diff --git a/libavutil/twofish.c b/libavutil/twofish.c
index f735a1fb..162069be 100644
--- a/libavutil/twofish.c
+++ b/libavutil/twofish.c
@@ -273,7 +273,7 @@ av_cold int av_twofish_init(AVTWOFISH *cs, const uint8_t *key, int key_bits)
     uint32_t Key[8], Me[4], Mo[4], A, B;
     const uint32_t rho = 0x01010101;
     if (key_bits < 0)
-        return -1;
+        return AVERROR(EINVAL);
     if (key_bits <= 128) {
         cs->ksize = 2;
     } else if (key_bits <= 192) {
diff --git a/libavutil/utils.c b/libavutil/utils.c
index 0b765ed0..f409f322 100644
--- a/libavutil/utils.c
+++ b/libavutil/utils.c
@@ -30,13 +30,20 @@
 #include "libavutil/ffversion.h"
 const char av_util_ffversion[] = "FFmpeg version " FFMPEG_VERSION;
 
+const char *av_version_info(void)
+{
+    return FFMPEG_VERSION;
+}
+
 unsigned avutil_version(void)
 {
     static int checks_done;
     if (checks_done)
         return LIBAVUTIL_VERSION_INT;
 
+#if FF_API_VDPAU
     av_assert0(AV_PIX_FMT_VDA_VLD == 81); //check if the pix fmt enum has not had anything inserted or removed by mistake
+#endif
     av_assert0(AV_SAMPLE_FMT_DBLP == 9);
     av_assert0(AVMEDIA_TYPE_ATTACHMENT == 4);
     av_assert0(AV_PICTURE_TYPE_BI == 7);
@@ -46,7 +53,7 @@ unsigned avutil_version(void)
     av_assert0(((size_t)-1) > 0); // C guarantees this but if false on a platform we care about revert at least b284e1ffe343d6697fb950d1ee517bafda8a9844
 
     if (av_sat_dadd32(1, 2) != 5) {
-        av_log(NULL, AV_LOG_FATAL, "Libavutil has been build with a broken binutils, please upgrade binutils and rebuild\n");
+        av_log(NULL, AV_LOG_FATAL, "Libavutil has been built with a broken binutils, please upgrade binutils and rebuild\n");
         abort();
     }
 
diff --git a/libavutil/version.h b/libavutil/version.h
index 215729ec..0ea8c794 100644
--- a/libavutil/version.h
+++ b/libavutil/version.h
@@ -36,6 +36,14 @@
 #define AV_VERSION_DOT(a, b, c) a ##.## b ##.## c
 #define AV_VERSION(a, b, c) AV_VERSION_DOT(a, b, c)
 
+/**
+ * Extract version components from the full ::AV_VERSION_INT int as returned
+ * by functions like ::avformat_version() and ::avcodec_version()
+ */
+#define AV_VERSION_MAJOR(a) ((a) >> 16)
+#define AV_VERSION_MINOR(a) (((a) & 0x00FF00) >> 8)
+#define AV_VERSION_MICRO(a) ((a) & 0xFF)
+
 /**
  * @}
  */
@@ -55,9 +63,9 @@
  * @{
  */
 
-#define LIBAVUTIL_VERSION_MAJOR  54
-#define LIBAVUTIL_VERSION_MINOR  27
-#define LIBAVUTIL_VERSION_MICRO 100
+#define LIBAVUTIL_VERSION_MAJOR  55
+#define LIBAVUTIL_VERSION_MINOR  17
+#define LIBAVUTIL_VERSION_MICRO 103
 
 #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
                                                LIBAVUTIL_VERSION_MINOR, \
@@ -77,63 +85,44 @@
  * dropped at a future version bump. The defines themselves are not part of
  * the public API and may change, break or disappear at any time.
  *
+ * @note When bumping the major version, it is recommended to manually
+ * disable each FF_API_* in its own commit instead of disabling them all
+ * at once through the bump. This improves the git bisect-ability of the change.
+ *
  * @{
  */
 
-#ifndef FF_API_OLD_AVOPTIONS
-#define FF_API_OLD_AVOPTIONS            (LIBAVUTIL_VERSION_MAJOR < 55)
-#endif
-#ifndef FF_API_PIX_FMT
-#define FF_API_PIX_FMT                  (LIBAVUTIL_VERSION_MAJOR < 55)
-#endif
-#ifndef FF_API_CONTEXT_SIZE
-#define FF_API_CONTEXT_SIZE             (LIBAVUTIL_VERSION_MAJOR < 55)
-#endif
-#ifndef FF_API_PIX_FMT_DESC
-#define FF_API_PIX_FMT_DESC             (LIBAVUTIL_VERSION_MAJOR < 55)
-#endif
-#ifndef FF_API_AV_REVERSE
-#define FF_API_AV_REVERSE               (LIBAVUTIL_VERSION_MAJOR < 55)
-#endif
-#ifndef FF_API_AUDIOCONVERT
-#define FF_API_AUDIOCONVERT             (LIBAVUTIL_VERSION_MAJOR < 55)
-#endif
-#ifndef FF_API_CPU_FLAG_MMX2
-#define FF_API_CPU_FLAG_MMX2            (LIBAVUTIL_VERSION_MAJOR < 55)
-#endif
-#ifndef FF_API_LLS_PRIVATE
-#define FF_API_LLS_PRIVATE              (LIBAVUTIL_VERSION_MAJOR < 55)
-#endif
-#ifndef FF_API_AVFRAME_LAVC
-#define FF_API_AVFRAME_LAVC             (LIBAVUTIL_VERSION_MAJOR < 55)
-#endif
 #ifndef FF_API_VDPAU
-#define FF_API_VDPAU                    (LIBAVUTIL_VERSION_MAJOR < 55)
-#endif
-#ifndef FF_API_GET_CHANNEL_LAYOUT_COMPAT
-#define FF_API_GET_CHANNEL_LAYOUT_COMPAT (LIBAVUTIL_VERSION_MAJOR < 55)
+#define FF_API_VDPAU                    (LIBAVUTIL_VERSION_MAJOR < 56)
 #endif
 #ifndef FF_API_XVMC
-#define FF_API_XVMC                     (LIBAVUTIL_VERSION_MAJOR < 55)
+#define FF_API_XVMC                     (LIBAVUTIL_VERSION_MAJOR < 56)
 #endif
 #ifndef FF_API_OPT_TYPE_METADATA
-#define FF_API_OPT_TYPE_METADATA        (LIBAVUTIL_VERSION_MAJOR < 55)
+#define FF_API_OPT_TYPE_METADATA        (LIBAVUTIL_VERSION_MAJOR < 56)
 #endif
 #ifndef FF_API_DLOG
-#define FF_API_DLOG                     (LIBAVUTIL_VERSION_MAJOR < 55)
+#define FF_API_DLOG                     (LIBAVUTIL_VERSION_MAJOR < 56)
 #endif
-
-#ifndef FF_CONST_AVUTIL55
-#if LIBAVUTIL_VERSION_MAJOR >= 55
-#define FF_CONST_AVUTIL55 const
-#else
-#define FF_CONST_AVUTIL55
+#ifndef FF_API_VAAPI
+#define FF_API_VAAPI                    (LIBAVUTIL_VERSION_MAJOR < 56)
 #endif
+#ifndef FF_API_FRAME_QP
+#define FF_API_FRAME_QP                 (LIBAVUTIL_VERSION_MAJOR < 56)
+#endif
+#ifndef FF_API_PLUS1_MINUS1
+#define FF_API_PLUS1_MINUS1             (LIBAVUTIL_VERSION_MAJOR < 56)
+#endif
+#ifndef FF_API_ERROR_FRAME
+#define FF_API_ERROR_FRAME              (LIBAVUTIL_VERSION_MAJOR < 56)
+#endif
+#ifndef FF_API_CRC_BIG_TABLE
+#define FF_API_CRC_BIG_TABLE            (LIBAVUTIL_VERSION_MAJOR < 56)
 #endif
 
+
 /**
  * @}
  */
 
 #endif /* AVUTIL_VERSION_H */
-
diff --git a/libavutil/wchar_filename.h b/libavutil/wchar_filename.h
index c553c46f..2ade321b 100644
--- a/libavutil/wchar_filename.h
+++ b/libavutil/wchar_filename.h
@@ -23,6 +23,7 @@
 #include <windows.h>
 #include "mem.h"
 
+av_warn_unused_result
 static inline int utf8towchar(const char *filename_utf8, wchar_t **filename_w)
 {
     int num_chars;
diff --git a/libavutil/x86/Makefile b/libavutil/x86/Makefile
index eb70a62e..94d88320 100644
--- a/libavutil/x86/Makefile
+++ b/libavutil/x86/Makefile
@@ -1,4 +1,5 @@
 OBJS += x86/cpu.o                                                       \
+        x86/fixed_dsp_init.o                                            \
         x86/float_dsp_init.o                                            \
         x86/lls_init.o                                                  \
 
@@ -8,6 +9,7 @@ EMMS_OBJS_$(HAVE_MMX_INLINE)_$(HAVE_MMX_EXTERNAL)_$(HAVE_MM_EMPTY) = x86/emms.o
 
 YASM-OBJS += x86/cpuid.o                                                \
              $(EMMS_OBJS__yes_)                                      \
+             x86/fixed_dsp.o                                            \
              x86/float_dsp.o                                            \
              x86/lls.o                                                  \
 
diff --git a/libavutil/x86/asm.h b/libavutil/x86/asm.h
index 616ad6c9..109b65e5 100644
--- a/libavutil/x86/asm.h
+++ b/libavutil/x86/asm.h
@@ -38,7 +38,8 @@ typedef struct ymm_reg { uint64_t a, b, c, d; } ymm_reg;
 #    define PTR_SIZE "8"
 typedef int64_t x86_reg;
 
-#    define REG_SP "rsp"
+/* REG_SP is defined in Solaris sys headers, so use REG_sp */
+#    define REG_sp "rsp"
 #    define REG_BP "rbp"
 #    define REGBP   rbp
 #    define REGa    rax
@@ -59,7 +60,7 @@ typedef int64_t x86_reg;
 #    define PTR_SIZE "4"
 typedef int32_t x86_reg;
 
-#    define REG_SP "esp"
+#    define REG_sp "esp"
 #    define REG_BP "ebp"
 #    define REGBP   ebp
 #    define REGa    eax
diff --git a/libavutil/x86/bswap.h b/libavutil/x86/bswap.h
index 08e2a625..ffa59e4c 100644
--- a/libavutil/x86/bswap.h
+++ b/libavutil/x86/bswap.h
@@ -25,21 +25,47 @@
 #define AVUTIL_X86_BSWAP_H
 
 #include <stdint.h>
+#if defined(_MSC_VER)
+#include <intrin.h>
+#endif
 #include "config.h"
 #include "libavutil/attributes.h"
 
-#if HAVE_INLINE_ASM
+#if defined(_MSC_VER)
+
+#define av_bswap16 av_bswap16 /* NOTE(review): presumably lets generic code detect this override via #ifndef - confirm */
+static av_always_inline av_const uint16_t av_bswap16(uint16_t x)
+{
+    return _rotr16(x, 8); /* rotating a 16-bit value by 8 exchanges its two bytes */
+}
+
+#define av_bswap32 av_bswap32
+static av_always_inline av_const uint32_t av_bswap32(uint32_t x)
+{
+    return _byteswap_ulong(x); /* byte-swap routine used in this _MSC_VER branch */
+}
+
+#if ARCH_X86_64
+#define av_bswap64 av_bswap64
+static inline uint64_t av_const av_bswap64(uint64_t x)
+{
+    return _byteswap_uint64(x); /* 64-bit variant, only defined when ARCH_X86_64 is set */
+}
+#endif
+
+
+#elif HAVE_INLINE_ASM
 
-#if !AV_GCC_VERSION_AT_LEAST(4,1)
+#if AV_GCC_VERSION_AT_MOST(4,0)
 #define av_bswap16 av_bswap16
 static av_always_inline av_const unsigned av_bswap16(unsigned x)
 {
     __asm__("rorw $8, %w0" : "+r"(x));
     return x;
 }
-#endif /* !AV_GCC_VERSION_AT_LEAST(4,1) */
+#endif /* AV_GCC_VERSION_AT_MOST(4,0) */
 
-#if !AV_GCC_VERSION_AT_LEAST(4,5)
+#if AV_GCC_VERSION_AT_MOST(4,4) || defined(__INTEL_COMPILER)
 #define av_bswap32 av_bswap32
 static av_always_inline av_const uint32_t av_bswap32(uint32_t x)
 {
@@ -55,7 +81,7 @@ static inline uint64_t av_const av_bswap64(uint64_t x)
     return x;
 }
 #endif
-#endif /* !AV_GCC_VERSION_AT_LEAST(4,5) */
+#endif /* AV_GCC_VERSION_AT_MOST(4,4) */
 
 #endif /* HAVE_INLINE_ASM */
 #endif /* AVUTIL_X86_BSWAP_H */
diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c
index 7a5d4e6a..bb63daac 100644
--- a/libavutil/x86/cpu.c
+++ b/libavutil/x86/cpu.c
@@ -126,6 +126,8 @@ int ff_get_cpu_flags_x86(void)
             rval |= AV_CPU_FLAG_SSE4;
         if (ecx & 0x00100000 )
             rval |= AV_CPU_FLAG_SSE42;
+        if (ecx & 0x02000000 ) /* CPUID.1:ECX bit 25 = AESNI (bit 24 is TSC-deadline) */
+            rval |= AV_CPU_FLAG_AESNI;
 #if HAVE_AVX
         /* Check OXSAVE and AVX bits */
         if ((ecx & 0x18000000) == 0x18000000) {
@@ -180,13 +182,11 @@ int ff_get_cpu_flags_x86(void)
 
         /* Similar to the above but for AVX functions on AMD processors.
            This is necessary only for functions using YMM registers on Bulldozer
-           based CPUs as they lack 256-bits execution units. SSE/AVX functions
-           using XMM registers are always faster on them.
+           and Jaguar based CPUs as they lack 256-bits execution units. SSE/AVX
+           functions using XMM registers are always faster on them.
            AV_CPU_FLAG_AVX and AV_CPU_FLAG_AVXSLOW are both set so that AVX is
-           used unless explicitly disabled by checking AV_CPU_FLAG_AVXSLOW.
-           TODO: Confirm if Excavator is affected or not by this once it's
-                 released, and update the check if necessary. Same for btver2. */
-            if (family == 0x15 && (rval & AV_CPU_FLAG_AVX))
+           used unless explicitly disabled by checking AV_CPU_FLAG_AVXSLOW. */
+            if ((family == 0x15 || family == 0x16) && (rval & AV_CPU_FLAG_AVX))
                 rval |= AV_CPU_FLAG_AVXSLOW;
         }
 
diff --git a/libavutil/x86/cpu.h b/libavutil/x86/cpu.h
index 1cea4198..f171037f 100644
--- a/libavutil/x86/cpu.h
+++ b/libavutil/x86/cpu.h
@@ -47,6 +47,7 @@
 #define X86_FMA3(flags)             CPUEXT(flags, FMA3)
 #define X86_FMA4(flags)             CPUEXT(flags, FMA4)
 #define X86_AVX2(flags)             CPUEXT(flags, AVX2)
+#define X86_AESNI(flags)            CPUEXT(flags, AESNI)
 
 #define EXTERNAL_AMD3DNOW(flags)    CPUEXT_SUFFIX(flags, _EXTERNAL, AMD3DNOW)
 #define EXTERNAL_AMD3DNOWEXT(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AMD3DNOWEXT)
@@ -67,8 +68,13 @@
 #define EXTERNAL_AVX_SLOW(flags)    CPUEXT_SUFFIX_SLOW(flags, _EXTERNAL, AVX)
 #define EXTERNAL_XOP(flags)         CPUEXT_SUFFIX(flags, _EXTERNAL, XOP)
 #define EXTERNAL_FMA3(flags)        CPUEXT_SUFFIX(flags, _EXTERNAL, FMA3)
+#define EXTERNAL_FMA3_FAST(flags)   CPUEXT_SUFFIX_FAST2(flags, _EXTERNAL, FMA3, AVX)
+#define EXTERNAL_FMA3_SLOW(flags)   CPUEXT_SUFFIX_SLOW2(flags, _EXTERNAL, FMA3, AVX)
 #define EXTERNAL_FMA4(flags)        CPUEXT_SUFFIX(flags, _EXTERNAL, FMA4)
 #define EXTERNAL_AVX2(flags)        CPUEXT_SUFFIX(flags, _EXTERNAL, AVX2)
+#define EXTERNAL_AVX2_FAST(flags)   CPUEXT_SUFFIX_FAST2(flags, _EXTERNAL, AVX2, AVX)
+#define EXTERNAL_AVX2_SLOW(flags)   CPUEXT_SUFFIX_SLOW2(flags, _EXTERNAL, AVX2, AVX)
+#define EXTERNAL_AESNI(flags)       CPUEXT_SUFFIX(flags, _EXTERNAL, AESNI)
 
 #define INLINE_AMD3DNOW(flags)      CPUEXT_SUFFIX(flags, _INLINE, AMD3DNOW)
 #define INLINE_AMD3DNOWEXT(flags)   CPUEXT_SUFFIX(flags, _INLINE, AMD3DNOWEXT)
@@ -91,6 +97,7 @@
 #define INLINE_FMA3(flags)          CPUEXT_SUFFIX(flags, _INLINE, FMA3)
 #define INLINE_FMA4(flags)          CPUEXT_SUFFIX(flags, _INLINE, FMA4)
 #define INLINE_AVX2(flags)          CPUEXT_SUFFIX(flags, _INLINE, AVX2)
+#define INLINE_AESNI(flags)         CPUEXT_SUFFIX(flags, _INLINE, AESNI)
 
 void ff_cpu_cpuid(int index, int *eax, int *ebx, int *ecx, int *edx);
 void ff_cpu_xgetbv(int op, int *eax, int *edx);
diff --git a/libavutil/x86/emms.h b/libavutil/x86/emms.h
index a529b6bb..6fda6e27 100644
--- a/libavutil/x86/emms.h
+++ b/libavutil/x86/emms.h
@@ -34,7 +34,13 @@ void avpriv_emms_yasm(void);
  */
 static av_always_inline void emms_c(void)
 {
+/* Some inlined functions may also use mmx instructions regardless of
+ * runtime cpuflags. With that in mind, we unconditionally empty the
+ * mmx state if the target cpu chosen at configure time supports it.
+ */
+#if !defined(__MMX__)
     if(av_get_cpu_flags() & AV_CPU_FLAG_MMX)
+#endif
         __asm__ volatile ("emms" ::: "memory");
 }
 #elif HAVE_MMX && HAVE_MM_EMPTY
diff --git a/libavutil/x86/fixed_dsp.asm b/libavutil/x86/fixed_dsp.asm
new file mode 100644
index 00000000..979dd5c3
--- /dev/null
+++ b/libavutil/x86/fixed_dsp.asm
@@ -0,0 +1,48 @@
+;*****************************************************************************
+;* x86-optimized fixed-point DSP functions
+;*
+;* Copyright 2016 James Almer
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "x86util.asm"
+
+SECTION .text
+
+;-----------------------------------------------------------------------------
+; void ff_butterflies_fixed(int *src0, int *src1, int len);
+;-----------------------------------------------------------------------------
+INIT_XMM sse2
+cglobal butterflies_fixed, 3,3,3, src0, src1, len ; 3 args, 3 GPRs, 3 vector regs (m0-m2)
+    shl       lend, 2                 ; len *= 4: element count -> byte count (32-bit elements)
+    add      src0q, lenq              ; src0 now points just past the last element
+    add      src1q, lenq              ; src1 now points just past the last element
+    neg       lenq                    ; lenq = negative byte offset that counts up towards 0
+; No zero-length check: the loop body below always runs at least once.
+align 16
+.loop:
+    mova        m0, [src0q + lenq]    ; aligned load of one vector from src0
+    mova        m1, [src1q + lenq]    ; aligned load of one vector from src1
+    mova        m2, m0                ; keep a copy of src0 for the subtraction
+    paddd       m0, m1                ; m0 = src0 + src1 (packed 32-bit)
+    psubd       m2, m1                ; m2 = src0 - src1 (packed 32-bit)
+    mova        [src0q + lenq], m0    ; src0 <- sums
+    mova        [src1q + lenq], m2    ; src1 <- differences
+    add       lenq, mmsize            ; advance by one vector
+    jl .loop                          ; continue while the offset is still negative
+    RET
diff --git a/libavutil/x86/fixed_dsp_init.c b/libavutil/x86/fixed_dsp_init.c
new file mode 100644
index 00000000..303a2eb9
--- /dev/null
+++ b/libavutil/x86/fixed_dsp_init.c
@@ -0,0 +1,35 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/fixed_dsp.h"
+#include "cpu.h"
+
+void ff_butterflies_fixed_sse2(int *src0, int *src1, int len); /* SSE2 butterflies routine, defined outside this file */
+
+av_cold void ff_fixed_dsp_init_x86(AVFixedDSPContext *fdsp) /* installs x86 routines into fdsp */
+{
+    int cpu_flags = av_get_cpu_flags(); /* tested by the EXTERNAL_* macro below */
+
+    if (EXTERNAL_SSE2(cpu_flags)) { /* override only when the SSE2 check passes */
+        fdsp->butterflies_fixed = ff_butterflies_fixed_sse2;
+    }
+}
diff --git a/libavutil/x86/float_dsp.asm b/libavutil/x86/float_dsp.asm
index ec3d22b2..021ff03c 100644
--- a/libavutil/x86/float_dsp.asm
+++ b/libavutil/x86/float_dsp.asm
@@ -203,7 +203,7 @@ cglobal vector_fmul_window, 5, 6, 6, dst, src0, src1, win, len, len1
     add     dstq, lenq
     add     winq, lenq
     neg     lenq
-.loop
+.loop:
     mova      m0, [winq  + lenq]
     mova      m4, [src0q + lenq]
 %if cpuflag(sse)
@@ -332,10 +332,10 @@ VECTOR_FMUL_REVERSE
 ; float scalarproduct_float_sse(const float *v1, const float *v2, int len)
 INIT_XMM sse
 cglobal scalarproduct_float, 3,3,2, v1, v2, offset
+    shl   offsetd, 2
+    add       v1q, offsetq
+    add       v2q, offsetq
     neg   offsetq
-    shl   offsetq, 2
-    sub       v1q, offsetq
-    sub       v2q, offsetq
     xorps    xmm0, xmm0
 .loop:
     movaps   xmm1, [v1q+offsetq]
@@ -359,12 +359,7 @@ cglobal scalarproduct_float, 3,3,2, v1, v2, offset
 ;-----------------------------------------------------------------------------
 INIT_XMM sse
 cglobal butterflies_float, 3,3,3, src0, src1, len
-%if ARCH_X86_64
-    movsxd    lenq, lend
-%endif
-    test      lenq, lenq
-    jz .end
-    shl       lenq, 2
+    shl       lend, 2
     add      src0q, lenq
     add      src1q, lenq
     neg       lenq
@@ -377,5 +372,4 @@ cglobal butterflies_float, 3,3,3, src0, src1, len
     mova        [src0q + lenq], m0
     add       lenq, mmsize
     jl .loop
-.end:
     REP_RET
diff --git a/libavutil/x86/float_dsp_init.c b/libavutil/x86/float_dsp_init.c
index f211f239..c836a78e 100644
--- a/libavutil/x86/float_dsp_init.c
+++ b/libavutil/x86/float_dsp_init.c
@@ -92,7 +92,7 @@ av_cold void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
         fdsp->vector_fmul_add    = ff_vector_fmul_add_avx;
         fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_avx;
     }
-    if (EXTERNAL_FMA3(cpu_flags) && !(cpu_flags & AV_CPU_FLAG_AVXSLOW)) {
+    if (EXTERNAL_FMA3_FAST(cpu_flags)) {
         fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_fma3;
         fdsp->vector_fmul_add    = ff_vector_fmul_add_fma3;
     }
diff --git a/libavutil/x86/intmath.h b/libavutil/x86/intmath.h
index c42fa835..f58b0d08 100644
--- a/libavutil/x86/intmath.h
+++ b/libavutil/x86/intmath.h
@@ -22,8 +22,47 @@
 #define AVUTIL_X86_INTMATH_H
 
 #include <stdint.h>
+#include <stdlib.h>
 #include "config.h"
+#if HAVE_FAST_CLZ /* defined by config.h, which therefore has to be included first */
+#if defined(_MSC_VER)
+#include <intrin.h>
+#elif defined(__INTEL_COMPILER)
+#include <immintrin.h>
+#endif
+#endif
 
+#if HAVE_FAST_CLZ
+#if (defined(__INTEL_COMPILER) && (__INTEL_COMPILER>=1216)) || defined(_MSC_VER)
+#   if defined(__INTEL_COMPILER)
+#       define ff_log2(x) (_bit_scan_reverse((x)|1))
+#   else
+#       define ff_log2 ff_log2_x86
+static av_always_inline av_const int ff_log2_x86(unsigned int v) /* index of the highest set bit of v|1 */
+{
+    unsigned long n; /* receives the bit index written by _BitScanReverse */
+    _BitScanReverse(&n, v|1); /* v|1 is never zero, so v == 0 gives the same result as v == 1 */
+    return n;
+}
+#   endif
+#   define ff_log2_16bit av_log2
+
+#   define ff_ctz(v) _tzcnt_u32(v)
+
+#   if ARCH_X86_64
+#       define ff_ctzll(v) _tzcnt_u64(v)
+#   else
+#       define ff_ctzll ff_ctzll_x86
+static av_always_inline av_const int ff_ctzll_x86(long long v) /* 64-bit trailing-zero count for 32-bit builds */
+{ /* count in the low 32 bits; if they are all zero, count in the high 32 bits and add 32 */
+    return ((uint32_t)v == 0) ? _tzcnt_u32((uint32_t)(v >> 32)) + 32 : _tzcnt_u32((uint32_t)v);
+}
+#   endif
+
+#endif /* __INTEL_COMPILER >= 1216 || _MSC_VER */
+
+#endif /* HAVE_FAST_CLZ */
+
 #if defined(__GNUC__)
 
 /* Our generic version of av_popcount is faster than GCC's built-in on
@@ -39,6 +78,12 @@
 
 #if defined(__BMI2__)
 
+#if AV_GCC_VERSION_AT_LEAST(5,1)
+#define av_mod_uintp2 __builtin_ia32_bzhi_si
+#elif HAVE_INLINE_ASM
+/* GCC releases before 5.1.0 have a broken bzhi builtin, so for those we
+ * implement it using inline assembly
+ */
 #define av_mod_uintp2 av_mod_uintp2_bmi2
 static av_always_inline av_const unsigned av_mod_uintp2_bmi2(unsigned a, unsigned p)
 {
@@ -50,9 +95,42 @@ static av_always_inline av_const unsigned av_mod_uintp2_bmi2(unsigned a, unsigne
         return x;
     }
 }
+#endif /* AV_GCC_VERSION_AT_LEAST */
 
 #endif /* __BMI2__ */
 
+#if defined(__SSE2__) && !defined(__INTEL_COMPILER)
+
+#define av_clipd av_clipd_sse2
+static av_always_inline av_const double av_clipd_sse2(double a, double amin, double amax) /* clip a into [amin, amax] */
+{
+#if defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2
+    if (amin > amax) abort(); /* inverted range is only checked at ASSERT_LEVEL >= 2 */
+#endif
+    __asm__ ("minsd %2, %0 \n\t" /* a = min(a, amax) */
+             "maxsd %1, %0 \n\t" /* a = max(a, amin) */
+             : "+&x"(a) : "xm"(amin), "xm"(amax)); /* %0 = a (read/write), %1 = amin, %2 = amax */
+    return a;
+}
+
+#endif /* __SSE2__ */
+
+#if defined(__SSE__) && !defined(__INTEL_COMPILER)
+
+#define av_clipf av_clipf_sse
+static av_always_inline av_const float av_clipf_sse(float a, float amin, float amax) /* clip a into [amin, amax] */
+{
+#if defined(ASSERT_LEVEL) && ASSERT_LEVEL >= 2
+    if (amin > amax) abort(); /* inverted range is only checked at ASSERT_LEVEL >= 2 */
+#endif
+    __asm__ ("minss %2, %0 \n\t" /* a = min(a, amax) */
+             "maxss %1, %0 \n\t" /* a = max(a, amin) */
+             : "+&x"(a) : "xm"(amin), "xm"(amax)); /* %0 = a (read/write), %1 = amin, %2 = amax */
+    return a;
+}
+
+#endif /* __SSE__ */
+
 #endif /* __GNUC__ */
 
 #endif /* AVUTIL_X86_INTMATH_H */
diff --git a/libavutil/x86/lls.asm b/libavutil/x86/lls.asm
index 769befb7..317fba6f 100644
--- a/libavutil/x86/lls.asm
+++ b/libavutil/x86/lls.asm
@@ -125,8 +125,7 @@ cglobal update_lls, 2,5,8, ctx, var, i, j, covar2
 .ret:
     REP_RET
 
-%if HAVE_AVX_EXTERNAL
-INIT_YMM avx
+%macro UPDATE_LLS 0
 cglobal update_lls, 3,6,8, ctx, var, count, i, j, count2
     %define covarq ctxq
     mov  countd, [ctxq + LLSModel.indep_count]
@@ -140,6 +139,18 @@ cglobal update_lls, 3,6,8, ctx, var, count, i, j, count2
     vbroadcastsd ymm6, [varq + iq*8 + 16]
     vbroadcastsd ymm7, [varq + iq*8 + 24]
     vextractf128 xmm3, ymm1, 1
+%if cpuflag(fma3)
+    mova ymm0, COVAR(iq  ,0)
+    mova xmm2, COVAR(iq+2,2)
+    fmaddpd ymm0, ymm1, ymm4, ymm0
+    fmaddpd xmm2, xmm3, xmm6, xmm2
+    fmaddpd ymm1, ymm5, ymm1, COVAR(iq  ,1)
+    fmaddpd xmm3, xmm7, xmm3, COVAR(iq+2,3)
+    mova COVAR(iq  ,0), ymm0
+    mova COVAR(iq  ,1), ymm1
+    mova COVAR(iq+2,2), xmm2
+    mova COVAR(iq+2,3), xmm3
+%else
     vmulpd  ymm0, ymm1, ymm4
     vmulpd  ymm1, ymm1, ymm5
     vmulpd  xmm2, xmm3, xmm6
@@ -148,12 +159,26 @@ cglobal update_lls, 3,6,8, ctx, var, count, i, j, count2
     ADDPD_MEM COVAR(iq  ,1), ymm1
     ADDPD_MEM COVAR(iq+2,2), xmm2
     ADDPD_MEM COVAR(iq+2,3), xmm3
+%endif ; cpuflag(fma3)
     lea     jd, [iq + 4]
     cmp     jd, count2d
     jg .skip4x4
 .loop4x4:
     ; Compute all 16 pairwise products of a 4x4 block
     mova    ymm3, [varq + jq*8]
+%if cpuflag(fma3)
+    mova ymm0, COVAR(jq, 0)
+    mova ymm1, COVAR(jq, 1)
+    mova ymm2, COVAR(jq, 2)
+    fmaddpd ymm0, ymm3, ymm4, ymm0
+    fmaddpd ymm1, ymm3, ymm5, ymm1
+    fmaddpd ymm2, ymm3, ymm6, ymm2
+    fmaddpd ymm3, ymm7, ymm3, COVAR(jq,3)
+    mova COVAR(jq, 0), ymm0
+    mova COVAR(jq, 1), ymm1
+    mova COVAR(jq, 2), ymm2
+    mova COVAR(jq, 3), ymm3
+%else
     vmulpd  ymm0, ymm3, ymm4
     vmulpd  ymm1, ymm3, ymm5
     vmulpd  ymm2, ymm3, ymm6
@@ -162,6 +187,7 @@ cglobal update_lls, 3,6,8, ctx, var, count, i, j, count2
     ADDPD_MEM COVAR(jq,1), ymm1
     ADDPD_MEM COVAR(jq,2), ymm2
     ADDPD_MEM COVAR(jq,3), ymm3
+%endif ; cpuflag(fma3)
     add     jd, 4
     cmp     jd, count2d
     jle .loop4x4
@@ -169,6 +195,19 @@ cglobal update_lls, 3,6,8, ctx, var, count, i, j, count2
     cmp     jd, countd
     jg .skip2x4
     mova    xmm3, [varq + jq*8]
+%if cpuflag(fma3)
+    mova xmm0, COVAR(jq, 0)
+    mova xmm1, COVAR(jq, 1)
+    mova xmm2, COVAR(jq, 2)
+    fmaddpd xmm0, xmm3, xmm4, xmm0
+    fmaddpd xmm1, xmm3, xmm5, xmm1
+    fmaddpd xmm2, xmm3, xmm6, xmm2
+    fmaddpd xmm3, xmm7, xmm3, COVAR(jq,3)
+    mova COVAR(jq, 0), xmm0
+    mova COVAR(jq, 1), xmm1
+    mova COVAR(jq, 2), xmm2
+    mova COVAR(jq, 3), xmm3
+%else
     vmulpd  xmm0, xmm3, xmm4
     vmulpd  xmm1, xmm3, xmm5
     vmulpd  xmm2, xmm3, xmm6
@@ -177,6 +216,7 @@ cglobal update_lls, 3,6,8, ctx, var, count, i, j, count2
     ADDPD_MEM COVAR(jq,1), xmm1
     ADDPD_MEM COVAR(jq,2), xmm2
     ADDPD_MEM COVAR(jq,3), xmm3
+%endif ; cpuflag(fma3)
 .skip2x4:
     add     id, 4
     add covarq, 4*COVAR_STRIDE
@@ -187,14 +227,29 @@ cglobal update_lls, 3,6,8, ctx, var, count, i, j, count2
     mov     jd, id
 .loop2x1:
     vmovddup xmm0, [varq + iq*8]
+%if cpuflag(fma3)
+    mova xmm1, [varq + jq*8]
+    fmaddpd xmm0, xmm1, xmm0, COVAR(jq,0)
+    mova COVAR(jq,0), xmm0
+%else
     vmulpd   xmm0, [varq + jq*8]
     ADDPD_MEM COVAR(jq,0), xmm0
+%endif ; cpuflag(fma3)
     inc     id
     add covarq, COVAR_STRIDE
     cmp     id, countd
     jle .loop2x1
 .ret:
     REP_RET
+%endmacro ; UPDATE_LLS
+
+%if HAVE_AVX_EXTERNAL
+INIT_YMM avx
+UPDATE_LLS
+%endif
+%if HAVE_FMA3_EXTERNAL
+INIT_YMM fma3
+UPDATE_LLS
 %endif
 
 INIT_XMM sse2
diff --git a/libavutil/x86/lls_init.c b/libavutil/x86/lls_init.c
index 81f141cb..1c5dca42 100644
--- a/libavutil/x86/lls_init.c
+++ b/libavutil/x86/lls_init.c
@@ -25,6 +25,7 @@
 
 void ff_update_lls_sse2(LLSModel *m, const double *var);
 void ff_update_lls_avx(LLSModel *m, const double *var);
+void ff_update_lls_fma3(LLSModel *m, const double *var);
 double ff_evaluate_lls_sse2(LLSModel *m, const double *var, int order);
 
 av_cold void ff_init_lls_x86(LLSModel *m)
@@ -38,4 +39,7 @@ av_cold void ff_init_lls_x86(LLSModel *m)
     if (EXTERNAL_AVX_FAST(cpu_flags)) {
         m->update_lls = ff_update_lls_avx;
     }
+    if (EXTERNAL_FMA3_FAST(cpu_flags)) {
+        m->update_lls = ff_update_lls_fma3;
+    }
 }
diff --git a/libavutil/x86/pixelutils.asm b/libavutil/x86/pixelutils.asm
index 7522f24a..7af3007d 100644
--- a/libavutil/x86/pixelutils.asm
+++ b/libavutil/x86/pixelutils.asm
@@ -23,7 +23,7 @@
 
 %include "x86util.asm"
 
-SECTION_TEXT
+SECTION .text
 
 ;-------------------------------------------------------------------------------
 ; int ff_pixelutils_sad_8x8_mmx(const uint8_t *src1, ptrdiff_t stride1,
diff --git a/libavutil/x86/timer.h b/libavutil/x86/timer.h
index 5b24b511..4d1e88de 100644
--- a/libavutil/x86/timer.h
+++ b/libavutil/x86/timer.h
@@ -31,7 +31,12 @@
 static inline uint64_t read_time(void)
 {
     uint32_t a, d;
-    __asm__ volatile("rdtsc" : "=a" (a), "=d" (d));
+    __asm__ volatile(
+#if ARCH_X86_64 || defined(__SSE2__) /* emit lfence only for x86-64 or SSE2 builds */
+                     "lfence \n\t" /* fence placed immediately ahead of the counter read */
+#endif
+                     "rdtsc  \n\t"
+                     : "=a" (a), "=d" (d)); /* a = eax (low half), d = edx (high half) */
     return ((uint64_t)d << 32) + a;
 }
 
diff --git a/libavutil/x86/w64xmmtest.h b/libavutil/x86/w64xmmtest.h
index 9df499f7..a4a05b04 100644
--- a/libavutil/x86/w64xmmtest.h
+++ b/libavutil/x86/w64xmmtest.h
@@ -19,6 +19,9 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#ifndef AVUTIL_X86_W64XMMTEST_H
+#define AVUTIL_X86_W64XMMTEST_H
+
 #include <inttypes.h>
 #include <stdint.h>
 #include <stdlib.h>
@@ -71,3 +74,5 @@
 int __real_ ## func;    \
 int __wrap_ ## func;    \
 int __wrap_ ## func
+
+#endif /* AVUTIL_X86_W64XMMTEST_H */
diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm
index 12779f57..3a0a2612 100644
--- a/libavutil/x86/x86inc.asm
+++ b/libavutil/x86/x86inc.asm
@@ -1,7 +1,7 @@
 ;*****************************************************************************
 ;* x86inc.asm: x264asm abstraction layer
 ;*****************************************************************************
-;* Copyright (C) 2005-2013 x264 project
+;* Copyright (C) 2005-2016 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Anton Mitrofanov <BugMaster@narod.ru>
@@ -42,6 +42,17 @@
     %define public_prefix private_prefix
 %endif
 
+%if HAVE_ALIGNED_STACK
+    %define STACK_ALIGNMENT 16
+%endif
+%ifndef STACK_ALIGNMENT
+    %if ARCH_X86_64
+        %define STACK_ALIGNMENT 16
+    %else
+        %define STACK_ALIGNMENT 4
+    %endif
+%endif
+
 %define WIN64  0
 %define UNIX64 0
 %if ARCH_X86_64
@@ -56,6 +67,15 @@
     %endif
 %endif
 
+%define FORMAT_ELF 0
+%ifidn __OUTPUT_FORMAT__,elf
+    %define FORMAT_ELF 1
+%elifidn __OUTPUT_FORMAT__,elf32
+    %define FORMAT_ELF 1
+%elifidn __OUTPUT_FORMAT__,elf64
+    %define FORMAT_ELF 1
+%endif
+
 %ifdef PREFIX
     %define mangle(x) _ %+ x
 %else
@@ -73,14 +93,6 @@
     %endif
 %endmacro
 
-%macro SECTION_TEXT 0-1 16
-    %ifidn __OUTPUT_FORMAT__,aout
-        SECTION .text
-    %else
-        SECTION .text align=%1
-    %endif
-%endmacro
-
 %if WIN64
     %define PIC
 %elif ARCH_X86_64 == 0
@@ -108,8 +120,9 @@
 ; %1 = number of arguments. loads them from stack if needed.
 ; %2 = number of registers used. pushes callee-saved regs if needed.
 ; %3 = number of xmm registers used. pushes callee-saved xmm regs if needed.
-; %4 = (optional) stack size to be allocated. If not aligned (x86-32 ICC 10.x,
-;      MSVC or YMM), the stack will be manually aligned (to 16 or 32 bytes),
+; %4 = (optional) stack size to be allocated. The stack will be aligned before
+;      allocating the specified stack size. If the required stack alignment is
+;      larger than the known stack alignment the stack will be manually aligned
 ;      and an extra register will be allocated to hold the original stack
 ;      pointer (to not invalidate r0m etc.). To prevent the use of an extra
 ;      register as stack pointer, request a negative stack size.
@@ -117,8 +130,10 @@
 ; PROLOGUE can also be invoked by adding the same options to cglobal
 
 ; e.g.
-; cglobal foo, 2,3,0, dst, src, tmp
-; declares a function (foo), taking two args (dst and src) and one local variable (tmp)
+; cglobal foo, 2,3,7,0x40, dst, src, tmp
+; declares a function (foo) that automatically loads two arguments (dst and
+; src) into registers, uses one additional register (tmp) plus 7 vector
+; registers (m0-m6) and allocates 0x40 bytes of stack space.
 
 ; TODO Some functions can use some args directly from the stack. If they're the
 ; last args then you can just not declare them, but if they're in the middle
@@ -168,9 +183,9 @@
     %define e%1h %3
     %define r%1b %2
     %define e%1b %2
-%if ARCH_X86_64 == 0
-    %define r%1  e%1
-%endif
+    %if ARCH_X86_64 == 0
+        %define r%1 e%1
+    %endif
 %endmacro
 
 DECLARE_REG_SIZE ax, al, ah
@@ -280,7 +295,7 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
 
 %macro ASSERT 1
     %if (%1) == 0
-        %error assert failed
+        %error assertion ``%1'' failed
     %endif
 %endmacro
 
@@ -319,26 +334,28 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
     %assign n_arg_names %0
 %endmacro
 
+%define required_stack_alignment ((mmsize + 15) & ~15)
+
 %macro ALLOC_STACK 1-2 0 ; stack_size, n_xmm_regs (for win64 only)
     %ifnum %1
         %if %1 != 0
-            %assign %%stack_alignment ((mmsize + 15) & ~15)
+            %assign %%pad 0
             %assign stack_size %1
             %if stack_size < 0
                 %assign stack_size -stack_size
             %endif
-            %assign stack_size_padded stack_size
             %if WIN64
-                %assign stack_size_padded stack_size_padded + 32 ; reserve 32 bytes for shadow space
+                %assign %%pad %%pad + 32 ; shadow space
                 %if mmsize != 8
                     %assign xmm_regs_used %2
                     %if xmm_regs_used > 8
-                        %assign stack_size_padded stack_size_padded + (xmm_regs_used-8)*16
+                        %assign %%pad %%pad + (xmm_regs_used-8)*16 ; callee-saved xmm registers
                     %endif
                 %endif
             %endif
-            %if mmsize <= 16 && HAVE_ALIGNED_STACK
-                %assign stack_size_padded stack_size_padded + %%stack_alignment - gprsize - (stack_offset & (%%stack_alignment - 1))
+            %if required_stack_alignment <= STACK_ALIGNMENT
+                ; maintain the current stack alignment
+                %assign stack_size_padded stack_size + %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1))
                 SUB rsp, stack_size_padded
             %else
                 %assign %%reg_num (regs_used - 1)
@@ -347,17 +364,17 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
                 ; it, i.e. in [rsp+stack_size_padded], so we can restore the
                 ; stack in a single instruction (i.e. mov rsp, rstk or mov
                 ; rsp, [rsp+stack_size_padded])
-                mov  rstk, rsp
                 %if %1 < 0 ; need to store rsp on stack
-                    sub  rsp, gprsize+stack_size_padded
-                    and  rsp, ~(%%stack_alignment-1)
-                    %xdefine rstkm [rsp+stack_size_padded]
-                    mov rstkm, rstk
+                    %xdefine rstkm [rsp + stack_size + %%pad]
+                    %assign %%pad %%pad + gprsize
                 %else ; can keep rsp in rstk during whole function
-                    sub  rsp, stack_size_padded
-                    and  rsp, ~(%%stack_alignment-1)
                     %xdefine rstkm rstk
                 %endif
+                %assign stack_size_padded stack_size + ((%%pad + required_stack_alignment-1) & ~(required_stack_alignment-1))
+                mov rstk, rsp
+                and rsp, ~(required_stack_alignment-1)
+                sub rsp, stack_size_padded
+                movifnidn rstkm, rstk
             %endif
             WIN64_PUSH_XMM
         %endif
@@ -366,11 +383,14 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
 
 %macro SETUP_STACK_POINTER 1
     %ifnum %1
-        %if %1 != 0 && (HAVE_ALIGNED_STACK == 0 || mmsize == 32)
+        %if %1 != 0 && required_stack_alignment > STACK_ALIGNMENT
             %if %1 > 0
                 %assign regs_used (regs_used + 1)
-            %elif ARCH_X86_64 && regs_used == num_args && num_args <= 4 + UNIX64 * 2
-                %warning "Stack pointer will overwrite register argument"
+            %endif
+            %if ARCH_X86_64 && regs_used < 5 + UNIX64 * 3
+                ; Ensure that we don't clobber any registers containing arguments. For UNIX64 we also preserve r6 (rax)
+                ; since it's used as a hidden argument in vararg functions to specify the number of vector registers used.
+                %assign regs_used 5 + UNIX64 * 3
             %endif
         %endif
     %endif
@@ -440,7 +460,9 @@ DECLARE_REG 14, R15, 120
     %assign xmm_regs_used %1
     ASSERT xmm_regs_used <= 16
     %if xmm_regs_used > 8
-        %assign stack_size_padded (xmm_regs_used-8)*16 + (~stack_offset&8) + 32
+        ; Allocate stack space for callee-saved xmm registers plus shadow space and align the stack.
+        %assign %%pad (xmm_regs_used-8)*16 + 32
+        %assign stack_size_padded %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1))
         SUB rsp, stack_size_padded
     %endif
     WIN64_PUSH_XMM
@@ -456,7 +478,7 @@ DECLARE_REG 14, R15, 120
         %endrep
     %endif
     %if stack_size_padded > 0
-        %if stack_size > 0 && (mmsize == 32 || HAVE_ALIGNED_STACK == 0)
+        %if stack_size > 0 && required_stack_alignment > STACK_ALIGNMENT
             mov rsp, rstkm
         %else
             add %1, stack_size_padded
@@ -482,9 +504,9 @@ DECLARE_REG 14, R15, 120
 %macro RET 0
     WIN64_RESTORE_XMM_INTERNAL rsp
     POP_IF_USED 14, 13, 12, 11, 10, 9, 8, 7
-%if mmsize == 32
-    vzeroupper
-%endif
+    %if mmsize == 32
+        vzeroupper
+    %endif
     AUTO_REP_RET
 %endmacro
 
@@ -521,17 +543,17 @@ DECLARE_REG 14, R15, 72
 %define has_epilogue regs_used > 9 || mmsize == 32 || stack_size > 0
 
 %macro RET 0
-%if stack_size_padded > 0
-%if mmsize == 32 || HAVE_ALIGNED_STACK == 0
-    mov rsp, rstkm
-%else
-    add rsp, stack_size_padded
-%endif
-%endif
+    %if stack_size_padded > 0
+        %if required_stack_alignment > STACK_ALIGNMENT
+            mov rsp, rstkm
+        %else
+            add rsp, stack_size_padded
+        %endif
+    %endif
     POP_IF_USED 14, 13, 12, 11, 10, 9
-%if mmsize == 32
-    vzeroupper
-%endif
+    %if mmsize == 32
+        vzeroupper
+    %endif
     AUTO_REP_RET
 %endmacro
 
@@ -577,29 +599,29 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
 %define has_epilogue regs_used > 3 || mmsize == 32 || stack_size > 0
 
 %macro RET 0
-%if stack_size_padded > 0
-%if mmsize == 32 || HAVE_ALIGNED_STACK == 0
-    mov rsp, rstkm
-%else
-    add rsp, stack_size_padded
-%endif
-%endif
+    %if stack_size_padded > 0
+        %if required_stack_alignment > STACK_ALIGNMENT
+            mov rsp, rstkm
+        %else
+            add rsp, stack_size_padded
+        %endif
+    %endif
     POP_IF_USED 6, 5, 4, 3
-%if mmsize == 32
-    vzeroupper
-%endif
+    %if mmsize == 32
+        vzeroupper
+    %endif
     AUTO_REP_RET
 %endmacro
 
 %endif ;======================================================================
 
 %if WIN64 == 0
-%macro WIN64_SPILL_XMM 1
-%endmacro
-%macro WIN64_RESTORE_XMM 1
-%endmacro
-%macro WIN64_PUSH_XMM 0
-%endmacro
+    %macro WIN64_SPILL_XMM 1
+    %endmacro
+    %macro WIN64_RESTORE_XMM 1
+    %endmacro
+    %macro WIN64_PUSH_XMM 0
+    %endmacro
 %endif
 
 ; On AMD cpus <=K10, an ordinary ret is slow if it immediately follows either
@@ -612,24 +634,26 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
     %else
         rep ret
     %endif
+    annotate_function_size
 %endmacro
 
 %define last_branch_adr $$
 %macro AUTO_REP_RET 0
-    %ifndef cpuflags
-        times ((last_branch_adr-$)>>31)+1 rep ; times 1 iff $ != last_branch_adr.
-    %elif notcpuflag(ssse3)
-        times ((last_branch_adr-$)>>31)+1 rep
+    %if notcpuflag(ssse3)
+        times ((last_branch_adr-$)>>31)+1 rep ; times 1 iff $ == last_branch_adr.
     %endif
     ret
+    annotate_function_size
 %endmacro
 
 %macro BRANCH_INSTR 0-*
     %rep %0
         %macro %1 1-2 %1
             %2 %1
-            %%branch_instr:
-            %xdefine last_branch_adr %%branch_instr
+            %if notcpuflag(ssse3)
+                %%branch_instr equ $
+                %xdefine last_branch_adr %%branch_instr
+            %endif
         %endmacro
         %rotate 1
     %endrep
@@ -644,6 +668,7 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
     %elif %2
         jmp %1
     %endif
+    annotate_function_size
 %endmacro
 
 ;=============================================================================
@@ -665,6 +690,7 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
     cglobal_internal 0, %1 %+ SUFFIX, %2
 %endmacro
 %macro cglobal_internal 2-3+
+    annotate_function_size
     %if %1
         %xdefine %%FUNCTION_PREFIX private_prefix
         %xdefine %%VISIBILITY hidden
@@ -678,7 +704,8 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
         CAT_XDEFINE cglobaled_, %2, 1
     %endif
     %xdefine current_function %2
-    %ifidn __OUTPUT_FORMAT__,elf
+    %xdefine current_function_section __SECT__
+    %if FORMAT_ELF
         global %2:function %%VISIBILITY
     %else
         global %2
@@ -704,14 +731,16 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
 
 ; like cextern, but without the prefix
 %macro cextern_naked 1
-    %xdefine %1 mangle(%1)
+    %ifdef PREFIX
+        %xdefine %1 mangle(%1)
+    %endif
     CAT_XDEFINE cglobaled_, %1, 1
     extern %1
 %endmacro
 
 %macro const 1-2+
     %xdefine %1 mangle(private_prefix %+ _ %+ %1)
-    %ifidn __OUTPUT_FORMAT__,elf
+    %if FORMAT_ELF
         global %1:data hidden
     %else
         global %1
@@ -719,15 +748,28 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
     %1: %2
 %endmacro
 
-; This is needed for ELF, otherwise the GNU linker assumes the stack is
-; executable by default.
-%ifidn __OUTPUT_FORMAT__,elf
-[section .note.GNU-stack noalloc noexec nowrite progbits]
+; This is needed for ELF, otherwise the GNU linker assumes the stack is executable by default.
+%if FORMAT_ELF
+    [SECTION .note.GNU-stack noalloc noexec nowrite progbits]
 %endif
 
-; Overrides the default .text section.
-; Silences warnings when defining structures.
-%define __SECT__
+; Tell debuggers how large the function was.
+; This may be invoked multiple times per function; we rely on later instances overriding earlier ones.
+; This is invoked by RET and similar macros, and also cglobal does it for the previous function,
+; but if the last function in a source file doesn't use any of the standard macros for its epilogue,
+; then its size might be unspecified.
+%macro annotate_function_size 0
+    %ifdef __YASM_VER__
+        %ifdef current_function
+            %if FORMAT_ELF
+                current_function_section
+                %%ecf equ $
+                size current_function %%ecf - current_function
+                __SECT__
+            %endif
+        %endif
+    %endif
+%endmacro
 
 ; cpuflags
 
@@ -745,8 +787,8 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
 %assign cpuflags_avx      (1<<11)| cpuflags_sse42
 %assign cpuflags_xop      (1<<12)| cpuflags_avx
 %assign cpuflags_fma4     (1<<13)| cpuflags_avx
-%assign cpuflags_avx2     (1<<14)| cpuflags_avx
-%assign cpuflags_fma3     (1<<15)| cpuflags_avx
+%assign cpuflags_fma3     (1<<14)| cpuflags_avx
+%assign cpuflags_avx2     (1<<15)| cpuflags_fma3
 
 %assign cpuflags_cache32  (1<<16)
 %assign cpuflags_cache64  (1<<17)
@@ -756,9 +798,11 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
 %assign cpuflags_atom     (1<<21)
 %assign cpuflags_bmi1     (1<<22)|cpuflags_lzcnt
 %assign cpuflags_bmi2     (1<<23)|cpuflags_bmi1
+%assign cpuflags_aesni    (1<<24)|cpuflags_sse42
 
-%define    cpuflag(x) ((cpuflags & (cpuflags_ %+ x)) == (cpuflags_ %+ x))
-%define notcpuflag(x) ((cpuflags & (cpuflags_ %+ x)) != (cpuflags_ %+ x))
+; Returns a boolean value expressing whether or not the specified cpuflag is enabled.
+%define    cpuflag(x) (((((cpuflags & (cpuflags_ %+ x)) ^ (cpuflags_ %+ x)) - 1) >> 31) & 1)
+%define notcpuflag(x) (cpuflag(x) ^ 1)
 
 ; Takes an arbitrary number of cpuflags from the above list.
 ; All subsequent functions (up to the next INIT_CPUFLAGS) is built for the specified cpu.
@@ -795,7 +839,7 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
         %endif
     %endif
 
-    %if cpuflag(sse2)
+    %if ARCH_X86_64 || cpuflag(sse2)
         CPUNOP amdnop
     %else
         CPUNOP basicnop
@@ -827,14 +871,14 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
     %define movnta movntq
     %assign %%i 0
     %rep 8
-    CAT_XDEFINE m, %%i, mm %+ %%i
-    CAT_XDEFINE nnmm, %%i, %%i
-    %assign %%i %%i+1
+        CAT_XDEFINE m, %%i, mm %+ %%i
+        CAT_XDEFINE nnmm, %%i, %%i
+        %assign %%i %%i+1
     %endrep
     %rep 8
-    CAT_UNDEF m, %%i
-    CAT_UNDEF nnmm, %%i
-    %assign %%i %%i+1
+        CAT_UNDEF m, %%i
+        CAT_UNDEF nnmm, %%i
+        %assign %%i %%i+1
     %endrep
     INIT_CPUFLAGS %1
 %endmacro
@@ -845,7 +889,7 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
     %define mmsize 16
     %define num_mmregs 8
     %if ARCH_X86_64
-    %define num_mmregs 16
+        %define num_mmregs 16
     %endif
     %define mova movdqa
     %define movu movdqu
@@ -853,9 +897,9 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
     %define movnta movntdq
     %assign %%i 0
     %rep num_mmregs
-    CAT_XDEFINE m, %%i, xmm %+ %%i
-    CAT_XDEFINE nnxmm, %%i, %%i
-    %assign %%i %%i+1
+        CAT_XDEFINE m, %%i, xmm %+ %%i
+        CAT_XDEFINE nnxmm, %%i, %%i
+        %assign %%i %%i+1
     %endrep
     INIT_CPUFLAGS %1
 %endmacro
@@ -866,7 +910,7 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
     %define mmsize 32
     %define num_mmregs 8
     %if ARCH_X86_64
-    %define num_mmregs 16
+        %define num_mmregs 16
     %endif
     %define mova movdqa
     %define movu movdqu
@@ -874,9 +918,9 @@ BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae,
     %define movnta movntdq
     %assign %%i 0
     %rep num_mmregs
-    CAT_XDEFINE m, %%i, ymm %+ %%i
-    CAT_XDEFINE nnymm, %%i, %%i
-    %assign %%i %%i+1
+        CAT_XDEFINE m, %%i, ymm %+ %%i
+        CAT_XDEFINE nnymm, %%i, %%i
+        %assign %%i %%i+1
     %endrep
     INIT_CPUFLAGS %1
 %endmacro
@@ -900,7 +944,7 @@ INIT_XMM
 %assign i 0
 %rep 16
     DECLARE_MMCAST i
-%assign i i+1
+    %assign i i+1
 %endrep
 
 ; I often want to use macros that permute their arguments. e.g. there's no
@@ -918,23 +962,23 @@ INIT_XMM
 ; doesn't cost any cycles.
 
 %macro PERMUTE 2-* ; takes a list of pairs to swap
-%rep %0/2
-    %xdefine %%tmp%2 m%2
-    %rotate 2
-%endrep
-%rep %0/2
-    %xdefine m%1 %%tmp%2
-    CAT_XDEFINE nn, m%1, %1
-    %rotate 2
-%endrep
+    %rep %0/2
+        %xdefine %%tmp%2 m%2
+        %rotate 2
+    %endrep
+    %rep %0/2
+        %xdefine m%1 %%tmp%2
+        CAT_XDEFINE nn, m%1, %1
+        %rotate 2
+    %endrep
 %endmacro
 
 %macro SWAP 2+ ; swaps a single chain (sometimes more concise than pairs)
-%ifnum %1 ; SWAP 0, 1, ...
-    SWAP_INTERNAL_NUM %1, %2
-%else ; SWAP m0, m1, ...
-    SWAP_INTERNAL_NAME %1, %2
-%endif
+    %ifnum %1 ; SWAP 0, 1, ...
+        SWAP_INTERNAL_NUM %1, %2
+    %else ; SWAP m0, m1, ...
+        SWAP_INTERNAL_NAME %1, %2
+    %endif
 %endmacro
 
 %macro SWAP_INTERNAL_NUM 2-*
@@ -944,7 +988,7 @@ INIT_XMM
         %xdefine m%2 %%tmp
         CAT_XDEFINE nn, m%1, %1
         CAT_XDEFINE nn, m%2, %2
-    %rotate 1
+        %rotate 1
     %endrep
 %endmacro
 
@@ -952,7 +996,7 @@ INIT_XMM
     %xdefine %%args nn %+ %1
     %rep %0-1
         %xdefine %%args %%args, nn %+ %2
-    %rotate 1
+        %rotate 1
     %endrep
     SWAP_INTERNAL_NUM %%args
 %endmacro
@@ -969,7 +1013,7 @@ INIT_XMM
     %assign %%i 0
     %rep num_mmregs
         CAT_XDEFINE %%f, %%i, m %+ %%i
-    %assign %%i %%i+1
+        %assign %%i %%i+1
     %endrep
 %endmacro
 
@@ -979,7 +1023,7 @@ INIT_XMM
         %rep num_mmregs
             CAT_XDEFINE m, %%i, %1_m %+ %%i
             CAT_XDEFINE nn, m %+ %%i, %%i
-        %assign %%i %%i+1
+            %assign %%i %%i+1
         %endrep
     %endif
 %endmacro
@@ -1035,7 +1079,7 @@ INIT_XMM
     %endif
     CAT_XDEFINE sizeofxmm, i, 16
     CAT_XDEFINE sizeofymm, i, 32
-%assign i i+1
+    %assign i i+1
 %endrep
 %undef i
 
@@ -1154,12 +1198,12 @@ AVX_INSTR addsd, sse2, 1, 0, 1
 AVX_INSTR addss, sse, 1, 0, 1
 AVX_INSTR addsubpd, sse3, 1, 0, 0
 AVX_INSTR addsubps, sse3, 1, 0, 0
-AVX_INSTR aesdec, fnord, 0, 0, 0
-AVX_INSTR aesdeclast, fnord, 0, 0, 0
-AVX_INSTR aesenc, fnord, 0, 0, 0
-AVX_INSTR aesenclast, fnord, 0, 0, 0
-AVX_INSTR aesimc
-AVX_INSTR aeskeygenassist
+AVX_INSTR aesdec, aesni, 0, 0, 0
+AVX_INSTR aesdeclast, aesni, 0, 0, 0
+AVX_INSTR aesenc, aesni, 0, 0, 0
+AVX_INSTR aesenclast, aesni, 0, 0, 0
+AVX_INSTR aesimc, aesni
+AVX_INSTR aeskeygenassist, aesni
 AVX_INSTR andnpd, sse2, 1, 0, 0
 AVX_INSTR andnps, sse, 1, 0, 0
 AVX_INSTR andpd, sse2, 1, 0, 1
@@ -1412,64 +1456,81 @@ AVX_INSTR pfmul, 3dnow, 1, 0, 1
     %else
         CAT_XDEFINE q, j, i
     %endif
-%assign i i+1
+    %assign i i+1
 %endrep
 %undef i
 %undef j
 
-; tzcnt is equivalent to "rep bsf" and is backwards-compatible with bsf.
-; This lets us use tzcnt without bumping the yasm version requirement yet.
-%define tzcnt rep bsf
-
-; convert FMA4 to FMA3 if possible
-%macro FMA4_INSTR 4
-    %macro %1 4-8 %1, %2, %3, %4
-        %if cpuflag(fma4)
+%macro FMA_INSTR 3
+    %macro %1 4-7 %1, %2, %3
+        %if cpuflag(xop)
             v%5 %1, %2, %3, %4
-        %elifidn %1, %2
-            v%6 %1, %4, %3 ; %1 = %1 * %3 + %4
-        %elifidn %1, %3
-            v%7 %1, %2, %4 ; %1 = %2 * %1 + %4
-        %elifidn %1, %4
-            v%8 %1, %2, %3 ; %1 = %2 * %3 + %1
+        %elifnidn %1, %4
+            %6 %1, %2, %3
+            %7 %1, %4
         %else
-            %error fma3 emulation of ``%5 %1, %2, %3, %4'' is not supported
+            %error non-xop emulation of ``%5 %1, %2, %3, %4'' is not supported
         %endif
     %endmacro
 %endmacro
 
-FMA4_INSTR fmaddpd, fmadd132pd, fmadd213pd, fmadd231pd
-FMA4_INSTR fmaddps, fmadd132ps, fmadd213ps, fmadd231ps
-FMA4_INSTR fmaddsd, fmadd132sd, fmadd213sd, fmadd231sd
-FMA4_INSTR fmaddss, fmadd132ss, fmadd213ss, fmadd231ss
-
-FMA4_INSTR fmaddsubpd, fmaddsub132pd, fmaddsub213pd, fmaddsub231pd
-FMA4_INSTR fmaddsubps, fmaddsub132ps, fmaddsub213ps, fmaddsub231ps
-FMA4_INSTR fmsubaddpd, fmsubadd132pd, fmsubadd213pd, fmsubadd231pd
-FMA4_INSTR fmsubaddps, fmsubadd132ps, fmsubadd213ps, fmsubadd231ps
-
-FMA4_INSTR fmsubpd, fmsub132pd, fmsub213pd, fmsub231pd
-FMA4_INSTR fmsubps, fmsub132ps, fmsub213ps, fmsub231ps
-FMA4_INSTR fmsubsd, fmsub132sd, fmsub213sd, fmsub231sd
-FMA4_INSTR fmsubss, fmsub132ss, fmsub213ss, fmsub231ss
-
-FMA4_INSTR fnmaddpd, fnmadd132pd, fnmadd213pd, fnmadd231pd
-FMA4_INSTR fnmaddps, fnmadd132ps, fnmadd213ps, fnmadd231ps
-FMA4_INSTR fnmaddsd, fnmadd132sd, fnmadd213sd, fnmadd231sd
-FMA4_INSTR fnmaddss, fnmadd132ss, fnmadd213ss, fnmadd231ss
-
-FMA4_INSTR fnmsubpd, fnmsub132pd, fnmsub213pd, fnmsub231pd
-FMA4_INSTR fnmsubps, fnmsub132ps, fnmsub213ps, fnmsub231ps
-FMA4_INSTR fnmsubsd, fnmsub132sd, fnmsub213sd, fnmsub231sd
-FMA4_INSTR fnmsubss, fnmsub132ss, fnmsub213ss, fnmsub231ss
-
-; workaround: vpbroadcastq is broken in x86_32 due to a yasm bug
-%if ARCH_X86_64 == 0
-%macro vpbroadcastq 2
-%if sizeof%1 == 16
-    movddup %1, %2
-%else
-    vbroadcastsd %1, %2
-%endif
-%endmacro
+FMA_INSTR  pmacsww,  pmullw, paddw
+FMA_INSTR  pmacsdd,  pmulld, paddd ; sse4 emulation
+FMA_INSTR pmacsdql,  pmuldq, paddq ; sse4 emulation
+FMA_INSTR pmadcswd, pmaddwd, paddd
+
+; tzcnt is equivalent to "rep bsf" and is backwards-compatible with bsf.
+; This lets us use tzcnt without bumping the yasm version requirement yet.
+%define tzcnt rep bsf
+
+; Macros for consolidating FMA3 and FMA4 using 4-operand (dst, src1, src2, src3) syntax.
+; FMA3 is only possible if dst is the same as one of the src registers.
+; Either src2 or src3 can be a memory operand.
+%macro FMA4_INSTR 2-*
+    %push fma4_instr
+    %xdefine %$prefix %1
+    %rep %0 - 1
+        %macro %$prefix%2 4-6 %$prefix, %2
+            %if notcpuflag(fma3) && notcpuflag(fma4)
+                %error use of ``%5%6'' fma instruction in cpuname function: current_function
+            %elif cpuflag(fma4)
+                v%5%6 %1, %2, %3, %4
+            %elifidn %1, %2
+                ; If %3 or %4 is a memory operand it needs to be encoded as the last operand.
+                %ifid %3
+                    v%{5}213%6 %2, %3, %4
+                %else
+                    v%{5}132%6 %2, %4, %3
+                %endif
+            %elifidn %1, %3
+                v%{5}213%6 %3, %2, %4
+            %elifidn %1, %4
+                v%{5}231%6 %4, %2, %3
+            %else
+                %error fma3 emulation of ``%5%6 %1, %2, %3, %4'' is not supported
+            %endif
+        %endmacro
+        %rotate 1
+    %endrep
+    %pop
+%endmacro
+
+FMA4_INSTR fmadd,    pd, ps, sd, ss
+FMA4_INSTR fmaddsub, pd, ps
+FMA4_INSTR fmsub,    pd, ps, sd, ss
+FMA4_INSTR fmsubadd, pd, ps
+FMA4_INSTR fnmadd,   pd, ps, sd, ss
+FMA4_INSTR fnmsub,   pd, ps, sd, ss
+
+; workaround: vpbroadcastq is broken in x86_32 due to a yasm bug (fixed in 1.3.0)
+%ifdef __YASM_VER__
+    %if __YASM_VERSION_ID__ < 0x01030000 && ARCH_X86_64 == 0
+        %macro vpbroadcastq 2
+            %if sizeof%1 == 16
+                movddup %1, %2
+            %else
+                vbroadcastsd %1, %2
+            %endif
+        %endmacro
+    %endif
 %endif
diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
index d6702c14..b09fa813 100644
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@ -210,13 +210,13 @@
 %endif
 %endmacro
 
-%macro PSIGNW_MMX 2
+%macro PSIGNW 2
+%if cpuflag(ssse3)
+    psignw     %1, %2
+%else
     pxor       %1, %2
     psubw      %1, %2
-%endmacro
-
-%macro PSIGNW_SSSE3 2
-    psignw     %1, %2
+%endif
 %endmacro
 
 %macro ABS1 2
@@ -765,25 +765,6 @@
 %endif
 %endmacro
 
-%macro PMA_EMU 4
-    %macro %1 5-8 %2, %3, %4
-        %if cpuflag(xop)
-            v%6 %1, %2, %3, %4
-        %elifidn %1, %4
-            %7 %5, %2, %3
-            %8 %1, %4, %5
-        %else
-            %7 %1, %2, %3
-            %8 %1, %4
-        %endif
-    %endmacro
-%endmacro
-
-PMA_EMU  PMACSWW,  pmacsww,  pmullw, paddw
-PMA_EMU  PMACSDD,  pmacsdd,  pmulld, paddd ; sse4 emulation
-PMA_EMU PMACSDQL, pmacsdql,  pmuldq, paddq ; sse4 emulation
-PMA_EMU PMADCSWD, pmadcswd, pmaddwd, paddd
-
 ; Wrapper for non-FMA version of fmaddps
 %macro FMULADD_PS 5
     %if cpuflag(fma3) || cpuflag(fma4)
diff --git a/libavutil/x86_cpu.h b/libavutil/x86_cpu.h
deleted file mode 100644
index bec1c777..00000000
--- a/libavutil/x86_cpu.h
+++ /dev/null
@@ -1 +0,0 @@
-#include "libavutil/x86/asm.h"
diff --git a/libavutil/xtea.c b/libavutil/xtea.c
index 1750cbc4..2139aa55 100644
--- a/libavutil/xtea.c
+++ b/libavutil/xtea.c
@@ -31,8 +31,14 @@
 #include "avutil.h"
 #include "common.h"
 #include "intreadwrite.h"
+#include "mem.h"
 #include "xtea.h"
 
+AVXTEA *av_xtea_alloc(void)
+{
+    return av_mallocz(sizeof(struct AVXTEA));
+}
+
 void av_xtea_init(AVXTEA *ctx, const uint8_t key[16])
 {
     int i;
@@ -41,6 +47,14 @@ void av_xtea_init(AVXTEA *ctx, const uint8_t key[16])
         ctx->key[i] = AV_RB32(key + (i << 2));
 }
 
+void av_xtea_le_init(AVXTEA *ctx, const uint8_t key[16])
+{
+    int i;
+
+    for (i = 0; i < 4; i++)
+        ctx->key[i] = AV_RL32(key + (i << 2));
+}
+
 static void xtea_crypt_ecb(AVXTEA *ctx, uint8_t *dst, const uint8_t *src,
                            int decrypt, uint8_t *iv)
 {
@@ -161,14 +175,51 @@ static void xtea_crypt_ecb(AVXTEA *ctx, uint8_t *dst, const uint8_t *src,
     AV_WB32(dst + 4, v1);
 }
 
-void av_xtea_crypt(AVXTEA *ctx, uint8_t *dst, const uint8_t *src, int count,
-                   uint8_t *iv, int decrypt)
+static void xtea_le_crypt_ecb(AVXTEA *ctx, uint8_t *dst, const uint8_t *src,
+                              int decrypt, uint8_t *iv)
+{
+    uint32_t v0, v1;
+    int i;
+
+    v0 = AV_RL32(src);
+    v1 = AV_RL32(src + 4);
+
+    if (decrypt) {
+        uint32_t delta = 0x9E3779B9, sum = delta * 32;
+
+        for (i = 0; i < 32; i++) {
+            v1 -= (((v0 << 4) ^ (v0 >> 5)) + v0) ^ (sum + ctx->key[(sum >> 11) & 3]);
+            sum -= delta;
+            v0 -= (((v1 << 4) ^ (v1 >> 5)) + v1) ^ (sum + ctx->key[sum & 3]);
+        }
+        if (iv) {
+            v0 ^= AV_RL32(iv);
+            v1 ^= AV_RL32(iv + 4);
+            memcpy(iv, src, 8);
+        }
+    } else {
+        uint32_t sum = 0, delta = 0x9E3779B9;
+
+        for (i = 0; i < 32; i++) {
+            v0 += (((v1 << 4) ^ (v1 >> 5)) + v1) ^ (sum + ctx->key[sum & 3]);
+            sum += delta;
+            v1 += (((v0 << 4) ^ (v0 >> 5)) + v0) ^ (sum + ctx->key[(sum >> 11) & 3]);
+        }
+    }
+
+    AV_WL32(dst, v0);
+    AV_WL32(dst + 4, v1);
+}
+
+static void xtea_crypt(AVXTEA *ctx, uint8_t *dst, const uint8_t *src, int count,
+                       uint8_t *iv, int decrypt,
+                       void (*crypt)(AVXTEA *, uint8_t *, const uint8_t *, int, uint8_t *))
 {
     int i;
 
     if (decrypt) {
         while (count--) {
-            xtea_crypt_ecb(ctx, dst, src, decrypt, iv);
+            crypt(ctx, dst, src, decrypt, iv);
 
             src   += 8;
             dst   += 8;
@@ -178,10 +229,10 @@ void av_xtea_crypt(AVXTEA *ctx, uint8_t *dst, const uint8_t *src, int count,
             if (iv) {
                 for (i = 0; i < 8; i++)
                     dst[i] = src[i] ^ iv[i];
-                xtea_crypt_ecb(ctx, dst, dst, decrypt, NULL);
+                crypt(ctx, dst, dst, decrypt, NULL);
                 memcpy(iv, dst, 8);
             } else {
-                xtea_crypt_ecb(ctx, dst, src, decrypt, NULL);
+                crypt(ctx, dst, src, decrypt, NULL);
             }
             src   += 8;
             dst   += 8;
@@ -189,6 +240,18 @@ void av_xtea_crypt(AVXTEA *ctx, uint8_t *dst, const uint8_t *src, int count,
     }
 }
 
+void av_xtea_crypt(AVXTEA *ctx, uint8_t *dst, const uint8_t *src, int count,
+                   uint8_t *iv, int decrypt)
+{
+    xtea_crypt(ctx, dst, src, count, iv, decrypt, xtea_crypt_ecb);
+}
+
+void av_xtea_le_crypt(AVXTEA *ctx, uint8_t *dst, const uint8_t *src, int count,
+                      uint8_t *iv, int decrypt)
+{
+    xtea_crypt(ctx, dst, src, count, iv, decrypt, xtea_le_crypt_ecb);
+}
+
 #ifdef TEST
 #include <stdio.h>
 
@@ -229,9 +292,10 @@ static const uint8_t xtea_test_ct[XTEA_NUM_TESTS][8] = {
 
 static void test_xtea(AVXTEA *ctx, uint8_t *dst, const uint8_t *src,
                       const uint8_t *ref, int len, uint8_t *iv, int dir,
-                      const char *test)
+                      const char *test,
+                      void (*crypt)(AVXTEA *, uint8_t *, const uint8_t *, int, uint8_t *, int))
 {
-    av_xtea_crypt(ctx, dst, src, len, iv, dir);
+    crypt(ctx, dst, src, len, iv, dir);
     if (memcmp(dst, ref, 8*len)) {
         int i;
         printf("%s failed\ngot      ", test);
@@ -248,8 +312,8 @@ static void test_xtea(AVXTEA *ctx, uint8_t *dst, const uint8_t *src,
 int main(void)
 {
     AVXTEA ctx;
-    uint8_t buf[8], iv[8];
-    int i;
+    uint8_t buf[16], iv[8];
+    int i, j;
     static const uint8_t src[32] = "HelloWorldHelloWorldHelloWorld";
     uint8_t ct[32];
     uint8_t pl[32];
@@ -257,8 +321,18 @@ int main(void)
     for (i = 0; i < XTEA_NUM_TESTS; i++) {
         av_xtea_init(&ctx, xtea_test_key[i]);
 
-        test_xtea(&ctx, buf, xtea_test_pt[i], xtea_test_ct[i], 1, NULL, 0, "encryption");
-        test_xtea(&ctx, buf, xtea_test_ct[i], xtea_test_pt[i], 1, NULL, 1, "decryption");
+        test_xtea(&ctx, buf, xtea_test_pt[i], xtea_test_ct[i], 1, NULL, 0, "encryption", av_xtea_crypt);
+        test_xtea(&ctx, buf, xtea_test_ct[i], xtea_test_pt[i], 1, NULL, 1, "decryption", av_xtea_crypt);
+
+        for (j = 0; j < 4; j++)
+            AV_WL32(&buf[4*j], AV_RB32(&xtea_test_key[i][4*j]));
+        av_xtea_le_init(&ctx, buf);
+        for (j = 0; j < 2; j++) {
+            AV_WL32(&ct[4*j], AV_RB32(&xtea_test_ct[i][4*j]));
+            AV_WL32(&pl[4*j], AV_RB32(&xtea_test_pt[i][4*j]));
+        }
+        test_xtea(&ctx, buf, pl, ct, 1, NULL, 0, "encryption", av_xtea_le_crypt);
+        test_xtea(&ctx, buf, ct, pl, 1, NULL, 1, "decryption", av_xtea_le_crypt);
 
         /* encrypt */
         memcpy(iv, "HALLO123", 8);
@@ -266,10 +340,10 @@ int main(void)
 
         /* decrypt into pl */
         memcpy(iv, "HALLO123", 8);
-        test_xtea(&ctx, pl, ct, src, 4, iv, 1, "CBC decryption");
+        test_xtea(&ctx, pl, ct, src, 4, iv, 1, "CBC decryption", av_xtea_crypt);
 
         memcpy(iv, "HALLO123", 8);
-        test_xtea(&ctx, ct, ct, src, 4, iv, 1, "CBC inplace decryption");
+        test_xtea(&ctx, ct, ct, src, 4, iv, 1, "CBC inplace decryption", av_xtea_crypt);
     }
 
     printf("Test encryption/decryption success.\n");
diff --git a/libavutil/xtea.h b/libavutil/xtea.h
index 6f1e71e3..735427c1 100644
--- a/libavutil/xtea.h
+++ b/libavutil/xtea.h
@@ -36,16 +36,32 @@ typedef struct AVXTEA {
     uint32_t key[16];
 } AVXTEA;
 
+/**
+ * Allocate an AVXTEA context.
+ */
+AVXTEA *av_xtea_alloc(void);
+
 /**
  * Initialize an AVXTEA context.
  *
  * @param ctx an AVXTEA context
- * @param key a key of 16 bytes used for encryption/decryption
+ * @param key a key of 16 bytes used for encryption/decryption,
+ *            interpreted as big endian 32 bit numbers
  */
 void av_xtea_init(struct AVXTEA *ctx, const uint8_t key[16]);
 
 /**
- * Encrypt or decrypt a buffer using a previously initialized context.
+ * Initialize an AVXTEA context.
+ *
+ * @param ctx an AVXTEA context
+ * @param key a key of 16 bytes used for encryption/decryption,
+ *            interpreted as little endian 32 bit numbers
+ */
+void av_xtea_le_init(struct AVXTEA *ctx, const uint8_t key[16]);
+
+/**
+ * Encrypt or decrypt a buffer using a previously initialized context,
+ * in big endian format.
  *
  * @param ctx an AVXTEA context
  * @param dst destination array, can be equal to src
@@ -57,6 +73,20 @@ void av_xtea_init(struct AVXTEA *ctx, const uint8_t key[16]);
 void av_xtea_crypt(struct AVXTEA *ctx, uint8_t *dst, const uint8_t *src,
                    int count, uint8_t *iv, int decrypt);
 
+/**
+ * Encrypt or decrypt a buffer using a previously initialized context,
+ * in little endian format.
+ *
+ * @param ctx an AVXTEA context
+ * @param dst destination array, can be equal to src
+ * @param src source array, can be equal to dst
+ * @param count number of 8 byte blocks
+ * @param iv initialization vector for CBC mode, if NULL then ECB will be used
+ * @param decrypt 0 for encryption, 1 for decryption
+ */
+void av_xtea_le_crypt(struct AVXTEA *ctx, uint8_t *dst, const uint8_t *src,
+                      int count, uint8_t *iv, int decrypt);
+
 /**
  * @}
  */
diff --git a/libpostproc/postprocess.c b/libpostproc/postprocess.c
index 3b86f939..1dc719cf 100644
--- a/libpostproc/postprocess.c
+++ b/libpostproc/postprocess.c
@@ -973,7 +973,7 @@ void  pp_postprocess(const uint8_t * src[3], const int srcStride[3],
         int i;
         const int count= FFMAX(mbHeight * absQPStride, mbWidth);
         for(i=0; i<(count>>2); i++){
-            ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
+            AV_WN32(c->stdQPTable + (i<<2), AV_RN32(QP_store + (i<<2)) >> 1 & 0x7F7F7F7F);
         }
         for(i<<=2; i<count; i++){
             c->stdQPTable[i] = QP_store[i]>>1;
diff --git a/libpostproc/postprocess.h b/libpostproc/postprocess.h
index e00ed968..2b55ed67 100644
--- a/libpostproc/postprocess.h
+++ b/libpostproc/postprocess.h
@@ -51,7 +51,9 @@ const char *postproc_license(void);
 
 #define PP_QUALITY_MAX 6
 
-#define QP_STORE_T int8_t
+#if FF_API_QP_TYPE
+#define QP_STORE_T int8_t //deprecated
+#endif
 
 #include <inttypes.h>
 
@@ -69,7 +71,7 @@ extern const char pp_help[]; ///< a simple help text
 void  pp_postprocess(const uint8_t * src[3], const int srcStride[3],
                      uint8_t * dst[3], const int dstStride[3],
                      int horizontalSize, int verticalSize,
-                     const QP_STORE_T *QP_store,  int QP_stride,
+                     const int8_t *QP_store,  int QP_stride,
                      pp_mode *mode, pp_context *ppContext, int pict_type);
 
 
diff --git a/libpostproc/postprocess_template.c b/libpostproc/postprocess_template.c
index c7ad3f03..b01be58d 100644
--- a/libpostproc/postprocess_template.c
+++ b/libpostproc/postprocess_template.c
@@ -1317,7 +1317,7 @@ DERING_CORE((%0, %1, 8)    ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,
         "1:                        \n\t"
         : : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb), "m"(c->pQPb2), "q"(tmp)
           NAMED_CONSTRAINTS_ADD(deringThreshold,b00,b02,b08)
-        : "%"REG_a, "%"REG_d, "%"REG_SP
+        : "%"REG_a, "%"REG_d, "%"REG_sp
     );
 #else // HAVE_7REGS && (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW)
     int y;
@@ -1383,7 +1383,7 @@ DERING_CORE((%0, %1, 8)    ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,
 #ifdef DEBUG_DERING_THRESHOLD
                     __asm__ volatile("emms\n\t":);
                     {
-                    static long long numPixels=0;
+                    static uint64_t numPixels=0;
                     if(x!=1 && x!=8 && y!=1 && y!=8) numPixels++;
 //                    if((max-min)<20 || (max-min)*QP<200)
 //                    if((max-min)*QP < 500)
diff --git a/libpostproc/version.h b/libpostproc/version.h
index 59c24660..8f625821 100644
--- a/libpostproc/version.h
+++ b/libpostproc/version.h
@@ -18,8 +18,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef POSTPROC_POSTPROCESS_VERSION_H
-#define POSTPROC_POSTPROCESS_VERSION_H
+#ifndef POSTPROC_VERSION_H
+#define POSTPROC_VERSION_H
 
 /**
  * @file
@@ -28,8 +28,8 @@
 
 #include "libavutil/avutil.h"
 
-#define LIBPOSTPROC_VERSION_MAJOR  53
-#define LIBPOSTPROC_VERSION_MINOR   3
+#define LIBPOSTPROC_VERSION_MAJOR  54
+#define LIBPOSTPROC_VERSION_MINOR   0
 #define LIBPOSTPROC_VERSION_MICRO 100
 
 #define LIBPOSTPROC_VERSION_INT AV_VERSION_INT(LIBPOSTPROC_VERSION_MAJOR, \
@@ -42,4 +42,8 @@
 
 #define LIBPOSTPROC_IDENT       "postproc" AV_STRINGIFY(LIBPOSTPROC_VERSION)
 
-#endif /* POSTPROC_POSTPROCESS_VERSION_H */
+#ifndef FF_API_QP_TYPE
+#define FF_API_QP_TYPE     (LIBPOSTPROC_VERSION_MAJOR < 55)
+#endif
+
+#endif /* POSTPROC_VERSION_H */
diff --git a/library.mak b/library.mak
index 29460b8e..3e1082c5 100644
--- a/library.mak
+++ b/library.mak
@@ -28,7 +28,7 @@ $(SUBDIR)x86/%$(DEFAULT_YASMD).asm: $(SUBDIR)x86/%.asm
 
 $(SUBDIR)x86/%.o: $(SUBDIR)x86/%$(YASMD).asm
 	$(DEPYASM) $(YASMFLAGS) -I $(<D)/ -M -o $@ $< > $(@:.o=.d)
-	$(YASM) $(YASMFLAGS) -I $(<D)/ -o $@ $<
+	$(YASM) $(YASMFLAGS) -I $(<D)/ -o $@ $(patsubst $(SRC_PATH)/%,$(SRC_LINK)/%,$<)
 	-$(if $(ASMSTRIPFLAGS), $(STRIP) $(ASMSTRIPFLAGS) $@)
 
 LIBOBJS := $(OBJS) $(SUBDIR)%.h.o $(TESTOBJS)
@@ -58,7 +58,7 @@ $(SUBDIR)$(SLIBNAME): $(SUBDIR)$(SLIBNAME_WITH_MAJOR)
 
 $(SUBDIR)$(SLIBNAME_WITH_MAJOR): $(OBJS) $(SLIBOBJS) $(SUBDIR)lib$(NAME).ver
 	$(SLIB_CREATE_DEF_CMD)
-	$$(LD) $(SHFLAGS) $(LDFLAGS) $$(LD_O) $$(filter %.o,$$^) $(FFEXTRALIBS)
+	$$(LD) $(SHFLAGS) $(LDFLAGS) $(LDLIBFLAGS) $$(LD_O) $$(filter %.o,$$^) $(FFEXTRALIBS)
 	$(SLIB_EXTRA_CMD)
 
 ifdef SUBDIR
diff --git a/libswresample/audioconvert.h b/libswresample/audioconvert.h
index 2e983df2..1ca30c2a 100644
--- a/libswresample/audioconvert.h
+++ b/libswresample/audioconvert.h
@@ -20,8 +20,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef SWR_AUDIOCONVERT_H
-#define SWR_AUDIOCONVERT_H
+#ifndef SWRESAMPLE_AUDIOCONVERT_H
+#define SWRESAMPLE_AUDIOCONVERT_H
 
 /**
  * @file
@@ -75,4 +75,4 @@ void swri_audio_convert_free(AudioConvert **ctx);
  */
 int swri_audio_convert(AudioConvert *ctx, AudioData *out, AudioData *in, int len);
 
-#endif /* AUDIOCONVERT_H */
+#endif /* SWRESAMPLE_AUDIOCONVERT_H */
diff --git a/libswresample/dither.c b/libswresample/dither.c
index 248062aa..08c793d4 100644
--- a/libswresample/dither.c
+++ b/libswresample/dither.c
@@ -109,7 +109,7 @@ av_cold int swri_dither_init(SwrContext *s, enum AVSampleFormat out_fmt, enum AV
     memset(s->dither.ns_errors, 0, sizeof(s->dither.ns_errors));
     for (i=0; filters[i].coefs; i++) {
         const filter_t *f = &filters[i];
-        if (fabs(s->out_sample_rate - f->rate) / f->rate <= .05 && f->name == s->dither.method) {
+        if (llabs(s->out_sample_rate - f->rate)*20 <= f->rate && f->name == s->dither.method) {
             int j;
             s->dither.ns_taps = f->len;
             for (j=0; j<f->len; j++)
diff --git a/libswresample/dither_template.c b/libswresample/dither_template.c
index 1e35dfb0..1f535de3 100644
--- a/libswresample/dither_template.c
+++ b/libswresample/dither_template.c
@@ -1,3 +1,20 @@
+/*
+ * This file is part of libswresample
+ *
+ * libswresample is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * libswresample is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with libswresample; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
 
 #if defined(TEMPLATE_DITHER_DBL)
 #    define RENAME(N) N ## _double
diff --git a/libswresample/options.c b/libswresample/options.c
index 1bc1a705..eac1a7c9 100644
--- a/libswresample/options.c
+++ b/libswresample/options.c
@@ -51,10 +51,10 @@ static const AVOption options[]={
 {"out_sample_fmt"       , "set output sample format"    , OFFSET(out_sample_fmt ), AV_OPT_TYPE_SAMPLE_FMT , {.i64=AV_SAMPLE_FMT_NONE}, -1   , INT_MAX, PARAM},
 {"tsf"                  , "set internal sample format"  , OFFSET(user_int_sample_fmt), AV_OPT_TYPE_SAMPLE_FMT , {.i64=AV_SAMPLE_FMT_NONE}, -1   , INT_MAX, PARAM},
 {"internal_sample_fmt"  , "set internal sample format"  , OFFSET(user_int_sample_fmt), AV_OPT_TYPE_SAMPLE_FMT , {.i64=AV_SAMPLE_FMT_NONE}, -1   , INT_MAX, PARAM},
-{"icl"                  , "set input channel layout"    , OFFSET(user_in_ch_layout ), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=0           }, 0    , INT64_MAX , PARAM, "channel_layout"},
-{"in_channel_layout"    , "set input channel layout"    , OFFSET(user_in_ch_layout ), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=0           }, 0    , INT64_MAX , PARAM, "channel_layout"},
-{"ocl"                  , "set output channel layout"   , OFFSET(user_out_ch_layout), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=0           }, 0    , INT64_MAX , PARAM, "channel_layout"},
-{"out_channel_layout"   , "set output channel layout"   , OFFSET(user_out_ch_layout), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=0           }, 0    , INT64_MAX , PARAM, "channel_layout"},
+{"icl"                  , "set input channel layout"    , OFFSET(user_in_ch_layout ), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=0           }, INT64_MIN, INT64_MAX , PARAM, "channel_layout"},
+{"in_channel_layout"    , "set input channel layout"    , OFFSET(user_in_ch_layout ), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=0           }, INT64_MIN, INT64_MAX , PARAM, "channel_layout"},
+{"ocl"                  , "set output channel layout"   , OFFSET(user_out_ch_layout), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=0           }, INT64_MIN, INT64_MAX , PARAM, "channel_layout"},
+{"out_channel_layout"   , "set output channel layout"   , OFFSET(user_out_ch_layout), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=0           }, INT64_MIN, INT64_MAX , PARAM, "channel_layout"},
 {"clev"                 , "set center mix level"        , OFFSET(clev           ), AV_OPT_TYPE_FLOAT, {.dbl=C_30DB                }, -32    , 32        , PARAM},
 {"center_mix_level"     , "set center mix level"        , OFFSET(clev           ), AV_OPT_TYPE_FLOAT, {.dbl=C_30DB                }, -32    , 32        , PARAM},
 {"slev"                 , "set surround mix level"      , OFFSET(slev           ), AV_OPT_TYPE_FLOAT, {.dbl=C_30DB                }, -32    , 32        , PARAM},
@@ -74,17 +74,17 @@ static const AVOption options[]={
 {"rectangular"          , "select rectangular dither"   , 0                      , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_RECTANGULAR}, INT_MIN, INT_MAX   , PARAM, "dither_method"},
 {"triangular"           , "select triangular dither"    , 0                      , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_TRIANGULAR }, INT_MIN, INT_MAX   , PARAM, "dither_method"},
 {"triangular_hp"        , "select triangular dither with high pass" , 0          , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_TRIANGULAR_HIGHPASS }, INT_MIN, INT_MAX, PARAM, "dither_method"},
-{"lipshitz"             , "select lipshitz noise shaping dither" , 0             , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_NS_LIPSHITZ}, INT_MIN, INT_MAX, PARAM, "dither_method"},
-{"shibata"              , "select shibata noise shaping dither" , 0              , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_NS_SHIBATA }, INT_MIN, INT_MAX, PARAM, "dither_method"},
-{"low_shibata"          , "select low shibata noise shaping dither" , 0          , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_NS_LOW_SHIBATA }, INT_MIN, INT_MAX, PARAM, "dither_method"},
-{"high_shibata"         , "select high shibata noise shaping dither" , 0         , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_NS_HIGH_SHIBATA }, INT_MIN, INT_MAX, PARAM, "dither_method"},
+{"lipshitz"             , "select Lipshitz noise shaping dither" , 0             , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_NS_LIPSHITZ}, INT_MIN, INT_MAX, PARAM, "dither_method"},
+{"shibata"              , "select Shibata noise shaping dither" , 0              , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_NS_SHIBATA }, INT_MIN, INT_MAX, PARAM, "dither_method"},
+{"low_shibata"          , "select low Shibata noise shaping dither" , 0          , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_NS_LOW_SHIBATA }, INT_MIN, INT_MAX, PARAM, "dither_method"},
+{"high_shibata"         , "select high Shibata noise shaping dither" , 0         , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_NS_HIGH_SHIBATA }, INT_MIN, INT_MAX, PARAM, "dither_method"},
 {"f_weighted"           , "select f-weighted noise shaping dither" , 0           , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_NS_F_WEIGHTED }, INT_MIN, INT_MAX, PARAM, "dither_method"},
 {"modified_e_weighted"  , "select modified-e-weighted noise shaping dither" , 0  , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_NS_MODIFIED_E_WEIGHTED }, INT_MIN, INT_MAX, PARAM, "dither_method"},
 {"improved_e_weighted"  , "select improved-e-weighted noise shaping dither" , 0  , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_NS_IMPROVED_E_WEIGHTED }, INT_MIN, INT_MAX, PARAM, "dither_method"},
 
 {"filter_size"          , "set swr resampling filter size", OFFSET(filter_size)  , AV_OPT_TYPE_INT  , {.i64=32                    }, 0      , INT_MAX   , PARAM },
 {"phase_shift"          , "set swr resampling phase shift", OFFSET(phase_shift)  , AV_OPT_TYPE_INT  , {.i64=10                    }, 0      , 24        , PARAM },
-{"linear_interp"        , "enable linear interpolation" , OFFSET(linear_interp)  , AV_OPT_TYPE_INT  , {.i64=0                     }, 0      , 1         , PARAM },
+{"linear_interp"        , "enable linear interpolation" , OFFSET(linear_interp)  , AV_OPT_TYPE_BOOL , {.i64=0                     }, 0      , 1         , PARAM },
 {"cutoff"               , "set cutoff frequency ratio"  , OFFSET(cutoff)         , AV_OPT_TYPE_DOUBLE,{.dbl=0.                    }, 0      , 1         , PARAM },
 
 /* duplicate option in order to work with avconv */
@@ -96,7 +96,7 @@ static const AVOption options[]={
 {"precision"            , "set soxr resampling precision (in bits)"
                                                         , OFFSET(precision)      , AV_OPT_TYPE_DOUBLE,{.dbl=20.0                  }, 15.0   , 33.0      , PARAM },
 {"cheby"                , "enable soxr Chebyshev passband & higher-precision irrational ratio approximation"
-                                                        , OFFSET(cheby)          , AV_OPT_TYPE_INT  , {.i64=0                     }, 0      , 1         , PARAM },
+                                                        , OFFSET(cheby)          , AV_OPT_TYPE_BOOL , {.i64=0                     }, 0      , 1         , PARAM },
 {"min_comp"             , "set minimum difference between timestamps and audio data (in seconds) below which no timestamp compensation of either kind is applied"
                                                         , OFFSET(min_compensation),AV_OPT_TYPE_FLOAT ,{.dbl=FLT_MAX               }, 0      , FLT_MAX   , PARAM },
 {"min_hard_comp"        , "set minimum difference between timestamps and audio data (in seconds) to trigger padding/trimming the data."
@@ -117,10 +117,10 @@ static const AVOption options[]={
 
 { "filter_type"         , "select swr filter type"      , OFFSET(filter_type)    , AV_OPT_TYPE_INT  , { .i64 = SWR_FILTER_TYPE_KAISER }, SWR_FILTER_TYPE_CUBIC, SWR_FILTER_TYPE_KAISER, PARAM, "filter_type" },
     { "cubic"           , "select cubic"                , 0                      , AV_OPT_TYPE_CONST, { .i64 = SWR_FILTER_TYPE_CUBIC            }, INT_MIN, INT_MAX, PARAM, "filter_type" },
-    { "blackman_nuttall", "select Blackman Nuttall Windowed Sinc", 0             , AV_OPT_TYPE_CONST, { .i64 = SWR_FILTER_TYPE_BLACKMAN_NUTTALL }, INT_MIN, INT_MAX, PARAM, "filter_type" },
-    { "kaiser"          , "select Kaiser Windowed Sinc" , 0                      , AV_OPT_TYPE_CONST, { .i64 = SWR_FILTER_TYPE_KAISER           }, INT_MIN, INT_MAX, PARAM, "filter_type" },
+    { "blackman_nuttall", "select Blackman Nuttall windowed sinc", 0             , AV_OPT_TYPE_CONST, { .i64 = SWR_FILTER_TYPE_BLACKMAN_NUTTALL }, INT_MIN, INT_MAX, PARAM, "filter_type" },
+    { "kaiser"          , "select Kaiser windowed sinc" , 0                      , AV_OPT_TYPE_CONST, { .i64 = SWR_FILTER_TYPE_KAISER           }, INT_MIN, INT_MAX, PARAM, "filter_type" },
 
-{ "kaiser_beta"         , "set swr Kaiser Window Beta"  , OFFSET(kaiser_beta)    , AV_OPT_TYPE_INT  , {.i64=9                     }, 2      , 16        , PARAM },
+{ "kaiser_beta"         , "set swr Kaiser window beta"  , OFFSET(kaiser_beta)    , AV_OPT_TYPE_DOUBLE  , {.dbl=9                     }, 2      , 16        , PARAM },
 
 { "output_sample_bits"  , "set swr number of output sample bits", OFFSET(dither.output_sample_bits), AV_OPT_TYPE_INT  , {.i64=0   }, 0      , 64        , PARAM },
 {0}
diff --git a/libswresample/rematrix.c b/libswresample/rematrix.c
index 54ebb96b..932088ff 100644
--- a/libswresample/rematrix.c
+++ b/libswresample/rematrix.c
@@ -340,11 +340,16 @@ av_cold static int auto_matrix(SwrContext *s)
             }
     }
 
+    av_log(s, AV_LOG_DEBUG, "Matrix coefficients:\n");
     for(i=0; i<av_get_channel_layout_nb_channels(out_ch_layout); i++){
+        const char *c =
+            av_get_channel_name(av_channel_layout_extract_channel(out_ch_layout, i));
+        av_log(s, AV_LOG_DEBUG, "%s: ", c ? c : "?");
         for(j=0; j<av_get_channel_layout_nb_channels(in_ch_layout); j++){
-            av_log(NULL, AV_LOG_DEBUG, "%f ", s->matrix[i][j]);
+            c = av_get_channel_name(av_channel_layout_extract_channel(in_ch_layout, j));
+            av_log(s, AV_LOG_DEBUG, "%s:%f ", c ? c : "?", s->matrix[i][j]);
         }
-        av_log(NULL, AV_LOG_DEBUG, "\n");
+        av_log(s, AV_LOG_DEBUG, "\n");
     }
     return 0;
 }
diff --git a/libswresample/resample.c b/libswresample/resample.c
index 554fd7b5..7888e570 100644
--- a/libswresample/resample.c
+++ b/libswresample/resample.c
@@ -1,6 +1,7 @@
 /*
  * audio resampling
  * Copyright (c) 2004-2012 Michael Niedermayer <michaelni@gmx.at>
+ * bessel function: Copyright (c) 2006 Xiaogang Zhang
  *
  * This file is part of FFmpeg.
  *
@@ -28,35 +29,108 @@
 #include "libavutil/avassert.h"
 #include "resample.h"
 
+static inline double eval_poly(const double *coeff, int size, double x) {
+    double sum = coeff[size-1];
+    int i;
+    for (i = size-2; i >= 0; --i) {
+        sum *= x;
+        sum += coeff[i];
+    }
+    return sum;
+}
+
 /**
  * 0th order modified bessel function of the first kind.
+ * Algorithm taken from the Boost project, source:
+ * https://searchcode.com/codesearch/view/14918379/
+ * Use, modification and distribution are subject to the
+ * Boost Software License, Version 1.0 (see notice below).
+ * Boost Software License - Version 1.0 - August 17th, 2003
+ * Permission is hereby granted, free of charge, to any person or organization
+ * obtaining a copy of the software and accompanying documentation covered by
+ * this license (the "Software") to use, reproduce, display, distribute,
+ * execute, and transmit the Software, and to prepare derivative works of the
+ * Software, and to permit third-parties to whom the Software is furnished to
+ * do so, all subject to the following:
+ *
+ * The copyright notices in the Software and this entire statement, including
+ * the above license grant, this restriction and the following disclaimer,
+ * must be included in all copies of the Software, in whole or in part, and
+ * all derivative works of the Software, unless such copies or derivative
+ * works are solely in the form of machine-executable object code generated by
+ * a source language processor.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+ * FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
  */
-static double bessel(double x){
-    double v=1;
-    double lastv=0;
-    double t=1;
-    int i;
-    static const double inv[100]={
- 1.0/( 1* 1), 1.0/( 2* 2), 1.0/( 3* 3), 1.0/( 4* 4), 1.0/( 5* 5), 1.0/( 6* 6), 1.0/( 7* 7), 1.0/( 8* 8), 1.0/( 9* 9), 1.0/(10*10),
- 1.0/(11*11), 1.0/(12*12), 1.0/(13*13), 1.0/(14*14), 1.0/(15*15), 1.0/(16*16), 1.0/(17*17), 1.0/(18*18), 1.0/(19*19), 1.0/(20*20),
- 1.0/(21*21), 1.0/(22*22), 1.0/(23*23), 1.0/(24*24), 1.0/(25*25), 1.0/(26*26), 1.0/(27*27), 1.0/(28*28), 1.0/(29*29), 1.0/(30*30),
- 1.0/(31*31), 1.0/(32*32), 1.0/(33*33), 1.0/(34*34), 1.0/(35*35), 1.0/(36*36), 1.0/(37*37), 1.0/(38*38), 1.0/(39*39), 1.0/(40*40),
- 1.0/(41*41), 1.0/(42*42), 1.0/(43*43), 1.0/(44*44), 1.0/(45*45), 1.0/(46*46), 1.0/(47*47), 1.0/(48*48), 1.0/(49*49), 1.0/(50*50),
- 1.0/(51*51), 1.0/(52*52), 1.0/(53*53), 1.0/(54*54), 1.0/(55*55), 1.0/(56*56), 1.0/(57*57), 1.0/(58*58), 1.0/(59*59), 1.0/(60*60),
- 1.0/(61*61), 1.0/(62*62), 1.0/(63*63), 1.0/(64*64), 1.0/(65*65), 1.0/(66*66), 1.0/(67*67), 1.0/(68*68), 1.0/(69*69), 1.0/(70*70),
- 1.0/(71*71), 1.0/(72*72), 1.0/(73*73), 1.0/(74*74), 1.0/(75*75), 1.0/(76*76), 1.0/(77*77), 1.0/(78*78), 1.0/(79*79), 1.0/(80*80),
- 1.0/(81*81), 1.0/(82*82), 1.0/(83*83), 1.0/(84*84), 1.0/(85*85), 1.0/(86*86), 1.0/(87*87), 1.0/(88*88), 1.0/(89*89), 1.0/(90*90),
- 1.0/(91*91), 1.0/(92*92), 1.0/(93*93), 1.0/(94*94), 1.0/(95*95), 1.0/(96*96), 1.0/(97*97), 1.0/(98*98), 1.0/(99*99), 1.0/(10000)
-    };
 
-    x= x*x/4;
-    for(i=0; v != lastv; i++){
-        lastv=v;
-        t *= x*inv[i];
-        v += t;
-        av_assert2(i<99);
+static double bessel(double x) {
+// Modified Bessel function of the first kind of order zero
+// minimax rational approximations on intervals, see
+// Blair and Edwards, Chalk River Report AECL-4928, 1974
+    static const double p1[] = {
+        -2.2335582639474375249e+15,
+        -5.5050369673018427753e+14,
+        -3.2940087627407749166e+13,
+        -8.4925101247114157499e+11,
+        -1.1912746104985237192e+10,
+        -1.0313066708737980747e+08,
+        -5.9545626019847898221e+05,
+        -2.4125195876041896775e+03,
+        -7.0935347449210549190e+00,
+        -1.5453977791786851041e-02,
+        -2.5172644670688975051e-05,
+        -3.0517226450451067446e-08,
+        -2.6843448573468483278e-11,
+        -1.5982226675653184646e-14,
+        -5.2487866627945699800e-18,
+    };
+    static const double q1[] = {
+        -2.2335582639474375245e+15,
+         7.8858692566751002988e+12,
+        -1.2207067397808979846e+10,
+         1.0377081058062166144e+07,
+        -4.8527560179962773045e+03,
+         1.0,
+    };
+    static const double p2[] = {
+        -2.2210262233306573296e-04,
+         1.3067392038106924055e-02,
+        -4.4700805721174453923e-01,
+         5.5674518371240761397e+00,
+        -2.3517945679239481621e+01,
+         3.1611322818701131207e+01,
+        -9.6090021968656180000e+00,
+    };
+    static const double q2[] = {
+        -5.5194330231005480228e-04,
+         3.2547697594819615062e-02,
+        -1.1151759188741312645e+00,
+         1.3982595353892851542e+01,
+        -6.0228002066743340583e+01,
+         8.5539563258012929600e+01,
+        -3.1446690275135491500e+01,
+        1.0,
+    };
+    double y, r, factor;
+    if (x == 0)
+        return 1.0;
+    x = fabs(x);
+    if (x <= 15) {
+        y = x * x;
+        return eval_poly(p1, FF_ARRAY_ELEMS(p1), y) / eval_poly(q1, FF_ARRAY_ELEMS(q1), y);
+    }
+    else {
+        y = 1 / x - 1.0 / 15;
+        r = eval_poly(p2, FF_ARRAY_ELEMS(p2), y) / eval_poly(q2, FF_ARRAY_ELEMS(q2), y);
+        factor = exp(x) / sqrt(x);
+        return factor * r;
     }
-    return v;
 }
 
 /**
@@ -68,25 +142,36 @@ static double bessel(double x){
  * @return 0 on success, negative on error
  */
 static int build_filter(ResampleContext *c, void *filter, double factor, int tap_count, int alloc, int phase_count, int scale,
-                        int filter_type, int kaiser_beta){
+                        int filter_type, double kaiser_beta){
     int ph, i;
-    double x, y, w;
-    double *tab = av_malloc_array(tap_count,  sizeof(*tab));
+    double x, y, w, t, s;
+    double *tab = av_malloc_array(tap_count+1,  sizeof(*tab));
+    double *sin_lut = av_malloc_array(phase_count / 2 + 1, sizeof(*sin_lut));
     const int center= (tap_count-1)/2;
 
-    if (!tab)
-        return AVERROR(ENOMEM);
+    if (!tab || !sin_lut)
+        goto fail;
 
     /* if upsampling, only need to interpolate, no filter */
     if (factor > 1.0)
         factor = 1.0;
 
-    for(ph=0;ph<phase_count;ph++) {
+    av_assert0(phase_count == 1 || phase_count % 2 == 0);
+
+    if (factor == 1.0) {
+        for (ph = 0; ph <= phase_count / 2; ph++)
+            sin_lut[ph] = sin(M_PI * ph / phase_count);
+    }
+    for(ph = 0; ph <= phase_count / 2; ph++) {
         double norm = 0;
-        for(i=0;i<tap_count;i++) {
+        s = sin_lut[ph];
+        for(i=0;i<=tap_count;i++) {
             x = M_PI * ((double)(i - center) - (double)ph / phase_count) * factor;
             if (x == 0) y = 1.0;
-            else        y = sin(x) / x;
+            else if (factor == 1.0)
+                y = s / x;
+            else
+                y = sin(x) / x;
             switch(filter_type){
             case SWR_FILTER_TYPE_CUBIC:{
                 const float d= -0.5; //first order derivative = -0.5
@@ -95,8 +180,9 @@ static int build_filter(ResampleContext *c, void *filter, double factor, int tap
                 else      y=                       d*(-4 + 8*x - 5*x*x + x*x*x);
                 break;}
             case SWR_FILTER_TYPE_BLACKMAN_NUTTALL:
-                w = 2.0*x / (factor*tap_count) + M_PI;
-                y *= 0.3635819 - 0.4891775 * cos(w) + 0.1365995 * cos(2*w) - 0.0106411 * cos(3*w);
+                w = 2.0*x / (factor*tap_count);
+                t = -cos(w);
+                y *= 0.3635819 - 0.4891775 * t + 0.1365995 * (2*t*t-1) - 0.0106411 * (4*t*t*t - 3*t);
                 break;
             case SWR_FILTER_TYPE_KAISER:
                 w = 2.0*x / (factor*tap_count*M_PI);
@@ -107,26 +193,62 @@ static int build_filter(ResampleContext *c, void *filter, double factor, int tap
             }
 
             tab[i] = y;
-            norm += y;
+            s = -s;
+            if (i < tap_count)
+                norm += y;
         }
 
         /* normalize so that an uniform color remains the same */
         switch(c->format){
         case AV_SAMPLE_FMT_S16P:
             for(i=0;i<tap_count;i++)
-                ((int16_t*)filter)[ph * alloc + i] = av_clip(lrintf(tab[i] * scale / norm), INT16_MIN, INT16_MAX);
+                ((int16_t*)filter)[ph * alloc + i] = av_clip_int16(lrintf(tab[i] * scale / norm));
+            if (tap_count % 2 == 0) {
+                for (i = 0; i < tap_count; i++)
+                    ((int16_t*)filter)[(phase_count-ph) * alloc + tap_count-1-i] = ((int16_t*)filter)[ph * alloc + i];
+            }
+            else {
+                for (i = 1; i <= tap_count; i++)
+                    ((int16_t*)filter)[(phase_count-ph) * alloc + tap_count-i] =
+                        av_clip_int16(lrintf(tab[i] * scale / (norm - tab[0] + tab[tap_count])));
+            }
             break;
         case AV_SAMPLE_FMT_S32P:
             for(i=0;i<tap_count;i++)
                 ((int32_t*)filter)[ph * alloc + i] = av_clipl_int32(llrint(tab[i] * scale / norm));
+            if (tap_count % 2 == 0) {
+                for (i = 0; i < tap_count; i++)
+                    ((int32_t*)filter)[(phase_count-ph) * alloc + tap_count-1-i] = ((int32_t*)filter)[ph * alloc + i];
+            }
+            else {
+                for (i = 1; i <= tap_count; i++)
+                    ((int32_t*)filter)[(phase_count-ph) * alloc + tap_count-i] =
+                        av_clipl_int32(llrint(tab[i] * scale / (norm - tab[0] + tab[tap_count])));
+            }
             break;
         case AV_SAMPLE_FMT_FLTP:
             for(i=0;i<tap_count;i++)
                 ((float*)filter)[ph * alloc + i] = tab[i] * scale / norm;
+            if (tap_count % 2 == 0) {
+                for (i = 0; i < tap_count; i++)
+                    ((float*)filter)[(phase_count-ph) * alloc + tap_count-1-i] = ((float*)filter)[ph * alloc + i];
+            }
+            else {
+                for (i = 1; i <= tap_count; i++)
+                    ((float*)filter)[(phase_count-ph) * alloc + tap_count-i] = tab[i] * scale / (norm - tab[0] + tab[tap_count]);
+            }
             break;
         case AV_SAMPLE_FMT_DBLP:
             for(i=0;i<tap_count;i++)
                 ((double*)filter)[ph * alloc + i] = tab[i] * scale / norm;
+            if (tap_count % 2 == 0) {
+                for (i = 0; i < tap_count; i++)
+                    ((double*)filter)[(phase_count-ph) * alloc + tap_count-1-i] = ((double*)filter)[ph * alloc + i];
+            }
+            else {
+                for (i = 1; i <= tap_count; i++)
+                    ((double*)filter)[(phase_count-ph) * alloc + tap_count-i] = tab[i] * scale / (norm - tab[0] + tab[tap_count]);
+            }
             break;
         }
     }
@@ -167,12 +289,14 @@ static int build_filter(ResampleContext *c, void *filter, double factor, int tap
     }
 #endif
 
+fail:
     av_free(tab);
+    av_free(sin_lut);
     return 0;
 }
 
 static ResampleContext *resample_init(ResampleContext *c, int out_rate, int in_rate, int filter_size, int phase_shift, int linear,
-                                    double cutoff0, enum AVSampleFormat format, enum SwrFilterType filter_type, int kaiser_beta,
+                                    double cutoff0, enum AVSampleFormat format, enum SwrFilterType filter_type, double kaiser_beta,
                                     double precision, int cheby)
 {
     double cutoff = cutoff0? cutoff0 : 0.97;
@@ -231,6 +355,10 @@ static ResampleContext *resample_init(ResampleContext *c, int out_rate, int in_r
     c->compensation_distance= 0;
     if(!av_reduce(&c->src_incr, &c->dst_incr, out_rate, in_rate * (int64_t)phase_count, INT32_MAX/2))
         goto error;
+    while (c->dst_incr < (1<<20) && c->src_incr < (1<<20)) {
+        c->dst_incr *= 2;
+        c->src_incr *= 2;
+    }
     c->ideal_dst_incr = c->dst_incr;
     c->dst_incr_div   = c->dst_incr / c->src_incr;
     c->dst_incr_mod   = c->dst_incr % c->src_incr;
diff --git a/libswresample/resample.h b/libswresample/resample.h
index 99a89b79..a126b116 100644
--- a/libswresample/resample.h
+++ b/libswresample/resample.h
@@ -44,7 +44,7 @@ typedef struct ResampleContext {
     int phase_mask;
     int linear;
     enum SwrFilterType filter_type;
-    int kaiser_beta;
+    double kaiser_beta;
     double factor;
     enum AVSampleFormat format;
     int felem_size;
diff --git a/libswresample/soxr_resample.c b/libswresample/soxr_resample.c
index 535e9cee..807f5084 100644
--- a/libswresample/soxr_resample.c
+++ b/libswresample/soxr_resample.c
@@ -30,7 +30,7 @@
 #include <soxr.h>
 
 static struct ResampleContext *create(struct ResampleContext *c, int out_rate, int in_rate, int filter_size, int phase_shift, int linear,
-        double cutoff, enum AVSampleFormat format, enum SwrFilterType filter_type, int kaiser_beta, double precision, int cheby){
+        double cutoff, enum AVSampleFormat format, enum SwrFilterType filter_type, double kaiser_beta, double precision, int cheby){
     soxr_error_t error;
 
     soxr_datatype_t type =
diff --git a/libswresample/swresample-test.c b/libswresample/swresample-test.c
index 7e2854da..0aa47c8d 100644
--- a/libswresample/swresample-test.c
+++ b/libswresample/swresample-test.c
@@ -108,7 +108,7 @@ static const int rates[] = {
     48000,
 };
 
-uint64_t layouts[]={
+static const uint64_t layouts[]={
     AV_CH_LAYOUT_MONO                    ,
     AV_CH_LAYOUT_STEREO                  ,
     AV_CH_LAYOUT_2_1                     ,
@@ -138,8 +138,8 @@ static void setup_array(uint8_t *out[SWR_CH_MAX], uint8_t *in, enum AVSampleForm
     }
 }
 
-static int cmp(const int *a, const int *b){
-    return *a - *b;
+static int cmp(const void *a, const void *b){
+    return *(const int *)a - *(const int *)b;
 }
 
 static void audiogen(void *data, enum AVSampleFormat sample_fmt,
@@ -271,7 +271,7 @@ int main(int argc, char **argv){
         r = (seed * (uint64_t)(max_tests - test)) >>32;
         FFSWAP(int, remaining_tests[r], remaining_tests[max_tests - test - 1]);
     }
-    qsort(remaining_tests + max_tests - num_tests, num_tests, sizeof(remaining_tests[0]), (void*)cmp);
+    qsort(remaining_tests + max_tests - num_tests, num_tests, sizeof(remaining_tests[0]), cmp);
     in_sample_rate=16000;
     for(test=0; test<num_tests; test++){
         char  in_layout_string[256];
@@ -374,7 +374,7 @@ int main(int argc, char **argv){
                 sum_aa+= a*a;
                 sum_bb+= b*b;
                 sum_ab+= a*b;
-                maxdiff= FFMAX(maxdiff, FFABS(a-b));
+                maxdiff= FFMAX(maxdiff, fabs(a-b));
             }
             sse= sum_aa + sum_bb - 2*sum_ab;
             if(sse < 0 && sse > -0.00001) sse=0; //fix rounding error
@@ -404,7 +404,7 @@ int main(int argc, char **argv){
                     sum_aa+= a*a;
                     sum_bb+= b*b;
                     sum_ab+= a*b;
-                    maxdiff= FFMAX(maxdiff, FFABS(a-b));
+                    maxdiff= FFMAX(maxdiff, fabs(a-b));
                 }
                 sse= sum_aa + sum_bb - 2*sum_ab;
                 if(sse < 0 && sse > -0.00001) sse=0; //fix rounding error
diff --git a/libswresample/swresample.c b/libswresample/swresample.c
index 693a403f..8e238998 100644
--- a/libswresample/swresample.c
+++ b/libswresample/swresample.c
@@ -133,6 +133,7 @@ static void clear_context(SwrContext *s){
     swri_audio_convert_free(&s->full_convert);
     swri_rematrix_free(s);
 
+    s->delayed_samples_fixup = 0;
     s->flushed = 0;
 }
 
@@ -212,7 +213,13 @@ av_cold int swr_init(struct SwrContext *s){
                  s->rematrix_custom;
 
     if(s->int_sample_fmt == AV_SAMPLE_FMT_NONE){
-        if(av_get_planar_sample_fmt(s->in_sample_fmt) <= AV_SAMPLE_FMT_S16P){
+        if(   av_get_planar_sample_fmt(s-> in_sample_fmt) <= AV_SAMPLE_FMT_S16P
+           && av_get_planar_sample_fmt(s->out_sample_fmt) <= AV_SAMPLE_FMT_S16P){
+            s->int_sample_fmt= AV_SAMPLE_FMT_S16P;
+        }else if(   av_get_planar_sample_fmt(s-> in_sample_fmt) <= AV_SAMPLE_FMT_S16P
+           && !s->rematrix
+           && s->out_sample_rate==s->in_sample_rate
+           && !(s->flags & SWR_FLAG_RESAMPLE)){
             s->int_sample_fmt= AV_SAMPLE_FMT_S16P;
         }else if(   av_get_planar_sample_fmt(s-> in_sample_fmt) == AV_SAMPLE_FMT_S32P
                  && av_get_planar_sample_fmt(s->out_sample_fmt) == AV_SAMPLE_FMT_S32P
@@ -222,10 +229,10 @@ av_cold int swr_init(struct SwrContext *s){
         }else if(av_get_planar_sample_fmt(s->in_sample_fmt) <= AV_SAMPLE_FMT_FLTP){
             s->int_sample_fmt= AV_SAMPLE_FMT_FLTP;
         }else{
-            av_log(s, AV_LOG_DEBUG, "Using double precision mode\n");
             s->int_sample_fmt= AV_SAMPLE_FMT_DBLP;
         }
     }
+    av_log(s, AV_LOG_DEBUG, "Using %s internally between filters\n", av_get_sample_fmt_name(s->int_sample_fmt));
 
     if(   s->int_sample_fmt != AV_SAMPLE_FMT_S16P
         &&s->int_sample_fmt != AV_SAMPLE_FMT_S32P
@@ -643,7 +650,7 @@ static int swr_convert_internal(struct SwrContext *s, AudioData *out, int out_co
                 return ret;
             if(ret)
                 for(ch=0; ch<s->dither.noise.ch_count; ch++)
-                    if((ret=swri_get_dither(s, s->dither.noise.ch[ch], s->dither.noise.count, 12345678913579<<ch, s->dither.noise.fmt))<0)
+                    if((ret=swri_get_dither(s, s->dither.noise.ch[ch], s->dither.noise.count, (12345678913579ULL*ch + 3141592) % 2718281828U, s->dither.noise.fmt))<0)
                         return ret;
             av_assert0(s->dither.noise.ch_count == preout->ch_count);
 
diff --git a/libswresample/swresample.h b/libswresample/swresample.h
index e1617f47..10eaebc4 100644
--- a/libswresample/swresample.h
+++ b/libswresample/swresample.h
@@ -169,8 +169,8 @@ enum SwrEngine {
 /** Resampling Filter Types */
 enum SwrFilterType {
     SWR_FILTER_TYPE_CUBIC,              /**< Cubic */
-    SWR_FILTER_TYPE_BLACKMAN_NUTTALL,   /**< Blackman Nuttall Windowed Sinc */
-    SWR_FILTER_TYPE_KAISER,             /**< Kaiser Windowed Sinc */
+    SWR_FILTER_TYPE_BLACKMAN_NUTTALL,   /**< Blackman Nuttall windowed sinc */
+    SWR_FILTER_TYPE_KAISER,             /**< Kaiser windowed sinc */
 };
 
 /**
diff --git a/libswresample/swresample_internal.h b/libswresample/swresample_internal.h
index e36faf11..5d61de47 100644
--- a/libswresample/swresample_internal.h
+++ b/libswresample/swresample_internal.h
@@ -18,14 +18,14 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef SWR_INTERNAL_H
-#define SWR_INTERNAL_H
+#ifndef SWRESAMPLE_SWRESAMPLE_INTERNAL_H
+#define SWRESAMPLE_SWRESAMPLE_INTERNAL_H
 
 #include "swresample.h"
 #include "libavutil/channel_layout.h"
 #include "config.h"
 
-#define SWR_CH_MAX 32
+#define SWR_CH_MAX 64
 
 #define SQRT3_2      1.22474487139158904909  /* sqrt(3/2) */
 
@@ -69,7 +69,7 @@ struct DitherContext {
 };
 
 typedef struct ResampleContext * (* resample_init_func)(struct ResampleContext *c, int out_rate, int in_rate, int filter_size, int phase_shift, int linear,
-                                    double cutoff, enum AVSampleFormat format, enum SwrFilterType filter_type, int kaiser_beta, double precision, int cheby);
+                                    double cutoff, enum AVSampleFormat format, enum SwrFilterType filter_type, double kaiser_beta, double precision, int cheby);
 typedef void    (* resample_free_func)(struct ResampleContext **c);
 typedef int     (* multiple_resample_func)(struct ResampleContext *c, AudioData *dst, int dst_size, AudioData *src, int src_size, int *consumed);
 typedef int     (* resample_flush_func)(struct SwrContext *c);
@@ -128,7 +128,7 @@ struct SwrContext {
     int linear_interp;                              /**< if 1 then the resampling FIR filter will be linearly interpolated */
     double cutoff;                                  /**< resampling cutoff frequency (swr: 6dB point; soxr: 0dB point). 1.0 corresponds to half the output sample rate */
     int filter_type;                                /**< swr resampling filter type */
-    int kaiser_beta;                                /**< swr beta value for Kaiser window (only applicable if filter_type == AV_FILTER_TYPE_KAISER) */
+    double kaiser_beta;                                /**< swr beta value for Kaiser window (only applicable if filter_type == SWR_FILTER_TYPE_KAISER) */
     double precision;                               /**< soxr resampling precision (in bits) */
     int cheby;                                      /**< soxr: if 1 then passband rolloff will be none (Chebyshev) & irrational ratio approximation precision will be higher */
 
@@ -184,6 +184,7 @@ struct SwrContext {
     /* TODO: callbacks for ASM optimizations */
 };
 
+av_warn_unused_result
 int swri_realloc_audio(AudioData *a, int count);
 
 void swri_noise_shaping_int16 (SwrContext *s, AudioData *dsts, const AudioData *srcs, const AudioData *noises, int count);
@@ -191,12 +192,15 @@ void swri_noise_shaping_int32 (SwrContext *s, AudioData *dsts, const AudioData *
 void swri_noise_shaping_float (SwrContext *s, AudioData *dsts, const AudioData *srcs, const AudioData *noises, int count);
 void swri_noise_shaping_double(SwrContext *s, AudioData *dsts, const AudioData *srcs, const AudioData *noises, int count);
 
+av_warn_unused_result
 int swri_rematrix_init(SwrContext *s);
 void swri_rematrix_free(SwrContext *s);
 int swri_rematrix(SwrContext *s, AudioData *out, AudioData *in, int len, int mustcopy);
 int swri_rematrix_init_x86(struct SwrContext *s);
 
+av_warn_unused_result
 int swri_get_dither(SwrContext *s, void *dst, int len, unsigned seed, enum AVSampleFormat noise_fmt);
+av_warn_unused_result
 int swri_dither_init(SwrContext *s, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt);
 
 void swri_audio_convert_init_aarch64(struct AudioConvert *ac,
diff --git a/libswresample/version.h b/libswresample/version.h
index 94ac9c52..830f00e4 100644
--- a/libswresample/version.h
+++ b/libswresample/version.h
@@ -18,8 +18,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef SWR_VERSION_H
-#define SWR_VERSION_H
+#ifndef SWRESAMPLE_VERSION_H
+#define SWRESAMPLE_VERSION_H
 
 /**
  * @file
@@ -28,9 +28,9 @@
 
 #include "libavutil/avutil.h"
 
-#define LIBSWRESAMPLE_VERSION_MAJOR   1
-#define LIBSWRESAMPLE_VERSION_MINOR   2
-#define LIBSWRESAMPLE_VERSION_MICRO 100
+#define LIBSWRESAMPLE_VERSION_MAJOR   2
+#define LIBSWRESAMPLE_VERSION_MINOR   0
+#define LIBSWRESAMPLE_VERSION_MICRO 101
 
 #define LIBSWRESAMPLE_VERSION_INT  AV_VERSION_INT(LIBSWRESAMPLE_VERSION_MAJOR, \
                                                   LIBSWRESAMPLE_VERSION_MINOR, \
@@ -42,4 +42,4 @@
 
 #define LIBSWRESAMPLE_IDENT        "SwR" AV_STRINGIFY(LIBSWRESAMPLE_VERSION)
 
-#endif /* SWR_VERSION_H */
+#endif /* SWRESAMPLE_VERSION_H */
diff --git a/libswresample/x86/audio_convert.asm b/libswresample/x86/audio_convert.asm
index 69e4f053..d441636d 100644
--- a/libswresample/x86/audio_convert.asm
+++ b/libswresample/x86/audio_convert.asm
@@ -44,7 +44,7 @@ cglobal pack_2ch_%2_to_%1_%3, 3, 4, 6, dst, src, len, src2
     test src2q, mmsize-1
         jne pack_2ch_%2_to_%1_u_int %+ SUFFIX
 %else
-pack_2ch_%2_to_%1_u_int %+ SUFFIX
+pack_2ch_%2_to_%1_u_int %+ SUFFIX:
 %endif
     lea     srcq , [srcq  + (1<<%5)*lenq]
     lea     src2q, [src2q + (1<<%5)*lenq]
@@ -101,7 +101,7 @@ cglobal unpack_2ch_%2_to_%1_%3, 3, 4, 7, dst, src, len, dst2
     test dst2q, mmsize-1
         jne unpack_2ch_%2_to_%1_u_int %+ SUFFIX
 %else
-unpack_2ch_%2_to_%1_u_int %+ SUFFIX
+unpack_2ch_%2_to_%1_u_int %+ SUFFIX:
 %endif
     lea     srcq , [srcq  + (2<<%5)*lenq]
     lea     dstq , [dstq  + (1<<%4)*lenq]
@@ -170,7 +170,7 @@ cglobal %2_to_%1_%3, 3, 3, 6, dst, src, len
     test srcq, mmsize-1
         jne %2_to_%1_u_int %+ SUFFIX
 %else
-%2_to_%1_u_int %+ SUFFIX
+%2_to_%1_u_int %+ SUFFIX:
 %endif
     lea     srcq , [srcq  + (1<<%5)*lenq]
     lea     dstq , [dstq  + (1<<%4)*lenq]
@@ -202,8 +202,8 @@ cglobal %2_to_%1_%3, 3, 3, 6, dst, src, len
 %endif
 %endmacro
 
-%macro PACK_6CH 5-7
-cglobal pack_6ch_%2_to_%1_%3, 2,8,7, dst, src, src1, src2, src3, src4, src5, len
+%macro PACK_6CH 8
+cglobal pack_6ch_%2_to_%1_%3, 2, 8, %6, dst, src, src1, src2, src3, src4, src5, len
 %if ARCH_X86_64
     mov     lend, r2d
 %else
@@ -232,14 +232,14 @@ cglobal pack_6ch_%2_to_%1_%3, 2,8,7, dst, src, src1, src2, src3, src4, src5, len
     test src5q, mmsize-1
         jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
 %else
-pack_6ch_%2_to_%1_u_int %+ SUFFIX
+pack_6ch_%2_to_%1_u_int %+ SUFFIX:
 %endif
     sub    src1q, srcq
     sub    src2q, srcq
     sub    src3q, srcq
     sub    src4q, srcq
     sub    src5q, srcq
-    %7 x,x,x,x,m7,x
+    %8 x,x,x,x,m7,x
 .loop:
     mov%3     m0, [srcq      ]
     mov%3     m1, [srcq+src1q]
@@ -271,9 +271,9 @@ pack_6ch_%2_to_%1_u_int %+ SUFFIX
     movlhps   m1, m3
     movhlps   m5, m3
 
-    %6 m0,m6,x,x,m7,m3
-    %6 m4,m1,x,x,m7,m3
-    %6 m2,m5,x,x,m7,m3
+    %7 m0,m6,x,x,m7,m3
+    %7 m4,m1,x,x,m7,m3
+    %7 m2,m5,x,x,m7,m3
 
     mov %+ %3 %+ ps [dstq   ], m0
     mov %+ %3 %+ ps [dstq+16], m6
@@ -305,8 +305,8 @@ pack_6ch_%2_to_%1_u_int %+ SUFFIX
 %endif
 %endmacro
 
-%macro UNPACK_6CH 5-7
-cglobal unpack_6ch_%2_to_%1_%3, 2, 8, 8, dst, src, dst1, dst2, dst3, dst4, dst5, len
+%macro UNPACK_6CH 8
+cglobal unpack_6ch_%2_to_%1_%3, 2, 8, %6, dst, src, dst1, dst2, dst3, dst4, dst5, len
 %if ARCH_X86_64
     mov     lend, r2d
 %else
@@ -335,14 +335,14 @@ cglobal unpack_6ch_%2_to_%1_%3, 2, 8, 8, dst, src, dst1, dst2, dst3, dst4, dst5,
     test dst5q, mmsize-1
         jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
 %else
-unpack_6ch_%2_to_%1_u_int %+ SUFFIX
+unpack_6ch_%2_to_%1_u_int %+ SUFFIX:
 %endif
     sub    dst1q, dstq
     sub    dst2q, dstq
     sub    dst3q, dstq
     sub    dst4q, dstq
     sub    dst5q, dstq
-    %7 x,x,x,x,m7,x
+    %8 x,x,x,x,m7,x
 .loop:
     mov%3     m0, [srcq   ]
     mov%3     m1, [srcq+16]
@@ -360,9 +360,9 @@ unpack_6ch_%2_to_%1_u_int %+ SUFFIX
     SWAP 1, 4
     SWAP 2, 3
 
-    %6 m0,m1,x,x,m7,m6
-    %6 m2,m3,x,x,m7,m6
-    %6 m4,m5,x,x,m7,m6
+    %7 m0,m1,x,x,m7,m6
+    %7 m2,m3,x,x,m7,m6
+    %7 m4,m5,x,x,m7,m6
 
     mov %+ %3 %+ ps [dstq      ], m0
     mov %+ %3 %+ ps [dstq+dst1q], m1
@@ -380,8 +380,8 @@ unpack_6ch_%2_to_%1_u_int %+ SUFFIX
 
 %define PACK_8CH_GPRS (10 * ARCH_X86_64) + ((6 + HAVE_ALIGNED_STACK) * ARCH_X86_32)
 
-%macro PACK_8CH 5-7
-cglobal pack_8ch_%2_to_%1_%3, 2,PACK_8CH_GPRS,10, ARCH_X86_32*48, dst, src, len, src1, src2, src3, src4, src5, src6, src7
+%macro PACK_8CH 8
+cglobal pack_8ch_%2_to_%1_%3, 2, PACK_8CH_GPRS, %6, ARCH_X86_32*48, dst, src, len, src1, src2, src3, src4, src5, src6, src7
     mov     dstq, [dstq]
 %if ARCH_X86_32
     DEFINE_ARGS dst, src, src2, src3, src4, src5, src6
@@ -443,7 +443,7 @@ cglobal pack_8ch_%2_to_%1_%3, 2,PACK_8CH_GPRS,10, ARCH_X86_32*48, dst, src, len,
 %endif
         jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
 %else
-pack_8ch_%2_to_%1_u_int %+ SUFFIX
+pack_8ch_%2_to_%1_u_int %+ SUFFIX:
 %endif
     sub    src1q, srcq
     sub    src2q, srcq
@@ -463,7 +463,7 @@ pack_8ch_%2_to_%1_u_int %+ SUFFIX
 %endif
 
 %if ARCH_X86_64
-    %7 x,x,x,x,m9,x
+    %8 x,x,x,x,m9,x
 %elifidn %1, int32
     %define m9 [flt2p31]
 %else
@@ -489,10 +489,10 @@ pack_8ch_%2_to_%1_u_int %+ SUFFIX
 %if ARCH_X86_64
     TRANSPOSE8x4D 0, 1, 2, 3, 4, 5, 6, 7, 8
 
-    %6 m0,m1,x,x,m9,m8
-    %6 m2,m3,x,x,m9,m8
-    %6 m4,m5,x,x,m9,m8
-    %6 m6,m7,x,x,m9,m8
+    %7 m0,m1,x,x,m9,m8
+    %7 m2,m3,x,x,m9,m8
+    %7 m4,m5,x,x,m9,m8
+    %7 m6,m7,x,x,m9,m8
 
     mov%3 [dstq], m0
 %else
@@ -500,12 +500,12 @@ pack_8ch_%2_to_%1_u_int %+ SUFFIX
 
     TRANSPOSE8x4D 0, 1, 2, 3, 4, 5, 6, 7, [rsp], [rsp+16], 1
 
-    %6 m0,m1,x,x,m9,m2
+    %7 m0,m1,x,x,m9,m2
     mova     m2, [rsp]
     mov%3   [dstq], m0
-    %6 m2,m3,x,x,m9,m0
-    %6 m4,m5,x,x,m9,m0
-    %6 m6,m7,x,x,m9,m0
+    %7 m2,m3,x,x,m9,m0
+    %7 m4,m5,x,x,m9,m0
+    %7 m6,m7,x,x,m9,m0
 
 %endif
 
@@ -614,15 +614,15 @@ CONV int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
 CONV int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
 CONV int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N
 
-PACK_6CH float, float, u, 2, 2, NOP_N, NOP_N
-PACK_6CH float, float, a, 2, 2, NOP_N, NOP_N
+PACK_6CH float, float, u, 2, 2, 0, NOP_N, NOP_N
+PACK_6CH float, float, a, 2, 2, 0, NOP_N, NOP_N
 
 INIT_XMM sse
-PACK_6CH float, float, u, 2, 2, NOP_N, NOP_N
-PACK_6CH float, float, a, 2, 2, NOP_N, NOP_N
+PACK_6CH float, float, u, 2, 2, 7, NOP_N, NOP_N
+PACK_6CH float, float, a, 2, 2, 7, NOP_N, NOP_N
 
-UNPACK_6CH float, float, u, 2, 2, NOP_N, NOP_N
-UNPACK_6CH float, float, a, 2, 2, NOP_N, NOP_N
+UNPACK_6CH float, float, u, 2, 2, 7, NOP_N, NOP_N
+UNPACK_6CH float, float, a, 2, 2, 7, NOP_N, NOP_N
 
 INIT_XMM sse2
 CONV int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
@@ -675,23 +675,23 @@ UNPACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
 UNPACK_2CH int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
 UNPACK_2CH int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
 
-PACK_6CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
-PACK_6CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
-PACK_6CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
-PACK_6CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
+PACK_6CH float, int32, u, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
+PACK_6CH float, int32, a, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
+PACK_6CH int32, float, u, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
+PACK_6CH int32, float, a, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
 
-UNPACK_6CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
-UNPACK_6CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
-UNPACK_6CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
-UNPACK_6CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
+UNPACK_6CH float, int32, u, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
+UNPACK_6CH float, int32, a, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
+UNPACK_6CH int32, float, u, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
+UNPACK_6CH int32, float, a, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
 
-PACK_8CH float, float, u, 2, 2, NOP_N, NOP_N
-PACK_8CH float, float, a, 2, 2, NOP_N, NOP_N
+PACK_8CH float, float, u, 2, 2, 9, NOP_N, NOP_N
+PACK_8CH float, float, a, 2, 2, 9, NOP_N, NOP_N
 
-PACK_8CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
-PACK_8CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
-PACK_8CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
-PACK_8CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
+PACK_8CH float, int32, u, 2, 2, 10, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
+PACK_8CH float, int32, a, 2, 2, 10, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
+PACK_8CH int32, float, u, 2, 2, 10, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
+PACK_8CH int32, float, a, 2, 2, 10, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
 
 INIT_XMM ssse3
 UNPACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N
@@ -703,29 +703,29 @@ UNPACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
 
 %if HAVE_AVX_EXTERNAL
 INIT_XMM avx
-PACK_6CH float, float, u, 2, 2, NOP_N, NOP_N
-PACK_6CH float, float, a, 2, 2, NOP_N, NOP_N
+PACK_6CH float, float, u, 2, 2, 8, NOP_N, NOP_N
+PACK_6CH float, float, a, 2, 2, 8, NOP_N, NOP_N
 
-UNPACK_6CH float, float, u, 2, 2, NOP_N, NOP_N
-UNPACK_6CH float, float, a, 2, 2, NOP_N, NOP_N
+UNPACK_6CH float, float, u, 2, 2, 8, NOP_N, NOP_N
+UNPACK_6CH float, float, a, 2, 2, 8, NOP_N, NOP_N
 
-PACK_6CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
-PACK_6CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
-PACK_6CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
-PACK_6CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
+PACK_6CH float, int32, u, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
+PACK_6CH float, int32, a, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
+PACK_6CH int32, float, u, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
+PACK_6CH int32, float, a, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
 
-UNPACK_6CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
-UNPACK_6CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
-UNPACK_6CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
-UNPACK_6CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
+UNPACK_6CH float, int32, u, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
+UNPACK_6CH float, int32, a, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
+UNPACK_6CH int32, float, u, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
+UNPACK_6CH int32, float, a, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
 
-PACK_8CH float, float, u, 2, 2, NOP_N, NOP_N
-PACK_8CH float, float, a, 2, 2, NOP_N, NOP_N
+PACK_8CH float, float, u, 2, 2, 9, NOP_N, NOP_N
+PACK_8CH float, float, a, 2, 2, 9, NOP_N, NOP_N
 
-PACK_8CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
-PACK_8CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
-PACK_8CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
-PACK_8CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
+PACK_8CH float, int32, u, 2, 2, 10, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
+PACK_8CH float, int32, a, 2, 2, 10, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
+PACK_8CH int32, float, u, 2, 2, 10, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
+PACK_8CH int32, float, a, 2, 2, 10, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
 
 INIT_YMM avx
 CONV float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
diff --git a/libswresample/x86/audio_convert_init.c b/libswresample/x86/audio_convert_init.c
index 5e5e91d1..bb89cf60 100644
--- a/libswresample/x86/audio_convert_init.c
+++ b/libswresample/x86/audio_convert_init.c
@@ -174,7 +174,7 @@ MULTI_CAPS_FUNC(SSE2, sse2)
                 ac->simd_f =  ff_pack_8ch_float_to_int32_a_avx;
         }
     }
-    if(EXTERNAL_AVX2(mm_flags)) {
+    if(EXTERNAL_AVX2_FAST(mm_flags)) {
         if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_FLTP)
             ac->simd_f =  ff_float_to_int32_a_avx2;
     }
diff --git a/libswresample/x86/rematrix.asm b/libswresample/x86/rematrix.asm
index f0ae9599..7984b9a7 100644
--- a/libswresample/x86/rematrix.asm
+++ b/libswresample/x86/rematrix.asm
@@ -37,7 +37,7 @@ cglobal mix_2_1_%1_float, 7, 7, 6, out, in1, in2, coeffp, index1, index2, len
     test outq, mmsize-1
         jne mix_2_1_float_u_int %+ SUFFIX
 %else
-mix_2_1_float_u_int %+ SUFFIX
+mix_2_1_float_u_int %+ SUFFIX:
 %endif
     VBROADCASTSS m4, [coeffpq + 4*index1q]
     VBROADCASTSS m5, [coeffpq + 4*index2q]
@@ -79,7 +79,7 @@ cglobal mix_1_1_%1_float, 5, 5, 3, out, in, coeffp, index, len
     test outq, mmsize-1
         jne mix_1_1_float_u_int %+ SUFFIX
 %else
-mix_1_1_float_u_int %+ SUFFIX
+mix_1_1_float_u_int %+ SUFFIX:
 %endif
     VBROADCASTSS m2, [coeffpq + 4*indexq]
     shl lenq    , 2
@@ -111,7 +111,7 @@ cglobal mix_1_1_%1_int16, 5, 5, 6, out, in, coeffp, index, len
     test outq, mmsize-1
         jne mix_1_1_int16_u_int %+ SUFFIX
 %else
-mix_1_1_int16_u_int %+ SUFFIX
+mix_1_1_int16_u_int %+ SUFFIX:
 %endif
     movd   m4, [coeffpq + 4*indexq]
     SPLATW m5, m4
@@ -166,7 +166,7 @@ cglobal mix_2_1_%1_int16, 7, 7, 8, out, in1, in2, coeffp, index1, index2, len
     test outq, mmsize-1
         jne mix_2_1_int16_u_int %+ SUFFIX
 %else
-mix_2_1_int16_u_int %+ SUFFIX
+mix_2_1_int16_u_int %+ SUFFIX:
 %endif
     movd   m4, [coeffpq + 4*index1q]
     movd   m6, [coeffpq + 4*index2q]
diff --git a/libswresample/x86/resample.asm b/libswresample/x86/resample.asm
index a57ff37b..4989aa69 100644
--- a/libswresample/x86/resample.asm
+++ b/libswresample/x86/resample.asm
@@ -176,7 +176,12 @@ cglobal resample_common_%1, 1, 7, 2, ctx, phase_shift, dst, frac, \
 .inner_loop:
     movu                          m1, [srcq+min_filter_count_x4q*1]
 %ifidn %1, int16
-    PMADCSWD                      m0, m1, [filterq+min_filter_count_x4q*1], m0, m1
+%if cpuflag(xop)
+    vpmadcswd                     m0, m1, [filterq+min_filter_count_x4q*1], m0
+%else
+    pmaddwd                       m1, [filterq+min_filter_count_x4q*1]
+    paddd                         m0, m1
+%endif
 %else ; float/double
 %if cpuflag(fma4) || cpuflag(fma3)
     fmaddp%4                      m0, m1, [filterq+min_filter_count_x4q*1], m0
diff --git a/libswresample/x86/resample_init.c b/libswresample/x86/resample_init.c
index bc444cfb..9d7d5cf8 100644
--- a/libswresample/x86/resample_init.c
+++ b/libswresample/x86/resample_init.c
@@ -71,7 +71,7 @@ av_cold void swri_resample_dsp_x86_init(ResampleContext *c)
             c->dsp.resample = c->linear ? ff_resample_linear_float_avx
                                         : ff_resample_common_float_avx;
         }
-        if (EXTERNAL_FMA3(mm_flags) && !(mm_flags & AV_CPU_FLAG_AVXSLOW)) {
+        if (EXTERNAL_FMA3_FAST(mm_flags)) {
             c->dsp.resample = c->linear ? ff_resample_linear_float_fma3
                                         : ff_resample_common_float_fma3;
         }
diff --git a/libswscale/Makefile b/libswscale/Makefile
index a60b0574..a9f9e03b 100644
--- a/libswscale/Makefile
+++ b/libswscale/Makefile
@@ -5,7 +5,8 @@ NAME = swscale
 HEADERS = swscale.h                                                     \
           version.h                                                     \
 
-OBJS = hscale_fast_bilinear.o                           \
+OBJS = alphablend.o                                     \
+       hscale_fast_bilinear.o                           \
        input.o                                          \
        options.o                                        \
        output.o                                         \
@@ -14,6 +15,10 @@ OBJS = hscale_fast_bilinear.o                           \
        swscale_unscaled.o                               \
        utils.o                                          \
        yuv2rgb.o                                        \
+       slice.o                                          \
+       hscale.o                                         \
+       vscale.o                                         \
+       gamma.o                                          \
 
 OBJS-$(CONFIG_SHARED)        += log2_tab.o
 
diff --git a/libswscale/alphablend.c b/libswscale/alphablend.c
new file mode 100644
index 00000000..b5686599
--- /dev/null
+++ b/libswscale/alphablend.c
@@ -0,0 +1,199 @@
+/*
+ * Copyright (C) 2015 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "swscale_internal.h"
+
+/**
+ * Blend the source against a background using its alpha channel and store
+ * the resulting alpha-less samples in dst.
+ *
+ * The background is 0 for gray, luma and RGB components and mid-scale for
+ * the chroma planes of non-RGB formats. With SWS_ALPHA_BLEND_CHECKERBOARD
+ * the non-chroma components instead alternate between 1/4 and 3/4 of full
+ * scale in 32x32 sample tiles.
+ *
+ * src[] is addressed relative to the first row of the slice, while dst[]
+ * and the checkerboard phase use the absolute row (srcSliceY + slice row).
+ *
+ * @param srcSliceY first row of the slice within the picture
+ * @param srcSliceH number of rows in the slice
+ * @return 0
+ */
+int ff_sws_alphablendaway(SwsContext *c, const uint8_t *src[],
+                          int srcStride[], int srcSliceY, int srcSliceH,
+                          uint8_t *dst[], int dstStride[])
+{
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat);
+    int nb_components = desc->nb_components;
+    int plane, x, ysrc;
+    int plane_count = isGray(c->srcFormat) ? 1 : 3;
+    int sixteen_bits = desc->comp[0].depth >= 9;
+    unsigned off    = 1<<(desc->comp[0].depth - 1);
+    unsigned shift  = desc->comp[0].depth;
+    unsigned max    = (1<<shift) - 1;
+    int target_table[2][3];
+
+    /* Background value for each checkerboard phase and component. */
+    for (plane = 0; plane < plane_count; plane++) {
+        int a = 0, b = 0;
+        if (c->alphablend == SWS_ALPHA_BLEND_CHECKERBOARD) {
+            a = (1<<(desc->comp[0].depth - 1))/2;
+            b = 3*(1<<(desc->comp[0].depth-1))/2;
+        }
+        target_table[0][plane] = plane && !(desc->flags & AV_PIX_FMT_FLAG_RGB) ? 1<<(desc->comp[0].depth - 1) : a;
+        target_table[1][plane] = plane && !(desc->flags & AV_PIX_FMT_FLAG_RGB) ? 1<<(desc->comp[0].depth - 1) : b;
+    }
+
+    /* Alpha is read from index plane_count, right after the colour components. */
+    av_assert0(plane_count == nb_components - 1);
+    if (desc->flags & AV_PIX_FMT_FLAG_PLANAR) {
+        for (plane = 0; plane < plane_count; plane++) {
+            int w = plane ? c->chrSrcW : c->srcW;
+            int x_subsample = plane ? desc->log2_chroma_w: 0;
+            int y_subsample = plane ? desc->log2_chroma_h: 0;
+            for (ysrc = 0; ysrc < AV_CEIL_RSHIFT(srcSliceH, y_subsample); ysrc++) {
+                /* absolute row in this plane: dst row and checkerboard phase */
+                int y = ysrc + (srcSliceY >> y_subsample);
+                if (x_subsample || y_subsample) {
+                    /* Subsampled plane: average the full resolution alpha
+                     * samples covering each output sample (2 horizontally,
+                     * 2x2 when also vertically subsampled). */
+                    int alpha;
+                    unsigned u;
+                    if (sixteen_bits) {
+                        ptrdiff_t alpha_step = srcStride[plane_count] >> 1;
+                        const uint16_t *s = (const uint16_t *)(src[plane      ] +  srcStride[plane      ] * ysrc);
+                        const uint16_t *a = (const uint16_t *)(src[plane_count] + (srcStride[plane_count] * ysrc << y_subsample));
+                              uint16_t *d = (      uint16_t *)(dst[plane      ] +  dstStride[plane      ] * y);
+                        if ((!isBE(c->srcFormat)) == !HAVE_BIGENDIAN) {
+                            for (x = 0; x < w; x++) {
+                                if (y_subsample) {
+                                    alpha = (a[2*x]              + a[2*x + 1] + 2 +
+                                             a[2*x + alpha_step] + a[2*x + alpha_step + 1]) >> 2;
+                                } else
+                                    alpha = (a[2*x] + a[2*x + 1]) >> 1;
+                                /* unsigned product: a 16-bit sample times alpha can overflow int */
+                                u = (unsigned)s[x]*alpha + target_table[((x^y)>>5)&1][plane]*(max-alpha) + off;
+                                /* (u + (u >> shift)) >> shift approximates u / max; off rounds */
+                                d[x] = av_clip((u + (u >> shift)) >> shift, 0, max);
+                            }
+                        } else {
+                            /* foreign byte order: swap each sample on load
+                             * NOTE(review): d[] is stored unswapped - confirm that is intended */
+                            for (x = 0; x < w; x++) {
+                                if (y_subsample) {
+                                    alpha = (av_bswap16(a[2*x])              + av_bswap16(a[2*x + 1]) + 2 +
+                                             av_bswap16(a[2*x + alpha_step]) + av_bswap16(a[2*x + alpha_step + 1])) >> 2;
+                                } else
+                                    alpha = (av_bswap16(a[2*x]) + av_bswap16(a[2*x + 1])) >> 1;
+                                u = (unsigned)av_bswap16(s[x])*alpha + target_table[((x^y)>>5)&1][plane]*(max-alpha) + off;
+                                d[x] = av_clip((u + (u >> shift)) >> shift, 0, max);
+                            }
+                        }
+                    } else {
+                        ptrdiff_t alpha_step = srcStride[plane_count];
+                        const uint8_t *s = src[plane      ] + srcStride[plane] * ysrc;
+                        const uint8_t *a = src[plane_count] + (srcStride[plane_count] * ysrc << y_subsample);
+                              uint8_t *d = dst[plane      ] + dstStride[plane] * y;
+                        for (x = 0; x < w; x++) {
+                            if (y_subsample) {
+                                alpha = (a[2*x]              + a[2*x + 1] + 2 +
+                                         a[2*x + alpha_step] + a[2*x + alpha_step + 1]) >> 2;
+                            } else
+                                alpha = (a[2*x] + a[2*x + 1]) >> 1;
+                            u = s[x]*alpha + target_table[((x^y)>>5)&1][plane]*(255-alpha) + 128;
+                            /* (257*u) >> 16 approximates the division by 255 */
+                            d[x] = (257*u) >> 16;
+                        }
+                    }
+                } else {
+                if (sixteen_bits) {
+                    const uint16_t *s = (const uint16_t *)(src[plane      ] + srcStride[plane      ] * ysrc);
+                    const uint16_t *a = (const uint16_t *)(src[plane_count] + srcStride[plane_count] * ysrc);
+                          uint16_t *d = (      uint16_t *)(dst[plane      ] + dstStride[plane      ] * y);
+                    if ((!isBE(c->srcFormat)) == !HAVE_BIGENDIAN) {
+                        for (x = 0; x < w; x++) {
+                            unsigned u = (unsigned)s[x]*a[x] + target_table[((x^y)>>5)&1][plane]*(max-a[x]) + off;
+                            d[x] = av_clip((u + (u >> shift)) >> shift, 0, max);
+                        }
+                    } else {
+                        for (x = 0; x < w; x++) {
+                            unsigned aswap =av_bswap16(a[x]);
+                            unsigned u = av_bswap16(s[x])*aswap + target_table[((x^y)>>5)&1][plane]*(max-aswap) + off;
+                            d[x] = av_clip((u + (u >> shift)) >> shift, 0, max);
+                        }
+                    }
+                } else {
+                    const uint8_t *s = src[plane      ] + srcStride[plane] * ysrc;
+                    const uint8_t *a = src[plane_count] + srcStride[plane_count] * ysrc;
+                          uint8_t *d = dst[plane      ] + dstStride[plane] * y;
+                    for (x = 0; x < w; x++) {
+                        unsigned u = s[x]*a[x] + target_table[((x^y)>>5)&1][plane]*(255-a[x]) + 128;
+                        d[x] = (257*u) >> 16;
+                    }
+                }
+                }
+            }
+        }
+    } else {
+        int alpha_pos = desc->comp[plane_count].offset;
+        int w = c->srcW;
+        for (ysrc = 0; ysrc < srcSliceH; ysrc++) {
+            int y = ysrc + srcSliceY;
+            /* s skips the leading alpha sample when alpha is stored first (offset 0) */
+            if (sixteen_bits) {
+                const uint16_t *s = (const uint16_t *)(src[0] + srcStride[0] * ysrc + 2*!alpha_pos);
+                const uint16_t *a = (const uint16_t *)(src[0] + srcStride[0] * ysrc +    alpha_pos);
+                      uint16_t *d = (      uint16_t *)(dst[0] + dstStride[0] * y);
+                if ((!isBE(c->srcFormat)) == !HAVE_BIGENDIAN) {
+                    for (x = 0; x < w; x++) {
+                        for (plane = 0; plane < plane_count; plane++) {
+                            int x_index = (plane_count + 1) * x;
+                            unsigned u = (unsigned)s[x_index + plane]*a[x_index] + target_table[((x^y)>>5)&1][plane]*(max-a[x_index]) + off;
+                            d[plane_count*x + plane] = av_clip((u + (u >> shift)) >> shift, 0, max);
+                        }
+                    }
+                } else {
+                    for (x = 0; x < w; x++) {
+                        for (plane = 0; plane < plane_count; plane++) {
+                            int x_index = (plane_count + 1) * x;
+                            unsigned aswap =av_bswap16(a[x_index]);
+                            unsigned u = av_bswap16(s[x_index + plane])*aswap + target_table[((x^y)>>5)&1][plane]*(max-aswap) + off;
+                            d[plane_count*x + plane] = av_clip((u + (u >> shift)) >> shift, 0, max);
+                        }
+                    }
+                }
+            } else {
+                const uint8_t *s = src[0] + srcStride[0] * ysrc + !alpha_pos;
+                const uint8_t *a = src[0] + srcStride[0] * ysrc + alpha_pos;
+                      uint8_t *d = dst[0] + dstStride[0] * y;
+                for (x = 0; x < w; x++) {
+                    for (plane = 0; plane < plane_count; plane++) {
+                        int x_index = (plane_count + 1) * x;
+                        unsigned u = s[x_index + plane]*a[x_index] + target_table[((x^y)>>5)&1][plane]*(255-a[x_index]) + 128;
+                        d[plane_count*x + plane] = (257*u) >> 16;
+                    }
+                }
+            }
+        }
+    }
+
+    return 0;
+}
diff --git a/libswscale/arm/Makefile b/libswscale/arm/Makefile
index 8b5a97b5..97b35613 100644
--- a/libswscale/arm/Makefile
+++ b/libswscale/arm/Makefile
@@ -1,4 +1,5 @@
-# OBJS        += arm/swscale_unscaled.o
+OBJS        += arm/swscale_unscaled.o
 
 # NEON-OBJS   += arm/rgb2yuv_neon_32.o
 # NEON-OBJS   += arm/rgb2yuv_neon_16.o
+NEON-OBJS   += arm/yuv2rgb_neon.o
diff --git a/libswscale/arm/swscale_unscaled.c b/libswscale/arm/swscale_unscaled.c
index 04be7622..ac1e4a97 100644
--- a/libswscale/arm/swscale_unscaled.c
+++ b/libswscale/arm/swscale_unscaled.c
@@ -23,6 +23,7 @@
 #include "libswscale/swscale_internal.h"
 #include "libavutil/arm/cpu.h"
 
+#if 0
 extern void rgbx_to_nv12_neon_32(const uint8_t *src, uint8_t *y, uint8_t *chroma,
                 int width, int height,
                 int y_stride, int c_stride, int src_stride,
@@ -60,15 +61,125 @@ static int rgbx_to_nv12_neon_16_wrapper(SwsContext *context, const uint8_t *src[
 
     return 0;
 }
+#endif
+
+#define YUV_TO_RGB_TABLE(precision)                                                         \
+        c->yuv2rgb_v2r_coeff / ((precision) == 16 ? 1 << 7 : 1),                            \
+        c->yuv2rgb_u2g_coeff / ((precision) == 16 ? 1 << 7 : 1),                            \
+        c->yuv2rgb_v2g_coeff / ((precision) == 16 ? 1 << 7 : 1),                            \
+        c->yuv2rgb_u2b_coeff / ((precision) == 16 ? 1 << 7 : 1),                            \
+
+#define DECLARE_FF_YUVX_TO_RGBX_FUNCS(ifmt, ofmt, precision)                                \
+int ff_##ifmt##_to_##ofmt##_neon_##precision(int w, int h,                                  \
+                                 uint8_t *dst, int linesize,                                \
+                                 const uint8_t *srcY, int linesizeY,                        \
+                                 const uint8_t *srcU, int linesizeU,                        \
+                                 const uint8_t *srcV, int linesizeV,                        \
+                                 const int16_t *table,                                      \
+                                 int y_offset,                                              \
+                                 int y_coeff);                                              \
+                                                                                            \
+static int ifmt##_to_##ofmt##_neon_wrapper_##precision(SwsContext *c, const uint8_t *src[], \
+                                           int srcStride[], int srcSliceY, int srcSliceH,   \
+                                           uint8_t *dst[], int dstStride[]) {               \
+    const int16_t yuv2rgb_table[] = { YUV_TO_RGB_TABLE(precision) };                        \
+                                                                                            \
+    ff_##ifmt##_to_##ofmt##_neon_##precision(c->srcW, srcSliceH,                            \
+                                 dst[0] + srcSliceY * dstStride[0], dstStride[0],           \
+                                 src[0], srcStride[0],                                      \
+                                 src[1], srcStride[1],                                      \
+                                 src[2], srcStride[2],                                      \
+                                 yuv2rgb_table,                                             \
+                                 c->yuv2rgb_y_offset >> 9,                                  \
+                                 c->yuv2rgb_y_coeff / ((precision) == 16 ? 1 << 7 : 1));    \
+                                                                                            \
+    return 0;                                                                               \
+}                                                                                           \
+
+#define DECLARE_FF_YUVX_TO_ALL_RGBX_FUNCS(yuvx, precision)                                  \
+DECLARE_FF_YUVX_TO_RGBX_FUNCS(yuvx, argb, precision)                                        \
+DECLARE_FF_YUVX_TO_RGBX_FUNCS(yuvx, rgba, precision)                                        \
+DECLARE_FF_YUVX_TO_RGBX_FUNCS(yuvx, abgr, precision)                                        \
+DECLARE_FF_YUVX_TO_RGBX_FUNCS(yuvx, bgra, precision)                                        \
+
+#define DECLARE_FF_YUVX_TO_ALL_RGBX_ALL_PRECISION_FUNCS(yuvx)                               \
+DECLARE_FF_YUVX_TO_ALL_RGBX_FUNCS(yuvx, 16)                                                 \
+
+DECLARE_FF_YUVX_TO_ALL_RGBX_ALL_PRECISION_FUNCS(yuv420p)
+DECLARE_FF_YUVX_TO_ALL_RGBX_ALL_PRECISION_FUNCS(yuv422p)
+
+#define DECLARE_FF_NVX_TO_RGBX_FUNCS(ifmt, ofmt, precision)                                 \
+int ff_##ifmt##_to_##ofmt##_neon_##precision(int w, int h,                                  \
+                                 uint8_t *dst, int linesize,                                \
+                                 const uint8_t *srcY, int linesizeY,                        \
+                                 const uint8_t *srcC, int linesizeC,                        \
+                                 const int16_t *table,                                      \
+                                 int y_offset,                                              \
+                                 int y_coeff);                                              \
+                                                                                            \
+static int ifmt##_to_##ofmt##_neon_wrapper_##precision(SwsContext *c, const uint8_t *src[], \
+                                           int srcStride[], int srcSliceY, int srcSliceH,   \
+                                           uint8_t *dst[], int dstStride[]) {               \
+    const int16_t yuv2rgb_table[] = { YUV_TO_RGB_TABLE(precision) };                        \
+                                                                                            \
+    ff_##ifmt##_to_##ofmt##_neon_##precision(c->srcW, srcSliceH,                            \
+                                 dst[0] + srcSliceY * dstStride[0], dstStride[0],           \
+                                 src[0], srcStride[0], src[1], srcStride[1],                \
+                                 yuv2rgb_table,                                             \
+                                 c->yuv2rgb_y_offset >> 9,                                  \
+                                 c->yuv2rgb_y_coeff / ((precision) == 16 ? 1 << 7 : 1));    \
+                                                                                            \
+    return 0;                                                                               \
+}                                                                                           \
+
+#define DECLARE_FF_NVX_TO_ALL_RGBX_FUNCS(nvx, precision)                                    \
+DECLARE_FF_NVX_TO_RGBX_FUNCS(nvx, argb, precision)                                          \
+DECLARE_FF_NVX_TO_RGBX_FUNCS(nvx, rgba, precision)                                          \
+DECLARE_FF_NVX_TO_RGBX_FUNCS(nvx, abgr, precision)                                          \
+DECLARE_FF_NVX_TO_RGBX_FUNCS(nvx, bgra, precision)                                          \
+
+#define DECLARE_FF_NVX_TO_ALL_RGBX_ALL_PRECISION_FUNCS(nvx)                                 \
+DECLARE_FF_NVX_TO_ALL_RGBX_FUNCS(nvx, 16)                                                   \
+
+DECLARE_FF_NVX_TO_ALL_RGBX_ALL_PRECISION_FUNCS(nv12)
+DECLARE_FF_NVX_TO_ALL_RGBX_ALL_PRECISION_FUNCS(nv21)
+
+/* Select the NEON wrapper for a matching format pair. We need a 16 pixel width
+ * alignment: this constraint can easily be removed for input reading, but for
+ * the output, which is 4 bytes per pixel (RGBA), the assembly might write up to
+ * 4*15=60 extra bytes at the end of the line, exceeding the 32-byte alignment. */
+#define SET_FF_NVX_TO_RGBX_FUNC(ifmt, IFMT, ofmt, OFMT, accurate_rnd) do {                  \
+    if (c->srcFormat == AV_PIX_FMT_##IFMT                                                   \
+        && c->dstFormat == AV_PIX_FMT_##OFMT                                                \
+        && !(c->srcH & 1)                                                                   \
+        && !(c->srcW & 15)                                                                  \
+        && !(accurate_rnd)) {                                                               \
+        c->swscale = ifmt##_to_##ofmt##_neon_wrapper_16;                                    \
+    }                                                                                       \
+} while (0)
+
+#define SET_FF_NVX_TO_ALL_RGBX_FUNC(nvx, NVX, accurate_rnd) do {                            \
+    SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, argb, ARGB, accurate_rnd);                            \
+    SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, rgba, RGBA, accurate_rnd);                            \
+    SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, abgr, ABGR, accurate_rnd);                            \
+    SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, bgra, BGRA, accurate_rnd);                            \
+} while (0)
 
 static void get_unscaled_swscale_neon(SwsContext *c) {
     int accurate_rnd = c->flags & SWS_ACCURATE_RND;
+#if 0
     if (c->srcFormat == AV_PIX_FMT_RGBA
             && c->dstFormat == AV_PIX_FMT_NV12
             && (c->srcW >= 16)) {
         c->swscale = accurate_rnd ? rgbx_to_nv12_neon_32_wrapper
                         : rgbx_to_nv12_neon_16_wrapper;
     }
+#endif
+
+    SET_FF_NVX_TO_ALL_RGBX_FUNC(nv12, NV12, accurate_rnd);
+    SET_FF_NVX_TO_ALL_RGBX_FUNC(nv21, NV21, accurate_rnd);
+    SET_FF_NVX_TO_ALL_RGBX_FUNC(yuv420p, YUV420P, accurate_rnd);
+    SET_FF_NVX_TO_ALL_RGBX_FUNC(yuv422p, YUV422P, accurate_rnd);
 }
 
 void ff_get_unscaled_swscale_arm(SwsContext *c)
diff --git a/libswscale/arm/yuv2rgb_neon.S b/libswscale/arm/yuv2rgb_neon.S
new file mode 100644
index 00000000..829e1b65
--- /dev/null
+++ b/libswscale/arm/yuv2rgb_neon.S
@@ -0,0 +1,407 @@
+/*
+ * Copyright (c) 2015 Matthieu Bouron <matthieu.bouron stupeflix.com>
+ * Copyright (c) 2015 Clément Bœsch <clement stupeflix.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/arm/asm.S"
+
+
+.macro compute_premult_16 half_u1, half_u2, half_v1, half_v2                   @ 16-bit chroma terms for 16 px: q8/q9 = R, q10/q11 = G, q12/q13 = B
+    vmov                d2, \half_u1                                   @ copy left q14 to left q1
+    vmov                d3, \half_u1                                   @ copy left q14 to right q1
+    vmov                d4, \half_u2                                   @ copy right q14 to left q2
+    vmov                d5, \half_u2                                   @ copy right q14 to right q2
+
+    vmov                d6, \half_v1                                   @ copy left q15 to left q3
+    vmov                d7, \half_v1                                   @ copy left q15 to right q3
+    vmov                d8, \half_v2                                   @ copy right q15 to left q4
+    vmov                d9, \half_v2                                   @ copy right q15 to right q4
+
+    vzip.16             d2, d3                                         @ U1U1U2U2U3U3U4U4
+    vzip.16             d4, d5                                         @ U5U5U6U6U7U7U8U8
+
+    vzip.16             d6, d7                                         @ V1V1V2V2V3V3V4V4
+    vzip.16             d8, d9                                         @ V5V5V6V6V7V7V8V8
+
+    vmul.s16            q8,  q3, d1[0]                                 @  V * v2r             (left,  red)
+    vmul.s16            q9,  q4, d1[0]                                 @  V * v2r             (right, red)
+    vmul.s16            q10, q1, d1[1]                                 @  U * u2g
+    vmul.s16            q11, q2, d1[1]                                 @  U * u2g
+    vmla.s16            q10, q3, d1[2]                                 @  U * u2g + V * v2g   (left,  green)
+    vmla.s16            q11, q4, d1[2]                                 @  U * u2g + V * v2g   (right, green)
+    vmul.s16            q12, q1, d1[3]                                 @  U * u2b             (left,  blue)
+    vmul.s16            q13, q2, d1[3]                                 @  U * u2b             (right, blue)
+.endm
+
+.macro compute_premult_32 half_u half_v                                        @ 32-bit chroma terms for 8 px: q8/q9 = R, q10/q11 = G, q12/q13 = B
+    vmov                d2, \half_u                                    @ copy left q14 to left q1
+    vmov                d3, \half_u                                    @ copy left q14 to right q1
+    vmov                d4, \half_v                                    @ copy left q15 to left q2
+    vmov                d5, \half_v                                    @ copy left q15 to right q2
+
+    vzip.16             d2, d3                                         @ U1U1U2U2U3U3U4U4
+    vzip.16             d4, d5                                         @ V1V1V2V2V3V3V4V4
+
+    vmull.s16           q8,  d4, d1[0]                                 @  V * v2r             (left,  red)
+    vmull.s16           q9,  d5, d1[0]                                 @  V * v2r             (right, red)
+    vmull.s16           q10, d2, d1[1]                                 @  U * u2g
+    vmull.s16           q11, d3, d1[1]                                 @  U * u2g
+    vmlal.s16           q10, d4, d1[2]                                 @  U * u2g + V * v2g   (left,  green)
+    vmlal.s16           q11, d5, d1[2]                                 @  U * u2g + V * v2g   (right, green)
+    vmull.s16           q12, d2, d1[3]                                 @  U * u2b             (left,  blue)
+    vmull.s16           q13, d3, d1[3]                                 @  U * u2b             (right, blue)
+.endm
+
+.macro compute_color_16 dst_comp1 dst_comp2 pre1 pre2
+    vadd.s16            q1, q14, \pre1                                 @ q1 = luma term (left)  + chroma term
+    vadd.s16            q2, q15, \pre2                                 @ q2 = luma term (right) + chroma term
+    vqrshrun.s16        \dst_comp1, q1, #6                             @ rounding >> 6, saturate to unsigned 8bit
+    vqrshrun.s16        \dst_comp2, q2, #6                             @ rounding >> 6, saturate to unsigned 8bit
+.endm
+
+.macro compute_color_32 dst_comp pre1 pre2
+    vadd.s32            q3, q1, \pre1                                  @ q3 = luma term (left)  + chroma term
+    vadd.s32            q4, q2, \pre2                                  @ q4 = luma term (right) + chroma term
+    vqrshrun.s32        d10, q3, #13                                   @ rounding >> 13, saturate to unsigned 16bit
+    vqrshrun.s32        d11, q4, #13                                   @ q5 = ({q3,q4} + (1<<12)) >> 13
+    vqmovn.u16          \dst_comp, q5                                  @ saturate 16bit -> 8bit
+.endm
+
+.macro compute_rgba_16 r1 r2 g1 g2 b1 b2 a1 a2
+    compute_color_16    \r1, \r2, q8,  q9                              @ red   = luma + V * v2r
+    compute_color_16    \g1, \g2, q10, q11                             @ green = luma + U * u2g + V * v2g
+    compute_color_16    \b1, \b2, q12, q13                             @ blue  = luma + U * u2b
+    vmov.u8             \a1, #255                                      @ opaque alpha (left)
+    vmov.u8             \a2, #255                                      @ opaque alpha (right)
+.endm
+
+.macro compute_rgba_32 r g b a
+    compute_color_32    \r, q8,  q9                                    @ red   = luma + V * v2r
+    compute_color_32    \g, q10, q11                                   @ green = luma + U * u2g + V * v2g
+    compute_color_32    \b, q12, q13                                   @ blue  = luma + U * u2b
+    vmov.u8             \a, #255                                       @ opaque alpha
+.endm
+
+.macro compute_16px_16 dst y0 y1 ofmt
+    vmovl.u8            q14, \y0                                       @ 8px of y
+    vmovl.u8            q15, \y1                                       @ 8px of y
+
+    vdup.16             q5, r9                                         @ q5  = y_offset
+    vmov                d14, d0                                        @ q7  = y_coeff
+    vmov                d15, d0                                        @ q7  = y_coeff
+
+    vsub.s16            q14, q5                                        @ q14 = srcY - y_offset (left)
+    vsub.s16            q15, q5                                        @ q15 = srcY - y_offset (right)
+
+    vmul.s16            q14, q7                                        @ q14 = (srcY - y_offset) * y_coeff (left)
+    vmul.s16            q15, q7                                        @ q15 = (srcY - y_offset) * y_coeff (right)
+
+
+.ifc \ofmt,argb
+    compute_rgba_16     d7, d11, d8, d12, d9, d13, d6, d10             @ d6-d9 / d10-d13 = A, R, G, B
+.endif
+
+.ifc \ofmt,rgba
+    compute_rgba_16     d6, d10, d7, d11, d8, d12, d9, d13             @ d6-d9 / d10-d13 = R, G, B, A
+.endif
+
+.ifc \ofmt,abgr
+    compute_rgba_16     d9, d13, d8, d12, d7, d11, d6, d10             @ d6-d9 / d10-d13 = A, B, G, R
+.endif
+
+.ifc \ofmt,bgra
+    compute_rgba_16     d8, d12, d7, d11, d6, d10, d9, d13             @ d6-d9 / d10-d13 = B, G, R, A
+.endif
+    vst4.8              {q3, q4}, [\dst,:128]!                         @ store 8 interleaved pixels (from y0)
+    vst4.8              {q5, q6}, [\dst,:128]!                         @ store 8 interleaved pixels (from y1)
+
+.endm
+
+.macro compute_8px_32 dst half_y ofmt
+    vmovl.u8            q7, \half_y                                    @ 8px of Y
+    vdup.16             q5, r9                                         @ q5 = y_offset
+    vsub.s16            q7, q5                                         @ q7 = srcY - y_offset
+    vmull.s16           q1, d14, d0                                    @ q1 = (srcY - y_offset) * y_coeff (left)
+    vmull.s16           q2, d15, d0                                    @ q2 = (srcY - y_offset) * y_coeff (right)
+
+.ifc \ofmt,argb
+    compute_rgba_32     d13, d14, d15, d12                             @ d12-d15 = A, R, G, B
+.endif
+
+.ifc \ofmt,rgba
+    compute_rgba_32     d12, d13, d14, d15                             @ d12-d15 = R, G, B, A
+.endif
+
+.ifc \ofmt,abgr
+    compute_rgba_32     d15, d14, d13, d12                             @ d12-d15 = A, B, G, R
+.endif
+
+.ifc \ofmt,bgra
+    compute_rgba_32     d14, d13, d12, d15                             @ d12-d15 = B, G, R, A
+.endif
+
+    vst4.8              {q6, q7}, [\dst,:128]!                         @ store 8 interleaved pixels
+.endm
+
+.macro process_1l_16px_16 ofmt
+    compute_premult_16  d28, d29, d30, d31                             @ chroma terms from q14 (U) and q15 (V)
+    vld1.8              {q7}, [r4]!                                    @ 16px of luma
+    compute_16px_16     r2, d14, d15, \ofmt                            @ convert and store 16px at r2
+.endm
+
+.macro process_1l_16px_32 ofmt
+    compute_premult_32  d28, d30                                       @ chroma terms for the left 8px
+    vld1.8              {q7}, [r4]!                                    @ 16px of luma
+    vmov                d28, d15                                       @ save right of the line of luma for later use
+    compute_8px_32      r2, d14, \ofmt                                 @ left 8px
+
+    compute_premult_32  d29, d31                                       @ chroma terms for the right 8px
+    compute_8px_32      r2,  d28, \ofmt                                @ right 8px (luma saved in d28)
+.endm
+
+.macro process_2l_16px_16 ofmt
+    compute_premult_16  d28, d29, d30, d31                             @ chroma terms, shared by both lines
+
+    vld1.8              {q7}, [r4]!                                    @ first line of luma
+    compute_16px_16     r2, d14, d15, \ofmt                            @ 16px of the first output line
+
+    vld1.8              {q7}, [r12]!                                   @ second line of luma
+    compute_16px_16     r11, d14, d15, \ofmt                           @ 16px of the second output line
+.endm
+
+.macro process_2l_16px_32 ofmt
+    compute_premult_32  d28, d30                                       @ chroma terms for the left 8px of both lines
+
+    vld1.8              {q7}, [r4]!                                    @ first line of luma
+    vmov                d28, d15                                       @ save right of the first line of luma for later use
+    compute_8px_32      r2, d14, \ofmt                                 @ left 8px, first line
+
+    vld1.8              {q7}, [r12]!                                   @ second line of luma
+    vmov                d30, d15                                       @ save right of the second line of luma for later use
+    compute_8px_32      r11, d14, \ofmt                                @ left 8px, second line
+
+    compute_premult_32  d29, d31                                       @ chroma terms for the right 8px of both lines
+    compute_8px_32      r2,  d28, \ofmt                                @ right 8px, first line
+    compute_8px_32      r11, d30, \ofmt                                @ right 8px, second line
+.endm
+
+.macro load_args_nvx
+    push                {r4-r12, lr}                                   @ 10 registers = 40 bytes
+    vpush               {q4-q7}                                        @ 64 bytes; stack arguments now start at sp + 104
+    ldr                 r4, [sp, #104]                                 @ r4  = srcY
+    ldr                 r5, [sp, #108]                                 @ r5  = linesizeY
+    ldr                 r6, [sp, #112]                                 @ r6  = srcC
+    ldr                 r7, [sp, #116]                                 @ r7  = linesizeC
+    ldr                 r8, [sp, #120]                                 @ r8  = table
+    ldr                 r9, [sp, #124]                                 @ r9  = y_offset
+    ldr                 r10,[sp, #128]                                 @ r10 = y_coeff
+    vdup.16             d0, r10                                        @ d0  = y_coeff
+    vld1.16             {d1}, [r8]                                     @ d1  = *table
+    add                 r11, r2, r3                                    @ r11 = dst + linesize (dst2)
+    add                 r12, r4, r5                                    @ r12 = srcY + linesizeY (srcY2)
+    lsl                 r3, r3, #1                                     @ r3 = linesize  * 2
+    lsl                 r5, r5, #1                                     @ r5 = linesizeY * 2
+    lsl                 r8, r0, #2                                     @ r8 = width * 4
+    sub                 r3, r3, r8                                     @ r3 = linesize  * 2 - width * 4 (padding)
+    sub                 r5, r5, r0                                     @ r5 = linesizeY * 2 - width     (paddingY)
+    sub                 r7, r7, r0                                     @ r7 = linesizeC     - width     (paddingC)
+.endm
+
+.macro load_args_yuv420p
+    push                {r4-r12, lr}                                   @ 10 registers = 40 bytes
+    vpush               {q4-q7}                                        @ 64 bytes; stack arguments now start at sp + 104
+    ldr                 r4, [sp, #104]                                 @ r4  = srcY
+    ldr                 r5, [sp, #108]                                 @ r5  = linesizeY
+    ldr                 r6, [sp, #112]                                 @ r6  = srcU
+    ldr                 r8, [sp, #128]                                 @ r8  = table
+    ldr                 r9, [sp, #132]                                 @ r9  = y_offset
+    ldr                 r10,[sp, #136]                                 @ r10 = y_coeff
+    vdup.16             d0, r10                                        @ d0  = y_coeff
+    vld1.16             {d1}, [r8]                                     @ d1  = *table
+    add                 r11, r2, r3                                    @ r11 = dst + linesize (dst2)
+    add                 r12, r4, r5                                    @ r12 = srcY + linesizeY (srcY2)
+    lsl                 r3, r3, #1                                     @ r3 = linesize  * 2
+    lsl                 r5, r5, #1                                     @ r5 = linesizeY * 2
+    lsl                 r8, r0, #2                                     @ r8 = width * 4
+    sub                 r3, r3, r8                                     @ r3 = linesize  * 2 - width * 4 (padding)
+    sub                 r5, r5, r0                                     @ r5 = linesizeY * 2 - width     (paddingY)
+    ldr                 r10,[sp, #120]                                 @ r10 = srcV
+.endm
+
+.macro load_args_yuv422p
+    push                {r4-r12, lr}                                   @ 10 registers = 40 bytes
+    vpush               {q4-q7}                                        @ 64 bytes; stack arguments now start at sp + 104
+    ldr                 r4, [sp, #104]                                 @ r4  = srcY
+    ldr                 r5, [sp, #108]                                 @ r5  = linesizeY
+    ldr                 r6, [sp, #112]                                 @ r6  = srcU
+    ldr                 r7, [sp, #116]                                 @ r7  = linesizeU
+    ldr                 r12,[sp, #124]                                 @ r12 = linesizeV
+    ldr                 r8, [sp, #128]                                 @ r8  = table
+    ldr                 r9, [sp, #132]                                 @ r9  = y_offset
+    ldr                 r10,[sp, #136]                                 @ r10 = y_coeff
+    vdup.16             d0, r10                                        @ d0  = y_coeff
+    vld1.16             {d1}, [r8]                                     @ d1  = *table
+    add                 r11, r2, r3                                    @ r11 = dst + linesize (not read by the one-line yuv422p loop)
+    lsl                 r8, r0, #2                                     @ r8 = width * 4
+    sub                 r3, r3, r8                                     @ r3 = linesize  - width * 4     (padding; one line per iteration, not doubled)
+    sub                 r5, r5, r0                                     @ r5 = linesizeY - width         (paddingY; not doubled)
+    sub                 r7, r7, r0, lsr #1                             @ r7 = linesizeU     - width / 2 (paddingU)
+    sub                 r12,r12,r0, lsr #1                             @ r12 = linesizeV    - width / 2 (paddingV)
+    ldr                 r10,[sp, #120]                                 @ r10 = srcV
+.endm
+
+.macro declare_func ifmt ofmt precision
+function ff_\ifmt\()_to_\ofmt\()_neon_\precision\(), export=1
+
+.ifc \ifmt,nv12
+    load_args_nvx
+.endif
+
+.ifc \ifmt,nv21
+    load_args_nvx
+.endif
+
+.ifc \ifmt,yuv420p
+    load_args_yuv420p
+.endif
+
+
+.ifc \ifmt,yuv422p
+    load_args_yuv422p
+.endif
+
+1:
+    mov                 r8, r0                                         @ r8 = width
+2:
+    pld [r6, #64*3]                                                    @ prefetch chroma (r6)
+    pld [r4, #64*3]                                                    @ prefetch luma (r4)
+
+    vmov.i8             d10, #128                                      @ d10 = chroma bias
+
+.ifc \ifmt,nv12
+    pld [r12, #64*3]                                                   @ prefetch second luma line (r12)
+
+    vld2.8              {d2, d3}, [r6]!                                @ q1: interleaved chroma line
+    vsubl.u8            q14, d2, d10                                   @ q14 = U - 128
+    vsubl.u8            q15, d3, d10                                   @ q15 = V - 128
+
+    process_2l_16px_\precision \ofmt
+.endif
+
+.ifc \ifmt,nv21
+    pld [r12, #64*3]                                                   @ prefetch second luma line (r12)
+
+    vld2.8              {d2, d3}, [r6]!                                @ q1: interleaved chroma line
+    vsubl.u8            q14, d3, d10                                   @ q14 = U - 128
+    vsubl.u8            q15, d2, d10                                   @ q15 = V - 128
+
+    process_2l_16px_\precision \ofmt
+.endif
+
+.ifc \ifmt,yuv420p
+    pld [r10, #64*3]                                                   @ prefetch srcV (r10)
+    pld [r12, #64*3]                                                   @ prefetch second luma line (r12)
+
+    vld1.8              d2, [r6]!                                      @ d2: 8 U samples (from srcU)
+    vld1.8              d3, [r10]!                                     @ d3: 8 V samples (from srcV)
+    vsubl.u8            q14, d2, d10                                   @ q14 = U - 128
+    vsubl.u8            q15, d3, d10                                   @ q15 = V - 128
+
+    process_2l_16px_\precision \ofmt
+.endif
+
+.ifc \ifmt,yuv422p
+    pld [r10, #64*3]                                                   @ prefetch srcV (r10)
+
+    vld1.8              d2, [r6]!                                      @ d2: 8 U samples (from srcU)
+    vld1.8              d3, [r10]!                                     @ d3: 8 V samples (from srcV)
+    vsubl.u8            q14, d2, d10                                   @ q14 = U - 128
+    vsubl.u8            q15, d3, d10                                   @ q15 = V - 128
+
+    process_1l_16px_\precision \ofmt
+.endif
+
+    subs                r8, r8, #16                                    @ width -= 16
+    bgt                 2b                                             @ more pixels left on this line
+
+    add                 r2, r2, r3                                     @ dst   += padding
+    add                 r4, r4, r5                                     @ srcY  += paddingY
+
+.ifc \ifmt,nv12
+    add                 r11, r11, r3                                   @ dst2  += padding
+    add                 r12, r12, r5                                   @ srcY2 += paddingY
+
+    add                 r6, r6, r7                                     @ srcC  += paddingC
+
+    subs                r1, r1, #2                                     @ height -= 2
+.endif
+
+.ifc \ifmt,nv21
+    add                 r11, r11, r3                                   @ dst2  += padding
+    add                 r12, r12, r5                                   @ srcY2 += paddingY
+
+    add                 r6, r6, r7                                     @ srcC  += paddingC
+    subs                r1, r1, #2                                     @ height -= 2
+.endif
+
+.ifc \ifmt,yuv420p
+    add                 r11, r11, r3                                   @ dst2  += padding
+    add                 r12, r12, r5                                   @ srcY2 += paddingY
+
+    ldr                 r7, [sp, #116]                                 @ r7     = linesizeU
+    sub                 r7, r7, r0, lsr #1                             @ r7     = linesizeU - width / 2 (paddingU)
+    add                 r6, r6, r7                                     @ srcU  += paddingU
+
+    ldr                 r7, [sp, #124]                                 @ r7     = linesizeV
+    sub                 r7, r7, r0, lsr #1                             @ r7     = linesizeV - width / 2 (paddingV)
+    add                 r10, r10, r7                                   @ srcV  += paddingV
+
+    subs                r1, r1, #2                                     @ height -= 2
+.endif
+
+.ifc \ifmt,yuv422p
+    add                 r6, r6, r7                                     @ srcU  += paddingU
+    add                 r10,r10,r12                                    @ srcV  += paddingV
+
+    subs                r1, r1, #1                                     @ height -= 1
+.endif
+
+    bgt                 1b                                             @ more lines left
+
+    vpop                {q4-q7}                                        @ restore the registers saved by load_args_*
+    pop                 {r4-r12, lr}
+    mov                 pc, lr                                         @ return
+endfunc
+.endm
+
+.macro declare_rgb_funcs ifmt precision                                        @ one function per output byte order
+    declare_func \ifmt, argb, \precision
+    declare_func \ifmt, rgba, \precision
+    declare_func \ifmt, abgr, \precision
+    declare_func \ifmt, bgra, \precision
+.endm
+
+declare_rgb_funcs nv12, 16                                                     @ ff_nv12_to_{argb,rgba,abgr,bgra}_neon_16
+declare_rgb_funcs nv21, 16
+declare_rgb_funcs nv12, 32                                                     @ ff_nv12_to_{argb,rgba,abgr,bgra}_neon_32
+declare_rgb_funcs nv21, 32
+declare_rgb_funcs yuv420p, 16
+declare_rgb_funcs yuv420p, 32
+declare_rgb_funcs yuv422p, 16
+declare_rgb_funcs yuv422p, 32
diff --git a/libswscale/colorspace-test.c b/libswscale/colorspace-test.c
index 42a915bf..f6e0c920 100644
--- a/libswscale/colorspace-test.c
+++ b/libswscale/colorspace-test.c
@@ -45,7 +45,7 @@ int main(int argc, char **argv)
         return -1;
 
     av_log(NULL, AV_LOG_INFO, "memory corruption test ...\n");
-    sws_rgb2rgb_init();
+    ff_sws_rgb2rgb_init();
 
     for (funcNum = 0; ; funcNum++) {
         struct func_info_s {
diff --git a/libswscale/gamma.c b/libswscale/gamma.c
new file mode 100644
index 00000000..d7470cb1
--- /dev/null
+++ b/libswscale/gamma.c
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2015 Pedro Arthur <bygrandao@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "swscale_internal.h"
+
+typedef struct GammaContext
+{
+    uint16_t *table; // lookup table indexed by a 16 bit component value (see gamma_convert)
+} GammaContext;
+
+// gamma_convert expects 16 bit rgb format (4 little-endian 16 bit components per pixel)
+// it remaps the first 3 components in place, so the src slice must be modifiable (done through cascade context)
+static int gamma_convert(SwsContext *c, SwsFilterDescriptor *desc, int sliceY, int sliceH)
+{
+    GammaContext *gamma = desc->instance;
+    uint16_t *lut = gamma->table;
+    int width = desc->src->width;
+    int row;
+
+    for (row = 0; row < sliceH; ++row) {
+        uint8_t **lines = desc->src->plane[0].line;
+        int line_idx = sliceY + row - desc->src->plane[0].sliceY;
+        uint16_t *px = (uint16_t *)lines[line_idx];
+        int col;
+
+        for (col = 0; col < width; ++col, px += 4) {
+            uint16_t rgb[3];
+            int k;
+
+            // load all three components first, then store their table entries
+            for (k = 0; k < 3; ++k)
+                rgb[k] = AV_RL16(px + k);
+            for (k = 0; k < 3; ++k)
+                AV_WL16(px + k, lut[rgb[k]]);
+        }
+    }
+    return sliceH;
+}
+
+
+int ff_init_gamma_convert(SwsFilterDescriptor *desc, SwsSlice * src, uint16_t *table)
+{
+    GammaContext *gamma = av_malloc(sizeof(*gamma));
+    if (gamma == NULL)
+        return AVERROR(ENOMEM);
+    gamma->table = table;
+    // the filter works in place on src, so the descriptor gets no dst slice
+    desc->src      = src;
+    desc->dst      = NULL;
+    desc->instance = gamma;
+    desc->process  = &gamma_convert;
+
+    return 0;
+}
+
diff --git a/libswscale/hscale.c b/libswscale/hscale.c
new file mode 100644
index 00000000..eca06353
--- /dev/null
+++ b/libswscale/hscale.c
@@ -0,0 +1,289 @@
+/*
+ * Copyright (C) 2015 Pedro Arthur <bygrandao@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "swscale_internal.h"
+
+/// Scaler instance data (the values handed to the horizontal scaler callbacks)
+typedef struct FilterContext
+{
+    uint16_t *filter;   ///< filter argument of c->hyScale / c->hcScale
+    int *filter_pos;    ///< filter position argument of c->hyScale / c->hcScale
+    int filter_size;    ///< filter size argument of c->hyScale / c->hcScale
+    int xInc;           ///< increment argument of c->hyscale_fast / c->hcscale_fast
+} FilterContext;
+
+/// Color conversion instance data
+typedef struct ColorContext
+{
+    uint32_t *pal; ///< passed as the last argument of the lumToYV12 / alpToYV12 / chrToYV12 callbacks
+} ColorContext;
+
+static int lum_h_scale(SwsContext *c, SwsFilterDescriptor *desc, int sliceY, int sliceH)
+{
+    FilterContext *instance = desc->instance;
+    int srcW = desc->src->width;
+    int dstW = desc->dst->width;
+    int xInc = instance->xInc;
+    /* Horizontally scales sliceH lines of plane 0 (and of plane 3 when desc->alpha is set), starting at line sliceY. */
+    int i;
+    for (i = 0; i < sliceH; ++i) {
+        uint8_t ** src = desc->src->plane[0].line;
+        uint8_t ** dst = desc->dst->plane[0].line;
+        int src_pos = sliceY+i - desc->src->plane[0].sliceY; // line index relative to the src plane's sliceY
+        int dst_pos = sliceY+i - desc->dst->plane[0].sliceY; // line index relative to the dst plane's sliceY
+
+        /* use the fast scaler when one is installed, otherwise the filter-based one */
+        if (c->hyscale_fast) {
+            c->hyscale_fast(c, (int16_t*)dst[dst_pos], dstW, src[src_pos], srcW, xInc);
+        } else {
+            c->hyScale(c, (int16_t*)dst[dst_pos], dstW, (const uint8_t *)src[src_pos], instance->filter,
+                       instance->filter_pos, instance->filter_size);
+        }
+
+        if (c->lumConvertRange)
+            c->lumConvertRange((int16_t*)dst[dst_pos], dstW);
+
+        desc->dst->plane[0].sliceH += 1; // account for the line just written
+
+        if (desc->alpha) {
+            src = desc->src->plane[3].line;
+            dst = desc->dst->plane[3].line;
+
+            src_pos = sliceY+i - desc->src->plane[3].sliceY;
+            dst_pos = sliceY+i - desc->dst->plane[3].sliceY;
+
+            desc->dst->plane[3].sliceH += 1;
+
+            if (c->hyscale_fast) {
+                c->hyscale_fast(c, (int16_t*)dst[dst_pos], dstW, src[src_pos], srcW, xInc);
+            } else {
+                c->hyScale(c, (int16_t*)dst[dst_pos], dstW, (const uint8_t *)src[src_pos], instance->filter,
+                            instance->filter_pos, instance->filter_size);
+            }
+        }
+    }
+
+    return sliceH; // every requested line is processed
+}
+
+static int lum_convert(SwsContext *c, SwsFilterDescriptor *desc, int sliceY, int sliceH)
+{
+    int srcW = desc->src->width;
+    ColorContext * instance = desc->instance;
+    uint32_t * pal = instance->pal;
+    int i;
+    /* Converts sliceH input lines to plane 0 (and plane 3 when desc->alpha is set) of dst via the input callbacks. */
+    desc->dst->plane[0].sliceY = sliceY; // dst planes 0 and 3 are reset to hold exactly this slice,
+    desc->dst->plane[0].sliceH = sliceH; // which is why dst lines are indexed by i below
+    desc->dst->plane[3].sliceY = sliceY;
+    desc->dst->plane[3].sliceH = sliceH;
+
+    for (i = 0; i < sliceH; ++i) {
+        int sp0 = sliceY+i - desc->src->plane[0].sliceY; // src line index for planes 0 and 3
+        int sp1 = ((sliceY+i) >> desc->src->v_chr_sub_sample) - desc->src->plane[1].sliceY; // src line index for planes 1 and 2
+        const uint8_t * src[4] = { desc->src->plane[0].line[sp0],
+                        desc->src->plane[1].line[sp1],
+                        desc->src->plane[2].line[sp1],
+                        desc->src->plane[3].line[sp0]};
+        uint8_t * dst = desc->dst->plane[0].line[i];
+
+        if (c->lumToYV12) {
+            c->lumToYV12(dst, src[0], src[1], src[2], srcW, pal);
+        } else if (c->readLumPlanar) {
+            c->readLumPlanar(dst, src, srcW, c->input_rgb2yuv_table);
+        }
+
+
+        if (desc->alpha) {
+            dst = desc->dst->plane[3].line[i];
+            if (c->alpToYV12) {
+                c->alpToYV12(dst, src[3], src[1], src[2], srcW, pal);
+            } else if (c->readAlpPlanar) {
+                c->readAlpPlanar(dst, src, srcW, NULL);
+            }
+        }
+    }
+
+    return sliceH;
+}
+
+int ff_init_desc_fmt_convert(SwsFilterDescriptor *desc, SwsSlice * src, SwsSlice *dst, uint32_t *pal)
+{
+    ColorContext *color = av_malloc(sizeof(*color));
+    if (color == NULL)
+        return AVERROR(ENOMEM);
+    color->pal = pal;
+    /* alpha is only handled when both the source and the destination format carry it */
+    desc->src      = src;
+    desc->dst      = dst;
+    desc->alpha    = isALPHA(src->fmt) && isALPHA(dst->fmt);
+    desc->instance = color;
+    desc->process  = &lum_convert;
+
+    return 0;
+}
+
+
+int ff_init_desc_hscale(SwsFilterDescriptor *desc, SwsSlice *src, SwsSlice *dst, uint16_t *filter, int * filter_pos, int filter_size, int xInc)
+{
+    FilterContext *scaler = av_malloc(sizeof(*scaler));
+    if (scaler == NULL)
+        return AVERROR(ENOMEM);
+
+    /* keep the scaler parameters for lum_h_scale */
+    scaler->xInc        = xInc;
+    scaler->filter      = filter;
+    scaler->filter_pos  = filter_pos;
+    scaler->filter_size = filter_size;
+
+    /* alpha is only handled when both the source and the destination format carry it */
+    desc->src      = src;
+    desc->dst      = dst;
+    desc->alpha    = isALPHA(src->fmt) && isALPHA(dst->fmt);
+    desc->instance = scaler;
+    desc->process  = &lum_h_scale;
+
+    return 0;
+}
+
+static int chr_h_scale(SwsContext *c, SwsFilterDescriptor *desc, int sliceY, int sliceH)
+{
+    FilterContext *instance = desc->instance;
+    int srcW = AV_CEIL_RSHIFT(desc->src->width, desc->src->h_chr_sub_sample);
+    int dstW = AV_CEIL_RSHIFT(desc->dst->width, desc->dst->h_chr_sub_sample);
+    int xInc = instance->xInc;
+    /* Horizontally scales sliceH lines of planes 1 and 2; sliceY is used unshifted (presumably chroma lines - confirm with caller). */
+    uint8_t ** src1 = desc->src->plane[1].line;
+    uint8_t ** dst1 = desc->dst->plane[1].line;
+    uint8_t ** src2 = desc->src->plane[2].line;
+    uint8_t ** dst2 = desc->dst->plane[2].line;
+
+    int src_pos1 = sliceY - desc->src->plane[1].sliceY; // first line index, relative to each plane's sliceY
+    int dst_pos1 = sliceY - desc->dst->plane[1].sliceY;
+
+    int src_pos2 = sliceY - desc->src->plane[2].sliceY;
+    int dst_pos2 = sliceY - desc->dst->plane[2].sliceY;
+
+    int i;
+    for (i = 0; i < sliceH; ++i) {
+        if (c->hcscale_fast) { // the fast scaler handles both chroma planes in one call
+            c->hcscale_fast(c, (uint16_t*)dst1[dst_pos1+i], (uint16_t*)dst2[dst_pos2+i], dstW, src1[src_pos1+i], src2[src_pos2+i], srcW, xInc);
+        } else {
+            c->hcScale(c, (uint16_t*)dst1[dst_pos1+i], dstW, src1[src_pos1+i], instance->filter, instance->filter_pos, instance->filter_size);
+            c->hcScale(c, (uint16_t*)dst2[dst_pos2+i], dstW, src2[src_pos2+i], instance->filter, instance->filter_pos, instance->filter_size);
+        }
+
+        if (c->chrConvertRange)
+            c->chrConvertRange((uint16_t*)dst1[dst_pos1+i], (uint16_t*)dst2[dst_pos2+i], dstW);
+
+        desc->dst->plane[1].sliceH += 1; // account for the line just written to each chroma plane
+        desc->dst->plane[2].sliceH += 1;
+    }
+    return sliceH;
+}
+
+static int chr_convert(SwsContext *c, SwsFilterDescriptor *desc, int sliceY, int sliceH)
+{
+    int srcW = AV_CEIL_RSHIFT(desc->src->width, desc->src->h_chr_sub_sample);
+    ColorContext * instance = desc->instance;
+    uint32_t * pal = instance->pal;
+    /* Converts sliceH input lines to planes 1 and 2 of dst through c->chrToYV12 or c->readChrPlanar. */
+    int sp0 = (sliceY - (desc->src->plane[0].sliceY >> desc->src->v_chr_sub_sample)) << desc->src->v_chr_sub_sample; // first src line index for planes 0 and 3
+    int sp1 = sliceY - desc->src->plane[1].sliceY; // first src line index for planes 1 and 2
+
+    int i;
+
+    desc->dst->plane[1].sliceY = sliceY; // dst planes 1 and 2 are reset to hold exactly this slice,
+    desc->dst->plane[1].sliceH = sliceH; // which is why dst lines are indexed by i below
+    desc->dst->plane[2].sliceY = sliceY;
+    desc->dst->plane[2].sliceH = sliceH;
+
+    for (i = 0; i < sliceH; ++i) {
+        const uint8_t * src[4] = { desc->src->plane[0].line[sp0+i],
+                        desc->src->plane[1].line[sp1+i],
+                        desc->src->plane[2].line[sp1+i],
+                        desc->src->plane[3].line[sp0+i]};
+
+        uint8_t * dst1 = desc->dst->plane[1].line[i];
+        uint8_t * dst2 = desc->dst->plane[2].line[i];
+        if (c->chrToYV12) {
+            c->chrToYV12(dst1, dst2, src[0], src[1], src[2], srcW, pal);
+        } else if (c->readChrPlanar) {
+            c->readChrPlanar(dst1, dst2, src, srcW, c->input_rgb2yuv_table);
+        }
+    }
+    return sliceH;
+}
+
+int ff_init_desc_cfmt_convert(SwsFilterDescriptor *desc, SwsSlice * src, SwsSlice *dst, uint32_t *pal)
+{
+    ColorContext *color = av_malloc(sizeof(*color));
+    if (color == NULL)
+        return AVERROR(ENOMEM);
+    color->pal = pal;
+    /* chroma conversion step: desc->alpha is left untouched here */
+    desc->src      = src;
+    desc->dst      = dst;
+    desc->instance = color;
+    desc->process  = &chr_convert;
+
+    return 0;
+}
+
+int ff_init_desc_chscale(SwsFilterDescriptor *desc, SwsSlice *src, SwsSlice *dst, uint16_t *filter, int * filter_pos, int filter_size, int xInc)
+{
+    FilterContext *scaler = av_malloc(sizeof(*scaler));
+    if (scaler == NULL)
+        return AVERROR(ENOMEM);
+
+    /* keep the scaler parameters for chr_h_scale */
+    scaler->xInc        = xInc;
+    scaler->filter      = filter;
+    scaler->filter_pos  = filter_pos;
+    scaler->filter_size = filter_size;
+
+    /* alpha flag follows the same rule as the luma descriptor */
+    desc->src      = src;
+    desc->dst      = dst;
+    desc->alpha    = isALPHA(src->fmt) && isALPHA(dst->fmt);
+    desc->instance = scaler;
+    desc->process  = &chr_h_scale;
+
+    return 0;
+}
+
+static int no_chr_scale(SwsContext *c, SwsFilterDescriptor *desc, int sliceY, int sliceH)
+{   /* no pixel work: only moves the chroma windows so that they end at sliceY + sliceH */
+    desc->dst->plane[1].sliceH = desc->dst->plane[1].available_lines;
+    desc->dst->plane[2].sliceH = desc->dst->plane[2].available_lines;
+    desc->dst->plane[1].sliceY = (sliceY + sliceH) - desc->dst->plane[1].available_lines;
+    desc->dst->plane[2].sliceY = (sliceY + sliceH) - desc->dst->plane[2].available_lines;
+    return 0;
+}
+
+int ff_init_desc_no_chr(SwsFilterDescriptor *desc, SwsSlice * src, SwsSlice *dst)
+{
+    desc->process  = &no_chr_scale; /* step that only updates the dst chroma window */
+    desc->instance = NULL;          /* no per-descriptor state is allocated */
+    desc->alpha    = 0;
+    desc->src      = src;
+    desc->dst      = dst;
+    return 0;
+}
diff --git a/libswscale/input.c b/libswscale/input.c
index 1f04fc27..ac76aee7 100644
--- a/libswscale/input.c
+++ b/libswscale/input.c
@@ -607,6 +607,33 @@ static void read_ya16be_alpha_c(uint8_t *dst, const uint8_t *src, const uint8_t
         AV_WN16(dst + i * 2, AV_RB16(src + i * 4 + 2));
 }
 
+static void read_ayuv64le_Y_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused0, const uint8_t *unused1, int width,
+                               uint32_t *unused2)
+{   /* For each of width pixels, copy the 16-bit value at byte offset 2 of the 8-byte source pixel to dst (2 bytes per output sample). */
+    int i;
+    for (i = 0; i < width; i++)
+        AV_WN16(dst + i * 2, AV_RL16(src + i * 8 + 2)); /* presumably little-endian read, native-endian write - see the AV_RL16/AV_WN16 definitions */
+}
+
+
+static void read_ayuv64le_UV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src,
+                               const uint8_t *unused1, int width, uint32_t *unused2)
+{   /* Split the two 16-bit values at byte offsets 4 and 6 of each 8-byte source pixel into dstU and dstV; src is the second source pointer of the callback. */
+    int i;
+    for (i = 0; i < width; i++) {
+        AV_WN16(dstU + i * 2, AV_RL16(src + i * 8 + 4)); /* offset 4 -> U */
+        AV_WN16(dstV + i * 2, AV_RL16(src + i * 8 + 6)); /* offset 6 -> V */
+    }
+}
+
+static void read_ayuv64le_A_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused0, const uint8_t *unused1, int width,
+                                uint32_t *unused2)
+{   /* For each of width pixels, copy the first 16-bit value (byte offset 0) of the 8-byte source pixel to dst. */
+    int i;
+    for (i = 0; i < width; i++)
+        AV_WN16(dst + i * 2, AV_RL16(src + i * 8)); /* no offset: this is the component stored first in each pixel */
+}
+
 /* This is almost identical to the previous, end exists only because
  * yuy2ToY/UV)(dst, src + 1, ...) would have 100% unaligned accesses. */
 static void uyvyToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,  int width,
@@ -652,6 +679,46 @@ static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
     nvXXtoUV_c(dstV, dstU, src1, width);
 }
 
+static void p010LEToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1,
+                        const uint8_t *unused2, int width, uint32_t *unused)
+{   /* Read width 16-bit words from src with AV_RL16 and store each one, shifted right by 6, as a 16-bit value in dst. */
+    int i;
+    for (i = 0; i < width; i++) {
+        AV_WN16(dst + i * 2, AV_RL16(src + i * 2) >> 6); /* >> 6 keeps the top 10 bits of the 16-bit word */
+    }
+}
+
+static void p010BEToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1,
+                        const uint8_t *unused2, int width, uint32_t *unused)
+{   /* Same as p010LEToY_c except that the source words are read with AV_RB16 instead of AV_RL16. */
+    int i;
+    for (i = 0; i < width; i++) {
+        AV_WN16(dst + i * 2, AV_RB16(src + i * 2) >> 6); /* >> 6 keeps the top 10 bits of the 16-bit word */
+    }
+}
+
+static void p010LEToUV_c(uint8_t *dstU, uint8_t *dstV,
+                       const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
+                       int width, uint32_t *unused)
+{   /* De-interleave src1, which holds pairs of 16-bit words (4 bytes per pair), into dstU and dstV; src2 is not used. */
+    int i;
+    for (i = 0; i < width; i++) {
+        AV_WN16(dstU + i * 2, AV_RL16(src1 + i * 4 + 0) >> 6); /* first word of the pair -> U, top 10 bits kept */
+        AV_WN16(dstV + i * 2, AV_RL16(src1 + i * 4 + 2) >> 6); /* second word of the pair -> V */
+    }
+}
+
+static void p010BEToUV_c(uint8_t *dstU, uint8_t *dstV,
+                       const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
+                       int width, uint32_t *unused)
+{   /* Same as p010LEToUV_c except that the source words are read with AV_RB16 instead of AV_RL16; src2 is not used. */
+    int i;
+    for (i = 0; i < width; i++) {
+        AV_WN16(dstU + i * 2, AV_RB16(src1 + i * 4 + 0) >> 6); /* first word of the pair -> U, top 10 bits kept */
+        AV_WN16(dstV + i * 2, AV_RB16(src1 + i * 4 + 2) >> 6); /* second word of the pair -> V */
+    }
+}
+
 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
 
 static void bgr24ToY_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,
@@ -818,6 +885,19 @@ static av_always_inline void planar_rgb16_to_y(uint8_t *_dst, const uint8_t *_sr
     }
 }
 
+static av_always_inline void planar_rgb16_to_a(uint8_t *_dst, const uint8_t *_src[4],
+                                               int width, int bpc, int is_be, int32_t *rgb2yuv)
+{   /* Copy width samples of source plane 3 into _dst as 16-bit values, left-shifted by (14 - shift); rgb2yuv is not used in this body. */
+    int i;
+    const uint16_t **src = (const uint16_t **)_src;
+    uint16_t *dst        = (uint16_t *)_dst;
+    int shift = bpc < 16 ? bpc : 14; /* bpc >= 16 gives a left shift of 0 below; NOTE(review): bpc == 15 would make 14 - shift negative - confirm it is never instantiated */
+
+    for (i = 0; i < width; i++) {
+        dst[i] = rdpx(src[3] + i) << (14 - shift); /* is_be is not referenced directly here; presumably consumed inside the rdpx macro - confirm */
+    }
+}
+
 static av_always_inline void planar_rgb16_to_uv(uint8_t *_dstU, uint8_t *_dstV,
                                                 const uint8_t *_src[4], int width,
                                                 int bpc, int is_be, int32_t *rgb2yuv)
@@ -846,6 +926,11 @@ static void planar_rgb##nbits##endian_name##_to_y(uint8_t *dst, const uint8_t *s
 {                                                                                                   \
     planar_rgb16_to_y(dst, src, w, nbits, endian, rgb2yuv);                                         \
 }                                                                                                   \
+static void planar_rgb##nbits##endian_name##_to_a(uint8_t *dst, const uint8_t *src[4],              \
+                                                  int w, int32_t *rgb2yuv)                          \
+{                                                                                                   \
+    planar_rgb16_to_a(dst, src, w, nbits, endian, rgb2yuv);                                         \
+}                                                                                                   \
 static void planar_rgb##nbits##endian_name##_to_uv(uint8_t *dstU, uint8_t *dstV,                    \
                                                    const uint8_t *src[4], int w, int32_t *rgb2yuv)  \
 {                                                                                                   \
@@ -987,6 +1072,15 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
         c->chrToYV12 = bswap16UV_c;
         break;
 #endif
+    case AV_PIX_FMT_AYUV64LE:
+        c->chrToYV12 = read_ayuv64le_UV_c;
+        break;
+    case AV_PIX_FMT_P010LE:
+        c->chrToYV12 = p010LEToUV_c;
+        break;
+    case AV_PIX_FMT_P010BE:
+        c->chrToYV12 = p010BEToUV_c;
+        break;
     }
     if (c->chrSrcHSubSample) {
         switch (srcFormat) {
@@ -1172,6 +1266,7 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
         c->readLumPlanar = planar_rgb14le_to_y;
         break;
     case AV_PIX_FMT_GBRAP16LE:
+        c->readAlpPlanar = planar_rgb16le_to_a;
     case AV_PIX_FMT_GBRP16LE:
         c->readLumPlanar = planar_rgb16le_to_y;
         break;
@@ -1188,6 +1283,7 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
         c->readLumPlanar = planar_rgb14be_to_y;
         break;
     case AV_PIX_FMT_GBRAP16BE:
+        c->readAlpPlanar = planar_rgb16be_to_a;
     case AV_PIX_FMT_GBRP16BE:
         c->readLumPlanar = planar_rgb16be_to_y;
         break;
@@ -1271,6 +1367,9 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
     case AV_PIX_FMT_YA16BE:
         c->lumToYV12 = read_ya16be_gray_c;
         break;
+    case AV_PIX_FMT_AYUV64LE:
+        c->lumToYV12 = read_ayuv64le_Y_c;
+        break;
     case AV_PIX_FMT_YUYV422:
     case AV_PIX_FMT_YVYU422:
     case AV_PIX_FMT_YA8:
@@ -1369,6 +1468,13 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
         break;
     case AV_PIX_FMT_BGRA64LE:
         c->lumToYV12 = bgr64LEToY_c;
+        break;
+    case AV_PIX_FMT_P010LE:
+        c->lumToYV12 = p010LEToY_c;
+        break;
+    case AV_PIX_FMT_P010BE:
+        c->lumToYV12 = p010BEToY_c;
+        break;
     }
     if (c->alpPixBuf) {
         if (is16BPS(srcFormat) || isNBPS(srcFormat)) {
@@ -1397,6 +1503,9 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
         case AV_PIX_FMT_YA16BE:
             c->alpToYV12 = read_ya16be_alpha_c;
             break;
+        case AV_PIX_FMT_AYUV64LE:
+            c->alpToYV12 = read_ayuv64le_A_c;
+            break;
         case AV_PIX_FMT_PAL8 :
             c->alpToYV12 = palToA_c;
             break;
diff --git a/libswscale/options.c b/libswscale/options.c
index f08267c6..c1ea3362 100644
--- a/libswscale/options.c
+++ b/libswscale/options.c
@@ -42,9 +42,9 @@ static const AVOption swscale_options[] = {
     { "neighbor",        "nearest neighbor",              0,                 AV_OPT_TYPE_CONST,  { .i64  = SWS_POINT          }, INT_MIN, INT_MAX,        VE, "sws_flags" },
     { "area",            "averaging area",                0,                 AV_OPT_TYPE_CONST,  { .i64  = SWS_AREA           }, INT_MIN, INT_MAX,        VE, "sws_flags" },
     { "bicublin",        "luma bicubic, chroma bilinear", 0,                 AV_OPT_TYPE_CONST,  { .i64  = SWS_BICUBLIN       }, INT_MIN, INT_MAX,        VE, "sws_flags" },
-    { "gauss",           "gaussian",                      0,                 AV_OPT_TYPE_CONST,  { .i64  = SWS_GAUSS          }, INT_MIN, INT_MAX,        VE, "sws_flags" },
+    { "gauss",           "Gaussian",                      0,                 AV_OPT_TYPE_CONST,  { .i64  = SWS_GAUSS          }, INT_MIN, INT_MAX,        VE, "sws_flags" },
     { "sinc",            "sinc",                          0,                 AV_OPT_TYPE_CONST,  { .i64  = SWS_SINC           }, INT_MIN, INT_MAX,        VE, "sws_flags" },
-    { "lanczos",         "lanczos",                       0,                 AV_OPT_TYPE_CONST,  { .i64  = SWS_LANCZOS        }, INT_MIN, INT_MAX,        VE, "sws_flags" },
+    { "lanczos",         "Lanczos",                       0,                 AV_OPT_TYPE_CONST,  { .i64  = SWS_LANCZOS        }, INT_MIN, INT_MAX,        VE, "sws_flags" },
     { "spline",          "natural bicubic spline",        0,                 AV_OPT_TYPE_CONST,  { .i64  = SWS_SPLINE         }, INT_MIN, INT_MAX,        VE, "sws_flags" },
     { "print_info",      "print info",                    0,                 AV_OPT_TYPE_CONST,  { .i64  = SWS_PRINT_INFO     }, INT_MIN, INT_MAX,        VE, "sws_flags" },
     { "accurate_rnd",    "accurate rounding",             0,                 AV_OPT_TYPE_CONST,  { .i64  = SWS_ACCURATE_RND   }, INT_MIN, INT_MAX,        VE, "sws_flags" },
@@ -59,8 +59,8 @@ static const AVOption swscale_options[] = {
     { "dsth",            "destination height",            OFFSET(dstH),      AV_OPT_TYPE_INT,    { .i64 = 16                 }, 1,       INT_MAX,        VE },
     { "src_format",      "source format",                 OFFSET(srcFormat), AV_OPT_TYPE_INT,    { .i64 = DEFAULT            }, 0,       AV_PIX_FMT_NB - 1, VE },
     { "dst_format",      "destination format",            OFFSET(dstFormat), AV_OPT_TYPE_INT,    { .i64 = DEFAULT            }, 0,       AV_PIX_FMT_NB - 1, VE },
-    { "src_range",       "source range",                  OFFSET(srcRange),  AV_OPT_TYPE_INT,    { .i64 = DEFAULT            }, 0,       1,              VE },
-    { "dst_range",       "destination range",             OFFSET(dstRange),  AV_OPT_TYPE_INT,    { .i64 = DEFAULT            }, 0,       1,              VE },
+    { "src_range",       "source is full range",          OFFSET(srcRange),  AV_OPT_TYPE_BOOL,   { .i64 = DEFAULT            }, 0,       1,              VE },
+    { "dst_range",       "destination is full range",     OFFSET(dstRange),  AV_OPT_TYPE_BOOL,   { .i64 = DEFAULT            }, 0,       1,              VE },
     { "param0",          "scaler param 0",                OFFSET(param[0]),  AV_OPT_TYPE_DOUBLE, { .dbl = SWS_PARAM_DEFAULT  }, INT_MIN, INT_MAX,        VE },
     { "param1",          "scaler param 1",                OFFSET(param[1]),  AV_OPT_TYPE_DOUBLE, { .dbl = SWS_PARAM_DEFAULT  }, INT_MIN, INT_MAX,        VE },
 
@@ -75,14 +75,16 @@ static const AVOption swscale_options[] = {
     { "ed",              "error diffusion",               0,                 AV_OPT_TYPE_CONST,  { .i64  = SWS_DITHER_ED      }, INT_MIN, INT_MAX,        VE, "sws_dither" },
     { "a_dither",        "arithmetic addition dither",    0,                 AV_OPT_TYPE_CONST,  { .i64  = SWS_DITHER_A_DITHER}, INT_MIN, INT_MAX,        VE, "sws_dither" },
     { "x_dither",        "arithmetic xor dither",         0,                 AV_OPT_TYPE_CONST,  { .i64  = SWS_DITHER_X_DITHER}, INT_MIN, INT_MAX,        VE, "sws_dither" },
-    { "gamma",           "gamma correct scaling", OFFSET(gamma_flag),        AV_OPT_TYPE_INT,    { .i64  = 0                  }, 0,       INT_MAX,        VE, "gamma" },
-    { "true",            "enable",                        0,                 AV_OPT_TYPE_CONST,  { .i64  = 1                  }, INT_MIN, INT_MAX,        VE, "gamma" },
-    { "false",           "disable",                       0,                 AV_OPT_TYPE_CONST,  { .i64  = 0                  }, INT_MIN, INT_MAX,        VE, "gamma" },
+    { "gamma",           "gamma correct scaling",         OFFSET(gamma_flag),AV_OPT_TYPE_BOOL,   { .i64  = 0                  }, 0,       1,              VE },
+    { "alphablend",      "mode for alpha -> non alpha",   OFFSET(alphablend),AV_OPT_TYPE_INT,    { .i64  = SWS_ALPHA_BLEND_NONE}, 0,       SWS_ALPHA_BLEND_NB-1, VE, "alphablend" },
+    { "none",            "ignore alpha",                  0,                 AV_OPT_TYPE_CONST,  { .i64  = SWS_ALPHA_BLEND_NONE}, INT_MIN, INT_MAX,       VE, "alphablend" },
+    { "uniform_color",   "blend onto a uniform color",    0,                 AV_OPT_TYPE_CONST,  { .i64  = SWS_ALPHA_BLEND_UNIFORM},INT_MIN, INT_MAX,     VE, "alphablend" },
+    { "checkerboard",    "blend onto a checkerboard",     0,                 AV_OPT_TYPE_CONST,  { .i64  = SWS_ALPHA_BLEND_CHECKERBOARD},INT_MIN, INT_MAX,     VE, "alphablend" },
 
     { NULL }
 };
 
-const AVClass sws_context_class = {
+const AVClass ff_sws_context_class = {
     .class_name = "SWScaler",
     .item_name  = sws_context_to_name,
     .option     = swscale_options,
@@ -92,5 +94,5 @@ const AVClass sws_context_class = {
 
 const AVClass *sws_get_class(void)
 {
-    return &sws_context_class;
+    return &ff_sws_context_class;
 }
diff --git a/libswscale/output.c b/libswscale/output.c
index 6048e2bb..0c763c37 100644
--- a/libswscale/output.c
+++ b/libswscale/output.c
@@ -313,7 +313,7 @@ static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterS
 
 #define accumulate_bit(acc, val) \
     acc <<= 1; \
-    acc |= (val) >= (128 + 110)
+    acc |= (val) >= 234
 #define output_pixel(pos, acc) \
     if (target == AV_PIX_FMT_MONOBLACK) { \
         pos = acc; \
@@ -385,6 +385,7 @@ yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
     const uint8_t * const d128 = ff_dither_8x8_220[y & 7];
     int  yalpha1 = 4096 - yalpha;
     int i;
+    av_assert2(yalpha  <= 4096U);
 
     if (c->dither == SWS_DITHER_ED) {
         int err = 0;
@@ -590,6 +591,8 @@ yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
     int  yalpha1 = 4096 - yalpha;
     int uvalpha1 = 4096 - uvalpha;
     int i;
+    av_assert2(yalpha  <= 4096U);
+    av_assert2(uvalpha <= 4096U);
 
     for (i = 0; i < ((dstW + 1) >> 1); i++) {
         int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha)  >> 19;
@@ -684,7 +687,7 @@ yuv2rgba64_X_c_template(SwsContext *c, const int16_t *lumFilter,
                        const int16_t *chrFilter, const int32_t **chrUSrc,
                        const int32_t **chrVSrc, int chrFilterSize,
                        const int32_t **alpSrc, uint16_t *dest, int dstW,
-                       int y, enum AVPixelFormat target, int hasAlpha)
+                       int y, enum AVPixelFormat target, int hasAlpha, int eightbytes)
 {
     int i;
     int A1 = 0xffff<<14, A2 = 0xffff<<14;
@@ -693,8 +696,8 @@ yuv2rgba64_X_c_template(SwsContext *c, const int16_t *lumFilter,
         int j;
         int Y1 = -0x40000000;
         int Y2 = -0x40000000;
-        int U  = -128 << 23; // 19
-        int V  = -128 << 23;
+        int U  = -(128 << 23); // 19
+        int V  = -(128 << 23);
         int R, G, B;
 
         for (j = 0; j < lumFilterSize; j++) {
@@ -744,12 +747,19 @@ yuv2rgba64_X_c_template(SwsContext *c, const int16_t *lumFilter,
         output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
         output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
         output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
-        output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >> 14);
-        output_pixel(&dest[4], av_clip_uintp2(R_B + Y2, 30) >> 14);
-        output_pixel(&dest[5], av_clip_uintp2(  G + Y2, 30) >> 14);
-        output_pixel(&dest[6], av_clip_uintp2(B_R + Y2, 30) >> 14);
-        output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >> 14);
-        dest += 8;
+        if (eightbytes) {
+            output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >> 14);
+            output_pixel(&dest[4], av_clip_uintp2(R_B + Y2, 30) >> 14);
+            output_pixel(&dest[5], av_clip_uintp2(  G + Y2, 30) >> 14);
+            output_pixel(&dest[6], av_clip_uintp2(B_R + Y2, 30) >> 14);
+            output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >> 14);
+            dest += 8;
+        } else {
+            output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
+            output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
+            output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
+            dest += 6;
+        }
     }
 }
 
@@ -758,7 +768,7 @@ yuv2rgba64_2_c_template(SwsContext *c, const int32_t *buf[2],
                        const int32_t *ubuf[2], const int32_t *vbuf[2],
                        const int32_t *abuf[2], uint16_t *dest, int dstW,
                        int yalpha, int uvalpha, int y,
-                       enum AVPixelFormat target, int hasAlpha)
+                       enum AVPixelFormat target, int hasAlpha, int eightbytes)
 {
     const int32_t *buf0  = buf[0],  *buf1  = buf[1],
                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
@@ -770,11 +780,14 @@ yuv2rgba64_2_c_template(SwsContext *c, const int32_t *buf[2],
     int i;
     int A1 = 0xffff<<14, A2 = 0xffff<<14;
 
+    av_assert2(yalpha  <= 4096U);
+    av_assert2(uvalpha <= 4096U);
+
     for (i = 0; i < ((dstW + 1) >> 1); i++) {
         int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha) >> 14;
         int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha) >> 14;
-        int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha + (-128 << 23)) >> 14;
-        int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha + (-128 << 23)) >> 14;
+        int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha - (128 << 23)) >> 14;
+        int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha - (128 << 23)) >> 14;
         int R, G, B;
 
         Y1 -= c->yuv2rgb_y_offset;
@@ -799,12 +812,19 @@ yuv2rgba64_2_c_template(SwsContext *c, const int32_t *buf[2],
         output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
         output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
         output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
-        output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >> 14);
-        output_pixel(&dest[4], av_clip_uintp2(R_B + Y2, 30) >> 14);
-        output_pixel(&dest[5], av_clip_uintp2(  G + Y2, 30) >> 14);
-        output_pixel(&dest[6], av_clip_uintp2(B_R + Y2, 30) >> 14);
-        output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >> 14);
-        dest += 8;
+        if (eightbytes) {
+            output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >> 14);
+            output_pixel(&dest[4], av_clip_uintp2(R_B + Y2, 30) >> 14);
+            output_pixel(&dest[5], av_clip_uintp2(  G + Y2, 30) >> 14);
+            output_pixel(&dest[6], av_clip_uintp2(B_R + Y2, 30) >> 14);
+            output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >> 14);
+            dest += 8;
+        } else {
+            output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
+            output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
+            output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
+            dest += 6;
+        }
     }
 }
 
@@ -812,7 +832,7 @@ static av_always_inline void
 yuv2rgba64_1_c_template(SwsContext *c, const int32_t *buf0,
                        const int32_t *ubuf[2], const int32_t *vbuf[2],
                        const int32_t *abuf0, uint16_t *dest, int dstW,
-                       int uvalpha, int y, enum AVPixelFormat target, int hasAlpha)
+                       int uvalpha, int y, enum AVPixelFormat target, int hasAlpha, int eightbytes)
 {
     const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
     int i;
@@ -822,8 +842,8 @@ yuv2rgba64_1_c_template(SwsContext *c, const int32_t *buf0,
         for (i = 0; i < ((dstW + 1) >> 1); i++) {
             int Y1 = (buf0[i * 2]    ) >> 2;
             int Y2 = (buf0[i * 2 + 1]) >> 2;
-            int U  = (ubuf0[i] + (-128 << 11)) >> 2;
-            int V  = (vbuf0[i] + (-128 << 11)) >> 2;
+            int U  = (ubuf0[i] - (128 << 11)) >> 2;
+            int V  = (vbuf0[i] - (128 << 11)) >> 2;
             int R, G, B;
 
             Y1 -= c->yuv2rgb_y_offset;
@@ -848,12 +868,19 @@ yuv2rgba64_1_c_template(SwsContext *c, const int32_t *buf0,
             output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
             output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
             output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
-            output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >> 14);
-            output_pixel(&dest[4], av_clip_uintp2(R_B + Y2, 30) >> 14);
-            output_pixel(&dest[5], av_clip_uintp2(  G + Y2, 30) >> 14);
-            output_pixel(&dest[6], av_clip_uintp2(B_R + Y2, 30) >> 14);
-            output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >> 14);
-            dest += 8;
+            if (eightbytes) {
+                output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >> 14);
+                output_pixel(&dest[4], av_clip_uintp2(R_B + Y2, 30) >> 14);
+                output_pixel(&dest[5], av_clip_uintp2(  G + Y2, 30) >> 14);
+                output_pixel(&dest[6], av_clip_uintp2(B_R + Y2, 30) >> 14);
+                output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >> 14);
+                dest += 8;
+            } else {
+                output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
+                output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
+                output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
+                dest += 6;
+            }
         }
     } else {
         const int32_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
@@ -861,8 +888,8 @@ yuv2rgba64_1_c_template(SwsContext *c, const int32_t *buf0,
         for (i = 0; i < ((dstW + 1) >> 1); i++) {
             int Y1 = (buf0[i * 2]    ) >> 2;
             int Y2 = (buf0[i * 2 + 1]) >> 2;
-            int U  = (ubuf0[i] + ubuf1[i] + (-128 << 12)) >> 3;
-            int V  = (vbuf0[i] + vbuf1[i] + (-128 << 12)) >> 3;
+            int U  = (ubuf0[i] + ubuf1[i] - (128 << 12)) >> 3;
+            int V  = (vbuf0[i] + vbuf1[i] - (128 << 12)) >> 3;
             int R, G, B;
 
             Y1 -= c->yuv2rgb_y_offset;
@@ -887,58 +914,68 @@ yuv2rgba64_1_c_template(SwsContext *c, const int32_t *buf0,
             output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
             output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
             output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
-            output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >> 14);
-            output_pixel(&dest[4], av_clip_uintp2(R_B + Y2, 30) >> 14);
-            output_pixel(&dest[5], av_clip_uintp2(  G + Y2, 30) >> 14);
-            output_pixel(&dest[6], av_clip_uintp2(B_R + Y2, 30) >> 14);
-            output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >> 14);
-            dest += 8;
+            if (eightbytes) {
+                output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >> 14);
+                output_pixel(&dest[4], av_clip_uintp2(R_B + Y2, 30) >> 14);
+                output_pixel(&dest[5], av_clip_uintp2(  G + Y2, 30) >> 14);
+                output_pixel(&dest[6], av_clip_uintp2(B_R + Y2, 30) >> 14);
+                output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >> 14);
+                dest += 8;
+            } else {
+                output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
+                output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
+                output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
+                dest += 6;
+            }
         }
     }
 }
 
 static av_always_inline void
-yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
+yuv2rgba64_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
                        const int32_t **lumSrc, int lumFilterSize,
                        const int16_t *chrFilter, const int32_t **chrUSrc,
                        const int32_t **chrVSrc, int chrFilterSize,
                        const int32_t **alpSrc, uint16_t *dest, int dstW,
-                       int y, enum AVPixelFormat target, int hasAlpha)
+                       int y, enum AVPixelFormat target, int hasAlpha, int eightbytes)
 {
     int i;
+    int A = 0xffff<<14;
 
-    for (i = 0; i < ((dstW + 1) >> 1); i++) {
+    for (i = 0; i < dstW; i++) {
         int j;
-        int Y1 = -0x40000000;
-        int Y2 = -0x40000000;
-        int U  = -128 << 23; // 19
-        int V  = -128 << 23;
+        int Y  = -0x40000000;
+        int U  = -(128 << 23); // 19
+        int V  = -(128 << 23);
         int R, G, B;
 
         for (j = 0; j < lumFilterSize; j++) {
-            Y1 += lumSrc[j][i * 2]     * (unsigned)lumFilter[j];
-            Y2 += lumSrc[j][i * 2 + 1] * (unsigned)lumFilter[j];
+            Y += lumSrc[j][i]  * (unsigned)lumFilter[j];
         }
         for (j = 0; j < chrFilterSize; j++) {;
             U += chrUSrc[j][i] * (unsigned)chrFilter[j];
             V += chrVSrc[j][i] * (unsigned)chrFilter[j];
         }
 
+        if (hasAlpha) {
+            A = -0x40000000;
+            for (j = 0; j < lumFilterSize; j++) {
+                A += alpSrc[j][i] * (unsigned)lumFilter[j];
+            }
+            A >>= 1;
+            A += 0x20002000;
+        }
+
         // 8bit: 12+15=27; 16-bit: 12+19=31
-        Y1 >>= 14; // 10
-        Y1 += 0x10000;
-        Y2 >>= 14;
-        Y2 += 0x10000;
+        Y  >>= 14; // 10
+        Y += 0x10000;
         U  >>= 14;
         V  >>= 14;
 
         // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
-        Y1 -= c->yuv2rgb_y_offset;
-        Y2 -= c->yuv2rgb_y_offset;
-        Y1 *= c->yuv2rgb_y_coeff;
-        Y2 *= c->yuv2rgb_y_coeff;
-        Y1 += 1 << 13; // 21
-        Y2 += 1 << 13;
+        Y -= c->yuv2rgb_y_offset;
+        Y *= c->yuv2rgb_y_coeff;
+        Y += 1 << 13; // 21
         // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
 
         R = V * c->yuv2rgb_v2r_coeff;
@@ -946,121 +983,143 @@ yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
         B =                            U * c->yuv2rgb_u2b_coeff;
 
         // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
-        output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
-        output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
-        output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
-        output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
-        output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
-        output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
-        dest += 6;
+        output_pixel(&dest[0], av_clip_uintp2(R_B + Y, 30) >> 14);
+        output_pixel(&dest[1], av_clip_uintp2(  G + Y, 30) >> 14);
+        output_pixel(&dest[2], av_clip_uintp2(B_R + Y, 30) >> 14);
+        if (eightbytes) {
+            output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14);
+            dest += 4;
+        } else {
+            dest += 3;
+        }
     }
 }
 
 static av_always_inline void
-yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
+yuv2rgba64_full_2_c_template(SwsContext *c, const int32_t *buf[2],
                        const int32_t *ubuf[2], const int32_t *vbuf[2],
                        const int32_t *abuf[2], uint16_t *dest, int dstW,
                        int yalpha, int uvalpha, int y,
-                       enum AVPixelFormat target, int hasAlpha)
+                       enum AVPixelFormat target, int hasAlpha, int eightbytes)
 {
     const int32_t *buf0  = buf[0],  *buf1  = buf[1],
                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
-                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
+                  *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
+                  *abuf0 = hasAlpha ? abuf[0] : NULL, /* abuf is only indexed when hasAlpha is set */
+                  *abuf1 = hasAlpha ? abuf[1] : NULL;
     int  yalpha1 = 4096 - yalpha;
     int uvalpha1 = 4096 - uvalpha;
     int i;
+    int A = 0xffff<<14; /* alpha written when !hasAlpha: 0xffff once shifted right by 14 at output (assuming av_clip_uintp2(x, 30) leaves it unchanged) */
 
-    for (i = 0; i < ((dstW + 1) >> 1); i++) {
-        int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha) >> 14;
-        int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha) >> 14;
-        int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha + (-128 << 23)) >> 14;
-        int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha + (-128 << 23)) >> 14;
+    av_assert2(yalpha  <= 4096U); /* unsigned comparison: a negative weight fails as well */
+    av_assert2(uvalpha <= 4096U);
+
+    for (i = 0; i < dstW; i++) { /* one output pixel per iteration; luma and chroma rows are both indexed by i */
+        int Y  = (buf0[i]     * yalpha1  + buf1[i]     * yalpha) >> 14; /* blend of the two rows; yalpha1 + yalpha == 4096 */
+        int U  = (ubuf0[i]   * uvalpha1 + ubuf1[i]     * uvalpha - (128 << 23)) >> 14; /* bias subtracted as a positive constant, so no negative value is left-shifted */
+        int V  = (vbuf0[i]   * uvalpha1 + vbuf1[i]     * uvalpha - (128 << 23)) >> 14;
         int R, G, B;
 
-        Y1 -= c->yuv2rgb_y_offset;
-        Y2 -= c->yuv2rgb_y_offset;
-        Y1 *= c->yuv2rgb_y_coeff;
-        Y2 *= c->yuv2rgb_y_coeff;
-        Y1 += 1 << 13;
-        Y2 += 1 << 13;
+        Y -= c->yuv2rgb_y_offset;
+        Y *= c->yuv2rgb_y_coeff;
+        Y += 1 << 13; /* half of the 1 << 14 dropped by the final >> 14 */
 
         R = V * c->yuv2rgb_v2r_coeff;
         G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
         B =                            U * c->yuv2rgb_u2b_coeff;
 
-        output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
-        output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
-        output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
-        output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
-        output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
-        output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
-        dest += 6;
+        if (hasAlpha) {
+            A = (abuf0[i] * yalpha1 + abuf1[i] * yalpha) >> 1; /* same two-row blend as Y, applied to the alpha rows */
+
+            A += 1 << 13; /* same offset as added to Y before the final >> 14 */
+        }
+
+        output_pixel(&dest[0], av_clip_uintp2(R_B + Y, 30) >> 14);
+        output_pixel(&dest[1], av_clip_uintp2(  G + Y, 30) >> 14);
+        output_pixel(&dest[2], av_clip_uintp2(B_R + Y, 30) >> 14);
+        if (eightbytes) { /* four uint16_t components per pixel: alpha goes in the fourth slot */
+            output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14);
+            dest += 4;
+        } else {
+            dest += 3; /* three uint16_t components per pixel, no alpha slot */
+        }
     }
 }
 
 static av_always_inline void
-yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
+yuv2rgba64_full_1_c_template(SwsContext *c, const int32_t *buf0,
                        const int32_t *ubuf[2], const int32_t *vbuf[2],
                        const int32_t *abuf0, uint16_t *dest, int dstW,
-                       int uvalpha, int y, enum AVPixelFormat target, int hasAlpha)
+                       int uvalpha, int y, enum AVPixelFormat target, int hasAlpha, int eightbytes)
 {
     const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
     int i;
+    int A = 0xffff<<14; /* alpha written when !hasAlpha: 0xffff once shifted right by 14 at output (assuming av_clip_uintp2(x, 30) leaves it unchanged) */
 
     if (uvalpha < 2048) {
-        for (i = 0; i < ((dstW + 1) >> 1); i++) {
-            int Y1 = (buf0[i * 2]    ) >> 2;
-            int Y2 = (buf0[i * 2 + 1]) >> 2;
-            int U  = (ubuf0[i] + (-128 << 11)) >> 2;
-            int V  = (vbuf0[i] + (-128 << 11)) >> 2;
+        for (i = 0; i < dstW; i++) { /* one output pixel per iteration; only the first chroma row is read in this branch */
+            int Y  = (buf0[i]) >> 2;
+            int U  = (ubuf0[i] - (128 << 11)) >> 2; /* bias subtracted as a positive constant, so no negative value is left-shifted */
+            int V  = (vbuf0[i] - (128 << 11)) >> 2;
             int R, G, B;
 
-            Y1 -= c->yuv2rgb_y_offset;
-            Y2 -= c->yuv2rgb_y_offset;
-            Y1 *= c->yuv2rgb_y_coeff;
-            Y2 *= c->yuv2rgb_y_coeff;
-            Y1 += 1 << 13;
-            Y2 += 1 << 13;
+            Y -= c->yuv2rgb_y_offset;
+            Y *= c->yuv2rgb_y_coeff;
+            Y += 1 << 13; /* half of the 1 << 14 dropped by the final >> 14 */
+
+            if (hasAlpha) {
+                A = abuf0[i] << 11; /* abuf0 is only read when hasAlpha is set */
+
+                A += 1 << 13; /* same offset as added to Y before the final >> 14 */
+            }
 
             R = V * c->yuv2rgb_v2r_coeff;
             G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
             B =                            U * c->yuv2rgb_u2b_coeff;
 
-            output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
-            output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
-            output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
-            output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
-            output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
-            output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
-            dest += 6;
+            output_pixel(&dest[0], av_clip_uintp2(R_B + Y, 30) >> 14);
+            output_pixel(&dest[1], av_clip_uintp2(  G + Y, 30) >> 14);
+            output_pixel(&dest[2], av_clip_uintp2(B_R + Y, 30) >> 14);
+            if (eightbytes) { /* four uint16_t components per pixel: alpha goes in the fourth slot */
+                output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14);
+                dest += 4;
+            } else {
+                dest += 3; /* three uint16_t components per pixel, no alpha slot */
+            }
         }
     } else {
         const int32_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
-        for (i = 0; i < ((dstW + 1) >> 1); i++) {
-            int Y1 = (buf0[i * 2]    ) >> 2;
-            int Y2 = (buf0[i * 2 + 1]) >> 2;
-            int U  = (ubuf0[i] + ubuf1[i] + (-128 << 12)) >> 3;
-            int V  = (vbuf0[i] + vbuf1[i] + (-128 << 12)) >> 3;
+        int A = 0xffff<<14; /* NOTE(review): shadows the function-scope A declared above with the same initial value; the outer one would suffice */
+        for (i = 0; i < dstW; i++) { /* one output pixel per iteration; both chroma rows are summed in this branch */
+            int Y  = (buf0[i]    ) >> 2;
+            int U  = (ubuf0[i] + ubuf1[i] - (128 << 12)) >> 3; /* sum of two rows: bias and shift are each one bit larger than in the branch above */
+            int V  = (vbuf0[i] + vbuf1[i] - (128 << 12)) >> 3;
             int R, G, B;
 
-            Y1 -= c->yuv2rgb_y_offset;
-            Y2 -= c->yuv2rgb_y_offset;
-            Y1 *= c->yuv2rgb_y_coeff;
-            Y2 *= c->yuv2rgb_y_coeff;
-            Y1 += 1 << 13;
-            Y2 += 1 << 13;
+            Y -= c->yuv2rgb_y_offset;
+            Y *= c->yuv2rgb_y_coeff;
+            Y += 1 << 13; /* half of the 1 << 14 dropped by the final >> 14 */
+
+            if (hasAlpha) {
+                A = abuf0[i] << 11; /* abuf0 is only read when hasAlpha is set */
+
+                A += 1 << 13; /* same offset as added to Y before the final >> 14 */
+            }
 
             R = V * c->yuv2rgb_v2r_coeff;
             G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
             B =                            U * c->yuv2rgb_u2b_coeff;
 
-            output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
-            output_pixel(&dest[1], av_clip_uintp2(  G + Y1, 30) >> 14);
-            output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
-            output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
-            output_pixel(&dest[4], av_clip_uintp2(  G + Y2, 30) >> 14);
-            output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
-            dest += 6;
+            output_pixel(&dest[0], av_clip_uintp2(R_B + Y, 30) >> 14);
+            output_pixel(&dest[1], av_clip_uintp2(  G + Y, 30) >> 14);
+            output_pixel(&dest[2], av_clip_uintp2(B_R + Y, 30) >> 14);
+            if (eightbytes) { /* four uint16_t components per pixel: alpha goes in the fourth slot */
+                output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14);
+                dest += 4;
+            } else {
+                dest += 3; /* three uint16_t components per pixel, no alpha slot */
+            }
         }
     }
 }
@@ -1069,7 +1128,7 @@ yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
 #undef r_b
 #undef b_r
 
-#define YUV2PACKED16WRAPPER(name, base, ext, fmt, hasAlpha) \
+#define YUV2PACKED16WRAPPER(name, base, ext, fmt, hasAlpha, eightbytes) \
 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                         const int16_t **_lumSrc, int lumFilterSize, \
                         const int16_t *chrFilter, const int16_t **_chrUSrc, \
@@ -1084,7 +1143,7 @@ static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
     uint16_t *dest = (uint16_t *) _dest; \
     name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
                           chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
-                          alpSrc, dest, dstW, y, fmt, hasAlpha); \
+                          alpSrc, dest, dstW, y, fmt, hasAlpha, eightbytes); \
 } \
  \
 static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
@@ -1098,7 +1157,7 @@ static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
                   **abuf = (const int32_t **) _abuf; \
     uint16_t *dest = (uint16_t *) _dest; \
     name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
-                          dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
+                          dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha, eightbytes); \
 } \
  \
 static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
@@ -1112,21 +1171,34 @@ static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
                   *abuf0 = (const int32_t *)  _abuf0; \
     uint16_t *dest = (uint16_t *) _dest; \
     name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
-                                  dstW, uvalpha, y, fmt, hasAlpha); \
+                                  dstW, uvalpha, y, fmt, hasAlpha, eightbytes); \
 }
 
-YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, AV_PIX_FMT_RGB48BE, 0)
-YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, AV_PIX_FMT_RGB48LE, 0)
-YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, AV_PIX_FMT_BGR48BE, 0)
-YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, AV_PIX_FMT_BGR48LE, 0)
-YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64be, AV_PIX_FMT_RGBA64BE, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64le, AV_PIX_FMT_RGBA64LE, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64be, AV_PIX_FMT_RGBA64BE, 0)
-YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64le, AV_PIX_FMT_RGBA64LE, 0)
-YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64be, AV_PIX_FMT_BGRA64BE, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64le, AV_PIX_FMT_BGRA64LE, 1)
-YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64be, AV_PIX_FMT_BGRA64BE, 0)
-YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64le, AV_PIX_FMT_BGRA64LE, 0)
+YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48be, AV_PIX_FMT_RGB48BE, 0, 0)
+YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48le, AV_PIX_FMT_RGB48LE, 0, 0)
+YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48be, AV_PIX_FMT_BGR48BE, 0, 0)
+YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48le, AV_PIX_FMT_BGR48LE, 0, 0)
+YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64be, AV_PIX_FMT_RGBA64BE, 1, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64le, AV_PIX_FMT_RGBA64LE, 1, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64be, AV_PIX_FMT_RGBA64BE, 0, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64le, AV_PIX_FMT_RGBA64LE, 0, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64be, AV_PIX_FMT_BGRA64BE, 1, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64le, AV_PIX_FMT_BGRA64LE, 1, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64be, AV_PIX_FMT_BGRA64BE, 0, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64le, AV_PIX_FMT_BGRA64LE, 0, 1)
+
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48be_full, AV_PIX_FMT_RGB48BE, 0, 0)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48le_full, AV_PIX_FMT_RGB48LE, 0, 0)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48be_full, AV_PIX_FMT_BGR48BE, 0, 0)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48le_full, AV_PIX_FMT_BGR48LE, 0, 0)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64be_full, AV_PIX_FMT_RGBA64BE, 1, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64le_full, AV_PIX_FMT_RGBA64LE, 1, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64be_full, AV_PIX_FMT_RGBA64BE, 0, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64le_full, AV_PIX_FMT_RGBA64LE, 0, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64be_full, AV_PIX_FMT_BGRA64BE, 1, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64le_full, AV_PIX_FMT_BGRA64LE, 1, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64be_full, AV_PIX_FMT_BGRA64BE, 0, 1)
+YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64le_full, AV_PIX_FMT_BGRA64LE, 0, 1)
 
 /*
  * Write out 2 RGB pixels in the target pixel format. This function takes a
@@ -1324,6 +1396,8 @@ yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
     int  yalpha1 = 4096 - yalpha;
     int uvalpha1 = 4096 - uvalpha;
     int i;
+    av_assert2(yalpha  <= 4096U);
+    av_assert2(uvalpha <= 4096U);
 
     for (i = 0; i < ((dstW + 1) >> 1); i++) {
         int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha)  >> 19;
@@ -1666,6 +1740,9 @@ yuv2rgb_full_2_c_template(SwsContext *c, const int16_t *buf[2],
     int err[4] = {0};
     int A = 0; // init to silcene warning
 
+    av_assert2(yalpha  <= 4096U);
+    av_assert2(uvalpha <= 4096U);
+
     if(   target == AV_PIX_FMT_BGR4_BYTE || target == AV_PIX_FMT_RGB4_BYTE
        || target == AV_PIX_FMT_BGR8      || target == AV_PIX_FMT_RGB8)
         step = 1;
@@ -1782,7 +1859,7 @@ yuv2gbrp_full_X_c(SwsContext *c, const int16_t *lumFilter,
     int i;
     int hasAlpha = (desc->flags & AV_PIX_FMT_FLAG_ALPHA) && alpSrc;
     uint16_t **dest16 = (uint16_t**)dest;
-    int SH = 22 + 7 - desc->comp[0].depth_minus1;
+    int SH = 22 + 8 - desc->comp[0].depth;
     int A = 0; // init to silence warning
 
     for (i = 0; i < dstW; i++) {
@@ -1854,6 +1931,148 @@ yuv2gbrp_full_X_c(SwsContext *c, const int16_t *lumFilter,
     }
 }
 
+static void
+yuv2ya8_1_c(SwsContext *c, const int16_t *buf0,
+            const int16_t *ubuf[2], const int16_t *vbuf[2],
+            const int16_t *abuf0, uint8_t *dest, int dstW,
+            int uvalpha, int y)
+{
+    int hasAlpha = !!abuf0; /* alpha is taken from abuf0 only when it is non-NULL */
+    int i;
+    /* Write dstW (gray, alpha) byte pairs from one input line; chroma arguments are not read. */
+    for (i = 0; i < dstW; i++) {
+        int Y = (buf0[i] + 64) >> 7; /* add half of 128, then drop 7 bits */
+        int A; /* assigned and read only when hasAlpha is set */
+
+        Y = av_clip_uint8(Y);
+
+        if (hasAlpha) {
+            A = (abuf0[i] + 64) >> 7;
+            if (A & 0x100) /* clip only when bit 8 is set */
+                A = av_clip_uint8(A);
+        }
+
+        dest[i * 2    ] = Y;
+        dest[i * 2 + 1] = hasAlpha ? A : 255; /* 255 when no alpha line was given */
+    }
+}
+
+static void
+yuv2ya8_2_c(SwsContext *c, const int16_t *buf[2],
+            const int16_t *ubuf[2], const int16_t *vbuf[2],
+            const int16_t *abuf[2], uint8_t *dest, int dstW,
+            int yalpha, int uvalpha, int y)
+{
+    int hasAlpha = abuf && abuf[0] && abuf[1]; /* both alpha lines are required */
+    const int16_t *buf0  = buf[0],  *buf1  = buf[1],
+                  *abuf0 = hasAlpha ? abuf[0] : NULL,
+                  *abuf1 = hasAlpha ? abuf[1] : NULL;
+    int  yalpha1 = 4096 - yalpha; /* weight of line 0; line 1 gets yalpha */
+    int i;
+    /* Blend two input lines into dstW (gray, alpha) byte pairs; chroma arguments are not read. */
+    av_assert2(yalpha  <= 4096U);
+
+    for (i = 0; i < dstW; i++) {
+        int Y = (buf0[i] * yalpha1 + buf1[i] * yalpha) >> 19; /* weighted sum, scaled down */
+        int A; /* assigned and read only when hasAlpha is set */
+
+        Y = av_clip_uint8(Y);
+
+        if (hasAlpha) {
+            A = (abuf0[i] * yalpha1 + abuf1[i] * yalpha) >> 19;
+            A = av_clip_uint8(A);
+        }
+
+        dest[i * 2    ] = Y;
+        dest[i * 2 + 1] = hasAlpha ? A : 255; /* 255 when alpha lines are missing */
+    }
+}
+
+static void
+yuv2ya8_X_c(SwsContext *c, const int16_t *lumFilter,
+            const int16_t **lumSrc, int lumFilterSize,
+            const int16_t *chrFilter, const int16_t **chrUSrc,
+            const int16_t **chrVSrc, int chrFilterSize,
+            const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
+{
+    int hasAlpha = !!alpSrc; /* alpha is filtered only when alpSrc is non-NULL */
+    int i;
+    /* Filter lumFilterSize input lines into dstW (gray, alpha) byte pairs; chroma arguments are not read. */
+    for (i = 0; i < dstW; i++) {
+        int j;
+        int Y = 1 << 18, A = 1 << 18; /* half of 1 << 19: rounding term for the shift below */
+
+        for (j = 0; j < lumFilterSize; j++)
+            Y += lumSrc[j][i] * lumFilter[j];
+
+        Y >>= 19;
+        if (Y  & 0x100) /* clip only when bit 8 is set */
+            Y = av_clip_uint8(Y);
+
+        if (hasAlpha) {
+            for (j = 0; j < lumFilterSize; j++)
+                A += alpSrc[j][i] * lumFilter[j]; /* alpha uses the luma filter taps */
+
+            A >>= 19;
+
+            if (A & 0x100)
+                A = av_clip_uint8(A);
+        }
+
+        dest[2 * i    ] = Y;
+        dest[2 * i + 1] = hasAlpha ? A : 255; /* 255 when no alpha source was given */
+    }
+}
+
+static void
+yuv2ayuv64le_X_c(SwsContext *c, const int16_t *lumFilter,
+                 const int16_t **_lumSrc, int lumFilterSize,
+                 const int16_t *chrFilter, const int16_t **_chrUSrc,
+                 const int16_t **_chrVSrc, int chrFilterSize,
+                 const int16_t **_alpSrc, uint8_t *dest, int dstW, int y)
+{
+    const int32_t **lumSrc  = (const int32_t **) _lumSrc, /* inputs are reinterpreted as 32-bit samples */
+                  **chrUSrc = (const int32_t **) _chrUSrc,
+                  **chrVSrc = (const int32_t **) _chrVSrc,
+                  **alpSrc  = (const int32_t **) _alpSrc;
+    int hasAlpha = !!alpSrc; /* alpha is filtered only when an alpha source is given */
+    int i;
+    /* Filter the input lines into dstW pixels of four 16-bit words (A, Y, U, V), 8 bytes per pixel. */
+    for (i = 0; i < dstW; i++) {
+        int Y = 1 << 14, U = 1 << 14; /* half of 1 << 15: rounding term for the shift below */
+        int V = 1 << 14, A = 1 << 14;
+        int j;
+
+        Y -= 0x40000000; /* bias; 0x40000000 >> 15 == 0x8000, re-added after the clip */
+        U -= 0x40000000;
+        V -= 0x40000000;
+        A -= 0x40000000;
+
+        for (j = 0; j < lumFilterSize; j++)
+            Y += lumSrc[j][i] * (unsigned)lumFilter[j]; /* product is formed in unsigned arithmetic */
+
+        for (j = 0; j < chrFilterSize; j++)
+            U += chrUSrc[j][i] * (unsigned)chrFilter[j];
+
+        for (j = 0; j < chrFilterSize; j++)
+            V += chrVSrc[j][i] * (unsigned)chrFilter[j];
+
+        if (hasAlpha)
+            for (j = 0; j < lumFilterSize; j++)
+                A += alpSrc[j][i] * (unsigned)lumFilter[j]; /* alpha uses the luma filter taps */
+
+        Y = 0x8000 + av_clip_int16(Y >> 15); /* clip to int16, then undo the bias */
+        U = 0x8000 + av_clip_int16(U >> 15);
+        V = 0x8000 + av_clip_int16(V >> 15);
+        A = 0x8000 + av_clip_int16(A >> 15); /* computed even without alpha, but then not written */
+
+        AV_WL16(dest + 8 * i, hasAlpha ? A : 65535); /* alpha word first; 65535 when no alpha source */
+        AV_WL16(dest + 8 * i + 2, Y);
+        AV_WL16(dest + 8 * i + 4, U);
+        AV_WL16(dest + 8 * i + 6, V);
+    }
+}
+
 av_cold void ff_sws_init_output_funcs(SwsContext *c,
                                       yuv2planar1_fn *yuv2plane1,
                                       yuv2planarX_fn *yuv2planeX,
@@ -1870,16 +2089,16 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c,
         *yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_c  : yuv2planeX_16LE_c;
         *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_c  : yuv2plane1_16LE_c;
     } else if (is9_OR_10BPS(dstFormat)) {
-        if (desc->comp[0].depth_minus1 == 8) {
+        if (desc->comp[0].depth == 9) {
             *yuv2planeX = isBE(dstFormat) ? yuv2planeX_9BE_c  : yuv2planeX_9LE_c;
             *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_9BE_c  : yuv2plane1_9LE_c;
-        } else if (desc->comp[0].depth_minus1 == 9) {
+        } else if (desc->comp[0].depth == 10) {
             *yuv2planeX = isBE(dstFormat) ? yuv2planeX_10BE_c  : yuv2planeX_10LE_c;
             *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_10BE_c  : yuv2plane1_10LE_c;
-        } else if (desc->comp[0].depth_minus1 == 11) {
+        } else if (desc->comp[0].depth == 12) {
             *yuv2planeX = isBE(dstFormat) ? yuv2planeX_12BE_c  : yuv2planeX_12LE_c;
             *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_12BE_c  : yuv2plane1_12LE_c;
-        } else if (desc->comp[0].depth_minus1 == 13) {
+        } else if (desc->comp[0].depth == 14) {
             *yuv2planeX = isBE(dstFormat) ? yuv2planeX_14BE_c  : yuv2planeX_14LE_c;
             *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_14BE_c  : yuv2plane1_14LE_c;
         } else
@@ -1973,7 +2192,64 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c,
                 }
 #endif /* !CONFIG_SMALL */
                 break;
-            case AV_PIX_FMT_RGB24:
+        case AV_PIX_FMT_RGBA64LE:
+#if CONFIG_SWSCALE_ALPHA
+            if (c->alpPixBuf) {
+                *yuv2packedX = yuv2rgba64le_full_X_c;
+                *yuv2packed2 = yuv2rgba64le_full_2_c;
+                *yuv2packed1 = yuv2rgba64le_full_1_c;
+            } else
+#endif /* CONFIG_SWSCALE_ALPHA */
+            {
+                *yuv2packedX = yuv2rgbx64le_full_X_c;
+                *yuv2packed2 = yuv2rgbx64le_full_2_c;
+                *yuv2packed1 = yuv2rgbx64le_full_1_c;
+            }
+            break;
+        case AV_PIX_FMT_RGBA64BE:
+#if CONFIG_SWSCALE_ALPHA
+            if (c->alpPixBuf) {
+                *yuv2packedX = yuv2rgba64be_full_X_c;
+                *yuv2packed2 = yuv2rgba64be_full_2_c;
+                *yuv2packed1 = yuv2rgba64be_full_1_c;
+            } else
+#endif /* CONFIG_SWSCALE_ALPHA */
+            {
+                *yuv2packedX = yuv2rgbx64be_full_X_c;
+                *yuv2packed2 = yuv2rgbx64be_full_2_c;
+                *yuv2packed1 = yuv2rgbx64be_full_1_c;
+            }
+            break;
+        case AV_PIX_FMT_BGRA64LE:
+#if CONFIG_SWSCALE_ALPHA
+            if (c->alpPixBuf) {
+                *yuv2packedX = yuv2bgra64le_full_X_c;
+                *yuv2packed2 = yuv2bgra64le_full_2_c;
+                *yuv2packed1 = yuv2bgra64le_full_1_c;
+            } else
+#endif /* CONFIG_SWSCALE_ALPHA */
+            {
+                *yuv2packedX = yuv2bgrx64le_full_X_c;
+                *yuv2packed2 = yuv2bgrx64le_full_2_c;
+                *yuv2packed1 = yuv2bgrx64le_full_1_c;
+            }
+            break;
+        case AV_PIX_FMT_BGRA64BE:
+#if CONFIG_SWSCALE_ALPHA
+            if (c->alpPixBuf) {
+                *yuv2packedX = yuv2bgra64be_full_X_c;
+                *yuv2packed2 = yuv2bgra64be_full_2_c;
+                *yuv2packed1 = yuv2bgra64be_full_1_c;
+            } else
+#endif /* CONFIG_SWSCALE_ALPHA */
+            {
+                *yuv2packedX = yuv2bgrx64be_full_X_c;
+                *yuv2packed2 = yuv2bgrx64be_full_2_c;
+                *yuv2packed1 = yuv2bgrx64be_full_1_c;
+            }
+            break;
+
+        case AV_PIX_FMT_RGB24:
             *yuv2packedX = yuv2rgb24_full_X_c;
             *yuv2packed2 = yuv2rgb24_full_2_c;
             *yuv2packed1 = yuv2rgb24_full_1_c;
@@ -1983,6 +2259,26 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c,
             *yuv2packed2 = yuv2bgr24_full_2_c;
             *yuv2packed1 = yuv2bgr24_full_1_c;
             break;
+        case AV_PIX_FMT_RGB48LE:
+            *yuv2packedX = yuv2rgb48le_full_X_c;
+            *yuv2packed2 = yuv2rgb48le_full_2_c;
+            *yuv2packed1 = yuv2rgb48le_full_1_c;
+            break;
+        case AV_PIX_FMT_BGR48LE:
+            *yuv2packedX = yuv2bgr48le_full_X_c;
+            *yuv2packed2 = yuv2bgr48le_full_2_c;
+            *yuv2packed1 = yuv2bgr48le_full_1_c;
+            break;
+        case AV_PIX_FMT_RGB48BE:
+            *yuv2packedX = yuv2rgb48be_full_X_c;
+            *yuv2packed2 = yuv2rgb48be_full_2_c;
+            *yuv2packed1 = yuv2rgb48be_full_1_c;
+            break;
+        case AV_PIX_FMT_BGR48BE:
+            *yuv2packedX = yuv2bgr48be_full_X_c;
+            *yuv2packed2 = yuv2bgr48be_full_2_c;
+            *yuv2packed1 = yuv2bgr48be_full_1_c;
+            break;
         case AV_PIX_FMT_BGR4_BYTE:
             *yuv2packedX = yuv2bgr4_byte_full_X_c;
             *yuv2packed2 = yuv2bgr4_byte_full_2_c;
@@ -2221,5 +2517,13 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c,
         *yuv2packed2 = yuv2uyvy422_2_c;
         *yuv2packedX = yuv2uyvy422_X_c;
         break;
+    case AV_PIX_FMT_YA8:
+        *yuv2packed1 = yuv2ya8_1_c;
+        *yuv2packed2 = yuv2ya8_2_c;
+        *yuv2packedX = yuv2ya8_X_c;
+        break;
+    case AV_PIX_FMT_AYUV64LE:
+        *yuv2packedX = yuv2ayuv64le_X_c;
+        break;
     }
 }
diff --git a/libswscale/ppc/yuv2rgb_altivec.c b/libswscale/ppc/yuv2rgb_altivec.c
index 25282bf1..638706f0 100644
--- a/libswscale/ppc/yuv2rgb_altivec.c
+++ b/libswscale/ppc/yuv2rgb_altivec.c
@@ -222,6 +222,7 @@ static const vector unsigned char
  * optimized for JPEG decoding.
  */
 
+#if HAVE_BIGENDIAN
 #define vec_unh(x)                                                      \
     (vector signed short)                                               \
         vec_perm(x, (__typeof__(x)) { 0 },                              \
@@ -235,6 +236,10 @@ static const vector unsigned char
                  ((vector unsigned char) {                              \
                      0x10, 0x08, 0x10, 0x09, 0x10, 0x0A, 0x10, 0x0B,    \
                      0x10, 0x0C, 0x10, 0x0D, 0x10, 0x0E, 0x10, 0x0F }))
+#else
+#define vec_unh(x)(vector signed short) vec_mergeh(x,(__typeof__(x)) { 0 })
+#define vec_unl(x)(vector signed short) vec_mergel(x,(__typeof__(x)) { 0 })
+#endif
 
 #define vec_clip_s16(x)                                                 \
     vec_max(vec_min(x, ((vector signed short) {                         \
diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c
index 340174fd..f7f8188a 100644
--- a/libswscale/rgb2rgb.c
+++ b/libswscale/rgb2rgb.c
@@ -130,7 +130,7 @@ void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
  * 32-bit C version, and and&add trick by Michael Niedermayer
  */
 
-av_cold void sws_rgb2rgb_init(void)
+av_cold void ff_sws_rgb2rgb_init(void)
 {
     rgb2rgb_init_c();
     if (ARCH_X86)
diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h
index 8faebe6a..0645404e 100644
--- a/libswscale/rgb2rgb.h
+++ b/libswscale/rgb2rgb.h
@@ -164,7 +164,7 @@ extern void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const u
                             int width, int height,
                             int lumStride, int chromStride, int srcStride);
 
-void sws_rgb2rgb_init(void);
+void ff_sws_rgb2rgb_init(void);
 
 void rgb2rgb_init_x86(void);
 
diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c
index 1cc28cdd..499d25b2 100644
--- a/libswscale/rgb2rgb_template.c
+++ b/libswscale/rgb2rgb_template.c
@@ -855,7 +855,7 @@ static void yuyvtoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                            int lumStride, int chromStride, int srcStride)
 {
     int y;
-    const int chromWidth = FF_CEIL_RSHIFT(width, 1);
+    const int chromWidth = AV_CEIL_RSHIFT(width, 1);
 
     for (y = 0; y < height; y++) {
         extract_even_c(src, ydst, width);
@@ -875,7 +875,7 @@ static void yuyvtoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                            int lumStride, int chromStride, int srcStride)
 {
     int y;
-    const int chromWidth = FF_CEIL_RSHIFT(width, 1);
+    const int chromWidth = AV_CEIL_RSHIFT(width, 1);
 
     for (y = 0; y < height; y++) {
         extract_even_c(src, ydst, width);
@@ -893,7 +893,7 @@ static void uyvytoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                            int lumStride, int chromStride, int srcStride)
 {
     int y;
-    const int chromWidth = FF_CEIL_RSHIFT(width, 1);
+    const int chromWidth = AV_CEIL_RSHIFT(width, 1);
 
     for (y = 0; y < height; y++) {
         extract_even_c(src + 1, ydst, width);
@@ -913,7 +913,7 @@ static void uyvytoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                            int lumStride, int chromStride, int srcStride)
 {
     int y;
-    const int chromWidth = FF_CEIL_RSHIFT(width, 1);
+    const int chromWidth = AV_CEIL_RSHIFT(width, 1);
 
     for (y = 0; y < height; y++) {
         extract_even_c(src + 1, ydst, width);
diff --git a/libswscale/slice.c b/libswscale/slice.c
new file mode 100644
index 00000000..66fe413a
--- /dev/null
+++ b/libswscale/slice.c
@@ -0,0 +1,349 @@
+/*
+ * Copyright (C) 2015 Pedro Arthur <bygrandao@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "swscale_internal.h"
+
+static void free_lines(SwsSlice *s)
+{
+    int i; /* Releases the line buffers of a slice and clears its line tables. */
+    for (i = 0; i < 2; ++i) { /* only planes 0 and 1 are freed; alloc_lines() points planes 3 and 2 into the same buffers */
+        int n = s->plane[i].available_lines;
+        int j;
+        for (j = 0; j < n; ++j) {
+            av_freep(&s->plane[i].line[j]);
+            if (s->is_ring)
+               s->plane[i].line[j+n] = NULL; /* second table entry that aliased the freed line */
+        }
+    }
+    /* Clear every plane's table: 3 * available_lines entries for a ring slice, else available_lines. */
+    for (i = 0; i < 4; ++i)
+        memset(s->plane[i].line, 0, sizeof(uint8_t*) * s->plane[i].available_lines * (s->is_ring ? 3 : 1));
+    s->should_free_lines = 0; /* nothing left to free */
+}
+
+/*
+ Slice lines contain extra bytes for vectorized code, so @size
+ is the allocated memory size and @width is the number of pixels.
+*/
+static int alloc_lines(SwsSlice *s, int size, int width)
+{
+    int i;
+    int idx[2] = {3, 2}; /* plane 0 shares buffers with plane 3, plane 1 with plane 2 */
+
+    s->should_free_lines = 1; /* set before allocating so a partial failure is still cleaned up */
+    s->width = width;
+
+    for (i = 0; i < 2; ++i) {
+        int n = s->plane[i].available_lines;
+        int j;
+        int ii = idx[i];
+
+        av_assert0(n == s->plane[ii].available_lines); /* paired planes must have the same line count */
+        for (j = 0; j < n; ++j) {
+            // chroma plane line U and V are expected to be contiguous in memory
+            // by mmx vertical scaler code
+            s->plane[i].line[j] = av_malloc(size * 2 + 32); /* one buffer holds both paired lines */
+            if (!s->plane[i].line[j]) {
+                free_lines(s);
+                return AVERROR(ENOMEM);
+            }
+            s->plane[ii].line[j] = s->plane[i].line[j] + size + 16; /* paired line starts size + 16 bytes in */
+            if (s->is_ring) {
+               s->plane[i].line[j+n] = s->plane[i].line[j]; /* ring: entries n..2n-1 alias entries 0..n-1 */
+               s->plane[ii].line[j+n] = s->plane[ii].line[j];
+            }
+        }
+    }
+
+    return 0; /* 0 on success, AVERROR(ENOMEM) above on allocation failure */
+}
+
+static int alloc_slice(SwsSlice *s, enum AVPixelFormat fmt, int lumLines, int chrLines, int h_sub_sample, int v_sub_sample, int ring)
+{
+    int i;
+    int size[4] = { lumLines, /* planes 0 and 3 get lumLines entries, planes 1 and 2 get chrLines */
+                    chrLines,
+                    chrLines,
+                    lumLines };
+    /* Initialise the slice fields and allocate the zeroed per-plane line tables (no pixel buffers). */
+    s->h_chr_sub_sample = h_sub_sample;
+    s->v_chr_sub_sample = v_sub_sample;
+    s->fmt = fmt;
+    s->is_ring = ring;
+    s->should_free_lines = 0; /* no line buffers are owned yet */
+
+    for (i = 0; i < 4; ++i) {
+        int n = size[i] * ( ring == 0 ? 1 : 3); /* a ring slice gets a table three times as long */
+        s->plane[i].line = av_mallocz_array(sizeof(uint8_t*), n);
+        if (!s->plane[i].line)
+            return AVERROR(ENOMEM); /* tables allocated so far are left for the caller to release */
+
+        s->plane[i].tmp = ring ? s->plane[i].line + size[i] * 2 : NULL; /* last third of a ring table */
+        s->plane[i].available_lines = size[i];
+        s->plane[i].sliceY = 0;
+        s->plane[i].sliceH = 0;
+    }
+    return 0;
+}
+
+static void free_slice(SwsSlice *s)
+{
+    int i; /* Releases everything a slice owns; a NULL slice is ignored. */
+    if (s) {
+        if (s->should_free_lines) /* line buffers are freed only if this slice allocated them */
+            free_lines(s);
+        for (i = 0; i < 4; ++i) {
+            av_freep(&s->plane[i].line); /* the line table itself */
+            s->plane[i].tmp = NULL;      /* pointed into the table just freed */
+        }
+    }
+}
+
+int ff_rotate_slice(SwsSlice *s, int lum, int chr)
+{
+    int i; /* Advances each plane's window by available_lines once the given position is two windows past sliceY. */
+    if (lum) { /* a zero position skips the planes 0 and 3 update */
+        for (i = 0; i < 4; i+=3) { /* planes 0 and 3 */
+            int n = s->plane[i].available_lines;
+            int l = lum - s->plane[i].sliceY; /* distance of lum from the window start */
+
+            if (l >= n * 2) {
+                s->plane[i].sliceY += n;
+                s->plane[i].sliceH -= n;
+            }
+        }
+    }
+    if (chr) { /* a zero position skips the planes 1 and 2 update */
+        for (i = 1; i < 3; ++i) { /* planes 1 and 2 */
+            int n = s->plane[i].available_lines;
+            int l = chr - s->plane[i].sliceY; /* distance of chr from the window start */
+
+            if (l >= n * 2) {
+                s->plane[i].sliceY += n;
+                s->plane[i].sliceH -= n;
+            }
+        }
+    }
+    return 0; /* always succeeds */
+}
+
+int ff_init_slice_from_src(SwsSlice * s, uint8_t *src[4], int stride[4], int srcW, int lumY, int lumH, int chrY, int chrH, int relative)
+{
+    int i = 0;
+    /* Point the slice's line tables at rows of the caller's planes; no pixels are copied. Returns 0. */
+    const int start[4] = {lumY,
+                          chrY,
+                          chrY,
+                          lumY};
+    /* One past the last row per plane: planes 0 and 3 use lumY/lumH, planes 1 and 2 use chrY/chrH. */
+    const int end[4] = {lumY + lumH,
+                        chrY + chrH,
+                        chrY + chrH,
+                        lumY + lumH};
+    /* When relative is 0, src[] is offset by start[] rows, each plane using its own stride. */
+    const uint8_t *src_[4] = {src[0] + (relative ? 0 : start[0]) * stride[0],
+                              src[1] + (relative ? 0 : start[1]) * stride[1],
+                              src[2] + (relative ? 0 : start[2]) * stride[2],
+                              src[3] + (relative ? 0 : start[3]) * stride[3]};
+
+    s->width = srcW;
+
+    for (i = 0; i < 4; ++i) {
+        int j;
+        int first = s->plane[i].sliceY;
+        int n = s->plane[i].available_lines;
+        int lines = end[i] - start[i];
+        int tot_lines = end[i] - first;
+
+        if (start[i] >= first && n >= tot_lines) { /* new rows fit in the current window: extend it */
+            s->plane[i].sliceH = FFMAX(tot_lines, s->plane[i].sliceH);
+            for (j = 0; j < lines; j+= 1)
+                s->plane[i].line[start[i] - first + j] = src_[i] +  j * stride[i];
+        } else { /* otherwise restart the window at start[i], capped to n rows */
+            s->plane[i].sliceY = start[i];
+            lines = lines > n ? n : lines;
+            s->plane[i].sliceH = lines;
+            for (j = 0; j < lines; j+= 1)
+                s->plane[i].line[j] = src_[i] +  j * stride[i];
+        }
+
+    }
+
+    return 0;
+}
+
+static void fill_ones(SwsSlice *s, int n, int is16bit)
+{
+    int i; /* Fills every line of all four planes with a constant: 1<<18 as int32 or 1<<14 as int16. */
+    for (i = 0; i < 4; ++i) {
+        int j;
+        int size = s->plane[i].available_lines;
+        for (j = 0; j < size; ++j) {
+            int k;
+            int end = is16bit ? n>>1: n; /* half as many elements when they are written as int32 */
+            // fill also one extra element
+            end += 1;
+            if (is16bit)
+                for (k = 0; k < end; ++k)
+                    ((int32_t*)(s->plane[i].line[j]))[k] = 1<<18; /* is16bit set: 32-bit elements */
+            else
+                for (k = 0; k < end; ++k)
+                    ((int16_t*)(s->plane[i].line[j]))[k] = 1<<14; /* otherwise 16-bit elements */
+        }
+    }
+}
+
+int ff_init_filters(SwsContext * c)
+{
+    /* Allocate the slices and set up the filter descriptors; returns 0 or a negative error code. */
+    int i;
+    int index;
+    int num_ydesc;
+    int num_cdesc;
+    int num_vdesc = isPlanarYUV(c->dstFormat) && !isGray(c->dstFormat) ? 2 : 1;
+    int need_lum_conv = c->lumToYV12 || c->readLumPlanar || c->alpToYV12 || c->readAlpPlanar;
+    int need_chr_conv = c->chrToYV12 || c->readChrPlanar;
+    int need_gamma = c->is_internal_gamma;
+    int srcIdx, dstIdx;
+    int dst_stride = FFALIGN(c->dstW * sizeof(int16_t) + 66, 16);
+
+    uint32_t * pal = usePal(c->srcFormat) ? c->pal_yuv : (uint32_t*)c->input_rgb2yuv_table;
+    int res = 0;
+
+    if (c->dstBpc == 16)
+        dst_stride <<= 1;
+
+    num_ydesc = need_lum_conv ? 2 : 1;
+    num_cdesc = need_chr_conv ? 2 : 1;
+
+    c->numSlice = FFMAX(num_ydesc, num_cdesc) + 2;
+    c->numDesc = num_ydesc + num_cdesc + num_vdesc + (need_gamma ? 2 : 0);
+    c->descIndex[0] = num_ydesc + (need_gamma ? 1 : 0);
+    c->descIndex[1] = num_ydesc + num_cdesc + (need_gamma ? 1 : 0);
+
+    c->desc = av_mallocz_array(sizeof(SwsFilterDescriptor), c->numDesc);
+    if (!c->desc)
+        return AVERROR(ENOMEM);
+    c->slice = av_mallocz_array(sizeof(SwsSlice), c->numSlice);
+    if (!c->slice) { /* without this check alloc_slice() below would dereference NULL */
+        res = AVERROR(ENOMEM);
+        goto cleanup;
+    }
+
+    res = alloc_slice(&c->slice[0], c->srcFormat, c->srcH, c->chrSrcH, c->chrSrcHSubSample, c->chrSrcVSubSample, 0);
+    if (res < 0) goto cleanup;
+    for (i = 1; i < c->numSlice-2; ++i) {
+        res = alloc_slice(&c->slice[i], c->srcFormat, c->vLumFilterSize + MAX_LINES_AHEAD, c->vChrFilterSize + MAX_LINES_AHEAD, c->chrSrcHSubSample, c->chrSrcVSubSample, 0);
+        if (res < 0) goto cleanup;
+        res = alloc_lines(&c->slice[i], FFALIGN(c->srcW*2+78, 16), c->srcW);
+        if (res < 0) goto cleanup;
+    }
+    // horizontal scaler output
+    res = alloc_slice(&c->slice[i], c->srcFormat, c->vLumFilterSize + MAX_LINES_AHEAD, c->vChrFilterSize + MAX_LINES_AHEAD, c->chrDstHSubSample, c->chrDstVSubSample, 1);
+    if (res < 0) goto cleanup;
+    res = alloc_lines(&c->slice[i], dst_stride, c->dstW);
+    if (res < 0) goto cleanup;
+
+    fill_ones(&c->slice[i], dst_stride>>1, c->dstBpc == 16);
+
+    // vertical scaler output
+    ++i;
+    res = alloc_slice(&c->slice[i], c->dstFormat, c->dstH, c->chrDstH, c->chrDstHSubSample, c->chrDstVSubSample, 0);
+    if (res < 0) goto cleanup;
+
+    index = 0;
+    srcIdx = 0;
+    dstIdx = 1;
+
+    if (need_gamma) {
+        res = ff_init_gamma_convert(c->desc + index, c->slice + srcIdx, c->inv_gamma);
+        if (res < 0) goto cleanup;
+        ++index;
+    }
+
+    if (need_lum_conv) {
+        res = ff_init_desc_fmt_convert(&c->desc[index], &c->slice[srcIdx], &c->slice[dstIdx], pal);
+        if (res < 0) goto cleanup;
+        c->desc[index].alpha = c->alpPixBuf != 0;
+        ++index;
+        srcIdx = dstIdx;
+    }
+
+    dstIdx = FFMAX(num_ydesc, num_cdesc); /* index of the horizontal scaler output slice */
+    res = ff_init_desc_hscale(&c->desc[index], &c->slice[srcIdx], &c->slice[dstIdx], c->hLumFilter, c->hLumFilterPos, c->hLumFilterSize, c->lumXInc);
+    if (res < 0) goto cleanup;
+    c->desc[index].alpha = c->alpPixBuf != 0;
+
+    ++index;
+    {
+        srcIdx = 0;
+        dstIdx = 1;
+        if (need_chr_conv) {
+            res = ff_init_desc_cfmt_convert(&c->desc[index], &c->slice[srcIdx], &c->slice[dstIdx], pal);
+            if (res < 0) goto cleanup;
+            ++index;
+            srcIdx = dstIdx;
+        }
+
+        dstIdx = FFMAX(num_ydesc, num_cdesc);
+        if (c->needs_hcscale)
+            res = ff_init_desc_chscale(&c->desc[index], &c->slice[srcIdx], &c->slice[dstIdx], c->hChrFilter, c->hChrFilterPos, c->hChrFilterSize, c->chrXInc);
+        else
+            res = ff_init_desc_no_chr(&c->desc[index], &c->slice[srcIdx], &c->slice[dstIdx]);
+        if (res < 0) goto cleanup;
+    }
+
+    ++index;
+    {
+        srcIdx = c->numSlice - 2; /* horizontal scaler output slice */
+        dstIdx = c->numSlice - 1; /* vertical scaler output slice */
+        res = ff_init_vscale(c, c->desc + index, c->slice + srcIdx, c->slice + dstIdx);
+        if (res < 0) goto cleanup;
+    }
+
+    ++index;
+    if (need_gamma) {
+        res = ff_init_gamma_convert(c->desc + index, c->slice + dstIdx, c->gamma);
+        if (res < 0) goto cleanup;
+    }
+
+    return 0;
+
+cleanup:
+    ff_free_filters(c); /* releases whatever was allocated before the failure */
+    return res;
+}
+
+int ff_free_filters(SwsContext *c)
+{
+    int i; /* Releases the descriptor and slice arrays of c; either may be NULL. Always returns 0. */
+    if (c->desc) {
+        for (i = 0; i < c->numDesc; ++i)
+            av_freep(&c->desc[i].instance); /* per-descriptor instance data */
+        av_freep(&c->desc);
+    }
+
+    if (c->slice) {
+        for (i = 0; i < c->numSlice; ++i)
+            free_slice(&c->slice[i]); /* line buffers (if owned) and line tables */
+        av_freep(&c->slice);
+    }
+    return 0;
+}
diff --git a/libswscale/swscale-test.c b/libswscale/swscale-test.c
index 661ff5b7..4d8d08bd 100644
--- a/libswscale/swscale-test.c
+++ b/libswscale/swscale-test.c
@@ -106,7 +106,11 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h,
         for (p = 0; p < 4; p++)
             av_freep(&src[p]);
 
-        av_image_fill_linesizes(srcStride, srcFormat, srcW);
+        res = av_image_fill_linesizes(srcStride, srcFormat, srcW);
+        if (res < 0) {
+            fprintf(stderr, "av_image_fill_linesizes failed\n");
+            goto end;
+        }
         for (p = 0; p < 4; p++) {
             srcStride[p] = FFALIGN(srcStride[p], 16);
             if (srcStride[p])
@@ -134,7 +138,12 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h,
         cur_srcH      = srcH;
     }
 
-    av_image_fill_linesizes(dstStride, dstFormat, dstW);
+    res = av_image_fill_linesizes(dstStride, dstFormat, dstW);
+    if (res < 0) {
+        fprintf(stderr, "av_image_fill_linesizes failed\n");
+        goto end;
+    }
+
     for (i = 0; i < 4; i++) {
         /* Image buffers passed into libswscale can be allocated any way you
          * prefer, as long as they're aligned enough for the architecture, and
@@ -399,7 +408,7 @@ int main(int argc, char **argv)
     for (y = 0; y < H; y++)
         for (x = 0; x < W * 4; x++)
             rgb_data[ x + y * 4 * W] = av_lfg_get(&rand);
-    sws_scale(sws, rgb_src, rgb_stride, 0, H, src, stride);
+    sws_scale(sws, rgb_src, rgb_stride, 0, H / 12, src, stride);
     sws_freeContext(sws);
     av_free(rgb_data);
 
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 53120169..8d205770 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -52,6 +52,7 @@ DECLARE_ALIGNED(8, static const uint8_t, sws_pb_64)[8] = {
     64, 64, 64, 64, 64, 64, 64, 64
 };
 
+#ifndef NEW_FILTER
 static void gamma_convert(uint8_t * src[], int width, uint16_t *gamma)
 {
     int i;
@@ -67,6 +68,7 @@ static void gamma_convert(uint8_t * src[], int width, uint16_t *gamma)
         AV_WL16(src1 + i*4 + 2, gamma[b]);
     }
 }
+#endif
 
 static av_always_inline void fillPlane(uint8_t *plane, int stride, int width,
                                        int height, int y, uint8_t val)
@@ -87,10 +89,10 @@ static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW,
     int i;
     int32_t *dst        = (int32_t *) _dst;
     const uint16_t *src = (const uint16_t *) _src;
-    int bits            = desc->comp[0].depth_minus1;
+    int bits            = desc->comp[0].depth - 1;
     int sh              = bits - 4;
 
-    if((isAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8) && desc->comp[0].depth_minus1<15)
+    if((isAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8) && desc->comp[0].depth<16)
         sh= 9;
 
     for (i = 0; i < dstW; i++) {
@@ -113,10 +115,10 @@ static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW,
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat);
     int i;
     const uint16_t *src = (const uint16_t *) _src;
-    int sh              = desc->comp[0].depth_minus1;
+    int sh              = desc->comp[0].depth - 1;
 
     if(sh<15)
-        sh= isAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8 ? 13 : desc->comp[0].depth_minus1;
+        sh= isAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8 ? 13 : (desc->comp[0].depth - 1);
 
     for (i = 0; i < dstW; i++) {
         int j;
@@ -238,6 +240,7 @@ static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
         dst[i] = (dst[i]*(14071/4) + (33561947<<4)/4)>>12;
 }
 
+#ifndef NEW_FILTER
 // *** horizontal scale Y line to temp buffer
 static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
                                      const uint8_t *src_in[4],
@@ -309,6 +312,7 @@ static av_always_inline void hcscale(SwsContext *c, int16_t *dst1,
     if (c->chrConvertRange)
         c->chrConvertRange(dst1, dst2, dstWidth);
 }
+#endif /* NEW_FILTER */
 
 #define DEBUG_SWSCALE_BUFFERS 0
 #define DEBUG_BUFFERS(...)                      \
@@ -321,35 +325,46 @@ static int swscale(SwsContext *c, const uint8_t *src[],
 {
     /* load a few things into local vars to make the code more readable?
      * and faster */
+#ifndef NEW_FILTER
     const int srcW                   = c->srcW;
+#endif
     const int dstW                   = c->dstW;
     const int dstH                   = c->dstH;
+#ifndef NEW_FILTER
     const int chrDstW                = c->chrDstW;
     const int chrSrcW                = c->chrSrcW;
     const int lumXInc                = c->lumXInc;
     const int chrXInc                = c->chrXInc;
+#endif
     const enum AVPixelFormat dstFormat = c->dstFormat;
     const int flags                  = c->flags;
     int32_t *vLumFilterPos           = c->vLumFilterPos;
     int32_t *vChrFilterPos           = c->vChrFilterPos;
+#ifndef NEW_FILTER
     int32_t *hLumFilterPos           = c->hLumFilterPos;
     int32_t *hChrFilterPos           = c->hChrFilterPos;
     int16_t *hLumFilter              = c->hLumFilter;
     int16_t *hChrFilter              = c->hChrFilter;
     int32_t *lumMmxFilter            = c->lumMmxFilter;
     int32_t *chrMmxFilter            = c->chrMmxFilter;
+#endif
     const int vLumFilterSize         = c->vLumFilterSize;
     const int vChrFilterSize         = c->vChrFilterSize;
+#ifndef NEW_FILTER
     const int hLumFilterSize         = c->hLumFilterSize;
     const int hChrFilterSize         = c->hChrFilterSize;
     int16_t **lumPixBuf              = c->lumPixBuf;
     int16_t **chrUPixBuf             = c->chrUPixBuf;
     int16_t **chrVPixBuf             = c->chrVPixBuf;
+#endif
     int16_t **alpPixBuf              = c->alpPixBuf;
     const int vLumBufSize            = c->vLumBufSize;
     const int vChrBufSize            = c->vChrBufSize;
+#ifndef NEW_FILTER
     uint8_t *formatConvBuffer        = c->formatConvBuffer;
     uint32_t *pal                    = c->pal_yuv;
+    int perform_gamma = c->is_internal_gamma;
+#endif
     yuv2planar1_fn yuv2plane1        = c->yuv2plane1;
     yuv2planarX_fn yuv2planeX        = c->yuv2planeX;
     yuv2interleavedX_fn yuv2nv12cX   = c->yuv2nv12cX;
@@ -358,7 +373,7 @@ static int swscale(SwsContext *c, const uint8_t *src[],
     yuv2packedX_fn yuv2packedX       = c->yuv2packedX;
     yuv2anyX_fn yuv2anyX             = c->yuv2anyX;
     const int chrSrcSliceY           =                srcSliceY >> c->chrSrcVSubSample;
-    const int chrSrcSliceH           = FF_CEIL_RSHIFT(srcSliceH,   c->chrSrcVSubSample);
+    const int chrSrcSliceH           = AV_CEIL_RSHIFT(srcSliceH,   c->chrSrcVSubSample);
     int should_dither                = is9_OR_10BPS(c->srcFormat) ||
                                        is16BPS(c->srcFormat);
     int lastDstY;
@@ -369,12 +384,28 @@ static int swscale(SwsContext *c, const uint8_t *src[],
     int chrBufIndex  = c->chrBufIndex;
     int lastInLumBuf = c->lastInLumBuf;
     int lastInChrBuf = c->lastInChrBuf;
-    int perform_gamma = c->is_internal_gamma;
 
+#ifdef NEW_FILTER
+    int lumStart = 0;
+    int lumEnd = c->descIndex[0];
+    int chrStart = lumEnd;
+    int chrEnd = c->descIndex[1];
+    int vStart = chrEnd;
+    int vEnd = c->numDesc;
+    SwsSlice *src_slice = &c->slice[lumStart];
+    SwsSlice *hout_slice = &c->slice[c->numSlice-2];
+    SwsSlice *vout_slice = &c->slice[c->numSlice-1];
+    SwsFilterDescriptor *desc = c->desc;
+
+    int hasLumHoles = 1;
+    int hasChrHoles = 1;
+#endif
 
+#ifndef NEW_FILTER
     if (!usePal(c->srcFormat)) {
         pal = c->input_rgb2yuv_table;
     }
+#endif
 
     if (isPacked(c->srcFormat)) {
         src[0] =
@@ -439,14 +470,40 @@ static int swscale(SwsContext *c, const uint8_t *src[],
     }
     lastDstY = dstY;
 
+#ifdef NEW_FILTER
+    ff_init_vscale_pfn(c, yuv2plane1, yuv2planeX, yuv2nv12cX,
+                   yuv2packed1, yuv2packed2, yuv2packedX, yuv2anyX, c->use_mmx_vfilter);
+
+    ff_init_slice_from_src(src_slice, (uint8_t**)src, srcStride, c->srcW,
+            srcSliceY, srcSliceH, chrSrcSliceY, chrSrcSliceH, 1);
+
+    ff_init_slice_from_src(vout_slice, (uint8_t**)dst, dstStride, c->dstW,
+            dstY, dstH, dstY >> c->chrDstVSubSample,
+            AV_CEIL_RSHIFT(dstH, c->chrDstVSubSample), 0);
+    if (srcSliceY == 0) {
+        hout_slice->plane[0].sliceY = lastInLumBuf + 1;
+        hout_slice->plane[1].sliceY = lastInChrBuf + 1;
+        hout_slice->plane[2].sliceY = lastInChrBuf + 1;
+        hout_slice->plane[3].sliceY = lastInLumBuf + 1;
+
+        hout_slice->plane[0].sliceH =
+        hout_slice->plane[1].sliceH =
+        hout_slice->plane[2].sliceH =
+        hout_slice->plane[3].sliceH = 0;
+        hout_slice->width = dstW;
+    }
+#endif
+
     for (; dstY < dstH; dstY++) {
         const int chrDstY = dstY >> c->chrDstVSubSample;
+#ifndef NEW_FILTER
         uint8_t *dest[4]  = {
             dst[0] + dstStride[0] * dstY,
             dst[1] + dstStride[1] * chrDstY,
             dst[2] + dstStride[2] * chrDstY,
             (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
         };
+#endif
         int use_mmx_vfilter= c->use_mmx_vfilter;
 
         // First line needed as input
@@ -460,12 +517,36 @@ static int swscale(SwsContext *c, const uint8_t *src[],
         int lastLumSrcY2 = FFMIN(c->srcH,    firstLumSrcY2 + vLumFilterSize) - 1;
         int lastChrSrcY  = FFMIN(c->chrSrcH, firstChrSrcY  + vChrFilterSize) - 1;
         int enough_lines;
+#ifdef NEW_FILTER
+        int i;
+        int posY, cPosY, firstPosY, lastPosY, firstCPosY, lastCPosY;
+#endif
 
         // handle holes (FAST_BILINEAR & weird filters)
-        if (firstLumSrcY > lastInLumBuf)
+        if (firstLumSrcY > lastInLumBuf) {
+#ifdef NEW_FILTER
+            hasLumHoles = lastInLumBuf != firstLumSrcY - 1;
+            if (hasLumHoles) {
+                hout_slice->plane[0].sliceY = firstLumSrcY;
+                hout_slice->plane[3].sliceY = firstLumSrcY;
+                hout_slice->plane[0].sliceH =
+                hout_slice->plane[3].sliceH = 0;
+            }
+#endif
             lastInLumBuf = firstLumSrcY - 1;
-        if (firstChrSrcY > lastInChrBuf)
+        }
+        if (firstChrSrcY > lastInChrBuf) {
+#ifdef NEW_FILTER
+            hasChrHoles = lastInChrBuf != firstChrSrcY - 1;
+            if (hasChrHoles) {
+                hout_slice->plane[1].sliceY = firstChrSrcY;
+                hout_slice->plane[2].sliceY = firstChrSrcY;
+                hout_slice->plane[1].sliceH =
+                hout_slice->plane[2].sliceH = 0;
+            }
+#endif
             lastInChrBuf = firstChrSrcY - 1;
+        }
         av_assert0(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
         av_assert0(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
 
@@ -477,7 +558,7 @@ static int swscale(SwsContext *c, const uint8_t *src[],
 
         // Do we have enough lines in this slice to output the dstY line
         enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH &&
-                       lastChrSrcY < FF_CEIL_RSHIFT(srcSliceY + srcSliceH, c->chrSrcVSubSample);
+                       lastChrSrcY < AV_CEIL_RSHIFT(srcSliceY + srcSliceH, c->chrSrcVSubSample);
 
         if (!enough_lines) {
             lastLumSrcY = srcSliceY + srcSliceH - 1;
@@ -486,6 +567,44 @@ static int swscale(SwsContext *c, const uint8_t *src[],
                           lastLumSrcY, lastChrSrcY);
         }
 
+#ifdef NEW_FILTER
+        posY = hout_slice->plane[0].sliceY + hout_slice->plane[0].sliceH;
+        if (posY <= lastLumSrcY && !hasLumHoles) {
+            firstPosY = FFMAX(firstLumSrcY, posY);
+            lastPosY = FFMIN(lastLumSrcY + MAX_LINES_AHEAD, srcSliceY + srcSliceH - 1);
+        } else {
+            firstPosY = lastInLumBuf + 1;
+            lastPosY = lastLumSrcY;
+        }
+
+        cPosY = hout_slice->plane[1].sliceY + hout_slice->plane[1].sliceH;
+        if (cPosY <= lastChrSrcY && !hasChrHoles) {
+            firstCPosY = FFMAX(firstChrSrcY, cPosY);
+            lastCPosY = FFMIN(lastChrSrcY + MAX_LINES_AHEAD, AV_CEIL_RSHIFT(srcSliceY + srcSliceH, c->chrSrcVSubSample) - 1);
+        } else {
+            firstCPosY = lastInChrBuf + 1;
+            lastCPosY = lastChrSrcY;
+        }
+
+        ff_rotate_slice(hout_slice, lastPosY, lastCPosY);
+
+        if (posY < lastLumSrcY + 1) {
+            for (i = lumStart; i < lumEnd; ++i)
+                desc[i].process(c, &desc[i], firstPosY, lastPosY - firstPosY + 1);
+        }
+
+        lumBufIndex += lastLumSrcY - lastInLumBuf;
+        lastInLumBuf = lastLumSrcY;
+
+        if (cPosY < lastChrSrcY + 1) {
+            for (i = chrStart; i < chrEnd; ++i)
+                desc[i].process(c, &desc[i], firstCPosY, lastCPosY - firstCPosY + 1);
+        }
+
+        chrBufIndex += lastChrSrcY - lastInChrBuf;
+        lastInChrBuf = lastChrSrcY;
+
+#else
         // Do horizontal scaling
         while (lastInLumBuf < lastLumSrcY) {
             const uint8_t *src1[4] = {
@@ -535,6 +654,7 @@ static int swscale(SwsContext *c, const uint8_t *src[],
             DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
                           chrBufIndex, lastInChrBuf);
         }
+#endif
         // wrap buf index around to stay inside the ring buffer
         if (lumBufIndex >= vLumBufSize)
             lumBufIndex -= vLumBufSize;
@@ -557,9 +677,15 @@ static int swscale(SwsContext *c, const uint8_t *src[],
             ff_sws_init_output_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
                                      &yuv2packed1, &yuv2packed2, &yuv2packedX, &yuv2anyX);
             use_mmx_vfilter= 0;
+            ff_init_vscale_pfn(c, yuv2plane1, yuv2planeX, yuv2nv12cX,
+                           yuv2packed1, yuv2packed2, yuv2packedX, yuv2anyX, use_mmx_vfilter);
         }
 
         {
+#ifdef NEW_FILTER
+            for (i = vStart; i < vEnd; ++i)
+                desc[i].process(c, &desc[i], dstY, 1);
+#else
             const int16_t **lumSrcPtr  = (const int16_t **)(void*) lumPixBuf  + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
             const int16_t **chrUSrcPtr = (const int16_t **)(void*) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
             const int16_t **chrVSrcPtr = (const int16_t **)(void*) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
@@ -665,6 +791,7 @@ static int swscale(SwsContext *c, const uint8_t *src[],
             }
             if (perform_gamma)
                 gamma_convert(dest, dstW, c->gamma);
+#endif
         }
     }
     if (isPlanar(dstFormat) && isALPHA(dstFormat) && !alpPixBuf) {
@@ -674,7 +801,7 @@ static int swscale(SwsContext *c, const uint8_t *src[],
         if (is16BPS(dstFormat) || isNBPS(dstFormat)) {
             const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(dstFormat);
             fillPlane16(dst[3], dstStride[3], length, height, lastDstY,
-                    1, desc->comp[3].depth_minus1,
+                    1, desc->comp[3].depth,
                     isBE(dstFormat));
         } else
             fillPlane(dst[3], dstStride[3], length, height, lastDstY, 255);
@@ -784,6 +911,8 @@ static int check_image_pointers(const uint8_t * const data[4], enum AVPixelForma
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
     int i;
 
+    av_assert2(desc);
+
     for (i = 0; i < 4; i++) {
         int plane = desc->comp[i].plane;
         if (!data[plane] || !linesizes[plane])
@@ -919,12 +1048,20 @@ int attribute_align_arg sws_scale(struct SwsContext *c,
     const uint8_t *src2[4];
     uint8_t *dst2[4];
     uint8_t *rgb0_tmp = NULL;
+    int macro_height = isBayer(c->srcFormat) ? 2 : (1 << c->chrSrcVSubSample);
 
     if (!srcStride || !dstStride || !dst || !srcSlice) {
         av_log(c, AV_LOG_ERROR, "One of the input parameters to sws_scale() is NULL, please check the calling code\n");
         return 0;
     }
 
+    if ((srcSliceY & (macro_height-1)) ||
+        ((srcSliceH& (macro_height-1)) && srcSliceY + srcSliceH != c->srcH) ||
+        srcSliceY + srcSliceH > c->srcH) {
+        av_log(c, AV_LOG_ERROR, "Slice parameters %d, %d are invalid\n", srcSliceY, srcSliceH);
+        return AVERROR(EINVAL);
+    }
+
     if (c->gamma_flag && c->cascaded_context[0]) {
 
 
@@ -1149,4 +1286,3 @@ int attribute_align_arg sws_scale(struct SwsContext *c,
     av_free(rgb0_tmp);
     return ret;
 }
-
diff --git a/libswscale/swscale.h b/libswscale/swscale.h
index 903e1203..da9dd2ea 100644
--- a/libswscale/swscale.h
+++ b/libswscale/swscale.h
@@ -82,22 +82,6 @@ const char *swscale_license(void);
 #define SWS_BITEXACT          0x80000
 #define SWS_ERROR_DIFFUSION  0x800000
 
-#if FF_API_SWS_CPU_CAPS
-/**
- * CPU caps are autodetected now, those flags
- * are only provided for API compatibility.
- */
-#define SWS_CPU_CAPS_MMX      0x80000000
-#define SWS_CPU_CAPS_MMXEXT   0x20000000
-#define SWS_CPU_CAPS_MMX2     0x20000000
-#define SWS_CPU_CAPS_3DNOW    0x40000000
-#define SWS_CPU_CAPS_ALTIVEC  0x10000000
-#if FF_API_ARCH_BFIN
-#define SWS_CPU_CAPS_BFIN     0x01000000
-#endif
-#define SWS_CPU_CAPS_SSE2     0x02000000
-#endif
-
 #define SWS_MAX_REDUCE_CUTOFF 0.002
 
 #define SWS_CS_ITU709         1
@@ -166,6 +150,7 @@ struct SwsContext *sws_alloc_context(void);
  * @return zero or positive value on success, a negative value on
  * error
  */
+av_warn_unused_result
 int sws_init_context(struct SwsContext *sws_context, SwsFilter *srcFilter, SwsFilter *dstFilter);
 
 /**
@@ -185,6 +170,12 @@ void sws_freeContext(struct SwsContext *swsContext);
  * @param dstH the height of the destination image
  * @param dstFormat the destination image format
  * @param flags specify which algorithm and options to use for rescaling
+ * @param param extra parameters to tune the used scaler
+ *              For SWS_BICUBIC param[0] and [1] tune the shape of the basis
+ *              function, param[0] tunes f(1) and param[1] f´(1)
+ *              For SWS_GAUSS param[0] tunes the exponent and thus cutoff
+ *              frequency
+ *              For SWS_LANCZOS param[0] tunes the width of the window function
  * @return a pointer to an allocated context, or NULL in case of error
  * @note this function is to be removed after a saner alternative is
  *       written
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 2299aa5c..1e29ec32 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -39,7 +39,8 @@
 
 #define STR(s) AV_TOSTRING(s) // AV_STRINGIFY is too long
 
-#define YUVRGB_TABLE_HEADROOM 256
+#define YUVRGB_TABLE_HEADROOM 512
+#define YUVRGB_TABLE_LUMA_HEADROOM 512
 
 #define MAX_FILTER_SIZE SWS_MAX_FILTER_SIZE
 
@@ -75,6 +76,13 @@ typedef enum SwsDither {
     NB_SWS_DITHER,
 } SwsDither;
 
+typedef enum SwsAlphaBlend { /* type of the SwsContext.alphablend field */
+    SWS_ALPHA_BLEND_NONE  = 0,
+    SWS_ALPHA_BLEND_UNIFORM,
+    SWS_ALPHA_BLEND_CHECKERBOARD,
+    SWS_ALPHA_BLEND_NB, /* NOTE(review): presumably the number of modes, like NB_SWS_DITHER -- confirm */
+} SwsAlphaBlend;
+
 typedef int (*SwsFunc)(struct SwsContext *context, const uint8_t *src[],
                        int srcStride[], int srcSliceY, int srcSliceH,
                        uint8_t *dst[], int dstStride[]);
@@ -269,6 +277,9 @@ typedef void (*yuv2anyX_fn)(struct SwsContext *c, const int16_t *lumFilter,
                             const int16_t **alpSrc, uint8_t **dest,
                             int dstW, int y);
 
+struct SwsSlice;
+struct SwsFilterDescriptor;
+
 /* This struct should be aligned on at least a 32-byte boundary. */
 typedef struct SwsContext {
     /**
@@ -312,6 +323,7 @@ typedef struct SwsContext {
     uint8_t *cascaded_tmp[4];
     int cascaded1_tmpStride[4];
     uint8_t *cascaded1_tmp[4];
+    int cascaded_mainindex;
 
     double gamma_value;
     int gamma_flag;
@@ -319,6 +331,12 @@ typedef struct SwsContext {
     uint16_t *gamma;
     uint16_t *inv_gamma;
 
+    int numDesc;
+    int descIndex[2];
+    int numSlice;
+    struct SwsSlice *slice;
+    struct SwsFilterDescriptor *desc;
+
     uint32_t pal_yuv[256];
     uint32_t pal_rgb[256];
 
@@ -380,6 +398,7 @@ typedef struct SwsContext {
     uint8_t *chrMmxextFilterCode; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code for chroma planes.
 
     int canMMXEXTBeUsed;
+    int warned_unuseable_bilinear;
 
     int dstY;                     ///< Last destination vertical line output from last slice.
     int flags;                    ///< Flags passed by the user to select scaler algorithm, optimizations, subsampling, etc...
@@ -611,6 +630,8 @@ typedef struct SwsContext {
     int needs_hcscale; ///< Set if there are chroma planes to be converted.
 
     SwsDither dither;
+
+    SwsAlphaBlend alphablend;
 } SwsContext;
 //FIXME check init (where 0)
 
@@ -633,14 +654,14 @@ static av_always_inline int is16BPS(enum AVPixelFormat pix_fmt)
 {
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
     av_assert0(desc);
-    return desc->comp[0].depth_minus1 == 15;
+    return desc->comp[0].depth == 16;
 }
 
 static av_always_inline int is9_OR_10BPS(enum AVPixelFormat pix_fmt)
 {
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
     av_assert0(desc);
-    return desc->comp[0].depth_minus1 >= 8 && desc->comp[0].depth_minus1 <= 13;
+    return desc->comp[0].depth >= 9 && desc->comp[0].depth <= 14;
 }
 
 #define isNBPS(x) is9_OR_10BPS(x)
@@ -790,6 +811,8 @@ static av_always_inline int isALPHA(enum AVPixelFormat pix_fmt)
         || (x)==AV_PIX_FMT_YA8       \
         || (x)==AV_PIX_FMT_YA16LE      \
         || (x)==AV_PIX_FMT_YA16BE      \
+        || (x)==AV_PIX_FMT_AYUV64LE    \
+        || (x)==AV_PIX_FMT_AYUV64BE    \
         ||  isRGBinInt(x)           \
         ||  isBGRinInt(x)           \
     )
@@ -845,7 +868,7 @@ extern const uint8_t ff_dither_8x8_220[9][8];
 
 extern const int32_t ff_yuv2rgb_coeffs[8][4];
 
-extern const AVClass sws_context_class;
+extern const AVClass ff_sws_context_class;
 
 /**
  * Set c->swscale to an unscaled converter if one exists for the specific
@@ -888,12 +911,27 @@ void ff_hcscale_fast_mmxext(SwsContext *c, int16_t *dst1, int16_t *dst2,
                             int dstWidth, const uint8_t *src1,
                             const uint8_t *src2, int srcW, int xInc);
 
+/**
+ * Allocate and return an SwsContext.
+ * This is like sws_getContext() but does not perform the init step, allowing
+ * the user to set additional AVOptions.
+ *
+ * @see sws_getContext()
+ */
+struct SwsContext *sws_alloc_set_opts(int srcW, int srcH, enum AVPixelFormat srcFormat,
+                                      int dstW, int dstH, enum AVPixelFormat dstFormat,
+                                      int flags, const double *param);
+
+int ff_sws_alphablendaway(SwsContext *c, const uint8_t *src[],
+                          int srcStride[], int srcSliceY, int srcSliceH,
+                          uint8_t *dst[], int dstStride[]);
+
 static inline void fillPlane16(uint8_t *plane, int stride, int width, int height, int y,
                                int alpha, int bits, const int big_endian)
 {
     int i, j;
     uint8_t *ptr = plane + stride * y;
-    int v = alpha ? 0xFFFF>>(15-bits) : (1<<bits);
+    int v = alpha ? 0xFFFF>>(16-bits) : (1<<(bits-1));
     for (i = 0; i < height; i++) {
 #define FILL(wfunc) \
         for (j = 0; j < width; j++) {\
@@ -908,4 +946,97 @@ static inline void fillPlane16(uint8_t *plane, int stride, int width, int height
     }
 }
 
+#define MAX_SLICE_PLANES 4
+
+/// Slice plane: one color plane of a slice (vertical extent plus its line buffers)
+typedef struct SwsPlane
+{
+    int available_lines;    ///< max number of lines that can be held by this plane
+    int sliceY;             ///< index of first line
+    int sliceH;             ///< number of lines
+    uint8_t **line;         ///< line buffer
+    uint8_t **tmp;          ///< temporary line buffer used by mmx code
+} SwsPlane;
+
+/**
+ * Struct which defines a slice of an image to be scaled or an output for
+ * a scaled slice.
+ * A slice can also be used as an intermediate ring buffer for scaling steps.
+ */
+typedef struct SwsSlice
+{
+    int width;              ///< Slice line width
+    int h_chr_sub_sample;   ///< horizontal chroma subsampling factor
+    int v_chr_sub_sample;   ///< vertical chroma subsampling factor
+    int is_ring;            ///< flag to identify if this slice is a ring buffer
+    int should_free_lines;  ///< flag to identify if there are dynamically allocated lines
+    enum AVPixelFormat fmt; ///< planes pixel format
+    SwsPlane plane[MAX_SLICE_PLANES];   ///< color planes
+} SwsSlice;
+
+/**
+ * Struct which holds all necessary data for processing a slice.
+ * A processing step can be a color conversion or horizontal/vertical scaling.
+ */
+typedef struct SwsFilterDescriptor
+{
+    SwsSlice *src;  ///< Source slice
+    SwsSlice *dst;  ///< Output slice
+
+    int alpha;      ///< Flag for processing alpha channel
+    void *instance; ///< Filter instance data (released with av_freep() in ff_free_filters())
+
+    /// Function that processes sliceH lines of the input slice, starting at line sliceY
+    int (*process)(SwsContext *c, struct SwsFilterDescriptor *desc, int sliceY, int sliceH);
+} SwsFilterDescriptor;
+
+// wrap input lines in the form (src + width*i + j) to slice format (line[i][j])
+// relative=true means first line src[x][0] otherwise first line is src[x][lum/chr Y]
+int ff_init_slice_from_src(SwsSlice * s, uint8_t *src[4], int stride[4], int srcW, int lumY, int lumH, int chrY, int chrH, int relative);
+
+// Initialize scaler filter descriptor chain
+int ff_init_filters(SwsContext *c);
+
+// Free all filter data
+int ff_free_filters(SwsContext *c);
+
+/*
+ Function for applying ring buffer logic to slice s.
+ It checks if the slice can hold more @lum lines; if yes, it
+ does nothing, otherwise it removes the @lum least used lines.
+ It applies the same procedure for @chr lines.
+*/
+int ff_rotate_slice(SwsSlice *s, int lum, int chr);
+
+/// initializes gamma conversion descriptor
+int ff_init_gamma_convert(SwsFilterDescriptor *desc, SwsSlice * src, uint16_t *table);
+
+/// initializes lum pixel format conversion descriptor
+int ff_init_desc_fmt_convert(SwsFilterDescriptor *desc, SwsSlice * src, SwsSlice *dst, uint32_t *pal);
+
+/// initializes lum horizontal scaling descriptor
+int ff_init_desc_hscale(SwsFilterDescriptor *desc, SwsSlice *src, SwsSlice *dst, uint16_t *filter, int * filter_pos, int filter_size, int xInc);
+
+/// initializes chr pixel format conversion descriptor
+int ff_init_desc_cfmt_convert(SwsFilterDescriptor *desc, SwsSlice * src, SwsSlice *dst, uint32_t *pal);
+
+/// initializes chr horizontal scaling descriptor
+int ff_init_desc_chscale(SwsFilterDescriptor *desc, SwsSlice *src, SwsSlice *dst, uint16_t *filter, int * filter_pos, int filter_size, int xInc);
+
+int ff_init_desc_no_chr(SwsFilterDescriptor *desc, SwsSlice * src, SwsSlice *dst);
+
+/// initializes vertical scaling descriptors
+int ff_init_vscale(SwsContext *c, SwsFilterDescriptor *desc, SwsSlice *src, SwsSlice *dst);
+
+/// setup vertical scaler functions
+void ff_init_vscale_pfn(SwsContext *c, yuv2planar1_fn yuv2plane1, yuv2planarX_fn yuv2planeX,
+    yuv2interleavedX_fn yuv2nv12cX, yuv2packed1_fn yuv2packed1, yuv2packed2_fn yuv2packed2,
+    yuv2packedX_fn yuv2packedX, yuv2anyX_fn yuv2anyX, int use_mmx);
+
+// maximum number of extra source lines to process beyond the last one currently needed
+#define MAX_LINES_AHEAD 4
+
+// enable use of refactored scaler code
+#define NEW_FILTER
+
 #endif /* SWSCALE_SWSCALE_INTERNAL_H */
diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
index 1dc42c81..74f34674 100644
--- a/libswscale/swscale_unscaled.c
+++ b/libswscale/swscale_unscaled.c
@@ -554,7 +554,7 @@ static int Rgb16ToPlanarRgb16Wrapper(SwsContext *c, const uint8_t *src[],
     int stride1023[] = { dstStride[1], dstStride[0], dstStride[2], dstStride[3] };
     const AVPixFmtDescriptor *src_format = av_pix_fmt_desc_get(c->srcFormat);
     const AVPixFmtDescriptor *dst_format = av_pix_fmt_desc_get(c->dstFormat);
-    int bpc = dst_format->comp[0].depth_minus1 + 1;
+    int bpc = dst_format->comp[0].depth;
     int alpha = src_format->flags & AV_PIX_FMT_FLAG_ALPHA;
     int swap = 0;
     if ( HAVE_BIGENDIAN && !(src_format->flags & AV_PIX_FMT_FLAG_BE) ||
@@ -725,7 +725,7 @@ static int planarRgb16ToRgb16Wrapper(SwsContext *c, const uint8_t *src[],
     int stride201[] = { srcStride[2], srcStride[0], srcStride[1], srcStride[3] };
     const AVPixFmtDescriptor *src_format = av_pix_fmt_desc_get(c->srcFormat);
     const AVPixFmtDescriptor *dst_format = av_pix_fmt_desc_get(c->dstFormat);
-    int bits_per_sample = src_format->comp[0].depth_minus1 + 1;
+    int bits_per_sample = src_format->comp[0].depth;
     int swap = 0;
     if ( HAVE_BIGENDIAN && !(src_format->flags & AV_PIX_FMT_FLAG_BE) ||
         !HAVE_BIGENDIAN &&   src_format->flags & AV_PIX_FMT_FLAG_BE)
@@ -1059,6 +1059,8 @@ static int bayer_to_rgb24_wrapper(SwsContext *c, const uint8_t* src[], int srcSt
     default: return 0;
     }
 
+    av_assert0(srcSliceH > 1);
+
     copy(srcPtr, srcStride[0], dstPtr, dstStride[0], c->srcW);
     srcPtr += 2 * srcStride[0];
     dstPtr += 2 * dstStride[0];
@@ -1069,7 +1071,10 @@ static int bayer_to_rgb24_wrapper(SwsContext *c, const uint8_t* src[], int srcSt
         dstPtr += 2 * dstStride[0];
     }
 
-    copy(srcPtr, srcStride[0], dstPtr, dstStride[0], c->srcW);
+    if (i + 1 == srcSliceH) {
+        copy(srcPtr, -srcStride[0], dstPtr, -dstStride[0], c->srcW);
+    } else if (i < srcSliceH)
+        copy(srcPtr, srcStride[0], dstPtr, dstStride[0], c->srcW);
     return srcSliceH;
 }
 
@@ -1105,6 +1110,8 @@ static int bayer_to_yv12_wrapper(SwsContext *c, const uint8_t* src[], int srcStr
     default: return 0;
     }
 
+    av_assert0(srcSliceH > 1);
+
     copy(srcPtr, srcStride[0], dstY, dstU, dstV, dstStride[0], c->srcW, c->input_rgb2yuv_table);
     srcPtr += 2 * srcStride[0];
     dstY   += 2 * dstStride[0];
@@ -1119,7 +1126,10 @@ static int bayer_to_yv12_wrapper(SwsContext *c, const uint8_t* src[], int srcStr
         dstV   +=     dstStride[1];
     }
 
-    copy(srcPtr, srcStride[0], dstY, dstU, dstV, dstStride[0], c->srcW, c->input_rgb2yuv_table);
+    if (i + 1 == srcSliceH) {
+        copy(srcPtr, -srcStride[0], dstY, dstU, dstV, -dstStride[0], c->srcW, c->input_rgb2yuv_table);
+    } else if (i < srcSliceH)
+        copy(srcPtr, srcStride[0], dstY, dstU, dstV, dstStride[0], c->srcW, c->input_rgb2yuv_table);
     return srcSliceH;
 }
 
@@ -1403,9 +1413,9 @@ static int planarCopyWrapper(SwsContext *c, const uint8_t *src[],
     const AVPixFmtDescriptor *desc_dst = av_pix_fmt_desc_get(c->dstFormat);
     int plane, i, j;
     for (plane = 0; plane < 4; plane++) {
-        int length = (plane == 0 || plane == 3) ? c->srcW  : FF_CEIL_RSHIFT(c->srcW,   c->chrDstHSubSample);
-        int y =      (plane == 0 || plane == 3) ? srcSliceY: FF_CEIL_RSHIFT(srcSliceY, c->chrDstVSubSample);
-        int height = (plane == 0 || plane == 3) ? srcSliceH: FF_CEIL_RSHIFT(srcSliceH, c->chrDstVSubSample);
+        int length = (plane == 0 || plane == 3) ? c->srcW  : AV_CEIL_RSHIFT(c->srcW,   c->chrDstHSubSample);
+        int y =      (plane == 0 || plane == 3) ? srcSliceY: AV_CEIL_RSHIFT(srcSliceY, c->chrDstVSubSample);
+        int height = (plane == 0 || plane == 3) ? srcSliceH: AV_CEIL_RSHIFT(srcSliceH, c->chrDstVSubSample);
         const uint8_t *srcPtr = src[plane];
         uint8_t *dstPtr = dst[plane] + dstStride[plane] * y;
         int shiftonly = plane == 1 || plane == 2 || (!c->srcRange && plane == 0);
@@ -1417,7 +1427,7 @@ static int planarCopyWrapper(SwsContext *c, const uint8_t *src[],
         if (!src[plane] || (plane == 1 && !src[2])) {
             if (is16BPS(c->dstFormat) || isNBPS(c->dstFormat)) {
                 fillPlane16(dst[plane], dstStride[plane], length, height, y,
-                        plane == 3, desc_dst->comp[plane].depth_minus1,
+                        plane == 3, desc_dst->comp[plane].depth,
                         isBE(c->dstFormat));
             } else {
                 fillPlane(dst[plane], dstStride[plane], length, height, y,
@@ -1427,8 +1437,8 @@ static int planarCopyWrapper(SwsContext *c, const uint8_t *src[],
             if(isNBPS(c->srcFormat) || isNBPS(c->dstFormat)
                || (is16BPS(c->srcFormat) != is16BPS(c->dstFormat))
             ) {
-                const int src_depth = desc_src->comp[plane].depth_minus1 + 1;
-                const int dst_depth = desc_dst->comp[plane].depth_minus1 + 1;
+                const int src_depth = desc_src->comp[plane].depth;
+                const int dst_depth = desc_dst->comp[plane].depth;
                 const uint16_t *srcPtr2 = (const uint16_t *) srcPtr;
                 uint16_t *dstPtr2 = (uint16_t*)dstPtr;
 
@@ -1543,7 +1553,7 @@ static int planarCopyWrapper(SwsContext *c, const uint8_t *src[],
             } else {
                 if (is16BPS(c->srcFormat) && is16BPS(c->dstFormat))
                     length *= 2;
-                else if (!desc_src->comp[0].depth_minus1)
+                else if (desc_src->comp[0].depth == 1)
                     length >>= 3; // monowhite/black
                 for (i = 0; i < height; i++) {
                     memcpy(dstPtr, srcPtr, length);
@@ -1648,7 +1658,7 @@ void ff_get_unscaled_swscale(SwsContext *c)
          dstFormat == AV_PIX_FMT_BGRA64LE || dstFormat == AV_PIX_FMT_BGRA64BE))
         c->swscale = planarRgb16ToRgb16Wrapper;
 
-    if (av_pix_fmt_desc_get(srcFormat)->comp[0].depth_minus1 == 7 &&
+    if (av_pix_fmt_desc_get(srcFormat)->comp[0].depth == 8 &&
         isPackedRGB(srcFormat) && dstFormat == AV_PIX_FMT_GBRP)
         c->swscale = rgbToPlanarRgbWrapper;
 
@@ -1676,6 +1686,7 @@ void ff_get_unscaled_swscale(SwsContext *c)
         IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_BGRA64) ||
         IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GRAY16) ||
         IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_YA16)   ||
+        IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_AYUV64) ||
         IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GBRP9)  ||
         IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GBRP10) ||
         IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, AV_PIX_FMT_GBRP12) ||
@@ -1751,7 +1762,9 @@ void ff_get_unscaled_swscale(SwsContext *c)
          c->chrDstHSubSample == c->chrSrcHSubSample &&
          c->chrDstVSubSample == c->chrSrcVSubSample &&
          dstFormat != AV_PIX_FMT_NV12 && dstFormat != AV_PIX_FMT_NV21 &&
-         srcFormat != AV_PIX_FMT_NV12 && srcFormat != AV_PIX_FMT_NV21))
+         dstFormat != AV_PIX_FMT_P010LE && dstFormat != AV_PIX_FMT_P010BE &&
+         srcFormat != AV_PIX_FMT_NV12 && srcFormat != AV_PIX_FMT_NV21 &&
+         srcFormat != AV_PIX_FMT_P010LE && srcFormat != AV_PIX_FMT_P010BE))
     {
         if (isPacked(c->srcFormat))
             c->swscale = packedCopyWrapper;
@@ -1761,8 +1774,8 @@ void ff_get_unscaled_swscale(SwsContext *c)
 
     if (ARCH_PPC)
         ff_get_unscaled_swscale_ppc(c);
-//     if (ARCH_ARM)
-//         ff_get_unscaled_swscale_arm(c);
+     if (ARCH_ARM)
+         ff_get_unscaled_swscale_arm(c);
 }
 
 /* Convert the palette to the same packed 32-bit format as the palette */
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 63fb05cc..0beb7538 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -20,6 +20,7 @@
 
 #include "config.h"
 
+#define _DEFAULT_SOURCE
 #define _SVID_SOURCE // needed for MAP_ANONYMOUS
 #define _DARWIN_C_SOURCE // needed for MAP_ANON
 #include <inttypes.h>
@@ -44,6 +45,7 @@
 #include "libavutil/cpu.h"
 #include "libavutil/imgutils.h"
 #include "libavutil/intreadwrite.h"
+#include "libavutil/libm.h"
 #include "libavutil/mathematics.h"
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
@@ -166,7 +168,7 @@ static const FormatEntry format_entries[AV_PIX_FMT_NB] = {
     [AV_PIX_FMT_RGB444BE]    = { 1, 1 },
     [AV_PIX_FMT_BGR444LE]    = { 1, 1 },
     [AV_PIX_FMT_BGR444BE]    = { 1, 1 },
-    [AV_PIX_FMT_YA8]         = { 1, 0 },
+    [AV_PIX_FMT_YA8]         = { 1, 1 },
     [AV_PIX_FMT_YA16BE]      = { 1, 0 },
     [AV_PIX_FMT_YA16LE]      = { 1, 0 },
     [AV_PIX_FMT_BGR48BE]     = { 1, 1 },
@@ -225,6 +227,9 @@ static const FormatEntry format_entries[AV_PIX_FMT_NB] = {
     [AV_PIX_FMT_BAYER_GRBG16BE] = { 1, 0 },
     [AV_PIX_FMT_XYZ12BE]     = { 1, 1, 1 },
     [AV_PIX_FMT_XYZ12LE]     = { 1, 1, 1 },
+    [AV_PIX_FMT_AYUV64LE]    = { 1, 1},
+    [AV_PIX_FMT_P010LE]      = { 1, 0 },
+    [AV_PIX_FMT_P010BE]      = { 1, 0 },
 };
 
 int sws_isSupportedInput(enum AVPixelFormat pix_fmt)
@@ -384,7 +389,7 @@ static av_cold int initFilter(int16_t **outFilter, int32_t **filterPos,
 
         xDstInSrc = ((dstPos*(int64_t)xInc)>>7) - ((srcPos*0x10000LL)>>7);
         for (i = 0; i < dstW; i++) {
-            int xx = (xDstInSrc - ((int64_t)(filterSize - 2) << 16)) / (1 << 17);
+            int xx = (xDstInSrc - (filterSize - 2) * (1LL<<16)) / (1 << 17);
             int j;
             (*filterPos)[i] = xx;
             for (j = 0; j < filterSize; j++) {
@@ -449,7 +454,7 @@ static av_cold int initFilter(int16_t **outFilter, int32_t **filterPos,
                     coeff *= fone >> (30 + 16);
                 } else if (flags & SWS_GAUSS) {
                     double p = param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
-                    coeff = (pow(2.0, -p * floatd * floatd)) * fone;
+                    coeff = exp2(-p * floatd * floatd) * fone;
                 } else if (flags & SWS_SINC) {
                     coeff = (d ? sin(floatd * M_PI) / (floatd * M_PI) : 1.0) * fone;
                 } else if (flags & SWS_LANCZOS) {
@@ -829,8 +834,6 @@ int sws_setColorspaceDetails(struct SwsContext *c, const int inv_table[4],
     const AVPixFmtDescriptor *desc_dst;
     const AVPixFmtDescriptor *desc_src;
     int need_reinit = 0;
-    memmove(c->srcColorspaceTable, inv_table, sizeof(int) * 4);
-    memmove(c->dstColorspaceTable, table, sizeof(int) * 4);
 
     handle_formats(c);
     desc_dst = av_pix_fmt_desc_get(c->dstFormat);
@@ -841,11 +844,24 @@ int sws_setColorspaceDetails(struct SwsContext *c, const int inv_table[4],
     if(!isYUV(c->srcFormat) && !isGray(c->srcFormat))
         srcRange = 0;
 
+    if (c->srcRange != srcRange ||
+        c->dstRange != dstRange ||
+        c->brightness != brightness ||
+        c->contrast   != contrast ||
+        c->saturation != saturation ||
+        memcmp(c->srcColorspaceTable, inv_table, sizeof(int) * 4) ||
+        memcmp(c->dstColorspaceTable,     table, sizeof(int) * 4)
+    )
+        need_reinit = 1;
+
+    memmove(c->srcColorspaceTable, inv_table, sizeof(int) * 4);
+    memmove(c->dstColorspaceTable, table, sizeof(int) * 4);
+
+
+
     c->brightness = brightness;
     c->contrast   = contrast;
     c->saturation = saturation;
-    if (c->srcRange != srcRange || c->dstRange != dstRange)
-        need_reinit = 1;
     c->srcRange   = srcRange;
     c->dstRange   = dstRange;
 
@@ -854,12 +870,83 @@ int sws_setColorspaceDetails(struct SwsContext *c, const int inv_table[4],
     if (need_reinit && (c->srcBpc == 8 || !isYUV(c->srcFormat)))
         ff_sws_init_range_convert(c);
 
-    if ((isYUV(c->dstFormat) || isGray(c->dstFormat)) && (isYUV(c->srcFormat) || isGray(c->srcFormat)))
-        return -1;
-
     c->dstFormatBpp = av_get_bits_per_pixel(desc_dst);
     c->srcFormatBpp = av_get_bits_per_pixel(desc_src);
 
+    if (c->cascaded_context[c->cascaded_mainindex])
+        return sws_setColorspaceDetails(c->cascaded_context[c->cascaded_mainindex],inv_table, srcRange,table, dstRange, brightness,  contrast, saturation);
+
+    if (!need_reinit)
+        return 0;
+
+    if ((isYUV(c->dstFormat) || isGray(c->dstFormat)) && (isYUV(c->srcFormat) || isGray(c->srcFormat))) {
+        if (!c->cascaded_context[0] &&
+            memcmp(c->dstColorspaceTable, c->srcColorspaceTable, sizeof(int) * 4) &&
+            c->srcW && c->srcH && c->dstW && c->dstH) {
+            enum AVPixelFormat tmp_format;
+            int tmp_width, tmp_height;
+            int srcW = c->srcW;
+            int srcH = c->srcH;
+            int dstW = c->dstW;
+            int dstH = c->dstH;
+            int ret;
+            av_log(c, AV_LOG_VERBOSE, "YUV color matrix differs for YUV->YUV, using intermediate RGB to convert\n");
+
+            if (isNBPS(c->dstFormat) || is16BPS(c->dstFormat)) {
+                if (isALPHA(c->srcFormat) && isALPHA(c->dstFormat)) {
+                    tmp_format = AV_PIX_FMT_BGRA64;
+                } else {
+                    tmp_format = AV_PIX_FMT_BGR48;
+                }
+            } else {
+                if (isALPHA(c->srcFormat) && isALPHA(c->dstFormat)) {
+                    tmp_format = AV_PIX_FMT_BGRA;
+                } else {
+                    tmp_format = AV_PIX_FMT_BGR24;
+                }
+            }
+
+            if (srcW*srcH > dstW*dstH) {
+                tmp_width  = dstW;
+                tmp_height = dstH;
+            } else {
+                tmp_width  = srcW;
+                tmp_height = srcH;
+            }
+
+            ret = av_image_alloc(c->cascaded_tmp, c->cascaded_tmpStride,
+                                tmp_width, tmp_height, tmp_format, 64);
+            if (ret < 0)
+                return ret;
+
+            c->cascaded_context[0] = sws_alloc_set_opts(srcW, srcH, c->srcFormat,
+                                                        tmp_width, tmp_height, tmp_format,
+                                                        c->flags, c->param);
+            if (!c->cascaded_context[0])
+                return -1;
+
+            c->cascaded_context[0]->alphablend = c->alphablend;
+            ret = sws_init_context(c->cascaded_context[0], NULL , NULL);
+            if (ret < 0)
+                return ret;
+            // we set both src and dst, relying on the RGB side being ignored
+            sws_setColorspaceDetails(c->cascaded_context[0], inv_table,
+                                     srcRange, table, dstRange,
+                                     brightness, contrast, saturation);
+
+            c->cascaded_context[1] = sws_getContext(tmp_width, tmp_height, tmp_format,
+                                                    dstW, dstH, c->dstFormat,
+                                                    c->flags, NULL, NULL, c->param);
+            if (!c->cascaded_context[1])
+                return -1;
+            sws_setColorspaceDetails(c->cascaded_context[1], inv_table,
+                                     srcRange, table, dstRange,
+                                     0, 1 << 16, 1 << 16);
+            return 0;
+        }
+        return -1;
+    }
+
     if (!isYUV(c->dstFormat) && !isGray(c->dstFormat)) {
         ff_yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness,
                                  contrast, saturation);
@@ -912,8 +999,11 @@ static int handle_jpeg(enum AVPixelFormat *format)
         *format = AV_PIX_FMT_YUV440P;
         return 1;
     case AV_PIX_FMT_GRAY8:
+    case AV_PIX_FMT_YA8:
     case AV_PIX_FMT_GRAY16LE:
     case AV_PIX_FMT_GRAY16BE:
+    case AV_PIX_FMT_YA16BE:
+    case AV_PIX_FMT_YA16LE:
         return 1;
     default:
         return 0;
@@ -957,7 +1047,7 @@ SwsContext *sws_alloc_context(void)
     av_assert0(offsetof(SwsContext, redDither) + DITHER32_INT == offsetof(SwsContext, dither32));
 
     if (c) {
-        c->av_class = &sws_context_class;
+        c->av_class = &ff_sws_context_class;
         av_opt_set_defaults(c);
     }
 
@@ -978,6 +1068,58 @@ static uint16_t * alloc_gamma_tbl(double e)
     return tbl;
 }
 
+static enum AVPixelFormat alphaless_fmt(enum AVPixelFormat fmt)
+{
+    switch(fmt) {
+    case AV_PIX_FMT_ARGB:       return AV_PIX_FMT_RGB24;
+    case AV_PIX_FMT_RGBA:       return AV_PIX_FMT_RGB24;
+    case AV_PIX_FMT_ABGR:       return AV_PIX_FMT_BGR24;
+    case AV_PIX_FMT_BGRA:       return AV_PIX_FMT_BGR24;
+    case AV_PIX_FMT_YA8:        return AV_PIX_FMT_GRAY8;
+
+    case AV_PIX_FMT_YUVA420P:   return AV_PIX_FMT_YUV420P;
+    case AV_PIX_FMT_YUVA422P:   return AV_PIX_FMT_YUV422P;
+    case AV_PIX_FMT_YUVA444P:           return AV_PIX_FMT_YUV444P;
+
+    case AV_PIX_FMT_GBRAP:              return AV_PIX_FMT_GBRP;
+
+    case AV_PIX_FMT_GBRAP16LE:          return AV_PIX_FMT_GBRP16;
+    case AV_PIX_FMT_GBRAP16BE:          return AV_PIX_FMT_GBRP16;
+
+    case AV_PIX_FMT_RGBA64LE:   return AV_PIX_FMT_RGB48;
+    case AV_PIX_FMT_RGBA64BE:   return AV_PIX_FMT_RGB48;
+    case AV_PIX_FMT_BGRA64LE:   return AV_PIX_FMT_BGR48;
+    case AV_PIX_FMT_BGRA64BE:   return AV_PIX_FMT_BGR48;
+
+    case AV_PIX_FMT_YA16BE:             return AV_PIX_FMT_GRAY16;
+    case AV_PIX_FMT_YA16LE:             return AV_PIX_FMT_GRAY16;
+
+    case AV_PIX_FMT_YUVA420P9BE:        return AV_PIX_FMT_YUV420P9;
+    case AV_PIX_FMT_YUVA422P9BE:        return AV_PIX_FMT_YUV422P9;
+    case AV_PIX_FMT_YUVA444P9BE:        return AV_PIX_FMT_YUV444P9;
+    case AV_PIX_FMT_YUVA420P9LE:        return AV_PIX_FMT_YUV420P9;
+    case AV_PIX_FMT_YUVA422P9LE:        return AV_PIX_FMT_YUV422P9;
+    case AV_PIX_FMT_YUVA444P9LE:        return AV_PIX_FMT_YUV444P9;
+    case AV_PIX_FMT_YUVA420P10BE:       return AV_PIX_FMT_YUV420P10;
+    case AV_PIX_FMT_YUVA422P10BE:       return AV_PIX_FMT_YUV422P10;
+    case AV_PIX_FMT_YUVA444P10BE:       return AV_PIX_FMT_YUV444P10;
+    case AV_PIX_FMT_YUVA420P10LE:       return AV_PIX_FMT_YUV420P10;
+    case AV_PIX_FMT_YUVA422P10LE:       return AV_PIX_FMT_YUV422P10;
+    case AV_PIX_FMT_YUVA444P10LE:       return AV_PIX_FMT_YUV444P10;
+    case AV_PIX_FMT_YUVA420P16BE:       return AV_PIX_FMT_YUV420P16;
+    case AV_PIX_FMT_YUVA422P16BE:       return AV_PIX_FMT_YUV422P16;
+    case AV_PIX_FMT_YUVA444P16BE:       return AV_PIX_FMT_YUV444P16;
+    case AV_PIX_FMT_YUVA420P16LE:       return AV_PIX_FMT_YUV420P16;
+    case AV_PIX_FMT_YUVA422P16LE:       return AV_PIX_FMT_YUV422P16;
+    case AV_PIX_FMT_YUVA444P16LE:       return AV_PIX_FMT_YUV444P16;
+
+//     case AV_PIX_FMT_AYUV64LE:
+//     case AV_PIX_FMT_AYUV64BE:
+//     case AV_PIX_FMT_PAL8:
+    default: return AV_PIX_FMT_NONE;
+    }
+}
+
 av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
                              SwsFilter *dstFilter)
 {
@@ -1002,7 +1144,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
     flags     = c->flags;
     emms_c();
     if (!rgb15to16)
-        sws_rgb2rgb_init();
+        ff_sws_rgb2rgb_init();
 
     unscaled = (srcW == dstW && srcH == dstH);
 
@@ -1023,6 +1165,10 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
     desc_src = av_pix_fmt_desc_get(srcFormat);
     desc_dst = av_pix_fmt_desc_get(dstFormat);
 
+    // If the source has no alpha then disable alpha blendaway
+    if (c->src0Alpha)
+        c->alphablend = SWS_ALPHA_BLEND_NONE;
+
     if (!(unscaled && sws_isSupportedEndiannessConversion(srcFormat) &&
           av_pix_fmt_swap_endianness(srcFormat) == dstFormat)) {
     if (!sws_isSupportedInput(srcFormat)) {
@@ -1036,6 +1182,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
         return AVERROR(EINVAL);
     }
     }
+    av_assert2(desc_src && desc_dst);
 
     i = flags & (SWS_POINT         |
                  SWS_AREA          |
@@ -1071,6 +1218,12 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
                srcW, srcH, dstW, dstH);
         return AVERROR(EINVAL);
     }
+    if (flags & SWS_FAST_BILINEAR) {
+        if (srcW < 8 || dstW < 8) {
+            flags ^= SWS_FAST_BILINEAR | SWS_BILINEAR;
+            c->flags = flags;
+        }
+    }
 
     if (!dstFilter)
         dstFilter = &dummyFilter;
@@ -1157,6 +1310,14 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
     if (flags & SWS_FULL_CHR_H_INT &&
         isAnyRGB(dstFormat)        &&
         !isPlanarRGB(dstFormat)    &&
+        dstFormat != AV_PIX_FMT_RGBA64LE &&
+        dstFormat != AV_PIX_FMT_RGBA64BE &&
+        dstFormat != AV_PIX_FMT_BGRA64LE &&
+        dstFormat != AV_PIX_FMT_BGRA64BE &&
+        dstFormat != AV_PIX_FMT_RGB48LE &&
+        dstFormat != AV_PIX_FMT_RGB48BE &&
+        dstFormat != AV_PIX_FMT_BGR48LE &&
+        dstFormat != AV_PIX_FMT_BGR48BE &&
         dstFormat != AV_PIX_FMT_RGBA  &&
         dstFormat != AV_PIX_FMT_ARGB  &&
         dstFormat != AV_PIX_FMT_BGRA  &&
@@ -1193,22 +1354,23 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
         srcFormat != AV_PIX_FMT_GBRP12BE  && srcFormat != AV_PIX_FMT_GBRP12LE &&
         srcFormat != AV_PIX_FMT_GBRP14BE  && srcFormat != AV_PIX_FMT_GBRP14LE &&
         srcFormat != AV_PIX_FMT_GBRP16BE  && srcFormat != AV_PIX_FMT_GBRP16LE &&
+        srcFormat != AV_PIX_FMT_GBRAP16BE  && srcFormat != AV_PIX_FMT_GBRAP16LE &&
         ((dstW >> c->chrDstHSubSample) <= (srcW >> 1) ||
          (flags & SWS_FAST_BILINEAR)))
         c->chrSrcHSubSample = 1;
 
-    // Note the FF_CEIL_RSHIFT is so that we always round toward +inf.
-    c->chrSrcW = FF_CEIL_RSHIFT(srcW, c->chrSrcHSubSample);
-    c->chrSrcH = FF_CEIL_RSHIFT(srcH, c->chrSrcVSubSample);
-    c->chrDstW = FF_CEIL_RSHIFT(dstW, c->chrDstHSubSample);
-    c->chrDstH = FF_CEIL_RSHIFT(dstH, c->chrDstVSubSample);
+    // Note the AV_CEIL_RSHIFT is so that we always round toward +inf.
+    c->chrSrcW = AV_CEIL_RSHIFT(srcW, c->chrSrcHSubSample);
+    c->chrSrcH = AV_CEIL_RSHIFT(srcH, c->chrSrcVSubSample);
+    c->chrDstW = AV_CEIL_RSHIFT(dstW, c->chrDstHSubSample);
+    c->chrDstH = AV_CEIL_RSHIFT(dstH, c->chrDstVSubSample);
 
     FF_ALLOCZ_OR_GOTO(c, c->formatConvBuffer, FFALIGN(srcW*2+78, 16) * 2, fail);
 
-    c->srcBpc = 1 + desc_src->comp[0].depth_minus1;
+    c->srcBpc = desc_src->comp[0].depth;
     if (c->srcBpc < 8)
         c->srcBpc = 8;
-    c->dstBpc = 1 + desc_dst->comp[0].depth_minus1;
+    c->dstBpc = desc_dst->comp[0].depth;
     if (c->dstBpc < 8)
         c->dstBpc = 8;
     if (isAnyRGB(srcFormat) || srcFormat == AV_PIX_FMT_PAL8)
@@ -1289,6 +1451,15 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
         if (!c2->gamma || !c2->inv_gamma)
             return AVERROR(ENOMEM);
 
+        // is_internal_flag is set after creating the context;
+        // to properly create the gamma convert FilterDescriptor,
+        // we have to re-initialize it
+        ff_free_filters(c2);
+        if (ff_init_filters(c2) < 0) {
+            sws_freeContext(c2);
+            return -1;
+        }
+
         c->cascaded_context[2] = NULL;
         if (dstFormat != tmpFmt) {
             ret = av_image_alloc(c->cascaded1_tmp, c->cascaded1_tmpStride,
@@ -1330,6 +1501,47 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
         }
     }
 
+    if (CONFIG_SWSCALE_ALPHA && isALPHA(srcFormat) && !isALPHA(dstFormat)) {
+        enum AVPixelFormat tmpFormat = alphaless_fmt(srcFormat);
+
+        if (tmpFormat != AV_PIX_FMT_NONE && c->alphablend != SWS_ALPHA_BLEND_NONE)
+        if (!unscaled ||
+            dstFormat != tmpFormat ||
+            usesHFilter || usesVFilter ||
+            c->srcRange != c->dstRange
+        ) {
+            c->cascaded_mainindex = 1;
+            ret = av_image_alloc(c->cascaded_tmp, c->cascaded_tmpStride,
+                                srcW, srcH, tmpFormat, 64);
+            if (ret < 0)
+                return ret;
+
+            c->cascaded_context[0] = sws_alloc_set_opts(srcW, srcH, srcFormat,
+                                                        srcW, srcH, tmpFormat,
+                                                        flags, c->param);
+            if (!c->cascaded_context[0])
+                return -1;
+            c->cascaded_context[0]->alphablend = c->alphablend;
+            ret = sws_init_context(c->cascaded_context[0], NULL , NULL);
+            if (ret < 0)
+                return ret;
+
+            c->cascaded_context[1] = sws_alloc_set_opts(srcW, srcH, tmpFormat,
+                                                        dstW, dstH, dstFormat,
+                                                        flags, c->param);
+            if (!c->cascaded_context[1])
+                return -1;
+
+            c->cascaded_context[1]->srcRange = c->srcRange;
+            c->cascaded_context[1]->dstRange = c->dstRange;
+            ret = sws_init_context(c->cascaded_context[1], srcFilter , dstFilter);
+            if (ret < 0)
+                return ret;
+
+            return 0;
+        }
+    }
+
 #define USE_MMAP (HAVE_MMAP && HAVE_MPROTECT && defined MAP_ANONYMOUS)
 
     /* precalculate horizontal scaler filter coefficients */
@@ -1519,7 +1731,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
 
     // try to avoid drawing green stuff between the right end and the stride end
     for (i = 0; i < c->vChrBufSize; i++)
-        if(desc_dst->comp[0].depth_minus1 == 15){
+        if(desc_dst->comp[0].depth == 16){
             av_assert0(c->dstBpc > 14);
             for(j=0; j<dst_stride/2+1; j++)
                 ((int32_t*)(c->chrUPixBuf[i]))[j] = 1<<18;
@@ -1576,6 +1788,22 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
                c->chrXInc, c->chrYInc);
     }
 
+    /* alpha blend special case, note this has been split via cascaded contexts if it's scaled */
+    if (unscaled && !usesHFilter && !usesVFilter &&
+        c->alphablend != SWS_ALPHA_BLEND_NONE &&
+        isALPHA(srcFormat) &&
+        (c->srcRange == c->dstRange || isAnyRGB(dstFormat)) &&
+        alphaless_fmt(srcFormat) == dstFormat
+    ) {
+        c->swscale = ff_sws_alphablendaway;
+
+        if (flags & SWS_PRINT_INFO)
+            av_log(c, AV_LOG_INFO,
+                    "using alpha blendaway %s -> %s special converter\n",
+                    av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat));
+        return 0;
+    }
+
     /* unscaled special cases */
     if (unscaled && !usesHFilter && !usesVFilter &&
         (c->srcRange == c->dstRange || isAnyRGB(dstFormat))) {
@@ -1591,13 +1819,16 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
     }
 
     c->swscale = ff_getSwsFunc(c);
-    return 0;
+    return ff_init_filters(c);
 fail: // FIXME replace things by appropriate error codes
     if (ret == RETCODE_USE_CASCADE)  {
         int tmpW = sqrt(srcW * (int64_t)dstW);
         int tmpH = sqrt(srcH * (int64_t)dstH);
         enum AVPixelFormat tmpFormat = AV_PIX_FMT_YUV420P;
 
+        if (isALPHA(srcFormat))
+            tmpFormat = AV_PIX_FMT_YUVA420P;
+
         if (srcW*(int64_t)srcH <= 4LL*dstW*dstH)
             return AVERROR(EINVAL);
 
@@ -1622,10 +1853,9 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
     return -1;
 }
 
-SwsContext *sws_getContext(int srcW, int srcH, enum AVPixelFormat srcFormat,
-                           int dstW, int dstH, enum AVPixelFormat dstFormat,
-                           int flags, SwsFilter *srcFilter,
-                           SwsFilter *dstFilter, const double *param)
+SwsContext *sws_alloc_set_opts(int srcW, int srcH, enum AVPixelFormat srcFormat,
+                               int dstW, int dstH, enum AVPixelFormat dstFormat,
+                               int flags, const double *param)
 {
     SwsContext *c;
 
@@ -1645,6 +1875,22 @@ SwsContext *sws_getContext(int srcW, int srcH, enum AVPixelFormat srcFormat,
         c->param[1] = param[1];
     }
 
+    return c;
+}
+
+SwsContext *sws_getContext(int srcW, int srcH, enum AVPixelFormat srcFormat,
+                           int dstW, int dstH, enum AVPixelFormat dstFormat,
+                           int flags, SwsFilter *srcFilter,
+                           SwsFilter *dstFilter, const double *param)
+{
+    SwsContext *c;
+
+    c = sws_alloc_set_opts(srcW, srcH, srcFormat,
+                           dstW, dstH, dstFormat,
+                           flags, param);
+    if (!c)
+        return NULL;
+
     if (sws_init_context(c, srcFilter, dstFilter) < 0) {
         sws_freeContext(c);
         return NULL;
@@ -1653,6 +1899,22 @@ SwsContext *sws_getContext(int srcW, int srcH, enum AVPixelFormat srcFormat,
     return c;
 }
 
+static int isnan_vec(SwsVector *a)
+{
+    int i;
+    for (i=0; i<a->length; i++)
+        if (isnan(a->coeff[i]))
+            return 1;
+    return 0;
+}
+
+static void makenan_vec(SwsVector *a)
+{
+    int i;
+    for (i=0; i<a->length; i++)
+        a->coeff[i] = NAN;
+}
+
 SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur,
                                 float lumaSharpen, float chromaSharpen,
                                 float chromaHShift, float chromaVShift,
@@ -1714,6 +1976,12 @@ SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur,
     sws_normalizeVec(filter->lumH, 1.0);
     sws_normalizeVec(filter->lumV, 1.0);
 
+    if (isnan_vec(filter->chrH) ||
+        isnan_vec(filter->chrV) ||
+        isnan_vec(filter->lumH) ||
+        isnan_vec(filter->lumV))
+        goto fail;
+
     if (verbose)
         sws_printVec2(filter->chrH, NULL, AV_LOG_DEBUG);
     if (verbose)
@@ -1889,6 +2157,10 @@ static SwsVector *sws_getShiftedVec(SwsVector *a, int shift)
 void sws_shiftVec(SwsVector *a, int shift)
 {
     SwsVector *shifted = sws_getShiftedVec(a, shift);
+    if (!shifted) {
+        makenan_vec(a);
+        return;
+    }
     av_free(a->coeff);
     a->coeff  = shifted->coeff;
     a->length = shifted->length;
@@ -1898,6 +2170,10 @@ void sws_shiftVec(SwsVector *a, int shift)
 void sws_addVec(SwsVector *a, SwsVector *b)
 {
     SwsVector *sum = sws_sumVec(a, b);
+    if (!sum) {
+        makenan_vec(a);
+        return;
+    }
     av_free(a->coeff);
     a->coeff  = sum->coeff;
     a->length = sum->length;
@@ -1907,6 +2183,10 @@ void sws_addVec(SwsVector *a, SwsVector *b)
 void sws_subVec(SwsVector *a, SwsVector *b)
 {
     SwsVector *diff = sws_diffVec(a, b);
+    if (!diff) {
+        makenan_vec(a);
+        return;
+    }
     av_free(a->coeff);
     a->coeff  = diff->coeff;
     a->length = diff->length;
@@ -1916,6 +2196,10 @@ void sws_subVec(SwsVector *a, SwsVector *b)
 void sws_convVec(SwsVector *a, SwsVector *b)
 {
     SwsVector *conv = sws_getConvVec(a, b);
+    if (!conv) {
+        makenan_vec(a);
+        return;
+    }
     av_free(a->coeff);
     a->coeff  = conv->coeff;
     a->length = conv->length;
@@ -2055,6 +2339,7 @@ void sws_freeContext(SwsContext *c)
     av_freep(&c->gamma);
     av_freep(&c->inv_gamma);
 
+    ff_free_filters(c);
 
     av_free(c);
 }
diff --git a/libswscale/version.h b/libswscale/version.h
index 228c5770..24908b89 100644
--- a/libswscale/version.h
+++ b/libswscale/version.h
@@ -26,9 +26,9 @@
 
 #include "libavutil/version.h"
 
-#define LIBSWSCALE_VERSION_MAJOR 3
-#define LIBSWSCALE_VERSION_MINOR 1
-#define LIBSWSCALE_VERSION_MICRO 101
+#define LIBSWSCALE_VERSION_MAJOR   4
+#define LIBSWSCALE_VERSION_MINOR   0
+#define LIBSWSCALE_VERSION_MICRO 100
 
 #define LIBSWSCALE_VERSION_INT  AV_VERSION_INT(LIBSWSCALE_VERSION_MAJOR, \
                                                LIBSWSCALE_VERSION_MINOR, \
@@ -46,11 +46,4 @@
  * the public API and may change, break or disappear at any time.
  */
 
-#ifndef FF_API_SWS_CPU_CAPS
-#define FF_API_SWS_CPU_CAPS    (LIBSWSCALE_VERSION_MAJOR < 4)
-#endif
-#ifndef FF_API_ARCH_BFIN
-#define FF_API_ARCH_BFIN       (LIBSWSCALE_VERSION_MAJOR < 4)
-#endif
-
 #endif /* SWSCALE_VERSION_H */
diff --git a/libswscale/vscale.c b/libswscale/vscale.c
new file mode 100644
index 00000000..72f799de
--- /dev/null
+++ b/libswscale/vscale.c
@@ -0,0 +1,315 @@
+/*
+ * Copyright (C) 2015 Pedro Arthur <bygrandao@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "swscale_internal.h"
+/* Per-descriptor state of a vertical scaler stage; filled in by ff_init_vscale_pfn(). */
+typedef struct VScalerContext
+{
+    uint16_t *filter[2];        /* [0]: coefficient table (or the MMX filter block when isMMX); [1]: table used for the alpha plane */
+    int32_t  *filter_pos;       /* per output line: first source line read by the filter (before clamping) */
+    int filter_size;            /* number of vertical filter taps */
+    int isMMX;                  /* copy of use_mmx; when set, the planar paths do not offset filter[] by output line */
+    void *pfn;                  /* output function; cast to the matching yuv2*_fn type at each call site */
+    yuv2packedX_fn yuv2packedX; /* general packed output function, called by packed_vscale() */
+} VScalerContext;
+
+/* Vertically filter luma (plane 0) and, if desc->alpha is set, alpha (plane 3) for output line sliceY; always returns 1. */
+static int lum_planar_vscale(SwsContext *c, SwsFilterDescriptor *desc, int sliceY, int sliceH)
+{
+    VScalerContext *inst = desc->instance;
+    int dstW = desc->dst->width;
+    /* first source line of the filter window, clamped to be no smaller than 1 - filter_size */
+    int first = FFMAX(1-inst->filter_size, inst->filter_pos[sliceY]);
+    int sp = first - desc->src->plane[0].sliceY;
+    int dp = sliceY - desc->dst->plane[0].sliceY;
+    uint8_t **src = desc->src->plane[0].line + sp;
+    uint8_t **dst = desc->dst->plane[0].line + dp;
+    uint16_t *filter = inst->filter[0] + (inst->isMMX ? 0 : sliceY * inst->filter_size);
+    /* a 1-tap filter is dispatched to the yuv2planar1 variant, which takes one source line and no coefficients */
+    if (inst->filter_size == 1)
+        ((yuv2planar1_fn)inst->pfn)((const int16_t*)src[0], dst[0], dstW, c->lumDither8, 0);
+    else
+        ((yuv2planarX_fn)inst->pfn)(filter, inst->filter_size, (const int16_t**)src, dst[0], dstW, c->lumDither8, 0);
+    /* alpha: same source window as luma, but read from plane 3 with coefficient table filter[1] */
+    if (desc->alpha) {
+        int sp = first - desc->src->plane[3].sliceY;
+        int dp = sliceY - desc->dst->plane[3].sliceY;
+        uint8_t **src = desc->src->plane[3].line + sp;
+        uint8_t **dst = desc->dst->plane[3].line + dp;
+        uint16_t *filter = inst->filter[1] + (inst->isMMX ? 0 : sliceY * inst->filter_size);
+
+        if (inst->filter_size == 1)
+            ((yuv2planar1_fn)inst->pfn)((const int16_t*)src[0], dst[0], dstW, c->lumDither8, 0);
+        else
+            ((yuv2planarX_fn)inst->pfn)(filter, inst->filter_size, (const int16_t**)src, dst[0], dstW, c->lumDither8, 0);
+    }
+
+    return 1;
+}
+/* Vertically filter the chroma planes (1 and 2); returns 0 when sliceY is not a multiple of 1 << v_chr_sub_sample, else 1. */
+static int chr_planar_vscale(SwsContext *c, SwsFilterDescriptor *desc, int sliceY, int sliceH)
+{
+    const int chrSkipMask = (1 << desc->dst->v_chr_sub_sample) - 1;
+    if (sliceY & chrSkipMask)
+        return 0;
+    else {
+        VScalerContext *inst = desc->instance;
+        int dstW = AV_CEIL_RSHIFT(desc->dst->width, desc->dst->h_chr_sub_sample);
+        int chrSliceY = sliceY >> desc->dst->v_chr_sub_sample;
+        /* both chroma planes share one filter window; only the per-plane sliceY bases differ */
+        int first = FFMAX(1-inst->filter_size, inst->filter_pos[chrSliceY]);
+        int sp1 = first - desc->src->plane[1].sliceY;
+        int sp2 = first - desc->src->plane[2].sliceY;
+        int dp1 = chrSliceY - desc->dst->plane[1].sliceY;
+        int dp2 = chrSliceY - desc->dst->plane[2].sliceY;
+        uint8_t **src1 = desc->src->plane[1].line + sp1;
+        uint8_t **src2 = desc->src->plane[2].line + sp2;
+        uint8_t **dst1 = desc->dst->plane[1].line + dp1;
+        uint8_t **dst2 = desc->dst->plane[2].line + dp2;
+        uint16_t *filter = inst->filter[0] + (inst->isMMX ? 0 : chrSliceY * inst->filter_size);
+        /* interleaved output (c->yuv2nv12cX set) takes both source planes and writes only dst1; otherwise each plane is written separately */
+        if (c->yuv2nv12cX) {
+            ((yuv2interleavedX_fn)inst->pfn)(c, filter, inst->filter_size, (const int16_t**)src1, (const int16_t**)src2, dst1[0], dstW);
+        } else if (inst->filter_size == 1) {
+            ((yuv2planar1_fn)inst->pfn)((const int16_t*)src1[0], dst1[0], dstW, c->chrDither8, 0);
+            ((yuv2planar1_fn)inst->pfn)((const int16_t*)src2[0], dst2[0], dstW, c->chrDither8, 3);
+        } else {
+            ((yuv2planarX_fn)inst->pfn)(filter, inst->filter_size, (const int16_t**)src1, dst1[0], dstW, c->chrDither8, 0);
+            ((yuv2planarX_fn)inst->pfn)(filter, inst->filter_size, (const int16_t**)src2, dst2[0], dstW, c->chrDither8, inst->isMMX ? (c->uv_offx2 >> 1) : 3);
+        }
+    }
+
+    return 1;
+}
+/* Vertically filter luma, chroma and (if desc->alpha) alpha into one line of packed output in dst plane 0; always returns 1. */
+static int packed_vscale(SwsContext *c, SwsFilterDescriptor *desc, int sliceY, int sliceH)
+{
+    VScalerContext *inst = desc->instance;
+    int dstW = desc->dst->width;
+    int chrSliceY = sliceY >> desc->dst->v_chr_sub_sample;
+    /* desc->instance holds two contexts: inst[0] describes the luma filter, inst[1] the chroma filter */
+    int lum_fsize = inst[0].filter_size;
+    int chr_fsize = inst[1].filter_size;
+    uint16_t *lum_filter = inst[0].filter[0];
+    uint16_t *chr_filter = inst[1].filter[0];
+    /* first source line of each filter window, clamped to be no smaller than 1 - filter size */
+    int firstLum = FFMAX(1-lum_fsize, inst[0].filter_pos[   sliceY]);
+    int firstChr = FFMAX(1-chr_fsize, inst[1].filter_pos[chrSliceY]);
+    /* turn line numbers into indices into each plane's line[] array by subtracting the plane's sliceY */
+    int sp0 = firstLum - desc->src->plane[0].sliceY;
+    int sp1 = firstChr - desc->src->plane[1].sliceY;
+    int sp2 = firstChr - desc->src->plane[2].sliceY;
+    int sp3 = firstLum - desc->src->plane[3].sliceY;
+    int dp = sliceY - desc->dst->plane[0].sliceY;
+    uint8_t **src0 = desc->src->plane[0].line + sp0;
+    uint8_t **src1 = desc->src->plane[1].line + sp1;
+    uint8_t **src2 = desc->src->plane[2].line + sp2;
+    uint8_t **src3 = desc->alpha ? desc->src->plane[3].line + sp3 : NULL;
+    uint8_t **dst = desc->dst->plane[0].line + dp;
+    /* Try the 1-tap and 2-tap fast paths first; the 2-tap paths require each coefficient pair to sum to 4096. */
+
+    if (c->yuv2packed1 && lum_fsize == 1 && chr_fsize == 1) { // unscaled RGB
+        ((yuv2packed1_fn)inst->pfn)(c, (const int16_t*)*src0, (const int16_t**)src1, (const int16_t**)src2,
+                                    (const int16_t*)(desc->alpha ? *src3 : NULL),  *dst, dstW, 0, sliceY);
+    } else if (c->yuv2packed1 && lum_fsize == 1 && chr_fsize == 2 &&
+               chr_filter[2 * chrSliceY + 1] + chr_filter[2 * chrSliceY] == 4096 &&
+               chr_filter[2 * chrSliceY + 1] <= 4096U) { // unscaled RGB
+        int chrAlpha = chr_filter[2 * chrSliceY + 1];
+        ((yuv2packed1_fn)inst->pfn)(c, (const int16_t*)*src0, (const int16_t**)src1, (const int16_t**)src2,
+                                    (const int16_t*)(desc->alpha ? *src3 : NULL),  *dst, dstW, chrAlpha, sliceY);
+    } else if (c->yuv2packed2 && lum_fsize == 2 && chr_fsize == 2 &&
+               lum_filter[2 * sliceY + 1] + lum_filter[2 * sliceY] == 4096 &&
+               lum_filter[2 * sliceY + 1] <= 4096U &&
+               chr_filter[2 * chrSliceY + 1] + chr_filter[2 * chrSliceY] == 4096 &&
+               chr_filter[2 * chrSliceY + 1] <= 4096U
+    ) { // bilinear upscale RGB
+        int lumAlpha = lum_filter[2 * sliceY + 1];
+        int chrAlpha = chr_filter[2 * chrSliceY + 1];
+        c->lumMmxFilter[2] =
+        c->lumMmxFilter[3] = lum_filter[2 * sliceY]    * 0x10001;
+        c->chrMmxFilter[2] =
+        c->chrMmxFilter[3] = chr_filter[2 * chrSliceY] * 0x10001;
+        ((yuv2packed2_fn)inst->pfn)(c, (const int16_t**)src0, (const int16_t**)src1, (const int16_t**)src2, (const int16_t**)src3,
+                    *dst, dstW, lumAlpha, chrAlpha, sliceY);
+    } else { // general RGB
+        if ((c->yuv2packed1 && lum_fsize == 1 && chr_fsize == 2) ||
+            (c->yuv2packed2 && lum_fsize == 2 && chr_fsize == 2)) {
+            if (!c->warned_unuseable_bilinear)
+                av_log(c, AV_LOG_INFO, "Optimized 2 tap filter code cannot be used\n");
+            c->warned_unuseable_bilinear = 1;
+        }
+
+        inst->yuv2packedX(c, lum_filter + sliceY * lum_fsize,
+                    (const int16_t**)src0, lum_fsize, chr_filter + chrSliceY * chr_fsize,
+                    (const int16_t**)src1, (const int16_t**)src2, chr_fsize, (const int16_t**)src3, *dst, dstW, sliceY);
+    }
+    return 1;
+}
+/* Vertically filter all planes through the yuv2anyX function stored in pfn (up to four output planes); always returns 1. */
+static int any_vscale(SwsContext *c, SwsFilterDescriptor *desc, int sliceY, int sliceH)
+{
+    VScalerContext *inst = desc->instance;
+    int dstW = desc->dst->width;
+    int chrSliceY = sliceY >> desc->dst->v_chr_sub_sample;
+    /* desc->instance holds two contexts: inst[0] describes the luma filter, inst[1] the chroma filter */
+    int lum_fsize = inst[0].filter_size;
+    int chr_fsize = inst[1].filter_size;
+    uint16_t *lum_filter = inst[0].filter[0];
+    uint16_t *chr_filter = inst[1].filter[0];
+    /* first source line of each filter window, clamped to be no smaller than 1 - filter size */
+    int firstLum = FFMAX(1-lum_fsize, inst[0].filter_pos[   sliceY]);
+    int firstChr = FFMAX(1-chr_fsize, inst[1].filter_pos[chrSliceY]);
+    /* turn line numbers into indices into each plane's line[] array by subtracting the plane's sliceY */
+    int sp0 = firstLum - desc->src->plane[0].sliceY;
+    int sp1 = firstChr - desc->src->plane[1].sliceY;
+    int sp2 = firstChr - desc->src->plane[2].sliceY;
+    int sp3 = firstLum - desc->src->plane[3].sliceY;
+    int dp0 = sliceY - desc->dst->plane[0].sliceY;
+    int dp1 = chrSliceY - desc->dst->plane[1].sliceY;
+    int dp2 = chrSliceY - desc->dst->plane[2].sliceY;
+    int dp3 = sliceY - desc->dst->plane[3].sliceY;
+
+    uint8_t **src0 = desc->src->plane[0].line + sp0;
+    uint8_t **src1 = desc->src->plane[1].line + sp1;
+    uint8_t **src2 = desc->src->plane[2].line + sp2;
+    uint8_t **src3 = desc->alpha ? desc->src->plane[3].line + sp3 : NULL;
+    uint8_t *dst[4] = { desc->dst->plane[0].line[dp0],
+                        desc->dst->plane[1].line[dp1],
+                        desc->dst->plane[2].line[dp2],
+                        desc->alpha ? desc->dst->plane[3].line[dp3] : NULL };
+    /* chroma coefficients are indexed per chroma line (chrSliceY), matching firstChr above and packed_vscale() */
+    av_assert1(!c->yuv2packed1 && !c->yuv2packed2);
+    ((yuv2anyX_fn)inst->pfn)(c, lum_filter + sliceY * lum_fsize,
+             (const int16_t**)src0, lum_fsize, chr_filter + chrSliceY * chr_fsize,
+             (const int16_t**)src1, (const int16_t**)src2, chr_fsize, (const int16_t**)src3, dst, dstW, sliceY);
+
+    return 1;
+
+}
+/* Allocate the vertical scaler context(s) and set up desc[0] (plus desc[1] for planar chroma); returns 0 or AVERROR(ENOMEM). */
+int ff_init_vscale(SwsContext *c, SwsFilterDescriptor *desc, SwsSlice *src, SwsSlice *dst)
+{
+    VScalerContext *lumCtx = NULL;
+    VScalerContext *chrCtx = NULL;
+    /* planar YUV and gray-without-alpha outputs use separate luma and chroma descriptors */
+    if (isPlanarYUV(c->dstFormat) || (isGray(c->dstFormat) && !isALPHA(c->dstFormat))) {
+        lumCtx = av_mallocz(sizeof(VScalerContext));
+        if (!lumCtx)
+            return AVERROR(ENOMEM);
+
+
+        desc[0].process = lum_planar_vscale;
+        desc[0].instance = lumCtx;
+        desc[0].src = src;
+        desc[0].dst = dst;
+        desc[0].alpha = c->alpPixBuf != 0;
+        /* gray output skips the chroma descriptor */
+        if (!isGray(c->dstFormat)) {
+            chrCtx = av_mallocz(sizeof(VScalerContext));
+            if (!chrCtx)
+                return AVERROR(ENOMEM);
+            desc[1].process = chr_planar_vscale;
+            desc[1].instance = chrCtx;
+            desc[1].src = src;
+            desc[1].dst = dst;
+        }
+    } else {
+        lumCtx = av_mallocz_array(sizeof(VScalerContext), 2);
+        if (!lumCtx)
+            return AVERROR(ENOMEM);
+        chrCtx = &lumCtx[1];
+        /* everything else: one descriptor owning a two-element array, [0] = luma and [1] = chroma */
+        desc[0].process = c->yuv2packedX ? packed_vscale : any_vscale;
+        desc[0].instance = lumCtx;
+        desc[0].src = src;
+        desc[0].dst = dst;
+        desc[0].alpha = c->alpPixBuf != 0;
+    }
+    /* fill in filters and output function pointers from the current context settings */
+    ff_init_vscale_pfn(c, c->yuv2plane1, c->yuv2planeX, c->yuv2nv12cX,
+        c->yuv2packed1, c->yuv2packed2, c->yuv2packedX, c->yuv2anyX, c->use_mmx_vfilter);
+    return 0;
+}
+/* (Re)load filter tables, sizes, positions and output function pointers into the vertical scaler contexts in c->desc. */
+void ff_init_vscale_pfn(SwsContext *c,
+    yuv2planar1_fn yuv2plane1,
+    yuv2planarX_fn yuv2planeX,
+    yuv2interleavedX_fn yuv2nv12cX,
+    yuv2packed1_fn yuv2packed1,
+    yuv2packed2_fn yuv2packed2,
+    yuv2packedX_fn yuv2packedX,
+    yuv2anyX_fn yuv2anyX, int use_mmx)
+{
+    VScalerContext *lumCtx = NULL;
+    VScalerContext *chrCtx = NULL;
+    int idx = c->numDesc - (c->is_internal_gamma ? 2 : 1); //FIXME avoid hardcoding indexes
+    /* idx starts at the last vertical descriptor; with chroma present that is the chroma one and luma sits just before it */
+    if (isPlanarYUV(c->dstFormat) || (isGray(c->dstFormat) && !isALPHA(c->dstFormat))) {
+        if (!isGray(c->dstFormat)) {
+            chrCtx = c->desc[idx].instance;
+            /* with use_mmx the MMX filter block replaces the plain coefficient table */
+            chrCtx->filter[0] = use_mmx ? (int16_t*)c->chrMmxFilter : c->vChrFilter;
+            chrCtx->filter_size = c->vChrFilterSize;
+            chrCtx->filter_pos = c->vChrFilterPos;
+            chrCtx->isMMX = use_mmx;
+
+            --idx;
+            if (yuv2nv12cX)               chrCtx->pfn = yuv2nv12cX;
+            else if (c->vChrFilterSize == 1) chrCtx->pfn = yuv2plane1;
+            else                             chrCtx->pfn = yuv2planeX;
+        }
+
+        lumCtx = c->desc[idx].instance;
+        /* filter[1] is the table lum_planar_vscale() uses for the alpha plane */
+        lumCtx->filter[0] = use_mmx ? (int16_t*)c->lumMmxFilter : c->vLumFilter;
+        lumCtx->filter[1] = use_mmx ? (int16_t*)c->alpMmxFilter : c->vLumFilter;
+        lumCtx->filter_size = c->vLumFilterSize;
+        lumCtx->filter_pos = c->vLumFilterPos;
+        lumCtx->isMMX = use_mmx;
+        /* a 1-tap filter uses the single-line output function */
+        if (c->vLumFilterSize == 1) lumCtx->pfn = yuv2plane1;
+        else                        lumCtx->pfn = yuv2planeX;
+
+    } else {
+        lumCtx = c->desc[idx].instance;
+        chrCtx = &lumCtx[1];
+        /* packed/any outputs always use the plain coefficient tables, even when use_mmx is set */
+        lumCtx->filter[0] = c->vLumFilter;
+        lumCtx->filter_size = c->vLumFilterSize;
+        lumCtx->filter_pos = c->vLumFilterPos;
+
+        chrCtx->filter[0] = c->vChrFilter;
+        chrCtx->filter_size = c->vChrFilterSize;
+        chrCtx->filter_pos = c->vChrFilterPos;
+
+        lumCtx->isMMX = use_mmx;
+        chrCtx->isMMX = use_mmx;
+        /* pfn is only assigned for the 1-tap/2-tap fast paths; packed_vscale() otherwise calls yuv2packedX */
+        if (yuv2packedX) {
+            if (c->yuv2packed1 && c->vLumFilterSize == 1 && c->vChrFilterSize <= 2)
+                lumCtx->pfn = yuv2packed1;
+            else if (c->yuv2packed2 && c->vLumFilterSize == 2 && c->vChrFilterSize == 2)
+                lumCtx->pfn = yuv2packed2;
+            lumCtx->yuv2packedX = yuv2packedX;
+        } else
+            lumCtx->pfn = yuv2anyX;
+    }
+}
+
+
diff --git a/libswscale/x86/output.asm b/libswscale/x86/output.asm
index 9ea4af95..133817cb 100644
--- a/libswscale/x86/output.asm
+++ b/libswscale/x86/output.asm
@@ -54,76 +54,8 @@ SECTION .text
 ; int32_t if $output_size is 16. $filter is 12-bits. $filterSize is a multiple
 ; of 2. $offset is either 0 or 3. $dither holds 8 values.
 ;-----------------------------------------------------------------------------
-
-%macro yuv2planeX_fn 3
-
-%if ARCH_X86_32
-%define cntr_reg fltsizeq
-%define movsx mov
-%else
-%define cntr_reg r7
-%define movsx movsxd
-%endif
-
-cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset
-%if %1 == 8 || %1 == 9 || %1 == 10
-    pxor            m6,  m6
-%endif ; %1 == 8/9/10
-
-%if %1 == 8
-%if ARCH_X86_32
-%assign pad 0x2c - (stack_offset & 15)
-    SUB             rsp, pad
-%define m_dith m7
-%else ; x86-64
-%define m_dith m9
-%endif ; x86-32
-
-    ; create registers holding dither
-    movq        m_dith, [ditherq]        ; dither
-    test        offsetd, offsetd
-    jz              .no_rot
-%if mmsize == 16
-    punpcklqdq  m_dith,  m_dith
-%endif ; mmsize == 16
-    PALIGNR     m_dith,  m_dith,  3,  m0
-.no_rot:
-%if mmsize == 16
-    punpcklbw   m_dith,  m6
-%if ARCH_X86_64
-    punpcklwd       m8,  m_dith,  m6
-    pslld           m8,  12
-%else ; x86-32
-    punpcklwd       m5,  m_dith,  m6
-    pslld           m5,  12
-%endif ; x86-32/64
-    punpckhwd   m_dith,  m6
-    pslld       m_dith,  12
-%if ARCH_X86_32
-    mova      [rsp+ 0],  m5
-    mova      [rsp+16],  m_dith
-%endif
-%else ; mmsize == 8
-    punpcklbw       m5,  m_dith,  m6
-    punpckhbw   m_dith,  m6
-    punpcklwd       m4,  m5,  m6
-    punpckhwd       m5,  m6
-    punpcklwd       m3,  m_dith,  m6
-    punpckhwd   m_dith,  m6
-    pslld           m4,  12
-    pslld           m5,  12
-    pslld           m3,  12
-    pslld       m_dith,  12
-    mova      [rsp+ 0],  m4
-    mova      [rsp+ 8],  m5
-    mova      [rsp+16],  m3
-    mova      [rsp+24],  m_dith
-%endif ; mmsize == 8/16
-%endif ; %1 == 8
-
-    xor             r5,  r5
-
-.pixelloop:
+%macro yuv2planeX_mainloop 2
+.pixelloop_%2:
 %assign %%i 0
     ; the rep here is for the 8bit output mmx case, where dither covers
     ; 8 pixels but we can only handle 2 pixels per register, and thus 4
@@ -150,7 +82,7 @@ cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset
     mova            m2,  m1
 %endif ; %1 == 8/9/10/16
     movsx     cntr_reg,  fltsizem
-.filterloop_ %+ %%i:
+.filterloop_%2_ %+ %%i:
     ; input pixels
     mov             r6, [srcq+gprsize*cntr_reg-2*gprsize]
 %if %1 == 16
@@ -197,7 +129,7 @@ cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset
 %endif ; %1 == 8/9/10/16
 
     sub       cntr_reg,  2
-    jg .filterloop_ %+ %%i
+    jg .filterloop_%2_ %+ %%i
 
 %if %1 == 16
     psrad           m2,  31 - %1
@@ -224,7 +156,7 @@ cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset
 %endif ; mmxext/sse2/sse4/avx
     pminsw          m2, [yuv2yuvX_%1_upper]
 %endif ; %1 == 9/10/16
-    mova   [dstq+r5*2],  m2
+    mov%2   [dstq+r5*2],  m2
 %endif ; %1 == 8/9/10/16
 
     add             r5,  mmsize/2
@@ -232,7 +164,87 @@ cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset
 
 %assign %%i %%i+2
 %endrep
-    jg .pixelloop
+    jg .pixelloop_%2
+%endmacro
+
+%macro yuv2planeX_fn 3
+
+%if ARCH_X86_32
+%define cntr_reg fltsizeq
+%define movsx mov
+%else
+%define cntr_reg r7
+%define movsx movsxd
+%endif
+
+cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset
+%if %1 == 8 || %1 == 9 || %1 == 10
+    pxor            m6,  m6
+%endif ; %1 == 8/9/10
+
+%if %1 == 8
+%if ARCH_X86_32
+%assign pad 0x2c - (stack_offset & 15)
+    SUB             rsp, pad
+%define m_dith m7
+%else ; x86-64
+%define m_dith m9
+%endif ; x86-32
+
+    ; create registers holding dither
+    movq        m_dith, [ditherq]        ; dither
+    test        offsetd, offsetd
+    jz              .no_rot
+%if mmsize == 16
+    punpcklqdq  m_dith,  m_dith
+%endif ; mmsize == 16
+    PALIGNR     m_dith,  m_dith,  3,  m0
+.no_rot:
+%if mmsize == 16
+    punpcklbw   m_dith,  m6
+%if ARCH_X86_64
+    punpcklwd       m8,  m_dith,  m6
+    pslld           m8,  12
+%else ; x86-32
+    punpcklwd       m5,  m_dith,  m6
+    pslld           m5,  12
+%endif ; x86-32/64
+    punpckhwd   m_dith,  m6
+    pslld       m_dith,  12
+%if ARCH_X86_32
+    mova      [rsp+ 0],  m5
+    mova      [rsp+16],  m_dith
+%endif
+%else ; mmsize == 8
+    punpcklbw       m5,  m_dith,  m6
+    punpckhbw   m_dith,  m6
+    punpcklwd       m4,  m5,  m6
+    punpckhwd       m5,  m6
+    punpcklwd       m3,  m_dith,  m6
+    punpckhwd   m_dith,  m6
+    pslld           m4,  12
+    pslld           m5,  12
+    pslld           m3,  12
+    pslld       m_dith,  12
+    mova      [rsp+ 0],  m4
+    mova      [rsp+ 8],  m5
+    mova      [rsp+16],  m3
+    mova      [rsp+24],  m_dith
+%endif ; mmsize == 8/16
+%endif ; %1 == 8
+
+    xor             r5,  r5
+
+%if mmsize == 8 || %1 == 8
+    yuv2planeX_mainloop %1, a
+%else ; mmsize == 16
+    test          dstq, 15
+    jnz .unaligned
+    yuv2planeX_mainloop %1, a
+    REP_RET
+.unaligned:
+    yuv2planeX_mainloop %1, u
+%endif ; mmsize == 8/16
 
 %if %1 == 8
 %if ARCH_X86_32
diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c
index e97ba4fe..95d4f8fd 100644
--- a/libswscale/x86/rgb2rgb_template.c
+++ b/libswscale/x86/rgb2rgb_template.c
@@ -1434,7 +1434,9 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWid
     dst+= dstStride;
 
     for (y=1; y<srcHeight; y++) {
-        const x86_reg mmxSize= srcWidth&~15;
+        x86_reg mmxSize= srcWidth&~15;
+
+        if (mmxSize) {
         __asm__ volatile(
             "mov           %4, %%"REG_a"            \n\t"
             "movq        "MANGLE(mmx_ff)", %%mm0    \n\t"
@@ -1481,6 +1483,11 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWid
                NAMED_CONSTRAINTS_ADD(mmx_ff)
             : "%"REG_a
         );
+        } else {
+            mmxSize = 1;
+            dst[0]         = (src[0] * 3 + src[srcStride]) >> 2;
+            dst[dstStride] = (src[0] + 3 * src[srcStride]) >> 2;
+        }
 
         for (x=mmxSize-1; x<srcWidth-1; x++) {
             dst[2*x          +1]= (3*src[x+0] +   src[x+srcStride+1])>>2;
@@ -1887,8 +1894,9 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui
     for (h=0; h < height; h++) {
         int w;
 
-        if (width >= 16)
+        if (width >= 16) {
 #if COMPILE_TEMPLATE_SSE2
+            if (!((((intptr_t)src1) | ((intptr_t)src2) | ((intptr_t)dest))&15)) {
         __asm__(
             "xor              %%"REG_a", %%"REG_a"  \n\t"
             "1:                                     \n\t"
@@ -1907,7 +1915,8 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui
             ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15)
             : "memory", XMM_CLOBBERS("xmm0", "xmm1", "xmm2",) "%"REG_a
         );
-#else
+            } else
+#endif
         __asm__(
             "xor %%"REG_a", %%"REG_a"               \n\t"
             "1:                                     \n\t"
@@ -1933,7 +1942,8 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui
             ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15)
             : "memory", "%"REG_a
         );
-#endif
+
+        }
         for (w= (width&(~15)); w < width; w++) {
             dest[2*w+0] = src1[w];
             dest[2*w+1] = src2[w];
@@ -1943,9 +1953,7 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui
         src2 += src2Stride;
     }
     __asm__(
-#if !COMPILE_TEMPLATE_SSE2
             EMMS"       \n\t"
-#endif
             SFENCE"     \n\t"
             ::: "memory"
             );
@@ -2449,7 +2457,7 @@ static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
                                  int lumStride, int chromStride, int srcStride)
 {
     int y;
-    const int chromWidth = FF_CEIL_RSHIFT(width, 1);
+    const int chromWidth = AV_CEIL_RSHIFT(width, 1);
 
     for (y=0; y<height; y++) {
         RENAME(extract_even)(src, ydst, width);
@@ -2475,7 +2483,7 @@ static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
                                  int lumStride, int chromStride, int srcStride)
 {
     int y;
-    const int chromWidth = FF_CEIL_RSHIFT(width, 1);
+    const int chromWidth = AV_CEIL_RSHIFT(width, 1);
 
     for (y=0; y<height; y++) {
         RENAME(extract_even)(src, ydst, width);
@@ -2499,7 +2507,7 @@ static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
                                  int lumStride, int chromStride, int srcStride)
 {
     int y;
-    const int chromWidth = FF_CEIL_RSHIFT(width, 1);
+    const int chromWidth = AV_CEIL_RSHIFT(width, 1);
 
     for (y=0; y<height; y++) {
         RENAME(extract_odd)(src, ydst, width);
@@ -2525,7 +2533,7 @@ static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
                                  int lumStride, int chromStride, int srcStride)
 {
     int y;
-    const int chromWidth = FF_CEIL_RSHIFT(width, 1);
+    const int chromWidth = AV_CEIL_RSHIFT(width, 1);
 
     for (y=0; y<height; y++) {
         RENAME(extract_odd)(src, ydst, width);
diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
index d611b76c..66ac7d5e 100644
--- a/libswscale/x86/swscale.c
+++ b/libswscale/x86/swscale.c
@@ -85,11 +85,18 @@ void ff_updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrB
 {
     const int dstH= c->dstH;
     const int flags= c->flags;
+#ifdef NEW_FILTER
+    SwsPlane *lumPlane = &c->slice[c->numSlice-2].plane[0];
+    SwsPlane *chrUPlane = &c->slice[c->numSlice-2].plane[1];
+    SwsPlane *alpPlane = &c->slice[c->numSlice-2].plane[3];
+#else
     int16_t **lumPixBuf= c->lumPixBuf;
     int16_t **chrUPixBuf= c->chrUPixBuf;
     int16_t **alpPixBuf= c->alpPixBuf;
     const int vLumBufSize= c->vLumBufSize;
     const int vChrBufSize= c->vChrBufSize;
+#endif
+    int hasAlpha = c->alpPixBuf != NULL;
     int32_t *vLumFilterPos= c->vLumFilterPos;
     int32_t *vChrFilterPos= c->vChrFilterPos;
     int16_t *vLumFilter= c->vLumFilter;
@@ -110,13 +117,22 @@ void ff_updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrB
         c->greenDither= ff_dither4[dstY&1];
     c->redDither= ff_dither8[(dstY+1)&1];
     if (dstY < dstH - 2) {
+#ifdef NEW_FILTER
+        const int16_t **lumSrcPtr  = (const int16_t **)(void*) lumPlane->line + firstLumSrcY - lumPlane->sliceY;
+        const int16_t **chrUSrcPtr = (const int16_t **)(void*) chrUPlane->line + firstChrSrcY - chrUPlane->sliceY;
+        const int16_t **alpSrcPtr  = (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) ? (const int16_t **)(void*) alpPlane->line + firstLumSrcY - alpPlane->sliceY : NULL;
+#else
         const int16_t **lumSrcPtr= (const int16_t **)(void*) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
         const int16_t **chrUSrcPtr= (const int16_t **)(void*) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
         const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)(void*) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
+#endif
         int i;
-
         if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) {
+#ifdef NEW_FILTER
+            const int16_t **tmpY = (const int16_t **) lumPlane->tmp;
+#else
             const int16_t **tmpY = (const int16_t **) lumPixBuf + 2 * vLumBufSize;
+#endif
             int neg = -firstLumSrcY, i, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize);
             for (i = 0; i < neg;            i++)
                 tmpY[i] = lumSrcPtr[neg];
@@ -127,7 +143,11 @@ void ff_updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrB
             lumSrcPtr = tmpY;
 
             if (alpSrcPtr) {
+#ifdef NEW_FILTER
+                const int16_t **tmpA = (const int16_t **) alpPlane->tmp;
+#else
                 const int16_t **tmpA = (const int16_t **) alpPixBuf + 2 * vLumBufSize;
+#endif
                 for (i = 0; i < neg;            i++)
                     tmpA[i] = alpSrcPtr[neg];
                 for (     ; i < end;            i++)
@@ -138,7 +158,11 @@ void ff_updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrB
             }
         }
         if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) {
+#ifdef NEW_FILTER
+            const int16_t **tmpU = (const int16_t **) chrUPlane->tmp;
+#else
             const int16_t **tmpU = (const int16_t **) chrUPixBuf + 2 * vChrBufSize;
+#endif
             int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
             for (i = 0; i < neg;            i++) {
                 tmpU[i] = chrUSrcPtr[neg];
@@ -160,7 +184,7 @@ void ff_updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrB
                 lumMmxFilter[s*i+APCK_COEF/4  ]=
                 lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i    ]
                 + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0);
-                if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
+                if (CONFIG_SWSCALE_ALPHA && hasAlpha) {
                     *(const void**)&alpMmxFilter[s*i              ]= alpSrcPtr[i  ];
                     *(const void**)&alpMmxFilter[s*i+APCK_PTR2/4  ]= alpSrcPtr[i+(vLumFilterSize>1)];
                     alpMmxFilter[s*i+APCK_COEF/4  ]=
@@ -180,7 +204,7 @@ void ff_updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrB
                 lumMmxFilter[4*i+2]=
                 lumMmxFilter[4*i+3]=
                 ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001U;
-                if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
+                if (CONFIG_SWSCALE_ALPHA && hasAlpha) {
                     *(const void**)&alpMmxFilter[4*i+0]= alpSrcPtr[i];
                     alpMmxFilter[4*i+2]=
                     alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
@@ -410,7 +434,7 @@ av_cold void ff_sws_init_swscale_x86(SwsContext *c)
     } else if (c->srcBpc == 12) { \
         hscalefn = c->dstBpc <= 14 ? ff_hscale12to15_ ## filtersize ## _ ## opt2 : \
                                      ff_hscale12to19_ ## filtersize ## _ ## opt1; \
-    } else if (c->srcBpc == 14 || ((c->srcFormat==AV_PIX_FMT_PAL8||isAnyRGB(c->srcFormat)) && av_pix_fmt_desc_get(c->srcFormat)->comp[0].depth_minus1<15)) { \
+    } else if (c->srcBpc == 14 || ((c->srcFormat==AV_PIX_FMT_PAL8||isAnyRGB(c->srcFormat)) && av_pix_fmt_desc_get(c->srcFormat)->comp[0].depth<16)) { \
         hscalefn = c->dstBpc <= 14 ? ff_hscale14to15_ ## filtersize ## _ ## opt2 : \
                                      ff_hscale14to19_ ## filtersize ## _ ## opt1; \
     } else { /* c->srcBpc == 16 */ \
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 36a606c5..bbda6d08 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -399,6 +399,35 @@ static void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
     }
 }
 
+static void RENAME(yuv2bgr32_X)(SwsContext *c, const int16_t *lumFilter,
+                                const int16_t **lumSrc, int lumFilterSize,
+                                const int16_t *chrFilter, const int16_t **chrUSrc,
+                                const int16_t **chrVSrc,
+                                int chrFilterSize, const int16_t **alpSrc,
+                                uint8_t *dest, int dstW, int dstY)
+{   /* yuv2packedX implementation for AV_PIX_FMT_BGR32 output; installed by sws_init_swscale() */
+    x86_reg dummy=0;            /* NOTE(review): these locals are not referenced directly here - presumably operands of the asm macros; confirm */
+    x86_reg dstW_reg = dstW;
+    x86_reg uv_off = c->uv_offx2;
+    /* with an alpha buffer the alpha plane is filtered too; otherwise mm7 is set to all ones (presumably opaque alpha for WRITEBGR32) */
+    if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
+        YSCALEYUV2PACKEDX
+        YSCALEYUV2RGBX
+        YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7)
+        "psraw                        $3, %%mm1         \n\t"
+        "psraw                        $3, %%mm7         \n\t"
+        "packuswb                  %%mm7, %%mm1         \n\t"
+        WRITEBGR32(%4, "%5", %%REGa, %%mm5, %%mm4, %%mm2, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
+        YSCALEYUV2PACKEDX_END
+    } else {
+        YSCALEYUV2PACKEDX
+        YSCALEYUV2RGBX
+        "pcmpeqd %%mm7, %%mm7 \n\t"
+        WRITEBGR32(%4, "%5", %%REGa, %%mm5, %%mm4, %%mm2, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
+        YSCALEYUV2PACKEDX_END
+    }
+}
+
 #define REAL_WRITERGB16(dst, dstw, index) \
     "pand "MANGLE(bF8)", %%mm2  \n\t" /* B */\
     "pand "MANGLE(bFC)", %%mm4  \n\t" /* G */\
@@ -1491,6 +1520,7 @@ static av_cold void RENAME(sws_init_swscale)(SwsContext *c)
                 if (!(c->flags & SWS_FULL_CHR_H_INT)) {
                     switch (c->dstFormat) {
                     case AV_PIX_FMT_RGB32:   c->yuv2packedX = RENAME(yuv2rgb32_X);   break;
+                    case AV_PIX_FMT_BGR32:   c->yuv2packedX = RENAME(yuv2bgr32_X);   break;
 #if HAVE_6REGS
                     case AV_PIX_FMT_BGR24:   c->yuv2packedX = RENAME(yuv2bgr24_X);   break;
 #endif
diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c
index 1d682ba5..3671fe34 100644
--- a/libswscale/yuv2rgb.c
+++ b/libswscale/yuv2rgb.c
@@ -35,6 +35,17 @@
 #include "swscale_internal.h"
 #include "libavutil/pixdesc.h"
 
+/* Color space conversion coefficients for YCbCr -> RGB mapping.
+ *
+ * Entries are {crv, cbu, cgu, cgv}
+ *
+ *   crv = (255 / 224) * 65536 * (1 - cr) / 0.5
+ *   cbu = (255 / 224) * 65536 * (1 - cb) / 0.5
+ *   cgu = (255 / 224) * 65536 * (cb / cg) * (1 - cb) / 0.5
+ *   cgv = (255 / 224) * 65536 * (cr / cg) * (1 - cr) / 0.5
+ *
+ * where Y = cr * R + cg * G + cb * B and cr + cg + cb = 1.
+ */
 const int32_t ff_yuv2rgb_coeffs[8][4] = {
     { 117504, 138453, 13954, 34903 }, /* no sequence_display_extension */
     { 117504, 138453, 13954, 34903 }, /* ITU-R Rec. 709 (1990) */
@@ -776,7 +787,8 @@ av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4],
     uint16_t *y_table16;
     uint32_t *y_table32;
     int i, base, rbase, gbase, bbase, av_uninit(abase), needAlpha;
-    const int yoffs = fullRange ? 384 : 326;
+    const int yoffs = (fullRange ? 384 : 326) + YUVRGB_TABLE_LUMA_HEADROOM;
+    const int table_plane_size = 1024 + 2*YUVRGB_TABLE_LUMA_HEADROOM;
 
     int64_t crv =  inv_table[0];
     int64_t cbu =  inv_table[1];
@@ -833,10 +845,10 @@ av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4],
             return AVERROR(ENOMEM);
     switch (bpp) {
     case 1:
-        ALLOC_YUV_TABLE(1024);
+        ALLOC_YUV_TABLE(table_plane_size);
         y_table     = c->yuvTable;
-        yb = -(384 << 16) - oy;
-        for (i = 0; i < 1024 - 110; i++) {
+        yb = -(384 << 16) - YUVRGB_TABLE_LUMA_HEADROOM*cy - oy;
+        for (i = 0; i < table_plane_size - 110; i++) {
             y_table[i + 110]  = av_clip_uint8((yb + 0x8000) >> 16) >> 7;
             yb               += cy;
         }
@@ -848,60 +860,60 @@ av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4],
         rbase       = isRgb ? 3 : 0;
         gbase       = 1;
         bbase       = isRgb ? 0 : 3;
-        ALLOC_YUV_TABLE(1024 * 3);
+        ALLOC_YUV_TABLE(table_plane_size * 3);
         y_table     = c->yuvTable;
-        yb = -(384 << 16) - oy;
-        for (i = 0; i < 1024 - 110; i++) {
+        yb = -(384 << 16) - YUVRGB_TABLE_LUMA_HEADROOM*cy - oy;
+        for (i = 0; i < table_plane_size - 110; i++) {
             int yval                = av_clip_uint8((yb + 0x8000) >> 16);
             y_table[i + 110]        = (yval >> 7)        << rbase;
-            y_table[i +  37 + 1024] = ((yval + 43) / 85) << gbase;
-            y_table[i + 110 + 2048] = (yval >> 7)        << bbase;
+            y_table[i +  37 +   table_plane_size] = ((yval + 43) / 85) << gbase;
+            y_table[i + 110 + 2*table_plane_size] = (yval >> 7)        << bbase;
             yb += cy;
         }
         fill_table(c->table_rV, 1, crv, y_table + yoffs);
-        fill_table(c->table_gU, 1, cgu, y_table + yoffs + 1024);
-        fill_table(c->table_bU, 1, cbu, y_table + yoffs + 2048);
+        fill_table(c->table_gU, 1, cgu, y_table + yoffs +   table_plane_size);
+        fill_table(c->table_bU, 1, cbu, y_table + yoffs + 2*table_plane_size);
         fill_gv_table(c->table_gV, 1, cgv);
         break;
     case 8:
         rbase       = isRgb ? 5 : 0;
         gbase       = isRgb ? 2 : 3;
         bbase       = isRgb ? 0 : 6;
-        ALLOC_YUV_TABLE(1024 * 3);
+        ALLOC_YUV_TABLE(table_plane_size * 3);
         y_table     = c->yuvTable;
-        yb = -(384 << 16) - oy;
-        for (i = 0; i < 1024 - 38; i++) {
+        yb = -(384 << 16) - YUVRGB_TABLE_LUMA_HEADROOM*cy - oy;
+        for (i = 0; i < table_plane_size - 38; i++) {
             int yval               = av_clip_uint8((yb + 0x8000) >> 16);
             y_table[i + 16]        = ((yval + 18) / 36) << rbase;
-            y_table[i + 16 + 1024] = ((yval + 18) / 36) << gbase;
-            y_table[i + 37 + 2048] = ((yval + 43) / 85) << bbase;
+            y_table[i + 16 +   table_plane_size] = ((yval + 18) / 36) << gbase;
+            y_table[i + 37 + 2*table_plane_size] = ((yval + 43) / 85) << bbase;
             yb += cy;
         }
         fill_table(c->table_rV, 1, crv, y_table + yoffs);
-        fill_table(c->table_gU, 1, cgu, y_table + yoffs + 1024);
-        fill_table(c->table_bU, 1, cbu, y_table + yoffs + 2048);
+        fill_table(c->table_gU, 1, cgu, y_table + yoffs +   table_plane_size);
+        fill_table(c->table_bU, 1, cbu, y_table + yoffs + 2*table_plane_size);
         fill_gv_table(c->table_gV, 1, cgv);
         break;
     case 12:
         rbase       = isRgb ? 8 : 0;
         gbase       = 4;
         bbase       = isRgb ? 0 : 8;
-        ALLOC_YUV_TABLE(1024 * 3 * 2);
+        ALLOC_YUV_TABLE(table_plane_size * 3 * 2);
         y_table16   = c->yuvTable;
-        yb = -(384 << 16) - oy;
-        for (i = 0; i < 1024; i++) {
+        yb = -(384 << 16) - YUVRGB_TABLE_LUMA_HEADROOM*cy - oy;
+        for (i = 0; i < table_plane_size; i++) {
             uint8_t yval        = av_clip_uint8((yb + 0x8000) >> 16);
             y_table16[i]        = (yval >> 4) << rbase;
-            y_table16[i + 1024] = (yval >> 4) << gbase;
-            y_table16[i + 2048] = (yval >> 4) << bbase;
+            y_table16[i +   table_plane_size] = (yval >> 4) << gbase;
+            y_table16[i + 2*table_plane_size] = (yval >> 4) << bbase;
             yb += cy;
         }
         if (isNotNe)
-            for (i = 0; i < 1024 * 3; i++)
+            for (i = 0; i < table_plane_size * 3; i++)
                 y_table16[i] = av_bswap16(y_table16[i]);
         fill_table(c->table_rV, 2, crv, y_table16 + yoffs);
-        fill_table(c->table_gU, 2, cgu, y_table16 + yoffs + 1024);
-        fill_table(c->table_bU, 2, cbu, y_table16 + yoffs + 2048);
+        fill_table(c->table_gU, 2, cgu, y_table16 + yoffs +   table_plane_size);
+        fill_table(c->table_bU, 2, cbu, y_table16 + yoffs + 2*table_plane_size);
         fill_gv_table(c->table_gV, 2, cgv);
         break;
     case 15:
@@ -909,30 +921,30 @@ av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4],
         rbase       = isRgb ? bpp - 5 : 0;
         gbase       = 5;
         bbase       = isRgb ? 0 : (bpp - 5);
-        ALLOC_YUV_TABLE(1024 * 3 * 2);
+        ALLOC_YUV_TABLE(table_plane_size * 3 * 2);
         y_table16   = c->yuvTable;
-        yb = -(384 << 16) - oy;
-        for (i = 0; i < 1024; i++) {
+        yb = -(384 << 16) - YUVRGB_TABLE_LUMA_HEADROOM*cy - oy;
+        for (i = 0; i < table_plane_size; i++) {
             uint8_t yval        = av_clip_uint8((yb + 0x8000) >> 16);
             y_table16[i]        = (yval >> 3)          << rbase;
-            y_table16[i + 1024] = (yval >> (18 - bpp)) << gbase;
-            y_table16[i + 2048] = (yval >> 3)          << bbase;
+            y_table16[i +   table_plane_size] = (yval >> (18 - bpp)) << gbase;
+            y_table16[i + 2*table_plane_size] = (yval >> 3)          << bbase;
             yb += cy;
         }
         if (isNotNe)
-            for (i = 0; i < 1024 * 3; i++)
+            for (i = 0; i < table_plane_size * 3; i++)
                 y_table16[i] = av_bswap16(y_table16[i]);
         fill_table(c->table_rV, 2, crv, y_table16 + yoffs);
-        fill_table(c->table_gU, 2, cgu, y_table16 + yoffs + 1024);
-        fill_table(c->table_bU, 2, cbu, y_table16 + yoffs + 2048);
+        fill_table(c->table_gU, 2, cgu, y_table16 + yoffs +   table_plane_size);
+        fill_table(c->table_bU, 2, cbu, y_table16 + yoffs + 2*table_plane_size);
         fill_gv_table(c->table_gV, 2, cgv);
         break;
     case 24:
     case 48:
-        ALLOC_YUV_TABLE(1024);
+        ALLOC_YUV_TABLE(table_plane_size);
         y_table     = c->yuvTable;
-        yb = -(384 << 16) - oy;
-        for (i = 0; i < 1024; i++) {
+        yb = -(384 << 16) - YUVRGB_TABLE_LUMA_HEADROOM*cy - oy;
+        for (i = 0; i < table_plane_size; i++) {
             y_table[i]  = av_clip_uint8((yb + 0x8000) >> 16);
             yb         += cy;
         }
@@ -951,20 +963,20 @@ av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4],
         needAlpha = CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat);
         if (!needAlpha)
             abase = (base + 24) & 31;
-        ALLOC_YUV_TABLE(1024 * 3 * 4);
+        ALLOC_YUV_TABLE(table_plane_size * 3 * 4);
         y_table32   = c->yuvTable;
-        yb = -(384 << 16) - oy;
-        for (i = 0; i < 1024; i++) {
+        yb = -(384 << 16) - YUVRGB_TABLE_LUMA_HEADROOM*cy - oy;
+        for (i = 0; i < table_plane_size; i++) {
             unsigned yval       = av_clip_uint8((yb + 0x8000) >> 16);
             y_table32[i]        = (yval << rbase) +
                                   (needAlpha ? 0 : (255u << abase));
-            y_table32[i + 1024] =  yval << gbase;
-            y_table32[i + 2048] =  yval << bbase;
+            y_table32[i +   table_plane_size] =  yval << gbase;
+            y_table32[i + 2*table_plane_size] =  yval << bbase;
             yb += cy;
         }
         fill_table(c->table_rV, 4, crv, y_table32 + yoffs);
-        fill_table(c->table_gU, 4, cgu, y_table32 + yoffs + 1024);
-        fill_table(c->table_bU, 4, cbu, y_table32 + yoffs + 2048);
+        fill_table(c->table_gU, 4, cgu, y_table32 + yoffs +   table_plane_size);
+        fill_table(c->table_bU, 4, cbu, y_table32 + yoffs + 2*table_plane_size);
         fill_gv_table(c->table_gV, 4, cgv);
         break;
     default:
diff --git a/tests/Makefile b/tests/Makefile
index cffa5412..6e5dfa6e 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -7,16 +7,20 @@ AREF = tests/data/asynth1.sw
 FATEW = 34
 FATEH = 34
 
+FFMPEG=ffmpeg$(PROGSSUF)$(EXESUF)
+
 $(AREF): CMP=
 
-ffservertest: ffserver$(EXESUF) tests/vsynth1/00.pgm tests/data/asynth1.sw
+ffservertest: export PROGSUF = $(PROGSSUF)
+ffservertest: ffserver$(PROGSSUF)$(EXESUF) tests/vsynth1/00.pgm tests/data/asynth1.sw
 	@echo
 	@echo "Unfortunately ffserver is broken and therefore its regression"
 	@echo "test fails randomly. Treat the results accordingly."
 	@echo
 	$(SRC_PATH)/tests/ffserver-regression.sh $(FFSERVER_REFFILE) $(SRC_PATH)/tests/ffserver.conf
 
-OBJDIRS += tests/data tests/vsynth1 tests/data/filtergraphs
+APITESTSDIR := tests/api
+OBJDIRS += tests/data tests/vsynth1 tests/data/filtergraphs $(APITESTSDIR)/
 
 $(VREF): tests/videogen$(HOSTEXESUF) | tests/vsynth1
 	$(M)./$< 'tests/vsynth1/'
@@ -43,11 +47,11 @@ tests/test_copy.ffmeta: TAG = COPY
 tests/test_copy.ffmeta: tests/data
 	$(M)cp -f $(SRC_PATH)/tests/test.ffmeta tests/test_copy.ffmeta
 
-tests/data/ffprobe-test.nut: ffmpeg$(EXESUF) tests/test_copy.ffmeta
+tests/data/ffprobe-test.nut: ffmpeg$(PROGSSUF)$(EXESUF) tests/test_copy.ffmeta
 	$(M)$(TARGET_EXEC) $(TARGET_PATH)/$< \
         -f lavfi -i "aevalsrc=sin(400*PI*2*t):d=0.125[out0]; testsrc=d=0.125[out1]; testsrc=s=100x100:d=0.125[out2]" \
         -f ffmetadata -i $(TARGET_PATH)/tests/test_copy.ffmeta \
-        -flags +bitexact -map 0:0 -map 0:1 -map 0:2 -map_metadata 1 \
+        -flags +bitexact -fflags +bitexact -map 0:0 -map 0:1 -map 0:2 -map_metadata 1 \
         -map_metadata:s:0 1:s:0 -map_metadata:s:1 1:s:1 \
         -vcodec rawvideo -acodec pcm_s16le \
         -y $(TARGET_PATH)/$@ 2>/dev/null
@@ -89,6 +93,8 @@ FILTERDEMDECENCMUX = $(call ALLYES, $(1:%=%_FILTER) $(2)_DEMUXER $(3)_DECODER $(
 
 PARSERDEMDEC       = $(call ALLYES, $(1)_PARSER $(2)_DEMUXER $(3)_DECODER)
 
+include $(SRC_PATH)/$(APITESTSDIR)/Makefile
+
 include $(SRC_PATH)/tests/fate/acodec.mak
 include $(SRC_PATH)/tests/fate/vcodec.mak
 include $(SRC_PATH)/tests/fate/avformat.mak
@@ -101,13 +107,18 @@ include $(SRC_PATH)/tests/fate/alac.mak
 include $(SRC_PATH)/tests/fate/als.mak
 include $(SRC_PATH)/tests/fate/amrnb.mak
 include $(SRC_PATH)/tests/fate/amrwb.mak
+include $(SRC_PATH)/tests/fate/api.mak
 include $(SRC_PATH)/tests/fate/atrac.mak
 include $(SRC_PATH)/tests/fate/audio.mak
 include $(SRC_PATH)/tests/fate/bmp.mak
 include $(SRC_PATH)/tests/fate/cdxl.mak
+include $(SRC_PATH)/tests/fate/checkasm.mak
+include $(SRC_PATH)/tests/fate/concatdec.mak
 include $(SRC_PATH)/tests/fate/cover-art.mak
+include $(SRC_PATH)/tests/fate/dca.mak
 include $(SRC_PATH)/tests/fate/demux.mak
 include $(SRC_PATH)/tests/fate/dfa.mak
+include $(SRC_PATH)/tests/fate/dnxhd.mak
 include $(SRC_PATH)/tests/fate/dpcm.mak
 include $(SRC_PATH)/tests/fate/ea.mak
 include $(SRC_PATH)/tests/fate/exif.mak
@@ -145,6 +156,7 @@ include $(SRC_PATH)/tests/fate/qt.mak
 include $(SRC_PATH)/tests/fate/qtrle.mak
 include $(SRC_PATH)/tests/fate/real.mak
 include $(SRC_PATH)/tests/fate/screen.mak
+include $(SRC_PATH)/tests/fate/source.mak
 include $(SRC_PATH)/tests/fate/subtitles.mak
 include $(SRC_PATH)/tests/fate/utvideo.mak
 include $(SRC_PATH)/tests/fate/video.mak
@@ -170,9 +182,9 @@ FATE += $(FATE-yes)
 RSYNC_OPTIONS-$(HAVE_RSYNC_CONTIMEOUT) += --contimeout=60
 RSYNC_OPTIONS = -vrltLW --timeout=60 $(RSYNC_OPTIONS-yes)
 
-$(FATE_FFMPEG) $(FATE_SAMPLES_AVCONV) $(FATE_SAMPLES_FFMPEG): ffmpeg$(EXESUF)
+$(FATE_FFMPEG) $(FATE_SAMPLES_AVCONV) $(FATE_SAMPLES_FFMPEG): ffmpeg$(PROGSSUF)$(EXESUF)
 
-$(FATE_FFPROBE) $(FATE_SAMPLES_FFPROBE): ffprobe$(EXESUF)
+$(FATE_FFPROBE) $(FATE_SAMPLES_FFPROBE): ffprobe$(PROGSSUF)$(EXESUF)
 
 ifdef SAMPLES
 FATE += $(FATE_FULL) $(FATE_FULL-yes)
@@ -194,6 +206,7 @@ TOOL = ffmpeg
 
 fate:: $(FATE)
 
+$(FATE) $(FATE_TESTS-no): export PROGSUF = $(PROGSSUF)
 $(FATE) $(FATE_TESTS-no): $(FATE_UTILS:%=tests/%$(HOSTEXESUF))
 	@echo "TEST    $(@:fate-%=%)"
 	$(Q)$(SRC_PATH)/tests/fate-run.sh $@ "$(TARGET_SAMPLES)" "$(TARGET_EXEC)" "$(TARGET_PATH)" '$(CMD)' '$(CMP)' '$(REF)' '$(FUZZ)' '$(THREADS)' '$(THREAD_TYPE)' '$(CPUFLAGS)' '$(CMP_SHIFT)' '$(CMP_TARGET)' '$(SIZE_TOLERANCE)' '$(CMP_UNIT)' '$(GEN)' '$(HWACCEL)'
@@ -203,9 +216,10 @@ fate-list:
 
 coverage.info: TAG = LCOV
 coverage.info:
-	$(M)lcov -q -d $(CURDIR) -b $(SRC_PATH) --capture | \
-	    sed "s,$(CURDIR)/\./,$(CURDIR)/," > $@
-	$(M)lcov -q --remove $@ "/usr*" -o $@
+	$(M)lcov -q -d $(CURDIR) -b $(patsubst src%,./,$(SRC_LINK)) --capture | \
+	    sed "s,$(CURDIR)/\./,$(CURDIR)/," > $@.in
+	$(M)lcov -q --remove $@.in "/usr*" > $@
+	$(Q)$(RM) $@.in
 
 lcov:  TAG = GENHTML
 lcov: coverage.info
@@ -218,13 +232,15 @@ lcov-reset:
 
 clean:: testclean
 
-testclean:
-	$(RM) -r tests/vsynth1 tests/data tools/lavfi-showfiltfmts$(EXESUF)
+testclean::
+	$(RM) -r tests/vsynth1 tests/data tools/lavfi-showfiltfmts$(PROGSSUF)$(EXESUF)
 	$(RM) $(CLEANSUFFIXES:%=tests/%)
 	$(RM) $(TESTTOOLS:%=tests/%$(HOSTEXESUF))
 	$(RM) tests/pixfmts.mak tests/test_copy.ffmeta
 
 -include $(wildcard tests/*.d)
 
+include $(SRC_PATH)/tests/checkasm/Makefile
+
 .PHONY: fate* lcov lcov-reset
 .INTERMEDIATE: coverage.info
diff --git a/tests/api/Makefile b/tests/api/Makefile
new file mode 100644
index 00000000..ec0e125c
--- /dev/null
+++ b/tests/api/Makefile
@@ -0,0 +1,22 @@
+# API test programs; each NAME listed here is linked from $(APITESTSDIR)/NAME-test.o
+APITESTPROGS-$(call ENCDEC, FLAC, FLAC) += api-flac
+APITESTPROGS-$(call DEMDEC, H264, H264) += api-h264
+APITESTPROGS-yes += api-seek
+APITESTPROGS-yes += api-codec-param
+APITESTPROGS-$(call DEMDEC, H263, H263) += api-band
+APITESTPROGS-$(HAVE_THREADS) += api-threadmessage
+APITESTPROGS += $(APITESTPROGS-yes)
+
+APITESTOBJS  := $(APITESTOBJS:%=$(APITESTSDIR)/%) $(APITESTPROGS:%=$(APITESTSDIR)/%-test.o)
+APITESTPROGS := $(APITESTPROGS:%=$(APITESTSDIR)/%-test$(EXESUF))
+-include $(wildcard $(APITESTOBJS:.o=.d))
+
+$(APITESTOBJS): | $(sort $(dir $(APITESTOBJS)))
+$(APITESTOBJS) $(APITESTOBJS:.o=.i): CPPFLAGS += -DTEST
+$(APITESTOBJS) $(APITESTOBJS:.o=.i): CFLAGS += -Umain
+
+$(APITESTPROGS): %$(EXESUF): %.o $(EXEOBJS) $(FF_DEP_LIBS)
+	$(LD) $(LDFLAGS) $(LDEXEFLAGS) $(LD_O) $(filter %.o,$^) $(FF_EXTRALIBS) $(ELIBS)
+
+testclean::
+	$(RM) $(addprefix $(APITESTSDIR)/,$(CLEANSUFFIXES) *-test$(EXESUF))
diff --git a/tests/api/api-band-test.c b/tests/api/api-band-test.c
new file mode 100644
index 00000000..8644e7df
--- /dev/null
+++ b/tests/api/api-band-test.c
@@ -0,0 +1,282 @@
+/*
+ * Copyright (c) 2015 Ludmila Glinskih
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/**
+ * draw_horiz_band test.
+ */
+
+#include <string.h>
+
+#include "libavutil/adler32.h"
+#include "libavcodec/avcodec.h"
+#include "libavformat/avformat.h"
+#include "libavutil/imgutils.h"
+
+/* Picture reassembled from the bands passed to draw_horiz_band(). */
+uint8_t *slice_byte_buffer;
+/* Allocated size of slice_byte_buffer in bytes. */
+int slice_byte_buffer_size;
+/* Set to 1 by draw_horiz_band() so the test can tell the callback ran. */
+int draw_horiz_band_called;
+
+/**
+ * Decoder callback: copy one decoded band into slice_byte_buffer.
+ *
+ * Planes are written back to back (plane 0, then planes 1 and 2) with tightly
+ * packed rows, so the result can be compared with the output of
+ * av_image_copy_to_buffer() called with an alignment of 1.
+ *
+ * NOTE(review): planes 1 and 2 are read unconditionally and no bounds check
+ * is made against slice_byte_buffer_size; this presumably relies on the codec
+ * producing a three-plane format -- confirm before reusing with other codecs.
+ *
+ * @param ctx            decoder context, supplies pix_fmt, width and height
+ * @param fr             frame containing the band
+ * @param offset         per-plane byte offset of the band inside fr->data[]
+ * @param slice_position first row of the band in plane 0
+ * @param type           not used by this test
+ * @param height         number of plane 0 rows in the band
+ */
+static void draw_horiz_band(AVCodecContext *ctx, const AVFrame *fr, int offset[4],
+                            int slice_position, int type, int height)
+{
+    int i;
+    const AVPixFmtDescriptor *pix_fmt_desc;
+    int chroma_w, chroma_h;
+    int shift_slice_position;
+    int shift_height;
+
+    draw_horiz_band_called = 1;
+
+    /* -((-x) >> s) divides by 2^s rounding up instead of down. */
+    pix_fmt_desc = av_pix_fmt_desc_get(ctx->pix_fmt);
+    chroma_w = -((-ctx->width) >> pix_fmt_desc->log2_chroma_w);
+    chroma_h = -((-height) >> pix_fmt_desc->log2_chroma_h);
+    shift_slice_position = -((-slice_position) >> pix_fmt_desc->log2_chroma_h);
+    shift_height = -((-ctx->height) >> pix_fmt_desc->log2_chroma_h);
+
+    /* plane 0 */
+    for (i = 0; i < height; i++) {
+        memcpy(slice_byte_buffer + ctx->width * slice_position + i * ctx->width,
+               fr->data[0] + offset[0] + i * fr->linesize[0], ctx->width);
+    }
+    /* plane 1, stored directly after plane 0 */
+    for (i = 0; i < chroma_h; i++) {
+        memcpy(slice_byte_buffer + ctx->width * ctx->height + chroma_w * shift_slice_position + i * chroma_w,
+               fr->data[1] + offset[1] + i * fr->linesize[1], chroma_w);
+    }
+    /* plane 2, stored directly after plane 1 */
+    for (i = 0; i < chroma_h; i++) {
+        memcpy(slice_byte_buffer + ctx->width * ctx->height + chroma_w * shift_height + chroma_w * shift_slice_position + i * chroma_w,
+               fr->data[2] + offset[2] + i * fr->linesize[2], chroma_w);
+    }
+}
+
+/**
+ * Decode the video stream of a file and verify, for every returned frame,
+ * that the picture collected through draw_horiz_band() has the same Adler-32
+ * checksum as the frame itself.
+ *
+ * @param input_filename file to open
+ * @return 0 on success, a negative value on failure
+ */
+static int video_decode(const char *input_filename)
+{
+    AVCodec *codec = NULL;
+    AVCodecContext *origin_ctx = NULL, *ctx= NULL;
+    uint8_t *byte_buffer = NULL;
+    AVFrame *fr = NULL;
+    AVPacket pkt;
+    AVFormatContext *fmt_ctx = NULL;
+    int number_of_written_bytes;
+    int video_stream;
+    int got_frame = 0;
+    int byte_buffer_size;
+    int result;
+    int end_of_stream = 0;
+
+    draw_horiz_band_called = 0;
+
+    /* Blank the packet first so the cleanup code may always unref it. */
+    av_init_packet(&pkt);
+    pkt.data = NULL;
+    pkt.size = 0;
+
+    result = avformat_open_input(&fmt_ctx, input_filename, NULL, NULL);
+    if (result < 0) {
+        av_log(NULL, AV_LOG_ERROR, "Can't open file\n");
+        goto end;
+    }
+
+    result = avformat_find_stream_info(fmt_ctx, NULL);
+    if (result < 0) {
+        av_log(NULL, AV_LOG_ERROR, "Can't get stream info\n");
+        goto end;
+    }
+
+    video_stream = av_find_best_stream(fmt_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0);
+    if (video_stream < 0) {
+        av_log(NULL, AV_LOG_ERROR, "Can't find video stream in input file\n");
+        result = -1;
+        goto end;
+    }
+
+    origin_ctx = fmt_ctx->streams[video_stream]->codec;
+
+    codec = avcodec_find_decoder(origin_ctx->codec_id);
+    if (!codec) {
+        av_log(NULL, AV_LOG_ERROR, "Can't find decoder\n");
+        result = -1;
+        goto end;
+    }
+
+    ctx = avcodec_alloc_context3(codec);
+    if (!ctx) {
+        av_log(NULL, AV_LOG_ERROR, "Can't allocate decoder context\n");
+        result = AVERROR(ENOMEM);
+        goto end;
+    }
+
+    result = avcodec_copy_context(ctx, origin_ctx);
+    if (result) {
+        av_log(NULL, AV_LOG_ERROR, "Can't copy decoder context\n");
+        goto end;
+    }
+
+    ctx->draw_horiz_band = draw_horiz_band;
+    ctx->thread_count = 1;
+
+    result = avcodec_open2(ctx, codec, NULL);
+    if (result < 0) {
+        av_log(ctx, AV_LOG_ERROR, "Can't open decoder\n");
+        goto end;
+    }
+
+    fr = av_frame_alloc();
+    if (!fr) {
+        av_log(NULL, AV_LOG_ERROR, "Can't allocate frame\n");
+        result = AVERROR(ENOMEM);
+        goto end;
+    }
+
+    if (strcmp(codec->name, "flv") && strcmp(codec->name, "mpeg4") && strcmp(codec->name, "huffyuv")) {
+        av_log(NULL, AV_LOG_ERROR, "Wrong codec\n");
+        result = -1;
+        goto end;
+    }
+
+    byte_buffer_size = av_image_get_buffer_size(ctx->pix_fmt, ctx->width, ctx->height, 32);
+    if (byte_buffer_size < 0) {
+        av_log(NULL, AV_LOG_ERROR, "Can't get image buffer size\n");
+        result = byte_buffer_size;
+        goto end;
+    }
+
+    byte_buffer = av_malloc(byte_buffer_size);
+    if (!byte_buffer) {
+        av_log(NULL, AV_LOG_ERROR, "Can't allocate buffer\n");
+        result = AVERROR(ENOMEM);
+        goto end;
+    }
+
+    slice_byte_buffer = av_malloc(byte_buffer_size);
+    if (!slice_byte_buffer) {
+        av_log(NULL, AV_LOG_ERROR, "Can't allocate buffer\n");
+        result = AVERROR(ENOMEM);
+        goto end;
+    }
+    memset(slice_byte_buffer, 0, byte_buffer_size);
+    slice_byte_buffer_size = byte_buffer_size;
+
+    do {
+        if (!end_of_stream) {
+            if (av_read_frame(fmt_ctx, &pkt) < 0) {
+                end_of_stream = 1;
+            }
+        }
+        if (end_of_stream) {
+            /* an empty packet drains the frames still buffered in the decoder */
+            pkt.data = NULL;
+            pkt.size = 0;
+        }
+        if (pkt.stream_index == video_stream || end_of_stream) {
+            got_frame = 0;
+            result = avcodec_decode_video2(ctx, fr, &got_frame, &pkt);
+            if (result < 0) {
+                av_log(NULL, AV_LOG_ERROR, "Error decoding frame\n");
+                goto end;
+            }
+            if (got_frame) {
+                number_of_written_bytes = av_image_copy_to_buffer(byte_buffer, byte_buffer_size,
+                                        (const uint8_t* const *)fr->data, (const int*) fr->linesize,
+                                        ctx->pix_fmt, ctx->width, ctx->height, 1);
+                if (number_of_written_bytes < 0) {
+                    av_log(NULL, AV_LOG_ERROR, "Can't copy image to buffer\n");
+                    result = number_of_written_bytes;
+                    goto end;
+                }
+                if (draw_horiz_band_called == 0) {
+                    av_log(NULL, AV_LOG_ERROR, "draw_horiz_band haven't been called!\n");
+                    result = -1;
+                    goto end;
+                }
+                if (av_adler32_update(0, (const uint8_t*)byte_buffer, number_of_written_bytes) !=
+                    av_adler32_update(0, (const uint8_t*)slice_byte_buffer, number_of_written_bytes)) {
+                    av_log(NULL, AV_LOG_ERROR, "Decoded frames with and without draw_horiz_band are not the same!\n");
+                    result = -1;
+                    goto end;
+                }
+            }
+        }
+        /* Release every packet, including those of other streams. */
+        av_packet_unref(&pkt);
+    } while (!end_of_stream || got_frame);
+
+    result = 0;
+
+end:
+    av_packet_unref(&pkt);
+    av_frame_free(&fr);
+    avcodec_close(ctx);
+    avformat_close_input(&fmt_ctx);
+    avcodec_free_context(&ctx);
+    av_freep(&byte_buffer);
+    av_freep(&slice_byte_buffer);
+    return result;
+}
+
+/* Usage: <program> <video file>; exits with 0 when the check passes, 1 otherwise. */
+int main(int argc, char **argv)
+{
+    if (argc < 2)
+    {
+        av_log(NULL, AV_LOG_ERROR, "Incorrect input: expected %s <name of a video file>\nNote that test works only for huffyuv, flv and mpeg4 decoders\n", argv[0]);
+        return 1;
+    }
+
+    av_register_all();
+
+    if (video_decode(argv[1]) != 0)
+        return 1;
+
+    return 0;
+}
diff --git a/tests/api/api-codec-param-test.c b/tests/api/api-codec-param-test.c
new file mode 100644
index 00000000..fa51964b
--- /dev/null
+++ b/tests/api/api-codec-param-test.c
@@ -0,0 +1,314 @@
+/*
+ * Copyright (c) 2015 Matthieu Bouron <matthieu.bouron stupeflix.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include "libavformat/avformat.h"
+#include "libavutil/pixdesc.h"
+#include "libavcodec/internal.h"
+#include "libavutil/avassert.h"
+#include "libavutil/opt.h"
+
+/**
+ * Pass one packet to the decoder of its stream, opening the decoder first
+ * if it is not open yet.
+ *
+ * When decode is 0 and the codec has FF_CODEC_CAP_SKIP_FRAME_FILL_PARAM set,
+ * skip_frame is switched to AVDISCARD_ALL for the call and restored before
+ * returning.
+ *
+ * @param codec_ctx codec context of the stream the packet belongs to
+ * @param pkt       packet to consume; data and size are advanced in place
+ * @param decode    non-zero when the decoder is allowed to output a frame
+ * @return a non-negative value on success, a negative error code on failure
+ */
+static int try_decode_video_frame(AVCodecContext *codec_ctx, AVPacket *pkt, int decode)
+{
+    int ret = 0;
+    int got_frame = 0;
+    AVFrame *frame = NULL;
+    int skip_frame = codec_ctx->skip_frame;
+
+    if (!avcodec_is_open(codec_ctx)) {
+        const AVCodec *codec = avcodec_find_decoder(codec_ctx->codec_id);
+
+        ret = avcodec_open2(codec_ctx, codec, NULL);
+        if (ret < 0) {
+            av_log(codec_ctx, AV_LOG_ERROR, "Failed to open codec\n");
+            goto end;
+        }
+    }
+
+    frame = av_frame_alloc();
+    if (!frame) {
+        av_log(NULL, AV_LOG_ERROR, "Failed to allocate frame\n");
+        /* report the failure instead of returning the stale success value */
+        ret = AVERROR(ENOMEM);
+        goto end;
+    }
+
+    if (!decode && codec_ctx->codec->caps_internal & FF_CODEC_CAP_SKIP_FRAME_FILL_PARAM) {
+        codec_ctx->skip_frame = AVDISCARD_ALL;
+    }
+
+    do {
+        ret = avcodec_decode_video2(codec_ctx, frame, &got_frame, pkt);
+        /* without decoding, the decoder must never hand out a frame */
+        av_assert0(decode || (!decode && !got_frame));
+        if (ret < 0)
+            break;
+        pkt->data += ret;
+        pkt->size -= ret;
+
+        if (got_frame) {
+            break;
+        }
+    } while (pkt->size > 0);
+
+end:
+    codec_ctx->skip_frame = skip_frame;
+
+    av_frame_free(&frame);
+    return ret;
+}
+
+/**
+ * Read packets until the first packet of every video stream has been passed
+ * to try_decode_video_frame(), then close all stream codecs.
+ *
+ * @param fmt_ctx opened input to read from
+ * @param decode  forwarded to try_decode_video_frame()
+ * @return 0 on success, a negative error code on failure
+ */
+static int find_video_stream_info(AVFormatContext *fmt_ctx, int decode)
+{
+    int ret = 0;
+    int i, done = 0;
+    AVPacket pkt;
+
+    av_init_packet(&pkt);
+
+    while (!done) {
+        AVCodecContext *codec_ctx = NULL;
+        AVStream *st;
+
+        if ((ret = av_read_frame(fmt_ctx, &pkt)) < 0) {
+            av_log(fmt_ctx, AV_LOG_ERROR, "Failed to read frame\n");
+            goto end;
+        }
+
+        st = fmt_ctx->streams[pkt.stream_index];
+        codec_ctx = st->codec;
+
+        /* Writing to AVStream.codec_info_nb_frames must not be done by
+         * user applications. It is done here for testing purposes as
+         * find_video_stream_info tries to mimic avformat_find_stream_info
+         * which writes to this field.
+         * */
+        if (codec_ctx->codec_type != AVMEDIA_TYPE_VIDEO ||
+            st->codec_info_nb_frames++ > 0) {
+            av_packet_unref(&pkt);
+            continue;
+        }
+
+        ret = try_decode_video_frame(codec_ctx, &pkt, decode);
+        if (ret < 0) {
+            av_log(fmt_ctx, AV_LOG_ERROR, "Failed to decode video frame\n");
+            goto end;
+        }
+
+        av_packet_unref(&pkt);
+
+        /* check if all video streams have demuxed a packet */
+        done = 1;
+        for (i = 0; i < fmt_ctx->nb_streams; i++) {
+            st = fmt_ctx->streams[i];
+            codec_ctx = st->codec;
+
+            if (codec_ctx->codec_type != AVMEDIA_TYPE_VIDEO)
+                continue;
+
+            done &= st->codec_info_nb_frames > 0;
+        }
+    }
+
+end:
+    av_packet_unref(&pkt);
+
+    /* close all codecs opened in try_decode_video_frame */
+    for (i = 0; i < fmt_ctx->nb_streams; i++) {
+        AVStream *st = fmt_ctx->streams[i];
+        avcodec_close(st->codec);
+    }
+
+    /* callers test for ret < 0, so hand back the error code itself */
+    return ret < 0 ? ret : 0;
+}
+
+/**
+ * Print every non-constant codec option of each stream to stdout, skipping
+ * "frame_number". No codec_type filter is applied, so non-video streams are
+ * dumped as well.
+ *
+ * @param fmt_ctx input whose stream codec contexts are dumped
+ * @param decode  only echoed in the per-stream header line
+ */
+static void dump_video_streams(const AVFormatContext *fmt_ctx, int decode)
+{
+    int i;
+
+    for (i = 0; i < fmt_ctx->nb_streams; i++) {
+        const AVOption *opt = NULL;
+        const AVStream *st = fmt_ctx->streams[i];
+        AVCodecContext *codec_ctx = st->codec;
+
+        printf("stream=%d, decode=%d\n", i, decode);
+        while ((opt = av_opt_next(codec_ctx, opt))) {
+            uint8_t *str;
+
+            if (opt->type == AV_OPT_TYPE_CONST)
+                continue;
+
+            if (!strcmp(opt->name, "frame_number"))
+                continue;
+
+            if (av_opt_get(codec_ctx, opt->name, 0, &str) >= 0) {
+                printf("    %s=%s\n", opt->name, str);
+                av_free(str);
+            }
+        }
+    }
+}
+
+/**
+ * Open an input, run find_video_stream_info() on it and dump its streams.
+ *
+ * The context stored in *fmt_ctx is not closed here, even on failure; the
+ * caller releases it.
+ *
+ * @param fmt_ctx  receives the opened format context
+ * @param filename input to open
+ * @param decode   forwarded to find_video_stream_info()
+ * @return 0 on success, a negative error code on failure
+ */
+static int open_and_probe_video_streams(AVFormatContext **fmt_ctx, const char *filename, int decode)
+{
+    int ret = 0;
+
+    ret = avformat_open_input(fmt_ctx, filename, NULL, NULL);
+    if (ret < 0) {
+        av_log(NULL, AV_LOG_ERROR, "Failed to open input '%s'\n", filename);
+        goto end;
+    }
+
+    ret = find_video_stream_info(*fmt_ctx, decode);
+    if (ret < 0) {
+        goto end;
+    }
+
+    dump_video_streams(*fmt_ctx, decode);
+
+end:
+    return ret;
+}
+
+/**
+ * Compare the codec options of the video streams of two inputs.
+ *
+ * Both contexts must have the same number of streams (asserted).
+ *
+ * @param fmt_ctx1 first input
+ * @param fmt_ctx2 second input
+ * @return 0 if every compared option matches, AVERROR(EINVAL) otherwise
+ */
+static int check_video_streams(const AVFormatContext *fmt_ctx1, const AVFormatContext *fmt_ctx2)
+{
+    int i;
+    int ret = 0;
+
+    av_assert0(fmt_ctx1->nb_streams == fmt_ctx2->nb_streams);
+    for (i = 0; i < fmt_ctx1->nb_streams; i++) {
+        const AVOption *opt = NULL;
+        const AVStream *st1 = fmt_ctx1->streams[i];
+        const AVStream *st2 = fmt_ctx2->streams[i];
+        AVCodecContext *codec_ctx1 = st1->codec;
+        AVCodecContext *codec_ctx2 = st2->codec;
+
+        if (codec_ctx1->codec_type != AVMEDIA_TYPE_VIDEO)
+            continue;
+
+        while ((opt = av_opt_next(codec_ctx1, opt))) {
+            uint8_t *str1 = NULL, *str2 = NULL;
+
+            if (opt->type == AV_OPT_TYPE_CONST)
+                continue;
+
+            if (!strcmp(opt->name, "frame_number"))
+                continue;
+
+            av_assert0(av_opt_get(codec_ctx1, opt->name, 0, &str1) >= 0);
+            av_assert0(av_opt_get(codec_ctx2, opt->name, 0, &str2) >= 0);
+            if (strcmp((const char *)str1, (const char *)str2)) {
+                av_log(NULL, AV_LOG_ERROR, "Field %s differs: %s %s\n", opt->name, str1, str2);
+                ret = AVERROR(EINVAL);
+            }
+            av_free(str1);
+            av_free(str2);
+        }
+    }
+
+    return ret;
+}
+
+/* Probe argv[1] once without and once with frame decoding, then compare
+ * the codec options obtained both ways. */
+int main(int argc, char* argv[])
+{
+    int ret = 0;
+    AVFormatContext *fmt_ctx = NULL;
+    AVFormatContext *fmt_ctx_no_decode = NULL;
+
+    av_register_all();
+
+    if (argc < 2) {
+        av_log(NULL, AV_LOG_ERROR, "Usage: %s <input>\n", argv[0]);
+        return -1;
+    }
+
+    if ((ret = open_and_probe_video_streams(&fmt_ctx_no_decode, argv[1], 0)) < 0) {
+        av_log(NULL, AV_LOG_ERROR, "Failed to probe '%s' without frame decoding\n", argv[1]);
+        goto end;
+    }
+
+    if ((ret = open_and_probe_video_streams(&fmt_ctx, argv[1], 1)) < 0) {
+        av_log(NULL, AV_LOG_ERROR, "Failed to probe '%s' with frame decoding\n", argv[1]);
+        goto end;
+    }
+
+    ret = check_video_streams(fmt_ctx, fmt_ctx_no_decode);
+
+end:
+    avformat_close_input(&fmt_ctx);
+    avformat_close_input(&fmt_ctx_no_decode);
+
+    return ret;
+}
diff --git a/libavcodec/api-flac-test.c b/tests/api/api-flac-test.c
similarity index 89%
rename from libavcodec/api-flac-test.c
rename to tests/api/api-flac-test.c
index 402d4df2..7b480594 100644
--- a/libavcodec/api-flac-test.c
+++ b/tests/api/api-flac-test.c
@@ -27,7 +27,7 @@
  * after that.
  */
 
-#include "avcodec.h"
+#include "libavcodec/avcodec.h"
 #include "libavutil/common.h"
 #include "libavutil/samplefmt.h"
 
@@ -112,10 +112,10 @@ static int run_test(AVCodec *enc, AVCodec *dec, AVCodecContext *enc_ctx,
     AVFrame *in_frame, *out_frame;
     uint8_t *raw_in = NULL, *raw_out = NULL;
     int in_offset = 0, out_offset = 0;
-    int frame_data_size = 0;
     int result = 0;
     int got_output = 0;
     int i = 0;
+    int in_frame_bytes, out_frame_bytes;
 
     in_frame = av_frame_alloc();
     if (!in_frame) {
@@ -156,8 +156,13 @@ static int run_test(AVCodec *enc, AVCodec *dec, AVCodecContext *enc_ctx,
 
         generate_raw_frame((uint16_t*)(in_frame->data[0]), i, enc_ctx->sample_rate,
                            enc_ctx->channels, enc_ctx->frame_size);
-        memcpy(raw_in + in_offset, in_frame->data[0], in_frame->linesize[0]);
-        in_offset += in_frame->linesize[0];
+        in_frame_bytes = in_frame->nb_samples * av_frame_get_channels(in_frame) * sizeof(uint16_t);
+        if (in_frame_bytes > in_frame->linesize[0]) {
+            av_log(NULL, AV_LOG_ERROR, "Incorrect value of input frame linesize\n");
+            return 1;
+        }
+        memcpy(raw_in + in_offset, in_frame->data[0], in_frame_bytes);
+        in_offset += in_frame_bytes;
         result = avcodec_encode_audio2(enc_ctx, &enc_pkt, in_frame, &got_output);
         if (result < 0) {
             av_log(NULL, AV_LOG_ERROR, "Error encoding audio frame\n");
@@ -192,14 +197,19 @@ static int run_test(AVCodec *enc, AVCodec *dec, AVCodecContext *enc_ctx,
                     av_log(NULL, AV_LOG_ERROR, "Error frames before and after decoding has different sample format\n");
                     return AVERROR_UNKNOWN;
                 }
-                memcpy(raw_out + out_offset, out_frame->data[0], out_frame->linesize[0]);
-                out_offset += out_frame->linesize[0];
+                out_frame_bytes = out_frame->nb_samples * av_frame_get_channels(out_frame) * sizeof(uint16_t);
+                if (out_frame_bytes > out_frame->linesize[0]) {
+                    av_log(NULL, AV_LOG_ERROR, "Incorrect value of output frame linesize\n");
+                    return 1;
+                }
+                memcpy(raw_out + out_offset, out_frame->data[0], out_frame_bytes);
+                out_offset += out_frame_bytes;
             }
         }
-        av_free_packet(&enc_pkt);
+        av_packet_unref(&enc_pkt);
     }
 
-    if (memcmp(raw_in, raw_out, frame_data_size * NUMBER_OF_FRAMES) != 0) {
+    if (memcmp(raw_in, raw_out, out_frame_bytes * NUMBER_OF_FRAMES) != 0) {
         av_log(NULL, AV_LOG_ERROR, "Output differs\n");
         return 1;
     }
diff --git a/tests/api/api-h264-test.c b/tests/api/api-h264-test.c
new file mode 100644
index 00000000..acf1636b
--- /dev/null
+++ b/tests/api/api-h264-test.c
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2015 Ludmila Glinskih
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/**
+ * H264 codec test.
+ */
+
+#include "libavutil/adler32.h"
+#include "libavcodec/avcodec.h"
+#include "libavformat/avformat.h"
+#include "libavutil/imgutils.h"
+
+/* Decode every video packet of input_filename and print one line per decoded
+ * frame: stream index, pts, dts, duration, byte count and Adler-32 checksum.
+ * Returns 0 on success and a non-zero value on failure. */
+static int video_decode_example(const char *input_filename)
+{
+    AVCodec *codec = NULL;
+    AVCodecContext *origin_ctx = NULL, *ctx= NULL;
+    AVFrame *fr = NULL;
+    uint8_t *byte_buffer = NULL;
+    AVPacket pkt;
+    AVFormatContext *fmt_ctx = NULL;
+    int number_of_written_bytes, byte_buffer_size;
+    int video_stream, result;
+    int got_frame = 0;
+    int i = 0; /* iteration counter, used as fallback timestamp for packets without pts */
+    int end_of_stream = 0;
+
+    result = avformat_open_input(&fmt_ctx, input_filename, NULL, NULL);
+    if (result < 0) {
+        av_log(NULL, AV_LOG_ERROR, "Can't open file\n");
+        return result;
+    }
+
+    result = avformat_find_stream_info(fmt_ctx, NULL);
+    if (result < 0) {
+        av_log(NULL, AV_LOG_ERROR, "Can't get stream info\n");
+        return result;
+    }
+
+    video_stream = av_find_best_stream(fmt_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0);
+    if (video_stream < 0) {
+      av_log(NULL, AV_LOG_ERROR, "Can't find video stream in input file\n");
+      return -1;
+    }
+
+    origin_ctx = fmt_ctx->streams[video_stream]->codec;
+
+    codec = avcodec_find_decoder(origin_ctx->codec_id);
+    if (!codec) {
+        av_log(NULL, AV_LOG_ERROR, "Can't find decoder\n");
+        return -1;
+    }
+
+    ctx = avcodec_alloc_context3(codec);
+    if (!ctx) {
+        av_log(NULL, AV_LOG_ERROR, "Can't allocate decoder context\n");
+        return AVERROR(ENOMEM);
+    }
+
+    result = avcodec_copy_context(ctx, origin_ctx);
+    if (result) {
+        av_log(NULL, AV_LOG_ERROR, "Can't copy decoder context\n");
+        return result;
+    }
+
+    result = avcodec_open2(ctx, codec, NULL);
+    if (result < 0) {
+        av_log(ctx, AV_LOG_ERROR, "Can't open decoder\n");
+        return result;
+    }
+
+    fr = av_frame_alloc();
+    if (!fr) {
+        av_log(NULL, AV_LOG_ERROR, "Can't allocate frame\n");
+        return AVERROR(ENOMEM);
+    }
+
+    byte_buffer_size = av_image_get_buffer_size(ctx->pix_fmt, ctx->width, ctx->height, 16);
+    byte_buffer = av_malloc(byte_buffer_size);
+    if (!byte_buffer) {
+        av_log(NULL, AV_LOG_ERROR, "Can't allocate buffer\n");
+        return AVERROR(ENOMEM);
+    }
+
+    printf("#tb %d: %d/%d\n", video_stream, fmt_ctx->streams[video_stream]->time_base.num, fmt_ctx->streams[video_stream]->time_base.den);
+    av_init_packet(&pkt);
+    do {
+        if (!end_of_stream)
+            if (av_read_frame(fmt_ctx, &pkt) < 0)
+                end_of_stream = 1;
+        if (end_of_stream) { /* once the input is exhausted, keep feeding empty packets */
+            pkt.data = NULL;
+            pkt.size = 0;
+        }
+        if (pkt.stream_index == video_stream || end_of_stream) {
+            got_frame = 0;
+            if (pkt.pts == AV_NOPTS_VALUE)
+                pkt.pts = pkt.dts = i;
+            result = avcodec_decode_video2(ctx, fr, &got_frame, &pkt);
+            if (result < 0) {
+                av_log(NULL, AV_LOG_ERROR, "Error decoding frame\n");
+                return result;
+            }
+            if (got_frame) {
+                number_of_written_bytes = av_image_copy_to_buffer(byte_buffer, byte_buffer_size,
+                                        (const uint8_t* const *)fr->data, (const int*) fr->linesize,
+                                        ctx->pix_fmt, ctx->width, ctx->height, 1);
+                if (number_of_written_bytes < 0) {
+                    av_log(NULL, AV_LOG_ERROR, "Can't copy image to buffer\n");
+                    return number_of_written_bytes;
+                }
+                printf("%d, %10"PRId64", %10"PRId64", %8"PRId64", %8d, 0x%08lx\n", video_stream,
+                        fr->pkt_pts, fr->pkt_dts, av_frame_get_pkt_duration(fr),
+                        number_of_written_bytes, av_adler32_update(0, (const uint8_t*)byte_buffer, number_of_written_bytes));
+            }
+        }
+        av_packet_unref(&pkt); /* release every packet, including those of other streams */
+        av_init_packet(&pkt);
+        i++;
+    } while (!end_of_stream || got_frame);
+
+    av_packet_unref(&pkt);
+    av_frame_free(&fr);
+    avcodec_close(ctx);
+    avformat_close_input(&fmt_ctx);
+    avcodec_free_context(&ctx);
+    av_freep(&byte_buffer);
+    return 0;
+}
+
+/* Entry point: decode the file named by argv[1]; exit status is 0 on success, 1 otherwise. */
+int main(int argc, char **argv)
+{
+    int status;
+
+    if (argc < 2) {
+        av_log(NULL, AV_LOG_ERROR, "Incorrect input\n");
+        return 1;
+    }
+
+    av_register_all();
+    status = video_decode_example(argv[1]);
+    /* Collapse any failure code to the conventional exit status 1. */
+    return status != 0;
+}
diff --git a/tests/api/api-seek-test.c b/tests/api/api-seek-test.c
new file mode 100644
index 00000000..135b9724
--- /dev/null
+++ b/tests/api/api-seek-test.c
@@ -0,0 +1,278 @@
+/*
+ * Copyright (c) 2015 Ludmila Glinskih
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/**
+ * Seek test.
+ */
+
+#include "libavutil/adler32.h"
+#include "libavcodec/avcodec.h"
+#include "libavformat/avformat.h"
+#include "libavutil/imgutils.h"
+
+int64_t *pts_array; /* pts of every frame recorded by add_crc_to_array() */
+int64_t *crc_array; /* checksum of the frame stored at the same index of pts_array */
+int size_of_array; /* allocated capacity, in elements, of both arrays */
+int number_of_elements; /* number of entries currently stored in both arrays */
+
+/* Append one (crc, pts) pair to the global arrays, growing both when full.
+ * Returns 0 on success or AVERROR(ENOMEM) if an array cannot be resized. */
+static int add_crc_to_array(int64_t crc, int64_t pts)
+{
+    if (size_of_array <= number_of_elements) {
+        /* the first growth starts from 10 and is doubled like every later one */
+        size_of_array = 2 * (size_of_array ? size_of_array : 10);
+        if (av_reallocp_array(&crc_array, size_of_array, sizeof(*crc_array)) < 0 ||
+            av_reallocp_array(&pts_array, size_of_array, sizeof(*pts_array)) < 0) {
+            av_log(NULL, AV_LOG_ERROR, "Can't allocate array to store crcs\n");
+            return AVERROR(ENOMEM);
+        }
+    }
+    crc_array[number_of_elements] = crc;
+    pts_array[number_of_elements] = pts;
+    number_of_elements++;
+    return 0;
+}
+
+/* Look up pts among the recorded frames and check that its crc matches.
+ * Returns 0 on a match, -1 if the crc differs or the pts was never recorded. */
+static int compare_crc_in_array(int64_t crc, int64_t pts)
+{
+    int i;
+    for (i = 0; i < number_of_elements; i++) {
+        if (pts_array[i] != pts)
+            continue;
+        if (crc_array[i] != crc) {
+            av_log(NULL, AV_LOG_ERROR, "Incorrect crc of a frame after seeking\n");
+            return -1;
+        }
+        printf("Comparing 0x%08"PRIx64" %"PRId64" %d is OK\n", crc, pts, i); /* crc is int64_t: PRIx64, not %lx */
+        return 0;
+    }
+    av_log(NULL, AV_LOG_ERROR, "Incorrect pts of a frame after seeking\n");
+    return -1;
+}
+
+/* Decode video frames from fmt_ctx and checksum each one. With no_seeking set the checksums are
+ * recorded; otherwise the input is first sought to ts_start and each frame is checked against them. */
+static int compute_crc_of_packets(AVFormatContext *fmt_ctx, int video_stream,
+                                AVCodecContext *ctx, AVFrame *fr, uint64_t ts_start, uint64_t ts_end, int no_seeking)
+{
+    int number_of_written_bytes, byte_buffer_size, result;
+    int got_frame = 0;
+    int end_of_stream = 0;
+    uint8_t *byte_buffer;
+    int64_t crc;
+    AVPacket pkt;
+
+    byte_buffer_size = av_image_get_buffer_size(ctx->pix_fmt, ctx->width, ctx->height, 16);
+    byte_buffer = av_malloc(byte_buffer_size);
+    if (!byte_buffer) {
+        av_log(NULL, AV_LOG_ERROR, "Can't allocate buffer\n");
+        return AVERROR(ENOMEM);
+    }
+
+    if (!no_seeking) {
+        result = av_seek_frame(fmt_ctx, video_stream, ts_start, AVSEEK_FLAG_ANY);
+        printf("Seeking to %"PRId64", computing crc for frames with pts < %"PRId64"\n", ts_start, ts_end);
+        if (result < 0) {
+            av_log(NULL, AV_LOG_ERROR, "Error in seeking\n");
+            return result;
+        }
+        avcodec_flush_buffers(ctx);
+    }
+
+    av_init_packet(&pkt);
+    do {
+        if (!end_of_stream)
+            if (av_read_frame(fmt_ctx, &pkt) < 0)
+                end_of_stream = 1;
+        if (end_of_stream) { /* once the input is exhausted, keep feeding empty packets */
+            pkt.data = NULL;
+            pkt.size = 0;
+        }
+        if (pkt.stream_index == video_stream || end_of_stream) {
+            got_frame = 0;
+            if ((pkt.pts == AV_NOPTS_VALUE) && (!end_of_stream)) {
+                av_log(NULL, AV_LOG_ERROR, "Error: frames doesn't have pts values\n");
+                return -1;
+            }
+            result = avcodec_decode_video2(ctx, fr, &got_frame, &pkt);
+            if (result < 0) {
+                av_log(NULL, AV_LOG_ERROR, "Error decoding frame\n");
+                return result;
+            }
+            if (got_frame) {
+                number_of_written_bytes = av_image_copy_to_buffer(byte_buffer, byte_buffer_size,
+                                        (const uint8_t* const *)fr->data, (const int*) fr->linesize,
+                                        ctx->pix_fmt, ctx->width, ctx->height, 1);
+                if (number_of_written_bytes < 0) {
+                    av_log(NULL, AV_LOG_ERROR, "Can't copy image to buffer\n");
+                    return number_of_written_bytes;
+                }
+                if ((fr->pkt_pts > ts_end) && (!no_seeking))
+                    break;
+                crc = av_adler32_update(0, (const uint8_t*)byte_buffer, number_of_written_bytes);
+                printf("%10"PRId64", 0x%08"PRIx64"\n", fr->pkt_pts, crc); /* crc is int64_t: PRIx64, not %lx */
+                if (no_seeking) {
+                    if (add_crc_to_array(crc, fr->pkt_pts) < 0)
+                        return -1;
+                }
+                else {
+                    if (compare_crc_in_array(crc, fr->pkt_pts) < 0)
+                        return -1;
+                }
+            }
+        }
+        av_packet_unref(&pkt);
+        av_init_packet(&pkt);
+    } while ((!end_of_stream || got_frame) && (no_seeking || (fr->pkt_pts + av_frame_get_pkt_duration(fr) <= ts_end)));
+
+    av_packet_unref(&pkt);
+    av_freep(&byte_buffer);
+
+    return 0;
+}
+
+/* Parse a non-negative decimal seek timestamp.
+ * Returns the parsed value, or -1 (after logging) if the string is empty,
+ * has trailing characters, is negative or does not fit in a long. */
+static long int read_seek_range(const char *string_with_number)
+{
+    long int number;
+    char *end_of_string = NULL;
+
+    errno = 0; /* strtol only sets errno on failure, so clear any stale value first */
+    number = strtol(string_with_number, &end_of_string, 10);
+    if (end_of_string == string_with_number || *end_of_string != '\0' ||
+        number < 0 || errno == ERANGE) {
+        av_log(NULL, AV_LOG_ERROR, "Incorrect input ranges of seeking\n");
+        return -1;
+    }
+    return number;
+}
+
+/* Record a checksum for every frame of input_filename, then seek into [start, end) in steps of
+ * 100 and verify the frames decoded after each seek. Returns 0 on success, non-zero on failure. */
+static int seek_test(const char *input_filename, const char *start, const char *end)
+{
+    AVCodec *codec = NULL;
+    AVCodecContext *origin_ctx = NULL, *ctx= NULL;
+    AVFrame *fr = NULL;
+    AVFormatContext *fmt_ctx = NULL;
+    int video_stream, result;
+    long int start_ts, end_ts, i, j; /* same width as the parsed bounds, so no truncation */
+
+    size_of_array = 0;
+    number_of_elements = 0;
+    crc_array = pts_array = NULL;
+
+    result = avformat_open_input(&fmt_ctx, input_filename, NULL, NULL);
+    if (result < 0) {
+        av_log(NULL, AV_LOG_ERROR, "Can't open file\n");
+        return result;
+    }
+
+    result = avformat_find_stream_info(fmt_ctx, NULL);
+    if (result < 0) {
+        av_log(NULL, AV_LOG_ERROR, "Can't get stream info\n");
+        return result;
+    }
+
+    start_ts = read_seek_range(start);
+    end_ts = read_seek_range(end);
+    if ((start_ts < 0) || (end_ts < 0))
+        return -1;
+
+    //TODO: add ability to work with audio format
+    video_stream = av_find_best_stream(fmt_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0);
+    if (video_stream < 0) {
+      av_log(NULL, AV_LOG_ERROR, "Can't find video stream in input file\n");
+      return -1;
+    }
+
+    origin_ctx = fmt_ctx->streams[video_stream]->codec;
+
+    codec = avcodec_find_decoder(origin_ctx->codec_id);
+    if (!codec) {
+        av_log(NULL, AV_LOG_ERROR, "Can't find decoder\n");
+        return -1;
+    }
+
+    ctx = avcodec_alloc_context3(codec);
+    if (!ctx) {
+        av_log(NULL, AV_LOG_ERROR, "Can't allocate decoder context\n");
+        return AVERROR(ENOMEM);
+    }
+
+    result = avcodec_copy_context(ctx, origin_ctx);
+    if (result) {
+        av_log(NULL, AV_LOG_ERROR, "Can't copy decoder context\n");
+        return result;
+    }
+
+    result = avcodec_open2(ctx, codec, NULL);
+    if (result < 0) {
+        av_log(ctx, AV_LOG_ERROR, "Can't open decoder\n");
+        return result;
+    }
+
+    fr = av_frame_alloc();
+    if (!fr) {
+        av_log(NULL, AV_LOG_ERROR, "Can't allocate frame\n");
+        return AVERROR(ENOMEM);
+    }
+
+    result = compute_crc_of_packets(fmt_ctx, video_stream, ctx, fr, 0, 0, 1); /* range is ignored when not seeking */
+    if (result != 0)
+        return -1;
+
+    for (i = start_ts; i < end_ts; i += 100) {
+        for (j = i + 100; j < end_ts; j += 100) {
+            if (compute_crc_of_packets(fmt_ctx, video_stream, ctx, fr, i, j, 0) != 0)
+                return -1; /* every seek range is checked, not only the last one */
+        }
+    }
+
+    av_freep(&crc_array);
+    av_freep(&pts_array);
+    av_frame_free(&fr);
+    avcodec_close(ctx);
+    avformat_close_input(&fmt_ctx);
+    avcodec_free_context(&ctx);
+    return 0;
+}
+
+/* Usage: <input file> <start ts> <end ts>; exits with 0 on success and 1 on any failure. */
+int main(int argc, char **argv)
+{
+    const int enough_args = argc >= 4;
+
+    if (!enough_args) {
+        av_log(NULL, AV_LOG_ERROR, "Incorrect input\n");
+        return 1;
+    }
+
+    av_register_all();
+
+    return seek_test(argv[1], argv[2], argv[3]) ? 1 : 0;
+}
diff --git a/tests/api/api-threadmessage-test.c b/tests/api/api-threadmessage-test.c
new file mode 100644
index 00000000..3e0ac5cf
--- /dev/null
+++ b/tests/api/api-threadmessage-test.c
@@ -0,0 +1,260 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/**
+ * Thread message API test
+ */
+
+#include "libavutil/avassert.h"
+#include "libavutil/avstring.h"
+#include "libavutil/frame.h"
+#include "libavutil/threadmessage.h"
+#include "libavutil/thread.h" // not public
+
+struct sender_data {
+    int id; /* index of the sender, assigned when its thread is spawned */
+    pthread_t tid; /* thread handle filled in by pthread_create() */
+    int workload; /* number of loop iterations the sender performs */
+    AVThreadMessageQueue *queue; /* the single queue shared by all senders and receivers */
+};
+
+/* same as sender_data but shuffled for testing purpose */
+struct receiver_data {
+    pthread_t tid; /* thread handle filled in by pthread_create() */
+    int workload; /* number of loop iterations the receiver performs */
+    int id; /* index of the receiver, assigned when its thread is spawned */
+    AVThreadMessageQueue *queue; /* the single queue shared by all senders and receivers */
+};
+
+struct message {
+    AVFrame *frame; /* payload allocated by a sender; freed by the receiver or by free_frame() */
+    // we add some junk in the message to make sure the message size is >
+    // sizeof(void*)
+    int magic; /* set to MAGIC by senders and asserted by whoever consumes the message */
+};
+
+#define MAGIC 0xdeadc0de
+
+static void free_frame(void *arg) /* installed in main() as the queue's free function */
+{
+    struct message *msg = arg; /* arg is treated as a pointer to one struct message */
+    av_assert0(msg->magic == MAGIC); /* abort if the message does not carry the expected marker */
+    av_frame_free(&msg->frame);
+}
+
+static void *sender_thread(void *arg) /* thread body: arg is a struct sender_data; always returns NULL */
+{
+    int i, ret = 0;
+    struct sender_data *wd = arg;
+
+    av_log(NULL, AV_LOG_INFO, "sender #%d: workload=%d\n", wd->id, wd->workload);
+    for (i = 0; i < wd->workload; i++) {
+        if (rand() % wd->workload < wd->workload / 10) { /* roughly one iteration in ten flushes instead of sending */
+            av_log(NULL, AV_LOG_INFO, "sender #%d: flushing the queue\n", wd->id);
+            av_thread_message_flush(wd->queue);
+        } else {
+            char *val;
+            AVDictionary *meta = NULL;
+            struct message msg = {
+                .magic = MAGIC,
+                .frame = av_frame_alloc(),
+            };
+
+            if (!msg.frame) {
+                ret = AVERROR(ENOMEM);
+                break;
+            }
+
+            /* we add some metadata to identify the frames */
+            val = av_asprintf("frame %d/%d from sender %d",
+                              i + 1, wd->workload, wd->id);
+            if (!val) {
+                av_frame_free(&msg.frame);
+                ret = AVERROR(ENOMEM);
+                break;
+            }
+            ret = av_dict_set(&meta, "sig", val, AV_DICT_DONT_STRDUP_VAL); /* val is handed over to the dictionary, not copied */
+            if (ret < 0) {
+                av_frame_free(&msg.frame);
+                break;
+            }
+            av_frame_set_metadata(msg.frame, meta);
+
+            /* allocate a real frame in order to simulate "real" work */
+            msg.frame->format = AV_PIX_FMT_RGBA;
+            msg.frame->width  = 320;
+            msg.frame->height = 240;
+            ret = av_frame_get_buffer(msg.frame, 32);
+            if (ret < 0) {
+                av_frame_free(&msg.frame);
+                break;
+            }
+
+            /* push the frame in the common queue */
+            av_log(NULL, AV_LOG_INFO, "sender #%d: sending my work (%d/%d frame:%p)\n",
+                   wd->id, i + 1, wd->workload, msg.frame);
+            ret = av_thread_message_queue_send(wd->queue, &msg, 0);
+            if (ret < 0) { /* the message was not queued, so the frame is still ours to free */
+                av_frame_free(&msg.frame);
+                break;
+            }
+        }
+    }
+    av_log(NULL, AV_LOG_INFO, "sender #%d: my work is done here (%s)\n",
+           wd->id, av_err2str(ret));
+    av_thread_message_queue_set_err_recv(wd->queue, ret < 0 ? ret : AVERROR_EOF); /* pass our error, or EOF after a clean run, to the receiving side */
+    return NULL;
+}
+
+static void *receiver_thread(void *arg) /* thread body: arg is a struct receiver_data; always returns NULL */
+{
+    int i, ret = 0;
+    struct receiver_data *rd = arg;
+
+    for (i = 0; i < rd->workload; i++) {
+        if (rand() % rd->workload < rd->workload / 10) { /* roughly one iteration in ten flushes instead of receiving */
+            av_log(NULL, AV_LOG_INFO, "receiver #%d: flushing the queue\n", rd->id);
+            av_thread_message_flush(rd->queue);
+        } else {
+            struct message msg;
+            AVDictionary *meta;
+            AVDictionaryEntry *e;
+
+            ret = av_thread_message_queue_recv(rd->queue, &msg, 0);
+            if (ret < 0) /* includes the error code set by a finished sender */
+                break;
+            av_assert0(msg.magic == MAGIC);
+            meta = av_frame_get_metadata(msg.frame);
+            e = av_dict_get(meta, "sig", NULL, 0); /* NOTE(review): not NULL-checked; relies on senders always setting "sig" */
+            av_log(NULL, AV_LOG_INFO, "got \"%s\" (%p)\n", e->value, msg.frame);
+            av_frame_free(&msg.frame); /* the receiver owns the frame once it has been dequeued */
+        }
+    }
+
+    av_log(NULL, AV_LOG_INFO, "consumed enough (%d), stop\n", i);
+    av_thread_message_queue_set_err_send(rd->queue, ret < 0 ? ret : AVERROR_EOF); /* pass our error, or EOF after a clean run, to the sending side */
+
+    return NULL;
+}
+
+static int get_workload(int minv, int maxv) /* random load in [minv, maxv), or minv when both bounds are equal */
+{
+    return minv + (maxv != minv ? rand() % (maxv - minv) : 0);
+}
+
+/* Run the requested sender and receiver threads over one shared queue; exit with 1 on any failure. */
+int main(int ac, char **av)
+{
+    int i, ret = 0, max_queue_size;
+    int nb_senders, sender_min_load, sender_max_load;
+    int nb_receivers, receiver_min_load, receiver_max_load;
+    struct sender_data *senders;
+    struct receiver_data *receivers;
+    AVThreadMessageQueue *queue = NULL;
+
+    if (ac != 8) {
+        av_log(NULL, AV_LOG_ERROR, "%s <max_queue_size> "
+               "<nb_senders> <sender_min_send> <sender_max_send> "
+               "<nb_receivers> <receiver_min_recv> <receiver_max_recv>\n", av[0]);
+        return 1;
+    }
+
+    max_queue_size    = atoi(av[1]);
+    nb_senders        = atoi(av[2]);
+    sender_min_load   = atoi(av[3]);
+    sender_max_load   = atoi(av[4]);
+    nb_receivers      = atoi(av[5]);
+    receiver_min_load = atoi(av[6]);
+    receiver_max_load = atoi(av[7]);
+
+    if (max_queue_size <= 0 ||
+        nb_senders <= 0 || sender_min_load <= 0 || sender_max_load <= 0 ||
+        nb_receivers <= 0 || receiver_min_load <= 0 || receiver_max_load <= 0) {
+        av_log(NULL, AV_LOG_ERROR, "negative values not allowed\n");
+        return 1;
+    }
+
+    av_log(NULL, AV_LOG_INFO, "qsize:%d / %d senders sending [%d-%d] / "
+           "%d receivers receiving [%d-%d]\n", max_queue_size,
+           nb_senders, sender_min_load, sender_max_load,
+           nb_receivers, receiver_min_load, receiver_max_load);
+
+    senders = av_mallocz_array(nb_senders, sizeof(*senders));
+    receivers = av_mallocz_array(nb_receivers, sizeof(*receivers));
+    if (!senders || !receivers) {
+        ret = AVERROR(ENOMEM);
+        goto end;
+    }
+
+    ret = av_thread_message_queue_alloc(&queue, max_queue_size, sizeof(struct message));
+    if (ret < 0)
+        goto end;
+
+    av_thread_message_queue_set_free_func(queue, free_frame);
+
+#define SPAWN_THREADS(type) do {                                                \
+    for (i = 0; i < nb_##type##s; i++) {                                        \
+        struct type##_data *td = &type##s[i];                                   \
+                                                                                \
+        td->id = i;                                                             \
+        td->queue = queue;                                                      \
+        td->workload = get_workload(type##_min_load, type##_max_load);          \
+                                                                                \
+        ret = pthread_create(&td->tid, NULL, type##_thread, td);                \
+        if (ret) {                                                              \
+            ret = AVERROR(ret);                                                 \
+            av_log(NULL, AV_LOG_ERROR, "Unable to start " AV_STRINGIFY(type)    \
+                   " thread: %s\n", av_err2str(ret));                           \
+            goto end;                                                           \
+        }                                                                       \
+    }                                                                           \
+} while (0)
+
+#define WAIT_THREADS(type) do {                                                 \
+    for (i = 0; i < nb_##type##s; i++) {                                        \
+        struct type##_data *td = &type##s[i];                                   \
+                                                                                \
+        ret = pthread_join(td->tid, NULL);                                      \
+        if (ret) {                                                              \
+            ret = AVERROR(ret);                                                 \
+            av_log(NULL, AV_LOG_ERROR, "Unable to join " AV_STRINGIFY(type)     \
+                   " thread: %s\n", av_err2str(ret));                           \
+            goto end;                                                           \
+        }                                                                       \
+    }                                                                           \
+} while (0)
+
+    SPAWN_THREADS(receiver);
+    SPAWN_THREADS(sender);
+
+    WAIT_THREADS(sender);
+    WAIT_THREADS(receiver);
+
+end: /* NOTE(review): also reached on a spawn/join failure while other threads may still be running */
+    av_thread_message_queue_free(&queue);
+    av_freep(&senders);
+    av_freep(&receivers);
+
+    if (ret < 0 && ret != AVERROR_EOF) { /* pthread failures are converted with AVERROR() above, so they land here too */
+        av_log(NULL, AV_LOG_ERROR, "Error: %s\n", av_err2str(ret));
+        return 1;
+    }
+    return 0;
+}
diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
new file mode 100644
index 00000000..bfd7c113
--- /dev/null
+++ b/tests/checkasm/Makefile
@@ -0,0 +1,44 @@
+# libavcodec tests
+AVCODECOBJS-$(CONFIG_ALAC_DECODER) += alacdsp.o
+AVCODECOBJS-$(CONFIG_BLEND_FILTER) += vf_blend.o
+AVCODECOBJS-$(CONFIG_BSWAPDSP) += bswapdsp.o
+AVCODECOBJS-$(CONFIG_DCA_DECODER) += synth_filter.o
+AVCODECOBJS-$(CONFIG_FLACDSP)  += flacdsp.o
+AVCODECOBJS-$(CONFIG_FMTCONVERT)   += fmtconvert.o
+AVCODECOBJS-$(CONFIG_H264PRED) += h264pred.o
+AVCODECOBJS-$(CONFIG_H264QPEL) += h264qpel.o
+AVCODECOBJS-$(CONFIG_JPEG2000_DECODER) += jpeg2000dsp.o
+AVCODECOBJS-$(CONFIG_PIXBLOCKDSP) += pixblockdsp.o
+AVCODECOBJS-$(CONFIG_V210_ENCODER) += v210enc.o
+AVCODECOBJS-$(CONFIG_VP9_DECODER) += vp9dsp.o
+AVCODECOBJS-$(CONFIG_VIDEODSP) += videodsp.o
+
+CHECKASMOBJS-$(CONFIG_AVCODEC) += $(AVCODECOBJS-yes)
+
+
+-include $(SRC_PATH)/tests/checkasm/$(ARCH)/Makefile
+
+CHECKASMOBJS += $(CHECKASMOBJS-yes) checkasm.o
+CHECKASMOBJS := $(sort $(CHECKASMOBJS:%=tests/checkasm/%))
+
+-include $(CHECKASMOBJS:.o=.d)
+
+CHECKASMDIRS := $(sort $(dir $(CHECKASMOBJS)))
+$(CHECKASMOBJS): | $(CHECKASMDIRS)
+OBJDIRS += $(CHECKASMDIRS)
+
+tests/checkasm/checkasm.o: CFLAGS += -Umain
+
+CHECKASM := tests/checkasm/checkasm$(EXESUF)
+
+$(CHECKASM): $(EXEOBJS) $(CHECKASMOBJS) $(FF_STATIC_DEP_LIBS)
+	$(LD) $(LDFLAGS) $(LDEXEFLAGS) $(LD_O) $(CHECKASMOBJS) $(FF_STATIC_DEP_LIBS) $(EXTRALIBS)
+
+checkasm: $(CHECKASM)
+
+testclean:: checkasmclean
+
+checkasmclean:
+	$(RM) $(CHECKASM) $(CLEANSUFFIXES:%=tests/checkasm/%) $(CLEANSUFFIXES:%=tests/checkasm/$(ARCH)/%)
+
+.PHONY: checkasm checkasmclean
diff --git a/tests/checkasm/aarch64/Makefile b/tests/checkasm/aarch64/Makefile
new file mode 100644
index 00000000..02ba6caf
--- /dev/null
+++ b/tests/checkasm/aarch64/Makefile
@@ -0,0 +1 @@
+CHECKASMOBJS += aarch64/checkasm.o
diff --git a/tests/checkasm/aarch64/checkasm.S b/tests/checkasm/aarch64/checkasm.S
new file mode 100644
index 00000000..6317d806
--- /dev/null
+++ b/tests/checkasm/aarch64/checkasm.S
@@ -0,0 +1,148 @@
+/****************************************************************************
+ * Assembly testing and benchmarking tool
+ * Copyright (c) 2015 Martin Storsjo
+ * Copyright (c) 2015 Janne Grunau
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *****************************************************************************/
+
+#include "libavutil/aarch64/asm.S"
+
+const register_init
+    .quad 0x21f86d66c8ca00ce
+    .quad 0x75b6ba21077c48ad
+    .quad 0xed56bb2dcb3c7736
+    .quad 0x8bda43d3fd1a7e06
+    .quad 0xb64a9c9e5d318408
+    .quad 0xdf9a54b303f1d3a3
+    .quad 0x4a75479abd64e097
+    .quad 0x249214109d5d1c88
+    .quad 0x1a1b2550a612b48c
+    .quad 0x79445c159ce79064
+    .quad 0x2eed899d5a28ddcd
+    .quad 0x86b2536fcd8cf636
+    .quad 0xb0856806085e7943
+    .quad 0x3f2bf84fc0fcca4e
+    .quad 0xacbd382dcf5b8de2
+    .quad 0xd229e1f5b281303f
+    .quad 0x71aeaff20b095fd9
+    .quad 0xab63e2e11fa38ed9
+endconst
+
+
+const error_message
+    .asciz "failed to preserve register"
+endconst
+
+
+// max number of args used by any asm function.
+#define MAX_ARGS 15
+
+#define ARG_STACK ((8*(MAX_ARGS - 7) + 15) & ~15)
+
+function checkasm_checked_call, export=1
+    stp         x29, x30, [sp, #-16]!
+    mov         x29, sp
+    stp         x19, x20, [sp, #-16]!
+    stp         x21, x22, [sp, #-16]!
+    stp         x23, x24, [sp, #-16]!
+    stp         x25, x26, [sp, #-16]!
+    stp         x27, x28, [sp, #-16]!
+    stp         d8,  d9,  [sp, #-16]!
+    stp         d10, d11, [sp, #-16]!
+    stp         d12, d13, [sp, #-16]!
+    stp         d14, d15, [sp, #-16]!
+    // seed d8-d15 and x19-x28 with the register_init patterns
+    movrel      x9, register_init
+    ldp         d8,  d9,  [x9], #16
+    ldp         d10, d11, [x9], #16
+    ldp         d12, d13, [x9], #16
+    ldp         d14, d15, [x9], #16
+    ldp         x19, x20, [x9], #16
+    ldp         x21, x22, [x9], #16
+    ldp         x23, x24, [x9], #16
+    ldp         x25, x26, [x9], #16
+    ldp         x27, x28, [x9], #16
+    // make room for the outgoing stack arguments
+    sub         sp,  sp,  #ARG_STACK
+.equ pos, 0
+// the first stacked arg is copied to x7
+.rept MAX_ARGS-7
+    ldr         x9, [x29, #16 + 8 + pos]
+    str         x9, [sp, #pos]
+.equ pos, pos + 8
+.endr
+    // x0 carries the function pointer: shift every register argument down by one
+    mov         x12, x0
+    mov         x0,  x1
+    mov         x1,  x2
+    mov         x2,  x3
+    mov         x3,  x4
+    mov         x4,  x5
+    mov         x5,  x6
+    mov         x6,  x7
+    ldr         x7,  [x29, #16]
+    blr         x12
+    add         sp,  sp,  #ARG_STACK
+    stp         x0,  x1,  [sp, #-16]!
+    movrel      x9, register_init
+    movi        v3.8h,  #0
+// XOR the low halves of two vector registers with the next 16 table bytes, OR the result into v3
+.macro check_reg_neon reg1, reg2
+    ldr         q0,  [x9], #16
+    uzp1        v1.2d,  v\reg1\().2d, v\reg2\().2d
+    eor         v0.16b, v0.16b, v1.16b
+    orr         v3.16b, v3.16b, v0.16b
+.endm
+    check_reg_neon  8,  9
+    check_reg_neon  10, 11
+    check_reg_neon  12, 13
+    check_reg_neon  14, 15
+    uqxtn       v3.8b,  v3.8h
+    umov        x3,  v3.d[0]
+// XOR two general-purpose registers with the next two table words, OR the result into x3
+.macro check_reg reg1, reg2
+    ldp         x0,  x1,  [x9], #16
+    eor         x0,  x0,  \reg1
+    eor         x1,  x1,  \reg2
+    orr         x3,  x3,  x0
+    orr         x3,  x3,  x1
+.endm
+    check_reg   x19, x20
+    check_reg   x21, x22
+    check_reg   x23, x24
+    check_reg   x25, x26
+    check_reg   x27, x28
+
+    cbz         x3,  0f
+    // x3 != 0: some checked register no longer holds its pattern
+    movrel      x0, error_message
+    bl          X(checkasm_fail_func)
+0:
+    ldp         x0,  x1,  [sp], #16
+    ldp         d14, d15, [sp], #16
+    ldp         d12, d13, [sp], #16
+    ldp         d10, d11, [sp], #16
+    ldp         d8,  d9,  [sp], #16
+    ldp         x27, x28, [sp], #16
+    ldp         x25, x26, [sp], #16
+    ldp         x23, x24, [sp], #16
+    ldp         x21, x22, [sp], #16
+    ldp         x19, x20, [sp], #16
+    ldp         x29, x30, [sp], #16
+    ret
+endfunc
diff --git a/tests/checkasm/alacdsp.c b/tests/checkasm/alacdsp.c
new file mode 100644
index 00000000..cbf03f82
--- /dev/null
+++ b/tests/checkasm/alacdsp.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2015 James Almer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+#include "checkasm.h"
+#include "libavcodec/alacdsp.h"
+#include "libavcodec/mathops.h"
+#include "libavutil/common.h"
+#include "libavutil/internal.h"
+
+#define BUF_SIZE 256
+#define MAX_CHANNELS 2
+
+#define randomize_buffers()                           \
+    do {                                              \
+        int i;                                        \
+        for (i = 0; i < BUF_SIZE*MAX_CHANNELS; i++) { \
+            int32_t r = sign_extend(rnd(), 24);       \
+            ref_buf[i] = r;                           \
+            new_buf[i] = r;                           \
+        }                                             \
+    } while (0)
+
+static void check_decorrelate_stereo(void)
+{
+    LOCAL_ALIGNED_16(int32_t, ref_buf, [BUF_SIZE*MAX_CHANNELS]);
+    LOCAL_ALIGNED_16(int32_t, new_buf, [BUF_SIZE*MAX_CHANNELS]);
+    int32_t *ref[2] = { &ref_buf[BUF_SIZE*0], &ref_buf[BUF_SIZE*1] };
+    int32_t *new[2] = { &new_buf[BUF_SIZE*0], &new_buf[BUF_SIZE*1] };
+    ALACDSPContext c;
+    /* Checks c.decorrelate_stereo; ref[] and new[] each split one buffer into two BUF_SIZE-long channels */
+    ff_alacdsp_init(&c);
+    if (check_func(c.decorrelate_stereo, "alac_decorrelate_stereo")) {
+        int len    = (rnd() & 0xFF) + 1;
+        int shift  =  rnd() & 0x1F;
+        int weight =  rnd() & 0xFF;
+        declare_func(void, int32_t *buf[2], int len, int shift, int weight);
+        /* len is 1..256, shift 0..31, weight 0..255; only the first len samples of each channel are compared */
+        randomize_buffers();
+        call_ref(ref, len, shift, weight);
+        call_new(new, len, shift, weight);
+        if (memcmp(ref[0], new[0], len * sizeof(int32_t)) ||
+            memcmp(ref[1], new[1], len * sizeof(int32_t)))
+            fail();
+        bench_new(new, BUF_SIZE, shift, weight);
+    }
+
+    report("decorrelate_stereo");
+}
+
+#undef randomize_buffers
+#define randomize_buffers()                           \
+    do {                                              \
+        int i, j;                                     \
+        for (i = 0; i < BUF_SIZE; i++) {              \
+            for (j = 0; j < ch; j++) {                \
+                int32_t r = sign_extend(rnd(), 24);   \
+                ref[j][i] = r;                        \
+                new[j][i] = r;                        \
+                r = rnd() & 0xFF;                     \
+                ref_ebb[j][i] = r;                    \
+                new_ebb[j][i] = r;                    \
+            }                                         \
+        }                                             \
+    } while (0)
+
+static void check_append_extra_bits(void)
+{
+    LOCAL_ALIGNED_16(int32_t, ref_buf, [BUF_SIZE*MAX_CHANNELS*2]);
+    LOCAL_ALIGNED_16(int32_t, new_buf, [BUF_SIZE*MAX_CHANNELS*2]);
+    int32_t *ref[2]     = { &ref_buf[BUF_SIZE*0], &ref_buf[BUF_SIZE*1] };
+    int32_t *new[2]     = { &new_buf[BUF_SIZE*0], &new_buf[BUF_SIZE*1] };
+    int32_t *ref_ebb[2] = { &ref_buf[BUF_SIZE*2], &ref_buf[BUF_SIZE*3] };
+    int32_t *new_ebb[2] = { &new_buf[BUF_SIZE*2], &new_buf[BUF_SIZE*3] };
+    ALACDSPContext c;
+    static const char * const channels[2] = { "mono", "stereo" };
+    int ch;
+    /* ref/new are the sample channels and ref_ebb/new_ebb the extra-bits channels, all slices of one buffer */
+    ff_alacdsp_init(&c);
+    for (ch = 1; ch <= 2; ch++) {
+        if (check_func(c.append_extra_bits[ch-1], "alac_append_extra_bits_%s", channels[ch-1])) {
+            int len    = (rnd() & 0xFF) + 1;
+            declare_func(void, int32_t *buf[2], int32_t *ebb[2], int ebits, int ch, int len);
+            /* ebits is fixed at 8; the second channel is only compared in the stereo (ch == 2) case */
+            randomize_buffers();
+            call_ref(ref, ref_ebb, 8, ch, len);
+            call_new(new, new_ebb, 8, ch, len);
+            if (            memcmp(ref[0], new[0], len * sizeof(int32_t)) ||
+                (ch == 2 && memcmp(ref[1], new[1], len * sizeof(int32_t))))
+                fail();
+            bench_new(new, new_ebb, 8, ch, BUF_SIZE);
+        }
+    }
+
+    report("append_extra_bits");
+}
+
+void checkasm_check_alacdsp(void)
+{   /* Test entry point: runs both ALAC DSP checks in turn */
+    check_decorrelate_stereo();
+    check_append_extra_bits();
+}
diff --git a/tests/checkasm/arm/Makefile b/tests/checkasm/arm/Makefile
new file mode 100644
index 00000000..55f2383d
--- /dev/null
+++ b/tests/checkasm/arm/Makefile
@@ -0,0 +1 @@
+CHECKASMOBJS-$(HAVE_ARMV5TE_EXTERNAL) += arm/checkasm.o
diff --git a/tests/checkasm/arm/checkasm.S b/tests/checkasm/arm/checkasm.S
new file mode 100644
index 00000000..3e3806b2
--- /dev/null
+++ b/tests/checkasm/arm/checkasm.S
@@ -0,0 +1,142 @@
+/****************************************************************************
+ * Assembly testing and benchmarking tool
+ * Copyright (c) 2015 Martin Storsjo
+ * Copyright (c) 2015 Janne Grunau
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+ *****************************************************************************/
+
+#include "libavutil/arm/asm.S"
+
+const register_init
+    .quad 0x21f86d66c8ca00ce
+    .quad 0x75b6ba21077c48ad
+    .quad 0xed56bb2dcb3c7736
+    .quad 0x8bda43d3fd1a7e06
+    .quad 0xb64a9c9e5d318408
+    .quad 0xdf9a54b303f1d3a3
+    .quad 0x4a75479abd64e097
+    .quad 0x249214109d5d1c88
+endconst
+
+const error_message
+    .asciz "failed to preserve register"
+endconst
+
+@ max number of args used by any asm function.
+#define MAX_ARGS 15
+
+#define ARG_STACK 4*(MAX_ARGS - 2)
+
+.macro clobbercheck variant
+.equ pushed, 4*9
+function checkasm_checked_call_\variant, export=1
+    push        {r4-r11, lr}
+.ifc \variant, vfp
+    vpush       {d8-d15}
+    fmrx        r4,  FPSCR
+    push        {r4}
+.equ pushed, pushed + 16*4 + 4
+.endif
+    @ load the register_init patterns into d8-d15 (vfp variant only) and r4-r11
+    movrel      r12, register_init
+.ifc \variant, vfp
+    vldm        r12, {d8-d15}
+.endif
+    ldm         r12, {r4-r11}
+    @ copy the caller's stacked arguments into a fresh outgoing argument area
+    sub         sp,  sp,  #ARG_STACK
+.equ pos, 0
+.rept MAX_ARGS-2
+    ldr         r12, [sp, #ARG_STACK + pushed + 8 + pos]
+    str         r12, [sp, #pos]
+.equ pos, pos + 4
+.endr
+    @ r0 is the function to call and r1 is not forwarded; r2/r3 and two stacked words become r0-r3
+    mov         r12, r0
+    mov         r0,  r2
+    mov         r1,  r3
+    ldrd        r2,  r3,  [sp, #ARG_STACK + pushed]
+    blx         r12
+    add         sp,  sp,  #ARG_STACK
+    @ keep the return value on the stack; r3 accumulates every detected difference
+    push        {r0, r1}
+    movrel      r12, register_init
+    mov         r3,  #0
+.ifc \variant, vfp
+.macro check_reg_vfp, dreg, inc=8
+    ldrd        r0,  r1,  [r12], #\inc
+    vmov        r2,  lr,  \dreg
+    eor         r0,  r0,  r2
+    eor         r1,  r1,  lr
+    orr         r3,  r3,  r0
+    orr         r3,  r3,  r1
+.endm
+    @ d8-d14 step r12 through the table; the d15 check (-56) rewinds it to the table start
+.irp n, 8, 9, 10, 11, 12, 13, 14
+    check_reg_vfp d\n
+.endr
+    check_reg_vfp d15, -56
+.purgem check_reg_vfp
+    @ compare the current FPSCR with the copy pushed on entry
+    fmrx        r0,  FPSCR
+    ldr         r1,  [sp, #8]
+    eor         r0,  r0,  r1
+    @ Ignore changes in the topmost 5 bits
+    lsl         r0,  r0,  #5
+    orr         r3,  r3,  r0
+.endif
+
+.macro check_reg reg1, reg2=
+    ldrd        r0,  r1,  [r12], #8
+    eor         r0,  r0,  \reg1
+    orrs        r3,  r3,  r0
+.ifnb \reg2
+    eor         r1,  r1,  \reg2
+    orrs        r3,  r3,  r1
+.endif
+.endm
+    check_reg   r4,  r5
+    check_reg   r6,  r7
+@ r9 is a volatile register in the ios ABI
+#ifdef __APPLE__
+    check_reg   r8
+#else
+    check_reg   r8,  r9
+#endif
+    check_reg   r10, r11
+.purgem check_reg
+    @ the last orrs left Z set only if r3 is still zero, i.e. nothing differed
+    beq         0f
+
+    movrel      r0, error_message
+    blx         X(checkasm_fail_func)
+0:
+    pop         {r0, r1}
+.ifc \variant, vfp
+    pop         {r2}
+    fmxr        FPSCR, r2
+    vpop        {d8-d15}
+.endif
+    pop         {r4-r11, pc}
+endfunc
+.endm
+
+#if HAVE_VFP || HAVE_NEON
+clobbercheck vfp
+#endif
+clobbercheck novfp
diff --git a/tests/checkasm/bswapdsp.c b/tests/checkasm/bswapdsp.c
new file mode 100644
index 00000000..5f755503
--- /dev/null
+++ b/tests/checkasm/bswapdsp.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2015 Henrik Gramner
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+#include "checkasm.h"
+#include "libavcodec/bswapdsp.h"
+#include "libavutil/common.h"
+#include "libavutil/internal.h"
+#include "libavutil/intreadwrite.h"
+
+#define BUF_SIZE 512
+
+#define randomize_buffers()                 \
+    do {                                    \
+        int i;                              \
+        for (i = 0; i < BUF_SIZE; i += 4) { \
+            uint32_t r = rnd();             \
+            AV_WN32A(src0 + i, r);          \
+            AV_WN32A(src1 + i, r);          \
+            r = rnd();                      \
+            AV_WN32A(dst0 + i, r);          \
+            AV_WN32A(dst1 + i, r);          \
+        }                                   \
+    } while (0)
+
+#define check_bswap(type)                                                                  \
+    do {                                                                                   \
+        int w;                                                                             \
+        declare_func(void, type *dst, const type *src, int w);                             \
+                                                                                           \
+        for (w = 0; w < BUF_SIZE / sizeof(type); w++) {                                    \
+            int offset = (BUF_SIZE / sizeof(type) - w) & 15; /* Test various alignments */ \
+            randomize_buffers();                                                           \
+            call_ref((type *)dst0 + offset, (type *)src0 + offset, w);                     \
+            call_new((type *)dst1 + offset, (type *)src1 + offset, w);                     \
+            if (memcmp(src0, src1, BUF_SIZE) || memcmp(dst0, dst1, BUF_SIZE))              \
+                fail();                                                                    \
+            bench_new((type *)dst1 + offset, (type *)src1 + offset, w);                    \
+        }                                                                                  \
+    } while (0)
+
+void checkasm_check_bswapdsp(void)
+{
+    LOCAL_ALIGNED_16(uint8_t, src0, [BUF_SIZE]);
+    LOCAL_ALIGNED_16(uint8_t, src1, [BUF_SIZE]);
+    LOCAL_ALIGNED_16(uint8_t, dst0, [BUF_SIZE]);
+    LOCAL_ALIGNED_16(uint8_t, dst1, [BUF_SIZE]);
+    BswapDSPContext h;
+    /* check_bswap() passes src0/dst0 to the reference call and src1/dst1 to the tested one */
+    ff_bswapdsp_init(&h);
+    /* Both element widths reuse the same byte buffers, cast to the element type inside check_bswap() */
+    if (check_func(h.bswap_buf, "bswap_buf"))
+        check_bswap(uint32_t);
+
+    if (check_func(h.bswap16_buf, "bswap16_buf"))
+        check_bswap(uint16_t);
+
+    report("bswap");
+}
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
new file mode 100644
index 00000000..1e73e34c
--- /dev/null
+++ b/tests/checkasm/checkasm.c
@@ -0,0 +1,661 @@
+/*
+ * Assembly testing and benchmarking tool
+ * Copyright (c) 2015 Henrik Gramner
+ * Copyright (c) 2008 Loren Merritt
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "checkasm.h"
+#include "libavutil/common.h"
+#include "libavutil/cpu.h"
+#include "libavutil/intfloat.h"
+#include "libavutil/random_seed.h"
+
+#if HAVE_IO_H
+#include <io.h>
+#endif
+
+#if HAVE_SETCONSOLETEXTATTRIBUTE
+#include <windows.h>
+#define COLOR_RED    FOREGROUND_RED
+#define COLOR_GREEN  FOREGROUND_GREEN
+#define COLOR_YELLOW (FOREGROUND_RED|FOREGROUND_GREEN)
+#else
+#define COLOR_RED    1
+#define COLOR_GREEN  2
+#define COLOR_YELLOW 3
+#endif
+
+#if HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#if !HAVE_ISATTY
+#define isatty(fd) 1
+#endif
+
+#if ARCH_ARM && HAVE_ARMV5TE_EXTERNAL
+#include "libavutil/arm/cpu.h"
+
+void (*checkasm_checked_call)(void *func, int dummy, ...) = checkasm_checked_call_novfp;
+#endif
+
+/* List of tests to invoke */
+static const struct {
+    const char *name;
+    void (*func)(void);
+} tests[] = {
+#if CONFIG_AVCODEC
+    #if CONFIG_ALAC_DECODER
+        { "alacdsp", checkasm_check_alacdsp },
+    #endif
+    #if CONFIG_BLEND_FILTER
+        { "vf_blend", checkasm_check_blend },
+    #endif
+    #if CONFIG_BSWAPDSP
+        { "bswapdsp", checkasm_check_bswapdsp },
+    #endif
+    #if CONFIG_DCA_DECODER
+        { "synth_filter", checkasm_check_synth_filter },
+    #endif
+    #if CONFIG_FLACDSP
+        { "flacdsp", checkasm_check_flacdsp },
+    #endif
+    #if CONFIG_FMTCONVERT
+        { "fmtconvert", checkasm_check_fmtconvert },
+    #endif
+    #if CONFIG_H264PRED
+        { "h264pred", checkasm_check_h264pred },
+    #endif
+    #if CONFIG_H264QPEL
+        { "h264qpel", checkasm_check_h264qpel },
+    #endif
+    #if CONFIG_JPEG2000_DECODER
+        { "jpeg2000dsp", checkasm_check_jpeg2000dsp },
+    #endif
+    #if CONFIG_PIXBLOCKDSP
+        { "pixblockdsp", checkasm_check_pixblockdsp },
+    #endif
+    #if CONFIG_V210_ENCODER
+        { "v210enc", checkasm_check_v210enc },
+    #endif
+    #if CONFIG_VP9_DECODER
+        { "vp9dsp", checkasm_check_vp9dsp },
+    #endif
+    #if CONFIG_VIDEODSP
+        { "videodsp", checkasm_check_videodsp },
+    #endif
+#endif
+    { NULL }
+};
+
+/* List of cpu flags to check */
+static const struct {
+    const char *name;
+    const char *suffix;
+    int flag;
+} cpus[] = {
+#if   ARCH_AARCH64
+    { "ARMV8",    "armv8",    AV_CPU_FLAG_ARMV8 },
+    { "NEON",     "neon",     AV_CPU_FLAG_NEON },
+#elif ARCH_ARM
+    { "ARMV5TE",  "armv5te",  AV_CPU_FLAG_ARMV5TE },
+    { "ARMV6",    "armv6",    AV_CPU_FLAG_ARMV6 },
+    { "ARMV6T2",  "armv6t2",  AV_CPU_FLAG_ARMV6T2 },
+    { "VFP",      "vfp",      AV_CPU_FLAG_VFP },
+    { "VFP_VM",   "vfp_vm",   AV_CPU_FLAG_VFP_VM },
+    { "VFPV3",    "vfp3",     AV_CPU_FLAG_VFPV3 },
+    { "NEON",     "neon",     AV_CPU_FLAG_NEON },
+#elif ARCH_PPC
+    { "ALTIVEC",  "altivec",  AV_CPU_FLAG_ALTIVEC },
+    { "VSX",      "vsx",      AV_CPU_FLAG_VSX },
+    { "POWER8",   "power8",   AV_CPU_FLAG_POWER8 },
+#elif ARCH_X86
+    { "MMX",      "mmx",      AV_CPU_FLAG_MMX|AV_CPU_FLAG_CMOV },
+    { "MMXEXT",   "mmxext",   AV_CPU_FLAG_MMXEXT },
+    { "3DNOW",    "3dnow",    AV_CPU_FLAG_3DNOW },
+    { "3DNOWEXT", "3dnowext", AV_CPU_FLAG_3DNOWEXT },
+    { "SSE",      "sse",      AV_CPU_FLAG_SSE },
+    { "SSE2",     "sse2",     AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW },
+    { "SSE3",     "sse3",     AV_CPU_FLAG_SSE3|AV_CPU_FLAG_SSE3SLOW },
+    { "SSSE3",    "ssse3",    AV_CPU_FLAG_SSSE3|AV_CPU_FLAG_ATOM },
+    { "SSE4.1",   "sse4",     AV_CPU_FLAG_SSE4 },
+    { "SSE4.2",   "sse42",    AV_CPU_FLAG_SSE42 },
+    { "AES-NI",   "aesni",    AV_CPU_FLAG_AESNI },
+    { "AVX",      "avx",      AV_CPU_FLAG_AVX },
+    { "XOP",      "xop",      AV_CPU_FLAG_XOP },
+    { "FMA3",     "fma3",     AV_CPU_FLAG_FMA3 },
+    { "FMA4",     "fma4",     AV_CPU_FLAG_FMA4 },
+    { "AVX2",     "avx2",     AV_CPU_FLAG_AVX2 },
+#endif
+    { NULL }
+};
+
+typedef struct CheckasmFuncVersion {
+    struct CheckasmFuncVersion *next; /* next version of the same function, or NULL */
+    void *func;                       /* pointer registered through checkasm_check_func() */
+    int ok;                           /* set to 1 on registration; ok versions may serve as reference */
+    int cpu;                          /* value of state.cpu_flag at registration time */
+    int iterations;                   /* benchmark run count, read by print_benchs() */
+    uint64_t cycles;                  /* benchmark time total, read by print_benchs() */
+} CheckasmFuncVersion;
+
+/* Binary search tree node */
+typedef struct CheckasmFunc {
+    struct CheckasmFunc *child[2]; /* [0]: names comparing lower, [1]: higher (see get_func) */
+    CheckasmFuncVersion versions;  /* first version, embedded; later ones are chained via next */
+    uint8_t color; /* 0 = red, 1 = black */
+    char name[1];                  /* NUL-terminated; get_func() over-allocates the node to fit it */
+} CheckasmFunc;
+
+/* Internal state */
+static struct {
+    CheckasmFunc *funcs;
+    CheckasmFunc *current_func;
+    CheckasmFuncVersion *current_func_ver;
+    const char *current_test_name;
+    const char *bench_pattern;
+    int bench_pattern_len;
+    int num_checked;
+    int num_failed;
+    int nop_time;
+    int cpu_flag;
+    const char *cpu_flag_name;
+} state;
+
+/* PRNG state */
+AVLFG checkasm_lfg;
+
+/* float compare support code: is_negative() reports bit 31 of the float's bit pattern */
+static int is_negative(union av_intfloat32 u)
+{
+    return (u.i & 0x80000000u) != 0;
+}
+
+int float_near_ulp(float a, float b, unsigned max_ulp)
+{
+    /* Return 1 if the bit patterns of a and b are at most max_ulp apart, else 0. */
+    union av_intfloat32 x, y;
+    x.f = a;
+    y.f = b;
+
+    if (is_negative(x) != is_negative(y)) {
+        // handle -0.0 == +0.0
+        return a == b;
+    }
+
+    /* x.i - y.i is unsigned and wraps, so abs() was wrong here: take the distance in signed 64 bits. */
+    if (llabs((int64_t)x.i - y.i) <= max_ulp)
+        return 1;
+    return 0;
+}
+
+int float_near_ulp_array(const float *a, const float *b, unsigned max_ulp,
+                         unsigned len)
+{
+    /* Return 1 only if every pair a[k], b[k] passes float_near_ulp(). */
+    unsigned k = 0;
+
+    while (k < len && float_near_ulp(a[k], b[k], max_ulp))
+        k++;
+
+    return k == len;
+}
+
+int float_near_abs_eps(float a, float b, float eps)
+{
+    /* Return 1 if |a - b| is strictly below eps, else 0. */
+    const float distance = fabsf(a - b);
+    return distance < eps;
+}
+
+int float_near_abs_eps_array(const float *a, const float *b, float eps,
+                         unsigned len)
+{
+    /* Return 1 only if every pair a[k], b[k] passes float_near_abs_eps(). */
+    unsigned k = 0;
+
+    while (k < len && float_near_abs_eps(a[k], b[k], eps))
+        k++;
+
+    return k == len;
+}
+
+int float_near_abs_eps_ulp(float a, float b, float eps, unsigned max_ulp)
+{   /* Pass when either the ULP criterion or the absolute-epsilon criterion holds. */
+    return float_near_ulp(a, b, max_ulp) ? 1 : float_near_abs_eps(a, b, eps);
+}
+
+int float_near_abs_eps_array_ulp(const float *a, const float *b, float eps,
+                         unsigned max_ulp, unsigned len)
+{
+    /* Return 1 only if every pair a[k], b[k] passes float_near_abs_eps_ulp(). */
+    unsigned k = 0;
+
+    while (k < len && float_near_abs_eps_ulp(a[k], b[k], eps, max_ulp))
+        k++;
+
+    return k == len;
+}
+
+/* Print colored text to stderr if the terminal supports it */
+static void color_printf(int color, const char *fmt, ...)
+{
+    static int use_color = -1;
+    va_list arg;
+    /* use_color starts at -1 and is decided once, on the first call */
+#if HAVE_SETCONSOLETEXTATTRIBUTE
+    static HANDLE con;
+    static WORD org_attributes;
+
+    if (use_color < 0) {
+        CONSOLE_SCREEN_BUFFER_INFO con_info;
+        con = GetStdHandle(STD_ERROR_HANDLE);
+        if (con && con != INVALID_HANDLE_VALUE && GetConsoleScreenBufferInfo(con, &con_info)) {
+            org_attributes = con_info.wAttributes;
+            use_color = 1;
+        } else
+            use_color = 0;
+    }
+    if (use_color)
+        SetConsoleTextAttribute(con, (org_attributes & 0xfff0) | (color & 0x0f));
+#else
+    if (use_color < 0) {
+        const char *term = getenv("TERM");
+        use_color = term && strcmp(term, "dumb") && isatty(2);
+    }
+    if (use_color)
+        fprintf(stderr, "\x1b[%d;3%dm", (color & 0x08) >> 3, color & 0x07);
+#endif
+    /* Emit the formatted message itself, then reset the text attributes */
+    va_start(arg, fmt);
+    vfprintf(stderr, fmt, arg);
+    va_end(arg);
+
+    if (use_color) {
+#if HAVE_SETCONSOLETEXTATTRIBUTE
+        SetConsoleTextAttribute(con, org_attributes);
+#else
+        fprintf(stderr, "\x1b[0m");
+#endif
+    }
+}
+
+/* Recursively free a function tree: every node plus its chained extra versions */
+static void destroy_func_tree(CheckasmFunc *f)
+{
+    CheckasmFuncVersion *ver, *following;
+
+    if (!f)
+        return;
+    /* versions itself is embedded in the node; only the chained entries are separate blocks */
+    for (ver = f->versions.next; ver; ver = following) {
+        following = ver->next;
+        free(ver);
+    }
+    destroy_func_tree(f->child[0]);
+    destroy_func_tree(f->child[1]);
+    free(f);
+}
+
+/* Return size zeroed bytes; on allocation failure free the function tree and exit(1) */
+static void *checkasm_malloc(size_t size)
+{
+    void *block = calloc(1, size);
+    if (block)
+        return block;
+
+    fprintf(stderr, "checkasm: malloc failed\n");
+    destroy_func_tree(state.funcs);
+    exit(1);
+}
+
+/* Map a cpu flag mask to its name suffix; "c" when no table entry matches */
+static const char *cpu_suffix(int cpu)
+{
+    int idx;
+    /* Scan backwards so that the last matching table entry wins */
+    for (idx = FF_ARRAY_ELEMS(cpus) - 1; idx >= 0; idx--) {
+        if (cpus[idx].flag & cpu)
+            return cpus[idx].suffix;
+    }
+    return "c";
+}
+
+#ifdef AV_READ_TIME
+static int cmp_nop(const void *a, const void *b)
+{   /* qsort() comparator: orders uint16_t samples ascending */
+    return *(const uint16_t*)a - *(const uint16_t*)b;
+}
+
+/* Measure the overhead of the timing code (in decicycles) */
+static int measure_nop_time(void)
+{
+    uint16_t nops[10000];
+    int i, nop_sum = 0;
+    /* Sample the cost of two back-to-back AV_READ_TIME() calls; each delta is truncated to 16 bits */
+    for (i = 0; i < 10000; i++) {
+        uint64_t t = AV_READ_TIME();
+        nops[i] = AV_READ_TIME() - t;
+    }
+    /* Sort, then sum only the middle 5000 samples so that outliers at both ends are ignored */
+    qsort(nops, 10000, sizeof(uint16_t), cmp_nop);
+    for (i = 2500; i < 7500; i++)
+        nop_sum += nops[i];
+
+    return nop_sum / 500; /* 5000 samples / 500 = ten times the mean */
+}
+
+/* Print benchmark results for the whole tree, visiting nodes in name order */
+static void print_benchs(CheckasmFunc *f)
+{
+    CheckasmFuncVersion *ver;
+
+    if (!f)
+        return;
+
+    print_benchs(f->child[0]);
+    /* Only print functions with at least one assembly version */
+    if (f->versions.cpu || f->versions.next) {
+        for (ver = &f->versions; ver; ver = ver->next) {
+            if (ver->iterations) {
+                int decicycles = (10*ver->cycles/ver->iterations - state.nop_time) / 4;
+                printf("%s_%s: %d.%d\n", f->name, cpu_suffix(ver->cpu), decicycles/10, decicycles%10);
+            }
+        }
+    }
+    print_benchs(f->child[1]);
+}
+#endif
+
+/* ASCIIbetical sort except preserving natural order for numbers */
+static int cmp_func_names(const char *a, const char *b)
+{
+    const char *start = a;
+    int ascii_diff, digit_diff;
+    /* 1st loop: stop at the first differing byte (or the NUL); 2nd loop: skip digits both names still have */
+    for (; !(ascii_diff = *(const unsigned char*)a - *(const unsigned char*)b) && *a; a++, b++);
+    for (; av_isdigit(*a) && av_isdigit(*b); a++, b++);
+    /* Inside a number, the name whose digit run continues sorts higher (assumes av_isdigit() yields 0/1) */
+    if (a > start && av_isdigit(a[-1]) && (digit_diff = av_isdigit(*a) - av_isdigit(*b)))
+        return digit_diff;
+
+    return ascii_diff;
+}
+
+/* Rotate the subtree rooted at f in direction dir; returns the node that takes f's place */
+static CheckasmFunc *rotate_tree(CheckasmFunc *f, int dir)
+{
+    CheckasmFunc *pivot = f->child[1 ^ dir];
+    f->child[1 ^ dir]   = pivot->child[dir];
+    pivot->child[dir]   = f;
+    pivot->color        = f->color; /* the new root inherits f's color ... */
+    f->color            = 0;        /* ... and f becomes red */
+    return pivot;
+}
+
+#define is_red(f) ((f) && !(f)->color)
+
+/* Balance a left-leaning red-black tree at the specified node */
+static void balance_tree(CheckasmFunc **root)
+{
+    CheckasmFunc *f = *root;
+    /* Two red children: flip this node's color and paint both children black */
+    if (is_red(f->child[0]) && is_red(f->child[1])) {
+        f->color ^= 1;
+        f->child[0]->color = f->child[1]->color = 1;
+    }
+    /* A lone red right child, or two reds in a row on the left, is fixed by a rotation stored back into *root */
+    if (!is_red(f->child[0]) && is_red(f->child[1]))
+        *root = rotate_tree(f, 0); /* Rotate left */
+    else if (is_red(f->child[0]) && is_red(f->child[0]->child[0]))
+        *root = rotate_tree(f, 1); /* Rotate right */
+}
+
+/* Get a node with the specified name, creating it if it doesn't exist */
+static CheckasmFunc *get_func(CheckasmFunc **root, const char *name)
+{
+    CheckasmFunc *f = *root;
+    /* *root is the subtree to search; a new node is stored through it when the subtree is empty */
+    if (f) {
+        /* Search the tree for a matching node */
+        int cmp = cmp_func_names(name, f->name);
+        if (cmp) {
+            f = get_func(&f->child[cmp > 0], name);
+
+            /* Rebalance the tree on the way up if a new node was inserted */
+            if (!f->versions.func)
+                balance_tree(root);
+        }
+    } else {
+        /* Allocate and insert a new node into the tree */
+        int name_length = strlen(name);
+        f = *root = checkasm_malloc(sizeof(CheckasmFunc) + name_length);
+        memcpy(f->name, name, name_length + 1);
+    }
+    /* f is the matching or freshly zero-allocated node; checkasm_malloc() exits instead of returning NULL */
+    return f;
+}
+
+/* Perform tests and benchmarks for the specified cpu flag if supported by the host */
+static void check_cpu_flag(const char *name, int flag)
+{
+    int old_cpu_flag = state.cpu_flag;
+    /* Flags accumulate across calls: add the new one, then mask with av_get_cpu_flags() (presumably the detected set after the -1 reset) */
+    flag |= old_cpu_flag;
+    av_force_cpu_flags(-1);
+    state.cpu_flag = flag & av_get_cpu_flags();
+    av_force_cpu_flags(state.cpu_flag);
+    /* Run every test for the initial flag-less pass, or when this flag changed the active set */
+    if (!flag || state.cpu_flag != old_cpu_flag) {
+        int i;
+
+        state.cpu_flag_name = name;
+        for (i = 0; tests[i].func; i++) {
+            state.current_test_name = tests[i].name;
+            tests[i].func();
+        }
+    }
+}
+
+/* Print the pending CPU flag name as a heading; clearing it prevents repeats */
+static void print_cpu_name(void)
+{
+    const char *heading = state.cpu_flag_name;
+    state.cpu_flag_name = NULL;
+    if (heading)
+        color_printf(COLOR_YELLOW, "%s:\n", heading);
+}
+
+int main(int argc, char *argv[])
+{
+    int i, ret = 0;
+    unsigned int seed;
+#if ARCH_ARM && HAVE_ARMV5TE_EXTERNAL
+    if (have_vfp(av_get_cpu_flags()) || have_neon(av_get_cpu_flags()))
+        checkasm_checked_call = checkasm_checked_call_vfp;
+#endif
+    /* Usage: checkasm [--bench[=<pattern>]] [<seed>]; returns 1 if any test failed */
+    if (!tests[0].func || !cpus[0].flag) {
+        fprintf(stderr, "checkasm: no tests to perform\n");
+        return 0;
+    }
+    /* Optional first argument: --bench or --bench=<pattern> */
+    if (argc > 1 && !strncmp(argv[1], "--bench", 7)) {
+#ifndef AV_READ_TIME
+        fprintf(stderr, "checkasm: --bench is not supported on your system\n");
+        return 1;
+#endif
+        if (argv[1][7] == '=') {
+            state.bench_pattern = argv[1] + 8;
+            state.bench_pattern_len = strlen(state.bench_pattern);
+        } else
+            state.bench_pattern = "";
+
+        argc--;
+        argv++;
+    }
+    /* Parse the seed as unsigned: the %u value printed below can exceed INT_MAX, where atoi() is undefined */
+    seed = (argc > 1) ? strtoul(argv[1], NULL, 10) : av_get_random_seed();
+    fprintf(stderr, "checkasm: using random seed %u\n", seed);
+    av_lfg_init(&checkasm_lfg, seed);
+    /* First a pass with no cpu flags, then one pass per entry of cpus[] in table order */
+    check_cpu_flag(NULL, 0);
+    for (i = 0; cpus[i].flag; i++)
+        check_cpu_flag(cpus[i].name, cpus[i].flag);
+
+    if (state.num_failed) {
+        fprintf(stderr, "checkasm: %d of %d tests have failed\n", state.num_failed, state.num_checked);
+        ret = 1;
+    } else {
+        fprintf(stderr, "checkasm: all %d tests passed\n", state.num_checked);
+#ifdef AV_READ_TIME
+        if (state.bench_pattern) {
+            state.nop_time = measure_nop_time();
+            printf("nop: %d.%d\n", state.nop_time/10, state.nop_time%10);
+            print_benchs(state.funcs);
+        }
+#endif
+    }
+
+    destroy_func_tree(state.funcs);
+    return ret;
+}
+
+/* Decide whether or not the specified function needs to be tested and
+ * allocate/initialize data structures if needed. Returns a pointer to a
+ * reference function if the function should be tested, otherwise NULL */
+void *checkasm_check_func(void *func, const char *name, ...)
+{
+    char name_buf[256];
+    void *ref = func; /* a function seen for the first time serves as its own reference */
+    CheckasmFuncVersion *v;
+    int name_length;
+    va_list arg;
+
+    va_start(arg, name);
+    name_length = vsnprintf(name_buf, sizeof(name_buf), name, arg);
+    va_end(arg);
+
+    if (!func || name_length <= 0 || name_length >= sizeof(name_buf)) /* reject NULL functions and empty, failed or truncated names */
+        return NULL;
+
+    state.current_func = get_func(&state.funcs, name_buf);
+    state.funcs->color = 1;
+    v = &state.current_func->versions; /* first version is stored inline; later ones are chained through ->next */
+
+    if (v->func) {
+        CheckasmFuncVersion *prev;
+        do {
+            /* Only test functions that haven't already been tested */
+            if (v->func == func)
+                return NULL;
+
+            if (v->ok)
+                ref = v->func; /* the last version that has not failed becomes the reference */
+
+            prev = v;
+        } while ((v = v->next));
+
+        v = prev->next = checkasm_malloc(sizeof(CheckasmFuncVersion)); /* NOTE(review): v->next is never set below, so this presumably returns zeroed memory -- confirm */
+    }
+
+    v->func = func;
+    v->ok = 1;
+    v->cpu = state.cpu_flag;
+    state.current_func_ver = v;
+
+    if (state.cpu_flag) /* the baseline pass (cpu_flag == 0) is not counted as a check */
+        state.num_checked++;
+
+    return ref;
+}
+
+/* Benchmark only if nothing has failed so far, a bench pattern is set, and the current name starts with it */
+int checkasm_bench_func(void)
+{
+    return (state.num_failed || !state.bench_pattern) ? 0 :
+           !strncmp(state.current_func->name, state.bench_pattern, state.bench_pattern_len);
+}
+
+/* Indicate that the current test has failed; msg is a printf-style description of where */
+void checkasm_fail_func(const char *msg, ...)
+{
+    if (state.current_func_ver->cpu && state.current_func_ver->ok) { /* only the first failure of a non-baseline version is reported and counted */
+        va_list arg;
+
+        print_cpu_name();
+        fprintf(stderr, "   %s_%s (", state.current_func->name, cpu_suffix(state.current_func_ver->cpu));
+        va_start(arg, msg);
+        vfprintf(stderr, msg, arg);
+        va_end(arg);
+        fprintf(stderr, ")\n");
+
+        state.current_func_ver->ok = 0; /* also stops this version from being picked as a reference in checkasm_check_func() */
+        state.num_failed++;
+    }
+}
+
+/* Add a batch of benchmark results (iteration count and cycle total) to the current function version */
+void checkasm_update_bench(int iterations, uint64_t cycles)
+{
+    state.current_func_ver->iterations += iterations; /* both fields accumulate across calls */
+    state.current_func_ver->cycles += cycles;
+}
+
+/* Print the outcome of all tests performed since the last time this function was called */
+void checkasm_report(const char *name, ...)
+{
+    static int prev_checked, prev_failed, max_length; /* persist between calls: counters at the last report and the widest name seen */
+
+    if (state.num_checked > prev_checked) { /* at least one new check since the previous report */
+        int pad_length = max_length + 4;
+        va_list arg;
+
+        print_cpu_name();
+        pad_length -= fprintf(stderr, " - %s.", state.current_test_name); /* subtract the characters actually written */
+        va_start(arg, name);
+        pad_length -= vfprintf(stderr, name, arg);
+        va_end(arg);
+        fprintf(stderr, "%*c", FFMAX(pad_length, 0) + 2, '['); /* '[' right-aligned in the remaining width */
+
+        if (state.num_failed == prev_failed) /* no new failures since the previous report */
+            color_printf(COLOR_GREEN, "OK");
+        else
+            color_printf(COLOR_RED, "FAILED");
+        fprintf(stderr, "]\n");
+
+        prev_checked = state.num_checked;
+        prev_failed  = state.num_failed;
+    } else if (!state.cpu_flag) { /* nothing to print and no cpu flag active: only measure the name */
+        /* Calculate the amount of padding required to make the output vertically aligned */
+        int length = strlen(state.current_test_name);
+        va_list arg;
+
+        va_start(arg, name);
+        length += vsnprintf(NULL, 0, name, arg); /* size 0: only the would-be length is computed */
+        va_end(arg);
+
+        if (length > max_length)
+            max_length = length;
+    }
+}
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
new file mode 100644
index 00000000..0c8bc2d1
--- /dev/null
+++ b/tests/checkasm/checkasm.h
@@ -0,0 +1,175 @@
+/*
+ * Assembly testing and benchmarking tool
+ * Copyright (c) 2015 Henrik Gramner
+ * Copyright (c) 2008 Loren Merritt
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef TESTS_CHECKASM_CHECKASM_H
+#define TESTS_CHECKASM_CHECKASM_H
+
+#include <stdint.h>
+#include "config.h"
+#include "libavutil/avstring.h"
+#include "libavutil/cpu.h"
+#include "libavutil/lfg.h"
+#include "libavutil/timer.h"
+
+void checkasm_check_alacdsp(void);
+void checkasm_check_blend(void);
+void checkasm_check_bswapdsp(void);
+void checkasm_check_flacdsp(void);
+void checkasm_check_fmtconvert(void);
+void checkasm_check_h264pred(void);
+void checkasm_check_h264qpel(void);
+void checkasm_check_jpeg2000dsp(void);
+void checkasm_check_pixblockdsp(void);
+void checkasm_check_synth_filter(void);
+void checkasm_check_v210enc(void);
+void checkasm_check_vp9dsp(void);
+void checkasm_check_videodsp(void);
+
+void *checkasm_check_func(void *func, const char *name, ...) av_printf_format(2, 3);
+int checkasm_bench_func(void);
+void checkasm_fail_func(const char *msg, ...) av_printf_format(1, 2);
+void checkasm_update_bench(int iterations, uint64_t cycles);
+void checkasm_report(const char *name, ...) av_printf_format(1, 2);
+
+/* float compare utilities */
+int float_near_ulp(float a, float b, unsigned max_ulp);
+int float_near_abs_eps(float a, float b, float eps);
+int float_near_abs_eps_ulp(float a, float b, float eps, unsigned max_ulp);
+int float_near_ulp_array(const float *a, const float *b, unsigned max_ulp,
+                         unsigned len);
+int float_near_abs_eps_array(const float *a, const float *b, float eps,
+                             unsigned len);
+int float_near_abs_eps_array_ulp(const float *a, const float *b, float eps,
+                                 unsigned max_ulp, unsigned len);
+
+extern AVLFG checkasm_lfg;
+#define rnd() av_lfg_get(&checkasm_lfg)
+
+static av_unused void *func_ref, *func_new;
+
+#define BENCH_RUNS 1000 /* Trade-off between accuracy and speed */
+
+/* Decide whether or not the specified function needs to be tested */
+#define check_func(func, ...) (func_ref = checkasm_check_func((func_new = func), __VA_ARGS__))
+
+/* Declare the function prototype. The first argument is the return value, the remaining
+ * arguments are the function parameters. Naming parameters is optional. */
+#define declare_func(ret, ...) declare_new(ret, __VA_ARGS__) typedef ret func_type(__VA_ARGS__)
+#define declare_func_emms(cpu_flags, ret, ...) declare_new_emms(cpu_flags, ret, __VA_ARGS__) typedef ret func_type(__VA_ARGS__)
+
+/* Indicate that the current test has failed */
+#define fail() checkasm_fail_func("%s:%d", av_basename(__FILE__), __LINE__)
+
+/* Print the test outcome */
+#define report checkasm_report
+
+/* Call the reference function */
+#define call_ref(...) ((func_type *)func_ref)(__VA_ARGS__)
+
+#if ARCH_X86 && HAVE_YASM
+/* Verifies that clobbered callee-saved registers are properly saved and restored
+ * and that either no MMX registers are touched or emms is issued */
+void checkasm_checked_call(void *func, ...);
+/* Verifies that clobbered callee-saved registers are properly saved and restored
+ * and issues emms for asm functions which are not required to do so */
+void checkasm_checked_call_emms(void *func, ...);
+
+#if ARCH_X86_64
+/* Evil hack: detect incorrect assumptions that 32-bit ints are zero-extended to 64-bit.
+ * This is done by clobbering the stack with junk around the stack pointer and calling the
+ * assembly function through checked_call() with added dummy arguments which forces all
+ * real arguments to be passed on the stack and not in registers. For 32-bit arguments the
+ * upper half of the 64-bit register locations on the stack will now contain junk which will
+ * cause misbehaving functions to either produce incorrect output or segfault. Note that
+ * even though this works extremely well in practice, it's technically not guaranteed
+ * and false negatives are theoretically possible, but there can never be any false positives.
+ */
+void checkasm_stack_clobber(uint64_t clobber, ...);
+#define declare_new(ret, ...) ret (*checked_call)(void *, int, int, int, int, int, __VA_ARGS__)\
+                              = (void *)checkasm_checked_call;
+#define declare_new_emms(cpu_flags, ret, ...) \
+    ret (*checked_call)(void *, int, int, int, int, int, __VA_ARGS__) = \
+        ((cpu_flags) & av_get_cpu_flags()) ? (void *)checkasm_checked_call_emms : \
+                                             (void *)checkasm_checked_call;
+#define CLOB (UINT64_C(0xdeadbeefdeadbeef))
+#define call_new(...) (checkasm_stack_clobber(CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,\
+                                              CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB),\
+                      checked_call(func_new, 0, 0, 0, 0, 0, __VA_ARGS__))
+#elif ARCH_X86_32
+#define declare_new(ret, ...) ret (*checked_call)(void *, __VA_ARGS__) = (void *)checkasm_checked_call;
+#define declare_new_emms(cpu_flags, ret, ...) ret (*checked_call)(void *, __VA_ARGS__) = \
+        ((cpu_flags) & av_get_cpu_flags()) ? (void *)checkasm_checked_call_emms :        \
+                                             (void *)checkasm_checked_call;
+#define call_new(...) checked_call(func_new, __VA_ARGS__)
+#endif
+#elif ARCH_ARM && HAVE_ARMV5TE_EXTERNAL
+/* Use a dummy argument, to offset the real parameters by 2, not only 1.
+ * This makes sure that potential 8-byte-alignment of parameters is kept the same
+ * even when the extra parameters have been removed. */
+void checkasm_checked_call_vfp(void *func, int dummy, ...);
+void checkasm_checked_call_novfp(void *func, int dummy, ...);
+extern void (*checkasm_checked_call)(void *func, int dummy, ...);
+#define declare_new(ret, ...) ret (*checked_call)(void *, int dummy, __VA_ARGS__) = (void *)checkasm_checked_call;
+#define call_new(...) checked_call(func_new, 0, __VA_ARGS__)
+#elif ARCH_AARCH64 && !defined(__APPLE__)
+void checkasm_checked_call(void *func, ...);
+#define declare_new(ret, ...) ret (*checked_call)(void *, __VA_ARGS__) = (void *)checkasm_checked_call;
+#define call_new(...) checked_call(func_new, __VA_ARGS__)
+#else
+#define declare_new(ret, ...)
+#define declare_new_emms(cpu_flags, ret, ...)
+/* Call the function */
+#define call_new(...) ((func_type *)func_new)(__VA_ARGS__)
+#endif
+
+#ifndef declare_new_emms
+#define declare_new_emms(cpu_flags, ret, ...) declare_new(ret, __VA_ARGS__)
+#endif
+
+/* Benchmark the function: times func_new directly (not through checked_call) when checkasm_bench_func() allows it */
+#ifdef AV_READ_TIME
+#define bench_new(...)\
+    do {\
+        if (checkasm_bench_func()) {\
+            func_type *tfunc = func_new;\
+            uint64_t tsum = 0;\
+            int ti, tcount = 0;\
+            for (ti = 0; ti < BENCH_RUNS; ti++) {\
+                uint64_t t = AV_READ_TIME();\
+                tfunc(__VA_ARGS__); /* each timed sample spans four back-to-back calls */\
+                tfunc(__VA_ARGS__);\
+                tfunc(__VA_ARGS__);\
+                tfunc(__VA_ARGS__);\
+                t = AV_READ_TIME() - t;\
+                if (t*tcount <= tsum*4 && ti > 0) { /* drop the first run and any sample above 4x the mean of the accepted ones */\
+                    tsum += t;\
+                    tcount++;\
+                }\
+            }\
+            checkasm_update_bench(tcount, tsum); /* tcount accepted samples, tsum their summed timer delta */\
+        }\
+    } while (0)
+#else
+#define bench_new(...) while(0) /* no timer available: expands to an empty loop so "bench_new(...);" stays a valid statement */
+#endif
+
+#endif
diff --git a/tests/checkasm/flacdsp.c b/tests/checkasm/flacdsp.c
new file mode 100644
index 00000000..dccb54d6
--- /dev/null
+++ b/tests/checkasm/flacdsp.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2015 James Almer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+#include "checkasm.h"
+#include "libavcodec/flacdsp.h"
+#include "libavutil/common.h"
+#include "libavutil/internal.h"
+#include "libavutil/intreadwrite.h"
+
+#define BUF_SIZE 256
+#define MAX_CHANNELS 8
+
+#define randomize_buffers()                                           \
+    do {                                                              \
+        int off, ch;                                                  \
+        for (off = 0; off < BUF_SIZE; off += 4) {                     \
+            for (ch = 0; ch < channels; ch++) {                       \
+                uint32_t sample = rnd() & ((1 << (bits - 2)) - 1);    \
+                AV_WN32A(ref_src[ch] + off, sample);                  \
+                AV_WN32A(new_src[ch] + off, sample);                  \
+            }                                                         \
+        }                                                             \
+    } while (0) /* fills every channel of ref_src/new_src with identical random words limited to the low (bits - 2) bits */
+
+static void check_decorrelate(uint8_t **ref_dst, uint8_t **ref_src, uint8_t **new_dst, uint8_t **new_src,
+                              int channels, int bits) { /* runs the ref and new decorrelate functions on identical input and compares the results */
+    declare_func(void, uint8_t **out, int32_t **in, int channels, int len, int shift);
+
+    randomize_buffers();
+    call_ref(ref_dst, (int32_t **)ref_src, channels, BUF_SIZE / sizeof(int32_t), 8); /* len is counted in int32_t samples; shift is fixed at 8 */
+    call_new(new_dst, (int32_t **)new_src, channels, BUF_SIZE / sizeof(int32_t), 8);
+    if (memcmp(*ref_dst, *new_dst, bits == 16 ? BUF_SIZE * (channels/2) : BUF_SIZE * channels) || /* half as many output bytes are compared for 16-bit */
+        memcmp(*ref_src, *new_src, BUF_SIZE * channels)) /* the inputs must still match as well, i.e. neither call may have altered its source differently */
+        fail();
+    bench_new(new_dst, (int32_t **)new_src, channels, BUF_SIZE / sizeof(int32_t), 8);
+}
+
+void checkasm_check_flacdsp(void)
+{
+    LOCAL_ALIGNED_16(uint8_t, ref_dst, [BUF_SIZE*MAX_CHANNELS]);
+    LOCAL_ALIGNED_16(uint8_t, ref_buf, [BUF_SIZE*MAX_CHANNELS]);
+    LOCAL_ALIGNED_16(uint8_t, new_dst, [BUF_SIZE*MAX_CHANNELS]);
+    LOCAL_ALIGNED_16(uint8_t, new_buf, [BUF_SIZE*MAX_CHANNELS]);
+    uint8_t *ref_src[] = { &ref_buf[BUF_SIZE*0], &ref_buf[BUF_SIZE*1], &ref_buf[BUF_SIZE*2], &ref_buf[BUF_SIZE*3], /* one BUF_SIZE slice of ref_buf per channel */
+                           &ref_buf[BUF_SIZE*4], &ref_buf[BUF_SIZE*5], &ref_buf[BUF_SIZE*6], &ref_buf[BUF_SIZE*7] };
+    uint8_t *new_src[] = { &new_buf[BUF_SIZE*0], &new_buf[BUF_SIZE*1], &new_buf[BUF_SIZE*2], &new_buf[BUF_SIZE*3], /* same layout over new_buf */
+                           &new_buf[BUF_SIZE*4], &new_buf[BUF_SIZE*5], &new_buf[BUF_SIZE*6], &new_buf[BUF_SIZE*7] };
+    static const char * const names[3] = { "ls", "rs", "ms" };
+    static const struct {
+        enum AVSampleFormat fmt;
+        int bits;
+    } fmts[] = {
+        { AV_SAMPLE_FMT_S16, 16 },
+        { AV_SAMPLE_FMT_S32, 32 },
+    };
+    FLACDSPContext h;
+    int i, j;
+
+    for (i = 0; i < 2; i++) { /* once per entry of fmts[] */
+        ff_flacdsp_init(&h, fmts[i].fmt, 2, 0);
+        for (j = 0; j < 3; j++) /* NOTE(review): decorrelate[0] is named "ls" here but "indep" in the loop below -- confirm names[] lines up with the decorrelate[] layout in libavcodec/flacdsp.h */
+            if (check_func(h.decorrelate[j], "flac_decorrelate_%s_%d", names[j], fmts[i].bits))
+                check_decorrelate(&ref_dst, ref_src, &new_dst, new_src, 2, fmts[i].bits);
+        for (j = 2; j <= MAX_CHANNELS; j += 2) { /* even channel counts 2..MAX_CHANNELS, re-initializing the context for each */
+            ff_flacdsp_init(&h, fmts[i].fmt, j, 0);
+            if (check_func(h.decorrelate[0], "flac_decorrelate_indep%d_%d", j, fmts[i].bits))
+                check_decorrelate(&ref_dst, ref_src, &new_dst, new_src, j, fmts[i].bits);
+        }
+    }
+
+    report("decorrelate");
+}
diff --git a/tests/checkasm/fmtconvert.c b/tests/checkasm/fmtconvert.c
new file mode 100644
index 00000000..50ad3ca2
--- /dev/null
+++ b/tests/checkasm/fmtconvert.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2015 Janne Grunau
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <math.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "libavutil/internal.h"
+#include "libavutil/common.h"
+#include "libavcodec/fmtconvert.h"
+
+#include "checkasm.h"
+
+#define BUF_SIZE 1024
+
+#define randomize_input(len)                                    \
+    do {                                                        \
+        int k;                                                  \
+        for (k = 0; k < len; k++) {                             \
+            in[k] = rnd() - INT32_MAX;                         \
+        }                                                       \
+        for (     ; k < BUF_SIZE; k++) {                        \
+            in[k] = INT32_MAX;                                  \
+        }                                                       \
+    } while (0) /* first len entries of in[] get rnd() - INT32_MAX, the rest up to BUF_SIZE are set to INT32_MAX */
+
+void checkasm_check_fmtconvert(void)
+{
+    FmtConvertContext c;
+    LOCAL_ALIGNED(32, float,   dst0, [BUF_SIZE]);
+    LOCAL_ALIGNED(32, float,   dst1, [BUF_SIZE]);
+    LOCAL_ALIGNED(32, int32_t, in,   [BUF_SIZE]);
+    float scale_arr[128];
+    int length[] = {8, 16, 24, 56, 72, 128, 512, 520, 656, 768, 992};
+    int i, j;
+    /* Spread the scales around zero; the index must take part and the division must be in float, or every entry collapses to the same integer */
+    for (i = 0; i < FF_ARRAY_ELEMS(scale_arr); i++)
+        scale_arr[i] = (i - (int)FF_ARRAY_ELEMS(scale_arr) / 2) / 13.0f;
+
+    ff_fmt_convert_init(&c, NULL);
+
+    memset(dst0, 0, sizeof(*dst0) * BUF_SIZE);
+    memset(dst1, 0, sizeof(*dst1) * BUF_SIZE);
+
+    if (check_func(c.int32_to_float_fmul_scalar, "int32_to_float_fmul_scalar")) {
+        declare_func(void, float *, const int32_t *, float, int);
+
+        for (i = 0; i < FF_ARRAY_ELEMS(scale_arr); i++) { /* every scale is tried against every length */
+            for (j = 0; j < FF_ARRAY_ELEMS(length); j++) {
+
+                randomize_input(length[j]);
+
+                call_ref(dst0, in, scale_arr[i], length[j]);
+                call_new(dst1, in, scale_arr[i], length[j]);
+
+                if (!float_near_ulp_array(dst0, dst1, 3, length[j])) { /* tolerate up to 3 ULP of difference */
+                    fail();
+                    break; /* leaves the length loop only; the next scale is still tried */
+                }
+
+                bench_new(dst1, in, scale_arr[i], length[j]);
+            }
+        }
+    }
+    if (check_func(c.int32_to_float_fmul_array8, "int32_to_float_fmul_array8")) {
+        declare_func(void, FmtConvertContext *, float *, const int32_t *,
+                     const float *, int);
+
+        for (i = 0; i < 4; i++) { /* four rounds, each with freshly randomized input */
+            for (j = 0; j < FF_ARRAY_ELEMS(length); j++) {
+
+                randomize_input(length[j]);
+
+                call_ref(&c, dst0, in, scale_arr, length[j]); /* the whole scale table is passed here, not a single scale */
+                call_new(&c, dst1, in, scale_arr, length[j]);
+
+                if (!float_near_ulp_array(dst0, dst1, 3, length[j])) {
+                    fail();
+                    fprintf(stderr, "int32_to_float_fmul_array8: len: %d\n", length[j]);
+                    break;
+                }
+
+                bench_new(&c, dst1, in, scale_arr, length[j]);
+            }
+        }
+    }
+    report("fmtconvert");
+}
diff --git a/tests/checkasm/h264pred.c b/tests/checkasm/h264pred.c
new file mode 100644
index 00000000..40284050
--- /dev/null
+++ b/tests/checkasm/h264pred.c
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 2015 Henrik Gramner
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+#include "checkasm.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/h264pred.h"
+#include "libavutil/common.h"
+#include "libavutil/internal.h"
+#include "libavutil/intreadwrite.h"
+
+static const int codec_ids[4] = { AV_CODEC_ID_H264, AV_CODEC_ID_VP8, AV_CODEC_ID_RV40, AV_CODEC_ID_SVQ3 };
+
+static const char * const pred4x4_modes[4][15] = {
+    { /* H264 */
+        [VERT_PRED           ] = "vertical",
+        [HOR_PRED            ] = "horizontal",
+        [DC_PRED             ] = "dc",
+        [DIAG_DOWN_LEFT_PRED ] = "down_left",
+        [DIAG_DOWN_RIGHT_PRED] = "down_right",
+        [VERT_RIGHT_PRED     ] = "vertical_right",
+        [HOR_DOWN_PRED       ] = "horizontal_right",
+        [VERT_LEFT_PRED      ] = "vertical_left",
+        [HOR_UP_PRED         ] = "horizontal_up",
+        [LEFT_DC_PRED        ] = "left_dc",
+        [TOP_DC_PRED         ] = "top_dc",
+        [DC_128_PRED         ] = "dc_128",
+    },
+    { /* VP8 */
+        [VERT_PRED     ] = "vertical_vp8",
+        [HOR_PRED      ] = "horizontal_vp8",
+        [VERT_LEFT_PRED] = "vertical_left_vp8",
+        [TM_VP8_PRED   ] = "tm_vp8",
+        [DC_127_PRED   ] = "dc_127_vp8",
+        [DC_129_PRED   ] = "dc_129_vp8",
+    },
+    { /* RV40 */
+        [DIAG_DOWN_LEFT_PRED            ] = "down_left_rv40",
+        [VERT_LEFT_PRED                 ] = "vertical_left_rv40",
+        [HOR_UP_PRED                    ] = "horizontal_up_rv40",
+        [DIAG_DOWN_LEFT_PRED_RV40_NODOWN] = "down_left_nodown_rv40",
+        [HOR_UP_PRED_RV40_NODOWN        ] = "horizontal_up_nodown_rv40",
+        [VERT_LEFT_PRED_RV40_NODOWN     ] = "vertical_left_nodown_rv40",
+    },
+    { /* SVQ3 */
+        [DIAG_DOWN_LEFT_PRED] = "down_left_svq3",
+    },
+};
+
+static const char * const pred8x8_modes[4][11] = {
+    { /* H264 */
+        [DC_PRED8x8              ] = "dc",
+        [HOR_PRED8x8             ] = "horizontal",
+        [VERT_PRED8x8            ] = "vertical",
+        [PLANE_PRED8x8           ] = "plane",
+        [LEFT_DC_PRED8x8         ] = "left_dc",
+        [TOP_DC_PRED8x8          ] = "top_dc",
+        [DC_128_PRED8x8          ] = "dc_128",
+        [ALZHEIMER_DC_L0T_PRED8x8] = "mad_cow_dc_l0t",
+        [ALZHEIMER_DC_0LT_PRED8x8] = "mad_cow_dc_0lt",
+        [ALZHEIMER_DC_L00_PRED8x8] = "mad_cow_dc_l00",
+        [ALZHEIMER_DC_0L0_PRED8x8] = "mad_cow_dc_0l0",
+    },
+    { /* VP8 */
+        [PLANE_PRED8x8 ] = "tm_vp8",
+        [DC_127_PRED8x8] = "dc_127_vp8",
+        [DC_129_PRED8x8] = "dc_129_vp8",
+    },
+    { /* RV40 */
+        [DC_PRED8x8     ] = "dc_rv40",
+        [LEFT_DC_PRED8x8] = "left_dc_rv40",
+        [TOP_DC_PRED8x8 ] = "top_dc_rv40",
+    },
+    /* nothing for SVQ3 */
+};
+
+static const char * const pred16x16_modes[4][9] = {
+    { /* H264 */
+        [DC_PRED8x8     ] = "dc",
+        [HOR_PRED8x8    ] = "horizontal",
+        [VERT_PRED8x8   ] = "vertical",
+        [PLANE_PRED8x8  ] = "plane",
+        [LEFT_DC_PRED8x8] = "left_dc",
+        [TOP_DC_PRED8x8 ] = "top_dc",
+        [DC_128_PRED8x8 ] = "dc_128",
+    },
+    { /* VP8 */
+        [PLANE_PRED8x8 ] = "tm_vp8",
+        [DC_127_PRED8x8] = "dc_127_vp8",
+        [DC_129_PRED8x8] = "dc_129_vp8",
+    },
+    { /* RV40 */
+        [PLANE_PRED8x8] = "plane_rv40",
+    },
+    { /* SVQ3 */
+        [PLANE_PRED8x8] = "plane_svq3",
+    },
+};
+
+static const uint32_t pixel_mask[3] = { 0xffffffff, 0x01ff01ff, 0x03ff03ff };
+
+#define SIZEOF_PIXEL ((bit_depth + 7) / 8) /* bytes per pixel, rounded up; uses the bit_depth variable of the enclosing function */
+#define BUF_SIZE (3 * 16 * 17)
+
+#define check_pred_func(func, name, mode_name)                                    \
+    (mode_name && ((codec_ids[codec] == AV_CODEC_ID_H264) ?                       \
+                   check_func(func, "pred%s_%s_%d", name, mode_name, bit_depth) : \
+                   check_func(func, "pred%s_%s", name, mode_name))) /* skips modes without a name; only H.264 names carry a bit depth suffix */
+
+#define randomize_buffers()                        \
+    do {                                           \
+        uint32_t mask = pixel_mask[bit_depth - 8]; \
+        int i;                                     \
+        for (i = 0; i < BUF_SIZE; i += 4) {        \
+            uint32_t r = rnd() & mask;             \
+            AV_WN32A(buf0 + i, r);                 \
+            AV_WN32A(buf1 + i, r);                 \
+        }                                          \
+    } while (0) /* writes the same masked random words to buf0 and buf1; pixel_mask is indexed by bit_depth - 8 */
+
+#define src0 (buf0 + 4 * 16) /* Offset to allow room for top and left */
+#define src1 (buf1 + 4 * 16) /* same offset into the second buffer */
+
+static void check_pred4x4(H264PredContext *h, uint8_t *buf0, uint8_t *buf1,
+                          int codec, int chroma_format, int bit_depth)
+{
+    if (chroma_format == 1) { /* the pred4x4 table is only exercised for chroma_format 1 */
+        uint8_t *topright = buf0 + 2*16; /* always points into buf0 and is handed to both the ref and the new call */
+        int pred_mode;
+        declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *src, const uint8_t *topright, ptrdiff_t stride);
+
+        for (pred_mode = 0; pred_mode < 15; pred_mode++) { /* 15 == second dimension of pred4x4_modes */
+            if (check_pred_func(h->pred4x4[pred_mode], "4x4", pred4x4_modes[codec][pred_mode])) {
+                randomize_buffers();
+                call_ref(src0, topright, 12*SIZEOF_PIXEL);
+                call_new(src1, topright, 12*SIZEOF_PIXEL);
+                if (memcmp(buf0, buf1, BUF_SIZE)) /* the whole buffers are compared, so writes outside the block show up too */
+                    fail();
+                bench_new(src1, topright, 12*SIZEOF_PIXEL);
+            }
+        }
+    }
+}
+
+static void check_pred8x8(H264PredContext *h, uint8_t *buf0, uint8_t *buf1,
+                          int codec, int chroma_format, int bit_depth)
+{
+    int pred_mode;
+    declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *src, ptrdiff_t stride);
+
+    for (pred_mode = 0; pred_mode < 11; pred_mode++) { /* 11 == second dimension of pred8x8_modes */
+        if (check_pred_func(h->pred8x8[pred_mode], (chroma_format == 2) ? "8x16" : "8x8", /* chroma_format 2 is reported under the "8x16" name */
+                            pred8x8_modes[codec][pred_mode])) {
+            randomize_buffers();
+            call_ref(src0, 24*SIZEOF_PIXEL);
+            call_new(src1, 24*SIZEOF_PIXEL);
+            if (memcmp(buf0, buf1, BUF_SIZE)) /* the whole buffers are compared, so writes outside the block show up too */
+                fail();
+            bench_new(src1, 24*SIZEOF_PIXEL);
+        }
+    }
+}
+
+static void check_pred16x16(H264PredContext *h, uint8_t *buf0, uint8_t *buf1,
+                            int codec, int chroma_format, int bit_depth)
+{
+    if (chroma_format == 1) { /* the pred16x16 table is only exercised for chroma_format 1 */
+        int pred_mode;
+        declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *src, ptrdiff_t stride);
+
+        for (pred_mode = 0; pred_mode < 9; pred_mode++) { /* 9 == second dimension of pred16x16_modes */
+            if (check_pred_func(h->pred16x16[pred_mode], "16x16", pred16x16_modes[codec][pred_mode])) {
+                randomize_buffers();
+                call_ref(src0, 48); /* stride is a fixed 48 here, not scaled by SIZEOF_PIXEL as in the other pred tests */
+                call_new(src1, 48);
+                if (memcmp(buf0, buf1, BUF_SIZE)) /* the whole buffers are compared, so writes outside the block show up too */
+                    fail();
+                bench_new(src1, 48);
+            }
+        }
+    }
+}
+
+static void check_pred8x8l(H264PredContext *h, uint8_t *buf0, uint8_t *buf1,
+                           int codec, int chroma_format, int bit_depth)
+{
+    if (chroma_format == 1 && codec_ids[codec] == AV_CODEC_ID_H264) { /* pred8x8l is only exercised for H.264 with chroma_format 1 */
+        int pred_mode;
+        declare_func_emms(AV_CPU_FLAG_MMXEXT, void, uint8_t *src, int topleft, int topright, ptrdiff_t stride);
+
+        for (pred_mode = 0; pred_mode < 12; pred_mode++) {
+            if (check_pred_func(h->pred8x8l[pred_mode], "8x8l", pred4x4_modes[codec][pred_mode])) { /* mode names are taken from the 4x4 table */
+                int neighbors;
+                for (neighbors = 0; neighbors <= 0xc000; neighbors += 0x4000) { /* 0, 0x4000, 0x8000, 0xc000: all four topleft/topright combinations */
+                    int has_topleft  = neighbors & 0x8000;
+                    int has_topright = neighbors & 0x4000;
+
+                    if ((pred_mode == DIAG_DOWN_RIGHT_PRED || pred_mode == VERT_RIGHT_PRED) && !has_topleft)
+                        continue; /* Those aren't allowed according to the spec */
+
+                    randomize_buffers();
+                    call_ref(src0, has_topleft, has_topright, 24*SIZEOF_PIXEL); /* the flags are passed as the raw masked bits, not as 0/1 */
+                    call_new(src1, has_topleft, has_topright, 24*SIZEOF_PIXEL);
+                    if (memcmp(buf0, buf1, BUF_SIZE))
+                        fail();
+                    bench_new(src1, has_topleft, has_topright, 24*SIZEOF_PIXEL);
+                }
+            }
+        }
+    }
+}
+
+/* TODO: Add tests for H.264 lossless H/V prediction */
+
+void checkasm_check_h264pred(void)
+{
+    static const struct {
+        void (*func)(H264PredContext*, uint8_t*, uint8_t*, int, int, int);
+        const char *name;
+    } tests[] = { /* one checker per prediction table, paired with the name passed to report() */
+        { check_pred4x4,   "pred4x4"   },
+        { check_pred8x8,   "pred8x8"   },
+        { check_pred16x16, "pred16x16" },
+        { check_pred8x8l,  "pred8x8l"  },
+    };
+
+    LOCAL_ALIGNED_16(uint8_t, buf0, [BUF_SIZE]);
+    LOCAL_ALIGNED_16(uint8_t, buf1, [BUF_SIZE]);
+    H264PredContext h;
+    int test, codec, chroma_format, bit_depth;
+
+    for (test = 0; test < FF_ARRAY_ELEMS(tests); test++) {
+        for (codec = 0; codec < 4; codec++) { /* index into codec_ids[] */
+            int codec_id = codec_ids[codec];
+            for (bit_depth = 8; bit_depth <= (codec_id == AV_CODEC_ID_H264 ? 10 : 8); bit_depth++) /* 8..10 bits for H.264, 8 bits only otherwise */
+                for (chroma_format = 1; chroma_format <= (codec_id == AV_CODEC_ID_H264 ? 2 : 1); chroma_format++) { /* chroma_format 2 only for H.264 */
+                    ff_h264_pred_init(&h, codec_id, bit_depth, chroma_format); /* context is re-initialized for every combination */
+                    tests[test].func(&h, buf0, buf1, codec, chroma_format, bit_depth);
+                }
+        }
+        report("%s", tests[test].name); /* one report line per checker, covering all codecs and depths */
+    }
+}
diff --git a/tests/checkasm/h264qpel.c b/tests/checkasm/h264qpel.c
new file mode 100644
index 00000000..ba069f12
--- /dev/null
+++ b/tests/checkasm/h264qpel.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2015 Henrik Gramner
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+#include "checkasm.h"
+#include "libavcodec/h264qpel.h"
+#include "libavutil/common.h"
+#include "libavutil/internal.h"
+#include "libavutil/intreadwrite.h"
+
+static const uint32_t pixel_mask[3] = { 0xffffffff, 0x01ff01ff, 0x03ff03ff };
+
+#define SIZEOF_PIXEL ((bit_depth + 7) / 8)
+#define BUF_SIZE (2 * 16 * (16 + 3 + 4))
+
+#define randomize_buffers()                        \
+    do { /* uses bit_depth, buf0/buf1 and dst0/dst1 from the expanding scope */ \
+        uint32_t mask = pixel_mask[bit_depth - 8]; \
+        int k;                                     \
+        for (k = 0; k < BUF_SIZE; k += 4) {        \
+            uint32_t r = rnd() & mask; /* sources: identical, masked data */ \
+            AV_WN32A(buf0 + k, r);                 \
+            AV_WN32A(buf1 + k, r);                 \
+            r = rnd(); /* destinations: identical, unmasked data */ \
+            AV_WN32A(dst0 + k, r);                 \
+            AV_WN32A(dst1 + k, r);                 \
+        }                                          \
+    } while (0)
+
+#define src0 (buf0 + 3 * 2 * 16) /* h264qpel functions read data from negative src pointer offsets */
+#define src1 (buf1 + 3 * 2 * 16)
+
+void checkasm_check_h264qpel(void) /* checks the put/avg qpel tables of H264QpelContext for bit depths 8..10 */
+{
+    LOCAL_ALIGNED_16(uint8_t, buf0, [BUF_SIZE]);
+    LOCAL_ALIGNED_16(uint8_t, buf1, [BUF_SIZE]);
+    LOCAL_ALIGNED_16(uint8_t, dst0, [BUF_SIZE]);
+    LOCAL_ALIGNED_16(uint8_t, dst1, [BUF_SIZE]);
+    H264QpelContext h;
+    int op, bit_depth, i, j;
+    declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+
+    for (op = 0; op < 2; op++) { /* 0 = put, 1 = avg */
+        qpel_mc_func (*tab)[16] = op ? h.avg_h264_qpel_pixels_tab : h.put_h264_qpel_pixels_tab; /* only the address is taken; h is filled in below */
+        const char *op_name = op ? "avg" : "put";
+
+        for (bit_depth = 8; bit_depth <= 10; bit_depth++) {
+            ff_h264qpel_init(&h, bit_depth);
+            for (i = 0; i < (op ? 3 : 4); i++) { /* put: four block sizes, avg: three */
+                int size = 16 >> i;
+                for (j = 0; j < 16; j++) /* j & 3 and j >> 2 are the two digits after "mc" in the name */
+                    if (check_func(tab[i][j], "%s_h264_qpel_%d_mc%d%d_%d", op_name, size, j & 3, j >> 2, bit_depth)) {
+                        randomize_buffers();
+                        call_ref(dst0, src0, size * SIZEOF_PIXEL); /* stride argument = block width in bytes */
+                        call_new(dst1, src1, size * SIZEOF_PIXEL);
+                        if (memcmp(buf0, buf1, BUF_SIZE) || memcmp(dst0, dst1, BUF_SIZE)) /* sources still equal, outputs equal */
+                            fail();
+                        bench_new(dst1, src1, size * SIZEOF_PIXEL);
+                    }
+            }
+        }
+        report("%s", op_name);
+    }
+}
diff --git a/tests/checkasm/jpeg2000dsp.c b/tests/checkasm/jpeg2000dsp.c
new file mode 100644
index 00000000..48559df0
--- /dev/null
+++ b/tests/checkasm/jpeg2000dsp.c
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2015 James Almer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "checkasm.h"
+#include "libavcodec/jpeg2000dsp.h"
+#include "libavutil/common.h"
+#include "libavutil/internal.h"
+#include "libavutil/intreadwrite.h"
+
+#define BUF_SIZE 512
+
+#define randomize_buffers()                 \
+    do { /* gives each refN/newN pair the same BUF_SIZE bytes of random data */ \
+        int i;                              \
+        for (i = 0; i < BUF_SIZE; i += 4) { \
+            uint32_t r = rnd();             \
+            AV_WN32A(ref0 + i, r);          \
+            AV_WN32A(new0 + i, r);          \
+            r = rnd(); /* fresh value for the second pair */ \
+            AV_WN32A(ref1 + i, r);          \
+            AV_WN32A(new1 + i, r);          \
+            r = rnd(); /* fresh value for the third pair */ \
+            AV_WN32A(ref2 + i, r);          \
+            AV_WN32A(new2 + i, r);          \
+        }                                   \
+    } while (0)
+
+static void check_mct(uint8_t *ref0, uint8_t *ref1, uint8_t *ref2,
+                      uint8_t *new0, uint8_t *new1, uint8_t *new2) { /* ref vs. new on three BUF_SIZE-byte buffers each */
+    declare_func(void, void *src0, void *src1, void *src2, int csize);
+
+    randomize_buffers();
+    call_ref(ref0, ref1, ref2, BUF_SIZE / sizeof(int32_t)); /* csize = BUF_SIZE counted in int32_t elements */
+    call_new(new0, new1, new2, BUF_SIZE / sizeof(int32_t));
+    if (memcmp(ref0, new0, BUF_SIZE) || memcmp(ref1, new1, BUF_SIZE) || /* results are compared in the buffers passed in */
+        memcmp(ref2, new2, BUF_SIZE))
+        fail();
+    bench_new(new0, new1, new2, BUF_SIZE / sizeof(int32_t));
+}
+
+void checkasm_check_jpeg2000dsp(void) /* checks Jpeg2000DSPContext.mct_decode */
+{
+    LOCAL_ALIGNED_32(uint8_t, ref, [BUF_SIZE*3]); /* three consecutive BUF_SIZE-byte buffers for the reference call */
+    LOCAL_ALIGNED_32(uint8_t, new, [BUF_SIZE*3]); /* same layout for the function under test */
+    Jpeg2000DSPContext h;
+
+    ff_jpeg2000dsp_init(&h);
+
+    if (check_func(h.mct_decode[FF_DWT53], "jpeg2000_rct_int")) /* only the FF_DWT53 entry is exercised here */
+        check_mct(&ref[BUF_SIZE*0], &ref[BUF_SIZE*1], &ref[BUF_SIZE*2],
+                  &new[BUF_SIZE*0], &new[BUF_SIZE*1], &new[BUF_SIZE*2]);
+
+    report("mct_decode");
+}
diff --git a/tests/checkasm/pixblockdsp.c b/tests/checkasm/pixblockdsp.c
new file mode 100644
index 00000000..66bfdb7d
--- /dev/null
+++ b/tests/checkasm/pixblockdsp.c
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2015 Tiancheng "Timothy" Gu
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+#include "checkasm.h"
+#include "libavcodec/pixblockdsp.h"
+#include "libavutil/common.h"
+#include "libavutil/internal.h"
+#include "libavutil/intreadwrite.h"
+
+#define BUF_UNITS 8
+#define BUF_SIZE (BUF_UNITS * 128 + BUF_UNITS)
+
+#define randomize_buffers()                 \
+    do { /* same random bytes in each 0/1 pair: src10/src11, src20/src21, dst0_/dst1_ */ \
+        int i;                              \
+        for (i = 0; i < BUF_SIZE; i += 4) { \
+            uint32_t r = rnd();             \
+            AV_WN32A(src10 + i, r);         \
+            AV_WN32A(src11 + i, r);         \
+            r = rnd();                      \
+            AV_WN32A(src20 + i, r);         \
+            AV_WN32A(src21 + i, r);         \
+            r = rnd(); /* the byte buffers dst0_/dst1_ are used, not the 16-bit views */ \
+            AV_WN32A(dst0_ + i, r);         \
+            AV_WN32A(dst1_ + i, r);         \
+        }                                   \
+    } while (0)
+
+#define check_get_pixels(type)                                                             \
+    do { /* ref vs. new for BUF_UNITS offset pairs; `type` only scales src_offset */       \
+        int i;                                                                             \
+        declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *block, const uint8_t *pixels, ptrdiff_t line_size);    \
+                                                                                           \
+        for (i = 0; i < BUF_UNITS; i++) {                                              \
+            int src_offset = i * 64 * sizeof(type) + i; /* Test various alignments */      \
+            int dst_offset = i * 64; /* dst must be aligned */                             \
+            randomize_buffers();                                                           \
+            call_ref(dst0 + dst_offset, src10 + src_offset, 8); /* line_size = 8 */        \
+            call_new(dst1 + dst_offset, src11 + src_offset, 8);                            \
+            if (memcmp(src10, src11, BUF_SIZE)|| memcmp(dst0, dst1, BUF_SIZE)) /* whole buffers, not just the block */ \
+                fail();                                                                    \
+            bench_new(dst1 + dst_offset, src11 + src_offset, 8);                           \
+        }                                                                                  \
+    } while (0)
+
+#define check_diff_pixels(type)                                                            \
+    do { /* ref vs. new for BUF_UNITS offset pairs; both sources share src_offset */       \
+        int i;                                                                             \
+        declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *av_restrict block, const uint8_t *s1, const uint8_t *s2, int stride); \
+                                                                                           \
+        for (i = 0; i < BUF_UNITS; i++) {                                              \
+            int src_offset = i * 64 * sizeof(type) + i; /* Test various alignments */      \
+            int dst_offset = i * 64; /* dst must be aligned */                             \
+            randomize_buffers();                                                           \
+            call_ref(dst0 + dst_offset, src10 + src_offset, src20 + src_offset, 8); /* stride = 8 */ \
+            call_new(dst1 + dst_offset, src11 + src_offset, src21 + src_offset, 8);        \
+            if (memcmp(src10, src11, BUF_SIZE) || memcmp(src20, src21, BUF_SIZE) || memcmp(dst0, dst1, BUF_SIZE)) \
+                fail();                                                                    \
+            bench_new(dst1 + dst_offset, src11 + src_offset, src21 + src_offset, 8);       \
+        }                                                                                  \
+    } while (0)
+
+void checkasm_check_pixblockdsp(void) /* checks get_pixels and diff_pixels of PixblockDSPContext */
+{
+    LOCAL_ALIGNED_16(uint8_t, src10, [BUF_SIZE]);
+    LOCAL_ALIGNED_16(uint8_t, src11, [BUF_SIZE]);
+    LOCAL_ALIGNED_16(uint8_t, src20, [BUF_SIZE]);
+    LOCAL_ALIGNED_16(uint8_t, src21, [BUF_SIZE]);
+    LOCAL_ALIGNED_16(uint8_t, dst0_, [BUF_SIZE]);
+    LOCAL_ALIGNED_16(uint8_t, dst1_, [BUF_SIZE]);
+    int16_t *dst0 = (int16_t *)dst0_; /* typed views matching the int16_t *block parameter of both functions */
+    int16_t *dst1 = (int16_t *)dst1_;
+    PixblockDSPContext h;
+    AVCodecContext avctx = {
+        .bits_per_raw_sample = 8,
+    };
+
+    ff_pixblockdsp_init(&h, &avctx);
+
+    if (check_func(h.get_pixels, "get_pixels"))
+        check_get_pixels(uint8_t);
+
+    report("get_pixels");
+
+    if (check_func(h.diff_pixels, "diff_pixels"))
+        check_diff_pixels(uint8_t);
+
+    report("diff_pixels");
+}
diff --git a/tests/checkasm/synth_filter.c b/tests/checkasm/synth_filter.c
new file mode 100644
index 00000000..87e32ebc
--- /dev/null
+++ b/tests/checkasm/synth_filter.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2015 Janne Grunau
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <math.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "libavutil/internal.h"
+#include "libavutil/intfloat.h"
+#include "libavcodec/dcadata.h"
+#include "libavcodec/synth_filter.h"
+
+#include "checkasm.h"
+
+#define BUF_SIZE 32
+
+#define randomize_input()                                       \
+    do { /* fills in[0..BUF_SIZE-1] with random floats */       \
+        int i;                                                  \
+        for (i = 0; i < BUF_SIZE; i++) {                        \
+            float f = (float)rnd() / (UINT_MAX >> 5) - 16.0f; /* roughly [-16, 16], assuming rnd() spans 32 bits - confirm */ \
+            in[i] = f;                                          \
+        }                                                       \
+    } while (0)
+
+void checkasm_check_synth_filter(void) /* compares synth_filter_float against its reference over 20 calls */
+{
+    FFTContext imdct;
+    SynthFilterContext synth;
+
+    ff_mdct_init(&imdct, 6, 1, 1.0);
+    ff_synth_filter_init(&synth);
+
+    if (check_func(synth.synth_filter_float, "synth_filter_float")) {
+        LOCAL_ALIGNED(32, float,   out0,   [BUF_SIZE]); /* *0 / *_0: state and output of the reference call */
+        LOCAL_ALIGNED(32, float,   out1,   [BUF_SIZE]); /* *1 / *_1: state and output of the tested call */
+        LOCAL_ALIGNED(32, float,   out_b,  [BUF_SIZE]); /* *_b: separate state used only by bench_new() */
+        LOCAL_ALIGNED(32, float,   in,     [BUF_SIZE]);
+        LOCAL_ALIGNED(32, float,   buf2_0, [BUF_SIZE]);
+        LOCAL_ALIGNED(32, float,   buf2_1, [BUF_SIZE]);
+        LOCAL_ALIGNED(32, float,   buf2_b, [BUF_SIZE]);
+        LOCAL_ALIGNED(32, float,   buf0,   [512]);
+        LOCAL_ALIGNED(32, float,   buf1,   [512]);
+        LOCAL_ALIGNED(32, float,   buf_b,  [512]);
+        float scale = 1.0f;
+        int i, offset0 = 0, offset1 = 0, offset_b = 0;
+
+        declare_func(void, FFTContext *, float *, int *, float[32], const float[512],
+                     float[32], float[32], float);
+
+        memset(buf2_0, 0, sizeof(*buf2_0) * BUF_SIZE);
+        memset(buf2_1, 0, sizeof(*buf2_1) * BUF_SIZE);
+        memset(buf2_b, 0, sizeof(*buf2_b) * BUF_SIZE);
+        memset(buf0, 0, sizeof(*buf0) * 512); /* element size taken from the buffer being cleared */
+        memset(buf1, 0, sizeof(*buf1) * 512);
+        memset(buf_b, 0, sizeof(*buf_b) * 512);
+
+        /* more than 1 synth_buf_offset wrap-around */
+        for (i = 0; i < 20; i++) {
+            int j;
+            const float * window = (i & 1) ? ff_dca_fir_32bands_perfect : ff_dca_fir_32bands_nonperfect; /* alternate both windows */
+
+            memset(out0, 0, sizeof(*out0) * BUF_SIZE);
+            memset(out1, 0, sizeof(*out1) * BUF_SIZE);
+            memset(out_b, 0, sizeof(*out_b) * BUF_SIZE);
+
+            randomize_input();
+
+            call_ref(&imdct, buf0, &offset0, buf2_0, window,
+                     out0, in, scale);
+            call_new(&imdct, buf1, &offset1, buf2_1, window,
+                     out1, in, scale);
+
+            if (offset0 != offset1) { /* the two states diverged; further iterations are meaningless */
+                fail();
+                fprintf(stderr, "offsets do not match: %d, %d\n", offset0, offset1);
+                break;
+            }
+
+            for (j = 0; j < BUF_SIZE; j++) {
+                if (!float_near_abs_eps_ulp(out0[j],   out1[j],   7.0e-7, 16) ||
+                    !float_near_abs_eps_ulp(buf2_0[j], buf2_1[j], 7.0e-7, 16)) {
+                    union av_intfloat32 o0, o1, b0, b1; /* used to print the raw bit patterns next to the values */
+
+                    fail();
+                    o0.f = out0[j];   o1.f = out1[j];
+                    b0.f = buf2_0[j]; b1.f = buf2_1[j];
+                    fprintf(stderr, "out:  %11g (0x%08x); %11g (0x%08x); abs diff %11g\n",
+                            o0.f, o0.i, o1.f, o1.i, fabsf(o0.f - o1.f));
+                    fprintf(stderr, "buf2: %11g (0x%08x); %11g (0x%08x); abs diff %11g\n",
+                            b0.f, b0.i, b1.f, b1.i, fabsf(b0.f - b1.f));
+                    break; /* report only the first mismatching element of this iteration */
+                }
+            }
+
+            bench_new(&imdct, buf_b, &offset_b, buf2_b, window,
+                      out_b, in, scale);
+        }
+    }
+    ff_mdct_end(&imdct);
+
+    report("synth_filter");
+}
diff --git a/tests/checkasm/v210enc.c b/tests/checkasm/v210enc.c
new file mode 100644
index 00000000..ed367765
--- /dev/null
+++ b/tests/checkasm/v210enc.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2015 Henrik Gramner
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+#include "checkasm.h"
+#include "libavcodec/v210enc.h"
+#include "libavutil/common.h"
+#include "libavutil/internal.h"
+#include "libavutil/intreadwrite.h"
+
+#define BUF_SIZE 512
+
+#define randomize_buffers(mask)                        \
+    do { /* y/u/v pointers are typed, so i counts elements; each store covers 4 / size of them */ \
+        int i, size = sizeof(*y0);                     \
+        for (i = 0; i < BUF_SIZE; i += 4 / size) {     \
+            uint32_t r = rnd() & mask;                 \
+            AV_WN32A(y0 + i, r);                       \
+            AV_WN32A(y1 + i, r);                       \
+        }                                              \
+        for (i = 0; i < BUF_SIZE / 2; i += 4 / size) { \
+            uint32_t r = rnd() & mask;                 \
+            AV_WN32A(u0 + i, r);                       \
+            AV_WN32A(u1 + i, r);                       \
+            r = rnd() & mask;                          \
+            AV_WN32A(v0 + i, r);                       \
+            AV_WN32A(v1 + i, r);                       \
+        }                                              \
+        for (i = 0; i < width * 8 / 3; i += 4) { /* only the bytes compared afterwards */ \
+            uint32_t r = rnd();                        \
+            AV_WN32A(dst0 + i, r);                     \
+            AV_WN32A(dst1 + i, r);                     \
+        }                                              \
+    } while (0)
+
+#define check_pack_line(type, mask)                                                \
+    do { /* ref vs. new for every width that is a multiple of step, at a random source offset */ \
+        LOCAL_ALIGNED_16(type, y0, [BUF_SIZE]);                                    \
+        LOCAL_ALIGNED_16(type, y1, [BUF_SIZE]);                                    \
+        LOCAL_ALIGNED_16(type, u0, [BUF_SIZE / 2]);                                \
+        LOCAL_ALIGNED_16(type, u1, [BUF_SIZE / 2]);                                \
+        LOCAL_ALIGNED_16(type, v0, [BUF_SIZE / 2]);                                \
+        LOCAL_ALIGNED_16(type, v1, [BUF_SIZE / 2]);                                \
+        LOCAL_ALIGNED_16(uint8_t, dst0, [BUF_SIZE * 8 / 3]);                       \
+        LOCAL_ALIGNED_16(uint8_t, dst1, [BUF_SIZE * 8 / 3]);                       \
+                                                                                   \
+        declare_func(void, const type * y, const type * u, const type * v,         \
+                     uint8_t * dst, ptrdiff_t width);                              \
+        ptrdiff_t width, step = 12 / sizeof(type);                                 \
+                                                                                   \
+        for (width = step; width < BUF_SIZE - 15; width += step) { /* leaves room for y_offset <= 15 */ \
+            int y_offset  = rnd() & 15;                                            \
+            int uv_offset = y_offset / 2;                                          \
+            randomize_buffers(mask);                                               \
+            call_ref(y0 + y_offset, u0 + uv_offset, v0 + uv_offset, dst0, width);  \
+            call_new(y1 + y_offset, u1 + uv_offset, v1 + uv_offset, dst1, width);  \
+            if (memcmp(y0, y1, BUF_SIZE * sizeof(type)) || memcmp(u0, u1, BUF_SIZE / 2 * sizeof(type)) || /* byte counts, so 16-bit inputs are fully checked */ \
+                memcmp(v0, v1, BUF_SIZE / 2 * sizeof(type)) || memcmp(dst0, dst1, width * 8 / 3)) \
+                fail();                                                            \
+            bench_new(y1 + y_offset, u1 + uv_offset, v1 + uv_offset, dst1, width); \
+        }                                                                          \
+    } while (0)
+
+void checkasm_check_v210enc(void) /* checks pack_line_8 and pack_line_10 of V210EncContext */
+{
+    V210EncContext h;
+
+    ff_v210enc_init(&h);
+
+    if (check_func(h.pack_line_8, "v210_planar_pack_8"))
+        check_pack_line(uint8_t, 0xffffffff); /* 8-bit samples: no masking of the random input */
+
+    if (check_func(h.pack_line_10, "v210_planar_pack_10"))
+        check_pack_line(uint16_t, 0x03ff03ff); /* keep the low 10 bits of each 16-bit sample */
+
+    report("planar_pack"); /* one report covers both functions */
+}
diff --git a/tests/checkasm/vf_blend.c b/tests/checkasm/vf_blend.c
new file mode 100644
index 00000000..cfc389bf
--- /dev/null
+++ b/tests/checkasm/vf_blend.c
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2016 Tiancheng "Timothy" Gu
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+#include "checkasm.h"
+#include "libavfilter/blend.h"
+#include "libavutil/common.h"
+#include "libavutil/internal.h"
+#include "libavutil/intreadwrite.h"
+
+#define WIDTH 256
+#define HEIGHT 256
+#define BUF_UNITS 3
+#define SIZE_PER_UNIT (WIDTH * HEIGHT)
+#define BUF_SIZE (BUF_UNITS * SIZE_PER_UNIT)
+
+#define randomize_buffers()                   \
+    do { /* every 1/2 buffer pair ends up with identical contents */ \
+        int i, j;                             \
+        for (i = 0; i < HEIGHT; i++) { /* first unit of top/bot: row index / column index */ \
+            for (j = 0; j < WIDTH; j++) {     \
+                top1[i * WIDTH + j] =         \
+                top2[i * WIDTH + j] = i;      \
+                bot1[i * WIDTH + j] =         \
+                bot2[i * WIDTH + j] = j;      \
+            }                                 \
+        }                                     \
+        for (i = 0; i < SIZE_PER_UNIT; i += 4) { /* first unit of dst: random */ \
+            uint32_t r = rnd();               \
+            AV_WN32A(dst1 + i, r);            \
+            AV_WN32A(dst2 + i, r);            \
+        }                                     \
+        for (; i < BUF_SIZE; i += 4) { /* remaining units of all buffers: random */ \
+            uint32_t r = rnd();               \
+            AV_WN32A(top1 + i, r);            \
+            AV_WN32A(top2 + i, r);            \
+            r = rnd();                        \
+            AV_WN32A(bot1 + i, r);            \
+            AV_WN32A(bot2 + i, r);            \
+            r = rnd();                        \
+            AV_WN32A(dst1 + i, r);            \
+            AV_WN32A(dst2 + i, r);            \
+        }                                     \
+    } while (0)
+
+#define check_blend_func()                                                                 \
+    do { /* ref vs. new on a WIDTH x HEIGHT area at BUF_UNITS - 1 different offsets */      \
+        int i;                                                                             \
+        declare_func(void, const uint8_t *top, ptrdiff_t top_linesize,                     \
+                     const uint8_t *bottom, ptrdiff_t bottom_linesize,                     \
+                     uint8_t *dst, ptrdiff_t dst_linesize,                                 \
+                     ptrdiff_t width, ptrdiff_t height,                                    \
+                     struct FilterParams *param, double *values);                          \
+                                                                                           \
+        for (i = 0; i < BUF_UNITS - 1; i++) {                                              \
+            int src_offset = i * SIZE_PER_UNIT + i; /* Test various alignments */          \
+            int dst_offset = i * SIZE_PER_UNIT; /* dst must be aligned */                  \
+            randomize_buffers();                                                           \
+            call_ref(top1 + src_offset, WIDTH, bot1 + src_offset, WIDTH,                   \
+                     dst1 + dst_offset, WIDTH, WIDTH, HEIGHT, &param, NULL);               \
+            call_new(top2 + src_offset, WIDTH, bot2 + src_offset, WIDTH,                   \
+                     dst2 + dst_offset, WIDTH, WIDTH, HEIGHT, &param, NULL);               \
+            if (memcmp(top1, top2, BUF_SIZE) || memcmp(bot1, bot2, BUF_SIZE) || memcmp(dst1, dst2, BUF_SIZE)) \
+                fail();                                                                    \
+            bench_new(top2 + src_offset, WIDTH, bot2 + src_offset, WIDTH,                  \
+                      dst2, WIDTH, WIDTH, HEIGHT, &param, NULL); /* NOTE(review): no dst_offset here, unlike call_new - intended? */ \
+        }                                                                                  \
+    } while (0)
+
+void checkasm_check_blend(void) /* checks param.blend as set up by ff_blend_init() for a list of blend modes */
+{
+    uint8_t *top1 = av_malloc(BUF_SIZE); /* NOTE(review): none of these allocations is checked for NULL */
+    uint8_t *top2 = av_malloc(BUF_SIZE);
+    uint8_t *bot1 = av_malloc(BUF_SIZE);
+    uint8_t *bot2 = av_malloc(BUF_SIZE);
+    uint8_t *dst1 = av_malloc(BUF_SIZE);
+    uint8_t *dst2 = av_malloc(BUF_SIZE);
+    FilterParams param = {
+        .opacity = 1.0,
+    };
+
+#define check_and_report(name, val) /* selects mode val, re-inits param, tests param.blend as #name; does not call report() */ \
+    param.mode = val;                             \
+    ff_blend_init(&param, 0);                     \
+    if (check_func(param.blend, #name))           \
+        check_blend_func();
+
+    check_and_report(addition, BLEND_ADDITION)
+    check_and_report(addition128, BLEND_ADDITION128)
+    check_and_report(and, BLEND_AND)
+    check_and_report(average, BLEND_AVERAGE)
+    check_and_report(darken, BLEND_DARKEN)
+    check_and_report(difference128, BLEND_DIFFERENCE128)
+    check_and_report(hardmix, BLEND_HARDMIX)
+    check_and_report(lighten, BLEND_LIGHTEN)
+    check_and_report(multiply, BLEND_MULTIPLY)
+    check_and_report(or, BLEND_OR)
+    check_and_report(phoenix, BLEND_PHOENIX)
+    check_and_report(screen, BLEND_SCREEN)
+    check_and_report(subtract, BLEND_SUBTRACT)
+    check_and_report(xor, BLEND_XOR)
+    check_and_report(difference, BLEND_DIFFERENCE)
+    check_and_report(negation, BLEND_NEGATION)
+
+    report("8bit"); /* single report for all modes above */
+
+    av_freep(&top1);
+    av_freep(&top2);
+    av_freep(&bot1);
+    av_freep(&bot2);
+    av_freep(&dst1);
+    av_freep(&dst2);
+}
diff --git a/tests/checkasm/videodsp.c b/tests/checkasm/videodsp.c
new file mode 100644
index 00000000..0a4424a2
--- /dev/null
+++ b/tests/checkasm/videodsp.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2016 Ronald S. Bultje <rsbultje@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <string.h>
+#include "checkasm.h"
+#include "libavcodec/videodsp.h"
+#include "libavutil/internal.h"
+#include "libavutil/intreadwrite.h"
+
+#define randomize_buffers(w, h)                         \
+    do { /* fills only src0 (w * h elements) with random bytes */ \
+        int i;                                          \
+        for (i = 0; i < w * h * sizeof(*src0); i += 4)  \
+            AV_WN32A(((uint8_t *) src0) + i, rnd());    \
+    } while (0)
+
+#define iter_1d(type, fix, fix_val, var, var_start, var_end) /* hold fix at fix_val, sweep var over [var_start, var_end] */ \
+    for (fix = fix_val, var = var_start; var <= var_end; var++) {   \
+        call_ref((type *) dst0, (const type *) (src0 + y * pw + x), \
+                 bw * sizeof(type), pw * sizeof(type),              \
+                 bw, bh, x, y, pw, ph);                             \
+        call_new((type *) dst1, (const type *) (src1 + y * pw + x), \
+                 bw * sizeof(type), pw * sizeof(type),              \
+                 bw, bh, x, y, pw, ph);                             \
+        if (memcmp(dst0, dst1, bw * bh * sizeof(type))) /* compares the bw x bh output block */ \
+            fail();                                                 \
+        bench_new((type *) dst1, (const type *) (src1 + y * pw + x),\
+                  bw * sizeof(type), pw * sizeof(type),             \
+                  bw, bh, x, y, pw, ph);                            \
+    }
+
+#define check_emu_edge_size(type, src_w, src_h, dst_w, dst_h)   \
+    do { /* tests positions on the border of [-src_w, src_w] x [-src_h, src_h] */ \
+        LOCAL_ALIGNED_16(type, src0, [src_w * src_h]);          \
+        LOCAL_ALIGNED_16(type, src1, [src_w * src_h]);          \
+        int bw = dst_w, bh = dst_h;                             \
+        int pw = src_w, ph = src_h;                             \
+        int y, x;                                               \
+        randomize_buffers(src_w, src_h);                        \
+        memcpy(src1, src0, pw * ph * sizeof(type)); /* src1 is an exact copy of src0 */ \
+        iter_1d(type, y, 0 - src_h, x, 0 - src_w, src_w - 0); /* y = -src_h, sweep x */ \
+        iter_1d(type, x, src_w - 0, y, 0 - src_h, src_h - 0); /* x = +src_w, sweep y */ \
+        iter_1d(type, y, src_h - 0, x, 0 - src_w, src_w - 0); /* y = +src_h, sweep x */ \
+        iter_1d(type, x, 0 - src_w, y, 0 - src_h, src_h - 0); /* x = -src_w, sweep y */ \
+    } while (0)
+
+#define check_emu_edge(type)                                    \
+    do { /* 64x64 output block, three different source sizes */ \
+        LOCAL_ALIGNED_16(type, dst0, [64 * 64]);                \
+        LOCAL_ALIGNED_16(type, dst1, [64 * 64]);                \
+        declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, \
+                          void, type *dst, const type *src,     \
+                          ptrdiff_t dst_linesize,               \
+                          ptrdiff_t src_linesize,               \
+                          int block_w, int block_h,             \
+                          int src_x, int src_y,                 \
+                          int src_w, int src_h);                \
+        check_emu_edge_size(type, 16,  1, 64, 64); /* arguments: src_w, src_h, dst_w, dst_h */ \
+        check_emu_edge_size(type, 16, 16, 64, 64);              \
+        check_emu_edge_size(type, 64, 64, 64, 64);              \
+    } while (0)
+
+void checkasm_check_videodsp(void) /* checks VideoDSPContext.emulated_edge_mc */
+{
+    VideoDSPContext vdsp;
+
+    ff_videodsp_init(&vdsp, 8); /* only this one init value is exercised */
+    if (check_func(vdsp.emulated_edge_mc, "emulated_edge_mc_8"))
+        check_emu_edge(uint8_t);
+
+    report("emulated_edge_mc");
+}
diff --git a/tests/checkasm/vp9dsp.c b/tests/checkasm/vp9dsp.c
new file mode 100644
index 00000000..931f7882
--- /dev/null
+++ b/tests/checkasm/vp9dsp.c
@@ -0,0 +1,623 @@
+/*
+ * Copyright (c) 2015 Ronald S. Bultje <rsbultje@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <math.h>
+#include <string.h>
+#include "checkasm.h"
+#include "libavcodec/vp9data.h"
+#include "libavcodec/vp9dsp.h"
+#include "libavutil/common.h"
+#include "libavutil/internal.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/mathematics.h"
+
+static const uint32_t pixel_mask[3] = { 0xffffffff, 0x03ff03ff, 0x0fff0fff }; /* per-32-bit-word masks, indexed by (bit_depth - 8) >> 1, i.e. 8, 10 and 12 bits */
+#define SIZEOF_PIXEL ((bit_depth + 7) / 8) /* bytes per pixel; needs a bit_depth variable in scope at the use site */
+
+#define randomize_buffers()                                        \
+    do { /* uses a, l, size and bit_depth from the caller */       \
+        uint32_t mask = pixel_mask[(bit_depth - 8) >> 1];          \
+        int k; /* byte offset; the a[] loop starts at a - 4 */     \
+        for (k = -4;  k < SIZEOF_PIXEL * FFMAX(8, size); k += 4) { \
+            uint32_t r = rnd() & mask;                             \
+            AV_WN32A(a + k, r);                                    \
+        }                                                          \
+        for (k = 0; k < size * SIZEOF_PIXEL; k += 4) {             \
+            uint32_t r = rnd() & mask;                             \
+            AV_WN32A(l + k, r);                                    \
+        }                                                          \
+    } while (0)
+
+static void check_ipred(void) // compares ref vs. new dsp.intra_pred[tx][mode] for every size, mode and bit depth
+{
+    LOCAL_ALIGNED_32(uint8_t, a_buf, [64 * 2]);
+    uint8_t *a = &a_buf[32 * 2]; // starts mid-buffer so the 4 bytes before a[0] written by randomize_buffers() stay in bounds
+    LOCAL_ALIGNED_32(uint8_t, l, [32 * 2]);
+    LOCAL_ALIGNED_32(uint8_t, dst0, [32 * 32 * 2]); // written by call_ref()
+    LOCAL_ALIGNED_32(uint8_t, dst1, [32 * 32 * 2]); // written by call_new()
+    VP9DSPContext dsp;
+    int tx, mode, bit_depth;
+    declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t stride,
+                      const uint8_t *left, const uint8_t *top);
+    static const char *const mode_names[N_INTRA_PRED_MODES] = { // name fragment used in the test name passed to check_func()
+        [VERT_PRED] = "vert",
+        [HOR_PRED] = "hor",
+        [DC_PRED] = "dc",
+        [DIAG_DOWN_LEFT_PRED] = "diag_downleft",
+        [DIAG_DOWN_RIGHT_PRED] = "diag_downright",
+        [VERT_RIGHT_PRED] = "vert_right",
+        [HOR_DOWN_PRED] = "hor_down",
+        [VERT_LEFT_PRED] = "vert_left",
+        [HOR_UP_PRED] = "hor_up",
+        [TM_VP8_PRED] = "tm",
+        [LEFT_DC_PRED] = "dc_left",
+        [TOP_DC_PRED] = "dc_top",
+        [DC_128_PRED] = "dc_128",
+        [DC_127_PRED] = "dc_127",
+        [DC_129_PRED] = "dc_129",
+    };
+
+    for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) { // 8, 10 and 12 bits
+        ff_vp9dsp_init(&dsp, bit_depth, 0);
+        for (tx = 0; tx < 4; tx++) {
+            int size = 4 << tx; // 4, 8, 16, 32
+
+            for (mode = 0; mode < N_INTRA_PRED_MODES; mode++) {
+                if (check_func(dsp.intra_pred[tx][mode], "vp9_%s_%dx%d_%dbpp",
+                               mode_names[mode], size, size, bit_depth)) {
+                    randomize_buffers();
+                    call_ref(dst0, size * SIZEOF_PIXEL, l, a); // l is passed as "left", a as "top"
+                    call_new(dst1, size * SIZEOF_PIXEL, l, a);
+                    if (memcmp(dst0, dst1, size * size * SIZEOF_PIXEL)) // stride equals the row size, so the block is contiguous
+                        fail();
+                    bench_new(dst1, size * SIZEOF_PIXEL,l, a);
+                }
+            }
+        }
+    }
+    report("ipred");
+}
+
+#undef randomize_buffers
+
+#define randomize_buffers() \
+    do { /* random pixels into dst and src, residual into coef; uses the caller's sz, bit_depth, x and y */ \
+        uint32_t mask = pixel_mask[(bit_depth - 8) >> 1];                  \
+        for (y = 0; y < sz; y++) {                                         \
+            for (x = 0; x < sz * SIZEOF_PIXEL; x += 4) {                   \
+                uint32_t r = rnd() & mask;                                 \
+                AV_WN32A(dst + y * sz * SIZEOF_PIXEL + x, r);              \
+                AV_WN32A(src + y * sz * SIZEOF_PIXEL + x, rnd() & mask);   \
+            }                                                              \
+            for (x = 0; x < sz; x++) { /* coef = src - dst */              \
+                if (bit_depth == 8) {                                      \
+                    coef[y * sz + x] = src[y * sz + x] - dst[y * sz + x];  \
+                } else { /* 16-bit pixels, 32-bit coefs */                 \
+                    ((int32_t *) coef)[y * sz + x] =                       \
+                        ((uint16_t *) src)[y * sz + x] -                   \
+                        ((uint16_t *) dst)[y * sz + x];                    \
+                }                                                          \
+            }                                                              \
+        }                                                                  \
+    } while(0)
+
+// wht function copied from libvpx; always reads in[0..3] and writes out[0..3], sz is not used
+static void fwht_1d(double *out, const double *in, int sz)
+{
+    double t0 = in[0] + in[1];
+    double t3 = in[3] - in[2];
+    double t4 = trunc((t0 - t3) * 0.5); // half of the difference, truncated toward zero
+    double t1 = t4 - in[1];
+    double t2 = t4 - in[2];
+
+    out[0] = t0 - t2;
+    out[1] = t2;
+    out[2] = t3 + t1;
+    out[3] = t1;
+}
+
+// standard DCT-II: out[k] = sum over n of in[n] * cos(pi * (2n + 1) * k / (2 * sz)), for k, n in [0, sz)
+static void fdct_1d(double *out, const double *in, int sz)
+{
+    int k, n;
+
+    for (k = 0; k < sz; k++) {
+        out[k] = 0.0;
+        for (n = 0; n < sz; n++)
+            out[k] += in[n] * cos(M_PI * (2 * n + 1) * k / (sz * 2.0));
+    }
+    out[0] *= M_SQRT1_2; // only the k == 0 term is scaled by 1/sqrt(2)
+}
+
+// see "Towards jointly optimal spatial prediction and adaptive transform in
+// video/image coding", by J. Han, A. Saxena, and K. Rose
+// IEEE Proc. ICASSP, pp. 726-729, Mar. 2010.
+static void fadst4_1d(double *out, const double *in, int sz)
+{
+    int k, n;
+
+    for (k = 0; k < sz; k++) {
+        out[k] = 0.0; // out[k] is accumulated over all sz inputs below
+        for (n = 0; n < sz; n++)
+            out[k] += in[n] * sin(M_PI * (n + 1) * (2 * k + 1) / (sz * 2.0 + 1.0)); // sine kernel with denominator 2 * sz + 1
+    }
+}
+
+// see "A Butterfly Structured Design of The Hybrid Transform Coding Scheme",
+// by Jingning Han, Yaowu Xu, and Debargha Mukherjee
+// http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/41418.pdf
+static void fadst_1d(double *out, const double *in, int sz)
+{
+    int k, n;
+
+    for (k = 0; k < sz; k++) {
+        out[k] = 0.0; // out[k] is accumulated over all sz inputs below
+        for (n = 0; n < sz; n++)
+            out[k] += in[n] * sin(M_PI * (2 * n + 1) * (2 * k + 1) / (sz * 4.0)); // sine kernel with denominator 4 * sz
+    }
+}
+
+typedef void (*ftx1d_fn)(double *out, const double *in, int sz); // common signature of the 1-D transforms stored in ftx1d_tbl
+static void ftx_2d(double *out, const double *in, enum TxfmMode tx,
+                   enum TxfmType txtp, int sz) // two-pass transform of the sz * sz values in "in", result written to "out"
+{
+    static const double scaling_factors[5][4] = { // indexed [tx][txtp]; entries not listed are zero-initialized
+        { 4.0, 16.0 * M_SQRT1_2 / 3.0, 16.0 * M_SQRT1_2 / 3.0, 32.0 / 9.0 },
+        { 2.0, 2.0, 2.0, 2.0 },
+        { 1.0, 1.0, 1.0, 1.0 },
+        { 0.25 },
+        { 4.0 }
+    };
+    static const ftx1d_fn ftx1d_tbl[5][4][2] = { // indexed [tx][txtp][pass]: [0] is used in the first loop, [1] in the second
+        {
+            { fdct_1d, fdct_1d },
+            { fadst4_1d, fdct_1d },
+            { fdct_1d, fadst4_1d },
+            { fadst4_1d, fadst4_1d },
+        }, {
+            { fdct_1d, fdct_1d },
+            { fadst_1d, fdct_1d },
+            { fdct_1d, fadst_1d },
+            { fadst_1d, fadst_1d },
+        }, {
+            { fdct_1d, fdct_1d },
+            { fadst_1d, fdct_1d },
+            { fdct_1d, fadst_1d },
+            { fadst_1d, fadst_1d },
+        }, {
+            { fdct_1d, fdct_1d },
+        }, {
+            { fwht_1d, fwht_1d },
+        },
+    };
+    double temp[1024]; // intermediate block; 1024 = 32 * 32, so sz must not exceed 32
+    double scaling_factor = scaling_factors[tx][txtp];
+    int i, j;
+
+    // cols
+    for (i = 0; i < sz; ++i) {
+        double temp_out[32]; // one transformed line of up to 32 values
+
+        ftx1d_tbl[tx][txtp][0](temp_out, &in[i * sz], sz); // transforms the sz consecutive inputs starting at in[i * sz]
+        // scale and transpose
+        for (j = 0; j < sz; ++j)
+            temp[j * sz + i] = temp_out[j] * scaling_factor;
+    }
+
+    // rows
+    for (i = 0; i < sz; i++)
+        ftx1d_tbl[tx][txtp][1](&out[i * sz], &temp[i * sz], sz); // second pass is not scaled
+}
+
+static void ftx(int16_t *buf, enum TxfmMode tx,
+                enum TxfmType txtp, int sz, int bit_depth) // replaces the sz * sz values in buf with their ftx_2d() transform
+{
+    double ind[1024], outd[1024];
+    int n;
+
+    emms_c(); // presumably resets MMX state before the floating-point math below -- TODO confirm
+    for (n = 0; n < sz * sz; n++) {
+        if (bit_depth == 8)
+            ind[n] = buf[n]; // 8 bpp: buf is read as int16_t values
+        else
+            ind[n] = ((int32_t *) buf)[n]; // other depths: buf is read as int32_t values
+    }
+    ftx_2d(outd, ind, tx, txtp, sz);
+    for (n = 0; n < sz * sz; n++) {
+        if (bit_depth == 8)
+            buf[n] = lrint(outd[n]); // rounded to an integer and stored with the same width it was read with
+        else
+            ((int32_t *) buf)[n] = lrint(outd[n]);
+    }
+}
+
+static int copy_subcoefs(int16_t *out, const int16_t *in, enum TxfmMode tx,
+                         enum TxfmType txtp, int sz, int sub, int bit_depth)
+{
+    // copy the topleft coefficients such that the return value (being the
+    // coefficient scantable index for the eob token) guarantees that only
+    // the topleft $sub out of $sz (where $sz >= $sub) coefficients in both
+    // dimensions are non-zero. This leads to branching to specific optimized
+    // simd versions (e.g. dc-only) so that we get full asm coverage in this
+    // test
+
+    int n;
+    const int16_t *scan = vp9_scans[tx][txtp]; // scan order: scan[n] is the raster position of the n-th coefficient
+    int eob;
+
+    for (n = 0; n < sz * sz; n++) {
+        int rc = scan[n], rcx = rc % sz, rcy = rc / sz; // raster index split into column and row
+
+        // find eob for this sub-idct
+        if (rcx >= sub || rcy >= sub)
+            break;
+
+        // copy coef
+        if (bit_depth == 8) {
+            out[rc] = in[rc];
+        } else {
+            AV_COPY32(&out[rc * 2], &in[rc * 2]); // coefficients occupy two int16_t slots each at this depth
+        }
+    }
+
+    eob = n; // number of coefficients copied before the first one outside the sub x sub corner
+
+    for (; n < sz * sz; n++) {
+        int rc = scan[n];
+
+        // zero
+        if (bit_depth == 8) {
+            out[rc] = 0;
+        } else {
+            AV_ZERO32(&out[rc * 2]);
+        }
+    }
+
+    return eob;
+}
+
+static int iszero(const int16_t *c, int sz) // returns 1 if the sz bytes at c are all zero, 0 otherwise
+{
+    int n;
+
+    for (n = 0; n < sz / sizeof(int16_t); n += 2) // sz is a byte count; n indexes int16_t elements, two per step
+        if (AV_RN32A(&c[n]))
+            return 0;
+
+    return 1;
+}
+
+#define SIZEOF_COEF (2 * ((bit_depth + 7) / 8)) /* bytes per coefficient: 2 at 8 bpp, 4 at 10 and 12 bpp */
+
+static void check_itxfm(void) // compares ref vs. new dsp.itxfm_add[tx][txtp] for every size, type, bit depth and sub-block size
+{
+    LOCAL_ALIGNED_32(uint8_t, src, [32 * 32 * 2]);
+    LOCAL_ALIGNED_32(uint8_t, dst, [32 * 32 * 2]);
+    LOCAL_ALIGNED_32(uint8_t, dst0, [32 * 32 * 2]);
+    LOCAL_ALIGNED_32(uint8_t, dst1, [32 * 32 * 2]);
+    LOCAL_ALIGNED_32(int16_t, coef, [32 * 32 * 2]);
+    LOCAL_ALIGNED_32(int16_t, subcoef0, [32 * 32 * 2]);
+    LOCAL_ALIGNED_32(int16_t, subcoef1, [32 * 32 * 2]);
+    declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
+    VP9DSPContext dsp;
+    int y, x, tx, txtp, bit_depth, sub;
+    static const char *const txtp_types[N_TXFM_TYPES] = { // name fragment used in the test name passed to check_func()
+        [DCT_DCT] = "dct_dct", [DCT_ADST] = "adst_dct",
+        [ADST_DCT] = "dct_adst", [ADST_ADST] = "adst_adst"
+    };
+
+    for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) { // 8, 10 and 12 bits
+        ff_vp9dsp_init(&dsp, bit_depth, 0);
+
+        for (tx = TX_4X4; tx <= N_TXFM_SIZES /* 4 = lossless */; tx++) {
+            int sz = 4 << (tx & 3); // the "& 3" makes tx == 4 use a 4x4 block again
+            int n_txtps = tx < TX_32X32 ? N_TXFM_TYPES : 1; // from TX_32X32 upward only txtp 0 is tested
+
+            for (txtp = 0; txtp < n_txtps; txtp++) {
+                if (check_func(dsp.itxfm_add[tx][txtp], "vp9_inv_%s_%dx%d_add_%d",
+                               tx == 4 ? "wht_wht" : txtp_types[txtp], sz, sz,
+                               bit_depth)) {
+                    randomize_buffers();
+                    ftx(coef, tx, txtp, sz, bit_depth); // turn the src - dst residual in coef into transform coefficients
+
+                    for (sub = (txtp == 0) ? 1 : 2; sub <= sz; sub <<= 1) { // sub doubles each pass; it starts at 1 only for txtp == 0
+                        int eob;
+
+                        if (sub < sz) {
+                            eob = copy_subcoefs(subcoef0, coef, tx, txtp,
+                                                sz, sub, bit_depth);
+                        } else {
+                            eob = sz * sz; // full block: every coefficient is kept
+                            memcpy(subcoef0, coef, sz * sz * SIZEOF_COEF);
+                        }
+
+                        memcpy(dst0, dst, sz * sz * SIZEOF_PIXEL);
+                        memcpy(dst1, dst, sz * sz * SIZEOF_PIXEL);
+                        memcpy(subcoef1, subcoef0, sz * sz * SIZEOF_COEF); // each implementation gets its own coefficient copy
+                        call_ref(dst0, sz * SIZEOF_PIXEL, subcoef0, eob);
+                        call_new(dst1, sz * SIZEOF_PIXEL, subcoef1, eob);
+                        if (memcmp(dst0, dst1, sz * sz * SIZEOF_PIXEL) || // outputs must match ...
+                            !iszero(subcoef0, sz * sz * SIZEOF_COEF) ||   // ... and both coefficient buffers must be all zero afterwards
+                            !iszero(subcoef1, sz * sz * SIZEOF_COEF))
+                            fail();
+                    }
+                    bench_new(dst, sz * SIZEOF_PIXEL, coef, sz * sz);
+                }
+            }
+        }
+    }
+    report("itxfm");
+}
+
+#undef randomize_buffers
+
+#define setpx(a,b,c) \
+    do { /* store c, clipped to the pixel range, at element a + b * jstride of buf0; uses the caller's buf0, jstride, bit_depth */ \
+        if (SIZEOF_PIXEL == 1) { \
+            buf0[(a) + (b) * jstride] = av_clip_uint8(c); \
+        } else { \
+            ((uint16_t *)buf0)[(a) + (b) * jstride] = av_clip_uintp2(c, bit_depth); \
+        } \
+    } while (0)
+
+// c can be an assignment and must not be put under (): the random offset below then becomes part of the assigned value
+#define setdx(a,b,c,d) setpx(a,b,c-(d)+(rnd()%((d)*2+1))) /* c plus a random offset in [-d, d] */
+#define setsx(a,b,c,d) setdx(a,b,c,(d) << (bit_depth - 8)) /* same, with d scaled up by the bit depth above 8 */
+static void randomize_loopfilter_buffers(int bidx, int lineoff, int str,
+                                         int bit_depth, int dir, const int *E,
+                                         const int *F, const int *H, const int *I,
+                                         uint8_t *buf0, uint8_t *buf1) // fills 8 lines of buf0; H and buf1 are not referenced in the body
+{
+    uint32_t mask = (1 << bit_depth) - 1; // largest pixel value for this bit depth
+    int off = dir ? lineoff : lineoff * 16; // element offset of the first line to fill
+    int istride = dir ? 1 : 16; // step from one line to the next
+    int jstride = dir ? str : 1; // step along a line, across the edge (used inside setpx)
+    int i, j;
+    for (i = 0; i < 2; i++) /* flat16 */ {
+        int idx = off + i * istride, p0, q0;
+        setpx(idx,  0, q0 = rnd() & mask); // position 0 gets a fully random value
+        setsx(idx, -1, p0 = q0, E[bidx] >> 2); // position -1 stays within the scaled E >> 2 of q0
+        for (j = 1; j < 8; j++) { // the 7 remaining positions on each side stay within the scaled F of p0 / q0
+            setsx(idx, -1 - j, p0, F[bidx]);
+            setsx(idx, j, q0, F[bidx]);
+        }
+    }
+    for (i = 2; i < 4; i++) /* flat8 */ {
+        int idx = off + i * istride, p0, q0;
+        setpx(idx,  0, q0 = rnd() & mask);
+        setsx(idx, -1, p0 = q0, E[bidx] >> 2);
+        for (j = 1; j < 4; j++) { // only the 3 nearest positions per side are kept close
+            setsx(idx, -1 - j, p0, F[bidx]);
+            setsx(idx, j, q0, F[bidx]);
+        }
+        for (j = 4; j < 8; j++) { // the 4 outer positions per side are fully random
+            setpx(idx, -1 - j, rnd() & mask);
+            setpx(idx, j, rnd() & mask);
+        }
+    }
+    for (i = 4; i < 6; i++) /* regular */ {
+        int idx = off + i * istride, p2, p1, p0, q0, q1, q2;
+        setpx(idx,  0, q0 = rnd() & mask);
+        setsx(idx,  1, q1 = q0, I[bidx]); // each position stays within the scaled I of its inner neighbour
+        setsx(idx,  2, q2 = q1, I[bidx]);
+        setsx(idx,  3, q2,      I[bidx]);
+        setsx(idx, -1, p0 = q0, E[bidx] >> 2);
+        setsx(idx, -2, p1 = p0, I[bidx]);
+        setsx(idx, -3, p2 = p1, I[bidx]);
+        setsx(idx, -4, p2,      I[bidx]);
+        for (j = 4; j < 8; j++) { // the 4 outer positions per side are fully random
+            setpx(idx, -1 - j, rnd() & mask);
+            setpx(idx, j, rnd() & mask);
+        }
+    }
+    for (i = 6; i < 8; i++) /* off */ {
+        int idx = off + i * istride;
+        for (j = 0; j < 8; j++) { // every position is fully random
+            setpx(idx, -1 - j, rnd() & mask);
+            setpx(idx, j, rnd() & mask);
+        }
+    }
+}
+#define randomize_buffers(bidx, lineoff, str) \
+        randomize_loopfilter_buffers(bidx, lineoff, str, bit_depth, dir, \
+                                     E, F, H, I, buf0, buf1) /* remaining arguments come from the caller's locals */
+
+static void check_loopfilter(void) // compares ref vs. new loop filters (8px, 16px and mix2) for both directions and all bit depths
+{
+    LOCAL_ALIGNED_32(uint8_t, base0, [32 + 16 * 16 * 2]);
+    LOCAL_ALIGNED_32(uint8_t, base1, [32 + 16 * 16 * 2]);
+    VP9DSPContext dsp;
+    int dir, wd, wd2, bit_depth;
+    static const char *const dir_name[2] = { "h", "v" };
+    static const int E[2] = { 20, 28 }, I[2] = { 10, 16 }; // two parameter sets; index 1 is only used by the mix2 tests
+    static const int H[2] = { 7, 11 }, F[2] = { 1, 1 };
+    declare_func(void, uint8_t *dst, ptrdiff_t stride, int E, int I, int H);
+
+    for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) { // 8, 10 and 12 bits
+        ff_vp9dsp_init(&dsp, bit_depth, 0);
+
+        for (dir = 0; dir < 2; dir++) {
+            int midoff = (dir ? 8 * 8 : 8) * SIZEOF_PIXEL; // bytes before buf0/buf1 where the copied and compared region starts
+            int midoff_aligned = (dir ? 8 * 8 : 16) * SIZEOF_PIXEL; // byte offset of buf0/buf1 from the buffer base
+            uint8_t *buf0 = base0 + midoff_aligned;
+            uint8_t *buf1 = base1 + midoff_aligned;
+
+            for (wd = 0; wd < 3; wd++) {
+                // 4/8/16wd_8px
+                if (check_func(dsp.loop_filter_8[wd][dir],
+                               "vp9_loop_filter_%s_%d_8_%dbpp",
+                               dir_name[dir], 4 << wd, bit_depth)) {
+                    randomize_buffers(0, 0, 8);
+                    memcpy(buf1 - midoff, buf0 - midoff,
+                           16 * 8 * SIZEOF_PIXEL);
+                    call_ref(buf0, 16 * SIZEOF_PIXEL >> dir, E[0], I[0], H[0]); // stride: 16 pixels for dir 0, 8 for dir 1
+                    call_new(buf1, 16 * SIZEOF_PIXEL >> dir, E[0], I[0], H[0]);
+                    if (memcmp(buf0 - midoff, buf1 - midoff, 16 * 8 * SIZEOF_PIXEL))
+                        fail();
+                    bench_new(buf1, 16 * SIZEOF_PIXEL >> dir, E[0], I[0], H[0]);
+                }
+            }
+
+            midoff = (dir ? 16 * 8 : 8) * SIZEOF_PIXEL; // recomputed for the 16-pixel-long edge tests below
+            midoff_aligned = (dir ? 16 * 8 : 16) * SIZEOF_PIXEL;
+
+            buf0 = base0 + midoff_aligned;
+            buf1 = base1 + midoff_aligned;
+
+            // 16wd_16px loopfilter
+            if (check_func(dsp.loop_filter_16[dir],
+                           "vp9_loop_filter_%s_16_16_%dbpp",
+                           dir_name[dir], bit_depth)) {
+                randomize_buffers(0, 0, 16); // lines 0-7
+                randomize_buffers(0, 8, 16); // lines 8-15
+                memcpy(buf1 - midoff, buf0 - midoff, 16 * 16 * SIZEOF_PIXEL);
+                call_ref(buf0, 16 * SIZEOF_PIXEL, E[0], I[0], H[0]);
+                call_new(buf1, 16 * SIZEOF_PIXEL, E[0], I[0], H[0]);
+                if (memcmp(buf0 - midoff, buf1 - midoff, 16 * 16 * SIZEOF_PIXEL))
+                    fail();
+                bench_new(buf1, 16 * SIZEOF_PIXEL, E[0], I[0], H[0]);
+            }
+
+            for (wd = 0; wd < 2; wd++) {
+                for (wd2 = 0; wd2 < 2; wd2++) {
+                    // mix2 loopfilter
+                    if (check_func(dsp.loop_filter_mix2[wd][wd2][dir],
+                                   "vp9_loop_filter_mix2_%s_%d%d_16_%dbpp",
+                                   dir_name[dir], 4 << wd, 4 << wd2, bit_depth)) {
+                        randomize_buffers(0, 0, 16); // lines 0-7 built from parameter set 0
+                        randomize_buffers(1, 8, 16); // lines 8-15 built from parameter set 1
+                        memcpy(buf1 - midoff, buf0 - midoff, 16 * 16 * SIZEOF_PIXEL);
+#define M(a) (((a)[1] << 8) | (a)[0]) /* packs a[0] into bits 0-7 and a[1] into bits 8 and up */
+                        call_ref(buf0, 16 * SIZEOF_PIXEL, M(E), M(I), M(H));
+                        call_new(buf1, 16 * SIZEOF_PIXEL, M(E), M(I), M(H));
+                        if (memcmp(buf0 - midoff, buf1 - midoff, 16 * 16 * SIZEOF_PIXEL))
+                            fail();
+                        bench_new(buf1, 16 * SIZEOF_PIXEL, M(E), M(I), M(H));
+#undef M
+                    }
+                }
+            }
+        }
+    }
+    report("loopfilter");
+}
+
+#undef setsx
+#undef setpx
+#undef setdx
+#undef randomize_buffers
+
+#define DST_BUF_SIZE (size * size * SIZEOF_PIXEL) /* bytes in a size x size destination block */
+#define SRC_BUF_STRIDE 72 /* source row length in pixels */
+#define SRC_BUF_SIZE ((size + 7) * SRC_BUF_STRIDE * SIZEOF_PIXEL) /* bytes in size + 7 source rows */
+#define src (buf + 3 * SIZEOF_PIXEL * (SRC_BUF_STRIDE + 1)) /* buf advanced by 3 rows and 3 pixels */
+
+#define randomize_buffers()                               \
+    do { /* uses buf, dst0, dst1, op, size, bit_depth */  \
+        uint32_t mask = pixel_mask[(bit_depth - 8) >> 1]; \
+        int k;                                            \
+        for (k = 0; k < SRC_BUF_SIZE; k += 4) {           \
+            uint32_t r = rnd() & mask;                    \
+            AV_WN32A(buf + k, r);                         \
+        }                                                 \
+        if (op == 1) { /* same data in dst0 and dst1 */   \
+            for (k = 0; k < DST_BUF_SIZE; k += 4) {       \
+                uint32_t r = rnd() & mask;                \
+                AV_WN32A(dst0 + k, r);                    \
+                AV_WN32A(dst1 + k, r);                    \
+            }                                             \
+        }                                                 \
+    } while (0)
+
+static void check_mc(void) // compares ref vs. new dsp.mc[][][][][] for every op, bit depth, block size, filter and subpel direction
+{
+    LOCAL_ALIGNED_32(uint8_t, buf, [72 * 72 * 2]);
+    LOCAL_ALIGNED_32(uint8_t, dst0, [64 * 64 * 2]);
+    LOCAL_ALIGNED_32(uint8_t, dst1, [64 * 64 * 2]);
+    VP9DSPContext dsp;
+    int op, hsize, bit_depth, filter, dx, dy;
+    declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, void, uint8_t *dst, ptrdiff_t dst_stride,
+                      const uint8_t *ref, ptrdiff_t ref_stride,
+                 int h, int mx, int my);
+    static const char *const filter_names[4] = {
+        "8tap_smooth", "8tap_regular", "8tap_sharp", "bilin"
+    };
+    static const char *const subpel_names[2][2] = { { "", "h" }, { "v", "hv" } }; // indexed [dy][dx]
+    static const char *const op_names[2] = { "put", "avg" };
+    char str[256];
+
+    for (op = 0; op < 2; op++) {
+        for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
+            ff_vp9dsp_init(&dsp, bit_depth, 0);
+            for (hsize = 0; hsize < 5; hsize++) {
+                int size = 64 >> hsize; // 64, 32, 16, 8, 4
+
+                for (filter = 0; filter < 4; filter++) {
+                    for (dx = 0; dx < 2; dx++) {
+                        for (dy = 0; dy < 2; dy++) {
+                            if (dx || dy) {
+                                snprintf(str, sizeof(str),
+                                         "%s_%s_%d%s", op_names[op],
+                                         filter_names[filter], size,
+                                         subpel_names[dy][dx]);
+                            } else { // no subpel offset: the name carries no filter component
+                                snprintf(str, sizeof(str),
+                                         "%s%d", op_names[op], size);
+                            }
+                            if (check_func(dsp.mc[hsize][filter][op][dx][dy],
+                                           "vp9_%s_%dbpp", str, bit_depth)) {
+                                int mx = dx ? 1 + (rnd() % 14) : 0; // random value in 1..14 when dx is set, else 0
+                                int my = dy ? 1 + (rnd() % 14) : 0;
+                                randomize_buffers();
+                                call_ref(dst0, size * SIZEOF_PIXEL,
+                                         src, SRC_BUF_STRIDE * SIZEOF_PIXEL,
+                                         size, mx, my);
+                                call_new(dst1, size * SIZEOF_PIXEL,
+                                         src, SRC_BUF_STRIDE * SIZEOF_PIXEL,
+                                         size, mx, my);
+                                if (memcmp(dst0, dst1, DST_BUF_SIZE))
+                                    fail();
+
+                                // simd implementations for each filter of subpel
+                                // functions are identical
+                                if (filter >= 1 && filter <= 2) continue; // skips only the benchmark; the comparison above already ran
+                                // 10/12 bpp for bilin are identical
+                                if (bit_depth == 12 && filter == 3) continue;
+
+                                bench_new(dst1, size * SIZEOF_PIXEL,
+                                          src, SRC_BUF_STRIDE * SIZEOF_PIXEL,
+                                          size, mx, my);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+    report("mc");
+}
+
+void checkasm_check_vp9dsp(void) // runs the four VP9 DSP test groups in this file, in order
+{
+    check_ipred();      // dsp.intra_pred
+    check_itxfm();      // dsp.itxfm_add
+    check_loopfilter(); // dsp.loop_filter_8 / loop_filter_16 / loop_filter_mix2
+    check_mc();         // dsp.mc
+}
diff --git a/tests/checkasm/x86/Makefile b/tests/checkasm/x86/Makefile
new file mode 100644
index 00000000..befe088d
--- /dev/null
+++ b/tests/checkasm/x86/Makefile
@@ -0,0 +1,6 @@
+CHECKASMOBJS-$(HAVE_YASM) += x86/checkasm.o
+
+tests/checkasm/x86/%.o: tests/checkasm/x86/%.asm
+	$(DEPYASM) $(YASMFLAGS) -I $(<D)/ -M -o $@ $< > $(@:.o=.d)
+	$(YASM) $(YASMFLAGS) -I $(<D)/ -o $@ $<
+	-$(STRIP) $(ASMSTRIPFLAGS) $@
diff --git a/tests/checkasm/x86/checkasm.asm b/tests/checkasm/x86/checkasm.asm
new file mode 100644
index 00000000..d12333b3
--- /dev/null
+++ b/tests/checkasm/x86/checkasm.asm
@@ -0,0 +1,243 @@
+;*****************************************************************************
+;* Assembly testing and benchmarking tool
+;* Copyright (c) 2008 Loren Merritt
+;* Copyright (c) 2012 Henrik Gramner
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or modify
+;* it under the terms of the GNU General Public License as published by
+;* the Free Software Foundation; either version 2 of the License, or
+;* (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;* GNU General Public License for more details.
+;*
+;* You should have received a copy of the GNU General Public License
+;* along with this program; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
+;*****************************************************************************
+
+%define private_prefix checkasm
+%include "libavutil/x86/x86inc.asm"
+
+SECTION_RODATA
+
+error_message: db "failed to preserve register", 0
+error_message_emms: db "failed to issue emms", 0
+
+%if ARCH_X86_64
+; just random numbers to reduce the chance of incidental match
+ALIGN 16
+x6:  dq 0x1a1b2550a612b48c,0x79445c159ce79064
+x7:  dq 0x2eed899d5a28ddcd,0x86b2536fcd8cf636
+x8:  dq 0xb0856806085e7943,0x3f2bf84fc0fcca4e
+x9:  dq 0xacbd382dcf5b8de2,0xd229e1f5b281303f
+x10: dq 0x71aeaff20b095fd9,0xab63e2e11fa38ed9
+x11: dq 0x89b0c0765892729a,0x77d410d5c42c882d
+x12: dq 0xc45ea11a955d8dd5,0x24b3c1d2a024048b
+x13: dq 0x2e8ec680de14b47c,0xdd7b8919edd42786
+x14: dq 0x135ce6888fa02cbf,0x11e53e2b2ac655ef
+x15: dq 0x011ff554472a7a10,0x6de8f4c914c334d5
+n7:  dq 0x21f86d66c8ca00ce
+n8:  dq 0x75b6ba21077c48ad
+n9:  dq 0xed56bb2dcb3c7736
+n10: dq 0x8bda43d3fd1a7e06
+n11: dq 0xb64a9c9e5d318408
+n12: dq 0xdf9a54b303f1d3a3
+n13: dq 0x4a75479abd64e097
+n14: dq 0x249214109d5d1c88
+%endif
+
+SECTION .text
+
+cextern fail_func
+
+; max number of args used by any asm function.
+; (max_args % 4) must equal 3 for stack alignment
+%define max_args 15
+
+%if ARCH_X86_64
+
+;-----------------------------------------------------------------------------
+; int checkasm_stack_clobber(uint64_t clobber, ...)
+;-----------------------------------------------------------------------------
+cglobal stack_clobber, 1,2
+    ; Clobber the stack with junk below the stack pointer
+    %define argsize (max_args+6)*8
+    SUB  rsp, argsize
+    mov   r1, argsize-8 ; r1 = byte offset of the last 8-byte slot in the reserved area
+.loop:
+    mov [rsp+r1], r0    ; store the clobber value (first argument) into the slot
+    sub   r1, 8
+    jge .loop           ; keep going until the offset drops below zero
+    ADD  rsp, argsize
+    RET
+
+%if WIN64
+    %assign free_regs 7
+    DECLARE_REG_TMP 4
+%else
+    %assign free_regs 9
+    DECLARE_REG_TMP 7
+%endif
+
+%macro report_fail 1
+    mov  r9, rax  ; save rax; it is restored after the call
+    mov r10, rdx  ; save rdx; it is restored after the call
+    lea  r0, [%1] ; first argument: address of the message passed to the macro
+    xor eax, eax  ; NOTE(review): presumably the zero vector-register count for a variadic call -- confirm
+    call fail_func
+    mov rdx, r10
+    mov rax, r9
+%endmacro
+
+;-----------------------------------------------------------------------------
+; void checkasm_checked_call(void *func, ...)
+;-----------------------------------------------------------------------------
+INIT_XMM
+%macro CHECKED_CALL 0-1
+cglobal checked_call%1, 2,15,16,max_args*8+8
+    mov  t0, r0 ; keep the function pointer in t0; r0 is reloaded with an argument just below
+
+    ; All arguments have been pushed on the stack instead of registers in order to
+    ; test for incorrect assumptions that 32-bit ints are zero-extended to 64-bit.
+    mov  r0, r6mp
+    mov  r1, r7mp
+    mov  r2, r8mp
+    mov  r3, r9mp
+%if UNIX64
+    mov  r4, r10mp
+    mov  r5, r11mp
+    %assign i 6
+    %rep max_args-6
+        mov  r9, [rsp+stack_offset+(i+1)*8] ; fetch incoming stack argument i ...
+        mov  [rsp+(i-6)*8], r9              ; ... and store it in the outgoing stack area for the callee
+        %assign i i+1
+    %endrep
+%else ; WIN64
+    %assign i 4
+    %rep max_args-4
+        mov  r9, [rsp+stack_offset+(i+7)*8] ; fetch incoming stack argument i ...
+        mov  [rsp+i*8], r9                  ; ... and store it in the outgoing stack area for the callee
+        %assign i i+1
+    %endrep
+
+    ; Move possible floating-point arguments to the correct registers
+    movq m0, r0
+    movq m1, r1
+    movq m2, r2
+    movq m3, r3
+
+    %assign i 6
+    %rep 16-6
+        mova m %+ i, [x %+ i] ; preload m6..m15 with the x6..x15 constants
+        %assign i i+1
+    %endrep
+%endif
+
+%assign i 14
+%rep 15-free_regs
+    mov r %+ i, [n %+ i] ; preload r14 downward with the matching n constants
+    %assign i i-1
+%endrep
+    call t0
+%assign i 14
+%rep 15-free_regs
+    xor r %+ i, [n %+ i] ; zero only if the register still holds its constant
+    or  r14, r %+ i      ; accumulate any difference in r14
+    %assign i i-1
+%endrep
+
+%if WIN64
+    %assign i 6
+    %rep 16-6
+        pxor m %+ i, [x %+ i] ; zero only if the register still holds its constant
+        por  m6, m %+ i       ; accumulate any difference in m6
+        %assign i i+1
+    %endrep
+    packsswb m6, m6 ; pack 128 bits down to 64; signed saturation keeps non-zero words non-zero
+    movq r5, m6
+    or  r14, r5     ; fold the vector-register result into r14
+%endif
+
+    ; Call fail_func() with a descriptive message to mark it as a failure
+    ; if the called function didn't preserve all callee-saved registers.
+    ; Save the return value located in rdx:rax first to prevent clobbering.
+    jz .clobber_ok ; tests the flags of the last "or r14, ..." above
+    report_fail error_message
+.clobber_ok:
+%ifnid %1, _emms
+    fstenv [rsp]                ; store the x87 environment on the stack
+    cmp  word [rsp + 8], 0xffff ; tag word at offset 8; 0xffff = all registers empty (see Intel SDM, FSTENV)
+    je   .emms_ok
+    report_fail error_message_emms
+    emms
+.emms_ok:
+%else
+    emms ; this variant issues emms itself instead of checking for it
+%endif
+    RET
+%endmacro
+
+%else
+
+; just random numbers to reduce the chance of incidental match
+%define n3 dword 0x6549315c
+%define n4 dword 0xe02f3e23
+%define n5 dword 0xb78d0d1d
+%define n6 dword 0x33627ba7
+
+%macro report_fail 1
+    mov  r3, eax   ; save eax; it is restored after the call
+    mov  r4, edx   ; save edx; it is restored after the call
+    lea  r0, [%1]  ; address of the message passed to the macro
+    mov [esp], r0  ; place it at the top of the stack as the argument for fail_func
+    call fail_func
+    mov  edx, r4
+    mov  eax, r3
+%endmacro
+
+%macro CHECKED_CALL 0-1
+;-----------------------------------------------------------------------------
+; void checkasm_checked_call(void *func, ...)
+;-----------------------------------------------------------------------------
+cglobal checked_call%1, 1,7
+    mov  r3, n3 ; preload r3..r6 with the n3..n6 constants
+    mov  r4, n4
+    mov  r5, n5
+    mov  r6, n6
+%rep max_args
+    PUSH dword [esp+20+max_args*4] ; each PUSH lowers esp by 4, so the fixed offset walks down through the incoming arguments
+%endrep
+    call r0
+    xor  r3, n3 ; each xor yields zero only if the register still holds its constant
+    xor  r4, n4
+    xor  r5, n5
+    xor  r6, n6
+    or   r3, r4 ; combine the four results; the final "or" sets the flags tested by jz
+    or   r5, r6
+    or   r3, r5
+    jz .clobber_ok
+    report_fail error_message
+.clobber_ok:
+%ifnid %1, _emms
+    fstenv [esp]                ; store the x87 environment on the stack
+    cmp  word [esp + 8], 0xffff ; tag word at offset 8; 0xffff = all registers empty (see Intel SDM, FSTENV)
+    je   .emms_ok
+    report_fail error_message_emms
+    emms
+.emms_ok:
+%else
+    emms ; this variant issues emms itself instead of checking for it
+%endif
+    add  esp, max_args*4 ; drop the argument copies pushed above
+    REP_RET
+%endmacro
+
+%endif ; ARCH_X86_64
+
+CHECKED_CALL
+CHECKED_CALL _emms
diff --git a/tests/extended.ffconcat b/tests/extended.ffconcat
new file mode 100644
index 00000000..7359113c
--- /dev/null
+++ b/tests/extended.ffconcat
@@ -0,0 +1,114 @@
+ffconcat version 1.0
+
+file      %SRCFILE%
+
+file      %SRCFILE%
+duration  1
+file_packet_metadata dummy=1
+
+file      %SRCFILE%
+inpoint   00:00.00
+outpoint  00:00.04
+
+file      %SRCFILE%
+inpoint   00:00.04
+outpoint  00:00.08
+
+file      %SRCFILE%
+inpoint   00:00.08
+outpoint  00:00.12
+
+file      %SRCFILE%
+inpoint   00:00.12
+outpoint  00:00.16
+
+file      %SRCFILE%
+inpoint   00:00.16
+outpoint  00:00.20
+
+file      %SRCFILE%
+inpoint   00:00.20
+outpoint  00:00.24
+
+file      %SRCFILE%
+inpoint   00:00.24
+outpoint  00:00.28
+
+file      %SRCFILE%
+inpoint   00:00.28
+outpoint  00:00.32
+
+file      %SRCFILE%
+inpoint   00:00.32
+outpoint  00:00.36
+
+file      %SRCFILE%
+inpoint   00:00.36
+outpoint  00:00.40
+
+file      %SRCFILE%
+inpoint   00:00.40
+outpoint  00:00.44
+
+file      %SRCFILE%
+inpoint   00:00.44
+outpoint  00:00.48
+
+file      %SRCFILE%
+inpoint   00:00.48
+outpoint  00:00.52
+
+file      %SRCFILE%
+inpoint   00:00.52
+outpoint  00:00.56
+
+file      %SRCFILE%
+inpoint   00:00.56
+outpoint  00:00.60
+
+file      %SRCFILE%
+inpoint   00:00.60
+outpoint  00:00.64
+
+file      %SRCFILE%
+inpoint   00:00.64
+outpoint  00:00.68
+
+file      %SRCFILE%
+inpoint   00:00.68
+outpoint  00:00.72
+
+file      %SRCFILE%
+inpoint   00:00.72
+outpoint  00:00.76
+
+file      %SRCFILE%
+inpoint   00:00.76
+outpoint  00:00.80
+
+file      %SRCFILE%
+inpoint   00:00.80
+outpoint  00:00.84
+
+file      %SRCFILE%
+inpoint   00:00.84
+outpoint  00:00.88
+
+file      %SRCFILE%
+inpoint   00:00.88
+outpoint  00:00.92
+
+file      %SRCFILE%
+inpoint   00:00.92
+outpoint  00:00.96
+
+file      %SRCFILE%
+inpoint   00:00.96
+outpoint  00:01.00
+
+file      %SRCFILE%
+outpoint  00:00.40
+
+file      %SRCFILE%
+inpoint   00:00.40
+
diff --git a/tests/fate-run.sh b/tests/fate-run.sh
index b88730a4..16087cb7 100755
--- a/tests/fate-run.sh
+++ b/tests/fate-run.sh
@@ -84,29 +84,34 @@ runecho(){
 }
 
 probefmt(){
-    run ffprobe -show_entries format=format_name -print_format default=nw=1:nk=1 -v 0 "$@"
+    run ffprobe${PROGSUF} -show_entries format=format_name -print_format default=nw=1:nk=1 -v 0 "$@"
+}
+
+runlocal(){
+    test "${V:-0}" -gt 0 && echo ${base}/"$@" ${base} >&3
+    ${base}/"$@" ${base}
 }
 
 probeframes(){
-    run ffprobe -show_frames -v 0 "$@"
+    run ffprobe${PROGSUF} -show_frames -v 0 "$@"
 }
 
 ffmpeg(){
     dec_opts="-hwaccel $hwaccel -threads $threads -thread_type $thread_type"
-    ffmpeg_args="-nostats -cpuflags $cpuflags"
+    ffmpeg_args="-nostdin -nostats -cpuflags $cpuflags"
     for arg in $@; do
         [ x${arg} = x-i ] && ffmpeg_args="${ffmpeg_args} ${dec_opts}"
         ffmpeg_args="${ffmpeg_args} ${arg}"
     done
-    run ffmpeg ${ffmpeg_args}
+    run ffmpeg${PROGSUF} ${ffmpeg_args}
 }
 
 framecrc(){
-    ffmpeg "$@" -flags +bitexact -f framecrc -
+    ffmpeg "$@" -flags +bitexact -fflags +bitexact -f framecrc -
 }
 
 framemd5(){
-    ffmpeg "$@" -flags +bitexact -f framemd5 -
+    ffmpeg "$@" -flags +bitexact -fflags +bitexact -f framemd5 -
 }
 
 crc(){
@@ -124,7 +129,7 @@ pcm(){
 fmtstdout(){
     fmt=$1
     shift 1
-    ffmpeg -flags +bitexact "$@" -f $fmt -
+    ffmpeg -flags +bitexact -fflags +bitexact "$@" -f $fmt -
 }
 
 enc_dec_pcm(){
@@ -137,7 +142,7 @@ enc_dec_pcm(){
     cleanfiles=$encfile
     encfile=$(target_path ${encfile})
     ffmpeg -i $src_file "$@" -f $out_fmt -y ${encfile} || return
-    ffmpeg -flags +bitexact -i ${encfile} -c:a pcm_${pcm_fmt} -f ${dec_fmt} -
+    ffmpeg -flags +bitexact -fflags +bitexact -i ${encfile} -c:a pcm_${pcm_fmt} -fflags +bitexact -f ${dec_fmt} -
 }
 
 FLAGS="-flags +bitexact -sws_flags +accurate_rnd+bitexact -fflags +bitexact"
@@ -231,19 +236,39 @@ gapless(){
     cleanfiles="$cleanfiles $decfile1 $decfile2 $decfile3"
 
     # test packet data
-    ffmpeg $extra_args -i "$sample" -flags +bitexact -c:a copy -f framecrc -y $decfile1
+    ffmpeg $extra_args -i "$sample" -flags +bitexact -fflags +bitexact -c:a copy -f framecrc -y $decfile1
     do_md5sum $decfile1
     # test decoded (and cut) data
-    ffmpeg $extra_args -i "$sample" -flags +bitexact -f wav md5:
+    ffmpeg $extra_args -i "$sample" -flags +bitexact -fflags +bitexact -f wav md5:
     # the same as above again, with seeking to the start
-    ffmpeg $extra_args -ss 0 -seek_timestamp 1 -i "$sample" -flags +bitexact -c:a copy -f framecrc -y $decfile2
+    ffmpeg $extra_args -ss 0 -seek_timestamp 1 -i "$sample" -flags +bitexact -fflags +bitexact -c:a copy -f framecrc -y $decfile2
     do_md5sum $decfile2
-    ffmpeg $extra_args -ss 0 -seek_timestamp 1 -i "$sample" -flags +bitexact -f wav md5:
+    ffmpeg $extra_args -ss 0 -seek_timestamp 1 -i "$sample" -flags +bitexact -fflags +bitexact -f wav md5:
     # test packet data, with seeking to a specific position
-    ffmpeg $extra_args -ss 5 -seek_timestamp 1 -i "$sample" -flags +bitexact -c:a copy -f framecrc -y $decfile3
+    ffmpeg $extra_args -ss 5 -seek_timestamp 1 -i "$sample" -flags +bitexact -fflags +bitexact -c:a copy -f framecrc -y $decfile3
     do_md5sum $decfile3
 }
 
+concat(){    # usage: concat TEMPLATE SAMPLE [MODE] [EXTRA_ARGS] -- probe SAMPLE through an ffconcat script built from TEMPLATE
+    template=$1
+    sample=$2
+    mode=$3
+    extra_args=$4
+    # Both scratch files are named after the running test and recorded in cleanfiles.
+    concatfile="${outdir}/${test}.ffconcat"
+    packetfile="${outdir}/${test}.ffprobe"
+    cleanfiles="$concatfile $packetfile"
+    # Instantiate the template: every %SRCFILE% placeholder is replaced by $sample.
+    awk "{gsub(/%SRCFILE%/, \"$sample\"); print}" $template > $concatfile
+    # mode "md5": save the ffprobe dump (CRs stripped) and hand it to do_md5sum; any other mode prints a compact dump.
+    if [ "$mode" = "md5" ]; then
+        run ffprobe${PROGSUF} -bitexact -show_streams -show_packets -v 0 -fflags keepside -safe 0 $extra_args $concatfile | tr -d '\r' > $packetfile
+        do_md5sum $packetfile
+    else
+        run ffprobe${PROGSUF} -bitexact -show_streams -show_packets -v 0 -of compact=p=0:nk=1 -fflags keepside -safe 0 $extra_args $concatfile
+    fi
+}
+
 mkdir -p "$outdir"
 
 # Disable globbing: command arguments may contain globbing characters and
diff --git a/tests/fate/aac.mak b/tests/fate/aac.mak
index 34823be5..324b05d4 100644
--- a/tests/fate/aac.mak
+++ b/tests/fate/aac.mak
@@ -70,6 +70,61 @@ FATE_AAC += fate-aac-er_eld2100np_48_ep0
 fate-aac-er_eld2100np_48_ep0: CMD = pcm -i $(TARGET_SAMPLES)/aac/er_eld2100np_48_ep0.mp4
 fate-aac-er_eld2100np_48_ep0: REF = $(SAMPLES)/aac/er_eld2100np_48.s16
 
+FATE_AAC_FIXED += fate-aac-fixed-al04_44
+fate-aac-fixed-al04_44: CMD = pcm -c aac_fixed -i $(TARGET_SAMPLES)/aac/al04_44.mp4
+fate-aac-fixed-al04_44: REF = $(SAMPLES)/aac/al04_44.s16
+
+FATE_AAC_FIXED += fate-aac-fixed-al05_44
+fate-aac-fixed-al05_44: CMD = pcm -c aac_fixed -i $(TARGET_SAMPLES)/aac/al05_44.mp4
+fate-aac-fixed-al05_44: REF = $(SAMPLES)/aac/al05_44.s16
+
+FATE_AAC_FIXED += fate-aac-fixed-al06_44
+fate-aac-fixed-al06_44: CMD = pcm -c aac_fixed -i $(TARGET_SAMPLES)/aac/al06_44.mp4
+fate-aac-fixed-al06_44: REF = $(SAMPLES)/aac/al06_44_reorder.s16
+
+FATE_AAC_FIXED += fate-aac-fixed-al15_44
+fate-aac-fixed-al15_44: CMD = pcm -c aac_fixed -i $(TARGET_SAMPLES)/aac/al15_44.mp4
+fate-aac-fixed-al15_44: REF = $(SAMPLES)/aac/al15_44_reorder.s16
+
+FATE_AAC_FIXED += fate-aac-fixed-al17_44
+fate-aac-fixed-al17_44: CMD = pcm -c aac_fixed -i $(TARGET_SAMPLES)/aac/al17_44.mp4
+fate-aac-fixed-al17_44: REF = $(SAMPLES)/aac/al17_44.s16
+
+FATE_AAC_FIXED += fate-aac-fixed-al18_44
+fate-aac-fixed-al18_44: CMD = pcm -c aac_fixed -i $(TARGET_SAMPLES)/aac/al18_44.mp4
+fate-aac-fixed-al18_44: REF = $(SAMPLES)/aac/al18_44.s16
+
+FATE_AAC_FIXED += fate-aac-fixed-al_sbr_hq_cm_48_2
+fate-aac-fixed-al_sbr_hq_cm_48_2: CMD = pcm -c aac_fixed -i $(TARGET_SAMPLES)/aac/al_sbr_cm_48_2.mp4
+fate-aac-fixed-al_sbr_hq_cm_48_2: REF = $(SAMPLES)/aac/al_sbr_hq_cm_48_2.s16
+
+FATE_AAC_FIXED += fate-aac-fixed-al_sbr_hq_cm_48_5.1
+fate-aac-fixed-al_sbr_hq_cm_48_5.1: CMD = pcm -c aac_fixed -i $(TARGET_SAMPLES)/aac/al_sbr_cm_48_5.1.mp4
+fate-aac-fixed-al_sbr_hq_cm_48_5.1: REF = $(SAMPLES)/aac/al_sbr_hq_cm_48_5.1_reorder.s16
+
+FATE_AAC_FIXED += fate-aac-fixed-al_sbr_hq_sr_48_2_fsaac48
+fate-aac-fixed-al_sbr_hq_sr_48_2_fsaac48: CMD = pcm -c aac_fixed -i $(TARGET_SAMPLES)/aac/al_sbr_sr_48_2_fsaac48.mp4
+fate-aac-fixed-al_sbr_hq_sr_48_2_fsaac48: REF = $(SAMPLES)/aac/al_sbr_hq_sr_48_2_fsaac48.s16
+
+#FATE_AAC_FIXED += fate-aac-fixed-al_sbr_ps_06_ur
+#fate-aac-fixed-al_sbr_ps_06_ur: CMD = pcm -c aac_fixed -i $(TARGET_SAMPLES)/aac/al_sbr_ps_06_new.mp4
+#fate-aac-fixed-al_sbr_ps_06_ur: REF = $(SAMPLES)/aac/al_sbr_ps_06_ur.s16
+
+FATE_AAC_FIXED += fate-aac-fixed-ap05_48
+fate-aac-fixed-ap05_48: CMD = pcm -c aac_fixed -i $(TARGET_SAMPLES)/aac/ap05_48.mp4
+fate-aac-fixed-ap05_48: REF = $(SAMPLES)/aac/ap05_48.s16
+
+FATE_AAC_FIXED += fate-aac-fixed-er_ad6000np_44_ep0
+fate-aac-fixed-er_ad6000np_44_ep0: CMD = pcm -c aac_fixed -i $(TARGET_SAMPLES)/aac/er_ad6000np_44_ep0.mp4
+fate-aac-fixed-er_ad6000np_44_ep0: REF = $(SAMPLES)/aac/er_ad6000np_44.s16
+
+FATE_AAC_FIXED += fate-aac-fixed-er_eld1001np_44_ep0
+fate-aac-fixed-er_eld1001np_44_ep0: CMD = pcm -c aac_fixed -i $(TARGET_SAMPLES)/aac/er_eld1001np_44_ep0.mp4
+fate-aac-fixed-er_eld1001np_44_ep0: REF = $(SAMPLES)/aac/er_eld1001np_44.s16
+
+FATE_AAC_FIXED += fate-aac-fixed-er_eld2000np_48_ep0
+fate-aac-fixed-er_eld2000np_48_ep0: CMD = pcm -c aac_fixed -i $(TARGET_SAMPLES)/aac/er_eld2000np_48_ep0.mp4
+fate-aac-fixed-er_eld2000np_48_ep0: REF = $(SAMPLES)/aac/er_eld2000np_48_ep0.s16
 
 fate-aac-ct%: CMD = pcm -i $(TARGET_SAMPLES)/aac/CT_DecoderCheck/$(@:fate-aac-ct-%=%)
 fate-aac-ct%: REF = $(SAMPLES)/aac/CT_DecoderCheck/aacPlusv2.wav
@@ -87,21 +142,85 @@ FATE_AAC += $(FATE_AAC_CT:%=fate-aac-ct-%)
 
 FATE_AAC_ENCODE += fate-aac-aref-encode
 fate-aac-aref-encode: ./tests/data/asynth-44100-2.wav
-fate-aac-aref-encode: CMD = enc_dec_pcm adts wav s16le $(REF) -strict -2 -c:a aac -b:a 512k
+fate-aac-aref-encode: CMD = enc_dec_pcm adts wav s16le $(REF) -c:a aac -aac_is 0 -aac_pns 0 -aac_ms 0 -aac_tns 0 -b:a 512k
 fate-aac-aref-encode: CMP = stddev
 fate-aac-aref-encode: REF = ./tests/data/asynth-44100-2.wav
 fate-aac-aref-encode: CMP_SHIFT = -4096
-fate-aac-aref-encode: CMP_TARGET = 434
+fate-aac-aref-encode: CMP_TARGET = 669
 fate-aac-aref-encode: SIZE_TOLERANCE = 2464
-fate-aac-aref-encode: FUZZ = 5
+fate-aac-aref-encode: FUZZ = 89
 
 FATE_AAC_ENCODE += fate-aac-ln-encode
-fate-aac-ln-encode: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav -strict -2 -c:a aac -b:a 512k
+fate-aac-ln-encode: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav -c:a aac -aac_is 0 -aac_pns 0 -aac_ms 0 -aac_tns 0 -b:a 512k
 fate-aac-ln-encode: CMP = stddev
 fate-aac-ln-encode: REF = $(SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav
 fate-aac-ln-encode: CMP_SHIFT = -4096
-fate-aac-ln-encode: CMP_TARGET = 65
+fate-aac-ln-encode: CMP_TARGET = 61
 fate-aac-ln-encode: SIZE_TOLERANCE = 3560
+fate-aac-ln-encode: FUZZ = 30
+
+FATE_AAC_ENCODE += fate-aac-ln-encode-128k
+fate-aac-ln-encode-128k: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav -c:a aac -aac_is 0 -aac_pns 0 -aac_ms 0 -aac_tns 0 -b:a 128k -cutoff 22050
+fate-aac-ln-encode-128k: CMP = stddev
+fate-aac-ln-encode-128k: REF = $(SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav
+fate-aac-ln-encode-128k: CMP_SHIFT = -4096
+fate-aac-ln-encode-128k: CMP_TARGET = 800
+fate-aac-ln-encode-128k: SIZE_TOLERANCE = 3560
+fate-aac-ln-encode-128k: FUZZ = 5
+
+FATE_AAC_ENCODE += fate-aac-pns-encode
+fate-aac-pns-encode: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav -c:a aac -aac_pns 1 -aac_is 0 -aac_ms 0 -aac_tns 0 -b:a 128k -cutoff 22050  -fflags +bitexact -flags +bitexact
+fate-aac-pns-encode: CMP = stddev
+fate-aac-pns-encode: REF = $(SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav
+fate-aac-pns-encode: CMP_SHIFT = -4096
+fate-aac-pns-encode: CMP_TARGET = 616
+fate-aac-pns-encode: SIZE_TOLERANCE = 3560
+fate-aac-pns-encode: FUZZ = 74
+
+FATE_AAC_ENCODE += fate-aac-tns-encode
+fate-aac-tns-encode: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav -c:a aac -aac_tns 1 -aac_is 0 -aac_pns 0 -aac_ms 0 -b:a 128k -cutoff 22050  -fflags +bitexact -flags +bitexact
+fate-aac-tns-encode: CMP = stddev
+fate-aac-tns-encode: REF = $(SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav
+fate-aac-tns-encode: CMP_SHIFT = -4096
+fate-aac-tns-encode: CMP_TARGET = 817
+fate-aac-tns-encode: FUZZ = 7
+fate-aac-tns-encode: SIZE_TOLERANCE = 3560
+
+FATE_AAC_ENCODE += fate-aac-is-encode
+fate-aac-is-encode: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav -c:a aac -aac_pns 0 -aac_is 1 -aac_ms 0 -b:a 128k -aac_tns 0 -cutoff 22050
+fate-aac-is-encode: CMP = stddev
+fate-aac-is-encode: REF = $(SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav
+fate-aac-is-encode: CMP_SHIFT = -4096
+fate-aac-is-encode: CMP_TARGET = 615
+fate-aac-is-encode: SIZE_TOLERANCE = 3560
+fate-aac-is-encode: FUZZ = 10
+
+FATE_AAC_ENCODE += fate-aac-ms-encode
+fate-aac-ms-encode: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav -c:a aac -aac_pns 0 -aac_is 0 -aac_ms 1 -aac_tns 0 -b:a 128k -cutoff 22050
+fate-aac-ms-encode: CMP = stddev
+fate-aac-ms-encode: REF = $(SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav
+fate-aac-ms-encode: CMP_SHIFT = -4096
+fate-aac-ms-encode: CMP_TARGET = 675
+fate-aac-ms-encode: SIZE_TOLERANCE = 3560
+fate-aac-ms-encode: FUZZ = 15
+
+FATE_AAC_ENCODE += fate-aac-ltp-encode
+fate-aac-ltp-encode: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav -strict -2 -c:a aac -profile:a aac_ltp -aac_pns 0 -aac_is 0 -aac_ms 0 -aac_tns 0 -b:a 36k -fflags +bitexact -flags +bitexact
+fate-aac-ltp-encode: CMP = stddev
+fate-aac-ltp-encode: REF = $(SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav
+fate-aac-ltp-encode: CMP_SHIFT = -4096
+fate-aac-ltp-encode: CMP_TARGET = 1270
+fate-aac-ltp-encode: SIZE_TOLERANCE = 3560
+fate-aac-ltp-encode: FUZZ = 17
+
+FATE_AAC_ENCODE += fate-aac-pred-encode
+fate-aac-pred-encode: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav -profile:a aac_main -c:a aac -aac_is 0 -aac_pns 0 -aac_ms 0 -aac_tns 0 -b:a 128k -cutoff 22050
+fate-aac-pred-encode: CMP = stddev
+fate-aac-pred-encode: REF = $(SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav
+fate-aac-pred-encode: CMP_SHIFT = -4096
+fate-aac-pred-encode: CMP_TARGET = 841
+fate-aac-pred-encode: FUZZ = 12
+fate-aac-pred-encode: SIZE_TOLERANCE = 3560
 
 FATE_AAC_LATM += fate-aac-latm_000000001180bc60
 fate-aac-latm_000000001180bc60: CMD = pcm -i $(TARGET_SAMPLES)/aac/latm_000000001180bc60.mpg
@@ -114,8 +233,9 @@ fate-aac-latm_stereo_to_51: REF = $(SAMPLES)/aac/latm_stereo_to_51_ref.s16
 FATE_AAC-$(call      DEMDEC, AAC,    AAC)      += $(FATE_AAC_CT_RAW)
 FATE_AAC-$(call      DEMDEC, MOV,    AAC)      += $(FATE_AAC)
 FATE_AAC_LATM-$(call DEMDEC, MPEGTS, AAC_LATM) += $(FATE_AAC_LATM)
+FATE_AAC_FIXED-$(call DEMDEC, MOV,    AAC_FIXED)+= $(FATE_AAC_FIXED)
 
-FATE_AAC_ALL = $(FATE_AAC-yes) $(FATE_AAC_LATM-yes)
+FATE_AAC_ALL = $(FATE_AAC-yes) $(FATE_AAC_LATM-yes) $(FATE_AAC_FIXED-yes)
 
 $(FATE_AAC_ALL): CMP  = oneoff
 $(FATE_AAC_ALL): FUZZ = 2
diff --git a/tests/fate/ac3.mak b/tests/fate/ac3.mak
index f00c3c4e..e6362718 100644
--- a/tests/fate/ac3.mak
+++ b/tests/fate/ac3.mak
@@ -6,12 +6,14 @@ FATE_AC3 += fate-ac3-4.0
 fate-ac3-4.0: CMD = pcm -i $(TARGET_SAMPLES)/ac3/millers_crossing_4.0.ac3
 fate-ac3-4.0: REF = $(SAMPLES)/ac3/millers_crossing_4.0_v2.pcm
 
+#request_channel_layout 4 -> front center channel (mono)
 FATE_AC3 += fate-ac3-4.0-downmix-mono
-fate-ac3-4.0-downmix-mono: CMD = pcm -request_channels 1 -i $(TARGET_SAMPLES)/ac3/millers_crossing_4.0.ac3
+fate-ac3-4.0-downmix-mono: CMD = pcm -request_channel_layout 4 -i $(TARGET_SAMPLES)/ac3/millers_crossing_4.0.ac3
 fate-ac3-4.0-downmix-mono: REF = $(SAMPLES)/ac3/millers_crossing_4.0_mono_v2.pcm
 
+#request_channel_layout 3 -> left channel + right channel
 FATE_AC3 += fate-ac3-4.0-downmix-stereo
-fate-ac3-4.0-downmix-stereo: CMD = pcm -request_channels 2 -i $(TARGET_SAMPLES)/ac3/millers_crossing_4.0.ac3
+fate-ac3-4.0-downmix-stereo: CMD = pcm -request_channel_layout 3 -i $(TARGET_SAMPLES)/ac3/millers_crossing_4.0.ac3
 fate-ac3-4.0-downmix-stereo: REF = $(SAMPLES)/ac3/millers_crossing_4.0_stereo_v2.pcm
 
 FATE_AC3 += fate-ac3-5.1
@@ -19,11 +21,11 @@ fate-ac3-5.1: CMD = pcm -i $(TARGET_SAMPLES)/ac3/monsters_inc_5.1_448_small.ac3
 fate-ac3-5.1: REF = $(SAMPLES)/ac3/monsters_inc_5.1_448_small_v2.pcm
 
 FATE_AC3 += fate-ac3-5.1-downmix-mono
-fate-ac3-5.1-downmix-mono: CMD = pcm -request_channels 1 -i $(TARGET_SAMPLES)/ac3/monsters_inc_5.1_448_small.ac3
+fate-ac3-5.1-downmix-mono: CMD = pcm -request_channel_layout 4 -i $(TARGET_SAMPLES)/ac3/monsters_inc_5.1_448_small.ac3
 fate-ac3-5.1-downmix-mono: REF = $(SAMPLES)/ac3/monsters_inc_5.1_448_small_mono_v2.pcm
 
 FATE_AC3 += fate-ac3-5.1-downmix-stereo
-fate-ac3-5.1-downmix-stereo: CMD = pcm -request_channels 2 -i $(TARGET_SAMPLES)/ac3/monsters_inc_5.1_448_small.ac3
+fate-ac3-5.1-downmix-stereo: CMD = pcm -request_channel_layout 3 -i $(TARGET_SAMPLES)/ac3/monsters_inc_5.1_448_small.ac3
 fate-ac3-5.1-downmix-stereo: REF = $(SAMPLES)/ac3/monsters_inc_5.1_448_small_stereo_v2.pcm
 
 FATE_AC3 += fate-ac3-fixed-2.0
@@ -31,7 +33,7 @@ fate-ac3-fixed-2.0: CMD = pcm -c ac3_fixed -i $(TARGET_SAMPLES)/ac3/monsters_inc
 fate-ac3-fixed-2.0: REF = $(SAMPLES)/ac3/monsters_inc_2.0_192_small_v2.pcm
 
 FATE_AC3 += fate-ac3-fixed-4.0-downmix-mono
-fate-ac3-fixed-4.0-downmix-mono: CMD = pcm -c ac3_fixed -request_channels 1 -i $(TARGET_SAMPLES)/ac3/millers_crossing_4.0.ac3
+fate-ac3-fixed-4.0-downmix-mono: CMD = pcm -c ac3_fixed -request_channel_layout 4 -i $(TARGET_SAMPLES)/ac3/millers_crossing_4.0.ac3
 fate-ac3-fixed-4.0-downmix-mono: REF = $(SAMPLES)/ac3/millers_crossing_4.0_mono_v2.pcm
 
 FATE_EAC3 += fate-eac3-1
diff --git a/tests/fate/acodec.mak b/tests/fate/acodec.mak
index d693e9e6..e0f23208 100644
--- a/tests/fate/acodec.mak
+++ b/tests/fate/acodec.mak
@@ -1,6 +1,6 @@
 fate-acodec-%: CODEC = $(@:fate-acodec-%=%)
 fate-acodec-%: SRC = tests/data/asynth-44100-2.wav
-fate-acodec-%: CMD = enc_dec wav $(SRC) $(FMT) "-b 128k -c $(CODEC) $(ENCOPTS)" wav "-c pcm_s16le $(DECOPTS)" -keep
+fate-acodec-%: CMD = enc_dec wav $(SRC) $(FMT) "-b:a 128k -c $(CODEC) $(ENCOPTS)" wav "-c pcm_s16le $(DECOPTS)" -keep
 fate-acodec-%: CMP_UNIT = 2
 fate-acodec-%: REF = $(SRC_PATH)/tests/ref/acodec/$(@:fate-acodec-%=%)
 
@@ -93,6 +93,7 @@ fate-acodec-mp2: ENCOPTS = -b:a 128k
 FATE_ACODEC-$(call ENCDEC, MP2FIXED MP2 , MP2 MP3) += fate-acodec-mp2fixed
 fate-acodec-mp2fixed: FMT = mp2
 fate-acodec-mp2fixed: CMP_SHIFT = -1924
+fate-acodec-mp2fixed: ENCOPTS = -b:a 384k
 
 FATE_ACODEC-$(call ENCDEC, ALAC, MOV) += fate-acodec-alac
 fate-acodec-alac: FMT = mov
@@ -103,7 +104,7 @@ fate-acodec-dca: tests/data/asynth-44100-2.wav
 fate-acodec-dca: SRC = tests/data/asynth-44100-2.wav
 fate-acodec-dca: CMD = md5 -i $(TARGET_PATH)/$(SRC) -c:a dca -strict -2 -f dts -flags +bitexact
 fate-acodec-dca: CMP = oneline
-fate-acodec-dca: REF = fe28cef432ed88de4ee01b87537fd2bd
+fate-acodec-dca: REF = 7ffdefdf47069289990755c79387cc90
 
 FATE_ACODEC-$(call ENCDEC, DCA, WAV) += fate-acodec-dca2
 fate-acodec-dca2: CMD = enc_dec_pcm dts wav s16le $(SRC) -c:a dca -strict -2 -flags +bitexact
diff --git a/tests/fate/api.mak b/tests/fate/api.mak
new file mode 100644
index 00000000..2ebc5319
--- /dev/null
+++ b/tests/fate/api.mak
@@ -0,0 +1,49 @@
+FATE_API_LIBAVCODEC-$(call ENCDEC, FLAC, FLAC) += fate-api-flac
+fate-api-flac: $(APITESTSDIR)/api-flac-test$(EXESUF)
+fate-api-flac: CMD = run $(APITESTSDIR)/api-flac-test
+fate-api-flac: CMP = null
+fate-api-flac: REF = /dev/null
+
+FATE_API_SAMPLES_LIBAVFORMAT-$(call DEMDEC, H263, H263) += fate-api-band
+fate-api-band: $(APITESTSDIR)/api-band-test$(EXESUF)
+fate-api-band: CMD = run $(APITESTSDIR)/api-band-test $(TARGET_SAMPLES)/mpeg4/resize_down-up.h263
+fate-api-band: CMP = null
+fate-api-band: REF = /dev/null
+
+FATE_API_SAMPLES_LIBAVFORMAT-$(call DEMDEC, H264, H264) += fate-api-h264
+fate-api-h264: $(APITESTSDIR)/api-h264-test$(EXESUF)
+fate-api-h264: CMD = run $(APITESTSDIR)/api-h264-test $(TARGET_SAMPLES)/h264-conformance/SVA_NL2_E.264
+
+FATE_API_LIBAVFORMAT-yes += fate-api-seek
+fate-api-seek: $(APITESTSDIR)/api-seek-test$(EXESUF) fate-lavf
+fate-api-seek: CMD = run $(APITESTSDIR)/api-seek-test $(TARGET_PATH)/tests/data/lavf/lavf.flv 0 720
+fate-api-seek: CMP = null
+fate-api-seek: REF = /dev/null
+
+FATE_API_SAMPLES_LIBAVFORMAT-$(call DEMDEC, IMAGE2, PNG) += fate-api-png-codec-param
+fate-api-png-codec-param: $(APITESTSDIR)/api-codec-param-test$(EXESUF)
+fate-api-png-codec-param: CMD = run $(APITESTSDIR)/api-codec-param-test $(TARGET_SAMPLES)/png1/lena-rgba.png
+
+FATE_API_SAMPLES_LIBAVFORMAT-$(call DEMDEC, IMAGE2, MJPEG) += fate-api-mjpeg-codec-param
+fate-api-mjpeg-codec-param: $(APITESTSDIR)/api-codec-param-test$(EXESUF)
+fate-api-mjpeg-codec-param: CMD = run $(APITESTSDIR)/api-codec-param-test $(TARGET_SAMPLES)/exif/image_small.jpg
+
+FATE_API-$(HAVE_THREADS) += fate-api-threadmessage
+fate-api-threadmessage: $(APITESTSDIR)/api-threadmessage-test$(EXESUF)
+fate-api-threadmessage: CMD = run $(APITESTSDIR)/api-threadmessage-test 3 10 30 50 2 20 40
+fate-api-threadmessage: CMP = null
+fate-api-threadmessage: REF = /dev/null
+
+FATE_API_SAMPLES-$(CONFIG_AVFORMAT) += $(FATE_API_SAMPLES_LIBAVFORMAT-yes)
+
+ifdef SAMPLES
+    FATE_API_SAMPLES += $(FATE_API_SAMPLES-yes)
+endif
+
+FATE_API-$(CONFIG_AVCODEC) += $(FATE_API_LIBAVCODEC-yes)
+FATE_API-$(CONFIG_AVFORMAT) += $(FATE_API_LIBAVFORMAT-yes)
+FATE_API = $(FATE_API-yes)
+
+FATE-yes += $(FATE_API) $(FATE_API_SAMPLES)
+
+fate-api: $(FATE_API) $(FATE_API_SAMPLES)
diff --git a/tests/fate/atrac.mak b/tests/fate/atrac.mak
index 0a086d8f..acf79a53 100644
--- a/tests/fate/atrac.mak
+++ b/tests/fate/atrac.mak
@@ -1,6 +1,13 @@
-FATE_ATRAC1-$(call DEMDEC, AEA, ATRAC1) += fate-atrac1
-fate-atrac1: CMD = pcm -i $(TARGET_SAMPLES)/atrac1/test_tones_small.aea
-fate-atrac1: REF = $(SAMPLES)/atrac1/test_tones_small.pcm
+FATE_ATRAC1 += fate-atrac1-1
+fate-atrac1-1: CMD = pcm -i $(TARGET_SAMPLES)/atrac1/test_tones_small.aea
+fate-atrac1-1: REF = $(SAMPLES)/atrac1/test_tones_small_fixed_delay.pcm
+
+FATE_ATRAC1 += fate-atrac1-2
+fate-atrac1-2: CMD = pcm -i $(TARGET_SAMPLES)/atrac1/chirp_tone_10-16000.aea
+fate-atrac1-2: REF = $(SAMPLES)/atrac1/chirp_tone_10-16000.pcm
+fate-atrac1-2: FUZZ = 61
+
+FATE_ATRAC1-$(call DEMDEC, AEA, ATRAC1) += $(FATE_ATRAC1)
 
 FATE_ATRAC3 += fate-atrac3-1
 fate-atrac3-1: CMD = pcm -i $(TARGET_SAMPLES)/atrac3/mc_sich_at3_066_small.wav
diff --git a/tests/fate/audio.mak b/tests/fate/audio.mak
index 7ab4038c..e882aced 100644
--- a/tests/fate/audio.mak
+++ b/tests/fate/audio.mak
@@ -16,19 +16,6 @@ fate-binkaudio: $(FATE_BINKAUDIO-yes)
 FATE_SAMPLES_AUDIO-$(call DEMDEC, BMV, BMV_AUDIO) += fate-bmv-audio
 fate-bmv-audio: CMD = framecrc -i $(TARGET_SAMPLES)/bmv/SURFING-partial.BMV -vn
 
-FATE_DCA-$(CONFIG_MPEGTS_DEMUXER) += fate-dca-core
-fate-dca-core: CMD = pcm -i $(TARGET_SAMPLES)/dts/dts.ts
-fate-dca-core: CMP = oneoff
-fate-dca-core: REF = $(SAMPLES)/dts/dts.pcm
-
-FATE_DCA-$(CONFIG_DTS_DEMUXER) += fate-dca-xll
-fate-dca-xll: CMD = pcm -disable_xll 0 -i $(TARGET_SAMPLES)/dts/master_audio_7.1_24bit.dts
-fate-dca-xll: CMP = oneoff
-fate-dca-xll: REF = $(SAMPLES)/dts/master_audio_7.1_24bit.pcm
-
-FATE_SAMPLES_AUDIO-$(CONFIG_DCA_DECODER) += $(FATE_DCA-yes)
-fate-dca: $(FATE_DCA-yes)
-
 FATE_SAMPLES_AUDIO-$(call DEMDEC, DSICIN, DSICINAUDIO) += fate-delphine-cin-audio
 fate-delphine-cin-audio: CMD = framecrc -i $(TARGET_SAMPLES)/delphine-cin/LOGO-partial.CIN -vn
 
@@ -36,11 +23,6 @@ FATE_SAMPLES_AUDIO-$(call DEMDEC, DSS, DSS_SP) += fate-dss-lp fate-dss-sp
 fate-dss-lp: CMD = framecrc -i $(TARGET_SAMPLES)/dss/lp.dss -frames 30
 fate-dss-sp: CMD = framecrc -i $(TARGET_SAMPLES)/dss/sp.dss -frames 30
 
-FATE_SAMPLES_AUDIO-$(call DEMDEC, DTS, DCA) += fate-dts_es
-fate-dts_es: CMD = pcm -i $(TARGET_SAMPLES)/dts/dts_es.dts
-fate-dts_es: CMP = oneoff
-fate-dts_es: REF = $(SAMPLES)/dts/dts_es.pcm
-
 FATE_SAMPLES_AUDIO-$(call DEMDEC, AVI, IMC) += fate-imc
 fate-imc: CMD = pcm -i $(TARGET_SAMPLES)/imc/imc.avi
 fate-imc: CMP = oneoff
diff --git a/tests/fate/avformat.mak b/tests/fate/avformat.mak
index 1d13434d..54bfb974 100644
--- a/tests/fate/avformat.mak
+++ b/tests/fate/avformat.mak
@@ -61,6 +61,7 @@ FATE_LAVF += $(FATE_LAVF_PIXFMT-yes)
 
 $(FATE_LAVF): $(AREF) $(VREF)
 $(FATE_LAVF): CMD = lavftest
+$(FATE_LAVF): CMP =
 
 FATE_AVCONV += $(FATE_LAVF)
 fate-lavf:     $(FATE_LAVF)
diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak
new file mode 100644
index 00000000..daefe694
--- /dev/null
+++ b/tests/fate/checkasm.mak
@@ -0,0 +1,5 @@
+fate-checkasm: tests/checkasm/checkasm$(EXESUF)
+fate-checkasm: CMD = run tests/checkasm/checkasm
+fate-checkasm: REF = /dev/null
+
+FATE += fate-checkasm
diff --git a/tests/fate/concatdec.mak b/tests/fate/concatdec.mak
new file mode 100644
index 00000000..988559d2
--- /dev/null
+++ b/tests/fate/concatdec.mak
@@ -0,0 +1,21 @@
+FATE_CONCAT_DEMUXER_SIMPLE1_LAVF-$(call ENCDEC2, MPEG2VIDEO, PCM_S16LE, MXF)   += mxf
+FATE_CONCAT_DEMUXER_SIMPLE1_LAVF-$(call ENCDEC2, MPEG2VIDEO, PCM_S16LE, MXF)   += mxf_d10
+
+FATE_CONCAT_DEMUXER_SIMPLE2_LAVF-$(call ENCDEC2, MPEG2VIDEO, MP2, MPEGTS)      += ts
+
+FATE_CONCAT_DEMUXER_EXTENDED_LAVF-$(call ENCDEC2, MPEG2VIDEO, PCM_S16LE, MXF)  += mxf
+FATE_CONCAT_DEMUXER_EXTENDED_LAVF-$(call ENCDEC2, MPEG2VIDEO, PCM_S16LE, MXF)  += mxf_d10
+
+$(foreach D,$(FATE_CONCAT_DEMUXER_SIMPLE1_LAVF-yes),$(eval fate-concat-demuxer-simple1-lavf-$(D): ffprobe$(PROGSSUF)$(EXESUF) fate-lavf-$(D)))
+$(foreach D,$(FATE_CONCAT_DEMUXER_SIMPLE1_LAVF-yes),$(eval fate-concat-demuxer-simple1-lavf-$(D): CMD = concat $(SRC_PATH)/tests/simple1.ffconcat ../lavf/lavf.$(D)))
+FATE_CONCAT_DEMUXER-$(CONFIG_CONCAT_DEMUXER) += $(FATE_CONCAT_DEMUXER_SIMPLE1_LAVF-yes:%=fate-concat-demuxer-simple1-lavf-%)
+
+$(foreach D,$(FATE_CONCAT_DEMUXER_SIMPLE2_LAVF-yes),$(eval fate-concat-demuxer-simple2-lavf-$(D): ffprobe$(PROGSSUF)$(EXESUF) fate-lavf-$(D)))
+$(foreach D,$(FATE_CONCAT_DEMUXER_SIMPLE2_LAVF-yes),$(eval fate-concat-demuxer-simple2-lavf-$(D): CMD = concat $(SRC_PATH)/tests/simple2.ffconcat ../lavf/lavf.$(D)))
+FATE_CONCAT_DEMUXER-$(CONFIG_CONCAT_DEMUXER) += $(FATE_CONCAT_DEMUXER_SIMPLE2_LAVF-yes:%=fate-concat-demuxer-simple2-lavf-%)
+
+$(foreach D,$(FATE_CONCAT_DEMUXER_EXTENDED_LAVF-yes),$(eval fate-concat-demuxer-extended-lavf-$(D): ffprobe$(PROGSSUF)$(EXESUF) fate-lavf-$(D)))
+$(foreach D,$(FATE_CONCAT_DEMUXER_EXTENDED_LAVF-yes),$(eval fate-concat-demuxer-extended-lavf-$(D): CMD = concat $(SRC_PATH)/tests/extended.ffconcat ../lavf/lavf.$(D) md5))
+FATE_CONCAT_DEMUXER-$(CONFIG_CONCAT_DEMUXER) += $(FATE_CONCAT_DEMUXER_EXTENDED_LAVF-yes:%=fate-concat-demuxer-extended-lavf-%)
+
+FATE-$(CONFIG_FFPROBE) += $(FATE_CONCAT_DEMUXER-yes)
diff --git a/tests/fate/dca.mak b/tests/fate/dca.mak
new file mode 100644
index 00000000..6a6614df
--- /dev/null
+++ b/tests/fate/dca.mak
@@ -0,0 +1,79 @@
+# dcadec test samples
+DCADEC_SUITE_LOSSLESS_16 = xll_51_16_192_768_0        \
+                           xll_51_16_192_768_1        \
+
+DCADEC_SUITE_LOSSLESS_24 = xll_51_24_48_768           \
+                           xll_51_24_48_none          \
+                           xll_71_24_48_768_0         \
+                           xll_71_24_48_768_1         \
+                           xll_71_24_96_768           \
+                           xll_x96_51_24_96_1509      \
+                           xll_xch_61_24_48_768       \
+
+DCADEC_SUITE_LOSSY       = core_51_24_48_768_0        \
+                           core_51_24_48_768_1        \
+                           x96_51_24_96_1509          \
+                           x96_xch_61_24_96_3840      \
+                           x96_xxch_71_24_96_3840     \
+                           xbr_51_24_48_3840          \
+                           xbr_xch_61_24_48_3840      \
+                           xbr_xxch_71_24_48_3840     \
+                           xch_61_24_48_768           \
+                           xxch_71_24_48_2046         \
+
+define FATE_DCADEC_LOSSLESS_SUITE
+FATE_DCADEC_LOSSLESS += fate-dca-$(1) fate-dca-$(1)-dmix_2 fate-dca-$(1)-dmix_6
+fate-dca-$(1): CMD = framemd5 -i $(TARGET_SAMPLES)/dts/dcadec-suite/$(1).dtshd -c:a pcm_$(2)
+fate-dca-$(1)-dmix_2: CMD = framemd5 -request_channel_layout 0x3   -i $(TARGET_SAMPLES)/dts/dcadec-suite/$(1).dtshd -c:a pcm_$(2)
+fate-dca-$(1)-dmix_6: CMD = framemd5 -request_channel_layout 0x60f -i $(TARGET_SAMPLES)/dts/dcadec-suite/$(1).dtshd -c:a pcm_$(2)
+endef
+
+define FATE_DCADEC_LOSSY_SUITE
+FATE_DCADEC_LOSSY += fate-dca-$(1)
+fate-dca-$(1): CMD = ffmpeg -i $(TARGET_SAMPLES)/dts/dcadec-suite/$(1).dtshd -f f32le -
+fate-dca-$(1): REF = $(SAMPLES)/dts/dcadec-suite/$(1).f32
+endef
+
+$(foreach N,$(DCADEC_SUITE_LOSSLESS_16),$(eval $(call FATE_DCADEC_LOSSLESS_SUITE,$(N),s16le)))
+$(foreach N,$(DCADEC_SUITE_LOSSLESS_24),$(eval $(call FATE_DCADEC_LOSSLESS_SUITE,$(N),s24le)))
+$(foreach N,$(DCADEC_SUITE_LOSSY),$(eval $(call FATE_DCADEC_LOSSY_SUITE,$(N))))
+
+# lossy downmix tests
+FATE_DCADEC_LOSSY += fate-dca-core_51_24_48_768_1-dmix_2
+fate-dca-core_51_24_48_768_1-dmix_2: CMD = ffmpeg -request_channel_layout 0x3 -i $(TARGET_SAMPLES)/dts/dcadec-suite/core_51_24_48_768_1.dtshd -f f32le -
+fate-dca-core_51_24_48_768_1-dmix_2: REF = $(SAMPLES)/dts/dcadec-suite/core_51_24_48_768_1-dmix_2.f32
+
+FATE_DCADEC_LOSSY += fate-dca-x96_xxch_71_24_96_3840-dmix_2
+fate-dca-x96_xxch_71_24_96_3840-dmix_2: CMD = ffmpeg -request_channel_layout 0x3 -i $(TARGET_SAMPLES)/dts/dcadec-suite/x96_xxch_71_24_96_3840.dtshd -f f32le -
+# intentionally uses the dmix_6 reference because the sample does not contain stereo downmix coefficients
+fate-dca-x96_xxch_71_24_96_3840-dmix_2: REF = $(SAMPLES)/dts/dcadec-suite/x96_xxch_71_24_96_3840-dmix_6.f32
+
+FATE_DCADEC_LOSSY += fate-dca-x96_xxch_71_24_96_3840-dmix_6
+fate-dca-x96_xxch_71_24_96_3840-dmix_6: CMD = ffmpeg -request_channel_layout 0x60f -i $(TARGET_SAMPLES)/dts/dcadec-suite/x96_xxch_71_24_96_3840.dtshd -f f32le -
+fate-dca-x96_xxch_71_24_96_3840-dmix_6: REF = $(SAMPLES)/dts/dcadec-suite/x96_xxch_71_24_96_3840-dmix_6.f32
+
+FATE_DCADEC_LOSSY += fate-dca-xch_61_24_48_768-dmix_6
+fate-dca-xch_61_24_48_768-dmix_6: CMD = ffmpeg -request_channel_layout 0x60f -i $(TARGET_SAMPLES)/dts/dcadec-suite/xch_61_24_48_768.dtshd -f f32le -
+fate-dca-xch_61_24_48_768-dmix_6: REF = $(SAMPLES)/dts/dcadec-suite/xch_61_24_48_768-dmix_6.f32
+
+$(FATE_DCADEC_LOSSY): CMP = oneoff
+$(FATE_DCADEC_LOSSY): CMP_UNIT = f32
+$(FATE_DCADEC_LOSSY): FUZZ = 9
+
+FATE_DCA-$(call DEMDEC, DTS, DCA) += $(FATE_DCADEC_LOSSLESS) $(FATE_DCADEC_LOSSY)
+
+FATE_DCA-$(call DEMDEC, MPEGTS, DCA) += fate-dca-core
+fate-dca-core: CMD = pcm -i $(TARGET_SAMPLES)/dts/dts.ts
+fate-dca-core: CMP = oneoff
+fate-dca-core: REF = $(SAMPLES)/dts/dts.pcm
+
+FATE_DCA-$(call DEMDEC, DTS, DCA) += fate-dca-xll
+fate-dca-xll: CMD = md5 -i $(TARGET_SAMPLES)/dts/master_audio_7.1_24bit.dts -f s24le
+
+FATE_DCA-$(call DEMDEC, DTS, DCA) += fate-dts_es
+fate-dts_es: CMD = pcm -i $(TARGET_SAMPLES)/dts/dts_es.dts
+fate-dts_es: CMP = oneoff
+fate-dts_es: REF = $(SAMPLES)/dts/dts_es_2.pcm
+
+FATE_SAMPLES_AUDIO += $(FATE_DCA-yes)
+fate-dca: $(FATE_DCA-yes)
diff --git a/tests/fate/demux.mak b/tests/fate/demux.mak
index 0eccc9b3..e48d398c 100644
--- a/tests/fate/demux.mak
+++ b/tests/fate/demux.mak
@@ -13,6 +13,10 @@ fate-ast: CMD = crc -i $(TARGET_SAMPLES)/ast/demo11_02_partial.ast -c copy
 FATE_SAMPLES_DEMUX-$(CONFIG_BINK_DEMUXER) += fate-bink-demux
 fate-bink-demux: CMD = crc -i $(TARGET_SAMPLES)/bink/Snd0a7d9b58.dee -vn -acodec copy
 
+FATE_SAMPLES_DEMUX-$(CONFIG_BFSTM_DEMUXER) += fate-bfstm fate-bcstm
+fate-bfstm: CMD = crc -i $(TARGET_SAMPLES)/bfstm/spl-forest-day.bfstm -acodec copy
+fate-bcstm: CMD = crc -i $(TARGET_SAMPLES)/bfstm/loz-mm-mikau.bcstm -acodec copy
+
 FATE_SAMPLES_DEMUX-$(CONFIG_BRSTM_DEMUXER) += fate-brstm
 fate-brstm: CMD = crc -i $(TARGET_SAMPLES)/brstm/lozswd_partial.brstm -acodec copy
 
@@ -28,6 +32,9 @@ fate-cine-demux: CMD = crc -i $(TARGET_SAMPLES)/cine/bayer_gbrg8.cine -c copy
 FATE_SAMPLES_DEMUX-$(CONFIG_DAUD_DEMUXER) += fate-d-cinema-demux
 fate-d-cinema-demux: CMD = framecrc -i $(TARGET_SAMPLES)/d-cinema/THX_Science_FLT_1920-partial.302 -acodec copy
 
+FATE_SAMPLES_DEMUX-$(CONFIG_EA_DEMUXER) += fate-d-eavp6-demux
+fate-d-eavp6-demux: CMD = framecrc -i $(TARGET_SAMPLES)/ea-vp6/SmallRing.vp6 -map 0 -vcodec copy
+
 FATE_SAMPLES_DEMUX-$(CONFIG_GIF_DEMUXER) += fate-gif-demux
 fate-gif-demux: CMD = framecrc -i $(TARGET_SAMPLES)/gif/Newtons_cradle_animation_book_2.gif -vcodec copy
 
diff --git a/tests/fate/dnxhd.mak b/tests/fate/dnxhd.mak
new file mode 100644
index 00000000..6d79f3ba
--- /dev/null
+++ b/tests/fate/dnxhd.mak
@@ -0,0 +1,8 @@
+FATE_DNXHD = fate-dnxhd-mbaff     \
+             fate-dnxhr-444
+
+FATE_SAMPLES_AVCONV-$(call DEMDEC, MOV, DNXHD) += $(FATE_DNXHD)
+fate-dnxhd: $(FATE_DNXHD) $(FATE_VCODEC_DNXHD)
+
+fate-dnxhd-mbaff: CMD = framecrc -flags +bitexact -idct simple -i $(TARGET_SAMPLES)/dnxhd/dnxhd100_cid1260.mov -pix_fmt yuv422p10le
+fate-dnxhr-444:   CMD = framecrc -flags +bitexact -idct simple -i $(TARGET_SAMPLES)/dnxhd/dnxhr444_cid1270.mov -pix_fmt yuv444p10le
diff --git a/tests/fate/ffmpeg.mak b/tests/fate/ffmpeg.mak
index 6af1081e..3b91c120 100644
--- a/tests/fate/ffmpeg.mak
+++ b/tests/fate/ffmpeg.mak
@@ -1,14 +1,14 @@
 FATE_MAPCHAN-$(CONFIG_CHANNELMAP_FILTER) += fate-mapchan-6ch-extract-2
 fate-mapchan-6ch-extract-2: tests/data/asynth-22050-6.wav
-fate-mapchan-6ch-extract-2: CMD = ffmpeg -i $(TARGET_PATH)/tests/data/asynth-22050-6.wav -map_channel 0.0.0 -flags +bitexact -f wav md5: -map_channel 0.0.1 -flags +bitexact -f wav md5:
+fate-mapchan-6ch-extract-2: CMD = ffmpeg -i $(TARGET_PATH)/tests/data/asynth-22050-6.wav -map_channel 0.0.0 -fflags +bitexact -f wav md5: -map_channel 0.0.1 -fflags +bitexact -f wav md5:
 
 FATE_MAPCHAN-$(CONFIG_CHANNELMAP_FILTER) += fate-mapchan-6ch-extract-2-downmix-mono
 fate-mapchan-6ch-extract-2-downmix-mono: tests/data/asynth-22050-6.wav
-fate-mapchan-6ch-extract-2-downmix-mono: CMD = md5 -i $(TARGET_PATH)/tests/data/asynth-22050-6.wav -map_channel 0.0.1 -map_channel 0.0.0 -ac 1 -flags +bitexact -f wav
+fate-mapchan-6ch-extract-2-downmix-mono: CMD = md5 -i $(TARGET_PATH)/tests/data/asynth-22050-6.wav -map_channel 0.0.1 -map_channel 0.0.0 -ac 1 -fflags +bitexact -f wav
 
 FATE_MAPCHAN-$(CONFIG_CHANNELMAP_FILTER) += fate-mapchan-silent-mono
 fate-mapchan-silent-mono: tests/data/asynth-22050-1.wav
-fate-mapchan-silent-mono: CMD = md5 -i $(TARGET_PATH)/tests/data/asynth-22050-1.wav -map_channel -1 -map_channel 0.0.0 -flags +bitexact -f wav
+fate-mapchan-silent-mono: CMD = md5 -i $(TARGET_PATH)/tests/data/asynth-22050-1.wav -map_channel -1 -map_channel 0.0.0 -fflags +bitexact -f wav
 
 FATE_MAPCHAN = $(FATE_MAPCHAN-yes)
 
@@ -16,10 +16,14 @@ FATE_FFMPEG += $(FATE_MAPCHAN)
 fate-mapchan: $(FATE_MAPCHAN)
 
 FATE_FFMPEG-$(CONFIG_COLOR_FILTER) += fate-ffmpeg-filter_complex
-fate-ffmpeg-filter_complex: CMD = framecrc -filter_complex color=d=1:r=5
+fate-ffmpeg-filter_complex: CMD = framecrc -filter_complex color=d=1:r=5 -fflags +bitexact
+
+FATE_SAMPLES_FFMPEG-$(CONFIG_COLORKEY_FILTER) += fate-ffmpeg-filter_colorkey
+fate-ffmpeg-filter_colorkey: tests/data/filtergraphs/colorkey
+fate-ffmpeg-filter_colorkey: CMD = framecrc -idct simple -fflags +bitexact -flags +bitexact  -sws_flags +accurate_rnd+bitexact -i $(TARGET_SAMPLES)/cavs/cavs.mpg -fflags +bitexact -flags +bitexact -sws_flags +accurate_rnd+bitexact -i $(TARGET_SAMPLES)/lena.pnm -filter_complex_script $(TARGET_PATH)/tests/data/filtergraphs/colorkey -sws_flags +accurate_rnd+bitexact -fflags +bitexact -flags +bitexact -qscale 2 -vframes 10
 
 FATE_FFMPEG-$(CONFIG_COLOR_FILTER) += fate-ffmpeg-lavfi
-fate-ffmpeg-lavfi: CMD = framecrc -lavfi color=d=1:r=5
+fate-ffmpeg-lavfi: CMD = framecrc -lavfi color=d=1:r=5 -fflags +bitexact
 
 FATE_SAMPLES_FFMPEG-$(CONFIG_RAWVIDEO_DEMUXER) += fate-force_key_frames
 fate-force_key_frames: tests/data/vsynth_lena.yuv
@@ -46,3 +50,9 @@ fate-unknown_layout-ac3: $(AREF)
 fate-unknown_layout-ac3: CMD = md5 \
   -guess_layout_max 0 -f s16le -ac 1 -ar 44100 -i $(TARGET_PATH)/$(AREF) \
   -f ac3 -flags +bitexact -c ac3_fixed
+
+FATE_SAMPLES_FFMPEG-$(call DEMMUX, OGG, OGG) += fate-limited_input_seek fate-limited_input_seek-copyts
+fate-limited_input_seek: $(SAMPLES)/vorbis/moog_small.ogg
+fate-limited_input_seek: CMD = md5 -ss 1.5 -t 1.3 -i $(TARGET_SAMPLES)/vorbis/moog_small.ogg -c:a copy -fflags +bitexact -f ogg
+fate-limited_input_seek-copyts: $(SAMPLES)/vorbis/moog_small.ogg
+fate-limited_input_seek-copyts: CMD = md5 -ss 1.5 -t 1.3 -i $(TARGET_SAMPLES)/vorbis/moog_small.ogg -c:a copy -copyts -fflags +bitexact -f ogg
diff --git a/tests/fate/ffprobe.mak b/tests/fate/ffprobe.mak
index cf201859..d5fb05cd 100644
--- a/tests/fate/ffprobe.mak
+++ b/tests/fate/ffprobe.mak
@@ -1,33 +1,35 @@
 FFPROBE_TEST_FILE=tests/data/ffprobe-test.nut
-FFPROBE_COMMAND=ffprobe$(EXESUF) -show_streams -show_packets -show_format -show_frames -bitexact $(FFPROBE_TEST_FILE)
+FFPROBE_COMMAND=ffprobe$(PROGSSUF)$(EXESUF) -show_streams -show_packets -show_format -show_frames -bitexact $(FFPROBE_TEST_FILE)
 
-FATE_FFPROBE += fate-ffprobe_compact
+FATE_FFPROBE-$(CONFIG_AVDEVICE) += fate-ffprobe_compact
 fate-ffprobe_compact: $(FFPROBE_TEST_FILE)
 fate-ffprobe_compact: CMD = run $(FFPROBE_COMMAND) -of compact
 
-FATE_FFPROBE += fate-ffprobe_csv
+FATE_FFPROBE-$(CONFIG_AVDEVICE) += fate-ffprobe_csv
 fate-ffprobe_csv: $(FFPROBE_TEST_FILE)
 fate-ffprobe_csv: CMD = run $(FFPROBE_COMMAND) -of csv
 
-FATE_FFPROBE += fate-ffprobe_default
+FATE_FFPROBE-$(CONFIG_AVDEVICE) += fate-ffprobe_default
 fate-ffprobe_default: $(FFPROBE_TEST_FILE)
 fate-ffprobe_default: CMD = run $(FFPROBE_COMMAND) -of default
 
-FATE_FFPROBE += fate-ffprobe_flat
+FATE_FFPROBE-$(CONFIG_AVDEVICE) += fate-ffprobe_flat
 fate-ffprobe_flat: $(FFPROBE_TEST_FILE)
 fate-ffprobe_flat: CMD = run $(FFPROBE_COMMAND) -of flat
 
-FATE_FFPROBE += fate-ffprobe_ini
+FATE_FFPROBE-$(CONFIG_AVDEVICE) += fate-ffprobe_ini
 fate-ffprobe_ini: $(FFPROBE_TEST_FILE)
 fate-ffprobe_ini: CMD = run $(FFPROBE_COMMAND) -of ini
 
-FATE_FFPROBE += fate-ffprobe_json
+FATE_FFPROBE-$(CONFIG_AVDEVICE) += fate-ffprobe_json
 fate-ffprobe_json: $(FFPROBE_TEST_FILE)
 fate-ffprobe_json: CMD = run $(FFPROBE_COMMAND) -of json
 
-FATE_FFPROBE += fate-ffprobe_xml
+FATE_FFPROBE-$(CONFIG_AVDEVICE) += fate-ffprobe_xml
 fate-ffprobe_xml: $(FFPROBE_TEST_FILE)
 fate-ffprobe_xml: CMD = run $(FFPROBE_COMMAND) -of xml
 
+FATE_FFPROBE += $(FATE_FFPROBE-yes)
+
 fate-ffprobe: $(FATE_FFPROBE)
 
diff --git a/tests/fate/filter-audio.mak b/tests/fate/filter-audio.mak
index 210b89eb..85a36d2d 100644
--- a/tests/fate/filter-audio.mak
+++ b/tests/fate/filter-audio.mak
@@ -81,7 +81,7 @@ FATE_AFILTER-$(call FILTERDEMDECENCMUX, JOIN, WAV, PCM_S16LE, PCM_S16LE, PCM_S16
 fate-filter-join: SRC1 = $(TARGET_PATH)/tests/data/asynth-44100-2.wav
 fate-filter-join: SRC2 = $(TARGET_PATH)/tests/data/asynth-44100-3.wav
 fate-filter-join: tests/data/asynth-44100-2.wav tests/data/asynth-44100-3.wav
-fate-filter-join: CMD = md5 -i $(SRC1) -i $(SRC2) -filter_complex join=channel_layout=5 -f s16le
+fate-filter-join: CMD = md5 -i $(SRC1) -i $(SRC2) -filter_complex join=channel_layout=5c -f s16le
 fate-filter-join: CMP = oneline
 fate-filter-join: REF = 88b0d24a64717ba8635b29e8dac6ecd8
 
@@ -92,5 +92,9 @@ fate-filter-volume: CMD = md5 -i $(SRC) -af aperms=random,volume=precision=fixed
 fate-filter-volume: CMP = oneline
 fate-filter-volume: REF = 4d6ba75ef3e32d305d066b9bc771d6f4
 
+FATE_AFILTER-yes += fate-filter-formats
+fate-filter-formats: libavfilter/formats-test$(EXESUF)
+fate-filter-formats: CMD = run libavfilter/formats-test
+
 FATE_SAMPLES_AVCONV += $(FATE_AFILTER-yes)
 fate-afilter: $(FATE_AFILTER-yes)
diff --git a/tests/fate/filter-video.mak b/tests/fate/filter-video.mak
index f502c456..4186996c 100644
--- a/tests/fate/filter-video.mak
+++ b/tests/fate/filter-video.mak
@@ -15,6 +15,14 @@ fate-filter-yadif16: CMD = framecrc -flags bitexact -idct simple -i $(TARGET_SAM
 
 FATE_FILTER-$(call FILTERDEMDEC, YADIF, MPEGTS, MPEG2VIDEO) += $(FATE_YADIF)
 
+FATE_W3FDIF += fate-filter-w3fdif-simple
+fate-filter-w3fdif-simple: CMD = framecrc -flags bitexact -idct simple -i $(TARGET_SAMPLES)/mpeg2/mpeg2_field_encoding.ts -vframes 30 -vf w3fdif=0
+
+FATE_W3FDIF += fate-filter-w3fdif-complex
+fate-filter-w3fdif-complex: CMD = framecrc -flags bitexact -idct simple -i $(TARGET_SAMPLES)/mpeg2/mpeg2_field_encoding.ts -vframes 30 -vf w3fdif=1
+
+FATE_FILTER-$(call FILTERDEMDEC, W3FDIF, MPEGTS, MPEG2VIDEO) += $(FATE_W3FDIF)
+
 FATE_MCDEINT += fate-filter-mcdeint-fast
 fate-filter-mcdeint-fast: CMD = framecrc -flags bitexact -idct simple -i $(TARGET_SAMPLES)/mpeg2/mpeg2_field_encoding.ts -vframes 30 -vf mcdeint=fast
 
@@ -58,6 +66,15 @@ fate-filter-lavd-life: CMD = framecrc -f lavfi -i life=s=40x40:r=5:seed=42:mold=
 FATE_FILTER-$(call ALLYES, AVDEVICE TESTSRC_FILTER) += fate-filter-lavd-testsrc
 fate-filter-lavd-testsrc: CMD = framecrc -f lavfi -i testsrc=r=7:n=2:d=10
 
+FATE_FILTER-$(call ALLYES, TESTSRC2_FILTER) += fate-filter-testsrc2-yuv420p
+fate-filter-testsrc2-yuv420p: CMD = framecrc -lavfi testsrc2=r=7:d=10 -pix_fmt yuv420p
+
+FATE_FILTER-$(call ALLYES, TESTSRC2_FILTER) += fate-filter-testsrc2-yuv444p
+fate-filter-testsrc2-yuv444p: CMD = framecrc -lavfi testsrc2=r=7:d=10 -pix_fmt yuv444p
+
+FATE_FILTER-$(call ALLYES, TESTSRC2_FILTER) += fate-filter-testsrc2-rgb24
+fate-filter-testsrc2-rgb24: CMD = framecrc -lavfi testsrc2=r=7:d=10 -pix_fmt rgb24
+
 FATE_FILTER-$(call ALLYES, AVDEVICE TESTSRC_FILTER FORMAT_FILTER CONCAT_FILTER SCALE_FILTER) += fate-filter-lavd-scalenorm
 fate-filter-lavd-scalenorm: tests/data/filtergraphs/scalenorm
 fate-filter-lavd-scalenorm: CMD = framecrc -f lavfi -graph_file $(TARGET_PATH)/tests/data/filtergraphs/scalenorm -i dummy
@@ -103,8 +120,39 @@ fate-filter-negate: CMD = framecrc -c:v pgmyuv -i $(SRC) -vf perms=random,negate
 FATE_FILTER_VSYNTH-$(CONFIG_HISTOGRAM_FILTER) += fate-filter-histogram-levels
 fate-filter-histogram-levels: CMD = framecrc -c:v pgmyuv -i $(SRC) -vf histogram -flags +bitexact -sws_flags +accurate_rnd+bitexact
 
-FATE_FILTER_VSYNTH-$(CONFIG_HISTOGRAM_FILTER) += fate-filter-histogram-waveform
-fate-filter-histogram-waveform: CMD = framecrc -c:v pgmyuv -i $(SRC) -vf format=yuv444p,histogram=mode=waveform -flags +bitexact -sws_flags +accurate_rnd+bitexact
+FATE_FILTER_VSYNTH-$(CONFIG_WAVEFORM_FILTER) += fate-filter-waveform_column
+fate-filter-waveform_column: CMD = framecrc -c:v pgmyuv -i $(SRC) -vf waveform -flags +bitexact -sws_flags +accurate_rnd+bitexact
+
+FATE_FILTER_VSYNTH-$(CONFIG_WAVEFORM_FILTER) += fate-filter-waveform_row
+fate-filter-waveform_row: CMD = framecrc -c:v pgmyuv -i $(SRC) -vf waveform=m=row -flags +bitexact -sws_flags +accurate_rnd+bitexact
+
+FATE_FILTER_VSYNTH-$(CONFIG_WAVEFORM_FILTER) += fate-filter-waveform_envelope
+fate-filter-waveform_envelope: CMD = framecrc -c:v pgmyuv -i $(SRC) -vf waveform=e=3 -flags +bitexact -sws_flags +accurate_rnd+bitexact
+
+FATE_FILTER_VSYNTH-$(CONFIG_WAVEFORM_FILTER) += fate-filter-waveform_uv
+fate-filter-waveform_uv: CMD = framecrc -c:v pgmyuv -i $(SRC) -vf waveform=c=6 -flags +bitexact -sws_flags +accurate_rnd+bitexact
+
+FATE_FILTER_VSYNTH-$(CONFIG_VECTORSCOPE_FILTER) += fate-filter-vectorscope_gray
+fate-filter-vectorscope_gray: CMD = framecrc -c:v pgmyuv -i $(SRC) -vf vectorscope=gray -sws_flags +accurate_rnd+bitexact -vframes 3
+
+FATE_FILTER_VSYNTH-$(CONFIG_VECTORSCOPE_FILTER) += fate-filter-vectorscope_color
+fate-filter-vectorscope_color: CMD = framecrc -c:v pgmyuv -i $(SRC) -vf vectorscope=color -sws_flags +accurate_rnd+bitexact -vframes 3
+
+FATE_FILTER_VSYNTH-$(CONFIG_VECTORSCOPE_FILTER) += fate-filter-vectorscope_color2
+fate-filter-vectorscope_color2: CMD = framecrc -c:v pgmyuv -i $(SRC) -vf vectorscope=color2 -sws_flags +accurate_rnd+bitexact -vframes 3
+
+FATE_FILTER_VSYNTH-$(CONFIG_VECTORSCOPE_FILTER) += fate-filter-vectorscope_color3
+fate-filter-vectorscope_color3: CMD = framecrc -c:v pgmyuv -i $(SRC) -vf vectorscope=color3 -sws_flags +accurate_rnd+bitexact -vframes 3
+
+FATE_FILTER_VSYNTH-$(CONFIG_VECTORSCOPE_FILTER) += fate-filter-vectorscope_color4
+fate-filter-vectorscope_color4: CMD = framecrc -c:v pgmyuv -i $(SRC) -vf vectorscope=color4 -sws_flags +accurate_rnd+bitexact -vframes 3
+
+FATE_FILTER_VSYNTH-$(CONFIG_VECTORSCOPE_FILTER) += fate-filter-vectorscope_xy
+fate-filter-vectorscope_xy: CMD = framecrc -c:v pgmyuv -i $(SRC) -vf vectorscope=x=0:y=1 -sws_flags +accurate_rnd+bitexact -vframes 3
+
+FATE_FILTER_VSYNTH-$(CONFIG_MERGEPLANES_FILTER) += fate-filter-mergeplanes
+fate-filter-mergeplanes: tests/data/filtergraphs/mergeplanes
+fate-filter-mergeplanes: CMD = framecrc -c:v pgmyuv -i $(SRC) -c:v pgmyuv -i $(SRC) -filter_complex_script $(TARGET_PATH)/tests/data/filtergraphs/mergeplanes
 
 FATE_FILTER_VSYNTH-$(CONFIG_OVERLAY_FILTER) += fate-filter-overlay
 fate-filter-overlay: tests/data/filtergraphs/overlay
@@ -129,6 +177,84 @@ fate-filter-overlay_yuv444: CMD = framecrc -c:v pgmyuv -i $(SRC) -filter_complex
 FATE_FILTER_VSYNTH-$(CONFIG_PHASE_FILTER) += fate-filter-phase
 fate-filter-phase: CMD = framecrc -c:v pgmyuv -i $(SRC) -vf phase
 
+FATE_REMOVEGRAIN += fate-filter-removegrain-mode-00
+fate-filter-removegrain-mode-00: CMD = framecrc -c:v pgmyuv -i $(SRC) -vframes 1 -vf removegrain=0:0:0
+
+FATE_REMOVEGRAIN += fate-filter-removegrain-mode-01
+fate-filter-removegrain-mode-01: CMD = framecrc -c:v pgmyuv -i $(SRC) -vframes 1 -vf removegrain=1:1:1
+
+FATE_REMOVEGRAIN += fate-filter-removegrain-mode-02
+fate-filter-removegrain-mode-02: CMD = framecrc -c:v pgmyuv -i $(SRC) -vframes 1 -vf removegrain=2:2:2
+
+FATE_REMOVEGRAIN += fate-filter-removegrain-mode-03
+fate-filter-removegrain-mode-03: CMD = framecrc -c:v pgmyuv -i $(SRC) -vframes 1 -vf removegrain=3:3:3
+
+FATE_REMOVEGRAIN += fate-filter-removegrain-mode-04
+fate-filter-removegrain-mode-04: CMD = framecrc -c:v pgmyuv -i $(SRC) -vframes 1 -vf removegrain=4:4:4
+
+FATE_REMOVEGRAIN += fate-filter-removegrain-mode-05
+fate-filter-removegrain-mode-05: CMD = framecrc -c:v pgmyuv -i $(SRC) -vframes 1 -vf removegrain=5:5:5
+
+FATE_REMOVEGRAIN += fate-filter-removegrain-mode-06
+fate-filter-removegrain-mode-06: CMD = framecrc -c:v pgmyuv -i $(SRC) -vframes 1 -vf removegrain=6:6:6
+
+FATE_REMOVEGRAIN += fate-filter-removegrain-mode-07
+fate-filter-removegrain-mode-07: CMD = framecrc -c:v pgmyuv -i $(SRC) -vframes 1 -vf removegrain=7:7:7
+
+FATE_REMOVEGRAIN += fate-filter-removegrain-mode-08
+fate-filter-removegrain-mode-08: CMD = framecrc -c:v pgmyuv -i $(SRC) -vframes 1 -vf removegrain=8:8:8
+
+FATE_REMOVEGRAIN += fate-filter-removegrain-mode-09
+fate-filter-removegrain-mode-09: CMD = framecrc -c:v pgmyuv -i $(SRC) -vframes 1 -vf removegrain=9:9:9
+
+FATE_REMOVEGRAIN += fate-filter-removegrain-mode-10
+fate-filter-removegrain-mode-10: CMD = framecrc -c:v pgmyuv -i $(SRC) -vframes 1 -vf removegrain=10:10:10
+
+FATE_REMOVEGRAIN += fate-filter-removegrain-mode-11
+fate-filter-removegrain-mode-11: CMD = framecrc -c:v pgmyuv -i $(SRC) -vframes 1 -vf removegrain=11:11:11
+
+FATE_REMOVEGRAIN += fate-filter-removegrain-mode-12
+fate-filter-removegrain-mode-12: CMD = framecrc -c:v pgmyuv -i $(SRC) -vframes 1 -vf removegrain=12:12:12
+
+FATE_REMOVEGRAIN += fate-filter-removegrain-mode-13
+fate-filter-removegrain-mode-13: CMD = framecrc -c:v pgmyuv -i $(SRC) -vframes 1 -vf removegrain=13:13:13
+
+FATE_REMOVEGRAIN += fate-filter-removegrain-mode-14
+fate-filter-removegrain-mode-14: CMD = framecrc -c:v pgmyuv -i $(SRC) -vframes 1 -vf removegrain=14:14:14
+
+FATE_REMOVEGRAIN += fate-filter-removegrain-mode-15
+fate-filter-removegrain-mode-15: CMD = framecrc -c:v pgmyuv -i $(SRC) -vframes 1 -vf removegrain=15:15:15
+
+FATE_REMOVEGRAIN += fate-filter-removegrain-mode-16
+fate-filter-removegrain-mode-16: CMD = framecrc -c:v pgmyuv -i $(SRC) -vframes 1 -vf removegrain=16:16:16
+
+FATE_REMOVEGRAIN += fate-filter-removegrain-mode-17
+fate-filter-removegrain-mode-17: CMD = framecrc -c:v pgmyuv -i $(SRC) -vframes 1 -vf removegrain=17:17:17
+
+FATE_REMOVEGRAIN += fate-filter-removegrain-mode-18
+fate-filter-removegrain-mode-18: CMD = framecrc -c:v pgmyuv -i $(SRC) -vframes 1 -vf removegrain=18:18:18
+
+FATE_REMOVEGRAIN += fate-filter-removegrain-mode-19
+fate-filter-removegrain-mode-19: CMD = framecrc -c:v pgmyuv -i $(SRC) -vframes 1 -vf removegrain=19:19:19
+
+FATE_REMOVEGRAIN += fate-filter-removegrain-mode-20
+fate-filter-removegrain-mode-20: CMD = framecrc -c:v pgmyuv -i $(SRC) -vframes 1 -vf removegrain=20:20:20
+
+FATE_REMOVEGRAIN += fate-filter-removegrain-mode-21
+fate-filter-removegrain-mode-21: CMD = framecrc -c:v pgmyuv -i $(SRC) -vframes 1 -vf removegrain=21:21:21
+
+FATE_REMOVEGRAIN += fate-filter-removegrain-mode-22
+fate-filter-removegrain-mode-22: CMD = framecrc -c:v pgmyuv -i $(SRC) -vframes 1 -vf removegrain=22:22:22
+
+FATE_REMOVEGRAIN += fate-filter-removegrain-mode-23
+fate-filter-removegrain-mode-23: CMD = framecrc -c:v pgmyuv -i $(SRC) -vframes 1 -vf removegrain=23:23:23
+
+FATE_REMOVEGRAIN += fate-filter-removegrain-mode-24
+fate-filter-removegrain-mode-24: CMD = framecrc -c:v pgmyuv -i $(SRC) -vframes 1 -vf removegrain=24:24:24
+
+fate-filter-removegrain: $(FATE_REMOVEGRAIN)
+FATE_FILTER_VSYNTH-$(CONFIG_REMOVEGRAIN_FILTER) += $(FATE_REMOVEGRAIN)
+
 FATE_FILTER_VSYNTH-$(CONFIG_SEPARATEFIELDS_FILTER) += fate-filter-separatefields
 fate-filter-separatefields: CMD = framecrc -c:v pgmyuv -i $(SRC) -vf separatefields
 
@@ -148,6 +274,9 @@ fate-filter-shuffleplanes-swapuv: CMD = framecrc -c:v pgmyuv -i $(SRC) -vf shuff
 
 FATE_FILTER_VSYNTH-$(CONFIG_SHUFFLEPLANES_FILTER) += $(FATE_SHUFFLEPLANES)
 
+FATE_FILTER_VSYNTH-$(CONFIG_TBLEND_FILTER) += fate-filter-tblend
+fate-filter-tblend: CMD = framecrc -c:v pgmyuv -i $(SRC) -vf tblend=all_mode=difference128
+
 FATE_FILTER_VSYNTH-$(CONFIG_TELECINE_FILTER) += fate-filter-telecine
 fate-filter-telecine: CMD = framecrc -c:v pgmyuv -i $(SRC) -vf telecine
 
@@ -200,6 +329,9 @@ FATE_FILTER-$(call ALLYES, TESTSRC_FILTER SINE_FILTER CONCAT_FILTER) += fate-fil
 fate-filter-concat: tests/data/filtergraphs/concat
 fate-filter-concat: CMD = framecrc -filter_complex_script $(TARGET_PATH)/tests/data/filtergraphs/concat
 
+FATE_FILTER-$(call ALLYES, TESTSRC2_FILTER FPS_FILTER MPDECIMATE_FILTER) += fate-filter-mpdecimate
+fate-filter-mpdecimate: CMD = framecrc -lavfi testsrc2=r=2:d=10,fps=3,mpdecimate -r 3 -pix_fmt yuv420p
+
 FATE_FILTER_VSYNTH-$(call ALLYES, FORMAT_FILTER SPLIT_FILTER ALPHAEXTRACT_FILTER ALPHAMERGE_FILTER) += fate-filter-alphaextract_alphamerge_rgb
 fate-filter-alphaextract_alphamerge_rgb: tests/data/filtergraphs/alphamerge_alphaextract_rgb
 fate-filter-alphaextract_alphamerge_rgb: CMD = framecrc -c:v pgmyuv -i $(SRC) -filter_complex_script $(TARGET_PATH)/tests/data/filtergraphs/alphamerge_alphaextract_rgb
@@ -229,6 +361,10 @@ fate-filter-scale200: CMD = video_filter "scale=w=200:h=200"
 FATE_FILTER_VSYNTH-$(CONFIG_SCALE_FILTER) += fate-filter-scale500
 fate-filter-scale500: CMD = video_filter "scale=w=500:h=500"
 
+FATE_FILTER_VSYNTH-$(CONFIG_SCALE_FILTER) += fate-filter-scalechroma
+fate-filter-scalechroma: tests/data/vsynth1.yuv
+fate-filter-scalechroma: CMD = framecrc -flags bitexact -s 352x288 -pix_fmt yuv444p -i $(TARGET_PATH)/tests/data/vsynth1.yuv -pix_fmt yuv420p -sws_flags +bitexact -vf scale=out_v_chr_pos=33:out_h_chr_pos=151
+
 FATE_FILTER_VSYNTH-$(CONFIG_VFLIP_FILTER) += fate-filter-vflip
 fate-filter-vflip: CMD = video_filter "vflip"
 
@@ -307,6 +443,49 @@ fate-filter-stereo3d-sbsl-al: CMD = framecrc -c:v pgmyuv -i $(SRC) -vframes 5 -f
 FATE_STEREO3D += fate-filter-stereo3d-sbsl-sbsr
 fate-filter-stereo3d-sbsl-sbsr: CMD = framecrc -c:v pgmyuv -i $(SRC) -vframes 5 -flags +bitexact -sws_flags +accurate_rnd+bitexact -vf stereo3d=sbsl:sbsr
 
+FATE_STEREO3D += fate-filter-stereo3d-sbsl-agmc
+fate-filter-stereo3d-sbsl-agmc: CMD = framecrc -c:v pgmyuv -i $(SRC) -vframes 5 -flags +bitexact -sws_flags +accurate_rnd+bitexact -vf stereo3d=sbsl:agmc
+
+FATE_STEREO3D += fate-filter-stereo3d-sbsl-agmd
+fate-filter-stereo3d-sbsl-agmd: CMD = framecrc -c:v pgmyuv -i $(SRC) -vframes 5 -flags +bitexact -sws_flags +accurate_rnd+bitexact -vf stereo3d=sbsl:agmd
+
+FATE_STEREO3D += fate-filter-stereo3d-sbsl-agmg
+fate-filter-stereo3d-sbsl-agmg: CMD = framecrc -c:v pgmyuv -i $(SRC) -vframes 5 -flags +bitexact -sws_flags +accurate_rnd+bitexact -vf stereo3d=sbsl:agmg
+
+FATE_STEREO3D += fate-filter-stereo3d-sbsl-agmh
+fate-filter-stereo3d-sbsl-agmh: CMD = framecrc -c:v pgmyuv -i $(SRC) -vframes 5 -flags +bitexact -sws_flags +accurate_rnd+bitexact -vf stereo3d=sbsl:agmh
+
+FATE_STEREO3D += fate-filter-stereo3d-sbsl-arbg
+fate-filter-stereo3d-sbsl-arbg: CMD = framecrc -c:v pgmyuv -i $(SRC) -vframes 5 -flags +bitexact -sws_flags +accurate_rnd+bitexact -vf stereo3d=sbsl:arbg
+
+FATE_STEREO3D += fate-filter-stereo3d-sbsl-arcc
+fate-filter-stereo3d-sbsl-arcc: CMD = framecrc -c:v pgmyuv -i $(SRC) -vframes 5 -flags +bitexact -sws_flags +accurate_rnd+bitexact -vf stereo3d=sbsl:arcc
+
+FATE_STEREO3D += fate-filter-stereo3d-sbsl-arcd
+fate-filter-stereo3d-sbsl-arcd: CMD = framecrc -c:v pgmyuv -i $(SRC) -vframes 5 -flags +bitexact -sws_flags +accurate_rnd+bitexact -vf stereo3d=sbsl:arcd
+
+FATE_STEREO3D += fate-filter-stereo3d-sbsl-arcg
+fate-filter-stereo3d-sbsl-arcg: CMD = framecrc -c:v pgmyuv -i $(SRC) -vframes 5 -flags +bitexact -sws_flags +accurate_rnd+bitexact -vf stereo3d=sbsl:arcg
+
+FATE_STEREO3D += fate-filter-stereo3d-sbsl-arch
+fate-filter-stereo3d-sbsl-arch: CMD = framecrc -c:v pgmyuv -i $(SRC) -vframes 5 -flags +bitexact -sws_flags +accurate_rnd+bitexact -vf stereo3d=sbsl:arch
+
+FATE_STEREO3D += fate-filter-stereo3d-sbsl-argg
+fate-filter-stereo3d-sbsl-argg: CMD = framecrc -c:v pgmyuv -i $(SRC) -vframes 5 -flags +bitexact -sws_flags +accurate_rnd+bitexact -vf stereo3d=sbsl:argg
+
+FATE_STEREO3D += fate-filter-stereo3d-sbsl-aybc
+fate-filter-stereo3d-sbsl-aybc: CMD = framecrc -c:v pgmyuv -i $(SRC) -vframes 5 -flags +bitexact -sws_flags +accurate_rnd+bitexact -vf stereo3d=sbsl:aybc
+
+FATE_STEREO3D += fate-filter-stereo3d-sbsl-aybd
+fate-filter-stereo3d-sbsl-aybd: CMD = framecrc -c:v pgmyuv -i $(SRC) -vframes 5 -flags +bitexact -sws_flags +accurate_rnd+bitexact -vf stereo3d=sbsl:aybd
+
+FATE_STEREO3D += fate-filter-stereo3d-sbsl-aybg
+fate-filter-stereo3d-sbsl-aybg: CMD = framecrc -c:v pgmyuv -i $(SRC) -vframes 5 -flags +bitexact -sws_flags +accurate_rnd+bitexact -vf stereo3d=sbsl:aybg
+
+FATE_STEREO3D += fate-filter-stereo3d-sbsl-aybh
+fate-filter-stereo3d-sbsl-aybh: CMD = framecrc -c:v pgmyuv -i $(SRC) -vframes 5 -flags +bitexact -sws_flags +accurate_rnd+bitexact -vf stereo3d=sbsl:aybh
+
+fate-filter-stereo3d: $(FATE_STEREO3D)
 FATE_FILTER_VSYNTH-$(CONFIG_STEREO3D_FILTER) += $(FATE_STEREO3D)
 
 FATE_FILTER_VSYNTH-$(CONFIG_THUMBNAIL_FILTER) += fate-filter-thumbnail
@@ -317,7 +496,7 @@ fate-filter-tile: CMD = video_filter "tile=3x3:nb_frames=5:padding=7:margin=2"
 
 
 tests/pixfmts.mak: TAG = GEN
-tests/pixfmts.mak: ffmpeg$(EXESUF)
+tests/pixfmts.mak: ffmpeg$(PROGSSUF)$(EXESUF)
 	$(M)printf "PIXFMTS = " > $@
 	$(Q)$(TARGET_EXEC) $(TARGET_PATH)/$< -pix_fmts list 2> /dev/null | awk 'NR > 8 && /^IO/ { printf $$2 " " }' >> $@
 	$(Q)printf "\n" >> $@
@@ -412,7 +591,7 @@ FATE_AVCONV-$(call DEMDEC, IMAGE2, PGMYUV) += $(FATE_FILTER_VSYNTH-yes)
 #
 # Metadata tests
 #
-FILTER_METADATA_COMMAND = ffprobe$(EXESUF) -of compact=p=0 -show_entries frame=pkt_pts:frame_tags -bitexact -f lavfi
+FILTER_METADATA_COMMAND = ffprobe$(PROGSSUF)$(EXESUF) -of compact=p=0 -show_entries frame=pkt_pts:frame_tags -bitexact -f lavfi
 
 SCENEDETECT_DEPS = FFPROBE LAVFI_INDEV MOVIE_FILTER SELECT_FILTER SCALE_FILTER \
                    AVCODEC AVDEVICE MOV_DEMUXER SVQ3_DECODER ZLIB
diff --git a/tests/fate/flac.mak b/tests/fate/flac.mak
index 4a13404d..115cc965 100644
--- a/tests/fate/flac.mak
+++ b/tests/fate/flac.mak
@@ -6,6 +6,7 @@ FATE_FLAC += fate-flac-16-chmode-indep                                  \
              fate-flac-16-lpc-cholesky                                  \
              fate-flac-16-lpc-levinson                                  \
              fate-flac-24-comp-8                                        \
+             fate-flac-rice-params                                      \
 
 fate-flac-16-chmode-%: OPTS = -ch_mode $(@:fate-flac-16-chmode-%=%)
 fate-flac-16-fixed:    OPTS = -lpc_type fixed
@@ -19,6 +20,9 @@ fate-flac-24-comp-%: OPTS = -compression_level $(@:fate-flac-24-comp-%=%)
 fate-flac-24-%: REF = $(SAMPLES)/audio-reference/divertimenti_2ch_96kHz_s24.wav
 fate-flac-24-%: CMD = enc_dec_pcm flac wav s24le $(subst $(SAMPLES),$(TARGET_SAMPLES),$(REF)) -c flac $(OPTS)
 
+fate-flac-rice-params: REF = $(SAMPLES)/audio-reference/chorusnoise_2ch_44kHz_s16.wav
+fate-flac-rice-params: CMD = enc_dec_pcm flac wav s16le $(subst $(SAMPLES),$(TARGET_SAMPLES),$(REF)) -c flac
+
 fate-flac-%: CMP = oneoff
 fate-flac-%: FUZZ = 0
 
diff --git a/tests/fate/image.mak b/tests/fate/image.mak
index 6f5e4cb2..2224e3e5 100644
--- a/tests/fate/image.mak
+++ b/tests/fate/image.mak
@@ -30,6 +30,20 @@ fate-brenderpix: $(FATE_BRENDERPIX-yes)
 FATE_IMAGE-$(call PARSERDEMDEC, BMP, IMAGE2PIPE, BMP) += fate-bmpparser
 fate-bmpparser: CMD = framecrc -f image2pipe -i $(TARGET_SAMPLES)/bmp/numbers.bmp -pix_fmt rgb24
 
+define FATE_IMGSUITE_DDS
+FATE_DDS += fate-dds-$(1)
+fate-dds-$(1): CMD = framecrc -i $(TARGET_SAMPLES)/dds/fate_$(1).dds $(DDS_OPTS_$(1))
+endef
+
+DDS_OPTS_pal     = -sws_flags +accurate_rnd+bitexact -pix_fmt rgba
+DDS_OPTS_pal-ati = -sws_flags +accurate_rnd+bitexact -pix_fmt rgba
+DDS_FMT = argb argb-aexp dx10-bc1 dx10-bc1a dx10-bc2 dx10-bc3 dx10-bc4 dx10-bc5 dxt1 dxt1a dxt1-normalmap dxt2 dxt3 dxt4 dxt5 dxt5-aexp dxt5-normalmap dxt5-normalmap-ati dxt5-rbxg dxt5-rgxb dxt5-rxbg dxt5-rxgb dxt5-xgbr dxt5-xgxr dxt5-xrbg dxt5-ycocg dxt5-ycocg-scaled pal pal-ati rgb16 rgb24 rgba rgtc1s rgtc1u rgtc2s rgtc2u rgtc2u-xy uyvy xbgr xrgb y ya ycocg yuyv
+$(foreach FMT,$(DDS_FMT),$(eval $(call FATE_IMGSUITE_DDS,$(FMT))))
+
+FATE_DDS-$(call DEMDEC, IMAGE2, DDS) += $(FATE_DDS)
+FATE_IMAGE += $(FATE_DDS-yes)
+fate-dds: $(FATE_DDS-yes)
+
 FATE_IMAGE-$(call DEMDEC, IMAGE2, DPX) += fate-dpx
 fate-dpx: CMD = framecrc -i $(TARGET_SAMPLES)/dpx/lighthouse_rgb48.dpx
 
@@ -80,36 +94,17 @@ fate-png: $(FATE_PNG-yes)
 FATE_IMAGE-$(call DEMDEC, IMAGE2, PTX) += fate-ptx
 fate-ptx: CMD = framecrc -i $(TARGET_SAMPLES)/ptx/_113kw_pic.ptx -pix_fmt rgb24
 
-FATE_SGI += fate-sgi-gray
-fate-sgi-gray: CMD = framecrc -i $(TARGET_SAMPLES)/sgi/lena_gray.sgi -pix_fmt gray
-
-FATE_SGI += fate-sgi-gray16
-fate-sgi-gray16: CMD = framecrc -i $(TARGET_SAMPLES)/sgi/lena_gray16.sgi -pix_fmt gray16le
-
-FATE_SGI += fate-sgi-rgb24
-fate-sgi-rgb24: CMD = framecrc -i $(TARGET_SAMPLES)/sgi/lena_rgb24.sgi -pix_fmt rgb24
-
-FATE_SGI += fate-sgi-rgb24-rle
-fate-sgi-rgb24-rle: CMD = framecrc -i $(TARGET_SAMPLES)/sgi/uvmap_rgb24_rle.sgi -pix_fmt rgb24
-
-FATE_SGI += fate-sgi-rgb48
-fate-sgi-rgb48: CMD = framecrc -i $(TARGET_SAMPLES)/sgi/lena_rgb48.sgi -pix_fmt rgb48be
-
-FATE_SGI += fate-sgi-rgb48-rle
-fate-sgi-rgb48-rle: CMD = framecrc -i $(TARGET_SAMPLES)/sgi/uvmap_rgb48_rle.sgi -pix_fmt rgb48be
-
-FATE_SGI += fate-sgi-rgba
-fate-sgi-rgba: CMD = framecrc -i $(TARGET_SAMPLES)/sgi/lena_rgba.sgi -pix_fmt rgba
-
-FATE_SGI += fate-sgi-rgba64
-fate-sgi-rgba64: CMD = framecrc -i $(TARGET_SAMPLES)/sgi/lena_rgba64.sgi -pix_fmt rgba64be
+define FATE_IMGSUITE_SGI
+FATE_SGI += fate-sgi-$(1) fate-sgi-$(1)-rle
+fate-sgi-$(1): CMD = framecrc -i $(TARGET_SAMPLES)/sgi/libav_$(1).sgi -sws_flags +accurate_rnd+bitexact
+fate-sgi-$(1)-rle: CMD = framecrc -i $(TARGET_SAMPLES)/sgi/libav_$(1)_rle.sgi -sws_flags +accurate_rnd+bitexact
+endef
 
-FATE_SGI += fate-sgi-rgba64-rle
-fate-sgi-rgba64-rle: CMD = framecrc -i $(TARGET_SAMPLES)/sgi/maya_rgba64_rle.sgi -pix_fmt rgba64be
+SGI_COLORSPACES = gray8 gray16 rgb24 rgb48 rgba rgba64
+$(foreach CLSP,$(SGI_COLORSPACES),$(eval $(call FATE_IMGSUITE_SGI,$(CLSP))))
 
 FATE_SGI-$(call DEMDEC, IMAGE2, SGI) += $(FATE_SGI)
-
-FATE_SAMPLES_AVCONV += $(FATE_SGI-yes)
+FATE_IMAGE += $(FATE_SGI-yes)
 fate-sgi: $(FATE_SGI-yes)
 
 FATE_SUNRASTER += fate-sunraster-1bit-raw
diff --git a/tests/fate/libavcodec.mak b/tests/fate/libavcodec.mak
index ae8ef478..22c08394 100644
--- a/tests/fate/libavcodec.mak
+++ b/tests/fate/libavcodec.mak
@@ -34,11 +34,10 @@ fate-mathops: CMD = run libavcodec/mathops-test
 fate-mathops: CMP = null
 fate-mathops: REF = /dev/null
 
-FATE_LIBAVCODEC-$(call ENCDEC, FLAC, FLAC) += fate-api-flac
-fate-api-flac: libavcodec/api-flac-test$(EXESUF)
-fate-api-flac: CMD = run libavcodec/api-flac-test
-fate-api-flac: CMP = null
-fate-api-flac: REF = /dev/null
+FATE_LIBAVCODEC-$(CONFIG_JPEG2000_ENCODER) += fate-j2k-dwt
+fate-j2k-dwt: libavcodec/jpeg2000dwt-test$(EXESUF)
+fate-j2k-dwt: CMD = run libavcodec/jpeg2000dwt-test
+
 
 FATE-$(CONFIG_AVCODEC) += $(FATE_LIBAVCODEC-yes)
 fate-libavcodec: $(FATE_LIBAVCODEC-yes)
diff --git a/tests/fate/libavformat.mak b/tests/fate/libavformat.mak
index a9c02bcb..1d6aa863 100644
--- a/tests/fate/libavformat.mak
+++ b/tests/fate/libavformat.mak
@@ -1,3 +1,7 @@
+FATE_LIBAVFORMAT-$(HAVE_PTHREADS) += fate-async
+fate-async: libavformat/async-test$(EXESUF)
+fate-async: CMD = run libavformat/async-test
+
 FATE_LIBAVFORMAT-$(CONFIG_NETWORK) += fate-noproxy
 fate-noproxy: libavformat/noproxy-test$(EXESUF)
 fate-noproxy: CMD = run libavformat/noproxy-test
@@ -14,5 +18,9 @@ FATE_LIBAVFORMAT-yes += fate-url
 fate-url: libavformat/url-test$(EXESUF)
 fate-url: CMD = run libavformat/url-test
 
+FATE_LIBAVFORMAT-$(CONFIG_MOV_MUXER) += fate-movenc
+fate-movenc: libavformat/movenc-test$(EXESUF)
+fate-movenc: CMD = run libavformat/movenc-test
+
 FATE-$(CONFIG_AVFORMAT) += $(FATE_LIBAVFORMAT-yes)
 fate-libavformat: $(FATE_LIBAVFORMAT)
diff --git a/tests/fate/libavresample.mak b/tests/fate/libavresample.mak
index c854a477..65c0898d 100644
--- a/tests/fate/libavresample.mak
+++ b/tests/fate/libavresample.mak
@@ -38,7 +38,7 @@ fate-lavr-resample-$(3)-$(1)-$(2): CMD = ffmpeg -i $(TARGET_PATH)/tests/data/asy
 fate-lavr-resample-$(3)-$(1)-$(2): CMP = oneoff
 fate-lavr-resample-$(3)-$(1)-$(2): CMP_UNIT = $(5)
 fate-lavr-resample-$(3)-$(1)-$(2): FUZZ = 6
-fate-lavr-resample-$(3)-$(1)-$(2): REF = $(SAMPLES)/lavr/lavr-resample-$(3)-$(1)-$(2)-v2
+fate-lavr-resample-$(3)-$(1)-$(2): REF = $(SAMPLES)/lavr/lavr-resample-$(3)-$(1)-$(2)-v3
 endef
 
 $(call CROSS_TEST,$(SAMPLERATES),RESAMPLE,s16p,s16le,s16)
diff --git a/tests/fate/libavutil.mak b/tests/fate/libavutil.mak
index ff052e06..022ae6ad 100644
--- a/tests/fate/libavutil.mak
+++ b/tests/fate/libavutil.mak
@@ -121,6 +121,10 @@ FATE_LIBAVUTIL += fate-xtea
 fate-xtea: libavutil/xtea-test$(EXESUF)
 fate-xtea: CMD = run libavutil/xtea-test
 
+FATE_LIBAVUTIL += fate-tea
+fate-tea: libavutil/tea-test$(EXESUF)
+fate-tea: CMD = run libavutil/tea-test
+
 FATE_LIBAVUTIL += fate-opt
 fate-opt: libavutil/opt-test$(EXESUF)
 fate-opt: CMD = run libavutil/opt-test
diff --git a/tests/fate/libswresample.mak b/tests/fate/libswresample.mak
index 24b7d665..064c0f61 100644
--- a/tests/fate/libswresample.mak
+++ b/tests/fate/libswresample.mak
@@ -26,124 +26,124 @@ endef
 #you can use this if you need to update it!
 #make -k  `make fate-list | grep swr` | egrep 'TEST|stddev' | tr '\n' '@' | sed 's#TEST *\([^@]*\)@stddev: *\([0-9.]*\)[^b@]*bytes: *\([0-9]*\) */ *\([0-9]*\)@#fate-\1: CMP_TARGET = \2@fate-\1: SIZE_TOLERANCE = \3 - \4@@#g' | tr '@' '\n'
 
-fate-swr-resample-dblp-2626-44100: CMP_TARGET = 1393.01
+fate-swr-resample-dblp-2626-44100: CMP_TARGET = 1352.68
 fate-swr-resample-dblp-2626-44100: SIZE_TOLERANCE = 31512 - 20480
 
-fate-swr-resample-dblp-2626-48000: CMP_TARGET = 1393.01
+fate-swr-resample-dblp-2626-48000: CMP_TARGET = 1352.65
 fate-swr-resample-dblp-2626-48000: SIZE_TOLERANCE = 31512 - 20480
 
-fate-swr-resample-dblp-2626-8000: CMP_TARGET = 1393.90
+fate-swr-resample-dblp-2626-8000: CMP_TARGET = 1353.08
 fate-swr-resample-dblp-2626-8000: SIZE_TOLERANCE = 31512 - 20482
 
-fate-swr-resample-dblp-2626-96000: CMP_TARGET = 1393.01
+fate-swr-resample-dblp-2626-96000: CMP_TARGET = 1352.67
 fate-swr-resample-dblp-2626-96000: SIZE_TOLERANCE = 31512 - 20480
 
-fate-swr-resample-dblp-44100-2626: CMP_TARGET = 185.84
+fate-swr-resample-dblp-44100-2626: CMP_TARGET = 185.82
 fate-swr-resample-dblp-44100-2626: SIZE_TOLERANCE = 529200 - 20490
 
 fate-swr-resample-dblp-44100-48000: CMP_TARGET = 9.70
 fate-swr-resample-dblp-44100-48000: SIZE_TOLERANCE = 529200 - 20482
 
-fate-swr-resample-dblp-44100-8000: CMP_TARGET = 75.46
+fate-swr-resample-dblp-44100-8000: CMP_TARGET = 75.45
 fate-swr-resample-dblp-44100-8000: SIZE_TOLERANCE = 529200 - 20486
 
 fate-swr-resample-dblp-44100-96000: CMP_TARGET = 11.47
 fate-swr-resample-dblp-44100-96000: SIZE_TOLERANCE = 529200 - 20482
 
-fate-swr-resample-dblp-48000-2626: CMP_TARGET = 456.55
+fate-swr-resample-dblp-48000-2626: CMP_TARGET = 456.51
 fate-swr-resample-dblp-48000-2626: SIZE_TOLERANCE = 576000 - 20510
 
-fate-swr-resample-dblp-48000-44100: CMP_TARGET = 1.16
+fate-swr-resample-dblp-48000-44100: CMP_TARGET = 1.02
 fate-swr-resample-dblp-48000-44100: SIZE_TOLERANCE = 576000 - 20480
 
-fate-swr-resample-dblp-48000-8000: CMP_TARGET = 62.41
+fate-swr-resample-dblp-48000-8000: CMP_TARGET = 62.38
 fate-swr-resample-dblp-48000-8000: SIZE_TOLERANCE = 576000 - 20484
 
 fate-swr-resample-dblp-48000-96000: CMP_TARGET = 0.47
 fate-swr-resample-dblp-48000-96000: SIZE_TOLERANCE = 576000 - 20480
 
-fate-swr-resample-dblp-8000-2626: CMP_TARGET = 2506.01
+fate-swr-resample-dblp-8000-2626: CMP_TARGET = 2506.02
 fate-swr-resample-dblp-8000-2626: SIZE_TOLERANCE = 96000 - 20486
 
 fate-swr-resample-dblp-8000-44100: CMP_TARGET = 15.09
 fate-swr-resample-dblp-8000-44100: SIZE_TOLERANCE = 96000 - 20480
 
-fate-swr-resample-dblp-8000-48000: CMP_TARGET = 14.68
+fate-swr-resample-dblp-8000-48000: CMP_TARGET = 14.69
 fate-swr-resample-dblp-8000-48000: SIZE_TOLERANCE = 96000 - 20480
 
-fate-swr-resample-dblp-8000-96000: CMP_TARGET = 13.82
+fate-swr-resample-dblp-8000-96000: CMP_TARGET = 13.81
 fate-swr-resample-dblp-8000-96000: SIZE_TOLERANCE = 96000 - 20480
 
-fate-swr-resample-dblp-96000-2626: CMP_TARGET = 675.14
+fate-swr-resample-dblp-96000-2626: CMP_TARGET = 675.08
 fate-swr-resample-dblp-96000-2626: SIZE_TOLERANCE = 1152000 - 20474
 
-fate-swr-resample-dblp-96000-44100: CMP_TARGET = 1.58
+fate-swr-resample-dblp-96000-44100: CMP_TARGET = 1.45
 fate-swr-resample-dblp-96000-44100: SIZE_TOLERANCE = 1152000 - 20480
 
-fate-swr-resample-dblp-96000-48000: CMP_TARGET = 1.04
+fate-swr-resample-dblp-96000-48000: CMP_TARGET = 1.00
 fate-swr-resample-dblp-96000-48000: SIZE_TOLERANCE = 1152000 - 20480
 
-fate-swr-resample-dblp-96000-8000: CMP_TARGET = 58.60
+fate-swr-resample-dblp-96000-8000: CMP_TARGET = 58.57
 fate-swr-resample-dblp-96000-8000: SIZE_TOLERANCE = 1152000 - 20496
 
-fate-swr-resample-fltp-2626-44100: CMP_TARGET = 1393.01
+fate-swr-resample-fltp-2626-44100: CMP_TARGET = 1352.68
 fate-swr-resample-fltp-2626-44100: SIZE_TOLERANCE = 31512 - 20480
 
-fate-swr-resample-fltp-2626-48000: CMP_TARGET = 1393.01
+fate-swr-resample-fltp-2626-48000: CMP_TARGET = 1352.65
 fate-swr-resample-fltp-2626-48000: SIZE_TOLERANCE = 31512 - 20480
 
-fate-swr-resample-fltp-2626-8000: CMP_TARGET = 1393.90
+fate-swr-resample-fltp-2626-8000: CMP_TARGET = 1353.08
 fate-swr-resample-fltp-2626-8000: SIZE_TOLERANCE = 31512 - 20482
 
-fate-swr-resample-fltp-2626-96000: CMP_TARGET = 1393.01
+fate-swr-resample-fltp-2626-96000: CMP_TARGET = 1352.67
 fate-swr-resample-fltp-2626-96000: SIZE_TOLERANCE = 31512 - 20480
 
-fate-swr-resample-fltp-44100-2626: CMP_TARGET = 185.84
+fate-swr-resample-fltp-44100-2626: CMP_TARGET = 185.82
 fate-swr-resample-fltp-44100-2626: SIZE_TOLERANCE = 529200 - 20490
 
 fate-swr-resample-fltp-44100-48000: CMP_TARGET = 9.70
 fate-swr-resample-fltp-44100-48000: SIZE_TOLERANCE = 529200 - 20482
 
-fate-swr-resample-fltp-44100-8000: CMP_TARGET = 75.46
+fate-swr-resample-fltp-44100-8000: CMP_TARGET = 75.45
 fate-swr-resample-fltp-44100-8000: SIZE_TOLERANCE = 529200 - 20486
 
 fate-swr-resample-fltp-44100-96000: CMP_TARGET = 11.47
 fate-swr-resample-fltp-44100-96000: SIZE_TOLERANCE = 529200 - 20482
 
-fate-swr-resample-fltp-48000-2626: CMP_TARGET = 456.55
+fate-swr-resample-fltp-48000-2626: CMP_TARGET = 456.51
 fate-swr-resample-fltp-48000-2626: SIZE_TOLERANCE = 576000 - 20510
 
-fate-swr-resample-fltp-48000-44100: CMP_TARGET = 1.16
+fate-swr-resample-fltp-48000-44100: CMP_TARGET = 1.02
 fate-swr-resample-fltp-48000-44100: SIZE_TOLERANCE = 576000 - 20480
 
-fate-swr-resample-fltp-48000-8000: CMP_TARGET = 62.41
+fate-swr-resample-fltp-48000-8000: CMP_TARGET = 62.38
 fate-swr-resample-fltp-48000-8000: SIZE_TOLERANCE = 576000 - 20484
 
 fate-swr-resample-fltp-48000-96000: CMP_TARGET = 0.47
 fate-swr-resample-fltp-48000-96000: SIZE_TOLERANCE = 576000 - 20480
 
-fate-swr-resample-fltp-8000-2626: CMP_TARGET = 2506.01
+fate-swr-resample-fltp-8000-2626: CMP_TARGET = 2506.02
 fate-swr-resample-fltp-8000-2626: SIZE_TOLERANCE = 96000 - 20486
 
 fate-swr-resample-fltp-8000-44100: CMP_TARGET = 15.09
 fate-swr-resample-fltp-8000-44100: SIZE_TOLERANCE = 96000 - 20480
 
-fate-swr-resample-fltp-8000-48000: CMP_TARGET = 14.68
+fate-swr-resample-fltp-8000-48000: CMP_TARGET = 14.69
 fate-swr-resample-fltp-8000-48000: SIZE_TOLERANCE = 96000 - 20480
 
-fate-swr-resample-fltp-8000-96000: CMP_TARGET = 13.82
+fate-swr-resample-fltp-8000-96000: CMP_TARGET = 13.81
 fate-swr-resample-fltp-8000-96000: SIZE_TOLERANCE = 96000 - 20480
 
-fate-swr-resample-fltp-96000-2626: CMP_TARGET = 675.14
+fate-swr-resample-fltp-96000-2626: CMP_TARGET = 675.08
 fate-swr-resample-fltp-96000-2626: SIZE_TOLERANCE = 1152000 - 20474
 
-fate-swr-resample-fltp-96000-44100: CMP_TARGET = 1.58
+fate-swr-resample-fltp-96000-44100: CMP_TARGET = 1.45
 fate-swr-resample-fltp-96000-44100: SIZE_TOLERANCE = 1152000 - 20480
 
-fate-swr-resample-fltp-96000-48000: CMP_TARGET = 1.04
+fate-swr-resample-fltp-96000-48000: CMP_TARGET = 1.00
 fate-swr-resample-fltp-96000-48000: SIZE_TOLERANCE = 1152000 - 20480
 
-fate-swr-resample-fltp-96000-8000: CMP_TARGET = 58.60
+fate-swr-resample-fltp-96000-8000: CMP_TARGET = 58.57
 fate-swr-resample-fltp-96000-8000: SIZE_TOLERANCE = 1152000 - 20496
 
 fate-swr-resample-s16p-2626-44100: CMP_TARGET = 1393.01
@@ -209,63 +209,64 @@ fate-swr-resample-s16p-96000-8000: SIZE_TOLERANCE = 1152000 - 20496
 fate-swr-resample-s32p-2626-44100: CMP_TARGET = 1393.01
 fate-swr-resample-s32p-2626-44100: SIZE_TOLERANCE = 31512 - 20480
 
-fate-swr-resample-s32p-2626-48000: CMP_TARGET = 1393.01
+fate-swr-resample-s32p-2626-48000: CMP_TARGET = 1392.99
 fate-swr-resample-s32p-2626-48000: SIZE_TOLERANCE = 31512 - 20480
 
-fate-swr-resample-s32p-2626-8000: CMP_TARGET = 1393.90
+fate-swr-resample-s32p-2626-8000: CMP_TARGET = 1393.89
 fate-swr-resample-s32p-2626-8000: SIZE_TOLERANCE = 31512 - 20482
 
-fate-swr-resample-s32p-2626-96000: CMP_TARGET = 1393.01
+fate-swr-resample-s32p-2626-96000: CMP_TARGET = 1393.00
 fate-swr-resample-s32p-2626-96000: SIZE_TOLERANCE = 31512 - 20480
 
-fate-swr-resample-s32p-44100-2626: CMP_TARGET = 185.84
+fate-swr-resample-s32p-44100-2626: CMP_TARGET = 185.82
 fate-swr-resample-s32p-44100-2626: SIZE_TOLERANCE = 529200 - 20490
 
 fate-swr-resample-s32p-44100-48000: CMP_TARGET = 9.70
 fate-swr-resample-s32p-44100-48000: SIZE_TOLERANCE = 529200 - 20482
 
-fate-swr-resample-s32p-44100-8000: CMP_TARGET = 75.46
+fate-swr-resample-s32p-44100-8000: CMP_TARGET = 75.45
 fate-swr-resample-s32p-44100-8000: SIZE_TOLERANCE = 529200 - 20486
 
 fate-swr-resample-s32p-44100-96000: CMP_TARGET = 11.47
 fate-swr-resample-s32p-44100-96000: SIZE_TOLERANCE = 529200 - 20482
 
-fate-swr-resample-s32p-48000-2626: CMP_TARGET = 456.55
+fate-swr-resample-s32p-48000-2626: CMP_TARGET = 456.51
 fate-swr-resample-s32p-48000-2626: SIZE_TOLERANCE = 576000 - 20510
 
-fate-swr-resample-s32p-48000-44100: CMP_TARGET = 1.16
+fate-swr-resample-s32p-48000-44100: CMP_TARGET = 1.02
 fate-swr-resample-s32p-48000-44100: SIZE_TOLERANCE = 576000 - 20480
 
-fate-swr-resample-s32p-48000-8000: CMP_TARGET = 62.41
+fate-swr-resample-s32p-48000-8000: CMP_TARGET = 62.38
 fate-swr-resample-s32p-48000-8000: SIZE_TOLERANCE = 576000 - 20484
 
 fate-swr-resample-s32p-48000-96000: CMP_TARGET = 0.47
 fate-swr-resample-s32p-48000-96000: SIZE_TOLERANCE = 576000 - 20480
 
-fate-swr-resample-s32p-8000-2626: CMP_TARGET = 2506.01
+fate-swr-resample-s32p-8000-2626: CMP_TARGET = 2506.02
 fate-swr-resample-s32p-8000-2626: SIZE_TOLERANCE = 96000 - 20486
 
 fate-swr-resample-s32p-8000-44100: CMP_TARGET = 15.09
 fate-swr-resample-s32p-8000-44100: SIZE_TOLERANCE = 96000 - 20480
 
-fate-swr-resample-s32p-8000-48000: CMP_TARGET = 14.68
+fate-swr-resample-s32p-8000-48000: CMP_TARGET = 14.69
 fate-swr-resample-s32p-8000-48000: SIZE_TOLERANCE = 96000 - 20480
 
-fate-swr-resample-s32p-8000-96000: CMP_TARGET = 13.82
+fate-swr-resample-s32p-8000-96000: CMP_TARGET = 13.81
 fate-swr-resample-s32p-8000-96000: SIZE_TOLERANCE = 96000 - 20480
 
-fate-swr-resample-s32p-96000-2626: CMP_TARGET = 675.14
+fate-swr-resample-s32p-96000-2626: CMP_TARGET = 675.08
 fate-swr-resample-s32p-96000-2626: SIZE_TOLERANCE = 1152000 - 20474
 
-fate-swr-resample-s32p-96000-44100: CMP_TARGET = 1.58
+fate-swr-resample-s32p-96000-44100: CMP_TARGET = 1.45
 fate-swr-resample-s32p-96000-44100: SIZE_TOLERANCE = 1152000 - 20480
 
-fate-swr-resample-s32p-96000-48000: CMP_TARGET = 1.04
+fate-swr-resample-s32p-96000-48000: CMP_TARGET = 1.00
 fate-swr-resample-s32p-96000-48000: SIZE_TOLERANCE = 1152000 - 20480
 
-fate-swr-resample-s32p-96000-8000: CMP_TARGET = 58.60
+fate-swr-resample-s32p-96000-8000: CMP_TARGET = 58.57
 fate-swr-resample-s32p-96000-8000: SIZE_TOLERANCE = 1152000 - 20496
 
+
 define ARESAMPLE_LIN
 FATE_SWR_RESAMPLE += fate-swr-resample_lin-$(3)-$(1)-$(2)
 fate-swr-resample_lin-$(3)-$(1)-$(2): tests/data/asynth-$(1)-1.wav
@@ -277,58 +278,58 @@ fate-swr-resample_lin-$(3)-$(1)-$(2): FUZZ = 0.1
 fate-swr-resample_lin-$(3)-$(1)-$(2): REF = tests/data/asynth-$(1)-1.wav
 endef
 
-fate-swr-resample_lin-s16p-8000-44100: CMP_TARGET = 14.63
+fate-swr-resample_lin-s16p-8000-44100: CMP_TARGET = 14.61
 fate-swr-resample_lin-s16p-8000-44100: SIZE_TOLERANCE = 96000 - 20480
 
 fate-swr-resample_lin-s16p-8000-48000: CMP_TARGET = 14.53
 fate-swr-resample_lin-s16p-8000-48000: SIZE_TOLERANCE = 96000 - 20480
 
-fate-swr-resample_lin-s16p-44100-8000: CMP_TARGET = 75.45
+fate-swr-resample_lin-s16p-44100-8000: CMP_TARGET = 75.41
 fate-swr-resample_lin-s16p-44100-8000: SIZE_TOLERANCE = 529200 - 20486
 
-fate-swr-resample_lin-s16p-44100-48000: CMP_TARGET = 9.68
+fate-swr-resample_lin-s16p-44100-48000: CMP_TARGET = 9.66
 fate-swr-resample_lin-s16p-44100-48000: SIZE_TOLERANCE = 529200 - 20482
 
-fate-swr-resample_lin-s16p-48000-8000: CMP_TARGET = 62.41
+fate-swr-resample_lin-s16p-48000-8000: CMP_TARGET = 62.39
 fate-swr-resample_lin-s16p-48000-8000: SIZE_TOLERANCE = 576000 - 20484
 
 fate-swr-resample_lin-s16p-48000-44100: CMP_TARGET = 0.68
 fate-swr-resample_lin-s16p-48000-44100: SIZE_TOLERANCE = 576000 - 20480
 
-fate-swr-resample_lin-fltp-8000-44100: CMP_TARGET = 14.61
+fate-swr-resample_lin-fltp-8000-44100: CMP_TARGET = 14.59
 fate-swr-resample_lin-fltp-8000-44100: SIZE_TOLERANCE = 96000 - 20480
 
 fate-swr-resample_lin-fltp-8000-48000: CMP_TARGET = 14.50
 fate-swr-resample_lin-fltp-8000-48000: SIZE_TOLERANCE = 96000 - 20480
 
-fate-swr-resample_lin-fltp-44100-8000: CMP_TARGET = 75.45
+fate-swr-resample_lin-fltp-44100-8000: CMP_TARGET = 75.38
 fate-swr-resample_lin-fltp-44100-8000: SIZE_TOLERANCE = 529200 - 20486
 
-fate-swr-resample_lin-fltp-44100-48000: CMP_TARGET = 9.67
+fate-swr-resample_lin-fltp-44100-48000: CMP_TARGET = 9.65
 fate-swr-resample_lin-fltp-44100-48000: SIZE_TOLERANCE = 529200 - 20482
 
-fate-swr-resample_lin-fltp-48000-8000: CMP_TARGET = 62.41
+fate-swr-resample_lin-fltp-48000-8000: CMP_TARGET = 62.36
 fate-swr-resample_lin-fltp-48000-8000: SIZE_TOLERANCE = 576000 - 20484
 
-fate-swr-resample_lin-fltp-48000-44100: CMP_TARGET = 0.63
+fate-swr-resample_lin-fltp-48000-44100: CMP_TARGET = 0.26
 fate-swr-resample_lin-fltp-48000-44100: SIZE_TOLERANCE = 576000 - 20480
 
-fate-swr-resample_lin-dblp-8000-44100: CMP_TARGET = 14.61
+fate-swr-resample_lin-dblp-8000-44100: CMP_TARGET = 14.59
 fate-swr-resample_lin-dblp-8000-44100: SIZE_TOLERANCE = 96000 - 20480
 
 fate-swr-resample_lin-dblp-8000-48000: CMP_TARGET = 14.50
 fate-swr-resample_lin-dblp-8000-48000: SIZE_TOLERANCE = 96000 - 20480
 
-fate-swr-resample_lin-dblp-44100-8000: CMP_TARGET = 75.45
+fate-swr-resample_lin-dblp-44100-8000: CMP_TARGET = 75.38
 fate-swr-resample_lin-dblp-44100-8000: SIZE_TOLERANCE = 529200 - 20486
 
-fate-swr-resample_lin-dblp-44100-48000: CMP_TARGET = 9.67
+fate-swr-resample_lin-dblp-44100-48000: CMP_TARGET = 9.65
 fate-swr-resample_lin-dblp-44100-48000: SIZE_TOLERANCE = 529200 - 20482
 
-fate-swr-resample_lin-dblp-48000-8000: CMP_TARGET = 62.41
+fate-swr-resample_lin-dblp-48000-8000: CMP_TARGET = 62.36
 fate-swr-resample_lin-dblp-48000-8000: SIZE_TOLERANCE = 576000 - 20484
 
-fate-swr-resample_lin-dblp-48000-44100: CMP_TARGET = 0.63
+fate-swr-resample_lin-dblp-48000-44100: CMP_TARGET = 0.26
 fate-swr-resample_lin-dblp-48000-44100: SIZE_TOLERANCE = 576000 - 20480
 
 define ARESAMPLE_NN
@@ -365,16 +366,16 @@ fate-swr-resample_async-$(3)-$(1)-$(2): FUZZ = 0.1
 fate-swr-resample_async-$(3)-$(1)-$(2): REF = tests/data/asynth-$(1)-1.wav
 endef
 
-fate-swr-resample_async-fltp-44100-8000: CMP_TARGET = 4031.60
+fate-swr-resample_async-fltp-44100-8000: CMP_TARGET = 4020.62
 fate-swr-resample_async-fltp-44100-8000: SIZE_TOLERANCE = 529200 - 20310
 
-fate-swr-resample_async-fltp-8000-44100: CMP_TARGET = 11185.34
+fate-swr-resample_async-fltp-8000-44100: CMP_TARGET = 11186.69
 fate-swr-resample_async-fltp-8000-44100: SIZE_TOLERANCE = 96000 - 20344
 
-fate-swr-resample_async-s16p-44100-8000: CMP_TARGET = 4031.59
+fate-swr-resample_async-s16p-44100-8000: CMP_TARGET = 4020.73
 fate-swr-resample_async-s16p-44100-8000: SIZE_TOLERANCE = 529200 - 20310
 
-fate-swr-resample_async-s16p-8000-44100: CMP_TARGET = 11185.65
+fate-swr-resample_async-s16p-8000-44100: CMP_TARGET = 11187.01
 fate-swr-resample_async-s16p-8000-44100: SIZE_TOLERANCE = 96000 - 20344
 
 $(call CROSS_TEST,$(SAMPLERATES),ARESAMPLE,s16p,s16le,s16)
diff --git a/tests/fate/lossless-audio.mak b/tests/fate/lossless-audio.mak
index 1d278da0..58641ab0 100644
--- a/tests/fate/lossless-audio.mak
+++ b/tests/fate/lossless-audio.mak
@@ -26,7 +26,7 @@ FATE_SAMPLES_LOSSLESS_AUDIO-$(call DEMDEC, TTA, TTA) += fate-lossless-tta-encryp
 fate-lossless-tta-encrypted: CMD = crc -password ffmpeg -i $(TARGET_SAMPLES)/lossless-audio/encrypted.tta
 
 FATE_SAMPLES_LOSSLESS_AUDIO-$(call DEMDEC, ASF, WMALOSSLESS) += fate-lossless-wma
-fate-lossless-wma: CMD = md5 -i $(TARGET_SAMPLES)/lossless-audio/luckynight-partial.wma -f s16le
+fate-lossless-wma: CMD = md5 -i $(TARGET_SAMPLES)/lossless-audio/luckynight-partial.wma -f s16le -frames 209
 
 FATE_SAMPLES_LOSSLESS_AUDIO += $(FATE_SAMPLES_LOSSLESS_AUDIO-yes)
 
diff --git a/tests/fate/microsoft.mak b/tests/fate/microsoft.mak
index 4e8ae511..3da25a7c 100644
--- a/tests/fate/microsoft.mak
+++ b/tests/fate/microsoft.mak
@@ -66,6 +66,9 @@ fate-vc1-ism: CMD = framecrc -i $(TARGET_SAMPLES)/isom/vc1-wmapro.ism -an
 FATE_MICROSOFT-$(CONFIG_VC1_DECODER) += $(FATE_VC1-yes)
 fate-vc1: $(FATE_VC1-yes)
 
+FATE_MICROSOFT-$(CONFIG_ASF_DEMUXER) += fate-asf-repldata
+fate-asf-repldata: CMD = framecrc -i $(TARGET_SAMPLES)/asf/bug821-2.asf -c copy
+
 FATE_MICROSOFT += $(FATE_MICROSOFT-yes)
 
 FATE_SAMPLES_FFMPEG += $(FATE_MICROSOFT)
diff --git a/tests/fate/mp3.mak b/tests/fate/mp3.mak
index 57ee0841..a9164f84 100644
--- a/tests/fate/mp3.mak
+++ b/tests/fate/mp3.mak
@@ -1,41 +1,40 @@
 FATE_MP3 += fate-mp3-float-conf-compl
-fate-mp3-float-conf-compl: CMD = pcm -acodec mp3float -i $(TARGET_SAMPLES)/mp3-conformance/compl.bit
-fate-mp3-float-conf-compl: REF = $(SAMPLES)/mp3-conformance/compl.pcm
+fate-mp3-float-conf-compl: CMD = ffmpeg -acodec mp3float -i $(TARGET_SAMPLES)/mp3-conformance/compl.bit -f f32le -
+fate-mp3-float-conf-compl: REF = $(SAMPLES)/mp3-conformance/compl.f32
 
 FATE_MP3 += fate-mp3-float-conf-he_32khz
-fate-mp3-float-conf-he_32khz: CMD = pcm -acodec mp3float -i $(TARGET_SAMPLES)/mp3-conformance/he_32khz.bit -fs 343296
-fate-mp3-float-conf-he_32khz: REF = $(SAMPLES)/mp3-conformance/he_32khz.pcm
+fate-mp3-float-conf-he_32khz: CMD = ffmpeg -acodec mp3float -i $(TARGET_SAMPLES)/mp3-conformance/he_32khz.bit -af atrim=end_sample=171648 -f f32le -
+fate-mp3-float-conf-he_32khz: REF = $(SAMPLES)/mp3-conformance/he_32khz.f32
 
 FATE_MP3 += fate-mp3-float-conf-he_44khz
-fate-mp3-float-conf-he_44khz: CMD = pcm -acodec mp3float -i $(TARGET_SAMPLES)/mp3-conformance/he_44khz.bit -fs 942336
-fate-mp3-float-conf-he_44khz: REF = $(SAMPLES)/mp3-conformance/he_44khz.pcm
+fate-mp3-float-conf-he_44khz: CMD = ffmpeg -acodec mp3float -i $(TARGET_SAMPLES)/mp3-conformance/he_44khz.bit -af atrim=end_sample=471168 -f f32le -
+fate-mp3-float-conf-he_44khz: REF = $(SAMPLES)/mp3-conformance/he_44khz.f32
 
 FATE_MP3 += fate-mp3-float-conf-he_48khz
-fate-mp3-float-conf-he_48khz: CMD = pcm -acodec mp3float -i $(TARGET_SAMPLES)/mp3-conformance/he_48khz.bit -fs 343296
-fate-mp3-float-conf-he_48khz: REF = $(SAMPLES)/mp3-conformance/he_48khz.pcm
+fate-mp3-float-conf-he_48khz: CMD = ffmpeg -acodec mp3float -i $(TARGET_SAMPLES)/mp3-conformance/he_48khz.bit -af atrim=end_sample=171648 -f f32le -
+fate-mp3-float-conf-he_48khz: REF = $(SAMPLES)/mp3-conformance/he_48khz.f32
 
 FATE_MP3 += fate-mp3-float-conf-hecommon
-fate-mp3-float-conf-hecommon: CMD = pcm -acodec mp3float -i $(TARGET_SAMPLES)/mp3-conformance/hecommon.bit -fs 133632
-fate-mp3-float-conf-hecommon: REF = $(SAMPLES)/mp3-conformance/hecommon.pcm
+fate-mp3-float-conf-hecommon: CMD = ffmpeg -acodec mp3float -i $(TARGET_SAMPLES)/mp3-conformance/hecommon.bit -af atrim=end_sample=33408 -f f32le -
+fate-mp3-float-conf-hecommon: REF = $(SAMPLES)/mp3-conformance/hecommon.f32
 
 FATE_MP3 += fate-mp3-float-conf-si
-fate-mp3-float-conf-si: CMD = pcm -acodec mp3float -i $(TARGET_SAMPLES)/mp3-conformance/si.bit -fs 269568
-fate-mp3-float-conf-si: REF = $(SAMPLES)/mp3-conformance/si.pcm
+fate-mp3-float-conf-si: CMD = ffmpeg -acodec mp3float -i $(TARGET_SAMPLES)/mp3-conformance/si.bit -af atrim=end_sample=134784 -f f32le -
+fate-mp3-float-conf-si: REF = $(SAMPLES)/mp3-conformance/si.f32
 
 FATE_MP3 += fate-mp3-float-conf-si_block
-fate-mp3-float-conf-si_block: CMD = pcm -acodec mp3float -i $(TARGET_SAMPLES)/mp3-conformance/si_block.bit -fs 145152
-fate-mp3-float-conf-si_block: REF = $(SAMPLES)/mp3-conformance/si_block.pcm
+fate-mp3-float-conf-si_block: CMD = ffmpeg -acodec mp3float -i $(TARGET_SAMPLES)/mp3-conformance/si_block.bit -af atrim=end_sample=72576 -f f32le -
+fate-mp3-float-conf-si_block: REF = $(SAMPLES)/mp3-conformance/si_block.f32
 
 FATE_MP3 += fate-mp3-float-extra_overread
-fate-mp3-float-extra_overread: CMD = pcm -c:a mp3float -i $(TARGET_SAMPLES)/mpegaudio/extra_overread.mp3
-fate-mp3-float-extra_overread: REF = $(SAMPLES)/mpegaudio/extra_overread.pcm
+fate-mp3-float-extra_overread: CMD = ffmpeg -c:a mp3float -i $(TARGET_SAMPLES)/mpegaudio/extra_overread.mp3 -f f32le -
+fate-mp3-float-extra_overread: REF = $(SAMPLES)/mpegaudio/extra_overread.f32
 
-$(FATE_MP3): CMP = stddev
-$(FATE_MP3): FUZZ = 0.07
+$(FATE_MP3): CMP = oneoff
+$(FATE_MP3): CMP_UNIT = f32
+$(FATE_MP3): FUZZ = 18
 
-ifdef HAVE_NEON
-fate-mp3-float-conf-hecommon: FUZZ = 0.70
-endif
+fate-mp3-float-extra_overread: FUZZ = 23
 
 FATE_MP3-$(call DEMDEC, MP3, MP3FLOAT) += $(FATE_MP3)
 
diff --git a/tests/fate/mpeg4.mak b/tests/fate/mpeg4.mak
index b24d21af..47dfb7f2 100644
--- a/tests/fate/mpeg4.mak
+++ b/tests/fate/mpeg4.mak
@@ -5,7 +5,7 @@ fate-mpeg4-resolution-change-%: CMD = framemd5 -flags +bitexact -idct simple -i
 
 FATE_MPEG4-$(call DEMDEC, H263, H263) := $(addprefix fate-mpeg4-resolution-change-, $(MPEG4_RESOLUTION_CHANGE))
 
-fate-mpeg4-bsf-unpack-bframes: CMD = md5 -i $(TARGET_SAMPLES)/mpeg4/packed_bframes.avi -flags +bitexact  -c:v copy -bsf mpeg4_unpack_bframes -f avi
+fate-mpeg4-bsf-unpack-bframes: CMD = md5 -i $(TARGET_SAMPLES)/mpeg4/packed_bframes.avi -flags +bitexact -fflags +bitexact -c:v copy -bsf mpeg4_unpack_bframes -f avi
 FATE_MPEG4-$(call ALLYES, AVI_DEMUXER MPEG4_UNPACK_BFRAMES_BSF AVI_MUXER) += fate-mpeg4-bsf-unpack-bframes
 
 FATE_SAMPLES_AVCONV += $(FATE_MPEG4-yes)
diff --git a/tests/fate/pcm.mak b/tests/fate/pcm.mak
index 9ba4be50..e6502aae 100644
--- a/tests/fate/pcm.mak
+++ b/tests/fate/pcm.mak
@@ -25,7 +25,7 @@ fate-w64: CMD = crc -i $(TARGET_SAMPLES)/w64/w64-pcm16.w64
 FATE_PCM-$(call ENCMUX, PCM_S24DAUD, DAUD) += fate-dcinema-encode
 fate-dcinema-encode: tests/data/asynth-96000-6.wav
 fate-dcinema-encode: SRC = tests/data/asynth-96000-6.wav
-fate-dcinema-encode: CMD = enc_dec_pcm daud md5 s16le $(SRC) -c:a pcm_s24daud
+fate-dcinema-encode: CMD = enc_dec_pcm daud framemd5 s16le $(SRC) -c:a pcm_s24daud -aframes 20
 
 FATE_FFMPEG += $(FATE_PCM-yes)
 FATE_SAMPLES_AVCONV += $(FATE_SAMPLES_PCM-yes)
diff --git a/tests/fate/probe.mak b/tests/fate/probe.mak
index 9f9dd4a9..4be9356f 100644
--- a/tests/fate/probe.mak
+++ b/tests/fate/probe.mak
@@ -15,6 +15,6 @@ FATE_PROBE_FORMAT = $(FATE_PROBE_FORMAT-yes)
 FATE_EXTERN-$(CONFIG_FFPROBE) += $(FATE_PROBE_FORMAT)
 fate-probe-format: $(FATE_PROBE_FORMAT)
 
-$(FATE_PROBE_FORMAT): ffprobe$(EXESUF)
+$(FATE_PROBE_FORMAT): ffprobe$(PROGSSUF)$(EXESUF)
 $(FATE_PROBE_FORMAT): CMP = oneline
 fate-probe-format-%: CMD = probefmt $(TARGET_SAMPLES)/probe-format/$(@:fate-probe-format-%=%)
diff --git a/tests/fate/screen.mak b/tests/fate/screen.mak
index 9142d497..635e5c40 100644
--- a/tests/fate/screen.mak
+++ b/tests/fate/screen.mak
@@ -29,6 +29,24 @@ fate-fraps-v5: CMD = framecrc -i $(TARGET_SAMPLES)/fraps/fraps-v5-bouncing-balls
 FATE_SCREEN-$(call DEMDEC, AVI, FRAPS) += $(FATE_FRAPS)
 fate-fraps: $(FATE_FRAPS)
 
+FATE_G2M += fate-g2m2
+fate-g2m2: CMD = framecrc -idct simple -i $(TARGET_SAMPLES)/g2m/g2m2.asf -an
+
+FATE_G2M += fate-g2m3
+fate-g2m3: CMD = framecrc -idct simple -i $(TARGET_SAMPLES)/g2m/g2m3.asf -frames:v 20 -an
+
+FATE_G2M += fate-g2m4
+fate-g2m4: CMD = framecrc -idct simple -i $(TARGET_SAMPLES)/g2m/g2m4.asf
+
+FATE_SAMPLES_AVCONV-$(call DEMDEC, ASF, G2M) += $(FATE_G2M)
+fate-g2m: $(FATE_G2M)
+
+FATE_SAMPLES_AVCONV-$(call DEMDEC, AVI, RSCC) += fate-rscc
+fate-rscc: CMD = framecrc -i $(TARGET_SAMPLES)/rscc/pip.avi -an
+
+FATE_SAMPLES_AVCONV-$(call DEMDEC, AVI, SCREENPRESSO) += fate-screenpresso
+fate-screenpresso: CMD = framecrc -i $(TARGET_SAMPLES)/spv1/bunny.avi
+
 FATE_SAMPLES_AVCONV-$(call DEMDEC, ASF, TDSC) += fate-tdsc
 fate-tdsc: CMD = framecrc -idct simple -i $(TARGET_SAMPLES)/tdsc/tdsc.asf -an -pix_fmt bgr24
 
diff --git a/tests/fate/seek.mak b/tests/fate/seek.mak
index dfb2e84a..a229e72b 100644
--- a/tests/fate/seek.mak
+++ b/tests/fate/seek.mak
@@ -244,7 +244,7 @@ FATE_SEEK += $(FATE_SEEK_LAVF-yes:%=fate-seek-lavf-%)
 # extra files
 
 FATE_SEEK_EXTRA-$(CONFIG_MP3_DEMUXER)   += fate-seek-extra-mp3
-fate-seek-extra-mp3:  CMD = run libavformat/seek-test$(EXESUF) $(TARGET_SAMPLES)/gapless/gapless.mp3 -usetoc 0
+fate-seek-extra-mp3:  CMD = run libavformat/seek-test$(EXESUF) $(TARGET_SAMPLES)/gapless/gapless.mp3 -fastseek 1
 FATE_SEEK_EXTRA += $(FATE_SEEK_EXTRA-yes)
 
 
diff --git a/tests/fate/source-check.sh b/tests/fate/source-check.sh
new file mode 100755
index 00000000..ac2878d9
--- /dev/null
+++ b/tests/fate/source-check.sh
@@ -0,0 +1,33 @@
+#!/bin/sh
+
+cd "$1"/..
+
+git show > /dev/null 2> /dev/null || { cat tests/ref/fate/source ; exit 0; }
+
+echo Files without standard license headers:
+git grep -L -E "This file is part of FFmpeg|This file is part of libswresample|"\
+"Permission to use, copy, modify, and/or distribute this software for any|"\
+"Permission is hereby granted, free of charge, to any person|"\
+"Permission is hereby granted to use, copy, modify, and distribute this|"\
+"Permission is granted to anyone to use this software for any purpose|"\
+"This work is licensed under the terms of the GNU GPL|"\
+"Redistribution and use in source and binary forms, with or without modification|"\
+"This library is free software; you can redistribute it and/or|"\
+"This program is free software; you can redistribute it and/or modify|"\
+"This file is placed in the public domain" | grep -E '\.c$|\.h$|\.S$|\.asm$'
+
+echo Headers without standard inclusion guards:
+for f in `git ls-files | grep '\.h$'` ; do
+    macro="`echo $f | sed \
+        -e 's/^lib//' \
+        -e 's/[^A-Za-z0-9]\{1,\}/_/g' \
+        -e 's/_af_/_/' \
+        -e 's/_vf_/_/' \
+        -e 's/_avf_/_/' \
+        -e 's/_vaf_/_/' \
+    | tr abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ`"
+
+    grep -L "^#define $macro$" $f
+done
+
+exit 0
diff --git a/tests/fate/source.mak b/tests/fate/source.mak
new file mode 100644
index 00000000..465ef852
--- /dev/null
+++ b/tests/fate/source.mak
@@ -0,0 +1,3 @@
+
+FATE += fate-source
+fate-source: CMD = runlocal fate/source-check.sh
diff --git a/tests/fate/subtitles.mak b/tests/fate/subtitles.mak
index d8b20340..8aa0279a 100644
--- a/tests/fate/subtitles.mak
+++ b/tests/fate/subtitles.mak
@@ -4,6 +4,9 @@ fate-sub-aqtitle: CMD = fmtstdout ass -sub_charenc windows-1250 -i $(TARGET_SAMP
 FATE_SUBTITLES_ASS-$(call ALLYES, AVDEVICE LAVFI_INDEV CCAPTION_DECODER MOVIE_FILTER MPEGTS_DEMUXER) += fate-sub-cc
 fate-sub-cc: CMD = fmtstdout ass -f lavfi -i "movie=$(TARGET_SAMPLES)/sub/Closedcaption_rollup.m2v[out0+subcc]"
 
+FATE_SUBTITLES_ASS-$(call ALLYES, AVDEVICE LAVFI_INDEV CCAPTION_DECODER MOVIE_FILTER MPEGTS_DEMUXER) += fate-sub-cc-realtime
+fate-sub-cc-realtime: CMD = fmtstdout ass -real_time 1 -f lavfi -i "movie=$(TARGET_SAMPLES)/sub/Closedcaption_rollup.m2v[out0+subcc]"
+
 FATE_SUBTITLES_ASS-$(call DEMDEC, ASS, ASS) += fate-sub-ass-to-ass-transcode
 fate-sub-ass-to-ass-transcode: CMD = fmtstdout ass -i $(TARGET_SAMPLES)/sub/1ededcbd7b.ass
 
@@ -11,7 +14,7 @@ FATE_SUBTITLES_ASS-$(CONFIG_ASS_DEMUXER) += fate-sub-ssa-to-ass-remux
 fate-sub-ssa-to-ass-remux: CMD = fmtstdout ass -i $(TARGET_SAMPLES)/sub/a9-misc.ssa -c copy
 
 FATE_SUBTITLES-$(call ALLYES, ASS_DEMUXER, MATROSKA_MUXER) += fate-binsub-mksenc
-fate-binsub-mksenc: CMD = md5 -i $(TARGET_SAMPLES)/sub/1ededcbd7b.ass -c copy -f matroska -flags +bitexact
+fate-binsub-mksenc: CMD = md5 -i $(TARGET_SAMPLES)/sub/1ededcbd7b.ass -c copy -f matroska -flags +bitexact -fflags +bitexact
 
 FATE_SUBTITLES_ASS-$(call DEMDEC, JACOSUB, JACOSUB) += fate-sub-jacosub
 fate-sub-jacosub: CMD = fmtstdout ass -i $(TARGET_SAMPLES)/sub/JACOsub_capability_tester.jss
@@ -26,7 +29,7 @@ FATE_SUBTITLES_ASS-$(call DEMDEC, MOV, MOVTEXT) += fate-sub-movtext
 fate-sub-movtext: CMD = fmtstdout ass -i $(TARGET_SAMPLES)/sub/MovText_capability_tester.mp4
 
 FATE_SUBTITLES-$(call ENCDEC, MOVTEXT, MOV) += fate-binsub-movtextenc
-fate-binsub-movtextenc: CMD = md5 -i $(TARGET_SAMPLES)/sub/MovText_capability_tester.mp4 -map 0 -scodec mov_text -f mp4 -flags +bitexact -movflags frag_keyframe+empty_moov
+fate-binsub-movtextenc: CMD = md5 -i $(TARGET_SAMPLES)/sub/MovText_capability_tester.mp4 -map 0 -scodec mov_text -f mp4 -flags +bitexact -fflags +bitexact -movflags frag_keyframe+empty_moov
 
 FATE_SUBTITLES_ASS-$(call DEMDEC, MPL2, MPL2) += fate-sub-mpl2
 fate-sub-mpl2: CMD = fmtstdout ass -i $(TARGET_SAMPLES)/sub/MPL2_capability_tester.txt
@@ -46,9 +49,21 @@ fate-sub-realtext: CMD = fmtstdout ass -i $(TARGET_SAMPLES)/sub/RealText_capabil
 FATE_SUBTITLES_ASS-$(call DEMDEC, SAMI, SAMI) += fate-sub-sami
 fate-sub-sami: CMD = fmtstdout ass -i $(TARGET_SAMPLES)/sub/SAMI_capability_tester.smi
 
+FATE_SUBTITLES_ASS-$(call DEMDEC, SAMI, SAMI) += fate-sub-sami2
+fate-sub-sami2: CMD = fmtstdout ass -i $(TARGET_SAMPLES)/sub/SAMI_multilang_tweak_tester.smi
+
 FATE_SUBTITLES_ASS-$(call DEMDEC, SRT, SUBRIP) += fate-sub-srt
 fate-sub-srt: CMD = fmtstdout ass -i $(TARGET_SAMPLES)/sub/SubRip_capability_tester.srt
 
+FATE_SUBTITLES-$(call ALLYES, SRT_DEMUXER SUBRIP_DECODER SRT_MUXER) += fate-sub-srt-rrn-remux
+fate-sub-srt-rrn-remux: CMD = fmtstdout srt -i $(TARGET_SAMPLES)/sub/ticket5032-rrn.srt -c:s copy
+
+FATE_SUBTITLES-$(call ALLYES, SRT_DEMUXER SUBRIP_DECODER SRT_MUXER) += fate-sub-srt-madness-timeshift
+fate-sub-srt-madness-timeshift: CMD = fmtstdout srt -itsoffset 3.14 -i $(TARGET_SAMPLES)/sub/madness.srt -c:s copy
+
+FATE_SUBTITLES-$(call ALLYES, SRT_DEMUXER SUBRIP_DECODER SRT_MUXER) += fate-sub-srt-empty-events
+fate-sub-srt-empty-events: CMD = fmtstdout srt -i $(TARGET_SAMPLES)/sub/empty-events-2167.srt -c:s copy
+
 FATE_SUBTITLES_ASS-$(call DEMDEC, STL, STL) += fate-sub-stl
 fate-sub-stl: CMD = fmtstdout ass -i $(TARGET_SAMPLES)/sub/STL_capability_tester.stl
 
@@ -67,9 +82,15 @@ fate-sub-vplayer: CMD = fmtstdout ass -i $(TARGET_SAMPLES)/sub/VPlayer_capabilit
 FATE_SUBTITLES_ASS-$(call DEMDEC, WEBVTT, WEBVTT) += fate-sub-webvtt
 fate-sub-webvtt: CMD = fmtstdout ass -i $(TARGET_SAMPLES)/sub/WebVTT_capability_tester.vtt
 
+FATE_SUBTITLES_ASS-$(call DEMDEC, WEBVTT, WEBVTT) += fate-sub-webvtt2
+fate-sub-webvtt2: CMD = fmtstdout ass -i $(TARGET_SAMPLES)/sub/WebVTT_extended_tester.vtt
+
 FATE_SUBTITLES-$(call ALLYES, SRT_DEMUXER SUBRIP_DECODER WEBVTT_ENCODER WEBVTT_MUXER) += fate-sub-webvttenc
 fate-sub-webvttenc: CMD = fmtstdout webvtt -i $(TARGET_SAMPLES)/sub/SubRip_capability_tester.srt
 
+FATE_SUBTITLES-$(call ALLYES, SRT_DEMUXER SUBRIP_DECODER TEXT_ENCODER SRT_MUXER) += fate-sub-textenc
+fate-sub-textenc: CMD = fmtstdout srt -i $(TARGET_SAMPLES)/sub/SubRip_capability_tester.srt -c:s text
+
 FATE_SUBTITLES_ASS-$(call ALLYES, MICRODVD_DEMUXER MICRODVD_DECODER ICONV) += fate-sub-charenc
 fate-sub-charenc: CMD = fmtstdout ass -sub_charenc cp1251 -i $(TARGET_SAMPLES)/sub/cp1251-subtitles.sub
 
diff --git a/tests/fate/utvideo.mak b/tests/fate/utvideo.mak
index 1e4755e3..1acef9ae 100644
--- a/tests/fate/utvideo.mak
+++ b/tests/fate/utvideo.mak
@@ -37,7 +37,7 @@ FATE_UTVIDEOENC += fate-utvideoenc_rgba_median
 fate-utvideoenc_rgba_median: OPTS = -pix_fmt rgba -pred median
 
 FATE_UTVIDEOENC += fate-utvideoenc_rgba_none
-fate-utvideoenc_rgba_none: OPTS = -pix_fmt rgba -pred 3
+fate-utvideoenc_rgba_none: OPTS = -pix_fmt rgba -pred none
 
 FATE_UTVIDEOENC += fate-utvideoenc_rgb_left
 fate-utvideoenc_rgb_left: OPTS = -pix_fmt rgb24 -pred left
@@ -46,7 +46,7 @@ FATE_UTVIDEOENC += fate-utvideoenc_rgb_median
 fate-utvideoenc_rgb_median: OPTS = -pix_fmt rgb24 -pred median
 
 FATE_UTVIDEOENC += fate-utvideoenc_rgb_none
-fate-utvideoenc_rgb_none: OPTS = -pix_fmt rgb24 -pred 3
+fate-utvideoenc_rgb_none: OPTS = -pix_fmt rgb24 -pred none
 
 FATE_UTVIDEOENC += fate-utvideoenc_yuv420_left
 fate-utvideoenc_yuv420_left: OPTS = -pix_fmt yuv420p -pred left
@@ -55,7 +55,7 @@ FATE_UTVIDEOENC += fate-utvideoenc_yuv420_median
 fate-utvideoenc_yuv420_median: OPTS = -pix_fmt yuv420p -pred median
 
 FATE_UTVIDEOENC += fate-utvideoenc_yuv420_none
-fate-utvideoenc_yuv420_none: OPTS = -pix_fmt yuv420p -pred 3
+fate-utvideoenc_yuv420_none: OPTS = -pix_fmt yuv420p -pred none
 
 FATE_UTVIDEOENC += fate-utvideoenc_yuv422_left
 fate-utvideoenc_yuv422_left: OPTS = -pix_fmt yuv422p -pred left
@@ -64,7 +64,7 @@ FATE_UTVIDEOENC += fate-utvideoenc_yuv422_median
 fate-utvideoenc_yuv422_median: OPTS = -pix_fmt yuv422p -pred median
 
 FATE_UTVIDEOENC += fate-utvideoenc_yuv422_none
-fate-utvideoenc_yuv422_none: OPTS = -pix_fmt yuv422p -pred 3
+fate-utvideoenc_yuv422_none: OPTS = -pix_fmt yuv422p -pred none
 
 $(FATE_UTVIDEOENC): $(VREF)
 
diff --git a/tests/fate/vcodec.mak b/tests/fate/vcodec.mak
index 1ad5e96f..ccf88ce4 100644
--- a/tests/fate/vcodec.mak
+++ b/tests/fate/vcodec.mak
@@ -41,11 +41,16 @@ fate-vsynth%-dnxhd-720p-10bit:   ENCOPTS = -s hd720 -b 90M              \
                                            -pix_fmt yuv422p10 -frames 5 -qmax 8
 fate-vsynth%-dnxhd-720p-10bit:   FMT     = dnxhd
 
-FATE_VCODEC-$(call ENCDEC, DNXHD, MOV)  += dnxhd-1080i dnxhd-1080i-colr
+FATE_VCODEC-$(call ENCDEC, DNXHD, MOV)  += dnxhd-1080i dnxhd-1080i-10bit dnxhd-1080i-colr
 fate-vsynth%-dnxhd-1080i:        ENCOPTS = -s hd1080 -b 120M -flags +ildct \
                                            -pix_fmt yuv422p -frames 5 -qmax 8
 fate-vsynth%-dnxhd-1080i:        FMT     = mov
 
+fate-vsynth%-dnxhd-1080i-10bit:  ENCOPTS = -s hd1080 -b 185M -flags +ildct \
+                                           -pix_fmt yuv422p10 -frames 5 -qmax 8
+fate-vsynth%-dnxhd-1080i-10bit:  DECOPTS = -sws_flags area+accurate_rnd+bitexact
+fate-vsynth%-dnxhd-1080i-10bit:  FMT     = mov
+
 fate-vsynth%-dnxhd-1080i-colr:   ENCOPTS = -s hd1080 -b 120M -flags +ildct -movflags write_colr \
                                            -pix_fmt yuv422p -frames 5 -qmax 8
 fate-vsynth%-dnxhd-1080i-colr:   DECOPTS = -sws_flags area+accurate_rnd+bitexact
@@ -68,9 +73,21 @@ fate-vsynth%-dv-50:              ENCOPTS = -dct int -s pal -pix_fmt yuv422p \
 fate-vsynth%-dv-50:              DECOPTS = -sws_flags neighbor
 fate-vsynth%-dv-50:              FMT     = dv
 
-FATE_VCODEC-$(call ENCDEC, FFV1, AVI)   += ffv1 ffv1.0
+FATE_VCODEC-$(call ENCDEC, FFV1, AVI)   += ffv1 ffv1-v0 \
+                                           ffv1-v3-yuv420p ffv1-v3-yuv422p10 ffv1-v3-yuv444p16 \
+                                           ffv1-v3-bgr0
 fate-vsynth%-ffv1:               ENCOPTS = -slices 4
-fate-vsynth%-ffv1.0:             CODEC   = ffv1
+fate-vsynth%-ffv1-v0:            CODEC   = ffv1
+fate-vsynth%-ffv1-v3-yuv420p:    ENCOPTS = -level 3 -pix_fmt yuv420p
+fate-vsynth%-ffv1-v3-yuv422p10:  ENCOPTS = -level 3 -pix_fmt yuv422p10 \
+                                           -sws_flags neighbor+bitexact
+fate-vsynth%-ffv1-v3-yuv422p10:  DECOPTS = -sws_flags neighbor+bitexact
+fate-vsynth%-ffv1-v3-yuv444p16:  ENCOPTS = -level 3 -pix_fmt yuv444p16 \
+                                           -sws_flags neighbor+bitexact
+fate-vsynth%-ffv1-v3-yuv444p16:  DECOPTS = -sws_flags neighbor+bitexact
+fate-vsynth%-ffv1-v3-bgr0:       ENCOPTS = -level 3 -pix_fmt bgr0 \
+                                           -sws_flags neighbor+bitexact
+fate-vsynth%-ffv1-v3-bgr0:       DECOPTS = -sws_flags neighbor+bitexact
 
 FATE_VCODEC-$(call ENCDEC, FFVHUFF, AVI) += ffvhuff ffvhuff444 ffvhuff420p12 ffvhuff422p10left ffvhuff444p16
 fate-vsynth%-ffvhuff444:         ENCOPTS = -vcodec ffvhuff -pix_fmt yuv444p
@@ -154,8 +171,8 @@ fate-vsynth%-mpeg2:              ENCOPTS = -qscale 10
 fate-vsynth%-mpeg2-422:          ENCOPTS = -b:v 1000k                   \
                                            -bf 2                        \
                                            -trellis 1                   \
-                                           -flags +mv0+ildct+ilme       \
-                                           -mpv_flags +qp_rd            \
+                                           -flags +ildct+ilme           \
+                                           -mpv_flags +qp_rd+mv0        \
                                            -intra_vlc 1                 \
                                            -mbd rd                      \
                                            -pix_fmt yuv422p
@@ -164,8 +181,7 @@ fate-vsynth%-mpeg2-ilace:        ENCOPTS = -qscale 10 -flags +ildct+ilme
 fate-vsynth%-mpeg2-ivlc-qprd:    ENCOPTS = -b:v 500k                    \
                                            -bf 2                        \
                                            -trellis 1                   \
-                                           -flags +mv0                  \
-                                           -mpv_flags +qp_rd            \
+                                           -mpv_flags +qp_rd+mv0        \
                                            -intra_vlc 1                 \
                                            -cmp 2 -subcmp 2             \
                                            -mbd rd
@@ -191,9 +207,10 @@ FATE_VCODEC-$(call ENCDEC, MPEG4, AVI)     += $(FATE_MPEG4_AVI)
 fate-vsynth%-mpeg4:              ENCOPTS = -qscale 10 -flags +mv4 -mbd bits
 fate-vsynth%-mpeg4:              FMT     = mp4
 
-fate-vsynth%-mpeg4-adap:         ENCOPTS = -b 550k -bf 2 -flags +mv4+mv0 \
+fate-vsynth%-mpeg4-adap:         ENCOPTS = -b 550k -bf 2 -flags +mv4     \
                                            -trellis 1 -cmp 1 -subcmp 2   \
-                                           -mbd rd -scplx_mask 0.3
+                                           -mbd rd -scplx_mask 0.3       \
+                                           -mpv_flags +mv0
 
 fate-vsynth%-mpeg4-adv:          ENCOPTS = -qscale 9 -flags +mv4+aic       \
                                            -data_partitioning 1 -trellis 1 \
@@ -201,9 +218,10 @@ fate-vsynth%-mpeg4-adv:          ENCOPTS = -qscale 9 -flags +mv4+aic       \
 
 fate-vsynth%-mpeg4-error:        ENCOPTS = -qscale 7 -flags +mv4+aic    \
                                            -data_partitioning 1 -mbd rd \
-                                           -ps 250 -error 10
+                                           -ps 250 -error_rate 10
 
-fate-vsynth%-mpeg4-nr:           ENCOPTS = -qscale 8 -flags +mv4 -mbd rd -nr 200
+fate-vsynth%-mpeg4-nr:           ENCOPTS = -qscale 8 -flags +mv4 -mbd rd \
+                                           -noise_reduction 200
 
 fate-vsynth%-mpeg4-nsse:         ENCOPTS = -qscale 7 -cmp nsse -subcmp nsse \
                                            -mbcmp nsse -precmp nsse         \
@@ -213,7 +231,7 @@ fate-vsynth%-mpeg4-qpel:         ENCOPTS = -qscale 7 -flags +mv4+qpel -mbd 2 \
                                            -bf 2 -cmp 1 -subcmp 2
 
 fate-vsynth%-mpeg4-qprd:         ENCOPTS = -b 450k -bf 2 -trellis 1          \
-                                           -flags +mv4+mv0 -mpv_flags +qp_rd \
+                                           -flags +mv4 -mpv_flags +qp_rd+mv0 \
                                            -cmp 2 -subcmp 2 -mbd rd
 
 fate-vsynth%-mpeg4-rc:           ENCOPTS = -b 400k -bf 2
@@ -247,9 +265,24 @@ fate-vsynth%-qtrlegray:          CODEC   = qtrle
 fate-vsynth%-qtrlegray:          ENCOPTS = -pix_fmt gray
 fate-vsynth%-qtrlegray:          FMT     = mov
 
-FATE_VCODEC-$(call ENCDEC, RAWVIDEO, AVI) += rgb
+FATE_VCODEC-$(call ENCDEC, RAWVIDEO, AVI) += rgb bpp1 bpp15
 fate-vsynth%-rgb:                CODEC   = rawvideo
 fate-vsynth%-rgb:                ENCOPTS = -pix_fmt bgr24
+fate-vsynth%-bpp1:               CODEC   = rawvideo
+fate-vsynth%-bpp1:               ENCOPTS = -pix_fmt monow
+fate-vsynth%-bpp15:              CODEC   = rawvideo
+fate-vsynth%-bpp15:              ENCOPTS = -pix_fmt bgr555le
+
+FATE_VCODEC-$(call ENCDEC, RAWVIDEO, MOV) += mov-bgr24 mov-bpp15 mov-bpp16
+fate-vsynth%-mov-bgr24:          CODEC   = rawvideo
+fate-vsynth%-mov-bgr24:          ENCOPTS = -pix_fmt bgr24
+fate-vsynth%-mov-bgr24:          FMT      = mov
+fate-vsynth%-mov-bpp15:          CODEC   = rawvideo
+fate-vsynth%-mov-bpp15:          ENCOPTS = -pix_fmt rgb555le
+fate-vsynth%-mov-bpp15:          FMT      = mov
+fate-vsynth%-mov-bpp16:          CODEC   = rawvideo
+fate-vsynth%-mov-bpp16:          ENCOPTS = -pix_fmt rgb565le
+fate-vsynth%-mov-bpp16:          FMT      = mov
 
 FATE_VCODEC-$(call ENCDEC, ROQ, ROQ)    += roqvideo
 fate-vsynth%-roqvideo:           CODEC   = roqvideo
@@ -283,7 +316,8 @@ fate-vsynth%-svq1:               FMT     = mov
 
 FATE_VCODEC-$(call ENCDEC, R210, AVI)   += r210
 
-FATE_VCODEC-$(call ENCDEC, V210, AVI)   += v210
+FATE_VCODEC-$(call ENCDEC, V210, AVI)   += v210 v210-10
+fate-vsynth%-v210-10:            ENCOPTS = -pix_fmt yuv422p10
 
 FATE_VCODEC-$(call ENCDEC, V308, AVI)   += v308
 
diff --git a/tests/fate/video.mak b/tests/fate/video.mak
index 9f65694f..8b70b084 100644
--- a/tests/fate/video.mak
+++ b/tests/fate/video.mak
@@ -85,7 +85,7 @@ FATE_VIDEO-$(call DEMDEC, MPEGPS, CAVS) += fate-cavs
 fate-cavs: CMD = framecrc -i $(TARGET_SAMPLES)/cavs/cavs.mpg -an
 
 FATE_VIDEO-$(call DEMDEC, CDG, CDGRAPHICS) += fate-cdgraphics
-fate-cdgraphics: CMD = framecrc -i $(TARGET_SAMPLES)/cdgraphics/BrotherJohn.cdg -pix_fmt rgb24 -t 1
+fate-cdgraphics: CMD = framecrc -i $(TARGET_SAMPLES)/cdgraphics/BrotherJohn.cdg -pix_fmt rgba -t 1
 
 FATE_VIDEO-$(call DEMDEC, AVI, CLJR) += fate-cljr
 fate-cljr: CMD = framecrc -i $(TARGET_SAMPLES)/cljr/testcljr-partial.avi
@@ -144,6 +144,21 @@ fate-dxa-scummvm: CMD = framecrc -i $(TARGET_SAMPLES)/dxa/scummvm.dxa -pix_fmt r
 FATE_VIDEO-$(call DEMDEC, DXA, DXA) += $(FATE_DXA)
 fate-dxa: $(FATE_DXA)
 
+FATE_DXV += fate-dxv-dxt1
+fate-dxv-dxt1: CMD = framecrc -i $(TARGET_SAMPLES)/dxv/dxv-na.mov
+
+FATE_DXV += fate-dxv-dxt5
+fate-dxv-dxt5: CMD = framecrc -i $(TARGET_SAMPLES)/dxv/dxv-wa.mov
+
+FATE_DXV += fate-dxv3-dxt1
+fate-dxv3-dxt1: CMD = framecrc -i $(TARGET_SAMPLES)/dxv/dxv3-nqna.mov
+
+FATE_DXV += fate-dxv3-dxt5
+fate-dxv3-dxt5: CMD = framecrc -i $(TARGET_SAMPLES)/dxv/dxv3-nqwa.mov
+
+FATE_VIDEO-$(call DEMDEC, MOV, DXV) += $(FATE_DXV)
+fate-dxv: $(FATE_DXV)
+
 FATE_VIDEO-$(call DEMDEC, SEGAFILM, CINEPAK) += fate-film-cvid
 fate-film-cvid: CMD = framecrc -i $(TARGET_SAMPLES)/film/logo-capcom.cpk -an
 
@@ -168,7 +183,22 @@ fate-id-cin-video: CMD = framecrc -i $(TARGET_SAMPLES)/idcin/idlog-2MB.cin -pix_
 FATE_VIDEO-$(call ENCDEC, ROQ PGMYUV, ROQ IMAGE2) += fate-idroq-video-encode
 fate-idroq-video-encode: CMD = md5 -f image2 -vcodec pgmyuv -i $(TARGET_SAMPLES)/ffmpeg-synthetic/vsynth1/%02d.pgm -r 30 -sws_flags +bitexact -vf pad=512:512:80:112 -f roq -t 0.2
 
-FATE_IFF-$(CONFIG_IFF_BYTERUN1_DECODER) += fate-iff-byterun1
+FATE_HAP += fate-hap1
+fate-hap1: CMD = framecrc -i $(TARGET_SAMPLES)/hap/hap1.mov
+
+FATE_HAP += fate-hap5
+fate-hap5: CMD = framecrc -i $(TARGET_SAMPLES)/hap/hap5.mov
+
+FATE_HAP += fate-hapy
+fate-hapy: CMD = framecrc -i $(TARGET_SAMPLES)/hap/hapy.mov
+
+FATE_HAP += fate-hap-chunk
+fate-hap-chunk: CMD = framecrc -i $(TARGET_SAMPLES)/hap/hapy-12-chunks.mov
+
+FATE_SAMPLES_AVCONV-$(call DEMDEC, MOV, HAP) += $(FATE_HAP)
+fate-hap: $(FATE_HAP)
+
+FATE_IFF-$(CONFIG_IFF_ILBM_DECODER) += fate-iff-byterun1
 fate-iff-byterun1: CMD = framecrc -i $(TARGET_SAMPLES)/iff/ASH.LBM -pix_fmt rgb24
 
 FATE_IFF-$(CONFIG_EIGHTSVX_FIB_DECODER) += fate-iff-fibonacci
@@ -278,7 +308,10 @@ FATE_VIDEO-$(call DEMDEC, TMV, TMV) += fate-tmv
 fate-tmv: CMD = framecrc -i $(TARGET_SAMPLES)/tmv/pop-partial.tmv -pix_fmt rgb24
 
 FATE_TXD += fate-txd-16bpp
-fate-txd-16bpp: CMD = framecrc -i $(TARGET_SAMPLES)/txd/misc.txd -pix_fmt bgra -an
+fate-txd-16bpp: CMD = framecrc -i $(TARGET_SAMPLES)/txd/misc.txd -an
+
+FATE_TXD += fate-txd-odd
+fate-txd-odd: CMD = framecrc -i $(TARGET_SAMPLES)/txd/odd.txd -an
 
 FATE_TXD += fate-txd-pal8
 fate-txd-pal8: CMD = framecrc -i $(TARGET_SAMPLES)/txd/outro.txd -pix_fmt rgb24 -an
diff --git a/tests/fate/voice.mak b/tests/fate/voice.mak
index 44b5b932..7389c43c 100644
--- a/tests/fate/voice.mak
+++ b/tests/fate/voice.mak
@@ -4,7 +4,7 @@ fate-g722dec-1: CMD = framecrc -i $(TARGET_SAMPLES)/g722/conf-adminmenu-162.g722
 FATE_G722-$(call ENCMUX, ADPCM_G722, WAV) += fate-g722-encode
 fate-g722-encode: tests/data/asynth-16000-1.wav
 fate-g722-encode: SRC = tests/data/asynth-16000-1.wav
-fate-g722-encode: CMD = enc_dec_pcm wav md5 s16le $(SRC) -c:a g722
+fate-g722-encode: CMD = enc_dec_pcm wav framemd5 s16le $(SRC) -c:a g722
 
 FATE_VOICE-yes += $(FATE_G722-yes)
 fate-g722: $(FATE_G722)
@@ -38,16 +38,16 @@ FATE_SAMPLES_AVCONV += $(FATE_G723_1-yes)
 fate-g723_1: $(FATE_G723_1)
 
 FATE_G726 += fate-g726-encode-2bit
-fate-g726-encode-2bit: CMD = enc_dec_pcm wav md5 s16le $(SRC) -c:a g726 -b:a 16k
+fate-g726-encode-2bit: CMD = enc_dec_pcm wav framemd5 s16le $(SRC) -c:a g726 -b:a 16k
 
 FATE_G726 += fate-g726-encode-3bit
-fate-g726-encode-3bit: CMD = enc_dec_pcm wav md5 s16le $(SRC) -c:a g726 -b:a 24k
+fate-g726-encode-3bit: CMD = enc_dec_pcm wav framemd5 s16le $(SRC) -c:a g726 -b:a 24k
 
 FATE_G726 += fate-g726-encode-4bit
-fate-g726-encode-4bit: CMD = enc_dec_pcm wav md5 s16le $(SRC) -c:a g726 -b:a 32k
+fate-g726-encode-4bit: CMD = enc_dec_pcm wav framemd5 s16le $(SRC) -c:a g726 -b:a 32k
 
 FATE_G726 += fate-g726-encode-5bit
-fate-g726-encode-5bit: CMD = enc_dec_pcm wav md5 s16le $(SRC) -c:a g726 -b:a 40k
+fate-g726-encode-5bit: CMD = enc_dec_pcm wav framemd5 s16le $(SRC) -c:a g726 -b:a 40k
 
 $(FATE_G726): tests/data/asynth-8000-1.wav
 $(FATE_G726): SRC = tests/data/asynth-8000-1.wav
diff --git a/tests/fate/vpx.mak b/tests/fate/vpx.mak
index c381757e..a4067d38 100644
--- a/tests/fate/vpx.mak
+++ b/tests/fate/vpx.mak
@@ -32,19 +32,19 @@ FATE_VP8-$(call DEMDEC, FLV, VP8) += fate-vp8-alpha
 fate-vp8-alpha: CMD = framecrc -i $(TARGET_SAMPLES)/vp8_alpha/vp8_video_with_alpha.webm -vcodec copy
 
 FATE_VP8-$(call DEMDEC, WEBM_DASH_MANIFEST, VP8) += fate-webm-dash-manifest
-fate-webm-dash-manifest: CMD = run ffmpeg -f webm_dash_manifest -i $(TARGET_SAMPLES)/vp8/dash_video1.webm -f webm_dash_manifest -i $(TARGET_SAMPLES)/vp8/dash_video2.webm -f webm_dash_manifest -i $(TARGET_SAMPLES)/vp8/dash_audio1.webm -f webm_dash_manifest -i $(TARGET_SAMPLES)/vp8/dash_audio2.webm -c copy -map 0 -map 1 -map 2 -map 3 -f webm_dash_manifest -adaptation_sets "id=0,streams=0,1 id=1,streams=2,3" -
+fate-webm-dash-manifest: CMD = run $(FFMPEG) -f webm_dash_manifest -i $(TARGET_SAMPLES)/vp8/dash_video1.webm -f webm_dash_manifest -i $(TARGET_SAMPLES)/vp8/dash_video2.webm -f webm_dash_manifest -i $(TARGET_SAMPLES)/vp8/dash_audio1.webm -f webm_dash_manifest -i $(TARGET_SAMPLES)/vp8/dash_audio2.webm -c copy -map 0 -map 1 -map 2 -map 3 -f webm_dash_manifest -adaptation_sets "id=0,streams=0,1 id=1,streams=2,3" -
 
 FATE_VP8-$(call DEMDEC, WEBM_DASH_MANIFEST, VP8) += fate-webm-dash-manifest-unaligned-video-streams
-fate-webm-dash-manifest-unaligned-video-streams: CMD = run ffmpeg -f webm_dash_manifest -i $(TARGET_SAMPLES)/vp8/dash_video1.webm -f webm_dash_manifest -i $(TARGET_SAMPLES)/vp8/dash_video3.webm -c copy -map 0 -map 1 -f webm_dash_manifest -adaptation_sets "id=0,streams=0,1" -
+fate-webm-dash-manifest-unaligned-video-streams: CMD = run $(FFMPEG) -f webm_dash_manifest -i $(TARGET_SAMPLES)/vp8/dash_video1.webm -f webm_dash_manifest -i $(TARGET_SAMPLES)/vp8/dash_video3.webm -c copy -map 0 -map 1 -f webm_dash_manifest -adaptation_sets "id=0,streams=0,1" -
 
 FATE_VP8-$(call DEMDEC, WEBM_DASH_MANIFEST, VP8) += fate-webm-dash-manifest-unaligned-audio-streams
-fate-webm-dash-manifest-unaligned-audio-streams: CMD = run ffmpeg -f webm_dash_manifest -i $(TARGET_SAMPLES)/vp8/dash_audio1.webm -f webm_dash_manifest -i $(TARGET_SAMPLES)/vp8/dash_audio3.webm -c copy -map 0 -map 1 -f webm_dash_manifest -adaptation_sets "id=0,streams=0,1" -
+fate-webm-dash-manifest-unaligned-audio-streams: CMD = run $(FFMPEG) -f webm_dash_manifest -i $(TARGET_SAMPLES)/vp8/dash_audio1.webm -f webm_dash_manifest -i $(TARGET_SAMPLES)/vp8/dash_audio3.webm -c copy -map 0 -map 1 -f webm_dash_manifest -adaptation_sets "id=0,streams=0,1" -
 
 FATE_VP8-$(call DEMDEC, WEBM_DASH_MANIFEST, VP8) += fate-webm-dash-manifest-representations
-fate-webm-dash-manifest-representations: CMD = run ffmpeg -f webm_dash_manifest -i $(TARGET_SAMPLES)/vp8/dash_video1.webm -f webm_dash_manifest -i $(TARGET_SAMPLES)/vp8/dash_video4.webm -c copy -map 0 -map 1 -f webm_dash_manifest -adaptation_sets "id=0,streams=0,1" -
+fate-webm-dash-manifest-representations: CMD = run $(FFMPEG) -f webm_dash_manifest -i $(TARGET_SAMPLES)/vp8/dash_video1.webm -f webm_dash_manifest -i $(TARGET_SAMPLES)/vp8/dash_video4.webm -c copy -map 0 -map 1 -f webm_dash_manifest -adaptation_sets "id=0,streams=0,1" -
 
 FATE_VP8-$(call DEMDEC, WEBM_DASH_MANIFEST, VP8) += fate-webm-dash-manifest-live
-fate-webm-dash-manifest-live: CMD = run ffmpeg -f webm_dash_manifest -live 1 -i $(TARGET_SAMPLES)/vp8/dash_live_video_360.hdr -f webm_dash_manifest -live 1 -i $(TARGET_SAMPLES)/vp8/dash_live_audio_171.hdr -c copy -map 0 -map 1 -f webm_dash_manifest -live 1 -adaptation_sets "id=0,streams=0 id=1,streams=1" -chunk_start_index 1 -chunk_duration_ms 5000 -time_shift_buffer_depth 7200 -minimum_update_period 60 -debug_mode 1 -
+fate-webm-dash-manifest-live: CMD = run $(FFMPEG) -f webm_dash_manifest -live 1 -i $(TARGET_SAMPLES)/vp8/dash_live_video_360.hdr -f webm_dash_manifest -live 1 -i $(TARGET_SAMPLES)/vp8/dash_live_audio_171.hdr -c copy -map 0 -map 1 -f webm_dash_manifest -live 1 -adaptation_sets "id=0,streams=0 id=1,streams=1" -chunk_start_index 1 -chunk_duration_ms 5000 -time_shift_buffer_depth 7200 -minimum_update_period 60 -debug_mode 1 -
 
 FATE_SAMPLES_AVCONV += $(FATE_VP6-yes)
 fate-vp6: $(FATE_VP6-yes)
diff --git a/tests/fate/vqf.mak b/tests/fate/vqf.mak
index ac18ebd5..40e70219 100644
--- a/tests/fate/vqf.mak
+++ b/tests/fate/vqf.mak
@@ -4,7 +4,7 @@ fate-twinvq: CMP = oneoff
 fate-twinvq: REF = $(SAMPLES)/vqf/achterba.pcm
 
 FATE_VQF-$(CONFIG_VQF_DEMUXER) += fate-vqf-demux
-fate-vqf-demux: CMD = md5 -i $(TARGET_SAMPLES)/vqf/achterba.vqf -acodec copy -flags bitexact -f framecrc
+fate-vqf-demux: CMD = md5 -i $(TARGET_SAMPLES)/vqf/achterba.vqf -acodec copy -flags bitexact -fflags +bitexact -f framecrc
 
 FATE_VQF += $(FATE_VQF-yes)
 
diff --git a/tests/fate/wavpack.mak b/tests/fate/wavpack.mak
index 240f5ead..a825a02b 100644
--- a/tests/fate/wavpack.mak
+++ b/tests/fate/wavpack.mak
@@ -91,12 +91,12 @@ fate-wavpack-matroskamode: CMD = md5 -i $(TARGET_SAMPLES)/wavpack/special/matros
 FATE_WAVPACK-$(call DEMMUX, WV, MATROSKA) += fate-wavpack-matroska_mux-mono
 fate-wavpack-matroska_mux-mono: CMD = md5 -i $(TARGET_SAMPLES)/wavpack/num_channels/mono_16bit_int.wv -c copy -fflags +bitexact -f matroska
 fate-wavpack-matroska_mux-mono: CMP = oneline
-fate-wavpack-matroska_mux-mono: REF = a2987e2e51e01a35e47e7da13eb47a35
+fate-wavpack-matroska_mux-mono: REF = 4befcc41dab6c690a15d0c396c324468
 
 FATE_WAVPACK-$(call DEMMUX, WV, MATROSKA) += fate-wavpack-matroska_mux-61
 fate-wavpack-matroska_mux-61: CMD = md5 -i $(TARGET_SAMPLES)/wavpack/num_channels/eva_2.22_6.1_16bit-partial.wv -c copy -fflags +bitexact -f matroska
 fate-wavpack-matroska_mux-61: CMP = oneline
-fate-wavpack-matroska_mux-61: REF = ffba4ddea1ba71f7a5901d9ed1a267be
+fate-wavpack-matroska_mux-61: REF = 7fedbfc3b9ea7348761db664626c29f4
 
 FATE_SAMPLES_AVCONV += $(FATE_WAVPACK-yes)
 fate-wavpack: $(FATE_WAVPACK-yes)
diff --git a/tests/fate/wma.mak b/tests/fate/wma.mak
index 1b8c5f97..12a8fa98 100644
--- a/tests/fate/wma.mak
+++ b/tests/fate/wma.mak
@@ -1,10 +1,12 @@
 FATE_WMAPRO-$(call DEMDEC, ASF, WMAPRO) += fate-wmapro-2ch
-fate-wmapro-2ch: CMD = pcm -i $(TARGET_SAMPLES)/wmapro/Beethovens_9th-1_small.wma
+fate-wmapro-2ch: CMD = pcm -i $(TARGET_SAMPLES)/wmapro/Beethovens_9th-1_small.wma -frames 43
 fate-wmapro-2ch: REF = $(SAMPLES)/wmapro/Beethovens_9th-1_small.pcm
+fate-wmapro-2ch: SIZE_TOLERANCE = 8192
 
 FATE_WMAPRO-$(call DEMDEC, ASF, WMAPRO) += fate-wmapro-5.1
-fate-wmapro-5.1: CMD = pcm -i $(TARGET_SAMPLES)/wmapro/latin_192_mulitchannel_cut.wma
+fate-wmapro-5.1: CMD = pcm -i $(TARGET_SAMPLES)/wmapro/latin_192_mulitchannel_cut.wma -frames 101
 fate-wmapro-5.1: REF = $(SAMPLES)/wmapro/latin_192_mulitchannel_cut.pcm
+fate-wmapro-5.1: SIZE_TOLERANCE = 24576
 
 FATE_WMAPRO-$(call DEMDEC, MOV, WMAPRO) += fate-wmapro-ism
 fate-wmapro-ism: CMD = pcm -i $(TARGET_SAMPLES)/isom/vc1-wmapro.ism -vn
diff --git a/tests/ffserver-regression.sh b/tests/ffserver-regression.sh
index 11e4a541..192d362a 100755
--- a/tests/ffserver-regression.sh
+++ b/tests/ffserver-regression.sh
@@ -8,7 +8,7 @@
 FILES=$(sed -n 's/^[^#]*<Stream \(.*\)>.*/\1/p' $2 | grep -v html)
 
 rm -f tests/feed1.ffm
-./ffserver -d -f "$2" 2> /dev/null &
+./ffserver${PROGSUF} -d -f "$2" 2> /dev/null &
 FFSERVER_PID=$!
 echo "Waiting for feeds to startup..."
 sleep 2
diff --git a/tests/filtergraphs/colorkey b/tests/filtergraphs/colorkey
new file mode 100644
index 00000000..207389ca
--- /dev/null
+++ b/tests/filtergraphs/colorkey
@@ -0,0 +1,2 @@
+sws_flags=+accurate_rnd+bitexact;
+[1]colorkey=black:0.2:0.5[t],[0][t]overlay=10:main_h-overlay_h-10
diff --git a/tests/filtergraphs/mergeplanes b/tests/filtergraphs/mergeplanes
new file mode 100644
index 00000000..0058a20f
--- /dev/null
+++ b/tests/filtergraphs/mergeplanes
@@ -0,0 +1,2 @@
+sws_flags=+accurate_rnd+bitexact;
+format=yuv420p,mergeplanes=0:yuv444p
diff --git a/tests/ref/acodec/adpcm-adx b/tests/ref/acodec/adpcm-adx
index 34dd9b6c..8c401001 100644
--- a/tests/ref/acodec/adpcm-adx
+++ b/tests/ref/acodec/adpcm-adx
@@ -1,4 +1,4 @@
-d7ec7d52a2f5c91464812d031b07cc1d *tests/data/fate/acodec-adpcm-adx.adx
+6bf1a8e5ec9cc958a31cb2b1b66bfc75 *tests/data/fate/acodec-adpcm-adx.adx
 297720 tests/data/fate/acodec-adpcm-adx.adx
 5b5a436ec9d528d6eb0bebaf667521b0 *tests/data/fate/acodec-adpcm-adx.out.wav
 stddev: 2549.93 PSNR: 28.20 MAXDIFF:57514 bytes:  1058400/  1058432
diff --git a/tests/ref/acodec/adpcm-adx-trellis b/tests/ref/acodec/adpcm-adx-trellis
index d620d4a2..039f69f9 100644
--- a/tests/ref/acodec/adpcm-adx-trellis
+++ b/tests/ref/acodec/adpcm-adx-trellis
@@ -1,4 +1,4 @@
-d7ec7d52a2f5c91464812d031b07cc1d *tests/data/fate/acodec-adpcm-adx-trellis.adx
+6bf1a8e5ec9cc958a31cb2b1b66bfc75 *tests/data/fate/acodec-adpcm-adx-trellis.adx
 297720 tests/data/fate/acodec-adpcm-adx-trellis.adx
 5b5a436ec9d528d6eb0bebaf667521b0 *tests/data/fate/acodec-adpcm-adx-trellis.out.wav
 stddev: 2549.93 PSNR: 28.20 MAXDIFF:57514 bytes:  1058400/  1058432
diff --git a/tests/ref/acodec/tta b/tests/ref/acodec/tta
index b4b96112..0f603456 100644
--- a/tests/ref/acodec/tta
+++ b/tests/ref/acodec/tta
@@ -1,4 +1,4 @@
-aeeb0f2e75d044dbe2f89b7e70a54c82 *tests/data/fate/acodec-tta.matroska
-331080 tests/data/fate/acodec-tta.matroska
+6c260836d7a32e4bd714453a3546c0d5 *tests/data/fate/acodec-tta.matroska
+331148 tests/data/fate/acodec-tta.matroska
 95e54b261530a1bcf6de6fe3b21dc5f6 *tests/data/fate/acodec-tta.out.wav
 stddev:    0.00 PSNR:999.99 MAXDIFF:    0 bytes:  1058400/  1058400
diff --git a/tests/ref/fate/adpcm-thp b/tests/ref/fate/adpcm-thp
index 72aff61a..0063b6c7 100644
--- a/tests/ref/fate/adpcm-thp
+++ b/tests/ref/fate/adpcm-thp
@@ -1,72 +1,72 @@
 #tb 0: 1/32000
 0,          0,          0,     1078,     4312, 0x469714f6
-0,       1078,       1078,     1064,     4256, 0xe03dd882
-0,       2142,       2142,     1078,     4312, 0x46b901f7
-0,       3220,       3220,     1064,     4256, 0x8d4a54e4
-0,       4284,       4284,     1064,     4256, 0xfd616b67
-0,       5348,       5348,     1078,     4312, 0xefe62302
-0,       6426,       6426,     1064,     4256, 0xab11684e
-0,       7490,       7490,     1064,     4256, 0xb4b3feb8
-0,       8554,       8554,     1078,     4312, 0x71db6461
-0,       9632,       9632,     1064,     4256, 0x090e5efa
-0,      10696,      10696,     1064,     4256, 0x36f49c28
-0,      11760,      11760,     1078,     4312, 0x0fe3d262
-0,      12838,      12838,     1064,     4256, 0x199ce269
-0,      13902,      13902,     1064,     4256, 0x98342d05
-0,      14966,      14966,     1078,     4312, 0xb6fb7ebe
-0,      16044,      16044,     1064,     4256, 0x033dd562
-0,      17108,      17108,     1064,     4256, 0xc2cc17e0
-0,      18172,      18172,     1078,     4312, 0x4bb3ff50
-0,      19250,      19250,     1064,     4256, 0x6f2671ef
-0,      20314,      20314,     1064,     4256, 0x5a337bf4
-0,      21378,      21378,     1078,     4312, 0xa71f6967
-0,      22456,      22456,     1064,     4256, 0x48084aa9
-0,      23520,      23520,     1064,     4256, 0x3cce4218
-0,      24584,      24584,     1078,     4312, 0xcbb8f73d
-0,      25662,      25662,     1064,     4256, 0x36825021
-0,      26726,      26726,     1064,     4256, 0xeae036c6
-0,      27790,      27790,     1078,     4312, 0x0d650ac6
-0,      28868,      28868,     1064,     4256, 0xfba4f58c
-0,      29932,      29932,     1064,     4256, 0x54311f9b
-0,      30996,      30996,     1078,     4312, 0x286386b3
-0,      32074,      32074,     1064,     4256, 0x871896de
-0,      33138,      33138,     1064,     4256, 0x9ef9f970
-0,      34202,      34202,     1078,     4312, 0xf9ae97f1
-0,      35280,      35280,     1064,     4256, 0x0ad0d765
-0,      36344,      36344,     1064,     4256, 0x8e6aa9b5
-0,      37408,      37408,     1078,     4312, 0x8362787b
-0,      38486,      38486,     1064,     4256, 0x9b6a5d9c
-0,      39550,      39550,     1064,     4256, 0xfb715d8f
-0,      40614,      40614,     1078,     4312, 0x02bd8075
-0,      41692,      41692,     1064,     4256, 0x428eb932
-0,      42756,      42756,     1064,     4256, 0x17ea8c94
-0,      43820,      43820,     1078,     4312, 0xb3e761d7
-0,      44898,      44898,     1064,     4256, 0x0919755a
-0,      45962,      45962,     1064,     4256, 0x5e520edd
-0,      47026,      47026,     1078,     4312, 0x69aa070e
-0,      48104,      48104,     1064,     4256, 0xf8192f7d
-0,      49168,      49168,     1064,     4256, 0xaad4475c
-0,      50232,      50232,     1078,     4312, 0x0cabcfcb
-0,      51310,      51310,     1064,     4256, 0x952f0f96
-0,      52374,      52374,     1064,     4256, 0x1b805a0c
-0,      53438,      53438,     1078,     4312, 0x93043d2a
-0,      54516,      54516,     1064,     4256, 0x38b99e44
-0,      55580,      55580,     1064,     4256, 0x60cc52ff
-0,      56644,      56644,     1078,     4312, 0x6a875849
-0,      57722,      57722,     1064,     4256, 0xd08d6d0e
-0,      58786,      58786,     1064,     4256, 0x36bfe48e
-0,      59850,      59850,     1078,     4312, 0x795c6134
-0,      60928,      60928,     1064,     4256, 0x4fd79583
-0,      61992,      61992,     1064,     4256, 0x65e2ab9f
-0,      63056,      63056,     1078,     4312, 0xedeede4a
-0,      64134,      64134,     1064,     4256, 0x097e0d09
-0,      65198,      65198,     1064,     4256, 0x58afa133
-0,      66262,      66262,     1078,     4312, 0x442525b5
-0,      67340,      67340,     1064,     4256, 0x6645c591
-0,      68404,      68404,     1064,     4256, 0xb0dd948a
-0,      69468,      69468,     1078,     4312, 0x12684e69
-0,      70546,      70546,     1064,     4256, 0xb45098e3
-0,      71610,      71610,     1064,     4256, 0xb6d3c61c
-0,      72674,      72674,     1078,     4312, 0xb46b5b22
-0,      73752,      73752,     1064,     4256, 0x9a556830
-0,      74816,      74816,     1064,     4256, 0x67ca2b35
+0,       1078,       1078,     1064,     4256, 0x6ca28f25
+0,       2142,       2142,     1078,     4312, 0xd466f806
+0,       3220,       3220,     1064,     4256, 0x59d69463
+0,       4284,       4284,     1064,     4256, 0xf4805f42
+0,       5348,       5348,     1078,     4312, 0x579f22aa
+0,       6426,       6426,     1064,     4256, 0xbdc88f45
+0,       7490,       7490,     1064,     4256, 0xc71ebf04
+0,       8554,       8554,     1078,     4312, 0xc1c86e49
+0,       9632,       9632,     1064,     4256, 0x96365506
+0,      10696,      10696,     1064,     4256, 0xaf59bfe5
+0,      11760,      11760,     1078,     4312, 0x4f00811d
+0,      12838,      12838,     1064,     4256, 0x0d8a243a
+0,      13902,      13902,     1064,     4256, 0x98fc3477
+0,      14966,      14966,     1078,     4312, 0xbf327cc1
+0,      16044,      16044,     1064,     4256, 0xdc52d5bd
+0,      17108,      17108,     1064,     4256, 0x29eb1ca6
+0,      18172,      18172,     1078,     4312, 0xf647067f
+0,      19250,      19250,     1064,     4256, 0x4f4b70db
+0,      20314,      20314,     1064,     4256, 0xa73b7e5d
+0,      21378,      21378,     1078,     4312, 0x1f5464ff
+0,      22456,      22456,     1064,     4256, 0xcd7a46f2
+0,      23520,      23520,     1064,     4256, 0x7e203f8e
+0,      24584,      24584,     1078,     4312, 0x82e5f5ee
+0,      25662,      25662,     1064,     4256, 0xfbb65050
+0,      26726,      26726,     1064,     4256, 0x474d33ff
+0,      27790,      27790,     1078,     4312, 0x737a0586
+0,      28868,      28868,     1064,     4256, 0xf677f86a
+0,      29932,      29932,     1064,     4256, 0xe35919f9
+0,      30996,      30996,     1078,     4312, 0x74f382b2
+0,      32074,      32074,     1064,     4256, 0xe10095c1
+0,      33138,      33138,     1064,     4256, 0x5af2f855
+0,      34202,      34202,     1078,     4312, 0x55239722
+0,      35280,      35280,     1064,     4256, 0xf904da45
+0,      36344,      36344,     1064,     4256, 0xdd8ca94f
+0,      37408,      37408,     1078,     4312, 0xcc7a76f6
+0,      38486,      38486,     1064,     4256, 0x67aa5b74
+0,      39550,      39550,     1064,     4256, 0x6559608f
+0,      40614,      40614,     1078,     4312, 0x7d297e71
+0,      41692,      41692,     1064,     4256, 0x15c2b2e3
+0,      42756,      42756,     1064,     4256, 0x0bf2896c
+0,      43820,      43820,     1078,     4312, 0x17f75da7
+0,      44898,      44898,     1064,     4256, 0x90b27489
+0,      45962,      45962,     1064,     4256, 0xcc7d0de4
+0,      47026,      47026,     1078,     4312, 0x0c8a0586
+0,      48104,      48104,     1064,     4256, 0x184a2e34
+0,      49168,      49168,     1064,     4256, 0xfe354354
+0,      50232,      50232,     1078,     4312, 0x4e3bd1c1
+0,      51310,      51310,     1064,     4256, 0x37a50e20
+0,      52374,      52374,     1064,     4256, 0x2556584a
+0,      53438,      53438,     1078,     4312, 0x380f3466
+0,      54516,      54516,     1064,     4256, 0x25b4a1db
+0,      55580,      55580,     1064,     4256, 0x77f15645
+0,      56644,      56644,     1078,     4312, 0x278b5864
+0,      57722,      57722,     1064,     4256, 0xcf2e701d
+0,      58786,      58786,     1064,     4256, 0x8924e60f
+0,      59850,      59850,     1078,     4312, 0x54db60f9
+0,      60928,      60928,     1064,     4256, 0x365993ef
+0,      61992,      61992,     1064,     4256, 0x37e4a996
+0,      63056,      63056,     1078,     4312, 0x0b9bda87
+0,      64134,      64134,     1064,     4256, 0x2eed0d61
+0,      65198,      65198,     1064,     4256, 0xa826a02f
+0,      66262,      66262,     1078,     4312, 0x9620255e
+0,      67340,      67340,     1064,     4256, 0x0f3cc6f2
+0,      68404,      68404,     1064,     4256, 0x14d89149
+0,      69468,      69468,     1078,     4312, 0x87c74a3d
+0,      70546,      70546,     1064,     4256, 0x73ca9dd9
+0,      71610,      71610,     1064,     4256, 0x8419cab6
+0,      72674,      72674,     1078,     4312, 0x7d1c59ec
+0,      73752,      73752,     1064,     4256, 0xbf927052
+0,      74816,      74816,     1064,     4256, 0x4a422bc3
diff --git a/tests/ref/fate/api-h264 b/tests/ref/fate/api-h264
new file mode 100644
index 00000000..e1b29f0f
--- /dev/null
+++ b/tests/ref/fate/api-h264
@@ -0,0 +1,18 @@
+#tb 0: 1/1200000
+0,          0,          0,    48000,    38016, 0xb9ac19bd
+0,          1,          1,    48000,    38016, 0x53c91566
+0,          2,          2,    48000,    38016, 0xb5bd089f
+0,          3,          3,    48000,    38016, 0xb81320bb
+0,          4,          4,    48000,    38016, 0x7c793dce
+0,          5,          5,    48000,    38016, 0x3d605b31
+0,          6,          6,    48000,    38016, 0x40216830
+0,          7,          7,    48000,    38016, 0x11c48f52
+0,          8,          8,    48000,    38016, 0x0f37a013
+0,          9,          9,    48000,    38016, 0xb06cb753
+0,         10,         10,    48000,    38016, 0x6233ca83
+0,         11,         11,    48000,    38016, 0x02b6d5a1
+0,         12,         12,    48000,    38016, 0xf76cc409
+0,         13,         13,    48000,    38016, 0xd19fa5c3
+0,         14,         14,    48000,    38016, 0x8d1c8a02
+0,         15,         15,    48000,    38016, 0x6802966d
+0,         16,         16,    48000,    38016, 0x107fb055
diff --git a/tests/ref/fate/api-mjpeg-codec-param b/tests/ref/fate/api-mjpeg-codec-param
new file mode 100644
index 00000000..6f8da74a
--- /dev/null
+++ b/tests/ref/fate/api-mjpeg-codec-param
@@ -0,0 +1,310 @@
+stream=0, decode=0
+    b=0
+    ab=0
+    bt=4000000
+    flags=0x00000000
+    me_method=5
+    time_base=0/1
+    g=12
+    ar=0
+    ac=0
+    cutoff=0
+    frame_size=0
+    delay=0
+    qcomp=0.500000
+    qblur=0.500000
+    qmin=2
+    qmax=31
+    qdiff=3
+    bf=0
+    b_qfactor=1.250000
+    rc_strategy=0
+    b_strategy=0
+    ps=0
+    mv_bits=0
+    header_bits=0
+    i_tex_bits=0
+    p_tex_bits=0
+    i_count=0
+    p_count=0
+    skip_count=0
+    misc_bits=0
+    frame_bits=0
+    codec_tag=0
+    bug=0x00000001
+    strict=0
+    b_qoffset=1.250000
+    err_detect=0x00000000
+    has_b_frames=0
+    block_align=0
+    mpeg_quant=0
+    qsquish=0.000000
+    rc_qmod_amp=0.000000
+    rc_qmod_freq=0
+    rc_override_count=0
+    rc_eq=
+    maxrate=0
+    minrate=0
+    bufsize=0
+    rc_buf_aggressivity=1.000000
+    i_qfactor=-0.800000
+    i_qoffset=0.000000
+    rc_init_cplx=0.000000
+    dct=0
+    lumi_mask=0.000000
+    tcplx_mask=0.000000
+    scplx_mask=0.000000
+    p_mask=0.000000
+    dark_mask=0.000000
+    idct=0
+    slice_count=0
+    ec=0x00000003
+    bits_per_coded_sample=0
+    pred=0
+    aspect=180/180
+    debug=0x00000000
+    vismv=0x00000000
+    cmp=0
+    subcmp=0
+    mbcmp=0
+    ildctcmp=8
+    dia_size=0
+    last_pred=0
+    preme=0
+    precmp=0
+    pre_dia_size=0
+    subq=8
+    dtg_active_format=0
+    me_range=0
+    ibias=999999
+    pbias=999999
+    global_quality=0
+    coder=0
+    context=0
+    slice_flags=0
+    xvmc_acceleration=0
+    mbd=0
+    stream_codec_tag=0
+    sc_threshold=0
+    lmin=0
+    lmax=0
+    nr=0
+    rc_init_occupancy=0
+    flags2=0x00000000
+    error=0
+    threads=1
+    me_threshold=0
+    mb_threshold=0
+    dc=0
+    nssew=8
+    skip_top=0
+    skip_bottom=0
+    profile=-99
+    level=-99
+    lowres=0
+    skip_threshold=0
+    skip_factor=0
+    skip_exp=0
+    skipcmp=13
+    border_mask=0.000000
+    mblmin=236
+    mblmax=3658
+    mepc=256
+    skip_loop_filter=0
+    skip_idct=0
+    skip_frame=0
+    bidir_refine=1
+    brd_scale=0
+    keyint_min=25
+    refs=1
+    chromaoffset=0
+    trellis=0
+    sc_factor=6
+    mv0_threshold=256
+    b_sensitivity=40
+    compression_level=-1
+    min_prediction_order=-1
+    max_prediction_order=-1
+    timecode_frame_start=-1
+    bits_per_raw_sample=8
+    channel_layout=0
+    request_channel_layout=0
+    rc_max_vbv_use=0.000000
+    rc_min_vbv_use=3.000000
+    ticks_per_frame=1
+    color_primaries=2
+    color_trc=2
+    colorspace=5
+    color_range=2
+    chroma_sample_location=2
+    log_level_offset=0
+    slices=0
+    thread_type=0x00000003
+    audio_service_type=0
+    request_sample_fmt=none
+    pkt_timebase=1/25
+    sub_charenc=
+    sub_charenc_mode=0x00000000
+    refcounted_frames=false
+    side_data_only_packets=true
+    skip_alpha=false
+    field_order=0
+    dump_separator=
+    codec_whitelist=
+    pixel_format=yuvj422p
+    video_size=400x225
+stream=0, decode=1
+    b=0
+    ab=0
+    bt=4000000
+    flags=0x00000000
+    me_method=5
+    time_base=0/1
+    g=12
+    ar=0
+    ac=0
+    cutoff=0
+    frame_size=0
+    delay=0
+    qcomp=0.500000
+    qblur=0.500000
+    qmin=2
+    qmax=31
+    qdiff=3
+    bf=0
+    b_qfactor=1.250000
+    rc_strategy=0
+    b_strategy=0
+    ps=0
+    mv_bits=0
+    header_bits=0
+    i_tex_bits=0
+    p_tex_bits=0
+    i_count=0
+    p_count=0
+    skip_count=0
+    misc_bits=0
+    frame_bits=0
+    codec_tag=0
+    bug=0x00000001
+    strict=0
+    b_qoffset=1.250000
+    err_detect=0x00000000
+    has_b_frames=0
+    block_align=0
+    mpeg_quant=0
+    qsquish=0.000000
+    rc_qmod_amp=0.000000
+    rc_qmod_freq=0
+    rc_override_count=0
+    rc_eq=
+    maxrate=0
+    minrate=0
+    bufsize=0
+    rc_buf_aggressivity=1.000000
+    i_qfactor=-0.800000
+    i_qoffset=0.000000
+    rc_init_cplx=0.000000
+    dct=0
+    lumi_mask=0.000000
+    tcplx_mask=0.000000
+    scplx_mask=0.000000
+    p_mask=0.000000
+    dark_mask=0.000000
+    idct=0
+    slice_count=0
+    ec=0x00000003
+    bits_per_coded_sample=0
+    pred=0
+    aspect=180/180
+    debug=0x00000000
+    vismv=0x00000000
+    cmp=0
+    subcmp=0
+    mbcmp=0
+    ildctcmp=8
+    dia_size=0
+    last_pred=0
+    preme=0
+    precmp=0
+    pre_dia_size=0
+    subq=8
+    dtg_active_format=0
+    me_range=0
+    ibias=999999
+    pbias=999999
+    global_quality=0
+    coder=0
+    context=0
+    slice_flags=0
+    xvmc_acceleration=0
+    mbd=0
+    stream_codec_tag=0
+    sc_threshold=0
+    lmin=0
+    lmax=0
+    nr=0
+    rc_init_occupancy=0
+    flags2=0x00000000
+    error=0
+    threads=1
+    me_threshold=0
+    mb_threshold=0
+    dc=0
+    nssew=8
+    skip_top=0
+    skip_bottom=0
+    profile=-99
+    level=-99
+    lowres=0
+    skip_threshold=0
+    skip_factor=0
+    skip_exp=0
+    skipcmp=13
+    border_mask=0.000000
+    mblmin=236
+    mblmax=3658
+    mepc=256
+    skip_loop_filter=0
+    skip_idct=0
+    skip_frame=0
+    bidir_refine=1
+    brd_scale=0
+    keyint_min=25
+    refs=1
+    chromaoffset=0
+    trellis=0
+    sc_factor=6
+    mv0_threshold=256
+    b_sensitivity=40
+    compression_level=-1
+    min_prediction_order=-1
+    max_prediction_order=-1
+    timecode_frame_start=-1
+    bits_per_raw_sample=8
+    channel_layout=0
+    request_channel_layout=0
+    rc_max_vbv_use=0.000000
+    rc_min_vbv_use=3.000000
+    ticks_per_frame=1
+    color_primaries=2
+    color_trc=2
+    colorspace=5
+    color_range=2
+    chroma_sample_location=2
+    log_level_offset=0
+    slices=0
+    thread_type=0x00000003
+    audio_service_type=0
+    request_sample_fmt=none
+    pkt_timebase=1/25
+    sub_charenc=
+    sub_charenc_mode=0x00000000
+    refcounted_frames=false
+    side_data_only_packets=true
+    skip_alpha=false
+    field_order=0
+    dump_separator=
+    codec_whitelist=
+    pixel_format=yuvj422p
+    video_size=400x225
diff --git a/tests/ref/fate/api-png-codec-param b/tests/ref/fate/api-png-codec-param
new file mode 100644
index 00000000..22637741
--- /dev/null
+++ b/tests/ref/fate/api-png-codec-param
@@ -0,0 +1,310 @@
+stream=0, decode=0
+    b=0
+    ab=0
+    bt=4000000
+    flags=0x00000000
+    me_method=5
+    time_base=0/1
+    g=12
+    ar=0
+    ac=0
+    cutoff=0
+    frame_size=0
+    delay=0
+    qcomp=0.500000
+    qblur=0.500000
+    qmin=2
+    qmax=31
+    qdiff=3
+    bf=0
+    b_qfactor=1.250000
+    rc_strategy=0
+    b_strategy=0
+    ps=0
+    mv_bits=0
+    header_bits=0
+    i_tex_bits=0
+    p_tex_bits=0
+    i_count=0
+    p_count=0
+    skip_count=0
+    misc_bits=0
+    frame_bits=0
+    codec_tag=0
+    bug=0x00000001
+    strict=0
+    b_qoffset=1.250000
+    err_detect=0x00000000
+    has_b_frames=0
+    block_align=0
+    mpeg_quant=0
+    qsquish=0.000000
+    rc_qmod_amp=0.000000
+    rc_qmod_freq=0
+    rc_override_count=0
+    rc_eq=
+    maxrate=0
+    minrate=0
+    bufsize=0
+    rc_buf_aggressivity=1.000000
+    i_qfactor=-0.800000
+    i_qoffset=0.000000
+    rc_init_cplx=0.000000
+    dct=0
+    lumi_mask=0.000000
+    tcplx_mask=0.000000
+    scplx_mask=0.000000
+    p_mask=0.000000
+    dark_mask=0.000000
+    idct=0
+    slice_count=0
+    ec=0x00000003
+    bits_per_coded_sample=0
+    pred=0
+    aspect=2835/2835
+    debug=0x00000000
+    vismv=0x00000000
+    cmp=0
+    subcmp=0
+    mbcmp=0
+    ildctcmp=8
+    dia_size=0
+    last_pred=0
+    preme=0
+    precmp=0
+    pre_dia_size=0
+    subq=8
+    dtg_active_format=0
+    me_range=0
+    ibias=999999
+    pbias=999999
+    global_quality=0
+    coder=0
+    context=0
+    slice_flags=0
+    xvmc_acceleration=0
+    mbd=0
+    stream_codec_tag=0
+    sc_threshold=0
+    lmin=0
+    lmax=0
+    nr=0
+    rc_init_occupancy=0
+    flags2=0x00000000
+    error=0
+    threads=1
+    me_threshold=0
+    mb_threshold=0
+    dc=0
+    nssew=8
+    skip_top=0
+    skip_bottom=0
+    profile=-99
+    level=-99
+    lowres=0
+    skip_threshold=0
+    skip_factor=0
+    skip_exp=0
+    skipcmp=13
+    border_mask=0.000000
+    mblmin=236
+    mblmax=3658
+    mepc=256
+    skip_loop_filter=0
+    skip_idct=0
+    skip_frame=0
+    bidir_refine=1
+    brd_scale=0
+    keyint_min=25
+    refs=1
+    chromaoffset=0
+    trellis=0
+    sc_factor=6
+    mv0_threshold=256
+    b_sensitivity=40
+    compression_level=-1
+    min_prediction_order=-1
+    max_prediction_order=-1
+    timecode_frame_start=-1
+    bits_per_raw_sample=0
+    channel_layout=0
+    request_channel_layout=0
+    rc_max_vbv_use=0.000000
+    rc_min_vbv_use=3.000000
+    ticks_per_frame=1
+    color_primaries=2
+    color_trc=2
+    colorspace=2
+    color_range=2
+    chroma_sample_location=0
+    log_level_offset=0
+    slices=0
+    thread_type=0x00000003
+    audio_service_type=0
+    request_sample_fmt=none
+    pkt_timebase=1/25
+    sub_charenc=
+    sub_charenc_mode=0x00000000
+    refcounted_frames=false
+    side_data_only_packets=true
+    skip_alpha=false
+    field_order=0
+    dump_separator=
+    codec_whitelist=
+    pixel_format=rgba
+    video_size=128x128
+stream=0, decode=1
+    b=0
+    ab=0
+    bt=4000000
+    flags=0x00000000
+    me_method=5
+    time_base=0/1
+    g=12
+    ar=0
+    ac=0
+    cutoff=0
+    frame_size=0
+    delay=0
+    qcomp=0.500000
+    qblur=0.500000
+    qmin=2
+    qmax=31
+    qdiff=3
+    bf=0
+    b_qfactor=1.250000
+    rc_strategy=0
+    b_strategy=0
+    ps=0
+    mv_bits=0
+    header_bits=0
+    i_tex_bits=0
+    p_tex_bits=0
+    i_count=0
+    p_count=0
+    skip_count=0
+    misc_bits=0
+    frame_bits=0
+    codec_tag=0
+    bug=0x00000001
+    strict=0
+    b_qoffset=1.250000
+    err_detect=0x00000000
+    has_b_frames=0
+    block_align=0
+    mpeg_quant=0
+    qsquish=0.000000
+    rc_qmod_amp=0.000000
+    rc_qmod_freq=0
+    rc_override_count=0
+    rc_eq=
+    maxrate=0
+    minrate=0
+    bufsize=0
+    rc_buf_aggressivity=1.000000
+    i_qfactor=-0.800000
+    i_qoffset=0.000000
+    rc_init_cplx=0.000000
+    dct=0
+    lumi_mask=0.000000
+    tcplx_mask=0.000000
+    scplx_mask=0.000000
+    p_mask=0.000000
+    dark_mask=0.000000
+    idct=0
+    slice_count=0
+    ec=0x00000003
+    bits_per_coded_sample=0
+    pred=0
+    aspect=2835/2835
+    debug=0x00000000
+    vismv=0x00000000
+    cmp=0
+    subcmp=0
+    mbcmp=0
+    ildctcmp=8
+    dia_size=0
+    last_pred=0
+    preme=0
+    precmp=0
+    pre_dia_size=0
+    subq=8
+    dtg_active_format=0
+    me_range=0
+    ibias=999999
+    pbias=999999
+    global_quality=0
+    coder=0
+    context=0
+    slice_flags=0
+    xvmc_acceleration=0
+    mbd=0
+    stream_codec_tag=0
+    sc_threshold=0
+    lmin=0
+    lmax=0
+    nr=0
+    rc_init_occupancy=0
+    flags2=0x00000000
+    error=0
+    threads=1
+    me_threshold=0
+    mb_threshold=0
+    dc=0
+    nssew=8
+    skip_top=0
+    skip_bottom=0
+    profile=-99
+    level=-99
+    lowres=0
+    skip_threshold=0
+    skip_factor=0
+    skip_exp=0
+    skipcmp=13
+    border_mask=0.000000
+    mblmin=236
+    mblmax=3658
+    mepc=256
+    skip_loop_filter=0
+    skip_idct=0
+    skip_frame=0
+    bidir_refine=1
+    brd_scale=0
+    keyint_min=25
+    refs=1
+    chromaoffset=0
+    trellis=0
+    sc_factor=6
+    mv0_threshold=256
+    b_sensitivity=40
+    compression_level=-1
+    min_prediction_order=-1
+    max_prediction_order=-1
+    timecode_frame_start=-1
+    bits_per_raw_sample=0
+    channel_layout=0
+    request_channel_layout=0
+    rc_max_vbv_use=0.000000
+    rc_min_vbv_use=3.000000
+    ticks_per_frame=1
+    color_primaries=2
+    color_trc=2
+    colorspace=2
+    color_range=2
+    chroma_sample_location=0
+    log_level_offset=0
+    slices=0
+    thread_type=0x00000003
+    audio_service_type=0
+    request_sample_fmt=none
+    pkt_timebase=1/25
+    sub_charenc=
+    sub_charenc_mode=0x00000000
+    refcounted_frames=false
+    side_data_only_packets=true
+    skip_alpha=false
+    field_order=0
+    dump_separator=
+    codec_whitelist=
+    pixel_format=rgba
+    video_size=128x128
diff --git a/tests/ref/fate/asf-repldata b/tests/ref/fate/asf-repldata
new file mode 100644
index 00000000..ff12ff0b
--- /dev/null
+++ b/tests/ref/fate/asf-repldata
@@ -0,0 +1,15 @@
+#tb 0: 1/1000
+0,          0,          0,        0,    23374, 0x8725b3b8
+0,        122,        122,        0,    13732, 0x3ac8531a, F=0x0
+0,        245,        245,        0,      615, 0xd31641b4, F=0x0
+0,        367,        367,        0,     6361, 0xf263af54, F=0x0
+0,        490,        490,        0,      320, 0xd6f2d6b8, F=0x0
+0,        612,        612,        0,     3750, 0xfcf1d501, F=0x0
+0,        735,        735,        0,     2541, 0xd9fc04f9, F=0x0
+0,        857,        857,        0,      205, 0x4d38a947, F=0x0
+0,        980,        980,        0,     2166, 0x2f1e7d74, F=0x0
+0,       1102,       1102,        0,     1667, 0x0cd84b61, F=0x0
+0,       1224,       1224,        0,    13645, 0x543bd032, F=0x0
+0,       1347,       1347,        0,     5953, 0xc3037c73, F=0x0
+0,       1469,       1469,        0,    36169, 0xca9f716d
+0,       1592,       1592,        0,     3030, 0x9aba5683, F=0x0
diff --git a/tests/ref/fate/async b/tests/ref/fate/async
new file mode 100644
index 00000000..c4c0b0be
--- /dev/null
+++ b/tests/ref/fate/async
@@ -0,0 +1,9 @@
+open: 0
+size: 2048
+read: 2048
+read: 0
+seek: 1536
+read: 512
+read: 0
+open: 0
+read: -10000
diff --git a/tests/ref/fate/bcstm b/tests/ref/fate/bcstm
new file mode 100644
index 00000000..cde12b67
--- /dev/null
+++ b/tests/ref/fate/bcstm
@@ -0,0 +1 @@
+CRC=0xca62d03b
diff --git a/tests/ref/fate/bfstm b/tests/ref/fate/bfstm
new file mode 100644
index 00000000..4696ca67
--- /dev/null
+++ b/tests/ref/fate/bfstm
@@ -0,0 +1 @@
+CRC=0xbd3d0d33
diff --git a/tests/ref/fate/binsub-mksenc b/tests/ref/fate/binsub-mksenc
index c4734971..128ca31e 100644
--- a/tests/ref/fate/binsub-mksenc
+++ b/tests/ref/fate/binsub-mksenc
@@ -1 +1 @@
-2dad5f63688ec613a04e94c8d4d167db
+37a212f8d56ad71e7466d5129f88e756
diff --git a/tests/ref/fate/cdgraphics b/tests/ref/fate/cdgraphics
index a7820591..ee02f5d5 100644
--- a/tests/ref/fate/cdgraphics
+++ b/tests/ref/fate/cdgraphics
@@ -1,213 +1,213 @@
 #tb 0: 1/300
-0,          0,          0,        1,   194400, 0x46ad80da
-0,          1,          1,        1,   194400, 0x46ad80da
-0,          2,          2,        1,   194400, 0x9392c3b9
-0,          3,          3,        1,   194400, 0x9392c3b9
-0,          4,          4,        1,   194400, 0x9392c3b9
-0,          5,          5,        1,   194400, 0x9392c3b9
-0,          6,          6,        1,   194400, 0x9392c3b9
-0,          7,          7,        1,   194400, 0x9392c3b9
-0,          8,          8,        1,   194400, 0x9392c3b9
-0,          9,          9,        1,   194400, 0x9392c3b9
-0,         10,         10,        1,   194400, 0x9392c3b9
-0,         11,         11,        1,   194400, 0x9392c3b9
-0,         12,         12,        1,   194400, 0x9392c3b9
-0,         13,         13,        1,   194400, 0x9392c3b9
-0,         14,         14,        1,   194400, 0x9392c3b9
-0,         15,         15,        1,   194400, 0x9392c3b9
-0,         16,         16,        1,   194400, 0x46ad80da
-0,         17,         17,        1,   194400, 0x46ad80da
-0,         18,         18,        1,   194400, 0x46ad80da
-0,         19,         19,        1,   194400, 0x46ad80da
-0,         20,         20,        1,   194400, 0x46ad80da
-0,         21,         21,        1,   194400, 0x46ad80da
-0,         22,         22,        1,   194400, 0x46ad80da
-0,         23,         23,        1,   194400, 0x46ad80da
-0,         24,         24,        1,   194400, 0x46ad80da
-0,         25,         25,        1,   194400, 0x46ad80da
-0,         26,         26,        1,   194400, 0x46ad80da
-0,         27,         27,        1,   194400, 0x46ad80da
-0,         28,         28,        1,   194400, 0x46ad80da
-0,         29,         29,        1,   194400, 0x46ad80da
-0,         30,         30,        1,   194400, 0x46ad80da
-0,         31,         31,        1,   194400, 0x46ad80da
-0,         32,         32,        1,   194400, 0x9392c3b9
-0,         33,         33,        1,   194400, 0x9ff8cbb1
-0,         34,         34,        1,   194400, 0xd015dba1
-0,         35,         35,        1,   194400, 0x6a39f18b
-0,         37,         37,        1,   194400, 0x7b8cf983
-0,         38,         38,        1,   194400, 0x07a20f7c
-0,         40,         40,        1,   194400, 0xa63e2962
-0,         41,         41,        1,   194400, 0x2dd54447
-0,         43,         43,        1,   194400, 0x90735e2d
-0,         44,         44,        1,   194400, 0x90d98506
-0,         46,         46,        1,   194400, 0xe5b08ffb
-0,         47,         47,        1,   194400, 0x7a0d95f5
-0,         49,         49,        1,   194400, 0xff6bacde
-0,         50,         50,        1,   194400, 0xd998c2c8
-0,         52,         52,        1,   194400, 0x3d1ddfab
-0,         53,         53,        1,   194400, 0x817de4a6
-0,         55,         55,        1,   194400, 0xfa3ef694
-0,         56,         56,        1,   194400, 0x0b5bfb8f
-0,         58,         58,        1,   194400, 0x00f62376
-0,         59,         59,        1,   194400, 0x2f6b2d6c
-0,         61,         61,        1,   194400, 0x40cb4752
-0,         62,         62,        1,   194400, 0xd8456435
-0,         64,         64,        1,   194400, 0x459f6a2f
-0,         65,         65,        1,   194400, 0x9b678910
-0,         67,         67,        1,   194400, 0x8791a1f7
-0,         68,         68,        1,   194400, 0xdb4ac5d3
-0,         70,         70,        1,   194400, 0xb223c8d0
-0,         71,         71,        1,   194400, 0x4a9ce7b1
-0,         73,         73,        1,   194400, 0x187eeaae
-0,         74,         74,        1,   194400, 0xc712f8a0
-0,         76,         76,        1,   194400, 0x549c00a7
-0,         77,         77,        1,   194400, 0x4d991295
-0,         79,         79,        1,   194400, 0xc41b2681
-0,         80,         80,        1,   194400, 0xed5a3077
-0,         82,         82,        1,   194400, 0x85ad4463
-0,         83,         83,        1,   194400, 0xb98f4760
-0,         85,         85,        1,   194400, 0x87ef5e49
-0,         86,         86,        1,   194400, 0x830a6146
-0,         88,         88,        1,   194400, 0xe33a792e
-0,         89,         89,        1,   194400, 0x83517a2d
-0,         91,         91,        1,   194400, 0xa97e9314
-0,         92,         92,        1,   194400, 0x39059611
-0,         94,         94,        1,   194400, 0xbf4eb9ed
-0,         95,         95,        1,   194400, 0xe5afc4e2
-0,         97,         97,        1,   194400, 0x35d4cdd9
-0,         98,         98,        1,   194400, 0xb376e1c5
-0,        100,        100,        1,   194400, 0x6128e3c3
-0,        101,        101,        1,   194400, 0x30b7f7af
-0,        103,        103,        1,   194400, 0xf1effaac
-0,        104,        104,        1,   194400, 0x483914a1
-0,        106,        106,        1,   194400, 0xbd48199c
-0,        107,        107,        1,   194400, 0x382f2d88
-0,        109,        109,        1,   194400, 0x5a573085
-0,        110,        110,        1,   194400, 0x89733580
-0,        112,        112,        1,   194400, 0xd1325a5b
-0,        113,        113,        1,   194400, 0x655b6253
-0,        115,        115,        1,   194400, 0x55146352
-0,        116,        116,        1,   194400, 0xda527c39
-0,        118,        118,        1,   194400, 0xb0cd7e37
-0,        119,        119,        1,   194400, 0x25e7991c
-0,        121,        121,        1,   194400, 0x5c22a411
-0,        122,        122,        1,   194400, 0x1e2abdf7
-0,        124,        124,        1,   194400, 0x8308bff5
-0,        125,        125,        1,   194400, 0xfdbfd6de
-0,        127,        127,        1,   194400, 0xd4d4d9db
-0,        128,        128,        1,   194400, 0xa449fbb9
-0,        130,        130,        1,   194400, 0x3dcafdb7
-0,        131,        131,        1,   194400, 0x6f1f01c2
-0,        133,        133,        1,   194400, 0xf54a1da6
-0,        134,        134,        1,   194400, 0x88d11fa4
-0,        136,        136,        1,   194400, 0x59642d96
-0,        137,        137,        1,   194400, 0x8ba44182
-0,        139,        139,        1,   194400, 0x88f56360
-0,        140,        140,        1,   194400, 0xfb246d56
-0,        142,        142,        1,   194400, 0xad128043
-0,        143,        143,        1,   194400, 0x3a4f8a39
-0,        145,        145,        1,   194400, 0x563d9d26
-0,        146,        146,        1,   194400, 0x6ff8a320
-0,        148,        148,        1,   194400, 0xcdb9b70c
-0,        149,        149,        1,   194400, 0x99c2bd06
-0,        151,        151,        1,   194400, 0x4b47cef4
-0,        152,        152,        1,   194400, 0x10b9dce6
-0,        154,        154,        1,   194400, 0xdd39f1d1
-0,        155,        155,        1,   194400, 0xbcf104cd
-0,        157,        157,        1,   194400, 0x85ec17ba
-0,        158,        158,        1,   194400, 0x069219b8
-0,        160,        160,        1,   194400, 0x84dd3899
-0,        161,        161,        1,   194400, 0xacca4190
-0,        163,        163,        1,   194400, 0xcf5b5d74
-0,        164,        164,        1,   194400, 0x4b8c626f
-0,        166,        166,        1,   194400, 0xf0817958
-0,        167,        167,        1,   194400, 0xc0887e53
-0,        169,        169,        1,   194400, 0x42e6854c
-0,        170,        170,        1,   194400, 0x036c9140
-0,        172,        172,        1,   194400, 0x0f21a62b
-0,        173,        173,        1,   194400, 0xcdaeaa27
-0,        175,        175,        1,   194400, 0xe425bc15
-0,        176,        176,        1,   194400, 0x8e18c20f
-0,        178,        178,        1,   194400, 0x767cd5fb
-0,        179,        179,        1,   194400, 0x554ae6ea
-0,        181,        181,        1,   194400, 0xeac1f9d7
-0,        182,        182,        1,   194400, 0x0b32fed2
-0,        184,        184,        1,   194400, 0xe30c19c6
-0,        185,        185,        1,   194400, 0x6a8a23bc
-0,        187,        187,        1,   194400, 0x26bf36a9
-0,        188,        188,        1,   194400, 0x1e4f3fa0
-0,        190,        190,        1,   194400, 0x231f5986
-0,        191,        191,        1,   194400, 0xf557756a
-0,        193,        193,        1,   194400, 0x6bce805f
-0,        194,        194,        1,   194400, 0xcd80924d
-0,        196,        196,        1,   194400, 0x65dc9f40
-0,        197,        197,        1,   194400, 0x2ab7af30
-0,        199,        199,        1,   194400, 0xd43cb728
-0,        200,        200,        1,   194400, 0x05d9c916
-0,        202,        202,        1,   194400, 0x43cad10e
-0,        203,        203,        1,   194400, 0x06b5e0fe
-0,        205,        205,        1,   194400, 0xa142f0ee
-0,        206,        206,        1,   194400, 0xed7f03ea
-0,        208,        208,        1,   194400, 0xf26019d4
-0,        209,        209,        1,   194400, 0x3b7f29c4
-0,        211,        211,        1,   194400, 0x30282ebf
-0,        212,        212,        1,   194400, 0xaeff4aa3
-0,        214,        214,        1,   194400, 0x1d355697
-0,        215,        215,        1,   194400, 0x2ead6f7e
-0,        217,        217,        1,   194400, 0xf1b67776
-0,        218,        218,        1,   194400, 0x93b38b62
-0,        220,        220,        1,   194400, 0x9469905d
-0,        221,        221,        1,   194400, 0x27bf9756
-0,        223,        223,        1,   194400, 0xd016a548
-0,        224,        224,        1,   194400, 0x6889b835
-0,        226,        226,        1,   194400, 0x6a05be2f
-0,        227,        227,        1,   194400, 0xe0a1ce1f
-0,        229,        229,        1,   194400, 0x8fdbd617
-0,        230,        230,        1,   194400, 0xd68fe805
-0,        232,        232,        1,   194400, 0x0d1dfbf1
-0,        233,        233,        1,   194400, 0x0fe70bf0
-0,        235,        235,        1,   194400, 0x0a8f13e8
-0,        236,        236,        1,   194400, 0x0ca42bd0
-0,        238,        238,        1,   194400, 0x6f3838c3
-0,        239,        239,        1,   194400, 0x045448b3
-0,        241,        241,        1,   194400, 0x764349b2
-0,        242,        242,        1,   194400, 0xed1651aa
-0,        244,        244,        1,   194400, 0xbb376398
-0,        245,        245,        1,   194400, 0xd0d5718a
-0,        247,        247,        1,   194400, 0xcd977e7d
-0,        248,        248,        1,   194400, 0x8cb39665
-0,        250,        250,        1,   194400, 0xb935b04b
-0,        251,        251,        1,   194400, 0x0292be3d
-0,        253,        253,        1,   194400, 0x4f21c833
-0,        254,        254,        1,   194400, 0xa5c7d823
-0,        256,        256,        1,   194400, 0xfb8ee01b
-0,        257,        257,        1,   194400, 0xea53ee0d
-0,        259,        259,        1,   194400, 0x803efcfe
-0,        260,        260,        1,   194400, 0x2c0e0aff
-0,        262,        262,        1,   194400, 0x3df318f1
-0,        263,        263,        1,   194400, 0xc4cb26e3
-0,        265,        265,        1,   194400, 0x92a033d6
-0,        266,        266,        1,   194400, 0x1b2048c1
-0,        268,        268,        1,   194400, 0x236858b1
-0,        269,        269,        1,   194400, 0x482f6d9c
-0,        271,        271,        1,   194400, 0x9ee97891
-0,        272,        272,        1,   194400, 0xe0dc8683
-0,        274,        274,        1,   194400, 0x461b9079
-0,        275,        275,        1,   194400, 0xd346a960
-0,        277,        277,        1,   194400, 0xa384b554
-0,        278,        278,        1,   194400, 0x3246cf3a
-0,        280,        280,        1,   194400, 0xa53fe722
-0,        281,        281,        1,   194400, 0xe620fd0c
-0,        283,        283,        1,   194400, 0xd6370414
-0,        284,        284,        1,   194400, 0xf57f1404
-0,        286,        286,        1,   194400, 0x8c6420f7
-0,        287,        287,        1,   194400, 0xd4be3add
-0,        289,        289,        1,   194400, 0xa8dc4ec9
-0,        290,        290,        1,   194400, 0xda1563b4
-0,        292,        292,        1,   194400, 0xd51873a4
-0,        293,        293,        1,   194400, 0x68588196
-0,        295,        295,        1,   194400, 0x40d18e89
-0,        296,        296,        1,   194400, 0x1b75a275
-0,        298,        298,        1,   194400, 0xedd1a572
-0,        299,        299,        1,   194400, 0x55daad6a
+0,          0,          0,        1,   259200, 0x29aeb27e
+0,          1,          1,        1,   259200, 0x29aeb27e
+0,          2,          2,        1,   259200, 0x6779f55d
+0,          3,          3,        1,   259200, 0x6779f55d
+0,          4,          4,        1,   259200, 0x6779f55d
+0,          5,          5,        1,   259200, 0x6779f55d
+0,          6,          6,        1,   259200, 0x6779f55d
+0,          7,          7,        1,   259200, 0x6779f55d
+0,          8,          8,        1,   259200, 0x6779f55d
+0,          9,          9,        1,   259200, 0x6779f55d
+0,         10,         10,        1,   259200, 0x6779f55d
+0,         11,         11,        1,   259200, 0x6779f55d
+0,         12,         12,        1,   259200, 0x6779f55d
+0,         13,         13,        1,   259200, 0xf33cc0c4
+0,         14,         14,        1,   259200, 0xf33cc0c4
+0,         15,         15,        1,   259200, 0xf33cc0c4
+0,         16,         16,        1,   259200, 0x098280da
+0,         17,         17,        1,   259200, 0x098280da
+0,         18,         18,        1,   259200, 0x098280da
+0,         19,         19,        1,   259200, 0x098280da
+0,         20,         20,        1,   259200, 0x098280da
+0,         21,         21,        1,   259200, 0x098280da
+0,         22,         22,        1,   259200, 0x098280da
+0,         23,         23,        1,   259200, 0x098280da
+0,         24,         24,        1,   259200, 0x098280da
+0,         25,         25,        1,   259200, 0x098280da
+0,         26,         26,        1,   259200, 0x098280da
+0,         27,         27,        1,   259200, 0x098280da
+0,         28,         28,        1,   259200, 0x098280da
+0,         29,         29,        1,   259200, 0x098280da
+0,         30,         30,        1,   259200, 0x098280da
+0,         31,         31,        1,   259200, 0x098280da
+0,         32,         32,        1,   259200, 0xf33cc0c4
+0,         33,         33,        1,   259200, 0xa389d0b4
+0,         34,         34,        1,   259200, 0xecf1f094
+0,         35,         35,        1,   259200, 0xe7171c77
+0,         37,         37,        1,   259200, 0xf9d72c67
+0,         38,         38,        1,   259200, 0x7932583b
+0,         40,         40,        1,   259200, 0x1c618c07
+0,         41,         41,        1,   259200, 0x7e88c1d1
+0,         43,         43,        1,   259200, 0x2c5df59d
+0,         44,         44,        1,   259200, 0xa78f435e
+0,         46,         46,        1,   259200, 0xb96e5948
+0,         47,         47,        1,   259200, 0x85c1653c
+0,         49,         49,        1,   259200, 0xefdc930e
+0,         50,         50,        1,   259200, 0xea24bee2
+0,         52,         52,        1,   259200, 0x8feef8a8
+0,         53,         53,        1,   259200, 0x8a7b02ad
+0,         55,         55,        1,   259200, 0xe41b2689
+0,         56,         56,        1,   259200, 0x007d307f
+0,         58,         58,        1,   259200, 0xb0c9802f
+0,         59,         59,        1,   259200, 0xb50c941b
+0,         61,         61,        1,   259200, 0xdf7dc7e7
+0,         62,         62,        1,   259200, 0xbaac01bc
+0,         64,         64,        1,   259200, 0x1ef70db0
+0,         65,         65,        1,   259200, 0xeeab4b72
+0,         67,         67,        1,   259200, 0x0eaf7d40
+0,         68,         68,        1,   259200, 0xc7bec4f8
+0,         70,         70,        1,   259200, 0xa506caf2
+0,         71,         71,        1,   259200, 0x268808c3
+0,         73,         73,        1,   259200, 0xebd90ebd
+0,         74,         74,        1,   259200, 0x8d792aa1
+0,         76,         76,        1,   259200, 0x960d3a91
+0,         77,         77,        1,   259200, 0xf0395e6d
+0,         79,         79,        1,   259200, 0x3d0e8645
+0,         80,         80,        1,   259200, 0x88bc9a31
+0,         82,         82,        1,   259200, 0xda37c209
+0,         83,         83,        1,   259200, 0xafa6c803
+0,         85,         85,        1,   259200, 0xdc58f5d5
+0,         86,         86,        1,   259200, 0x1a5ffbcf
+0,         88,         88,        1,   259200, 0xc8872bae
+0,         89,         89,        1,   259200, 0xc5472dac
+0,         91,         91,        1,   259200, 0xd54e5f7a
+0,         92,         92,        1,   259200, 0xf4576574
+0,         94,         94,        1,   259200, 0xdefaad2c
+0,         95,         95,        1,   259200, 0xca44c316
+0,         97,         97,        1,   259200, 0x8102d504
+0,         98,         98,        1,   259200, 0xe0c8fcdc
+0,        100,        100,        1,   259200, 0x53c100e7
+0,        101,        101,        1,   259200, 0x8de628bf
+0,        103,        103,        1,   259200, 0x86f92eb9
+0,        104,        104,        1,   259200, 0x13c66285
+0,        106,        106,        1,   259200, 0xe5766c7b
+0,        107,        107,        1,   259200, 0x3ddb9453
+0,        109,        109,        1,   259200, 0x39559a4d
+0,        110,        110,        1,   259200, 0x507da443
+0,        112,        112,        1,   259200, 0x3b8dedf9
+0,        113,        113,        1,   259200, 0x55c9fde9
+0,        115,        115,        1,   259200, 0x7c43ffe7
+0,        116,        116,        1,   259200, 0xdf2c31c4
+0,        118,        118,        1,   259200, 0x144735c0
+0,        119,        119,        1,   259200, 0x9a716b8a
+0,        121,        121,        1,   259200, 0x5ab08174
+0,        122,        122,        1,   259200, 0x06dcb540
+0,        124,        124,        1,   259200, 0x0d09b93c
+0,        125,        125,        1,   259200, 0x5abce70e
+0,        127,        127,        1,   259200, 0x38a4ed08
+0,        128,        128,        1,   259200, 0x427b30d3
+0,        130,        130,        1,   259200, 0x7f8d34cf
+0,        131,        131,        1,   259200, 0xf5613cc7
+0,        133,        133,        1,   259200, 0x5086748f
+0,        134,        134,        1,   259200, 0x7da8788b
+0,        136,        136,        1,   259200, 0x79c8946f
+0,        137,        137,        1,   259200, 0x65d9bc47
+0,        139,        139,        1,   259200, 0x3f5a0012
+0,        140,        140,        1,   259200, 0x4d9713fe
+0,        142,        142,        1,   259200, 0x917839d8
+0,        143,        143,        1,   259200, 0x92624dc4
+0,        145,        145,        1,   259200, 0x465c739e
+0,        146,        146,        1,   259200, 0x21137f92
+0,        148,        148,        1,   259200, 0xd677a76a
+0,        149,        149,        1,   259200, 0xe1efb35e
+0,        151,        151,        1,   259200, 0xd2d6d73a
+0,        152,        152,        1,   259200, 0x06a8f31e
+0,        154,        154,        1,   259200, 0x8aaf1d03
+0,        155,        155,        1,   259200, 0x9e0a42dd
+0,        157,        157,        1,   259200, 0x1f7268b7
+0,        158,        158,        1,   259200, 0x1a3c6cb3
+0,        160,        160,        1,   259200, 0x5607aa75
+0,        161,        161,        1,   259200, 0xa19ebc63
+0,        163,        163,        1,   259200, 0x9dbaf42b
+0,        164,        164,        1,   259200, 0x2d06fe21
+0,        166,        166,        1,   259200, 0x960e2c02
+0,        167,        167,        1,   259200, 0x04f935f8
+0,        169,        169,        1,   259200, 0x488143ea
+0,        170,        170,        1,   259200, 0xcb515bd2
+0,        172,        172,        1,   259200, 0xa27c85a8
+0,        173,        173,        1,   259200, 0x3bb38da0
+0,        175,        175,        1,   259200, 0x39d9b17c
+0,        176,        176,        1,   259200, 0x3fbcbd70
+0,        178,        178,        1,   259200, 0xbc19e548
+0,        179,        179,        1,   259200, 0x29400735
+0,        181,        181,        1,   259200, 0x21612d0f
+0,        182,        182,        1,   259200, 0x66943705
+0,        184,        184,        1,   259200, 0x9efa6ccf
+0,        185,        185,        1,   259200, 0x3b4180bb
+0,        187,        187,        1,   259200, 0x9a8aa695
+0,        188,        188,        1,   259200, 0x6529b883
+0,        190,        190,        1,   259200, 0x6e1aec4f
+0,        191,        191,        1,   259200, 0x3ebc2426
+0,        193,        193,        1,   259200, 0xff6e3a10
+0,        194,        194,        1,   259200, 0x70f05dec
+0,        196,        196,        1,   259200, 0x853777d2
+0,        197,        197,        1,   259200, 0x05ea97b2
+0,        199,        199,        1,   259200, 0x03f3a7a2
+0,        200,        200,        1,   259200, 0xf4f7cb7e
+0,        202,        202,        1,   259200, 0xd411db6e
+0,        203,        203,        1,   259200, 0xfa3afb4e
+0,        205,        205,        1,   259200, 0x0a451b3d
+0,        206,        206,        1,   259200, 0x94624117
+0,        208,        208,        1,   259200, 0x00996ceb
+0,        209,        209,        1,   259200, 0x8c898ccb
+0,        211,        211,        1,   259200, 0x5d2496c1
+0,        212,        212,        1,   259200, 0x4f5fce89
+0,        214,        214,        1,   259200, 0xf6a0e671
+0,        215,        215,        1,   259200, 0xcf6f184e
+0,        217,        217,        1,   259200, 0x66e2283e
+0,        218,        218,        1,   259200, 0x7cd25016
+0,        220,        220,        1,   259200, 0xc2e05a0c
+0,        221,        221,        1,   259200, 0x33b767fe
+0,        223,        223,        1,   259200, 0x6f5583e2
+0,        224,        224,        1,   259200, 0x19f3a9bc
+0,        226,        226,        1,   259200, 0xb3f3b5b0
+0,        227,        227,        1,   259200, 0x6417d590
+0,        229,        229,        1,   259200, 0x7130e580
+0,        230,        230,        1,   259200, 0x4558096b
+0,        232,        232,        1,   259200, 0x920c3143
+0,        233,        233,        1,   259200, 0xb7e75123
+0,        235,        235,        1,   259200, 0x38e46113
+0,        236,        236,        1,   259200, 0x96ba90e3
+0,        238,        238,        1,   259200, 0xc65faac9
+0,        239,        239,        1,   259200, 0x7260caa9
+0,        241,        241,        1,   259200, 0x4983cca7
+0,        242,        242,        1,   259200, 0xc04ddc97
+0,        244,        244,        1,   259200, 0x52de0082
+0,        245,        245,        1,   259200, 0xb1cb1c66
+0,        247,        247,        1,   259200, 0x273b364c
+0,        248,        248,        1,   259200, 0xd26a661c
+0,        250,        250,        1,   259200, 0x9a9599e8
+0,        251,        251,        1,   259200, 0x2e16b5cc
+0,        253,        253,        1,   259200, 0x2d4ec9b8
+0,        254,        254,        1,   259200, 0x32d8e998
+0,        256,        256,        1,   259200, 0x5182f988
+0,        257,        257,        1,   259200, 0x48d7157b
+0,        259,        259,        1,   259200, 0xa50f335d
+0,        260,        260,        1,   259200, 0x3f274f41
+0,        262,        262,        1,   259200, 0xe9776b25
+0,        263,        263,        1,   259200, 0x76728709
+0,        265,        265,        1,   259200, 0x1960a0ef
+0,        266,        266,        1,   259200, 0x3d3acac5
+0,        268,        268,        1,   259200, 0x1c6aeaa5
+0,        269,        269,        1,   259200, 0x8bc0148a
+0,        271,        271,        1,   259200, 0xa2a72a74
+0,        272,        272,        1,   259200, 0x77db4658
+0,        274,        274,        1,   259200, 0x63705a44
+0,        275,        275,        1,   259200, 0x30db8c12
+0,        277,        277,        1,   259200, 0x32f5a3fa
+0,        278,        278,        1,   259200, 0x0107d7c6
+0,        280,        280,        1,   259200, 0xe14707a5
+0,        281,        281,        1,   259200, 0xed7e3379
+0,        283,        283,        1,   259200, 0x55bc416b
+0,        284,        284,        1,   259200, 0x1cf1614b
+0,        286,        286,        1,   259200, 0x2d507b31
+0,        287,        287,        1,   259200, 0x3fbbaefd
+0,        289,        289,        1,   259200, 0x8608d6d5
+0,        290,        290,        1,   259200, 0x169d00ba
+0,        292,        292,        1,   259200, 0x7d0b209a
+0,        293,        293,        1,   259200, 0xd5943c7e
+0,        295,        295,        1,   259200, 0x94e25664
+0,        296,        296,        1,   259200, 0x97447e3c
+0,        298,        298,        1,   259200, 0x68bc8436
+0,        299,        299,        1,   259200, 0xb7ee9426
diff --git a/tests/ref/fate/concat-demuxer-extended-lavf-mxf b/tests/ref/fate/concat-demuxer-extended-lavf-mxf
new file mode 100644
index 00000000..4caec5a9
--- /dev/null
+++ b/tests/ref/fate/concat-demuxer-extended-lavf-mxf
@@ -0,0 +1 @@
+37b4a84fce71b3f8b129f8b866c5f55a *tests/data/fate/concat-demuxer-extended-lavf-mxf.ffprobe
diff --git a/tests/ref/fate/concat-demuxer-extended-lavf-mxf_d10 b/tests/ref/fate/concat-demuxer-extended-lavf-mxf_d10
new file mode 100644
index 00000000..1965050a
--- /dev/null
+++ b/tests/ref/fate/concat-demuxer-extended-lavf-mxf_d10
@@ -0,0 +1 @@
+2f5e935f86304c843be1454b1354a4b7 *tests/data/fate/concat-demuxer-extended-lavf-mxf_d10.ffprobe
diff --git a/tests/ref/fate/concat-demuxer-simple1-lavf-mxf b/tests/ref/fate/concat-demuxer-simple1-lavf-mxf
new file mode 100644
index 00000000..c28db28e
--- /dev/null
+++ b/tests/ref/fate/concat-demuxer-simple1-lavf-mxf
@@ -0,0 +1,124 @@
+video|0|0|0.000000|-1|-0.040000|1|0.040000|N/A|N/A|24801|6144|K
+audio|1|0|0.000000|0|0.000000|1920|0.040000|N/A|N/A|3840|31232|K
+video|0|3|0.120000|0|0.000000|1|0.040000|N/A|N/A|16743|35840|_
+audio|1|1920|0.040000|1920|0.040000|1920|0.040000|N/A|N/A|3840|52736|K
+video|0|1|0.040000|1|0.040000|1|0.040000|N/A|N/A|13812|57344|_
+audio|1|3840|0.080000|3840|0.080000|1920|0.040000|N/A|N/A|3840|71680|K
+video|0|2|0.080000|2|0.080000|1|0.040000|N/A|N/A|13607|76288|_
+audio|1|5760|0.120000|5760|0.120000|1920|0.040000|N/A|N/A|3840|90112|K
+video|0|6|0.240000|3|0.120000|1|0.040000|N/A|N/A|16158|94720|_
+audio|1|7680|0.160000|7680|0.160000|1920|0.040000|N/A|N/A|3840|111104|K
+video|0|4|0.160000|4|0.160000|1|0.040000|N/A|N/A|13943|115712|_
+audio|1|9600|0.200000|9600|0.200000|1920|0.040000|N/A|N/A|3840|130048|K
+video|0|5|0.200000|5|0.200000|1|0.040000|N/A|N/A|11223|134656|_
+audio|1|11520|0.240000|11520|0.240000|1920|0.040000|N/A|N/A|3840|145920|K
+video|0|9|0.360000|6|0.240000|1|0.040000|N/A|N/A|20298|150528|_
+audio|1|13440|0.280000|13440|0.280000|1920|0.040000|N/A|N/A|3840|171008|K
+video|0|7|0.280000|7|0.280000|1|0.040000|N/A|N/A|13341|175616|_
+audio|1|15360|0.320000|15360|0.320000|1920|0.040000|N/A|N/A|3840|189440|K
+video|0|8|0.320000|8|0.320000|1|0.040000|N/A|N/A|12362|194048|_
+audio|1|17280|0.360000|17280|0.360000|1920|0.040000|N/A|N/A|3840|206848|K
+video|0|12|0.480000|9|0.360000|1|0.040000|N/A|N/A|24786|211456|K
+audio|1|19200|0.400000|19200|0.400000|1920|0.040000|N/A|N/A|3840|236544|K
+video|0|10|0.400000|10|0.400000|1|0.040000|N/A|N/A|13377|241152|_
+audio|1|21120|0.440000|21120|0.440000|1920|0.040000|N/A|N/A|3840|254976|K
+video|0|11|0.440000|11|0.440000|1|0.040000|N/A|N/A|15624|259584|_
+audio|1|23040|0.480000|23040|0.480000|1920|0.040000|N/A|N/A|3840|275456|K
+video|0|15|0.600000|12|0.480000|1|0.040000|N/A|N/A|22597|280064|_
+audio|1|24960|0.520000|24960|0.520000|1920|0.040000|N/A|N/A|3840|303104|K
+video|0|13|0.520000|13|0.520000|1|0.040000|N/A|N/A|15028|307712|_
+audio|1|26880|0.560000|26880|0.560000|1920|0.040000|N/A|N/A|3840|323072|K
+video|0|14|0.560000|14|0.560000|1|0.040000|N/A|N/A|14014|327680|_
+audio|1|28800|0.600000|28800|0.600000|1920|0.040000|N/A|N/A|3840|342016|K
+video|0|18|0.720000|15|0.600000|1|0.040000|N/A|N/A|20731|346624|_
+audio|1|30720|0.640000|30720|0.640000|1920|0.040000|N/A|N/A|3840|367616|K
+video|0|16|0.640000|16|0.640000|1|0.040000|N/A|N/A|11946|372224|_
+audio|1|32640|0.680000|32640|0.680000|1920|0.040000|N/A|N/A|3840|384512|K
+video|0|17|0.680000|17|0.680000|1|0.040000|N/A|N/A|14464|389120|_
+audio|1|34560|0.720000|34560|0.720000|1920|0.040000|N/A|N/A|3840|403968|K
+video|0|21|0.840000|18|0.720000|1|0.040000|N/A|N/A|16189|408576|_
+audio|1|36480|0.760000|36480|0.760000|1920|0.040000|N/A|N/A|3840|424960|K
+video|0|19|0.760000|19|0.760000|1|0.040000|N/A|N/A|10524|429568|_
+audio|1|38400|0.800000|38400|0.800000|1920|0.040000|N/A|N/A|3840|440320|K
+video|0|20|0.800000|20|0.800000|1|0.040000|N/A|N/A|10599|444928|_
+audio|1|40320|0.840000|40320|0.840000|1920|0.040000|N/A|N/A|3840|455680|K
+video|0|24|0.960000|21|0.840000|1|0.040000|N/A|N/A|24711|460288|K
+audio|1|42240|0.880000|42240|0.880000|1920|0.040000|N/A|N/A|3840|485376|K
+video|0|22|0.880000|22|0.880000|1|0.040000|N/A|N/A|10840|489984|_
+audio|1|44160|0.920000|44160|0.920000|1920|0.040000|N/A|N/A|3840|501248|K
+video|0|23|0.920000|23|0.920000|1|0.040000|N/A|N/A|13350|505856|_
+audio|1|46080|0.960000|46080|0.960000|1920|0.040000|N/A|N/A|3840|519680|K
+video|0|17|0.680000|14|0.560000|1|0.040000|N/A|N/A|24786|211456|K
+audio|1|28800|0.600000|28800|0.600000|1920|0.040000|N/A|N/A|3840|236544|K
+video|0|15|0.600000|15|0.600000|1|0.040000|N/A|N/A|13377|241152|_
+audio|1|30720|0.640000|30720|0.640000|1920|0.040000|N/A|N/A|3840|254976|K
+video|0|16|0.640000|16|0.640000|1|0.040000|N/A|N/A|15624|259584|_
+audio|1|32640|0.680000|32640|0.680000|1920|0.040000|N/A|N/A|3840|275456|K
+video|0|20|0.800000|17|0.680000|1|0.040000|N/A|N/A|22597|280064|_
+audio|1|34560|0.720000|34560|0.720000|1920|0.040000|N/A|N/A|3840|303104|K
+video|0|18|0.720000|18|0.720000|1|0.040000|N/A|N/A|15028|307712|_
+audio|1|36480|0.760000|36480|0.760000|1920|0.040000|N/A|N/A|3840|323072|K
+video|0|19|0.760000|19|0.760000|1|0.040000|N/A|N/A|14014|327680|_
+audio|1|38400|0.800000|38400|0.800000|1920|0.040000|N/A|N/A|3840|342016|K
+video|0|23|0.920000|20|0.800000|1|0.040000|N/A|N/A|20731|346624|_
+audio|1|40320|0.840000|40320|0.840000|1920|0.040000|N/A|N/A|3840|367616|K
+video|0|21|0.840000|21|0.840000|1|0.040000|N/A|N/A|11946|372224|_
+audio|1|42240|0.880000|42240|0.880000|1920|0.040000|N/A|N/A|3840|384512|K
+video|0|22|0.880000|22|0.880000|1|0.040000|N/A|N/A|14464|389120|_
+audio|1|44160|0.920000|44160|0.920000|1920|0.040000|N/A|N/A|3840|403968|K
+video|0|26|1.040000|23|0.920000|1|0.040000|N/A|N/A|16189|408576|_
+audio|1|46080|0.960000|46080|0.960000|1920|0.040000|N/A|N/A|3840|424960|K
+video|0|24|0.960000|24|0.960000|1|0.040000|N/A|N/A|10524|429568|_
+audio|1|48000|1.000000|48000|1.000000|1920|0.040000|N/A|N/A|3840|440320|K
+video|0|25|1.000000|25|1.000000|1|0.040000|N/A|N/A|10599|444928|_
+audio|1|49920|1.040000|49920|1.040000|1920|0.040000|N/A|N/A|3840|455680|K
+video|0|29|1.160000|26|1.040000|1|0.040000|N/A|N/A|24711|460288|K
+audio|1|51840|1.080000|51840|1.080000|1920|0.040000|N/A|N/A|3840|485376|K
+video|0|27|1.080000|27|1.080000|1|0.040000|N/A|N/A|10840|489984|_
+audio|1|53760|1.120000|53760|1.120000|1920|0.040000|N/A|N/A|3840|501248|K
+video|0|28|1.120000|28|1.120000|1|0.040000|N/A|N/A|13350|505856|_
+audio|1|55680|1.160000|55680|1.160000|1920|0.040000|N/A|N/A|3840|519680|K
+video|0|25|1.000000|24|0.960000|1|0.040000|N/A|N/A|24801|6144|K|1
+Strings Metadata|8
+audio|1|48000|1.000000|48000|1.000000|1920|0.040000|N/A|N/A|3840|31232|K|1
+Strings Metadata|8
+video|0|28|1.120000|25|1.000000|1|0.040000|N/A|N/A|16743|35840|_|1
+Strings Metadata|8
+audio|1|49920|1.040000|49920|1.040000|1920|0.040000|N/A|N/A|3840|52736|K|1
+Strings Metadata|8
+video|0|26|1.040000|26|1.040000|1|0.040000|N/A|N/A|13812|57344|_|1
+Strings Metadata|8
+audio|1|51840|1.080000|51840|1.080000|1920|0.040000|N/A|N/A|3840|71680|K|1
+Strings Metadata|8
+video|0|27|1.080000|27|1.080000|1|0.040000|N/A|N/A|13607|76288|_|1
+Strings Metadata|8
+audio|1|53760|1.120000|53760|1.120000|1920|0.040000|N/A|N/A|3840|90112|K|1
+Strings Metadata|8
+video|0|31|1.240000|28|1.120000|1|0.040000|N/A|N/A|16158|94720|_|1
+Strings Metadata|8
+audio|1|55680|1.160000|55680|1.160000|1920|0.040000|N/A|N/A|3840|111104|K|1
+Strings Metadata|8
+video|0|29|1.160000|29|1.160000|1|0.040000|N/A|N/A|13943|115712|_|1
+Strings Metadata|8
+audio|1|57600|1.200000|57600|1.200000|1920|0.040000|N/A|N/A|3840|130048|K|1
+Strings Metadata|8
+video|0|30|1.200000|30|1.200000|1|0.040000|N/A|N/A|11223|134656|_|1
+Strings Metadata|8
+audio|1|59520|1.240000|59520|1.240000|1920|0.040000|N/A|N/A|3840|145920|K|1
+Strings Metadata|8
+video|0|34|1.360000|31|1.240000|1|0.040000|N/A|N/A|20298|150528|_|1
+Strings Metadata|8
+audio|1|61440|1.280000|61440|1.280000|1920|0.040000|N/A|N/A|3840|171008|K|1
+Strings Metadata|8
+video|0|32|1.280000|32|1.280000|1|0.040000|N/A|N/A|13341|175616|_|1
+Strings Metadata|8
+audio|1|63360|1.320000|63360|1.320000|1920|0.040000|N/A|N/A|3840|189440|K|1
+Strings Metadata|8
+video|0|33|1.320000|33|1.320000|1|0.040000|N/A|N/A|12362|194048|_|1
+Strings Metadata|8
+audio|1|65280|1.360000|65280|1.360000|1920|0.040000|N/A|N/A|3840|206848|K|1
+Strings Metadata|8
+video|0|37|1.480000|34|1.360000|1|0.040000|N/A|N/A|24786|211456|K|1
+Strings Metadata|8
+0|mpeg2video|4|video|1/50|[0][0][0][0]|0x0000|352|288|0|0|1|1:1|11:9|yuv420p|8|tv|unknown|unknown|unknown|left|00:00:00:00|1|N/A|25/1|25/1|1/25|N/A|N/A|N/A|N/A|N/A|104857200|N/A|N/A|N/A|51|0|0|0|0|0|0|0|0|0|0|0|0x060A2B340101010501010D001300000000000000000000000000000000000001
+1|pcm_s16le|unknown|audio|1/48000|[0][0][0][0]|0x0000|s16|48000|1|unknown|16|N/A|0/0|0/0|1/48000|0|0.000000|N/A|N/A|768000|N/A|N/A|N/A|N/A|50|0|0|0|0|0|0|0|0|0|0|0|0x060A2B340101010501010D001300000000000000000000000000000000000001
diff --git a/tests/ref/fate/concat-demuxer-simple1-lavf-mxf_d10 b/tests/ref/fate/concat-demuxer-simple1-lavf-mxf_d10
new file mode 100644
index 00000000..3b6e3fe0
--- /dev/null
+++ b/tests/ref/fate/concat-demuxer-simple1-lavf-mxf_d10
@@ -0,0 +1,82 @@
+video|0|0|0.000000|0|0.000000|1|0.040000|N/A|N/A|150000|6144|K
+audio|1|0|0.000000|0|0.000000|1920|0.040000|N/A|N/A|7680|156672|K
+video|0|1|0.040000|1|0.040000|1|0.040000|N/A|N/A|150000|219136|K
+audio|1|1920|0.040000|1920|0.040000|1920|0.040000|N/A|N/A|7680|369664|K
+video|0|2|0.080000|2|0.080000|1|0.040000|N/A|N/A|150000|432128|K
+audio|1|3840|0.080000|3840|0.080000|1920|0.040000|N/A|N/A|7680|582656|K
+video|0|3|0.120000|3|0.120000|1|0.040000|N/A|N/A|150000|645120|K
+audio|1|5760|0.120000|5760|0.120000|1920|0.040000|N/A|N/A|7680|795648|K
+video|0|4|0.160000|4|0.160000|1|0.040000|N/A|N/A|150000|858112|K
+audio|1|7680|0.160000|7680|0.160000|1920|0.040000|N/A|N/A|7680|1008640|K
+video|0|5|0.200000|5|0.200000|1|0.040000|N/A|N/A|150000|1071104|K
+audio|1|9600|0.200000|9600|0.200000|1920|0.040000|N/A|N/A|7680|1221632|K
+video|0|6|0.240000|6|0.240000|1|0.040000|N/A|N/A|150000|1284096|K
+audio|1|11520|0.240000|11520|0.240000|1920|0.040000|N/A|N/A|7680|1434624|K
+video|0|7|0.280000|7|0.280000|1|0.040000|N/A|N/A|150000|1497088|K
+audio|1|13440|0.280000|13440|0.280000|1920|0.040000|N/A|N/A|7680|1647616|K
+video|0|8|0.320000|8|0.320000|1|0.040000|N/A|N/A|150000|1710080|K
+audio|1|15360|0.320000|15360|0.320000|1920|0.040000|N/A|N/A|7680|1860608|K
+video|0|9|0.360000|9|0.360000|1|0.040000|N/A|N/A|150000|1923072|K
+audio|1|17280|0.360000|17280|0.360000|1920|0.040000|N/A|N/A|7680|2073600|K
+video|0|10|0.400000|10|0.400000|1|0.040000|N/A|N/A|150000|2136064|K
+audio|1|19200|0.400000|19200|0.400000|1920|0.040000|N/A|N/A|7680|2286592|K
+video|0|11|0.440000|11|0.440000|1|0.040000|N/A|N/A|150000|2349056|K
+audio|1|21120|0.440000|21120|0.440000|1920|0.040000|N/A|N/A|7680|2499584|K
+video|0|12|0.480000|12|0.480000|1|0.040000|N/A|N/A|150000|2562048|K
+audio|1|23040|0.480000|23040|0.480000|1920|0.040000|N/A|N/A|7680|2712576|K
+video|0|13|0.520000|13|0.520000|1|0.040000|N/A|N/A|150000|2775040|K
+audio|1|24960|0.520000|24960|0.520000|1920|0.040000|N/A|N/A|7680|2925568|K
+video|0|14|0.560000|14|0.560000|1|0.040000|N/A|N/A|150000|2988032|K
+audio|1|26880|0.560000|26880|0.560000|1920|0.040000|N/A|N/A|7680|3138560|K
+video|0|15|0.600000|15|0.600000|1|0.040000|N/A|N/A|150000|3201024|K
+audio|1|28800|0.600000|28800|0.600000|1920|0.040000|N/A|N/A|7680|3351552|K
+video|0|16|0.640000|16|0.640000|1|0.040000|N/A|N/A|150000|3414016|K
+audio|1|30720|0.640000|30720|0.640000|1920|0.040000|N/A|N/A|7680|3564544|K
+video|0|17|0.680000|17|0.680000|1|0.040000|N/A|N/A|150000|3627008|K
+audio|1|32640|0.680000|32640|0.680000|1920|0.040000|N/A|N/A|7680|3777536|K
+video|0|18|0.720000|18|0.720000|1|0.040000|N/A|N/A|150000|3840000|K
+audio|1|34560|0.720000|34560|0.720000|1920|0.040000|N/A|N/A|7680|3990528|K
+video|0|19|0.760000|19|0.760000|1|0.040000|N/A|N/A|150000|4052992|K
+audio|1|36480|0.760000|36480|0.760000|1920|0.040000|N/A|N/A|7680|4203520|K
+video|0|20|0.800000|20|0.800000|1|0.040000|N/A|N/A|150000|4265984|K
+audio|1|38400|0.800000|38400|0.800000|1920|0.040000|N/A|N/A|7680|4416512|K
+video|0|21|0.840000|21|0.840000|1|0.040000|N/A|N/A|150000|4478976|K
+audio|1|40320|0.840000|40320|0.840000|1920|0.040000|N/A|N/A|7680|4629504|K
+video|0|22|0.880000|22|0.880000|1|0.040000|N/A|N/A|150000|4691968|K
+audio|1|42240|0.880000|42240|0.880000|1920|0.040000|N/A|N/A|7680|4842496|K
+video|0|23|0.920000|23|0.920000|1|0.040000|N/A|N/A|150000|4904960|K
+audio|1|44160|0.920000|44160|0.920000|1920|0.040000|N/A|N/A|7680|5055488|K
+video|0|24|0.960000|24|0.960000|1|0.040000|N/A|N/A|150000|5117952|K
+audio|1|46080|0.960000|46080|0.960000|1920|0.040000|N/A|N/A|7680|5268480|K
+video|0|25|1.000000|25|1.000000|1|0.040000|N/A|N/A|150000|4265984|K
+audio|1|48000|1.000000|48000|1.000000|1920|0.040000|N/A|N/A|7680|4416512|K
+video|0|26|1.040000|26|1.040000|1|0.040000|N/A|N/A|150000|4478976|K
+audio|1|49920|1.040000|49920|1.040000|1920|0.040000|N/A|N/A|7680|4629504|K
+video|0|27|1.080000|27|1.080000|1|0.040000|N/A|N/A|150000|4691968|K
+audio|1|51840|1.080000|51840|1.080000|1920|0.040000|N/A|N/A|7680|4842496|K
+video|0|28|1.120000|28|1.120000|1|0.040000|N/A|N/A|150000|4904960|K
+audio|1|53760|1.120000|53760|1.120000|1920|0.040000|N/A|N/A|7680|5055488|K
+video|0|29|1.160000|29|1.160000|1|0.040000|N/A|N/A|150000|5117952|K
+audio|1|55680|1.160000|55680|1.160000|1920|0.040000|N/A|N/A|7680|5268480|K
+video|0|30|1.200000|30|1.200000|1|0.040000|N/A|N/A|150000|1071104|K|1
+Strings Metadata|8
+audio|1|57600|1.200000|57600|1.200000|1920|0.040000|N/A|N/A|7680|1221632|K|1
+Strings Metadata|8
+video|0|31|1.240000|31|1.240000|1|0.040000|N/A|N/A|150000|1284096|K|1
+Strings Metadata|8
+audio|1|59520|1.240000|59520|1.240000|1920|0.040000|N/A|N/A|7680|1434624|K|1
+Strings Metadata|8
+video|0|32|1.280000|32|1.280000|1|0.040000|N/A|N/A|150000|1497088|K|1
+Strings Metadata|8
+audio|1|61440|1.280000|61440|1.280000|1920|0.040000|N/A|N/A|7680|1647616|K|1
+Strings Metadata|8
+video|0|33|1.320000|33|1.320000|1|0.040000|N/A|N/A|150000|1710080|K|1
+Strings Metadata|8
+audio|1|63360|1.320000|63360|1.320000|1920|0.040000|N/A|N/A|7680|1860608|K|1
+Strings Metadata|8
+video|0|34|1.360000|34|1.360000|1|0.040000|N/A|N/A|150000|1923072|K|1
+Strings Metadata|8
+audio|1|65280|1.360000|65280|1.360000|1920|0.040000|N/A|N/A|7680|2073600|K|1
+Strings Metadata|8
+0|mpeg2video|0|video|1/50|[0][0][0][0]|0x0000|720|608|0|0|0|1:1|45:38|yuv422p|5|tv|unknown|unknown|unknown|topleft|00:00:00:00|1|N/A|25/1|25/1|1/25|0|0.000000|N/A|N/A|30000000|30000000|N/A|N/A|N/A|35|0|0|0|0|0|0|0|0|0|0|0|0x060A2B340101010501010D001300000000000000000000000000000000000001
+1|pcm_s16le|unknown|audio|1/48000|[0][0][0][0]|0x0000|s16|48000|2|unknown|16|N/A|0/0|0/0|1/48000|0|0.000000|N/A|N/A|1536000|N/A|N/A|N/A|N/A|35|0|0|0|0|0|0|0|0|0|0|0|0x060A2B340101010501010D001300000000000000000000000000000000000001
diff --git a/tests/ref/fate/concat-demuxer-simple2-lavf-ts b/tests/ref/fate/concat-demuxer-simple2-lavf-ts
new file mode 100644
index 00000000..fb2713cd
--- /dev/null
+++ b/tests/ref/fate/concat-demuxer-simple2-lavf-ts
@@ -0,0 +1,151 @@
+video|1|982|0.010911|-2618|-0.029089|3600|0.040000|N/A|N/A|24801|564|K
+video|1|4582|0.050911|982|0.010911|3600|0.040000|N/A|N/A|16429|27072|_
+video|1|8182|0.090911|4582|0.050911|3600|0.040000|N/A|N/A|14508|44932|_
+video|1|11782|0.130911|8182|0.090911|3600|0.040000|N/A|N/A|12622|60536|_
+video|1|15382|0.170911|11782|0.130911|3600|0.040000|N/A|N/A|13393|74260|_
+video|1|18982|0.210911|15382|0.170911|3600|0.040000|N/A|N/A|13092|88924|_
+video|1|22582|0.250911|18982|0.210911|3600|0.040000|N/A|N/A|12755|102836|_
+video|1|26182|0.290911|22582|0.250911|3600|0.040000|N/A|N/A|12023|116748|_
+audio|0|0|0.000000|0|0.000000|2351|0.026122|N/A|N/A|208|159988|K
+audio|0|2351|0.026122|2351|0.026122|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|4702|0.052244|4702|0.052244|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|7053|0.078367|7053|0.078367|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|9404|0.104489|9404|0.104489|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|11755|0.130611|11755|0.130611|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|14106|0.156733|14106|0.156733|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|16457|0.182856|16457|0.182856|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|18808|0.208978|18808|0.208978|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|21159|0.235100|21159|0.235100|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|23510|0.261222|23510|0.261222|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|25861|0.287344|25861|0.287344|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|28212|0.313467|28212|0.313467|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|30563|0.339589|30563|0.339589|2351|0.026122|N/A|N/A|209|N/A|K
+video|1|29782|0.330911|26182|0.290911|3600|0.040000|N/A|N/A|14098|130096|_
+video|1|33382|0.370911|29782|0.330911|3600|0.040000|N/A|N/A|13329|145324|_
+video|1|36982|0.410911|33382|0.370911|3600|0.040000|N/A|N/A|12135|162996|_
+video|1|40582|0.450911|36982|0.410911|3600|0.040000|N/A|N/A|12282|176344|_
+video|1|44182|0.490911|40582|0.450911|3600|0.040000|N/A|N/A|24786|189692|K
+video|1|47782|0.530911|44182|0.490911|3600|0.040000|N/A|N/A|17440|216388|_
+video|1|51382|0.570911|47782|0.530911|3600|0.040000|N/A|N/A|15019|235000|_
+video|1|54982|0.610911|51382|0.570911|3600|0.040000|N/A|N/A|13449|251356|_
+video|1|58582|0.650911|54982|0.610911|3600|0.040000|N/A|N/A|12398|266020|_
+video|1|62182|0.690911|58582|0.650911|3600|0.040000|N/A|N/A|13455|279744|_
+audio|0|32915|0.365722|32915|0.365722|2351|0.026122|N/A|N/A|209|322608|K
+audio|0|35266|0.391844|35266|0.391844|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|37617|0.417967|37617|0.417967|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|39968|0.444089|39968|0.444089|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|42319|0.470211|42319|0.470211|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|44670|0.496333|44670|0.496333|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|47021|0.522456|47021|0.522456|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|49372|0.548578|49372|0.548578|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|51723|0.574700|51723|0.574700|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|54074|0.600822|54074|0.600822|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|56425|0.626944|56425|0.626944|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|58776|0.653067|58776|0.653067|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|61127|0.679189|61127|0.679189|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|63478|0.705311|63478|0.705311|2351|0.026122|N/A|N/A|209|N/A|K
+video|1|65782|0.730911|62182|0.690911|3600|0.040000|N/A|N/A|13836|294408|_
+video|1|69382|0.770911|65782|0.730911|3600|0.040000|N/A|N/A|12163|309448|_
+video|1|72982|0.810911|69382|0.770911|3600|0.040000|N/A|N/A|12692|325992|_
+video|1|76582|0.850911|72982|0.810911|3600|0.040000|N/A|N/A|10824|339528|_
+video|1|80182|0.890911|76582|0.850911|3600|0.040000|N/A|N/A|11286|351372|_
+audio|0|65829|0.731433|65829|0.731433|2351|0.026122|N/A|N/A|209|404576|K
+audio|0|68180|0.757556|68180|0.757556|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|70531|0.783678|70531|0.783678|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|72882|0.809800|72882|0.809800|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|75233|0.835922|75233|0.835922|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|77584|0.862044|77584|0.862044|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|79935|0.888167|79935|0.888167|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|82286|0.914289|82286|0.914289|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|84637|0.940411|84637|0.940411|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|86988|0.966533|86988|0.966533|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|89339|0.992656|89339|0.992656|2351|0.026122|N/A|N/A|209|N/A|K
+video|1|83782|0.930911|80182|0.890911|3600|0.040000|N/A|N/A|12678|363592|_
+video|1|87382|0.970911|83782|0.930911|3600|0.040000|N/A|N/A|24711|377880|K
+video|1|91964|1.021822|88364|0.981822|3600|0.040000|N/A|N/A|24801|564|K
+video|1|95564|1.061822|91964|1.021822|3600|0.040000|N/A|N/A|16429|27072|_
+video|1|99164|1.101822|95564|1.061822|3600|0.040000|N/A|N/A|14508|44932|_
+video|1|102764|1.141822|99164|1.101822|3600|0.040000|N/A|N/A|12622|60536|_
+video|1|106364|1.181822|102764|1.141822|3600|0.040000|N/A|N/A|13393|74260|_
+video|1|109964|1.221822|106364|1.181822|3600|0.040000|N/A|N/A|13092|88924|_
+video|1|113564|1.261822|109964|1.221822|3600|0.040000|N/A|N/A|12755|102836|_
+video|1|117164|1.301822|113564|1.261822|3600|0.040000|N/A|N/A|12023|116748|_
+audio|0|90982|1.010911|90982|1.010911|2351|0.026122|N/A|N/A|208|159988|K
+audio|0|93333|1.037033|93333|1.037033|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|95684|1.063156|95684|1.063156|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|98035|1.089278|98035|1.089278|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|100386|1.115400|100386|1.115400|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|102737|1.141522|102737|1.141522|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|105088|1.167644|105088|1.167644|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|107439|1.193767|107439|1.193767|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|109790|1.219889|109790|1.219889|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|112141|1.246011|112141|1.246011|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|114492|1.272133|114492|1.272133|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|116843|1.298256|116843|1.298256|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|119194|1.324378|119194|1.324378|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|121545|1.350500|121545|1.350500|2351|0.026122|N/A|N/A|209|N/A|K
+video|1|120764|1.341822|117164|1.301822|3600|0.040000|N/A|N/A|14098|130096|_
+video|1|124364|1.381822|120764|1.341822|3600|0.040000|N/A|N/A|13329|145324|_
+video|1|127964|1.421822|124364|1.381822|3600|0.040000|N/A|N/A|12135|162996|_
+video|1|131564|1.461822|127964|1.421822|3600|0.040000|N/A|N/A|12282|176344|_
+video|1|135164|1.501822|131564|1.461822|3600|0.040000|N/A|N/A|24786|189692|K
+video|1|138764|1.541822|135164|1.501822|3600|0.040000|N/A|N/A|17440|216388|_
+video|1|142364|1.581822|138764|1.541822|3600|0.040000|N/A|N/A|15019|235000|_
+video|1|145964|1.621822|142364|1.581822|3600|0.040000|N/A|N/A|13449|251356|_
+video|1|149564|1.661822|145964|1.621822|3600|0.040000|N/A|N/A|12398|266020|_
+video|1|153164|1.701822|149564|1.661822|3600|0.040000|N/A|N/A|13455|279744|_
+audio|0|123897|1.376633|123897|1.376633|2351|0.026122|N/A|N/A|209|322608|K
+audio|0|126248|1.402756|126248|1.402756|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|128599|1.428878|128599|1.428878|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|130950|1.455000|130950|1.455000|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|133301|1.481122|133301|1.481122|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|135652|1.507244|135652|1.507244|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|138003|1.533367|138003|1.533367|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|140354|1.559489|140354|1.559489|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|142705|1.585611|142705|1.585611|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|145056|1.611733|145056|1.611733|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|147407|1.637856|147407|1.637856|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|149758|1.663978|149758|1.663978|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|152109|1.690100|152109|1.690100|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|154460|1.716222|154460|1.716222|2351|0.026122|N/A|N/A|209|N/A|K
+video|1|156764|1.741822|153164|1.701822|3600|0.040000|N/A|N/A|13836|294408|_
+video|1|160364|1.781822|156764|1.741822|3600|0.040000|N/A|N/A|12163|309448|_
+video|1|163964|1.821822|160364|1.781822|3600|0.040000|N/A|N/A|12692|325992|_
+video|1|167564|1.861822|163964|1.821822|3600|0.040000|N/A|N/A|10824|339528|_
+video|1|171164|1.901822|167564|1.861822|3600|0.040000|N/A|N/A|11286|351372|_
+audio|0|156811|1.742344|156811|1.742344|2351|0.026122|N/A|N/A|209|404576|K
+audio|0|159162|1.768467|159162|1.768467|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|161513|1.794589|161513|1.794589|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|163864|1.820711|163864|1.820711|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|166215|1.846833|166215|1.846833|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|168566|1.872956|168566|1.872956|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|170917|1.899078|170917|1.899078|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|173268|1.925200|173268|1.925200|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|175619|1.951322|175619|1.951322|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|177970|1.977444|177970|1.977444|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|180321|2.003567|180321|2.003567|2351|0.026122|N/A|N/A|209|N/A|K
+video|1|174764|1.941822|171164|1.901822|3600|0.040000|N/A|N/A|12678|363592|_
+video|1|178364|1.981822|174764|1.941822|3600|0.040000|N/A|N/A|24711|377880|K
+video|1|139582|1.550911|135982|1.510911|3600|0.040000|N/A|N/A|12692|325992|_
+video|1|143182|1.590911|139582|1.550911|3600|0.040000|N/A|N/A|10824|339528|_
+video|1|146782|1.630911|143182|1.590911|3600|0.040000|N/A|N/A|11286|351372|_
+audio|0|132429|1.471433|132429|1.471433|2351|0.026122|N/A|N/A|209|404576|K
+audio|0|134780|1.497556|134780|1.497556|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|137131|1.523678|137131|1.523678|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|139482|1.549800|139482|1.549800|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|141833|1.575922|141833|1.575922|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|144184|1.602044|144184|1.602044|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|146535|1.628167|146535|1.628167|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|148886|1.654289|148886|1.654289|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|151237|1.680411|151237|1.680411|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|153588|1.706533|153588|1.706533|2351|0.026122|N/A|N/A|209|N/A|K
+audio|0|155939|1.732656|155939|1.732656|2351|0.026122|N/A|N/A|209|N/A|K
+video|1|150382|1.670911|146782|1.630911|3600|0.040000|N/A|N/A|12678|363592|_
+video|1|153982|1.710911|150382|1.670911|3600|0.040000|N/A|N/A|24711|377880|K
+video|1|161182|1.790911|157582|1.750911|3600|0.040000|N/A|N/A|12135|162996|_
+video|1|164782|1.830911|161182|1.790911|3600|0.040000|N/A|N/A|12282|176344|_
+video|1|168382|1.870911|164782|1.830911|3600|0.040000|N/A|N/A|24786|189692|K
+video|1|171982|1.910911|168382|1.870911|3600|0.040000|N/A|N/A|17440|216388|_
+video|1|175582|1.950911|171982|1.910911|3600|0.040000|N/A|N/A|15019|235000|_
+0|mp2|unknown|audio|1/44100|[3][0][0][0]|0x0003|s16p|44100|1|mono|0|N/A|0/0|0/0|1/90000|0|0.000000|N/A|N/A|64000|N/A|N/A|N/A|N/A|89|0|0|0|0|0|0|0|0|0|0|0
+1|mpeg2video|4|video|1/50|[2][0][0][0]|0x0002|352|288|0|0|1|1:1|11:9|yuv420p|8|tv|unknown|unknown|unknown|left|00:00:00:00|1|N/A|25/1|25/1|1/90000|N/A|N/A|N/A|N/A|N/A|104857200|N/A|N/A|N/A|60|0|0|0|0|0|0|0|0|0|0|0
diff --git a/tests/ref/fate/d-eavp6-demux b/tests/ref/fate/d-eavp6-demux
new file mode 100644
index 00000000..3587a083
--- /dev/null
+++ b/tests/ref/fate/d-eavp6-demux
@@ -0,0 +1,98 @@
+#tb 0: 32767/982027
+#tb 1: 32767/982027
+0,          0,          0,        1,     1860, 0xbd548c4c
+1,          0,          0,        1,     1748, 0x96046284
+0,          1,          1,        1,     1044, 0x814efc86, F=0x0
+1,          1,          1,        1,      204, 0x0ea1573a, F=0x0
+0,          2,          2,        1,     1036, 0xf672f905, F=0x0
+1,          2,          2,        1,      240, 0x634e7448, F=0x0
+0,          3,          3,        1,     1048, 0xe0f80ee7, F=0x0
+1,          3,          3,        1,      260, 0xc35b8521, F=0x0
+0,          4,          4,        1,     1072, 0x980918e9, F=0x0
+1,          4,          4,        1,      376, 0x6e5cb85e, F=0x0
+0,          5,          5,        1,     1052, 0x73e6fd33, F=0x0
+1,          5,          5,        1,      344, 0xaacdad6b, F=0x0
+0,          6,          6,        1,     1056, 0x5242fb20, F=0x0
+1,          6,          6,        1,      404, 0x7498be1f, F=0x0
+0,          7,          7,        1,     1092, 0x8b7111c2, F=0x0
+1,          7,          7,        1,      368, 0xe2b8afd2, F=0x0
+0,          8,          8,        1,     1144, 0xc1003410, F=0x0
+1,          8,          8,        1,      412, 0x3615c893, F=0x0
+0,          9,          9,        1,     1152, 0x6b9234f9, F=0x0
+1,          9,          9,        1,      424, 0x04a5cdb7, F=0x0
+0,         10,         10,        1,     1132, 0x7d45384b, F=0x0
+1,         10,         10,        1,      356, 0x4ad5a9d5, F=0x0
+0,         11,         11,        1,     1164, 0x47d637a1, F=0x0
+1,         11,         11,        1,      448, 0x2811d959, F=0x0
+0,         12,         12,        1,     1152, 0xede932ad, F=0x0
+1,         12,         12,        1,      316, 0x911a9c11, F=0x0
+0,         13,         13,        1,     1112, 0x49f31a9e, F=0x0
+1,         13,         13,        1,      312, 0x1bb08de2, F=0x0
+0,         14,         14,        1,     1112, 0x7f022bc7, F=0x0
+1,         14,         14,        1,      308, 0x2c3698bb, F=0x0
+0,         15,         15,        1,     1128, 0x2a7a4381, F=0x0
+1,         15,         15,        1,      424, 0xec77c694, F=0x0
+0,         16,         16,        1,     1012, 0x22a3f64b, F=0x0
+1,         16,         16,        1,      220, 0x7506677f, F=0x0
+0,         17,         17,        1,     1012, 0x0ea3f03b, F=0x0
+1,         17,         17,        1,      204, 0xbf3f607e, F=0x0
+0,         18,         18,        1,     1072, 0xdf860cc2, F=0x0
+1,         18,         18,        1,      416, 0x5f08ca69, F=0x0
+0,         19,         19,        1,     1052, 0x29a9116a, F=0x0
+1,         19,         19,        1,      244, 0x8d1a7c05, F=0x0
+0,         20,         20,        1,     1048, 0xfeb1107d, F=0x0
+1,         20,         20,        1,      260, 0xd0b27b40, F=0x0
+0,         21,         21,        1,     1084, 0xeed50a32, F=0x0
+1,         21,         21,        1,      304, 0x5e5e8f10, F=0x0
+0,         22,         22,        1,      992, 0xabd4e695, F=0x0
+1,         22,         22,        1,      304, 0x2c839490, F=0x0
+0,         23,         23,        1,     1016, 0x7396e5a4, F=0x0
+1,         23,         23,        1,      264, 0x5bac855a, F=0x0
+0,         24,         24,        1,     1000, 0x1d91ef45, F=0x0
+1,         24,         24,        1,      220, 0xcda465a8, F=0x0
+0,         25,         25,        1,      960, 0x1c99da31, F=0x0
+1,         25,         25,        1,      280, 0xc1d08783, F=0x0
+0,         26,         26,        1,      948, 0x48a4c938, F=0x0
+1,         26,         26,        1,      440, 0xe2ebd578, F=0x0
+0,         27,         27,        1,      952, 0xea12d081, F=0x0
+1,         27,         27,        1,      188, 0xe7b15222, F=0x0
+0,         28,         28,        1,      944, 0x9a83dce3, F=0x0
+1,         28,         28,        1,      208, 0xc3f05b2a, F=0x0
+0,         29,         29,        1,      916, 0xacdaba99, F=0x0
+1,         29,         29,        1,      248, 0x95f47262, F=0x0
+0,         30,         30,        1,     1408, 0x8b86c584
+1,         30,         30,        1,      452, 0x2444db2a
+0,         31,         31,        1,      848, 0xdaed99bd, F=0x0
+1,         31,         31,        1,      224, 0x1edf679b, F=0x0
+0,         32,         32,        1,      836, 0x2b0ba278, F=0x0
+1,         32,         32,        1,      256, 0x9fc079ff, F=0x0
+0,         33,         33,        1,      804, 0xa9268780, F=0x0
+1,         33,         33,        1,      240, 0x1ef76c13, F=0x0
+0,         34,         34,        1,      744, 0xb48f68fc, F=0x0
+1,         34,         34,        1,      216, 0x51e56846, F=0x0
+0,         35,         35,        1,      608, 0x538734c4, F=0x0
+1,         35,         35,        1,      220, 0x09b26d72, F=0x0
+0,         36,         36,        1,      604, 0x003520de, F=0x0
+1,         36,         36,        1,      212, 0xa124688a, F=0x0
+0,         37,         37,        1,      756, 0xc375752e, F=0x0
+1,         37,         37,        1,      288, 0x76bb8cec, F=0x0
+0,         38,         38,        1,      820, 0xd9529ca1, F=0x0
+1,         38,         38,        1,      336, 0xc15ca7a3, F=0x0
+0,         39,         39,        1,      900, 0xe447be51, F=0x0
+1,         39,         39,        1,      308, 0x1b92950e, F=0x0
+0,         40,         40,        1,      936, 0x17dcc60d, F=0x0
+1,         40,         40,        1,      300, 0x0f608f70, F=0x0
+0,         41,         41,        1,      952, 0xd5d8e11d, F=0x0
+1,         41,         41,        1,      424, 0x2fb3c6a6, F=0x0
+0,         42,         42,        1,      988, 0x4596e71c, F=0x0
+1,         42,         42,        1,      488, 0x3cb2ead4, F=0x0
+0,         43,         43,        1,     1040, 0x46300cfa, F=0x0
+1,         43,         43,        1,      452, 0x431dd5b1, F=0x0
+0,         44,         44,        1,     1040, 0xc8ad0ac0, F=0x0
+1,         44,         44,        1,      428, 0x8162c607, F=0x0
+0,         45,         45,        1,     1116, 0x9b4319da, F=0x0
+1,         45,         45,        1,      428, 0xe430de88, F=0x0
+0,         46,         46,        1,     1184, 0x016d38b6, F=0x0
+1,         46,         46,        1,      424, 0x6baecd21, F=0x0
+0,         47,         47,        1,     1188, 0xe885478b, F=0x0
+1,         47,         47,        1,      392, 0xc001c8e9, F=0x0
diff --git a/tests/ref/fate/dca-xll b/tests/ref/fate/dca-xll
new file mode 100644
index 00000000..75b5453a
--- /dev/null
+++ b/tests/ref/fate/dca-xll
@@ -0,0 +1 @@
+5eb9a95ddaf3c803e74443a49a691686
diff --git a/tests/ref/fate/dca-xll_51_16_192_768_0 b/tests/ref/fate/dca-xll_51_16_192_768_0
new file mode 100644
index 00000000..0e22d5d2
--- /dev/null
+++ b/tests/ref/fate/dca-xll_51_16_192_768_0
@@ -0,0 +1,11 @@
+#format: frame checksums
+#version: 1
+#hash: MD5
+#tb 0: 1/192000
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,     2048,    24576, 91ff0dac5df86e798bfef5e573536b08
+0,       2048,       2048,     2048,    24576, 91ff0dac5df86e798bfef5e573536b08
+0,       4096,       4096,     2048,    24576, 97e888b4de888608d08d193aecd2bd6b
+0,       6144,       6144,     2048,    24576, 96d20b0b657183ee88a5ec7d919d5313
+0,       8192,       8192,     2048,    24576, d6a98e7cea8e9b397e89f178aa719b19
+0,      10240,      10240,     2048,    24576, fb3432fe46696579220baa1c49e7dcd5
diff --git a/tests/ref/fate/dca-xll_51_16_192_768_0-dmix_2 b/tests/ref/fate/dca-xll_51_16_192_768_0-dmix_2
new file mode 100644
index 00000000..0e22d5d2
--- /dev/null
+++ b/tests/ref/fate/dca-xll_51_16_192_768_0-dmix_2
@@ -0,0 +1,11 @@
+#format: frame checksums
+#version: 1
+#hash: MD5
+#tb 0: 1/192000
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,     2048,    24576, 91ff0dac5df86e798bfef5e573536b08
+0,       2048,       2048,     2048,    24576, 91ff0dac5df86e798bfef5e573536b08
+0,       4096,       4096,     2048,    24576, 97e888b4de888608d08d193aecd2bd6b
+0,       6144,       6144,     2048,    24576, 96d20b0b657183ee88a5ec7d919d5313
+0,       8192,       8192,     2048,    24576, d6a98e7cea8e9b397e89f178aa719b19
+0,      10240,      10240,     2048,    24576, fb3432fe46696579220baa1c49e7dcd5
diff --git a/tests/ref/fate/dca-xll_51_16_192_768_0-dmix_6 b/tests/ref/fate/dca-xll_51_16_192_768_0-dmix_6
new file mode 100644
index 00000000..0e22d5d2
--- /dev/null
+++ b/tests/ref/fate/dca-xll_51_16_192_768_0-dmix_6
@@ -0,0 +1,11 @@
+#format: frame checksums
+#version: 1
+#hash: MD5
+#tb 0: 1/192000
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,     2048,    24576, 91ff0dac5df86e798bfef5e573536b08
+0,       2048,       2048,     2048,    24576, 91ff0dac5df86e798bfef5e573536b08
+0,       4096,       4096,     2048,    24576, 97e888b4de888608d08d193aecd2bd6b
+0,       6144,       6144,     2048,    24576, 96d20b0b657183ee88a5ec7d919d5313
+0,       8192,       8192,     2048,    24576, d6a98e7cea8e9b397e89f178aa719b19
+0,      10240,      10240,     2048,    24576, fb3432fe46696579220baa1c49e7dcd5
diff --git a/tests/ref/fate/dca-xll_51_16_192_768_1 b/tests/ref/fate/dca-xll_51_16_192_768_1
new file mode 100644
index 00000000..0e22d5d2
--- /dev/null
+++ b/tests/ref/fate/dca-xll_51_16_192_768_1
@@ -0,0 +1,11 @@
+#format: frame checksums
+#version: 1
+#hash: MD5
+#tb 0: 1/192000
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,     2048,    24576, 91ff0dac5df86e798bfef5e573536b08
+0,       2048,       2048,     2048,    24576, 91ff0dac5df86e798bfef5e573536b08
+0,       4096,       4096,     2048,    24576, 97e888b4de888608d08d193aecd2bd6b
+0,       6144,       6144,     2048,    24576, 96d20b0b657183ee88a5ec7d919d5313
+0,       8192,       8192,     2048,    24576, d6a98e7cea8e9b397e89f178aa719b19
+0,      10240,      10240,     2048,    24576, fb3432fe46696579220baa1c49e7dcd5
diff --git a/tests/ref/fate/dca-xll_51_16_192_768_1-dmix_2 b/tests/ref/fate/dca-xll_51_16_192_768_1-dmix_2
new file mode 100644
index 00000000..d97ad2f3
--- /dev/null
+++ b/tests/ref/fate/dca-xll_51_16_192_768_1-dmix_2
@@ -0,0 +1,11 @@
+#format: frame checksums
+#version: 1
+#hash: MD5
+#tb 0: 1/192000
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,     2048,     8192, 0829f71740aab1ab98b33eae21dee122
+0,       2048,       2048,     2048,     8192, c8ca1cff44674809d464ec39cf1bd1e9
+0,       4096,       4096,     2048,     8192, d67d26915ca86554568aac685c9a6dc3
+0,       6144,       6144,     2048,     8192, 8fdf69fdac9985ac4f9470a7b8e8529d
+0,       8192,       8192,     2048,     8192, dc8a9ca39b38c98147f2308f985ff648
+0,      10240,      10240,     2048,     8192, ea13b97373762ab16d0f664013fdc962
diff --git a/tests/ref/fate/dca-xll_51_16_192_768_1-dmix_6 b/tests/ref/fate/dca-xll_51_16_192_768_1-dmix_6
new file mode 100644
index 00000000..0e22d5d2
--- /dev/null
+++ b/tests/ref/fate/dca-xll_51_16_192_768_1-dmix_6
@@ -0,0 +1,11 @@
+#format: frame checksums
+#version: 1
+#hash: MD5
+#tb 0: 1/192000
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,     2048,    24576, 91ff0dac5df86e798bfef5e573536b08
+0,       2048,       2048,     2048,    24576, 91ff0dac5df86e798bfef5e573536b08
+0,       4096,       4096,     2048,    24576, 97e888b4de888608d08d193aecd2bd6b
+0,       6144,       6144,     2048,    24576, 96d20b0b657183ee88a5ec7d919d5313
+0,       8192,       8192,     2048,    24576, d6a98e7cea8e9b397e89f178aa719b19
+0,      10240,      10240,     2048,    24576, fb3432fe46696579220baa1c49e7dcd5
diff --git a/tests/ref/fate/dca-xll_51_24_48_768 b/tests/ref/fate/dca-xll_51_24_48_768
new file mode 100644
index 00000000..47a62ece
--- /dev/null
+++ b/tests/ref/fate/dca-xll_51_24_48_768
@@ -0,0 +1,11 @@
+#format: frame checksums
+#version: 1
+#hash: MD5
+#tb 0: 1/48000
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,      512,     9216, 13a95890b5f0947d6f058ca9c30a3e01
+0,        512,        512,      512,     9216, 13a95890b5f0947d6f058ca9c30a3e01
+0,       1024,       1024,      512,     9216, 96ca6b6987c99d2f23bcd4754b257e8a
+0,       1536,       1536,      512,     9216, 67cf8fd7f6bead8a8225758e33068963
+0,       2048,       2048,      512,     9216, 8760d8a994cf173c1d6d818da419ea1d
+0,       2560,       2560,      512,     9216, 981ef3eef27d72d6c6425c7e6ad78da0
diff --git a/tests/ref/fate/dca-xll_51_24_48_768-dmix_2 b/tests/ref/fate/dca-xll_51_24_48_768-dmix_2
new file mode 100644
index 00000000..1a736cf1
--- /dev/null
+++ b/tests/ref/fate/dca-xll_51_24_48_768-dmix_2
@@ -0,0 +1,11 @@
+#format: frame checksums
+#version: 1
+#hash: MD5
+#tb 0: 1/48000
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,      512,     3072, d2a70550489de356a2cd6bfc40711204
+0,        512,        512,      512,     3072, d2a70550489de356a2cd6bfc40711204
+0,       1024,       1024,      512,     3072, 0ff2c55663b5113c2edf6ee9cc6b82b7
+0,       1536,       1536,      512,     3072, ceb660aaf1632ac2bd11a6d86f1dd54d
+0,       2048,       2048,      512,     3072, 4b136de05d39ed227e7ce566c12d1258
+0,       2560,       2560,      512,     3072, 9cd674b794c426e6db221bae008a1cf0
diff --git a/tests/ref/fate/dca-xll_51_24_48_768-dmix_6 b/tests/ref/fate/dca-xll_51_24_48_768-dmix_6
new file mode 100644
index 00000000..47a62ece
--- /dev/null
+++ b/tests/ref/fate/dca-xll_51_24_48_768-dmix_6
@@ -0,0 +1,11 @@
+#format: frame checksums
+#version: 1
+#hash: MD5
+#tb 0: 1/48000
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,      512,     9216, 13a95890b5f0947d6f058ca9c30a3e01
+0,        512,        512,      512,     9216, 13a95890b5f0947d6f058ca9c30a3e01
+0,       1024,       1024,      512,     9216, 96ca6b6987c99d2f23bcd4754b257e8a
+0,       1536,       1536,      512,     9216, 67cf8fd7f6bead8a8225758e33068963
+0,       2048,       2048,      512,     9216, 8760d8a994cf173c1d6d818da419ea1d
+0,       2560,       2560,      512,     9216, 981ef3eef27d72d6c6425c7e6ad78da0
diff --git a/tests/ref/fate/dca-xll_51_24_48_none b/tests/ref/fate/dca-xll_51_24_48_none
new file mode 100644
index 00000000..dffc587c
--- /dev/null
+++ b/tests/ref/fate/dca-xll_51_24_48_none
@@ -0,0 +1,8 @@
+#format: frame checksums
+#version: 1
+#hash: MD5
+#tb 0: 1/48000
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,     1024,    18432, f9debe3f07be68533bf0295e3d2ba68a
+0,       1024,       1024,     1024,    18432, 6707daa7724fdc552869e522a7936f26
+0,       2048,       2048,     1024,    18432, be4cc5d54a49870a83edba83f21a7fb5
diff --git a/tests/ref/fate/dca-xll_51_24_48_none-dmix_2 b/tests/ref/fate/dca-xll_51_24_48_none-dmix_2
new file mode 100644
index 00000000..dffc587c
--- /dev/null
+++ b/tests/ref/fate/dca-xll_51_24_48_none-dmix_2
@@ -0,0 +1,8 @@
+#format: frame checksums
+#version: 1
+#hash: MD5
+#tb 0: 1/48000
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,     1024,    18432, f9debe3f07be68533bf0295e3d2ba68a
+0,       1024,       1024,     1024,    18432, 6707daa7724fdc552869e522a7936f26
+0,       2048,       2048,     1024,    18432, be4cc5d54a49870a83edba83f21a7fb5
diff --git a/tests/ref/fate/dca-xll_51_24_48_none-dmix_6 b/tests/ref/fate/dca-xll_51_24_48_none-dmix_6
new file mode 100644
index 00000000..dffc587c
--- /dev/null
+++ b/tests/ref/fate/dca-xll_51_24_48_none-dmix_6
@@ -0,0 +1,8 @@
+#format: frame checksums
+#version: 1
+#hash: MD5
+#tb 0: 1/48000
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,     1024,    18432, f9debe3f07be68533bf0295e3d2ba68a
+0,       1024,       1024,     1024,    18432, 6707daa7724fdc552869e522a7936f26
+0,       2048,       2048,     1024,    18432, be4cc5d54a49870a83edba83f21a7fb5
diff --git a/tests/ref/fate/dca-xll_71_24_48_768_0 b/tests/ref/fate/dca-xll_71_24_48_768_0
new file mode 100644
index 00000000..c122c6a9
--- /dev/null
+++ b/tests/ref/fate/dca-xll_71_24_48_768_0
@@ -0,0 +1,11 @@
+#format: frame checksums
+#version: 1
+#hash: MD5
+#tb 0: 1/48000
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,      512,    12288, ca9f8c8eb1b9b311cb79999fa376c7f0
+0,        512,        512,      512,    12288, 4072783b8efb99a9e5817067d68f61c6
+0,       1024,       1024,      512,    12288, b37a89420643e09e4c92a6a8b68efb73
+0,       1536,       1536,      512,    12288, 40d58b703681c7b673da1198e9555280
+0,       2048,       2048,      512,    12288, 3f0e22e71e7419256487ef9392abb102
+0,       2560,       2560,      512,    12288, 6a48d9a5e0c74d93cf678e6140a196f7
diff --git a/tests/ref/fate/dca-xll_71_24_48_768_0-dmix_2 b/tests/ref/fate/dca-xll_71_24_48_768_0-dmix_2
new file mode 100644
index 00000000..d653af8a
--- /dev/null
+++ b/tests/ref/fate/dca-xll_71_24_48_768_0-dmix_2
@@ -0,0 +1,11 @@
+#format: frame checksums
+#version: 1
+#hash: MD5
+#tb 0: 1/48000
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,      512,     9216, a2b724b146069938f0e2cb82490dea54
+0,        512,        512,      512,     9216, 13a95890b5f0947d6f058ca9c30a3e01
+0,       1024,       1024,      512,     9216, f38ce0585a816744ecb2120503347c11
+0,       1536,       1536,      512,     9216, 4d535bed6e022a780ecbe90f303985cf
+0,       2048,       2048,      512,     9216, 438f3a2fd98d1037e1ffbe696900d85c
+0,       2560,       2560,      512,     9216, 3ed1193e013d8ef43f502c7d4037a3f9
diff --git a/tests/ref/fate/dca-xll_71_24_48_768_0-dmix_6 b/tests/ref/fate/dca-xll_71_24_48_768_0-dmix_6
new file mode 100644
index 00000000..d653af8a
--- /dev/null
+++ b/tests/ref/fate/dca-xll_71_24_48_768_0-dmix_6
@@ -0,0 +1,11 @@
+#format: frame checksums
+#version: 1
+#hash: MD5
+#tb 0: 1/48000
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,      512,     9216, a2b724b146069938f0e2cb82490dea54
+0,        512,        512,      512,     9216, 13a95890b5f0947d6f058ca9c30a3e01
+0,       1024,       1024,      512,     9216, f38ce0585a816744ecb2120503347c11
+0,       1536,       1536,      512,     9216, 4d535bed6e022a780ecbe90f303985cf
+0,       2048,       2048,      512,     9216, 438f3a2fd98d1037e1ffbe696900d85c
+0,       2560,       2560,      512,     9216, 3ed1193e013d8ef43f502c7d4037a3f9
diff --git a/tests/ref/fate/dca-xll_71_24_48_768_1 b/tests/ref/fate/dca-xll_71_24_48_768_1
new file mode 100644
index 00000000..c122c6a9
--- /dev/null
+++ b/tests/ref/fate/dca-xll_71_24_48_768_1
@@ -0,0 +1,11 @@
+#format: frame checksums
+#version: 1
+#hash: MD5
+#tb 0: 1/48000
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,      512,    12288, ca9f8c8eb1b9b311cb79999fa376c7f0
+0,        512,        512,      512,    12288, 4072783b8efb99a9e5817067d68f61c6
+0,       1024,       1024,      512,    12288, b37a89420643e09e4c92a6a8b68efb73
+0,       1536,       1536,      512,    12288, 40d58b703681c7b673da1198e9555280
+0,       2048,       2048,      512,    12288, 3f0e22e71e7419256487ef9392abb102
+0,       2560,       2560,      512,    12288, 6a48d9a5e0c74d93cf678e6140a196f7
diff --git a/tests/ref/fate/dca-xll_71_24_48_768_1-dmix_2 b/tests/ref/fate/dca-xll_71_24_48_768_1-dmix_2
new file mode 100644
index 00000000..58c381c3
--- /dev/null
+++ b/tests/ref/fate/dca-xll_71_24_48_768_1-dmix_2
@@ -0,0 +1,11 @@
+#format: frame checksums
+#version: 1
+#hash: MD5
+#tb 0: 1/48000
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,      512,     3072, d2a70550489de356a2cd6bfc40711204
+0,        512,        512,      512,     3072, d2a70550489de356a2cd6bfc40711204
+0,       1024,       1024,      512,     3072, 4961cb5a954b57d157f075e051032389
+0,       1536,       1536,      512,     3072, b2ac42e29deb0bb6095550ec59a1da84
+0,       2048,       2048,      512,     3072, d30bd752b75ead1b8f62202fe8b1d692
+0,       2560,       2560,      512,     3072, ff7562359eca2706580050d849ca2de4
diff --git a/tests/ref/fate/dca-xll_71_24_48_768_1-dmix_6 b/tests/ref/fate/dca-xll_71_24_48_768_1-dmix_6
new file mode 100644
index 00000000..d61048c7
--- /dev/null
+++ b/tests/ref/fate/dca-xll_71_24_48_768_1-dmix_6
@@ -0,0 +1,11 @@
+#format: frame checksums
+#version: 1
+#hash: MD5
+#tb 0: 1/48000
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,      512,     9216, a2b724b146069938f0e2cb82490dea54
+0,        512,        512,      512,     9216, 13a95890b5f0947d6f058ca9c30a3e01
+0,       1024,       1024,      512,     9216, 13ee6aa61ed484c3d64c9d6546e296bb
+0,       1536,       1536,      512,     9216, 4db0762920d0bb3ecd765168e2acd296
+0,       2048,       2048,      512,     9216, 6253047a99cbc0e6b1d484b7828466b6
+0,       2560,       2560,      512,     9216, 2977e1080514c6d2150ed2fde267e714
diff --git a/tests/ref/fate/dca-xll_71_24_96_768 b/tests/ref/fate/dca-xll_71_24_96_768
new file mode 100644
index 00000000..72da5265
--- /dev/null
+++ b/tests/ref/fate/dca-xll_71_24_96_768
@@ -0,0 +1,11 @@
+#format: frame checksums
+#version: 1
+#hash: MD5
+#tb 0: 1/96000
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,     1024,    24576, 0b24a527d66f2b0cab97f37e4cd79987
+0,       1024,       1024,     1024,    24576, 91ff0dac5df86e798bfef5e573536b08
+0,       2048,       2048,     1024,    24576, c2b5e663fa260abc9c737d16110052a8
+0,       3072,       3072,     1024,    24576, 81f474e17d875f7c5a2cbaadef4f7c0a
+0,       4096,       4096,     1024,    24576, 959200e06f2f8489b9e36c74b073edae
+0,       5120,       5120,     1024,    24576, ef1ed14d39cce6ba1ab9736831aa1b30
diff --git a/tests/ref/fate/dca-xll_71_24_96_768-dmix_2 b/tests/ref/fate/dca-xll_71_24_96_768-dmix_2
new file mode 100644
index 00000000..b24c7178
--- /dev/null
+++ b/tests/ref/fate/dca-xll_71_24_96_768-dmix_2
@@ -0,0 +1,11 @@
+#format: frame checksums
+#version: 1
+#hash: MD5
+#tb 0: 1/96000
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,     1024,    18432, 0a675f172b0e1a171c46dfaa4f1d0f00
+0,       1024,       1024,     1024,    18432, f9debe3f07be68533bf0295e3d2ba68a
+0,       2048,       2048,     1024,    18432, 0a6efad6cc824656594d5d7abce1e547
+0,       3072,       3072,     1024,    18432, a1beb9f5afcf72b99671a7cef8c35505
+0,       4096,       4096,     1024,    18432, e73de9b6688ec463178f694540f71d61
+0,       5120,       5120,     1024,    18432, e2e326c2f61d28ad11cea665ea638ae4
diff --git a/tests/ref/fate/dca-xll_71_24_96_768-dmix_6 b/tests/ref/fate/dca-xll_71_24_96_768-dmix_6
new file mode 100644
index 00000000..b24c7178
--- /dev/null
+++ b/tests/ref/fate/dca-xll_71_24_96_768-dmix_6
@@ -0,0 +1,11 @@
+#format: frame checksums
+#version: 1
+#hash: MD5
+#tb 0: 1/96000
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,     1024,    18432, 0a675f172b0e1a171c46dfaa4f1d0f00
+0,       1024,       1024,     1024,    18432, f9debe3f07be68533bf0295e3d2ba68a
+0,       2048,       2048,     1024,    18432, 0a6efad6cc824656594d5d7abce1e547
+0,       3072,       3072,     1024,    18432, a1beb9f5afcf72b99671a7cef8c35505
+0,       4096,       4096,     1024,    18432, e73de9b6688ec463178f694540f71d61
+0,       5120,       5120,     1024,    18432, e2e326c2f61d28ad11cea665ea638ae4
diff --git a/tests/ref/fate/dca-xll_x96_51_24_96_1509 b/tests/ref/fate/dca-xll_x96_51_24_96_1509
new file mode 100644
index 00000000..150ab25f
--- /dev/null
+++ b/tests/ref/fate/dca-xll_x96_51_24_96_1509
@@ -0,0 +1,11 @@
+#format: frame checksums
+#version: 1
+#hash: MD5
+#tb 0: 1/96000
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,     1024,    18432, f9debe3f07be68533bf0295e3d2ba68a
+0,       1024,       1024,     1024,    18432, f9debe3f07be68533bf0295e3d2ba68a
+0,       2048,       2048,     1024,    18432, f68686e3f6dec5aa00fc8ef495c3a3fa
+0,       3072,       3072,     1024,    18432, 03adab4e2a4a5e9917d29de09e0afb22
+0,       4096,       4096,     1024,    18432, d7d1ab1efd7f2ff45f7374b41abaec51
+0,       5120,       5120,     1024,    18432, fe2aa09319ac1d7017e29fdab6207f27
diff --git a/tests/ref/fate/dca-xll_x96_51_24_96_1509-dmix_2 b/tests/ref/fate/dca-xll_x96_51_24_96_1509-dmix_2
new file mode 100644
index 00000000..150ab25f
--- /dev/null
+++ b/tests/ref/fate/dca-xll_x96_51_24_96_1509-dmix_2
@@ -0,0 +1,11 @@
+#format: frame checksums
+#version: 1
+#hash: MD5
+#tb 0: 1/96000
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,     1024,    18432, f9debe3f07be68533bf0295e3d2ba68a
+0,       1024,       1024,     1024,    18432, f9debe3f07be68533bf0295e3d2ba68a
+0,       2048,       2048,     1024,    18432, f68686e3f6dec5aa00fc8ef495c3a3fa
+0,       3072,       3072,     1024,    18432, 03adab4e2a4a5e9917d29de09e0afb22
+0,       4096,       4096,     1024,    18432, d7d1ab1efd7f2ff45f7374b41abaec51
+0,       5120,       5120,     1024,    18432, fe2aa09319ac1d7017e29fdab6207f27
diff --git a/tests/ref/fate/dca-xll_x96_51_24_96_1509-dmix_6 b/tests/ref/fate/dca-xll_x96_51_24_96_1509-dmix_6
new file mode 100644
index 00000000..150ab25f
--- /dev/null
+++ b/tests/ref/fate/dca-xll_x96_51_24_96_1509-dmix_6
@@ -0,0 +1,11 @@
+#format: frame checksums
+#version: 1
+#hash: MD5
+#tb 0: 1/96000
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,     1024,    18432, f9debe3f07be68533bf0295e3d2ba68a
+0,       1024,       1024,     1024,    18432, f9debe3f07be68533bf0295e3d2ba68a
+0,       2048,       2048,     1024,    18432, f68686e3f6dec5aa00fc8ef495c3a3fa
+0,       3072,       3072,     1024,    18432, 03adab4e2a4a5e9917d29de09e0afb22
+0,       4096,       4096,     1024,    18432, d7d1ab1efd7f2ff45f7374b41abaec51
+0,       5120,       5120,     1024,    18432, fe2aa09319ac1d7017e29fdab6207f27
diff --git a/tests/ref/fate/dca-xll_xch_61_24_48_768 b/tests/ref/fate/dca-xll_xch_61_24_48_768
new file mode 100644
index 00000000..211aee1b
--- /dev/null
+++ b/tests/ref/fate/dca-xll_xch_61_24_48_768
@@ -0,0 +1,11 @@
+#format: frame checksums
+#version: 1
+#hash: MD5
+#tb 0: 1/48000
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,      512,    10752, c3c5b236c266a9090378def1ad497a21
+0,        512,        512,      512,    10752, 36eb6749f8d9ce9f94860dcc447253ac
+0,       1024,       1024,      512,    10752, 586dbf42917c8353a3d3a65d4a510aa5
+0,       1536,       1536,      512,    10752, 08aa4874b4e092982cef8bd9623ebcaf
+0,       2048,       2048,      512,    10752, 809775c772fd730bbbb516cadc71406b
+0,       2560,       2560,      512,    10752, 81ef75c310c242b38787cd18203aa8c6
diff --git a/tests/ref/fate/dca-xll_xch_61_24_48_768-dmix_2 b/tests/ref/fate/dca-xll_xch_61_24_48_768-dmix_2
new file mode 100644
index 00000000..9309d8db
--- /dev/null
+++ b/tests/ref/fate/dca-xll_xch_61_24_48_768-dmix_2
@@ -0,0 +1,11 @@
+#format: frame checksums
+#version: 1
+#hash: MD5
+#tb 0: 1/48000
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,      512,     9216, 652c4e61f9abe9fba9de792242e2d31d
+0,        512,        512,      512,     9216, 13a95890b5f0947d6f058ca9c30a3e01
+0,       1024,       1024,      512,     9216, bc9a102c0d879d25b5b362e20eaf277e
+0,       1536,       1536,      512,     9216, 06c77edb9dbd3aa927ffec3e5cf78266
+0,       2048,       2048,      512,     9216, f4010c574400a0876aa29a71f1bf1af3
+0,       2560,       2560,      512,     9216, b6d34a134c79bf105d88ecc4ac148f92
diff --git a/tests/ref/fate/dca-xll_xch_61_24_48_768-dmix_6 b/tests/ref/fate/dca-xll_xch_61_24_48_768-dmix_6
new file mode 100644
index 00000000..9309d8db
--- /dev/null
+++ b/tests/ref/fate/dca-xll_xch_61_24_48_768-dmix_6
@@ -0,0 +1,11 @@
+#format: frame checksums
+#version: 1
+#hash: MD5
+#tb 0: 1/48000
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,      512,     9216, 652c4e61f9abe9fba9de792242e2d31d
+0,        512,        512,      512,     9216, 13a95890b5f0947d6f058ca9c30a3e01
+0,       1024,       1024,      512,     9216, bc9a102c0d879d25b5b362e20eaf277e
+0,       1536,       1536,      512,     9216, 06c77edb9dbd3aa927ffec3e5cf78266
+0,       2048,       2048,      512,     9216, f4010c574400a0876aa29a71f1bf1af3
+0,       2560,       2560,      512,     9216, b6d34a134c79bf105d88ecc4ac148f92
diff --git a/tests/ref/fate/dcinema-encode b/tests/ref/fate/dcinema-encode
index 8aeb2152..93ac1720 100644
--- a/tests/ref/fate/dcinema-encode
+++ b/tests/ref/fate/dcinema-encode
@@ -1 +1,25 @@
-MD5=2d7c6897c315493647db159f4bfd6edc
+#format: frame checksums
+#version: 1
+#hash: MD5
+#tb 0: 1/96000
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,      341,     4092, 697cddfcd0e21f24782af0705b7048f3
+0,        341,        341,      341,     4092, a057b18cd493923fed33c18578f61e0b
+0,        682,        682,      341,     4092, f4eacfd888566040067b8e5ce7d276c6
+0,       1023,       1023,      341,     4092, 4de78d332ce2047014880a110c160dc2
+0,       1364,       1364,      341,     4092, 138ee3fc206538feca6de3d6d62d08eb
+0,       1705,       1705,      341,     4092, 187a2f2998aa1c0ba0130c57dd1d6c86
+0,       2046,       2046,      341,     4092, 54e6c3db8a5f8c09b47f025659a36b17
+0,       2387,       2387,      341,     4092, 8abfdf44a24c158429c71e01cee31e20
+0,       2728,       2728,      341,     4092, bd08f5018edc5dc4520739e913ed89a3
+0,       3069,       3069,      341,     4092, 9f60ba4275646344e4a9b3c647efffe9
+0,       3410,       3410,      341,     4092, 00cefc1f27230cdd06ecd43132e16327
+0,       3751,       3751,      341,     4092, d4d13047cd639ed722a4ae1bc1f06991
+0,       4092,       4092,      341,     4092, 16b227e4f968c11cba279506f00d5172
+0,       4433,       4433,      341,     4092, 70f4046f709fdd4d80e2f2ffc862f21a
+0,       4774,       4774,      341,     4092, adbef4b4ef728f0c2a31b4b0baba50a0
+0,       5115,       5115,      341,     4092, fe009b6cc96b9d1098dcc5fba0e6b3fa
+0,       5456,       5456,      341,     4092, 4462b2f1654c9b31fdd7ab04ffb84192
+0,       5797,       5797,      341,     4092, 2e96ba3bd13de03f9cfdc2b8c3ea0620
+0,       6138,       6138,      341,     4092, 395c920f10cce6670029a98095eba027
+0,       6479,       6479,      341,     4092, 28d4c2f6364f31cb61f4aa144badc734
diff --git a/tests/ref/fate/dds-argb b/tests/ref/fate/dds-argb
new file mode 100644
index 00000000..fbde4242
--- /dev/null
+++ b/tests/ref/fate/dds-argb
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,    56320, 0xfcaa920b
diff --git a/tests/ref/fate/dds-argb-aexp b/tests/ref/fate/dds-argb-aexp
new file mode 100644
index 00000000..f52410f9
--- /dev/null
+++ b/tests/ref/fate/dds-argb-aexp
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,    16384, 0xfdd37c43
diff --git a/tests/ref/fate/dds-dx10-bc1 b/tests/ref/fate/dds-dx10-bc1
new file mode 100644
index 00000000..84efd9f5
--- /dev/null
+++ b/tests/ref/fate/dds-dx10-bc1
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,    16384, 0xed2f850f
diff --git a/tests/ref/fate/dds-dx10-bc1a b/tests/ref/fate/dds-dx10-bc1a
new file mode 100644
index 00000000..8cb4589e
--- /dev/null
+++ b/tests/ref/fate/dds-dx10-bc1a
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,    16384, 0x473b484b
diff --git a/tests/ref/fate/dds-dx10-bc2 b/tests/ref/fate/dds-dx10-bc2
new file mode 100644
index 00000000..a25180de
--- /dev/null
+++ b/tests/ref/fate/dds-dx10-bc2
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,    16384, 0x2bce3baa
diff --git a/tests/ref/fate/dds-dx10-bc3 b/tests/ref/fate/dds-dx10-bc3
new file mode 100644
index 00000000..296c9452
--- /dev/null
+++ b/tests/ref/fate/dds-dx10-bc3
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,    16384, 0xb91d3c62
diff --git a/tests/ref/fate/dds-dx10-bc4 b/tests/ref/fate/dds-dx10-bc4
new file mode 100644
index 00000000..4583d258
--- /dev/null
+++ b/tests/ref/fate/dds-dx10-bc4
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,    16384, 0xfdfd1751
diff --git a/tests/ref/fate/dds-dx10-bc5 b/tests/ref/fate/dds-dx10-bc5
new file mode 100644
index 00000000..bf595eb4
--- /dev/null
+++ b/tests/ref/fate/dds-dx10-bc5
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,    16384, 0x79bd207e
diff --git a/tests/ref/fate/dds-dxt1 b/tests/ref/fate/dds-dxt1
new file mode 100644
index 00000000..24774069
--- /dev/null
+++ b/tests/ref/fate/dds-dxt1
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,    32768, 0xf52adc80
diff --git a/tests/ref/fate/dds-dxt1-normalmap b/tests/ref/fate/dds-dxt1-normalmap
new file mode 100644
index 00000000..3f77547c
--- /dev/null
+++ b/tests/ref/fate/dds-dxt1-normalmap
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,    16384, 0x2b411855
diff --git a/tests/ref/fate/dds-dxt1a b/tests/ref/fate/dds-dxt1a
new file mode 100644
index 00000000..05be314b
--- /dev/null
+++ b/tests/ref/fate/dds-dxt1a
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,    16384, 0xcabb50ea
diff --git a/tests/ref/fate/dds-dxt2 b/tests/ref/fate/dds-dxt2
new file mode 100644
index 00000000..40bcf6eb
--- /dev/null
+++ b/tests/ref/fate/dds-dxt2
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,    16384, 0x11cebeb0
diff --git a/tests/ref/fate/dds-dxt3 b/tests/ref/fate/dds-dxt3
new file mode 100644
index 00000000..41401dae
--- /dev/null
+++ b/tests/ref/fate/dds-dxt3
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,    16384, 0x729b74ba
diff --git a/tests/ref/fate/dds-dxt4 b/tests/ref/fate/dds-dxt4
new file mode 100644
index 00000000..dc5d73f5
--- /dev/null
+++ b/tests/ref/fate/dds-dxt4
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,    16384, 0x31aaacd6
diff --git a/tests/ref/fate/dds-dxt5 b/tests/ref/fate/dds-dxt5
new file mode 100644
index 00000000..399d6d95
--- /dev/null
+++ b/tests/ref/fate/dds-dxt5
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,    16384, 0xfd3166aa
diff --git a/tests/ref/fate/dds-dxt5-aexp b/tests/ref/fate/dds-dxt5-aexp
new file mode 100644
index 00000000..ff91afde
--- /dev/null
+++ b/tests/ref/fate/dds-dxt5-aexp
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,    16384, 0x3c987914
diff --git a/tests/ref/fate/dds-dxt5-normalmap b/tests/ref/fate/dds-dxt5-normalmap
new file mode 100644
index 00000000..680d1b7f
--- /dev/null
+++ b/tests/ref/fate/dds-dxt5-normalmap
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,    16384, 0xd29b1ea1
diff --git a/tests/ref/fate/dds-dxt5-normalmap-ati b/tests/ref/fate/dds-dxt5-normalmap-ati
new file mode 100644
index 00000000..7f088f75
--- /dev/null
+++ b/tests/ref/fate/dds-dxt5-normalmap-ati
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,    16384, 0x54cb212f
diff --git a/tests/ref/fate/dds-dxt5-rbxg b/tests/ref/fate/dds-dxt5-rbxg
new file mode 100644
index 00000000..7dbecf42
--- /dev/null
+++ b/tests/ref/fate/dds-dxt5-rbxg
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,    16384, 0x37588d12
diff --git a/tests/ref/fate/dds-dxt5-rgxb b/tests/ref/fate/dds-dxt5-rgxb
new file mode 100644
index 00000000..137913d8
--- /dev/null
+++ b/tests/ref/fate/dds-dxt5-rgxb
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,    16384, 0xfb6b91cf
diff --git a/tests/ref/fate/dds-dxt5-rxbg b/tests/ref/fate/dds-dxt5-rxbg
new file mode 100644
index 00000000..b6e12d3d
--- /dev/null
+++ b/tests/ref/fate/dds-dxt5-rxbg
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,    16384, 0xc6cb8cbe
diff --git a/tests/ref/fate/dds-dxt5-rxgb b/tests/ref/fate/dds-dxt5-rxgb
new file mode 100644
index 00000000..a4190d54
--- /dev/null
+++ b/tests/ref/fate/dds-dxt5-rxgb
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,    16384, 0xb67d936a
diff --git a/tests/ref/fate/dds-dxt5-xgbr b/tests/ref/fate/dds-dxt5-xgbr
new file mode 100644
index 00000000..95404f22
--- /dev/null
+++ b/tests/ref/fate/dds-dxt5-xgbr
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,    16384, 0xb0e69293
diff --git a/tests/ref/fate/dds-dxt5-xgxr b/tests/ref/fate/dds-dxt5-xgxr
new file mode 100644
index 00000000..3ed73c39
--- /dev/null
+++ b/tests/ref/fate/dds-dxt5-xgxr
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,    16384, 0xa93de1ad
diff --git a/tests/ref/fate/dds-dxt5-xrbg b/tests/ref/fate/dds-dxt5-xrbg
new file mode 100644
index 00000000..f6479554
--- /dev/null
+++ b/tests/ref/fate/dds-dxt5-xrbg
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,    16384, 0xf20d8b69
diff --git a/tests/ref/fate/dds-dxt5-ycocg b/tests/ref/fate/dds-dxt5-ycocg
new file mode 100644
index 00000000..1dd8bef8
--- /dev/null
+++ b/tests/ref/fate/dds-dxt5-ycocg
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,    16384, 0x0d0c8d20
diff --git a/tests/ref/fate/dds-dxt5-ycocg-scaled b/tests/ref/fate/dds-dxt5-ycocg-scaled
new file mode 100644
index 00000000..5a70b490
--- /dev/null
+++ b/tests/ref/fate/dds-dxt5-ycocg-scaled
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,    16384, 0x5a089973
diff --git a/tests/ref/fate/dds-pal b/tests/ref/fate/dds-pal
new file mode 100644
index 00000000..575a4bd5
--- /dev/null
+++ b/tests/ref/fate/dds-pal
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,    65536, 0x4287e5cd
diff --git a/tests/ref/fate/dds-pal-ati b/tests/ref/fate/dds-pal-ati
new file mode 100644
index 00000000..6de8adb4
--- /dev/null
+++ b/tests/ref/fate/dds-pal-ati
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,    16384, 0x6ac18060
diff --git a/tests/ref/fate/dds-rgb16 b/tests/ref/fate/dds-rgb16
new file mode 100644
index 00000000..40a9938f
--- /dev/null
+++ b/tests/ref/fate/dds-rgb16
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,    16384, 0xc8910265
diff --git a/tests/ref/fate/dds-rgb24 b/tests/ref/fate/dds-rgb24
new file mode 100644
index 00000000..3eeb1eb0
--- /dev/null
+++ b/tests/ref/fate/dds-rgb24
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,    24576, 0xef0f9653
diff --git a/tests/ref/fate/dds-rgba b/tests/ref/fate/dds-rgba
new file mode 100644
index 00000000..803b40a7
--- /dev/null
+++ b/tests/ref/fate/dds-rgba
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,    25600, 0xa2a47b2f
diff --git a/tests/ref/fate/dds-rgtc1s b/tests/ref/fate/dds-rgtc1s
new file mode 100644
index 00000000..5085962e
--- /dev/null
+++ b/tests/ref/fate/dds-rgtc1s
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,    16384, 0xcda652a4
diff --git a/tests/ref/fate/dds-rgtc1u b/tests/ref/fate/dds-rgtc1u
new file mode 100644
index 00000000..b7cff66e
--- /dev/null
+++ b/tests/ref/fate/dds-rgtc1u
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,    16384, 0x629a1b44
diff --git a/tests/ref/fate/dds-rgtc2s b/tests/ref/fate/dds-rgtc2s
new file mode 100644
index 00000000..72276020
--- /dev/null
+++ b/tests/ref/fate/dds-rgtc2s
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,    16384, 0xa0135ca8
diff --git a/tests/ref/fate/dds-rgtc2u b/tests/ref/fate/dds-rgtc2u
new file mode 100644
index 00000000..0b6678bb
--- /dev/null
+++ b/tests/ref/fate/dds-rgtc2u
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,    16384, 0x38cb221a
diff --git a/tests/ref/fate/dds-rgtc2u-xy b/tests/ref/fate/dds-rgtc2u-xy
new file mode 100644
index 00000000..bf595eb4
--- /dev/null
+++ b/tests/ref/fate/dds-rgtc2u-xy
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,    16384, 0x79bd207e
diff --git a/tests/ref/fate/dds-uyvy b/tests/ref/fate/dds-uyvy
new file mode 100644
index 00000000..2bcbaa04
--- /dev/null
+++ b/tests/ref/fate/dds-uyvy
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,     8192, 0x3c658750
diff --git a/tests/ref/fate/dds-xbgr b/tests/ref/fate/dds-xbgr
new file mode 100644
index 00000000..eb7a0246
--- /dev/null
+++ b/tests/ref/fate/dds-xbgr
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,    16384, 0x70336a28
diff --git a/tests/ref/fate/dds-xrgb b/tests/ref/fate/dds-xrgb
new file mode 100644
index 00000000..63e5225f
--- /dev/null
+++ b/tests/ref/fate/dds-xrgb
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,    16384, 0x702f6a28
diff --git a/tests/ref/fate/dds-y b/tests/ref/fate/dds-y
new file mode 100644
index 00000000..ae62091b
--- /dev/null
+++ b/tests/ref/fate/dds-y
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,     8192, 0x56eca90f
diff --git a/tests/ref/fate/dds-ya b/tests/ref/fate/dds-ya
new file mode 100644
index 00000000..d6527c63
--- /dev/null
+++ b/tests/ref/fate/dds-ya
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,    28160, 0xd3981fcb
diff --git a/tests/ref/fate/dds-ycocg b/tests/ref/fate/dds-ycocg
new file mode 100644
index 00000000..f3dd727c
--- /dev/null
+++ b/tests/ref/fate/dds-ycocg
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,    16384, 0x4cb08d0a
diff --git a/tests/ref/fate/dds-yuyv b/tests/ref/fate/dds-yuyv
new file mode 100644
index 00000000..e65c9507
--- /dev/null
+++ b/tests/ref/fate/dds-yuyv
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,     8192, 0xc1108750
diff --git a/tests/ref/fate/dnxhd-mbaff b/tests/ref/fate/dnxhd-mbaff
new file mode 100644
index 00000000..171c244d
--- /dev/null
+++ b/tests/ref/fate/dnxhd-mbaff
@@ -0,0 +1,2 @@
+#tb 0: 1001/30000
+0,          0,          0,        1,  6220800, 0xe78198c0
diff --git a/tests/ref/fate/dnxhr-444 b/tests/ref/fate/dnxhr-444
new file mode 100644
index 00000000..f9e73c31
--- /dev/null
+++ b/tests/ref/fate/dnxhr-444
@@ -0,0 +1,2 @@
+#tb 0: 1/24
+0,          0,          0,        1,  9665280, 0x19ef4057
diff --git a/tests/ref/fate/dxv-dxt1 b/tests/ref/fate/dxv-dxt1
new file mode 100644
index 00000000..128dabae
--- /dev/null
+++ b/tests/ref/fate/dxv-dxt1
@@ -0,0 +1,2 @@
+#tb 0: 1001/30000
+0,          0,          0,        1,  8294400, 0x0797cd53
diff --git a/tests/ref/fate/dxv-dxt5 b/tests/ref/fate/dxv-dxt5
new file mode 100644
index 00000000..128dabae
--- /dev/null
+++ b/tests/ref/fate/dxv-dxt5
@@ -0,0 +1,2 @@
+#tb 0: 1001/30000
+0,          0,          0,        1,  8294400, 0x0797cd53
diff --git a/tests/ref/fate/dxv3-dxt1 b/tests/ref/fate/dxv3-dxt1
new file mode 100644
index 00000000..e0f1f941
--- /dev/null
+++ b/tests/ref/fate/dxv3-dxt1
@@ -0,0 +1,2 @@
+#tb 0: 1001/30000
+0,          0,          0,        1,  8294400, 0x98bbcc85
diff --git a/tests/ref/fate/dxv3-dxt5 b/tests/ref/fate/dxv3-dxt5
new file mode 100644
index 00000000..128dabae
--- /dev/null
+++ b/tests/ref/fate/dxv3-dxt5
@@ -0,0 +1,2 @@
+#tb 0: 1001/30000
+0,          0,          0,        1,  8294400, 0x0797cd53
diff --git a/tests/ref/fate/ffmpeg-filter_colorkey b/tests/ref/fate/ffmpeg-filter_colorkey
new file mode 100644
index 00000000..0e4780b0
--- /dev/null
+++ b/tests/ref/fate/ffmpeg-filter_colorkey
@@ -0,0 +1,16 @@
+#tb 0: 1/25
+#tb 1: 1/48000
+0,          0,          0,        1,   622080, 0x4e30accb
+1,          0,          0,     1152,     4608, 0x00000000
+1,       1152,       1152,     1152,     4608, 0xbca29063
+0,          1,          1,        1,   622080, 0x7d941c14
+1,       2304,       2304,     1152,     4608, 0x6e70df10
+1,       3456,       3456,     1152,     4608, 0x95e6a535
+0,          2,          2,        1,   622080, 0xf7451c5b
+0,          3,          3,        1,   622080, 0xb2c74319
+0,          4,          4,        1,   622080, 0xc9b80b79
+0,          5,          5,        1,   622080, 0x92ce1194
+0,          6,          6,        1,   622080, 0x43ae99ac
+0,          7,          7,        1,   622080, 0x4ec3a554
+0,          8,          8,        1,   622080, 0x3200250c
+0,          9,          9,        1,   622080, 0x94ebb3f3
diff --git a/tests/ref/fate/fifo b/tests/ref/fate/fifo
index 18a5691f..162d754b 100644
--- a/tests/ref/fate/fifo
+++ b/tests/ref/fate/fifo
@@ -24,4 +24,31 @@
 11: 11
 12: 12
 
+0: 0
+1: 1
+2: 2
+3: 3
+4: 4
+5: 5
+6: 6
+7: 7
+8: 8
+9: 9
+10: 10
+11: 11
+12: 12
+
 0 1 2 3 4 5 6 7 8 9 10 11 12
+0: 0
+1: 1
+2: 2
+3: 3
+4: 4
+5: 5
+6: 6
+7: 7
+8: 8
+9: 9
+10: 10
+11: 11
+12: 12
diff --git a/tests/ref/fate/filter-codecview-mvs b/tests/ref/fate/filter-codecview-mvs
index e2404f3f..13286f0f 100644
--- a/tests/ref/fate/filter-codecview-mvs
+++ b/tests/ref/fate/filter-codecview-mvs
@@ -39,23 +39,23 @@
 0,         37,         37,        1,   276480, 0x5ce39368
 0,         38,         38,        1,   276480, 0x4ec1e418
 0,         39,         39,        1,   276480, 0x23c418ae
-0,         40,         40,        1,   276480, 0x499c55d6
-0,         41,         41,        1,   276480, 0x166ef020
-0,         42,         42,        1,   276480, 0xaa0614ab
-0,         43,         43,        1,   276480, 0x8bc2fa2b
-0,         44,         44,        1,   276480, 0xc9c873f7
-0,         45,         45,        1,   276480, 0x99838153
-0,         46,         46,        1,   276480, 0x32e5f45b
+0,         40,         40,        1,   276480, 0x036a5515
+0,         41,         41,        1,   276480, 0x7946efbd
+0,         42,         42,        1,   276480, 0xd9aa1382
+0,         43,         43,        1,   276480, 0x3863f9c8
+0,         44,         44,        1,   276480, 0x33e47330
+0,         45,         45,        1,   276480, 0xff6e8038
+0,         46,         46,        1,   276480, 0xed3ff087
 0,         47,         47,        1,   276480, 0xe7834514
-0,         48,         48,        1,   276480, 0x454c99c8
-0,         49,         49,        1,   276480, 0xe29bacc8
-0,         50,         50,        1,   276480, 0x6b79c3d3
-0,         51,         51,        1,   276480, 0x284d358e
-0,         52,         52,        1,   276480, 0x17552cd4
+0,         48,         48,        1,   276480, 0x4d5d909d
+0,         49,         49,        1,   276480, 0x82eea962
+0,         50,         50,        1,   276480, 0x8075bca3
+0,         51,         51,        1,   276480, 0xd5dc3185
+0,         52,         52,        1,   276480, 0x859e0490
 0,         53,         53,        1,   276480, 0x6ceebf3e
-0,         54,         54,        1,   276480, 0x7ac8de3c
-0,         55,         55,        1,   276480, 0x14d6768c
-0,         56,         56,        1,   276480, 0x59891e5f
-0,         57,         57,        1,   276480, 0xed3053ea
-0,         58,         58,        1,   276480, 0x9b0182c3
-0,         59,         59,        1,   276480, 0xf849eb88
+0,         54,         54,        1,   276480, 0xada5d62d
+0,         55,         55,        1,   276480, 0x991a7628
+0,         56,         56,        1,   276480, 0xe169042a
+0,         57,         57,        1,   276480, 0x226e52c4
+0,         58,         58,        1,   276480, 0xa3fe775c
+0,         59,         59,        1,   276480, 0x6b80e99f
diff --git a/tests/ref/fate/filter-delogo b/tests/ref/fate/filter-delogo
index 80342ee8..bf2b7513 100644
--- a/tests/ref/fate/filter-delogo
+++ b/tests/ref/fate/filter-delogo
@@ -1,110 +1,110 @@
 #tb 0: 32768/982057
-0,          0,          0,        1,   126720, 0x77a5ebed
-0,          1,          1,        1,   126720, 0x4cc1ec8b
-0,          2,          2,        1,   126720, 0x4cc1ec8b
-0,          3,          3,        1,   126720, 0x4cc1ec8b
-0,          4,          4,        1,   126720, 0x0981eccd
-0,          5,          5,        1,   126720, 0x04fef463
-0,          6,          6,        1,   126720, 0x3dd3f4a3
-0,          7,          7,        1,   126720, 0xab74f483
-0,          8,          8,        1,   126720, 0x5ed7b7db
-0,          9,          9,        1,   126720, 0xd1fcb89b
-0,         10,         10,        1,   126720, 0xcb4eb8db
-0,         11,         11,        1,   126720, 0xdcc5b8a7
-0,         12,         12,        1,   126720, 0x33ffb90b
-0,         13,         13,        1,   126720, 0xb38fbb7b
-0,         14,         14,        1,   126720, 0x11f0bbfb
-0,         15,         15,        1,   126720, 0xe8f3bb87
-0,         16,         16,        1,   126720, 0xf8ecb8eb
-0,         17,         17,        1,   126720, 0x5db2ae48
-0,         18,         18,        1,   126720, 0x4e7999a6
-0,         19,         19,        1,   126720, 0xdb84a7a6
-0,         20,         20,        1,   126720, 0x9c4fba45
-0,         21,         21,        1,   126720, 0xe635a858
-0,         22,         22,        1,   126720, 0xd4eeab35
-0,         23,         23,        1,   126720, 0xc416aa56
-0,         24,         24,        1,   126720, 0x4c7ebca5
-0,         25,         25,        1,   126720, 0x2887a70e
-0,         26,         26,        1,   126720, 0xc978eaf1
-0,         27,         27,        1,   126720, 0x8a29b563
-0,         28,         28,        1,   126720, 0x275a0352
-0,         29,         29,        1,   126720, 0x446484bb
-0,         30,         30,        1,   126720, 0xdbe00151
-0,         31,         31,        1,   126720, 0x5874b9aa
-0,         32,         32,        1,   126720, 0xdeb30460
-0,         33,         33,        1,   126720, 0xc6d2d62a
-0,         34,         34,        1,   126720, 0x9270dbc7
-0,         35,         35,        1,   126720, 0x30e4ad59
-0,         36,         36,        1,   126720, 0x117479cd
-0,         37,         37,        1,   126720, 0x0567c5d2
-0,         38,         38,        1,   126720, 0x87c8b4a5
-0,         39,         39,        1,   126720, 0xe5c5e0d1
-0,         40,         40,        1,   126720, 0x78d61e3f
-0,         41,         41,        1,   126720, 0xda8d787f
-0,         42,         42,        1,   126720, 0xf32547f7
-0,         43,         43,        1,   126720, 0x70bc8b60
-0,         44,         44,        1,   126720, 0x3ad09927
-0,         45,         45,        1,   126720, 0x5d9607d6
-0,         46,         46,        1,   126720, 0x348a0e02
-0,         47,         47,        1,   126720, 0x7d21255c
-0,         48,         48,        1,   126720, 0x308ed32b
-0,         49,         49,        1,   126720, 0x79fbe734
-0,         50,         50,        1,   126720, 0xdc5de409
-0,         51,         51,        1,   126720, 0x4ee00283
-0,         52,         52,        1,   126720, 0x2697ea2e
-0,         53,         53,        1,   126720, 0x0885edeb
-0,         54,         54,        1,   126720, 0xc041f0d8
-0,         55,         55,        1,   126720, 0xa893272e
-0,         56,         56,        1,   126720, 0x55419d4e
-0,         57,         57,        1,   126720, 0xbc47dbb5
-0,         58,         58,        1,   126720, 0x9666d60b
-0,         59,         59,        1,   126720, 0xac5c054a
-0,         60,         60,        1,   126720, 0x4affb780
-0,         61,         61,        1,   126720, 0x2b7349eb
-0,         62,         62,        1,   126720, 0x75592d02
-0,         63,         63,        1,   126720, 0xdb904a83
-0,         64,         64,        1,   126720, 0xf85e2f93
-0,         65,         65,        1,   126720, 0x632f8be8
-0,         66,         66,        1,   126720, 0x96108ce4
-0,         67,         67,        1,   126720, 0xb68e816b
-0,         68,         68,        1,   126720, 0x89ca112f
-0,         69,         69,        1,   126720, 0x4bed40d3
-0,         70,         70,        1,   126720, 0xe4cb9b12
-0,         71,         71,        1,   126720, 0xa4f164ec
-0,         72,         72,        1,   126720, 0xd1aa2554
-0,         73,         73,        1,   126720, 0x0277aa01
-0,         74,         74,        1,   126720, 0x8ea280fd
-0,         75,         75,        1,   126720, 0xbae64170
-0,         76,         76,        1,   126720, 0xaf9b543b
-0,         77,         77,        1,   126720, 0x1b31680a
-0,         78,         78,        1,   126720, 0x7da4671e
-0,         79,         79,        1,   126720, 0x82b791cb
-0,         80,         80,        1,   126720, 0xd2fff6bb
-0,         81,         81,        1,   126720, 0x2395a793
-0,         82,         82,        1,   126720, 0x66586185
-0,         83,         83,        1,   126720, 0x99c55c63
-0,         84,         84,        1,   126720, 0x7e3f403e
-0,         85,         85,        1,   126720, 0x9eda5b9a
-0,         86,         86,        1,   126720, 0x27469047
-0,         87,         87,        1,   126720, 0xaa5b870e
-0,         88,         88,        1,   126720, 0x70423b2a
-0,         89,         89,        1,   126720, 0x70d86c0a
-0,         90,         90,        1,   126720, 0x4bd065f3
-0,         91,         91,        1,   126720, 0xd71f66bb
-0,         92,         92,        1,   126720, 0x5333e081
-0,         93,         93,        1,   126720, 0xdf0b28d6
-0,         94,         94,        1,   126720, 0x6c48fa53
-0,         95,         95,        1,   126720, 0x9438712d
-0,         96,         96,        1,   126720, 0x9910e3ec
-0,         97,         97,        1,   126720, 0xb0ea80dd
-0,         98,         98,        1,   126720, 0x71983e67
-0,         99,         99,        1,   126720, 0x18924fe6
-0,        100,        100,        1,   126720, 0x9ca014b9
-0,        101,        101,        1,   126720, 0x45f013a0
-0,        102,        102,        1,   126720, 0xf697e8a9
-0,        103,        103,        1,   126720, 0x214a626a
-0,        104,        104,        1,   126720, 0xb2873fb5
-0,        105,        105,        1,   126720, 0xfb47bc52
-0,        106,        106,        1,   126720, 0x63b7a708
-0,        107,        107,        1,   126720, 0x1904ad40
-0,        108,        108,        1,   126720, 0x80015b91
+0,          0,          0,        1,   126720, 0xcefaec47
+0,          1,          1,        1,   126720, 0xa416ece5
+0,          2,          2,        1,   126720, 0xa416ece5
+0,          3,          3,        1,   126720, 0xa416ece5
+0,          4,          4,        1,   126720, 0x60d6ed27
+0,          5,          5,        1,   126720, 0x259af497
+0,          6,          6,        1,   126720, 0x5e6ff4d7
+0,          7,          7,        1,   126720, 0xcc10f4b7
+0,          8,          8,        1,   126720, 0x2811b819
+0,          9,          9,        1,   126720, 0x9b36b8d9
+0,         10,         10,        1,   126720, 0x9488b919
+0,         11,         11,        1,   126720, 0x316cb902
+0,         12,         12,        1,   126720, 0xfd2ab949
+0,         13,         13,        1,   126720, 0x7cc9bbb9
+0,         14,         14,        1,   126720, 0xdb1bbc39
+0,         15,         15,        1,   126720, 0x3d9abbe2
+0,         16,         16,        1,   126720, 0xc226b929
+0,         17,         17,        1,   126720, 0x3623ae2e
+0,         18,         18,        1,   126720, 0x8aff9a0d
+0,         19,         19,        1,   126720, 0x7c85a832
+0,         20,         20,        1,   126720, 0xbe2fba7e
+0,         21,         21,        1,   126720, 0xb4eea89c
+0,         22,         22,        1,   126720, 0x2ce0ab91
+0,         23,         23,        1,   126720, 0x5808aace
+0,         24,         24,        1,   126720, 0x6e5ebcde
+0,         25,         25,        1,   126720, 0x9202a769
+0,         26,         26,        1,   126720, 0x7df4eed2
+0,         27,         27,        1,   126720, 0x4763b8e7
+0,         28,         28,        1,   126720, 0x06970809
+0,         29,         29,        1,   126720, 0xf30189d0
+0,         30,         30,        1,   126720, 0x4a6b05c1
+0,         31,         31,        1,   126720, 0x5caebf3d
+0,         32,         32,        1,   126720, 0x640d08d4
+0,         33,         33,        1,   126720, 0x4b72d969
+0,         34,         34,        1,   126720, 0xbfe8df9d
+0,         35,         35,        1,   126720, 0xfc26b0f5
+0,         36,         36,        1,   126720, 0x142b7c58
+0,         37,         37,        1,   126720, 0x1b5dba4c
+0,         38,         38,        1,   126720, 0x5fcea883
+0,         39,         39,        1,   126720, 0xd9f7d365
+0,         40,         40,        1,   126720, 0xb8a300aa
+0,         41,         41,        1,   126720, 0x675650a6
+0,         42,         42,        1,   126720, 0xf06d2016
+0,         43,         43,        1,   126720, 0x4ee56425
+0,         44,         44,        1,   126720, 0x98ec6723
+0,         45,         45,        1,   126720, 0x80d4cb5b
+0,         46,         46,        1,   126720, 0x8d1cd091
+0,         47,         47,        1,   126720, 0xaca8e9fe
+0,         48,         48,        1,   126720, 0x1ae18b52
+0,         49,         49,        1,   126720, 0xe53d997a
+0,         50,         50,        1,   126720, 0xcb4b8ff3
+0,         51,         51,        1,   126720, 0x9682b249
+0,         52,         52,        1,   126720, 0xf4e19918
+0,         53,         53,        1,   126720, 0x28849c20
+0,         54,         54,        1,   126720, 0xe4b89dda
+0,         55,         55,        1,   126720, 0xe981d407
+0,         56,         56,        1,   126720, 0x73ad4998
+0,         57,         57,        1,   126720, 0x346387bd
+0,         58,         58,        1,   126720, 0xa07c822f
+0,         59,         59,        1,   126720, 0xd911b08b
+0,         60,         60,        1,   126720, 0x7a1b6161
+0,         61,         61,        1,   126720, 0xa451f33f
+0,         62,         62,        1,   126720, 0xb9aed79d
+0,         63,         63,        1,   126720, 0x50c6f4e5
+0,         64,         64,        1,   126720, 0x6ed9d8e3
+0,         65,         65,        1,   126720, 0xea6d352a
+0,         66,         66,        1,   126720, 0xb8f4373b
+0,         67,         67,        1,   126720, 0xf5a52e4c
+0,         68,         68,        1,   126720, 0xf8dbbd01
+0,         69,         69,        1,   126720, 0x7395ec3c
+0,         70,         70,        1,   126720, 0x1249470b
+0,         71,         71,        1,   126720, 0xf12a1105
+0,         72,         72,        1,   126720, 0xef38d2ba
+0,         73,         73,        1,   126720, 0xa1325a34
+0,         74,         74,        1,   126720, 0xc38232b1
+0,         75,         75,        1,   126720, 0x08f9f498
+0,         76,         76,        1,   126720, 0xf6f208c5
+0,         77,         77,        1,   126720, 0x694b1fed
+0,         78,         78,        1,   126720, 0xac811fc2
+0,         79,         79,        1,   126720, 0x2f2b4903
+0,         80,         80,        1,   126720, 0x4a4aad01
+0,         81,         81,        1,   126720, 0xce4557e3
+0,         82,         82,        1,   126720, 0x257a1145
+0,         83,         83,        1,   126720, 0x94580be2
+0,         84,         84,        1,   126720, 0x2422ee8a
+0,         85,         85,        1,   126720, 0x801905b7
+0,         86,         86,        1,   126720, 0x4de639ba
+0,         87,         87,        1,   126720, 0x24d22fa8
+0,         88,         88,        1,   126720, 0xbb38e309
+0,         89,         89,        1,   126720, 0xb8ec150d
+0,         90,         90,        1,   126720, 0xc9fb0eb7
+0,         91,         91,        1,   126720, 0xb6e40f48
+0,         92,         92,        1,   126720, 0xd7028922
+0,         93,         93,        1,   126720, 0x3a24d332
+0,         94,         94,        1,   126720, 0x06b2a598
+0,         95,         95,        1,   126720, 0xf2041a92
+0,         96,         96,        1,   126720, 0xc58a8d10
+0,         97,         97,        1,   126720, 0x0d99293d
+0,         98,         98,        1,   126720, 0x7b72e768
+0,         99,         99,        1,   126720, 0x0c62f900
+0,        100,        100,        1,   126720, 0xcdb7bc31
+0,        101,        101,        1,   126720, 0x04d3bdc2
+0,        102,        102,        1,   126720, 0x0355928e
+0,        103,        103,        1,   126720, 0x84310d32
+0,        104,        104,        1,   126720, 0xc202ebe6
+0,        105,        105,        1,   126720, 0x0cf2703a
+0,        106,        106,        1,   126720, 0xe4c95979
+0,        107,        107,        1,   126720, 0x4f846144
+0,        108,        108,        1,   126720, 0x3f1a17a4
diff --git a/tests/ref/fate/filter-formats b/tests/ref/fate/filter-formats
new file mode 100644
index 00000000..4c303d86
--- /dev/null
+++ b/tests/ref/fate/filter-formats
@@ -0,0 +1,85 @@
+mono
+2 channels (FC+LFE)
+stereo
+2.1
+3.0
+3.1
+3 channels (FC+BL+BR)
+4 channels (FC+LFE+BL+BR)
+quad
+5 channels (FL+FR+LFE+BL+BR)
+5.0
+5.1
+2 channels (FC+BC)
+3 channels (FC+LFE+BC)
+3.0(back)
+4 channels (FL+FR+LFE+BC)
+4.0
+4.1
+3 channels (FC+SL+SR)
+4 channels (FC+LFE+SL+SR)
+quad(side)
+5 channels (FL+FR+LFE+SL+SR)
+5.0(side)
+5.1(side)
+5 channels (FC+BL+BR+SL+SR)
+6 channels (FC+LFE+BL+BR+SL+SR)
+6 channels (FL+FR+BL+BR+SL+SR)
+7 channels (FL+FR+LFE+BL+BR+SL+SR)
+7.0
+7.1
+4 channels (FC+BC+SL+SR)
+5 channels (FC+LFE+BC+SL+SR)
+5 channels (FL+FR+BC+SL+SR)
+6 channels (FL+FR+LFE+BC+SL+SR)
+6.0
+6.1
+3 channels (FC+DL+DR)
+4 channels (FC+LFE+DL+DR)
+4 channels (FL+FR+DL+DR)
+5 channels (FL+FR+LFE+DL+DR)
+5 channels (FL+FR+FC+DL+DR)
+6 channels (FL+FR+FC+LFE+DL+DR)
+5 channels (FC+BL+BR+DL+DR)
+6 channels (FC+LFE+BL+BR+DL+DR)
+6 channels (FL+FR+BL+BR+DL+DR)
+7 channels (FL+FR+LFE+BL+BR+DL+DR)
+7 channels (FL+FR+FC+BL+BR+DL+DR)
+8 channels (FL+FR+FC+LFE+BL+BR+DL+DR)
+4 channels (FC+BC+DL+DR)
+5 channels (FC+LFE+BC+DL+DR)
+5 channels (FL+FR+BC+DL+DR)
+6 channels (FL+FR+LFE+BC+DL+DR)
+6 channels (FL+FR+FC+BC+DL+DR)
+7 channels (FL+FR+FC+LFE+BC+DL+DR)
+5 channels (FC+SL+SR+DL+DR)
+6 channels (FC+LFE+SL+SR+DL+DR)
+6 channels (FL+FR+SL+SR+DL+DR)
+7 channels (FL+FR+LFE+SL+SR+DL+DR)
+7 channels (FL+FR+FC+SL+SR+DL+DR)
+8 channels (FL+FR+FC+LFE+SL+SR+DL+DR)
+7 channels (FC+BL+BR+SL+SR+DL+DR)
+8 channels (FC+LFE+BL+BR+SL+SR+DL+DR)
+8 channels (FL+FR+BL+BR+SL+SR+DL+DR)
+6 channels (FC+BC+SL+SR+DL+DR)
+7 channels (FC+LFE+BC+SL+SR+DL+DR)
+7 channels (FL+FR+BC+SL+SR+DL+DR)
+8 channels (FL+FR+LFE+BC+SL+SR+DL+DR)
+8 channels (FL+FR+FC+BC+SL+SR+DL+DR)
+-1 = ff_parse_channel_layout(FFFFFFFFFFFFFFFF, -1, blah);
+0 = ff_parse_channel_layout(0000000000000001,  1, 1);
+0 = ff_parse_channel_layout(0000000000000002,  1, 2);
+-1 = ff_parse_channel_layout(FFFFFFFFFFFFFFFF, -1, -1);
+0 = ff_parse_channel_layout(000000000000003C,  4, 60);
+0 = ff_parse_channel_layout(0000000000000041,  2, 65);
+0 = ff_parse_channel_layout(0000000000000004,  1, 1c);
+0 = ff_parse_channel_layout(0000000000000003,  2, 2c);
+-1 = ff_parse_channel_layout(FFFFFFFFFFFFFFFF, -1, -1c);
+0 = ff_parse_channel_layout(0000000000000000, 60, 60c);
+-1 = ff_parse_channel_layout(FFFFFFFFFFFFFFFF, -1, 65c);
+0 = ff_parse_channel_layout(000000000000003F,  6, 5.1);
+0 = ff_parse_channel_layout(0000000000000003,  2, stereo);
+0 = ff_parse_channel_layout(0000000000000001,  1, 1+1+1+1);
+0 = ff_parse_channel_layout(0000000000000004,  1, 1c+1c+1c+1c);
+0 = ff_parse_channel_layout(0000000000000007,  3, 2c+1c);
+0 = ff_parse_channel_layout(0000000000000003,  2, 0x3);
diff --git a/tests/ref/fate/filter-histogram-levels b/tests/ref/fate/filter-histogram-levels
index df9f6b2d..bd268ca2 100644
--- a/tests/ref/fate/filter-histogram-levels
+++ b/tests/ref/fate/filter-histogram-levels
@@ -1,51 +1,51 @@
 #tb 0: 1/25
-0,          0,          0,        1,   488448, 0x0d7343b9
-0,          1,          1,        1,   488448, 0x118e3ade
-0,          2,          2,        1,   488448, 0x778f1ba9
-0,          3,          3,        1,   488448, 0x153bf44e
-0,          4,          4,        1,   488448, 0x2d83c1ab
-0,          5,          5,        1,   488448, 0xa3e95f8f
-0,          6,          6,        1,   488448, 0x91aad31b
-0,          7,          7,        1,   488448, 0x90b92c09
-0,          8,          8,        1,   488448, 0x1e4c9f41
-0,          9,          9,        1,   488448, 0xa88c1882
-0,         10,         10,        1,   488448, 0x1aa04274
-0,         11,         11,        1,   488448, 0x49c45de8
-0,         12,         12,        1,   488448, 0xe799c29f
-0,         13,         13,        1,   488448, 0x789e233f
-0,         14,         14,        1,   488448, 0x9f753404
-0,         15,         15,        1,   488448, 0x83050c2c
-0,         16,         16,        1,   488448, 0xddf7ccbf
-0,         17,         17,        1,   488448, 0xe3128531
-0,         18,         18,        1,   488448, 0xcc6596af
-0,         19,         19,        1,   488448, 0x6e19754f
-0,         20,         20,        1,   488448, 0xc3b32c7c
-0,         21,         21,        1,   488448, 0x40b4853f
-0,         22,         22,        1,   488448, 0x6e492674
-0,         23,         23,        1,   488448, 0x7f867236
-0,         24,         24,        1,   488448, 0x22094365
-0,         25,         25,        1,   488448, 0x45f30fc3
-0,         26,         26,        1,   488448, 0xe6cbad09
-0,         27,         27,        1,   488448, 0x0c44836b
-0,         28,         28,        1,   488448, 0xa7f04271
-0,         29,         29,        1,   488448, 0xd222ba88
-0,         30,         30,        1,   488448, 0xc96a9749
-0,         31,         31,        1,   488448, 0x82e25bbd
-0,         32,         32,        1,   488448, 0xf79d1882
-0,         33,         33,        1,   488448, 0x6d7fdd68
-0,         34,         34,        1,   488448, 0xeb5c9b1b
-0,         35,         35,        1,   488448, 0x9014f9f4
-0,         36,         36,        1,   488448, 0x96c6ab5f
-0,         37,         37,        1,   488448, 0x03911af0
-0,         38,         38,        1,   488448, 0xbf9dd8eb
-0,         39,         39,        1,   488448, 0x73509963
-0,         40,         40,        1,   488448, 0xf2ecb068
-0,         41,         41,        1,   488448, 0xec2fb311
-0,         42,         42,        1,   488448, 0xf4c7ba26
-0,         43,         43,        1,   488448, 0x23f56543
-0,         44,         44,        1,   488448, 0x25f8c48c
-0,         45,         45,        1,   488448, 0xf1ccd38b
-0,         46,         46,        1,   488448, 0x10780667
-0,         47,         47,        1,   488448, 0xbeb70431
-0,         48,         48,        1,   488448, 0xbc950678
-0,         49,         49,        1,   488448, 0xfedf5d83
+0,          0,          0,        1,   488448, 0xc27a6cac
+0,          1,          1,        1,   488448, 0xf00a152e
+0,          2,          2,        1,   488448, 0x060b8c70
+0,          3,          3,        1,   488448, 0xf75d6ee2
+0,          4,          4,        1,   488448, 0xd7a7f06e
+0,          5,          5,        1,   488448, 0x585281a5
+0,          6,          6,        1,   488448, 0xb06e3ee8
+0,          7,          7,        1,   488448, 0x201d0b8c
+0,          8,          8,        1,   488448, 0x4e14e319
+0,          9,          9,        1,   488448, 0x5aef5cca
+0,         10,         10,        1,   488448, 0x57018668
+0,         11,         11,        1,   488448, 0x2ad45b3f
+0,         12,         12,        1,   488448, 0x62cc36b8
+0,         13,         13,        1,   488448, 0x9e84585e
+0,         14,         14,        1,   488448, 0xe6552e42
+0,         15,         15,        1,   488448, 0x13b90c2c
+0,         16,         16,        1,   488448, 0xf9557145
+0,         17,         17,        1,   488448, 0x818340bc
+0,         18,         18,        1,   488448, 0x5112c6e1
+0,         19,         19,        1,   488448, 0x5d5b8f43
+0,         20,         20,        1,   488448, 0xf2101ea6
+0,         21,         21,        1,   488448, 0x4266af4d
+0,         22,         22,        1,   488448, 0xb358806e
+0,         23,         23,        1,   488448, 0xe336aa60
+0,         24,         24,        1,   488448, 0x64fcc339
+0,         25,         25,        1,   488448, 0x86e4b729
+0,         26,         26,        1,   488448, 0x48c380d0
+0,         27,         27,        1,   488448, 0xaee36fd3
+0,         28,         28,        1,   488448, 0x20b84429
+0,         29,         29,        1,   488448, 0x84d85542
+0,         30,         30,        1,   488448, 0x94aea169
+0,         31,         31,        1,   488448, 0x6278fa2c
+0,         32,         32,        1,   488448, 0xaadf998d
+0,         33,         33,        1,   488448, 0x29bba90d
+0,         34,         34,        1,   488448, 0xef1117ad
+0,         35,         35,        1,   488448, 0xd961e36d
+0,         36,         36,        1,   488448, 0xff53296e
+0,         37,         37,        1,   488448, 0x41f381f9
+0,         38,         38,        1,   488448, 0x66fcfc2a
+0,         39,         39,        1,   488448, 0x758bb472
+0,         40,         40,        1,   488448, 0xefc6dc9e
+0,         41,         41,        1,   488448, 0x77fccb69
+0,         42,         42,        1,   488448, 0x7a1d82a4
+0,         43,         43,        1,   488448, 0xc9d61a1b
+0,         44,         44,        1,   488448, 0x8e689deb
+0,         45,         45,        1,   488448, 0x52133e75
+0,         46,         46,        1,   488448, 0xcc0a098e
+0,         47,         47,        1,   488448, 0x045cd17f
+0,         48,         48,        1,   488448, 0x97f89963
+0,         49,         49,        1,   488448, 0xa1f835ff
diff --git a/tests/ref/fate/filter-histogram-waveform b/tests/ref/fate/filter-histogram-waveform
deleted file mode 100644
index da6bbc4f..00000000
--- a/tests/ref/fate/filter-histogram-waveform
+++ /dev/null
@@ -1,51 +0,0 @@
-#tb 0: 1/25
-0,          0,          0,        1,   663552, 0x8a55d4d9
-0,          1,          1,        1,   663552, 0xf4c4d42d
-0,          2,          2,        1,   663552, 0x291cd441
-0,          3,          3,        1,   663552, 0xce5bd56b
-0,          4,          4,        1,   663552, 0x1dc0d637
-0,          5,          5,        1,   663552, 0x4676d387
-0,          6,          6,        1,   663552, 0x8064d607
-0,          7,          7,        1,   663552, 0x1981d54f
-0,          8,          8,        1,   663552, 0x8fffd4a3
-0,          9,          9,        1,   663552, 0x5041d2f7
-0,         10,         10,        1,   663552, 0x03bbd401
-0,         11,         11,        1,   663552, 0x965bd2f3
-0,         12,         12,        1,   663552, 0x0d39d27d
-0,         13,         13,        1,   663552, 0xad39d1fb
-0,         14,         14,        1,   663552, 0x1809d195
-0,         15,         15,        1,   663552, 0xe083cf2f
-0,         16,         16,        1,   663552, 0x0d6fd3b3
-0,         17,         17,        1,   663552, 0x3a8ed3f3
-0,         18,         18,        1,   663552, 0xbbb5d00d
-0,         19,         19,        1,   663552, 0xe6ead0df
-0,         20,         20,        1,   663552, 0xf5dcd35f
-0,         21,         21,        1,   663552, 0x9cd9d32b
-0,         22,         22,        1,   663552, 0xcb91d1b9
-0,         23,         23,        1,   663552, 0x5640cfd7
-0,         24,         24,        1,   663552, 0x5370d285
-0,         25,         25,        1,   663552, 0xd894d1dd
-0,         26,         26,        1,   663552, 0xace4ce65
-0,         27,         27,        1,   663552, 0x6e15ce17
-0,         28,         28,        1,   663552, 0xd21cce21
-0,         29,         29,        1,   663552, 0xec1ecd83
-0,         30,         30,        1,   663552, 0x9852ce0f
-0,         31,         31,        1,   663552, 0xe488cba3
-0,         32,         32,        1,   663552, 0x2e15cbed
-0,         33,         33,        1,   663552, 0x5e59ca97
-0,         34,         34,        1,   663552, 0x7cefcd7d
-0,         35,         35,        1,   663552, 0xcb99ccfb
-0,         36,         36,        1,   663552, 0xce32cf29
-0,         37,         37,        1,   663552, 0x7fb8ceef
-0,         38,         38,        1,   663552, 0x4014d18f
-0,         39,         39,        1,   663552, 0x224dd381
-0,         40,         40,        1,   663552, 0x5347d125
-0,         41,         41,        1,   663552, 0xec83ce79
-0,         42,         42,        1,   663552, 0x4d24ce8b
-0,         43,         43,        1,   663552, 0xfdc5ccbd
-0,         44,         44,        1,   663552, 0x592cd18b
-0,         45,         45,        1,   663552, 0xff06d43b
-0,         46,         46,        1,   663552, 0x7f69d4ef
-0,         47,         47,        1,   663552, 0x1607d3f1
-0,         48,         48,        1,   663552, 0x33e0d211
-0,         49,         49,        1,   663552, 0xe1b1d2cd
diff --git a/tests/ref/fate/filter-mergeplanes b/tests/ref/fate/filter-mergeplanes
new file mode 100644
index 00000000..841d14fc
--- /dev/null
+++ b/tests/ref/fate/filter-mergeplanes
@@ -0,0 +1,51 @@
+#tb 0: 1/25
+0,          0,          0,        1,   304128, 0x90660272
+0,          1,          1,        1,   304128, 0x6b09c8a6
+0,          2,          2,        1,   304128, 0x343858f3
+0,          3,          3,        1,   304128, 0x2f8ba46c
+0,          4,          4,        1,   304128, 0xe31fc163
+0,          5,          5,        1,   304128, 0xc1f3a301
+0,          6,          6,        1,   304128, 0x76b132d2
+0,          7,          7,        1,   304128, 0xf47b97f1
+0,          8,          8,        1,   304128, 0x12089641
+0,          9,          9,        1,   304128, 0x04156f0f
+0,         10,         10,        1,   304128, 0xc8bf96e8
+0,         11,         11,        1,   304128, 0xbecdae62
+0,         12,         12,        1,   304128, 0x74053994
+0,         13,         13,        1,   304128, 0xc1f3eec7
+0,         14,         14,        1,   304128, 0xf4e496b3
+0,         15,         15,        1,   304128, 0xa5a3d663
+0,         16,         16,        1,   304128, 0x1d3738ea
+0,         17,         17,        1,   304128, 0xfdfa09f5
+0,         18,         18,        1,   304128, 0x8844bbc5
+0,         19,         19,        1,   304128, 0x7eb20cea
+0,         20,         20,        1,   304128, 0x5b5e7351
+0,         21,         21,        1,   304128, 0x2b5e5fc1
+0,         22,         22,        1,   304128, 0xd6a2e9a7
+0,         23,         23,        1,   304128, 0xf56ed1be
+0,         24,         24,        1,   304128, 0xd60530a5
+0,         25,         25,        1,   304128, 0x363aa5bf
+0,         26,         26,        1,   304128, 0x37f56bb0
+0,         27,         27,        1,   304128, 0xe8428c1f
+0,         28,         28,        1,   304128, 0x78ef39cf
+0,         29,         29,        1,   304128, 0x76b2ab97
+0,         30,         30,        1,   304128, 0x3f09f5ef
+0,         31,         31,        1,   304128, 0x6f2c0371
+0,         32,         32,        1,   304128, 0x8baaf4a3
+0,         33,         33,        1,   304128, 0xda157bc0
+0,         34,         34,        1,   304128, 0xae46ebb0
+0,         35,         35,        1,   304128, 0xbe3458a2
+0,         36,         36,        1,   304128, 0x9784f913
+0,         37,         37,        1,   304128, 0xa67d93cd
+0,         38,         38,        1,   304128, 0x8b0e84ba
+0,         39,         39,        1,   304128, 0x583250f1
+0,         40,         40,        1,   304128, 0x7d1ee0a8
+0,         41,         41,        1,   304128, 0x6d83f980
+0,         42,         42,        1,   304128, 0x027ce4c5
+0,         43,         43,        1,   304128, 0xcf1a0c75
+0,         44,         44,        1,   304128, 0xbd1c2e5c
+0,         45,         45,        1,   304128, 0x36d4fe48
+0,         46,         46,        1,   304128, 0x9cc1650e
+0,         47,         47,        1,   304128, 0xf1af6288
+0,         48,         48,        1,   304128, 0x5eb8d36e
+0,         49,         49,        1,   304128, 0x2c4f2305
diff --git a/tests/ref/fate/filter-mpdecimate b/tests/ref/fate/filter-mpdecimate
new file mode 100644
index 00000000..1e9652e3
--- /dev/null
+++ b/tests/ref/fate/filter-mpdecimate
@@ -0,0 +1,21 @@
+#tb 0: 1/3
+0,          0,          0,        1,   115200, 0x3744b3ed
+0,          2,          2,        1,   115200, 0x6e318ba0
+0,          3,          3,        1,   115200, 0x48d65876
+0,          5,          5,        1,   115200, 0x9087e4f1
+0,          6,          6,        1,   115200, 0xc58d5c94
+0,          8,          8,        1,   115200, 0x8c4ad4f4
+0,          9,          9,        1,   115200, 0xf96f6755
+0,         11,         11,        1,   115200, 0x90beb7ba
+0,         12,         12,        1,   115200, 0xa8f4f31b
+0,         14,         14,        1,   115200, 0x21441a03
+0,         15,         15,        1,   115200, 0x393b3494
+0,         17,         17,        1,   115200, 0xc7aeec1d
+0,         18,         18,        1,   115200, 0x2bd24a0e
+0,         20,         20,        1,   115200, 0xea66d804
+0,         21,         21,        1,   115200, 0x237953c3
+0,         23,         23,        1,   115200, 0xff98a9b1
+0,         24,         24,        1,   115200, 0x056d40ca
+0,         26,         26,        1,   115200, 0xa4374737
+0,         27,         27,        1,   115200, 0x3eaa3ae8
+0,         29,         29,        1,   115200, 0x7551e9ee
diff --git a/tests/ref/fate/filter-pixdesc-ayuv64le b/tests/ref/fate/filter-pixdesc-ayuv64le
new file mode 100644
index 00000000..385ce139
--- /dev/null
+++ b/tests/ref/fate/filter-pixdesc-ayuv64le
@@ -0,0 +1 @@
+pixdesc-ayuv64le    2269279a2df156931021793927876e84
diff --git a/tests/ref/fate/filter-pixdesc-monob b/tests/ref/fate/filter-pixdesc-monob
index 00df9ef9..63a4b62b 100644
--- a/tests/ref/fate/filter-pixdesc-monob
+++ b/tests/ref/fate/filter-pixdesc-monob
@@ -1 +1 @@
-pixdesc-monob       e795648f4f5054ca133437570cf5ba5f
+pixdesc-monob       e69e4b6e48dd419bb199312727f60a47
diff --git a/tests/ref/fate/filter-pixdesc-monow b/tests/ref/fate/filter-pixdesc-monow
index 429c5b41..c73f3275 100644
--- a/tests/ref/fate/filter-pixdesc-monow
+++ b/tests/ref/fate/filter-pixdesc-monow
@@ -1 +1 @@
-pixdesc-monow       e7d8142228a04d9ef3cdc4473ef8a69f
+pixdesc-monow       51a45d1d34b95373fffdd79fc3abf457
diff --git a/tests/ref/fate/filter-pixdesc-ya8 b/tests/ref/fate/filter-pixdesc-ya8
new file mode 100644
index 00000000..fa59613d
--- /dev/null
+++ b/tests/ref/fate/filter-pixdesc-ya8
@@ -0,0 +1 @@
+pixdesc-ya8         d087df6cabb2a38d14db347fce634e2f
diff --git a/tests/ref/fate/filter-pixdesc-yuv420p9be b/tests/ref/fate/filter-pixdesc-yuv420p9be
index 7c74adcc..1be5103c 100644
--- a/tests/ref/fate/filter-pixdesc-yuv420p9be
+++ b/tests/ref/fate/filter-pixdesc-yuv420p9be
@@ -1 +1 @@
-pixdesc-yuv420p9be  5750914b29640a8e5fe9cda0e5bf0a84
+pixdesc-yuv420p9be  69e0e50358ee0f7301d4cf252e9c35b1
diff --git a/tests/ref/fate/filter-pixdesc-yuv420p9le b/tests/ref/fate/filter-pixdesc-yuv420p9le
index 2b9c3f98..8677a36d 100644
--- a/tests/ref/fate/filter-pixdesc-yuv420p9le
+++ b/tests/ref/fate/filter-pixdesc-yuv420p9le
@@ -1 +1 @@
-pixdesc-yuv420p9le  6f9d17cae7cfd2676e8798241e266322
+pixdesc-yuv420p9le  eed25f5d0562d3158ae2f97589950ba9
diff --git a/tests/ref/fate/filter-pixdesc-yuv422p9be b/tests/ref/fate/filter-pixdesc-yuv422p9be
index 05816b2a..fab8b9bd 100644
--- a/tests/ref/fate/filter-pixdesc-yuv422p9be
+++ b/tests/ref/fate/filter-pixdesc-yuv422p9be
@@ -1 +1 @@
-pixdesc-yuv422p9be  33d2d5ea6f00f36c7c28ebca9b097348
+pixdesc-yuv422p9be  25929926226d191bc1c459e67546c6c8
diff --git a/tests/ref/fate/filter-pixdesc-yuv422p9le b/tests/ref/fate/filter-pixdesc-yuv422p9le
index de4b7785..93581812 100644
--- a/tests/ref/fate/filter-pixdesc-yuv422p9le
+++ b/tests/ref/fate/filter-pixdesc-yuv422p9le
@@ -1 +1 @@
-pixdesc-yuv422p9le  e0a3b15393d266b50c31dabcd3c5697b
+pixdesc-yuv422p9le  1de20cc8c68751dbf3e8f2bd64f2ffd7
diff --git a/tests/ref/fate/filter-pixdesc-yuv444p9be b/tests/ref/fate/filter-pixdesc-yuv444p9be
index d6b0ea94..9500abde 100644
--- a/tests/ref/fate/filter-pixdesc-yuv444p9be
+++ b/tests/ref/fate/filter-pixdesc-yuv444p9be
@@ -1 +1 @@
-pixdesc-yuv444p9be  ec5b450a8c0d4097fc15d5f8e62a045a
+pixdesc-yuv444p9be  d59a020d35a4434d5904effdfa70ca69
diff --git a/tests/ref/fate/filter-pixdesc-yuv444p9le b/tests/ref/fate/filter-pixdesc-yuv444p9le
index f24ca356..1a31ab65 100644
--- a/tests/ref/fate/filter-pixdesc-yuv444p9le
+++ b/tests/ref/fate/filter-pixdesc-yuv444p9le
@@ -1 +1 @@
-pixdesc-yuv444p9le  cca3bc99d4f3d5f3a7c87d95d444731d
+pixdesc-yuv444p9le  d6b9667d8bc7aab7fa5d27540c65010d
diff --git a/tests/ref/fate/filter-pixfmts-copy b/tests/ref/fate/filter-pixfmts-copy
index 6281711a..5f4b1623 100644
--- a/tests/ref/fate/filter-pixfmts-copy
+++ b/tests/ref/fate/filter-pixfmts-copy
@@ -2,6 +2,7 @@
 0rgb                527ef3d164c8fd0700493733959689c2
 abgr                023ecf6396d324edb113e4a483b79ba2
 argb                f003b555ef429222005d33844cca9325
+ayuv64le            07b9c969dfbe4add4c0626773b151d4f
 bgr0                6fcd67c8e6cec723dab21c70cf53dc16
 bgr24               4cff3814819f02ecf5824edfd768d2b1
 bgr444be            1cd47c1555f947dfcba99192e3429d20
@@ -30,11 +31,11 @@ gbrp9le             bc80da439638c59f1d822037f52739af
 gray                188590b1231afd231ea910815aef2b25
 gray16be            08d997a3faa25a3db9d6be272d282eef
 gray16le            df65eb804360795e3e38a2701fa9641a
-monob               a6869bab4f6e64fe13dcab13b41775b3
-monow               0404328f1838a6503371478a559ca20d
+monob               8b04f859fee6a0be856be184acd7a0b5
+monow               54d16d2c01abfd72ecdb5e51e283937c
 nv12                8e24feb2c544dc26a20047a71e4c27aa
 nv21                335d85c9af6110f26ae9e187a82ed2cf
-pal8                d9a58fa1964ba9a3b902797b0b1af0ab
+pal8                ff5929f5b42075793b2c34cb441bede5
 rgb0                0de71e5a1f97f81fb51397a0435bfa72
 rgb24               f4438057d046e6d98ade4e45294b21be
 rgb444be            115e5259b91f4a416546b09570347633
@@ -53,6 +54,7 @@ rgba64le            b91e1d77f799eb92241a2d2d28437b15
 uyvy422             3bcf3c80047592f2211fae3260b1b65d
 xyz12be             a1ef56bf746d71f59669c28e48fc8450
 xyz12le             831ff03c1ba4ef19374686f16a064d8c
+ya8                 dbb99fbcdc204aaa1a7397ff561f1a67
 yuv410p             5d4d992a7728431aa4e0700f87fb7fd8
 yuv411p             7e1300e89f5bc07939e2c4a6acbdf267
 yuv420p             a014c7eb7a8385d1dd092b7a583f1bff
@@ -64,8 +66,8 @@ yuv420p14be         64779858686946fc0e780baf7c1391b6
 yuv420p14le         c1d012a4f9d54fbc8b04fea96d85e903
 yuv420p16be         268b07358d8dc733ee81d0b87990d5af
 yuv420p16le         dae8da9edd4255051e3e546ae7ed9bd3
-yuv420p9be          e86ecd4112c86637c96f2b5e90341da1
-yuv420p9le          4496bea8504dce651485cc8a7e8403c9
+yuv420p9be          37f0476e8458a93d3d22db568f617aca
+yuv420p9le          83a6d32c91c15a3bc334bb9abf920654
 yuv422p             74f8006b4482db104f1986f49807a0af
 yuv422p10be         7291903c3c0cf4e5456dd9673a619f1d
 yuv422p10le         14cbaa728e888534359b9dddc5430f08
@@ -75,8 +77,8 @@ yuv422p14be         2617c569ae9659d8fe6a01f96e2c9657
 yuv422p14le         7d01363cf090306cf93337c474cd8827
 yuv422p16be         86147d8bfb795ab1873c899611e2a361
 yuv422p16le         9df47cb7d6d39b335a547ced2865e72e
-yuv422p9be          fdd15494de6cfc8c3f15650ecd3d8046
-yuv422p9le          a112fd777494d203d9d8e9623a50e503
+yuv422p9be          338dbb97c9f5a12ccea2c61774ddff2e
+yuv422p9le          0822f8af35a269a036ae44878acba87e
 yuv440p             98d0f96fdb3ba415899017adf7d4a4f9
 yuv440p10be         d874167042037c1daf9b9a2f74bffad9
 yuv440p10le         3cfbd921369aa8f1e4977efdb7f44c8c
@@ -91,8 +93,8 @@ yuv444p14be         ea3057d469d0c49c24e844256ef7871e
 yuv444p14le         940f5908ccf06e01411f0a7bddb45c6a
 yuv444p16be         aee24ab2e9a4656f889399f1b0d98639
 yuv444p16le         781c22317c02b3dd4225709000bdb847
-yuv444p9be          c37eb400483012fd97a030431818f328
-yuv444p9le          2136d762328cb9ce168b6261b7874791
+yuv444p9be          ac09917e5d2abc82c0d260007f4dfd77
+yuv444p9le          caef947b8aff5b52285385c6ae9b2439
 yuva420p            b227672e56215e184e702c02a771d7f3
 yuva420p10be        f66e7d677625380f7504867fab51305c
 yuva420p10le        01e94ee605714396e69b013c11dda348
diff --git a/tests/ref/fate/filter-pixfmts-crop b/tests/ref/fate/filter-pixfmts-crop
index 1b1a763e..0b54641e 100644
--- a/tests/ref/fate/filter-pixfmts-crop
+++ b/tests/ref/fate/filter-pixfmts-crop
@@ -2,6 +2,7 @@
 0rgb                974833c777e6abe6d84dc59af2ca5625
 abgr                1d21f5b8a20186ac9dd54459c986a2a7
 argb                8b822972049a1e207000763f2564d6e0
+ayuv64le            ab2f7bc8f150af47c42c778e3ea28bce
 bgr0                38a84849a9198667c348c686802e3b52
 bgr24               1dacd8e04bf0eff163e82250d01a9cc7
 bgr444be            e2d2b864dfa528e77684ddc117f2d974
@@ -32,7 +33,7 @@ gray16be            38f599da990224de86e3dc7a543121a9
 gray16le            9ff7c866bd98def4e6c91542c1c45f80
 nv12                92cda427f794374731ec0321ee00caac
 nv21                1bcfc197f4fb95de85ba58182d8d2f69
-pal8                c89abc9660914b2e6c2e6f8c29e86503
+pal8                1f2cdc8e718f95c875dbc1034a688bfb
 rgb0                736646b70dd9a0be22b8da8041e35035
 rgb24               c5fbbf816bb2000f4d2914e335698ef5
 rgb444be            44a33306889f7fa1a71ec831b860fd0a
@@ -50,6 +51,7 @@ rgba64be            89910046972ab3c68e2a348302cc8ca9
 rgba64le            fea8ebfc869b52adf353778f29eac7a7
 xyz12be             cb4571f9aaa7b59f999ef327276104b7
 xyz12le             cd6aae8d26b18bdb4b9d068586276d91
+ya8                 51a8dd297e35d40b06d3ebe8f4717895
 yuv410p             3bb6c7b64f2c46bc5e8b77198ce4ea58
 yuv411p             693e4afe96998e6dd91734037d75d887
 yuv420p             510d36163e4d3710988c23c2240ca6dc
@@ -61,8 +63,8 @@ yuv420p14be         5544e5fc40e718e608c0bd8c7b1d0812
 yuv420p14le         2c763a71c7c4650b91b405634556e21c
 yuv420p16be         c22f72b460beef22b0bf80806c43b25f
 yuv420p16le         564d26a52ed271792e8909f8aa9a3e87
-yuv420p9be          4209e42b8f6cc7c123d6e65afba91528
-yuv420p9le          c33dd96a3096bb776e01dc6aec740c4a
+yuv420p9be          d0458179f9cc757a11ceb80655fde30e
+yuv420p9le          fe0215f5683437bd5a10b6255344109e
 yuv422p             42ea5e9a22df5913b2ec75512162e533
 yuv422p10be         3956017f5023ff5d56b4f814422dd711
 yuv422p10le         a5fa7cb6a21bcbb60ae3ba4a9f4e60e0
@@ -72,8 +74,8 @@ yuv422p14be         845c42b333e331a556008ef0a16afc85
 yuv422p14le         abcdaccf8d01a9133daca94383d27db7
 yuv422p16be         565299a5d6265c77d00fd1a1d0173834
 yuv422p16le         a3fc398dd11644235e260f7e82cc87e6
-yuv422p9be          b402f27475dd7739f63510d08deb7001
-yuv422p9le          f8603c35fe74a30d13d37e5bdadcf158
+yuv422p9be          9d92f0bae2037bda1deaa70cc42998bc
+yuv422p9le          5e08f31583a6072dfc12c5ffaabedd88
 yuv440p             11786b7e8f8f45fdeafe841a258fe5fd
 yuv440p10be         7c47e76d0430f2a1de2a1bfd50221c97
 yuv440p10le         6e501e62a7f84748338cdf19b0186a19
@@ -88,8 +90,8 @@ yuv444p14be         bb07382a868b35875b771b37dd234605
 yuv444p14le         f8f6e6124bae7e95d0061b95bd64be66
 yuv444p16be         25b9244db531ccab7095a44b3115032d
 yuv444p16le         b537ff9b2ef601b8568ffef2fdc4281b
-yuv444p9be          082a56901b11ed2508c1f0b7b6f25c02
-yuv444p9le          678f94003f4051dfa6b8a7650da136e8
+yuv444p9be          770fae9f4bbb3b659a78e912ae7e0421
+yuv444p9le          764e317b46961236ad25fa174c3af5c6
 yuva420p            5fb8147030796ad0ebb4fadac776ca05
 yuva420p10be        f2127ce6fe956ee3a3b6c9b308c3bb9f
 yuva420p10le        080798ee8a995734fcb2d7238010e046
diff --git a/tests/ref/fate/filter-pixfmts-field b/tests/ref/fate/filter-pixfmts-field
index 9ba8e581..9e04db4f 100644
--- a/tests/ref/fate/filter-pixfmts-field
+++ b/tests/ref/fate/filter-pixfmts-field
@@ -2,6 +2,7 @@
 0rgb                e2c35753a2271d1f9455b1809bc0e907
 abgr                c0eb95959edf5d40ff8af315e62d0f8a
 argb                6dca4f2987b49b7d63f702d17bace630
+ayuv64le            d9836decca6323ba88b3b3d02257c0b6
 bgr0                1da3fdbac616b3b410d081e39ed7a1f6
 bgr24               573c76d77b1cbe6534ea7c0267dc1b13
 bgr444be            064887b4ca8f49cfb7c776057bc75c74
@@ -30,11 +31,11 @@ gbrp9le             1654c79e38dc78fdf106239ed2a7bd4f
 gray                57fd8e6e00f6be8752726005974cce1b
 gray16be            e1700e056de9917744a7ff4ab2ca63fd
 gray16le            338de7ac5f7d36d5ad5ac2c8d5bbea68
-monob               599b938a6207de81bd96c902c2511676
-monow               8486b94cd7c498b0effd33cb7e8e63df
+monob               2129cc72a484d7e10a44de9117aa9f80
+monow               03d783611d265cae78293f88ea126ea1
 nv12                16f7a46708ef25ebd0b72e47920cc11e
 nv21                7294574037cc7f9373ef5695d8ebe809
-pal8                379b8c80f4422a353ad286f7120f2fb6
+pal8                0658c18dcd8d052d59dfbe23f5b368d9
 rgb0                ca3fa6e865b91b3511c7f2bf62830059
 rgb24               25ab271e26a5785be169578d99da5dd0
 rgb444be            a05fabc91e485ec02461be900cd72ef3
@@ -53,6 +54,7 @@ rgba64le            dfdba4de4a7cac9abf08852666c341d3
 uyvy422             1c49e44ab3f060e85fc4a3a9464f045e
 xyz12be             d2fa69ec91d3ed862f2dac3f8e7a3437
 xyz12le             02bccd5e0b6824779a1f848b0ea3e3b5
+ya8                 28cea4f98ed452bd3da9c752e5e3399c
 yuv410p             a85920d6bd26f51306e2ecbe71d1c554
 yuv411p             9106e283d5dbcfba01c611886d58871a
 yuv420p             9ea8ed3c22c31c0354b7face540bb280
@@ -64,8 +66,8 @@ yuv420p14be         dda2c5c09b72b41a19943b3832ddd2d5
 yuv420p14le         e79f827393fb9344190bacacffe9978e
 yuv420p16be         62d08d547f857b254c1d31b5c40b20a6
 yuv420p16le         4ddc458e596c44dfe007ca0c8b211e38
-yuv420p9be          c4a6faaec240d7d11d25b75afd7a6183
-yuv420p9le          2ed3dfcf2e5db5de7076dd74dee5c433
+yuv420p9be          65f8350de025dc8e6608bc21bf50c88f
+yuv420p9le          e9d061aad8e0d07226d41421797e6cbf
 yuv422p             9bd12ab1efe6c3fe6d9f639b97b79c7e
 yuv422p10be         6dd930ff81b89b71f6cadf757e0e8b3e
 yuv422p10le         5e314f06833b5016cc5cd76c611f7a48
@@ -75,8 +77,8 @@ yuv422p14be         d3d1d29966b9737dc8bbc31c6d215c9e
 yuv422p14le         3ba9d5d2a32dc3e0ec025100621e20f9
 yuv422p16be         ed49651e67f96c34649762f4678091de
 yuv422p16le         c7a8ad6f7aded288ca2d16c6e5d73026
-yuv422p9be          6d3da72c1c56c330d23f1f232ac6bde6
-yuv422p9le          0b3df4939e52a9c47521b33ca76a8ea8
+yuv422p9be          cf48ab1346f7fef1b983b8f01b6888b3
+yuv422p9le          b1b7acc0afe050fff93141e2e3c8f040
 yuv440p             5100aaa0d8b30f0eea8825fe9ae20b42
 yuv440p10be         4456a712fbf663d3eb591dfec7c0556d
 yuv440p10le         a71cc3c1b47f6600717421bc88ba1c43
@@ -91,8 +93,8 @@ yuv444p14be         18a46dde1fafcc7bbc6ebb3fd6681e7c
 yuv444p14le         90058811cf9c35a4ddfab367bea40c51
 yuv444p16be         1ff6ee64bcc33be585712e86359430d5
 yuv444p16le         9fc18b0a156d20503e3bac4823277adb
-yuv444p9be          12e6232d0e0c809527f64452341761cc
-yuv444p9le          9af93703a9819f834bcfeeb86eb33fa8
+yuv444p9be          e83171709ee786a5859aeab22d8ea4ce
+yuv444p9le          cca7482f37778cfb440f22355e47e9b1
 yuva420p            ebd72004be42de4743ca46fd81947b56
 yuva420p10be        5e000937fce49360850bc10d8083f798
 yuva420p10le        f16bde9ed6d8bd015d59b2e8e248ab31
diff --git a/tests/ref/fate/filter-pixfmts-fieldorder b/tests/ref/fate/filter-pixfmts-fieldorder
index 085b0534..1e81081d 100644
--- a/tests/ref/fate/filter-pixfmts-fieldorder
+++ b/tests/ref/fate/filter-pixfmts-fieldorder
@@ -2,6 +2,7 @@
 0rgb                2b0f066cfa0bef378a492875d541de8f
 abgr                832924b5351361db68dbdbb96c60ae55
 argb                80d08e68cb91bc8f2f817516e65f0bd0
+ayuv64le            84ef6260fe02427da946d4a2207fb54c
 bgr0                d2c676224ea80ac3ce01afde325ea1a0
 bgr24               b7fdbcd10f20e6ea2d40aae0f329f80d
 bgr444be            ca5acc0d5315d6d9f4422337c6f20842
@@ -48,6 +49,7 @@ rgba64le            b34e6e30621ae579519a2d91a96a0acf
 uyvy422             75de70e31c435dde878002d3f22b238a
 xyz12be             15f5cda71de5fef9cec5e75e3833b6bc
 xyz12le             7be6c8781f38c21a6b8f602f62ca31e6
+ya8                 055ac5ab5ff8533dd319edc17a398af1
 yuv411p             e4a040e0e786c4dae07d9d3f90a54905
 yuv422p             16ce67249c6ce7ef57a433646ad6dfc1
 yuv422p10be         62ae323dcc41aabf8ff6ecc53b119ce8
@@ -58,8 +60,8 @@ yuv422p14be         8bd7bfb0b35d68000ba57af66958ef2d
 yuv422p14le         471c7528e3da240d0a40f33548cbfbab
 yuv422p16be         8c193a8b6064e2379b2fb211cd2e6e37
 yuv422p16le         c949614bec06baca5ffca840b164eacc
-yuv422p9be          245551dc31484ff4d31f7e7230efd1ed
-yuv422p9le          8fd07db781631685523e3193f2792874
+yuv422p9be          b615f4e767a287e85be97dd52371ba5e
+yuv422p9le          c9faffe2fcb2e30be5d792412b3550b0
 yuv444p             1483374f69c4bbaf9252ce255fc84feb
 yuv444p10be         2e5caee508692869141bd9649cc17f64
 yuv444p10le         c0ceb41ad4c2ddc454f3fdb9cda2d8ad
@@ -69,8 +71,8 @@ yuv444p14be         33f1cf5233f57582618390ea337e83c6
 yuv444p14le         7d9fc358232488f671d663d27875f3c1
 yuv444p16be         779ea70979e735d3074fef0466792681
 yuv444p16le         aa893d9b1857b68fd1c5fc42236d7816
-yuv444p9be          4e5c3a669f37a6f0587d372a28d3e6e8
-yuv444p9le          621d86abf2ed636d9933ece245033278
+yuv444p9be          fd0ae3b828d40e2638ea0640bb5cc168
+yuv444p9le          9bb3dde9fadb3c03a0ef8305b5f67a7e
 yuva422p            c470da57cde22b452deb8874df710dce
 yuva422p10be        a2ffa080ae661c1033aa38be28002922
 yuva422p10le        a4f5e8006f8ea3f964206605045e0fe0
diff --git a/tests/ref/fate/filter-pixfmts-hflip b/tests/ref/fate/filter-pixfmts-hflip
index 206462a6..98c165ba 100644
--- a/tests/ref/fate/filter-pixfmts-hflip
+++ b/tests/ref/fate/filter-pixfmts-hflip
@@ -2,6 +2,7 @@
 0rgb                ada57572ee2b35f86edac9b911ce8523
 abgr                d2da6c3ee72e4a89a7cd011dd08566b2
 argb                36cf791c52c5463bfc52a070de54337e
+ayuv64le            4cedbc38b3d4dcb26cdab170ce6d667b
 bgr0                66e9fda4e658d73bfe4fc9d792542271
 bgr24               db074979bd684ca4547e28681ad3f6ab
 bgr444be            63ad2fe7b4e44b11c5ca03b545a941ca
@@ -32,7 +33,7 @@ gray16be            cf7294d9aa23e1b838692ec01ade587b
 gray16le            d91ce41e304419bcf32ac792f01bd64f
 nv12                801e58f1be5fd0b5bc4bf007c604b0b4
 nv21                9f10dfff8963dc327d3395af21f0554f
-pal8                cfe90d91bea5172babd97c3828270ea5
+pal8                5b7c77d99817b4f52339742a47de7797
 rgb0                0092452f37d73da20193265ace0b7d57
 rgb24               21571104e6091a689feabb7867e513dd
 rgb444be            38d6d0e9aa15ce95f3fffcde8437632d
@@ -50,6 +51,7 @@ rgba64be            c910444019f4cfbf4d995227af55da8d
 rgba64le            0c810d8b3a6bca10321788e1cb145340
 xyz12be             25f90259ff8a226befdaec3dfe82996e
 xyz12le             926c0791d59aaff61b2778e8ada3316d
+ya8                 4ad5920716de3d2fbbc49f95adb60345
 yuv410p             c49fd0c55c41185b1580aac77211992b
 yuv411p             c416371077dce13d31bf1dc706111ae7
 yuv420p             eb8d9f02db98e10400781428c43e9438
@@ -61,8 +63,8 @@ yuv420p14be         c5758d8062c61248110485243a75b712
 yuv420p14le         e283871dec73c853421d3aa098f071a7
 yuv420p16be         da4f5b6e537edf9cdea48a60defe520e
 yuv420p16le         4173d444fea2ea27ad03b3f92e982bcd
-yuv420p9be          7dbcd8f9bc1602f0a85dd7ff43e4522c
-yuv420p9le          187355df9ce2d35df085bd900cfc0953
+yuv420p9be          be69519cbd3f3b9d02b165e6d675b4cf
+yuv420p9le          49eac58088b3e1cac170911382e2bb9d
 yuv422p             5aade4d118cd7243800a08d300033748
 yuv422p10be         a12d0957b703bd54cd569664b821ca3e
 yuv422p10le         8b6ac98276b10b7e540a5f689e5453fa
@@ -72,8 +74,8 @@ yuv422p14be         027a593f148a96ff9e37a35e55608818
 yuv422p14le         4c1de1ce4f199ab8e94a28bf79ecedb6
 yuv422p16be         3d4b525c8056b740a4afa5b86cd79cd3
 yuv422p16le         bb93ea91207d118811e2dfd5a48e223d
-yuv422p9be          33c54fa3595492c10dbb58de2f3ef05c
-yuv422p9le          296f934ce228f436a3733a05bae280ad
+yuv422p9be          5b41fff3b17c2934e1db61f0591fdfbc
+yuv422p9le          720ed60d583933c3fb8db6dbb2991dc6
 yuv440p             53fb331a853af01bed7f32504a1e5ff0
 yuv440p10be         38108067cdce71f3a8f51eee25e4ff5d
 yuv440p10le         a369c78eda4f46d71105c38ddc92b976
@@ -88,8 +90,8 @@ yuv444p14be         be4727c283bb3af3327f7b9dc57e51b8
 yuv444p14le         289f4d6a3d60ba654e8280683825ee9a
 yuv444p16be         acb6b001d445a628d1d0a2dfbeed078c
 yuv444p16le         cce4770e9e2c6d4104562490d6ffbbaa
-yuv444p9be          bd3d219c010098c25dcd5ad211fe5ec6
-yuv444p9le          3a68468fbe09c9b963d9f9ee9ce3e439
+yuv444p9be          e29d58f528e7b36e3d7321e50f6a69d9
+yuv444p9le          587f7bd803aa09109e278fb7aa3e3971
 yuva420p            e910f0f5ed439a17fc3e61b42dec88c2
 yuva420p10be        2c3b83450c3155793a142ecd5d525131
 yuva420p10le        c49ac6b0e89564fa8243e0400adc37de
diff --git a/tests/ref/fate/filter-pixfmts-il b/tests/ref/fate/filter-pixfmts-il
index d6f510c2..a9a3c7ba 100644
--- a/tests/ref/fate/filter-pixfmts-il
+++ b/tests/ref/fate/filter-pixfmts-il
@@ -2,6 +2,7 @@
 0rgb                53efe0182723cd1dedfdbf56357c76f5
 abgr                97603869e6248a8e5d8501563a11b114
 argb                9e50e6ef02c83f28e97865a1f46ddfcd
+ayuv64le            6f45f683e99ddf4180c7c7f47719efcc
 bgr0                590dcd1297d1dd4541eea217381db604
 bgr24               73afe7b447b083a7c2d682abe8dd451a
 bgr444be            4fa078adc981fd07440a7b657c98c4c1
@@ -30,8 +31,8 @@ gbrp9le             7e79b4bbb589095c925d65cb7c037d44
 gray                52ae18648161ac43144f5c9cd2127786
 gray16be            92c3b09f371b610cc1b6a9776034f4d0
 gray16le            1db278d23a554e01910cedacc6c02521
-monob               ee0012e4df0bf06a09cefb49be68e850
-monow               5e0868812e25cc867cdc78d5252cbb81
+monob               faba75df28033ba7ce3d82ff2a99ee68
+monow               6e9cfb8d3a344c5f0c3e1d5e1297e580
 nv12                3c3ba9b1b4c4dfff09c26f71b51dd146
 nv21                ab586d8781246b5a32d8760a61db9797
 rgb0                cfaf68671e43248267d8cd50cae8c13f
@@ -52,6 +53,7 @@ rgba64le            a8a2daae04374a27219bc1c890204007
 uyvy422             d6ee3ca43356d08c392382b24b22cda5
 xyz12be             7c7d54c55f136cbbc50b18029f3be0b3
 xyz12le             090ba6b1170baf2b1358b43b971d33b0
+ya8                 a38d6e288f582f1a04310232ed764afc
 yuv410p             dea1ab8843465adf5b8240b2d98fd85b
 yuv411p             8bf73777a5ff43c126be274245aceff1
 yuv420p             f1f4e7e94a76d5320049464bdeac24ed
@@ -63,8 +65,8 @@ yuv420p14be         240357c01079020ccc3d9287b5821a1b
 yuv420p14le         f7d0f5f5e72455b238897ed87cf67b3c
 yuv420p16be         8195046b708c613b3ca521c95db529ab
 yuv420p16le         0586439ecbd0d2fb8f22f953e6efade8
-yuv420p9be          4e324786af988df5fb82b68000ca886e
-yuv420p9le          1d9329a946bd14e9079c8ef9836bc04e
+yuv420p9be          955faeb6751e2fc39cbe160dc5fd35b6
+yuv420p9le          f8a4a93f98743cdd8d0ebadba2f8028a
 yuv422p             da122be331a53ec389ab6b2064488beb
 yuv422p10be         956f3c496807e62a288501e183fafad0
 yuv422p10le         43f99eb0d8abd1754572f77e430d4c41
@@ -74,8 +76,8 @@ yuv422p14be         ccff1e00f5d3adb899f1c266b22c9628
 yuv422p14le         f8a3bcbf8c55b1f570c078ff2f436b1f
 yuv422p16be         7f2363ccb6e69241c70411b6d063a2fe
 yuv422p16le         58c0fabd1042b35346ed8b8ed3f90653
-yuv422p9be          57b0eba4fd698fbe30d052b2e37e70c0
-yuv422p9le          280d7e85aa54bf4b0e6098603408c2a9
+yuv422p9be          3ecf7f2584fe3f3f567790e3924c153c
+yuv422p9le          a3fe601dcec267ae70db422843d2ce84
 yuv440p             b4261e15f5dc347b873cf19ccaba8123
 yuv440p10be         9c8e626ca9722f872ef0ecc8e3fca74e
 yuv440p10le         8c28978a0ad4d80c588989505a7f0807
@@ -90,8 +92,8 @@ yuv444p14be         c712cf9dbf8571fbd31fa747e58993ec
 yuv444p14le         3a0e361e5998f8590dd8ca8ce896cea1
 yuv444p16be         929c50756bfe8cab7c416f56095fbb82
 yuv444p16le         63c3c3bc5d08f701afa3406f7a22018c
-yuv444p9be          8044a9593b813fa1e23eee6b8bba4737
-yuv444p9le          e38d519ea585e2d17107b8ac34648ef7
+yuv444p9be          b33e6dc5cf08ff7672292b518d352c2f
+yuv444p9le          c9d767579483a2e4bb1424afc5223361
 yuva420p            e72aad3c3cf41445bdc1c9d14b21321f
 yuva420p10be        7cd2e0948f56fb54e8e6856b02561493
 yuva420p10le        5f038e8362ab2f999984a5b7d1536b7d
diff --git a/tests/ref/fate/filter-pixfmts-lut b/tests/ref/fate/filter-pixfmts-lut
index 1deac4c6..47e79d11 100644
--- a/tests/ref/fate/filter-pixfmts-lut
+++ b/tests/ref/fate/filter-pixfmts-lut
@@ -3,16 +3,38 @@ argb                4f575be3cd02799389f581df99c4de38
 bgr24               fa43e3b2abfde8d9e60e157a9acc553d
 bgra                4e2e689897ee7a8e42b16234597bab35
 rgb24               a356171207723a580e7d277078072005
+rgb48le             5c7dd8575836d18c91e09f1915cf9aa9
 rgba                7bc854c2698b78af3e9159a19c2d9d21
+rgba64le            3a087ecab583d1930220592731f282b4
 yuv410p             51b39a0e33f108e652457a26667319ea
 yuv411p             9204c5af92aef4922a05f58c1f6c095e
 yuv420p             7c43bb0cae8dee633375c89295598508
+yuv420p10le         1352712dd31cce78bd5441294004cf85
+yuv420p12le         c66f82da9fda458ba3abda057c58e591
+yuv420p14le         e45cb5e2a75bf6143da0b55004767f78
+yuv420p16le         eff54782c51770edfd6b84c958ac7120
+yuv420p9le          4a6776b3379f12ad45caee8072a13695
 yuv422p             67df35da0c35e54882492b2365438254
+yuv422p10le         0158371a800294015def7f0ef66c78ea
+yuv422p12le         bc49d3863ffb89658a17bf8c4fe773b0
+yuv422p14le         b55cb791d286b0b3391fe7481785e5b3
+yuv422p16le         fc3b2ba889ffaf1633000fc774307c33
+yuv422p9le          6e2a42ae36ed5e8b5112987639728af5
 yuv440p             5e41adcfc27be4369afd217b61b2ffe3
+yuv440p10le         8b49714bba268fb4a79b5a84223ad17a
+yuv440p12le         15ab4f453238bd9c13b18af81e22f060
 yuv444p             a2b58590aef88db2c1f14a1a3a3b0359
+yuv444p10le         c076c20fc808f95b34adb88aca442f48
+yuv444p12le         af8d4dd88169d5cffc2f3fce6333a94c
+yuv444p14le         93367133e25d088d4535199ed1f1ed58
+yuv444p16le         800940feec14365ccd9b4863e38f6991
+yuv444p9le          c120044350852c4cd16a302dd1ceda79
 yuva420p            518a380bf1af60ef2ecf4754eec088e9
+yuva420p16le        72ad4fa535b007d122666ce103ef9c8b
 yuva422p            7110ac2e37377b05b6fc5ad967dfabb5
+yuva422p16le        e2867210660ada5784a60b4339ac52c0
 yuva444p            642f3958f141dece9e99407945e2ef43
+yuva444p16le        ab04ba8acbe38085b0df650d82065eb0
 yuvj420p            65bc88887c7f06a6221155ca7f9cfca4
 yuvj422p            ff5baffefc8ffe4547653092fd7da200
 yuvj440p            ef3f27270e60ac06582e3ac7c2f3e6fa
diff --git a/tests/ref/fate/filter-pixfmts-null b/tests/ref/fate/filter-pixfmts-null
index 6281711a..5f4b1623 100644
--- a/tests/ref/fate/filter-pixfmts-null
+++ b/tests/ref/fate/filter-pixfmts-null
@@ -2,6 +2,7 @@
 0rgb                527ef3d164c8fd0700493733959689c2
 abgr                023ecf6396d324edb113e4a483b79ba2
 argb                f003b555ef429222005d33844cca9325
+ayuv64le            07b9c969dfbe4add4c0626773b151d4f
 bgr0                6fcd67c8e6cec723dab21c70cf53dc16
 bgr24               4cff3814819f02ecf5824edfd768d2b1
 bgr444be            1cd47c1555f947dfcba99192e3429d20
@@ -30,11 +31,11 @@ gbrp9le             bc80da439638c59f1d822037f52739af
 gray                188590b1231afd231ea910815aef2b25
 gray16be            08d997a3faa25a3db9d6be272d282eef
 gray16le            df65eb804360795e3e38a2701fa9641a
-monob               a6869bab4f6e64fe13dcab13b41775b3
-monow               0404328f1838a6503371478a559ca20d
+monob               8b04f859fee6a0be856be184acd7a0b5
+monow               54d16d2c01abfd72ecdb5e51e283937c
 nv12                8e24feb2c544dc26a20047a71e4c27aa
 nv21                335d85c9af6110f26ae9e187a82ed2cf
-pal8                d9a58fa1964ba9a3b902797b0b1af0ab
+pal8                ff5929f5b42075793b2c34cb441bede5
 rgb0                0de71e5a1f97f81fb51397a0435bfa72
 rgb24               f4438057d046e6d98ade4e45294b21be
 rgb444be            115e5259b91f4a416546b09570347633
@@ -53,6 +54,7 @@ rgba64le            b91e1d77f799eb92241a2d2d28437b15
 uyvy422             3bcf3c80047592f2211fae3260b1b65d
 xyz12be             a1ef56bf746d71f59669c28e48fc8450
 xyz12le             831ff03c1ba4ef19374686f16a064d8c
+ya8                 dbb99fbcdc204aaa1a7397ff561f1a67
 yuv410p             5d4d992a7728431aa4e0700f87fb7fd8
 yuv411p             7e1300e89f5bc07939e2c4a6acbdf267
 yuv420p             a014c7eb7a8385d1dd092b7a583f1bff
@@ -64,8 +66,8 @@ yuv420p14be         64779858686946fc0e780baf7c1391b6
 yuv420p14le         c1d012a4f9d54fbc8b04fea96d85e903
 yuv420p16be         268b07358d8dc733ee81d0b87990d5af
 yuv420p16le         dae8da9edd4255051e3e546ae7ed9bd3
-yuv420p9be          e86ecd4112c86637c96f2b5e90341da1
-yuv420p9le          4496bea8504dce651485cc8a7e8403c9
+yuv420p9be          37f0476e8458a93d3d22db568f617aca
+yuv420p9le          83a6d32c91c15a3bc334bb9abf920654
 yuv422p             74f8006b4482db104f1986f49807a0af
 yuv422p10be         7291903c3c0cf4e5456dd9673a619f1d
 yuv422p10le         14cbaa728e888534359b9dddc5430f08
@@ -75,8 +77,8 @@ yuv422p14be         2617c569ae9659d8fe6a01f96e2c9657
 yuv422p14le         7d01363cf090306cf93337c474cd8827
 yuv422p16be         86147d8bfb795ab1873c899611e2a361
 yuv422p16le         9df47cb7d6d39b335a547ced2865e72e
-yuv422p9be          fdd15494de6cfc8c3f15650ecd3d8046
-yuv422p9le          a112fd777494d203d9d8e9623a50e503
+yuv422p9be          338dbb97c9f5a12ccea2c61774ddff2e
+yuv422p9le          0822f8af35a269a036ae44878acba87e
 yuv440p             98d0f96fdb3ba415899017adf7d4a4f9
 yuv440p10be         d874167042037c1daf9b9a2f74bffad9
 yuv440p10le         3cfbd921369aa8f1e4977efdb7f44c8c
@@ -91,8 +93,8 @@ yuv444p14be         ea3057d469d0c49c24e844256ef7871e
 yuv444p14le         940f5908ccf06e01411f0a7bddb45c6a
 yuv444p16be         aee24ab2e9a4656f889399f1b0d98639
 yuv444p16le         781c22317c02b3dd4225709000bdb847
-yuv444p9be          c37eb400483012fd97a030431818f328
-yuv444p9le          2136d762328cb9ce168b6261b7874791
+yuv444p9be          ac09917e5d2abc82c0d260007f4dfd77
+yuv444p9le          caef947b8aff5b52285385c6ae9b2439
 yuva420p            b227672e56215e184e702c02a771d7f3
 yuva420p10be        f66e7d677625380f7504867fab51305c
 yuva420p10le        01e94ee605714396e69b013c11dda348
diff --git a/tests/ref/fate/filter-pixfmts-pad b/tests/ref/fate/filter-pixfmts-pad
index 122f1ff7..9e474b9e 100644
--- a/tests/ref/fate/filter-pixfmts-pad
+++ b/tests/ref/fate/filter-pixfmts-pad
@@ -11,6 +11,7 @@ gray                ddc663a0491df3959d9c5795dceaa72e
 rgb0                78d500c8361ab6423a4826a00268c908
 rgb24               17f9e2e0c609009acaf2175c42d4a2a5
 rgba                b157c90191463d34fb3ce77b36c96386
+ya8                 5fc0f471207ddf7aa01b07027d56b672
 yuv410p             cb871dcc1e84a7ef1d21f9237b88cf6e
 yuv411p             aec2c1740de9a62db0d41f4dda9121b0
 yuv420p             4398e408fc35436ce4b20468946f58b6
diff --git a/tests/ref/fate/filter-pixfmts-scale b/tests/ref/fate/filter-pixfmts-scale
index 4d1cb7d8..241e839b 100644
--- a/tests/ref/fate/filter-pixfmts-scale
+++ b/tests/ref/fate/filter-pixfmts-scale
@@ -2,12 +2,13 @@
 0rgb                80a58af8c639743307207ab4b69ca863
 abgr                63f2eaa8712ea6108985f4a0b83587c9
 argb                f0e17c71a40643c33a5bcfb481f6d8f8
+ayuv64le            59fb016f9874062d0be77cb3920ffed2
 bgr0                243d58ca64f97b2f415b4c63cb79f0e1
 bgr24               18744aaab4b8bce065a7144dc0ccf921
 bgr444be            920760bee08c4fa161bf060e21ebba92
 bgr444le            01be36a28ebca1a11eb4d192986cd4e9
-bgr48be             a6fee4ac9f70d0da6a4b3a0e6353ca7f
-bgr48le             9c5d30b3b31ceaf3009fc7f1cf1cf7b6
+bgr48be             3ae02769c69d2512eaa26fff65763acb
+bgr48le             a6ce2344f07b77438258b6787fe5c24c
 bgr4_byte           01efea74088e5e3343c19ee053b95f31
 bgr555be            ab353278d103d379e1ec86e5cabb645f
 bgr555le            16ccbf59297e4b9ab25fd8af5a84a95d
@@ -15,8 +16,8 @@ bgr565be            3477e19fc11f95285836f30fdff26c1d
 bgr565le            82a81e7c9d4e0431fa22f4df9694afdc
 bgr8                2c57e76ccf04d51de6acafcf35d6fa70
 bgra                d8316272bc3a360ef9dff3ecc84520a3
-bgra64be            688499004461a2ce9debadb36dbcde5b
-bgra64le            c80dda435633c301e14d5b46a7edcf8d
+bgra64be            4e6a1b9f9c18b881c27d76611d45f737
+bgra64le            efeee0abcc658ebcff049d5e74d74943
 gbrap               e97ea4a104467c482173b7eaa57c14e3
 gbrp                dc3387f925f972c61aae7eb23cdc19f0
 gbrp10be            3a6d59192b6bb89ab42252b2b4818519
@@ -30,17 +31,17 @@ gbrp9le             0ed709f7828f0be5f828596f0720a82b
 gray                221201cc7cfc4964eacd8b3e426fd276
 gray16be            32891cb0928b1119d8d43a6e1bef0e2b
 gray16le            f96cfb5652b090dad52615930f0ce65f
-monob               337d236f59b891b16dd17c5267cf874b
-monow               3cb791057cf735930f97fe67e5125c45
+monob               f01cb0b623357387827902d9d0963435
+monow               35c68b86c226d6990b2dcb573a05ff6b
 nv12                b118d24a3653fe66e5d9e079033aef79
 nv21                c74bb1c10dbbdee8a1f682b194486c4d
-pal8                28a5374b56a7d3e37f95ddb8469f14dd
+pal8                29e10892009b2cfe431815ec3052ed3b
 rgb0                fbd27e98154efb7535826afed41e9bb0
 rgb24               e022e741451e81f2ecce1c7240b93e87
 rgb444be            db52b9ecdf98479b693e3f4bd9e77bac
 rgb444le            63288425c05f146cde5c82b85bb126e0
-rgb48be             c2e456838a71237cb1398ab5a7c35a6e
-rgb48le             6ef772549307349c599f419313c75b7a
+rgb48be             45b25016f10d54cf36eef3479afd8249
+rgb48le             40577b147620ecfb115717473d000697
 rgb4_byte           9e540a2e7193ebcbf1c7f85d192a0c4e
 rgb555be            cb5407a0d40f3d0120155daeaaa9a222
 rgb555le            c15540d1fc887882c35860634009c439
@@ -48,11 +49,12 @@ rgb565be            c69fa7d6e458509de65e911d147629a8
 rgb565le            a4a6ef89cdc10282b428cb1392f2a353
 rgb8                bcdc033b4ef0979d060dbc8893d4db58
 rgba                85bb5d03cea1c6e8002ced3373904336
-rgba64be            21611863fbbe149416a11e95877824ac
-rgba64le            35c195a441e5f8ca8e7e4ed098ecf0c1
+rgba64be            ee73e57923af984b31cc7795d13929da
+rgba64le            783d2779adfafe3548bdb671ec0de69e
 uyvy422             aeb4ba4f9f003ae21f6d18089198244f
-xyz12be             f6350b9a2f5add20d3d67f59c100166f
-xyz12le             982935a6ea6a297fd7be8aee0fda9870
+xyz12be             c7ba8345998c0141ddc079cdd29b1a40
+xyz12le             95f5d3a0de834cc495c9032a14987cde
+ya8                 0a9db5bb4b009de9197eede5e9d19e16
 yuv410p             e8f49b5fb9335b62c074f7f8bb0234fc
 yuv411p             5af32557c93beb482e26e7af693104c6
 yuv420p             5d3ac239c3712143560b1dfbd48a7ddd
@@ -64,8 +66,8 @@ yuv420p14be         b202fde5a53d529ddaa35c9467ff0b61
 yuv420p14le         36cac5d88b0d566cf835e84da6513e5a
 yuv420p16be         6f307c5b1a5941023f9029cb3a616f5c
 yuv420p16le         11f4bfbd4a058b58aa26dc47a86061f7
-yuv420p9be          43a800248882c33825a54a686e89be54
-yuv420p9le          d9f9238eed6e704c847e5007f5424fba
+yuv420p9be          aa0d83ca3cdb5770e47dc007cf5f7324
+yuv420p9le          0e6ade4219bdcbce32eceafc80d995d7
 yuv422p             9823e4d6bd1482b0cab3c44dab67f0a7
 yuv422p10be         42b9b936392b4a6a678028ace2cdcd20
 yuv422p10le         4bdc5e9ab3a16409600887335dbb1a66
@@ -75,8 +77,8 @@ yuv422p14be         2a005a86b80b947c953d11ae170551c7
 yuv422p14le         dcbde0634eb70bed62dde097c80a1643
 yuv422p16be         a772b46454e415ce454c0999ebb71486
 yuv422p16le         b4f64306c671ba4aa2eb23732ee02317
-yuv422p9be          1f991e6c661fe1e5840b05cb37945529
-yuv422p9le          7987ca4dbbf658e8dd87565fe225c333
+yuv422p9be          8313b67817cd81fe768bdc5c2f3fffb1
+yuv422p9le          39e9236c5005bfee5399c29e379964da
 yuv440p             483b8427cef7ab9c94d6b3f26d0ab094
 yuv440p10be         b93618311430e216a3d6736182fe7c04
 yuv440p10le         ca2c882018398d2a126c1ec65e8336d8
@@ -91,8 +93,8 @@ yuv444p14be         3eac31f0d4969210640de74914faf86d
 yuv444p14le         2c362c4cf167b7e2d83f4eb0dfaeb2b9
 yuv444p16be         a60c674411d64cc4b9fbf17039afffb3
 yuv444p16le         0a490fef1f2631367ee362d20a336efe
-yuv444p9be          3f1eddb729b029669cb3cd441c2517c0
-yuv444p9le          74d9db458b659935d82ebe3cbca920bb
+yuv444p9be          040bcbb962c19e390482301933622930
+yuv444p9le          16b65bb696a8931c7ab69501a7f93cfd
 yuva420p            05a12916f04859bb2c9a6decf624af74
 yuva420p10be        df8b3acfa7ac6ad96929aac1aa6c0102
 yuva420p10le        a5cc4bfc952ad1bc6f033d136d5a821c
diff --git a/tests/ref/fate/filter-pixfmts-swapuv b/tests/ref/fate/filter-pixfmts-swapuv
index ff24773a..e0238098 100644
--- a/tests/ref/fate/filter-pixfmts-swapuv
+++ b/tests/ref/fate/filter-pixfmts-swapuv
@@ -9,8 +9,8 @@ yuv420p14be         970fa43a1470ddedbd874c726a519293
 yuv420p14le         5a18b1d6f439077208556f2d2a80becd
 yuv420p16be         97009aa152a4c6dc383b75c551ffa71c
 yuv420p16le         e7201caa7838647aefee41ae9e098110
-yuv420p9be          9a0b5dcfa50706676bf818e7215c3036
-yuv420p9le          850d73c33b3d5c55f4fb984871bf5b0a
+yuv420p9be          1fddc5d7f64f642fe888c5e0a8a4f499
+yuv420p9le          6e111f38dddfeec499e162f17244362d
 yuv422p             80c6bc82f4bc330df1895c8f998e34bf
 yuv422p10be         0aeb0cd5949ee258355fbe4ff2f84a56
 yuv422p10le         1bfff133885efb1c44dfd9191b6f241a
@@ -20,8 +20,8 @@ yuv422p14be         91a75ccb62fd1149069c6b7b26fe195d
 yuv422p14le         dbec6e0152daedafa161437fc931b1e6
 yuv422p16be         c5de1a3fd28f647cb14d293061312f80
 yuv422p16le         56d4e3285312b1a24f299bef44f4dc15
-yuv422p9be          31bdd7e06ccd5272df355acbf213522c
-yuv422p9le          77022abb447608c874816675ba1e6aa1
+yuv422p9be          ee90a48d7bbb9973e9152e80d58bfe4d
+yuv422p9le          c6cacd918669ffd24732281bed2e61a6
 yuv440p             563fd94ba1ddde6f3cd34a815ec03165
 yuv440p10be         2aadf346979813cbb336c0a543fe243f
 yuv440p10le         9e34483ca0850e2d701a8a6d8ca9a0f1
@@ -36,8 +36,8 @@ yuv444p14be         c87e248a2c605515f11afd8f1251db67
 yuv444p14le         3085381c2b70642fc95c6bb153d766a1
 yuv444p16be         3d99c4af5cc1ddc3144a10e0b2b75951
 yuv444p16le         56f7022d0aeb4c4f56c6451e431d5390
-yuv444p9be          7879b676f67cde59ddc7c73f8a505918
-yuv444p9le          aa8e674a19322b0d89d7930347428e90
+yuv444p9be          3d86a8c9cc4af8ecc692578384e01ffe
+yuv444p9le          65b78fabc21cad07cfd209f2c27ae0be
 yuva420p            8afb004ce37ac4d9f881c138b25c414a
 yuva420p10be        7417eee3faa13dd69a2335996fd4ed14
 yuva420p10le        db1e7e8bc49adb2180d8dcef665331b7
diff --git a/tests/ref/fate/filter-pixfmts-vflip b/tests/ref/fate/filter-pixfmts-vflip
index ac9f0030..c7cc0c7b 100644
--- a/tests/ref/fate/filter-pixfmts-vflip
+++ b/tests/ref/fate/filter-pixfmts-vflip
@@ -2,6 +2,7 @@
 0rgb                76b792f8ce8a72925e04294dc2f25b36
 abgr                8b94f489e68802d76f1e2844688a4911
 argb                3fd6af7ef2364d8aa845d45db289a04a
+ayuv64le            558671dd31d0754cfa6344eaf441df78
 bgr0                7117438cf000254610f23625265769b5
 bgr24               52b2c21cbc166978a38a646c354b6858
 bgr444be            6c6cb3f5a26d5fd00bd04467bb0bbcca
@@ -30,11 +31,11 @@ gbrp9le             cff296346d93e430cbc0acfbc91482ab
 gray                41811422d5819ed69389357294384c10
 gray16be            29f24ba7cb0fc4fd2ae78963d008f6e6
 gray16le            a37e9c4ea76e8eeddc2af8f600ba2c10
-monob               f51f07ba50f4398233106f0e81494170
-monow               ee414089983b5be0f76b9d962757d900
+monob               7810c4857822ccfc844d78f5e803269a
+monow               90a947bfcd5f2261e83b577f48ec57b1
 nv12                261ebe585ae2aa4e70d39a10c1679294
 nv21                2909feacd27bebb080c8e0fa41795269
-pal8                e81c55915557563b46627b0911764578
+pal8                450b0155d0f2d5628bf95a442db5f817
 rgb0                56a7ea69541bcd27bef6a5615784722b
 rgb24               195e6dae1c3a488b9d3ceb7560d25d85
 rgb444be            d30742559618f871ef5543f602e35d04
@@ -53,6 +54,7 @@ rgba64le            48f45b10503b7dd140329c3dd0d54c98
 uyvy422             3a237e8376264e0cfa78f8a3fdadec8a
 xyz12be             810644e008deb231850d779aaa27cc7e
 xyz12le             829701db461b43533cf9241e0743bc61
+ya8                 4299c6ca3b470a7d8a420e26eb485b1d
 yuv410p             c7adfe96c8e043a6cb9290c39bf8063c
 yuv411p             3fce29db403a25f81be39e01aaf6ff3a
 yuv420p             d64fae96fac22aefa8fbcf45a09f37c1
@@ -64,8 +66,8 @@ yuv420p14be         22d2d9fb5883410807ede4066e735426
 yuv420p14le         5cace6a24a1ff5d2a41a3c909c4bc41f
 yuv420p16be         0a6499a1b22230ad030448d3ac95b5c1
 yuv420p16le         2a5c131b060757aab1fe84200528d404
-yuv420p9be          95a9f53de74785fec62415e42048fa3e
-yuv420p9le          3d0a36e6505c49eeb2c305b4acb41a12
+yuv420p9be          694227c773c3a66e4edeca950f99f6d6
+yuv420p9le          c3ec4bffc19c49edafd769a7dfccd9c1
 yuv422p             54f608c9d8bc56979aeaa7863820f5d3
 yuv422p10be         fbd6329d2decbe318be4d89dc8ade3a3
 yuv422p10le         7d7bdf4f68d8a7698e92722625c59c53
@@ -75,8 +77,8 @@ yuv422p14be         3bc3a72c7cf995981bd8eb45f23705b3
 yuv422p14le         5987087bbc03d8cbff4c41bce03451d9
 yuv422p16be         97d95a9cfe8f67fb20a4c983c7fdc215
 yuv422p16le         3b9df52a757a0d1ddb1c9c38f0ed4787
-yuv422p9be          104a3cc49ccbbeff0932df5f1a2c104f
-yuv422p9le          d0b803eeb1237ffa8a067fd084c2c385
+yuv422p9be          25116f0953ecd26f292101b51ac708df
+yuv422p9le          42615866fe9ab98e28970ed7fbf4b2a0
 yuv440p             fa45ee7329b98fc43c0be460d9a0d6a5
 yuv440p10be         54789b4f66d9a79fc182ff268639dbd0
 yuv440p10le         d06d7c054522ac02ec04f76dfa431676
@@ -91,8 +93,8 @@ yuv444p14be         cf181f44988d3ddf4cc32b87ca473251
 yuv444p14le         086322cb9a98eb91825c9bb2ad00eac7
 yuv444p16be         ee0721d7fda4a67bc6270a78fa361140
 yuv444p16le         8c4c2511907fd23d9aeefab490a22db7
-yuv444p9be          351f5f4ce50e64f264610b474dcc70fd
-yuv444p9le          efd49802d44f337f0098e07945d95329
+yuv444p9be          7ffa40454450cbeec09d57f5a0a3eb8b
+yuv444p9le          a4fb60fbace24757b1e07d5a65f09e64
 yuva420p            3b136bcd7c2ffc3a1849e4814c046954
 yuva420p10be        d045faff818dabb9ecebab6d14bf7b10
 yuva420p10le        becf56ac1e271c04ccc204c68e8e0b5f
diff --git a/tests/ref/fate/filter-removegrain-mode-00 b/tests/ref/fate/filter-removegrain-mode-00
new file mode 100644
index 00000000..115bc455
--- /dev/null
+++ b/tests/ref/fate/filter-removegrain-mode-00
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,   152064, 0x05b789ef
diff --git a/tests/ref/fate/filter-removegrain-mode-01 b/tests/ref/fate/filter-removegrain-mode-01
new file mode 100644
index 00000000..0e74927f
--- /dev/null
+++ b/tests/ref/fate/filter-removegrain-mode-01
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,   152064, 0x4f234e65
diff --git a/tests/ref/fate/filter-removegrain-mode-02 b/tests/ref/fate/filter-removegrain-mode-02
new file mode 100644
index 00000000..be7a4196
--- /dev/null
+++ b/tests/ref/fate/filter-removegrain-mode-02
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,   152064, 0xc2dd0535
diff --git a/tests/ref/fate/filter-removegrain-mode-03 b/tests/ref/fate/filter-removegrain-mode-03
new file mode 100644
index 00000000..7f98bec4
--- /dev/null
+++ b/tests/ref/fate/filter-removegrain-mode-03
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,   152064, 0x09dbc38a
diff --git a/tests/ref/fate/filter-removegrain-mode-04 b/tests/ref/fate/filter-removegrain-mode-04
new file mode 100644
index 00000000..510371df
--- /dev/null
+++ b/tests/ref/fate/filter-removegrain-mode-04
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,   152064, 0x32c6a66e
diff --git a/tests/ref/fate/filter-removegrain-mode-05 b/tests/ref/fate/filter-removegrain-mode-05
new file mode 100644
index 00000000..11ddc897
--- /dev/null
+++ b/tests/ref/fate/filter-removegrain-mode-05
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,   152064, 0xc3c64ec6
diff --git a/tests/ref/fate/filter-removegrain-mode-06 b/tests/ref/fate/filter-removegrain-mode-06
new file mode 100644
index 00000000..8d3d318b
--- /dev/null
+++ b/tests/ref/fate/filter-removegrain-mode-06
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,   152064, 0x38e84065
diff --git a/tests/ref/fate/filter-removegrain-mode-07 b/tests/ref/fate/filter-removegrain-mode-07
new file mode 100644
index 00000000..9ce72177
--- /dev/null
+++ b/tests/ref/fate/filter-removegrain-mode-07
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,   152064, 0xfbdeec94
diff --git a/tests/ref/fate/filter-removegrain-mode-08 b/tests/ref/fate/filter-removegrain-mode-08
new file mode 100644
index 00000000..3a05d103
--- /dev/null
+++ b/tests/ref/fate/filter-removegrain-mode-08
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,   152064, 0x8f7c85a2
diff --git a/tests/ref/fate/filter-removegrain-mode-09 b/tests/ref/fate/filter-removegrain-mode-09
new file mode 100644
index 00000000..4787c223
--- /dev/null
+++ b/tests/ref/fate/filter-removegrain-mode-09
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,   152064, 0x9609a659
diff --git a/tests/ref/fate/filter-removegrain-mode-10 b/tests/ref/fate/filter-removegrain-mode-10
new file mode 100644
index 00000000..7f768542
--- /dev/null
+++ b/tests/ref/fate/filter-removegrain-mode-10
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,   152064, 0x4adb45d6
diff --git a/tests/ref/fate/filter-removegrain-mode-11 b/tests/ref/fate/filter-removegrain-mode-11
new file mode 100644
index 00000000..f2c3be0e
--- /dev/null
+++ b/tests/ref/fate/filter-removegrain-mode-11
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,   152064, 0xd0d891ff
diff --git a/tests/ref/fate/filter-removegrain-mode-12 b/tests/ref/fate/filter-removegrain-mode-12
new file mode 100644
index 00000000..f2c3be0e
--- /dev/null
+++ b/tests/ref/fate/filter-removegrain-mode-12
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,   152064, 0xd0d891ff
diff --git a/tests/ref/fate/filter-removegrain-mode-13 b/tests/ref/fate/filter-removegrain-mode-13
new file mode 100644
index 00000000..b481928d
--- /dev/null
+++ b/tests/ref/fate/filter-removegrain-mode-13
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,   152064, 0x09d4da80
diff --git a/tests/ref/fate/filter-removegrain-mode-14 b/tests/ref/fate/filter-removegrain-mode-14
new file mode 100644
index 00000000..7355a61a
--- /dev/null
+++ b/tests/ref/fate/filter-removegrain-mode-14
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,   152064, 0x145f34c8
diff --git a/tests/ref/fate/filter-removegrain-mode-15 b/tests/ref/fate/filter-removegrain-mode-15
new file mode 100644
index 00000000..4c11088a
--- /dev/null
+++ b/tests/ref/fate/filter-removegrain-mode-15
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,   152064, 0x11eea9b4
diff --git a/tests/ref/fate/filter-removegrain-mode-16 b/tests/ref/fate/filter-removegrain-mode-16
new file mode 100644
index 00000000..9b1ab230
--- /dev/null
+++ b/tests/ref/fate/filter-removegrain-mode-16
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,   152064, 0x8bc800ee
diff --git a/tests/ref/fate/filter-removegrain-mode-17 b/tests/ref/fate/filter-removegrain-mode-17
new file mode 100644
index 00000000..29b383e7
--- /dev/null
+++ b/tests/ref/fate/filter-removegrain-mode-17
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,   152064, 0x7195b9a7
diff --git a/tests/ref/fate/filter-removegrain-mode-18 b/tests/ref/fate/filter-removegrain-mode-18
new file mode 100644
index 00000000..84aa1ec9
--- /dev/null
+++ b/tests/ref/fate/filter-removegrain-mode-18
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,   152064, 0xb47a081c
diff --git a/tests/ref/fate/filter-removegrain-mode-19 b/tests/ref/fate/filter-removegrain-mode-19
new file mode 100644
index 00000000..86887a69
--- /dev/null
+++ b/tests/ref/fate/filter-removegrain-mode-19
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,   152064, 0x948dc65d
diff --git a/tests/ref/fate/filter-removegrain-mode-20 b/tests/ref/fate/filter-removegrain-mode-20
new file mode 100644
index 00000000..def86fe3
--- /dev/null
+++ b/tests/ref/fate/filter-removegrain-mode-20
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,   152064, 0x64a388ea
diff --git a/tests/ref/fate/filter-removegrain-mode-21 b/tests/ref/fate/filter-removegrain-mode-21
new file mode 100644
index 00000000..832c1b1b
--- /dev/null
+++ b/tests/ref/fate/filter-removegrain-mode-21
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,   152064, 0xb7ae17f5
diff --git a/tests/ref/fate/filter-removegrain-mode-22 b/tests/ref/fate/filter-removegrain-mode-22
new file mode 100644
index 00000000..f689e3f2
--- /dev/null
+++ b/tests/ref/fate/filter-removegrain-mode-22
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,   152064, 0xe5736698
diff --git a/tests/ref/fate/filter-removegrain-mode-23 b/tests/ref/fate/filter-removegrain-mode-23
new file mode 100644
index 00000000..b0a588ba
--- /dev/null
+++ b/tests/ref/fate/filter-removegrain-mode-23
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,   152064, 0x8d15a472
diff --git a/tests/ref/fate/filter-removegrain-mode-24 b/tests/ref/fate/filter-removegrain-mode-24
new file mode 100644
index 00000000..286ce22c
--- /dev/null
+++ b/tests/ref/fate/filter-removegrain-mode-24
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,   152064, 0x77f6d439
diff --git a/tests/ref/fate/filter-scalechroma b/tests/ref/fate/filter-scalechroma
new file mode 100644
index 00000000..930ffefa
--- /dev/null
+++ b/tests/ref/fate/filter-scalechroma
@@ -0,0 +1,26 @@
+#tb 0: 1/25
+0,          0,          0,        1,   152064, 0xdcab783a
+0,          1,          1,        1,   152064, 0x79c7f1f6
+0,          2,          2,        1,   152064, 0x3b810afb
+0,          3,          3,        1,   152064, 0x892aca1d
+0,          4,          4,        1,   152064, 0x52fdd093
+0,          5,          5,        1,   152064, 0xaa643426
+0,          6,          6,        1,   152064, 0x9ad020ed
+0,          7,          7,        1,   152064, 0x5c179057
+0,          8,          8,        1,   152064, 0xa56bf155
+0,          9,          9,        1,   152064, 0x61dcffca
+0,         10,         10,        1,   152064, 0x0d51a1d3
+0,         11,         11,        1,   152064, 0x652f9e8d
+0,         12,         12,        1,   152064, 0xdc0bb4d8
+0,         13,         13,        1,   152064, 0x561437cf
+0,         14,         14,        1,   152064, 0x69ef8e4f
+0,         15,         15,        1,   152064, 0xe7244350
+0,         16,         16,        1,   152064, 0xe65651cf
+0,         17,         17,        1,   152064, 0xfc9ff646
+0,         18,         18,        1,   152064, 0x6ae10bc4
+0,         19,         19,        1,   152064, 0xd3d1898a
+0,         20,         20,        1,   152064, 0xf3f8b139
+0,         21,         21,        1,   152064, 0x68c129be
+0,         22,         22,        1,   152064, 0xc3922593
+0,         23,         23,        1,   152064, 0x2b14d96e
+0,         24,         24,        1,   152064, 0xab119489
diff --git a/tests/ref/fate/filter-stereo3d-sbsl-agmc b/tests/ref/fate/filter-stereo3d-sbsl-agmc
new file mode 100644
index 00000000..3a515b86
--- /dev/null
+++ b/tests/ref/fate/filter-stereo3d-sbsl-agmc
@@ -0,0 +1,6 @@
+#tb 0: 1/25
+0,          0,          0,        1,   152064, 0xa950d0d9
+0,          1,          1,        1,   152064, 0x6a6933c3
+0,          2,          2,        1,   152064, 0x66a1da3a
+0,          3,          3,        1,   152064, 0xe591aa12
+0,          4,          4,        1,   152064, 0x42413288
diff --git a/tests/ref/fate/filter-stereo3d-sbsl-agmd b/tests/ref/fate/filter-stereo3d-sbsl-agmd
new file mode 100644
index 00000000..7d188d4f
--- /dev/null
+++ b/tests/ref/fate/filter-stereo3d-sbsl-agmd
@@ -0,0 +1,6 @@
+#tb 0: 1/25
+0,          0,          0,        1,   152064, 0x962d1371
+0,          1,          1,        1,   152064, 0xe7db1d51
+0,          2,          2,        1,   152064, 0x3bb0c409
+0,          3,          3,        1,   152064, 0xcc213557
+0,          4,          4,        1,   152064, 0x913e1048
diff --git a/tests/ref/fate/filter-stereo3d-sbsl-agmg b/tests/ref/fate/filter-stereo3d-sbsl-agmg
new file mode 100644
index 00000000..3cae6f38
--- /dev/null
+++ b/tests/ref/fate/filter-stereo3d-sbsl-agmg
@@ -0,0 +1,6 @@
+#tb 0: 1/25
+0,          0,          0,        1,   152064, 0x45a6c9a8
+0,          1,          1,        1,   152064, 0xe7ba65fc
+0,          2,          2,        1,   152064, 0x88ab8fcc
+0,          3,          3,        1,   152064, 0x91478b9d
+0,          4,          4,        1,   152064, 0xb7ecc7cf
diff --git a/tests/ref/fate/filter-stereo3d-sbsl-agmh b/tests/ref/fate/filter-stereo3d-sbsl-agmh
new file mode 100644
index 00000000..e6853a61
--- /dev/null
+++ b/tests/ref/fate/filter-stereo3d-sbsl-agmh
@@ -0,0 +1,6 @@
+#tb 0: 1/25
+0,          0,          0,        1,   152064, 0x96464057
+0,          1,          1,        1,   152064, 0x58c4e280
+0,          2,          2,        1,   152064, 0xaf76fd24
+0,          3,          3,        1,   152064, 0xa4dacf0d
+0,          4,          4,        1,   152064, 0x964a2110
diff --git a/tests/ref/fate/filter-stereo3d-sbsl-arbg b/tests/ref/fate/filter-stereo3d-sbsl-arbg
new file mode 100644
index 00000000..1e872716
--- /dev/null
+++ b/tests/ref/fate/filter-stereo3d-sbsl-arbg
@@ -0,0 +1,6 @@
+#tb 0: 1/25
+0,          0,          0,        1,   152064, 0x4d83ea30
+0,          1,          1,        1,   152064, 0xa4a3be5b
+0,          2,          2,        1,   152064, 0x21032989
+0,          3,          3,        1,   152064, 0xa201194b
+0,          4,          4,        1,   152064, 0x9b7980cc
diff --git a/tests/ref/fate/filter-stereo3d-sbsl-arcc b/tests/ref/fate/filter-stereo3d-sbsl-arcc
new file mode 100644
index 00000000..56f17fd9
--- /dev/null
+++ b/tests/ref/fate/filter-stereo3d-sbsl-arcc
@@ -0,0 +1,6 @@
+#tb 0: 1/25
+0,          0,          0,        1,   152064, 0x2760b14a
+0,          1,          1,        1,   152064, 0xcf94baaf
+0,          2,          2,        1,   152064, 0x9b5e0e0a
+0,          3,          3,        1,   152064, 0x6bfb8127
+0,          4,          4,        1,   152064, 0xa0293fed
diff --git a/tests/ref/fate/filter-stereo3d-sbsl-arcd b/tests/ref/fate/filter-stereo3d-sbsl-arcd
new file mode 100644
index 00000000..fd9806b3
--- /dev/null
+++ b/tests/ref/fate/filter-stereo3d-sbsl-arcd
@@ -0,0 +1,6 @@
+#tb 0: 1/25
+0,          0,          0,        1,   152064, 0xa0261570
+0,          1,          1,        1,   152064, 0x678403c8
+0,          2,          2,        1,   152064, 0x1087e7b6
+0,          3,          3,        1,   152064, 0xa3909df3
+0,          4,          4,        1,   152064, 0x87e4c4d4
diff --git a/tests/ref/fate/filter-stereo3d-sbsl-arcg b/tests/ref/fate/filter-stereo3d-sbsl-arcg
new file mode 100644
index 00000000..68f3f54a
--- /dev/null
+++ b/tests/ref/fate/filter-stereo3d-sbsl-arcg
@@ -0,0 +1,6 @@
+#tb 0: 1/25
+0,          0,          0,        1,   152064, 0x70f5c9a8
+0,          1,          1,        1,   152064, 0x56f165fc
+0,          2,          2,        1,   152064, 0xe59f8fcc
+0,          3,          3,        1,   152064, 0xc5df8b9d
+0,          4,          4,        1,   152064, 0xaab2c7cf
diff --git a/tests/ref/fate/filter-stereo3d-sbsl-arch b/tests/ref/fate/filter-stereo3d-sbsl-arch
new file mode 100644
index 00000000..9c2fcd17
--- /dev/null
+++ b/tests/ref/fate/filter-stereo3d-sbsl-arch
@@ -0,0 +1,6 @@
+#tb 0: 1/25
+0,          0,          0,        1,   152064, 0x81ab8dad
+0,          1,          1,        1,   152064, 0x820fd98a
+0,          2,          2,        1,   152064, 0x1ccc712b
+0,          3,          3,        1,   152064, 0x0d278740
+0,          4,          4,        1,   152064, 0x13eaa8e2
diff --git a/tests/ref/fate/filter-stereo3d-sbsl-argg b/tests/ref/fate/filter-stereo3d-sbsl-argg
new file mode 100644
index 00000000..91fb0645
--- /dev/null
+++ b/tests/ref/fate/filter-stereo3d-sbsl-argg
@@ -0,0 +1,6 @@
+#tb 0: 1/25
+0,          0,          0,        1,   152064, 0x2cfbea30
+0,          1,          1,        1,   152064, 0x4c44be5b
+0,          2,          2,        1,   152064, 0x87462989
+0,          3,          3,        1,   152064, 0x1462194b
+0,          4,          4,        1,   152064, 0xe27c80cc
diff --git a/tests/ref/fate/filter-stereo3d-sbsl-aybc b/tests/ref/fate/filter-stereo3d-sbsl-aybc
new file mode 100644
index 00000000..72bd15c9
--- /dev/null
+++ b/tests/ref/fate/filter-stereo3d-sbsl-aybc
@@ -0,0 +1,6 @@
+#tb 0: 1/25
+0,          0,          0,        1,   152064, 0xae8c985d
+0,          1,          1,        1,   152064, 0x489e1535
+0,          2,          2,        1,   152064, 0x01a83b2b
+0,          3,          3,        1,   152064, 0x3870ffd6
+0,          4,          4,        1,   152064, 0x3d5b0cc4
diff --git a/tests/ref/fate/filter-stereo3d-sbsl-aybd b/tests/ref/fate/filter-stereo3d-sbsl-aybd
new file mode 100644
index 00000000..d963480f
--- /dev/null
+++ b/tests/ref/fate/filter-stereo3d-sbsl-aybd
@@ -0,0 +1,6 @@
+#tb 0: 1/25
+0,          0,          0,        1,   152064, 0x7dc98468
+0,          1,          1,        1,   152064, 0xf72db6c9
+0,          2,          2,        1,   152064, 0x1630f53f
+0,          3,          3,        1,   152064, 0xc1765599
+0,          4,          4,        1,   152064, 0x12e35db1
diff --git a/tests/ref/fate/filter-stereo3d-sbsl-aybg b/tests/ref/fate/filter-stereo3d-sbsl-aybg
new file mode 100644
index 00000000..53f7f723
--- /dev/null
+++ b/tests/ref/fate/filter-stereo3d-sbsl-aybg
@@ -0,0 +1,6 @@
+#tb 0: 1/25
+0,          0,          0,        1,   152064, 0x1a57c9a8
+0,          1,          1,        1,   152064, 0x789265fc
+0,          2,          2,        1,   152064, 0x2bb78fcc
+0,          3,          3,        1,   152064, 0x5caf8b9d
+0,          4,          4,        1,   152064, 0xc526c7cf
diff --git a/tests/ref/fate/filter-stereo3d-sbsl-aybh b/tests/ref/fate/filter-stereo3d-sbsl-aybh
new file mode 100644
index 00000000..c6fdd146
--- /dev/null
+++ b/tests/ref/fate/filter-stereo3d-sbsl-aybh
@@ -0,0 +1,6 @@
+#tb 0: 1/25
+0,          0,          0,        1,   152064, 0xf3ee182f
+0,          1,          1,        1,   152064, 0x8b1c25f1
+0,          2,          2,        1,   152064, 0xe78de2ac
+0,          3,          3,        1,   152064, 0x3eb08db6
+0,          4,          4,        1,   152064, 0x852eae5c
diff --git a/tests/ref/fate/filter-tblend b/tests/ref/fate/filter-tblend
new file mode 100644
index 00000000..3ff689ae
--- /dev/null
+++ b/tests/ref/fate/filter-tblend
@@ -0,0 +1,50 @@
+#tb 0: 1/25
+0,          1,          1,        1,   152064, 0x8e50e5e5
+0,          2,          2,        1,   152064, 0x7187a95e
+0,          3,          3,        1,   152064, 0xbd73863e
+0,          4,          4,        1,   152064, 0xabaefc78
+0,          5,          5,        1,   152064, 0xf080523f
+0,          6,          6,        1,   152064, 0x2ba7c11c
+0,          7,          7,        1,   152064, 0x1bfe0964
+0,          8,          8,        1,   152064, 0xe081d9db
+0,          9,          9,        1,   152064, 0x26eee75b
+0,         10,         10,        1,   152064, 0xcc28e18c
+0,         11,         11,        1,   152064, 0x3d2ed08b
+0,         12,         12,        1,   152064, 0x4567acbe
+0,         13,         13,        1,   152064, 0xa22f09db
+0,         14,         14,        1,   152064, 0xe4438e6f
+0,         15,         15,        1,   152064, 0x34d02799
+0,         16,         16,        1,   152064, 0x4c9230b3
+0,         17,         17,        1,   152064, 0x08e6c23c
+0,         18,         18,        1,   152064, 0xa0a475a5
+0,         19,         19,        1,   152064, 0x47f5d5e9
+0,         20,         20,        1,   152064, 0x0713219a
+0,         21,         21,        1,   152064, 0x9e4434a2
+0,         22,         22,        1,   152064, 0xc06a2482
+0,         23,         23,        1,   152064, 0xba321552
+0,         24,         24,        1,   152064, 0x74070b31
+0,         25,         25,        1,   152064, 0x92188f27
+0,         26,         26,        1,   152064, 0x29cf1fae
+0,         27,         27,        1,   152064, 0x04fd06ea
+0,         28,         28,        1,   152064, 0xcbbded09
+0,         29,         29,        1,   152064, 0x7f731df8
+0,         30,         30,        1,   152064, 0x9fd5b56f
+0,         31,         31,        1,   152064, 0x82217442
+0,         32,         32,        1,   152064, 0x95496106
+0,         33,         33,        1,   152064, 0xb9c50804
+0,         34,         34,        1,   152064, 0xa7051cba
+0,         35,         35,        1,   152064, 0x76bc5e88
+0,         36,         36,        1,   152064, 0x8276cfb2
+0,         37,         37,        1,   152064, 0x88bbf936
+0,         38,         38,        1,   152064, 0x160c5d4e
+0,         39,         39,        1,   152064, 0x0b61942a
+0,         40,         40,        1,   152064, 0x4a4aae9b
+0,         41,         41,        1,   152064, 0x049978fa
+0,         42,         42,        1,   152064, 0x12584f81
+0,         43,         43,        1,   152064, 0xada82b80
+0,         44,         44,        1,   152064, 0x3a4876ef
+0,         45,         45,        1,   152064, 0x7ccb86f2
+0,         46,         46,        1,   152064, 0x11bef54d
+0,         47,         47,        1,   152064, 0xb1714cd0
+0,         48,         48,        1,   152064, 0x950a3388
+0,         49,         49,        1,   152064, 0x1c1cc3ec
diff --git a/tests/ref/fate/filter-testsrc2-rgb24 b/tests/ref/fate/filter-testsrc2-rgb24
new file mode 100644
index 00000000..2fdfa003
--- /dev/null
+++ b/tests/ref/fate/filter-testsrc2-rgb24
@@ -0,0 +1,71 @@
+#tb 0: 1/7
+0,          0,          0,        1,   230400, 0x1c012312
+0,          1,          1,        1,   230400, 0xf7cf4e81
+0,          2,          2,        1,   230400, 0x29a45e1c
+0,          3,          3,        1,   230400, 0xe73082a7
+0,          4,          4,        1,   230400, 0x7b84c199
+0,          5,          5,        1,   230400, 0xcdfd5b52
+0,          6,          6,        1,   230400, 0xc035ebb8
+0,          7,          7,        1,   230400, 0x8e37afd0
+0,          8,          8,        1,   230400, 0x66386f44
+0,          9,          9,        1,   230400, 0x17fa6b56
+0,         10,         10,        1,   230400, 0x154c5b53
+0,         11,         11,        1,   230400, 0xcaff83ab
+0,         12,         12,        1,   230400, 0x7c3d935d
+0,         13,         13,        1,   230400, 0x8b2a48ef
+0,         14,         14,        1,   230400, 0xb56dc39e
+0,         15,         15,        1,   230400, 0x2f4522e6
+0,         16,         16,        1,   230400, 0x275efedf
+0,         17,         17,        1,   230400, 0x7917108e
+0,         18,         18,        1,   230400, 0x3f631971
+0,         19,         19,        1,   230400, 0xd70bb265
+0,         20,         20,        1,   230400, 0xb918d1e3
+0,         21,         21,        1,   230400, 0xce8d0032
+0,         22,         22,        1,   230400, 0xbbc3bd32
+0,         23,         23,        1,   230400, 0x42103583
+0,         24,         24,        1,   230400, 0x4be88f56
+0,         25,         25,        1,   230400, 0xd32610ea
+0,         26,         26,        1,   230400, 0xcae0a12b
+0,         27,         27,        1,   230400, 0x4256bdf3
+0,         28,         28,        1,   230400, 0x76b59faf
+0,         29,         29,        1,   230400, 0x6cbf7c06
+0,         30,         30,        1,   230400, 0x0625b097
+0,         31,         31,        1,   230400, 0x867c78c3
+0,         32,         32,        1,   230400, 0x767cc08b
+0,         33,         33,        1,   230400, 0x097980b0
+0,         34,         34,        1,   230400, 0xba182417
+0,         35,         35,        1,   230400, 0x08f62b8b
+0,         36,         36,        1,   230400, 0x8fc5e64c
+0,         37,         37,        1,   230400, 0xed020fad
+0,         38,         38,        1,   230400, 0x9b97066b
+0,         39,         39,        1,   230400, 0x1d0b31ff
+0,         40,         40,        1,   230400, 0x6915912f
+0,         41,         41,        1,   230400, 0xb7e6358e
+0,         42,         42,        1,   230400, 0x7a3aa13b
+0,         43,         43,        1,   230400, 0x7317e56a
+0,         44,         44,        1,   230400, 0xf1e1c7f9
+0,         45,         45,        1,   230400, 0xb1d5d5e1
+0,         46,         46,        1,   230400, 0x0bad1208
+0,         47,         47,        1,   230400, 0x2f71275a
+0,         48,         48,        1,   230400, 0x4d79679d
+0,         49,         49,        1,   230400, 0xfa52be03
+0,         50,         50,        1,   230400, 0x17d1d67a
+0,         51,         51,        1,   230400, 0x15844f5b
+0,         52,         52,        1,   230400, 0xd19466c3
+0,         53,         53,        1,   230400, 0xeb80cd8a
+0,         54,         54,        1,   230400, 0x9beee240
+0,         55,         55,        1,   230400, 0x677a13b3
+0,         56,         56,        1,   230400, 0x43abe88f
+0,         57,         57,        1,   230400, 0xa42aafc1
+0,         58,         58,        1,   230400, 0x0a78c290
+0,         59,         59,        1,   230400, 0x3806e07d
+0,         60,         60,        1,   230400, 0xaeef5d61
+0,         61,         61,        1,   230400, 0x0763c77f
+0,         62,         62,        1,   230400, 0xccc17ab1
+0,         63,         63,        1,   230400, 0x9a844893
+0,         64,         64,        1,   230400, 0x8f962741
+0,         65,         65,        1,   230400, 0xe3022980
+0,         66,         66,        1,   230400, 0x896130af
+0,         67,         67,        1,   230400, 0xdf6675fa
+0,         68,         68,        1,   230400, 0xa20c8f9e
+0,         69,         69,        1,   230400, 0x3e402c75
diff --git a/tests/ref/fate/filter-testsrc2-yuv420p b/tests/ref/fate/filter-testsrc2-yuv420p
new file mode 100644
index 00000000..804e5fa3
--- /dev/null
+++ b/tests/ref/fate/filter-testsrc2-yuv420p
@@ -0,0 +1,71 @@
+#tb 0: 1/7
+0,          0,          0,        1,   115200, 0x3744b3ed
+0,          1,          1,        1,   115200, 0x0c1062d6
+0,          2,          2,        1,   115200, 0x201b9db1
+0,          3,          3,        1,   115200, 0x278d887e
+0,          4,          4,        1,   115200, 0x309b9c06
+0,          5,          5,        1,   115200, 0x75e1a17b
+0,          6,          6,        1,   115200, 0xa14e9aca
+0,          7,          7,        1,   115200, 0xb73857e2
+0,          8,          8,        1,   115200, 0x686b77e7
+0,          9,          9,        1,   115200, 0x02b6ab21
+0,         10,         10,        1,   115200, 0x1fc2d693
+0,         11,         11,        1,   115200, 0x296dd4a5
+0,         12,         12,        1,   115200, 0x2d0ba5a4
+0,         13,         13,        1,   115200, 0x59e85f83
+0,         14,         14,        1,   115200, 0xc95a675e
+0,         15,         15,        1,   115200, 0x40426f99
+0,         16,         16,        1,   115200, 0xf040bf35
+0,         17,         17,        1,   115200, 0xc705ccd9
+0,         18,         18,        1,   115200, 0xa76dcd9d
+0,         19,         19,        1,   115200, 0x5635daa5
+0,         20,         20,        1,   115200, 0x3af5d306
+0,         21,         21,        1,   115200, 0x0caf7172
+0,         22,         22,        1,   115200, 0x7161ef8f
+0,         23,         23,        1,   115200, 0xc8ce7fb1
+0,         24,         24,        1,   115200, 0xccf02fed
+0,         25,         25,        1,   115200, 0x81cdf49f
+0,         26,         26,        1,   115200, 0xb9170ee1
+0,         27,         27,        1,   115200, 0x7e7d78d0
+0,         28,         28,        1,   115200, 0xfe4c0185
+0,         29,         29,        1,   115200, 0x9dde4256
+0,         30,         30,        1,   115200, 0x1eb35d69
+0,         31,         31,        1,   115200, 0xad3d2e3f
+0,         32,         32,        1,   115200, 0xf3282aa1
+0,         33,         33,        1,   115200, 0x1cef3c17
+0,         34,         34,        1,   115200, 0x688a442c
+0,         35,         35,        1,   115200, 0x2cdb327a
+0,         36,         36,        1,   115200, 0xe6c16f00
+0,         37,         37,        1,   115200, 0x6f8fac56
+0,         38,         38,        1,   115200, 0x54e8d2a1
+0,         39,         39,        1,   115200, 0x29afc657
+0,         40,         40,        1,   115200, 0xb3138f57
+0,         41,         41,        1,   115200, 0x169041ca
+0,         42,         42,        1,   115200, 0x9e3e4e2b
+0,         43,         43,        1,   115200, 0x192977ac
+0,         44,         44,        1,   115200, 0x4aefe354
+0,         45,         45,        1,   115200, 0xc575c060
+0,         46,         46,        1,   115200, 0xfe3ec033
+0,         47,         47,        1,   115200, 0xab53a3e7
+0,         48,         48,        1,   115200, 0xbe229fcb
+0,         49,         49,        1,   115200, 0x088e58c3
+0,         50,         50,        1,   115200, 0x79eaf2db
+0,         51,         51,        1,   115200, 0xb32489ab
+0,         52,         52,        1,   115200, 0x125d1db7
+0,         53,         53,        1,   115200, 0x81efd887
+0,         54,         54,        1,   115200, 0x0eb22945
+0,         55,         55,        1,   115200, 0x46cca5d0
+0,         56,         56,        1,   115200, 0x636c4203
+0,         57,         57,        1,   115200, 0x3d3074a6
+0,         58,         58,        1,   115200, 0xe92f787e
+0,         59,         59,        1,   115200, 0xd0cd4ecf
+0,         60,         60,        1,   115200, 0xf3ac6472
+0,         61,         61,        1,   115200, 0xac8063b4
+0,         62,         62,        1,   115200, 0x2b0c68f2
+0,         63,         63,        1,   115200, 0xc6173b40
+0,         64,         64,        1,   115200, 0x12c35e41
+0,         65,         65,        1,   115200, 0x57c48fdd
+0,         66,         66,        1,   115200, 0x1079be75
+0,         67,         67,        1,   115200, 0xc8e7d33e
+0,         68,         68,        1,   115200, 0x79cdac12
+0,         69,         69,        1,   115200, 0x8dbe5a5f
diff --git a/tests/ref/fate/filter-testsrc2-yuv444p b/tests/ref/fate/filter-testsrc2-yuv444p
new file mode 100644
index 00000000..c8cc7c17
--- /dev/null
+++ b/tests/ref/fate/filter-testsrc2-yuv444p
@@ -0,0 +1,71 @@
+#tb 0: 1/7
+0,          0,          0,        1,   230400, 0xbfb96366
+0,          1,          1,        1,   230400, 0xe63ca6e9
+0,          2,          2,        1,   230400, 0xc6acccc8
+0,          3,          3,        1,   230400, 0x6ea1b629
+0,          4,          4,        1,   230400, 0x508477ed
+0,          5,          5,        1,   230400, 0x49d4d57c
+0,          6,          6,        1,   230400, 0xd601a939
+0,          7,          7,        1,   230400, 0x0a0288f3
+0,          8,          8,        1,   230400, 0xa6da89f8
+0,          9,          9,        1,   230400, 0x0490ac8e
+0,         10,         10,        1,   230400, 0x1873ccd2
+0,         11,         11,        1,   230400, 0x64ef9885
+0,         12,         12,        1,   230400, 0x6b8fa2f0
+0,         13,         13,        1,   230400, 0x8d9657a4
+0,         14,         14,        1,   230400, 0xb4af4f21
+0,         15,         15,        1,   230400, 0x64137e0e
+0,         16,         16,        1,   230400, 0xb8b26ce2
+0,         17,         17,        1,   230400, 0xee1e7b00
+0,         18,         18,        1,   230400, 0x1c9b25d8
+0,         19,         19,        1,   230400, 0xe0c761ab
+0,         20,         20,        1,   230400, 0xe1cf0c14
+0,         21,         21,        1,   230400, 0xea380055
+0,         22,         22,        1,   230400, 0x6537716f
+0,         23,         23,        1,   230400, 0x8d9b9380
+0,         24,         24,        1,   230400, 0x9a04e333
+0,         25,         25,        1,   230400, 0x78005375
+0,         26,         26,        1,   230400, 0xc1cc9b9e
+0,         27,         27,        1,   230400, 0x84fda020
+0,         28,         28,        1,   230400, 0x711ef4ab
+0,         29,         29,        1,   230400, 0xccd04c02
+0,         30,         30,        1,   230400, 0xcf2a4bbd
+0,         31,         31,        1,   230400, 0x6d0ef0c0
+0,         32,         32,        1,   230400, 0x46141d09
+0,         33,         33,        1,   230400, 0x3a3d3f71
+0,         34,         34,        1,   230400, 0xe9e7f98f
+0,         35,         35,        1,   230400, 0x0f618ebb
+0,         36,         36,        1,   230400, 0x9c7d07b2
+0,         37,         37,        1,   230400, 0x90f8e960
+0,         38,         38,        1,   230400, 0xe3a856aa
+0,         39,         39,        1,   230400, 0xc66dcd53
+0,         40,         40,        1,   230400, 0xe7c1a281
+0,         41,         41,        1,   230400, 0xff484046
+0,         42,         42,        1,   230400, 0x1f56e486
+0,         43,         43,        1,   230400, 0x46bba179
+0,         44,         44,        1,   230400, 0x05a05e03
+0,         45,         45,        1,   230400, 0x552d3d32
+0,         46,         46,        1,   230400, 0x0899531d
+0,         47,         47,        1,   230400, 0x6321c950
+0,         48,         48,        1,   230400, 0xed67b3cc
+0,         49,         49,        1,   230400, 0x37ec807d
+0,         50,         50,        1,   230400, 0xc6af1344
+0,         51,         51,        1,   230400, 0x2bc9132b
+0,         52,         52,        1,   230400, 0x6024e553
+0,         53,         53,        1,   230400, 0xd7cef4f3
+0,         54,         54,        1,   230400, 0xf7f6eb0d
+0,         55,         55,        1,   230400, 0x0a2ed09e
+0,         56,         56,        1,   230400, 0x8c6883aa
+0,         57,         57,        1,   230400, 0x8542f554
+0,         58,         58,        1,   230400, 0xbebf972f
+0,         59,         59,        1,   230400, 0xf6f5f05f
+0,         60,         60,        1,   230400, 0xa4047f4a
+0,         61,         61,        1,   230400, 0x2b3f3d82
+0,         62,         62,        1,   230400, 0x99d9049a
+0,         63,         63,        1,   230400, 0xe74e5520
+0,         64,         64,        1,   230400, 0x04e2cd3d
+0,         65,         65,        1,   230400, 0x2324e05e
+0,         66,         66,        1,   230400, 0x4e4e3400
+0,         67,         67,        1,   230400, 0xdd547c3e
+0,         68,         68,        1,   230400, 0x1c6c13e4
+0,         69,         69,        1,   230400, 0xf7d2d98b
diff --git a/tests/ref/fate/filter-vectorscope_color b/tests/ref/fate/filter-vectorscope_color
new file mode 100644
index 00000000..3d184f6c
--- /dev/null
+++ b/tests/ref/fate/filter-vectorscope_color
@@ -0,0 +1,4 @@
+#tb 0: 1/25
+0,          0,          0,        1,   196608, 0x1890aa30
+0,          1,          1,        1,   196608, 0xa490acf9
+0,          2,          2,        1,   196608, 0x404a775d
diff --git a/tests/ref/fate/filter-vectorscope_color2 b/tests/ref/fate/filter-vectorscope_color2
new file mode 100644
index 00000000..be69443e
--- /dev/null
+++ b/tests/ref/fate/filter-vectorscope_color2
@@ -0,0 +1,4 @@
+#tb 0: 1/25
+0,          0,          0,        1,   196608, 0x9bfcfae5
+0,          1,          1,        1,   196608, 0x1ac6fcbf
+0,          2,          2,        1,   196608, 0x31cb1088
diff --git a/tests/ref/fate/filter-vectorscope_color3 b/tests/ref/fate/filter-vectorscope_color3
new file mode 100644
index 00000000..f297efd9
--- /dev/null
+++ b/tests/ref/fate/filter-vectorscope_color3
@@ -0,0 +1,4 @@
+#tb 0: 1/25
+0,          0,          0,        1,   196608, 0x6e698770
+0,          1,          1,        1,   196608, 0x374d74a7
+0,          2,          2,        1,   196608, 0x3d817143
diff --git a/tests/ref/fate/filter-vectorscope_color4 b/tests/ref/fate/filter-vectorscope_color4
new file mode 100644
index 00000000..5ede41f5
--- /dev/null
+++ b/tests/ref/fate/filter-vectorscope_color4
@@ -0,0 +1,4 @@
+#tb 0: 1/25
+0,          0,          0,        1,   196608, 0x4d2e53c4
+0,          1,          1,        1,   196608, 0x795e1dcc
+0,          2,          2,        1,   196608, 0xe4268800
diff --git a/tests/ref/fate/filter-vectorscope_gray b/tests/ref/fate/filter-vectorscope_gray
new file mode 100644
index 00000000..ed41cc06
--- /dev/null
+++ b/tests/ref/fate/filter-vectorscope_gray
@@ -0,0 +1,4 @@
+#tb 0: 1/25
+0,          0,          0,        1,   196608, 0x8e4171e2
+0,          1,          1,        1,   196608, 0xf3d371e2
+0,          2,          2,        1,   196608, 0xb9cb71e2
diff --git a/tests/ref/fate/filter-vectorscope_xy b/tests/ref/fate/filter-vectorscope_xy
new file mode 100644
index 00000000..6a4b8f85
--- /dev/null
+++ b/tests/ref/fate/filter-vectorscope_xy
@@ -0,0 +1,4 @@
+#tb 0: 1/25
+0,          0,          0,        1,   196608, 0xa0939af1
+0,          1,          1,        1,   196608, 0x43699af1
+0,          2,          2,        1,   196608, 0x69a19af1
diff --git a/tests/ref/fate/filter-w3fdif-complex b/tests/ref/fate/filter-w3fdif-complex
new file mode 100644
index 00000000..32eadc98
--- /dev/null
+++ b/tests/ref/fate/filter-w3fdif-complex
@@ -0,0 +1,31 @@
+#tb 0: 1/50
+0,         18,         18,        1,   622080, 0x21d21485
+0,         19,         19,        1,   622080, 0x600a5468
+0,         20,         20,        1,   622080, 0x9526f7b8
+0,         21,         21,        1,   622080, 0x8b3e661f
+0,         22,         22,        1,   622080, 0xff5cb5a9
+0,         23,         23,        1,   622080, 0x7e5e730c
+0,         24,         24,        1,   622080, 0x85219ac6
+0,         25,         25,        1,   622080, 0x2f3465a0
+0,         26,         26,        1,   622080, 0xddbf4da0
+0,         27,         27,        1,   622080, 0xc115d4ee
+0,         28,         28,        1,   622080, 0x7a8a8d72
+0,         29,         29,        1,   622080, 0xbafcd973
+0,         30,         30,        1,   622080, 0xd2c15603
+0,         31,         31,        1,   622080, 0xd7217855
+0,         32,         32,        1,   622080, 0x9a584eca
+0,         33,         33,        1,   622080, 0x9f3e1c40
+0,         34,         34,        1,   622080, 0x6d01efb7
+0,         35,         35,        1,   622080, 0x9ecfcce0
+0,         36,         36,        1,   622080, 0xb355fd7e
+0,         37,         37,        1,   622080, 0xc7784021
+0,         38,         38,        1,   622080, 0x13fe4187
+0,         39,         39,        1,   622080, 0xfa03b613
+0,         40,         40,        1,   622080, 0x2c9ccfcd
+0,         41,         41,        1,   622080, 0xcae6e6c6
+0,         42,         42,        1,   622080, 0x177968f9
+0,         43,         43,        1,   622080, 0xf708de36
+0,         44,         44,        1,   622080, 0x4491870a
+0,         45,         45,        1,   622080, 0x37709f98
+0,         46,         46,        1,   622080, 0x23e8d22f
+0,         47,         47,        1,   622080, 0x25cba876
diff --git a/tests/ref/fate/filter-w3fdif-simple b/tests/ref/fate/filter-w3fdif-simple
new file mode 100644
index 00000000..137d989d
--- /dev/null
+++ b/tests/ref/fate/filter-w3fdif-simple
@@ -0,0 +1,31 @@
+#tb 0: 1/50
+0,         18,         18,        1,   622080, 0xc73774f5
+0,         19,         19,        1,   622080, 0x4ea3a400
+0,         20,         20,        1,   622080, 0x95153cda
+0,         21,         21,        1,   622080, 0xec39bf0b
+0,         22,         22,        1,   622080, 0x94b6f836
+0,         23,         23,        1,   622080, 0xc145c3ee
+0,         24,         24,        1,   622080, 0x4d4cdee2
+0,         25,         25,        1,   622080, 0x193ebc7c
+0,         26,         26,        1,   622080, 0xbd728fd8
+0,         27,         27,        1,   622080, 0xf0f3252f
+0,         28,         28,        1,   622080, 0xc012d20a
+0,         29,         29,        1,   622080, 0x7b5831b2
+0,         30,         30,        1,   622080, 0x464e9622
+0,         31,         31,        1,   622080, 0x46e3c6c0
+0,         32,         32,        1,   622080, 0xa6ec908b
+0,         33,         33,        1,   622080, 0x6a257595
+0,         34,         34,        1,   622080, 0xa6552ecc
+0,         35,         35,        1,   622080, 0xdecd1a91
+0,         36,         36,        1,   622080, 0xfaa53e71
+0,         37,         37,        1,   622080, 0xc94a9707
+0,         38,         38,        1,   622080, 0xb5727fd4
+0,         39,         39,        1,   622080, 0x143c018c
+0,         40,         40,        1,   622080, 0x92d110c9
+0,         41,         41,        1,   622080, 0x4f762fc0
+0,         42,         42,        1,   622080, 0x3dd2a7d2
+0,         43,         43,        1,   622080, 0xa5d02dc0
+0,         44,         44,        1,   622080, 0x2223ce3d
+0,         45,         45,        1,   622080, 0xe4a5fc36
+0,         46,         46,        1,   622080, 0x8384159e
+0,         47,         47,        1,   622080, 0x995efa57
diff --git a/tests/ref/fate/filter-waveform_column b/tests/ref/fate/filter-waveform_column
new file mode 100644
index 00000000..0222f33a
--- /dev/null
+++ b/tests/ref/fate/filter-waveform_column
@@ -0,0 +1,51 @@
+#tb 0: 1/25
+0,          0,          0,        1,   135168, 0xef68cdfa
+0,          1,          1,        1,   135168, 0x3db0cdfa
+0,          2,          2,        1,   135168, 0xd5e6cdfa
+0,          3,          3,        1,   135168, 0x7f71cdfa
+0,          4,          4,        1,   135168, 0x9df7cdfa
+0,          5,          5,        1,   135168, 0xff48cdfa
+0,          6,          6,        1,   135168, 0xb46acdfa
+0,          7,          7,        1,   135168, 0x485acdfa
+0,          8,          8,        1,   135168, 0x8be2cdfa
+0,          9,          9,        1,   135168, 0x86dbcdfa
+0,         10,         10,        1,   135168, 0xdecacdfa
+0,         11,         11,        1,   135168, 0x29a3cdfa
+0,         12,         12,        1,   135168, 0x04c5cdfa
+0,         13,         13,        1,   135168, 0x5f2ccdfa
+0,         14,         14,        1,   135168, 0x9680cdfa
+0,         15,         15,        1,   135168, 0xe397cdfa
+0,         16,         16,        1,   135168, 0xa40ecdfa
+0,         17,         17,        1,   135168, 0x5f35cdfa
+0,         18,         18,        1,   135168, 0x32fbcdfa
+0,         19,         19,        1,   135168, 0x983dcdfa
+0,         20,         20,        1,   135168, 0xb67dcdfa
+0,         21,         21,        1,   135168, 0x5d41cdfa
+0,         22,         22,        1,   135168, 0x35cecdfa
+0,         23,         23,        1,   135168, 0xee17cdfa
+0,         24,         24,        1,   135168, 0x6599cdfa
+0,         25,         25,        1,   135168, 0x918bcdfa
+0,         26,         26,        1,   135168, 0xbd30cdfa
+0,         27,         27,        1,   135168, 0xc2a6cdfa
+0,         28,         28,        1,   135168, 0x688fcdfa
+0,         29,         29,        1,   135168, 0xb11bcdfa
+0,         30,         30,        1,   135168, 0x8316cdfa
+0,         31,         31,        1,   135168, 0x8073cdfa
+0,         32,         32,        1,   135168, 0x3bc1cdfa
+0,         33,         33,        1,   135168, 0xb581cdfa
+0,         34,         34,        1,   135168, 0xdf90cdfa
+0,         35,         35,        1,   135168, 0x6297cdfa
+0,         36,         36,        1,   135168, 0xe8e8cdfa
+0,         37,         37,        1,   135168, 0xa238cdfa
+0,         38,         38,        1,   135168, 0xdc68cdfa
+0,         39,         39,        1,   135168, 0x7545cdfa
+0,         40,         40,        1,   135168, 0xa29dcdfa
+0,         41,         41,        1,   135168, 0x874bcdfa
+0,         42,         42,        1,   135168, 0x7dfacdfa
+0,         43,         43,        1,   135168, 0x7fdfcdfa
+0,         44,         44,        1,   135168, 0xebb4cdfa
+0,         45,         45,        1,   135168, 0x7295cdfa
+0,         46,         46,        1,   135168, 0x54eecdfa
+0,         47,         47,        1,   135168, 0x18c4cdfa
+0,         48,         48,        1,   135168, 0xaa8fcdfa
+0,         49,         49,        1,   135168, 0x99edcdfa
diff --git a/tests/ref/fate/filter-waveform_envelope b/tests/ref/fate/filter-waveform_envelope
new file mode 100644
index 00000000..a0d1f30e
--- /dev/null
+++ b/tests/ref/fate/filter-waveform_envelope
@@ -0,0 +1,51 @@
+#tb 0: 1/25
+0,          0,          0,        1,   135168, 0x60746d0d
+0,          1,          1,        1,   135168, 0x3a19788d
+0,          2,          2,        1,   135168, 0x18e0d247
+0,          3,          3,        1,   135168, 0x8b4c177f
+0,          4,          4,        1,   135168, 0xc4b34d26
+0,          5,          5,        1,   135168, 0xba364556
+0,          6,          6,        1,   135168, 0xfcab7319
+0,          7,          7,        1,   135168, 0x363d77ce
+0,          8,          8,        1,   135168, 0x04367b07
+0,          9,          9,        1,   135168, 0x6e5b55a0
+0,         10,         10,        1,   135168, 0x33918421
+0,         11,         11,        1,   135168, 0x05ba7ff3
+0,         12,         12,        1,   135168, 0xcae09d62
+0,         13,         13,        1,   135168, 0xca78c5cb
+0,         14,         14,        1,   135168, 0x1542db51
+0,         15,         15,        1,   135168, 0xe013f307
+0,         16,         16,        1,   135168, 0x064008dd
+0,         17,         17,        1,   135168, 0xeb7010d5
+0,         18,         18,        1,   135168, 0x0cd313e1
+0,         19,         19,        1,   135168, 0xaf1b135f
+0,         20,         20,        1,   135168, 0x1b3c02c0
+0,         21,         21,        1,   135168, 0x50940658
+0,         22,         22,        1,   135168, 0x83a2046e
+0,         23,         23,        1,   135168, 0x1edf0a54
+0,         24,         24,        1,   135168, 0x37141206
+0,         25,         25,        1,   135168, 0x7832163e
+0,         26,         26,        1,   135168, 0xd4ba13dc
+0,         27,         27,        1,   135168, 0x9ba710a3
+0,         28,         28,        1,   135168, 0x434a108f
+0,         29,         29,        1,   135168, 0x86b01071
+0,         30,         30,        1,   135168, 0x7bd91c8d
+0,         31,         31,        1,   135168, 0x7a4c179c
+0,         32,         32,        1,   135168, 0x023f15b2
+0,         33,         33,        1,   135168, 0xe8901b20
+0,         34,         34,        1,   135168, 0x49d3157b
+0,         35,         35,        1,   135168, 0xdc990af9
+0,         36,         36,        1,   135168, 0x1ff707a2
+0,         37,         37,        1,   135168, 0xe6610022
+0,         38,         38,        1,   135168, 0x6fc3f168
+0,         39,         39,        1,   135168, 0x719809ff
+0,         40,         40,        1,   135168, 0xd1d9144f
+0,         41,         41,        1,   135168, 0x2f801797
+0,         42,         42,        1,   135168, 0x111c1eae
+0,         43,         43,        1,   135168, 0x23c11c4c
+0,         44,         44,        1,   135168, 0x07351d23
+0,         45,         45,        1,   135168, 0x04f1173d
+0,         46,         46,        1,   135168, 0xb2b71c10
+0,         47,         47,        1,   135168, 0x58c01a53
+0,         48,         48,        1,   135168, 0xadd11b7a
+0,         49,         49,        1,   135168, 0x924123c2
diff --git a/tests/ref/fate/filter-waveform_row b/tests/ref/fate/filter-waveform_row
new file mode 100644
index 00000000..8bc7f36a
--- /dev/null
+++ b/tests/ref/fate/filter-waveform_row
@@ -0,0 +1,51 @@
+#tb 0: 1/25
+0,          0,          0,        1,   110592, 0xa6deed0a
+0,          1,          1,        1,   110592, 0xe659ed0a
+0,          2,          2,        1,   110592, 0x1ca5ed0a
+0,          3,          3,        1,   110592, 0xc2e8ed0a
+0,          4,          4,        1,   110592, 0x78d4ed0a
+0,          5,          5,        1,   110592, 0xbe2eed0a
+0,          6,          6,        1,   110592, 0x482ded0a
+0,          7,          7,        1,   110592, 0x994eed0a
+0,          8,          8,        1,   110592, 0x93aeed0a
+0,          9,          9,        1,   110592, 0xbba8ed0a
+0,         10,         10,        1,   110592, 0xeb2bed0a
+0,         11,         11,        1,   110592, 0xe41ced0a
+0,         12,         12,        1,   110592, 0xb404ed0a
+0,         13,         13,        1,   110592, 0xbad1ed0a
+0,         14,         14,        1,   110592, 0x952aed0a
+0,         15,         15,        1,   110592, 0xbed4ed0a
+0,         16,         16,        1,   110592, 0x5c7ded0a
+0,         17,         17,        1,   110592, 0xbfe8ed0a
+0,         18,         18,        1,   110592, 0xbb6bed0a
+0,         19,         19,        1,   110592, 0x7473ed0a
+0,         20,         20,        1,   110592, 0x7489ed0a
+0,         21,         21,        1,   110592, 0x88a4ed0a
+0,         22,         22,        1,   110592, 0xff0ced0a
+0,         23,         23,        1,   110592, 0x04b8ed0a
+0,         24,         24,        1,   110592, 0xeb8ded0a
+0,         25,         25,        1,   110592, 0xc752ed0a
+0,         26,         26,        1,   110592, 0x5b1bed0a
+0,         27,         27,        1,   110592, 0x1c97ed0a
+0,         28,         28,        1,   110592, 0x0a28ed0a
+0,         29,         29,        1,   110592, 0x302ced0a
+0,         30,         30,        1,   110592, 0x280bed0a
+0,         31,         31,        1,   110592, 0xaa30ed0a
+0,         32,         32,        1,   110592, 0xce59ed0a
+0,         33,         33,        1,   110592, 0xe5f6ed0a
+0,         34,         34,        1,   110592, 0x5b34ed0a
+0,         35,         35,        1,   110592, 0x1b97ed0a
+0,         36,         36,        1,   110592, 0x3283ed0a
+0,         37,         37,        1,   110592, 0xe0d1ed0a
+0,         38,         38,        1,   110592, 0x03f1ed0a
+0,         39,         39,        1,   110592, 0x5744ed0a
+0,         40,         40,        1,   110592, 0x8bbfed0a
+0,         41,         41,        1,   110592, 0xde8fed0a
+0,         42,         42,        1,   110592, 0x9975ed0a
+0,         43,         43,        1,   110592, 0x72eded0a
+0,         44,         44,        1,   110592, 0xe3efed0a
+0,         45,         45,        1,   110592, 0xee7fed0a
+0,         46,         46,        1,   110592, 0x44ffed0a
+0,         47,         47,        1,   110592, 0x91e6ed0a
+0,         48,         48,        1,   110592, 0x0a58ed0a
+0,         49,         49,        1,   110592, 0x68d2ed0a
diff --git a/tests/ref/fate/filter-waveform_uv b/tests/ref/fate/filter-waveform_uv
new file mode 100644
index 00000000..8c3fc70e
--- /dev/null
+++ b/tests/ref/fate/filter-waveform_uv
@@ -0,0 +1,51 @@
+#tb 0: 1/25
+0,          0,          0,        1,   270336, 0x7be065a8
+0,          1,          1,        1,   270336, 0xa4e56622
+0,          2,          2,        1,   270336, 0xae4a662a
+0,          3,          3,        1,   270336, 0x367e6678
+0,          4,          4,        1,   270336, 0x970f667c
+0,          5,          5,        1,   270336, 0xdf7565f6
+0,          6,          6,        1,   270336, 0xc4a36652
+0,          7,          7,        1,   270336, 0x2f426630
+0,          8,          8,        1,   270336, 0xc095662c
+0,          9,          9,        1,   270336, 0x75fa6626
+0,         10,         10,        1,   270336, 0x95616592
+0,         11,         11,        1,   270336, 0x78916608
+0,         12,         12,        1,   270336, 0x118c65bc
+0,         13,         13,        1,   270336, 0x75446604
+0,         14,         14,        1,   270336, 0xe5fb6612
+0,         15,         15,        1,   270336, 0x3b8f6618
+0,         16,         16,        1,   270336, 0xdeee6646
+0,         17,         17,        1,   270336, 0xede46606
+0,         18,         18,        1,   270336, 0x64336606
+0,         19,         19,        1,   270336, 0xfc50663a
+0,         20,         20,        1,   270336, 0xe5fc660a
+0,         21,         21,        1,   270336, 0x6ecb6612
+0,         22,         22,        1,   270336, 0x06a4662a
+0,         23,         23,        1,   270336, 0xc7b66656
+0,         24,         24,        1,   270336, 0x033e6636
+0,         25,         25,        1,   270336, 0xc14f6650
+0,         26,         26,        1,   270336, 0x7462662c
+0,         27,         27,        1,   270336, 0xf8cb65e4
+0,         28,         28,        1,   270336, 0x6351665e
+0,         29,         29,        1,   270336, 0x44e6666e
+0,         30,         30,        1,   270336, 0x1d5f660e
+0,         31,         31,        1,   270336, 0xc248662e
+0,         32,         32,        1,   270336, 0x36256642
+0,         33,         33,        1,   270336, 0xe4426598
+0,         34,         34,        1,   270336, 0xde81665a
+0,         35,         35,        1,   270336, 0xaeab6622
+0,         36,         36,        1,   270336, 0x134e6668
+0,         37,         37,        1,   270336, 0x6c6e665e
+0,         38,         38,        1,   270336, 0x500b6670
+0,         39,         39,        1,   270336, 0x2c4c6648
+0,         40,         40,        1,   270336, 0xe4ae664c
+0,         41,         41,        1,   270336, 0x9b7e664c
+0,         42,         42,        1,   270336, 0xfefb6570
+0,         43,         43,        1,   270336, 0x04e96600
+0,         44,         44,        1,   270336, 0xcbba6670
+0,         45,         45,        1,   270336, 0x9f9666a6
+0,         46,         46,        1,   270336, 0x85b76642
+0,         47,         47,        1,   270336, 0x1a0e667c
+0,         48,         48,        1,   270336, 0x92c9662a
+0,         49,         49,        1,   270336, 0x9ed76682
diff --git a/tests/ref/fate/g2m2 b/tests/ref/fate/g2m2
new file mode 100644
index 00000000..710dbd14
--- /dev/null
+++ b/tests/ref/fate/g2m2
@@ -0,0 +1,161 @@
+#tb 0: 1/1000
+0,         47,         47,        0,  2359296, 0xb4434e4f
+0,         62,         62,        0,  2359296, 0x59cb5027
+0,         78,         78,        0,  2359296, 0xe9bc578d
+0,        109,        109,        0,  2359296, 0x5d17554f
+0,        125,        125,        0,  2359296, 0x6d685457
+0,        437,        437,        0,  2359296, 0x13205420
+0,        438,        438,        0,  2359296, 0xb8e15116
+0,        453,        453,        0,  2359296, 0x2ca55195
+0,        469,        469,        0,  2359296, 0x767d1c45
+0,        484,        484,        0,  2359296, 0x0af42016
+0,        500,        500,        0,  2359296, 0xa2083e69
+0,        516,        516,        0,  2359296, 0xb68a1308
+0,        531,        531,        0,  2359296, 0x4f334c0e
+0,        547,        547,        0,  2359296, 0x98b74e4f
+0,        562,        562,        0,  2359296, 0xd9de4e4f
+0,        578,        578,        0,  2359296, 0xa17c4e4f
+0,        594,        594,        0,  2359296, 0xa49a665d
+0,        609,        609,        0,  2359296, 0xf5f87360
+0,        781,        781,        0,  2359296, 0x75747360
+0,        797,        797,        0,  2359296, 0x745d7360
+0,        812,        812,        0,  2359296, 0x33047360
+0,        828,        828,        0,  2359296, 0xf19c7360
+0,        844,        844,        0,  2359296, 0xb0437360
+0,        859,        859,        0,  2359296, 0xaf2c7360
+0,        875,        875,        0,  2359296, 0x2ea87360
+0,        891,        891,        0,  2359296, 0xee577360
+0,        953,        953,        0,  2359296, 0x6dd37360
+0,       1078,       1078,        0,  2359296, 0xab327965
+0,       1094,       1094,        0,  2359296, 0x5f8677d0
+0,       1109,       1109,        0,  2359296, 0x02135eb4
+0,       1125,       1125,        0,  2359296, 0x09784e4f
+0,       1141,       1141,        0,  2359296, 0xa140a62d
+0,       1156,       1156,        0,  2359296, 0xa140a62d
+0,       1484,       1484,        0,  2359296, 0xa140a62d
+0,       1516,       1516,        0,  2359296, 0xa140a62d
+0,       1547,       1547,        0,  2359296, 0xa140a62d
+0,       1641,       1641,        0,  2359296, 0xa140a62d
+0,       1642,       1642,        0,  2359296, 0xa140a62d
+0,       1656,       1656,        0,  2359296, 0xa140a62d
+0,       1657,       1657,        0,  2359296, 0xa140a62d
+0,       1672,       1672,        0,  2359296, 0xa140a62d
+0,       1673,       1673,        0,  2359296, 0x92024e4f
+0,       1687,       1687,        0,  2359296, 0xb1754dbe
+0,       1688,       1688,        0,  2359296, 0x15ee5eb4
+0,       1703,       1703,        0,  2359296, 0xb1d9746e
+0,       1719,       1719,        0,  2359296, 0xabe77360
+0,       1734,       1734,        0,  2359296, 0xaad07360
+0,       1750,       1750,        0,  2359296, 0x2a4c7360
+0,       1766,       1766,        0,  2359296, 0x69777360
+0,       1781,       1781,        0,  2359296, 0xe8e47360
+0,       2328,       2328,        0,  2359296, 0x29357360
+0,       3031,       3031,        0,  2359296, 0x69777360
+0,       3078,       3078,        0,  2359296, 0xa9b97360
+0,       3109,       3109,        0,  2359296, 0xd2697707
+0,       3141,       3141,        0,  2359296, 0x22a07965
+0,       3156,       3156,        0,  2359296, 0xf9327aa7
+0,       3172,       3172,        0,  2359296, 0xa5d277d0
+0,       3203,       3203,        0,  2359296, 0x97b6746e
+0,       3328,       3328,        0,  2359296, 0x80bb746e
+0,       4562,       4562,        0,  2359296, 0x530b719a
+0,       4672,       4672,        0,  2359296, 0x4827665d
+0,       4703,       4703,        0,  2359296, 0xc48c5eb4
+0,       5391,       5391,        0,  2359296, 0xe6465eb4
+0,       5578,       5578,        0,  2359296, 0xece455ec
+0,       5594,       5594,        0,  2359296, 0xb5344dbe
+0,       5609,       5609,        0,  2359296, 0xa140a62d
+0,       5625,       5625,        0,  2359296, 0xa140a62d
+0,       5641,       5641,        0,  2359296, 0xa140a62d
+0,       5642,       5642,        0,  2359296, 0xa140a62d
+0,       5656,       5656,        0,  2359296, 0xa140a62d
+0,       5672,       5672,        0,  2359296, 0xa140a62d
+0,       5703,       5703,        0,  2359296, 0xa140a62d
+0,       5750,       5750,        0,  2359296, 0xa140a62d
+0,       5766,       5766,        0,  2359296, 0xa140a62d
+0,       5781,       5781,        0,  2359296, 0xa140a62d
+0,       5797,       5797,        0,  2359296, 0xa140a62d
+0,       5812,       5812,        0,  2359296, 0xa140a62d
+0,       5875,       5875,        0,  2359296, 0xa140a62d
+0,       5922,       5922,        0,  2359296, 0xa140a62d
+0,       5984,       5984,        0,  2359296, 0xa140a62d
+0,       6031,       6031,        0,  2359296, 0xa140a62d
+0,       6047,       6047,        0,  2359296, 0xa140a62d
+0,       6062,       6062,        0,  2359296, 0xa140a62d
+0,       6406,       6406,        0,  2359296, 0xa140a62d
+0,       6453,       6453,        0,  2359296, 0xa140a62d
+0,       6469,       6469,        0,  2359296, 0xa140a62d
+0,       6484,       6484,        0,  2359296, 0xa140a62d
+0,       6500,       6500,        0,  2359296, 0xa140a62d
+0,       6516,       6516,        0,  2359296, 0xa140a62d
+0,       6531,       6531,        0,  2359296, 0xa140a62d
+0,       6547,       6547,        0,  2359296, 0xa140a62d
+0,       6562,       6562,        0,  2359296, 0x5c2a4cd9
+0,       6578,       6578,        0,  2359296, 0x28f94e4f
+0,       6594,       6594,        0,  2359296, 0x9acb4820
+0,       6609,       6609,        0,  2359296, 0x9ec716e1
+0,       6625,       6625,        0,  2359296, 0xaf5f3fa4
+0,       6641,       6641,        0,  2359296, 0x7d633218
+0,       6642,       6642,        0,  2359296, 0x34fb2016
+0,       6656,       6656,        0,  2359296, 0x61351665
+0,       6812,       6812,        0,  2359296, 0xb23c1039
+0,       6828,       6828,        0,  2359296, 0x59290d69
+0,       6844,       6844,        0,  2359296, 0x639c132d
+0,       6859,       6859,        0,  2359296, 0x0b252237
+0,       6875,       6875,        0,  2359296, 0xe66f2fc5
+0,       6891,       6891,        0,  2359296, 0xa8b33761
+0,       6906,       6906,        0,  2359296, 0x81a63f8b
+0,       6969,       6969,        0,  2359296, 0x18074843
+0,       6984,       6984,        0,  2359296, 0x434a5195
+0,       7000,       7000,        0,  2359296, 0x6da15116
+0,       7001,       7001,        0,  2359296, 0xca755420
+0,       7016,       7016,        0,  2359296, 0xe6fc5457
+0,       7017,       7017,        0,  2359296, 0x271d53fd
+0,       7031,       7031,        0,  2359296, 0xa15b554f
+0,       7281,       7281,        0,  2359296, 0x49f6578d
+0,       7282,       7282,        0,  2359296, 0x2c0c4e4f
+0,       7297,       7297,        0,  2359296, 0x7e924e4f
+0,       7298,       7298,        0,  2359296, 0x32ff4e4f
+0,       7312,       7312,        0,  2359296, 0x23ad4e4f
+0,       7313,       7313,        0,  2359296, 0x7ddc4e4f
+0,       7328,       7328,        0,  2359296, 0xd0624e4f
+0,       7329,       7329,        0,  2359296, 0x22f74e4f
+0,       7781,       7781,        0,  2359296, 0x49fa4e4f
+0,       7797,       7797,        0,  2359296, 0x6a5a5027
+0,       7812,       7812,        0,  2359296, 0x9f935027
+0,       7828,       7828,        0,  2359296, 0xc5e55027
+0,       7844,       7844,        0,  2359296, 0xd4cc5027
+0,       8250,       8250,        0,  2359296, 0xd2ab5027
+0,       8266,       8266,        0,  2359296, 0x68f04e4f
+0,       8281,       8281,        0,  2359296, 0xd0b44e4f
+0,       8297,       8297,        0,  2359296, 0xfced4e4f
+0,       8298,       8298,        0,  2359296, 0x8b0d4e4f
+0,       8312,       8312,        0,  2359296, 0x09db4e4f
+0,       8328,       8328,        0,  2359296, 0x4d0f4e4f
+0,       8329,       8329,        0,  2359296, 0xad824dbe
+0,       8344,       8344,        0,  2359296, 0x9aca4dbe
+0,       8345,       8345,        0,  2359296, 0x755a4dbe
+0,       8359,       8359,        0,  2359296, 0xc6824d2d
+0,       8360,       8360,        0,  2359296, 0x7c344c0e
+0,       8375,       8375,        0,  2359296, 0x50f04c0e
+0,       8391,       8391,        0,  2359296, 0xfa594c0e
+0,       8406,       8406,        0,  2359296, 0x4d494c0e
+0,       8422,       8422,        0,  2359296, 0xf6b24c0e
+0,       8437,       8437,        0,  2359296, 0xcb6e4c0e
+0,       8453,       8453,        0,  2359296, 0xbd024c0e
+0,       8516,       8516,        0,  2359296, 0x245b4dbe
+0,       8531,       8531,        0,  2359296, 0x47874e4f
+0,       8547,       8547,        0,  2359296, 0xdead4e4f
+0,       8562,       8562,        0,  2359296, 0x847e4e4f
+0,       9344,       9344,        0,  2359296, 0x1a13e47c
+0,       9345,       9345,        0,  2359296, 0x46b3e321
+0,       9876,       9876,        0,  2359296, 0x76c0e35d
+0,       9922,       9922,        0,  2359296, 0xf6d9e519
+0,       9938,       9938,        0,  2359296, 0xac0fe4b3
+0,       9954,       9954,        0,  2359296, 0x3a3fe424
+0,       9955,       9955,        0,  2359296, 0xa97ce1a8
+0,       9969,       9969,        0,  2359296, 0x12fae01d
+0,       9970,       9970,        0,  2359296, 0x65b4df14
+0,       9985,       9985,        0,  2359296, 0x82d0e032
+0,       9986,       9986,        0,  2359296, 0xa452e0cf
+0,      10001,      10001,        0,  2359296, 0x22d6df37
diff --git a/tests/ref/fate/g2m3 b/tests/ref/fate/g2m3
new file mode 100644
index 00000000..e8fe6927
--- /dev/null
+++ b/tests/ref/fate/g2m3
@@ -0,0 +1,21 @@
+#tb 0: 1001/24000
+0,          0,          0,        1,  3824640, 0x9a253d29
+0,         12,         12,        1,  3824640, 0xcb232b06
+0,         15,         15,        1,  3824640, 0xb03c288c
+0,         16,         16,        1,  3824640, 0x260b284e
+0,         18,         18,        1,  3824640, 0x53ed2f30
+0,         19,         19,        1,  3824640, 0x00af334d
+0,         21,         21,        1,  3824640, 0x35ba3ef7
+0,         22,         22,        1,  3824640, 0x05c24e9a
+0,         24,         24,        1,  3824640, 0x03846423
+0,         25,         25,        1,  3824640, 0x0bcf6423
+0,         30,         30,        1,  3824640, 0x96446423
+0,         33,         33,        1,  3824640, 0x9e8f6423
+0,         34,         34,        1,  3824640, 0x22bc6423
+0,         36,         36,        1,  3824640, 0x714a6423
+0,         37,         37,        1,  3824640, 0x060d6423
+0,         39,         39,        1,  3824640, 0x5bf96423
+0,         40,         40,        1,  3824640, 0xb3476423
+0,        123,        123,        1,  3824640, 0xbf056423
+0,        124,        124,        1,  3824640, 0x30746423
+0,        126,        126,        1,  3824640, 0xea6f6423
diff --git a/tests/ref/fate/g2m4 b/tests/ref/fate/g2m4
new file mode 100644
index 00000000..c9a6e120
--- /dev/null
+++ b/tests/ref/fate/g2m4
@@ -0,0 +1,26 @@
+#tb 0: 1/10
+0,          0,          0,        1,  3932160, 0x109148ee
+0,          1,          1,        1,  3932160, 0xa87b7bff
+0,          2,          2,        1,  3932160, 0x00e14d16
+0,          3,          3,        1,  3932160, 0xbf8d7871
+0,          4,          4,        1,  3932160, 0x998ded98
+0,          5,          5,        1,  3932160, 0xe0d1328f
+0,          6,          6,        1,  3932160, 0xde7e69a0
+0,          7,          7,        1,  3932160, 0x5355cde3
+0,          8,          8,        1,  3932160, 0x4b3c6b11
+0,          9,          9,        1,  3932160, 0x19d8366f
+0,         10,         10,        1,  3932160, 0xf80f3663
+0,         14,         14,        1,  3932160, 0xe2dc1cc7
+0,         30,         30,        1,  3932160, 0x3ae51cc7
+0,         31,         31,        1,  3932160, 0x10ecd14c
+0,         32,         32,        1,  3932160, 0xda0f1d1f
+0,         33,         33,        1,  3932160, 0x50bd3602
+0,         34,         34,        1,  3932160, 0xad2083ba
+0,         35,         35,        1,  3932160, 0xa5b388bd
+0,         36,         36,        1,  3932160, 0x37ae6d4f
+0,         37,         37,        1,  3932160, 0x436ee825
+0,         38,         38,        1,  3932160, 0x6c2ec178
+0,         39,         39,        1,  3932160, 0x6c3f389b
+0,         40,         40,        1,  3932160, 0x3c0421c1
+0,         41,         41,        1,  3932160, 0x861b0449
+0,         42,         42,        1,  3932160, 0x41640723
diff --git a/tests/ref/fate/g722-encode b/tests/ref/fate/g722-encode
index c7198cf8..3f6b3cff 100644
--- a/tests/ref/fate/g722-encode
+++ b/tests/ref/fate/g722-encode
@@ -1 +1,17 @@
-MD5=7106189574186051c0497b287e2e5f19
+#format: frame checksums
+#version: 1
+#hash: MD5
+#tb 0: 1/16000
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,     8192,    16384, 1dd9c285eb608038f3257d1a8e02eb75
+0,       8192,       8192,     8192,    16384, f7459334cbe70c06bc0897edfe64e840
+0,      16384,      16384,     8192,    16384, 486e7d07c8879ccd304689ad61911e5d
+0,      24576,      24576,     8192,    16384, 5178262fdeff11dc025c72a2678e420d
+0,      32768,      32768,     8192,    16384, 913f8208615dcd643edafda7db1eb943
+0,      40960,      40960,     8192,    16384, 0b8dc719670feac586db87bf3a92cc2a
+0,      49152,      49152,     8192,    16384, 075dc5022f5e9f1cb2fcd066e590fed1
+0,      57344,      57344,     8192,    16384, a6494307ecb1359ff42d6e05cf40ca4c
+0,      65536,      65536,     8192,    16384, 5b28d3b0d6d2bfe7100daeefcb3010ec
+0,      73728,      73728,     8192,    16384, b3cf1952b8a0d2e6a2741867e9fa8e24
+0,      81920,      81920,     8192,    16384, f0981f8fa28bbc0ba6a527b041468951
+0,      90112,      90112,     5888,    11776, 1e3d1b3d0da71aa2cdc00175cc47433a
diff --git a/tests/ref/fate/g726-encode-2bit b/tests/ref/fate/g726-encode-2bit
index 26a12190..2e613107 100644
--- a/tests/ref/fate/g726-encode-2bit
+++ b/tests/ref/fate/g726-encode-2bit
@@ -1 +1,8 @@
-MD5=215eaef5778a16e2bf4f3725a557f355
+#format: frame checksums
+#version: 1
+#hash: MD5
+#tb 0: 1/8000
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,    16384,    32768, b28b116d2315323aeba6b66b58b7f4ed
+0,      16384,      16384,    16384,    32768, e9cfbebe99490bd4987341ee748291c4
+0,      32768,      32768,    15232,    30464, f890e7f29bc76ca6c214fac0cedd7a49
diff --git a/tests/ref/fate/g726-encode-3bit b/tests/ref/fate/g726-encode-3bit
index f9c69402..ce43d325 100644
--- a/tests/ref/fate/g726-encode-3bit
+++ b/tests/ref/fate/g726-encode-3bit
@@ -1 +1,10 @@
-MD5=0bebd949dfd5ac0ae3f2c3ceb2e3fac1
+#format: frame checksums
+#version: 1
+#hash: MD5
+#tb 0: 1/8000
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,    10920,    21840, 517dd6d1ce566b998251f0d215fa69c0
+0,      10920,      10920,    10920,    21840, b0268e2bcc67acb524753790123c65fd
+0,      21840,      21840,    10920,    21840, a072a222a4910642f12e127b95a4d640
+0,      32760,      32760,    10920,    21840, 83bcd3c91cc3b57afdab0cb79a83d7b7
+0,      43680,      43680,     4320,     8640, 0125e504113997ac7bcc027616edad55
diff --git a/tests/ref/fate/g726-encode-4bit b/tests/ref/fate/g726-encode-4bit
index 6d035171..1ccc1808 100644
--- a/tests/ref/fate/g726-encode-4bit
+++ b/tests/ref/fate/g726-encode-4bit
@@ -1 +1,11 @@
-MD5=a21cfea116ab2179eabe5d84b6bfc09a
+#format: frame checksums
+#version: 1
+#hash: MD5
+#tb 0: 1/8000
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,     8192,    16384, a0cf3a0953adce1a1032a4fd2da00a52
+0,       8192,       8192,     8192,    16384, c750c1b76a203556dd60d73d261529e9
+0,      16384,      16384,     8192,    16384, 4c116836487c05f259168f63883d6496
+0,      24576,      24576,     8192,    16384, 3e4f96c06d4c33c97829f962a5b4b443
+0,      32768,      32768,     8192,    16384, 55d73226120bcd01e4c0ea22a360e2ef
+0,      40960,      40960,     7040,    14080, de8f12a49e673ce923c2512b36e0ce8f
diff --git a/tests/ref/fate/g726-encode-5bit b/tests/ref/fate/g726-encode-5bit
index 459ebb39..219c73d9 100644
--- a/tests/ref/fate/g726-encode-5bit
+++ b/tests/ref/fate/g726-encode-5bit
@@ -1 +1,13 @@
-MD5=9cad98cf5205bf76d6e9d1241e56141a
+#format: frame checksums
+#version: 1
+#hash: MD5
+#tb 0: 1/8000
+#stream#, dts,        pts, duration,     size, hash
+0,          0,          0,     6552,    13104, 6fe3f75df1262c5f956887de9c32df40
+0,       6552,       6552,     6552,    13104, f955518de6f61f94253280d11d64d68b
+0,      13104,      13104,     6552,    13104, a3f2db5d35e0d923787b71aa352466de
+0,      19656,      19656,     6552,    13104, 90c41dc1338579e2e19edb811f8d58a9
+0,      26208,      26208,     6552,    13104, 4fbcd13f77e8cc0e61a7c9010e42adf1
+0,      32760,      32760,     6552,    13104, 82ef661e2afc2a1d6996005f17c1618c
+0,      39312,      39312,     6552,    13104, 1f5e9875ddcf88063e4adf910a8f0fa6
+0,      45864,      45864,     2136,     4272, 9a1cdf7ec6b06762047cdb1320d9d32e
diff --git a/tests/ref/fate/gapless-mp3 b/tests/ref/fate/gapless-mp3
index e6a7a338..2ce4b8cc 100644
--- a/tests/ref/fate/gapless-mp3
+++ b/tests/ref/fate/gapless-mp3
@@ -2,4 +2,4 @@ d5c88cf38416329a052a9b0cb140fb4c *tests/data/fate/gapless-mp3.out-1
 c96c3ae7bd3300fd2f4debac222de5b7
 3386bc2009b31b7ef39247918cbb02a5 *tests/data/fate/gapless-mp3.out-2
 c96c3ae7bd3300fd2f4debac222de5b7
-92e37f050ad4fc817730c8af17ee6d1b *tests/data/fate/gapless-mp3.out-3
+3ce35ec2a9a59b7459ecd6c98d8fa123 *tests/data/fate/gapless-mp3.out-3
diff --git a/tests/ref/fate/hap-chunk b/tests/ref/fate/hap-chunk
new file mode 100644
index 00000000..1e7b976f
--- /dev/null
+++ b/tests/ref/fate/hap-chunk
@@ -0,0 +1,2 @@
+#tb 0: 1/30
+0,          0,          0,        1,    16384, 0x096d409e
diff --git a/tests/ref/fate/hap1 b/tests/ref/fate/hap1
new file mode 100644
index 00000000..d3bdeae9
--- /dev/null
+++ b/tests/ref/fate/hap1
@@ -0,0 +1,2 @@
+#tb 0: 62/2997
+0,          0,          0,        1,  1228800, 0x0fa946bc
diff --git a/tests/ref/fate/hap5 b/tests/ref/fate/hap5
new file mode 100644
index 00000000..458981e4
--- /dev/null
+++ b/tests/ref/fate/hap5
@@ -0,0 +1,2 @@
+#tb 0: 1/30
+0,          0,          0,        1,  1228800, 0xb71ecf93
diff --git a/tests/ref/fate/hapy b/tests/ref/fate/hapy
new file mode 100644
index 00000000..e7d76083
--- /dev/null
+++ b/tests/ref/fate/hapy
@@ -0,0 +1,2 @@
+#tb 0: 62/2997
+0,          0,          0,        1,  1228800, 0x5993522d
diff --git a/tests/ref/fate/j2k-dwt b/tests/ref/fate/j2k-dwt
new file mode 100644
index 00000000..42415f00
--- /dev/null
+++ b/tests/ref/fate/j2k-dwt
@@ -0,0 +1,60 @@
+5/3i, decomp:15 border 151 170 140 183 milli-err2:        0
+9/7i, decomp:15 border 151 170 140 183 milli-err2:      544
+9/7f, decomp:15 border 151 170 140 183 err2:               0.000
+5/3i, decomp:21 border 173 201  81 189 milli-err2:        0
+9/7i, decomp:21 border 173 201  81 189 milli-err2:      592
+9/7f, decomp:21 border 173 201  81 189 err2:               0.000
+5/3i, decomp:22 border 213 227  76 245 milli-err2:        0
+9/7i, decomp:22 border 213 227  76 245 milli-err2:      533
+9/7f, decomp:22 border 213 227  76 245 err2:               0.000
+5/3i, decomp:13 border 134 157 184 203 milli-err2:        0
+9/7i, decomp:13 border 134 157 184 203 milli-err2:      535
+9/7f, decomp:13 border 134 157 184 203 err2:               0.000
+5/3i, decomp: 1 border 204 237   6 106 milli-err2:        0
+9/7i, decomp: 1 border 204 237   6 106 milli-err2:      219
+9/7f, decomp: 1 border 204 237   6 106 err2:               0.000
+5/3i, decomp:28 border  76 211  13 210 milli-err2:        0
+9/7i, decomp:28 border  76 211  13 210 milli-err2:      791
+9/7f, decomp:28 border  76 211  13 210 err2:               0.000
+5/3i, decomp:21 border  76  99  43 123 milli-err2:        0
+9/7i, decomp:21 border  76  99  43 123 milli-err2:      686
+9/7f, decomp:21 border  76  99  43 123 err2:               0.000
+5/3i, decomp:15 border 192 243 174 204 milli-err2:        0
+9/7i, decomp:15 border 192 243 174 204 milli-err2:      476
+9/7f, decomp:15 border 192 243 174 204 err2:               0.000
+5/3i, decomp:21 border  17  68  93 204 milli-err2:        0
+9/7i, decomp:21 border  17  68  93 204 milli-err2:      633
+9/7f, decomp:21 border  17  68  93 204 err2:               0.000
+5/3i, decomp:11 border 142 168  82 174 milli-err2:        0
+9/7i, decomp:11 border 142 168  82 174 milli-err2:      696
+9/7f, decomp:11 border 142 168  82 174 err2:               0.000
+5/3i, decomp:23 border 142 209 171 235 milli-err2:        0
+9/7i, decomp:23 border 142 209 171 235 milli-err2:      626
+9/7f, decomp:23 border 142 209 171 235 err2:               0.000
+5/3i, decomp:30 border  37 185  79 245 milli-err2:        0
+9/7i, decomp:30 border  37 185  79 245 milli-err2:      953
+9/7f, decomp:30 border  37 185  79 245 err2:               0.000
+5/3i, decomp: 5 border 129 236  30 243 milli-err2:        0
+9/7i, decomp: 5 border 129 236  30 243 milli-err2:      620
+9/7f, decomp: 5 border 129 236  30 243 err2:               0.000
+5/3i, decomp:10 border   5 160 146 247 milli-err2:        0
+9/7i, decomp:10 border   5 160 146 247 milli-err2:      797
+9/7f, decomp:10 border   5 160 146 247 err2:               0.000
+5/3i, decomp: 5 border 104 162   6  47 milli-err2:        0
+9/7i, decomp: 5 border 104 162   6  47 milli-err2:      603
+9/7f, decomp: 5 border 104 162   6  47 err2:               0.000
+5/3i, decomp:24 border  78 250 102 218 milli-err2:        0
+9/7i, decomp:24 border  78 250 102 218 milli-err2:      836
+9/7f, decomp:24 border  78 250 102 218 err2:               0.000
+5/3i, decomp:28 border  86  98  56  79 milli-err2:        0
+9/7i, decomp:28 border  86  98  56  79 milli-err2:      597
+9/7f, decomp:28 border  86  98  56  79 err2:               0.000
+5/3i, decomp: 6 border  95 238 197 214 milli-err2:        0
+9/7i, decomp: 6 border  95 238 197 214 milli-err2:      478
+9/7f, decomp: 6 border  95 238 197 214 err2:               0.000
+5/3i, decomp:17 border  77 169  93 165 milli-err2:        0
+9/7i, decomp:17 border  77 169  93 165 milli-err2:      616
+9/7f, decomp:17 border  77 169  93 165 err2:               0.000
+5/3i, decomp:22 border 178 187   7 119 milli-err2:        0
+9/7i, decomp:22 border 178 187   7 119 milli-err2:      392
+9/7f, decomp:22 border 178 187   7 119 err2:               0.000
diff --git a/tests/ref/fate/jpeg2000-dcinema b/tests/ref/fate/jpeg2000-dcinema
index 940759ae..08bbfdc4 100644
--- a/tests/ref/fate/jpeg2000-dcinema
+++ b/tests/ref/fate/jpeg2000-dcinema
@@ -1,3 +1,3 @@
 #tb 0: 1/24
-0,          0,          0,        1, 12441600, 0xf0de508b
-0,          1,          1,        1, 12441600, 0x8e50c249
+0,          0,          0,        1, 12441600, 0xfcf6a127
+0,          1,          1,        1, 12441600, 0x577b6a64
diff --git a/tests/ref/fate/limited_input_seek b/tests/ref/fate/limited_input_seek
new file mode 100644
index 00000000..e0c4bf15
--- /dev/null
+++ b/tests/ref/fate/limited_input_seek
@@ -0,0 +1 @@
+20a1bb9a1cfb23c1fe86f14e6065cd95
diff --git a/tests/ref/fate/limited_input_seek-copyts b/tests/ref/fate/limited_input_seek-copyts
new file mode 100644
index 00000000..92790a8b
--- /dev/null
+++ b/tests/ref/fate/limited_input_seek-copyts
@@ -0,0 +1 @@
+ec3604b1954ed80de364b8ef491771ce
diff --git a/tests/ref/fate/lossless-wma b/tests/ref/fate/lossless-wma
index 5bea19b2..c4c8a377 100644
--- a/tests/ref/fate/lossless-wma
+++ b/tests/ref/fate/lossless-wma
@@ -1 +1 @@
-35dc840f91cbcece02178d03c8f2fe26
+ab45f262b22030e908cb03f2cc8e89b5
diff --git a/tests/ref/fate/mkv b/tests/ref/fate/mkv
index aea378a8..765798cb 100644
--- a/tests/ref/fate/mkv
+++ b/tests/ref/fate/mkv
@@ -25,9 +25,9 @@
 1,        264,        264,       21,      609, 0xc0dc255c
 1,        285,        285,       21,      619, 0x9ac52dd1
 0,        292,        292,       41,    16751, 0xf293ab46, F=0x0
-0,        292,        417,       41,    22029, 0x3696462b, F=0x0
 1,        306,        306,       21,      574, 0xf6410d4d
 1,        327,        327,       22,      565, 0xfd561191
+0,        334,        417,       41,    22029, 0x3696462b, F=0x0
 1,        350,        350,       21,      713, 0x48425147
 1,        371,        371,       21,      537, 0x09bbf515
 0,        375,        375,       41,     5044, 0xa0344ae6, F=0x0
diff --git a/tests/ref/fate/movenc b/tests/ref/fate/movenc
new file mode 100644
index 00000000..845ef543
--- /dev/null
+++ b/tests/ref/fate/movenc
@@ -0,0 +1,28 @@
+249e02e3645ea5ca2c74397c62c53314 3269 non-empty-moov
+3281ff664e9a06e5a03ec6ea1729696c 3721 non-empty-moov-elst
+b408a545b1963a5ea82cf37208b66548 3629 non-empty-moov-no-elst
+a66c786022280c1f69ad7c98c719fa53 4435 ismv
+176a315a5385cb2e082d863e0fb22bf1 2891 empty-moov
+10eb3fdf6ed1400a1eec50746537159f 3283 empty-moov-no-elst
+bcd4d6d22f828f1061e13f3af459644f 3115 empty-moov-no-elst-no-adjust
+176a315a5385cb2e082d863e0fb22bf1 2891 delay-moov
+1398c80f1f5fd7f8e127bb5b17311016 3203 delay-moov-elst
+ed6dd0e0fd6d0d9d1145b201674325f6 2098 delay-moov-empty-track
+7f1dabd680135708c6ff359e4ab27165 2001 delay-moov-empty-track-flush
+39d798aa11a265c7906f9e11d4f303c0 1159 empty-moov-header
+a0165f4a26a409212b0946e981bdefb9 1584 empty-moov-content
+39d798aa11a265c7906f9e11d4f303c0 1159 delay-moov-header
+a0165f4a26a409212b0946e981bdefb9 1584 delay-moov-content
+272a474cfd2a68cc5f05b426b14a2b7d 876 empty-moov-second-frag
+272a474cfd2a68cc5f05b426b14a2b7d 876 empty-moov-second-frag-discont
+272a474cfd2a68cc5f05b426b14a2b7d 876 delay-moov-second-frag-discont
+6256445b9595de78be493e0faf2bc5d7 1219 delay-moov-elst-init
+fcae8f40e015b59aabc8d4a99a759ca1 996 delay-moov-elst-second-frag
+6256445b9595de78be493e0faf2bc5d7 1219 delay-moov-elst-init-discont
+fcae8f40e015b59aabc8d4a99a759ca1 996 delay-moov-elst-second-frag-discont
+29f875e401df0fc3026995d12872ef21 1219 delay-moov-elst-signal-init
+aa5462cc0d2144f72154d9c309edb57d 996 delay-moov-elst-signal-second-frag
+29f875e401df0fc3026995d12872ef21 1219 delay-moov-elst-signal-init-discont
+aa5462cc0d2144f72154d9c309edb57d 996 delay-moov-elst-signal-second-frag-discont
+6cd6085f4f0ff536acfcb77cb658eb47 4935 vfr
+6cd6085f4f0ff536acfcb77cb658eb47 4935 vfr-noduration
diff --git a/tests/ref/fate/opt b/tests/ref/fate/opt
index 3aa7423e..7b47d429 100644
--- a/tests/ref/fate/opt
+++ b/tests/ref/fate/opt
@@ -23,7 +23,7 @@ TestContext AVOptions:
   -rational          <rational>   E....... set rational (from 0 to 10) (default 1/1)
   -string            <string>     E....... set string (default "default")
   -escape            <string>     E....... set escape str (default "\=,")
-  -flags             <flags>      E....... set flags (default 1)
+  -flags             <flags>      E....... set flags (default cool)
      cool                         E....... set cool flag
      lame                         E....... set lame flag
      mu                           E....... set mu flag
@@ -31,7 +31,7 @@ TestContext AVOptions:
   -pix_fmt           <pix_fmt>    E....... set pixfmt (default 0bgr)
   -sample_fmt        <sample_fmt> E....... set samplefmt (default s16)
   -video_rate        <video_rate> E....... set videorate (default "25")
-  -duration          <duration>   E....... set duration (default 1000)
+  -duration          <duration>   E....... set duration (default 0.001)
   -color             <color>      E....... set color (default "pink")
   -cl                <channel_layout> E....... set channel layout (default 0x137)
   -bin               <binary>     E....... set binary value
@@ -40,6 +40,9 @@ TestContext AVOptions:
   -num64             <int64>      E....... set num 64bit (from 0 to 100) (default 1)
   -flt               <float>      E....... set float (from 0 to 100) (default 0.333333)
   -dbl               <double>     E....... set double (from 0 to 100) (default 0.333333)
+  -bool1             <boolean>    E....... set boolean value (default auto)
+  -bool2             <boolean>    E....... set boolean value (default true)
+  -bool3             <boolean>    E....... set boolean value (default false)
 
 Testing av_opt_is_set_to_default()
 name:       num default:1 error:
@@ -64,6 +67,9 @@ name:      bin2 default:1 error:
 name:     num64 default:0 error:
 name:       flt default:0 error:
 name:       dbl default:0 error:
+name:     bool1 default:0 error:
+name:     bool2 default:0 error:
+name:     bool3 default:1 error:
 name:       num default:1 error:
 name:    toggle default:1 error:
 name:  rational default:1 error:
@@ -86,9 +92,12 @@ name:      bin2 default:1 error:
 name:     num64 default:1 error:
 name:       flt default:1 error:
 name:       dbl default:1 error:
+name:     bool1 default:1 error:
+name:     bool2 default:1 error:
+name:     bool3 default:1 error:
 
 Test av_opt_serialize()
-num=0,toggle=1,rational=1/1,string=default,escape=\\\=\,,flags=0x00000001,size=200x300,pix_fmt=0bgr,sample_fmt=s16,video_rate=25/1,duration=0:00:00.001000,color=0xffc0cbff,cl=0x137,bin=62696E00,bin1=,bin2=,num64=1,flt=0.333333,dbl=0.333333
+num=0,toggle=1,rational=1/1,string=default,escape=\\\=\,,flags=0x00000001,size=200x300,pix_fmt=0bgr,sample_fmt=s16,video_rate=25/1,duration=0.001,color=0xffc0cbff,cl=0x137,bin=62696E00,bin1=,bin2=,num64=1,flt=0.333333,dbl=0.333333,bool1=auto,bool2=true,bool3=false
 Setting entry with key 'num' to value '0'
 Setting entry with key 'toggle' to value '1'
 Setting entry with key 'rational' to value '1/1'
@@ -99,7 +108,7 @@ Setting entry with key 'size' to value '200x300'
 Setting entry with key 'pix_fmt' to value '0bgr'
 Setting entry with key 'sample_fmt' to value 's16'
 Setting entry with key 'video_rate' to value '25/1'
-Setting entry with key 'duration' to value '0:00:00.001000'
+Setting entry with key 'duration' to value '0.001'
 Setting entry with key 'color' to value '0xffc0cbff'
 Setting entry with key 'cl' to value '0x137'
 Setting entry with key 'bin' to value '62696E00'
@@ -108,7 +117,10 @@ Setting entry with key 'bin2' to value ''
 Setting entry with key 'num64' to value '1'
 Setting entry with key 'flt' to value '0.333333'
 Setting entry with key 'dbl' to value '0.333333'
-num=0,toggle=1,rational=1/1,string=default,escape=\\\=\,,flags=0x00000001,size=200x300,pix_fmt=0bgr,sample_fmt=s16,video_rate=25/1,duration=0:00:00.001000,color=0xffc0cbff,cl=0x137,bin=62696E00,bin1=,bin2=,num64=1,flt=0.333333,dbl=0.333333
+Setting entry with key 'bool1' to value 'auto'
+Setting entry with key 'bool2' to value 'true'
+Setting entry with key 'bool3' to value 'false'
+num=0,toggle=1,rational=1/1,string=default,escape=\\\=\,,flags=0x00000001,size=200x300,pix_fmt=0bgr,sample_fmt=s16,video_rate=25/1,duration=0.001,color=0xffc0cbff,cl=0x137,bin=62696E00,bin1=,bin2=,num64=1,flt=0.333333,dbl=0.333333,bool1=auto,bool2=true,bool3=false
 
 Testing av_set_options_string()
 Setting options string ''
@@ -323,6 +335,12 @@ Setting options string 'dbl=101'
 Setting entry with key 'dbl' to value '101'
 Value 101.000000 for parameter 'dbl' out of range [0 - 100]
 Error 'dbl=101'
+Setting options string 'bool1=true'
+Setting entry with key 'bool1' to value 'true'
+OK    'bool1=true'
+Setting options string 'bool2=auto'
+Setting entry with key 'bool2' to value 'auto'
+OK    'bool2=auto'
 
 Testing av_opt_set_from_string()
 Setting options string ''
diff --git a/tests/ref/fate/parseutils b/tests/ref/fate/parseutils
index 14824528..33062292 100644
--- a/tests/ref/fate/parseutils
+++ b/tests/ref/fate/parseutils
@@ -70,11 +70,14 @@ fmt:'%Y-%m-%d %H:%M:%S' spec:'2012-12-21 20:12:21' -> 2012-12-21 20:12:21
 fmt:'  %Y - %m - %d %H : %M : %S' spec:'   2012 - 12 -  21   20 : 12 : 21' -> 2012-12-21 20:12:21
 
 Testing av_parse_time()
-(now is 2012-03-17 09:14:13 +0100, local time is UTC+1)
-now                      ->     1331972053.000000 = 2012-03-17T08:14:13Z
+(now is 2012-03-17 09:14:13.2 +0100, local time is UTC+1)
+now                      ->     1331972053.200000 = 2012-03-17T08:14:13Z
 12:35:46                 ->     1331984146.000000 = 2012-03-17T11:35:46Z
 2000-12-20 0:02:47.5z    ->      977270567.500000 = 2000-12-20T00:02:47Z
+2012 - 02-22  17:44:07   ->     1329929047.000000 = 2012-02-22T16:44:07Z
 2000-12-20T010247.6      ->      977270567.600000 = 2000-12-20T00:02:47Z
+2000-12-12 1:35:46+05:30 ->      976565146.000000 = 2000-12-11T20:05:46Z
+2002-12-12 22:30:40-02   ->     1039739440.000000 = 2002-12-13T00:30:40Z
 2:34:56.79               ->           +9296790000
 -1:23:45.67              ->           -5025670000
 42.1729                  ->             +42172900
diff --git a/tests/ref/fate/qtrle-1bit b/tests/ref/fate/qtrle-1bit
index f1911692..3eccc27d 100644
--- a/tests/ref/fate/qtrle-1bit
+++ b/tests/ref/fate/qtrle-1bit
@@ -1,39 +1,39 @@
 #tb 0: 1/12
-0,          0,          0,        1,     9600, 0xc5921aa2
-0,          1,          1,        1,     9600, 0x9032fc52
-0,          2,          2,        1,     9600, 0x7db0038e
-0,          3,          3,        1,     9600, 0x95b73c41
-0,          4,          4,        1,     9600, 0x531e4189
-0,          5,          5,        1,     9600, 0xb73390ec
-0,          6,          6,        1,     9600, 0x958e8221
-0,          7,          7,        1,     9600, 0xd393f8a6
-0,          8,          8,        1,     9600, 0xa085da1c
-0,          9,          9,        1,     9600, 0x57ace74f
-0,         10,         10,        1,     9600, 0x5d11a308
-0,         11,         11,        1,     9600, 0x13e133b7
-0,         12,         12,        1,     9600, 0x494edb86
-0,         13,         13,        1,     9600, 0x43a448ea
-0,         14,         14,        1,     9600, 0x3562d35b
-0,         15,         15,        1,     9600, 0x0bc655d2
-0,         16,         16,        1,     9600, 0xbece73a1
-0,         17,         17,        1,     9600, 0x82e7cfa1
-0,         18,         18,        1,     9600, 0xda29fd8f
-0,         19,         19,        1,     9600, 0x70fb700b
-0,         20,         20,        1,     9600, 0xaf57a6b0
-0,         21,         21,        1,     9600, 0x0a5ed9b9
-0,         22,         22,        1,     9600, 0xf7c62c38
-0,         23,         23,        1,     9600, 0x0aa2ccfd
-0,         24,         24,        1,     9600, 0xc9adabae
-0,         25,         25,        1,     9600, 0x67ff0aba
-0,         26,         26,        1,     9600, 0xea79a465
-0,         27,         27,        1,     9600, 0x8928c626
-0,         28,         28,        1,     9600, 0x8dab4111
-0,         29,         29,        1,     9600, 0x81ef63f9
-0,         30,         30,        1,     9600, 0xf977bc5e
-0,         31,         31,        1,     9600, 0x9e6a3f4a
-0,         32,         32,        1,     9600, 0x77c92865
-0,         33,         33,        1,     9600, 0x3915170d
-0,         34,         34,        1,     9600, 0xbe19b995
-0,         35,         35,        1,     9600, 0x3e8a3077
-0,         36,         36,        1,     9600, 0x1331342e
-0,         37,         37,        1,     9600, 0x4d692175
+0,          0,          0,        1,    77824, 0xc298c68b
+0,          1,          1,        1,    77824, 0x22f7c63e
+0,          2,          2,        1,    77824, 0x2aacc669
+0,          3,          3,        1,    77824, 0xb428c6e9
+0,          4,          4,        1,    77824, 0x5d50c786
+0,          5,          5,        1,    77824, 0x8ddbc9e9
+0,          6,          6,        1,    77824, 0x6dd5c9cf
+0,          7,          7,        1,    77824, 0x7a72c647
+0,          8,          8,        1,    77824, 0x0659c448
+0,          9,          9,        1,    77824, 0x228bc465
+0,         10,         10,        1,    77824, 0x87d9c3f9
+0,         11,         11,        1,    77824, 0xd07dbf9c
+0,         12,         12,        1,    77824, 0x9c91bc67
+0,         13,         13,        1,    77824, 0xf91fb881
+0,         14,         14,        1,    77824, 0x068eb41a
+0,         15,         15,        1,    77824, 0x4b52b03f
+0,         16,         16,        1,    77824, 0xc5e9b11b
+0,         17,         17,        1,    77824, 0xf0aeac90
+0,         18,         18,        1,    77824, 0x5700ad55
+0,         19,         19,        1,    77824, 0x4d93a895
+0,         20,         20,        1,    77824, 0x6df1aac7
+0,         21,         21,        1,    77824, 0xeee6b4ae
+0,         22,         22,        1,    77824, 0x6fe5bf5a
+0,         23,         23,        1,    77824, 0x8c1ac3d3
+0,         24,         24,        1,    77824, 0x5ab2c2da
+0,         25,         25,        1,    77824, 0x6141aeab
+0,         26,         26,        1,    77824, 0xbda0a2b8
+0,         27,         27,        1,    77824, 0x46fa932a
+0,         28,         28,        1,    77824, 0xed009680
+0,         29,         29,        1,    77824, 0xf3078f3e
+0,         30,         30,        1,    77824, 0x444f8b3c
+0,         31,         31,        1,    77824, 0x7b468685
+0,         32,         32,        1,    77824, 0x2b078646
+0,         33,         33,        1,    77824, 0x9165859b
+0,         34,         34,        1,    77824, 0xf96682c7
+0,         35,         35,        1,    77824, 0x239186d0
+0,         36,         36,        1,    77824, 0x499b8ec3
+0,         37,         37,        1,    77824, 0x20658ea8
diff --git a/tests/ref/fate/rscc b/tests/ref/fate/rscc
new file mode 100644
index 00000000..69dbfe15
--- /dev/null
+++ b/tests/ref/fate/rscc
@@ -0,0 +1,6 @@
+#tb 0: 1/10
+0,          0,          0,        1,  6814720, 0x1365f8ef
+0,          1,          1,        1,  6814720, 0x90838983
+0,          2,          2,        1,  6814720, 0xf0cc3131
+0,          3,          3,        1,  6814720, 0xc07e404d
+0,          4,          4,        1,  6814720, 0x945962dd
diff --git a/tests/ref/fate/screenpresso b/tests/ref/fate/screenpresso
new file mode 100644
index 00000000..bbdffb44
--- /dev/null
+++ b/tests/ref/fate/screenpresso
@@ -0,0 +1,5 @@
+#tb 0: 1/15
+0,          0,          0,        1,   691200, 0xfdbdfad6
+0,          1,          1,        1,   691200, 0xc5feb961
+0,          4,          4,        1,   691200, 0x4c8c7e23
+0,          8,          8,        1,   691200, 0xd95c89f8
diff --git a/tests/ref/fate/sgi-gray b/tests/ref/fate/sgi-gray
deleted file mode 100644
index 4d4d3494..00000000
--- a/tests/ref/fate/sgi-gray
+++ /dev/null
@@ -1,2 +0,0 @@
-#tb 0: 1/25
-0,          0,          0,        1,    65536, 0xe36c12e1
diff --git a/tests/ref/fate/sgi-gray16 b/tests/ref/fate/sgi-gray16
index f56e53eb..0908746a 100644
--- a/tests/ref/fate/sgi-gray16
+++ b/tests/ref/fate/sgi-gray16
@@ -1,2 +1,2 @@
 #tb 0: 1/25
-0,          0,          0,        1,   131072, 0x6855d247
+0,          0,          0,        1,   262144, 0x6c27f084
diff --git a/tests/ref/fate/sgi-gray16-rle b/tests/ref/fate/sgi-gray16-rle
new file mode 100644
index 00000000..09903d5b
--- /dev/null
+++ b/tests/ref/fate/sgi-gray16-rle
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,   262144, 0x65ef57ff
diff --git a/tests/ref/fate/sgi-gray8 b/tests/ref/fate/sgi-gray8
new file mode 100644
index 00000000..b0e06069
--- /dev/null
+++ b/tests/ref/fate/sgi-gray8
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,   131072, 0xb0702a94
diff --git a/tests/ref/fate/sgi-gray8-rle b/tests/ref/fate/sgi-gray8-rle
new file mode 100644
index 00000000..b0e06069
--- /dev/null
+++ b/tests/ref/fate/sgi-gray8-rle
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,   131072, 0xb0702a94
diff --git a/tests/ref/fate/sgi-rgb24 b/tests/ref/fate/sgi-rgb24
index 7a6e8553..30aca1ce 100644
--- a/tests/ref/fate/sgi-rgb24
+++ b/tests/ref/fate/sgi-rgb24
@@ -1,2 +1,2 @@
 #tb 0: 1/25
-0,          0,          0,        1,   196608, 0x5b24c51a
+0,          0,          0,        1,   393216, 0xa9b28fd9
diff --git a/tests/ref/fate/sgi-rgb48 b/tests/ref/fate/sgi-rgb48
index b0521b7f..8f3ca50b 100644
--- a/tests/ref/fate/sgi-rgb48
+++ b/tests/ref/fate/sgi-rgb48
@@ -1,2 +1,2 @@
 #tb 0: 1/25
-0,          0,          0,        1,   393216, 0xf6b0d73a
+0,          0,          0,        1,   786432, 0xee4aa667
diff --git a/tests/ref/fate/sgi-rgba b/tests/ref/fate/sgi-rgba
index 9b3d4cb7..058f5f99 100644
--- a/tests/ref/fate/sgi-rgba
+++ b/tests/ref/fate/sgi-rgba
@@ -1,2 +1,2 @@
 #tb 0: 1/25
-0,          0,          0,        1,   262144, 0x7b38d40b
+0,          0,          0,        1,   524288, 0x4ee5adbb
diff --git a/tests/ref/fate/sgi-rgba-rle b/tests/ref/fate/sgi-rgba-rle
new file mode 100644
index 00000000..058f5f99
--- /dev/null
+++ b/tests/ref/fate/sgi-rgba-rle
@@ -0,0 +1,2 @@
+#tb 0: 1/25
+0,          0,          0,        1,   524288, 0x4ee5adbb
diff --git a/tests/ref/fate/sgi-rgba64 b/tests/ref/fate/sgi-rgba64
index 94e35afd..f4e939ec 100644
--- a/tests/ref/fate/sgi-rgba64
+++ b/tests/ref/fate/sgi-rgba64
@@ -1,2 +1,2 @@
 #tb 0: 1/25
-0,          0,          0,        1,   524288, 0xce70f51c
+0,          0,          0,        1,  1048576, 0xc657e22b
diff --git a/tests/ref/fate/sgi-rgba64-rle b/tests/ref/fate/sgi-rgba64-rle
index 1dfac760..cdf5fd0e 100644
--- a/tests/ref/fate/sgi-rgba64-rle
+++ b/tests/ref/fate/sgi-rgba64-rle
@@ -1,2 +1,2 @@
 #tb 0: 1/25
-0,          0,          0,        1,   614400, 0xb92d409d
+0,          0,          0,        1,  1048576, 0xb619d0f1
diff --git a/tests/ref/fate/source b/tests/ref/fate/source
new file mode 100644
index 00000000..ec0a98e2
--- /dev/null
+++ b/tests/ref/fate/source
@@ -0,0 +1,29 @@
+Files without standard license headers:
+cmdutils_common_opts.h
+compat/avisynth/windowsPorts/basicDataTypeConversions.h
+compat/avisynth/windowsPorts/windows2linux.h
+libavcodec/file_open.c
+libavcodec/interplayacm.c
+libavcodec/log2_tab.c
+libavcodec/mathops.c
+libavcodec/reverse.c
+libavdevice/file_open.c
+libavfilter/log2_tab.c
+libavformat/file_open.c
+libavformat/golomb_tab.c
+libavformat/log2_tab.c
+libswresample/log2_tab.c
+libswscale/log2_tab.c
+tools/uncoded_frame.c
+tools/yuvcmp.c
+Headers without standard inclusion guards:
+cmdutils_common_opts.h
+compat/avisynth/avisynth_c.h
+compat/avisynth/avs/capi.h
+compat/avisynth/avs/config.h
+compat/avisynth/avs/types.h
+compat/avisynth/avxsynth_c.h
+compat/avisynth/windowsPorts/basicDataTypeConversions.h
+compat/avisynth/windowsPorts/windows2linux.h
+compat/float/float.h
+compat/float/limits.h
diff --git a/tests/ref/fate/sub-cc b/tests/ref/fate/sub-cc
index 3d8c7642..0d5bc776 100644
--- a/tests/ref/fate/sub-cc
+++ b/tests/ref/fate/sub-cc
@@ -6,9 +6,9 @@ PlayResY: 288
 
 [V4+ Styles]
 Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
-Style: Default,Arial,16,&Hffffff,&Hffffff,&H0,&H0,0,0,0,0,100,100,0,0,1,1,0,2,10,10,10,0
+Style: Default,Monospace,16,&Hffffff,&Hffffff,&H0,&H0,0,0,0,0,100,100,0,0,3,1,0,2,10,10,10,0
 
 [Events]
 Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
-Dialogue: 0,0:00:12.36,0:00:40.83,Default,,0,0,0,,( inaudible radio chatter )
-Dialogue: 0,0:00:40.83,0:00:59.07,Default,,0,0,0,,( inaudible radio chatter )\N>> Safety remains our number one
+Dialogue: 0,0:00:12.36,0:00:40.83,Default,,0,0,0,,({\i1} inaudible radio chatter{\i0} )
+Dialogue: 0,0:00:40.83,0:00:59.07,Default,,0,0,0,,({\i1} inaudible radio chatter{\i0} )\N>> Safety remains our number one
diff --git a/tests/ref/fate/sub-cc-realtime b/tests/ref/fate/sub-cc-realtime
new file mode 100644
index 00000000..c9e6b6c6
--- /dev/null
+++ b/tests/ref/fate/sub-cc-realtime
@@ -0,0 +1,42 @@
+[Script Info]
+; Script generated by FFmpeg/Lavc
+ScriptType: v4.00+
+PlayResX: 384
+PlayResY: 288
+
+[V4+ Styles]
+Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
+Style: Default,Monospace,16,&Hffffff,&Hffffff,&H0,&H0,0,0,0,0,100,100,0,0,3,1,0,2,10,10,10,0
+
+[Events]
+Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
+Dialogue: 0,0:00:14.14,9:59:59.99,Default,,0,0,0,,(
+Dialogue: 0,0:00:15.47,9:59:59.99,Default,,0,0,0,,({\i1} in
+Dialogue: 0,0:00:15.92,9:59:59.99,Default,,0,0,0,,({\i1} inau
+Dialogue: 0,0:00:16.36,9:59:59.99,Default,,0,0,0,,({\i1} inaudi
+Dialogue: 0,0:00:16.81,9:59:59.99,Default,,0,0,0,,({\i1} inaudibl
+Dialogue: 0,0:00:17.25,9:59:59.99,Default,,0,0,0,,({\i1} inaudible 
+Dialogue: 0,0:00:17.70,9:59:59.99,Default,,0,0,0,,({\i1} inaudible ra
+Dialogue: 0,0:00:18.14,9:59:59.99,Default,,0,0,0,,({\i1} inaudible radi
+Dialogue: 0,0:00:18.59,9:59:59.99,Default,,0,0,0,,({\i1} inaudible radio 
+Dialogue: 0,0:00:19.03,9:59:59.99,Default,,0,0,0,,({\i1} inaudible radio ch
+Dialogue: 0,0:00:19.48,9:59:59.99,Default,,0,0,0,,({\i1} inaudible radio chat
+Dialogue: 0,0:00:19.92,9:59:59.99,Default,,0,0,0,,({\i1} inaudible radio chatte
+Dialogue: 0,0:00:20.36,9:59:59.99,Default,,0,0,0,,({\i1} inaudible radio chatter
+Dialogue: 0,0:00:21.70,9:59:59.99,Default,,0,0,0,,({\i1} inaudible radio chatter{\i0} )
+Dialogue: 0,0:00:42.61,9:59:59.99,Default,,0,0,0,,({\i1} inaudible radio chatter{\i0} )\N>>
+Dialogue: 0,0:00:43.05,9:59:59.99,Default,,0,0,0,,({\i1} inaudible radio chatter{\i0} )\N>> S
+Dialogue: 0,0:00:43.50,9:59:59.99,Default,,0,0,0,,({\i1} inaudible radio chatter{\i0} )\N>> Saf
+Dialogue: 0,0:00:43.94,9:59:59.99,Default,,0,0,0,,({\i1} inaudible radio chatter{\i0} )\N>> Safet
+Dialogue: 0,0:00:44.39,9:59:59.99,Default,,0,0,0,,({\i1} inaudible radio chatter{\i0} )\N>> Safety 
+Dialogue: 0,0:00:44.83,9:59:59.99,Default,,0,0,0,,({\i1} inaudible radio chatter{\i0} )\N>> Safety re
+Dialogue: 0,0:00:45.28,9:59:59.99,Default,,0,0,0,,({\i1} inaudible radio chatter{\i0} )\N>> Safety rema
+Dialogue: 0,0:00:45.72,9:59:59.99,Default,,0,0,0,,({\i1} inaudible radio chatter{\i0} )\N>> Safety remain
+Dialogue: 0,0:00:46.17,9:59:59.99,Default,,0,0,0,,({\i1} inaudible radio chatter{\i0} )\N>> Safety remains 
+Dialogue: 0,0:00:46.61,9:59:59.99,Default,,0,0,0,,({\i1} inaudible radio chatter{\i0} )\N>> Safety remains ou
+Dialogue: 0,0:00:47.06,9:59:59.99,Default,,0,0,0,,({\i1} inaudible radio chatter{\i0} )\N>> Safety remains our 
+Dialogue: 0,0:00:47.50,9:59:59.99,Default,,0,0,0,,({\i1} inaudible radio chatter{\i0} )\N>> Safety remains our nu
+Dialogue: 0,0:00:47.95,9:59:59.99,Default,,0,0,0,,({\i1} inaudible radio chatter{\i0} )\N>> Safety remains our numb
+Dialogue: 0,0:00:48.39,9:59:59.99,Default,,0,0,0,,({\i1} inaudible radio chatter{\i0} )\N>> Safety remains our number
+Dialogue: 0,0:00:48.84,9:59:59.99,Default,,0,0,0,,({\i1} inaudible radio chatter{\i0} )\N>> Safety remains our number o
+Dialogue: 0,0:00:49.28,9:59:59.99,Default,,0,0,0,,({\i1} inaudible radio chatter{\i0} )\N>> Safety remains our number one
diff --git a/tests/ref/fate/sub-movtext b/tests/ref/fate/sub-movtext
index 6a90e967..94ed22d3 100644
--- a/tests/ref/fate/sub-movtext
+++ b/tests/ref/fate/sub-movtext
@@ -6,7 +6,7 @@ PlayResY: 288
 
 [V4+ Styles]
 Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
-Style: Default,Arial,16,&Hffffff,&Hffffff,&H0,&H0,0,0,0,0,100,100,0,0,1,1,0,2,10,10,10,0
+Style: Default,Serif,18,&Hffffff,&Hffffff,&H0,&H0,0,0,0,0,100,100,0,0,1,1,0,2,10,10,10,0
 
 [Events]
 Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
diff --git a/tests/ref/fate/sub-sami b/tests/ref/fate/sub-sami
index caa85a26..3a013908 100644
--- a/tests/ref/fate/sub-sami
+++ b/tests/ref/fate/sub-sami
@@ -10,12 +10,12 @@ Style: Default,Arial,16,&Hffffff,&Hffffff,&H0,&H0,0,0,0,0,100,100,0,0,1,1,0,2,10
 
 [Events]
 Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
-Dialogue: 0,0:00:00.00,0:00:00.01,Default,,0,0,0,,{\i1}Pres. John F. Kennedy {\i0}\N
-Dialogue: 0,0:00:00.01,0:00:08.80,Default,,0,0,0,,{\i1}Pres. John F. Kennedy {\i0}\NLet the word go forth, from this time and place to friend and foe alike that the torch 
-Dialogue: 0,0:00:08.80,0:00:19.50,Default,,0,0,0,,{\i1}Pres. John F. Kennedy {\i0}\Nhas been passed to a new generation of Americans, born in this century, tempered by war, 
-Dialogue: 0,0:00:19.50,0:00:28.00,Default,,0,0,0,,{\i1}Pres. John F. Kennedy {\i0}\Ndisciplined by a hard and bitter peace, proud of our ancient heritage, and unwilling to witness 
-Dialogue: 0,0:00:28.00,0:00:38.00,Default,,0,0,0,,{\i1}Pres. John F. Kennedy {\i0}\Nor permit the slow undoing of those human rights to which this nation has always 
-Dialogue: 0,0:00:38.00,0:00:46.00,Default,,0,0,0,,{\i1}Pres. John F. Kennedy {\i0}\Nbeen committed and to which we are committed today at home and around the world. 
-Dialogue: 0,0:00:46.00,0:01:01.00,Default,,0,0,0,,{\i1}Pres. John F. Kennedy {\i0}\NLet every nation know, whether it wishes us well or ill, that we shall pay any price, bear any burden, 
-Dialogue: 0,0:01:01.00,0:01:13.00,Default,,0,0,0,,{\i1}Pres. John F. Kennedy {\i0}\Nmeet any hardship, support any friend, oppose any foe, to ensure the survival and success of liberty. 
-Dialogue: 0,0:01:13.00,9:59:59.99,Default,,0,0,0,,{\i1}End of: {\i0}\NPresident John F. Kennedy Speech 
+Dialogue: 0,0:00:00.00,0:00:00.01,Default,,0,0,0,,{\i1}Pres. John F. Kennedy{\i0}\N
+Dialogue: 0,0:00:00.01,0:00:08.80,Default,,0,0,0,,{\i1}Pres. John F. Kennedy{\i0}\NLet the word go forth, from this time and place to friend and foe alike that the torch
+Dialogue: 0,0:00:08.80,0:00:19.50,Default,,0,0,0,,{\i1}Pres. John F. Kennedy{\i0}\Nhas been passed to a new generation of Americans, born in this century, tempered by war,
+Dialogue: 0,0:00:19.50,0:00:28.00,Default,,0,0,0,,{\i1}Pres. John F. Kennedy{\i0}\Ndisciplined by a hard and bitter peace, proud of our ancient heritage, and unwilling to witness
+Dialogue: 0,0:00:28.00,0:00:38.00,Default,,0,0,0,,{\i1}Pres. John F. Kennedy{\i0}\Nor permit the slow undoing of those human rights to which this nation has always
+Dialogue: 0,0:00:38.00,0:00:46.00,Default,,0,0,0,,{\i1}Pres. John F. Kennedy{\i0}\Nbeen committed and to which we are committed today at home and around the world.
+Dialogue: 0,0:00:46.00,0:01:01.00,Default,,0,0,0,,{\i1}Pres. John F. Kennedy{\i0}\NLet every nation know, whether it wishes us well or ill, that we shall pay any price, bear any burden,
+Dialogue: 0,0:01:01.00,0:01:13.00,Default,,0,0,0,,{\i1}Pres. John F. Kennedy{\i0}\Nmeet any hardship, support any friend, oppose any foe, to ensure the survival and success of liberty.
+Dialogue: 0,0:01:13.00,9:59:59.99,Default,,0,0,0,,{\i1}End of:{\i0}\NPresident John F. Kennedy Speech
diff --git a/tests/ref/fate/sub-sami2 b/tests/ref/fate/sub-sami2
new file mode 100644
index 00000000..9e9c8087
--- /dev/null
+++ b/tests/ref/fate/sub-sami2
@@ -0,0 +1,91 @@
+[Script Info]
+; Script generated by FFmpeg/Lavc
+ScriptType: v4.00+
+PlayResX: 384
+PlayResY: 288
+
+[V4+ Styles]
+Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
+Style: Default,Arial,16,&Hffffff,&Hffffff,&H0,&H0,0,0,0,0,100,100,0,0,1,1,0,2,10,10,10,0
+
+[Events]
+Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
+Dialogue: 0,0:00:01.51,0:00:01.51,Default,,0,0,0,,by Psyence Fictionist\Npsyencefictionist@gmail.com
+Dialogue: 0,0:00:01.51,0:00:08.61,Default,,0,0,0,,Sync by: honeybunny and Kerensky\Nwww.Addic7ed.com
+Dialogue: 0,0:00:10.11,0:00:10.11,Default,,0,0,0,,\N{\b1}사랑과 배신\N탐욕과 살육의 이야기죠{\b0}
+Dialogue: 0,0:00:10.11,0:00:13.98,Default,,0,0,0,,\N{\c&H800080&}The{\c}{\c&HCBC0FF&}re{\c} {\c&HFF&}is{\c} {\c&HA5FF&}lo{\c}{\c&HFFFF&}ve{\c} {\c&H8000&}and{\c}{\c&HFFFF00&} bet{\c}{\c&HFF0000&}rayal{\c},\N{\b1}{\c&H808080&}g{\c}r{\c&H808080&}e{\c}e{\c&H808080&}d{\c} and {\c&HFF&}m{\c}{\c&H808080&}u{\c}{\c&HFF&}rder{\c}{\b0}.
+Dialogue: 0,0:00:17.67,0:00:17.67,Default,,0,0,0,,\N{\c&HFFFF&}선악의 정의에 대해서\N대립하는 가치관을 가진{\c}
+Dialogue: 0,0:00:17.67,0:00:21.72,Default,,0,0,0,,\N{\c&HCBC0FF&}{\fs6}It's{\fs} {\fs8}set{\fs}{\fs10} in {\fs}{\fs12}this{\fs}{\fs14} intere{\fs}{\fs14}sting{\fs}\N{\fs16} world{\fs}{\fs18} of{\fs} {\fs20}cont{\fs}{\fs22}rasting{\fs}{\fs24} ideology{\fs}{\c}
+Dialogue: 0,0:00:21.84,0:00:21.84,Default,,0,0,0,,\N{\u1}매력적인 세계에서\N이 모든 것이 펼쳐집니다{\u1}
+Dialogue: 0,0:00:21.84,0:00:23.58,Default,,0,0,0,,\N{\i1}{\c&H9966CC&}of{\c}{\c&HC2A3E0&} what's{\c} {\c&HE0D1F0&}right{\c} {\c&HFCFAFE&}and{\c} wrong.{\i0}
+Dialogue: 0,0:00:23.69,0:00:23.69,Default,,0,0,0,,\N{\i1}이 주제를 심오한 철학으로\N담아내고 있어요{\i0}
+Dialogue: 0,0:00:23.69,0:00:25.67,Default,,0,0,0,,\N{\fs20}{\c&HFF0000&}{\s1}It{\s0}{\c}{\fs} has {\fs15}{\c&HFFFF00&}a{\c}{\fs} great {\fs16}{\c&HFFCC00&}philosophy{\c}{\fs} about it.
+Dialogue: 0,0:00:40.22,0:00:40.22,Default,,0,0,0,,\N{\s1}"왕좌의 게임"은 웨스테로스라는 가상왕국의\N권력 분쟁 이야기입니다{\s0}
+Dialogue: 0,0:00:40.22,0:00:47.94,Default,,0,0,0,,\N{\c&HA5FF&}{\fs26}"Game of Thrones"{\fs}{\c} {\c&H2A2AA5&}{\b1}is{\b0}{\c}{\c&HFFFF&}{\fs24}{\i1} about{\i0}{\fs}{\c} {\c&H336699&}{\fs14}power{\fs}{\c}{\c&HFF&} struggles{\c}\N{\c&HA5FF&}{\fs8}in a fantasy{\fs}{\c&HCBC0FF&} kingdom{\c&HA5FF&}, called {\fs6}Westeros.{\fs}{\c}
+Dialogue: 0,0:00:48.06,0:00:48.06,Default,,0,0,0,,\N철의 왕좌를 둘러싼\N권력 분쟁이죠
+Dialogue: 0,0:00:48.06,0:00:50.76,Default,,0,0,0,,\N{\c&H8000&}And it's a power struggle\Nfor the Iron Throne,{\c}
+Dialogue: 0,0:00:50.88,0:00:50.88,Default,,0,0,0,,\N{\fs20}왕국의 권력 정점이라고\N할 수 있는 자리에요{\fs}
+Dialogue: 0,0:00:50.88,0:00:53.13,Default,,0,0,0,,\Nwhich is the seat of power\Nin this kingdom.
+Dialogue: 0,0:00:53.25,0:00:53.25,Default,,0,0,0,,\N전운이 감도네, 네드
+Dialogue: 0,0:00:53.25,0:00:55.07,Default,,0,0,0,,\NThere's a war coming, Ned.
+Dialogue: 0,0:00:56.01,0:00:56.01,Default,,0,0,0,, \N언제 누구와 싸우게 될지는 몰라\N하지만 분명 전쟁이 일어날걸세
+Dialogue: 0,0:00:56.01,0:01:00.09,Default,,0,0,0,,\NI don't know when, I don't know who\Nwould be fighting, but it's coming.
+Dialogue: 0,0:01:01.10,0:01:01.10,Default,,0,0,0,,\N이야기의 핵심은 두 주요 가문의\N권력을 둘러싼 갈등입니다
+Dialogue: 0,0:01:01.10,0:01:07.04,Default,,0,0,0,,\N{\i1}At the core of it there's a conflict for\Npower between two great houses initially.{\i0}
+Dialogue: 0,0:01:07.16,0:01:07.16,Default,,0,0,0,,\N스타크 가문과 라니스터 가문이죠
+Dialogue: 0,0:01:07.16,0:01:10.04,Default,,0,0,0,,\NHouse Stark and House Lannister.
+Dialogue: 0,0:01:10.16,0:01:10.16,Default,,0,0,0,,\N그 외에 여러 가문이\N서로 경쟁합니다
+Dialogue: 0,0:01:10.16,0:01:13.25,Default,,0,0,0,,\NThe other major houses are\Nall contenders as well.
+Dialogue: 0,0:01:13.37,0:01:13.37,Default,,0,0,0,,\N흥미진진하게 정치적으로\N얽혀있는 상황이죠
+Dialogue: 0,0:01:13.37,0:01:16.11,Default,,0,0,0,,\NIt's a suitably complicated\Npolitical situation.
+Dialogue: 0,0:01:16.34,0:01:16.34,Default,,0,0,0,,\N옛 말에 "권력은 부패한다"라죠
+Dialogue: 0,0:01:16.34,0:01:18.80,Default,,0,0,0,,\NThe old truth "the power corrupts",\NI think
+Dialogue: 0,0:01:18.92,0:01:18.92,Default,,0,0,0,,\N옳은 말입니다\N이 작품에서도 드러나죠
+Dialogue: 0,0:01:18.92,0:01:21.66,Default,,0,0,0,,\Nit's very valid and it\Nshows in this series.
+Dialogue: 0,0:01:21.78,0:01:21.78,Default,,0,0,0,,\N권력을 얻은 등장인물들이\N어떻게 변해가는지 보시게 될겁니다
+Dialogue: 0,0:01:21.78,0:01:24.59,Default,,0,0,0,,\NYou see characters come into\Npower and how they change.
+Dialogue: 0,0:01:24.71,0:01:24.71,Default,,0,0,0,,\N그렇게 등장인물들은\N대의를 보는 시야를 잃어가고
+Dialogue: 0,0:01:24.71,0:01:28.86,Default,,0,0,0,,\NIn a way it's about how people\Nforget to see the bigger picture,
+Dialogue: 0,0:01:28.98,0:01:28.98,Default,,0,0,0,,\N사리사욕을 쫒는데 정신이 팔려\N공공의 위협을 외면하게 되죠
+Dialogue: 0,0:01:28.98,0:01:33.89,Default,,0,0,0,,\N{\u1}this common threat, that everybody\Nkind of ignores, because they're too busy{\u0}
+Dialogue: 0,0:01:34.01,0:01:35.24,Default,,0,0,0,,\Npursuing their own interests.
+Dialogue: 0,0:01:35.36,0:01:35.36,Default,,0,0,0,,\N한편, 일곱 왕국의 밖에서는\N두 개의 거대한 위협이 부상합니다
+Dialogue: 0,0:01:35.36,0:01:40.23,Default,,0,0,0,,\N{\fs30}And meanwhile, outside the Seven\NKingdoms, two great threats arising.{\fs}
+Dialogue: 0,0:01:40.35,0:01:40.35,Default,,0,0,0,,\N<sup>하나는 바다 건너\N타가리엔 일족 유배자들이며</sub>
+Dialogue: 0,0:01:40.35,0:01:44.06,Default,,0,0,0,,\NOne across the sea, in the exile\NTargaryen siblings,
+Dialogue: 0,0:01:44.17,0:01:44.17,Default,,0,0,0,,\N또 하나는 일곱 왕국의\N국경이 자리잡은
+Dialogue: 0,0:01:44.17,0:01:47.39,Default,,0,0,0,,\Nand another far to the north,\Nbeyond the Wall,
+Dialogue: 0,0:01:47.51,0:01:47.51,Default,,0,0,0,,\N저 멀리 북쪽 장벽 너머\N초자연적인 존재들이 도사리는
+Dialogue: 0,0:01:47.51,0:01:50.07,Default,,0,0,0,,\Nwhich is the boundary\Nof the Seven Kingdoms,
+Dialogue: 0,0:01:50.18,0:01:50.18,Default,,0,0,0,,\N춥디 추운 땅에서 일어납니다
+Dialogue: 0,0:01:50.18,0:01:55.28,Default,,0,0,0,,\Nin lands of perpetual ice and cold,\Nwhere supernatural threat is stirring.
+Dialogue: 0,0:01:56.45,0:01:56.45,Default,,0,0,0,,\N기존의 어떤 작품과도 다릅니다
+Dialogue: 0,0:01:56.45,0:02:00.45,Default,,0,0,0,,\NIt's very different from\Nanything that's been done.
+Dialogue: 0,0:02:00.58,0:02:00.58,Default,,0,0,0,,\N이 작품처럼 어두운 판타지는\N없을거라고 봅니다
+Dialogue: 0,0:02:00.58,0:02:03.97,Default,,0,0,0,,\NI can't think of another fantasy\Nwhich is as dark as this one is,
+Dialogue: 0,0:02:04.09,0:02:04.09,Default,,0,0,0,,\N아주 적나라하고 현실적이죠
+Dialogue: 0,0:02:04.09,0:02:05.65,Default,,0,0,0,,\Nwhich is as gritty and as real.
+Dialogue: 0,0:02:05.77,0:02:05.77,Default,,0,0,0,,\N등장인물 하나 하나가\N매우 심도깊습니다
+Dialogue: 0,0:02:05.77,0:02:08.18,Default,,0,0,0,,\NEvery single character\Nis incredibly complex.
+Dialogue: 0,0:02:08.30,0:02:08.30,Default,,0,0,0,,\N보여주는 모습만으로\N생기는 고정관념으로는
+Dialogue: 0,0:02:08.30,0:02:09.33,Default,,0,0,0,,\NYou think you know them.
+Dialogue: 0,0:02:09.45,0:02:09.45,Default,,0,0,0,,\N등장인물을 제대로 이해했다고\N할 수 없습니다
+Dialogue: 0,0:02:09.45,0:02:14.14,Default,,0,0,0,,\NYou think you got them pegged as what\Nthey seemingly are but they really aren't.
+Dialogue: 0,0:02:14.25,0:02:14.25,Default,,0,0,0,,\N신하 중에서 전적으로\N신뢰할 수 있는 이가 있습니까?
+Dialogue: 0,0:02:14.25,0:02:17.66,Default,,0,0,0,,\NIs there someone in your service\Nwhom you trust completely?
+Dialogue: 0,0:02:17.78,0:02:17.78,Default,,0,0,0,,\N있네
+Dialogue: 0,0:02:17.78,0:02:18.84,Default,,0,0,0,,\NYes.
+Dialogue: 0,0:02:18.96,0:02:18.96,Default,,0,0,0,,\N"없다"라고 대답하셔야\N현명하신겁니다, 전하
+Dialogue: 0,0:02:18.96,0:02:21.27,Default,,0,0,0,,\NThe wiser answer was "no", my lord.
+Dialogue: 0,0:02:21.53,0:02:21.53,Default,,0,0,0,,\N이분법적인 선악의\N이야기가 아닙니다
+Dialogue: 0,0:02:21.53,0:02:23.63,Default,,0,0,0,,\NIt's not a good guys/bad guys story.
+Dialogue: 0,0:02:23.75,0:02:23.75,Default,,0,0,0,,\N모두가 나름의 가치를\N추구하고
+Dialogue: 0,0:02:23.75,0:02:26.40,Default,,0,0,0,,\NIt's a story where everybody is\Npursuing their own interests
+Dialogue: 0,0:02:26.52,0:02:26.52,Default,,0,0,0,,\N나름의 규칙을 따르면서
+Dialogue: 0,0:02:26.52,0:02:29.67,Default,,0,0,0,,\Nand everybody's following their own\Ncode and it's about those interests
+Dialogue: 0,0:02:29.79,0:02:29.79,Default,,0,0,0,,\N서로의 가치와 윤리가\N충돌하게 되는 이야기입니다
+Dialogue: 0,0:02:29.79,0:02:33.35,Default,,0,0,0,,\Nand those ethics coming into\Nconflict with each other.
+Dialogue: 0,0:02:33.47,0:02:33.47,Default,,0,0,0,,\N영웅이 악당을 물리치는 이야기보다
+Dialogue: 0,0:02:33.47,0:02:37.58,Default,,0,0,0,,\NAnd it provides a much richer story\Nthan the guys in white
+Dialogue: 0,0:02:38.58,0:02:38.58,Default,,0,0,0,,\Nby Psyence Fictionist\Npsyencefictionist@gmail.com
+Dialogue: 0,0:02:38.58,0:02:39.58,Default,,0,0,0,,\NSync by: honeybunny and Kerensky\Nwww.Addic7ed.com
diff --git a/tests/ref/fate/sub-srt-empty-events b/tests/ref/fate/sub-srt-empty-events
new file mode 100644
index 00000000..11af2da9
--- /dev/null
+++ b/tests/ref/fate/sub-srt-empty-events
@@ -0,0 +1,24 @@
+1
+00:00:51,940 --> 00:00:53,320
+我们期待一些事
+
+2
+00:00:53,340 --> 00:00:56,020
+比从前更好的事
+
+3
+00:00:56,140 --> 00:00:58,920
+我们期待的更多
+
+4
+00:00:59,540 --> 00:01:02,020
+你真以为藏到壁画后面
+
+5
+02:09:09,350 --> 02:09:10,850
+就失去你
+
+6
+02:11:34,350 --> 02:11:37,850
+（怀念我们的朋友“面具”小查尔斯·刘易斯）
+
diff --git a/tests/ref/fate/sub-srt-madness-timeshift b/tests/ref/fate/sub-srt-madness-timeshift
new file mode 100644
index 00000000..13a30859
--- /dev/null
+++ b/tests/ref/fate/sub-srt-madness-timeshift
@@ -0,0 +1,36 @@
+1
+00:00:04,251 --> 00:00:05,362
+okay, let's make things easy
+
+2
+00:00:05,160 --> 00:00:05,263
+31 i'm a number but the only payload so please keep me :)
+
+3
+00:00:06,473 --> 00:00:07,584
+hello
+5
+don't forget me.
+
+4
+00:00:08,695 --> 00:00:09,806
+no.
+let's add some fun
+
+5
+00:00:10,917 --> 00:00:12,028
+let's do it in reverse bc wtf not
+45 yes this is a number but i'm actually part of the sub
+
+6
+00:00:12,028 --> 00:00:13,139
+1
+0
+next is negative, not a chapnum ;)
+-1
+
+7
+00:00:13,241 --> 00:00:13,263
+credits
+2015
+
diff --git a/tests/ref/fate/sub-srt-rrn-remux b/tests/ref/fate/sub-srt-rrn-remux
new file mode 100644
index 00000000..1cb66d21
--- /dev/null
+++ b/tests/ref/fate/sub-srt-rrn-remux
@@ -0,0 +1,2113 @@
+1
+00:00:01,000 --> 00:00:04,074
+Subtitles downloaded from Podnapisi.NET
+
+2
+00:00:17,317 --> 00:00:19,551
+Wooldoor: who's it gonna
+be?! Who's it gonna be?!
+
+3
+00:00:19,552 --> 00:00:21,053
+Wheeeeee!
+
+4
+00:00:21,054 --> 00:00:23,122
+Wooldoor:
+the producers decided
+to surprise one of us
+
+5
+00:00:23,123 --> 00:00:24,990
+With a visit from
+a family member.
+
+6
+00:00:24,991 --> 00:00:26,191
+I hope it's someone
+from my family
+
+7
+00:00:26,192 --> 00:00:27,493
+Like flagfred cheesewheel,
+
+8
+00:00:27,494 --> 00:00:30,262
+Lintsue brickshade,
+or prom-queen dumpsterbaby.
+
+9
+00:00:30,263 --> 00:00:32,197
+[speaking japanese]
+
+10
+00:00:38,905 --> 00:00:41,407
+Look, everyone!
+Someone's coming!
+
+11
+00:00:41,408 --> 00:00:43,575
+Clara: the second I saw
+the horse and carriage,
+
+12
+00:00:43,576 --> 00:00:47,079
+I just knew it had
+to be someone from
+my kingdom.
+
+13
+00:00:47,080 --> 00:00:50,683
+But when I saw it was
+the short carriage,
+my heart sank.
+
+14
+00:00:50,684 --> 00:00:53,786
+Why?! Why did she
+have to come?!
+
+15
+00:00:55,221 --> 00:00:56,588
+Oh, fuck me.
+
+16
+00:00:56,589 --> 00:00:58,223
+[garbled speech]
+
+17
+00:00:58,224 --> 00:01:03,362
+Um, everyone, this is
+my special cousin bleh.
+
+18
+00:01:03,363 --> 00:01:04,530
+Foxxy: no one knew clara
+
+19
+00:01:04,531 --> 00:01:07,132
+Had a mentally-challenged
+cousin, but it was cool.
+
+20
+00:01:07,133 --> 00:01:10,269
+The foxxy five used to perform
+for special needs kids,
+
+21
+00:01:10,270 --> 00:01:14,306
+And those people got
+hearts as big as they
+oversized foreheads.
+
+22
+00:01:14,307 --> 00:01:18,610
+<i>"I am sam is a well-written
+and exceptionally well-acted
+tearjerker,"</i>
+
+23
+00:01:18,611 --> 00:01:21,213
+<i>Raves adam nayman
+of eye weekly.</i>
+
+24
+00:01:23,149 --> 00:01:25,250
+What the shoebazzle
+was that?
+
+25
+00:01:25,251 --> 00:01:29,755
+<i>She, um, only
+quotes reviews from
+the movie I am sam.</i>
+
+26
+00:01:29,756 --> 00:01:31,690
+Huh?
+Hmm.
+
+27
+00:01:31,691 --> 00:01:33,525
+Well, this has
+been fun. Bye!
+
+28
+00:01:35,195 --> 00:01:38,397
+Well, i've never seen
+someone that slow
+go that fast.
+
+29
+00:01:38,398 --> 00:01:40,132
+Oh, yeah! Give it up!
+
+30
+00:01:40,133 --> 00:01:41,066
+Yeah!
+
+31
+00:01:45,705 --> 00:01:47,639
+[speaking japanese]
+
+32
+00:01:55,215 --> 00:01:59,952
+Oh, poor ling-ling.
+He's so disappointed
+his father didn't show.
+
+33
+00:01:59,953 --> 00:02:01,387
+Oh!
+
+34
+00:02:01,388 --> 00:02:04,590
+Oh, sweetie,
+i'm so sorry your
+father's not coming.
+
+35
+00:02:04,591 --> 00:02:08,127
+We still love ya!
+Don't we, toot?!
+
+36
+00:02:08,128 --> 00:02:10,963
+Fine!
+
+37
+00:02:10,964 --> 00:02:12,197
+[kiss]
+
+38
+00:02:12,198 --> 00:02:16,035
+Ling-ling,
+you taste so weird.
+
+39
+00:02:17,470 --> 00:02:19,204
+[giggling]
+
+40
+00:02:19,205 --> 00:02:21,140
+[giggling]
+
+41
+00:02:22,208 --> 00:02:25,177
+Let's go, bleh.
+The coast is clear.
+
+42
+00:02:25,178 --> 00:02:27,746
+But after this,
+no more potty breaks.
+
+43
+00:02:27,747 --> 00:02:30,082
+<i>"I am sam plays like
+a made-for-tv weep--"</i>
+
+44
+00:02:30,083 --> 00:02:31,316
+Shh!
+
+45
+00:02:31,317 --> 00:02:33,786
+I know it seemed
+like I was ashamed
+of my cousin,
+
+46
+00:02:33,787 --> 00:02:35,921
+But nothing could be
+further from the truth.
+
+47
+00:02:35,922 --> 00:02:39,591
+Aah!
+Foxxy:
+hey, clara!
+
+48
+00:02:39,592 --> 00:02:41,060
+What's up, girlfriend?
+
+49
+00:02:41,061 --> 00:02:43,062
+You know,
+none of us have
+really had a chance
+
+50
+00:02:43,063 --> 00:02:44,763
+To hang out
+with bleh yet.
+
+51
+00:02:44,764 --> 00:02:47,232
+Bleh? What's a bleh?
+
+52
+00:02:47,233 --> 00:02:49,835
+Is that one of your
+jive words like
+"emancipation"?
+
+53
+00:02:49,836 --> 00:02:51,603
+No, bleh your cousin.
+
+54
+00:02:51,604 --> 00:02:55,808
+Uh, clara?
+Why is the lamp drooling?
+
+55
+00:02:55,809 --> 00:02:59,044
+I guess to remind us
+to conserve electricity.
+
+56
+00:02:59,045 --> 00:03:03,248
+Clara, there is
+no reason to be
+embarrassed by your cousin.
+
+57
+00:03:03,249 --> 00:03:06,085
+Embarrassed?
+I'm not embarrassed.
+
+58
+00:03:06,086 --> 00:03:09,321
+Stop pretending
+bleh is a lamp.
+
+59
+00:03:09,322 --> 00:03:11,824
+She is not a lamp,
+clara.
+
+60
+00:03:11,825 --> 00:03:15,160
+She is a real person
+with real feelings.
+
+61
+00:03:15,161 --> 00:03:17,096
+Foxxy was probably right.
+
+62
+00:03:17,097 --> 00:03:18,864
+Damn that ms. Know-it-all!
+
+63
+00:03:18,865 --> 00:03:21,400
+I should have killed her
+when I had the chance.
+
+64
+00:03:21,401 --> 00:03:22,835
+Give me your hand!
+
+65
+00:03:22,836 --> 00:03:24,603
+First, give me the ring!
+
+66
+00:03:24,604 --> 00:03:27,206
+[kisses]
+
+67
+00:03:27,207 --> 00:03:29,208
+Xandir:
+it just didn't make sense.
+
+68
+00:03:29,209 --> 00:03:30,709
+When we licked ling-ling
+last time,
+
+69
+00:03:30,710 --> 00:03:31,877
+We got all [bleep]ed up.
+
+70
+00:03:31,878 --> 00:03:33,479
+But this time, nothing happened.
+
+71
+00:03:33,480 --> 00:03:35,047
+Well, we got a little aroused,
+
+72
+00:03:35,048 --> 00:03:36,982
+But not [bleep]ed up.
+
+73
+00:03:36,983 --> 00:03:38,517
+Uhh! What's the deal-e-o?
+
+74
+00:03:38,518 --> 00:03:40,085
+I don't feel
+anything. You?
+
+75
+00:03:40,086 --> 00:03:41,086
+Nothing.
+
+76
+00:03:41,087 --> 00:03:42,755
+[kisses]
+
+77
+00:03:42,756 --> 00:03:45,858
+Hmm. Maybe we have to
+eat ling-ling.
+
+78
+00:03:45,859 --> 00:03:47,326
+Why is it when
+something doesn't work,
+
+79
+00:03:47,327 --> 00:03:49,228
+Your first reaction
+is to eat it?
+
+80
+00:03:49,229 --> 00:03:52,631
+What?! What
+are you doing?
+
+81
+00:03:52,632 --> 00:03:55,067
+I couldn't find
+the remote.
+
+82
+00:03:55,068 --> 00:03:56,735
+You know,
+there is one person
+
+83
+00:03:56,736 --> 00:03:58,203
+Who can figure this out.
+
+84
+00:03:58,204 --> 00:03:59,705
+The professor!
+The professor!
+
+85
+00:03:59,706 --> 00:04:01,674
+[hums]
+
+86
+00:04:01,675 --> 00:04:02,675
+Ooh!
+
+87
+00:04:02,676 --> 00:04:03,876
+Aha!
+[whistle blows]
+
+88
+00:04:03,877 --> 00:04:05,577
+Got it.
+Hit the lights.
+
+89
+00:04:09,416 --> 00:04:11,717
+Here you see
+the african camel toad.
+
+90
+00:04:11,718 --> 00:04:13,118
+It secretes
+a hallucinogen
+
+91
+00:04:13,119 --> 00:04:14,153
+Whenever it's scared.
+
+92
+00:04:14,154 --> 00:04:15,320
+Ahh!
+
+93
+00:04:15,321 --> 00:04:17,423
+Yeah, that's--oh,
+I didn't know that.
+
+94
+00:04:17,424 --> 00:04:21,360
+When "cool" kids feel
+like catching a "buzz,"
+
+95
+00:04:21,361 --> 00:04:25,064
+They scare the amphibian
+and then lick its skin.
+
+96
+00:04:25,065 --> 00:04:27,166
+All the cool kids,
+you say?
+
+97
+00:04:27,167 --> 00:04:31,036
+Now take ling-ling,
+the asian trading card
+battle monster.
+
+98
+00:04:31,037 --> 00:04:32,638
+It creates a similar
+hallucinogen
+
+99
+00:04:32,639 --> 00:04:34,807
+Whenever it's
+disappointed.
+
+100
+00:04:34,808 --> 00:04:37,443
+Ahh!
+Oh, I want to
+be cool!
+
+101
+00:04:37,444 --> 00:04:40,946
+So, ling-ling
+was experiencing
+disappointment
+
+102
+00:04:40,947 --> 00:04:42,614
+Due to its
+father's absence,
+
+103
+00:04:42,615 --> 00:04:44,183
+Then, upon kissing
+ling-ling,
+
+104
+00:04:44,184 --> 00:04:47,186
+You became...[wacky
+noises] technically
+speaking.
+
+105
+00:04:47,187 --> 00:04:53,692
+So, only when ling-ling
+is disappointed, eh? Hmm.
+
+106
+00:04:55,662 --> 00:04:59,932
+[screaming]
+[alarm]
+[siren]
+
+107
+00:04:59,933 --> 00:05:02,067
+[bagpipes playing]
+
+108
+00:05:02,068 --> 00:05:03,635
+[harps playing]
+
+109
+00:05:03,636 --> 00:05:05,938
+[crickets chirping]
+
+110
+00:05:05,939 --> 00:05:09,942
+Oh! Don't look,
+but corky at 3:00.
+
+111
+00:05:09,943 --> 00:05:13,045
+Foxxy: it looked like I got
+through to the princess.
+
+112
+00:05:13,046 --> 00:05:15,881
+Clara would see that
+there ain't no reason
+to be ashamed of bleh.
+
+113
+00:05:15,882 --> 00:05:17,316
+We all adults.
+
+114
+00:05:17,317 --> 00:05:19,985
+Dude, let's egg
+the trainable.
+
+115
+00:05:19,986 --> 00:05:21,854
+Mind if we
+join you guys?
+
+116
+00:05:21,855 --> 00:05:24,189
+No, no, no.
+Of course not.
+
+117
+00:05:24,190 --> 00:05:25,891
+Don't be stupid.
+
+118
+00:05:25,892 --> 00:05:28,027
+[both snicker]
+
+119
+00:05:43,176 --> 00:05:44,543
+[eggshells crack]
+
+120
+00:05:48,915 --> 00:05:52,217
+Man, clara's cousin
+is so hot!
+
+121
+00:05:52,218 --> 00:05:55,454
+<i>Damn! She's like
+retarded hot!</i>
+
+122
+00:06:01,127 --> 00:06:03,595
+[whimpers] sam! Aah!
+
+123
+00:06:05,432 --> 00:06:06,365
+Clara: i'm really glad
+I brought bleh
+
+124
+00:06:06,366 --> 00:06:08,367
+Down to hang out
+with everyone.
+
+125
+00:06:08,368 --> 00:06:10,803
+They treated her
+like she was one
+of the gang.
+
+126
+00:06:10,804 --> 00:06:12,771
+I hadn't seen bleh
+have this much fun
+
+127
+00:06:12,772 --> 00:06:15,207
+<i>Since they cancelled
+the pretty, shiny
+object show.</i>
+
+128
+00:06:15,208 --> 00:06:18,210
+You sure you don't
+want to stay and
+play a few rounds?
+
+129
+00:06:18,211 --> 00:06:21,280
+Nah, it's getting late,
+and we don't want to
+intrude on guy time.
+
+130
+00:06:21,281 --> 00:06:22,648
+'night!
+
+131
+00:06:22,649 --> 00:06:25,751
+<i>"I am sam works magic
+'cause of penn,"</i>
+
+132
+00:06:25,752 --> 00:06:28,153
+<i>Raves lou lumenick
+of the new york post.</i>
+
+133
+00:06:28,154 --> 00:06:30,022
+I go first.
+Whee!
+
+134
+00:06:30,023 --> 00:06:32,291
+Holy shit!
+
+135
+00:06:32,292 --> 00:06:34,293
+She is so hot!
+
+136
+00:06:34,294 --> 00:06:36,161
+Oh, spanky.
+
+137
+00:06:36,162 --> 00:06:38,564
+What? Oh. Ok.
+
+138
+00:06:38,565 --> 00:06:39,498
+[kiss]
+
+139
+00:06:40,700 --> 00:06:42,568
+Oh, that bleh, man.
+
+140
+00:06:42,569 --> 00:06:45,037
+I would totally dip
+my wick in that.
+
+141
+00:06:45,038 --> 00:06:47,906
+Listen, dude. No matter
+how hot she is,
+
+142
+00:06:47,907 --> 00:06:50,009
+You would not
+punch holes in a sped.
+
+143
+00:06:50,010 --> 00:06:51,210
+Oh, hell,
+yeah, I would.
+
+144
+00:06:51,211 --> 00:06:52,544
+I mean, she's
+the perfect girl.
+
+145
+00:06:52,545 --> 00:06:54,480
+All the sweater meat
+of a regular chick
+
+146
+00:06:54,481 --> 00:06:56,181
+And half
+the pillow talk.
+
+147
+00:06:58,385 --> 00:06:59,752
+[kiss]
+
+148
+00:06:59,753 --> 00:07:01,987
+Man, you're all talk.
+
+149
+00:07:01,988 --> 00:07:04,757
+Look, I got 20 large
+that says you won't
+sleep with her.
+
+150
+00:07:04,758 --> 00:07:07,659
+Make it 50
+and you're on.
+
+151
+00:07:07,660 --> 00:07:10,262
+Oh, ha ha!
+Wheeeeeee!
+
+152
+00:07:10,263 --> 00:07:13,198
+Hey, if you're gonna
+be gay about this,
+you can't play.
+
+153
+00:07:13,199 --> 00:07:14,333
+Sorry.
+
+154
+00:07:15,402 --> 00:07:16,635
+[kiss]
+
+155
+00:07:16,636 --> 00:07:20,205
+Fine. 50 bucks says you
+won't have sex with bleh.
+
+156
+00:07:20,206 --> 00:07:22,207
+Easy money.
+
+157
+00:07:22,208 --> 00:07:24,076
+Captain hero is
+gonna show you guys
+
+158
+00:07:24,077 --> 00:07:26,679
+What being a real man
+is all about.
+
+159
+00:07:26,680 --> 00:07:28,681
+All: triple kiss!
+
+160
+00:07:28,682 --> 00:07:30,516
+[kisses]
+
+161
+00:07:37,657 --> 00:07:41,293
+[snarls]
+
+162
+00:07:41,294 --> 00:07:43,228
+[speaking japanese]
+
+163
+00:07:48,601 --> 00:07:52,571
+¶ ling-ling
+into battle go ¶
+
+164
+00:07:52,572 --> 00:07:55,140
+¶ fulfill destiny
+of the soul ¶
+
+165
+00:07:55,141 --> 00:07:56,542
+¶ all the children sing ¶
+
+166
+00:07:56,543 --> 00:08:00,045
+Kids: ¶ kill,
+kill, kill, kill,
+die, die, die ¶
+
+167
+00:08:00,046 --> 00:08:01,580
+¶ kill, kill,
+kill, kill... ¶
+
+168
+00:08:01,581 --> 00:08:04,583
+Wait, ling-ling.
+It's just us.
+
+169
+00:08:04,584 --> 00:08:08,087
+We're not really
+a 3-headed, acid-spitting
+needle monster.
+
+170
+00:08:08,088 --> 00:08:11,190
+[speaking japanese]
+
+171
+00:08:11,191 --> 00:08:12,825
+Oh.
+
+172
+00:08:12,826 --> 00:08:14,960
+Oh, no, ling-ling.
+
+173
+00:08:14,961 --> 00:08:16,228
+Instead of
+cheering you up,
+
+174
+00:08:16,229 --> 00:08:21,367
+We accidentally
+disappointed you.
+
+175
+00:08:21,368 --> 00:08:24,069
+Oh, poor baby.
+
+176
+00:08:24,070 --> 00:08:25,471
+Triple kiss!
+
+177
+00:08:25,472 --> 00:08:27,373
+Oh! Hee hee!
+[kisses]
+
+178
+00:08:27,374 --> 00:08:31,310
+[sighing and giggling]
+
+179
+00:08:38,184 --> 00:08:39,585
+[knocks]
+
+180
+00:08:39,586 --> 00:08:41,420
+Clara:
+I found it a bit odd
+
+181
+00:08:41,421 --> 00:08:44,023
+That captain hero
+wanted to take bleh
+out on a date,
+
+182
+00:08:44,024 --> 00:08:45,691
+And it was
+my responsibility
+
+183
+00:08:45,692 --> 00:08:48,127
+To make sure his
+intentions were pure.
+
+184
+00:08:48,128 --> 00:08:50,329
+So, captain hero, is it?
+
+185
+00:08:50,330 --> 00:08:52,464
+Oh. Heh heh! Yeah?
+
+186
+00:08:52,465 --> 00:08:53,966
+Coming to take
+my little bleh out
+
+187
+00:08:53,967 --> 00:08:55,634
+For a night
+on the town, eh?
+
+188
+00:08:55,635 --> 00:08:57,770
+You know, I don't
+know what'd I do
+
+189
+00:08:57,771 --> 00:08:59,805
+If anything happened
+to my little bleh.
+
+190
+00:08:59,806 --> 00:09:02,341
+Yes, ma'am.
+I mean, no, ma'am.
+
+191
+00:09:02,342 --> 00:09:04,943
+Son, do you know
+what it's like
+
+192
+00:09:04,944 --> 00:09:08,147
+To kill a man with
+your bare hands?
+
+193
+00:09:08,148 --> 00:09:09,748
+I do.
+
+194
+00:09:09,749 --> 00:09:10,883
+I...
+
+195
+00:09:10,884 --> 00:09:12,151
+Oh!
+
+196
+00:09:12,152 --> 00:09:13,986
+Here's bleh now!
+
+197
+00:09:13,987 --> 00:09:16,155
+¶ there she is... ¶
+
+198
+00:09:16,156 --> 00:09:19,825
+Wow! You look...
+Stunning.
+
+199
+00:09:19,826 --> 00:09:25,497
+<i>"I am sam's dakota
+fanning is worth the
+price of admission."</i>
+
+200
+00:09:25,498 --> 00:09:27,700
+[blow dart]
+
+201
+00:09:27,701 --> 00:09:29,501
+Ow! What the hell?!
+
+202
+00:09:29,502 --> 00:09:31,904
+A little
+added protection.
+
+203
+00:09:31,905 --> 00:09:34,807
+You'll get the antidote
+when I get my bleh back.
+
+204
+00:09:34,808 --> 00:09:39,311
+[all giggling]
+
+205
+00:09:39,312 --> 00:09:42,748
+Hey, ling-ling,
+you excited for
+christmas?
+
+206
+00:09:42,749 --> 00:09:45,317
+Too bad there's
+no such thing
+as santa claus!
+
+207
+00:09:45,318 --> 00:09:46,985
+I bet you're
+disappointed.
+
+208
+00:09:46,986 --> 00:09:48,153
+Oh.
+
+209
+00:09:48,154 --> 00:09:50,889
+[laughs]
+
+210
+00:09:50,890 --> 00:09:53,659
+Hey, look what
+I found in your ear!
+
+211
+00:09:53,660 --> 00:09:55,427
+Is it a quarter?
+
+212
+00:09:55,428 --> 00:09:58,330
+Oh, no!
+It's a tumor!
+
+213
+00:09:58,331 --> 00:09:59,765
+Oh.
+
+214
+00:09:59,766 --> 00:10:02,601
+[slurps]
+
+215
+00:10:02,602 --> 00:10:04,370
+[giggles]
+
+216
+00:10:04,371 --> 00:10:09,008
+Hey, ling-ling,
+you excited for
+christmas?
+
+217
+00:10:09,009 --> 00:10:11,443
+Oh, no!
+It's a tumor!
+
+218
+00:10:11,444 --> 00:10:12,845
+Oh.
+
+219
+00:10:12,846 --> 00:10:16,181
+[slurps]
+
+220
+00:10:16,182 --> 00:10:19,418
+¶ girly, girly,
+girly, girly... ¶
+
+221
+00:10:19,419 --> 00:10:23,689
+[bleh shouts happily
+and incoherently]
+
+222
+00:10:23,690 --> 00:10:25,357
+Captain hero:
+yes, I was confident
+
+223
+00:10:25,358 --> 00:10:28,327
+That by the end of the
+night, i'd have another
+notch on my utility belt
+
+224
+00:10:28,328 --> 00:10:30,095
+And 50 bucks in my pocket.
+
+225
+00:10:30,096 --> 00:10:33,565
+Then I realized
+something. Bleh...
+
+226
+00:10:33,566 --> 00:10:35,034
+She was really special,
+
+227
+00:10:35,035 --> 00:10:36,635
+But not like in
+a retarded way,
+
+228
+00:10:36,636 --> 00:10:39,004
+In a traditionally
+special way.
+
+229
+00:10:39,005 --> 00:10:44,943
+¶ did you ever know
+that you're my hero? ¶
+
+230
+00:10:44,944 --> 00:10:46,245
+Oh!
+
+231
+00:10:46,246 --> 00:10:47,546
+Ok!
+
+232
+00:10:47,547 --> 00:10:53,352
+¶ you're everything
+I wish I could be ¶
+
+233
+00:10:53,353 --> 00:10:56,088
+¶ I could fly higher
+than an eagle ¶
+
+234
+00:10:56,089 --> 00:10:57,556
+[both giggle]
+
+235
+00:10:57,557 --> 00:10:59,558
+Sam.
+
+236
+00:10:59,559 --> 00:11:02,761
+I don't like
+these things.
+They scare me!
+
+237
+00:11:02,762 --> 00:11:05,631
+¶ beneath my wings ¶
+
+238
+00:11:05,632 --> 00:11:09,468
+Eeeeeeh! Aah! Sam!
+¶ oh, oh, fly ¶
+
+239
+00:11:09,469 --> 00:11:14,340
+¶ so high
+against the sky ¶
+
+240
+00:11:14,341 --> 00:11:16,942
+¶ so high I almost... ¶
+
+241
+00:11:16,943 --> 00:11:20,646
+Bleh, you are the wind
+beneath my wings.
+
+242
+00:11:20,647 --> 00:11:22,681
+"contrived, manipulative,
+
+243
+00:11:22,682 --> 00:11:28,721
+<i>And shamelessly
+sentimental," raves peter
+travers from rolling stone.</i>
+
+244
+00:11:34,627 --> 00:11:37,596
+Hey, ling-ling,
+I got a penny.
+No, I don't.
+
+245
+00:11:37,597 --> 00:11:39,198
+[licks dry fur]
+
+246
+00:11:39,199 --> 00:11:40,866
+[rattles]
+
+247
+00:11:40,867 --> 00:11:41,867
+[bam bam]
+
+248
+00:11:41,868 --> 00:11:43,702
+[licks dry fur]
+
+249
+00:11:43,703 --> 00:11:46,138
+Whoa, guys. Guys!
+
+250
+00:11:46,139 --> 00:11:47,840
+Ling-ling is
+totally kicked!
+
+251
+00:11:47,841 --> 00:11:48,841
+What?!
+
+252
+00:11:48,842 --> 00:11:50,209
+[pants]
+
+253
+00:11:50,210 --> 00:11:52,978
+[licks dry fur]
+
+254
+00:11:52,979 --> 00:11:54,747
+Don't hold out
+on me, man.
+
+255
+00:11:54,748 --> 00:11:57,449
+I need my fix.
+Come on! Please,
+man, come on!
+
+256
+00:11:57,450 --> 00:11:59,618
+What do you want?
+I'll do anything!
+
+257
+00:11:59,619 --> 00:12:01,253
+I'll suck your dick!
+
+258
+00:12:03,156 --> 00:12:04,556
+Ooh! Ooh, sam!
+
+259
+00:12:04,557 --> 00:12:05,991
+Hee hee!
+Shh!
+
+260
+00:12:05,992 --> 00:12:08,861
+Are you crazy?
+They'll hear us!
+
+261
+00:12:08,862 --> 00:12:11,764
+Captain hero:
+bleh and I got home way
+past midnight.
+
+262
+00:12:11,765 --> 00:12:15,701
+I was like, we are gonna
+get in so much trouble!
+
+263
+00:12:15,702 --> 00:12:19,672
+So, this was
+really fantastic.
+
+264
+00:12:19,673 --> 00:12:23,842
+Um, I,
+uh...Good night.
+
+265
+00:12:23,843 --> 00:12:24,777
+[kiss]
+
+266
+00:12:25,879 --> 00:12:27,513
+¶ girly, girly,
+girly, girly... ¶
+
+267
+00:12:27,514 --> 00:12:29,415
+Ahh!
+
+268
+00:12:29,416 --> 00:12:32,951
+Hey, super stud.
+Did you sleep
+with her?
+
+269
+00:12:32,952 --> 00:12:34,453
+Dude, back off!
+
+270
+00:12:34,454 --> 00:12:36,922
+I did not, as you
+so crudely put it...
+
+271
+00:12:36,923 --> 00:12:38,857
+"nail her
+in the stink tube."
+
+272
+00:12:38,858 --> 00:12:40,592
+It's not like that.
+
+273
+00:12:40,593 --> 00:12:42,961
+So you kids are
+taking it slow?
+
+274
+00:12:42,962 --> 00:12:43,996
+See what I did
+there? I--
+
+275
+00:12:43,997 --> 00:12:45,998
+You just don't get it,
+spanky.
+
+276
+00:12:45,999 --> 00:12:47,299
+Nobody gets it!
+
+277
+00:12:47,300 --> 00:12:48,467
+[wails]
+
+278
+00:12:48,468 --> 00:12:50,069
+[door slams]
+
+279
+00:12:50,070 --> 00:12:51,470
+Captain hero:
+I was lying in bed,
+
+280
+00:12:51,471 --> 00:12:54,273
+Replaying the date over
+and over in my head,
+
+281
+00:12:54,274 --> 00:12:56,842
+When there was a knock
+at the door.
+[knocks]
+
+282
+00:12:56,843 --> 00:12:57,776
+Who's there?
+
+283
+00:12:58,845 --> 00:13:01,080
+Bleh! What are you--
+
+284
+00:13:02,415 --> 00:13:04,983
+Listen, bleh,
+i'm not that kind of guy.
+
+285
+00:13:04,984 --> 00:13:05,984
+[pulls pants down]
+
+286
+00:13:05,985 --> 00:13:08,153
+Oh, my!
+
+287
+00:13:08,154 --> 00:13:12,057
+Oh, no, no, no...
+
+288
+00:13:12,058 --> 00:13:14,293
+Leave the helmet on.
+
+289
+00:13:17,330 --> 00:13:18,697
+Ahh.
+
+290
+00:13:18,698 --> 00:13:21,433
+Clara: that morning,
+I woke up feeling great.
+
+291
+00:13:21,434 --> 00:13:24,269
+Everyone had accepted
+me and my cousin,
+
+292
+00:13:24,270 --> 00:13:25,971
+And that was swell.
+
+293
+00:13:25,972 --> 00:13:29,875
+Oh, what a glorious day,
+isn't it, bleh?
+
+294
+00:13:29,876 --> 00:13:31,877
+Bleh?
+
+295
+00:13:31,878 --> 00:13:33,612
+Bleh?!
+
+296
+00:13:33,613 --> 00:13:37,683
+Captain hero!
+Clara, it's not
+what you think!
+
+297
+00:13:37,684 --> 00:13:38,684
+[gibberish]
+
+298
+00:13:38,685 --> 00:13:40,552
+Get up! Get up
+right now!
+
+299
+00:13:40,553 --> 00:13:43,856
+<i>"I am sam reduces penn to
+a mugging embarrassment,"</i>
+
+300
+00:13:43,857 --> 00:13:46,091
+<i>Raves mike clark
+of usa today.</i>
+
+301
+00:13:46,092 --> 00:13:47,860
+I don't care.
+Let's go!
+
+302
+00:13:47,861 --> 00:13:49,561
+Yeeeh...
+
+303
+00:13:49,562 --> 00:13:52,297
+Go back to your
+cage right now!
+
+304
+00:13:52,298 --> 00:13:55,434
+Spanky: well,
+look who's doing
+the limp of shame!
+
+305
+00:13:55,435 --> 00:13:57,169
+You bastard!
+
+306
+00:13:57,170 --> 00:13:59,338
+Captain hero:
+clara totally overreacted.
+
+307
+00:13:59,339 --> 00:14:00,639
+I mean, bleh was an adult,
+
+308
+00:14:00,640 --> 00:14:02,374
+And completely capable
+of making her own decisions.
+
+309
+00:14:02,375 --> 00:14:03,809
+Or not.
+
+310
+00:14:03,810 --> 00:14:06,945
+Still, I needed to
+sit clara down and
+explain what happened.
+
+311
+00:14:06,946 --> 00:14:09,948
+Clara, I never expected
+this to happen.
+
+312
+00:14:09,949 --> 00:14:14,720
+It's just that I have
+real feelings for her.
+
+313
+00:14:14,721 --> 00:14:17,389
+Well...
+
+314
+00:14:17,390 --> 00:14:20,025
+¶ there she is ¶
+
+315
+00:14:20,026 --> 00:14:23,295
+¶ she's a girl ¶
+
+316
+00:14:23,296 --> 00:14:25,497
+If that is the truth--
+
+317
+00:14:25,498 --> 00:14:26,932
+It is.
+
+318
+00:14:26,933 --> 00:14:28,967
+Then i'm truly happy
+for the two of you.
+
+319
+00:14:28,968 --> 00:14:31,170
+You're a good man,
+captain hero.
+
+320
+00:14:31,171 --> 00:14:33,005
+I knew I could
+trust you.
+
+321
+00:14:33,006 --> 00:14:35,107
+Here's the antidote.
+
+322
+00:14:35,108 --> 00:14:36,342
+[gulp]
+
+323
+00:14:36,343 --> 00:14:38,977
+Well, nice work,
+captain hero.
+
+324
+00:14:38,978 --> 00:14:41,980
+Here's the $50
+I bet you to have
+sex with bleh.
+
+325
+00:14:41,981 --> 00:14:42,981
+[gasps]
+
+326
+00:14:42,982 --> 00:14:44,049
+Yeeeeeh!
+
+327
+00:14:44,050 --> 00:14:45,184
+Oh, um, I mean, uh...
+
+328
+00:14:45,185 --> 00:14:49,655
+Here is the $50
+captain hero won off me
+
+329
+00:14:49,656 --> 00:14:52,291
+Because I bet him
+he would not have sex
+
+330
+00:14:52,292 --> 00:14:53,926
+With your
+special cousin bleh.
+
+331
+00:14:53,927 --> 00:14:56,128
+But he did, in fact,
+have sex with her.
+
+332
+00:14:56,129 --> 00:14:58,564
+Oh, yes, he did.
+For $50.
+
+333
+00:14:58,565 --> 00:15:01,133
+Oh, man. Saved it.
+
+334
+00:15:01,134 --> 00:15:07,539
+Captain hero, I forbid you
+to see my cousin ever again!
+
+335
+00:15:07,540 --> 00:15:08,774
+Uhh!
+
+336
+00:15:08,775 --> 00:15:10,676
+I would have been
+more pissed at spanky,
+
+337
+00:15:10,677 --> 00:15:13,679
+But you know,
+I just won $50!
+
+338
+00:15:13,680 --> 00:15:16,382
+[cash register bell]
+ah! Who's your daddy?
+
+339
+00:15:16,383 --> 00:15:19,485
+Ooh, I like the big one
+and that one over there
+
+340
+00:15:19,486 --> 00:15:20,986
+And...Ooh!
+
+341
+00:15:20,987 --> 00:15:25,057
+Yeah. Hell, yeah!
+[girls talking and laughing]
+
+342
+00:15:25,058 --> 00:15:26,925
+Ehhhh.
+
+343
+00:15:26,926 --> 00:15:28,394
+Ehhhh.
+
+344
+00:15:28,395 --> 00:15:30,362
+Ehhhh.
+
+345
+00:15:30,363 --> 00:15:32,164
+Oh, look at
+poor ling-ling.
+
+346
+00:15:32,165 --> 00:15:33,932
+We've licked
+the life out of it.
+
+347
+00:15:33,933 --> 00:15:35,234
+What should we do?
+
+348
+00:15:35,235 --> 00:15:36,235
+Poke it harder.
+
+349
+00:15:36,236 --> 00:15:37,236
+Uhh!
+
+350
+00:15:37,237 --> 00:15:38,237
+[gasps]
+
+351
+00:15:38,238 --> 00:15:39,571
+Look at me!
+
+352
+00:15:39,572 --> 00:15:42,608
+I've become the very
+thing I hate most.
+
+353
+00:15:42,609 --> 00:15:44,576
+A guy who pokes things
+with sticks?
+
+354
+00:15:44,577 --> 00:15:49,181
+Oh, we've abused ling-ling
+to the point where the
+little guy is just numb.
+
+355
+00:15:49,182 --> 00:15:50,649
+And for what?
+
+356
+00:15:50,650 --> 00:15:52,551
+To catch a buzz?!
+
+357
+00:15:52,552 --> 00:15:55,654
+I guess being cool
+just isn't worth it.
+
+358
+00:15:55,655 --> 00:15:59,024
+["what you already know"
+jingle plays]
+
+359
+00:15:59,025 --> 00:16:01,293
+Come on. We've got to
+make this right!
+
+360
+00:16:02,529 --> 00:16:04,396
+Sorry, wooldoor.
+You stay here
+
+361
+00:16:04,397 --> 00:16:06,899
+Until all that ling-ling
+is out of your system.
+
+362
+00:16:06,900 --> 00:16:10,135
+It's gonna be
+the hardest thing
+you've ever done.
+
+363
+00:16:10,136 --> 00:16:11,136
+Toodles!
+
+364
+00:16:11,137 --> 00:16:13,405
+[slam]
+
+365
+00:16:13,406 --> 00:16:15,474
+Aah! I can't take it!
+
+366
+00:16:15,475 --> 00:16:18,477
+[wacky noises]
+
+367
+00:16:21,047 --> 00:16:24,416
+Ha ha ha ha ha ha!
+Wooldoor: I can't take it!
+
+368
+00:16:24,417 --> 00:16:26,285
+Aaaaaaaaaaaaaaaah!
+
+369
+00:16:26,286 --> 00:16:29,154
+Captain hero:
+oh, sure, the money
+helped dull the pain,
+
+370
+00:16:29,155 --> 00:16:33,692
+But later, you know, when
+the stores are all closed,
+
+371
+00:16:33,693 --> 00:16:35,294
+Who do I have
+to share it with?
+
+372
+00:16:35,295 --> 00:16:41,433
+Bleh? No. Not anymore.
+Not anymore.
+
+373
+00:16:41,434 --> 00:16:43,635
+It just wasn't
+worth it, spanky!
+
+374
+00:16:43,636 --> 00:16:46,705
+Bleh, she--
+she was special.
+
+375
+00:16:46,706 --> 00:16:48,807
+What are you--
+what are you saying?
+
+376
+00:16:48,808 --> 00:16:52,678
+Oh, spanky,
+I love her.
+
+377
+00:16:53,880 --> 00:16:59,018
+Then go to her.
+
+378
+00:16:59,019 --> 00:17:00,719
+Xandir:
+we knew what we had done
+to ling-ling was wrong.
+
+379
+00:17:00,720 --> 00:17:02,488
+Hopefully, this was gonna
+
+380
+00:17:02,489 --> 00:17:03,622
+Make it up to that lovable,
+
+381
+00:17:03,623 --> 00:17:06,258
+Fortune cookie cat thing.
+
+382
+00:17:06,259 --> 00:17:11,096
+Ling-ling, there's
+a special warrior
+here to see you.
+
+383
+00:17:11,097 --> 00:17:15,567
+[japanese song playing]
+
+384
+00:17:15,568 --> 00:17:17,503
+[speaking japanese]
+
+385
+00:17:19,739 --> 00:17:21,674
+[speaking japanese]
+
+386
+00:17:33,086 --> 00:17:35,654
+Toot:
+sure, we have our issues,
+
+387
+00:17:35,655 --> 00:17:39,224
+But really, this is
+a house drawn together
+
+388
+00:17:39,225 --> 00:17:40,659
+With love.
+
+389
+00:17:40,660 --> 00:17:43,595
+[speaking japanese]
+
+390
+00:18:05,752 --> 00:18:07,753
+Yah!
+Uhh!
+
+391
+00:18:07,754 --> 00:18:10,189
+[slurping]
+
+392
+00:18:10,190 --> 00:18:12,991
+All right, clara,
+where is she?!
+
+393
+00:18:12,992 --> 00:18:15,828
+Hah! You're too late,
+captain jerko!
+
+394
+00:18:15,829 --> 00:18:18,630
+She left for the front door
+well over 2 minutes ago!
+
+395
+00:18:18,631 --> 00:18:21,066
+You'll never catch her!
+Never!
+
+396
+00:18:21,067 --> 00:18:23,202
+Oh. Oh, well.
+
+397
+00:18:23,203 --> 00:18:24,903
+I guess it wasn't
+meant to be.
+
+398
+00:18:24,904 --> 00:18:26,939
+Easy come, easy go.
+
+399
+00:18:26,940 --> 00:18:28,807
+Well, i'll be
+seeing you, clara.
+
+400
+00:18:28,808 --> 00:18:31,143
+Wait! Damn you!
+I've got to try!
+
+401
+00:18:31,144 --> 00:18:32,578
+Yahh!
+
+402
+00:18:34,447 --> 00:18:36,448
+Captain hero: bleh!
+Wait! I'm coming!
+
+403
+00:18:36,449 --> 00:18:37,950
+Dude, where are you--uhh!
+
+404
+00:18:37,951 --> 00:18:39,518
+Watch it, pig!
+
+405
+00:18:39,519 --> 00:18:41,086
+Go get her, man.
+
+406
+00:18:41,087 --> 00:18:42,087
+Uhh!
+
+407
+00:18:42,088 --> 00:18:43,355
+Watch it,
+hot black girl!
+
+408
+00:18:43,356 --> 00:18:45,157
+Go get her, man.
+
+409
+00:18:45,158 --> 00:18:49,895
+[muzak playing]
+
+410
+00:18:49,896 --> 00:18:56,168
+[blissful sighs and moans]
+
+411
+00:18:56,169 --> 00:18:57,770
+Oh, come on!
+
+412
+00:19:00,206 --> 00:19:05,444
+Bleh! Bleh!
+Bleh! Bleh!
+
+413
+00:19:05,445 --> 00:19:06,812
+Oh.
+
+414
+00:19:06,813 --> 00:19:08,747
+Bleh! Bleh, wait!
+
+415
+00:19:08,748 --> 00:19:11,350
+Listen, I don't know
+what clara told you,
+
+416
+00:19:11,351 --> 00:19:14,453
+But I love you.
+
+417
+00:19:14,454 --> 00:19:16,555
+Yehhhhhhhh.
+
+418
+00:19:18,458 --> 00:19:20,192
+[splat]
+
+419
+00:19:20,193 --> 00:19:22,127
+"one of the year's
+10 best!"
+
+420
+00:19:22,128 --> 00:19:25,631
+"delivers in ways
+you never expected!"
+
+421
+00:19:25,632 --> 00:19:29,134
+Yes, well, maybe
+it wouldn't work out.
+
+422
+00:19:29,135 --> 00:19:31,637
+I mean, we come
+from different worlds.
+
+423
+00:19:31,638 --> 00:19:33,772
+I come from
+planet zebulon
+
+424
+00:19:33,773 --> 00:19:36,909
+And you come from
+a mom who drank when
+she was pregnant.
+
+425
+00:19:36,910 --> 00:19:40,446
+Well, farewell,
+my sweet.
+
+426
+00:19:40,447 --> 00:19:41,947
+Mwah!
+
+427
+00:19:41,948 --> 00:19:44,983
+Mnnnh deh
+nnnh.
+
+428
+00:19:44,984 --> 00:19:47,186
+Yeah! She kissed him!
+
+429
+00:19:47,187 --> 00:19:49,488
+She kissed him!
+
+430
+00:19:49,489 --> 00:19:51,657
+Ha ha! Ha ha!
+
+431
+00:19:51,658 --> 00:19:55,994
+Bleh, you nailed
+the dry-mouth
+from reality tv show!
+
+432
+00:19:55,995 --> 00:19:59,264
+Well, I guess I owe you
+that 50 bucks now.
+
+433
+00:19:59,265 --> 00:20:02,801
+Oh! 50 bucks! Yeah!
+
+434
+00:20:02,802 --> 00:20:06,438
+[cash register bell]
+ooh, yeah,
+who's your daddy?
+
+435
+00:20:06,439 --> 00:20:09,908
+I like the big one
+and I like the one
+over there. Ooh!
+
+436
+00:20:09,909 --> 00:20:13,245
+Yeah. Hell, yeah!
+[girls talking and laughing]
+
+437
+00:20:13,246 --> 00:20:14,880
+Bleh: sam, bye-bye!
+
+438
+00:20:14,881 --> 00:20:16,048
+Bye-bye, sam!
+
+439
+00:20:16,049 --> 00:20:18,150
+[choked up] bye.
+
+440
+00:20:19,786 --> 00:20:21,854
+Good-bye.
+
+441
+00:20:23,189 --> 00:20:24,823
+You ok, man?
+
+442
+00:20:24,824 --> 00:20:28,560
+I...Don't know
+if i'll ever be ok.
+
+443
+00:20:28,561 --> 00:20:30,062
+Yeah, i'm sorry.
+
+444
+00:20:30,063 --> 00:20:32,031
+Hey, you want to go
+grab a beer,
+
+445
+00:20:32,032 --> 00:20:34,533
+Spin it, and kiss
+whoever it points to?
+
+446
+00:20:34,534 --> 00:20:39,271
+No. Not now, spanky.
+Not now.
+
+447
+00:20:43,543 --> 00:20:44,543
+How about now?
+
+448
+00:20:44,544 --> 00:20:45,811
+Yeah. Ok.
+
+449
+00:20:45,812 --> 00:20:51,550
+¶ girly, girly,
+girly, girly girl ¶
+
+450
+00:20:51,551 --> 00:20:53,052
+¶ girl ¶
+
+451
+00:20:53,053 --> 00:20:55,954
+¶ girly, girly girl-- ¶
+
diff --git a/tests/ref/fate/sub-subripenc b/tests/ref/fate/sub-subripenc
index 1f1e0316..7f35ae9b 100644
--- a/tests/ref/fate/sub-subripenc
+++ b/tests/ref/fate/sub-subripenc
@@ -1,14 +1,14 @@
 1
 00:00:00,970 --> 00:00:02,540
-- Test 1.
-- Test 2.
+<font face="Serif" size="18">- Test 1.
+- Test 2.</font>
 
 2
 00:00:03,050 --> 00:00:04,740
-Test 3.
+<font face="Serif" size="18">Test 3.</font>
 
 3
 00:00:05,850 --> 00:00:08,140
-- Test 4.
-- Test 5.
+<font face="Serif" size="18">- Test 4.
+- Test 5.</font>
 
diff --git a/tests/ref/fate/sub-textenc b/tests/ref/fate/sub-textenc
new file mode 100644
index 00000000..cb0db7fe
--- /dev/null
+++ b/tests/ref/fate/sub-textenc
@@ -0,0 +1,213 @@
+1
+00:00:00,000 --> 00:00:00,000
+Don't show this text it may be used to insert hidden data
+
+2
+00:00:01,500 --> 00:00:04,500
+SubRip subtitles capability tester 1.3o by ale5000
+Use VLC 1.1 or higher as reference for most things and MPC Home Cinema for others
+This text should be blue
+This text should be red
+This text should be black
+If you see this with the normal font, the player don't (fully) support font face
+
+3
+00:00:04,500 --> 00:00:04,500
+Hidden
+
+4
+00:00:04,501 --> 00:00:07,501
+This text should be small
+This text should be normal
+This text should be big
+
+5
+00:00:07,501 --> 00:00:11,501
+This should be an E with an accent: È
+日本語
+This text should be bold, italics and underline
+This text should be small and green
+This text should be small and red
+This text should be big and brown
+
+6
+00:00:11,501 --> 00:00:14,501
+This line should be bold
+This line should be italics
+This line should be underline
+This line should be strikethrough
+Both lines
+should be underline
+
+7
+00:00:14,501 --> 00:00:17,501
+>
+It would be a good thing to
+hide invalid html tags that are closed and show the text in them
+<invalid_tag_unclosed>but show un-closed invalid html tags
+Show not opened tags</invalid_tag_not_opened>
+<
+
+8
+00:00:17,501 --> 00:00:20,501
+and also
+hide invalid html tags with parameters that are closed and show the text in them
+<invalid_tag_uc par=5>but show un-closed invalid html tags
+This text should be showed underlined without problems also: 2<3,5>1,4<6
+This shouldn't be underlined
+
+9
+00:00:20,501 --> 00:00:21,501
+This text should be in the normal position...
+
+10
+00:00:21,501 --> 00:00:22,501
+This text should NOT be in the normal position
+
+11
+00:00:22,501 --> 00:00:24,501
+Implementation is the same of the ASS tag
+This text should be at the
+top and horizontally centered
+
+12
+00:00:22,501 --> 00:00:24,501
+This text should be at the
+middle and horizontally centered
+
+13
+00:00:22,501 --> 00:00:24,501
+This text should be at the
+bottom and horizontally centered
+
+14
+00:00:24,501 --> 00:00:26,501
+This text should be at the
+top and horizontally at the left
+
+15
+00:00:24,501 --> 00:00:26,501
+This text should be at the
+middle and horizontally at the left
+(The second position must be ignored)
+
+16
+00:00:24,501 --> 00:00:26,501
+This text should be at the
+bottom and horizontally at the left
+
+17
+00:00:26,501 --> 00:00:28,501
+This text should be at the
+top and horizontally at the right
+
+18
+00:00:26,501 --> 00:00:28,501
+This text should be at the
+middle and horizontally at the right
+
+19
+00:00:26,501 --> 00:00:28,501
+This text should be at the
+bottom and horizontally at the right
+
+20
+00:00:28,501 --> 00:00:31,501
+This could be the most difficult thing to implement
+
+21
+00:00:31,501 --> 00:00:50,501
+First text
+
+22
+00:00:33,500 --> 00:00:35,500
+Second, it shouldn't overlap first
+
+23
+00:00:35,501 --> 00:00:37,501
+Third, it should replace second
+
+24
+00:00:36,501 --> 00:00:50,501
+Fourth, it shouldn't overlap first and third
+
+25
+00:00:40,501 --> 00:00:45,501
+Fifth, it should replace third
+
+26
+00:00:45,501 --> 00:00:50,501
+Sixth, it shouldn't be
+showed overlapped
+
+27
+00:00:50,501 --> 00:00:52,501
+TEXT 1 (bottom)
+
+28
+00:00:50,501 --> 00:00:52,501
+text 2
+
+29
+00:00:52,501 --> 00:00:54,501
+Hide these tags:
+also hide these tags:
+but show this: {normal text}
+
+30
+00:00:54,501 --> 00:01:00,501
+
+\ N is a forced line break
+\ h is a hard space
+Normal spaces at the start and at the end of the line are trimmed while hard spaces are not trimmed.
+The\hline\hwill\hnever\hbreak\hautomatically\hright\hbefore\hor\hafter\ha\hhard\hspace.\h:-D
+
+31
+00:00:54,501 --> 00:00:56,501
+
+\h\h\h\h\hA (05 hard spaces followed by a letter)
+A (Normal  spaces followed by a letter)
+A (No hard spaces followed by a letter)
+
+32
+00:00:56,501 --> 00:00:58,501
+\h\h\h\h\hA (05 hard spaces followed by a letter)
+A (Normal  spaces followed by a letter)
+A (No hard spaces followed by a letter)
+Show this: \TEST and this: \-)
+
+33
+00:00:58,501 --> 00:01:00,501
+
+A letter followed by 05 hard spaces: A\h\h\h\h\h
+A letter followed by normal  spaces: A
+A letter followed by no hard spaces: A
+05 hard  spaces between letters: A\h\h\h\h\hA
+5 normal spaces between letters: A     A
+
+^--Forced line break
+
+34
+00:01:00,501 --> 00:01:02,501
+Both line should be strikethrough,
+yes.
+Correctly closed tags
+should be hidden.
+
+35
+00:01:02,501 --> 00:01:04,501
+It shouldn't be strikethrough,
+not opened tag showed as text.</s>
+Not opened tag showed as text.</xxxxx>
+
+36
+00:01:04,501 --> 00:01:06,501
+Three lines should be strikethrough,
+yes.
+<yyyy>Not closed tags showed as text
+
+37
+00:01:06,501 --> 00:01:08,501
+Both line should be strikethrough but
+the wrong closing tag should be showed</b>
+
diff --git a/tests/ref/fate/sub-webvtt2 b/tests/ref/fate/sub-webvtt2
new file mode 100644
index 00000000..9f7827d6
--- /dev/null
+++ b/tests/ref/fate/sub-webvtt2
@@ -0,0 +1,24 @@
+[Script Info]
+; Script generated by FFmpeg/Lavc
+ScriptType: v4.00+
+PlayResX: 384
+PlayResY: 288
+
+[V4+ Styles]
+Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
+Style: Default,Arial,16,&Hffffff,&Hffffff,&H0,&H0,0,0,0,0,100,100,0,0,1,1,0,2,10,10,10,0
+
+[Events]
+Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
+Dialogue: 0,0:00:00.00,0:00:20.00,Default,,0,0,0,,Hi, my name is Fred
+Dialogue: 0,0:00:02.50,0:00:22.50,Default,,0,0,0,,Hi, I’m Bill
+Dialogue: 0,0:00:05.00,0:00:25.00,Default,,0,0,0,,Would you like to get a coffee?
+Dialogue: 0,0:00:07.50,0:00:27.50,Default,,0,0,0,,Sure! I’ve only had one today.
+Dialogue: 0,0:00:10.00,0:00:30.00,Default,,0,0,0,,This is my fourth!
+Dialogue: 0,0:00:12.50,0:00:32.50,Default,,0,0,0,,OK, let’s go.
+Dialogue: 0,0:00:38.00,0:00:43.00,Default,,0,0,0,,I want to 愛あい love you\NThat's not proper English!
+Dialogue: 0,0:00:43.00,0:00:46.00,Default,,0,0,0,,{\i1}キツネ{\i0}じゃない　キツネじゃない\N乙女おとめは
+Dialogue: 0,0:00:50.00,0:00:55.00,Default,,0,0,0,,Some time ago in a rather distant place....
+Dialogue: 0,0:00:55.00,0:01:00.00,Default,,0,0,0,,Descending: 123456\NAscending: 123456
+Dialogue: 0,0:01:00.00,0:01:05.00,Default,,0,0,0,,>> Never gonna give you up Never gonna let you down\NNever\hgonna\hrun\haround & desert\hyou
+Dialogue: 0,0:55:00.00,1:00:00.00,Default,,0,0,0,,Transcrit par Célestes™
diff --git a/tests/ref/fate/sub2video b/tests/ref/fate/sub2video
index 5c2c46a6..3a03cbf6 100644
--- a/tests/ref/fate/sub2video
+++ b/tests/ref/fate/sub2video
@@ -52,46 +52,129 @@
 0,         47,         47,        1,   518400, 0xde69683f
 0,         48,         48,        1,   518400, 0x7df08fba
 0,         49,         49,        1,   518400, 0xbab197ea
-0,         50,         50,        1,   518400, 0x902285d9
 1,      15355,      15355,     4733,     2094, 0x3c171425, F=0x0
+0,         77,         77,        1,   518400, 0x902285d9
+0,        100,        100,        1,   518400, 0xbab197ea
 1,      48797,      48797,     2560,     2480, 0x7c0edf21, F=0x0
+0,        244,        244,        1,   518400, 0x7a11c812
+0,        257,        257,        1,   518400, 0xbab197ea
 1,      51433,      51433,     2366,     3059, 0xc95b8a05, F=0x0
+0,        258,        258,        1,   518400, 0x34cdddee
+0,        269,        269,        1,   518400, 0xbab197ea
 1,      53910,      53910,     2696,     2095, 0x61bb15ed, F=0x0
+0,        270,        270,        1,   518400, 0x4db4ce51
+0,        283,        283,        1,   518400, 0xbab197ea
 1,      56663,      56663,     1262,     1013, 0xc9ae89b7, F=0x0
+0,        284,        284,        1,   518400, 0xe6bc0ea9
+0,        290,        290,        1,   518400, 0xbab197ea
 1,      58014,      58014,     1661,      969, 0xe01878f0, F=0x0
+0,        291,        291,        1,   518400, 0xa8643af7
+0,        298,        298,        1,   518400, 0xbab197ea
 1,      67724,      67724,     1365,      844, 0xe7db4fc1, F=0x0
+0,        339,        339,        1,   518400, 0xb1885c67
+0,        345,        345,        1,   518400, 0xbab197ea
 1,      69175,      69175,     1558,      802, 0xf48531ba, F=0x0
+0,        346,        346,        1,   518400, 0x378e3fd0
+0,        354,        354,        1,   518400, 0xbab197ea
 1,      70819,      70819,     1865,     1709, 0xb4d5a1bd, F=0x0
+0,        355,        355,        1,   518400, 0xa3782469
+0,        363,        363,        1,   518400, 0xbab197ea
 1,      72762,      72762,     1968,     2438, 0x99d7bc82, F=0x0
+0,        364,        364,        1,   518400, 0xba23a0d5
+0,        374,        374,        1,   518400, 0xbab197ea
 1,      74806,      74806,     1831,     2116, 0x96514097, F=0x0
+0,        375,        375,        1,   518400, 0x129de2f8
+0,        383,        383,        1,   518400, 0xbab197ea
 1,      76716,      76716,     1262,     1822, 0xefccc72e, F=0x0
+0,        384,        384,        1,   518400, 0x19772f0f
+0,        390,        390,        1,   518400, 0xbab197ea
 1,      78051,      78051,     1524,      987, 0x7b927a27, F=0x0
+0,        391,        391,        1,   518400, 0x56f54e73
+0,        398,        398,        1,   518400, 0xbab197ea
 1,      79644,      79644,     2662,     2956, 0x190778f7, F=0x0
+0,        399,        399,        1,   518400, 0x300b5247
 1,      82380,      82380,     2764,     3094, 0xc021b7d3, F=0x0
+0,        412,        412,        1,   518400, 0xbab197ea
+0,        413,        413,        1,   518400, 0x6fd028fa
+0,        426,        426,        1,   518400, 0xbab197ea
 1,      85225,      85225,     2366,     2585, 0x74d0048f, F=0x0
+0,        427,        427,        1,   518400, 0x01f80e9d
+0,        438,        438,        1,   518400, 0xbab197ea
 1,      87652,      87652,     1831,      634, 0x8832fda1, F=0x0
+0,        439,        439,        1,   518400, 0xb48d90c0
+0,        447,        447,        1,   518400, 0xbab197ea
 1,      91531,      91531,     2332,     2080, 0x97a1146f, F=0x0
+0,        458,        458,        1,   518400, 0xcb5a0173
+0,        469,        469,        1,   518400, 0xbab197ea
 1,      95510,      95510,     3299,     2964, 0x8b8f6684, F=0x0
+0,        478,        478,        1,   518400, 0xb8a323e4
+0,        494,        494,        1,   518400, 0xbab197ea
 1,      98872,      98872,     2161,     1875, 0x9002ef71, F=0x0
+0,        495,        495,        1,   518400, 0xc43518ba
+0,        505,        505,        1,   518400, 0xbab197ea
 1,     101124,     101124,     4096,     3872, 0x20c6ed9c, F=0x0
+0,        506,        506,        1,   518400, 0x04e38692
+0,        526,        526,        1,   518400, 0xbab197ea
 1,     105303,     105303,     2730,     3094, 0xf203a663, F=0x0
+0,        527,        527,        1,   518400, 0x856b0ee5
+0,        540,        540,        1,   518400, 0xbab197ea
 1,     108106,     108106,     2059,     2404, 0x41a7b429, F=0x0
+0,        541,        541,        1,   518400, 0x3e5beee2
+0,        551,        551,        1,   518400, 0xbab197ea
 1,     141556,     141556,     1661,     1088, 0xde20aa20, F=0x0
+0,        708,        708,        1,   518400, 0xb8bc1365
+0,        716,        716,        1,   518400, 0xbab197ea
+0,        817,        817,        1,   518400, 0x83efa32d
 1,     163445,     163445,     1331,      339, 0x8bd186ef, F=0x0
+0,        824,        824,        1,   518400, 0xbab197ea
+0,        840,        840,        1,   518400, 0x03ea0e90
 1,     168049,     168049,     1900,     1312, 0x0bf20e8d, F=0x0
+0,        850,        850,        1,   518400, 0xbab197ea
 1,     170035,     170035,     1524,     1279, 0xb6c2dafe, F=0x0
+0,        851,        851,        1,   518400, 0x8780239e
+0,        858,        858,        1,   518400, 0xbab197ea
+0,        861,        861,        1,   518400, 0x6eb72347
 1,     172203,     172203,     1695,     1826, 0x9a1ac769, F=0x0
+0,        869,        869,        1,   518400, 0xbab197ea
 1,     173947,     173947,     1934,     1474, 0xa9b03cdc, F=0x0
+0,        870,        870,        1,   518400, 0x9c4a3a3d
+0,        879,        879,        1,   518400, 0xbab197ea
 1,     175957,     175957,     1763,     1019, 0x20409355, F=0x0
+0,        880,        880,        1,   518400, 0xc9ebfa89
+0,        889,        889,        1,   518400, 0xbab197ea
+0,        946,        946,        1,   518400, 0xbaf801ef
 1,     189295,     189295,     1968,     1596, 0x408c726e, F=0x0
+0,        956,        956,        1,   518400, 0xbab197ea
 1,     191356,     191356,     1228,     1517, 0xae8c5c2b, F=0x0
+0,        957,        957,        1,   518400, 0x59f4e72f
+0,        963,        963,        1,   518400, 0xbab197ea
 1,     192640,     192640,     1763,     2506, 0xa458d6d4, F=0x0
+0,        964,        964,        1,   518400, 0x9d5b9d69
+0,        972,        972,        1,   518400, 0xbab197ea
 1,     195193,     195193,     1092,     1074, 0x397ba9a8, F=0x0
+0,        976,        976,        1,   518400, 0x923d1ce7
+0,        981,        981,        1,   518400, 0xbab197ea
 1,     196361,     196361,     1524,     1715, 0x695ca41e, F=0x0
+0,        982,        982,        1,   518400, 0x6e652cd2
+0,        989,        989,        1,   518400, 0xbab197ea
 1,     197946,     197946,     1160,      789, 0xc63a189e, F=0x0
+0,        990,        990,        1,   518400, 0x25113966
+0,        996,        996,        1,   518400, 0xbab197ea
 1,     199230,     199230,     1627,     1846, 0xeea8c599, F=0x0
+0,        997,        997,        1,   518400, 0x2dc83609
+0,       1004,       1004,        1,   518400, 0xbab197ea
 1,     200924,     200924,     1763,      922, 0xd4a87222, F=0x0
+0,       1005,       1005,        1,   518400, 0x90483bc6
+0,       1013,       1013,        1,   518400, 0xbab197ea
+0,       1053,       1053,        1,   518400, 0x3de86ab7
 1,     210600,     210600,     1831,      665, 0x55580135, F=0x0
+0,       1062,       1062,        1,   518400, 0xbab197ea
 1,     214771,     214771,     1558,     1216, 0x50d1f6c5, F=0x0
+0,       1074,       1074,        1,   518400, 0x8c320e68
+0,       1082,       1082,        1,   518400, 0xbab197ea
+0,       1128,       1128,        1,   518400, 0x81e977b2
 1,     225640,     225640,     2127,     2133, 0x670c11a5, F=0x0
+0,       1139,       1139,        1,   518400, 0xbab197ea
 1,     227834,     227834,     1262,     1264, 0xc1d9fc57, F=0x0
+0,       1140,       1140,        1,   518400, 0xb046dd30
+0,       1145,       1145,        1,   518400, 0xbab197ea
diff --git a/tests/ref/fate/tea b/tests/ref/fate/tea
new file mode 100644
index 00000000..fed0b4dd
--- /dev/null
+++ b/tests/ref/fate/tea
@@ -0,0 +1 @@
+Test encryption/decryption success.
diff --git a/tests/ref/fate/txd-16bpp b/tests/ref/fate/txd-16bpp
index 95228739..33943f90 100644
--- a/tests/ref/fate/txd-16bpp
+++ b/tests/ref/fate/txd-16bpp
@@ -1,12 +1,12 @@
 #tb 0: 1/5
-0,          0,          0,        1,    16384, 0x213f9ea8
-0,          1,          1,        1,    16384, 0x8185fdb1
-0,          2,          2,        1,    16384, 0xf03581d1
-0,          3,          3,        1,    16384, 0x629cd573
-0,          4,          4,        1,    16384, 0xfe7a5b63
-0,          5,          5,        1,    16384, 0x4afc05b2
-0,          6,          6,        1,    16384, 0x074b8515
-0,          7,          7,        1,    16384, 0x17fde900
-0,          8,          8,        1,    16384, 0x831bac76
-0,          9,          9,        1,    16384, 0x2fb579f3
-0,         10,         10,        1,    16384, 0x68762bed
+0,          0,          0,        1,    16384, 0x4d39b4cb
+0,          1,          1,        1,    16384, 0x2a8d14b4
+0,          2,          2,        1,    16384, 0xe26793cb
+0,          3,          3,        1,    16384, 0x62b5ed43
+0,          4,          4,        1,    16384, 0x5ae86c21
+0,          5,          5,        1,    16384, 0x416d184a
+0,          6,          6,        1,    16384, 0x33f59d3e
+0,          7,          7,        1,    16384, 0x3d0ffd9c
+0,          8,          8,        1,    16384, 0x35bac4c6
+0,          9,          9,        1,    16384, 0xa1cd8ffb
+0,         10,         10,        1,    16384, 0xf7dc38d2
diff --git a/tests/ref/fate/txd-odd b/tests/ref/fate/txd-odd
new file mode 100644
index 00000000..fe7c615e
--- /dev/null
+++ b/tests/ref/fate/txd-odd
@@ -0,0 +1,2 @@
+#tb 0: 1/5
+0,          0,          0,        1,   385452, 0x055a14d6
diff --git a/tests/ref/fate/vp9-10-show-existing-frame b/tests/ref/fate/vp9-10-show-existing-frame
index 266f44e5..6a2c904c 100644
--- a/tests/ref/fate/vp9-10-show-existing-frame
+++ b/tests/ref/fate/vp9-10-show-existing-frame
@@ -4,11 +4,15 @@
 #tb 0: 1/30
 #stream#, dts,        pts, duration,     size, hash
 0,          0,          0,        1,   152064, 18981342ec178e082519451062c3a67f
-0,          1,          1,        1,   152064, 04ab9dbeac49ec31be58f6e671698e05
+0,          3,          3,        1,   152064, 04ab9dbeac49ec31be58f6e671698e05
+0,          4,          4,        1,   152064, 4ed58a0ba93a5d97a232a50c5876cda2
 0,          6,          6,        1,   152064, a41f00034923e56ba51a0b598acc2e3a
 0,          7,          7,        1,   152064, 63fa55ae9535ccdf06d44cce8065dda6
+0,          8,          8,        1,   152064, a41f00034923e56ba51a0b598acc2e3a
 0,          9,          9,        1,   152064, 0e4b08e14d919edee2bbff2ecd47de57
-0,         10,         10,        1,   152064, 0e4b08e14d919edee2bbff2ecd47de57
+0,         11,         11,        1,   152064, 0e4b08e14d919edee2bbff2ecd47de57
+0,         12,         12,        1,   152064, 5d4af03fc3d410413ef2b5a6275528b7
 0,         13,         13,        1,   152064, 9e932915c67a789f6877e6d3f76d3649
 0,         14,         14,        1,   152064, 12f2e975c217e7ffcf334524e8acec35
 0,         15,         15,        1,   152064, 9e932915c67a789f6877e6d3f76d3649
+0,         16,         16,        1,   152064, 12f2e975c217e7ffcf334524e8acec35
diff --git a/tests/ref/fate/vp9-10-show-existing-frame2 b/tests/ref/fate/vp9-10-show-existing-frame2
index cdd4369e..b0fa6691 100644
--- a/tests/ref/fate/vp9-10-show-existing-frame2
+++ b/tests/ref/fate/vp9-10-show-existing-frame2
@@ -15,6 +15,7 @@
 0,          9,          9,        1,   152064, 7dc65a2af108379f2b9265a9a1ea7cf8
 0,         10,         10,        1,   152064, c979e2f084760775a567f60f79f28198
 0,         11,         11,        1,   152064, fe668a6417aa0543e4ed4d1c67c5cbcb
+0,         12,         12,        1,   152064, bf9901e39815fa93cce0ed5b02b2ef2d
 0,         13,         13,        1,   152064, 627466200370e6ad60ea570d31be66e3
 0,         14,         14,        1,   152064, 7dc65a2af108379f2b9265a9a1ea7cf8
 0,         15,         15,        1,   152064, c979e2f084760775a567f60f79f28198
diff --git a/tests/ref/fate/vp9-16-intra-only b/tests/ref/fate/vp9-16-intra-only
index 5bbfaea8..1e8d280b 100644
--- a/tests/ref/fate/vp9-16-intra-only
+++ b/tests/ref/fate/vp9-16-intra-only
@@ -1,10 +1,12 @@
 #format: frame checksums
 #version: 1
 #hash: MD5
-#tb 0: 12/359
+#tb 0: 1001/30000
 #stream#, dts,        pts, duration,     size, hash
 0,          0,          0,        1,   152064, d57529601178948afa4818c3c8938884
 0,          1,          1,        1,   152064, d47e00250c45733d64af067a417bcd06
 0,          2,          2,        1,   152064, 984e41cd8350808ac6129746b2377818
-0,          4,          4,        1,   152064, 76ba63001170b8992fc72be5c4ace731
-0,          5,          5,        1,   152064, c4e7f96a8fd58d901b1d881926ddae09
+0,          3,          3,        1,   152064, a5fa62996b4bb52e72e335722cf55bef
+0,          4,          4,        1,   152064, b71ca5ad650170ac921a71a6440fb508
+0,          5,          5,        1,   152064, 76ba63001170b8992fc72be5c4ace731
+0,          6,          6,        1,   152064, c4e7f96a8fd58d901b1d881926ddae09
diff --git a/tests/ref/lavf/ffm b/tests/ref/lavf/ffm
index 5de2f39b..c4d7e1f8 100644
--- a/tests/ref/lavf/ffm
+++ b/tests/ref/lavf/ffm
@@ -1,3 +1,3 @@
-d5d4e5e3eec336ae6680dde035870564 *./tests/data/lavf/lavf.ffm
+e63c16b5f0ad5015304fc4009fdb33ca *./tests/data/lavf/lavf.ffm
 376832 ./tests/data/lavf/lavf.ffm
 ./tests/data/lavf/lavf.ffm CRC=0x000e23ae
diff --git a/tests/ref/lavf/mkv b/tests/ref/lavf/mkv
index edbfe60a..39a48d8e 100644
--- a/tests/ref/lavf/mkv
+++ b/tests/ref/lavf/mkv
@@ -1,6 +1,6 @@
-bab98f5a04a9f7991fb960041c996478 *./tests/data/lavf/lavf.mkv
-472668 ./tests/data/lavf/lavf.mkv
+7c6509f597fb57bab002cbceec960011 *./tests/data/lavf/lavf.mkv
+472872 ./tests/data/lavf/lavf.mkv
 ./tests/data/lavf/lavf.mkv CRC=0xec6c3c68
-c93950920d4ee57eb3ff5ba0cf0c8b19 *./tests/data/lavf/lavf.mkv
-320412 ./tests/data/lavf/lavf.mkv
+5f8cb4b7e98610347dd8d0d58a828a0f *./tests/data/lavf/lavf.mkv
+320548 ./tests/data/lavf/lavf.mkv
 ./tests/data/lavf/lavf.mkv CRC=0xec6c3c68
diff --git a/tests/ref/lavf/mxf b/tests/ref/lavf/mxf
index 71f6cf84..e1c0c79b 100644
--- a/tests/ref/lavf/mxf
+++ b/tests/ref/lavf/mxf
@@ -1,9 +1,9 @@
-030961ae56ab1c264390fd5ef0a95069 *./tests/data/lavf/lavf.mxf
-525881 ./tests/data/lavf/lavf.mxf
+f9b570c7b4fbbc2b71f2236b32e7cbb6 *./tests/data/lavf/lavf.mxf
+525369 ./tests/data/lavf/lavf.mxf
 ./tests/data/lavf/lavf.mxf CRC=0xdbfff6f1
-b90dc91dee50a24c8b20a08a063f501a *./tests/data/lavf/lavf.mxf
-561209 ./tests/data/lavf/lavf.mxf
+8f6a9a6b409f0f5a0bf003f8dea26314 *./tests/data/lavf/lavf.mxf
+560697 ./tests/data/lavf/lavf.mxf
 ./tests/data/lavf/lavf.mxf CRC=0x11a6178e
-a0cfffed795686127061feae8cde07d1 *./tests/data/lavf/lavf.mxf
-525881 ./tests/data/lavf/lavf.mxf
+10ac0f158fc0af356439b818de7601e3 *./tests/data/lavf/lavf.mxf
+525369 ./tests/data/lavf/lavf.mxf
 ./tests/data/lavf/lavf.mxf CRC=0xdbfff6f1
diff --git a/tests/ref/lavf/mxf_d10 b/tests/ref/lavf/mxf_d10
index 8b71d72f..134db876 100644
--- a/tests/ref/lavf/mxf_d10
+++ b/tests/ref/lavf/mxf_d10
@@ -1,3 +1,3 @@
-9b5bad981e08fa3eaeb9de818762218c *./tests/data/lavf/lavf.mxf_d10
+73c0cb416548c33d0651c59519a8f7e2 *./tests/data/lavf/lavf.mxf_d10
 5330989 ./tests/data/lavf/lavf.mxf_d10
 ./tests/data/lavf/lavf.mxf_d10 CRC=0x6c74d488
diff --git a/tests/ref/lavf/mxf_opatom b/tests/ref/lavf/mxf_opatom
index 3dbb8de2..ea1190c0 100644
--- a/tests/ref/lavf/mxf_opatom
+++ b/tests/ref/lavf/mxf_opatom
@@ -1,3 +1,3 @@
-6cab8f702746ca7184c608b4c06a224b *./tests/data/lavf/lavf.mxf_opatom
+962c2cd582340f8961a8283636093abf *./tests/data/lavf/lavf.mxf_opatom
 4717113 ./tests/data/lavf/lavf.mxf_opatom
-./tests/data/lavf/lavf.mxf_opatom CRC=0xbdd696b9
+./tests/data/lavf/lavf.mxf_opatom CRC=0xf55aa22a
diff --git a/tests/ref/lavf/mxf_opatom_audio b/tests/ref/lavf/mxf_opatom_audio
index 8d558c6e..953df909 100644
--- a/tests/ref/lavf/mxf_opatom_audio
+++ b/tests/ref/lavf/mxf_opatom_audio
@@ -1,3 +1,3 @@
-6c6064f154688e455ec494d425b525f5 *./tests/data/lavf/lavf.mxf_opatom_audio
-102457 ./tests/data/lavf/lavf.mxf_opatom_audio
+d4ad5a0faf410a9d9e99b3328143e89d *./tests/data/lavf/lavf.mxf_opatom_audio
+101945 ./tests/data/lavf/lavf.mxf_opatom_audio
 ./tests/data/lavf/lavf.mxf_opatom_audio CRC=0xd155c6ff
diff --git a/tests/ref/lavf/pam b/tests/ref/lavf/pam
index abb29743..97893f60 100644
--- a/tests/ref/lavf/pam
+++ b/tests/ref/lavf/pam
@@ -13,6 +13,6 @@
 032538f0313b4f240b44a5bef115f5bf *./tests/data/images/pam/02.pam
 ./tests/data/images/pam/%02d.pam CRC=0x5984c023
 608321 ./tests/data/images/pam/02.pam
-e8bd9f1830e6a9db201386e96580f869 *./tests/data/images/pam/02.pam
-./tests/data/images/pam/%02d.pam CRC=0x65707c37
+d2f5eb2f959ca3a90c02f1887b6e0c4f *./tests/data/images/pam/02.pam
+./tests/data/images/pam/%02d.pam CRC=0xab19200d
 101447 ./tests/data/images/pam/02.pam
diff --git a/tests/ref/lavf/pbmpipe b/tests/ref/lavf/pbmpipe
index d50ed716..284f90b1 100644
--- a/tests/ref/lavf/pbmpipe
+++ b/tests/ref/lavf/pbmpipe
@@ -1,3 +1,3 @@
-8ced96f5b6b7362358199ae993b4ceb7 *./tests/data/lavf/pbmpipe.pbm
+8b974da7f48f9e6d5ae327b4444a71fb *./tests/data/lavf/pbmpipe.pbm
 317075 ./tests/data/lavf/pbmpipe.pbm
-./tests/data/lavf/pbmpipe.pbm CRC=0xfae0a1ba
+./tests/data/lavf/pbmpipe.pbm CRC=0xfc010c66
diff --git a/tests/ref/lavf/pixfmt b/tests/ref/lavf/pixfmt
index 88030875..ec75d4ce 100644
--- a/tests/ref/lavf/pixfmt
+++ b/tests/ref/lavf/pixfmt
@@ -28,9 +28,9 @@ efa7c0337cc00c796c6df615223716f1 *./tests/data/pixfmt/rgb565.yuv
 304128 ./tests/data/pixfmt/rgb555.yuv
 1e080c12bd9755c41ecb8e19b756f406 *./tests/data/pixfmt/gray.yuv
 304128 ./tests/data/pixfmt/gray.yuv
-6c719671e39f1bcf67b47eab98fa529b *./tests/data/pixfmt/monow.yuv
+d87cf0c2e7a13cc693fe6ece22461c83 *./tests/data/pixfmt/monow.yuv
 304128 ./tests/data/pixfmt/monow.yuv
-6c719671e39f1bcf67b47eab98fa529b *./tests/data/pixfmt/monob.yuv
+d87cf0c2e7a13cc693fe6ece22461c83 *./tests/data/pixfmt/monob.yuv
 304128 ./tests/data/pixfmt/monob.yuv
 00b85790df5740bab95e2559d81603a7 *./tests/data/pixfmt/yuv440p.yuv
 304128 ./tests/data/pixfmt/yuv440p.yuv
diff --git a/tests/ref/lavf/sgi b/tests/ref/lavf/sgi
index a43c1f42..6f45802d 100644
--- a/tests/ref/lavf/sgi
+++ b/tests/ref/lavf/sgi
@@ -1,3 +1,3 @@
-7054acafd275e51cec28d4518e213081 *./tests/data/images/sgi/02.sgi
+d446e540a7c18da5fd3cc0e9942cd46f *./tests/data/images/sgi/02.sgi
 ./tests/data/images/sgi/%02d.sgi CRC=0x6da01946
-308151 ./tests/data/images/sgi/02.sgi
+307287 ./tests/data/images/sgi/02.sgi
diff --git a/tests/ref/lavf/xbm b/tests/ref/lavf/xbm
index 4cc4be6e..705713b2 100644
--- a/tests/ref/lavf/xbm
+++ b/tests/ref/lavf/xbm
@@ -1,3 +1,3 @@
-99c20fff5d17b698b4a25282aebc3c51 *./tests/data/images/xbm/02.xbm
-./tests/data/images/xbm/%02d.xbm CRC=0x0f5aa5cb
+0629055fd82366317c651a0af4bb82d7 *./tests/data/images/xbm/02.xbm
+./tests/data/images/xbm/%02d.xbm CRC=0xc9a20204
 76411 ./tests/data/images/xbm/02.xbm
diff --git a/tests/ref/lavf/xwd b/tests/ref/lavf/xwd
index 5e593b5d..3d638211 100644
--- a/tests/ref/lavf/xwd
+++ b/tests/ref/lavf/xwd
@@ -19,6 +19,6 @@ fe1af954966a40c2cd35fc27094ff823 *./tests/data/images/xwd/02.xwd
 85e9b8b814a1dea71d143aac2e487037 *./tests/data/images/xwd/02.xwd
 ./tests/data/images/xwd/%02d.xwd CRC=0x0ff205be
 101487 ./tests/data/images/xwd/02.xwd
-2131b4c41fe35178b0c7d121223af549 *./tests/data/images/xwd/02.xwd
-./tests/data/images/xwd/%02d.xwd CRC=0x0f5aa5cb
+796e2e309ac0844cfb2f4959816508ee *./tests/data/images/xwd/02.xwd
+./tests/data/images/xwd/%02d.xwd CRC=0xc9a20204
 12783 ./tests/data/images/xwd/02.xwd
diff --git a/tests/ref/seek/lavf-mkv b/tests/ref/seek/lavf-mkv
index 11275d6e..af13ddbc 100644
--- a/tests/ref/seek/lavf-mkv
+++ b/tests/ref/seek/lavf-mkv
@@ -1,48 +1,48 @@
-ret: 0         st: 1 flags:1 dts: 0.000000 pts: 0.000000 pos:    661 size:   208
+ret: 0         st: 1 flags:1 dts: 0.000000 pts: 0.000000 pos:    797 size:   208
 ret: 0         st:-1 flags:0  ts:-1.000000
-ret: 0         st: 0 flags:1 dts: 0.011000 pts: 0.011000 pos:    877 size: 27837
+ret: 0         st: 0 flags:1 dts: 0.011000 pts: 0.011000 pos:   1013 size: 27837
 ret: 0         st:-1 flags:1  ts: 1.894167
-ret: 0         st: 0 flags:1 dts: 0.971000 pts: 0.971000 pos: 292291 size: 27834
+ret: 0         st: 0 flags:1 dts: 0.971000 pts: 0.971000 pos: 292427 size: 27834
 ret: 0         st: 0 flags:0  ts: 0.788000
-ret: 0         st: 0 flags:1 dts: 0.971000 pts: 0.971000 pos: 292291 size: 27834
+ret: 0         st: 0 flags:1 dts: 0.971000 pts: 0.971000 pos: 292427 size: 27834
 ret: 0         st: 0 flags:1  ts:-0.317000
-ret: 0         st: 0 flags:1 dts: 0.011000 pts: 0.011000 pos:    877 size: 27837
+ret: 0         st: 0 flags:1 dts: 0.011000 pts: 0.011000 pos:   1013 size: 27837
 ret:-1         st: 1 flags:0  ts: 2.577000
 ret: 0         st: 1 flags:1  ts: 1.471000
-ret: 0         st: 1 flags:1 dts: 0.993000 pts: 0.993000 pos: 320132 size:   209
+ret: 0         st: 1 flags:1 dts: 0.993000 pts: 0.993000 pos: 320268 size:   209
 ret: 0         st:-1 flags:0  ts: 0.365002
-ret: 0         st: 0 flags:1 dts: 0.491000 pts: 0.491000 pos: 146844 size: 27925
+ret: 0         st: 0 flags:1 dts: 0.491000 pts: 0.491000 pos: 146980 size: 27925
 ret: 0         st:-1 flags:1  ts:-0.740831
-ret: 0         st: 0 flags:1 dts: 0.011000 pts: 0.011000 pos:    877 size: 27837
+ret: 0         st: 0 flags:1 dts: 0.011000 pts: 0.011000 pos:   1013 size: 27837
 ret:-1         st: 0 flags:0  ts: 2.153000
 ret: 0         st: 0 flags:1  ts: 1.048000
-ret: 0         st: 0 flags:1 dts: 0.971000 pts: 0.971000 pos: 292291 size: 27834
+ret: 0         st: 0 flags:1 dts: 0.971000 pts: 0.971000 pos: 292427 size: 27834
 ret: 0         st: 1 flags:0  ts:-0.058000
-ret: 0         st: 1 flags:1 dts: 0.000000 pts: 0.000000 pos:    661 size:   208
+ret: 0         st: 1 flags:1 dts: 0.000000 pts: 0.000000 pos:    797 size:   208
 ret: 0         st: 1 flags:1  ts: 2.836000
-ret: 0         st: 1 flags:1 dts: 0.993000 pts: 0.993000 pos: 320132 size:   209
+ret: 0         st: 1 flags:1 dts: 0.993000 pts: 0.993000 pos: 320268 size:   209
 ret:-1         st:-1 flags:0  ts: 1.730004
 ret: 0         st:-1 flags:1  ts: 0.624171
-ret: 0         st: 0 flags:1 dts: 0.491000 pts: 0.491000 pos: 146844 size: 27925
+ret: 0         st: 0 flags:1 dts: 0.491000 pts: 0.491000 pos: 146980 size: 27925
 ret: 0         st: 0 flags:0  ts:-0.482000
-ret: 0         st: 0 flags:1 dts: 0.011000 pts: 0.011000 pos:    877 size: 27837
+ret: 0         st: 0 flags:1 dts: 0.011000 pts: 0.011000 pos:   1013 size: 27837
 ret: 0         st: 0 flags:1  ts: 2.413000
-ret: 0         st: 0 flags:1 dts: 0.971000 pts: 0.971000 pos: 292291 size: 27834
+ret: 0         st: 0 flags:1 dts: 0.971000 pts: 0.971000 pos: 292427 size: 27834
 ret:-1         st: 1 flags:0  ts: 1.307000
 ret: 0         st: 1 flags:1  ts: 0.201000
-ret: 0         st: 1 flags:1 dts: 0.000000 pts: 0.000000 pos:    661 size:   208
+ret: 0         st: 1 flags:1 dts: 0.000000 pts: 0.000000 pos:    797 size:   208
 ret: 0         st:-1 flags:0  ts:-0.904994
-ret: 0         st: 0 flags:1 dts: 0.011000 pts: 0.011000 pos:    877 size: 27837
+ret: 0         st: 0 flags:1 dts: 0.011000 pts: 0.011000 pos:   1013 size: 27837
 ret: 0         st:-1 flags:1  ts: 1.989173
-ret: 0         st: 0 flags:1 dts: 0.971000 pts: 0.971000 pos: 292291 size: 27834
+ret: 0         st: 0 flags:1 dts: 0.971000 pts: 0.971000 pos: 292427 size: 27834
 ret: 0         st: 0 flags:0  ts: 0.883000
-ret: 0         st: 0 flags:1 dts: 0.971000 pts: 0.971000 pos: 292291 size: 27834
+ret: 0         st: 0 flags:1 dts: 0.971000 pts: 0.971000 pos: 292427 size: 27834
 ret: 0         st: 0 flags:1  ts:-0.222000
-ret: 0         st: 0 flags:1 dts: 0.011000 pts: 0.011000 pos:    877 size: 27837
+ret: 0         st: 0 flags:1 dts: 0.011000 pts: 0.011000 pos:   1013 size: 27837
 ret:-1         st: 1 flags:0  ts: 2.672000
 ret: 0         st: 1 flags:1  ts: 1.566000
-ret: 0         st: 1 flags:1 dts: 0.993000 pts: 0.993000 pos: 320132 size:   209
+ret: 0         st: 1 flags:1 dts: 0.993000 pts: 0.993000 pos: 320268 size:   209
 ret: 0         st:-1 flags:0  ts: 0.460008
-ret: 0         st: 0 flags:1 dts: 0.491000 pts: 0.491000 pos: 146844 size: 27925
+ret: 0         st: 0 flags:1 dts: 0.491000 pts: 0.491000 pos: 146980 size: 27925
 ret: 0         st:-1 flags:1  ts:-0.645825
-ret: 0         st: 0 flags:1 dts: 0.011000 pts: 0.011000 pos:    877 size: 27837
+ret: 0         st: 0 flags:1 dts: 0.011000 pts: 0.011000 pos:   1013 size: 27837
diff --git a/tests/ref/seek/lavf-mxf b/tests/ref/seek/lavf-mxf
index f1aaa197..93d41972 100644
--- a/tests/ref/seek/lavf-mxf
+++ b/tests/ref/seek/lavf-mxf
@@ -1,48 +1,48 @@
-ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   6656 size: 24801
+ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   6144 size: 24801
 ret: 0         st:-1 flags:0  ts:-1.000000
-ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   6656 size: 24801
+ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   6144 size: 24801
 ret: 0         st:-1 flags:1  ts: 1.894167
-ret: 0         st: 0 flags:1 dts: 0.840000 pts: 0.960000 pos: 460800 size: 24711
+ret: 0         st: 0 flags:1 dts: 0.840000 pts: 0.960000 pos: 460288 size: 24711
 ret: 0         st: 0 flags:0  ts: 0.800000
-ret: 0         st: 0 flags:1 dts: 0.840000 pts: 0.960000 pos: 460800 size: 24711
+ret: 0         st: 0 flags:1 dts: 0.840000 pts: 0.960000 pos: 460288 size: 24711
 ret: 0         st: 0 flags:1  ts:-0.320000
-ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   6656 size: 24801
+ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   6144 size: 24801
 ret:-1         st: 1 flags:0  ts: 2.576667
 ret: 0         st: 1 flags:1  ts: 1.470833
-ret: 0         st: 0 flags:1 dts: 0.840000 pts: 0.960000 pos: 460800 size: 24711
+ret: 0         st: 0 flags:1 dts: 0.840000 pts: 0.960000 pos: 460288 size: 24711
 ret: 0         st:-1 flags:0  ts: 0.365002
-ret: 0         st: 0 flags:1 dts: 0.360000 pts: 0.480000 pos: 211968 size: 24786
+ret: 0         st: 0 flags:1 dts: 0.360000 pts: 0.480000 pos: 211456 size: 24786
 ret: 0         st:-1 flags:1  ts:-0.740831
-ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   6656 size: 24801
+ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   6144 size: 24801
 ret:-1         st: 0 flags:0  ts: 2.160000
 ret: 0         st: 0 flags:1  ts: 1.040000
-ret: 0         st: 0 flags:1 dts: 0.840000 pts: 0.960000 pos: 460800 size: 24711
+ret: 0         st: 0 flags:1 dts: 0.840000 pts: 0.960000 pos: 460288 size: 24711
 ret: 0         st: 1 flags:0  ts:-0.058333
-ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   6656 size: 24801
+ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   6144 size: 24801
 ret: 0         st: 1 flags:1  ts: 2.835833
-ret: 0         st: 0 flags:1 dts: 0.840000 pts: 0.960000 pos: 460800 size: 24711
+ret: 0         st: 0 flags:1 dts: 0.840000 pts: 0.960000 pos: 460288 size: 24711
 ret:-1         st:-1 flags:0  ts: 1.730004
 ret: 0         st:-1 flags:1  ts: 0.624171
-ret: 0         st: 0 flags:1 dts: 0.360000 pts: 0.480000 pos: 211968 size: 24786
+ret: 0         st: 0 flags:1 dts: 0.360000 pts: 0.480000 pos: 211456 size: 24786
 ret: 0         st: 0 flags:0  ts:-0.480000
-ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   6656 size: 24801
+ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   6144 size: 24801
 ret: 0         st: 0 flags:1  ts: 2.400000
-ret: 0         st: 0 flags:1 dts: 0.840000 pts: 0.960000 pos: 460800 size: 24711
+ret: 0         st: 0 flags:1 dts: 0.840000 pts: 0.960000 pos: 460288 size: 24711
 ret:-1         st: 1 flags:0  ts: 1.306667
 ret: 0         st: 1 flags:1  ts: 0.200833
-ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   6656 size: 24801
+ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   6144 size: 24801
 ret: 0         st:-1 flags:0  ts:-0.904994
-ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   6656 size: 24801
+ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   6144 size: 24801
 ret: 0         st:-1 flags:1  ts: 1.989173
-ret: 0         st: 0 flags:1 dts: 0.840000 pts: 0.960000 pos: 460800 size: 24711
+ret: 0         st: 0 flags:1 dts: 0.840000 pts: 0.960000 pos: 460288 size: 24711
 ret: 0         st: 0 flags:0  ts: 0.880000
-ret: 0         st: 0 flags:1 dts: 0.840000 pts: 0.960000 pos: 460800 size: 24711
+ret: 0         st: 0 flags:1 dts: 0.840000 pts: 0.960000 pos: 460288 size: 24711
 ret: 0         st: 0 flags:1  ts:-0.240000
-ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   6656 size: 24801
+ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   6144 size: 24801
 ret:-1         st: 1 flags:0  ts: 2.671667
 ret: 0         st: 1 flags:1  ts: 1.565833
-ret: 0         st: 0 flags:1 dts: 0.840000 pts: 0.960000 pos: 460800 size: 24711
+ret: 0         st: 0 flags:1 dts: 0.840000 pts: 0.960000 pos: 460288 size: 24711
 ret: 0         st:-1 flags:0  ts: 0.460008
-ret: 0         st: 0 flags:1 dts: 0.840000 pts: 0.960000 pos: 460800 size: 24711
+ret: 0         st: 0 flags:1 dts: 0.360000 pts: 0.480000 pos: 211456 size: 24786
 ret: 0         st:-1 flags:1  ts:-0.645825
-ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   6656 size: 24801
+ret: 0         st: 0 flags:1 dts:-0.040000 pts: 0.000000 pos:   6144 size: 24801
diff --git a/tests/ref/seek/lavf-mxf_opatom_audio b/tests/ref/seek/lavf-mxf_opatom_audio
index 12d4644d..2d1a7cc3 100644
--- a/tests/ref/seek/lavf-mxf_opatom_audio
+++ b/tests/ref/seek/lavf-mxf_opatom_audio
@@ -1,53 +1,53 @@
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   5145 size:  3840
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   4633 size:  3840
 ret: 0         st:-1 flags:0  ts:-1.000000
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   5145 size:  3840
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   4633 size:  3840
 ret: 0         st:-1 flags:1  ts: 1.894167
-ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 101143 size:     2
+ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 100631 size:     2
 ret: 0         st: 0 flags:0  ts: 0.788333
-ret: 0         st: 0 flags:1 dts: 0.788333 pts: 0.788333 pos:  80825 size:  3840
+ret: 0         st: 0 flags:1 dts: 0.788333 pts: 0.788333 pos:  80313 size:  3840
 ret: 0         st: 0 flags:1  ts:-0.317500
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   5145 size:  3840
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   4633 size:  3840
 ret: 0         st:-1 flags:0  ts: 2.576668
-ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 101143 size:     2
+ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 100631 size:     2
 ret: 0         st:-1 flags:1  ts: 1.470835
-ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 101143 size:     2
+ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 100631 size:     2
 ret: 0         st: 0 flags:0  ts: 0.365000
-ret: 0         st: 0 flags:1 dts: 0.365000 pts: 0.365000 pos:  40185 size:  3840
+ret: 0         st: 0 flags:1 dts: 0.365000 pts: 0.365000 pos:  39673 size:  3840
 ret: 0         st: 0 flags:1  ts:-0.740833
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   5145 size:  3840
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   4633 size:  3840
 ret: 0         st:-1 flags:0  ts: 2.153336
-ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 101143 size:     2
+ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 100631 size:     2
 ret: 0         st:-1 flags:1  ts: 1.047503
-ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 101143 size:     2
+ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 100631 size:     2
 ret: 0         st: 0 flags:0  ts:-0.058333
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   5145 size:  3840
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   4633 size:  3840
 ret: 0         st: 0 flags:1  ts: 2.835833
-ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 101143 size:     2
+ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 100631 size:     2
 ret: 0         st:-1 flags:0  ts: 1.730004
-ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 101143 size:     2
+ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 100631 size:     2
 ret: 0         st:-1 flags:1  ts: 0.624171
-ret: 0         st: 0 flags:1 dts: 0.624167 pts: 0.624167 pos:  65065 size:  3840
+ret: 0         st: 0 flags:1 dts: 0.624167 pts: 0.624167 pos:  64553 size:  3840
 ret: 0         st: 0 flags:0  ts:-0.481667
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   5145 size:  3840
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   4633 size:  3840
 ret: 0         st: 0 flags:1  ts: 2.412500
-ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 101143 size:     2
+ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 100631 size:     2
 ret: 0         st:-1 flags:0  ts: 1.306672
-ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 101143 size:     2
+ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 100631 size:     2
 ret: 0         st:-1 flags:1  ts: 0.200839
-ret: 0         st: 0 flags:1 dts: 0.200833 pts: 0.200833 pos:  24425 size:  3840
+ret: 0         st: 0 flags:1 dts: 0.200833 pts: 0.200833 pos:  23913 size:  3840
 ret: 0         st: 0 flags:0  ts:-0.905000
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   5145 size:  3840
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   4633 size:  3840
 ret: 0         st: 0 flags:1  ts: 1.989167
-ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 101143 size:     2
+ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 100631 size:     2
 ret: 0         st:-1 flags:0  ts: 0.883340
-ret: 0         st: 0 flags:1 dts: 0.883333 pts: 0.883333 pos:  89945 size:  3840
+ret: 0         st: 0 flags:1 dts: 0.883333 pts: 0.883333 pos:  89433 size:  3840
 ret: 0         st:-1 flags:1  ts:-0.222493
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   5145 size:  3840
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   4633 size:  3840
 ret: 0         st: 0 flags:0  ts: 2.671667
-ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 101143 size:     2
+ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 100631 size:     2
 ret: 0         st: 0 flags:1  ts: 1.565833
-ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 101143 size:     2
+ret: 0         st: 0 flags:1 dts: 0.999979 pts: 0.999979 pos: 100631 size:     2
 ret: 0         st:-1 flags:0  ts: 0.460008
-ret: 0         st: 0 flags:1 dts: 0.460000 pts: 0.460000 pos:  49305 size:  3840
+ret: 0         st: 0 flags:1 dts: 0.460000 pts: 0.460000 pos:  48793 size:  3840
 ret: 0         st:-1 flags:1  ts:-0.645825
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   5145 size:  3840
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:   4633 size:  3840
diff --git a/tests/ref/seek/lavf-sgi b/tests/ref/seek/lavf-sgi
index b07132a6..fac6ad97 100644
--- a/tests/ref/seek/lavf-sgi
+++ b/tests/ref/seek/lavf-sgi
@@ -1,4 +1,4 @@
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:     -1 size:308336
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:     -1 size:307472
 ret:-EINVAL    st:-1 flags:0  ts:-1.000000
 ret:-EINVAL    st:-1 flags:1  ts: 1.894167
 ret:-EINVAL    st: 0 flags:0  ts: 0.800000
@@ -6,7 +6,7 @@ ret:-EINVAL    st: 0 flags:1  ts:-0.320000
 ret:-EINVAL    st:-1 flags:0  ts: 2.576668
 ret:-EINVAL    st:-1 flags:1  ts: 1.470835
 ret: 0         st: 0 flags:0  ts: 0.360000
-ret: 0         st: 0 flags:1 dts: 0.360000 pts: 0.360000 pos:     -1 size:308572
+ret: 0         st: 0 flags:1 dts: 0.360000 pts: 0.360000 pos:     -1 size:307708
 ret:-EINVAL    st: 0 flags:1  ts:-0.760000
 ret:-EINVAL    st:-1 flags:0  ts: 2.153336
 ret:-EINVAL    st:-1 flags:1  ts: 1.047503
@@ -18,7 +18,7 @@ ret:-EINVAL    st: 0 flags:0  ts:-0.480000
 ret:-EINVAL    st: 0 flags:1  ts: 2.400000
 ret:-EINVAL    st:-1 flags:0  ts: 1.306672
 ret: 0         st:-1 flags:1  ts: 0.200839
-ret: 0         st: 0 flags:1 dts: 0.200000 pts: 0.200000 pos:     -1 size:308294
+ret: 0         st: 0 flags:1 dts: 0.200000 pts: 0.200000 pos:     -1 size:307430
 ret:-EINVAL    st: 0 flags:0  ts:-0.920000
 ret:-EINVAL    st: 0 flags:1  ts: 2.000000
 ret:-EINVAL    st:-1 flags:0  ts: 0.883340
@@ -26,5 +26,5 @@ ret:-EINVAL    st:-1 flags:1  ts:-0.222493
 ret:-EINVAL    st: 0 flags:0  ts: 2.680000
 ret:-EINVAL    st: 0 flags:1  ts: 1.560000
 ret: 0         st:-1 flags:0  ts: 0.460008
-ret: 0         st: 0 flags:1 dts: 0.480000 pts: 0.480000 pos:     -1 size:307773
+ret: 0         st: 0 flags:1 dts: 0.480000 pts: 0.480000 pos:     -1 size:306909
 ret:-EINVAL    st:-1 flags:1  ts:-0.645825
diff --git a/tests/ref/seek/lavf-yuv4mpeg b/tests/ref/seek/lavf-yuv4mpeg
index 81c1de9a..60c30361 100644
--- a/tests/ref/seek/lavf-yuv4mpeg
+++ b/tests/ref/seek/lavf-yuv4mpeg
@@ -1,27 +1,53 @@
 ret: 0         st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos:     64 size:152064
-ret:-1         st:-1 flags:0  ts:-1.000000
-ret:-1         st:-1 flags:1  ts: 1.894167
-ret:-1         st: 0 flags:0  ts: 0.800000
-ret:-1         st: 0 flags:1  ts:-0.320000
-ret:-1         st:-1 flags:0  ts: 2.576668
-ret:-1         st:-1 flags:1  ts: 1.470835
-ret:-1         st: 0 flags:0  ts: 0.360000
-ret:-1         st: 0 flags:1  ts:-0.760000
-ret:-1         st:-1 flags:0  ts: 2.153336
-ret:-1         st:-1 flags:1  ts: 1.047503
-ret:-1         st: 0 flags:0  ts:-0.040000
-ret:-1         st: 0 flags:1  ts: 2.840000
-ret:-1         st:-1 flags:0  ts: 1.730004
-ret:-1         st:-1 flags:1  ts: 0.624171
-ret:-1         st: 0 flags:0  ts:-0.480000
-ret:-1         st: 0 flags:1  ts: 2.400000
-ret:-1         st:-1 flags:0  ts: 1.306672
-ret:-1         st:-1 flags:1  ts: 0.200839
-ret:-1         st: 0 flags:0  ts:-0.920000
-ret:-1         st: 0 flags:1  ts: 2.000000
-ret:-1         st:-1 flags:0  ts: 0.883340
-ret:-1         st:-1 flags:1  ts:-0.222493
-ret:-1         st: 0 flags:0  ts: 2.680000
-ret:-1         st: 0 flags:1  ts: 1.560000
-ret:-1         st:-1 flags:0  ts: 0.460008
-ret:-1         st:-1 flags:1  ts:-0.645825
+ret: 0         st:-1 flags:0  ts:-1.000000
+ret: 0         st: 0 flags:1 dts: 0.040000 pts: 0.040000 pos: 152134 size:152064
+ret: 0         st:-1 flags:1  ts: 1.894167
+ret:-EOF
+ret: 0         st: 0 flags:0  ts: 0.800000
+ret: 0         st: 0 flags:1 dts: 0.800000 pts: 0.800000 pos:3041464 size:152064
+ret: 0         st: 0 flags:1  ts:-0.320000
+ret: 0         st: 0 flags:1 dts: 0.840000 pts: 0.840000 pos:3193534 size:152064
+ret: 0         st:-1 flags:0  ts: 2.576668
+ret:-EOF
+ret: 0         st:-1 flags:1  ts: 1.470835
+ret:-EOF
+ret: 0         st: 0 flags:0  ts: 0.360000
+ret: 0         st: 0 flags:1 dts: 0.360000 pts: 0.360000 pos:1368694 size:152064
+ret: 0         st: 0 flags:1  ts:-0.760000
+ret: 0         st: 0 flags:1 dts: 0.400000 pts: 0.400000 pos:1520764 size:152064
+ret: 0         st:-1 flags:0  ts: 2.153336
+ret:-EOF
+ret: 0         st:-1 flags:1  ts: 1.047503
+ret:-EOF
+ret: 0         st: 0 flags:0  ts:-0.040000
+ret:-EOF
+ret: 0         st: 0 flags:1  ts: 2.840000
+ret:-EOF
+ret: 0         st:-1 flags:0  ts: 1.730004
+ret:-EOF
+ret: 0         st:-1 flags:1  ts: 0.624171
+ret: 0         st: 0 flags:1 dts: 0.640000 pts: 0.640000 pos:2433184 size:152064
+ret: 0         st: 0 flags:0  ts:-0.480000
+ret: 0         st: 0 flags:1 dts: 0.680000 pts: 0.680000 pos:2585254 size:152064
+ret: 0         st: 0 flags:1  ts: 2.400000
+ret:-EOF
+ret: 0         st:-1 flags:0  ts: 1.306672
+ret:-EOF
+ret: 0         st:-1 flags:1  ts: 0.200839
+ret: 0         st: 0 flags:1 dts: 0.200000 pts: 0.200000 pos: 760414 size:152064
+ret: 0         st: 0 flags:0  ts:-0.920000
+ret: 0         st: 0 flags:1 dts: 0.240000 pts: 0.240000 pos: 912484 size:152064
+ret: 0         st: 0 flags:1  ts: 2.000000
+ret:-EOF
+ret: 0         st:-1 flags:0  ts: 0.883340
+ret: 0         st: 0 flags:1 dts: 0.880000 pts: 0.880000 pos:3345604 size:152064
+ret: 0         st:-1 flags:1  ts:-0.222493
+ret: 0         st: 0 flags:1 dts: 0.920000 pts: 0.920000 pos:3497674 size:152064
+ret: 0         st: 0 flags:0  ts: 2.680000
+ret:-EOF
+ret: 0         st: 0 flags:1  ts: 1.560000
+ret:-EOF
+ret: 0         st:-1 flags:0  ts: 0.460008
+ret: 0         st: 0 flags:1 dts: 0.480000 pts: 0.480000 pos:1824904 size:152064
+ret: 0         st:-1 flags:1  ts:-0.645825
+ret: 0         st: 0 flags:1 dts: 0.520000 pts: 0.520000 pos:1976974 size:152064
diff --git a/tests/ref/vsynth/vsynth1-bpp1 b/tests/ref/vsynth/vsynth1-bpp1
new file mode 100644
index 00000000..0bd1a778
--- /dev/null
+++ b/tests/ref/vsynth/vsynth1-bpp1
@@ -0,0 +1,4 @@
+611de0803ff6bd0ef385dde59964a105 *tests/data/fate/vsynth1-bpp1.avi
+640452 tests/data/fate/vsynth1-bpp1.avi
+576b690e8a8921c54d777463b63a8307 *tests/data/fate/vsynth1-bpp1.out.rawvideo
+stddev:   97.41 PSNR:  8.36 MAXDIFF:  238 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth1-bpp15 b/tests/ref/vsynth/vsynth1-bpp15
new file mode 100644
index 00000000..85538381
--- /dev/null
+++ b/tests/ref/vsynth/vsynth1-bpp15
@@ -0,0 +1,4 @@
+dc37d1db0429f44000a03a60862751cd *tests/data/fate/vsynth1-bpp15.avi
+10144452 tests/data/fate/vsynth1-bpp15.avi
+3aee2d6e82a9507d7f01844c04d2b57b *tests/data/fate/vsynth1-bpp15.out.rawvideo
+stddev:   38.44 PSNR: 16.43 MAXDIFF:  159 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth1-dnxhd-1080i b/tests/ref/vsynth/vsynth1-dnxhd-1080i
index 28d55b6b..02f989f3 100644
--- a/tests/ref/vsynth/vsynth1-dnxhd-1080i
+++ b/tests/ref/vsynth/vsynth1-dnxhd-1080i
@@ -1,4 +1,4 @@
 a0234e0a8516d958f423b119aa9e35c4 *tests/data/fate/vsynth1-dnxhd-1080i.mov
 3031911 tests/data/fate/vsynth1-dnxhd-1080i.mov
-a09132c6db44f415e831dcaa630a351b *tests/data/fate/vsynth1-dnxhd-1080i.out.rawvideo
-stddev:    6.29 PSNR: 32.15 MAXDIFF:   64 bytes:  7603200/   760320
+fed9ed2a5179c9df0ef58772b025e303 *tests/data/fate/vsynth1-dnxhd-1080i.out.rawvideo
+stddev:    6.18 PSNR: 32.31 MAXDIFF:   64 bytes:  7603200/   760320
diff --git a/tests/ref/vsynth/vsynth1-dnxhd-1080i-10bit b/tests/ref/vsynth/vsynth1-dnxhd-1080i-10bit
new file mode 100644
index 00000000..dd96e142
--- /dev/null
+++ b/tests/ref/vsynth/vsynth1-dnxhd-1080i-10bit
@@ -0,0 +1,4 @@
+f562845d1848bf5d3e524b418b742e01 *tests/data/fate/vsynth1-dnxhd-1080i-10bit.mov
+4588391 tests/data/fate/vsynth1-dnxhd-1080i-10bit.mov
+31032fcb7e6af79daaac02288254c6d6 *tests/data/fate/vsynth1-dnxhd-1080i-10bit.out.rawvideo
+stddev:    5.69 PSNR: 33.02 MAXDIFF:   55 bytes:  7603200/   760320
diff --git a/tests/ref/vsynth/vsynth1-dnxhd-1080i-colr b/tests/ref/vsynth/vsynth1-dnxhd-1080i-colr
index 16d89535..ac429667 100644
--- a/tests/ref/vsynth/vsynth1-dnxhd-1080i-colr
+++ b/tests/ref/vsynth/vsynth1-dnxhd-1080i-colr
@@ -1,4 +1,4 @@
 5fccdb16c0f14dea1b6b603bac90b97e *tests/data/fate/vsynth1-dnxhd-1080i-colr.mov
 3031929 tests/data/fate/vsynth1-dnxhd-1080i-colr.mov
-5835dff88cb84e83bbe70b5ed5edd5ab *tests/data/fate/vsynth1-dnxhd-1080i-colr.out.rawvideo
-stddev:    5.79 PSNR: 32.87 MAXDIFF:   56 bytes:  7603200/   760320
+6f2d5429ffc4529a76acfeb28b560542 *tests/data/fate/vsynth1-dnxhd-1080i-colr.out.rawvideo
+stddev:    5.65 PSNR: 33.09 MAXDIFF:   55 bytes:  7603200/   760320
diff --git a/tests/ref/vsynth/vsynth1-dnxhd-720p b/tests/ref/vsynth/vsynth1-dnxhd-720p
index fd77e863..16cf20c3 100644
--- a/tests/ref/vsynth/vsynth1-dnxhd-720p
+++ b/tests/ref/vsynth/vsynth1-dnxhd-720p
@@ -1,4 +1,4 @@
 af03d57b8320568027162132643f7814 *tests/data/fate/vsynth1-dnxhd-720p.dnxhd
 2293760 tests/data/fate/vsynth1-dnxhd-720p.dnxhd
-f074f1b5ed394871b3c73184ad55b895 *tests/data/fate/vsynth1-dnxhd-720p.out.rawvideo
-stddev:    6.26 PSNR: 32.19 MAXDIFF:   65 bytes:  7603200/   760320
+5f9fba5bacda81e77a72d8a816612564 *tests/data/fate/vsynth1-dnxhd-720p.out.rawvideo
+stddev:    6.22 PSNR: 32.24 MAXDIFF:   64 bytes:  7603200/   760320
diff --git a/tests/ref/vsynth/vsynth1-dnxhd-720p-10bit b/tests/ref/vsynth/vsynth1-dnxhd-720p-10bit
index 8a8f639d..dc808f38 100644
--- a/tests/ref/vsynth/vsynth1-dnxhd-720p-10bit
+++ b/tests/ref/vsynth/vsynth1-dnxhd-720p-10bit
@@ -1,4 +1,4 @@
 f8c4b7aa165a80df2485d526161290a3 *tests/data/fate/vsynth1-dnxhd-720p-10bit.dnxhd
 2293760 tests/data/fate/vsynth1-dnxhd-720p-10bit.dnxhd
-ec26a6cbf53e38ffb9d5c51cbfbf4f7c *tests/data/fate/vsynth1-dnxhd-720p-10bit.out.rawvideo
-stddev:    6.27 PSNR: 32.18 MAXDIFF:   64 bytes:  7603200/   760320
+87f1f0e074466facd3a9922ecc8311db *tests/data/fate/vsynth1-dnxhd-720p-10bit.out.rawvideo
+stddev:    6.23 PSNR: 32.23 MAXDIFF:   64 bytes:  7603200/   760320
diff --git a/tests/ref/vsynth/vsynth1-dnxhd-720p-rd b/tests/ref/vsynth/vsynth1-dnxhd-720p-rd
index 04227769..f030e92a 100644
--- a/tests/ref/vsynth/vsynth1-dnxhd-720p-rd
+++ b/tests/ref/vsynth/vsynth1-dnxhd-720p-rd
@@ -1,4 +1,4 @@
 276e5175376051218b0e3eb36f9e9a63 *tests/data/fate/vsynth1-dnxhd-720p-rd.dnxhd
 2293760 tests/data/fate/vsynth1-dnxhd-720p-rd.dnxhd
-28662df973b289798bf6069fbbee8071 *tests/data/fate/vsynth1-dnxhd-720p-rd.out.rawvideo
-stddev:    6.26 PSNR: 32.19 MAXDIFF:   65 bytes:  7603200/   760320
+1c75ad86ccd5cc2112da7d43ace50dbf *tests/data/fate/vsynth1-dnxhd-720p-rd.out.rawvideo
+stddev:    6.23 PSNR: 32.23 MAXDIFF:   64 bytes:  7603200/   760320
diff --git a/tests/ref/vsynth/vsynth1-ffv1-v0 b/tests/ref/vsynth/vsynth1-ffv1-v0
new file mode 100644
index 00000000..8c722e4b
--- /dev/null
+++ b/tests/ref/vsynth/vsynth1-ffv1-v0
@@ -0,0 +1,4 @@
+36011c9a2b288fb04bf6c520371646d4 *tests/data/fate/vsynth1-ffv1-v0.avi
+2655368 tests/data/fate/vsynth1-ffv1-v0.avi
+c5ccac874dbf808e9088bc3107860042 *tests/data/fate/vsynth1-ffv1-v0.out.rawvideo
+stddev:    0.00 PSNR:999.99 MAXDIFF:    0 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth1-ffv1-v3-bgr0 b/tests/ref/vsynth/vsynth1-ffv1-v3-bgr0
new file mode 100644
index 00000000..3808f494
--- /dev/null
+++ b/tests/ref/vsynth/vsynth1-ffv1-v3-bgr0
@@ -0,0 +1,4 @@
+3c68357b239479fc26656f6dd76b0b58 *tests/data/fate/vsynth1-ffv1-v3-bgr0.avi
+6883176 tests/data/fate/vsynth1-ffv1-v3-bgr0.avi
+49c03ab1b73b7cd3cabc3c77a9479c9e *tests/data/fate/vsynth1-ffv1-v3-bgr0.out.rawvideo
+stddev:    3.16 PSNR: 38.12 MAXDIFF:   50 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth1-ffv1-v3-yuv420p b/tests/ref/vsynth/vsynth1-ffv1-v3-yuv420p
new file mode 100644
index 00000000..a4af95bb
--- /dev/null
+++ b/tests/ref/vsynth/vsynth1-ffv1-v3-yuv420p
@@ -0,0 +1,4 @@
+26b1296a0ef80a3b5c8b63cc57c52bc2 *tests/data/fate/vsynth1-ffv1-v3-yuv420p.avi
+2691268 tests/data/fate/vsynth1-ffv1-v3-yuv420p.avi
+c5ccac874dbf808e9088bc3107860042 *tests/data/fate/vsynth1-ffv1-v3-yuv420p.out.rawvideo
+stddev:    0.00 PSNR:999.99 MAXDIFF:    0 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth1-ffv1-v3-yuv422p10 b/tests/ref/vsynth/vsynth1-ffv1-v3-yuv422p10
new file mode 100644
index 00000000..d56cf276
--- /dev/null
+++ b/tests/ref/vsynth/vsynth1-ffv1-v3-yuv422p10
@@ -0,0 +1,4 @@
+aa8c5630213381c7b2afdec4a91405ed *tests/data/fate/vsynth1-ffv1-v3-yuv422p10.avi
+2845574 tests/data/fate/vsynth1-ffv1-v3-yuv422p10.avi
+c5ccac874dbf808e9088bc3107860042 *tests/data/fate/vsynth1-ffv1-v3-yuv422p10.out.rawvideo
+stddev:    0.00 PSNR:999.99 MAXDIFF:    0 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth1-ffv1-v3-yuv444p16 b/tests/ref/vsynth/vsynth1-ffv1-v3-yuv444p16
new file mode 100644
index 00000000..5d919ea2
--- /dev/null
+++ b/tests/ref/vsynth/vsynth1-ffv1-v3-yuv444p16
@@ -0,0 +1,4 @@
+f6b6943455d8b2c3010ff898df5dc9db *tests/data/fate/vsynth1-ffv1-v3-yuv444p16.avi
+5357816 tests/data/fate/vsynth1-ffv1-v3-yuv444p16.avi
+c5ccac874dbf808e9088bc3107860042 *tests/data/fate/vsynth1-ffv1-v3-yuv444p16.out.rawvideo
+stddev:    0.00 PSNR:999.99 MAXDIFF:    0 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth1-ffv1.0 b/tests/ref/vsynth/vsynth1-ffv1.0
deleted file mode 100644
index 09053031..00000000
--- a/tests/ref/vsynth/vsynth1-ffv1.0
+++ /dev/null
@@ -1,4 +0,0 @@
-36011c9a2b288fb04bf6c520371646d4 *tests/data/fate/vsynth1-ffv1.0.avi
-2655368 tests/data/fate/vsynth1-ffv1.0.avi
-c5ccac874dbf808e9088bc3107860042 *tests/data/fate/vsynth1-ffv1.0.out.rawvideo
-stddev:    0.00 PSNR:999.99 MAXDIFF:    0 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth1-h261 b/tests/ref/vsynth/vsynth1-h261
index 4ba2a44a..65efd601 100644
--- a/tests/ref/vsynth/vsynth1-h261
+++ b/tests/ref/vsynth/vsynth1-h261
@@ -1,4 +1,4 @@
-be41f18a8c3120b730270a9ec45545aa *tests/data/fate/vsynth1-h261.avi
+5a1114f9356235172ba9ad13f88108cf *tests/data/fate/vsynth1-h261.avi
 707550 tests/data/fate/vsynth1-h261.avi
 85fde92037c2ccecc02e2d6c21a169b0 *tests/data/fate/vsynth1-h261.out.rawvideo
 stddev:    9.11 PSNR: 28.93 MAXDIFF:  113 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth1-h261-trellis b/tests/ref/vsynth/vsynth1-h261-trellis
index bf74a246..87b078b0 100644
--- a/tests/ref/vsynth/vsynth1-h261-trellis
+++ b/tests/ref/vsynth/vsynth1-h261-trellis
@@ -1,4 +1,4 @@
-707aac692b7478d2312a87b09228e437 *tests/data/fate/vsynth1-h261-trellis.avi
+02b4109ce5343b7ef24fb11c2635498a *tests/data/fate/vsynth1-h261-trellis.avi
 655416 tests/data/fate/vsynth1-h261-trellis.avi
 70ceba944548ba680b1101c91707ea25 *tests/data/fate/vsynth1-h261-trellis.out.rawvideo
 stddev:    8.75 PSNR: 29.28 MAXDIFF:   90 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth1-jpeg2000 b/tests/ref/vsynth/vsynth1-jpeg2000
index fb110b2b..48accf50 100644
--- a/tests/ref/vsynth/vsynth1-jpeg2000
+++ b/tests/ref/vsynth/vsynth1-jpeg2000
@@ -1,4 +1,4 @@
-ed9c45dc090a03c2eb9c35cf76e4d914 *tests/data/fate/vsynth1-jpeg2000.avi
-2306906 tests/data/fate/vsynth1-jpeg2000.avi
-64166a8fbb730a7a132c50ee89592672 *tests/data/fate/vsynth1-jpeg2000.out.rawvideo
-stddev:    5.41 PSNR: 33.46 MAXDIFF:   63 bytes:  7603200/  7603200
+d2a06ad916711d29b30977a06335bb76 *tests/data/fate/vsynth1-jpeg2000.avi
+2265698 tests/data/fate/vsynth1-jpeg2000.avi
+15a8e49f6fd014193bbafd72f84936c7 *tests/data/fate/vsynth1-jpeg2000.out.rawvideo
+stddev:    5.36 PSNR: 33.55 MAXDIFF:   61 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth1-jpeg2000-97 b/tests/ref/vsynth/vsynth1-jpeg2000-97
index a8e9bf8d..33b3299a 100644
--- a/tests/ref/vsynth/vsynth1-jpeg2000-97
+++ b/tests/ref/vsynth/vsynth1-jpeg2000-97
@@ -1,4 +1,4 @@
-dfb35e733795195c6519ecc37e953931 *tests/data/fate/vsynth1-jpeg2000-97.avi
-2243136 tests/data/fate/vsynth1-jpeg2000-97.avi
-e1a095b40d7f6440f6c46f2995c4759c *tests/data/fate/vsynth1-jpeg2000-97.out.rawvideo
-stddev:    6.23 PSNR: 32.23 MAXDIFF:   75 bytes:  7603200/  7603200
+8bb707e596f97451fd325dec2dd610a7 *tests/data/fate/vsynth1-jpeg2000-97.avi
+3654620 tests/data/fate/vsynth1-jpeg2000-97.avi
+5073771a78e1f5366a7eb0df341662fc *tests/data/fate/vsynth1-jpeg2000-97.out.rawvideo
+stddev:    4.23 PSNR: 35.59 MAXDIFF:   53 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth1-mov-bgr24 b/tests/ref/vsynth/vsynth1-mov-bgr24
new file mode 100644
index 00000000..ec60442f
--- /dev/null
+++ b/tests/ref/vsynth/vsynth1-mov-bgr24
@@ -0,0 +1,4 @@
+93d3a2e5701c3da06da27cffa04fb730 *tests/data/fate/vsynth1-mov-bgr24.mov
+15207169 tests/data/fate/vsynth1-mov-bgr24.mov
+93695a27c24a61105076ca7b1f010bbd *tests/data/fate/vsynth1-mov-bgr24.out.rawvideo
+stddev:    3.42 PSNR: 37.44 MAXDIFF:   48 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth1-mov-bpp15 b/tests/ref/vsynth/vsynth1-mov-bpp15
new file mode 100644
index 00000000..cfb5e861
--- /dev/null
+++ b/tests/ref/vsynth/vsynth1-mov-bpp15
@@ -0,0 +1,4 @@
+00cd1f0cf8c335b8f4d7dfa81e188e75 *tests/data/fate/vsynth1-mov-bpp15.mov
+10138329 tests/data/fate/vsynth1-mov-bpp15.mov
+99bece160cfb0da47f446b60d42fa3ae *tests/data/fate/vsynth1-mov-bpp15.out.rawvideo
+stddev:    4.06 PSNR: 35.94 MAXDIFF:   47 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth1-mov-bpp16 b/tests/ref/vsynth/vsynth1-mov-bpp16
new file mode 100644
index 00000000..8b17895c
--- /dev/null
+++ b/tests/ref/vsynth/vsynth1-mov-bpp16
@@ -0,0 +1,4 @@
+623f62ed23abf406c2d67bf65adaf421 *tests/data/fate/vsynth1-mov-bpp16.mov
+10138329 tests/data/fate/vsynth1-mov-bpp16.mov
+0cea382b9b0a4ce88260c1edc20b3f5b *tests/data/fate/vsynth1-mov-bpp16.out.rawvideo
+stddev:    3.79 PSNR: 36.55 MAXDIFF:   46 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth1-v210-10 b/tests/ref/vsynth/vsynth1-v210-10
new file mode 100644
index 00000000..4621b9d6
--- /dev/null
+++ b/tests/ref/vsynth/vsynth1-v210-10
@@ -0,0 +1,4 @@
+230bbd31c82d4fbb92d5ea2ac591ded5 *tests/data/fate/vsynth1-v210-10.avi
+14752452 tests/data/fate/vsynth1-v210-10.avi
+50973792d3f1abe04a51ee0121f077f2 *tests/data/fate/vsynth1-v210-10.out.rawvideo
+stddev:    1.85 PSNR: 42.78 MAXDIFF:   29 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth1-xface b/tests/ref/vsynth/vsynth1-xface
index 3b916c66..f4d38218 100644
--- a/tests/ref/vsynth/vsynth1-xface
+++ b/tests/ref/vsynth/vsynth1-xface
@@ -1,4 +1,4 @@
-487c3e53249f7b9f16e04257295998de *tests/data/fate/vsynth1-xface.nut
-19746 tests/data/fate/vsynth1-xface.nut
-42d8261bb538b8789840ac085f7fc4d2 *tests/data/fate/vsynth1-xface.out.rawvideo
-stddev:  103.88 PSNR:  7.80 MAXDIFF:  254 bytes:  7603200/  7603200
+98a6c85c83454c3209eedbe704a09cb5 *tests/data/fate/vsynth1-xface.nut
+19800 tests/data/fate/vsynth1-xface.nut
+cb1d399fcf818a70e5bc5769934d0094 *tests/data/fate/vsynth1-xface.out.rawvideo
+stddev:  103.97 PSNR:  7.79 MAXDIFF:  254 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth2-bpp1 b/tests/ref/vsynth/vsynth2-bpp1
new file mode 100644
index 00000000..d283d6c7
--- /dev/null
+++ b/tests/ref/vsynth/vsynth2-bpp1
@@ -0,0 +1,4 @@
+b51ad49892eb8f8912c5a983718a17bb *tests/data/fate/vsynth2-bpp1.avi
+640452 tests/data/fate/vsynth2-bpp1.avi
+338fb9039a4564e471bf8179f0c48a95 *tests/data/fate/vsynth2-bpp1.out.rawvideo
+stddev:   80.40 PSNR: 10.02 MAXDIFF:  238 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth2-bpp15 b/tests/ref/vsynth/vsynth2-bpp15
new file mode 100644
index 00000000..153b21e5
--- /dev/null
+++ b/tests/ref/vsynth/vsynth2-bpp15
@@ -0,0 +1,4 @@
+4bf0992de6b40389a35cd744f76bb213 *tests/data/fate/vsynth2-bpp15.avi
+10144452 tests/data/fate/vsynth2-bpp15.avi
+9a40133384e3f22c960d70c8cfe51781 *tests/data/fate/vsynth2-bpp15.out.rawvideo
+stddev:   33.97 PSNR: 17.51 MAXDIFF:  154 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth2-dnxhd-1080i b/tests/ref/vsynth/vsynth2-dnxhd-1080i
index c3a5073d..eabb6a2b 100644
--- a/tests/ref/vsynth/vsynth2-dnxhd-1080i
+++ b/tests/ref/vsynth/vsynth2-dnxhd-1080i
@@ -1,4 +1,4 @@
 2b75889122f8d918e1b068d128b618ca *tests/data/fate/vsynth2-dnxhd-1080i.mov
 3031911 tests/data/fate/vsynth2-dnxhd-1080i.mov
-099001db73036eeb9545c463cf90f0ba *tests/data/fate/vsynth2-dnxhd-1080i.out.rawvideo
-stddev:    1.53 PSNR: 44.43 MAXDIFF:   31 bytes:  7603200/   760320
+e941d2587cfeccddc450da7f41f7f911 *tests/data/fate/vsynth2-dnxhd-1080i.out.rawvideo
+stddev:    1.50 PSNR: 44.56 MAXDIFF:   31 bytes:  7603200/   760320
diff --git a/tests/ref/vsynth/vsynth2-dnxhd-1080i-10bit b/tests/ref/vsynth/vsynth2-dnxhd-1080i-10bit
new file mode 100644
index 00000000..3361c934
--- /dev/null
+++ b/tests/ref/vsynth/vsynth2-dnxhd-1080i-10bit
@@ -0,0 +1,4 @@
+514607eecfd9004aa4da1d216f7620ce *tests/data/fate/vsynth2-dnxhd-1080i-10bit.mov
+4588391 tests/data/fate/vsynth2-dnxhd-1080i-10bit.mov
+e4ca9be476869afb94962d945f90bdf6 *tests/data/fate/vsynth2-dnxhd-1080i-10bit.out.rawvideo
+stddev:    1.57 PSNR: 44.18 MAXDIFF:   33 bytes:  7603200/   760320
diff --git a/tests/ref/vsynth/vsynth2-dnxhd-1080i-colr b/tests/ref/vsynth/vsynth2-dnxhd-1080i-colr
index ba5e6a1b..06731a88 100644
--- a/tests/ref/vsynth/vsynth2-dnxhd-1080i-colr
+++ b/tests/ref/vsynth/vsynth2-dnxhd-1080i-colr
@@ -1,4 +1,4 @@
 f9827e9867b0ea4f7585d8e362a58413 *tests/data/fate/vsynth2-dnxhd-1080i-colr.mov
 3031929 tests/data/fate/vsynth2-dnxhd-1080i-colr.mov
-e4cf5528c993b5e7d57a9d0a4d2cd0c6 *tests/data/fate/vsynth2-dnxhd-1080i-colr.out.rawvideo
-stddev:    1.58 PSNR: 44.15 MAXDIFF:   33 bytes:  7603200/   760320
+ec40a8014b819d02951b2f06bee7b514 *tests/data/fate/vsynth2-dnxhd-1080i-colr.out.rawvideo
+stddev:    1.54 PSNR: 44.33 MAXDIFF:   33 bytes:  7603200/   760320
diff --git a/tests/ref/vsynth/vsynth2-dnxhd-720p b/tests/ref/vsynth/vsynth2-dnxhd-720p
index f40da383..fa26c2c0 100644
--- a/tests/ref/vsynth/vsynth2-dnxhd-720p
+++ b/tests/ref/vsynth/vsynth2-dnxhd-720p
@@ -1,4 +1,4 @@
 3bb2d4fe12b49eae830918d68bde0675 *tests/data/fate/vsynth2-dnxhd-720p.dnxhd
 2293760 tests/data/fate/vsynth2-dnxhd-720p.dnxhd
-903e5a7f2b84c0cd362a0f3a69549989 *tests/data/fate/vsynth2-dnxhd-720p.out.rawvideo
-stddev:    1.53 PSNR: 44.41 MAXDIFF:   31 bytes:  7603200/   760320
+bf3c208086609fea7cb40a9ad97b1fd4 *tests/data/fate/vsynth2-dnxhd-720p.out.rawvideo
+stddev:    1.52 PSNR: 44.49 MAXDIFF:   31 bytes:  7603200/   760320
diff --git a/tests/ref/vsynth/vsynth2-dnxhd-720p-10bit b/tests/ref/vsynth/vsynth2-dnxhd-720p-10bit
index c57bf7de..0d2068d4 100644
--- a/tests/ref/vsynth/vsynth2-dnxhd-720p-10bit
+++ b/tests/ref/vsynth/vsynth2-dnxhd-720p-10bit
@@ -1,4 +1,4 @@
 e49cb87f69acc809aee55d64990c84a9 *tests/data/fate/vsynth2-dnxhd-720p-10bit.dnxhd
 2293760 tests/data/fate/vsynth2-dnxhd-720p-10bit.dnxhd
-3eb47758e42db9fc704e1254b7abbeb0 *tests/data/fate/vsynth2-dnxhd-720p-10bit.out.rawvideo
-stddev:    1.56 PSNR: 44.25 MAXDIFF:   31 bytes:  7603200/   760320
+1e6e1ef90e5c9b16a80acc17fde596ff *tests/data/fate/vsynth2-dnxhd-720p-10bit.out.rawvideo
+stddev:    1.54 PSNR: 44.36 MAXDIFF:   31 bytes:  7603200/   760320
diff --git a/tests/ref/vsynth/vsynth2-dnxhd-720p-rd b/tests/ref/vsynth/vsynth2-dnxhd-720p-rd
index 3d975570..f2129981 100644
--- a/tests/ref/vsynth/vsynth2-dnxhd-720p-rd
+++ b/tests/ref/vsynth/vsynth2-dnxhd-720p-rd
@@ -1,4 +1,4 @@
 b723c7412a4c93f500b917ad721f6d21 *tests/data/fate/vsynth2-dnxhd-720p-rd.dnxhd
 2293760 tests/data/fate/vsynth2-dnxhd-720p-rd.dnxhd
-f7d437ea7024700cfd61c40197f44852 *tests/data/fate/vsynth2-dnxhd-720p-rd.out.rawvideo
-stddev:    1.53 PSNR: 44.40 MAXDIFF:   31 bytes:  7603200/   760320
+b7c5de5578317654792c6ea31bfeeb7d *tests/data/fate/vsynth2-dnxhd-720p-rd.out.rawvideo
+stddev:    1.52 PSNR: 44.48 MAXDIFF:   31 bytes:  7603200/   760320
diff --git a/tests/ref/vsynth/vsynth2-ffv1-v0 b/tests/ref/vsynth/vsynth2-ffv1-v0
new file mode 100644
index 00000000..e1e71f1f
--- /dev/null
+++ b/tests/ref/vsynth/vsynth2-ffv1-v0
@@ -0,0 +1,4 @@
+9647e906f0739ed84303bd03d1cb8105 *tests/data/fate/vsynth2-ffv1-v0.avi
+3692542 tests/data/fate/vsynth2-ffv1-v0.avi
+36d7ca943916e1743cefa609eba0205c *tests/data/fate/vsynth2-ffv1-v0.out.rawvideo
+stddev:    0.00 PSNR:999.99 MAXDIFF:    0 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth2-ffv1-v3-bgr0 b/tests/ref/vsynth/vsynth2-ffv1-v3-bgr0
new file mode 100644
index 00000000..11f516bd
--- /dev/null
+++ b/tests/ref/vsynth/vsynth2-ffv1-v3-bgr0
@@ -0,0 +1,4 @@
+4e8ea4c31ddb7703638989c6251e37fe *tests/data/fate/vsynth2-ffv1-v3-bgr0.avi
+6386312 tests/data/fate/vsynth2-ffv1-v3-bgr0.avi
+835a86f8dff88917c3e5f2776954c5b7 *tests/data/fate/vsynth2-ffv1-v3-bgr0.out.rawvideo
+stddev:    1.57 PSNR: 44.18 MAXDIFF:   20 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth2-ffv1-v3-yuv420p b/tests/ref/vsynth/vsynth2-ffv1-v3-yuv420p
new file mode 100644
index 00000000..5d85ffce
--- /dev/null
+++ b/tests/ref/vsynth/vsynth2-ffv1-v3-yuv420p
@@ -0,0 +1,4 @@
+6d7b6352f49e21153bb891df411e60ec *tests/data/fate/vsynth2-ffv1-v3-yuv420p.avi
+3718026 tests/data/fate/vsynth2-ffv1-v3-yuv420p.avi
+36d7ca943916e1743cefa609eba0205c *tests/data/fate/vsynth2-ffv1-v3-yuv420p.out.rawvideo
+stddev:    0.00 PSNR:999.99 MAXDIFF:    0 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth2-ffv1-v3-yuv422p10 b/tests/ref/vsynth/vsynth2-ffv1-v3-yuv422p10
new file mode 100644
index 00000000..b1a6c19f
--- /dev/null
+++ b/tests/ref/vsynth/vsynth2-ffv1-v3-yuv422p10
@@ -0,0 +1,4 @@
+b43b20163948e44a6c806714e69ac3bf *tests/data/fate/vsynth2-ffv1-v3-yuv422p10.avi
+4069370 tests/data/fate/vsynth2-ffv1-v3-yuv422p10.avi
+36d7ca943916e1743cefa609eba0205c *tests/data/fate/vsynth2-ffv1-v3-yuv422p10.out.rawvideo
+stddev:    0.00 PSNR:999.99 MAXDIFF:    0 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth2-ffv1-v3-yuv444p16 b/tests/ref/vsynth/vsynth2-ffv1-v3-yuv444p16
new file mode 100644
index 00000000..4d0a0c28
--- /dev/null
+++ b/tests/ref/vsynth/vsynth2-ffv1-v3-yuv444p16
@@ -0,0 +1,4 @@
+ae0bad7ece3ceacc9554f342ab489a4d *tests/data/fate/vsynth2-ffv1-v3-yuv444p16.avi
+5086918 tests/data/fate/vsynth2-ffv1-v3-yuv444p16.avi
+36d7ca943916e1743cefa609eba0205c *tests/data/fate/vsynth2-ffv1-v3-yuv444p16.out.rawvideo
+stddev:    0.00 PSNR:999.99 MAXDIFF:    0 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth2-ffv1.0 b/tests/ref/vsynth/vsynth2-ffv1.0
deleted file mode 100644
index 117fe273..00000000
--- a/tests/ref/vsynth/vsynth2-ffv1.0
+++ /dev/null
@@ -1,4 +0,0 @@
-9647e906f0739ed84303bd03d1cb8105 *tests/data/fate/vsynth2-ffv1.0.avi
-3692542 tests/data/fate/vsynth2-ffv1.0.avi
-36d7ca943916e1743cefa609eba0205c *tests/data/fate/vsynth2-ffv1.0.out.rawvideo
-stddev:    0.00 PSNR:999.99 MAXDIFF:    0 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth2-h261 b/tests/ref/vsynth/vsynth2-h261
index 999ac36a..cc46cfc1 100644
--- a/tests/ref/vsynth/vsynth2-h261
+++ b/tests/ref/vsynth/vsynth2-h261
@@ -1,4 +1,4 @@
-6334083b8899cdd8b56f80f8e1345213 *tests/data/fate/vsynth2-h261.avi
+49ae672f343b45e9d18523634176b857 *tests/data/fate/vsynth2-h261.avi
 257938 tests/data/fate/vsynth2-h261.avi
 8962b6ea3153a828e5a4df68e1d5da44 *tests/data/fate/vsynth2-h261.out.rawvideo
 stddev:    7.21 PSNR: 30.97 MAXDIFF:   96 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth2-h261-trellis b/tests/ref/vsynth/vsynth2-h261-trellis
index 7f53e303..b9c694f0 100644
--- a/tests/ref/vsynth/vsynth2-h261-trellis
+++ b/tests/ref/vsynth/vsynth2-h261-trellis
@@ -1,4 +1,4 @@
-f5105d846793cd12f1fedffd917a240d *tests/data/fate/vsynth2-h261-trellis.avi
+f5e0cfc70bbe4f4048c15be88dea4378 *tests/data/fate/vsynth2-h261-trellis.avi
 249856 tests/data/fate/vsynth2-h261-trellis.avi
 15452237f6c333690d3e05f354f63196 *tests/data/fate/vsynth2-h261-trellis.out.rawvideo
 stddev:    7.10 PSNR: 31.10 MAXDIFF:   96 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth2-jpeg2000 b/tests/ref/vsynth/vsynth2-jpeg2000
index 2100042a..094f4168 100644
--- a/tests/ref/vsynth/vsynth2-jpeg2000
+++ b/tests/ref/vsynth/vsynth2-jpeg2000
@@ -1,4 +1,4 @@
-b918de21dec4310f87af3788ed458462 *tests/data/fate/vsynth2-jpeg2000.avi
-1513144 tests/data/fate/vsynth2-jpeg2000.avi
-d30f637670a60de6cc8d177047f32a59 *tests/data/fate/vsynth2-jpeg2000.out.rawvideo
-stddev:    5.04 PSNR: 34.07 MAXDIFF:   71 bytes:  7603200/  7603200
+6c2f979e4a33a36f36aec86f2d464143 *tests/data/fate/vsynth2-jpeg2000.avi
+1494516 tests/data/fate/vsynth2-jpeg2000.avi
+36afd96d6e55bc83166fd615351ba366 *tests/data/fate/vsynth2-jpeg2000.out.rawvideo
+stddev:    5.00 PSNR: 34.15 MAXDIFF:   59 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth2-jpeg2000-97 b/tests/ref/vsynth/vsynth2-jpeg2000-97
index 21ab8ad5..38153ea0 100644
--- a/tests/ref/vsynth/vsynth2-jpeg2000-97
+++ b/tests/ref/vsynth/vsynth2-jpeg2000-97
@@ -1,4 +1,4 @@
-f8880f48a46ad43623d00de15ecba2cd *tests/data/fate/vsynth2-jpeg2000-97.avi
-1467472 tests/data/fate/vsynth2-jpeg2000-97.avi
-e523db4385f586d73aa0ee2688a75d2e *tests/data/fate/vsynth2-jpeg2000-97.out.rawvideo
-stddev:    5.44 PSNR: 33.41 MAXDIFF:   57 bytes:  7603200/  7603200
+2e43f004a55f4a55a19c4b79fc8e8743 *tests/data/fate/vsynth2-jpeg2000-97.avi
+2448706 tests/data/fate/vsynth2-jpeg2000-97.avi
+a6e2453118a0de135836a868b2ca0e60 *tests/data/fate/vsynth2-jpeg2000-97.out.rawvideo
+stddev:    3.23 PSNR: 37.94 MAXDIFF:   29 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth2-mov-bgr24 b/tests/ref/vsynth/vsynth2-mov-bgr24
new file mode 100644
index 00000000..8d9adc1f
--- /dev/null
+++ b/tests/ref/vsynth/vsynth2-mov-bgr24
@@ -0,0 +1,4 @@
+ed8dbb665820659d9c4fa659e0a20c65 *tests/data/fate/vsynth2-mov-bgr24.mov
+15207169 tests/data/fate/vsynth2-mov-bgr24.mov
+32fae3e665407bb4317b3f90fedb903c *tests/data/fate/vsynth2-mov-bgr24.out.rawvideo
+stddev:    1.54 PSNR: 44.37 MAXDIFF:   17 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth2-mov-bpp15 b/tests/ref/vsynth/vsynth2-mov-bpp15
new file mode 100644
index 00000000..c61b88a7
--- /dev/null
+++ b/tests/ref/vsynth/vsynth2-mov-bpp15
@@ -0,0 +1,4 @@
+d694493c1fe20211e12d797620d622ec *tests/data/fate/vsynth2-mov-bpp15.mov
+10138329 tests/data/fate/vsynth2-mov-bpp15.mov
+eb3f0c974ed17ede7cd3ce30ce417d8d *tests/data/fate/vsynth2-mov-bpp15.out.rawvideo
+stddev:    2.81 PSNR: 39.14 MAXDIFF:   19 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth2-mov-bpp16 b/tests/ref/vsynth/vsynth2-mov-bpp16
new file mode 100644
index 00000000..1839e55c
--- /dev/null
+++ b/tests/ref/vsynth/vsynth2-mov-bpp16
@@ -0,0 +1,4 @@
+c2685405eaec39007a68108c4533ba0e *tests/data/fate/vsynth2-mov-bpp16.mov
+10138329 tests/data/fate/vsynth2-mov-bpp16.mov
+7747ab837f0e832be2124120d4f7df1c *tests/data/fate/vsynth2-mov-bpp16.out.rawvideo
+stddev:    2.21 PSNR: 41.24 MAXDIFF:   18 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth2-v210-10 b/tests/ref/vsynth/vsynth2-v210-10
new file mode 100644
index 00000000..db38b2f5
--- /dev/null
+++ b/tests/ref/vsynth/vsynth2-v210-10
@@ -0,0 +1,4 @@
+02a5d983deb4bc91bb273c2b26c3100f *tests/data/fate/vsynth2-v210-10.avi
+14752452 tests/data/fate/vsynth2-v210-10.avi
+8bb1c449e1a2a94fd0d98841c04246bb *tests/data/fate/vsynth2-v210-10.out.rawvideo
+stddev:    0.39 PSNR: 56.17 MAXDIFF:    9 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth2-xface b/tests/ref/vsynth/vsynth2-xface
index 45442696..54dc29e8 100644
--- a/tests/ref/vsynth/vsynth2-xface
+++ b/tests/ref/vsynth/vsynth2-xface
@@ -1,4 +1,4 @@
-dc0ff0ba9588dbec10580941529b77a2 *tests/data/fate/vsynth2-xface.nut
-16866 tests/data/fate/vsynth2-xface.nut
-71a54876bc79746cc8c36f3f02aea4ef *tests/data/fate/vsynth2-xface.out.rawvideo
-stddev:   86.58 PSNR:  9.38 MAXDIFF:  250 bytes:  7603200/  7603200
+c498ec3b58b86b8164cef9d63184fa6d *tests/data/fate/vsynth2-xface.nut
+16423 tests/data/fate/vsynth2-xface.nut
+9106961054b226c360f284272cfd4474 *tests/data/fate/vsynth2-xface.out.rawvideo
+stddev:   85.41 PSNR:  9.50 MAXDIFF:  250 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth3-bpp1 b/tests/ref/vsynth/vsynth3-bpp1
new file mode 100644
index 00000000..5a65728b
--- /dev/null
+++ b/tests/ref/vsynth/vsynth3-bpp1
@@ -0,0 +1,4 @@
+98852649c5201df7d85d0e9b5a5b9f15 *tests/data/fate/vsynth3-bpp1.avi
+15352 tests/data/fate/vsynth3-bpp1.avi
+0b1ea21b69d384564dd3a978065443b2 *tests/data/fate/vsynth3-bpp1.out.rawvideo
+stddev:   97.64 PSNR:  8.34 MAXDIFF:  248 bytes:    86700/    86700
diff --git a/tests/ref/vsynth/vsynth3-bpp15 b/tests/ref/vsynth/vsynth3-bpp15
new file mode 100644
index 00000000..fff80372
--- /dev/null
+++ b/tests/ref/vsynth/vsynth3-bpp15
@@ -0,0 +1,4 @@
+9ac236c12757cbf9ee6f95c24a374524 *tests/data/fate/vsynth3-bpp15.avi
+122452 tests/data/fate/vsynth3-bpp15.avi
+85ac2fa98252ae907b97a7a561ca676f *tests/data/fate/vsynth3-bpp15.out.rawvideo
+stddev:   37.76 PSNR: 16.59 MAXDIFF:  156 bytes:    86700/    86700
diff --git a/tests/ref/vsynth/vsynth3-dnxhd-1080i-10bit b/tests/ref/vsynth/vsynth3-dnxhd-1080i-10bit
new file mode 100644
index 00000000..006af6c5
--- /dev/null
+++ b/tests/ref/vsynth/vsynth3-dnxhd-1080i-10bit
@@ -0,0 +1,4 @@
+dea8862f8ae9fb03f665f358dde75962 *tests/data/fate/vsynth3-dnxhd-1080i-10bit.mov
+4588391 tests/data/fate/vsynth3-dnxhd-1080i-10bit.mov
+c192f36ef8687e56c72a3dc416c7e191 *tests/data/fate/vsynth3-dnxhd-1080i-10bit.out.rawvideo
+stddev:    6.92 PSNR: 31.32 MAXDIFF:   50 bytes:    86700/     8670
diff --git a/tests/ref/vsynth/vsynth3-dnxhd-1080i-colr b/tests/ref/vsynth/vsynth3-dnxhd-1080i-colr
index 7cc22981..8d7d3b65 100644
--- a/tests/ref/vsynth/vsynth3-dnxhd-1080i-colr
+++ b/tests/ref/vsynth/vsynth3-dnxhd-1080i-colr
@@ -1,4 +1,4 @@
 ee7a70832f37793b62642f770d988bdb *tests/data/fate/vsynth3-dnxhd-1080i-colr.mov
 3031929 tests/data/fate/vsynth3-dnxhd-1080i-colr.mov
-7dd6b261e439cda21df4f01b45336b41 *tests/data/fate/vsynth3-dnxhd-1080i-colr.out.rawvideo
+f907fd2d48bedbc5283fbfc3fb9f61a0 *tests/data/fate/vsynth3-dnxhd-1080i-colr.out.rawvideo
 stddev:    6.92 PSNR: 31.32 MAXDIFF:   50 bytes:    86700/     8670
diff --git a/tests/ref/vsynth/vsynth3-ffv1-v0 b/tests/ref/vsynth/vsynth3-ffv1-v0
new file mode 100644
index 00000000..967a0220
--- /dev/null
+++ b/tests/ref/vsynth/vsynth3-ffv1-v0
@@ -0,0 +1,4 @@
+91ddf7723476e2b084253ffca69f382e *tests/data/fate/vsynth3-ffv1-v0.avi
+52256 tests/data/fate/vsynth3-ffv1-v0.avi
+a038ad7c3c09f776304ef7accdea9c74 *tests/data/fate/vsynth3-ffv1-v0.out.rawvideo
+stddev:    0.00 PSNR:999.99 MAXDIFF:    0 bytes:    86700/    86700
diff --git a/tests/ref/vsynth/vsynth3-ffv1-v3-bgr0 b/tests/ref/vsynth/vsynth3-ffv1-v3-bgr0
new file mode 100644
index 00000000..2adffedb
--- /dev/null
+++ b/tests/ref/vsynth/vsynth3-ffv1-v3-bgr0
@@ -0,0 +1,4 @@
+bdb5f694222e91bb7cb7264d2d5d419b *tests/data/fate/vsynth3-ffv1-v3-bgr0.avi
+112780 tests/data/fate/vsynth3-ffv1-v3-bgr0.avi
+5d031d2e891b13593b8cd79e63d083b4 *tests/data/fate/vsynth3-ffv1-v3-bgr0.out.rawvideo
+stddev:    3.23 PSNR: 37.92 MAXDIFF:   50 bytes:    86700/    86700
diff --git a/tests/ref/vsynth/vsynth3-ffv1-v3-yuv420p b/tests/ref/vsynth/vsynth3-ffv1-v3-yuv420p
new file mode 100644
index 00000000..38fb24ae
--- /dev/null
+++ b/tests/ref/vsynth/vsynth3-ffv1-v3-yuv420p
@@ -0,0 +1,4 @@
+f969ca8542c8384c27233f362b661f8a *tests/data/fate/vsynth3-ffv1-v3-yuv420p.avi
+62194 tests/data/fate/vsynth3-ffv1-v3-yuv420p.avi
+a038ad7c3c09f776304ef7accdea9c74 *tests/data/fate/vsynth3-ffv1-v3-yuv420p.out.rawvideo
+stddev:    0.00 PSNR:999.99 MAXDIFF:    0 bytes:    86700/    86700
diff --git a/tests/ref/vsynth/vsynth3-ffv1-v3-yuv422p10 b/tests/ref/vsynth/vsynth3-ffv1-v3-yuv422p10
new file mode 100644
index 00000000..d86c9c9e
--- /dev/null
+++ b/tests/ref/vsynth/vsynth3-ffv1-v3-yuv422p10
@@ -0,0 +1,4 @@
+4fc113caac00ada68e19639ae6f7bc47 *tests/data/fate/vsynth3-ffv1-v3-yuv422p10.avi
+63840 tests/data/fate/vsynth3-ffv1-v3-yuv422p10.avi
+a038ad7c3c09f776304ef7accdea9c74 *tests/data/fate/vsynth3-ffv1-v3-yuv422p10.out.rawvideo
+stddev:    0.00 PSNR:999.99 MAXDIFF:    0 bytes:    86700/    86700
diff --git a/tests/ref/vsynth/vsynth3-ffv1-v3-yuv444p16 b/tests/ref/vsynth/vsynth3-ffv1-v3-yuv444p16
new file mode 100644
index 00000000..5d61f119
--- /dev/null
+++ b/tests/ref/vsynth/vsynth3-ffv1-v3-yuv444p16
@@ -0,0 +1,4 @@
+1e8981cec92407938b25cd82381d1c64 *tests/data/fate/vsynth3-ffv1-v3-yuv444p16.avi
+82908 tests/data/fate/vsynth3-ffv1-v3-yuv444p16.avi
+a038ad7c3c09f776304ef7accdea9c74 *tests/data/fate/vsynth3-ffv1-v3-yuv444p16.out.rawvideo
+stddev:    0.00 PSNR:999.99 MAXDIFF:    0 bytes:    86700/    86700
diff --git a/tests/ref/vsynth/vsynth3-ffv1.0 b/tests/ref/vsynth/vsynth3-ffv1.0
deleted file mode 100644
index 7abc1f09..00000000
--- a/tests/ref/vsynth/vsynth3-ffv1.0
+++ /dev/null
@@ -1,4 +0,0 @@
-91ddf7723476e2b084253ffca69f382e *tests/data/fate/vsynth3-ffv1.0.avi
-52256 tests/data/fate/vsynth3-ffv1.0.avi
-a038ad7c3c09f776304ef7accdea9c74 *tests/data/fate/vsynth3-ffv1.0.out.rawvideo
-stddev:    0.00 PSNR:999.99 MAXDIFF:    0 bytes:    86700/    86700
diff --git a/tests/ref/vsynth/vsynth3-jpeg2000 b/tests/ref/vsynth/vsynth3-jpeg2000
index 0d086cf5..a1e3a809 100644
--- a/tests/ref/vsynth/vsynth3-jpeg2000
+++ b/tests/ref/vsynth/vsynth3-jpeg2000
@@ -1,4 +1,4 @@
-7ea4e3901817f06a3de59ee70836f5d9 *tests/data/fate/vsynth3-jpeg2000.avi
-63772 tests/data/fate/vsynth3-jpeg2000.avi
-ddfc0e8e033aeecf334c1450148dab44 *tests/data/fate/vsynth3-jpeg2000.out.rawvideo
-stddev:    5.52 PSNR: 33.28 MAXDIFF:   48 bytes:    86700/    86700
+0b8aa8113c10772cffff60f9c8ffd902 *tests/data/fate/vsynth3-jpeg2000.avi
+65548 tests/data/fate/vsynth3-jpeg2000.avi
+2d8bd94d558755c47d7e23fd9556e164 *tests/data/fate/vsynth3-jpeg2000.out.rawvideo
+stddev:    5.48 PSNR: 33.34 MAXDIFF:   47 bytes:    86700/    86700
diff --git a/tests/ref/vsynth/vsynth3-jpeg2000-97 b/tests/ref/vsynth/vsynth3-jpeg2000-97
index d1695ded..191956f5 100644
--- a/tests/ref/vsynth/vsynth3-jpeg2000-97
+++ b/tests/ref/vsynth/vsynth3-jpeg2000-97
@@ -1,4 +1,4 @@
-3e2f1e7d009d05e2bb9e8c3b10fab134 *tests/data/fate/vsynth3-jpeg2000-97.avi
-65364 tests/data/fate/vsynth3-jpeg2000-97.avi
-f9dad7a31175e400ca35de60aec826fd *tests/data/fate/vsynth3-jpeg2000-97.out.rawvideo
-stddev:    6.27 PSNR: 32.17 MAXDIFF:   52 bytes:    86700/    86700
+b6c88a623c3296ca945346d2203f0af0 *tests/data/fate/vsynth3-jpeg2000-97.avi
+83870 tests/data/fate/vsynth3-jpeg2000-97.avi
+0cd707bfb1bbe5312b00c094f695b1fa *tests/data/fate/vsynth3-jpeg2000-97.out.rawvideo
+stddev:    4.52 PSNR: 35.02 MAXDIFF:   47 bytes:    86700/    86700
diff --git a/tests/ref/vsynth/vsynth3-mov-bgr24 b/tests/ref/vsynth/vsynth3-mov-bgr24
new file mode 100644
index 00000000..9ceb69ee
--- /dev/null
+++ b/tests/ref/vsynth/vsynth3-mov-bgr24
@@ -0,0 +1,4 @@
+9af1caa30e99cc422c8a0734051f7f95 *tests/data/fate/vsynth3-mov-bgr24.mov
+174093 tests/data/fate/vsynth3-mov-bgr24.mov
+693aff10c094f8bd31693f74cf79d2b2 *tests/data/fate/vsynth3-mov-bgr24.out.rawvideo
+stddev:    3.67 PSNR: 36.82 MAXDIFF:   43 bytes:    86700/    86700
diff --git a/tests/ref/vsynth/vsynth3-mov-bpp15 b/tests/ref/vsynth/vsynth3-mov-bpp15
new file mode 100644
index 00000000..dfee4585
--- /dev/null
+++ b/tests/ref/vsynth/vsynth3-mov-bpp15
@@ -0,0 +1,4 @@
+3d64ea4e9c78e72fcedca5e00363db7b *tests/data/fate/vsynth3-mov-bpp15.mov
+116293 tests/data/fate/vsynth3-mov-bpp15.mov
+19f61c34cbdef98b0f4aca6c19f59ed4 *tests/data/fate/vsynth3-mov-bpp15.out.rawvideo
+stddev:    4.35 PSNR: 35.35 MAXDIFF:   46 bytes:    86700/    86700
diff --git a/tests/ref/vsynth/vsynth3-mov-bpp16 b/tests/ref/vsynth/vsynth3-mov-bpp16
new file mode 100644
index 00000000..86c6d7ae
--- /dev/null
+++ b/tests/ref/vsynth/vsynth3-mov-bpp16
@@ -0,0 +1,4 @@
+ddff7831e0d3e950cee4fdb7fceeb76a *tests/data/fate/vsynth3-mov-bpp16.mov
+116293 tests/data/fate/vsynth3-mov-bpp16.mov
+756f68dd5412d245d4bbeda7b5d51829 *tests/data/fate/vsynth3-mov-bpp16.out.rawvideo
+stddev:    4.07 PSNR: 35.93 MAXDIFF:   46 bytes:    86700/    86700
diff --git a/tests/ref/vsynth/vsynth3-v210-10 b/tests/ref/vsynth/vsynth3-v210-10
new file mode 100644
index 00000000..1a664afd
--- /dev/null
+++ b/tests/ref/vsynth/vsynth3-v210-10
@@ -0,0 +1,4 @@
+b68ad16e3bfd78556b816ec1a676445c *tests/data/fate/vsynth3-v210-10.avi
+224452 tests/data/fate/vsynth3-v210-10.avi
+0cf7cf68724fa5146b1667e4fa08b0e1 *tests/data/fate/vsynth3-v210-10.out.rawvideo
+stddev:    2.12 PSNR: 41.58 MAXDIFF:   26 bytes:    86700/    86700
diff --git a/tests/ref/vsynth/vsynth3-xface b/tests/ref/vsynth/vsynth3-xface
index f98a5c5e..b6ef0978 100644
--- a/tests/ref/vsynth/vsynth3-xface
+++ b/tests/ref/vsynth/vsynth3-xface
@@ -1,4 +1,4 @@
-f399a6b312d0a2d873b8a3bc761c5eba *tests/data/fate/vsynth3-xface.nut
-15696 tests/data/fate/vsynth3-xface.nut
-eafdc027c9c36f96e71e91a5682a0d2e *tests/data/fate/vsynth3-xface.out.rawvideo
-stddev:   97.22 PSNR:  8.37 MAXDIFF:  236 bytes:    86700/    86700
+367fa226e570b30935305a22982a54ba *tests/data/fate/vsynth3-xface.nut
+15772 tests/data/fate/vsynth3-xface.nut
+83a8c44631fbc8b185df932679db0e5b *tests/data/fate/vsynth3-xface.out.rawvideo
+stddev:   97.23 PSNR:  8.37 MAXDIFF:  238 bytes:    86700/    86700
diff --git a/tests/ref/vsynth/vsynth_lena-bpp1 b/tests/ref/vsynth/vsynth_lena-bpp1
new file mode 100644
index 00000000..63ab9e11
--- /dev/null
+++ b/tests/ref/vsynth/vsynth_lena-bpp1
@@ -0,0 +1,4 @@
+2859022fac452b59e49a1189c4fbb3ec *tests/data/fate/vsynth_lena-bpp1.avi
+640452 tests/data/fate/vsynth_lena-bpp1.avi
+3be3497f8ca548c9196dcecc5bc7cb2b *tests/data/fate/vsynth_lena-bpp1.out.rawvideo
+stddev:   96.52 PSNR:  8.44 MAXDIFF:  231 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth_lena-bpp15 b/tests/ref/vsynth/vsynth_lena-bpp15
new file mode 100644
index 00000000..96dbf6d4
--- /dev/null
+++ b/tests/ref/vsynth/vsynth_lena-bpp15
@@ -0,0 +1,4 @@
+b1b2dd35bcb3d5c20651ffe0da55cb46 *tests/data/fate/vsynth_lena-bpp15.avi
+10144452 tests/data/fate/vsynth_lena-bpp15.avi
+ccf6fc507e938e8cc5c2a97b644de51c *tests/data/fate/vsynth_lena-bpp15.out.rawvideo
+stddev:   32.84 PSNR: 17.80 MAXDIFF:   92 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth_lena-dnxhd-1080i b/tests/ref/vsynth/vsynth_lena-dnxhd-1080i
index 3693d579..16702db7 100644
--- a/tests/ref/vsynth/vsynth_lena-dnxhd-1080i
+++ b/tests/ref/vsynth/vsynth_lena-dnxhd-1080i
@@ -1,4 +1,4 @@
 f7412afbcb4454692f7492f6710189e3 *tests/data/fate/vsynth_lena-dnxhd-1080i.mov
 3031911 tests/data/fate/vsynth_lena-dnxhd-1080i.mov
-744ba46da5d4c19a28562ea31061d170 *tests/data/fate/vsynth_lena-dnxhd-1080i.out.rawvideo
-stddev:    1.31 PSNR: 45.77 MAXDIFF:   23 bytes:  7603200/   760320
+7d0ca92f12711535d57eff3609462b31 *tests/data/fate/vsynth_lena-dnxhd-1080i.out.rawvideo
+stddev:    1.29 PSNR: 45.87 MAXDIFF:   22 bytes:  7603200/   760320
diff --git a/tests/ref/vsynth/vsynth_lena-dnxhd-1080i-10bit b/tests/ref/vsynth/vsynth_lena-dnxhd-1080i-10bit
new file mode 100644
index 00000000..109e3d56
--- /dev/null
+++ b/tests/ref/vsynth/vsynth_lena-dnxhd-1080i-10bit
@@ -0,0 +1,4 @@
+72144676d0c6e320ff2c9b28bc3e4fa2 *tests/data/fate/vsynth_lena-dnxhd-1080i-10bit.mov
+4588391 tests/data/fate/vsynth_lena-dnxhd-1080i-10bit.mov
+f2dc4375c58e0406d442e0cb28573e91 *tests/data/fate/vsynth_lena-dnxhd-1080i-10bit.out.rawvideo
+stddev:    1.36 PSNR: 45.40 MAXDIFF:   22 bytes:  7603200/   760320
diff --git a/tests/ref/vsynth/vsynth_lena-dnxhd-1080i-colr b/tests/ref/vsynth/vsynth_lena-dnxhd-1080i-colr
index 1889786e..8e43a3f9 100644
--- a/tests/ref/vsynth/vsynth_lena-dnxhd-1080i-colr
+++ b/tests/ref/vsynth/vsynth_lena-dnxhd-1080i-colr
@@ -1,4 +1,4 @@
 5ba3ddb58b10e5f0069cb4f82d594695 *tests/data/fate/vsynth_lena-dnxhd-1080i-colr.mov
 3031929 tests/data/fate/vsynth_lena-dnxhd-1080i-colr.mov
-864c3d5f49d9edf66ce8f82a2a6725f6 *tests/data/fate/vsynth_lena-dnxhd-1080i-colr.out.rawvideo
-stddev:    1.36 PSNR: 45.45 MAXDIFF:   22 bytes:  7603200/   760320
+ce4993a69ef55c8c4b18138716f17b6f *tests/data/fate/vsynth_lena-dnxhd-1080i-colr.out.rawvideo
+stddev:    1.33 PSNR: 45.59 MAXDIFF:   22 bytes:  7603200/   760320
diff --git a/tests/ref/vsynth/vsynth_lena-dnxhd-720p b/tests/ref/vsynth/vsynth_lena-dnxhd-720p
index 686be548..eaa1af88 100644
--- a/tests/ref/vsynth/vsynth_lena-dnxhd-720p
+++ b/tests/ref/vsynth/vsynth_lena-dnxhd-720p
@@ -1,4 +1,4 @@
 4ca9473a8d106bdfe36e9bf7c516b648 *tests/data/fate/vsynth_lena-dnxhd-720p.dnxhd
 2293760 tests/data/fate/vsynth_lena-dnxhd-720p.dnxhd
-d44c4b08cda8a8042ae345124fdfffcc *tests/data/fate/vsynth_lena-dnxhd-720p.out.rawvideo
-stddev:    1.32 PSNR: 45.68 MAXDIFF:   22 bytes:  7603200/   760320
+9ccd48d24b4f1af2323b65abb6d65d7f *tests/data/fate/vsynth_lena-dnxhd-720p.out.rawvideo
+stddev:    1.31 PSNR: 45.77 MAXDIFF:   22 bytes:  7603200/   760320
diff --git a/tests/ref/vsynth/vsynth_lena-dnxhd-720p-10bit b/tests/ref/vsynth/vsynth_lena-dnxhd-720p-10bit
index 794e9c19..b9c9e035 100644
--- a/tests/ref/vsynth/vsynth_lena-dnxhd-720p-10bit
+++ b/tests/ref/vsynth/vsynth_lena-dnxhd-720p-10bit
@@ -1,4 +1,4 @@
 e96fc4a7d994b9369c50da32fd325822 *tests/data/fate/vsynth_lena-dnxhd-720p-10bit.dnxhd
 2293760 tests/data/fate/vsynth_lena-dnxhd-720p-10bit.dnxhd
-0449440eb3e8416840a27deb1a8f80b0 *tests/data/fate/vsynth_lena-dnxhd-720p-10bit.out.rawvideo
-stddev:    1.35 PSNR: 45.47 MAXDIFF:   22 bytes:  7603200/   760320
+0e9fcec94aeff70bac5dec02cf2391bc *tests/data/fate/vsynth_lena-dnxhd-720p-10bit.out.rawvideo
+stddev:    1.33 PSNR: 45.61 MAXDIFF:   22 bytes:  7603200/   760320
diff --git a/tests/ref/vsynth/vsynth_lena-dnxhd-720p-rd b/tests/ref/vsynth/vsynth_lena-dnxhd-720p-rd
index 453f68f0..b9aa6908 100644
--- a/tests/ref/vsynth/vsynth_lena-dnxhd-720p-rd
+++ b/tests/ref/vsynth/vsynth_lena-dnxhd-720p-rd
@@ -1,4 +1,4 @@
 b305b03708e905717b42fc0b304367d4 *tests/data/fate/vsynth_lena-dnxhd-720p-rd.dnxhd
 2293760 tests/data/fate/vsynth_lena-dnxhd-720p-rd.dnxhd
-13de1c5ed025abb5120450e134aa623d *tests/data/fate/vsynth_lena-dnxhd-720p-rd.out.rawvideo
-stddev:    1.32 PSNR: 45.66 MAXDIFF:   22 bytes:  7603200/   760320
+3793391df8448937b17d876b6df0d26d *tests/data/fate/vsynth_lena-dnxhd-720p-rd.out.rawvideo
+stddev:    1.31 PSNR: 45.75 MAXDIFF:   22 bytes:  7603200/   760320
diff --git a/tests/ref/vsynth/vsynth_lena-ffv1.0 b/tests/ref/vsynth/vsynth_lena-ffv1-v0
similarity index 71%
rename from tests/ref/vsynth/vsynth_lena-ffv1.0
rename to tests/ref/vsynth/vsynth_lena-ffv1-v0
index 58b1061c..cbfb4af9 100644
--- a/tests/ref/vsynth/vsynth_lena-ffv1.0
+++ b/tests/ref/vsynth/vsynth_lena-ffv1-v0
@@ -1,4 +1,4 @@
-ad518c7014bdd6a72514a79a9102f64f *tests/data/fate/vsynth_lena-ffv1.0.avi
-3525796 tests/data/fate/vsynth_lena-ffv1.0.avi
-dde5895817ad9d219f79a52d0bdfb001 *tests/data/fate/vsynth_lena-ffv1.0.out.rawvideo
+ad518c7014bdd6a72514a79a9102f64f *tests/data/fate/vsynth_lena-ffv1-v0.avi
+3525796 tests/data/fate/vsynth_lena-ffv1-v0.avi
+dde5895817ad9d219f79a52d0bdfb001 *tests/data/fate/vsynth_lena-ffv1-v0.out.rawvideo
 stddev:    0.00 PSNR:999.99 MAXDIFF:    0 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth_lena-ffv1-v3-bgr0 b/tests/ref/vsynth/vsynth_lena-ffv1-v3-bgr0
new file mode 100644
index 00000000..cfc98f66
--- /dev/null
+++ b/tests/ref/vsynth/vsynth_lena-ffv1-v3-bgr0
@@ -0,0 +1,4 @@
+edf0987619d4d63f8b7e3022a8d0a724 *tests/data/fate/vsynth_lena-ffv1-v3-bgr0.avi
+6013028 tests/data/fate/vsynth_lena-ffv1-v3-bgr0.avi
+0a8b7ddfec03622e37c869c5b552f9fc *tests/data/fate/vsynth_lena-ffv1-v3-bgr0.out.rawvideo
+stddev:    1.24 PSNR: 46.26 MAXDIFF:   17 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth_lena-ffv1-v3-yuv420 b/tests/ref/vsynth/vsynth_lena-ffv1-v3-yuv420
new file mode 100644
index 00000000..7db437ef
--- /dev/null
+++ b/tests/ref/vsynth/vsynth_lena-ffv1-v3-yuv420
@@ -0,0 +1,4 @@
+ed72c22374718589ffacd7ea15424687 *tests/data/fate/vsynth_lena-ffv1-v3-yuv420.avi
+3547792 tests/data/fate/vsynth_lena-ffv1-v3-yuv420.avi
+dde5895817ad9d219f79a52d0bdfb001 *tests/data/fate/vsynth_lena-ffv1-v3-yuv420.out.rawvideo
+stddev:    0.00 PSNR:999.99 MAXDIFF:    0 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth_lena-ffv1-v3-yuv420p b/tests/ref/vsynth/vsynth_lena-ffv1-v3-yuv420p
new file mode 100644
index 00000000..1cffcb65
--- /dev/null
+++ b/tests/ref/vsynth/vsynth_lena-ffv1-v3-yuv420p
@@ -0,0 +1,4 @@
+ed72c22374718589ffacd7ea15424687 *tests/data/fate/vsynth_lena-ffv1-v3-yuv420p.avi
+3547792 tests/data/fate/vsynth_lena-ffv1-v3-yuv420p.avi
+dde5895817ad9d219f79a52d0bdfb001 *tests/data/fate/vsynth_lena-ffv1-v3-yuv420p.out.rawvideo
+stddev:    0.00 PSNR:999.99 MAXDIFF:    0 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth_lena-ffv1-v3-yuv422p10 b/tests/ref/vsynth/vsynth_lena-ffv1-v3-yuv422p10
new file mode 100644
index 00000000..ab1cd347
--- /dev/null
+++ b/tests/ref/vsynth/vsynth_lena-ffv1-v3-yuv422p10
@@ -0,0 +1,4 @@
+cd3f5b3d4544a30bd08a9a11945a8adc *tests/data/fate/vsynth_lena-ffv1-v3-yuv422p10.avi
+3910306 tests/data/fate/vsynth_lena-ffv1-v3-yuv422p10.avi
+dde5895817ad9d219f79a52d0bdfb001 *tests/data/fate/vsynth_lena-ffv1-v3-yuv422p10.out.rawvideo
+stddev:    0.00 PSNR:999.99 MAXDIFF:    0 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth_lena-ffv1-v3-yuv444p16 b/tests/ref/vsynth/vsynth_lena-ffv1-v3-yuv444p16
new file mode 100644
index 00000000..08f9f0e3
--- /dev/null
+++ b/tests/ref/vsynth/vsynth_lena-ffv1-v3-yuv444p16
@@ -0,0 +1,4 @@
+e65ad01440ad756b1872b22d3cfa23d7 *tests/data/fate/vsynth_lena-ffv1-v3-yuv444p16.avi
+4871266 tests/data/fate/vsynth_lena-ffv1-v3-yuv444p16.avi
+dde5895817ad9d219f79a52d0bdfb001 *tests/data/fate/vsynth_lena-ffv1-v3-yuv444p16.out.rawvideo
+stddev:    0.00 PSNR:999.99 MAXDIFF:    0 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth_lena-h261 b/tests/ref/vsynth/vsynth_lena-h261
index c4ea318f..2522ad93 100644
--- a/tests/ref/vsynth/vsynth_lena-h261
+++ b/tests/ref/vsynth/vsynth_lena-h261
@@ -1,4 +1,4 @@
-a8a6fe710cf8591d1ce5fd18f478e217 *tests/data/fate/vsynth_lena-h261.avi
+9a2104849cbcf03a612e5906ef4d264b *tests/data/fate/vsynth_lena-h261.avi
 191064 tests/data/fate/vsynth_lena-h261.avi
 08f65e9aeeeaf189548c2bb417d5114f *tests/data/fate/vsynth_lena-h261.out.rawvideo
 stddev:    6.37 PSNR: 32.03 MAXDIFF:   77 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth_lena-h261-trellis b/tests/ref/vsynth/vsynth_lena-h261-trellis
index 128f2242..a3065080 100644
--- a/tests/ref/vsynth/vsynth_lena-h261-trellis
+++ b/tests/ref/vsynth/vsynth_lena-h261-trellis
@@ -1,4 +1,4 @@
-43fca6298b5366199104942e8a901d52 *tests/data/fate/vsynth_lena-h261-trellis.avi
+41522be50f14b7fff6b1fb8d10b1ff00 *tests/data/fate/vsynth_lena-h261-trellis.avi
 184586 tests/data/fate/vsynth_lena-h261-trellis.avi
 f9df8cd110a2f3d9706dd2f29a1d0a89 *tests/data/fate/vsynth_lena-h261-trellis.out.rawvideo
 stddev:    6.32 PSNR: 32.11 MAXDIFF:   89 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth_lena-jpeg2000 b/tests/ref/vsynth/vsynth_lena-jpeg2000
index 6605f625..72550eb8 100644
--- a/tests/ref/vsynth/vsynth_lena-jpeg2000
+++ b/tests/ref/vsynth/vsynth_lena-jpeg2000
@@ -1,4 +1,4 @@
-9283c83bc8dc830bd48ad66f71ae42e8 *tests/data/fate/vsynth_lena-jpeg2000.avi
-1151148 tests/data/fate/vsynth_lena-jpeg2000.avi
-71a9ee7ad7c3dad60aa6641712c1f9e4 *tests/data/fate/vsynth_lena-jpeg2000.out.rawvideo
-stddev:    4.44 PSNR: 35.17 MAXDIFF:   61 bytes:  7603200/  7603200
+1f2cf6061c78905b8011091a9a7c425f *tests/data/fate/vsynth_lena-jpeg2000.avi
+1138054 tests/data/fate/vsynth_lena-jpeg2000.avi
+955653ca7a08447e7b1501b444f24562 *tests/data/fate/vsynth_lena-jpeg2000.out.rawvideo
+stddev:    4.40 PSNR: 35.25 MAXDIFF:   58 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth_lena-jpeg2000-97 b/tests/ref/vsynth/vsynth_lena-jpeg2000-97
index 5945ac7d..5c12665b 100644
--- a/tests/ref/vsynth/vsynth_lena-jpeg2000-97
+++ b/tests/ref/vsynth/vsynth_lena-jpeg2000-97
@@ -1,4 +1,4 @@
-ba27504dcabe43d6608798c9cadc5cca *tests/data/fate/vsynth_lena-jpeg2000-97.avi
-1118956 tests/data/fate/vsynth_lena-jpeg2000-97.avi
-8ac8b9ee81fa73c873668e9f6b78764d *tests/data/fate/vsynth_lena-jpeg2000-97.out.rawvideo
-stddev:    4.95 PSNR: 34.23 MAXDIFF:   60 bytes:  7603200/  7603200
+e5a756e97910420c90e76259c56261cb *tests/data/fate/vsynth_lena-jpeg2000-97.avi
+1918956 tests/data/fate/vsynth_lena-jpeg2000-97.avi
+93a4ba0c230f2430a813df594676e58a *tests/data/fate/vsynth_lena-jpeg2000-97.out.rawvideo
+stddev:    2.84 PSNR: 39.04 MAXDIFF:   28 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth_lena-mov-bgr24 b/tests/ref/vsynth/vsynth_lena-mov-bgr24
new file mode 100644
index 00000000..3117bf2a
--- /dev/null
+++ b/tests/ref/vsynth/vsynth_lena-mov-bgr24
@@ -0,0 +1,4 @@
+f1eccd8c8719e3fabfe7855dad997699 *tests/data/fate/vsynth_lena-mov-bgr24.mov
+15207169 tests/data/fate/vsynth_lena-mov-bgr24.mov
+98d0e2854731472c5bf13d8638502d0a *tests/data/fate/vsynth_lena-mov-bgr24.out.rawvideo
+stddev:    1.26 PSNR: 46.10 MAXDIFF:   13 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth_lena-mov-bpp15 b/tests/ref/vsynth/vsynth_lena-mov-bpp15
new file mode 100644
index 00000000..2a7ddd67
--- /dev/null
+++ b/tests/ref/vsynth/vsynth_lena-mov-bpp15
@@ -0,0 +1,4 @@
+286e9e0712da1efb186a7228b6d4a177 *tests/data/fate/vsynth_lena-mov-bpp15.mov
+10138329 tests/data/fate/vsynth_lena-mov-bpp15.mov
+be0e64bdf519ce1097613063804eded9 *tests/data/fate/vsynth_lena-mov-bpp15.out.rawvideo
+stddev:    2.16 PSNR: 41.43 MAXDIFF:   17 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth_lena-mov-bpp16 b/tests/ref/vsynth/vsynth_lena-mov-bpp16
new file mode 100644
index 00000000..2c478b90
--- /dev/null
+++ b/tests/ref/vsynth/vsynth_lena-mov-bpp16
@@ -0,0 +1,4 @@
+1da4058a0cfb241d735ed46a0b18efa3 *tests/data/fate/vsynth_lena-mov-bpp16.mov
+10138329 tests/data/fate/vsynth_lena-mov-bpp16.mov
+789bfa1dc2a72f498928f2ae85e461c3 *tests/data/fate/vsynth_lena-mov-bpp16.out.rawvideo
+stddev:    1.76 PSNR: 43.18 MAXDIFF:   17 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth_lena-v210-10 b/tests/ref/vsynth/vsynth_lena-v210-10
new file mode 100644
index 00000000..1e5732bd
--- /dev/null
+++ b/tests/ref/vsynth/vsynth_lena-v210-10
@@ -0,0 +1,4 @@
+a3913b719397fae870c1d9bc35053259 *tests/data/fate/vsynth_lena-v210-10.avi
+14752452 tests/data/fate/vsynth_lena-v210-10.avi
+a627fb50c8276200fd71383977d87ca3 *tests/data/fate/vsynth_lena-v210-10.out.rawvideo
+stddev:    0.34 PSNR: 57.43 MAXDIFF:    6 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth_lena-xface b/tests/ref/vsynth/vsynth_lena-xface
index 61031f98..a38fed53 100644
--- a/tests/ref/vsynth/vsynth_lena-xface
+++ b/tests/ref/vsynth/vsynth_lena-xface
@@ -1,4 +1,4 @@
-6a1a7b467eeab2795510e7dd1ca528ff *tests/data/fate/vsynth_lena-xface.nut
-17504 tests/data/fate/vsynth_lena-xface.nut
-6d87881d630439d02c7a97f468d67a1c *tests/data/fate/vsynth_lena-xface.out.rawvideo
-stddev:   99.01 PSNR:  8.22 MAXDIFF:  238 bytes:  7603200/  7603200
+0dd4e19b989cb102ffb06768740d940f *tests/data/fate/vsynth_lena-xface.nut
+17551 tests/data/fate/vsynth_lena-xface.nut
+88ff6155a8b88872e6581d08febdb050 *tests/data/fate/vsynth_lena-xface.out.rawvideo
+stddev:   99.20 PSNR:  8.20 MAXDIFF:  238 bytes:  7603200/  7603200
diff --git a/tests/regression-funcs.sh b/tests/regression-funcs.sh
index c8e7c1b8..19bcecac 100755
--- a/tests/regression-funcs.sh
+++ b/tests/regression-funcs.sh
@@ -19,8 +19,8 @@ target_datadir="${target_path}/${datadir}"
 this="$test.$test_ref"
 outfile="$datadir/$test_ref/"
 
-# various files
-ffmpeg="$target_exec ${target_path}/ffmpeg"
+ # various files
+ffmpeg="$target_exec ${target_path}/ffmpeg${PROGSUF}"
 raw_src="${target_path}/$raw_src_dir/%02d.pgm"
 raw_dst="$datadir/$this.out.yuv"
 pcm_src="$target_datadir/asynth1.sw"
diff --git a/tests/simple1.ffconcat b/tests/simple1.ffconcat
new file mode 100644
index 00000000..0a754af4
--- /dev/null
+++ b/tests/simple1.ffconcat
@@ -0,0 +1,12 @@
+ffconcat version 1.0
+
+file      %SRCFILE%
+
+file      %SRCFILE%
+inpoint   00:00.80
+
+file      %SRCFILE%
+inpoint   00:00.20
+outpoint  00:00.40
+file_packet_metadata dummy=1
+
diff --git a/tests/simple2.ffconcat b/tests/simple2.ffconcat
new file mode 100644
index 00000000..2a0a1b5c
--- /dev/null
+++ b/tests/simple2.ffconcat
@@ -0,0 +1,19 @@
+ffconcat version 1.0
+
+stream    0
+exact_stream_id 257
+stream    1
+exact_stream_id 256
+
+file      %SRCFILE%
+
+file      %SRCFILE%
+duration  0.5
+
+file      %SRCFILE%
+inpoint   00:02.20
+
+file      %SRCFILE%
+inpoint   00:01.80
+outpoint  00:02.00
+
diff --git a/tests/tiny_psnr.c b/tests/tiny_psnr.c
index 5f791c16..338845e7 100644
--- a/tests/tiny_psnr.c
+++ b/tests/tiny_psnr.c
@@ -190,7 +190,7 @@ static int run_psnr(FILE *f[2], int len, int shift, int skip_bytes)
                     b = buf[1][j];
                 }
                 sse += (a - b) * (a - b);
-                dist = abs(a - b);
+                dist = llabs(a - b);
                 if (dist > maxdist)
                     maxdist = dist;
                 break;
diff --git a/tests/tiny_ssim.c b/tests/tiny_ssim.c
index 9f355a3d..08f8e92a 100644
--- a/tests/tiny_ssim.c
+++ b/tests/tiny_ssim.c
@@ -79,11 +79,11 @@ static float ssim_end1( int s1, int s2, int ss, int s12 )
  * s1*s1, s2*s2, and s1*s2 also obtain this value for edge cases: ((2^10-1)*16*4)^2 = 4286582784.
  * Maximum value for 9-bit is: ss*64 = (2^9-1)^2*16*4*64 = 1069551616, which will not overflow. */
 #if BIT_DEPTH > 9
-#define type float
+    typedef float type;
     static const float ssim_c1 = .01*.01*PIXEL_MAX*PIXEL_MAX*64;
     static const float ssim_c2 = .03*.03*PIXEL_MAX*PIXEL_MAX*64*63;
 #else
-#define type int
+    typedef int type;
     static const int ssim_c1 = (int)(.01*.01*PIXEL_MAX*PIXEL_MAX*64 + .5);
     static const int ssim_c2 = (int)(.03*.03*PIXEL_MAX*PIXEL_MAX*64*63 + .5);
 #endif
@@ -95,7 +95,6 @@ static float ssim_end1( int s1, int s2, int ss, int s12 )
     type covar = fs12*64 - fs1*fs2;
     return (float)(2*fs1*fs2 + ssim_c1) * (float)(2*covar + ssim_c2)
          / ((float)(fs1*fs1 + fs2*fs2 + ssim_c1) * (float)(vars + ssim_c2));
-#undef type
 }
 
 static float ssim_end4( int sum0[5][4], int sum1[5][4], int width )
diff --git a/tools/bookmarklets.html b/tools/bookmarklets.html
index 9800ab5a..6034b575 100644
--- a/tools/bookmarklets.html
+++ b/tools/bookmarklets.html
@@ -1,5 +1,5 @@
 <!DOCTYPE html>
-<html>
+<html lang="en">
 <head>
 <!--
     This file is part of FFmpeg.
@@ -30,14 +30,12 @@
 <body onload="init()">
 
 <h1>Introduction</h1>
-
-The scripts in this page are
+<p>The scripts in this page are
 <a href="http://en.wikipedia.org/wiki/Bookmarklet">bookmarklets</a>: store
 their link version in a bookmark, and later activate the bookmark on a page
-to run the script.
+to run the script.</p>
 
 <h1>TED Talks captions</h1>
-
 <p><a id="ted_talks_captions-link" href="#">Get links to the captions</a></p>
 
 <pre id="ted_talks_captions">
diff --git a/tools/build_libstagefright b/tools/build_libstagefright
deleted file mode 100644
index 22bb7122..00000000
--- a/tools/build_libstagefright
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/bin/bash
-
-if [ "$NDK" = "" ]; then
-    echo NDK variable not set, assuming ${HOME}/android-ndk
-    export NDK=${HOME}/android-ndk
-fi
-
-echo "Fetching Android system headers"
-git clone --depth=1 --branch gingerbread-release https://github.com/CyanogenMod/android_frameworks_base.git ../android-source/frameworks/base
-git clone --depth=1 --branch gingerbread-release https://github.com/CyanogenMod/android_system_core.git ../android-source/system/core
-
-echo "Fetching Android libraries for linking"
-# Libraries from any froyo/gingerbread device/emulator should work
-# fine, since the symbols used should be available on most of them.
-if [ ! -d "../android-libs" ]; then
-    if [ ! -f "../update-cm-7.0.3-N1-signed.zip" ]; then
-        wget http://download.cyanogenmod.com/get/update-cm-7.0.3-N1-signed.zip -P../
-    fi
-    unzip ../update-cm-7.0.3-N1-signed.zip system/lib/* -d../
-    mv ../system/lib ../android-libs
-    rmdir ../system
-fi
-
-
-SYSROOT=$NDK/platforms/android-9/arch-arm
-# Expand the prebuilt/* path into the correct one
-TOOLCHAIN=`echo $NDK/toolchains/arm-linux-androideabi-4.4.3/prebuilt/*-x86`
-export PATH=$TOOLCHAIN/bin:$PATH
-ANDROID_SOURCE=../android-source
-ANDROID_LIBS=../android-libs
-ABI="armeabi-v7a"
-
-rm -rf ../build/stagefright
-mkdir -p ../build/stagefright
-
-DEST=../build/stagefright
-FLAGS="--target-os=linux --cross-prefix=arm-linux-androideabi- --arch=arm --cpu=armv7-a"
-FLAGS="$FLAGS --sysroot=$SYSROOT"
-FLAGS="$FLAGS --disable-avdevice --disable-decoder=h264 --disable-decoder=h264_vdpau --enable-libstagefright-h264"
-
-EXTRA_CFLAGS="-I$ANDROID_SOURCE/frameworks/base/include -I$ANDROID_SOURCE/system/core/include"
-EXTRA_CFLAGS="$EXTRA_CFLAGS -I$ANDROID_SOURCE/frameworks/base/media/libstagefright"
-EXTRA_CFLAGS="$EXTRA_CFLAGS -I$ANDROID_SOURCE/frameworks/base/include/media/stagefright/openmax"
-EXTRA_CFLAGS="$EXTRA_CFLAGS -I$NDK/sources/cxx-stl/gnu-libstdc++/include -I$NDK/sources/cxx-stl/gnu-libstdc++/libs/$ABI/include"
-
-EXTRA_CFLAGS="$EXTRA_CFLAGS -march=armv7-a -mfloat-abi=softfp -mfpu=neon"
-EXTRA_LDFLAGS="-Wl,--fix-cortex-a8 -L$ANDROID_LIBS -Wl,-rpath-link,$ANDROID_LIBS -L$NDK/sources/cxx-stl/gnu-libstdc++/libs/$ABI"
-EXTRA_CXXFLAGS="-Wno-multichar -fno-exceptions -fno-rtti"
-DEST="$DEST/$ABI"
-FLAGS="$FLAGS --prefix=$DEST"
-
-mkdir -p $DEST
-
-echo $FLAGS --extra-cflags="$EXTRA_CFLAGS" --extra-ldflags="$EXTRA_LDFLAGS" --extra-cxxflags="$EXTRA_CXXFLAGS" > $DEST/info.txt
-./configure $FLAGS --extra-cflags="$EXTRA_CFLAGS" --extra-ldflags="$EXTRA_LDFLAGS" --extra-cxxflags="$EXTRA_CXXFLAGS" | tee $DEST/configuration.txt
-[ $PIPESTATUS == 0 ] || exit 1
-make clean
-make -j4 || exit 1
diff --git a/tools/crypto_bench.c b/tools/crypto_bench.c
index 79629bca..b3b24a6f 100644
--- a/tools/crypto_bench.c
+++ b/tools/crypto_bench.c
@@ -75,9 +75,12 @@ struct hash_impl {
 #include "libavutil/sha512.h"
 #include "libavutil/ripemd.h"
 #include "libavutil/aes.h"
+#include "libavutil/blowfish.h"
 #include "libavutil/camellia.h"
 #include "libavutil/cast5.h"
 #include "libavutil/twofish.h"
+#include "libavutil/rc4.h"
+#include "libavutil/xtea.h"
 
 #define IMPL_USE_lavu IMPL_USE
 
@@ -102,6 +105,7 @@ static void run_lavu_ ## suffix(uint8_t *output,                             \
 DEFINE_LAVU_MD(sha1,      AVSHA,    sha, 160);
 DEFINE_LAVU_MD(sha256,    AVSHA,    sha, 256);
 DEFINE_LAVU_MD(sha512,    AVSHA512, sha512, 512);
+DEFINE_LAVU_MD(ripemd128, AVRIPEMD, ripemd, 128);
 DEFINE_LAVU_MD(ripemd160, AVRIPEMD, ripemd, 160);
 
 static void run_lavu_aes128(uint8_t *output,
@@ -114,6 +118,16 @@ static void run_lavu_aes128(uint8_t *output,
     av_aes_crypt(aes, output, input, size >> 4, NULL, 0);
 }
 
+static void run_lavu_blowfish(uint8_t *output,
+                              const uint8_t *input, unsigned size)
+{
+    static struct AVBlowfish *blowfish;
+    if (!blowfish && !(blowfish = av_blowfish_alloc()))
+        fatal_error("out of memory");
+    av_blowfish_init(blowfish, hardcoded_key, 16);
+    av_blowfish_crypt(blowfish, output, input, size >> 3, NULL, 0);
+}
+
 static void run_lavu_camellia(uint8_t *output,
                               const uint8_t *input, unsigned size)
 {
@@ -143,6 +157,27 @@ static void run_lavu_twofish(uint8_t *output,
     av_twofish_init(twofish, hardcoded_key, 128);
     av_twofish_crypt(twofish, output, input, size >> 4, NULL, 0);
 }
+
+static void run_lavu_rc4(uint8_t *output,
+                              const uint8_t *input, unsigned size)
+{
+    static struct AVRC4 *rc4;
+    if (!rc4 && !(rc4 = av_rc4_alloc()))
+        fatal_error("out of memory");
+    av_rc4_init(rc4, hardcoded_key, 128, 0);
+    av_rc4_crypt(rc4, output, input, size, NULL, 0);
+}
+
+static void run_lavu_xtea(uint8_t *output,
+                              const uint8_t *input, unsigned size)
+{
+    static struct AVXTEA *xtea;
+    if (!xtea && !(xtea = av_xtea_alloc()))
+        fatal_error("out of memory");
+    av_xtea_init(xtea, hardcoded_key);
+    av_xtea_crypt(xtea, output, input, size >> 3, NULL, 0);
+}
+
 /***************************************************************************
  * crypto: OpenSSL's libcrypto
  ***************************************************************************/
@@ -153,8 +188,10 @@ static void run_lavu_twofish(uint8_t *output,
 #include <openssl/sha.h>
 #include <openssl/ripemd.h>
 #include <openssl/aes.h>
+#include <openssl/blowfish.h>
 #include <openssl/camellia.h>
 #include <openssl/cast.h>
+#include <openssl/rc4.h>
 
 #define DEFINE_CRYPTO_WRAPPER(suffix, function)                              \
 static void run_crypto_ ## suffix(uint8_t *output,                           \
@@ -181,6 +218,17 @@ static void run_crypto_aes128(uint8_t *output,
         AES_encrypt(input + i, output + i, &aes);
 }
 
+static void run_crypto_blowfish(uint8_t *output,
+                                const uint8_t *input, unsigned size)
+{
+    BF_KEY blowfish;
+    unsigned i;
+
+    BF_set_key(&blowfish, 16, hardcoded_key);
+    for (i = 0; i < size; i += 8)
+        BF_ecb_encrypt(input + i, output + i, &blowfish, 1);
+}
+
 static void run_crypto_camellia(uint8_t *output,
                                 const uint8_t *input, unsigned size)
 {
@@ -204,6 +252,15 @@ static void run_crypto_cast128(uint8_t *output,
         CAST_ecb_encrypt(input + i, output + i, &cast, 1);
 }
 
+static void run_crypto_rc4(uint8_t *output,
+                                const uint8_t *input, unsigned size)
+{
+    RC4_KEY rc4;
+
+    RC4_set_key(&rc4, 16, hardcoded_key);
+    RC4(&rc4, size, input, output);
+}
+
 #define IMPL_USE_crypto(...) IMPL_USE(__VA_ARGS__)
 #else
 #define IMPL_USE_crypto(...) /* ignore */
@@ -240,6 +297,16 @@ static void run_gcrypt_aes128(uint8_t *output,
     gcry_cipher_encrypt(aes, output, size, input, size);
 }
 
+static void run_gcrypt_blowfish(uint8_t *output,
+                                const uint8_t *input, unsigned size)
+{
+    static gcry_cipher_hd_t blowfish;
+    if (!blowfish)
+        gcry_cipher_open(&blowfish, GCRY_CIPHER_BLOWFISH, GCRY_CIPHER_MODE_ECB, 0);
+    gcry_cipher_setkey(blowfish, hardcoded_key, 16);
+    gcry_cipher_encrypt(blowfish, output, size, input, size);
+}
+
 static void run_gcrypt_camellia(uint8_t *output,
                                 const uint8_t *input, unsigned size)
 {
@@ -297,6 +364,7 @@ DEFINE_TOMCRYPT_WRAPPER(md5,       md5,    MD5)
 DEFINE_TOMCRYPT_WRAPPER(sha1,      sha1,   SHA1)
 DEFINE_TOMCRYPT_WRAPPER(sha256,    sha256, SHA256)
 DEFINE_TOMCRYPT_WRAPPER(sha512,    sha512, SHA512)
+DEFINE_TOMCRYPT_WRAPPER(ripemd128, rmd128, RIPEMD128)
 DEFINE_TOMCRYPT_WRAPPER(ripemd160, rmd160, RIPEMD160)
 
 static void run_tomcrypt_aes128(uint8_t *output,
@@ -311,6 +379,17 @@ static void run_tomcrypt_aes128(uint8_t *output,
         aes_ecb_encrypt(input + i, output + i, &aes);
 }
 
+static void run_tomcrypt_blowfish(uint8_t *output,
+                                  const uint8_t *input, unsigned size)
+{
+    symmetric_key blowfish;
+    unsigned i;
+
+    blowfish_setup(hardcoded_key, 16, 0, &blowfish);
+    for (i = 0; i < size; i += 8)
+        blowfish_ecb_encrypt(input + i, output + i, &blowfish);
+}
+
 static void run_tomcrypt_camellia(uint8_t *output,
                                   const uint8_t *input, unsigned size)
 {
@@ -346,6 +425,17 @@ static void run_tomcrypt_twofish(uint8_t *output,
         twofish_ecb_encrypt(input + i, output + i, &twofish);
 }
 
+static void run_tomcrypt_xtea(uint8_t *output,
+                              const uint8_t *input, unsigned size)
+{
+    symmetric_key xtea;
+    unsigned i;
+
+    xtea_setup(hardcoded_key, 16, 0, &xtea);
+    for (i = 0; i < size; i += 8)
+        xtea_ecb_encrypt(input + i, output + i, &xtea);
+}
+
 
 #define IMPL_USE_tomcrypt(...) IMPL_USE(__VA_ARGS__)
 #else
@@ -427,13 +517,20 @@ struct hash_impl implementations[] = {
     IMPL_ALL("SHA-256",    sha256,    "14028ac673b3087e51a1d407fbf0df4deeec8f217119e13b07bf2138f93db8c5")
     IMPL_ALL("SHA-512",    sha512,    "3afdd44a80d99af15c87bd724cb717243193767835ce866dd5d58c02d674bb57"
                                       "7c25b9e118c200a189fcd5a01ef106a4e200061f3e97dbf50ba065745fd46bef")
+    IMPL(lavu,     "RIPEMD-128", ripemd128, "9ab8bfba2ddccc5d99c9d4cdfb844a5f")
+    IMPL(tomcrypt, "RIPEMD-128", ripemd128, "9ab8bfba2ddccc5d99c9d4cdfb844a5f")
     IMPL_ALL("RIPEMD-160", ripemd160, "62a5321e4fc8784903bb43ab7752c75f8b25af00")
     IMPL_ALL("AES-128",    aes128,    "crc:ff6bc888")
     IMPL_ALL("CAMELLIA",   camellia,  "crc:7abb59a7")
     IMPL_ALL("CAST-128",   cast128,   "crc:456aa584")
+    IMPL_ALL("BLOWFISH",   blowfish,  "crc:33e8aa74")
     IMPL(lavu,     "TWOFISH", twofish, "crc:9edbd5c1")
     IMPL(gcrypt,   "TWOFISH", twofish, "crc:9edbd5c1")
     IMPL(tomcrypt, "TWOFISH", twofish, "crc:9edbd5c1")
+    IMPL(lavu,     "RC4",     rc4,     "crc:538d37b2")
+    IMPL(crypto,   "RC4",     rc4,     "crc:538d37b2")
+    IMPL(lavu,     "XTEA",    xtea,    "crc:931fc270")
+    IMPL(tomcrypt, "XTEA",    xtea,    "crc:931fc270")
 };
 
 int main(int argc, char **argv)
diff --git a/tools/cws2fws.c b/tools/cws2fws.c
index d6cd2edd..7046b699 100644
--- a/tools/cws2fws.c
+++ b/tools/cws2fws.c
@@ -31,6 +31,7 @@ int main(int argc, char *argv[])
     char buf_in[1024], buf_out[65536];
     z_stream zstream;
     struct stat statbuf;
+    int ret = 1;
 
     if (argc < 3) {
         printf("Usage: %s <infile.swf> <outfile.swf>\n", argv[0]);
@@ -52,14 +53,12 @@ int main(int argc, char *argv[])
 
     if (read(fd_in, &buf_in, 8) != 8) {
         printf("Header error\n");
-        close(fd_in);
-        close(fd_out);
-        return 1;
+        goto out;
     }
 
     if (buf_in[0] != 'C' || buf_in[1] != 'W' || buf_in[2] != 'S') {
         printf("Not a compressed flash file\n");
-        return 1;
+        goto out;
     }
 
     if (fstat(fd_in, &statbuf) < 0) {
@@ -76,7 +75,7 @@ int main(int argc, char *argv[])
     buf_in[0] = 'F';
     if (write(fd_out, &buf_in, 8) < 8) {
         perror("Error writing output file");
-        return 1;
+        goto out;
     }
 
     zstream.zalloc = NULL;
@@ -103,7 +102,7 @@ int main(int argc, char *argv[])
         if (ret != Z_STREAM_END && ret != Z_OK) {
             printf("Error while decompressing: %d\n", ret);
             inflateEnd(&zstream);
-            return 1;
+            goto out;
         }
 
         dbgprintf("a_in: %d t_in: %lu a_out: %d t_out: %lu -- %lu out\n",
@@ -113,7 +112,8 @@ int main(int argc, char *argv[])
         if (write(fd_out, &buf_out, zstream.total_out - last_out) <
             zstream.total_out - last_out) {
             perror("Error writing output file");
-            return 1;
+            inflateEnd(&zstream);
+            goto out;
         }
 
         i += len;
@@ -134,12 +134,15 @@ int main(int argc, char *argv[])
         if (   lseek(fd_out, 4, SEEK_SET) < 0
             || write(fd_out, &buf_in, 4) < 4) {
             perror("Error writing output file");
-            return 1;
+            inflateEnd(&zstream);
+            goto out;
         }
     }
 
+    ret = 0;
     inflateEnd(&zstream);
+out:
     close(fd_in);
     close(fd_out);
-    return 0;
+    return ret;
 }
diff --git a/tools/graph2dot.c b/tools/graph2dot.c
index 23c7331d..21d0795e 100644
--- a/tools/graph2dot.c
+++ b/tools/graph2dot.c
@@ -79,7 +79,8 @@ static void print_digraph(FILE *outfile, AVFilterGraph *graph)
 
                 fprintf(outfile, "\"%s\" -> \"%s\" [ label= \"inpad:%s -> outpad:%s\\n",
                         filter_ctx_label, dst_filter_ctx_label,
-                        link->srcpad->name, link->dstpad->name);
+                        avfilter_pad_get_name(link->srcpad, 0),
+                        avfilter_pad_get_name(link->dstpad, 0));
 
                 if (link->type == AVMEDIA_TYPE_VIDEO) {
                     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(link->format);
diff --git a/tools/ismindex.c b/tools/ismindex.c
index 8636c966..dfef1186 100644
--- a/tools/ismindex.c
+++ b/tools/ismindex.c
@@ -378,7 +378,7 @@ static int read_tfra(struct Tracks *tracks, int start_index, AVIOContext *f)
     // Now try and read the actual durations from the trun sample data.
     for (i = 0; i < track->chunks; i++) {
         int64_t duration = read_moof_duration(f, track->offsets[i].offset);
-        if (duration > 0 && abs(duration - track->offsets[i].duration) > 3) {
+        if (duration > 0 && llabs(duration - track->offsets[i].duration) > 3) {
             // 3 allows for integer duration to drift a few units,
             // e.g., for 1/3 durations
             track->offsets[i].duration = duration;
diff --git a/tools/patcheck b/tools/patcheck
index cbdbf8d3..06482e26 100755
--- a/tools/patcheck
+++ b/tools/patcheck
@@ -143,7 +143,7 @@ fi
 
 $GREP '^+++ .*Changelog' $* >/dev/null || printf "\nMissing changelog entry (ignore if minor change)\n"
 
-cat $* | tr '\n' '@' | $EGREP --color=always -o '(fprintf|av_log|printf)\([^)]*\)[+ ;@]*\1'  >$TMP && printf "\nMergeable calls\n"
+cat $* | $GREP -v '^-' | tr '\n' '@' | $EGREP --color=always -o '(fprintf|av_log|printf)\([^)]*\)[+ ;@]*\1'  >$TMP && printf "\nMergeable calls\n"
 cat $TMP | tr '@' '\n'
 
 cat $* | tr '\n' '@' | $EGREP --color=always -o '\+ *if *\( *([A-Za-z0-9_]*) *[<>]=? *[0-9]* *\) * \1 *= *[0-9]* *;[ @\\+]*else *if *\( *\1 *[<>]=? *[0-9]* *\) *\1 *= *[0-9]* *;'  >$TMP && printf "\nav_clip / av_clip_uint8 / av_clip_int16 / ...\n"
diff --git a/tools/pktdumper.c b/tools/pktdumper.c
index 61cb5cc8..6516ad3a 100644
--- a/tools/pktdumper.c
+++ b/tools/pktdumper.c
@@ -122,7 +122,7 @@ int main(int argc, char **argv)
             }
             close(fd);
         }
-        av_free_packet(&pkt);
+        av_packet_unref(&pkt);
         pktnum++;
         if (maxpkts && (pktnum >= maxpkts))
             break;
diff --git a/tools/seek_print.c b/tools/seek_print.c
index c42b28dd..de876b48 100644
--- a/tools/seek_print.c
+++ b/tools/seek_print.c
@@ -87,7 +87,7 @@ int main(int argc, char **argv)
                        ret, packet.size, packet.stream_index,
                        av_ts2str(packet.dts), av_ts2timestr(packet.dts, tb),
                        av_ts2str(packet.pts), av_ts2timestr(packet.pts, tb));
-                av_free_packet(&packet);
+                av_packet_unref(&packet);
             }
         } else if (sscanf(*argv, "seek:%i:%"SCNi64":%"SCNi64":%"SCNi64":%i",
                    &stream, &min_ts, &ts, &max_ts, &flags) == 5) {
diff --git a/version.sh b/version.sh
index f9754eb3..a9d7e398 100755
--- a/version.sh
+++ b/version.sh
@@ -54,6 +54,7 @@ GUARD=$(echo "$2" | sed 's/\//_/' | sed 's/\./_/' | tr '[:lower:]' '[:upper:]' |
 # Update version header only on revision changes to avoid spurious rebuilds
 if test "$NEW_REVISION" != "$OLD_REVISION"; then
     cat << EOF > "$2"
+/* Automatically generated by version.sh, do not manually edit! */
 #ifndef $GUARD
 #define $GUARD
 $NEW_REVISION